Merge pull request #568 from ucb-bar/dev-dsptools

Dsptools example cleanup
This commit is contained in:
Abraham Gonzalez
2020-05-28 15:25:09 -07:00
committed by GitHub
18 changed files with 887 additions and 15 deletions

View File

@@ -204,6 +204,16 @@ jobs:
steps: steps:
- prepare-rtl: - prepare-rtl:
project-key: "chipyard-sha3" project-key: "chipyard-sha3"
prepare-chipyard-streaming-fir:
executor: main-env
steps:
- prepare-rtl:
project-key: "chipyard-streaming-fir"
prepare-chipyard-streaming-passthrough:
executor: main-env
steps:
- prepare-rtl:
project-key: "chipyard-streaming-passthrough"
prepare-chipyard-hetero: prepare-chipyard-hetero:
executor: main-env executor: main-env
steps: steps:
@@ -287,6 +297,16 @@ jobs:
steps: steps:
- run-tests: - run-tests:
project-key: "chipyard-sha3" project-key: "chipyard-sha3"
chipyard-streaming-fir-run-tests:
executor: main-env
steps:
- run-tests:
project-key: "chipyard-streaming-fir"
chipyard-streaming-passthrough-run-tests:
executor: main-env
steps:
- run-tests:
project-key: "chipyard-streaming-passthrough"
chipyard-hetero-run-tests: chipyard-hetero-run-tests:
executor: main-env executor: main-env
steps: steps:
@@ -439,6 +459,16 @@ workflows:
- install-riscv-toolchain - install-riscv-toolchain
- install-verilator - install-verilator
- prepare-chipyard-streaming-fir:
requires:
- install-riscv-toolchain
- install-verilator
- prepare-chipyard-streaming-passthrough:
requires:
- install-riscv-toolchain
- install-verilator
- prepare-chipyard-hetero: - prepare-chipyard-hetero:
requires: requires:
- install-riscv-toolchain - install-riscv-toolchain
@@ -525,6 +555,14 @@ workflows:
requires: requires:
- prepare-chipyard-sha3 - prepare-chipyard-sha3
- chipyard-streaming-fir-run-tests:
requires:
- prepare-chipyard-streaming-fir
- chipyard-streaming-passthrough-run-tests:
requires:
- prepare-chipyard-streaming-passthrough
- chipyard-hetero-run-tests: - chipyard-hetero-run-tests:
requires: requires:
- prepare-chipyard-hetero - prepare-chipyard-hetero

View File

@@ -49,6 +49,8 @@ LOCAL_FIRESIM_DIR=$LOCAL_CHIPYARD_DIR/sims/firesim/sim
declare -A mapping declare -A mapping
mapping["chipyard-rocket"]="SUB_PROJECT=chipyard" mapping["chipyard-rocket"]="SUB_PROJECT=chipyard"
mapping["chipyard-sha3"]="SUB_PROJECT=chipyard CONFIG=Sha3RocketConfig" mapping["chipyard-sha3"]="SUB_PROJECT=chipyard CONFIG=Sha3RocketConfig"
mapping["chipyard-streaming-fir"]="SUB_PROJECT=chipyard CONFIG=StreamingFIRRocketConfig"
mapping["chipyard-streaming-passthrough"]="SUB_PROJECT=chipyard CONFIG=StreamingPassthroughRocketConfig"
mapping["chipyard-hetero"]="SUB_PROJECT=chipyard CONFIG=LargeBoomAndRocketConfig" mapping["chipyard-hetero"]="SUB_PROJECT=chipyard CONFIG=LargeBoomAndRocketConfig"
mapping["chipyard-boom"]="SUB_PROJECT=chipyard CONFIG=SmallBoomConfig" mapping["chipyard-boom"]="SUB_PROJECT=chipyard CONFIG=SmallBoomConfig"
mapping["chipyard-blkdev"]="SUB_PROJECT=chipyard CONFIG=SimBlockDeviceRocketConfig" mapping["chipyard-blkdev"]="SUB_PROJECT=chipyard CONFIG=SimBlockDeviceRocketConfig"

View File

@@ -62,6 +62,14 @@ case $1 in
(cd $LOCAL_CHIPYARD_DIR/generators/sha3/software && ./build.sh) (cd $LOCAL_CHIPYARD_DIR/generators/sha3/software && ./build.sh)
$LOCAL_SIM_DIR/simulator-chipyard-Sha3RocketConfig $LOCAL_CHIPYARD_DIR/generators/sha3/software/benchmarks/bare/sha3-rocc.riscv $LOCAL_SIM_DIR/simulator-chipyard-Sha3RocketConfig $LOCAL_CHIPYARD_DIR/generators/sha3/software/benchmarks/bare/sha3-rocc.riscv
;; ;;
chipyard-streaming-passthrough)
make -C $LOCAL_CHIPYARD_DIR/tests
$LOCAL_SIM_DIR/simulator-chipyard-StreamingPassthroughRocketConfig $LOCAL_CHIPYARD_DIR/tests/streaming-passthrough.riscv
;;
chipyard-streaming-fir)
make -C $LOCAL_CHIPYARD_DIR/tests
$LOCAL_SIM_DIR/simulator-chipyard-StreamingFIRRocketConfig $LOCAL_CHIPYARD_DIR/tests/streaming-fir.riscv
;;
chipyard-spiflashread) chipyard-spiflashread)
make -C $LOCAL_CHIPYARD_DIR/tests make -C $LOCAL_CHIPYARD_DIR/tests
make -C $LOCAL_SIM_DIR ${mapping[$1]} BINARY=$LOCAL_CHIPYARD_DIR/tests/spiflashread.riscv SIM_FLAGS="+spiflash0=${LOCAL_CHIPYARD_DIR}/tests/spiflash.img" run-binary make -C $LOCAL_SIM_DIR ${mapping[$1]} BINARY=$LOCAL_CHIPYARD_DIR/tests/spiflashread.riscv SIM_FLAGS="+spiflash0=${LOCAL_CHIPYARD_DIR}/tests/spiflash.img" run-binary

View File

@@ -20,6 +20,8 @@ lazy val commonSettings = Seq(
libraryDependencies += "com.github.scopt" %% "scopt" % "3.7.0", libraryDependencies += "com.github.scopt" %% "scopt" % "3.7.0",
libraryDependencies += "org.scala-lang.modules" % "scala-jline" % "2.12.1", libraryDependencies += "org.scala-lang.modules" % "scala-jline" % "2.12.1",
libraryDependencies += "com.typesafe.play" %% "play-json" % "2.6.10", libraryDependencies += "com.typesafe.play" %% "play-json" % "2.6.10",
libraryDependencies += "org.typelevel" %% "spire" % "0.16.2",
libraryDependencies += "org.scalanlp" %% "breeze" % "1.0",
addCompilerPlugin("org.scalamacros" % "paradise" % "2.1.0" cross CrossVersion.full), addCompilerPlugin("org.scalamacros" % "paradise" % "2.1.0" cross CrossVersion.full),
unmanagedBase := (chipyardRoot / unmanagedBase).value, unmanagedBase := (chipyardRoot / unmanagedBase).value,
allDependencies := allDependencies.value.filterNot(_.organization == "edu.berkeley.cs"), allDependencies := allDependencies.value.filterNot(_.organization == "edu.berkeley.cs"),
@@ -129,6 +131,7 @@ lazy val iocell = (project in file("./tools/barstools/iocell/"))
lazy val chipyard = conditionalDependsOn(project in file("generators/chipyard")) lazy val chipyard = conditionalDependsOn(project in file("generators/chipyard"))
.dependsOn(boom, hwacha, sifive_blocks, sifive_cache, utilities, iocell, .dependsOn(boom, hwacha, sifive_blocks, sifive_cache, utilities, iocell,
sha3, // On separate line to allow for cleaner tutorial-setup patches sha3, // On separate line to allow for cleaner tutorial-setup patches
dsptools, `rocket-dsptools`,
gemmini, icenet, tracegen, ariane, nvdla) gemmini, icenet, tracegen, ariane, nvdla)
.settings(commonSettings) .settings(commonSettings)
@@ -180,19 +183,17 @@ lazy val barstoolsMacros = (project in file("./tools/barstools/macros/"))
.enablePlugins(sbtassembly.AssemblyPlugin) .enablePlugins(sbtassembly.AssemblyPlugin)
.settings(commonSettings) .settings(commonSettings)
lazy val dsptools = (project in file("./tools/dsptools")) lazy val dsptools = freshProject("dsptools", file("./tools/dsptools"))
.dependsOn(chisel, chisel_testers) .dependsOn(chisel, chisel_testers)
.settings( .settings(
commonSettings, commonSettings,
libraryDependencies ++= Seq( libraryDependencies ++= Seq(
"org.typelevel" %% "spire" % "0.14.1", "junit" % "junit" % "4.13" % "test",
"org.scalanlp" %% "breeze" % "0.13.2", "org.scalatest" %% "scalatest" % "3.0.8",
"junit" % "junit" % "4.12" % "test", "org.scalacheck" %% "scalacheck" % "1.14.3" % "test"
"org.scalatest" %% "scalatest" % "3.0.5" % "test",
"org.scalacheck" %% "scalacheck" % "1.14.0" % "test"
)) ))
lazy val `rocket-dsptools` = (project in file("./tools/dsptools/rocket")) lazy val `rocket-dsptools` = freshProject("rocket-dsptools", file("./tools/dsptools/rocket"))
.dependsOn(rocketchip, dsptools) .dependsOn(rocketchip, dsptools)
.settings(commonSettings) .settings(commonSettings)

View File

@@ -0,0 +1,125 @@
.. _dsptools-blocks:
Dsptools is a Chisel library that aids in writing custom signal processing accelerators. It does this by:
* Giving types and helpers that allow you to express mathematical operations more directly.
* Typeclasses that let you write polymorphic generators, for example an FIR filter generator that works for both real- and complex-valued filters.
* Structures for packaging DSP blocks and integrating them into a rocketchip-based SoC.
* Test harnesses for testing DSP circuits, as well as VIP-style drivers and monitors for DSP blocks.
The `Dsptools repository <https://github.com/ucb-bar/dsptools/>`_ has more documentation.
Dsptools Blocks
===============
A ``DspBlock`` is the basic unit of signal processing functionality that can be integrated into an SoC.
It has an AXI4-stream interface and an optional memory interface.
The idea is that these ``DspBlocks`` can be easily designed, unit tested, and assembled lego-style to build complex functionality.
A ``DspChain`` is one example of how to assemble ``DspBlocks``, in which case the streaming interfaces are connected serially into a pipeline, and a bus is instantiated and connected to every block with a memory interface.
Chipyard has example designs that integrate a ``DspBlock`` to a rocketchip-based SoC as an MMIO peripheral. The custom ``DspBlock`` has a ``ReadQueue`` before it and a ``WriteQueue`` after it, which allow memory mapped access to the streaming interfaces so the rocket core can interact with the ``DspBlock`` [#]_. This section will primarily focus on designing Tilelink-based peripherals. However, through the resources provided in Dsptools, one could also define an AXI4-based peripheral by following similar steps. Furthermore, the examples here are simple, but can be extended to implement more complex accelerators, for example an `OFDM baseband <https://github.com/grebe/ofdm>`_ or a `spectrometer <https://github.com/ucb-art/craft2-chip>`_.
.. figure:: ../_static/images/fir-block-diagram.svg
:align: center
:alt: Block diagram showing how FIR is integrated with rocket.
:width: 400px
For this example, we will show you how to connect a simple FIR filter created using Dsptools as an MMIO peripheral as shown in the figure above. The full code can be found in ``generators/chipyard/src/main/scala/example/dsptools/GenericFIR.scala``. That being said, one could substitute any module with a ready-valid interface in place of the FIR and achieve the same results. As long as the ready and valid signals of the module are attached to those of a corresponding ``DspBlock`` wrapper, and that wrapper is placed in a chain with a ``ReadQueue`` and a ``WriteQueue``, following the general outline established by these steps will allow you to interact with that block as a memory-mapped IO device.
The module ``GenericFIR`` is the overall wrapper of our FIR module. This module links together a variable number of ``GenericFIRDirectCell`` submodules, each of which performs the computations for one coefficient in a FIR direct form architecture. It is important to note that both modules are type-generic, which means that they can be instantiated for any datatype ``T`` that implements ``Ring`` operations (e.g. addition, multiplication, identities).
.. literalinclude:: ../../generators/chipyard/src/main/scala/example/dsptools/GenericFIR.scala
:language: scala
:start-after: DOC include start: GenericFIR chisel
:end-before: DOC include end: GenericFIR chisel
.. literalinclude:: ../../generators/chipyard/src/main/scala/example/dsptools/GenericFIR.scala
:language: scala
:start-after: DOC include start: GenericFIRDirectCell chisel
:end-before: DOC include end: GenericFIRDirectCell chisel
Creating a DspBlock
-------------------
The first step in attaching the FIR filter as an MMIO peripheral is to create an abstract subclass of ``DspBlock`` that wraps around the ``GenericFIR`` module. Streaming outputs and inputs are packed and unpacked into ``UInt`` s. If there were control signals, this is where they'd go from raw IOs to memory mapped. The main steps of this process are as follows.
1. Instantiate a ``GenericFIR`` within ``GenericFIRBlock``.
2. Attach the ready and valid signals from the in and out connections.
3. Cast the module input data to the input type of ``GenericFIR`` (``GenericFIRBundle``) and attach.
4. Cast the output of ``GenericFIR`` to ``UInt`` and attach to the module output.
.. literalinclude:: ../../generators/chipyard/src/main/scala/example/dsptools/GenericFIR.scala
:language: scala
:start-after: DOC include start: GenericFIRBlock chisel
:end-before: DOC include end: GenericFIRBlock chisel
Note that at this point the ``GenericFIRBlock`` does not have a type of memory interface specified. This abstract class can be used to create different flavors that use AXI-4, TileLink, AHB, or whatever other memory interface you like.
Connecting DspBlock by TileLink
-------------------------------
With these classes implemented, you can begin to construct the chain by extending ``GenericFIRBlock`` while using the ``TLDspBlock`` trait via mixin.
.. literalinclude:: ../../generators/chipyard/src/main/scala/example/dsptools/GenericFIR.scala
:language: scala
:start-after: DOC include start: TLGenericFIRBlock chisel
:end-before: DOC include end: TLGenericFIRBlock chisel
We can then construct the final chain by utilizing the ``TLWriteQueue`` and ``TLReadQueue`` modules found in ``generators/chipyard/src/main/scala/example/dsptools/DspBlocks.scala``. The chain is created by passing a list of factory functions to the constructor of ``TLChain``. The constructor then automatically instantiates these ``DspBlocks``, connects their stream nodes in order, creates a bus, and connects any ``DspBlocks`` that have memory interfaces to the bus.
.. literalinclude:: ../../generators/chipyard/src/main/scala/example/dsptools/GenericFIR.scala
:language: scala
:start-after: DOC include start: TLGenericFIRChain chisel
:end-before: DOC include end: TLGenericFIRChain chisel
Top Level Traits
----------------
As in the previous MMIO example, we use a cake pattern to hook up our module to our SoC.
.. literalinclude:: ../../generators/chipyard/src/main/scala/example/dsptools/GenericFIR.scala
:language: scala
:start-after: DOC include start: CanHavePeripheryStreamingFIR chisel
:end-before: DOC include end: CanHavePeripheryStreamingFIR chisel
Note that this is the point at which we decide the datatype for our FIR. You could create different configs that use different types for the FIR, for example a config that instantiates a complex-valued FIR filter.
Constructing the Top and Config
-------------------------------
Once again following the path of the previous MMIO example, we now want to mix our traits into the system as a whole. The code is from ``generators/chipyard/src/main/scala/DigitalTop.scala``.
.. literalinclude:: ../../generators/chipyard/src/main/scala/DigitalTop.scala
:language: scala
:start-after: DOC include start: DigitalTop
:end-before: DOC include end: DigitalTop
Finally, we create the configuration class in ``generators/chipyard/src/main/scala/config/RocketConfigs.scala`` that uses the ``WithStreamingFIR`` mixin defined in ``generators/chipyard/src/main/scala/example/dsptools/GenericFIR.scala``.
.. literalinclude:: ../../generators/chipyard/src/main/scala/example/dsptools/GenericFIR.scala
:language: scala
:start-after: DOC include start: WithStreamingFIR
:end-before: DOC include end: WithStreamingFIR
.. literalinclude:: ../../generators/chipyard/src/main/scala/config/RocketConfigs.scala
:language: scala
:start-after: DOC include start: StreamingFIRRocketConfig
:end-before: DOC include end: StreamingFIRRocketConfig
FIR Testing
-----------
We can now test that the FIR is working. The test program is found in ``tests/streaming-fir.c``.
.. literalinclude:: ../../tests/streaming-fir.c
:language: c
The test feeds a series of values into the FIR and compares the output to a golden model of computation. The base of the module's MMIO write region is at 0x2000 and the base of the read region is at 0x2100 by default.
Compiling this program with ``make`` produces a ``streaming-fir.riscv`` executable.
Now we can run our simulation.
.. code-block:: shell
cd sims/verilator
make CONFIG=StreamingFIRRocketConfig BINARY=../../tests/streaming-fir.riscv run-binary
.. [#] ``ReadQueue`` and ``WriteQueue`` are good illustrations of how to write a ``DspBlock`` and how they can be integrated into rocket, but in a real design a DMA engine would be preferred. ``ReadQueue`` will stall the processor if you try to read an empty queue, and ``WriteQueue`` will stall if you try to write to a full queue, which a DMA engine can more elegantly avoid. Furthermore, a DMA engine can do the work of moving data, freeing the processor to do other useful work (or sleep).

View File

@@ -11,7 +11,9 @@ These guides will walk you through customization of your system-on-chip:
- Adding custom MMIO widgets to the Chipyard memory system by Tilelink or AXI4, with custom Top-level IOs - Adding custom MMIO widgets to the Chipyard memory system by Tilelink or AXI4, with custom Top-level IOs
- Standard practices for using keys, traits, and configs to parameterize your design - Adding custom Dsptools based blocks as MMIO widgets.
- Standard practices for using Keys, Traits, and Configs to parameterize your design
- Customizing the memory hierarchy - Customizing the memory hierarchy
@@ -36,6 +38,7 @@ We recommend reading all these pages in order. Hit next to get started!
RoCC-or-MMIO RoCC-or-MMIO
RoCC-Accelerators RoCC-Accelerators
MMIO-Peripherals MMIO-Peripherals
Dsptools-Blocks
Keys-Traits-Configs Keys-Traits-Configs
DMA-Devices DMA-Devices
Incorporating-Verilog-Blocks Incorporating-Verilog-Blocks

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 32 KiB

View File

@@ -23,6 +23,8 @@ class DigitalTop(implicit p: Parameters) extends System
with icenet.CanHavePeripheryIceNIC // Enables optionally adding the IceNIC for FireSim with icenet.CanHavePeripheryIceNIC // Enables optionally adding the IceNIC for FireSim
with chipyard.example.CanHavePeripheryInitZero // Enables optionally adding the initzero example widget with chipyard.example.CanHavePeripheryInitZero // Enables optionally adding the initzero example widget
with chipyard.example.CanHavePeripheryGCD // Enables optionally adding the GCD example widget with chipyard.example.CanHavePeripheryGCD // Enables optionally adding the GCD example widget
with chipyard.example.CanHavePeripheryStreamingFIR // Enables optionally adding the DSPTools FIR example widget
with chipyard.example.CanHavePeripheryStreamingPassthrough // Enables optionally adding the DSPTools streaming-passthrough example widget
with nvidia.blocks.dla.CanHavePeripheryNVDLA // Enables optionally having an NVDLA with nvidia.blocks.dla.CanHavePeripheryNVDLA // Enables optionally having an NVDLA
{ {
override lazy val module = new DigitalTopModule(this) override lazy val module = new DigitalTopModule(this)

View File

@@ -465,6 +465,46 @@ class RingSystemBusRocketConfig extends Config(
new freechips.rocketchip.system.BaseConfig) new freechips.rocketchip.system.BaseConfig)
// DOC include end: RingSystemBusRocket // DOC include end: RingSystemBusRocket
// Rocket-based config that enables the Dsptools streaming-passthrough example.
// Apart from the first fragment (WithStreamingPassthrough), the fragment list is
// identical to the one used by StreamingFIRRocketConfig.
// NOTE(review): the fragments are chained with ++, so their order is presumably
// significant — do not reorder without checking the Config semantics.
class StreamingPassthroughRocketConfig extends Config(
  new chipyard.example.WithStreamingPassthrough ++ // use top with tilelink-controlled streaming passthrough
  new chipyard.iobinders.WithUARTAdapter ++
  new chipyard.iobinders.WithTieOffInterrupts ++
  new chipyard.iobinders.WithBlackBoxSimMem ++
  new chipyard.iobinders.WithTiedOffDebug ++
  new chipyard.iobinders.WithSimSerial ++
  new testchipip.WithTSI ++
  new chipyard.config.WithBootROM ++
  new chipyard.config.WithUART ++
  new chipyard.config.WithL2TLBs(1024) ++
  new freechips.rocketchip.subsystem.WithNoMMIOPort ++
  new freechips.rocketchip.subsystem.WithNoSlavePort ++
  new freechips.rocketchip.subsystem.WithInclusiveCache ++
  new freechips.rocketchip.subsystem.WithNExtTopInterrupts(0) ++
  new freechips.rocketchip.subsystem.WithNBigCores(1) ++
  new freechips.rocketchip.subsystem.WithCoherentBusTopology ++
  new freechips.rocketchip.system.BaseConfig)
// DOC include start: StreamingFIRRocketConfig
// Rocket-based config that enables the Dsptools streaming FIR example via
// WithStreamingFIR; the remaining fragments match StreamingPassthroughRocketConfig.
// NOTE(review): the fragments are chained with ++, so their order is presumably
// significant — do not reorder without checking the Config semantics.
class StreamingFIRRocketConfig extends Config (
  new chipyard.example.WithStreamingFIR ++ // use top with tilelink-controlled streaming FIR
  new chipyard.iobinders.WithUARTAdapter ++
  new chipyard.iobinders.WithTieOffInterrupts ++
  new chipyard.iobinders.WithBlackBoxSimMem ++
  new chipyard.iobinders.WithTiedOffDebug ++
  new chipyard.iobinders.WithSimSerial ++
  new testchipip.WithTSI ++
  new chipyard.config.WithBootROM ++
  new chipyard.config.WithUART ++
  new chipyard.config.WithL2TLBs(1024) ++
  new freechips.rocketchip.subsystem.WithNoMMIOPort ++
  new freechips.rocketchip.subsystem.WithNoSlavePort ++
  new freechips.rocketchip.subsystem.WithInclusiveCache ++
  new freechips.rocketchip.subsystem.WithNExtTopInterrupts(0) ++
  new freechips.rocketchip.subsystem.WithNBigCores(1) ++
  new freechips.rocketchip.subsystem.WithCoherentBusTopology ++
  new freechips.rocketchip.system.BaseConfig)
// DOC include end: StreamingFIRRocketConfig
class SmallNVDLARocketConfig extends Config( class SmallNVDLARocketConfig extends Config(
new chipyard.iobinders.WithUARTAdapter ++ new chipyard.iobinders.WithUARTAdapter ++
new chipyard.iobinders.WithTieOffInterrupts ++ new chipyard.iobinders.WithTieOffInterrupts ++

View File

@@ -0,0 +1,162 @@
package chipyard.example
import chisel3._
import chisel3.util._
import dspblocks._
import dsptools.numbers._
import freechips.rocketchip.amba.axi4stream._
import freechips.rocketchip.config.Parameters
import freechips.rocketchip.diplomacy._
import freechips.rocketchip.regmapper._
import freechips.rocketchip.tilelink._
import freechips.rocketchip.subsystem._
/**
  * Memory-mapped write queue that drives an AXI4-Stream master node.
  *
  * Entries written through the memory (CSR) interface are enqueued and then
  * streamed out of the single output edge of `streamNode`.
  *
  * Register map, as passed to `regmap` below:
  *  - offset `0x0`: write-only; each write enqueues one entry
  *  - offset `(width+7)/8`: read-only; current value of `queue.io.count`
  *
  * The type parameters are forwarded unchanged to `DspBlock`.
  *
  * @param depth number of entries in the queue
  * @param streamParameters parameters for the stream node
  * @param p implicit Parameters instance
  */
abstract class WriteQueue[D, U, E, O, B <: Data]
(
  val depth: Int,
  val streamParameters: AXI4StreamMasterParameters = AXI4StreamMasterParameters()
)(implicit p: Parameters) extends DspBlock[D, U, E, O, B] with HasCSR {
  // stream node, output only
  val streamNode = AXI4StreamMasterNode(streamParameters)

  lazy val module = new LazyModuleImp(this) {
    // exactly one outgoing stream edge is supported
    require(streamNode.out.length == 1)

    // get the output bundle associated with the AXI4Stream node
    val out = streamNode.out.head._1
    // width (in bits) of the output interface, computed as n bytes * 8
    val width = out.params.n * 8
    // instantiate a queue whose entries are dataBits wide
    val queue = Module(new Queue(UInt(out.params.dataBits.W), depth))
    // connect queue output to streaming output
    out.valid := queue.io.deq.valid
    out.bits.data := queue.io.deq.bits
    // don't use last: it is tied low on every beat
    out.bits.last := false.B
    queue.io.deq.ready := out.ready

    regmap(
      // each write adds an entry to the queue
      0x0 -> Seq(RegField.w(width, queue.io.enq)),
      // read the number of entries in the queue; this register sits
      // (width+7)/8 bytes after the enqueue register
      (width+7)/8 -> Seq(RegField.r(width, queue.io.count)),
    )
  }
}
/**
  * TileLink flavour of [[WriteQueue]]: the CSR interface is exposed through a
  * `TLRegisterNode` covering `csrAddress`.
  *
  * @param depth number of entries in the queue
  * @param csrAddress address range occupied by the peripheral's registers
  * @param beatBytes beatBytes of the TileLink register node
  * @param p implicit Parameters instance
  */
class TLWriteQueue(depth: Int, csrAddress: AddressSet, beatBytes: Int)(implicit p: Parameters)
  extends WriteQueue[
    TLClientPortParameters, TLManagerPortParameters, TLEdgeOut, TLEdgeIn, TLBundle
  ](depth)
  with TLHasCSR {

  val devname = "tlQueueIn"
  val devcompat = Seq("ucb-art", "dsptools")

  // Device description used for the register node; describe() adds nothing
  // on top of what SimpleDevice already reports.
  val device = new SimpleDevice(devname, devcompat) {
    override def describe(resources: ResourceBindings): Description =
      super.describe(resources)
  }

  // Diplomatic TileLink node that backs the regmap declared in WriteQueue.
  override val mem = Some(
    TLRegisterNode(address = Seq(csrAddress), device = device, beatBytes = beatBytes)
  )
}
/** Factory that builds a [[TLWriteQueue]] and wraps it in a `LazyModule`. */
object TLWriteQueue {
  /**
    * @param depth number of entries in the queue (default 8)
    * @param csrAddress register address range (default `AddressSet(0x2000, 0xff)`)
    * @param beatBytes beatBytes of the TileLink register node (default 8)
    * @param p implicit Parameters instance
    * @return the `LazyModule`-wrapped write queue
    */
  def apply(
    depth: Int = 8,
    csrAddress: AddressSet = AddressSet(0x2000, 0xff),
    beatBytes: Int = 8,
  )(implicit p: Parameters) = {
    // NOTE(review): keep the named val. rocket-chip's LazyModule appears to take
    // its instance name from the enclosing val — confirm before inlining this.
    val writeQueue = LazyModule(new TLWriteQueue(depth = depth, csrAddress = csrAddress, beatBytes = beatBytes))
    writeQueue
  }
}
/**
  * Memory-mapped read queue that is filled from an AXI4-Stream slave node.
  *
  * Beats arriving on the single input edge of `streamNode` are enqueued; the
  * memory (CSR) interface dequeues them by reading.
  *
  * Register map, as passed to `regmap` below:
  *  - offset `0x0`: read-only; each read dequeues one entry
  *  - offset `(width+7)/8`: read-only; current value of `queue.io.count`
  *
  * The type parameters are forwarded unchanged to `DspBlock`.
  *
  * @param depth number of entries in the queue
  * @param streamParameters parameters for the stream node
  * @param p implicit Parameters instance
  */
abstract class ReadQueue[D, U, E, O, B <: Data]
(
  val depth: Int,
  val streamParameters: AXI4StreamSlaveParameters = AXI4StreamSlaveParameters()
)(implicit p: Parameters) extends DspBlock[D, U, E, O, B] with HasCSR {
  // stream node, input only
  val streamNode = AXI4StreamSlaveNode(streamParameters)

  lazy val module = new LazyModuleImp(this) {
    // exactly one incoming stream edge is supported
    require(streamNode.in.length == 1)

    // get the input associated with the stream node
    val in = streamNode.in.head._1
    // make a Decoupled[UInt] that RegReadFn can do something with
    // (the UInt is created without an explicit width)
    val out = Wire(Decoupled(UInt()))
    // get width of streaming input interface, computed as n bytes * 8
    val width = in.params.n * 8
    // instantiate a queue whose entries are dataBits wide
    val queue = Module(new Queue(UInt(in.params.dataBits.W), depth))
    // connect input to the streaming interface
    queue.io.enq.valid := in.valid
    queue.io.enq.bits := in.bits.data
    in.ready := queue.io.enq.ready
    // connect output to wire
    out.valid := queue.io.deq.valid
    out.bits := queue.io.deq.bits
    queue.io.deq.ready := out.ready

    regmap(
      // map the output of the queue
      0x0 -> Seq(RegField.r(width, RegReadFn(out))),
      // read the number of elements in the queue; this register sits
      // (width+7)/8 bytes after the dequeue register
      (width+7)/8 -> Seq(RegField.r(width, queue.io.count)),
    )
  }
}
/**
  * TileLink flavour of [[ReadQueue]]: the CSR interface is exposed through a
  * `TLRegisterNode` covering `csrAddress`.
  *
  * @param depth number of entries in the queue
  * @param csrAddress address range occupied by the peripheral's registers
  * @param beatBytes beatBytes of the TileLink register node
  * @param p implicit Parameters instance
  */
class TLReadQueue(depth: Int, csrAddress: AddressSet, beatBytes: Int)(implicit p: Parameters)
  extends ReadQueue[
    TLClientPortParameters, TLManagerPortParameters, TLEdgeOut, TLEdgeIn, TLBundle
  ](depth)
  with TLHasCSR {

  val devname = "tlQueueOut"
  val devcompat = Seq("ucb-art", "dsptools")

  // Device description used for the register node; describe() adds nothing
  // on top of what SimpleDevice already reports.
  val device = new SimpleDevice(devname, devcompat) {
    override def describe(resources: ResourceBindings): Description =
      super.describe(resources)
  }

  // Diplomatic TileLink node that backs the regmap declared in ReadQueue.
  override val mem = Some(
    TLRegisterNode(address = Seq(csrAddress), device = device, beatBytes = beatBytes)
  )
}
/** Factory that builds a [[TLReadQueue]] and wraps it in a `LazyModule`. */
object TLReadQueue {
  /**
    * @param depth number of entries in the queue (default 8)
    * @param csrAddress register address range (default `AddressSet(0x2100, 0xff)`)
    * @param beatBytes beatBytes of the TileLink register node (default 8)
    * @param p implicit Parameters instance
    * @return the `LazyModule`-wrapped read queue
    */
  def apply(
    depth: Int = 8,
    csrAddress: AddressSet = AddressSet(0x2100, 0xff),
    beatBytes: Int = 8)(implicit p: Parameters) = {
    // NOTE(review): keep the named val. rocket-chip's LazyModule appears to take
    // its instance name from the enclosing val — confirm before inlining this.
    val readQueue = LazyModule(new TLReadQueue(depth = depth, csrAddress = csrAddress, beatBytes = beatBytes))
    readQueue
  }
}

View File

@@ -0,0 +1,225 @@
//// See LICENSE for license details.
//
package chipyard.example
import chisel3._
import chisel3.experimental.FixedPoint
import chisel3.util._
import dspblocks._
import dsptools.numbers._
import freechips.rocketchip.amba.axi4stream._
import freechips.rocketchip.config.{Parameters, Field, Config}
import freechips.rocketchip.diplomacy._
import freechips.rocketchip.tilelink._
import freechips.rocketchip.subsystem._
/**
  * Parameters for the streaming FIR peripheral.
  *
  * @param writeAddress base address given to the chain's TLWriteQueue (default 0x2000)
  * @param readAddress base address given to the chain's TLReadQueue (default 0x2100)
  * @param depth queue depth passed to both the write queue and the read queue
  */
case class GenericFIRParams(
  writeAddress: BigInt = 0x2000,
  readAddress: BigInt = 0x2100,
  depth: Int
)
// Config key for the streaming FIR; defaults to None, in which case
// CanHavePeripheryStreamingFIR does not instantiate the peripheral.
case object GenericFIRKey extends Field[Option[GenericFIRParams]](None)
/**
  * Payload passed between adjacent FIR cells.
  *
  * @param genIn prototype for `data` (the sample handed from cell to cell)
  * @param genOut prototype for `carry` (the partial sum handed from cell to cell)
  */
class GenericFIRCellBundle[T<:Data:Ring](genIn:T, genOut:T) extends Bundle {
  val data: T = genIn.cloneType
  val carry: T = genOut.cloneType

  // Rebuild the bundle from the same generator arguments when it is cloned.
  override def cloneType: this.type = GenericFIRCellBundle(genIn, genOut).asInstanceOf[this.type]
}
/** Companion factory so the bundle can be created without `new`. */
object GenericFIRCellBundle {
  def apply[T<:Data:Ring](genIn:T, genOut:T): GenericFIRCellBundle[T] = new GenericFIRCellBundle(genIn, genOut)
}
/**
  * IO of a single FIR cell: one coefficient input plus decoupled input and
  * output ports that both carry a GenericFIRCellBundle.
  *
  * @param genIn prototype for the coefficient and for the bundle's `data` field
  * @param genOut prototype for the bundle's `carry` field
  */
class GenericFIRCellIO[T<:Data:Ring](genIn:T, genOut:T) extends Bundle {
  // coefficient applied by this cell
  val coeff = Input(genIn.cloneType)
  // flipped: the cell is the consumer on this port
  val in = Flipped(Decoupled(GenericFIRCellBundle(genIn, genOut)))
  val out = Decoupled(GenericFIRCellBundle(genIn, genOut))
}
/** Companion factory so the IO bundle can be created without `new`. */
object GenericFIRCellIO {
  def apply[T<:Data:Ring](genIn:T, genOut:T): GenericFIRCellIO[T] = new GenericFIRCellIO(genIn, genOut)
}
/**
  * Single-field payload used on the external ports of GenericFIR.
  *
  * @param proto prototype for the `data` field
  */
class GenericFIRBundle[T<:Data:Ring](proto: T) extends Bundle {
  val data: T = proto.cloneType

  // Rebuild the bundle from the same prototype when it is cloned.
  override def cloneType: this.type = GenericFIRBundle(proto).asInstanceOf[this.type]
}
/** Companion factory so the bundle can be created without `new`. */
object GenericFIRBundle {
  def apply[T<:Data:Ring](proto: T): GenericFIRBundle[T] = new GenericFIRBundle(proto)
}
/**
  * Top-level IO of GenericFIR: a decoupled input of type `genIn` and a
  * decoupled output of type `genOut`, each wrapped in a GenericFIRBundle.
  *
  * @param genIn prototype for the input sample
  * @param genOut prototype for the output sample
  */
class GenericFIRIO[T<:Data:Ring](genIn:T, genOut:T) extends Bundle {
  // flipped: the filter is the consumer on this port
  val in = Flipped(Decoupled(GenericFIRBundle(genIn)))
  val out = Decoupled(GenericFIRBundle(genOut))
}
/** Companion factory so the IO bundle can be created without `new`. */
object GenericFIRIO {
  def apply[T<:Data:Ring](genIn:T, genOut:T): GenericFIRIO[T] = new GenericFIRIO(genIn, genOut)
}
// A generic FIR filter
// DOC include start: GenericFIR chisel
/**
  * Direct-form FIR filter built from a chain of GenericFIRDirectCell modules,
  * one cell per coefficient.
  *
  * @param genIn prototype for the input samples
  * @param genOut prototype for the accumulated output
  * @param coeffs filter coefficients, one per cell; must not be empty
  */
class GenericFIR[T<:Data:Ring](genIn:T, genOut:T, coeffs: Seq[T]) extends Module {
  // Fail elaboration with a clear message instead of the NoSuchElementException
  // that .head / .last would raise on an empty cell chain.
  require(coeffs.nonEmpty, "GenericFIR requires at least one coefficient")

  val io = IO(GenericFIRIO(genIn, genOut))

  // Construct a vector of genericFIRDirectCells
  val directCells = Seq.fill(coeffs.length){ Module(new GenericFIRDirectCell(genIn, genOut)).io }

  // Construct the direct FIR chain
  for ((cell, coeff) <- directCells.zip(coeffs)) {
    cell.coeff := coeff
  }

  // Connect input to first cell; the carry chain starts from the ring's zero
  directCells.head.in.bits.data := io.in.bits.data
  directCells.head.in.bits.carry := Ring[T].zero
  directCells.head.in.valid := io.in.valid
  io.in.ready := directCells.head.in.ready

  // Connect adjacent cells
  // .tail is the initial collection minus its first element, so zipping
  // directCells with directCells.tail pairs every cell with its successor:
  // (directCells(0), directCells(1)) ... (directCells(n-2), directCells(n-1)).
  // zip drops the unmatched last element, and a single-cell chain yields no pairs.
  for ((current, next) <- directCells.zip(directCells.tail)) {
    next.in.bits := current.out.bits
    next.in.valid := current.out.valid
    current.out.ready := next.in.ready
  }

  // Connect output to last cell; the filter result is the final carry
  io.out.bits.data := directCells.last.out.bits.carry
  directCells.last.out.ready := io.out.ready
  io.out.valid := directCells.last.out.valid
}
// DOC include end: GenericFIR chisel
// A generic FIR direct cell used to construct a larger direct FIR chain
//
// in ----- [z^-1]-- out
// |
// coeff ----[*]
// |
// carryIn --[+]-- carryOut
//
// DOC include start: GenericFIRDirectCell chisel
/**
  * One tap of a direct-form FIR chain. The cell registers the incoming sample,
  * forwards the registered sample to the next cell, and adds
  * `registered sample * coeff` to the incoming carry.
  *
  * @param genIn prototype for the sample and the coefficient
  * @param genOut prototype for the carry
  */
class GenericFIRDirectCell[T<:Data:Ring](genIn: T, genOut: T) extends Module {
  val io = IO(GenericFIRCellIO(genIn, genOut))

  // Registers to delay the input and the valid to propagate with calculations.
  // hasNewData is a one-bit flag, so it is a Bool (reset to false) rather than a UInt.
  val hasNewData = RegInit(false.B)
  val inputReg = Reg(genIn.cloneType)

  // Passthrough ready
  io.in.ready := io.out.ready

  // When a new transaction is ready on the input, we will have new data to output
  // next cycle. Take this data in
  when (io.in.fire()) {
    hasNewData := true.B
    inputReg := io.in.bits.data
  }

  // We should output data when our cell has new data to output and is ready to
  // receive new data. This ensures that every cell in the chain passes its data
  // on at the same time
  io.out.valid := hasNewData && io.in.fire()
  io.out.bits.data := inputReg

  // Compute carry
  // This uses the ring implementation for + and *, i.e.
  // (a * b) maps to (Ring[T].prod(a, b)) for whichever T you use
  io.out.bits.carry := inputReg * io.coeff + io.in.bits.carry
}
// DOC include end: GenericFIRDirectCell chisel
// DOC include start: GenericFIRBlock chisel
/**
  * DspBlock wrapper around GenericFIR. It exposes the filter through an
  * AXI4-Stream identity node and has no memory interface (`mem` is None).
  * The memory-interface type parameters are forwarded unchanged to `DspBlock`.
  *
  * @param genIn prototype for the filter input
  * @param genOut prototype for the filter output
  * @param coeffs filter coefficients handed to GenericFIR
  * @param p implicit Parameters instance
  */
abstract class GenericFIRBlock[D, U, EO, EI, B<:Data, T<:Data:Ring]
(
  genIn: T,
  genOut: T,
  coeffs: Seq[T]
)(implicit p: Parameters) extends DspBlock[D, U, EO, EI, B] {
  val streamNode = AXI4StreamIdentityNode()
  // no control/status registers, hence no memory node
  val mem = None

  lazy val module = new LazyModuleImp(this) {
    // exactly one stream edge in and one out are supported
    require(streamNode.in.length == 1)
    require(streamNode.out.length == 1)

    val in = streamNode.in.head._1
    val out = streamNode.out.head._1

    // instantiate generic fir
    val fir = Module(new GenericFIR(genIn, genOut, coeffs))

    // Attach ready and valid to outside interface
    in.ready := fir.io.in.ready
    fir.io.in.valid := in.valid

    fir.io.out.ready := out.ready
    out.valid := fir.io.out.valid

    // cast UInt to T: reinterpret the raw stream data as a GenericFIRBundle
    fir.io.in.bits := in.bits.data.asTypeOf(GenericFIRBundle(genIn))

    // cast T to UInt: flatten the output bundle back into raw stream data
    out.bits.data := fir.io.out.bits.asUInt
  }
}
// DOC include end: GenericFIRBlock chisel
// DOC include start: TLGenericFIRBlock chisel
/**
  * TileLink specialization of GenericFIRBlock: fixes the memory-interface type
  * parameters to the TileLink types and mixes in TLDspBlock.
  *
  * @param genIn prototype for the filter input
  * @param genOut prototype for the filter output
  * @param coeffs filter coefficients
  * @param p implicit Parameters instance
  */
class TLGenericFIRBlock[T<:Data:Ring]
(
  val genIn: T,
  val genOut: T,
  coeffs: Seq[T]
)(implicit p: Parameters) extends
GenericFIRBlock[TLClientPortParameters, TLManagerPortParameters, TLEdgeOut, TLEdgeIn, TLBundle, T](
  genIn, genOut, coeffs
) with TLDspBlock
// DOC include end: TLGenericFIRBlock chisel
// DOC include start: TLGenericFIRChain chisel
/**
  * TLChain made of a write queue, the FIR block and a read queue, in that order.
  * Each element of the Seq is a factory taking Parameters.
  *
  * @param genIn prototype for the filter input
  * @param genOut prototype for the filter output
  * @param coeffs filter coefficients
  * @param params queue depth and the base addresses of the two queues
  * @param p implicit Parameters instance
  */
class TLGenericFIRChain[T<:Data:Ring] (genIn: T, genOut: T, coeffs: Seq[T], params: GenericFIRParams)(implicit p: Parameters)
  extends TLChain(Seq(
    // write queue: registers at writeAddress with address mask 0xff
    TLWriteQueue(params.depth, AddressSet(params.writeAddress, 0xff))(_),
    { implicit p: Parameters =>
      // NOTE(review): keep the named val. rocket-chip's LazyModule appears to take
      // its instance name from the enclosing val — confirm before inlining this.
      val fir = LazyModule(new TLGenericFIRBlock(genIn, genOut, coeffs))
      fir
    },
    // read queue: registers at readAddress with address mask 0xff
    TLReadQueue(params.depth, AddressSet(params.readAddress, 0xff))(_)
  ))
// DOC include end: TLGenericFIRChain chisel
// DOC include start: CanHavePeripheryStreamingFIR chisel
trait CanHavePeripheryStreamingFIR extends BaseSubsystem {
  // Optional FIR peripheral: present only when GenericFIRKey holds parameters.
  // When present, the chain's memory node is attached to the periphery bus
  // behind a TLFIFOFixer and the chain itself is kept in this Option.
  val streamingFIR = p(GenericFIRKey).map { params =>
    val streamingFIR = LazyModule(new TLGenericFIRChain(
      genIn = FixedPoint(8.W, 3.BP),
      genOut = FixedPoint(8.W, 3.BP),
      coeffs = Seq(1.F(0.BP), 2.F(0.BP), 3.F(0.BP)),
      params = params))
    pbus.toVariableWidthSlave(Some("streamingFIR")) { streamingFIR.mem.get := TLFIFOFixer() }
    streamingFIR
  }
}
// DOC include end: CanHavePeripheryStreamingFIR chisel
/**
* Mixin to add FIR to rocket config
*/
// DOC include start: WithStreamingFIR
class WithStreamingFIR extends Config((_, _, _) => {
  // Supplies FIR parameters with a queue depth of 8; the other fields keep their defaults
  case GenericFIRKey =>
    Some(GenericFIRParams(depth = 8))
})
// DOC include end: WithStreamingFIR

View File

@@ -0,0 +1,150 @@
//// See LICENSE for license details.
//
package chipyard.example
import chisel3._
import chisel3.{Bundle, Module}
import chisel3.util._
import dspblocks._
import dsptools.numbers._
import freechips.rocketchip.amba.axi4stream._
import freechips.rocketchip.config.{Parameters, Field, Config}
import freechips.rocketchip.diplomacy._
import freechips.rocketchip.tilelink._
import freechips.rocketchip.subsystem._
// Simple passthrough to use as testbed sanity check
// StreamingPassthrough params
/**
 * Parameters for the streaming passthrough chain.
 *
 * @param writeAddress base address handed to the chain's TLWriteQueue (default 0x2000)
 * @param readAddress  base address handed to the chain's TLReadQueue (default 0x2100)
 * @param depth        depth used for both queues; has no default
 */
case class StreamingPassthroughParams(
writeAddress: BigInt = 0x2000,
readAddress: BigInt = 0x2100,
depth: Int
)
// StreamingPassthrough key
// Holds Some(params) when the passthrough is enabled; defaults to None
case object StreamingPassthroughKey extends Field[Option[StreamingPassthroughParams]](None)
/** Bundle with a single `data` field whose type is cloned from `proto`. */
class StreamingPassthroughBundle[T<:Data:Ring](proto: T) extends Bundle {
  val data: T = proto.cloneType

  // A clone is simply a fresh bundle built from the same prototype
  override def cloneType: this.type = {
    val copy = new StreamingPassthroughBundle(proto)
    copy.asInstanceOf[this.type]
  }
}

/** Factory so that a bundle can be created as `StreamingPassthroughBundle(proto)`. */
object StreamingPassthroughBundle {
  def apply[T<:Data:Ring](proto: T): StreamingPassthroughBundle[T] = {
    new StreamingPassthroughBundle(proto)
  }
}
/** Decoupled input (flipped) and Decoupled output, both carrying a StreamingPassthroughBundle. */
class StreamingPassthroughIO[T<:Data:Ring](proto: T) extends Bundle {
  val in = Flipped(Decoupled(new StreamingPassthroughBundle(proto)))
  val out = Decoupled(new StreamingPassthroughBundle(proto))
}

/** Factory so that the IO bundle can be created as `StreamingPassthroughIO(proto)`. */
object StreamingPassthroughIO {
  def apply[T<:Data:Ring](proto: T): StreamingPassthroughIO[T] = {
    new StreamingPassthroughIO(proto)
  }
}
/** Combinational passthrough: the output mirrors the input and back-pressure is forwarded. */
class StreamingPassthrough[T<:Data:Ring](proto: T) extends Module {
  val io = IO(new StreamingPassthroughIO(proto))

  // Forward direction: valid and data go straight from input to output
  io.out.valid := io.in.valid
  io.out.bits.data := io.in.bits.data

  // Backward direction: the input is ready exactly when the output is
  io.in.ready := io.out.ready
}
/**
 * Make DspBlock wrapper for StreamingPassthrough.
 *
 * The block has no memory-mapped interface (`mem` is `None`); data only moves
 * through `streamNode`. The type parameters D, U, EO, EI and B are forwarded
 * unchanged to DspBlock.
 *
 * @param proto prototype of the data type carried by the passthrough
 * @param p implicit Parameters
 * @tparam D forwarded to DspBlock
 * @tparam U forwarded to DspBlock
 * @tparam EO forwarded to DspBlock
 * @tparam EI forwarded to DspBlock
 * @tparam B forwarded to DspBlock
 * @tparam T Type parameter for passthrough, i.e. FixedPoint or DspReal; requires a Ring[T] instance
 */
abstract class StreamingPassthroughBlock[D, U, EO, EI, B<:Data, T<:Data:Ring]
(
proto: T
)(implicit p: Parameters) extends DspBlock[D, U, EO, EI, B] {
// Stream data enters and leaves through this identity node
val streamNode = AXI4StreamIdentityNode()
// This block provides no memory-mapped interface
val mem = None
lazy val module = new LazyModuleImp(this) {
// Exactly one inward and one outward stream connection is supported
require(streamNode.in.length == 1)
require(streamNode.out.length == 1)
// First component of the single inward / outward entry
// (presumably the bundle half of a (bundle, edge) pair -- confirm)
val in = streamNode.in.head._1
val out = streamNode.out.head._1
// instantiate passthrough
val passthrough = Module(new StreamingPassthrough(proto))
// Connect ready and valid between the inward stream and the passthrough input
in.ready := passthrough.io.in.ready
passthrough.io.in.valid := in.valid
// cast UInt to T
passthrough.io.in.bits := in.bits.data.asTypeOf(StreamingPassthroughBundle(proto))
// Connect ready and valid between the passthrough output and the outward stream
passthrough.io.out.ready := out.ready
out.valid := passthrough.io.out.valid
// cast T to UInt
out.bits.data := passthrough.io.out.bits.asUInt
}
}
/**
 * TLDspBlock specialization of StreamingPassthrough: StreamingPassthroughBlock
 * with its DspBlock type arguments fixed to the TileLink types.
 *
 * @param proto prototype of the passthrough data type; also exposed as a public val
 * @param p implicit Parameters
 * @tparam T Type parameter for passthrough data type; requires a Ring[T] instance
 */
class TLStreamingPassthroughBlock[T<:Data:Ring]
(
val proto: T
)(implicit p: Parameters) extends
StreamingPassthroughBlock[TLClientPortParameters, TLManagerPortParameters, TLEdgeOut, TLEdgeIn, TLBundle, T](proto)
with TLDspBlock
/**
 * A chain of queues acting as our MMIOs with the passthrough module in between them.
 *
 * The stages are listed in this order: TLWriteQueue, TLStreamingPassthroughBlock,
 * TLReadQueue. Both queues use `params.depth`; the write queue is given the
 * address set (params.writeAddress, mask 0xff) and the read queue the address
 * set (params.readAddress, mask 0xff).
 *
 * @param params queue depth and the two base addresses
 * @param proto prototype of the passthrough data type
 * @param p implicit Parameters
 * @tparam T Type parameter for passthrough, i.e. FixedPoint or DspReal; requires a Ring[T] instance
 */
class TLStreamingPassthroughChain[T<:Data:Ring](params: StreamingPassthroughParams, proto: T)(implicit p: Parameters)
extends TLChain(Seq(
TLWriteQueue(params.depth, AddressSet(params.writeAddress, 0xff))(_),
{ implicit p: Parameters => {
// NOTE(review): the block is bound to a named val before being returned;
// presumably this lets LazyModule derive the instance name -- confirm before inlining
val streamingPassthrough = LazyModule(new TLStreamingPassthroughBlock(proto))
streamingPassthrough
}},
TLReadQueue(params.depth, AddressSet(params.readAddress, 0xff))(_)
))
trait CanHavePeripheryStreamingPassthrough { this: BaseSubsystem =>
  // Optional passthrough peripheral: present only when StreamingPassthroughKey
  // holds parameters. When present, a 32-bit UInt chain is built, its memory
  // node is attached to the periphery bus behind a TLFIFOFixer, and the chain
  // is kept in this Option.
  val passthrough = p(StreamingPassthroughKey).map { params =>
    val streamingPassthroughChain = LazyModule(new TLStreamingPassthroughChain(params, UInt(32.W)))
    pbus.toVariableWidthSlave(Some("streamingPassthrough")) { streamingPassthroughChain.mem.get := TLFIFOFixer() }
    streamingPassthroughChain
  }
}
/**
 * Config fragment that enables the streaming passthrough: it sets
 * StreamingPassthroughKey to parameters with a queue depth of 8 and the
 * default write/read addresses.
 */
class WithStreamingPassthrough extends Config((_, _, _) => {
  case StreamingPassthroughKey =>
    Some(StreamingPassthroughParams(depth = 8))
})

View File

@@ -1,17 +1,17 @@
diff --git a/build.sbt b/build.sbt diff --git a/build.sbt b/build.sbt
index 0c4581f..ff0597c 100644 index 5d642c1..56f6fda 100644
--- a/build.sbt --- a/build.sbt
+++ b/build.sbt +++ b/build.sbt
@@ -128,7 +128,7 @@ lazy val iocell = (project in file("./tools/barstools/iocell/")) @@ -130,7 +130,7 @@ lazy val iocell = (project in file("./tools/barstools/iocell/"))
lazy val chipyard = conditionalDependsOn(project in file("generators/chipyard")) lazy val chipyard = conditionalDependsOn(project in file("generators/chipyard"))
.dependsOn(boom, hwacha, sifive_blocks, sifive_cache, utilities, iocell, .dependsOn(boom, hwacha, sifive_blocks, sifive_cache, utilities, iocell,
- sha3, // On separate line to allow for cleaner tutorial-setup patches - sha3, // On separate line to allow for cleaner tutorial-setup patches
+// sha3, // On separate line to allow for cleaner tutorial-setup patches +// sha3, // On separate line to allow for cleaner tutorial-setup patches
dsptools, `rocket-dsptools`,
gemmini, icenet, tracegen, ariane, nvdla) gemmini, icenet, tracegen, ariane, nvdla)
.settings(commonSettings) .settings(commonSettings)
@@ -158,9 +158,9 @@ lazy val ariane = (project in file("generators/ariane"))
@@ -155,9 +155,9 @@ lazy val ariane = (project in file("generators/ariane"))
.dependsOn(rocketchip) .dependsOn(rocketchip)
.settings(commonSettings) .settings(commonSettings)

View File

@@ -5,7 +5,8 @@ LDFLAGS= -static
include libgloss.mk include libgloss.mk
PROGRAMS = pwm blkdev accum charcount nic-loopback big-blkdev pingd nvdla spiflashread spiflashwrite PROGRAMS = pwm blkdev accum charcount nic-loopback big-blkdev pingd \
streaming-passthrough streaming-fir nvdla spiflashread spiflashwrite
spiflash.img: spiflash.py spiflash.img: spiflash.py
python3 $< python3 $<

65
tests/streaming-fir.c Normal file
View File

@@ -0,0 +1,65 @@
#define PASSTHROUGH_WRITE 0x2000
#define PASSTHROUGH_WRITE_COUNT 0x2008
#define PASSTHROUGH_READ 0x2100
#define PASSTHROUGH_READ_COUNT 0x2108
#define BP 3
#define BP_SCALE ((double)(1 << BP))
#include "mmio.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
/*
 * Round x to the nearest integer, with halfway cases rounded away from zero,
 * and return the result as a 64-bit pattern.
 *
 * Negative results are returned in two's-complement form (e.g. -2 becomes
 * 0xFFFFFFFFFFFFFFFE). Converting a negative double directly to an unsigned
 * type is undefined behaviour in C, so the negative branch converts through
 * int64_t first; the signed-to-unsigned step is then well defined.
 */
uint64_t roundi(double x)
{
  if (x < 0.0) {
    return (uint64_t)(int64_t)(x - 0.5);
  }
  return (uint64_t)(x + 0.5);
}
/*
 * Streaming FIR test.
 *
 * Writes every element of test_vector (scaled by BP_SCALE and rounded) to the
 * write register, then reads back num_tests - 3 results and compares each one
 * with 3*x[i] + 2*x[i+1] + x[i+2], scaled by BP_SCALE and truncated to 8 bits.
 *
 * Returns 0 when every comparison matched and 1 otherwise (including the case
 * where the read count is zero), so that a failing run is visible through the
 * exit status and not only in the printed log.
 */
int main(void)
{
  /* Only 13 initialisers are given; the last two elements are implicitly 0.0
   * and are written to the filter like any other input. */
  double test_vector[15] = {1.0, 2.0, 3.0, 4.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.5, 0.25, 0.125, 0.125};
  uint32_t num_tests = sizeof(test_vector) / sizeof(double);

  printf("Starting writing %u inputs\n", (unsigned)num_tests);
  for (uint32_t i = 0; i < num_tests; i++) {
    reg_write64(PASSTHROUGH_WRITE, roundi(test_vector[i] * BP_SCALE));
  }
  printf("Done writing\n");

  /* The read count is sampled before the write count, as in the original flow */
  uint32_t rcnt = reg_read32(PASSTHROUGH_READ_COUNT);
  printf("Write count: %u\n", (unsigned)reg_read32(PASSTHROUGH_WRITE_COUNT));
  printf("Read count: %u\n", (unsigned)rcnt);

  int failed = 0;
  if (rcnt != 0) {
    for (uint32_t i = 0; i < num_tests - 3; i++) {
      uint32_t res = reg_read32(PASSTHROUGH_READ);
      double expected_double = 3*test_vector[i] + 2*test_vector[i+1] + test_vector[i+2];
      /* Same scaling as the inputs, then keep the low 8 bits */
      uint32_t expected = ((uint32_t)(expected_double * BP_SCALE + 0.5)) & 0xFF;
      if (res == expected) {
        printf("\n\nPass: Got %u Expected %u\n\n", (unsigned)res, (unsigned)expected);
      } else {
        failed = 1;
        printf("\n\nFail: Got %u Expected %u\n\n", (unsigned)res, (unsigned)expected);
      }
    }
  } else {
    /* Nothing arrived in the read queue */
    failed = 1;
  }

  if (failed) {
    printf("\n\nSome tests failed\n\n");
  } else {
    printf("\n\nAll tests passed\n\n");
  }

  /* Propagate the result so the caller can detect a failing run */
  return failed;
}

View File

@@ -0,0 +1,49 @@
#define PASSTHROUGH_WRITE 0x2000
#define PASSTHROUGH_WRITE_COUNT 0x2008
#define PASSTHROUGH_READ 0x2100
#define PASSTHROUGH_READ_COUNT 0x2108
#include "mmio.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
/*
 * Streaming passthrough test.
 *
 * Writes each element of test_vector to the write register, then reads the
 * same number of values back and checks that each one equals the value that
 * was written.
 *
 * Returns 0 when every value matched and 1 otherwise (including the case
 * where the read count is zero), so that a failing run is visible through the
 * exit status and not only in the printed log.
 */
int main(void)
{
  printf("Starting writing\n");
  /* Negative initialisers wrap to their 32-bit unsigned patterns */
  uint32_t test_vector[7] = {3, 2, 1, 0, -1, -2, -3} ;
  const int num_tests = (int)(sizeof(test_vector) / sizeof(test_vector[0]));

  for (int i = 0; i < num_tests; i++) {
    reg_write64(PASSTHROUGH_WRITE, test_vector[i]);
  }
  printf("Done writing\n");

  /* The read count is sampled before the write count, as in the original flow */
  uint32_t rcnt = reg_read32(PASSTHROUGH_READ_COUNT);
  printf("Write count: %d\n", reg_read32(PASSTHROUGH_WRITE_COUNT));
  printf("Read count: %d\n", rcnt);

  int failed = 0;
  if (rcnt != 0) {
    for (int i = 0; i < num_tests; i++) {
      uint32_t res = reg_read32(PASSTHROUGH_READ);
      uint32_t expected = test_vector[i];
      if (res == expected) {
        printf("\n\nPass: Got %d Expected %d\n\n", res, test_vector[i]);
      } else {
        failed = 1;
        printf("\n\nFail: Got %d Expected %d\n\n", res, test_vector[i]);
      }
    }
  } else {
    /* Nothing arrived in the read queue */
    failed = 1;
  }

  if (failed) {
    printf("\n\nSome tests failed\n\n");
  } else {
    printf("\n\nAll tests passed\n\n");
  }

  /* Propagate the result so the caller can detect a failing run */
  return failed;
}