diff --git a/.circleci/config.yml b/.circleci/config.yml index 03943fa8..ab7006e1 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -204,6 +204,16 @@ jobs: steps: - prepare-rtl: project-key: "chipyard-sha3" + prepare-chipyard-streaming-fir: + executor: main-env + steps: + - prepare-rtl: + project-key: "chipyard-streaming-fir" + prepare-chipyard-streaming-passthrough: + executor: main-env + steps: + - prepare-rtl: + project-key: "chipyard-streaming-passthrough" prepare-chipyard-hetero: executor: main-env steps: @@ -287,6 +297,16 @@ jobs: steps: - run-tests: project-key: "chipyard-sha3" + chipyard-streaming-fir-run-tests: + executor: main-env + steps: + - run-tests: + project-key: "chipyard-streaming-fir" + chipyard-streaming-passthrough-run-tests: + executor: main-env + steps: + - run-tests: + project-key: "chipyard-streaming-passthrough" chipyard-hetero-run-tests: executor: main-env steps: @@ -439,6 +459,16 @@ workflows: - install-riscv-toolchain - install-verilator + - prepare-chipyard-streaming-fir: + requires: + - install-riscv-toolchain + - install-verilator + + - prepare-chipyard-streaming-passthrough: + requires: + - install-riscv-toolchain + - install-verilator + - prepare-chipyard-hetero: requires: - install-riscv-toolchain @@ -525,6 +555,14 @@ workflows: requires: - prepare-chipyard-sha3 + - chipyard-streaming-fir-run-tests: + requires: + - prepare-chipyard-streaming-fir + + - chipyard-streaming-passthrough-run-tests: + requires: + - prepare-chipyard-streaming-passthrough + - chipyard-hetero-run-tests: requires: - prepare-chipyard-hetero diff --git a/.circleci/defaults.sh b/.circleci/defaults.sh index cdb2c37b..b9aeeb3b 100755 --- a/.circleci/defaults.sh +++ b/.circleci/defaults.sh @@ -49,6 +49,8 @@ LOCAL_FIRESIM_DIR=$LOCAL_CHIPYARD_DIR/sims/firesim/sim declare -A mapping mapping["chipyard-rocket"]="SUB_PROJECT=chipyard" mapping["chipyard-sha3"]="SUB_PROJECT=chipyard CONFIG=Sha3RocketConfig" +mapping["chipyard-streaming-fir"]="SUB_PROJECT=chipyard CONFIG=StreamingFIRRocketConfig" +mapping["chipyard-streaming-passthrough"]="SUB_PROJECT=chipyard CONFIG=StreamingPassthroughRocketConfig" mapping["chipyard-hetero"]="SUB_PROJECT=chipyard CONFIG=LargeBoomAndRocketConfig" mapping["chipyard-boom"]="SUB_PROJECT=chipyard CONFIG=SmallBoomConfig" mapping["chipyard-blkdev"]="SUB_PROJECT=chipyard CONFIG=SimBlockDeviceRocketConfig" diff --git a/.circleci/run-tests.sh b/.circleci/run-tests.sh index 387f9b68..4f4779fd 100755 --- a/.circleci/run-tests.sh +++ b/.circleci/run-tests.sh @@ -62,6 +62,14 @@ case $1 in (cd $LOCAL_CHIPYARD_DIR/generators/sha3/software && ./build.sh) $LOCAL_SIM_DIR/simulator-chipyard-Sha3RocketConfig $LOCAL_CHIPYARD_DIR/generators/sha3/software/benchmarks/bare/sha3-rocc.riscv ;; + chipyard-streaming-passthrough) + make -C $LOCAL_CHIPYARD_DIR/tests + $LOCAL_SIM_DIR/simulator-chipyard-StreamingPassthroughRocketConfig $LOCAL_CHIPYARD_DIR/tests/streaming-passthrough.riscv + ;; + chipyard-streaming-fir) + make -C $LOCAL_CHIPYARD_DIR/tests + $LOCAL_SIM_DIR/simulator-chipyard-StreamingFIRRocketConfig $LOCAL_CHIPYARD_DIR/tests/streaming-fir.riscv + ;; chipyard-spiflashread) make -C $LOCAL_CHIPYARD_DIR/tests make -C $LOCAL_SIM_DIR ${mapping[$1]} BINARY=$LOCAL_CHIPYARD_DIR/tests/spiflashread.riscv SIM_FLAGS="+spiflash0=${LOCAL_CHIPYARD_DIR}/tests/spiflash.img" run-binary diff --git a/build.sbt b/build.sbt index 17fdbba5..5d642c1d 100644 --- a/build.sbt +++ b/build.sbt @@ -20,6 +20,8 @@ lazy val commonSettings = Seq( libraryDependencies += "com.github.scopt" %% "scopt" % "3.7.0", libraryDependencies += "org.scala-lang.modules" % "scala-jline" % "2.12.1", libraryDependencies += "com.typesafe.play" %% "play-json" % "2.6.10", + libraryDependencies += "org.typelevel" %% "spire" % "0.16.2", + libraryDependencies += "org.scalanlp" %% "breeze" % "1.0", addCompilerPlugin("org.scalamacros" % "paradise" % "2.1.0" cross CrossVersion.full), unmanagedBase := (chipyardRoot / unmanagedBase).value, allDependencies := allDependencies.value.filterNot(_.organization == "edu.berkeley.cs"), @@ -129,6 +131,7 @@ lazy val iocell = (project in file("./tools/barstools/iocell/")) lazy val chipyard = conditionalDependsOn(project in file("generators/chipyard")) .dependsOn(boom, hwacha, sifive_blocks, sifive_cache, utilities, iocell, sha3, // On separate line to allow for cleaner tutorial-setup patches + dsptools, `rocket-dsptools`, gemmini, icenet, tracegen, ariane, nvdla) .settings(commonSettings) @@ -180,19 +183,17 @@ lazy val barstoolsMacros = (project in file("./tools/barstools/macros/")) .enablePlugins(sbtassembly.AssemblyPlugin) .settings(commonSettings) -lazy val dsptools = (project in file("./tools/dsptools")) +lazy val dsptools = freshProject("dsptools", file("./tools/dsptools")) .dependsOn(chisel, chisel_testers) .settings( commonSettings, libraryDependencies ++= Seq( - "org.typelevel" %% "spire" % "0.14.1", - "org.scalanlp" %% "breeze" % "0.13.2", - "junit" % "junit" % "4.12" % "test", - "org.scalatest" %% "scalatest" % "3.0.5" % "test", - "org.scalacheck" %% "scalacheck" % "1.14.0" % "test" + "junit" % "junit" % "4.13" % "test", + "org.scalatest" %% "scalatest" % "3.0.8", + "org.scalacheck" %% "scalacheck" % "1.14.3" % "test" )) -lazy val `rocket-dsptools` = (project in file("./tools/dsptools/rocket")) +lazy val `rocket-dsptools` = freshProject("rocket-dsptools", file("./tools/dsptools/rocket")) .dependsOn(rocketchip, dsptools) .settings(commonSettings) diff --git a/docs/Customization/Dsptools-Blocks.rst b/docs/Customization/Dsptools-Blocks.rst new file mode 100644 index 00000000..bdf68fc1 --- /dev/null +++ b/docs/Customization/Dsptools-Blocks.rst @@ -0,0 +1,125 @@ +.. _dsptools-blocks: + +Dsptools is a Chisel library that aids in writing custom signal processing accelerators. It does this by: +* Giving types and helpers that allow you to express mathematical operations more directly. +* Typeclasses that let you write polymorphic generators, for example an FIR filter generator that works for both real- and complex-valued filters. +* Structures for packaging DSP blocks and integrating them into a rocketchip-based SoC. +* Test harnesses for testing DSP circuits, as well as VIP-style drivers and monitors for DSP blocks. + +The `Dsptools repository `_ has more documentation. + + +Dsptools Blocks +=============== +A ``DspBlock`` is the basic unit of signal processing functionality that can be integrated into an SoC. +It has a AXI4-stream interface and an optional memory interface. +The idea is that these ``DspBlocks`` can be easily designed, unit tested, and assembled lego-style to build complex functionality. +A ``DspChain`` is one example of how to assemble ``DspBlocks``, in which case the streaming interfaces are connected serially into a pipeline, and a bus is instatiated and connected to every block with a memory interface. + +Chipyard has example designs that integrate a ``DspBlock`` to a rocketchip-based SoC as an MMIO peripheral. The custom ``DspBlock`` has a ``ReadQueue`` before it and a ``WriteQueue`` after it, which allow memory mapped access to the streaming interfaces so the rocket core can interact with the ``DspBlock`` [#]_. This section will primarily focus on designing Tilelink-based peripherals. However, through the resources provided in Dsptools, one could also define an AXI4-based peripheral by following similar steps. Furthermore, the examples here are simple, but can be extended to implement more complex accelerators, for example an `OFDM baseband `_ or a `spectrometer `_. + +.. figure:: ../_static/images/fir-block-diagram.svg + :align: center + :alt: Block diagram showing how FIR is integrated with rocket. + :width: 400px + +For this example, we will show you how to connect a simple FIR filter created using Dsptools as an MMIO peripheral as shown in the figure above. The full code can be found in ``generators/chipyard/src/main/scala/example/dsptools/GenericFIR.scala``. That being said, one could substitute any module with a ready valid interface in the place of the FIR and achieve the same results. As long as the read and valid signals of the module are attached to those of a corresponding ``DSPBlock`` wrapper, and that wrapper is placed in a chain with a ``ReadQueue`` and a ``WriteQueue``, following the general outline establised by these steps will allow you to interact with that block as a memory mapped IO. + +The module ``GenericFIR`` is the overall wrapper of our FIR module. This module links together a variable number of ``GenericFIRDirectCell`` submodules, each of which performs the computations for one coefficient in a FIR direct form architecture. It is important to note that both modules are type-generic, which means that they can be instantiated for any datatype ``T`` that implements ``Ring`` operations (e.g. addition, multiplication, identities). + +.. literalinclude:: ../../generators/chipyard/src/main/scala/example/dsptools/GenericFIR.scala + :language: scala + :start-after: DOC include start: GenericFIR chisel + :end-before: DOC include end: GenericFIR chisel + +.. literalinclude:: ../../generators/chipyard/src/main/scala/example/dsptools/GenericFIR.scala + :language: scala + :start-after: DOC include start: GenericFIRDirectCell chisel + :end-before: DOC include end: GenericFIRDirectCell chisel + +Creating a DspBlock +------------------- + +The first step in attaching the FIR filter as a MMIO peripheral is to create an abstract subclass of ``DspBlock`` the wraps around the ``GenericFIR`` module. Streaming outputs and inputs are packed and unpacked into ``UInt`` s. If there were control signals, this is where they'd go from raw IOs to memory mapped. The main steps of this process are as follows. + +1. Instantiate a ``GenericFIR`` within ``GenericFIRBlock``. +2. Attach the ready and valid signals from the in and out connections. +3. Cast the module input data to the input type of ``GenericFIR`` (``GenericFIRBundle``) and attach. +4. Cast the output of ``GenericFIR`` to ``UInt`` and attach to the module output. + +.. literalinclude:: ../../generators/chipyard/src/main/scala/example/dsptools/GenericFIR.scala + :language: scala + :start-after: DOC include start: GenericFIRBlock chisel + :end-before: DOC include end: GenericFIRBlock chisel + +Note that at this point the ``GenericFIRBlock`` does not have a type of memory interface specified. This abstract class can be used to create different flavors that use AXI-4, TileLink, AHB, or whatever other memory interface you like like. + +Connecting DspBlock by TileLink +------------------------------- +With these classes implemented, you can begin to construct the chain by extending ``GenericFIRBlock`` while using the ``TLDspBlock`` trait via mixin. + +.. literalinclude:: ../../generators/chipyard/src/main/scala/example/dsptools/GenericFIR.scala + :language: scala + :start-after: DOC include start: TLGenericFIRBlock chisel + :end-before: DOC include end: TLGenericFIRBlock chisel + +We can then construct the final chain by utilizing the ``TLWriteQueue`` and ``TLReadeQueue`` modules found in ``generators/chipyard/src/main/scala/example/dsptools/DspBlocks.scala``. The chain is created by passing a list of factory functions to the constructor of ``TLChain``. The constructor then automatically instantiates these ``DspBlocks``, connects their stream nodes in order, creates a bus, and connects any ``DspBlocks`` that have memory interfaces to the bus. + +.. literalinclude:: ../../generators/chipyard/src/main/scala/example/dsptools/GenericFIR.scala + :language: scala + :start-after: DOC include start: TLGenericFIRChain chisel + :end-before: DOC include end: TLGenericFIRChain chisel + +Top Level Traits +---------------- +As in the previous MMIO example, we use a cake pattern to hook up our module to our SoC. + +.. literalinclude:: ../../generators/chipyard/src/main/scala/example/dsptools/GenericFIR.scala + :language: scala + :start-after: DOC include start: CanHavePeripheryStreamingFIR chisel + :end-before: DOC include end: CanHavePeripheryStreamingFIR chisel + +Note that this is the point at which we decide the datatype for our FIR. You could create different configs that use different types for the FIR, for example a config that instantiates a complex-valued FIR filter. + +Constructing the Top and Config +------------------------------- + +Once again following the path of the previous MMIO example, we now want to mix our traits into the system as a whole. The code is from ``generators/chipyard/src/main/scala/DigitalTop.scala`` + +.. literalinclude:: ../../generators/chipyard/src/main/scala/DigitalTop.scala + :language: scala + :start-after: DOC include start: DigitalTop + :end-before: DOC include end: DigitalTop + +Finally, we create the configuration class in ``generators/chipyard/src/main/scala/config/RocketConfigs.scala`` that uses the ``WithFIR`` mixin defined in ``generators/chipyard/src/main/scala/example/dsptools/GenericFIR.scala``. + +.. literalinclude:: ../../generators/chipyard/src/main/scala/example/dsptools/GenericFIR.scala + :language: scala + :start-after: DOC include start: WithStreamingFIR + :end-before: DOC include end: WithStreamingFIR + +.. literalinclude:: ../../generators/chipyard/src/main/scala/config/RocketConfigs.scala + :language: scala + :start-after: DOC include start: StreamingFIRRocketConfig + :end-before: DOC include end: StreamingFIRRocketConfig + +FIR Testing +----------- + +We can now test that the FIR is working. The test program is found in ``tests/streaming-fir.c``. + +.. literalinclude:: ../../tests/streaming-fir.c + :language: c + +The test feed a series of values into the fir and compares the output to a golden model of computation. The base of the module's MMIO write region is at 0x2000 and the base of the read region is at 0x2100 by default. + +Compiling this program with ``make`` produces a ``streaming-fir.riscv`` executable. + +Now we can run our simulation. + +.. code-block:: shell + + cd sims/verilator + make CONFIG=StreamingFIRRocketConfig BINARY=../../tests/streaming-fir.riscv run-binary + +.. [#] ``ReadQueue`` and ``WriteQueue`` are good illustrations of how to write a ``DspBlock`` and how they can be integrated into rocket, but in a real design a DMA engine would be preferred. ``ReadQueue`` will stall the processor if you try to read an empty queue, and ``WriteQueue`` will stall if you try to write to a full queue, which a DMA engine can more elegantly avoid. Furthermore, a DMA engine can do the work of moving data, freeing the processor to do other useful work (or sleep). diff --git a/docs/Customization/index.rst b/docs/Customization/index.rst index 90d36fda..9421b79a 100644 --- a/docs/Customization/index.rst +++ b/docs/Customization/index.rst @@ -11,7 +11,9 @@ These guides will walk you through customization of your system-on-chip: - Adding custom MMIO widgets to the Chipyard memory system by Tilelink or AXI4, with custom Top-level IOs -- Standard practices for using keys, traits, and configs to parameterize your design +- Adding custom Dsptools based blocks as MMIO widgets. + +- Standard practices for using Keys, Traits, and Configs to parameterize your design - Customizing the memory hierarchy @@ -36,6 +38,7 @@ We recommend reading all these pages in order. Hit next to get started! RoCC-or-MMIO RoCC-Accelerators MMIO-Peripherals + Dsptools-Blocks Keys-Traits-Configs DMA-Devices Incorporating-Verilog-Blocks diff --git a/docs/_static/images/fir-block-diagram.svg b/docs/_static/images/fir-block-diagram.svg new file mode 100644 index 00000000..c56379e5 --- /dev/null +++ b/docs/_static/images/fir-block-diagram.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/generators/chipyard/src/main/scala/DigitalTop.scala b/generators/chipyard/src/main/scala/DigitalTop.scala index a61594f2..ae363539 100644 --- a/generators/chipyard/src/main/scala/DigitalTop.scala +++ b/generators/chipyard/src/main/scala/DigitalTop.scala @@ -23,6 +23,8 @@ class DigitalTop(implicit p: Parameters) extends System with icenet.CanHavePeripheryIceNIC // Enables optionally adding the IceNIC for FireSim with chipyard.example.CanHavePeripheryInitZero // Enables optionally adding the initzero example widget with chipyard.example.CanHavePeripheryGCD // Enables optionally adding the GCD example widget + with chipyard.example.CanHavePeripheryStreamingFIR // Enables optionally adding the DSPTools FIR example widget + with chipyard.example.CanHavePeripheryStreamingPassthrough // Enables optionally adding the DSPTools streaming-passthrough example widget with nvidia.blocks.dla.CanHavePeripheryNVDLA // Enables optionally having an NVDLA { override lazy val module = new DigitalTopModule(this) diff --git a/generators/chipyard/src/main/scala/config/RocketConfigs.scala b/generators/chipyard/src/main/scala/config/RocketConfigs.scala index 415c89ee..06257c7b 100644 --- a/generators/chipyard/src/main/scala/config/RocketConfigs.scala +++ b/generators/chipyard/src/main/scala/config/RocketConfigs.scala @@ -465,6 +465,46 @@ class RingSystemBusRocketConfig extends Config( new freechips.rocketchip.system.BaseConfig) // DOC include end: RingSystemBusRocket +class StreamingPassthroughRocketConfig extends Config( + new chipyard.example.WithStreamingPassthrough ++ // use top with tilelink-controlled streaming passthrough + new chipyard.iobinders.WithUARTAdapter ++ + new chipyard.iobinders.WithTieOffInterrupts ++ + new chipyard.iobinders.WithBlackBoxSimMem ++ + new chipyard.iobinders.WithTiedOffDebug ++ + new chipyard.iobinders.WithSimSerial ++ + new testchipip.WithTSI ++ + new chipyard.config.WithBootROM ++ + new chipyard.config.WithUART ++ + new chipyard.config.WithL2TLBs(1024) ++ + new freechips.rocketchip.subsystem.WithNoMMIOPort ++ + new freechips.rocketchip.subsystem.WithNoSlavePort ++ + new freechips.rocketchip.subsystem.WithInclusiveCache ++ + new freechips.rocketchip.subsystem.WithNExtTopInterrupts(0) ++ + new freechips.rocketchip.subsystem.WithNBigCores(1) ++ + new freechips.rocketchip.subsystem.WithCoherentBusTopology ++ + new freechips.rocketchip.system.BaseConfig) + +// DOC include start: StreamingFIRRocketConfig +class StreamingFIRRocketConfig extends Config ( + new chipyard.example.WithStreamingFIR ++ // use top with tilelink-controlled streaming FIR + new chipyard.iobinders.WithUARTAdapter ++ + new chipyard.iobinders.WithTieOffInterrupts ++ + new chipyard.iobinders.WithBlackBoxSimMem ++ + new chipyard.iobinders.WithTiedOffDebug ++ + new chipyard.iobinders.WithSimSerial ++ + new testchipip.WithTSI ++ + new chipyard.config.WithBootROM ++ + new chipyard.config.WithUART ++ + new chipyard.config.WithL2TLBs(1024) ++ + new freechips.rocketchip.subsystem.WithNoMMIOPort ++ + new freechips.rocketchip.subsystem.WithNoSlavePort ++ + new freechips.rocketchip.subsystem.WithInclusiveCache ++ + new freechips.rocketchip.subsystem.WithNExtTopInterrupts(0) ++ + new freechips.rocketchip.subsystem.WithNBigCores(1) ++ + new freechips.rocketchip.subsystem.WithCoherentBusTopology ++ + new freechips.rocketchip.system.BaseConfig) +// DOC include end: StreamingFIRRocketConfig + class SmallNVDLARocketConfig extends Config( new chipyard.iobinders.WithUARTAdapter ++ new chipyard.iobinders.WithTieOffInterrupts ++ diff --git a/generators/chipyard/src/main/scala/example/dsptools/DspBlocks.scala b/generators/chipyard/src/main/scala/example/dsptools/DspBlocks.scala new file mode 100644 index 00000000..b51e2223 --- /dev/null +++ b/generators/chipyard/src/main/scala/example/dsptools/DspBlocks.scala @@ -0,0 +1,162 @@ +package chipyard.example + +import chisel3._ +import chisel3.util._ +import dspblocks._ +import dsptools.numbers._ +import freechips.rocketchip.amba.axi4stream._ +import freechips.rocketchip.config.Parameters +import freechips.rocketchip.diplomacy._ +import freechips.rocketchip.regmapper._ +import freechips.rocketchip.tilelink._ +import freechips.rocketchip.subsystem._ + +/** + * The memory interface writes entries into the queue. + * They stream out the streaming interface + * @param depth number of entries in the queue + * @param streamParameters parameters for the stream node + * @param p + */ +abstract class WriteQueue[D, U, E, O, B <: Data] +( + val depth: Int, + val streamParameters: AXI4StreamMasterParameters = AXI4StreamMasterParameters() +)(implicit p: Parameters) extends DspBlock[D, U, E, O, B] with HasCSR { + // stream node, output only + val streamNode = AXI4StreamMasterNode(streamParameters) + + lazy val module = new LazyModuleImp(this) { + require(streamNode.out.length == 1) + + // get the output bundle associated with the AXI4Stream node + val out = streamNode.out.head._1 + // width (in bits) of the output interface + val width = out.params.n * 8 + // instantiate a queue + val queue = Module(new Queue(UInt(out.params.dataBits.W), depth)) + // connect queue output to streaming output + out.valid := queue.io.deq.valid + out.bits.data := queue.io.deq.bits + // don't use last + out.bits.last := false.B + queue.io.deq.ready := out.ready + + regmap( + // each write adds an entry to the queue + 0x0 -> Seq(RegField.w(width, queue.io.enq)), + // read the number of entries in the queue + (width+7)/8 -> Seq(RegField.r(width, queue.io.count)), + ) + } +} + +/** + * TLDspBlock specialization of WriteQueue + * @param depth number of entries in the queue + * @param csrAddress address range for peripheral + * @param beatBytes beatBytes of TL interface + * @param p + */ +class TLWriteQueue (depth: Int, csrAddress: AddressSet, beatBytes: Int) +(implicit p: Parameters) extends WriteQueue[ + TLClientPortParameters, TLManagerPortParameters, TLEdgeOut, TLEdgeIn, TLBundle +](depth) with TLHasCSR { + val devname = "tlQueueIn" + val devcompat = Seq("ucb-art", "dsptools") + val device = new SimpleDevice(devname, devcompat) { + override def describe(resources: ResourceBindings): Description = { + val Description(name, mapping) = super.describe(resources) + Description(name, mapping) + } + } + // make diplomatic TL node for regmap + override val mem = Some(TLRegisterNode(address = Seq(csrAddress), device = device, beatBytes = beatBytes)) +} + +object TLWriteQueue { + def apply( + depth: Int = 8, + csrAddress: AddressSet = AddressSet(0x2000, 0xff), + beatBytes: Int = 8, + )(implicit p: Parameters) = { + val writeQueue = LazyModule(new TLWriteQueue(depth = depth, csrAddress = csrAddress, beatBytes = beatBytes)) + writeQueue + } +} + +/** + * The streaming interface adds elements into the queue. + * The memory interface can read elements out of the queue. + * @param depth number of entries in the queue + * @param streamParameters parameters for the stream node + * @param p + */ +abstract class ReadQueue[D, U, E, O, B <: Data] +( + val depth: Int, + val streamParameters: AXI4StreamSlaveParameters = AXI4StreamSlaveParameters() +)(implicit p: Parameters) extends DspBlock[D, U, E, O, B] with HasCSR { + val streamNode = AXI4StreamSlaveNode(streamParameters) + + lazy val module = new LazyModuleImp(this) { + require(streamNode.in.length == 1) + + // get the input associated with the stream node + val in = streamNode.in.head._1 + // make a Decoupled[UInt] that RegReadFn can do something with + val out = Wire(Decoupled(UInt())) + // get width of streaming input interface + val width = in.params.n * 8 + // instantiate a queue + val queue = Module(new Queue(UInt(in.params.dataBits.W), depth)) + // connect input to the streaming interface + queue.io.enq.valid := in.valid + queue.io.enq.bits := in.bits.data + in.ready := queue.io.enq.ready + // connect output to wire + out.valid := queue.io.deq.valid + out.bits := queue.io.deq.bits + queue.io.deq.ready := out.ready + + regmap( + // map the output of the queue + 0x0 -> Seq(RegField.r(width, RegReadFn(out))), + // read the number of elements in the queue + (width+7)/8 -> Seq(RegField.r(width, queue.io.count)), + ) + } +} + +/** + * TLDspBlock specialization of ReadQueue + * @param depth number of entries in the queue + * @param csrAddress address range + * @param beatBytes beatBytes of TL interface + * @param p + */ +class TLReadQueue( depth: Int, csrAddress: AddressSet, beatBytes: Int) +(implicit p: Parameters) extends ReadQueue[ + TLClientPortParameters, TLManagerPortParameters, TLEdgeOut, TLEdgeIn, TLBundle +](depth) with TLHasCSR { + val devname = "tlQueueOut" + val devcompat = Seq("ucb-art", "dsptools") + val device = new SimpleDevice(devname, devcompat) { + override def describe(resources: ResourceBindings): Description = { + val Description(name, mapping) = super.describe(resources) + Description(name, mapping) + } + } + // make diplomatic TL node for regmap + override val mem = Some(TLRegisterNode(address = Seq(csrAddress), device = device, beatBytes = beatBytes)) +} + +object TLReadQueue { + def apply( + depth: Int = 8, + csrAddress: AddressSet = AddressSet(0x2100, 0xff), + beatBytes: Int = 8)(implicit p: Parameters) = { + val readQueue = LazyModule(new TLReadQueue(depth = depth, csrAddress = csrAddress, beatBytes = beatBytes)) + readQueue + } +} diff --git a/generators/chipyard/src/main/scala/example/dsptools/GenericFIR.scala b/generators/chipyard/src/main/scala/example/dsptools/GenericFIR.scala new file mode 100644 index 00000000..ed16b25d --- /dev/null +++ b/generators/chipyard/src/main/scala/example/dsptools/GenericFIR.scala @@ -0,0 +1,225 @@ +//// See LICENSE for license details. +// +package chipyard.example + +import chisel3._ +import chisel3.experimental.FixedPoint +import chisel3.util._ +import dspblocks._ +import dsptools.numbers._ +import freechips.rocketchip.amba.axi4stream._ +import freechips.rocketchip.config.{Parameters, Field, Config} +import freechips.rocketchip.diplomacy._ +import freechips.rocketchip.tilelink._ +import freechips.rocketchip.subsystem._ + +// FIR params +case class GenericFIRParams( + writeAddress: BigInt = 0x2000, + readAddress: BigInt = 0x2100, + depth: Int +) + +case object GenericFIRKey extends Field[Option[GenericFIRParams]](None) + +class GenericFIRCellBundle[T<:Data:Ring](genIn:T, genOut:T) extends Bundle { + val data: T = genIn.cloneType + val carry: T = genOut.cloneType + + override def cloneType: this.type = GenericFIRCellBundle(genIn, genOut).asInstanceOf[this.type] +} +object GenericFIRCellBundle { + def apply[T<:Data:Ring](genIn:T, genOut:T): GenericFIRCellBundle[T] = new GenericFIRCellBundle(genIn, genOut) +} + +class GenericFIRCellIO[T<:Data:Ring](genIn:T, genOut:T) extends Bundle { + val coeff = Input(genIn.cloneType) + val in = Flipped(Decoupled(GenericFIRCellBundle(genIn, genOut))) + val out = Decoupled(GenericFIRCellBundle(genIn, genOut)) +} +object GenericFIRCellIO { + def apply[T<:Data:Ring](genIn:T, genOut:T): GenericFIRCellIO[T] = new GenericFIRCellIO(genIn, genOut) +} + +class GenericFIRBundle[T<:Data:Ring](proto: T) extends Bundle { + val data: T = proto.cloneType + + override def cloneType: this.type = GenericFIRBundle(proto).asInstanceOf[this.type] +} +object GenericFIRBundle { + def apply[T<:Data:Ring](proto: T): GenericFIRBundle[T] = new GenericFIRBundle(proto) +} + +class GenericFIRIO[T<:Data:Ring](genIn:T, genOut:T) extends Bundle { + val in = Flipped(Decoupled(GenericFIRBundle(genIn))) + val out = Decoupled(GenericFIRBundle(genOut)) +} +object GenericFIRIO { + def apply[T<:Data:Ring](genIn:T, genOut:T): GenericFIRIO[T] = new GenericFIRIO(genIn, genOut) +} + +// A generic FIR filter +// DOC include start: GenericFIR chisel +class GenericFIR[T<:Data:Ring](genIn:T, genOut:T, coeffs: Seq[T]) extends Module { + val io = IO(GenericFIRIO(genIn, genOut)) + + // Construct a vector of genericFIRDirectCells + val directCells = Seq.fill(coeffs.length){ Module(new GenericFIRDirectCell(genIn, genOut)).io } + + // Construct the direct FIR chain + for ((cell, coeff) <- directCells.zip(coeffs)) { + cell.coeff := coeff + } + + // Connect input to first cell + directCells.head.in.bits.data := io.in.bits.data + directCells.head.in.bits.carry := Ring[T].zero + directCells.head.in.valid := io.in.valid + io.in.ready := directCells.head.in.ready + + // Connect adjacent cells + // Note that .tail() returns a collection that consists of all + // elements in the inital collection minus the first one. + // This means that we zip together directCells[0, n] and + // directCells[1, n]. However, since zip ignores unmatched elements, + // the resulting zip is (directCells[0], directCells[1]) ... + // (directCells[n-1], directCells[n]) + for ((current, next) <- directCells.zip(directCells.tail)) { + next.in.bits := current.out.bits + next.in.valid := current.out.valid + current.out.ready := next.in.ready + } + + // Connect output to last cell + io.out.bits.data := directCells.last.out.bits.carry + directCells.last.out.ready := io.out.ready + io.out.valid := directCells.last.out.valid + +} +// DOC include end: GenericFIR chisel + +// A generic FIR direct cell used to construct a larger direct FIR chain +// +// in ----- [z^-1]-- out +// | +// coeff ----[*] +// | +// carryIn --[+]-- carryOut +// +// DOC include start: GenericFIRDirectCell chisel +class GenericFIRDirectCell[T<:Data:Ring](genIn: T, genOut: T) extends Module { + val io = IO(GenericFIRCellIO(genIn, genOut)) + + // Registers to delay the input and the valid to propagate with calculations + val hasNewData = RegInit(0.U) + val inputReg = Reg(genIn.cloneType) + + // Passthrough ready + io.in.ready := io.out.ready + + // When a new transaction is ready on the input, we will have new data to output + // next cycle. Take this data in + when (io.in.fire()) { + hasNewData := 1.U + inputReg := io.in.bits.data + } + + // We should output data when our cell has new data to output and is ready to + // recieve new data. This insures that every cell in the chain passes its data + // on at the same time + io.out.valid := hasNewData & io.in.fire() + io.out.bits.data := inputReg + + // Compute carry + // This uses the ring implementation for + and *, i.e. + // (a * b) maps to (Ring[T].prod(a, b)) for whicever T you use + io.out.bits.carry := inputReg * io.coeff + io.in.bits.carry +} +// DOC include end: GenericFIRDirectCell chisel + + +// DOC include start: GenericFIRBlock chisel +abstract class GenericFIRBlock[D, U, EO, EI, B<:Data, T<:Data:Ring] +( + genIn: T, + genOut: T, + coeffs: Seq[T] +)(implicit p: Parameters) extends DspBlock[D, U, EO, EI, B] { + val streamNode = AXI4StreamIdentityNode() + val mem = None + + lazy val module = new LazyModuleImp(this) { + require(streamNode.in.length == 1) + require(streamNode.out.length == 1) + + val in = streamNode.in.head._1 + val out = streamNode.out.head._1 + + // instantiate generic fir + val fir = Module(new GenericFIR(genIn, genOut, coeffs)) + + // Attach ready and valid to outside interface + in.ready := fir.io.in.ready + fir.io.in.valid := in.valid + + fir.io.out.ready := out.ready + out.valid := fir.io.out.valid + + // cast UInt to T + fir.io.in.bits := in.bits.data.asTypeOf(GenericFIRBundle(genIn)) + + // cast T to UInt + out.bits.data := fir.io.out.bits.asUInt + } +} +// DOC include end: GenericFIRBlock chisel + +// DOC include start: TLGenericFIRBlock chisel +class TLGenericFIRBlock[T<:Data:Ring] +( + val genIn: T, + val genOut: T, + coeffs: Seq[T] +)(implicit p: Parameters) extends +GenericFIRBlock[TLClientPortParameters, TLManagerPortParameters, TLEdgeOut, TLEdgeIn, TLBundle, T]( + genIn, genOut, coeffs +) with TLDspBlock +// DOC include end: TLGenericFIRBlock chisel + +// DOC include start: TLGenericFIRChain chisel +class TLGenericFIRChain[T<:Data:Ring] (genIn: T, genOut: T, coeffs: Seq[T], params: GenericFIRParams)(implicit p: Parameters) + extends TLChain(Seq( + TLWriteQueue(params.depth, AddressSet(params.writeAddress, 0xff))(_), + { implicit p: Parameters => + val fir = LazyModule(new TLGenericFIRBlock(genIn, genOut, coeffs)) + fir + }, + TLReadQueue(params.depth, AddressSet(params.readAddress, 0xff))(_) + )) +// DOC include end: TLGenericFIRChain chisel + +// DOC include start: CanHavePeripheryStreamingFIR chisel +trait CanHavePeripheryStreamingFIR extends BaseSubsystem { + val streamingFIR = p(GenericFIRKey) match { + case Some(params) => { + val streamingFIR = LazyModule(new TLGenericFIRChain( + genIn = FixedPoint(8.W, 3.BP), + genOut = FixedPoint(8.W, 3.BP), + coeffs = Seq(1.F(0.BP), 2.F(0.BP), 3.F(0.BP)), + params = params)) + pbus.toVariableWidthSlave(Some("streamingFIR")) { streamingFIR.mem.get := TLFIFOFixer() } + Some(streamingFIR) + } + case None => None + } +} +// DOC include end: CanHavePeripheryStreamingFIR chisel + +/** + * Mixin to add FIR to rocket config + */ +// DOC include start: WithStreamingFIR +class WithStreamingFIR extends Config((site, here, up) => { + case GenericFIRKey => Some(GenericFIRParams(depth = 8)) +}) +// DOC include end: WithStreamingFIR diff --git a/generators/chipyard/src/main/scala/example/dsptools/StreamingPassthrough.scala b/generators/chipyard/src/main/scala/example/dsptools/StreamingPassthrough.scala new file mode 100644 index 00000000..923f12e2 --- /dev/null +++ b/generators/chipyard/src/main/scala/example/dsptools/StreamingPassthrough.scala @@ -0,0 +1,150 @@ +//// See LICENSE for license details. +// +package chipyard.example + +import chisel3._ +import chisel3.{Bundle, Module} +import chisel3.util._ +import dspblocks._ +import dsptools.numbers._ +import freechips.rocketchip.amba.axi4stream._ +import freechips.rocketchip.config.{Parameters, Field, Config} +import freechips.rocketchip.diplomacy._ +import freechips.rocketchip.tilelink._ +import freechips.rocketchip.subsystem._ + +// Simple passthrough to use as testbed sanity check +// StreamingPassthrough params +case class StreamingPassthroughParams( + writeAddress: BigInt = 0x2000, + readAddress: BigInt = 0x2100, + depth: Int +) + +// StreamingPassthrough key +case object StreamingPassthroughKey extends Field[Option[StreamingPassthroughParams]](None) + +class StreamingPassthroughBundle[T<:Data:Ring](proto: T) extends Bundle { + val data: T = proto.cloneType + + override def cloneType: this.type = StreamingPassthroughBundle(proto).asInstanceOf[this.type] +} +object StreamingPassthroughBundle { + def apply[T<:Data:Ring](proto: T): StreamingPassthroughBundle[T] = new StreamingPassthroughBundle(proto) +} + +class StreamingPassthroughIO[T<:Data:Ring](proto: T) extends Bundle { + val in = Flipped(Decoupled(StreamingPassthroughBundle(proto))) + val out = Decoupled(StreamingPassthroughBundle(proto)) +} +object StreamingPassthroughIO { + def apply[T<:Data:Ring](proto: T): StreamingPassthroughIO[T] = new StreamingPassthroughIO(proto) +} + +class StreamingPassthrough[T<:Data:Ring](proto: T) extends Module { + val io = IO(StreamingPassthroughIO(proto)) + + io.in.ready := io.out.ready + io.out.bits.data := io.in.bits.data + io.out.valid := io.in.valid +} + +/** + * Make DspBlock wrapper for StreamingPassthrough + * @param cordicParams parameters for cordic + * @param ev$1 + * @param ev$2 + * @param ev$3 + * @param p + * @tparam D + * @tparam U + * @tparam EO + * @tparam EI + * @tparam B + * @tparam T Type parameter for passthrough, i.e. FixedPoint or DspReal + */ +abstract class StreamingPassthroughBlock[D, U, EO, EI, B<:Data, T<:Data:Ring] +( + proto: T +)(implicit p: Parameters) extends DspBlock[D, U, EO, EI, B] { + val streamNode = AXI4StreamIdentityNode() + val mem = None + + lazy val module = new LazyModuleImp(this) { + require(streamNode.in.length == 1) + require(streamNode.out.length == 1) + + val in = streamNode.in.head._1 + val out = streamNode.out.head._1 + + // instantiate passthrough + val passthrough = Module(new StreamingPassthrough(proto)) + + // Pass ready and valid from read queue to write queue + in.ready := passthrough.io.in.ready + passthrough.io.in.valid := in.valid + + // cast UInt to T + passthrough.io.in.bits := in.bits.data.asTypeOf(StreamingPassthroughBundle(proto)) + + passthrough.io.out.ready := out.ready + out.valid := passthrough.io.out.valid + + // cast T to UInt + out.bits.data := passthrough.io.out.bits.asUInt + } +} + +/** + * TLDspBlock specialization of StreamingPassthrough + * @param cordicParams parameters for passthrough + * @param ev$1 + * @param ev$2 + * @param ev$3 + * @param p + * @tparam T Type parameter for passthrough data type + */ +class TLStreamingPassthroughBlock[T<:Data:Ring] +( + val proto: T +)(implicit p: Parameters) extends +StreamingPassthroughBlock[TLClientPortParameters, TLManagerPortParameters, TLEdgeOut, TLEdgeIn, TLBundle, T](proto) +with TLDspBlock + +/** + * A chain of queues acting as our MMIOs with the passthrough module in between them. + * @param depth depth of queues + * @param ev$1 + * @param ev$2 + * @param ev$3 + * @param p + * @tparam T Type parameter for passthrough, i.e. FixedPoint or DspReal + */ +class TLStreamingPassthroughChain[T<:Data:Ring](params: StreamingPassthroughParams, proto: T)(implicit p: Parameters) + extends TLChain(Seq( + TLWriteQueue(params.depth, AddressSet(params.writeAddress, 0xff))(_), + { implicit p: Parameters => { + val streamingPassthrough = LazyModule(new TLStreamingPassthroughBlock(proto)) + streamingPassthrough + }}, + TLReadQueue(params.depth, AddressSet(params.readAddress, 0xff))(_) + )) + +trait CanHavePeripheryStreamingPassthrough { this: BaseSubsystem => + val passthrough = p(StreamingPassthroughKey) match { + case Some(params) => { + val streamingPassthroughChain = LazyModule(new TLStreamingPassthroughChain(params, UInt(32.W))) + pbus.toVariableWidthSlave(Some("streamingPassthrough")) { streamingPassthroughChain.mem.get := TLFIFOFixer() } + Some(streamingPassthroughChain) + } + case None => None + } +} + +/** + * Mixin to add passthrough to rocket config + */ +class WithStreamingPassthrough extends Config((site, here, up) => { + case StreamingPassthroughKey => Some(StreamingPassthroughParams(depth = 8)) +}) + diff --git a/scripts/tutorial-patches/build.sbt.patch b/scripts/tutorial-patches/build.sbt.patch index 40e773fe..6c1e3007 100644 --- a/scripts/tutorial-patches/build.sbt.patch +++ b/scripts/tutorial-patches/build.sbt.patch @@ -1,17 +1,17 @@ diff --git a/build.sbt b/build.sbt -index 0c4581f..ff0597c 100644 +index 5d642c1..56f6fda 100644 --- a/build.sbt +++ b/build.sbt -@@ -128,7 +128,7 @@ lazy val iocell = (project in file("./tools/barstools/iocell/")) +@@ -130,7 +130,7 @@ lazy val iocell = (project in file("./tools/barstools/iocell/")) lazy val chipyard = conditionalDependsOn(project in file("generators/chipyard")) .dependsOn(boom, hwacha, sifive_blocks, sifive_cache, utilities, iocell, - sha3, // On separate line to allow for cleaner tutorial-setup patches +// sha3, // On separate line to allow for cleaner tutorial-setup patches + dsptools, `rocket-dsptools`, gemmini, icenet, tracegen, ariane, nvdla) .settings(commonSettings) - -@@ -155,9 +155,9 @@ lazy val ariane = (project in file("generators/ariane")) +@@ -158,9 +158,9 @@ lazy val ariane = (project in file("generators/ariane")) .dependsOn(rocketchip) .settings(commonSettings) diff --git a/tests/Makefile b/tests/Makefile index af1bbd9c..ca8268ea 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -5,7 +5,8 @@ LDFLAGS= -static include libgloss.mk -PROGRAMS = pwm blkdev accum charcount nic-loopback big-blkdev pingd nvdla spiflashread spiflashwrite +PROGRAMS = pwm blkdev accum charcount nic-loopback big-blkdev pingd \ + streaming-passthrough streaming-fir nvdla spiflashread spiflashwrite spiflash.img: spiflash.py python3 $< diff --git a/tests/streaming-fir.c b/tests/streaming-fir.c new file mode 100644 index 00000000..be61534e --- /dev/null +++ b/tests/streaming-fir.c @@ -0,0 +1,65 @@ +#define PASSTHROUGH_WRITE 0x2000 +#define PASSTHROUGH_WRITE_COUNT 0x2008 +#define PASSTHROUGH_READ 0x2100 +#define PASSTHROUGH_READ_COUNT 0x2108 + +#define BP 3 +#define BP_SCALE ((double)(1 << BP)) + +#include "mmio.h" + +#include +#include +#include +#include + +uint64_t roundi(double x) +{ + if (x < 0.0) { + return (uint64_t)(x - 0.5); + } else { + return (uint64_t)(x + 0.5); + } +} + +int main(void) +{ + double test_vector[15] = {1.0, 2.0, 3.0, 4.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.5, 0.25, 0.125, 0.125}; + uint32_t num_tests = sizeof(test_vector) / sizeof(double); + printf("Starting writing %d inputs\n", num_tests); + + for (int i = 0; i < num_tests; i++) { + reg_write64(PASSTHROUGH_WRITE, roundi(test_vector[i] * BP_SCALE)); + } + + printf("Done writing\n"); + uint32_t rcnt = reg_read32(PASSTHROUGH_READ_COUNT); + printf("Write count: %d\n", reg_read32(PASSTHROUGH_WRITE_COUNT)); + printf("Read count: %d\n", rcnt); + + int failed = 0; + if (rcnt != 0) { + for (int i = 0; i < num_tests - 3; i++) { + uint32_t res = reg_read32(PASSTHROUGH_READ); + // double res = ((double)reg_read32(PASSTHROUGH_READ)) / BP_SCALE; + double expected_double = 3*test_vector[i] + 2*test_vector[i+1] + test_vector[i+2]; + uint32_t expected = ((uint32_t)(expected_double * BP_SCALE + 0.5)) & 0xFF; + if (res == expected) { + printf("\n\nPass: Got %u Expected %u\n\n", res, expected); + } else { + failed = 1; + printf("\n\nFail: Got %u Expected %u\n\n", res, expected); + } + } + } else { + failed = 1; + } + + if (failed) { + printf("\n\nSome tests failed\n\n"); + } else { + printf("\n\nAll tests passed\n\n"); + } + + return 0; +} diff --git a/tests/streaming-passthrough.c b/tests/streaming-passthrough.c new file mode 100644 index 00000000..a25e367b --- /dev/null +++ b/tests/streaming-passthrough.c @@ -0,0 +1,49 @@ +#define PASSTHROUGH_WRITE 0x2000 +#define PASSTHROUGH_WRITE_COUNT 0x2008 +#define PASSTHROUGH_READ 0x2100 +#define PASSTHROUGH_READ_COUNT 0x2108 + +#include "mmio.h" + +#include +#include +#include +#include + +int main(void) +{ + printf("Starting writing\n"); + uint32_t test_vector[7] = {3, 2, 1, 0, -1, -2, -3} ; + for (int i = 0; i < 7; i++) { + reg_write64(PASSTHROUGH_WRITE, test_vector[i]); + } + + printf("Done writing\n"); + uint32_t rcnt = reg_read32(PASSTHROUGH_READ_COUNT); + printf("Write count: %d\n", reg_read32(PASSTHROUGH_WRITE_COUNT)); + printf("Read count: %d\n", rcnt); + + int failed = 0; + if (rcnt != 0) { + for (int i = 0; i < 7; i++) { + uint32_t res = reg_read32(PASSTHROUGH_READ); + uint32_t expected = test_vector[i]; + if (res == expected) { + printf("\n\nPass: Got %d Expected %d\n\n", res, test_vector[i]); + } else { + failed = 1; + printf("\n\nFail: Got %d Expected %d\n\n", res, test_vector[i]); + } + } + } else { + failed = 1; + } + + if (failed) { + printf("\n\nSome tests failed\n\n"); + } else { + printf("\n\nAll tests passed\n\n"); + } + + return 0; +} diff --git a/tools/chisel-testers b/tools/chisel-testers index f410c593..1aa906fe 160000 --- a/tools/chisel-testers +++ b/tools/chisel-testers @@ -1 +1 @@ -Subproject commit f410c59316e5c43bac96411889aba8c5ab9a8fc0 +Subproject commit 1aa906fe168eb5ddca705ec955b27cf5c8856e4d diff --git a/tools/dsptools b/tools/dsptools index 15145ab6..211166e6 160000 --- a/tools/dsptools +++ b/tools/dsptools @@ -1 +1 @@ -Subproject commit 15145ab6230f869676de7eb730b4267fff7b11e8 +Subproject commit 211166e635861fb1937828aee38c166baf0840b5