Merge pull request #568 from ucb-bar/dev-dsptools

Dsptools example cleanup
This commit is contained in:
Abraham Gonzalez
2020-05-28 15:25:09 -07:00
committed by GitHub
18 changed files with 887 additions and 15 deletions

View File

@@ -23,6 +23,8 @@ class DigitalTop(implicit p: Parameters) extends System
with icenet.CanHavePeripheryIceNIC // Enables optionally adding the IceNIC for FireSim
with chipyard.example.CanHavePeripheryInitZero // Enables optionally adding the initzero example widget
with chipyard.example.CanHavePeripheryGCD // Enables optionally adding the GCD example widget
with chipyard.example.CanHavePeripheryStreamingFIR // Enables optionally adding the DSPTools FIR example widget
with chipyard.example.CanHavePeripheryStreamingPassthrough // Enables optionally adding the DSPTools streaming-passthrough example widget
with nvidia.blocks.dla.CanHavePeripheryNVDLA // Enables optionally having an NVDLA
{
override lazy val module = new DigitalTopModule(this)

View File

@@ -465,6 +465,46 @@ class RingSystemBusRocketConfig extends Config(
new freechips.rocketchip.system.BaseConfig)
// DOC include end: RingSystemBusRocket
// Rocket-based configuration whose first fragment, WithStreamingPassthrough, selects the
// TileLink-controlled streaming passthrough example. The remaining fragments are the same
// list, in the same order, as StreamingFIRRocketConfig.
class StreamingPassthroughRocketConfig extends Config(
  // example widget: use top with tilelink-controlled streaming passthrough
  new chipyard.example.WithStreamingPassthrough ++
  // chipyard.iobinders fragments
  new chipyard.iobinders.WithUARTAdapter ++
  new chipyard.iobinders.WithTieOffInterrupts ++
  new chipyard.iobinders.WithBlackBoxSimMem ++
  new chipyard.iobinders.WithTiedOffDebug ++
  new chipyard.iobinders.WithSimSerial ++
  // testchipip fragment
  new testchipip.WithTSI ++
  // chipyard.config fragments
  new chipyard.config.WithBootROM ++
  new chipyard.config.WithUART ++
  new chipyard.config.WithL2TLBs(1024) ++
  // rocket-chip subsystem fragments
  new freechips.rocketchip.subsystem.WithNoMMIOPort ++
  new freechips.rocketchip.subsystem.WithNoSlavePort ++
  new freechips.rocketchip.subsystem.WithInclusiveCache ++
  new freechips.rocketchip.subsystem.WithNExtTopInterrupts(0) ++
  new freechips.rocketchip.subsystem.WithNBigCores(1) ++
  new freechips.rocketchip.subsystem.WithCoherentBusTopology ++
  // rocket-chip base system config
  new freechips.rocketchip.system.BaseConfig)
// DOC include start: StreamingFIRRocketConfig
// Rocket-based configuration whose first fragment, WithStreamingFIR, selects the
// TileLink-controlled streaming FIR example. The remaining fragments are the same list,
// in the same order, as StreamingPassthroughRocketConfig.
class StreamingFIRRocketConfig extends Config(
  // example widget: use top with tilelink-controlled streaming FIR
  new chipyard.example.WithStreamingFIR ++
  // chipyard.iobinders fragments
  new chipyard.iobinders.WithUARTAdapter ++
  new chipyard.iobinders.WithTieOffInterrupts ++
  new chipyard.iobinders.WithBlackBoxSimMem ++
  new chipyard.iobinders.WithTiedOffDebug ++
  new chipyard.iobinders.WithSimSerial ++
  // testchipip fragment
  new testchipip.WithTSI ++
  // chipyard.config fragments
  new chipyard.config.WithBootROM ++
  new chipyard.config.WithUART ++
  new chipyard.config.WithL2TLBs(1024) ++
  // rocket-chip subsystem fragments
  new freechips.rocketchip.subsystem.WithNoMMIOPort ++
  new freechips.rocketchip.subsystem.WithNoSlavePort ++
  new freechips.rocketchip.subsystem.WithInclusiveCache ++
  new freechips.rocketchip.subsystem.WithNExtTopInterrupts(0) ++
  new freechips.rocketchip.subsystem.WithNBigCores(1) ++
  new freechips.rocketchip.subsystem.WithCoherentBusTopology ++
  // rocket-chip base system config
  new freechips.rocketchip.system.BaseConfig)
// DOC include end: StreamingFIRRocketConfig
class SmallNVDLARocketConfig extends Config(
new chipyard.iobinders.WithUARTAdapter ++
new chipyard.iobinders.WithTieOffInterrupts ++

View File

@@ -0,0 +1,162 @@
package chipyard.example
import chisel3._
import chisel3.util._
import dspblocks._
import dsptools.numbers._
import freechips.rocketchip.amba.axi4stream._
import freechips.rocketchip.config.Parameters
import freechips.rocketchip.diplomacy._
import freechips.rocketchip.regmapper._
import freechips.rocketchip.tilelink._
import freechips.rocketchip.subsystem._
/**
 * Memory-mapped write side of a streaming pipeline.
 *
 * A write to the register at offset 0x0 enqueues one entry; queued entries are driven out
 * of the AXI4-Stream master node. A second, read-only register exposes the queue's entry
 * count.
 *
 * @param depth number of entries in the queue
 * @param streamParameters parameters for the stream node
 * @param p implicit configuration parameters
 */
abstract class WriteQueue[D, U, E, O, B <: Data](
  val depth: Int,
  val streamParameters: AXI4StreamMasterParameters = AXI4StreamMasterParameters()
)(implicit p: Parameters) extends DspBlock[D, U, E, O, B] with HasCSR {
  // output-only stream node
  val streamNode = AXI4StreamMasterNode(streamParameters)

  lazy val module = new LazyModuleImp(this) {
    // exactly one streaming output is supported
    require(streamNode.out.length == 1)

    // bundle of the single AXI4-Stream output
    val out = streamNode.out.head._1
    // output interface width in bits (n bytes * 8)
    val width = out.params.n * 8
    // byte offset of the count register: the first offset past the data register
    val countAddress = (width + 7) / 8

    // buffer between the register write port and the stream output
    val queue = Module(new Queue(UInt(out.params.dataBits.W), depth))

    // queue output -> streaming output
    queue.io.deq.ready := out.ready
    out.valid := queue.io.deq.valid
    out.bits.data := queue.io.deq.bits
    // `last` is not used by this block, so it is tied low
    out.bits.last := false.B

    regmap(
      // every write to this register pushes one entry into the queue
      0x0 -> Seq(RegField.w(width, queue.io.enq)),
      // reads return the number of entries currently held in the queue
      countAddress -> Seq(RegField.r(width, queue.io.count))
    )
  }
}
/**
 * TLDspBlock specialization of WriteQueue: the register map is exposed through a
 * TileLink register node at `csrAddress`.
 *
 * @param depth number of entries in the queue
 * @param csrAddress address range for peripheral
 * @param beatBytes beatBytes of TL interface
 * @param p implicit configuration parameters
 */
class TLWriteQueue(depth: Int, csrAddress: AddressSet, beatBytes: Int)(implicit p: Parameters)
  extends WriteQueue[
    TLClientPortParameters, TLManagerPortParameters, TLEdgeOut, TLEdgeIn, TLBundle
  ](depth) with TLHasCSR {
  // device name and compatible strings handed to SimpleDevice
  val devname = "tlQueueIn"
  val devcompat = Seq("ucb-art", "dsptools")
  // SimpleDevice's own description is used as-is; no describe override is needed because
  // nothing is added to the name or the mapping.
  val device = new SimpleDevice(devname, devcompat)
  // make diplomatic TL node for regmap
  override val mem = Some(TLRegisterNode(address = Seq(csrAddress), device = device, beatBytes = beatBytes))
}
/**
 * Factory for TLWriteQueue that wraps the new instance in a LazyModule.
 *
 * Defaults: depth 8, registers at AddressSet(0x2000, 0xff), 8-byte beats.
 */
object TLWriteQueue {
  def apply(
    depth: Int = 8,
    csrAddress: AddressSet = AddressSet(0x2000, 0xff),
    beatBytes: Int = 8
  )(implicit p: Parameters): TLWriteQueue = {
    // NOTE(review): keep the named val binding; rocket-chip's LazyModule appears to take
    // its name from the enclosing val -- confirm before inlining this expression.
    val writeQueue = LazyModule(
      new TLWriteQueue(depth = depth, csrAddress = csrAddress, beatBytes = beatBytes)
    )
    writeQueue
  }
}
/**
 * Memory-mapped read side of a streaming pipeline.
 *
 * The AXI4-Stream slave node enqueues incoming data; a read of the register at offset 0x0
 * dequeues one entry. A second, read-only register exposes the queue's entry count.
 *
 * @param depth number of entries in the queue
 * @param streamParameters parameters for the stream node
 * @param p implicit configuration parameters
 */
abstract class ReadQueue[D, U, E, O, B <: Data](
  val depth: Int,
  val streamParameters: AXI4StreamSlaveParameters = AXI4StreamSlaveParameters()
)(implicit p: Parameters) extends DspBlock[D, U, E, O, B] with HasCSR {
  // input-only stream node
  val streamNode = AXI4StreamSlaveNode(streamParameters)

  lazy val module = new LazyModuleImp(this) {
    // exactly one streaming input is supported
    require(streamNode.in.length == 1)

    // bundle of the single AXI4-Stream input
    val in = streamNode.in.head._1
    // Decoupled[UInt] view of the queue output, in the form RegReadFn accepts
    val out = Wire(Decoupled(UInt()))
    // input interface width in bits (n bytes * 8)
    val width = in.params.n * 8
    // byte offset of the count register: the first offset past the data register
    val countAddress = (width + 7) / 8

    // buffer between the stream input and the register read port
    val queue = Module(new Queue(UInt(in.params.dataBits.W), depth))

    // streaming input -> queue input
    in.ready := queue.io.enq.ready
    queue.io.enq.valid := in.valid
    queue.io.enq.bits := in.bits.data

    // queue output -> wire consumed by the register read
    queue.io.deq.ready := out.ready
    out.valid := queue.io.deq.valid
    out.bits := queue.io.deq.bits

    regmap(
      // reads of this register return the output of the queue
      0x0 -> Seq(RegField.r(width, RegReadFn(out))),
      // reads return the number of elements currently held in the queue
      countAddress -> Seq(RegField.r(width, queue.io.count))
    )
  }
}
/**
 * TLDspBlock specialization of ReadQueue: the register map is exposed through a
 * TileLink register node at `csrAddress`.
 *
 * @param depth number of entries in the queue
 * @param csrAddress address range
 * @param beatBytes beatBytes of TL interface
 * @param p implicit configuration parameters
 */
class TLReadQueue(depth: Int, csrAddress: AddressSet, beatBytes: Int)(implicit p: Parameters)
  extends ReadQueue[
    TLClientPortParameters, TLManagerPortParameters, TLEdgeOut, TLEdgeIn, TLBundle
  ](depth) with TLHasCSR {
  // device name and compatible strings handed to SimpleDevice
  val devname = "tlQueueOut"
  val devcompat = Seq("ucb-art", "dsptools")
  // SimpleDevice's own description is used as-is; no describe override is needed because
  // nothing is added to the name or the mapping.
  val device = new SimpleDevice(devname, devcompat)
  // make diplomatic TL node for regmap
  override val mem = Some(TLRegisterNode(address = Seq(csrAddress), device = device, beatBytes = beatBytes))
}
/**
 * Factory for TLReadQueue that wraps the new instance in a LazyModule.
 *
 * Defaults: depth 8, registers at AddressSet(0x2100, 0xff), 8-byte beats.
 */
object TLReadQueue {
  def apply(
    depth: Int = 8,
    csrAddress: AddressSet = AddressSet(0x2100, 0xff),
    beatBytes: Int = 8
  )(implicit p: Parameters): TLReadQueue = {
    // NOTE(review): keep the named val binding; rocket-chip's LazyModule appears to take
    // its name from the enclosing val -- confirm before inlining this expression.
    val readQueue = LazyModule(
      new TLReadQueue(depth = depth, csrAddress = csrAddress, beatBytes = beatBytes)
    )
    readQueue
  }
}

View File

@@ -0,0 +1,225 @@
//// See LICENSE for license details.
//
package chipyard.example
import chisel3._
import chisel3.experimental.FixedPoint
import chisel3.util._
import dspblocks._
import dsptools.numbers._
import freechips.rocketchip.amba.axi4stream._
import freechips.rocketchip.config.{Parameters, Field, Config}
import freechips.rocketchip.diplomacy._
import freechips.rocketchip.tilelink._
import freechips.rocketchip.subsystem._
// FIR params
/**
 * Parameters of the streaming FIR example.
 *
 * @param writeAddress base address given to the chain's TLWriteQueue
 * @param readAddress base address given to the chain's TLReadQueue
 * @param depth depth given to both queues; has no default and must be supplied
 */
case class GenericFIRParams(
  writeAddress: BigInt = 0x2000,
  readAddress: BigInt = 0x2100,
  depth: Int
)
// Config key holding the optional FIR parameters. It defaults to None, in which case
// CanHavePeripheryStreamingFIR does not instantiate the FIR chain.
case object GenericFIRKey extends Field[Option[GenericFIRParams]](None)
/**
 * Payload exchanged between neighbouring FIR cells.
 *
 * @param genIn prototype for `data`
 * @param genOut prototype for `carry`
 */
class GenericFIRCellBundle[T <: Data : Ring](genIn: T, genOut: T) extends Bundle {
  // field order is kept as data, then carry
  val data: T = genIn.cloneType
  val carry: T = genOut.cloneType

  // explicit cloneType that rebuilds the bundle from the same two prototypes
  override def cloneType: this.type =
    new GenericFIRCellBundle(genIn, genOut).asInstanceOf[this.type]
}

/** Factory so call sites can write `GenericFIRCellBundle(genIn, genOut)` without `new`. */
object GenericFIRCellBundle {
  def apply[T <: Data : Ring](genIn: T, genOut: T): GenericFIRCellBundle[T] =
    new GenericFIRCellBundle(genIn, genOut)
}
/**
 * IO of a single FIR cell: a coefficient input plus decoupled in/out ports carrying
 * GenericFIRCellBundle payloads.
 *
 * @param genIn prototype for the coefficient and for the `data` field of the payloads
 * @param genOut prototype for the `carry` field of the payloads
 */
class GenericFIRCellIO[T <: Data : Ring](genIn: T, genOut: T) extends Bundle {
  // coefficient applied by this cell
  val coeff = Input(genIn.cloneType)
  // payload arriving at this cell (consumer side, hence Flipped)
  val in = Flipped(Decoupled(GenericFIRCellBundle(genIn, genOut)))
  // payload leaving this cell
  val out = Decoupled(GenericFIRCellBundle(genIn, genOut))
}

/** Factory so call sites can write `GenericFIRCellIO(genIn, genOut)` without `new`. */
object GenericFIRCellIO {
  def apply[T <: Data : Ring](genIn: T, genOut: T): GenericFIRCellIO[T] =
    new GenericFIRCellIO(genIn, genOut)
}
/**
 * Single-field bundle wrapping one value of the FIR's input or output type.
 *
 * @param proto prototype for `data`
 */
class GenericFIRBundle[T <: Data : Ring](proto: T) extends Bundle {
  val data: T = proto.cloneType

  // explicit cloneType that rebuilds the bundle from the same prototype
  override def cloneType: this.type =
    new GenericFIRBundle(proto).asInstanceOf[this.type]
}

/** Factory so call sites can write `GenericFIRBundle(proto)` without `new`. */
object GenericFIRBundle {
  def apply[T <: Data : Ring](proto: T): GenericFIRBundle[T] =
    new GenericFIRBundle(proto)
}
/**
 * Top-level IO of the FIR filter: a decoupled input of `genIn`-typed data and a decoupled
 * output of `genOut`-typed data.
 *
 * @param genIn prototype for the input data
 * @param genOut prototype for the output data
 */
class GenericFIRIO[T <: Data : Ring](genIn: T, genOut: T) extends Bundle {
  // samples entering the filter (consumer side, hence Flipped)
  val in = Flipped(Decoupled(GenericFIRBundle(genIn)))
  // results leaving the filter
  val out = Decoupled(GenericFIRBundle(genOut))
}

/** Factory so call sites can write `GenericFIRIO(genIn, genOut)` without `new`. */
object GenericFIRIO {
  def apply[T <: Data : Ring](genIn: T, genOut: T): GenericFIRIO[T] =
    new GenericFIRIO(genIn, genOut)
}
// A generic FIR filter
// DOC include start: GenericFIR chisel
/**
 * A generic FIR filter built as a chain of GenericFIRDirectCell instances, one per
 * coefficient.
 *
 * @param genIn prototype for the input sample type
 * @param genOut prototype for the output (carry) type
 * @param coeffs filter coefficients; one cell is instantiated per entry, so the sequence
 *               must not be empty
 */
class GenericFIR[T <: Data : Ring](genIn: T, genOut: T, coeffs: Seq[T]) extends Module {
  // The chain is addressed through .head and .last below, which need at least one cell.
  // Failing here gives a clear message instead of a bare "head of empty list" error.
  require(coeffs.nonEmpty, "GenericFIR requires at least one coefficient")

  val io = IO(GenericFIRIO(genIn, genOut))

  // Construct a vector of genericFIRDirectCells
  val directCells = Seq.fill(coeffs.length){ Module(new GenericFIRDirectCell(genIn, genOut)).io }

  // Construct the direct FIR chain
  for ((cell, coeff) <- directCells.zip(coeffs)) {
    cell.coeff := coeff
  }

  // Connect input to first cell; the first cell starts from a zero carry
  directCells.head.in.bits.data := io.in.bits.data
  directCells.head.in.bits.carry := Ring[T].zero
  directCells.head.in.valid := io.in.valid
  io.in.ready := directCells.head.in.ready

  // Connect adjacent cells
  // Note that .tail returns a collection that consists of all
  // elements in the initial collection minus the first one.
  // This means that we zip together directCells[0, n] and
  // directCells[1, n]. However, since zip ignores unmatched elements,
  // the resulting zip is (directCells[0], directCells[1]) ...
  // (directCells[n-1], directCells[n])
  for ((current, next) <- directCells.zip(directCells.tail)) {
    next.in.bits := current.out.bits
    next.in.valid := current.out.valid
    current.out.ready := next.in.ready
  }

  // Connect output to last cell; the filter result is the last cell's carry
  io.out.bits.data := directCells.last.out.bits.carry
  directCells.last.out.ready := io.out.ready
  io.out.valid := directCells.last.out.valid
}
// DOC include end: GenericFIR chisel
// A generic FIR direct cell used to construct a larger direct FIR chain
//
// in ----- [z^-1]-- out
// |
// coeff ----[*]
// |
// carryIn --[+]-- carryOut
//
// DOC include start: GenericFIRDirectCell chisel
/**
 * One cell of the direct FIR chain: registers the incoming sample, multiplies the
 * registered sample by `coeff`, and adds the incoming carry.
 *
 * @param genIn prototype for the sample and coefficient type
 * @param genOut prototype for the carry type
 */
class GenericFIRDirectCell[T <: Data : Ring](genIn: T, genOut: T) extends Module {
  val io = IO(GenericFIRCellIO(genIn, genOut))

  // Registers to delay the input and the valid to propagate with calculations.
  // hasNewData is a one-bit flag, so it is modelled as a Bool.
  val hasNewData = RegInit(false.B)
  val inputReg = Reg(genIn.cloneType)

  // Passthrough ready
  io.in.ready := io.out.ready

  // When a new transaction is ready on the input, we will have new data to output
  // next cycle. Take this data in
  when (io.in.fire()) {
    hasNewData := true.B
    inputReg := io.in.bits.data
  }

  // We should output data when our cell has new data to output and is ready to
  // receive new data. This ensures that every cell in the chain passes its data
  // on at the same time
  io.out.valid := hasNewData && io.in.fire()
  io.out.bits.data := inputReg

  // Compute carry
  // This uses the ring implementation for + and *, i.e.
  // (a * b) maps to (Ring[T].prod(a, b)) for whichever T you use
  io.out.bits.carry := inputReg * io.coeff + io.in.bits.carry
}
// DOC include end: GenericFIRDirectCell chisel
// DOC include start: GenericFIRBlock chisel
/**
 * DspBlock wrapper around GenericFIR. Data enters and leaves through an AXI4-Stream
 * identity node; the block has no memory-mapped interface (`mem` is None).
 *
 * @param genIn prototype for the FIR input type
 * @param genOut prototype for the FIR output type
 * @param coeffs FIR coefficients
 * @param p implicit configuration parameters
 */
abstract class GenericFIRBlock[D, U, EO, EI, B <: Data, T <: Data : Ring](
  genIn: T,
  genOut: T,
  coeffs: Seq[T]
)(implicit p: Parameters) extends DspBlock[D, U, EO, EI, B] {
  val streamNode = AXI4StreamIdentityNode()
  val mem = None

  lazy val module = new LazyModuleImp(this) {
    // exactly one stream in and one stream out
    require(streamNode.in.length == 1)
    require(streamNode.out.length == 1)

    val in = streamNode.in.head._1
    val out = streamNode.out.head._1

    // the filter being wrapped
    val fir = Module(new GenericFIR(genIn, genOut, coeffs))

    // input side: handshake, then reinterpret the raw stream UInt as T
    fir.io.in.valid := in.valid
    in.ready := fir.io.in.ready
    fir.io.in.bits := in.bits.data.asTypeOf(GenericFIRBundle(genIn))

    // output side: handshake, then flatten T back into a UInt for the stream
    out.valid := fir.io.out.valid
    fir.io.out.ready := out.ready
    out.bits.data := fir.io.out.bits.asUInt
  }
}
// DOC include end: GenericFIRBlock chisel
// DOC include start: TLGenericFIRBlock chisel
/**
 * TLDspBlock specialization of GenericFIRBlock: fixes the five DspBlock type parameters
 * to their TileLink types.
 *
 * @param genIn prototype for the FIR input type
 * @param genOut prototype for the FIR output type
 * @param coeffs FIR coefficients
 * @param p implicit configuration parameters
 */
class TLGenericFIRBlock[T <: Data : Ring](
  val genIn: T,
  val genOut: T,
  coeffs: Seq[T]
)(implicit p: Parameters)
  extends GenericFIRBlock[
    TLClientPortParameters, TLManagerPortParameters, TLEdgeOut, TLEdgeIn, TLBundle, T
  ](genIn, genOut, coeffs)
  with TLDspBlock
// DOC include end: TLGenericFIRBlock chisel
// DOC include start: TLGenericFIRChain chisel
/**
 * TLChain of three blocks, in order: a TLWriteQueue at `params.writeAddress`, the FIR
 * block, and a TLReadQueue at `params.readAddress`. Both queues use depth `params.depth`
 * and an address mask of 0xff.
 *
 * @param genIn prototype for the FIR input type
 * @param genOut prototype for the FIR output type
 * @param coeffs FIR coefficients
 * @param params queue addresses and depth
 * @param p implicit configuration parameters
 */
class TLGenericFIRChain[T <: Data : Ring](
  genIn: T,
  genOut: T,
  coeffs: Seq[T],
  params: GenericFIRParams
)(implicit p: Parameters)
  extends TLChain(Seq(
    // memory-mapped writes feed the stream
    TLWriteQueue(depth = params.depth, csrAddress = AddressSet(params.writeAddress, 0xff))(_),
    // the filter itself; the lambda's implicit `p` shadows the class-level one
    { implicit p: Parameters =>
      val fir = LazyModule(new TLGenericFIRBlock(genIn, genOut, coeffs))
      fir
    },
    // the stream drains into memory-mapped reads
    TLReadQueue(depth = params.depth, csrAddress = AddressSet(params.readAddress, 0xff))(_)
  ))
// DOC include end: TLGenericFIRChain chisel
// DOC include start: CanHavePeripheryStreamingFIR chisel
/**
 * Subsystem mixin that optionally adds the streaming FIR chain.
 *
 * When GenericFIRKey holds parameters, a TLGenericFIRChain over 8-bit FixedPoint values
 * with 3 fractional bits and coefficients 1, 2, 3 is created and attached to the
 * periphery bus. `streamingFIR` is None when the key is unset.
 */
trait CanHavePeripheryStreamingFIR extends BaseSubsystem {
  val streamingFIR = p(GenericFIRKey).map { params =>
    val streamingFIR = LazyModule(new TLGenericFIRChain(
      genIn = FixedPoint(8.W, 3.BP),
      genOut = FixedPoint(8.W, 3.BP),
      coeffs = Seq(1.F(0.BP), 2.F(0.BP), 3.F(0.BP)),
      params = params))
    // attach the chain's register interface to the periphery bus through a FIFO fixer
    pbus.toVariableWidthSlave(Some("streamingFIR")) { streamingFIR.mem.get := TLFIFOFixer() }
    streamingFIR
  }
}
// DOC include end: CanHavePeripheryStreamingFIR chisel
/**
* Mixin to add FIR to rocket config
*/
// DOC include start: WithStreamingFIR
// Config fragment that sets GenericFIRKey to the default GenericFIRParams with queue depth 8.
class WithStreamingFIR extends Config(
  (site, here, up) => {
    case GenericFIRKey => Some(GenericFIRParams(depth = 8))
  }
)
// DOC include end: WithStreamingFIR

View File

@@ -0,0 +1,150 @@
//// See LICENSE for license details.
//
package chipyard.example
import chisel3._
import chisel3.{Bundle, Module}
import chisel3.util._
import dspblocks._
import dsptools.numbers._
import freechips.rocketchip.amba.axi4stream._
import freechips.rocketchip.config.{Parameters, Field, Config}
import freechips.rocketchip.diplomacy._
import freechips.rocketchip.tilelink._
import freechips.rocketchip.subsystem._
// Simple passthrough to use as testbed sanity check
// StreamingPassthrough params
/**
 * Parameters of the streaming passthrough example.
 *
 * @param writeAddress base address given to the chain's TLWriteQueue
 * @param readAddress base address given to the chain's TLReadQueue
 * @param depth depth given to both queues; has no default and must be supplied
 */
case class StreamingPassthroughParams(
  writeAddress: BigInt = 0x2000,
  readAddress: BigInt = 0x2100,
  depth: Int
)
// StreamingPassthrough key
// Defaults to None, in which case CanHavePeripheryStreamingPassthrough does not
// instantiate the passthrough chain.
case object StreamingPassthroughKey extends Field[Option[StreamingPassthroughParams]](None)
/**
 * Single-field bundle wrapping one value of the passthrough's data type.
 *
 * @param proto prototype for `data`
 */
class StreamingPassthroughBundle[T <: Data : Ring](proto: T) extends Bundle {
  val data: T = proto.cloneType

  // explicit cloneType that rebuilds the bundle from the same prototype
  override def cloneType: this.type =
    new StreamingPassthroughBundle(proto).asInstanceOf[this.type]
}

/** Factory so call sites can write `StreamingPassthroughBundle(proto)` without `new`. */
object StreamingPassthroughBundle {
  def apply[T <: Data : Ring](proto: T): StreamingPassthroughBundle[T] =
    new StreamingPassthroughBundle(proto)
}
/**
 * IO of the passthrough: decoupled input and output ports carrying the same payload type.
 *
 * @param proto prototype for the payload data
 */
class StreamingPassthroughIO[T <: Data : Ring](proto: T) extends Bundle {
  // data entering the passthrough (consumer side, hence Flipped)
  val in = Flipped(Decoupled(StreamingPassthroughBundle(proto)))
  // data leaving the passthrough
  val out = Decoupled(StreamingPassthroughBundle(proto))
}

/** Factory so call sites can write `StreamingPassthroughIO(proto)` without `new`. */
object StreamingPassthroughIO {
  def apply[T <: Data : Ring](proto: T): StreamingPassthroughIO[T] =
    new StreamingPassthroughIO(proto)
}
/**
 * Purely combinational passthrough: valid and data are forwarded from input to output,
 * and ready is forwarded from output to input.
 *
 * @param proto prototype for the data type being passed through
 */
class StreamingPassthrough[T <: Data : Ring](proto: T) extends Module {
  val io = IO(StreamingPassthroughIO(proto))

  // forward path: input -> output
  io.out.valid := io.in.valid
  io.out.bits.data := io.in.bits.data

  // backpressure path: output -> input
  io.in.ready := io.out.ready
}
/**
 * DspBlock wrapper for StreamingPassthrough. Data enters and leaves through an
 * AXI4-Stream identity node; the block has no memory-mapped interface (`mem` is None).
 *
 * @param proto prototype for the passthrough data type
 * @param p implicit configuration parameters
 * @tparam D forwarded to DspBlock
 * @tparam U forwarded to DspBlock
 * @tparam EO forwarded to DspBlock
 * @tparam EI forwarded to DspBlock
 * @tparam B forwarded to DspBlock
 * @tparam T Type parameter for passthrough, i.e. FixedPoint or DspReal
 */
abstract class StreamingPassthroughBlock[D, U, EO, EI, B <: Data, T <: Data : Ring](
  proto: T
)(implicit p: Parameters) extends DspBlock[D, U, EO, EI, B] {
  val streamNode = AXI4StreamIdentityNode()
  val mem = None

  lazy val module = new LazyModuleImp(this) {
    // exactly one stream in and one stream out
    require(streamNode.in.length == 1)
    require(streamNode.out.length == 1)

    val in = streamNode.in.head._1
    val out = streamNode.out.head._1

    // the module being wrapped
    val passthrough = Module(new StreamingPassthrough(proto))

    // input side: handshake, then reinterpret the raw stream UInt as T
    passthrough.io.in.valid := in.valid
    in.ready := passthrough.io.in.ready
    passthrough.io.in.bits := in.bits.data.asTypeOf(StreamingPassthroughBundle(proto))

    // output side: handshake, then flatten T back into a UInt for the stream
    out.valid := passthrough.io.out.valid
    passthrough.io.out.ready := out.ready
    out.bits.data := passthrough.io.out.bits.asUInt
  }
}
/**
 * TLDspBlock specialization of StreamingPassthroughBlock: fixes the five DspBlock type
 * parameters to their TileLink types.
 *
 * @param proto prototype for the passthrough data type
 * @param p implicit configuration parameters
 * @tparam T Type parameter for passthrough data type
 */
class TLStreamingPassthroughBlock[T <: Data : Ring](
  val proto: T
)(implicit p: Parameters)
  extends StreamingPassthroughBlock[
    TLClientPortParameters, TLManagerPortParameters, TLEdgeOut, TLEdgeIn, TLBundle, T
  ](proto)
  with TLDspBlock
/**
 * A chain of queues acting as our MMIOs with the passthrough module in between them:
 * a TLWriteQueue at `params.writeAddress`, the passthrough block, and a TLReadQueue at
 * `params.readAddress`. Both queues use depth `params.depth` and an address mask of 0xff.
 *
 * @param params queue addresses and depth
 * @param proto prototype for the passthrough data type
 * @param p implicit configuration parameters
 * @tparam T Type parameter for passthrough, i.e. FixedPoint or DspReal
 */
class TLStreamingPassthroughChain[T <: Data : Ring](
  params: StreamingPassthroughParams,
  proto: T
)(implicit p: Parameters)
  extends TLChain(Seq(
    // memory-mapped writes feed the stream
    TLWriteQueue(depth = params.depth, csrAddress = AddressSet(params.writeAddress, 0xff))(_),
    // the passthrough itself; the lambda's implicit `p` shadows the class-level one
    { implicit p: Parameters =>
      val streamingPassthrough = LazyModule(new TLStreamingPassthroughBlock(proto))
      streamingPassthrough
    },
    // the stream drains into memory-mapped reads
    TLReadQueue(depth = params.depth, csrAddress = AddressSet(params.readAddress, 0xff))(_)
  ))
/**
 * Subsystem mixin that optionally adds the streaming passthrough chain.
 *
 * When StreamingPassthroughKey holds parameters, a TLStreamingPassthroughChain over
 * 32-bit UInt data is created and attached to the periphery bus. `passthrough` is None
 * when the key is unset.
 */
trait CanHavePeripheryStreamingPassthrough { this: BaseSubsystem =>
  val passthrough = p(StreamingPassthroughKey).map { params =>
    val streamingPassthroughChain = LazyModule(new TLStreamingPassthroughChain(params, UInt(32.W)))
    // attach the chain's register interface to the periphery bus through a FIFO fixer
    pbus.toVariableWidthSlave(Some("streamingPassthrough")) {
      streamingPassthroughChain.mem.get := TLFIFOFixer()
    }
    streamingPassthroughChain
  }
}
/**
 * Mixin to add passthrough to rocket config: sets StreamingPassthroughKey to the default
 * StreamingPassthroughParams with queue depth 8.
 */
class WithStreamingPassthrough extends Config(
  (site, here, up) => {
    case StreamingPassthroughKey => Some(StreamingPassthroughParams(depth = 8))
  }
)