move gemmini connections and smem from tile to cluster
This commit is contained in:
@@ -4,15 +4,15 @@
|
|||||||
package radiance.tile
|
package radiance.tile
|
||||||
|
|
||||||
import chisel3._
|
import chisel3._
|
||||||
import chisel3.experimental.SourceInfo
|
|
||||||
import chisel3.util._
|
import chisel3.util._
|
||||||
|
import freechips.rocketchip.diplomacy._
|
||||||
import org.chipsalliance.cde.config.Parameters
|
import freechips.rocketchip.prci.ClockSinkParameters
|
||||||
|
import freechips.rocketchip.regmapper.RegField
|
||||||
import freechips.rocketchip.subsystem._
|
import freechips.rocketchip.subsystem._
|
||||||
import freechips.rocketchip.tilelink._
|
import freechips.rocketchip.tilelink._
|
||||||
import freechips.rocketchip.diplomacy.{LazyModule, AddressSet, SimpleDevice, ClockCrossingType}
|
import freechips.rocketchip.util.BundleField
|
||||||
import freechips.rocketchip.regmapper.RegField
|
import gemmini._
|
||||||
import freechips.rocketchip.prci.ClockSinkParameters
|
import org.chipsalliance.cde.config.Parameters
|
||||||
|
|
||||||
case class RadianceClusterParams(
|
case class RadianceClusterParams(
|
||||||
val clusterId: Int,
|
val clusterId: Int,
|
||||||
@@ -39,19 +39,177 @@ class RadianceCluster (
|
|||||||
// Instantiate cluster-local shared memory scratchpad
|
// Instantiate cluster-local shared memory scratchpad
|
||||||
//
|
//
|
||||||
// Instantiate the same number of banks as there are lanes.
|
// Instantiate the same number of banks as there are lanes.
|
||||||
val numLsuLanes = 4 // FIXME: hardcoded
|
// val numLsuLanes = 4 // FIXME: hardcoded
|
||||||
val wordSize = 4
|
val wordSize = 4
|
||||||
val smemBanks = Seq.tabulate(numLsuLanes) { bankId =>
|
|
||||||
// Banked-by-word (4 bytes)
|
|
||||||
// base for bank 1: ff...000000|01|00
|
|
||||||
// mask for bank 1; 00...111111|00|11
|
|
||||||
val base = 0xff000000L | (bankId * wordSize)
|
|
||||||
val mask = 0x00001fffL ^ ((numLsuLanes - 1) * wordSize)
|
|
||||||
LazyModule(new TLRAM(AddressSet(base, mask), beatBytes = wordSize))
|
|
||||||
}
|
|
||||||
smemBanks.foreach(_.node := clbus.outwardNode)
|
|
||||||
|
|
||||||
val numCores = leafTiles.size
|
// Select the Gemmini tiles of this cluster. `collect` narrows the element type
// per element via a pattern match, instead of filtering and then casting the
// whole collection with an unchecked `asInstanceOf`.
val gemminis = leafTiles.values.collect { case tile: GemminiTile => tile }
|
||||||
|
require(gemminis.size == 1, "there should be one and only one gemmini per cluster")
|
||||||
|
val gemmini = gemminis.head.gemmini
|
||||||
|
val gemminiTile = gemminis.head
|
||||||
|
// val gemminiConfig = thisClusterParams.gemminiConfig.get // TODO: handle None gracefully
|
||||||
|
val gemminiConfig = gemmini.config
|
||||||
|
|
||||||
|
val max_write_width_bytes = gemminiConfig.dma_buswidth / 8
|
||||||
|
|
||||||
|
// Select the Radiance (core) tiles of this cluster. `collect` narrows the
// element type per element via a pattern match, instead of filtering and then
// casting the whole collection with an unchecked `asInstanceOf`.
val radianceTiles = leafTiles.values.collect { case tile: RadianceTile => tile }
|
||||||
|
|
||||||
|
val numCores = leafTiles.size - gemminis.size
|
||||||
|
|
||||||
|
// **************************************
|
||||||
|
// ______ _________ ___
|
||||||
|
// / __/ |/ / __/ |/ /
|
||||||
|
// _\ \/ /|_/ / _// /|_/ /
|
||||||
|
// /___/_/ /_/___/_/ /_/
|
||||||
|
//
|
||||||
|
// **************************************
|
||||||
|
|
||||||
|
// TODO: parametrize bank configuration
|
||||||
|
// TODO: move rw split node to separate file
|
||||||
|
// TODO: stride by word
|
||||||
|
val unified_mem_read_node = TLIdentityNode()
|
||||||
|
val unified_mem_write_node = TLIdentityNode()
|
||||||
|
val spad_data_len = gemminiConfig.sp_width / 8
|
||||||
|
val acc_data_len = gemminiConfig.sp_width / gemminiConfig.inputType.getWidth * gemminiConfig.accType.getWidth / 8
|
||||||
|
val max_data_len = spad_data_len // max acc_data_len
|
||||||
|
val smem_base = gemminiConfig.tl_ext_mem_base
|
||||||
|
val smem_depth = gemminiConfig.sp_bank_entries * spad_data_len / max_data_len
|
||||||
|
val smem_width = max_data_len
|
||||||
|
val smem_banks = gemminiConfig.sp_banks
|
||||||
|
val smem_subbanks = 1
|
||||||
|
|
||||||
|
unified_mem_read_node :=* TLWidthWidget(spad_data_len) :=* gemmini.spad_read_nodes
|
||||||
|
// unified_mem_read_node :=* TLWidthWidget(acc_data_len) :=* acc_read_nodes
|
||||||
|
unified_mem_write_node :=* TLWidthWidget(spad_data_len) :=* gemmini.spad_write_nodes
|
||||||
|
// unified_mem_write_node :=* TLWidthWidget(acc_data_len) :=* acc_write_nodes
|
||||||
|
unified_mem_write_node := gemmini.spad.spad_writer.node // this is the dma write node
|
||||||
|
|
||||||
|
// Unified memory nexus node.
// Inward side: accepts both read and write requests from its clients.
// Outward side: advertises exactly two clients, a read-only one and a
// write-only one, so the read and write channels stay fully separate and can
// be processed in parallel. The write client's source ids are the read
// client's range shifted up by the size of that range.
// The splitting/arbitration hardware itself lives in the module implementation
// (connectUnifiedMemNode).
val unified_mem_node = TLNexusNode(
  clientFn = { upstream =>
    // source id space: reads take [0, 2^k), writes take the next 2^k ids
    val id_ranges = TLXbar.mapInputIds(upstream)
    val read_src_range = IdRange(id_ranges.map(_.start).min, id_ranges.map(_.end).max)
    assert((read_src_range.start == 0) && isPow2(read_src_range.end))
    val write_src_range = read_src_range.shift(read_src_range.size)

    // merge the visibility of every upstream master into one address set
    val visibilities = upstream.flatMap(_.masters.flatMap(_.visibility))
    val vis_min = visibilities.map(_.base).min
    val vis_max = visibilities.map { v => v.base + v.mask }.max
    val vis_mask = vis_max - vis_min
    require(isPow2(vis_mask + 1) || vis_mask == -1)
    println(f"combined visibilities of unified memory node clients: ${vis_min}, ${vis_mask}")

    upstream(0).v1copy(
      echoFields = BundleField.union(upstream.flatMap(_.echoFields)),
      requestFields = BundleField.union(upstream.flatMap(_.requestFields)),
      responseKeys = upstream.flatMap(_.responseKeys).distinct,
      minLatency = upstream.map(_.minLatency).min,
      clients = Seq(
        // read-only client: emits Get, never Put
        TLMasterParameters.v1(
          name = "unified_mem_read_client",
          sourceId = read_src_range,
          visibility = Seq(AddressSet(vis_min, vis_mask)),
          supportsProbe = TransferSizes.mincover(upstream.map(_.anyEmitClaims.get)),
          supportsGet = TransferSizes.mincover(upstream.map(_.anyEmitClaims.get)),
          supportsPutFull = TransferSizes.none,
          supportsPutPartial = TransferSizes.none
        ),
        // write-only client: emits PutFull/PutPartial, never Get
        TLMasterParameters.v1(
          name = "unified_mem_write_client",
          sourceId = write_src_range,
          visibility = Seq(AddressSet(vis_min, vis_mask)),
          supportsProbe = TransferSizes.mincover(
            upstream.map(_.anyEmitClaims.putFull) ++ upstream.map(_.anyEmitClaims.putPartial)),
          supportsGet = TransferSizes.none,
          supportsPutFull = TransferSizes.mincover(upstream.map(_.anyEmitClaims.putFull)),
          supportsPutPartial = TransferSizes.mincover(upstream.map(_.anyEmitClaims.putPartial))
        )
      )
    )
  },
  managerFn = { downstream =>
    // val fifoIdFactory = TLXbar.relabeler()
    // present the whole shared memory as one manager covering all banks
    downstream(0).v1copy(
      responseFields = BundleField.union(downstream.flatMap(_.responseFields)),
      requestKeys = downstream.flatMap(_.requestKeys).distinct,
      minLatency = downstream.map(_.minLatency).min,
      endSinkId = TLXbar.mapOutputIds(downstream).map(_.end).max,
      managers = Seq(TLSlaveParameters.v2(
        name = Some(f"unified_mem_manager"),
        address = Seq(AddressSet(gemmini.spad_base, smem_depth * smem_width * smem_banks - 1)),
        supports = TLMasterToSlaveTransferSizes(
          get = TransferSizes(1, smem_width),
          putFull = TransferSizes(1, smem_width),
          putPartial = TransferSizes(1, smem_width)),
        fifoId = Some(0)
      ))
    )
  }
)
|
||||||
|
|
||||||
|
unified_mem_read_node := TLWidthWidget(spad_data_len) := unified_mem_node
|
||||||
|
unified_mem_write_node := TLWidthWidget(spad_data_len) := unified_mem_node
|
||||||
|
|
||||||
|
val stride_by_word = false
|
||||||
|
// Manager nodes for every shared-memory SRAM bank.
// In the (only working) non-strided configuration each bank contributes a
// two-element Seq: a Get-only read manager followed by a Put-only write
// manager, both covering the same contiguous, bank-sized address window.
val smem_bank_mgrs : Seq[Seq[TLManagerNode]] = if (stride_by_word) {
  assert(false, "TODO under construction")
  // assert((config.sp_capacity match { case CapacityInKilobytes(kb) => kb * 1024}) ==
  //   gemmini.config.sp_bank_entries * spad_data_len / max_data_len * gemmini.config.sp_banks * max_data_len)
  (0 until gemminiConfig.sp_banks).map { bank =>
    LazyModule(new TLRAM(
      address = AddressSet(max_data_len * bank,
        ((gemminiConfig.sp_bank_entries * spad_data_len / max_data_len - 1) * gemminiConfig.sp_banks + bank)
          * max_data_len + (max_data_len - 1)),
      beatBytes = max_data_len
    ))
  }.map(x => Seq(x.node))
} else {
  require(isPow2(smem_banks))

  // one single-manager port over the address window of `bank`
  def bank_mgr(label: String, bank: Int, sizes: TLMasterToSlaveTransferSizes): TLManagerNode =
    TLManagerNode(Seq(TLSlavePortParameters.v1(
      managers = Seq(TLSlaveParameters.v2(
        name = Some(label),
        address = Seq(AddressSet(smem_base + (smem_depth * smem_width * bank),
          smem_depth * smem_width - 1)),
        supports = sizes,
        fifoId = Some(0)
      )),
      beatBytes = smem_width
    )))

  (0 until smem_banks).map { bank =>
    val read_mgr = bank_mgr(
      f"sp_bank${bank}_read_mgr",
      bank,
      TLMasterToSlaveTransferSizes(
        get = TransferSizes(1, smem_width)))
    val write_mgr = bank_mgr(
      f"sp_bank${bank}_write_mgr",
      bank,
      TLMasterToSlaveTransferSizes(
        putFull = TransferSizes(1, smem_width),
        putPartial = TransferSizes(1, smem_width)))
    Seq(read_mgr, write_mgr)
  }
}
|
||||||
|
|
||||||
|
val smem_r_xbar = TLXbar()
|
||||||
|
val smem_w_xbar = TLXbar()
|
||||||
|
smem_r_xbar :=* unified_mem_read_node
|
||||||
|
smem_w_xbar :=* unified_mem_write_node
|
||||||
|
|
||||||
|
smem_bank_mgrs.foreach { mem =>
|
||||||
|
require(mem.length == 2)
|
||||||
|
mem.head := smem_r_xbar
|
||||||
|
mem.last := TLFragmenter(spad_data_len, max_write_width_bytes) := smem_w_xbar
|
||||||
|
}
|
||||||
|
|
||||||
|
// connect tile smem nodes to xbar, and xbar to banks
|
||||||
|
// val smem_xbar = TLXbar()
|
||||||
|
unified_mem_node :=* TLWidthWidget(4) :=* clbus.outwardNode
|
||||||
|
gemminiTile.slaveNode :=* TLWidthWidget(4) :=* clbus.outwardNode
|
||||||
|
// printf and perf counter buffer FIXME: make configurable
|
||||||
|
TLRAM(AddressSet(x"ff004000", numCores * 0x200 - 1)) := TLFragmenter(4, 4) := clbus.outwardNode
|
||||||
|
|
||||||
// Diplomacy sink nodes for cluster-wide barrier sync signal
|
// Diplomacy sink nodes for cluster-wide barrier sync signal
|
||||||
val barrierSlaveNode = BarrierSlaveNode(numCores)
|
val barrierSlaveNode = BarrierSlaveNode(numCores)
|
||||||
@@ -65,7 +223,7 @@ class RadianceCluster (
|
|||||||
// Tie corresponding smem ports from every tile into a single port using
|
// Tie corresponding smem ports from every tile into a single port using
|
||||||
// Xbars so that the number of ports going into the sharedmem do not scale
|
// Xbars so that the number of ports going into the sharedmem do not scale
|
||||||
// with the number of tiles.
|
// with the number of tiles.
|
||||||
leafTiles.foreach { case (id, tile: RadianceTile) =>
|
radianceTiles.foreach { tile =>
|
||||||
// (perSmemPortXbars zip tile.smemNodes).foreach {
|
// (perSmemPortXbars zip tile.smemNodes).foreach {
|
||||||
// case (xbar, node) => xbar.node := node
|
// case (xbar, node) => xbar.node := node
|
||||||
// }
|
// }
|
||||||
@@ -78,7 +236,7 @@ class RadianceCluster (
|
|||||||
val regDevice = new SimpleDevice("radiance-cluster-barrier-reg",
|
val regDevice = new SimpleDevice("radiance-cluster-barrier-reg",
|
||||||
Seq(s"radiance-cluster-barrier-reg${clusterId}"))
|
Seq(s"radiance-cluster-barrier-reg${clusterId}"))
|
||||||
val regNode = TLRegisterNode(
|
val regNode = TLRegisterNode(
|
||||||
address = Seq(AddressSet(0xff003f00L, 0xff)),
|
address = Seq(AddressSet(0xff004f00L, 0xff)),
|
||||||
device = regDevice,
|
device = regDevice,
|
||||||
beatBytes = wordSize,
|
beatBytes = wordSize,
|
||||||
concurrency = 1)
|
concurrency = 1)
|
||||||
@@ -92,11 +250,8 @@ class RadianceCluster (
|
|||||||
}
|
}
|
||||||
|
|
||||||
class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp(outer) {
|
class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp(outer) {
|
||||||
outer.leafTiles.foreach { case (id, tile: RadianceTile) =>
|
println(s"======= RadianceCluster: clbus inward edges = ${outer.clbus.inwardNode.inward.inputs.length}")
|
||||||
// println(s"======= RadianceCluster: tile.smemXbar.node.edge = ${tile.smemXbar.node.out.size}")
|
println(s"======= RadianceCluster: clbus name = ${outer.clbus.busName}")
|
||||||
println(s"======= RadianceCluster: clbus inward edges = ${outer.clbus.inwardNode.inward.inputs.length}")
|
|
||||||
println(s"======= RadianceCluster: clbus name = ${outer.clbus.busName}")
|
|
||||||
}
|
|
||||||
|
|
||||||
val numBarriers = 4 // FIXME: hardcoded
|
val numBarriers = 4 // FIXME: hardcoded
|
||||||
|
|
||||||
@@ -146,5 +301,164 @@ class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp(
|
|||||||
0x38 -> Seq(RegField(32, perCoreSyncedRegs(3)(1))),
|
0x38 -> Seq(RegField(32, perCoreSyncedRegs(3)(1))),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// TODO: remove Pipeline dependency of gemmini
|
||||||
|
// Builds the storage and TileLink glue for every shared-memory bank.
// For each (read manager, write manager) pair in outer.smem_bank_mgrs this
// instantiates one TwoPortSyncMem and wires:
//   * the read port:  A request -> SRAM read -> D response, with a pipeline
//     stage plus a one-entry backup register holding read data while D stalls
//   * the write port: A request -> SRAM write, acknowledged on D in the same cycle
// Statement order is significant (Chisel last-connect), so it mirrors the
// order in which the signals are meant to be overridden.
def makeSmemBanks: Unit = {
  // strips the shared-memory base and turns a byte address into an SRAM row index
  def row_of(address: UInt): UInt =
    (address ^ outer.smem_base.U) >> log2Ceil(outer.smem_width).U

  outer.smem_bank_mgrs.foreach { case Seq(r, w) =>
    val mem = TwoPortSyncMem(
      n = outer.smem_depth,
      t = UInt((outer.smem_width * 8).W),
      mask_len = outer.smem_width // byte level mask
    )

    val (r_node, r_edge) = r.in.head
    val (w_node, w_edge) = w.in.head

    // ---------------- READ ----------------
    mem.io.ren := r_node.a.fire
    mem.io.raddr := row_of(r_node.a.bits.address)

    // read data is presented one cycle after the read enable
    val data_pipe_in = Wire(DecoupledIO(mem.io.rdata.cloneType))
    data_pipe_in.valid := RegNext(mem.io.ren)
    data_pipe_in.bits := mem.io.rdata

    // request metadata travels alongside the data so D can echo source/size
    val metadata_pipe_in = Wire(DecoupledIO(new Bundle {
      val source = r_node.a.bits.source.cloneType
      val size = r_node.a.bits.size.cloneType
    }))
    metadata_pipe_in.valid := mem.io.ren
    metadata_pipe_in.bits.source := r_node.a.bits.source
    metadata_pipe_in.bits.size := r_node.a.bits.size

    // holds a read result that returned while the data pipe was still occupied
    val sram_read_backup_reg = RegInit(0.U.asTypeOf(Valid(mem.io.rdata.cloneType)))

    val data_pipe_inst = Module(new Pipeline(data_pipe_in.bits.cloneType, 1)())
    data_pipe_inst.io.in <> data_pipe_in
    val data_pipe = data_pipe_inst.io.out
    val metadata_pipe = Pipeline(metadata_pipe_in, 2)
    assert((data_pipe.valid || sram_read_backup_reg.valid) === metadata_pipe.valid)

    // data pipe is filled, but D is not ready and SRAM read came back
    when (data_pipe.valid && !r_node.d.ready && data_pipe_in.valid) {
      assert(!data_pipe_in.ready) // we should fill backup reg only if data pipe is not enqueueing
      assert(!sram_read_backup_reg.valid) // backup reg should be empty
      assert(!metadata_pipe_in.ready) // metadata should be filled previous cycle
      sram_read_backup_reg.valid := true.B
      sram_read_backup_reg.bits := mem.io.rdata
    }.otherwise {
      assert(data_pipe_in.ready || !data_pipe_in.valid) // do not skip any response
    }

    assert(metadata_pipe_in.fire || !mem.io.ren) // when requesting sram, metadata needs to be ready
    assert(r_node.d.fire === metadata_pipe.fire) // metadata dequeues iff D fires

    // when D becomes ready, and data pipe has emptied, time for backup to empty
    when (r_node.d.ready && sram_read_backup_reg.valid && !data_pipe.valid) {
      sram_read_backup_reg.valid := false.B
    }
    // must empty backup before filling data pipe
    assert(!(sram_read_backup_reg.valid && data_pipe.valid && data_pipe_in.fire))
    assert(data_pipe_in.valid === data_pipe_in.fire)

    // the data pipe has priority; the backup register is used only once it drained
    r_node.d.bits := r_edge.AccessAck(
      Mux(r_node.d.valid, metadata_pipe.bits.source, 0.U),
      Mux(r_node.d.valid, metadata_pipe.bits.size, 0.U),
      Mux(data_pipe.valid, data_pipe.bits, sram_read_backup_reg.bits))
    r_node.d.valid := data_pipe.valid || sram_read_backup_reg.valid
    // r node A is not ready only if D is not ready and both slots filled
    r_node.a.ready := r_node.d.ready && !(data_pipe.valid && sram_read_backup_reg.valid)
    data_pipe.ready := r_node.d.ready
    metadata_pipe.ready := r_node.d.ready

    // ---------------- WRITE ----------------
    mem.io.wen := w_node.a.fire
    mem.io.waddr := row_of(w_node.a.bits.address)
    mem.io.wdata := w_node.a.bits.data
    mem.io.mask := w_node.a.bits.mask.asBools
    // a write is accepted exactly when its acknowledgement can be taken
    w_node.a.ready := w_node.d.ready// && (mem.io.waddr =/= mem.io.raddr)
    w_node.d.valid := w_node.a.valid
    w_node.d.bits := w_edge.AccessAck(w_node.a.bits)
  }
}
|
||||||
|
|
||||||
|
// Implements the hardware of outer.unified_mem_node.
// A channel: every inward client's request is tagged with a source prefix and
//   steered, by opcode, to either the read or the write outward port; requests
//   of several clients are merged with a fixed-priority arbiter.
// D channel: responses from the read and write ports are routed back to the
//   client whose source range matches, and merged per client with a
//   round-robin arbiter.
// The outward source id has one extra MSB: 0 marks a read, 1 marks a write.
def connectUnifiedMemNode: Unit = {
  val u_out = outer.unified_mem_node.out
  val u_in = outer.unified_mem_node.in
  require(u_out.length == 2,
    s"unified memory node needs exactly 2 outward edges (read, write), got ${u_out.length}")
  println(f"gemmini unified memory node has ${u_in.length} incoming client(s)")
  require(u_in.nonEmpty, "unified memory node needs at least one inward client")

  val r_out = u_out.head
  val w_out = u_out.last

  val in_src = TLXbar.mapInputIds(u_in.map(_._2.client))
  val in_src_size = in_src.map(_.end).max
  // should be checked already, but just to be sure
  require(isPow2(in_src_size), s"combined source id space must be a power of two, got $in_src_size")

  // arbitrate all reads into one read while assigning source prefix, same for write
  val a_arbiter_in = (u_in zip in_src).map { case ((in_node, _), src_range) =>
    val in_r: DecoupledIO[TLBundleA] =
      WireDefault(0.U.asTypeOf(Decoupled(new TLBundleA(in_node.a.bits.params.copy(
        sourceBits = log2Up(in_src_size) + 1
      )))))
    val in_w: DecoupledIO[TLBundleA] = WireDefault(0.U.asTypeOf(in_r.cloneType))

    val req_is_read = in_node.a.bits.opcode === TLMessages.Get

    // copy every A field except source, which is re-tagged below
    (Seq(in_r.bits.user, in_r.bits.address, in_r.bits.opcode, in_r.bits.size,
      in_r.bits.mask, in_r.bits.param, in_r.bits.data)
      zip Seq(in_node.a.bits.user, in_node.a.bits.address, in_node.a.bits.opcode, in_node.a.bits.size,
        in_node.a.bits.mask, in_node.a.bits.param, in_node.a.bits.data))
      .foreach { case (x, y) => x := y }
    // client offset in the low bits, read/write flag in the extra top bit
    in_r.bits.source := in_node.a.bits.source | src_range.start.U | Mux(req_is_read, 0.U, in_src_size.U)
    in_w.bits := in_r.bits

    in_r.valid := in_node.a.valid && req_is_read
    in_w.valid := in_node.a.valid && !req_is_read
    in_node.a.ready := Mux(req_is_read, in_r.ready, in_w.ready)

    (in_r, in_w)
  }
  // we cannot use round robin because it might reorder requests, even from the same client
  val (a_arbiter_in_r_nodes, a_arbiter_in_w_nodes) = a_arbiter_in.unzip
  TLArbiter.lowest(r_out._2, r_out._1.a, a_arbiter_in_r_nodes:_*)
  TLArbiter.lowest(w_out._2, w_out._1.a, a_arbiter_in_w_nodes:_*)

  def trim(id: UInt, size: Int): UInt = if (size <= 1) 0.U else id(log2Ceil(size)-1, 0) // from Xbar

  // response channels, indexed 0 = read, 1 = write (matches the source MSB)
  val resp = Seq(r_out._1.d, w_out._1.d)

  // for each unified mem node client, arbitrate read/write responses on d channel.
  // Each client yields, per response channel, whether it accepts the current beat.
  val resp_accepts = (u_in zip in_src).map { case ((in_node, in_edge), src_range) =>
    // assign d channel back based on source, invalid if source prefix mismatch
    val source_match = resp.zipWithIndex.map { case (r, is_write) =>
      (r.bits.source(r.bits.source.getWidth - 1) === is_write.U(1.W)) && // MSB indicates read(0)/write(1)
        src_range.contains(trim(r.bits.source, in_src_size))
    }
    val d_arbiter_in = resp.map(r => WireDefault(
      0.U.asTypeOf(Decoupled(new TLBundleD(r.bits.params.copy(
        sourceBits = in_node.d.bits.source.getWidth,
        sizeBits = in_node.d.bits.size.getWidth
      ))))
    ))

    val accepts = (d_arbiter_in zip resp zip source_match).map { case ((arb_in, r), sm) =>
      (Seq(arb_in.bits.user, arb_in.bits.opcode, arb_in.bits.data, arb_in.bits.param,
        arb_in.bits.sink, arb_in.bits.denied, arb_in.bits.corrupt)
        zip Seq(r.bits.user, r.bits.opcode, r.bits.data, r.bits.param,
          r.bits.sink, r.bits.denied, r.bits.corrupt))
        .foreach { case (x, y) => x := y }
      arb_in.bits.source := trim(r.bits.source, 1 << in_node.d.bits.source.getWidth) // we can trim b/c isPow2(prefix)
      arb_in.bits.size := trim(r.bits.size, 1 << in_node.d.bits.size.getWidth) // FIXME: check truncation

      arb_in.valid := r.valid && sm
      // this client consumes the beat only if it is addressed to it
      sm && arb_in.ready
    }

    TLArbiter.robin(in_edge, in_node.d, d_arbiter_in:_*)
    accepts
  }

  // Drive each response channel's ready exactly once, from whichever client
  // the beat is addressed to. Assigning it inside the per-client loop would
  // let the last client override all others (last-connect semantics).
  resp.zipWithIndex.foreach { case (r, k) =>
    r.ready := resp_accepts.map(_(k)).reduce(_ || _)
  }
}
|
||||||
|
|
||||||
|
makeSmemBanks
|
||||||
|
connectUnifiedMemNode
|
||||||
|
|
||||||
println(s"======== barrierSlaveNode: ${outer.barrierSlaveNode.in(0)._2.barrierIdBits}")
|
println(s"======== barrierSlaveNode: ${outer.barrierSlaveNode.in(0)._2.barrierIdBits}")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,19 +5,17 @@ package radiance.tile
|
|||||||
|
|
||||||
import chisel3._
|
import chisel3._
|
||||||
import chisel3.util._
|
import chisel3.util._
|
||||||
import org.chipsalliance.cde.config._
|
|
||||||
import freechips.rocketchip.devices.tilelink._
|
import freechips.rocketchip.devices.tilelink._
|
||||||
import freechips.rocketchip.diplomacy._
|
import freechips.rocketchip.diplomacy._
|
||||||
import freechips.rocketchip.interrupts._
|
|
||||||
import freechips.rocketchip.tilelink._
|
|
||||||
import freechips.rocketchip.rocket._
|
|
||||||
import freechips.rocketchip.subsystem.HierarchicalElementCrossingParamsLike
|
|
||||||
import freechips.rocketchip.util._
|
|
||||||
import freechips.rocketchip.prci.ClockSinkParameters
|
import freechips.rocketchip.prci.ClockSinkParameters
|
||||||
import freechips.rocketchip.regmapper.RegField
|
import freechips.rocketchip.regmapper.RegField
|
||||||
|
import freechips.rocketchip.rocket._
|
||||||
|
import freechips.rocketchip.subsystem.HierarchicalElementCrossingParamsLike
|
||||||
import freechips.rocketchip.tile._
|
import freechips.rocketchip.tile._
|
||||||
|
import freechips.rocketchip.tilelink._
|
||||||
|
import freechips.rocketchip.util._
|
||||||
|
import org.chipsalliance.cde.config._
|
||||||
import radiance.memory._
|
import radiance.memory._
|
||||||
import gemmini.{CapacityInKilobytes, Gemmini, GemminiCustomConfigs, GemminiFPConfigs}
|
|
||||||
import radiance.subsystem.{GPUMemParams, GPUMemory}
|
import radiance.subsystem.{GPUMemParams, GPUMemory}
|
||||||
|
|
||||||
case class RadianceTileParams(
|
case class RadianceTileParams(
|
||||||
@@ -57,14 +55,14 @@ case class RadianceTileParams(
|
|||||||
// though. TODO see how BOOM does that
|
// though. TODO see how BOOM does that
|
||||||
case class VortexCoreParams(
|
case class VortexCoreParams(
|
||||||
bootFreqHz: BigInt = 0,
|
bootFreqHz: BigInt = 0,
|
||||||
useVM: Boolean = true,
|
useVM: Boolean = false,
|
||||||
useUser: Boolean = false,
|
useUser: Boolean = false,
|
||||||
useSupervisor: Boolean = false,
|
useSupervisor: Boolean = false,
|
||||||
useHypervisor: Boolean = false,
|
useHypervisor: Boolean = false,
|
||||||
useDebug: Boolean = true,
|
useDebug: Boolean = true,
|
||||||
useAtomics: Boolean = true,
|
useAtomics: Boolean = false,
|
||||||
useAtomicsOnlyForIO: Boolean = false,
|
useAtomicsOnlyForIO: Boolean = false,
|
||||||
useCompressed: Boolean = true,
|
useCompressed: Boolean = false,
|
||||||
useRVE: Boolean = false,
|
useRVE: Boolean = false,
|
||||||
useConditionalZero: Boolean = false,
|
useConditionalZero: Boolean = false,
|
||||||
nLocalInterrupts: Int = 0,
|
nLocalInterrupts: Int = 0,
|
||||||
@@ -89,8 +87,8 @@ case class VortexCoreParams(
|
|||||||
clockGate: Boolean = false,
|
clockGate: Boolean = false,
|
||||||
mvendorid: Int = 0, // 0 means non-commercial implementation
|
mvendorid: Int = 0, // 0 means non-commercial implementation
|
||||||
mimpid: Int = 0x20181004, // release date in BCD
|
mimpid: Int = 0x20181004, // release date in BCD
|
||||||
mulDiv: Option[MulDivParams] = Some(MulDivParams()),
|
mulDiv: Option[MulDivParams] = None,
|
||||||
fpu: Option[FPUParams] = Some(FPUParams()),
|
fpu: Option[FPUParams] = None,
|
||||||
debugROB: Boolean = false, // if enabled, uses a C++ debug ROB to generate trace-with-wdata
|
debugROB: Boolean = false, // if enabled, uses a C++ debug ROB to generate trace-with-wdata
|
||||||
haveCease: Boolean = true, // non-standard CEASE instruction
|
haveCease: Boolean = true, // non-standard CEASE instruction
|
||||||
haveSimTimeout: Boolean = true // add plusarg for simulation timeout
|
haveSimTimeout: Boolean = true // add plusarg for simulation timeout
|
||||||
@@ -344,39 +342,6 @@ class RadianceTile private (
|
|||||||
tlMasterXbar.node :=* AddressOrNode(base) :=* dcacheNode
|
tlMasterXbar.node :=* AddressOrNode(base) :=* dcacheNode
|
||||||
}
|
}
|
||||||
|
|
||||||
// ROCC
|
|
||||||
// TODO: parametrize
|
|
||||||
val gemmini = LazyModule(new Gemmini(GemminiFPConfigs.FP32DefaultConfig.copy(
|
|
||||||
has_training_convs = false,
|
|
||||||
has_max_pool = false,
|
|
||||||
use_tl_ext_mem = true,
|
|
||||||
tl_ext_mem_base = x"ff000000",
|
|
||||||
sp_singleported = false,
|
|
||||||
spad_read_delay = 4,
|
|
||||||
use_shared_ext_mem = true,
|
|
||||||
acc_sub_banks = 1,
|
|
||||||
has_normalizations = false,
|
|
||||||
meshRows = 8,
|
|
||||||
meshColumns = 8,
|
|
||||||
dma_buswidth = 256,
|
|
||||||
tile_latency = 0,
|
|
||||||
sp_capacity = CapacityInKilobytes(16),
|
|
||||||
acc_capacity = CapacityInKilobytes(8),
|
|
||||||
)))
|
|
||||||
val roccs: Seq[LazyRoCC] = Seq(gemmini)
|
|
||||||
tlMasterXbar.node :=* AddressOrNode(base) :=* gemmini.atlNode
|
|
||||||
tlOtherMastersNode :=* AddressOrNode(base) :=* gemmini.tlNode
|
|
||||||
|
|
||||||
// MMIO
|
|
||||||
// gemmini.stlNode :=* TLWidthWidget(4) :=* smemXbar.node
|
|
||||||
// sharedmem access
|
|
||||||
//
|
|
||||||
// FIXME: gemmini spad has 16B data width; core smem interface has 4B. Need
|
|
||||||
// to consolidate by either coalescing, or changing gemmini spad to
|
|
||||||
// strided-by-word
|
|
||||||
// gemmini.unified_mem_node :=* TLWidthWidget(4) :=* smemXbar.node
|
|
||||||
// TLRAM(AddressSet(x"ff004000", 0xfff)) := TLFragmenter(4, 4) := smemXbar.node
|
|
||||||
|
|
||||||
/* below are copied from rocket */
|
/* below are copied from rocket */
|
||||||
|
|
||||||
val tile_master_blocker =
|
val tile_master_blocker =
|
||||||
|
|||||||
Reference in New Issue
Block a user