Tie tile smem ports together using Xbars; comment-out Gemmini spad
TODO pull Gemmini out to the cluster as well
This commit is contained in:
@@ -34,26 +34,52 @@ class RadianceCluster (
|
||||
|
||||
clbus.clockGroupNode := allClockGroupsNode
|
||||
|
||||
val numLsuLanes = 4
|
||||
// Instantiate cluster-local shared memory scratchpad
|
||||
//
|
||||
// Instantiate the same number of banks as there are lanes.
|
||||
val numLsuLanes = 4 // FIXME: hardcoded
|
||||
val wordSize = 4
|
||||
val smemBanks = Seq.tabulate(numLsuLanes) { bankId =>
|
||||
// Banked-by-word (4 bytes)
|
||||
// base for bank 1: ff...000000|01|00
|
||||
// mask for bank 1; 00...111111|00|11
|
||||
// val base = 0xff000000L | (bankId * 4 /*wordSize*/ )
|
||||
// val mask = 0x00001fffL ^ ((numLsuLanes - 1) * 4 /*wordSize*/ )
|
||||
val base = 0xff000000L | (bankId * wordSize)
|
||||
val mask = 0x00ffffffL ^ ((numLsuLanes - 1) * wordSize)
|
||||
val mask = 0x00001fffL ^ ((numLsuLanes - 1) * wordSize)
|
||||
LazyModule(new TLRAM(AddressSet(base, mask), beatBytes = wordSize))
|
||||
}
|
||||
smemBanks.foreach(_.node := clbus.outwardNode)
|
||||
|
||||
println(s"===== Cluster: nTotalTiles = ${nTotalTiles}")
|
||||
println(s"===== Cluster: nLeafTiles = ${nLeafTiles}")
|
||||
// HACK: This is a work around the normal bus connecting API by downcasting
|
||||
// tile and directly accessing the node inside that is not exposed as a
|
||||
// master in HierarchicalElementCrossingParamsLike.
|
||||
val tile = leafTiles(0).asInstanceOf[RadianceTile]
|
||||
val perSmemPortXbars = Seq.fill(tile.smemNodes.size) { LazyModule(new TLXbar) }
|
||||
|
||||
leafTiles.map { case (id, tile: RadianceTile) =>
|
||||
println(s"======= RadianceCluster: connecting cluster ${id} to clbus")
|
||||
clbus.inwardNode :=* tile.smemXbar.node
|
||||
// clbus.inwardNode :=* tile.smemNodes(0)
|
||||
// Tie corresponding smem ports from every tile into a single port using
|
||||
// Xbars so that the number of ports going into the sharedmem do not scale
|
||||
// with the number of tiles.
|
||||
leafTiles.foreach { case (id, tile: RadianceTile) =>
|
||||
(perSmemPortXbars zip tile.smemNodes).foreach {
|
||||
case (xbar, node) => xbar.node := node
|
||||
}
|
||||
// tile.smemNodes.foreach (clbus.inwardNode := _)
|
||||
}
|
||||
perSmemPortXbars.foreach { clbus.inwardNode := _.node }
|
||||
|
||||
override lazy val module = new RadianceClusterModuleImp(this)
|
||||
}
|
||||
|
||||
class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp(outer) {
|
||||
outer.leafTiles.foreach { case (id, tile: RadianceTile) =>
|
||||
// println(s"======= RadianceCluster: tile.smemXbar.node.edge = ${tile.smemXbar.node.out.size}")
|
||||
println(s"======= RadianceCluster: clbus inward edges = ${outer.clbus.inwardNode.inward.inputs.size}")
|
||||
println(s"======= RadianceCluster: clbus name = ${outer.clbus.busName}")
|
||||
}
|
||||
|
||||
outer.perSmemPortXbars(0).node.out(0)._2.slave.slaves(0).address.foreach { addrSet =>
|
||||
println(s"====== perSmemPortXbars(0).slaves(0).addr: ${addrSet.toString()}")
|
||||
}
|
||||
outer.perSmemPortXbars(0).node.out(0)._2.master.masters(0).visibility.foreach { addrSet =>
|
||||
println(s"====== perSmemPortXbars(0).masters(0).addr: ${addrSet.toString()}")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -143,8 +143,8 @@ class RadianceTile private (
|
||||
"SIMTCoreKey not defined; make sure to use WithSimtConfig when using RadianceTile"
|
||||
)
|
||||
|
||||
// NOTE: when changing these, remember to change +define+NUM_THREADS/WARPS in
|
||||
// EXTRA_SIM_PREPROC_DEFINES as well!
|
||||
// NOTE: when changing these, remember to change +define+NUM_CORES/THREADS/WARPS in
|
||||
// radiance.mk as well!
|
||||
val numWarps = p(SIMTCoreKey) match {
|
||||
case Some(simtParam) => simtParam.nWarps
|
||||
case None => 4
|
||||
@@ -303,9 +303,6 @@ class RadianceTile private (
|
||||
// Conditionally instantiate L1 cache
|
||||
val (icacheNode, dcacheNode): (TLNode, TLNode) = p(VortexL1Key) match {
|
||||
case Some(vortexL1Config) => {
|
||||
println(
|
||||
s"============ Using Vortex L1 cache ================="
|
||||
)
|
||||
// require(
|
||||
// p(CoalescerKey).isDefined,
|
||||
// "Vortex L1 configuration currently only works when coalescer is also enabled."
|
||||
@@ -328,23 +325,6 @@ class RadianceTile private (
|
||||
}
|
||||
}
|
||||
|
||||
// Instantiate sharedmem banks
|
||||
//
|
||||
// Instantiate the same number of banks as there are lanes.
|
||||
// TODO: parametrize
|
||||
// val smemBanks = Seq.tabulate(numLsuLanes) { bankId =>
|
||||
// // Banked-by-word (4 bytes)
|
||||
// // base for bank 1: ff...000000|01|00
|
||||
// // mask for bank 1; 00...111111|00|11
|
||||
// val base = 0xff000000L | (bankId * 4 /*wordSize*/ )
|
||||
// val mask = 0x00001fffL ^ ((numLsuLanes - 1) * 4 /*wordSize*/ )
|
||||
// LazyModule(new TLRAM(AddressSet(base, mask), beatBytes = 4 /*wordSize*/ ))
|
||||
// }
|
||||
// smem lanes-to-banks crossbar
|
||||
val smemXbar = LazyModule(new TLXbar)
|
||||
smemNodes.foreach(smemXbar.node := _)
|
||||
// smemBanks.foreach(_.node := smemXbar.node)
|
||||
|
||||
val base = p(GPUMemory()) match {
|
||||
case Some(GPUMemParams(baseAddr, _)) => baseAddr
|
||||
case _ => BigInt(0)
|
||||
@@ -361,32 +341,32 @@ class RadianceTile private (
|
||||
|
||||
// ROCC
|
||||
// TODO: parametrize
|
||||
val gemmini = LazyModule(new Gemmini(GemminiFPConfigs.FP32DefaultConfig.copy(
|
||||
has_training_convs = false,
|
||||
has_max_pool = false,
|
||||
use_tl_ext_mem = true,
|
||||
tl_ext_mem_base = x"ff000000",
|
||||
sp_singleported = false,
|
||||
spad_read_delay = 8,
|
||||
use_shared_ext_mem = true,
|
||||
acc_sub_banks = 1,
|
||||
has_normalizations = false,
|
||||
sp_capacity = CapacityInKilobytes(16),
|
||||
acc_capacity = CapacityInKilobytes(8),
|
||||
)))
|
||||
val roccs: Seq[LazyRoCC] = Seq(gemmini)
|
||||
tlMasterXbar.node :=* AddressOrNode(base) :=* gemmini.atlNode
|
||||
tlOtherMastersNode :=* AddressOrNode(base) :=* gemmini.tlNode
|
||||
// val gemmini = LazyModule(new Gemmini(GemminiFPConfigs.FP32DefaultConfig.copy(
|
||||
// has_training_convs = false,
|
||||
// has_max_pool = false,
|
||||
// use_tl_ext_mem = true,
|
||||
// tl_ext_mem_base = x"ff000000",
|
||||
// sp_singleported = false,
|
||||
// spad_read_delay = 8,
|
||||
// use_shared_ext_mem = true,
|
||||
// acc_sub_banks = 1,
|
||||
// has_normalizations = false,
|
||||
// sp_capacity = CapacityInKilobytes(16),
|
||||
// acc_capacity = CapacityInKilobytes(8),
|
||||
// )))
|
||||
// val roccs: Seq[LazyRoCC] = Seq(gemmini)
|
||||
// tlMasterXbar.node :=* AddressOrNode(base) :=* gemmini.atlNode
|
||||
// tlOtherMastersNode :=* AddressOrNode(base) :=* gemmini.tlNode
|
||||
|
||||
// MMIO
|
||||
gemmini.stlNode :=* TLWidthWidget(4) :=* smemXbar.node
|
||||
// gemmini.stlNode :=* TLWidthWidget(4) :=* smemXbar.node
|
||||
// sharedmem access
|
||||
//
|
||||
// FIXME: gemmini spad has 16B data width; core smem interface has 4B. Need
|
||||
// to consolidate by either coalescing, or changing gemmini spad to
|
||||
// strided-by-word
|
||||
gemmini.unified_mem_node :=* TLWidthWidget(4) :=* smemXbar.node
|
||||
TLRAM(AddressSet(x"ff004000", 0xfff)) := TLFragmenter(4, 4) := smemXbar.node
|
||||
// gemmini.unified_mem_node :=* TLWidthWidget(4) :=* smemXbar.node
|
||||
// TLRAM(AddressSet(x"ff004000", 0xfff)) := TLFragmenter(4, 4) := smemXbar.node
|
||||
|
||||
/* below are copied from rocket */
|
||||
|
||||
@@ -700,33 +680,33 @@ class RadianceTileModuleImp(outer: RadianceTile)
|
||||
// TODO: generalize for useVxCache
|
||||
if (!outer.radianceParams.useVxCache) {}
|
||||
|
||||
// RoCC
|
||||
if (outer.roccs.size > 0) {
|
||||
val (respArb, cmdRouter) = {
|
||||
val respArb = Module(new RRArbiter(new RoCCResponse()(outer.p), outer.roccs.size))
|
||||
val cmdRouter = Module(new RoccCommandRouter(outer.roccs.map(_.opcodes))(outer.p))
|
||||
outer.roccs.zipWithIndex.foreach { case (rocc, i) =>
|
||||
// ptwPorts ++= rocc.module.io.ptw
|
||||
rocc.module.io.ptw <> DontCare
|
||||
rocc.module.io.mem <> DontCare
|
||||
rocc.module.io.cmd <> cmdRouter.io.out(i)
|
||||
respArb.io.in(i) <> Queue(rocc.module.io.resp)
|
||||
}
|
||||
// Create this FPU just for RoCC
|
||||
// val nFPUPorts = outer.roccs.filter(_.usesFPU).size
|
||||
val fp_rocc_ios = outer.roccs.map(_.module.io)
|
||||
fp_rocc_ios.map { io =>
|
||||
io.fpu_req.ready := false.B
|
||||
io.fpu_resp.valid := false.B
|
||||
io.fpu_resp.bits := DontCare
|
||||
}
|
||||
(respArb, cmdRouter)
|
||||
}
|
||||
// // RoCC
|
||||
// if (outer.roccs.size > 0) {
|
||||
// val (respArb, cmdRouter) = {
|
||||
// val respArb = Module(new RRArbiter(new RoCCResponse()(outer.p), outer.roccs.size))
|
||||
// val cmdRouter = Module(new RoccCommandRouter(outer.roccs.map(_.opcodes))(outer.p))
|
||||
// outer.roccs.zipWithIndex.foreach { case (rocc, i) =>
|
||||
// // ptwPorts ++= rocc.module.io.ptw
|
||||
// rocc.module.io.ptw <> DontCare
|
||||
// rocc.module.io.mem <> DontCare
|
||||
// rocc.module.io.cmd <> cmdRouter.io.out(i)
|
||||
// respArb.io.in(i) <> Queue(rocc.module.io.resp)
|
||||
// }
|
||||
// // Create this FPU just for RoCC
|
||||
// // val nFPUPorts = outer.roccs.filter(_.usesFPU).size
|
||||
// val fp_rocc_ios = outer.roccs.map(_.module.io)
|
||||
// fp_rocc_ios.map { io =>
|
||||
// io.fpu_req.ready := false.B
|
||||
// io.fpu_resp.valid := false.B
|
||||
// io.fpu_resp.bits := DontCare
|
||||
// }
|
||||
// (respArb, cmdRouter)
|
||||
// }
|
||||
|
||||
cmdRouter.io.in <> DontCare
|
||||
outer.roccs.foreach(_.module.io.exception := DontCare)
|
||||
respArb.io.out <> DontCare
|
||||
}
|
||||
// cmdRouter.io.in <> DontCare
|
||||
// outer.roccs.foreach(_.module.io.exception := DontCare)
|
||||
// respArb.io.out <> DontCare
|
||||
// }
|
||||
}
|
||||
|
||||
// Some @copypaste from CoalescerSourceGen.
|
||||
|
||||
Reference in New Issue
Block a user