Tie tile smem ports together using Xbars; comment out Gemmini spad
TODO pull Gemmini out to the cluster as well
This commit is contained in:
@@ -34,26 +34,52 @@ class RadianceCluster (
|
|||||||
|
|
||||||
clbus.clockGroupNode := allClockGroupsNode
|
clbus.clockGroupNode := allClockGroupsNode
|
||||||
|
|
||||||
val numLsuLanes = 4
|
// Instantiate cluster-local shared memory scratchpad
|
||||||
|
//
|
||||||
|
// Instantiate the same number of banks as there are lanes.
|
||||||
|
val numLsuLanes = 4 // FIXME: hardcoded
|
||||||
val wordSize = 4
|
val wordSize = 4
|
||||||
val smemBanks = Seq.tabulate(numLsuLanes) { bankId =>
|
val smemBanks = Seq.tabulate(numLsuLanes) { bankId =>
|
||||||
// Banked-by-word (4 bytes)
|
// Banked-by-word (4 bytes)
|
||||||
// base for bank 1: ff...000000|01|00
|
// base for bank 1: ff...000000|01|00
|
||||||
// mask for bank 1; 00...111111|00|11
|
// mask for bank 1; 00...111111|00|11
|
||||||
// val base = 0xff000000L | (bankId * 4 /*wordSize*/ )
|
|
||||||
// val mask = 0x00001fffL ^ ((numLsuLanes - 1) * 4 /*wordSize*/ )
|
|
||||||
val base = 0xff000000L | (bankId * wordSize)
|
val base = 0xff000000L | (bankId * wordSize)
|
||||||
val mask = 0x00ffffffL ^ ((numLsuLanes - 1) * wordSize)
|
val mask = 0x00001fffL ^ ((numLsuLanes - 1) * wordSize)
|
||||||
LazyModule(new TLRAM(AddressSet(base, mask), beatBytes = wordSize))
|
LazyModule(new TLRAM(AddressSet(base, mask), beatBytes = wordSize))
|
||||||
}
|
}
|
||||||
smemBanks.foreach(_.node := clbus.outwardNode)
|
smemBanks.foreach(_.node := clbus.outwardNode)
|
||||||
|
|
||||||
println(s"===== Cluster: nTotalTiles = ${nTotalTiles}")
|
// HACK: This works around the normal bus-connecting API by downcasting the
|
||||||
println(s"===== Cluster: nLeafTiles = ${nLeafTiles}")
|
// tile and directly accessing the node inside that is not exposed as a
|
||||||
|
// master in HierarchicalElementCrossingParamsLike.
|
||||||
|
val tile = leafTiles(0).asInstanceOf[RadianceTile]
|
||||||
|
val perSmemPortXbars = Seq.fill(tile.smemNodes.size) { LazyModule(new TLXbar) }
|
||||||
|
|
||||||
leafTiles.map { case (id, tile: RadianceTile) =>
|
// Tie corresponding smem ports from every tile into a single port using
|
||||||
println(s"======= RadianceCluster: connecting cluster ${id} to clbus")
|
// Xbars so that the number of ports going into the sharedmem does not scale
|
||||||
clbus.inwardNode :=* tile.smemXbar.node
|
// with the number of tiles.
|
||||||
// clbus.inwardNode :=* tile.smemNodes(0)
|
leafTiles.foreach { case (id, tile: RadianceTile) =>
|
||||||
|
(perSmemPortXbars zip tile.smemNodes).foreach {
|
||||||
|
case (xbar, node) => xbar.node := node
|
||||||
|
}
|
||||||
|
// tile.smemNodes.foreach (clbus.inwardNode := _)
|
||||||
|
}
|
||||||
|
perSmemPortXbars.foreach { clbus.inwardNode := _.node }
|
||||||
|
|
||||||
|
override lazy val module = new RadianceClusterModuleImp(this)
|
||||||
|
}
|
||||||
|
|
||||||
|
class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp(outer) {
|
||||||
|
outer.leafTiles.foreach { case (id, tile: RadianceTile) =>
|
||||||
|
// println(s"======= RadianceCluster: tile.smemXbar.node.edge = ${tile.smemXbar.node.out.size}")
|
||||||
|
println(s"======= RadianceCluster: clbus inward edges = ${outer.clbus.inwardNode.inward.inputs.size}")
|
||||||
|
println(s"======= RadianceCluster: clbus name = ${outer.clbus.busName}")
|
||||||
|
}
|
||||||
|
|
||||||
|
outer.perSmemPortXbars(0).node.out(0)._2.slave.slaves(0).address.foreach { addrSet =>
|
||||||
|
println(s"====== perSmemPortXbars(0).slaves(0).addr: ${addrSet.toString()}")
|
||||||
|
}
|
||||||
|
outer.perSmemPortXbars(0).node.out(0)._2.master.masters(0).visibility.foreach { addrSet =>
|
||||||
|
println(s"====== perSmemPortXbars(0).masters(0).addr: ${addrSet.toString()}")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -143,8 +143,8 @@ class RadianceTile private (
|
|||||||
"SIMTCoreKey not defined; make sure to use WithSimtConfig when using RadianceTile"
|
"SIMTCoreKey not defined; make sure to use WithSimtConfig when using RadianceTile"
|
||||||
)
|
)
|
||||||
|
|
||||||
// NOTE: when changing these, remember to change +define+NUM_THREADS/WARPS in
|
// NOTE: when changing these, remember to change +define+NUM_CORES/THREADS/WARPS in
|
||||||
// EXTRA_SIM_PREPROC_DEFINES as well!
|
// radiance.mk as well!
|
||||||
val numWarps = p(SIMTCoreKey) match {
|
val numWarps = p(SIMTCoreKey) match {
|
||||||
case Some(simtParam) => simtParam.nWarps
|
case Some(simtParam) => simtParam.nWarps
|
||||||
case None => 4
|
case None => 4
|
||||||
@@ -303,9 +303,6 @@ class RadianceTile private (
|
|||||||
// Conditionally instantiate L1 cache
|
// Conditionally instantiate L1 cache
|
||||||
val (icacheNode, dcacheNode): (TLNode, TLNode) = p(VortexL1Key) match {
|
val (icacheNode, dcacheNode): (TLNode, TLNode) = p(VortexL1Key) match {
|
||||||
case Some(vortexL1Config) => {
|
case Some(vortexL1Config) => {
|
||||||
println(
|
|
||||||
s"============ Using Vortex L1 cache ================="
|
|
||||||
)
|
|
||||||
// require(
|
// require(
|
||||||
// p(CoalescerKey).isDefined,
|
// p(CoalescerKey).isDefined,
|
||||||
// "Vortex L1 configuration currently only works when coalescer is also enabled."
|
// "Vortex L1 configuration currently only works when coalescer is also enabled."
|
||||||
@@ -328,23 +325,6 @@ class RadianceTile private (
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Instantiate sharedmem banks
|
|
||||||
//
|
|
||||||
// Instantiate the same number of banks as there are lanes.
|
|
||||||
// TODO: parametrize
|
|
||||||
// val smemBanks = Seq.tabulate(numLsuLanes) { bankId =>
|
|
||||||
// // Banked-by-word (4 bytes)
|
|
||||||
// // base for bank 1: ff...000000|01|00
|
|
||||||
// // mask for bank 1; 00...111111|00|11
|
|
||||||
// val base = 0xff000000L | (bankId * 4 /*wordSize*/ )
|
|
||||||
// val mask = 0x00001fffL ^ ((numLsuLanes - 1) * 4 /*wordSize*/ )
|
|
||||||
// LazyModule(new TLRAM(AddressSet(base, mask), beatBytes = 4 /*wordSize*/ ))
|
|
||||||
// }
|
|
||||||
// smem lanes-to-banks crossbar
|
|
||||||
val smemXbar = LazyModule(new TLXbar)
|
|
||||||
smemNodes.foreach(smemXbar.node := _)
|
|
||||||
// smemBanks.foreach(_.node := smemXbar.node)
|
|
||||||
|
|
||||||
val base = p(GPUMemory()) match {
|
val base = p(GPUMemory()) match {
|
||||||
case Some(GPUMemParams(baseAddr, _)) => baseAddr
|
case Some(GPUMemParams(baseAddr, _)) => baseAddr
|
||||||
case _ => BigInt(0)
|
case _ => BigInt(0)
|
||||||
@@ -361,32 +341,32 @@ class RadianceTile private (
|
|||||||
|
|
||||||
// ROCC
|
// ROCC
|
||||||
// TODO: parametrize
|
// TODO: parametrize
|
||||||
val gemmini = LazyModule(new Gemmini(GemminiFPConfigs.FP32DefaultConfig.copy(
|
// val gemmini = LazyModule(new Gemmini(GemminiFPConfigs.FP32DefaultConfig.copy(
|
||||||
has_training_convs = false,
|
// has_training_convs = false,
|
||||||
has_max_pool = false,
|
// has_max_pool = false,
|
||||||
use_tl_ext_mem = true,
|
// use_tl_ext_mem = true,
|
||||||
tl_ext_mem_base = x"ff000000",
|
// tl_ext_mem_base = x"ff000000",
|
||||||
sp_singleported = false,
|
// sp_singleported = false,
|
||||||
spad_read_delay = 8,
|
// spad_read_delay = 8,
|
||||||
use_shared_ext_mem = true,
|
// use_shared_ext_mem = true,
|
||||||
acc_sub_banks = 1,
|
// acc_sub_banks = 1,
|
||||||
has_normalizations = false,
|
// has_normalizations = false,
|
||||||
sp_capacity = CapacityInKilobytes(16),
|
// sp_capacity = CapacityInKilobytes(16),
|
||||||
acc_capacity = CapacityInKilobytes(8),
|
// acc_capacity = CapacityInKilobytes(8),
|
||||||
)))
|
// )))
|
||||||
val roccs: Seq[LazyRoCC] = Seq(gemmini)
|
// val roccs: Seq[LazyRoCC] = Seq(gemmini)
|
||||||
tlMasterXbar.node :=* AddressOrNode(base) :=* gemmini.atlNode
|
// tlMasterXbar.node :=* AddressOrNode(base) :=* gemmini.atlNode
|
||||||
tlOtherMastersNode :=* AddressOrNode(base) :=* gemmini.tlNode
|
// tlOtherMastersNode :=* AddressOrNode(base) :=* gemmini.tlNode
|
||||||
|
|
||||||
// MMIO
|
// MMIO
|
||||||
gemmini.stlNode :=* TLWidthWidget(4) :=* smemXbar.node
|
// gemmini.stlNode :=* TLWidthWidget(4) :=* smemXbar.node
|
||||||
// sharedmem access
|
// sharedmem access
|
||||||
//
|
//
|
||||||
// FIXME: gemmini spad has 16B data width; core smem interface has 4B. Need
|
// FIXME: gemmini spad has 16B data width; core smem interface has 4B. Need
|
||||||
// to consolidate by either coalescing, or changing gemmini spad to
|
// to consolidate by either coalescing, or changing gemmini spad to
|
||||||
// strided-by-word
|
// strided-by-word
|
||||||
gemmini.unified_mem_node :=* TLWidthWidget(4) :=* smemXbar.node
|
// gemmini.unified_mem_node :=* TLWidthWidget(4) :=* smemXbar.node
|
||||||
TLRAM(AddressSet(x"ff004000", 0xfff)) := TLFragmenter(4, 4) := smemXbar.node
|
// TLRAM(AddressSet(x"ff004000", 0xfff)) := TLFragmenter(4, 4) := smemXbar.node
|
||||||
|
|
||||||
/* below are copied from rocket */
|
/* below are copied from rocket */
|
||||||
|
|
||||||
@@ -700,33 +680,33 @@ class RadianceTileModuleImp(outer: RadianceTile)
|
|||||||
// TODO: generalize for useVxCache
|
// TODO: generalize for useVxCache
|
||||||
if (!outer.radianceParams.useVxCache) {}
|
if (!outer.radianceParams.useVxCache) {}
|
||||||
|
|
||||||
// RoCC
|
// // RoCC
|
||||||
if (outer.roccs.size > 0) {
|
// if (outer.roccs.size > 0) {
|
||||||
val (respArb, cmdRouter) = {
|
// val (respArb, cmdRouter) = {
|
||||||
val respArb = Module(new RRArbiter(new RoCCResponse()(outer.p), outer.roccs.size))
|
// val respArb = Module(new RRArbiter(new RoCCResponse()(outer.p), outer.roccs.size))
|
||||||
val cmdRouter = Module(new RoccCommandRouter(outer.roccs.map(_.opcodes))(outer.p))
|
// val cmdRouter = Module(new RoccCommandRouter(outer.roccs.map(_.opcodes))(outer.p))
|
||||||
outer.roccs.zipWithIndex.foreach { case (rocc, i) =>
|
// outer.roccs.zipWithIndex.foreach { case (rocc, i) =>
|
||||||
// ptwPorts ++= rocc.module.io.ptw
|
// // ptwPorts ++= rocc.module.io.ptw
|
||||||
rocc.module.io.ptw <> DontCare
|
// rocc.module.io.ptw <> DontCare
|
||||||
rocc.module.io.mem <> DontCare
|
// rocc.module.io.mem <> DontCare
|
||||||
rocc.module.io.cmd <> cmdRouter.io.out(i)
|
// rocc.module.io.cmd <> cmdRouter.io.out(i)
|
||||||
respArb.io.in(i) <> Queue(rocc.module.io.resp)
|
// respArb.io.in(i) <> Queue(rocc.module.io.resp)
|
||||||
}
|
// }
|
||||||
// Create this FPU just for RoCC
|
// // Create this FPU just for RoCC
|
||||||
// val nFPUPorts = outer.roccs.filter(_.usesFPU).size
|
// // val nFPUPorts = outer.roccs.filter(_.usesFPU).size
|
||||||
val fp_rocc_ios = outer.roccs.map(_.module.io)
|
// val fp_rocc_ios = outer.roccs.map(_.module.io)
|
||||||
fp_rocc_ios.map { io =>
|
// fp_rocc_ios.map { io =>
|
||||||
io.fpu_req.ready := false.B
|
// io.fpu_req.ready := false.B
|
||||||
io.fpu_resp.valid := false.B
|
// io.fpu_resp.valid := false.B
|
||||||
io.fpu_resp.bits := DontCare
|
// io.fpu_resp.bits := DontCare
|
||||||
}
|
// }
|
||||||
(respArb, cmdRouter)
|
// (respArb, cmdRouter)
|
||||||
}
|
// }
|
||||||
|
|
||||||
cmdRouter.io.in <> DontCare
|
// cmdRouter.io.in <> DontCare
|
||||||
outer.roccs.foreach(_.module.io.exception := DontCare)
|
// outer.roccs.foreach(_.module.io.exception := DontCare)
|
||||||
respArb.io.out <> DontCare
|
// respArb.io.out <> DontCare
|
||||||
}
|
// }
|
||||||
}
|
}
|
||||||
|
|
||||||
// Some @copypaste from CoalescerSourceGen.
|
// Some @copypaste from CoalescerSourceGen.
|
||||||
|
|||||||
Reference in New Issue
Block a user