add imp component to rad smem, add core serialized access, update 2p smem
This commit is contained in:
81
src/main/scala/radiance/memory/SyncMem.scala
Normal file
81
src/main/scala/radiance/memory/SyncMem.scala
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
package radiance.memory
|
||||||
|
import chisel3._
|
||||||
|
import chisel3.util._
|
||||||
|
import midas.targetutils.SynthesizePrintf
|
||||||
|
|
||||||
|
// modified from gemmini's two port sync mem
|
||||||
|
/** Synchronous-read memory with one read port and one write port and a
  * per-unit write mask.
  *
  * Each of the `n` entries is stored as a `Vec` of `maskWidth` elements of
  * `maskedUnitWidth` bits. Bit `i` of `io.mask` enables the write of element
  * `i` of that `Vec` view of `io.wdata`. The backing `SyncReadMem` is declared
  * with `SyncReadMem.WriteFirst` read-under-write behaviour; a synthesizable
  * warning is still printed whenever an enabled read and an enabled write
  * address the same entry in the same cycle.
  *
  * @param n               number of entries
  * @param t               element type; its width must be known and must be a
  *                        whole multiple of `maskedUnitWidth`
  * @param maskedUnitWidth width in bits of one individually maskable unit
  */
class TwoPortSyncMem[T <: Data](n: Int, t: T, maskedUnitWidth: Int = 8) extends Module {
  require(maskedUnitWidth > 0, s"maskedUnitWidth must be positive, got $maskedUnitWidth")
  // `asTypeOf` does not check widths: if `t` were wider than
  // maskWidth * maskedUnitWidth, the upper bits of every word would be dropped
  // on write and read back as zero. Reject such configurations up front.
  require(t.getWidth % maskedUnitWidth == 0,
    s"data width ${t.getWidth} must be a multiple of maskedUnitWidth $maskedUnitWidth")

  val maskWidth = t.getWidth / maskedUnitWidth

  val io = IO(new Bundle {
    // Address ports are at least one bit wide, even when n == 1.
    val waddr = Input(UInt((log2Ceil(n) max 1).W))
    val raddr = Input(UInt((log2Ceil(n) max 1).W))
    val wdata = Input(t)
    val rdata = Output(t)
    val wen = Input(Bool())
    val ren = Input(Bool())
    val mask = Input(UInt(maskWidth.W))
  })

  // Report same-cycle read/write accesses to the same entry.
  when (io.wen && io.ren && io.raddr === io.waddr) {
    SynthesizePrintf(printf("WARNING: read and write collided at address 0x%x\n", io.raddr))
  }

  // Storage layout: one Vec element per maskable unit.
  val maskElem = UInt(maskedUnitWidth.W)
  val memT = Vec(maskWidth, maskElem)
  val mem = SyncReadMem(n, memT, SyncReadMem.WriteFirst)

  io.rdata := mem.read(io.raddr, io.ren).asTypeOf(t)

  when (io.wen) {
    mem.write(io.waddr, io.wdata.asTypeOf(memT), io.mask.asBools)
  }
}
|
||||||
|
|
||||||
|
/** Synchronous-read memory with two read ports and one write port and a
  * per-unit write mask.
  *
  * The two read ports are served by two separate `SyncReadMem` instances
  * (`mem0` for port 0, `mem1` for port 1). Every write is applied to both, so
  * the two copies always receive the same data. Each entry is stored as a
  * `Vec` of `maskWidth` elements of `maskedUnitWidth` bits; bit `i` of
  * `io.mask` enables the write of element `i`. A synthesizable warning is
  * printed whenever an enabled read on either port and an enabled write
  * address the same entry in the same cycle.
  *
  * @param n               number of entries
  * @param t               element type; its width must be known and must be a
  *                        whole multiple of `maskedUnitWidth`
  * @param maskedUnitWidth width in bits of one individually maskable unit
  */
class TwoReadOneWriteSyncMem[T <: Data](n: Int, t: T, maskedUnitWidth: Int = 8) extends Module {
  require(maskedUnitWidth > 0, s"maskedUnitWidth must be positive, got $maskedUnitWidth")
  // `asTypeOf` does not check widths: if `t` were wider than
  // maskWidth * maskedUnitWidth, the upper bits of every word would be dropped
  // on write and read back as zero. Reject such configurations up front.
  require(t.getWidth % maskedUnitWidth == 0,
    s"data width ${t.getWidth} must be a multiple of maskedUnitWidth $maskedUnitWidth")

  val maskWidth = t.getWidth / maskedUnitWidth

  val io = IO(new Bundle {
    // Address ports are at least one bit wide, even when n == 1.
    val waddr = Input(UInt((log2Ceil(n) max 1).W))
    val raddr0 = Input(UInt((log2Ceil(n) max 1).W))
    val raddr1 = Input(UInt((log2Ceil(n) max 1).W))
    val wdata = Input(t)
    val rdata0 = Output(t)
    val rdata1 = Output(t)
    val wen = Input(Bool())
    val ren0 = Input(Bool())
    val ren1 = Input(Bool())
    val mask = Input(UInt(maskWidth.W))
  })

  // Report same-cycle read/write accesses to the same entry, per read port.
  when (io.wen && io.ren0 && io.raddr0 === io.waddr) {
    SynthesizePrintf(printf("WARNING: read0 and write collided at address 0x%x\n", io.raddr0))
  }
  when (io.wen && io.ren1 && io.raddr1 === io.waddr) {
    SynthesizePrintf(printf("WARNING: read1 and write collided at address 0x%x\n", io.raddr1))
  }

  // Storage layout: one Vec element per maskable unit; one copy per read port.
  val maskElem = UInt(maskedUnitWidth.W)
  val memT = Vec(maskWidth, maskElem)
  val mem0 = SyncReadMem(n, memT, SyncReadMem.WriteFirst)
  val mem1 = SyncReadMem(n, memT, SyncReadMem.WriteFirst)

  io.rdata0 := mem0.read(io.raddr0, io.ren0).asTypeOf(t)
  io.rdata1 := mem1.read(io.raddr1, io.ren1).asTypeOf(t)

  when (io.wen) {
    // Convert once and write the identical data and mask to both copies.
    val writeData = io.wdata.asTypeOf(memT)
    val writeMask = io.mask.asBools
    mem0.write(io.waddr, writeData, writeMask)
    mem1.write(io.waddr, writeData, writeMask)
  }
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Factory for [[TwoPortSyncMem]]. */
object TwoPortSyncMem {
  /** Instantiates a [[TwoPortSyncMem]] wrapped in `Module(...)`.
    *
    * @param n               number of entries
    * @param t               element type
    * @param maskedUnitWidth width in bits of one maskable unit (default 8)
    * @return the instantiated memory module
    */
  def apply[T <: Data](n: Int, t: T, maskedUnitWidth: Int = 8): TwoPortSyncMem[T] =
    Module(new TwoPortSyncMem[T](n, t, maskedUnitWidth))
}
|
||||||
|
|
||||||
|
/** Factory for [[TwoReadOneWriteSyncMem]]. */
object TwoReadOneWriteSyncMem {
  /** Instantiates a [[TwoReadOneWriteSyncMem]] wrapped in `Module(...)`.
    *
    * @param n               number of entries
    * @param t               element type
    * @param maskedUnitWidth width in bits of one maskable unit (default 8)
    * @return the instantiated memory module
    */
  def apply[T <: Data](n: Int, t: T, maskedUnitWidth: Int = 8): TwoReadOneWriteSyncMem[T] =
    Module(new TwoReadOneWriteSyncMem[T](n, t, maskedUnitWidth))
}
|
||||||
@@ -20,17 +20,22 @@ object ExtPolicyNodeImp extends SimpleNodeImp[Int, Int, Int, ExtPolicyBundle] {
|
|||||||
case class ExtPolicyMasterNode(w: Int)(implicit valName: ValName) extends SourceNode(ExtPolicyNodeImp)(Seq(w))
|
case class ExtPolicyMasterNode(w: Int)(implicit valName: ValName) extends SourceNode(ExtPolicyNodeImp)(Seq(w))
|
||||||
case class ExtPolicySlaveNode()(implicit valName: ValName) extends SinkNode(ExtPolicyNodeImp)(Seq(0))
|
case class ExtPolicySlaveNode()(implicit valName: ValName) extends SinkNode(ExtPolicyNodeImp)(Seq(0))
|
||||||
|
|
||||||
class XbarWithExtPolicy(nameSuffix: Option[String] = None)
|
class XbarWithExtPolicy(nameSuffix: Option[String] = None, useFallback: Boolean = true)
|
||||||
(implicit p: Parameters) extends TLXbar(nameSuffix = nameSuffix) {
|
(implicit p: Parameters) extends TLXbar(nameSuffix = nameSuffix) {
|
||||||
val policySlaveNode = ExtPolicySlaveNode()
|
val policySlaveNode = ExtPolicySlaveNode()
|
||||||
|
|
||||||
class ImplChild extends Impl {
|
class ImplChild extends Impl {
|
||||||
val policy: TLArbiter.Policy = (width, valids, select) => {
|
val policy: TLArbiter.Policy = (width, valids, select) => {
|
||||||
val in = policySlaveNode.in.head._1
|
val in = policySlaveNode.in.head._1
|
||||||
val hintHit = (valids & in.hint).orR
|
|
||||||
val fallback = TLArbiter.lowestIndexFirst(width, valids, !hintHit && select)
|
|
||||||
in.actual := select.asTypeOf(in.actual.cloneType)
|
in.actual := select.asTypeOf(in.actual.cloneType)
|
||||||
Mux(hintHit, in.hint, fallback)
|
|
||||||
|
if (useFallback) {
|
||||||
|
val hintHit = (valids & in.hint).orR
|
||||||
|
val fallback = TLArbiter.lowestIndexFirst(width, valids, !hintHit && select)
|
||||||
|
Mux(hintHit, in.hint, fallback)
|
||||||
|
} else {
|
||||||
|
in.hint
|
||||||
|
}
|
||||||
}
|
}
|
||||||
TLXbar.circuit(policy, node.in, node.out)
|
TLXbar.circuit(policy, node.in, node.out)
|
||||||
}
|
}
|
||||||
@@ -44,4 +49,14 @@ object XbarWithExtPolicy {
|
|||||||
val xbar = LazyModule(new XbarWithExtPolicy(nameSuffix))
|
val xbar = LazyModule(new XbarWithExtPolicy(nameSuffix))
|
||||||
xbar
|
xbar
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Factory for an [[XbarWithExtPolicy]] constructed with `useFallback = false`,
  * together with an identity node wired in front of it.
  */
object XbarWithExtPolicyNoFallback {
  /** Builds the crossbar and connects a fresh `TLIdentityNode` to its input
    * side with `:=*`.
    *
    * @param nameSuffix optional name suffix forwarded to the crossbar
    * @return the crossbar lazy module paired with the identity node that feeds it
    */
  def apply(nameSuffix: Option[String] = None)
           (implicit p: Parameters): (XbarWithExtPolicy, TLIdentityNode) = {
    val inIdNode = TLIdentityNode()
    val xbar = LazyModule(new XbarWithExtPolicy(nameSuffix, useFallback = false))
    xbar.node :=* inIdNode
    (xbar, inIdNode)
  }
}
|
||||||
|
|||||||
@@ -15,6 +15,12 @@ import radiance.tile._
|
|||||||
import radiance.memory._
|
import radiance.memory._
|
||||||
import radiance.subsystem.RadianceGemminiDataType.{BF16, FP16, FP32, Int8}
|
import radiance.subsystem.RadianceGemminiDataType.{BF16, FP16, FP32, Int8}
|
||||||
|
|
||||||
|
/** Selects how unaligned shared-memory accesses are serialized; this is the
  * type of `RadianceSharedMemKey.serializeUnaligned`.
  */
sealed trait RadianceSmemSerialization
|
||||||
|
|
||||||
|
// As used by VirgoSharedMemComponents in this change: all unaligned nodes are
// funnelled through a single in-xbar / out-xbar pair (built once per R/W side).
case object FullySerialized extends RadianceSmemSerialization
|
||||||
|
// As used by VirgoSharedMemComponents in this change: one externally-arbitrated
// xbar per lane, with the policy hint selecting among cores.
case object CoreSerialized extends RadianceSmemSerialization
|
||||||
|
// As used by VirgoSharedMemComponents in this change: every unaligned node gets
// its own xbar, with no shared serialization point.
case object NotSerialized extends RadianceSmemSerialization
|
||||||
|
|
||||||
case class RadianceSharedMemKey(address: BigInt,
|
case class RadianceSharedMemKey(address: BigInt,
|
||||||
size: Int,
|
size: Int,
|
||||||
numBanks: Int,
|
numBanks: Int,
|
||||||
@@ -23,7 +29,7 @@ case class RadianceSharedMemKey(address: BigInt,
|
|||||||
strideByWord: Boolean = true,
|
strideByWord: Boolean = true,
|
||||||
filterAligned: Boolean = true,
|
filterAligned: Boolean = true,
|
||||||
disableMonitors: Boolean = true,
|
disableMonitors: Boolean = true,
|
||||||
serializeUnaligned: Boolean = true)
|
serializeUnaligned: RadianceSmemSerialization = FullySerialized)
|
||||||
case object RadianceSharedMemKey extends Field[Option[RadianceSharedMemKey]](None)
|
case object RadianceSharedMemKey extends Field[Option[RadianceSharedMemKey]](None)
|
||||||
|
|
||||||
case class RadianceFrameBufferKey(baseAddress: BigInt,
|
case class RadianceFrameBufferKey(baseAddress: BigInt,
|
||||||
@@ -56,7 +62,7 @@ class WithRadianceCores(
|
|||||||
nTLBWays = 1,
|
nTLBWays = 1,
|
||||||
nTLBBasePageSectors = 1,
|
nTLBBasePageSectors = 1,
|
||||||
nTLBSuperpages = 1,
|
nTLBSuperpages = 1,
|
||||||
nMSHRs = 0,
|
nMSHRs = 0,
|
||||||
blockBytes = site(CacheBlockBytes))),
|
blockBytes = site(CacheBlockBytes))),
|
||||||
icache = Some(ICacheParams(
|
icache = Some(ICacheParams(
|
||||||
rowBits = site(SystemBusKey).beatBits,
|
rowBits = site(SystemBusKey).beatBits,
|
||||||
@@ -194,8 +200,8 @@ class WithRadianceSharedMem(address: BigInt,
|
|||||||
strideByWord: Boolean = true,
|
strideByWord: Boolean = true,
|
||||||
filterAligned: Boolean = true,
|
filterAligned: Boolean = true,
|
||||||
disableMonitors: Boolean = true,
|
disableMonitors: Boolean = true,
|
||||||
serializeUnaligned: Boolean = true
|
serializeUnaligned: RadianceSmemSerialization = FullySerialized
|
||||||
) extends Config((site, _, _) => {
|
) extends Config((_, _, _) => {
|
||||||
case RadianceSharedMemKey => {
|
case RadianceSharedMemKey => {
|
||||||
require(isPow2(size) && size >= 1024)
|
require(isPow2(size) && size >= 1024)
|
||||||
Some(RadianceSharedMemKey(
|
Some(RadianceSharedMemKey(
|
||||||
|
|||||||
@@ -38,7 +38,9 @@ class RadianceCluster (
|
|||||||
|
|
||||||
// TODO: this probably needs to be instantiated inside the radiance shared mem module
|
// TODO: this probably needs to be instantiated inside the radiance shared mem module
|
||||||
def virgoSharedMemComponentsGen() = new VirgoSharedMemComponents(thisClusterParams, gemminiTiles, radianceTiles)
|
def virgoSharedMemComponentsGen() = new VirgoSharedMemComponents(thisClusterParams, gemminiTiles, radianceTiles)
|
||||||
LazyModule(new RadianceSharedMem(virgoSharedMemComponentsGen, clbus)).suggestName("shared_mem")
|
def virgoSharedMemComponentsImpGen(outer: VirgoSharedMemComponents) = new VirgoSharedMemComponentsImp(outer)
|
||||||
|
LazyModule(new RadianceSharedMem(
|
||||||
|
virgoSharedMemComponentsGen, Some(virgoSharedMemComponentsImpGen(_)), clbus)).suggestName("shared_mem")
|
||||||
|
|
||||||
// direct core-accelerator connections
|
// direct core-accelerator connections
|
||||||
val smemKey = p(RadianceSharedMemKey).get
|
val smemKey = p(RadianceSharedMemKey).get
|
||||||
|
|||||||
@@ -4,14 +4,14 @@ import chisel3._
|
|||||||
import chisel3.util._
|
import chisel3.util._
|
||||||
import org.chipsalliance.diplomacy.lazymodule._
|
import org.chipsalliance.diplomacy.lazymodule._
|
||||||
import org.chipsalliance.cde.config.Parameters
|
import org.chipsalliance.cde.config.Parameters
|
||||||
import radiance.memory._
|
|
||||||
import freechips.rocketchip.tilelink._
|
import freechips.rocketchip.tilelink._
|
||||||
import freechips.rocketchip.diplomacy.{AddressSet, TransferSizes}
|
import freechips.rocketchip.diplomacy.{AddressSet, TransferSizes}
|
||||||
|
import gemmini.Pipeline
|
||||||
import radiance.subsystem.RadianceSharedMemKey
|
import radiance.subsystem.RadianceSharedMemKey
|
||||||
import gemmini._
|
import radiance.memory._
|
||||||
import scala.collection.mutable.ArrayBuffer
|
import scala.collection.mutable.ArrayBuffer
|
||||||
|
|
||||||
trait RadianceSmemNodeProvider {
|
abstract class RadianceSmemNodeProvider {
|
||||||
val uniformRNodes: Seq[Seq[Seq[TLNexusNode]]]
|
val uniformRNodes: Seq[Seq[Seq[TLNexusNode]]]
|
||||||
val uniformWNodes: Seq[Seq[Seq[TLNexusNode]]]
|
val uniformWNodes: Seq[Seq[Seq[TLNexusNode]]]
|
||||||
val nonuniformRNodes: Seq[TLNode]
|
val nonuniformRNodes: Seq[TLNode]
|
||||||
@@ -19,8 +19,11 @@ trait RadianceSmemNodeProvider {
|
|||||||
val clBusClients: Seq[TLNode]
|
val clBusClients: Seq[TLNode]
|
||||||
}
|
}
|
||||||
|
|
||||||
class RadianceSharedMem(
|
/** Base class for the implementation-side counterpart of a
  * [[RadianceSmemNodeProvider]]. It only holds a reference to the provider
  * (`outer`); subclasses add their logic in the constructor body.
  *
  * @param outer the node provider this implementation belongs to
  */
abstract class RadianceSmemNodeProviderImp[T <: RadianceSmemNodeProvider](val outer: T) {}
|
||||||
provider: () => RadianceSmemNodeProvider,
|
|
||||||
|
class RadianceSharedMem[T <: RadianceSmemNodeProvider](
|
||||||
|
provider: () => T,
|
||||||
|
val providerImp: Option[(T) => RadianceSmemNodeProviderImp[T]],
|
||||||
clbus: TLBusWrapper
|
clbus: TLBusWrapper
|
||||||
)(implicit p: Parameters) extends LazyModule {
|
)(implicit p: Parameters) extends LazyModule {
|
||||||
val smemKey = p(RadianceSharedMemKey).get
|
val smemKey = p(RadianceSharedMemKey).get
|
||||||
@@ -31,6 +34,7 @@ class RadianceSharedMem(
|
|||||||
val smemDepth = smemKey.size / smemWidth / smemBanks
|
val smemDepth = smemKey.size / smemWidth / smemBanks
|
||||||
val smemSubbanks = smemWidth / wordSize
|
val smemSubbanks = smemWidth / wordSize
|
||||||
val smemSize = smemWidth * smemDepth * smemBanks
|
val smemSize = smemWidth * smemDepth * smemBanks
|
||||||
|
val strideByWord = smemKey.strideByWord
|
||||||
|
|
||||||
require(isPow2(smemBanks))
|
require(isPow2(smemBanks))
|
||||||
|
|
||||||
@@ -38,11 +42,7 @@ class RadianceSharedMem(
|
|||||||
val (uniformRNodes, uniformWNodes, nonuniformRNodes, nonuniformWNodes) =
|
val (uniformRNodes, uniformWNodes, nonuniformRNodes, nonuniformWNodes) =
|
||||||
(smNodes.uniformRNodes, smNodes.uniformWNodes, smNodes.nonuniformRNodes, smNodes.nonuniformWNodes)
|
(smNodes.uniformRNodes, smNodes.uniformWNodes, smNodes.nonuniformRNodes, smNodes.nonuniformWNodes)
|
||||||
|
|
||||||
// TODO: move this to config
|
implicit val disableMonitors = smemKey.disableMonitors // otherwise it generate 1k+ different tl monitors
|
||||||
val strideByWord = true
|
|
||||||
val filterAligned = true
|
|
||||||
val serializeUnaligned = true
|
|
||||||
implicit val disableMonitors = true // otherwise it generate 1k+ different tl monitors
|
|
||||||
|
|
||||||
// collection of read and write managers for each sram (sub)bank
|
// collection of read and write managers for each sram (sub)bank
|
||||||
val smemBankMgrs : Seq[Seq[TLManagerNode]] = if (strideByWord) {
|
val smemBankMgrs : Seq[Seq[TLManagerNode]] = if (strideByWord) {
|
||||||
@@ -180,9 +180,11 @@ class RadianceSharedMem(
|
|||||||
lazy val module = new RadianceSharedMemImp(this)
|
lazy val module = new RadianceSharedMemImp(this)
|
||||||
}
|
}
|
||||||
|
|
||||||
class RadianceSharedMemImp(outer: RadianceSharedMem) extends LazyModuleImp(outer) {
|
class RadianceSharedMemImp[T <: RadianceSmemNodeProvider](outer: RadianceSharedMem[T]) extends LazyModuleImp(outer) {
|
||||||
|
|
||||||
def makeBuffer[T <: Data](mem: TwoPortSyncMem[T], rNode: TLBundle, rEdge: TLEdgeIn,
|
val smNodesImp = outer.providerImp.map(impFn => impFn(outer.smNodes))
|
||||||
|
|
||||||
|
def makeBuffer[U <: Data](mem: TwoPortSyncMem[U], rNode: TLBundle, rEdge: TLEdgeIn,
|
||||||
wNode: TLBundle, wEdge: TLEdgeIn): Unit = {
|
wNode: TLBundle, wEdge: TLEdgeIn): Unit = {
|
||||||
mem.io.ren := rNode.a.fire
|
mem.io.ren := rNode.a.fire
|
||||||
|
|
||||||
@@ -240,7 +242,7 @@ class RadianceSharedMemImp(outer: RadianceSharedMem) extends LazyModuleImp(outer
|
|||||||
// WRITE
|
// WRITE
|
||||||
mem.io.wen := RegNext(wNode.a.fire)
|
mem.io.wen := RegNext(wNode.a.fire)
|
||||||
mem.io.wdata := RegNext(wNode.a.bits.data)
|
mem.io.wdata := RegNext(wNode.a.bits.data)
|
||||||
mem.io.mask := RegNext(VecInit(wNode.a.bits.mask.asBools))
|
mem.io.mask := RegNext(wNode.a.bits.mask)
|
||||||
|
|
||||||
val writeResp = Wire(Flipped(wNode.d.cloneType))
|
val writeResp = Wire(Flipped(wNode.d.cloneType))
|
||||||
writeResp.bits := wEdge.AccessAck(wNode.a.bits)
|
writeResp.bits := wEdge.AccessAck(wNode.a.bits)
|
||||||
@@ -286,7 +288,6 @@ class RadianceSharedMemImp(outer: RadianceSharedMem) extends LazyModuleImp(outer
|
|||||||
val mem = TwoPortSyncMem(
|
val mem = TwoPortSyncMem(
|
||||||
n = memDepth,
|
n = memDepth,
|
||||||
t = UInt((wordWidth * 8).W),
|
t = UInt((wordWidth * 8).W),
|
||||||
mask_len = wordWidth // byte level mask
|
|
||||||
)
|
)
|
||||||
// TODO: bring in cluster id
|
// TODO: bring in cluster id
|
||||||
// mem.suggestName(s"rad_smem_cl${outer.thisClusterParams.clusterId}_b${bid}_w${wid}")
|
// mem.suggestName(s"rad_smem_cl${outer.thisClusterParams.clusterId}_b${bid}_w${wid}")
|
||||||
@@ -346,7 +347,6 @@ class RadianceSharedMemImp(outer: RadianceSharedMem) extends LazyModuleImp(outer
|
|||||||
val mem = TwoPortSyncMem(
|
val mem = TwoPortSyncMem(
|
||||||
n = memDepth,
|
n = memDepth,
|
||||||
t = UInt((memWidth * 8).W),
|
t = UInt((memWidth * 8).W),
|
||||||
mask_len = memWidth // byte level mask
|
|
||||||
)
|
)
|
||||||
|
|
||||||
val (rNode, rEdge) = r.in.head
|
val (rNode, rEdge) = r.in.head
|
||||||
|
|||||||
@@ -9,8 +9,9 @@ import radiance.memory._
|
|||||||
import freechips.rocketchip.tilelink._
|
import freechips.rocketchip.tilelink._
|
||||||
import freechips.rocketchip.diplomacy.{AddressSet, BufferParams}
|
import freechips.rocketchip.diplomacy.{AddressSet, BufferParams}
|
||||||
import freechips.rocketchip.subsystem.BaseClusterParams
|
import freechips.rocketchip.subsystem.BaseClusterParams
|
||||||
import radiance.subsystem.RadianceSharedMemKey
|
import radiance.subsystem.{CoreSerialized, FullySerialized, NotSerialized, RadianceSharedMemKey}
|
||||||
import gemmini._
|
import gemmini._
|
||||||
|
import scala.collection.mutable.ArrayBuffer
|
||||||
|
|
||||||
// virgo-specific tilelink nodes
|
// virgo-specific tilelink nodes
|
||||||
// generic smem implementation is in RadianceSharedMem.scala
|
// generic smem implementation is in RadianceSharedMem.scala
|
||||||
@@ -28,6 +29,9 @@ class VirgoSharedMemComponents(
|
|||||||
val smemSubbanks = smemWidth / wordSize
|
val smemSubbanks = smemWidth / wordSize
|
||||||
val smemSize = smemWidth * smemDepth * smemBanks
|
val smemSize = smemWidth * smemDepth * smemBanks
|
||||||
|
|
||||||
|
val numCores = radianceTiles.length
|
||||||
|
val numLanes = radianceTiles.head.numLsuLanes
|
||||||
|
|
||||||
val gemminis = gemminiTiles.map(_.gemmini)
|
val gemminis = gemminiTiles.map(_.gemmini)
|
||||||
val gemminiConfigs = gemminis.map(_.config)
|
val gemminiConfigs = gemminis.map(_.config)
|
||||||
gemminiConfigs.foreach { config =>
|
gemminiConfigs.foreach { config =>
|
||||||
@@ -55,11 +59,25 @@ class VirgoSharedMemComponents(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
val tcNodeFanouts = radianceTiles.flatMap(_.tcSmemNodes)
|
val tcNodeFanouts = radianceTiles.flatMap(_.tcSmemNodes)
|
||||||
.map(connectOne(_, () => TLBuffer(BufferParams(2, false, false), BufferParams(0))))
|
// .map(connectOne(_, () => TLBuffer(BufferParams(2, false, false), BufferParams(0))))
|
||||||
.map(connectXbarName(_, Some("tc_fanout")))
|
.map(connectXbarName(_, Some("tc_fanout")))
|
||||||
val clBusClients: Seq[TLNode] = radianceSmemFanout
|
val clBusClients: Seq[TLNode] = radianceSmemFanout
|
||||||
|
|
||||||
val (uniformRNodes, uniformWNodes, nonuniformRNodes, nonuniformWNodes) =
|
// convert to monad (very fancy)
|
||||||
|
val coreSerialOpt: Option[Unit] = serializeUnaligned match {
|
||||||
|
case CoreSerialized => Some(())
|
||||||
|
case _ => None
|
||||||
|
}
|
||||||
|
|
||||||
|
// uniform mux select for selecting lanes from a single core in unison
|
||||||
|
val coreSerialPolicy = coreSerialOpt.map(_ => Seq.fill(2)(Seq.fill(numLanes)(ExtPolicyMasterNode(numCores))))
|
||||||
|
val laneSerialXbars = coreSerialOpt.map(_ => Seq.tabulate(2) { rw =>
|
||||||
|
Seq.tabulate(numLanes) { lid =>
|
||||||
|
XbarWithExtPolicyNoFallback(Some(f"lane_${lid}_serial_in_xbar_$rw"))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
override val (uniformRNodes, uniformWNodes, nonuniformRNodes, nonuniformWNodes) =
|
||||||
|
|
||||||
if (strideByWord) {
|
if (strideByWord) {
|
||||||
def distAndDuplicate(nodes: Seq[TLNode], suffix: String): Seq[Seq[TLNexusNode]] = {
|
def distAndDuplicate(nodes: Seq[TLNode], suffix: String): Seq[Seq[TLNexusNode]] = {
|
||||||
@@ -93,57 +111,35 @@ class VirgoSharedMemComponents(
|
|||||||
Seq.fill(smemSubbanks)(tcBank.map(connectOne(_, () => TLBuffer(BufferParams(2, false, false)))).map(connectXbarName(_, Some("tc_dist_fanout"))))
|
Seq.fill(smemSubbanks)(tcBank.map(connectOne(_, () => TLBuffer(BufferParams(2, false, false)))).map(connectXbarName(_, Some("tc_dist_fanout"))))
|
||||||
} // (banks, subbanks, tc client)
|
} // (banks, subbanks, tc client)
|
||||||
|
|
||||||
|
val unalignedRWNodes: ArrayBuffer[ArrayBuffer[TLNexusNode]] = // mutable for readability
|
||||||
|
ArrayBuffer.fill(numLanes)(ArrayBuffer.fill(numCores)(null))
|
||||||
|
|
||||||
if (filterAligned) {
|
if (filterAligned) {
|
||||||
val numLsuLanes = radianceTiles.head.numLsuLanes
|
val numLaneDupes = Math.max(1, smemSubbanks / numLanes)
|
||||||
val numLaneDupes = Math.max(1, smemSubbanks / numLsuLanes)
|
val filterRange = Math.min(smemSubbanks, numLanes)
|
||||||
val filterRange = Math.min(smemSubbanks, numLsuLanes)
|
|
||||||
println(s"num_lsu_lanes ${numLsuLanes} num_lane_dupes ${numLaneDupes} filter_range ${filterRange}")
|
|
||||||
|
|
||||||
// (subbank, sources, aligned) = rw node
|
// (subbank, sources) = rw node
|
||||||
val (fAligned, fUnaligned) = if (numLsuLanes >= smemSubbanks) {
|
val fAligned = if (numLanes >= smemSubbanks) {
|
||||||
val filterNodes: Seq[Seq[(TLNode, TLNode)]] = Seq.tabulate(numLaneDupes) { did =>
|
val filterNodes: Seq[Seq[TLNode]] = Seq.tabulate(filterRange) { wid =>
|
||||||
Seq.tabulate(filterRange) { wid =>
|
val address = AddressSet(smemBase + wordSize * wid, (smemSize - 1) - (smemSubbanks - 1) * wordSize)
|
||||||
val trueWid = did * filterRange + wid
|
|
||||||
val address = AddressSet(smemBase + wordSize * trueWid, (smemSize - 1) - (smemSubbanks - 1) * wordSize)
|
|
||||||
|
|
||||||
radianceSmemFanout.grouped(numLsuLanes).toList.zipWithIndex.flatMap { case (lanes, cid) =>
|
radianceSmemFanout.grouped(numLanes).toList.zipWithIndex.flatMap { case (lanes, cid) =>
|
||||||
lanes.zipWithIndex.flatMap { case (lane, lid) =>
|
lanes.zipWithIndex.flatMap { case (lane, lid) =>
|
||||||
if ((lid % filterRange) == wid) {
|
if ((lid % filterRange) == wid) {
|
||||||
println(f"c${cid}_l${lid} connected to d${did}w${wid}")
|
val filterNode = AlignFilterNode(Seq(address))(p, ValName(s"filter_l${lid}_w${wid}"))
|
||||||
val filterNode = AlignFilterNode(Seq(address))(p, ValName(s"filter_l${lid}_w${trueWid}"))
|
DisableMonitors { implicit p => filterNode := lane }
|
||||||
DisableMonitors { implicit p => filterNode := lane }
|
|
||||||
// Seq((aligned splitter, unaligned splitter))
|
unalignedRWNodes(lid)(cid) = connectOne(filterNode, () =>
|
||||||
Seq((
|
RWSplitterNode(AddressSet.everything, s"unaligned_splitter_c${cid}_l${lid}"))
|
||||||
connectOne(filterNode, () =>
|
|
||||||
RWSplitterNode(address, s"aligned_splitter_c${cid}_l${lid}_w${trueWid}")),
|
Seq(connectOne(filterNode, () =>
|
||||||
connectOne(filterNode, () =>
|
RWSplitterNode(address, s"aligned_splitter_c${cid}_l${lid}_w${wid}")))
|
||||||
RWSplitterNode(AddressSet.everything, s"unaligned_splitter_c${cid}_l${lid}"))
|
} else Seq()
|
||||||
))
|
|
||||||
} else Seq()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}.flatten
|
|
||||||
|
|
||||||
val fAligned = Seq.fill(2)(filterNodes.map(_.map(_._1).map(connectXbarName(_, Some("rad_aligned")))))
|
|
||||||
val fUnaligned = if (serializeUnaligned) {
|
|
||||||
Seq.fill(2) {
|
|
||||||
val serializedNode = TLEphemeralNode()
|
|
||||||
val serializedInXbar = LazyModule(new TLXbar())
|
|
||||||
val serializedOutXbar = LazyModule(new TLXbar())
|
|
||||||
serializedInXbar.suggestName("unaligned_serialized_in_xbar")
|
|
||||||
serializedOutXbar.suggestName("unaligned_serialized_out_xbar")
|
|
||||||
guardMonitors { implicit p =>
|
|
||||||
filterNodes.foreach(_.map(_._2).foreach(serializedInXbar.node := _))
|
|
||||||
serializedNode := serializedInXbar.node
|
|
||||||
serializedOutXbar.node := serializedNode
|
|
||||||
}
|
|
||||||
Seq(serializedOutXbar.node)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
Seq.fill(2)(filterNodes.flatMap(_.map(_._2).map(connectXbar.apply)))
|
|
||||||
}
|
}
|
||||||
(fAligned, fUnaligned)
|
|
||||||
|
Seq.fill(2)(filterNodes.map(_.map(connectXbarName(_, Some("rad_aligned")))))
|
||||||
} else { // aligned: (subbanks, cores) = rw node
|
} else { // aligned: (subbanks, cores) = rw node
|
||||||
// (lanes, cores) = filter_node
|
// (lanes, cores) = filter_node
|
||||||
val filterNodes = Seq.tabulate(filterRange) { wid =>
|
val filterNodes = Seq.tabulate(filterRange) { wid =>
|
||||||
@@ -151,7 +147,7 @@ class VirgoSharedMemComponents(
|
|||||||
AddressSet(smemBase + (did * filterRange + wid) * wordSize,
|
AddressSet(smemBase + (did * filterRange + wid) * wordSize,
|
||||||
(smemSize - 1) - (smemSubbanks - 1) * wordSize)
|
(smemSize - 1) - (smemSubbanks - 1) * wordSize)
|
||||||
}
|
}
|
||||||
radianceSmemFanout.grouped(numLsuLanes).toSeq.zipWithIndex.map { case (lanes, cid) =>
|
radianceSmemFanout.grouped(numLanes).toSeq.zipWithIndex.map { case (lanes, cid) =>
|
||||||
val lane = lanes(wid)
|
val lane = lanes(wid)
|
||||||
val filterNode = AlignFilterNode(addresses)(p, ValName(s"filter_c${cid}_w${wid}"))
|
val filterNode = AlignFilterNode(addresses)(p, ValName(s"filter_c${cid}_w${wid}"))
|
||||||
guardMonitors { implicit p =>
|
guardMonitors { implicit p =>
|
||||||
@@ -169,29 +165,40 @@ class VirgoSharedMemComponents(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}.flatten
|
}.flatten
|
||||||
val fUnalignedRW = filterNodes.zipWithIndex.flatMap { case (cores, lid) =>
|
filterNodes.zipWithIndex.foreach { case (cores, lid) =>
|
||||||
cores.zipWithIndex.map { case (fn, cid) =>
|
cores.zipWithIndex.foreach { case (fn, cid) =>
|
||||||
connectOne(fn, () => RWSplitterNode(AddressSet.everything, s"unaligned_split_c${cid}_l${lid}"))
|
unalignedRWNodes(lid)(cid) = connectOne(fn, () =>
|
||||||
|
RWSplitterNode(AddressSet.everything, s"unaligned_split_c${cid}_l${lid}"))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
val fAligned = Seq.fill(2)(fAlignedRW.map(_.map(connectXbarName(_, Some("rad_aligned")))))
|
Seq.fill(2)(fAlignedRW.map(_.map(connectXbarName(_, Some("rad_aligned")))))
|
||||||
|
}
|
||||||
|
|
||||||
val fUnaligned = if (serializeUnaligned) {
|
val fUnaligned: Seq[Seq[TLNode]] = serializeUnaligned match {
|
||||||
Seq.fill(2) {
|
case FullySerialized => Seq.fill(2) {
|
||||||
val serializedNode = TLEphemeralNode()
|
val serializedNode = TLEphemeralNode()
|
||||||
val serializedInXbar = TLXbar(nameSuffix = Some("unaligned_ser_in"))
|
val serializedInXbar = LazyModule(new TLXbar())
|
||||||
val serializedOutXbar = TLXbar(nameSuffix = Some("unaligned_ser_out"))
|
val serializedOutXbar = LazyModule(new TLXbar())
|
||||||
guardMonitors { implicit p =>
|
serializedInXbar.suggestName("unaligned_serialized_in_xbar")
|
||||||
fUnalignedRW.foreach(serializedInXbar := _)
|
serializedOutXbar.suggestName("unaligned_serialized_out_xbar")
|
||||||
serializedNode := serializedInXbar
|
guardMonitors { implicit p =>
|
||||||
serializedOutXbar := serializedNode
|
unalignedRWNodes.flatten.foreach(serializedInXbar.node := _)
|
||||||
}
|
serializedNode := serializedInXbar.node
|
||||||
Seq(serializedOutXbar)
|
serializedOutXbar.node := serializedNode
|
||||||
}
|
}
|
||||||
} else {
|
Seq(serializedOutXbar.node)
|
||||||
Seq.fill(2)(fUnalignedRW.map(connectXbar.apply))
|
|
||||||
}
|
}
|
||||||
(fAligned, fUnaligned)
|
case CoreSerialized => Seq.tabulate(2) { rw =>
|
||||||
|
// we can either have one core per lane selected (multiple mux selects)
|
||||||
|
// or strictly lanes from a single selected core (one mux select). doing the latter here
|
||||||
|
unalignedRWNodes.toSeq.zipWithIndex.map { case (coresRW, lid) =>
|
||||||
|
val laneSerialXbar = laneSerialXbars.get(rw)(lid)
|
||||||
|
laneSerialXbar._1.policySlaveNode := coreSerialPolicy.get(rw)(lid)
|
||||||
|
coresRW.foreach(laneSerialXbar._2 := _)
|
||||||
|
connectXbarName(connectOne(laneSerialXbar._1.node, TLEphemeralNode.apply), Some(s"lane_${lid}_serial_out"))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case NotSerialized => Seq.fill(2)(unalignedRWNodes.toSeq.flatten.map(connectXbar.apply))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -215,6 +222,8 @@ class VirgoSharedMemComponents(
|
|||||||
val uniformWNodes: Seq[Seq[Seq[TLNexusNode]]] = (spadWriteNodes zip spadSpWriteNodes).map { case (wb, wsb) =>
|
val uniformWNodes: Seq[Seq[Seq[TLNexusNode]]] = (spadWriteNodes zip spadSpWriteNodes).map { case (wb, wsb) =>
|
||||||
(wb zip wsb).map { case (ww, wsw) => ww ++ wsw }
|
(wb zip wsb).map { case (ww, wsw) => ww ++ wsw }
|
||||||
}
|
}
|
||||||
|
// random accesses are not serialized on this path, so any serialization mode
// other than NotSerialized cannot be honoured; fail elaboration instead
require(serializeUnaligned == NotSerialized,
  "when not filtering, unaligned accesses cannot be serialized: serializeUnaligned must be NotSerialized")
|
||||||
// these nodes are random access
|
// these nodes are random access
|
||||||
val nonuniformRNodes: Seq[TLNode] = splitterNodes.map(connectXbarName(_, Some("rad_unaligned_r")))
|
val nonuniformRNodes: Seq[TLNode] = splitterNodes.map(connectXbarName(_, Some("rad_unaligned_r")))
|
||||||
val nonuniformWNodes: Seq[TLNode] = splitterNodes.map(connectXbarName(_, Some("rad_unaligned_w")))
|
val nonuniformWNodes: Seq[TLNode] = splitterNodes.map(connectXbarName(_, Some("rad_unaligned_w")))
|
||||||
@@ -242,3 +251,23 @@ class VirgoSharedMemComponents(
|
|||||||
(Seq.empty, Seq.empty, Seq(unifiedMemReadNode), Seq(unifiedMemWriteNode))
|
(Seq.empty, Seq.empty, Seq(unifiedMemReadNode), Seq(unifiedMemWriteNode))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Implementation-side logic for [[VirgoSharedMemComponents]].
  *
  * For each paired group of lane-serialization xbars and external-policy
  * master nodes, this computes a single round-robin core selection, gates the
  * `a.valid` of every lane's identity-node output with it, and drives the same
  * selection as the `hint` of every policy node in the group.
  *
  * @param outer the components object whose `laneSerialXbars` and
  *              `coreSerialPolicy` are wired up here
  */
class VirgoSharedMemComponentsImp[T <: VirgoSharedMemComponents]
|
||||||
|
(override val outer: T) extends RadianceSmemNodeProviderImp[T](outer) {
|
||||||
|
|
||||||
|
// Both operands are Options, so this body runs only when both are defined.
(outer.laneSerialXbars zip outer.coreSerialPolicy).foreach { case (xbarsRW, policiesRW) =>
|
||||||
|
// Pair each group of per-lane xbars with its group of per-lane policy nodes.
(xbarsRW zip policiesRW).foreach { case (xbars, policies) =>
|
||||||
|
// `xbars` is indexed by lane and each identity node's edges by core, so after
// the transpose the outer index is the core: for each core, whether any of
// its lanes has a valid request on the identity-node input side
|
||||||
|
val coreValids = xbars.map(_._2.in.map(_._1)).transpose.map { core => VecInit(core.map(_.a.valid)).asUInt.orR }
|
||||||
|
// for each core, whether any of its lanes sees a.ready on the identity-node output side
val select = xbars.map(_._2.out.map(_._1)).transpose.map { core => VecInit(core.map(_.a.ready)).asUInt.orR }
|
||||||
|
// one selection shared by all lanes; the last argument is the OR of all per-core `select` bits
val coreSelect = TLArbiter.roundRobin(outer.numCores, VecInit(coreValids).asUInt, VecInit(select).asUInt.orR)
|
||||||
|
// TODO: roll this into XbarWithExtPolicy
|
||||||
|
// for every lane, pass a core's request through only when that core's select bit is set
xbars.foreach { lane =>
|
||||||
|
(lane._2.in.map(_._1) lazyZip lane._2.out.map(_._1) lazyZip coreSelect.asBools).foreach { case (li, lo, cs) =>
|
||||||
|
lo.a.valid := li.a.valid && cs
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// drive the same core selection as the arbitration hint of every policy node in the group
policies.foreach { _.out.head._1.hint := coreSelect }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user