Files
radiance/src/main/scala/radiance/subsystem/Configs.scala
2024-09-10 15:38:12 -07:00

391 lines
14 KiB
Scala

// See LICENSE.SiFive for license details.
// See LICENSE.Berkeley for license details.
package radiance.subsystem
import chisel3._
import chisel3.util._
import org.chipsalliance.cde.config._
import freechips.rocketchip.rocket._
import freechips.rocketchip.tile._
import freechips.rocketchip.subsystem._
import gemmini._
import gemmini.Arithmetic.FloatArithmetic._
import radiance.tile._
import radiance.memory._
import radiance.subsystem.RadianceGemminiDataType.{BF16, FP16, FP32, Int8}
case class RadianceSharedMemKey(address: BigInt,
size: Int,
numBanks: Int,
numWords: Int,
wordSize: Int = 4,
strideByWord: Boolean = true,
filterAligned: Boolean = true,
disableMonitors: Boolean = true,
serializeUnaligned: Boolean = true)
case object RadianceSharedMemKey extends Field[Option[RadianceSharedMemKey]](None)
case class RadianceFrameBufferKey(baseAddress: BigInt,
width: Int,
size: Int,
validAddress: BigInt,
fbName: String = "fb")
case object RadianceFrameBufferKey extends Field[Seq[RadianceFrameBufferKey]](Seq())
class WithRadianceCores(
n: Int,
location: HierarchicalLocation,
crossing: RocketCrossingParams,
tensorCoreFP16: Boolean,
useVxCache: Boolean
) extends Config((site, _, up) => {
case TilesLocated(`location`) => {
val prev = up(TilesLocated(`location`))
val idOffset = up(NumTiles)
val coreIdOffset = up(NumRadianceCores)
val vortex = RadianceTileParams(
core = VortexCoreParams(tensorCoreFP16 = tensorCoreFP16),
btb = None,
useVxCache = useVxCache,
dcache = Some(DCacheParams(
rowBits = site(SystemBusKey).beatBits,
nSets = 64,
nWays = 1,
nTLBSets = 1,
nTLBWays = 1,
nTLBBasePageSectors = 1,
nTLBSuperpages = 1,
nMSHRs = 0,
blockBytes = site(CacheBlockBytes))),
icache = Some(ICacheParams(
rowBits = site(SystemBusKey).beatBits,
nSets = 64,
nWays = 1,
nTLBSets = 1,
nTLBWays = 1,
nTLBBasePageSectors = 1,
nTLBSuperpages = 1,
blockBytes = site(CacheBlockBytes))))
List.tabulate(n)(i => RadianceTileAttachParams(
vortex.copy(
tileId = i + idOffset,
coreId = i + coreIdOffset,
),
crossing
)) ++ prev
}
case NumTiles => up(NumTiles) + n
case NumRadianceCores => up(NumRadianceCores) + n
}) {
// constructor override that omits `crossing`
def this(n: Int, location: HierarchicalLocation = InSubsystem,
tensorCoreFP16: Boolean = false, useVxCache: Boolean = false)
= this(n, location, RocketCrossingParams(
master = HierarchicalElementMasterPortParams.locationDefault(location),
slave = HierarchicalElementSlavePortParams.locationDefault(location),
mmioBaseAddressPrefixWhere = location match {
case InSubsystem => CBUS
case InCluster(clusterId) => CCBUS(clusterId)
}
), tensorCoreFP16, useVxCache)
}
object RadianceGemminiDataType extends Enumeration {
type Type = Value
val FP32, FP16, BF16, Int8 = Value
}
class WithRadianceGemmini(location: HierarchicalLocation, crossing: RocketCrossingParams,
dim: Int, accSizeInKB: Int, tileSize: Either[(Int, Int, Int), Int],
dataType: RadianceGemminiDataType.Type, dmaBytes: Int) extends Config((site, _, up) => {
case TilesLocated(`location`) => {
val prev = up(TilesLocated(`location`))
val idOffset = up(NumTiles)
if (idOffset == 0) {
// FIXME: this doesn't work for multiple clusters when idOffset may not be 0
println("******WARNING****** gemmini tile id is 0! radiance tiles in the same cluster needs to be before gemmini")
}
val numPrevGemminis = prev.map(_.tileParams).map {
case _: GemminiTileParams => 1
case _ => 0
}.sum
val smKey = site(RadianceSharedMemKey).get
val tileParams = GemminiTileParams(
gemminiConfig = {
implicit val arithmetic: Arithmetic[Float] =
Arithmetic.FloatArithmetic.asInstanceOf[Arithmetic[Float]]
dataType match {
case FP32 => GemminiFPConfigs.FP32DefaultConfig
case FP16 => GemminiFPConfigs.FP16DefaultConfig.copy(
acc_scale_args = Some(ScaleArguments(
(t: Float, u: Float) => {t},
1, Float(8, 24), -1, identity = "1.0", c_str = "((x))"
)),
mvin_scale_args = Some(ScaleArguments(
(t: Float, u: Float) => t * u,
1, Float(5, 11), -1, identity = "0x3c00", c_str="((x) * (scale))"
)),
mvin_scale_acc_args = None,
has_training_convs = false,
// hardcode_d_to_garbage_addr = true,
acc_read_full_width = false, // set to true to output fp32
)
case BF16 => GemminiFPConfigs.BF16DefaultConfig
// TODO: Int8
}}.copy(
dataflow = Dataflow.WS,
ex_read_from_acc = false,
ex_write_to_spad = false,
has_training_convs = false,
has_max_pool = false,
use_tl_ext_mem = true,
sp_singleported = false,
spad_read_delay = 4,
use_shared_ext_mem = true,
acc_sub_banks = 1,
has_normalizations = false,
meshRows = dim,
meshColumns = dim,
tile_latency = 0,
mesh_output_delay = 1,
acc_latency = 3,
dma_maxbytes = site(CacheBlockBytes),
dma_buswidth = dmaBytes,
tl_ext_mem_base = smKey.address,
sp_banks = smKey.numBanks,
sp_capacity = CapacityInKilobytes(smKey.size >> 10),
acc_capacity = CapacityInKilobytes(accSizeInKB),
),
tileId = idOffset,
tileSize = tileSize,
slaveAddress = smKey.address + smKey.size + 0x3000 + 0x100 * numPrevGemminis
)
Seq(GemminiTileAttachParams(
tileParams,
crossing
)) ++ prev
}
case NumTiles => up(NumTiles) + 1
}) {
def this(location: HierarchicalLocation, dim: Int, accSizeInKB: Int, tileSize: Either[(Int, Int, Int), Int],
dataType: RadianceGemminiDataType.Type = RadianceGemminiDataType.FP32, dmaBytes: Int = 256) =
this(location, RocketCrossingParams(
master = HierarchicalElementMasterPortParams.locationDefault(location),
slave = HierarchicalElementSlavePortParams.locationDefault(location),
mmioBaseAddressPrefixWhere = location match {
case InSubsystem => CBUS
case InCluster(clusterId) => CCBUS(clusterId)
}
), dim, accSizeInKB, tileSize, dataType, dmaBytes)
def this(location: HierarchicalLocation, dim: Int, accSizeInKB: Int, tileSize: Int) =
this(location, dim, accSizeInKB, Right(tileSize))
def this(location: HierarchicalLocation, dim: Int, accSizeInKB: Int, tileSize: (Int, Int, Int),
dataType: RadianceGemminiDataType.Type) =
this(location, dim, accSizeInKB, Left(tileSize), dataType)
}
class WithRadianceSharedMem(address: BigInt,
size: Int,
numBanks: Int,
numWords: Int,
strideByWord: Boolean = true,
filterAligned: Boolean = true,
disableMonitors: Boolean = true,
serializeUnaligned: Boolean = true
) extends Config((site, _, _) => {
case RadianceSharedMemKey => {
require(isPow2(size) && size >= 1024)
Some(RadianceSharedMemKey(
address, size, numBanks, numWords, 4, strideByWord,
filterAligned, disableMonitors, serializeUnaligned
))
}
})
class WithRadianceFrameBuffer(baseAddress: BigInt,
width: Int,
size: Int,
validAddress: BigInt,
fbName: String = "fb") extends Config((_, _, up) => {
case RadianceFrameBufferKey => {
up(RadianceFrameBufferKey) ++ Seq(
RadianceFrameBufferKey(baseAddress, width, size, validAddress, fbName)
)
}
})
class WithFuzzerCores(
n: Int,
useVxCache: Boolean
) extends Config((site, _, up) => {
case TilesLocated(InSubsystem) => {
val prev = up(TilesLocated(InSubsystem))
val idOffset = up(NumTiles)
val fuzzer = FuzzerTileParams(
core = VortexCoreParams(),
useVxCache = useVxCache)
List.tabulate(n)(i => FuzzerTileAttachParams(
fuzzer.copy(tileId = i + idOffset),
RocketCrossingParams()
)) ++ prev
}
case NumTiles => up(NumTiles) + 1
case NumRadianceCores => up(NumRadianceCores) + 1
})
class WithRadianceCluster(
clusterId: Int,
location: HierarchicalLocation = InSubsystem,
crossing: RocketCrossingParams = RocketCrossingParams()
) extends Config((site, here, up) => {
case ClustersLocated(`location`) => up(ClustersLocated(location)) :+ RadianceClusterAttachParams(
RadianceClusterParams(clusterId = clusterId),
crossing)
case TLNetworkTopologyLocated(InCluster(`clusterId`)) => List(
ClusterBusTopologyParams(
clusterId = clusterId,
csbus = site(SystemBusKey),
ccbus = site(ControlBusKey).copy(errorDevice = None),
coherence = site(ClusterBankedCoherenceKey(clusterId))
)
)
case PossibleTileLocations => up(PossibleTileLocations) :+ InCluster(clusterId)
})
// `nSrcIds`: number of source IDs for each mem lane. This is for all warps
class WithSimtConfig(nWarps: Int = 4, nCoreLanes: Int = 4, nMemLanes: Int = 4, nSrcIds: Int = 8)
extends Config((site, _, up) => {
case SIMTCoreKey => {
Some(up(SIMTCoreKey).getOrElse(SIMTCoreParams()).copy(
nWarps = nWarps,
nCoreLanes = nCoreLanes,
nMemLanes = nMemLanes,
nSrcIds = nSrcIds
))
}
})
class WithMemtraceCore(tracefilename: String, traceHasSource: Boolean = false)
extends Config((site, _, _) => {
case MemtraceCoreKey => {
require(
site(SIMTCoreKey).isDefined,
"Memtrace core requires a SIMT configuration. Use WithNLanes to enable SIMT."
)
Some(MemtraceCoreParams(tracefilename, traceHasSource))
}
})
class WithPriorityCoalXbar extends Config((site, _, up) => {
case CoalXbarKey => {
Some(up(CoalXbarKey).getOrElse(CoalXbarParam))
}
})
class WithVortexL1Banks(nBanks: Int = 4) extends Config ((site, here, up) => {
case VortexL1Key => {
Some(defaultVortexL1Config.copy(
numBanks = nBanks,
inputSize = up(SIMTCoreKey).get.nMemLanes * 4/*32b word*/,
cacheLineSize = up(SIMTCoreKey).get.nMemLanes * 4/*32b word*/,
memSideSourceIds = 16,
mshrSize = 16,
))
}
})
// When `enable` is false, we still elaborate Coalescer, but it acts as a
// pass-through logic that always outputs un-coalesced requests. This is
// useful for when we want to keep the generated wire and net names the same
// to e.g. compare waveforms.
class WithCoalescer(nNewSrcIds: Int = 8, enable : Boolean = true) extends Config((site, _, up) => {
case CoalescerKey => {
val (nLanes, numOldSrcIds) = up(SIMTCoreKey) match {
case Some(param) => (param.nMemLanes, param.nSrcIds)
case None => (1,1)
}
val sbusWidthInBytes = site(SystemBusKey).beatBytes
// FIXME: coalescer fails to instantiate with 4-byte bus
require(sbusWidthInBytes > 2,
"FIXME: coalescer currently doesn't instantiate with 4-byte sbus")
// If instantiating L1 cache, the maximum coalescing size should match the
// cache line size
val maxCoalSizeInBytes = up(VortexL1Key) match {
case Some(param) => param.inputSize
case None => sbusWidthInBytes
}
// Note: this config chooses a single-sized coalescing logic by default.
Some(DefaultCoalescerConfig.copy(
enable = enable,
numLanes = nLanes,
numOldSrcIds = numOldSrcIds,
numNewSrcIds = nNewSrcIds,
addressWidth = 32, // FIXME hardcoded as 32-bit system
dataBusWidth = log2Ceil(maxCoalSizeInBytes),
coalLogSizes = Seq(log2Ceil(maxCoalSizeInBytes))
)
)
}
})
class WithNCustomSmallRocketCores(
n: Int,
overrideIdOffset: Option[Int] = None,
crossing: RocketCrossingParams = RocketCrossingParams()
) extends Config((site, here, up) => {
case TilesLocated(InSubsystem) => {
val prev = up(TilesLocated(InSubsystem))
val idOffset = up(NumTiles)
val med = RocketTileParams(
core = RocketCoreParams(fpu = None),
btb = None,
dcache = Some(DCacheParams(
rowBits = site(SystemBusKey).beatBits,
nSets = 2,
nWays = 1,
nTLBSets = 1,
nTLBWays = 2,
nTLBBasePageSectors = 1,
nTLBSuperpages = 1,
nMSHRs = 0,
blockBytes = site(CacheBlockBytes))),
icache = Some(ICacheParams(
rowBits = site(SystemBusKey).beatBits,
nSets = 2,
nWays = 1,
nTLBSets = 1,
nTLBWays = 2,
nTLBBasePageSectors = 1,
nTLBSuperpages = 1,
blockBytes = site(CacheBlockBytes))))
List.tabulate(n)(i => RocketTileAttachParams(
med.copy(tileId = i + idOffset),
crossing
)) ++ prev
}
case NumTiles => up(NumTiles) + n
})
class WithExtGPUMem(address: BigInt = BigInt("0x100000000", 16),
size: BigInt = 0x80000000) extends Config((site, here, up) => {
case GPUMemory() => Some(GPUMemParams(address, size))
case ExtMem => up(ExtMem).map(x => {
val gap = address - x.master.base - x.master.size
x.copy(master = x.master.copy(size = x.master.size + gap + size))
})
})
case class GPUMemParams(address: BigInt = BigInt("0x100000000", 16), size: BigInt = 0x80000000)
case class GPUMemory() extends Field[Option[GPUMemParams]](None)
object RadianceSimArgs extends Field[Option[Boolean]](None)
class WithRadianceSimParams(enabled: Boolean) extends Config((_, _, _) => {
case RadianceSimArgs => Some(enabled)
})