Merge branch 'main' of https://github.com/ucb-bar/radiance into main
This commit is contained in:
Submodule src/main/resources/vsrc/vortex updated: eb63767051...df4b21507e
@@ -17,10 +17,10 @@ trait CanHaveMemtraceCore { this: BaseSubsystem =>
|
||||
// Safe to use get as WithMemtraceCore requires WithNLanes to be defined
|
||||
val simtParam = p(SIMTCoreKey).get
|
||||
val config = DefaultCoalescerConfig.copy(
|
||||
numLanes = simtParam.nLanes,
|
||||
numLanes = simtParam.nMemLanes,
|
||||
numOldSrcIds = simtParam.nSrcIds
|
||||
)
|
||||
val numLanes = simtParam.nLanes
|
||||
val numLanes = simtParam.nMemLanes
|
||||
val filename = param.tracefilename
|
||||
|
||||
// Need to explicitly generate clock domain; see rocket-chip 8881ccd
|
||||
|
||||
@@ -12,8 +12,13 @@ import freechips.rocketchip.tilelink._
|
||||
|
||||
// TODO: find better place for these
|
||||
|
||||
// Note: numNewSrcId is not a part of CoreParam, because the SIMT core should be agnostic to how in-flight coalesced requests are generated
|
||||
case class SIMTCoreParams(nLanes: Int = 4, nSrcIds: Int = 8)
|
||||
case class SIMTCoreParams(
|
||||
nWarps: Int = 4, // # of warps in the core
|
||||
nCoreLanes: Int = 4, // # of SIMT threads in the core
|
||||
nMemLanes: Int = 4, // # of memory lanes in the memory interface to the
|
||||
// cache; relates to the LSU lanes
|
||||
nSrcIds: Int = 8 // # of source IDs allocated to each of the nMemLanes
|
||||
)
|
||||
case class MemtraceCoreParams(
|
||||
tracefilename: String = "undefined",
|
||||
traceHasSource: Boolean = false
|
||||
@@ -2325,7 +2330,7 @@ class DummyDriverImp(outer: DummyDriver, config: CoalescerConfig)
|
||||
// A dummy harness around the coalescer for use in VLSI flow.
|
||||
// Should not instantiate any memtrace modules.
|
||||
class DummyCoalescer(implicit p: Parameters) extends LazyModule {
|
||||
val numLanes = p(SIMTCoreKey).get.nLanes
|
||||
val numLanes = p(SIMTCoreKey).get.nMemLanes
|
||||
val config = DefaultCoalescerConfig.copy(numLanes = numLanes)
|
||||
|
||||
val driver = LazyModule(new DummyDriver(config))
|
||||
@@ -2362,7 +2367,7 @@ class DummyCoalescerTest(timeout: Int = 500000)(implicit p: Parameters)
|
||||
// tracedriver --> coalescer --> tracelogger --> tlram
|
||||
class TLRAMCoalescerLogger(filename: String)(implicit p: Parameters)
|
||||
extends LazyModule {
|
||||
val numLanes = p(SIMTCoreKey).get.nLanes
|
||||
val numLanes = p(SIMTCoreKey).get.nMemLanes
|
||||
val config = DefaultCoalescerConfig.copy(numLanes = numLanes)
|
||||
|
||||
val driver = LazyModule(new MemTraceDriver(config, filename))
|
||||
@@ -2454,7 +2459,7 @@ class TLRAMCoalescerLoggerTest(filename: String, timeout: Int = 500000)(implicit
|
||||
|
||||
// tracedriver --> coalescer --> tlram
|
||||
class TLRAMCoalescer(implicit p: Parameters) extends LazyModule {
|
||||
val numLanes = p(SIMTCoreKey).get.nLanes
|
||||
val numLanes = p(SIMTCoreKey).get.nMemLanes
|
||||
val config = DefaultCoalescerConfig.copy(numLanes = numLanes)
|
||||
|
||||
val filename = "vecadd.core1.thread4.trace"
|
||||
|
||||
@@ -8,7 +8,7 @@ import freechips.rocketchip.subsystem.{BaseSubsystemConfig}
|
||||
import freechips.rocketchip.devices.tilelink._
|
||||
import freechips.rocketchip.tilelink._
|
||||
import freechips.rocketchip.util._
|
||||
import radiance.subsystem.WithSimtLanes
|
||||
import radiance.subsystem.WithSimtConfig
|
||||
import freechips.rocketchip.unittest._
|
||||
//import rocket.VortexFatBankTest
|
||||
|
||||
@@ -27,7 +27,7 @@ class WithCoalescingUnitTests extends Config((site, _, _) => {
|
||||
// Module(new TLRAMCoalescerLoggerTest(filename="sfilter.core1.thread4.trace", timeout=timeout)),
|
||||
// Module(new TLRAMCoalescerLoggerTest(filename="nearn.core1.thread4.trace", timeout=50000000 * site(TestDurationMultiplier))),
|
||||
// Module(new TLRAMCoalescerLoggerTest(filename="psort.core1.thread4.trace", timeout=timeout)),
|
||||
// Module(new TLRAMCoalescerLoggerTest(filename="nvbit.vecadd.n100000.filter_sm0.trace", timeout=timeout)(new WithSimtLanes(32))),
|
||||
// Module(new TLRAMCoalescerLoggerTest(filename="nvbit.vecadd.n100000.filter_sm0.trace", timeout=timeout)(new WithSimtConfig(32))),
|
||||
// Module(new TLRAMCoalescerLoggerTest(filename="nvbit.vecadd.n100000.filter_sm0.lane4.trace", timeout=timeout)),
|
||||
) }
|
||||
})
|
||||
@@ -48,12 +48,12 @@ class WithCoalescingUnitSynthesisDummy(nLanes: Int) extends Config((site, _, _)
|
||||
implicit val p = q
|
||||
val timeout = 50000 * site(TestDurationMultiplier)
|
||||
Seq(
|
||||
Module(new DummyCoalescerTest(timeout=timeout)(new WithSimtLanes(nLanes=4))),
|
||||
Module(new DummyCoalescerTest(timeout=timeout)(new WithSimtConfig(nMemLanes=4))),
|
||||
) }
|
||||
})
|
||||
|
||||
class CoalescingUnitTestConfig extends Config(new WithCoalescingUnitTests ++ new WithTestDuration(10) ++ new WithSimtLanes(nLanes=4) ++ new BaseSubsystemConfig)
|
||||
//class VortexFatBankUnitTestConfig extends Config(new WithVortexFatBankUnitTests ++ new WithTestDuration(10) ++ new WithSimtLanes(nLanes=4) ++ new BaseSubsystemConfig)
|
||||
class CoalescingUnitTestConfig extends Config(new WithCoalescingUnitTests ++ new WithTestDuration(10) ++ new WithSimtConfig(nMemLanes=4) ++ new BaseSubsystemConfig)
|
||||
//class VortexFatBankUnitTestConfig extends Config(new WithVortexFatBankUnitTests ++ new WithTestDuration(10) ++ new WithSimtConfig(nLanes=4) ++ new BaseSubsystemConfig)
|
||||
|
||||
// Dummy configs of various sizes for synthesis
|
||||
class CoalescingSynthesisDummyLane4Config extends Config(new WithCoalescingUnitSynthesisDummy(4) ++ new WithTestDuration(10) ++ new BaseSubsystemConfig)
|
||||
|
||||
@@ -10,6 +10,7 @@ import org.chipsalliance.cde.config.{Parameters, Field}
|
||||
case object VortexL1Key extends Field[Option[VortexL1Config]](None /*default*/ )
|
||||
|
||||
case class VortexL1Config(
|
||||
cacheSize: Int, // total cache size in bytes
|
||||
numBanks: Int,
|
||||
wordSize: Int, // This is the read/write granularity of the L1 cache
|
||||
cacheLineSize: Int,
|
||||
@@ -34,6 +35,7 @@ case class VortexL1Config(
|
||||
|
||||
object defaultVortexL1Config
|
||||
extends VortexL1Config(
|
||||
cacheSize = 16384,
|
||||
numBanks = 4,
|
||||
wordSize = 16,
|
||||
cacheLineSize = 16,
|
||||
@@ -98,7 +100,7 @@ class VortexBankPassThrough(config: VortexL1Config)(implicit p: Parameters)
|
||||
TLMasterPortParameters.v1(
|
||||
clients = Seq(
|
||||
TLMasterParameters.v1(
|
||||
name = "VortexBank",
|
||||
name = "VortexBankPassthrough",
|
||||
sourceId = IdRange(
|
||||
0,
|
||||
1 << (log2Ceil(
|
||||
@@ -173,7 +175,7 @@ class VortexBank(
|
||||
TLMasterPortParameters.v1(
|
||||
clients = Seq(
|
||||
TLMasterParameters.v1(
|
||||
name = "VortexBank",
|
||||
name = s"VortexBank${bankId}",
|
||||
sourceId = IdRange(0, config.memSideSourceIds),
|
||||
supportsProbe = TransferSizes(1, config.wordSize),
|
||||
supportsGet = TransferSizes(1, config.wordSize),
|
||||
@@ -203,6 +205,8 @@ class VortexBankImp(
|
||||
val vxCache = Module(
|
||||
new VX_cache_top(
|
||||
WORD_SIZE = config.wordSize,
|
||||
// distribute total size across numBanks
|
||||
CACHE_SIZE = config.cacheSize / config.numBanks,
|
||||
CACHE_LINE_SIZE = config.cacheLineSize,
|
||||
CORE_TAG_WIDTH = config.coreTagPlusSizeWidth,
|
||||
MSHR_SIZE = config.mshrSize
|
||||
@@ -389,7 +393,7 @@ class VortexBankImp(
|
||||
class VX_cache_top(
|
||||
// these values should match the default settings in Verilog
|
||||
// TODO: INSTANCE_ID
|
||||
CACHE_SIZE: Int = 16384 / 4, // <FIXME, divided by 4 for faster simulation
|
||||
CACHE_SIZE: Int = 16384,
|
||||
CACHE_LINE_SIZE: Int = 16,
|
||||
NUM_WAYS: Int = 4,
|
||||
// for single-bank configuration, set NUM_REQS = 1 and instead set
|
||||
@@ -408,10 +412,10 @@ class VX_cache_top(
|
||||
) extends BlackBox(
|
||||
Map(
|
||||
// NOTE: NUM_REQS is analogous to SIMD width, whereas NUM_BANKS is the
|
||||
// actual number of banks. VX_cache.sv instantiates VX_stream_xbar
|
||||
// that arbitrates the higher NUM_REQS into NUM_BANKS. Since we do
|
||||
// that logic ourselves using TL units, fix those params to 1 for the
|
||||
// Verilog side.
|
||||
// actual number of banks. In the original Vortex code, VX_cache has
|
||||
// VX_stream_xbar that arbitrates the incoming NUM_REQS into outgoing
|
||||
// NUM_BANKS. Since we do that logic ourselves using TL Xbars, fix
|
||||
// those params to 1 for Verilog.
|
||||
"NUM_REQS" -> 1,
|
||||
"CACHE_SIZE" -> CACHE_SIZE,
|
||||
"LINE_SIZE" -> CACHE_LINE_SIZE,
|
||||
@@ -606,7 +610,7 @@ class NewSourceGenerator[T <: Data](
|
||||
oldestMetadata := occupancyTable(oldestIndex).meta
|
||||
oldestAge := occupancyTable(oldestIndex).age
|
||||
assert(
|
||||
oldestAge <= 2000.U,
|
||||
oldestAge <= 10000.U,
|
||||
"One id in the SourceGen is not released for long time, potential bug !"
|
||||
)
|
||||
|
||||
|
||||
@@ -13,10 +13,12 @@ import radiance.memory._
|
||||
|
||||
class WithRadianceCores(
|
||||
n: Int,
|
||||
location: HierarchicalLocation,
|
||||
crossing: RocketCrossingParams,
|
||||
useVxCache: Boolean
|
||||
) extends Config((site, _, up) => {
|
||||
case TilesLocated(InSubsystem) => {
|
||||
val prev = up(TilesLocated(InSubsystem), site)
|
||||
case TilesLocated(`location`) => {
|
||||
val prev = up(TilesLocated(`location`), site)
|
||||
val idOffset = prev.size
|
||||
val vortex = RadianceTileParams(
|
||||
core = VortexCoreParams(fpu = None),
|
||||
@@ -43,10 +45,19 @@ class WithRadianceCores(
|
||||
blockBytes = site(CacheBlockBytes))))
|
||||
List.tabulate(n)(i => RadianceTileAttachParams(
|
||||
vortex.copy(tileId = i + idOffset),
|
||||
RocketCrossingParams()
|
||||
crossing
|
||||
)) ++ prev
|
||||
}
|
||||
})
|
||||
}) {
|
||||
def this(n: Int, location: HierarchicalLocation = InSubsystem, useVxCache: Boolean = false) = this(n, location, RocketCrossingParams(
|
||||
master = HierarchicalElementMasterPortParams.locationDefault(location),
|
||||
slave = HierarchicalElementSlavePortParams.locationDefault(location),
|
||||
mmioBaseAddressPrefixWhere = location match {
|
||||
case InSubsystem => CBUS
|
||||
case InCluster(clusterId) => CCBUS(clusterId)
|
||||
}
|
||||
), useVxCache)
|
||||
}
|
||||
|
||||
class WithFuzzerCores(
|
||||
n: Int,
|
||||
@@ -65,11 +76,33 @@ class WithFuzzerCores(
|
||||
}
|
||||
})
|
||||
|
||||
class WithRadianceCluster(
|
||||
clusterId: Int,
|
||||
location: HierarchicalLocation = InSubsystem,
|
||||
crossing: RocketCrossingParams = RocketCrossingParams() // TODO make this not rocket
|
||||
) extends Config((site, here, up) => {
|
||||
case ClustersLocated(`location`) => up(ClustersLocated(location)) :+ RadianceClusterAttachParams(
|
||||
RadianceClusterParams(clusterId = clusterId),
|
||||
crossing)
|
||||
case TLNetworkTopologyLocated(InCluster(`clusterId`)) => List(
|
||||
ClusterBusTopologyParams(
|
||||
clusterId = clusterId,
|
||||
csbus = site(SystemBusKey),
|
||||
ccbus = site(ControlBusKey).copy(errorDevice = None),
|
||||
coherence = site(ClusterBankedCoherenceKey(clusterId))
|
||||
)
|
||||
)
|
||||
case PossibleTileLocations => up(PossibleTileLocations) :+ InCluster(clusterId)
|
||||
})
|
||||
|
||||
// `nSrcIds`: number of source IDs for dmem requests on each SIMT lane
|
||||
class WithSimtLanes(nLanes: Int, nSrcIds: Int = 8) extends Config((site, _, up) => {
|
||||
class WithSimtConfig(nWarps: Int = 4, nCoreLanes: Int = 4, nMemLanes: Int = 4, nSrcIds: Int = 8)
|
||||
extends Config((site, _, up) => {
|
||||
case SIMTCoreKey => {
|
||||
Some(up(SIMTCoreKey, site).getOrElse(SIMTCoreParams()).copy(
|
||||
nLanes = nLanes,
|
||||
nWarps = nWarps,
|
||||
nCoreLanes = nCoreLanes,
|
||||
nMemLanes = nMemLanes,
|
||||
nSrcIds = nSrcIds
|
||||
))
|
||||
}
|
||||
@@ -105,7 +138,7 @@ class WithVortexL1Banks(nBanks: Int = 4) extends Config ((site, _, up) => {
|
||||
class WithCoalescer(nNewSrcIds: Int = 8, enable : Boolean = true) extends Config((site, _, up) => {
|
||||
case CoalescerKey => {
|
||||
val (nLanes, numOldSrcIds) = up(SIMTCoreKey, site) match {
|
||||
case Some(param) => (param.nLanes, param.nSrcIds)
|
||||
case Some(param) => (param.nMemLanes, param.nSrcIds)
|
||||
case None => (1,1)
|
||||
}
|
||||
|
||||
@@ -182,4 +215,4 @@ class WithExtGPUMem(address: BigInt = BigInt("0x100000000", 16),
|
||||
})
|
||||
})
|
||||
case class GPUMemParams(address: BigInt = BigInt("0x100000000", 16), size: BigInt = 0x80000000)
|
||||
case class GPUMemory() extends Field[Option[GPUMemParams]](None)
|
||||
case class GPUMemory() extends Field[Option[GPUMemParams]](None)
|
||||
|
||||
@@ -9,3 +9,10 @@ case class RadianceTileAttachParams(
|
||||
tileParams: RadianceTileParams,
|
||||
crossingParams: RocketCrossingParams
|
||||
) extends CanAttachTile { type TileType = RadianceTile }
|
||||
|
||||
case class RadianceClusterAttachParams (
|
||||
clusterParams: RadianceClusterParams,
|
||||
crossingParams: HierarchicalElementCrossingParamsLike
|
||||
) extends CanAttachCluster {
|
||||
type ClusterType = RadianceCluster
|
||||
}
|
||||
|
||||
98
src/main/scala/radiance/tile/Barrier.scala
Normal file
98
src/main/scala/radiance/tile/Barrier.scala
Normal file
@@ -0,0 +1,98 @@
|
||||
// See LICENSE.SiFive for license details.
|
||||
// See LICENSE.Berkeley for license details.
|
||||
|
||||
package radiance.tile
|
||||
|
||||
import chisel3._
|
||||
import chisel3.experimental.SourceInfo
|
||||
import chisel3.util._
|
||||
|
||||
import org.chipsalliance.cde.config.{Field, Parameters}
|
||||
import freechips.rocketchip.subsystem._
|
||||
import freechips.rocketchip.diplomacy._
|
||||
|
||||
case class EmptyParams()
|
||||
|
||||
case class BarrierParams(
|
||||
barrierIdBits: Int,
|
||||
numCoreBits: Int
|
||||
)
|
||||
|
||||
class BarrierRequestBits(
|
||||
param: BarrierParams
|
||||
) extends Bundle {
|
||||
val barrierId = UInt(param.barrierIdBits.W)
|
||||
val sizeMinusOne = UInt(param.numCoreBits.W)
|
||||
val coreId = UInt(param.numCoreBits.W)
|
||||
}
|
||||
|
||||
class BarrierResponseBits(
|
||||
param: BarrierParams
|
||||
) extends Bundle {
|
||||
val barrierId = UInt(param.barrierIdBits.W)
|
||||
}
|
||||
|
||||
class BarrierBundle(param: BarrierParams) extends Bundle {
|
||||
val req = Decoupled(new BarrierRequestBits(param))
|
||||
val resp = Flipped(Decoupled(new BarrierResponseBits(param)))
|
||||
}
|
||||
|
||||
// FIXME Separate BarrierEdgeParams from BarrierParams
|
||||
object BarrierNodeImp extends SimpleNodeImp[BarrierParams, EmptyParams, BarrierParams, BarrierBundle] {
|
||||
def edge(pd: BarrierParams, pu: EmptyParams, p: Parameters, sourceInfo: SourceInfo) = {
|
||||
// barrier parameters flow strictly downward from the master node
|
||||
pd
|
||||
}
|
||||
def bundle(e: BarrierParams) = new BarrierBundle(e)
|
||||
// FIXME render
|
||||
def render(e: BarrierParams) = RenderedEdge(colour = "ffffff", label = "X")
|
||||
}
|
||||
|
||||
case class BarrierMasterNode(val srcParams: BarrierParams)(implicit valName: ValName)
|
||||
extends SourceNode(BarrierNodeImp)(Seq(srcParams))
|
||||
case class BarrierSlaveNode(val numEdges: Int)(implicit valName: ValName)
|
||||
extends SinkNode(BarrierNodeImp)(Seq.fill(numEdges)(EmptyParams()))
|
||||
|
||||
/** Collects barrier-arrival requests from every core and emits a single
  * response once all cores have arrived at the same barrier id.
  *
  * There is one request port per core (`1 << param.numCoreBits` ports) and
  * one shared response port.  Arrival is tracked in a per-barrier-id,
  * per-core table of "done" bits; when every bit in a row is set, that
  * barrier id is offered to a round-robin arbiter, and the row is cleared in
  * the cycle the response is accepted.
  *
  * Note that `sizeMinusOne` in the request is not consulted here: a barrier
  * is released only when *all* request ports have fired for that id.
  *
  * @param param bit widths of the barrier id and core id fields
  */
class BarrierSynchronizer(param: BarrierParams) extends Module {
  val numBarrierIds = 1 << param.barrierIdBits
  val numCores = 1 << param.numCoreBits
  println(s"numBarrierIds: ${numBarrierIds}, numCores: ${numCores}")

  val io = IO(new Bundle {
    val reqs = Vec(numCores, Flipped(Decoupled(new BarrierRequestBits(param))))
    val resp = Decoupled(new BarrierResponseBits(param))
  })

  // 2-dimensional table of per-id, per-core "done" signals
  val table = RegInit(VecInit(Seq.fill(numBarrierIds)(VecInit(Seq.fill(numCores)(false.B)))))
  // done(i) is high when every core has arrived at barrier id i
  val done = Wire(Vec(numBarrierIds, Bool()))
  (done zip table).foreach { case (rowDone, row) =>
    rowDone := row.reduce(_ && _)
  }
  dontTouch(done)

  io.reqs.zipWithIndex.foreach { case (req, coreId) =>
    // always ready; all this module does is latch to boolean regs
    req.ready := true.B
    when(req.fire) {
      // the request's self-reported core id must match the port it came in on
      assert(coreId.U === req.bits.coreId)
      // The column is selected by the elaboration-time port index, so only
      // the barrier id needs to be a hardware (dynamic) index.
      table(req.bits.barrierId)(coreId) := true.B
    }
  }

  val doneArbiter = Module(new RRArbiter(Bool(), numBarrierIds))
  (doneArbiter.io.in zip done).zipWithIndex.foreach { case ((in, d), i) =>
    in.valid := d
    in.bits := d
    // An arbiter input fires exactly when it is the chosen input and the
    // response is accepted, so this alone resets the released barrier's row.
    // It is connected after the set logic above, so a clear takes priority
    // over a set in the same cycle.
    when(in.fire) {
      table(i).foreach(_ := false.B)
    }
  }
  io.resp.valid := doneArbiter.io.out.valid
  io.resp.bits.barrierId := doneArbiter.io.chosen
  doneArbiter.io.out.ready := io.resp.ready
}
|
||||
@@ -60,7 +60,7 @@ class FuzzerTile private (
|
||||
// val statusNode = BundleBridgeSource(() => new GroundTestStatus)
|
||||
|
||||
val (numLanes, numSrcIds) = p(SIMTCoreKey) match {
|
||||
case Some(param) => (param.nLanes, param.nSrcIds)
|
||||
case Some(param) => (param.nMemLanes, param.nSrcIds)
|
||||
case None => {
|
||||
require(false, "fuzzer requires SIMTCoreKey to be defined")
|
||||
(0, 0)
|
||||
|
||||
150
src/main/scala/radiance/tile/RadianceCluster.scala
Normal file
150
src/main/scala/radiance/tile/RadianceCluster.scala
Normal file
@@ -0,0 +1,150 @@
|
||||
// See LICENSE.SiFive for license details.
|
||||
// See LICENSE.Berkeley for license details.
|
||||
|
||||
package radiance.tile
|
||||
|
||||
import chisel3._
|
||||
import chisel3.experimental.SourceInfo
|
||||
import chisel3.util._
|
||||
|
||||
import org.chipsalliance.cde.config.Parameters
|
||||
import freechips.rocketchip.subsystem._
|
||||
import freechips.rocketchip.tilelink._
|
||||
import freechips.rocketchip.diplomacy.{LazyModule, AddressSet, SimpleDevice, ClockCrossingType}
|
||||
import freechips.rocketchip.regmapper.RegField
|
||||
import freechips.rocketchip.prci.ClockSinkParameters
|
||||
|
||||
case class RadianceClusterParams(
|
||||
val clusterId: Int,
|
||||
val clockSinkParams: ClockSinkParameters = ClockSinkParameters()
|
||||
) extends InstantiableClusterParams[RadianceCluster] {
|
||||
val baseName = "radiance_cluster"
|
||||
val uniqueName = s"${baseName}_$clusterId"
|
||||
def instantiate(crossing: HierarchicalElementCrossingParamsLike, lookup: LookupByClusterIdImpl)(implicit p: Parameters): RadianceCluster = {
|
||||
new RadianceCluster(this, crossing.crossingType, lookup)
|
||||
}
|
||||
}
|
||||
|
||||
class RadianceCluster (
|
||||
thisClusterParams: RadianceClusterParams,
|
||||
crossing: ClockCrossingType,
|
||||
lookup: LookupByClusterIdImpl
|
||||
)(implicit p: Parameters) extends Cluster(thisClusterParams, crossing, lookup) {
|
||||
// cluster-local bus, used for shared memory traffic that never leaves the
|
||||
// confines of a cluster
|
||||
val clbus = tlBusWrapperLocationMap(CLBUS(clusterId))
|
||||
|
||||
clbus.clockGroupNode := allClockGroupsNode
|
||||
|
||||
// Instantiate cluster-local shared memory scratchpad
|
||||
//
|
||||
// Instantiate the same number of banks as there are lanes.
|
||||
val numLsuLanes = 4 // FIXME: hardcoded
|
||||
val wordSize = 4
|
||||
val smemBanks = Seq.tabulate(numLsuLanes) { bankId =>
|
||||
// Banked-by-word (4 bytes)
|
||||
// base for bank 1: ff...000000|01|00
|
||||
// mask for bank 1; 00...111111|00|11
|
||||
val base = 0xff000000L | (bankId * wordSize)
|
||||
val mask = 0x00001fffL ^ ((numLsuLanes - 1) * wordSize)
|
||||
LazyModule(new TLRAM(AddressSet(base, mask), beatBytes = wordSize))
|
||||
}
|
||||
smemBanks.foreach(_.node := clbus.outwardNode)
|
||||
|
||||
val numCores = leafTiles.size
|
||||
|
||||
// Diplomacy sink nodes for cluster-wide barrier sync signal
|
||||
val barrierSlaveNode = BarrierSlaveNode(numCores)
|
||||
|
||||
// HACK: This is a workaround of the CanAttachTile bus connecting API that
|
||||
// works by downcasting tile and directly accessing the node inside that is
|
||||
// not exposed as a master in HierarchicalElementCrossingParamsLike.
|
||||
// val tile = leafTiles(0).asInstanceOf[RadianceTile]
|
||||
// val perSmemPortXbars = Seq.fill(tile.smemNodes.size) { LazyModule(new TLXbar) }
|
||||
|
||||
// Tie corresponding smem ports from every tile into a single port using
|
||||
// Xbars so that the number of ports going into the sharedmem do not scale
|
||||
// with the number of tiles.
|
||||
leafTiles.foreach { case (id, tile: RadianceTile) =>
|
||||
// (perSmemPortXbars zip tile.smemNodes).foreach {
|
||||
// case (xbar, node) => xbar.node := node
|
||||
// }
|
||||
tile.smemNodes.foreach(clbus.inwardNode := _)
|
||||
barrierSlaveNode := tile.barrierMasterNode
|
||||
}
|
||||
// perSmemPortXbars.foreach { clbus.inwardNode := _.node }
|
||||
|
||||
// Memory-mapped register for barrier sync
|
||||
val regDevice = new SimpleDevice("radiance-cluster-barrier-reg",
|
||||
Seq(s"radiance-cluster-barrier-reg${clusterId}"))
|
||||
val regNode = TLRegisterNode(
|
||||
address = Seq(AddressSet(0xff003f00L, 0xff)),
|
||||
device = regDevice,
|
||||
beatBytes = wordSize,
|
||||
concurrency = 1)
|
||||
regNode := clbus.outwardNode
|
||||
|
||||
nodes.foreach({ node =>
|
||||
println(s"======= RadianceCluster node.name: ${node.name}")
|
||||
})
|
||||
|
||||
override lazy val module = new RadianceClusterModuleImp(this)
|
||||
}
|
||||
|
||||
/** Module implementation of [[RadianceCluster]].
  *
  * Instantiates the cluster-wide [[BarrierSynchronizer]], wires it to the
  * barrier edges coming from every tile, and exposes a small bank of
  * memory-mapped registers on `outer.regNode`.
  *
  * Register layout, as offsets into the regNode address range, for barrier
  * `b` in `0 until numBarriers`:
  *   - `0x10 * b`               : read-only, nonzero when every per-core
  *                                register of barrier `b` is nonzero
  *   - `0x10 * b + 4 * (c + 1)` : read/write per-core register for core `c`
  *
  * The 0x10 stride leaves room for at most three per-core registers per
  * barrier; cores beyond that are not memory-mapped.
  */
class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp(outer) {
  outer.leafTiles.foreach { case (id, tile: RadianceTile) =>
    // println(s"======= RadianceCluster: tile.smemXbar.node.edge = ${tile.smemXbar.node.out.size}")
    println(s"======= RadianceCluster: clbus inward edges = ${outer.clbus.inwardNode.inward.inputs.length}")
    println(s"======= RadianceCluster: clbus name = ${outer.clbus.busName}")
  }

  val numBarriers = 4 // FIXME: hardcoded

  // The synchronizer is parametrized from the first edge only, so every edge
  // must agree on the barrier parameters.
  require(outer.barrierSlaveNode.in.nonEmpty,
    "RadianceCluster requires at least one tile attached to barrierSlaveNode")
  val barrierParam = outer.barrierSlaveNode.in(0)._2
  require(outer.barrierSlaveNode.in.forall { case (_, edge) => edge == barrierParam },
    "all tiles in a RadianceCluster must use identical BarrierParams")

  val synchronizer = Module(new BarrierSynchronizer(barrierParam))
  (synchronizer.io.reqs zip outer.barrierSlaveNode.in).foreach { case (req, (b, _)) =>
    req <> b.req
    b.resp <> synchronizer.io.resp // broadcast
  }

  // outer.barrierSlaveNode.in.foreach { case (b, e) =>
  //   val fakeBarrierRespId = RegNext(b.req.bits.barrierId)
  //   val fakeBarrierRespValid = RegNext(b.req.fire)
  //   b.req.ready := true.B // barrier module is always ready
  //   b.resp.valid := fakeBarrierRespValid
  //   b.resp.bits.barrierId := fakeBarrierRespId
  // }

  val allSyncedRegs = Seq.fill(numBarriers)(Wire(UInt(32.W)))
  val perCoreSyncedRegs = Seq.fill(numBarriers)(Seq.fill(outer.numCores)(RegInit(0.U(32.W))))
  (allSyncedRegs zip perCoreSyncedRegs).foreach { case (all, per) =>
    // high when every core's register for this barrier is nonzero
    all := per.map(_ =/= 0.U).reduce(_ && _)

    // once every core's register reads 2, reset the whole group to 0
    // (the meaning of the values 1 and 2 is a software convention that is
    // not visible in this file -- TODO confirm against the runtime)
    val allPassed = per.map(_ === 2.U).reduce(_ && _)
    when(allPassed) {
      per.foreach(_ := 0.U)
    }

    dontTouch(all)
  }

  // Byte stride between consecutive barriers' register groups.
  private val barrierStride = 0x10
  // Each group is one all-synced word followed by per-core words, so the
  // stride bounds how many cores can be mapped.
  private val maxMappedCores = barrierStride / 4 - 1
  private val mappedCores = math.min(outer.numCores, maxMappedCores)
  private val barrierRegMap = (0 until numBarriers).flatMap { b =>
    val base = b * barrierStride
    val allSynced = base -> Seq(RegField.r(32, allSyncedRegs(b)))
    val perCore = (0 until mappedCores).map { c =>
      (base + 4 * (c + 1)) -> Seq(RegField(32, perCoreSyncedRegs(b)(c)))
    }
    allSynced +: perCore
  }
  outer.regNode.regmap(barrierRegMap: _*)

  println(s"======== barrierSlaveNode: ${outer.barrierSlaveNode.in(0)._2.barrierIdBits}")
}
|
||||
@@ -140,10 +140,21 @@ class RadianceTile private (
|
||||
|
||||
require(
|
||||
p(SIMTCoreKey).isDefined,
|
||||
"SIMTCoreKey not defined; make sure to use WithSimtLanes when using RadianceTile"
|
||||
"SIMTCoreKey not defined; make sure to use WithSimtConfig when using RadianceTile"
|
||||
)
|
||||
val numLanes = p(SIMTCoreKey) match {
|
||||
case Some(simtParam) => simtParam.nLanes
|
||||
|
||||
// NOTE: when changing these, remember to change +define+NUM_CORES/THREADS/WARPS in
|
||||
// radiance.mk as well!
|
||||
val numWarps = p(SIMTCoreKey) match {
|
||||
case Some(simtParam) => simtParam.nWarps
|
||||
case None => 4
|
||||
}
|
||||
val numCoreLanes = p(SIMTCoreKey) match {
|
||||
case Some(simtParam) => simtParam.nCoreLanes
|
||||
case None => 4
|
||||
}
|
||||
val numLsuLanes = p(SIMTCoreKey) match {
|
||||
case Some(simtParam) => simtParam.nMemLanes
|
||||
case None => 4
|
||||
}
|
||||
|
||||
@@ -170,13 +181,14 @@ class RadianceTile private (
|
||||
|
||||
val smemSourceWidth = 4 // FIXME: hardcoded
|
||||
|
||||
val numWarps = 4 // TODO: parametrize
|
||||
// Replicates some of the logic of how Vortex determines the tag width of
|
||||
// memory requests so that Chisel and Verilog are in agreement on bitwidths.
|
||||
// See VX_gpu_pkg.sv
|
||||
val NW_WIDTH = (if (numWarps == 1) 1 else log2Ceil(numWarps))
|
||||
val UUID_WIDTH = 44
|
||||
val imemTagWidth = UUID_WIDTH + NW_WIDTH
|
||||
val numLsuLanes = 4
|
||||
// see VX_gpu_pkg.sv
|
||||
val LSUQ_SIZE = 8 * (numLanes / numLsuLanes)
|
||||
|
||||
val LSUQ_SIZE = 8 * (numCoreLanes / numLsuLanes)
|
||||
val LSUQ_TAG_BITS = log2Ceil(LSUQ_SIZE) + 1 /*DCACHE_BATCH_SEL_BITS*/
|
||||
val dmemTagWidth = UUID_WIDTH + LSUQ_TAG_BITS
|
||||
// dmem and smem shares the same tag width, DCACHE_NOSM_TAG_WIDTH
|
||||
@@ -291,15 +303,13 @@ class RadianceTile private (
|
||||
// Conditionally instantiate L1 cache
|
||||
val (icacheNode, dcacheNode): (TLNode, TLNode) = p(VortexL1Key) match {
|
||||
case Some(vortexL1Config) => {
|
||||
println(
|
||||
s"============ Using Vortex L1 cache ================="
|
||||
)
|
||||
println("VortexL1Cache instantiated")
|
||||
// require(
|
||||
// p(CoalescerKey).isDefined,
|
||||
// "Vortex L1 configuration currently only works when coalescer is also enabled."
|
||||
// )
|
||||
|
||||
val icache = LazyModule(new VortexL1Cache(vortexL1Config))
|
||||
val icache = LazyModule(new VortexL1Cache(vortexL1Config.copy(numBanks = 1)))
|
||||
val dcache = LazyModule(new VortexL1Cache(vortexL1Config))
|
||||
// imemNodes.foreach { icache.coresideNode := TLWidthWidget(4) := _ }
|
||||
assert(imemNodes.length == 1) // FIXME
|
||||
@@ -316,22 +326,10 @@ class RadianceTile private (
|
||||
}
|
||||
}
|
||||
|
||||
// Instantiate sharedmem banks
|
||||
//
|
||||
// Instantiate the same number of banks as there are lanes.
|
||||
// TODO: parametrize
|
||||
// val smemBanks = Seq.tabulate(numLsuLanes) { bankId =>
|
||||
// // Banked-by-word (4 bytes)
|
||||
// // base for bank 1: ff...000000|01|00
|
||||
// // mask for bank 1; 00...111111|00|11
|
||||
// val base = 0xff000000L | (bankId * 4 /*wordSize*/ )
|
||||
// val mask = 0x00001fffL ^ ((numLsuLanes - 1) * 4 /*wordSize*/ )
|
||||
// LazyModule(new TLRAM(AddressSet(base, mask), beatBytes = 4 /*wordSize*/ ))
|
||||
// }
|
||||
// smem lanes-to-banks crossbar
|
||||
val smemXbar = LazyModule(new TLXbar)
|
||||
smemNodes.foreach(smemXbar.node := _)
|
||||
// smemBanks.foreach(_.node := smemXbar.node)
|
||||
// Barrier synchronization node
|
||||
// FIXME: hardcoded params
|
||||
val barrierParams = BarrierParams(barrierIdBits = 2, numCoreBits = 1)
|
||||
val barrierMasterNode = BarrierMasterNode(barrierParams)
|
||||
|
||||
val base = p(GPUMemory()) match {
|
||||
case Some(GPUMemParams(baseAddr, _)) => baseAddr
|
||||
@@ -346,7 +344,6 @@ class RadianceTile private (
|
||||
tlMasterXbar.node :=* AddressOrNode(base) :=* dcacheNode
|
||||
}
|
||||
|
||||
|
||||
// ROCC
|
||||
// TODO: parametrize
|
||||
val gemmini = LazyModule(new Gemmini(GemminiFPConfigs.FP32DefaultConfig.copy(
|
||||
@@ -371,14 +368,14 @@ class RadianceTile private (
|
||||
tlOtherMastersNode :=* AddressOrNode(base) :=* gemmini.tlNode
|
||||
|
||||
// MMIO
|
||||
gemmini.stlNode :=* TLWidthWidget(4) :=* smemXbar.node
|
||||
// gemmini.stlNode :=* TLWidthWidget(4) :=* smemXbar.node
|
||||
// sharedmem access
|
||||
//
|
||||
// FIXME: gemmini spad has 16B data width; core smem interface has 4B. Need
|
||||
// to consolidate by either coalescing, or changing gemmini spad to
|
||||
// strided-by-word
|
||||
gemmini.unified_mem_node :=* TLWidthWidget(4) :=* smemXbar.node
|
||||
TLRAM(AddressSet(x"ff004000", 0xfff)) := TLFragmenter(4, 4) := smemXbar.node
|
||||
// gemmini.unified_mem_node :=* TLWidthWidget(4) :=* smemXbar.node
|
||||
// TLRAM(AddressSet(x"ff004000", 0xfff)) := TLFragmenter(4, 4) := smemXbar.node
|
||||
|
||||
/* below are copied from rocket */
|
||||
|
||||
@@ -462,6 +459,10 @@ class RadianceTileModuleImp(outer: RadianceTile)
|
||||
extends BaseTileModuleImp(outer) {
|
||||
Annotated.params(this, outer.radianceParams)
|
||||
|
||||
auto.elements.foreach({case (name, _) =>
|
||||
println(s"======= RadianceTile.elements.name: ${name}")
|
||||
})
|
||||
|
||||
val core = Module(new Vortex(outer)(outer.p))
|
||||
|
||||
core.io.clock := clock
|
||||
@@ -532,6 +533,11 @@ class RadianceTileModuleImp(outer: RadianceTile)
|
||||
// TODO: make imemNodes not a vector
|
||||
imemTLAdapter.io.inReq <> core.io.imem.get(0).a
|
||||
core.io.imem.get(0).d <> imemTLAdapter.io.inResp
|
||||
|
||||
performanceCounters(Seq(imemTLAdapter.io.inReq), Seq(imemTLAdapter.io.inResp),
|
||||
desc = s"core${outer.tileId}-imem")
|
||||
|
||||
// now connect TL adapter downstream ports to the tile egress ports
|
||||
outer.imemNodes(0).out(0)._1.a <> imemTLAdapter.io.outReq
|
||||
imemTLAdapter.io.outResp <> outer.imemNodes(0).out(0)._1.d
|
||||
}
|
||||
@@ -629,6 +635,10 @@ class RadianceTileModuleImp(outer: RadianceTile)
|
||||
}
|
||||
core.io.dmem_d_valid := dmem_d_valid_vec.asUInt
|
||||
|
||||
performanceCounters(dmemTLAdapters.map(_.io.inReq), dmemTLAdapters.map(_.io.inResp),
|
||||
desc = s"core${outer.tileId}-dmem")
|
||||
|
||||
// now connect TL adapter downstream ports to the tile egress ports
|
||||
(dmemTLAdapters zip dmemTLBundles) foreach { case (tlAdapter, tlOut) =>
|
||||
tlOut.a <> tlAdapter.io.outReq
|
||||
tlAdapter.io.outResp <> tlOut.d
|
||||
@@ -678,47 +688,114 @@ class RadianceTileModuleImp(outer: RadianceTile)
|
||||
tlAdapter.io.inResp.ready := core.io.smem_d_ready(i)
|
||||
}
|
||||
|
||||
performanceCounters(smemTLAdapters.map(_.io.inReq), smemTLAdapters.map(_.io.inResp),
|
||||
desc = s"core${outer.tileId}-smem")
|
||||
|
||||
// now connect TL adapter downstream ports to the tile egress ports
|
||||
(smemTLAdapters zip smemTLBundles) foreach { case (tlAdapter, tlOut) =>
|
||||
tlOut.a <> tlAdapter.io.outReq
|
||||
tlAdapter.io.outResp <> tlOut.d
|
||||
}
|
||||
}
|
||||
|
||||
// Wires the core's global-barrier (gbar) request/response ports to the single
// outgoing edge of the tile's barrier master node.
def connectBarrier = {
  require(outer.barrierMasterNode.out.length == 1)
  val (barrier, _) = outer.barrierMasterNode.out.head

  // request channel: core -> cluster
  // FIXME: bits not flattened
  barrier.req.valid := core.io.gbar_req_valid
  barrier.req.bits.barrierId := core.io.gbar_req_id
  barrier.req.bits.coreId := core.io.gbar_req_core_id
  core.io.gbar_req_ready := barrier.req.ready

  // response channel: cluster -> core
  core.io.gbar_rsp_valid := barrier.resp.valid
  core.io.gbar_rsp_id := barrier.resp.bits.barrierId
  // core doesn't have a resp.ready port
  barrier.resp.ready := true.B
}
|
||||
|
||||
/** Instantiates read-traffic counters for one group of memory ports and
  * prints a summary on the cycle `core.io.finished` rises.
  *
  * Only read traffic is counted: requests whose opcode is 4 (Get) and
  * responses whose opcode is 1 (AccessAckData).
  *
  * The printed pair is `pendingReqsCumulative / totalReqs`, i.e. the per-cycle
  * sum of outstanding reads over the number of reads issued, which the
  * message labels as the average request latency.
  *
  * @param reqBundles  request channels to observe
  * @param respBundles response channels to observe
  * @param desc        label embedded in the printed summary line
  */
def performanceCounters(reqBundles: Seq[DecoupledIO[VortexBundleA]],
                        respBundles: Seq[DecoupledIO[VortexBundleD]],
                        desc: String) = {
  // Opcode encodings that identify read traffic.
  val getOpcode = 4 /* Get */
  val accessAckDataOpcode = 1 /* AccessAckData */

  // Running state. All counters are 32 bits wide.
  val currentPendingReqs = RegInit(0.S(32.W))
  val pendingReqsCumulative = RegInit(0.S(32.W))
  val totalReqs = RegInit(0.U(32.W))

  // Per-cycle handshake counts. These stay explicit 32-bit wires so that the
  // asSInt reinterpretation below sees a zero-extended value with a clear
  // sign bit.
  val reqFireCountPerCycle = Wire(UInt(32.W))
  val respFireCountPerCycle = Wire(UInt(32.W))
  val reqReadFires = reqBundles.map { req =>
    req.fire && req.bits.opcode === getOpcode.U
  }
  val respReadFires = respBundles.map { resp =>
    resp.fire && resp.bits.opcode === accessAckDataOpcode.U
  }
  reqFireCountPerCycle := PopCount(reqReadFires)
  respFireCountPerCycle := PopCount(respReadFires)

  totalReqs := totalReqs + reqFireCountPerCycle

  // Net change in outstanding reads this cycle: issued minus answered.
  val diffPendingReqs = reqFireCountPerCycle.asSInt - respFireCountPerCycle.asSInt
  currentPendingReqs := currentPendingReqs + diffPendingReqs
  // Accumulate the registered outstanding count once per cycle.
  pendingReqsCumulative := pendingReqsCumulative + currentPendingReqs

  // Rising-edge detect on core.io.finished so the summary prints once per
  // rising edge.
  val prevFinished = RegNext(core.io.finished)
  val justFinished = core.io.finished && !prevFinished
  when (justFinished) {
    printf(s"PERF: ${desc}: average request latency (cum_pending / total): %d / %d\n",
      pendingReqsCumulative, totalReqs)
  }

  // Keep the counters from being optimized away.
  dontTouch(totalReqs)
  dontTouch(diffPendingReqs)
  dontTouch(currentPendingReqs)
  dontTouch(pendingReqsCumulative)
}
|
||||
|
||||
connectImem
|
||||
connectDmem
|
||||
connectSmem
|
||||
connectBarrier
|
||||
}
|
||||
|
||||
// TODO: generalize for useVxCache
|
||||
if (!outer.radianceParams.useVxCache) {}
|
||||
|
||||
// RoCC
|
||||
if (outer.roccs.size > 0) {
|
||||
val (respArb, cmdRouter) = {
|
||||
val respArb = Module(new RRArbiter(new RoCCResponse()(outer.p), outer.roccs.size))
|
||||
val cmdRouter = Module(new RoccCommandRouter(outer.roccs.map(_.opcodes))(outer.p))
|
||||
outer.roccs.zipWithIndex.foreach { case (rocc, i) =>
|
||||
// ptwPorts ++= rocc.module.io.ptw
|
||||
rocc.module.io.ptw <> DontCare
|
||||
rocc.module.io.mem <> DontCare
|
||||
rocc.module.io.cmd <> cmdRouter.io.out(i)
|
||||
respArb.io.in(i) <> Queue(rocc.module.io.resp)
|
||||
}
|
||||
// Create this FPU just for RoCC
|
||||
// val nFPUPorts = outer.roccs.filter(_.usesFPU).size
|
||||
val fp_rocc_ios = outer.roccs.map(_.module.io)
|
||||
fp_rocc_ios.map { io =>
|
||||
io.fpu_req.ready := false.B
|
||||
io.fpu_resp.valid := false.B
|
||||
io.fpu_resp.bits := DontCare
|
||||
}
|
||||
(respArb, cmdRouter)
|
||||
}
|
||||
// // RoCC
|
||||
// if (outer.roccs.size > 0) {
|
||||
// val (respArb, cmdRouter) = {
|
||||
// val respArb = Module(new RRArbiter(new RoCCResponse()(outer.p), outer.roccs.size))
|
||||
// val cmdRouter = Module(new RoccCommandRouter(outer.roccs.map(_.opcodes))(outer.p))
|
||||
// outer.roccs.zipWithIndex.foreach { case (rocc, i) =>
|
||||
// // ptwPorts ++= rocc.module.io.ptw
|
||||
// rocc.module.io.ptw <> DontCare
|
||||
// rocc.module.io.mem <> DontCare
|
||||
// rocc.module.io.cmd <> cmdRouter.io.out(i)
|
||||
// respArb.io.in(i) <> Queue(rocc.module.io.resp)
|
||||
// }
|
||||
// // Create this FPU just for RoCC
|
||||
// // val nFPUPorts = outer.roccs.filter(_.usesFPU).size
|
||||
// val fp_rocc_ios = outer.roccs.map(_.module.io)
|
||||
// fp_rocc_ios.map { io =>
|
||||
// io.fpu_req.ready := false.B
|
||||
// io.fpu_resp.valid := false.B
|
||||
// io.fpu_resp.bits := DontCare
|
||||
// }
|
||||
// (respArb, cmdRouter)
|
||||
// }
|
||||
|
||||
cmdRouter.io.in <> DontCare
|
||||
outer.roccs.foreach(_.module.io.exception := DontCare)
|
||||
respArb.io.out <> DontCare
|
||||
}
|
||||
// cmdRouter.io.in <> DontCare
|
||||
// outer.roccs.foreach(_.module.io.exception := DontCare)
|
||||
// respArb.io.out <> DontCare
|
||||
// }
|
||||
}
|
||||
|
||||
/** Interface shell for a cluster-level barrier synchronizer.
  *
  * Only the IO is declared here. No logic in this class drives `req.ready`,
  * `resp.valid` or `resp.bits` yet.
  *
  * @param barrierIdWidth bit width of the barrier ID fields
  * @param numCoreWidth   bit width of the `coreId` and `sizeMinusOne` fields
  */
class ClusterSynchronizer(
    barrierIdWidth: Int,
    numCoreWidth: Int
) extends Module {
  // Fresh hardware types for the repeated field widths.
  private def barrierIdType = UInt(barrierIdWidth.W)
  private def coreCountType = UInt(numCoreWidth.W)

  val io = IO(new Bundle {
    // Incoming barrier request.
    val req = Flipped(Decoupled(new Bundle {
      val barrierId = barrierIdType
      val sizeMinusOne = coreCountType
      val coreId = coreCountType
    }))
    // Outgoing barrier response.
    val resp = Decoupled(new Bundle {
      val barrierId = barrierIdType
    })
  })
}
|
||||
|
||||
// Some @copypaste from CoalescerSourceGen.
|
||||
@@ -768,7 +845,6 @@ class VortexTLAdapter(
|
||||
io.outReq.bits.corrupt := 0.U
|
||||
io.inReq.ready := io.outReq.ready
|
||||
// VortexBundleD <> TLBundleD
|
||||
// Filtering out write requests is handled inside the wrapper Verilog
|
||||
io.inResp.valid := io.outResp.valid
|
||||
io.inResp.bits.opcode := io.outResp.bits.opcode
|
||||
io.inResp.bits.size := io.outResp.bits.size
|
||||
|
||||
@@ -41,18 +41,11 @@ class VortexBundle(tile: RadianceTile)(implicit p: Parameters) extends CoreBundl
|
||||
val interrupts = Input(new freechips.rocketchip.rocket.CoreInterrupts(false/*hasBeu*/))
|
||||
|
||||
// conditionally instantiate ports depending on whether we want to use VX_cache or not
|
||||
// TODO: flatten this like dmem and smem
|
||||
val imem = if (!tile.radianceParams.useVxCache) Some(Vec(1, new Bundle {
|
||||
val a = Decoupled(new VortexBundleA(tagWidth = tile.imemTagWidth, dataWidth = 32))
|
||||
val d = Flipped(Decoupled(new VortexBundleD(tagWidth = tile.imemTagWidth, dataWidth = 32)))
|
||||
})) else None
|
||||
val dmem = if (!tile.radianceParams.useVxCache) Some(Vec(tile.numLsuLanes, new Bundle {
|
||||
// val a = Decoupled(new VortexBundleA(tagWidth = tile.dmemTagWidth, dataWidth = 32))
|
||||
// val d = Flipped(Decoupled(new VortexBundleD(tagWidth = dmemTagWidth, dataWidth = 32)))
|
||||
})) else None
|
||||
val smem = if (!tile.radianceParams.useVxCache) Some(Vec(tile.numLsuLanes, new Bundle {
|
||||
// val a = Decoupled(new VortexBundleA(tagWidth = tile.smemTagWidth, dataWidth = 32))
|
||||
// val d = Flipped(Decoupled(new VortexBundleD(tagWidth = tile.smemTagWidth, dataWidth = 32)))
|
||||
})) else None
|
||||
val mem = if (tile.radianceParams.useVxCache) Some(new Bundle {
|
||||
val a = Decoupled(new VortexBundleA(tagWidth = 15, dataWidth = 128))
|
||||
val d = Flipped(Decoupled(new VortexBundleD(tagWidth = 15, dataWidth = 128)))
|
||||
@@ -96,6 +89,17 @@ class VortexBundle(tile: RadianceTile)(implicit p: Parameters) extends CoreBundl
|
||||
val smem_d_bits_data = Input(UInt((tile.numLsuLanes * 32).W))
|
||||
val smem_d_ready = Output(UInt((tile.numLsuLanes * 1).W))
|
||||
|
||||
// FIXME: hardcoded
|
||||
val NB_WIDTH = 2
|
||||
val NC_WIDTH = 1
|
||||
val gbar_req_valid = Output(Bool())
|
||||
val gbar_req_id = Output(UInt(NB_WIDTH.W))
|
||||
val gbar_req_size_m1 = Output(UInt(NC_WIDTH.W))
|
||||
val gbar_req_core_id = Output(UInt(NC_WIDTH.W))
|
||||
val gbar_req_ready = Input(Bool())
|
||||
val gbar_rsp_valid = Input(Bool())
|
||||
val gbar_rsp_id = Input(UInt(NB_WIDTH.W))
|
||||
|
||||
// val fpu = Flipped(new FPUCoreIO())
|
||||
//val rocc = Flipped(new RoCCCoreIO(nTotalRoCCCSRs))
|
||||
//val trace = Output(new TraceBundle)
|
||||
@@ -112,6 +116,7 @@ class Vortex(tile: RadianceTile)(implicit p: Parameters)
|
||||
// see VX_csr_data that implements the read logic for CSR_MHARTID/GWID.
|
||||
Map(
|
||||
"CORE_ID" -> tile.tileParams.tileId,
|
||||
"CORES_PER_CLUSTER" -> 2, // FIXME: hardcoded
|
||||
// TODO: can we get this as a parameter?
|
||||
"BOOTROM_HANG100" -> 0x10100,
|
||||
"NUM_THREADS" -> tile.numLsuLanes
|
||||
@@ -194,10 +199,6 @@ class Vortex(tile: RadianceTile)(implicit p: Parameters)
|
||||
// addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_tags.sv")
|
||||
// addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_wrap.sv")
|
||||
|
||||
// gbar is only used in the socket/cluster hierarchy
|
||||
// addResource("/vsrc/vortex/hw/rtl/mem/VX_gbar_arb.sv")
|
||||
// addResource("/vsrc/vortex/hw/rtl/mem/VX_gbar_bus_if.sv")
|
||||
// addResource("/vsrc/vortex/hw/rtl/mem/VX_gbar_unit.sv")
|
||||
// mem_arb is used in VX_socket or VX_cache_cluster
|
||||
// addResource("/vsrc/vortex/hw/rtl/mem/VX_mem_arb.sv")
|
||||
addResource("/vsrc/vortex/hw/rtl/mem/VX_mem_bus_if.sv")
|
||||
@@ -217,6 +218,14 @@ class Vortex(tile: RadianceTile)(implicit p: Parameters)
|
||||
// addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_define.vh")
|
||||
// addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_wrap.sv")
|
||||
|
||||
// used when PERF_ENABLE is defined
|
||||
addResource("/vsrc/vortex/hw/rtl/mem/VX_mem_perf_if.sv")
|
||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_pipeline_perf_if.sv")
|
||||
// used when GBAR_ENABLE is defined
|
||||
addResource("/vsrc/vortex/hw/rtl/mem/VX_gbar_bus_if.sv")
|
||||
// addResource("/vsrc/vortex/hw/rtl/mem/VX_gbar_arb.sv")
|
||||
// addResource("/vsrc/vortex/hw/rtl/mem/VX_gbar_unit.sv")
|
||||
|
||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_allocator.sv")
|
||||
// addResource("/vsrc/vortex/hw/rtl/libs/VX_avs_adapter.sv")
|
||||
// addResource("/vsrc/vortex/hw/rtl/libs/VX_axi_adapter.sv")
|
||||
@@ -245,6 +254,9 @@ class Vortex(tile: RadianceTile)(implicit p: Parameters)
|
||||
// unused addResource("/vsrc/vortex/hw/rtl/libs/VX_onehot_mux.sv")
|
||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_pending_size.sv")
|
||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_pipe_register.sv")
|
||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_pipe_buffer.sv")
|
||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_toggle_buffer.sv")
|
||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_stream_buffer.sv")
|
||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_popcount.sv")
|
||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_priority_arbiter.sv")
|
||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_priority_encoder.sv")
|
||||
|
||||
Reference in New Issue
Block a user