framebuffer, shared memory key, etc
This commit is contained in:
14
radiance.mk
14
radiance.mk
@@ -11,6 +11,9 @@ RADPIE_BUILD_DIR = $(RADPIE_SRC_DIR)/target/release
|
||||
|
||||
# EXTRA_SIM_REQS += radpie
|
||||
EXTRA_SIM_LDFLAGS += -L$(RADPIE_BUILD_DIR) -Wl,-rpath,$(RADPIE_BUILD_DIR) -lradpie
|
||||
ifeq ($(shell echo $(CONFIG) | grep -E "SynConfig$$"),$(CONFIG))
|
||||
EXTRA_SIM_PREPROC_DEFINES += +define+SYNTHESIS +define+NDEBUG +define+DPI_DISABLE
|
||||
endif
|
||||
EXTRA_SIM_PREPROC_DEFINES += \
|
||||
+define+SIMULATION \
|
||||
+define+GPR_RESET \
|
||||
@@ -21,12 +24,11 @@ EXTRA_SIM_PREPROC_DEFINES += \
|
||||
+define+GBAR_ENABLE \
|
||||
+define+GBAR_CLUSTER_ENABLE \
|
||||
+define+NUM_FPU_BLOCKS=2 \
|
||||
+define+NUM_BARRIERS=4 \
|
||||
+define+NUM_LSU_LANES=4 \
|
||||
+define+NUM_CORES=1 +define+NUM_THREADS=32 +define+NUM_WARPS=4
|
||||
# +define+EXT_T_DISABLE \
|
||||
# +define+FPU_FPNEW \
|
||||
# +define+SMEM_LOG_SIZE=15 \
|
||||
+define+EXT_T_DISABLE \
|
||||
+define+FPU_FPNEW \
|
||||
+define+SMEM_LOG_SIZE=17
|
||||
|
||||
VCS_NONCC_OPTS += +vcs+initreg+random
|
||||
|
||||
# cargo handles building of Rust files all on its own, so make this a PHONY
|
||||
# target to run cargo unconditionally
|
||||
|
||||
@@ -1146,9 +1146,9 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig)
|
||||
case (enqPort, uncoalResp) => {
|
||||
enqPort <> uncoalResp
|
||||
|
||||
when(!enqPort.ready) {
|
||||
printf(s"respQueue: enq port for uncoalesced response is blocked on lane ${lane}\n")
|
||||
}
|
||||
// when(!enqPort.ready) {
|
||||
// printf(s"respQueue: enq port for uncoalesced response is blocked on lane ${lane}\n")
|
||||
// }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
78
src/main/scala/radiance/memory/FrameBuffer.scala
Normal file
78
src/main/scala/radiance/memory/FrameBuffer.scala
Normal file
@@ -0,0 +1,78 @@
|
||||
package radiance.memory
|
||||
|
||||
import chisel3._
|
||||
import chisel3.util._
|
||||
import freechips.rocketchip.diplomacy.{AddressSet, SimpleDevice, TransferSizes}
|
||||
import org.chipsalliance.diplomacy.lazymodule._
|
||||
import freechips.rocketchip.regmapper.RegField
|
||||
import freechips.rocketchip.tilelink._
|
||||
import midas.targetutils.SynthesizePrintf
|
||||
import org.chipsalliance.cde.config.Parameters
|
||||
|
||||
/**
 * Write-only TileLink frame buffer whose contents are dumped through `printf`.
 *
 * Two managers sit behind the crossbar exposed as `node`:
 *  - `bufferNode`: accepts PutFull/PutPartial into a `width`-byte wide memory
 *    mapped at `baseAddress`.
 *  - `regNode`: a 4-byte write-only "valid" register at `validAddress`.
 *    Writing a non-zero value N switches the module to printing mode, in which
 *    it reads rows 0 until N of the memory and prints one line per row.
 *    Buffer writes are stalled while printing.
 *
 * @param baseAddress  base address of the buffer window
 * @param width        row width in bytes (also the beat width of `bufferNode`)
 * @param size         buffer size in bytes; the bus window is rounded up to the
 *                     next power of two
 * @param validAddress address of the valid/trigger register
 * @param fbName       tag printed at the start of every dumped line
 */
class FrameBuffer(baseAddress: BigInt, width: Int, size: Int, validAddress: BigInt, fbName: String = "fb")
                 (implicit p: Parameters) extends LazyModule {

  val node = TLXbar()

  // Offset mask of the power-of-two window claimed on the bus. Computed as a
  // BigInt so that `1 << log2Ceil(size)` cannot overflow Int for large sizes,
  // and shared with the write-index decode below so that both always agree
  // (masking with `size - 1` aliases in-range addresses when `size` is not a
  // power of two).
  private val addrMask: BigInt = (BigInt(1) << log2Ceil(size)) - 1

  val bufferNode = TLManagerNode(Seq(TLSlavePortParameters.v1(
    Seq(TLSlaveParameters.v2(
      address = Seq(AddressSet(baseAddress, addrMask)),
      supports = TLMasterToSlaveTransferSizes(
        putFull = TransferSizes(1, width),
        putPartial = TransferSizes(1, width)
      ),
      fifoId = Some(0))), // requests are handled in order
    beatBytes = width
  )))

  val regDevice = new SimpleDevice("framebuffer-valid-reg", Seq(s"framebuffer-valid-reg"))
  val regNode = TLRegisterNode(
    address = Seq(AddressSet(validAddress, 0x3)), device = regDevice, concurrency = 1)

  bufferNode := TLWidthWidget(4) := TLBuffer() := node
  regNode := TLFragmenter(4, 4) := TLBuffer() := node

  // Number of `width`-byte rows held by the backing memory.
  val depth = size >> log2Ceil(width)

  lazy val module = new LazyModuleImp(this) {
    val bufT = Vec(width, UInt(8.W))
    val buf = SyncReadMem(depth, bufT)
    val state = RegInit(false.B) // 0: accepting writes, 1: printing

    val Seq((bufBundle, bufEdge)) = bufferNode.in

    // A write is acknowledged in the same cycle it is accepted, so A is only
    // ready when D can be taken, and nothing is accepted while printing.
    bufBundle.a.ready := !state && bufBundle.d.ready
    bufBundle.d.bits := DontCare
    bufBundle.d.valid := !state && bufBundle.a.valid
    when (bufBundle.a.fire) {
      bufBundle.d.bits := bufEdge.AccessAck(bufBundle.a.bits)
      // Row index = offset within the window divided by the row width; the
      // byte mask selects which lanes of the row are updated.
      // NOTE(review): offsets in [size, roundUpPow2(size)) decode to rows >= depth
      // when `size` is not a power of two - confirm callers never write there.
      buf.write(((bufBundle.a.bits.address & addrMask.U) >> log2Ceil(width)).asUInt,
        bufBundle.a.bits.data.asTypeOf(bufT),
        bufBundle.a.bits.mask.asBools)
    }

    // `writeValid` is the software-visible trigger; `writeTotal` latches the
    // requested row count for the duration of the dump.
    val writeValid = RegInit(0.U(32.W))
    val writeTotal = RegInit(0.U(32.W))
    regNode.regmap(0x00 -> Seq(RegField.w(32, writeValid)))

    val writeCounter = Counter(depth)
    when (writeValid > 0.U) {
      // Consume the trigger and start dumping from row 0.
      writeValid := 0.U
      writeTotal := writeValid
      state := true.B
      writeCounter.reset()
    }.elsewhen (writeCounter.value === writeTotal - 1.U) {
      // Last requested row is being read this cycle; go back to accepting writes.
      // NOTE(review): if the written count exceeds `depth` the counter wraps
      // before reaching it and the dump does not appear to terminate - confirm.
      state := false.B
    }

    when (state) { writeCounter.inc() }

    // The memory read is synchronous, so the data belongs to the index that
    // was presented in the previous cycle; print with both delayed by one.
    val readData = buf.read(writeCounter.value, state)
    val prevIdx = RegNext(writeCounter.value)
    when (RegNext(state)) {
      SynthesizePrintf(printf(s"$fbName %x %x\n", prevIdx, readData.asUInt))
    }
  }
}
|
||||
@@ -24,6 +24,17 @@ case class RadianceSharedMemKey(address: BigInt,
|
||||
serializeUnaligned: Boolean = true)
|
||||
case object RadianceSharedMemKey extends Field[Option[RadianceSharedMemKey]](None)
|
||||
|
||||
// Configuration for the Gemmini tile's memory-mapped command interface.
//  - tileSize: read by GemminiTileModuleImp to choose the loop-bounds
//    instruction and the scratchpad quartile size.
//  - slaveAddress: base address of the "gemmini-cmd-reg" register node
//    (GemminiTile maps it with an address mask of 0xfff).
case class RadianceGemminiKey(tileSize: Int,
|
||||
slaveAddress: BigInt)
|
||||
// Config field holding the key above; defaults to None. GemminiTile calls
// `.get` on it, so it must be populated (e.g. by WithRadianceGemmini) before
// a Gemmini tile is elaborated.
case object RadianceGemminiKey extends Field[Option[RadianceGemminiKey]](None)
|
||||
|
||||
// Description of one frame buffer instance. RadianceCluster passes the fields,
// in this order, to the radiance.memory.FrameBuffer constructor.
//  - baseAddress: base address of the buffer's bus window
case class RadianceFrameBufferKey(baseAddress: BigInt,
|
||||
// row width in bytes (used as the buffer node's beatBytes)
width: Int,
|
||||
// buffer size in bytes
size: Int,
|
||||
// address of the 4-byte write-only valid/trigger register
validAddress: BigInt,
|
||||
// tag printed at the start of each dumped line
fbName: String = "fb")
|
||||
// Config field listing every frame buffer to instantiate; empty by default,
// in which case the cluster creates none.
case object RadianceFrameBufferKey extends Field[Seq[RadianceFrameBufferKey]](Seq())
|
||||
|
||||
class WithRadianceCores(
|
||||
n: Int,
|
||||
location: HierarchicalLocation,
|
||||
@@ -74,7 +85,7 @@ class WithRadianceCores(
|
||||
|
||||
class WithRadianceGemmini(location: HierarchicalLocation,
|
||||
crossing: RocketCrossingParams,
|
||||
dim: Int, accSizeInKB: Int) extends Config((site, _, up) => {
|
||||
dim: Int, accSizeInKB: Int, tileSize: Int) extends Config((site, _, up) => {
|
||||
case TilesLocated(`location`) => {
|
||||
val prev = up(TilesLocated(`location`), site)
|
||||
val idOffset = prev.size
|
||||
@@ -106,8 +117,15 @@ class WithRadianceGemmini(location: HierarchicalLocation,
|
||||
crossing
|
||||
)) ++ prev
|
||||
}
|
||||
case RadianceGemminiKey => {
|
||||
val smKey = site(RadianceSharedMemKey).get
|
||||
Some(RadianceGemminiKey(
|
||||
tileSize = tileSize,
|
||||
slaveAddress = smKey.address + smKey.size + 0x3000
|
||||
))
|
||||
}
|
||||
}) {
|
||||
def this(location: HierarchicalLocation = InSubsystem, dim: Int, accSizeInKB: Int) =
|
||||
def this(location: HierarchicalLocation = InSubsystem, dim: Int, accSizeInKB: Int, tileSize: Int) =
|
||||
this(location, RocketCrossingParams(
|
||||
master = HierarchicalElementMasterPortParams.locationDefault(location),
|
||||
slave = HierarchicalElementSlavePortParams.locationDefault(location),
|
||||
@@ -115,7 +133,7 @@ class WithRadianceGemmini(location: HierarchicalLocation,
|
||||
case InSubsystem => CBUS
|
||||
case InCluster(clusterId) => CCBUS(clusterId)
|
||||
}
|
||||
), dim, accSizeInKB)
|
||||
), dim, accSizeInKB, tileSize)
|
||||
}
|
||||
|
||||
class WithRadianceSharedMem(address: BigInt,
|
||||
@@ -136,6 +154,18 @@ class WithRadianceSharedMem(address: BigInt,
|
||||
}
|
||||
})
|
||||
|
||||
/**
 * Config fragment that registers one more frame buffer.
 *
 * The new entry is appended after whatever frame buffers earlier fragments
 * have already placed under [[RadianceFrameBufferKey]], so several of these
 * fragments can be stacked to create several buffers.
 */
class WithRadianceFrameBuffer(baseAddress: BigInt,
                              width: Int,
                              size: Int,
                              validAddress: BigInt,
                              fbName: String = "fb") extends Config((_, _, up) => {
  case RadianceFrameBufferKey =>
    // Entries contributed by fragments further up the config chain.
    val inherited = up(RadianceFrameBufferKey)
    inherited :+ RadianceFrameBufferKey(
      baseAddress = baseAddress,
      width = width,
      size = size,
      validAddress = validAddress,
      fbName = fbName)
})
|
||||
|
||||
class WithFuzzerCores(
|
||||
n: Int,
|
||||
useVxCache: Boolean
|
||||
|
||||
@@ -6,15 +6,18 @@ package radiance.tile
|
||||
import chisel3._
|
||||
import chisel3.util._
|
||||
import chisel3.experimental.BundleLiterals._
|
||||
import freechips.rocketchip.diplomacy.{BigIntHexContext, ClockCrossingType, DisableMonitors, LazyModule, SimpleDevice}
|
||||
import org.chipsalliance.diplomacy.DisableMonitors
|
||||
import org.chipsalliance.diplomacy.lazymodule._
|
||||
import freechips.rocketchip.diplomacy.{AddressSet, BigIntHexContext, ClockCrossingType, SimpleDevice}
|
||||
import freechips.rocketchip.prci.ClockSinkParameters
|
||||
import freechips.rocketchip.regmapper.RegField
|
||||
import freechips.rocketchip.rocket._
|
||||
import freechips.rocketchip.subsystem.{CanAttachTile, HierarchicalElementCrossingParamsLike, RocketCrossingParams}
|
||||
import freechips.rocketchip.tile._
|
||||
import freechips.rocketchip.tilelink._
|
||||
import gemmini._
|
||||
import org.chipsalliance.cde.config.Parameters
|
||||
import radiance.subsystem.{GPUMemParams, GPUMemory}
|
||||
import radiance.subsystem.{GPUMemParams, GPUMemory, RadianceGemminiKey}
|
||||
|
||||
case class GemminiCoreParams(
|
||||
useVM: Boolean = false,
|
||||
@@ -120,10 +123,20 @@ class GemminiTile private (
|
||||
// tlOtherMastersNode :=* AddressOrNode(base) :=* gemmini.tlNode
|
||||
tlMasterXbar.node :=* gemmini.atlNode
|
||||
tlOtherMastersNode :=* gemmini.tlNode
|
||||
gemmini.stlNode := tlSlaveXbar.node
|
||||
// gemmini.stlNode := tlSlaveXbar.node
|
||||
|
||||
require(!gemmini.config.sp_singleported, "external scratchpad must be dual ported")
|
||||
|
||||
val configKey = p(RadianceGemminiKey).get
|
||||
|
||||
val regDevice = new SimpleDevice("gemmini-cmd-reg", Seq(s"gemmini-cmd-reg"))
|
||||
val regNode = TLRegisterNode(
|
||||
address = Seq(AddressSet(configKey.slaveAddress, 0xfff)),
|
||||
device = regDevice,
|
||||
beatBytes = 8,
|
||||
concurrency = 1)
|
||||
regNode := TLFragmenter(8, 64) := tlSlaveXbar.node
|
||||
|
||||
override lazy val module = new GemminiTileModuleImp(this)
|
||||
}
|
||||
|
||||
@@ -173,17 +186,22 @@ class GemminiTileModuleImp(outer: GemminiTile) extends BaseTileModuleImp(outer)
|
||||
}
|
||||
|
||||
ciscInst := 0.U.asTypeOf(ciscInstT)
|
||||
// val boundsInst = ciscInstT.Lit(_.inst -> 0x1220b07b.U, _.rs1 -> 0.U, _.rs2 -> x"4_00040004".U)
|
||||
// val spadQuartile = 0x80
|
||||
val boundsInst = ciscInstT.Lit(_.inst -> 0x1220b07b.U, _.rs1 -> 0.U, _.rs2 -> x"8_00080008".U)
|
||||
val spadQuartile = 0x200
|
||||
|
||||
val tileSize = outer.configKey.tileSize
|
||||
// Loop-bounds micro-op ("set I, J, K") and scratchpad quartile size for the
// configured tile size. 4 and 8 use fixed values; any other size is derived.
val (boundsInst, spadQuartile) = if (tileSize == 4) {
  (ciscInstT.Lit(_.inst -> 0x1220b07b.U, _.rs1 -> 0.U, _.rs2 -> x"4_00040004".U), 0x80)
} else if (tileSize == 8) {
  (ciscInstT.Lit(_.inst -> 0x1220b07b.U, _.rs1 -> 0.U, _.rs2 -> x"8_00080008".U), 0x200)
} else {
  // rs2 carries the tile size at bit offsets 0, 16 and 32, matching the fixed
  // literals above. `tileSize` is an Int, and an Int shift only uses the low
  // 5 bits of the shift count, so `tileSize << 32` would equal `tileSize` and
  // leave the upper field empty. Widen to BigInt before shifting.
  val dim = BigInt(tileSize)
  (ciscInstT.Lit(_.inst -> 0x1220b07b.U, _.rs1 -> 0.U, _.rs2 -> (dim | (dim << 16) | (dim << 32)).U),
    tileSize * tileSize * outer.gemminiParams.gemminiConfig.DIM)
}
|
||||
when (ciscValid) {
|
||||
assert(!accSlave.cmd.valid, "cisc state machine already busy")
|
||||
switch (ciscId) {
|
||||
is (0.U) {
|
||||
ciscInst := microcodeEntry(Seq(
|
||||
ciscInstT.Lit(_.inst -> 0x1220b07b.U, _.rs1 -> 0.U, _.rs2 -> x"8_00080008".U), // set I, J, K
|
||||
ciscInstT.Lit(_.inst -> 0x3020b07b.U, _.rs1 -> 0.U, _.rs2 -> 0x600.U), // set A, B address
|
||||
ciscInst := microcodeEntry(Seq(boundsInst,
|
||||
ciscInstT.Lit(_.inst -> 0x3020b07b.U, _.rs1 -> 0.U, _.rs2 -> (spadQuartile * 3).U), // set A, B address
|
||||
ciscInstT.Lit(_.inst -> 0x1020b07b.U, _.rs1 -> 0.U, _.rs2 -> x"0_000002b8".U) // set skip, acc
|
||||
))
|
||||
}
|
||||
@@ -234,11 +252,39 @@ class GemminiTileModuleImp(outer: GemminiTile) extends BaseTileModuleImp(outer)
|
||||
}
|
||||
|
||||
val gemminiIO = outer.gemmini.module.io.cmd
|
||||
|
||||
val regValid = Wire(Bool())
|
||||
val regCommand = Wire(gemminiIO.bits.inst.cloneType)
|
||||
val gemminiRs1RegLSB = RegInit(0.U(32.W))
|
||||
val gemminiRs1RegMSB = RegInit(0.U(32.W))
|
||||
val gemminiRs2RegLSB = RegInit(0.U(32.W))
|
||||
val gemminiRs2RegMSB = RegInit(0.U(32.W))
|
||||
|
||||
// Write handler for the command register mapped at offset 0x00 of regNode.
// `valid` is asserted by the register router when a write is presented and
// `bits` is the written data. The returned Bool is the handler's ready signal.
def gemminiCommandReg(valid: Bool, bits: UInt): Bool = {
|
||||
// Forward the write strobe; it is OR-ed into gemminiIO.valid elsewhere.
regValid := valid
|
||||
// Reinterpret the written word as a Gemmini instruction.
regCommand := bits.asTypeOf(regCommand)
|
||||
// Accept the write only when Gemmini can take a command and the CISC
// sequencer is not driving the command port this cycle.
gemminiIO.ready && !ciscValid
|
||||
}
|
||||
|
||||
outer.regNode.regmap(
|
||||
0x00 -> Seq(RegField.w(32, gemminiCommandReg(_, _))),
|
||||
0x10 -> Seq(
|
||||
RegField.w(32, gemminiRs1RegLSB),
|
||||
RegField.w(32, gemminiRs1RegMSB)),
|
||||
0x18 -> Seq(
|
||||
RegField.w(32, gemminiRs2RegLSB),
|
||||
RegField.w(32, gemminiRs2RegMSB)),
|
||||
0x20 -> Seq(RegField.r(32, outer.gemmini.module.io.busy))
|
||||
)
|
||||
|
||||
assert(!regValid || gemminiIO.ready)
|
||||
assert(!ciscValid || gemminiIO.ready)
|
||||
|
||||
gemminiIO.bits.status := 0.U.asTypeOf(gemminiIO.bits.status)
|
||||
gemminiIO.bits.inst := ciscInst.inst.asTypeOf(gemminiIO.bits.inst)
|
||||
gemminiIO.bits.rs1 := ciscInst.rs1
|
||||
gemminiIO.bits.rs2 := ciscInst.rs2
|
||||
gemminiIO.valid := ciscValid
|
||||
gemminiIO.bits.inst := Mux(ciscValid, ciscInst.inst.asTypeOf(gemminiIO.bits.inst), regCommand)
|
||||
gemminiIO.bits.rs1 := Mux(ciscValid, ciscInst.rs1, Cat(gemminiRs1RegMSB, gemminiRs1RegLSB))
|
||||
gemminiIO.bits.rs2 := Mux(ciscValid, ciscInst.rs2, Cat(gemminiRs2RegMSB, gemminiRs2RegLSB))
|
||||
gemminiIO.valid := ciscValid || regValid
|
||||
assert(gemminiIO.ready || !gemminiIO.valid)
|
||||
|
||||
accSlave.status := RegNext(outer.gemmini.module.io.busy).asUInt
|
||||
|
||||
@@ -5,17 +5,17 @@ package radiance.tile
|
||||
|
||||
import chisel3._
|
||||
import chisel3.util._
|
||||
import org.chipsalliance.diplomacy._
|
||||
import freechips.rocketchip.diplomacy._
|
||||
import freechips.rocketchip.diplomacy.{AddressSet, BufferParams, ClockCrossingType, TransferSizes}
|
||||
import org.chipsalliance.diplomacy.lazymodule._
|
||||
import freechips.rocketchip.prci.ClockSinkParameters
|
||||
import freechips.rocketchip.subsystem._
|
||||
import freechips.rocketchip.tile.TraceBundle
|
||||
import freechips.rocketchip.tilelink._
|
||||
import gemmini._
|
||||
import midas.targetutils.SynthesizePrintf
|
||||
import org.chipsalliance.cde.config.Parameters
|
||||
import org.chipsalliance.diplomacy.{DisableMonitors, ValName}
|
||||
import radiance.memory._
|
||||
import radiance.subsystem.RadianceSharedMemKey
|
||||
import radiance.subsystem.{RadianceFrameBufferKey, RadianceSharedMemKey}
|
||||
|
||||
case class RadianceClusterParams(
|
||||
val clusterId: Int,
|
||||
@@ -85,6 +85,7 @@ class RadianceCluster (
|
||||
assert(gemminiConfig.sp_width / 8 == smem_width)
|
||||
assert(gemminiConfig.sp_bank_entries == smem_depth)
|
||||
|
||||
VecInit(Seq(0.U, 1.U)).reduceTree(_ +& _)
|
||||
val stride_by_word = true
|
||||
val filter_aligned = true
|
||||
val disable_monitors = true // otherwise it generate 1k+ different tl monitors
|
||||
@@ -337,9 +338,13 @@ class RadianceCluster (
|
||||
|
||||
val traceTLNode = TLAdapterNode(clientFn = c => c, managerFn = m => m)
|
||||
// printf and perf counter buffer
|
||||
TLRAM(AddressSet(x"ff000000" + smem_size, numCores * 0x200 - 1)) := traceTLNode :=
|
||||
TLRAM(AddressSet(smem_key.address + smem_size, numCores * 0x200 - 1)) := traceTLNode :=
|
||||
TLBuffer() := TLFragmenter(4, 4) := clbus.outwardNode
|
||||
|
||||
p(RadianceFrameBufferKey).foreach { key =>
|
||||
val fb = LazyModule(new FrameBuffer(key.baseAddress, key.width, key.size, key.validAddress, key.fbName))
|
||||
fb.node := TLBuffer() := TLFragmenter(4, 4) := clbus.outwardNode
|
||||
}
|
||||
|
||||
// Diplomacy sink nodes for cluster-wide barrier sync signal
|
||||
val barrierSlaveNode = BarrierSlaveNode(numCores)
|
||||
@@ -371,7 +376,7 @@ class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp(
|
||||
// @cleanup: This assumes barrier params on all edges are the same, i.e. all
|
||||
// cores are configured to have the same barrier id range. While true, might
|
||||
// be better to actually assert this
|
||||
val barrierParam = outer.barrierSlaveNode.in(0)._2
|
||||
val barrierParam = outer.barrierSlaveNode.in.head._2
|
||||
println(s"======= barrierParam: ${barrierParam}")
|
||||
val synchronizer = Module(new BarrierSynchronizer(barrierParam))
|
||||
(synchronizer.io.reqs zip outer.barrierSlaveNode.in).foreach { case (req, (b, _)) =>
|
||||
@@ -401,6 +406,7 @@ class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp(
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// TODO: remove Pipeline dependency of gemmini
|
||||
def makeSmemBanks(): Unit = {
|
||||
def make_buffer[T <: Data](mem: TwoPortSyncMem[T], r_node: TLBundle, r_edge: TLEdgeIn,
|
||||
|
||||
@@ -16,6 +16,7 @@ import freechips.rocketchip.subsystem.HierarchicalElementCrossingParamsLike
|
||||
import freechips.rocketchip.tile._
|
||||
import freechips.rocketchip.tilelink._
|
||||
import freechips.rocketchip.util._
|
||||
import midas.targetutils.SynthesizePrintf
|
||||
import org.chipsalliance.cde.config._
|
||||
import radiance.memory._
|
||||
import radiance.subsystem.{GPUMemParams, GPUMemory, RadianceSimArgs}
|
||||
@@ -477,6 +478,10 @@ class RadianceTileModuleImp(outer: RadianceTile)
|
||||
|
||||
outer.decodeCoreInterrupts(core.io.interrupts) // Decode the interrupt vector
|
||||
|
||||
when (core.io.interrupts.msip && !RegNext(core.io.interrupts.msip)) {
|
||||
SynthesizePrintf(printf("interrupt\n"))
|
||||
}
|
||||
|
||||
core.io.interrupts.nmi.foreach { nmi => nmi := outer.nmiSinkNode.get.bundle }
|
||||
|
||||
// Pass through various external constants and reports that were bundle-bridged into the tile
|
||||
|
||||
@@ -39,7 +39,7 @@ class VortexBundle(tile: RadianceTile)(implicit p: Parameters) extends CoreBundl
|
||||
// val hartid = Input(UInt(tileIdLen.W))
|
||||
val reset_vector = Input(UInt(resetVectorLen.W))
|
||||
val interrupts = Input(new freechips.rocketchip.rocket.CoreInterrupts(false/*hasBeu*/))
|
||||
|
||||
|
||||
// conditionally instantiate ports depending on whether we want to use VX_cache or not
|
||||
// TODO: flatten this like dmem and smem
|
||||
val imem = if (!tile.radianceParams.useVxCache) Some(Vec(1, new Bundle {
|
||||
|
||||
Reference in New Issue
Block a user