framebuffer, shared memory key, etc

This commit is contained in:
Richard Yan
2024-06-09 15:05:31 -07:00
parent 697c37b980
commit 3badd75473
8 changed files with 200 additions and 33 deletions

View File

@@ -11,6 +11,9 @@ RADPIE_BUILD_DIR = $(RADPIE_SRC_DIR)/target/release
# EXTRA_SIM_REQS += radpie
EXTRA_SIM_LDFLAGS += -L$(RADPIE_BUILD_DIR) -Wl,-rpath,$(RADPIE_BUILD_DIR) -lradpie
ifeq ($(shell echo $(CONFIG) | grep -E "SynConfig$$"),$(CONFIG))
EXTRA_SIM_PREPROC_DEFINES += +define+SYNTHESIS +define+NDEBUG +define+DPI_DISABLE
endif
EXTRA_SIM_PREPROC_DEFINES += \
+define+SIMULATION \
+define+GPR_RESET \
@@ -21,12 +24,11 @@ EXTRA_SIM_PREPROC_DEFINES += \
+define+GBAR_ENABLE \
+define+GBAR_CLUSTER_ENABLE \
+define+NUM_FPU_BLOCKS=2 \
+define+NUM_BARRIERS=4 \
+define+NUM_LSU_LANES=4 \
+define+NUM_CORES=1 +define+NUM_THREADS=32 +define+NUM_WARPS=4
# +define+EXT_T_DISABLE \
# +define+FPU_FPNEW \
# +define+SMEM_LOG_SIZE=15 \
+define+EXT_T_DISABLE \
+define+FPU_FPNEW \
+define+SMEM_LOG_SIZE=17
VCS_NONCC_OPTS += +vcs+initreg+random
# cargo handles building of Rust files all on its own, so make this a PHONY
# target to run cargo unconditionally

View File

@@ -1146,9 +1146,9 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig)
case (enqPort, uncoalResp) => {
enqPort <> uncoalResp
when(!enqPort.ready) {
printf(s"respQueue: enq port for uncoalesced response is blocked on lane ${lane}\n")
}
// when(!enqPort.ready) {
// printf(s"respQueue: enq port for uncoalesced response is blocked on lane ${lane}\n")
// }
}
}
}

View File

@@ -0,0 +1,78 @@
package radiance.memory
import chisel3._
import chisel3.util._
import freechips.rocketchip.diplomacy.{AddressSet, SimpleDevice, TransferSizes}
import org.chipsalliance.diplomacy.lazymodule._
import freechips.rocketchip.regmapper.RegField
import freechips.rocketchip.tilelink._
import midas.targetutils.SynthesizePrintf
import org.chipsalliance.cde.config.Parameters
/** Write-only TileLink frame buffer whose contents are dumped through a synthesizable printf.
  *
  * Two managers sit behind one crossbar (`node`):
  *  - `bufferNode` accepts PutFull/PutPartial into a `SyncReadMem` organised as rows of
  *    `width` bytes.
  *  - `regNode` exposes one 32-bit write-only register at `validAddress`. Writing a non-zero
  *    value N stops the buffer from accepting writes and prints rows 0 to N - 1, one row per
  *    cycle, as `"<fbName> <row index> <row data>"` (both in hex).
  *
  * @param baseAddress  base address of the buffer region
  * @param width        row width in bytes; also the manager's beatBytes and maximum transfer
  *                     size. Must be a power of two.
  * @param size         buffer size in bytes. Must be a power of two and at least `width`,
  *                     because the write path masks the address with `size - 1`.
  * @param validAddress address of the 4-byte trigger register
  * @param fbName       prefix of every printed line
  */
class FrameBuffer(baseAddress: BigInt, width: Int, size: Int, validAddress: BigInt, fbName: String = "fb")
(implicit p: Parameters) extends LazyModule {
  // The row index is computed as (address & (size - 1)) >> log2Ceil(width) and the memory has
  // size >> log2Ceil(width) rows. Both are only correct for power-of-two geometry, so reject
  // anything else at elaboration time instead of silently aliasing addresses.
  require(isPow2(width), s"FrameBuffer width must be a power of two, got $width")
  require(isPow2(size), s"FrameBuffer size must be a power of two, got $size")
  require(size >= width, s"FrameBuffer size ($size) must hold at least one row of $width bytes")

  val node = TLXbar()

  val bufferNode = TLManagerNode(Seq(TLSlavePortParameters.v1(
    Seq(TLSlaveParameters.v2(
      // size is a power of two (checked above), so size - 1 is a contiguous mask
      address = Seq(AddressSet(baseAddress, BigInt(size) - 1)),
      supports = TLMasterToSlaveTransferSizes(
        putFull = TransferSizes(1, width),
        putPartial = TransferSizes(1, width)
      ),
      fifoId = Some(0))), // requests are handled in order
    beatBytes = width
  )))

  val regDevice = new SimpleDevice("framebuffer-valid-reg", Seq("framebuffer-valid-reg"))
  val regNode = TLRegisterNode(
    address = Seq(AddressSet(validAddress, 0x3)), device = regDevice, concurrency = 1)

  bufferNode := TLWidthWidget(4) := TLBuffer() := node
  regNode := TLFragmenter(4, 4) := TLBuffer() := node

  // number of width-byte rows in the buffer
  val depth = size >> log2Ceil(width)

  lazy val module = new LazyModuleImp(this) {
    val bufT = Vec(width, UInt(8.W))
    val buf = SyncReadMem(depth, bufT)
    val state = RegInit(false.B) // 0: accepting writes, 1: printing

    val Seq((bufBundle, bufEdge)) = bufferNode.in

    // A write is acknowledged in the same cycle it is accepted, so A is only ready when D can
    // be taken; nothing is accepted while printing.
    bufBundle.a.ready := !state && bufBundle.d.ready
    bufBundle.d.bits := DontCare
    bufBundle.d.valid := !state && bufBundle.a.valid
    when (bufBundle.a.fire) {
      bufBundle.d.bits := bufEdge.AccessAck(bufBundle.a.bits)
      // row index = byte offset within the buffer / row width; byte lanes follow the TL mask
      buf.write(((bufBundle.a.bits.address & (size - 1).U) >> log2Ceil(width)).asUInt,
        bufBundle.a.bits.data.asTypeOf(bufT),
        bufBundle.a.bits.mask.asBools)
    }

    val writeValid = RegInit(0.U(32.W)) // trigger register written by software
    val writeTotal = RegInit(0.U(32.W)) // latched number of rows to print
    regNode.regmap(0x00 -> Seq(RegField.w(32, writeValid)))

    // val (writeCounter, writeComplete) = Counter(state.asBool, size / width)
    // when (writeValid(0)) { state := 1.U }
    // when (writeComplete) { state := 0.U }

    val writeCounter = Counter(depth)
    // NOTE(review): writeCounter wraps at depth, so a trigger value larger than depth never
    // satisfies the termination compare below and printing does not stop. Confirm software
    // never writes more than `depth`.
    when (writeValid > 0.U) {
      // consume the trigger: remember the row count, start printing from row 0
      writeValid := 0.U
      writeTotal := writeValid
      state := true.B
      writeCounter.reset()
    }.elsewhen (writeCounter.value === writeTotal - 1.U) {
      state := false.B
    }
    when (state) { writeCounter.inc() }

    // SyncReadMem returns data one cycle after the read is issued, so the index and the
    // print enable are delayed by one cycle to line up with readData.
    val readData = buf.read(writeCounter.value, state)
    val prevIdx = RegNext(writeCounter.value)
    when (RegNext(state)) {
      SynthesizePrintf(printf(s"$fbName %x %x\n", prevIdx, readData.asUInt))
    }
  }
}

View File

@@ -24,6 +24,17 @@ case class RadianceSharedMemKey(address: BigInt,
serializeUnaligned: Boolean = true)
case object RadianceSharedMemKey extends Field[Option[RadianceSharedMemKey]](None)
/** Configuration for the Gemmini tile's command interface.
  *
  * @param tileSize     tile dimension read by GemminiTileModuleImp to pick the loop-bounds
  *                     instruction and the scratchpad quartile offset
  * @param slaveAddress base address of the Gemmini command register node (mask 0xfff)
  */
case class RadianceGemminiKey(tileSize: Int,
slaveAddress: BigInt)
// Config field holding the Gemmini key; None until a config fragment sets it.
case object RadianceGemminiKey extends Field[Option[RadianceGemminiKey]](None)
/** Parameters of one frame buffer instance; passed field-for-field to the FrameBuffer
  * constructor by the cluster.
  *
  * @param baseAddress  base address of the buffer region
  * @param width        row width in bytes (used as the buffer manager's beatBytes)
  * @param size         buffer size in bytes
  * @param validAddress address of the 4-byte register that triggers printing
  * @param fbName       prefix of each printed line
  */
case class RadianceFrameBufferKey(baseAddress: BigInt,
width: Int,
size: Int,
validAddress: BigInt,
fbName: String = "fb")
// Config field listing every frame buffer to instantiate; empty by default.
case object RadianceFrameBufferKey extends Field[Seq[RadianceFrameBufferKey]](Seq())
class WithRadianceCores(
n: Int,
location: HierarchicalLocation,
@@ -74,7 +85,7 @@ class WithRadianceCores(
class WithRadianceGemmini(location: HierarchicalLocation,
crossing: RocketCrossingParams,
dim: Int, accSizeInKB: Int) extends Config((site, _, up) => {
dim: Int, accSizeInKB: Int, tileSize: Int) extends Config((site, _, up) => {
case TilesLocated(`location`) => {
val prev = up(TilesLocated(`location`), site)
val idOffset = prev.size
@@ -106,8 +117,15 @@ class WithRadianceGemmini(location: HierarchicalLocation,
crossing
)) ++ prev
}
case RadianceGemminiKey => {
val smKey = site(RadianceSharedMemKey).get
Some(RadianceGemminiKey(
tileSize = tileSize,
slaveAddress = smKey.address + smKey.size + 0x3000
))
}
}) {
def this(location: HierarchicalLocation = InSubsystem, dim: Int, accSizeInKB: Int) =
def this(location: HierarchicalLocation = InSubsystem, dim: Int, accSizeInKB: Int, tileSize: Int) =
this(location, RocketCrossingParams(
master = HierarchicalElementMasterPortParams.locationDefault(location),
slave = HierarchicalElementSlavePortParams.locationDefault(location),
@@ -115,7 +133,7 @@ class WithRadianceGemmini(location: HierarchicalLocation,
case InSubsystem => CBUS
case InCluster(clusterId) => CCBUS(clusterId)
}
), dim, accSizeInKB)
), dim, accSizeInKB, tileSize)
}
class WithRadianceSharedMem(address: BigInt,
@@ -136,6 +154,18 @@ class WithRadianceSharedMem(address: BigInt,
}
})
/** Config fragment that adds one frame buffer description to [[RadianceFrameBufferKey]].
  *
  * The new entry is appended after whatever entries the config fragments further up the
  * chain already provide, so stacking this fragment several times yields several buffers.
  *
  * @param baseAddress  base address of the buffer region
  * @param width        row width in bytes
  * @param size         buffer size in bytes
  * @param validAddress address of the register that triggers printing
  * @param fbName       prefix of each printed line
  */
class WithRadianceFrameBuffer(baseAddress: BigInt,
                              width: Int,
                              size: Int,
                              validAddress: BigInt,
                              fbName: String = "fb") extends Config((_, _, up) => {
  case RadianceFrameBufferKey =>
    val added = RadianceFrameBufferKey(baseAddress, width, size, validAddress, fbName)
    up(RadianceFrameBufferKey) :+ added
})
class WithFuzzerCores(
n: Int,
useVxCache: Boolean

View File

@@ -6,15 +6,18 @@ package radiance.tile
import chisel3._
import chisel3.util._
import chisel3.experimental.BundleLiterals._
import freechips.rocketchip.diplomacy.{BigIntHexContext, ClockCrossingType, DisableMonitors, LazyModule, SimpleDevice}
import org.chipsalliance.diplomacy.DisableMonitors
import org.chipsalliance.diplomacy.lazymodule._
import freechips.rocketchip.diplomacy.{AddressSet, BigIntHexContext, ClockCrossingType, SimpleDevice}
import freechips.rocketchip.prci.ClockSinkParameters
import freechips.rocketchip.regmapper.RegField
import freechips.rocketchip.rocket._
import freechips.rocketchip.subsystem.{CanAttachTile, HierarchicalElementCrossingParamsLike, RocketCrossingParams}
import freechips.rocketchip.tile._
import freechips.rocketchip.tilelink._
import gemmini._
import org.chipsalliance.cde.config.Parameters
import radiance.subsystem.{GPUMemParams, GPUMemory}
import radiance.subsystem.{GPUMemParams, GPUMemory, RadianceGemminiKey}
case class GemminiCoreParams(
useVM: Boolean = false,
@@ -120,10 +123,20 @@ class GemminiTile private (
// tlOtherMastersNode :=* AddressOrNode(base) :=* gemmini.tlNode
tlMasterXbar.node :=* gemmini.atlNode
tlOtherMastersNode :=* gemmini.tlNode
gemmini.stlNode := tlSlaveXbar.node
// gemmini.stlNode := tlSlaveXbar.node
require(!gemmini.config.sp_singleported, "external scratchpad must be dual ported")
val configKey = p(RadianceGemminiKey).get
val regDevice = new SimpleDevice("gemmini-cmd-reg", Seq(s"gemmini-cmd-reg"))
val regNode = TLRegisterNode(
address = Seq(AddressSet(configKey.slaveAddress, 0xfff)),
device = regDevice,
beatBytes = 8,
concurrency = 1)
regNode := TLFragmenter(8, 64) := tlSlaveXbar.node
override lazy val module = new GemminiTileModuleImp(this)
}
@@ -173,17 +186,22 @@ class GemminiTileModuleImp(outer: GemminiTile) extends BaseTileModuleImp(outer)
}
ciscInst := 0.U.asTypeOf(ciscInstT)
// val boundsInst = ciscInstT.Lit(_.inst -> 0x1220b07b.U, _.rs1 -> 0.U, _.rs2 -> x"4_00040004".U)
// val spadQuartile = 0x80
val boundsInst = ciscInstT.Lit(_.inst -> 0x1220b07b.U, _.rs1 -> 0.U, _.rs2 -> x"8_00080008".U)
val spadQuartile = 0x200
val tileSize = outer.configKey.tileSize
val (boundsInst, spadQuartile) = if (tileSize == 4) {
(ciscInstT.Lit(_.inst -> 0x1220b07b.U, _.rs1 -> 0.U, _.rs2 -> x"4_00040004".U), 0x80)
} else if (tileSize == 8) {
(ciscInstT.Lit(_.inst -> 0x1220b07b.U, _.rs1 -> 0.U, _.rs2 -> x"8_00080008".U), 0x200)
} else {
(ciscInstT.Lit(_.inst -> 0x1220b07b.U, _.rs1 -> 0.U, _.rs2 -> (tileSize | (tileSize << 16) | (tileSize << 32)).U),
tileSize * tileSize * outer.gemminiParams.gemminiConfig.DIM)
}
when (ciscValid) {
assert(!accSlave.cmd.valid, "cisc state machine already busy")
switch (ciscId) {
is (0.U) {
ciscInst := microcodeEntry(Seq(
ciscInstT.Lit(_.inst -> 0x1220b07b.U, _.rs1 -> 0.U, _.rs2 -> x"8_00080008".U), // set I, J, K
ciscInstT.Lit(_.inst -> 0x3020b07b.U, _.rs1 -> 0.U, _.rs2 -> 0x600.U), // set A, B address
ciscInst := microcodeEntry(Seq(boundsInst,
ciscInstT.Lit(_.inst -> 0x3020b07b.U, _.rs1 -> 0.U, _.rs2 -> (spadQuartile * 3).U), // set A, B address
ciscInstT.Lit(_.inst -> 0x1020b07b.U, _.rs1 -> 0.U, _.rs2 -> x"0_000002b8".U) // set skip, acc
))
}
@@ -234,11 +252,39 @@ class GemminiTileModuleImp(outer: GemminiTile) extends BaseTileModuleImp(outer)
}
val gemminiIO = outer.gemmini.module.io.cmd
val regValid = Wire(Bool())
val regCommand = Wire(gemminiIO.bits.inst.cloneType)
val gemminiRs1RegLSB = RegInit(0.U(32.W))
val gemminiRs1RegMSB = RegInit(0.U(32.W))
val gemminiRs2RegLSB = RegInit(0.U(32.W))
val gemminiRs2RegMSB = RegInit(0.U(32.W))
def gemminiCommandReg(valid: Bool, bits: UInt): Bool = {
regValid := valid
regCommand := bits.asTypeOf(regCommand)
gemminiIO.ready && !ciscValid
}
outer.regNode.regmap(
0x00 -> Seq(RegField.w(32, gemminiCommandReg(_, _))),
0x10 -> Seq(
RegField.w(32, gemminiRs1RegLSB),
RegField.w(32, gemminiRs1RegMSB)),
0x18 -> Seq(
RegField.w(32, gemminiRs2RegLSB),
RegField.w(32, gemminiRs2RegMSB)),
0x20 -> Seq(RegField.r(32, outer.gemmini.module.io.busy))
)
assert(!regValid || gemminiIO.ready)
assert(!ciscValid || gemminiIO.ready)
gemminiIO.bits.status := 0.U.asTypeOf(gemminiIO.bits.status)
gemminiIO.bits.inst := ciscInst.inst.asTypeOf(gemminiIO.bits.inst)
gemminiIO.bits.rs1 := ciscInst.rs1
gemminiIO.bits.rs2 := ciscInst.rs2
gemminiIO.valid := ciscValid
gemminiIO.bits.inst := Mux(ciscValid, ciscInst.inst.asTypeOf(gemminiIO.bits.inst), regCommand)
gemminiIO.bits.rs1 := Mux(ciscValid, ciscInst.rs1, Cat(gemminiRs1RegMSB, gemminiRs1RegLSB))
gemminiIO.bits.rs2 := Mux(ciscValid, ciscInst.rs2, Cat(gemminiRs2RegMSB, gemminiRs2RegLSB))
gemminiIO.valid := ciscValid || regValid
assert(gemminiIO.ready || !gemminiIO.valid)
accSlave.status := RegNext(outer.gemmini.module.io.busy).asUInt

View File

@@ -5,17 +5,17 @@ package radiance.tile
import chisel3._
import chisel3.util._
import org.chipsalliance.diplomacy._
import freechips.rocketchip.diplomacy._
import freechips.rocketchip.diplomacy.{AddressSet, BufferParams, ClockCrossingType, TransferSizes}
import org.chipsalliance.diplomacy.lazymodule._
import freechips.rocketchip.prci.ClockSinkParameters
import freechips.rocketchip.subsystem._
import freechips.rocketchip.tile.TraceBundle
import freechips.rocketchip.tilelink._
import gemmini._
import midas.targetutils.SynthesizePrintf
import org.chipsalliance.cde.config.Parameters
import org.chipsalliance.diplomacy.{DisableMonitors, ValName}
import radiance.memory._
import radiance.subsystem.RadianceSharedMemKey
import radiance.subsystem.{RadianceFrameBufferKey, RadianceSharedMemKey}
case class RadianceClusterParams(
val clusterId: Int,
@@ -85,6 +85,7 @@ class RadianceCluster (
assert(gemminiConfig.sp_width / 8 == smem_width)
assert(gemminiConfig.sp_bank_entries == smem_depth)
VecInit(Seq(0.U, 1.U)).reduceTree(_ +& _)
val stride_by_word = true
val filter_aligned = true
val disable_monitors = true // otherwise it generate 1k+ different tl monitors
@@ -337,9 +338,13 @@ class RadianceCluster (
val traceTLNode = TLAdapterNode(clientFn = c => c, managerFn = m => m)
// printf and perf counter buffer
TLRAM(AddressSet(x"ff000000" + smem_size, numCores * 0x200 - 1)) := traceTLNode :=
TLRAM(AddressSet(smem_key.address + smem_size, numCores * 0x200 - 1)) := traceTLNode :=
TLBuffer() := TLFragmenter(4, 4) := clbus.outwardNode
p(RadianceFrameBufferKey).foreach { key =>
val fb = LazyModule(new FrameBuffer(key.baseAddress, key.width, key.size, key.validAddress, key.fbName))
fb.node := TLBuffer() := TLFragmenter(4, 4) := clbus.outwardNode
}
// Diplomacy sink nodes for cluster-wide barrier sync signal
val barrierSlaveNode = BarrierSlaveNode(numCores)
@@ -371,7 +376,7 @@ class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp(
// @cleanup: This assumes barrier params on all edges are the same, i.e. all
// cores are configured to have the same barrier id range. While true, might
// be better to actually assert this
val barrierParam = outer.barrierSlaveNode.in(0)._2
val barrierParam = outer.barrierSlaveNode.in.head._2
println(s"======= barrierParam: ${barrierParam}")
val synchronizer = Module(new BarrierSynchronizer(barrierParam))
(synchronizer.io.reqs zip outer.barrierSlaveNode.in).foreach { case (req, (b, _)) =>
@@ -401,6 +406,7 @@ class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp(
}
}
// TODO: remove Pipeline dependency of gemmini
def makeSmemBanks(): Unit = {
def make_buffer[T <: Data](mem: TwoPortSyncMem[T], r_node: TLBundle, r_edge: TLEdgeIn,

View File

@@ -16,6 +16,7 @@ import freechips.rocketchip.subsystem.HierarchicalElementCrossingParamsLike
import freechips.rocketchip.tile._
import freechips.rocketchip.tilelink._
import freechips.rocketchip.util._
import midas.targetutils.SynthesizePrintf
import org.chipsalliance.cde.config._
import radiance.memory._
import radiance.subsystem.{GPUMemParams, GPUMemory, RadianceSimArgs}
@@ -477,6 +478,10 @@ class RadianceTileModuleImp(outer: RadianceTile)
outer.decodeCoreInterrupts(core.io.interrupts) // Decode the interrupt vector
when (core.io.interrupts.msip && !RegNext(core.io.interrupts.msip)) {
SynthesizePrintf(printf("interrupt\n"))
}
core.io.interrupts.nmi.foreach { nmi => nmi := outer.nmiSinkNode.get.bundle }
// Pass through various external constants and reports that were bundle-bridged into the tile

View File

@@ -39,7 +39,7 @@ class VortexBundle(tile: RadianceTile)(implicit p: Parameters) extends CoreBundl
// val hartid = Input(UInt(tileIdLen.W))
val reset_vector = Input(UInt(resetVectorLen.W))
val interrupts = Input(new freechips.rocketchip.rocket.CoreInterrupts(false/*hasBeu*/))
// conditionally instantiate ports depending on whether we want to use VX_cache or not
// TODO: flatten this like dmem and smem
val imem = if (!tile.radianceParams.useVxCache) Some(Vec(1, new Bundle {