Merge branch 'main' of https://github.com/ucb-bar/radiance into main
This commit is contained in:
2
.gitmodules
vendored
2
.gitmodules
vendored
@@ -1,6 +1,6 @@
|
|||||||
[submodule "src/main/resources/vsrc/vortex"]
|
[submodule "src/main/resources/vsrc/vortex"]
|
||||||
path = src/main/resources/vsrc/vortex
|
path = src/main/resources/vsrc/vortex
|
||||||
url = https://github.com/hansungk/vortex
|
url = git@github.com:hansungk/vortex-private.git
|
||||||
[submodule "radpie"]
|
[submodule "radpie"]
|
||||||
path = radpie
|
path = radpie
|
||||||
url = git@github.com:hansungk/radpie.git
|
url = git@github.com:hansungk/radpie.git
|
||||||
|
|||||||
@@ -19,7 +19,13 @@ EXTRA_SIM_PREPROC_DEFINES += \
|
|||||||
+define+DBG_TRACE_CORE_PIPELINE_VCS \
|
+define+DBG_TRACE_CORE_PIPELINE_VCS \
|
||||||
+define+PERF_ENABLE \
|
+define+PERF_ENABLE \
|
||||||
+define+ICACHE_DISABLE +define+DCACHE_DISABLE \
|
+define+ICACHE_DISABLE +define+DCACHE_DISABLE \
|
||||||
+define+NUM_THREADS=8 +define+NUM_WARPS=8
|
+define+GBAR_ENABLE \
|
||||||
|
+define+GBAR_CLUSTER_ENABLE \
|
||||||
|
+define+NUM_BARRIERS=8 \
|
||||||
|
+define+NUM_CORES=2 +define+NUM_THREADS=8 +define+NUM_WARPS=8
|
||||||
|
# Can't increase this above 14, since the binary accesses 0xff0040..
|
||||||
|
# which is not mapped to any memory
|
||||||
|
# +define+SMEM_LOG_SIZE=14 \
|
||||||
|
|
||||||
# cargo handles building of Rust files all on its own, so make this a PHONY
|
# cargo handles building of Rust files all on its own, so make this a PHONY
|
||||||
# target to run cargo unconditionally
|
# target to run cargo unconditionally
|
||||||
|
|||||||
Submodule src/main/resources/vsrc/vortex updated: df4b21507e...3718a57937
@@ -14,12 +14,12 @@ import freechips.rocketchip.diplomacy._
|
|||||||
case class EmptyParams()
|
case class EmptyParams()
|
||||||
|
|
||||||
case class BarrierParams(
|
case class BarrierParams(
|
||||||
barrierIdBits: Int,
|
barrierIdBits: Int,
|
||||||
numCoreBits: Int
|
numCoreBits: Int
|
||||||
)
|
)
|
||||||
|
|
||||||
class BarrierRequestBits(
|
class BarrierRequestBits(
|
||||||
param: BarrierParams
|
param: BarrierParams
|
||||||
) extends Bundle {
|
) extends Bundle {
|
||||||
val barrierId = UInt(param.barrierIdBits.W)
|
val barrierId = UInt(param.barrierIdBits.W)
|
||||||
val sizeMinusOne = UInt(param.numCoreBits.W)
|
val sizeMinusOne = UInt(param.numCoreBits.W)
|
||||||
@@ -27,7 +27,7 @@ class BarrierRequestBits(
|
|||||||
}
|
}
|
||||||
|
|
||||||
class BarrierResponseBits(
|
class BarrierResponseBits(
|
||||||
param: BarrierParams
|
param: BarrierParams
|
||||||
) extends Bundle {
|
) extends Bundle {
|
||||||
val barrierId = UInt(param.barrierIdBits.W)
|
val barrierId = UInt(param.barrierIdBits.W)
|
||||||
}
|
}
|
||||||
@@ -38,25 +38,42 @@ class BarrierBundle(param: BarrierParams) extends Bundle {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// FIXME Separate BarrierEdgeParams from BarrierParams
|
// FIXME Separate BarrierEdgeParams from BarrierParams
|
||||||
object BarrierNodeImp extends SimpleNodeImp[BarrierParams, EmptyParams, BarrierParams, BarrierBundle] {
|
object BarrierNodeImp extends SimpleNodeImp[BarrierParams, BarrierParams, BarrierParams, BarrierBundle] {
|
||||||
def edge(pd: BarrierParams, pu: EmptyParams, p: Parameters, sourceInfo: SourceInfo) = {
|
def edge(pd: BarrierParams, pu: BarrierParams, p: Parameters, sourceInfo: SourceInfo) = {
|
||||||
// barrier parameters flow strictly downward from the master node
|
println(s"==== BarrierNodeImp: barrierIdBits=${pd.barrierIdBits}, numCoreBits=${pu.numCoreBits}")
|
||||||
pd
|
require(pd.barrierIdBits >= 0 && pu.numCoreBits >= 0)
|
||||||
|
BarrierParams(barrierIdBits = pd.barrierIdBits, numCoreBits = pu.numCoreBits)
|
||||||
}
|
}
|
||||||
def bundle(e: BarrierParams) = new BarrierBundle(e)
|
def bundle(e: BarrierParams) = new BarrierBundle(e)
|
||||||
// FIXME render
|
// FIXME render
|
||||||
def render(e: BarrierParams) = RenderedEdge(colour = "ffffff", label = "X")
|
def render(e: BarrierParams) = RenderedEdge(colour = "ffffff", label = "X")
|
||||||
}
|
}
|
||||||
|
|
||||||
case class BarrierMasterNode(val srcParams: BarrierParams)(implicit valName: ValName)
|
case class BarrierMasterNode(val barrierIdBits: Int)(implicit valName: ValName)
|
||||||
extends SourceNode(BarrierNodeImp)(Seq(srcParams))
|
extends SourceNode(BarrierNodeImp)({
|
||||||
case class BarrierSlaveNode(val numEdges: Int)(implicit valName: ValName)
|
require(barrierIdBits >= 0)
|
||||||
extends SinkNode(BarrierNodeImp)(Seq.fill(numEdges)(EmptyParams()))
|
Seq(BarrierParams(barrierIdBits = barrierIdBits, numCoreBits = -1 /* unset */))
|
||||||
|
})
|
||||||
|
case class BarrierSlaveNode(val numCores: Int)(implicit valName: ValName)
|
||||||
|
extends SinkNode(BarrierNodeImp)({
|
||||||
|
require(numCores > 0)
|
||||||
|
val numCoreBits = log2Ceil(numCores)
|
||||||
|
Seq.fill(numCores)(
|
||||||
|
BarrierParams(barrierIdBits = -1 /* unset */, numCoreBits = numCoreBits)
|
||||||
|
)
|
||||||
|
})
|
||||||
|
|
||||||
class BarrierSynchronizer(param: BarrierParams) extends Module {
|
// `delay`: number of cycles used to delay the response after all cores are
|
||||||
val numBarrierIds = 1 << param.barrierIdBits
|
// synchronized. This is used for debugging purposes to give some time for the
|
||||||
|
// cores to "settle" after the barrier synchronization, e.g. resolve
|
||||||
|
// outstanding smem requests.
|
||||||
|
class BarrierSynchronizer(
|
||||||
|
param: BarrierParams,
|
||||||
|
delay: Option[Int] = None
|
||||||
|
) extends Module {
|
||||||
|
val numBarriers = 1 << param.barrierIdBits
|
||||||
val numCores = 1 << param.numCoreBits
|
val numCores = 1 << param.numCoreBits
|
||||||
println(s"numBarrierIds: ${numBarrierIds}, numCores: ${numCores}")
|
println(s"numBarriers: ${numBarriers}, numCores: ${numCores}")
|
||||||
|
|
||||||
val io = IO(new Bundle {
|
val io = IO(new Bundle {
|
||||||
val reqs = Vec(numCores, Flipped(Decoupled(new BarrierRequestBits(param))))
|
val reqs = Vec(numCores, Flipped(Decoupled(new BarrierRequestBits(param))))
|
||||||
@@ -64,35 +81,42 @@ class BarrierSynchronizer(param: BarrierParams) extends Module {
|
|||||||
})
|
})
|
||||||
|
|
||||||
// 2-dimensional table of per-id, per-core "done" signals
|
// 2-dimensional table of per-id, per-core "done" signals
|
||||||
val table = RegInit(VecInit(Seq.fill(numBarrierIds)(VecInit(Seq.fill(numCores)(false.B)))))
|
val table = RegInit(
|
||||||
val done = Wire(Vec(numBarrierIds, Bool()))
|
VecInit(Seq.fill(numBarriers)(VecInit(Seq.fill(numCores)(false.B))))
|
||||||
table.zipWithIndex.foreach { case (row, i) =>
|
)
|
||||||
done(i) := row.reduce(_ && _)
|
val done = Seq.fill(numBarriers)(Wire(Bool()))
|
||||||
|
val delayer = delay.map(n => Seq.fill(numBarriers)(Counter(n)))
|
||||||
|
|
||||||
|
(table zip done).zipWithIndex.foreach { case ((row, d), i) =>
|
||||||
|
d := row.reduce(_ && _)
|
||||||
|
delayer.foreach { dl => when(d) { dl(i).inc() } }
|
||||||
|
dontTouch(d)
|
||||||
}
|
}
|
||||||
dontTouch(done)
|
|
||||||
|
|
||||||
io.reqs.zipWithIndex.foreach { case (req, coreId) =>
|
io.reqs.zipWithIndex.foreach { case (req, coreId) =>
|
||||||
// always ready; all this module does is latch to boolean regs
|
// always ready; all this module does is latch to boolean regs
|
||||||
req.ready := true.B
|
req.ready := true.B
|
||||||
when(req.fire) {
|
when(req.fire) {
|
||||||
assert(coreId.U === req.bits.coreId)
|
assert(coreId.U === req.bits.coreId)
|
||||||
// FIXME: don't need coreId to be hardware here
|
// @cleanup: coreId doesn't need to be hardware
|
||||||
table(req.bits.barrierId)(coreId.U) := true.B
|
table(req.bits.barrierId)(coreId.U) := true.B
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
val doneArbiter = Module(new RRArbiter(Bool(), numBarrierIds))
|
val doneArbiter = Module(new RRArbiter(Bool(), numBarriers))
|
||||||
(doneArbiter.io.in zip done).zipWithIndex.foreach { case ((in, d), i) =>
|
(doneArbiter.io.in zip done).zipWithIndex.foreach { case ((in, d), i) =>
|
||||||
in.valid := d
|
val alarm = delayer match {
|
||||||
|
case Some(dl) => dl(i).value === (dl(i).n - 1).U
|
||||||
|
case None => true.B
|
||||||
|
}
|
||||||
|
in.valid := (d && alarm)
|
||||||
in.bits := d
|
in.bits := d
|
||||||
when(in.fire) {
|
when(in.fire) {
|
||||||
table(i).foreach(_ := false.B)
|
table(i).foreach(_ := false.B)
|
||||||
|
delayer.foreach(_(i).reset())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
io.resp.valid := doneArbiter.io.out.valid
|
io.resp.valid := doneArbiter.io.out.valid
|
||||||
io.resp.bits.barrierId := doneArbiter.io.chosen
|
io.resp.bits.barrierId := doneArbiter.io.chosen
|
||||||
when(io.resp.fire) {
|
|
||||||
table(io.resp.bits.barrierId).foreach(_ := false.B)
|
|
||||||
}
|
|
||||||
doneArbiter.io.out.ready := io.resp.ready
|
doneArbiter.io.out.ready := io.resp.ready
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -253,54 +253,17 @@ class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp(
|
|||||||
println(s"======= RadianceCluster: clbus inward edges = ${outer.clbus.inwardNode.inward.inputs.length}")
|
println(s"======= RadianceCluster: clbus inward edges = ${outer.clbus.inwardNode.inward.inputs.length}")
|
||||||
println(s"======= RadianceCluster: clbus name = ${outer.clbus.busName}")
|
println(s"======= RadianceCluster: clbus name = ${outer.clbus.busName}")
|
||||||
|
|
||||||
val numBarriers = 4 // FIXME: hardcoded
|
|
||||||
|
|
||||||
// @cleanup: This assumes barrier params on all edges are the same, i.e. all
|
// @cleanup: This assumes barrier params on all edges are the same, i.e. all
|
||||||
// cores are configured to have the same barrier id range. While true, might
|
// cores are configured to have the same barrier id range. While true, might
|
||||||
// be better to actually assert this
|
// be better to actually assert this
|
||||||
val barrierParam = outer.barrierSlaveNode.in(0)._2
|
val barrierParam = outer.barrierSlaveNode.in(0)._2
|
||||||
|
println(s"======= barrierParam: ${barrierParam}")
|
||||||
val synchronizer = Module(new BarrierSynchronizer(barrierParam))
|
val synchronizer = Module(new BarrierSynchronizer(barrierParam))
|
||||||
(synchronizer.io.reqs zip outer.barrierSlaveNode.in).foreach { case (req, (b, _)) =>
|
(synchronizer.io.reqs zip outer.barrierSlaveNode.in).foreach { case (req, (b, _)) =>
|
||||||
req <> b.req
|
req <> b.req
|
||||||
b.resp <> synchronizer.io.resp // broadcast
|
b.resp <> synchronizer.io.resp // broadcast
|
||||||
}
|
}
|
||||||
|
|
||||||
// outer.barrierSlaveNode.in.foreach { case (b, e) =>
|
|
||||||
// val fakeBarrierRespId = RegNext(b.req.bits.barrierId)
|
|
||||||
// val fakeBarrierRespValid = RegNext(b.req.fire)
|
|
||||||
// b.req.ready := true.B // barrier module is always ready
|
|
||||||
// b.resp.valid := fakeBarrierRespValid
|
|
||||||
// b.resp.bits.barrierId := fakeBarrierRespId
|
|
||||||
// }
|
|
||||||
|
|
||||||
val allSyncedRegs = Seq.fill(numBarriers)(Wire(UInt(32.W)))
|
|
||||||
val perCoreSyncedRegs = Seq.fill(numBarriers)(Seq.fill(outer.numCores)(RegInit(0.U(32.W))))
|
|
||||||
(allSyncedRegs zip perCoreSyncedRegs).foreach{ case (all, per) =>
|
|
||||||
all := per.reduce((x0, x1) => (x0 =/= 0.U) && (x1 =/= 0.U))
|
|
||||||
|
|
||||||
val allPassed = per.map(_ === 2.U).reduce(_ && _)
|
|
||||||
when(allPassed) {
|
|
||||||
per.foreach(_ := 0.U)
|
|
||||||
}
|
|
||||||
|
|
||||||
dontTouch(all)
|
|
||||||
}
|
|
||||||
// FIXME: 4 cores per cluster hardcoded
|
|
||||||
outer.regNode.regmap(
|
|
||||||
0x00 -> Seq(RegField.r(32, allSyncedRegs(0))),
|
|
||||||
0x04 -> Seq(RegField(32, perCoreSyncedRegs(0)(0))),
|
|
||||||
0x08 -> Seq(RegField(32, perCoreSyncedRegs(0)(1))),
|
|
||||||
0x10 -> Seq(RegField.r(32, allSyncedRegs(1))),
|
|
||||||
0x14 -> Seq(RegField(32, perCoreSyncedRegs(1)(0))),
|
|
||||||
0x18 -> Seq(RegField(32, perCoreSyncedRegs(1)(1))),
|
|
||||||
0x20 -> Seq(RegField.r(32, allSyncedRegs(2))),
|
|
||||||
0x24 -> Seq(RegField(32, perCoreSyncedRegs(2)(0))),
|
|
||||||
0x28 -> Seq(RegField(32, perCoreSyncedRegs(2)(1))),
|
|
||||||
0x30 -> Seq(RegField.r(32, allSyncedRegs(3))),
|
|
||||||
0x34 -> Seq(RegField(32, perCoreSyncedRegs(3)(0))),
|
|
||||||
0x38 -> Seq(RegField(32, perCoreSyncedRegs(3)(1))),
|
|
||||||
)
|
|
||||||
|
|
||||||
// TODO: remove Pipeline dependency of gemmini
|
// TODO: remove Pipeline dependency of gemmini
|
||||||
def makeSmemBanks: Unit = {
|
def makeSmemBanks: Unit = {
|
||||||
outer.smem_bank_mgrs.foreach { case Seq(r, w) =>
|
outer.smem_bank_mgrs.foreach { case Seq(r, w) =>
|
||||||
|
|||||||
@@ -326,8 +326,9 @@ class RadianceTile private (
|
|||||||
|
|
||||||
// Barrier synchronization node
|
// Barrier synchronization node
|
||||||
// FIXME: hardcoded params
|
// FIXME: hardcoded params
|
||||||
val barrierParams = BarrierParams(barrierIdBits = 2, numCoreBits = 1)
|
val numBarriers = 8
|
||||||
val barrierMasterNode = BarrierMasterNode(barrierParams)
|
def barrierIdBits = log2Ceil(numBarriers)
|
||||||
|
val barrierMasterNode = BarrierMasterNode(barrierIdBits)
|
||||||
|
|
||||||
val base = p(GPUMemory()) match {
|
val base = p(GPUMemory()) match {
|
||||||
case Some(GPUMemParams(baseAddr, _)) => baseAddr
|
case Some(GPUMemParams(baseAddr, _)) => baseAddr
|
||||||
@@ -747,22 +748,6 @@ class RadianceTileModuleImp(outer: RadianceTile)
|
|||||||
// }
|
// }
|
||||||
}
|
}
|
||||||
|
|
||||||
class ClusterSynchronizer(
|
|
||||||
barrierIdWidth: Int,
|
|
||||||
numCoreWidth: Int,
|
|
||||||
) extends Module {
|
|
||||||
val io = IO(new Bundle {
|
|
||||||
val req = Flipped(Decoupled(new Bundle {
|
|
||||||
val barrierId = UInt(barrierIdWidth.W)
|
|
||||||
val sizeMinusOne = UInt(numCoreWidth.W)
|
|
||||||
val coreId = UInt(numCoreWidth.W)
|
|
||||||
}))
|
|
||||||
val resp = Decoupled(new Bundle {
|
|
||||||
val barrierId = UInt(barrierIdWidth.W)
|
|
||||||
})
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// Some @copypaste from CoalescerSourceGen.
|
// Some @copypaste from CoalescerSourceGen.
|
||||||
class VortexTLAdapter(
|
class VortexTLAdapter(
|
||||||
newSourceWidth: Int,
|
newSourceWidth: Int,
|
||||||
|
|||||||
@@ -90,15 +90,15 @@ class VortexBundle(tile: RadianceTile)(implicit p: Parameters) extends CoreBundl
|
|||||||
val smem_d_ready = Output(UInt((tile.numLsuLanes * 1).W))
|
val smem_d_ready = Output(UInt((tile.numLsuLanes * 1).W))
|
||||||
|
|
||||||
// FIXME: hardcoded
|
// FIXME: hardcoded
|
||||||
val NB_WIDTH = 2
|
val barrierIdBits = tile.barrierMasterNode.out(0)._2.barrierIdBits
|
||||||
val NC_WIDTH = 1
|
val coreIdBits = tile.barrierMasterNode.out(0)._2.numCoreBits
|
||||||
val gbar_req_valid = Output(Bool())
|
val gbar_req_valid = Output(Bool())
|
||||||
val gbar_req_id = Output(UInt(NB_WIDTH.W))
|
val gbar_req_id = Output(UInt(barrierIdBits.W))
|
||||||
val gbar_req_size_m1 = Output(UInt(NC_WIDTH.W))
|
val gbar_req_size_m1 = Output(UInt(coreIdBits.W))
|
||||||
val gbar_req_core_id = Output(UInt(NC_WIDTH.W))
|
val gbar_req_core_id = Output(UInt(coreIdBits.W))
|
||||||
val gbar_req_ready = Input(Bool())
|
val gbar_req_ready = Input(Bool())
|
||||||
val gbar_rsp_valid = Input(Bool())
|
val gbar_rsp_valid = Input(Bool())
|
||||||
val gbar_rsp_id = Input(UInt(NB_WIDTH.W))
|
val gbar_rsp_id = Input(UInt(barrierIdBits.W))
|
||||||
|
|
||||||
// val fpu = Flipped(new FPUCoreIO())
|
// val fpu = Flipped(new FPUCoreIO())
|
||||||
//val rocc = Flipped(new RoCCCoreIO(nTotalRoCCCSRs))
|
//val rocc = Flipped(new RoCCCoreIO(nTotalRoCCCSRs))
|
||||||
@@ -116,7 +116,6 @@ class Vortex(tile: RadianceTile)(implicit p: Parameters)
|
|||||||
// see VX_csr_data that implements the read logic for CSR_MHARTID/GWID.
|
// see VX_csr_data that implements the read logic for CSR_MHARTID/GWID.
|
||||||
Map(
|
Map(
|
||||||
"CORE_ID" -> tile.tileParams.tileId,
|
"CORE_ID" -> tile.tileParams.tileId,
|
||||||
"CORES_PER_CLUSTER" -> 2, // FIXME: hardcoded
|
|
||||||
// TODO: can we get this as a parameter?
|
// TODO: can we get this as a parameter?
|
||||||
"BOOTROM_HANG100" -> 0x10100,
|
"BOOTROM_HANG100" -> 0x10100,
|
||||||
"NUM_THREADS" -> tile.numLsuLanes
|
"NUM_THREADS" -> tile.numLsuLanes
|
||||||
|
|||||||
Reference in New Issue
Block a user