Do proper barrier param negotiation for numCores

This commit is contained in:
Hansung Kim
2024-03-23 13:48:44 -07:00
parent 54b64aba07
commit 215ac369cb
4 changed files with 38 additions and 42 deletions

View File

@@ -14,12 +14,12 @@ import freechips.rocketchip.diplomacy._
case class EmptyParams() case class EmptyParams()
case class BarrierParams( case class BarrierParams(
barrierIdBits: Int, barrierIdBits: Int,
numCoreBits: Int numCoreBits: Int
) )
class BarrierRequestBits( class BarrierRequestBits(
param: BarrierParams param: BarrierParams
) extends Bundle { ) extends Bundle {
val barrierId = UInt(param.barrierIdBits.W) val barrierId = UInt(param.barrierIdBits.W)
val sizeMinusOne = UInt(param.numCoreBits.W) val sizeMinusOne = UInt(param.numCoreBits.W)
@@ -27,7 +27,7 @@ class BarrierRequestBits(
} }
class BarrierResponseBits( class BarrierResponseBits(
param: BarrierParams param: BarrierParams
) extends Bundle { ) extends Bundle {
val barrierId = UInt(param.barrierIdBits.W) val barrierId = UInt(param.barrierIdBits.W)
} }
@@ -38,28 +38,38 @@ class BarrierBundle(param: BarrierParams) extends Bundle {
} }
// FIXME Separate BarrierEdgeParams from BarrierParams // FIXME Separate BarrierEdgeParams from BarrierParams
object BarrierNodeImp extends SimpleNodeImp[BarrierParams, EmptyParams, BarrierParams, BarrierBundle] { object BarrierNodeImp extends SimpleNodeImp[BarrierParams, BarrierParams, BarrierParams, BarrierBundle] {
def edge(pd: BarrierParams, pu: EmptyParams, p: Parameters, sourceInfo: SourceInfo) = { def edge(pd: BarrierParams, pu: BarrierParams, p: Parameters, sourceInfo: SourceInfo) = {
// barrier parameters flow strictly downward from the master node println(s"==== BarrierNodeImp: barrierIdBits=${pd.barrierIdBits}, numCoreBits=${pu.numCoreBits}")
pd require(pd.barrierIdBits >= 0 && pu.numCoreBits >= 0)
BarrierParams(barrierIdBits = pd.barrierIdBits, numCoreBits = pu.numCoreBits)
} }
def bundle(e: BarrierParams) = new BarrierBundle(e) def bundle(e: BarrierParams) = new BarrierBundle(e)
// FIXME render // FIXME render
def render(e: BarrierParams) = RenderedEdge(colour = "ffffff", label = "X") def render(e: BarrierParams) = RenderedEdge(colour = "ffffff", label = "X")
} }
case class BarrierMasterNode(val srcParams: BarrierParams)(implicit valName: ValName) case class BarrierMasterNode(val barrierIdBits: Int)(implicit valName: ValName)
extends SourceNode(BarrierNodeImp)(Seq(srcParams)) extends SourceNode(BarrierNodeImp)({
case class BarrierSlaveNode(val numEdges: Int)(implicit valName: ValName) require(barrierIdBits >= 0)
extends SinkNode(BarrierNodeImp)(Seq.fill(numEdges)(EmptyParams())) Seq(BarrierParams(barrierIdBits = barrierIdBits, numCoreBits = -1 /* unset */))
})
case class BarrierSlaveNode(val numCores: Int)(implicit valName: ValName)
extends SinkNode(BarrierNodeImp)({
require(numCores > 0)
val numCoreBits = log2Ceil(numCores)
Seq.fill(numCores)(
BarrierParams(barrierIdBits = -1 /* unset */, numCoreBits = numCoreBits)
)
})
// `delay`: number of cycles used to delay the response after all cores are // `delay`: number of cycles used to delay the response after all cores are
// synchronized. This is used for debugging purposes to give some time for the // synchronized. This is used for debugging purposes to give some time for the
// cores to "settle" after the barrier synchronization, e.g. resolve // cores to "settle" after the barrier synchronization, e.g. resolve
// outstanding smem requests. // outstanding smem requests.
class BarrierSynchronizer( class BarrierSynchronizer(
param: BarrierParams, param: BarrierParams,
delay: Option[Int] = None delay: Option[Int] = None
) extends Module { ) extends Module {
val numBarriers = 1 << param.barrierIdBits val numBarriers = 1 << param.barrierIdBits
val numCores = 1 << param.numCoreBits val numCores = 1 << param.numCoreBits
@@ -71,13 +81,15 @@ class BarrierSynchronizer(
}) })
// 2-dimensional table of per-id, per-core "done" signals // 2-dimensional table of per-id, per-core "done" signals
val table = RegInit(VecInit(Seq.fill(numBarriers)(VecInit(Seq.fill(numCores)(false.B))))) val table = RegInit(
VecInit(Seq.fill(numBarriers)(VecInit(Seq.fill(numCores)(false.B))))
)
val done = Seq.fill(numBarriers)(Wire(Bool())) val done = Seq.fill(numBarriers)(Wire(Bool()))
val delayer = delay.map(n => Seq.fill(numBarriers)(Counter(n))) val delayer = delay.map(n => Seq.fill(numBarriers)(Counter(n)))
(table zip done).zipWithIndex.foreach { case ((row, d), i) => (table zip done).zipWithIndex.foreach { case ((row, d), i) =>
d := row.reduce(_ && _) d := row.reduce(_ && _)
delayer.foreach{ dl => when (d) { dl(i).inc() }} delayer.foreach { dl => when(d) { dl(i).inc() } }
dontTouch(d) dontTouch(d)
} }
@@ -86,7 +98,7 @@ class BarrierSynchronizer(
req.ready := true.B req.ready := true.B
when(req.fire) { when(req.fire) {
assert(coreId.U === req.bits.coreId) assert(coreId.U === req.bits.coreId)
// FIXME: don't need coreId to be hardware here // @cleanup: coreId don't need to be hardware
table(req.bits.barrierId)(coreId.U) := true.B table(req.bits.barrierId)(coreId.U) := true.B
} }
} }
@@ -95,7 +107,7 @@ class BarrierSynchronizer(
(doneArbiter.io.in zip done).zipWithIndex.foreach { case ((in, d), i) => (doneArbiter.io.in zip done).zipWithIndex.foreach { case ((in, d), i) =>
val alarm = delayer match { val alarm = delayer match {
case Some(dl) => dl(i).value === (dl(i).n - 1).U case Some(dl) => dl(i).value === (dl(i).n - 1).U
case None => true.B case None => true.B
} }
in.valid := (d && alarm) in.valid := (d && alarm)
in.bits := d in.bits := d

View File

@@ -102,6 +102,7 @@ class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp(
// cores are configured to have the same barrier id range. While true, might // cores are configured to have the same barrier id range. While true, might
// be better to actually assert this // be better to actually assert this
val barrierParam = outer.barrierSlaveNode.in(0)._2 val barrierParam = outer.barrierSlaveNode.in(0)._2
println(s"======= barrierParam: ${barrierParam}")
val synchronizer = Module(new BarrierSynchronizer(barrierParam)) val synchronizer = Module(new BarrierSynchronizer(barrierParam))
(synchronizer.io.reqs zip outer.barrierSlaveNode.in).foreach { case (req, (b, _)) => (synchronizer.io.reqs zip outer.barrierSlaveNode.in).foreach { case (req, (b, _)) =>
req <> b.req req <> b.req

View File

@@ -329,11 +329,8 @@ class RadianceTile private (
// Barrier synchronization node // Barrier synchronization node
// FIXME: hardcoded params // FIXME: hardcoded params
val numBarriers = 8 val numBarriers = 8
val numCores = 2
def barrierIdBits = log2Ceil(numBarriers) def barrierIdBits = log2Ceil(numBarriers)
def coreIdBits = log2Ceil(numCores) val barrierMasterNode = BarrierMasterNode(barrierIdBits)
val barrierParams = BarrierParams(barrierIdBits = barrierIdBits, numCoreBits = coreIdBits)
val barrierMasterNode = BarrierMasterNode(barrierParams)
val base = p(GPUMemory()) match { val base = p(GPUMemory()) match {
case Some(GPUMemParams(baseAddr, _)) => baseAddr case Some(GPUMemParams(baseAddr, _)) => baseAddr
@@ -786,22 +783,6 @@ class RadianceTileModuleImp(outer: RadianceTile)
// } // }
} }
class ClusterSynchronizer(
barrierIdWidth: Int,
numCoreWidth: Int,
) extends Module {
val io = IO(new Bundle {
val req = Flipped(Decoupled(new Bundle {
val barrierId = UInt(barrierIdWidth.W)
val sizeMinusOne = UInt(numCoreWidth.W)
val coreId = UInt(numCoreWidth.W)
}))
val resp = Decoupled(new Bundle {
val barrierId = UInt(barrierIdWidth.W)
})
})
}
// Some @copypaste from CoalescerSourceGen. // Some @copypaste from CoalescerSourceGen.
class VortexTLAdapter( class VortexTLAdapter(
newSourceWidth: Int, newSourceWidth: Int,

View File

@@ -90,13 +90,15 @@ class VortexBundle(tile: RadianceTile)(implicit p: Parameters) extends CoreBundl
val smem_d_ready = Output(UInt((tile.numLsuLanes * 1).W)) val smem_d_ready = Output(UInt((tile.numLsuLanes * 1).W))
// FIXME: hardcoded // FIXME: hardcoded
val barrierIdBits = tile.barrierMasterNode.out(0)._2.barrierIdBits
val coreIdBits = tile.barrierMasterNode.out(0)._2.numCoreBits
val gbar_req_valid = Output(Bool()) val gbar_req_valid = Output(Bool())
val gbar_req_id = Output(UInt(tile.barrierIdBits.W)) val gbar_req_id = Output(UInt(barrierIdBits.W))
val gbar_req_size_m1 = Output(UInt(tile.coreIdBits.W)) val gbar_req_size_m1 = Output(UInt(coreIdBits.W))
val gbar_req_core_id = Output(UInt(tile.coreIdBits.W)) val gbar_req_core_id = Output(UInt(coreIdBits.W))
val gbar_req_ready = Input(Bool()) val gbar_req_ready = Input(Bool())
val gbar_rsp_valid = Input(Bool()) val gbar_rsp_valid = Input(Bool())
val gbar_rsp_id = Input(UInt(tile.barrierIdBits.W)) val gbar_rsp_id = Input(UInt(barrierIdBits.W))
// val fpu = Flipped(new FPUCoreIO()) // val fpu = Flipped(new FPUCoreIO())
//val rocc = Flipped(new RoCCCoreIO(nTotalRoCCCSRs)) //val rocc = Flipped(new RoCCCoreIO(nTotalRoCCCSRs))