diff --git a/src/main/scala/radiance/tile/Barrier.scala b/src/main/scala/radiance/tile/Barrier.scala index e3e965b..25adafe 100644 --- a/src/main/scala/radiance/tile/Barrier.scala +++ b/src/main/scala/radiance/tile/Barrier.scala @@ -53,10 +53,17 @@ case class BarrierMasterNode(val srcParams: BarrierParams)(implicit valName: Val case class BarrierSlaveNode(val numEdges: Int)(implicit valName: ValName) extends SinkNode(BarrierNodeImp)(Seq.fill(numEdges)(EmptyParams())) -class BarrierSynchronizer(param: BarrierParams) extends Module { - val numBarrierIds = 1 << param.barrierIdBits +// `delay`: number of cycles used to delay the response after all cores are +// synchronized. This is used for debugging purposes to give some time for the +// cores to "settle" after the barrier synchronization, e.g. resolve +// outstanding smem requests. +class BarrierSynchronizer( + param: BarrierParams, + delay: Option[Int] = None +) extends Module { + val numBarriers = 1 << param.barrierIdBits val numCores = 1 << param.numCoreBits - println(s"numBarrierIds: ${numBarrierIds}, numCores: ${numCores}") + println(s"numBarriers: ${numBarriers}, numCores: ${numCores}") val io = IO(new Bundle { val reqs = Vec(numCores, Flipped(Decoupled(new BarrierRequestBits(param)))) @@ -64,12 +71,15 @@ class BarrierSynchronizer(param: BarrierParams) extends Module { }) // 2-dimensional table of per-id, per-core "done" signals - val table = RegInit(VecInit(Seq.fill(numBarrierIds)(VecInit(Seq.fill(numCores)(false.B))))) - val done = Wire(Vec(numBarrierIds, Bool())) - table.zipWithIndex.foreach { case (row, i) => - done(i) := row.reduce(_ && _) + val table = RegInit(VecInit(Seq.fill(numBarriers)(VecInit(Seq.fill(numCores)(false.B))))) + val done = Seq.fill(numBarriers)(Wire(Bool())) + val delayer = delay.map(n => Seq.fill(numBarriers)(Counter(n))) + + (table zip done).zipWithIndex.foreach { case ((row, d), i) => + d := row.reduce(_ && _) + delayer.foreach{ dl => when (d) { dl(i).inc() }} + dontTouch(d) } - dontTouch(done) io.reqs.zipWithIndex.foreach { case (req, coreId) => // always ready; all this module does is latch to boolean regs @@ -81,18 +91,20 @@ class BarrierSynchronizer(param: BarrierParams) extends Module { } } - val doneArbiter = Module(new RRArbiter(Bool(), numBarrierIds)) + val doneArbiter = Module(new RRArbiter(Bool(), numBarriers)) (doneArbiter.io.in zip done).zipWithIndex.foreach { case ((in, d), i) => - in.valid := d + val alarm = delayer match { + case Some(dl) => dl(i).value === (dl(i).n - 1).U + case None => true.B + } + in.valid := (d && alarm) in.bits := d when(in.fire) { table(i).foreach(_ := false.B) + delayer.foreach(_(i).reset()) } } io.resp.valid := doneArbiter.io.out.valid io.resp.bits.barrierId := doneArbiter.io.chosen - when(io.resp.fire) { - table(io.resp.bits.barrierId).foreach(_ := false.B) - } doneArbiter.io.out.ready := io.resp.ready }