From a0c15b2cc3d586fae1d8ee4a5fe5d85bf4d1dbe1 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Wed, 8 Nov 2023 20:20:17 -0800 Subject: [PATCH] Use separate {imem,dmem}SourceWidth to fix deadlock imemSourceWidth cannot be larger than the ibuffer size. --- src/main/scala/tile/VortexTile.scala | 32 +++++++++++++++++++++------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/src/main/scala/tile/VortexTile.scala b/src/main/scala/tile/VortexTile.scala index d8d406b..c7f199e 100644 --- a/src/main/scala/tile/VortexTile.scala +++ b/src/main/scala/tile/VortexTile.scala @@ -155,12 +155,27 @@ class VortexTile private ( case Some(simtParam) => simtParam.nLanes case None => 4 } - val sourceWidth = p(SIMTCoreKey) match { + + // CAUTION: imemSourceWidth is dependent on the ibuffer size. We have to + // make sure (1 << imemSourceWidth) is smaller than the per-warp ibuffer + // size; otherwise, more requests than what ibuffer can accommodate can fire, + // and responses might stall in the downstream. This might cause issues when + // there are also outstanding dmem responses that might get blocked from + // going back to the core by a previous imem response due to serialization at + // the narrow tile<->sbus port, leading to a deadlock. + // + // This condition should ideally be asserted at elaboration time, but since + // ibuffer size is set as a hardcoded macro IBUF_SIZE that's uncontrollable + // from Chisel, there's no easy solution. We at least don't expose this as a + // Parameter and leave it as a hardcoded value here. + val imemSourceWidth = 1 // keep at 1: 1 << 2 would equal IBUF_SIZE (4) + + val dmemSourceWidth = p(SIMTCoreKey) match { // TODO: respect coalescer newSrcIds case Some(simtParam) => log2Ceil(simtParam.nSrcIds) case None => 4 } - require(sourceWidth >= 4, + require(dmemSourceWidth >= 4, "Allocating a small number of sourceIds may cause correctness bug inside " + "Vortex core due to unconstrained synchronization issues between warps." 
+ "We recommend setting nSrcIds to at least 16.") @@ -168,7 +183,7 @@ class VortexTile private ( val imemNodes = Seq.tabulate(1) { i => TLClientNode(Seq(TLMasterPortParameters.v1( clients = Seq(TLMasterParameters.v1( - sourceId = IdRange(0, 1 << sourceWidth), + sourceId = IdRange(0, 1 << imemSourceWidth), name = s"Vortex Core ${vortexParams.hartId} I-Mem $i", requestFifo = true, supportsProbe = @@ -181,7 +196,7 @@ class VortexTile private ( val dmemNodes = Seq.tabulate(numLanes) { i => TLClientNode(Seq(TLMasterPortParameters.v1( clients = Seq(TLMasterParameters.v1( - sourceId = IdRange(0, 1 << sourceWidth), + sourceId = IdRange(0, 1 << dmemSourceWidth), name = s"Vortex Core ${vortexParams.hartId} D-Mem Lane $i", requestFifo = true, supportsProbe = @@ -207,7 +222,8 @@ class VortexTile private ( val memNode = TLClientNode(Seq(TLMasterPortParameters.v1( clients = Seq(TLMasterParameters.v1( - sourceId = IdRange(0, 1 << sourceWidth), + // FIXME: need to also respect imemSourceWidth + sourceId = IdRange(0, 1 << dmemSourceWidth), name = s"Vortex Core ${vortexParams.hartId} Mem Interface", requestFifo = true, supportsProbe = TransferSizes(16, 16), // FIXME: hardcoded @@ -397,7 +413,7 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { println(s"width of d channel data ${core.io.mem.get.d.bits.data.getWidth}") val memTLAdapter = Module(new VortexTLAdapter( - outer.sourceWidth, + outer.dmemSourceWidth, chiselTypeOf(core.io.mem.get.a.bits), chiselTypeOf(core.io.mem.get.d.bits), outer.memNode.out.head @@ -410,7 +426,7 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { memTLAdapter.io.outResp <> outer.memNode.out(0)._1.d } else { val imemTLAdapter = Module(new VortexTLAdapter( - outer.sourceWidth, + outer.imemSourceWidth, chiselTypeOf(core.io.imem.get(0).a.bits), chiselTypeOf(core.io.imem.get(0).d.bits), outer.imemNodes.head.out.head @@ -426,7 +442,7 @@ class VortexTileModuleImp(outer: VortexTile) extends 
BaseTileModuleImp(outer) { val dmemTLBundles = outer.dmemNodes.map(_.out.head._1) val dmemTLAdapters = Seq.tabulate(outer.numLanes) { _ => Module(new VortexTLAdapter( - outer.sourceWidth, + outer.dmemSourceWidth, chiselTypeOf(core.io.dmem.get(0).a.bits), chiselTypeOf(core.io.dmem.get(0).d.bits), outer.dmemNodes(0).out.head