From d70cbc8e587497f55869fd8f7a6d14204a2efdb8 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Wed, 25 Oct 2023 19:48:21 -0700 Subject: [PATCH] Do matchingSources filtering using Vortex tag instead of TL source Since we do source generation independently for each lane, if we use TL source for filtering, it becomes possible that lane 0's source happens to match lane 1/2/3's source even when they don't belong to the same warp. Since Vortex uses a dcache req ID that is unique across instructions, using that for filtering prevents this bug. A better solution would be to do source generation for all lanes at once, though. --- src/main/scala/tile/VortexTile.scala | 60 ++++++++++++++-------------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/src/main/scala/tile/VortexTile.scala b/src/main/scala/tile/VortexTile.scala index c2a8b31..0639ffd 100644 --- a/src/main/scala/tile/VortexTile.scala +++ b/src/main/scala/tile/VortexTile.scala @@ -397,29 +397,9 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { // we need to selectively fire responses that have the same source, and // delay others. Below is the logic that implements this. 
- // choose one source out of the arriving per-lane TL D channels - val arb = Module( - new RRArbiter(core.io.dmem.get.head.d.bits.source.cloneType, outer.numLanes) - ) - arb.io.out.ready := true.B - - val dmemTLBundles = outer.dmemNodes.map(_.out.head._1) - (arb.io.in zip dmemTLBundles).foreach { case (arbIn, tlBundle) => - arbIn.valid := tlBundle.d.valid - arbIn.bits := tlBundle.d.bits.source - } - val matchingSources = Wire(UInt(outer.numLanes.W)) - matchingSources := dmemTLBundles - .map(b => - // If there is no valid response pending across all lanes, - // matchingSources should not filter out upstream ready signals, so - // set it to all-1 - !arb.io.out.valid || (b.d.bits.source === arb.io.out.bits)) - .asUInt - - // connection: VortexBundle <--> VortexTLAdapter <--> dmemNodes // @perf: this would duplicate SourceGenerator table for every lane and eat // up some area + val dmemTLBundles = outer.dmemNodes.map(_.out.head._1) val dmemTLAdapters = Seq.tabulate(outer.numLanes) { _ => Module(new VortexTLAdapter( outer.sourceWidth, @@ -430,21 +410,43 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { outer.dmemNodes(0).out.head._2 )) } + + // choose one source out of the per-lane VortexTLAdapter responses (inResp) + val arb = Module( + new RRArbiter(core.io.dmem.get.head.d.bits.source.cloneType, outer.numLanes) + ) + arb.io.out.ready := true.B + val dmemBundles = dmemTLAdapters.map(_.io.inResp) + (arb.io.in zip dmemBundles).foreach { case (arbIn, vxDmem) => + arbIn.valid := vxDmem.valid + arbIn.bits := vxDmem.bits.source + } + val matchingSources = Wire(UInt(outer.numLanes.W)) + matchingSources := dmemBundles + .map(b => + // If there is no valid response pending across all lanes, + // matchingSources should not filter out upstream ready signals, so + // set it to all-1 + !arb.io.out.valid || (b.bits.source === arb.io.out.bits)) + .asUInt + + // make connection: + // VortexBundle <--> sourceId filter <--> VortexTLAdapter <--> dmemNodes 
(core.io.dmem.get zip dmemTLAdapters) foreach { case (coreMem, tlAdapter) => tlAdapter.io.inReq <> coreMem.a coreMem.d <> tlAdapter.io.inResp } - (dmemTLAdapters zip dmemTLBundles) foreach { case (tlAdapter, tlBundle) => - tlBundle.a <> tlAdapter.io.outReq - } // using the chosen source id, // - lie to core that response is not valid if source doesn't match picked // - lie to downstream that core is not ready if source doesn't match picked - (dmemTLAdapters zip dmemTLBundles).zipWithIndex.foreach { - case ((tlAdapter, tlBundle), i) => - tlAdapter.io.outResp.bits := tlBundle.d.bits - tlAdapter.io.outResp.valid := tlBundle.d.valid && matchingSources(i) - tlBundle.d.ready := tlAdapter.io.outResp.ready && matchingSources(i) + (core.io.dmem.get zip dmemTLAdapters).zipWithIndex.foreach { + case ((coreMem, tlAdapter), i) => + coreMem.d.valid := tlAdapter.io.inResp.valid && matchingSources(i) + tlAdapter.io.inResp.ready := coreMem.d.ready && matchingSources(i) + } + (dmemTLAdapters zip dmemTLBundles) foreach { case (tlAdapter, tlOut) => + tlOut.a <> tlAdapter.io.outReq + tlAdapter.io.outResp <> tlOut.d } outer.dmemAggregateNode.out.foreach { bo =>