From ff4fc66c56de357e0ac5411a30742cebab1680d8 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Sun, 15 Oct 2023 01:17:20 -0700 Subject: [PATCH 01/15] Reformat --- src/main/scala/rocket/VortexCore.scala | 2 - src/main/scala/tile/VortexTile.scala | 221 ++++++++++++++++--------- 2 files changed, 142 insertions(+), 81 deletions(-) diff --git a/src/main/scala/rocket/VortexCore.scala b/src/main/scala/rocket/VortexCore.scala index 53bd707..5ce05b1 100644 --- a/src/main/scala/rocket/VortexCore.scala +++ b/src/main/scala/rocket/VortexCore.scala @@ -8,8 +8,6 @@ import chisel3.util._ import chisel3.experimental._ import org.chipsalliance.cde.config.Parameters import freechips.rocketchip.tile._ -import freechips.rocketchip.util._ -import freechips.rocketchip.scie._ import tile.VortexTile class VortexBundle(tile: VortexTile)(implicit p: Parameters) extends CoreBundle { diff --git a/src/main/scala/tile/VortexTile.scala b/src/main/scala/tile/VortexTile.scala index 1a34a3f..6c900fd 100644 --- a/src/main/scala/tile/VortexTile.scala +++ b/src/main/scala/tile/VortexTile.scala @@ -18,8 +18,6 @@ import freechips.rocketchip.regmapper.RegField import freechips.rocketchip.tile._ import rocket.Vortex -import scala.collection.mutable.ListBuffer - case class RocketTileBoundaryBufferParams(force: Boolean = false) case class VortexTileParams( @@ -35,26 +33,31 @@ case class VortexTileParams( blockerCtrlAddr: Option[BigInt] = None, clockSinkParams: ClockSinkParameters = ClockSinkParameters(), boundaryBuffers: Option[RocketTileBoundaryBufferParams] = None - ) extends InstantiableTileParams[VortexTile] { +) extends InstantiableTileParams[VortexTile] { // require(icache.isDefined) - // require(dcache.isDefined) + // require(dcache.isDefined) - def instantiate(crossing: TileCrossingParamsLike, lookup: LookupByHartIdImpl)(implicit p: Parameters): VortexTile = { + def instantiate(crossing: TileCrossingParamsLike, lookup: LookupByHartIdImpl)( + implicit p: Parameters + ): VortexTile = { new 
VortexTile(this, crossing, lookup) } } -class VortexTile private( - val vortexParams: VortexTileParams, - crossing: ClockCrossingType, - lookup: LookupByHartIdImpl, - q: Parameters) - extends BaseTile(vortexParams, crossing, lookup, q) +class VortexTile private ( + val vortexParams: VortexTileParams, + crossing: ClockCrossingType, + lookup: LookupByHartIdImpl, + q: Parameters +) extends BaseTile(vortexParams, crossing, lookup, q) with SinksExternalInterrupts - with SourcesExternalNotifications -{ + with SourcesExternalNotifications { // Private constructor ensures altered LazyModule.p is used implicitly - def this(params: VortexTileParams, crossing: TileCrossingParamsLike, lookup: LookupByHartIdImpl)(implicit p: Parameters) = + def this( + params: VortexTileParams, + crossing: TileCrossingParamsLike, + lookup: LookupByHartIdImpl + )(implicit p: Parameters) = this(params, crossing.crossingType, lookup, p) val intOutwardNode = IntIdentityNode() @@ -87,40 +90,66 @@ class VortexTile private( beatBytes = lazyCoreParamsView.coreDataBytes, minLatency = 1)))*/ - val imemNodes = Seq.tabulate(1) { i => TLClientNode(Seq(TLMasterPortParameters.v1( - clients = Seq(TLMasterParameters.v1( - sourceId = IdRange(0, 1 << 10), // TODO magic number - name = s"Vortex Core ${vortexParams.hartId} I-Mem $i", - requestFifo = true, - supportsProbe = TransferSizes(1, lazyCoreParamsView.coreDataBytes), - supportsGet = TransferSizes(1, lazyCoreParamsView.coreDataBytes) - )) - )))} + val imemNodes = Seq.tabulate(1) { i => + TLClientNode( + Seq( + TLMasterPortParameters.v1( + clients = Seq( + TLMasterParameters.v1( + sourceId = IdRange(0, 1 << 10), // TODO magic number + name = s"Vortex Core ${vortexParams.hartId} I-Mem $i", + requestFifo = true, + supportsProbe = + TransferSizes(1, lazyCoreParamsView.coreDataBytes), + supportsGet = TransferSizes(1, lazyCoreParamsView.coreDataBytes) + ) + ) + ) + ) + ) + } - val dmemNodes = Seq.tabulate(4) { i => TLClientNode(Seq(TLMasterPortParameters.v1( - 
clients = Seq(TLMasterParameters.v1( - sourceId = IdRange(0, 1 << 10), // TODO magic number - name = s"Vortex Core ${vortexParams.hartId} D-Mem Lane $i", - requestFifo = true, - supportsProbe = TransferSizes(1, lazyCoreParamsView.coreDataBytes), - supportsGet = TransferSizes(1, lazyCoreParamsView.coreDataBytes), - supportsPutFull = TransferSizes(1, lazyCoreParamsView.coreDataBytes), - supportsPutPartial = TransferSizes(1, lazyCoreParamsView.coreDataBytes) - )) - )))} + val dmemNodes = Seq.tabulate(4) { i => + TLClientNode( + Seq( + TLMasterPortParameters.v1( + clients = Seq( + TLMasterParameters.v1( + sourceId = IdRange(0, 1 << 10), // TODO magic number + name = s"Vortex Core ${vortexParams.hartId} D-Mem Lane $i", + requestFifo = true, + supportsProbe = + TransferSizes(1, lazyCoreParamsView.coreDataBytes), + supportsGet = TransferSizes(1, lazyCoreParamsView.coreDataBytes), + supportsPutFull = + TransferSizes(1, lazyCoreParamsView.coreDataBytes), + supportsPutPartial = + TransferSizes(1, lazyCoreParamsView.coreDataBytes) + ) + ) + ) + ) + ) + } + + val memNode = TLClientNode( + Seq( + TLMasterPortParameters.v1( + clients = Seq( + TLMasterParameters.v1( + sourceId = IdRange(0, 1 << 15), // TODO magic numbers + name = s"Vortex Core ${vortexParams.hartId} Mem Interface", + requestFifo = true, + supportsProbe = TransferSizes(16, 16), + supportsGet = TransferSizes(16, 16), + supportsPutFull = TransferSizes(16, 16), + supportsPutPartial = TransferSizes(16, 16) + ) + ) + ) + ) + ) - val memNode = TLClientNode(Seq(TLMasterPortParameters.v1( - clients = Seq(TLMasterParameters.v1( - sourceId = IdRange(0, 1 << 15), // TODO magic numbers - name = s"Vortex Core ${vortexParams.hartId} Mem Interface", - requestFifo = true, - supportsProbe = TransferSizes(16, 16), - supportsGet = TransferSizes(16, 16), - supportsPutFull = TransferSizes(16, 16), - supportsPutPartial = TransferSizes(16, 16) - )), - ))) - if (vortexParams.useVxCache) { tlMasterXbar.node := TLWidthWidget(16) := memNode 
} else { @@ -131,7 +160,8 @@ class VortexTile private( /* below are copied from rocket */ val bus_error_unit = vortexParams.beuAddr map { a => - val beu = LazyModule(new BusErrorUnit(new L1BusErrors, BusErrorUnitParams(a))) + val beu = + LazyModule(new BusErrorUnit(new L1BusErrors, BusErrorUnitParams(a))) intOutwardNode := beu.intNode connectTLSlave(beu.node, xBytes) beu @@ -139,31 +169,42 @@ class VortexTile private( val tile_master_blocker = tileParams.blockerCtrlAddr - .map(BasicBusBlockerParams(_, xBytes, masterPortBeatBytes, deadlock = true)) + .map( + BasicBusBlockerParams(_, xBytes, masterPortBeatBytes, deadlock = true) + ) .map(bp => LazyModule(new BasicBusBlocker(bp))) tile_master_blocker.foreach(lm => connectTLSlave(lm.controlNode, xBytes)) // TODO: this doesn't block other masters, e.g. RoCCs - tlOtherMastersNode := tile_master_blocker.map { _.node := tlMasterXbar.node } getOrElse { tlMasterXbar.node } + tlOtherMastersNode := tile_master_blocker.map { + _.node := tlMasterXbar.node + } getOrElse { tlMasterXbar.node } masterNode :=* tlOtherMastersNode DisableMonitors { implicit p => tlSlaveXbar.node :*= slaveNode } - val dtimProperty = Nil //Seq(dmemDevice.asProperty).flatMap(p => Map("sifive,dtim" -> p)) + val dtimProperty = + Nil // Seq(dmemDevice.asProperty).flatMap(p => Map("sifive,dtim" -> p)) - val itimProperty = Nil //frontend.icache.itimProperty.toSeq.flatMap(p => Map("sifive,itim" -> p)) + val itimProperty = + Nil // frontend.icache.itimProperty.toSeq.flatMap(p => Map("sifive,itim" -> p)) - val beuProperty = bus_error_unit.map(d => Map( - "sifive,buserror" -> d.device.asProperty)).getOrElse(Nil) + val beuProperty = bus_error_unit + .map(d => Map("sifive,buserror" -> d.device.asProperty)) + .getOrElse(Nil) - val cpuDevice: SimpleDevice = new SimpleDevice("cpu", Seq("sifive,vortex0", "riscv")) { - override def parent = Some(ResourceAnchors.cpus) - override def describe(resources: ResourceBindings): Description = { - val Description(name, mapping) = 
super.describe(resources) - Description(name, mapping ++ cpuProperties ++ nextLevelCacheProperty - ++ tileProperties ++ dtimProperty ++ itimProperty ++ beuProperty) + val cpuDevice: SimpleDevice = + new SimpleDevice("cpu", Seq("sifive,vortex0", "riscv")) { + override def parent = Some(ResourceAnchors.cpus) + override def describe(resources: ResourceBindings): Description = { + val Description(name, mapping) = super.describe(resources) + Description( + name, + mapping ++ cpuProperties ++ nextLevelCacheProperty + ++ tileProperties ++ dtimProperty ++ itimProperty ++ beuProperty + ) + } } - } ResourceBinding { Resource(cpuDevice, "reg").bind(ResourceAddress(staticIdForMetadataUseOnly)) @@ -171,15 +212,33 @@ class VortexTile private( override lazy val module = new VortexTileModuleImp(this) - override def makeMasterBoundaryBuffers(crossing: ClockCrossingType)(implicit p: Parameters) = (vortexParams.boundaryBuffers, crossing) match { - case (Some(RocketTileBoundaryBufferParams(true )), _) => TLBuffer() - case (Some(RocketTileBoundaryBufferParams(false)), _: RationalCrossing) => TLBuffer(BufferParams.none, BufferParams.flow, BufferParams.none, BufferParams.flow, BufferParams(1)) + override def makeMasterBoundaryBuffers( + crossing: ClockCrossingType + )(implicit p: Parameters) = (vortexParams.boundaryBuffers, crossing) match { + case (Some(RocketTileBoundaryBufferParams(true)), _) => TLBuffer() + case (Some(RocketTileBoundaryBufferParams(false)), _: RationalCrossing) => + TLBuffer( + BufferParams.none, + BufferParams.flow, + BufferParams.none, + BufferParams.flow, + BufferParams(1) + ) case _ => TLBuffer(BufferParams.none) } - override def makeSlaveBoundaryBuffers(crossing: ClockCrossingType)(implicit p: Parameters) = (vortexParams.boundaryBuffers, crossing) match { - case (Some(RocketTileBoundaryBufferParams(true )), _) => TLBuffer() - case (Some(RocketTileBoundaryBufferParams(false)), _: RationalCrossing) => TLBuffer(BufferParams.flow, BufferParams.none, 
BufferParams.none, BufferParams.none, BufferParams.none) + override def makeSlaveBoundaryBuffers( + crossing: ClockCrossingType + )(implicit p: Parameters) = (vortexParams.boundaryBuffers, crossing) match { + case (Some(RocketTileBoundaryBufferParams(true)), _) => TLBuffer() + case (Some(RocketTileBoundaryBufferParams(false)), _: RationalCrossing) => + TLBuffer( + BufferParams.flow, + BufferParams.none, + BufferParams.none, + BufferParams.none, + BufferParams.none + ) case _ => TLBuffer(BufferParams.none) } } @@ -188,7 +247,7 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { Annotated.params(this, outer.vortexParams) val core = Module(new Vortex(outer)(outer.p)) - + core.io.clock := clock core.io.reset := reset @@ -200,8 +259,7 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { ) // Report when the tile has ceased to retire instructions; for now the only cause is clock gating - outer.reportCease(outer.vortexParams.core.clockGate.option( - core.io.cease)) + outer.reportCease(outer.vortexParams.core.clockGate.option(core.io.cease)) outer.reportWFI(Some(core.io.wfi)) @@ -228,8 +286,7 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { println(s"width of d channel data ${core.io.mem.get.d.bits.data.getWidth}") core.io.mem.get.a <> outer.memNode.out.head._1.a core.io.mem.get.d <> outer.memNode.out.head._1.d - } - else { + } else { (core.io.imem.get zip outer.imemNodes).foreach { case (coreMem, tileNode) => coreMem.d <> tileNode.out.head._1.d coreMem.a <> tileNode.out.head._1.a @@ -239,7 +296,9 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { // - lie to core that response is not valid if source doesn't match picked // - lie to downstream that core is not ready if source doesn't match picked - val arb = Module(new RRArbiter(core.io.dmem.get.head.d.bits.source.cloneType, 4)) + val arb = Module( + new 
RRArbiter(core.io.dmem.get.head.d.bits.source.cloneType, 4) + ) val matchingSources = Wire(UInt(4.W)) val dmemDs = outer.dmemNodes.map(_.out.head._1.d) @@ -247,13 +306,16 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { arbIn.valid := tileNode.valid arbIn.bits := tileNode.bits.source } - matchingSources := dmemDs.map(d => (d.bits.source === arb.io.out.bits) && arb.io.out.valid).asUInt + matchingSources := dmemDs + .map(d => (d.bits.source === arb.io.out.bits) && arb.io.out.valid) + .asUInt arb.io.out.ready := true.B - (core.io.dmem.get zip dmemDs).zipWithIndex.foreach { case ((coreMem, tileNode), i) => - coreMem.d.bits := tileNode.bits - coreMem.d.valid := tileNode.valid && matchingSources(i) - tileNode.ready := coreMem.d.ready && matchingSources(i) + (core.io.dmem.get zip dmemDs).zipWithIndex.foreach { + case ((coreMem, tileNode), i) => + coreMem.d.bits := tileNode.bits + coreMem.d.valid := tileNode.valid && matchingSources(i) + tileNode.ready := coreMem.d.ready && matchingSources(i) } (core.io.dmem.get zip outer.dmemNodes).foreach { case (coreMem, tileNode) => @@ -265,7 +327,7 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { // TODO eliminate this redundancy // val h = dcachePorts.size - //val c = core.dcacheArbPorts + // val c = core.dcacheArbPorts // val o = outer.nDCachePorts // require(h == c, s"port list size was $h, core expected $c") // require(h == o, s"port list size was $h, outer counted $o") @@ -275,5 +337,6 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { // FIXME: unsure this is necessary trait HasFpuOpt { this: RocketTileModuleImp => - val fpuOpt = outer.tileParams.core.fpu.map(params => Module(new FPU(params)(outer.p))) + val fpuOpt = + outer.tileParams.core.fpu.map(params => Module(new FPU(params)(outer.p))) } From cbd32b78a9e029c2f249ead17e43e454b2eeed9d Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Sun, 15 Oct 2023 22:56:07 -0700 Subject: 
[PATCH 02/15] add metadata field in SourceGenerator table This enables using SourceGenerator as a sourceId converter/restorer. --- src/main/scala/tilelink/Coalescing.scala | 48 +++++++++++++++++------- 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index 1b24c20..6447b24 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -254,41 +254,59 @@ case class CoalescedResponse(config: CoalescerConfig) dataWidth = (8 * (1 << config.maxCoalLogSize)) ) -// If `ignoreInUse`, just keep giving out new IDs without checking if it is in -// use. -class SourceGenerator(sourceWidth: Int, ignoreInUse: Boolean = true) +// `metadata` is an extra field in the sourceId table that can be used for +// storing e.g. the UUID originally attached to a request. This is useful for +// using this module as a source ID converter / compressor. If `None`, this +// field is not instantiated. +// TODO: implement lookup logic. +// +// If `ignoreInUse`, just keep giving out new IDs without any collision checking. +// This might result in TL violation. 
+class SourceGenerator[T <: Data]( + sourceWidth: Int, + metadata: Option[T] = None, + ignoreInUse: Boolean = true +) extends Module { val io = IO(new Bundle { val gen = Input(Bool()) val reclaim = Input(Valid(UInt(sourceWidth.W))) val id = Output(Valid(UInt(sourceWidth.W))) + // for debugging; indicates whether there is at least one inflight request + // that hasn't been reclaimed yet val inflight = Output(Bool()) }) val head = RegInit(UInt(sourceWidth.W), 0.U) head := Mux(io.gen, head + 1.U, head) - // for debugging - // also for indicating if there is at least one inflight request that hasn't been reclaimed val outstanding = RegInit(UInt((sourceWidth + 1).W), 0.U) io.inflight := (outstanding > 0.U) || io.gen val numSourceId = 1 << sourceWidth - // true: in use, false: available - val occupancyTable = Mem(numSourceId, Valid(UInt(sourceWidth.W))) - when(reset.asBool) { - (0 until numSourceId).foreach { occupancyTable(_).valid := false.B } + val row = new Bundle { + val meta = metadata match { + case Some(gen) => gen.cloneType + case None => UInt(0.W) + } + val id = Valid(UInt(sourceWidth.W)) } - val frees = (0 until numSourceId).map(!occupancyTable(_).valid) + // valid: in use, invalid: available + // val occupancyTable = Mem(numSourceId, Valid(UInt(sourceWidth.W))) + val occupancyTable = Mem(numSourceId, row) + when(reset.asBool) { + (0 until numSourceId).foreach { occupancyTable(_).id.valid := false.B } + } + val frees = (0 until numSourceId).map(!occupancyTable(_).id.valid) val lowestFree = PriorityEncoder(frees) val lowestFreeRow = occupancyTable(lowestFree) - io.id.valid := (if (ignoreInUse) true.B else !lowestFreeRow.valid) + io.id.valid := (if (ignoreInUse) true.B else !lowestFreeRow.id.valid) io.id.bits := lowestFree when(io.gen && io.id.valid /* fire */ ) { - occupancyTable(io.id.bits).valid := true.B // mark in use + occupancyTable(io.id.bits).id.valid := true.B // mark in use } when(io.reclaim.valid) { - occupancyTable(io.reclaim.bits).valid := false.B 
// mark freed + occupancyTable(io.reclaim.bits).id.valid := false.B // mark freed } when(io.gen && io.id.valid) { @@ -738,6 +756,9 @@ class MultiCoalescer( if (!config.enable) disable } +// This module mostly handles the correct ready/valid handshake depending on +// sourceId availability. Actual generation logic is done by the +// SourceGenerator module. class CoalescerSourceGen( config: CoalescerConfig, coalReqT: CoalescedRequest, @@ -758,6 +779,7 @@ class CoalescerSourceGen( // TODO: make sourceGen.io.reclaim Decoupled? io.outReq <> io.inReq + // "man-in-the-middle" io.inReq.ready := io.outReq.ready && sourceGen.io.id.valid // overwrite bits affected by sourcegen backpressure io.outReq.valid := io.inReq.valid && sourceGen.io.id.valid From 78012800e7783ff146ec4e00b6cef3b46d03c632 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Sun, 15 Oct 2023 23:17:01 -0700 Subject: [PATCH 03/15] Clarify confusing in/outResp naming in SourceGenerator --- src/main/scala/tilelink/Coalescing.scala | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index 6447b24..06f6abf 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -765,25 +765,30 @@ class CoalescerSourceGen( respT: TLBundleD ) extends Module { val io = IO(new Bundle { + // in/out means upstream/downstream val inReq = Flipped(Decoupled(coalReqT.cloneType)) val outReq = Decoupled(coalReqT.cloneType) - val inResp = Flipped(Decoupled(respT.cloneType)) + // no need for inResp, since CoalShiftQueue/Mono/MultiCoalescer only generates + // requests and do not take in responses. Coalesced responses are + // separately taken in by InflightTable. 
+ val outResp = Flipped(Decoupled(respT.cloneType)) }) val sourceGen = Module( new SourceGenerator(log2Ceil(config.numNewSrcIds), ignoreInUse = false) ) sourceGen.io.gen := io.outReq.fire // use up a source ID only when request is created - sourceGen.io.reclaim.valid := io.inResp.fire - sourceGen.io.reclaim.bits := io.inResp.bits.source - io.inResp.ready := true.B // should be always ready to reclaim old ID + sourceGen.io.reclaim.valid := io.outResp.fire + sourceGen.io.reclaim.bits := io.outResp.bits.source // TODO: make sourceGen.io.reclaim Decoupled? + // passthrough logic io.outReq <> io.inReq // "man-in-the-middle" io.inReq.ready := io.outReq.ready && sourceGen.io.id.valid // overwrite bits affected by sourcegen backpressure io.outReq.valid := io.inReq.valid && sourceGen.io.id.valid io.outReq.bits.source := sourceGen.io.id.bits + io.outResp.ready := true.B // should be always ready to reclaim old ID } class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) @@ -902,7 +907,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) // val coalSourceGen = Module(new CoalescerSourceGen(config, coalReqT, tlCoal.d.bits)) coalSourceGen.io.inReq <> coalescer.io.coalReq - coalSourceGen.io.inResp <> tlCoal.d + coalSourceGen.io.outResp <> tlCoal.d // InflightTable IO // From 5b356b735caf5e910df7ab752939ea4baccd463c Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Sun, 15 Oct 2023 23:24:32 -0700 Subject: [PATCH 04/15] Fix unused warning in Coalescing --- src/main/scala/tilelink/Coalescing.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index 06f6abf..6ee71e9 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -2230,7 +2230,7 @@ class CoalescerXbarImpl(outer: CoalescerXbar, // For the uncoalesced data response (outer.nonCoalNarrowNodes zip io.nonCoalResps).foreach{ case(node,resp) 
=> - val (tlOut, edgeOut) = node.out(0) + val (tlOut, _) = node.out(0) val nonCoalResp = Wire(respNonCoalEntryT) nonCoalResp.fromTLD(tlOut.d.bits) tlOut.d.ready := resp.ready @@ -2246,7 +2246,7 @@ class CoalescerXbarImpl(outer: CoalescerXbar, ) outer.coalReqNodes.zipWithIndex.foreach{ case(node, idx) => - val (tlOut, edgeOut) = node.out(0) + val (tlOut, _) = node.out(0) coalRespRRArbiter.io.in(idx) <> tlOut.d } //Connect output of arbiter to coalesced reponse output From c34853447b01ccda0a44370419b88c0c27025bdd Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Mon, 16 Oct 2023 01:11:50 -0700 Subject: [PATCH 05/15] Implement metadata retrieval in SourceGenerator --- src/main/scala/tilelink/Coalescing.scala | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index 6ee71e9..bdc6edb 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -265,9 +265,8 @@ case class CoalescedResponse(config: CoalescerConfig) class SourceGenerator[T <: Data]( sourceWidth: Int, metadata: Option[T] = None, - ignoreInUse: Boolean = true -) - extends Module { + ignoreInUse: Boolean = false +) extends Module { val io = IO(new Bundle { val gen = Input(Bool()) val reclaim = Input(Valid(UInt(sourceWidth.W))) @@ -275,6 +274,14 @@ class SourceGenerator[T <: Data]( // for debugging; indicates whether there is at least one inflight request // that hasn't been reclaimed yet val inflight = Output(Bool()) + // below are used when metadata is not None + // `peek` is the retrieved metadata saved for the request when corresponding + // request has come back (and hence `reclaim` was set). + // Although these do not use ValidIO, it is safe because any in-flight + // response coming back should have allocated a valid entry in the table + // when it went out. 
+ val meta = Input(metadata.getOrElse(UInt(0.W))) + val peek = Output(metadata.getOrElse(UInt(0.W))) }) val head = RegInit(UInt(sourceWidth.W), 0.U) head := Mux(io.gen, head + 1.U, head) @@ -304,8 +311,12 @@ class SourceGenerator[T <: Data]( io.id.bits := lowestFree when(io.gen && io.id.valid /* fire */ ) { occupancyTable(io.id.bits).id.valid := true.B // mark in use + if (metadata.isDefined) { + occupancyTable(io.id.bits).meta := io.meta + } } when(io.reclaim.valid) { + // @perf: would this require multiple write ports? occupancyTable(io.reclaim.bits).id.valid := false.B // mark freed } @@ -319,6 +330,10 @@ class SourceGenerator[T <: Data]( outstanding := outstanding - 1.U } + if (metadata.isDefined) { + io.peek := occupancyTable(io.reclaim.bits).meta + } + dontTouch(outstanding) } From 630d76461c3ac32f229c501c3a92a797119fee3e Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Mon, 16 Oct 2023 01:12:33 -0700 Subject: [PATCH 06/15] Do proper TL sourceId allocation for Vortex dmem requests This fixes sourceId collision that occurs when naively re-using the tag bits of a Vortex dmem request as TL source, which happens because Vortex core does not allocate a new LSU entry for writes. `VortexSourceGen` module acts as a Vortex tag <-> new TL source ID converter, where it allocates a new ID for every new Vortex request, and restores its original tag bits from the metadata embedded in the SourceGenerator module.
TODO: - Decouple sourceWidth of downstream TL nodes from Vortex's tag bit width; they are set to be the same for convenience as of now - Apply this to imem requests as well --- src/main/scala/tile/VortexTile.scala | 116 ++++++++++++++++++++------- 1 file changed, 88 insertions(+), 28 deletions(-) diff --git a/src/main/scala/tile/VortexTile.scala b/src/main/scala/tile/VortexTile.scala index 6c900fd..3b81e77 100644 --- a/src/main/scala/tile/VortexTile.scala +++ b/src/main/scala/tile/VortexTile.scala @@ -4,7 +4,7 @@ package tile import chisel3._ -import chisel3.util.RRArbiter +import chisel3.util._ import org.chipsalliance.cde.config._ import freechips.rocketchip.devices.tilelink._ import freechips.rocketchip.diplomacy._ @@ -90,6 +90,8 @@ class VortexTile private ( beatBytes = lazyCoreParamsView.coreDataBytes, minLatency = 1)))*/ + val numLanes = 4 // FIXME: hardcoded + val imemNodes = Seq.tabulate(1) { i => TLClientNode( Seq( @@ -109,7 +111,7 @@ class VortexTile private ( ) } - val dmemNodes = Seq.tabulate(4) { i => + val dmemNodes = Seq.tabulate(numLanes) { i => TLClientNode( Seq( TLMasterPortParameters.v1( @@ -289,38 +291,57 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { } else { (core.io.imem.get zip outer.imemNodes).foreach { case (coreMem, tileNode) => coreMem.d <> tileNode.out.head._1.d - coreMem.a <> tileNode.out.head._1.a + tileNode.out.head._1.a <> coreMem.a } - // pick source id and: + // Since the individual per-lane TL requests might come back out-of-sync between + // the lanes, but Vortex core expects the lane requests to be synced, + // we need to selectively fire responses that have the same source, and + // delay others. Below is the logic that implements this. 
+ + // choose one source out of the arriving per-lane TL D channels + val arb = Module( + new RRArbiter(core.io.dmem.get.head.d.bits.source.cloneType, outer.numLanes) + ) + val dmemTLBundles = outer.dmemNodes.map(_.out.head._1) + arb.io.out.ready := true.B + (arb.io.in zip dmemTLBundles).foreach { case (arbIn, tlBundle) => + arbIn.valid := tlBundle.d.valid + arbIn.bits := tlBundle.d.bits.source + } + val matchingSources = Wire(UInt(outer.numLanes.W)) + matchingSources := dmemTLBundles + .map(b => (b.d.bits.source === arb.io.out.bits) && arb.io.out.valid) + .asUInt + + // connection: VortexBundle <--> sourceGen <--> dmemNodes + val sourceGens = Seq.tabulate(outer.numLanes) { _ => + Module(new VortexSourceGen( + 2, // FIXME: hardcoded + dmemTLBundles.head.a.bits, + dmemTLBundles.head.d.bits, + )) + } + (core.io.dmem.get zip sourceGens) foreach { case (coreMem, sourceGen) => + sourceGen.io.inReq <> coreMem.a + coreMem.d <> sourceGen.io.inResp + } + (sourceGens zip dmemTLBundles) foreach { case (sourceGen, tlBundle) => + tlBundle.a <> sourceGen.io.outReq + } + // using the chosen source id, // - lie to core that response is not valid if source doesn't match picked // - lie to downstream that core is not ready if source doesn't match picked - - val arb = Module( - new RRArbiter(core.io.dmem.get.head.d.bits.source.cloneType, 4) - ) - val matchingSources = Wire(UInt(4.W)) - val dmemDs = outer.dmemNodes.map(_.out.head._1.d) - - (arb.io.in zip dmemDs).zipWithIndex.foreach { case ((arbIn, tileNode), i) => - arbIn.valid := tileNode.valid - arbIn.bits := tileNode.bits.source - } - matchingSources := dmemDs - .map(d => (d.bits.source === arb.io.out.bits) && arb.io.out.valid) - .asUInt - arb.io.out.ready := true.B - - (core.io.dmem.get zip dmemDs).zipWithIndex.foreach { - case ((coreMem, tileNode), i) => - coreMem.d.bits := tileNode.bits - coreMem.d.valid := tileNode.valid && matchingSources(i) - tileNode.ready := coreMem.d.ready && matchingSources(i) + (sourceGens zip 
dmemTLBundles).zipWithIndex.foreach { + case ((sourceGen, tlBundle), i) => + sourceGen.io.outResp.bits := tlBundle.d.bits + sourceGen.io.outResp.valid := tlBundle.d.valid && matchingSources(i) + tlBundle.d.ready := sourceGen.io.outResp.ready && matchingSources(i) } - (core.io.dmem.get zip outer.dmemNodes).foreach { case (coreMem, tileNode) => - coreMem.a <> tileNode.out.head._1.a - } + // (core.io.dmem.get zip outer.dmemNodes).foreach { case (coreMem, tileNode) => + // tileNode.out.head._1.a <> coreMem.a + // } } // core.io.fpu := DontCare @@ -335,6 +356,45 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { // dcacheArb.io.requestor <> dcachePorts.toSeq } +// TODO: Currently in/out are assumed to be the same TL bundle with the same +// sourceWidth; this needs to be more flexible. +// +// Some @copypaste from CoalescerSourceGen. +class VortexSourceGen( + newSourceWidth: Int, + reqT: TLBundleA, + respT: TLBundleD +) extends Module { + val io = IO(new Bundle { + // in/out means upstream/downstream + val inReq = Flipped(Decoupled(reqT.cloneType)) + val outReq = Decoupled(reqT.cloneType) + val inResp = Decoupled(respT.cloneType) + val outResp = Flipped(Decoupled(respT.cloneType)) + }) + val sourceGen = Module(new SourceGenerator( + newSourceWidth, + Some(chiselTypeOf(reqT.source)), + ignoreInUse = false + )) + sourceGen.io.gen := io.outReq.fire // use up a source ID only when request is created + sourceGen.io.reclaim.valid := io.outResp.fire + sourceGen.io.reclaim.bits := io.outResp.bits.source + sourceGen.io.meta := io.inReq.bits.source + + // passthrough logic + io.outReq <> io.inReq + // "man-in-the-middle" + io.inReq.ready := io.outReq.ready && sourceGen.io.id.valid + io.outReq.valid := io.inReq.valid && sourceGen.io.id.valid + // FIXME: Fill is a hack; just change downstream to the right sourceWidth + // io.outReq.bits.source := Fill(newSourceWidth, sourceGen.io.id.bits) + io.outReq.bits.source := sourceGen.io.id.bits + io.inResp <> 
io.outResp + // translate upstream response back to its old sourceId + io.inResp.bits.source := sourceGen.io.peek +} + // FIXME: unsure this is necessary trait HasFpuOpt { this: RocketTileModuleImp => val fpuOpt = From 154e61b1a35ff27e259d54525afc12fd11c9cc1c Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Mon, 16 Oct 2023 11:43:20 -0700 Subject: [PATCH 07/15] Fix SourceGen metadata IO errors in coalescer --- src/main/scala/tilelink/Coalescing.scala | 31 +++++++++++++----------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index bdc6edb..89e7385 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -271,17 +271,19 @@ class SourceGenerator[T <: Data]( val gen = Input(Bool()) val reclaim = Input(Valid(UInt(sourceWidth.W))) val id = Output(Valid(UInt(sourceWidth.W))) - // for debugging; indicates whether there is at least one inflight request - // that hasn't been reclaimed yet - val inflight = Output(Bool()) - // below are used when metadata is not None + // below are used only when metadata is not None + // `meta` is used as input when a request succeeds id generation to store + // its value to the table. // `peek` is the retrieved metadata saved for the request when corresponding - // request has come back (and hence `reclaim` was set). + // request has come back, setting `reclaim`. // Although these do not use ValidIO, it is safe because any in-flight // response coming back should have allocated a valid entry in the table // when it went out. 
val meta = Input(metadata.getOrElse(UInt(0.W))) val peek = Output(metadata.getOrElse(UInt(0.W))) + // for debugging; indicates whether there is at least one inflight request + // that hasn't been reclaimed yet + val inflight = Output(Bool()) }) val head = RegInit(UInt(sourceWidth.W), 0.U) head := Mux(io.gen, head + 1.U, head) @@ -292,7 +294,7 @@ class SourceGenerator[T <: Data]( val numSourceId = 1 << sourceWidth val row = new Bundle { val meta = metadata match { - case Some(gen) => gen.cloneType + case Some(gen) => chiselTypeOf(gen) case None => UInt(0.W) } val id = Valid(UInt(sourceWidth.W)) @@ -319,6 +321,9 @@ class SourceGenerator[T <: Data]( // @perf: would this require multiple write ports? occupancyTable(io.reclaim.bits).id.valid := false.B // mark freed } + io.peek := { + if (metadata.isDefined) occupancyTable(io.reclaim.bits).meta else 0.U + } when(io.gen && io.id.valid) { when (!io.reclaim.valid) { @@ -329,11 +334,6 @@ class SourceGenerator[T <: Data]( assert(outstanding > 0.U) outstanding := outstanding - 1.U } - - if (metadata.isDefined) { - io.peek := occupancyTable(io.reclaim.bits).meta - } - dontTouch(outstanding) } @@ -783,9 +783,10 @@ class CoalescerSourceGen( // in/out means upstream/downstream val inReq = Flipped(Decoupled(coalReqT.cloneType)) val outReq = Decoupled(coalReqT.cloneType) - // no need for inResp, since CoalShiftQueue/Mono/MultiCoalescer only generates - // requests and do not take in responses. Coalesced responses are - // separately taken in by InflightTable. + // outResp is only needed for telling the downstream TL node that this + // sourcegen module is always ready to take in responses. + // No need for inResp, since coalescerNode is directly replied by the + // outResp TileLink bundle. 
val outResp = Flipped(Decoupled(respT.cloneType)) }) val sourceGen = Module( @@ -794,6 +795,7 @@ class CoalescerSourceGen( sourceGen.io.gen := io.outReq.fire // use up a source ID only when request is created sourceGen.io.reclaim.valid := io.outResp.fire sourceGen.io.reclaim.bits := io.outResp.bits.source + sourceGen.io.meta := DontCare // TODO: make sourceGen.io.reclaim Decoupled? // passthrough logic @@ -1510,6 +1512,7 @@ class MemTraceDriverImp( // assert(sourceGen.io.id.valid) sourceGen.io.reclaim.valid := tlOut.d.fire sourceGen.io.reclaim.bits := tlOut.d.bits.source + sourceGen.io.meta := DontCare val (plegal, pbits) = edge.Put( fromSource = sourceGen.io.id.bits, From db8625fb205b468eae769f967afe7f43f27a754a Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Mon, 16 Oct 2023 15:24:37 -0700 Subject: [PATCH 08/15] Simplify metadata type wrangling in SourceGen --- src/main/scala/tilelink/Coalescing.scala | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index 89e7385..59c3882 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -267,6 +267,10 @@ class SourceGenerator[T <: Data]( metadata: Option[T] = None, ignoreInUse: Boolean = false ) extends Module { + def getMetadataType = metadata match { + case Some(gen) => gen.cloneType + case None => UInt(0.W) + } val io = IO(new Bundle { val gen = Input(Bool()) val reclaim = Input(Valid(UInt(sourceWidth.W))) @@ -279,8 +283,8 @@ class SourceGenerator[T <: Data]( // Although these do not use ValidIO, it is safe because any in-flight // response coming back should have allocated a valid entry in the table // when it went out. 
- val meta = Input(metadata.getOrElse(UInt(0.W))) - val peek = Output(metadata.getOrElse(UInt(0.W))) + val meta = Input(getMetadataType) + val peek = Output(getMetadataType) // for debugging; indicates whether there is at least one inflight request // that hasn't been reclaimed yet val inflight = Output(Bool()) @@ -293,10 +297,7 @@ class SourceGenerator[T <: Data]( val numSourceId = 1 << sourceWidth val row = new Bundle { - val meta = metadata match { - case Some(gen) => chiselTypeOf(gen) - case None => UInt(0.W) - } + val meta = getMetadataType val id = Valid(UInt(sourceWidth.W)) } // valid: in use, invalid: available From eb9772b750be69e6e09ef3c4032b03cf3ee45dc1 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Mon, 16 Oct 2023 17:42:17 -0700 Subject: [PATCH 09/15] Decouple Vortex dmem bundle from TL Previously VortexBundle was being instantiated using the parameters of the TileLink bundle from VortexTile. This results in tight coupling between Vortex interface parameters and downstream TileLink parameters. This change adds a standalone Bundle used by the VortexCore wrapper and is independently instantiated from the TL params, i.e. different source widths. Ideally we want to move away from using TL-like structures for VortexBundle and handling adapter logic completely outside the core blackbox. 
--- src/main/scala/rocket/VortexCore.scala | 8 +-- src/main/scala/tile/VortexTile.scala | 73 ++++++++++++++++++++------ 2 files changed, 60 insertions(+), 21 deletions(-) diff --git a/src/main/scala/rocket/VortexCore.scala b/src/main/scala/rocket/VortexCore.scala index 5ce05b1..983d0f8 100644 --- a/src/main/scala/rocket/VortexCore.scala +++ b/src/main/scala/rocket/VortexCore.scala @@ -8,7 +8,7 @@ import chisel3.util._ import chisel3.experimental._ import org.chipsalliance.cde.config.Parameters import freechips.rocketchip.tile._ -import tile.VortexTile +import tile.{VortexTile, VortexBundleA, VortexBundleD} class VortexBundle(tile: VortexTile)(implicit p: Parameters) extends CoreBundle { val clock = Input(Clock()) @@ -22,9 +22,9 @@ class VortexBundle(tile: VortexTile)(implicit p: Parameters) extends CoreBundle val a = tile.imemNodes.head.out.head._1.a.cloneType val d = Flipped(tile.imemNodes.head.out.head._1.d.cloneType) })) else None - val dmem = if (!tile.vortexParams.useVxCache) Some(Vec(4, new Bundle { - val a = tile.dmemNodes.head.out.head._1.a.cloneType - val d = Flipped(tile.dmemNodes.head.out.head._1.d.cloneType) + val dmem = if (!tile.vortexParams.useVxCache) Some(Vec(tile.numLanes, new Bundle { + val a = Decoupled(new VortexBundleA()) + val d = Flipped(Decoupled(new VortexBundleD())) })) else None val mem = if (tile.vortexParams.useVxCache) Some(new Bundle { val a = tile.memNode.out.head._1.a.cloneType diff --git a/src/main/scala/tile/VortexTile.scala b/src/main/scala/tile/VortexTile.scala index 3b81e77..a7bec4f 100644 --- a/src/main/scala/tile/VortexTile.scala +++ b/src/main/scala/tile/VortexTile.scala @@ -44,6 +44,22 @@ case class VortexTileParams( } } +class VortexBundleA extends Bundle { + val opcode = UInt(3.W) // FIXME: hardcoded + val size = UInt(4.W) // FIXME: hardcoded + val source = UInt(10.W) // FIXME: hardcoded + val address = UInt(32.W) // FIXME: hardcoded + val mask = UInt(4.W) // FIXME: hardcoded + val data = UInt(32.W) // FIXME: hardcoded 
+} + +class VortexBundleD extends Bundle { + val opcode = UInt(3.W) // FIXME: hardcoded + val size = UInt(4.W) // FIXME: hardcoded + val source = UInt(10.W) // FIXME: hardcoded + val data = UInt(32.W) // FIXME: hardcoded +} + class VortexTile private ( val vortexParams: VortexTileParams, crossing: ClockCrossingType, @@ -91,6 +107,7 @@ class VortexTile private ( minLatency = 1)))*/ val numLanes = 4 // FIXME: hardcoded + val sourceWidth = 1 // TODO: use Parameters for this val imemNodes = Seq.tabulate(1) { i => TLClientNode( @@ -98,7 +115,7 @@ class VortexTile private ( TLMasterPortParameters.v1( clients = Seq( TLMasterParameters.v1( - sourceId = IdRange(0, 1 << 10), // TODO magic number + sourceId = IdRange(0, 1 << 10), // TODO: magic numbers name = s"Vortex Core ${vortexParams.hartId} I-Mem $i", requestFifo = true, supportsProbe = @@ -117,7 +134,7 @@ class VortexTile private ( TLMasterPortParameters.v1( clients = Seq( TLMasterParameters.v1( - sourceId = IdRange(0, 1 << 10), // TODO magic number + sourceId = IdRange(0, 1 << sourceWidth), name = s"Vortex Core ${vortexParams.hartId} D-Mem Lane $i", requestFifo = true, supportsProbe = @@ -316,10 +333,12 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { // connection: VortexBundle <--> sourceGen <--> dmemNodes val sourceGens = Seq.tabulate(outer.numLanes) { _ => - Module(new VortexSourceGen( - 2, // FIXME: hardcoded - dmemTLBundles.head.a.bits, - dmemTLBundles.head.d.bits, + Module(new VortexTLAdapter( + outer.sourceWidth, + new VortexBundleA(), + new VortexBundleD(), + chiselTypeOf(dmemTLBundles.head.a.bits), + chiselTypeOf(dmemTLBundles.head.d.bits), )) } (core.io.dmem.get zip sourceGens) foreach { case (coreMem, sourceGen) => @@ -360,21 +379,24 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { // sourceWidth; this needs to be more flexible. // // Some @copypaste from CoalescerSourceGen. 
-class VortexSourceGen( +class VortexTLAdapter( newSourceWidth: Int, - reqT: TLBundleA, - respT: TLBundleD + inReqT: VortexBundleA, + inRespT: VortexBundleD, + outReqT: TLBundleA, + outRespT: TLBundleD ) extends Module { val io = IO(new Bundle { // in/out means upstream/downstream - val inReq = Flipped(Decoupled(reqT.cloneType)) - val outReq = Decoupled(reqT.cloneType) - val inResp = Decoupled(respT.cloneType) - val outResp = Flipped(Decoupled(respT.cloneType)) + // TODO: change inReq/inResp to VortexBundle + val inReq = Flipped(Decoupled(inReqT)) + val outReq = Decoupled(outReqT) + val inResp = Decoupled(inRespT) + val outResp = Flipped(Decoupled(outRespT)) }) val sourceGen = Module(new SourceGenerator( newSourceWidth, - Some(chiselTypeOf(reqT.source)), + Some(inReqT.source), ignoreInUse = false )) sourceGen.io.gen := io.outReq.fire // use up a source ID only when request is created @@ -382,15 +404,32 @@ class VortexSourceGen( sourceGen.io.reclaim.bits := io.outResp.bits.source sourceGen.io.meta := io.inReq.bits.source - // passthrough logic - io.outReq <> io.inReq + // io passthrough logic + // TLBundleA <> VortexBundleA + io.outReq.valid := io.inReq.valid + io.outReq.bits.opcode := io.inReq.bits.opcode + io.outReq.bits.param := 0.U + io.outReq.bits.size := io.inReq.bits.size + io.outReq.bits.source := io.inReq.bits.source + io.outReq.bits.address := io.inReq.bits.address + io.outReq.bits.mask := io.inReq.bits.mask + io.outReq.bits.data := io.inReq.bits.data + io.outReq.bits.corrupt := 0.U + io.inReq.ready := io.outReq.ready + // VortexBundleD <> TLBundleD + io.inResp.valid := io.outResp.valid + io.inResp.bits.opcode := io.outResp.bits.opcode + io.inResp.bits.size := io.outResp.bits.size + io.inResp.bits.source := io.outResp.bits.source + io.inResp.bits.data := io.outResp.bits.data + io.outResp.ready := io.inResp.ready + // "man-in-the-middle" io.inReq.ready := io.outReq.ready && sourceGen.io.id.valid io.outReq.valid := io.inReq.valid && sourceGen.io.id.valid // 
FIXME: Fill is a hack; just change downstream to the right sourceWidth // io.outReq.bits.source := Fill(newSourceWidth, sourceGen.io.id.bits) io.outReq.bits.source := sourceGen.io.id.bits - io.inResp <> io.outResp // translate upstream response back to its old sourceId io.inResp.bits.source := sourceGen.io.peek } From 8ab05293548a447ec87578df767ce5062432d665 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Mon, 16 Oct 2023 17:54:12 -0700 Subject: [PATCH 10/15] Move VortexBundleA/D to Core; resolve TODOs --- src/main/scala/rocket/VortexCore.scala | 18 +++++++++++++++++- src/main/scala/tile/VortexTile.scala | 26 ++------------------------ 2 files changed, 19 insertions(+), 25 deletions(-) diff --git a/src/main/scala/rocket/VortexCore.scala b/src/main/scala/rocket/VortexCore.scala index 983d0f8..8bb0b62 100644 --- a/src/main/scala/rocket/VortexCore.scala +++ b/src/main/scala/rocket/VortexCore.scala @@ -8,7 +8,23 @@ import chisel3.util._ import chisel3.experimental._ import org.chipsalliance.cde.config.Parameters import freechips.rocketchip.tile._ -import tile.{VortexTile, VortexBundleA, VortexBundleD} +import tile.VortexTile + +class VortexBundleA extends Bundle { + val opcode = UInt(3.W) // FIXME: hardcoded + val size = UInt(4.W) // FIXME: hardcoded + val source = UInt(10.W) // FIXME: hardcoded + val address = UInt(32.W) // FIXME: hardcoded + val mask = UInt(4.W) // FIXME: hardcoded + val data = UInt(32.W) // FIXME: hardcoded +} + +class VortexBundleD extends Bundle { + val opcode = UInt(3.W) // FIXME: hardcoded + val size = UInt(4.W) // FIXME: hardcoded + val source = UInt(10.W) // FIXME: hardcoded + val data = UInt(32.W) // FIXME: hardcoded +} class VortexBundle(tile: VortexTile)(implicit p: Parameters) extends CoreBundle { val clock = Input(Clock()) diff --git a/src/main/scala/tile/VortexTile.scala b/src/main/scala/tile/VortexTile.scala index a7bec4f..23cd4ca 100644 --- a/src/main/scala/tile/VortexTile.scala +++ b/src/main/scala/tile/VortexTile.scala @@ -16,7 
+16,7 @@ import freechips.rocketchip.util._ import freechips.rocketchip.prci.ClockSinkParameters import freechips.rocketchip.regmapper.RegField import freechips.rocketchip.tile._ -import rocket.Vortex +import rocket.{Vortex, VortexBundleA, VortexBundleD} case class RocketTileBoundaryBufferParams(force: Boolean = false) @@ -44,22 +44,6 @@ case class VortexTileParams( } } -class VortexBundleA extends Bundle { - val opcode = UInt(3.W) // FIXME: hardcoded - val size = UInt(4.W) // FIXME: hardcoded - val source = UInt(10.W) // FIXME: hardcoded - val address = UInt(32.W) // FIXME: hardcoded - val mask = UInt(4.W) // FIXME: hardcoded - val data = UInt(32.W) // FIXME: hardcoded -} - -class VortexBundleD extends Bundle { - val opcode = UInt(3.W) // FIXME: hardcoded - val size = UInt(4.W) // FIXME: hardcoded - val source = UInt(10.W) // FIXME: hardcoded - val data = UInt(32.W) // FIXME: hardcoded -} - class VortexTile private ( val vortexParams: VortexTileParams, crossing: ClockCrossingType, @@ -106,7 +90,7 @@ class VortexTile private ( beatBytes = lazyCoreParamsView.coreDataBytes, minLatency = 1)))*/ - val numLanes = 4 // FIXME: hardcoded + val numLanes = 4 // TODO: use Parameters for this val sourceWidth = 1 // TODO: use Parameters for this val imemNodes = Seq.tabulate(1) { i => @@ -375,9 +359,6 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { // dcacheArb.io.requestor <> dcachePorts.toSeq } -// TODO: Currently in/out are assumed to be the same TL bundle with the same -// sourceWidth; this needs to be more flexible. -// // Some @copypaste from CoalescerSourceGen. 
class VortexTLAdapter( newSourceWidth: Int, @@ -388,7 +369,6 @@ class VortexTLAdapter( ) extends Module { val io = IO(new Bundle { // in/out means upstream/downstream - // TODO: change inReq/inResp to VortexBundle val inReq = Flipped(Decoupled(inReqT)) val outReq = Decoupled(outReqT) val inResp = Decoupled(inRespT) @@ -427,8 +407,6 @@ class VortexTLAdapter( // "man-in-the-middle" io.inReq.ready := io.outReq.ready && sourceGen.io.id.valid io.outReq.valid := io.inReq.valid && sourceGen.io.id.valid - // FIXME: Fill is a hack; just change downstream to the right sourceWidth - // io.outReq.bits.source := Fill(newSourceWidth, sourceGen.io.id.bits) io.outReq.bits.source := sourceGen.io.id.bits // translate upstream response back to its old sourceId io.inResp.bits.source := sourceGen.io.peek From e4dd0c21e9af02df44194b20f0dd551187b2a16f Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Mon, 16 Oct 2023 20:45:18 -0700 Subject: [PATCH 11/15] Bump vortex --- src/main/resources/vsrc/vortex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/resources/vsrc/vortex b/src/main/resources/vsrc/vortex index a654c9b..3adf178 160000 --- a/src/main/resources/vsrc/vortex +++ b/src/main/resources/vsrc/vortex @@ -1 +1 @@ -Subproject commit a654c9b8b23b72855a4077f356cc9f22c983a934 +Subproject commit 3adf178478c28fa9629da31afe3c6b8b55c58772 From fb97bd3c2b7d1ac7f605703d40bc0865d1f4bbb6 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Tue, 17 Oct 2023 12:18:58 -0700 Subject: [PATCH 12/15] Decouple Vortex imem bundle from TL --- src/main/resources/vsrc/vortex | 2 +- src/main/scala/rocket/VortexCore.scala | 6 ++-- src/main/scala/tile/VortexTile.scala | 42 ++++++++++++++++---------- 3 files changed, 30 insertions(+), 20 deletions(-) diff --git a/src/main/resources/vsrc/vortex b/src/main/resources/vsrc/vortex index 3adf178..696621b 160000 --- a/src/main/resources/vsrc/vortex +++ b/src/main/resources/vsrc/vortex @@ -1 +1 @@ -Subproject commit 
3adf178478c28fa9629da31afe3c6b8b55c58772 +Subproject commit 696621b2dc4b14e5de382c144b8ee29f437ea1b5 diff --git a/src/main/scala/rocket/VortexCore.scala b/src/main/scala/rocket/VortexCore.scala index 8bb0b62..0c2f066 100644 --- a/src/main/scala/rocket/VortexCore.scala +++ b/src/main/scala/rocket/VortexCore.scala @@ -34,9 +34,9 @@ class VortexBundle(tile: VortexTile)(implicit p: Parameters) extends CoreBundle val interrupts = Input(new CoreInterrupts()) // conditionally instantiate ports depending on whether we want to use VX_cache or not - val imem = if (!tile.vortexParams.useVxCache) Some(Vec(1, new Bundle { // TODO: magic number - val a = tile.imemNodes.head.out.head._1.a.cloneType - val d = Flipped(tile.imemNodes.head.out.head._1.d.cloneType) + val imem = if (!tile.vortexParams.useVxCache) Some(Vec(1, new Bundle { + val a = Decoupled(new VortexBundleA()) + val d = Flipped(Decoupled(new VortexBundleD())) })) else None val dmem = if (!tile.vortexParams.useVxCache) Some(Vec(tile.numLanes, new Bundle { val a = Decoupled(new VortexBundleA()) diff --git a/src/main/scala/tile/VortexTile.scala b/src/main/scala/tile/VortexTile.scala index 23cd4ca..5b9cfa4 100644 --- a/src/main/scala/tile/VortexTile.scala +++ b/src/main/scala/tile/VortexTile.scala @@ -290,10 +290,18 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { core.io.mem.get.a <> outer.memNode.out.head._1.a core.io.mem.get.d <> outer.memNode.out.head._1.d } else { - (core.io.imem.get zip outer.imemNodes).foreach { case (coreMem, tileNode) => - coreMem.d <> tileNode.out.head._1.d - tileNode.out.head._1.a <> coreMem.a - } + val imemTLAdapter = Module(new VortexTLAdapter( + outer.sourceWidth, + new VortexBundleA(), + new VortexBundleD(), + chiselTypeOf(outer.imemNodes.head.out.head._1.a.bits), + chiselTypeOf(outer.imemNodes.head.out.head._1.d.bits), + )) + // TODO: make imemNodes not a vector + imemTLAdapter.io.inReq <> core.io.imem.get(0).a + core.io.imem.get(0).d <> 
imemTLAdapter.io.inResp + outer.imemNodes(0).out(0)._1.a <> imemTLAdapter.io.outReq + imemTLAdapter.io.outResp <> outer.imemNodes(0).out(0)._1.d // Since the individual per-lane TL requests might come back out-of-sync between // the lanes, but Vortex core expects the lane requests to be synced, @@ -315,8 +323,10 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { .map(b => (b.d.bits.source === arb.io.out.bits) && arb.io.out.valid) .asUInt - // connection: VortexBundle <--> sourceGen <--> dmemNodes - val sourceGens = Seq.tabulate(outer.numLanes) { _ => + // connection: VortexBundle <--> VortexTLAdapter <--> dmemNodes + // @perf: this would duplicate SourceGenerator table for every lane and eat + // up some area + val tlAdapters = Seq.tabulate(outer.numLanes) { _ => Module(new VortexTLAdapter( outer.sourceWidth, new VortexBundleA(), @@ -325,21 +335,21 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { chiselTypeOf(dmemTLBundles.head.d.bits), )) } - (core.io.dmem.get zip sourceGens) foreach { case (coreMem, sourceGen) => - sourceGen.io.inReq <> coreMem.a - coreMem.d <> sourceGen.io.inResp + (core.io.dmem.get zip tlAdapters) foreach { case (coreMem, tlAdapter) => + tlAdapter.io.inReq <> coreMem.a + coreMem.d <> tlAdapter.io.inResp } - (sourceGens zip dmemTLBundles) foreach { case (sourceGen, tlBundle) => - tlBundle.a <> sourceGen.io.outReq + (tlAdapters zip dmemTLBundles) foreach { case (tlAdapter, tlBundle) => + tlBundle.a <> tlAdapter.io.outReq } // using the chosen source id, // - lie to core that response is not valid if source doesn't match picked // - lie to downstream that core is not ready if source doesn't match picked - (sourceGens zip dmemTLBundles).zipWithIndex.foreach { - case ((sourceGen, tlBundle), i) => - sourceGen.io.outResp.bits := tlBundle.d.bits - sourceGen.io.outResp.valid := tlBundle.d.valid && matchingSources(i) - tlBundle.d.ready := sourceGen.io.outResp.ready && matchingSources(i) + 
(tlAdapters zip dmemTLBundles).zipWithIndex.foreach { + case ((tlAdapter, tlBundle), i) => + tlAdapter.io.outResp.bits := tlBundle.d.bits + tlAdapter.io.outResp.valid := tlBundle.d.valid && matchingSources(i) + tlBundle.d.ready := tlAdapter.io.outResp.ready && matchingSources(i) } // (core.io.dmem.get zip outer.dmemNodes).foreach { case (coreMem, tileNode) => From 0d92eb65d4a73916b0f0b57381db6762fb84dd8e Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Wed, 18 Oct 2023 15:19:11 -0700 Subject: [PATCH 13/15] Increase sourceWidth to fix vx_wspawn sync bug With sourceWidth = 1, we hit an unsynchronized vx_wspawn bug, where the previously spawned warps get killed and overridden by a new vx_wspawn call before all the warps complete execution. Setting sourceWidth = 1 somehow slows down the progress of the spawned warps in relation to warp 0 (presumably because fetch stalls, but not sure why they would slow down more than warp 0) and results in this bug. sourceWidth = 4 seems to work for vecadd. 
--- src/main/scala/tile/VortexTile.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/scala/tile/VortexTile.scala b/src/main/scala/tile/VortexTile.scala index 5b9cfa4..bd86ba5 100644 --- a/src/main/scala/tile/VortexTile.scala +++ b/src/main/scala/tile/VortexTile.scala @@ -91,7 +91,7 @@ class VortexTile private ( minLatency = 1)))*/ val numLanes = 4 // TODO: use Parameters for this - val sourceWidth = 1 // TODO: use Parameters for this + val sourceWidth = 4 // TODO: use Parameters for this val imemNodes = Seq.tabulate(1) { i => TLClientNode( @@ -99,7 +99,7 @@ class VortexTile private ( TLMasterPortParameters.v1( clients = Seq( TLMasterParameters.v1( - sourceId = IdRange(0, 1 << 10), // TODO: magic numbers + sourceId = IdRange(0, 1 << sourceWidth), name = s"Vortex Core ${vortexParams.hartId} I-Mem $i", requestFifo = true, supportsProbe = From ff302c1ba5974138dee69b12b0249a65b16ac77d Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Wed, 18 Oct 2023 20:04:31 -0700 Subject: [PATCH 14/15] Use VortexTLAdapter for useVxCache = true as well --- src/main/scala/rocket/VortexCore.scala | 36 +++++++++++++-------- src/main/scala/tile/VortexTile.scala | 44 +++++++++++++++++++------- 2 files changed, 55 insertions(+), 25 deletions(-) diff --git a/src/main/scala/rocket/VortexCore.scala b/src/main/scala/rocket/VortexCore.scala index 0c2f066..366d8fd 100644 --- a/src/main/scala/rocket/VortexCore.scala +++ b/src/main/scala/rocket/VortexCore.scala @@ -10,20 +10,28 @@ import org.chipsalliance.cde.config.Parameters import freechips.rocketchip.tile._ import tile.VortexTile -class VortexBundleA extends Bundle { +class VortexBundleA( + sourceWidth: Int, + dataWidth: Int +) extends Bundle { + assert(dataWidth % 8 == 0) val opcode = UInt(3.W) // FIXME: hardcoded val size = UInt(4.W) // FIXME: hardcoded - val source = UInt(10.W) // FIXME: hardcoded + val source = UInt(sourceWidth.W) // FIXME: hardcoded val address = UInt(32.W) // FIXME: hardcoded - val mask = 
UInt(4.W) // FIXME: hardcoded - val data = UInt(32.W) // FIXME: hardcoded + val mask = UInt((dataWidth / 8).W) // FIXME: hardcoded + val data = UInt(dataWidth.W) // FIXME: hardcoded } -class VortexBundleD extends Bundle { +class VortexBundleD( + sourceWidth: Int, + dataWidth: Int +) extends Bundle { + assert(dataWidth % 8 == 0) val opcode = UInt(3.W) // FIXME: hardcoded val size = UInt(4.W) // FIXME: hardcoded - val source = UInt(10.W) // FIXME: hardcoded - val data = UInt(32.W) // FIXME: hardcoded + val source = UInt(sourceWidth.W) // FIXME: hardcoded + val data = UInt(dataWidth.W) // FIXME: hardcoded } class VortexBundle(tile: VortexTile)(implicit p: Parameters) extends CoreBundle { @@ -35,16 +43,18 @@ class VortexBundle(tile: VortexTile)(implicit p: Parameters) extends CoreBundle // conditionally instantiate ports depending on whether we want to use VX_cache or not val imem = if (!tile.vortexParams.useVxCache) Some(Vec(1, new Bundle { - val a = Decoupled(new VortexBundleA()) - val d = Flipped(Decoupled(new VortexBundleD())) + val a = Decoupled(new VortexBundleA(sourceWidth = 10, dataWidth = 32)) + val d = Flipped(Decoupled(new VortexBundleD(sourceWidth = 10, dataWidth = 32))) })) else None val dmem = if (!tile.vortexParams.useVxCache) Some(Vec(tile.numLanes, new Bundle { - val a = Decoupled(new VortexBundleA()) - val d = Flipped(Decoupled(new VortexBundleD())) + val a = Decoupled(new VortexBundleA(sourceWidth = 10, dataWidth = 32)) + val d = Flipped(Decoupled(new VortexBundleD(sourceWidth = 10, dataWidth = 32))) })) else None val mem = if (tile.vortexParams.useVxCache) Some(new Bundle { - val a = tile.memNode.out.head._1.a.cloneType - val d = Flipped(tile.memNode.out.head._1.d.cloneType) + val a = Decoupled(new VortexBundleA(sourceWidth = 15, dataWidth = 128)) + val d = Flipped(Decoupled(new VortexBundleD(sourceWidth = 15, dataWidth = 128))) + // val a = tile.memNode.out.head._1.a.cloneType + // val d = Flipped(tile.memNode.out.head._1.d.cloneType) }) else None 
// val fpu = Flipped(new FPUCoreIO()) diff --git a/src/main/scala/tile/VortexTile.scala b/src/main/scala/tile/VortexTile.scala index bd86ba5..cf14c3d 100644 --- a/src/main/scala/tile/VortexTile.scala +++ b/src/main/scala/tile/VortexTile.scala @@ -135,15 +135,16 @@ class VortexTile private ( ) } + println(s"============= lazyCoreParamsView.coreDataBytes=${lazyCoreParamsView.coreDataBytes}") val memNode = TLClientNode( Seq( TLMasterPortParameters.v1( clients = Seq( TLMasterParameters.v1( - sourceId = IdRange(0, 1 << 15), // TODO magic numbers + sourceId = IdRange(0, 1 << sourceWidth), name = s"Vortex Core ${vortexParams.hartId} Mem Interface", requestFifo = true, - supportsProbe = TransferSizes(16, 16), + supportsProbe = TransferSizes(16, 16), // FIXME: hardcoded supportsGet = TransferSizes(16, 16), supportsPutFull = TransferSizes(16, 16), supportsPutPartial = TransferSizes(16, 16) @@ -284,16 +285,35 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { // require(core.io.hartid.getWidth >= outer.hartIdSinkNode.bundle.getWidth, // s"core hartid wire (${core.io.hartid.getWidth}b) truncates external hartid wire (${outer.hartIdSinkNode.bundle.getWidth}b)") + // --------------------------------------------- + // Translate Vortex memory interface to TileLink + // --------------------------------------------- + if (outer.vortexParams.useVxCache) { println(s"width of a channel data ${core.io.mem.get.a.bits.data.getWidth}") println(s"width of d channel data ${core.io.mem.get.d.bits.data.getWidth}") - core.io.mem.get.a <> outer.memNode.out.head._1.a - core.io.mem.get.d <> outer.memNode.out.head._1.d + + val memTLAdapter = Module(new VortexTLAdapter( + outer.sourceWidth, + chiselTypeOf(core.io.mem.get.a.bits), + chiselTypeOf(core.io.mem.get.d.bits), + chiselTypeOf(outer.memNode.out.head._1.a.bits), + chiselTypeOf(outer.memNode.out.head._1.d.bits), + )) + + // connection: VortexBundle <--> VortexTLAdapter <--> TL memNode + memTLAdapter.io.inReq <> 
core.io.mem.get.a + core.io.mem.get.d <> memTLAdapter.io.inResp + outer.memNode.out(0)._1.a <> memTLAdapter.io.outReq + memTLAdapter.io.outResp <> outer.memNode.out(0)._1.d + + // core.io.mem.get.a <> outer.memNode.out.head._1.a + // core.io.mem.get.d <> outer.memNode.out.head._1.d } else { val imemTLAdapter = Module(new VortexTLAdapter( outer.sourceWidth, - new VortexBundleA(), - new VortexBundleD(), + chiselTypeOf(core.io.imem.get(0).a.bits), + chiselTypeOf(core.io.imem.get(0).d.bits), chiselTypeOf(outer.imemNodes.head.out.head._1.a.bits), chiselTypeOf(outer.imemNodes.head.out.head._1.d.bits), )) @@ -326,26 +346,26 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { // connection: VortexBundle <--> VortexTLAdapter <--> dmemNodes // @perf: this would duplicate SourceGenerator table for every lane and eat // up some area - val tlAdapters = Seq.tabulate(outer.numLanes) { _ => + val dmemTLAdapters = Seq.tabulate(outer.numLanes) { _ => Module(new VortexTLAdapter( outer.sourceWidth, - new VortexBundleA(), - new VortexBundleD(), + chiselTypeOf(core.io.dmem.get(0).a.bits), + chiselTypeOf(core.io.dmem.get(0).d.bits), chiselTypeOf(dmemTLBundles.head.a.bits), chiselTypeOf(dmemTLBundles.head.d.bits), )) } - (core.io.dmem.get zip tlAdapters) foreach { case (coreMem, tlAdapter) => + (core.io.dmem.get zip dmemTLAdapters) foreach { case (coreMem, tlAdapter) => tlAdapter.io.inReq <> coreMem.a coreMem.d <> tlAdapter.io.inResp } - (tlAdapters zip dmemTLBundles) foreach { case (tlAdapter, tlBundle) => + (dmemTLAdapters zip dmemTLBundles) foreach { case (tlAdapter, tlBundle) => tlBundle.a <> tlAdapter.io.outReq } // using the chosen source id, // - lie to core that response is not valid if source doesn't match picked // - lie to downstream that core is not ready if source doesn't match picked - (tlAdapters zip dmemTLBundles).zipWithIndex.foreach { + (dmemTLAdapters zip dmemTLBundles).zipWithIndex.foreach { case ((tlAdapter, tlBundle), i) => 
tlAdapter.io.outResp.bits := tlBundle.d.bits tlAdapter.io.outResp.valid := tlBundle.d.valid && matchingSources(i) From 805abd1b4b553962f078a313d3d377bfea8cb571 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Wed, 18 Oct 2023 20:05:55 -0700 Subject: [PATCH 15/15] Bump vortex for TL port change --- src/main/resources/vsrc/vortex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/resources/vsrc/vortex b/src/main/resources/vsrc/vortex index 696621b..59efba2 160000 --- a/src/main/resources/vsrc/vortex +++ b/src/main/resources/vsrc/vortex @@ -1 +1 @@ -Subproject commit 696621b2dc4b14e5de382c144b8ee29f437ea1b5 +Subproject commit 59efba2b7024910a5b35195bf38bf86120bf1403