diff --git a/src/main/resources/vsrc/vortex b/src/main/resources/vsrc/vortex index a654c9b..59efba2 160000 --- a/src/main/resources/vsrc/vortex +++ b/src/main/resources/vsrc/vortex @@ -1 +1 @@ -Subproject commit a654c9b8b23b72855a4077f356cc9f22c983a934 +Subproject commit 59efba2b7024910a5b35195bf38bf86120bf1403 diff --git a/src/main/scala/rocket/VortexCore.scala b/src/main/scala/rocket/VortexCore.scala index 53bd707..366d8fd 100644 --- a/src/main/scala/rocket/VortexCore.scala +++ b/src/main/scala/rocket/VortexCore.scala @@ -8,10 +8,32 @@ import chisel3.util._ import chisel3.experimental._ import org.chipsalliance.cde.config.Parameters import freechips.rocketchip.tile._ -import freechips.rocketchip.util._ -import freechips.rocketchip.scie._ import tile.VortexTile +class VortexBundleA( + sourceWidth: Int, + dataWidth: Int +) extends Bundle { + assert(dataWidth % 8 == 0) + val opcode = UInt(3.W) // FIXME: hardcoded + val size = UInt(4.W) // FIXME: hardcoded + val source = UInt(sourceWidth.W) // FIXME: hardcoded + val address = UInt(32.W) // FIXME: hardcoded + val mask = UInt((dataWidth / 8).W) // FIXME: hardcoded + val data = UInt(dataWidth.W) // FIXME: hardcoded +} + +class VortexBundleD( + sourceWidth: Int, + dataWidth: Int +) extends Bundle { + assert(dataWidth % 8 == 0) + val opcode = UInt(3.W) // FIXME: hardcoded + val size = UInt(4.W) // FIXME: hardcoded + val source = UInt(sourceWidth.W) // FIXME: hardcoded + val data = UInt(dataWidth.W) // FIXME: hardcoded +} + class VortexBundle(tile: VortexTile)(implicit p: Parameters) extends CoreBundle { val clock = Input(Clock()) val reset = Input(Reset()) @@ -20,17 +42,19 @@ class VortexBundle(tile: VortexTile)(implicit p: Parameters) extends CoreBundle val interrupts = Input(new CoreInterrupts()) // conditionally instantiate ports depending on whether we want to use VX_cache or not - val imem = if (!tile.vortexParams.useVxCache) Some(Vec(1, new Bundle { // TODO: magic number - val a = tile.imemNodes.head.out.head._1.a.cloneType - val d = Flipped(tile.imemNodes.head.out.head._1.d.cloneType) + val imem = if (!tile.vortexParams.useVxCache) Some(Vec(1, new Bundle { + val a = Decoupled(new VortexBundleA(sourceWidth = 10, dataWidth = 32)) + val d = Flipped(Decoupled(new VortexBundleD(sourceWidth = 10, dataWidth = 32))) })) else None - val dmem = if (!tile.vortexParams.useVxCache) Some(Vec(4, new Bundle { - val a = tile.dmemNodes.head.out.head._1.a.cloneType - val d = Flipped(tile.dmemNodes.head.out.head._1.d.cloneType) + val dmem = if (!tile.vortexParams.useVxCache) Some(Vec(tile.numLanes, new Bundle { + val a = Decoupled(new VortexBundleA(sourceWidth = 10, dataWidth = 32)) + val d = Flipped(Decoupled(new VortexBundleD(sourceWidth = 10, dataWidth = 32))) })) else None val mem = if (tile.vortexParams.useVxCache) Some(new Bundle { - val a = tile.memNode.out.head._1.a.cloneType - val d = Flipped(tile.memNode.out.head._1.d.cloneType) + val a = Decoupled(new VortexBundleA(sourceWidth = 15, dataWidth = 128)) + val d = Flipped(Decoupled(new VortexBundleD(sourceWidth = 15, dataWidth = 128))) + // val a = tile.memNode.out.head._1.a.cloneType + // val d = Flipped(tile.memNode.out.head._1.d.cloneType) }) else None // val fpu = Flipped(new FPUCoreIO()) diff --git a/src/main/scala/tile/VortexTile.scala b/src/main/scala/tile/VortexTile.scala index 7e0d570..4e33f3e 100644 --- a/src/main/scala/tile/VortexTile.scala +++ b/src/main/scala/tile/VortexTile.scala @@ -4,7 +4,7 @@ package tile import chisel3._ -import chisel3.util.RRArbiter +import chisel3.util._ import org.chipsalliance.cde.config._ import freechips.rocketchip.devices.tilelink._ import freechips.rocketchip.diplomacy._ @@ -16,9 +16,7 @@ import freechips.rocketchip.util._ import freechips.rocketchip.prci.ClockSinkParameters import freechips.rocketchip.regmapper.RegField import freechips.rocketchip.tile._ -import rocket.Vortex - -import scala.collection.mutable.ListBuffer +import rocket.{Vortex, VortexBundleA, VortexBundleD} case class RocketTileBoundaryBufferParams(force: Boolean = false) @@ -35,26 +33,31 @@ case class VortexTileParams( blockerCtrlAddr: Option[BigInt] = None, clockSinkParams: ClockSinkParameters = ClockSinkParameters(), boundaryBuffers: Option[RocketTileBoundaryBufferParams] = None - ) extends InstantiableTileParams[VortexTile] { +) extends InstantiableTileParams[VortexTile] { // require(icache.isDefined) - // require(dcache.isDefined) + // require(dcache.isDefined) - def instantiate(crossing: TileCrossingParamsLike, lookup: LookupByHartIdImpl)(implicit p: Parameters): VortexTile = { + def instantiate(crossing: TileCrossingParamsLike, lookup: LookupByHartIdImpl)( + implicit p: Parameters + ): VortexTile = { new VortexTile(this, crossing, lookup) } } -class VortexTile private( - val vortexParams: VortexTileParams, - crossing: ClockCrossingType, - lookup: LookupByHartIdImpl, - q: Parameters) - extends BaseTile(vortexParams, crossing, lookup, q) +class VortexTile private ( + val vortexParams: VortexTileParams, + crossing: ClockCrossingType, + lookup: LookupByHartIdImpl, + q: Parameters +) extends BaseTile(vortexParams, crossing, lookup, q) with SinksExternalInterrupts - with SourcesExternalNotifications -{ + with SourcesExternalNotifications { // Private constructor ensures altered LazyModule.p is used implicitly - def this(params: VortexTileParams, crossing: TileCrossingParamsLike, lookup: LookupByHartIdImpl)(implicit p: Parameters) = + def this( + params: VortexTileParams, + crossing: TileCrossingParamsLike, + lookup: LookupByHartIdImpl + )(implicit p: Parameters) = this(params, crossing.crossingType, lookup, p) val intOutwardNode = IntIdentityNode() @@ -87,40 +90,70 @@ class VortexTile private( beatBytes = lazyCoreParamsView.coreDataBytes, minLatency = 1)))*/ - val imemNodes = Seq.tabulate(1) { i => TLClientNode(Seq(TLMasterPortParameters.v1( - clients = Seq(TLMasterParameters.v1( - sourceId = IdRange(0, 1 << 10), // TODO magic number - name = s"Vortex Core ${vortexParams.hartId} I-Mem $i", - requestFifo = true, - supportsProbe = TransferSizes(1, lazyCoreParamsView.coreDataBytes), - supportsGet = TransferSizes(1, lazyCoreParamsView.coreDataBytes) - )) - )))} + val numLanes = 4 // TODO: use Parameters for this + val sourceWidth = 4 // TODO: use Parameters for this - val dmemNodes = Seq.tabulate(4) { i => TLClientNode(Seq(TLMasterPortParameters.v1( - clients = Seq(TLMasterParameters.v1( - sourceId = IdRange(0, 1 << 10), // TODO magic number - name = s"Vortex Core ${vortexParams.hartId} D-Mem Lane $i", - requestFifo = true, - supportsProbe = TransferSizes(1, lazyCoreParamsView.coreDataBytes), - supportsGet = TransferSizes(1, lazyCoreParamsView.coreDataBytes), - supportsPutFull = TransferSizes(1, lazyCoreParamsView.coreDataBytes), - supportsPutPartial = TransferSizes(1, lazyCoreParamsView.coreDataBytes) - )) - )))} + val imemNodes = Seq.tabulate(1) { i => + TLClientNode( + Seq( + TLMasterPortParameters.v1( + clients = Seq( + TLMasterParameters.v1( + sourceId = IdRange(0, 1 << sourceWidth), + name = s"Vortex Core ${vortexParams.hartId} I-Mem $i", + requestFifo = true, + supportsProbe = + TransferSizes(1, lazyCoreParamsView.coreDataBytes), + supportsGet = TransferSizes(1, lazyCoreParamsView.coreDataBytes) + ) + ) + ) + ) + ) + } + + val dmemNodes = Seq.tabulate(numLanes) { i => + TLClientNode( + Seq( + TLMasterPortParameters.v1( + clients = Seq( + TLMasterParameters.v1( + sourceId = IdRange(0, 1 << sourceWidth), + name = s"Vortex Core ${vortexParams.hartId} D-Mem Lane $i", + requestFifo = true, + supportsProbe = + TransferSizes(1, lazyCoreParamsView.coreDataBytes), + supportsGet = TransferSizes(1, lazyCoreParamsView.coreDataBytes), + supportsPutFull = + TransferSizes(1, lazyCoreParamsView.coreDataBytes), + supportsPutPartial = + TransferSizes(1, lazyCoreParamsView.coreDataBytes) + ) + ) + ) + ) + ) + } + + println(s"============= lazyCoreParamsView.coreDataBytes=${lazyCoreParamsView.coreDataBytes}") + val memNode = TLClientNode( + Seq( + TLMasterPortParameters.v1( + clients = Seq( + TLMasterParameters.v1( + sourceId = IdRange(0, 1 << sourceWidth), + name = s"Vortex Core ${vortexParams.hartId} Mem Interface", + requestFifo = true, + supportsProbe = TransferSizes(16, 16), // FIXME: hardcoded + supportsGet = TransferSizes(16, 16), + supportsPutFull = TransferSizes(16, 16), + supportsPutPartial = TransferSizes(16, 16) + ) + ) + ) + ) + ) - val memNode = TLClientNode(Seq(TLMasterPortParameters.v1( - clients = Seq(TLMasterParameters.v1( - sourceId = IdRange(0, 1 << 15), // TODO magic numbers - name = s"Vortex Core ${vortexParams.hartId} Mem Interface", - requestFifo = true, - supportsProbe = TransferSizes(16, 16), - supportsGet = TransferSizes(16, 16), - supportsPutFull = TransferSizes(16, 16), - supportsPutPartial = TransferSizes(16, 16) - )), - ))) - if (vortexParams.useVxCache) { tlMasterXbar.node := TLWidthWidget(16) := memNode } else { @@ -131,7 +164,8 @@ class VortexTile private( /* below are copied from rocket */ val bus_error_unit = vortexParams.beuAddr map { a => - val beu = LazyModule(new BusErrorUnit(new L1BusErrors, BusErrorUnitParams(a))) + val beu = + LazyModule(new BusErrorUnit(new L1BusErrors, BusErrorUnitParams(a))) intOutwardNode := beu.intNode connectTLSlave(beu.node, xBytes) beu @@ -139,13 +173,17 @@ class VortexTile private( val tile_master_blocker = tileParams.blockerCtrlAddr - .map(BasicBusBlockerParams(_, xBytes, masterPortBeatBytes, deadlock = true)) + .map( + BasicBusBlockerParams(_, xBytes, masterPortBeatBytes, deadlock = true) + ) .map(bp => LazyModule(new BasicBusBlocker(bp))) tile_master_blocker.foreach(lm => connectTLSlave(lm.controlNode, xBytes)) // TODO: this doesn't block other masters, e.g. RoCCs - tlOtherMastersNode := tile_master_blocker.map { _.node := tlMasterXbar.node } getOrElse { tlMasterXbar.node } + tlOtherMastersNode := tile_master_blocker.map { + _.node := tlMasterXbar.node + } getOrElse { tlMasterXbar.node } masterNode :=* tlOtherMastersNode DisableMonitors { implicit p => tlSlaveXbar.node :*= slaveNode } @@ -163,7 +201,6 @@ class VortexTile private( Description(name, mapping ++ cpuProperties ++ nextLevelCacheProperty ++ tileProperties ++ dtimProperty ++ itimProperty ++ beuProperty) } - } ResourceBinding { Resource(cpuDevice, "reg").bind(ResourceAddress(staticIdForMetadataUseOnly)) @@ -171,15 +208,33 @@ class VortexTile private( override lazy val module = new VortexTileModuleImp(this) - override def makeMasterBoundaryBuffers(crossing: ClockCrossingType)(implicit p: Parameters) = (vortexParams.boundaryBuffers, crossing) match { - case (Some(RocketTileBoundaryBufferParams(true )), _) => TLBuffer() - case (Some(RocketTileBoundaryBufferParams(false)), _: RationalCrossing) => TLBuffer(BufferParams.none, BufferParams.flow, BufferParams.none, BufferParams.flow, BufferParams(1)) + override def makeMasterBoundaryBuffers( + crossing: ClockCrossingType + )(implicit p: Parameters) = (vortexParams.boundaryBuffers, crossing) match { + case (Some(RocketTileBoundaryBufferParams(true)), _) => TLBuffer() + case (Some(RocketTileBoundaryBufferParams(false)), _: RationalCrossing) => + TLBuffer( + BufferParams.none, + BufferParams.flow, + BufferParams.none, + BufferParams.flow, + BufferParams(1) + ) case _ => TLBuffer(BufferParams.none) } - override def makeSlaveBoundaryBuffers(crossing: ClockCrossingType)(implicit p: Parameters) = (vortexParams.boundaryBuffers, crossing) match { - case (Some(RocketTileBoundaryBufferParams(true )), _) => TLBuffer() - case (Some(RocketTileBoundaryBufferParams(false)), _: RationalCrossing) => TLBuffer(BufferParams.flow, BufferParams.none, BufferParams.none, BufferParams.none, BufferParams.none) + override def makeSlaveBoundaryBuffers( + crossing: ClockCrossingType + )(implicit p: Parameters) = (vortexParams.boundaryBuffers, crossing) match { + case (Some(RocketTileBoundaryBufferParams(true)), _) => TLBuffer() + case (Some(RocketTileBoundaryBufferParams(false)), _: RationalCrossing) => + TLBuffer( + BufferParams.flow, + BufferParams.none, + BufferParams.none, + BufferParams.none, + BufferParams.none + ) case _ => TLBuffer(BufferParams.none) } } @@ -188,7 +243,7 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { Annotated.params(this, outer.vortexParams) val core = Module(new Vortex(outer)(outer.p)) - + core.io.clock := clock core.io.reset := reset @@ -200,8 +255,7 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { ) // Report when the tile has ceased to retire instructions; for now the only cause is clock gating - outer.reportCease(outer.vortexParams.core.clockGate.option( - core.io.cease)) + outer.reportCease(outer.vortexParams.core.clockGate.option(core.io.cease)) outer.reportWFI(Some(core.io.wfi)) @@ -223,49 +277,103 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { // require(core.io.hartid.getWidth >= outer.hartIdSinkNode.bundle.getWidth, // s"core hartid wire (${core.io.hartid.getWidth}b) truncates external hartid wire (${outer.hartIdSinkNode.bundle.getWidth}b)") + // --------------------------------------------- + // Translate Vortex memory interface to TileLink + // --------------------------------------------- + if (outer.vortexParams.useVxCache) { println(s"width of a channel data ${core.io.mem.get.a.bits.data.getWidth}") println(s"width of d channel data ${core.io.mem.get.d.bits.data.getWidth}") - core.io.mem.get.a <> outer.memNode.out.head._1.a - core.io.mem.get.d <> outer.memNode.out.head._1.d - } - else { - (core.io.imem.get zip outer.imemNodes).foreach { case (coreMem, tileNode) => - coreMem.d <> tileNode.out.head._1.d - coreMem.a <> tileNode.out.head._1.a - } - // pick source id and: + val memTLAdapter = Module(new VortexTLAdapter( + outer.sourceWidth, + chiselTypeOf(core.io.mem.get.a.bits), + chiselTypeOf(core.io.mem.get.d.bits), + chiselTypeOf(outer.memNode.out.head._1.a.bits), + chiselTypeOf(outer.memNode.out.head._1.d.bits), + )) + + // connection: VortexBundle <--> VortexTLAdapter <--> TL memNode + memTLAdapter.io.inReq <> core.io.mem.get.a + core.io.mem.get.d <> memTLAdapter.io.inResp + outer.memNode.out(0)._1.a <> memTLAdapter.io.outReq + memTLAdapter.io.outResp <> outer.memNode.out(0)._1.d + + // core.io.mem.get.a <> outer.memNode.out.head._1.a + // core.io.mem.get.d <> outer.memNode.out.head._1.d + } else { + val imemTLAdapter = Module(new VortexTLAdapter( + outer.sourceWidth, + chiselTypeOf(core.io.imem.get(0).a.bits), + chiselTypeOf(core.io.imem.get(0).d.bits), + chiselTypeOf(outer.imemNodes.head.out.head._1.a.bits), + chiselTypeOf(outer.imemNodes.head.out.head._1.d.bits), + )) + // TODO: make imemNodes not a vector + imemTLAdapter.io.inReq <> core.io.imem.get(0).a + core.io.imem.get(0).d <> imemTLAdapter.io.inResp + outer.imemNodes(0).out(0)._1.a <> imemTLAdapter.io.outReq + imemTLAdapter.io.outResp <> outer.imemNodes(0).out(0)._1.d + + // Since the individual per-lane TL requests might come back out-of-sync between + // the lanes, but Vortex core expects the lane requests to be synced, + // we need to selectively fire responses that have the same source, and + // delay others. Below is the logic that implements this. + + // choose one source out of the arriving per-lane TL D channels + val arb = Module( + new RRArbiter(core.io.dmem.get.head.d.bits.source.cloneType, outer.numLanes) + ) + val dmemTLBundles = outer.dmemNodes.map(_.out.head._1) + arb.io.out.ready := true.B + (arb.io.in zip dmemTLBundles).foreach { case (arbIn, tlBundle) => + arbIn.valid := tlBundle.d.valid + arbIn.bits := tlBundle.d.bits.source + } + val matchingSources = Wire(UInt(outer.numLanes.W)) + matchingSources := dmemTLBundles + .map(b => (b.d.bits.source === arb.io.out.bits) && arb.io.out.valid) + .asUInt + + // connection: VortexBundle <--> VortexTLAdapter <--> dmemNodes + // @perf: this would duplicate SourceGenerator table for every lane and eat + // up some area + val dmemTLAdapters = Seq.tabulate(outer.numLanes) { _ => + Module(new VortexTLAdapter( + outer.sourceWidth, + chiselTypeOf(core.io.dmem.get(0).a.bits), + chiselTypeOf(core.io.dmem.get(0).d.bits), + chiselTypeOf(dmemTLBundles.head.a.bits), + chiselTypeOf(dmemTLBundles.head.d.bits), + )) + } + (core.io.dmem.get zip dmemTLAdapters) foreach { case (coreMem, tlAdapter) => + tlAdapter.io.inReq <> coreMem.a + coreMem.d <> tlAdapter.io.inResp + } + (dmemTLAdapters zip dmemTLBundles) foreach { case (tlAdapter, tlBundle) => + tlBundle.a <> tlAdapter.io.outReq + } + // using the chosen source id, // - lie to core that response is not valid if source doesn't match picked // - lie to downstream that core is not ready if source doesn't match picked - - val arb = Module(new RRArbiter(core.io.dmem.get.head.d.bits.source.cloneType, 4)) - val matchingSources = Wire(UInt(4.W)) - val dmemDs = outer.dmemNodes.map(_.out.head._1.d) - - (arb.io.in zip dmemDs).zipWithIndex.foreach { case ((arbIn, tileNode), i) => - arbIn.valid := tileNode.valid - arbIn.bits := tileNode.bits.source - } - matchingSources := dmemDs.map(d => (d.bits.source === arb.io.out.bits) && arb.io.out.valid).asUInt - arb.io.out.ready := true.B - - (core.io.dmem.get zip dmemDs).zipWithIndex.foreach { case ((coreMem, tileNode), i) => - coreMem.d.bits := tileNode.bits - coreMem.d.valid := tileNode.valid && matchingSources(i) - tileNode.ready := coreMem.d.ready && matchingSources(i) + (dmemTLAdapters zip dmemTLBundles).zipWithIndex.foreach { + case ((tlAdapter, tlBundle), i) => + tlAdapter.io.outResp.bits := tlBundle.d.bits + tlAdapter.io.outResp.valid := tlBundle.d.valid && matchingSources(i) + tlBundle.d.ready := tlAdapter.io.outResp.ready && matchingSources(i) } - (core.io.dmem.get zip outer.dmemNodes).foreach { case (coreMem, tileNode) => - coreMem.a <> tileNode.out.head._1.a - } + // (core.io.dmem.get zip outer.dmemNodes).foreach { case (coreMem, tileNode) => + // tileNode.out.head._1.a <> coreMem.a + // } } // core.io.fpu := DontCare // TODO eliminate this redundancy // val h = dcachePorts.size - //val c = core.dcacheArbPorts + // val c = core.dcacheArbPorts // val o = outer.nDCachePorts // require(h == c, s"port list size was $h, core expected $c") // require(h == o, s"port list size was $h, outer counted $o") @@ -273,7 +381,61 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { // dcacheArb.io.requestor <> dcachePorts.toSeq } +// Some @copypaste from CoalescerSourceGen. +class VortexTLAdapter( + newSourceWidth: Int, + inReqT: VortexBundleA, + inRespT: VortexBundleD, + outReqT: TLBundleA, + outRespT: TLBundleD +) extends Module { + val io = IO(new Bundle { + // in/out means upstream/downstream + val inReq = Flipped(Decoupled(inReqT)) + val outReq = Decoupled(outReqT) + val inResp = Decoupled(inRespT) + val outResp = Flipped(Decoupled(outRespT)) + }) + val sourceGen = Module(new SourceGenerator( + newSourceWidth, + Some(inReqT.source), + ignoreInUse = false + )) + sourceGen.io.gen := io.outReq.fire // use up a source ID only when request is created + sourceGen.io.reclaim.valid := io.outResp.fire + sourceGen.io.reclaim.bits := io.outResp.bits.source + sourceGen.io.meta := io.inReq.bits.source + + // io passthrough logic + // TLBundleA <> VortexBundleA + io.outReq.valid := io.inReq.valid + io.outReq.bits.opcode := io.inReq.bits.opcode + io.outReq.bits.param := 0.U + io.outReq.bits.size := io.inReq.bits.size + io.outReq.bits.source := io.inReq.bits.source + io.outReq.bits.address := io.inReq.bits.address + io.outReq.bits.mask := io.inReq.bits.mask + io.outReq.bits.data := io.inReq.bits.data + io.outReq.bits.corrupt := 0.U + io.inReq.ready := io.outReq.ready + // VortexBundleD <> TLBundleD + io.inResp.valid := io.outResp.valid + io.inResp.bits.opcode := io.outResp.bits.opcode + io.inResp.bits.size := io.outResp.bits.size + io.inResp.bits.source := io.outResp.bits.source + io.inResp.bits.data := io.outResp.bits.data + io.outResp.ready := io.inResp.ready + + // "man-in-the-middle" + io.inReq.ready := io.outReq.ready && sourceGen.io.id.valid + io.outReq.valid := io.inReq.valid && sourceGen.io.id.valid + io.outReq.bits.source := sourceGen.io.id.bits + // translate upstream response back to its old sourceId + io.inResp.bits.source := sourceGen.io.peek +} + // FIXME: unsure this is necessary trait HasFpuOpt { this: RocketTileModuleImp => - val fpuOpt = outer.tileParams.core.fpu.map(params => Module(new FPU(params)(outer.p))) + val fpuOpt = + outer.tileParams.core.fpu.map(params => Module(new FPU(params)(outer.p))) } diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index 1b24c20..59c3882 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -254,41 +254,76 @@ case class CoalescedResponse(config: CoalescerConfig) dataWidth = (8 * (1 << config.maxCoalLogSize)) ) -// If `ignoreInUse`, just keep giving out new IDs without checking if it is in -// use. -class SourceGenerator(sourceWidth: Int, ignoreInUse: Boolean = true) - extends Module { +// `metadata` is an extra field in the sourceId table that can be used for +// storing e.g. the UUID originally attached to a request. This is useful for +// using this module as a source ID converter / compressor. If `None`, this +// field is not instantiated. +// TODO: implement lookup logic. +// +// If `ignoreInUse`, just keep giving out new IDs without any collision checking. +// This might result in TL violation. +class SourceGenerator[T <: Data]( + sourceWidth: Int, + metadata: Option[T] = None, + ignoreInUse: Boolean = false +) extends Module { + def getMetadataType = metadata match { + case Some(gen) => gen.cloneType + case None => UInt(0.W) + } val io = IO(new Bundle { val gen = Input(Bool()) val reclaim = Input(Valid(UInt(sourceWidth.W))) val id = Output(Valid(UInt(sourceWidth.W))) + // below are used only when metadata is not None + // `meta` is used as input when a request succeeds id generation to store + // its value to the table. + // `peek` is the retrieved metadata saved for the request when corresponding + // request has come back, setting `reclaim`. + // Although these do not use ValidIO, it is safe because any in-flight + // response coming back should have allocated a valid entry in the table + // when it went out. + val meta = Input(getMetadataType) + val peek = Output(getMetadataType) + // for debugging; indicates whether there is at least one inflight request + // that hasn't been reclaimed yet val inflight = Output(Bool()) }) val head = RegInit(UInt(sourceWidth.W), 0.U) head := Mux(io.gen, head + 1.U, head) - // for debugging - // also for indicating if there is at least one inflight request that hasn't been reclaimed val outstanding = RegInit(UInt((sourceWidth + 1).W), 0.U) io.inflight := (outstanding > 0.U) || io.gen val numSourceId = 1 << sourceWidth - // true: in use, false: available - val occupancyTable = Mem(numSourceId, Valid(UInt(sourceWidth.W))) - when(reset.asBool) { - (0 until numSourceId).foreach { occupancyTable(_).valid := false.B } + val row = new Bundle { + val meta = getMetadataType + val id = Valid(UInt(sourceWidth.W)) } - val frees = (0 until numSourceId).map(!occupancyTable(_).valid) + // valid: in use, invalid: available + // val occupancyTable = Mem(numSourceId, Valid(UInt(sourceWidth.W))) + val occupancyTable = Mem(numSourceId, row) + when(reset.asBool) { + (0 until numSourceId).foreach { occupancyTable(_).id.valid := false.B } + } + val frees = (0 until numSourceId).map(!occupancyTable(_).id.valid) val lowestFree = PriorityEncoder(frees) val lowestFreeRow = occupancyTable(lowestFree) - io.id.valid := (if (ignoreInUse) true.B else !lowestFreeRow.valid) + io.id.valid := (if (ignoreInUse) true.B else !lowestFreeRow.id.valid) io.id.bits := lowestFree when(io.gen && io.id.valid /* fire */ ) { - occupancyTable(io.id.bits).valid := true.B // mark in use + occupancyTable(io.id.bits).id.valid := true.B // mark in use + if (metadata.isDefined) { + occupancyTable(io.id.bits).meta := io.meta + } } when(io.reclaim.valid) { - occupancyTable(io.reclaim.bits).valid := false.B // mark freed + // @perf: would this require multiple write ports? + occupancyTable(io.reclaim.bits).id.valid := false.B // mark freed + } + io.peek := { + if (metadata.isDefined) occupancyTable(io.reclaim.bits).meta else 0.U } when(io.gen && io.id.valid) { @@ -300,7 +335,6 @@ class SourceGenerator(sourceWidth: Int, ignoreInUse: Boolean = true) assert(outstanding > 0.U) outstanding := outstanding - 1.U } - dontTouch(outstanding) } @@ -738,30 +772,41 @@ class MultiCoalescer( if (!config.enable) disable } +// This module mostly handles the correct ready/valid handshake depending on +// sourceId availability. Actual generation logic is done by the +// SourceGenerator module. class CoalescerSourceGen( config: CoalescerConfig, coalReqT: CoalescedRequest, respT: TLBundleD ) extends Module { val io = IO(new Bundle { + // in/out means upstream/downstream val inReq = Flipped(Decoupled(coalReqT.cloneType)) val outReq = Decoupled(coalReqT.cloneType) - val inResp = Flipped(Decoupled(respT.cloneType)) + // outResp is only needed for telling the downstream TL node that this + // sourcegen module is always ready to take in responses. + // No need for inResp, since coalescerNode is directly replied by the + // outResp TileLink bundle. + val outResp = Flipped(Decoupled(respT.cloneType)) }) val sourceGen = Module( new SourceGenerator(log2Ceil(config.numNewSrcIds), ignoreInUse = false) ) sourceGen.io.gen := io.outReq.fire // use up a source ID only when request is created - sourceGen.io.reclaim.valid := io.inResp.fire - sourceGen.io.reclaim.bits := io.inResp.bits.source - io.inResp.ready := true.B // should be always ready to reclaim old ID + sourceGen.io.reclaim.valid := io.outResp.fire + sourceGen.io.reclaim.bits := io.outResp.bits.source + sourceGen.io.meta := DontCare // TODO: make sourceGen.io.reclaim Decoupled? + // passthrough logic io.outReq <> io.inReq + // "man-in-the-middle" io.inReq.ready := io.outReq.ready && sourceGen.io.id.valid // overwrite bits affected by sourcegen backpressure io.outReq.valid := io.inReq.valid && sourceGen.io.id.valid io.outReq.bits.source := sourceGen.io.id.bits + io.outResp.ready := true.B // should be always ready to reclaim old ID } class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) @@ -880,7 +925,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) // val coalSourceGen = Module(new CoalescerSourceGen(config, coalReqT, tlCoal.d.bits)) coalSourceGen.io.inReq <> coalescer.io.coalReq - coalSourceGen.io.inResp <> tlCoal.d + coalSourceGen.io.outResp <> tlCoal.d // InflightTable IO // @@ -1468,6 +1513,7 @@ class MemTraceDriverImp( // assert(sourceGen.io.id.valid) sourceGen.io.reclaim.valid := tlOut.d.fire sourceGen.io.reclaim.bits := tlOut.d.bits.source + sourceGen.io.meta := DontCare val (plegal, pbits) = edge.Put( fromSource = sourceGen.io.id.bits, @@ -2203,7 +2249,7 @@ class CoalescerXbarImpl(outer: CoalescerXbar, // For the uncoalesced data response (outer.nonCoalNarrowNodes zip io.nonCoalResps).foreach{ case(node,resp) => - val (tlOut, edgeOut) = node.out(0) + val (tlOut, _) = node.out(0) val nonCoalResp = Wire(respNonCoalEntryT) nonCoalResp.fromTLD(tlOut.d.bits) tlOut.d.ready := resp.ready @@ -2219,7 +2265,7 @@ class CoalescerXbarImpl(outer: CoalescerXbar, ) outer.coalReqNodes.zipWithIndex.foreach{ case(node, idx) => - val (tlOut, edgeOut) = node.out(0) + val (tlOut, _) = node.out(0) coalRespRRArbiter.io.in(idx) <> tlOut.d } //Connect output of arbiter to coalesced reponse output