// See LICENSE.SiFive for license details.
// See LICENSE.Berkeley for license details.

package radiance.tile

import chisel3._
import chisel3.util._
import org.chipsalliance.cde.config._
import freechips.rocketchip.devices.tilelink._
import freechips.rocketchip.diplomacy._
import freechips.rocketchip.interrupts._
import freechips.rocketchip.tilelink._
import freechips.rocketchip.rocket._
import freechips.rocketchip.subsystem.HierarchicalElementCrossingParamsLike
import freechips.rocketchip.util._
import freechips.rocketchip.prci.ClockSinkParameters
import freechips.rocketchip.regmapper.RegField
import freechips.rocketchip.tile._
import radiance.memory._
import gemmini.{Gemmini, GemminiCustomConfigs}

/** Tile-level configuration for a [[VortexTile]].
  *
  * @param core            parameters of the Vortex core inside the tile
  * @param useVxCache      when true, the tile exposes a single memory client
  *                        (`memNode`) instead of separate imem/dmem paths
  * @param name            optional base name; `baseName` falls back to
  *                        "radiance_tile" when this is `None`
  * @param tileId          tile index, appended to `baseName` to form
  *                        `uniqueName`
  * @param blockerCtrlAddr when defined, a bus blocker is instantiated with its
  *                        control registers at this address
  * @param boundaryBuffers selects the buffering used by the tile's
  *                        master/slave boundary buffers
  */
case class VortexTileParams(
    core: VortexCoreParams = VortexCoreParams(),
    useVxCache: Boolean = false,
    icache: Option[ICacheParams] = None /* Some(ICacheParams()) */,
    dcache: Option[DCacheParams] = None /* Some(DCacheParams()) */,
    btb: Option[BTBParams] = None, // Some(BTBParams()),
    dataScratchpadBytes: Int = 0,
    name: Option[String] = Some("vortex_tile"),
    tileId: Int = 0,
    beuAddr: Option[BigInt] = None,
    blockerCtrlAddr: Option[BigInt] = None,
    clockSinkParams: ClockSinkParameters = ClockSinkParameters(),
    boundaryBuffers: Option[RocketTileBoundaryBufferParams] = None
) extends InstantiableTileParams[VortexTile] {
  // TODO: want to use ICache/DCacheParams as well
  // require(icache.isDefined)
  // require(dcache.isDefined)

  /** Builds the tile described by these parameters. */
  def instantiate(
      crossing: HierarchicalElementCrossingParamsLike,
      lookup: LookupByHartIdImpl
  )(implicit p: Parameters): VortexTile =
    new VortexTile(this, crossing, lookup)

  val baseName: String = name.getOrElse("radiance_tile")
  val uniqueName: String = s"${baseName}_${tileId}"
}

// TODO: move to VortexCore
// VortexTileParams extends TileParams which require a `core: CoreParams`
// field, so VortexCoreParams needs to extend from CoreParams as well,
// requiring all these fields to be initialized.  Most of this is unnecessary
// though.
// TODO see how BOOM does that

/** Core-level parameters for the Vortex core.
  *
  * This exists mainly so that [[VortexTileParams]] can provide the
  * `core: CoreParams` field; the derived members below are fixed functions of
  * the constructor arguments.
  */
case class VortexCoreParams(
    bootFreqHz: BigInt = 0,
    useVM: Boolean = true,
    useUser: Boolean = false,
    useSupervisor: Boolean = false,
    useHypervisor: Boolean = false,
    useDebug: Boolean = true,
    useAtomics: Boolean = true,
    useAtomicsOnlyForIO: Boolean = false,
    useCompressed: Boolean = true,
    useRVE: Boolean = false,
    useConditionalZero: Boolean = false,
    nLocalInterrupts: Int = 0,
    useNMI: Boolean = false,
    nBreakpoints: Int = 1,
    useBPWatch: Boolean = false,
    mcontextWidth: Int = 0,
    scontextWidth: Int = 0,
    nPMPs: Int = 8,
    nPerfCounters: Int = 0,
    haveBasicCounters: Boolean = true,
    haveCFlush: Boolean = false,
    misaWritable: Boolean = true,
    nL2TLBEntries: Int = 0,
    nL2TLBWays: Int = 1,
    nPTECacheEntries: Int = 8,
    mtvecInit: Option[BigInt] = Some(BigInt(0)),
    mtvecWritable: Boolean = true,
    fastLoadWord: Boolean = true,
    fastLoadByte: Boolean = false,
    branchPredictionModeCSR: Boolean = false,
    clockGate: Boolean = false,
    mvendorid: Int = 0, // 0 means non-commercial implementation
    mimpid: Int = 0x20181004, // release date in BCD
    mulDiv: Option[MulDivParams] = Some(MulDivParams()),
    fpu: Option[FPUParams] = Some(FPUParams()),
    debugROB: Boolean = false, // if enabled, uses a C++ debug ROB to generate trace-with-wdata
    haveCease: Boolean = true, // non-standard CEASE instruction
    haveSimTimeout: Boolean = true // add plusarg for simulation timeout
) extends CoreParams {
  val haveFSDirty = false
  val pmpGranularity: Int = if (useHypervisor) 4096 else 4
  val fetchWidth: Int = if (useCompressed) 2 else 1
  val decodeWidth: Int = fetchWidth / (if (useCompressed) 2 else 1)
  val retireWidth: Int = 1
  val instBits: Int = if (useCompressed) 16 else 32
  val lrscCycles: Int = 80 // worst case is 14 mispredicted branches + slop
  val traceHasWdata: Boolean = false // ooo wb, so no wdata in trace
}

/** Tile that wraps a Vortex core.
  *
  * Declares the TileLink client nodes for the core's instruction, data and
  * shared-memory ports, optionally places a coalescer (`CoalescerKey`) and L1
  * caches (`VortexL1Key`) in front of them, instantiates word-interleaved
  * shared-memory banks behind a crossbar, and attaches a Gemmini RoCC
  * accelerator. The hardware itself is generated by [[VortexTileModuleImp]].
  *
  * NOTE: the order of the node bindings (`:=`, `:=*`, `:*=`) in this class is
  * significant and must be preserved when editing.
  */
class VortexTile private (
    val vortexParams: VortexTileParams,
    crossing: ClockCrossingType,
    lookup: LookupByHartIdImpl,
    q: Parameters
) extends BaseTile(vortexParams, crossing, lookup, q)
    with SinksExternalInterrupts
    with SourcesExternalNotifications {
  // Private constructor ensures altered LazyModule.p is used implicitly
  def this(
      params: VortexTileParams,
      crossing: HierarchicalElementCrossingParamsLike,
      lookup: LookupByHartIdImpl
  )(implicit p: Parameters) =
    this(params, crossing.crossingType, lookup, p)

  val intOutwardNode = None
  val slaveNode = TLIdentityNode()
  val masterNode = visibilityNode

  // Memory-mapped region for HTIF communication
  // We use fixed addresses instead of tohost/fromhost
  val regDevice = new SimpleDevice("vortex-reg", Seq(s"vortex-reg${tileParams.tileId}"))
  val regNode = TLRegisterNode(
    address = Seq(AddressSet(0x7c000000 + 0x1000 * tileParams.tileId, 0xfff)),
    device = regDevice,
    beatBytes = 4,
    concurrency = 1
  )
  regNode := TLFragmenter(4, 64) := tlSlaveXbar.node

  require(
    p(SIMTCoreKey).isDefined,
    "SIMTCoreKey not defined; make sure to use WithSimtLanes when using VortexTile"
  )

  val numLanes = p(SIMTCoreKey) match {
    case Some(simtParam) => simtParam.nLanes
    case None            => 4
  }

  // CAUTION: imemSourceWidth is dependent on the ibuffer size.  We have to
  // make sure (1 << imemSourceWidth) is smaller than the per-warp ibuffer
  // size; otherwise, more requests than what ibuffer can accommodate can fire,
  // and responses might stall in the downstream.  This might cause issues when
  // there are also outstanding dmem responses that might get blocked from
  // going back to the core by a previous imem response due to serialization at
  // the narrow tile<->sbus port, leading to a deadlock.
  //
  // This condition should ideally be asserted at elaboration time, but since
  // ibuffer size is set as a hardcoded macro IBUF_SIZE that's uncontrollable
  // from Chisel, there's no easy solution.  We at least don't expose this as a
  // Parameter and leave as a hardcoded value here.
  val imemSourceWidth = 4 // 1 << imemSourceWidth == IBUF_SIZE
  val dmemSourceWidth = p(SIMTCoreKey) match {
    // TODO: respect coalescer newSrcIds
    case Some(simtParam) => log2Ceil(simtParam.nSrcIds)
    case None            => 4
  }
  // require(
  //   dmemSourceWidth >= 4,
  //   "Setting a small number of sourceIds may cause correctness bug inside " +
  //     "Vortex core due to synchronization issues in vx_wspawn. " +
  //     "We recommend setting nSrcIds to at least 16."
  // )
  val smemSourceWidth = 4 // FIXME: hardcoded

  val numWarps = 4 // TODO: parametrize
  val NW_WIDTH = (if (numWarps == 1) 1 else log2Ceil(numWarps))
  val UUID_WIDTH = 44
  val imemTagWidth = UUID_WIDTH + NW_WIDTH
  val numLsuLanes = 4 // see VX_gpu_pkg.sv
  val LSUQ_SIZE = 8 * (numLanes / numLsuLanes)
  val LSUQ_TAG_BITS = log2Ceil(LSUQ_SIZE) + 1 /*DCACHE_BATCH_SEL_BITS*/
  val dmemTagWidth = UUID_WIDTH + LSUQ_TAG_BITS
  // dmem and smem shares the same tag width, DCACHE_NOSM_TAG_WIDTH
  val smemTagWidth = dmemTagWidth

  val imemNodes = Seq.tabulate(1) { i =>
    TLClientNode(
      Seq(
        TLMasterPortParameters.v1(
          clients = Seq(
            TLMasterParameters.v1(
              sourceId = IdRange(0, 1 << imemSourceWidth),
              name = s"Vortex Core ${vortexParams.tileId} I-Mem $i",
              requestFifo = true,
              supportsProbe = TransferSizes(1, lazyCoreParamsView.coreDataBytes),
              supportsGet = TransferSizes(1, lazyCoreParamsView.coreDataBytes)
            )
          )
        )
      )
    )
  }

  val dmemNodes = Seq.tabulate(numLsuLanes) { i =>
    TLClientNode(
      Seq(
        TLMasterPortParameters.v1(
          clients = Seq(
            TLMasterParameters.v1(
              sourceId = IdRange(0, 1 << dmemSourceWidth),
              name = s"Vortex Core ${vortexParams.tileId} D-Mem Lane $i",
              requestFifo = true,
              supportsProbe = TransferSizes(1, lazyCoreParamsView.coreDataBytes),
              supportsGet = TransferSizes(1, lazyCoreParamsView.coreDataBytes),
              supportsPutFull = TransferSizes(1, lazyCoreParamsView.coreDataBytes),
              supportsPutPartial = TransferSizes(1, lazyCoreParamsView.coreDataBytes)
            )
          )
        )
      )
    )
  }

  val smemNodes = Seq.tabulate(numLsuLanes) { i =>
    TLClientNode(
      Seq(
        TLMasterPortParameters.v1(
          clients = Seq(
            TLMasterParameters.v1(
              sourceId = IdRange(0, 1 << smemSourceWidth),
              name = s"Vortex Core ${vortexParams.tileId} SharedMem Lane $i",
              requestFifo = true,
              supportsProbe = TransferSizes(1, lazyCoreParamsView.coreDataBytes),
              supportsGet = TransferSizes(1, lazyCoreParamsView.coreDataBytes),
              supportsPutFull = TransferSizes(1, lazyCoreParamsView.coreDataBytes),
              supportsPutPartial = TransferSizes(1, lazyCoreParamsView.coreDataBytes)
            )
          )
        )
      )
    )
  }

  // combine outgoing per-lane dmemNode into 1 identity node
  //
  // NOTE: We need TLWidthWidget here because there might be a data width
  // mismatch between Vortex's per-lane response and the system bus when we
  // don't instantiate either L1 or the coalescer.  This _should_ be optimized
  // out when we instantiate either which should handle data width conversion
  // internally (which it does by... using TLWidthWidget).
  val dmemAggregateNode = TLIdentityNode()
  dmemNodes.foreach { dmemAggregateNode := TLWidthWidget(4) := _ }

  val memNode = TLClientNode(
    Seq(
      TLMasterPortParameters.v1(
        clients = Seq(
          TLMasterParameters.v1(
            // FIXME: need to also respect imemSourceWidth
            sourceId = IdRange(0, 1 << dmemSourceWidth),
            name = s"Vortex Core ${vortexParams.tileId} Mem Interface",
            requestFifo = true,
            supportsProbe = TransferSizes(16, 16), // FIXME: hardcoded
            supportsGet = TransferSizes(16, 16),
            supportsPutFull = TransferSizes(16, 16),
            supportsPutPartial = TransferSizes(16, 16)
          )
        )
      )
    )
  )

  // Conditionally instantiate memory coalescer
  val coalescerNode = p(CoalescerKey) match {
    case Some(coalParam) => {
      val coal = LazyModule(
        new CoalescingUnit(coalParam)
      )
      coal.cpuNode :=* dmemAggregateNode
      coal.aggregateNode // N+1 lanes
    }
    case None => dmemAggregateNode
  }

  // Conditionally instantiate L1 cache
  val (icacheNode, dcacheNode): (TLNode, TLNode) = p(VortexL1Key) match {
    case Some(vortexL1Config) => {
      println(
        s"============ Using Vortex L1 cache ================="
      )
      // require(
      //   p(CoalescerKey).isDefined,
      //   "Vortex L1 configuration currently only works when coalescer is also enabled."
      // )
      val icache = LazyModule(new VortexL1Cache(vortexL1Config))
      val dcache = LazyModule(new VortexL1Cache(vortexL1Config))
      // imemNodes.foreach { icache.coresideNode := TLWidthWidget(4) := _ }
      assert(imemNodes.length == 1) // FIXME
      icache.coresideNode := TLWidthWidget(4) := imemNodes(0)
      // dmemNodes go through coalescerNode
      dcache.coresideNode :=* coalescerNode

      (icache.masterNode, dcache.masterNode)
    }
    case None => {
      val imemWideNode = TLIdentityNode()
      assert(imemNodes.length == 1) // FIXME
      imemWideNode := TLWidthWidget(4) := imemNodes(0)

      (imemWideNode, coalescerNode)
    }
  }

  // Instantiate sharedmem banks
  //
  // Instantiate the same number of banks as there are lanes.
  // TODO: parametrize
  val smemBanks = Seq.tabulate(numLsuLanes) { bankId =>
    // Banked-by-word (4 bytes)
    // base for bank 1: ff...000000|01|00
    // mask for bank 1; 00...111111|00|11
    val base = 0xff000000L | (bankId * 4 /*wordSize*/ )
    val mask = 0x00001fffL ^ ((numLsuLanes - 1) * 4 /*wordSize*/ )
    LazyModule(new TLRAM(AddressSet(base, mask), beatBytes = 4 /*wordSize*/ ))
  }
  // smem lanes-to-banks crossbar
  val smemXbar = LazyModule(new TLXbar)
  smemNodes.foreach(smemXbar.node := _)
  smemBanks.foreach(_.node := smemXbar.node)

  if (vortexParams.useVxCache) {
    tlMasterXbar.node := TLWidthWidget(16) := memNode
  } else {
    // imemNodes.foreach { tlMasterXbar.node := TLWidthWidget(4) := _ }
    tlMasterXbar.node :=* icacheNode
    tlMasterXbar.node :=* dcacheNode
  }

  // ROCC
  // TODO: parametrize
  val gemmini = LazyModule(new Gemmini(GemminiCustomConfigs.unifiedMemConfig))
  val roccs: Seq[LazyRoCC] = Seq(gemmini)
  tlMasterXbar.node :=* gemmini.atlNode
  tlOtherMastersNode :=* gemmini.tlNode
  gemmini.stlNode :=* TLWidthWidget(4) :=* smemXbar.node

  /* below are copied from rocket */

  // val bus_error_unit = vortexParams.beuAddr map { a =>
  //   val beu =
  //     LazyModule(new BusErrorUnit(new L1BusErrors, BusErrorUnitParams(a)))
  //   intOutwardNode := beu.intNode
  //   connectTLSlave(beu.node, xBytes)
  //   beu
  // }

  val tile_master_blocker =
    tileParams.blockerCtrlAddr
      .map(
        BasicBusBlockerParams(_, xBytes, masterPortBeatBytes, deadlock = true)
      )
      .map(bp => LazyModule(new BasicBusBlocker(bp)))

  tile_master_blocker.foreach(lm => connectTLSlave(lm.controlNode, xBytes))

  // TODO: this doesn't block other masters, e.g. RoCCs
  tlOtherMastersNode := tile_master_blocker.map {
    _.node := tlMasterXbar.node
  } getOrElse { tlMasterXbar.node }
  masterNode :=* tlOtherMastersNode
  DisableMonitors { implicit p => tlSlaveXbar.node :*= slaveNode }

  val dtimProperty =
    Nil // Seq(dmemDevice.asProperty).flatMap(p => Map("sifive,dtim" -> p))

  val itimProperty =
    Nil // frontend.icache.itimProperty.toSeq.flatMap(p => Map("sifive,itim" -> p))

  // missing bus_error_unit

  val cpuDevice: SimpleDevice = new SimpleDevice(
    "cpu",
    Seq(s"sifive,vortex${tileParams.tileId}", "riscv")
  ) {
    override def parent = Some(ResourceAnchors.cpus)
    override def describe(resources: ResourceBindings): Description = {
      val Description(name, mapping) = super.describe(resources)
      Description(
        name,
        mapping ++ cpuProperties ++ nextLevelCacheProperty
          ++ tileProperties ++ dtimProperty ++ itimProperty /*++ beuProperty*/
      )
    }
  }

  ResourceBinding {
    Resource(cpuDevice, "reg").bind(ResourceAddress(tileId))
  }

  override lazy val module = new VortexTileModuleImp(this)

  /** Buffer placed on the tile's master boundary, chosen from
    * `vortexParams.boundaryBuffers` and the clock-crossing type.
    */
  override def makeMasterBoundaryBuffers(
      crossing: ClockCrossingType
  )(implicit p: Parameters) =
    (vortexParams.boundaryBuffers, crossing) match {
      case (Some(RocketTileBoundaryBufferParams(true)), _) => TLBuffer()
      case (Some(RocketTileBoundaryBufferParams(false)), _: RationalCrossing) =>
        TLBuffer(
          BufferParams.none,
          BufferParams.flow,
          BufferParams.none,
          BufferParams.flow,
          BufferParams(1)
        )
      case _ => TLBuffer(BufferParams.none)
    }

  /** Buffer placed on the tile's slave boundary, chosen from
    * `vortexParams.boundaryBuffers` and the clock-crossing type.
    */
  override def makeSlaveBoundaryBuffers(
      crossing: ClockCrossingType
  )(implicit p: Parameters) =
    (vortexParams.boundaryBuffers, crossing) match {
      case (Some(RocketTileBoundaryBufferParams(true)), _) => TLBuffer()
      case (Some(RocketTileBoundaryBufferParams(false)), _: RationalCrossing) =>
        TLBuffer(
          BufferParams.flow,
          BufferParams.none,
          BufferParams.none,
          BufferParams.none,
          BufferParams.none
        )
      case _ => TLBuffer(BufferParams.none)
    }
}

/** Hardware implementation of [[VortexTile]].
  *
  * Instantiates the `Vortex` core, exposes `core.io.finished` through the
  * tile's register node, and connects the core's memory interfaces to the
  * tile's TileLink client nodes through [[VortexTLAdapter]] instances. RoCC
  * ports are currently tied off with `DontCare`.
  *
  * @param outer the diplomatic wrapper whose nodes and parameters are used
  */
class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) {
  Annotated.params(this, outer.vortexParams)

  val core = Module(new Vortex(outer)(outer.p))

  core.io.clock := clock
  core.io.reset := reset

  // begin @copypaste from RocketTile ------------------------------------------

  // reset vector is connected in the Frontend to s2_pc
  core.io.reset_vector := DontCare

  outer.regNode.regmap(
    0x00 -> Seq(RegField.r(32, core.io.finished))
  )

  // Report when the tile has ceased to retire instructions
  outer.reportCease(Some(core.io.finished))

  outer.reportWFI(Some(core.io.wfi))

  outer.decodeCoreInterrupts(core.io.interrupts) // Decode the interrupt vector

  // outer.bus_error_unit.foreach { beu =>
  //   core.io.interrupts.buserror.get := beu.module.io.interrupt
  // }

  core.io.interrupts.nmi.foreach { nmi => nmi := outer.nmiSinkNode.get.bundle }

  // Pass through various external constants and reports that were bundle-bridged into the tile
  // outer.traceSourceNode.bundle <> core.io.trace
  core.io.traceStall := outer.traceAuxSinkNode.bundle.stall
  // outer.bpwatchSourceNode.bundle <> core.io.bpwatch

  // not necessary for Vortex as hartId is set via Verilog parameter
  // core.io.hartid := outer.hartIdSinkNode.bundle
  // require(core.io.hartid.getWidth >= outer.hartIdSinkNode.bundle.getWidth,
  //   s"core hartid wire (${core.io.hartid.getWidth}b) truncates external hartid wire (${outer.hartIdSinkNode.bundle.getWidth}b)")

  // end @copypaste from RocketTile --------------------------------------------

  // ---------------------------------------------
  // Translate Vortex memory interface to TileLink
  // ---------------------------------------------

  if (outer.vortexParams.useVxCache) {
    println(s"width of a channel data ${core.io.mem.get.a.bits.data.getWidth}")
    println(s"width of d channel data ${core.io.mem.get.d.bits.data.getWidth}")

    val memTLAdapter = Module(
      new VortexTLAdapter(
        outer.dmemSourceWidth,
        chiselTypeOf(core.io.mem.get.a.bits),
        chiselTypeOf(core.io.mem.get.d.bits),
        outer.memNode.out.head
      )
    )

    // connection: VortexBundle <--> VortexTLAdapter <--> TL memNode
    memTLAdapter.io.inReq <> core.io.mem.get.a
    core.io.mem.get.d <> memTLAdapter.io.inResp
    outer.memNode.out(0)._1.a <> memTLAdapter.io.outReq
    memTLAdapter.io.outResp <> outer.memNode.out(0)._1.d
  } else {
    def connectImem = {
      val imemTLAdapter = Module(
        new VortexTLAdapter(
          outer.imemSourceWidth,
          chiselTypeOf(core.io.imem.get(0).a.bits),
          chiselTypeOf(core.io.imem.get(0).d.bits),
          outer.imemNodes.head.out.head
        )
      )
      // TODO: make imemNodes not a vector
      imemTLAdapter.io.inReq <> core.io.imem.get(0).a
      core.io.imem.get(0).d <> imemTLAdapter.io.inResp
      outer.imemNodes(0).out(0)._1.a <> imemTLAdapter.io.outReq
      imemTLAdapter.io.outResp <> outer.imemNodes(0).out(0)._1.d
    }

    def connectDmem = {
      // @perf: this would duplicate SourceGenerator table for every lane and eat
      // up some area
      val dmemTLBundles = outer.dmemNodes.map(_.out.head._1)
      val dmemTLAdapters = Seq.tabulate(outer.numLsuLanes) { _ =>
        Module(
          new VortexTLAdapter(
            outer.dmemSourceWidth,
            new VortexBundleA(tagWidth = outer.dmemTagWidth, dataWidth = 32),
            new VortexBundleD(tagWidth = outer.dmemTagWidth, dataWidth = 32),
            outer.dmemNodes(0).out.head
          )
        )
      }

      // Since the individual per-lane TL requests might come back out-of-sync between
      // the lanes, but Vortex core expects the per-lane responses to be synced,
      // we need to selectively fire responses that have the same source, and
      // delay others.
      //
      // In order to do that, we pick a source from one of the valid lanes using e.g.
      // an arbiter.  Then using the chosen source id, we
      // - lie to core that response is not valid if source doesn't match picked, and
      // - lie to downstream that core is not ready if source doesn't match picked.
      //
      // Note that we cannot do this filtering logic using TileLink source ID, because
      // we're allocating source for each lane independently.  In that case, it's
      // possible that lane 0's source matches lane 1/2/3's source by chance,
      // even when they originated from different warps.  Using Vortex's dcache req tag
      // solves this issue because they use a UUID that is unique across all requests
      // in the program.
      //
      // TODO: A cleaner solution would be to simply do a synchronized allocation
      // of a same source id for all lanes.
      val arb = Module(
        new RRArbiter(
          // FIXME: should really be source on D channel
          new VortexBundleA(tagWidth = outer.dmemTagWidth, dataWidth = 32).source.cloneType,
          outer.numLsuLanes
        )
      )
      arb.io.out.ready := true.B

      val dmemBundles = dmemTLAdapters.map(_.io.inResp)

      (arb.io.in zip dmemBundles).foreach { case (arbIn, vxDmem) =>
        arbIn.valid := vxDmem.valid
        arbIn.bits := vxDmem.bits.source
      }

      val matchingSources = Wire(UInt(outer.numLsuLanes.W))
      matchingSources := dmemBundles
        .map(b =>
          // If there is no valid response pending across all lanes,
          // matchingSources should not filter out upstream ready signals, so
          // set it to all-1
          !arb.io.out.valid || (b.bits.source === arb.io.out.bits)
        )
        .asUInt

      // make connection:
      // VortexBundle <--> sourceId filter <--> VortexTLAdapter <--> dmemNodes
      //
      // Chisel doesn't support 2-D array in BlackBox interface to Verilog, so
      // need to flatten everything.
      dmemTLAdapters.zipWithIndex.foreach { case (tlAdapter, i) =>
        // tlAdapter.io.inReq <> coreMem.a
        tlAdapter.io.inReq.valid := core.io.dmem_a_valid(i)
        tlAdapter.io.inReq.bits.opcode := core.io.dmem_a_bits_opcode(3 * (i + 1) - 1, 3 * i)
        tlAdapter.io.inReq.bits.size := core.io.dmem_a_bits_size(4 * (i + 1) - 1, 4 * i)
        tlAdapter.io.inReq.bits.source :=
          core.io.dmem_a_bits_source(outer.dmemTagWidth * (i + 1) - 1, outer.dmemTagWidth * i)
        tlAdapter.io.inReq.bits.address := core.io.dmem_a_bits_address(32 * (i + 1) - 1, 32 * i)
        tlAdapter.io.inReq.bits.mask := core.io.dmem_a_bits_mask(4 * (i + 1) - 1, 4 * i)
        tlAdapter.io.inReq.bits.data := core.io.dmem_a_bits_data(32 * (i + 1) - 1, 32 * i)
      }
      core.io.dmem_a_ready := dmemTLAdapters.map(_.io.inReq.ready).asUInt
      // This unfiltered valid is replaced by the later connection below
      // (last connection wins).
      core.io.dmem_d_valid := dmemTLAdapters.map(_.io.inResp.valid).asUInt
      core.io.dmem_d_bits_opcode := dmemTLAdapters.map(_.io.inResp.bits.opcode).asUInt
      core.io.dmem_d_bits_size := dmemTLAdapters.map(_.io.inResp.bits.size).asUInt
      core.io.dmem_d_bits_source := dmemTLAdapters.map(_.io.inResp.bits.source).asUInt
      core.io.dmem_d_bits_data := dmemTLAdapters.map(_.io.inResp.bits.data).asUInt

      // override response channel with matchingSources
      val dmem_d_valid_vec = Wire(Vec(outer.numLsuLanes, Bool()))
      dmemTLAdapters.zipWithIndex.foreach { case (tlAdapter, i) =>
        dmem_d_valid_vec(i) := tlAdapter.io.inResp.valid && matchingSources(i)
        tlAdapter.io.inResp.ready := core.io.dmem_d_ready(i) && matchingSources(i)
      }
      core.io.dmem_d_valid := dmem_d_valid_vec.asUInt

      (dmemTLAdapters zip dmemTLBundles) foreach { case (tlAdapter, tlOut) =>
        tlOut.a <> tlAdapter.io.outReq
        tlAdapter.io.outResp <> tlOut.d
      }

      outer.dmemAggregateNode.out.foreach { bo =>
        dontTouch(bo._1.a)
        dontTouch(bo._1.d)
      }
    }

    def connectSmem = {
      // @perf: this would duplicate SourceGenerator table for every lane and eat
      // up some area
      val smemTLBundles = outer.smemNodes.map(_.out.head._1)
      val smemTLAdapters = Seq.tabulate(outer.numLsuLanes) { _ =>
        Module(
          new VortexTLAdapter(
            outer.smemSourceWidth,
            new VortexBundleA(tagWidth = outer.smemTagWidth, dataWidth = 32),
            new VortexBundleD(tagWidth = outer.smemTagWidth, dataWidth = 32),
            outer.smemNodes(0).out.head
          )
        )
      }

      smemTLAdapters.zipWithIndex.foreach { case (tlAdapter, i) =>
        // tlAdapter.io.inReq <> coreMem.a
        tlAdapter.io.inReq.valid := core.io.smem_a_valid(i)
        tlAdapter.io.inReq.bits.opcode := core.io.smem_a_bits_opcode(3 * (i + 1) - 1, 3 * i)
        tlAdapter.io.inReq.bits.size := core.io.smem_a_bits_size(4 * (i + 1) - 1, 4 * i)
        tlAdapter.io.inReq.bits.source :=
          core.io.smem_a_bits_source(outer.smemTagWidth * (i + 1) - 1, outer.smemTagWidth * i)
        tlAdapter.io.inReq.bits.address := core.io.smem_a_bits_address(32 * (i + 1) - 1, 32 * i)
        tlAdapter.io.inReq.bits.mask := core.io.smem_a_bits_mask(4 * (i + 1) - 1, 4 * i)
        tlAdapter.io.inReq.bits.data := core.io.smem_a_bits_data(32 * (i + 1) - 1, 32 * i)
      }
      core.io.smem_a_ready := smemTLAdapters.map(_.io.inReq.ready).asUInt
      core.io.smem_d_valid := smemTLAdapters.map(_.io.inResp.valid).asUInt
      core.io.smem_d_bits_opcode := smemTLAdapters.map(_.io.inResp.bits.opcode).asUInt
      core.io.smem_d_bits_size := smemTLAdapters.map(_.io.inResp.bits.size).asUInt
      core.io.smem_d_bits_source := smemTLAdapters.map(_.io.inResp.bits.source).asUInt
      core.io.smem_d_bits_data := smemTLAdapters.map(_.io.inResp.bits.data).asUInt

      smemTLAdapters.zipWithIndex.foreach { case (tlAdapter, i) =>
        tlAdapter.io.inResp.ready := core.io.smem_d_ready(i)
      }

      (smemTLAdapters zip smemTLBundles) foreach { case (tlAdapter, tlOut) =>
        tlOut.a <> tlAdapter.io.outReq
        tlAdapter.io.outResp <> tlOut.d
      }
    }

    connectImem
    connectDmem
    connectSmem
  }

  // TODO: generalize for useVxCache

  // RoCC
  if (outer.roccs.nonEmpty) {
    val (respArb, cmdRouter) = {
      val respArb = Module(new RRArbiter(new RoCCResponse()(outer.p), outer.roccs.size))
      val cmdRouter = Module(new RoccCommandRouter(outer.roccs.map(_.opcodes))(outer.p))
      outer.roccs.zipWithIndex.foreach { case (rocc, i) =>
        // ptwPorts ++= rocc.module.io.ptw
        rocc.module.io.ptw <> DontCare
        rocc.module.io.mem <> DontCare
        rocc.module.io.cmd <> cmdRouter.io.out(i)
        respArb.io.in(i) <> Queue(rocc.module.io.resp)
      }

      // Create this FPU just for RoCC
      // val nFPUPorts = outer.roccs.filter(_.usesFPU).size
      val fp_rocc_ios = outer.roccs.map(_.module.io)
      // No FPU is instantiated: tie off every RoCC's FPU port.
      fp_rocc_ios.foreach { io =>
        io.fpu_req.ready := false.B
        io.fpu_resp.valid := false.B
        io.fpu_resp.bits := DontCare
      }

      (respArb, cmdRouter)
    }

    cmdRouter.io.in <> DontCare
    outer.roccs.foreach(_.module.io.exception := DontCare)
    respArb.io.out <> DontCare
  }
}

// Some @copypaste from CoalescerSourceGen.
/** Adapts one Vortex request/response channel pair to a TileLink A/D pair.
  *
  * Requests are forwarded field by field. The Vortex-side `source` tag is
  * replaced on the TileLink side by an ID handed out by a `SourceGenerator`,
  * and the original tag (as returned by `sourceGen.io.peek`) is put back on
  * responses. A request only fires when the generator has a valid ID.
  *
  * @param newSourceWidth width argument passed to the `SourceGenerator`
  * @param inReqT         Chisel type of the Vortex-side request payload
  * @param inRespT        Chisel type of the Vortex-side response payload
  * @param outTL          TileLink bundle/edge pair this adapter drives; the
  *                       bundle is only used for its A/D channel types
  */
class VortexTLAdapter(
    newSourceWidth: Int,
    inReqT: VortexBundleA,
    inRespT: VortexBundleD,
    outTL: (TLBundle, TLEdge)
) extends Module {
  val io = IO(new Bundle {
    // in/out means upstream/downstream
    val inReq = Flipped(Decoupled(inReqT))
    val outReq = chiselTypeOf(outTL._1.a)
    val inResp = Decoupled(inRespT)
    val outResp = chiselTypeOf(outTL._1.d)
  })
  val (bundle, edge) = outTL

  val sourceGen = Module(
    new SourceGenerator(
      newSourceWidth,
      Some(inReqT.source),
      ignoreInUse = false
    )
  )
  sourceGen.io.gen := io.outReq.fire // use up a source ID only when request is created
  sourceGen.io.reclaim.valid := io.outResp.fire
  sourceGen.io.reclaim.bits := io.outResp.bits.source
  sourceGen.io.meta := io.inReq.bits.source

  // io passthrough logic
  // NOTE: ready/valid/source connections in this section are replaced by the
  // "man-in-the-middle" connections at the bottom (last connection wins).
  // TLBundleA <> VortexBundleA
  io.outReq.valid := io.inReq.valid
  io.outReq.bits.opcode := io.inReq.bits.opcode
  io.outReq.bits.param := 0.U
  io.outReq.bits.size := io.inReq.bits.size
  io.outReq.bits.source := io.inReq.bits.source
  io.outReq.bits.address := io.inReq.bits.address
  // Get requires contiguous mask; only copy core's potentially-partial mask
  // when writing
  io.outReq.bits.mask := Mux(
    edge.hasData(io.outReq.bits),
    io.inReq.bits.mask,
    // generate TL-correct mask
    edge.mask(io.inReq.bits.address, io.inReq.bits.size)
  )
  io.outReq.bits.data := io.inReq.bits.data
  io.outReq.bits.corrupt := 0.U
  io.inReq.ready := io.outReq.ready
  // VortexBundleD <> TLBundleD
  // Filtering out write requests is handled inside the wrapper Verilog
  io.inResp.valid := io.outResp.valid
  io.inResp.bits.opcode := io.outResp.bits.opcode
  io.inResp.bits.size := io.outResp.bits.size
  io.inResp.bits.source := io.outResp.bits.source
  io.inResp.bits.data := io.outResp.bits.data
  io.outResp.ready := io.inResp.ready

  // "man-in-the-middle"
  io.inReq.ready := io.outReq.ready && sourceGen.io.id.valid
  io.outReq.valid := io.inReq.valid && sourceGen.io.id.valid
  io.outReq.bits.source := sourceGen.io.id.bits
  // translate upstream response back to its old sourceId
  io.inResp.bits.source := sourceGen.io.peek
}