diff --git a/radiance.mk b/radiance.mk index d5a5552..5eddf00 100644 --- a/radiance.mk +++ b/radiance.mk @@ -14,7 +14,6 @@ EXTRA_SIM_LDFLAGS += -L$(RADPIE_BUILD_DIR) -Wl,-rpath,$(RADPIE_BUILD_DIR) -lradp EXTRA_SIM_PREPROC_DEFINES += \ +define+SIMULATION \ +define+GPR_RESET \ - +define+GPR_DUPLICATED \ +define+LSU_DUP_DISABLE \ +define+DBG_TRACE_CORE_PIPELINE_VCS \ +define+PERF_ENABLE \ diff --git a/src/main/scala/radiance/memory/VortexCache.scala b/src/main/scala/radiance/memory/VortexCache.scala index 61a841c..0c95eb4 100644 --- a/src/main/scala/radiance/memory/VortexCache.scala +++ b/src/main/scala/radiance/memory/VortexCache.scala @@ -10,18 +10,18 @@ import org.chipsalliance.cde.config.{Parameters, Field} case object VortexL1Key extends Field[Option[VortexL1Config]](None /*default*/ ) case class VortexL1Config( - cacheSize: Int, // total cache size in bytes - numBanks: Int, - wordSize: Int, // This is the read/write granularity of the L1 cache - cacheLineSize: Int, - coreTagWidth: Int, - writeInfoReqQSize: Int, - mshrSize: Int, - memSideSourceIds: Int, - uncachedAddrSets: Seq[AddressSet] + cacheSize: Int, // total cache size in bytes + numBanks: Int, + inputSize: Int, // This is the read/write granularity of the L1 cache + cacheLineSize: Int, + coreTagWidth: Int, + writeInfoReqQSize: Int, + mshrSize: Int, + memSideSourceIds: Int, + uncachedAddrSets: Seq[AddressSet] ) { def coreTagPlusSizeWidth: Int = { - log2Ceil(wordSize) + coreTagWidth + log2Ceil(inputSize) + coreTagWidth } // NOTE: This assertion depends on the fact that the Vortex cache is // configured to have 1 bank, and that it uses MSHR id as the tag of @@ -37,7 +37,7 @@ object defaultVortexL1Config extends VortexL1Config( cacheSize = 16384, numBanks = 4, - wordSize = 16, + inputSize = 16, cacheLineSize = 16, coreTagWidth = 8, writeInfoReqQSize = 16, @@ -80,15 +80,15 @@ class VortexBankPassThrough(config: VortexL1Config)(implicit p: Parameters) // Slave node to upstream val managerParam = Seq( TLSlavePortParameters.v1( - beatBytes = config.wordSize, + beatBytes = config.inputSize, managers = Seq( TLSlaveParameters.v1( address = config.uncachedAddrSets, regionType = RegionType.IDEMPOTENT, executable = false, - supportsGet = TransferSizes(1, config.wordSize), - supportsPutPartial = TransferSizes(1, config.wordSize), - supportsPutFull = TransferSizes(1, config.wordSize), + supportsGet = TransferSizes(1, config.inputSize), + supportsPutPartial = TransferSizes(1, config.inputSize), + supportsPutFull = TransferSizes(1, config.inputSize), fifoId = Some(0) ) ) @@ -107,10 +107,10 @@ class VortexBankPassThrough(config: VortexL1Config)(implicit p: Parameters) config.memSideSourceIds ) + 5 /*FIXME: give more sourceId so that passthrough doesn't block; hacky*/ ) ), - supportsProbe = TransferSizes(1, config.wordSize), - supportsGet = TransferSizes(1, config.wordSize), - supportsPutFull = TransferSizes(1, config.wordSize), - supportsPutPartial = TransferSizes(1, config.wordSize) + supportsProbe = TransferSizes(1, config.cacheLineSize), + supportsGet = TransferSizes(1, config.cacheLineSize), + supportsPutFull = TransferSizes(1, config.cacheLineSize), + supportsPutPartial = TransferSizes(1, config.cacheLineSize) ) ) ) @@ -141,8 +141,8 @@ class VortexBank( // suppose have 4 bank // base for bank 1: ...000000|01|0000 // mask for bank 1; 111111|00|1111 - val base = 0x00000000L | (bankId * config.wordSize) - val mask = 0xffffffffL ^ ((config.numBanks - 1) * config.wordSize) + val base = 0x00000000L | (bankId * config.inputSize) + val mask = 0xffffffffL ^ ((config.numBanks - 1) * config.inputSize) val excludeSets = config.uncachedAddrSets var remainingSets: Seq[AddressSet] = Seq(AddressSet(base, mask)) @@ -155,15 +155,15 @@ class VortexBank( // Slave node to upstream val managerParam = Seq( TLSlavePortParameters.v1( - beatBytes = config.wordSize, + beatBytes = config.inputSize, managers = Seq( TLSlaveParameters.v1( address = generateAddressSets(), regionType = RegionType.IDEMPOTENT, // idk what this does executable = false, - supportsGet = TransferSizes(1, config.wordSize), - supportsPutPartial = TransferSizes(1, config.wordSize), - supportsPutFull = TransferSizes(1, config.wordSize), + supportsGet = TransferSizes(1, config.inputSize), + supportsPutPartial = TransferSizes(1, config.inputSize), + supportsPutFull = TransferSizes(1, config.inputSize), fifoId = Some(0) ) ) @@ -177,10 +177,10 @@ class VortexBank( TLMasterParameters.v1( name = s"VortexBank${bankId}", sourceId = IdRange(0, config.memSideSourceIds), - supportsProbe = TransferSizes(1, config.wordSize), - supportsGet = TransferSizes(1, config.wordSize), - supportsPutFull = TransferSizes(1, config.wordSize), - supportsPutPartial = TransferSizes(1, config.wordSize) + supportsProbe = TransferSizes(1, config.inputSize), + supportsGet = TransferSizes(1, config.inputSize), + supportsPutFull = TransferSizes(1, config.inputSize), + supportsPutPartial = TransferSizes(1, config.inputSize) ) ) ) @@ -204,7 +204,7 @@ class VortexBankImp( ) extends LazyModuleImp(outer) { val vxCache = Module( new VX_cache_top( - WORD_SIZE = config.wordSize, + WORD_SIZE = config.inputSize, // distribute total size across numBanks CACHE_SIZE = config.cacheSize / config.numBanks, CACHE_LINE_SIZE = config.cacheLineSize, @@ -236,7 +236,7 @@ class VortexBankImp( } class ReadReqInfo(config: VortexL1Config) extends Bundle { - val size = UInt(log2Ceil(config.wordSize).W) + val size = UInt(log2Ceil(config.inputSize + 1).W) val id = UInt(config.coreTagWidth.W) } @@ -264,7 +264,7 @@ class VortexBankImp( // 4 is also hardcoded, it should be log2WordSize vxCache.io.core_req_addr := tlInFromCoal.a.bits.address( 31, - log2Ceil(config.wordSize) + log2Ceil(config.inputSize) ) vxCache.io.core_req_byteen := tlInFromCoal.a.bits.mask vxCache.io.core_req_data := tlInFromCoal.a.bits.data @@ -362,17 +362,17 @@ class VortexBankImp( TLMessages.Get ) - tlOutToL2.a.bits.address := Cat(vxCache.io.mem_req_addr, 0.U(4.W)) + tlOutToL2.a.bits.address := Cat(vxCache.io.mem_req_addr, 0.U(log2Ceil(config.cacheLineSize).W)) tlOutToL2.a.bits.mask := Mux( vxCache.io.mem_req_rw, vxCache.io.mem_req_byteen, - 0xffff.U + ~(0.U(config.cacheLineSize.W)) ) tlOutToL2.a.bits.data := vxCache.io.mem_req_data tlOutToL2.a.bits.source := sourceGen.io.id.bits // ignore param, size, corrupt fields tlOutToL2.a.bits.param := 0.U - tlOutToL2.a.bits.size := 4.U // FIXME: hardcoded + tlOutToL2.a.bits.size := log2Ceil(config.cacheLineSize).U tlOutToL2.a.bits.corrupt := false.B // downstream L2 -> vxCache response tlOutToL2.d.ready := vxCache.io.mem_rsp_ready diff --git a/src/main/scala/radiance/subsystem/Configs.scala b/src/main/scala/radiance/subsystem/Configs.scala index a8acd25..1a576cb 100644 --- a/src/main/scala/radiance/subsystem/Configs.scala +++ b/src/main/scala/radiance/subsystem/Configs.scala @@ -126,7 +126,7 @@ class WithFuzzerCores( class WithRadianceCluster( clusterId: Int, location: HierarchicalLocation = InSubsystem, - crossing: RocketCrossingParams = RocketCrossingParams() // TODO make this not rocket + crossing: RocketCrossingParams = RocketCrossingParams() ) extends Config((site, here, up) => { case ClustersLocated(`location`) => up(ClustersLocated(location)) :+ RadianceClusterAttachParams( RadianceClusterParams(clusterId = clusterId), @@ -174,7 +174,17 @@ class WithPriorityCoalXbar extends Config((site, _, up) => { class WithVortexL1Banks(nBanks: Int = 4) extends Config ((site, _, up) => { case VortexL1Key => { - Some(defaultVortexL1Config.copy(numBanks = nBanks)) + Some(defaultVortexL1Config.copy( + numBanks = nBanks, + inputSize = up(SIMTCoreKey).get.nMemLanes * 4, + cacheLineSize = up(SIMTCoreKey).get.nMemLanes * 4, + memSideSourceIds = 64, + mshrSize = 64, + coreTagWidth = log2Ceil(up(SIMTCoreKey).get.nSrcIds.max(up(CoalescerKey) match { + case Some(key) => key.numNewSrcIds + case None => 0 + })) + log2Ceil(up(SIMTCoreKey).get.nMemLanes) + 1 + )) } }) @@ -197,8 +207,7 @@ class WithCoalescer(nNewSrcIds: Int = 8, enable : Boolean = true) extends Config // If instantiating L1 cache, the maximum coalescing size should match the // cache line size val maxCoalSizeInBytes = up(VortexL1Key, site) match { - case Some(param) => - (param.wordSize) + case Some(param) => param.inputSize case None => sbusWidthInBytes } diff --git a/src/main/scala/radiance/tile/GemminiTile.scala b/src/main/scala/radiance/tile/GemminiTile.scala index ccb7515..897a314 100644 --- a/src/main/scala/radiance/tile/GemminiTile.scala +++ b/src/main/scala/radiance/tile/GemminiTile.scala @@ -140,6 +140,9 @@ class GemminiTileModuleImp(outer: GemminiTile) extends BaseTileModuleImp(outer) tieOffGemminiRocc + outer.traceSourceNode.bundle := DontCare + outer.traceSourceNode.bundle.insns foreach (_.valid := false.B) + // hacky, but cluster will AND the cease signals from all tiles, and we want // the core tiles to determine cluster cease not Gemmini outer.reportCease(Some(true.B)) diff --git a/src/main/scala/radiance/tile/RadianceCluster.scala b/src/main/scala/radiance/tile/RadianceCluster.scala index e5d1e10..cb50a32 100644 --- a/src/main/scala/radiance/tile/RadianceCluster.scala +++ b/src/main/scala/radiance/tile/RadianceCluster.scala @@ -8,6 +8,7 @@ import chisel3.util._ import freechips.rocketchip.diplomacy._ import freechips.rocketchip.prci.ClockSinkParameters import freechips.rocketchip.subsystem._ +import freechips.rocketchip.tile.TraceBundle import freechips.rocketchip.tilelink._ import gemmini._ import org.chipsalliance.cde.config.Parameters @@ -91,7 +92,7 @@ class RadianceCluster ( callback(p) } } - def connect_one[T <: BaseNode with TLNode](from: TLNode, to: () => T): T = { + def connect_one[T <: TLNode](from: TLNode, to: () => T): T = { val t = to() guard_monitors { implicit p => t := from } t @@ -183,13 +184,18 @@ class RadianceCluster ( val spad_read_nodes = Seq.fill(smem_banks) { val r_dist = DistributorNode(from = smem_width, to = wordSize) - guard_monitors { implicit p => r_dist := gemmini.spad_read_nodes } + guard_monitors { implicit p => r_dist := TLBuffer(BufferParams(1, false, true), BufferParams(0)) := gemmini.spad_read_nodes } Seq.fill(smem_subbanks) { connect_one(r_dist, TLIdentityNode.apply) } } val spad_write_nodes = Seq.fill(smem_banks) { val w_dist = DistributorNode(from = smem_width, to = wordSize) - guard_monitors { implicit p => w_dist := gemmini.spad_write_nodes } + guard_monitors { implicit p => w_dist := TLBuffer(BufferParams(1, false, true), BufferParams(0)) := gemmini.spad_write_nodes } Seq.fill(smem_subbanks) { connect_one(w_dist, TLIdentityNode.apply) } + /* Seq.fill(smem_subbanks) { + val buf = TLBuffer(BufferParams(1, false, true), BufferParams(0)) + buf := w_dist + buf + } */ } val ws_dist = DistributorNode(from = smem_width, to = wordSize) guard_monitors { implicit p => ws_dist := gemmini.spad.spad_writer.node } // this is the dma write node diff --git a/src/main/scala/radiance/tile/RadianceTile.scala b/src/main/scala/radiance/tile/RadianceTile.scala index ea377cb..2cfcc5c 100644 --- a/src/main/scala/radiance/tile/RadianceTile.scala +++ b/src/main/scala/radiance/tile/RadianceTile.scala @@ -165,10 +165,14 @@ class RadianceTile private ( // to a stall in the backend pipeline and resulting in a deadlock. val imemSourceWidth = 4 // 1 << imemSourceWidth == IBUF_SIZE - val dmemSourceWidth = p(SIMTCoreKey) match { - // TODO: respect coalescer newSrcIds + val smemSourceWidth = p(SIMTCoreKey) match { case Some(simtParam) => log2Ceil(simtParam.nSrcIds) - case None => 4 + case None => 4 + } + + val dmemSourceWidth = p(CoalescerKey) match { + case Some(coalParam) => log2Ceil(coalParam.numOldSrcIds) + case None => smemSourceWidth } // require( // dmemSourceWidth >= 4, @@ -177,8 +181,6 @@ class RadianceTile private ( // "We recommend setting nSrcIds to at least 16." // ) - val smemSourceWidth = 4 // FIXME: hardcoded - // Replicates some of the logic of how Vortex determines the tag width of // memory requests so that Chisel and Verilog are in agreement on bitwidths. // See VX_gpu_pkg.sv @@ -190,7 +192,8 @@ class RadianceTile private ( } val imemTagWidth = UUID_WIDTH + NW_WIDTH - val LSUQ_SIZE = 2 * numWarps * (numCoreLanes / numLsuLanes) + val LSUQ_SIZE = 8 * numWarps * (numCoreLanes / numLsuLanes) + assert(LSUQ_SIZE == p(SIMTCoreKey).get.nSrcIds) val LSUQ_TAG_BITS = log2Ceil(LSUQ_SIZE) + 1 /*DCACHE_BATCH_SEL_BITS*/ val dmemTagWidth = UUID_WIDTH + LSUQ_TAG_BITS // dmem and smem shares the same tag width, DCACHE_NOSM_TAG_WIDTH