wide dram support and enlarge queues
This commit is contained in:
@@ -14,7 +14,6 @@ EXTRA_SIM_LDFLAGS += -L$(RADPIE_BUILD_DIR) -Wl,-rpath,$(RADPIE_BUILD_DIR) -lradp
|
|||||||
EXTRA_SIM_PREPROC_DEFINES += \
|
EXTRA_SIM_PREPROC_DEFINES += \
|
||||||
+define+SIMULATION \
|
+define+SIMULATION \
|
||||||
+define+GPR_RESET \
|
+define+GPR_RESET \
|
||||||
+define+GPR_DUPLICATED \
|
|
||||||
+define+LSU_DUP_DISABLE \
|
+define+LSU_DUP_DISABLE \
|
||||||
+define+DBG_TRACE_CORE_PIPELINE_VCS \
|
+define+DBG_TRACE_CORE_PIPELINE_VCS \
|
||||||
+define+PERF_ENABLE \
|
+define+PERF_ENABLE \
|
||||||
|
|||||||
@@ -10,18 +10,18 @@ import org.chipsalliance.cde.config.{Parameters, Field}
|
|||||||
case object VortexL1Key extends Field[Option[VortexL1Config]](None /*default*/ )
|
case object VortexL1Key extends Field[Option[VortexL1Config]](None /*default*/ )
|
||||||
|
|
||||||
case class VortexL1Config(
|
case class VortexL1Config(
|
||||||
cacheSize: Int, // total cache size in bytes
|
cacheSize: Int, // total cache size in bytes
|
||||||
numBanks: Int,
|
numBanks: Int,
|
||||||
wordSize: Int, // This is the read/write granularity of the L1 cache
|
inputSize: Int, // This is the read/write granularity of the L1 cache
|
||||||
cacheLineSize: Int,
|
cacheLineSize: Int,
|
||||||
coreTagWidth: Int,
|
coreTagWidth: Int,
|
||||||
writeInfoReqQSize: Int,
|
writeInfoReqQSize: Int,
|
||||||
mshrSize: Int,
|
mshrSize: Int,
|
||||||
memSideSourceIds: Int,
|
memSideSourceIds: Int,
|
||||||
uncachedAddrSets: Seq[AddressSet]
|
uncachedAddrSets: Seq[AddressSet]
|
||||||
) {
|
) {
|
||||||
def coreTagPlusSizeWidth: Int = {
|
def coreTagPlusSizeWidth: Int = {
|
||||||
log2Ceil(wordSize) + coreTagWidth
|
log2Ceil(inputSize) + coreTagWidth
|
||||||
}
|
}
|
||||||
// NOTE: This assertion depends on the fact that the Vortex cache is
|
// NOTE: This assertion depends on the fact that the Vortex cache is
|
||||||
// configured to have 1 bank, and that it uses MSHR id as the tag of
|
// configured to have 1 bank, and that it uses MSHR id as the tag of
|
||||||
@@ -37,7 +37,7 @@ object defaultVortexL1Config
|
|||||||
extends VortexL1Config(
|
extends VortexL1Config(
|
||||||
cacheSize = 16384,
|
cacheSize = 16384,
|
||||||
numBanks = 4,
|
numBanks = 4,
|
||||||
wordSize = 16,
|
inputSize = 16,
|
||||||
cacheLineSize = 16,
|
cacheLineSize = 16,
|
||||||
coreTagWidth = 8,
|
coreTagWidth = 8,
|
||||||
writeInfoReqQSize = 16,
|
writeInfoReqQSize = 16,
|
||||||
@@ -80,15 +80,15 @@ class VortexBankPassThrough(config: VortexL1Config)(implicit p: Parameters)
|
|||||||
// Slave node to upstream
|
// Slave node to upstream
|
||||||
val managerParam = Seq(
|
val managerParam = Seq(
|
||||||
TLSlavePortParameters.v1(
|
TLSlavePortParameters.v1(
|
||||||
beatBytes = config.wordSize,
|
beatBytes = config.inputSize,
|
||||||
managers = Seq(
|
managers = Seq(
|
||||||
TLSlaveParameters.v1(
|
TLSlaveParameters.v1(
|
||||||
address = config.uncachedAddrSets,
|
address = config.uncachedAddrSets,
|
||||||
regionType = RegionType.IDEMPOTENT,
|
regionType = RegionType.IDEMPOTENT,
|
||||||
executable = false,
|
executable = false,
|
||||||
supportsGet = TransferSizes(1, config.wordSize),
|
supportsGet = TransferSizes(1, config.inputSize),
|
||||||
supportsPutPartial = TransferSizes(1, config.wordSize),
|
supportsPutPartial = TransferSizes(1, config.inputSize),
|
||||||
supportsPutFull = TransferSizes(1, config.wordSize),
|
supportsPutFull = TransferSizes(1, config.inputSize),
|
||||||
fifoId = Some(0)
|
fifoId = Some(0)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@@ -107,10 +107,10 @@ class VortexBankPassThrough(config: VortexL1Config)(implicit p: Parameters)
|
|||||||
config.memSideSourceIds
|
config.memSideSourceIds
|
||||||
) + 5 /*FIXME: give more sourceId so that passthrough doesn't block; hacky*/ )
|
) + 5 /*FIXME: give more sourceId so that passthrough doesn't block; hacky*/ )
|
||||||
),
|
),
|
||||||
supportsProbe = TransferSizes(1, config.wordSize),
|
supportsProbe = TransferSizes(1, config.cacheLineSize),
|
||||||
supportsGet = TransferSizes(1, config.wordSize),
|
supportsGet = TransferSizes(1, config.cacheLineSize),
|
||||||
supportsPutFull = TransferSizes(1, config.wordSize),
|
supportsPutFull = TransferSizes(1, config.cacheLineSize),
|
||||||
supportsPutPartial = TransferSizes(1, config.wordSize)
|
supportsPutPartial = TransferSizes(1, config.cacheLineSize)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@@ -141,8 +141,8 @@ class VortexBank(
|
|||||||
// suppose have 4 bank
|
// suppose have 4 bank
|
||||||
// base for bank 1: ...000000|01|0000
|
// base for bank 1: ...000000|01|0000
|
||||||
// mask for bank 1; 111111|00|1111
|
// mask for bank 1; 111111|00|1111
|
||||||
val base = 0x00000000L | (bankId * config.wordSize)
|
val base = 0x00000000L | (bankId * config.inputSize)
|
||||||
val mask = 0xffffffffL ^ ((config.numBanks - 1) * config.wordSize)
|
val mask = 0xffffffffL ^ ((config.numBanks - 1) * config.inputSize)
|
||||||
|
|
||||||
val excludeSets = config.uncachedAddrSets
|
val excludeSets = config.uncachedAddrSets
|
||||||
var remainingSets: Seq[AddressSet] = Seq(AddressSet(base, mask))
|
var remainingSets: Seq[AddressSet] = Seq(AddressSet(base, mask))
|
||||||
@@ -155,15 +155,15 @@ class VortexBank(
|
|||||||
// Slave node to upstream
|
// Slave node to upstream
|
||||||
val managerParam = Seq(
|
val managerParam = Seq(
|
||||||
TLSlavePortParameters.v1(
|
TLSlavePortParameters.v1(
|
||||||
beatBytes = config.wordSize,
|
beatBytes = config.inputSize,
|
||||||
managers = Seq(
|
managers = Seq(
|
||||||
TLSlaveParameters.v1(
|
TLSlaveParameters.v1(
|
||||||
address = generateAddressSets(),
|
address = generateAddressSets(),
|
||||||
regionType = RegionType.IDEMPOTENT, // idk what this does
|
regionType = RegionType.IDEMPOTENT, // idk what this does
|
||||||
executable = false,
|
executable = false,
|
||||||
supportsGet = TransferSizes(1, config.wordSize),
|
supportsGet = TransferSizes(1, config.inputSize),
|
||||||
supportsPutPartial = TransferSizes(1, config.wordSize),
|
supportsPutPartial = TransferSizes(1, config.inputSize),
|
||||||
supportsPutFull = TransferSizes(1, config.wordSize),
|
supportsPutFull = TransferSizes(1, config.inputSize),
|
||||||
fifoId = Some(0)
|
fifoId = Some(0)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@@ -177,10 +177,10 @@ class VortexBank(
|
|||||||
TLMasterParameters.v1(
|
TLMasterParameters.v1(
|
||||||
name = s"VortexBank${bankId}",
|
name = s"VortexBank${bankId}",
|
||||||
sourceId = IdRange(0, config.memSideSourceIds),
|
sourceId = IdRange(0, config.memSideSourceIds),
|
||||||
supportsProbe = TransferSizes(1, config.wordSize),
|
supportsProbe = TransferSizes(1, config.inputSize),
|
||||||
supportsGet = TransferSizes(1, config.wordSize),
|
supportsGet = TransferSizes(1, config.inputSize),
|
||||||
supportsPutFull = TransferSizes(1, config.wordSize),
|
supportsPutFull = TransferSizes(1, config.inputSize),
|
||||||
supportsPutPartial = TransferSizes(1, config.wordSize)
|
supportsPutPartial = TransferSizes(1, config.inputSize)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@@ -204,7 +204,7 @@ class VortexBankImp(
|
|||||||
) extends LazyModuleImp(outer) {
|
) extends LazyModuleImp(outer) {
|
||||||
val vxCache = Module(
|
val vxCache = Module(
|
||||||
new VX_cache_top(
|
new VX_cache_top(
|
||||||
WORD_SIZE = config.wordSize,
|
WORD_SIZE = config.inputSize,
|
||||||
// distribute total size across numBanks
|
// distribute total size across numBanks
|
||||||
CACHE_SIZE = config.cacheSize / config.numBanks,
|
CACHE_SIZE = config.cacheSize / config.numBanks,
|
||||||
CACHE_LINE_SIZE = config.cacheLineSize,
|
CACHE_LINE_SIZE = config.cacheLineSize,
|
||||||
@@ -236,7 +236,7 @@ class VortexBankImp(
|
|||||||
}
|
}
|
||||||
|
|
||||||
class ReadReqInfo(config: VortexL1Config) extends Bundle {
|
class ReadReqInfo(config: VortexL1Config) extends Bundle {
|
||||||
val size = UInt(log2Ceil(config.wordSize).W)
|
val size = UInt(log2Ceil(config.inputSize + 1).W)
|
||||||
val id = UInt(config.coreTagWidth.W)
|
val id = UInt(config.coreTagWidth.W)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -264,7 +264,7 @@ class VortexBankImp(
|
|||||||
// 4 is also hardcoded, it should be log2WordSize
|
// 4 is also hardcoded, it should be log2WordSize
|
||||||
vxCache.io.core_req_addr := tlInFromCoal.a.bits.address(
|
vxCache.io.core_req_addr := tlInFromCoal.a.bits.address(
|
||||||
31,
|
31,
|
||||||
log2Ceil(config.wordSize)
|
log2Ceil(config.inputSize)
|
||||||
)
|
)
|
||||||
vxCache.io.core_req_byteen := tlInFromCoal.a.bits.mask
|
vxCache.io.core_req_byteen := tlInFromCoal.a.bits.mask
|
||||||
vxCache.io.core_req_data := tlInFromCoal.a.bits.data
|
vxCache.io.core_req_data := tlInFromCoal.a.bits.data
|
||||||
@@ -362,17 +362,17 @@ class VortexBankImp(
|
|||||||
TLMessages.Get
|
TLMessages.Get
|
||||||
)
|
)
|
||||||
|
|
||||||
tlOutToL2.a.bits.address := Cat(vxCache.io.mem_req_addr, 0.U(4.W))
|
tlOutToL2.a.bits.address := Cat(vxCache.io.mem_req_addr, 0.U(log2Ceil(config.cacheLineSize).W))
|
||||||
tlOutToL2.a.bits.mask := Mux(
|
tlOutToL2.a.bits.mask := Mux(
|
||||||
vxCache.io.mem_req_rw,
|
vxCache.io.mem_req_rw,
|
||||||
vxCache.io.mem_req_byteen,
|
vxCache.io.mem_req_byteen,
|
||||||
0xffff.U
|
~(0.U(config.cacheLineSize.W))
|
||||||
)
|
)
|
||||||
tlOutToL2.a.bits.data := vxCache.io.mem_req_data
|
tlOutToL2.a.bits.data := vxCache.io.mem_req_data
|
||||||
tlOutToL2.a.bits.source := sourceGen.io.id.bits
|
tlOutToL2.a.bits.source := sourceGen.io.id.bits
|
||||||
// ignore param, size, corrupt fields
|
// ignore param, size, corrupt fields
|
||||||
tlOutToL2.a.bits.param := 0.U
|
tlOutToL2.a.bits.param := 0.U
|
||||||
tlOutToL2.a.bits.size := 4.U // FIXME: hardcoded
|
tlOutToL2.a.bits.size := log2Ceil(config.cacheLineSize).U
|
||||||
tlOutToL2.a.bits.corrupt := false.B
|
tlOutToL2.a.bits.corrupt := false.B
|
||||||
// downstream L2 -> vxCache response
|
// downstream L2 -> vxCache response
|
||||||
tlOutToL2.d.ready := vxCache.io.mem_rsp_ready
|
tlOutToL2.d.ready := vxCache.io.mem_rsp_ready
|
||||||
|
|||||||
@@ -126,7 +126,7 @@ class WithFuzzerCores(
|
|||||||
class WithRadianceCluster(
|
class WithRadianceCluster(
|
||||||
clusterId: Int,
|
clusterId: Int,
|
||||||
location: HierarchicalLocation = InSubsystem,
|
location: HierarchicalLocation = InSubsystem,
|
||||||
crossing: RocketCrossingParams = RocketCrossingParams() // TODO make this not rocket
|
crossing: RocketCrossingParams = RocketCrossingParams()
|
||||||
) extends Config((site, here, up) => {
|
) extends Config((site, here, up) => {
|
||||||
case ClustersLocated(`location`) => up(ClustersLocated(location)) :+ RadianceClusterAttachParams(
|
case ClustersLocated(`location`) => up(ClustersLocated(location)) :+ RadianceClusterAttachParams(
|
||||||
RadianceClusterParams(clusterId = clusterId),
|
RadianceClusterParams(clusterId = clusterId),
|
||||||
@@ -174,7 +174,17 @@ class WithPriorityCoalXbar extends Config((site, _, up) => {
|
|||||||
|
|
||||||
class WithVortexL1Banks(nBanks: Int = 4) extends Config ((site, _, up) => {
|
class WithVortexL1Banks(nBanks: Int = 4) extends Config ((site, _, up) => {
|
||||||
case VortexL1Key => {
|
case VortexL1Key => {
|
||||||
Some(defaultVortexL1Config.copy(numBanks = nBanks))
|
Some(defaultVortexL1Config.copy(
|
||||||
|
numBanks = nBanks,
|
||||||
|
inputSize = up(SIMTCoreKey).get.nMemLanes * 4,
|
||||||
|
cacheLineSize = up(SIMTCoreKey).get.nMemLanes * 4,
|
||||||
|
memSideSourceIds = 64,
|
||||||
|
mshrSize = 64,
|
||||||
|
coreTagWidth = log2Ceil(up(SIMTCoreKey).get.nSrcIds.max(up(CoalescerKey) match {
|
||||||
|
case Some(key) => key.numNewSrcIds
|
||||||
|
case None => 0
|
||||||
|
})) + log2Ceil(up(SIMTCoreKey).get.nMemLanes) + 1
|
||||||
|
))
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -197,8 +207,7 @@ class WithCoalescer(nNewSrcIds: Int = 8, enable : Boolean = true) extends Config
|
|||||||
// If instantiating L1 cache, the maximum coalescing size should match the
|
// If instantiating L1 cache, the maximum coalescing size should match the
|
||||||
// cache line size
|
// cache line size
|
||||||
val maxCoalSizeInBytes = up(VortexL1Key, site) match {
|
val maxCoalSizeInBytes = up(VortexL1Key, site) match {
|
||||||
case Some(param) =>
|
case Some(param) => param.inputSize
|
||||||
(param.wordSize)
|
|
||||||
case None => sbusWidthInBytes
|
case None => sbusWidthInBytes
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -140,6 +140,9 @@ class GemminiTileModuleImp(outer: GemminiTile) extends BaseTileModuleImp(outer)
|
|||||||
|
|
||||||
tieOffGemminiRocc
|
tieOffGemminiRocc
|
||||||
|
|
||||||
|
outer.traceSourceNode.bundle := DontCare
|
||||||
|
outer.traceSourceNode.bundle.insns foreach (_.valid := false.B)
|
||||||
|
|
||||||
// hacky, but cluster will AND the cease signals from all tiles, and we want
|
// hacky, but cluster will AND the cease signals from all tiles, and we want
|
||||||
// the core tiles to determine cluster cease not Gemmini
|
// the core tiles to determine cluster cease not Gemmini
|
||||||
outer.reportCease(Some(true.B))
|
outer.reportCease(Some(true.B))
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ import chisel3.util._
|
|||||||
import freechips.rocketchip.diplomacy._
|
import freechips.rocketchip.diplomacy._
|
||||||
import freechips.rocketchip.prci.ClockSinkParameters
|
import freechips.rocketchip.prci.ClockSinkParameters
|
||||||
import freechips.rocketchip.subsystem._
|
import freechips.rocketchip.subsystem._
|
||||||
|
import freechips.rocketchip.tile.TraceBundle
|
||||||
import freechips.rocketchip.tilelink._
|
import freechips.rocketchip.tilelink._
|
||||||
import gemmini._
|
import gemmini._
|
||||||
import org.chipsalliance.cde.config.Parameters
|
import org.chipsalliance.cde.config.Parameters
|
||||||
@@ -91,7 +92,7 @@ class RadianceCluster (
|
|||||||
callback(p)
|
callback(p)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
def connect_one[T <: BaseNode with TLNode](from: TLNode, to: () => T): T = {
|
def connect_one[T <: TLNode](from: TLNode, to: () => T): T = {
|
||||||
val t = to()
|
val t = to()
|
||||||
guard_monitors { implicit p => t := from }
|
guard_monitors { implicit p => t := from }
|
||||||
t
|
t
|
||||||
@@ -183,13 +184,18 @@ class RadianceCluster (
|
|||||||
|
|
||||||
val spad_read_nodes = Seq.fill(smem_banks) {
|
val spad_read_nodes = Seq.fill(smem_banks) {
|
||||||
val r_dist = DistributorNode(from = smem_width, to = wordSize)
|
val r_dist = DistributorNode(from = smem_width, to = wordSize)
|
||||||
guard_monitors { implicit p => r_dist := gemmini.spad_read_nodes }
|
guard_monitors { implicit p => r_dist := TLBuffer(BufferParams(1, false, true), BufferParams(0)) := gemmini.spad_read_nodes }
|
||||||
Seq.fill(smem_subbanks) { connect_one(r_dist, TLIdentityNode.apply) }
|
Seq.fill(smem_subbanks) { connect_one(r_dist, TLIdentityNode.apply) }
|
||||||
}
|
}
|
||||||
val spad_write_nodes = Seq.fill(smem_banks) {
|
val spad_write_nodes = Seq.fill(smem_banks) {
|
||||||
val w_dist = DistributorNode(from = smem_width, to = wordSize)
|
val w_dist = DistributorNode(from = smem_width, to = wordSize)
|
||||||
guard_monitors { implicit p => w_dist := gemmini.spad_write_nodes }
|
guard_monitors { implicit p => w_dist := TLBuffer(BufferParams(1, false, true), BufferParams(0)) := gemmini.spad_write_nodes }
|
||||||
Seq.fill(smem_subbanks) { connect_one(w_dist, TLIdentityNode.apply) }
|
Seq.fill(smem_subbanks) { connect_one(w_dist, TLIdentityNode.apply) }
|
||||||
|
/* Seq.fill(smem_subbanks) {
|
||||||
|
val buf = TLBuffer(BufferParams(1, false, true), BufferParams(0))
|
||||||
|
buf := w_dist
|
||||||
|
buf
|
||||||
|
} */
|
||||||
}
|
}
|
||||||
val ws_dist = DistributorNode(from = smem_width, to = wordSize)
|
val ws_dist = DistributorNode(from = smem_width, to = wordSize)
|
||||||
guard_monitors { implicit p => ws_dist := gemmini.spad.spad_writer.node } // this is the dma write node
|
guard_monitors { implicit p => ws_dist := gemmini.spad.spad_writer.node } // this is the dma write node
|
||||||
|
|||||||
@@ -165,10 +165,14 @@ class RadianceTile private (
|
|||||||
// to a stall in the backend pipeline and resulting in a deadlock.
|
// to a stall in the backend pipeline and resulting in a deadlock.
|
||||||
val imemSourceWidth = 4 // 1 << imemSourceWidth == IBUF_SIZE
|
val imemSourceWidth = 4 // 1 << imemSourceWidth == IBUF_SIZE
|
||||||
|
|
||||||
val dmemSourceWidth = p(SIMTCoreKey) match {
|
val smemSourceWidth = p(SIMTCoreKey) match {
|
||||||
// TODO: respect coalescer newSrcIds
|
|
||||||
case Some(simtParam) => log2Ceil(simtParam.nSrcIds)
|
case Some(simtParam) => log2Ceil(simtParam.nSrcIds)
|
||||||
case None => 4
|
case None => 4
|
||||||
|
}
|
||||||
|
|
||||||
|
val dmemSourceWidth = p(CoalescerKey) match {
|
||||||
|
case Some(coalParam) => log2Ceil(coalParam.numOldSrcIds)
|
||||||
|
case None => smemSourceWidth
|
||||||
}
|
}
|
||||||
// require(
|
// require(
|
||||||
// dmemSourceWidth >= 4,
|
// dmemSourceWidth >= 4,
|
||||||
@@ -177,8 +181,6 @@ class RadianceTile private (
|
|||||||
// "We recommend setting nSrcIds to at least 16."
|
// "We recommend setting nSrcIds to at least 16."
|
||||||
// )
|
// )
|
||||||
|
|
||||||
val smemSourceWidth = 4 // FIXME: hardcoded
|
|
||||||
|
|
||||||
// Replicates some of the logic of how Vortex determines the tag width of
|
// Replicates some of the logic of how Vortex determines the tag width of
|
||||||
// memory requests so that Chisel and Verilog are in agreement on bitwidths.
|
// memory requests so that Chisel and Verilog are in agreement on bitwidths.
|
||||||
// See VX_gpu_pkg.sv
|
// See VX_gpu_pkg.sv
|
||||||
@@ -190,7 +192,8 @@ class RadianceTile private (
|
|||||||
}
|
}
|
||||||
val imemTagWidth = UUID_WIDTH + NW_WIDTH
|
val imemTagWidth = UUID_WIDTH + NW_WIDTH
|
||||||
|
|
||||||
val LSUQ_SIZE = 2 * numWarps * (numCoreLanes / numLsuLanes)
|
val LSUQ_SIZE = 8 * numWarps * (numCoreLanes / numLsuLanes)
|
||||||
|
assert(LSUQ_SIZE == p(SIMTCoreKey).get.nSrcIds)
|
||||||
val LSUQ_TAG_BITS = log2Ceil(LSUQ_SIZE) + 1 /*DCACHE_BATCH_SEL_BITS*/
|
val LSUQ_TAG_BITS = log2Ceil(LSUQ_SIZE) + 1 /*DCACHE_BATCH_SEL_BITS*/
|
||||||
val dmemTagWidth = UUID_WIDTH + LSUQ_TAG_BITS
|
val dmemTagWidth = UUID_WIDTH + LSUQ_TAG_BITS
|
||||||
// dmem and smem shares the same tag width, DCACHE_NOSM_TAG_WIDTH
|
// dmem and smem shares the same tag width, DCACHE_NOSM_TAG_WIDTH
|
||||||
|
|||||||
Reference in New Issue
Block a user