camelCase

This commit is contained in:
Richard Yan
2024-09-22 01:21:37 -07:00
parent daacae9edc
commit 20cf4609b7

View File

@@ -64,74 +64,73 @@ class RadianceCluster (
// /___/_/ /_/___/_/ /_/ // /___/_/ /_/___/_/ /_/
// //
// ************************************** // **************************************
val unifiedMemReadNode = TLIdentityNode()
val unifiedMemWriteNode = TLIdentityNode()
val unified_mem_read_node = TLIdentityNode() val smemKey = p(RadianceSharedMemKey).get
val unified_mem_write_node = TLIdentityNode() val wordSize = smemKey.wordSize
val smemBase = smemKey.address
val smem_key = p(RadianceSharedMemKey).get val smemBanks = smemKey.numBanks
val wordSize = smem_key.wordSize val smemWidth = smemKey.numWords * smemKey.wordSize
val smem_base = smem_key.address val smemDepth = smemKey.size / smemWidth / smemBanks
val smem_banks = smem_key.numBanks val smemSubbanks = smemWidth / wordSize
val smem_width = smem_key.numWords * smem_key.wordSize val smemSize = smemWidth * smemDepth * smemBanks
val smem_depth = smem_key.size / smem_width / smem_banks
val smem_subbanks = smem_width / wordSize
val smem_size = smem_width * smem_depth * smem_banks
gemminiConfigs.foreach { config => gemminiConfigs.foreach { config =>
assert(smem_banks == config.sp_banks && isPow2(smem_banks / config.sp_banks)) // TODO: should allow >= assert(smemBanks == config.sp_banks && isPow2(smemBanks / config.sp_banks)) // TODO: should allow >=
assert(smem_width >= (config.sp_width / 8) && isPow2(smem_width / (config.sp_width / 8))) assert(smemWidth >= (config.sp_width / 8) && isPow2(smemWidth / (config.sp_width / 8)))
assert(smem_size == config.sp_capacity.asInstanceOf[CapacityInKilobytes].kilobytes * 1024) assert(smemSize == config.sp_capacity.asInstanceOf[CapacityInKilobytes].kilobytes * 1024)
} }
// Elaboration-time switches consulted while the shared-memory interconnect below is built.
val stride_by_word = true val strideByWord = true
val filter_aligned = true val filterAligned = true
val disable_monitors = true // otherwise it generates 1k+ different tl monitors val disableMonitors = true // otherwise it generates 1k+ different tl monitors
val serialize_unaligned = true val serializeUnaligned = true
// Runs `callback` for its side effects and discards the result (return type is Unit).
// When the disable-monitors flag is set the callback is handed to DisableMonitors;
// otherwise it is invoked directly with the implicit Parameters `p`.
def guard_monitors[T](callback: Parameters => T)(implicit p: Parameters): Unit = { def guardMonitors[T](callback: Parameters => T)(implicit p: Parameters): Unit = {
if (disable_monitors) { if (disableMonitors) {
DisableMonitors { callback } DisableMonitors { callback }
} else { } else {
callback(p) callback(p)
} }
} }
// Builds a new node by calling `to()`, drives it from `from` (new node := from) under the
// monitor guard, and returns the newly built node so callers can keep chaining from it.
def connect_one[T <: TLNode](from: TLNode, to: () => T): T = { def connectOne[T <: TLNode](from: TLNode, to: () => T): T = {
val t = to() val t = to()
guard_monitors { implicit p => t := from } guardMonitors { implicit p => t := from }
t t
} }
// Instantiates a TLXbar with the given arbitration `policy` (round-robin by default),
// applies `name` as a suggested name when one is supplied, connects `from` into the
// crossbar's node under the monitor guard, and returns that crossbar node.
def connect_xbar_name(from: TLNode, name: Option[String] = None, def connectXbarName(from: TLNode, name: Option[String] = None,
policy: TLArbiter.Policy = TLArbiter.roundRobin): TLNexusNode = { policy: TLArbiter.Policy = TLArbiter.roundRobin): TLNexusNode = {
val t = LazyModule(new TLXbar(policy)) val t = LazyModule(new TLXbar(policy))
name.map(t.suggestName) name.map(t.suggestName)
guard_monitors { implicit p => t.node := from } guardMonitors { implicit p => t.node := from }
t.node t.node
} }
// Convenience form of the named-crossbar helper: no suggested name, default arbitration policy.
def connect_xbar(from: TLNode): TLNexusNode = { def connectXbar(from: TLNode): TLNexusNode = {
connect_xbar_name(from, None) connectXbarName(from, None)
} }
val radiance_smem_fanout = radianceTiles.zipWithIndex.flatMap { case (tile, cid) => val radianceSmemFanout = radianceTiles.zipWithIndex.flatMap { case (tile, cid) =>
tile.smemNodes.zipWithIndex.map { case (m, lid) => tile.smemNodes.zipWithIndex.map { case (m, lid) =>
val smem_fanout_xbar = LazyModule(new TLXbar()) val smemFanoutXbar = LazyModule(new TLXbar())
smem_fanout_xbar.suggestName(f"rad_smem_fanout_cl${thisClusterParams.clusterId}_c${cid}_l${lid}_xbar") smemFanoutXbar.suggestName(f"rad_smem_fanout_cl${thisClusterParams.clusterId}_c${cid}_l${lid}_xbar")
smem_fanout_xbar.node :=* m smemFanoutXbar.node :=* m
smem_fanout_xbar.node smemFanoutXbar.node
} }
} }
require(isPow2(smem_banks)) require(isPow2(smemBanks))
// collection of read and write managers for each sram (sub)bank // collection of read and write managers for each sram (sub)bank
val smem_bank_mgrs : Seq[Seq[TLManagerNode]] = if (stride_by_word) { val smemBankMgrs : Seq[Seq[TLManagerNode]] = if (strideByWord) {
require(isPow2(smem_subbanks)) require(isPow2(smemSubbanks))
(0 until smem_banks).flatMap { bid => (0 until smemBanks).flatMap { bid =>
(0 until smem_subbanks).map { wid => (0 until smemSubbanks).map { wid =>
Seq(TLManagerNode(Seq(TLSlavePortParameters.v1( Seq(TLManagerNode(Seq(TLSlavePortParameters.v1(
managers = Seq(TLSlaveParameters.v2( managers = Seq(TLSlaveParameters.v2(
name = Some(f"sp_bank${bid}_word${wid}_read_mgr"), name = Some(f"sp_bank${bid}_word${wid}_read_mgr"),
address = Seq(AddressSet( address = Seq(AddressSet(
smem_base + (smem_depth * smem_width * bid) + wordSize * wid, smemBase + (smemDepth * smemWidth * bid) + wordSize * wid,
smem_depth * smem_width - smem_width + wordSize - 1 smemDepth * smemWidth - smemWidth + wordSize - 1
)), )),
supports = TLMasterToSlaveTransferSizes( supports = TLMasterToSlaveTransferSizes(
get = TransferSizes(wordSize, wordSize)), get = TransferSizes(wordSize, wordSize)),
@@ -143,8 +142,8 @@ class RadianceCluster (
managers = Seq(TLSlaveParameters.v2( managers = Seq(TLSlaveParameters.v2(
name = Some(f"sp_bank${bid}_word${wid}_write_mgr"), name = Some(f"sp_bank${bid}_word${wid}_write_mgr"),
address = Seq(AddressSet( address = Seq(AddressSet(
smem_base + (smem_depth * smem_width * bid) + wordSize * wid, smemBase + (smemDepth * smemWidth * bid) + wordSize * wid,
smem_depth * smem_width - smem_width + wordSize - 1 smemDepth * smemWidth - smemWidth + wordSize - 1
)), )),
supports = TLMasterToSlaveTransferSizes( supports = TLMasterToSlaveTransferSizes(
putFull = TransferSizes(wordSize, wordSize), putFull = TransferSizes(wordSize, wordSize),
@@ -156,94 +155,94 @@ class RadianceCluster (
} }
} }
} else { } else {
(0 until smem_banks).map { bank => (0 until smemBanks).map { bank =>
Seq(TLManagerNode(Seq(TLSlavePortParameters.v1( Seq(TLManagerNode(Seq(TLSlavePortParameters.v1(
managers = Seq(TLSlaveParameters.v2( managers = Seq(TLSlaveParameters.v2(
name = Some(f"sp_bank${bank}_read_mgr"), name = Some(f"sp_bank${bank}_read_mgr"),
address = Seq(AddressSet(smem_base + (smem_depth * smem_width * bank), address = Seq(AddressSet(smemBase + (smemDepth * smemWidth * bank),
smem_depth * smem_width - 1)), smemDepth * smemWidth - 1)),
supports = TLMasterToSlaveTransferSizes( supports = TLMasterToSlaveTransferSizes(
get = TransferSizes(1, smem_width)), get = TransferSizes(1, smemWidth)),
fifoId = Some(0) fifoId = Some(0)
)), )),
beatBytes = smem_width beatBytes = smemWidth
)) ))
), TLManagerNode(Seq(TLSlavePortParameters.v1( ), TLManagerNode(Seq(TLSlavePortParameters.v1(
managers = Seq(TLSlaveParameters.v2( managers = Seq(TLSlaveParameters.v2(
name = Some(f"sp_bank${bank}_write_mgr"), name = Some(f"sp_bank${bank}_write_mgr"),
address = Seq(AddressSet(smem_base + (smem_depth * smem_width * bank), address = Seq(AddressSet(smemBase + (smemDepth * smemWidth * bank),
smem_depth * smem_width - 1)), smemDepth * smemWidth - 1)),
supports = TLMasterToSlaveTransferSizes( supports = TLMasterToSlaveTransferSizes(
putFull = TransferSizes(1, smem_width), putFull = TransferSizes(1, smemWidth),
putPartial = TransferSizes(1, smem_width)), putPartial = TransferSizes(1, smemWidth)),
fifoId = Some(0) fifoId = Some(0)
)), )),
beatBytes = smem_width beatBytes = smemWidth
)))) ))))
} }
} }
val uniform_policy_nodes: Seq[ArrayBuffer[ArrayBuffer[ExtPolicyMasterNode]]] = // mutable val uniformPolicyNodes: Seq[ArrayBuffer[ArrayBuffer[ExtPolicyMasterNode]]] = // mutable
Seq.fill(2)(ArrayBuffer.fill(smem_banks)(ArrayBuffer.fill(smem_subbanks)(null))) Seq.fill(2)(ArrayBuffer.fill(smemBanks)(ArrayBuffer.fill(smemSubbanks)(null)))
val uniform_nodes_in: Seq[ArrayBuffer[ArrayBuffer[Seq[TLIdentityNode]]]] = val uniformNodesIn: Seq[ArrayBuffer[ArrayBuffer[Seq[TLIdentityNode]]]] =
Seq.fill(2)(ArrayBuffer.fill(smem_banks)(ArrayBuffer.fill(smem_subbanks)(Seq()))) Seq.fill(2)(ArrayBuffer.fill(smemBanks)(ArrayBuffer.fill(smemSubbanks)(Seq())))
val uniform_nodes_out: Seq[ArrayBuffer[ArrayBuffer[TLIdentityNode]]] = val uniformNodesOut: Seq[ArrayBuffer[ArrayBuffer[TLIdentityNode]]] =
Seq.fill(2)(ArrayBuffer.fill(smem_banks)(ArrayBuffer.fill(smem_subbanks)(null))) Seq.fill(2)(ArrayBuffer.fill(smemBanks)(ArrayBuffer.fill(smemSubbanks)(null)))
val (uniform_r_nodes, uniform_w_nodes, _, _) = val (uniformRNodes, uniformWNodes, _, _) =
if (stride_by_word) { if (strideByWord) {
def dist_and_duplicate(nodes: Seq[TLNode], suffix: String): Seq[Seq[TLNexusNode]] = { def distAndDuplicate(nodes: Seq[TLNode], suffix: String): Seq[Seq[TLNexusNode]] = {
val word_fanout_nodes = gemminis.zip(nodes).zipWithIndex.map { case ((gemmini, node), gemmini_idx) => val wordFanoutNodes = gemminis.zip(nodes).zipWithIndex.map { case ((gemmini, node), gemminiIdx) =>
val sp_width_bytes = gemmini.config.sp_width / 8 val spWidthBytes = gemmini.config.sp_width / 8
val sp_subbanks = sp_width_bytes / wordSize val spSubbanks = spWidthBytes / wordSize
val dist = DistributorNode(from = sp_width_bytes, to = wordSize) val dist = DistributorNode(from = spWidthBytes, to = wordSize)
guard_monitors { implicit p => guardMonitors { implicit p =>
dist := node dist := node
} }
val fanout = Seq.tabulate(sp_subbanks) { w => val fanout = Seq.tabulate(spSubbanks) { w =>
val buf = TLBuffer(BufferParams(1, false, true), BufferParams(0)) val buf = TLBuffer(BufferParams(1, false, true), BufferParams(0))
buf := dist buf := dist
connect_xbar_name(buf, Some(s"spad_g${gemmini_idx}w${w}_fanout_$suffix")) connectXbarName(buf, Some(s"spad_g${gemminiIdx}w${w}_fanout_$suffix"))
} }
Seq.fill(smem_width / sp_width_bytes)(fanout).flatten // smem wider than spad, duplicate masters Seq.fill(smemWidth / spWidthBytes)(fanout).flatten // smem wider than spad, duplicate masters
} }
// (gemmini, word) => (word, gemmini) // (gemmini, word) => (word, gemmini)
word_fanout_nodes.transpose wordFanoutNodes.transpose
} }
// (banks, subbanks, gemminis) // (banks, subbanks, gemminis)
val spad_read_nodes = Seq.fill(smem_banks)(dist_and_duplicate(gemminis.map(_.spad_read_nodes), "r")) val spadReadNodes = Seq.fill(smemBanks)(distAndDuplicate(gemminis.map(_.spad_read_nodes), "r"))
val spad_write_nodes = Seq.fill(smem_banks)(dist_and_duplicate(gemminis.map(_.spad_write_nodes), "w")) val spadWriteNodes = Seq.fill(smemBanks)(distAndDuplicate(gemminis.map(_.spad_write_nodes), "w"))
val spad_sp_write_nodes_single_bank = dist_and_duplicate(gemminis.map(_.spad.spad_writer.node), "ws") val spadSpWriteNodesSingleBank = distAndDuplicate(gemminis.map(_.spad.spad_writer.node), "ws")
val spad_sp_write_nodes = Seq.fill(smem_banks)(spad_sp_write_nodes_single_bank) // executed only once val spadSpWriteNodes = Seq.fill(smemBanks)(spadSpWriteNodesSingleBank) // executed only once
val (uniform_r_nodes, uniform_w_nodes, nonuniform_r_nodes, nonuniform_w_nodes): val (uniformRNodes, uniformWNodes, nonuniformRNodes, nonuniformWNodes):
(Seq[Seq[Seq[TLNexusNode]]], Seq[Seq[Seq[TLNexusNode]]], Seq[TLNode], Seq[TLNode]) = if (filter_aligned) { (Seq[Seq[Seq[TLNexusNode]]], Seq[Seq[Seq[TLNexusNode]]], Seq[TLNode], Seq[TLNode]) = if (filterAligned) {
val num_lsu_lanes = radianceTiles.head.numLsuLanes val numLsuLanes = radianceTiles.head.numLsuLanes
val num_lane_dupes = Math.max(1, smem_subbanks / num_lsu_lanes) val numLaneDupes = Math.max(1, smemSubbanks / numLsuLanes)
val filter_range = Math.min(smem_subbanks, num_lsu_lanes) val filterRange = Math.min(smemSubbanks, numLsuLanes)
println(s"num_lsu_lanes ${num_lsu_lanes} num_lane_dupes ${num_lane_dupes} filter_range ${filter_range}") println(s"num_lsu_lanes ${numLsuLanes} num_lane_dupes ${numLaneDupes} filter_range ${filterRange}")
// (subbank, sources, aligned) = rw node // (subbank, sources, aligned) = rw node
val (f_aligned, f_unaligned) = if (num_lsu_lanes >= smem_subbanks) { val (fAligned, fUnaligned) = if (numLsuLanes >= smemSubbanks) {
val filter_nodes: Seq[Seq[(TLNode, TLNode)]] = Seq.tabulate(num_lane_dupes) { did => val filterNodes: Seq[Seq[(TLNode, TLNode)]] = Seq.tabulate(numLaneDupes) { did =>
Seq.tabulate(filter_range) { wid => Seq.tabulate(filterRange) { wid =>
val true_wid = did * filter_range + wid val trueWid = did * filterRange + wid
val address = AddressSet(smem_base + wordSize * true_wid, (smem_size - 1) - (smem_subbanks - 1) * wordSize) val address = AddressSet(smemBase + wordSize * trueWid, (smemSize - 1) - (smemSubbanks - 1) * wordSize)
radiance_smem_fanout.grouped(num_lsu_lanes).toList.zipWithIndex.flatMap { case (lanes, cid) => radianceSmemFanout.grouped(numLsuLanes).toList.zipWithIndex.flatMap { case (lanes, cid) =>
lanes.zipWithIndex.flatMap { case (lane, lid) => lanes.zipWithIndex.flatMap { case (lane, lid) =>
if ((lid % filter_range) == wid) { if ((lid % filterRange) == wid) {
println(f"c${cid}_l${lid} connected to d${did}w${wid}") println(f"c${cid}_l${lid} connected to d${did}w${wid}")
val filter_node = AlignFilterNode(Seq(address))(p, ValName(s"filter_l${lid}_w${true_wid}"), info) val filterNode = AlignFilterNode(Seq(address))(p, ValName(s"filter_l${lid}_w${trueWid}"), info)
DisableMonitors { implicit p => filter_node := lane } DisableMonitors { implicit p => filterNode := lane }
// Seq((aligned splitter, unaligned splitter)) // Seq((aligned splitter, unaligned splitter))
Seq(( Seq((
connect_one(filter_node, () => connectOne(filterNode, () =>
RWSplitterNode(address, s"aligned_splitter_c${cid}_l${lid}_w${true_wid}")), RWSplitterNode(address, s"aligned_splitter_c${cid}_l${lid}_w${trueWid}")),
connect_one(filter_node, () => connectOne(filterNode, () =>
RWSplitterNode(AddressSet.everything, s"unaligned_splitter_c${cid}_l${lid}")) RWSplitterNode(AddressSet.everything, s"unaligned_splitter_c${cid}_l${lid}"))
)) ))
} else Seq() } else Seq()
@@ -252,174 +251,174 @@ class RadianceCluster (
} }
}.flatten }.flatten
val f_aligned = Seq.fill(2)(filter_nodes.map(_.map(_._1).map(connect_xbar_name(_, Some("rad_aligned"))))) val fAligned = Seq.fill(2)(filterNodes.map(_.map(_._1).map(connectXbarName(_, Some("rad_aligned")))))
val f_unaligned = if (serialize_unaligned) { val fUnaligned = if (serializeUnaligned) {
Seq.fill(2) { Seq.fill(2) {
val serialized_node = TLEphemeralNode() val serializedNode = TLEphemeralNode()
val serialized_in_xbar = LazyModule(new TLXbar()) val serializedInXbar = LazyModule(new TLXbar())
val serialized_out_xbar = LazyModule(new TLXbar()) val serializedOutXbar = LazyModule(new TLXbar())
serialized_in_xbar.suggestName("unaligned_serialized_in_xbar") serializedInXbar.suggestName("unaligned_serialized_in_xbar")
serialized_out_xbar.suggestName("unaligned_serialized_out_xbar") serializedOutXbar.suggestName("unaligned_serialized_out_xbar")
guard_monitors { implicit p => guardMonitors { implicit p =>
filter_nodes.foreach(_.map(_._2).foreach(serialized_in_xbar.node := _)) filterNodes.foreach(_.map(_._2).foreach(serializedInXbar.node := _))
serialized_node := serialized_in_xbar.node serializedNode := serializedInXbar.node
serialized_out_xbar.node := serialized_node serializedOutXbar.node := serializedNode
} }
Seq(serialized_out_xbar.node) Seq(serializedOutXbar.node)
} }
} else { } else {
Seq.fill(2)(filter_nodes.flatMap(_.map(_._2).map(connect_xbar))) Seq.fill(2)(filterNodes.flatMap(_.map(_._2).map(connectXbar)))
} }
(f_aligned, f_unaligned) (fAligned, fUnaligned)
} else { // aligned: (subbanks, cores) = rw node } else { // aligned: (subbanks, cores) = rw node
// (lanes, cores) = filter_node // (lanes, cores) = filter_node
val filter_nodes = Seq.tabulate(filter_range) { wid => val filterNodes = Seq.tabulate(filterRange) { wid =>
val addresses = Seq.tabulate(num_lane_dupes) { did => val addresses = Seq.tabulate(numLaneDupes) { did =>
AddressSet(smem_base + (did * filter_range + wid) * wordSize, AddressSet(smemBase + (did * filterRange + wid) * wordSize,
(smem_size - 1) - (smem_subbanks - 1) * wordSize) (smemSize - 1) - (smemSubbanks - 1) * wordSize)
} }
radiance_smem_fanout.grouped(num_lsu_lanes).toSeq.zipWithIndex.map { case (lanes, cid) => radianceSmemFanout.grouped(numLsuLanes).toSeq.zipWithIndex.map { case (lanes, cid) =>
val lane = lanes(wid) val lane = lanes(wid)
val filter_node = AlignFilterNode(addresses)(p, ValName(s"filter_c${cid}_w${wid}"), info) val filterNode = AlignFilterNode(addresses)(p, ValName(s"filter_c${cid}_w${wid}"), info)
guard_monitors { implicit p => guardMonitors { implicit p =>
filter_node := lane filterNode := lane
} }
filter_node filterNode
} }
} }
val f_aligned_rw = Seq.tabulate(num_lane_dupes) { did => val fAlignedRw = Seq.tabulate(numLaneDupes) { did =>
filter_nodes.zipWithIndex.map { case (cores, lid) => filterNodes.zipWithIndex.map { case (cores, lid) =>
cores.zipWithIndex.map { case (fn, cid) => cores.zipWithIndex.map { case (fn, cid) =>
val address = AddressSet(smem_base + (did * filter_range + lid) * wordSize, val address = AddressSet(smemBase + (did * filterRange + lid) * wordSize,
(smem_size - 1) - (smem_subbanks - 1) * wordSize) (smemSize - 1) - (smemSubbanks - 1) * wordSize)
connect_one(fn, () => RWSplitterNode(address, s"aligned_split_c${cid}_l${lid}_d${did}")) connectOne(fn, () => RWSplitterNode(address, s"aligned_split_c${cid}_l${lid}_d${did}"))
} }
} }
}.flatten }.flatten
val f_unaligned_rw = filter_nodes.zipWithIndex.flatMap { case (cores, lid) => val fUnalignedRw = filterNodes.zipWithIndex.flatMap { case (cores, lid) =>
cores.zipWithIndex.map { case (fn, cid) => cores.zipWithIndex.map { case (fn, cid) =>
connect_one(fn, () => RWSplitterNode(AddressSet.everything, s"unaligned_split_c${cid}_l${lid}")) connectOne(fn, () => RWSplitterNode(AddressSet.everything, s"unaligned_split_c${cid}_l${lid}"))
} }
} }
val f_aligned = Seq.fill(2)(f_aligned_rw.map(_.map(connect_xbar_name(_, Some("rad_aligned"))))) val fAligned = Seq.fill(2)(fAlignedRw.map(_.map(connectXbarName(_, Some("rad_aligned")))))
val f_unaligned = if (serialize_unaligned) { val fUnaligned = if (serializeUnaligned) {
Seq.fill(2) { Seq.fill(2) {
val serialized_node = TLEphemeralNode() val serializedNode = TLEphemeralNode()
val serialized_in_xbar = TLXbar(nameSuffix = Some("unaligned_ser_in")) val serializedInXbar = TLXbar(nameSuffix = Some("unaligned_ser_in"))
val serialized_out_xbar = TLXbar(nameSuffix = Some("unaligned_ser_out")) val serializedOutXbar = TLXbar(nameSuffix = Some("unaligned_ser_out"))
guard_monitors { implicit p => guardMonitors { implicit p =>
f_unaligned_rw.foreach(serialized_in_xbar := _) fUnalignedRw.foreach(serializedInXbar := _)
serialized_node := serialized_in_xbar serializedNode := serializedInXbar
serialized_out_xbar := serialized_node serializedOutXbar := serializedNode
} }
Seq(serialized_out_xbar) Seq(serializedOutXbar)
} }
} else { } else {
Seq.fill(2)(f_unaligned_rw.map(connect_xbar)) Seq.fill(2)(fUnalignedRw.map(connectXbar))
} }
(f_aligned, f_unaligned) (fAligned, fUnaligned)
} }
val uniform_r_nodes: Seq[Seq[Seq[TLNexusNode]]] = spad_read_nodes.map { rb => val uniformRNodes: Seq[Seq[Seq[TLNexusNode]]] = spadReadNodes.map { rb =>
(rb zip f_aligned.head).map { case (rw, fa) => rw ++ fa } (rb zip fAligned.head).map { case (rw, fa) => rw ++ fa }
} }
val uniform_w_nodes: Seq[Seq[Seq[TLNexusNode]]] = (spad_write_nodes zip spad_sp_write_nodes).map { case (wb, wsb) => val uniformWNodes: Seq[Seq[Seq[TLNexusNode]]] = (spadWriteNodes zip spadSpWriteNodes).map { case (wb, wsb) =>
(wb lazyZip wsb lazyZip f_aligned.last).map { (wb lazyZip wsb lazyZip fAligned.last).map {
case (ww, wsw, fa) => ww ++ wsw ++ fa case (ww, wsw, fa) => ww ++ wsw ++ fa
} }
} }
// all to all xbar // all to all xbar
val Seq(nonuniform_r_nodes, nonuniform_w_nodes) = f_unaligned val Seq(nonuniformRNodes, nonuniformWNodes) = fUnaligned
(uniform_r_nodes, uniform_w_nodes, nonuniform_r_nodes, nonuniform_w_nodes) (uniformRNodes, uniformWNodes, nonuniformRNodes, nonuniformWNodes)
} else { } else {
val splitter_nodes = radiance_smem_fanout.map { connect_one(_, RWSplitterNode.apply) } val splitterNodes = radianceSmemFanout.map { connectOne(_, RWSplitterNode.apply) }
// these nodes access an entire line simultaneously // these nodes access an entire line simultaneously
val uniform_r_nodes: Seq[Seq[Seq[TLNexusNode]]] = spad_read_nodes val uniformRNodes: Seq[Seq[Seq[TLNexusNode]]] = spadReadNodes
val uniform_w_nodes: Seq[Seq[Seq[TLNexusNode]]] = (spad_write_nodes zip spad_sp_write_nodes).map { case (wb, wsb) => val uniformWNodes: Seq[Seq[Seq[TLNexusNode]]] = (spadWriteNodes zip spadSpWriteNodes).map { case (wb, wsb) =>
(wb zip wsb).map { case (ww, wsw) => ww ++ wsw } (wb zip wsb).map { case (ww, wsw) => ww ++ wsw }
} }
// these nodes are random access // these nodes are random access
val nonuniform_r_nodes: Seq[TLNode] = splitter_nodes.map(connect_xbar_name(_, Some("rad_unaligned_r"))) val nonuniformRNodes: Seq[TLNode] = splitterNodes.map(connectXbarName(_, Some("rad_unaligned_r")))
val nonuniform_w_nodes: Seq[TLNode] = splitter_nodes.map(connect_xbar_name(_, Some("rad_unaligned_w"))) val nonuniformWNodes: Seq[TLNode] = splitterNodes.map(connectXbarName(_, Some("rad_unaligned_w")))
(uniform_r_nodes, uniform_w_nodes, nonuniform_r_nodes, nonuniform_w_nodes) (uniformRNodes, uniformWNodes, nonuniformRNodes, nonuniformWNodes)
} }
guard_monitors { implicit p => radiance_smem_fanout.foreach(clbus.inwardNode := _) } guardMonitors { implicit p => radianceSmemFanout.foreach(clbus.inwardNode := _) }
smem_bank_mgrs.grouped(smem_subbanks).zipWithIndex.foreach { case (bank_mgrs, bid) => smemBankMgrs.grouped(smemSubbanks).zipWithIndex.foreach { case (bankMgrs, bid) =>
bank_mgrs.zipWithIndex.foreach { case (Seq(r, w), wid) => bankMgrs.zipWithIndex.foreach { case (Seq(r, w), wid) =>
// TODO: this should be a coordinated round robin // TODO: this should be a coordinated round robin
val subbank_r_xbar = LazyModule(new TLXbar(TLArbiter.lowestIndexFirst)) val subbankRXbar = LazyModule(new TLXbar(TLArbiter.lowestIndexFirst))
val subbank_w_xbar = LazyModule(new TLXbar(TLArbiter.lowestIndexFirst)) val subbankWXbar = LazyModule(new TLXbar(TLArbiter.lowestIndexFirst))
subbank_r_xbar.suggestName(s"smem_b${bid}_w${wid}_r_xbar") subbankRXbar.suggestName(s"smem_b${bid}_w${wid}_r_xbar")
subbank_w_xbar.suggestName(s"smem_b${bid}_w${wid}_w_xbar") subbankWXbar.suggestName(s"smem_b${bid}_w${wid}_w_xbar")
guard_monitors { implicit p => guardMonitors { implicit p =>
r := subbank_r_xbar.node r := subbankRXbar.node
w := subbank_w_xbar.node w := subbankWXbar.node
val ur_xbar = XbarWithExtPolicy(Some(s"ur_b${bid}_w${wid}")) val urXbar = XbarWithExtPolicy(Some(s"ur_b${bid}_w${wid}"))
val uw_xbar = XbarWithExtPolicy(Some(s"uw_b${bid}_w${wid}")) val uwXbar = XbarWithExtPolicy(Some(s"uw_b${bid}_w${wid}"))
val r_policy_node = ExtPolicyMasterNode(uniform_r_nodes(bid)(wid).length) val rPolicyNode = ExtPolicyMasterNode(uniformRNodes(bid)(wid).length)
val w_policy_node = ExtPolicyMasterNode(uniform_w_nodes(bid)(wid).length) val wPolicyNode = ExtPolicyMasterNode(uniformWNodes(bid)(wid).length)
ur_xbar.policySlaveNode := r_policy_node urXbar.policySlaveNode := rPolicyNode
uw_xbar.policySlaveNode := w_policy_node uwXbar.policySlaveNode := wPolicyNode
uniform_policy_nodes.head(bid)(wid) = r_policy_node uniformPolicyNodes.head(bid)(wid) = rPolicyNode
uniform_policy_nodes.last(bid)(wid) = w_policy_node uniformPolicyNodes.last(bid)(wid) = wPolicyNode
(Seq(ur_xbar, uw_xbar) lazyZip uniform_nodes_in lazyZip Seq(uniform_r_nodes, uniform_w_nodes)) (Seq(urXbar, uwXbar) lazyZip uniformNodesIn lazyZip Seq(uniformRNodes, uniformWNodes))
.foreach { case (xbar, id_buf, u_nodes) => .foreach { case (xbar, idBuf, uNodes) =>
id_buf(bid)(wid) = u_nodes(bid)(wid).map { u => idBuf(bid)(wid) = uNodes(bid)(wid).map { u =>
val id = TLIdentityNode() val id = TLIdentityNode()
xbar.node := id := u xbar.node := id := u
id id
} }
} }
// uniform_w_nodes(bid)(wid).foreach( uw_xbar.node := _ ) // uniformWNodes(bid)(wid).foreach( uwXbar.node := _ )
uniform_nodes_out.head(bid)(wid) = TLIdentityNode() uniformNodesOut.head(bid)(wid) = TLIdentityNode()
uniform_nodes_out.last(bid)(wid) = TLIdentityNode() uniformNodesOut.last(bid)(wid) = TLIdentityNode()
subbank_r_xbar.node := uniform_nodes_out.head(bid)(wid) := ur_xbar.node subbankRXbar.node := uniformNodesOut.head(bid)(wid) := urXbar.node
subbank_w_xbar.node := uniform_nodes_out.last(bid)(wid) := uw_xbar.node subbankWXbar.node := uniformNodesOut.last(bid)(wid) := uwXbar.node
nonuniform_r_nodes.foreach( subbank_r_xbar.node := _ ) nonuniformRNodes.foreach( subbankRXbar.node := _ )
nonuniform_w_nodes.foreach( subbank_w_xbar.node := _ ) nonuniformWNodes.foreach( subbankWXbar.node := _ )
} }
} }
} }
(Some(uniform_r_nodes), Some(uniform_w_nodes), Some(nonuniform_r_nodes), Some(nonuniform_w_nodes)) (Some(uniformRNodes), Some(uniformWNodes), Some(nonuniformRNodes), Some(nonuniformWNodes))
} else { } else {
gemminis.foreach { gemmini => gemminis.foreach { gemmini =>
unified_mem_read_node :=* TLWidthWidget(smem_width) :=* gemmini.spad_read_nodes unifiedMemReadNode :=* TLWidthWidget(smemWidth) :=* gemmini.spad_read_nodes
unified_mem_write_node :=* TLWidthWidget(smem_width) :=* gemmini.spad_write_nodes unifiedMemWriteNode :=* TLWidthWidget(smemWidth) :=* gemmini.spad_write_nodes
unified_mem_write_node := gemmini.spad.spad_writer.node // this is the dma write node unifiedMemWriteNode := gemmini.spad.spad_writer.node // this is the dma write node
} }
val splitter_node = RWSplitterNode() val splitterNode = RWSplitterNode()
unified_mem_read_node := TLWidthWidget(smem_width) := splitter_node unifiedMemReadNode := TLWidthWidget(smemWidth) := splitterNode
unified_mem_write_node := TLWidthWidget(smem_width) := splitter_node unifiedMemWriteNode := TLWidthWidget(smemWidth) := splitterNode
radiance_smem_fanout.foreach(clbus.inwardNode := _) radianceSmemFanout.foreach(clbus.inwardNode := _)
splitter_node :=* TLWidthWidget(4) :=* clbus.outwardNode splitterNode :=* TLWidthWidget(4) :=* clbus.outwardNode
val smem_r_xbar = TLXbar() val smemRXbar = TLXbar()
val smem_w_xbar = TLXbar() val smemWXbar = TLXbar()
DisableMonitors { implicit p => DisableMonitors { implicit p =>
smem_r_xbar :=* TLWidthWidget(wordSize) :=* unified_mem_read_node smemRXbar :=* TLWidthWidget(wordSize) :=* unifiedMemReadNode
smem_w_xbar :=* TLWidthWidget(wordSize) :=* unified_mem_write_node smemWXbar :=* TLWidthWidget(wordSize) :=* unifiedMemWriteNode
} }
smem_bank_mgrs.foreach { mem => smemBankMgrs.foreach { mem =>
require(mem.length == 2) require(mem.length == 2)
mem.head := smem_r_xbar mem.head := smemRXbar
mem.last := smem_w_xbar mem.last := smemWXbar
} }
(None, None, None, None) (None, None, None, None)
@@ -444,7 +443,7 @@ class RadianceCluster (
val traceTLNode = TLAdapterNode(clientFn = c => c, managerFn = m => m) val traceTLNode = TLAdapterNode(clientFn = c => c, managerFn = m => m)
// printf and perf counter buffer // printf and perf counter buffer
TLRAM(AddressSet(smem_key.address + smem_size, numCoresInCluster * 0x200 - 1)) := traceTLNode := TLRAM(AddressSet(smemKey.address + smemSize, numCoresInCluster * 0x200 - 1)) := traceTLNode :=
TLBuffer() := TLFragmenter(4, 4) := clbus.outwardNode TLBuffer() := TLFragmenter(4, 4) := clbus.outwardNode
p(RadianceFrameBufferKey).foreach { key => p(RadianceFrameBufferKey).foreach { key =>
@@ -517,82 +516,81 @@ class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp(
} }
} }
// TODO: remove Pipeline dependency of gemmini
def makeSmemBanks(): Unit = { def makeSmemBanks(): Unit = {
// Wires one TwoPortSyncMem to a read-side and a write-side TileLink bundle.
// Read path: the memory's read enable is the read port's A-channel fire. The read data enters a
// Pipeline built with argument 1 and the request's source/size enter a Pipeline built with
// argument 2 (presumably stage counts -- confirm against the Pipeline definition). A single backup
// register captures SRAM read data that returns while the data pipe is occupied and D is not ready;
// the D payload is taken from the backup register whenever the data pipe output is not valid.
// Write path: wen/wdata/mask are the write port's A-channel values delayed by one RegNext, and the
// AccessAck response is returned through a 2-entry Queue.
def make_buffer[T <: Data](mem: TwoPortSyncMem[T], r_node: TLBundle, r_edge: TLEdgeIn, def makeBuffer[T <: Data](mem: TwoPortSyncMem[T], rNode: TLBundle, rEdge: TLEdgeIn,
w_node: TLBundle, w_edge: TLEdgeIn): Unit = { wNode: TLBundle, wEdge: TLEdgeIn): Unit = {
mem.io.ren := r_node.a.fire mem.io.ren := rNode.a.fire
val data_pipe_in = Wire(DecoupledIO(mem.io.rdata.cloneType)) val dataPipeIn = Wire(DecoupledIO(mem.io.rdata.cloneType))
data_pipe_in.valid := RegNext(mem.io.ren) dataPipeIn.valid := RegNext(mem.io.ren)
data_pipe_in.bits := mem.io.rdata dataPipeIn.bits := mem.io.rdata
val metadata_pipe_in = Wire(DecoupledIO(new Bundle { val metadataPipeIn = Wire(DecoupledIO(new Bundle {
val source = r_node.a.bits.source.cloneType val source = rNode.a.bits.source.cloneType
val size = r_node.a.bits.size.cloneType val size = rNode.a.bits.size.cloneType
})) }))
metadata_pipe_in.valid := mem.io.ren metadataPipeIn.valid := mem.io.ren
metadata_pipe_in.bits.source := r_node.a.bits.source metadataPipeIn.bits.source := rNode.a.bits.source
metadata_pipe_in.bits.size := r_node.a.bits.size metadataPipeIn.bits.size := rNode.a.bits.size
val sram_read_backup_reg = RegInit(0.U.asTypeOf(Valid(mem.io.rdata.cloneType))) val sramReadBackupReg = RegInit(0.U.asTypeOf(Valid(mem.io.rdata.cloneType)))
val data_pipe_inst = Module(new Pipeline(data_pipe_in.bits.cloneType, 1)()) val dataPipeInst = Module(new Pipeline(dataPipeIn.bits.cloneType, 1)())
data_pipe_inst.io.in <> data_pipe_in dataPipeInst.io.in <> dataPipeIn
val data_pipe = data_pipe_inst.io.out val dataPipe = dataPipeInst.io.out
val metadata_pipe = Pipeline(metadata_pipe_in, 2) val metadataPipe = Pipeline(metadataPipeIn, 2)
assert((data_pipe.valid || sram_read_backup_reg.valid) === metadata_pipe.valid) assert((dataPipe.valid || sramReadBackupReg.valid) === metadataPipe.valid)
// data pipe is filled, but D is not ready and SRAM read came back // data pipe is filled, but D is not ready and SRAM read came back
when (data_pipe.valid && !r_node.d.ready && data_pipe_in.valid) { when (dataPipe.valid && !rNode.d.ready && dataPipeIn.valid) {
assert(!data_pipe_in.ready) // we should fill backup reg only if data pipe is not enqueueing assert(!dataPipeIn.ready) // we should fill backup reg only if data pipe is not enqueueing
assert(!sram_read_backup_reg.valid) // backup reg should be empty assert(!sramReadBackupReg.valid) // backup reg should be empty
assert(!metadata_pipe_in.ready) // metadata should be filled previous cycle assert(!metadataPipeIn.ready) // metadata should be filled previous cycle
sram_read_backup_reg.valid := true.B sramReadBackupReg.valid := true.B
sram_read_backup_reg.bits := mem.io.rdata sramReadBackupReg.bits := mem.io.rdata
}.otherwise { }.otherwise {
assert(data_pipe_in.ready || !data_pipe_in.valid) // do not skip any response assert(dataPipeIn.ready || !dataPipeIn.valid) // do not skip any response
} }
assert(metadata_pipe_in.fire || !mem.io.ren) // when requesting sram, metadata needs to be ready assert(metadataPipeIn.fire || !mem.io.ren) // when requesting sram, metadata needs to be ready
assert(r_node.d.fire === metadata_pipe.fire) // metadata dequeues iff D fires assert(rNode.d.fire === metadataPipe.fire) // metadata dequeues iff D fires
// when D becomes ready, and data pipe has emptied, time for backup to empty // when D becomes ready, and data pipe has emptied, time for backup to empty
when (r_node.d.ready && sram_read_backup_reg.valid && !data_pipe.valid) { when (rNode.d.ready && sramReadBackupReg.valid && !dataPipe.valid) {
sram_read_backup_reg.valid := false.B sramReadBackupReg.valid := false.B
} }
// must empty backup before filling data pipe // must empty backup before filling data pipe
assert(!(sram_read_backup_reg.valid && data_pipe.valid && data_pipe_in.fire)) assert(!(sramReadBackupReg.valid && dataPipe.valid && dataPipeIn.fire))
r_node.d.bits := r_edge.AccessAck( rNode.d.bits := rEdge.AccessAck(
Mux(r_node.d.valid, metadata_pipe.bits.source, 0.U), Mux(rNode.d.valid, metadataPipe.bits.source, 0.U),
Mux(r_node.d.valid, metadata_pipe.bits.size, 0.U), Mux(rNode.d.valid, metadataPipe.bits.size, 0.U),
Mux(!data_pipe.valid, sram_read_backup_reg.bits, data_pipe.bits).asUInt) Mux(!dataPipe.valid, sramReadBackupReg.bits, dataPipe.bits).asUInt)
r_node.d.valid := data_pipe.valid || sram_read_backup_reg.valid rNode.d.valid := dataPipe.valid || sramReadBackupReg.valid
// r node A is not ready only if D is not ready and both slots filled // r node A is not ready only if D is not ready and both slots filled
r_node.a.ready := r_node.d.ready && !(data_pipe.valid && sram_read_backup_reg.valid) rNode.a.ready := rNode.d.ready && !(dataPipe.valid && sramReadBackupReg.valid)
data_pipe.ready := r_node.d.ready dataPipe.ready := rNode.d.ready
metadata_pipe.ready := r_node.d.ready metadataPipe.ready := rNode.d.ready
// WRITE // WRITE
mem.io.wen := RegNext(w_node.a.fire) mem.io.wen := RegNext(wNode.a.fire)
mem.io.wdata := RegNext(w_node.a.bits.data) mem.io.wdata := RegNext(wNode.a.bits.data)
mem.io.mask := RegNext(VecInit(w_node.a.bits.mask.asBools)) mem.io.mask := RegNext(VecInit(wNode.a.bits.mask.asBools))
val write_resp = Wire(Flipped(w_node.d.cloneType)) val writeResp = Wire(Flipped(wNode.d.cloneType))
write_resp.bits := w_edge.AccessAck(w_node.a.bits) writeResp.bits := wEdge.AccessAck(wNode.a.bits)
write_resp.valid := w_node.a.valid writeResp.valid := wNode.a.valid
w_node.a.ready := write_resp.ready wNode.a.ready := writeResp.ready
w_node.d <> Queue(write_resp, 2) wNode.d <> Queue(writeResp, 2)
} }
// Per-cycle read/write access accounting for the shared-memory banks.
//
// `grouped` returns a one-shot Iterator and `length` exhausts it, so the
// groups are materialised once here; the named value then stays usable
// instead of being left as a spent iterator.
val smemBankMgrsGrouped = outer.smemBankMgrs.grouped(outer.smemSubbanks).toVector
val numBanks = smemBankMgrsGrouped.length
val counterWidth = 32
// One wire per (bank, subbank) pair, separately for reads and writes.
val smemReadsPerBankPerCycle = Seq.fill(numBanks)(
  Seq.fill(outer.smemSubbanks)(Wire(UInt(counterWidth.W))))
val smemWritesPerBankPerCycle = Seq.fill(numBanks)(
  Seq.fill(outer.smemSubbanks)(Wire(UInt(counterWidth.W))))
// Sum over the subbanks of each bank, then over all banks.
val smemReadsPerCycle = smemReadsPerBankPerCycle.map(_.reduce(_ + _)).reduce(_ + _)
val smemWritesPerCycle = smemWritesPerBankPerCycle.map(_.reduce(_ + _)).reduce(_ + _)
@@ -604,123 +602,122 @@ class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp(
// Mark the access counters with dontTouch so they are not optimised away.
dontTouch(smemReadCounter)
dontTouch(smemWriteCounter)

if (outer.strideByWord) {
  // Word-strided layout: one word-wide SRAM per (bank, word) pair.
  //
  // Address format:
  //   [ smem_base | bank_id | line_id | word_id | byte_offset ]
  // line_id is what indexes into each SRAM.
  val linesPerBank = outer.smemDepth
  val rowBytes = outer.smemWidth
  val wordBytes = outer.wordSize
  val bankBytes = linesPerBank * rowBytes

  // line_id field of an address (the SRAM row index).
  def lineIdOf(address: UInt): UInt =
    (address & (bankBytes - 1).U) >> log2Ceil(rowBytes).U

  // bank_id field of an address.
  def bankIdOf(address: UInt): UInt =
    ((address & (bankBytes * outer.smemBanks - 1).U) >> log2Ceil(bankBytes).U).asUInt

  // word_id field of an address.
  def wordIdOf(address: UInt): UInt =
    ((address & (rowBytes - 1).U) >> log2Ceil(wordBytes).U).asUInt

  // uniformFires(rw)(bank)(word): default false, overridden per word below.
  val uniformFires = Seq.fill(2)(
    VecInit.fill(outer.smemBanks)(VecInit.fill(outer.smemSubbanks)(false.B)))

  outer.smemBankMgrs.grouped(outer.smemSubbanks).zipWithIndex.foreach { case (bankMgrs, bid) =>
    // TODO move this loop out
    // val Seq(valid_r_sources, valid_w_sources) = uniform_xbar_nodes.map(_(bid)).map { words =>
    //   VecInit(words.map(_.out.map(_._1.a.valid)).transpose.map { words_with_same_idx =>
    //     VecInit(words_with_same_idx.toSeq).asUInt.orR
    //   }.toSeq).asUInt
    // }

    // One-hot select hints for this bank: first the read side, then the write side.
    val wordSelects1h = Seq(
      Wire(UInt(outer.uniformNodesIn.head(bid).head.length.W)).suggestName(s"ws_r_b${bid}"),
      Wire(UInt(outer.uniformNodesIn.last(bid).head.length.W)).suggestName(s"ws_w_b${bid}"))

    // Per source index: is that source's A channel valid for any word of this bank?
    val Seq(validRSources, validWSources) = outer.uniformNodesIn.zipWithIndex.map { case (banks, rw) =>
      VecInit(banks(bid).map(_.map(_.in.head._1.a.valid)).transpose.map { wordsInIdx =>
        VecInit(wordsInIdx.toSeq).asUInt.orR
      }.toSeq).asUInt.suggestName(s"valid_sources_rw${rw}_b${bid}")
    }

    assert(bankMgrs.flatten.size == 2/* read and write */ * outer.smemSubbanks)

    bankMgrs.zipWithIndex.foreach { case (Seq(r, w), wid) =>
      // The read manager must not accept PutFull; the write manager must not accept Get.
      assert(!r.portParams.map(_.anySupportPutFull).reduce(_ || _))
      assert(!w.portParams.map(_.anySupportGet).reduce(_ || _))

      val mem = TwoPortSyncMem(
        n = linesPerBank,
        t = UInt((wordBytes * 8).W),
        mask_len = wordBytes // byte level mask
      )
      mem.suggestName(s"rad_smem_c${outer.thisClusterParams.clusterId}_b${bid}_w${wid}")

      val (rNode, rEdge) = r.in.head
      val (wNode, wEdge) = w.in.head

      mem.io.raddr := lineIdOf(rNode.a.bits.address)
      // The write address is registered before it reaches the SRAM.
      mem.io.waddr := RegNext(lineIdOf(wNode.a.bits.address))

      // A valid read request must actually target this bank and this word.
      assert(!rNode.a.valid || (bid.U === bankIdOf(rNode.a.bits.address)),
        "bank id mismatch with request")
      assert(!rNode.a.valid || (wid.U === wordIdOf(rNode.a.bits.address)),
        "word id mismatch with request")

      makeBuffer(mem, rNode, rEdge, wNode, wEdge)

      // add access counters to banks
      smemReadsPerBankPerCycle(bid)(wid) := rNode.a.fire
      smemWritesPerBankPerCycle(bid)(wid) := wNode.a.fire

      // (uniform_fires zip Seq(uniform_r_nodes, uniform_w_nodes)).foreach { case (uf, n) =>
      //   uf(bid)(wid) := VecInit(n(bid)(wid).map(_.out.head._1.a.fire)).asUInt.orR
      // }
      (uniformFires zip outer.uniformNodesOut).foreach { case (uf, n) =>
        uf(bid)(wid) := n(bid)(wid).in.head._1.a.fire
      }
    }

    // use round robin to decide uniform select
    (wordSelects1h zip Seq(validRSources, validWSources)).zipWithIndex.foreach { case ((ws, vs), rw) =>
      ws := TLArbiter.roundRobin(vs.getWidth, vs, uniformFires(rw)(bid).asUInt.orR)
    }

    // mask valid into xbar to prevent triggering assertion
    // (wordSelects1h zip outer.uniformNodesIn).foreach { case (ws, ui) =>
    //   ui(bid).foreach { sources =>
    //     val inValid = sources.map(_.in.head._1.a.valid)
    //     val outValid = sources.map(_.out.head._1.a.valid)
    //     val wsActual = Mux((ws & VecInit(inValid).asUInt).orR,
    //       ws, TLArbiter.roundRobin(
    //         inValid.length, VecInit(inValid).asUInt, VecInit(sources.map(_.in.head._1.a.fire)).asUInt.orR))
    //     (inValid lazyZip outValid lazyZip wsActual.asBools).foreach { case (iv, ov, sel) =>
    //       ov := iv && sel // only present output valid if input is selected
    //     }
    //   }
    // }
    (wordSelects1h lazyZip outer.uniformPolicyNodes lazyZip outer.uniformNodesIn).foreach { case (ws, pn, ui) =>
      (pn(bid) zip ui(bid)).foreach { case (policies, sources) =>
        val inValid = sources.map(_.in.head._1.a.valid)
        val outValid = sources.map(_.out.head._1.a.valid)
        // Follow the hint when it points at a valid input; otherwise fall
        // back to TLArbiter.lowestIndexFirst over the valid inputs.
        val hintHit = (ws & VecInit(inValid).asUInt).orR
        val wsActual = Mux(hintHit, ws, TLArbiter.lowestIndexFirst(
          inValid.length, VecInit(inValid).asUInt, hintHit && policies.out.head._1.actual(0)))
        (inValid lazyZip outValid lazyZip wsActual.asBools).foreach { case (iv, ov, sel) =>
          ov := iv && sel // only present output valid if input is selected
        }
      }
    }

    // Send this bank's select to every policy node as its hint.
    (outer.uniformPolicyNodes zip wordSelects1h).zipWithIndex.foreach { case ((nodesBw, ws), rw) =>
      nodesBw(bid).foreach { policy =>
        policy.out.head._1.hint := ws
      }
    }
  }
} else {
  // Row-wide layout: one SRAM per manager pair, each entry a full smemWidth-byte row.
  // NOTE(review): nothing visible on this path assigns smemReadsPerBankPerCycle /
  // smemWritesPerBankPerCycle; confirm they are driven elsewhere.
  val rowBytes = outer.smemWidth

  // Row index: XOR the address with the smem base, then drop the in-row byte offset.
  def rowIndexOf(address: UInt): UInt =
    (address ^ outer.smemBase.U) >> log2Ceil(rowBytes).U

  outer.smemBankMgrs.foreach { case Seq(r, w) =>
    val mem = TwoPortSyncMem(
      n = outer.smemDepth,
      t = UInt((rowBytes * 8).W),
      mask_len = rowBytes // byte level mask
    )
    val (rNode, rEdge) = r.in.head
    val (wNode, wEdge) = w.in.head
    mem.io.raddr := rowIndexOf(rNode.a.bits.address)
    // The write address is registered before it reaches the SRAM.
    mem.io.waddr := RegNext(rowIndexOf(wNode.a.bits.address))
    makeBuffer(mem, rNode, rEdge, wNode, wEdge)
  }
}
} }