From 9c2a55ae79e625d3d4e821ad2c5741f52c5eaa9c Mon Sep 17 00:00:00 2001 From: Vamber Yang Date: Mon, 8 May 2023 16:52:45 -0700 Subject: [PATCH] CoalArbiter RTL implementation first draft, verification WIP --- src/main/scala/tilelink/Coalescing.scala | 271 +++++++++++++++++++---- 1 file changed, 223 insertions(+), 48 deletions(-) diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index 84e1641..90237b7 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -810,6 +810,12 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends class CoalescedResponseBundle(config: CoalescerConfig) extends Bundle { val source = UInt(log2Ceil(config.numNewSrcIds).W) val data = UInt((8 * (1 << config.maxCoalLogSize)).W) + + def fromTLD(bundle:TLBundleD): Unit = { + this.source := bundle.source + this.data := bundle.data + } + } class Uncoalescer(config: CoalescerConfig) extends Module { @@ -1766,12 +1772,20 @@ class CoalArbiter(config: CoalescerConfig) (implicit p: Parameters) extends Lazy coalNode := coalReqNode ) - + //Assertion Section + def isPowerOfTwo(n: Int): Boolean = { + (n > 0) && ((n & (n - 1)) == 0) + } + assert(isPowerOfTwo(config.numOldSrcIds), "Number of old source id must be power of 2") + assert(isPowerOfTwo(config.numNewSrcIds), "Number of new source id must be power of 2") + //Below is for efficient conversion from Global to Local bits + //Also, we should have more source id for coalesced request for better perf + assert(config.numNewSrcIds >= config.numOldSrcIds, "new source id must be equal or greater than old source id") // 1 Final Output Identity Node val outputNode = TLIdentityNode() - //Explictly define I/O bundule tyoe + val nonCoalEntryT = new ReqQueueEntry( log2Ceil(config.numOldSrcIds), config.wordWidth, @@ -1791,10 +1805,13 @@ class CoalArbiter(config: CoalescerConfig) (implicit p: Parameters) extends Lazy ) val respCoalBundleT = new CoalescedResponseBundle(config) - + + lazy val module = new CoalArbiterImpl( this, config, nonCoalEntryT, coalEntryT, respNonCoalEntryT, respCoalBundleT) + + } class CoalArbiterImpl(outer: CoalArbiter, @@ -1816,7 +1833,7 @@ class CoalArbiterImpl(outer: CoalArbiter, //Helper Class & Method Section - //Provide an decoupled interface between bundle of 2 different type + //Provide an simple decoupled interface between bundle of 2 different type class ConverterTunnel[T <: Data, U <: Data]( genA: T, genB: U, @@ -1833,19 +1850,36 @@ class CoalArbiterImpl(outer: CoalArbiter, def canHitBank(addr: UInt, bankNum: UInt) : Bool = { - val byteOffset = 3 - val bankBase = log2Ceil(config.bankStrideInBytes) - val bankOffset = log2Ceil(config.numArbiterOutputPorts) - (addr(bankBase+bankOffset-byteOffset, bankBase - byteOffset) === bankNum) + val byteOffset = 3 + val bankBase = log2Ceil(config.bankStrideInBytes) + val bankOffset = log2Ceil(config.numArbiterOutputPorts) + (addr(bankBase+bankOffset-byteOffset, bankBase - byteOffset) === bankNum) } //This Operation Could be Expensive - def toGlobalSourceId(isCoalReq : Bool, laneIndex : UInt, sourceID : UInt) : UInt = { - val res = Mux(isCoalReq, - config.numNewSrcIds.U * laneIndex + sourceID, - config.numOldSrcIds.U * laneIndex + sourceID + config.numNewSrcIds.U * config.numCoalReqs.U - ) - res + def toGlobalSourceId(isCoalReq : Bool, laneIdx : UInt, sourceID : UInt) : UInt = { + val gid = Mux(isCoalReq, + config.numNewSrcIds.U * laneIdx + sourceID, + config.numOldSrcIds.U * laneIdx + sourceID + config.numNewSrcIds.U * config.numCoalReqs.U + ) + gid + } + //All the ids are power of 2, so we can just look at bottom bits + def toLocalSourceId(isCoalReq : Bool, sourceID : UInt) : UInt = { + val sid = Mux(isCoalReq, + sourceID(log2Ceil(config.numNewSrcIds)-1, 0), + sourceID(log2Ceil(config.numOldSrcIds)-1, 0) + ) + sid + } + def belongsToLane(laneIdx: UInt, gid: UInt) : Bool = { + val base = config.numNewSrcIds.U * config.numCoalReqs.U + ((gid >= base + config.numOldSrcIds.U * laneIdx) && + (gid < base + config.numOldSrcIds.U * (laneIdx+1.U))) + } + + def isCoalReq(gid : UInt) : Bool = { + gid <= config.numNewSrcIds.U * config.numCoalReqs.U } // @@ -1858,26 +1892,52 @@ class CoalArbiterImpl(outer: CoalArbiter, config.addressWidth, log2Ceil(config.wordSizeInBytes) ) - // Before a nonCoalesced request enter RR arbiter + val coalGiDEntryT = new ReqQueueEntry( + log2Ceil(fullSourceIdRange), + log2Ceil(config.maxCoalLogSize), + config.addressWidth, + config.maxCoalLogSize //already log 2 + ) + + // Before either a coalesced or non coalesced request enter RR arbiter // It needs to turn its source into global source id // Unfortunately this involves extending the width of sourceid field, and a new bundle must be created // This is a higher order function - def nonCoal2gidReqFn(laneIndex : UInt) - : ReqQueueEntry => ReqQueueEntry = { - def func(lid_req : ReqQueueEntry) : ReqQueueEntry = { - val gid_req = nonCoalGiDEntryT.cloneType - gid_req <> lid_req - gid_req.source := toGlobalSourceId(false.B, laneIndex, lid_req.source) - gid_req - } - func + def reqEntry2GidReqFn(laneIndex : UInt, reqEntryT : ReqQueueEntry, isCoalReq : Bool) : ReqQueueEntry => ReqQueueEntry = { + def func(lid_req : ReqQueueEntry) : ReqQueueEntry = { + val gid_req = reqEntryT.cloneType + gid_req <> lid_req + gid_req.source := toGlobalSourceId(isCoalReq, laneIndex, lid_req.source) + gid_req + } + func } - def nonCoal2TLAFn(edgeOut: TLEdgeOut) : ReqQueueEntry => TLBundleA = { - def func(gid_req : ReqQueueEntry) : TLBundleA = { - gid_req.toTLA(edgeOut) - } - func + + def reqEntry2TLAFn(edgeOut: TLEdgeOut) : ReqQueueEntry => TLBundleA = { + def func(gid_req : ReqQueueEntry) : TLBundleA = { + gid_req.toTLA(edgeOut) + } + func + } + + def tlD2respEntryFn() : TLBundleD => RespQueueEntry = { + def func(bundle: TLBundleD) : RespQueueEntry = { + val resp = Wire(respNonCoalEntryT) + resp.fromTLD(bundle) + resp.source := toLocalSourceId(false.B, bundle.source) + resp + } + func + } + def tlD2CoalBundleFn() : TLBundleD => CoalescedResponseBundle = { + def func(bundle: TLBundleD) : CoalescedResponseBundle = { + val coalbundle = Wire(respCoalBundleT) + coalbundle.fromTLD(bundle) + coalbundle.source := toLocalSourceId(true.B, bundle.source) + coalbundle + } + func } ///////////////////////////////////////////////////// @@ -1899,47 +1959,80 @@ class CoalArbiterImpl(outer: CoalArbiter, case (req, q) => q.io.enq <> req } //Stage 1b: connect output of Queues to the RR arbiters (each arbiter is for a unique bank) + // the two loops below could be merged into one loop, but separated for readability val nonCoalRRArbiters = Seq.tabulate(config.numArbiterOutputPorts){_=> Module(new RRArbiter(nonCoalGiDEntryT.cloneType, config.numLanes)) } nonCoalReqsQueues.zipWithIndex.foreach{ case(q, q_idx) => nonCoalRRArbiters.zipWithIndex.foreach{ case(arb, arb_idx) => - val nonCoal2gidFunc = nonCoal2gidReqFn(q_idx.U) + val nonCoal2gidFunc = reqEntry2GidReqFn(q_idx.U, nonCoalGiDEntryT, false.B) val nonCoalRRArbTunnel = Module(new ConverterTunnel( - coalEntryT.cloneType, + nonCoalEntryT.cloneType, nonCoalGiDEntryT.cloneType, nonCoal2gidFunc) ) nonCoalRRArbTunnel.io.in <> q.io.deq arb.io.in(q_idx) <> nonCoalRRArbTunnel.io.out //OverWrite Valid base on if we can actually hit this bank - arb.io.in(q_idx).valid := canHitBank(nonCoalRRArbTunnel.io.out.bits.address, arb_idx.U) + arb.io.in(q_idx).valid := canHitBank(nonCoalRRArbTunnel.io.out.bits.address, arb_idx.U) && + nonCoalRRArbTunnel.io.out.valid } } + val coalRRArbiters = Seq.tabulate(config.numArbiterOutputPorts){_=> + Module(new RRArbiter(coalGiDEntryT.cloneType, config.numCoalReqs)) + } + coalReqsQueues.zipWithIndex.foreach{ case(q, q_idx) => + coalRRArbiters.zipWithIndex.foreach{ case(arb, arb_idx) => + val coal2gidFunc = reqEntry2GidReqFn(q_idx.U, coalGiDEntryT, true.B) + val coalRRArbTunnel = Module(new ConverterTunnel( + coalEntryT.cloneType, + coalGiDEntryT.cloneType, + coal2gidFunc) + ) + coalRRArbTunnel.io.in <> q.io.deq + arb.io.in(q_idx) <> coalRRArbTunnel.io.out + //OverWrite Valid + arb.io.in(q_idx).valid := canHitBank(coalRRArbTunnel.io.out.bits.address, arb_idx.U) && + coalRRArbTunnel.io.out.valid + } + } + //Stage 2, Connect the output of Arbiters to respective nonCoal node - - //Stage 2a, the K Arbiters for Coalesced - (outer.nonCoalNarrowNodes zip nonCoalRRArbiters).foreach{ - case (node, arb) => - val (tlOut, edgeOut) = node.out(0) - val coal2TLAFunc = nonCoal2TLAFn(edgeOut) - val nonCoalTLATunnel = Module(new ConverterTunnel( - arb.io.out.bits.cloneType, - tlOut.a.bits.cloneType, - coal2TLAFunc + + // Concatenate the nodes , concatenates the arbiters, and zip them together, then loop + // the reqEntry2TLA will generate different TLA bundle depending on if the Req is coal or non coal + ((outer.nonCoalNarrowNodes++outer.coalReqNodes) zip + (nonCoalRRArbiters++coalRRArbiters)).foreach{ + case (node, arb) => + val (tlOut, edgeOut) = node.out(0) + val coal2TLAFunc = reqEntry2TLAFn(edgeOut) + val nonCoalTLATunnel = Module(new ConverterTunnel( + arb.io.out.bits.cloneType, + tlOut.a.bits.cloneType, + coal2TLAFunc + ) ) - ) - nonCoalTLATunnel.io.in <> arb.io.out - tlOut.a <> nonCoalTLATunnel.io.out + nonCoalTLATunnel.io.in <> arb.io.out + tlOut.a <> nonCoalTLATunnel.io.out } + - //Stage 3, Connect the K edges Identity Node to PO arbiter - // noncoalesced to port 1, coalesced to por1 + //Stage 3, Make the Idenity node pass through channel A + // Connect the K edges Identity Node to PO arbiter + // noncoalesced to port 1, coalesced to port 0 val priorityArbs = Seq.tabulate(config.numArbiterOutputPorts){_=> Module(new Arbiter(outer.outputNode.out(0)._1.a.bits.cloneType, 2)) } + + //Make both Idenity node Pass Through Channel A, for both Coal and NonCoal + ((outer.nonCoalNode.out ++ outer.coalNode.out) zip + (outer.nonCoalNode.in ++ outer.coalNode.in)).foreach{ + case ((tlOut,_),(tlIn,_)) => + tlOut.a <> tlIn.a + } + //Connection to PO Arbiters ((outer.nonCoalNode.out zip outer.coalNode.out) zip priorityArbs).foreach{ case (((nonCoalOut, _),(coalOut, _)), arb) => arb.io.in(1) <> nonCoalOut.a @@ -1951,10 +2044,92 @@ class CoalArbiterImpl(outer: CoalArbiter, //And make idenitity node passs through the inputs ((outer.outputNode.in zip outer.outputNode.out) zip priorityArbs).foreach{ case (((tlIn, _), (tlOut, _)), arb) => - tlOut <> tlIn + tlOut.a <> tlIn.a tlIn.a <> arb.io.out } + + + //////////////// + // Incoming Data Handling + + //Stage 1, Forward data from output node to the Idenity node of Coal and NonCoal + // while setting the correct valid signal to base on if the request is Coalesced or not + + ((outer.outputNode.in zip outer.outputNode.out) zip + (outer.nonCoalNode.out zip outer.coalNode.out)).foreach{ + case( ((tlIn, _),(tlOut, _)), ((nonCoalOut, _),(coalOut, _)) ) => + tlIn.d <> tlOut.d + nonCoalOut.d <> tlIn.d + coalOut.d <> tlIn.d + //rewrite valid signal + nonCoalOut.d.valid := !isCoalReq(tlIn.d.bits.source) && tlIn.d.valid + coalOut.d.valid := isCoalReq(tlIn.d.bits.source) && tlIn.d.valid + } + + //Stage 2, Make both Idenity node Pass Through Channel D, for both Coal and NonCoal + // + ((outer.nonCoalNode.out ++ outer.coalNode.out) zip + (outer.nonCoalNode.in ++ outer.coalNode.in)).foreach{ + case ((tlOut,_),(tlIn,_)) => + tlIn.d <> tlOut.d + } + + //Stage 3, Connect the channel D of nonCoalNodes to the perLane arbiters + + //Stage 3a, connect the noncoalesced edge to every single perlane arbiter + val perLaneRespRRArbs = Seq.tabulate(config.numLanes){_=> + Module(new RRArbiter(respNonCoalEntryT.cloneType, config.numArbiterOutputPorts)) + } + outer.nonCoalNarrowNodes.zipWithIndex.foreach{ + case (node, node_idx) => + val (tlOut, edgeOut) = node.out(0) + perLaneRespRRArbs.zipWithIndex.foreach{ + case(arb, arb_idx) => + val tlD2RespEntryFunc = tlD2respEntryFn() + val perLaneArbTunnel = Module(new ConverterTunnel( + tlOut.d.bits.cloneType, + arb.io.in(0).bits.cloneType, + tlD2RespEntryFunc + ) + ) + perLaneArbTunnel.io.in <> tlOut.d + arb.io.in(node_idx) <> perLaneArbTunnel.io.out + //rewrite valid base on if source id actually belongs to this lane + arb.io.in(node_idx).valid := belongsToLane(arb_idx.U, perLaneArbTunnel.io.out.bits.source) && + perLaneArbTunnel.io.out.valid + } + } + //Stage 3b, connect coalesced request to + val coalBundleRRArbiter = Module(new RRArbiter(respCoalBundleT.cloneType, config.numArbiterOutputPorts)) + outer.coalReqNodes.zipWithIndex.foreach{ + case(node, node_idx) => + val (tlOut, edgeOut) = node.out(0) + val tlD2CoalBundleFunc = tlD2CoalBundleFn() + val coalBundleArbTunnel = Module(new ConverterTunnel( + tlOut.d.bits.cloneType, + coalBundleRRArbiter.io.in(0).bits.cloneType, + tlD2CoalBundleFunc + ) + ) + coalBundleArbTunnel.io.in <> tlOut.d + coalBundleRRArbiter.io.in(node_idx) <> coalBundleArbTunnel.io.out + } + + + //Connect 4, Connect the arbiters to output + // connect the noncoalesced vector + (perLaneRespRRArbs zip io.nonCoalResps).foreach{ + case (arb, resp) => + resp <> arb.io.out + } + // connect the coalesced bundle + io.coalResp <> coalBundleRRArbiter.io.out + + + + + }