From 1633371f6f3285bc45138746a160168f9f24f02d Mon Sep 17 00:00:00 2001 From: Vamber Yang Date: Wed, 10 May 2023 18:59:36 -0700 Subject: [PATCH] Coalescer XBar, a design overhaul from CoalArbiter, the best way to implement the 'arbiter' functionality is to implement as a TLXbar with different arbitration policy (RR + PO) --- src/main/scala/tilelink/Coalescing.scala | 389 +++++------------------ 1 file changed, 74 insertions(+), 315 deletions(-) diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index 86fd3c0..a65c037 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -1756,64 +1756,64 @@ class TLRAMCoalescerTest(timeout: Int = 500000)(implicit p: Parameters) extends //////////// //////////// //////////// -//////////// Code for CoalArbiter +//////////// Code for CoalescerXbar //////////// //////////// // Lazy Module is needed to instantiate outgoing node -class CoalArbiter(config: CoalescerConfig) (implicit p: Parameters) extends LazyModule { +class CoalescerXbar(config: CoalescerConfig) (implicit p: Parameters) extends LazyModule { // Let SIMT's word size be 32, and read/write granularity be 256 - val fullSourceIdRange = config.numOldSrcIds * config.numLanes + config.numNewSrcIds * config.numCoalReqs - // K client nodes of edge size 32 for non-coalesced reqs - val nonCoalNarrowNodes = Seq.tabulate(config.numArbiterOutputPorts){ i => + // 32 client nodes of edge size 32 for non-coalesced reqs + // And attaching them to widgets + val nonCoalNarrowNodes = Seq.tabulate(config.numLanes){i => val nonCoalNarrowParam = Seq( TLMasterParameters.v1( name = "NonCoalNarrowNode" + i.toString, - sourceId = IdRange(0, fullSourceIdRange) + sourceId = IdRange(0, config.numOldSrcIds) ) ) TLClientNode(Seq(TLMasterPortParameters.v1(nonCoalNarrowParam))) } + val nonCoalWidgets = Seq.tabulate(config.numLanes){ _=> + TLWidthWidget(config.wordSizeInBytes) + } + + (nonCoalWidgets zip 
nonCoalNarrowNodes).foreach{ + case(wgt,node)=> wgt := node + } + + //Creating a round robin cross tilelink xbar for the un-coalesced + //and connect them to the widgets + val nonCoalXbar = LazyModule(new TLXbar(TLArbiter.roundRobin)) + nonCoalWidgets.foreach{nonCoalXbar.node:=_} + - // One identity Node for the Noncoalesced Reqest after Width Adaptation - // You can put widget between idenity node and client node (diplomacy) - val nonCoalNode = TLIdentityNode() - nonCoalNarrowNodes.foreach(narrowNode => - nonCoalNode := TLWidthWidget(config.wordSizeInBytes) := narrowNode - ) // K client nodes of edge size 256 for the coalesced reqs - val coalReqNodes = Seq.tabulate(config.numArbiterOutputPorts){ i => + val coalReqNodes = Seq.tabulate(config.numCoalReqs){ i => val coalParam = Seq( TLMasterParameters.v1( name = "CoalReqNode" + i.toString, - sourceId = IdRange(0, fullSourceIdRange) + sourceId = IdRange(0, config.numNewSrcIds) ) ) TLClientNode(Seq(TLMasterPortParameters.v1(coalParam))) } - // 1 idenity node for the Coalesced Reqs - val coalNode = TLIdentityNode() - coalReqNodes.foreach(coalReqNode => - coalNode := coalReqNode - ) + // Create a RR Xbar for the coalesced request + val coalXbar = LazyModule(new TLXbar(TLArbiter.roundRobin)) + coalReqNodes.foreach{coalXbar.node:=_} - //Assertion Section - def isPowerOfTwo(n: Int): Boolean = { - (n > 0) && ((n & (n - 1)) == 0) - } - assert(isPowerOfTwo(config.numOldSrcIds), "Number of old source id must be power of 2") - assert(isPowerOfTwo(config.numNewSrcIds), "Number of new source id must be power of 2") - //Below is for efficient conversion from Global to Local bits - //Also, we should have more source id for coalesced request for better perf - assert(config.numNewSrcIds >= config.numOldSrcIds, "new source id must be equal or greater than old source id") - // 1 Final Output Identity Node - val outputNode = TLIdentityNode() + //Create a Priority XBar between Coalesced and Uncoalesced Request + val outputXbar = 
LazyModule(new TLXbar(TLArbiter.lowestIndexFirst)) + outputXbar.node :=* coalXbar.node + outputXbar.node :=* nonCoalXbar.node + //express output crossbar as an identity node for simpler downstream connection + val node = TLIdentityNode() + node :=* outputXbar.node - val nonCoalEntryT = new ReqQueueEntry( log2Ceil(config.numOldSrcIds), config.wordWidth, @@ -1835,14 +1835,14 @@ class CoalArbiter(config: CoalescerConfig) (implicit p: Parameters) extends Lazy val respCoalBundleT = new CoalescedResponseBundle(config) - lazy val module = new CoalArbiterImpl( + lazy val module = new CoalescerXbarImpl( this, config, nonCoalEntryT, coalEntryT, respNonCoalEntryT, respCoalBundleT) } -class CoalArbiterImpl(outer: CoalArbiter, +class CoalescerXbarImpl(outer: CoalescerXbar, config: CoalescerConfig, nonCoalEntryT: ReqQueueEntry, coalEntryT: ReqQueueEntry, @@ -1859,119 +1859,7 @@ class CoalArbiterImpl(outer: CoalArbiter, } ) - //Helper Class & Method Section - - //Provide an simple decoupled interface between bundle of 2 different type - class ConverterTunnel[T <: Data, U <: Data]( - genA: T, - genB: U, - conversionFn: T => U - ) extends Module { - val io = IO(new Bundle { - val in = Flipped(Decoupled(genA.cloneType)) - val out = Decoupled(genB.cloneType) - }) - io.in.ready := io.out.ready - io.out.valid := io.in.valid - io.out.bits := conversionFn(io.in.bits) - } - - - def canHitBank(addr: UInt, bankNum: UInt) : Bool = { - val byteOffset = 3 - val bankBase = log2Ceil(config.bankStrideInBytes) - val bankOffset = log2Ceil(config.numArbiterOutputPorts) - (addr(bankBase+bankOffset-byteOffset, bankBase - byteOffset) === bankNum) - } - - //This Operation Could be Expensive - def toGlobalSourceId(isCoalReq : Bool, laneIdx : UInt, sourceID : UInt) : UInt = { - val gid = Mux(isCoalReq, - config.numNewSrcIds.U * laneIdx + sourceID, - config.numOldSrcIds.U * laneIdx + sourceID + config.numNewSrcIds.U * config.numCoalReqs.U - ) - gid - } - //All the ids are power of 2, so we can just look at 
bottom bits - def toLocalSourceId(isCoalReq : Bool, sourceID : UInt) : UInt = { - val sid = Mux(isCoalReq, - sourceID(log2Ceil(config.numNewSrcIds)-1, 0), - sourceID(log2Ceil(config.numOldSrcIds)-1, 0) - ) - sid - } - def belongsToLane(laneIdx: UInt, gid: UInt) : Bool = { - val base = config.numNewSrcIds.U * config.numCoalReqs.U - ((gid >= base + config.numOldSrcIds.U * laneIdx) && - (gid < base + config.numOldSrcIds.U * (laneIdx+1.U))) - } - - def isCoalReq(gid : UInt) : Bool = { - gid <= config.numNewSrcIds.U * config.numCoalReqs.U - } - - // - val fullSourceIdRange = config.numOldSrcIds * config.numLanes + config.numNewSrcIds * config.numCoalReqs - - - val nonCoalGiDEntryT = new ReqQueueEntry( - log2Ceil(fullSourceIdRange), - config.wordWidth, - config.addressWidth, - log2Ceil(config.wordSizeInBytes) - ) - val coalGiDEntryT = new ReqQueueEntry( - log2Ceil(fullSourceIdRange), - log2Ceil(config.maxCoalLogSize), - config.addressWidth, - config.maxCoalLogSize //already log 2 - ) - - // Before either a coalesced or non coalesced request enter RR arbiter - // It needs to turn its source into global source id - // Unfortunately this involves extending the width of sourceid field, and a new bundle must be created - // This is a higher order function - def reqEntry2GidReqFn(laneIndex : UInt, reqEntryT : ReqQueueEntry, isCoalReq : Bool) : ReqQueueEntry => ReqQueueEntry = { - def func(lid_req : ReqQueueEntry) : ReqQueueEntry = { - val gid_req = reqEntryT.cloneType - gid_req <> lid_req - gid_req.source := toGlobalSourceId(isCoalReq, laneIndex, lid_req.source) - gid_req - } - func - } - - - def reqEntry2TLAFn(edgeOut: TLEdgeOut) : ReqQueueEntry => TLBundleA = { - def func(gid_req : ReqQueueEntry) : TLBundleA = { - gid_req.toTLA(edgeOut) - } - func - } - - def tlD2respEntryFn() : TLBundleD => RespQueueEntry = { - def func(bundle: TLBundleD) : RespQueueEntry = { - val resp = Wire(respNonCoalEntryT) - resp.fromTLD(bundle) - resp.source := toLocalSourceId(false.B, bundle.source) 
- resp - } - func - } - def tlD2CoalBundleFn() : TLBundleD => CoalescedResponseBundle = { - def func(bundle: TLBundleD) : CoalescedResponseBundle = { - val coalbundle = Wire(respCoalBundleT) - coalbundle.fromTLD(bundle) - coalbundle.source := toLocalSourceId(true.B, bundle.source) - coalbundle - } - func - } - - ///////////////////////////////////////////////////// - //HDL Implementation Section - ///////////////////////////////////////////////////// - + //Create Queues to receive data from upstream //Stage 1: Create Queue for nonCoalReqs and CoalReqs val nonCoalReqsQueues = Seq.tabulate(config.numLanes){_=> Module(new Queue(nonCoalEntryT.cloneType, 1, true, false)) @@ -1980,182 +1868,53 @@ class CoalArbiterImpl(outer: CoalArbiter, Module(new Queue(coalEntryT.cloneType, 1, true, false)) } //Stage 1a: connect two Queue groups to the input - (io.nonCoalReqs zip nonCoalReqsQueues).foreach{ + (io.nonCoalReqs++io.coalReqs zip nonCoalReqsQueues++coalReqsQueues).foreach{ case (req, q) => q.io.enq <> req } - (io.coalReqs zip coalReqsQueues).foreach{ - case (req, q) => q.io.enq <> req + + //Stage 2: connect output of the queue to the respective Node + (nonCoalReqsQueues++coalReqsQueues zip outer.nonCoalNarrowNodes++outer.coalReqNodes).foreach{ + case(q, node) => + val (tlOut, edgeOut) = node.out(0) + q.io.deq.ready := tlOut.a.ready + tlOut.a.valid := q.io.deq.valid + tlOut.a.bits := q.io.deq.bits.toTLA(edgeOut) } - //Stage 1b: connect output of Queues to the RR arbiters (each arbiter is for a unique bank) - // the two loops below could be merged into one loop, but separated for readability - val nonCoalRRArbiters = Seq.tabulate(config.numArbiterOutputPorts){_=> - Module(new RRArbiter(nonCoalGiDEntryT.cloneType, config.numLanes)) - } - nonCoalReqsQueues.zipWithIndex.foreach{ case(q, q_idx) => - nonCoalRRArbiters.zipWithIndex.foreach{ case(arb, arb_idx) => - val nonCoal2gidFunc = reqEntry2GidReqFn(q_idx.U, nonCoalGiDEntryT, false.B) - val nonCoalRRArbTunnel = Module(new 
ConverterTunnel( - nonCoalEntryT.cloneType, - nonCoalGiDEntryT.cloneType, - nonCoal2gidFunc) - ) - nonCoalRRArbTunnel.io.in <> q.io.deq - arb.io.in(q_idx) <> nonCoalRRArbTunnel.io.out - //OverWrite Valid base on if we can actually hit this bank - arb.io.in(q_idx).valid := canHitBank(nonCoalRRArbTunnel.io.out.bits.address, arb_idx.U) && - nonCoalRRArbTunnel.io.out.valid - } - } - val coalRRArbiters = Seq.tabulate(config.numArbiterOutputPorts){_=> - Module(new RRArbiter(coalGiDEntryT.cloneType, config.numCoalReqs)) - } - coalReqsQueues.zipWithIndex.foreach{ case(q, q_idx) => - coalRRArbiters.zipWithIndex.foreach{ case(arb, arb_idx) => - val coal2gidFunc = reqEntry2GidReqFn(q_idx.U, coalGiDEntryT, true.B) - val coalRRArbTunnel = Module(new ConverterTunnel( - coalEntryT.cloneType, - coalGiDEntryT.cloneType, - coal2gidFunc) - ) - coalRRArbTunnel.io.in <> q.io.deq - arb.io.in(q_idx) <> coalRRArbTunnel.io.out - //OverWrite Valid - arb.io.in(q_idx).valid := canHitBank(coalRRArbTunnel.io.out.bits.address, arb_idx.U) && - coalRRArbTunnel.io.out.valid - } + //The XBar will take care of the rest + + + // + // Inward data handling + // + + // For the uncoalesced data response + (outer.nonCoalNarrowNodes zip io.nonCoalResps).foreach{ + case(node,resp) => + val (tlOut, edgeOut) = node.out(0) + val nonCoalResp = Wire(respNonCoalEntryT) + nonCoalResp.fromTLD(tlOut.d.bits) + tlOut.d.ready := resp.ready + resp.valid := tlOut.d.valid + resp.bits := nonCoalResp } - - //Stage 2, Connect the output of Arbiters to respective nonCoal node - - // Concatenate the nodes , concatenates the arbiters, and zip them together, then loop - // the reqEntry2TLA will generate different TLA bundle depending on if the Req is coal or non coal - ((outer.nonCoalNarrowNodes++outer.coalReqNodes) zip - (nonCoalRRArbiters++coalRRArbiters)).foreach{ - case (node, arb) => - val (tlOut, edgeOut) = node.out(0) - val coal2TLAFunc = reqEntry2TLAFn(edgeOut) - val nonCoalTLATunnel = Module(new ConverterTunnel( - 
arb.io.out.bits.cloneType, - tlOut.a.bits.cloneType, - coal2TLAFunc - ) - ) - nonCoalTLATunnel.io.in <> arb.io.out - tlOut.a <> nonCoalTLATunnel.io.out - } - - - //Stage 3, Make the Idenity node pass through channel A - // Connect the K edges Identity Node to PO arbiter - // noncoalesced to port 1, coalesced to port 0 - - val priorityArbs = Seq.tabulate(config.numArbiterOutputPorts){_=> - Module(new Arbiter(outer.outputNode.out(0)._1.a.bits.cloneType, 2)) - } - - //Make both Idenity node Pass Through Channel A, for both Coal and NonCoal - ((outer.nonCoalNode.out ++ outer.coalNode.out) zip - (outer.nonCoalNode.in ++ outer.coalNode.in)).foreach{ - case ((tlOut,_),(tlIn,_)) => - tlOut.a <> tlIn.a - } - //Connection to PO Arbiters - ((outer.nonCoalNode.out zip outer.coalNode.out) zip priorityArbs).foreach{ - case (((nonCoalOut, _),(coalOut, _)), arb) => - arb.io.in(1) <> nonCoalOut.a - arb.io.in(0) <> coalOut.a - } - - - //Stage 4, Connect PO arbiter to each edge of output Node - //And make idenitity node passs through the inputs - ((outer.outputNode.in zip outer.outputNode.out) zip priorityArbs).foreach{ - case (((tlIn, _), (tlOut, _)), arb) => - tlOut.a <> tlIn.a - tlIn.a <> arb.io.out - } - - - - //////////////// - // Incoming Data Handling - - //Stage 1, Forward data from output node to the Idenity node of Coal and NonCoal - // while setting the correct valid signal to base on if the request is Coalesced or not - - ((outer.outputNode.in zip outer.outputNode.out) zip - (outer.nonCoalNode.out zip outer.coalNode.out)).foreach{ - case( ((tlIn, _),(tlOut, _)), ((nonCoalOut, _),(coalOut, _)) ) => - tlIn.d <> tlOut.d - nonCoalOut.d <> tlIn.d - coalOut.d <> tlIn.d - //rewrite valid signal - nonCoalOut.d.valid := !isCoalReq(tlIn.d.bits.source) && tlIn.d.valid - coalOut.d.valid := isCoalReq(tlIn.d.bits.source) && tlIn.d.valid - } - - //Stage 2, Make both Idenity node Pass Through Channel D, for both Coal and NonCoal - // - ((outer.nonCoalNode.out ++ outer.coalNode.out) zip - 
(outer.nonCoalNode.in ++ outer.coalNode.in)).foreach{ - case ((tlOut,_),(tlIn,_)) => - tlIn.d <> tlOut.d - } - - //Stage 3, Connect the channel D of nonCoalNodes to the perLane arbiters - - //Stage 3a, connect the noncoalesced edge to every single perlane arbiter - val perLaneRespRRArbs = Seq.tabulate(config.numLanes){_=> - Module(new RRArbiter(respNonCoalEntryT.cloneType, config.numArbiterOutputPorts)) - } - outer.nonCoalNarrowNodes.zipWithIndex.foreach{ - case (node, node_idx) => - val (tlOut, edgeOut) = node.out(0) - perLaneRespRRArbs.zipWithIndex.foreach{ - case(arb, arb_idx) => - val tlD2RespEntryFunc = tlD2respEntryFn() - val perLaneArbTunnel = Module(new ConverterTunnel( - tlOut.d.bits.cloneType, - arb.io.in(0).bits.cloneType, - tlD2RespEntryFunc - ) - ) - perLaneArbTunnel.io.in <> tlOut.d - arb.io.in(node_idx) <> perLaneArbTunnel.io.out - //rewrite valid base on if source id actually belongs to this lane - arb.io.in(node_idx).valid := belongsToLane(arb_idx.U, perLaneArbTunnel.io.out.bits.source) && - perLaneArbTunnel.io.out.valid - } - } - //Stage 3b, connect coalesced request to - val coalBundleRRArbiter = Module(new RRArbiter(respCoalBundleT.cloneType, config.numArbiterOutputPorts)) + //For the coalesced data response + //Have an RR arbiter that holds the response data + val coalRespRRArbiter = Module(new RRArbiter( + outer.node.in(0)._1.d.bits.cloneType, + config.numCoalReqs) + ) outer.coalReqNodes.zipWithIndex.foreach{ - case(node, node_idx) => - val (tlOut, edgeOut) = node.out(0) - val tlD2CoalBundleFunc = tlD2CoalBundleFn() - val coalBundleArbTunnel = Module(new ConverterTunnel( - tlOut.d.bits.cloneType, - coalBundleRRArbiter.io.in(0).bits.cloneType, - tlD2CoalBundleFunc - ) - ) - coalBundleArbTunnel.io.in <> tlOut.d - coalBundleRRArbiter.io.in(node_idx) <> coalBundleArbTunnel.io.out + case(node, idx) => + val (tlOut, edgeOut) = node.out(0) + coalRespRRArbiter.io.in(idx) <> tlOut.d } - - - //Connect 4, Connect the arbiters to output - // connect the 
noncoalesced vector - (perLaneRespRRArbs zip io.nonCoalResps).foreach{ - case (arb, resp) => - resp <> arb.io.out - } - // connect the coalesced bundle - io.coalResp <> coalBundleRRArbiter.io.out - - - + //Connect output of arbiter to coalesced response output + io.coalResp.valid := coalRespRRArbiter.io.out.valid + coalRespRRArbiter.io.out.ready := io.coalResp.ready + val coalRespBundle = Wire(respCoalBundleT) + coalRespBundle.fromTLD(coalRespRRArbiter.io.out.bits) + io.coalResp.bits := coalRespBundle }