diff --git a/src/main/scala/rocket/VortexFatBank.scala b/src/main/scala/rocket/VortexFatBank.scala
index 9481af8..dddf348 100644
--- a/src/main/scala/rocket/VortexFatBank.scala
+++ b/src/main/scala/rocket/VortexFatBank.scala
@@ -10,6 +10,97 @@ import freechips.rocketchip.tilelink._
 import org.chipsalliance.cde.config.{Parameters, Field}
 
 
+
+
+// Delete the following NewSourceGenerator when merging with origin/graphics
+//we should just use the one in coalescing.scala written by hansung
+
+/** Allocates source IDs and frees them again once they are reclaimed.
+  *
+  * An ID is allocated on every cycle where `io.gen && io.id.valid` holds and
+  * stays marked in use until that ID is presented on `io.reclaim`. When a
+  * metadata type is given, one value per ID is stored at allocation and can be
+  * read back on `io.peek` by driving `io.reclaim.bits` with that ID.
+  *
+  * @param sourceWidth bit width of an ID; `1 << sourceWidth` IDs are managed
+  * @param metadata    hardware type stored next to each ID, or None for no storage
+  * @param ignoreInUse when true, `io.id.valid` is tied high regardless of occupancy
+  */
+class NewSourceGenerator[T <: Data](
+  sourceWidth: Int,
+  metadata: Option[T] = None,
+  ignoreInUse: Boolean = false
+) extends Module {
+  /** Hardware type of the `meta`/`peek` ports; zero-width when no metadata is kept. */
+  def getMetadataType = metadata match {
+    case Some(gen) => gen.cloneType
+    case None => UInt(0.W)
+  }
+  val io = IO(new Bundle {
+    // request allocation of the ID currently shown on `id`
+    val gen = Input(Bool())
+    // ID being handed back; its entry is freed while `valid` is high
+    val reclaim = Input(Valid(UInt(sourceWidth.W)))
+    // lowest free ID; `valid` is low when every ID is in use (unless ignoreInUse)
+    val id = Output(Valid(UInt(sourceWidth.W)))
+    // below are used only when metadata is not None
+    // `meta` is used as input when a request succeeds id generation to store
+    // its value to the table.
+    // `peek` is the retrieved metadata saved for the request when corresponding
+    // request has come back, setting `reclaim`.
+    // Although these do not use ValidIO, it is safe because any in-flight
+    // response coming back should have allocated a valid entry in the table
+    // when it went out.
+    val meta = Input(getMetadataType)
+    val peek = Output(getMetadataType)
+    // for debugging; indicates whether there is at least one inflight request
+    // that hasn't been reclaimed yet
+    val inflight = Output(Bool())
+  })
+
+  val numSourceId = 1 << sourceWidth
+
+  // true: in use, false: available.
+  // Kept in registers rather than a Mem: every flag is read each cycle to find
+  // the lowest free ID and all of them are cleared together on reset.
+  val inUse = RegInit(VecInit(Seq.fill(numSourceId)(false.B)))
+  // Metadata needs no reset value, so it lives in a Mem that is only built
+  // when a metadata type was supplied.
+  val metaTable = metadata.map(_ => Mem(numSourceId, getMetadataType))
+
+  val frees = inUse.map(!_)
+  val lowestFree = PriorityEncoder(frees)
+
+  io.id.valid := (if (ignoreInUse) true.B else !inUse(lowestFree))
+  io.id.bits := lowestFree
+
+  val allocate = io.gen && io.id.valid
+  val release = io.reclaim.valid
+
+  when(allocate) {
+    inUse(io.id.bits) := true.B // mark in use
+    metaTable.foreach { table => table(io.id.bits) := io.meta }
+  }
+  when(release) {
+    inUse(io.reclaim.bits) := false.B // mark freed
+  }
+  io.peek := metaTable.map(table => table(io.reclaim.bits)).getOrElse(0.U)
+
+  // Count of allocated-but-not-yet-reclaimed IDs; unchanged when an allocation
+  // and a reclaim happen in the same cycle.
+  val outstanding = RegInit(UInt((sourceWidth + 1).W), 0.U)
+  io.inflight := (outstanding > 0.U) || io.gen
+  when(allocate && !release) {
+    assert(outstanding < numSourceId.U)
+    outstanding := outstanding + 1.U
+  }.elsewhen(!allocate && release) {
+    assert(outstanding > 0.U)
+    outstanding := outstanding - 1.U
+  }
+  dontTouch(outstanding)
+}
+
+
 // VortexTile has dmemNodes, imemNodes
 //Param and Key are used during SoC Generation
 
@@ -21,6 +112,8 @@ case class VortexFatBankConfig(
   wordSize: Int, //This is the read/write granularity of the L1 cache
   cacheLineSize: Int,
   coreTagWidth: Int,
+  writeInfoReqQSize: Int, //entries in the queue of writes that still owe the coalescer an AccessAck
+  mshrSize: Int //passed to VX_cache as MSHR_SIZE; log2Ceil of it is the width of the L2 source IDs
 ) {
   def coreTagPlusSizeWidth: Int = {
     log2Ceil(wordSize) + coreTagWidth
@@ -31,6 +124,8 @@ object defaultFatBankConfig extends VortexFatBankConfig(
   wordSize = 16,
   cacheLineSize = 16,
   coreTagWidth = 8,
+  writeInfoReqQSize = 16,
+  mshrSize = 8
 )
 
 
@@ -83,7 +178,8 @@ class VortexFatBankImp (
   val vxCache = Module(new VX_cache(
     WORD_SIZE=config.wordSize,
     CACHE_LINE_SIZE=config.cacheLineSize,
-    CORE_TAG_WIDTH= config.coreTagPlusSizeWidth
+    CORE_TAG_WIDTH= config.coreTagPlusSizeWidth,
+    MSHR_SIZE= config.mshrSize
   )
   );
 
@@ -94,16 +190,13 @@ class VortexFatBankImp (
     val id = UInt(32.W)
     val size = UInt(32.W)
   }
-
-  // assuming this is never full
-  val rcvWriteReqInfo = Module(new Queue((new WriteReqInfo).cloneType, 64, true, false))
-
   class ReadReqInfo(config: VortexFatBankConfig) extends Bundle {
     val size = UInt(log2Ceil(config.wordSize).W)
     val id = UInt(config.coreTagWidth.W)
   }
 
 
+  val rcvWriteReqInfo = Module(new Queue((new WriteReqInfo).cloneType, config.writeInfoReqQSize, pipe = true, flow = false))
   val readReqInfo = Wire(new ReadReqInfo(config))
 
   // Translate TL request from Coalescer to requests for VX_cache
@@ -112,16 +205,19 @@ class VortexFatBankImp (
     // coal -> vxCache request on channel A
     val coalToBankA = coalToBankBundle.a;
 
-    coalToBankA.ready := vxCache.io.core_req_ready
-    vxCache.io.core_req_valid := coalToBankA.valid
+    coalToBankA.ready := vxCache.io.core_req_ready && rcvWriteReqInfo.io.enq.ready //not optimal
+    // qualify valid with the same queue-space term used in ready above, so the cache is
+    // never offered a request in a cycle where channel A cannot fire
+    vxCache.io.core_req_valid := coalToBankA.valid && rcvWriteReqInfo.io.enq.ready
 
     // read = 0, write = 1
     vxCache.io.core_req_rw := !(coalToBankA.bits.opcode === TLMessages.Get)
-    //4 is also hardcoded, it should be log2WordSize
-    vxCache.io.core_req_addr := coalToBankA.bits.address(31, 4)
+    // drop the low log2Ceil(wordSize) address bits
+    vxCache.io.core_req_addr := coalToBankA.bits.address(31, log2Ceil(config.wordSize))
     vxCache.io.core_req_byteen := coalToBankA.bits.mask
     vxCache.io.core_req_data := coalToBankA.bits.data
 
+    //combine size and tag field into one big wire, to put into vxCache.io.core_req_tag
     readReqInfo.id := coalToBankA.bits.source
     readReqInfo.size := coalToBankA.bits.size
     vxCache.io.core_req_tag := readReqInfo.asTypeOf(vxCache.io.core_req_tag)
@@ -135,9 +231,7 @@ class VortexFatBankImp (
 
     // if we don't send ack, the coalescer will run out of IDs, and can't generate new request
     // for read request, we send AckData when the FatBank has a valid output
-    // for write request, we can immediate Ack on the next clock cycle (not the same clock cycle, otherwise critical path too long)
-    // It's possible that on the same cycle, we need to do both "AckData" and "Ack"
-    // in this case, we always priorize "Ack", this makes the design easier
+    // for write request, we can ack whenever we have a valid entry in rcvWriteReqInfo Queue
 
     //I think this just shows the flaws of Tilelink. CPU never waits for an Ack upon regular write request
     //the Core should unconditionally move forward after every regular write request
@@ -150,39 +244,29 @@ class VortexFatBankImp (
 
     rcvWriteReqInfo.io.enq.bits.id := coalToBankA.bits.source
     rcvWriteReqInfo.io.enq.bits.size := coalToBankA.bits.size
-
-    rcvWriteReqInfo.io.deq.ready := coalToBankD.ready
+    //prioritize Ack for Read, so we only dequeue from writeReqInfo, if we don't have a readReq we need to ack
+    //vxCache.io.core_rsp_valid means readDataAck
+    rcvWriteReqInfo.io.deq.ready := coalToBankD.ready && ~vxCache.io.core_rsp_valid
 
-    //if we "need" to do Ack
-    //we unconditionally set the vxCache.ready to be false, so it gets delayed
-    vxCache.io.core_rsp_ready := Mux(
-      rcvWriteReqInfo.io.deq.valid,
-      false.B,
-      coalToBankD.ready
-    )
-
-    coalToBankD.valid := Mux(
-      rcvWriteReqInfo.io.deq.valid,
-      true.B,
-      vxCache.io.core_rsp_valid
-    )
+    vxCache.io.core_rsp_ready := coalToBankD.ready
+    coalToBankD.valid := vxCache.io.core_rsp_valid || rcvWriteReqInfo.io.deq.valid
 
     coalToBankD.bits.source := Mux(
-      rcvWriteReqInfo.io.deq.valid,
-      rcvWriteReqInfo.io.deq.bits.id,
-      vxCache.io.core_rsp_tag.asTypeOf(readReqInfo).id
+      vxCache.io.core_rsp_valid,
+      vxCache.io.core_rsp_tag.asTypeOf(readReqInfo).id,
+      rcvWriteReqInfo.io.deq.bits.id
     )
 
     coalToBankD.bits.opcode := Mux(
-      rcvWriteReqInfo.io.deq.valid,
-      TLMessages.AccessAck,
-      TLMessages.AccessAckData
+      vxCache.io.core_rsp_valid,
+      TLMessages.AccessAckData,
+      TLMessages.AccessAck
     )
 
     coalToBankD.bits.size := Mux(
-      rcvWriteReqInfo.io.deq.valid,
-      rcvWriteReqInfo.io.deq.bits.size,
-      vxCache.io.core_rsp_tag.asTypeOf(readReqInfo).size
+      vxCache.io.core_rsp_valid,
+      vxCache.io.core_rsp_tag.asTypeOf(readReqInfo).size,
+      rcvWriteReqInfo.io.deq.bits.size
     )
 
     coalToBankD.bits.param := 0.U
@@ -190,7 +274,7 @@ class VortexFatBankImp (
 
     coalToBankD.bits.denied := false.B
     coalToBankD.bits.corrupt := false.B
-    coalToBankD.bits.data := vxCache.io.core_rsp_data
+    coalToBankD.bits.data := vxCache.io.core_rsp_data
   }
 
 
@@ -200,10 +284,10 @@ class VortexFatBankImp (
   //vx_cache can indeed guarantee that all active read operation has unique ID
   //However, since the cache is write_through, so it can't ensure unique ID for write operation
   //Therefore, we need our own internal source_ID generator for all write operation
-  //
-  //Now, we allocate id range: 0-15 for all write operation
-  // and 16-> above for read operation
-  val sourceGen = Module( new SourceGenerator(log2Ceil(16), ignoreInUse = false))
+  //Every request sent to L2, read or write, takes its source ID from sourceGen.
+  //The cache's own mem_req_tag is stored as metadata at allocation and handed
+  //back to the cache when the response for that ID arrives.
+  val sourceGen = Module( new NewSourceGenerator(log2Ceil(config.mshrSize), metadata = Some(UInt(32.W)), ignoreInUse = false))
 
 
   // Translate VX_cache mem request to a TL request to be sent to L2
@@ -215,13 +299,10 @@ class VortexFatBankImp (
 
 
-    //Read Operation is ready as long as downstream L2 is ready
-    vxCache.io.mem_req_ready := vxCacheToL2A.ready
+    //A request is accepted only when L2 is ready AND a source ID is free; without
+    //the ID term the cache would see a handshake for a request that was never sent
+    vxCache.io.mem_req_ready := vxCacheToL2A.ready && sourceGen.io.id.valid
 
-    vxCacheToL2A.valid := Mux(
-      vxCache.io.mem_req_rw,
-      vxCache.io.mem_req_valid && sourceGen.io.id.valid,
-      vxCache.io.mem_req_valid
-    )
+    vxCacheToL2A.valid := vxCache.io.mem_req_valid && sourceGen.io.id.valid
 
     vxCacheToL2A.bits.opcode := Mux(
       vxCache.io.mem_req_rw,
@@ -230,21 +311,19 @@ class VortexFatBankImp (
     )
 
     vxCacheToL2A.bits.address := Cat(vxCache.io.mem_req_addr, 0.U(4.W))
+
     vxCacheToL2A.bits.mask := Mux(
       vxCache.io.mem_req_rw,
       vxCache.io.mem_req_byteen,
       0xFFFF.U
     )
+
     vxCacheToL2A.bits.data := vxCache.io.mem_req_data
 
-
-    vxCacheToL2A.bits.source := Mux(
-      vxCache.io.mem_req_rw,
-      sourceGen.io.id.bits,
-      vxCache.io.mem_req_tag + 16.U
-    )
-    //mark current source_id as in-use
-    sourceGen.io.gen := vxCache.io.mem_req_rw && vxCacheToL2A.ready && vxCacheToL2A.valid
+    vxCacheToL2A.bits.source := sourceGen.io.id.bits
+    //mark the source ID as in-use in the cycle the request fires on channel A
+    sourceGen.io.gen := vxCacheToL2A.ready && vxCacheToL2A.valid
+    sourceGen.io.meta := vxCache.io.mem_req_tag //remember the cache's tag for this ID
 
     vxCacheToL2A.bits.param := 0.U
     vxCacheToL2A.bits.size := 4.U
@@ -257,10 +336,11 @@ class VortexFatBankImp (
 
     vxCacheToL2D.ready := vxCache.io.mem_rsp_ready
     vxCache.io.mem_rsp_valid := vxCacheToL2D.valid && vxCacheToL2D.bits.opcode === TLMessages.AccessAckData
-    vxCache.io.mem_rsp_tag := vxCacheToL2D.bits.source - 16.U // -16 for read resp, we can safely do this, since write-ack wouldn't pass through
-    vxCache.io.mem_rsp_data := vxCacheToL2D.bits.data
+    vxCache.io.mem_rsp_tag := sourceGen.io.peek
+    vxCache.io.mem_rsp_data := vxCacheToL2D.bits.data
 
-    sourceGen.io.reclaim.valid := vxCacheToL2D.ready && vxCacheToL2D.valid && vxCacheToL2D.bits.opcode === TLMessages.AccessAck
+    // every response carries a sourceGen ID, so reclaim on AccessAck and AccessAckData alike
+    sourceGen.io.reclaim.valid := vxCacheToL2D.ready && vxCacheToL2D.valid
     sourceGen.io.reclaim.bits := vxCacheToL2D.bits.source
 
   }