Revamp uncoalescer IO
Connect coalescer output directly to the uncoalescer at the toplevel, and do table entry construction entirely inside the module. WIP: unittest is very broken as a result of this.
This commit is contained in:
@@ -41,6 +41,13 @@ object DefaultInFlightTableSizeEnum extends InFlightTableSizeEnum {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Mapping to reference model param names
|
||||||
|
// numLanes: Int, <-> config.NUM_LANES
|
||||||
|
// numPerLaneReqs: Int, <-> config.DEPTH
|
||||||
|
// sourceWidth: Int, <-> log2ceil(config.NUM_OLD_IDS)
|
||||||
|
// sizeWidth: Int, <-> config.sizeEnum.width
|
||||||
|
// coalDataWidth: Int, <-> (1 << config.MAX_SIZE)
|
||||||
|
// numInflightCoalRequests: Int <-> config.NUM_NEW_IDS
|
||||||
case class CoalescerConfig(
|
case class CoalescerConfig(
|
||||||
enable: Boolean, // globally enable or disable coalescing
|
enable: Boolean, // globally enable or disable coalescing
|
||||||
numLanes: Int, // number of lanes (or threads) in a warp
|
numLanes: Int, // number of lanes (or threads) in a warp
|
||||||
@@ -137,7 +144,8 @@ class CoalescingUnit(config: CoalescerConfig)(implicit p: Parameters) extends La
|
|||||||
// Protocol-agnostic bundles that represent a request and a response to the
|
// Protocol-agnostic bundles that represent a request and a response to the
|
||||||
// coalescer.
|
// coalescer.
|
||||||
|
|
||||||
class Request(sourceWidth: Int, sizeWidth: Int, addressWidth: Int, dataWidth: Int) extends Bundle {
|
class Request(sourceWidth: Int, sizeWidth: Int, addressWidth: Int, dataWidth: Int)
|
||||||
|
extends Bundle {
|
||||||
require(dataWidth % 8 == 0, s"dataWidth (${dataWidth} bits) is not multiple of 8")
|
require(dataWidth % 8 == 0, s"dataWidth (${dataWidth} bits) is not multiple of 8")
|
||||||
val op = UInt(1.W) // 0=READ 1=WRITE
|
val op = UInt(1.W) // 0=READ 1=WRITE
|
||||||
val address = UInt(addressWidth.W)
|
val address = UInt(addressWidth.W)
|
||||||
@@ -181,6 +189,7 @@ case class CoalescedRequest(config: CoalescerConfig)
|
|||||||
|
|
||||||
class Response(sourceWidth: Int, sizeWidth: Int, dataWidth: Int)
|
class Response(sourceWidth: Int, sizeWidth: Int, dataWidth: Int)
|
||||||
extends Bundle {
|
extends Bundle {
|
||||||
|
require(dataWidth % 8 == 0, s"dataWidth (${dataWidth} bits) is not multiple of 8")
|
||||||
val op = UInt(1.W) // 0=READ 1=WRITE
|
val op = UInt(1.W) // 0=READ 1=WRITE
|
||||||
val size = UInt(sizeWidth.W)
|
val size = UInt(sizeWidth.W)
|
||||||
val source = UInt(sourceWidth.W)
|
val source = UInt(sourceWidth.W)
|
||||||
@@ -382,10 +391,10 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig)
|
|||||||
class MonoCoalescer(
|
class MonoCoalescer(
|
||||||
config: CoalescerConfig,
|
config: CoalescerConfig,
|
||||||
coalLogSize: Int,
|
coalLogSize: Int,
|
||||||
windowT: CoalShiftQueue[NonCoalescedRequest]
|
queueT: CoalShiftQueue[NonCoalescedRequest]
|
||||||
) extends Module {
|
) extends Module {
|
||||||
val io = IO(new Bundle {
|
val io = IO(new Bundle {
|
||||||
val window = Input(windowT.io.cloneType)
|
val window = Input(queueT.io.cloneType)
|
||||||
val results = Output(new Bundle {
|
val results = Output(new Bundle {
|
||||||
val leaderIdx = Output(UInt(log2Ceil(config.numLanes).W))
|
val leaderIdx = Output(UInt(log2Ceil(config.numLanes).W))
|
||||||
val baseAddr = Output(UInt(config.addressWidth.W))
|
val baseAddr = Output(UInt(config.addressWidth.W))
|
||||||
@@ -478,7 +487,8 @@ class MonoCoalescer(
|
|||||||
val chosenLeaderIdx = chooseLeaderPriorityEncoder(matchCounts)
|
val chosenLeaderIdx = chooseLeaderPriorityEncoder(matchCounts)
|
||||||
|
|
||||||
val chosenLeader = VecInit(leaders)(chosenLeaderIdx) // mux
|
val chosenLeader = VecInit(leaders)(chosenLeaderIdx) // mux
|
||||||
// matchTable for the chosen lane, but converted to a Vec[UInt]
|
// matchTable for the chosen lane, but each column converted to bitflags,
|
||||||
|
// i.e. Vec[UInt]
|
||||||
val chosenMatches = VecInit(matchTablePerLane.map { table =>
|
val chosenMatches = VecInit(matchTablePerLane.map { table =>
|
||||||
VecInit(table.map(VecInit(_).asUInt))
|
VecInit(table.map(VecInit(_).asUInt))
|
||||||
})(chosenLeaderIdx)
|
})(chosenLeaderIdx)
|
||||||
@@ -532,23 +542,25 @@ class MonoCoalescer(
|
|||||||
// Software model: coalescer.py
|
// Software model: coalescer.py
|
||||||
class MultiCoalescer(
|
class MultiCoalescer(
|
||||||
config: CoalescerConfig,
|
config: CoalescerConfig,
|
||||||
windowT: CoalShiftQueue[NonCoalescedRequest],
|
queueT: CoalShiftQueue[NonCoalescedRequest],
|
||||||
coalReqT: Request,
|
coalReqT: Request,
|
||||||
) extends Module {
|
) extends Module {
|
||||||
|
val invalidateT = Valid(Vec(config.numLanes, UInt(config.queueDepth.W)))
|
||||||
val io = IO(new Bundle {
|
val io = IO(new Bundle {
|
||||||
// coalescing window, connected to the contents of the request queues
|
// coalescing window, connected to the contents of the request queues
|
||||||
val window = Input(windowT.io.cloneType)
|
val window = Input(queueT.io.cloneType)
|
||||||
// generated coalesced request
|
// generated coalesced request
|
||||||
val coalReq = DecoupledIO(coalReqT.cloneType)
|
val coalReq = DecoupledIO(coalReqT.cloneType)
|
||||||
// invalidate signals going into each request queue's head
|
// invalidate signals going into each request queue's head. Lanes with
|
||||||
val invalidate =
|
// high invalidate bits are what became coalesced into the new request.
|
||||||
Output(Valid(Vec(config.numLanes, UInt(config.queueDepth.W))))
|
val invalidate = Output(invalidateT)
|
||||||
// whether a lane is coalescable
|
// whether a lane is coalescable. This is used to output non-coalescable
|
||||||
|
// lanes to the arbiter so they can be flushed to downstream.
|
||||||
val coalescable = Output(Vec(config.numLanes, Bool()))
|
val coalescable = Output(Vec(config.numLanes, Bool()))
|
||||||
})
|
})
|
||||||
|
|
||||||
val coalescers = config.coalLogSizes.map(size =>
|
val coalescers = config.coalLogSizes.map(size =>
|
||||||
Module(new MonoCoalescer(config, size, windowT))
|
Module(new MonoCoalescer(config, size, queueT))
|
||||||
)
|
)
|
||||||
coalescers.foreach(_.io.window := io.window)
|
coalescers.foreach(_.io.window := io.window)
|
||||||
|
|
||||||
@@ -701,11 +713,15 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig)
|
|||||||
s"TL param addressBits (${outer.cpuNode.in.head._1.params.addressBits}) " +
|
s"TL param addressBits (${outer.cpuNode.in.head._1.params.addressBits}) " +
|
||||||
s"mismatch with config.addressWidth (${config.addressWidth})"
|
s"mismatch with config.addressWidth (${config.addressWidth})"
|
||||||
)
|
)
|
||||||
|
require(
|
||||||
|
config.maxCoalLogSize <= config.dataBusWidth,
|
||||||
|
"multi-beat coalesced reads/writes are currently not supported"
|
||||||
|
)
|
||||||
|
|
||||||
val oldSourceWidth = outer.cpuNode.in.head._1.params.sourceBits
|
val oldSourceWidth = outer.cpuNode.in.head._1.params.sourceBits
|
||||||
val reqQueueEntryT = new NonCoalescedRequest(config)
|
val nonCoalReqT = new NonCoalescedRequest(config)
|
||||||
val reqQueues = Module(
|
val reqQueues = Module(
|
||||||
new CoalShiftQueue(reqQueueEntryT, config.queueDepth, config)
|
new CoalShiftQueue(nonCoalReqT, config.queueDepth, config)
|
||||||
)
|
)
|
||||||
|
|
||||||
val coalReqT = new CoalescedRequest(config)
|
val coalReqT = new CoalescedRequest(config)
|
||||||
@@ -725,7 +741,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig)
|
|||||||
(outer.cpuNode.in zip outer.cpuNode.out).zipWithIndex.foreach {
|
(outer.cpuNode.in zip outer.cpuNode.out).zipWithIndex.foreach {
|
||||||
case (((tlIn, _), (tlOut, edgeOut)), lane) =>
|
case (((tlIn, _), (tlOut, edgeOut)), lane) =>
|
||||||
// Request queue
|
// Request queue
|
||||||
val req = Wire(reqQueueEntryT)
|
val req = Wire(nonCoalReqT)
|
||||||
|
|
||||||
req.op := TLUtils.AOpcodeIsStore(tlIn.a.bits.opcode)
|
req.op := TLUtils.AOpcodeIsStore(tlIn.a.bits.opcode)
|
||||||
req.source := tlIn.a.bits.source
|
req.source := tlIn.a.bits.source
|
||||||
@@ -782,6 +798,12 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig)
|
|||||||
// tlCoal.d.ready := true.B // this should be connected to uncoalescer's ready, done below.
|
// tlCoal.d.ready := true.B // this should be connected to uncoalescer's ready, done below.
|
||||||
tlCoal.e.valid := false.B
|
tlCoal.e.valid := false.B
|
||||||
|
|
||||||
|
require(
|
||||||
|
tlCoal.params.dataBits == (1 << config.dataBusWidth) * 8,
|
||||||
|
s"tlCoal param `dataBits` (${tlCoal.params.dataBits}) mismatches coalescer constant"
|
||||||
|
+ s" (${(1 << config.dataBusWidth) * 8})"
|
||||||
|
)
|
||||||
|
|
||||||
// ===========================================================================
|
// ===========================================================================
|
||||||
// Response flow
|
// Response flow
|
||||||
// ===========================================================================
|
// ===========================================================================
|
||||||
@@ -870,57 +892,21 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig)
|
|||||||
dontTouch(tlOut.d)
|
dontTouch(tlOut.d)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Construct new entry for the inflight table
|
val uncoalescer = Module(
|
||||||
// FIXME: don't instantiate inflight table entry type here. It leaks the table's impl
|
new Uncoalescer(config, reqQueues, coalReqT, coalescer.invalidateT)
|
||||||
// detail to the coalescer
|
|
||||||
|
|
||||||
// richard: I think a good idea is to pass Valid[ReqQueueEntry] generated by
|
|
||||||
// the coalescer directly into the uncoalescer, so that we can offload the
|
|
||||||
// logic to generate the Inflight Entry into the uncoalescer, where it should be.
|
|
||||||
// this also reduces top level clutter.
|
|
||||||
|
|
||||||
val uncoalescer = Module(new Uncoalescer(config))
|
|
||||||
|
|
||||||
val newEntry = Wire(uncoalescer.inflightTable.entryT)
|
|
||||||
newEntry.source := coalescer.io.coalReq.bits.source
|
|
||||||
|
|
||||||
assert(
|
|
||||||
config.maxCoalLogSize <= config.dataBusWidth,
|
|
||||||
"multi-beat coalesced reads/writes are currently not supported"
|
|
||||||
)
|
|
||||||
assert(
|
|
||||||
tlCoal.params.dataBits == (1 << config.dataBusWidth) * 8,
|
|
||||||
s"tlCoal param `dataBits` (${tlCoal.params.dataBits}) mismatches coalescer constant"
|
|
||||||
+ s" (${(1 << config.dataBusWidth) * 8})"
|
|
||||||
)
|
)
|
||||||
|
// connect coalesced request that is newly generated and being recorded in
|
||||||
|
// the uncoalescer
|
||||||
|
uncoalescer.io.coalReq <> coalescer.io.coalReq
|
||||||
|
uncoalescer.io.invalidate := coalescer.io.invalidate
|
||||||
val reqQueueHeads = reqQueues.io.queue.deq.map(_.bits)
|
val reqQueueHeads = reqQueues.io.queue.deq.map(_.bits)
|
||||||
// Do a 2-D copy from every (numLanes * queueDepth) invalidate output of the
|
uncoalescer.io.window := reqQueues.io
|
||||||
// coalescer to every (numLanes * queueDepth) entry in the inflight table.
|
// connect coalesced response going into the uncoalescer, ready to be
|
||||||
(newEntry.lanes zip coalescer.io.invalidate.bits).zipWithIndex
|
// uncoalesced
|
||||||
.foreach { case ((laneEntry, laneInv), lane) =>
|
|
||||||
(laneEntry.reqs zip laneInv.asBools).zipWithIndex
|
|
||||||
.foreach { case ((reqEntry, inv), i) =>
|
|
||||||
val req = reqQueues.io.elts(lane)(i)
|
|
||||||
when((coalescer.io.invalidate.valid && inv)) {
|
|
||||||
printf(
|
|
||||||
s"coalescer: reqQueue($lane)($i) got invalidated (source=%d)\n",
|
|
||||||
req.source
|
|
||||||
)
|
|
||||||
}
|
|
||||||
reqEntry.valid := (coalescer.io.invalidate.valid && inv)
|
|
||||||
reqEntry.source := req.source
|
|
||||||
reqEntry.offset := ((req.address % (1 << config.maxCoalLogSize).U) >> config.wordSizeWidth)
|
|
||||||
reqEntry.sizeEnum := config.sizeEnum.logSizeToEnum(req.size)
|
|
||||||
// TODO: load/store op
|
|
||||||
}
|
|
||||||
}
|
|
||||||
dontTouch(newEntry)
|
|
||||||
|
|
||||||
uncoalescer.io.coalReqValid := coalescer.io.coalReq.valid
|
|
||||||
uncoalescer.io.newEntry := newEntry
|
|
||||||
// Cleanup: custom <>?
|
// Cleanup: custom <>?
|
||||||
uncoalescer.io.coalResp.valid := tlCoal.d.valid
|
uncoalescer.io.coalResp.valid := tlCoal.d.valid
|
||||||
uncoalescer.io.coalResp.bits.fromTLD(tlCoal.d.bits)
|
uncoalescer.io.coalResp.bits.fromTLD(tlCoal.d.bits)
|
||||||
|
// uncoalescer backpressure
|
||||||
tlCoal.d.ready := uncoalescer.io.coalResp.ready
|
tlCoal.d.ready := uncoalescer.io.coalResp.ready
|
||||||
|
|
||||||
// Connect uncoalescer results back into each lane's response queue
|
// Connect uncoalescer results back into each lane's response queue
|
||||||
@@ -935,11 +921,11 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig)
|
|||||||
)
|
)
|
||||||
q.io.enq(respQueueUncoalPortOffset + i).valid := resp.valid
|
q.io.enq(respQueueUncoalPortOffset + i).valid := resp.valid
|
||||||
q.io.enq(respQueueUncoalPortOffset + i).bits := resp.bits
|
q.io.enq(respQueueUncoalPortOffset + i).bits := resp.bits
|
||||||
// debug
|
// debug
|
||||||
// when (resp.valid) {
|
// when (resp.valid) {
|
||||||
// printf(s"${i}-th uncoalesced response came back from lane ${lane}\n")
|
// printf(s"${i}-th uncoalesced response came back from lane ${lane}\n")
|
||||||
// }
|
// }
|
||||||
// dontTouch(q.io.enq(respQueueCoalPortOffset))
|
// dontTouch(q.io.enq(respQueueCoalPortOffset))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -952,48 +938,76 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig)
|
|||||||
dontTouch(tlCoal.d)
|
dontTouch(tlCoal.d)
|
||||||
}
|
}
|
||||||
|
|
||||||
class Uncoalescer(config: CoalescerConfig) extends Module {
|
class Uncoalescer(
|
||||||
// Mapping to reference model param names
|
config: CoalescerConfig,
|
||||||
// val numLanes: Int, <-> config.NUM_LANES
|
queueT: CoalShiftQueue[NonCoalescedRequest],
|
||||||
// val numPerLaneReqs: Int, <-> config.DEPTH
|
coalReqT: Request,
|
||||||
// val sourceWidth: Int, <-> log2ceil(config.NUM_OLD_IDS)
|
coalInvalidateT: Valid[Vec[UInt]],
|
||||||
// val sizeWidth: Int, <-> config.sizeEnum.width
|
) extends Module {
|
||||||
// val coalDataWidth: Int, <-> (1 << config.MAX_SIZE)
|
|
||||||
// val numInflightCoalRequests: Int <-> config.NUM_NEW_IDS
|
|
||||||
val inflightTable = Module(new InflightCoalReqTable(config))
|
val inflightTable = Module(new InflightCoalReqTable(config))
|
||||||
val io = IO(new Bundle {
|
val io = IO(new Bundle {
|
||||||
val coalReqValid = Input(Bool())
|
// generated coalesced request, connected to the output of the coalescer.
|
||||||
// FIXME: receive ReqQueueEntry and construct newEntry inside uncoalescer
|
val coalReq = Flipped(DecoupledIO(coalReqT.cloneType))
|
||||||
val newEntry = Input(inflightTable.entryT.cloneType)
|
// invalidate signal coming out of coalescer.
|
||||||
|
val invalidate = Input(coalInvalidateT.cloneType)
|
||||||
|
// coalescing window, connected to the contents of the request queues.
|
||||||
|
// Uncoalescer looks at the queue entries that got coalesced into `coalReq`
|
||||||
|
// in order to record which lanes this coalReq originally came from.
|
||||||
|
val window = Input(queueT.io.cloneType)
|
||||||
val coalResp = Flipped(Decoupled(new CoalescedResponse(config)))
|
val coalResp = Flipped(Decoupled(new CoalescedResponse(config)))
|
||||||
val uncoalResps = Output(
|
val uncoalResps = Output(
|
||||||
Vec(
|
Vec(
|
||||||
config.numLanes,
|
config.numLanes,
|
||||||
Vec(
|
Vec(config.queueDepth, ValidIO(new NonCoalescedResponse(config)))
|
||||||
config.queueDepth,
|
|
||||||
ValidIO(
|
|
||||||
new NonCoalescedResponse(config)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
// Populate inflight table
|
// Uncoalescer has to be always ready to accept and record new coalesced
|
||||||
inflightTable.io.enq.valid := io.coalReqValid
|
// requests, so that it doesn't stall the coalescer.
|
||||||
inflightTable.io.enq.bits := io.newEntry
|
io.coalReq.ready := true.B
|
||||||
|
|
||||||
|
// Construct a new entry for the inflight table using generated coalesced request
|
||||||
|
def generateInflightTableEntry: InflightCoalReqTableEntry = {
|
||||||
|
val newEntry = Wire(inflightTable.entryT)
|
||||||
|
newEntry.source := io.coalReq.bits.source
|
||||||
|
// Do a 2-D copy from every (numLanes * queueDepth) invalidate output of the
|
||||||
|
// coalescer to every (numLanes * queueDepth) entry in the inflight table.
|
||||||
|
(newEntry.lanes zip io.invalidate.bits).zipWithIndex
|
||||||
|
.foreach { case ((laneEntry, laneInv), lane) =>
|
||||||
|
(laneEntry.reqs zip laneInv.asBools).zipWithIndex
|
||||||
|
.foreach { case ((reqEntry, inv), i) =>
|
||||||
|
val req = io.window.elts(lane)(i)
|
||||||
|
when((io.invalidate.valid && inv)) {
|
||||||
|
printf(
|
||||||
|
s"coalescer: reqQueue($lane)($i) got invalidated (source=%d)\n",
|
||||||
|
req.source
|
||||||
|
)
|
||||||
|
}
|
||||||
|
reqEntry.valid := (io.invalidate.valid && inv)
|
||||||
|
reqEntry.source := req.source
|
||||||
|
reqEntry.offset := ((req.address % (1 << config.maxCoalLogSize).U) >> config.wordSizeWidth)
|
||||||
|
reqEntry.sizeEnum := config.sizeEnum.logSizeToEnum(req.size)
|
||||||
|
// TODO: load/store op
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert(
|
||||||
|
!((io.coalReq.valid === true.B) && (io.coalResp.valid === true.B) &&
|
||||||
|
(newEntry.source === io.coalResp.bits.source)),
|
||||||
|
"inflight table: enqueueing and looking up the same srcId at the same cycle is not handled"
|
||||||
|
)
|
||||||
|
dontTouch(newEntry)
|
||||||
|
|
||||||
|
newEntry
|
||||||
|
}
|
||||||
|
inflightTable.io.enq.valid := io.coalReq.valid
|
||||||
|
inflightTable.io.enq.bits := generateInflightTableEntry
|
||||||
|
|
||||||
// Look up the table with incoming coalesced responses
|
// Look up the table with incoming coalesced responses
|
||||||
inflightTable.io.lookup.ready := io.coalResp.valid
|
inflightTable.io.lookup.ready := io.coalResp.valid
|
||||||
inflightTable.io.lookupSourceId := io.coalResp.bits.source
|
inflightTable.io.lookupSourceId := io.coalResp.bits.source
|
||||||
io.coalResp.ready := true.B // FIXME, see sw model implementation
|
io.coalResp.ready := true.B // FIXME, see sw model implementation
|
||||||
|
|
||||||
assert(
|
|
||||||
!((io.coalReqValid === true.B) && (io.coalResp.valid === true.B) &&
|
|
||||||
(io.newEntry.source === io.coalResp.bits.source)),
|
|
||||||
"inflight table: enqueueing and looking up the same srcId at the same cycle is not handled"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Un-coalescing logic
|
// Un-coalescing logic
|
||||||
//
|
//
|
||||||
def getCoalescedDataChunk(data: UInt, dataWidth: Int, offset: UInt, logSize: UInt): UInt = {
|
def getCoalescedDataChunk(data: UInt, dataWidth: Int, offset: UInt, logSize: UInt): UInt = {
|
||||||
|
|||||||
Reference in New Issue
Block a user