732 lines
24 KiB
Scala
732 lines
24 KiB
Scala
// See LICENSE.SiFive for license details.
|
|
|
|
package freechips.rocketchip.tilelink
|
|
|
|
import chisel3._
|
|
import chisel3.util._
|
|
import freechips.rocketchip.config.Parameters
|
|
import freechips.rocketchip.diplomacy._
|
|
// import freechips.rocketchip.devices.tilelink.TLTestRAM
|
|
import freechips.rocketchip.util.MultiPortQueue
|
|
import freechips.rocketchip.unittest._
|
|
|
|
// Diplomatic wrapper of the coalescer.
//
// Exposes a single TileLink identity node (`node`) to the rest of the graph and
// owns an extra client node (`coalescerNode`) that is the source of the
// coalesced requests. `numLanes` is forwarded unchanged to the module
// implementation.
class CoalescingUnit(numLanes: Int = 1)(implicit p: Parameters) extends LazyModule {
  // Pass-through node seen by upstream and downstream neighbours. Per-lane
  // requests travel through it; the module implementation decides which of
  // them are dropped in favour of a coalesced request.
  val node = TLIdentityNode()

  // Upper limit on simultaneously outstanding coalesced requests. It also sizes
  // the sourceId range of the coalescer master below. It cannot usefully exceed
  // the sourceId range of a single lane.
  val numInflightCoalRequests = 4

  // Parameters of the master that issues the coalesced requests.
  protected val coalParam = Seq(
    TLMasterParameters.v1(name = "CoalescerNode", sourceId = IdRange(0, numInflightCoalRequests))
  )
  val coalescerNode = TLClientNode(Seq(TLMasterPortParameters.v1(coalParam)))

  // The coalescer master is bound first, so it becomes inward edge 0 of `node`
  // (the module implementation relies on that position).
  node :=* coalescerNode

  lazy val module = new CoalescingUnitImp(this, numLanes)
}
|
|
|
|
// Payload of one slot in a per-lane request queue.
//
// sourceWidth:  bit width of the `source` field
// addressWidth: bit width of the `address` field
class ReqQueueEntry(val sourceWidth: Int, val addressWidth: Int) extends Bundle {
  // sourceId of the buffered request
  val source = UInt(sourceWidth.W)
  // target address of the buffered request
  val address = UInt(addressWidth.W)
  // store payload; the width is fixed at 64 bits for now
  val data = UInt(64.W /* FIXME hardcoded */ ) // write data
}
|
|
|
|
// Payload of one slot in a per-lane response queue.
//
// sourceWidth:     bit width of the `source` field
// dataWidthInBits: bit width of the `data` field
class RespQueueEntry(val sourceWidth: Int, val dataWidthInBits: Int) extends Bundle {
  // sourceId the response is addressed to
  val source = UInt(sourceWidth.W)
  // load payload carried by the response
  val data = UInt(dataWidthInBits.W) // read data
}
|
|
|
|
// Module implementation of CoalescingUnit.
//
// For every lane edge of the identity node this inserts a request queue
// (CoalShiftQueue) and a response queue (MultiPortQueue). A placeholder
// coalescing rule fires whenever the heads of ALL lane request queues are valid:
// the heads are invalidated, one coalesced Get is issued through the coalescer
// master, and an entry is recorded in the uncoalescer's inflight table so the
// coalesced response can be split back into per-lane responses.
//
// outer:    the diplomatic wrapper; edge 0 of `outer.node` must be the
//           coalescer master, edges 1..numLanes are the lanes
// numLanes: number of per-lane request/response queue pairs to build
//
// The coalescing rule is derived from `numLanes` (it used to be hard-coded to
// lanes 0..3, which failed to elaborate for fewer than four lanes). With
// numLanes == 4 the generated logic is unchanged.
class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModuleImp(outer) {
  // Make sure IdentityNode is connected to an upstream node, not just the
  // coalescer TL master node
  assert(outer.node.in.length >= 2)

  val wordSize = 4

  val reqQueueDepth = 4 // FIXME test
  val respQueueDepth = 2 // FIXME test

  // Edge 1 is the first lane edge; its bundle parameters size the queue entries.
  val sourceWidth = outer.node.in(1)._1.params.sourceBits
  val addressWidth = outer.node.in(1)._1.params.addressBits
  val reqQueueEntryT = new ReqQueueEntry(sourceWidth, addressWidth)
  val reqQueues = Seq.tabulate(numLanes) { _ =>
    Module(new CoalShiftQueue(reqQueueEntryT, reqQueueDepth))
  }

  // The maximum number of requests from a single lane that can go into a
  // coalesced request. Upper bound is 2**sourceWidth.
  val numPerLaneReqs = 2

  val respQueueEntryT = new RespQueueEntry(sourceWidth, wordSize * 8)
  val respQueues = Seq.tabulate(numLanes) { _ =>
    Module(
      new MultiPortQueue(
        respQueueEntryT,
        // enq_lanes = 1 + M, where 1 is the response for the original per-lane
        // requests that didn't get coalesced, and M is the maximum number of
        // single-lane requests that can go into a coalesced request
        // (`numPerLaneReqs`).
        1 + numPerLaneReqs,
        // deq_lanes = 1 because we're serializing all responses to 1 port that
        // goes back to the core.
        1,
        // lanes. Has to be at least max(enq_lanes, deq_lanes)
        1 + numPerLaneReqs,
        // Depth of each lane queue.
        // XXX queue depth is set to an arbitrarily high value that doesn't
        // make queue block up in the middle of the simulation. Ideally there
        // should be a more logical way to set this, or we should handle
        // response queue blocking.
        respQueueDepth
      )
    )
  }
  // Enqueue port 0 takes responses that were never coalesced; ports
  // [respQueueCoalPortOffset, ...) take the uncoalesced pieces of a coalesced
  // response.
  val respQueueNoncoalPort = 0
  val respQueueCoalPortOffset = 1

  // did coalescing succeed at all?
  val coalReqValid = Wire(Bool())

  // Per-lane request and response queues
  //
  // Override IdentityNode implementation so that we can instantiate
  // queues between input and output edges to buffer requests and responses.
  // See IdentityNode definition in `diplomacy/Nodes.scala`.
  //
  // NOTE(review): tlIn.a.ready and tlOut.d.ready are not assigned anywhere in
  // this block, so they keep whatever the node's default wiring gives them;
  // confirm this is intended until the TODOs about ready handling are resolved.
  (outer.node.in zip outer.node.out).zipWithIndex.foreach {
    case (((tlIn, edgeIn), (tlOut, _)), 0) =>
      assert(
        edgeIn.master.masters(0).name == "CoalescerNode",
        "First edge is not connected to the coalescer master node"
      )
      // Edge from the coalescer TL master node should simply bypass the identity node,
      // except for connecting the outgoing edge to the inflight table, which is done
      // down below.
      tlOut.a <> tlIn.a
      tlIn.d <> tlOut.d
    case (((tlIn, edgeIn), (tlOut, edgeOut)), i) =>
      // Request queue
      //
      // Edge i (i >= 1) serves lane i - 1.
      val lane = i - 1
      val reqQueue = reqQueues(lane)
      val req = Wire(reqQueueEntryT)
      req.source := tlIn.a.bits.source
      req.address := tlIn.a.bits.address
      req.data := tlIn.a.bits.data

      reqQueue.io.enq.valid := tlIn.a.valid
      reqQueue.io.enq.bits := req
      // TODO: deq.ready should respect downstream ready
      reqQueue.io.deq.ready := true.B
      // Default: invalidate nothing. Overridden below when coalescing fires.
      reqQueue.io.invalidate := 0.U

      printf(s"reqQueue(${lane}).count=%d\n", reqQueue.io.count)

      // Invalidate coalesced requests: the head of every lane takes part in a
      // coalesced request, so the per-lane request is suppressed for all lanes
      // whenever coalescing fires.
      val invalidate = coalReqValid
      tlOut.a.valid := reqQueue.io.deq.valid && !invalidate

      val reqHead = reqQueue.io.deq.bits
      // FIXME: generate Get or Put according to read/write
      val (reqLegal, reqBits) = edgeOut.Get(
        fromSource = reqHead.source,
        // `toAddress` should be aligned to 2**lgSize
        toAddress = reqHead.address,
        lgSize = 0.U
      )
      assert(reqLegal, "unhandled illegal TL req gen")
      tlOut.a.bits := reqBits

      // Response queue
      //
      // This queue will serialize non-coalesced responses along with
      // coalesced responses and serve them back to the core side.
      val respQueue = respQueues(lane)
      val resp = Wire(respQueueEntryT)
      resp.source := tlOut.d.bits.source
      resp.data := tlOut.d.bits.data
      // TODO: read/write bit?

      // Queue up responses that didn't get coalesced originally ("noncoalesced" responses).
      // Coalesced (but uncoalesced back) responses will also be enqueued into the same queue.
      assert(
        respQueue.io.enq(respQueueNoncoalPort).ready,
        "respQueue: enq port for noncoalesced response is blocked"
      )
      respQueue.io.enq(respQueueNoncoalPort).valid := tlOut.d.valid
      respQueue.io.enq(respQueueNoncoalPort).bits := resp
      // TODO: deq.ready should respect upstream ready
      respQueue.io.deq(respQueueNoncoalPort).ready := true.B

      tlIn.d.valid := respQueue.io.deq(respQueueNoncoalPort).valid
      val respHead = respQueue.io.deq(respQueueNoncoalPort).bits
      val respBits = edgeIn.AccessAck(
        toSource = respHead.source,
        lgSize = 0.U,
        data = respHead.data
      )
      tlIn.d.bits := respBits

      // Debug only: counts up on a request cycle, down on a response cycle.
      val inflightCounter = RegInit(UInt(32.W), 0.U)
      when(tlOut.a.valid) {
        // don't inc/dec on simultaneous req/resp
        when(!tlOut.d.valid) {
          inflightCounter := inflightCounter + 1.U
        }
      }.elsewhen(tlOut.d.valid) {
        inflightCounter := inflightCounter - 1.U
      }

      dontTouch(inflightCounter)
      dontTouch(tlIn.a)
      dontTouch(tlIn.d)
      dontTouch(tlOut.a)
      dontTouch(tlOut.d)
  }

  // Generate coalesced requests
  //
  // The sourceId counter is sized to cover the coalescer master's IdRange
  // (0 until numInflightCoalRequests). It advances every cycle, whether or not
  // a coalesced request is issued.
  // NOTE(review): for a non-power-of-two numInflightCoalRequests the counter
  // still wraps at a power of two and can exceed the IdRange.
  val coalSourceIdBits = log2Ceil(outer.numInflightCoalRequests) max 1
  val coalSourceId = RegInit(0.U(coalSourceIdBits.W))
  coalSourceId := coalSourceId + 1.U

  val (tlCoal, edgeCoal) = outer.coalescerNode.out(0)
  val coalReqAddress = Wire(UInt(tlCoal.params.addressBits.W))
  // TODO: bogus address
  coalReqAddress := (0xabcd.U + coalSourceId) << 4
  // FIXME: placeholder rule -- coalesce the queue heads of all lanes whenever
  // every one of them is valid.
  coalReqValid := reqQueues.map(_.io.deq.valid).reduce(_ && _)
  when(coalReqValid) {
    // invalidate original requests (bit 0 = queue head) due to coalescing
    reqQueues.foreach { q => q.io.invalidate := 0x1.U }
  }

  val (legal, bits) = edgeCoal.Get(
    fromSource = coalSourceId,
    // `toAddress` should be aligned to 2**lgSize
    toAddress = coalReqAddress,
    // 64 bits = 8 bytes = 2**(3) bytes
    lgSize = 3.U
  )
  assert(legal, "unhandled illegal TL req gen")
  tlCoal.a.valid := coalReqValid
  tlCoal.a.bits := bits
  tlCoal.b.ready := true.B
  tlCoal.c.valid := false.B
  tlCoal.d.ready := true.B
  tlCoal.e.valid := false.B

  // Construct new entry for the inflight table
  // FIXME: don't instantiate inflight table entry type here. It leaks the table's impl
  // detail outside to the coalescer
  val offsetBits = 4 // FIXME hardcoded
  val sizeBits = 2 // FIXME hardcoded
  val newEntry = Wire(
    new InflightCoalReqTableEntry(numLanes, numPerLaneReqs, sourceWidth, offsetBits, sizeBits)
  )
  newEntry.source := coalSourceId
  newEntry.lanes.foreach { l =>
    l.reqs.zipWithIndex.foreach { case (r, slot) =>
      // TODO: this part needs the actual coalescing logic to work
      // Only the first request slot of every lane is marked valid, matching
      // the "head of every lane" rule above.
      r.valid := (slot == 0).B
      r.offset := 1.U
      r.size := 2.U
    }
  }
  dontTouch(newEntry)

  // Uncoalescer module uncoalesces responses back to each lane
  val coalDataWidth = tlCoal.params.dataBits
  val uncoalescer = Module(
    new UncoalescingUnit(
      numLanes,
      numPerLaneReqs,
      sourceWidth,
      coalDataWidth,
      outer.numInflightCoalRequests
    )
  )

  uncoalescer.io.coalReqValid := coalReqValid
  uncoalescer.io.newEntry := newEntry
  uncoalescer.io.coalRespValid := tlCoal.d.valid
  uncoalescer.io.coalRespSrcId := tlCoal.d.bits.source
  uncoalescer.io.coalRespData := tlCoal.d.bits.data

  // Queue up synthesized uncoalesced responses into each lane's response queue
  (respQueues zip uncoalescer.io.uncoalResps).foreach { case (q, lanes) =>
    lanes.zipWithIndex.foreach { case (resp, i) =>
      // TODO: rather than crashing, deassert tlOut.d.ready to stall downstream
      // cache. This should ideally not happen though.
      assert(
        q.io.enq(respQueueCoalPortOffset + i).ready,
        s"respQueue: enq port for ${i}-th coalesced response is blocked"
      )
      q.io.enq(respQueueCoalPortOffset + i).valid := resp.valid
      q.io.enq(respQueueCoalPortOffset + i).bits := resp.bits
      // dontTouch(q.io.enq(respQueueCoalPortOffset))
    }
  }

  // Debug
  dontTouch(coalReqValid)
  dontTouch(coalReqAddress)
  val coalRespData = tlCoal.d.bits.data
  dontTouch(coalRespData)

  dontTouch(tlCoal.a)
  dontTouch(tlCoal.d)
}
|
|
|
|
// Splits a coalesced response back into per-lane responses.
//
// Every issued coalesced request is recorded in an InflightCoalReqTable. When
// a coalesced response arrives, its sourceId is looked up in that table and,
// for each (lane, request slot) pair, a response carrying one chunk of the
// coalesced data is produced on `io.uncoalResps`.
//
// numLanes:                number of lanes
// numPerLaneReqs:          request slots per lane in one coalesced request
// sourceWidth:             width of the sourceId fields
// coalDataWidth:           width in bits of the coalesced response data
// numInflightCoalRequests: number of entries of the inflight table
class UncoalescingUnit(
    val numLanes: Int,
    val numPerLaneReqs: Int,
    val sourceWidth: Int,
    val coalDataWidth: Int,
    val numInflightCoalRequests: Int
) extends Module {
  val inflightTable = Module(
    new InflightCoalReqTable(numLanes, numPerLaneReqs, sourceWidth, numInflightCoalRequests)
  )
  val wordSize = 4 // FIXME duplicate

  val io = IO(new Bundle {
    // a coalesced request is being issued this cycle; `newEntry` describes it
    val coalReqValid = Input(Bool())
    val newEntry = Input(inflightTable.entryT)
    // a coalesced response is present this cycle, with this sourceId and data
    val coalRespValid = Input(Bool())
    val coalRespSrcId = Input(UInt(sourceWidth.W))
    val coalRespData = Input(UInt(coalDataWidth.W))
    // one ValidIO per lane and per request slot
    val uncoalResps = Output(
      Vec(numLanes, Vec(numPerLaneReqs, ValidIO(new RespQueueEntry(sourceWidth, wordSize * 8))))
    )
  })

  // Record every issued coalesced request in the table...
  inflightTable.io.enq.valid := io.coalReqValid
  inflightTable.io.enq.bits := io.newEntry

  // ...and query it with the sourceId of every incoming coalesced response.
  inflightTable.io.lookup.ready := io.coalRespValid
  inflightTable.io.lookupSourceId := io.coalRespSrcId

  // Writing and reading the same table entry in one cycle is unsupported.
  private val sameIdEnqAndLookup =
    io.coalReqValid && io.coalRespValid && (io.newEntry.source === io.coalRespSrcId)
  assert(
    !sameIdEnqAndLookup,
    "inflight table: enqueueing and looking up the same srcId at the same cycle is not handled"
  )

  // Un-coalescing logic
  //
  // Cuts `data` (dataWidth bits wide) into byteSize-byte chunks, numbered from
  // the most significant end, and returns the chunk selected by `offset`.
  // FIXME: `size` should be UInt, not Int
  def getCoalescedDataChunk(data: UInt, dataWidth: Int, offset: UInt, byteSize: Int): UInt = {
    val bitSize = byteSize * 8
    val sizeMask = (1.U << bitSize) - 1.U
    assert(dataWidth % bitSize == 0, "coalesced data width not evenly divisible by size")
    val numChunks = dataWidth / bitSize
    val chunks = Wire(Vec(numChunks, UInt(bitSize.W)))
    for (idx <- 0 until numChunks) {
      // chunk 0 sits in the top bitSize bits, chunk 1 right below it, etc.
      val shiftAmount = dataWidth - (idx + 1) * bitSize
      chunks(idx) := (data >> shiftAmount) & sizeMask
    }
    chunks(offset) // MUX
  }

  // Un-coalesce responses back to individual lanes
  val found = inflightTable.io.lookup.bits
  for {
    (lane, ioLane) <- found.lanes zip io.uncoalResps
    (req, i) <- lane.reqs.zipWithIndex
  } {
    val ioReq = ioLane(i)

    // FIXME: only looking at 0th srcId entry

    // Default: nothing to report for this slot.
    ioReq.valid := false.B
    ioReq.bits := DontCare

    when(inflightTable.io.lookup.valid) {
      ioReq.valid := req.valid
      ioReq.bits.source := 0.U

      // FIXME: disregard size enum for now
      val byteSize = 4
      ioReq.bits.data :=
        getCoalescedDataChunk(io.coalRespData, coalDataWidth, req.offset, byteSize)
    }
  }
}
|
|
|
|
// InflightCoalReqTable records, for every coalesced request that has not been
// answered yet, which lanes contributed to it and how each contribution maps
// onto the coalesced data. The uncoalescer uses this information to split a
// coalesced response back into individual per-lane responses with the right
// metadata.
//
// The table is indexed directly by the coalesced request's sourceId: `io.enq`
// writes the slot selected by `io.enq.bits.source`, and `io.lookup` reads the
// slot selected by `io.lookupSourceId`, clearing its valid bit once the lookup
// fires.
//
// numLanes / numPerLaneReqs / sourceWidth: shape of one table entry
// entries: number of slots in the table
class InflightCoalReqTable(
    val numLanes: Int,
    val numPerLaneReqs: Int,
    val sourceWidth: Int,
    val entries: Int
) extends Module {
  val offsetBits = 4 // FIXME hardcoded
  val sizeBits = 2 // FIXME hardcoded
  val entryT =
    new InflightCoalReqTableEntry(numLanes, numPerLaneReqs, sourceWidth, offsetBits, sizeBits)

  val io = IO(new Bundle {
    // new entry to record; written at index `bits.source`
    val enq = Flipped(Decoupled(entryT))
    // TODO: return actual stuff
    val lookup = Decoupled(entryT)
    // TODO: put this inside decoupledIO
    val lookupSourceId = Input(UInt(sourceWidth.W))
  })

  // Storage: one valid bit plus one entry per slot.
  val table = Mem(
    entries,
    new Bundle {
      val valid = Bool()
      val bits =
        new InflightCoalReqTableEntry(numLanes, numPerLaneReqs, sourceWidth, offsetBits, sizeBits)
    }
  )

  // While reset is asserted, mark every slot empty and zero its offset/size
  // fields.
  when(reset.asBool) {
    for (i <- 0 until entries) {
      table(i).valid := false.B
      for (l <- table(i).bits.lanes; r <- l.reqs) {
        r.offset := 0.U
        r.size := 0.U
      }
    }
  }

  // The table is full when every slot holds a valid entry.
  val full = Wire(Bool())
  full := (0 until entries).map(i => table(i).valid).reduce(_ && _)
  // Inflight table should never be full. It should have enough number of
  // entries to keep track of all outstanding core-side requests; otherwise,
  // it will stall the core issuing logic.
  assert(!full, "table is blocking coalescer")
  dontTouch(full)

  // Enqueue logic
  //
  io.enq.ready := !full
  val enqFire = io.enq.fire
  when(enqFire) {
    // TODO: handle enqueueing and looking up the same entry in the same cycle?
    val entryToWrite = table(io.enq.bits.source)
    assert(
      !entryToWrite.valid,
      "tried to enqueue to an already occupied entry"
    )
    entryToWrite.valid := true.B
    entryToWrite.bits := io.enq.bits
  }

  // Lookup logic
  //
  // The selected slot is presented unconditionally; `valid` tells the consumer
  // whether it actually holds an entry.
  io.lookup.valid := table(io.lookupSourceId).valid
  io.lookup.bits := table(io.lookupSourceId).bits
  val lookupFire = io.lookup.fire
  // Dequeue as soon as lookup succeeds
  when(lookupFire) {
    table(io.lookupSourceId).valid := false.B
  }

  dontTouch(io.lookup)
}
|
|
|
|
// One row of the inflight coalesced-request table.
//
// numLanes:       number of lanes described by the row
// numPerLaneReqs: maximum number of requests from a single lane that can get
//                 coalesced into a single request
// sourceWidth:    width of the `source` key
// offsetBits:     width of each request's `offset` field
// sizeBits:       width of each request's `size` field
class InflightCoalReqTableEntry(
    val numLanes: Int,
    val numPerLaneReqs: Int,
    val sourceWidth: Int,
    val offsetBits: Int,
    val sizeBits: Int
) extends Bundle {
  // Description of one original per-lane request inside the coalesced request.
  class CoreReq extends Bundle {
    val valid = Bool()
    val offset = UInt(offsetBits.W)
    val size = UInt(sizeBits.W)
  }
  // All request slots that belong to one lane.
  class PerLane extends Bundle {
    // FIXME: if numPerLaneReqs != 2 ** sourceWidth, we need to store srcId as well
    val reqs = Vec(numPerLaneReqs, new CoreReq)
  }
  // sourceId of the coalesced request/response pair; this is the key used to
  // query the table.
  val source = UInt(sourceWidth.W)
  val lanes = Vec(numLanes, new PerLane)
}
|
|
|
|
// A shift-register queue implementation that supports invalidating entries
// and exposing queue contents as output IO. (TODO: support deadline)
// Initially copied from freechips.rocketchip.util.ShiftQueue.
// If `pipe` is true, support enqueueing to a full queue when also dequeueing.
//
// gen:     type of the stored elements
// entries: number of slots
// pipe:    see above
// flow:    flow-through mode; not implemented (elaboration fails if set)
//
// Extra IO on top of QueueIO:
//   invalidate: bit i set => slot i is treated as invalid from this cycle on
//   mask:       current valid bit of every slot
//   elts:       current contents of every slot
//
// Bookkeeping fixes relative to the first draft of this queue:
//  * an enqueue into an EMPTY queue while shifting is now written to slot 0
//    (no slot used to match, so the element was lost while `used` still grew);
//  * `used` now shrinks whenever the head slot is shifted out, including when
//    that slot held an invalidated entry (it used to shrink on deq.fire only,
//    leaving `used` out of step with `valid`);
//  * enq.ready is derived from `used`, so an enqueue cannot fire when the top
//    slot is occupied by an invalidated entry and nothing shifts.
class CoalShiftQueue[T <: Data](
    gen: T,
    val entries: Int,
    pipe: Boolean = true,
    flow: Boolean = false
) extends Module {
  val io = IO(new QueueIO(gen, entries) {
    val invalidate = Input(UInt(entries.W))
    val mask = Output(UInt(entries.W))
    val elts = Output(Vec(entries, gen))
  })

  private val valid = RegInit(VecInit(Seq.fill(entries) { false.B }))
  // "Used" flag is 1 for every entry between the current queue head and tail,
  // even if that entry has been invalidated:
  //
  //  used: 000011111
  // valid: 000011011
  //            │ │ └─ head
  //            │ └────invalidated
  //            └──────tail
  //
  // Need this because we can't tell where to enqueue simply by looking at the
  // valid bits.
  private val used = RegInit(UInt(entries.W), 0.U)
  private val elts = Reg(Vec(entries, gen))

  // Indexing is head-to-tail as drawn above: i=0 is the head slot (the one
  // presented on deq), i=entries-1 is the topmost register.
  // `pad` extends a per-slot predicate to the two out-of-range neighbours:
  // index -1 reads as true, index `entries` reads as false.
  def pad(mask: Int => Bool) = { i: Int =>
    if (i == -1) true.B else if (i == entries) false.B else mask(i)
  }
  def paddedUsed = pad({ i: Int => used(i) })
  def validAfterInv(i: Int) = valid(i) && !io.invalidate(i)

  // Slots move one position towards the head when the consumer is ready, or
  // when the head slot is occupied by an entry that is (now) invalid.
  val shift = io.deq.ready || (used =/= 0.U) && !validAfterInv(0)
  // An occupied head slot (valid or invalidated) leaves the queue this cycle.
  private val popHead = shift && (used =/= 0.U)

  for (i <- 0 until entries) {
    val wdata = if (i == entries - 1) io.enq.bits else Mux(!used(i + 1), io.enq.bits, elts(i + 1))
    // While shifting, the new element lands in the topmost used slot (its old
    // occupant moves down), or in slot 0 when the queue is empty.
    val enqWhileShifting = io.enq.fire && !paddedUsed(i + 1) && (used(i) || (i == 0).B)
    // Without a shift, enqueue to the first empty slot above the top.
    val enqWhileHolding = io.enq.fire && paddedUsed(i - 1) && !used(i)

    val wen = Mux(
      shift,
      enqWhileShifting || pad(validAfterInv)(i + 1),
      // slots that are not valid may be overwritten freely
      enqWhileHolding || !validAfterInv(i)
    )
    when(wen) { elts(i) := wdata }

    valid(i) := Mux(
      shift,
      enqWhileShifting || pad(validAfterInv)(i + 1),
      enqWhileHolding || validAfterInv(i)
    )
  }

  // `used` grows on an enqueue without a pop, shrinks on a pop without an
  // enqueue, and is unchanged when both (or neither) happen.
  when(io.enq.fire) {
    when(!popHead) {
      used := (used << 1.U) | 1.U
    }
  }.elsewhen(popHead) {
    used := used >> 1.U
  }

  // A free slot exists iff the topmost slot is not in use.
  io.enq.ready := !used(entries - 1)
  // We don't want to invalidate deq.valid response right away even when
  // io.invalidate(head) is true.
  // Coalescing unit consumes queue head's validity, and produces its new
  // validity. Deasserting deq.valid right away will result in a combinational
  // cycle.
  io.deq.valid := valid(0)
  io.deq.bits := elts.head

  assert(!flow, "flow-through is not implemented")
  if (flow) {
    when(io.enq.valid) { io.deq.valid := true.B }
    when(!valid(0)) { io.deq.bits := io.enq.bits }
  }

  if (pipe) {
    // A ready consumer always triggers a shift, which frees the top slot.
    when(io.deq.ready) { io.enq.ready := true.B }
  }

  io.mask := valid.asUInt
  io.elts := elts
  io.count := PopCount(io.mask)
}
|
|
|
|
// Diplomatic wrapper of the memory-trace driver: one TileLink client node per
// lane, all funnelled through a single identity node.
//
// numLanes: number of lanes, i.e. number of client nodes created
class MemTraceDriver(numLanes: Int = 1)(implicit p: Parameters) extends LazyModule {
  // One client node per lane, named "MemTraceDriver<lane index>", each with
  // 16 sourceIds.
  val laneNodes = Seq.tabulate(numLanes) { i =>
    val clientParam = Seq(
      TLMasterParameters.v1(
        name = s"MemTraceDriver$i",
        sourceId = IdRange(0, 0x10)
        // visibility = Seq(AddressSet(0x0000, 0xffffff))
      )
    )
    TLClientNode(Seq(TLMasterPortParameters.v1(clientParam)))
  }

  // Gather the N client nodes behind one identity node so that the driver can
  // be connected diplomatically with a single binding.
  val node = TLIdentityNode()
  for (lane <- laneNodes) node := lane

  lazy val module = new MemTraceDriverImp(this, numLanes)
}
|
|
|
|
// One lane's slice of a trace line, as unpacked from the SimMemTrace outputs.
class TraceReq extends Bundle {
  // this lane has a request in the current trace line
  val valid = Bool()
  // request address
  val address = UInt(64.W)
  // true for a store, false for a load
  val is_store = Bool()
  // store byte mask
  val mask = UInt(8.W)
  // store data
  val data = UInt(64.W)
}
|
|
|
|
// Module implementation of MemTraceDriver.
//
// Reads trace lines from the SimMemTrace black box, unpacks them into one
// TraceReq per lane, and drives each lane's TileLink A channel with a Get or a
// Put depending on `is_store`. `io.finished` mirrors the black box's
// `finished` output.
class MemTraceDriverImp(outer: MemTraceDriver, numLanes: Int)
    extends LazyModuleImp(outer)
    with UnitTestModule {
  val sim = Module(
    new SimMemTrace(filename = "vecadd.core1.thread4.trace", numLanes)
  )
  sim.io.clock := clock
  sim.io.reset := reset.asBool
  // The trace reader is never back-pressured.
  sim.io.trace_read.ready := true.B

  // SimMemTrace packs all lanes into wide 1-D vectors; lane i occupies bit i
  // of the 1-bit-per-lane signals, bits [8i+7 : 8i] of the mask and bits
  // [64i+63 : 64i] of the 64-bit-per-lane signals.
  val laneReqs = Wire(Vec(numLanes, new TraceReq))
  for (lane <- 0 until numLanes) {
    val req = laneReqs(lane)
    val trace = sim.io.trace_read
    req.valid := trace.valid(lane)
    req.address := trace.address(64 * lane + 63, 64 * lane)
    req.is_store := trace.is_store(lane)
    req.mask := trace.store_mask(8 * lane + 7, 8 * lane)
    req.data := trace.data(64 * lane + 63, 64 * lane)
  }

  // To prevent collision of sourceId with a current in-flight message,
  // just use a counter that increments indefinitely as the sourceId of new
  // messages.
  val sourceIdCounter = RegInit(0.U(64.W))
  sourceIdCounter := sourceIdCounter + 1.U

  // Drive every lane's TL node from its unpacked request.
  for ((laneNode, req) <- outer.laneNodes zip laneReqs) {
    val (tlOut, edge) = laneNode.out(0)

    // Build both message flavours and pick one by `is_store`.
    // Memory trace addresses are not necessarily aligned to word boundaries,
    // so lgSize is left at 0. NOTE: lgSize is in bytes, not bits.
    val (putLegal, putBits) = edge.Put(
      fromSource = sourceIdCounter,
      toAddress = req.address,
      lgSize = 0.U,
      data = req.data
    )
    val (getLegal, getBits) = edge.Get(
      fromSource = sourceIdCounter,
      toAddress = req.address,
      lgSize = 0.U
    )
    val legal = Mux(req.is_store, putLegal, getLegal)
    val bits = Mux(req.is_store, putBits, getBits)
    assert(legal, "illegal TL req gen")

    tlOut.a.valid := req.valid
    tlOut.a.bits := bits
    // Channels B/C/E are unused; D responses are always accepted.
    tlOut.b.ready := true.B
    tlOut.c.valid := false.B
    tlOut.d.ready := true.B
    tlOut.e.valid := false.B

    dontTouch(tlOut.a)
  }

  io.finished := sim.io.trace_read.finished

  // Clock Counter, for debugging purpose
  val clkcount = RegInit(0.U(64.W))
  clkcount := clkcount + 1.U
  dontTouch(clkcount)
}
|
|
|
|
// Black-box wrapper around the Verilog/C++ memory-trace reader.
//
// filename: passed to the Verilog module as parameter FILENAME
// numLanes: passed to the Verilog module as parameter NUM_LANES; also sizes
//           the flattened per-lane ports below
class SimMemTrace(val filename: String, numLanes: Int)
    extends BlackBox(
      Map("FILENAME" -> filename, "NUM_LANES" -> numLanes)
    )
    with HasBlackBoxResource {
  val io = IO(new Bundle {
    val clock = Input(Clock())
    val reset = Input(Bool())

    // The field names below must line up with the port declarations in the
    // Verilog source (e.g. `trace_read_address`).
    val trace_read = new Bundle {
      val ready = Input(Bool())
      // one bit per lane
      val valid = Output(UInt(numLanes.W))
      // Chisel cannot connect to a 2-D Verilog port, so every per-lane signal
      // is flattened into one wide 1-D vector.
      // TODO: assumes 64-bit address.
      val address = Output(UInt((64 * numLanes).W))
      val is_store = Output(UInt(numLanes.W))
      val store_mask = Output(UInt((8 * numLanes).W))
      val data = Output(UInt((64 * numLanes).W))
      val finished = Output(Bool())
    }
  })

  // Sources that implement the black box.
  addResource("/vsrc/SimMemTrace.v")
  addResource("/csrc/SimMemTrace.cc")
  addResource("/csrc/SimMemTrace.h")
}
|
|
|
|
// Test harness that wires MemTraceDriver -> CoalescingUnit -> TLRAMs.
class CoalConnectTrace(implicit p: Parameters) extends LazyModule {
  // TODO: use parameters for numLanes
  val numLanes = 4
  val coal = LazyModule(new CoalescingUnit(numLanes))
  val driver = LazyModule(new MemTraceDriver(numLanes))

  coal.node :=* driver.node

  // Use TLRAMs as bogus downstream TL manager nodes: one per lane plus one for
  // the coalescer master's own edge.
  // TODO: swap this out with a memtrace logger
  val rams = Seq.fill(numLanes + 1) {
    LazyModule(
      // TODO: properly propagate beatBytes?
      new TLRAM(address = AddressSet(0x0000, 0xffffff), beatBytes = 8)
    )
  }
  // Each of the (N+1) outputs of coal gets its own RAM.
  for (ram <- rams) ram.node := coal.node

  lazy val module = new Impl
  // Forwards the unit-test start/finished handshake to the trace driver.
  class Impl extends LazyModuleImp(this) with UnitTestModule {
    io.finished := driver.module.io.finished
    driver.module.io.start := io.start
  }
}
|
|
|
|
// Unit-test entry point: instantiates the CoalConnectTrace harness and
// forwards the start/finished handshake to it.
//
// timeout: passed on to UnitTest
class CoalescingUnitTest(timeout: Int = 500000)(implicit p: Parameters) extends UnitTest(timeout) {
  val dut = Module(LazyModule(new CoalConnectTrace).module)
  io.finished := dut.io.finished
  dut.io.start := io.start
}
|