454 lines
15 KiB
Scala
454 lines
15 KiB
Scala
// See LICENSE.SiFive for license details.
|
|
|
|
package freechips.rocketchip.tilelink
|
|
|
|
import chisel3._
|
|
import chisel3.util._
|
|
import freechips.rocketchip.config.Parameters
|
|
import freechips.rocketchip.diplomacy._
|
|
// import freechips.rocketchip.devices.tilelink.TLTestRAM
|
|
import freechips.rocketchip.util.ShiftQueue
|
|
import freechips.rocketchip.unittest._
|
|
|
|
class CoalescingUnit(numLanes: Int = 1)(implicit p: Parameters)
|
|
extends LazyModule {
|
|
|
|
// Describes original, uncoalesced memory requests on each lane
|
|
class UncoalReq(val sourceWidth: Int, val addressWidth: Int) extends Bundle {
|
|
val source = UInt(sourceWidth.W)
|
|
val address = UInt(addressWidth.W)
|
|
val data = UInt(64.W /* FIXME hardcoded */ )
|
|
}
|
|
|
|
// Identity node that captures the incoming TL requests and passes them
|
|
// through the other end, dropping coalesced requests. This node is what
|
|
// will be visible from the external nodes.
|
|
val node = TLIdentityNode()
|
|
|
|
// Number of maximum in-flight coalesced requests. The upper bound of this
|
|
// value would be the sourceId range of a single lane.
|
|
val numInflightCoalRequests = 4
|
|
|
|
// Master node that actually generates coalesced requests.
|
|
protected val coalParam = Seq(
|
|
TLMasterParameters.v1(
|
|
name = "CoalescerNode",
|
|
sourceId = IdRange(0, numInflightCoalRequests)
|
|
)
|
|
)
|
|
protected val coalescerNode = TLClientNode(
|
|
Seq(TLMasterPortParameters.v1(coalParam))
|
|
)
|
|
|
|
// Connect master node as the first of the N+1-th inward edges of the
|
|
// IdentityNode
|
|
node :=* coalescerNode
|
|
|
|
lazy val module = new Impl
|
|
class Impl extends LazyModuleImp(this) {
|
|
// Instantiate per-lane queue that buffers incoming requests.
|
|
val sourceWidth = node.in(0)._1.params.sourceBits
|
|
val addressWidth = node.in(0)._1.params.addressBits
|
|
val coalRegEntry = new UncoalReq(sourceWidth, addressWidth)
|
|
val queues = Seq.tabulate(numLanes) { _ =>
|
|
Module(
|
|
new ShiftQueue(coalRegEntry, 4 /* FIXME hardcoded */ )
|
|
)
|
|
}
|
|
|
|
println(s"============= node edges: ${node.in.length}")
|
|
|
|
// Override IdentityNode implementation so that we wire node output to the
|
|
// queue output, instead of directly passing through node input.
|
|
// See IdentityNode definition in `diplomacy/Nodes.scala`.
|
|
(node.in zip node.out).zipWithIndex.foreach {
|
|
case (((_, edgeIn), _), 0) =>
|
|
// No need to do anything on the edge from coalescerNode
|
|
assert(
|
|
edgeIn.master.masters(0).name == "CoalescerNode",
|
|
"First edge is not connected to the coalescer master node"
|
|
)
|
|
case (((tlIn, _), (tlOut, edgeOut)), i) =>
|
|
val queue = queues(i - 1)
|
|
val newReq = Wire(coalRegEntry)
|
|
newReq.source := tlIn.a.bits.source
|
|
newReq.address := tlIn.a.bits.address
|
|
newReq.data := tlIn.a.bits.data
|
|
|
|
queue.io.enq.valid := tlIn.a.valid
|
|
queue.io.enq.bits := newReq
|
|
// FIXME: deq.ready should respect the ready state of the downstream
|
|
// module, e.g. Xbar or NoC.
|
|
queue.io.deq.ready := true.B
|
|
val head = queue.io.deq.bits
|
|
|
|
tlOut.a.valid := queue.io.deq.valid
|
|
// FIXME: generate Get or Put according to read/write
|
|
val (legal, bits) = edgeOut.Get(
|
|
fromSource = head.source,
|
|
// `toAddress` should be aligned to 2**lgSize
|
|
toAddress = head.address,
|
|
lgSize = 0.U
|
|
)
|
|
assert(legal, "unhandled illegal TL req gen")
|
|
tlOut.a.bits := bits
|
|
tlIn.d <> tlOut.d
|
|
|
|
dontTouch(tlIn.a)
|
|
dontTouch(tlOut.a)
|
|
dontTouch(tlOut.d)
|
|
}
|
|
|
|
// Generate coalesced requests
|
|
// FIXME: currently generating bogus coalesced requests
|
|
val coalSourceId = RegInit(0.U(2.W /* FIXME hardcoded */ ))
|
|
coalSourceId := coalSourceId + 1.U
|
|
|
|
val (tlCoal, edgeCoal) = coalescerNode.out(0)
|
|
val coalReqAddress = Wire(UInt(tlCoal.params.addressBits.W))
|
|
// TODO: bogus address
|
|
coalReqAddress := (0xabcd.U + coalSourceId) << 4
|
|
val coalReqValid = Wire(Bool())
|
|
// FIXME: copy lane 1's valid signal
|
|
coalReqValid := node.in(1)._1.a.valid
|
|
|
|
val (legal, bits) = edgeCoal.Get(
|
|
fromSource = coalSourceId,
|
|
// `toAddress` should be aligned to 2**lgSize
|
|
toAddress = coalReqAddress,
|
|
// 64 bits = 8 bytes = 2**(3) bytes
|
|
lgSize = 3.U
|
|
)
|
|
assert(legal, "unhandled illegal TL req gen")
|
|
tlCoal.a.valid := coalReqValid
|
|
tlCoal.a.bits := bits
|
|
tlCoal.b.ready := true.B
|
|
tlCoal.c.valid := false.B
|
|
tlCoal.d.ready := true.B
|
|
tlCoal.e.valid := false.B
|
|
|
|
// Populate inflight coalesced request table for use in un-coalescing
|
|
// responses back to the individual lanes that they originated from.
|
|
val inflightCoalReqTableEntry =
|
|
new InflightCoalReqTableEntry(numLanes, sourceWidth)
|
|
val inflightCoalReqTable =
|
|
Module(
|
|
new InflightCoalReqTable(numLanes, sourceWidth, numInflightCoalRequests)
|
|
)
|
|
val tableEntry = Wire(inflightCoalReqTableEntry)
|
|
tableEntry.respSourceId := coalSourceId
|
|
// TODO: bogus fromLane. Take the lowest numLane bits off of coalSourceId
|
|
tableEntry.fromLane := coalSourceId & ((2 << numLanes) - 1).U
|
|
// FIXME: I'm positive this is not the right way to do this
|
|
tableEntry.reqSourceIds(0) := 0.U
|
|
tableEntry.reqSourceIds(1) := 0.U
|
|
tableEntry.reqSourceIds(2) := 0.U
|
|
tableEntry.reqSourceIds(3) := 0.U
|
|
dontTouch(tableEntry)
|
|
|
|
inflightCoalReqTable.io.enq.valid := coalReqValid
|
|
inflightCoalReqTable.io.enq.bits := tableEntry
|
|
|
|
// Look up the table with incoming coalesced responses
|
|
inflightCoalReqTable.io.lookup.ready := tlCoal.d.valid
|
|
inflightCoalReqTable.io.lookupSourceId := tlCoal.d.bits.source
|
|
|
|
(node.in zip node.out)(0) match {
|
|
case ((tlIn, edgeIn), (tlOut, _)) =>
|
|
assert(
|
|
edgeIn.master.masters(0).name == "CoalescerNode",
|
|
"First edge is not connected to the coalescer master node"
|
|
)
|
|
|
|
tlOut.a <> tlIn.a
|
|
// No need to drop any incoming coalesced responses, so just passthrough
|
|
// to master node
|
|
tlIn.d <> tlOut.d
|
|
dontTouch(tlIn.d)
|
|
dontTouch(tlOut.d)
|
|
}
|
|
|
|
// Debug
|
|
dontTouch(coalReqValid)
|
|
dontTouch(coalReqAddress)
|
|
val coalRespData = Wire(UInt(tlCoal.params.dataBits.W))
|
|
coalRespData := tlCoal.d.bits.data
|
|
dontTouch(coalRespData)
|
|
|
|
dontTouch(tlCoal.a)
|
|
dontTouch(tlCoal.d)
|
|
}
|
|
}
|
|
|
|
// InflightCoalReqTable is a reservation station-like structure that records
|
|
// for each unanswered coalesced request which lane the request originated
|
|
// from, what their original sourceId were, etc. We use this info to split
|
|
// the coalesced response back to individual responses for each lanes with
|
|
// the right metadata.
|
|
class InflightCoalReqTable(
|
|
val numLanes: Int,
|
|
val sourceWidth: Int,
|
|
val entries: Int
|
|
) extends Module {
|
|
private val inflightCoalReqEntryT =
|
|
new InflightCoalReqTableEntry(numLanes, sourceWidth)
|
|
|
|
val io = IO(new Bundle {
|
|
val enq = Flipped(EnqIO(inflightCoalReqEntryT))
|
|
val lookup = Decoupled(UInt(sourceWidth.W))
|
|
// TODO: put this inside decoupledIO
|
|
val lookupSourceId = Input(UInt(sourceWidth.W))
|
|
})
|
|
|
|
val table = Mem(
|
|
entries,
|
|
new Bundle {
|
|
val valid = Bool()
|
|
val bits = new InflightCoalReqTableEntry(numLanes, sourceWidth)
|
|
}
|
|
)
|
|
|
|
when(reset.asBool) {
|
|
(0 until entries).foreach(i => table(i).valid := false.B)
|
|
}
|
|
|
|
val full = Wire(Bool())
|
|
full := (0 until entries)
|
|
.map { i => table(i).valid }
|
|
.reduce { (v0, v1) => v0 && v1 }
|
|
|
|
// Enqueue logic
|
|
//
|
|
// Instantiate simple cascade of muxes that indicate what is the current
|
|
// minimum index that has an empty spot in the table.
|
|
val cascadeEmptyIndex = Seq.tabulate(entries) { i => WireInit(i.U) }
|
|
(0 until entries - 1).reverse.foreach { i =>
|
|
val empty = !table(i).valid
|
|
assert(i + 1 < entries)
|
|
// If entry with a lower index is empty, it always takes priority
|
|
cascadeEmptyIndex(i) := Mux(empty, i.U, cascadeEmptyIndex(i + 1))
|
|
}
|
|
val chosenEmptyIndex = cascadeEmptyIndex(0)
|
|
dontTouch(chosenEmptyIndex)
|
|
dontTouch(full)
|
|
|
|
val enqFire = io.enq.ready && io.enq.valid
|
|
when(enqFire) {
|
|
val entry = table(chosenEmptyIndex)
|
|
entry.valid := true.B
|
|
entry.bits := io.enq.bits
|
|
}
|
|
|
|
io.enq.ready := !full
|
|
|
|
// Currently, we assume coalescer never blocks generating coalesced requests.
|
|
// If this ever happens, it means the table is insufficiently large to keep
|
|
// track of the maximum number of in-flight requests and should be enlarged
|
|
// in size.
|
|
// assert(!full, "coalescer is blocking responses")
|
|
|
|
// Lookup logic
|
|
//
|
|
// Same deal as cascadeEmptyIndex, but for finding a respSourceId match
|
|
// FIXME: tree structure may be better. Any library for instantiating CAM?
|
|
val cascadeMatchIndex = Seq.tabulate(entries) { i => WireInit(i.U) }
|
|
(0 until entries - 1).reverse.foreach { i =>
|
|
val match_ = table(i).bits.respSourceId === io.lookupSourceId
|
|
assert(i + 1 < entries)
|
|
// If entry with a lower index is empty, it always takes priority
|
|
cascadeMatchIndex(i) := Mux(match_, i.U, cascadeMatchIndex(i + 1))
|
|
}
|
|
val matchIndex = cascadeMatchIndex(0)
|
|
val matchValid = Wire(Bool())
|
|
matchValid := table(matchIndex).bits.respSourceId === io.lookupSourceId
|
|
io.lookup.valid := matchValid
|
|
// TODO: return something actually useful
|
|
io.lookup.bits := table(matchIndex).bits.respSourceId
|
|
|
|
val lookupFire = io.lookup.ready && io.lookup.valid
|
|
when(lookupFire) {
|
|
// As soon as a lookup returns a match, dequeue that entry
|
|
table(matchIndex).valid := false.B
|
|
}
|
|
dontTouch(io.lookup)
|
|
dontTouch(matchIndex)
|
|
dontTouch(matchValid)
|
|
}
|
|
|
|
class InflightCoalReqTableEntry(val numLanes: Int, val sourceWidth: Int)
|
|
extends Bundle {
|
|
// sourceId of the coalesced response that just came back. This will be the
|
|
// key that queries the table.
|
|
val respSourceId = UInt(sourceWidth.W)
|
|
// Bit flags that show which lanes got coalesced into this request
|
|
val fromLane = UInt(numLanes.W)
|
|
// sourceId of the original requests before getting coalesced. We need to
|
|
// remember this in order to answer the right outstanding TL request on each
|
|
// lane.
|
|
val reqSourceIds = Vec(numLanes, UInt(sourceWidth.W))
|
|
}
|
|
|
|
class MemTraceDriver(numLanes: Int = 1)(implicit p: Parameters)
|
|
extends LazyModule {
|
|
// Create N client nodes together
|
|
val laneNodes = Seq.tabulate(numLanes) { i =>
|
|
val clientParam = Seq(
|
|
TLMasterParameters.v1(
|
|
name = "MemTraceDriver" + i.toString,
|
|
sourceId = IdRange(0, 0xffff)
|
|
// visibility = Seq(AddressSet(0x0000, 0xffffff))
|
|
)
|
|
)
|
|
TLClientNode(Seq(TLMasterPortParameters.v1(clientParam)))
|
|
}
|
|
|
|
// Combine N outgoing client node into 1 idenity node for diplomatic
|
|
// connection.
|
|
val node = TLIdentityNode()
|
|
laneNodes.foreach { l => node := l }
|
|
|
|
lazy val module = new MemTraceDriverImp(this, numLanes)
|
|
}
|
|
|
|
class TraceReq extends Bundle {
|
|
val valid = Bool()
|
|
val address = UInt(64.W)
|
|
val is_store = Bool()
|
|
val mask = UInt(8.W)
|
|
val data = UInt(64.W)
|
|
}
|
|
|
|
class MemTraceDriverImp(outer: MemTraceDriver, numLanes: Int)
|
|
extends LazyModuleImp(outer)
|
|
with UnitTestModule {
|
|
val sim = Module(
|
|
new SimMemTrace(filename = "vecadd.core1.thread4.trace", numLanes)
|
|
)
|
|
sim.io.clock := clock
|
|
sim.io.reset := reset.asBool
|
|
sim.io.trace_read.ready := true.B
|
|
|
|
// Split output of SimMemTrace, which is flattened across all lanes,
|
|
// back to each lane's.
|
|
|
|
// Maybe this part can be improved, since now we are still mannually shifting everything
|
|
val laneReqs = Wire(Vec(numLanes, new TraceReq))
|
|
laneReqs.zipWithIndex.foreach { case (req, i) =>
|
|
req.valid := (sim.io.trace_read.valid >> i)
|
|
req.address := (sim.io.trace_read.address >> (64 * i))
|
|
req.is_store := (sim.io.trace_read.is_store >> i)
|
|
req.mask := (sim.io.trace_read.store_mask >> (8 * i))
|
|
req.data := (sim.io.trace_read.data >> (64 * i))
|
|
|
|
}
|
|
|
|
// To prevent collision of sourceId with a current in-flight message,
|
|
// just use a counter that increments indefinitely as the sourceId of new
|
|
// messages.
|
|
val sourceIdCounter = RegInit(0.U(64.W))
|
|
sourceIdCounter := sourceIdCounter + 1.U
|
|
|
|
// Connect each lane to its respective TL node.
|
|
(outer.laneNodes zip laneReqs).foreach { case (node, req) =>
|
|
val (tlOut, edge) = node.out(0)
|
|
tlOut.a.valid := req.valid
|
|
|
|
val (plegal, pbits) = edge.Put(
|
|
fromSource = sourceIdCounter,
|
|
toAddress = req.address,
|
|
// Memory trace addresses are not aligned in word addresses (e.g.
|
|
// read of size 1 at 0x1007) so leave lgSize to 0.
|
|
// TODO: We need to build an issue logic that aligns addresses at
|
|
// word boundaries and uses masks.
|
|
// NOTE: this is in byte size, not bits
|
|
lgSize = 0.U,
|
|
data = req.data
|
|
)
|
|
val (glegal, gbits) = edge.Get(
|
|
fromSource = sourceIdCounter,
|
|
toAddress = req.address,
|
|
lgSize = 0.U
|
|
)
|
|
val legal = Mux(req.is_store, plegal, glegal)
|
|
val bits = Mux(req.is_store, pbits, gbits)
|
|
assert(legal, "unhandled illegal TL req gen")
|
|
tlOut.a.bits := bits
|
|
tlOut.b.ready := true.B
|
|
tlOut.c.valid := false.B
|
|
tlOut.d.ready := true.B
|
|
tlOut.e.valid := false.B
|
|
|
|
dontTouch(tlOut.a)
|
|
}
|
|
|
|
io.finished := sim.io.trace_read.finished
|
|
|
|
// Clock Counter, for debugging purpose
|
|
val clkcount = RegInit(0.U(64.W))
|
|
clkcount := clkcount + 1.U
|
|
dontTouch(clkcount)
|
|
}
|
|
|
|
class SimMemTrace(val filename: String, numLanes: Int)
|
|
extends BlackBox(
|
|
Map("FILENAME" -> filename, "NUM_LANES" -> numLanes)
|
|
)
|
|
with HasBlackBoxResource {
|
|
val io = IO(new Bundle {
|
|
val clock = Input(Clock())
|
|
val reset = Input(Bool())
|
|
|
|
// These names have to match declarations in the Verilog code, eg.
|
|
// trace_read_address.
|
|
val trace_read = new Bundle {
|
|
val ready = Input(Bool())
|
|
val valid = Output(UInt(numLanes.W))
|
|
// Chisel can't interface with Verilog 2D port, so flatten all lanes into
|
|
// single wide 1D array.
|
|
// TODO: assumes 64-bit address.
|
|
val address = Output(UInt((64 * numLanes).W))
|
|
val is_store = Output(UInt(numLanes.W))
|
|
val store_mask = Output(UInt((8 * numLanes).W))
|
|
val data = Output(UInt((64 * numLanes).W))
|
|
val finished = Output(Bool())
|
|
}
|
|
})
|
|
|
|
addResource("/vsrc/SimMemTrace.v")
|
|
addResource("/csrc/SimMemTrace.cc")
|
|
addResource("/csrc/SimMemTrace.h")
|
|
}
|
|
|
|
class CoalConnectTrace(implicit p: Parameters) extends LazyModule {
|
|
// TODO: use parameters for numLanes
|
|
val numLanes = 4
|
|
val coal = LazyModule(new CoalescingUnit(numLanes))
|
|
val driver = LazyModule(new MemTraceDriver(numLanes))
|
|
|
|
coal.node :=* driver.node
|
|
|
|
// Use TLTestRAM as bogus downstream TL manager nodes
|
|
// TODO: swap this out with a memtrace logger
|
|
val rams = Seq.tabulate(numLanes + 1) { _ =>
|
|
LazyModule(
|
|
// TODO: properly propagate beatBytes?
|
|
new TLRAM(address = AddressSet(0x0000, 0xffffff), beatBytes = 8)
|
|
)
|
|
}
|
|
// Connect all (N+1) outputs of coal to separate TestRAM modules
|
|
rams.foreach { r => r.node := coal.node }
|
|
|
|
lazy val module = new Impl
|
|
class Impl extends LazyModuleImp(this) with UnitTestModule {
|
|
driver.module.io.start := io.start
|
|
io.finished := driver.module.io.finished
|
|
}
|
|
}
|
|
|
|
class CoalescingUnitTest(timeout: Int = 500000)(implicit p: Parameters)
|
|
extends UnitTest(timeout) {
|
|
val dut = Module(LazyModule(new CoalConnectTrace).module)
|
|
dut.io.start := io.start
|
|
io.finished := dut.io.finished
|
|
}
|