restructure parameters into config object && cleanup
This commit is contained in:
@@ -4,27 +4,64 @@ package freechips.rocketchip.tilelink
|
|||||||
|
|
||||||
import chisel3._
|
import chisel3._
|
||||||
import chisel3.util._
|
import chisel3.util._
|
||||||
|
import chisel3.experimental.ChiselEnum
|
||||||
import freechips.rocketchip.config.Parameters
|
import freechips.rocketchip.config.Parameters
|
||||||
import freechips.rocketchip.diplomacy._
|
import freechips.rocketchip.diplomacy._
|
||||||
// import freechips.rocketchip.devices.tilelink.TLTestRAM
|
// import freechips.rocketchip.devices.tilelink.TLTestRAM
|
||||||
import freechips.rocketchip.util.MultiPortQueue
|
import freechips.rocketchip.util.MultiPortQueue
|
||||||
import freechips.rocketchip.unittest._
|
import freechips.rocketchip.unittest._
|
||||||
|
|
||||||
object CoalescerConsts {
|
object InFlightTableSizeEnum extends ChiselEnum {
|
||||||
val MAX_SIZE = 6 // maximum burst size (64 bytes)
|
val INVALID = Value(0.U)
|
||||||
val DEPTH = 1 // request window per lane
|
val FOUR = Value(1.U)
|
||||||
val WAIT_TIMEOUT = 8 // max cycles to wait before forced fifo dequeue, per lane
|
|
||||||
val ADDR_WIDTH = 32 // assume <= 32
|
def log2Enum(x: UInt): Type = {
|
||||||
val DATA_BUS_SIZE = 4 // 2^4=16 bytes, 128 bit bus
|
MuxCase(INVALID, Seq(
|
||||||
val NUM_LANES = 4
|
(x === 2.U) -> FOUR
|
||||||
// val WATERMARK = 2 // minimum buffer occupancy to start coalescing
|
))
|
||||||
val WORD_SIZE = 4 // 32-bit system
|
}
|
||||||
val WORD_WIDTH = 2 // log(WORD_SIZE)
|
|
||||||
val NUM_OLD_IDS = 8 // num of outstanding requests per lane, from processor
|
def enum2log(x: Type): UInt = {
|
||||||
val NUM_NEW_IDS = 4 // num of outstanding coalesced requests
|
MuxCase(0.U, Seq(
|
||||||
|
(x === FOUR) -> 2.U
|
||||||
|
))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class CoalescingUnit(numLanes: Int = CoalescerConsts.NUM_LANES)(implicit p: Parameters) extends LazyModule {
|
case class CoalescerConfig(
|
||||||
|
MAX_SIZE: Int, // maximum burst size (64 bytes)
|
||||||
|
DEPTH: Int, // request window per lane
|
||||||
|
WAIT_TIMEOUT: Int, // max cycles to wait before forced fifo dequeue, per lane
|
||||||
|
ADDR_WIDTH: Int, // assume <= 32
|
||||||
|
DATA_BUS_SIZE: Int, // 2^4=16 bytes, 128 bit bus
|
||||||
|
NUM_LANES: Int,
|
||||||
|
// WATERMARK = 2, // minimum buffer occupancy to start coalescing
|
||||||
|
WORD_SIZE: Int, // 32-bit system
|
||||||
|
WORD_WIDTH: Int, // log(WORD_SIZE)
|
||||||
|
NUM_OLD_IDS: Int, // num of outstanding requests per lane, from processor
|
||||||
|
NUM_NEW_IDS: Int, // num of outstanding coalesced requests
|
||||||
|
COAL_SIZES: Seq[Int],
|
||||||
|
SizeEnum: ChiselEnum
|
||||||
|
)
|
||||||
|
|
||||||
|
object defaultConfig extends CoalescerConfig(
|
||||||
|
// TODO: bigger size
|
||||||
|
MAX_SIZE = 3, // maximum burst size (64 bytes)
|
||||||
|
DEPTH = 1, // request window per lane
|
||||||
|
WAIT_TIMEOUT = 8, // max cycles to wait before forced fifo dequeue, per lane
|
||||||
|
ADDR_WIDTH = 24, // assume <= 32
|
||||||
|
DATA_BUS_SIZE = 4, // 2^4=16 bytes, 128 bit bus
|
||||||
|
NUM_LANES = 4,
|
||||||
|
// WATERMARK = 2, // minimum buffer occupancy to start coalescing
|
||||||
|
WORD_SIZE = 4, // 32-bit system
|
||||||
|
WORD_WIDTH = 2, // log(WORD_SIZE)
|
||||||
|
NUM_OLD_IDS = 16, // num of outstanding requests per lane, from processor
|
||||||
|
NUM_NEW_IDS = 4, // num of outstanding coalesced requests
|
||||||
|
COAL_SIZES = Seq(3),
|
||||||
|
SizeEnum = InFlightTableSizeEnum
|
||||||
|
)
|
||||||
|
|
||||||
|
class CoalescingUnit(config: CoalescerConfig)(implicit p: Parameters) extends LazyModule {
|
||||||
// Identity node that captures the incoming TL requests and passes them
|
// Identity node that captures the incoming TL requests and passes them
|
||||||
// through the other end, dropping coalesced requests. This node is what
|
// through the other end, dropping coalesced requests. This node is what
|
||||||
// will be visible to upstream and downstream nodes.
|
// will be visible to upstream and downstream nodes.
|
||||||
@@ -32,7 +69,7 @@ class CoalescingUnit(numLanes: Int = CoalescerConsts.NUM_LANES)(implicit p: Para
|
|||||||
|
|
||||||
// Number of maximum in-flight coalesced requests. The upper bound of this
|
// Number of maximum in-flight coalesced requests. The upper bound of this
|
||||||
// value would be the sourceId range of a single lane.
|
// value would be the sourceId range of a single lane.
|
||||||
val numInflightCoalRequests = CoalescerConsts.NUM_NEW_IDS
|
val numInflightCoalRequests = config.NUM_NEW_IDS
|
||||||
|
|
||||||
// Master node that actually generates coalesced requests.
|
// Master node that actually generates coalesced requests.
|
||||||
protected val coalParam = Seq(
|
protected val coalParam = Seq(
|
||||||
@@ -48,7 +85,7 @@ class CoalescingUnit(numLanes: Int = CoalescerConsts.NUM_LANES)(implicit p: Para
|
|||||||
// Connect master node as the first inward edge of the IdentityNode
|
// Connect master node as the first inward edge of the IdentityNode
|
||||||
node :=* coalescerNode
|
node :=* coalescerNode
|
||||||
|
|
||||||
lazy val module = new CoalescingUnitImp(this, numLanes)
|
lazy val module = new CoalescingUnitImp(this, config)
|
||||||
}
|
}
|
||||||
|
|
||||||
class ReqQueueEntry(sourceWidth: Int, sizeWidth: Int, addressWidth: Int, maxSize: Int) extends Bundle {
|
class ReqQueueEntry(sourceWidth: Int, sizeWidth: Int, addressWidth: Int, maxSize: Int) extends Bundle {
|
||||||
@@ -181,18 +218,19 @@ class CoalShiftQueue[T <: Data](
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Software model: coalescer.py
|
// Software model: coalescer.py
|
||||||
class MonoCoalescer[T <: Data](coalSize: Int, coalWindow: Seq[CoalShiftQueue[T]]) extends Module {
|
class MonoCoalescer[T <: Data](coalSize: Int, coalWindow: Seq[CoalShiftQueue[T]],
|
||||||
|
config: CoalescerConfig) extends Module {
|
||||||
val io = IO(new Bundle {
|
val io = IO(new Bundle {
|
||||||
val leader_idx = Output(UInt(log2Ceil(CoalescerConsts.NUM_LANES).W))
|
val leader_idx = Output(UInt(log2Ceil(config.NUM_LANES).W))
|
||||||
val base_addr = Output(UInt(CoalescerConsts.ADDR_WIDTH.W))
|
val base_addr = Output(UInt(config.ADDR_WIDTH.W))
|
||||||
val match_oh = Output(Vec(CoalescerConsts.NUM_LANES, UInt(CoalescerConsts.DEPTH.W)))
|
val match_oh = Output(Vec(config.NUM_LANES, UInt(config.DEPTH.W)))
|
||||||
val coverage_hits = Output(UInt((1 << CoalescerConsts.MAX_SIZE).W))
|
val coverage_hits = Output(UInt((1 << config.MAX_SIZE).W))
|
||||||
})
|
})
|
||||||
|
|
||||||
io := DontCare
|
io := DontCare
|
||||||
|
|
||||||
val size = coalSize
|
val size = coalSize
|
||||||
val mask = ((1 << CoalescerConsts.ADDR_WIDTH - 1) - (1 << size - 1)).U
|
val mask = ((1 << config.ADDR_WIDTH - 1) - (1 << size - 1)).U
|
||||||
val window = coalWindow
|
val window = coalWindow
|
||||||
|
|
||||||
def can_match(req0: Valid[ReqQueueEntry], req1: Valid[ReqQueueEntry]): Bool = {
|
def can_match(req0: Valid[ReqQueueEntry], req1: Valid[ReqQueueEntry]): Bool = {
|
||||||
@@ -208,33 +246,36 @@ class MonoCoalescer[T <: Data](coalSize: Int, coalWindow: Seq[CoalShiftQueue[T]]
|
|||||||
|
|
||||||
// Software model: coalescer.py
|
// Software model: coalescer.py
|
||||||
class MultiCoalescer[T <: Data]
|
class MultiCoalescer[T <: Data]
|
||||||
(sizes: Seq[Int], window: Seq[CoalShiftQueue[T]], coalReqT: ReqQueueEntry) extends Module {
|
(sizes: Seq[Int], window: Seq[CoalShiftQueue[T]], coalReqT: ReqQueueEntry,
|
||||||
|
config: CoalescerConfig) extends Module {
|
||||||
|
|
||||||
val coalescers = sizes.map(size => Module(new MonoCoalescer(size, window)))
|
val coalescers = sizes.map(size => Module(new MonoCoalescer(size, window, config)))
|
||||||
val io = IO(new Bundle {
|
val io = IO(new Bundle {
|
||||||
val out_req = Output(Valid(coalReqT.cloneType))
|
val out_req = Output(Valid(coalReqT.cloneType))
|
||||||
val invalidate = Output(Valid(Vec(CoalescerConsts.NUM_LANES, UInt(CoalescerConsts.DEPTH.W))))
|
val invalidate = Output(Valid(Vec(config.NUM_LANES, UInt(config.DEPTH.W))))
|
||||||
})
|
})
|
||||||
|
|
||||||
io := DontCare
|
io := DontCare
|
||||||
}
|
}
|
||||||
|
|
||||||
class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModuleImp(outer) {
|
class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends LazyModuleImp(outer) {
|
||||||
// Make sure IdentityNode is connected to an upstream node, not just the
|
// Make sure IdentityNode is connected to an upstream node, not just the
|
||||||
// coalescer TL master node
|
// coalescer TL master node
|
||||||
assert(outer.node.in.length >= 2)
|
assert(outer.node.in.length >= 2)
|
||||||
|
assert(outer.node.in(1)._1.params.sourceBits == log2Ceil(config.NUM_OLD_IDS),
|
||||||
|
s"old source id bits TL param (${outer.node.in(1)._1.params.sourceBits}) mismatch with config")
|
||||||
|
assert(outer.node.in(1)._1.params.addressBits == config.ADDR_WIDTH,
|
||||||
|
s"address width TL param (${outer.node.in(1)._1.params.addressBits}) mismatch with config")
|
||||||
|
|
||||||
val reqQueueDepth = CoalescerConsts.DEPTH
|
|
||||||
val sourceWidth = outer.node.in(1)._1.params.sourceBits
|
val sourceWidth = outer.node.in(1)._1.params.sourceBits
|
||||||
val addressWidth = outer.node.in(1)._1.params.addressBits
|
|
||||||
// note we are using word size. assuming all coalescer inputs are word sized
|
// note we are using word size. assuming all coalescer inputs are word sized
|
||||||
val reqQueueEntryT = new ReqQueueEntry(sourceWidth, log2Ceil(CoalescerConsts.WORD_SIZE), addressWidth, CoalescerConsts.WORD_SIZE)
|
val reqQueueEntryT = new ReqQueueEntry(sourceWidth, config.WORD_WIDTH, config.ADDR_WIDTH, config.WORD_SIZE)
|
||||||
val reqQueues = Seq.tabulate(numLanes) { _ =>
|
val reqQueues = Seq.tabulate(config.NUM_LANES) { _ =>
|
||||||
Module(new CoalShiftQueue(reqQueueEntryT, reqQueueDepth))
|
Module(new CoalShiftQueue(reqQueueEntryT, config.DEPTH))
|
||||||
}
|
}
|
||||||
|
|
||||||
val coalReqT = new ReqQueueEntry(sourceWidth, log2Ceil(CoalescerConsts.MAX_SIZE), addressWidth, CoalescerConsts.MAX_SIZE)
|
val coalReqT = new ReqQueueEntry(sourceWidth, log2Ceil(config.MAX_SIZE), config.ADDR_WIDTH, config.MAX_SIZE)
|
||||||
val coalescer = Module(new MultiCoalescer(Seq(4, 5, 6), reqQueues, coalReqT))
|
val coalescer = Module(new MultiCoalescer(config.COAL_SIZES, reqQueues, coalReqT, config))
|
||||||
|
|
||||||
// Per-lane request and response queues
|
// Per-lane request and response queues
|
||||||
//
|
//
|
||||||
@@ -251,9 +292,8 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule
|
|||||||
// except for connecting the outgoing edge to the inflight table, which is done
|
// except for connecting the outgoing edge to the inflight table, which is done
|
||||||
// down below.
|
// down below.
|
||||||
tlOut.a <> tlIn.a
|
tlOut.a <> tlIn.a
|
||||||
case (((tlIn, edgeIn), (tlOut, edgeOut)), i) =>
|
case (((tlIn, _), (tlOut, edgeOut)), i) =>
|
||||||
// Request queue
|
// Request queue
|
||||||
//
|
|
||||||
val lane = i - 1
|
val lane = i - 1
|
||||||
val reqQueue = reqQueues(lane)
|
val reqQueue = reqQueues(lane)
|
||||||
val req = Wire(reqQueueEntryT)
|
val req = Wire(reqQueueEntryT)
|
||||||
@@ -293,13 +333,12 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule
|
|||||||
// ******************************************************************
|
// ******************************************************************
|
||||||
// ==================================================================
|
// ==================================================================
|
||||||
|
|
||||||
val respQueueDepth = CoalescerConsts.NUM_NEW_IDS
|
|
||||||
// The maximum number of requests from a single lane that can go into a
|
// The maximum number of requests from a single lane that can go into a
|
||||||
// coalesced request. Upper bound is min(DEPTH, 2**sourceWidth).
|
// coalesced request. Upper bound is min(DEPTH, 2**sourceWidth).
|
||||||
val numPerLaneReqs = CoalescerConsts.DEPTH
|
val numPerLaneReqs = config.DEPTH
|
||||||
|
|
||||||
val respQueueEntryT = new RespQueueEntry(sourceWidth, log2Ceil(CoalescerConsts.MAX_SIZE), CoalescerConsts.MAX_SIZE)
|
val respQueueEntryT = new RespQueueEntry(sourceWidth, log2Ceil(config.MAX_SIZE), config.MAX_SIZE)
|
||||||
val respQueues = Seq.tabulate(numLanes) { _ =>
|
val respQueues = Seq.tabulate(config.NUM_LANES) { _ =>
|
||||||
Module(
|
Module(
|
||||||
new MultiPortQueue(
|
new MultiPortQueue(
|
||||||
respQueueEntryT,
|
respQueueEntryT,
|
||||||
@@ -318,7 +357,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule
|
|||||||
// make queue block up in the middle of the simulation. Ideally there
|
// make queue block up in the middle of the simulation. Ideally there
|
||||||
// should be a more logical way to set this, or we should handle
|
// should be a more logical way to set this, or we should handle
|
||||||
// response queue blocking.
|
// response queue blocking.
|
||||||
respQueueDepth
|
config.NUM_NEW_IDS
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
@@ -335,7 +374,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule
|
|||||||
// except for connecting the outgoing edge to the inflight table, which is done
|
// except for connecting the outgoing edge to the inflight table, which is done
|
||||||
// down below.
|
// down below.
|
||||||
tlIn.d <> tlOut.d
|
tlIn.d <> tlOut.d
|
||||||
case (((tlIn, edgeIn), (tlOut, edgeOut)), i) =>
|
case (((tlIn, edgeIn), (tlOut, _)), i) =>
|
||||||
// Response queue
|
// Response queue
|
||||||
//
|
//
|
||||||
// This queue will serialize non-coalesced responses along with
|
// This queue will serialize non-coalesced responses along with
|
||||||
@@ -396,25 +435,32 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule
|
|||||||
// Construct new entry for the inflight table
|
// Construct new entry for the inflight table
|
||||||
// FIXME: don't instantiate inflight table entry type here. It leaks the table's impl
|
// FIXME: don't instantiate inflight table entry type here. It leaks the table's impl
|
||||||
// detail to the coalescer
|
// detail to the coalescer
|
||||||
|
|
||||||
|
// richard: I think a good idea is to pass Valid[ReqQueueEntry] generated by
|
||||||
|
// the coalescer directly into the uncoalescer, so that we can offload the
|
||||||
|
// logic to generate the Inflight Entry into the uncoalescer, where it should be.
|
||||||
|
// this also reduces top level clutter.
|
||||||
|
|
||||||
val offsetBits = 4 // FIXME hardcoded
|
val offsetBits = 4 // FIXME hardcoded
|
||||||
val sizeBits = 4 // FIXME hardcoded. This is should be not the TL size bits
|
val sizeBits = log2Ceil(config.MAX_SIZE) // FIXME This is should be not the TL size bits
|
||||||
// but the width of the size enum
|
// but the width of the size enum
|
||||||
val newEntry = Wire(
|
val newEntry = Wire(
|
||||||
new InflightCoalReqTableEntry(numLanes, numPerLaneReqs, sourceWidth, offsetBits, sizeBits)
|
new InflightCoalReqTableEntry(config.NUM_LANES, numPerLaneReqs, sourceWidth, offsetBits, sizeBits)
|
||||||
)
|
)
|
||||||
println(s"=========== table sourceWidth: ${sourceWidth}")
|
println(s"=========== table sourceWidth: ${sourceWidth}")
|
||||||
println(s"=========== table sizeBits: ${sizeBits}")
|
println(s"=========== table sizeBits: ${sizeBits}")
|
||||||
newEntry.source := coalescer.io.out_req.bits.source
|
newEntry.source := coalescer.io.out_req.bits.source
|
||||||
|
|
||||||
// TODO: richard to write table fill logic
|
// TODO: richard to write table fill logic
|
||||||
assert(tlCoal.params.dataBits == (1 << CoalescerConsts.MAX_SIZE) * 8, "tlCoal parameters mismatch coalescer constant")
|
assert(tlCoal.params.dataBits == (1 << config.MAX_SIZE) * 8,
|
||||||
|
s"tlCoal param dataBits (${tlCoal.params.dataBits}) mismatch coalescer constant")
|
||||||
val origReqs = reqQueues.map(q => q.io.queue.deq.bits)
|
val origReqs = reqQueues.map(q => q.io.queue.deq.bits)
|
||||||
newEntry.lanes.foreach { l =>
|
newEntry.lanes.foreach { l =>
|
||||||
l.reqs.zipWithIndex.foreach { case (r, i) =>
|
l.reqs.zipWithIndex.foreach { case (r, i) =>
|
||||||
// TODO: this part needs the actual coalescing logic to work
|
// TODO: this part needs the actual coalescing logic to work
|
||||||
r.valid := false.B
|
r.valid := false.B
|
||||||
r.source := origReqs(i).source
|
r.source := origReqs(i).source
|
||||||
r.offset := (origReqs(i).address % (1 << CoalescerConsts.MAX_SIZE).U) >> CoalescerConsts.WORD_WIDTH
|
r.offset := (origReqs(i).address % (1 << config.MAX_SIZE).U) >> config.WORD_WIDTH
|
||||||
r.size := origReqs(i).size
|
r.size := origReqs(i).size
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -425,22 +471,13 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule
|
|||||||
dontTouch(newEntry)
|
dontTouch(newEntry)
|
||||||
|
|
||||||
// Uncoalescer module uncoalesces responses back to each lane
|
// Uncoalescer module uncoalesces responses back to each lane
|
||||||
val uncoalescer = Module(
|
val uncoalescer = Module(new UncoalescingUnit(config, tlCoal.d.bits.cloneType))
|
||||||
new UncoalescingUnit(
|
|
||||||
numLanes,
|
|
||||||
numPerLaneReqs,
|
|
||||||
sourceWidth,
|
|
||||||
CoalescerConsts.WORD_WIDTH,
|
|
||||||
(1 << CoalescerConsts.MAX_SIZE),
|
|
||||||
outer.numInflightCoalRequests
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
uncoalescer.io.coalReqValid := coalescer.io.out_req.valid
|
uncoalescer.io.coalReqValid := coalescer.io.out_req.valid
|
||||||
uncoalescer.io.newEntry := newEntry
|
uncoalescer.io.newEntry := newEntry
|
||||||
uncoalescer.io.coalRespValid := tlCoal.d.valid
|
// richard: I changed this to use the DecoupledIO interface, which TL is using,
|
||||||
uncoalescer.io.coalRespSrcId := tlCoal.d.bits.source
|
// previously we were not handling the ready signal
|
||||||
uncoalescer.io.coalRespData := tlCoal.d.bits.data
|
uncoalescer.io.coalResp <> tlCoal.d
|
||||||
|
|
||||||
// Queue up synthesized uncoalesced responses into each lane's response queue
|
// Queue up synthesized uncoalesced responses into each lane's response queue
|
||||||
(respQueues zip uncoalescer.io.uncoalResps).foreach { case (q, lanes) =>
|
(respQueues zip uncoalescer.io.uncoalResps).foreach { case (q, lanes) =>
|
||||||
@@ -466,27 +503,24 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule
|
|||||||
dontTouch(tlCoal.d)
|
dontTouch(tlCoal.d)
|
||||||
}
|
}
|
||||||
|
|
||||||
class UncoalescingUnit(
|
class UncoalescingUnit(config: CoalescerConfig, tlCoalD: TLBundleD) extends Module {
|
||||||
val numLanes: Int,
|
// notes to hansung:
|
||||||
val numPerLaneReqs: Int,
|
// val numLanes: Int, <-> config.NUM_LANES
|
||||||
val sourceWidth: Int,
|
// val numPerLaneReqs: Int, <-> config.DEPTH
|
||||||
val sizeWidth: Int,
|
// val sourceWidth: Int, <-> log2ceil(config.NUM_OLD_IDS)
|
||||||
val coalDataWidth: Int,
|
// val sizeWidth: Int, <-> config.SizeEnum.width
|
||||||
val numInflightCoalRequests: Int
|
// val coalDataWidth: Int, <-> (1 << config.MAX_SIZE)
|
||||||
) extends Module {
|
// val numInflightCoalRequests: Int <-> config.NUM_NEW_IDS
|
||||||
val inflightTable = Module(
|
val inflightTable = Module(new InflightCoalReqTable(config))
|
||||||
new InflightCoalReqTable(numLanes, numPerLaneReqs, sourceWidth, numInflightCoalRequests)
|
|
||||||
)
|
|
||||||
val io = IO(new Bundle {
|
val io = IO(new Bundle {
|
||||||
val coalReqValid = Input(Bool())
|
val coalReqValid = Input(Bool())
|
||||||
val newEntry = Input(inflightTable.entryT)
|
val newEntry = Input(inflightTable.entryT.cloneType)
|
||||||
val coalRespValid = Input(Bool())
|
val coalResp = Flipped(Decoupled(tlCoalD))
|
||||||
val coalRespSrcId = Input(UInt(sourceWidth.W))
|
|
||||||
val coalRespData = Input(UInt(coalDataWidth.W))
|
|
||||||
val uncoalResps = Output(
|
val uncoalResps = Output(
|
||||||
Vec(
|
Vec(
|
||||||
numLanes,
|
config.NUM_LANES,
|
||||||
Vec(numPerLaneReqs, ValidIO(new RespQueueEntry(sourceWidth, CoalescerConsts.WORD_WIDTH, CoalescerConsts.WORD_SIZE)))
|
Vec(config.DEPTH, ValidIO(new RespQueueEntry(
|
||||||
|
log2Ceil(config.NUM_OLD_IDS), config.WORD_WIDTH, config.WORD_SIZE)))
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
@@ -496,19 +530,20 @@ class UncoalescingUnit(
|
|||||||
inflightTable.io.enq.bits := io.newEntry
|
inflightTable.io.enq.bits := io.newEntry
|
||||||
|
|
||||||
// Look up the table with incoming coalesced responses
|
// Look up the table with incoming coalesced responses
|
||||||
inflightTable.io.lookup.ready := io.coalRespValid
|
inflightTable.io.lookup.ready := io.coalResp.valid
|
||||||
inflightTable.io.lookupSourceId := io.coalRespSrcId
|
inflightTable.io.lookupSourceId := io.coalResp.bits.source
|
||||||
|
io.coalResp.ready := true.B // FIXME, see sw model implementation
|
||||||
|
|
||||||
assert(
|
assert(
|
||||||
!((io.coalReqValid === true.B) && (io.coalRespValid === true.B) &&
|
!((io.coalReqValid === true.B) && (io.coalResp.valid === true.B) &&
|
||||||
(io.newEntry.source === io.coalRespSrcId)),
|
(io.newEntry.source === io.coalResp.bits.source)),
|
||||||
"inflight table: enqueueing and looking up the same srcId at the same cycle is not handled"
|
"inflight table: enqueueing and looking up the same srcId at the same cycle is not handled"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Un-coalescing logic
|
// Un-coalescing logic
|
||||||
//
|
//
|
||||||
def getCoalescedDataChunk(data: UInt, dataWidth: Int, offset: UInt, logSize: UInt): UInt = {
|
def getCoalescedDataChunk(data: UInt, dataWidth: Int, offset: UInt, logSize: UInt): UInt = {
|
||||||
val sizeInBits = (1.U << logSize) * 8.U
|
val sizeInBits = (1.U << logSize) << 3.U
|
||||||
assert(
|
assert(
|
||||||
(dataWidth > 0).B && (dataWidth.U % sizeInBits === 0.U),
|
(dataWidth > 0).B && (dataWidth.U % sizeInBits === 0.U),
|
||||||
s"coalesced data width ($dataWidth) not evenly divisible by core req size ($sizeInBits)"
|
s"coalesced data width ($dataWidth) not evenly divisible by core req size ($sizeInBits)"
|
||||||
@@ -541,7 +576,7 @@ class UncoalescingUnit(
|
|||||||
ioOldReq.bits.source := oldReq.source
|
ioOldReq.bits.source := oldReq.source
|
||||||
ioOldReq.bits.size := oldReq.size
|
ioOldReq.bits.size := oldReq.size
|
||||||
ioOldReq.bits.data :=
|
ioOldReq.bits.data :=
|
||||||
getCoalescedDataChunk(io.coalRespData, coalDataWidth, oldReq.offset, oldReq.size)
|
getCoalescedDataChunk(io.coalResp.bits.data, io.coalResp.bits.data.getWidth, oldReq.offset, oldReq.size)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -552,16 +587,14 @@ class UncoalescingUnit(
|
|||||||
// from, what their original TileLink sourceId were, etc. We use this info to
|
// from, what their original TileLink sourceId were, etc. We use this info to
|
||||||
// split the coalesced response back to individual per-lane responses with the
|
// split the coalesced response back to individual per-lane responses with the
|
||||||
// right metadata.
|
// right metadata.
|
||||||
class InflightCoalReqTable(
|
class InflightCoalReqTable(config: CoalescerConfig) extends Module {
|
||||||
val numLanes: Int,
|
|
||||||
val numPerLaneReqs: Int,
|
|
||||||
val sourceWidth: Int,
|
|
||||||
val entries: Int
|
|
||||||
) extends Module {
|
|
||||||
val offsetBits = 4 // FIXME hardcoded
|
val offsetBits = 4 // FIXME hardcoded
|
||||||
val sizeBits = 2 // FIXME hardcoded
|
val sizeBits = 2 // FIXME hardcoded
|
||||||
val entryT =
|
val entryT = new InflightCoalReqTableEntry(config.NUM_LANES, config.DEPTH,
|
||||||
new InflightCoalReqTableEntry(numLanes, numPerLaneReqs, sourceWidth, offsetBits, sizeBits)
|
log2Ceil(config.NUM_OLD_IDS), config.MAX_SIZE, config.SizeEnum.getWidth)
|
||||||
|
|
||||||
|
val entries = config.NUM_NEW_IDS
|
||||||
|
val sourceWidth = log2Ceil(config.NUM_OLD_IDS)
|
||||||
|
|
||||||
val io = IO(new Bundle {
|
val io = IO(new Bundle {
|
||||||
val enq = Flipped(Decoupled(entryT))
|
val enq = Flipped(Decoupled(entryT))
|
||||||
@@ -575,8 +608,7 @@ class InflightCoalReqTable(
|
|||||||
entries,
|
entries,
|
||||||
new Bundle {
|
new Bundle {
|
||||||
val valid = Bool()
|
val valid = Bool()
|
||||||
val bits =
|
val bits = entryT.cloneType
|
||||||
new InflightCoalReqTableEntry(numLanes, numPerLaneReqs, sourceWidth, offsetBits, sizeBits)
|
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -672,11 +704,11 @@ object TLUtils {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class MemTraceDriver(numLanes: Int = 4, filename: String = "vecadd.core1.thread4.trace")(implicit
|
class MemTraceDriver(config: CoalescerConfig, filename: String = "vecadd.core1.thread4.trace")(implicit
|
||||||
p: Parameters
|
p: Parameters
|
||||||
) extends LazyModule {
|
) extends LazyModule {
|
||||||
// Create N client nodes together
|
// Create N client nodes together
|
||||||
val laneNodes = Seq.tabulate(numLanes) { i =>
|
val laneNodes = Seq.tabulate(config.NUM_LANES) { i =>
|
||||||
val clientParam = Seq(
|
val clientParam = Seq(
|
||||||
TLMasterParameters.v1(
|
TLMasterParameters.v1(
|
||||||
name = "MemTraceDriver" + i.toString,
|
name = "MemTraceDriver" + i.toString,
|
||||||
@@ -692,7 +724,7 @@ class MemTraceDriver(numLanes: Int = 4, filename: String = "vecadd.core1.thread4
|
|||||||
val node = TLIdentityNode()
|
val node = TLIdentityNode()
|
||||||
laneNodes.foreach { l => node := l }
|
laneNodes.foreach { l => node := l }
|
||||||
|
|
||||||
lazy val module = new MemTraceDriverImp(this, numLanes, filename)
|
lazy val module = new MemTraceDriverImp(this, config, filename)
|
||||||
}
|
}
|
||||||
|
|
||||||
trait HasTraceLine {
|
trait HasTraceLine {
|
||||||
@@ -715,10 +747,10 @@ class TraceLine extends Bundle with HasTraceLine {
|
|||||||
val data = UInt(64.W)
|
val data = UInt(64.W)
|
||||||
}
|
}
|
||||||
|
|
||||||
class MemTraceDriverImp(outer: MemTraceDriver, numLanes: Int, traceFile: String)
|
class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, traceFile: String)
|
||||||
extends LazyModuleImp(outer)
|
extends LazyModuleImp(outer)
|
||||||
with UnitTestModule {
|
with UnitTestModule {
|
||||||
val sim = Module(new SimMemTrace(traceFile, numLanes))
|
val sim = Module(new SimMemTrace(traceFile, config.NUM_LANES))
|
||||||
sim.io.clock := clock
|
sim.io.clock := clock
|
||||||
sim.io.reset := reset.asBool
|
sim.io.reset := reset.asBool
|
||||||
sim.io.trace_read.ready := true.B
|
sim.io.trace_read.ready := true.B
|
||||||
@@ -726,7 +758,7 @@ class MemTraceDriverImp(outer: MemTraceDriver, numLanes: Int, traceFile: String)
|
|||||||
// Split output of SimMemTrace, which is flattened across all lanes,
|
// Split output of SimMemTrace, which is flattened across all lanes,
|
||||||
// back to each lane's.
|
// back to each lane's.
|
||||||
|
|
||||||
val laneReqs = Wire(Vec(numLanes, new TraceLine))
|
val laneReqs = Wire(Vec(config.NUM_LANES, new TraceLine))
|
||||||
val addrW = laneReqs(0).address.getWidth
|
val addrW = laneReqs(0).address.getWidth
|
||||||
val sizeW = laneReqs(0).size.getWidth
|
val sizeW = laneReqs(0).size.getWidth
|
||||||
val dataW = laneReqs(0).data.getWidth
|
val dataW = laneReqs(0).data.getWidth
|
||||||
@@ -759,20 +791,20 @@ class MemTraceDriverImp(outer: MemTraceDriver, numLanes: Int, traceFile: String)
|
|||||||
// the trace driver to act so as well.
|
// the trace driver to act so as well.
|
||||||
// That means if req.size is smaller than word size, we need to pad data
|
// That means if req.size is smaller than word size, we need to pad data
|
||||||
// with zeros to generate a word-size request, and set mask accordingly.
|
// with zeros to generate a word-size request, and set mask accordingly.
|
||||||
val offsetInWord = req.address % CoalescerConsts.WORD_SIZE.U
|
val offsetInWord = req.address % config.WORD_SIZE.U
|
||||||
val subword = req.size < log2Ceil(CoalescerConsts.WORD_SIZE).U
|
val subword = req.size < log2Ceil(config.WORD_SIZE).U
|
||||||
|
|
||||||
val mask = Wire(UInt(CoalescerConsts.WORD_SIZE.W))
|
val mask = Wire(UInt(config.WORD_SIZE.W))
|
||||||
val wordData = Wire(UInt((CoalescerConsts.WORD_SIZE * 8).W))
|
val wordData = Wire(UInt((config.WORD_SIZE * 8).W))
|
||||||
val sizeInBytes = Wire(UInt((sizeW + 1).W))
|
val sizeInBytes = Wire(UInt((sizeW + 1).W))
|
||||||
sizeInBytes := (1.U) << req.size
|
sizeInBytes := (1.U) << req.size
|
||||||
mask := Mux(subword, (~((~0.U(64.W)) << sizeInBytes)) << offsetInWord, ~0.U)
|
mask := Mux(subword, (~((~0.U(64.W)) << sizeInBytes)) << offsetInWord, ~0.U)
|
||||||
wordData := Mux(subword, req.data << (offsetInWord * 8.U), req.data)
|
wordData := Mux(subword, req.data << (offsetInWord * 8.U), req.data)
|
||||||
val wordAlignedAddress = req.address & ~((1 << log2Ceil(CoalescerConsts.WORD_SIZE)) - 1).U(addrW.W)
|
val wordAlignedAddress = req.address & ~((1 << log2Ceil(config.WORD_SIZE)) - 1).U(addrW.W)
|
||||||
|
|
||||||
assert(
|
assert(
|
||||||
req.size <= log2Ceil(CoalescerConsts.WORD_SIZE).U,
|
req.size <= log2Ceil(config.WORD_SIZE).U,
|
||||||
s"trace driver currently does not support access sizes larger than word size (${CoalescerConsts.WORD_SIZE})"
|
s"trace driver currently does not support access sizes larger than word size (${config.WORD_SIZE})"
|
||||||
)
|
)
|
||||||
val wordAlignedSize = 2.U // FIXME: hardcoded
|
val wordAlignedSize = 2.U // FIXME: hardcoded
|
||||||
|
|
||||||
@@ -782,7 +814,7 @@ class MemTraceDriverImp(outer: MemTraceDriver, numLanes: Int, traceFile: String)
|
|||||||
// req.address,
|
// req.address,
|
||||||
// req.size,
|
// req.size,
|
||||||
// req.data,
|
// req.data,
|
||||||
// ~((1 << log2Ceil(CoalescerConsts.WORD_SIZE)) - 1).U(addrW.W),
|
// ~((1 << log2Ceil(config.WORD_SIZE)) - 1).U(addrW.W),
|
||||||
// wordAlignedAddress,
|
// wordAlignedAddress,
|
||||||
// mask,
|
// mask,
|
||||||
// wordData
|
// wordData
|
||||||
@@ -1153,11 +1185,11 @@ object TracePrintf {
|
|||||||
class TLRAMCoalescerLogger(implicit p: Parameters) extends LazyModule {
|
class TLRAMCoalescerLogger(implicit p: Parameters) extends LazyModule {
|
||||||
// TODO: use parameters for numLanes
|
// TODO: use parameters for numLanes
|
||||||
val numLanes = 4
|
val numLanes = 4
|
||||||
val driver = LazyModule(new MemTraceDriver(numLanes))
|
val driver = LazyModule(new MemTraceDriver(defaultConfig))
|
||||||
val coreSideLogger = LazyModule(
|
val coreSideLogger = LazyModule(
|
||||||
new MemTraceLogger(numLanes, loggerName = "coreside")
|
new MemTraceLogger(numLanes, loggerName = "coreside")
|
||||||
)
|
)
|
||||||
val coal = LazyModule(new CoalescingUnit(numLanes))
|
val coal = LazyModule(new CoalescingUnit(defaultConfig))
|
||||||
val memSideLogger = LazyModule(new MemTraceLogger(numLanes + 1, loggerName = "memside"))
|
val memSideLogger = LazyModule(new MemTraceLogger(numLanes + 1, loggerName = "memside"))
|
||||||
val rams = Seq.fill(numLanes + 1)( // +1 for coalesced edge
|
val rams = Seq.fill(numLanes + 1)( // +1 for coalesced edge
|
||||||
LazyModule(
|
LazyModule(
|
||||||
@@ -1204,8 +1236,8 @@ class TLRAMCoalescerLoggerTest(timeout: Int = 500000)(implicit p: Parameters)
|
|||||||
class TLRAMCoalescer(implicit p: Parameters) extends LazyModule {
|
class TLRAMCoalescer(implicit p: Parameters) extends LazyModule {
|
||||||
// TODO: use parameters for numLanes
|
// TODO: use parameters for numLanes
|
||||||
val numLanes = 4
|
val numLanes = 4
|
||||||
val coal = LazyModule(new CoalescingUnit(numLanes))
|
val coal = LazyModule(new CoalescingUnit(defaultConfig))
|
||||||
val driver = LazyModule(new MemTraceDriver(numLanes))
|
val driver = LazyModule(new MemTraceDriver(defaultConfig))
|
||||||
val rams = Seq.fill(numLanes + 1)( // +1 for coalesced edge
|
val rams = Seq.fill(numLanes + 1)( // +1 for coalesced edge
|
||||||
LazyModule(
|
LazyModule(
|
||||||
// NOTE: beatBytes here sets the data bitwidth of the upstream TileLink
|
// NOTE: beatBytes here sets the data bitwidth of the upstream TileLink
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ trait CanHaveGPUTracer { this: BaseSubsystem =>
|
|||||||
//p(GPUTracerKey) is the mechnimism to pass Config's parameter down to lazymodule
|
//p(GPUTracerKey) is the mechnimism to pass Config's parameter down to lazymodule
|
||||||
p(GPUTracerKey) .map { k =>
|
p(GPUTracerKey) .map { k =>
|
||||||
val config = p(GPUTracerKey).get
|
val config = p(GPUTracerKey).get
|
||||||
val tracer = LazyModule(new MemTraceDriver(config.numLanes, config.traceFile)(p))
|
val tracer = LazyModule(new MemTraceDriver(defaultConfig, config.traceFile)(p))
|
||||||
// Must use :=* to ensure the N edges from Tracer doesn't get merged into 1 when connecting to SBus
|
// Must use :=* to ensure the N edges from Tracer doesn't get merged into 1 when connecting to SBus
|
||||||
sbus.fromPort(Some("gpu-tracer"))() :=* tracer.node
|
sbus.fromPort(Some("gpu-tracer"))() :=* tracer.node
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user