Use DecoupledIO instead of explicit valid in TraceLine bundle

This commit is contained in:
Hansung Kim
2024-01-21 17:50:00 -08:00
parent d56981a0b1
commit ec02a12220

View File

@@ -1502,7 +1502,6 @@ class MemTraceDriver(
}
trait HasTraceLine {
val valid: UInt
val source: UInt
val address: UInt
val is_store: UInt
@@ -1513,7 +1512,6 @@ trait HasTraceLine {
// Used for both request and response. Response had address set to 0
// NOTE: these widths have to agree with what's hardcoded in Verilog.
class TraceLine extends Bundle with HasTraceLine {
val valid = Bool()
val source = UInt(32.W)
val address = UInt(64.W)
val is_store = Bool()
@@ -1538,7 +1536,7 @@ class MemTraceDriverImp(
// downstream take requests from the queue individually for each lane,
// but do synchronized enqueue whenever all lane queues are ready to prevent
// drifts between the lanes.
val reqQueues = Seq.fill(config.numLanes)(Module(new Queue(new TraceLine, 2)))
val reqQueues = Seq.fill(config.numLanes)(Module(new Queue(Valid(new TraceLine), 2)))
// Are we safe to read the next warp?
val reqQueueAllReady = reqQueues.map(_.io.enq.ready).reduce(_ && _)
@@ -1552,17 +1550,17 @@ class MemTraceDriverImp(
// Read output from Verilog BlackBox
// Split output of SimMemTrace, which is flattened across all lanes, back to each lane's.
val laneReqs = Wire(Vec(config.numLanes, new TraceLine))
val addrW = laneReqs(0).address.getWidth
val sizeW = laneReqs(0).size.getWidth
val dataW = laneReqs(0).data.getWidth
val laneReqs = Wire(Vec(config.numLanes, Valid(new TraceLine)))
val addrW = laneReqs(0).bits.address.getWidth
val sizeW = laneReqs(0).bits.size.getWidth
val dataW = laneReqs(0).bits.data.getWidth
laneReqs.zipWithIndex.foreach { case (req, i) =>
req.valid := sim.io.trace_read.valid(i)
req.source := 0.U // driver trace doesn't contain source id
req.address := sim.io.trace_read.address(addrW * (i + 1) - 1, addrW * i)
req.is_store := sim.io.trace_read.is_store(i)
req.size := sim.io.trace_read.size(sizeW * (i + 1) - 1, sizeW * i)
req.data := sim.io.trace_read.data(dataW * (i + 1) - 1, dataW * i)
req.bits.source := 0.U // driver trace doesn't contain source id
req.bits.address := sim.io.trace_read.address(addrW * (i + 1) - 1, addrW * i)
req.bits.is_store := sim.io.trace_read.is_store(i)
req.bits.size := sim.io.trace_read.size(sizeW * (i + 1) - 1, sizeW * i)
req.bits.data := sim.io.trace_read.data(dataW * (i + 1) - 1, dataW * i)
}
// Not all fire because trace cycle has to advance even when there is no valid
@@ -1610,19 +1608,19 @@ class MemTraceDriverImp(
// the trace driver to act so as well.
// That means if req.size is smaller than word size, we need to pad data
// with zeros to generate a word-size request, and set mask accordingly.
val offsetInWord = req.address % config.wordSizeInBytes.U
val subword = req.size < log2Ceil(config.wordSizeInBytes).U
val offsetInWord = req.bits.address % config.wordSizeInBytes.U
val subword = req.bits.size < log2Ceil(config.wordSizeInBytes).U
// `mask` is currently unused
// val mask = Wire(UInt(config.wordSizeInBytes.W))
val wordData = Wire(UInt((config.wordSizeInBytes * 8 * 2).W))
val sizeInBytes = Wire(UInt((sizeW + 1).W))
sizeInBytes := (1.U) << req.size
sizeInBytes := (1.U) << req.bits.size
// mask := Mux(subword, (~((~0.U(64.W)) << sizeInBytes)) << offsetInWord, ~0.U)
wordData := Mux(subword, req.data << (offsetInWord * 8.U), req.data)
wordData := Mux(subword, req.bits.data << (offsetInWord * 8.U), req.bits.data)
val wordAlignedAddress =
req.address & ~((1 << log2Ceil(config.wordSizeInBytes)) - 1).U(addrW.W)
val wordAlignedSize = Mux(subword, 2.U, req.size)
req.bits.address & ~((1 << log2Ceil(config.wordSizeInBytes)) - 1).U(addrW.W)
val wordAlignedSize = Mux(subword, 2.U, req.bits.size)
val sourceGen = sourceGens(lane)
sourceGen.io.gen := tlOut.a.fire
@@ -1644,8 +1642,8 @@ class MemTraceDriverImp(
toAddress = hashToValidPhyAddr(wordAlignedAddress),
lgSize = wordAlignedSize
)
val legal = Mux(req.is_store, plegal, glegal)
val bits = Mux(req.is_store, pbits, gbits)
val legal = Mux(req.bits.is_store, plegal, glegal)
val bits = Mux(req.bits.is_store, pbits, gbits)
tlOut.a.valid := reqQ.io.deq.valid && syncedSourceGenValid
when(tlOut.a.fire) {
@@ -1667,9 +1665,9 @@ class MemTraceDriverImp(
tlOut.a.bits.address,
tlOut.a.bits.size,
tlOut.a.bits.mask,
req.is_store,
req.bits.is_store,
tlOut.a.bits.data,
req.data
req.bits.data
)
}
dontTouch(tlOut.a)
@@ -1809,8 +1807,8 @@ class MemTraceLogger(
simResp.get.io.reset := reset.asBool
}
val laneReqs = Wire(Vec(numLanes, new TraceLine))
val laneResps = Wire(Vec(numLanes, new TraceLine))
val laneReqs = Wire(Vec(numLanes, Valid(new TraceLine)))
val laneResps = Wire(Vec(numLanes, Valid(new TraceLine)))
assert(
numLanes == node.in.length,
@@ -1828,12 +1826,12 @@ class MemTraceLogger(
// Only log trace when fired, i.e. both upstream and downstream are ready
// and transaction happened.
req.valid := tlIn.a.fire
req.size := tlIn.a.bits.size
req.is_store := TLUtils.AOpcodeIsStore(tlIn.a.bits.opcode, tlIn.a.fire)
req.source := tlIn.a.bits.source
req.bits.size := tlIn.a.bits.size
req.bits.is_store := TLUtils.AOpcodeIsStore(tlIn.a.bits.opcode, tlIn.a.fire)
req.bits.source := tlIn.a.bits.source
// TL always carries the exact unaligned address that the client
// originally requested, so no postprocessing required
req.address := tlIn.a.bits.address
req.bits.address := tlIn.a.bits.address
when(req.valid) {
TLPrintf(
@@ -1842,9 +1840,9 @@ class MemTraceLogger(
tlIn.a.bits.address,
tlIn.a.bits.size,
tlIn.a.bits.mask,
req.is_store,
req.bits.is_store,
tlIn.a.bits.data,
req.data
req.bits.data
)
}
@@ -1868,9 +1866,9 @@ class MemTraceLogger(
val dataW = tlIn.params.dataBits
val sizeInBits = (1.U(1.W) << tlIn.a.bits.size) << 3.U
val mask = ~(~(0.U(dataW.W)) << sizeInBits)
req.data := mask & (tlIn.a.bits.data >> (trailingZerosInMask * 8.U))
// when (req.valid) {
// printf("trailingZerosInMask=%d, mask=%x, data=%x\n", trailingZerosInMask, mask, req.data)
req.bits.data := mask & (tlIn.a.bits.data >> (trailingZerosInMask * 8.U))
// when (req.valid) {
// printf("trailingZerosInMask=%d, mask=%x, data=%x\n", trailingZerosInMask, mask, req.bits.data)
// }
// responses on TL D channel
@@ -1878,18 +1876,18 @@ class MemTraceLogger(
// Only log trace when fired, i.e. both upstream and downstream are ready
// and transaction happened.
resp.valid := tlOut.d.fire
resp.size := tlOut.d.bits.size
resp.is_store := TLUtils.DOpcodeIsStore(
resp.bits.size := tlOut.d.bits.size
resp.bits.is_store := TLUtils.DOpcodeIsStore(
tlOut.d.bits.opcode,
tlOut.d.fire
)
resp.source := tlOut.d.bits.source
resp.bits.source := tlOut.d.bits.source
// NOTE: TL D channel doesn't carry address nor mask, so there's no easy
// way to figure out which bytes the master actually uses. Since we
// don't care too much about addresses in the trace anyway, just store
// the entire bits.
resp.address := 0.U
resp.data := tlOut.d.bits.data
resp.bits.address := 0.U
resp.bits.data := tlOut.d.bits.data
}
// stats
@@ -1903,13 +1901,13 @@ class MemTraceLogger(
}
val reqBytesThisCycle =
laneReqs
.map { l => Mux(l.valid, 1.U(64.W) << l.size, 0.U(64.W)) }
.map { l => Mux(l.valid, 1.U(64.W) << l.bits.size, 0.U(64.W)) }
.reduce { (b0, b1) =>
b0 + b1
}
val respBytesThisCycle =
laneResps
.map { l => Mux(l.valid, 1.U(64.W) << l.size, 0.U(64.W)) }
.map { l => Mux(l.valid, 1.U(64.W) << l.bits.size, 0.U(64.W)) }
.reduce { (b0, b1) =>
b0 + b1
}
@@ -1922,42 +1920,25 @@ class MemTraceLogger(
//
// This is a clunky workaround of the fact that Chisel doesn't allow partial
// assignment to a bitfield range of a wide signal.
// Packs per-lane trace lines into the flattened, per-field signals of a
// trace IO bundle.
//
// simIO:   bundle exposing the HasTraceLine fields; every field is driven
//          with that field gathered across all lanes.
// perLane: one TraceLine per lane, indexed 0 until numLanes.
def flattenTrace(
simIO: Bundle with HasTraceLine,
perLane: Vec[TraceLine]
) = {
// these will get optimized out
// One intermediate Vec per field; element types are taken from lane 0.
val vecValid = Wire(Vec(numLanes, chiselTypeOf(perLane(0).valid)))
val vecSource = Wire(Vec(numLanes, chiselTypeOf(perLane(0).source)))
val vecAddress = Wire(Vec(numLanes, chiselTypeOf(perLane(0).address)))
val vecIsStore = Wire(Vec(numLanes, chiselTypeOf(perLane(0).is_store)))
val vecSize = Wire(Vec(numLanes, chiselTypeOf(perLane(0).size)))
val vecData = Wire(Vec(numLanes, chiselTypeOf(perLane(0).data)))
// Copy each lane's fields into slot i of the matching per-field Vec.
perLane.zipWithIndex.foreach { case (l, i) =>
vecValid(i) := l.valid
vecSource(i) := l.source
vecAddress(i) := l.address
vecIsStore(i) := l.is_store
vecSize(i) := l.size
vecData(i) := l.data
}
// Collapse each per-field Vec into one wide UInt and drive the IO with it.
// NOTE(review): relies on the Vec.asUInt bit ordering matching the lane
// layout expected on the Verilog side -- confirm.
simIO.valid := vecValid.asUInt
simIO.source := vecSource.asUInt
simIO.address := vecAddress.asUInt
simIO.is_store := vecIsStore.asUInt
simIO.size := vecSize.asUInt
simIO.data := vecData.asUInt
}
if (simReq.isDefined) {
flattenTrace(simReq.get.io.trace_log, laneReqs)
simReq.get.io.trace_log.valid := VecInit(laneReqs.map(_.valid)).asUInt
simReq.get.io.trace_log.source := VecInit(laneReqs.map(_.bits.source)).asUInt
simReq.get.io.trace_log.address := VecInit(laneReqs.map(_.bits.address)).asUInt
simReq.get.io.trace_log.is_store := VecInit(laneReqs.map(_.bits.is_store)).asUInt
simReq.get.io.trace_log.size := VecInit(laneReqs.map(_.bits.size)).asUInt
simReq.get.io.trace_log.data := VecInit(laneReqs.map(_.bits.data)).asUInt
assert(
simReq.get.io.trace_log.ready === true.B,
"MemTraceLogger is expected to be always ready"
)
}
if (simResp.isDefined) {
flattenTrace(simResp.get.io.trace_log, laneResps)
simResp.get.io.trace_log.valid := VecInit(laneResps.map(_.valid)).asUInt
simResp.get.io.trace_log.source := VecInit(laneResps.map(_.bits.source)).asUInt
simResp.get.io.trace_log.address := VecInit(laneResps.map(_.bits.address)).asUInt
simResp.get.io.trace_log.is_store := VecInit(laneResps.map(_.bits.is_store)).asUInt
simResp.get.io.trace_log.size := VecInit(laneResps.map(_.bits.size)).asUInt
simResp.get.io.trace_log.data := VecInit(laneResps.map(_.bits.data)).asUInt
assert(
simResp.get.io.trace_log.ready === true.B,
"MemTraceLogger is expected to be always ready"
@@ -1994,7 +1975,7 @@ class SimMemTraceLogger(
val clock = Input(Clock())
val reset = Input(Bool())
val trace_log = new Bundle with HasTraceLine {
val trace_log = new Bundle {
val valid = Input(UInt(numLanes.W))
val source = Input(UInt((sourceW * numLanes).W))
// Chisel can't interface with Verilog 2D port, so flatten all lanes into
@@ -2074,24 +2055,40 @@ class MemFuzzerImp(
sim.io.clock := clock
sim.io.reset := reset.asBool
sim.io.a.ready := true.B // FIXME
sim.io.a.ready := VecInit(outer.laneNodes.map { node =>
val (tlOut, _) = node.out(0)
tlOut.a.ready
}).asUInt
io.finished := sim.io.finished
// Read output from Verilog BlackBox
// Split output of SimMemTrace, which is flattened across all lanes, back to each lane's.
val laneReqs = Wire(Vec(config.numLanes, new TraceLine))
val addrW = laneReqs(0).address.getWidth
val sizeW = laneReqs(0).size.getWidth
val dataW = laneReqs(0).data.getWidth
// connect Verilog <-> Chisel IO
// Verilog IO flattened across all lanes
val laneReqs = Wire(Vec(config.numLanes, Decoupled(new TraceLine)))
val addrW = laneReqs(0).bits.address.getWidth
val sizeW = laneReqs(0).bits.size.getWidth
val dataW = laneReqs(0).bits.data.getWidth
laneReqs.zipWithIndex.foreach { case (req, i) =>
req.valid := sim.io.a.valid(i)
req.source := 0.U // DPI fuzzer doesn't generate contain source id
req.address := sim.io.a.address(addrW * (i + 1) - 1, addrW * i)
req.is_store := sim.io.a.is_store(i)
req.size := sim.io.a.size(sizeW * (i + 1) - 1, sizeW * i)
req.data := sim.io.a.data(dataW * (i + 1) - 1, dataW * i)
req.bits.source := 0.U // DPI fuzzer doesn't generate contain source id
req.bits.address := sim.io.a.address(addrW * (i + 1) - 1, addrW * i)
req.bits.is_store := sim.io.a.is_store(i)
req.bits.size := sim.io.a.size(sizeW * (i + 1) - 1, sizeW * i)
req.bits.data := sim.io.a.data(dataW * (i + 1) - 1, dataW * i)
}
sim.io.a.ready := VecInit(laneReqs.map(_.ready)).asUInt
val laneResps = Wire(Vec(config.numLanes, Flipped(Decoupled(new TraceLine))))
laneResps.zipWithIndex.foreach { case (resp, i) =>
resp.ready := sim.io.d.ready(i)
// TODO: not handled in DPI
resp.bits.source := DontCare
resp.bits.address := DontCare
resp.bits.data := DontCare
}
sim.io.d.valid := VecInit(laneResps.map(_.valid)).asUInt
sim.io.d.is_store := VecInit(laneResps.map(_.bits.is_store)).asUInt
sim.io.d.size := VecInit(laneResps.map(_.bits.size)).asUInt
val sourceGens = Seq.fill(config.numLanes)(
Module(
@@ -2103,27 +2100,29 @@ class MemFuzzerImp(
)
// Take requests off of the queue and generate TL requests
(outer.laneNodes zip laneReqs).zipWithIndex.foreach {
case ((node, req), lane) =>
(outer.laneNodes zip (laneReqs zip laneResps)).zipWithIndex.foreach {
case ((node, (req, resp)), lane) =>
val (tlOut, edge) = node.out(0)
// Requests --------------------------------------------------------------
//
// Core only makes accesses of granularity larger than a word, so we want
// the trace driver to act so as well.
// That means if req.size is smaller than word size, we need to pad data
// with zeros to generate a word-size request, and set mask accordingly.
val offsetInWord = req.address % config.wordSizeInBytes.U
val subword = req.size < log2Ceil(config.wordSizeInBytes).U
val offsetInWord = req.bits.address % config.wordSizeInBytes.U
val subword = req.bits.size < log2Ceil(config.wordSizeInBytes).U
// `mask` is currently unused
// val mask = Wire(UInt(config.wordSizeInBytes.W))
val wordData = Wire(UInt((config.wordSizeInBytes * 8 * 2).W))
val sizeInBytes = Wire(UInt((sizeW + 1).W))
sizeInBytes := (1.U) << req.size
sizeInBytes := (1.U) << req.bits.size
// mask := Mux(subword, (~((~0.U(64.W)) << sizeInBytes)) << offsetInWord, ~0.U)
wordData := Mux(subword, req.data << (offsetInWord * 8.U), req.data)
wordData := Mux(subword, req.bits.data << (offsetInWord * 8.U), req.bits.data)
val wordAlignedAddress =
req.address & ~((1 << log2Ceil(config.wordSizeInBytes)) - 1).U(addrW.W)
val wordAlignedSize = Mux(subword, 2.U, req.size)
req.bits.address & ~((1 << log2Ceil(config.wordSizeInBytes)) - 1).U(addrW.W)
val wordAlignedSize = Mux(subword, 2.U, req.bits.size)
val sourceGen = sourceGens(lane)
sourceGen.io.gen := tlOut.a.fire
@@ -2144,19 +2143,26 @@ class MemFuzzerImp(
toAddress = wordAlignedAddress,
lgSize = wordAlignedSize
)
val legal = Mux(req.is_store, plegal, glegal)
val bits = Mux(req.is_store, pbits, gbits)
val legal = Mux(req.bits.is_store, plegal, glegal)
val bits = Mux(req.bits.is_store, pbits, gbits)
tlOut.a.valid := req.valid && sourceGen.io.id.valid
// req.ready := tlOut.a.ready && sourceGen.io.id.valid
req.ready := tlOut.a.ready && sourceGen.io.id.valid
when(tlOut.a.fire) {
assert(legal, "illegal TL req gen")
}
tlOut.a.bits := bits
// Responses -------------------------------------------------------------
//
tlOut.d.ready := resp.ready
resp.valid := tlOut.d.valid
resp.bits.is_store := !edge.hasData(tlOut.d.bits)
resp.bits.size := tlOut.d.bits.size
tlOut.b.ready := true.B
tlOut.c.valid := false.B
tlOut.d.ready := sim.io.d.ready(lane) // FIXME
tlOut.e.valid := false.B
// debug
@@ -2168,9 +2174,9 @@ class MemFuzzerImp(
tlOut.a.bits.address,
tlOut.a.bits.size,
tlOut.a.bits.mask,
req.is_store,
req.bits.is_store,
tlOut.a.bits.data,
req.data
req.bits.data
)
}
dontTouch(tlOut.a)
@@ -2210,6 +2216,9 @@ class SimMemFuzzer(numLanes: Int) extends BlackBox
// Response-side port of the blackbox; every field is flattened across lanes.
val d =
new Bundle {
// Driven by the blackbox, numLanes bits wide (indexed per lane by the user).
val ready = Output(UInt(numLanes.W))
// Driven into the blackbox, numLanes bits wide each.
val valid = Input(UInt(numLanes.W))
val is_store = Input(UInt(numLanes.W))
// sizeW bits per lane, numLanes lanes.
val size = Input(UInt((sizeW * numLanes).W))
}
})