diff --git a/src/main/scala/radiance/memory/Coalescing.scala b/src/main/scala/radiance/memory/Coalescing.scala index 66cefe7..cb6d26d 100644 --- a/src/main/scala/radiance/memory/Coalescing.scala +++ b/src/main/scala/radiance/memory/Coalescing.scala @@ -1502,7 +1502,6 @@ class MemTraceDriver( } trait HasTraceLine { - val valid: UInt val source: UInt val address: UInt val is_store: UInt @@ -1513,7 +1512,6 @@ trait HasTraceLine { // Used for both request and response. Response had address set to 0 // NOTE: these widths have to agree with what's hardcoded in Verilog. class TraceLine extends Bundle with HasTraceLine { - val valid = Bool() val source = UInt(32.W) val address = UInt(64.W) val is_store = Bool() @@ -1538,7 +1536,7 @@ class MemTraceDriverImp( // downstream take requests from the queue individually for each lane, // but do synchronized enqueue whenever all lane queue is ready to prevent // drifts between the lane. - val reqQueues = Seq.fill(config.numLanes)(Module(new Queue(new TraceLine, 2))) + val reqQueues = Seq.fill(config.numLanes)(Module(new Queue(Valid(new TraceLine), 2))) // Are we safe to read the next warp? val reqQueueAllReady = reqQueues.map(_.io.enq.ready).reduce(_ && _) @@ -1552,17 +1550,17 @@ class MemTraceDriverImp( // Read output from Verilog BlackBox // Split output of SimMemTrace, which is flattened across all lanes,back to each lane's. - val laneReqs = Wire(Vec(config.numLanes, new TraceLine)) - val addrW = laneReqs(0).address.getWidth - val sizeW = laneReqs(0).size.getWidth - val dataW = laneReqs(0).data.getWidth + val laneReqs = Wire(Vec(config.numLanes, Valid(new TraceLine))) + val addrW = laneReqs(0).bits.address.getWidth + val sizeW = laneReqs(0).bits.size.getWidth + val dataW = laneReqs(0).bits.data.getWidth laneReqs.zipWithIndex.foreach { case (req, i) => req.valid := sim.io.trace_read.valid(i) - req.source := 0.U // driver trace doesn't contain source id - req.address := sim.io.trace_read.address(addrW * (i + 1) - 1, addrW * i) - req.is_store := sim.io.trace_read.is_store(i) - req.size := sim.io.trace_read.size(sizeW * (i + 1) - 1, sizeW * i) - req.data := sim.io.trace_read.data(dataW * (i + 1) - 1, dataW * i) + req.bits.source := 0.U // driver trace doesn't contain source id + req.bits.address := sim.io.trace_read.address(addrW * (i + 1) - 1, addrW * i) + req.bits.is_store := sim.io.trace_read.is_store(i) + req.bits.size := sim.io.trace_read.size(sizeW * (i + 1) - 1, sizeW * i) + req.bits.data := sim.io.trace_read.data(dataW * (i + 1) - 1, dataW * i) } // Not all fire because trace cycle has to advance even when there is no valid @@ -1610,19 +1608,19 @@ class MemTraceDriverImp( // the trace driver to act so as well. // That means if req.size is smaller than word size, we need to pad data // with zeros to generate a word-size request, and set mask accordingly. - val offsetInWord = req.address % config.wordSizeInBytes.U - val subword = req.size < log2Ceil(config.wordSizeInBytes).U + val offsetInWord = req.bits.address % config.wordSizeInBytes.U + val subword = req.bits.size < log2Ceil(config.wordSizeInBytes).U // `mask` is currently unused // val mask = Wire(UInt(config.wordSizeInBytes.W)) val wordData = Wire(UInt((config.wordSizeInBytes * 8 * 2).W)) val sizeInBytes = Wire(UInt((sizeW + 1).W)) - sizeInBytes := (1.U) << req.size + sizeInBytes := (1.U) << req.bits.size // mask := Mux(subword, (~((~0.U(64.W)) << sizeInBytes)) << offsetInWord, ~0.U) - wordData := Mux(subword, req.data << (offsetInWord * 8.U), req.data) + wordData := Mux(subword, req.bits.data << (offsetInWord * 8.U), req.bits.data) val wordAlignedAddress = - req.address & ~((1 << log2Ceil(config.wordSizeInBytes)) - 1).U(addrW.W) - val wordAlignedSize = Mux(subword, 2.U, req.size) + req.bits.address & ~((1 << log2Ceil(config.wordSizeInBytes)) - 1).U(addrW.W) + val wordAlignedSize = Mux(subword, 2.U, req.bits.size) val sourceGen = sourceGens(lane) sourceGen.io.gen := tlOut.a.fire @@ -1644,8 +1642,8 @@ class MemTraceDriverImp( toAddress = hashToValidPhyAddr(wordAlignedAddress), lgSize = wordAlignedSize ) - val legal = Mux(req.is_store, plegal, glegal) - val bits = Mux(req.is_store, pbits, gbits) + val legal = Mux(req.bits.is_store, plegal, glegal) + val bits = Mux(req.bits.is_store, pbits, gbits) tlOut.a.valid := reqQ.io.deq.valid && syncedSourceGenValid when(tlOut.a.fire) { @@ -1667,9 +1665,9 @@ class MemTraceDriverImp( tlOut.a.bits.address, tlOut.a.bits.size, tlOut.a.bits.mask, - req.is_store, + req.bits.is_store, tlOut.a.bits.data, - req.data + req.bits.data ) } dontTouch(tlOut.a) @@ -1809,8 +1807,8 @@ class MemTraceLogger( simResp.get.io.reset := reset.asBool } - val laneReqs = Wire(Vec(numLanes, new TraceLine)) - val laneResps = Wire(Vec(numLanes, new TraceLine)) + val laneReqs = Wire(Vec(numLanes, Valid(new TraceLine))) + val laneResps = Wire(Vec(numLanes, Valid(new TraceLine))) assert( numLanes == node.in.length, @@ -1828,12 +1826,12 @@ class MemTraceLogger( // Only log trace when fired, e.g. both upstream and downstream is ready // and transaction happened. req.valid := tlIn.a.fire - req.size := tlIn.a.bits.size - req.is_store := TLUtils.AOpcodeIsStore(tlIn.a.bits.opcode, tlIn.a.fire) - req.source := tlIn.a.bits.source + req.bits.size := tlIn.a.bits.size + req.bits.is_store := TLUtils.AOpcodeIsStore(tlIn.a.bits.opcode, tlIn.a.fire) + req.bits.source := tlIn.a.bits.source // TL always carries the exact unaligned address that the client // originally requested, so no postprocessing required - req.address := tlIn.a.bits.address + req.bits.address := tlIn.a.bits.address when(req.valid) { TLPrintf( @@ -1842,9 +1840,9 @@ class MemTraceLogger( tlIn.a.bits.address, tlIn.a.bits.size, tlIn.a.bits.mask, - req.is_store, + req.bits.is_store, tlIn.a.bits.data, - req.data + req.bits.data ) } @@ -1868,9 +1866,9 @@ class MemTraceLogger( val dataW = tlIn.params.dataBits val sizeInBits = (1.U(1.W) << tlIn.a.bits.size) << 3.U val mask = ~(~(0.U(dataW.W)) << sizeInBits) - req.data := mask & (tlIn.a.bits.data >> (trailingZerosInMask * 8.U)) - // when (req.valid) { - // printf("trailingZerosInMask=%d, mask=%x, data=%x\n", trailingZerosInMask, mask, req.data) + req.bits.data := mask & (tlIn.a.bits.data >> (trailingZerosInMask * 8.U)) + // when (req.bits.valid) { + // printf("trailingZerosInMask=%d, mask=%x, data=%x\n", trailingZerosInMask, mask, req.bits.data) // } // responses on TL D channel @@ -1878,18 +1876,18 @@ class MemTraceLogger( // Only log trace when fired, e.g. both upstream and downstream is ready // and transaction happened. resp.valid := tlOut.d.fire - resp.size := tlOut.d.bits.size - resp.is_store := TLUtils.DOpcodeIsStore( + resp.bits.size := tlOut.d.bits.size + resp.bits.is_store := TLUtils.DOpcodeIsStore( tlOut.d.bits.opcode, tlOut.d.fire ) - resp.source := tlOut.d.bits.source + resp.bits.source := tlOut.d.bits.source // NOTE: TL D channel doesn't carry address nor mask, so there's no easy // way to figure out which bytes the master actually use. Since we // don't care too much about addresses in the trace anyway, just store // the entire bits. - resp.address := 0.U - resp.data := tlOut.d.bits.data + resp.bits.address := 0.U + resp.bits.data := tlOut.d.bits.data } // stats @@ -1903,13 +1901,13 @@ class MemTraceLogger( } val reqBytesThisCycle = laneReqs - .map { l => Mux(l.valid, 1.U(64.W) << l.size, 0.U(64.W)) } + .map { l => Mux(l.valid, 1.U(64.W) << l.bits.size, 0.U(64.W)) } .reduce { (b0, b1) => b0 + b1 } val respBytesThisCycle = laneResps - .map { l => Mux(l.valid, 1.U(64.W) << l.size, 0.U(64.W)) } + .map { l => Mux(l.valid, 1.U(64.W) << l.bits.size, 0.U(64.W)) } .reduce { (b0, b1) => b0 + b1 } @@ -1922,42 +1920,25 @@ class MemTraceLogger( // // This is a clunky workaround of the fact that Chisel doesn't allow partial // assignment to a bitfield range of a wide signal. - def flattenTrace( - simIO: Bundle with HasTraceLine, - perLane: Vec[TraceLine] - ) = { - // these will get optimized out - val vecValid = Wire(Vec(numLanes, chiselTypeOf(perLane(0).valid))) - val vecSource = Wire(Vec(numLanes, chiselTypeOf(perLane(0).source))) - val vecAddress = Wire(Vec(numLanes, chiselTypeOf(perLane(0).address))) - val vecIsStore = Wire(Vec(numLanes, chiselTypeOf(perLane(0).is_store))) - val vecSize = Wire(Vec(numLanes, chiselTypeOf(perLane(0).size))) - val vecData = Wire(Vec(numLanes, chiselTypeOf(perLane(0).data))) - perLane.zipWithIndex.foreach { case (l, i) => - vecValid(i) := l.valid - vecSource(i) := l.source - vecAddress(i) := l.address - vecIsStore(i) := l.is_store - vecSize(i) := l.size - vecData(i) := l.data - } - simIO.valid := vecValid.asUInt - simIO.source := vecSource.asUInt - simIO.address := vecAddress.asUInt - simIO.is_store := vecIsStore.asUInt - simIO.size := vecSize.asUInt - simIO.data := vecData.asUInt - } - if (simReq.isDefined) { - flattenTrace(simReq.get.io.trace_log, laneReqs) + simReq.get.io.trace_log.valid := VecInit(laneReqs.map(_.valid)).asUInt + simReq.get.io.trace_log.source := VecInit(laneReqs.map(_.bits.source)).asUInt + simReq.get.io.trace_log.address := VecInit(laneReqs.map(_.bits.address)).asUInt + simReq.get.io.trace_log.is_store := VecInit(laneReqs.map(_.bits.is_store)).asUInt + simReq.get.io.trace_log.size := VecInit(laneReqs.map(_.bits.size)).asUInt + simReq.get.io.trace_log.data := VecInit(laneReqs.map(_.bits.data)).asUInt assert( simReq.get.io.trace_log.ready === true.B, "MemTraceLogger is expected to be always ready" ) } if (simResp.isDefined) { - flattenTrace(simResp.get.io.trace_log, laneResps) + simResp.get.io.trace_log.valid := VecInit(laneResps.map(_.valid)).asUInt + simResp.get.io.trace_log.source := VecInit(laneResps.map(_.bits.source)).asUInt + simResp.get.io.trace_log.address := VecInit(laneResps.map(_.bits.address)).asUInt + simResp.get.io.trace_log.is_store := VecInit(laneResps.map(_.bits.is_store)).asUInt + simResp.get.io.trace_log.size := VecInit(laneResps.map(_.bits.size)).asUInt + simResp.get.io.trace_log.data := VecInit(laneResps.map(_.bits.data)).asUInt assert( simResp.get.io.trace_log.ready === true.B, "MemTraceLogger is expected to be always ready" @@ -1994,7 +1975,7 @@ class SimMemTraceLogger( val clock = Input(Clock()) val reset = Input(Bool()) - val trace_log = new Bundle with HasTraceLine { + val trace_log = new Bundle { val valid = Input(UInt(numLanes.W)) val source = Input(UInt((sourceW * numLanes).W)) // Chisel can't interface with Verilog 2D port, so flatten all lanes into @@ -2074,24 +2055,40 @@ class MemFuzzerImp( sim.io.clock := clock sim.io.reset := reset.asBool - sim.io.a.ready := true.B // FIXME + sim.io.a.ready := VecInit(outer.laneNodes.map { node => + val (tlOut, _) = node.out(0) + tlOut.a.ready + }).asUInt io.finished := sim.io.finished - // Read output from Verilog BlackBox - // Split output of SimMemTrace, which is flattened across all lanes,back to each lane's. - val laneReqs = Wire(Vec(config.numLanes, new TraceLine)) - val addrW = laneReqs(0).address.getWidth - val sizeW = laneReqs(0).size.getWidth - val dataW = laneReqs(0).data.getWidth + // connect Verilog <-> Chisel IO + // Verilog IO flattened across all lanes + val laneReqs = Wire(Vec(config.numLanes, Decoupled(new TraceLine))) + val addrW = laneReqs(0).bits.address.getWidth + val sizeW = laneReqs(0).bits.size.getWidth + val dataW = laneReqs(0).bits.data.getWidth laneReqs.zipWithIndex.foreach { case (req, i) => req.valid := sim.io.a.valid(i) - req.source := 0.U // DPI fuzzer doesn't generate contain source id - req.address := sim.io.a.address(addrW * (i + 1) - 1, addrW * i) - req.is_store := sim.io.a.is_store(i) - req.size := sim.io.a.size(sizeW * (i + 1) - 1, sizeW * i) - req.data := sim.io.a.data(dataW * (i + 1) - 1, dataW * i) + req.bits.source := 0.U // DPI fuzzer doesn't generate contain source id + req.bits.address := sim.io.a.address(addrW * (i + 1) - 1, addrW * i) + req.bits.is_store := sim.io.a.is_store(i) + req.bits.size := sim.io.a.size(sizeW * (i + 1) - 1, sizeW * i) + req.bits.data := sim.io.a.data(dataW * (i + 1) - 1, dataW * i) } + sim.io.a.ready := VecInit(laneReqs.map(_.ready)).asUInt + + val laneResps = Wire(Vec(config.numLanes, Flipped(Decoupled(new TraceLine)))) + laneResps.zipWithIndex.foreach { case (resp, i) => + resp.ready := sim.io.d.ready(i) + // TODO: not handled in DPI + resp.bits.source := DontCare + resp.bits.address := DontCare + resp.bits.data := DontCare + } + sim.io.d.valid := VecInit(laneResps.map(_.valid)).asUInt + sim.io.d.is_store := VecInit(laneResps.map(_.bits.is_store)).asUInt + sim.io.d.size := VecInit(laneResps.map(_.bits.size)).asUInt val sourceGens = Seq.fill(config.numLanes)( Module( @@ -2103,27 +2100,29 @@ class MemFuzzerImp( ) // Take requests off of the queue and generate TL requests - (outer.laneNodes zip laneReqs).zipWithIndex.foreach { - case ((node, req), lane) => + (outer.laneNodes zip (laneReqs zip laneResps)).zipWithIndex.foreach { + case ((node, (req, resp)), lane) => val (tlOut, edge) = node.out(0) + // Requests -------------------------------------------------------------- + // // Core only makes accesses of granularity larger than a word, so we want // the trace driver to act so as well. // That means if req.size is smaller than word size, we need to pad data // with zeros to generate a word-size request, and set mask accordingly. - val offsetInWord = req.address % config.wordSizeInBytes.U - val subword = req.size < log2Ceil(config.wordSizeInBytes).U + val offsetInWord = req.bits.address % config.wordSizeInBytes.U + val subword = req.bits.size < log2Ceil(config.wordSizeInBytes).U // `mask` is currently unused // val mask = Wire(UInt(config.wordSizeInBytes.W)) val wordData = Wire(UInt((config.wordSizeInBytes * 8 * 2).W)) val sizeInBytes = Wire(UInt((sizeW + 1).W)) - sizeInBytes := (1.U) << req.size + sizeInBytes := (1.U) << req.bits.size // mask := Mux(subword, (~((~0.U(64.W)) << sizeInBytes)) << offsetInWord, ~0.U) - wordData := Mux(subword, req.data << (offsetInWord * 8.U), req.data) + wordData := Mux(subword, req.bits.data << (offsetInWord * 8.U), req.bits.data) val wordAlignedAddress = - req.address & ~((1 << log2Ceil(config.wordSizeInBytes)) - 1).U(addrW.W) - val wordAlignedSize = Mux(subword, 2.U, req.size) + req.bits.address & ~((1 << log2Ceil(config.wordSizeInBytes)) - 1).U(addrW.W) + val wordAlignedSize = Mux(subword, 2.U, req.bits.size) val sourceGen = sourceGens(lane) sourceGen.io.gen := tlOut.a.fire @@ -2144,19 +2143,26 @@ class MemFuzzerImp( toAddress = wordAlignedAddress, lgSize = wordAlignedSize ) - val legal = Mux(req.is_store, plegal, glegal) - val bits = Mux(req.is_store, pbits, gbits) + val legal = Mux(req.bits.is_store, plegal, glegal) + val bits = Mux(req.bits.is_store, pbits, gbits) tlOut.a.valid := req.valid && sourceGen.io.id.valid - // req.ready := tlOut.a.ready && sourceGen.io.id.valid + req.ready := tlOut.a.ready && sourceGen.io.id.valid when(tlOut.a.fire) { assert(legal, "illegal TL req gen") } tlOut.a.bits := bits + + // Responses ------------------------------------------------------------- + // + tlOut.d.ready := resp.ready + resp.valid := tlOut.d.valid + resp.bits.is_store := !edge.hasData(tlOut.d.bits) + resp.bits.size := tlOut.d.bits.size + tlOut.b.ready := true.B tlOut.c.valid := false.B - tlOut.d.ready := sim.io.d.ready(lane) // FIXME tlOut.e.valid := false.B // debug @@ -2168,9 +2174,9 @@ class MemFuzzerImp( tlOut.a.bits.address, tlOut.a.bits.size, tlOut.a.bits.mask, - req.is_store, + req.bits.is_store, tlOut.a.bits.data, - req.data + req.bits.data ) } dontTouch(tlOut.a) @@ -2210,6 +2216,9 @@ class SimMemFuzzer(numLanes: Int) extends BlackBox val d = new Bundle { val ready = Output(UInt(numLanes.W)) + val valid = Input(UInt(numLanes.W)) + val is_store = Input(UInt(numLanes.W)) + val size = Input(UInt((sizeW * numLanes).W)) } })