diff --git a/src/main/resources/csrc/SimMemTrace.cc b/src/main/resources/csrc/SimMemTrace.cc index 08540bc..6c08858 100644 --- a/src/main/resources/csrc/SimMemTrace.cc +++ b/src/main/resources/csrc/SimMemTrace.cc @@ -22,7 +22,9 @@ MemTraceReader::MemTraceReader(const std::string &filename) infile.open(filename); if (infile.fail()) { - fprintf(stderr, "failed to open file %s\n", filename.c_str()); + fprintf(stderr, "MemTraceReader: error: failed to open file %s\n", + filename.c_str()); + exit(EXIT_FAILURE); } } @@ -60,8 +62,6 @@ void MemTraceReader::parse(const bool has_source) { } if (!(infile >> line.cycle >> loadstore >> line.core_id >> line.lane_id)) { - printf("char=[%c]\n", infile.peek()); - // assert(!infile.eof()); error(fileline, "failed parsing cycle..lane_id"); } if (has_source && !(infile >> source)) { @@ -101,8 +101,6 @@ MemTraceLine MemTraceReader::read_trace_at(const long cycle, const int lane_id, MemTraceLine line; line.valid = false; - // printf("tick(): cycle=%ld\n", cycle); - if (finished()) { return line; } @@ -112,7 +110,11 @@ MemTraceLine MemTraceReader::read_trace_at(const long cycle, const int lane_id, // the next line is in the future. if (line.cycle < cycle) { long fileline = read_pos - std::cbegin(trace_buf) + 1; - error(fileline, "some trace lines are left unread in the past"); + error(fileline, "some trace lines are left unread in the past. " + "Tried cycle=" + + std::to_string(cycle) + + ", found line.cycle=" + std::to_string(line.cycle) + + ". Is NUM_LANES set correctly?"); return MemTraceLine{}; } @@ -134,14 +136,17 @@ MemTraceLine MemTraceReader::read_trace_at(const long cycle, const int lane_id, // monotonically increment read_pos. lane_id need not be contiguous, e.g. // 0->1->3 is fine. ++read_pos; - return line; } else { // For debugging purposes, instead of early-returning on // !trace_read_ready, print something to notify we are blocking a valid // trace line. printf("All Lanes Blocked on this cycle! cycle=%ld \n", cycle); - return MemTraceLine{}; } + // We want to return valid line regardless of `trace_read_ready` or not, + // because we want to let the driver know that it missed a valid line at the + // given cycle, so that it holds its cycle counter and safely reads back the + // line in the future. + return line; } assert(!"unreachable"); diff --git a/src/main/resources/vsrc/SimMemTrace.v b/src/main/resources/vsrc/SimMemTrace.v index 4d630fd..74594cb 100644 --- a/src/main/resources/vsrc/SimMemTrace.v +++ b/src/main/resources/vsrc/SimMemTrace.v @@ -39,49 +39,32 @@ module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) ( output [`DATA_WIDTH*NUM_LANES-1:0] trace_read_data, output trace_read_finished ); - bit __in_valid [NUM_LANES-1:0]; - longint __in_address [NUM_LANES-1:0]; - - bit __in_is_store [NUM_LANES-1:0]; + bit __in_valid [NUM_LANES-1:0]; + longint __in_address [NUM_LANES-1:0]; + bit __in_is_store [NUM_LANES-1:0]; reg [`LOGSIZE_WIDTH-1:0] __in_size [NUM_LANES-1:0]; - longint __in_data [NUM_LANES-1:0]; - - bit __in_finished; - string __uartlog; - - // Cycle counter that is used to query C parser whether we have a request - // coming in at the current cycle. - - - // registers that stage outputs of the C parser - reg [NUM_LANES-1:0] __in_valid_wire; - reg [`DATA_WIDTH-1:0] __in_address_wire [NUM_LANES-1:0]; - - reg [NUM_LANES-1:0] __in_is_store_wire; - reg [`LOGSIZE_WIDTH-1:0] __in_size_wire [NUM_LANES-1:0]; - reg [`DATA_WIDTH-1:0] __in_data_wire [NUM_LANES-1:0]; - reg __in_finished_wire; + longint __in_data [NUM_LANES-1:0]; + bit __in_finished; genvar g; - generate for (g = 0; g < NUM_LANES; g = g + 1) begin - assign trace_read_valid[g] = __in_valid_wire[g]; - assign trace_read_address[`DATA_WIDTH*(g+1)-1:`DATA_WIDTH*g] = __in_address_wire[g]; + assign trace_read_valid[g] = __in_valid[g]; + assign trace_read_address[`DATA_WIDTH*(g+1)-1:`DATA_WIDTH*g] = __in_address[g]; - assign trace_read_is_store[g] = __in_is_store_wire[g]; - assign trace_read_size[`LOGSIZE_WIDTH*(g+1)-1:`LOGSIZE_WIDTH*g] = __in_size_wire[g]; - assign trace_read_data[`DATA_WIDTH*(g+1)-1:`DATA_WIDTH*g] = __in_data_wire[g]; + assign trace_read_is_store[g] = __in_is_store[g]; + assign trace_read_size[`LOGSIZE_WIDTH*(g+1)-1:`LOGSIZE_WIDTH*g] = __in_size[g]; + assign trace_read_data[`DATA_WIDTH*(g+1)-1:`DATA_WIDTH*g] = __in_data[g]; end endgenerate - assign trace_read_finished = __in_finished_wire; + assign trace_read_finished = __in_finished; initial begin /* $value$plusargs("uartlog=%s", __uartlog); */ memtrace_init(FILENAME); end - always @(*) begin + always @(posedge clock) begin if (reset) begin for (integer tid = 0; tid < NUM_LANES; tid = tid + 1) begin __in_valid[tid] = 1'b0; @@ -91,55 +74,29 @@ module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) ( __in_size[tid] = `LOGSIZE_WIDTH'b0; __in_data[tid] = `DATA_WIDTH'b0; end - __in_finished = 1'b0; - - //cycle_counter <= `DATA_WIDTH'b0; - - // setting default value for register to avoid latches - for (integer tid = 0; tid < NUM_LANES; tid = tid + 1) begin - __in_valid_wire[tid] = 1'b0; - __in_address_wire[tid] = `DATA_WIDTH'b0; - - __in_is_store_wire[tid] = 1'b0; - __in_size_wire[tid] = `LOGSIZE_WIDTH'b0; - __in_data_wire[tid] = `DATA_WIDTH'b0; - end - - __in_finished_wire = 1'b0; end else begin + // We have to write to __in_ regs only when trace_read_ready, or + // otherwise we might overwrite lines that were previously valid + // but the downstream missed by being not ready. + if (trace_read_ready) begin + for (integer tid = 0; tid < NUM_LANES; tid = tid + 1) begin + memtrace_query( + trace_read_ready, + trace_read_cycle, + tid, - // Getting values from C function into pseudeo register - for (integer tid = 0; tid < NUM_LANES; tid = tid + 1) begin - memtrace_query( - trace_read_ready, - // Since parsed results are latched to the output on the next - // cycle due to staging registers, we need to pass in the next cycle - // to sync up. - trace_read_cycle, // the left replace next_cycle_counter, - tid, + __in_valid[tid], + __in_address[tid], - __in_valid[tid], - __in_address[tid], - - __in_is_store[tid], - __in_size[tid], - __in_data[tid], + __in_is_store[tid], + __in_size[tid], + __in_data[tid], - __in_finished - ); + __in_finished + ); + end end - - // Connect values from pseudo register into verilog register - for (integer tid = 0; tid < NUM_LANES; tid = tid + 1) begin - __in_valid_wire[tid] = __in_valid[tid]; - __in_address_wire[tid] = __in_address[tid]; - - __in_is_store_wire[tid] = __in_is_store[tid]; - __in_size_wire[tid] = __in_size[tid]; - __in_data_wire[tid] = __in_data[tid]; - end - __in_finished_wire = __in_finished; end end endmodule diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index 90237b7..8ded755 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -233,11 +233,13 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) e })) // shift hint is when the heads have no more coalescable left this or next cycle - val shiftHint = !(io.coalescable zip io.invalidate.bits.map(_(0))).map { case (c, i) => - c && !(io.invalidate.valid && i) + val shiftHint = !(io.coalescable zip io.invalidate.bits.map(_(0))).map { case (c, inv) => + c && !(io.invalidate.valid && inv) }.reduce(_ || _) val syncedEnqValid = io.queue.enq.map(_.valid).reduce(_ || _) - val syncedDeqValid = io.queue.deq.map(x => x.valid && !x.ready).reduce(_ || _) // valid and not fire + // syncedDeqValidNextCycle being true means the arbiter has completed + // processing all of the ready-to-go requests. + val syncedDeqValidNextCycle = io.queue.deq.map(x => x.valid && !x.ready).reduce(_ || _) // valid and not fire for (i <- 0 until config.numLanes) { val enq = io.queue.enq(i) @@ -247,7 +249,7 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) e ctrl.full := writePtr(i) === entries.U ctrl.empty := writePtr(i) === 0.U // shift when no outstanding dequeue, no more coalescable chunks, and not empty - ctrl.shift := !syncedDeqValid && shiftHint && !ctrl.empty + ctrl.shift := !syncedDeqValidNextCycle && shiftHint && !ctrl.empty // dequeue is valid when: // head entry is valid, has not been processed by downstream, and is not coalescable @@ -293,6 +295,9 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) e } } + // When doing spatial-only coalescing, queues should never drift from each + // other, i.e. the queue heads should always contain mem requests from the + // same instruction. val queueInSync = controlSignals.map(_ === controlSignals.head).reduce(_ && _) && writePtr.map(_ === writePtr.head).reduce(_ && _) assert(queueInSync, "shift queue lanes are not in sync") @@ -326,23 +331,15 @@ class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[ReqQueueEntry], val leaders = io.window.elts.map(_.head) val leadersValid = io.window.mask.map(_.asBools.head) - // When doing spatial-only coalescing, queues should never drift from each - // other, i.e. the queue heads should always contain mem requests from the - // same instruction. - // FIXME: This relies on the MemTraceDriver's behavior of generating TL - // requests with full source info even when the corresponding lane is not - // active. - def testNoQueueDrift: Bool = leaders.map(_.source === leaders.head.source).reduce(_ || _) def printQueueHeads = { leaders.zipWithIndex.foreach{ case (head, i) => printf(s"ReqQueueEntry[${i}].head = v:%d, source:%d, addr:%x\n", leadersValid(i), head.source, head.address) } } - when (leadersValid.reduce(_ || _)) { - assert(testNoQueueDrift, "unexpected drift between lane request queues") - // printQueueHeads - } + // when (leadersValid.reduce(_ || _)) { + // printQueueHeads + // } val size = coalLogSize val addrMask = (((1 << config.addressWidth) - 1) - ((1 << size) - 1)).U @@ -375,14 +372,21 @@ class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[ReqQueueEntry], .reduce(_ +& _)) val canCoalesce = matchCounts.map(_ > 1.U) - // Elect the leader out of all potential leaders that have matchCounts > 1. + // Elect the leader that has the most match counts. // TODO: potentially expensive: magnitude comparator - // Maybe choose leftmost leader (priority encoder) instead of argmax - val chosenLeaderIdx = matchCounts.zipWithIndex.map { - case (c, i) => (c, i.U) - }.reduce[(UInt, UInt)] { case ((c0, i), (c1, j)) => - (Mux(c0 >= c1, c0, c1), Mux(c0 >= c1, i, j)) - }._2 + def chooseLeaderArgMax(matchCounts: Seq[UInt]): UInt = { + matchCounts.zipWithIndex.map { + case (c, i) => (c, i.U) + }.reduce[(UInt, UInt)] { case ((c0, i), (c1, j)) => + (Mux(c0 >= c1, c0, c1), Mux(c0 >= c1, i, j)) + }._2 + } + // Elect leader by choosing the smallest-index lane that has a valid + // match, i.e. using priority encoder. + def chooseLeaderPriorityEncoder(matchCounts: Seq[UInt]): UInt = { + PriorityEncoder(matchCounts.map(_ > 1.U)) + } + val chosenLeaderIdx = chooseLeaderPriorityEncoder(matchCounts) val chosenLeader = VecInit(leaders)(chosenLeaderIdx) // matchTable for the chosen lane, but converted to a Vec[UInt] @@ -578,11 +582,14 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends reqQueues.io.coalescable := coalescer.io.coalescable reqQueues.io.invalidate := coalescer.io.invalidate - // Per-lane request and response queues + // =========================================================================== + // Request flow + // =========================================================================== // // Override IdentityNode implementation so that we can instantiate // queues between input and output edges to buffer requests and responses. // See IdentityNode definition in `diplomacy/Nodes.scala`. + // (outer.cpuNode.in zip outer.cpuNode.out).zipWithIndex.foreach { case (((tlIn, _), (tlOut, edgeOut)), lane) => // Request queue @@ -604,7 +611,10 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends val deq = reqQueues.io.queue.deq(lane) enq.valid := tlIn.a.valid enq.bits := req - deq.ready := true.B // TODO: deq.ready should respect downstream arbiter + // TODO: deq.ready should respect downstream arbiter + deq.ready := true.B + // Stall upstream core or memtrace driver when shiftqueue is not ready + tlIn.a.ready := enq.ready tlOut.a.valid := deq.valid tlOut.a.bits := deq.bits.toTLA(edgeOut) @@ -641,11 +651,12 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends tlCoal.e.valid := false.B - // ================================================================== - // ****************************************************************** - // ************************* REORG BOUNDARY ************************* - // ****************************************************************** - // ================================================================== + // =========================================================================== + // Response flow + // =========================================================================== + // + // Connect uncoalescer output and noncoalesced response ports to the response + // queues. // The maximum number of requests from a single lane that can go into a // coalesced request. Upper bound is min(DEPTH, 2**sourceWidth). @@ -1083,24 +1094,18 @@ class TraceLine extends Bundle with HasTraceLine { class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, traceFile: String) extends LazyModuleImp(outer) with UnitTestModule { + // Current cycle mark to read from trace + val traceReadCycle = RegInit(1.U(64.W)) - val globalClkCounter = RegInit(1.U(64.W)) - val traceReadCycle = RegInit(1.U(64.W)) - val downstreamSQready = WireInit(true.B) + // If any of the downstream lane is not ready, hold on from advancing + val downstreamReady = outer.laneNodes.map(_.out(0)._1.a.ready).reduce(_ && _) - //make the downstream only ready 1/4 of the time - //This is to test Tracer System's ability to hold on requests - //FIXME - downstreamSQready := (globalClkCounter(1,0) =/= 0.U) - //Connect Signals to Verilog BlackBox val sim = Module(new SimMemTrace(traceFile, config.numLanes)) sim.io.clock := clock sim.io.reset := reset.asBool - sim.io.trace_read.ready := downstreamSQready - //FIXME - 1.U hardcoded, currently there is a delay between chisel and verilog + sim.io.trace_read.ready := downstreamReady sim.io.trace_read.cycle := traceReadCycle - // Read output from Verilog BlackBox // Split output of SimMemTrace, which is flattened across all lanes,back to each lane's. val laneReqs = Wire(Vec(config.numLanes, new TraceLine)) @@ -1109,26 +1114,28 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, traceFil val dataW = laneReqs(0).data.getWidth laneReqs.zipWithIndex.foreach { case (req, i) => req.valid := sim.io.trace_read.valid(i) - // TODO: driver trace doesn't contain source id - req.source := 0.U + req.source := 0.U // driver trace doesn't contain source id req.address := sim.io.trace_read.address(addrW * (i + 1) - 1, addrW * i) req.is_store := sim.io.trace_read.is_store(i) req.size := sim.io.trace_read.size(sizeW * (i + 1) - 1, sizeW * i) req.data := sim.io.trace_read.data(dataW * (i + 1) - 1, dataW * i) } - globalClkCounter := globalClkCounter + 1.U - val existValidReq = WireInit(false.B) - existValidReq := laneReqs.map(_.valid).reduce(_||_) - val validReqBlocked = WireInit(false.B) - validReqBlocked := !downstreamSQready && existValidReq - //Debug - dontTouch(downstreamSQready) - dontTouch(existValidReq) - dontTouch(validReqBlocked) - // Do Not Update TraceReadCycle if downstream is blocking - when(!validReqBlocked){ - traceReadCycle := traceReadCycle + 1.U + // def missedLine = { + // val existsValidLine = WireInit(false.B) + // existsValidLine := laneReqs.map(_.valid).reduce(_||_) + // val missedLine = WireInit(false.B) + // missedLine := !downstreamReady && existsValidLine + + // // Debug + // dontTouch(downstreamReady) + // dontTouch(existsValidLine) + // dontTouch(missedLine) + + // missedLine + // } + when (downstreamReady){ + traceReadCycle := traceReadCycle + 1.U } // To prevent collision of sourceId with a current in-flight message, @@ -1163,19 +1170,6 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, traceFil val wordAlignedAddress = req.address & ~((1 << log2Ceil(config.wordSizeInBytes)) - 1).U(addrW.W) val wordAlignedSize = Mux(subword, 2.U, req.size) - // when(req.valid && subword) { - // printf( - // "address=%x, size=%d, data=%x, addressMask=%x, wordAlignedAddress=%x, mask=%x, wordData=%x\n", - // req.address, - // req.size, - // req.data, - // ~((1 << log2Ceil(config.WORD_SIZE)) - 1).U(addrW.W), - // wordAlignedAddress, - // mask, - // wordData - // ) - // } - val (tlOut, edge) = node.out(0) val (plegal, pbits) = edge.Put( fromSource = sourceIdCounter, @@ -1356,7 +1350,9 @@ class MemTraceLogger( // requests on TL A channel // - req.valid := tlIn.a.valid + // Only log trace when fired, e.g. both upstream and downstream is ready + // and transaction happened. + req.valid := tlIn.a.fire req.size := tlIn.a.bits.size req.is_store := TLUtils.AOpcodeIsStore(tlIn.a.bits.opcode) req.source := tlIn.a.bits.source @@ -1364,27 +1360,6 @@ class MemTraceLogger( // originally requested, so no postprocessing required req.address := tlIn.a.bits.address - // TL data - // - // When tlIn.a.bits.size is smaller than the data bus width, need to - // figure out which byte lanes we actually accessed so that - // we can write that to the memory trace. - // See Section 4.5 Byte Lanes in spec 1.8.1 - - // This assert only holds true for PutFullData and not PutPartialData, - // where HIGH bits in the mask may not be contiguous. - assert( - PopCount(tlIn.a.bits.mask) === (1.U << tlIn.a.bits.size), - "mask HIGH bits do not match the TL size. This should have been handled by the TL generator logic" - ) - val trailingZerosInMask = trailingZeros(tlIn.a.bits.mask) - val dataW = tlIn.params.dataBits - val mask = ~(~(0.U(dataW.W)) << ((1.U << tlIn.a.bits.size) * 8.U)) - req.data := mask & (tlIn.a.bits.data >> (trailingZerosInMask * 8.U)) - // when (req.valid) { - // printf("trailingZerosInMask=%d, mask=%x, data=%x\n", trailingZerosInMask, mask, req.data) - // } - when(req.valid) { TLPrintf( s"MemTraceLogger (${loggerName}:downstream)", @@ -1397,9 +1372,33 @@ class MemTraceLogger( ) } + // TL data + // + // When tlIn.a.bits.size is smaller than the data bus width, need to + // figure out which byte lanes we actually accessed so that + // we can write that to the memory trace. + // See Section 4.5 Byte Lanes in spec 1.8.1 + + // This assert only holds true for PutFullData and not PutPartialData, + // where HIGH bits in the mask may not be contiguous. + assert( + PopCount(tlIn.a.bits.mask) === (1.U << tlIn.a.bits.size), + "mask HIGH popcount do not match the TL size. " + + "Partial masks are not allowed for PutFull" + ) + val trailingZerosInMask = trailingZeros(tlIn.a.bits.mask) + val dataW = tlIn.params.dataBits + val mask = ~(~(0.U(dataW.W)) << ((1.U << tlIn.a.bits.size) * 8.U)) + req.data := mask & (tlIn.a.bits.data >> (trailingZerosInMask * 8.U)) + // when (req.valid) { + // printf("trailingZerosInMask=%d, mask=%x, data=%x\n", trailingZerosInMask, mask, req.data) + // } + // responses on TL D channel // - resp.valid := tlOut.d.valid + // Only log trace when fired, e.g. both upstream and downstream is ready + // and transaction happened. + resp.valid := tlOut.d.fire resp.size := tlOut.d.bits.size resp.is_store := TLUtils.DOpcodeIsStore(tlOut.d.bits.opcode) resp.source := tlOut.d.bits.source @@ -1433,7 +1432,7 @@ class MemTraceLogger( // // This is a clunky workaround of the fact that Chisel doesn't allow partial // assignment to a bitfield range of a wide signal. - def flattenTrace(traceLogIO: Bundle with HasTraceLine, perLane: Vec[TraceLine]) = { + def flattenTrace(simIO: Bundle with HasTraceLine, perLane: Vec[TraceLine]) = { // these will get optimized out val vecValid = Wire(Vec(numLanes, chiselTypeOf(perLane(0).valid))) val vecSource = Wire(Vec(numLanes, chiselTypeOf(perLane(0).source))) @@ -1449,12 +1448,12 @@ class MemTraceLogger( vecSize(i) := l.size vecData(i) := l.data } - traceLogIO.valid := vecValid.asUInt - traceLogIO.source := vecSource.asUInt - traceLogIO.address := vecAddress.asUInt - traceLogIO.is_store := vecIsStore.asUInt - traceLogIO.size := vecSize.asUInt - traceLogIO.data := vecData.asUInt + simIO.valid := vecValid.asUInt + simIO.source := vecSource.asUInt + simIO.address := vecAddress.asUInt + simIO.is_store := vecIsStore.asUInt + simIO.size := vecSize.asUInt + simIO.data := vecData.asUInt } if (simReq.isDefined) { @@ -1544,7 +1543,7 @@ class DummyDriver(config: CoalescerConfig)(implicit p: Parameters) val clientParam = Seq( TLMasterParameters.v1( name = "dummy-core-node-" + i.toString, - sourceId = IdRange(0, defaultConfig.numOldSrcIds) + sourceId = IdRange(0, config.numOldSrcIds) // visibility = Seq(AddressSet(0x0000, 0xffffff)) ) ) @@ -1635,10 +1634,7 @@ class DummyCoalescerTest(timeout: Int = 500000)(implicit p: Parameters) } // tracedriver --> coalescer --> tracelogger --> tlram -class TLRAMCoalescerLogger(implicit p: Parameters) extends LazyModule { - // val filename = "test.trace" - val filename = "vecadd.core1.thread4.trace" - // val filename = "nvbit.vecadd.n100000.filter_sm0.trace" +class TLRAMCoalescerLogger(filename: String)(implicit p: Parameters) extends LazyModule { // TODO: use parameters for numLanes val numLanes = defaultConfig.numLanes @@ -1680,13 +1676,14 @@ class TLRAMCoalescerLogger(implicit p: Parameters) extends LazyModule { (coreSideLogger.module.io.reqBytes === coreSideLogger.module.io.respBytes), "FAIL: requests and responses traffic to the coalescer do not match" ) + printf("SUCCESS: coalescer response traffic matched requests!\n") } } } -class TLRAMCoalescerLoggerTest(timeout: Int = 500000)(implicit p: Parameters) +class TLRAMCoalescerLoggerTest(filename: String, timeout: Int = 500000)(implicit p: Parameters) extends UnitTest(timeout) { - val dut = Module(LazyModule(new TLRAMCoalescerLogger).module) + val dut = Module(LazyModule(new TLRAMCoalescerLogger(filename)).module) dut.io.start := io.start io.finished := dut.io.finished } @@ -2137,3 +2134,5 @@ class CoalArbiterImpl(outer: CoalArbiter, + + diff --git a/src/test/scala/coalescing/CoalescingUnitTest.scala b/src/test/scala/coalescing/CoalescingUnitTest.scala index 2d31711..5a48733 100644 --- a/src/test/scala/coalescing/CoalescingUnitTest.scala +++ b/src/test/scala/coalescing/CoalescingUnitTest.scala @@ -2,10 +2,12 @@ package freechips.rocketchip.tilelink.coalescing import chisel3._ import chiseltest._ +import chiseltest.simulator.VerilatorFlags import org.scalatest.flatspec.AnyFlatSpec import freechips.rocketchip.tilelink._ import freechips.rocketchip.util.MultiPortQueue import freechips.rocketchip.diplomacy._ +import freechips.rocketchip.subsystem.WithoutTLMonitors import org.chipsalliance.cde.config.Parameters import chisel3.util.{DecoupledIO, Valid} import chisel3.util.experimental.BoringUtils @@ -190,8 +192,8 @@ object testConfig extends CoalescerConfig( respQueueDepth = 4, coalLogSizes = Seq(4, 5), sizeEnum = DefaultInFlightTableSizeEnum, - numArbiterOutputPorts = 4, numCoalReqs = 1, + numArbiterOutputPorts = 4, bankStrideInBytes = 64 ) @@ -229,8 +231,8 @@ class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester { } it should "coalesce fully consecutive accesses at size 4, only once" in { - test(LazyModule(new DummyCoalescingUnitTB()).module) - .withAnnotations(Seq(VerilatorBackendAnnotation, WriteFstAnnotation)) + test(LazyModule(new DummyCoalescingUnitTB()(new WithoutTLMonitors())).module) + .withAnnotations(Seq(VerilatorBackendAnnotation, VerilatorFlags(Seq("--coverage-line")), WriteFstAnnotation)) // .withAnnotations(Seq(VcsBackendAnnotation, WriteFsdbAnnotation)) { c => val nodes = c.coalIOs.map(_.head) @@ -291,8 +293,8 @@ class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester { } it should "coalesce identical addresses (stride of 0)" in { - test(LazyModule(new DummyCoalescingUnitTB()).module) - .withAnnotations(Seq(VcsBackendAnnotation)) + test(LazyModule(new DummyCoalescingUnitTB()(new WithoutTLMonitors())).module) + .withAnnotations(Seq(VerilatorBackendAnnotation)) { c => println(s"coalIO length = ${c.coalIOs(0).length}") val nodes = c.coalIOs.map(_.head)