From 41d520a9912221cb182a3b22b5a29236b646aa7e Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Mon, 17 Apr 2023 17:59:30 -0700 Subject: [PATCH] Log both request and response in trace logger Inside DPI code, have a vector of unique_ptrs that act as handles to multiple different trace logger instances. Each logger instance is instantiated in a single instance of the Verilog module, and multiple of these Verilog modules may be instantiated in the Chisel module (see simReq and simResp in MemTraceLogger). --- src/main/resources/csrc/SimMemTrace.h | 8 +- src/main/resources/csrc/SimMemTraceLogger.cc | 60 +++--- src/main/resources/vsrc/SimMemTraceLogger.v | 100 ++++++++++ src/main/scala/tilelink/Coalescing.scala | 196 ++++++++++++------- 4 files changed, 267 insertions(+), 97 deletions(-) create mode 100644 src/main/resources/vsrc/SimMemTraceLogger.v diff --git a/src/main/resources/csrc/SimMemTrace.h b/src/main/resources/csrc/SimMemTrace.h index 3bdd2d5..fef1ad8 100644 --- a/src/main/resources/csrc/SimMemTrace.h +++ b/src/main/resources/csrc/SimMemTrace.h @@ -28,10 +28,11 @@ public: class MemTraceWriter { public: - MemTraceWriter(const std::string &filename); + MemTraceWriter(const bool is_response, const std::string &filename); ~MemTraceWriter(); void write_line_to_trace(const MemTraceLine line); + bool is_response; FILE *outfile; }; @@ -45,8 +46,9 @@ extern "C" void memtrace_query(unsigned char trace_read_ready, int *trace_read_size, unsigned long *trace_read_data, unsigned char *trace_read_finished); -extern "C" void memtracelogger_init(const char *filename); -extern "C" void memtracelogger_log(unsigned char trace_log_valid, +extern "C" int memtracelogger_init(int is_response, const char *filename); +extern "C" void memtracelogger_log(int handle, + unsigned char trace_log_valid, unsigned long trace_log_cycle, unsigned long trace_log_address, int trace_log_lane_id, diff --git a/src/main/resources/csrc/SimMemTraceLogger.cc b/src/main/resources/csrc/SimMemTraceLogger.cc 
index 64e4ce5..c4cfde9 100644 --- a/src/main/resources/csrc/SimMemTraceLogger.cc +++ b/src/main/resources/csrc/SimMemTraceLogger.cc @@ -1,19 +1,23 @@ #ifndef NO_VPI -#include #include +#include #endif -#include -#include -#include -#include -#include -#include #include "SimMemTrace.h" +#include +#include +#include +#include +#include +#include -// Global singleton instance -static std::unique_ptr logger; +// Contains handle for every logger that is instantiated per Verilog module +// instance +static std::vector> loggers; + +MemTraceWriter::MemTraceWriter(const bool is_response, + const std::string &filename) { + this->is_response = is_response; -MemTraceWriter::MemTraceWriter(const std::string &filename) { char cwd[4096]; if (getcwd(cwd, sizeof(cwd))) { printf("MemTraceWriter: current working dir: %s\n", cwd); @@ -36,16 +40,17 @@ void MemTraceWriter::write_line_to_trace(const MemTraceLine line) { line.address, line.data, (1u << line.log_data_size)); } -extern "C" void memtracelogger_init(const char *filename) { +// Returns the "handle" ID for this particular logger instance. 
+extern "C" int memtracelogger_init(int is_response, const char *filename) { #ifndef NO_VPI s_vpi_vlog_info info; if (!vpi_get_vlog_info(&info)) { fprintf(stderr, "fatal: failed to get plusargs from VCS\n"); exit(1); } - const char* TRACEFILENAME_PLUSARG = "+memtracefile="; + const char *TRACEFILENAME_PLUSARG = "+memtracefile="; for (int i = 0; i < info.argc; i++) { - char* input_arg = info.argv[i]; + char *input_arg = info.argv[i]; if (strncmp(input_arg, TRACEFILENAME_PLUSARG, strlen(TRACEFILENAME_PLUSARG)) == 0) { filename = input_arg + strlen(TRACEFILENAME_PLUSARG); @@ -54,20 +59,24 @@ extern "C" void memtracelogger_init(const char *filename) { } #endif - printf("memtrace_init: filename=[%s]\n", filename); + int handle = loggers.size(); + loggers.emplace_back(std::make_unique(is_response, filename)); - logger = std::make_unique(filename); + printf("memtracelogger_init: handle=%d, is_response=%d, filename=[%s]\n", + handle, is_response, filename); + + return handle; } +// This is used to log both TileLink A and D channels. 
// TODO: accept core_id as well -extern "C" void memtracelogger_log(unsigned char trace_log_valid, - unsigned long trace_log_cycle, - unsigned long trace_log_address, - int trace_log_lane_id, - unsigned char trace_log_is_store, - int trace_log_size, - unsigned long trace_log_data, - unsigned char *trace_log_ready) { +extern "C" void +memtracelogger_log(int handle, + unsigned char trace_log_valid, unsigned long trace_log_cycle, + unsigned long trace_log_address, int trace_log_lane_id, + unsigned char trace_log_is_store, int trace_log_size, + unsigned long trace_log_data, + unsigned char *trace_log_ready) { // printf("memtrace_query(cycle=%ld, tid=%d)\n", trace_read_cycle, // trace_read_lane_id); *trace_log_ready = 1; @@ -77,8 +86,7 @@ extern "C" void memtracelogger_log(unsigned char trace_log_valid, } printf("%s: [%lu] valid: address=%lx, tid=%u, size=%d\n", __func__, - trace_log_cycle, trace_log_address, trace_log_lane_id, - trace_log_size); + trace_log_cycle, trace_log_address, trace_log_lane_id, trace_log_size); MemTraceLine line{.valid = (trace_log_valid == 1), .cycle = static_cast(trace_log_cycle), @@ -89,5 +97,7 @@ extern "C" void memtracelogger_log(unsigned char trace_log_valid, .data = trace_log_data, .log_data_size = trace_log_size}; + assert(0 <= handle && handle < loggers.size() && "wrong trace logger handle"); + auto logger = loggers[handle].get(); logger->write_line_to_trace(line); } diff --git a/src/main/resources/vsrc/SimMemTraceLogger.v b/src/main/resources/vsrc/SimMemTraceLogger.v new file mode 100644 index 0000000..8c358d2 --- /dev/null +++ b/src/main/resources/vsrc/SimMemTraceLogger.v @@ -0,0 +1,100 @@ +// FIXME hardcoded +`define DATA_WIDTH 64 +`define MAX_NUM_LANES 32 +`define LOGSIZE_WIDTH 32 + +import "DPI-C" function int memtracelogger_init( + input bit is_response, + input string filename +); + +// Make sure to sync the parameters for: +// (1) import "DPI-C" declaration +// (2) C function declaration +// (3) DPI function calls inside 
initial/always blocks +import "DPI-C" function void memtracelogger_log +( + input int handle, + input bit trace_log_valid, + input longint trace_log_cycle, + input longint trace_log_address, + input int trace_log_tid, + input bit trace_log_is_store, + input int trace_log_size, + input longint trace_log_data, + output bit trace_log_ready +); + +module SimMemTraceLogger #(parameter + IS_RESPONSE = 0, + FILENAME = "undefined", + NUM_LANES = 4) ( + input clock, + input reset, + + // NOTE: LSB is lane 0 + input [NUM_LANES-1:0] trace_log_valid, + input [`DATA_WIDTH*NUM_LANES-1:0] trace_log_address, + input [NUM_LANES-1:0] trace_log_is_store, + input [`LOGSIZE_WIDTH*NUM_LANES-1:0] trace_log_size, + input [`DATA_WIDTH*NUM_LANES-1:0] trace_log_data, + output trace_log_ready +); + int logger_handle; + bit __in_ready; + + // cycle_counter will start off right after reset is deasserted which should + // synchronize itself with SimMemTrace.cycle_counter + reg [`DATA_WIDTH-1:0] cycle_counter; + wire [`DATA_WIDTH-1:0] next_cycle_counter; + assign next_cycle_counter = cycle_counter + 1'b1; + + // wires going into the DPI-C call + wire __valid [NUM_LANES-1:0]; + wire [`DATA_WIDTH-1:0] __address [NUM_LANES-1:0]; + wire __is_store [NUM_LANES-1:0]; + wire [`LOGSIZE_WIDTH-1:0] __size [NUM_LANES-1:0]; + wire [`DATA_WIDTH-1:0] __data [NUM_LANES-1:0]; + + assign trace_log_ready = __in_ready; + + genvar g; + generate + for (g = 0; g < NUM_LANES; g = g + 1) begin + // LSB is lane 0 + assign __valid[g] = trace_log_valid[g]; + assign __address[g] = trace_log_address[`DATA_WIDTH*(g+1)-1:`DATA_WIDTH*g]; + assign __is_store[g] = trace_log_is_store[g]; + assign __size[g] = trace_log_size[`LOGSIZE_WIDTH*(g+1)-1:`LOGSIZE_WIDTH*g]; + assign __data[g] = trace_log_data[`DATA_WIDTH*(g+1)-1:`DATA_WIDTH*g]; + end + endgenerate + + initial begin + /* $value$plusargs("uartlog=%s", __uartlog); */ + logger_handle = memtracelogger_init(IS_RESPONSE, FILENAME); + end + + always @(posedge clock) begin + if (reset) 
begin + __in_ready = 1'b1; + cycle_counter <= `DATA_WIDTH'b0; + end else begin + cycle_counter <= next_cycle_counter; + + for (integer tid = 0; tid < NUM_LANES; tid = tid + 1) begin + memtracelogger_log( + logger_handle, + __valid[tid], + cycle_counter, + __address[tid], + tid, + __is_store[tid], + __size[tid], + __data[tid], + __in_ready + ); + end + end + end +endmodule diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index 161035e..775cce8 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -747,7 +747,11 @@ class SimMemTrace(filename: String, numLanes: Int) addResource("/csrc/SimMemTrace.h") } -class MemTraceLogger(numLanes: Int = 4, filename: String = "vecadd.core1.thread4.out.trace")(implicit +class MemTraceLogger( + numLanes: Int = 4, + reqFilename: String = "vecadd.core1.thread4.logger.req.trace", + respFilename: String = "vecadd.core1.thread4.logger.resp.trace" +)(implicit p: Parameters ) extends LazyModule { val node = TLIdentityNode() @@ -775,98 +779,152 @@ class MemTraceLogger(numLanes: Int = 4, filename: String = "vecadd.core1.thread4 lazy val module = new Impl class Impl extends LazyModuleImp(this) { - val sim = Module(new SimMemTraceLogger(filename, numLanes)) - sim.io.clock := clock - sim.io.reset := reset.asBool + val simReq = Module(new SimMemTraceLogger(false, reqFilename, numLanes)) + val simResp = Module(new SimMemTraceLogger(true, respFilename, numLanes)) + simReq.io.clock := clock + simReq.io.reset := reset.asBool + simResp.io.clock := clock + simResp.io.reset := reset.asBool val laneReqs = Wire(Vec(numLanes, new TraceReq)) + val laneResps = Wire(Vec(numLanes, new TraceReq)) assert( numLanes == node.in.length, "`numLanes` does not match the number of TL edges connected to the MemTraceLogger" ) + def tlAOpcodeIsStore(opcode: UInt): Bool = { + // 0: PutFullData, 1: PutPartialData but we don't support it + // 4: Get + assert(opcode === 0.U || 
opcode === 4.U, "unhandled TL A opcode found") + opcode === 0.U + } + def tlDOpcodeIsStore(opcode: UInt): Bool = { + // 0: AccessAck (Put), 1: AccessAckData (Get or Atomic) + // See Table 13 of spec 1.8.1 + assert(opcode === 0.U || opcode === 1.U, "unhandled TL D opcode found") + opcode === 0.U + } + // snoop on the TileLink edges to log traffic - ((node.in zip node.out) zip laneReqs).foreach { case (((tlIn, _), (tlOut, _)), req) => - tlOut.a <> tlIn.a - tlIn.d <> tlOut.d + ((node.in zip node.out) zip (laneReqs zip laneResps)).foreach { + case (((tlIn, _), (tlOut, _)), (req, resp)) => + tlOut.a <> tlIn.a + tlIn.d <> tlOut.d - // requests on TL A channel - // - req.valid := tlIn.a.valid - req.size := tlIn.a.bits.size - def tlOpcodeIsStore(opcode: UInt): Bool = { - // 0: PutFullData, 1: PutPartialData but we don't support it - // 4: Get - assert(opcode === 0.U || opcode === 4.U, "unhandled TL opcode found in MemTraceLogger") - tlIn.a.bits.opcode === 0.U - } - req.is_store := tlOpcodeIsStore(tlIn.a.bits.opcode) - // TL always carries the exact unaligned address that the client - // originally requested, so no postprocessing required - req.address := tlIn.a.bits.address + // requests on TL A channel + // + req.valid := tlIn.a.valid + req.size := tlIn.a.bits.size + req.is_store := tlAOpcodeIsStore(tlIn.a.bits.opcode) + // TL always carries the exact unaligned address that the client + // originally requested, so no postprocessing required + req.address := tlIn.a.bits.address - // TL data - // - // When tlIn.a.bits.size is smaller than the data bus width, need to - // figure out which byte lanes we actually accessed so that - // we can write that to the memory trace. - // See Section 4.5 Byte Lanes in spec 1.8.1 + // TL data + // + // When tlIn.a.bits.size is smaller than the data bus width, need to + // figure out which byte lanes we actually accessed so that + // we can write that to the memory trace. 
+ // See Section 4.5 Byte Lanes in spec 1.8.1 - // This assert only holds true for PutFullData and not PutPartialData, - // where HIGH bits in the mask may not be contiguous. - assert( - PopCount(tlIn.a.bits.mask) === (1.U << tlIn.a.bits.size), - "mask HIGH bits do not match the TL size. This should have been handled by the TL generator logic" - ) - val trailingZerosInMask = trailingZeros(tlIn.a.bits.mask) - val mask = ~((~0.U) << (trailingZerosInMask * 8.U)) - req.data := mask & (tlIn.a.bits.data >> (trailingZerosInMask * 8.U)) - - when(req.valid) { - TracePrintf( - "MemTraceLogger", - tlIn.a.bits.address, - tlIn.a.bits.size, - tlIn.a.bits.mask, - req.is_store, - tlIn.a.bits.data, - req.data + // This assert only holds true for PutFullData and not PutPartialData, + // where HIGH bits in the mask may not be contiguous. + assert( + PopCount(tlIn.a.bits.mask) === (1.U << tlIn.a.bits.size), + "mask HIGH bits do not match the TL size. This should have been handled by the TL generator logic" ) - } + val trailingZerosInMask = trailingZeros(tlIn.a.bits.mask) + val mask = ~((~0.U) << (trailingZerosInMask * 8.U)) + req.data := mask & (tlIn.a.bits.data >> (trailingZerosInMask * 8.U)) - // responses on TL D channel - // TODO + when(req.valid) { + TracePrintf( + "MemTraceLogger", + tlIn.a.bits.address, + tlIn.a.bits.size, + tlIn.a.bits.mask, + req.is_store, + tlIn.a.bits.data, + req.data + ) + } + + // responses on TL D channel + // + resp.valid := tlOut.d.valid + resp.size := tlOut.d.bits.size + resp.is_store := tlDOpcodeIsStore(tlOut.d.bits.opcode) + // NOTE: TL D channel carries neither address nor mask, so there's no easy + // way to figure out which bytes the master actually uses. Since we + // don't care too much about addresses in the trace anyway, just store + // the entire bits. + resp.address := 0.U + resp.data := tlOut.d.bits.data } // clunky workaround of the fact that Chisel doesn't allow partial // assignment to a bitfield range of a wide signal. 
- val laneValid = Wire(Vec(numLanes, Bool())) - val laneAddress = Wire(Vec(numLanes, chiselTypeOf(laneReqs(0).address))) - val laneIsStore = Wire(Vec(numLanes, chiselTypeOf(laneReqs(0).is_store))) - val laneSize = Wire(Vec(numLanes, chiselTypeOf(laneReqs(0).size))) - val laneData = Wire(Vec(numLanes, chiselTypeOf(laneReqs(0).data))) - laneReqs.zipWithIndex.foreach { case (req, i) => - laneValid(i) := req.valid - laneAddress(i) := req.address - laneIsStore(i) := req.is_store - laneSize(i) := req.size - laneData(i) := req.data + def flattenTrace(traceLogIO: Bundle with HasTraceReq, perLane: Vec[TraceReq]) = { + val laneValid = Wire(Vec(numLanes, Bool())) + val laneAddress = Wire(Vec(numLanes, chiselTypeOf(perLane(0).address))) + val laneIsStore = Wire(Vec(numLanes, chiselTypeOf(perLane(0).is_store))) + val laneSize = Wire(Vec(numLanes, chiselTypeOf(perLane(0).size))) + val laneData = Wire(Vec(numLanes, chiselTypeOf(perLane(0).data))) + perLane.zipWithIndex.foreach { case (req, i) => + laneValid(i) := req.valid + laneAddress(i) := req.address + laneIsStore(i) := req.is_store + laneSize(i) := req.size + laneData(i) := req.data + } + // flatten per-lane signals to the Verilog blackbox input + traceLogIO.valid := laneValid.asUInt + traceLogIO.address := laneAddress.asUInt + traceLogIO.is_store := laneIsStore.asUInt + traceLogIO.size := laneSize.asUInt + traceLogIO.data := laneData.asUInt } - // flatten per-lane signals to the Verilog blackbox input - sim.io.trace_log.valid := laneValid.asUInt - sim.io.trace_log.address := laneAddress.asUInt - sim.io.trace_log.is_store := laneIsStore.asUInt - sim.io.trace_log.size := laneSize.asUInt - sim.io.trace_log.data := laneData.asUInt - assert(sim.io.trace_log.ready === true.B, "MemTraceLogger is expected to be always ready") + flattenTrace(simReq.io.trace_log, laneReqs) + flattenTrace(simResp.io.trace_log, laneResps) + + assert(simReq.io.trace_log.ready === true.B, "MemTraceLogger is expected to be always ready") + 
+ assert(simResp.io.trace_log.ready === true.B, "MemTraceLogger is expected to be always ready") + + // val laneValid = Wire(Vec(numLanes, Bool())) + // val laneAddress = Wire(Vec(numLanes, chiselTypeOf(laneReqs(0).address))) + // val laneIsStore = Wire(Vec(numLanes, chiselTypeOf(laneReqs(0).is_store))) + // val laneSize = Wire(Vec(numLanes, chiselTypeOf(laneReqs(0).size))) + // val laneData = Wire(Vec(numLanes, chiselTypeOf(laneReqs(0).data))) + // laneReqs.zipWithIndex.foreach { case (req, i) => + // laneValid(i) := req.valid + // laneAddress(i) := req.address + // laneIsStore(i) := req.is_store + // laneSize(i) := req.size + // laneData(i) := req.data + // } + // // flatten per-lane signals to the Verilog blackbox input + // simReq.io.trace_log.valid := laneValid.asUInt + // simReq.io.trace_log.address := laneAddress.asUInt + // simReq.io.trace_log.is_store := laneIsStore.asUInt + // simReq.io.trace_log.size := laneSize.asUInt + // simReq.io.trace_log.data := laneData.asUInt } } -class SimMemTraceLogger(filename: String, numLanes: Int) +// MemTraceLogger is bidirectional. The DPI module uses `isResponse` to tell +// whether it is logging the request stream or the response stream. This distinction +// is needed because the response trace file will not contain certain columns +// such as address. +class SimMemTraceLogger(isResponse: Boolean, filename: String, numLanes: Int) extends BlackBox( - Map("FILENAME" -> filename, "NUM_LANES" -> numLanes) + Map( + "IS_RESPONSE" -> (if (isResponse) 1 else 0), + "FILENAME" -> filename, + "NUM_LANES" -> numLanes + ) ) with HasBlackBoxResource { val io = IO(new Bundle {