Log both request and response in trace logger

Inside the DPI code, keep a vector of unique_ptrs to the trace logger instances;
the index into that vector is the integer handle returned to each caller. Each
logger instance is created by a single instance of the Verilog module, and
several of these Verilog modules may be instantiated in the Chisel module (see
simReq and simResp in MemTraceLogger).
This commit is contained in:
Hansung Kim
2023-04-17 17:59:30 -07:00
parent 8978c2a812
commit 41d520a991
4 changed files with 267 additions and 97 deletions

View File

@@ -28,10 +28,11 @@ public:
class MemTraceWriter { class MemTraceWriter {
public: public:
MemTraceWriter(const std::string &filename); MemTraceWriter(const bool is_response, const std::string &filename);
~MemTraceWriter(); ~MemTraceWriter();
void write_line_to_trace(const MemTraceLine line); void write_line_to_trace(const MemTraceLine line);
bool is_response;
FILE *outfile; FILE *outfile;
}; };
@@ -45,8 +46,9 @@ extern "C" void memtrace_query(unsigned char trace_read_ready,
int *trace_read_size, int *trace_read_size,
unsigned long *trace_read_data, unsigned long *trace_read_data,
unsigned char *trace_read_finished); unsigned char *trace_read_finished);
extern "C" void memtracelogger_init(const char *filename); extern "C" int memtracelogger_init(int is_response, const char *filename);
extern "C" void memtracelogger_log(unsigned char trace_log_valid, extern "C" void memtracelogger_log(int handle,
unsigned char trace_log_valid,
unsigned long trace_log_cycle, unsigned long trace_log_cycle,
unsigned long trace_log_address, unsigned long trace_log_address,
int trace_log_lane_id, int trace_log_lane_id,

View File

@@ -1,19 +1,23 @@
#ifndef NO_VPI #ifndef NO_VPI
#include <vpi_user.h>
#include <svdpi.h> #include <svdpi.h>
#include <vpi_user.h>
#endif #endif
#include <string>
#include <cstring>
#include <cstdio>
#include <cassert>
#include <memory>
#include <unistd.h>
#include "SimMemTrace.h" #include "SimMemTrace.h"
#include <cassert>
#include <cstdio>
#include <cstring>
#include <memory>
#include <string>
#include <unistd.h>
// Global singleton instance // Contains handle for every logger that is instantiated per Verilog module
static std::unique_ptr<MemTraceWriter> logger; // instance
static std::vector<std::unique_ptr<MemTraceWriter>> loggers;
MemTraceWriter::MemTraceWriter(const bool is_response,
const std::string &filename) {
this->is_response = is_response;
MemTraceWriter::MemTraceWriter(const std::string &filename) {
char cwd[4096]; char cwd[4096];
if (getcwd(cwd, sizeof(cwd))) { if (getcwd(cwd, sizeof(cwd))) {
printf("MemTraceWriter: current working dir: %s\n", cwd); printf("MemTraceWriter: current working dir: %s\n", cwd);
@@ -36,16 +40,17 @@ void MemTraceWriter::write_line_to_trace(const MemTraceLine line) {
line.address, line.data, (1u << line.log_data_size)); line.address, line.data, (1u << line.log_data_size));
} }
extern "C" void memtracelogger_init(const char *filename) { // Returns the "handle" ID for this particular logger instance.
extern "C" int memtracelogger_init(int is_response, const char *filename) {
#ifndef NO_VPI #ifndef NO_VPI
s_vpi_vlog_info info; s_vpi_vlog_info info;
if (!vpi_get_vlog_info(&info)) { if (!vpi_get_vlog_info(&info)) {
fprintf(stderr, "fatal: failed to get plusargs from VCS\n"); fprintf(stderr, "fatal: failed to get plusargs from VCS\n");
exit(1); exit(1);
} }
const char* TRACEFILENAME_PLUSARG = "+memtracefile="; const char *TRACEFILENAME_PLUSARG = "+memtracefile=";
for (int i = 0; i < info.argc; i++) { for (int i = 0; i < info.argc; i++) {
char* input_arg = info.argv[i]; char *input_arg = info.argv[i];
if (strncmp(input_arg, TRACEFILENAME_PLUSARG, if (strncmp(input_arg, TRACEFILENAME_PLUSARG,
strlen(TRACEFILENAME_PLUSARG)) == 0) { strlen(TRACEFILENAME_PLUSARG)) == 0) {
filename = input_arg + strlen(TRACEFILENAME_PLUSARG); filename = input_arg + strlen(TRACEFILENAME_PLUSARG);
@@ -54,20 +59,24 @@ extern "C" void memtracelogger_init(const char *filename) {
} }
#endif #endif
printf("memtrace_init: filename=[%s]\n", filename); int handle = loggers.size();
loggers.emplace_back(std::make_unique<MemTraceWriter>(is_response, filename));
logger = std::make_unique<MemTraceWriter>(filename); printf("memtracelogger_init: handle=%d, is_response=%d, filename=[%s]\n",
handle, is_response, filename);
return handle;
} }
// This is used to log both TileLink A and D channels.
// TODO: accept core_id as well // TODO: accept core_id as well
extern "C" void memtracelogger_log(unsigned char trace_log_valid, extern "C" void
unsigned long trace_log_cycle, memtracelogger_log(int handle,
unsigned long trace_log_address, unsigned char trace_log_valid, unsigned long trace_log_cycle,
int trace_log_lane_id, unsigned long trace_log_address, int trace_log_lane_id,
unsigned char trace_log_is_store, unsigned char trace_log_is_store, int trace_log_size,
int trace_log_size, unsigned long trace_log_data,
unsigned long trace_log_data, unsigned char *trace_log_ready) {
unsigned char *trace_log_ready) {
// printf("memtrace_query(cycle=%ld, tid=%d)\n", trace_read_cycle, // printf("memtrace_query(cycle=%ld, tid=%d)\n", trace_read_cycle,
// trace_read_lane_id); // trace_read_lane_id);
*trace_log_ready = 1; *trace_log_ready = 1;
@@ -77,8 +86,7 @@ extern "C" void memtracelogger_log(unsigned char trace_log_valid,
} }
printf("%s: [%lu] valid: address=%lx, tid=%u, size=%d\n", __func__, printf("%s: [%lu] valid: address=%lx, tid=%u, size=%d\n", __func__,
trace_log_cycle, trace_log_address, trace_log_lane_id, trace_log_cycle, trace_log_address, trace_log_lane_id, trace_log_size);
trace_log_size);
MemTraceLine line{.valid = (trace_log_valid == 1), MemTraceLine line{.valid = (trace_log_valid == 1),
.cycle = static_cast<long>(trace_log_cycle), .cycle = static_cast<long>(trace_log_cycle),
@@ -89,5 +97,7 @@ extern "C" void memtracelogger_log(unsigned char trace_log_valid,
.data = trace_log_data, .data = trace_log_data,
.log_data_size = trace_log_size}; .log_data_size = trace_log_size};
assert(0 <= handle && handle < loggers.size() && "wrong trace logger handle");
auto logger = loggers[handle].get();
logger->write_line_to_trace(line); logger->write_line_to_trace(line);
} }

View File

@@ -0,0 +1,100 @@
// FIXME hardcoded
// Bit widths used to slice the flattened per-lane buses of SimMemTraceLogger.
// DATA_WIDTH: width of one lane's address field, one lane's data field, and
// of the module's cycle counter.
`define DATA_WIDTH 64
// MAX_NUM_LANES: not referenced by the SimMemTraceLogger module below.
`define MAX_NUM_LANES 32
// LOGSIZE_WIDTH: width of one lane's size field.
`define LOGSIZE_WIDTH 32
// Creates one trace logger on the C side and returns its integer handle,
// which must be passed to every subsequent memtracelogger_log() call.
//
// is_response: non-zero when this logger records the response stream, zero
//              for the request stream.  Declared as `int` so that it matches
//              the C definition, which takes `int is_response`; a
//              SystemVerilog `bit` would be passed as svBit (unsigned char)
//              and would not agree with that prototype.
// filename:    path of the trace file to write.  The C side may replace it
//              with the value of the +memtracefile= plusarg when VPI is
//              available.
import "DPI-C" function int memtracelogger_init(
  input int is_response,
  input string filename
);
// Logs a single lane's trace entry through the logger selected by `handle`.
//
// Make sure to sync the parameters for:
// (1) import "DPI-C" declaration
// (2) C function declaration
// (3) DPI function calls inside initial/always blocks
//
// Arguments are positional and mirror the C definition of memtracelogger_log;
// note that `trace_log_tid` here corresponds to `trace_log_lane_id` there.
import "DPI-C" function void memtracelogger_log
(
// handle returned by memtracelogger_init() for this logger instance
input int handle,
// 1 when this lane carries a valid entry in the current cycle
input bit trace_log_valid,
// cycle count supplied by the caller
input longint trace_log_cycle,
input longint trace_log_address,
// lane index the entry belongs to
input int trace_log_tid,
input bit trace_log_is_store,
// size field of the entry; NOTE(review): appears to be log2 of the byte
// count, judging by the C-side name `log_data_size` -- confirm
input int trace_log_size,
input longint trace_log_data,
// written by the C side on every call
output bit trace_log_ready
);
// Simulation-only trace logger.  Every clock cycle outside of reset, each
// lane's fields are handed to the C side through memtracelogger_log().
//
// Parameters:
//   IS_RESPONSE - forwarded to memtracelogger_init(); tells the C side whether
//                 this instance logs the response or the request stream.
//   FILENAME    - trace file name forwarded to memtracelogger_init().
//   NUM_LANES   - number of lanes packed into each flattened input bus.
module SimMemTraceLogger #(
  parameter IS_RESPONSE = 0,
  parameter FILENAME    = "undefined",
  parameter NUM_LANES   = 4
) (
  input                                 clock,
  input                                 reset,

  // Per-lane fields are packed side by side; lane 0 sits in the LSBs.
  input  [NUM_LANES-1:0]                trace_log_valid,
  input  [`DATA_WIDTH*NUM_LANES-1:0]    trace_log_address,
  input  [NUM_LANES-1:0]                trace_log_is_store,
  input  [`LOGSIZE_WIDTH*NUM_LANES-1:0] trace_log_size,
  input  [`DATA_WIDTH*NUM_LANES-1:0]    trace_log_data,
  output                                trace_log_ready
);
  // Handle obtained from memtracelogger_init(); identifies this instance's
  // logger on every memtracelogger_log() call.
  int logger_handle;

  // Forced high during reset, afterwards overwritten by the output argument of
  // each DPI call (the call for the highest lane is the last writer).
  bit dpi_ready;
  assign trace_log_ready = dpi_ready;

  // Held at zero while reset is asserted and incremented on every clock edge
  // after that; intended to line up with SimMemTrace.cycle_counter.
  reg [`DATA_WIDTH-1:0] cycle_counter;

  // Per-lane views of the flattened buses; these feed the DPI call.
  wire                       lane_valid    [NUM_LANES-1:0];
  wire [`DATA_WIDTH-1:0]     lane_address  [NUM_LANES-1:0];
  wire                       lane_is_store [NUM_LANES-1:0];
  wire [`LOGSIZE_WIDTH-1:0]  lane_size     [NUM_LANES-1:0];
  wire [`DATA_WIDTH-1:0]     lane_data     [NUM_LANES-1:0];

  genvar lane;
  generate
    for (lane = 0; lane < NUM_LANES; lane = lane + 1) begin
      // Slice lane `lane` out of each bus, lane 0 being the least significant.
      assign lane_valid[lane]    = trace_log_valid[lane];
      assign lane_is_store[lane] = trace_log_is_store[lane];
      assign lane_address[lane]  = trace_log_address[`DATA_WIDTH*lane +: `DATA_WIDTH];
      assign lane_data[lane]     = trace_log_data[`DATA_WIDTH*lane +: `DATA_WIDTH];
      assign lane_size[lane]     = trace_log_size[`LOGSIZE_WIDTH*lane +: `LOGSIZE_WIDTH];
    end
  endgenerate

  // Register this instance with the C side once, at the start of simulation.
  initial begin
    logger_handle = memtracelogger_init(IS_RESPONSE, FILENAME);
  end

  always @(posedge clock) begin
    if (reset) begin
      // Blocking assignment on purpose: dpi_ready is also written directly by
      // the DPI call below, which behaves like a blocking write.
      dpi_ready = 1'b1;
      cycle_counter <= {`DATA_WIDTH{1'b0}};
    end else begin
      cycle_counter <= cycle_counter + 1'b1;
      // One DPI call per lane, in ascending lane order, including lanes whose
      // valid bit is low.
      for (int lane_idx = 0; lane_idx < NUM_LANES; lane_idx = lane_idx + 1) begin
        memtracelogger_log(logger_handle,
                           lane_valid[lane_idx],
                           cycle_counter,
                           lane_address[lane_idx],
                           lane_idx,
                           lane_is_store[lane_idx],
                           lane_size[lane_idx],
                           lane_data[lane_idx],
                           dpi_ready);
      end
    end
  end
endmodule

View File

@@ -747,7 +747,11 @@ class SimMemTrace(filename: String, numLanes: Int)
addResource("/csrc/SimMemTrace.h") addResource("/csrc/SimMemTrace.h")
} }
class MemTraceLogger(numLanes: Int = 4, filename: String = "vecadd.core1.thread4.out.trace")(implicit class MemTraceLogger(
numLanes: Int = 4,
reqFilename: String = "vecadd.core1.thread4.logger.req.trace",
respFilename: String = "vecadd.core1.thread4.logger.resp.trace"
)(implicit
p: Parameters p: Parameters
) extends LazyModule { ) extends LazyModule {
val node = TLIdentityNode() val node = TLIdentityNode()
@@ -775,98 +779,152 @@ class MemTraceLogger(numLanes: Int = 4, filename: String = "vecadd.core1.thread4
lazy val module = new Impl lazy val module = new Impl
class Impl extends LazyModuleImp(this) { class Impl extends LazyModuleImp(this) {
val sim = Module(new SimMemTraceLogger(filename, numLanes)) val simReq = Module(new SimMemTraceLogger(false, reqFilename, numLanes))
sim.io.clock := clock val simResp = Module(new SimMemTraceLogger(true, respFilename, numLanes))
sim.io.reset := reset.asBool simReq.io.clock := clock
simReq.io.reset := reset.asBool
simResp.io.clock := clock
simResp.io.reset := reset.asBool
val laneReqs = Wire(Vec(numLanes, new TraceReq)) val laneReqs = Wire(Vec(numLanes, new TraceReq))
val laneResps = Wire(Vec(numLanes, new TraceReq))
assert( assert(
numLanes == node.in.length, numLanes == node.in.length,
"`numLanes` does not match the number of TL edges connected to the MemTraceLogger" "`numLanes` does not match the number of TL edges connected to the MemTraceLogger"
) )
def tlAOpcodeIsStore(opcode: UInt): Bool = {
// 0: PutFullData, 1: PutPartialData but we don't support it
// 4: Get
assert(opcode === 0.U || opcode === 4.U, "unhandled TL A opcode found")
opcode === 0.U
}
def tlDOpcodeIsStore(opcode: UInt): Bool = {
// 0: AccessAck (Put), 1: AccessAckData (Get or Atomic)
// See Table 13 of spec 1.8.1
assert(opcode === 0.U || opcode === 1.U, "unhandled TL D opcode found")
opcode === 0.U
}
// snoop on the TileLink edges to log traffic // snoop on the TileLink edges to log traffic
((node.in zip node.out) zip laneReqs).foreach { case (((tlIn, _), (tlOut, _)), req) => ((node.in zip node.out) zip (laneReqs zip laneResps)).foreach {
tlOut.a <> tlIn.a case (((tlIn, _), (tlOut, _)), (req, resp)) =>
tlIn.d <> tlOut.d tlOut.a <> tlIn.a
tlIn.d <> tlOut.d
// requests on TL A channel // requests on TL A channel
// //
req.valid := tlIn.a.valid req.valid := tlIn.a.valid
req.size := tlIn.a.bits.size req.size := tlIn.a.bits.size
def tlOpcodeIsStore(opcode: UInt): Bool = { req.is_store := tlAOpcodeIsStore(tlIn.a.bits.opcode)
// 0: PutFullData, 1: PutPartialData but we don't support it // TL always carries the exact unaligned address that the client
// 4: Get // originally requested, so no postprocessing required
assert(opcode === 0.U || opcode === 4.U, "unhandled TL opcode found in MemTraceLogger") req.address := tlIn.a.bits.address
tlIn.a.bits.opcode === 0.U
}
req.is_store := tlOpcodeIsStore(tlIn.a.bits.opcode)
// TL always carries the exact unaligned address that the client
// originally requested, so no postprocessing required
req.address := tlIn.a.bits.address
// TL data // TL data
// //
// When tlIn.a.bits.size is smaller than the data bus width, need to // When tlIn.a.bits.size is smaller than the data bus width, need to
// figure out which byte lanes we actually accessed so that // figure out which byte lanes we actually accessed so that
// we can write that to the memory trace. // we can write that to the memory trace.
// See Section 4.5 Byte Lanes in spec 1.8.1 // See Section 4.5 Byte Lanes in spec 1.8.1
// This assert only holds true for PutFullData and not PutPartialData, // This assert only holds true for PutFullData and not PutPartialData,
// where HIGH bits in the mask may not be contiguous. // where HIGH bits in the mask may not be contiguous.
assert( assert(
PopCount(tlIn.a.bits.mask) === (1.U << tlIn.a.bits.size), PopCount(tlIn.a.bits.mask) === (1.U << tlIn.a.bits.size),
"mask HIGH bits do not match the TL size. This should have been handled by the TL generator logic" "mask HIGH bits do not match the TL size. This should have been handled by the TL generator logic"
)
val trailingZerosInMask = trailingZeros(tlIn.a.bits.mask)
val mask = ~((~0.U) << (trailingZerosInMask * 8.U))
req.data := mask & (tlIn.a.bits.data >> (trailingZerosInMask * 8.U))
when(req.valid) {
TracePrintf(
"MemTraceLogger",
tlIn.a.bits.address,
tlIn.a.bits.size,
tlIn.a.bits.mask,
req.is_store,
tlIn.a.bits.data,
req.data
) )
} val trailingZerosInMask = trailingZeros(tlIn.a.bits.mask)
val mask = ~((~0.U) << (trailingZerosInMask * 8.U))
req.data := mask & (tlIn.a.bits.data >> (trailingZerosInMask * 8.U))
// responses on TL D channel when(req.valid) {
// TODO TracePrintf(
"MemTraceLogger",
tlIn.a.bits.address,
tlIn.a.bits.size,
tlIn.a.bits.mask,
req.is_store,
tlIn.a.bits.data,
req.data
)
}
// responses on TL D channel
//
resp.valid := tlOut.d.valid
resp.size := tlOut.d.bits.size
resp.is_store := tlDOpcodeIsStore(tlOut.d.bits.opcode)
// NOTE: The TL D channel carries neither the address nor the mask, so there's
// no easy way to figure out which bytes the master actually uses. Since we
// don't care too much about addresses in the trace anyway, just store
// the entire data field.
resp.address := 0.U
resp.data := tlOut.d.bits.data
} }
// clunky workaround of the fact that Chisel doesn't allow partial // clunky workaround of the fact that Chisel doesn't allow partial
// assignment to a bitfield range of a wide signal. // assignment to a bitfield range of a wide signal.
val laneValid = Wire(Vec(numLanes, Bool())) def flattenTrace(traceLogIO: Bundle with HasTraceReq, perLane: Vec[TraceReq]) = {
val laneAddress = Wire(Vec(numLanes, chiselTypeOf(laneReqs(0).address))) val laneValid = Wire(Vec(numLanes, Bool()))
val laneIsStore = Wire(Vec(numLanes, chiselTypeOf(laneReqs(0).is_store))) val laneAddress = Wire(Vec(numLanes, chiselTypeOf(perLane(0).address)))
val laneSize = Wire(Vec(numLanes, chiselTypeOf(laneReqs(0).size))) val laneIsStore = Wire(Vec(numLanes, chiselTypeOf(perLane(0).is_store)))
val laneData = Wire(Vec(numLanes, chiselTypeOf(laneReqs(0).data))) val laneSize = Wire(Vec(numLanes, chiselTypeOf(perLane(0).size)))
laneReqs.zipWithIndex.foreach { case (req, i) => val laneData = Wire(Vec(numLanes, chiselTypeOf(perLane(0).data)))
laneValid(i) := req.valid perLane.zipWithIndex.foreach { case (req, i) =>
laneAddress(i) := req.address laneValid(i) := req.valid
laneIsStore(i) := req.is_store laneAddress(i) := req.address
laneSize(i) := req.size laneIsStore(i) := req.is_store
laneData(i) := req.data laneSize(i) := req.size
laneData(i) := req.data
}
// flatten per-lane signals to the Verilog blackbox input
traceLogIO.valid := laneValid.asUInt
traceLogIO.address := laneAddress.asUInt
traceLogIO.is_store := laneIsStore.asUInt
traceLogIO.size := laneSize.asUInt
traceLogIO.data := laneData.asUInt
} }
// flatten per-lane signals to the Verilog blackbox input
sim.io.trace_log.valid := laneValid.asUInt
sim.io.trace_log.address := laneAddress.asUInt
sim.io.trace_log.is_store := laneIsStore.asUInt
sim.io.trace_log.size := laneSize.asUInt
sim.io.trace_log.data := laneData.asUInt
assert(sim.io.trace_log.ready === true.B, "MemTraceLogger is expected to be always ready") flattenTrace(simReq.io.trace_log, laneReqs)
flattenTrace(simResp.io.trace_log, laneResps)
assert(simReq.io.trace_log.ready === true.B, "MemTraceLogger is expected to be always ready")
assert(simResp.io.trace_log.ready === true.B, "MemTraceLogger is expected to be always ready")
// val laneValid = Wire(Vec(numLanes, Bool()))
// val laneAddress = Wire(Vec(numLanes, chiselTypeOf(laneReqs(0).address)))
// val laneIsStore = Wire(Vec(numLanes, chiselTypeOf(laneReqs(0).is_store)))
// val laneSize = Wire(Vec(numLanes, chiselTypeOf(laneReqs(0).size)))
// val laneData = Wire(Vec(numLanes, chiselTypeOf(laneReqs(0).data)))
// laneReqs.zipWithIndex.foreach { case (req, i) =>
// laneValid(i) := req.valid
// laneAddress(i) := req.address
// laneIsStore(i) := req.is_store
// laneSize(i) := req.size
// laneData(i) := req.data
// }
// // flatten per-lane signals to the Verilog blackbox input
// simReq.io.trace_log.valid := laneValid.asUInt
// simReq.io.trace_log.address := laneAddress.asUInt
// simReq.io.trace_log.is_store := laneIsStore.asUInt
// simReq.io.trace_log.size := laneSize.asUInt
// simReq.io.trace_log.data := laneData.asUInt
} }
} }
class SimMemTraceLogger(filename: String, numLanes: Int) // MemTraceLogger is bidirectional. The DPI module tells itself if it's logging
// the request stream or the response stream by `isResponse`. This distinction
// is needed because the response trace file will not contain certain columns
// such as address.
class SimMemTraceLogger(isResponse: Boolean, filename: String, numLanes: Int)
extends BlackBox( extends BlackBox(
Map("FILENAME" -> filename, "NUM_LANES" -> numLanes) Map(
"IS_RESPONSE" -> (if (isResponse) 1 else 0),
"FILENAME" -> filename,
"NUM_LANES" -> numLanes
)
) )
with HasBlackBoxResource { with HasBlackBoxResource {
val io = IO(new Bundle { val io = IO(new Bundle {