Log both request and response in trace logger

Inside the DPI code, keep a vector of unique_ptrs whose indices act as handles
to the different trace logger instances.  Each logger instance is created by a
single instance of the Verilog module, and several of these Verilog modules may
be instantiated in the Chisel module (see simReq and simResp in MemTraceLogger).
This commit is contained in:
Hansung Kim
2023-04-17 17:59:30 -07:00
parent 8978c2a812
commit 41d520a991
4 changed files with 267 additions and 97 deletions

View File

@@ -28,10 +28,11 @@ public:
// Writes memory trace lines (MemTraceLine) to an output file.
// NOTE(review): this listing interleaves pre- and post-change lines of a diff;
// the filename-only constructor below appears to be the pre-change signature —
// confirm which overloads actually exist in the header.
class MemTraceWriter {
public:
// Constructs a writer from a trace file name only.
MemTraceWriter(const std::string &filename);
// Constructs a writer from a trace file name and records whether this writer
// logs the response stream (`is_response`) rather than the request stream.
MemTraceWriter(const bool is_response, const std::string &filename);
~MemTraceWriter();
// Writes one trace record to the trace output.
void write_line_to_trace(const MemTraceLine line);
// True when this writer logs responses; set by the two-argument constructor.
bool is_response;
// C stdio handle of the trace output.
// NOTE(review): presumably opened by the constructor and closed by the
// destructor — neither body is visible here, confirm.
FILE *outfile;
};
@@ -45,8 +46,9 @@ extern "C" void memtrace_query(unsigned char trace_read_ready,
int *trace_read_size,
unsigned long *trace_read_data,
unsigned char *trace_read_finished);
extern "C" void memtracelogger_init(const char *filename);
extern "C" void memtracelogger_log(unsigned char trace_log_valid,
extern "C" int memtracelogger_init(int is_response, const char *filename);
extern "C" void memtracelogger_log(int handle,
unsigned char trace_log_valid,
unsigned long trace_log_cycle,
unsigned long trace_log_address,
int trace_log_lane_id,

View File

@@ -1,19 +1,23 @@
#ifndef NO_VPI
#include <vpi_user.h>
#include <svdpi.h>
#include <vpi_user.h>
#endif
#include <string>
#include <cstring>
#include <cstdio>
#include <cassert>
#include <memory>
#include <unistd.h>
#include "SimMemTrace.h"
#include <cassert>
#include <cstdio>
#include <cstring>
#include <memory>
#include <string>
#include <unistd.h>
// Global singleton instance
static std::unique_ptr<MemTraceWriter> logger;
// Contains handle for every logger that is instantiated per Verilog module
// instance
static std::vector<std::unique_ptr<MemTraceWriter>> loggers;
MemTraceWriter::MemTraceWriter(const bool is_response,
const std::string &filename) {
this->is_response = is_response;
MemTraceWriter::MemTraceWriter(const std::string &filename) {
char cwd[4096];
if (getcwd(cwd, sizeof(cwd))) {
printf("MemTraceWriter: current working dir: %s\n", cwd);
@@ -36,16 +40,17 @@ void MemTraceWriter::write_line_to_trace(const MemTraceLine line) {
line.address, line.data, (1u << line.log_data_size));
}
extern "C" void memtracelogger_init(const char *filename) {
// Returns the "handle" ID for this particular logger instance.
extern "C" int memtracelogger_init(int is_response, const char *filename) {
#ifndef NO_VPI
s_vpi_vlog_info info;
if (!vpi_get_vlog_info(&info)) {
fprintf(stderr, "fatal: failed to get plusargs from VCS\n");
exit(1);
}
const char* TRACEFILENAME_PLUSARG = "+memtracefile=";
const char *TRACEFILENAME_PLUSARG = "+memtracefile=";
for (int i = 0; i < info.argc; i++) {
char* input_arg = info.argv[i];
char *input_arg = info.argv[i];
if (strncmp(input_arg, TRACEFILENAME_PLUSARG,
strlen(TRACEFILENAME_PLUSARG)) == 0) {
filename = input_arg + strlen(TRACEFILENAME_PLUSARG);
@@ -54,20 +59,24 @@ extern "C" void memtracelogger_init(const char *filename) {
}
#endif
printf("memtrace_init: filename=[%s]\n", filename);
int handle = loggers.size();
loggers.emplace_back(std::make_unique<MemTraceWriter>(is_response, filename));
logger = std::make_unique<MemTraceWriter>(filename);
printf("memtracelogger_init: handle=%d, is_response=%d, filename=[%s]\n",
handle, is_response, filename);
return handle;
}
// This is used to log both TileLink A and D channels.
// TODO: accept core_id as well
extern "C" void memtracelogger_log(unsigned char trace_log_valid,
unsigned long trace_log_cycle,
unsigned long trace_log_address,
int trace_log_lane_id,
unsigned char trace_log_is_store,
int trace_log_size,
unsigned long trace_log_data,
unsigned char *trace_log_ready) {
extern "C" void
memtracelogger_log(int handle,
unsigned char trace_log_valid, unsigned long trace_log_cycle,
unsigned long trace_log_address, int trace_log_lane_id,
unsigned char trace_log_is_store, int trace_log_size,
unsigned long trace_log_data,
unsigned char *trace_log_ready) {
// printf("memtrace_query(cycle=%ld, tid=%d)\n", trace_read_cycle,
// trace_read_lane_id);
*trace_log_ready = 1;
@@ -77,8 +86,7 @@ extern "C" void memtracelogger_log(unsigned char trace_log_valid,
}
printf("%s: [%lu] valid: address=%lx, tid=%u, size=%d\n", __func__,
trace_log_cycle, trace_log_address, trace_log_lane_id,
trace_log_size);
trace_log_cycle, trace_log_address, trace_log_lane_id, trace_log_size);
MemTraceLine line{.valid = (trace_log_valid == 1),
.cycle = static_cast<long>(trace_log_cycle),
@@ -89,5 +97,7 @@ extern "C" void memtracelogger_log(unsigned char trace_log_valid,
.data = trace_log_data,
.log_data_size = trace_log_size};
assert(0 <= handle && handle < loggers.size() && "wrong trace logger handle");
auto logger = loggers[handle].get();
logger->write_line_to_trace(line);
}

View File

@@ -0,0 +1,100 @@
// FIXME hardcoded
`define DATA_WIDTH 64
`define MAX_NUM_LANES 32
`define LOGSIZE_WIDTH 32
// Creates a new trace logger on the C side and returns its handle, which must
// be passed to every subsequent memtracelogger_log() call.
//   is_response: non-zero if this logger records the response stream,
//                zero if it records the request stream.
//   filename:    trace output file name.
// `is_response` is imported as `int` so that it matches the C prototype
//   int memtracelogger_init(int is_response, const char *filename);
// A DPI `bit` argument maps to svBit (unsigned char) on the C side, which
// would not agree with the `int` parameter of that prototype.
import "DPI-C" function int memtracelogger_init(
input int is_response,
input string filename
);
// Logs one lane's trace signals for one cycle through the C-side logger that
// is identified by `handle`.
//
// Make sure to sync the parameters for:
// (1) import "DPI-C" declaration
// (2) C function declaration
// (3) DPI function calls inside initial/always blocks
// Arguments are matched to the C function by position, not by name.
import "DPI-C" function void memtracelogger_log
(
// handle returned by memtracelogger_init()
input int handle,
// whether this lane carries a valid entry in this cycle
input bit trace_log_valid,
// cycle count at which the entry was observed
input longint trace_log_cycle,
input longint trace_log_address,
// lane index; the C declaration names this argument trace_log_lane_id
input int trace_log_tid,
input bit trace_log_is_store,
input int trace_log_size,
input longint trace_log_data,
// written by the C side
input_placeholder_removed
);
// Simulation-only module that forwards per-lane trace signals to the C-side
// trace logger through DPI calls.
//
// Parameters:
//   IS_RESPONSE - forwarded to memtracelogger_init() as its first argument
//   FILENAME    - trace file name forwarded to memtracelogger_init()
//   NUM_LANES   - number of lanes packed into each flattened input bus
//
// At time zero the module registers itself with the C side and stores the
// returned handle; after reset it calls memtracelogger_log() once per lane on
// every rising clock edge.
module SimMemTraceLogger #(parameter
IS_RESPONSE = 0,
FILENAME = "undefined",
NUM_LANES = 4) (
input clock,
input reset,
// Per-lane fields are concatenated into flat buses.
// NOTE: LSB is lane 0
input [NUM_LANES-1:0] trace_log_valid,
input [`DATA_WIDTH*NUM_LANES-1:0] trace_log_address,
input [NUM_LANES-1:0] trace_log_is_store,
input [`LOGSIZE_WIDTH*NUM_LANES-1:0] trace_log_size,
input [`DATA_WIDTH*NUM_LANES-1:0] trace_log_data,
output trace_log_ready
);
// Handle returned by memtracelogger_init(); identifies this module instance's
// logger in every memtracelogger_log() call.
int logger_handle;
// Backing variable for trace_log_ready: set to 1 while in reset and otherwise
// written by memtracelogger_log() through its output argument.
bit __in_ready;
// cycle_counter will start off right after reset is deasserted which should
// synchronize itself with SimMemTrace.cycle_counter
reg [`DATA_WIDTH-1:0] cycle_counter;
wire [`DATA_WIDTH-1:0] next_cycle_counter;
assign next_cycle_counter = cycle_counter + 1'b1;
// wires going into the DPI call, one array element per lane
wire __valid [NUM_LANES-1:0];
wire [`DATA_WIDTH-1:0] __address [NUM_LANES-1:0];
wire __is_store [NUM_LANES-1:0];
wire [`LOGSIZE_WIDTH-1:0] __size [NUM_LANES-1:0];
wire [`DATA_WIDTH-1:0] __data [NUM_LANES-1:0];
assign trace_log_ready = __in_ready;
// Slice the flattened input buses into the per-lane arrays above.
genvar g;
generate
for (g = 0; g < NUM_LANES; g = g + 1) begin
// LSB is lane 0
assign __valid[g] = trace_log_valid[g];
assign __address[g] = trace_log_address[`DATA_WIDTH*(g+1)-1:`DATA_WIDTH*g];
assign __is_store[g] = trace_log_is_store[g];
assign __size[g] = trace_log_size[`LOGSIZE_WIDTH*(g+1)-1:`LOGSIZE_WIDTH*g];
assign __data[g] = trace_log_data[`DATA_WIDTH*(g+1)-1:`DATA_WIDTH*g];
end
endgenerate
// Register this instance with the C side once, at the start of simulation.
initial begin
/* $value$plusargs("uartlog=%s", __uartlog); */
logger_handle = memtracelogger_init(IS_RESPONSE, FILENAME);
end
always @(posedge clock) begin
if (reset) begin
// NOTE(review): __in_ready is written with a blocking assignment inside a
// clocked block, while cycle_counter uses non-blocking assignments — confirm
// that no other clocked logic depends on when trace_log_ready updates.
__in_ready = 1'b1;
cycle_counter <= `DATA_WIDTH'b0;
end else begin
cycle_counter <= next_cycle_counter;
// One DPI call per lane on every cycle; the lane's valid bit is passed as an
// argument rather than used to gate the call. Because cycle_counter is
// updated with a non-blocking assignment, the value passed here is the
// counter value from before this edge's increment.
for (integer tid = 0; tid < NUM_LANES; tid = tid + 1) begin
memtracelogger_log(
logger_handle,
__valid[tid],
cycle_counter,
__address[tid],
tid,
__is_store[tid],
__size[tid],
__data[tid],
// Every lane's call writes the same variable, so the value left after the
// loop is the one written by the last lane's call.
__in_ready
);
end
end
end
endmodule

View File

@@ -747,7 +747,11 @@ class SimMemTrace(filename: String, numLanes: Int)
addResource("/csrc/SimMemTrace.h")
}
class MemTraceLogger(numLanes: Int = 4, filename: String = "vecadd.core1.thread4.out.trace")(implicit
class MemTraceLogger(
numLanes: Int = 4,
reqFilename: String = "vecadd.core1.thread4.logger.req.trace",
respFilename: String = "vecadd.core1.thread4.logger.resp.trace"
)(implicit
p: Parameters
) extends LazyModule {
val node = TLIdentityNode()
@@ -775,98 +779,152 @@ class MemTraceLogger(numLanes: Int = 4, filename: String = "vecadd.core1.thread4
lazy val module = new Impl
class Impl extends LazyModuleImp(this) {
val sim = Module(new SimMemTraceLogger(filename, numLanes))
sim.io.clock := clock
sim.io.reset := reset.asBool
val simReq = Module(new SimMemTraceLogger(false, reqFilename, numLanes))
val simResp = Module(new SimMemTraceLogger(true, respFilename, numLanes))
simReq.io.clock := clock
simReq.io.reset := reset.asBool
simResp.io.clock := clock
simResp.io.reset := reset.asBool
val laneReqs = Wire(Vec(numLanes, new TraceReq))
val laneResps = Wire(Vec(numLanes, new TraceReq))
assert(
numLanes == node.in.length,
"`numLanes` does not match the number of TL edges connected to the MemTraceLogger"
)
// Classifies a TileLink A-channel opcode: true for a store, false for a load.
// Accepted opcodes are 0 (PutFullData) and 4 (Get). 1 (PutPartialData) is not
// supported and trips the assertion like any other opcode.
def tlAOpcodeIsStore(opcode: UInt): Bool = {
  val isPutFullData = opcode === 0.U
  val isGet = opcode === 4.U
  assert(isPutFullData || isGet, "unhandled TL A opcode found")
  isPutFullData
}
// Classifies a TileLink D-channel opcode: true when it acknowledges a store.
// Accepted opcodes are 0 (AccessAck, answering a Put) and 1 (AccessAckData,
// answering a Get or Atomic); anything else trips the assertion.
// See Table 13 of spec 1.8.1
def tlDOpcodeIsStore(opcode: UInt): Bool = {
  val isAccessAck = opcode === 0.U
  val isAccessAckData = opcode === 1.U
  assert(isAccessAck || isAccessAckData, "unhandled TL D opcode found")
  isAccessAck
}
// snoop on the TileLink edges to log traffic
((node.in zip node.out) zip laneReqs).foreach { case (((tlIn, _), (tlOut, _)), req) =>
tlOut.a <> tlIn.a
tlIn.d <> tlOut.d
((node.in zip node.out) zip (laneReqs zip laneResps)).foreach {
case (((tlIn, _), (tlOut, _)), (req, resp)) =>
tlOut.a <> tlIn.a
tlIn.d <> tlOut.d
// requests on TL A channel
//
req.valid := tlIn.a.valid
req.size := tlIn.a.bits.size
def tlOpcodeIsStore(opcode: UInt): Bool = {
// 0: PutFullData, 1: PutPartialData but we don't support it
// 4: Get
assert(opcode === 0.U || opcode === 4.U, "unhandled TL opcode found in MemTraceLogger")
tlIn.a.bits.opcode === 0.U
}
req.is_store := tlOpcodeIsStore(tlIn.a.bits.opcode)
// TL always carries the exact unaligned address that the client
// originally requested, so no postprocessing required
req.address := tlIn.a.bits.address
// requests on TL A channel
//
req.valid := tlIn.a.valid
req.size := tlIn.a.bits.size
req.is_store := tlAOpcodeIsStore(tlIn.a.bits.opcode)
// TL always carries the exact unaligned address that the client
// originally requested, so no postprocessing required
req.address := tlIn.a.bits.address
// TL data
//
// When tlIn.a.bits.size is smaller than the data bus width, need to
// figure out which byte lanes we actually accessed so that
// we can write that to the memory trace.
// See Section 4.5 Byte Lanes in spec 1.8.1
// TL data
//
// When tlIn.a.bits.size is smaller than the data bus width, need to
// figure out which byte lanes we actually accessed so that
// we can write that to the memory trace.
// See Section 4.5 Byte Lanes in spec 1.8.1
// This assert only holds true for PutFullData and not PutPartialData,
// where HIGH bits in the mask may not be contiguous.
assert(
PopCount(tlIn.a.bits.mask) === (1.U << tlIn.a.bits.size),
"mask HIGH bits do not match the TL size. This should have been handled by the TL generator logic"
)
val trailingZerosInMask = trailingZeros(tlIn.a.bits.mask)
val mask = ~((~0.U) << (trailingZerosInMask * 8.U))
req.data := mask & (tlIn.a.bits.data >> (trailingZerosInMask * 8.U))
when(req.valid) {
TracePrintf(
"MemTraceLogger",
tlIn.a.bits.address,
tlIn.a.bits.size,
tlIn.a.bits.mask,
req.is_store,
tlIn.a.bits.data,
req.data
// This assert only holds true for PutFullData and not PutPartialData,
// where HIGH bits in the mask may not be contiguous.
assert(
PopCount(tlIn.a.bits.mask) === (1.U << tlIn.a.bits.size),
"mask HIGH bits do not match the TL size. This should have been handled by the TL generator logic"
)
}
val trailingZerosInMask = trailingZeros(tlIn.a.bits.mask)
val mask = ~((~0.U) << (trailingZerosInMask * 8.U))
req.data := mask & (tlIn.a.bits.data >> (trailingZerosInMask * 8.U))
// responses on TL D channel
// TODO
when(req.valid) {
TracePrintf(
"MemTraceLogger",
tlIn.a.bits.address,
tlIn.a.bits.size,
tlIn.a.bits.mask,
req.is_store,
tlIn.a.bits.data,
req.data
)
}
// responses on TL D channel
//
resp.valid := tlOut.d.valid
resp.size := tlOut.d.bits.size
resp.is_store := tlDOpcodeIsStore(tlOut.d.bits.opcode)
// NOTE: the TL D channel carries neither address nor mask, so there's no
// easy way to figure out which bytes the master actually uses. Since we
// don't care too much about addresses in the trace anyway, just store
// the entire data bits.
resp.address := 0.U
resp.data := tlOut.d.bits.data
}
// clunky workaround of the fact that Chisel doesn't allow partial
// assignment to a bitfield range of a wide signal.
val laneValid = Wire(Vec(numLanes, Bool()))
val laneAddress = Wire(Vec(numLanes, chiselTypeOf(laneReqs(0).address)))
val laneIsStore = Wire(Vec(numLanes, chiselTypeOf(laneReqs(0).is_store)))
val laneSize = Wire(Vec(numLanes, chiselTypeOf(laneReqs(0).size)))
val laneData = Wire(Vec(numLanes, chiselTypeOf(laneReqs(0).data)))
laneReqs.zipWithIndex.foreach { case (req, i) =>
laneValid(i) := req.valid
laneAddress(i) := req.address
laneIsStore(i) := req.is_store
laneSize(i) := req.size
laneData(i) := req.data
// Drives the flat trace inputs of a Verilog blackbox from per-lane trace
// bundles. Each field is first collected into a per-lane Vec and then
// flattened with asUInt, because Chisel doesn't allow partial assignment to a
// bitfield range of a wide signal.
def flattenTrace(traceLogIO: Bundle with HasTraceReq, perLane: Vec[TraceReq]) = {
  // Lane 0 serves as the type template for every field.
  val template = perLane(0)
  val valids = Wire(Vec(numLanes, Bool()))
  val addresses = Wire(Vec(numLanes, chiselTypeOf(template.address)))
  val isStores = Wire(Vec(numLanes, chiselTypeOf(template.is_store)))
  val sizes = Wire(Vec(numLanes, chiselTypeOf(template.size)))
  val datas = Wire(Vec(numLanes, chiselTypeOf(template.data)))

  for (idx <- perLane.indices) {
    val lane = perLane(idx)
    valids(idx) := lane.valid
    addresses(idx) := lane.address
    isStores(idx) := lane.is_store
    sizes(idx) := lane.size
    datas(idx) := lane.data
  }

  // flatten per-lane signals to the Verilog blackbox input
  traceLogIO.valid := valids.asUInt
  traceLogIO.address := addresses.asUInt
  traceLogIO.is_store := isStores.asUInt
  traceLogIO.size := sizes.asUInt
  traceLogIO.data := datas.asUInt
}
// flatten per-lane signals to the Verilog blackbox input
sim.io.trace_log.valid := laneValid.asUInt
sim.io.trace_log.address := laneAddress.asUInt
sim.io.trace_log.is_store := laneIsStore.asUInt
sim.io.trace_log.size := laneSize.asUInt
sim.io.trace_log.data := laneData.asUInt
assert(sim.io.trace_log.ready === true.B, "MemTraceLogger is expected to be always ready")
flattenTrace(simReq.io.trace_log, laneReqs)
flattenTrace(simResp.io.trace_log, laneResps)
assert(simReq.io.trace_log.ready === true.B, "MemTraceLogger is expected to be always ready")
assert(simResp.io.trace_log.ready === true.B, "MemTraceLogger is expected to be always ready")
// val laneValid = Wire(Vec(numLanes, Bool()))
// val laneAddress = Wire(Vec(numLanes, chiselTypeOf(laneReqs(0).address)))
// val laneIsStore = Wire(Vec(numLanes, chiselTypeOf(laneReqs(0).is_store)))
// val laneSize = Wire(Vec(numLanes, chiselTypeOf(laneReqs(0).size)))
// val laneData = Wire(Vec(numLanes, chiselTypeOf(laneReqs(0).data)))
// laneReqs.zipWithIndex.foreach { case (req, i) =>
// laneValid(i) := req.valid
// laneAddress(i) := req.address
// laneIsStore(i) := req.is_store
// laneSize(i) := req.size
// laneData(i) := req.data
// }
// // flatten per-lane signals to the Verilog blackbox input
// simReq.io.trace_log.valid := laneValid.asUInt
// simReq.io.trace_log.address := laneAddress.asUInt
// simReq.io.trace_log.is_store := laneIsStore.asUInt
// simReq.io.trace_log.size := laneSize.asUInt
// simReq.io.trace_log.data := laneData.asUInt
}
}
class SimMemTraceLogger(filename: String, numLanes: Int)
// MemTraceLogger is bidirectional. `isResponse` tells the DPI module whether
// it is logging the request stream or the response stream. This distinction
// is needed because the response trace file will not contain certain columns
// such as address.
class SimMemTraceLogger(isResponse: Boolean, filename: String, numLanes: Int)
extends BlackBox(
Map("FILENAME" -> filename, "NUM_LANES" -> numLanes)
Map(
"IS_RESPONSE" -> (if (isResponse) 1 else 0),
"FILENAME" -> filename,
"NUM_LANES" -> numLanes
)
)
with HasBlackBoxResource {
val io = IO(new Bundle {