somehow merged

This commit is contained in:
Richard Yan
2023-04-22 21:56:57 -07:00
7 changed files with 946 additions and 229 deletions

View File

@@ -3,12 +3,16 @@
#include <svdpi.h>
#endif
#include <string>
#include <string.h>
#include <string.h>
#include <cstdio>
#include <cmath>
#include <cassert>
#include <unistd.h>
#include "SimMemTrace.h"
// Global singleton instance
static std::unique_ptr<MemTraceReader> reader;
MemTraceReader::MemTraceReader(const std::string &filename) {
char cwd[4096];
if (getcwd(cwd, sizeof(cwd))) {
@@ -34,10 +38,21 @@ void MemTraceReader::parse() {
printf("MemTraceReader: started parsing\n");
while (infile >> line.cycle >> line.loadstore >> line.core_id >>
long size = 0;
std::string loadstore; // FIXME: likely slow
while (infile >> line.cycle >> loadstore >> line.core_id >>
line.lane_id >> std::hex >> line.address >> line.data >> std::dec >>
line.data_size) {
size) {
line.valid = true;
line.is_store = (loadstore == "STORE");
assert(size > 0 && "invalid size in trace");
int lgsize = static_cast<int>(log2(size));
assert((size & ~(~0lu << lgsize)) == 0 &&
"non-power-of-2 size detected in trace");
line.log_data_size = lgsize;
trace.push_back(line);
}
read_pos = trace.cbegin();
@@ -75,8 +90,9 @@ MemTraceLine MemTraceReader::read_trace_at(const long cycle,
// read it right now.
return MemTraceLine{};
} else if (line.cycle == cycle && line.lane_id == lane_id) {
printf("fire! cycle=%ld, valid=%d, %s addr=%x \n", cycle, line.valid,
line.loadstore, line.address);
printf("fire! cycle=%ld, valid=%d, %s addr=%lx, size=%d \n", cycle,
line.valid, (line.is_store ? "STORE" : "LOAD"), line.address,
line.log_data_size);
// FIXME! Currently lane_id is assumed to be in round-robin order, e.g.
// 0->1->2->3->0->..., both in the trace file and the order the caller calls
@@ -119,11 +135,11 @@ extern "C" void memtrace_init(const char *filename) {
// TODO: accept core_id as well
extern "C" void memtrace_query(unsigned char trace_read_ready,
unsigned long trace_read_cycle,
int trace_read_lane_id,
int trace_read_lane_id,
unsigned char *trace_read_valid,
unsigned long *trace_read_address,
unsigned char *trace_read_is_store,
int *trace_read_store_mask,
int *trace_read_size,
unsigned long *trace_read_data,
unsigned char *trace_read_finished) {
// printf("memtrace_query(cycle=%ld, tid=%d)\n", trace_read_cycle,
@@ -136,8 +152,8 @@ extern "C" void memtrace_query(unsigned char trace_read_ready,
auto line = reader->read_trace_at(trace_read_cycle, trace_read_lane_id);
*trace_read_valid = line.valid;
*trace_read_address = line.address;
*trace_read_is_store = strcmp(line.loadstore, "STORE") == 0 ;
*trace_read_store_mask = line.data_size;
*trace_read_is_store = line.is_store;
*trace_read_size = line.log_data_size;
*trace_read_data = line.data;
// This means finished and valid will go up at the same cycle. Need to
// handle this without skipping the last line.

View File

@@ -2,20 +2,16 @@
#include <memory>
#include <fstream>
class MemTraceReader;
// Global singleton instance of MemTraceReader
static std::unique_ptr<MemTraceReader> reader;
// One record of a memory trace: a single load or store issued by one lane at
// one cycle.  A default-constructed line has valid == false.
struct MemTraceLine {
// True when this line carries a real request.
bool valid = false;
// Cycle at which the request is issued / was observed.
long cycle = 0;
// NOTE(review): textual "LOAD"/"STORE" tag; appears to be superseded by
// is_store below -- confirm whether this field is still meant to exist.
char loadstore[10];
int core_id = 0;
int lane_id = 0;
// Source ID of the request as reported by the logger.
int source = 0;
unsigned long address = 0;
// True for a store, false for a load.
bool is_store = 0;
unsigned long data = 0;
// NOTE(review): raw size field; appears to be superseded by log_data_size
// below -- confirm whether this field is still meant to exist.
int data_size = 0;
// log2 of the access size; the trace file itself stores the size as
// (1 << log_data_size).
int log_data_size = 0;
};
class MemTraceReader {
@@ -31,14 +27,34 @@ public:
std::vector<MemTraceLine>::const_iterator read_pos;
};
// Writes MemTraceLine records to a text trace file, one line per record.
//
// The writer owns `outfile`: the constructor opens it and the destructor
// closes it.  `outfile` is null when the file could not be opened.
class MemTraceWriter {
public:
  // Opens `filename` for writing.  `is_response` is stored as-is in the
  // public member of the same name.
  MemTraceWriter(const bool is_response, const std::string &filename);
  ~MemTraceWriter();

  // Copying would leave two writers closing the same FILE* in their
  // destructors, so the type is deliberately non-copyable.
  MemTraceWriter(const MemTraceWriter &) = delete;
  MemTraceWriter &operator=(const MemTraceWriter &) = delete;

  // Appends one formatted record to the trace file.
  void write_line_to_trace(const MemTraceLine line);

  bool is_response = false;
  FILE *outfile = nullptr;
};
extern "C" void memtrace_init(const char *filename);
extern "C" void memtrace_query(unsigned char trace_read_ready,
unsigned long trace_read_cycle,
int trace_read_lane_id,
int trace_read_lane_id,
unsigned char *trace_read_valid,
unsigned long *trace_read_address,
unsigned char *trace_read_is_store,
int *trace_read_store_mask,
int *trace_read_size,
unsigned long *trace_read_data,
unsigned char *trace_read_finished
);
unsigned char *trace_read_finished);
// Creates a new trace logger that writes to `filename` and returns its handle,
// which must be passed to every subsequent memtracelogger_log() call for that
// logger.
extern "C" int memtracelogger_init(int is_response, const char *filename);
// Logs one request of one lane to the logger identified by `handle`.
// Nothing is written when trace_log_valid is 0.  *trace_log_ready is always
// set to 1.
extern "C" void memtracelogger_log(int handle,
unsigned char trace_log_valid,
unsigned long trace_log_cycle,
int trace_log_lane_id,
int trace_log_source,
unsigned long trace_log_address,
unsigned char trace_log_is_store,
int trace_log_size,
unsigned long trace_log_data,
unsigned char *trace_log_ready);

View File

@@ -0,0 +1,107 @@
#ifndef NO_VPI
#include <svdpi.h>
#include <vpi_user.h>
#endif
#include "SimMemTrace.h"
#include <cassert>
#include <cstdio>
#include <cstring>
#include <memory>
#include <string>
#include <unistd.h>
// Contains handle for every logger that is instantiated per Verilog module
// instance
static std::vector<std::unique_ptr<MemTraceWriter>> loggers;
// Opens `filename` for writing as the trace output file.
//
// The current working directory is printed first so that a relative
// `filename` can be located afterwards.  A failed open is only reported on
// stderr; `outfile` is then left null.
MemTraceWriter::MemTraceWriter(const bool is_response,
                               const std::string &filename)
    : is_response(is_response) {
  char working_dir[4096];
  const bool have_cwd = getcwd(working_dir, sizeof(working_dir)) != nullptr;
  if (have_cwd) {
    printf("MemTraceWriter: current working dir: %s\n", working_dir);
  }

  const char *path = filename.c_str();
  outfile = fopen(path, "w");
  if (outfile == nullptr) {
    fprintf(stderr, "failed to open file %s\n", path);
  }
}
// Closes the trace file, if one was opened, and announces the destruction.
MemTraceWriter::~MemTraceWriter() {
  // The constructor only reports a failed fopen() and leaves `outfile` null.
  // Passing a null stream to fclose() is undefined behavior, so skip it.
  if (outfile) {
    fclose(outfile);
    outfile = nullptr;
  }
  printf("MemTraceWriter destroyed\n");
}
// Appends `line` to the trace file as
//   <cycle> <LOAD|STORE> <core_id> <lane_id> <source> 0x<address> 0x<data> <size>
// where <size> is 2^log_data_size.
//
// Nothing is written when the trace file failed to open, or when
// log_data_size cannot be expanded into a size (the latter is reported on
// stderr).
void MemTraceWriter::write_line_to_trace(const MemTraceLine line) {
  // The constructor already reported a failed open; fprintf() on a null
  // stream would be undefined behavior, so drop the line quietly instead of
  // repeating the error for every record.
  if (!outfile) {
    return;
  }

  // Shifting by a negative count or by >= the operand width is undefined, so
  // reject such values instead of emitting a garbage size.
  const int max_log_size = static_cast<int>(8 * sizeof(unsigned long));
  if (line.log_data_size < 0 || line.log_data_size >= max_log_size) {
    fprintf(stderr,
            "MemTraceWriter: invalid log2 size %d at cycle %ld, line dropped\n",
            line.log_data_size, line.cycle);
    return;
  }
  const unsigned long size = 1ul << line.log_data_size;

  fprintf(outfile, "%ld %s %d %d %d 0x%lx 0x%lx %lu\n", line.cycle,
          (line.is_store ? "STORE" : "LOAD"), line.core_id, line.lane_id,
          line.source, line.address, line.data, size);
}
// Creates a new trace logger and returns its "handle" ID, i.e. its index in
// `loggers`.  The caller passes the handle back to memtracelogger_log().
//
// is_response: forwarded to the MemTraceWriter constructor.
// filename:    path of the trace file to create.  Unless built with NO_VPI,
//              a "+memtracefile=<path>" simulator plusarg overrides it.
extern "C" int memtracelogger_init(int is_response, const char *filename) {
#ifndef NO_VPI
  s_vpi_vlog_info info;
  if (!vpi_get_vlog_info(&info)) {
    fprintf(stderr, "fatal: failed to get plusargs from VCS\n");
    exit(1);
  }

  const char *TRACEFILENAME_PLUSARG = "+memtracefile=";
  // Loop-invariant: measure the prefix once instead of for every argument.
  const auto plusarg_len = strlen(TRACEFILENAME_PLUSARG);
  for (int i = 0; i < info.argc; i++) {
    const char *input_arg = info.argv[i];
    if (input_arg &&
        strncmp(input_arg, TRACEFILENAME_PLUSARG, plusarg_len) == 0) {
      filename = input_arg + plusarg_len;
      break;
    }
  }
#endif

  // A null pointer must not reach the std::string conversion or the "%s"
  // below; fall back to an empty name, which the writer reports as a failed
  // open.
  if (!filename) {
    fprintf(stderr, "memtracelogger_init: null filename given\n");
    filename = "";
  }

  const int handle = static_cast<int>(loggers.size());
  loggers.emplace_back(std::make_unique<MemTraceWriter>(is_response, filename));
  printf("memtracelogger_init: handle=%d, is_response=%d, filename=[%s]\n",
         handle, is_response, filename);

  return handle;
}
// Logs one request of one lane to the trace file of logger `handle`.
// This is used to log both TileLink A and D channels.
//
// handle:          value returned by memtracelogger_init().
// trace_log_valid: nothing is logged when this is 0.
// trace_log_size:  stored as the line's log_data_size.
// trace_log_ready: output; always set to 1.
//
// An out-of-range handle trips the assert in debug builds; in NDEBUG builds
// it is reported on stderr and the request is dropped.
// TODO: accept core_id as well
extern "C" void memtracelogger_log(int handle,
                                   unsigned char trace_log_valid,
                                   unsigned long trace_log_cycle,
                                   int trace_log_lane_id,
                                   int trace_log_source,
                                   unsigned long trace_log_address,
                                   unsigned char trace_log_is_store,
                                   int trace_log_size,
                                   unsigned long trace_log_data,
                                   unsigned char *trace_log_ready) {
  *trace_log_ready = 1;

  if (!trace_log_valid) {
    return;
  }

  // Validate the handle before touching `loggers`.  The assert alone vanishes
  // under NDEBUG and would leave an out-of-bounds access.
  const bool handle_ok =
      handle >= 0 && static_cast<unsigned long>(handle) < loggers.size();
  assert(handle_ok && "wrong trace logger handle");
  if (!handle_ok) {
    fprintf(stderr, "memtracelogger_log: wrong trace logger handle %d\n",
            handle);
    return;
  }

  // printf("%s: [%lu] valid: address=%lx, tid=%u, size=%d\n", __func__,
  //        trace_log_cycle, trace_log_address, trace_log_lane_id, trace_log_size);

  // Treat any non-zero flag as true, matching the `!trace_log_valid` test
  // above.
  MemTraceLine line{.valid = (trace_log_valid != 0),
                    .cycle = static_cast<long>(trace_log_cycle),
                    .core_id = 0, // TODO support multicores
                    .lane_id = trace_log_lane_id,
                    .source = trace_log_source,
                    .address = trace_log_address,
                    .is_store = (trace_log_is_store != 0),
                    .data = trace_log_data,
                    .log_data_size = trace_log_size};

  loggers[handle]->write_line_to_trace(line);
}

View File

@@ -1,6 +1,7 @@
// FIXME hardcoded
`define DATA_WIDTH 64
`define MAX_NUM_LANES 32
`define MASK_WIDTH 8
`define LOGSIZE_WIDTH 8
import "DPI-C" function void memtrace_init(
input string filename
@@ -14,35 +15,34 @@ import "DPI-C" function void memtrace_query
(
input bit trace_read_ready,
input longint trace_read_cycle,
input int trace_read_tid,
input int trace_read_lane_id,
output bit trace_read_valid,
output longint trace_read_address,
output bit trace_read_is_store,
output int trace_read_store_mask,
output int trace_read_size,
output longint trace_read_data,
output bit trace_read_finished
);
module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) (
input clock,
input reset,
input clock,
input reset,
// These have to match the IO port of the Chisel wrapper module.
input trace_read_ready,
output [NUM_LANES-1:0] trace_read_valid,
output [`DATA_WIDTH*NUM_LANES-1:0] trace_read_address,
output [NUM_LANES-1:0] trace_read_is_store,
output [NUM_LANES*`MASK_WIDTH-1:0] trace_read_store_mask,
output [`DATA_WIDTH*NUM_LANES-1:0] trace_read_data,
output trace_read_finished
// These have to match the IO port name of the Chisel wrapper module.
input trace_read_ready,
output [NUM_LANES-1:0] trace_read_valid,
output [`DATA_WIDTH*NUM_LANES-1:0] trace_read_address,
output [NUM_LANES-1:0] trace_read_is_store,
output [`LOGSIZE_WIDTH*NUM_LANES-1:0] trace_read_size,
output [`DATA_WIDTH*NUM_LANES-1:0] trace_read_data,
output trace_read_finished
);
bit __in_valid[NUM_LANES-1:0];
longint __in_address[NUM_LANES-1:0];
bit __in_valid [NUM_LANES-1:0];
longint __in_address [NUM_LANES-1:0];
bit __in_is_store[NUM_LANES-1:0];
int __in_store_mask [NUM_LANES-1:0];
longint __in_data[NUM_LANES-1:0];
bit __in_is_store [NUM_LANES-1:0];
reg [`LOGSIZE_WIDTH-1:0] __in_size [NUM_LANES-1:0];
longint __in_data [NUM_LANES-1:0];
bit __in_finished;
string __uartlog;
@@ -54,13 +54,13 @@ module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) (
assign next_cycle_counter = cycle_counter + 1'b1;
// registers that stage outputs of the C parser
reg [NUM_LANES-1:0] __in_valid_reg;
reg [NUM_LANES-1:0] __in_valid_reg;
reg [`DATA_WIDTH-1:0] __in_address_reg [NUM_LANES-1:0];
reg [NUM_LANES-1:0] __in_is_store_reg;
reg [`MASK_WIDTH-1:0] __in_store_mask_reg [NUM_LANES-1:0];
reg [`DATA_WIDTH-1:0] __in_data_reg [NUM_LANES-1:0];
reg __in_finished_reg;
reg [NUM_LANES-1:0] __in_is_store_reg;
reg [`LOGSIZE_WIDTH-1:0] __in_size_reg [NUM_LANES-1:0];
reg [`DATA_WIDTH-1:0] __in_data_reg [NUM_LANES-1:0];
reg __in_finished_reg;
genvar g;
@@ -70,7 +70,7 @@ module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) (
assign trace_read_address[`DATA_WIDTH*(g+1)-1:`DATA_WIDTH*g] = __in_address_reg[g];
assign trace_read_is_store[g] = __in_is_store_reg[g];
assign trace_read_store_mask[`MASK_WIDTH*(g+1)-1:`MASK_WIDTH*g] = __in_store_mask_reg[g];
assign trace_read_size[`LOGSIZE_WIDTH*(g+1)-1:`LOGSIZE_WIDTH*g] = __in_size_reg[g];
assign trace_read_data[`DATA_WIDTH*(g+1)-1:`DATA_WIDTH*g] = __in_data_reg[g];
end
endgenerate
@@ -81,17 +81,14 @@ module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) (
memtrace_init(FILENAME);
end
// Evaluate the signals on the positive edge
always @(posedge clock) begin
// Setting reset value
if (reset) begin
for (integer tid = 0; tid < NUM_LANES; tid = tid + 1) begin
__in_valid[tid] = 1'b0;
__in_address[tid] = `DATA_WIDTH'b0;
__in_is_store[tid] = 1'b0;
__in_store_mask[tid] = `MASK_WIDTH'b0;
__in_size[tid] = `LOGSIZE_WIDTH'b0;
__in_data[tid] = `DATA_WIDTH'b0;
end
@@ -105,7 +102,7 @@ module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) (
__in_address_reg[tid] <= `DATA_WIDTH'b0;
__in_is_store_reg[tid] = 1'b0;
__in_store_mask_reg[tid] = `MASK_WIDTH'b0;
__in_size_reg[tid] = `LOGSIZE_WIDTH'b0;
__in_data_reg[tid] = `DATA_WIDTH'b0;
end
@@ -127,7 +124,7 @@ module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) (
__in_address[tid],
__in_is_store[tid],
__in_store_mask[tid],
__in_size[tid],
__in_data[tid],
__in_finished
@@ -140,7 +137,7 @@ module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) (
__in_address_reg[tid] <= __in_address[tid];
__in_is_store_reg[tid] <= __in_is_store[tid];
__in_store_mask_reg[tid] <= __in_store_mask[tid];
__in_size_reg[tid] <= __in_size[tid];
__in_data_reg[tid] <= __in_data[tid];
end
__in_finished_reg <= __in_finished;

View File

@@ -0,0 +1,106 @@
// FIXME hardcoded
`define DATA_WIDTH 64
`define MAX_NUM_LANES 32
`define SOURCEID_WIDTH 32
`define LOGSIZE_WIDTH 8
import "DPI-C" function int memtracelogger_init(
input bit is_response,
input string filename
);
// Make sure to sync the parameters for:
// (1) import "DPI-C" declaration
// (2) C function declaration
// (3) DPI function calls inside initial/always blocks
import "DPI-C" function void memtracelogger_log
(
input int handle,
input bit trace_log_valid,
input longint trace_log_cycle,
input int trace_log_lane_id,
input int trace_log_source,
input longint trace_log_address,
input bit trace_log_is_store,
input int trace_log_size,
input longint trace_log_data,
output bit trace_log_ready
);
// Logs per-lane memory requests to a trace file through the memtracelogger_*
// DPI functions.
//
// Parameters:
//   IS_RESPONSE - passed to memtracelogger_init() as is_response.
//   FILENAME    - trace file name passed to memtracelogger_init().
//   NUM_LANES   - number of lanes packed into each flattened input bus.
//
// Every input bus is the concatenation of NUM_LANES equally sized fields, with
// lane 0 in the least significant bits.
module SimMemTraceLogger #(parameter
IS_RESPONSE = 0,
FILENAME = "undefined",
NUM_LANES = 4) (
input clock,
input reset,
// NOTE: LSB is lane 0
input [NUM_LANES-1:0] trace_log_valid,
input [`SOURCEID_WIDTH*NUM_LANES-1:0] trace_log_source,
input [`DATA_WIDTH*NUM_LANES-1:0] trace_log_address,
input [NUM_LANES-1:0] trace_log_is_store,
input [`LOGSIZE_WIDTH*NUM_LANES-1:0] trace_log_size,
input [`DATA_WIDTH*NUM_LANES-1:0] trace_log_data,
output trace_log_ready
);
// Handle returned by memtracelogger_init(); identifies this instance's logger
// in every memtracelogger_log() call.
int logger_handle;
// Written by the DPI call (output argument) and by the reset branch below.
bit __in_ready;
// cycle_counter will start off right after reset is deasserted which should
// synchronize itself with SimMemTrace.cycle_counter
reg [`DATA_WIDTH-1:0] cycle_counter;
wire [`DATA_WIDTH-1:0] next_cycle_counter;
assign next_cycle_counter = cycle_counter + 1'b1;
// Per-lane wires going into the DPI call, unpacked from the flattened inputs
wire __valid [NUM_LANES-1:0];
wire [`SOURCEID_WIDTH-1:0] __source [NUM_LANES-1:0];
wire [`DATA_WIDTH-1:0] __address [NUM_LANES-1:0];
wire __is_store [NUM_LANES-1:0];
wire [`LOGSIZE_WIDTH-1:0] __size [NUM_LANES-1:0];
wire [`DATA_WIDTH-1:0] __data [NUM_LANES-1:0];
assign trace_log_ready = __in_ready;
genvar g;
generate
for (g = 0; g < NUM_LANES; g = g + 1) begin
// LSB is lane 0
assign __valid[g] = trace_log_valid[g];
assign __source[g] = trace_log_source[`SOURCEID_WIDTH*(g+1)-1:`SOURCEID_WIDTH*g];
assign __address[g] = trace_log_address[`DATA_WIDTH*(g+1)-1:`DATA_WIDTH*g];
assign __is_store[g] = trace_log_is_store[g];
assign __size[g] = trace_log_size[`LOGSIZE_WIDTH*(g+1)-1:`LOGSIZE_WIDTH*g];
assign __data[g] = trace_log_data[`DATA_WIDTH*(g+1)-1:`DATA_WIDTH*g];
end
endgenerate
// Create the logger once at the start of simulation and remember its handle.
initial begin
/* $value$plusargs("uartlog=%s", __uartlog); */
logger_handle = memtracelogger_init(IS_RESPONSE, FILENAME);
end
always @(posedge clock) begin
if (reset) begin
__in_ready = 1'b1;
cycle_counter <= `DATA_WIDTH'b0;
end else begin
// Non-blocking update: the DPI calls below still see the pre-increment
// cycle_counter value for this clock edge.
cycle_counter <= next_cycle_counter;
// One DPI call per lane, every cycle; the valid bit is passed through so the
// C side decides whether anything is logged.  __in_ready is overwritten by
// each call, so the last lane's value is the one that remains.
for (integer tid = 0; tid < NUM_LANES; tid = tid + 1) begin
memtracelogger_log(
logger_handle,
__valid[tid],
cycle_counter,
tid,
__source[tid],
__address[tid],
__is_store[tid],
__size[tid],
__data[tid],
__in_ready
);
end
end
end
endmodule

File diff suppressed because it is too large Load Diff

View File

@@ -87,7 +87,7 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
c.io.invalidate.poke(0.U)
// prepare
c.io.deq.ready.poke(false.B)
c.io.deq.ready.poke(true.B)
c.io.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x12.U)
@@ -113,6 +113,45 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
}
}
// Checks that a queue built with size argument 1 (presumably a single entry --
// confirm against CoalShiftQueue) keeps accepting a new element in the same
// cycle its only element is dequeued, and that elements come out in order with
// no empty cycle in between.
it should "work when enqueing and dequeueing simultaneously to a full queue" in {
test(new CoalShiftQueue(UInt(8.W), 1)) { c =>
c.io.invalidate.poke(0.U)
// prepare: enqueue 0x12 into the empty queue
c.io.deq.ready.poke(true.B)
c.io.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x12.U)
c.clock.step()
// enqueue and dequeue simultaneously: 0x12 leaves while 0x34 enters, and
// enq.ready must stay high even though the queue currently holds an element
c.io.deq.ready.poke(true.B)
c.io.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x34.U)
c.io.deq.valid.expect(true.B)
c.io.deq.bits.expect(0x12.U)
c.clock.step()
// enqueue and dequeue simultaneously once more: 0x34 leaves, 0x56 enters
c.io.deq.ready.poke(true.B)
c.io.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x56.U)
c.io.deq.valid.expect(true.B)
c.io.deq.bits.expect(0x34.U)
c.clock.step()
// dequeueing back-to-back should work without any holes in the middle
c.io.deq.ready.poke(true.B)
c.io.enq.valid.poke(false.B)
c.io.deq.valid.expect(true.B)
c.io.deq.bits.expect(0x56.U)
c.clock.step()
// make sure the queue is empty after the last element was dequeued
c.io.deq.ready.poke(true.B)
c.io.enq.valid.poke(false.B)
c.io.deq.valid.expect(false.B)
}
}
it should "invalidate head being dequeued" in {
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
c.io.invalidate.poke(0.U)
@@ -216,6 +255,7 @@ class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
val numLanes = 4
val numPerLaneReqs = 2
val sourceWidth = 2
val sizeWidth = 2
// 16B coalescing size
val coalDataWidth = 128
val numInflightCoalRequests = 4
@@ -226,8 +266,9 @@ class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
numLanes,
numPerLaneReqs,
sourceWidth,
sizeWidth,
coalDataWidth,
numInflightCoalRequests
numInflightCoalRequests,
)
)
// vcs helps with simulation time, but sometimes errors with
@@ -238,15 +279,19 @@ class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
c.io.coalReqValid.poke(true.B)
c.io.newEntry.source.poke(sourceId)
c.io.newEntry.lanes(0).reqs(0).valid.poke(true.B)
c.io.newEntry.lanes(0).reqs(0).source.poke(1.U)
c.io.newEntry.lanes(0).reqs(0).offset.poke(1.U)
c.io.newEntry.lanes(0).reqs(0).size.poke(2.U)
c.io.newEntry.lanes(0).reqs(1).valid.poke(true.B)
c.io.newEntry.lanes(0).reqs(1).source.poke(2.U)
c.io.newEntry.lanes(0).reqs(1).offset.poke(1.U)
c.io.newEntry.lanes(0).reqs(1).size.poke(2.U)
c.io.newEntry.lanes(2).reqs(0).valid.poke(true.B)
c.io.newEntry.lanes(2).reqs(0).source.poke(1.U)
c.io.newEntry.lanes(2).reqs(0).offset.poke(2.U)
c.io.newEntry.lanes(2).reqs(0).size.poke(1.U)
c.io.newEntry.lanes(2).reqs(1).valid.poke(true.B)
c.io.newEntry.lanes(2).reqs(1).source.poke(2.U)
c.io.newEntry.lanes(2).reqs(1).offset.poke(0.U)
c.io.newEntry.lanes(2).reqs(1).size.poke(2.U)
@@ -268,13 +313,13 @@ class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
c.io.uncoalResps(3)(0).valid.expect(false.B)
c.io.uncoalResps(0)(0).bits.data.expect(0x89abcdefL.U)
c.io.uncoalResps(0)(0).bits.source.expect(0.U)
c.io.uncoalResps(0)(0).bits.source.expect(1.U)
c.io.uncoalResps(0)(1).bits.data.expect(0x89abcdefL.U)
c.io.uncoalResps(0)(1).bits.source.expect(0.U)
c.io.uncoalResps(0)(1).bits.source.expect(2.U)
c.io.uncoalResps(2)(0).bits.data.expect(0x5ca1ab1eL.U)
c.io.uncoalResps(2)(0).bits.source.expect(0.U)
c.io.uncoalResps(2)(0).bits.source.expect(1.U)
c.io.uncoalResps(2)(1).bits.data.expect(0x01234567L.U)
c.io.uncoalResps(2)(1).bits.source.expect(0.U)
c.io.uncoalResps(2)(1).bits.source.expect(2.U)
}
}
}