Fix size parsing from memtrace

This commit is contained in:
Hansung Kim
2023-04-11 17:36:45 -07:00
parent 62f940618e
commit 71f334bb22
4 changed files with 51 additions and 46 deletions

View File

@@ -75,8 +75,8 @@ MemTraceLine MemTraceReader::read_trace_at(const long cycle,
// read it right now.
return MemTraceLine{};
} else if (line.cycle == cycle && line.lane_id == lane_id) {
printf("fire! cycle=%ld, valid=%d, %s addr=%x \n", cycle, line.valid,
line.loadstore, line.address);
printf("fire! cycle=%ld, valid=%d, %s addr=%lx, size=%d \n", cycle, line.valid,
line.loadstore, line.address, line.data_size);
// FIXME! Currently lane_id is assumed to be in round-robin order, e.g.
// 0->1->2->3->0->..., both in the trace file and the order the caller calls
@@ -119,11 +119,11 @@ extern "C" void memtrace_init(const char *filename) {
// TODO: accept core_id as well
extern "C" void memtrace_query(unsigned char trace_read_ready,
unsigned long trace_read_cycle,
int trace_read_lane_id,
int trace_read_lane_id,
unsigned char *trace_read_valid,
unsigned long *trace_read_address,
unsigned char *trace_read_is_store,
int *trace_read_store_mask,
int *trace_read_size,
unsigned long *trace_read_data,
unsigned char *trace_read_finished) {
// printf("memtrace_query(cycle=%ld, tid=%d)\n", trace_read_cycle,
@@ -136,8 +136,8 @@ extern "C" void memtrace_query(unsigned char trace_read_ready,
auto line = reader->read_trace_at(trace_read_cycle, trace_read_lane_id);
*trace_read_valid = line.valid;
*trace_read_address = line.address;
*trace_read_is_store = strcmp(line.loadstore, "STORE") == 0 ;
*trace_read_store_mask = line.data_size;
*trace_read_is_store = (strcmp(line.loadstore, "STORE") == 0);
*trace_read_size = line.data_size;
*trace_read_data = line.data;
// This means finished and valid can be asserted in the same cycle. This
// needs to be handled without skipping the last line.

View File

@@ -34,11 +34,10 @@ public:
extern "C" void memtrace_init(const char *filename);
extern "C" void memtrace_query(unsigned char trace_read_ready,
unsigned long trace_read_cycle,
int trace_read_lane_id,
int trace_read_lane_id,
unsigned char *trace_read_valid,
unsigned long *trace_read_address,
unsigned char *trace_read_is_store,
int *trace_read_store_mask,
int *trace_read_size,
unsigned long *trace_read_data,
unsigned char *trace_read_finished
);
unsigned char *trace_read_finished);

View File

@@ -1,6 +1,6 @@
`define DATA_WIDTH 64
`define MAX_NUM_LANES 32
`define MASK_WIDTH 8
`define SIZE_WIDTH 32
import "DPI-C" function void memtrace_init(
input string filename
@@ -18,31 +18,31 @@ import "DPI-C" function void memtrace_query
output bit trace_read_valid,
output longint trace_read_address,
output bit trace_read_is_store,
output int trace_read_store_mask,
output int trace_read_size,
output longint trace_read_data,
output bit trace_read_finished
);
module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) (
input clock,
input reset,
input clock,
input reset,
// These have to match the IO port of the Chisel wrapper module.
input trace_read_ready,
output [NUM_LANES-1:0] trace_read_valid,
input trace_read_ready,
output [NUM_LANES-1:0] trace_read_valid,
output [`DATA_WIDTH*NUM_LANES-1:0] trace_read_address,
output [NUM_LANES-1:0] trace_read_is_store,
output [NUM_LANES*`MASK_WIDTH-1:0] trace_read_store_mask,
output [NUM_LANES-1:0] trace_read_is_store,
output [`SIZE_WIDTH*NUM_LANES-1:0] trace_read_size,
output [`DATA_WIDTH*NUM_LANES-1:0] trace_read_data,
output trace_read_finished
output trace_read_finished
);
bit __in_valid[NUM_LANES-1:0];
longint __in_address[NUM_LANES-1:0];
bit __in_valid [NUM_LANES-1:0];
longint __in_address [NUM_LANES-1:0];
bit __in_is_store[NUM_LANES-1:0];
logic [`MASK_WIDTH-1:0] __in_store_mask [NUM_LANES-1:0];
longint __in_data[NUM_LANES-1:0];
bit __in_is_store [NUM_LANES-1:0];
int __in_size [NUM_LANES-1:0];
longint __in_data [NUM_LANES-1:0];
bit __in_finished;
string __uartlog;
@@ -54,13 +54,13 @@ module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) (
assign next_cycle_counter = cycle_counter + 1'b1;
// registers that stage outputs of the C parser
reg [NUM_LANES-1:0] __in_valid_reg;
reg [NUM_LANES-1:0] __in_valid_reg;
reg [`DATA_WIDTH-1:0] __in_address_reg [NUM_LANES-1:0];
reg [NUM_LANES-1:0] __in_is_store_reg;
reg [`MASK_WIDTH-1:0] __in_store_mask_reg [NUM_LANES-1:0];
reg [NUM_LANES-1:0] __in_is_store_reg;
int __in_size_reg [NUM_LANES-1:0];
reg [`DATA_WIDTH-1:0] __in_data_reg [NUM_LANES-1:0];
reg __in_finished_reg;
reg __in_finished_reg;
genvar g;
@@ -70,7 +70,7 @@ module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) (
assign trace_read_address[`DATA_WIDTH*(g+1)-1:`DATA_WIDTH*g] = __in_address_reg[g];
assign trace_read_is_store[g] = __in_is_store_reg[g];
assign trace_read_store_mask[`MASK_WIDTH*(g+1)-1:`MASK_WIDTH*g] = __in_store_mask_reg[g];
assign trace_read_size[`SIZE_WIDTH*(g+1)-1:`SIZE_WIDTH*g] = __in_size_reg[g];
assign trace_read_data[`DATA_WIDTH*(g+1)-1:`DATA_WIDTH*g] = __in_data_reg[g];
end
endgenerate
@@ -83,15 +83,13 @@ module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) (
// Evaluate the signals on the positive edge
always @(posedge clock) begin
// Setting reset value
if (reset) begin
for (integer tid = 0; tid < NUM_LANES; tid = tid + 1) begin
__in_valid[tid] = 1'b0;
__in_address[tid] = `DATA_WIDTH'b0;
__in_is_store[tid] = 1'b0;
__in_store_mask[tid] = `MASK_WIDTH'b0;
__in_size[tid] = `SIZE_WIDTH'b0;
__in_data[tid] = `DATA_WIDTH'b0;
end
@@ -105,7 +103,7 @@ module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) (
__in_address_reg[tid] <= `DATA_WIDTH'b0;
__in_is_store_reg[tid] = 1'b0;
__in_store_mask_reg[tid] = `MASK_WIDTH'b0;
__in_size_reg[tid] = `SIZE_WIDTH'b0;
__in_data_reg[tid] = `DATA_WIDTH'b0;
end
@@ -127,7 +125,7 @@ module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) (
__in_address[tid],
__in_is_store[tid],
__in_store_mask[tid],
__in_size[tid],
__in_data[tid],
__in_finished
@@ -140,7 +138,7 @@ module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) (
__in_address_reg[tid] <= __in_address[tid];
__in_is_store_reg[tid] <= __in_is_store[tid];
__in_store_mask_reg[tid] <= __in_store_mask[tid];
__in_size_reg[tid] <= __in_size[tid];
__in_data_reg[tid] <= __in_data[tid];
end
__in_finished_reg <= __in_finished;

View File

@@ -587,7 +587,6 @@ class CoalShiftQueue[T <: Data](
class MemTraceDriver(numLanes: Int = 4, filename: String = "vecadd.core1.thread4.trace")(implicit
p: Parameters
) extends LazyModule {
// Create N client nodes together
val laneNodes = Seq.tabulate(numLanes) { i =>
val clientParam = Seq(
@@ -612,7 +611,7 @@ class TraceReq extends Bundle {
val valid = Bool()
val address = UInt(64.W)
val is_store = Bool()
val mask = UInt(8.W)
val size = UInt(32.W)
val data = UInt(64.W)
}
@@ -634,7 +633,8 @@ class MemTraceDriverImp(outer: MemTraceDriver, numLanes: Int, traceFile: String)
req.valid := sim.io.trace_read.valid(i)
req.address := sim.io.trace_read.address(64 * i + 63, 64 * i)
req.is_store := sim.io.trace_read.is_store(i)
req.mask := sim.io.trace_read.store_mask(8 * i + 7, 8 * i)
req.size := sim.io.trace_read.size(32 * i + 31, 32 * i)
printf("========= req.size=%d\n", req.size)
req.data := sim.io.trace_read.data(64 * i + 63, 64 * i)
}
@@ -655,16 +655,17 @@ class MemTraceDriverImp(outer: MemTraceDriver, numLanes: Int, traceFile: String)
(outer.laneNodes zip laneReqs).foreach { case (node, req) =>
val (tlOut, edge) = node.out(0)
val size = 4.U // TODO: get proper size from the trace
val (plegal, pbits) = edge.Put(
fromSource = sourceIdCounter,
toAddress = hashToValidPhyAddr(req.address),
lgSize = 3.U,
lgSize = Log2(size),
data = req.data
)
val (glegal, gbits) = edge.Get(
fromSource = sourceIdCounter,
toAddress = hashToValidPhyAddr(req.address),
lgSize = 3.U
lgSize = Log2(size),
)
val legal = Mux(req.is_store, plegal, glegal)
val bits = Mux(req.is_store, pbits, gbits)
@@ -677,6 +678,8 @@ class MemTraceDriverImp(outer: MemTraceDriver, numLanes: Int, traceFile: String)
tlOut.d.ready := true.B
tlOut.e.valid := false.B
println(s"======= MemTraceDriver: TL data width: ${tlOut.params.dataBits}")
dontTouch(tlOut.a)
dontTouch(tlOut.d)
}
@@ -714,7 +717,7 @@ class SimMemTrace(filename: String, numLanes: Int)
// TODO: assumes 64-bit address.
val address = Output(UInt((64 * numLanes).W))
val is_store = Output(UInt(numLanes.W))
val store_mask = Output(UInt((8 * numLanes).W))
val size = Output(UInt((32 * numLanes).W))
val data = Output(UInt((64 * numLanes).W))
val finished = Output(Bool())
}
@@ -762,7 +765,8 @@ class MemTraceLogger(numLanes: Int = 4, filename: String = "vecadd.core1.thread4
req.address := tlIn.a.bits.address
req.data := tlIn.a.bits.data
req.is_store := false.B // FIXME: take is_store from TL
req.mask := tlIn.a.bits.mask
req.size := tlIn.a.bits.size
printf("========= logger: req.size=%d\n", tlIn.a.bits.size)
}
val laneValid = Wire(Vec(numLanes, Bool()))
@@ -794,7 +798,7 @@ class SimMemTraceLogger(filename: String, numLanes: Int)
// val ready = Output(Bool())
// TODO: assumes 64-bit address.
// val is_store = Output(UInt(numLanes.W))
// val store_mask = Output(UInt((8 * numLanes).W))
// val size = Output(UInt((8 * numLanes).W))
// val data = Output(UInt((64 * numLanes).W))
// val finished = Output(Bool())
}
@@ -805,7 +809,7 @@ class SimMemTraceLogger(filename: String, numLanes: Int)
addResource("/csrc/SimMemTraceLogger.h")
}
// synthesizable unit tests
// Synthesizable unit tests
// tracedriver --> coalescer --> tracelogger --> tlram
class TLRAMCoalescerLogger(implicit p: Parameters) extends LazyModule {
@@ -816,7 +820,9 @@ class TLRAMCoalescerLogger(implicit p: Parameters) extends LazyModule {
val logger = LazyModule(new MemTraceLogger(numLanes))
val rams = Seq.fill(numLanes)( // +1 for coalesced edge
LazyModule(
// FIXME: properly propagate beatBytes?
// NOTE: beatBytes here sets the data bitwidth of the upstream TileLink
// edges globally, by way of Diplomacy communicating the TL slave
// parameters to the upstream nodes.
new TLRAM(address = AddressSet(0x0000, 0xffffff), beatBytes = 8)
)
)
@@ -847,7 +853,9 @@ class TLRAMCoalescer(implicit p: Parameters) extends LazyModule {
val driver = LazyModule(new MemTraceDriver(numLanes))
val rams = Seq.fill(numLanes + 1)( // +1 for coalesced edge
LazyModule(
// FIXME: properly propagate beatBytes?
// NOTE: beatBytes here sets the data bitwidth of the upstream TileLink
// edges globally, by way of Diplomacy communicating the TL slave
// parameters to the upstream nodes.
new TLRAM(address = AddressSet(0x0000, 0xffffff), beatBytes = 8)
)
)