From 71f334bb229c96d35545d047d6a16430ef4624e8 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Tue, 11 Apr 2023 17:36:45 -0700 Subject: [PATCH] Fix size parsing from memtrace --- src/main/resources/csrc/SimMemTrace.cc | 12 +++--- src/main/resources/csrc/SimMemTrace.h | 7 ++-- src/main/resources/vsrc/SimMemTrace.v | 48 ++++++++++++------------ src/main/scala/tilelink/Coalescing.scala | 30 +++++++++------ 4 files changed, 51 insertions(+), 46 deletions(-) diff --git a/src/main/resources/csrc/SimMemTrace.cc b/src/main/resources/csrc/SimMemTrace.cc index 5e29d57..0e3274b 100644 --- a/src/main/resources/csrc/SimMemTrace.cc +++ b/src/main/resources/csrc/SimMemTrace.cc @@ -75,8 +75,8 @@ MemTraceLine MemTraceReader::read_trace_at(const long cycle, // read it right now. return MemTraceLine{}; } else if (line.cycle == cycle && line.lane_id == lane_id) { - printf("fire! cycle=%ld, valid=%d, %s addr=%x \n", cycle, line.valid, - line.loadstore, line.address); + printf("fire! cycle=%ld, valid=%d, %s addr=%lx, size=%d \n", cycle, line.valid, + line.loadstore, line.address, line.data_size); // FIXME! Currently lane_id is assumed to be in round-robin order, e.g. // 0->1->2->3->0->..., both in the trace file and the order the caller calls @@ -119,11 +119,11 @@ extern "C" void memtrace_init(const char *filename) { // TODO: accept core_id as well extern "C" void memtrace_query(unsigned char trace_read_ready, unsigned long trace_read_cycle, - int trace_read_lane_id, + int trace_read_lane_id, unsigned char *trace_read_valid, unsigned long *trace_read_address, unsigned char *trace_read_is_store, - int *trace_read_store_mask, + int *trace_read_size, unsigned long *trace_read_data, unsigned char *trace_read_finished) { // printf("memtrace_query(cycle=%ld, tid=%d)\n", trace_read_cycle, @@ -136,8 +136,8 @@ extern "C" void memtrace_query(unsigned char trace_read_ready, auto line = reader->read_trace_at(trace_read_cycle, trace_read_lane_id); *trace_read_valid = line.valid; *trace_read_address = line.address; - *trace_read_is_store = strcmp(line.loadstore, "STORE") == 0 ; - *trace_read_store_mask = line.data_size; + *trace_read_is_store = (strcmp(line.loadstore, "STORE") == 0); + *trace_read_size = line.data_size; *trace_read_data = line.data; // This means finished and valid will go up at the same cycle. Need to // handle this without skipping the last line. diff --git a/src/main/resources/csrc/SimMemTrace.h b/src/main/resources/csrc/SimMemTrace.h index b046fcc..033fc7e 100644 --- a/src/main/resources/csrc/SimMemTrace.h +++ b/src/main/resources/csrc/SimMemTrace.h @@ -34,11 +34,10 @@ public: extern "C" void memtrace_init(const char *filename); extern "C" void memtrace_query(unsigned char trace_read_ready, unsigned long trace_read_cycle, - int trace_read_lane_id, + int trace_read_lane_id, unsigned char *trace_read_valid, unsigned long *trace_read_address, unsigned char *trace_read_is_store, - int *trace_read_store_mask, + int *trace_read_size, unsigned long *trace_read_data, - unsigned char *trace_read_finished - ); + unsigned char *trace_read_finished); diff --git a/src/main/resources/vsrc/SimMemTrace.v b/src/main/resources/vsrc/SimMemTrace.v index d5c5584..b18fcab 100644 --- a/src/main/resources/vsrc/SimMemTrace.v +++ b/src/main/resources/vsrc/SimMemTrace.v @@ -1,6 +1,6 @@ `define DATA_WIDTH 64 `define MAX_NUM_LANES 32 -`define MASK_WIDTH 8 +`define SIZE_WIDTH 32 import "DPI-C" function void memtrace_init( input string filename @@ -18,31 +18,31 @@ import "DPI-C" function void memtrace_query output bit trace_read_valid, output longint trace_read_address, output bit trace_read_is_store, - output int trace_read_store_mask, + output int trace_read_size, output longint trace_read_data, output bit trace_read_finished ); module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) ( - input clock, - input reset, + input clock, + input reset, // These have to match the IO port of the Chisel wrapper module. - input trace_read_ready, - output [NUM_LANES-1:0] trace_read_valid, + input trace_read_ready, + output [NUM_LANES-1:0] trace_read_valid, output [`DATA_WIDTH*NUM_LANES-1:0] trace_read_address, - output [NUM_LANES-1:0] trace_read_is_store, - output [NUM_LANES*`MASK_WIDTH-1:0] trace_read_store_mask, + output [NUM_LANES-1:0] trace_read_is_store, + output [`SIZE_WIDTH*NUM_LANES-1:0] trace_read_size, output [`DATA_WIDTH*NUM_LANES-1:0] trace_read_data, - output trace_read_finished + output trace_read_finished ); - bit __in_valid[NUM_LANES-1:0]; - longint __in_address[NUM_LANES-1:0]; + bit __in_valid [NUM_LANES-1:0]; + longint __in_address [NUM_LANES-1:0]; - bit __in_is_store[NUM_LANES-1:0]; - logic [`MASK_WIDTH-1:0] __in_store_mask [NUM_LANES-1:0]; - longint __in_data[NUM_LANES-1:0]; + bit __in_is_store [NUM_LANES-1:0]; + int __in_size [NUM_LANES-1:0]; + longint __in_data [NUM_LANES-1:0]; bit __in_finished; string __uartlog; @@ -54,13 +54,13 @@ module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) ( assign next_cycle_counter = cycle_counter + 1'b1; // registers that stage outputs of the C parser - reg [NUM_LANES-1:0] __in_valid_reg; + reg [NUM_LANES-1:0] __in_valid_reg; reg [`DATA_WIDTH-1:0] __in_address_reg [NUM_LANES-1:0]; - reg [NUM_LANES-1:0] __in_is_store_reg; - reg [`MASK_WIDTH-1:0] __in_store_mask_reg [NUM_LANES-1:0]; + reg [NUM_LANES-1:0] __in_is_store_reg; + int __in_size_reg [NUM_LANES-1:0]; reg [`DATA_WIDTH-1:0] __in_data_reg [NUM_LANES-1:0]; - reg __in_finished_reg; + reg __in_finished_reg; genvar g; @@ -70,7 +70,7 @@ module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) ( assign trace_read_address[`DATA_WIDTH*(g+1)-1:`DATA_WIDTH*g] = __in_address_reg[g]; assign trace_read_is_store[g] = __in_is_store_reg[g]; - assign trace_read_store_mask[`MASK_WIDTH*(g+1)-1:`MASK_WIDTH*g] = __in_store_mask_reg[g]; + assign trace_read_size[`SIZE_WIDTH*(g+1)-1:`SIZE_WIDTH*g] = __in_size_reg[g]; assign trace_read_data[`DATA_WIDTH*(g+1)-1:`DATA_WIDTH*g] = __in_data_reg[g]; end endgenerate @@ -83,15 +83,13 @@ module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) ( // Evaluate the signals on the positive edge always @(posedge clock) begin - - // Setting reset value if (reset) begin for (integer tid = 0; tid < NUM_LANES; tid = tid + 1) begin __in_valid[tid] = 1'b0; __in_address[tid] = `DATA_WIDTH'b0; __in_is_store[tid] = 1'b0; - __in_store_mask[tid] = `MASK_WIDTH'b0; + __in_size[tid] = `SIZE_WIDTH'b0; __in_data[tid] = `DATA_WIDTH'b0; end @@ -105,7 +103,7 @@ module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) ( __in_address_reg[tid] <= `DATA_WIDTH'b0; __in_is_store_reg[tid] = 1'b0; - __in_store_mask_reg[tid] = `MASK_WIDTH'b0; + __in_size_reg[tid] = `SIZE_WIDTH'b0; __in_data_reg[tid] = `DATA_WIDTH'b0; end @@ -127,7 +125,7 @@ module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) ( __in_address[tid], __in_is_store[tid], - __in_store_mask[tid], + __in_size[tid], __in_data[tid], __in_finished @@ -140,7 +138,7 @@ module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) ( __in_address_reg[tid] <= __in_address[tid]; __in_is_store_reg[tid] <= __in_is_store[tid]; - __in_store_mask_reg[tid] <= __in_store_mask[tid]; + __in_size_reg[tid] <= __in_size[tid]; __in_data_reg[tid] <= __in_data[tid]; end __in_finished_reg <= __in_finished; diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index 37f76b1..bea19ee 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -587,7 +587,6 @@ class CoalShiftQueue[T <: Data]( class MemTraceDriver(numLanes: Int = 4, filename: String = "vecadd.core1.thread4.trace")(implicit p: Parameters ) extends LazyModule { - // Create N client nodes together val laneNodes = Seq.tabulate(numLanes) { i => val clientParam = Seq( @@ -612,7 +611,7 @@ class TraceReq extends Bundle { val valid = Bool() val address = UInt(64.W) val is_store = Bool() - val mask = UInt(8.W) + val size = UInt(32.W) val data = UInt(64.W) } @@ -634,7 +633,8 @@ class MemTraceDriverImp(outer: MemTraceDriver, numLanes: Int, traceFile: String) req.valid := sim.io.trace_read.valid(i) req.address := sim.io.trace_read.address(64 * i + 63, 64 * i) req.is_store := sim.io.trace_read.is_store(i) - req.mask := sim.io.trace_read.store_mask(8 * i + 7, 8 * i) + req.size := sim.io.trace_read.size(32 * i + 31, 32 * i) + printf("========= req.size=%d\n", req.size) req.data := sim.io.trace_read.data(64 * i + 63, 64 * i) } @@ -655,16 +655,17 @@ class MemTraceDriverImp(outer: MemTraceDriver, numLanes: Int, traceFile: String) (outer.laneNodes zip laneReqs).foreach { case (node, req) => val (tlOut, edge) = node.out(0) + val size = 4.U // TODO: get proper size from the trace val (plegal, pbits) = edge.Put( fromSource = sourceIdCounter, toAddress = hashToValidPhyAddr(req.address), - lgSize = 3.U, + lgSize = Log2(size), data = req.data ) val (glegal, gbits) = edge.Get( fromSource = sourceIdCounter, toAddress = hashToValidPhyAddr(req.address), - lgSize = 3.U + lgSize = Log2(size), ) val legal = Mux(req.is_store, plegal, glegal) val bits = Mux(req.is_store, pbits, gbits) @@ -677,6 +678,8 @@ class MemTraceDriverImp(outer: MemTraceDriver, numLanes: Int, traceFile: String) tlOut.d.ready := true.B tlOut.e.valid := false.B + println(s"======= MemTraceDriver: TL data width: ${tlOut.params.dataBits}") + dontTouch(tlOut.a) dontTouch(tlOut.d) } @@ -714,7 +717,7 @@ class SimMemTrace(filename: String, numLanes: Int) // TODO: assumes 64-bit address. val address = Output(UInt((64 * numLanes).W)) val is_store = Output(UInt(numLanes.W)) - val store_mask = Output(UInt((8 * numLanes).W)) + val size = Output(UInt((32 * numLanes).W)) val data = Output(UInt((64 * numLanes).W)) val finished = Output(Bool()) } @@ -762,7 +765,8 @@ class MemTraceLogger(numLanes: Int = 4, filename: String = "vecadd.core1.thread4 req.address := tlIn.a.bits.address req.data := tlIn.a.bits.data req.is_store := false.B // FIXME: take is_store from TL - req.mask := tlIn.a.bits.mask + req.size := tlIn.a.bits.size + printf("========= logger: req.size=%d\n", tlIn.a.bits.size) } val laneValid = Wire(Vec(numLanes, Bool())) @@ -794,7 +798,7 @@ class SimMemTraceLogger(filename: String, numLanes: Int) // val ready = Output(Bool()) // TODO: assumes 64-bit address. // val is_store = Output(UInt(numLanes.W)) - // val store_mask = Output(UInt((8 * numLanes).W)) + // val size = Output(UInt((8 * numLanes).W)) // val data = Output(UInt((64 * numLanes).W)) // val finished = Output(Bool()) } @@ -805,7 +809,7 @@ class SimMemTraceLogger(filename: String, numLanes: Int) addResource("/csrc/SimMemTraceLogger.h") } -// synthesizable unit tests +// Synthesizable unit tests // tracedriver --> coalescer --> tracelogger --> tlram class TLRAMCoalescerLogger(implicit p: Parameters) extends LazyModule { @@ -816,7 +820,9 @@ class TLRAMCoalescerLogger(implicit p: Parameters) extends LazyModule { val logger = LazyModule(new MemTraceLogger(numLanes)) val rams = Seq.fill(numLanes)( // +1 for coalesced edge LazyModule( - // FIXME: properly propagate beatBytes? + // NOTE: beatBytes here sets the data bitwidth of the upstream TileLink + // edges globally, by way of Diplomacy communicating the TL slave + // parameters to the upstream nodes. new TLRAM(address = AddressSet(0x0000, 0xffffff), beatBytes = 8) ) ) @@ -847,7 +853,9 @@ class TLRAMCoalescer(implicit p: Parameters) extends LazyModule { val driver = LazyModule(new MemTraceDriver(numLanes)) val rams = Seq.fill(numLanes + 1)( // +1 for coalesced edge LazyModule( - // FIXME: properly propagate beatBytes? + // NOTE: beatBytes here sets the data bitwidth of the upstream TileLink + // edges globally, by way of Diplomacy communicating the TL slave + // parameters to the upstream nodes. new TLRAM(address = AddressSet(0x0000, 0xffffff), beatBytes = 8) ) )