diff --git a/src/main/resources/csrc/SimMemTrace.cc b/src/main/resources/csrc/SimMemTrace.cc
index 5f7a9ee..08540bc 100644
--- a/src/main/resources/csrc/SimMemTrace.cc
+++ b/src/main/resources/csrc/SimMemTrace.cc
@@ -13,7 +13,8 @@
 // Global singleton instance
 static std::unique_ptr<MemTraceReader> reader;
 
-MemTraceReader::MemTraceReader(const std::string &filename) {
+MemTraceReader::MemTraceReader(const std::string &filename)
+    : filename(filename) {
   char cwd[4096];
   if (getcwd(cwd, sizeof(cwd))) {
     printf("MemTraceReader: current working dir: %s\n", cwd);
@@ -30,32 +31,64 @@ MemTraceReader::~MemTraceReader() {
   printf("MemTraceReader destroyed\n");
 }
 
-// Parse trace file in its entirety and store it into internal structure.
+void MemTraceReader::error(long fileline, const std::string &msg) {
+  fprintf(stderr, "parse error at %s:%ld: %s\n", filename.c_str(), fileline,
+          msg.c_str());
+  exit(EXIT_FAILURE);
+}
+
+// Parse trace file in its entirety and store it into an internal structure.
+// If `has_source` is true, assumes the trace has an additional column after
+// core and lane_id for source id and tries to parse that.
 // TODO: might block for a long time when the trace gets big, check if need to
 // be broken down
-void MemTraceReader::parse() {
+void MemTraceReader::parse(const bool has_source) {
   MemTraceLine line;
 
   printf("MemTraceReader: started parsing\n");
 
   long size = 0;
-  std::string loadstore; // FIXME: likely slow
-  while (infile >> line.cycle >> loadstore >> line.core_id >>
-         line.lane_id >> std::hex >> line.address >> line.data >> std::dec >>
-         size) {
+  long source = 0;
+  std::string loadstore; // slow?
+  for (long fileline = 1;; fileline++) {
+    if (infile.peek() == '\n') {
+      infile.get();
+      continue;
+    }
+    if (infile.eof()) {
+      break;
+    }
+
+    if (!(infile >> line.cycle >> loadstore >> line.core_id >> line.lane_id)) {
+      printf("char=[%c]\n", infile.peek());
+      // assert(!infile.eof());
+      error(fileline, "failed parsing cycle..lane_id");
+    }
+    if (has_source && !(infile >> source)) {
+      error(fileline, "failed parsing source");
+    }
+    if (!(infile >> std::hex >> line.address >> line.data >> std::dec >>
+          size)) {
+      error(fileline, "failed parsing address..size");
+    }
+    if (infile.get() != '\n') {
+      error(fileline, "trailing characters at the end of the line");
+    }
+
     line.valid = true;
-
     line.is_store = (loadstore == "STORE");
-
-    assert(size > 0 && "invalid size in trace");
+    if (size <= 0) {
+      error(fileline, "invalid size in trace");
+    }
     int lgsize = static_cast<int>(log2(size));
-    assert((size & ~(~0lu << lgsize)) == 0 &&
-           "non-power-of-2 size detected in trace");
+    if ((size & ~(~0lu << lgsize)) != 0) {
+      error(fileline, "non-power-of-2 size detected in trace");
+    }
     line.log_data_size = lgsize;
 
-    trace.push_back(line);
+    trace_buf.push_back(line);
   }
-  read_pos = trace.cbegin();
+  read_pos = trace_buf.cbegin();
 
   printf("MemTraceReader: finished parsing\n");
 }
@@ -63,8 +96,7 @@ void MemTraceReader::parse() {
 // Try to read a memory request that might have happened at a given cycle, on a
 // given SIMD lane (= "thread"). In case no request happened at that point,
 // return an empty line with .valid = false.
-MemTraceLine MemTraceReader::read_trace_at(const long cycle,
-                                           const int lane_id,
+MemTraceLine MemTraceReader::read_trace_at(const long cycle, const int lane_id,
                                            unsigned char trace_read_ready) {
   MemTraceLine line;
   line.valid = false;
@@ -79,43 +111,41 @@ MemTraceLine MemTraceReader::read_trace_at(const long cycle,
   // It should always be guaranteed that we consumed all of the past lines, and
   // the next line is in the future.
   if (line.cycle < cycle) {
-    // fprintf(stderr, "line.cycle=%ld, cycle=%ld\n", line.cycle, cycle);
-    printf("cycle=%ld, some lines are left in past Fatal", cycle);
-    assert(false && "some trace lines are left unread in the past");
+    long fileline = read_pos - std::cbegin(trace_buf) + 1;
+    error(fileline, "some trace lines are left unread in the past");
     return MemTraceLine{};
   }
 
-  if (line.lane_id != lane_id) {
-    line.valid = false;
-  }
   if (line.cycle > cycle) {
     // We haven't reached the cycle mark specified in this line yet, so we don't
     // read it right now.
     return MemTraceLine{};
+  } else if (line.lane_id != lane_id) {
+    return MemTraceLine{};
   } else if (line.cycle == cycle && line.lane_id == lane_id) {
-
-    if (trace_read_ready){
+    if (trace_read_ready) {
       printf("Fire! cycle=%ld, valid=%d, %s addr=%lx, size=%d \n", cycle,
-      line.valid, (line.is_store ? "STORE" : "LOAD"), line.address,
-      line.log_data_size);
+             line.valid, (line.is_store ? "STORE" : "LOAD"), line.address,
+             line.log_data_size);
 
-      // FIXME! Currently lane_id is assumed to be in round-robin order, e.g.
-      // 0->1->2->3->0->..., both in the trace file and the order the caller calls
-      // this function. If this is not true, we cannot simply monotonically
-      // increment read_pos.
-      // Only advance pointer when cycle and threa_id both match
-      // now increaseing sequence is fine (0, 1, 3), but unordered is not fine (0, 3, 1)
+      // NOTE: Currently lane_id is assumed to be in always-increasing order,
+      // e.g. 0->1->2->3->0->..., both in the trace file and the order the
+      // caller calls this function. If this is not true, we cannot simply
+      // monotonically increment read_pos. lane_id need not be contiguous, e.g.
+      // 0->1->3 is fine.
       ++read_pos;
-    }
-    else { // we do not want to advance read_pos
+      return line;
+    } else {
+      // For debugging purposes, instead of early-returning on
+      // !trace_read_ready, print something to notify we are blocking a valid
+      // trace line.
       printf("All Lanes Blocked on this cycle! cycle=%ld \n", cycle);
+      return MemTraceLine{};
     }
-
-    return line;
-
-    }
   }
-  
+
+  assert(!"unreachable");
+}
 
 extern "C" void memtrace_init(const char *filename) {
 #ifndef NO_VPI
@@ -124,9 +154,9 @@ extern "C" void memtrace_init(const char *filename) {
     fprintf(stderr, "fatal: failed to get plusargs from VCS\n");
     exit(1);
   }
-  const char* TRACEFILENAME_PLUSARG = "+memtracefile=";
+  const char *TRACEFILENAME_PLUSARG = "+memtracefile=";
   for (int i = 0; i < info.argc; i++) {
-    char* input_arg = info.argv[i];
+    char *input_arg = info.argv[i];
     if (strncmp(input_arg, TRACEFILENAME_PLUSARG,
                 strlen(TRACEFILENAME_PLUSARG)) == 0) {
       filename = input_arg + strlen(TRACEFILENAME_PLUSARG);
@@ -139,7 +169,8 @@ extern "C" void memtrace_init(const char *filename) {
 
   reader = std::make_unique<MemTraceReader>(filename);
   // parse file upfront
-  reader->parse();
+  // driver trace file is assumed to not have source id
+  reader->parse(false);
 }
 
 // TODO: accept core_id as well
@@ -155,13 +186,6 @@ extern "C" void memtrace_query(unsigned char trace_read_ready,
   // printf("memtrace_query(cycle=%ld, tid=%d)\n", trace_read_cycle,
   //        trace_read_lane_id);
 
-  /* we can't return immediately, even if trace is ready, we still want to find out
-  if we are suppose to generate valid req on this clock cycle
-  if (!trace_read_ready) {
-    return;
-  }
-  */
-
   auto line = reader->read_trace_at(trace_read_cycle, trace_read_lane_id, trace_read_ready);
   *trace_read_valid = line.valid;
   *trace_read_address = line.address;
diff --git a/src/main/resources/csrc/SimMemTrace.h b/src/main/resources/csrc/SimMemTrace.h
index 9b90918..753385e 100644
--- a/src/main/resources/csrc/SimMemTrace.h
+++ b/src/main/resources/csrc/SimMemTrace.h
@@ -18,12 +18,18 @@ class MemTraceReader {
 public:
   MemTraceReader(const std::string &filename);
   ~MemTraceReader();
-  void parse();
+  void parse(const bool has_source);
   MemTraceLine read_trace_at(const long cycle, const int lane_id,
                              unsigned char trace_read_ready);
-  bool finished() const { return read_pos == trace.cend(); }
+  bool finished() const { return read_pos == trace_buf.cend(); }
+  MemTraceLine peek() const { return *read_pos; }
+  void next() { read_pos++; }
+  void error(long fileline, const std::string &msg);
 
+  const std::string filename;
+
+private:
   std::ifstream infile;
-  std::vector<MemTraceLine> trace;
+  std::vector<MemTraceLine> trace_buf;
   std::vector<MemTraceLine>::const_iterator read_pos;
 };
diff --git a/src/main/resources/csrc/SimMemTraceLogger.cc b/src/main/resources/csrc/SimMemTraceLogger.cc
index 647aeb5..3963417 100644
--- a/src/main/resources/csrc/SimMemTraceLogger.cc
+++ b/src/main/resources/csrc/SimMemTraceLogger.cc
@@ -102,7 +102,8 @@ extern "C" void memtracelogger_log(int handle,
                         .data = trace_log_data,
                         .log_data_size = trace_log_size};
 
-  assert(0 <= handle && handle < loggers.size() && "wrong trace logger handle");
+  assert(0 <= handle && static_cast<size_t>(handle) < loggers.size() &&
+         "wrong trace logger handle");
   auto logger = loggers[handle].get();
   logger->write_line_to_trace(line);
 }
diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala
index a206f75..b1cd865 100644
--- a/src/main/scala/tilelink/Coalescing.scala
+++ b/src/main/scala/tilelink/Coalescing.scala
@@ -53,9 +53,10 @@ case class CoalescerConfig(
   coalLogSizes: Seq[Int], // list of coalescer sizes to try in the MonoCoalescers
                           // each size is log(byteSize)
   sizeEnum: InFlightTableSizeEnum,
-  numCoalReq: Int, // the total number of coalesced request
-  arbiterOutputs: Int, //total number RW ports from the
-  bankStrideInBytes: Int //cache line strides across the different banks
+  numCoalReqs: Int, // total number of coalesced requests we can generate in one cycle
+  numArbiterOutputPorts: Int, // total of output ports the arbiter will arbitrate into.
+                              // this has to match downstream cache's configuration
+  bankStrideInBytes: Int // cache line strides across the different banks
 ) {
   // maximum coalesced size
   def maxCoalLogSize: Int = coalLogSizes.max
@@ -77,8 +78,8 @@ object defaultConfig extends CoalescerConfig(
   respQueueDepth = 4,
   coalLogSizes = Seq(3),
   sizeEnum = DefaultInFlightTableSizeEnum,
-  numCoalReq = 1,
-  arbiterOutputs = 4,
+  numCoalReqs = 1,
+  numArbiterOutputPorts = 4,
   bankStrideInBytes = 64 // Current L2 is strided by 512 bits
 )
 
@@ -1729,10 +1730,10 @@ class TLRAMCoalescerTest(timeout: Int = 500000)(implicit p: Parameters) extends
 
 class CoalArbiter(config: CoalescerConfig) (implicit p: Parameters) extends LazyModule {
   // Let SIMT's word size be 32, and read/write granularity be 256
-  val fullSourceIdRange = config.numOldSrcIds * config.numLanes + config.numNewSrcIds * config.numCoalReq
+  val fullSourceIdRange = config.numOldSrcIds * config.numLanes + config.numNewSrcIds * config.numCoalReqs
 
   // K client nodes of edge size 32 for non-coalesced reqs
-  val nonCoalNarrowNodes = Seq.tabulate(config.arbiterOutputs){ i =>
+  val nonCoalNarrowNodes = Seq.tabulate(config.numArbiterOutputPorts){ i =>
     val nonCoalNarrowParam = Seq(
       TLMasterParameters.v1(
         name = "NonCoalNarrowNode" + i.toString,
@@ -1750,7 +1751,7 @@ class CoalArbiter(config: CoalescerConfig) (implicit p: Parameters) extends Lazy
   )
 
   // K client nodes of edge size 256 for the coalesced reqs
-  val coalReqNodes = Seq.tabulate(config.arbiterOutputs){ i =>
+  val coalReqNodes = Seq.tabulate(config.numArbiterOutputPorts){ i =>
     val coalParam = Seq(
       TLMasterParameters.v1(
         name = "CoalReqNode" + i.toString,
@@ -1806,10 +1807,10 @@ class CoalArbiterImpl(outer: CoalArbiter,
 
   val io = IO(
     new Bundle {
-      val nonCoalVec = Vec(config.numLanes, Flipped(Decoupled(nonCoalEntryT.cloneType)))
-      val coalVec = Vec(config.numCoalReq, Flipped(Decoupled(coalEntryT.cloneType)))
-      val respNonCoalVec = Vec(config.numLanes, Decoupled(respNonCoalEntryT.cloneType))
-      val respCoalBundle = Decoupled(respCoalBundleT.cloneType)
+      val nonCoalReqs = Vec(config.numLanes, Flipped(Decoupled(nonCoalEntryT)))
+      val coalReqs = Vec(config.numCoalReqs, Flipped(Decoupled(coalEntryT)))
+      val nonCoalResps = Vec(config.numLanes, Decoupled(respNonCoalEntryT))
+      val coalResp = Decoupled(respCoalBundleT)
     }
   )
 