From b57c0e2b7dbcbc652db59f0bf788798a19c874d6 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Thu, 2 Mar 2023 17:24:36 -0800 Subject: [PATCH 1/5] SimMemTrace: parse batch instead of at every cycle --- src/main/resources/csrc/SimMemTrace.cc | 53 ++++++++++++++++++++------ 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/src/main/resources/csrc/SimMemTrace.cc b/src/main/resources/csrc/SimMemTrace.cc index 397773d..9a45978 100644 --- a/src/main/resources/csrc/SimMemTrace.cc +++ b/src/main/resources/csrc/SimMemTrace.cc @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -27,51 +28,79 @@ public: MemTraceReader(const std::string &filename) { char cwd[4096]; if (getcwd(cwd, sizeof(cwd))) { - printf("MemTraceReader: current working dir: %s\n", cwd); + printf("MemTraceReader: current working dir: %s\n", cwd); } infile.open(filename); if (infile.fail()) { - fprintf(stderr, "failed to open file %s\n", filename); + fprintf(stderr, "failed to open file %s\n", filename); } } + ~MemTraceReader() { infile.close(); printf("MemTraceReader destroyed\n"); } + + void parse(); MemTraceLine tick(); std::ifstream infile; + std::vector trace; + std::vector::const_iterator curr_line; + long cycle = 0; }; +// Parse trace file in its entirety and store it into internal structure. +// TODO: might block for a long time when the trace gets big, check if need to +// be broken down +void MemTraceReader::parse() { + MemTraceLine line; + + printf("MemTraceReader: started parsing\n"); + + // TODO: line.valid is useless now + line.valid = false; + while (infile >> line.cycle >> line.loadstore >> line.core_id >> + line.thread_id >> std::hex >> line.address >> line.data >> std::dec >> + line.data_size) { + line.valid = true; + trace.push_back(line); + } + curr_line = trace.cbegin(); + + printf("MemTraceReader: finished parsing\n"); +} + MemTraceLine MemTraceReader::tick() { MemTraceLine line; - - line.valid = false; - if (infile >> line.cycle >> line.loadstore >> line.core_id >> - line.thread_id >> std::hex >> line.address >> line.data >> std::dec >> - line.data_size) { - line.valid = true; - printf("cycle: %ld\n", line.cycle); + if (curr_line == trace.cend()) { + return line; } + line = *curr_line; + printf("cycle: %ld\n", line.cycle); + curr_line++; return line; } extern "C" void memtrace_init(const char *filename) { reader = std::make_unique(filename); printf("memtrace_init: filename=[%s]\n", filename); + + // parse file upfront + reader->parse(); } extern "C" void memtrace_tick(unsigned char *trace_read_valid, unsigned char trace_read_ready, unsigned long *trace_read_cycle, unsigned long *trace_read_address) { - auto line = reader->tick(); + if (!trace_read_ready) + return; + auto line = reader->tick(); *trace_read_valid = line.valid; *trace_read_cycle = line.cycle; *trace_read_address = line.address; - - return; } From 44cf6fbb2f5f6e8b385ac8f96801c61ab43c936f Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Fri, 3 Mar 2023 16:14:11 -0800 Subject: [PATCH 2/5] Update SimMemTrace csrc from submodule --- src/main/resources/csrc/SimMemTrace.cc | 88 +++++++++++--------------- src/main/resources/csrc/SimMemTrace.h | 40 ++++++++++++ 2 files changed, 77 insertions(+), 51 deletions(-) create mode 100644 src/main/resources/csrc/SimMemTrace.h diff --git a/src/main/resources/csrc/SimMemTrace.cc b/src/main/resources/csrc/SimMemTrace.cc index 9a45978..4b07a4e 100644 --- a/src/main/resources/csrc/SimMemTrace.cc +++ b/src/main/resources/csrc/SimMemTrace.cc @@ -1,55 +1,29 @@ +#ifndef NO_VPI #include #include -#include -#include +#endif #include -#include #include +#include #include +#include "SimMemTrace.h" -class MemTraceReader; - -// Global singleton instance of MemTraceReader -static std::unique_ptr reader; - -struct MemTraceLine { - bool valid = false; - unsigned long cycle = 0; - char loadstore[10]; - int core_id = 0; - int thread_id = 0; - unsigned long address = 0; - unsigned long data = 0; - int data_size = 0; -}; - -class MemTraceReader { -public: - MemTraceReader(const std::string &filename) { - char cwd[4096]; - if (getcwd(cwd, sizeof(cwd))) { - printf("MemTraceReader: current working dir: %s\n", cwd); - } - - infile.open(filename); - if (infile.fail()) { - fprintf(stderr, "failed to open file %s\n", filename); - } +MemTraceReader::MemTraceReader(const std::string &filename) { + char cwd[4096]; + if (getcwd(cwd, sizeof(cwd))) { + printf("MemTraceReader: current working dir: %s\n", cwd); } - ~MemTraceReader() { - infile.close(); - printf("MemTraceReader destroyed\n"); + infile.open(filename); + if (infile.fail()) { + fprintf(stderr, "failed to open file %s\n", filename.c_str()); } +} - void parse(); - MemTraceLine tick(); - - std::ifstream infile; - std::vector trace; - std::vector::const_iterator curr_line; - long cycle = 0; -}; +MemTraceReader::~MemTraceReader() { + infile.close(); + printf("MemTraceReader destroyed\n"); +} // Parse trace file in its entirety and store it into internal structure. // TODO: might block for a long time when the trace gets big, check if need to @@ -59,12 +33,9 @@ void MemTraceReader::parse() { printf("MemTraceReader: started parsing\n"); - // TODO: line.valid is useless now - line.valid = false; while (infile >> line.cycle >> line.loadstore >> line.core_id >> line.thread_id >> std::hex >> line.address >> line.data >> std::dec >> line.data_size) { - line.valid = true; trace.push_back(line); } curr_line = trace.cbegin(); @@ -74,20 +45,27 @@ void MemTraceReader::parse() { MemTraceLine MemTraceReader::tick() { MemTraceLine line; - if (curr_line == trace.cend()) { + + if (finished()) { + cycle++; return line; } line = *curr_line; - printf("cycle: %ld\n", line.cycle); - curr_line++; + assert(line.cycle >= cycle && "missed some trace lines past their cycles"); + while (line.cycle == cycle) { + printf("cycle: %ld\n", cycle); + line = *(++curr_line); + } + + cycle++; return line; } extern "C" void memtrace_init(const char *filename) { - reader = std::make_unique(filename); printf("memtrace_init: filename=[%s]\n", filename); + reader = std::make_unique(filename); // parse file upfront reader->parse(); } @@ -95,12 +73,20 @@ extern "C" void memtrace_init(const char *filename) { extern "C" void memtrace_tick(unsigned char *trace_read_valid, unsigned char trace_read_ready, unsigned long *trace_read_cycle, - unsigned long *trace_read_address) { - if (!trace_read_ready) + unsigned long *trace_read_address, + unsigned char *trace_read_finished) { + // printf("memtrace_tick()\n"); + if (!trace_read_ready) { return; + } auto line = reader->tick(); *trace_read_valid = line.valid; *trace_read_cycle = line.cycle; *trace_read_address = line.address; + // This means finished and valid will go up at the same cycle. Need to + // handle this without skipping the last line. + *trace_read_finished = reader->finished(); + + return; } diff --git a/src/main/resources/csrc/SimMemTrace.h b/src/main/resources/csrc/SimMemTrace.h new file mode 100644 index 0000000..e2e3070 --- /dev/null +++ b/src/main/resources/csrc/SimMemTrace.h @@ -0,0 +1,40 @@ +#include +#include +#include + +class MemTraceReader; + +// Global singleton instance of MemTraceReader +static std::unique_ptr reader; + +struct MemTraceLine { + bool valid = false; + long cycle = 0; + char loadstore[10]; + int core_id = 0; + int thread_id = 0; + unsigned long address = 0; + unsigned long data = 0; + int data_size = 0; +}; + +class MemTraceReader { +public: + MemTraceReader(const std::string &filename); + ~MemTraceReader(); + void parse(); + MemTraceLine tick(); + bool finished() const { return curr_line == trace.cend(); } + + std::ifstream infile; + std::vector trace; + std::vector::const_iterator curr_line; + long cycle = 0; +}; + +extern "C" void memtrace_init(const char *filename); +extern "C" void memtrace_tick(unsigned char *trace_read_valid, + unsigned char trace_read_ready, + unsigned long *trace_read_cycle, + unsigned long *trace_read_address, + unsigned char *trace_read_finished); From 664959f723f47656aaaf21aef41e1f883f3044ba Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Fri, 3 Mar 2023 16:16:07 -0800 Subject: [PATCH 3/5] Parameterize SimMemTrace Verilog module to number of threads --- src/main/resources/vsrc/SimMemTrace.v | 117 ++++++++++++++--------- src/main/scala/tilelink/Coalescing.scala | 12 ++- 2 files changed, 79 insertions(+), 50 deletions(-) diff --git a/src/main/resources/vsrc/SimMemTrace.v b/src/main/resources/vsrc/SimMemTrace.v index da9238d..4360213 100644 --- a/src/main/resources/vsrc/SimMemTrace.v +++ b/src/main/resources/vsrc/SimMemTrace.v @@ -1,65 +1,90 @@ `define DATA_WIDTH 64 +`define MAX_NUM_THREADS 32 import "DPI-C" function void memtrace_init( - input string filename + input string filename ); import "DPI-C" function void memtrace_tick ( - output bit trace_read_valid, - input bit trace_read_ready, - output longint trace_read_cycle, - output longint trace_read_address + output bit trace_read_valid, + input bit trace_read_ready, + output longint trace_read_cycle, + output longint trace_read_address, + output bit trace_read_finished ); -module SimMemTrace ( - input clock, - input reset, +module SimMemTrace #(parameter NUM_THREADS = 4) ( + input clock, + input reset, - output trace_read_valid, - input trace_read_ready, - output [`DATA_WIDTH-1:0] trace_read_cycle, - output [`DATA_WIDTH-1:0] trace_read_address + output trace_read_valid, + input trace_read_ready, + output [`DATA_WIDTH-1:0] trace_read_cycle, + output [`DATA_WIDTH*NUM_THREADS-1:0] trace_read_address, + output trace_read_finished ); + bit __in_valid; + longint __in_cycle; + longint __in_address[NUM_THREADS-1:0]; + bit __in_finished; + string __uartlog; + int __uartno; - bit __in_valid; - longint __in_cycle; - longint __in_address; - string __uartlog; - int __uartno; + initial begin + /* $value$plusargs("uartlog=%s", __uartlog); */ + memtrace_init("vecadd.core1.thread4.trace"); + end - initial begin - $value$plusargs("uartlog=%s", __uartlog); - memtrace_init("vecadd.core1.thread4.trace"); + reg __in_valid_reg; + reg [`DATA_WIDTH-1:0] __in_cycle_reg; + reg [`DATA_WIDTH-1:0] __in_address_reg [NUM_THREADS-1:0]; + reg __in_finished_reg; + + genvar g; + + assign trace_read_valid = __in_valid_reg; + assign trace_read_cycle = __in_cycle_reg; + generate + for (g = 0; g < NUM_THREADS; g = g + 1) begin + assign trace_read_address[`DATA_WIDTH*(g+1)-1:`DATA_WIDTH*g] = __in_address_reg[g]; end + endgenerate + assign trace_read_finished = __in_finished_reg; - reg __in_valid_reg; - reg [`DATA_WIDTH-1:0] __in_cycle_reg; - reg [`DATA_WIDTH-1:0] __in_address_reg; + // Evaluate the signals on the positive edge + always @(posedge clock) begin + if (reset) begin + __in_valid = 1'b0; + __in_cycle = `DATA_WIDTH'b0; + for (integer i = 0; i < NUM_THREADS; i = i + 1) begin + __in_address[i] = `DATA_WIDTH'b0; + end + __in_finished = 1'b0; - assign trace_read_valid = __in_valid_reg; - assign trace_read_cycle = __in_cycle_reg; - assign trace_read_address = __in_address_reg; + __in_valid_reg <= 1'b0; + __in_cycle_reg <= `DATA_WIDTH'b0; + for (integer i = 0; i < NUM_THREADS; i = i + 1) begin + __in_address_reg[i] <= `DATA_WIDTH'b0; + end + __in_finished_reg <= 1'b0; + end else begin + for (integer i = 0; i < NUM_THREADS; i = i + 1) begin + memtrace_tick( + __in_valid, + trace_read_ready, + __in_cycle, + __in_address[i], + __in_finished + ); + end - // Evaluate the signals on the positive edge - always @(posedge clock) begin - if (reset) begin - __in_valid = 1'b0; - - __in_valid_reg <= 1'b0; - __in_cycle_reg <= `DATA_WIDTH'b0; - end else begin - memtrace_tick( - __in_valid, - trace_read_ready, - __in_cycle, - __in_address - ); - - __in_valid_reg <= __in_valid; - __in_cycle_reg <= __in_cycle; - __in_address_reg <= __in_address; - end + __in_valid_reg <= __in_valid; + __in_cycle_reg <= __in_cycle; + for (integer i = 0; i < NUM_THREADS; i = i + 1) begin + __in_address_reg[i] <= __in_address[i]; + end + __in_finished_reg <= __in_finished; end - + end endmodule diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index 5ae1475..6ffdc15 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -39,7 +39,7 @@ class MemTraceDriver(implicit p: Parameters) extends LazyModule { lazy val module = new Impl class Impl extends LazyModuleImp(this) with UnitTestModule { - val sim = Module(new SimMemTrace) + val sim = Module(new SimMemTrace(2)) sim.io.clock := clock sim.io.reset := reset.asBool sim.io.trace_read.ready := true.B @@ -49,11 +49,13 @@ class MemTraceDriver(implicit p: Parameters) extends LazyModule { } // we're finished when there is no more memtrace to read - io.finished := !sim.io.trace_read.valid + io.finished := sim.io.trace_read.finished } } -class SimMemTrace extends BlackBox with HasBlackBoxResource { +class SimMemTrace(num_threads: Int) + extends BlackBox(Map("NUM_THREADS" -> num_threads)) + with HasBlackBoxResource { val io = IO(new Bundle { val clock = Input(Clock()) val reset = Input(Bool()) @@ -62,12 +64,14 @@ class SimMemTrace extends BlackBox with HasBlackBoxResource { val valid = Output(Bool()) val ready = Input(Bool()) val cycle = Output(UInt(64.W)) - val address = Output(UInt(64.W)) + val address = Output(UInt((64 * num_threads).W)) + val finished = Output(Bool()) } }) addResource("/vsrc/SimMemTrace.v") addResource("/csrc/SimMemTrace.cc") + addResource("/csrc/SimMemTrace.h") } class CoalescingUnitTest(txns: Int = 5000, timeout: Int = 500000)(implicit From c1e8f4ef860247e8ca1b8c59f16a26f7dbe14dc0 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Fri, 3 Mar 2023 16:38:32 -0800 Subject: [PATCH 4/5] Maintain cycle inside Verilog instead of C The Verilog wrapper maintains the cycle state, and C parser becomes a combinational logic which Verilog queries to check if there is a request in the trace at a specific {cycle, core_id, thread_id}. --- src/main/resources/csrc/SimMemTrace.cc | 18 +++++--- src/main/resources/csrc/SimMemTrace.h | 7 +-- src/main/resources/vsrc/SimMemTrace.v | 58 ++++++++++++++---------- src/main/scala/tilelink/Coalescing.scala | 3 +- 4 files changed, 50 insertions(+), 36 deletions(-) diff --git a/src/main/resources/csrc/SimMemTrace.cc b/src/main/resources/csrc/SimMemTrace.cc index 4b07a4e..8f357f7 100644 --- a/src/main/resources/csrc/SimMemTrace.cc +++ b/src/main/resources/csrc/SimMemTrace.cc @@ -46,15 +46,19 @@ void MemTraceReader::parse() { MemTraceLine MemTraceReader::tick() { MemTraceLine line; + printf("tick(): cycle=%ld\n", cycle); + if (finished()) { cycle++; return line; } + // Fire all requests that happend at this cycle. This is at most #lane + // requests. line = *curr_line; assert(line.cycle >= cycle && "missed some trace lines past their cycles"); while (line.cycle == cycle) { - printf("cycle: %ld\n", cycle); + printf("fire! cycle=%ld\n", cycle); line = *(++curr_line); } @@ -70,19 +74,21 @@ extern "C" void memtrace_init(const char *filename) { reader->parse(); } -extern "C" void memtrace_tick(unsigned char *trace_read_valid, - unsigned char trace_read_ready, - unsigned long *trace_read_cycle, +extern "C" void memtrace_tick(unsigned char trace_read_ready, + unsigned long trace_read_cycle, + int trace_read_thread_id, + unsigned char *trace_read_valid, unsigned long *trace_read_address, unsigned char *trace_read_finished) { - // printf("memtrace_tick()\n"); + printf("memtrace_tick(cycle=%ld, tid=%d)\n", trace_read_cycle, + trace_read_thread_id); + if (!trace_read_ready) { return; } auto line = reader->tick(); *trace_read_valid = line.valid; - *trace_read_cycle = line.cycle; *trace_read_address = line.address; // This means finished and valid will go up at the same cycle. Need to // handle this without skipping the last line. diff --git a/src/main/resources/csrc/SimMemTrace.h b/src/main/resources/csrc/SimMemTrace.h index e2e3070..274b5f5 100644 --- a/src/main/resources/csrc/SimMemTrace.h +++ b/src/main/resources/csrc/SimMemTrace.h @@ -33,8 +33,9 @@ public: }; extern "C" void memtrace_init(const char *filename); -extern "C" void memtrace_tick(unsigned char *trace_read_valid, - unsigned char trace_read_ready, - unsigned long *trace_read_cycle, +extern "C" void memtrace_tick(unsigned char trace_read_ready, + unsigned long trace_read_cycle, + int trace_read_thread_id, + unsigned char *trace_read_valid, unsigned long *trace_read_address, unsigned char *trace_read_finished); diff --git a/src/main/resources/vsrc/SimMemTrace.v b/src/main/resources/vsrc/SimMemTrace.v index 4360213..794045e 100644 --- a/src/main/resources/vsrc/SimMemTrace.v +++ b/src/main/resources/vsrc/SimMemTrace.v @@ -5,11 +5,16 @@ import "DPI-C" function void memtrace_init( input string filename ); +// Make sure to sync the parameters for: +// (1) import "DPI-C" declaration +// (2) C function declaration +// (3) DPI function calls inside initial/always blocks import "DPI-C" function void memtrace_tick ( - output bit trace_read_valid, input bit trace_read_ready, - output longint trace_read_cycle, + input longint trace_read_cycle, + input int trace_read_tid, + output bit trace_read_valid, output longint trace_read_address, output bit trace_read_finished ); @@ -18,33 +23,29 @@ module SimMemTrace #(parameter NUM_THREADS = 4) ( input clock, input reset, - output trace_read_valid, + // These have to match the IO port of the Chisel wrapper module. input trace_read_ready, - output [`DATA_WIDTH-1:0] trace_read_cycle, + output trace_read_valid, output [`DATA_WIDTH*NUM_THREADS-1:0] trace_read_address, output trace_read_finished ); bit __in_valid; - longint __in_cycle; longint __in_address[NUM_THREADS-1:0]; bit __in_finished; string __uartlog; - int __uartno; - initial begin - /* $value$plusargs("uartlog=%s", __uartlog); */ - memtrace_init("vecadd.core1.thread4.trace"); - end + // Cycle counter that is used to query C parser whether we have a request + // coming in at the current cycle. + reg [`DATA_WIDTH-1:0] cycle_counter; + // registers that stage outputs of the C parser reg __in_valid_reg; - reg [`DATA_WIDTH-1:0] __in_cycle_reg; reg [`DATA_WIDTH-1:0] __in_address_reg [NUM_THREADS-1:0]; reg __in_finished_reg; genvar g; assign trace_read_valid = __in_valid_reg; - assign trace_read_cycle = __in_cycle_reg; generate for (g = 0; g < NUM_THREADS; g = g + 1) begin assign trace_read_address[`DATA_WIDTH*(g+1)-1:`DATA_WIDTH*g] = __in_address_reg[g]; @@ -52,37 +53,44 @@ module SimMemTrace #(parameter NUM_THREADS = 4) ( endgenerate assign trace_read_finished = __in_finished_reg; + initial begin + /* $value$plusargs("uartlog=%s", __uartlog); */ + memtrace_init("vecadd.core1.thread4.trace"); + end + // Evaluate the signals on the positive edge always @(posedge clock) begin if (reset) begin __in_valid = 1'b0; - __in_cycle = `DATA_WIDTH'b0; - for (integer i = 0; i < NUM_THREADS; i = i + 1) begin - __in_address[i] = `DATA_WIDTH'b0; + for (integer tid = 0; tid < NUM_THREADS; tid = tid + 1) begin + __in_address[tid] = `DATA_WIDTH'b0; end __in_finished = 1'b0; + cycle_counter <= `DATA_WIDTH'b0; + __in_valid_reg <= 1'b0; - __in_cycle_reg <= `DATA_WIDTH'b0; - for (integer i = 0; i < NUM_THREADS; i = i + 1) begin - __in_address_reg[i] <= `DATA_WIDTH'b0; + for (integer tid = 0; tid < NUM_THREADS; tid = tid + 1) begin + __in_address_reg[tid] <= `DATA_WIDTH'b0; end __in_finished_reg <= 1'b0; end else begin - for (integer i = 0; i < NUM_THREADS; i = i + 1) begin + cycle_counter <= cycle_counter + 1'b1; + + for (integer tid = 0; tid < NUM_THREADS; tid = tid + 1) begin memtrace_tick( - __in_valid, trace_read_ready, - __in_cycle, - __in_address[i], + cycle_counter, + tid, + __in_valid, + __in_address[tid], __in_finished ); end __in_valid_reg <= __in_valid; - __in_cycle_reg <= __in_cycle; - for (integer i = 0; i < NUM_THREADS; i = i + 1) begin - __in_address_reg[i] <= __in_address[i]; + for (integer tid = 0; tid < NUM_THREADS; tid = tid + 1) begin + __in_address_reg[tid] <= __in_address[tid]; end __in_finished_reg <= __in_finished; end diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index 6ffdc15..2f05e57 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -61,9 +61,8 @@ class SimMemTrace(num_threads: Int) val reset = Input(Bool()) val trace_read = new Bundle { - val valid = Output(Bool()) val ready = Input(Bool()) - val cycle = Output(UInt(64.W)) + val valid = Output(Bool()) val address = Output(UInt((64 * num_threads).W)) val finished = Output(Bool()) } From 97fec016205e75ef42432af56d4e8802420bdb11 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Fri, 3 Mar 2023 18:09:58 -0800 Subject: [PATCH 5/5] Receive per-lane valid from SimMemTrace --- src/main/resources/csrc/SimMemTrace.cc | 52 +++++++++++++++--------- src/main/resources/csrc/SimMemTrace.h | 19 ++++----- src/main/resources/vsrc/SimMemTrace.v | 29 +++++++------ src/main/scala/tilelink/Coalescing.scala | 10 ++--- 4 files changed, 62 insertions(+), 48 deletions(-) diff --git a/src/main/resources/csrc/SimMemTrace.cc b/src/main/resources/csrc/SimMemTrace.cc index 8f357f7..1857985 100644 --- a/src/main/resources/csrc/SimMemTrace.cc +++ b/src/main/resources/csrc/SimMemTrace.cc @@ -36,33 +36,47 @@ void MemTraceReader::parse() { while (infile >> line.cycle >> line.loadstore >> line.core_id >> line.thread_id >> std::hex >> line.address >> line.data >> std::dec >> line.data_size) { + line.valid = true; trace.push_back(line); } - curr_line = trace.cbegin(); + read_pos = trace.cbegin(); printf("MemTraceReader: finished parsing\n"); } -MemTraceLine MemTraceReader::tick() { +// Try to read a memory request that might have happened at a given cycle, on +// given thread. In case no request happened at that point, return an empty +// line with .valid = false. +MemTraceLine MemTraceReader::read_trace_at(const long cycle, + const int thread_id) { MemTraceLine line; + line.valid = false; printf("tick(): cycle=%ld\n", cycle); if (finished()) { - cycle++; return line; } - // Fire all requests that happend at this cycle. This is at most #lane - // requests. - line = *curr_line; - assert(line.cycle >= cycle && "missed some trace lines past their cycles"); - while (line.cycle == cycle) { - printf("fire! cycle=%ld\n", cycle); - line = *(++curr_line); + line = *read_pos; + // It should always be guaranteed that the next line is not read yet. + if (line.cycle < cycle) { + fprintf(stderr, "line.cycle=%ld, cycle=%ld\n", line.cycle, cycle); + assert(false && "some trace lines are left unread in the past"); + } + + if (line.cycle > cycle) { + // It's not ready to read this line yet. + return MemTraceLine{}; + } else if (line.cycle == cycle) { + printf("fire! cycle=%ld, valid=%d\n", cycle, line.valid); + // FIXME! Currently thread_id is assumed to be in round-robin order, e.g. + // 0->1->2->3->0->..., both in the trace file and the order the caller calls + // this function. If this is not true, we cannot simply monotonically + // increment read_pos. + ++read_pos; } - cycle++; return line; } @@ -74,20 +88,20 @@ extern "C" void memtrace_init(const char *filename) { reader->parse(); } -extern "C" void memtrace_tick(unsigned char trace_read_ready, - unsigned long trace_read_cycle, - int trace_read_thread_id, - unsigned char *trace_read_valid, - unsigned long *trace_read_address, - unsigned char *trace_read_finished) { - printf("memtrace_tick(cycle=%ld, tid=%d)\n", trace_read_cycle, +extern "C" void memtrace_query(unsigned char trace_read_ready, + unsigned long trace_read_cycle, + int trace_read_thread_id, + unsigned char *trace_read_valid, + unsigned long *trace_read_address, + unsigned char *trace_read_finished) { + printf("memtrace_query(cycle=%ld, tid=%d)\n", trace_read_cycle, trace_read_thread_id); if (!trace_read_ready) { return; } - auto line = reader->tick(); + auto line = reader->read_trace_at(trace_read_cycle, trace_read_thread_id); *trace_read_valid = line.valid; *trace_read_address = line.address; // This means finished and valid will go up at the same cycle. Need to diff --git a/src/main/resources/csrc/SimMemTrace.h b/src/main/resources/csrc/SimMemTrace.h index 274b5f5..2f45b8d 100644 --- a/src/main/resources/csrc/SimMemTrace.h +++ b/src/main/resources/csrc/SimMemTrace.h @@ -23,19 +23,18 @@ public: MemTraceReader(const std::string &filename); ~MemTraceReader(); void parse(); - MemTraceLine tick(); - bool finished() const { return curr_line == trace.cend(); } + MemTraceLine read_trace_at(const long cycle, const int thread_id); + bool finished() const { return read_pos == trace.cend(); } std::ifstream infile; std::vector trace; - std::vector::const_iterator curr_line; - long cycle = 0; + std::vector::const_iterator read_pos; }; extern "C" void memtrace_init(const char *filename); -extern "C" void memtrace_tick(unsigned char trace_read_ready, - unsigned long trace_read_cycle, - int trace_read_thread_id, - unsigned char *trace_read_valid, - unsigned long *trace_read_address, - unsigned char *trace_read_finished); +extern "C" void memtrace_query(unsigned char trace_read_ready, + unsigned long trace_read_cycle, + int trace_read_thread_id, + unsigned char *trace_read_valid, + unsigned long *trace_read_address, + unsigned char *trace_read_finished); diff --git a/src/main/resources/vsrc/SimMemTrace.v b/src/main/resources/vsrc/SimMemTrace.v index 794045e..19960db 100644 --- a/src/main/resources/vsrc/SimMemTrace.v +++ b/src/main/resources/vsrc/SimMemTrace.v @@ -9,7 +9,7 @@ import "DPI-C" function void memtrace_init( // (1) import "DPI-C" declaration // (2) C function declaration // (3) DPI function calls inside initial/always blocks -import "DPI-C" function void memtrace_tick +import "DPI-C" function void memtrace_query ( input bit trace_read_ready, input longint trace_read_cycle, @@ -25,11 +25,11 @@ module SimMemTrace #(parameter NUM_THREADS = 4) ( // These have to match the IO port of the Chisel wrapper module. input trace_read_ready, - output trace_read_valid, + output [NUM_THREADS-1:0] trace_read_valid, output [`DATA_WIDTH*NUM_THREADS-1:0] trace_read_address, output trace_read_finished ); - bit __in_valid; + bit __in_valid[NUM_THREADS-1:0]; longint __in_address[NUM_THREADS-1:0]; bit __in_finished; string __uartlog; @@ -37,17 +37,19 @@ module SimMemTrace #(parameter NUM_THREADS = 4) ( // Cycle counter that is used to query C parser whether we have a request // coming in at the current cycle. reg [`DATA_WIDTH-1:0] cycle_counter; + wire [`DATA_WIDTH-1:0] next_cycle_counter; + assign next_cycle_counter = cycle_counter + 1'b1; // registers that stage outputs of the C parser - reg __in_valid_reg; + reg [NUM_THREADS-1:0] __in_valid_reg; reg [`DATA_WIDTH-1:0] __in_address_reg [NUM_THREADS-1:0]; reg __in_finished_reg; genvar g; - assign trace_read_valid = __in_valid_reg; generate for (g = 0; g < NUM_THREADS; g = g + 1) begin + assign trace_read_valid[g] = __in_valid_reg[g]; assign trace_read_address[`DATA_WIDTH*(g+1)-1:`DATA_WIDTH*g] = __in_address_reg[g]; end endgenerate @@ -61,35 +63,38 @@ module SimMemTrace #(parameter NUM_THREADS = 4) ( // Evaluate the signals on the positive edge always @(posedge clock) begin if (reset) begin - __in_valid = 1'b0; for (integer tid = 0; tid < NUM_THREADS; tid = tid + 1) begin + __in_valid[tid] = 1'b0; __in_address[tid] = `DATA_WIDTH'b0; end __in_finished = 1'b0; cycle_counter <= `DATA_WIDTH'b0; - __in_valid_reg <= 1'b0; for (integer tid = 0; tid < NUM_THREADS; tid = tid + 1) begin + __in_valid_reg[tid] <= 1'b0; __in_address_reg[tid] <= `DATA_WIDTH'b0; end __in_finished_reg <= 1'b0; end else begin - cycle_counter <= cycle_counter + 1'b1; + cycle_counter <= next_cycle_counter; for (integer tid = 0; tid < NUM_THREADS; tid = tid + 1) begin - memtrace_tick( + memtrace_query( trace_read_ready, - cycle_counter, + // Since parsed results are latched to the output on the next + // cycle due to staging registers, we need to pass in the next cycle + // to sync up. + next_cycle_counter, tid, - __in_valid, + __in_valid[tid], __in_address[tid], __in_finished ); end - __in_valid_reg <= __in_valid; for (integer tid = 0; tid < NUM_THREADS; tid = tid + 1) begin + __in_valid_reg[tid] <= __in_valid[tid]; __in_address_reg[tid] <= __in_address[tid]; end __in_finished_reg <= __in_finished; diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index 2f05e57..c23ef0e 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -39,15 +39,11 @@ class MemTraceDriver(implicit p: Parameters) extends LazyModule { lazy val module = new Impl class Impl extends LazyModuleImp(this) with UnitTestModule { - val sim = Module(new SimMemTrace(2)) + val sim = Module(new SimMemTrace(4)) sim.io.clock := clock sim.io.reset := reset.asBool sim.io.trace_read.ready := true.B - when(sim.io.trace_read.valid) { - println("sim.io.valid!") - } - // we're finished when there is no more memtrace to read io.finished := sim.io.trace_read.finished } @@ -62,7 +58,7 @@ class SimMemTrace(num_threads: Int) val trace_read = new Bundle { val ready = Input(Bool()) - val valid = Output(Bool()) + val valid = Output(UInt(num_threads.W)) val address = Output(UInt((64 * num_threads).W)) val finished = Output(Bool()) } @@ -76,7 +72,7 @@ class SimMemTrace(num_threads: Int) class CoalescingUnitTest(txns: Int = 5000, timeout: Int = 500000)(implicit p: Parameters ) extends UnitTest(timeout) { - val coal = Module(LazyModule(new CoalescingUnit(txns)).module) + // val coal = Module(LazyModule(new CoalescingUnit(txns)).module) val driver = Module(LazyModule(new MemTraceDriver).module) driver.io.start := io.start