diff --git a/src/main/resources/csrc/SimMemTrace.cc b/src/main/resources/csrc/SimMemTrace.cc index 4b07a4e..8f357f7 100644 --- a/src/main/resources/csrc/SimMemTrace.cc +++ b/src/main/resources/csrc/SimMemTrace.cc @@ -46,15 +46,19 @@ void MemTraceReader::parse() { MemTraceLine MemTraceReader::tick() { MemTraceLine line; + printf("tick(): cycle=%ld\n", cycle); + if (finished()) { cycle++; return line; } + // Fire all requests that happened at this cycle. This is at most #lane + // requests. line = *curr_line; assert(line.cycle >= cycle && "missed some trace lines past their cycles"); while (line.cycle == cycle) { - printf("cycle: %ld\n", cycle); + printf("fire! cycle=%ld\n", cycle); line = *(++curr_line); } @@ -70,19 +74,21 @@ extern "C" void memtrace_init(const char *filename) { reader->parse(); } -extern "C" void memtrace_tick(unsigned char *trace_read_valid, - unsigned char trace_read_ready, - unsigned long *trace_read_cycle, +extern "C" void memtrace_tick(unsigned char trace_read_ready, + unsigned long trace_read_cycle, + int trace_read_thread_id, + unsigned char *trace_read_valid, unsigned long *trace_read_address, unsigned char *trace_read_finished) { - // printf("memtrace_tick()\n"); + printf("memtrace_tick(cycle=%ld, tid=%d)\n", trace_read_cycle, + trace_read_thread_id); + if (!trace_read_ready) { return; } auto line = reader->tick(); *trace_read_valid = line.valid; - *trace_read_cycle = line.cycle; *trace_read_address = line.address; // This means finished and valid will go up at the same cycle. Need to // handle this without skipping the last line. 
diff --git a/src/main/resources/csrc/SimMemTrace.h b/src/main/resources/csrc/SimMemTrace.h index e2e3070..274b5f5 100644 --- a/src/main/resources/csrc/SimMemTrace.h +++ b/src/main/resources/csrc/SimMemTrace.h @@ -33,8 +33,9 @@ public: }; extern "C" void memtrace_init(const char *filename); -extern "C" void memtrace_tick(unsigned char *trace_read_valid, - unsigned char trace_read_ready, - unsigned long *trace_read_cycle, +extern "C" void memtrace_tick(unsigned char trace_read_ready, + unsigned long trace_read_cycle, + int trace_read_thread_id, + unsigned char *trace_read_valid, unsigned long *trace_read_address, unsigned char *trace_read_finished); diff --git a/src/main/resources/vsrc/SimMemTrace.v b/src/main/resources/vsrc/SimMemTrace.v index 4360213..794045e 100644 --- a/src/main/resources/vsrc/SimMemTrace.v +++ b/src/main/resources/vsrc/SimMemTrace.v @@ -5,11 +5,16 @@ import "DPI-C" function void memtrace_init( input string filename ); +// Make sure to sync the parameters for: +// (1) import "DPI-C" declaration +// (2) C function declaration +// (3) DPI function calls inside initial/always blocks import "DPI-C" function void memtrace_tick ( - output bit trace_read_valid, input bit trace_read_ready, - output longint trace_read_cycle, + input longint trace_read_cycle, + input int trace_read_tid, + output bit trace_read_valid, output longint trace_read_address, output bit trace_read_finished ); @@ -18,33 +23,29 @@ module SimMemTrace #(parameter NUM_THREADS = 4) ( input clock, input reset, - output trace_read_valid, + // These have to match the IO port of the Chisel wrapper module. 
input trace_read_ready, - output [`DATA_WIDTH-1:0] trace_read_cycle, + output trace_read_valid, output [`DATA_WIDTH*NUM_THREADS-1:0] trace_read_address, output trace_read_finished ); bit __in_valid; - longint __in_cycle; longint __in_address[NUM_THREADS-1:0]; bit __in_finished; string __uartlog; - int __uartno; - initial begin - /* $value$plusargs("uartlog=%s", __uartlog); */ - memtrace_init("vecadd.core1.thread4.trace"); - end + // Cycle counter that is used to query C parser whether we have a request + // coming in at the current cycle. + reg [`DATA_WIDTH-1:0] cycle_counter; + // registers that stage outputs of the C parser reg __in_valid_reg; - reg [`DATA_WIDTH-1:0] __in_cycle_reg; reg [`DATA_WIDTH-1:0] __in_address_reg [NUM_THREADS-1:0]; reg __in_finished_reg; genvar g; assign trace_read_valid = __in_valid_reg; - assign trace_read_cycle = __in_cycle_reg; generate for (g = 0; g < NUM_THREADS; g = g + 1) begin assign trace_read_address[`DATA_WIDTH*(g+1)-1:`DATA_WIDTH*g] = __in_address_reg[g]; @@ -52,37 +53,44 @@ module SimMemTrace #(parameter NUM_THREADS = 4) ( endgenerate assign trace_read_finished = __in_finished_reg; + initial begin + /* $value$plusargs("uartlog=%s", __uartlog); */ + memtrace_init("vecadd.core1.thread4.trace"); + end + // Evaluate the signals on the positive edge always @(posedge clock) begin if (reset) begin __in_valid = 1'b0; - __in_cycle = `DATA_WIDTH'b0; - for (integer i = 0; i < NUM_THREADS; i = i + 1) begin - __in_address[i] = `DATA_WIDTH'b0; + for (integer tid = 0; tid < NUM_THREADS; tid = tid + 1) begin + __in_address[tid] = `DATA_WIDTH'b0; end __in_finished = 1'b0; + cycle_counter <= `DATA_WIDTH'b0; + __in_valid_reg <= 1'b0; - __in_cycle_reg <= `DATA_WIDTH'b0; - for (integer i = 0; i < NUM_THREADS; i = i + 1) begin - __in_address_reg[i] <= `DATA_WIDTH'b0; + for (integer tid = 0; tid < NUM_THREADS; tid = tid + 1) begin + __in_address_reg[tid] <= `DATA_WIDTH'b0; end __in_finished_reg <= 1'b0; end else begin - for (integer i = 0; i < 
NUM_THREADS; i = i + 1) begin + cycle_counter <= cycle_counter + 1'b1; + + for (integer tid = 0; tid < NUM_THREADS; tid = tid + 1) begin memtrace_tick( - __in_valid, trace_read_ready, - __in_cycle, - __in_address[i], + cycle_counter, + tid, + __in_valid, + __in_address[tid], __in_finished ); end __in_valid_reg <= __in_valid; - __in_cycle_reg <= __in_cycle; - for (integer i = 0; i < NUM_THREADS; i = i + 1) begin - __in_address_reg[i] <= __in_address[i]; + for (integer tid = 0; tid < NUM_THREADS; tid = tid + 1) begin + __in_address_reg[tid] <= __in_address[tid]; end __in_finished_reg <= __in_finished; end diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index 6ffdc15..2f05e57 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -61,9 +61,8 @@ class SimMemTrace(num_threads: Int) val reset = Input(Bool()) val trace_read = new Bundle { - val valid = Output(Bool()) val ready = Input(Bool()) - val cycle = Output(UInt(64.W)) + val valid = Output(Bool()) val address = Output(UInt((64 * num_threads).W)) val finished = Output(Bool()) }