somehow merged
This commit is contained in:
@@ -5,10 +5,14 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
|
#include <cmath>
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include "SimMemTrace.h"
|
#include "SimMemTrace.h"
|
||||||
|
|
||||||
|
// Global singleton instance
|
||||||
|
static std::unique_ptr<MemTraceReader> reader;
|
||||||
|
|
||||||
MemTraceReader::MemTraceReader(const std::string &filename) {
|
MemTraceReader::MemTraceReader(const std::string &filename) {
|
||||||
char cwd[4096];
|
char cwd[4096];
|
||||||
if (getcwd(cwd, sizeof(cwd))) {
|
if (getcwd(cwd, sizeof(cwd))) {
|
||||||
@@ -34,10 +38,21 @@ void MemTraceReader::parse() {
|
|||||||
|
|
||||||
printf("MemTraceReader: started parsing\n");
|
printf("MemTraceReader: started parsing\n");
|
||||||
|
|
||||||
while (infile >> line.cycle >> line.loadstore >> line.core_id >>
|
long size = 0;
|
||||||
|
std::string loadstore; // FIXME: likely slow
|
||||||
|
while (infile >> line.cycle >> loadstore >> line.core_id >>
|
||||||
line.lane_id >> std::hex >> line.address >> line.data >> std::dec >>
|
line.lane_id >> std::hex >> line.address >> line.data >> std::dec >>
|
||||||
line.data_size) {
|
size) {
|
||||||
line.valid = true;
|
line.valid = true;
|
||||||
|
|
||||||
|
line.is_store = (loadstore == "STORE");
|
||||||
|
|
||||||
|
assert(size > 0 && "invalid size in trace");
|
||||||
|
int lgsize = static_cast<int>(log2(size));
|
||||||
|
assert((size & ~(~0lu << lgsize)) == 0 &&
|
||||||
|
"non-power-of-2 size detected in trace");
|
||||||
|
line.log_data_size = lgsize;
|
||||||
|
|
||||||
trace.push_back(line);
|
trace.push_back(line);
|
||||||
}
|
}
|
||||||
read_pos = trace.cbegin();
|
read_pos = trace.cbegin();
|
||||||
@@ -75,8 +90,9 @@ MemTraceLine MemTraceReader::read_trace_at(const long cycle,
|
|||||||
// read it right now.
|
// read it right now.
|
||||||
return MemTraceLine{};
|
return MemTraceLine{};
|
||||||
} else if (line.cycle == cycle && line.lane_id == lane_id) {
|
} else if (line.cycle == cycle && line.lane_id == lane_id) {
|
||||||
printf("fire! cycle=%ld, valid=%d, %s addr=%x \n", cycle, line.valid,
|
printf("fire! cycle=%ld, valid=%d, %s addr=%lx, size=%d \n", cycle,
|
||||||
line.loadstore, line.address);
|
line.valid, (line.is_store ? "STORE" : "LOAD"), line.address,
|
||||||
|
line.log_data_size);
|
||||||
|
|
||||||
// FIXME! Currently lane_id is assumed to be in round-robin order, e.g.
|
// FIXME! Currently lane_id is assumed to be in round-robin order, e.g.
|
||||||
// 0->1->2->3->0->..., both in the trace file and the order the caller calls
|
// 0->1->2->3->0->..., both in the trace file and the order the caller calls
|
||||||
@@ -123,7 +139,7 @@ extern "C" void memtrace_query(unsigned char trace_read_ready,
|
|||||||
unsigned char *trace_read_valid,
|
unsigned char *trace_read_valid,
|
||||||
unsigned long *trace_read_address,
|
unsigned long *trace_read_address,
|
||||||
unsigned char *trace_read_is_store,
|
unsigned char *trace_read_is_store,
|
||||||
int *trace_read_store_mask,
|
int *trace_read_size,
|
||||||
unsigned long *trace_read_data,
|
unsigned long *trace_read_data,
|
||||||
unsigned char *trace_read_finished) {
|
unsigned char *trace_read_finished) {
|
||||||
// printf("memtrace_query(cycle=%ld, tid=%d)\n", trace_read_cycle,
|
// printf("memtrace_query(cycle=%ld, tid=%d)\n", trace_read_cycle,
|
||||||
@@ -136,8 +152,8 @@ extern "C" void memtrace_query(unsigned char trace_read_ready,
|
|||||||
auto line = reader->read_trace_at(trace_read_cycle, trace_read_lane_id);
|
auto line = reader->read_trace_at(trace_read_cycle, trace_read_lane_id);
|
||||||
*trace_read_valid = line.valid;
|
*trace_read_valid = line.valid;
|
||||||
*trace_read_address = line.address;
|
*trace_read_address = line.address;
|
||||||
*trace_read_is_store = strcmp(line.loadstore, "STORE") == 0 ;
|
*trace_read_is_store = line.is_store;
|
||||||
*trace_read_store_mask = line.data_size;
|
*trace_read_size = line.log_data_size;
|
||||||
*trace_read_data = line.data;
|
*trace_read_data = line.data;
|
||||||
// This means finished and valid will go up at the same cycle. Need to
|
// This means finished and valid will go up at the same cycle. Need to
|
||||||
// handle this without skipping the last line.
|
// handle this without skipping the last line.
|
||||||
|
|||||||
@@ -2,20 +2,16 @@
|
|||||||
#include <memory>
|
#include <memory>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
|
|
||||||
class MemTraceReader;
|
|
||||||
|
|
||||||
// Global singleton instance of MemTraceReader
|
|
||||||
static std::unique_ptr<MemTraceReader> reader;
|
|
||||||
|
|
||||||
struct MemTraceLine {
|
struct MemTraceLine {
|
||||||
bool valid = false;
|
bool valid = false;
|
||||||
long cycle = 0;
|
long cycle = 0;
|
||||||
char loadstore[10];
|
|
||||||
int core_id = 0;
|
int core_id = 0;
|
||||||
int lane_id = 0;
|
int lane_id = 0;
|
||||||
|
int source = 0;
|
||||||
unsigned long address = 0;
|
unsigned long address = 0;
|
||||||
|
bool is_store = 0;
|
||||||
unsigned long data = 0;
|
unsigned long data = 0;
|
||||||
int data_size = 0;
|
int log_data_size = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
class MemTraceReader {
|
class MemTraceReader {
|
||||||
@@ -31,6 +27,16 @@ public:
|
|||||||
std::vector<MemTraceLine>::const_iterator read_pos;
|
std::vector<MemTraceLine>::const_iterator read_pos;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class MemTraceWriter {
|
||||||
|
public:
|
||||||
|
MemTraceWriter(const bool is_response, const std::string &filename);
|
||||||
|
~MemTraceWriter();
|
||||||
|
void write_line_to_trace(const MemTraceLine line);
|
||||||
|
|
||||||
|
bool is_response;
|
||||||
|
FILE *outfile;
|
||||||
|
};
|
||||||
|
|
||||||
extern "C" void memtrace_init(const char *filename);
|
extern "C" void memtrace_init(const char *filename);
|
||||||
extern "C" void memtrace_query(unsigned char trace_read_ready,
|
extern "C" void memtrace_query(unsigned char trace_read_ready,
|
||||||
unsigned long trace_read_cycle,
|
unsigned long trace_read_cycle,
|
||||||
@@ -38,7 +44,17 @@ extern "C" void memtrace_query(unsigned char trace_read_ready,
|
|||||||
unsigned char *trace_read_valid,
|
unsigned char *trace_read_valid,
|
||||||
unsigned long *trace_read_address,
|
unsigned long *trace_read_address,
|
||||||
unsigned char *trace_read_is_store,
|
unsigned char *trace_read_is_store,
|
||||||
int *trace_read_store_mask,
|
int *trace_read_size,
|
||||||
unsigned long *trace_read_data,
|
unsigned long *trace_read_data,
|
||||||
unsigned char *trace_read_finished
|
unsigned char *trace_read_finished);
|
||||||
);
|
extern "C" int memtracelogger_init(int is_response, const char *filename);
|
||||||
|
extern "C" void memtracelogger_log(int handle,
|
||||||
|
unsigned char trace_log_valid,
|
||||||
|
unsigned long trace_log_cycle,
|
||||||
|
int trace_log_lane_id,
|
||||||
|
int trace_log_source,
|
||||||
|
unsigned long trace_log_address,
|
||||||
|
unsigned char trace_log_is_store,
|
||||||
|
int trace_log_size,
|
||||||
|
unsigned long trace_log_data,
|
||||||
|
unsigned char *trace_log_ready);
|
||||||
|
|||||||
107
src/main/resources/csrc/SimMemTraceLogger.cc
Normal file
107
src/main/resources/csrc/SimMemTraceLogger.cc
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
#ifndef NO_VPI
|
||||||
|
#include <svdpi.h>
|
||||||
|
#include <vpi_user.h>
|
||||||
|
#endif
|
||||||
|
#include "SimMemTrace.h"
|
||||||
|
#include <cassert>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cstring>
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
// Contains handle for every logger that is instantiated per Verilog module
|
||||||
|
// instance
|
||||||
|
static std::vector<std::unique_ptr<MemTraceWriter>> loggers;
|
||||||
|
|
||||||
|
MemTraceWriter::MemTraceWriter(const bool is_response,
|
||||||
|
const std::string &filename) {
|
||||||
|
this->is_response = is_response;
|
||||||
|
|
||||||
|
char cwd[4096];
|
||||||
|
if (getcwd(cwd, sizeof(cwd))) {
|
||||||
|
printf("MemTraceWriter: current working dir: %s\n", cwd);
|
||||||
|
}
|
||||||
|
|
||||||
|
outfile = fopen(filename.c_str(), "w");
|
||||||
|
if (!outfile) {
|
||||||
|
fprintf(stderr, "failed to open file %s\n", filename.c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
MemTraceWriter::~MemTraceWriter() {
|
||||||
|
fclose(outfile);
|
||||||
|
printf("MemTraceWriter destroyed\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
void MemTraceWriter::write_line_to_trace(const MemTraceLine line) {
|
||||||
|
fprintf(outfile, "%ld %s %d %d %d 0x%lx 0x%lx %u\n", line.cycle,
|
||||||
|
(line.is_store ? "STORE" : "LOAD"), line.core_id, line.lane_id,
|
||||||
|
line.source, line.address, line.data, (1u << line.log_data_size));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the "handle" ID for this particular logger instance.
|
||||||
|
extern "C" int memtracelogger_init(int is_response, const char *filename) {
|
||||||
|
#ifndef NO_VPI
|
||||||
|
s_vpi_vlog_info info;
|
||||||
|
if (!vpi_get_vlog_info(&info)) {
|
||||||
|
fprintf(stderr, "fatal: failed to get plusargs from VCS\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
const char *TRACEFILENAME_PLUSARG = "+memtracefile=";
|
||||||
|
for (int i = 0; i < info.argc; i++) {
|
||||||
|
char *input_arg = info.argv[i];
|
||||||
|
if (strncmp(input_arg, TRACEFILENAME_PLUSARG,
|
||||||
|
strlen(TRACEFILENAME_PLUSARG)) == 0) {
|
||||||
|
filename = input_arg + strlen(TRACEFILENAME_PLUSARG);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int handle = loggers.size();
|
||||||
|
loggers.emplace_back(std::make_unique<MemTraceWriter>(is_response, filename));
|
||||||
|
|
||||||
|
printf("memtracelogger_init: handle=%d, is_response=%d, filename=[%s]\n",
|
||||||
|
handle, is_response, filename);
|
||||||
|
|
||||||
|
return handle;
|
||||||
|
}
|
||||||
|
|
||||||
|
// This is used to log both TileLink A and D channels.
|
||||||
|
// TODO: accept core_id as well
|
||||||
|
extern "C" void memtracelogger_log(int handle,
|
||||||
|
unsigned char trace_log_valid,
|
||||||
|
unsigned long trace_log_cycle,
|
||||||
|
int trace_log_lane_id,
|
||||||
|
int trace_log_source,
|
||||||
|
unsigned long trace_log_address,
|
||||||
|
unsigned char trace_log_is_store,
|
||||||
|
int trace_log_size,
|
||||||
|
unsigned long trace_log_data,
|
||||||
|
unsigned char *trace_log_ready) {
|
||||||
|
// printf("memtrace_query(cycle=%ld, tid=%d)\n", trace_read_cycle,
|
||||||
|
// trace_read_lane_id);
|
||||||
|
*trace_log_ready = 1;
|
||||||
|
|
||||||
|
if (!trace_log_valid) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// printf("%s: [%lu] valid: address=%lx, tid=%u, size=%d\n", __func__,
|
||||||
|
// trace_log_cycle, trace_log_address, trace_log_lane_id, trace_log_size);
|
||||||
|
|
||||||
|
MemTraceLine line{.valid = (trace_log_valid == 1),
|
||||||
|
.cycle = static_cast<long>(trace_log_cycle),
|
||||||
|
.core_id = 0, // TODO support multicores
|
||||||
|
.lane_id = trace_log_lane_id,
|
||||||
|
.source = trace_log_source,
|
||||||
|
.address = trace_log_address,
|
||||||
|
.is_store = (trace_log_is_store == 1),
|
||||||
|
.data = trace_log_data,
|
||||||
|
.log_data_size = trace_log_size};
|
||||||
|
|
||||||
|
assert(0 <= handle && handle < loggers.size() && "wrong trace logger handle");
|
||||||
|
auto logger = loggers[handle].get();
|
||||||
|
logger->write_line_to_trace(line);
|
||||||
|
}
|
||||||
@@ -1,6 +1,7 @@
|
|||||||
|
// FIXME hardcoded
|
||||||
`define DATA_WIDTH 64
|
`define DATA_WIDTH 64
|
||||||
`define MAX_NUM_LANES 32
|
`define MAX_NUM_LANES 32
|
||||||
`define MASK_WIDTH 8
|
`define LOGSIZE_WIDTH 8
|
||||||
|
|
||||||
import "DPI-C" function void memtrace_init(
|
import "DPI-C" function void memtrace_init(
|
||||||
input string filename
|
input string filename
|
||||||
@@ -14,11 +15,11 @@ import "DPI-C" function void memtrace_query
|
|||||||
(
|
(
|
||||||
input bit trace_read_ready,
|
input bit trace_read_ready,
|
||||||
input longint trace_read_cycle,
|
input longint trace_read_cycle,
|
||||||
input int trace_read_tid,
|
input int trace_read_lane_id,
|
||||||
output bit trace_read_valid,
|
output bit trace_read_valid,
|
||||||
output longint trace_read_address,
|
output longint trace_read_address,
|
||||||
output bit trace_read_is_store,
|
output bit trace_read_is_store,
|
||||||
output int trace_read_store_mask,
|
output int trace_read_size,
|
||||||
output longint trace_read_data,
|
output longint trace_read_data,
|
||||||
output bit trace_read_finished
|
output bit trace_read_finished
|
||||||
);
|
);
|
||||||
@@ -27,13 +28,12 @@ module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) (
|
|||||||
input clock,
|
input clock,
|
||||||
input reset,
|
input reset,
|
||||||
|
|
||||||
// These have to match the IO port of the Chisel wrapper module.
|
// These have to match the IO port name of the Chisel wrapper module.
|
||||||
input trace_read_ready,
|
input trace_read_ready,
|
||||||
output [NUM_LANES-1:0] trace_read_valid,
|
output [NUM_LANES-1:0] trace_read_valid,
|
||||||
output [`DATA_WIDTH*NUM_LANES-1:0] trace_read_address,
|
output [`DATA_WIDTH*NUM_LANES-1:0] trace_read_address,
|
||||||
|
|
||||||
output [NUM_LANES-1:0] trace_read_is_store,
|
output [NUM_LANES-1:0] trace_read_is_store,
|
||||||
output [NUM_LANES*`MASK_WIDTH-1:0] trace_read_store_mask,
|
output [`LOGSIZE_WIDTH*NUM_LANES-1:0] trace_read_size,
|
||||||
output [`DATA_WIDTH*NUM_LANES-1:0] trace_read_data,
|
output [`DATA_WIDTH*NUM_LANES-1:0] trace_read_data,
|
||||||
output trace_read_finished
|
output trace_read_finished
|
||||||
);
|
);
|
||||||
@@ -41,7 +41,7 @@ module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) (
|
|||||||
longint __in_address [NUM_LANES-1:0];
|
longint __in_address [NUM_LANES-1:0];
|
||||||
|
|
||||||
bit __in_is_store [NUM_LANES-1:0];
|
bit __in_is_store [NUM_LANES-1:0];
|
||||||
int __in_store_mask [NUM_LANES-1:0];
|
reg [`LOGSIZE_WIDTH-1:0] __in_size [NUM_LANES-1:0];
|
||||||
longint __in_data [NUM_LANES-1:0];
|
longint __in_data [NUM_LANES-1:0];
|
||||||
|
|
||||||
bit __in_finished;
|
bit __in_finished;
|
||||||
@@ -58,7 +58,7 @@ module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) (
|
|||||||
reg [`DATA_WIDTH-1:0] __in_address_reg [NUM_LANES-1:0];
|
reg [`DATA_WIDTH-1:0] __in_address_reg [NUM_LANES-1:0];
|
||||||
|
|
||||||
reg [NUM_LANES-1:0] __in_is_store_reg;
|
reg [NUM_LANES-1:0] __in_is_store_reg;
|
||||||
reg [`MASK_WIDTH-1:0] __in_store_mask_reg [NUM_LANES-1:0];
|
reg [`LOGSIZE_WIDTH-1:0] __in_size_reg [NUM_LANES-1:0];
|
||||||
reg [`DATA_WIDTH-1:0] __in_data_reg [NUM_LANES-1:0];
|
reg [`DATA_WIDTH-1:0] __in_data_reg [NUM_LANES-1:0];
|
||||||
reg __in_finished_reg;
|
reg __in_finished_reg;
|
||||||
|
|
||||||
@@ -70,7 +70,7 @@ module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) (
|
|||||||
assign trace_read_address[`DATA_WIDTH*(g+1)-1:`DATA_WIDTH*g] = __in_address_reg[g];
|
assign trace_read_address[`DATA_WIDTH*(g+1)-1:`DATA_WIDTH*g] = __in_address_reg[g];
|
||||||
|
|
||||||
assign trace_read_is_store[g] = __in_is_store_reg[g];
|
assign trace_read_is_store[g] = __in_is_store_reg[g];
|
||||||
assign trace_read_store_mask[`MASK_WIDTH*(g+1)-1:`MASK_WIDTH*g] = __in_store_mask_reg[g];
|
assign trace_read_size[`LOGSIZE_WIDTH*(g+1)-1:`LOGSIZE_WIDTH*g] = __in_size_reg[g];
|
||||||
assign trace_read_data[`DATA_WIDTH*(g+1)-1:`DATA_WIDTH*g] = __in_data_reg[g];
|
assign trace_read_data[`DATA_WIDTH*(g+1)-1:`DATA_WIDTH*g] = __in_data_reg[g];
|
||||||
end
|
end
|
||||||
endgenerate
|
endgenerate
|
||||||
@@ -81,17 +81,14 @@ module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) (
|
|||||||
memtrace_init(FILENAME);
|
memtrace_init(FILENAME);
|
||||||
end
|
end
|
||||||
|
|
||||||
// Evaluate the signals on the positive edge
|
|
||||||
always @(posedge clock) begin
|
always @(posedge clock) begin
|
||||||
|
|
||||||
// Setting reset value
|
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
for (integer tid = 0; tid < NUM_LANES; tid = tid + 1) begin
|
for (integer tid = 0; tid < NUM_LANES; tid = tid + 1) begin
|
||||||
__in_valid[tid] = 1'b0;
|
__in_valid[tid] = 1'b0;
|
||||||
__in_address[tid] = `DATA_WIDTH'b0;
|
__in_address[tid] = `DATA_WIDTH'b0;
|
||||||
|
|
||||||
__in_is_store[tid] = 1'b0;
|
__in_is_store[tid] = 1'b0;
|
||||||
__in_store_mask[tid] = `MASK_WIDTH'b0;
|
__in_size[tid] = `LOGSIZE_WIDTH'b0;
|
||||||
__in_data[tid] = `DATA_WIDTH'b0;
|
__in_data[tid] = `DATA_WIDTH'b0;
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -105,7 +102,7 @@ module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) (
|
|||||||
__in_address_reg[tid] <= `DATA_WIDTH'b0;
|
__in_address_reg[tid] <= `DATA_WIDTH'b0;
|
||||||
|
|
||||||
__in_is_store_reg[tid] = 1'b0;
|
__in_is_store_reg[tid] = 1'b0;
|
||||||
__in_store_mask_reg[tid] = `MASK_WIDTH'b0;
|
__in_size_reg[tid] = `LOGSIZE_WIDTH'b0;
|
||||||
__in_data_reg[tid] = `DATA_WIDTH'b0;
|
__in_data_reg[tid] = `DATA_WIDTH'b0;
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -127,7 +124,7 @@ module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) (
|
|||||||
__in_address[tid],
|
__in_address[tid],
|
||||||
|
|
||||||
__in_is_store[tid],
|
__in_is_store[tid],
|
||||||
__in_store_mask[tid],
|
__in_size[tid],
|
||||||
__in_data[tid],
|
__in_data[tid],
|
||||||
|
|
||||||
__in_finished
|
__in_finished
|
||||||
@@ -140,7 +137,7 @@ module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) (
|
|||||||
__in_address_reg[tid] <= __in_address[tid];
|
__in_address_reg[tid] <= __in_address[tid];
|
||||||
|
|
||||||
__in_is_store_reg[tid] <= __in_is_store[tid];
|
__in_is_store_reg[tid] <= __in_is_store[tid];
|
||||||
__in_store_mask_reg[tid] <= __in_store_mask[tid];
|
__in_size_reg[tid] <= __in_size[tid];
|
||||||
__in_data_reg[tid] <= __in_data[tid];
|
__in_data_reg[tid] <= __in_data[tid];
|
||||||
end
|
end
|
||||||
__in_finished_reg <= __in_finished;
|
__in_finished_reg <= __in_finished;
|
||||||
|
|||||||
106
src/main/resources/vsrc/SimMemTraceLogger.v
Normal file
106
src/main/resources/vsrc/SimMemTraceLogger.v
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
// FIXME hardcoded
|
||||||
|
`define DATA_WIDTH 64
|
||||||
|
`define MAX_NUM_LANES 32
|
||||||
|
`define SOURCEID_WIDTH 32
|
||||||
|
`define LOGSIZE_WIDTH 8
|
||||||
|
|
||||||
|
import "DPI-C" function int memtracelogger_init(
|
||||||
|
input bit is_response,
|
||||||
|
input string filename
|
||||||
|
);
|
||||||
|
|
||||||
|
// Make sure to sync the parameters for:
|
||||||
|
// (1) import "DPI-C" declaration
|
||||||
|
// (2) C function declaration
|
||||||
|
// (3) DPI function calls inside initial/always blocks
|
||||||
|
import "DPI-C" function void memtracelogger_log
|
||||||
|
(
|
||||||
|
input int handle,
|
||||||
|
input bit trace_log_valid,
|
||||||
|
input longint trace_log_cycle,
|
||||||
|
input int trace_log_lane_id,
|
||||||
|
input int trace_log_source,
|
||||||
|
input longint trace_log_address,
|
||||||
|
input bit trace_log_is_store,
|
||||||
|
input int trace_log_size,
|
||||||
|
input longint trace_log_data,
|
||||||
|
output bit trace_log_ready
|
||||||
|
);
|
||||||
|
|
||||||
|
module SimMemTraceLogger #(parameter
|
||||||
|
IS_RESPONSE = 0,
|
||||||
|
FILENAME = "undefined",
|
||||||
|
NUM_LANES = 4) (
|
||||||
|
input clock,
|
||||||
|
input reset,
|
||||||
|
|
||||||
|
// NOTE: LSB is lane 0
|
||||||
|
input [NUM_LANES-1:0] trace_log_valid,
|
||||||
|
input [`SOURCEID_WIDTH*NUM_LANES-1:0] trace_log_source,
|
||||||
|
input [`DATA_WIDTH*NUM_LANES-1:0] trace_log_address,
|
||||||
|
input [NUM_LANES-1:0] trace_log_is_store,
|
||||||
|
input [`LOGSIZE_WIDTH*NUM_LANES-1:0] trace_log_size,
|
||||||
|
input [`DATA_WIDTH*NUM_LANES-1:0] trace_log_data,
|
||||||
|
output trace_log_ready
|
||||||
|
);
|
||||||
|
int logger_handle;
|
||||||
|
bit __in_ready;
|
||||||
|
|
||||||
|
// cycle_counter will start off right after reset is deasserted which should
|
||||||
|
// synchronize itself with SimMemTrace.cycle_counter
|
||||||
|
reg [`DATA_WIDTH-1:0] cycle_counter;
|
||||||
|
wire [`DATA_WIDTH-1:0] next_cycle_counter;
|
||||||
|
assign next_cycle_counter = cycle_counter + 1'b1;
|
||||||
|
|
||||||
|
// wires going into the DPC
|
||||||
|
wire __valid [NUM_LANES-1:0];
|
||||||
|
wire [`SOURCEID_WIDTH-1:0] __source [NUM_LANES-1:0];
|
||||||
|
wire [`DATA_WIDTH-1:0] __address [NUM_LANES-1:0];
|
||||||
|
wire __is_store [NUM_LANES-1:0];
|
||||||
|
wire [`LOGSIZE_WIDTH-1:0] __size [NUM_LANES-1:0];
|
||||||
|
wire [`DATA_WIDTH-1:0] __data [NUM_LANES-1:0];
|
||||||
|
|
||||||
|
assign trace_log_ready = __in_ready;
|
||||||
|
|
||||||
|
genvar g;
|
||||||
|
generate
|
||||||
|
for (g = 0; g < NUM_LANES; g = g + 1) begin
|
||||||
|
// LSB is lane 0
|
||||||
|
assign __valid[g] = trace_log_valid[g];
|
||||||
|
assign __source[g] = trace_log_source[`SOURCEID_WIDTH*(g+1)-1:`SOURCEID_WIDTH*g];
|
||||||
|
assign __address[g] = trace_log_address[`DATA_WIDTH*(g+1)-1:`DATA_WIDTH*g];
|
||||||
|
assign __is_store[g] = trace_log_is_store[g];
|
||||||
|
assign __size[g] = trace_log_size[`LOGSIZE_WIDTH*(g+1)-1:`LOGSIZE_WIDTH*g];
|
||||||
|
assign __data[g] = trace_log_data[`DATA_WIDTH*(g+1)-1:`DATA_WIDTH*g];
|
||||||
|
end
|
||||||
|
endgenerate
|
||||||
|
|
||||||
|
initial begin
|
||||||
|
/* $value$plusargs("uartlog=%s", __uartlog); */
|
||||||
|
logger_handle = memtracelogger_init(IS_RESPONSE, FILENAME);
|
||||||
|
end
|
||||||
|
|
||||||
|
always @(posedge clock) begin
|
||||||
|
if (reset) begin
|
||||||
|
__in_ready = 1'b1;
|
||||||
|
cycle_counter <= `DATA_WIDTH'b0;
|
||||||
|
end else begin
|
||||||
|
cycle_counter <= next_cycle_counter;
|
||||||
|
|
||||||
|
for (integer tid = 0; tid < NUM_LANES; tid = tid + 1) begin
|
||||||
|
memtracelogger_log(
|
||||||
|
logger_handle,
|
||||||
|
__valid[tid],
|
||||||
|
cycle_counter,
|
||||||
|
tid,
|
||||||
|
__source[tid],
|
||||||
|
__address[tid],
|
||||||
|
__is_store[tid],
|
||||||
|
__size[tid],
|
||||||
|
__data[tid],
|
||||||
|
__in_ready
|
||||||
|
);
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
endmodule
|
||||||
@@ -106,7 +106,6 @@ class MultiCoalescer[QueueT: CoalShiftQueue]
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModuleImp(outer) {
|
class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModuleImp(outer) {
|
||||||
// Make sure IdentityNode is connected to an upstream node, not just the
|
// Make sure IdentityNode is connected to an upstream node, not just the
|
||||||
// coalescer TL master node
|
// coalescer TL master node
|
||||||
@@ -126,11 +125,10 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule
|
|||||||
|
|
||||||
// The maximum number of requests from a single lane that can go into a
|
// The maximum number of requests from a single lane that can go into a
|
||||||
// coalesced request. Upper bound is min(DEPTH, 2**sourceWidth).
|
// coalesced request. Upper bound is min(DEPTH, 2**sourceWidth).
|
||||||
val numPerLaneReqs = DEPTH
|
val numPerLaneReqs = CoalescerConsts.DEPTH
|
||||||
|
|
||||||
val coalescer = Module(new MultiCoalescer(Seq(4, 5, 6), reqQueues, reqQueueEntryT))
|
val coalescer = Module(new MultiCoalescer(Seq(4, 5, 6), reqQueues, reqQueueEntryT))
|
||||||
|
|
||||||
val respQueueEntryT = new RespQueueEntry(sourceWidth, wordSize * 8)
|
val respQueueEntryT = new RespQueueEntry(sourceWidth, CoalescerConsts.WORD_SIZE * 8, sizeWidth)
|
||||||
val respQueues = Seq.tabulate(numLanes) { _ =>
|
val respQueues = Seq.tabulate(numLanes) { _ =>
|
||||||
Module(
|
Module(
|
||||||
new MultiPortQueue(
|
new MultiPortQueue(
|
||||||
@@ -181,38 +179,42 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule
|
|||||||
val reqQueue = reqQueues(lane)
|
val reqQueue = reqQueues(lane)
|
||||||
val req = Wire(reqQueueEntryT)
|
val req = Wire(reqQueueEntryT)
|
||||||
|
|
||||||
// **********
|
req.op := TLUtils.AOpcodeIsStore(tlIn.a.bits.opcode)
|
||||||
// CONNECTING IO
|
|
||||||
// **********
|
|
||||||
|
|
||||||
assert(~tlIn.a.valid || (tlIn.a.bits.opcode === OpCode.Get || tlIn.a.bits.opcode === OpCode.PutFullData ||
|
|
||||||
tlIn.a.bits.opcode === OpCode.PutPartialData), "Coalescer input has unsupported TL opcode");
|
|
||||||
req.op := tlIn.a.bits.opcode === OpCode.Get ? 0.U : 1.U
|
|
||||||
req.source := tlIn.a.bits.source
|
req.source := tlIn.a.bits.source
|
||||||
req.address := tlIn.a.bits.address
|
req.address := tlIn.a.bits.address
|
||||||
req.data := tlIn.a.bits.data
|
req.data := tlIn.a.bits.data
|
||||||
req.size := tlIn.a.bits.size
|
req.size := tlIn.a.bits.size
|
||||||
|
|
||||||
reqQueue.io.enq.valid := tlIn.a.valid
|
assert(reqQueue.io.queue.enq.ready, "reqQueue is supposed to be always ready")
|
||||||
reqQueue.io.enq.bits := req
|
reqQueue.io.queue.enq.valid := tlIn.a.valid
|
||||||
|
reqQueue.io.queue.enq.bits := req
|
||||||
// TODO: deq.ready should respect downstream ready
|
// TODO: deq.ready should respect downstream ready
|
||||||
reqQueue.io.deq.ready := true.B
|
reqQueue.io.queue.deq.ready := true.B
|
||||||
reqQueue.io.invalidate.bits := 0.U // TODO
|
reqQueue.io.invalidate.bits := coalescer.io.invalidate.bits(lane)
|
||||||
reqQueue.io.invalidate.valid := false.B // TODO
|
reqQueue.io.invalidate.valid := coalescer.io.invalidate.valid
|
||||||
printf(s"reqQueue(${lane}).count=%d\n", reqQueue.io.count)
|
|
||||||
|
|
||||||
val reqHead = reqQueue.io.deq.bits
|
tlOut.a.valid := reqQueue.io.queue.deq.valid
|
||||||
// FIXME: generate Get or Put according to read/write
|
|
||||||
val (reqLegal, reqBits) = edgeOut.Get(
|
val reqHead = reqQueue.io.queue.deq.bits
|
||||||
|
val (plegal, pbits) = edgeOut.Put(
|
||||||
fromSource = reqHead.source,
|
fromSource = reqHead.source,
|
||||||
// `toAddress` should be aligned to 2**lgSize
|
|
||||||
toAddress = reqHead.address,
|
toAddress = reqHead.address,
|
||||||
lgSize = 0.U
|
lgSize = reqHead.size,
|
||||||
|
// data is already aligned by MemTraceDriver
|
||||||
|
// NOTE: if tlIn has different parameters, this will no longer be the
|
||||||
|
// case
|
||||||
|
data = reqHead.data,
|
||||||
|
mask = reqHead.mask
|
||||||
)
|
)
|
||||||
assert(reqLegal, "unhandled illegal TL req gen")
|
val (glegal, gbits) = edgeOut.Get(
|
||||||
|
fromSource = reqHead.source,
|
||||||
tlOut.a.bits := reqBits // TODO: this is incorrect, this does not take iinto account of queue
|
toAddress = reqHead.address,
|
||||||
tlOut.a.valid := reqQueue.io.deq.valid
|
lgSize = reqHead.size
|
||||||
|
)
|
||||||
|
val legal = Mux(reqHead.op.asBool, plegal, glegal)
|
||||||
|
val bits = Mux(reqHead.op.asBool, pbits, gbits)
|
||||||
|
assert(legal, "unhandled illegal TL req gen")
|
||||||
|
tlOut.a.bits := bits
|
||||||
|
|
||||||
// Response queue
|
// Response queue
|
||||||
//
|
//
|
||||||
@@ -221,8 +223,9 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule
|
|||||||
val respQueue = respQueues(lane)
|
val respQueue = respQueues(lane)
|
||||||
val resp = Wire(respQueueEntryT)
|
val resp = Wire(respQueueEntryT)
|
||||||
resp.source := tlOut.d.bits.source
|
resp.source := tlOut.d.bits.source
|
||||||
|
resp.op := TLUtils.DOpcodeIsStore(tlOut.d.bits.opcode)
|
||||||
|
resp.size := tlOut.d.bits.size
|
||||||
resp.data := tlOut.d.bits.data
|
resp.data := tlOut.d.bits.data
|
||||||
// TODO: read/write bit?
|
|
||||||
|
|
||||||
// Queue up responses that didn't get coalesced originally ("noncoalesced" responses).
|
// Queue up responses that didn't get coalesced originally ("noncoalesced" responses).
|
||||||
// Coalesced (but uncoalesced back) responses will also be enqueued into the same queue.
|
// Coalesced (but uncoalesced back) responses will also be enqueued into the same queue.
|
||||||
@@ -237,11 +240,16 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule
|
|||||||
|
|
||||||
tlIn.d.valid := respQueue.io.deq(respQueueNoncoalPort).valid
|
tlIn.d.valid := respQueue.io.deq(respQueueNoncoalPort).valid
|
||||||
val respHead = respQueue.io.deq(respQueueNoncoalPort).bits
|
val respHead = respQueue.io.deq(respQueueNoncoalPort).bits
|
||||||
val respBits = edgeIn.AccessAck(
|
val apBits = edgeIn.AccessAck(
|
||||||
toSource = respHead.source,
|
toSource = respHead.source,
|
||||||
lgSize = 0.U,
|
lgSize = respHead.size
|
||||||
|
)
|
||||||
|
val agBits = edgeIn.AccessAck(
|
||||||
|
toSource = respHead.source,
|
||||||
|
lgSize = respHead.size,
|
||||||
data = respHead.data
|
data = respHead.data
|
||||||
)
|
)
|
||||||
|
val respBits = Mux(respHead.isStore, apBits, agBits)
|
||||||
tlIn.d.bits := respBits
|
tlIn.d.bits := respBits
|
||||||
|
|
||||||
// Debug only
|
// Debug only
|
||||||
@@ -268,24 +276,30 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule
|
|||||||
|
|
||||||
val (tlCoal, edgeCoal) = outer.coalescerNode.out(0)
|
val (tlCoal, edgeCoal) = outer.coalescerNode.out(0)
|
||||||
val coalReqAddress = Wire(UInt(tlCoal.params.addressBits.W))
|
val coalReqAddress = Wire(UInt(tlCoal.params.addressBits.W))
|
||||||
// TODO: bogus address
|
// FIXME: bogus address
|
||||||
coalReqAddress := (0xabcd.U + coalSourceId) << 4
|
coalReqAddress := (0xabcd.U + coalSourceId) << 4
|
||||||
// FIXME: coalesce lane 0 and lane 2's queue head whenever they're valid
|
// FIXME: bogus coalescing logic: coalesce whenever all 4 lanes have valid
|
||||||
|
// queue head
|
||||||
coalReqValid := reqQueues(0).io.deq.valid && reqQueues(1).io.deq.valid &&
|
coalReqValid := reqQueues(0).io.deq.valid && reqQueues(1).io.deq.valid &&
|
||||||
reqQueues(2).io.deq.valid && reqQueues(3).io.deq.valid
|
reqQueues(2).io.deq.valid && reqQueues(3).io.deq.valid
|
||||||
|
// coalReqValid := false.B
|
||||||
when(coalReqValid) {
|
when(coalReqValid) {
|
||||||
// invalidate original requests due to coalescing
|
// invalidate original requests due to coalescing
|
||||||
|
// FIXME: bogus
|
||||||
reqQueues(0).io.invalidate := 0x1.U
|
reqQueues(0).io.invalidate := 0x1.U
|
||||||
reqQueues(1).io.invalidate := 0x1.U
|
reqQueues(1).io.invalidate := 0x1.U
|
||||||
reqQueues(2).io.invalidate := 0x1.U
|
reqQueues(2).io.invalidate := 0x1.U
|
||||||
reqQueues(3).io.invalidate := 0x1.U
|
reqQueues(3).io.invalidate := 0x1.U
|
||||||
|
printf("coalescing succeeded!\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: write request
|
||||||
val (legal, bits) = edgeCoal.Get(
|
val (legal, bits) = edgeCoal.Get(
|
||||||
fromSource = coalSourceId,
|
fromSource = coalSourceId,
|
||||||
// `toAddress` should be aligned to 2**lgSize
|
// `toAddress` should be aligned to 2**lgSize
|
||||||
toAddress = coalReqAddress,
|
toAddress = coalReqAddress,
|
||||||
// 64 bits = 8 bytes = 2**(3) bytes
|
// 64 bits = 8 bytes = 2**(3) bytes
|
||||||
|
// TODO: parameterize to eg. cache line size
|
||||||
lgSize = 3.U
|
lgSize = 3.U
|
||||||
)
|
)
|
||||||
assert(legal, "unhandled illegal TL req gen")
|
assert(legal, "unhandled illegal TL req gen")
|
||||||
@@ -298,19 +312,24 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule
|
|||||||
|
|
||||||
// Construct new entry for the inflight table
|
// Construct new entry for the inflight table
|
||||||
// FIXME: don't instantiate inflight table entry type here. It leaks the table's impl
|
// FIXME: don't instantiate inflight table entry type here. It leaks the table's impl
|
||||||
// detail outside to the coalescer
|
// detail to the coalescer
|
||||||
val offsetBits = 4 // FIXME hardcoded
|
val offsetBits = 4 // FIXME hardcoded
|
||||||
val sizeBits = 2 // FIXME hardcoded
|
val sizeBits = 2 // FIXME hardcoded
|
||||||
val newEntry = Wire(
|
val newEntry = Wire(
|
||||||
new InflightCoalReqTableEntry(numLanes, numPerLaneReqs, sourceWidth, offsetBits, sizeBits)
|
new InflightCoalReqTableEntry(numLanes, numPerLaneReqs, sourceWidth, offsetBits, sizeBits)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
println(s"=========== table sourceWidth: ${sourceWidth}")
|
||||||
|
println(s"=========== table sizeBits: ${sizeBits}")
|
||||||
|
|
||||||
newEntry.source := coalSourceId
|
newEntry.source := coalSourceId
|
||||||
newEntry.lanes.foreach { l =>
|
newEntry.lanes.foreach { l =>
|
||||||
l.reqs.foreach { r =>
|
l.reqs.zipWithIndex.foreach { case (r, i) =>
|
||||||
// TODO: this part needs the actual coalescing logic to work
|
// TODO: this part needs the actual coalescing logic to work
|
||||||
r.valid := false.B
|
r.valid := false.B
|
||||||
|
r.source := i.U // FIXME bogus
|
||||||
r.offset := 1.U
|
r.offset := 1.U
|
||||||
r.size := 2.U
|
r.size := 2.U // FIXME hardcoded
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
newEntry.lanes(0).reqs(0).valid := true.B
|
newEntry.lanes(0).reqs(0).valid := true.B
|
||||||
@@ -326,6 +345,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule
|
|||||||
numLanes,
|
numLanes,
|
||||||
numPerLaneReqs,
|
numPerLaneReqs,
|
||||||
sourceWidth,
|
sourceWidth,
|
||||||
|
sizeWidth,
|
||||||
coalDataWidth,
|
coalDataWidth,
|
||||||
outer.numInflightCoalRequests
|
outer.numInflightCoalRequests
|
||||||
)
|
)
|
||||||
@@ -337,6 +357,8 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule
|
|||||||
uncoalescer.io.coalRespSrcId := tlCoal.d.bits.source
|
uncoalescer.io.coalRespSrcId := tlCoal.d.bits.source
|
||||||
uncoalescer.io.coalRespData := tlCoal.d.bits.data
|
uncoalescer.io.coalRespData := tlCoal.d.bits.data
|
||||||
|
|
||||||
|
println(s"=========== coalRespData width: ${tlCoal.d.bits.data.widthOption.get}")
|
||||||
|
|
||||||
// Queue up synthesized uncoalesced responses into each lane's response queue
|
// Queue up synthesized uncoalesced responses into each lane's response queue
|
||||||
(respQueues zip uncoalescer.io.uncoalResps).foreach { case (q, lanes) =>
|
(respQueues zip uncoalescer.io.uncoalResps).foreach { case (q, lanes) =>
|
||||||
lanes.zipWithIndex.foreach { case (resp, i) =>
|
lanes.zipWithIndex.foreach { case (resp, i) =>
|
||||||
@@ -353,7 +375,6 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Debug
|
// Debug
|
||||||
dontTouch(coalReqValid)
|
|
||||||
dontTouch(coalReqAddress)
|
dontTouch(coalReqAddress)
|
||||||
val coalRespData = tlCoal.d.bits.data
|
val coalRespData = tlCoal.d.bits.data
|
||||||
dontTouch(coalRespData)
|
dontTouch(coalRespData)
|
||||||
@@ -366,14 +387,13 @@ class UncoalescingUnit(
|
|||||||
val numLanes: Int,
|
val numLanes: Int,
|
||||||
val numPerLaneReqs: Int,
|
val numPerLaneReqs: Int,
|
||||||
val sourceWidth: Int,
|
val sourceWidth: Int,
|
||||||
|
val sizeWidth: Int,
|
||||||
val coalDataWidth: Int,
|
val coalDataWidth: Int,
|
||||||
val numInflightCoalRequests: Int
|
val numInflightCoalRequests: Int
|
||||||
) extends Module {
|
) extends Module {
|
||||||
val inflightTable = Module(
|
val inflightTable = Module(
|
||||||
new InflightCoalReqTable(numLanes, numPerLaneReqs, sourceWidth, numInflightCoalRequests)
|
new InflightCoalReqTable(numLanes, numPerLaneReqs, sourceWidth, numInflightCoalRequests)
|
||||||
)
|
)
|
||||||
val wordSize = 4 // FIXME duplicate
|
|
||||||
|
|
||||||
val io = IO(new Bundle {
|
val io = IO(new Bundle {
|
||||||
val coalReqValid = Input(Bool())
|
val coalReqValid = Input(Bool())
|
||||||
val newEntry = Input(inflightTable.entryT)
|
val newEntry = Input(inflightTable.entryT)
|
||||||
@@ -381,7 +401,10 @@ class UncoalescingUnit(
|
|||||||
val coalRespSrcId = Input(UInt(sourceWidth.W))
|
val coalRespSrcId = Input(UInt(sourceWidth.W))
|
||||||
val coalRespData = Input(UInt(coalDataWidth.W))
|
val coalRespData = Input(UInt(coalDataWidth.W))
|
||||||
val uncoalResps = Output(
|
val uncoalResps = Output(
|
||||||
Vec(numLanes, Vec(numPerLaneReqs, ValidIO(new RespQueueEntry(sourceWidth, wordSize * 8))))
|
Vec(
|
||||||
|
numLanes,
|
||||||
|
Vec(numPerLaneReqs, ValidIO(new RespQueueEntry(sourceWidth, WordSizeInBytes() * 8, sizeWidth)))
|
||||||
|
)
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -405,7 +428,10 @@ class UncoalescingUnit(
|
|||||||
def getCoalescedDataChunk(data: UInt, dataWidth: Int, offset: UInt, byteSize: Int): UInt = {
|
def getCoalescedDataChunk(data: UInt, dataWidth: Int, offset: UInt, byteSize: Int): UInt = {
|
||||||
val bitSize = byteSize * 8
|
val bitSize = byteSize * 8
|
||||||
val sizeMask = (1.U << bitSize) - 1.U
|
val sizeMask = (1.U << bitSize) - 1.U
|
||||||
assert(dataWidth % bitSize == 0, "coalesced data width not evenly divisible by size")
|
assert(
|
||||||
|
dataWidth > 0 && dataWidth % bitSize == 0,
|
||||||
|
s"coalesced data width ($dataWidth) not evenly divisible by core req size ($bitSize)"
|
||||||
|
)
|
||||||
val numChunks = dataWidth / bitSize
|
val numChunks = dataWidth / bitSize
|
||||||
val chunks = Wire(Vec(numChunks, UInt(bitSize.W)))
|
val chunks = Wire(Vec(numChunks, UInt(bitSize.W)))
|
||||||
val offsets = (0 until numChunks)
|
val offsets = (0 until numChunks)
|
||||||
@@ -418,23 +444,22 @@ class UncoalescingUnit(
|
|||||||
|
|
||||||
// Un-coalesce responses back to individual lanes
|
// Un-coalesce responses back to individual lanes
|
||||||
val found = inflightTable.io.lookup.bits
|
val found = inflightTable.io.lookup.bits
|
||||||
(found.lanes zip io.uncoalResps).foreach { case (lane, ioLane) =>
|
(found.lanes zip io.uncoalResps).foreach { case (perLane, ioPerLane) =>
|
||||||
lane.reqs.zipWithIndex.foreach { case (req, i) =>
|
perLane.reqs.zipWithIndex.foreach { case (oldReq, i) =>
|
||||||
val ioReq = ioLane(i)
|
val ioOldReq = ioPerLane(i)
|
||||||
|
|
||||||
// FIXME: only looking at 0th srcId entry
|
// FIXME: only looking at 0th srcId entry
|
||||||
|
|
||||||
ioReq.valid := false.B
|
ioOldReq.valid := false.B
|
||||||
ioReq.bits := DontCare
|
ioOldReq.bits := DontCare
|
||||||
|
|
||||||
when(inflightTable.io.lookup.valid) {
|
when(inflightTable.io.lookup.valid) {
|
||||||
ioReq.valid := req.valid
|
ioOldReq.valid := oldReq.valid
|
||||||
ioReq.bits.source := 0.U
|
ioOldReq.bits.source := oldReq.source
|
||||||
|
|
||||||
// FIXME: disregard size enum for now
|
// FIXME: disregard size enum for now
|
||||||
val byteSize = 4
|
val byteSize = 4
|
||||||
ioReq.bits.data :=
|
ioOldReq.bits.data :=
|
||||||
getCoalescedDataChunk(io.coalRespData, coalDataWidth, req.offset, byteSize)
|
getCoalescedDataChunk(io.coalRespData, coalDataWidth, oldReq.offset, byteSize)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -478,6 +503,8 @@ class InflightCoalReqTable(
|
|||||||
table(i).valid := false.B
|
table(i).valid := false.B
|
||||||
table(i).bits.lanes.foreach { l =>
|
table(i).bits.lanes.foreach { l =>
|
||||||
l.reqs.foreach { r =>
|
l.reqs.foreach { r =>
|
||||||
|
r.valid := false.B
|
||||||
|
r.source := 0.U
|
||||||
r.offset := 0.U
|
r.offset := 0.U
|
||||||
r.size := 0.U
|
r.size := 0.U
|
||||||
}
|
}
|
||||||
@@ -490,13 +517,12 @@ class InflightCoalReqTable(
|
|||||||
.map { i => table(i).valid }
|
.map { i => table(i).valid }
|
||||||
.reduce { (v0, v1) => v0 && v1 }
|
.reduce { (v0, v1) => v0 && v1 }
|
||||||
// Inflight table should never be full. It should have enough number of
|
// Inflight table should never be full. It should have enough number of
|
||||||
// entries to keep track of all outstanding core-side requests; otherwise,
|
// entries to keep track of all outstanding core-side requests, i.e.
|
||||||
// it will stall the core issuing logic.
|
// (2 ** oldSrcIdBits) entries.
|
||||||
assert(!full, "table is blocking coalescer")
|
assert(!full, "inflight table is full and blocking coalescer")
|
||||||
dontTouch(full)
|
dontTouch(full)
|
||||||
|
|
||||||
// Enqueue logic
|
// Enqueue logic
|
||||||
//
|
|
||||||
io.enq.ready := !full
|
io.enq.ready := !full
|
||||||
val enqFire = io.enq.ready && io.enq.valid
|
val enqFire = io.enq.ready && io.enq.valid
|
||||||
when(enqFire) {
|
when(enqFire) {
|
||||||
@@ -511,7 +537,6 @@ class InflightCoalReqTable(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Lookup logic
|
// Lookup logic
|
||||||
//
|
|
||||||
io.lookup.valid := table(io.lookupSourceId).valid
|
io.lookup.valid := table(io.lookupSourceId).valid
|
||||||
io.lookup.bits := table(io.lookupSourceId).bits
|
io.lookup.bits := table(io.lookupSourceId).bits
|
||||||
val lookupFire = io.lookup.ready && io.lookup.valid
|
val lookupFire = io.lookup.ready && io.lookup.valid
|
||||||
@@ -524,6 +549,7 @@ class InflightCoalReqTable(
|
|||||||
}
|
}
|
||||||
|
|
||||||
class InflightCoalReqTableEntry(
|
class InflightCoalReqTableEntry(
|
||||||
|
|
||||||
val numLanes: Int,
|
val numLanes: Int,
|
||||||
// Maximum number of requests from a single lane that can get coalesced into a single request
|
// Maximum number of requests from a single lane that can get coalesced into a single request
|
||||||
val numPerLaneReqs: Int,
|
val numPerLaneReqs: Int,
|
||||||
@@ -531,14 +557,15 @@ class InflightCoalReqTableEntry(
|
|||||||
val offsetBits: Int,
|
val offsetBits: Int,
|
||||||
val sizeBits: Int
|
val sizeBits: Int
|
||||||
) extends Bundle {
|
) extends Bundle {
|
||||||
class CoreReq extends Bundle {
|
class PerCoreReq extends Bundle {
|
||||||
val valid = Bool()
|
val valid = Bool()
|
||||||
|
// FIXME: oldId and newId shares the same width
|
||||||
|
val source = UInt(sourceWidth.W)
|
||||||
val offset = UInt(offsetBits.W)
|
val offset = UInt(offsetBits.W)
|
||||||
val size = UInt(sizeBits.W)
|
val size = UInt(sizeBits.W)
|
||||||
}
|
}
|
||||||
class PerLane extends Bundle {
|
class PerLane extends Bundle {
|
||||||
// FIXME: if numPerLaneReqs != 2 ** sourceWidth, we need to store srcId as well
|
val reqs = Vec(numPerLaneReqs, new PerCoreReq)
|
||||||
val reqs = Vec(numPerLaneReqs, new CoreReq)
|
|
||||||
}
|
}
|
||||||
// sourceId of the coalesced response that just came back. This will be the
|
// sourceId of the coalesced response that just came back. This will be the
|
||||||
// key that queries the table.
|
// key that queries the table.
|
||||||
@@ -561,6 +588,8 @@ class CoalShiftQueue[T <: Data](
|
|||||||
val invalidate = Input(Valid(UInt(entries.W)))
|
val invalidate = Input(Valid(UInt(entries.W)))
|
||||||
val mask = Output(UInt(entries.W))
|
val mask = Output(UInt(entries.W))
|
||||||
val elts = Output(Vec(entries, gen))
|
val elts = Output(Vec(entries, gen))
|
||||||
|
// 'QueueIO' provides io.count, but we might not want to use it in the
|
||||||
|
// coalescer because it has potentially expensive PopCount
|
||||||
})
|
})
|
||||||
|
|
||||||
private val valid = RegInit(VecInit(Seq.fill(entries) { false.B }))
|
private val valid = RegInit(VecInit(Seq.fill(entries) { false.B }))
|
||||||
@@ -585,7 +614,7 @@ class CoalShiftQueue[T <: Data](
|
|||||||
def paddedUsed = pad({ i: Int => used(i) })
|
def paddedUsed = pad({ i: Int => used(i) })
|
||||||
def validAfterInv(i: Int) = valid(i) && !io.invalidate.bits(i)
|
def validAfterInv(i: Int) = valid(i) && !io.invalidate.bits(i)
|
||||||
|
|
||||||
val shift = io.queue.deq.ready || (used =/= 0.U) && !validAfterInv(0)
|
val shift = (used =/= 0.U) && (io.queue.deq.ready || !validAfterInv(0))
|
||||||
for (i <- 0 until entries) {
|
for (i <- 0 until entries) {
|
||||||
val wdata = if (i == entries - 1) io.queue.enq.bits else Mux(!used(i + 1), io.queue.enq.bits, elts(i + 1))
|
val wdata = if (i == entries - 1) io.queue.enq.bits else Mux(!used(i + 1), io.queue.enq.bits, elts(i + 1))
|
||||||
val wen = Mux(
|
val wen = Mux(
|
||||||
@@ -635,14 +664,32 @@ class CoalShiftQueue[T <: Data](
|
|||||||
io.queue.count := PopCount(io.mask)
|
io.queue.count := PopCount(io.mask)
|
||||||
}
|
}
|
||||||
|
|
||||||
class MemTraceDriver(numLanes: Int = 4, traceFile : String = "vecadd.core1.thread4.trace")(implicit p: Parameters) extends LazyModule {
|
object TLUtils {
|
||||||
|
def AOpcodeIsStore(opcode: UInt): Bool = {
|
||||||
|
assert(
|
||||||
|
opcode === TLMessages.PutFullData || opcode === TLMessages.Get,
|
||||||
|
"unhandled TL A opcode found"
|
||||||
|
)
|
||||||
|
Mux(opcode === TLMessages.PutFullData, true.B, false.B)
|
||||||
|
}
|
||||||
|
def DOpcodeIsStore(opcode: UInt): Bool = {
|
||||||
|
assert(
|
||||||
|
opcode === TLMessages.AccessAck || opcode === TLMessages.AccessAckData,
|
||||||
|
"unhandled TL D opcode found"
|
||||||
|
)
|
||||||
|
Mux(opcode === TLMessages.AccessAck, true.B, false.B)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class MemTraceDriver(numLanes: Int = 4, filename: String = "vecadd.core1.thread4.trace")(implicit
|
||||||
|
p: Parameters
|
||||||
|
) extends LazyModule {
|
||||||
// Create N client nodes together
|
// Create N client nodes together
|
||||||
val laneNodes = Seq.tabulate(numLanes) { i =>
|
val laneNodes = Seq.tabulate(numLanes) { i =>
|
||||||
val clientParam = Seq(
|
val clientParam = Seq(
|
||||||
TLMasterParameters.v1(
|
TLMasterParameters.v1(
|
||||||
name = "MemTraceDriver" + i.toString,
|
name = "MemTraceDriver" + i.toString,
|
||||||
sourceId = IdRange(0, 0x1000)
|
sourceId = IdRange(0, 0x10)
|
||||||
// visibility = Seq(AddressSet(0x0000, 0xffffff))
|
// visibility = Seq(AddressSet(0x0000, 0xffffff))
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@@ -654,23 +701,33 @@ class MemTraceDriver(numLanes: Int = 4, traceFile : String = "vecadd.core1.threa
|
|||||||
val node = TLIdentityNode()
|
val node = TLIdentityNode()
|
||||||
laneNodes.foreach { l => node := l }
|
laneNodes.foreach { l => node := l }
|
||||||
|
|
||||||
lazy val module = new MemTraceDriverImp(this, numLanes, traceFile)
|
lazy val module = new MemTraceDriverImp(this, numLanes, filename)
|
||||||
}
|
}
|
||||||
|
|
||||||
class TraceReq extends Bundle {
|
trait HasTraceLine {
|
||||||
|
val valid: UInt
|
||||||
|
val source: UInt
|
||||||
|
val address: UInt
|
||||||
|
val is_store: UInt
|
||||||
|
val size: UInt
|
||||||
|
val data: UInt
|
||||||
|
}
|
||||||
|
|
||||||
|
// Used for both request and response. Response had address set to 0
|
||||||
|
// NOTE: these widths have to agree with what's hardcoded in Verilog.
|
||||||
|
class TraceLine extends Bundle with HasTraceLine {
|
||||||
val valid = Bool()
|
val valid = Bool()
|
||||||
val address = UInt(64.W)
|
val source = UInt(32.W)
|
||||||
|
val address = UInt(64.W) // FIXME: in Verilog this is the same as data width
|
||||||
val is_store = Bool()
|
val is_store = Bool()
|
||||||
val mask = UInt(8.W)
|
val size = UInt(8.W) // this is log2(bytesize) as in TL A bundle
|
||||||
val data = UInt(64.W)
|
val data = UInt(64.W)
|
||||||
}
|
}
|
||||||
|
|
||||||
class MemTraceDriverImp(outer: MemTraceDriver, numLanes: Int, traceFile: String)
|
class MemTraceDriverImp(outer: MemTraceDriver, numLanes: Int, traceFile: String)
|
||||||
extends LazyModuleImp(outer)
|
extends LazyModuleImp(outer)
|
||||||
with UnitTestModule {
|
with UnitTestModule {
|
||||||
val sim = Module(
|
val sim = Module(new SimMemTrace(traceFile, numLanes))
|
||||||
new SimMemTrace(traceFile, numLanes)
|
|
||||||
)
|
|
||||||
sim.io.clock := clock
|
sim.io.clock := clock
|
||||||
sim.io.reset := reset.asBool
|
sim.io.reset := reset.asBool
|
||||||
sim.io.trace_read.ready := true.B
|
sim.io.trace_read.ready := true.B
|
||||||
@@ -678,15 +735,18 @@ class MemTraceDriverImp(outer: MemTraceDriver, numLanes: Int, traceFile : String
|
|||||||
// Split output of SimMemTrace, which is flattened across all lanes,
|
// Split output of SimMemTrace, which is flattened across all lanes,
|
||||||
// back to each lane's.
|
// back to each lane's.
|
||||||
|
|
||||||
// Maybe this part can be improved, since now we are still mannually shifting everything
|
val laneReqs = Wire(Vec(numLanes, new TraceLine))
|
||||||
val laneReqs = Wire(Vec(numLanes, new TraceReq))
|
val addrW = laneReqs(0).address.getWidth
|
||||||
|
val sizeW = laneReqs(0).size.getWidth
|
||||||
|
val dataW = laneReqs(0).data.getWidth
|
||||||
laneReqs.zipWithIndex.foreach { case (req, i) =>
|
laneReqs.zipWithIndex.foreach { case (req, i) =>
|
||||||
req.valid := (sim.io.trace_read.valid >> i)
|
req.valid := sim.io.trace_read.valid(i)
|
||||||
req.address := (sim.io.trace_read.address >> (64 * i))
|
// TODO: driver trace doesn't contain source id
|
||||||
req.is_store := (sim.io.trace_read.is_store >> i)
|
req.source := 0.U
|
||||||
req.mask := (sim.io.trace_read.store_mask >> (8 * i))
|
req.address := sim.io.trace_read.address(addrW * (i + 1) - 1, addrW * i)
|
||||||
req.data := (sim.io.trace_read.data >> (64 * i))
|
req.is_store := sim.io.trace_read.is_store(i)
|
||||||
|
req.size := sim.io.trace_read.size(sizeW * (i + 1) - 1, sizeW * i)
|
||||||
|
req.data := sim.io.trace_read.data(dataW * (i + 1) - 1, dataW * i)
|
||||||
}
|
}
|
||||||
|
|
||||||
// To prevent collision of sourceId with a current in-flight message,
|
// To prevent collision of sourceId with a current in-flight message,
|
||||||
@@ -696,30 +756,76 @@ class MemTraceDriverImp(outer: MemTraceDriver, numLanes: Int, traceFile : String
|
|||||||
sourceIdCounter := sourceIdCounter + 1.U
|
sourceIdCounter := sourceIdCounter + 1.U
|
||||||
|
|
||||||
// Issue here is that Vortex mem range is not within Chipyard Mem range
|
// Issue here is that Vortex mem range is not within Chipyard Mem range
|
||||||
//In default setting, all mem-req for program data must be within 0X80000000 -> 0X90000000
|
// In default setting, all mem-req for program data must be within
|
||||||
//
|
// 0X80000000 -> 0X90000000
|
||||||
def hashToValidPhyAddr(addr: UInt): UInt = {
|
def hashToValidPhyAddr(addr: UInt): UInt = {
|
||||||
Cat(8.U(4.W), addr(27, 3), 0.U(3.W) )
|
Cat(8.U(4.W), addr(27, 0))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Connect each lane to its respective TL node.
|
// Generate TL requests corresponding to the trace lines
|
||||||
(outer.laneNodes zip laneReqs).foreach { case (node, req) =>
|
(outer.laneNodes zip laneReqs).foreach { case (node, req) =>
|
||||||
val (tlOut, edge) = node.out(0)
|
// Core only makes accesses of granularity larger than a word, so we want
|
||||||
|
// the trace driver to act so as well.
|
||||||
|
// That means if req.size is smaller than word size, we need to pad data
|
||||||
|
// with zeros to generate a word-size request, and set mask accordingly.
|
||||||
|
val offsetInWord = req.address % WordSizeInBytes().U
|
||||||
|
val subword = req.size < log2Ceil(WordSizeInBytes()).U
|
||||||
|
|
||||||
|
val mask = Wire(UInt(WordSizeInBytes().W))
|
||||||
|
val wordData = Wire(UInt((WordSizeInBytes() * 8).W))
|
||||||
|
val sizeInBytes = Wire(UInt((sizeW + 1).W))
|
||||||
|
sizeInBytes := (1.U) << req.size
|
||||||
|
mask := Mux(subword, (~((~0.U(64.W)) << sizeInBytes)) << offsetInWord, ~0.U)
|
||||||
|
wordData := Mux(subword, req.data << (offsetInWord * 8.U), req.data)
|
||||||
|
val wordAlignedAddress = req.address & ~((1 << log2Ceil(WordSizeInBytes())) - 1).U(addrW.W)
|
||||||
|
|
||||||
|
assert(
|
||||||
|
req.size <= log2Ceil(WordSizeInBytes()).U,
|
||||||
|
s"trace driver currently does not support access sizes larger than word size (${WordSizeInBytes()})"
|
||||||
|
)
|
||||||
|
val wordAlignedSize = 2.U // FIXME: hardcoded
|
||||||
|
|
||||||
|
// when(req.valid && subword) {
|
||||||
|
// printf(
|
||||||
|
// "address=%x, size=%d, data=%x, addressMask=%x, wordAlignedAddress=%x, mask=%x, wordData=%x\n",
|
||||||
|
// req.address,
|
||||||
|
// req.size,
|
||||||
|
// req.data,
|
||||||
|
// ~((1 << log2Ceil(WordSizeInBytes())) - 1).U(addrW.W),
|
||||||
|
// wordAlignedAddress,
|
||||||
|
// mask,
|
||||||
|
// wordData
|
||||||
|
// )
|
||||||
|
// }
|
||||||
|
|
||||||
|
val (tlOut, edge) = node.out(0)
|
||||||
val (plegal, pbits) = edge.Put(
|
val (plegal, pbits) = edge.Put(
|
||||||
fromSource = sourceIdCounter,
|
fromSource = sourceIdCounter,
|
||||||
toAddress = hashToValidPhyAddr(req.address),
|
toAddress = hashToValidPhyAddr(wordAlignedAddress),
|
||||||
lgSize = 3.U,
|
lgSize = wordAlignedSize, // trace line already holds log2(size)
|
||||||
data = req.data
|
// data should be aligned to beatBytes
|
||||||
|
data = (wordData << (8.U * (wordAlignedAddress % edge.manager.beatBytes.U)))
|
||||||
)
|
)
|
||||||
val (glegal, gbits) = edge.Get(
|
val (glegal, gbits) = edge.Get(
|
||||||
fromSource = sourceIdCounter,
|
fromSource = sourceIdCounter,
|
||||||
toAddress = hashToValidPhyAddr(req.address),
|
toAddress = hashToValidPhyAddr(wordAlignedAddress),
|
||||||
lgSize = 3.U
|
lgSize = wordAlignedSize
|
||||||
)
|
)
|
||||||
val legal = Mux(req.is_store, plegal, glegal)
|
val legal = Mux(req.is_store, plegal, glegal)
|
||||||
val bits = Mux(req.is_store, pbits, gbits)
|
val bits = Mux(req.is_store, pbits, gbits)
|
||||||
|
|
||||||
|
when(tlOut.a.valid) {
|
||||||
|
TracePrintf(
|
||||||
|
"MemTraceDriver",
|
||||||
|
tlOut.a.bits.address,
|
||||||
|
tlOut.a.bits.size,
|
||||||
|
tlOut.a.bits.mask,
|
||||||
|
req.is_store,
|
||||||
|
tlOut.a.bits.data,
|
||||||
|
req.data
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
assert(legal, "illegal TL req gen")
|
assert(legal, "illegal TL req gen")
|
||||||
tlOut.a.valid := req.valid
|
tlOut.a.valid := req.valid
|
||||||
tlOut.a.bits := bits
|
tlOut.a.bits := bits
|
||||||
@@ -728,19 +834,25 @@ class MemTraceDriverImp(outer: MemTraceDriver, numLanes: Int, traceFile : String
|
|||||||
tlOut.d.ready := true.B
|
tlOut.d.ready := true.B
|
||||||
tlOut.e.valid := false.B
|
tlOut.e.valid := false.B
|
||||||
|
|
||||||
|
println(s"======= MemTraceDriver: TL data width: ${tlOut.params.dataBits}")
|
||||||
|
|
||||||
dontTouch(tlOut.a)
|
dontTouch(tlOut.a)
|
||||||
dontTouch(tlOut.d)
|
dontTouch(tlOut.d)
|
||||||
}
|
}
|
||||||
|
|
||||||
io.finished := sim.io.trace_read.finished
|
// Give some slack time after trace EOF to the downstream system so that we
|
||||||
when(io.finished){
|
// make sure to receive all outstanding responses.
|
||||||
assert(false.B, "\n\n\nsimulation Successfully finished\n\n\n (this assertion intentional fail upon MemTracer termination)")
|
val finishCounter = RegInit(200.U(64.W))
|
||||||
|
when(sim.io.trace_read.finished) {
|
||||||
|
finishCounter := finishCounter - 1.U
|
||||||
}
|
}
|
||||||
|
io.finished := (finishCounter === 0.U)
|
||||||
// Clock Counter, for debugging purpose
|
// when(io.finished) {
|
||||||
val clkcount = RegInit(0.U(64.W))
|
// assert(
|
||||||
clkcount := clkcount + 1.U
|
// false.B,
|
||||||
dontTouch(clkcount)
|
// "\n\n\nsimulation Successfully finished\n\n\n (this assertion intentional fail upon MemTracer termination)"
|
||||||
|
// )
|
||||||
|
// }
|
||||||
}
|
}
|
||||||
|
|
||||||
class SimMemTrace(filename: String, numLanes: Int)
|
class SimMemTrace(filename: String, numLanes: Int)
|
||||||
@@ -748,22 +860,27 @@ class SimMemTrace(filename: String, numLanes: Int)
|
|||||||
Map("FILENAME" -> filename, "NUM_LANES" -> numLanes)
|
Map("FILENAME" -> filename, "NUM_LANES" -> numLanes)
|
||||||
)
|
)
|
||||||
with HasBlackBoxResource {
|
with HasBlackBoxResource {
|
||||||
|
val traceLineT = new TraceLine
|
||||||
|
val addrW = traceLineT.address.getWidth
|
||||||
|
val sizeW = traceLineT.size.getWidth
|
||||||
|
val dataW = traceLineT.data.getWidth
|
||||||
|
|
||||||
val io = IO(new Bundle {
|
val io = IO(new Bundle {
|
||||||
val clock = Input(Clock())
|
val clock = Input(Clock())
|
||||||
val reset = Input(Bool())
|
val reset = Input(Bool())
|
||||||
|
|
||||||
// These names have to match declarations in the Verilog code, eg.
|
// These names have to match declarations in the Verilog code, eg.
|
||||||
// trace_read_address.
|
// trace_read_address.
|
||||||
val trace_read = new Bundle {
|
val trace_read = new Bundle { // can't use HasTraceLine because this doesn't have source
|
||||||
val ready = Input(Bool())
|
val ready = Input(Bool())
|
||||||
val valid = Output(UInt(numLanes.W))
|
val valid = Output(UInt(numLanes.W))
|
||||||
// Chisel can't interface with Verilog 2D port, so flatten all lanes into
|
// Chisel can't interface with Verilog 2D port, so flatten all lanes into
|
||||||
// single wide 1D array.
|
// single wide 1D array.
|
||||||
// TODO: assumes 64-bit address.
|
// TODO: assumes 64-bit address.
|
||||||
val address = Output(UInt((64 * numLanes).W))
|
val address = Output(UInt((addrW * numLanes).W))
|
||||||
val is_store = Output(UInt(numLanes.W))
|
val is_store = Output(UInt(numLanes.W))
|
||||||
val store_mask = Output(UInt((8 * numLanes).W))
|
val size = Output(UInt((sizeW * numLanes).W))
|
||||||
val data = Output(UInt((64 * numLanes).W))
|
val data = Output(UInt((dataW * numLanes).W))
|
||||||
val finished = Output(Bool())
|
val finished = Output(Bool())
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
@@ -773,23 +890,336 @@ class SimMemTrace(filename: String, numLanes: Int)
|
|||||||
addResource("/csrc/SimMemTrace.h")
|
addResource("/csrc/SimMemTrace.h")
|
||||||
}
|
}
|
||||||
|
|
||||||
class CoalConnectTrace(implicit p: Parameters) extends LazyModule {
|
class MemTraceLogger(
|
||||||
|
numLanes: Int,
|
||||||
|
// base filename for the generated trace files. full filename will be
|
||||||
|
// suffixed depending on `reqEnable`/`respEnable`/`loggerName`.
|
||||||
|
filename: String = "vecadd.core1.thread4.trace",
|
||||||
|
reqEnable: Boolean = true,
|
||||||
|
respEnable: Boolean = true,
|
||||||
|
// filename suffix that is unique to this logger module.
|
||||||
|
loggerName: String = ".logger"
|
||||||
|
)(implicit
|
||||||
|
p: Parameters
|
||||||
|
) extends LazyModule {
|
||||||
|
val node = TLIdentityNode()
|
||||||
|
|
||||||
|
// val beatBytes = 8 // FIXME: hardcoded
|
||||||
|
// val node = TLManagerNode(Seq.tabulate(numLanes) { _ =>
|
||||||
|
// TLSlavePortParameters.v1(
|
||||||
|
// Seq(
|
||||||
|
// TLSlaveParameters.v1(
|
||||||
|
// address = List(AddressSet(0x0000, 0xffffff)), // FIXME: hardcoded
|
||||||
|
// supportsGet = TransferSizes(1, beatBytes),
|
||||||
|
// supportsPutPartial = TransferSizes(1, beatBytes),
|
||||||
|
// supportsPutFull = TransferSizes(1, beatBytes)
|
||||||
|
// )
|
||||||
|
// ),
|
||||||
|
// beatBytes = beatBytes
|
||||||
|
// )
|
||||||
|
// })
|
||||||
|
|
||||||
|
// Copied from freechips.rocketchip.trailingZeros which only supports Scala
|
||||||
|
// integers
|
||||||
|
def trailingZeros(x: UInt): UInt = {
|
||||||
|
Mux(x === 0.U, x.widthOption.get.U, Log2(x & -x))
|
||||||
|
}
|
||||||
|
|
||||||
|
lazy val module = new Impl
|
||||||
|
class Impl extends LazyModuleImp(this) {
|
||||||
|
val io = IO(new Bundle {
|
||||||
|
val numReqs = Output(UInt(64.W))
|
||||||
|
val numResps = Output(UInt(64.W))
|
||||||
|
val reqBytes = Output(UInt(64.W))
|
||||||
|
val respBytes = Output(UInt(64.W))
|
||||||
|
})
|
||||||
|
|
||||||
|
val numReqs = RegInit(0.U(64.W))
|
||||||
|
val numResps = RegInit(0.U(64.W))
|
||||||
|
val reqBytes = RegInit(0.U(64.W))
|
||||||
|
val respBytes = RegInit(0.U(64.W))
|
||||||
|
io.numReqs := numReqs
|
||||||
|
io.numResps := numResps
|
||||||
|
io.reqBytes := reqBytes
|
||||||
|
io.respBytes := respBytes
|
||||||
|
|
||||||
|
val simReq =
|
||||||
|
if (reqEnable)
|
||||||
|
Some(Module(new SimMemTraceLogger(false, s"${filename}.${loggerName}.req", numLanes)))
|
||||||
|
else None
|
||||||
|
val simResp =
|
||||||
|
if (respEnable)
|
||||||
|
Some(Module(new SimMemTraceLogger(true, s"${filename}.${loggerName}.resp", numLanes)))
|
||||||
|
else None
|
||||||
|
if (simReq.isDefined) {
|
||||||
|
simReq.get.io.clock := clock
|
||||||
|
simReq.get.io.reset := reset.asBool
|
||||||
|
}
|
||||||
|
if (simResp.isDefined) {
|
||||||
|
simResp.get.io.clock := clock
|
||||||
|
simResp.get.io.reset := reset.asBool
|
||||||
|
}
|
||||||
|
|
||||||
|
val laneReqs = Wire(Vec(numLanes, new TraceLine))
|
||||||
|
val laneResps = Wire(Vec(numLanes, new TraceLine))
|
||||||
|
|
||||||
|
assert(
|
||||||
|
numLanes == node.in.length,
|
||||||
|
"`numLanes` does not match the number of TL edges connected to the MemTraceLogger"
|
||||||
|
)
|
||||||
|
|
||||||
|
// snoop on the TileLink edges to log traffic
|
||||||
|
((node.in zip node.out) zip (laneReqs zip laneResps)).foreach {
|
||||||
|
case (((tlIn, _), (tlOut, _)), (req, resp)) =>
|
||||||
|
tlOut.a <> tlIn.a
|
||||||
|
tlIn.d <> tlOut.d
|
||||||
|
|
||||||
|
// requests on TL A channel
|
||||||
|
//
|
||||||
|
req.valid := tlIn.a.valid
|
||||||
|
req.size := tlIn.a.bits.size
|
||||||
|
req.is_store := TLUtils.AOpcodeIsStore(tlIn.a.bits.opcode)
|
||||||
|
req.source := tlIn.a.bits.source
|
||||||
|
// TL always carries the exact unaligned address that the client
|
||||||
|
// originally requested, so no postprocessing required
|
||||||
|
req.address := tlIn.a.bits.address
|
||||||
|
|
||||||
|
// TL data
|
||||||
|
//
|
||||||
|
// When tlIn.a.bits.size is smaller than the data bus width, need to
|
||||||
|
// figure out which byte lanes we actually accessed so that
|
||||||
|
// we can write that to the memory trace.
|
||||||
|
// See Section 4.5 Byte Lanes in spec 1.8.1
|
||||||
|
|
||||||
|
// This assert only holds true for PutFullData and not PutPartialData,
|
||||||
|
// where HIGH bits in the mask may not be contiguous.
|
||||||
|
assert(
|
||||||
|
PopCount(tlIn.a.bits.mask) === (1.U << tlIn.a.bits.size),
|
||||||
|
"mask HIGH bits do not match the TL size. This should have been handled by the TL generator logic"
|
||||||
|
)
|
||||||
|
val trailingZerosInMask = trailingZeros(tlIn.a.bits.mask)
|
||||||
|
val mask = ~((~0.U) << (trailingZerosInMask * 8.U))
|
||||||
|
req.data := mask & (tlIn.a.bits.data >> (trailingZerosInMask * 8.U))
|
||||||
|
|
||||||
|
when(req.valid) {
|
||||||
|
TracePrintf(
|
||||||
|
"MemTraceLogger",
|
||||||
|
tlIn.a.bits.address,
|
||||||
|
tlIn.a.bits.size,
|
||||||
|
tlIn.a.bits.mask,
|
||||||
|
req.is_store,
|
||||||
|
tlIn.a.bits.data,
|
||||||
|
req.data
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// responses on TL D channel
|
||||||
|
//
|
||||||
|
resp.valid := tlOut.d.valid
|
||||||
|
resp.size := tlOut.d.bits.size
|
||||||
|
resp.is_store := TLUtils.DOpcodeIsStore(tlOut.d.bits.opcode)
|
||||||
|
resp.source := tlOut.d.bits.source
|
||||||
|
// NOTE: TL D channel doesn't carry address nor mask, so there's no easy
|
||||||
|
// way to figure out which bytes the master actually use. Since we
|
||||||
|
// don't care too much about addresses in the trace anyway, just store
|
||||||
|
// the entire bits.
|
||||||
|
resp.address := 0.U
|
||||||
|
resp.data := tlOut.d.bits.data
|
||||||
|
}
|
||||||
|
|
||||||
|
// stats
|
||||||
|
val numReqsThisCycle =
|
||||||
|
laneReqs.map { l => Mux(l.valid, 1.U(64.W), 0.U(64.W)) }.reduce { (v0, v1) => v0 + v1 }
|
||||||
|
val numRespsThisCycle =
|
||||||
|
laneResps.map { l => Mux(l.valid, 1.U(64.W), 0.U(64.W)) }.reduce { (v0, v1) => v0 + v1 }
|
||||||
|
val reqBytesThisCycle =
|
||||||
|
laneReqs.map { l => Mux(l.valid, 1.U(64.W) << l.size, 0.U(64.W)) }.reduce { (b0, b1) =>
|
||||||
|
b0 + b1
|
||||||
|
}
|
||||||
|
val respBytesThisCycle =
|
||||||
|
laneResps.map { l => Mux(l.valid, 1.U(64.W) << l.size, 0.U(64.W)) }.reduce { (b0, b1) =>
|
||||||
|
b0 + b1
|
||||||
|
}
|
||||||
|
numReqs := numReqs + numReqsThisCycle
|
||||||
|
numResps := numResps + numRespsThisCycle
|
||||||
|
reqBytes := reqBytes + reqBytesThisCycle
|
||||||
|
respBytes := respBytes + respBytesThisCycle
|
||||||
|
|
||||||
|
// Packs the per-lane trace signals into the wide, flat ports of the Verilog
// blackbox.
//
// Chisel has no way to assign to a bit range of a wide signal, so each field
// is first collected into a Vec of per-lane wires and then converted to a
// single UInt in one go.
def flattenTrace(traceLogIO: Bundle with HasTraceLine, perLane: Vec[TraceLine]) = {
  // Staging wires, one Vec per field; these will get optimized out.
  val validLanes = Wire(Vec(numLanes, chiselTypeOf(perLane(0).valid)))
  val sourceLanes = Wire(Vec(numLanes, chiselTypeOf(perLane(0).source)))
  val addressLanes = Wire(Vec(numLanes, chiselTypeOf(perLane(0).address)))
  val isStoreLanes = Wire(Vec(numLanes, chiselTypeOf(perLane(0).is_store)))
  val sizeLanes = Wire(Vec(numLanes, chiselTypeOf(perLane(0).size)))
  val dataLanes = Wire(Vec(numLanes, chiselTypeOf(perLane(0).data)))

  // Copy every lane's fields into the matching slot of each staging Vec.
  for ((lane, idx) <- perLane.zipWithIndex) {
    validLanes(idx) := lane.valid
    sourceLanes(idx) := lane.source
    addressLanes(idx) := lane.address
    isStoreLanes(idx) := lane.is_store
    sizeLanes(idx) := lane.size
    dataLanes(idx) := lane.data
  }

  // Drive the flat blackbox ports from the concatenated Vecs.
  traceLogIO.valid := validLanes.asUInt
  traceLogIO.source := sourceLanes.asUInt
  traceLogIO.address := addressLanes.asUInt
  traceLogIO.is_store := isStoreLanes.asUInt
  traceLogIO.size := sizeLanes.asUInt
  traceLogIO.data := dataLanes.asUInt
}
|
||||||
|
|
||||||
|
// Wire up the request-side logger, when one exists, and check that it never
// deasserts ready.
simReq.foreach { logger =>
  flattenTrace(logger.io.trace_log, laneReqs)
  assert(
    logger.io.trace_log.ready === true.B,
    "MemTraceLogger is expected to be always ready"
  )
}
// Same for the response-side logger.
simResp.foreach { logger =>
  flattenTrace(logger.io.trace_log, laneResps)
  assert(
    logger.io.trace_log.ready === true.B,
    "MemTraceLogger is expected to be always ready"
  )
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// BlackBox wrapper around the Verilog/DPI trace logger.
//
// MemTraceLogger is bidirectional, so the same blackbox is used for both the
// request stream and the response stream; `isResponse` tells the DPI module
// which one it is logging.  The distinction matters because the two trace
// formats differ slightly, e.g. a response trace has no address column.
//
// `isResponse`, `filename` and `numLanes` are forwarded to the Verilog module
// as the IS_RESPONSE, FILENAME and NUM_LANES parameters.
class SimMemTraceLogger(isResponse: Boolean, filename: String, numLanes: Int)
    extends BlackBox(
      Map(
        "IS_RESPONSE" -> (if (isResponse) 1 else 0),
        "FILENAME" -> filename,
        "NUM_LANES" -> numLanes
      )
    )
    with HasBlackBoxResource {
  // Per-lane field widths, read off a TraceLine prototype.
  val traceLineT = new TraceLine
  val sourceW = traceLineT.source.getWidth
  val addrW = traceLineT.address.getWidth
  val sizeW = traceLineT.size.getWidth
  val dataW = traceLineT.data.getWidth

  // Input port carrying `bitsPerLane` bits for each of the `numLanes` lanes.
  private def lanePacked(bitsPerLane: Int) = Input(UInt((bitsPerLane * numLanes).W))

  val io = IO(new Bundle {
    val clock = Input(Clock())
    val reset = Input(Bool())

    // Chisel can't interface with a Verilog 2D port, so every field is
    // flattened across all lanes into a single wide 1D array.
    val trace_log = new Bundle with HasTraceLine {
      val valid = Input(UInt(numLanes.W))
      val source = lanePacked(sourceW)
      // TODO: assumes 64-bit address.
      val address = lanePacked(addrW)
      val is_store = Input(UInt(numLanes.W))
      val size = lanePacked(sizeW)
      val data = lanePacked(dataW)
      val ready = Output(Bool())
    }
  })

  // Verilog module plus the C++ sources it needs.
  addResource("/vsrc/SimMemTraceLogger.v")
  addResource("/csrc/SimMemTraceLogger.cc")
  addResource("/csrc/SimMemTrace.h")
}
|
||||||
|
|
||||||
|
class TracePrintf {}

// Simulation-time debug print for one TileLink request.
object TracePrintf {
  // Prints a single line, prefixed with `printer`, showing the address, size,
  // mask and store flag.  For stores, the TileLink data and the request data
  // are appended to the same line before the terminating newline.
  def apply(
      printer: String,
      address: UInt,
      size: UInt,
      mask: UInt,
      is_store: Bool,
      tlData: UInt,
      reqData: UInt
  ) = {
    printf(
      s"${printer}: TL addr=%x, size=%d, mask=%x, store=%d",
      address,
      size,
      mask,
      is_store
    )
    // The data fields are only printed for stores.
    when(is_store) {
      printf(", tlData=%x, reqData=%x", tlData, reqData)
    }
    printf("\n")
  }
}
|
||||||
|
|
||||||
|
// Synthesizable unit tests
|
||||||
|
|
||||||
|
// Unit-test topology:
//   tracedriver --> coreside logger --> coalescer --> memside logger --> tlram
//
// The test finishes when the trace driver reports completion; at that point
// the core-side request and response counts are printed and must match.
class TLRAMCoalescerLogger(implicit p: Parameters) extends LazyModule {
  // TODO: use parameters for numLanes
  val numLanes = 4

  val driver = LazyModule(new MemTraceDriver(numLanes))
  val coreSideLogger = LazyModule(new MemTraceLogger(numLanes, loggerName = "coreside"))
  val coal = LazyModule(new CoalescingUnit(numLanes))
  // The memory side has one more edge than the core side: the coalesced one.
  val memSideLogger = LazyModule(new MemTraceLogger(numLanes + 1, loggerName = "memside"))
  val rams = Seq.fill(numLanes + 1)( // +1 for coalesced edge
    LazyModule(
      // NOTE: beatBytes here sets the data bitwidth of the upstream TileLink
      // edges globally, by way of Diplomacy communicating the TL slave
      // parameters to the upstream nodes.
      new TLRAM(address = AddressSet(0x0000, 0xffffff), beatBytes = 8)
    )
  )

  memSideLogger.node :=* coal.node :=* coreSideLogger.node :=* driver.node
  // One RAM per edge coming out of the memory-side logger.
  rams.foreach(_.node := memSideLogger.node)

  lazy val module = new Impl
  class Impl extends LazyModuleImp(this) with UnitTestModule {
    driver.module.io.start := io.start
    io.finished := driver.module.io.finished

    when(io.finished) {
      val stats = coreSideLogger.module.io
      printf(
        "numReqs=%d, numResps=%d, reqBytes=%d, respBytes=%d\n",
        stats.numReqs,
        stats.numResps,
        stats.reqBytes,
        stats.respBytes
      )
      assert(
        stats.numReqs === stats.numResps,
        "FAIL: number of requests and responses to the coalescer do not match"
      )
    }
  }
}
|
||||||
|
|
||||||
|
// UnitTest harness for TLRAMCoalescerLogger: instantiates the design and
// passes the start/finished handshake straight through.
class TLRAMCoalescerLoggerTest(timeout: Int = 500000)(implicit p: Parameters)
    extends UnitTest(timeout) {
  val dut = Module(LazyModule(new TLRAMCoalescerLogger).module)

  dut.io.start := io.start
  io.finished := dut.io.finished
}
|
||||||
|
|
||||||
|
// tracedriver --> coalescer --> tlram
|
||||||
|
class TLRAMCoalescer(implicit p: Parameters) extends LazyModule {
|
||||||
// TODO: use parameters for numLanes
|
// TODO: use parameters for numLanes
|
||||||
val numLanes = 4
|
val numLanes = 4
|
||||||
val coal = LazyModule(new CoalescingUnit(numLanes))
|
val coal = LazyModule(new CoalescingUnit(numLanes))
|
||||||
val driver = LazyModule(new MemTraceDriver(numLanes))
|
val driver = LazyModule(new MemTraceDriver(numLanes))
|
||||||
|
val rams = Seq.fill(numLanes + 1)( // +1 for coalesced edge
|
||||||
coal.node :=* driver.node
|
|
||||||
|
|
||||||
// Use TLTestRAM as bogus downstream TL manager nodes
|
|
||||||
// TODO: swap this out with a memtrace logger
|
|
||||||
val rams = Seq.tabulate(numLanes + 1) { _ =>
|
|
||||||
LazyModule(
|
LazyModule(
|
||||||
// TODO: properly propagate beatBytes?
|
// NOTE: beatBytes here sets the data bitwidth of the upstream TileLink
|
||||||
|
// edges globally, by way of Diplomacy communicating the TL slave
|
||||||
|
// parameters to the upstream nodes.
|
||||||
new TLRAM(address = AddressSet(0x0000, 0xffffff), beatBytes = 8)
|
new TLRAM(address = AddressSet(0x0000, 0xffffff), beatBytes = 8)
|
||||||
)
|
)
|
||||||
}
|
)
|
||||||
// Connect all (N+1) outputs of coal to separate TestRAM modules
|
|
||||||
|
coal.node :=* driver.node
|
||||||
rams.foreach { r => r.node := coal.node }
|
rams.foreach { r => r.node := coal.node }
|
||||||
|
|
||||||
lazy val module = new Impl
|
lazy val module = new Impl
|
||||||
@@ -799,8 +1229,8 @@ class CoalConnectTrace(implicit p: Parameters) extends LazyModule {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class CoalescingUnitTest(timeout: Int = 500000)(implicit p: Parameters) extends UnitTest(timeout) {
|
class TLRAMCoalescerTest(timeout: Int = 500000)(implicit p: Parameters) extends UnitTest(timeout) {
|
||||||
val dut = Module(LazyModule(new CoalConnectTrace).module)
|
val dut = Module(LazyModule(new TLRAMCoalescer).module)
|
||||||
dut.io.start := io.start
|
dut.io.start := io.start
|
||||||
io.finished := dut.io.finished
|
io.finished := dut.io.finished
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -87,7 +87,7 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
|
|||||||
c.io.invalidate.poke(0.U)
|
c.io.invalidate.poke(0.U)
|
||||||
|
|
||||||
// prepare
|
// prepare
|
||||||
c.io.deq.ready.poke(false.B)
|
c.io.deq.ready.poke(true.B)
|
||||||
c.io.enq.ready.expect(true.B)
|
c.io.enq.ready.expect(true.B)
|
||||||
c.io.enq.valid.poke(true.B)
|
c.io.enq.valid.poke(true.B)
|
||||||
c.io.enq.bits.poke(0x12.U)
|
c.io.enq.bits.poke(0x12.U)
|
||||||
@@ -113,6 +113,45 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
it should "work when enqueing and dequeueing simultaneously to a full queue" in {
  test(new CoalShiftQueue(UInt(8.W), 1)) { dut =>
    val enq = dut.io.enq
    val deq = dut.io.deq

    dut.io.invalidate.poke(0.U)

    // prepare: push the first element
    deq.ready.poke(true.B)
    enq.ready.expect(true.B)
    enq.valid.poke(true.B)
    enq.bits.poke(0x12.U)
    dut.clock.step()

    // enqueue and dequeue simultaneously, twice in a row: each step pushes a
    // new value while expecting the value pushed on the previous step at the
    // dequeue port
    for ((pushed, expectedHead) <- Seq((0x34, 0x12), (0x56, 0x34))) {
      deq.ready.poke(true.B)
      enq.ready.expect(true.B)
      enq.valid.poke(true.B)
      enq.bits.poke(pushed.U)
      deq.valid.expect(true.B)
      deq.bits.expect(expectedHead.U)
      dut.clock.step()
    }

    // dequeueing back-to-back should work without any holes in the middle
    deq.ready.poke(true.B)
    enq.valid.poke(false.B)
    deq.valid.expect(true.B)
    deq.bits.expect(0x56.U)
    dut.clock.step()

    // make sure is empty
    deq.ready.poke(true.B)
    enq.valid.poke(false.B)
    deq.valid.expect(false.B)
  }
}
|
||||||
|
|
||||||
it should "invalidate head being dequeued" in {
|
it should "invalidate head being dequeued" in {
|
||||||
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
|
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
|
||||||
c.io.invalidate.poke(0.U)
|
c.io.invalidate.poke(0.U)
|
||||||
@@ -216,6 +255,7 @@ class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
|||||||
val numLanes = 4
|
val numLanes = 4
|
||||||
val numPerLaneReqs = 2
|
val numPerLaneReqs = 2
|
||||||
val sourceWidth = 2
|
val sourceWidth = 2
|
||||||
|
val sizeWidth = 2
|
||||||
// 16B coalescing size
|
// 16B coalescing size
|
||||||
val coalDataWidth = 128
|
val coalDataWidth = 128
|
||||||
val numInflightCoalRequests = 4
|
val numInflightCoalRequests = 4
|
||||||
@@ -226,8 +266,9 @@ class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
|||||||
numLanes,
|
numLanes,
|
||||||
numPerLaneReqs,
|
numPerLaneReqs,
|
||||||
sourceWidth,
|
sourceWidth,
|
||||||
|
sizeWidth,
|
||||||
coalDataWidth,
|
coalDataWidth,
|
||||||
numInflightCoalRequests
|
numInflightCoalRequests,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
// vcs helps with simulation time, but sometimes errors with
|
// vcs helps with simulation time, but sometimes errors with
|
||||||
@@ -238,15 +279,19 @@ class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
|||||||
c.io.coalReqValid.poke(true.B)
|
c.io.coalReqValid.poke(true.B)
|
||||||
c.io.newEntry.source.poke(sourceId)
|
c.io.newEntry.source.poke(sourceId)
|
||||||
c.io.newEntry.lanes(0).reqs(0).valid.poke(true.B)
|
c.io.newEntry.lanes(0).reqs(0).valid.poke(true.B)
|
||||||
|
c.io.newEntry.lanes(0).reqs(0).source.poke(1.U)
|
||||||
c.io.newEntry.lanes(0).reqs(0).offset.poke(1.U)
|
c.io.newEntry.lanes(0).reqs(0).offset.poke(1.U)
|
||||||
c.io.newEntry.lanes(0).reqs(0).size.poke(2.U)
|
c.io.newEntry.lanes(0).reqs(0).size.poke(2.U)
|
||||||
c.io.newEntry.lanes(0).reqs(1).valid.poke(true.B)
|
c.io.newEntry.lanes(0).reqs(1).valid.poke(true.B)
|
||||||
|
c.io.newEntry.lanes(0).reqs(1).source.poke(2.U)
|
||||||
c.io.newEntry.lanes(0).reqs(1).offset.poke(1.U)
|
c.io.newEntry.lanes(0).reqs(1).offset.poke(1.U)
|
||||||
c.io.newEntry.lanes(0).reqs(1).size.poke(2.U)
|
c.io.newEntry.lanes(0).reqs(1).size.poke(2.U)
|
||||||
c.io.newEntry.lanes(2).reqs(0).valid.poke(true.B)
|
c.io.newEntry.lanes(2).reqs(0).valid.poke(true.B)
|
||||||
|
c.io.newEntry.lanes(2).reqs(0).source.poke(1.U)
|
||||||
c.io.newEntry.lanes(2).reqs(0).offset.poke(2.U)
|
c.io.newEntry.lanes(2).reqs(0).offset.poke(2.U)
|
||||||
c.io.newEntry.lanes(2).reqs(0).size.poke(1.U)
|
c.io.newEntry.lanes(2).reqs(0).size.poke(1.U)
|
||||||
c.io.newEntry.lanes(2).reqs(1).valid.poke(true.B)
|
c.io.newEntry.lanes(2).reqs(1).valid.poke(true.B)
|
||||||
|
c.io.newEntry.lanes(2).reqs(1).source.poke(2.U)
|
||||||
c.io.newEntry.lanes(2).reqs(1).offset.poke(0.U)
|
c.io.newEntry.lanes(2).reqs(1).offset.poke(0.U)
|
||||||
c.io.newEntry.lanes(2).reqs(1).size.poke(2.U)
|
c.io.newEntry.lanes(2).reqs(1).size.poke(2.U)
|
||||||
|
|
||||||
@@ -268,13 +313,13 @@ class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
|||||||
c.io.uncoalResps(3)(0).valid.expect(false.B)
|
c.io.uncoalResps(3)(0).valid.expect(false.B)
|
||||||
|
|
||||||
c.io.uncoalResps(0)(0).bits.data.expect(0x89abcdefL.U)
|
c.io.uncoalResps(0)(0).bits.data.expect(0x89abcdefL.U)
|
||||||
c.io.uncoalResps(0)(0).bits.source.expect(0.U)
|
c.io.uncoalResps(0)(0).bits.source.expect(1.U)
|
||||||
c.io.uncoalResps(0)(1).bits.data.expect(0x89abcdefL.U)
|
c.io.uncoalResps(0)(1).bits.data.expect(0x89abcdefL.U)
|
||||||
c.io.uncoalResps(0)(1).bits.source.expect(0.U)
|
c.io.uncoalResps(0)(1).bits.source.expect(2.U)
|
||||||
c.io.uncoalResps(2)(0).bits.data.expect(0x5ca1ab1eL.U)
|
c.io.uncoalResps(2)(0).bits.data.expect(0x5ca1ab1eL.U)
|
||||||
c.io.uncoalResps(2)(0).bits.source.expect(0.U)
|
c.io.uncoalResps(2)(0).bits.source.expect(1.U)
|
||||||
c.io.uncoalResps(2)(1).bits.data.expect(0x01234567L.U)
|
c.io.uncoalResps(2)(1).bits.data.expect(0x01234567L.U)
|
||||||
c.io.uncoalResps(2)(1).bits.source.expect(0.U)
|
c.io.uncoalResps(2)(1).bits.source.expect(2.U)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user