Merge branch 'graphics' of https://github.com/hansungk/rocket-chip into graphics

This commit is contained in:
Richard Yan
2023-05-07 02:34:11 -07:00
4 changed files with 97 additions and 65 deletions

View File

@@ -13,7 +13,8 @@
// Global singleton instance
static std::unique_ptr<MemTraceReader> reader;
MemTraceReader::MemTraceReader(const std::string &filename) {
MemTraceReader::MemTraceReader(const std::string &filename)
: filename(filename) {
char cwd[4096];
if (getcwd(cwd, sizeof(cwd))) {
printf("MemTraceReader: current working dir: %s\n", cwd);
@@ -30,32 +31,64 @@ MemTraceReader::~MemTraceReader() {
printf("MemTraceReader destroyed\n");
}
// Parse trace file in its entirety and store it into internal structure.
// Report a fatal trace-parsing problem and terminate the process.
//
// Writes one diagnostic line to stderr containing the trace file name held in
// `filename`, the given line number `fileline`, and the caller-supplied `msg`,
// then calls exit(EXIT_FAILURE).  This function does not return.
void MemTraceReader::error(long fileline, const std::string &msg) {
const char *const trace_path = filename.c_str();
const char *const reason = msg.c_str();
fprintf(stderr, "parse error at %s:%ld: %s\n", trace_path, fileline, reason);
exit(EXIT_FAILURE);
}
// Parse trace file in its entirety and store it into an internal structure.
// If `has_source` is true, assumes the trace has an additional column after
// core and lane_id for source id and tries to parse that.
// TODO: parsing might block for a long time when the trace gets big; check
// whether it needs to be broken down.
void MemTraceReader::parse() {
void MemTraceReader::parse(const bool has_source) {
MemTraceLine line;
printf("MemTraceReader: started parsing\n");
long size = 0;
std::string loadstore; // FIXME: likely slow
while (infile >> line.cycle >> loadstore >> line.core_id >>
line.lane_id >> std::hex >> line.address >> line.data >> std::dec >>
size) {
long source = 0;
std::string loadstore; // slow?
for (long fileline = 1;; fileline++) {
if (infile.peek() == '\n') {
infile.get();
continue;
}
if (infile.eof()) {
break;
}
if (!(infile >> line.cycle >> loadstore >> line.core_id >> line.lane_id)) {
printf("char=[%c]\n", infile.peek());
// assert(!infile.eof());
error(fileline, "failed parsing cycle..lane_id");
}
if (has_source && !(infile >> source)) {
error(fileline, "failed parsing source");
}
if (!(infile >> std::hex >> line.address >> line.data >> std::dec >>
size)) {
error(fileline, "failed parsing address..size");
}
if (infile.get() != '\n') {
error(fileline, "trailing characters at the end of the line");
}
line.valid = true;
line.is_store = (loadstore == "STORE");
assert(size > 0 && "invalid size in trace");
if (size <= 0) {
error(fileline, "invalid size in trace");
}
int lgsize = static_cast<int>(log2(size));
assert((size & ~(~0lu << lgsize)) == 0 &&
"non-power-of-2 size detected in trace");
if ((size & ~(~0lu << lgsize)) != 0) {
error(fileline, "non-power-of-2 size detected in trace");
}
line.log_data_size = lgsize;
trace.push_back(line);
trace_buf.push_back(line);
}
read_pos = trace.cbegin();
read_pos = trace_buf.cbegin();
printf("MemTraceReader: finished parsing\n");
}
@@ -63,8 +96,7 @@ void MemTraceReader::parse() {
// Try to read a memory request that might have happened at a given cycle, on a
// given SIMD lane (= "thread"). In case no request happened at that point,
// return an empty line with .valid = false.
MemTraceLine MemTraceReader::read_trace_at(const long cycle,
const int lane_id,
MemTraceLine MemTraceReader::read_trace_at(const long cycle, const int lane_id,
unsigned char trace_read_ready) {
MemTraceLine line;
line.valid = false;
@@ -79,43 +111,41 @@ MemTraceLine MemTraceReader::read_trace_at(const long cycle,
// It should always be guaranteed that we consumed all of the past lines, and
// the next line is in the future.
if (line.cycle < cycle) {
// fprintf(stderr, "line.cycle=%ld, cycle=%ld\n", line.cycle, cycle);
printf("cycle=%ld, some lines are left in past Fatal", cycle);
assert(false && "some trace lines are left unread in the past");
long fileline = read_pos - std::cbegin(trace_buf) + 1;
error(fileline, "some trace lines are left unread in the past");
return MemTraceLine{};
}
if (line.lane_id != lane_id) {
line.valid = false;
}
if (line.cycle > cycle) {
// We haven't reached the cycle mark specified in this line yet, so we don't
// read it right now.
return MemTraceLine{};
} else if (line.lane_id != lane_id) {
return MemTraceLine{};
} else if (line.cycle == cycle && line.lane_id == lane_id) {
if (trace_read_ready){
if (trace_read_ready) {
printf("Fire! cycle=%ld, valid=%d, %s addr=%lx, size=%d \n", cycle,
line.valid, (line.is_store ? "STORE" : "LOAD"), line.address,
line.log_data_size);
line.valid, (line.is_store ? "STORE" : "LOAD"), line.address,
line.log_data_size);
// FIXME! Currently lane_id is assumed to be in round-robin order, e.g.
// 0->1->2->3->0->..., both in the trace file and the order the caller calls
// this function. If this is not true, we cannot simply monotonically
// increment read_pos.
// Only advance pointer when cycle and threa_id both match
// now increaseing sequence is fine (0, 1, 3), but unordered is not fine (0, 3, 1)
// NOTE: Currently lane_id is assumed to be in always-increasing order,
// e.g. 0->1->2->3->0->..., both in the trace file and the order the
// caller calls this function. If this is not true, we cannot simply
// monotonically increment read_pos. lane_id need not be contiguous, e.g.
// 0->1->3 is fine.
++read_pos;
}
else { // we do not want to advance read_pos
return line;
} else {
// For debugging purposes, instead of early-returning on
// !trace_read_ready, print something to notify we are blocking a valid
// trace line.
printf("All Lanes Blocked on this cycle! cycle=%ld \n", cycle);
return MemTraceLine{};
}
return line;
}
}
assert(!"unreachable");
}
extern "C" void memtrace_init(const char *filename) {
#ifndef NO_VPI
@@ -124,9 +154,9 @@ extern "C" void memtrace_init(const char *filename) {
fprintf(stderr, "fatal: failed to get plusargs from VCS\n");
exit(1);
}
const char* TRACEFILENAME_PLUSARG = "+memtracefile=";
const char *TRACEFILENAME_PLUSARG = "+memtracefile=";
for (int i = 0; i < info.argc; i++) {
char* input_arg = info.argv[i];
char *input_arg = info.argv[i];
if (strncmp(input_arg, TRACEFILENAME_PLUSARG,
strlen(TRACEFILENAME_PLUSARG)) == 0) {
filename = input_arg + strlen(TRACEFILENAME_PLUSARG);
@@ -139,7 +169,8 @@ extern "C" void memtrace_init(const char *filename) {
reader = std::make_unique<MemTraceReader>(filename);
// parse file upfront
reader->parse();
// driver trace file is assumed to not have source id
reader->parse(false);
}
// TODO: accept core_id as well
@@ -155,13 +186,6 @@ extern "C" void memtrace_query(unsigned char trace_read_ready,
// printf("memtrace_query(cycle=%ld, tid=%d)\n", trace_read_cycle,
// trace_read_lane_id);
/* we can't return immediately, even if trace is ready, we still want to find out
if we are suppose to generate valid req on this clock cycle
if (!trace_read_ready) {
return;
}
*/
auto line = reader->read_trace_at(trace_read_cycle, trace_read_lane_id, trace_read_ready);
*trace_read_valid = line.valid;
*trace_read_address = line.address;

View File

@@ -18,12 +18,18 @@ class MemTraceReader {
public:
MemTraceReader(const std::string &filename);
~MemTraceReader();
void parse();
void parse(const bool has_source);
MemTraceLine read_trace_at(const long cycle, const int lane_id, unsigned char trace_read_ready);
bool finished() const { return read_pos == trace.cend(); }
bool finished() const { return read_pos == trace_buf.cend(); }
MemTraceLine peek() const { return *read_pos; }
void next() { read_pos++; }
void error(long fileline, const std::string &msg);
const std::string filename;
private:
std::ifstream infile;
std::vector<MemTraceLine> trace;
std::vector<MemTraceLine> trace_buf;
std::vector<MemTraceLine>::const_iterator read_pos;
};

View File

@@ -102,7 +102,8 @@ extern "C" void memtracelogger_log(int handle,
.data = trace_log_data,
.log_data_size = trace_log_size};
assert(0 <= handle && handle < loggers.size() && "wrong trace logger handle");
assert(0 <= handle && static_cast<size_t>(handle) < loggers.size() &&
"wrong trace logger handle");
auto logger = loggers[handle].get();
logger->write_line_to_trace(line);
}

View File

@@ -53,9 +53,10 @@ case class CoalescerConfig(
coalLogSizes: Seq[Int], // list of coalescer sizes to try in the MonoCoalescers
// each size is log(byteSize)
sizeEnum: InFlightTableSizeEnum,
numCoalReq: Int, // the total number of coalesced request
arbiterOutputs: Int, //total number RW ports from the
bankStrideInBytes: Int //cache line strides across the different banks
numCoalReqs: Int, // total number of coalesced requests we can generate in one cycle
numArbiterOutputPorts: Int, // total of output ports the arbiter will arbitrate into.
// this has to match downstream cache's configuration
bankStrideInBytes: Int // cache line strides across the different banks
) {
// maximum coalesced size
def maxCoalLogSize: Int = coalLogSizes.max
@@ -77,8 +78,8 @@ object defaultConfig extends CoalescerConfig(
respQueueDepth = 4,
coalLogSizes = Seq(3),
sizeEnum = DefaultInFlightTableSizeEnum,
numCoalReq = 1,
arbiterOutputs = 4,
numCoalReqs = 1,
numArbiterOutputPorts = 4,
bankStrideInBytes = 64 // Current L2 is strided by 512 bits
)
@@ -1729,10 +1730,10 @@ class TLRAMCoalescerTest(timeout: Int = 500000)(implicit p: Parameters) extends
class CoalArbiter(config: CoalescerConfig) (implicit p: Parameters) extends LazyModule {
// Let SIMT's word size be 32, and read/write granularity be 256
val fullSourceIdRange = config.numOldSrcIds * config.numLanes + config.numNewSrcIds * config.numCoalReq
val fullSourceIdRange = config.numOldSrcIds * config.numLanes + config.numNewSrcIds * config.numCoalReqs
// K client nodes of edge size 32 for non-coalesced reqs
val nonCoalNarrowNodes = Seq.tabulate(config.arbiterOutputs){ i =>
val nonCoalNarrowNodes = Seq.tabulate(config.numArbiterOutputPorts){ i =>
val nonCoalNarrowParam = Seq(
TLMasterParameters.v1(
name = "NonCoalNarrowNode" + i.toString,
@@ -1750,7 +1751,7 @@ class CoalArbiter(config: CoalescerConfig) (implicit p: Parameters) extends Lazy
)
// K client nodes of edge size 256 for the coalesced reqs
val coalReqNodes = Seq.tabulate(config.arbiterOutputs){ i =>
val coalReqNodes = Seq.tabulate(config.numArbiterOutputPorts){ i =>
val coalParam = Seq(
TLMasterParameters.v1(
name = "CoalReqNode" + i.toString,
@@ -1806,10 +1807,10 @@ class CoalArbiterImpl(outer: CoalArbiter,
val io = IO(new Bundle {
val nonCoalVec = Vec(config.numLanes, Flipped(Decoupled(nonCoalEntryT.cloneType)))
val coalVec = Vec(config.numCoalReq, Flipped(Decoupled(coalEntryT.cloneType)))
val respNonCoalVec = Vec(config.numLanes, Decoupled(respNonCoalEntryT.cloneType))
val respCoalBundle = Decoupled(respCoalBundleT.cloneType)
val nonCoalReqs = Vec(config.numLanes, Flipped(Decoupled(nonCoalEntryT)))
val coalReqs = Vec(config.numCoalReqs, Flipped(Decoupled(coalEntryT)))
val nonCoalResps = Vec(config.numLanes, Decoupled(respNonCoalEntryT))
val coalResp = Decoupled(respCoalBundleT)
}
)