simulation framework refactoring
This commit is contained in:
9
sim/Makefile
Normal file
9
sim/Makefile
Normal file
@@ -0,0 +1,9 @@
|
||||
# Build or clean every simulator flavour by delegating to its sub-directory.
# Neither target produces a file of the same name, so both are declared phony;
# otherwise a stray file called "all" or "clean" would silently disable them.
.PHONY: all clean

all:
	$(MAKE) -C simX
	$(MAKE) -C rtlsim
	$(MAKE) -C vlsim

clean:
	$(MAKE) -C simX clean
	$(MAKE) -C rtlsim clean
	$(MAKE) -C vlsim clean
|
||||
305
sim/common/mem.cpp
Normal file
305
sim/common/mem.cpp
Normal file
@@ -0,0 +1,305 @@
|
||||
#include "mem.h"
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <assert.h>
|
||||
#include "util.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
// Builds a RAM device whose initial contents are the bytes of `filename`,
// zero-padded up to a multiple of `wordSize`.
//
// filename: path of the image to load; the process aborts if it cannot be
//           opened.
// wordSize: access granularity in bytes; the padding test uses
//           `wordSize - 1` as a bit mask.
RamMemDevice::RamMemDevice(const char *filename, uint32_t wordSize)
  : wordSize_(wordSize) {
  // Binary mode: the image is raw data, no newline translation wanted.
  std::ifstream input(filename, std::ios::binary);

  if (!input) {
    std::cout << "Error reading file \"" << filename << "\" into RamMemDevice.\n";
    std::abort();
  }

  // Read byte by byte and stop as soon as the read fails, so the EOF
  // sentinel is never stored as a bogus trailing 0xFF byte.
  char byte;
  while (input.get(byte)) {
    contents_.push_back(static_cast<uint8_t>(byte));
  }

  // Pad with zeroes until the size is word aligned.
  while (contents_.size() & (wordSize-1))
    contents_.push_back(0x00);
}
|
||||
|
||||
// Builds a RAM device of `size` value-initialized (zero) bytes that is
// accessed with a granularity of `wordSize` bytes.
RamMemDevice::RamMemDevice(uint64_t size, uint32_t wordSize)
  : contents_(size), wordSize_(wordSize) {
}
|
||||
|
||||
void RamMemDevice::read(void *data, uint64_t addr, uint64_t size) {
|
||||
auto addr_end = addr + size;
|
||||
if ((addr & (wordSize_-1))
|
||||
|| (addr_end & (wordSize_-1))
|
||||
|| (addr_end <= contents_.size())) {
|
||||
std::cout << "lookup of 0x" << std::hex << (addr_end-1) << " failed.\n";
|
||||
throw BadAddress();
|
||||
}
|
||||
|
||||
const uint8_t *s = contents_.data() + addr;
|
||||
for (uint8_t *d = (uint8_t*)data, *de = d + size; d != de;) {
|
||||
*d++ = *s++;
|
||||
}
|
||||
}
|
||||
|
||||
void RamMemDevice::write(const void *data, uint64_t addr, uint64_t size) {
|
||||
auto addr_end = addr + size;
|
||||
if ((addr & (wordSize_-1))
|
||||
|| (addr_end & (wordSize_-1))
|
||||
|| (addr_end <= contents_.size())) {
|
||||
std::cout << "lookup of 0x" << std::hex << (addr_end-1) << " failed.\n";
|
||||
throw BadAddress();
|
||||
}
|
||||
|
||||
const uint8_t *s = (const uint8_t*)data;
|
||||
for (uint8_t *d = contents_.data() + addr, *de = d + size; d != de;) {
|
||||
*d++ = *s++;
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// A ROM cannot be modified: any write attempt is reported and the process
// is aborted. The arguments are deliberately ignored.
void RomMemDevice::write(const void* data, uint64_t addr, uint64_t size) {
  (void)data;
  (void)addr;
  (void)size;
  std::cout << "attempt to write to ROM.\n";
  std::abort();
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
bool MemoryUnit::ADecoder::lookup(uint64_t a, uint32_t wordSize, mem_accessor_t* ma) {
|
||||
uint64_t e = a + (wordSize - 1);
|
||||
assert(e >= a);
|
||||
for (auto iter = entries_.rbegin(), iterE = entries_.rend(); iter != iterE; ++iter) {
|
||||
if (a >= iter->start && e <= iter->end) {
|
||||
ma->md = iter->md;
|
||||
ma->addr = a - iter->start;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Registers device `m` for the inclusive address range [a, e].
// The new mapping is appended, which makes it the first one lookup() tries.
void MemoryUnit::ADecoder::map(uint64_t a, uint64_t e, MemDevice &m) {
  assert(e >= a);
  entries_.push_back(entry_t{&m, a, e});
}
|
||||
|
||||
void MemoryUnit::ADecoder::read(void *data, uint64_t addr, uint64_t size) {
|
||||
mem_accessor_t ma;
|
||||
if (!this->lookup(addr, size, &ma)) {
|
||||
std::cout << "lookup of 0x" << std::hex << addr << " failed.\n";
|
||||
throw BadAddress();
|
||||
}
|
||||
ma.md->read(data, ma.addr, size);
|
||||
}
|
||||
|
||||
void MemoryUnit::ADecoder::write(const void *data, uint64_t addr, uint64_t size) {
|
||||
mem_accessor_t ma;
|
||||
if (!this->lookup(addr, size, &ma)) {
|
||||
std::cout << "lookup of 0x" << std::hex << addr << " failed.\n";
|
||||
throw BadAddress();
|
||||
}
|
||||
ma.md->write(data, ma.addr, size);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Creates a memory unit with the given page size and address width.
// When virtual memory is enabled, page 0 is pre-mapped to frame 0 with the
// flag value 077 (octal, i.e. the six low flag bits set).
MemoryUnit::MemoryUnit(uint64_t pageSize, uint64_t addrBytes, bool disableVm)
  : pageSize_(pageSize)
  , addrBytes_(addrBytes)
  , disableVM_(disableVm) {
  if (disableVm)
    return;
  tlb_[0] = TLBEntry(0, 077);
}
|
||||
|
||||
// Makes device `m` reachable at the inclusive physical range [start, end]
// by registering it with the address decoder.
void MemoryUnit::attach(MemDevice &m, uint64_t start, uint64_t end) {
  this->decoder_.map(start, end, m);
}
|
||||
|
||||
// Returns the TLB entry for the page containing `vAddr`.
//
// Throws PageFault(vAddr, true) when the page has no entry, and
// PageFault(vAddr, false) when an entry exists but none of the bits in
// `flagMask` are set in its flags.
MemoryUnit::TLBEntry MemoryUnit::tlbLookup(uint64_t vAddr, uint32_t flagMask) {
  const auto hit = tlb_.find(vAddr / pageSize_);
  if (hit == tlb_.end())
    throw PageFault(vAddr, true);

  const TLBEntry& entry = hit->second;
  if (!(entry.flags & flagMask))
    throw PageFault(vAddr, false);

  return entry;
}
|
||||
|
||||
void MemoryUnit::read(void *data, uint64_t addr, uint64_t size, bool sup) {
|
||||
uint64_t pAddr;
|
||||
if (disableVM_) {
|
||||
pAddr = addr;
|
||||
} else {
|
||||
uint32_t flagMask = sup ? 8 : 1;
|
||||
TLBEntry t = this->tlbLookup(addr, flagMask);
|
||||
pAddr = t.pfn * pageSize_ + addr % pageSize_;
|
||||
}
|
||||
return decoder_.read(data, pAddr, size);
|
||||
}
|
||||
|
||||
void MemoryUnit::write(const void *data, uint64_t addr, uint64_t size, bool sup) {
|
||||
uint64_t pAddr;
|
||||
if (disableVM_) {
|
||||
pAddr = addr;
|
||||
} else {
|
||||
uint32_t flagMask = sup ? 16 : 2;
|
||||
TLBEntry t = tlbLookup(addr, flagMask);
|
||||
pAddr = t.pfn * pageSize_ + addr % pageSize_;
|
||||
}
|
||||
decoder_.write(data, pAddr, size);
|
||||
}
|
||||
|
||||
// Installs (or replaces) the translation of the page containing `virt` so
// that it points at the frame containing `phys`, with the given flags.
void MemoryUnit::tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags) {
  const uint64_t vpn = virt / pageSize_;
  const uint64_t pfn = phys / pageSize_;
  tlb_[vpn] = TLBEntry(pfn, flags);
}
|
||||
|
||||
void MemoryUnit::tlbRm(uint64_t va) {
|
||||
if (tlb_.find(va / pageSize_) != tlb_.end())
|
||||
tlb_.erase(tlb_.find(va / pageSize_));
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Creates a RAM of `num_pages` pages of `page_size` bytes each.
// `page_size` must be a power of two. No page storage is allocated here;
// every slot starts out null.
RAM::RAM(uint32_t num_pages, uint32_t page_size)
  : page_bits_(log2ceil(page_size)) {
  assert(ispow2(page_size));
  mem_.assign(num_pages, nullptr);
  size_ = static_cast<uint64_t>(mem_.size()) << page_bits_;
}
|
||||
|
||||
// Releases every page that was allocated during the RAM's lifetime.
RAM::~RAM() {
  clear();
}
|
||||
|
||||
void RAM::clear() {
|
||||
for (auto& page : mem_) {
|
||||
delete[] page;
|
||||
page = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t RAM::size() const {
|
||||
return size_;
|
||||
}
|
||||
|
||||
uint8_t *RAM::get(uint32_t address) const {
|
||||
uint32_t page_size = 1 << page_bits_;
|
||||
uint32_t page_index = address >> page_bits_;
|
||||
uint32_t byte_offset = address & ((1 << page_bits_) - 1);
|
||||
|
||||
auto &page = mem_.at(page_index);
|
||||
if (page == NULL) {
|
||||
uint8_t *ptr = new uint8_t[page_size];
|
||||
// set uninitialized data to "baadf00d"
|
||||
for (uint32_t i = 0; i < page_size; ++i) {
|
||||
ptr[i] = (0xbaadf00d >> ((i & 0x3) * 8)) & 0xff;
|
||||
}
|
||||
page = ptr;
|
||||
}
|
||||
return page + byte_offset;
|
||||
}
|
||||
|
||||
void RAM::read(void *data, uint64_t addr, uint64_t size) {
|
||||
uint8_t* d = (uint8_t*)data;
|
||||
for (uint64_t i = 0; i < size; i++) {
|
||||
d[i] = *this->get(addr + i);
|
||||
}
|
||||
}
|
||||
|
||||
void RAM::write(const void *data, uint64_t addr, uint64_t size) {
|
||||
const uint8_t* s = (const uint8_t*)data;
|
||||
for (uint64_t i = 0; i < size; i++) {
|
||||
*this->get(addr + i) = s[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Replaces the RAM contents with the raw bytes of `filename`, placed at
// address `destination`.
//
// All previously allocated pages are discarded first. If the file cannot
// be opened or its length cannot be determined, an error is printed and
// the process aborts (the same policy RamMemDevice uses for unreadable
// files) instead of carrying on with a bogus length.
void RAM::loadBinImage(const char* filename, uint64_t destination) {
  // Binary mode: the image is raw data, no newline translation wanted.
  std::ifstream ifs(filename, std::ios::binary);
  if (!ifs) {
    std::cout << "error: " << filename << " not found" << std::endl;
    std::abort();
  }

  ifs.seekg(0, ifs.end);
  const std::streamoff length = ifs.tellg();
  if (length < 0) {
    // tellg() reports failure as -1; never turn that into a huge size_t.
    std::cout << "error: cannot determine size of " << filename << std::endl;
    std::abort();
  }
  const size_t size = static_cast<size_t>(length);
  std::vector<uint8_t> content(size);
  ifs.seekg(0, ifs.beg);
  ifs.read((char*)content.data(), size);

  this->clear();
  this->write(content.data(), destination, size);
}
|
||||
|
||||
// Replaces the RAM contents with the data records of the hex image
// `filename`.
//
// Each line starting with ':' is decoded as
//   :LLAAAATT<data...>
// where LL is the data byte count, AAAA the 16-bit address and TT the
// record type:
//   00  data bytes, stored at AAAA + current offset
//   02  new offset = 16-bit payload << 4
//   04  new offset = 16-bit payload << 16
// Other record types and lines not starting with ':' are skipped.
//
// If the file cannot be opened or its length cannot be determined, an
// error is printed and the process aborts. Records that are cut short by
// the end of the file are ignored rather than read past the buffer.
void RAM::loadHexImage(const char* filename) {
  // Value of one hex digit (no validation: other characters map to c - '0').
  auto hti = [&](char c)->uint32_t {
    if (c >= 'A' && c <= 'F')
      return c - 'A' + 10;
    if (c >= 'a' && c <= 'f')
      return c - 'a' + 10;
    return c - '0';
  };

  // Value of `size` consecutive hex digits, most significant first.
  auto hToI = [&](const char *c, uint32_t size)->uint32_t {
    uint32_t value = 0;
    for (uint32_t i = 0; i < size; i++) {
      value += hti(c[i]) << ((size - i - 1) * 4);
    }
    return value;
  };

  std::ifstream ifs(filename);
  if (!ifs) {
    std::cout << "error: " << filename << " not found" << std::endl;
    std::abort();
  }

  ifs.seekg(0, ifs.end);
  const std::streamoff length = ifs.tellg();
  if (length < 0) {
    // tellg() reports failure as -1; never turn that into a huge size_t.
    std::cout << "error: cannot determine size of " << filename << std::endl;
    std::abort();
  }
  size_t size = static_cast<size_t>(length);   // bytes left from `line` onwards
  std::vector<char> content(size);
  ifs.seekg(0, ifs.beg);
  ifs.read(content.data(), size);

  this->clear();

  // Nothing to parse; also avoids dereferencing an empty buffer below.
  if (size == 0)
    return;

  int offset = 0;
  char *line = content.data();

  // Invariant: at the top of the loop `line` points at a valid byte
  // (size >= 1).
  while (true) {
    // A record header occupies 9 characters: ':' LL AAAA TT.
    if (line[0] == ':' && size >= 9) {
      uint32_t byteCount = hToI(line + 1, 2);
      uint32_t nextAddr = hToI(line + 3, 4) + offset;
      uint32_t key = hToI(line + 7, 2);
      switch (key) {
      case 0:
        // Only decode the payload if it is completely inside the buffer.
        if (size >= 9 + 2 * size_t(byteCount)) {
          for (uint32_t i = 0; i < byteCount; i++) {
            uint32_t addr = nextAddr + i;
            uint32_t value = hToI(line + 9 + i * 2, 2);
            *this->get(addr) = value;
          }
        }
        break;
      case 2:
        if (size >= 13)
          offset = hToI(line + 9, 4) << 4;
        break;
      case 4:
        if (size >= 13)
          offset = hToI(line + 9, 4) << 16;
        break;
      default:
        break;
      }
    }
    // Advance to the end of the current line; test the remaining length
    // first so the byte one past the buffer is never read.
    while (size != 0 && *line != '\n') {
      ++line;
      --size;
    }
    if (size <= 1)
      break;
    // Step over the newline onto the first byte of the next line.
    ++line;
    --size;
  }
}
|
||||
163
sim/common/mem.h
Normal file
163
sim/common/mem.h
Normal file
@@ -0,0 +1,163 @@
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace vortex {
|
||||
// Empty tag type thrown by the memory devices and the address decoder when
// an access is misaligned, out of range, or not covered by any mapping.
struct BadAddress {};
|
||||
|
||||
// Abstract interface of anything that can be attached to the memory unit:
// a byte-addressable device with a size, a read and a write operation.
class MemDevice {
public:
  virtual ~MemDevice() = default;

  // Capacity of the device in bytes.
  virtual uint64_t size() const = 0;

  // Copies `size` bytes starting at device offset `addr` into `data`.
  virtual void read(void *data, uint64_t addr, uint64_t size) = 0;

  // Copies `size` bytes from `data` to device offset `addr`.
  virtual void write(const void *data, uint64_t addr, uint64_t size) = 0;
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class RamMemDevice : public MemDevice {
|
||||
public:
|
||||
RamMemDevice(uint64_t size, uint32_t wordSize);
|
||||
RamMemDevice(const char *filename, uint32_t wordSize);
|
||||
~RamMemDevice() {}
|
||||
|
||||
void read(void *data, uint64_t addr, uint64_t size) override;
|
||||
void write(const void *data, uint64_t addr, uint64_t size) override;
|
||||
|
||||
virtual uint64_t size() const {
|
||||
return contents_.size();
|
||||
};
|
||||
|
||||
protected:
|
||||
std::vector<uint8_t> contents_;
|
||||
uint32_t wordSize_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class RomMemDevice : public RamMemDevice {
|
||||
public:
|
||||
RomMemDevice(const char *filename, uint32_t wordSize)
|
||||
: RamMemDevice(filename, wordSize)
|
||||
{}
|
||||
|
||||
RomMemDevice(uint64_t size, uint32_t wordSize)
|
||||
: RamMemDevice(size, wordSize)
|
||||
{}
|
||||
|
||||
~RomMemDevice();
|
||||
|
||||
void write(const void *data, uint64_t addr, uint64_t size) override;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class MemoryUnit {
|
||||
public:
|
||||
|
||||
struct PageFault {
|
||||
PageFault(uint64_t a, bool nf)
|
||||
: faultAddr(a)
|
||||
, notFound(nf)
|
||||
{}
|
||||
uint64_t faultAddr;
|
||||
bool notFound;
|
||||
};
|
||||
|
||||
MemoryUnit(uint64_t pageSize, uint64_t addrBytes, bool disableVm = false);
|
||||
|
||||
void attach(MemDevice &m, uint64_t start, uint64_t end);
|
||||
|
||||
void read(void *data, uint64_t addr, uint64_t size, bool sup);
|
||||
void write(const void *data, uint64_t addr, uint64_t size, bool sup);
|
||||
|
||||
void tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags);
|
||||
void tlbRm(uint64_t va);
|
||||
void tlbFlush() {
|
||||
tlb_.clear();
|
||||
}
|
||||
private:
|
||||
|
||||
class ADecoder {
|
||||
public:
|
||||
ADecoder() {}
|
||||
|
||||
void read(void *data, uint64_t addr, uint64_t size);
|
||||
void write(const void *data, uint64_t addr, uint64_t size);
|
||||
|
||||
void map(uint64_t start, uint64_t end, MemDevice &md);
|
||||
|
||||
private:
|
||||
|
||||
struct mem_accessor_t {
|
||||
MemDevice* md;
|
||||
uint64_t addr;
|
||||
};
|
||||
|
||||
struct entry_t {
|
||||
MemDevice *md;
|
||||
uint64_t start;
|
||||
uint64_t end;
|
||||
};
|
||||
|
||||
bool lookup(uint64_t a, uint32_t wordSize, mem_accessor_t*);
|
||||
|
||||
std::vector<entry_t> entries_;
|
||||
};
|
||||
|
||||
struct TLBEntry {
|
||||
TLBEntry() {}
|
||||
TLBEntry(uint32_t pfn, uint32_t flags)
|
||||
: pfn(pfn)
|
||||
, flags(flags)
|
||||
{}
|
||||
uint32_t pfn;
|
||||
uint32_t flags;
|
||||
};
|
||||
|
||||
TLBEntry tlbLookup(uint64_t vAddr, uint32_t flagMask);
|
||||
|
||||
std::unordered_map<uint64_t, TLBEntry> tlb_;
|
||||
uint64_t pageSize_;
|
||||
uint64_t addrBytes_;
|
||||
ADecoder decoder_;
|
||||
bool disableVM_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class RAM : public MemDevice {
|
||||
public:
|
||||
|
||||
RAM(uint32_t num_pages, uint32_t page_size);
|
||||
|
||||
~RAM();
|
||||
|
||||
void clear();
|
||||
|
||||
uint64_t size() const override;
|
||||
void read(void *data, uint64_t addr, uint64_t size) override;
|
||||
void write(const void *data, uint64_t addr, uint64_t size) override;
|
||||
|
||||
void loadBinImage(const char* filename, uint64_t destination);
|
||||
void loadHexImage(const char* filename);
|
||||
|
||||
uint8_t& operator[](uint64_t address) {
|
||||
return *this->get(address);
|
||||
}
|
||||
|
||||
const uint8_t& operator[](uint64_t address) const {
|
||||
return *this->get(address);
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
uint8_t *get(uint32_t address) const;
|
||||
|
||||
mutable std::vector<uint8_t*> mem_;
|
||||
uint32_t page_bits_;
|
||||
uint64_t size_;
|
||||
};
|
||||
|
||||
} // namespace vortex
|
||||
92
sim/common/util.cpp
Normal file
92
sim/common/util.cpp
Normal file
@@ -0,0 +1,92 @@
|
||||
#include "util.h"
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <math.h>
|
||||
#include <climits>
|
||||
#include <string.h>
|
||||
#include <bitset>
|
||||
#include <fcntl.h>
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
// Apply integer sign extension
|
||||
// Sign-extends `w`: if anything remains after shifting `w` right by
// `bit - 1`, every bit outside `mask` is set; otherwise `w` is returned
// unchanged.
uint32_t vortex::signExt(uint32_t w, uint32_t bit, uint32_t mask) {
  const bool extend = (w >> (bit - 1)) != 0;
  return extend ? (w | ~mask) : w;
}
|
||||
|
||||
// Convert a floating point number to IEEE-754 32-bit representation,
|
||||
// so that it could be stored in a 32-bit integer register file
|
||||
// Reference: https://www.wikihow.com/Convert-a-Number-from-Decimal-to-IEEE-754-Floating-Point-Representation
|
||||
// https://www.technical-recipes.com/2012/converting-between-binary-and-decimal-representations-of-ieee-754-floating-point-numbers-in-c/
|
||||
// Returns the raw 32-bit object representation of `in_value`, so that a
// float can be stored in a 32-bit integer register.
//
// The bytes are copied with memcpy, the well-defined way to reinterpret an
// object representation in C++; this yields the same bit pattern as the
// former union + bitset + string round trip without the type punning or
// the temporary string.
uint32_t vortex::floatToBin(float in_value) {
  static_assert(sizeof(float) == sizeof(uint32_t),
                "floatToBin requires a 32-bit float");
  uint32_t bits = 0;
  memcpy(&bits, &in_value, sizeof(bits));
  return bits;
}
|
||||
|
||||
// https://en.wikipedia.org/wiki/Single-precision_floating-point_format
|
||||
// check floating-point number in binary format is NaN
|
||||
// Classifies a single-precision bit pattern as NaN.
// Returns 0 when `din` is not a NaN (exponent not all ones, or zero
// fraction), 1 when it is a NaN with the sign bit clear and fraction bit 22
// set (reported as quiet), and 2 for every other NaN (reported as
// signaling).
// NOTE(review): a NaN with bit 22 set but a negative sign is reported as 2;
// confirm that treating it as signaling is intended.
uint8_t vortex::fpBinIsNan(uint32_t din) {
  const bool negative = (din & 0x80000000) != 0;
  const uint32_t exponent = (din >> 23) & 0x000000FF;
  const uint32_t mantissa = din & 0x007FFFFF;
  const bool quiet_bit = (din & 0x00400000) != 0;

  if (exponent != 0xFF || mantissa == 0)
    return 0;
  return (!negative && quiet_bit) ? 1 : 2;
}
|
||||
|
||||
// check floating-point number in binary format is zero
|
||||
// Classifies a single-precision bit pattern as zero.
// Returns 1 for negative zero, 2 for positive zero and 0 for anything else.
uint8_t vortex::fpBinIsZero(uint32_t din) {
  const bool negative = (din & 0x80000000) != 0;
  const uint32_t exponent = (din >> 23) & 0x000000FF;
  const uint32_t mantissa = din & 0x007FFFFF;

  if (exponent != 0 || mantissa != 0)
    return 0;            // not zero
  return negative ? 1    // negative 0
                  : 2;   // positive 0
}
|
||||
|
||||
// check floating-point number in binary format is infinity
|
||||
// Classifies a single-precision bit pattern as infinity.
// Returns 1 for negative infinity, 2 for positive infinity and 0 for
// anything else.
uint8_t vortex::fpBinIsInf(uint32_t din) {
  const bool negative = (din & 0x80000000) != 0;
  const uint32_t exponent = (din >> 23) & 0x000000FF;
  const uint32_t mantissa = din & 0x007FFFFF;

  if (exponent != 0xFF || mantissa != 0)
    return 0;            // not infinity
  return negative ? 1    // negative infinity
                  : 2;   // positive infinity
}
|
||||
|
||||
// return file extension
|
||||
// Returns a pointer to the text following the last '.' in `filepath`, or an
// empty string when there is no '.' or the only '.' is the first character.
// The result points into `filepath` (or at a string literal); nothing is
// allocated.
const char* vortex::fileExtension(const char* filepath) {
  const char *dot = strrchr(filepath, '.');
  const bool usable = (dot != NULL) && (dot != filepath);
  return usable ? dot + 1 : "";
}
|
||||
44
sim/common/util.h
Normal file
44
sim/common/util.h
Normal file
@@ -0,0 +1,44 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <assert.h>
|
||||
|
||||
namespace vortex {
|
||||
|
||||
// Accepts any number of arguments of any type and does nothing with them.
template <typename... Ts>
void unused(Ts&&...) {
}
|
||||
|
||||
// Convenience wrapper that forwards all of its arguments to unused().
// NOTE(review): identifiers containing a double underscore are reserved for
// the implementation, and some system headers define their own `__unused`
// — confirm there is no clash on the supported platforms.
#define __unused(...) unused(__VA_ARGS__)
|
||||
|
||||
// True when `value` is a power of two, i.e. has exactly one bit set.
// Zero is not a power of two.
constexpr bool ispow2(uint64_t value) {
  return value != 0 && (value & (value - 1)) == 0;
}
|
||||
|
||||
// Returns ceil(log2(value)): the number of bits needed to index `value`
// distinct items.
//
// value == 1 is handled explicitly because it would otherwise evaluate
// __builtin_clz(0), whose result is undefined. For value == 0 the
// expression keeps its previous result (32), since `value - 1` wraps to
// 0xFFFFFFFF.
constexpr unsigned log2ceil(uint32_t value) {
  return (value == 1) ? 0 : 32 - __builtin_clz(value - 1);
}
|
||||
|
||||
// Rounds `size` up to the next multiple of `alignment`.
// `alignment` must be a power of two (checked with assert in debug builds).
inline uint64_t align_size(uint64_t size, uint64_t alignment) {
  const uint64_t mask = alignment - 1;
  assert((alignment & mask) == 0);
  return (size + mask) & ~mask;
}
|
||||
|
||||
// Apply integer sign extension
|
||||
uint32_t signExt(uint32_t w, uint32_t bit, uint32_t mask);
|
||||
|
||||
// Convert a floating point number to IEEE-754 32-bit representation
|
||||
uint32_t floatToBin(float in_value);
|
||||
|
||||
// check floating-point number in binary format is NaN
|
||||
uint8_t fpBinIsNan(uint32_t din);
|
||||
|
||||
// check floating-point number in binary format is zero
|
||||
uint8_t fpBinIsZero(uint32_t din);
|
||||
|
||||
// check floating-point number in binary format is infinity
|
||||
uint8_t fpBinIsInf(uint32_t din);
|
||||
|
||||
// return file extension
|
||||
const char* fileExtension(const char* filepath);
|
||||
|
||||
}
|
||||
2
sim/rtlsim/.gitignore
vendored
Normal file
2
sim/rtlsim/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
VX_config.h
|
||||
/obj_dir/*
|
||||
123
sim/rtlsim/Makefile
Normal file
123
sim/rtlsim/Makefile
Normal file
@@ -0,0 +1,123 @@
|
||||
CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors
|
||||
#CXXFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I. -I../../../hw -I../../common
|
||||
CXXFLAGS += -I$(VERILATOR_ROOT)/include -I$(VERILATOR_ROOT)/include/vltstd
|
||||
|
||||
# control RTL debug print states
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSHR
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_MEM
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
|
||||
|
||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += -DDBG_CACHE_REQ_INFO
|
||||
DBG_FLAGS += -DVCD_OUTPUT
|
||||
|
||||
SINGLECORE = -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DL2_ENABLE=0
|
||||
MULTICORE = -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
|
||||
|
||||
RTL_DIR=../../hw/rtl
|
||||
DPI_DIR=../../hw/dpi
|
||||
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(RTL_DIR)/simulate $(FPU_INCLUDE)
|
||||
|
||||
SRCS = ../common/util.cpp ../common/mem.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
|
||||
SRCS += simulator.cpp
|
||||
|
||||
# Select the Verilator top module. With AXI_BUS the C++ sources must be
# compiled with -DAXI_BUS as well so that they pick the matching generated
# model; the recipes pass CXXFLAGS (not CFLAGS) to the C++ compiler, so the
# define has to be added there.
ifdef AXI_BUS
  TOP = Vortex_axi
  CXXFLAGS += -DAXI_BUS
else
  TOP = Vortex
endif
|
||||
|
||||
VL_FLAGS = --cc $(TOP) --top-module $(TOP)
|
||||
VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic
|
||||
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
|
||||
VL_FLAGS += --x-initial unique --x-assign unique
|
||||
VL_FLAGS += verilator.vlt
|
||||
VL_FLAGS += $(CONFIGS)
|
||||
VL_FLAGS += $(RTL_INCLUDE)
|
||||
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += -DVCD_OUTPUT --trace --trace-structs $(DBG_FLAGS)
|
||||
CXXFLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
|
||||
else
|
||||
VL_FLAGS += -DNDEBUG
|
||||
CXXFLAGS += -DNDEBUG
|
||||
endif
|
||||
|
||||
# Enable perf counters
|
||||
ifdef PERF
|
||||
VL_FLAGS += -DPERF_ENABLE
|
||||
CXXFLAGS += -DPERF_ENABLE
|
||||
endif
|
||||
|
||||
# ALU backend
|
||||
VL_FLAGS += -DIMUL_DPI
|
||||
VL_FLAGS += -DIDIV_DPI
|
||||
|
||||
# FPU backend
|
||||
FPU_CORE ?= FPU_FPNEW
|
||||
VL_FLAGS += -D$(FPU_CORE)
|
||||
|
||||
THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
|
||||
|
||||
OBJS := $(patsubst %.cpp, obj_dir/%.o, $(notdir $(SRCS)))
|
||||
VPATH := $(sort $(dir $(SRCS)))
|
||||
|
||||
#$(info OBJS is $(OBJS))
|
||||
#$(info VPATH is $(VPATH))
|
||||
|
||||
PROJECT = rtlsim
|
||||
|
||||
# None of these targets creates a file named after itself, so mark them
# phony; otherwise a stray file with one of these names would disable it.
.PHONY: all build-s build-sd build-st build-m build-md build-mt

all: build-s

# Naming: s = single core, m = multi core, d = debug/trace, t = threaded.
build-s:
	verilator --build --exe main.cpp $(SRCS) $(VL_FLAGS) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CXXFLAGS) -DNDEBUG $(SINGLECORE)' -o ../$(PROJECT)

build-sd:
	verilator --build --exe main.cpp $(SRCS) $(VL_FLAGS) $(SINGLECORE) -CFLAGS '$(CXXFLAGS) $(DBG_FLAGS) $(SINGLECORE)' --trace --trace-structs $(DBG_FLAGS) -o ../$(PROJECT)

build-st:
	verilator --build --exe main.cpp $(SRCS) $(VL_FLAGS) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CXXFLAGS) -DNDEBUG $(SINGLECORE)' --threads $(THREADS) -o ../$(PROJECT)

build-m:
	verilator --build --exe main.cpp $(SRCS) $(VL_FLAGS) -DNDEBUG $(MULTICORE) -CFLAGS '$(CXXFLAGS) -DNDEBUG $(MULTICORE)' -o ../$(PROJECT)

build-md:
	verilator --build --exe main.cpp $(SRCS) $(VL_FLAGS) $(MULTICORE) -CFLAGS '$(CXXFLAGS) $(DBG_FLAGS) $(MULTICORE)' --trace --trace-structs $(DBG_FLAGS) -o ../$(PROJECT)

build-mt:
	verilator --build --exe main.cpp $(SRCS) $(VL_FLAGS) -DNDEBUG $(MULTICORE) -CFLAGS '$(CXXFLAGS) -DNDEBUG $(MULTICORE)' --threads $(THREADS) -o ../$(PROJECT)
|
||||
|
||||
# Verilate the RTL on its own (no --exe) to obtain the model archive.
obj_dir/V$(TOP)__ALL.a:
	verilator --build $(VL_FLAGS) -CFLAGS '$(CXXFLAGS)'

# Compile one support source into obj_dir; the compiler runs from inside
# obj_dir, hence the ../ prefix on the source path.
obj_dir/%.o: %.cpp
	cd obj_dir && $(CXX) $(CXXFLAGS) -c ../$< -o $(@F)

obj_dir/verilated.o: $(VERILATOR_ROOT)/include/verilated.cpp
	cd obj_dir && $(CXX) $(CXXFLAGS) -c $< -o $(@F)

# Static library: start from the model archive, then add the support
# objects and the Verilator runtime.
static: obj_dir/V$(TOP)__ALL.a $(OBJS) obj_dir/verilated.o
	cp $< lib$(PROJECT).a
	$(AR) rs lib$(PROJECT).a $(OBJS) obj_dir/verilated.o
|
||||
|
||||
# Cleanup targets never produce a file of their own name; declare them
# phony so they always run.
.PHONY: clean clean-objdir

# Remove the Verilator build directory only.
clean-objdir:
	rm -rf obj_dir

# Remove the build directory plus the simulator binary and static library.
clean: clean-objdir
	rm -rf $(PROJECT) lib$(PROJECT).a
|
||||
87
sim/rtlsim/main.cpp
Normal file
87
sim/rtlsim/main.cpp
Normal file
@@ -0,0 +1,87 @@
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <unistd.h>
|
||||
#include <unistd.h>
|
||||
#include <util.h>
|
||||
#include <mem.h>
|
||||
#include "simulator.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
// Prints the one-line command-line synopsis to standard output.
static void show_usage() {
  const char* const synopsis = "Usage: [-r] [-h: help] programs..";
  std::cout << synopsis << std::endl;
}
|
||||
|
||||
bool riscv_test = false;
|
||||
std::vector<const char*> programs;
|
||||
|
||||
static void parse_args(int argc, char **argv) {
|
||||
int c;
|
||||
while ((c = getopt(argc, argv, "rh?")) != -1) {
|
||||
switch (c) {
|
||||
case 'r':
|
||||
riscv_test = true;
|
||||
break;
|
||||
case 'h':
|
||||
case '?':
|
||||
show_usage();
|
||||
exit(0);
|
||||
break;
|
||||
default:
|
||||
show_usage();
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = optind; i < argc; ++i) {
|
||||
programs.push_back(argv[i]);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
||||
int exitcode = 0;
|
||||
bool failed = false;
|
||||
|
||||
parse_args(argc, argv);
|
||||
|
||||
for (auto program : programs) {
|
||||
std::cout << "Running " << program << "..." << std::endl;
|
||||
|
||||
vortex::RAM ram((1<<12), (1<<20));
|
||||
vortex::Simulator simulator;
|
||||
simulator.attach_ram(&ram);
|
||||
|
||||
std::string program_ext(fileExtension(program));
|
||||
if (program_ext == "bin") {
|
||||
ram.loadBinImage(program, STARTUP_ADDR);
|
||||
} else if (program_ext == "hex") {
|
||||
ram.loadHexImage(program);
|
||||
} else {
|
||||
std::cout << "*** error: only *.bin or *.hex images supported." << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
exitcode = simulator.run();
|
||||
|
||||
if (riscv_test) {
|
||||
if (1 == exitcode) {
|
||||
std::cout << "Passed" << std::endl;
|
||||
} else {
|
||||
std::cout << "Failed: exitcode=" << exitcode << std::endl;
|
||||
failed = true;
|
||||
}
|
||||
} else {
|
||||
if (exitcode != 0) {
|
||||
std::cout << "*** error: exitcode=" << exitcode << std::endl;
|
||||
failed = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (failed)
|
||||
break;
|
||||
}
|
||||
|
||||
return failed ? exitcode : 0;
|
||||
}
|
||||
578
sim/rtlsim/simulator.cpp
Normal file
578
sim/rtlsim/simulator.cpp
Normal file
@@ -0,0 +1,578 @@
|
||||
#include "simulator.h"
|
||||
|
||||
#include <verilated.h>
|
||||
|
||||
#ifdef AXI_BUS
|
||||
#include "VVortex_axi.h"
|
||||
#include "VVortex_axi__Syms.h"
|
||||
#else
|
||||
#include "VVortex.h"
|
||||
#include "VVortex__Syms.h"
|
||||
#endif
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
#include <verilated_vcd_c.h>
|
||||
#endif
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <mem.h>
|
||||
|
||||
#define ENABLE_MEM_STALLS
|
||||
|
||||
#ifndef TRACE_START_TIME
|
||||
#define TRACE_START_TIME 0ull
|
||||
#endif
|
||||
|
||||
#ifndef TRACE_STOP_TIME
|
||||
#define TRACE_STOP_TIME -1ull
|
||||
#endif
|
||||
|
||||
#ifndef MEM_LATENCY
|
||||
#define MEM_LATENCY 24
|
||||
#endif
|
||||
|
||||
#ifndef MEM_RQ_SIZE
|
||||
#define MEM_RQ_SIZE 16
|
||||
#endif
|
||||
|
||||
#ifndef MEM_STALLS_MODULO
|
||||
#define MEM_STALLS_MODULO 16
|
||||
#endif
|
||||
|
||||
#ifndef VERILATOR_RESET_VALUE
|
||||
#define VERILATOR_RESET_VALUE 2
|
||||
#endif
|
||||
|
||||
#define VL_WDATA_GETW(lwp, i, n, w) \
|
||||
VL_SEL_IWII(0, n * w, 0, 0, lwp, i * w, w)
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
static uint64_t timestamp = 0;
|
||||
|
||||
double sc_time_stamp() {
|
||||
return timestamp;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static bool trace_enabled = false;
|
||||
static uint64_t trace_start_time = TRACE_START_TIME;
|
||||
static uint64_t trace_stop_time = TRACE_STOP_TIME;
|
||||
|
||||
bool sim_trace_enabled() {
|
||||
if (timestamp >= trace_start_time
|
||||
&& timestamp < trace_stop_time)
|
||||
return true;
|
||||
return trace_enabled;
|
||||
}
|
||||
|
||||
void sim_trace_enable(bool enable) {
|
||||
trace_enabled = enable;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
namespace vortex {
|
||||
class VL_OBJ {
|
||||
public:
|
||||
#ifdef AXI_BUS
|
||||
VVortex_axi *device;
|
||||
#else
|
||||
VVortex *device;
|
||||
#endif
|
||||
#ifdef VCD_OUTPUT
|
||||
VerilatedVcdC *trace;
|
||||
#endif
|
||||
|
||||
VL_OBJ() {
|
||||
// force random values for unitialized signals
|
||||
Verilated::randReset(VERILATOR_RESET_VALUE);
|
||||
Verilated::randSeed(50);
|
||||
|
||||
// Turn off assertion before reset
|
||||
Verilated::assertOn(false);
|
||||
|
||||
#ifdef AXI_BUS
|
||||
this->device = new VVortex_axi();
|
||||
#else
|
||||
this->device = new VVortex();
|
||||
#endif
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
Verilated::traceEverOn(true);
|
||||
this->trace = new VerilatedVcdC();
|
||||
this->device->trace(this->trace, 99);
|
||||
this->trace->open("trace.vcd");
|
||||
#endif
|
||||
}
|
||||
|
||||
~VL_OBJ() {
|
||||
#ifdef VCD_OUTPUT
|
||||
this->trace->close();
|
||||
delete this->trace;
|
||||
#endif
|
||||
delete this->device;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Creates the Verilated device wrapper, starts without a RAM attached and
// leaves the device freshly reset.
Simulator::Simulator() {
  ram_ = nullptr;
  vl_obj_ = new VL_OBJ();

  // reset the device
  reset();
}
|
||||
|
||||
// Prints any text still held in the per-key print buffers as
// "#<key>: <text>", then destroys the device wrapper.
Simulator::~Simulator() {
  for (auto& entry : print_bufs_) {
    auto pending = entry.second.str();
    if (pending.empty())
      continue;
    std::cout << "#" << entry.first << ": " << pending << std::endl;
  }
  delete vl_obj_;
}
|
||||
|
||||
// Sets the RAM that backs the simulated memory bus and discards any memory
// responses still queued for the previous one.
void Simulator::attach_ram(RAM* ram) {
  ram_ = ram;
  int bank = 0;
  while (bank < MEMORY_BANKS) {
    mem_rsp_vec_[bank].clear();
    ++bank;
  }
  last_mem_rsp_bank_ = 0;
}
|
||||
|
||||
void Simulator::reset() {
|
||||
print_bufs_.clear();
|
||||
|
||||
for (int b = 0; b < MEMORY_BANKS; ++b) {
|
||||
mem_rsp_vec_[b].clear();
|
||||
}
|
||||
last_mem_rsp_bank_ = 0;
|
||||
mem_rd_rsp_active_ = false;
|
||||
mem_wr_rsp_active_ = false;
|
||||
|
||||
#ifdef AXI_BUS
|
||||
this->reset_axi_bus();
|
||||
#else
|
||||
this->reset_mem_bus();
|
||||
#endif
|
||||
|
||||
vl_obj_->device->reset = 1;
|
||||
|
||||
for (int i = 0; i < RESET_DELAY; ++i) {
|
||||
vl_obj_->device->clk = 0;
|
||||
this->eval();
|
||||
vl_obj_->device->clk = 1;
|
||||
this->eval();
|
||||
}
|
||||
|
||||
vl_obj_->device->reset = 0;
|
||||
|
||||
// Turn on assertion after reset
|
||||
Verilated::assertOn(true);
|
||||
}
|
||||
|
||||
// Advances the device by one clock cycle: for the low and then the high
// clock phase, drive the clock, evaluate the model, and let the memory-bus
// model react to that phase.
void Simulator::step() {
  for (int level = 0; level < 2; ++level) {
    vl_obj_->device->clk = level;
    this->eval();

  #ifdef AXI_BUS
    this->eval_axi_bus(level);
  #else
    this->eval_mem_bus(level);
  #endif
  }

#ifndef NDEBUG
  fflush(stdout);
#endif
}
|
||||
|
||||
// Evaluates the model once, dumps the signals to the VCD trace when tracing
// is compiled in and currently enabled, and advances the global timestamp
// by one tick.
void Simulator::eval() {
  auto* const obj = vl_obj_;
  obj->device->eval();
#ifdef VCD_OUTPUT
  if (sim_trace_enabled()) {
    obj->trace->dump(timestamp);
  }
#endif
  timestamp += 1;
}
|
||||
|
||||
#ifdef AXI_BUS
|
||||
|
||||
// Deasserts the AXI handshake signals driven by this testbench: the three
// "ready" inputs for the write-data, write-address and read-address
// channels, and the read-response "valid".
void Simulator::reset_axi_bus() {
  auto* const dev = vl_obj_->device;
  dev->m_axi_wready = 0;
  dev->m_axi_awready = 0;
  dev->m_axi_arready = 0;
  dev->m_axi_rvalid = 0;
}
|
||||
|
||||
void Simulator::eval_axi_bus(bool clk) {
|
||||
if (!clk) {
|
||||
mem_rd_rsp_ready_ = vl_obj_->device->m_axi_rready;
|
||||
mem_wr_rsp_ready_ = vl_obj_->device->m_axi_bready;
|
||||
return;
|
||||
}
|
||||
|
||||
if (ram_ == nullptr) {
|
||||
vl_obj_->device->m_axi_wready = 0;
|
||||
vl_obj_->device->m_axi_awready = 0;
|
||||
vl_obj_->device->m_axi_arready = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
// update memory responses schedule
|
||||
for (int b = 0; b < MEMORY_BANKS; ++b) {
|
||||
for (auto& rsp : mem_rsp_vec_[b]) {
|
||||
if (rsp.cycles_left > 0)
|
||||
rsp.cycles_left -= 1;
|
||||
}
|
||||
}
|
||||
|
||||
bool has_rd_response = false;
|
||||
bool has_wr_response = false;
|
||||
|
||||
// schedule memory responses that are ready
|
||||
for (int i = 0; i < MEMORY_BANKS; ++i) {
|
||||
uint32_t b = (i + last_mem_rsp_bank_ + 1) % MEMORY_BANKS;
|
||||
if (!mem_rsp_vec_[b].empty()) {
|
||||
auto mem_rsp_it = mem_rsp_vec_[b].begin();
|
||||
if (mem_rsp_it->cycles_left <= 0) {
|
||||
has_rd_response = !mem_rsp_it->write;
|
||||
has_wr_response = mem_rsp_it->write;
|
||||
last_mem_rsp_bank_ = b;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// send memory read response
|
||||
if (mem_rd_rsp_active_
|
||||
&& vl_obj_->device->m_axi_rvalid && mem_rd_rsp_ready_) {
|
||||
mem_rd_rsp_active_ = false;
|
||||
}
|
||||
if (!mem_rd_rsp_active_) {
|
||||
if (has_rd_response) {
|
||||
auto mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin();
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Rd Rsp: bank=%d, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_rsp_it->addr);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
printf("%02x", mem_rsp_it->block[(MEM_BLOCK_SIZE-1)-i]);
|
||||
}
|
||||
printf("\n");
|
||||
*/
|
||||
vl_obj_->device->m_axi_rvalid = 1;
|
||||
vl_obj_->device->m_axi_rid = mem_rsp_it->tag;
|
||||
vl_obj_->device->m_axi_rresp = 0;
|
||||
vl_obj_->device->m_axi_rlast = 1;
|
||||
memcpy((uint8_t*)vl_obj_->device->m_axi_rdata, mem_rsp_it->block.data(), MEM_BLOCK_SIZE);
|
||||
mem_rsp_vec_[last_mem_rsp_bank_].erase(mem_rsp_it);
|
||||
mem_rd_rsp_active_ = true;
|
||||
} else {
|
||||
vl_obj_->device->m_axi_rvalid = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// send memory write response
|
||||
if (mem_wr_rsp_active_
|
||||
&& vl_obj_->device->m_axi_bvalid && mem_wr_rsp_ready_) {
|
||||
mem_wr_rsp_active_ = false;
|
||||
}
|
||||
if (!mem_wr_rsp_active_) {
|
||||
if (has_wr_response) {
|
||||
auto mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin();
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Wr Rsp: bank=%d, addr=%0lx\n", timestamp, last_mem_rsp_bank_, mem_rsp_it->addr);
|
||||
*/
|
||||
vl_obj_->device->m_axi_bvalid = 1;
|
||||
vl_obj_->device->m_axi_bid = mem_rsp_it->tag;
|
||||
vl_obj_->device->m_axi_bresp = 0;
|
||||
mem_rsp_vec_[last_mem_rsp_bank_].erase(mem_rsp_it);
|
||||
mem_wr_rsp_active_ = true;
|
||||
} else {
|
||||
vl_obj_->device->m_axi_bvalid = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// select the memory bank
|
||||
uint32_t req_addr = vl_obj_->device->m_axi_wvalid ? vl_obj_->device->m_axi_awaddr : vl_obj_->device->m_axi_araddr;
|
||||
uint32_t req_bank = (MEMORY_BANKS >= 2) ? ((req_addr / MEM_BLOCK_SIZE) % MEMORY_BANKS) : 0;
|
||||
|
||||
// handle memory stalls
|
||||
bool mem_stalled = false;
|
||||
#ifdef ENABLE_MEM_STALLS
|
||||
if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) {
|
||||
mem_stalled = true;
|
||||
} else
|
||||
if (mem_rsp_vec_[req_bank].size() >= MEM_RQ_SIZE) {
|
||||
mem_stalled = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
// process memory requests
|
||||
if (!mem_stalled) {
|
||||
if (vl_obj_->device->m_axi_wvalid || vl_obj_->device->m_axi_arvalid) {
|
||||
if (vl_obj_->device->m_axi_wvalid) {
|
||||
uint64_t byteen = vl_obj_->device->m_axi_wstrb;
|
||||
unsigned base_addr = vl_obj_->device->m_axi_awaddr;
|
||||
uint8_t* data = (uint8_t*)(vl_obj_->device->m_axi_wdata);
|
||||
|
||||
// detect stdout write
|
||||
if (base_addr >= IO_COUT_ADDR
|
||||
&& base_addr <= (IO_COUT_ADDR + IO_COUT_SIZE - 1)) {
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
auto& ss_buf = print_bufs_[i];
|
||||
char c = data[i];
|
||||
ss_buf << c;
|
||||
if (c == '\n') {
|
||||
std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush;
|
||||
ss_buf.str("");
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Wr: addr=%0x, byteen=%0lx, data=", timestamp, base_addr, byteen);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]);
|
||||
}
|
||||
printf("\n");
|
||||
*/
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
(*ram_)[base_addr + i] = data[i];
|
||||
}
|
||||
}
|
||||
mem_req_t mem_req;
|
||||
mem_req.tag = vl_obj_->device->m_axi_arid;
|
||||
mem_req.addr = vl_obj_->device->m_axi_araddr;
|
||||
mem_req.cycles_left = 0;
|
||||
mem_req.write = 1;
|
||||
mem_rsp_vec_[req_bank].emplace_back(mem_req);
|
||||
}
|
||||
} else {
|
||||
mem_req_t mem_req;
|
||||
mem_req.tag = vl_obj_->device->m_axi_arid;
|
||||
mem_req.addr = vl_obj_->device->m_axi_araddr;
|
||||
ram_->read(vl_obj_->device->m_axi_araddr, MEM_BLOCK_SIZE, mem_req.block.data());
|
||||
mem_req.cycles_left = MEM_LATENCY;
|
||||
mem_req.write = 0;
|
||||
for (auto& rsp : mem_rsp_vec_[req_bank]) {
|
||||
if (mem_req.addr == rsp.addr) {
|
||||
// duplicate requests receive the same cycle delay
|
||||
mem_req.cycles_left = rsp.cycles_left;
|
||||
break;
|
||||
}
|
||||
}
|
||||
mem_rsp_vec_[req_bank].emplace_back(mem_req);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
vl_obj_->device->m_axi_wready = !mem_stalled;
|
||||
vl_obj_->device->m_axi_awready = !mem_stalled;
|
||||
vl_obj_->device->m_axi_arready = !mem_stalled;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// Deasserts the two handshake signals this simulator drives on the native
// memory bus: request-ready and response-valid.
void Simulator::reset_mem_bus() {
  auto& dev = *vl_obj_->device;
  dev.mem_req_ready = 0;
  dev.mem_rsp_valid = 0;
}
|
||||
|
||||
void Simulator::eval_mem_bus(bool clk) {
|
||||
if (!clk) {
|
||||
mem_rd_rsp_ready_ = vl_obj_->device->mem_rsp_ready;
|
||||
return;
|
||||
}
|
||||
|
||||
if (ram_ == nullptr) {
|
||||
vl_obj_->device->mem_req_ready = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
// update memory responses schedule
|
||||
for (int b = 0; b < MEMORY_BANKS; ++b) {
|
||||
for (auto& rsp : mem_rsp_vec_[b]) {
|
||||
if (rsp.cycles_left > 0)
|
||||
rsp.cycles_left -= 1;
|
||||
}
|
||||
}
|
||||
|
||||
bool has_response = false;
|
||||
|
||||
// schedule memory responses that are ready
|
||||
for (int i = 0; i < MEMORY_BANKS; ++i) {
|
||||
uint32_t b = (i + last_mem_rsp_bank_ + 1) % MEMORY_BANKS;
|
||||
if (!mem_rsp_vec_[b].empty()
|
||||
&& (mem_rsp_vec_[b].begin()->cycles_left) <= 0) {
|
||||
has_response = true;
|
||||
last_mem_rsp_bank_ = b;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// send memory response
|
||||
if (mem_rd_rsp_active_
|
||||
&& vl_obj_->device->mem_rsp_valid && mem_rd_rsp_ready_) {
|
||||
mem_rd_rsp_active_ = false;
|
||||
}
|
||||
if (!mem_rd_rsp_active_) {
|
||||
if (has_response) {
|
||||
vl_obj_->device->mem_rsp_valid = 1;
|
||||
auto mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin();
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Rd: bank=%d, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_rsp_it->addr);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
printf("%02x", mem_rsp_it->block[(MEM_BLOCK_SIZE-1)-i]);
|
||||
}
|
||||
printf("\n");
|
||||
*/
|
||||
memcpy((uint8_t*)vl_obj_->device->mem_rsp_data, mem_rsp_it->block.data(), MEM_BLOCK_SIZE);
|
||||
vl_obj_->device->mem_rsp_tag = mem_rsp_it->tag;
|
||||
mem_rsp_vec_[last_mem_rsp_bank_].erase(mem_rsp_it);
|
||||
mem_rd_rsp_active_ = true;
|
||||
} else {
|
||||
vl_obj_->device->mem_rsp_valid = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// select the memory bank
|
||||
uint32_t req_bank = (MEMORY_BANKS >= 2) ? (vl_obj_->device->mem_req_addr % MEMORY_BANKS) : 0;
|
||||
|
||||
// handle memory stalls
|
||||
bool mem_stalled = false;
|
||||
#ifdef ENABLE_MEM_STALLS
|
||||
if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) {
|
||||
mem_stalled = true;
|
||||
} else
|
||||
if (mem_rsp_vec_[req_bank].size() >= MEM_RQ_SIZE) {
|
||||
mem_stalled = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
// process memory requests
|
||||
if (!mem_stalled) {
|
||||
if (vl_obj_->device->mem_req_valid) {
|
||||
if (vl_obj_->device->mem_req_rw) {
|
||||
uint64_t byteen = vl_obj_->device->mem_req_byteen;
|
||||
unsigned base_addr = (vl_obj_->device->mem_req_addr * MEM_BLOCK_SIZE);
|
||||
uint8_t* data = (uint8_t*)(vl_obj_->device->mem_req_data);
|
||||
if (base_addr >= IO_COUT_ADDR
|
||||
&& base_addr <= (IO_COUT_ADDR + IO_COUT_SIZE - 1)) {
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
auto& ss_buf = print_bufs_[i];
|
||||
char c = data[i];
|
||||
ss_buf << c;
|
||||
if (c == '\n') {
|
||||
std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush;
|
||||
ss_buf.str("");
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Wr: addr=%0x, byteen=%0lx, data=", timestamp, base_addr, byteen);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]);
|
||||
}
|
||||
printf("\n");
|
||||
*/
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
(*ram_)[base_addr + i] = data[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
mem_req_t mem_req;
|
||||
mem_req.tag = vl_obj_->device->mem_req_tag;
|
||||
mem_req.addr = (vl_obj_->device->mem_req_addr * MEM_BLOCK_SIZE);
|
||||
ram_->read(mem_req.block.data(), vl_obj_->device->mem_req_addr * MEM_BLOCK_SIZE, MEM_BLOCK_SIZE);
|
||||
mem_req.cycles_left = MEM_LATENCY;
|
||||
for (auto& rsp : mem_rsp_vec_[req_bank]) {
|
||||
if (mem_req.addr == rsp.addr) {
|
||||
// duplicate requests receive the same cycle delay
|
||||
mem_req.cycles_left = rsp.cycles_left;
|
||||
break;
|
||||
}
|
||||
}
|
||||
mem_rsp_vec_[req_bank].emplace_back(mem_req);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
vl_obj_->device->mem_req_ready = !mem_stalled;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
void Simulator::wait(uint32_t cycles) {
|
||||
for (int i = 0; i < cycles; ++i) {
|
||||
this->step();
|
||||
}
|
||||
}
|
||||
|
||||
bool Simulator::is_busy() const {
|
||||
return vl_obj_->device->busy;
|
||||
}
|
||||
|
||||
int Simulator::run() {
|
||||
int exitcode = 0;
|
||||
|
||||
#ifndef NDEBUG
|
||||
std::cout << std::dec << timestamp << ": [sim] run()" << std::endl;
|
||||
#endif
|
||||
|
||||
// execute program
|
||||
while (vl_obj_->device->busy) {
|
||||
if (get_ebreak()) {
|
||||
exitcode = get_last_wb_value(3);
|
||||
break;
|
||||
}
|
||||
this->step();
|
||||
}
|
||||
|
||||
// wait 5 cycles to flush the pipeline
|
||||
this->wait(5);
|
||||
|
||||
return exitcode;
|
||||
}
|
||||
|
||||
bool Simulator::get_ebreak() const {
|
||||
#ifdef AXI_BUS
|
||||
return (int)vl_obj_->device->Vortex_axi->vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->execute->ebreak;
|
||||
#else
|
||||
return (int)vl_obj_->device->Vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->execute->ebreak;
|
||||
#endif
|
||||
}
|
||||
|
||||
int Simulator::get_last_wb_value(int reg) const {
|
||||
#ifdef AXI_BUS
|
||||
return (int)vl_obj_->device->Vortex_axi->vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg];
|
||||
#else
|
||||
return (int)vl_obj_->device->Vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg];
|
||||
#endif
|
||||
}
|
||||
|
||||
// Writes the total cycle count to `out`. The timestamp advances once per
// eval() and step() calls eval() twice, hence the division by two.
void Simulator::print_stats(std::ostream& out) {
  const auto total_cycles = timestamp / 2;
  out << std::left;
  out << std::setw(24) << "# of total cycles:" << std::dec << total_cycles << std::endl;
}
|
||||
81
sim/rtlsim/simulator.h
Normal file
81
sim/rtlsim/simulator.h
Normal file
@@ -0,0 +1,81 @@
|
||||
#pragma once

#include <VX_config.h>
#include <array>
#include <cstdint>
#include <list>
#include <ostream>
#include <sstream>
#include <unordered_map>
#include <vector>

// Number of simulated memory banks; may be overridden by the build or by the
// platform parameter.
#ifndef MEMORY_BANKS
  #ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
    #define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS
  #else
    #define MEMORY_BANKS 2
  #endif
#endif

namespace vortex {

class VL_OBJ;
class RAM;

// RTL simulator front-end: drives the clock/reset of the Verilated device
// and models the memory attached to its bus (AXI or native, selected by
// AXI_BUS).
class Simulator {
public:

  Simulator();
  virtual ~Simulator();

  // Sets the RAM that backs memory requests.
  void attach_ram(RAM* ram);

  // Current value of the device's busy output.
  bool is_busy() const;

  void reset();

  // Runs one full clock cycle.
  void step();

  // Runs `cycles` clock cycles.
  void wait(uint32_t cycles);

  // Runs until the device is idle or an ebreak is seen; returns the exit code.
  int run();

  void print_stats(std::ostream& out);

private:

  // A memory request waiting to be answered.
  struct mem_req_t {
    int cycles_left;                           // remaining delay, in cycles
    std::array<uint8_t, MEM_BLOCK_SIZE> block; // read data
    uint64_t addr;
    uint64_t tag;
    bool write;                                // true for a write acknowledge
  };

  // per-lane console line buffers, flushed on newline
  std::unordered_map<int, std::stringstream> print_bufs_;

  void eval();

#ifdef AXI_BUS
  void reset_axi_bus();
  void eval_axi_bus(bool clk);
#else
  void reset_mem_bus();
  void eval_mem_bus(bool clk);
#endif

  int get_last_wb_value(int reg) const;

  bool get_ebreak() const;

  // pending responses, one queue per bank
  std::list<mem_req_t> mem_rsp_vec_ [MEMORY_BANKS];
  // bank that supplied the most recent response (round-robin cursor)
  uint32_t last_mem_rsp_bank_;

  bool mem_rd_rsp_active_;
  bool mem_rd_rsp_ready_;

  bool mem_wr_rsp_active_;
  bool mem_wr_rsp_ready_;

  RAM *ram_;

  VL_OBJ* vl_obj_;
};

}
|
||||
10
sim/rtlsim/verilator.vlt
Normal file
10
sim/rtlsim/verilator.vlt
Normal file
@@ -0,0 +1,10 @@
|
||||
`verilator_config
|
||||
|
||||
lint_off -rule BLKANDNBLK -file "../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule UNOPTFLAT -file "../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule WIDTH -file "../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule UNUSED -file "../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule LITENDIAN -file "../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule IMPORTSTAR -file "../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule PINCONNECTEMPTY -file "../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -file "../../hw/rtl/fp_cores/fpnew/*"
|
||||
49
sim/simX/Makefile
Normal file
49
sim/simX/Makefile
Normal file
@@ -0,0 +1,49 @@
|
||||
#CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors
|
||||
CXXFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I. -I../common -I../../hw
|
||||
CXXFLAGS += -DDUMP_PERF_STATS
|
||||
|
||||
TOP = vx_cache_sim
|
||||
|
||||
RTL_DIR = ../hw/rtl
|
||||
|
||||
PROJECT = simX
|
||||
|
||||
SRCS = ../common/util.cpp ../common/mem.cpp
|
||||
SRCS += args.cpp pipeline.cpp warp.cpp core.cpp decode.cpp execute.cpp main.cpp
|
||||
|
||||
OBJS := $(patsubst %.cpp, obj_dir/%.o, $(notdir $(SRCS)))
|
||||
VPATH := $(sort $(dir $(SRCS)))
|
||||
|
||||
#$(info OBJS is $(OBJS))
|
||||
#$(info VPATH is $(VPATH))
|
||||
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
CXXFLAGS += -DDEBUG_LEVEL=$(DEBUG)
|
||||
else
|
||||
CXXFLAGS += -DNDEBUG
|
||||
endif
|
||||
|
||||
all: $(PROJECT)
|
||||
|
||||
$(PROJECT): $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
|
||||
|
||||
obj_dir/%.o: %.cpp
|
||||
mkdir -p obj_dir
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||
|
||||
static: $(OBJS)
|
||||
$(AR) rs lib$(PROJECT).a $(OBJS)
|
||||
|
||||
.depend: $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
|
||||
|
||||
clean-objdir:
|
||||
rm -rf obj_dir .depend
|
||||
|
||||
clean: clean-objdir
|
||||
rm -rf $(PROJECT) lib$(PROJECT).a
|
||||
72
sim/simX/archdef.h
Normal file
72
sim/simX/archdef.h
Normal file
@@ -0,0 +1,72 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
|
||||
#include <cstdlib>
|
||||
#include <stdio.h>
|
||||
#include "types.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class ArchDef {
|
||||
public:
|
||||
ArchDef(const std::string &/*arch*/,
|
||||
int num_cores,
|
||||
int num_warps,
|
||||
int num_threads) {
|
||||
wsize_ = 4;
|
||||
vsize_ = 16;
|
||||
num_regs_ = 32;
|
||||
num_csrs_ = 4096;
|
||||
num_barriers_= NUM_BARRIERS;
|
||||
num_cores_ = num_cores;
|
||||
num_warps_ = num_warps;
|
||||
num_threads_ = num_threads;
|
||||
}
|
||||
|
||||
int wsize() const {
|
||||
return wsize_;
|
||||
}
|
||||
|
||||
int vsize() const {
|
||||
return vsize_;
|
||||
}
|
||||
|
||||
int num_regs() const {
|
||||
return num_regs_;
|
||||
}
|
||||
|
||||
int num_csrs() const {
|
||||
return num_csrs_;
|
||||
}
|
||||
|
||||
int num_barriers() const {
|
||||
return num_barriers_;
|
||||
}
|
||||
|
||||
int num_threads() const {
|
||||
return num_threads_;
|
||||
}
|
||||
|
||||
int num_warps() const {
|
||||
return num_warps_;
|
||||
}
|
||||
|
||||
int num_cores() const {
|
||||
return num_cores_;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
int wsize_;
|
||||
int vsize_;
|
||||
int num_regs_;
|
||||
int num_csrs_;
|
||||
int num_barriers_;
|
||||
int num_threads_;
|
||||
int num_warps_;
|
||||
int num_cores_;
|
||||
};
|
||||
|
||||
}
|
||||
47
sim/simX/args.cpp
Normal file
47
sim/simX/args.cpp
Normal file
@@ -0,0 +1,47 @@
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include "args.h"
|
||||
|
||||
using namespace vortex;
|
||||
using std::string;
|
||||
|
||||
std::string CommandLineArg::helpString_;
|
||||
std::unordered_map<string, CommandLineArg *> CommandLineArg::longArgs_;
|
||||
std::unordered_map<string, CommandLineArg *> CommandLineArg::shortArgs_;
|
||||
|
||||
// Registers this argument under both its short name `s` and long name `l`,
// and appends its help text to the shared help string.
CommandLineArg::CommandLineArg(std::string s, std::string l, const char *helpText) {
  helpString_.append(helpText);
  shortArgs_[s] = this;
  longArgs_[l] = this;
}
|
||||
|
||||
// Registers this argument under its long name `l` only, and appends its
// help text to the shared help string.
CommandLineArg::CommandLineArg(std::string l, const char *helpText) {
  helpString_.append(helpText);
  longArgs_[l] = this;
}
|
||||
|
||||
void CommandLineArg::readArgs(int argc, char **argv) {
|
||||
for (int i = 0; i < argc; i++) {
|
||||
std::unordered_map<string, CommandLineArg *>::iterator
|
||||
s = shortArgs_.find(std::string(argv[i])),
|
||||
l = longArgs_.find(std::string(argv[i]));
|
||||
|
||||
if (s != shortArgs_.end()) {
|
||||
i += s->second->read(argc - i, &argv[i]);
|
||||
} else if (l != longArgs_.end()) {
|
||||
i += l->second->read(argc - i, &argv[i]);
|
||||
} else {
|
||||
throw BadArg(string(argv[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Empties both name registries and the accumulated help text.
void CommandLineArg::clearArgs() {
  helpString_ = "";
  longArgs_.clear();
  shortArgs_.clear();
}
|
||||
|
||||
// Writes the accumulated help text of all registered arguments to `os`.
void CommandLineArg::showHelp(std::ostream &os) {
  const std::string& text = helpString_;
  os << text;
}
|
||||
64
sim/simX/args.h
Normal file
64
sim/simX/args.h
Normal file
@@ -0,0 +1,64 @@
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <unordered_map>
|
||||
#include <util.h>
|
||||
|
||||
namespace vortex {
|
||||
|
||||
// Thrown when a command-line entry cannot be handled; `arg` holds the
// offending text.
struct BadArg {
  BadArg(std::string s)
    : arg(s)
  {}

  std::string arg;
};
|
||||
|
||||
// Base class for command-line options. Every instance registers itself in
// class-wide lookup tables keyed by option name; readArgs() then dispatches
// each command-line entry to the matching instance's read().
class CommandLineArg {
public:
  CommandLineArg(std::string s, std::string l, const char *helpText);
  CommandLineArg(std::string l, const char *helpText);

  // Polymorphic base: make destruction through a base pointer well-defined.
  virtual ~CommandLineArg() = default;

  // Handles the option at argv[0]; argc counts the entries remaining from
  // there. Returns how many additional entries were consumed.
  virtual int read(int argc, char** argv) = 0;

  static void readArgs(int argc, char **argv);
  static void clearArgs();
  static void showHelp(std::ostream &os);

private:
  static std::string helpString_;
  static std::unordered_map<std::string, CommandLineArg *> longArgs_;
  static std::unordered_map<std::string, CommandLineArg *> shortArgs_;
};
|
||||
|
||||
template <typename T> class CommandLineArgSetter : public CommandLineArg {
|
||||
public:
|
||||
CommandLineArgSetter(std::string s, std::string l, const char *ht, T &x) :
|
||||
CommandLineArg(s, l, ht), arg_(x) {}
|
||||
|
||||
CommandLineArgSetter(std::string l, const char *ht, T &x) :
|
||||
CommandLineArg(l, ht), arg_(x) {}
|
||||
|
||||
int read(int argc, char **argv) {
|
||||
__unused(argc);
|
||||
std::istringstream iss(argv[1]);
|
||||
iss >> arg_;
|
||||
return 1;
|
||||
}
|
||||
private:
|
||||
T &arg_;
|
||||
};
|
||||
|
||||
class CommandLineArgFlag : public CommandLineArg {
|
||||
public:
|
||||
CommandLineArgFlag(std::string s, std::string l, const char *ht, bool &x) :
|
||||
CommandLineArg(s, l, ht), arg_(x) { arg_ = false; }
|
||||
|
||||
CommandLineArgFlag(std::string l, const char *ht, bool &x) :
|
||||
CommandLineArg(l, ht), arg_(x) { arg_ = false; }
|
||||
|
||||
int read(int argc, char **argv) {
|
||||
__unused(argc, argv);
|
||||
arg_ = true;
|
||||
return 0;
|
||||
}
|
||||
private:
|
||||
bool &arg_;
|
||||
};
|
||||
|
||||
}
|
||||
393
sim/simX/core.cpp
Normal file
393
sim/simX/core.cpp
Normal file
@@ -0,0 +1,393 @@
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <util.h>
|
||||
#include "types.h"
|
||||
#include "archdef.h"
|
||||
#include "mem.h"
|
||||
#include "decode.h"
|
||||
#include "core.h"
|
||||
#include "debug.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
// Builds a core: sizes the per-warp register scoreboards, CSR/FCSR storage
// and barrier masks from the architecture definition, creates one Warp per
// hardware warp, and puts everything into its initial state via clear().
Core::Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id)
    : id_(id)
    , arch_(arch)
    , decoder_(decoder)
    , mem_(mem)
    , shared_mem_(1, SMEM_SIZE)
    , inst_in_schedule_("schedule")
    , inst_in_fetch_("fetch")
    , inst_in_decode_("decode")
    , inst_in_issue_("issue")
    , inst_in_execute_("execute")
    , inst_in_writeback_("writeback") {
  const int warp_count = arch.num_warps();

  // register scoreboards
  in_use_iregs_.resize(warp_count, 0);
  in_use_fregs_.resize(warp_count, 0);
  in_use_vregs_.reset();

  // control/status state
  csrs_.resize(arch_.num_csrs(), 0);
  fcsrs_.resize(arch_.num_warps(), 0);
  barriers_.resize(arch_.num_barriers(), 0);

  // warps
  warps_.resize(arch_.num_warps());
  int wid = 0;
  while (wid < arch_.num_warps()) {
    warps_[wid] = std::make_shared<Warp>(this, wid);
    ++wid;
  }

  this->clear();
}
|
||||
|
||||
// Flushes any console text still sitting in the per-thread print buffers
// (lines that never received their terminating newline).
Core::~Core() {
  for (auto& entry : print_bufs_) {
    const auto leftover = entry.second.str();
    if (leftover.empty())
      continue;
    std::cout << "#" << entry.first << ": " << leftover << std::endl;
  }
}
|
||||
|
||||
void Core::clear() {
|
||||
for (int w = 0; w < arch_.num_warps(); ++w) {
|
||||
in_use_iregs_[w].reset();
|
||||
in_use_fregs_[w].reset();
|
||||
}
|
||||
stalled_warps_.reset();
|
||||
|
||||
in_use_vregs_.reset();
|
||||
|
||||
for (auto& csr : csrs_) {
|
||||
csr = 0;
|
||||
}
|
||||
|
||||
for (auto& fcsr : fcsrs_) {
|
||||
fcsr = 0;
|
||||
}
|
||||
|
||||
for (auto& barrier : barriers_) {
|
||||
barrier.reset();
|
||||
}
|
||||
|
||||
for (auto warp : warps_) {
|
||||
warp->clear();
|
||||
}
|
||||
|
||||
inst_in_schedule_.clear();
|
||||
inst_in_fetch_.clear();
|
||||
inst_in_decode_.clear();
|
||||
inst_in_issue_.clear();
|
||||
inst_in_execute_.clear();
|
||||
inst_in_writeback_.clear();
|
||||
print_bufs_.clear();
|
||||
|
||||
steps_ = 0;
|
||||
insts_ = 0;
|
||||
loads_ = 0;
|
||||
stores_ = 0;
|
||||
|
||||
inst_in_schedule_.valid = true;
|
||||
warps_[0]->setTmask(0, true);
|
||||
|
||||
ebreak_ = false;
|
||||
}
|
||||
|
||||
void Core::step() {
|
||||
D(2, "###########################################################");
|
||||
|
||||
steps_++;
|
||||
D(2, std::dec << "Core" << id_ << ": cycle: " << steps_);
|
||||
|
||||
this->writeback();
|
||||
this->execute();
|
||||
this->issue();
|
||||
this->decode();
|
||||
this->fetch();
|
||||
this->schedule();
|
||||
|
||||
DPN(2, std::flush);
|
||||
}
|
||||
|
||||
void Core::schedule() {
|
||||
if (!inst_in_schedule_.enter(&inst_in_fetch_))
|
||||
return;
|
||||
|
||||
bool foundSchedule = false;
|
||||
int scheduled_warp = inst_in_schedule_.wid;
|
||||
|
||||
for (size_t wid = 0; wid < warps_.size(); ++wid) {
|
||||
// round robin scheduling
|
||||
scheduled_warp = (scheduled_warp + 1) % warps_.size();
|
||||
bool is_active = warps_[scheduled_warp]->active();
|
||||
bool stalled = stalled_warps_[scheduled_warp];
|
||||
if (is_active && !stalled) {
|
||||
foundSchedule = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!foundSchedule)
|
||||
return;
|
||||
|
||||
D(2, "Schedule: wid=" << scheduled_warp);
|
||||
inst_in_schedule_.wid = scheduled_warp;
|
||||
|
||||
// advance pipeline
|
||||
inst_in_schedule_.next(&inst_in_fetch_);
|
||||
}
|
||||
|
||||
void Core::fetch() {
|
||||
if (!inst_in_fetch_.enter(&inst_in_issue_))
|
||||
return;
|
||||
|
||||
int wid = inst_in_fetch_.wid;
|
||||
|
||||
auto active_threads_b = warps_[wid]->getActiveThreads();
|
||||
warps_[wid]->step(&inst_in_fetch_);
|
||||
auto active_threads_a = warps_[wid]->getActiveThreads();
|
||||
|
||||
insts_ += active_threads_b;
|
||||
if (active_threads_b != active_threads_a) {
|
||||
D(3, "*** warp#" << wid << " active threads changed to " << active_threads_a);
|
||||
}
|
||||
|
||||
if (inst_in_fetch_.stall_warp) {
|
||||
D(3, "*** warp#" << wid << " fetch stalled");
|
||||
stalled_warps_[wid] = true;
|
||||
}
|
||||
|
||||
D(4, inst_in_fetch_);
|
||||
|
||||
// advance pipeline
|
||||
inst_in_fetch_.next(&inst_in_issue_);
|
||||
}
|
||||
|
||||
// Decode stage: performs no work of its own; forwards its latch to issue
// whenever enter() allows it.
void Core::decode() {
  const bool can_enter = inst_in_decode_.enter(&inst_in_issue_);
  if (can_enter) {
    // advance pipeline
    inst_in_decode_.next(&inst_in_issue_);
  }
}
|
||||
|
||||
void Core::issue() {
|
||||
if (!inst_in_issue_.enter(&inst_in_execute_))
|
||||
return;
|
||||
|
||||
bool in_use_regs = (inst_in_issue_.used_iregs & in_use_iregs_[inst_in_issue_.wid]) != 0
|
||||
|| (inst_in_issue_.used_fregs & in_use_fregs_[inst_in_issue_.wid]) != 0
|
||||
|| (inst_in_issue_.used_vregs & in_use_vregs_) != 0;
|
||||
|
||||
if (in_use_regs) {
|
||||
D(3, "*** Issue: registers not ready!");
|
||||
inst_in_issue_.stalled = true;
|
||||
return;
|
||||
}
|
||||
|
||||
switch (inst_in_issue_.rdest_type) {
|
||||
case 1:
|
||||
if (inst_in_issue_.rdest)
|
||||
in_use_iregs_[inst_in_issue_.wid][inst_in_issue_.rdest] = 1;
|
||||
break;
|
||||
case 2:
|
||||
in_use_fregs_[inst_in_issue_.wid][inst_in_issue_.rdest] = 1;
|
||||
break;
|
||||
case 3:
|
||||
in_use_vregs_[inst_in_issue_.rdest] = 1;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
// advance pipeline
|
||||
inst_in_issue_.next(&inst_in_execute_);
|
||||
}
|
||||
|
||||
// Execute stage: performs no work of its own; forwards its latch to
// writeback whenever enter() allows it.
void Core::execute() {
  const bool can_enter = inst_in_execute_.enter(&inst_in_writeback_);
  if (can_enter) {
    // advance pipeline
    inst_in_execute_.next(&inst_in_writeback_);
  }
}
|
||||
|
||||
void Core::writeback() {
|
||||
if (!inst_in_writeback_.enter(NULL))
|
||||
return;
|
||||
|
||||
switch (inst_in_writeback_.rdest_type) {
|
||||
case 1:
|
||||
in_use_iregs_[inst_in_writeback_.wid][inst_in_writeback_.rdest] = 0;
|
||||
break;
|
||||
case 2:
|
||||
in_use_fregs_[inst_in_writeback_.wid][inst_in_writeback_.rdest] = 0;
|
||||
break;
|
||||
case 3:
|
||||
in_use_vregs_[inst_in_writeback_.rdest] = 0;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (inst_in_writeback_.stall_warp) {
|
||||
stalled_warps_[inst_in_writeback_.wid] = false;
|
||||
D(3, "*** warp#" << inst_in_writeback_.wid << " fetch released");
|
||||
}
|
||||
|
||||
// advance pipeline
|
||||
inst_in_writeback_.next(NULL);
|
||||
}
|
||||
|
||||
// Reads CSR `addr` on behalf of thread `tid` of warp `wid`. Floating-point,
// identification, configuration and counter CSRs are computed on the fly;
// every other address is served from the generic CSR array (bounds-checked).
Word Core::get_csr(Addr addr, int tid, int wid) {
  // floating-point status: flags in bits [4:0], rounding mode above them
  if (addr == CSR_FFLAGS)
    return fcsrs_.at(wid) & 0x1F;
  if (addr == CSR_FRM)
    return (fcsrs_.at(wid) >> 5);
  if (addr == CSR_FCSR)
    return fcsrs_.at(wid);

  // Warp threadID
  if (addr == CSR_WTID)
    return tid;
  // Core threadID
  if (addr == CSR_LTID)
    return tid + (wid * arch_.num_threads());
  // Processor threadID
  if (addr == CSR_GTID)
    return tid + (wid * arch_.num_threads()) +
          (arch_.num_threads() * arch_.num_warps() * id_);

  // Core warpID
  if (addr == CSR_LWID)
    return wid;
  // Processor warpID
  if (addr == CSR_GWID)
    return wid + (arch_.num_warps() * id_);
  // Processor coreID
  if (addr == CSR_GCID)
    return id_;

  // Thread mask of the warp
  if (addr == CSR_TMASK)
    return warps_.at(wid)->getTmask();

  // Number of threads per warp
  if (addr == CSR_NT)
    return arch_.num_threads();
  // Number of warps per core
  if (addr == CSR_NW)
    return arch_.num_warps();
  // Number of cores
  if (addr == CSR_NC)
    return arch_.num_cores();

  // NumInsts (low / high word)
  if (addr == CSR_MINSTRET)
    return insts_;
  if (addr == CSR_MINSTRET_H)
    return (Word)(insts_ >> 32);

  // NumCycles (low / high word)
  if (addr == CSR_MCYCLE)
    return (Word)steps_;
  if (addr == CSR_MCYCLE_H)
    return (Word)(steps_ >> 32);

  return csrs_.at(addr);
}
|
||||
|
||||
// Writes CSR `addr` for warp `wid`. The floating-point CSRs update the
// per-warp FCSR byte (flags in bits [4:0], rounding mode in bits [7:5]);
// every other address is stored in the generic CSR array (bounds-checked).
void Core::set_csr(Addr addr, Word value, int /*tid*/, int wid) {
  if (addr == CSR_FFLAGS) {
    auto& fcsr = fcsrs_.at(wid);
    fcsr = (fcsr & ~0x1F) | (value & 0x1F);
    return;
  }
  if (addr == CSR_FRM) {
    auto& fcsr = fcsrs_.at(wid);
    fcsr = (fcsr & ~0xE0) | (value << 5);
    return;
  }
  if (addr == CSR_FCSR) {
    fcsrs_.at(wid) = value & 0xff;
    return;
  }
  csrs_.at(addr) = value;
}
|
||||
|
||||
void Core::barrier(int bar_id, int count, int warp_id) {
|
||||
auto& barrier = barriers_.at(bar_id);
|
||||
barrier.set(warp_id);
|
||||
if (barrier.count() < (size_t)count)
|
||||
return;
|
||||
for (int i = 0; i < arch_.num_warps(); ++i) {
|
||||
if (barrier.test(i)) {
|
||||
warps_.at(i)->activate();
|
||||
}
|
||||
}
|
||||
barrier.reset();
|
||||
}
|
||||
|
||||
// Reads one instruction word at `addr` through the memory unit.
Word Core::icache_fetch(Addr addr) {
  Word instr;
  mem_.read(&instr, addr, sizeof(Word), 0);
  return instr;
}
|
||||
|
||||
// Loads `size` bytes at `addr` into a zero-initialised word and counts the
// load. With SM_ENABLE, addresses inside the shared-memory window are served
// from the core-local shared memory instead of the memory unit.
Word Core::dcache_read(Addr addr, Size size) {
  ++loads_;
  Word value = 0;

#ifdef SM_ENABLE
  const bool in_shared_mem = (addr >= (SMEM_BASE_ADDR - SMEM_SIZE))
                          && ((addr + 3) < SMEM_BASE_ADDR);
  if (in_shared_mem) {
    shared_mem_.read(&value, addr & (SMEM_SIZE-1), size);
    return value;
  }
#endif

  mem_.read(&value, addr, size, 0);
  return value;
}
|
||||
|
||||
// Stores `size` bytes of `data` at `addr` and counts the store. With
// SM_ENABLE, the shared-memory window goes to core-local shared memory;
// addresses in the console range are printed instead of stored; everything
// else goes to the memory unit.
void Core::dcache_write(Addr addr, Word data, Size size) {
  ++stores_;

#ifdef SM_ENABLE
  const bool in_shared_mem = (addr >= (SMEM_BASE_ADDR - SMEM_SIZE))
                          && ((addr + 3) < SMEM_BASE_ADDR);
  if (in_shared_mem) {
    shared_mem_.write(&data, addr & (SMEM_SIZE-1), size);
    return;
  }
#endif

  const bool is_console = (addr >= IO_COUT_ADDR)
                       && (addr <= (IO_COUT_ADDR + IO_COUT_SIZE - 1));
  if (is_console) {
    this->writeToStdOut(addr, data);
    return;
  }

  mem_.write(&data, addr, size, 0);
}
|
||||
|
||||
bool Core::running() const {
|
||||
return inst_in_fetch_.valid
|
||||
|| inst_in_decode_.valid
|
||||
|| inst_in_issue_.valid
|
||||
|| inst_in_execute_.valid
|
||||
|| inst_in_writeback_.valid;
|
||||
}
|
||||
|
||||
// Prints the cycle, instruction, load and store counters to stdout, one per
// line.
void Core::printStats() const {
  std::cout << "Steps : " << steps_ << std::endl;
  std::cout << "Insts : " << insts_ << std::endl;
  std::cout << "Loads : " << loads_ << std::endl;
  std::cout << "Stores: " << stores_ << std::endl;
}
|
||||
|
||||
// Appends the low byte of `data` to the print buffer selected by the offset
// of `addr` within the console range; when the byte is a newline, the
// buffered line is printed with a "#<id>: " prefix and the buffer is emptied.
void Core::writeToStdOut(Addr addr, Word data) {
  const uint32_t tid = (addr - IO_COUT_ADDR) & (IO_COUT_SIZE-1);
  const char ch = (char)data;

  auto& line = print_bufs_[tid];
  line << ch;

  if (ch != '\n')
    return;

  std::cout << std::dec << "#" << tid << ": " << line.str() << std::flush;
  line.str("");
}
|
||||
|
||||
void Core::trigger_ebreak() {
|
||||
ebreak_ = true;
|
||||
}
|
||||
|
||||
bool Core::check_ebreak() const {
|
||||
return ebreak_;
|
||||
}
|
||||
122
sim/simX/core.h
Normal file
122
sim/simX/core.h
Normal file
@@ -0,0 +1,122 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <list>
|
||||
#include <stack>
|
||||
#include <unordered_map>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
|
||||
#include "debug.h"
|
||||
#include "types.h"
|
||||
#include "archdef.h"
|
||||
#include "decode.h"
|
||||
#include "mem.h"
|
||||
#include "warp.h"
|
||||
#include "pipeline.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class Core {
|
||||
public:
|
||||
Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id);
|
||||
|
||||
~Core();
|
||||
|
||||
void clear();
|
||||
|
||||
bool running() const;
|
||||
|
||||
void step();
|
||||
|
||||
void printStats() const;
|
||||
|
||||
Word id() const {
|
||||
return id_;
|
||||
}
|
||||
|
||||
Warp& warp(int i) {
|
||||
return *warps_.at(i);
|
||||
}
|
||||
|
||||
Decoder& decoder() {
|
||||
return decoder_;
|
||||
}
|
||||
|
||||
const ArchDef& arch() const {
|
||||
return arch_;
|
||||
}
|
||||
|
||||
unsigned long num_insts() const {
|
||||
return insts_;
|
||||
}
|
||||
|
||||
unsigned long num_steps() const {
|
||||
return steps_;
|
||||
}
|
||||
|
||||
Word getIRegValue(int reg) const {
|
||||
return warps_[0]->getIRegValue(reg);
|
||||
}
|
||||
|
||||
Word get_csr(Addr addr, int tid, int wid);
|
||||
|
||||
void set_csr(Addr addr, Word value, int tid, int wid);
|
||||
|
||||
void barrier(int bar_id, int count, int warp_id);
|
||||
|
||||
Word icache_fetch(Addr);
|
||||
|
||||
Word dcache_read(Addr, Size);
|
||||
|
||||
void dcache_write(Addr, Word, Size);
|
||||
|
||||
void trigger_ebreak();
|
||||
bool check_ebreak() const;
|
||||
|
||||
private:
|
||||
|
||||
void schedule();
|
||||
void fetch();
|
||||
void decode();
|
||||
void issue();
|
||||
void execute();
|
||||
void writeback();
|
||||
|
||||
void writeToStdOut(Addr addr, Word data);
|
||||
|
||||
std::vector<RegMask> in_use_iregs_;
|
||||
std::vector<RegMask> in_use_fregs_;
|
||||
RegMask in_use_vregs_;
|
||||
WarpMask stalled_warps_;
|
||||
std::vector<std::shared_ptr<Warp>> warps_;
|
||||
std::vector<WarpMask> barriers_;
|
||||
std::vector<Word> csrs_;
|
||||
std::vector<Byte> fcsrs_;
|
||||
std::unordered_map<int, std::stringstream> print_bufs_;
|
||||
|
||||
Word id_;
|
||||
const ArchDef &arch_;
|
||||
Decoder &decoder_;
|
||||
MemoryUnit &mem_;
|
||||
#ifdef SM_ENABLE
|
||||
RAM shared_mem_;
|
||||
#endif
|
||||
|
||||
bool ebreak_;
|
||||
|
||||
Pipeline inst_in_schedule_;
|
||||
Pipeline inst_in_fetch_;
|
||||
Pipeline inst_in_decode_;
|
||||
Pipeline inst_in_issue_;
|
||||
Pipeline inst_in_execute_;
|
||||
Pipeline inst_in_writeback_;
|
||||
|
||||
uint64_t steps_;
|
||||
uint64_t insts_;
|
||||
uint64_t loads_;
|
||||
uint64_t stores_;
|
||||
};
|
||||
|
||||
} // namespace vortex
|
||||
43
sim/simX/debug.h
Normal file
43
sim/simX/debug.h
Normal file
@@ -0,0 +1,43 @@
|
||||
#pragma once
|
||||
|
||||
// Debug tracing macros.
//
//   DX(x)       - emits the statement `x` only in debug builds.
//   D(lvl, x)   - prints the header plus `x` and a newline when lvl <= DEBUG_LEVEL.
//   DPH(lvl, x) - like D() but without the trailing newline.
//   DPN(lvl, x) - prints `x` only (no header, no newline) when lvl <= DEBUG_LEVEL.
//   D_RAW(x)    - prints `x` unconditionally (no header, no level check).
//
// With NDEBUG defined every macro expands to nothing, so the stream
// expressions passed as `x` are not evaluated at all.

#ifndef DEBUG_LEVEL
#define DEBUG_LEVEL 3
#endif

#define DEBUG_HEADER << "DEBUG "
//#define DEBUG_HEADER << "DEBUG " << __FILE__ << ':' << std::dec << __LINE__ << ": "

#ifndef NDEBUG

#include <iostream>
#include <iomanip>

#define DX(x) x

#define D(lvl, x) do { \
  if ((lvl) <= DEBUG_LEVEL) { \
    std::cout DEBUG_HEADER << x << std::endl; \
  } \
} while(0)

#define DPH(lvl, x) do { \
  if ((lvl) <= DEBUG_LEVEL) { \
    std::cout DEBUG_HEADER << x; \
  } \
} while(0)

#define DPN(lvl, x) do { \
  if ((lvl) <= DEBUG_LEVEL) { \
    std::cout << x; \
  } \
} while(0)

// Previously only the NDEBUG branch defined D_RAW, so code using it built in
// release mode but failed to compile in debug mode.
#define D_RAW(x) do { \
  std::cout << x; \
} while(0)

#else

#define DX(x)
#define D(lvl, x) do {} while(0)
#define DPH(lvl, x) do {} while(0)
#define DPN(lvl, x) do {} while(0)
#define D_RAW(x) do {} while(0)

#endif
|
||||
464
sim/simX/decode.cpp
Normal file
464
sim/simX/decode.cpp
Normal file
@@ -0,0 +1,464 @@
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <iomanip>
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
#include <util.h>
|
||||
#include "debug.h"
|
||||
#include "types.h"
|
||||
#include "decode.h"
|
||||
#include "archdef.h"
|
||||
#include "instr.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
struct InstTableEntry_t {
|
||||
bool controlFlow;
|
||||
InstType iType;
|
||||
};
|
||||
|
||||
static const std::unordered_map<int, struct InstTableEntry_t> sc_instTable = {
|
||||
{Opcode::NOP, {false, InstType::N_TYPE}},
|
||||
{Opcode::R_INST, {false, InstType::R_TYPE}},
|
||||
{Opcode::L_INST, {false, InstType::I_TYPE}},
|
||||
{Opcode::I_INST, {false, InstType::I_TYPE}},
|
||||
{Opcode::S_INST, {false, InstType::S_TYPE}},
|
||||
{Opcode::B_INST, {true , InstType::B_TYPE}},
|
||||
{Opcode::LUI_INST, {false, InstType::U_TYPE}},
|
||||
{Opcode::AUIPC_INST, {false, InstType::U_TYPE}},
|
||||
{Opcode::JAL_INST, {true , InstType::J_TYPE}},
|
||||
{Opcode::JALR_INST, {true , InstType::I_TYPE}},
|
||||
{Opcode::SYS_INST, {true , InstType::I_TYPE}},
|
||||
{Opcode::FENCE, {true , InstType::I_TYPE}},
|
||||
{Opcode::FL, {false, InstType::I_TYPE}},
|
||||
{Opcode::FS, {false, InstType::S_TYPE}},
|
||||
{Opcode::FCI, {false, InstType::R_TYPE}},
|
||||
{Opcode::FMADD, {false, InstType::R4_TYPE}},
|
||||
{Opcode::FMSUB, {false, InstType::R4_TYPE}},
|
||||
{Opcode::FMNMADD, {false, InstType::R4_TYPE}},
|
||||
{Opcode::FMNMSUB, {false, InstType::R4_TYPE}},
|
||||
{Opcode::VSET, {false, InstType::V_TYPE}},
|
||||
{Opcode::GPGPU, {false, InstType::R_TYPE}},
|
||||
};
|
||||
|
||||
static const char* op_string(const Instr &instr) {
|
||||
Word func3 = instr.getFunc3();
|
||||
Word func7 = instr.getFunc7();
|
||||
Word rs2 = instr.getRSrc(1);
|
||||
Word imm = instr.getImm();
|
||||
switch (instr.getOpcode()) {
|
||||
case Opcode::NOP: return "NOP";
|
||||
case Opcode::LUI_INST: return "LUI";
|
||||
case Opcode::AUIPC_INST: return "AUIPC";
|
||||
case Opcode::R_INST:
|
||||
if (func7 & 0x1) {
|
||||
switch (func3) {
|
||||
case 0: return "MUL";
|
||||
case 1: return "MULH";
|
||||
case 2: return "MULHSU";
|
||||
case 3: return "MULHU";
|
||||
case 4: return "DIV";
|
||||
case 5: return "DIVU";
|
||||
case 6: return "REM";
|
||||
case 7: return "REMU";
|
||||
}
|
||||
} else {
|
||||
switch (func3) {
|
||||
case 0: return func7 ? "SUB" : "ADD";
|
||||
case 1: return "SLL";
|
||||
case 2: return "SLT";
|
||||
case 3: return "SLTU";
|
||||
case 4: return "XOR";
|
||||
case 5: return func7 ? "SRA" : "SRL";
|
||||
case 6: return "OR";
|
||||
case 7: return "AND";
|
||||
}
|
||||
}
|
||||
case Opcode::I_INST:
|
||||
switch (func3) {
|
||||
case 0: return "ADDI";
|
||||
case 1: return "SLLI";
|
||||
case 2: return "SLTI";
|
||||
case 3: return "SLTIU";
|
||||
case 4: return "XORI";
|
||||
case 5: return func7 ? "SRAI" : "SRLI";
|
||||
case 6: return "ORI";
|
||||
case 7: return "ANDI";
|
||||
}
|
||||
case Opcode::B_INST:
|
||||
switch (func3) {
|
||||
case 0: return "BEQ";
|
||||
case 1: return "BNE";
|
||||
case 4: return "BLT";
|
||||
case 5: return "BGE";
|
||||
case 6: return "BLTU";
|
||||
case 7: return "BGEU";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case Opcode::JAL_INST: return "JAL";
|
||||
case Opcode::JALR_INST: return "JALR";
|
||||
case Opcode::L_INST:
|
||||
switch (func3) {
|
||||
case 0: return "LBI";
|
||||
case 1: return "LHI";
|
||||
case 2: return "LW";
|
||||
case 4: return "LBU";
|
||||
case 5: return "LHU";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case Opcode::S_INST:
|
||||
switch (func3) {
|
||||
case 0: return "SB";
|
||||
case 1: return "SH";
|
||||
case 2: return "SW";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case Opcode::SYS_INST:
|
||||
switch (func3) {
|
||||
case 0: return imm ? "EBREAK" : "ECALL";
|
||||
case 1: return "CSRRW";
|
||||
case 2: return "CSRRS";
|
||||
case 3: return "CSRRC";
|
||||
case 5: return "CSRRWI";
|
||||
case 6: return "CSRRSI";
|
||||
case 7: return "CSRRCI";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case Opcode::FENCE: return "FENCE";
|
||||
case Opcode::FL: return (func3 == 0x2) ? "FL" : "VL";
|
||||
case Opcode::FS: return (func3 == 0x2) ? "FS" : "VS";
|
||||
case Opcode::FCI:
|
||||
switch (func7) {
|
||||
case 0x00: return "FADD";
|
||||
case 0x04: return "FSUB";
|
||||
case 0x08: return "FMUL";
|
||||
case 0x0c: return "FDIV";
|
||||
case 0x2c: return "FSQRT";
|
||||
case 0x10:
|
||||
switch (func3) {
|
||||
case 0: return "FSGNJ";
|
||||
case 1: return "FSGNJN";
|
||||
case 2: return "FSGNJX";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case 0x14:
|
||||
switch (func3) {
|
||||
case 0: return "FMIM";
|
||||
case 1: return "FMAX";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case 0x50:
|
||||
switch (func3) {
|
||||
case 0: return "FLE";
|
||||
case 1: return "FLT";
|
||||
case 2: return "FEQ";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case 0x60: return rs2 ? "FCVT.WU.S" : "FCVT.W.S";
|
||||
case 0x68: return rs2 ? "FCVT.S.WU" : "FCVT.S.W";
|
||||
case 0x70: return func3 ? "FLASS" : "FMV.X.W";
|
||||
case 0x78: return "FMV.W.X";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case Opcode::FMADD: return "FMADD";
|
||||
case Opcode::FMSUB: return "FMSUB";
|
||||
case Opcode::FMNMADD: return "FMNMADD";
|
||||
case Opcode::FMNMSUB: return "FMNMSUB";
|
||||
case Opcode::VSET: return "VSET";
|
||||
case Opcode::GPGPU:
|
||||
switch (func3) {
|
||||
case 0: return "TMC";
|
||||
case 1: return "WSPAWN";
|
||||
case 2: return "SPLIT";
|
||||
case 3: return "JOIN";
|
||||
case 4: return "BAR";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
|
||||
namespace vortex {
|
||||
std::ostream &operator<<(std::ostream &os, const Instr &instr) {
|
||||
os << op_string(instr) << ": ";
|
||||
auto opcode = instr.getOpcode();
|
||||
|
||||
auto rd_to_string = [&]() {
|
||||
int rdt = instr.getRDType();
|
||||
int rd = instr.getRDest();
|
||||
switch (rdt) {
|
||||
case 1: os << "r" << std::dec << rd << " <- "; break;
|
||||
case 2: os << "fr" << std::dec << rd << " <- "; break;
|
||||
case 3: os << "vr" << std::dec << rd << " <- "; break;
|
||||
default: break;
|
||||
}
|
||||
};
|
||||
|
||||
auto rs_to_string = [&](int i) {
|
||||
int rst = instr.getRSType(i);
|
||||
int rs = instr.getRSrc(i);
|
||||
switch (rst) {
|
||||
case 1: os << "r" << std::dec << rs; break;
|
||||
case 2: os << "fr" << std::dec << rs; break;
|
||||
case 3: os << "vr" << std::dec << rs; break;
|
||||
default: break;
|
||||
}
|
||||
};
|
||||
|
||||
if (opcode == S_INST
|
||||
|| opcode == FS
|
||||
|| opcode == VS) {
|
||||
os << "M[r" << std::dec << instr.getRSrc(0) << " + 0x" << std::hex << instr.getImm() << "] <- ";
|
||||
rs_to_string(1);
|
||||
} else
|
||||
if (opcode == L_INST
|
||||
|| opcode == FL
|
||||
|| opcode == VL) {
|
||||
rd_to_string();
|
||||
os << "M[r" << std::dec << instr.getRSrc(0) << " + 0x" << std::hex << instr.getImm() << "]";
|
||||
} else {
|
||||
rd_to_string();
|
||||
int i = 0;
|
||||
for (; i < instr.getNRSrc(); ++i) {
|
||||
if (i) os << ", ";
|
||||
rs_to_string(i);
|
||||
}
|
||||
if (instr.hasImm()) {
|
||||
if (i) os << ", ";
|
||||
os << "imm=0x" << std::hex << instr.getImm();
|
||||
}
|
||||
}
|
||||
|
||||
return os;
|
||||
}
|
||||
}
|
||||
|
||||
// Sets up the field widths, bit offsets and masks used by decode().
//
// Bit offsets are accumulated from the field widths, starting at bit 0:
//   opcode @0, rd @7, func3 @12, rs1 @15, rs2 @20, func7 @25, rs3 @27.
//
// Fix: func2_mask_ was 0x2, which keeps only the upper bit of the 2-bit
// func2 field (func2_s_ == 2); the full-field mask is 0x3.
Decoder::Decoder(const ArchDef &arch) {
  // field widths, in bits
  inst_s_   = arch.wsize() * 8;
  opcode_s_ = 7;
  reg_s_    = 5;
  func2_s_  = 2;
  func3_s_  = 3;
  mop_s_    = 3;
  vmask_s_  = 1;

  // bit offset of each field inside the instruction word
  shift_opcode_ = 0;
  shift_rd_     = opcode_s_;
  shift_func3_  = shift_rd_ + reg_s_;
  shift_rs1_    = shift_func3_ + func3_s_;
  shift_rs2_    = shift_rs1_ + reg_s_;
  shift_func7_  = shift_rs2_ + reg_s_;
  shift_rs3_    = shift_func7_ + func2_s_;
  shift_vmop_   = shift_func7_ + vmask_s_;
  shift_vnf_    = shift_vmop_ + mop_s_;
  shift_func6_  = shift_func7_ + 1;
  shift_vset_   = shift_func7_ + 6;

  // field masks (applied after shifting the field down to bit 0)
  reg_mask_    = 0x1f;
  func2_mask_  = 0x3;
  func3_mask_  = 0x7;
  func6_mask_  = 0x3f;
  func7_mask_  = 0x7f;
  opcode_mask_ = 0x7f;
  i_imm_mask_  = 0xfff;
  s_imm_mask_  = 0xfff;
  b_imm_mask_  = 0x1fff;
  u_imm_mask_  = 0xfffff;
  j_imm_mask_  = 0xfffff;
  v_imm_mask_  = 0x7ff;
}
|
||||
|
||||
// Decodes the raw instruction word `code` into a freshly allocated Instr.
//
// `PC` is only used for the diagnostic printed on an unknown opcode.
// Unknown opcodes and unsupported formats terminate the process via
// std::abort().
//
// Fixes relative to the previous version (I-type immediate handling):
//  - The 5-bit shift-amount special case is now limited to Opcode::I_INST
//    with func3 == 5 (SRLI/SRAI). It used to apply to every I-type opcode
//    except loads, so CSRRWI (SYS_INST, func3 == 5) got the rs2 field as
//    its immediate instead of the 12-bit field at bits 31:20.
//  - The shift amount is stored zero-extended. It used to be sign-extended
//    from 5 bits, turning shift amounts 16..31 into negative immediates.
std::shared_ptr<Instr> Decoder::decode(Word code, Word PC) {
  auto instr = std::make_shared<Instr>();
  Opcode op = (Opcode)((code >> shift_opcode_) & opcode_mask_);
  instr->setOpcode(op);

  Word func3 = (code >> shift_func3_) & func3_mask_;
  Word func6 = (code >> shift_func6_) & func6_mask_;
  Word func7 = (code >> shift_func7_) & func7_mask_;

  int rd  = (code >> shift_rd_)  & reg_mask_;
  int rs1 = (code >> shift_rs1_) & reg_mask_;
  int rs2 = (code >> shift_rs2_) & reg_mask_;
  int rs3 = (code >> shift_rs3_) & reg_mask_;

  auto op_it = sc_instTable.find(op);
  if (op_it == sc_instTable.end()) {
    std::cout << std::hex << "invalid opcode: 0x" << op << ", instruction=0x" << code << ", PC=" << PC << std::endl;
    std::abort();
  }

  // FL/FS share their opcode value with VL/VS: any func3 other than 2
  // selects the vector load/store form.
  auto iType = op_it->second.iType;
  if (op == Opcode::FL || op == Opcode::FS) {
    if (func3 != 0x2) {
      iType = InstType::V_TYPE;
    }
  }

  switch (iType) {
  case InstType::N_TYPE:
    break;

  case InstType::R_TYPE:
    if (op == Opcode::FCI) {
      // rs1 is an integer register for int->float moves/conversions.
      switch (func7) {
      case 0x68: // FCVT.S.W, FCVT.S.WU
      case 0x78: // FMV.W.X
        instr->setSrcReg(rs1);
        break;
      default:
        instr->setSrcFReg(rs1);
      }
      instr->setSrcFReg(rs2);
      // rd is an integer register for compares and float->int results.
      switch (func7) {
      case 0x50: // FLE, FLT, FEQ
      case 0x60: // FCVT.WU.S, FCVT.W.S
      case 0x70: // FCLASS, FMV.X.W
        instr->setDestReg(rd);
        break;
      default:
        instr->setDestFReg(rd);
      }
    } else {
      instr->setDestReg(rd);
      instr->setSrcReg(rs1);
      instr->setSrcReg(rs2);
    }
    instr->setFunc3(func3);
    instr->setFunc7(func7);
    break;

  case InstType::I_TYPE: {
    instr->setSrcReg(rs1);
    if (op == Opcode::FL) {
      instr->setDestFReg(rd);
    } else {
      instr->setDestReg(rd);
    }
    instr->setFunc3(func3);
    instr->setFunc7(func7);
    if ((op == Opcode::I_INST) && (func3 == 5)) {
      // SRLI/SRAI: the immediate is the unsigned 5-bit shift amount held in
      // the rs2 field; func7 (set above) distinguishes the two.
      instr->setImm(rs2);
    } else {
      instr->setImm(signExt(code >> shift_rs2_, 12, i_imm_mask_));
    }
  } break;

  case InstType::S_TYPE: {
    instr->setSrcReg(rs1);
    if (op == Opcode::FS) {
      instr->setSrcFReg(rs2);
    } else {
      instr->setSrcReg(rs2);
    }
    instr->setFunc3(func3);
    // imm[11:5] is in the func7 field, imm[4:0] in the rd field.
    Word imeed = (func7 << reg_s_) | rd;
    instr->setImm(signExt(imeed, 12, s_imm_mask_));
  } break;

  case InstType::B_TYPE: {
    instr->setSrcReg(rs1);
    instr->setSrcReg(rs2);
    instr->setFunc3(func3);
    // Reassemble the scattered branch offset from the rd and func7 fields.
    Word bit_11   = rd & 0x1;
    Word bits_4_1 = rd >> 1;
    Word bit_10_5 = func7 & 0x3f;
    Word bit_12   = func7 >> 6;
    Word imeed = (bits_4_1 << 1) | (bit_10_5 << 5) | (bit_11 << 11) | (bit_12 << 12);
    instr->setImm(signExt(imeed, 13, b_imm_mask_));
  } break;

  case InstType::U_TYPE:
    instr->setDestReg(rd);
    instr->setImm(signExt(code >> shift_func3_, 20, u_imm_mask_));
    break;

  case InstType::J_TYPE: {
    instr->setDestReg(rd);
    // Reassemble the scattered jump offset from bits 31:12.
    Word unordered  = code >> shift_func3_;
    Word bits_19_12 = unordered & 0xff;
    Word bit_11     = (unordered >> 8) & 0x1;
    Word bits_10_1  = (unordered >> 9) & 0x3ff;
    Word bit_20     = (unordered >> 19) & 0x1;
    Word imeed = 0 | (bits_10_1 << 1) | (bit_11 << 11) | (bits_19_12 << 12) | (bit_20 << 20);
    if (bit_20) {
      // manual sign extension from bit 20
      imeed |= ~j_imm_mask_;
    }
    instr->setImm(imeed);
  } break;

  case InstType::V_TYPE:
    switch (op) {
    case Opcode::VSET: {
      instr->setDestVReg(rd);
      instr->setSrcVReg(rs1);
      instr->setFunc3(func3);
      if (func3 == 7) {
        // Bit 31 clear selects the immediate form.
        instr->setImm(!(code >> shift_vset_));
        if (instr->getImm()) {
          Word immed = (code >> shift_rs2_) & v_imm_mask_;
          instr->setImm(immed);
          instr->setVlmul(immed & 0x3);
          instr->setVediv((immed >> 4) & 0x3);
          instr->setVsew((immed >> 2) & 0x3);
        } else {
          instr->setSrcVReg(rs2);
        }
      } else {
        instr->setSrcVReg(rs2);
        instr->setVmask((code >> shift_func7_) & 0x1);
        instr->setFunc6(func6);
      }
    } break;

    case Opcode::VL:
      instr->setDestVReg(rd);
      instr->setSrcVReg(rs1);
      instr->setVlsWidth(func3);
      instr->setSrcVReg(rs2);
      // NOTE(review): unlike the VSET path above, the mask value is not
      // reduced to a single bit here - confirm this is intended.
      instr->setVmask(code >> shift_func7_);
      instr->setVmop((code >> shift_vmop_) & func3_mask_);
      instr->setVnf((code >> shift_vnf_) & func3_mask_);
      break;

    case Opcode::VS:
      instr->setVs3(rd);
      instr->setSrcVReg(rs1);
      instr->setVlsWidth(func3);
      instr->setSrcVReg(rs2);
      // NOTE(review): same unmasked vmask value as in the VL case.
      instr->setVmask(code >> shift_func7_);
      instr->setVmop((code >> shift_vmop_) & func3_mask_);
      instr->setVnf((code >> shift_vnf_) & func3_mask_);
      break;

    default:
      std::abort();
    }
    break;

  case R4_TYPE:
    instr->setDestFReg(rd);
    instr->setSrcFReg(rs1);
    instr->setSrcFReg(rs2);
    instr->setSrcFReg(rs3);
    instr->setFunc3(func3);
    break;

  default:
    std::abort();
  }

  D(2, "Instr 0x" << std::hex << code << ": " << *instr << std::flush);

  return instr;
}
|
||||
61
sim/simX/decode.h
Normal file
61
sim/simX/decode.h
Normal file
@@ -0,0 +1,61 @@
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class ArchDef;
|
||||
class Instr;
|
||||
class Pipeline;
|
||||
|
||||
class Decoder {
|
||||
public:
|
||||
Decoder(const ArchDef &);
|
||||
|
||||
std::shared_ptr<Instr> decode(Word code, Word PC);
|
||||
|
||||
private:
|
||||
|
||||
Word inst_s_;
|
||||
Word opcode_s_;
|
||||
Word reg_s_;
|
||||
Word func2_s_;
|
||||
Word func3_s_;
|
||||
Word shift_opcode_;
|
||||
Word shift_rd_;
|
||||
Word shift_rs1_;
|
||||
Word shift_rs2_;
|
||||
Word shift_rs3_;
|
||||
Word shift_func2_;
|
||||
Word shift_func3_;
|
||||
Word shift_func7_;
|
||||
Word shift_j_u_immed_;
|
||||
Word shift_s_b_immed_;
|
||||
Word shift_i_immed_;
|
||||
|
||||
Word reg_mask_;
|
||||
Word func2_mask_;
|
||||
Word func3_mask_;
|
||||
Word func6_mask_;
|
||||
Word func7_mask_;
|
||||
Word opcode_mask_;
|
||||
Word i_imm_mask_;
|
||||
Word s_imm_mask_;
|
||||
Word b_imm_mask_;
|
||||
Word u_imm_mask_;
|
||||
Word j_imm_mask_;
|
||||
Word v_imm_mask_;
|
||||
|
||||
//Vector
|
||||
Word shift_vset_;
|
||||
Word shift_vset_immed_;
|
||||
Word shift_vmask_;
|
||||
Word shift_vmop_;
|
||||
Word shift_vnf_;
|
||||
Word shift_func6_;
|
||||
Word vmask_s_;
|
||||
Word mop_s_;
|
||||
};
|
||||
|
||||
}
|
||||
1811
sim/simX/execute.cpp
Normal file
1811
sim/simX/execute.cpp
Normal file
File diff suppressed because it is too large
Load Diff
140
sim/simX/instr.h
Normal file
140
sim/simX/instr.h
Normal file
@@ -0,0 +1,140 @@
|
||||
#pragma once
|
||||
|
||||
#include "types.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class Warp;
|
||||
|
||||
// Major opcode values (the low 7 bits of the instruction word).
// VL/VS deliberately alias FL/FS: they share the same opcode value.
enum Opcode {
  NOP        = 0x00,
  // Base integer ISA
  L_INST     = 0x03,
  FENCE      = 0x0f,
  I_INST     = 0x13,
  AUIPC_INST = 0x17,
  S_INST     = 0x23,
  R_INST     = 0x33,
  LUI_INST   = 0x37,
  B_INST     = 0x63,
  JALR_INST  = 0x67,
  JAL_INST   = 0x6f,
  SYS_INST   = 0x73,
  // F Extension
  FL         = 0x07,
  FS         = 0x27,
  FMADD      = 0x43,
  FMSUB      = 0x47,
  FMNMSUB    = 0x4b,
  FMNMADD    = 0x4f,
  FCI        = 0x53,
  // Vector Extension
  VL         = 0x07,
  VS         = 0x27,
  VSET       = 0x57,
  // GPGPU Extension
  GPGPU      = 0x6b,
};
|
||||
|
||||
// Instruction encoding formats. Values are consecutive starting at 0.
enum InstType {
  N_TYPE  = 0,
  R_TYPE  = 1,
  I_TYPE  = 2,
  S_TYPE  = 3,
  B_TYPE  = 4,
  U_TYPE  = 5,
  J_TYPE  = 6,
  V_TYPE  = 7,
  R4_TYPE = 8
};
|
||||
|
||||
class Instr {
|
||||
public:
|
||||
Instr()
|
||||
: opcode_(Opcode::NOP)
|
||||
, num_rsrcs_(0)
|
||||
, has_imm_(false)
|
||||
, rdest_(0)
|
||||
, func3_(0)
|
||||
, func7_(0) {
|
||||
for (int i = 0; i < MAX_REG_SOURCES; ++i) {
|
||||
rsrc_type_[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Setters used to "craft" the instruction. */
|
||||
void setOpcode(Opcode opcode) { opcode_ = opcode; }
|
||||
void setDestReg(int destReg) { rdest_type_ = 1; rdest_ = destReg; }
|
||||
void setSrcReg(int srcReg) { rsrc_type_[num_rsrcs_] = 1; rsrc_[num_rsrcs_++] = srcReg; }
|
||||
void setDestFReg(int destReg) { rdest_type_ = 2; rdest_ = destReg; }
|
||||
void setSrcFReg(int srcReg) { rsrc_type_[num_rsrcs_] = 2; rsrc_[num_rsrcs_++] = srcReg; }
|
||||
void setDestVReg(int destReg) { rdest_type_ = 3; rdest_ = destReg; }
|
||||
void setSrcVReg(int srcReg) { rsrc_type_[num_rsrcs_] = 3; rsrc_[num_rsrcs_++] = srcReg; }
|
||||
void setFunc3(Word func3) { func3_ = func3; }
|
||||
void setFunc7(Word func7) { func7_ = func7; }
|
||||
void setImm(Word imm) { has_imm_ = true; imm_ = imm; }
|
||||
void setVlsWidth(Word width) { vlsWidth_ = width; }
|
||||
void setVmop(Word mop) { vMop_ = mop; }
|
||||
void setVnf(Word nf) { vNf_ = nf; }
|
||||
void setVmask(Word mask) { vmask_ = mask; }
|
||||
void setVs3(Word vs) { vs3_ = vs; }
|
||||
void setVlmul(Word lmul) { vlmul_ = 1 << lmul; }
|
||||
void setVsew(Word sew) { vsew_ = 1 << (3+sew); }
|
||||
void setVediv(Word ediv) { vediv_ = 1 << ediv; }
|
||||
void setFunc6(Word func6) { func6_ = func6; }
|
||||
|
||||
/* Getters used by encoders. */
|
||||
Opcode getOpcode() const { return opcode_; }
|
||||
Word getFunc3() const { return func3_; }
|
||||
Word getFunc6() const { return func6_; }
|
||||
Word getFunc7() const { return func7_; }
|
||||
int getNRSrc() const { return num_rsrcs_; }
|
||||
int getRSrc(int i) const { return rsrc_[i]; }
|
||||
int getRSType(int i) const { return rsrc_type_[i]; }
|
||||
int getRDest() const { return rdest_; }
|
||||
int getRDType() const { return rdest_type_; }
|
||||
bool hasImm() const { return has_imm_; }
|
||||
Word getImm() const { return imm_; }
|
||||
Word getVlsWidth() const { return vlsWidth_; }
|
||||
Word getVmop() const { return vMop_; }
|
||||
Word getvNf() const { return vNf_; }
|
||||
Word getVmask() const { return vmask_; }
|
||||
Word getVs3() const { return vs3_; }
|
||||
Word getVlmul() const { return vlmul_; }
|
||||
Word getVsew() const { return vsew_; }
|
||||
Word getVediv() const { return vediv_; }
|
||||
|
||||
private:
|
||||
|
||||
enum {
|
||||
MAX_REG_SOURCES = 3
|
||||
};
|
||||
|
||||
Opcode opcode_;
|
||||
int num_rsrcs_;
|
||||
bool has_imm_;
|
||||
int rdest_type_;
|
||||
int isrc_mask_;
|
||||
int fsrc_mask_;
|
||||
int vsrc_mask_;
|
||||
Word imm_;
|
||||
int rsrc_type_[MAX_REG_SOURCES];
|
||||
int rsrc_[MAX_REG_SOURCES];
|
||||
int rdest_;
|
||||
Word func3_;
|
||||
Word func7_;
|
||||
|
||||
//Vector
|
||||
Word vmask_;
|
||||
Word vlsWidth_;
|
||||
Word vMop_;
|
||||
Word vNf_;
|
||||
Word vs3_;
|
||||
Word vlmul_;
|
||||
Word vsew_;
|
||||
Word vediv_;
|
||||
Word func6_;
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &, const Instr&);
|
||||
};
|
||||
|
||||
}
|
||||
109
sim/simX/main.cpp
Normal file
109
sim/simX/main.cpp
Normal file
@@ -0,0 +1,109 @@
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <stdlib.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "debug.h"
|
||||
#include "types.h"
|
||||
#include "core.h"
|
||||
#include "args.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
||||
std::string archString("rv32imf");
|
||||
int num_cores(NUM_CORES * NUM_CLUSTERS);
|
||||
int num_warps(NUM_WARPS);
|
||||
int num_threads(NUM_THREADS);
|
||||
std::string imgFileName;
|
||||
bool showHelp(false);
|
||||
bool showStats(false);
|
||||
bool riscv_test(false);
|
||||
|
||||
/* Read the command line arguments. */
|
||||
CommandLineArgFlag fh("-h", "--help", "", showHelp);
|
||||
CommandLineArgSetter<std::string> fa("-a", "--arch", "", archString);
|
||||
CommandLineArgSetter<std::string> fi("-i", "--image", "", imgFileName);
|
||||
CommandLineArgSetter<int> fc("-c", "--cores", "", num_cores);
|
||||
CommandLineArgSetter<int> fw("-w", "--warps", "", num_warps);
|
||||
CommandLineArgSetter<int> ft("-t", "--threads", "", num_threads);
|
||||
CommandLineArgFlag fr("-r", "--riscv", "", riscv_test);
|
||||
CommandLineArgFlag fs("-s", "--stats", "", showStats);
|
||||
|
||||
CommandLineArg::readArgs(argc - 1, argv + 1);
|
||||
|
||||
if (showHelp || imgFileName.empty()) {
|
||||
std::cout << "Vortex emulator command line arguments:\n"
|
||||
" -i, --image <filename> Program RAM image\n"
|
||||
" -c, --cores <num> Number of cores\n"
|
||||
" -w, --warps <num> Number of warps\n"
|
||||
" -t, --threads <num> Number of threads\n"
|
||||
" -a, --arch <arch string> Architecture string\n"
|
||||
" -r, --riscv riscv test\n"
|
||||
" -s, --stats Print stats on exit.\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
ArchDef arch(archString, num_cores, num_warps, num_threads);
|
||||
|
||||
Decoder decoder(arch);
|
||||
MemoryUnit mu(0, arch.wsize(), true);
|
||||
|
||||
RAM ram((1<<12), (1<<20));
|
||||
|
||||
std::string program_ext(fileExtension(imgFileName.c_str()));
|
||||
if (program_ext == "bin") {
|
||||
ram.loadBinImage(imgFileName.c_str(), STARTUP_ADDR);
|
||||
} else if (program_ext == "hex") {
|
||||
ram.loadHexImage(imgFileName.c_str());
|
||||
} else {
|
||||
std::cout << "*** error: only *.bin or *.hex images supported." << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
mu.attach(ram, 0, 0xFFFFFFFF);
|
||||
|
||||
struct stat hello;
|
||||
fstat(0, &hello);
|
||||
|
||||
std::vector<std::shared_ptr<Core>> cores(num_cores);
|
||||
for (int i = 0; i < num_cores; ++i) {
|
||||
cores[i] = std::make_shared<Core>(arch, decoder, mu, i);
|
||||
}
|
||||
|
||||
bool running;
|
||||
int exitcode = 0;
|
||||
do {
|
||||
running = false;
|
||||
for (auto& core : cores) {
|
||||
core->step();
|
||||
if (core->running()) {
|
||||
running = true;
|
||||
}
|
||||
if (core->check_ebreak()) {
|
||||
exitcode = core->getIRegValue(3);
|
||||
running = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} while (running);
|
||||
|
||||
if (riscv_test) {
|
||||
if (1 == exitcode) {
|
||||
std::cout << "Passed." << std::endl;
|
||||
exitcode = 0;
|
||||
} else {
|
||||
std::cout << "Failed." << std::endl;
|
||||
}
|
||||
} else {
|
||||
if (exitcode != 0) {
|
||||
std::cout << "*** error: exitcode=" << exitcode << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
return exitcode;
|
||||
}
|
||||
63
sim/simX/pipeline.cpp
Normal file
63
sim/simX/pipeline.cpp
Normal file
@@ -0,0 +1,63 @@
|
||||
#include <iostream>
|
||||
#include "pipeline.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
namespace vortex {
|
||||
std::ostream &operator<<(std::ostream &os, const Pipeline& pipeline) {
|
||||
os << pipeline.name_ << ": valid=" << pipeline.valid << std::endl;
|
||||
os << pipeline.name_ << ": stalled=" << pipeline.stalled << std::endl;
|
||||
os << pipeline.name_ << ": stall_warp=" << pipeline.stall_warp << std::endl;
|
||||
os << pipeline.name_ << ": wid=" << pipeline.wid << std::endl;
|
||||
os << pipeline.name_ << ": PC=" << std::hex << pipeline.PC << std::endl;
|
||||
os << pipeline.name_ << ": used_iregs=" << pipeline.used_iregs << std::endl;
|
||||
os << pipeline.name_ << ": used_fregs=" << pipeline.used_fregs << std::endl;
|
||||
os << pipeline.name_ << ": used_vregs=" << pipeline.used_vregs << std::endl;
|
||||
return os;
|
||||
}
|
||||
}
|
||||
|
||||
// Creates a stage labelled `name` (the pointer is stored, not copied) and
// resets its state via clear().
Pipeline::Pipeline(const char* name) : name_(name) {
  clear();
}
|
||||
|
||||
void Pipeline::clear() {
|
||||
valid = false;
|
||||
stalled = false;
|
||||
stall_warp = false;
|
||||
wid = 0;
|
||||
PC = 0;
|
||||
used_iregs.reset();
|
||||
used_fregs.reset();
|
||||
used_vregs.reset();
|
||||
}
|
||||
|
||||
// Called at the start of a stage. `drain` may be null.
// If `drain` is stalled, this stage becomes stalled too and false is
// returned. Otherwise `drain` is marked invalid, this stage's stall flag is
// cleared, and the return value tells whether this stage holds a valid
// instruction.
bool Pipeline::enter(Pipeline *drain) {
  if (drain != nullptr) {
    if (drain->stalled) {
      stalled = true;
      return false;
    }
    drain->valid = false;
  }
  stalled = false;
  return valid;
}
|
||||
|
||||
// Copies this stage's state into `drain`; does nothing when `drain` is null.
// The stage name of `drain` is left unchanged.
void Pipeline::next(Pipeline *drain) {
  if (drain == nullptr)
    return;

  // status flags
  drain->valid      = valid;
  drain->stalled    = stalled;
  drain->stall_warp = stall_warp;

  // instruction identity
  drain->wid = wid;
  drain->PC  = PC;

  // register usage
  drain->rdest      = rdest;
  drain->rdest_type = rdest_type;
  drain->used_iregs = used_iregs;
  drain->used_fregs = used_fregs;
  drain->used_vregs = used_vregs;
}
|
||||
48
sim/simX/pipeline.h
Normal file
48
sim/simX/pipeline.h
Normal file
@@ -0,0 +1,48 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <util.h>
|
||||
#include "types.h"
|
||||
#include "debug.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class Instr;
|
||||
|
||||
class Pipeline {
|
||||
public:
|
||||
Pipeline(const char* name);
|
||||
|
||||
void clear();
|
||||
|
||||
bool enter(Pipeline* drain);
|
||||
|
||||
void next(Pipeline* drain);
|
||||
|
||||
//--
|
||||
bool valid;
|
||||
|
||||
//--
|
||||
bool stalled;
|
||||
bool stall_warp;
|
||||
|
||||
//--
|
||||
int wid;
|
||||
Word PC;
|
||||
|
||||
//--
|
||||
int rdest_type;
|
||||
int rdest;
|
||||
RegMask used_iregs;
|
||||
RegMask used_fregs;
|
||||
RegMask used_vregs;
|
||||
|
||||
private:
|
||||
|
||||
const char* name_;
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &, const Pipeline&);
|
||||
};
|
||||
|
||||
}
|
||||
22
sim/simX/types.h
Normal file
22
sim/simX/types.h
Normal file
@@ -0,0 +1,22 @@
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <bitset>
|
||||
#include <VX_config.h>
|
||||
|
||||
namespace vortex {

// Basic scalar types.
using Byte  = uint8_t;
using Word  = uint32_t;
using WordI = int32_t;   // signed counterpart of Word

using Addr = uint32_t;
using Size = uint32_t;

// 32-entry bit masks for registers, threads and warps.
using RegMask    = std::bitset<32>;
using ThreadMask = std::bitset<32>;
using WarpMask   = std::bitset<32>;

}
|
||||
93
sim/simX/warp.cpp
Normal file
93
sim/simX/warp.cpp
Normal file
@@ -0,0 +1,93 @@
|
||||
#include <iostream>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <math.h>
|
||||
#include <assert.h>
|
||||
#include <util.h>
|
||||
|
||||
#include "instr.h"
|
||||
#include "core.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
// Builds warp `id` of `core`: sizes the register files from the core's
// architecture description (all entries zero) and resets the warp state.
Warp::Warp(Core *core, Word id)
  : id_(id)
  , core_(core) {
  const auto& arch = core_->arch();
  // Integer and float files: one row per thread, one column per register.
  iRegFile_.resize(arch.num_threads(), std::vector<Word>(arch.num_regs(), 0));
  fRegFile_.resize(arch.num_threads(), std::vector<Word>(arch.num_regs(), 0));
  // Vector file: one row per register, vsize() bytes each.
  vRegFile_.resize(arch.num_regs(), std::vector<Byte>(arch.vsize(), 0));
  this->clear();
}
|
||||
|
||||
// Resets the warp: inactive, empty thread mask, PC at STARTUP_ADDR.
void Warp::clear() {
  active_ = false;
  tmask_.reset();
  PC_ = STARTUP_ADDR;
}
|
||||
|
||||
// Fetches, decodes and executes the instruction at the warp's current PC.
// Before executing, `pipeline` is filled with the instruction's PC, its
// destination register and the set of registers it uses (destination and
// sources, one mask per register file).
// Requires at least one active thread in the thread mask.
void Warp::step(Pipeline *pipeline) {
  assert(tmask_.any());

  // Trace header; the thread mask is printed highest thread first.
  DPH(2, "Step: wid=" << id_ << ", PC=0x" << std::hex << PC_ << ", tmask=");
  for (int t = core_->arch().num_threads() - 1; t >= 0; --t)
    DPN(2, tmask_[t]);
  DPN(2, "\n");

  /* Fetch and decode. */
  const Word fetched = core_->icache_fetch(PC_);
  auto instr = core_->decoder().decode(fetched, PC_);

  // Update pipeline
  pipeline->valid = true;
  pipeline->PC = PC_;
  pipeline->rdest = instr->getRDest();
  pipeline->rdest_type = instr->getRDType();
  pipeline->used_iregs.reset();
  pipeline->used_fregs.reset();
  pipeline->used_vregs.reset();

  // Flags register `reg` as used in the mask selected by the type tag
  // (1: integer, 2: float, 3: vector); other tags are ignored.
  auto mark_used = [pipeline](int type, int reg) {
    switch (type) {
    case 1:
      pipeline->used_iregs[reg] = 1;
      break;
    case 2:
      pipeline->used_fregs[reg] = 1;
      break;
    case 3:
      pipeline->used_vregs[reg] = 1;
      break;
    default:
      break;
    }
  };

  mark_used(pipeline->rdest_type, pipeline->rdest);
  for (int i = 0; i < instr->getNRSrc(); ++i) {
    mark_used(instr->getRSType(i), instr->getRSrc(i));
  }

  // Execute
  this->execute(*instr, pipeline);

  // Level-4 trace: integer register file, one line per register with one
  // column per thread.
  D(4, "Register state:");
  for (int i = 0; i < core_->arch().num_regs(); ++i) {
    DPN(4, "  %r" << std::setfill('0') << std::setw(2) << std::dec << i << ':');
    for (int j = 0; j < core_->arch().num_threads(); ++j) {
      DPN(4, ' ' << std::setfill('0') << std::setw(8) << std::hex << iRegFile_[j][i] << std::setfill(' ') << ' ');
    }
    DPN(4, std::endl);
  }
}
|
||||
112
sim/simX/warp.h
Normal file
112
sim/simX/warp.h
Normal file
@@ -0,0 +1,112 @@
|
||||
#ifndef __WARP_H
|
||||
#define __WARP_H
|
||||
|
||||
#include <vector>
|
||||
#include <stack>
|
||||
#include "types.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class Core;
|
||||
class Instr;
|
||||
class Pipeline;
|
||||
struct DomStackEntry {
|
||||
DomStackEntry(const ThreadMask &tmask, Word PC)
|
||||
: tmask(tmask)
|
||||
, PC(PC)
|
||||
, fallThrough(false)
|
||||
, unanimous(false)
|
||||
{}
|
||||
|
||||
DomStackEntry(const ThreadMask &tmask)
|
||||
: tmask(tmask)
|
||||
, PC(0)
|
||||
, fallThrough(true)
|
||||
, unanimous(false)
|
||||
{}
|
||||
|
||||
ThreadMask tmask;
|
||||
Word PC;
|
||||
bool fallThrough;
|
||||
bool unanimous;
|
||||
};
|
||||
|
||||
// Plain aggregate holding the four integer fields of the vector-type state.
// NOTE(review): field names match the RISC-V vector `vtype` CSR fields;
// confirm the encoding against the code that fills them in.
struct vtype {
  int vill;   // first field
  int vediv;  // second field
  int vsew;   // third field
  int vlmul;  // fourth field
};
|
||||
|
||||
class Warp {
|
||||
public:
|
||||
Warp(Core *core, Word id);
|
||||
|
||||
void clear();
|
||||
|
||||
bool active() const {
|
||||
return active_;
|
||||
}
|
||||
|
||||
void activate() {
|
||||
active_ = true;
|
||||
}
|
||||
|
||||
std::size_t getActiveThreads() const {
|
||||
if (active_)
|
||||
return tmask_.count();
|
||||
return 0;
|
||||
}
|
||||
|
||||
Word id() const {
|
||||
return id_;
|
||||
}
|
||||
|
||||
Word getPC() const {
|
||||
return PC_;
|
||||
}
|
||||
|
||||
void setPC(Word PC) {
|
||||
PC_ = PC;
|
||||
}
|
||||
|
||||
void setTmask(size_t index, bool value) {
|
||||
tmask_[index] = value;
|
||||
active_ = tmask_.any();
|
||||
}
|
||||
|
||||
Word getTmask() const {
|
||||
if (active_)
|
||||
return tmask_.to_ulong();
|
||||
return 0;
|
||||
}
|
||||
|
||||
Word getIRegValue(int reg) const {
|
||||
return iRegFile_[0][reg];
|
||||
}
|
||||
|
||||
void step(Pipeline *);
|
||||
|
||||
private:
|
||||
|
||||
void execute(const Instr &instr, Pipeline *);
|
||||
|
||||
Word id_;
|
||||
bool active_;
|
||||
Core *core_;
|
||||
|
||||
Word PC_;
|
||||
ThreadMask tmask_;
|
||||
|
||||
std::vector<std::vector<Word>> iRegFile_;
|
||||
std::vector<std::vector<Word>> fRegFile_;
|
||||
std::vector<std::vector<Byte>> vRegFile_;
|
||||
std::stack<DomStackEntry> domStack_;
|
||||
|
||||
struct vtype vtype_;
|
||||
int vl_;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
1
sim/vlsim/.gitignore
vendored
Normal file
1
sim/vlsim/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
/obj_dir/*
|
||||
116
sim/vlsim/Makefile
Normal file
116
sim/vlsim/Makefile
Normal file
@@ -0,0 +1,116 @@
|
||||
CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors
|
||||
#CXXFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I. -I../../../hw -I../../common
|
||||
CXXFLAGS += -I$(VERILATOR_ROOT)/include -I$(VERILATOR_ROOT)/include/vltstd
|
||||
|
||||
# control RTL debug print states
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSHR
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_MEM
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
|
||||
|
||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += -DDBG_CACHE_REQ_INFO
|
||||
|
||||
CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1
|
||||
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
CXXFLAGS += -DDUMP_PERF_STATS
|
||||
|
||||
LDFLAGS += -shared
|
||||
|
||||
RTL_DIR = ../../hw/rtl
|
||||
DPI_DIR = ../../hw/dpi
|
||||
|
||||
SRCS = ../common/util.cpp ../common/mem.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
|
||||
SRCS += fpga.cpp opae_sim.cpp
|
||||
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE)
|
||||
RTL_INCLUDE += -I$(RTL_DIR)/afu -I$(RTL_DIR)/afu/ccip
|
||||
|
||||
TOP = vortex_afu_shim
|
||||
|
||||
VL_FLAGS = --cc $(TOP) --top-module $(TOP)
|
||||
VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic
|
||||
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
|
||||
VL_FLAGS += --x-initial unique --x-assign unique
|
||||
VL_FLAGS += verilator.vlt
|
||||
VL_FLAGS += $(CONFIGS)
|
||||
VL_FLAGS += $(RTL_INCLUDE)
|
||||
|
||||
# Enable Verilator multithreaded simulation
|
||||
#THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
|
||||
#VL_FLAGS += --threads $(THREADS)
|
||||
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += -DVCD_OUTPUT --trace --trace-structs $(DBG_FLAGS)
|
||||
CXXFLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
|
||||
else
|
||||
VL_FLAGS += -DNDEBUG
|
||||
CXXFLAGS += -DNDEBUG
|
||||
endif
|
||||
|
||||
# Enable scope analyzer
|
||||
ifdef SCOPE
|
||||
VL_FLAGS += -DSCOPE
|
||||
CXXFLAGS += -DSCOPE
|
||||
endif
|
||||
|
||||
# Enable perf counters
|
||||
ifdef PERF
|
||||
VL_FLAGS += -DPERF_ENABLE
|
||||
CXXFLAGS += -DPERF_ENABLE
|
||||
endif
|
||||
|
||||
# use our OPAE shim
|
||||
VL_FLAGS += -DNOPAE
|
||||
CXXFLAGS += -DNOPAE
|
||||
|
||||
# ALU backend
|
||||
VL_FLAGS += -DIMUL_DPI
|
||||
VL_FLAGS += -DIDIV_DPI
|
||||
|
||||
# FPU backend
|
||||
FPU_CORE ?= FPU_DPI
|
||||
VL_FLAGS += -D$(FPU_CORE)
|
||||
|
||||
OBJS := $(patsubst %.cpp, obj_dir/%.o, $(notdir $(SRCS)))
|
||||
VPATH := $(sort $(dir $(SRCS)))
|
||||
|
||||
#$(info OBJS is $(OBJS))
|
||||
#$(info VPATH is $(VPATH))
|
||||
|
||||
PROJECT = libopae-c-vlsim
|
||||
|
||||
all: shared
|
||||
|
||||
shared: $(SRCS)
|
||||
verilator --build --exe $(VL_FLAGS) $(SRCS) -CFLAGS '$(CXXFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT).so
|
||||
|
||||
obj_dir/V$(TOP)__ALL.a:
|
||||
verilator --build $(VL_FLAGS) -CFLAGS '$(CXXFLAGS)'
|
||||
|
||||
obj_dir/%.o: %.cpp
|
||||
cd obj_dir && $(CXX) $(CXXFLAGS) -c ../$< -o $(notdir $@)
|
||||
|
||||
static: obj_dir/V$(TOP)__ALL.a $(OBJS)
|
||||
cp obj_dir/V$(TOP)__ALL.a $(PROJECT).a
|
||||
$(AR) rs $(PROJECT).a $(OBJS)
|
||||
|
||||
clean-objdir:
|
||||
rm -rf obj_dir
|
||||
|
||||
clean: clean-objdir
|
||||
rm -rf $(PROJECT).a $(PROJECT).so
|
||||
84
sim/vlsim/fpga.cpp
Normal file
84
sim/vlsim/fpga.cpp
Normal file
@@ -0,0 +1,84 @@
|
||||
#include <stdint.h>
|
||||
#include <iostream>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <cstdlib>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include "fpga.h"
|
||||
#include "opae_sim.h"
|
||||
#include <VX_config.h>
|
||||
|
||||
// Opens the simulated device: allocates a new simulator instance and returns
// it to the caller as an opaque handle.
//
// token  - ignored by the simulator.
// handle - receives the opaque handle; must not be NULL.
// flags  - must be 0.
// Returns FPGA_INVALID_PARAM on a bad argument, FPGA_OK otherwise.
extern fpga_result fpgaOpen(fpga_token token, fpga_handle *handle, int flags) {
  (void)token; // no device enumeration in the simulator
  if (nullptr == handle || flags != 0)
    return FPGA_INVALID_PARAM;
  // opae_sim lives in namespace vortex; this file has no using-directive,
  // so the name must be qualified.
  auto sim = new vortex::opae_sim();
  *handle = reinterpret_cast<fpga_handle>(sim);
  return FPGA_OK;
}
|
||||
|
||||
// Closes a handle obtained from fpgaOpen and destroys the simulator behind it.
// Returns FPGA_INVALID_PARAM for a NULL handle, FPGA_OK otherwise.
extern fpga_result fpgaClose(fpga_handle handle) {
  if (nullptr == handle)
    return FPGA_INVALID_PARAM;

  // opae_sim is declared in namespace vortex, hence the qualified name.
  auto sim = reinterpret_cast<vortex::opae_sim*>(handle);
  delete sim;

  return FPGA_OK;
}
|
||||
|
||||
// Allocates a host buffer of `len` bytes that the simulated device can access.
//
// buf_addr - receives the host address of the buffer.
// wsid     - receives the identifier to use with fpgaGetIOAddress /
//            fpgaReleaseBuffer.
// flags    - forwarded unchanged to the simulator.
// Returns FPGA_INVALID_PARAM on a NULL handle/output pointer or zero length,
// FPGA_NO_MEMORY when the simulator reports an allocation failure, FPGA_OK
// otherwise.
extern fpga_result fpgaPrepareBuffer(fpga_handle handle, uint64_t len, void **buf_addr, uint64_t *wsid, int flags) {
  if (nullptr == handle || len == 0 || buf_addr == nullptr || wsid == nullptr)
    return FPGA_INVALID_PARAM;

  // opae_sim is declared in namespace vortex, hence the qualified name.
  auto sim = reinterpret_cast<vortex::opae_sim*>(handle);
  if (sim->prepare_buffer(len, buf_addr, wsid, flags) != 0)
    return FPGA_NO_MEMORY;

  return FPGA_OK;
}
|
||||
|
||||
// Releases the host buffer identified by `wsid`.
// Returns FPGA_INVALID_PARAM for a NULL handle, FPGA_OK otherwise (an unknown
// wsid is not reported by the simulator).
extern fpga_result fpgaReleaseBuffer(fpga_handle handle, uint64_t wsid) {
  if (nullptr == handle)
    return FPGA_INVALID_PARAM;

  // opae_sim is declared in namespace vortex, hence the qualified name.
  auto sim = reinterpret_cast<vortex::opae_sim*>(handle);
  sim->release_buffer(wsid);

  return FPGA_OK;
}
|
||||
|
||||
// Looks up the device-visible address of the buffer identified by `wsid` and
// stores it in *ioaddr.
// Returns FPGA_INVALID_PARAM for a NULL handle or output pointer, FPGA_OK
// otherwise.
extern fpga_result fpgaGetIOAddress(fpga_handle handle, uint64_t wsid, uint64_t *ioaddr) {
  if (nullptr == handle || ioaddr == nullptr)
    return FPGA_INVALID_PARAM;

  // opae_sim is declared in namespace vortex, hence the qualified name.
  auto sim = reinterpret_cast<vortex::opae_sim*>(handle);
  sim->get_io_address(wsid, ioaddr);

  return FPGA_OK;
}
|
||||
|
||||
// Writes the 64-bit `value` to the device MMIO space at byte `offset`.
// Only MMIO space 0 exists in the simulator.
// Returns FPGA_INVALID_PARAM for a NULL handle or non-zero mmio_num, FPGA_OK
// otherwise.
extern fpga_result fpgaWriteMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t value) {
  if (nullptr == handle || mmio_num != 0)
    return FPGA_INVALID_PARAM;

  // opae_sim is declared in namespace vortex, hence the qualified name.
  auto sim = reinterpret_cast<vortex::opae_sim*>(handle);
  sim->write_mmio64(mmio_num, offset, value);

  return FPGA_OK;
}
|
||||
|
||||
// Reads a 64-bit value from the device MMIO space at byte `offset` into *value.
// Only MMIO space 0 exists in the simulator.
// Returns FPGA_INVALID_PARAM for a NULL handle/output pointer or non-zero
// mmio_num, FPGA_OK otherwise.
extern fpga_result fpgaReadMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t *value) {
  if (nullptr == handle || mmio_num != 0 || value == nullptr)
    return FPGA_INVALID_PARAM;

  // opae_sim is declared in namespace vortex, hence the qualified name.
  auto sim = reinterpret_cast<vortex::opae_sim*>(handle);
  sim->read_mmio64(mmio_num, offset, value);

  return FPGA_OK;
}
|
||||
|
||||
// Returns a short, static, human-readable description of a result code.
// The returned pointer refers to a string literal and must not be freed.
// Unrecognized values yield "unknown error".
// NOTE(review): wording is modelled on the OPAE library's messages; adjust if
// callers compare against exact strings.
extern const char *fpgaErrStr(fpga_result e) {
  switch (e) {
  case FPGA_OK:            return "success";
  case FPGA_INVALID_PARAM: return "invalid parameter";
  case FPGA_BUSY:          return "resource busy";
  case FPGA_EXCEPTION:     return "exception";
  case FPGA_NOT_FOUND:     return "not found";
  case FPGA_NO_MEMORY:     return "no memory";
  case FPGA_NOT_SUPPORTED: return "not supported";
  case FPGA_NO_DRIVER:     return "no driver available";
  case FPGA_NO_DAEMON:     return "no fpga daemon running";
  case FPGA_NO_ACCESS:     return "insufficient privileges";
  case FPGA_RECONF_ERROR:  return "reconfiguration error";
  }
  return "unknown error";
}
|
||||
48
sim/vlsim/fpga.h
Normal file
48
sim/vlsim/fpga.h
Normal file
@@ -0,0 +1,48 @@
|
||||
#ifndef __FPGA_H__
|
||||
#define __FPGA_H__
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Result codes returned by the fpga* API functions.
 * Values are consecutive starting at 0 and are spelled out explicitly so the
 * numeric value of each code is visible at a glance. */
typedef enum {
  FPGA_OK            = 0,  /**< Operation completed successfully */
  FPGA_INVALID_PARAM = 1,  /**< Invalid parameter supplied */
  FPGA_BUSY          = 2,  /**< Resource is busy */
  FPGA_EXCEPTION     = 3,  /**< An exception occurred */
  FPGA_NOT_FOUND     = 4,  /**< A required resource was not found */
  FPGA_NO_MEMORY     = 5,  /**< Not enough memory to complete operation */
  FPGA_NOT_SUPPORTED = 6,  /**< Requested operation is not supported */
  FPGA_NO_DRIVER     = 7,  /**< Driver is not loaded */
  FPGA_NO_DAEMON     = 8,  /**< FPGA Daemon (fpgad) is not running */
  FPGA_NO_ACCESS     = 9,  /**< Insufficient privileges or permissions */
  FPGA_RECONF_ERROR  = 10  /**< Error while reconfiguring FPGA */
} fpga_result;
|
||||
|
||||
typedef void *fpga_handle;
|
||||
|
||||
typedef void *fpga_token;
|
||||
|
||||
fpga_result fpgaOpen(fpga_token token, fpga_handle *handle, int flags);
|
||||
|
||||
fpga_result fpgaClose(fpga_handle handle);
|
||||
|
||||
fpga_result fpgaPrepareBuffer(fpga_handle handle, uint64_t len, void **buf_addr, uint64_t *wsid, int flags);
|
||||
|
||||
fpga_result fpgaReleaseBuffer(fpga_handle handle, uint64_t wsid);
|
||||
|
||||
fpga_result fpgaGetIOAddress(fpga_handle handle, uint64_t wsid, uint64_t *ioaddr);
|
||||
|
||||
fpga_result fpgaWriteMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t value);
|
||||
|
||||
fpga_result fpgaReadMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t *value);
|
||||
|
||||
const char *fpgaErrStr(fpga_result e);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif // __cplusplus
|
||||
|
||||
#endif // __FPGA_H__
|
||||
442
sim/vlsim/opae_sim.cpp
Normal file
442
sim/vlsim/opae_sim.cpp
Normal file
@@ -0,0 +1,442 @@
|
||||
#include "opae_sim.h"
|
||||
|
||||
#include <verilated.h>
|
||||
#include "Vvortex_afu_shim.h"
|
||||
#include "Vvortex_afu_shim__Syms.h"
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
#include <verilated_vcd_c.h>
|
||||
#endif
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <mem.h>
|
||||
|
||||
#define CCI_LATENCY 8
|
||||
#define CCI_RAND_MOD 8
|
||||
#define CCI_RQ_SIZE 16
|
||||
#define CCI_WQ_SIZE 16
|
||||
|
||||
#define ENABLE_MEM_STALLS
|
||||
|
||||
#ifndef TRACE_START_TIME
|
||||
#define TRACE_START_TIME 0ull
|
||||
#endif
|
||||
|
||||
#ifndef TRACE_STOP_TIME
|
||||
#define TRACE_STOP_TIME -1ull
|
||||
#endif
|
||||
|
||||
#ifndef MEM_LATENCY
|
||||
#define MEM_LATENCY 24
|
||||
#endif
|
||||
|
||||
#ifndef MEM_RQ_SIZE
|
||||
#define MEM_RQ_SIZE 16
|
||||
#endif
|
||||
|
||||
#ifndef MEM_STALLS_MODULO
|
||||
#define MEM_STALLS_MODULO 16
|
||||
#endif
|
||||
|
||||
#ifndef VERILATOR_RESET_VALUE
|
||||
#define VERILATOR_RESET_VALUE 2
|
||||
#endif
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
static uint64_t timestamp = 0;
|
||||
|
||||
double sc_time_stamp() {
|
||||
return timestamp;
|
||||
}
|
||||
|
||||
// Allocates `size` bytes whose address is a multiple of `alignment`.
//
// alignment - must be a power of two (the address is rounded with a bit mask).
// size      - number of usable bytes requested.
// Returns the aligned pointer, or nullptr when the request cannot be satisfied
// (malloc failure, or size so large that adding the bookkeeping margin would
// overflow). The result must be released with __aligned_free, never free().
//
// Layout: the block from malloc is over-allocated by (alignment-1) bytes plus
// one pointer; the original malloc address is stored in the pointer-sized slot
// immediately before the returned address.
static void *__aligned_malloc(size_t alignment, size_t size) {
  // reserve margin for alignment and storing of unaligned address
  const size_t margin = (alignment-1) + sizeof(void*);

  // refuse requests where size + margin would wrap around
  const size_t max_size = ~static_cast<size_t>(0);
  if (size > max_size - margin)
    return nullptr;

  void *unaligned_addr = malloc(size + margin);
  if (unaligned_addr == nullptr)
    return nullptr; // propagate allocation failure instead of writing through it

  void **aligned_addr = (void**)((uintptr_t)(((uint8_t*)unaligned_addr) + margin) & ~(uintptr_t)(alignment-1));
  aligned_addr[-1] = unaligned_addr;
  return aligned_addr;
}
|
||||
|
||||
// Releases a block obtained from __aligned_malloc.
// Passing nullptr is a no-op, mirroring free(); any other pointer must have
// come from __aligned_malloc.
static void __aligned_free(void *ptr) {
  if (ptr == nullptr)
    return; // nothing was allocated, and there is no header slot to read

  // retrieve the stored unaligned address and use it to free the allocation
  void* unaligned_addr = ((void**)ptr)[-1];
  free(unaligned_addr);
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static bool trace_enabled = false;
|
||||
static uint64_t trace_start_time = TRACE_START_TIME;
|
||||
static uint64_t trace_stop_time = TRACE_STOP_TIME;
|
||||
|
||||
bool sim_trace_enabled() {
|
||||
if (timestamp >= trace_start_time
|
||||
&& timestamp < trace_stop_time)
|
||||
return true;
|
||||
return trace_enabled;
|
||||
}
|
||||
|
||||
// Sets the manual tracing switch consulted by sim_trace_enabled() whenever the
// timestamp is outside the configured trace window.
void sim_trace_enable(bool enable) {
  trace_enabled = enable;
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
namespace vortex {
|
||||
class VL_OBJ {
|
||||
public:
|
||||
#ifdef AXI_BUS
|
||||
VVortex_axi *device;
|
||||
#else
|
||||
Vvortex_afu_shim *device;
|
||||
#endif
|
||||
#ifdef VCD_OUTPUT
|
||||
VerilatedVcdC *trace;
|
||||
#endif
|
||||
|
||||
VL_OBJ() {
|
||||
// force random values for unitialized signals
|
||||
Verilated::randReset(VERILATOR_RESET_VALUE);
|
||||
Verilated::randSeed(50);
|
||||
|
||||
// Turn off assertion before reset
|
||||
Verilated::assertOn(false);
|
||||
|
||||
#ifdef AXI_BUS
|
||||
this->device = new Vvortex_afu_shim();
|
||||
#else
|
||||
this->device = new Vvortex_afu_shim();
|
||||
#endif
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
Verilated::traceEverOn(true);
|
||||
this->trace = new VerilatedVcdC();
|
||||
this->device->trace(this->trace, 99);
|
||||
this->trace->open("trace.vcd");
|
||||
#endif
|
||||
}
|
||||
|
||||
~VL_OBJ() {
|
||||
#ifdef VCD_OUTPUT
|
||||
this->trace->close();
|
||||
delete this->trace;
|
||||
#endif
|
||||
delete this->device;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Creates the Verilated device and the RAM model, resets the device, then
// starts a background task that keeps stepping the simulation until stop_
// is set. Each step is taken with mutex_ held.
opae_sim::opae_sim()
    : stop_(false)
    , host_buffer_ids_(0) {
  vl_obj_ = new VL_OBJ();
  ram_ = new RAM((1<<12), (1<<20));

  // reset the device
  this->reset();

  // launch execution thread
  auto run_loop = [this]() {
    while (!stop_) {
      std::lock_guard<std::mutex> guard(mutex_);
      this->step();
    }
  };
  future_ = std::async(std::launch::async, run_loop);
}
|
||||
|
||||
// Stops the background stepping task, waits for it to finish, frees every
// host buffer still registered, then destroys the device and RAM models.
opae_sim::~opae_sim() {
  stop_ = true;
  if (future_.valid())
    future_.wait();

  for (auto it = host_buffers_.begin(); it != host_buffers_.end(); ++it)
    __aligned_free(it->second.data);

  delete vl_obj_;
  delete ram_;
}
|
||||
|
||||
// Allocates a CACHE_BLOCK_SIZE-aligned host buffer of `len` bytes and
// registers it under a fresh id.
//
// buf_addr - receives the host pointer.
// wsid     - receives the buffer id.
// flags    - not used by this implementation.
// Returns 0 on success, -1 when the allocator returns NULL.
int opae_sim::prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags) {
  void *const mem = __aligned_malloc(CACHE_BLOCK_SIZE, len);
  if (NULL == mem)
    return -1;

  // the IO address handed to the device is simply the host address
  host_buffer_t entry;
  entry.data = (uint64_t*)mem;
  entry.size = len;
  entry.ioaddr = uintptr_t(mem);

  const auto id = host_buffer_ids_++;
  host_buffers_.emplace(id, entry);

  *buf_addr = mem;
  *wsid = id;
  return 0;
}
|
||||
|
||||
// Frees and unregisters the host buffer with id `wsid`; unknown ids are
// silently ignored.
void opae_sim::release_buffer(uint64_t wsid) {
  auto entry = host_buffers_.find(wsid);
  if (entry == host_buffers_.end())
    return;

  __aligned_free(entry->second.data);
  host_buffers_.erase(entry);
}
|
||||
|
||||
// Stores in *ioaddr the IO address of the host buffer registered as `wsid`.
// An unknown wsid yields 0, the same value callers observed before, but the
// lookup no longer inserts an empty entry into host_buffers_ (operator[]
// did, leaving a record with a null data pointer behind for later cleanup).
void opae_sim::get_io_address(uint64_t wsid, uint64_t *ioaddr) {
  auto it = host_buffers_.find(wsid);
  *ioaddr = (it != host_buffers_.end()) ? it->second.ioaddr : 0;
}
|
||||
|
||||
// Performs one MMIO read: drives the read request for `offset` on channel 0,
// advances the simulation by one step with mutex_ held, and copies the data
// the device returns into *value. `mmio_num` is not used here.
// The device is expected (asserted) to answer in that same step.
void opae_sim::read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) {
  std::lock_guard<std::mutex> guard(mutex_);
  auto dev = vl_obj_->device;

  // request header: the address is expressed in 4-byte units
  dev->vcp2af_sRxPort_c0_mmioRdValid = 1;
  dev->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4;
  dev->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1;
  dev->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0;

  this->step();

  dev->vcp2af_sRxPort_c0_mmioRdValid = 0;
  assert(dev->af2cp_sTxPort_c2_mmioRdValid);
  *value = dev->af2cp_sTxPort_c2_data;
}
|
||||
|
||||
// Performs one MMIO write: drives the write request for `offset` together
// with the 8 bytes of `value` on channel 0 and advances the simulation by one
// step with mutex_ held. `mmio_num` is not used here.
void opae_sim::write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value) {
  std::lock_guard<std::mutex> guard(mutex_);
  auto dev = vl_obj_->device;

  // request header: the address is expressed in 4-byte units
  dev->vcp2af_sRxPort_c0_mmioWrValid = 1;
  dev->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4;
  dev->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1;
  dev->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0;
  memcpy(dev->vcp2af_sRxPort_c0_data, &value, 8);

  this->step();

  dev->vcp2af_sRxPort_c0_mmioWrValid = 0;
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void opae_sim::reset() {
|
||||
cci_reads_.clear();
|
||||
cci_writes_.clear();
|
||||
vl_obj_->device->vcp2af_sRxPort_c0_mmioRdValid = 0;
|
||||
vl_obj_->device->vcp2af_sRxPort_c0_mmioWrValid = 0;
|
||||
vl_obj_->device->vcp2af_sRxPort_c0_rspValid = 0;
|
||||
vl_obj_->device->vcp2af_sRxPort_c1_rspValid = 0;
|
||||
vl_obj_->device->vcp2af_sRxPort_c0_TxAlmFull = 0;
|
||||
vl_obj_->device->vcp2af_sRxPort_c1_TxAlmFull = 0;
|
||||
|
||||
for (int b = 0; b < MEMORY_BANKS; ++b) {
|
||||
mem_reads_[b].clear();
|
||||
vl_obj_->device->avs_readdatavalid[b] = 0;
|
||||
vl_obj_->device->avs_waitrequest[b] = 0;
|
||||
}
|
||||
|
||||
vl_obj_->device->reset = 1;
|
||||
|
||||
for (int i = 0; i < RESET_DELAY; ++i) {
|
||||
vl_obj_->device->clk = 0;
|
||||
this->eval();
|
||||
vl_obj_->device->clk = 1;
|
||||
this->eval();
|
||||
}
|
||||
|
||||
vl_obj_->device->reset = 0;
|
||||
|
||||
// Turn on assertion after reset
|
||||
Verilated::assertOn(true);
|
||||
}
|
||||
|
||||
// Advances the simulation by one clock cycle: services the host-side bus
// models (CCI receive, CCI transmit, local memory) and then evaluates the
// device with the clock low and again with the clock high.
void opae_sim::step() {
  // bus models run before the clock edge, in this fixed order
  this->sRxPort_bus();
  this->sTxPort_bus();
  this->avs_bus();

  auto dev = vl_obj_->device;
  dev->clk = 0;
  this->eval();
  dev->clk = 1;
  this->eval();

#ifndef NDEBUG
  fflush(stdout);
#endif
}
|
||||
|
||||
// Evaluates the Verilated model once, dumps the state to the VCD trace when
// tracing is compiled in and currently enabled, and advances the global
// timestamp by one.
void opae_sim::eval() {
  vl_obj_->device->eval();
#ifdef VCD_OUTPUT
  if (sim_trace_enabled())
    vl_obj_->trace->dump(timestamp);
#endif
  timestamp += 1;
}
|
||||
|
||||
void opae_sim::sRxPort_bus() {
|
||||
// check mmio request
|
||||
bool mmio_req_enabled = vl_obj_->device->vcp2af_sRxPort_c0_mmioRdValid
|
||||
|| vl_obj_->device->vcp2af_sRxPort_c0_mmioWrValid;
|
||||
|
||||
// schedule CCI read responses
|
||||
std::list<cci_rd_req_t>::iterator cci_rd_it(cci_reads_.end());
|
||||
for (auto it = cci_reads_.begin(), ie = cci_reads_.end(); it != ie; ++it) {
|
||||
if (it->cycles_left > 0)
|
||||
it->cycles_left -= 1;
|
||||
if ((cci_rd_it == ie) && (it->cycles_left == 0)) {
|
||||
cci_rd_it = it;
|
||||
}
|
||||
}
|
||||
|
||||
// schedule CCI write responses
|
||||
std::list<cci_wr_req_t>::iterator cci_wr_it(cci_writes_.end());
|
||||
for (auto it = cci_writes_.begin(), ie = cci_writes_.end(); it != ie; ++it) {
|
||||
if (it->cycles_left > 0)
|
||||
it->cycles_left -= 1;
|
||||
if ((cci_wr_it == ie) && (it->cycles_left == 0)) {
|
||||
cci_wr_it = it;
|
||||
}
|
||||
}
|
||||
|
||||
// send CCI write response
|
||||
vl_obj_->device->vcp2af_sRxPort_c1_rspValid = 0;
|
||||
if (cci_wr_it != cci_writes_.end()) {
|
||||
vl_obj_->device->vcp2af_sRxPort_c1_rspValid = 1;
|
||||
vl_obj_->device->vcp2af_sRxPort_c1_hdr_resp_type = 0;
|
||||
vl_obj_->device->vcp2af_sRxPort_c1_hdr_mdata = cci_wr_it->mdata;
|
||||
cci_writes_.erase(cci_wr_it);
|
||||
}
|
||||
|
||||
// send CCI read response (ensure mmio disabled)
|
||||
vl_obj_->device->vcp2af_sRxPort_c0_rspValid = 0;
|
||||
if (!mmio_req_enabled
|
||||
&& (cci_rd_it != cci_reads_.end())) {
|
||||
vl_obj_->device->vcp2af_sRxPort_c0_rspValid = 1;
|
||||
vl_obj_->device->vcp2af_sRxPort_c0_hdr_resp_type = 0;
|
||||
memcpy(vl_obj_->device->vcp2af_sRxPort_c0_data, cci_rd_it->data.data(), CACHE_BLOCK_SIZE);
|
||||
vl_obj_->device->vcp2af_sRxPort_c0_hdr_mdata = cci_rd_it->mdata;
|
||||
/*printf("%0ld: [sim] CCI Rd Rsp: addr=%ld, mdata=%d, data=", timestamp, cci_rd_it->addr, cci_rd_it->mdata);
|
||||
for (int i = 0; i < CACHE_BLOCK_SIZE; ++i)
|
||||
printf("%02x", cci_rd_it->data[CACHE_BLOCK_SIZE-1-i]);
|
||||
printf("\n");*/
|
||||
cci_reads_.erase(cci_rd_it);
|
||||
}
|
||||
}
|
||||
|
||||
void opae_sim::sTxPort_bus() {
|
||||
// process read requests
|
||||
if (vl_obj_->device->af2cp_sTxPort_c0_valid) {
|
||||
assert(!vl_obj_->device->vcp2af_sRxPort_c0_TxAlmFull);
|
||||
cci_rd_req_t cci_req;
|
||||
cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD);
|
||||
cci_req.addr = vl_obj_->device->af2cp_sTxPort_c0_hdr_address;
|
||||
cci_req.mdata = vl_obj_->device->af2cp_sTxPort_c0_hdr_mdata;
|
||||
auto host_ptr = (uint64_t*)(vl_obj_->device->af2cp_sTxPort_c0_hdr_address * CACHE_BLOCK_SIZE);
|
||||
memcpy(cci_req.data.data(), host_ptr, CACHE_BLOCK_SIZE);
|
||||
//printf("%0ld: [sim] CCI Rd Req: addr=%ld, mdata=%d\n", timestamp, vl_obj_->device->af2cp_sTxPort_c0_hdr_address, cci_req.mdata);
|
||||
cci_reads_.emplace_back(cci_req);
|
||||
}
|
||||
|
||||
// process write requests
|
||||
if (vl_obj_->device->af2cp_sTxPort_c1_valid) {
|
||||
assert(!vl_obj_->device->vcp2af_sRxPort_c1_TxAlmFull);
|
||||
cci_wr_req_t cci_req;
|
||||
cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD);
|
||||
cci_req.mdata = vl_obj_->device->af2cp_sTxPort_c1_hdr_mdata;
|
||||
auto host_ptr = (uint64_t*)(vl_obj_->device->af2cp_sTxPort_c1_hdr_address * CACHE_BLOCK_SIZE);
|
||||
memcpy(host_ptr, vl_obj_->device->af2cp_sTxPort_c1_data, CACHE_BLOCK_SIZE);
|
||||
cci_writes_.emplace_back(cci_req);
|
||||
}
|
||||
|
||||
// check queues overflow
|
||||
vl_obj_->device->vcp2af_sRxPort_c0_TxAlmFull = (cci_reads_.size() >= (CCI_RQ_SIZE-1));
|
||||
vl_obj_->device->vcp2af_sRxPort_c1_TxAlmFull = (cci_writes_.size() >= (CCI_WQ_SIZE-1));
|
||||
}
|
||||
|
||||
void opae_sim::avs_bus() {
|
||||
for (int b = 0; b < MEMORY_BANKS; ++b) {
|
||||
// update memory responses schedule
|
||||
for (auto& rsp : mem_reads_[b]) {
|
||||
if (rsp.cycles_left > 0)
|
||||
rsp.cycles_left -= 1;
|
||||
}
|
||||
|
||||
// schedule memory responses in FIFO order
|
||||
std::list<mem_rd_req_t>::iterator mem_rd_it(mem_reads_[b].end());
|
||||
if (!mem_reads_[b].empty()
|
||||
&& (0 == mem_reads_[b].begin()->cycles_left)) {
|
||||
mem_rd_it = mem_reads_[b].begin();
|
||||
}
|
||||
|
||||
// send memory response
|
||||
vl_obj_->device->avs_readdatavalid[b] = 0;
|
||||
if (mem_rd_it != mem_reads_[b].end()) {
|
||||
vl_obj_->device->avs_readdatavalid[b] = 1;
|
||||
memcpy(vl_obj_->device->avs_readdata[b], mem_rd_it->data.data(), MEM_BLOCK_SIZE);
|
||||
uint32_t addr = mem_rd_it->addr;
|
||||
mem_reads_[b].erase(mem_rd_it);
|
||||
/*printf("%0ld: [sim] MEM Rd Rsp: bank=%d, addr=%x, pending={", timestamp, b, addr * MEM_BLOCK_SIZE);
|
||||
for (auto& req : mem_reads_[b]) {
|
||||
if (req.cycles_left != 0)
|
||||
printf(" !%0x", req.addr * MEM_BLOCK_SIZE);
|
||||
else
|
||||
printf(" %0x", req.addr * MEM_BLOCK_SIZE);
|
||||
}
|
||||
printf("}\n");*/
|
||||
}
|
||||
|
||||
// handle memory stalls
|
||||
bool mem_stalled = false;
|
||||
#ifdef ENABLE_MEM_STALLS
|
||||
if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) {
|
||||
mem_stalled = true;
|
||||
} else
|
||||
if (mem_reads_[b].size() >= MEM_RQ_SIZE) {
|
||||
mem_stalled = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
// process memory requests
|
||||
if (!mem_stalled) {
|
||||
assert(!vl_obj_->device->avs_read[b] || !vl_obj_->device->avs_write[b]);
|
||||
if (vl_obj_->device->avs_write[b]) {
|
||||
uint64_t byteen = vl_obj_->device->avs_byteenable[b];
|
||||
unsigned base_addr = vl_obj_->device->avs_address[b] * MEM_BLOCK_SIZE;
|
||||
uint8_t* data = (uint8_t*)(vl_obj_->device->avs_writedata[b]);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
(*ram_)[base_addr + i] = data[i];
|
||||
}
|
||||
}
|
||||
/*printf("%0ld: [sim] MEM Wr Req: bank=%d, addr=%x, data=", timestamp, b, base_addr);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]);
|
||||
}
|
||||
printf("\n");*/
|
||||
}
|
||||
if (vl_obj_->device->avs_read[b]) {
|
||||
mem_rd_req_t mem_req;
|
||||
mem_req.addr = vl_obj_->device->avs_address[b];
|
||||
ram_->read(mem_req.data.data(), vl_obj_->device->avs_address[b] * MEM_BLOCK_SIZE, MEM_BLOCK_SIZE);
|
||||
mem_req.cycles_left = MEM_LATENCY;
|
||||
for (auto& rsp : mem_reads_[b]) {
|
||||
if (mem_req.addr == rsp.addr) {
|
||||
// duplicate requests receive the same cycle delay
|
||||
mem_req.cycles_left = rsp.cycles_left;
|
||||
break;
|
||||
}
|
||||
}
|
||||
mem_reads_[b].emplace_back(mem_req);
|
||||
/*printf("%0ld: [sim] MEM Rd Req: bank=%d, addr=%x, pending={", timestamp, b, mem_req.addr * MEM_BLOCK_SIZE);
|
||||
for (auto& req : mem_reads_[b]) {
|
||||
if (req.cycles_left != 0)
|
||||
printf(" !%0x", req.addr * MEM_BLOCK_SIZE);
|
||||
else
|
||||
printf(" %0x", req.addr * MEM_BLOCK_SIZE);
|
||||
}
|
||||
printf("}\n");*/
|
||||
}
|
||||
}
|
||||
|
||||
vl_obj_->device->avs_waitrequest[b] = mem_stalled;
|
||||
}
|
||||
}
|
||||
100
sim/vlsim/opae_sim.h
Normal file
100
sim/vlsim/opae_sim.h
Normal file
@@ -0,0 +1,100 @@
|
||||
#pragma once
|
||||
|
||||
#include <VX_config.h>
#include <vortex_afu.h>

#include <array>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <future>
#include <list>
#include <mutex>
#include <ostream>
#include <unordered_map>
|
||||
|
||||
#ifndef MEMORY_BANKS
|
||||
#ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
|
||||
#define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS
|
||||
#else
|
||||
#define MEMORY_BANKS 2
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#undef MEM_BLOCK_SIZE
|
||||
#define MEM_BLOCK_SIZE (PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH / 8)
|
||||
|
||||
#define CACHE_BLOCK_SIZE 64
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class VL_OBJ;
|
||||
class RAM;
|
||||
|
||||
class opae_sim {
|
||||
public:
|
||||
|
||||
opae_sim();
|
||||
virtual ~opae_sim();
|
||||
|
||||
int prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags);
|
||||
|
||||
void release_buffer(uint64_t wsid);
|
||||
|
||||
void get_io_address(uint64_t wsid, uint64_t *ioaddr);
|
||||
|
||||
void write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value);
|
||||
|
||||
void read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value);
|
||||
|
||||
private:
|
||||
|
||||
typedef struct {
|
||||
int cycles_left;
|
||||
std::array<uint8_t, MEM_BLOCK_SIZE> data;
|
||||
uint32_t addr;
|
||||
} mem_rd_req_t;
|
||||
|
||||
typedef struct {
|
||||
int cycles_left;
|
||||
std::array<uint8_t, CACHE_BLOCK_SIZE> data;
|
||||
uint64_t addr;
|
||||
uint32_t mdata;
|
||||
} cci_rd_req_t;
|
||||
|
||||
typedef struct {
|
||||
int cycles_left;
|
||||
uint32_t mdata;
|
||||
} cci_wr_req_t;
|
||||
|
||||
typedef struct {
|
||||
uint64_t* data;
|
||||
size_t size;
|
||||
uint64_t ioaddr;
|
||||
} host_buffer_t;
|
||||
|
||||
void reset();
|
||||
|
||||
void eval();
|
||||
|
||||
void step();
|
||||
|
||||
void sRxPort_bus();
|
||||
void sTxPort_bus();
|
||||
void avs_bus();
|
||||
|
||||
std::future<void> future_;
|
||||
bool stop_;
|
||||
|
||||
std::unordered_map<int64_t, host_buffer_t> host_buffers_;
|
||||
int64_t host_buffer_ids_;
|
||||
|
||||
std::list<mem_rd_req_t> mem_reads_ [MEMORY_BANKS];
|
||||
|
||||
std::list<cci_rd_req_t> cci_reads_;
|
||||
|
||||
std::list<cci_wr_req_t> cci_writes_;
|
||||
|
||||
std::mutex mutex_;
|
||||
|
||||
RAM *ram_;
|
||||
|
||||
VL_OBJ* vl_obj_;
|
||||
};
|
||||
|
||||
}
|
||||
10
sim/vlsim/verilator.vlt
Normal file
10
sim/vlsim/verilator.vlt
Normal file
@@ -0,0 +1,10 @@
|
||||
`verilator_config
|
||||
|
||||
lint_off -rule BLKANDNBLK -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule UNOPTFLAT -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule WIDTH -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule UNUSED -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule LITENDIAN -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule IMPORTSTAR -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule PINCONNECTEMPTY -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -file "../rtl/fp_cores/fpnew/*"
|
||||
170
sim/vlsim/vortex_afu_shim.sv
Normal file
170
sim/vlsim/vortex_afu_shim.sv
Normal file
@@ -0,0 +1,170 @@
|
||||
`include "VX_platform.vh"
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
`include "vortex_afu.vh"
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
/* verilator lint_off IMPORTSTAR */
|
||||
import ccip_if_pkg::*;
|
||||
import local_mem_cfg_pkg::*;
|
||||
/* verilator lint_on IMPORTSTAR */
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// vortex_afu_shim: wrapper around vortex_afu that exposes the CCI-P Rx/Tx
// interface as individual ports, one per struct field, instead of the
// t_if_ccip_Rx / t_if_ccip_Tx structs that vortex_afu takes.
//  - The vcp2af_sRxPort_* inputs are packed into cp2af_sRxPort and fed to the AFU.
//  - af2cp_sTxPort, driven by the AFU, is unpacked onto the af2cp_sTxPort_* outputs.
//  - clk, reset and the avs_* local-memory ports are connected straight through.
// NOTE(review): presumably flattened so the simulator's C++ side can drive each
// field separately — confirm against the vlsim driver.
module vortex_afu_shim (
|
||||
// global signals
|
||||
input clk,
|
||||
input reset,
|
||||
|
||||
// IF signals between CCI and AFU
|
||||
input logic vcp2af_sRxPort_c0_TxAlmFull,
|
||||
input logic vcp2af_sRxPort_c1_TxAlmFull,
|
||||
|
||||
// Rx c0 header fields used when neither MMIO valid is asserted (rspMemHdr layout)
input t_ccip_vc vcp2af_sRxPort_c0_hdr_vc_used,
|
||||
input logic vcp2af_sRxPort_c0_hdr_rsvd1,
|
||||
input logic vcp2af_sRxPort_c0_hdr_hit_miss,
|
||||
input logic [1:0] vcp2af_sRxPort_c0_hdr_rsvd0,
|
||||
input t_ccip_clNum vcp2af_sRxPort_c0_hdr_cl_num,
|
||||
input t_ccip_c0_rsp vcp2af_sRxPort_c0_hdr_resp_type,
|
||||
input t_ccip_mdata vcp2af_sRxPort_c0_hdr_mdata,
|
||||
input t_ccip_clData vcp2af_sRxPort_c0_data,
|
||||
input logic vcp2af_sRxPort_c0_rspValid,
|
||||
input logic vcp2af_sRxPort_c0_mmioRdValid,
|
||||
input logic vcp2af_sRxPort_c0_mmioWrValid,
|
||||
|
||||
// Rx c0 header fields used when mmioRdValid or mmioWrValid is asserted (reqMmioHdr layout)
input t_ccip_mmioAddr vcp2af_sRxPort_c0_ReqMmioHdr_address,
|
||||
input logic [1:0] vcp2af_sRxPort_c0_ReqMmioHdr_length,
|
||||
input logic vcp2af_sRxPort_c0_ReqMmioHdr_rsvd,
|
||||
input t_ccip_tid vcp2af_sRxPort_c0_ReqMmioHdr_tid,
|
||||
|
||||
// Rx c1 header fields and valid
input t_ccip_vc vcp2af_sRxPort_c1_hdr_vc_used,
|
||||
input logic vcp2af_sRxPort_c1_hdr_rsvd1,
|
||||
input logic vcp2af_sRxPort_c1_hdr_hit_miss,
|
||||
input logic vcp2af_sRxPort_c1_hdr_format,
|
||||
input logic vcp2af_sRxPort_c1_hdr_rsvd0,
|
||||
input t_ccip_clNum vcp2af_sRxPort_c1_hdr_cl_num,
|
||||
input t_ccip_c1_rsp vcp2af_sRxPort_c1_hdr_resp_type,
|
||||
input t_ccip_mdata vcp2af_sRxPort_c1_hdr_mdata,
|
||||
input logic vcp2af_sRxPort_c1_rspValid,
|
||||
|
||||
// Tx c0 header fields and valid
output t_ccip_vc af2cp_sTxPort_c0_hdr_vc_sel,
|
||||
output logic [1:0] af2cp_sTxPort_c0_hdr_rsvd1,
|
||||
output t_ccip_clLen af2cp_sTxPort_c0_hdr_cl_len,
|
||||
output t_ccip_c0_req af2cp_sTxPort_c0_hdr_req_type,
|
||||
output logic [5:0] af2cp_sTxPort_c0_hdr_rsvd0,
|
||||
output t_ccip_clAddr af2cp_sTxPort_c0_hdr_address,
|
||||
output t_ccip_mdata af2cp_sTxPort_c0_hdr_mdata,
|
||||
output logic af2cp_sTxPort_c0_valid,
|
||||
|
||||
// Tx c1 header fields, data and valid
output logic [5:0] af2cp_sTxPort_c1_hdr_rsvd2,
|
||||
output t_ccip_vc af2cp_sTxPort_c1_hdr_vc_sel,
|
||||
output logic af2cp_sTxPort_c1_hdr_sop,
|
||||
output logic af2cp_sTxPort_c1_hdr_rsvd1,
|
||||
output t_ccip_clLen af2cp_sTxPort_c1_hdr_cl_len,
|
||||
output t_ccip_c1_req af2cp_sTxPort_c1_hdr_req_type,
|
||||
output logic [5:0] af2cp_sTxPort_c1_hdr_rsvd0,
|
||||
output t_ccip_clAddr af2cp_sTxPort_c1_hdr_address,
|
||||
output t_ccip_mdata af2cp_sTxPort_c1_hdr_mdata,
|
||||
output t_ccip_clData af2cp_sTxPort_c1_data,
|
||||
output logic af2cp_sTxPort_c1_valid,
|
||||
|
||||
// Tx c2: tid, mmioRdValid and data
output t_ccip_tid af2cp_sTxPort_c2_hdr_tid,
|
||||
output logic af2cp_sTxPort_c2_mmioRdValid,
|
||||
output t_ccip_mmioData af2cp_sTxPort_c2_data,
|
||||
|
||||
// Avalon signals for local memory access
|
||||
output t_local_mem_data avs_writedata [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
input t_local_mem_data avs_readdata [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
output t_local_mem_addr avs_address [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
input logic avs_waitrequest [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
output logic avs_write [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
output logic avs_read [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
output t_local_mem_byte_mask avs_byteenable [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
output t_local_mem_burst_cnt avs_burstcount [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
input avs_readdatavalid [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS] // no explicit data type, unlike the other avs_* ports
|
||||
);
|
||||
|
||||
t_if_ccip_Rx cp2af_sRxPort; // struct form of the Rx inputs, assembled by the assigns below
|
||||
t_if_ccip_Tx af2cp_sTxPort; // struct driven by the AFU, unpacked by the assigns below
|
||||
|
||||
// Wrapped AFU instance. Its bank-count parameter uses the same macro that
// sizes the avs_* port arrays, and every avs_* port is passed through by name.
vortex_afu #(
|
||||
.NUM_LOCAL_MEM_BANKS(`PLATFORM_PARAM_LOCAL_MEMORY_BANKS)
|
||||
) afu (
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
.cp2af_sRxPort(cp2af_sRxPort),
|
||||
.af2cp_sTxPort(af2cp_sTxPort),
|
||||
.avs_writedata(avs_writedata),
|
||||
.avs_readdata(avs_readdata),
|
||||
.avs_address(avs_address),
|
||||
.avs_waitrequest(avs_waitrequest),
|
||||
.avs_write(avs_write),
|
||||
.avs_read(avs_read),
|
||||
.avs_byteenable(avs_byteenable),
|
||||
.avs_burstcount(avs_burstcount),
|
||||
.avs_readdatavalid(avs_readdatavalid)
|
||||
);
|
||||
|
||||
// Rx c0 header. The whole header defaults to 'x; the MMIO request fields are
// then filled in when either MMIO valid is high, and the memory response
// fields otherwise.
// NOTE(review): writing through both .reqMmioHdr and .rspMemHdr suggests
// t_if_ccip_c0_RxHdr is a union of the two layouts — confirm in ccip_if_pkg.
t_if_ccip_c0_RxHdr c0_RxHdr;
|
||||
always @ (*) begin
|
||||
c0_RxHdr = 'x;
|
||||
if (vcp2af_sRxPort_c0_mmioWrValid || vcp2af_sRxPort_c0_mmioRdValid) begin
|
||||
c0_RxHdr.reqMmioHdr.address = vcp2af_sRxPort_c0_ReqMmioHdr_address;
|
||||
c0_RxHdr.reqMmioHdr.length = vcp2af_sRxPort_c0_ReqMmioHdr_length;
|
||||
c0_RxHdr.reqMmioHdr.rsvd = vcp2af_sRxPort_c0_ReqMmioHdr_rsvd;
|
||||
c0_RxHdr.reqMmioHdr.tid = vcp2af_sRxPort_c0_ReqMmioHdr_tid;
|
||||
end else begin
|
||||
c0_RxHdr.rspMemHdr.vc_used = vcp2af_sRxPort_c0_hdr_vc_used;
|
||||
c0_RxHdr.rspMemHdr.rsvd1 = vcp2af_sRxPort_c0_hdr_rsvd1;
|
||||
c0_RxHdr.rspMemHdr.hit_miss = vcp2af_sRxPort_c0_hdr_hit_miss;
|
||||
c0_RxHdr.rspMemHdr.rsvd0 = vcp2af_sRxPort_c0_hdr_rsvd0;
|
||||
c0_RxHdr.rspMemHdr.cl_num = vcp2af_sRxPort_c0_hdr_cl_num;
|
||||
c0_RxHdr.rspMemHdr.resp_type = vcp2af_sRxPort_c0_hdr_resp_type;
|
||||
c0_RxHdr.rspMemHdr.mdata = vcp2af_sRxPort_c0_hdr_mdata;
|
||||
end
|
||||
end
|
||||
|
||||
// Rx: almost-full flags
assign cp2af_sRxPort.c0TxAlmFull = vcp2af_sRxPort_c0_TxAlmFull;
|
||||
assign cp2af_sRxPort.c1TxAlmFull = vcp2af_sRxPort_c1_TxAlmFull;
|
||||
|
||||
// Rx channel c0: header selected above, plus data and the three valid flags
assign cp2af_sRxPort.c0.hdr = c0_RxHdr;
|
||||
assign cp2af_sRxPort.c0.data = vcp2af_sRxPort_c0_data;
|
||||
assign cp2af_sRxPort.c0.rspValid = vcp2af_sRxPort_c0_rspValid;
|
||||
assign cp2af_sRxPort.c0.mmioRdValid = vcp2af_sRxPort_c0_mmioRdValid;
|
||||
assign cp2af_sRxPort.c0.mmioWrValid = vcp2af_sRxPort_c0_mmioWrValid;
|
||||
|
||||
// Rx channel c1: header fields assigned one by one, plus valid
assign cp2af_sRxPort.c1.hdr.vc_used = vcp2af_sRxPort_c1_hdr_vc_used;
|
||||
assign cp2af_sRxPort.c1.hdr.rsvd1 = vcp2af_sRxPort_c1_hdr_rsvd1;
|
||||
assign cp2af_sRxPort.c1.hdr.hit_miss = vcp2af_sRxPort_c1_hdr_hit_miss;
|
||||
assign cp2af_sRxPort.c1.hdr.format = vcp2af_sRxPort_c1_hdr_format;
|
||||
assign cp2af_sRxPort.c1.hdr.rsvd0 = vcp2af_sRxPort_c1_hdr_rsvd0;
|
||||
assign cp2af_sRxPort.c1.hdr.cl_num = vcp2af_sRxPort_c1_hdr_cl_num;
|
||||
assign cp2af_sRxPort.c1.hdr.resp_type = vcp2af_sRxPort_c1_hdr_resp_type;
|
||||
assign cp2af_sRxPort.c1.hdr.mdata = vcp2af_sRxPort_c1_hdr_mdata;
|
||||
assign cp2af_sRxPort.c1.rspValid = vcp2af_sRxPort_c1_rspValid;
|
||||
|
||||
// Tx channel c0: header fields and valid
assign af2cp_sTxPort_c0_hdr_vc_sel = af2cp_sTxPort.c0.hdr.vc_sel;
|
||||
assign af2cp_sTxPort_c0_hdr_rsvd1 = af2cp_sTxPort.c0.hdr.rsvd1;
|
||||
assign af2cp_sTxPort_c0_hdr_cl_len = af2cp_sTxPort.c0.hdr.cl_len;
|
||||
assign af2cp_sTxPort_c0_hdr_req_type = af2cp_sTxPort.c0.hdr.req_type;
|
||||
assign af2cp_sTxPort_c0_hdr_rsvd0 = af2cp_sTxPort.c0.hdr.rsvd0;
|
||||
assign af2cp_sTxPort_c0_hdr_address = af2cp_sTxPort.c0.hdr.address;
|
||||
assign af2cp_sTxPort_c0_hdr_mdata = af2cp_sTxPort.c0.hdr.mdata;
|
||||
assign af2cp_sTxPort_c0_valid = af2cp_sTxPort.c0.valid;
|
||||
|
||||
// Tx channel c1: header fields, data and valid
assign af2cp_sTxPort_c1_hdr_rsvd2 = af2cp_sTxPort.c1.hdr.rsvd2;
|
||||
assign af2cp_sTxPort_c1_hdr_vc_sel = af2cp_sTxPort.c1.hdr.vc_sel;
|
||||
assign af2cp_sTxPort_c1_hdr_sop = af2cp_sTxPort.c1.hdr.sop;
|
||||
assign af2cp_sTxPort_c1_hdr_rsvd1 = af2cp_sTxPort.c1.hdr.rsvd1;
|
||||
assign af2cp_sTxPort_c1_hdr_cl_len = af2cp_sTxPort.c1.hdr.cl_len;
|
||||
assign af2cp_sTxPort_c1_hdr_req_type = af2cp_sTxPort.c1.hdr.req_type;
|
||||
assign af2cp_sTxPort_c1_hdr_rsvd0 = af2cp_sTxPort.c1.hdr.rsvd0;
|
||||
assign af2cp_sTxPort_c1_hdr_address = af2cp_sTxPort.c1.hdr.address;
|
||||
assign af2cp_sTxPort_c1_hdr_mdata = af2cp_sTxPort.c1.hdr.mdata;
|
||||
assign af2cp_sTxPort_c1_data = af2cp_sTxPort.c1.data;
|
||||
assign af2cp_sTxPort_c1_valid = af2cp_sTxPort.c1.valid;
|
||||
|
||||
// Tx channel c2: tid, mmioRdValid and data
assign af2cp_sTxPort_c2_hdr_tid = af2cp_sTxPort.c2.hdr.tid;
|
||||
assign af2cp_sTxPort_c2_mmioRdValid = af2cp_sTxPort.c2.mmioRdValid;
|
||||
assign af2cp_sTxPort_c2_data = af2cp_sTxPort.c2.data;
|
||||
|
||||
endmodule
|
||||
Reference in New Issue
Block a user