simulation framework refactoring

This commit is contained in:
Blaise Tine
2021-10-09 10:20:42 -04:00
parent 51673665b5
commit 54bddeee9c
89 changed files with 1217 additions and 1471 deletions

View File

@@ -1,90 +1,38 @@
CFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors
#CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
RTLSIM_DIR = ../../sim/rtlsim
CFLAGS += -DUSE_RTLSIM -fPIC -Wno-maybe-uninitialized
CFLAGS += -I../../include -I../../../hw/simulate -I../../../hw
CXXFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -pedantic -Wfatal-errors
#CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
# control RTL debug print states
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSHR
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
DBG_PRINT_FLAGS += -DDBG_PRINT_MEM
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
CXXFLAGS += -I../include -I../../hw -I$(RTLSIM_DIR) -I$(RTLSIM_DIR)/../common
DBG_FLAGS += $(DBG_PRINT_FLAGS)
DBG_FLAGS += -DDBG_CACHE_REQ_INFO
LDFLAGS += $(RTLSIM_DIR)/librtlsim.a
CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1
# Position independent code
CXXFLAGS += -fPIC
CFLAGS += $(CONFIGS)
CFLAGS += -DDUMP_PERF_STATS
# Add external configuration
CXXFLAGS += $(CONFIGS)
LDFLAGS += -shared -pthread
#LDFLAGS += -dynamiclib -pthread
# Dump perf stats
CXXFLAGS += -DDUMP_PERF_STATS
ifdef AXI_BUS
TOP = Vortex_axi
CFLAGS += -DAXI_BUS
else
TOP = Vortex
endif
LDFLAGS += -shared
RTL_DIR = ../../hw/rtl
DPI_DIR = ../../hw/dpi
SRCS = vortex.cpp ../common/vx_utils.cpp ../../hw/simulate/simulator.cpp
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE)
VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
VL_FLAGS += --x-initial unique --x-assign unique
VL_FLAGS += verilator.vlt
VL_FLAGS += $(CONFIGS)
# Enable Verilator multithreaded simulation
#THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
#VL_FLAGS += --threads $(THREADS)
# Debugigng
ifdef DEBUG
VL_FLAGS += -DVCD_OUTPUT --trace --trace-structs $(DBG_FLAGS)
CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
else
VL_FLAGS += -DNDEBUG
CFLAGS += -DNDEBUG
endif
SRCS = vortex.cpp ../common/vx_utils.cpp
# Enable perf counters
ifdef PERF
VL_FLAGS += -DPERF_ENABLE
CFLAGS += -DPERF_ENABLE
CXXFLAGS += -DPERF_ENABLE
endif
# ALU backend
VL_FLAGS += -DIMUL_DPI
VL_FLAGS += -DIDIV_DPI
# FPU backend
FPU_CORE ?= FPU_DPI
VL_FLAGS += -D$(FPU_CORE)
PROJECT = libvortex.so
# PROJECT = libvortex.dylib
all: $(PROJECT)
$(PROJECT): $(SRCS)
verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
make -j -C obj_dir -f V$(TOP).mk
$(MAKE) -C $(RTLSIM_DIR) static
$(CXX) $(CXXFLAGS) $(SRCS) $(LDFLAGS) -o $(PROJECT)
clean:
rm -rf $(PROJECT) obj_dir
$(MAKE) -C $(RTLSIM_DIR) clean-objdir
rm -rf $(PROJECT) *.o .depend

View File

@@ -1,64 +0,0 @@
#pragma once
#include <stdio.h>
#include <stdint.h>
class RAM {
private:
mutable uint8_t *mem_[(1 << 12)];
uint8_t *get(uint32_t address) const {
uint32_t block_addr = address >> 20;
uint32_t block_offset = address & 0x000FFFFF;
if (mem_[block_addr] == NULL) {
mem_[block_addr] = new uint8_t[(1 << 20)];
}
return mem_[block_addr] + block_offset;
}
public:
RAM() {
for (uint32_t i = 0; i < (1 << 12); i++) {
mem_[i] = NULL;
}
}
~RAM() {
this->clear();
}
size_t size() const {
return (1ull << 32);
}
void clear() {
for (uint32_t i = 0; i < (1 << 12); i++) {
if (mem_[i]) {
delete [] mem_[i];
mem_[i] = NULL;
}
}
}
void read(uint32_t address, uint32_t length, uint8_t *data) const {
for (unsigned i = 0; i < length; i++) {
data[i] = *this->get(address + i);
}
}
void write(uint32_t address, uint32_t length, const uint8_t *data) {
for (unsigned i = 0; i < length; i++) {
*this->get(address + i) = data[i];
}
}
uint8_t& operator[](uint32_t address) {
return *get(address);
}
const uint8_t& operator[](uint32_t address) const {
return *get(address);
}
};

View File

@@ -1,10 +0,0 @@
`verilator_config
lint_off -rule BLKANDNBLK -file "../../hw/rtl/fp_cores/fpnew/*"
lint_off -rule UNOPTFLAT -file "../../hw/rtl/fp_cores/fpnew/*"
lint_off -rule WIDTH -file "../../hw/rtl/fp_cores/fpnew/*"
lint_off -rule UNUSED -file "../../hw/rtl/fp_cores/fpnew/*"
lint_off -rule LITENDIAN -file "../../hw/rtl/fp_cores/fpnew/*"
lint_off -rule IMPORTSTAR -file "../../hw/rtl/fp_cores/fpnew/*"
lint_off -rule PINCONNECTEMPTY -file "../../hw/rtl/fp_cores/fpnew/*"
lint_off -file "../rtl/fp_cores/fpnew/*"

View File

@@ -8,15 +8,11 @@
#include <vortex.h>
#include <VX_config.h>
#include <ram.h>
#include <mem.h>
#include <util.h>
#include <simulator.h>
///////////////////////////////////////////////////////////////////////////////
inline size_t align_size(size_t size, size_t alignment) {
assert(0 == (alignment & (alignment - 1)));
return (size + alignment - 1) & ~(alignment - 1);
}
using namespace vortex;
///////////////////////////////////////////////////////////////////////////////
@@ -58,7 +54,7 @@ private:
class vx_device {
public:
vx_device() {
vx_device() : ram_((1<<12), (1<<20)) {
mem_allocation_ = ALLOC_BASE_ADDR;
}
@@ -92,7 +88,7 @@ public:
}
printf("\n");*/
ram_.write(dest_addr, asize, (const uint8_t*)src + src_offset);
ram_.write((const uint8_t*)src + src_offset, dest_addr, asize);
return 0;
}
@@ -101,7 +97,7 @@ public:
if (src_addr + asize > ram_.size())
return -1;
ram_.read(src_addr, asize, (uint8_t*)dest + dest_offset);
ram_.read((uint8_t*)dest + dest_offset, src_addr, asize);
/*printf("VXDRV: download %ld bytes to 0x%lx:", size, uintptr_t((uint8_t*)dest + dest_offset));
for (int i = 0; i < (asize / CACHE_BLOCK_SIZE); ++i) {