Merge branch 'master' of https://github.gatech.edu/casl/Vortex
This commit is contained in:
@@ -1,5 +1,3 @@
|
||||
|
||||
|
||||
all: stub rtlsim simx
|
||||
|
||||
stub:
|
||||
@@ -15,9 +13,9 @@ simx:
|
||||
$(MAKE) -C simx
|
||||
|
||||
clean:
|
||||
$(MAKE) clean -C dummy
|
||||
$(MAKE) clean -C stub
|
||||
$(MAKE) clean -C opae
|
||||
$(MAKE) clean -C rtlsim
|
||||
$(MAKE) clean -C simx
|
||||
|
||||
.PHONY: all opae rtlsim simx clean
|
||||
.PHONY: all stub opae rtlsim simx clean
|
||||
@@ -1,37 +0,0 @@
|
||||
# Bare-metal build of the "demo" kernel for the Vortex RISC-V target.
# Produces $(PROJECT).elf plus a disassembly (.dump) and an Intel HEX image (.hex).

# Delete a target whose recipe failed. The .dump file is written through a
# shell redirect, which would otherwise leave a truncated but "fresh" file.
.DELETE_ON_ERROR:

# Toolchain and runtime locations; both can be overridden on the command line.
RISCV_TOOL_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
VX_RT_PATH ?= $(wildcard ../../runtime)

CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb

# Runtime sources that are compiled and linked together with the application.
NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
VX_STR = $(VX_RT_PATH)/startup/vx_start.s
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_IO = $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
VX_FIO = $(VX_RT_PATH)/fileio/fileio.s

CFLAGS = -march=rv32im -mabi=ilp32 -O0 -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld -ffreestanding -nostdlib

LIBS = $(RISCV_TOOL_PATH)/riscv32-unknown-elf/lib/libc.a $(RISCV_TOOL_PATH)/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc

PROJECT = demo

SRCS = main.c

# Runtime sources in the order they are handed to the compiler driver.
VX_RT_SRCS = $(VX_STR) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API)

# Neither name is a file; declare them so a stray file called "all" or
# "clean" cannot shadow the target.
.PHONY: all clean

all: $(PROJECT).dump $(PROJECT).hex

$(PROJECT).dump: $(PROJECT).elf
	$(DMP) -D $< > $@

$(PROJECT).hex: $(PROJECT).elf
	$(HEX) -O ihex $< $@

# The image is relinked when either the application or a runtime source changes.
$(PROJECT).elf: $(SRCS) $(VX_RT_SRCS)
	$(CC) $(CFLAGS) $(VX_RT_SRCS) $(SRCS) $(LIBS) -I$(VX_RT_PATH) -o $@

clean:
	rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug
|
||||
@@ -1,65 +0,0 @@
|
||||
#include "intrinsics/vx_intrinsics.h"
|
||||
#include "io/vx_io.h"
|
||||
#include "vx_api/vx_api.h"
|
||||
|
||||
/* Argument bundle passed (as void*) to mat_add_kernel. */
typedef struct
{
  unsigned * x;        /* first input matrix */
  unsigned * y;        /* second input matrix */
  unsigned * z;        /* output matrix, z = x + y */
  unsigned numColums;  /* column count; compared against the thread ID */
  unsigned numRows;    /* row count; compared against the warp ID */
} mat_add_args_t;

/* 4x4 input: row r holds the value r + 5 in every column. */
unsigned x[] = {
  5, 5, 5, 5,
  6, 6, 6, 6,
  7, 7, 7, 7,
  8, 8, 8, 8
};

/* 4x4 input: all ones. */
unsigned y[] = {
  1, 1, 1, 1,
  1, 1, 1, 1,
  1, 1, 1, 1,
  1, 1, 1, 1
};

/* 4x4 output, zero-initialised. */
unsigned z[] = {
  0, 0, 0, 0,
  0, 0, 0, 0,
  0, 0, 0, 0,
  0, 0, 0, 0
};
|
||||
|
||||
void mat_add_kernel(void * void_arguments)
|
||||
{
|
||||
mat_add_args_t * arguments = (mat_add_args_t *) void_arguments;
|
||||
|
||||
unsigned wid = vx_warpID();
|
||||
unsigned tid = vx_threadID();
|
||||
|
||||
bool valid = (wid < arguments->numRows) && (tid < arguments->numColums);
|
||||
|
||||
__if (valid)
|
||||
{
|
||||
unsigned index = (wid * arguments->numColums) + tid;
|
||||
arguments->z[index] = arguments->x[index] + arguments->y[index];
|
||||
}
|
||||
__endif
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
// Main is called with all threads active of warp 0
|
||||
vx_tmc(1);
|
||||
|
||||
vx_print_str("Demo kernel\n");
|
||||
|
||||
mat_add_args_t arguments;
|
||||
arguments.x = x;
|
||||
arguments.y = y;
|
||||
arguments.z = z;
|
||||
arguments.numColums = 4;
|
||||
arguments.numRows = 4;
|
||||
|
||||
vx_spawnWarps(4, 4, mat_add_kernel, &arguments);
|
||||
|
||||
vx_print_str("done.");
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -4,16 +4,17 @@ CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
|
||||
CFLAGS += -I../../include -I../../../hw/simulate -I../../../hw
|
||||
|
||||
# control RTL debug print states
|
||||
DBG_PRINT_FLAGS = -DDBG_PRINT_CORE_ICACHE \
|
||||
-DDBG_PRINT_CORE_DCACHE \
|
||||
-DDBG_PRINT_CACHE_BANK \
|
||||
-DDBG_PRINT_CACHE_SNP \
|
||||
-DDBG_PRINT_CACHE_MSRQ \
|
||||
-DDBG_PRINT_DRAM \
|
||||
-DDBG_PRINT_PIPELINE \
|
||||
-DDBG_PRINT_OPAE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
|
||||
#DBG_PRINT=$(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
||||
|
||||
#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4
|
||||
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4
|
||||
@@ -46,13 +47,11 @@ VL_FLAGS += --x-assign unique
|
||||
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += --trace -DVCD_OUTPUT $(DBG_PRINT)
|
||||
CFLAGS += -DVCD_OUTPUT $(DBG_PRINT)
|
||||
#VL_FLAGS += -DDBG_CORE_REQ_INFO
|
||||
#CFLAGS += -DDBG_CORE_REQ_INFO
|
||||
else
|
||||
CFLAGS += -DNDEBUG
|
||||
VL_FLAGS += -DVCD_OUTPUT --assert --trace $(DBG_FLAGS)
|
||||
CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
|
||||
else
|
||||
VL_FLAGS += -DNDEBUG
|
||||
CFLAGS += -DNDEBUG
|
||||
endif
|
||||
|
||||
# AFU
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
|
||||
# Builds the simX-backed Vortex driver library through Verilator, plus the
# "test" program that links against it.

CXXFLAGS += -O3 -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I../sw

LDFLAGS += -L./obj_dir

DRV_CFLAGS += -O3 -Wall -Wextra -pedantic -Wfatal-errors
DRV_CFLAGS += -I../../sw
DRV_CFLAGS += -fPIC

DRV_LDFLAGS += -shared -pthread

DRV_SRCS = vx_driver.cpp ../../simX/args.cpp ../../simX/mem.cpp ../../simX/core.cpp ../../simX/instruction.cpp ../../simX/enc.cpp ../../simX/util.cpp

RTL_TOP = ../../simX/cache_simX.v

RTL_INCLUDE = -I../../rtl/shared_memory -I../../rtl/cache -I../../rtl/interfaces -Isimulate -I../../rtl

VL_FLAGS += -Wno-UNOPTFLAT -Wno-WIDTH
VL_FLAGS += --trace -DVL_DEBUG=1

PROJECT = libvxdrv_sim.so

# "test" is a real output file, so only the command-style targets are phony.
.PHONY: all clean

all: $(PROJECT) test

# The library depends on the driver sources and the RTL top that verilator consumes.
# The generated makefile is run through $(MAKE) so -j/-n and the jobserver
# propagate; pass -j to the top-level make for a parallel build.
# NOTE(review): LDFLAGS searches ./obj_dir, so the library presumably lands
# there rather than at ./$(PROJECT) — confirm.
$(PROJECT): $(DRV_SRCS) $(RTL_TOP)
	verilator --exe --cc $(RTL_TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(DRV_SRCS) -CFLAGS '$(DRV_CFLAGS)' -LDFLAGS '$(DRV_LDFLAGS)' -o $(PROJECT)
	$(MAKE) -C obj_dir -f Vcache_simX.mk OPT='-DVL_DEBUG' VL_DEBUG=1 DVL_DEBUG=1

test: $(PROJECT) test.o utils.o
	$(CXX) $(CXXFLAGS) test.o utils.o $(LDFLAGS) -lvxdrv_sim -o $@

utils.o: ../sw/utils.cpp
	$(CXX) $(CXXFLAGS) -c $< -o $@

test.o: ../sw/test.cpp
	$(CXX) $(CXXFLAGS) -c $< -o $@

clean:
	rm -rf $(PROJECT) test *.so *.o obj_dir
|
||||
@@ -1,272 +0,0 @@
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
#include <chrono>
|
||||
|
||||
#include <vx_driver.h>
|
||||
|
||||
#include "../../simX/include/debug.h"
|
||||
#include "../../simX/include/types.h"
|
||||
#include "../../simX/include/core.h"
|
||||
#include "../../simX/include/enc.h"
|
||||
#include "../../simX/include/instruction.h"
|
||||
#include "../../simX/include/mem.h"
|
||||
#include "../../simX/include/obj.h"
|
||||
#include "../../simX/include/archdef.h"
|
||||
#include "../../simX/include/help.h"
|
||||
|
||||
// Allocation granularity in bytes; align_size() rounds sizes up to a multiple of it.
#define CACHE_LINESIZE 64

// Page size handed to the simulator's memory unit.
#define PAGE_SIZE 4096

// Evaluates _expr as an fpga_result. Anything other than FPGA_OK is logged
// and makes the enclosing function return -1.
#define CHECK_RES(_expr)                                           \
  do {                                                             \
    fpga_result res = _expr;                                       \
    if (res != FPGA_OK) {                                          \
      printf("OPAE Error: '%s' returned %d!\n", #_expr, (int)res); \
      return -1;                                                   \
    }                                                              \
  } while (false)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static size_t align_size(size_t size) {
|
||||
return CACHE_LINESIZE * ((size + CACHE_LINESIZE - 1) / CACHE_LINESIZE);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class vx_device;
|
||||
|
||||
class vx_buffer {
|
||||
public:
|
||||
vx_buffer(size_t size, vx_device* device)
|
||||
: size_(size)
|
||||
, device_(device) {
|
||||
auto aligned_asize = align_size(size);
|
||||
data_ = malloc(aligned_asize);
|
||||
}
|
||||
|
||||
~vx_buffer() {
|
||||
if (data_) {
|
||||
free(data_);
|
||||
}
|
||||
}
|
||||
|
||||
auto data() const {
|
||||
return data_;
|
||||
}
|
||||
|
||||
auto size() const {
|
||||
return size_;
|
||||
}
|
||||
|
||||
auto device() const {
|
||||
return device_;
|
||||
}
|
||||
|
||||
private:
|
||||
size_t size_;
|
||||
vx_device* device_;
|
||||
void* data_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class vx_device {
|
||||
public:
|
||||
vx_device()
|
||||
: is_done_(false)
|
||||
, is_running_(false)
|
||||
, thread_(__thread_proc__, this)
|
||||
{}
|
||||
|
||||
~vx_device() {
|
||||
mutex_.lock();
|
||||
is_done_ = true;
|
||||
mutex_.unlock();
|
||||
|
||||
thread_.join();
|
||||
}
|
||||
|
||||
int upload(void* src, size_t dest_addr, size_t size, size_t src_offset) {
|
||||
if (dest_addr + size > ram_.size())
|
||||
return -1;
|
||||
ram_.write(dest_addr, size, (uint8_t*)src + src_offset);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int download(const void* dest, size_t src_addr, size_t size, size_t dest_offset) {
|
||||
if (src_addr + size > ram_.size())
|
||||
return -1;
|
||||
ram_.read(src_addr, size, (uint8_t*)dest + dest_offset);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int start() {
|
||||
if (this->wait(-1) != 0)
|
||||
return -1;
|
||||
|
||||
mutex_.lock();
|
||||
is_running_ = true;
|
||||
mutex_.unlock();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int wait(long long timeout) {
|
||||
for (;;) {
|
||||
mutex_.lock();
|
||||
bool is_running = is_running_;
|
||||
mutex_.unlock();
|
||||
|
||||
if (!is_running || 0 == timeout--)
|
||||
break;
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
void run() {
|
||||
Harp::ArchDef arch("rv32i", false);
|
||||
Harp::WordDecoder dec(arch);
|
||||
Harp::MemoryUnit mu(PAGE_SIZE, arch.getWordSize(), true);
|
||||
Harp::Core core(arch, dec, mu);
|
||||
mu.attach(ram_, 0);
|
||||
|
||||
while (core.running()) {
|
||||
core.step();
|
||||
}
|
||||
core.printStats();
|
||||
}
|
||||
|
||||
void thread_proc() {
|
||||
std::cout << "Device ready..." << std::endl;
|
||||
|
||||
for (;;) {
|
||||
mutex_.lock();
|
||||
bool is_done = is_done_;
|
||||
bool is_running = is_running_;
|
||||
mutex_.unlock();
|
||||
|
||||
if (is_done)
|
||||
break;
|
||||
|
||||
if (is_running) {
|
||||
std::cout << "Device running..." << std::endl;
|
||||
|
||||
this->run();
|
||||
|
||||
mutex_.lock();
|
||||
is_running_ = false;
|
||||
mutex_.unlock();
|
||||
|
||||
std::cout << "Device ready..." << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Device shutdown..." << std::endl;
|
||||
}
|
||||
|
||||
static void __thread_proc__(vx_device* device) {
|
||||
device->thread_proc();
|
||||
}
|
||||
|
||||
bool is_done_;
|
||||
bool is_running_;
|
||||
std::thread thread_;
|
||||
Harp::RAM ram_;
|
||||
std::mutex mutex_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Creates a simulated device and returns it as an opaque handle.
extern vx_device_h vx_dev_open() {
  return static_cast<vx_device_h>(new vx_device());
}
|
||||
|
||||
// Destroys a device created by vx_dev_open. Returns -1 for a null handle, 0 otherwise.
extern int vx_dev_close(vx_device_h hdevice) {
  if (hdevice != nullptr) {
    delete static_cast<vx_device*>(hdevice);
    return 0;
  }
  return -1;
}
|
||||
|
||||
// Allocates a host buffer bound to hdevice.
// Returns null for a null device or when the backing storage could not be allocated.
extern vx_buffer_h vx_buf_alloc(vx_device_h hdevice, size_t size) {
  if (!hdevice)
    return nullptr;

  vx_buffer* created = new vx_buffer(size, static_cast<vx_device*>(hdevice));
  if (created->data() != nullptr)
    return created;

  // The wrapper exists but its storage does not: discard it.
  delete created;
  return nullptr;
}
|
||||
|
||||
// Returns the host address of the buffer's storage, or null for a null handle.
extern void* vs_buf_ptr(vx_buffer_h hbuffer) {
  return hbuffer ? static_cast<vx_buffer*>(hbuffer)->data() : nullptr;
}
|
||||
|
||||
// Frees a buffer obtained from vx_buf_alloc. Returns -1 for a null handle, 0 otherwise.
extern int vx_buf_release(vx_buffer_h hbuffer) {
  if (hbuffer != nullptr) {
    delete static_cast<vx_buffer*>(hbuffer);
    return 0;
  }
  return -1;
}
|
||||
|
||||
// Uploads `size` bytes, starting src_offset bytes into the buffer, to device
// address dest_addr. Returns -1 for a null handle or when the requested range
// exceeds the buffer size; otherwise the result of the device upload.
extern int vx_copy_to_fpga(vx_buffer_h hbuffer, size_t dest_addr, size_t size, size_t src_offset) {
  if (!hbuffer)
    return -1;

  vx_buffer* source = static_cast<vx_buffer*>(hbuffer);
  const bool fits = (size + src_offset <= source->size());
  if (!fits)
    return -1;

  vx_device* target = source->device();
  return target->upload(source->data(), dest_addr, size, src_offset);
}
|
||||
|
||||
// Downloads `size` bytes from device address src_addr into the buffer at
// dest_offset. Returns -1 for a null handle or when the requested range
// exceeds the buffer size; otherwise the result of the device download.
extern int vx_copy_from_fpga(vx_buffer_h hbuffer, size_t src_addr, size_t size, size_t dest_offset) {
  if (!hbuffer)
    return -1;

  vx_buffer* sink = static_cast<vx_buffer*>(hbuffer);
  const bool fits = (size + dest_offset <= sink->size());
  if (!fits)
    return -1;

  vx_device* origin = sink->device();
  return origin->download(sink->data(), src_addr, size, dest_offset);
}
|
||||
|
||||
// Starts execution on the device. Returns -1 for a null handle.
extern int vx_start(vx_device_h hdevice) {
  return hdevice ? static_cast<vx_device*>(hdevice)->start() : -1;
}
|
||||
|
||||
// Waits for the device to become idle (see vx_device::wait). Returns -1 for a null handle.
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
  return hdevice ? static_cast<vx_device*>(hdevice)->wait(timeout) : -1;
}
|
||||
@@ -1,7 +1,7 @@
|
||||
CXXFLAGS += -std=c++11 -O3 -Wall -Wextra -pedantic -Wfatal-errors
|
||||
#CXXFLAGS += -std=c++11 -g -O0 -Wall -Wextra -pedantic -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -I../include -I../../runtime
|
||||
CXXFLAGS += -I../include -I../../runtime -I../../hw
|
||||
|
||||
CXXFLAGS += -fPIC
|
||||
|
||||
|
||||
@@ -1,71 +0,0 @@
|
||||
|
||||
# Builds the OPAE-backed Vortex driver in two flavours (hardware and ASE
# simulation) plus one test program per flavour.

DRV_CFLAGS += -O0 -g -Wall -Wextra -pedantic -Wfatal-errors

DRV_CFLAGS += -I/tools/opae/1.4.0/include

DRV_LDFLAGS += -L/tools/opae/1.4.0/lib

# stack execution protection
DRV_LDFLAGS +=-z noexecstack

# data relocation and protection
DRV_LDFLAGS +=-z relro -z now

# stack buffer overrun detection
# Note that CentOS 7 has gcc 4.8 by default. When we switch
# to a system with gcc 4.9 or newer this should be changed to
# CFLAGS="-fstack-protector-strong"
DRV_CFLAGS +=-fstack-protector

# Position independent code
DRV_CFLAGS += -fPIC

DRV_LDFLAGS += -luuid

DRV_LDFLAGS += -shared

FPGA_LIBS += -lopae-c
ASE_LIBS += -lopae-c-ase

CXXFLAGS += -std=c++17 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors

LDFLAGS += -L.

PROJECT = libvxdrv.so
PROJECT_ASE = libvxdrv_ase.so

AFU_JSON_INFO = vortex_afu.h

# "test" and "test_ase" are real executables, so only these two are phony.
.PHONY: all clean

all: $(PROJECT) $(PROJECT_ASE) test test_ase

# AFU info from JSON file, including AFU UUID
$(AFU_JSON_INFO): ../hw/vortex_afu.json
	afu_json_mgr json-info --afu-json=$^ --c-hdr=$@

$(PROJECT): vx_driver.o
	$(CC) $(DRV_CFLAGS) $^ $(DRV_LDFLAGS) $(FPGA_LIBS) -o $@

# The ASE library links its own object: USE_ASE is a preprocessor switch in
# vx_driver.c, so it must be defined when compiling, not only when linking.
$(PROJECT_ASE): vx_driver_ase.o
	$(CC) $(DRV_CFLAGS) -DUSE_ASE $^ $(DRV_LDFLAGS) $(ASE_LIBS) -o $@

test: test.o utils.o $(PROJECT)
	$(CXX) $(CXXFLAGS) test.o utils.o $(LDFLAGS) -lvxdrv -o $@

test_ase: test.o utils.o $(PROJECT_ASE)
	$(CXX) $(CXXFLAGS) -DUSE_ASE test.o utils.o $(LDFLAGS) -lvxdrv_ase -o $@

vx_driver.o: vx_driver.c
	$(CC) $(DRV_CFLAGS) -c $^ -o $@

vx_driver_ase.o: vx_driver.c
	$(CC) $(DRV_CFLAGS) -DUSE_ASE -c $^ -o $@

test.o: test.cpp $(AFU_JSON_INFO)
	$(CXX) $(CXXFLAGS) -c test.cpp -o $@

.depend: vx_driver.c test.cpp
	$(CXX) $(CXXFLAGS) -MM $^ > .depend;

clean:
	rm -rf $(PROJECT) $(PROJECT_ASE) test test_ase $(AFU_JSON_INFO) *.so *.o .depend

ifneq ($(MAKECMDGOALS),clean)
-include .depend
endif
|
||||
@@ -1,69 +0,0 @@
|
||||
#include <vx_driver.h>
|
||||
#include <iostream>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
#define CACHE_LINESIZE 64
|
||||
|
||||
const char* program_file = nullptr;
|
||||
|
||||
// Prints the two-line usage banner to stdout.
static void show_usage() {
  std::cout << "Vortex Driver Test." << std::endl
            << "Usage: [-f: program] [-h: help]" << std::endl;
}
|
||||
|
||||
static void parse_args(int argc, char **argv) {
|
||||
int c;
|
||||
while ((c = getopt(argc, argv, "f:h?")) != -1) {
|
||||
switch (c) {
|
||||
case 'f': {
|
||||
program_file = optarg;
|
||||
} break;
|
||||
case 'h':
|
||||
case '?': {
|
||||
show_usage();
|
||||
exit(0);
|
||||
} break;
|
||||
default:
|
||||
show_usage();
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
if (nullptr == program_file) {
|
||||
show_usage();
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
|
||||
// open device connection
|
||||
auto device = vx_dev_open();
|
||||
|
||||
// upload program
|
||||
if (0 != upload_program(device, program_file)) {
|
||||
vx_dev_close(device);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// start device
|
||||
if (0 != vx_start(device)) {
|
||||
vx_dev_close(device);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// wait for completion
|
||||
if (0 != vx_ready_wait(device, -1)) {
|
||||
vx_dev_close(device);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// close device
|
||||
vx_dev_close(device);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,156 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include "utils.h"
|
||||
|
||||
// Converts one hexadecimal digit character to its numeric value.
// Characters outside A-F / a-f are treated as decimal digits (c - '0')
// without further validation.
static uint32_t hti_old(char c) {
  const bool is_upper_hex = ('A' <= c) && (c <= 'F');
  const bool is_lower_hex = ('a' <= c) && (c <= 'f');

  if (is_upper_hex)
    return c - 'A' + 10;
  if (is_lower_hex)
    return c - 'a' + 10;
  return c - '0';
}
|
||||
|
||||
static uint32_t hToI_old(char *c, uint32_t size) {
|
||||
uint32_t value = 0;
|
||||
for (uint32_t i = 0; i < size; i++) {
|
||||
value += hti_old(c[i]) << ((size - i - 1) * 4);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
|
||||
int parse_ihex_line(char* line, ihex_t* out) {
|
||||
if (line[0] != ':') {
|
||||
std::cout << "error: invalid line entry!" << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
uint32_t data_size = 0;
|
||||
uint32_t address = 0;
|
||||
uint32_t offset = 0;
|
||||
bool has_offset = false;
|
||||
bool is_eof = false;
|
||||
|
||||
auto record_type = hToI_old(line + 7, 2);
|
||||
|
||||
switch (record_type) {
|
||||
case 0: { // data
|
||||
data_size = hToI_old(line + 1, 2);
|
||||
address = hToI_old(line + 3, 4);
|
||||
for (uint32_t i = 0; i < data_size; i++) {
|
||||
out->data[i] = hToI_old(line + 9 + i * 2, 2);
|
||||
}
|
||||
} break;
|
||||
case 1: // end of file
|
||||
is_eof = true;
|
||||
break;
|
||||
case 2: // extended segment address
|
||||
offset = hToI_old(line + 9, 4) << 4;
|
||||
has_offset = true;
|
||||
break;
|
||||
case 3: // start segment address
|
||||
break;
|
||||
case 4: // extended linear address
|
||||
offset = hToI_old(line + 9, 4) << 16;
|
||||
has_offset = true;
|
||||
break;
|
||||
case 5: // start linear address
|
||||
break;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
|
||||
out->address = address;
|
||||
out->data_size = data_size;
|
||||
out->offset = offset;
|
||||
out->has_offset = has_offset;
|
||||
out->is_eof = is_eof;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Loads an Intel HEX program into device memory.
//
// First writes a five-instruction initialization routine at device address 0,
// then streams the data records of `filename` through one staging buffer of
// 16 cache lines, flushing it whenever it fills up.
//
// Returns 0 on success. Returns -1 if the file cannot be opened, the buffer
// cannot be allocated, a record is malformed, or any transfer fails. The
// staging buffer is released on every path.
int upload_program(vx_device_h device, const char* filename) {
  std::ifstream ifs(filename);
  if (!ifs) {
    std::cout << "error: " << filename << " not found" << std::endl;
    return -1;
  }

  uint32_t transfer_size = 16 * VX_CACHE_LINESIZE;

  // allocate device buffer
  auto buffer = vx_buf_alloc(device, transfer_size);
  if (nullptr == buffer)
    return -1;

  // get buffer address
  auto buf_ptr = (uint8_t*)vs_buf_ptr(buffer);

  //
  // copy initialization routine
  //

  ((uint32_t*)buf_ptr)[0] = 0xf1401073;
  ((uint32_t*)buf_ptr)[1] = 0xf1401073;
  ((uint32_t*)buf_ptr)[2] = 0x30101073;
  ((uint32_t*)buf_ptr)[3] = 0x800000b7;
  ((uint32_t*)buf_ptr)[4] = 0x000080e7;

  int status = vx_copy_to_fpga(buffer, 0, 5 * 4, 0);

  //
  // copy hex program
  //

  char line[ihex_t::MAX_LINE_SIZE];
  uint32_t hex_offset = 0;
  uint32_t prev_hex_address = 0;
  uint32_t dest_address = -1; // (uint32_t)-1 means "no batch started yet"
  uint32_t src_offset = 0;

  while (0 == status) {
    ifs.getline(line, ihex_t::MAX_LINE_SIZE);
    if (!ifs)
      break;

    // Tolerate blank lines (including a lone CR from CRLF files).
    if (line[0] == '\0' || line[0] == '\r')
      continue;

    ihex_t ihex;
    if (0 != parse_ihex_line(line, &ihex)) {
      // Do not touch ihex after a failed parse; abort the upload instead.
      status = -1;
      break;
    }

    if (ihex.is_eof)
      break;

    if (ihex.has_offset) {
      hex_offset = ihex.offset;
    }

    if (ihex.data_size != 0) {
      auto hex_address = ihex.address + hex_offset;
      if (dest_address == (uint32_t)-1) {
        // First data record: align the batch start down to a cache line.
        dest_address = (hex_address / VX_CACHE_LINESIZE) * VX_CACHE_LINESIZE;
        src_offset = hex_address - dest_address;
      } else {
        // NOTE(review): a gap between records only advances src_offset; the
        // skipped bytes keep whatever the staging buffer held — confirm intent.
        auto delta = hex_address - prev_hex_address;
        src_offset += delta;
      }

      for (uint32_t i = 0; i < ihex.data_size; ++i) {
        if (src_offset >= transfer_size) {
          // flush current batch to FPGA
          status = vx_copy_to_fpga(buffer, dest_address, transfer_size, 0);
          if (0 != status)
            break;
          dest_address = (hex_address / VX_CACHE_LINESIZE) * VX_CACHE_LINESIZE;
          src_offset = hex_address - dest_address;
        }
        buf_ptr[src_offset++] = ihex.data[i];
        ++hex_address;
      }

      prev_hex_address = hex_address;
    }
  }

  // flush last batch to FPGA
  if (0 == status && src_offset) {
    status = vx_copy_to_fpga(buffer, dest_address, src_offset, 0);
  }

  vx_buf_release(buffer);

  return (0 == status) ? 0 : -1;
}
|
||||
@@ -1,19 +0,0 @@
|
||||
|
||||
#pragma once

// Fixed-width integer types are used below; include them here so the header
// does not depend on what the including file happened to pull in first.
#include <stdint.h>

#include <vx_driver.h>

// One parsed Intel HEX record, as filled in by parse_ihex_line().
struct ihex_t {
  static constexpr int MAX_LINE_SIZE = 524; // size of the caller's line buffer
  static constexpr int MAX_DATA_SIZE = 255; // largest value a two-digit byte count can hold

  uint8_t data[MAX_DATA_SIZE]; // payload bytes (data records only)
  uint32_t address;            // 16-bit address field of a data record
  uint32_t data_size;          // number of valid bytes in data
  uint32_t offset;             // base offset from an extended address record
  bool has_offset;             // true when offset was set by this record
  bool is_eof;                 // true for the end-of-file record
};

// Parses one record line into *out. Returns 0 on success, -1 on error.
int parse_ihex_line(char* line, ihex_t* out);

// Uploads the Intel HEX file `filename` to the device. Returns 0 on success, -1 on error.
int upload_program(vx_device_h device, const char* filename);
|
||||
@@ -1,259 +0,0 @@
|
||||
#include "vx_driver.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <uuid/uuid.h>
|
||||
|
||||
#include <opae/fpga.h>
|
||||
|
||||
// MMIO Address Mappings
#define AFU_ID AFU_ACCEL_UUID

#define MMIO_COPY_IO_ADDRESS 0x120
#define MMIO_COPY_AVM_ADDRESS 0x100
#define MMIO_COPY_DATA_SIZE 0x118

#define MMIO_CMD_TYPE 0x110 // MMIO location set by SW to denote read/write. read: 3; write: 1; vortex: 7
#define MMIO_READY_FOR_CMD 0x198

// Evaluates an OPAE call. Any result other than FPGA_OK is logged and makes
// the enclosing function return -1.
// The loop condition is the literal 0 rather than `false`, so this C file does
// not rely on <stdbool.h> arriving through another header. The argument is
// parenthesized so any expression can be passed.
#define CHECK_RES(_expr)                                           \
  do {                                                             \
    fpga_result res = (_expr);                                     \
    if (res != FPGA_OK) {                                          \
      printf("OPAE Error: '%s' returned %d!\n", #_expr, (int)res); \
      return -1;                                                   \
    }                                                              \
  } while (0)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
typedef struct vx_buffer_ {
|
||||
uint64_t wsid;
|
||||
volatile void* host_ptr;
|
||||
uint64_t io_addr;
|
||||
fpga_handle hdevice;
|
||||
size_t size;
|
||||
} vx_buffer_t;
|
||||
|
||||
static size_t align_size(size_t size) {
|
||||
return VX_CACHE_LINESIZE * ((size + VX_CACHE_LINESIZE - 1) / VX_CACHE_LINESIZE);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Search for an accelerator matching the requested UUID and connect to it
|
||||
// Convert this to void if required as storing the fpga_handle to params variable
|
||||
extern vx_device_h vx_dev_open(const char *accel_uuid) {
|
||||
fpga_properties filter = NULL;
|
||||
fpga_result res;
|
||||
fpga_guid guid;
|
||||
fpga_token accel_token;
|
||||
uint32_t num_matches;
|
||||
fpga_handle accel_handle;
|
||||
|
||||
// Set up a filter that will search for an accelerator
|
||||
fpgaGetProperties(NULL, &filter);
|
||||
fpgaPropertiesSetObjectType(filter, FPGA_ACCELERATOR);
|
||||
|
||||
// Add the desired UUID to the filter
|
||||
uuid_parse(accel_uuid, guid);
|
||||
fpgaPropertiesSetGUID(filter, guid);
|
||||
|
||||
// Do the search across the available FPGA contexts
|
||||
num_matches = 1;
|
||||
fpgaEnumerate(&filter, 1, &accel_token, 1, &num_matches);
|
||||
|
||||
// Not needed anymore
|
||||
fpgaDestroyProperties(&filter);
|
||||
|
||||
if (num_matches < 1) {
|
||||
fprintf(stderr, "Accelerator %s not found!\n", accel_uuid);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Open accelerator
|
||||
res = fpgaOpen(accel_token, &accel_handle, 0);
|
||||
if (FPGA_OK != res) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Done with token
|
||||
fpgaDestroyToken(&accel_token);
|
||||
|
||||
return accel_handle;
|
||||
}
|
||||
|
||||
// Close the fpga when all the operations are done
|
||||
extern int vx_dev_close(vx_device_h hdevice) {
|
||||
if (NULL == hdevice)
|
||||
return -1;
|
||||
|
||||
fpgaClose(hdevice);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Prepares a shared buffer of at least `size` bytes (rounded up by
// align_size) on hdevice and wraps it in a vx_buffer_t.
// Returns NULL if the handle is NULL or any step fails. A buffer that was
// already prepared is released before returning NULL.
extern vx_buffer_h vx_buf_alloc(vx_device_h hdevice, size_t size) {
  void* host_ptr;
  uint64_t wsid;
  uint64_t io_addr;

  if (NULL == hdevice)
    return NULL;

  if (FPGA_OK != fpgaPrepareBuffer(hdevice, align_size(size), &host_ptr, &wsid, 0))
    return NULL;

  // Get the physical address of the buffer in the accelerator
  if (FPGA_OK == fpgaGetIOAddress(hdevice, wsid, &io_addr)) {
    vx_buffer_t* buffer = (vx_buffer_t*)malloc(sizeof(vx_buffer_t));
    if (buffer != NULL) {
      buffer->wsid = wsid;
      buffer->host_ptr = host_ptr;
      buffer->io_addr = io_addr;
      buffer->hdevice = hdevice;
      buffer->size = size;
      return (vx_buffer_h)buffer;
    }
  }

  // Either the address lookup or the wrapper allocation failed.
  fpgaReleaseBuffer(hdevice, wsid);
  return NULL;
}
|
||||
|
||||
// Returns the host address of the buffer, or NULL for a NULL handle.
extern volatile void* vs_buf_ptr(vx_buffer_h hbuffer) {
  vx_buffer_t* buffer = (vx_buffer_t*)hbuffer;
  return (buffer != NULL) ? buffer->host_ptr : NULL;
}
|
||||
|
||||
// Releases the shared buffer and frees its wrapper.
// Returns -1 for a NULL handle, 0 otherwise.
extern int vx_buf_release(vx_buffer_h hbuffer) {
  vx_buffer_t* buffer = (vx_buffer_t*)hbuffer;
  if (buffer != NULL) {
    fpgaReleaseBuffer(buffer->hdevice, buffer->wsid);
    free(buffer);
    return 0;
  }
  return -1;
}
|
||||
|
||||
// Check if HW is ready for SW
|
||||
static int ready_for_sw(fpga_handle hdevice) {
|
||||
uint64_t data = 0;
|
||||
struct timespec sleep_time;
|
||||
|
||||
#ifdef USE_ASE
|
||||
sleep_time.tv_sec = 1;
|
||||
sleep_time.tv_nsec = 0;
|
||||
#else
|
||||
sleep_time.tv_sec = 0;
|
||||
sleep_time.tv_nsec = 1000000;
|
||||
#endif
|
||||
|
||||
do {
|
||||
CHECK_RES(fpgaReadMMIO64(hdevice, 0, MMIO_READY_FOR_CMD, &data));
|
||||
nanosleep(&sleep_time, NULL);
|
||||
} while (data != 0x1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Copies `size` bytes, starting src_offset bytes into the shared buffer, to
// device local memory at dest_addr, then waits for the command to complete.
// Returns 0 on success. Returns -1 for a NULL handle, an out-of-bounds range,
// or a failed MMIO access.
extern int vx_copy_to_fpga(vx_buffer_h hbuffer, size_t dest_addr, size_t size, size_t src_offset) {
  vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);

  // Reject a NULL handle, as the other buffer entry points do.
  if (NULL == buffer)
    return -1;

  // bound checking
  if (size + src_offset > buffer->size)
    return -1;

  // Ensure ready for new command
  if (ready_for_sw(buffer->hdevice) != 0)
    return -1;

  CHECK_RES(fpgaWriteMMIO64(buffer->hdevice, 0, MMIO_COPY_AVM_ADDRESS, dest_addr));
  // The IO address register is written in cache-line units.
  CHECK_RES(fpgaWriteMMIO64(buffer->hdevice, 0, MMIO_COPY_IO_ADDRESS, (buffer->io_addr + src_offset)/VX_CACHE_LINESIZE));
  CHECK_RES(fpgaWriteMMIO64(buffer->hdevice, 0, MMIO_COPY_DATA_SIZE, size));
  CHECK_RES(fpgaWriteMMIO64(buffer->hdevice, 0, MMIO_CMD_TYPE, 1)); // WRITE CMD

  // Wait for the write operation to finish
  return ready_for_sw(buffer->hdevice);
}
|
||||
|
||||
// Copies `size` bytes from device local memory at src_addr into the shared
// buffer at dest_offset, then waits for the command to complete.
// Returns 0 on success. Returns -1 for a NULL handle, an out-of-bounds range,
// or a failed MMIO access.
extern int vx_copy_from_fpga(vx_buffer_h hbuffer, size_t src_addr, size_t size, size_t dest_offset) {
  vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);

  // Reject a NULL handle, as the other buffer entry points do.
  if (NULL == buffer)
    return -1;

  // bound checking
  if (size + dest_offset > buffer->size)
    return -1;

  // Ensure ready for new command
  if (ready_for_sw(buffer->hdevice) != 0)
    return -1;

  CHECK_RES(fpgaWriteMMIO64(buffer->hdevice, 0, MMIO_COPY_AVM_ADDRESS, src_addr));
  // The IO address register is written in cache-line units.
  CHECK_RES(fpgaWriteMMIO64(buffer->hdevice, 0, MMIO_COPY_IO_ADDRESS, (buffer->io_addr + dest_offset)/VX_CACHE_LINESIZE));
  CHECK_RES(fpgaWriteMMIO64(buffer->hdevice, 0, MMIO_COPY_DATA_SIZE, size));
  CHECK_RES(fpgaWriteMMIO64(buffer->hdevice, 0, MMIO_CMD_TYPE, 3)); // READ CMD

  // Wait for the read operation to finish
  return ready_for_sw(buffer->hdevice);
}
|
||||
|
||||
// Issues the START command once the device accepts commands.
// Returns 0 on success, -1 for a NULL handle or a failed MMIO access.
extern int vx_start(vx_device_h hdevice) {
  // A NULL handle and a failed readiness wait both yield -1; the wait only
  // runs for a non-NULL handle.
  if (NULL == hdevice || 0 != ready_for_sw(hdevice))
    return -1;

  CHECK_RES(fpgaWriteMMIO64(hdevice, 0, MMIO_CMD_TYPE, 7)); // START CMD

  return 0;
}
|
||||
|
||||
// Waits until the device reports ready (MMIO_READY_FOR_CMD reads 1) or the
// timeout expires.
//
// timeout: budget in milliseconds. A negative value waits indefinitely; zero
//          polls exactly once.
// Returns 0 when the device is ready and also when the timeout expires.
// Returns -1 for a NULL handle or a failed MMIO read.
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
  if (NULL == hdevice)
    return -1;

  uint64_t data = 0;
  struct timespec sleep_time;

#ifdef USE_ASE
  sleep_time.tv_sec = 1;
  sleep_time.tv_nsec = 0;
#else
  sleep_time.tv_sec = 0;
  sleep_time.tv_nsec = 1000000;
#endif

  // Length of one polling step, in milliseconds.
  const long long sleep_time_ms = (sleep_time.tv_sec * 1000) + (sleep_time.tv_nsec / 1000000);

  for (;;) {
    CHECK_RES(fpgaReadMMIO64(hdevice, 0, MMIO_READY_FOR_CMD, &data));
    if (0x1 == data)
      break; // device is ready
    if (0 == timeout)
      break; // budget exhausted

    nanosleep(&sleep_time, NULL);

    // Charge the elapsed step against a finite budget, clamping at zero so
    // that an exhausted budget is never mistaken for "wait forever".
    if (timeout > 0)
      timeout = (timeout > sleep_time_ms) ? (timeout - sleep_time_ms) : 0;
  }

  return 0;
}
|
||||
@@ -1,47 +0,0 @@
|
||||
#ifndef __VX_DRIVER_H__
#define __VX_DRIVER_H__

#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

/* Opaque handle to an opened device. */
typedef void* vx_device_h;

/* Opaque handle to a buffer shared with a device. */
typedef void* vx_buffer_h;

/* Cache line size in bytes. */
#define VX_CACHE_LINESIZE 64

/* Opens the device and connects to it. */
vx_device_h vx_dev_open();

/* Closes the device once all operations are done. */
int vx_dev_close(vx_device_h hdevice);

/* Allocates a buffer of `size` bytes shared with the device. */
vx_buffer_h vx_buf_alloc(vx_device_h hdevice, size_t size);

/* Returns the host pointer address of the buffer. */
void* vs_buf_ptr(vx_buffer_h hbuffer);

/* Releases the buffer. */
int vx_buf_release(vx_buffer_h hbuffer);

/* Copies `size` bytes from the buffer (at src_offset) to device local memory (at dest_addr). */
int vx_copy_to_fpga(vx_buffer_h hbuffer, size_t dest_addr, size_t size, size_t src_offset);

/* Copies `size` bytes from device local memory (at src_addr) to the buffer (at dst_offset). */
int vx_copy_from_fpga(vx_buffer_h hbuffer, size_t src_addr, size_t size, size_t dst_offset);

/* Starts device execution. */
int vx_start(vx_device_h hdevice);

/* Waits for the device to become ready; the timeout is in milliseconds. */
int vx_ready_wait(vx_device_h hdevice, long long timeout);

#ifdef __cplusplus
}
#endif

#endif /* __VX_DRIVER_H__ */
|
||||
@@ -1,21 +1,15 @@
|
||||
RISCV_TOOL_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
|
||||
# RISCV_TOOL_PATH ?= /opt/riscv-new/drops
|
||||
VX_RT_PATH ?= $(wildcard ../../../runtime)
|
||||
RISCV_TOOLCHAIN_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
|
||||
VORTEX_RT_PATH ?= $(wildcard ../../../runtime)
|
||||
|
||||
VX_CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
|
||||
VX_CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
|
||||
VX_DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
|
||||
VX_CPY = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||
VX_CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc
|
||||
VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
|
||||
VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
|
||||
VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||
|
||||
VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
|
||||
VX_STR = $(VX_RT_PATH)/startup/vx_start.S
|
||||
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
|
||||
VX_IO = $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
|
||||
VX_FIO = $(VX_RT_PATH)/fileio/fileio.S
|
||||
VX_CFLAGS += -march=rv32im -mabi=ilp32 -O3 -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -ffreestanding -nostartfiles -Wl,--gc-sections
|
||||
VX_CFLAGS += -I$(VORTEX_RT_PATH)/include
|
||||
|
||||
VX_CFLAGS = -march=rv32im -mabi=ilp32 -O3 -Wl,-Bstatic,-T,$(VX_RT_PATH)/startup/vx_link.ld -ffreestanding -nostartfiles -Wl,--gc-sections
|
||||
VX_CFLAGS += -I../../../hw
|
||||
VX_LDFLAGS += $(VORTEX_RT_PATH)/libvortexrt.a
|
||||
|
||||
VX_SRCS = kernel.c
|
||||
|
||||
@@ -32,13 +26,13 @@ SRCS = basic.cpp
|
||||
all: $(PROJECT) kernel.bin kernel.dump
|
||||
|
||||
kernel.dump: kernel.elf
|
||||
$(VX_DMP) -D kernel.elf > kernel.dump
|
||||
$(VX_DP) -D kernel.elf > kernel.dump
|
||||
|
||||
kernel.bin: kernel.elf
|
||||
$(VX_CPY) -O binary kernel.elf kernel.bin
|
||||
$(VX_CP) -O binary kernel.elf kernel.bin
|
||||
|
||||
kernel.elf: $(SRCS)
|
||||
$(VX_CC) $(VX_CFLAGS) $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_SRCS) -I$(VX_RT_PATH) -o kernel.elf
|
||||
kernel.elf: $(VX_SRCS)
|
||||
$(VX_CC) $(VX_CFLAGS) $(VX_SRCS) $(VX_LDFLAGS) -o kernel.elf
|
||||
|
||||
$(PROJECT): $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../stub -lvortex -o $@
|
||||
|
||||
0
driver/tests/basic/kernel.bin
Normal file → Executable file
0
driver/tests/basic/kernel.bin
Normal file → Executable file
@@ -1,6 +1,5 @@
|
||||
#include <stdint.h>
|
||||
#include <VX_config.h>
|
||||
#include "intrinsics/vx_intrinsics.h"
|
||||
#include <vx_intrinsics.h>
|
||||
#include "common.h"
|
||||
|
||||
void main() {
|
||||
|
||||
@@ -1,20 +1,15 @@
|
||||
RISCV_TOOL_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
|
||||
VX_RT_PATH ?= $(wildcard ../../../runtime)
|
||||
RISCV_TOOLCHAIN_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
|
||||
VORTEX_RT_PATH ?= $(wildcard ../../../runtime)
|
||||
|
||||
VX_CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
|
||||
VX_CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
|
||||
VX_DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
|
||||
VX_CPY = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||
VX_CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc
|
||||
VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
|
||||
VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
|
||||
VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||
|
||||
#VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
|
||||
VX_STR = $(VX_RT_PATH)/startup/vx_start.S
|
||||
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
|
||||
#VX_IO = $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
|
||||
#VX_FIO = $(VX_RT_PATH)/fileio/fileio.S
|
||||
VX_CFLAGS += -march=rv32im -mabi=ilp32 -O3 -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -ffreestanding -nostartfiles -Wl,--gc-sections
|
||||
VX_CFLAGS += -I$(VORTEX_RT_PATH)/include
|
||||
|
||||
VX_CFLAGS = -march=rv32im -mabi=ilp32 -O3 -Wl,-Bstatic,-T,$(VX_RT_PATH)/startup/vx_link.ld -ffreestanding -nostartfiles -Wl,--gc-sections
|
||||
VX_CFLAGS += -I../../../hw
|
||||
VX_LDFLAGS += $(VORTEX_RT_PATH)/libvortexrt.a
|
||||
|
||||
VX_SRCS = kernel.c
|
||||
|
||||
@@ -29,13 +24,13 @@ SRCS = demo.cpp
|
||||
all: $(PROJECT) kernel.bin kernel.dump
|
||||
|
||||
kernel.dump: kernel.elf
|
||||
$(VX_DMP) -D kernel.elf > kernel.dump
|
||||
$(VX_DP) -D kernel.elf > kernel.dump
|
||||
|
||||
kernel.bin: kernel.elf
|
||||
$(VX_CPY) -O binary kernel.elf kernel.bin
|
||||
$(VX_CP) -O binary kernel.elf kernel.bin
|
||||
|
||||
kernel.elf: $(SRCS)
|
||||
$(VX_CC) $(VX_CFLAGS) $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_SRCS) -I$(VX_RT_PATH) -o kernel.elf
|
||||
kernel.elf: $(VX_SRCS)
|
||||
$(VX_CC) $(VX_CFLAGS) $(VX_SRCS) $(VX_LDFLAGS) -o kernel.elf
|
||||
|
||||
$(PROJECT): $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../stub -lvortex -o $@
|
||||
|
||||
BIN
driver/tests/demo/kernel.bin
Normal file → Executable file
BIN
driver/tests/demo/kernel.bin
Normal file → Executable file
Binary file not shown.
@@ -1,7 +1,6 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "intrinsics/vx_intrinsics.h"
|
||||
#include "vx_api/vx_api.h"
|
||||
#include <stdint.h>
|
||||
#include <vx_intrinsics.h>
|
||||
#include <vx_spawn.h>
|
||||
#include "common.h"
|
||||
|
||||
void kernel_body(void* arg) {
|
||||
@@ -20,10 +19,6 @@ void kernel_body(void* arg) {
|
||||
|
||||
void main() {
|
||||
struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
|
||||
/*printf("stride=%d\n", arg->stride);
|
||||
printf("src0_ptr=0x%src0\n", arg->src0_ptr);
|
||||
printf("src1_ptr=0x%src0\n", arg->src1_ptr);
|
||||
printf("dst_ptr=0x%src0\n", arg->dst_ptr);*/
|
||||
int num_warps = vx_num_warps();
|
||||
int num_threads = vx_num_threads();
|
||||
vx_spawn_warps(num_warps, num_threads, kernel_body, arg);
|
||||
|
||||
Binary file not shown.
Reference in New Issue
Block a user