merging changes from OPAE branch making this branch
This commit is contained in:
19
driver/sw/Makefile
Normal file
19
driver/sw/Makefile
Normal file
@@ -0,0 +1,19 @@
|
||||
|
||||
|
||||
# Top-level driver Makefile: every backend lives in its own sub-directory
# and is built by that directory's Makefile.
BACKENDS := opae rtlsim simx

all: $(BACKENDS)

# Delegate the build of one backend to its sub-directory.
$(BACKENDS):
	$(MAKE) -C $@

# Clean the backends one after another, in the same order they are listed.
clean:
	$(MAKE) -C opae clean
	$(MAKE) -C rtlsim clean
	$(MAKE) -C simx clean

.PHONY: all clean $(BACKENDS)
|
||||
67
driver/sw/include/vortex.h
Normal file
67
driver/sw/include/vortex.h
Normal file
@@ -0,0 +1,67 @@
|
||||
#ifndef __VX_DRIVER_H__
|
||||
#define __VX_DRIVER_H__
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef void* vx_device_h;
|
||||
|
||||
typedef void* vx_buffer_h;
|
||||
|
||||
#define VX_LOCAL_MEM_SIZE 0xffffffff
|
||||
|
||||
#define VX_ALLOC_BASE_ADDR 0x10000000
|
||||
|
||||
#define VX_KERNEL_BASE_ADDR 0x80000000
|
||||
|
||||
#define VX_CACHE_LINESIZE 64
|
||||
|
||||
// open the device and connect to it
|
||||
int vx_dev_open(vx_device_h* hdevice);
|
||||
|
||||
// Close the device when all the operations are done
|
||||
int vx_dev_close(vx_device_h hdevice);
|
||||
|
||||
// Allocate shared buffer with device
|
||||
int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer);
|
||||
|
||||
// Get host pointer address
|
||||
volatile void* vx_host_ptr(vx_buffer_h hbuffer);
|
||||
|
||||
// release buffer
|
||||
int vx_buf_release(vx_buffer_h hbuffer);
|
||||
|
||||
// allocate device memory and return address
|
||||
int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr);
|
||||
|
||||
// Flush the device caches for the given device memory range
|
||||
int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size);
|
||||
|
||||
// Copy bytes from buffer to device local memory
|
||||
int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset);
|
||||
|
||||
// Copy bytes from device local memory to buffer
|
||||
int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dst_offset);
|
||||
|
||||
// Start device execution
|
||||
int vx_start(vx_device_h hdevice);
|
||||
|
||||
// Wait for device ready with milliseconds timeout
|
||||
int vx_ready_wait(vx_device_h hdevice, long long timeout);
|
||||
|
||||
////////////////////////////// UTILITY FUNCTIONS //////////////////////////////
|
||||
|
||||
// upload kernel bytes to device
|
||||
int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size);
|
||||
|
||||
// upload kernel file to device
|
||||
int vx_upload_kernel_file(vx_device_h device, const char* filename);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // __VX_DRIVER_H__
|
||||
66
driver/sw/opae/Makefile
Normal file
66
driver/sw/opae/Makefile
Normal file
@@ -0,0 +1,66 @@
|
||||
|
||||
CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors

CXXFLAGS += -I../include -I/tools/opae/1.4.0/include

LDFLAGS += -L/tools/opae/1.4.0/lib

# stack execution protection
LDFLAGS +=-z noexecstack

# data relocation and protection
LDFLAGS +=-z relro -z now

# stack buffer overrun detection
CXXFLAGS +=-fstack-protector

# Position independent code
CXXFLAGS += -fPIC

LDFLAGS += -luuid

LDFLAGS += -shared

FPGA_LIBS += -lopae-c

ASE_LIBS += -lopae-c-ase

LIB_DIR=../lib

ASE_DIR = ase

PROJECT = libvortex.so

PROJECT_ASE = $(ASE_DIR)/libvortex.so

AFU_JSON_INFO = vortex_afu.h

SRCS = vortex.cpp ../vx_utils.cpp

all: $(PROJECT) $(PROJECT_ASE)

# AFU info from JSON file, including AFU UUID
$(AFU_JSON_INFO): ../../hw/vortex_afu.json
	afu_json_mgr json-info --afu-json=$^ --c-hdr=$@

# vortex.cpp includes the generated AFU header, so the header is listed as a
# prerequisite; the command uses $(SRCS) (not $^) to keep it off the
# compiler command line.
$(PROJECT): $(SRCS) $(AFU_JSON_INFO)
	$(CXX) $(CXXFLAGS) $(SRCS) $(LDFLAGS) $(FPGA_LIBS) -o $@

# The output directory is an order-only prerequisite: it has to exist, but
# its timestamp must not trigger a rebuild of the library.
$(PROJECT_ASE): $(SRCS) $(AFU_JSON_INFO) | $(ASE_DIR)
	$(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $@

vortex.o: vortex.cpp $(AFU_JSON_INFO)
	$(CXX) $(CXXFLAGS) -c vortex.cpp -o $@

$(ASE_DIR):
	mkdir -p $(ASE_DIR)

.depend: $(SRCS) $(AFU_JSON_INFO)
	$(CXX) $(CXXFLAGS) -MM $(SRCS) > .depend;

clean:
	rm -rf $(PROJECT) $(PROJECT_ASE) $(AFU_JSON_INFO) *.o .depend

.PHONY: all clean

ifneq ($(MAKECMDGOALS),clean)
-include .depend
endif
|
||||
349
driver/sw/opae/vortex.cpp
Executable file
349
driver/sw/opae/vortex.cpp
Executable file
@@ -0,0 +1,349 @@
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <uuid/uuid.h>
|
||||
|
||||
#include <opae/fpga.h>
|
||||
#include <vortex.h>
|
||||
#include "vortex_afu.h"
|
||||
|
||||
// MMIO Address Mappings
|
||||
#define MMIO_COPY_IO_ADDRESS 0X120
|
||||
#define MMIO_COPY_AVM_ADDRESS 0x100
|
||||
#define MMIO_COPY_DATA_SIZE 0X118
|
||||
|
||||
#define MMIO_CMD_TYPE 0X110
|
||||
#define MMIO_READY_FOR_CMD 0X198
|
||||
|
||||
#define MMIO_CMD_TYPE_READ 0
|
||||
#define MMIO_CMD_TYPE_WRITE 1
|
||||
#define MMIO_CMD_TYPE_START 2
|
||||
#define MMIO_CMD_TYPE_SNOOP 3
|
||||
|
||||
#define CHECK_RES(_expr) \
|
||||
do { \
|
||||
fpga_result res = _expr; \
|
||||
if (res == FPGA_OK) \
|
||||
break; \
|
||||
printf("OPAE Error: '%s' returned %d!\n", #_expr, (int)res); \
|
||||
return -1; \
|
||||
} while (false)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
typedef struct vx_device_ {
|
||||
fpga_handle fpga;
|
||||
size_t mem_allocation;
|
||||
} vx_device_t;
|
||||
|
||||
typedef struct vx_buffer_ {
|
||||
uint64_t wsid;
|
||||
volatile void* host_ptr;
|
||||
uint64_t io_addr;
|
||||
fpga_handle fpga;
|
||||
size_t size;
|
||||
} vx_buffer_t;
|
||||
|
||||
static size_t align_size(size_t size) {
|
||||
return VX_CACHE_LINESIZE * ((size + VX_CACHE_LINESIZE - 1) / VX_CACHE_LINESIZE);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Search for an accelerator matching the requested UUID and connect to it
|
||||
// Convert this to void if required as storing the fpga_handle to params variable
|
||||
extern int vx_dev_open(vx_device_h* hdevice) {
|
||||
fpga_properties filter = NULL;
|
||||
fpga_result res;
|
||||
fpga_guid guid;
|
||||
fpga_token accel_token;
|
||||
uint32_t num_matches;
|
||||
fpga_handle accel_handle;
|
||||
vx_device_t* device;
|
||||
|
||||
if (NULL == hdevice)
|
||||
return -1;
|
||||
|
||||
// Set up a filter that will search for an accelerator
|
||||
fpgaGetProperties(NULL, &filter);
|
||||
fpgaPropertiesSetObjectType(filter, FPGA_ACCELERATOR);
|
||||
|
||||
// Add the desired UUID to the filter
|
||||
uuid_parse(AFU_ACCEL_UUID, guid);
|
||||
fpgaPropertiesSetGUID(filter, guid);
|
||||
|
||||
// Do the search across the available FPGA contexts
|
||||
num_matches = 1;
|
||||
fpgaEnumerate(&filter, 1, &accel_token, 1, &num_matches);
|
||||
|
||||
// Not needed anymore
|
||||
fpgaDestroyProperties(&filter);
|
||||
|
||||
if (num_matches < 1) {
|
||||
fprintf(stderr, "Accelerator %s not found!\n", AFU_ACCEL_UUID);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Open accelerator
|
||||
res = fpgaOpen(accel_token, &accel_handle, 0);
|
||||
if (FPGA_OK != res) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Done with token
|
||||
fpgaDestroyToken(&accel_token);
|
||||
|
||||
// allocate device object
|
||||
device = (vx_device_t*)malloc(sizeof(vx_device_t));
|
||||
if (NULL == device) {
|
||||
fpgaClose(accel_handle);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
device->fpga = accel_handle;
|
||||
device->mem_allocation = VX_ALLOC_BASE_ADDR;
|
||||
|
||||
*hdevice = device;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Close the fpga when all the operations are done
|
||||
extern int vx_dev_close(vx_device_h hdevice) {
|
||||
if (NULL == hdevice)
|
||||
return -1;
|
||||
|
||||
vx_device_t *device = ((vx_device_t*)hdevice);
|
||||
|
||||
fpgaClose(device->fpga);
|
||||
|
||||
free(device);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Reserve `size` bytes (rounded up to a cache line) of device memory.
//
// hdevice:   device handle returned by vx_dev_open.
// size:      number of bytes requested, must be non-zero.
// dev_maddr: [out] receives the device address of the reservation.
// Returns 0 on success, -1 on invalid arguments or when the request does
// not fit below VX_LOCAL_MEM_SIZE. Allocations are linear; nothing in this
// function ever releases them.
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
    if (NULL == hdevice
     || NULL == dev_maddr
     || 0 == size)
        return -1;

    vx_device_t *device = ((vx_device_t*)hdevice);

    const size_t asize = align_size(size);
    if (asize < size)
        return -1; // rounding up wrapped around

    // The allocator starts at VX_ALLOC_BASE_ADDR, so the upper bound has to
    // be the local memory size; written subtractively to avoid overflow.
    const size_t limit = VX_LOCAL_MEM_SIZE;
    if (device->mem_allocation > limit
     || asize > limit - device->mem_allocation)
        return -1;

    *dev_maddr = device->mem_allocation;
    device->mem_allocation += asize;

    return 0;
}
|
||||
|
||||
// Allocate a buffer shared between host and device.
//
// The buffer is prepared through OPAE with a cache-line aligned size, its IO
// address is looked up, and both are recorded in a freshly allocated
// vx_buffer_t that is returned through `hbuffer`.
// Returns 0 on success, -1 on invalid arguments or any failure.
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
    if (NULL == hbuffer || NULL == hdevice || 0 == size)
        return -1;

    vx_device_t* const dev = (vx_device_t*)hdevice;

    void* mapped = NULL;
    uint64_t workspace_id = 0;
    if (fpgaPrepareBuffer(dev->fpga, align_size(size), &mapped, &workspace_id, 0) != FPGA_OK)
        return -1;

    // Get the physical address of the buffer in the accelerator
    uint64_t bus_addr = 0;
    if (fpgaGetIOAddress(dev->fpga, workspace_id, &bus_addr) != FPGA_OK) {
        fpgaReleaseBuffer(dev->fpga, workspace_id);
        return -1;
    }

    // allocate buffer object
    vx_buffer_t* const buf = (vx_buffer_t*)malloc(sizeof(vx_buffer_t));
    if (NULL == buf) {
        fpgaReleaseBuffer(dev->fpga, workspace_id);
        return -1;
    }

    buf->fpga     = dev->fpga;
    buf->wsid     = workspace_id;
    buf->io_addr  = bus_addr;
    buf->host_ptr = mapped;
    buf->size     = size; // the requested size, not the aligned one

    *hbuffer = buf;
    return 0;
}
|
||||
|
||||
// Return the host-side address of a shared buffer, or NULL for a null handle.
extern volatile void* vx_host_ptr(vx_buffer_h hbuffer) {
    if (NULL != hbuffer) {
        vx_buffer_t* const buf = (vx_buffer_t*)hbuffer;
        return buf->host_ptr;
    }
    return NULL;
}
|
||||
|
||||
// Release the OPAE workspace behind a shared buffer and free its descriptor.
// Returns -1 for a null handle, 0 otherwise.
extern int vx_buf_release(vx_buffer_h hbuffer) {
    if (NULL == hbuffer)
        return -1;

    vx_buffer_t* const buf = (vx_buffer_t*)hbuffer;
    fpgaReleaseBuffer(buf->fpga, buf->wsid);
    free(buf);
    return 0;
}
|
||||
|
||||
// Check if HW is ready for SW
|
||||
static int ready_for_sw(fpga_handle hdevice) {
|
||||
uint64_t data = 0;
|
||||
struct timespec sleep_time;
|
||||
|
||||
#ifdef USE_ASE
|
||||
sleep_time.tv_sec = 1;
|
||||
sleep_time.tv_nsec = 0;
|
||||
#else
|
||||
sleep_time.tv_sec = 0;
|
||||
sleep_time.tv_nsec = 1000000;
|
||||
#endif
|
||||
|
||||
do {
|
||||
CHECK_RES(fpgaReadMMIO64(hdevice, 0, MMIO_READY_FOR_CMD, &data));
|
||||
nanosleep(&sleep_time, NULL);
|
||||
} while (data != 0x1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Copy `size` bytes from a shared buffer (starting at `src_offset`) to device
// memory at `dev_maddr`.
//
// Returns 0 on success, -1 on invalid arguments, when the range does not fit
// inside the buffer, or when an MMIO access fails.
extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset) {
    if (NULL == hbuffer
     || 0 == size)
        return -1;

    vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);

    // bound checking, written so that size + src_offset cannot wrap around
    if (src_offset > buffer->size
     || size > buffer->size - src_offset)
        return -1;

    // Ensure ready for new command
    if (ready_for_sw(buffer->fpga) != 0)
        return -1;

    CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_AVM_ADDRESS, dev_maddr));
    // the IO address register is written in units of cache lines
    CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_IO_ADDRESS, (buffer->io_addr + src_offset)/VX_CACHE_LINESIZE));
    CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_DATA_SIZE, size));
    CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_CMD_TYPE, MMIO_CMD_TYPE_WRITE));

    // Wait for the write operation to finish
    return ready_for_sw(buffer->fpga);
}
|
||||
|
||||
// Copy `size` bytes from device memory at `dev_maddr` into a shared buffer,
// starting at `dest_offset` inside the buffer.
//
// Returns 0 on success, -1 on invalid arguments, when the range does not fit
// inside the buffer, or when an MMIO access fails.
extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dest_offset) {
    if (NULL == hbuffer
     || 0 == size)
        return -1;

    vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);

    // bound checking, written so that size + dest_offset cannot wrap around
    if (dest_offset > buffer->size
     || size > buffer->size - dest_offset)
        return -1;

    // Ensure ready for new command
    if (ready_for_sw(buffer->fpga) != 0)
        return -1;

    CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_AVM_ADDRESS, dev_maddr));
    // the IO address register is written in units of cache lines
    CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_IO_ADDRESS, (buffer->io_addr + dest_offset)/VX_CACHE_LINESIZE));
    CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_DATA_SIZE, size));
    CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_CMD_TYPE, MMIO_CMD_TYPE_READ));

    // Wait for the read operation to finish
    return ready_for_sw(buffer->fpga);
}
|
||||
|
||||
// Ask the device to flush its caches for `size` bytes starting at `dev_maddr`.
//
// Issues a SNOOP command through the MMIO command registers and waits for
// the hardware to become ready again.
// Returns 0 on success, -1 on invalid arguments or MMIO failure.
extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
    if (NULL == hdevice
     || 0 == size)
        return -1;

    vx_device_t *device = ((vx_device_t*)hdevice);

    // Ensure ready for new command
    if (ready_for_sw(device->fpga) != 0)
        return -1;

    // NOTE(review): no host buffer takes part in a flush, so the IO address
    // register is left untouched - confirm against the AFU command decoder.
    CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_COPY_AVM_ADDRESS, dev_maddr));
    CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_COPY_DATA_SIZE, size));
    CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, MMIO_CMD_TYPE_SNOOP));

    // Wait for the flush operation to finish
    return ready_for_sw(device->fpga);
}
|
||||
|
||||
// Issue the START command to the device once it accepts commands.
// Returns 0 on success, -1 for a null handle or on MMIO failure.
extern int vx_start(vx_device_h hdevice) {
    vx_device_t* const device = (vx_device_t*)hdevice;
    if (NULL == device)
        return -1;

    // the hardware must be idle before a new command is written
    const int ready = ready_for_sw(device->fpga);
    if (0 != ready)
        return -1;

    CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, MMIO_CMD_TYPE_START));
    return 0;
}
|
||||
|
||||
// Wait until the device reports ready, or until `timeout` milliseconds have
// elapsed.
//
// hdevice: device handle returned by vx_dev_open.
// timeout: budget in milliseconds; a negative value waits without limit,
//          zero polls exactly once.
// Returns -1 for a null handle or when an MMIO read fails, 0 otherwise
// (both when the device became ready and when the timeout expired).
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
    if (NULL == hdevice)
        return -1;

    vx_device_t *device = ((vx_device_t*)hdevice);

    uint64_t data = 0;
    struct timespec sleep_time;

#ifdef USE_ASE
    sleep_time.tv_sec = 1;
    sleep_time.tv_nsec = 0;
#else
    sleep_time.tv_sec = 0;
    sleep_time.tv_nsec = 1000000;
#endif

    // duration of one polling pause, in milliseconds
    const long long sleep_time_ms = (sleep_time.tv_sec * 1000) + (sleep_time.tv_nsec / 1000000);

    for (;;) {
        CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_READY_FOR_CMD, &data));
        nanosleep(&sleep_time, NULL);
        if (0x1 == data)
            break;
        if (timeout >= 0) {
            // charge the pause against the remaining budget
            if (timeout <= sleep_time_ms)
                break;
            timeout -= sleep_time_ms;
        }
    }

    return 0;
}
|
||||
49
driver/sw/rtlsim/Makefile
Normal file
49
driver/sw/rtlsim/Makefile
Normal file
@@ -0,0 +1,49 @@
|
||||
#CFLAGS += -std=c++11 -O3 -Wall -Wextra -pedantic -Wfatal-errors
CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -pedantic -Wfatal-errors

#USE_MULTICORE=1

CFLAGS += -I../../include -I../../../../rtl/simulate

CFLAGS += -fPIC

CFLAGS += -DUSE_RTLSIM

LDFLAGS += -shared -pthread

ifdef USE_MULTICORE
  CFLAGS += -DUSE_MULTICORE
  RTL_TOP = Vortex_SOC
else
  RTL_TOP = Vortex
endif

SRCS = vortex.cpp ../vx_utils.cpp ../../../rtl/simulate/$(RTL_TOP).cpp

RTL_INCLUDE = -I../../../rtl -I../../../rtl/interfaces -I../../../rtl/cache -I../../../rtl/VX_cache -I../../../rtl/shared_memory -I../../../rtl/pipe_regs -I../../../rtl/compat

THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
VL_FLAGS += --threads $(THREADS)

VL_FLAGS += -Wno-UNOPTFLAT -Wno-WIDTH

VL_FLAGS += -Wno-UNDRIVEN --Wno-PINMISSING -Wno-STMTDLY -Wno-WIDTH -Wno-UNSIGNED -Wno-UNOPTFLAT -Wno-LITENDIAN

# Debugging
VL_FLAGS += --trace -DVL_DEBUG=1
CFLAGS += -DVCD_OUTPUT

PROJECT = libvortex.so

all: $(PROJECT)

.PHONY: all clean build_config

# Regenerate the RTL configuration before every build.
build_config:
	(cd ../../../rtl && ./gen_config.py --rtl_locations)

# build_config is phony, so this rule re-runs on every invocation.
$(PROJECT): $(SRCS) build_config
	verilator --exe --cc $(RTL_TOP).v $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
	$(MAKE) -j -C obj_dir -f V$(RTL_TOP).mk

clean:
	rm -rf $(PROJECT) obj_dir
|
||||
327
driver/sw/rtlsim/vortex.cpp
Normal file
327
driver/sw/rtlsim/vortex.cpp
Normal file
@@ -0,0 +1,327 @@
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
#include <chrono>
|
||||
|
||||
#include <vortex.h>
|
||||
#include <ram.h>
|
||||
|
||||
#ifdef USE_MULTICORE
|
||||
#include <Vortex_SOC.h>
|
||||
#else
|
||||
#include <Vortex.h>
|
||||
#endif
|
||||
|
||||
#define PAGE_SIZE 4096
|
||||
|
||||
#define CHECK_RES(_expr) \
|
||||
do { \
|
||||
fpga_result res = _expr; \
|
||||
if (res == FPGA_OK) \
|
||||
break; \
|
||||
printf("OPAE Error: '%s' returned %d!\n", #_expr, (int)res); \
|
||||
return -1; \
|
||||
} while (false)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static size_t align_size(size_t size) {
|
||||
return VX_CACHE_LINESIZE * ((size + VX_CACHE_LINESIZE - 1) / VX_CACHE_LINESIZE);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class vx_device;
|
||||
|
||||
class vx_buffer {
|
||||
public:
|
||||
vx_buffer(size_t size, vx_device* device)
|
||||
: size_(size)
|
||||
, device_(device) {
|
||||
auto aligned_asize = align_size(size);
|
||||
data_ = malloc(aligned_asize);
|
||||
}
|
||||
|
||||
~vx_buffer() {
|
||||
if (data_) {
|
||||
free(data_);
|
||||
}
|
||||
}
|
||||
|
||||
void* data() const {
|
||||
return data_;
|
||||
}
|
||||
|
||||
size_t size() const {
|
||||
return size_;
|
||||
}
|
||||
|
||||
vx_device* device() const {
|
||||
return device_;
|
||||
}
|
||||
|
||||
private:
|
||||
size_t size_;
|
||||
vx_device* device_;
|
||||
void* data_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class vx_device {
|
||||
public:
|
||||
vx_device()
|
||||
: is_done_(false)
|
||||
, mem_allocation_(VX_ALLOC_BASE_ADDR)
|
||||
, vortex_(&ram_) {
|
||||
thread_ = new std::thread(__thread_proc__, this);
|
||||
}
|
||||
|
||||
~vx_device() {
|
||||
if (thread_) {
|
||||
mutex_.lock();
|
||||
is_done_ = true;
|
||||
mutex_.unlock();
|
||||
|
||||
thread_->join();
|
||||
delete thread_;
|
||||
}
|
||||
}
|
||||
|
||||
int alloc_local_mem(size_t size, size_t* dev_maddr) {
|
||||
size_t asize = align_size(size);
|
||||
if (mem_allocation_ + asize > VX_LOCAL_MEM_SIZE)
|
||||
return -1;
|
||||
*dev_maddr = mem_allocation_;
|
||||
mem_allocation_ += asize;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int upload(void* src, size_t dest_addr, size_t size, size_t src_offset) {
|
||||
size_t asize = align_size(size);
|
||||
if (dest_addr + asize > ram_.size())
|
||||
return -1;
|
||||
|
||||
/*printf("VXDRV: upload %d bytes to 0x%x\n", size, dest_addr);
|
||||
for (int i = 0; i < size; i += 4) {
|
||||
printf("mem-write: 0x%x <- 0x%x\n", dest_addr + i, *(uint32_t*)((uint8_t*)src + src_offset + i));
|
||||
}*/
|
||||
|
||||
ram_.write(dest_addr, asize, (uint8_t*)src + src_offset);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int download(const void* dest, size_t src_addr, size_t size, size_t dest_offset) {
|
||||
size_t asize = align_size(size);
|
||||
if (src_addr + asize > ram_.size())
|
||||
return -1;
|
||||
|
||||
ram_.read(src_addr, asize, (uint8_t*)dest + dest_offset);
|
||||
|
||||
/*printf("VXDRV: download %d bytes from 0x%x\n", size, src_addr);
|
||||
for (int i = 0; i < size; i += 4) {
|
||||
printf("mem-read: 0x%x -> 0x%x\n", src_addr + i, *(uint32_t*)((uint8_t*)dest + dest_offset + i));
|
||||
}*/
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int flush_caches(size_t dev_maddr, size_t size) {
|
||||
|
||||
mutex_.lock();
|
||||
vortex_.flush_caches(dev_maddr, size);
|
||||
mutex_.unlock();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int start() {
|
||||
|
||||
mutex_.lock();
|
||||
vortex_.reset();
|
||||
mutex_.unlock();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int wait(long long timeout) {
|
||||
auto timeout_sec = (timeout < 0) ? timeout : (timeout / 1000);
|
||||
for (;;) {
|
||||
mutex_.lock();
|
||||
bool is_busy = vortex_.is_busy();
|
||||
mutex_.unlock();
|
||||
|
||||
if (!is_busy || 0 == timeout_sec--)
|
||||
break;
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::seconds(1));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
void thread_proc() {
|
||||
std::cout << "Device ready..." << std::endl;
|
||||
|
||||
for (;;) {
|
||||
mutex_.lock();
|
||||
bool is_done = is_done_;
|
||||
mutex_.unlock();
|
||||
|
||||
if (is_done)
|
||||
break;
|
||||
|
||||
mutex_.lock();
|
||||
vortex_.step();
|
||||
mutex_.unlock();
|
||||
}
|
||||
|
||||
std::cout << "Device shutdown..." << std::endl;
|
||||
}
|
||||
|
||||
static void __thread_proc__(vx_device* device) {
|
||||
device->thread_proc();
|
||||
}
|
||||
|
||||
bool is_done_;
|
||||
size_t mem_allocation_;
|
||||
RAM ram_;
|
||||
#ifdef USE_MULTICORE
|
||||
Vortex_SOC vortex_;
|
||||
#else
|
||||
Vortex vortex_;
|
||||
#endif
|
||||
std::thread* thread_;
|
||||
std::mutex mutex_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Create a simulated device and return its handle through `hdevice`.
// Returns 0 on success, -1 for a null argument or when construction fails.
extern int vx_dev_open(vx_device_h* hdevice) {
    if (nullptr == hdevice)
        return -1;

    try {
        *hdevice = new vx_device();
    } catch (...) {
        // this function is declared with C linkage in vortex.h, so
        // exceptions must not propagate to the caller
        return -1;
    }

    return 0;
}
|
||||
|
||||
// Destroy a device created by vx_dev_open.
// Returns -1 for a null handle, 0 otherwise.
extern int vx_dev_close(vx_device_h hdevice) {
    auto* const dev = static_cast<vx_device*>(hdevice);
    if (!dev)
        return -1;

    delete dev;
    return 0;
}
|
||||
|
||||
// Validate the arguments and forward the request to the device allocator.
// Returns -1 for a null handle, null output pointer or zero size; otherwise
// whatever alloc_local_mem() returns.
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
    const bool args_ok = (nullptr != hdevice)
                      && (nullptr != dev_maddr)
                      && (0 != size);
    if (!args_ok)
        return -1;

    return static_cast<vx_device*>(hdevice)->alloc_local_mem(size, dev_maddr);
}
|
||||
|
||||
// Validate the arguments and forward the flush request to the device.
// Returns -1 for a null handle or zero size; otherwise whatever
// flush_caches() returns.
extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
    const bool args_ok = (nullptr != hdevice) && (0 != size);
    if (!args_ok)
        return -1;

    return static_cast<vx_device*>(hdevice)->flush_caches(dev_maddr, size);
}
|
||||
|
||||
|
||||
// Allocate a host buffer of `size` bytes associated with the device and
// return its handle through `hbuffer`.
// Returns 0 on success, -1 on invalid arguments or allocation failure.
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
    if (nullptr == hdevice
     || 0 == size
     || nullptr == hbuffer)
        return -1;

    vx_device *device = ((vx_device*)hdevice);

    vx_buffer* buffer = nullptr;
    try {
        buffer = new vx_buffer(size, device);
    } catch (...) {
        // this function is declared with C linkage in vortex.h, so
        // exceptions must not propagate to the caller
        return -1;
    }

    // the buffer object exists but its storage could not be allocated
    if (nullptr == buffer->data()) {
        delete buffer;
        return -1;
    }

    *hbuffer = buffer;

    return 0;
}
|
||||
|
||||
// Return the host address of the buffer storage, or null for a null handle.
extern volatile void* vx_host_ptr(vx_buffer_h hbuffer) {
    auto* const buf = static_cast<vx_buffer*>(hbuffer);
    if (!buf)
        return nullptr;
    return buf->data();
}
|
||||
|
||||
// Destroy a buffer created by vx_alloc_shared_mem.
// Returns -1 for a null handle, 0 otherwise.
extern int vx_buf_release(vx_buffer_h hbuffer) {
    auto* const buf = static_cast<vx_buffer*>(hbuffer);
    if (!buf)
        return -1;

    delete buf;
    return 0;
}
|
||||
|
||||
// Upload `size` bytes, starting at `src_offset` inside the buffer, to device
// memory at `dev_maddr`.
// Returns -1 on invalid arguments or when the range does not fit inside the
// buffer; otherwise whatever the device's upload() returns.
extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset) {
    if (nullptr == hbuffer
     || 0 == size)
        return -1;

    auto buffer = (vx_buffer*)hbuffer;

    // bound checking, written so that size + src_offset cannot wrap around
    if (src_offset > buffer->size()
     || size > buffer->size() - src_offset)
        return -1;

    return buffer->device()->upload(buffer->data(), dev_maddr, size, src_offset);
}
|
||||
|
||||
// Download `size` bytes from device memory at `dev_maddr` into the buffer,
// starting at `dest_offset`.
// Returns -1 on invalid arguments or when the range does not fit inside the
// buffer; otherwise whatever the device's download() returns.
extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dest_offset) {
    if (nullptr == hbuffer
     || 0 == size)
        return -1;

    auto buffer = (vx_buffer*)hbuffer;

    // bound checking, written so that size + dest_offset cannot wrap around
    if (dest_offset > buffer->size()
     || size > buffer->size() - dest_offset)
        return -1;

    return buffer->device()->download(buffer->data(), dev_maddr, size, dest_offset);
}
|
||||
|
||||
// Start execution on the device.
// Returns -1 for a null handle; otherwise whatever start() returns.
extern int vx_start(vx_device_h hdevice) {
    auto* const dev = static_cast<vx_device*>(hdevice);
    return dev ? dev->start() : -1;
}
|
||||
|
||||
// Wait for the device, forwarding `timeout` unchanged to the device's wait().
// Returns -1 for a null handle; otherwise whatever wait() returns.
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
    auto* const dev = static_cast<vx_device*>(hdevice);
    return dev ? dev->wait(timeout) : -1;
}
|
||||
37
driver/sw/simx/Makefile
Normal file
37
driver/sw/simx/Makefile
Normal file
@@ -0,0 +1,37 @@
|
||||
CFLAGS += -std=c++11 -O3 -Wall -Wextra -pedantic -Wfatal-errors
#CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -pedantic -Wfatal-errors

MAX_WARPS ?= 8
MAX_THREADS ?= 4

CFLAGS += -I../../include -I../../../../simX/include

CFLAGS += -fPIC

CFLAGS += -DUSE_SIMX

CFLAGS += -DMAX_WARPS=$(MAX_WARPS) -DMAX_THREADS=$(MAX_THREADS)

LDFLAGS += -shared -pthread

SRCS = vortex.cpp ../vx_utils.cpp ../../../simX/args.cpp ../../../simX/mem.cpp ../../../simX/core.cpp ../../../simX/instruction.cpp ../../../simX/enc.cpp ../../../simX/util.cpp

RTL_TOP = ../../../simX/cache_simX.v

RTL_INCLUDE = -I../../../old_rtl -I../../../old_rtl/interfaces -I../../../old_rtl/cache -I../../../old_rtl/shared_memory

THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
VL_FLAGS += --threads $(THREADS)

VL_FLAGS += -Wno-UNOPTFLAT -Wno-WIDTH

PROJECT = libvortex.so

all: $(PROJECT)

.PHONY: all clean

# Generate the verilated sources, then build them with the makefile that
# verilator wrote into obj_dir.
$(PROJECT): $(SRCS)
	verilator --exe --cc $(RTL_TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
	$(MAKE) -j -C obj_dir -f Vcache_simX.mk

clean:
	rm -rf $(PROJECT) obj_dir
|
||||
BIN
driver/sw/simx/libvortex.so
Executable file
BIN
driver/sw/simx/libvortex.so
Executable file
Binary file not shown.
324
driver/sw/simx/vortex.cpp
Normal file
324
driver/sw/simx/vortex.cpp
Normal file
@@ -0,0 +1,324 @@
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
#include <chrono>
|
||||
|
||||
#include <vortex.h>
|
||||
#include "core.h"
|
||||
|
||||
#define PAGE_SIZE 4096
|
||||
|
||||
#define CHECK_RES(_expr) \
|
||||
do { \
|
||||
fpga_result res = _expr; \
|
||||
if (res == FPGA_OK) \
|
||||
break; \
|
||||
printf("OPAE Error: '%s' returned %d!\n", #_expr, (int)res); \
|
||||
return -1; \
|
||||
} while (false)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static size_t align_size(size_t size) {
|
||||
return VX_CACHE_LINESIZE * ((size + VX_CACHE_LINESIZE - 1) / VX_CACHE_LINESIZE);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class vx_device;
|
||||
|
||||
class vx_buffer {
|
||||
public:
|
||||
vx_buffer(size_t size, vx_device* device)
|
||||
: size_(size)
|
||||
, device_(device) {
|
||||
auto aligned_asize = align_size(size);
|
||||
data_ = malloc(aligned_asize);
|
||||
}
|
||||
|
||||
~vx_buffer() {
|
||||
if (data_) {
|
||||
free(data_);
|
||||
}
|
||||
}
|
||||
|
||||
void* data() const {
|
||||
return data_;
|
||||
}
|
||||
|
||||
size_t size() const {
|
||||
return size_;
|
||||
}
|
||||
|
||||
vx_device* device() const {
|
||||
return device_;
|
||||
}
|
||||
|
||||
private:
|
||||
size_t size_;
|
||||
vx_device* device_;
|
||||
void* data_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class vx_device {
|
||||
public:
|
||||
vx_device()
|
||||
: is_done_(false)
|
||||
, is_running_(false)
|
||||
, mem_allocation_(VX_ALLOC_BASE_ADDR)
|
||||
, thread_(__thread_proc__, this)
|
||||
{}
|
||||
|
||||
~vx_device() {
|
||||
mutex_.lock();
|
||||
is_done_ = true;
|
||||
mutex_.unlock();
|
||||
|
||||
thread_.join();
|
||||
}
|
||||
|
||||
int alloc_local_mem(size_t size, size_t* dev_maddr) {
|
||||
size_t asize = align_size(size);
|
||||
if (mem_allocation_ + asize > VX_LOCAL_MEM_SIZE)
|
||||
return -1;
|
||||
*dev_maddr = mem_allocation_;
|
||||
mem_allocation_ += asize;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int upload(void* src, size_t dest_addr, size_t size, size_t src_offset) {
|
||||
size_t asize = align_size(size);
|
||||
if (dest_addr + asize > ram_.size())
|
||||
return -1;
|
||||
|
||||
/*printf("VXDRV: upload %d bytes to 0x%x\n", size, dest_addr);
|
||||
for (int i = 0; i < size; i += 4) {
|
||||
printf("mem-write: 0x%x <- 0x%x\n", dest_addr + i, *(uint32_t*)((uint8_t*)src + src_offset + i));
|
||||
}*/
|
||||
|
||||
ram_.write(dest_addr, asize, (uint8_t*)src + src_offset);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int download(const void* dest, size_t src_addr, size_t size, size_t dest_offset) {
|
||||
size_t asize = align_size(size);
|
||||
if (src_addr + asize > ram_.size())
|
||||
return -1;
|
||||
|
||||
ram_.read(src_addr, asize, (uint8_t*)dest + dest_offset);
|
||||
|
||||
/*printf("VXDRV: download %d bytes from 0x%x\n", size, src_addr);
|
||||
for (int i = 0; i < size; i += 4) {
|
||||
printf("mem-read: 0x%x -> 0x%x\n", src_addr + i, *(uint32_t*)((uint8_t*)dest + dest_offset + i));
|
||||
}*/
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int start() {
|
||||
|
||||
mutex_.lock();
|
||||
is_running_ = true;
|
||||
mutex_.unlock();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int wait(long long timeout) {
|
||||
auto timeout_sec = (timeout < 0) ? timeout : (timeout / 1000);
|
||||
for (;;) {
|
||||
mutex_.lock();
|
||||
bool is_running = is_running_;
|
||||
mutex_.unlock();
|
||||
|
||||
if (!is_running || 0 == timeout_sec--)
|
||||
break;
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::seconds(1));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
void run() {
|
||||
Harp::ArchDef arch("rv32i", false, MAX_WARPS, MAX_THREADS);
|
||||
Harp::WordDecoder dec(arch);
|
||||
Harp::MemoryUnit mu(PAGE_SIZE, arch.getWordSize(), true);
|
||||
Harp::Core core(arch, dec, mu);
|
||||
mu.attach(ram_, 0);
|
||||
|
||||
while (core.running()) {
|
||||
core.step();
|
||||
}
|
||||
core.printStats();
|
||||
}
|
||||
|
||||
void thread_proc() {
|
||||
std::cout << "Device ready..." << std::endl;
|
||||
|
||||
for (;;) {
|
||||
mutex_.lock();
|
||||
bool is_done = is_done_;
|
||||
bool is_running = is_running_;
|
||||
mutex_.unlock();
|
||||
|
||||
if (is_done)
|
||||
break;
|
||||
|
||||
if (is_running) {
|
||||
std::cout << "Device running..." << std::endl;
|
||||
|
||||
this->run();
|
||||
|
||||
mutex_.lock();
|
||||
is_running_ = false;
|
||||
mutex_.unlock();
|
||||
|
||||
std::cout << "Device ready..." << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Device shutdown..." << std::endl;
|
||||
}
|
||||
|
||||
// Static trampoline that forwards to the instance's thread_proc().
// Presumably the entry point handed to thread_ -- the constructor is not
// visible here, so confirm.
static void __thread_proc__(vx_device* device) {
  (*device).thread_proc();
}
|
||||
|
||||
bool is_done_;
|
||||
bool is_running_;
|
||||
size_t mem_allocation_;
|
||||
std::thread thread_;
|
||||
Harp::RAM ram_;
|
||||
std::mutex mutex_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Create a new simulated device and hand its handle back through hdevice.
// Returns 0 on success, -1 when hdevice is null.
extern int vx_dev_open(vx_device_h* hdevice) {
  if (hdevice != nullptr) {
    *hdevice = new vx_device();
    return 0;
  }
  return -1;
}
|
||||
|
||||
// Destroy a device previously returned by vx_dev_open().
// Returns 0 on success, -1 when the handle is null.
extern int vx_dev_close(vx_device_h hdevice) {
  if (hdevice != nullptr) {
    delete static_cast<vx_device*>(hdevice);
    return 0;
  }
  return -1;
}
|
||||
|
||||
// Allocate `size` bytes of device memory and return the device address in
// *dev_maddr. Returns -1 for a null handle, a null output pointer or a zero
// size; otherwise forwards the result of vx_device::alloc_local_mem().
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
  const bool bad_args = (nullptr == hdevice) || (nullptr == dev_maddr) || (size == 0);
  if (bad_args)
    return -1;

  auto* const dev = static_cast<vx_device*>(hdevice);
  return dev->alloc_local_mem(size, dev_maddr);
}
|
||||
|
||||
// Cache flush entry point. Only the arguments are validated: simX does not
// need this functionality, so a valid call does nothing and returns 0.
// Returns -1 for a null handle or a zero size.
extern int vx_flush_caches(vx_device_h hdevice, size_t /*dev_maddr*/, size_t size) {
  if (hdevice != nullptr && size != 0) {
    // Nothing to do for simX.
    return 0;
  }
  return -1;
}
|
||||
|
||||
// Allocate a host-visible buffer of `size` bytes bound to the given device
// and return its handle in *hbuffer.
// Returns 0 on success; -1 on a null handle, zero size, null output pointer,
// or when the new buffer has no backing storage.
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
  const bool bad_args = (nullptr == hdevice) || (size == 0) || (nullptr == hbuffer);
  if (bad_args)
    return -1;

  auto* const dev = static_cast<vx_device*>(hdevice);
  vx_buffer* const created = new vx_buffer(size, dev);

  if (created->data() != nullptr) {
    *hbuffer = created;
    return 0;
  }

  // The buffer object exists but has no storage: discard it.
  delete created;
  return -1;
}
|
||||
|
||||
// Return the host address of a shared buffer (vx_buffer::data()),
// or nullptr when the handle is null.
extern volatile void* vx_host_ptr(vx_buffer_h hbuffer) {
  if (hbuffer != nullptr) {
    auto* const buf = static_cast<vx_buffer*>(hbuffer);
    return buf->data();
  }
  return nullptr;
}
|
||||
|
||||
// Destroy a buffer obtained from vx_alloc_shared_mem().
// Returns 0 on success, -1 when the handle is null.
extern int vx_buf_release(vx_buffer_h hbuffer) {
  if (hbuffer != nullptr) {
    delete static_cast<vx_buffer*>(hbuffer);
    return 0;
  }
  return -1;
}
|
||||
|
||||
// Copy `size` bytes from the shared buffer, starting at `src_offset`, to
// device memory at `dev_maddr`.
// Returns -1 when the handle is null, the size is zero, or the requested
// range [src_offset, src_offset + size) does not lie inside the buffer;
// otherwise forwards the result of vx_device::upload().
extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset) {
  if (nullptr == hbuffer || 0 == size)
    return -1;

  auto* const buffer = static_cast<vx_buffer*>(hbuffer);

  // Overflow-safe range check: `size + src_offset` can wrap around in size_t
  // and slip past a naive comparison, so compare against the remaining room.
  const size_t capacity = buffer->size();
  if (src_offset > capacity || size > capacity - src_offset)
    return -1;

  return buffer->device()->upload(buffer->data(), dev_maddr, size, src_offset);
}
|
||||
|
||||
// Copy `size` bytes from device memory at `dev_maddr` into the shared
// buffer, starting at `dest_offset`.
// Returns -1 when the handle is null, the size is zero, or the requested
// range [dest_offset, dest_offset + size) does not lie inside the buffer;
// otherwise forwards the result of vx_device::download().
extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dest_offset) {
  if (nullptr == hbuffer || 0 == size)
    return -1;

  auto* const buffer = static_cast<vx_buffer*>(hbuffer);

  // Overflow-safe range check: `size + dest_offset` can wrap around in size_t
  // and slip past a naive comparison, so compare against the remaining room.
  const size_t capacity = buffer->size();
  if (dest_offset > capacity || size > capacity - dest_offset)
    return -1;

  return buffer->device()->download(buffer->data(), dev_maddr, size, dest_offset);
}
|
||||
|
||||
// Request execution on the device. Returns -1 for a null handle, otherwise
// the result of vx_device::start().
extern int vx_start(vx_device_h hdevice) {
  if (hdevice != nullptr) {
    auto* const dev = static_cast<vx_device*>(hdevice);
    return dev->start();
  }
  return -1;
}
|
||||
|
||||
// Wait for the device to finish, passing `timeout` through to
// vx_device::wait(). Returns -1 for a null handle, otherwise that call's result.
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
  if (hdevice != nullptr) {
    auto* const dev = static_cast<vx_device*>(hdevice);
    return dev->wait(timeout);
  }
  return -1;
}
|
||||
91
driver/sw/vx_utils.cpp
Normal file
91
driver/sw/vx_utils.cpp
Normal file
@@ -0,0 +1,91 @@
|
||||
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iostream>
#include <vector>

#include <vortex.h>
|
||||
|
||||
// Upload a kernel image to the device at VX_KERNEL_BASE_ADDR.
// The image is staged through a shared buffer of TRANSFER_SIZE bytes and
// sent in chunks of at most that size. When USE_SIMX is defined, a default
// startup routine is written at address 0 and an I/O trap stub at
// 0x70000000 before the image itself.
// Returns 0 on success, -1 on invalid arguments or buffer allocation
// failure, or the error code of the first failing vx_copy_to_dev() call.
int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size) {
  if (nullptr == content || 0 == size)
    return -1;

  static constexpr uint32_t TRANSFER_SIZE = 4096;

  // allocate the staging buffer
  vx_buffer_h buffer;
  if (vx_alloc_shared_mem(device, TRANSFER_SIZE, &buffer) != 0)
    return -1;

  // Releases the staging buffer on every exit path below.
  struct buffer_releaser {
    vx_buffer_h handle;
    ~buffer_releaser() { vx_buf_release(handle); }
  } releaser{buffer};

  // host view of the staging buffer
  uint8_t* const staging = static_cast<uint8_t*>(const_cast<void*>(vx_host_ptr(buffer)));

#if defined(USE_SIMX)
  {
    uint32_t* const words = reinterpret_cast<uint32_t*>(staging);

    // default startup routine
    const uint32_t startup[5] = {
      0xf1401073, 0xf1401073, 0x30101073, 0x800000b7, 0x000080e7
    };
    for (size_t i = 0; i < 5; ++i)
      words[i] = startup[i];

    int status = vx_copy_to_dev(buffer, 0, 5 * 4, 0);
    if (status != 0)
      return status;

    // newlib io simulator trap
    words[0] = 0x00008067;
    status = vx_copy_to_dev(buffer, 0x70000000, 4, 0);
    if (status != 0)
      return status;
  }
#endif

  // upload the content chunk by chunk
  const uint8_t* const src = static_cast<const uint8_t*>(content);
  for (size_t offset = 0; offset < size; ) {
    const size_t chunk = std::min<size_t>(TRANSFER_SIZE, size - offset);
    std::memcpy(staging, src + offset, chunk);

    const int status = vx_copy_to_dev(buffer, VX_KERNEL_BASE_ADDR + offset, chunk, 0);
    if (status != 0)
      return status;

    offset += chunk;
  }

  return 0;
}
|
||||
|
||||
// Load a kernel image from `filename` and upload it with
// vx_upload_kernel_bytes().
// Returns -1 when the name is null, the file cannot be opened, its length
// cannot be determined or is zero, or it cannot be read completely;
// otherwise returns the result of vx_upload_kernel_bytes().
int vx_upload_kernel_file(vx_device_h device, const char* filename) {
  if (nullptr == filename) {
    std::cout << "error: invalid kernel file name" << std::endl;
    return -1;
  }

  // The image is raw binary data: open in binary mode so no newline
  // translation is applied on platforms that distinguish text streams.
  std::ifstream ifs(filename, std::ios::binary);
  if (!ifs) {
    std::cout << "error: " << filename << " not found" << std::endl;
    return -1;
  }

  // get length of file; tellg() reports -1 on failure
  ifs.seekg(0, ifs.end);
  const std::streamoff length = ifs.tellg();
  ifs.seekg(0, ifs.beg);
  if (!ifs || length <= 0) {
    std::cout << "error: failed to read " << filename << std::endl;
    return -1;
  }

  // read file content into an owning buffer (freed automatically)
  std::vector<char> content(static_cast<size_t>(length));
  ifs.read(content.data(), length);
  if (!ifs || ifs.gcount() != length) {
    std::cout << "error: failed to read " << filename << std::endl;
    return -1;
  }

  // upload
  return vx_upload_kernel_bytes(device, content.data(), content.size());
}
|
||||
Reference in New Issue
Block a user