redesigned driver demo, fixed startup code, removed --cpu from simx,

This commit is contained in:
Blaise Tine
2020-03-29 00:38:17 -04:00
parent 2d5cf89e00
commit c8a6470595
63 changed files with 40963 additions and 364160 deletions

View File

@@ -11,13 +11,18 @@ typedef void* vx_device_h;
typedef void* vx_buffer_h;
#define VX_LOCAL_MEM_SIZE 0xffffffff
// device caps ids
#define VX_CAPS_VERSION 0x0
#define VX_CAPS_MAX_CORES 0x1
#define VX_CAPS_MAX_WARPS 0x2
#define VX_CAPS_MAX_THREADS 0x3
#define VX_CAPS_CACHE_LINESIZE 0x4
#define VX_CAPS_LOCAL_MEM_SIZE 0x5
#define VX_CAPS_ALLOC_BASE_ADDR 0x6
#define VX_CAPS_KERNEL_BASE_ADDR 0x7
#define VX_ALLOC_BASE_ADDR 0x10000000
#define VX_KERNEL_BASE_ADDR 0x80000000
#define VX_CACHE_LINESIZE 64
// return device configurations
int vx_dev_caps(int caps_id);
// open the device and connect to it
int vx_dev_open(vx_device_h* hdevice);

View File

@@ -47,7 +47,8 @@ typedef struct vx_buffer_ {
} vx_buffer_t;
// Rounds `size` up to a whole multiple of the cache line size and returns it.
// NOTE(review): two versions of the body appear back to back here, presumably
// the removed and the added side of a diff hunk; as written, execution never
// gets past the first return statement - confirm which one is intended.
static size_t align_size(size_t size) {
// variant 1: line size comes from the compile-time VX_CACHE_LINESIZE macro
return VX_CACHE_LINESIZE * ((size + VX_CACHE_LINESIZE - 1) / VX_CACHE_LINESIZE);
// variant 2: line size is queried through vx_dev_caps(VX_CAPS_CACHE_LINESIZE)
uint32_t cache_block_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
return cache_block_size * ((size + cache_block_size - 1) / cache_block_size);
}
///////////////////////////////////////////////////////////////////////////////
@@ -103,7 +104,7 @@ extern int vx_dev_open(vx_device_h* hdevice) {
}
device->fpga = accel_handle;
device->mem_allocation = VX_ALLOC_BASE_ADDR;
device->mem_allocation = vx_dev_caps(VX_CAPS_ALLOC_BASE_ADDR);
*hdevice = device;
@@ -133,7 +134,8 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr)
vx_device_t *device = ((vx_device_t*)hdevice);
size_t asize = align_size(size);
if (device->mem_allocation + asize > VX_ALLOC_BASE_ADDR)
auto dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
if (device->mem_allocation + asize > dev_mem_size)
return -1;
*dev_maddr = device->mem_allocation;
@@ -245,7 +247,7 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, si
return -1;
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_AVM_ADDRESS, dev_maddr));
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_IO_ADDRESS, (buffer->io_addr + src_offset)/VX_CACHE_LINESIZE));
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_IO_ADDRESS, buffer->io_addr + src_offset);
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_DATA_SIZE, size));
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_CMD_TYPE, MMIO_CMD_TYPE_WRITE));
@@ -269,7 +271,7 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size,
return -1;
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_AVM_ADDRESS, dev_maddr));
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_IO_ADDRESS, (buffer->io_addr + dest_offset)/VX_CACHE_LINESIZE));
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_IO_ADDRESS, buffer->io_addr + dest_offset);
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_DATA_SIZE, size));
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_CMD_TYPE, MMIO_CMD_TYPE_READ));

View File

@@ -3,7 +3,7 @@ CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -pedantic -Wfatal-errors
#USE_MULTICORE=1
CFLAGS += -I../../include -I../../../../rtl/simulate
CFLAGS += -I../../include -I../../../../rtl/simulate -I../../../../runtime
CFLAGS += -fPIC

View File

@@ -30,7 +30,8 @@
///////////////////////////////////////////////////////////////////////////////
// Returns `size` rounded up to the next multiple of the cache line size.
// NOTE(review): the body below holds two alternative implementations in a row,
// presumably the old and new lines of a diff hunk; only the first return can
// execute as written - confirm which one should remain.
static size_t align_size(size_t size) {
// uses the VX_CACHE_LINESIZE macro as the line size
return VX_CACHE_LINESIZE * ((size + VX_CACHE_LINESIZE - 1) / VX_CACHE_LINESIZE);
// uses the value reported by vx_dev_caps(VX_CAPS_CACHE_LINESIZE) as the line size
uint32_t cache_block_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
return cache_block_size * ((size + cache_block_size - 1) / cache_block_size);
}
///////////////////////////////////////////////////////////////////////////////
@@ -76,10 +77,10 @@ class vx_device {
public:
// Constructs the device object: initializes members, resets vortex_, creates
// the thread that runs __thread_proc__, and sets the allocation cursor.
// NOTE(review): mem_allocation_ is written twice (initializer list, then the
// last statement of the body); presumably the initializer is the old line of
// the diff and the body assignment its replacement - confirm.
vx_device()
: is_done_(false)
, mem_allocation_(VX_ALLOC_BASE_ADDR)
, vortex_(&ram_) {
vortex_.reset();
// the thread is created before mem_allocation_ is reassigned on the next line
thread_ = new std::thread(__thread_proc__, this);
mem_allocation_ = vx_dev_caps(VX_CAPS_ALLOC_BASE_ADDR);
}
~vx_device() {
@@ -95,7 +96,8 @@ public:
int alloc_local_mem(size_t size, size_t* dev_maddr) {
size_t asize = align_size(size);
if (mem_allocation_ + asize > VX_LOCAL_MEM_SIZE)
auto dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
if (mem_allocation_ + asize > dev_mem_size)
return -1;
*dev_maddr = mem_allocation_;
mem_allocation_ += asize;

View File

@@ -1,17 +1,12 @@
CFLAGS += -std=c++11 -O3 -Wall -Wextra -pedantic -Wfatal-errors
#CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -pedantic -Wfatal-errors
MAX_WARPS ?= 8
MAX_THREADS ?= 4
CFLAGS += -I../../include -I../../../../simX/include
CFLAGS += -I../../include -I../../../../simX/include -I../../../../runtime
CFLAGS += -fPIC
CFLAGS += -DUSE_SIMX
CFLAGS += -DMAX_WARPS=$(MAX_WARPS) -DMAX_THREADS=$(MAX_THREADS)
LDFLAGS += -shared -pthread
SRCS = vortex.cpp ../vx_utils.cpp ../../../simX/args.cpp ../../../simX/mem.cpp ../../../simX/core.cpp ../../../simX/instruction.cpp ../../../simX/enc.cpp ../../../simX/util.cpp

View File

@@ -8,7 +8,8 @@
#include <chrono>
#include <vortex.h>
#include "core.h"
#include <core.h>
#include <config.h>
#define PAGE_SIZE 4096
@@ -24,7 +25,8 @@
///////////////////////////////////////////////////////////////////////////////
// Pads `size` up to a multiple of the cache line size and returns the result.
// NOTE(review): a macro-based and a vx_dev_caps()-based implementation follow
// one another here, presumably the two sides of a diff hunk; as written the
// function always leaves through the first return - confirm the intended one.
static size_t align_size(size_t size) {
// line size taken from the VX_CACHE_LINESIZE macro
return VX_CACHE_LINESIZE * ((size + VX_CACHE_LINESIZE - 1) / VX_CACHE_LINESIZE);
// line size taken from vx_dev_caps(VX_CAPS_CACHE_LINESIZE)
uint32_t cache_block_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
return cache_block_size * ((size + cache_block_size - 1) / cache_block_size);
}
///////////////////////////////////////////////////////////////////////////////
@@ -71,9 +73,9 @@ public:
vx_device()
: is_done_(false)
, is_running_(false)
, mem_allocation_(VX_ALLOC_BASE_ADDR)
, thread_(__thread_proc__, this)
{}
, thread_(__thread_proc__, this) {
mem_allocation_ = vx_dev_caps(VX_CAPS_ALLOC_BASE_ADDR);
}
~vx_device() {
mutex_.lock();
@@ -84,8 +86,9 @@ public:
}
int alloc_local_mem(size_t size, size_t* dev_maddr) {
size_t asize = align_size(size);
if (mem_allocation_ + asize > VX_LOCAL_MEM_SIZE)
auto asize = align_size(size);
auto dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
if (mem_allocation_ + asize > dev_mem_size)
return -1;
*dev_maddr = mem_allocation_;
mem_allocation_ += asize;
@@ -93,7 +96,7 @@ public:
}
int upload(void* src, size_t dest_addr, size_t size, size_t src_offset) {
size_t asize = align_size(size);
auto asize = align_size(size);
if (dest_addr + asize > ram_.size())
return -1;
@@ -148,7 +151,7 @@ public:
private:
void run() {
Harp::ArchDef arch("rv32i", false, MAX_WARPS, MAX_THREADS);
Harp::ArchDef arch("rv32i", NW, NT);
Harp::WordDecoder dec(arch);
Harp::MemoryUnit mu(PAGE_SIZE, arch.getWordSize(), true);
Harp::Core core(arch, dec, mu);

View File

@@ -2,18 +2,45 @@
#include <fstream>
#include <cstring>
#include <vortex.h>
#include <config.h>
int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size) {
// Reports one static configuration value of the device.
//
// caps_id: one of the VX_CAPS_* identifiers.
// Returns the value associated with caps_id. An unrecognized id is reported on
// std::cout and the process is then terminated through std::abort().
//
// NOTE(review): the local-memory size (0xffffffff) and the kernel base address
// (0x80000000) do not fit in a 32-bit int, so they are not returned as the
// positive values written below; on the usual two's-complement targets a
// caller recovers them by converting the result to an unsigned 32-bit type.
extern int vx_dev_caps(int caps_id) {
  int value = 0;

  switch (caps_id) {
  case VX_CAPS_VERSION:
    value = 0;
    break;
  case VX_CAPS_MAX_CORES:
    value = NUMBER_CORES;
    break;
  case VX_CAPS_MAX_WARPS:
    value = NW;
    break;
  case VX_CAPS_MAX_THREADS:
    value = NT;
    break;
  case VX_CAPS_CACHE_LINESIZE:
    value = GLOBAL_BLOCK_SIZE_BYTES;
    break;
  case VX_CAPS_LOCAL_MEM_SIZE:
    value = 0xffffffff; // wraps when stored in an int, see note above
    break;
  case VX_CAPS_ALLOC_BASE_ADDR:
    value = 0x10000000;
    break;
  case VX_CAPS_KERNEL_BASE_ADDR:
    value = 0x80000000; // wraps when stored in an int, see note above
    break;
  default:
    // unknown id: report it and stop the process, nothing is returned
    std::cout << "invalid caps id: " << caps_id << std::endl;
    std::abort();
  }

  return value;
}
extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size) {
int err = 0;
if (NULL == content || 0 == size)
return -1;
static constexpr uint32_t TRANSFER_SIZE = 4096;
uint32_t buffer_transfer_size = 4096;
uint32_t kernel_base_addr = vx_dev_caps(VX_CAPS_KERNEL_BASE_ADDR);
// allocate device buffer
vx_buffer_h buffer;
err = vx_alloc_shared_mem(device, TRANSFER_SIZE, &buffer);
err = vx_alloc_shared_mem(device, buffer_transfer_size, &buffer);
if (err != 0)
return -1;
@@ -48,9 +75,9 @@ int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size)
size_t offset = 0;
while (offset < size) {
auto chunk_size = std::min<size_t>(TRANSFER_SIZE, size - offset);
auto chunk_size = std::min<size_t>(buffer_transfer_size, size - offset);
std::memcpy(buf_ptr, (uint8_t*)content + offset, chunk_size);
err = vx_copy_to_dev(buffer, VX_KERNEL_BASE_ADDR + offset, chunk_size, 0);
err = vx_copy_to_dev(buffer, kernel_base_addr + offset, chunk_size, 0);
if (err != 0) {
vx_buf_release(buffer);
return err;
@@ -63,7 +90,7 @@ int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size)
return 0;
}
int vx_upload_kernel_file(vx_device_h device, const char* filename) {
extern int vx_upload_kernel_file(vx_device_h device, const char* filename) {
std::ifstream ifs(filename);
if (!ifs) {
std::cout << "error: " << filename << " not found" << std::endl;