redesigned driver demo, fixed startup code, removed --cpu from simx,
This commit is contained in:
@@ -11,13 +11,18 @@ typedef void* vx_device_h;
|
||||
|
||||
typedef void* vx_buffer_h;
|
||||
|
||||
#define VX_LOCAL_MEM_SIZE 0xffffffff
|
||||
// device caps ids
|
||||
#define VX_CAPS_VERSION 0x0
|
||||
#define VX_CAPS_MAX_CORES 0x1
|
||||
#define VX_CAPS_MAX_WARPS 0x2
|
||||
#define VX_CAPS_MAX_THREADS 0x3
|
||||
#define VX_CAPS_CACHE_LINESIZE 0x4
|
||||
#define VX_CAPS_LOCAL_MEM_SIZE 0x5
|
||||
#define VX_CAPS_ALLOC_BASE_ADDR 0x6
|
||||
#define VX_CAPS_KERNEL_BASE_ADDR 0x7
|
||||
|
||||
#define VX_ALLOC_BASE_ADDR 0x10000000
|
||||
|
||||
#define VX_KERNEL_BASE_ADDR 0x80000000
|
||||
|
||||
#define VX_CACHE_LINESIZE 64
|
||||
// return device configurations
|
||||
int vx_dev_caps(int caps_id);
|
||||
|
||||
// open the device and connect to it
|
||||
int vx_dev_open(vx_device_h* hdevice);
|
||||
|
||||
@@ -47,7 +47,8 @@ typedef struct vx_buffer_ {
|
||||
} vx_buffer_t;
|
||||
|
||||
static size_t align_size(size_t size) {
|
||||
return VX_CACHE_LINESIZE * ((size + VX_CACHE_LINESIZE - 1) / VX_CACHE_LINESIZE);
|
||||
uint32_t cache_block_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||
return cache_block_size * ((size + cache_block_size - 1) / cache_block_size);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@@ -103,7 +104,7 @@ extern int vx_dev_open(vx_device_h* hdevice) {
|
||||
}
|
||||
|
||||
device->fpga = accel_handle;
|
||||
device->mem_allocation = VX_ALLOC_BASE_ADDR;
|
||||
device->mem_allocation = vx_dev_caps(VX_CAPS_ALLOC_BASE_ADDR);
|
||||
|
||||
*hdevice = device;
|
||||
|
||||
@@ -133,7 +134,8 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr)
|
||||
vx_device_t *device = ((vx_device_t*)hdevice);
|
||||
|
||||
size_t asize = align_size(size);
|
||||
if (device->mem_allocation + asize > VX_ALLOC_BASE_ADDR)
|
||||
auto dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
|
||||
if (device->mem_allocation + asize > dev_mem_size)
|
||||
return -1;
|
||||
|
||||
*dev_maddr = device->mem_allocation;
|
||||
@@ -245,7 +247,7 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, si
|
||||
return -1;
|
||||
|
||||
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_AVM_ADDRESS, dev_maddr));
|
||||
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_IO_ADDRESS, (buffer->io_addr + src_offset)/VX_CACHE_LINESIZE));
|
||||
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_IO_ADDRESS, buffer->io_addr + src_offset);
|
||||
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_DATA_SIZE, size));
|
||||
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_CMD_TYPE, MMIO_CMD_TYPE_WRITE));
|
||||
|
||||
@@ -269,7 +271,7 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size,
|
||||
return -1;
|
||||
|
||||
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_AVM_ADDRESS, dev_maddr));
|
||||
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_IO_ADDRESS, (buffer->io_addr + dest_offset)/VX_CACHE_LINESIZE));
|
||||
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_IO_ADDRESS, buffer->io_addr + dest_offset);
|
||||
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_DATA_SIZE, size));
|
||||
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_CMD_TYPE, MMIO_CMD_TYPE_READ));
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@ CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -pedantic -Wfatal-errors
|
||||
|
||||
#USE_MULTICORE=1
|
||||
|
||||
CFLAGS += -I../../include -I../../../../rtl/simulate
|
||||
CFLAGS += -I../../include -I../../../../rtl/simulate -I../../../../runtime
|
||||
|
||||
CFLAGS += -fPIC
|
||||
|
||||
|
||||
@@ -30,7 +30,8 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static size_t align_size(size_t size) {
|
||||
return VX_CACHE_LINESIZE * ((size + VX_CACHE_LINESIZE - 1) / VX_CACHE_LINESIZE);
|
||||
uint32_t cache_block_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||
return cache_block_size * ((size + cache_block_size - 1) / cache_block_size);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@@ -76,10 +77,10 @@ class vx_device {
|
||||
public:
|
||||
vx_device()
|
||||
: is_done_(false)
|
||||
, mem_allocation_(VX_ALLOC_BASE_ADDR)
|
||||
, vortex_(&ram_) {
|
||||
vortex_.reset();
|
||||
thread_ = new std::thread(__thread_proc__, this);
|
||||
mem_allocation_ = vx_dev_caps(VX_CAPS_ALLOC_BASE_ADDR);
|
||||
}
|
||||
|
||||
~vx_device() {
|
||||
@@ -95,7 +96,8 @@ public:
|
||||
|
||||
int alloc_local_mem(size_t size, size_t* dev_maddr) {
|
||||
size_t asize = align_size(size);
|
||||
if (mem_allocation_ + asize > VX_LOCAL_MEM_SIZE)
|
||||
auto dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
|
||||
if (mem_allocation_ + asize > dev_mem_size)
|
||||
return -1;
|
||||
*dev_maddr = mem_allocation_;
|
||||
mem_allocation_ += asize;
|
||||
|
||||
@@ -1,17 +1,12 @@
|
||||
CFLAGS += -std=c++11 -O3 -Wall -Wextra -pedantic -Wfatal-errors
|
||||
#CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -pedantic -Wfatal-errors
|
||||
|
||||
MAX_WARPS ?= 8
|
||||
MAX_THREADS ?= 4
|
||||
|
||||
CFLAGS += -I../../include -I../../../../simX/include
|
||||
CFLAGS += -I../../include -I../../../../simX/include -I../../../../runtime
|
||||
|
||||
CFLAGS += -fPIC
|
||||
|
||||
CFLAGS += -DUSE_SIMX
|
||||
|
||||
CFLAGS += -DMAX_WARPS=$(MAX_WARPS) -DMAX_THREADS=$(MAX_THREADS)
|
||||
|
||||
LDFLAGS += -shared -pthread
|
||||
|
||||
SRCS = vortex.cpp ../vx_utils.cpp ../../../simX/args.cpp ../../../simX/mem.cpp ../../../simX/core.cpp ../../../simX/instruction.cpp ../../../simX/enc.cpp ../../../simX/util.cpp
|
||||
|
||||
@@ -8,7 +8,8 @@
|
||||
#include <chrono>
|
||||
|
||||
#include <vortex.h>
|
||||
#include "core.h"
|
||||
#include <core.h>
|
||||
#include <config.h>
|
||||
|
||||
#define PAGE_SIZE 4096
|
||||
|
||||
@@ -24,7 +25,8 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static size_t align_size(size_t size) {
|
||||
return VX_CACHE_LINESIZE * ((size + VX_CACHE_LINESIZE - 1) / VX_CACHE_LINESIZE);
|
||||
uint32_t cache_block_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||
return cache_block_size * ((size + cache_block_size - 1) / cache_block_size);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@@ -71,9 +73,9 @@ public:
|
||||
vx_device()
|
||||
: is_done_(false)
|
||||
, is_running_(false)
|
||||
, mem_allocation_(VX_ALLOC_BASE_ADDR)
|
||||
, thread_(__thread_proc__, this)
|
||||
{}
|
||||
, thread_(__thread_proc__, this) {
|
||||
mem_allocation_ = vx_dev_caps(VX_CAPS_ALLOC_BASE_ADDR);
|
||||
}
|
||||
|
||||
~vx_device() {
|
||||
mutex_.lock();
|
||||
@@ -84,8 +86,9 @@ public:
|
||||
}
|
||||
|
||||
int alloc_local_mem(size_t size, size_t* dev_maddr) {
|
||||
size_t asize = align_size(size);
|
||||
if (mem_allocation_ + asize > VX_LOCAL_MEM_SIZE)
|
||||
auto asize = align_size(size);
|
||||
auto dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
|
||||
if (mem_allocation_ + asize > dev_mem_size)
|
||||
return -1;
|
||||
*dev_maddr = mem_allocation_;
|
||||
mem_allocation_ += asize;
|
||||
@@ -93,7 +96,7 @@ public:
|
||||
}
|
||||
|
||||
int upload(void* src, size_t dest_addr, size_t size, size_t src_offset) {
|
||||
size_t asize = align_size(size);
|
||||
auto asize = align_size(size);
|
||||
if (dest_addr + asize > ram_.size())
|
||||
return -1;
|
||||
|
||||
@@ -148,7 +151,7 @@ public:
|
||||
private:
|
||||
|
||||
void run() {
|
||||
Harp::ArchDef arch("rv32i", false, MAX_WARPS, MAX_THREADS);
|
||||
Harp::ArchDef arch("rv32i", NW, NT);
|
||||
Harp::WordDecoder dec(arch);
|
||||
Harp::MemoryUnit mu(PAGE_SIZE, arch.getWordSize(), true);
|
||||
Harp::Core core(arch, dec, mu);
|
||||
|
||||
@@ -2,18 +2,45 @@
|
||||
#include <fstream>
|
||||
#include <cstring>
|
||||
#include <vortex.h>
|
||||
#include <config.h>
|
||||
|
||||
int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size) {
|
||||
extern int vx_dev_caps(int caps_id) {
|
||||
switch (caps_id) {
|
||||
case VX_CAPS_VERSION:
|
||||
return 0;
|
||||
case VX_CAPS_MAX_CORES:
|
||||
return NUMBER_CORES;
|
||||
case VX_CAPS_MAX_WARPS:
|
||||
return NW;
|
||||
case VX_CAPS_MAX_THREADS:
|
||||
return NT;
|
||||
case VX_CAPS_CACHE_LINESIZE:
|
||||
return GLOBAL_BLOCK_SIZE_BYTES;
|
||||
case VX_CAPS_LOCAL_MEM_SIZE:
|
||||
return 0xffffffff;
|
||||
case VX_CAPS_ALLOC_BASE_ADDR:
|
||||
return 0x10000000;
|
||||
case VX_CAPS_KERNEL_BASE_ADDR:
|
||||
return 0x80000000;
|
||||
default:
|
||||
std::cout << "invalid caps id: " << caps_id << std::endl;
|
||||
std::abort();
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size) {
|
||||
int err = 0;
|
||||
|
||||
if (NULL == content || 0 == size)
|
||||
return -1;
|
||||
|
||||
static constexpr uint32_t TRANSFER_SIZE = 4096;
|
||||
uint32_t buffer_transfer_size = 4096;
|
||||
uint32_t kernel_base_addr = vx_dev_caps(VX_CAPS_KERNEL_BASE_ADDR);
|
||||
|
||||
// allocate device buffer
|
||||
vx_buffer_h buffer;
|
||||
err = vx_alloc_shared_mem(device, TRANSFER_SIZE, &buffer);
|
||||
err = vx_alloc_shared_mem(device, buffer_transfer_size, &buffer);
|
||||
if (err != 0)
|
||||
return -1;
|
||||
|
||||
@@ -48,9 +75,9 @@ int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size)
|
||||
|
||||
size_t offset = 0;
|
||||
while (offset < size) {
|
||||
auto chunk_size = std::min<size_t>(TRANSFER_SIZE, size - offset);
|
||||
auto chunk_size = std::min<size_t>(buffer_transfer_size, size - offset);
|
||||
std::memcpy(buf_ptr, (uint8_t*)content + offset, chunk_size);
|
||||
err = vx_copy_to_dev(buffer, VX_KERNEL_BASE_ADDR + offset, chunk_size, 0);
|
||||
err = vx_copy_to_dev(buffer, kernel_base_addr + offset, chunk_size, 0);
|
||||
if (err != 0) {
|
||||
vx_buf_release(buffer);
|
||||
return err;
|
||||
@@ -63,7 +90,7 @@ int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int vx_upload_kernel_file(vx_device_h device, const char* filename) {
|
||||
extern int vx_upload_kernel_file(vx_device_h device, const char* filename) {
|
||||
std::ifstream ifs(filename);
|
||||
if (!ifs) {
|
||||
std::cout << "error: " << filename << " not found" << std::endl;
|
||||
|
||||
Reference in New Issue
Block a user