get device caps from CSRs

This commit is contained in:
Blaise Tine
2020-06-30 00:08:23 -07:00
parent f66c251309
commit 2de61b5982
13 changed files with 534 additions and 275 deletions

View File

@@ -4,31 +4,6 @@
#include <vortex.h>
#include <VX_config.h>
extern int vx_dev_caps(int caps_id) {
switch (caps_id) {
case VX_CAPS_VERSION:
return 0;
case VX_CAPS_MAX_CORES:
return NUM_CORES;
case VX_CAPS_MAX_WARPS:
return NUM_WARPS;
case VX_CAPS_MAX_THREADS:
return NUM_THREADS;
case VX_CAPS_CACHE_LINESIZE:
return 64;
case VX_CAPS_LOCAL_MEM_SIZE:
return 0xffffffff;
case VX_CAPS_ALLOC_BASE_ADDR:
return 0x10000000;
case VX_CAPS_KERNEL_BASE_ADDR:
return 0x80000000;
default:
std::cout << "invalid caps id: " << caps_id << std::endl;
std::abort();
return 0;
}
}
extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size) {
int err = 0;
@@ -36,7 +11,10 @@ extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_
return -1;
uint32_t buffer_transfer_size = 65536;
uint32_t kernel_base_addr = vx_dev_caps(VX_CAPS_KERNEL_BASE_ADDR);
unsigned kernel_base_addr;
err = vx_dev_caps(device, VX_CAPS_KERNEL_BASE_ADDR, &kernel_base_addr);
if (err != 0)
return -1;
// allocate device buffer
vx_buffer_h buffer;
@@ -47,7 +25,7 @@ extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_
// get buffer address
auto buf_ptr = (uint8_t*)vx_host_ptr(buffer);
#if defined(USE_SIMX)
#if defined(USE_SIMX)
// default startup routine
((uint32_t*)buf_ptr)[0] = 0xf1401073;
((uint32_t*)buf_ptr)[1] = 0xf1401073;

View File

@@ -21,15 +21,15 @@ typedef void* vx_buffer_h;
#define VX_CAPS_ALLOC_BASE_ADDR 0x6
#define VX_CAPS_KERNEL_BASE_ADDR 0x7
// return device configurations
int vx_dev_caps(int caps_id);
// open the device and connect to it
int vx_dev_open(vx_device_h* hdevice);
// Close the device when all the operations are done
int vx_dev_close(vx_device_h hdevice);
// return device configurations
int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value);
// Allocate shared buffer with device
int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer);
@@ -58,10 +58,10 @@ int vx_start(vx_device_h hdevice);
int vx_ready_wait(vx_device_h hdevice, long long timeout);
// set device constant registers
int vx_set_regiters(int state, int value);
int vx_csr_set(vx_device_h hdevice, int address, int value);
// get device constant registers
int vx_get_regiters(int state, int* value);
int vx_csr_get(vx_device_h hdevice, int address, int* value);
////////////////////////////// UTILITY FUNCIONS ///////////////////////////////

View File

@@ -18,8 +18,8 @@
return -1; \
} while (false)
#define MMIO_CSR_SCOPE_CMD (AFU_IMAGE_MMIO_CSR_SCOPE_CMD * 4)
#define MMIO_CSR_SCOPE_DATA (AFU_IMAGE_MMIO_CSR_SCOPE_DATA * 4)
#define MMIO_SCOPE_READ (AFU_IMAGE_MMIO_SCOPE_READ * 4)
#define MMIO_SCOPE_WRITE (AFU_IMAGE_MMIO_SCOPE_WRITE * 4)
struct scope_signal_t {
int width;
@@ -136,7 +136,7 @@ int vx_scope_start(fpga_handle hfpga, uint64_t delay) {
if (delay != uint64_t(-1)) {
// set start delay
uint64_t cmd_delay = ((delay << 3) | 4);
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, cmd_delay));
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, cmd_delay));
std::cout << "scope start delay: " << delay << std::endl;
}
@@ -150,7 +150,7 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
if (delay != uint64_t(-1)) {
// stop recording
uint64_t cmd_stop = ((delay << 3) | 5);
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, cmd_stop));
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, cmd_stop));
std::cout << "scope stop delay: " << delay << std::endl;
}
@@ -170,9 +170,9 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
uint64_t frame_width, max_frames, data_valid;
// wait for recording to terminate
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, 0));
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, 0));
do {
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_CSR_SCOPE_DATA, &data_valid));
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &data_valid));
if (data_valid)
break;
std::this_thread::sleep_for(std::chrono::seconds(1));
@@ -180,15 +180,15 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
std::cout << "scope trace dump begin..." << std::endl;
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, 2));
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_CSR_SCOPE_DATA, &frame_width));
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, 2));
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &frame_width));
std::cout << "scope::frame_width=" << std::dec << frame_width << std::endl;
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, 3));
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_CSR_SCOPE_DATA, &max_frames));
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, 3));
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &max_frames));
std::cout << "scope::max_frames=" << std::dec << max_frames << std::endl;
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, 1));
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, 1));
if (fwidth != (int)frame_width) {
std::cerr << "invalid frame_width: expecting " << std::dec << fwidth << "!" << std::endl;
@@ -209,7 +209,7 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
ofs << "b1 0" << std::endl;
uint64_t delta;
fpga_result res = fpgaReadMMIO64(hfpga, 0, MMIO_CSR_SCOPE_DATA, &delta);
fpga_result res = fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &delta);
assert(res == FPGA_OK);
while (delta != 0) {
@@ -228,14 +228,14 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
do {
if (frame_no == (max_frames-1)) {
// verify last frame is valid
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, 0));
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_CSR_SCOPE_DATA, &data_valid));
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, 0));
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &data_valid));
assert(data_valid == 1);
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, 1));
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, 1));
}
uint64_t word;
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_CSR_SCOPE_DATA, &word));
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &word));
do {
int signal_width = scope_signals[signal_id-1].width;
@@ -267,8 +267,8 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
std::cout << "scope trace dump done! - " << (timestamp/2) << " cycles" << std::endl;
// verify data not valid
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, 0));
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_CSR_SCOPE_DATA, &data_valid));
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, 0));
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &data_valid));
assert(data_valid == 0);
return 0;

View File

@@ -1,17 +1,24 @@
#include <stdint.h>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <cstdlib>
#include <unistd.h>
#include <assert.h>
#include <cmath>
#include <uuid/uuid.h>
#include <opae/fpga.h>
#include <vortex.h>
#include <VX_config.h>
#include "vortex_afu.h"
#ifdef SCOPE
#include "scope.h"
#endif
#define CACHE_LINESIZE 64
#define ALLOC_BASE_ADDR 0x10000000
#define LOCAL_MEM_SIZE 0xffffffff
#define CHECK_RES(_expr) \
do { \
fpga_result res = _expr; \
@@ -24,22 +31,31 @@
///////////////////////////////////////////////////////////////////////////////
#define CMD_TYPE_READ AFU_IMAGE_CMD_TYPE_READ
#define CMD_TYPE_WRITE AFU_IMAGE_CMD_TYPE_WRITE
#define CMD_TYPE_RUN AFU_IMAGE_CMD_TYPE_RUN
#define CMD_TYPE_CLFLUSH AFU_IMAGE_CMD_TYPE_CLFLUSH
#define CMD_MEM_READ AFU_IMAGE_CMD_MEM_READ
#define CMD_MEM_WRITE AFU_IMAGE_CMD_MEM_WRITE
#define CMD_RUN AFU_IMAGE_CMD_RUN
#define CMD_CLFLUSH AFU_IMAGE_CMD_CLFLUSH
#define CMD_CSR_READ AFU_IMAGE_CMD_CSR_READ
#define CMD_CSR_WRITE AFU_IMAGE_CMD_CSR_WRITE
#define MMIO_CSR_CMD (AFU_IMAGE_MMIO_CSR_CMD * 4)
#define MMIO_CSR_IO_ADDR (AFU_IMAGE_MMIO_CSR_IO_ADDR * 4)
#define MMIO_CSR_MEM_ADDR (AFU_IMAGE_MMIO_CSR_MEM_ADDR * 4)
#define MMIO_CSR_DATA_SIZE (AFU_IMAGE_MMIO_CSR_DATA_SIZE * 4)
#define MMIO_CSR_STATUS (AFU_IMAGE_MMIO_CSR_STATUS * 4)
#define MMIO_CMD_TYPE (AFU_IMAGE_MMIO_CMD_TYPE * 4)
#define MMIO_IO_ADDR (AFU_IMAGE_MMIO_IO_ADDR * 4)
#define MMIO_MEM_ADDR (AFU_IMAGE_MMIO_MEM_ADDR * 4)
#define MMIO_DATA_SIZE (AFU_IMAGE_MMIO_DATA_SIZE * 4)
#define MMIO_STATUS (AFU_IMAGE_MMIO_STATUS * 4)
#define MMIO_CSR_ADDR (AFU_IMAGE_MMIO_CSR_ADDR * 4)
#define MMIO_CSR_DATA (AFU_IMAGE_MMIO_CSR_DATA * 4)
#define MMIO_CSR_READ (AFU_IMAGE_MMIO_CSR_READ * 4)
///////////////////////////////////////////////////////////////////////////////
typedef struct vx_device_ {
fpga_handle fpga;
size_t mem_allocation;
int implementation_id;
int num_cores;
int num_warps;
int num_threads;
} vx_device_t;
typedef struct vx_buffer_ {
@@ -62,21 +78,58 @@ inline bool is_aligned(size_t addr, size_t alignment) {
///////////////////////////////////////////////////////////////////////////////
extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
if (nullptr == hdevice)
return -1;
vx_device_t *device = ((vx_device_t*)hdevice);
switch (caps_id) {
case VX_CAPS_VERSION:
*value = device->implementation_id;
break;
case VX_CAPS_MAX_CORES:
*value = device->num_cores;
break;
case VX_CAPS_MAX_WARPS:
*value = device->num_warps;
break;
case VX_CAPS_MAX_THREADS:
*value = device->num_threads;
break;
case VX_CAPS_CACHE_LINESIZE:
*value = CACHE_LINESIZE;
break;
case VX_CAPS_LOCAL_MEM_SIZE:
*value = LOCAL_MEM_SIZE;
break;
case VX_CAPS_ALLOC_BASE_ADDR:
*value = ALLOC_BASE_ADDR;
break;
case VX_CAPS_KERNEL_BASE_ADDR:
*value = STARTUP_ADDR;
break;
default:
fprintf(stderr, "invalid caps id: %d\n", caps_id);
std::abort();
return -1;
}
return 0;
}
extern int vx_dev_open(vx_device_h* hdevice) {
if (nullptr == hdevice)
return -1;
fpga_properties filter = nullptr;
fpga_result res;
fpga_guid guid;
fpga_token accel_token;
uint32_t num_matches;
fpga_handle accel_handle;
vx_device_t* device;
if (nullptr == hdevice)
return -1;
// ensure that the block size 64
assert(64 == vx_dev_caps(VX_CAPS_CACHE_LINESIZE));
vx_device_t* device;
// Set up a filter that will search for an accelerator
fpgaGetProperties(nullptr, &filter);
fpgaPropertiesSetObjectType(filter, FPGA_ACCELERATOR);
@@ -114,17 +167,32 @@ extern int vx_dev_open(vx_device_h* hdevice) {
}
device->fpga = accel_handle;
device->mem_allocation = vx_dev_caps(VX_CAPS_ALLOC_BASE_ADDR);
device->mem_allocation = ALLOC_BASE_ADDR;
*hdevice = device;
{
// Load device CAPS
int ret = 0;
ret |= vx_csr_get(device, CSR_IMPL_ID, &device->implementation_id);
ret |= vx_csr_get(device, CSR_NC, &device->num_cores);
ret |= vx_csr_get(device, CSR_NW, &device->num_warps);
ret |= vx_csr_get(device, CSR_NT, &device->num_threads);
if (ret != 0) {
fpgaClose(accel_handle);
return ret;
}
}
#ifdef SCOPE
{
int ret = vx_scope_start(device->fpga, 0);
if (ret != 0)
int ret = vx_scope_start(accel_handle, 0);
if (ret != 0) {
fpgaClose(accel_handle);
return ret;
}
}
#endif
#endif
*hdevice = device;
return 0;
}
@@ -154,10 +222,8 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr)
vx_device_t *device = ((vx_device_t*)hdevice);
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
size_t dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
size_t asize = align_size(size, line_size);
size_t dev_mem_size = LOCAL_MEM_SIZE;
size_t asize = align_size(size, CACHE_LINESIZE);
if (device->mem_allocation + asize > dev_mem_size)
return -1;
@@ -182,9 +248,7 @@ extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hb
vx_device_t *device = ((vx_device_t*)hdevice);
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
size_t asize = align_size(size, line_size);
size_t asize = align_size(size, CACHE_LINESIZE);
res = fpgaPrepareBuffer(device->fpga, asize, &host_ptr, &wsid, 0);
if (FPGA_OK != res) {
@@ -260,7 +324,7 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
long long sleep_time_ms = (sleep_time.tv_sec * 1000) + (sleep_time.tv_nsec / 1000000);
for (;;) {
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_STATUS, &data));
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_STATUS, &data));
if (0 == data || 0 == timeout) {
if (data != 0) {
fprintf(stdout, "ready-wait timed out: status=%ld\n", data);
@@ -282,17 +346,15 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, si
vx_buffer_t *buffer = ((vx_buffer_t*)hbuffer);
vx_device_t *device = ((vx_device_t*)buffer->hdevice);
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
size_t dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
size_t asize = align_size(size, line_size);
size_t dev_mem_size = LOCAL_MEM_SIZE;
size_t asize = align_size(size, CACHE_LINESIZE);
// check alignment
if (!is_aligned(dev_maddr, line_size))
if (!is_aligned(dev_maddr, CACHE_LINESIZE))
return -1;
if (!is_aligned(buffer->io_addr + src_offset, line_size))
if (!is_aligned(buffer->io_addr + src_offset, CACHE_LINESIZE))
return -1;
// bound checking
if (src_offset + asize > buffer->size)
return -1;
@@ -303,12 +365,12 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, si
if (vx_ready_wait(buffer->hdevice, -1) != 0)
return -1;
auto ls_shift = (int)std::log2(line_size);
auto ls_shift = (int)std::log2(CACHE_LINESIZE);
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, (buffer->io_addr + src_offset) >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, asize >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_WRITE));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_IO_ADDR, (buffer->io_addr + src_offset) >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_MEM_ADDR, dev_maddr >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_DATA_SIZE, asize >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_MEM_WRITE));
// Wait for the write operation to finish
if (vx_ready_wait(buffer->hdevice, -1) != 0)
@@ -325,15 +387,13 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size,
vx_buffer_t *buffer = ((vx_buffer_t*)hbuffer);
vx_device_t *device = ((vx_device_t*)buffer->hdevice);
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
size_t dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
size_t asize = align_size(size, line_size);
size_t dev_mem_size = LOCAL_MEM_SIZE;
size_t asize = align_size(size, CACHE_LINESIZE);
// check alignment
if (!is_aligned(dev_maddr, line_size))
if (!is_aligned(dev_maddr, CACHE_LINESIZE))
return -1;
if (!is_aligned(buffer->io_addr + dest_offset, line_size))
if (!is_aligned(buffer->io_addr + dest_offset, CACHE_LINESIZE))
return -1;
// bound checking
@@ -346,12 +406,12 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size,
if (vx_ready_wait(buffer->hdevice, -1) != 0)
return -1;
auto ls_shift = (int)std::log2(line_size);
auto ls_shift = (int)std::log2(CACHE_LINESIZE);
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, (buffer->io_addr + dest_offset) >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, asize >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_READ));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_IO_ADDR, (buffer->io_addr + dest_offset) >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_MEM_ADDR, dev_maddr >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_DATA_SIZE, asize >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_MEM_READ));
// Wait for the write operation to finish
if (vx_ready_wait(buffer->hdevice, -1) != 0)
@@ -367,23 +427,21 @@ extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
vx_device_t* device = ((vx_device_t*)hdevice);
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
size_t asize = align_size(size, line_size);
size_t asize = align_size(size, CACHE_LINESIZE);
// check alignment
if (!is_aligned(dev_maddr, line_size))
if (!is_aligned(dev_maddr, CACHE_LINESIZE))
return -1;
// Ensure ready for new command
if (vx_ready_wait(hdevice, -1) != 0)
return -1;
auto ls_shift = (int)std::log2(line_size);
auto ls_shift = (int)std::log2(CACHE_LINESIZE);
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, asize >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_CLFLUSH));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_MEM_ADDR, dev_maddr >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_DATA_SIZE, asize >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_CLFLUSH));
// Wait for the write operation to finish
if (vx_ready_wait(hdevice, -1) != 0)
@@ -396,13 +454,59 @@ extern int vx_start(vx_device_h hdevice) {
if (nullptr == hdevice)
return -1;
vx_device_t *device = ((vx_device_t*)hdevice);
// Ensure ready for new command
if (vx_ready_wait(hdevice, -1) != 0)
return -1;
// start execution
// start execution
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_RUN));
return 0;
}
// set device constant registers
extern int vx_csr_set(vx_device_h hdevice, int address, int value) {
if (nullptr == hdevice)
return -1;
vx_device_t *device = ((vx_device_t*)hdevice);
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_RUN));
// Ensure ready for new command
if (vx_ready_wait(hdevice, -1) != 0)
return -1;
// write CSR value
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, address));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA, value));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_CSR_WRITE));
return 0;
}
// get device constant registers
extern int vx_csr_get(vx_device_h hdevice, int address, int* value) {
if (nullptr == hdevice || nullptr == value)
return -1;
vx_device_t *device = ((vx_device_t*)hdevice);
// Ensure ready for new command
if (vx_ready_wait(hdevice, -1) != 0)
return -1;
// write CSR value
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, address));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_CSR_READ));
// Ensure ready for new command
if (vx_ready_wait(hdevice, -1) != 0)
return -1;
uint64_t value64;
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_READ, &value64));
*value = (int)value64;
return 0;
}

View File

@@ -7,14 +7,19 @@
#include <chrono>
#include <vortex.h>
#include <VX_config.h>
#include <ram.h>
#include <simulator.h>
#define CACHE_LINESIZE 64
#define ALLOC_BASE_ADDR 0x10000000
#define LOCAL_MEM_SIZE 0xffffffff
///////////////////////////////////////////////////////////////////////////////
static size_t align_size(size_t size) {
uint32_t cache_block_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
return cache_block_size * ((size + cache_block_size - 1) / cache_block_size);
inline size_t align_size(size_t size, size_t alignment) {
assert(0 == (alignment & (alignment - 1)));
return (size + alignment - 1) & ~(alignment - 1);
}
///////////////////////////////////////////////////////////////////////////////
@@ -26,7 +31,7 @@ public:
vx_buffer(size_t size, vx_device* device)
: size_(size)
, device_(device) {
auto aligned_asize = align_size(size);
auto aligned_asize = align_size(size, CACHE_LINESIZE);
data_ = malloc(aligned_asize);
}
@@ -59,7 +64,7 @@ private:
class vx_device {
public:
vx_device() {
mem_allocation_ = vx_dev_caps(VX_CAPS_ALLOC_BASE_ADDR);
mem_allocation_ = ALLOC_BASE_ADDR;
simulator_.attach_ram(&ram_);
}
@@ -70,8 +75,8 @@ public:
}
int alloc_local_mem(size_t size, size_t* dev_maddr) {
size_t asize = align_size(size);
auto dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
auto dev_mem_size = LOCAL_MEM_SIZE;
size_t asize = align_size(size, CACHE_LINESIZE);
if (mem_allocation_ + asize > dev_mem_size)
return -1;
*dev_maddr = mem_allocation_;
@@ -80,7 +85,7 @@ public:
}
int upload(void* src, size_t dest_addr, size_t size, size_t src_offset) {
size_t asize = align_size(size);
size_t asize = align_size(size, CACHE_LINESIZE);
if (dest_addr + asize > ram_.size())
return -1;
@@ -94,7 +99,7 @@ public:
}
int download(const void* dest, size_t src_addr, size_t size, size_t dest_offset) {
size_t asize = align_size(size);
size_t asize = align_size(size, CACHE_LINESIZE);
if (src_addr + asize > ram_.size())
return -1;
@@ -156,6 +161,44 @@ private:
///////////////////////////////////////////////////////////////////////////////
extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
if (nullptr == hdevice)
return -1;
switch (caps_id) {
case VX_CAPS_VERSION:
*value = IMPLEMENTATION_ID;
break;
case VX_CAPS_MAX_CORES:
*value = NUM_CORES;
break;
case VX_CAPS_MAX_WARPS:
*value = NUM_WARPS;
break;
case VX_CAPS_MAX_THREADS:
*value = NUM_THREADS;
break;
case VX_CAPS_CACHE_LINESIZE:
*value = CACHE_LINESIZE;
break;
case VX_CAPS_LOCAL_MEM_SIZE:
*value = 0xffffffff;
break;
case VX_CAPS_ALLOC_BASE_ADDR:
*value = 0x10000000;
break;
case VX_CAPS_KERNEL_BASE_ADDR:
*value = STARTUP_ADDR;
break;
default:
std::cout << "invalid caps id: " << caps_id << std::endl;
std::abort();
return -1;
}
return 0;
}
extern int vx_dev_open(vx_device_h* hdevice) {
if (nullptr == hdevice)
return -1;

View File

@@ -11,13 +11,16 @@
#include <core.h>
#include <VX_config.h>
#define PAGE_SIZE 4096
#define CACHE_LINESIZE 64
#define PAGE_SIZE 4096
#define ALLOC_BASE_ADDR 0x10000000
#define LOCAL_MEM_SIZE 0xffffffff
///////////////////////////////////////////////////////////////////////////////
static size_t align_size(size_t size) {
uint32_t cache_block_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
return cache_block_size * ((size + cache_block_size - 1) / cache_block_size);
inline size_t align_size(size_t size, size_t alignment) {
assert(0 == (alignment & (alignment - 1)));
return (size + alignment - 1) & ~(alignment - 1);
}
///////////////////////////////////////////////////////////////////////////////
@@ -29,7 +32,7 @@ public:
vx_buffer(size_t size, vx_device* device)
: size_(size)
, device_(device) {
auto aligned_asize = align_size(size);
auto aligned_asize = align_size(size, CACHE_LINESIZE);
data_ = malloc(aligned_asize);
}
@@ -65,7 +68,7 @@ public:
: is_done_(false)
, is_running_(false)
, thread_(__thread_proc__, this) {
mem_allocation_ = vx_dev_caps(VX_CAPS_ALLOC_BASE_ADDR);
mem_allocation_ = ALLOC_BASE_ADDR;
}
~vx_device() {
@@ -77,8 +80,8 @@ public:
}
int alloc_local_mem(size_t size, size_t* dev_maddr) {
auto asize = align_size(size);
auto dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
auto dev_mem_size = LOCAL_MEM_SIZE;
auto asize = align_size(size, CACHE_LINESIZE);
if (mem_allocation_ + asize > dev_mem_size)
return -1;
*dev_maddr = mem_allocation_;
@@ -87,7 +90,7 @@ public:
}
int upload(void* src, size_t dest_addr, size_t size, size_t src_offset) {
auto asize = align_size(size);
auto asize = align_size(size, CACHE_LINESIZE);
if (dest_addr + asize > ram_.size())
return -1;
@@ -101,7 +104,7 @@ public:
}
int download(const void* dest, size_t src_addr, size_t size, size_t dest_offset) {
size_t asize = align_size(size);
size_t asize = align_size(size, CACHE_LINESIZE);
if (src_addr + asize > ram_.size())
return -1;
@@ -216,6 +219,44 @@ extern int vx_dev_close(vx_device_h hdevice) {
return 0;
}
extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
if (nullptr == hdevice)
return -1;
switch (caps_id) {
case VX_CAPS_VERSION:
*value = IMPLEMENTATION_ID;
break;
case VX_CAPS_MAX_CORES:
*value = NUM_CORES;
break;
case VX_CAPS_MAX_WARPS:
*value = NUM_WARPS;
break;
case VX_CAPS_MAX_THREADS:
*value = NUM_THREADS;
break;
case VX_CAPS_CACHE_LINESIZE:
*value = CACHE_LINESIZE;
break;
case VX_CAPS_LOCAL_MEM_SIZE:
*value = LOCAL_MEM_SIZE;
break;
case VX_CAPS_ALLOC_BASE_ADDR:
*value = ALLOC_BASE_ADDR;
break;
case VX_CAPS_KERNEL_BASE_ADDR:
*value = STARTUP_ADDR;
break;
default:
std::cout << "invalid caps id: " << caps_id << std::endl;
std::abort();
return -1;
}
return 0;
}
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
if (nullptr == hdevice
|| nullptr == dev_maddr

View File

@@ -8,6 +8,10 @@ extern int vx_dev_close(vx_device_h /*hdevice*/) {
return -1;
}
extern int vx_dev_caps(vx_device_h /*hdevice*/, unsigned /*caps_id*/, unsigned* /*value*/) {
return -1;
}
extern int vx_alloc_dev_mem(vx_device_h /*hdevice*/, size_t /*size*/, size_t* /*dev_maddr*/) {
return -1;
}

View File

@@ -179,7 +179,12 @@ int main(int argc, char *argv[]) {
count = 1;
}
uint32_t max_cores = vx_dev_caps(VX_CAPS_MAX_CORES);
// open device connection
std::cout << "open device connection" << std::endl;
RT_CHECK(vx_dev_open(&device));
unsigned max_cores;
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
uint32_t num_points = max_cores * count;
uint32_t num_blocks = (num_points * sizeof(uint32_t) + 63) / 64;
uint32_t buf_size = num_blocks * 64;
@@ -187,10 +192,6 @@ int main(int argc, char *argv[]) {
std::cout << "number of points: " << num_points << std::endl;
std::cout << "buffer size: " << buf_size << " bytes" << std::endl;
// open device connection
std::cout << "open device connection" << std::endl;
RT_CHECK(vx_dev_open(&device));
// allocate device memory
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
kernel_arg.src_ptr = value;

View File

@@ -110,9 +110,14 @@ int main(int argc, char *argv[]) {
count = 1;
}
uint32_t max_cores = vx_dev_caps(VX_CAPS_MAX_CORES);
uint32_t max_warps = vx_dev_caps(VX_CAPS_MAX_WARPS);
uint32_t max_threads = vx_dev_caps(VX_CAPS_MAX_THREADS);
// open device connection
std::cout << "open device connection" << std::endl;
RT_CHECK(vx_dev_open(&device));
unsigned max_cores, max_warps, max_threads;
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));
uint32_t num_points = count * max_cores * max_warps * max_threads;
uint32_t buf_size = num_points * sizeof(uint32_t);
@@ -120,10 +125,6 @@ int main(int argc, char *argv[]) {
std::cout << "number of points: " << num_points << std::endl;
std::cout << "buffer size: " << buf_size << " bytes" << std::endl;
// open device connection
std::cout << "open device connection" << std::endl;
RT_CHECK(vx_dev_open(&device));
// upload program
std::cout << "upload program" << std::endl;
RT_CHECK(vx_upload_kernel_file(device, kernel_file));