+ Microarchitecture optimizations + 64-bit support + Xilinx FPGA support + LLVM-16 support + Refactoring and quality control fixes minor update minor update minor update minor update minor update minor update cleanup cleanup cache bindings and memory perf refactory minor update minor update hw unit tests fixes minor update minor update minor update minor update minor update minor udpate minor update minor update minor update minor update minor update minor update minor update minor updates minor updates minor update minor update minor update minor update minor update minor update minor updates minor updates minor updates minor updates minor update minor update
916 lines
27 KiB
C++
916 lines
27 KiB
C++
// Copyright © 2019-2023
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#include <vortex.h>
|
|
#include <malloc.h>
|
|
#include <utils.h>
|
|
#include <VX_config.h>
|
|
#include <VX_types.h>
|
|
#include <stdarg.h>
|
|
#include <util.h>
|
|
#include <limits>
|
|
#include <unordered_map>
|
|
|
|
#ifdef SCOPE
|
|
#include "scope.h"
|
|
#endif
|
|
|
|
// XRT includes
|
|
#include "experimental/xrt_bo.h"
|
|
#include "experimental/xrt_ip.h"
|
|
#include "experimental/xrt_device.h"
|
|
#include "experimental/xrt_kernel.h"
|
|
#include "experimental/xrt_xclbin.h"
|
|
#include "experimental/xrt_error.h"
|
|
|
|
#define CPP_API
|
|
//#define BANK_INTERLEAVE
|
|
|
|
#define MMIO_CTL_ADDR 0x00
|
|
#define MMIO_DEV_ADDR 0x10
|
|
#define MMIO_ISA_ADDR 0x1C
|
|
#define MMIO_DCR_ADDR 0x28
|
|
#define MMIO_SCP_ADDR 0x34
|
|
#define MMIO_MEM_ADDR 0x40
|
|
|
|
#define CTL_AP_START (1<<0)
|
|
#define CTL_AP_DONE (1<<1)
|
|
#define CTL_AP_IDLE (1<<2)
|
|
#define CTL_AP_READY (1<<3)
|
|
#define CTL_AP_RESET (1<<4)
|
|
#define CTL_AP_RESTART (1<<7)
|
|
|
|
struct platform_info_t {
|
|
const char* prefix_name;
|
|
uint8_t lg2_num_banks;
|
|
uint8_t lg2_bank_size;
|
|
uint64_t mem_base;
|
|
};
|
|
|
|
static const platform_info_t g_platforms [] = {
|
|
{"xilinx_u50", 4, 0x1C, 0x0},
|
|
{"xilinx_u200", 4, 0x1C, 0x0},
|
|
{"xilinx_u280", 4, 0x1C, 0x0},
|
|
{"xilinx_vck5000", 0, 0x21, 0xC000000000},
|
|
};
|
|
|
|
#ifdef CPP_API
|
|
|
|
typedef xrt::device xrt_device_t;
|
|
typedef xrt::ip xrt_kernel_t;
|
|
typedef xrt::bo xrt_buffer_t;
|
|
|
|
#else
|
|
|
|
typedef xrtDeviceHandle xrt_device_t;
|
|
typedef xrtKernelHandle xrt_kernel_t;
|
|
typedef xrtBufferHandle xrt_buffer_t;
|
|
|
|
#endif
|
|
|
|
#define RAM_PAGE_SIZE 4096
|
|
|
|
#define DEFAULT_DEVICE_INDEX 0
|
|
|
|
#define DEFAULT_XCLBIN_PATH "vortex_afu.xclbin"
|
|
|
|
#define KERNEL_NAME "vortex_afu"
|
|
|
|
#ifndef NDEBUG
|
|
#define DBGPRINT(format, ...) do { printf("[VXDRV] " format "", ##__VA_ARGS__); } while (0)
|
|
#else
|
|
#define DBGPRINT(format, ...) ((void)0)
|
|
#endif
|
|
|
|
#define CHECK_HANDLE(handle, _expr, _cleanup) \
|
|
auto handle = _expr; \
|
|
if (handle == nullptr) { \
|
|
printf("[VXDRV] Error: '%s' returned NULL!\n", #_expr); \
|
|
_cleanup \
|
|
}
|
|
|
|
#define CHECK_ERR(_expr, _cleanup) \
|
|
do { \
|
|
auto err = _expr; \
|
|
if (err == 0) \
|
|
break; \
|
|
printf("[VXDRV] Error: '%s' returned %d!\n", #_expr, (int)err); \
|
|
_cleanup \
|
|
} while (false)
|
|
|
|
using namespace vortex;
|
|
|
|
#ifndef CPP_API
|
|
|
|
static void dump_xrt_error(xrtDeviceHandle xrtDevice, xrtErrorCode err) {
|
|
size_t len = 0;
|
|
xrtErrorGetString(xrtDevice, err, nullptr, 0, &len);
|
|
std::vector<char> buf(len);
|
|
xrtErrorGetString(xrtDevice, err, buf.data(), buf.size(), nullptr);
|
|
printf("[VXDRV] detail: %s!\n", buf.data());
|
|
}
|
|
|
|
#endif
|
|
|
|
static int get_platform_info(const std::string& device_name, platform_info_t* platform_info) {
|
|
for (size_t i = 0; i < (sizeof(g_platforms)/sizeof(platform_info_t)); ++i) {
|
|
auto& platform = g_platforms[i];
|
|
if (device_name.rfind(platform.prefix_name, 0) == 0) {
|
|
*platform_info = platform;
|
|
return 0;
|
|
}
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
/*static void wait_for_enter(const std::string &msg) {
|
|
std::cout << msg << std::endl;
|
|
std::cin.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
|
|
}*/
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
class vx_device {
|
|
public:
|
|
|
|
vx_device(xrt_device_t& device, xrt_kernel_t& kernel, const platform_info_t& platform)
|
|
: xrtDevice_(device)
|
|
, xrtKernel_(kernel)
|
|
, platform_(platform)
|
|
{}
|
|
|
|
#ifndef CPP_API
|
|
|
|
~vx_device() {
|
|
for (auto& entry : xrtBuffers_) {
|
|
#ifdef BANK_INTERLEAVE
|
|
xrtBOFree(entry);
|
|
#else
|
|
xrtBOFree(entry.second.xrtBuffer);
|
|
#endif
|
|
}
|
|
if (xrtKernel_) {
|
|
xrtKernelClose(xrtKernel_);
|
|
}
|
|
if (xrtDevice_) {
|
|
xrtDeviceClose(xrtDevice_);
|
|
}
|
|
}
|
|
|
|
#endif
|
|
|
|
int init() {
|
|
CHECK_ERR(this->write_register(MMIO_CTL_ADDR, CTL_AP_RESET), {
|
|
return -1;
|
|
});
|
|
|
|
uint32_t num_banks = 1 << platform_.lg2_num_banks;
|
|
uint64_t bank_size = 1ull << platform_.lg2_bank_size;
|
|
|
|
for (uint32_t i = 0; i < num_banks; ++i) {
|
|
uint32_t reg_addr = MMIO_MEM_ADDR + (i * 12);
|
|
uint64_t reg_value = platform_.mem_base + i * bank_size;
|
|
CHECK_ERR(this->write_register(reg_addr, reg_value & 0xffffffff), {
|
|
return -1;
|
|
});
|
|
|
|
CHECK_ERR(this->write_register(reg_addr + 4, (reg_value >> 32) & 0xffffffff), {
|
|
return -1;
|
|
});
|
|
#ifndef BANK_INTERLEAVE
|
|
break;
|
|
#endif
|
|
}
|
|
|
|
CHECK_ERR(this->read_register(MMIO_DEV_ADDR, (uint32_t*)&this->dev_caps), {
|
|
return -1;
|
|
});
|
|
|
|
CHECK_ERR(this->read_register(MMIO_DEV_ADDR + 4, (uint32_t*)&this->dev_caps + 1), {
|
|
return -1;
|
|
});
|
|
|
|
CHECK_ERR(this->read_register(MMIO_ISA_ADDR, (uint32_t*)&this->isa_caps), {
|
|
return -1;
|
|
});
|
|
|
|
CHECK_ERR(this->read_register(MMIO_ISA_ADDR + 4, (uint32_t*)&this->isa_caps + 1), {
|
|
return -1;
|
|
});
|
|
|
|
this->global_mem_size = num_banks * bank_size;
|
|
|
|
this->global_mem_ = std::make_shared<vortex::MemoryAllocator>(
|
|
ALLOC_BASE_ADDR, ALLOC_MAX_ADDR, RAM_PAGE_SIZE, CACHE_BLOCK_SIZE);
|
|
|
|
uint64_t local_mem_size = 0;
|
|
vx_dev_caps(this, VX_CAPS_LOCAL_MEM_SIZE, &local_mem_size);
|
|
if (local_mem_size <= 1) {
|
|
this->local_mem_ = std::make_shared<vortex::MemoryAllocator>(
|
|
SMEM_BASE_ADDR, local_mem_size, RAM_PAGE_SIZE, 1);
|
|
}
|
|
|
|
#ifdef BANK_INTERLEAVE
|
|
xrtBuffers_.reserve(num_banks);
|
|
for (uint32_t i = 0; i < num_banks; ++i) {
|
|
#ifdef CPP_API
|
|
xrtBuffers_.emplace_back(xrtDevice_, bank_size, xrt::bo::flags::normal, i);
|
|
#else
|
|
CHECK_HANDLE(xrtBuffer, xrtBOAlloc(xrtDevice_, bank_size, XRT_BO_FLAGS_NONE, i), {
|
|
return -1;
|
|
});
|
|
xrtBuffers_.push_back(xrtBuffer);
|
|
#endif
|
|
printf("*** allocated bank%u/%u, size=%lu\n", i, num_banks, bank_size);
|
|
}
|
|
#endif
|
|
|
|
return 0;
|
|
}
|
|
|
|
int mem_alloc(uint64_t size, int type, uint64_t* dev_addr) {
|
|
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
|
|
|
uint64_t addr;
|
|
|
|
if (type == VX_MEM_TYPE_GLOBAL) {
|
|
CHECK_ERR(global_mem_->allocate(asize, &addr), {
|
|
return -1;
|
|
});
|
|
#ifndef BANK_INTERLEAVE
|
|
uint32_t bank_id;
|
|
CHECK_ERR(this->get_bank_info(addr, &bank_id, nullptr), {
|
|
return -1;
|
|
});
|
|
CHECK_ERR(get_buffer(bank_id, nullptr), {
|
|
return -1;
|
|
});
|
|
#endif
|
|
} else if (type == VX_MEM_TYPE_LOCAL) {
|
|
if CHECK_ERR(local_mem_->allocate(asize, &addr), {
|
|
return -1;
|
|
});
|
|
} else {
|
|
return -1;
|
|
}
|
|
*dev_addr = addr;
|
|
return 0;
|
|
}
|
|
|
|
int mem_free(uint64_t dev_addr) {
|
|
if (dev_addr >= SMEM_BASE_ADDR) {
|
|
CHECK_ERR(local_mem_->release(dev_addr), {
|
|
return -1;
|
|
});
|
|
} else {
|
|
CHECK_ERR(global_mem_->release(dev_addr), {
|
|
return -1;
|
|
});
|
|
#ifdef BANK_INTERLEAVE
|
|
if (0 == global_mem_->allocated()) {
|
|
#ifndef CPP_API
|
|
for (auto& entry : xrtBuffers_) {
|
|
xrtBOFree(entry);
|
|
}
|
|
#endif
|
|
xrtBuffers_.clear();
|
|
}
|
|
#else
|
|
uint32_t bank_id;
|
|
CHECK_ERR(this->get_bank_info(dev_addr, &bank_id, nullptr), {
|
|
return -1;
|
|
});
|
|
auto it = xrtBuffers_.find(bank_id);
|
|
if (it != xrtBuffers_.end()) {
|
|
auto count = --it->second.count;
|
|
if (0 == count) {
|
|
printf("freeing bank%d...\n", bank_id);
|
|
#ifndef CPP_API
|
|
xrtBOFree(it->second.xrtBuffer);
|
|
#endif
|
|
xrtBuffers_.erase(it);
|
|
}
|
|
} else {
|
|
fprintf(stderr, "[VXDRV] Error: invalid device memory address: 0x%lx\n", dev_addr);
|
|
return -1;
|
|
}
|
|
#endif
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int mem_info(int type, uint64_t* mem_free, uint64_t* mem_used) const {
|
|
if (type == VX_MEM_TYPE_GLOBAL) {
|
|
if (mem_free)
|
|
*mem_free = global_mem_->free();
|
|
if (mem_used)
|
|
*mem_used = global_mem_->allocated();
|
|
} else if (type == VX_MEM_TYPE_LOCAL) {
|
|
if (mem_free)
|
|
*mem_free = local_mem_->free();
|
|
if (mem_used)
|
|
*mem_free = local_mem_->allocated();
|
|
} else {
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int write_register(uint32_t addr, uint32_t value) {
|
|
#ifdef CPP_API
|
|
xrtKernel_.write_register(addr, value);
|
|
#else
|
|
CHECK_ERR(xrtKernelWriteRegister(xrtKernel_, addr, value), {
|
|
dump_xrt_error(xrtDevice_, err);
|
|
return -1;
|
|
});
|
|
#endif
|
|
DBGPRINT("*** write_register: addr=0x%x, value=0x%x\n", addr, value);
|
|
return 0;
|
|
}
|
|
|
|
int read_register(uint32_t addr, uint32_t* value) {
|
|
#ifdef CPP_API
|
|
*value = xrtKernel_.read_register(addr);
|
|
#else
|
|
CHECK_ERR(xrtKernelReadRegister(xrtKernel_, addr, value), {
|
|
dump_xrt_error(xrtDevice_, err);
|
|
return -1;
|
|
});
|
|
#endif
|
|
DBGPRINT("*** read_register: addr=0x%x, value=0x%x\n", addr, *value);
|
|
return 0;
|
|
}
|
|
|
|
int upload(uint64_t dev_addr, uint8_t* host_ptr, uint64_t asize) {
|
|
for (uint64_t end = dev_addr + asize; dev_addr < end;
|
|
dev_addr += CACHE_BLOCK_SIZE,
|
|
host_ptr += CACHE_BLOCK_SIZE) {
|
|
#ifdef BANK_INTERLEAVE
|
|
asize = CACHE_BLOCK_SIZE;
|
|
#else
|
|
end = 0;
|
|
#endif
|
|
uint32_t bo_index;
|
|
uint64_t bo_offset;
|
|
xrt_buffer_t xrtBuffer;
|
|
CHECK_ERR(this->get_bank_info(dev_addr, &bo_index, &bo_offset), {
|
|
return -1;
|
|
});
|
|
CHECK_ERR(this->get_buffer(bo_index, &xrtBuffer), {
|
|
return -1;
|
|
});
|
|
#ifdef CPP_API
|
|
xrtBuffer.write(host_ptr, asize, bo_offset);
|
|
xrtBuffer.sync(XCL_BO_SYNC_BO_TO_DEVICE, asize, bo_offset);
|
|
#else
|
|
CHECK_ERR(xrtBOWrite(xrtBuffer, host_ptr, asize, bo_offset), {
|
|
dump_xrt_error(xrtDevice_, err);
|
|
return -1;
|
|
});
|
|
CHECK_ERR(xrtBOSync(xrtBuffer, XCL_BO_SYNC_BO_TO_DEVICE, asize, bo_offset), {
|
|
dump_xrt_error(xrtDevice_, err);
|
|
return -1;
|
|
});
|
|
#endif
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int download(uint8_t* host_ptr, uint64_t dev_addr, uint64_t asize) {
|
|
for (uint64_t end = dev_addr + asize; dev_addr < end;
|
|
dev_addr += CACHE_BLOCK_SIZE,
|
|
host_ptr += CACHE_BLOCK_SIZE) {
|
|
#ifdef BANK_INTERLEAVE
|
|
asize = CACHE_BLOCK_SIZE;
|
|
#else
|
|
end = 0;
|
|
#endif
|
|
uint32_t bo_index;
|
|
uint64_t bo_offset;
|
|
xrt_buffer_t xrtBuffer;
|
|
CHECK_ERR(this->get_bank_info(dev_addr, &bo_index, &bo_offset), {
|
|
return -1;
|
|
});
|
|
CHECK_ERR(this->get_buffer(bo_index, &xrtBuffer), {
|
|
return -1;
|
|
});
|
|
#ifdef CPP_API
|
|
xrtBuffer.sync(XCL_BO_SYNC_BO_FROM_DEVICE, asize, bo_offset);
|
|
xrtBuffer.read(host_ptr, asize, bo_offset);
|
|
#else
|
|
CHECK_ERR(xrtBOSync(xrtBuffer, XCL_BO_SYNC_BO_FROM_DEVICE, asize, bo_offset), {
|
|
dump_xrt_error(xrtDevice_, err);
|
|
return -1;
|
|
});
|
|
CHECK_ERR(xrtBORead(xrtBuffer, host_ptr, asize, bo_offset), {
|
|
dump_xrt_error(xrtDevice_, err);
|
|
return -1;
|
|
});
|
|
#endif
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
DeviceConfig dcrs;
|
|
uint64_t dev_caps;
|
|
uint64_t isa_caps;
|
|
uint64_t global_mem_size;
|
|
|
|
private:
|
|
|
|
xrt_device_t xrtDevice_;
|
|
xrt_kernel_t xrtKernel_;
|
|
const platform_info_t platform_;
|
|
std::shared_ptr<vortex::MemoryAllocator> global_mem_;
|
|
std::shared_ptr<vortex::MemoryAllocator> local_mem_;
|
|
|
|
#ifdef BANK_INTERLEAVE
|
|
|
|
std::vector<xrt_buffer_t> xrtBuffers_;
|
|
|
|
int get_bank_info(uint64_t addr, uint32_t* pIdx, uint64_t* pOff) {
|
|
uint32_t num_banks = 1 << platform_.lg2_num_banks;
|
|
uint64_t block_addr = addr / CACHE_BLOCK_SIZE;
|
|
uint32_t index = block_addr & (num_banks-1);
|
|
uint64_t offset = (block_addr >> platform_.lg2_num_banks) * CACHE_BLOCK_SIZE;
|
|
if (pIdx) {
|
|
*pIdx = index;
|
|
}
|
|
if (pOff) {
|
|
*pOff = offset;
|
|
}
|
|
printf("get_bank_info(addr=0x%lx, bank=%d, offset=0x%lx\n", addr, index, offset);
|
|
return 0;
|
|
}
|
|
|
|
int get_buffer(uint32_t bank_id, xrt_buffer_t* pBuf) {
|
|
if (pBuf) {
|
|
*pBuf = xrtBuffers_.at(bank_id);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
#else
|
|
|
|
struct buf_cnt_t {
|
|
xrt_buffer_t xrtBuffer;
|
|
uint32_t count;
|
|
};
|
|
|
|
std::unordered_map<uint32_t, buf_cnt_t> xrtBuffers_;
|
|
|
|
int get_bank_info(uint64_t addr, uint32_t* pIdx, uint64_t* pOff) {
|
|
uint32_t num_banks = 1 << platform_.lg2_num_banks;
|
|
uint64_t bank_size = 1ull << platform_.lg2_bank_size;
|
|
uint32_t index = addr >> platform_.lg2_bank_size;
|
|
uint64_t offset = addr & (bank_size-1);
|
|
if (index > num_banks) {
|
|
fprintf(stderr, "[VXDRV] Error: address out of range: 0x%lx\n", addr);
|
|
return -1;
|
|
}
|
|
if (pIdx) {
|
|
*pIdx = index;
|
|
}
|
|
if (pOff) {
|
|
*pOff = offset;
|
|
}
|
|
printf("get_bank_info(addr=0x%lx, bank=%d, offset=0x%lx\n", addr, index, offset);
|
|
return 0;
|
|
}
|
|
|
|
int get_buffer(uint32_t bank_id, xrt_buffer_t* pBuf) {
|
|
auto it = xrtBuffers_.find(bank_id);
|
|
if (it != xrtBuffers_.end()) {
|
|
if (pBuf) {
|
|
*pBuf = it->second.xrtBuffer;
|
|
} else {
|
|
printf("reusing bank%d...\n", bank_id);
|
|
++it->second.count;
|
|
}
|
|
} else {
|
|
printf("allocating bank%d...\n", bank_id);
|
|
uint64_t bank_size = 1ull << platform_.lg2_bank_size;
|
|
#ifdef CPP_API
|
|
xrt::bo xrtBuffer(xrtDevice_, bank_size, xrt::bo::flags::normal, bank_id);
|
|
#else
|
|
CHECK_HANDLE(xrtBuffer, xrtBOAlloc(xrtDevice_, bank_size, XRT_BO_FLAGS_NONE, bank_id), {
|
|
return -1;
|
|
});
|
|
#endif
|
|
xrtBuffers_.insert({bank_id, {xrtBuffer, 1}});
|
|
if (pBuf) {
|
|
*pBuf = xrtBuffer;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
#endif
|
|
};
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
extern int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value) {
|
|
if (nullptr == hdevice)
|
|
return -1;
|
|
|
|
vx_device *device = ((vx_device*)hdevice);
|
|
|
|
switch (caps_id) {
|
|
case VX_CAPS_VERSION:
|
|
*value = (device->dev_caps >> 0) & 0xff;
|
|
break;
|
|
case VX_CAPS_NUM_THREADS:
|
|
*value = (device->dev_caps >> 8) & 0xff;
|
|
break;
|
|
case VX_CAPS_NUM_WARPS:
|
|
*value = (device->dev_caps >> 16) & 0xff;
|
|
break;
|
|
case VX_CAPS_NUM_CORES:
|
|
*value = (device->dev_caps >> 24) & 0xffff;
|
|
break;
|
|
case VX_CAPS_CACHE_LINE_SIZE:
|
|
*value = CACHE_BLOCK_SIZE;
|
|
break;
|
|
case VX_CAPS_GLOBAL_MEM_SIZE:
|
|
*value = device->global_mem_size;
|
|
break;
|
|
case VX_CAPS_LOCAL_MEM_SIZE:
|
|
*value = 1ull << ((device->dev_caps >> 40) & 0xff);
|
|
break;
|
|
case VX_CAPS_KERNEL_BASE_ADDR:
|
|
*value = (uint64_t(device->dcrs.read(VX_DCR_BASE_STARTUP_ADDR1)) << 32) |
|
|
device->dcrs.read(VX_DCR_BASE_STARTUP_ADDR0);
|
|
break;
|
|
case VX_CAPS_ISA_FLAGS:
|
|
*value = device->isa_caps;
|
|
break;
|
|
default:
|
|
fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id);
|
|
std::abort();
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
extern int vx_dev_open(vx_device_h* hdevice) {
|
|
if (nullptr == hdevice)
|
|
return -1;
|
|
|
|
int device_index = DEFAULT_DEVICE_INDEX;
|
|
const char* device_index_s = getenv("XRT_DEVICE_INDEX");
|
|
if (device_index_s != nullptr) {
|
|
device_index = atoi(device_index_s);
|
|
}
|
|
|
|
const char* xlbin_path_s = getenv("XRT_XCLBIN_PATH");
|
|
if (xlbin_path_s == nullptr) {
|
|
xlbin_path_s = DEFAULT_XCLBIN_PATH;
|
|
}
|
|
|
|
#ifdef CPP_API
|
|
|
|
auto xrtDevice = xrt::device(device_index);
|
|
auto uuid = xrtDevice.load_xclbin(xlbin_path_s);
|
|
auto xrtKernel = xrt::ip(xrtDevice, uuid, KERNEL_NAME);
|
|
auto xclbin = xrt::xclbin(xlbin_path_s);
|
|
|
|
auto device_name = xrtDevice.get_info<xrt::info::device::name>();
|
|
|
|
/*{
|
|
uint32_t num_banks = 0;
|
|
uint64_t bank_size = 0;
|
|
uint64_t mem_base = 0;
|
|
|
|
auto mem_json = nlohmann::json::parse(xrtDevice.get_info<xrt::info::device::memory>());
|
|
if (!mem_json.is_null()) {
|
|
uint32_t index = 0;
|
|
for (auto& mem : mem_json["board"]["memory"]["memories"]) {
|
|
auto enabled = mem["enabled"].get<std::string>();
|
|
if (enabled == "true") {
|
|
if (index == 0) {
|
|
mem_base = std::stoull(mem["base_address"].get<std::string>(), nullptr, 16);
|
|
bank_size = std::stoull(mem["range_bytes"].get<std::string>(), nullptr, 16);
|
|
}
|
|
++index;
|
|
}
|
|
}
|
|
num_banks = index;
|
|
}
|
|
|
|
fprintf(stderr, "[VXDRV] memory description: base=0x%lx, size=0x%lx, count=%d\n", mem_base, bank_size, num_banks);
|
|
}*/
|
|
|
|
/*{
|
|
std::cout << "Device" << device_index << " : " << xrtDevice.get_info<xrt::info::device::name>() << std::endl;
|
|
std::cout << " bdf : " << xrtDevice.get_info<xrt::info::device::bdf>() << std::endl;
|
|
std::cout << " kdma : " << xrtDevice.get_info<xrt::info::device::kdma>() << std::endl;
|
|
std::cout << " max_freq : " << xrtDevice.get_info<xrt::info::device::max_clock_frequency_mhz>() << std::endl;
|
|
std::cout << " memory : " << xrtDevice.get_info<xrt::info::device::memory>() << std::endl;
|
|
std::cout << " thermal : " << xrtDevice.get_info<xrt::info::device::thermal>() << std::endl;
|
|
std::cout << " m2m : " << std::boolalpha << xrtDevice.get_info<xrt::info::device::m2m>() << std::dec << std::endl;
|
|
std::cout << " nodma : " << std::boolalpha << xrtDevice.get_info<xrt::info::device::nodma>() << std::dec << std::endl;
|
|
|
|
std::cout << "Memory info :" << std::endl;
|
|
for (const auto& mem_bank : xclbin.get_mems()) {
|
|
std::cout << " index : " << mem_bank.get_index() << std::endl;
|
|
std::cout << " tag : " << mem_bank.get_tag() << std::endl;
|
|
std::cout << " type : " << (int)mem_bank.get_type() << std::endl;
|
|
std::cout << " base_address : 0x" << std::hex << mem_bank.get_base_address() << std::endl;
|
|
std::cout << " size : 0x" << (mem_bank.get_size_kb() * 1000) << std::dec << std::endl;
|
|
std::cout << " used :" << mem_bank.get_used() << std::endl;
|
|
}
|
|
}*/
|
|
|
|
// get platform info
|
|
platform_info_t platform_info;
|
|
CHECK_ERR(get_platform_info(device_name, &platform_info), {
|
|
fprintf(stderr, "[VXDRV] Error: platform not supported: %s\n", device_name.c_str());
|
|
return -1;
|
|
});
|
|
|
|
CHECK_HANDLE(device, new vx_device(xrtDevice, xrtKernel, platform_info), {
|
|
return -1;
|
|
});
|
|
|
|
#else
|
|
|
|
CHECK_HANDLE(xrtDevice, xrtDeviceOpen(device_index), {
|
|
return -1;
|
|
});
|
|
|
|
CHECK_ERR(xrtDeviceLoadXclbinFile(xrtDevice, xlbin_path_s), {
|
|
dump_xrt_error(xrtDevice, err);
|
|
xrtDeviceClose(xrtDevice);
|
|
return -1;
|
|
});
|
|
|
|
xuid_t uuid;
|
|
CHECK_ERR(xrtDeviceGetXclbinUUID(xrtDevice, uuid), {
|
|
dump_xrt_error(xrtDevice, err);
|
|
xrtDeviceClose(xrtDevice);
|
|
return -1;
|
|
});
|
|
|
|
CHECK_HANDLE(xrtKernel, xrtPLKernelOpenExclusive(xrtDevice, uuid, KERNEL_NAME), {
|
|
xrtDeviceClose(xrtDevice);
|
|
return -1;
|
|
});
|
|
|
|
int device_name_size;
|
|
xrtXclbinGetXSAName(xrtDevice, nullptr, 0, &device_name_size);
|
|
std::vector<char> device_name(device_name_size);
|
|
xrtXclbinGetXSAName(xrtDevice, device_name.data(), device_name_size, nullptr);
|
|
|
|
// get platform info
|
|
platform_info_t platform_info;
|
|
CHECK_ERR(get_platform_info(device_name.data(), &platform_info), {
|
|
fprintf(stderr, "[VXDRV] Error: platform not supported: %s\n", device_name.data());
|
|
return -1;
|
|
});
|
|
|
|
CHECK_HANDLE(device, new vx_device(xrtDevice, xrtKernel, platform_info), {
|
|
xrtKernelClose(xrtKernel);
|
|
xrtDeviceClose(xrtDevice);
|
|
return -1;
|
|
});
|
|
|
|
#endif
|
|
|
|
// initialize device
|
|
CHECK_ERR(device->init(), {
|
|
return -1;
|
|
});
|
|
|
|
#ifdef SCOPE
|
|
{
|
|
scope_callback_t callback;
|
|
callback.registerWrite = [](vx_device_h hdevice, uint64_t value)->int {
|
|
auto device = (vx_device*)hdevice;
|
|
uint32_t value_lo = (uint32_t)(value);
|
|
uint32_t value_hi = (uint32_t)(value >> 32);
|
|
CHECK_ERR(device->write_register(MMIO_SCP_ADDR, value_lo), {
|
|
return -1;
|
|
});
|
|
CHECK_ERR(device->write_register(MMIO_SCP_ADDR + 4, value_hi), {
|
|
return -1;
|
|
});
|
|
return 0;
|
|
};
|
|
callback.registerRead = [](vx_device_h hdevice, uint64_t* value)->int {
|
|
auto device = (vx_device*)hdevice;
|
|
uint32_t value_lo, value_hi;
|
|
CHECK_ERR(device->read_register(MMIO_SCP_ADDR, &value_lo), {
|
|
return -1;
|
|
});
|
|
CHECK_ERR(device->read_register(MMIO_SCP_ADDR + 4, &value_hi), {
|
|
return -1;
|
|
});
|
|
*value = (((uint64_t)value_hi) << 32) | value_lo;
|
|
return 0;
|
|
};
|
|
int ret = vx_scope_start(&callback, device, 0, -1);
|
|
if (ret != 0) {
|
|
delete device;
|
|
return ret;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
CHECK_ERR(dcr_initialize(device), {
|
|
delete device;
|
|
return -1;
|
|
});
|
|
|
|
#ifdef DUMP_PERF_STATS
|
|
perf_add_device(device);
|
|
#endif
|
|
|
|
*hdevice = device;
|
|
|
|
DBGPRINT("device creation complete!\n");
|
|
|
|
return 0;
|
|
}
|
|
|
|
extern int vx_dev_close(vx_device_h hdevice) {
|
|
if (nullptr == hdevice)
|
|
return -1;
|
|
|
|
#ifdef SCOPE
|
|
vx_scope_stop(hdevice);
|
|
#endif
|
|
|
|
auto device = (vx_device*)hdevice;
|
|
|
|
delete device;
|
|
|
|
DBGPRINT("device destroyed!\n");
|
|
|
|
return 0;
|
|
}
|
|
|
|
extern int vx_mem_alloc(vx_device_h hdevice, uint64_t size, int type, uint64_t* dev_addr) {
|
|
if (nullptr == hdevice
|
|
|| nullptr == dev_addr
|
|
|| 0 == size)
|
|
return -1;
|
|
|
|
auto device = ((vx_device*)hdevice);
|
|
return device->mem_alloc(size, type, dev_addr);
|
|
}
|
|
|
|
extern int vx_mem_free(vx_device_h hdevice, uint64_t dev_addr) {
|
|
if (nullptr == hdevice)
|
|
return -1;
|
|
|
|
if (0 == dev_addr)
|
|
return 0;
|
|
|
|
auto device = (vx_device*)hdevice;
|
|
return device->mem_free(dev_addr);
|
|
}
|
|
|
|
extern int vx_mem_info(vx_device_h hdevice, int type, uint64_t* mem_free, uint64_t* mem_used) {
|
|
if (nullptr == hdevice)
|
|
return -1;
|
|
|
|
auto device = (vx_device*)hdevice;
|
|
return device->mem_info(type, mem_free, mem_used);
|
|
}
|
|
|
|
extern int vx_copy_to_dev(vx_device_h hdevice, uint64_t dev_addr, const void* host_ptr, uint64_t size) {
|
|
if (nullptr == hdevice)
|
|
return -1;
|
|
|
|
auto device = (vx_device*)hdevice;
|
|
|
|
// check alignment
|
|
if (!is_aligned(dev_addr, CACHE_BLOCK_SIZE))
|
|
return -1;
|
|
|
|
auto asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
|
|
|
// bound checking
|
|
if (dev_addr + asize > device->global_mem_size)
|
|
return -1;
|
|
|
|
CHECK_ERR(device->upload(dev_addr, host_ptr, asize), {
|
|
return -1;
|
|
});
|
|
|
|
DBGPRINT("COPY_TO_DEV: dev_addr=0x%lx, host_addr=0x%lx, size=%ld\n", dev_addr, (uintptr_t)host_ptr, size);
|
|
|
|
return 0;
|
|
}
|
|
|
|
extern int vx_copy_from_dev(vx_device_h hdevice, void* host_ptr, uint64_t dev_addr, uint64_t size) {
|
|
if (nullptr == hdevice)
|
|
return -1;
|
|
|
|
auto device = (vx_device*)hdevice;
|
|
|
|
// check alignment
|
|
if (!is_aligned(dev_addr, CACHE_BLOCK_SIZE))
|
|
return -1;
|
|
|
|
auto asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
|
|
|
// bound checking
|
|
if (dev_addr + asize > device->global_mem_size)
|
|
return -1;
|
|
|
|
CHECK_ERR(device->download(host_ptr, dev_addr, asize), {
|
|
return -1;
|
|
});
|
|
|
|
DBGPRINT("COPY_FROM_DEV: dev_addr=0x%lx, host_addr=0x%lx, size=%ld\n", dev_addr, (uintptr_t)host_ptr, asize);
|
|
|
|
return 0;
|
|
}
|
|
|
|
extern int vx_start(vx_device_h hdevice) {
|
|
if (nullptr == hdevice)
|
|
return -1;
|
|
|
|
auto device = (vx_device*)hdevice;
|
|
|
|
//wait_for_enter("\nPress ENTER to continue after setting up ILA trigger...");
|
|
|
|
CHECK_ERR(device->write_register(MMIO_CTL_ADDR, CTL_AP_START), {
|
|
return -1;
|
|
});
|
|
|
|
DBGPRINT("START\n");
|
|
|
|
return 0;
|
|
}
|
|
|
|
extern int vx_ready_wait(vx_device_h hdevice, uint64_t timeout) {
|
|
if (nullptr == hdevice)
|
|
return -1;
|
|
|
|
auto device = (vx_device*)hdevice;
|
|
|
|
struct timespec sleep_time;
|
|
|
|
#ifndef NDEBUG
|
|
sleep_time.tv_sec = 1;
|
|
sleep_time.tv_nsec = 0;
|
|
#else
|
|
sleep_time.tv_sec = 0;
|
|
sleep_time.tv_nsec = 1000000;
|
|
#endif
|
|
|
|
// to milliseconds
|
|
uint64_t sleep_time_ms = (sleep_time.tv_sec * 1000) + (sleep_time.tv_nsec / 1000000);
|
|
|
|
for (;;) {
|
|
uint32_t status = 0;
|
|
CHECK_ERR(device->read_register(MMIO_CTL_ADDR, &status), {
|
|
return -1;
|
|
});
|
|
bool is_done = (status & CTL_AP_DONE) == CTL_AP_DONE;
|
|
if (is_done || 0 == timeout) {
|
|
break;
|
|
}
|
|
nanosleep(&sleep_time, nullptr);
|
|
timeout -= sleep_time_ms;
|
|
};
|
|
|
|
return 0;
|
|
}
|
|
|
|
extern int vx_dcr_write(vx_device_h hdevice, uint32_t addr, uint64_t value) {
|
|
if (nullptr == hdevice)
|
|
return -1;
|
|
|
|
auto device = (vx_device*)hdevice;
|
|
|
|
CHECK_ERR(device->write_register(MMIO_DCR_ADDR, addr), {
|
|
return -1;
|
|
});
|
|
|
|
CHECK_ERR(device->write_register(MMIO_DCR_ADDR + 4, value), {
|
|
return -1;
|
|
});
|
|
|
|
// save the value
|
|
DBGPRINT("DCR_WRITE: addr=0x%x, value=0x%lx\n", addr, value);
|
|
device->dcrs.write(addr, value);
|
|
|
|
return 0;
|
|
}
|