fixed loader script stack setup
This commit is contained in:
@@ -17,11 +17,15 @@ typedef void* vx_buffer_h;
|
||||
#define VX_CAPS_MAX_CORES 0x1
|
||||
#define VX_CAPS_MAX_WARPS 0x2
|
||||
#define VX_CAPS_MAX_THREADS 0x3
|
||||
#define VX_CAPS_CACHE_LINESIZE 0x4
|
||||
#define VX_CAPS_CACHE_LINE_SIZE 0x4
|
||||
#define VX_CAPS_LOCAL_MEM_SIZE 0x5
|
||||
#define VX_CAPS_ALLOC_BASE_ADDR 0x6
|
||||
#define VX_CAPS_KERNEL_BASE_ADDR 0x7
|
||||
|
||||
#define CACHE_BLOCK_SIZE 64
|
||||
#define ALLOC_BASE_ADDR 0x00000000
|
||||
#define LOCAL_MEM_SIZE 0xffffffff
|
||||
|
||||
// open the device and connect to it
|
||||
int vx_dev_open(vx_device_h* hdevice);
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
|
||||
#define CCI_LATENCY 8
|
||||
#define CCI_LATENCY 8
|
||||
#define CCI_RAND_MOD 8
|
||||
#define CCI_RQ_SIZE 16
|
||||
#define CCI_WQ_SIZE 16
|
||||
@@ -204,11 +204,11 @@ void opae_sim::sRxPort_bus() {
|
||||
if (!mmio_req_enabled
|
||||
&& (cci_rd_it != cci_reads_.end())) {
|
||||
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 1;
|
||||
memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, cci_rd_it->block.data(), CACHE_BLOCK_SIZE);
|
||||
memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, cci_rd_it->data.data(), CACHE_BLOCK_SIZE);
|
||||
vortex_afu_->vcp2af_sRxPort_c0_hdr_mdata = cci_rd_it->mdata;
|
||||
/*printf("%0ld: [sim] CCI Rd Rsp: addr=%ld, mdata=%d, data=", timestamp, cci_rd_it->addr, cci_rd_it->mdata);
|
||||
for (int i = 0; i < CACHE_BLOCK_SIZE; ++i)
|
||||
printf("%02x", cci_rd_it->block[CACHE_BLOCK_SIZE-1-i]);
|
||||
printf("%02x", cci_rd_it->data[CACHE_BLOCK_SIZE-1-i]);
|
||||
printf("\n");*/
|
||||
cci_reads_.erase(cci_rd_it);
|
||||
}
|
||||
@@ -223,7 +223,7 @@ void opae_sim::sTxPort_bus() {
|
||||
cci_req.addr = vortex_afu_->af2cp_sTxPort_c0_hdr_address;
|
||||
cci_req.mdata = vortex_afu_->af2cp_sTxPort_c0_hdr_mdata;
|
||||
auto host_ptr = (uint64_t*)(vortex_afu_->af2cp_sTxPort_c0_hdr_address * CACHE_BLOCK_SIZE);
|
||||
memcpy(cci_req.block.data(), host_ptr, CACHE_BLOCK_SIZE);
|
||||
memcpy(cci_req.data.data(), host_ptr, CACHE_BLOCK_SIZE);
|
||||
//printf("%0ld: [sim] CCI Rd Req: addr=%ld, mdata=%d\n", timestamp, vortex_afu_->af2cp_sTxPort_c0_hdr_address, cci_req.mdata);
|
||||
cci_reads_.emplace_back(cci_req);
|
||||
}
|
||||
@@ -262,7 +262,7 @@ void opae_sim::avs_bus() {
|
||||
vortex_afu_->avs_readdatavalid = 0;
|
||||
if (dram_rd_it != dram_reads_.end()) {
|
||||
vortex_afu_->avs_readdatavalid = 1;
|
||||
memcpy(vortex_afu_->avs_readdata, dram_rd_it->block.data(), CACHE_BLOCK_SIZE);
|
||||
memcpy(vortex_afu_->avs_readdata, dram_rd_it->data.data(), CACHE_BLOCK_SIZE);
|
||||
uint32_t addr = dram_rd_it->addr;
|
||||
dram_reads_.erase(dram_rd_it);
|
||||
/*printf("%0ld: [sim] DRAM Rd Rsp: addr=%x, pending={", timestamp, addr * CACHE_BLOCK_SIZE);
|
||||
@@ -304,7 +304,7 @@ void opae_sim::avs_bus() {
|
||||
assert(0 == vortex_afu_->mem_bank_select);
|
||||
dram_rd_req_t dram_req;
|
||||
dram_req.addr = vortex_afu_->avs_address;
|
||||
ram_.read(vortex_afu_->avs_address * CACHE_BLOCK_SIZE, CACHE_BLOCK_SIZE, dram_req.block.data());
|
||||
ram_.read(vortex_afu_->avs_address * CACHE_BLOCK_SIZE, CACHE_BLOCK_SIZE, dram_req.data.data());
|
||||
dram_req.cycles_left = DRAM_LATENCY;
|
||||
for (auto& rsp : dram_reads_) {
|
||||
if (dram_req.addr == rsp.addr) {
|
||||
|
||||
@@ -40,13 +40,13 @@ private:
|
||||
|
||||
typedef struct {
|
||||
int cycles_left;
|
||||
std::array<uint8_t, CACHE_BLOCK_SIZE> block;
|
||||
std::array<uint8_t, CACHE_BLOCK_SIZE> data;
|
||||
uint32_t addr;
|
||||
} dram_rd_req_t;
|
||||
|
||||
typedef struct {
|
||||
int cycles_left;
|
||||
std::array<uint8_t, CACHE_BLOCK_SIZE> block;
|
||||
std::array<uint8_t, CACHE_BLOCK_SIZE> data;
|
||||
uint64_t addr;
|
||||
uint32_t mdata;
|
||||
} cci_rd_req_t;
|
||||
|
||||
@@ -22,10 +22,6 @@
|
||||
#include "vx_scope.h"
|
||||
#endif
|
||||
|
||||
#define CACHE_BLOCK_SIZE 64
|
||||
#define ALLOC_BASE_ADDR 0x10000000
|
||||
#define LOCAL_MEM_SIZE 0xffffffff
|
||||
|
||||
#define CHECK_RES(_expr) \
|
||||
do { \
|
||||
fpga_result res = _expr; \
|
||||
@@ -104,7 +100,7 @@ extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
|
||||
case VX_CAPS_MAX_THREADS:
|
||||
*value = device->num_threads;
|
||||
break;
|
||||
case VX_CAPS_CACHE_LINESIZE:
|
||||
case VX_CAPS_CACHE_LINE_SIZE:
|
||||
*value = CACHE_BLOCK_SIZE;
|
||||
break;
|
||||
case VX_CAPS_LOCAL_MEM_SIZE:
|
||||
|
||||
@@ -11,10 +11,6 @@
|
||||
#include <ram.h>
|
||||
#include <simulator.h>
|
||||
|
||||
#define CACHE_LINESIZE 64
|
||||
#define ALLOC_BASE_ADDR 0x10000000
|
||||
#define LOCAL_MEM_SIZE 0xffffffff
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
inline size_t align_size(size_t size, size_t alignment) {
|
||||
@@ -31,7 +27,7 @@ public:
|
||||
vx_buffer(size_t size, vx_device* device)
|
||||
: size_(size)
|
||||
, device_(device) {
|
||||
auto aligned_asize = align_size(size, CACHE_LINESIZE);
|
||||
auto aligned_asize = align_size(size, CACHE_BLOCK_SIZE);
|
||||
data_ = malloc(aligned_asize);
|
||||
}
|
||||
|
||||
@@ -75,7 +71,7 @@ public:
|
||||
|
||||
int alloc_local_mem(size_t size, size_t* dev_maddr) {
|
||||
auto dev_mem_size = LOCAL_MEM_SIZE;
|
||||
size_t asize = align_size(size, CACHE_LINESIZE);
|
||||
size_t asize = align_size(size, CACHE_BLOCK_SIZE);
|
||||
if (mem_allocation_ + asize > dev_mem_size)
|
||||
return -1;
|
||||
*dev_maddr = mem_allocation_;
|
||||
@@ -84,7 +80,7 @@ public:
|
||||
}
|
||||
|
||||
int upload(void* src, size_t dest_addr, size_t size, size_t src_offset) {
|
||||
size_t asize = align_size(size, CACHE_LINESIZE);
|
||||
size_t asize = align_size(size, CACHE_BLOCK_SIZE);
|
||||
if (dest_addr + asize > ram_.size())
|
||||
return -1;
|
||||
|
||||
@@ -98,7 +94,7 @@ public:
|
||||
}
|
||||
|
||||
int download(const void* dest, size_t src_addr, size_t size, size_t dest_offset) {
|
||||
size_t asize = align_size(size, CACHE_LINESIZE);
|
||||
size_t asize = align_size(size, CACHE_BLOCK_SIZE);
|
||||
if (src_addr + asize > ram_.size())
|
||||
return -1;
|
||||
|
||||
@@ -189,8 +185,8 @@ extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
|
||||
case VX_CAPS_MAX_THREADS:
|
||||
*value = NUM_THREADS;
|
||||
break;
|
||||
case VX_CAPS_CACHE_LINESIZE:
|
||||
*value = CACHE_LINESIZE;
|
||||
case VX_CAPS_CACHE_LINE_SIZE:
|
||||
*value = CACHE_BLOCK_SIZE;
|
||||
break;
|
||||
case VX_CAPS_LOCAL_MEM_SIZE:
|
||||
*value = 0xffffffff;
|
||||
|
||||
@@ -11,10 +11,7 @@
|
||||
#include <core.h>
|
||||
#include <VX_config.h>
|
||||
|
||||
#define CACHE_LINESIZE 64
|
||||
#define PAGE_SIZE 4096
|
||||
#define ALLOC_BASE_ADDR 0x10000000
|
||||
#define LOCAL_MEM_SIZE 0xffffffff
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@@ -32,7 +29,7 @@ public:
|
||||
vx_buffer(size_t size, vx_device* device)
|
||||
: size_(size)
|
||||
, device_(device) {
|
||||
auto aligned_asize = align_size(size, CACHE_LINESIZE);
|
||||
auto aligned_asize = align_size(size, CACHE_BLOCK_SIZE);
|
||||
data_ = malloc(aligned_asize);
|
||||
}
|
||||
|
||||
@@ -81,7 +78,7 @@ public:
|
||||
|
||||
int alloc_local_mem(size_t size, size_t* dev_maddr) {
|
||||
auto dev_mem_size = LOCAL_MEM_SIZE;
|
||||
auto asize = align_size(size, CACHE_LINESIZE);
|
||||
auto asize = align_size(size, CACHE_BLOCK_SIZE);
|
||||
if (mem_allocation_ + asize > dev_mem_size)
|
||||
return -1;
|
||||
*dev_maddr = mem_allocation_;
|
||||
@@ -90,7 +87,7 @@ public:
|
||||
}
|
||||
|
||||
int upload(void* src, size_t dest_addr, size_t size, size_t src_offset) {
|
||||
auto asize = align_size(size, CACHE_LINESIZE);
|
||||
auto asize = align_size(size, CACHE_BLOCK_SIZE);
|
||||
if (dest_addr + asize > ram_.size())
|
||||
return -1;
|
||||
|
||||
@@ -104,7 +101,7 @@ public:
|
||||
}
|
||||
|
||||
int download(const void* dest, size_t src_addr, size_t size, size_t dest_offset) {
|
||||
size_t asize = align_size(size, CACHE_LINESIZE);
|
||||
size_t asize = align_size(size, CACHE_BLOCK_SIZE);
|
||||
if (src_addr + asize > ram_.size())
|
||||
return -1;
|
||||
|
||||
@@ -236,8 +233,8 @@ extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
|
||||
case VX_CAPS_MAX_THREADS:
|
||||
*value = NUM_THREADS;
|
||||
break;
|
||||
case VX_CAPS_CACHE_LINESIZE:
|
||||
*value = CACHE_LINESIZE;
|
||||
case VX_CAPS_CACHE_LINE_SIZE:
|
||||
*value = CACHE_BLOCK_SIZE;
|
||||
break;
|
||||
case VX_CAPS_LOCAL_MEM_SIZE:
|
||||
*value = LOCAL_MEM_SIZE;
|
||||
|
||||
Reference in New Issue
Block a user