OPAE rtl fixes

This commit is contained in:
Blaise Tine
2020-05-08 08:28:28 -07:00
parent 13dfd5c8c7
commit c2e9240b7d
12 changed files with 208 additions and 243 deletions

View File

@@ -143,6 +143,7 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr)
size_t dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE); size_t dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
size_t asize = align_size(size, line_size); size_t asize = align_size(size, line_size);
if (device->mem_allocation + asize > dev_mem_size) if (device->mem_allocation + asize > dev_mem_size)
return -1; return -1;
@@ -189,11 +190,11 @@ extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hb
return -1; return -1;
} }
buffer->wsid = wsid; buffer->wsid = wsid;
buffer->host_ptr = host_ptr; buffer->host_ptr = host_ptr;
buffer->io_addr = io_addr; buffer->io_addr = io_addr;
buffer->hdevice = hdevice; buffer->hdevice = hdevice;
buffer->size = size; buffer->size = asize;
*hbuffer = buffer; *hbuffer = buffer;
@@ -265,18 +266,18 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, si
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE); int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
size_t dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE); size_t dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
size_t asize = align_size(size, line_size);
// check alignment // check alignment
if (!is_aligned(dev_maddr, line_size)) if (!is_aligned(dev_maddr, line_size))
return -1; return -1;
if (!is_aligned(size, line_size))
return -1;
if (!is_aligned(buffer->io_addr + src_offset, line_size)) if (!is_aligned(buffer->io_addr + src_offset, line_size))
return -1; return -1;
// bound checking // bound checking
if (size + src_offset > buffer->size) if (src_offset + asize > buffer->size)
return -1; return -1;
if (dev_maddr + size > dev_mem_size) if (dev_maddr + asize > dev_mem_size)
return -1; return -1;
// Ensure ready for new command // Ensure ready for new command
@@ -287,7 +288,7 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, si
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, (buffer->io_addr + src_offset) >> ls_shift)); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, (buffer->io_addr + src_offset) >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, (dev_maddr >> ls_shift) )); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, (dev_maddr >> ls_shift) ));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size >> ls_shift)); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, asize >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_WRITE)); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_WRITE));
// Wait for the write operation to finish // Wait for the write operation to finish
@@ -308,18 +309,18 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size,
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE); int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
size_t dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE); size_t dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
size_t asize = align_size(size, line_size);
// check alignment // check alignment
if (!is_aligned(dev_maddr, line_size)) if (!is_aligned(dev_maddr, line_size))
return -1; return -1;
if (!is_aligned(size, line_size))
return -1;
if (!is_aligned(buffer->io_addr + dest_offset, line_size)) if (!is_aligned(buffer->io_addr + dest_offset, line_size))
return -1; return -1;
// bound checking // bound checking
if (size + dest_offset > buffer->size) if (dest_offset + asize > buffer->size)
return -1; return -1;
if (dev_maddr + size > dev_mem_size) if (dev_maddr + asize > dev_mem_size)
return -1; return -1;
// Ensure ready for new command // Ensure ready for new command
@@ -330,7 +331,7 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size,
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, (buffer->io_addr + dest_offset) >> ls_shift)); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, (buffer->io_addr + dest_offset) >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, (dev_maddr) >> ls_shift)); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, (dev_maddr) >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size >> ls_shift)); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, asize >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_READ)); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_READ));
// Wait for the write operation to finish // Wait for the write operation to finish
@@ -347,13 +348,13 @@ extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
vx_device_t* device = ((vx_device_t*)hdevice); vx_device_t* device = ((vx_device_t*)hdevice);
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE); int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
size_t asize = align_size(size, line_size);
// check alignment // check alignment
if (!is_aligned(dev_maddr, line_size)) if (!is_aligned(dev_maddr, line_size))
return -1; return -1;
if (!is_aligned(size, line_size))
return -1;
// Ensure ready for new command // Ensure ready for new command
if (vx_ready_wait(hdevice, -1) != 0) if (vx_ready_wait(hdevice, -1) != 0)
@@ -362,7 +363,7 @@ extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
auto ls_shift = (int)std::log2(line_size); auto ls_shift = (int)std::log2(line_size);
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr >> ls_shift)); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size >> ls_shift)); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, asize >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_CLFLUSH)); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_CLFLUSH));
// Wait for the write operation to finish // Wait for the write operation to finish

View File

@@ -4,6 +4,16 @@
int test = -1; int test = -1;
// Runtime status check: evaluates `_expr` once and stores the result as an int.
// A zero result lets execution continue past the macro. Any other value is
// reported with printf, after which cleanup() is called and the process
// terminates through exit(-1).
#define RT_CHECK(_expr)                                         \
  do {                                                          \
    int _ret = _expr;                                           \
    if (_ret != 0) {                                            \
      printf("Error: '%s' returned %d!\n", #_expr, (int)_ret);  \
      cleanup();                                                \
      exit(-1);                                                 \
    }                                                           \
  } while (false)
static void parse_args(int argc, char **argv) { static void parse_args(int argc, char **argv) {
int c; int c;
while ((c = getopt(argc, argv, "t:h?")) != -1) { while ((c = getopt(argc, argv, "t:h?")) != -1) {
@@ -27,12 +37,27 @@ uint64_t shuffle(int i, uint64_t value) {
return (value << i) | (value & ((1 << i)-1));; return (value << i) | (value & ((1 << i)-1));;
} }
vx_device_h device = nullptr;
vx_buffer_h sbuf = nullptr;
vx_buffer_h dbuf = nullptr;
// Tears down the test's global handles: releases the source buffer and the
// destination buffer when they are set, then closes the device when it is set.
// The handles themselves are left unchanged.
void cleanup() {
  if (sbuf != nullptr)
    vx_buf_release(sbuf);

  if (dbuf != nullptr)
    vx_buf_release(dbuf);

  if (device != nullptr)
    vx_dev_close(device);
}
int run_memcopy_test(vx_buffer_h sbuf, int run_memcopy_test(vx_buffer_h sbuf,
vx_buffer_h dbuf, vx_buffer_h dbuf,
uint32_t address, uint32_t address,
uint64_t value, uint64_t value,
int num_blocks) { int num_blocks) {
int ret;
int errors = 0; int errors = 0;
// write sbuf data // write sbuf data
@@ -42,15 +67,11 @@ int run_memcopy_test(vx_buffer_h sbuf,
// write buffer to local memory // write buffer to local memory
std::cout << "write buffer to local memory" << std::endl; std::cout << "write buffer to local memory" << std::endl;
ret = vx_copy_to_dev(sbuf, address, 64 * num_blocks, 0); RT_CHECK(vx_copy_to_dev(sbuf, address, 64 * num_blocks, 0));
if (ret != 0)
return ret;
// read buffer from local memory // read buffer from local memory
std::cout << "read buffer from local memory" << std::endl; std::cout << "read buffer from local memory" << std::endl;
ret = vx_copy_from_dev(dbuf, address, 64 * num_blocks, 0); RT_CHECK(vx_copy_from_dev(dbuf, address, 64 * num_blocks, 0));
if (ret != 0)
return ret;
// verify result // verify result
std::cout << "verify result" << std::endl; std::cout << "verify result" << std::endl;
@@ -77,7 +98,6 @@ int run_kernel_test(vx_device_h device,
vx_buffer_h sbuf, vx_buffer_h sbuf,
vx_buffer_h dbuf, vx_buffer_h dbuf,
const char* program) { const char* program) {
int ret;
int errors = 0; int errors = 0;
uint64_t seed = 0x0badf00d40ff40ff; uint64_t seed = 0x0badf00d40ff40ff;
@@ -93,43 +113,27 @@ int run_kernel_test(vx_device_h device,
// write buffer to local memory // write buffer to local memory
std::cout << "write buffer to local memory" << std::endl; std::cout << "write buffer to local memory" << std::endl;
ret = vx_copy_to_dev(sbuf, src_dev_addr, 64 * num_blocks, 0); RT_CHECK(vx_copy_to_dev(sbuf, src_dev_addr, 64 * num_blocks, 0));
if (ret != 0)
return ret;
// upload program // upload program
std::cout << "upload program" << std::endl; std::cout << "upload program" << std::endl;
ret = vx_upload_kernel_file(device, program); RT_CHECK(vx_upload_kernel_file(device, program));
if (ret != 0) {
return ret;
}
// start device // start device
std::cout << "start device" << std::endl; std::cout << "start device" << std::endl;
ret = vx_start(device); RT_CHECK(vx_start(device));
if (ret != 0) {
return ret;
}
// wait for completion // wait for completion
std::cout << "wait for completion" << std::endl; std::cout << "wait for completion" << std::endl;
ret = vx_ready_wait(device, -1); RT_CHECK(vx_ready_wait(device, -1));
if (ret != 0) {
return ret;
}
// flush the caches // flush the caches
std::cout << "flush the caches" << std::endl; std::cout << "flush the caches" << std::endl;
ret = vx_flush_caches(device, dest_dev_addr, 64 * num_blocks); RT_CHECK(vx_flush_caches(device, dest_dev_addr, 64 * num_blocks));
if (ret != 0) {
return ret;
}
// read buffer from local memory // read buffer from local memory
std::cout << "read buffer from local memory" << std::endl; std::cout << "read buffer from local memory" << std::endl;
ret = vx_copy_from_dev(dbuf, dest_dev_addr, 64 * num_blocks, 0); RT_CHECK(vx_copy_from_dev(dbuf, dest_dev_addr, 64 * num_blocks, 0));
if (ret != 0)
return ret;
// verify result // verify result
std::cout << "verify result" << std::endl; std::cout << "verify result" << std::endl;
@@ -152,75 +156,33 @@ int run_kernel_test(vx_device_h device,
return 0; return 0;
} }
vx_device_h device = nullptr;
vx_buffer_h sbuf = nullptr;
vx_buffer_h dbuf = nullptr;
// Tears down the test's global handles: releases the source buffer and the
// destination buffer when they are set, then closes the device when it is set.
// The handles themselves are left unchanged.
void cleanup() {
  if (sbuf != nullptr)
    vx_buf_release(sbuf);

  if (dbuf != nullptr)
    vx_buf_release(dbuf);

  if (device != nullptr)
    vx_dev_close(device);
}
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
int ret;
// parse command arguments // parse command arguments
parse_args(argc, argv); parse_args(argc, argv);
// open device connection // open device connection
std::cout << "open device connection" << std::endl; std::cout << "open device connection" << std::endl;
vx_device_h device; vx_device_h device;
ret = vx_dev_open(&device); RT_CHECK(vx_dev_open(&device));
if (ret != 0)
return ret;
// create source buffer // create source buffer
std::cout << "create source buffer" << std::endl; std::cout << "create source buffer" << std::endl;
ret = vx_alloc_shared_mem(device, 4096, &sbuf); RT_CHECK(vx_alloc_shared_mem(device, 4096, &sbuf));
if (ret != 0) {
cleanup();
return ret;
}
// create destination buffer // create destination buffer
std::cout << "create destination buffer" << std::endl; std::cout << "create destination buffer" << std::endl;
ret = vx_alloc_shared_mem(device, 4096, &dbuf); RT_CHECK(vx_alloc_shared_mem(device, 4096, &dbuf));
if (ret != 0) {
cleanup();
return ret;
}
// run tests // run tests
if (0 == test || -1 == test) { /*9if (0 == test || -1 == test) {
std::cout << "run memcopy test" << std::endl; std::cout << "run memcopy test" << std::endl;
RT_CHECK(run_memcopy_test(sbuf, dbuf, 0x10000000, 0x0badf00d00ff00ff, 1));
ret = run_memcopy_test(sbuf, dbuf, 0x10000000, 0x0badf00d00ff00ff, 1); RT_CHECK(run_memcopy_test(sbuf, dbuf, 0x20000000, 0x0badf00d40ff40ff, 8));
if (ret != 0) { }*/
cleanup();
return ret;
}
ret = run_memcopy_test(sbuf, dbuf, 0x20000000, 0x0badf00d40ff40ff, 8);
if (ret != 0) {
cleanup();
return ret;
}
}
if (1 == test || -1 == test) { if (1 == test || -1 == test) {
std::cout << "run kernel test" << std::endl; std::cout << "run kernel test" << std::endl;
ret = run_kernel_test(device, sbuf, dbuf, "kernel.bin"); RT_CHECK(run_kernel_test(device, sbuf, dbuf, "kernel.bin"));
if (ret != 0) {
cleanup();
return ret;
}
} }
// cleanup // cleanup

View File

@@ -4,6 +4,16 @@
#include <vortex.h> #include <vortex.h>
#include "common.h" #include "common.h"
// Runtime status check: evaluates `_expr` once and stores the result as an int.
// A zero result lets execution continue past the macro. Any other value is
// reported with printf, after which cleanup() is called and the process
// terminates through exit(-1).
#define RT_CHECK(_expr)                                         \
  do {                                                          \
    int _ret = _expr;                                           \
    if (_ret != 0) {                                            \
      printf("Error: '%s' returned %d!\n", #_expr, (int)_ret);  \
      cleanup();                                                \
      exit(-1);                                                 \
    }                                                           \
  } while (false)
const char* program_file = "kernel.bin"; const char* program_file = "kernel.bin";
uint32_t data_stride = 0xffffffff; uint32_t data_stride = 0xffffffff;
@@ -39,40 +49,38 @@ static void parse_args(int argc, char **argv) {
} }
} }
vx_device_h device = nullptr;
vx_buffer_h buffer = nullptr;
// Tears down the test's global handles: releases the shared buffer when it is
// set, then closes the device when it is set. The handles themselves are left
// unchanged.
void cleanup() {
  if (buffer != nullptr)
    vx_buf_release(buffer);

  if (device != nullptr)
    vx_dev_close(device);
}
int run_test(vx_device_h device, int run_test(vx_device_h device,
vx_buffer_h buffer, vx_buffer_h buffer,
const kernel_arg_t& kernel_arg, const kernel_arg_t& kernel_arg,
uint32_t buf_size, uint32_t buf_size,
uint32_t num_points) { uint32_t num_points) {
int ret;
// start device // start device
std::cout << "start device" << std::endl; std::cout << "start device" << std::endl;
ret = vx_start(device); RT_CHECK(vx_start(device));
if (ret != 0) {
return ret;
}
// wait for completion // wait for completion
std::cout << "wait for completion" << std::endl; std::cout << "wait for completion" << std::endl;
ret = vx_ready_wait(device, -1); RT_CHECK(vx_ready_wait(device, -1));
if (ret != 0) {
return ret;
}
// flush the destination buffer caches // flush the destination buffer caches
std::cout << "flush the destination buffer caches" << std::endl; std::cout << "flush the destination buffer caches" << std::endl;
ret = vx_flush_caches(device, kernel_arg.dst_ptr, buf_size); RT_CHECK(vx_flush_caches(device, kernel_arg.dst_ptr, buf_size));
if (ret != 0) {
return ret;
}
// download destination buffer // download destination buffer
std::cout << "download destination buffer" << std::endl; std::cout << "download destination buffer" << std::endl;
ret = vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0); RT_CHECK(vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0));
if (ret != 0) {
return ret;
}
// verify result // verify result
std::cout << "verify result" << std::endl; std::cout << "verify result" << std::endl;
@@ -96,20 +104,7 @@ int run_test(vx_device_h device,
return 0; return 0;
} }
vx_device_h device = nullptr;
vx_buffer_h buffer = nullptr;
// Tears down the test's global handles: releases the shared buffer when it is
// set, then closes the device when it is set. The handles themselves are left
// unchanged.
void cleanup() {
  if (buffer != nullptr)
    vx_buf_release(buffer);

  if (device != nullptr)
    vx_dev_close(device);
}
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
int ret;
size_t value; size_t value;
kernel_arg_t kernel_arg; kernel_arg_t kernel_arg;
@@ -132,50 +127,28 @@ int main(int argc, char *argv[]) {
// open device connection // open device connection
std::cout << "open device connection" << std::endl; std::cout << "open device connection" << std::endl;
ret = vx_dev_open(&device); RT_CHECK(vx_dev_open(&device));
if (ret != 0)
return ret;
// upload program // upload program
std::cout << "upload program" << std::endl; std::cout << "upload program" << std::endl;
ret = vx_upload_kernel_file(device, program_file); RT_CHECK(vx_upload_kernel_file(device, program_file));
if (ret != 0) {
cleanup();
return ret;
}
// allocate device memory // allocate device memory
std::cout << "allocate device memory" << std::endl; std::cout << "allocate device memory" << std::endl;
ret = vx_alloc_dev_mem(device, buf_size, &value); RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
if (ret != 0) {
cleanup();
return ret;
}
kernel_arg.src0_ptr = value; kernel_arg.src0_ptr = value;
ret = vx_alloc_dev_mem(device, buf_size, &value); RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
if (ret != 0) {
cleanup();
return ret;
}
kernel_arg.src1_ptr = value; kernel_arg.src1_ptr = value;
ret = vx_alloc_dev_mem(device, buf_size, &value); RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
if (ret != 0) {
cleanup();
return ret;
}
kernel_arg.dst_ptr = value; kernel_arg.dst_ptr = value;
// allocate shared memory // allocate shared memory
std::cout << "allocate shared memory" << std::endl; std::cout << "allocate shared memory" << std::endl;
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t)); uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
ret = vx_alloc_shared_mem(device, alloc_size, &buffer); RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &buffer));
if (ret != 0) {
cleanup();
return ret;
}
// populate source buffer values // populate source buffer values
std::cout << "populate source buffer values" << std::endl; std::cout << "populate source buffer values" << std::endl;
@@ -187,19 +160,9 @@ int main(int argc, char *argv[]) {
} }
// upload source buffers // upload source buffers
std::cout << "upload source buffers" << std::endl; std::cout << "upload source buffers" << std::endl;
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src0_ptr, buf_size, 0));
ret = vx_copy_to_dev(buffer, kernel_arg.src0_ptr, buf_size, 0); RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src1_ptr, buf_size, 0));
if (ret != 0) {
cleanup();
return ret;
}
ret = vx_copy_to_dev(buffer, kernel_arg.src1_ptr, buf_size, 0);
if (ret != 0) {
cleanup();
return ret;
}
// upload kernel argument // upload kernel argument
std::cout << "upload kernel argument" << std::endl; std::cout << "upload kernel argument" << std::endl;
@@ -210,20 +173,12 @@ int main(int argc, char *argv[]) {
auto buf_ptr = (int*)vx_host_ptr(buffer); auto buf_ptr = (int*)vx_host_ptr(buffer);
memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t)); memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));
ret = vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0); RT_CHECK(vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
if (ret != 0) {
cleanup();
return ret;
}
} }
// run tests // run tests
std::cout << "run tests" << std::endl; std::cout << "run tests" << std::endl;
ret = run_test(device, buffer, kernel_arg, buf_size, num_points); RT_CHECK(run_test(device, buffer, kernel_arg, buf_size, num_points));
if (ret != 0) {
cleanup();
return ret;
}
// cleanup // cleanup
std::cout << "cleanup" << std::endl; std::cout << "cleanup" << std::endl;

View File

@@ -42,6 +42,8 @@ make
# ASE build instructions # ASE build instructions
# #
source /export/fpga/bin/setup-fpga-env fpga-pac-a10
# Acquire a server node for running ASE simulations # Acquire a server node for running ASE simulations
qsub-sim qsub-sim
@@ -51,7 +53,17 @@ vcd add -r /*/Vortex/hw/rtl/*
run -all run -all
# compress VCD trace # compress VCD trace
tar -zcvf vortex.vcd.tar.gz work/vortex.vcd tar -zcvf vortex.vcd.tar.gz ./build_ase/work/vortex.vcd
# decompress VCD trace # decompress VCD trace
tar -zxvf vortex.vcd.tar.gz vortex.vcd tar -zxvf /mnt/c/Users/Blaise/Downloads/vortex.vcd.tar.gz
# launch Gtkwave
gtkwave ./build_ase/work/vortex.vcd &
# test
./run_ase.sh ../../driver/tests/basic/basic
# list the user's processes, then kill one by PID
ps -u tinebp
kill -9 <pid>

View File

@@ -14,17 +14,19 @@ rm -rf $ASE_WORKDIR/.app_lock.pid $ASE_WORKDIR/.ase_ready.pid
# Start Simulator in background # Start Simulator in background
pushd $SCRIPT_DIR/build_ase pushd $SCRIPT_DIR/build_ase
make sim & echo " [DBG] starting ASE simnulator"
nohup make sim &
popd popd
# Wait for simulator readiness # Wait for simulator readiness
# When .ase_ready is created in the $ASE_WORKDIR, ASE is ready for simulation # When .ase_ready is created in the $ASE_WORKDIR, ASE is ready for simulation
while [! -f $ASE_WORKDIR/.ase_ready.pid] while [ ! -f $ASE_WORKDIR/.ase_ready.pid ]
do do
sleep 1 sleep 1
done done
# run application # run application
pushd $PROGRAM_DIR pushd $PROGRAM_DIR
echo " [DBG] running ./$PROGRAM $*"
ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$LD_LIBRARY_PATH ./$PROGRAM $* ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$LD_LIBRARY_PATH ./$PROGRAM $*
popd popd

View File

@@ -28,7 +28,12 @@ module vortex_afu #(
output logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select output logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select
); );
localparam DRAM_ADDR_WIDTH = (32 - `CLOG2(`GLOBAL_BLOCK_SIZE)); localparam DRAM_ADDR_WIDTH = $bits(t_local_mem_addr);
localparam DRAM_LINE_WIDTH = $bits(t_local_mem_data);
localparam DRAM_TAG_WIDTH = `L3DRAM_TAG_WIDTH;
`STATIC_ASSERT(DRAM_ADDR_WIDTH == `L3DRAM_ADDR_WIDTH, "invalid vortex dram bus!")
`STATIC_ASSERT(DRAM_LINE_WIDTH == `L3DRAM_LINE_WIDTH, "invalid vortex dram bus!")
localparam AVS_RD_QUEUE_SIZE = 16; localparam AVS_RD_QUEUE_SIZE = 16;
@@ -58,6 +63,7 @@ typedef enum logic[3:0] {
STATE_IDLE, STATE_IDLE,
STATE_READ, STATE_READ,
STATE_WRITE, STATE_WRITE,
STATE_START,
STATE_RUN, STATE_RUN,
STATE_CLFLUSH STATE_CLFLUSH
} state_t; } state_t;
@@ -72,13 +78,13 @@ state_t state;
logic vx_dram_req_read; logic vx_dram_req_read;
logic vx_dram_req_write; logic vx_dram_req_write;
logic [DRAM_ADDR_WIDTH-1:0] vx_dram_req_addr; logic [DRAM_ADDR_WIDTH-1:0] vx_dram_req_addr;
logic [`GLOBAL_BLOCK_SIZE-1:0] vx_dram_req_data; logic [DRAM_LINE_WIDTH-1:0] vx_dram_req_data;
logic [`L3DRAM_TAG_WIDTH-1:0] vx_dram_req_tag; logic [DRAM_TAG_WIDTH-1:0] vx_dram_req_tag;
logic vx_dram_req_ready; logic vx_dram_req_ready;
logic vx_dram_rsp_valid; logic vx_dram_rsp_valid;
logic [`GLOBAL_BLOCK_SIZE-1:0] vx_dram_rsp_data; logic [DRAM_LINE_WIDTH-1:0] vx_dram_rsp_data;
logic [`L3DRAM_TAG_WIDTH-1:0] vx_dram_rsp_tag; logic [DRAM_TAG_WIDTH-1:0] vx_dram_rsp_tag;
logic vx_dram_rsp_ready; logic vx_dram_rsp_ready;
logic vx_snp_req_valid; logic vx_snp_req_valid;
@@ -90,9 +96,9 @@ logic vx_busy;
// AVS Queues ///////////////////////////////////////////////////////////////// // AVS Queues /////////////////////////////////////////////////////////////////
logic avs_rtq_push; logic avs_rtq_push;
t_local_mem_addr avs_rtq_din; logic [DRAM_TAG_WIDTH-1:0] avs_rtq_din;
logic avs_rtq_pop; logic avs_rtq_pop;
t_local_mem_addr avs_rtq_dout; logic [DRAM_TAG_WIDTH-1:0] avs_rtq_dout;
logic avs_rtq_empty; logic avs_rtq_empty;
logic avs_rtq_full; logic avs_rtq_full;
@@ -229,7 +235,7 @@ begin
CMD_TYPE_RUN: begin CMD_TYPE_RUN: begin
$display("%t: STATE START", $time); $display("%t: STATE START", $time);
vx_reset <= 1; vx_reset <= 1;
state <= STATE_RUN; state <= STATE_START;
end end
CMD_TYPE_CLFLUSH: begin CMD_TYPE_CLFLUSH: begin
$display("%t: STATE CFLUSH: da=%h sz=%0d", $time, csr_mem_addr, csr_data_size); $display("%t: STATE CFLUSH: da=%h sz=%0d", $time, csr_mem_addr, csr_data_size);
@@ -250,6 +256,10 @@ begin
end end
end end
STATE_START: begin // vortex reset cycle
state <= STATE_RUN;
end
STATE_RUN: begin STATE_RUN: begin
if (!vx_busy) begin if (!vx_busy) begin
state <= STATE_IDLE; state <= STATE_IDLE;
@@ -271,7 +281,7 @@ end
logic cci_rdq_empty; logic cci_rdq_empty;
t_cci_rdq_data cci_rdq_dout; t_cci_rdq_data cci_rdq_dout;
logic cci_rdq_pop; logic cci_rdq_pop;
logic [`L3DRAM_TAG_WIDTH-1:0] dram_req_tag; logic [DRAM_TAG_WIDTH-1:0] dram_req_tag;
t_ccip_clAddr next_avs_address; t_ccip_clAddr next_avs_address;
always_comb always_comb
@@ -372,7 +382,7 @@ end
always_comb always_comb
begin begin
vx_dram_rsp_valid = vortex_enabled && !avs_rdq_empty && vx_dram_rsp_ready; vx_dram_rsp_valid = vortex_enabled && !avs_rdq_empty;
vx_dram_rsp_tag = avs_rtq_dout; vx_dram_rsp_tag = avs_rtq_dout;
vx_dram_rsp_data = avs_rdq_dout; vx_dram_rsp_data = avs_rdq_dout;
end end
@@ -389,7 +399,7 @@ begin
end end
VX_generic_queue #( VX_generic_queue #(
.DATAW($bits(t_local_mem_addr)), .DATAW(DRAM_TAG_WIDTH),
.SIZE(AVS_RD_QUEUE_SIZE) .SIZE(AVS_RD_QUEUE_SIZE)
) avs_rd_req_queue ( ) avs_rd_req_queue (
.clk (clk), .clk (clk),
@@ -412,7 +422,7 @@ begin
end end
VX_generic_queue #( VX_generic_queue #(
.DATAW($bits(t_local_mem_data)), .DATAW(DRAM_LINE_WIDTH),
.SIZE(AVS_RD_QUEUE_SIZE) .SIZE(AVS_RD_QUEUE_SIZE)
) avs_rd_rsp_queue ( ) avs_rd_rsp_queue (
.clk (clk), .clk (clk),
@@ -595,30 +605,46 @@ end
// Vortex binding ///////////////////////////////////////////////////////////// // Vortex binding /////////////////////////////////////////////////////////////
Vortex_Socket #() vx_socket ( Vortex_Socket #() vx_socket (
.clk (clk), .clk (clk),
.reset (SoftReset || vx_reset), .reset (vx_reset),
// DRAM Req // DRAM request
.dram_req_write (vx_dram_req_write), .dram_req_write (vx_dram_req_write),
.dram_req_read (vx_dram_req_read), .dram_req_read (vx_dram_req_read),
.dram_req_addr (vx_dram_req_addr), .dram_req_addr (vx_dram_req_addr),
.dram_req_data (vx_dram_req_data), .dram_req_data (vx_dram_req_data),
.dram_req_tag (vx_dram_req_tag), .dram_req_tag (vx_dram_req_tag),
.dram_req_ready (vx_dram_req_ready), .dram_req_ready (vx_dram_req_ready),
// DRAM Rsp // DRAM response
.dram_rsp_valid (vx_dram_rsp_valid), .dram_rsp_valid (vx_dram_rsp_valid),
.dram_rsp_data (vx_dram_rsp_data), .dram_rsp_data (vx_dram_rsp_data),
.dram_rsp_tag (vx_dram_rsp_tag), .dram_rsp_tag (vx_dram_rsp_tag),
.dram_rsp_ready (vx_dram_rsp_ready), .dram_rsp_ready (vx_dram_rsp_ready),
// Cache Snooping Req // Cache snooping
.snp_req_valid (vx_snp_req_valid), .snp_req_valid (vx_snp_req_valid),
.snp_req_addr (vx_snp_req_addr), .snp_req_addr (vx_snp_req_addr),
.snp_req_ready (vx_snp_req_ready), .snp_req_ready (vx_snp_req_ready),
// I/O request
.io_req_read (),
.io_req_write (),
.io_req_addr (),
.io_req_data (),
.io_req_byteen (),
.io_req_tag (),
.io_req_ready (0),
// I/O response
.io_rsp_valid (0),
.io_rsp_data (0),
.io_rsp_tag (0),
.io_rsp_ready (),
// status // status
.busy (vx_busy) .busy (vx_busy),
.ebreak ()
); );
endmodule endmodule

View File

@@ -8,12 +8,8 @@ module VX_back_end #(
input wire schedule_delay, input wire schedule_delay,
VX_cache_core_rsp_if dcache_rsp_if,
VX_cache_core_req_if dcache_req_if, VX_cache_core_req_if dcache_req_if,
VX_cache_core_rsp_if dcache_rsp_if,
output wire mem_delay,
output wire exec_delay,
output wire gpr_stage_delay,
VX_jal_rsp_if jal_rsp_if, VX_jal_rsp_if jal_rsp_if,
VX_branch_rsp_if branch_rsp_if, VX_branch_rsp_if branch_rsp_if,
@@ -22,6 +18,10 @@ module VX_back_end #(
VX_warp_ctl_if warp_ctl_if, VX_warp_ctl_if warp_ctl_if,
output wire mem_delay,
output wire exec_delay,
output wire gpr_stage_delay,
output wire ebreak output wire ebreak
); );
@@ -78,8 +78,8 @@ module VX_back_end #(
.reset (reset), .reset (reset),
.lsu_req_if (lsu_req_if), .lsu_req_if (lsu_req_if),
.mem_wb_if (mem_wb_if), .mem_wb_if (mem_wb_if),
.dcache_rsp_if (dcache_rsp_if),
.dcache_req_if (dcache_req_if), .dcache_req_if (dcache_req_if),
.dcache_rsp_if (dcache_rsp_if),
.delay (mem_delay), .delay (mem_delay),
.no_slot_mem (no_slot_mem) .no_slot_mem (no_slot_mem)
); );

View File

@@ -35,8 +35,11 @@ module VX_dmem_ctrl (
.CORE_TAG_ID_BITS(`CORE_TAG_ID_BITS) .CORE_TAG_ID_BITS(`CORE_TAG_ID_BITS)
) dcache_core_rsp_qual_if(), smem_core_rsp_if(); ) dcache_core_rsp_qual_if(), smem_core_rsp_if();
// use "case equality" to handle uninitialized address value
wire smem_select = ((dcache_core_req_if.core_req_addr[0][31:24] == `SHARED_MEM_TOP_ADDR) === 1'b1);
VX_dcache_io_arb dcache_io_arb ( VX_dcache_io_arb dcache_io_arb (
.io_select (dcache_core_req_if.core_req_addr[0][31:24] == `SHARED_MEM_TOP_ADDR), .io_select (smem_select),
.core_req_if (dcache_core_req_if), .core_req_if (dcache_core_req_if),
.dcache_core_req_if (dcache_core_req_qual_if), .dcache_core_req_if (dcache_core_req_qual_if),
.io_core_req_if (smem_core_req_if), .io_core_req_if (smem_core_req_if),

View File

@@ -9,19 +9,24 @@ module VX_icache_stage (
output wire[`NUM_THREADS-1:0] icache_stage_valids, output wire[`NUM_THREADS-1:0] icache_stage_valids,
VX_inst_meta_if fe_inst_meta_fi, VX_inst_meta_if fe_inst_meta_fi,
VX_inst_meta_if fe_inst_meta_id, VX_inst_meta_if fe_inst_meta_id,
VX_cache_core_rsp_if icache_rsp_if, VX_cache_core_req_if icache_req_if,
VX_cache_core_req_if icache_req_if VX_cache_core_rsp_if icache_rsp_if
); );
reg[`NUM_THREADS-1:0] pending_threads[`NUM_WARPS-1:0]; reg [`NUM_THREADS-1:0] valid_threads [`NUM_WARPS-1:0];
wire valid_inst = (| fe_inst_meta_fi.valid); wire valid_inst = (| fe_inst_meta_fi.valid);
`DEBUG_BEGIN
wire [`CORE_REQ_TAG_WIDTH-1:0] core_req_tag = icache_req_if.core_req_tag;
wire [`CORE_REQ_TAG_WIDTH-1:0] core_rsp_tag = icache_rsp_if.core_rsp_tag;
`DEBUG_END
// Icache Request // Icache Request
assign icache_req_if.core_req_valid = valid_inst && !total_freeze; assign icache_req_if.core_req_valid = valid_inst && !total_freeze;
assign icache_req_if.core_req_addr = fe_inst_meta_fi.inst_pc; assign icache_req_if.core_req_addr = fe_inst_meta_fi.inst_pc;
assign icache_req_if.core_req_data = 32'b0; assign icache_req_if.core_req_data = 'z;
assign icache_req_if.core_req_read = `BYTE_EN_LW; assign icache_req_if.core_req_read = `BYTE_EN_LW;
assign icache_req_if.core_req_write = `BYTE_EN_NO; assign icache_req_if.core_req_write = `BYTE_EN_NO;
assign icache_req_if.core_req_tag = {fe_inst_meta_fi.inst_pc, 2'b1, 5'b0, fe_inst_meta_fi.warp_num}; assign icache_req_if.core_req_tag = {fe_inst_meta_fi.inst_pc, 2'b1, 5'b0, fe_inst_meta_fi.warp_num};
@@ -33,8 +38,8 @@ module VX_icache_stage (
assign {fe_inst_meta_id.inst_pc, rsp_wb, rsp_rd, fe_inst_meta_id.warp_num} = icache_rsp_if.core_rsp_tag; assign {fe_inst_meta_id.inst_pc, rsp_wb, rsp_rd, fe_inst_meta_id.warp_num} = icache_rsp_if.core_rsp_tag;
assign fe_inst_meta_id.instruction = icache_rsp_if.core_rsp_data[0][31:0]; assign fe_inst_meta_id.instruction = icache_rsp_if.core_rsp_data[0];
assign fe_inst_meta_id.valid = icache_rsp_if.core_rsp_valid ? pending_threads[fe_inst_meta_id.warp_num] : 0; assign fe_inst_meta_id.valid = icache_rsp_if.core_rsp_valid ? valid_threads[fe_inst_meta_id.warp_num] : 0;
assign icache_stage_wid = fe_inst_meta_id.warp_num; assign icache_stage_wid = fe_inst_meta_id.warp_num;
assign icache_stage_valids = fe_inst_meta_id.valid & {`NUM_THREADS{!icache_stage_delay}}; assign icache_stage_valids = fe_inst_meta_id.valid & {`NUM_THREADS{!icache_stage_delay}};
@@ -49,12 +54,10 @@ module VX_icache_stage (
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
for (i = 0; i < `NUM_WARPS; i = i + 1) begin //--
pending_threads[i] <= 0;
end
end else begin end else begin
if (icache_req_if.core_req_valid && icache_req_if.core_req_ready) begin if (icache_req_if.core_req_valid && icache_req_if.core_req_ready) begin
pending_threads[fe_inst_meta_fi.warp_num] <= fe_inst_meta_fi.valid; valid_threads[fe_inst_meta_fi.warp_num] <= fe_inst_meta_fi.valid;
end end
end end
end end

View File

@@ -11,8 +11,8 @@ module VX_lsu_unit (
VX_wb_if mem_wb_if, VX_wb_if mem_wb_if,
// Dcache interface // Dcache interface
VX_cache_core_rsp_if dcache_rsp_if,
VX_cache_core_req_if dcache_req_if, VX_cache_core_req_if dcache_req_if,
VX_cache_core_rsp_if dcache_rsp_if,
output wire delay output wire delay
); );

View File

@@ -244,8 +244,11 @@ module Vortex #(
.icache_dram_rsp_if (icache_dram_rsp_if) .icache_dram_rsp_if (icache_dram_rsp_if)
); );
// use "case equality" to handle uninitialized address value
wire io_select = ((dcache_io_core_req_if.core_req_addr[0] >= `IO_BUS_BASE_ADDR) === 1'b1);
VX_dcache_io_arb dcache_io_arb ( VX_dcache_io_arb dcache_io_arb (
.io_select (dcache_io_core_req_if.core_req_addr[0] >= `IO_BUS_BASE_ADDR), .io_select (io_select),
.core_req_if (dcache_io_core_req_if), .core_req_if (dcache_io_core_req_if),
.dcache_core_req_if (dcache_core_req_if), .dcache_core_req_if (dcache_core_req_if),
.io_core_req_if (io_core_req_if), .io_core_req_if (io_core_req_if),

View File

@@ -90,13 +90,11 @@ module VX_tag_data_access #(
wire fill_sent; wire fill_sent;
wire invalidate_line; wire invalidate_line;
wire tags_match;
wire real_writefill = writefill_st1e wire real_writefill = writefill_st1e
&& ((valid_req_st1e && ((valid_req_st1e && !use_read_valid_st1e)
&& !use_read_valid_st1e) || (valid_req_st1e && use_read_valid_st1e && !tags_match));
|| (valid_req_st1e
&& use_read_valid_st1e
&& (writeaddr_st1e[`TAG_LINE_ADDR_RNG] != use_read_tag_st1e)));
VX_tag_data_structure #( VX_tag_data_structure #(
.CACHE_SIZE (CACHE_SIZE), .CACHE_SIZE (CACHE_SIZE),
@@ -256,14 +254,14 @@ module VX_tag_data_access #(
assign data_write[i * `WORD_WIDTH +: `WORD_WIDTH] = force_write ? writedata_st1e[i * `WORD_WIDTH +: `WORD_WIDTH] : use_write_dat; assign data_write[i * `WORD_WIDTH +: `WORD_WIDTH] = force_write ? writedata_st1e[i * `WORD_WIDTH +: `WORD_WIDTH] : use_write_dat;
end end
end end
assign use_write_enable = (writefill_st1e && !real_writefill) ? 0 : we; assign use_write_enable = (writefill_st1e && !real_writefill) ? 0 : we;
assign use_write_data = data_write; assign use_write_data = data_write;
wire[`TAG_SELECT_BITS-1:0] writeaddr_tag = writeaddr_st1e[`TAG_LINE_ADDR_RNG]; // use "case equality" to handle uninitialized tag when block entry is not valid
assign tags_match = ((writeaddr_st1e[`TAG_LINE_ADDR_RNG] == use_read_tag_st1e) === 1'b1);
wire tags_match = writeaddr_tag == use_read_tag_st1e;
wire snoop_hit = valid_req_st1e && is_snp_st1e && use_read_valid_st1e && tags_match && use_read_dirty_st1e; wire snoop_hit = valid_req_st1e && is_snp_st1e && use_read_valid_st1e && tags_match && use_read_dirty_st1e;
wire req_invalid = valid_req_st1e && !is_snp_st1e && !use_read_valid_st1e && !writefill_st1e; wire req_invalid = valid_req_st1e && !is_snp_st1e && !use_read_valid_st1e && !writefill_st1e;