Vortex 2.0 changes:

+ Microarchitecture optimizations
+ 64-bit support
+ Xilinx FPGA support
+ LLVM-16 support
+ Refactoring and quality control fixes

minor update

minor update

minor update

minor update

minor update

minor update

cleanup

cleanup

cache bindings and memory perf refactory

minor update

minor update

hw unit tests fixes

minor update

minor update

minor update

minor update

minor update

minor udpate

minor update

minor update

minor update

minor update

minor update

minor update

minor update

minor updates

minor updates

minor update

minor update

minor update

minor update

minor update

minor update

minor updates

minor updates

minor updates

minor updates

minor update

minor update
This commit is contained in:
Blaise Tine
2023-10-19 20:51:22 -07:00
parent d69a64c32c
commit c1e168fdbe
1309 changed files with 247412 additions and 311463 deletions

View File

@@ -20,12 +20,12 @@
const char* kernel_file = "kernel.bin";
uint32_t count = 0;
std::vector<int32_t> src_data;
std::vector<int32_t> ref_data;
std::vector<TYPE> src_data;
std::vector<TYPE> ref_data;
vx_device_h device = nullptr;
vx_buffer_h staging_buf = nullptr;
kernel_arg_t kernel_arg;
std::vector<uint8_t> staging_buf;
kernel_arg_t kernel_arg = {};
static void show_usage() {
std::cout << "Vortex Test." << std::endl;
@@ -55,9 +55,6 @@ static void parse_args(int argc, char **argv) {
}
void cleanup() {
if (staging_buf) {
vx_buf_free(staging_buf);
}
if (device) {
vx_mem_free(device, kernel_arg.src_addr);
vx_mem_free(device, kernel_arg.dst_addr);
@@ -70,9 +67,9 @@ void gen_input_data(uint32_t num_points) {
for (uint32_t i = 0; i < num_points; ++i) {
float r = static_cast<float>(std::rand()) / RAND_MAX;
int32_t value = r * num_points;
TYPE value = r * num_points;
src_data[i] = value;
std::cout << std::dec << i << ": value=0x" << std::hex << value << std::endl;
std::cout << std::dec << i << ": value=" << value << std::endl;
}
}
@@ -80,13 +77,11 @@ void gen_ref_data(uint32_t num_points) {
ref_data.resize(num_points);
for (uint32_t i = 0; i < num_points; ++i) {
int32_t ref_value = src_data.at(i);
TYPE ref_value = src_data.at(i);
uint32_t pos = 0;
for (uint32_t j = 0; j < num_points; ++j) {
int32_t cur_value = src_data.at(j);
int is_smaller = (cur_value < ref_value)
|| (cur_value == ref_value && j < i);
pos += is_smaller;
TYPE cur_value = src_data.at(j);
pos += (cur_value < ref_value) || (cur_value == ref_value && j < i);
}
ref_data.at(pos) = ref_value;
}
@@ -101,23 +96,23 @@ int run_test(const kernel_arg_t& kernel_arg,
// wait for completion
std::cout << "wait for completion" << std::endl;
RT_CHECK(vx_ready_wait(device, MAX_TIMEOUT));
RT_CHECK(vx_ready_wait(device, VX_MAX_TIMEOUT));
// download destination buffer
std::cout << "download destination buffer" << std::endl;
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
RT_CHECK(vx_copy_from_dev(device, staging_buf.data(), kernel_arg.dst_addr, buf_size));
// verify result
std::cout << "verify result" << std::endl;
{
int errors = 0;
auto buf_ptr = (int32_t*)vx_host_ptr(staging_buf);
auto buf_ptr = (TYPE*)staging_buf.data();
for (uint32_t i = 0; i < num_points; ++i) {
int ref = ref_data.at(i);
int cur = buf_ptr[i];
TYPE ref = ref_data.at(i);
TYPE cur = buf_ptr[i];
if (cur != ref) {
std::cout << "error at result #" << std::dec << i
<< std::hex << ": actual 0x" << cur << ", expected 0x" << ref << std::endl;
<< std::hex << ": actual=" << cur << ", expected=" << ref << std::endl;
++errors;
}
}
@@ -131,9 +126,7 @@ int run_test(const kernel_arg_t& kernel_arg,
return 0;
}
int main(int argc, char *argv[]) {
size_t value;
int main(int argc, char *argv[]) {
// parse command arguments
parse_args(argc, argv);
@@ -166,52 +159,49 @@ int main(int argc, char *argv[]) {
RT_CHECK(vx_upload_kernel_file(device, kernel_file));
// allocate device memory
std::cout << "allocate device memory" << std::endl;
RT_CHECK(vx_mem_alloc(device, src_buf_size, &value));
kernel_arg.src_addr = value;
RT_CHECK(vx_mem_alloc(device, dst_buf_size, &value));
kernel_arg.dst_addr = value;
std::cout << "allocate device memory" << std::endl;
RT_CHECK(vx_mem_alloc(device, src_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src_addr));
RT_CHECK(vx_mem_alloc(device, dst_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.dst_addr));
kernel_arg.num_points = num_points;
std::cout << "dev_src=" << std::hex << kernel_arg.src_addr << std::endl;
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl;
std::cout << "dev_src=0x" << std::hex << kernel_arg.src_addr << std::endl;
std::cout << "dev_dst=0x" << std::hex << kernel_arg.dst_addr << std::endl;
// allocate shared memory
std::cout << "allocate shared memory" << std::endl;
uint32_t staging_buf_size = std::max<uint32_t>(src_buf_size,
std::max<uint32_t>(dst_buf_size,
sizeof(kernel_arg_t)));
RT_CHECK(vx_buf_alloc(device, staging_buf_size, &staging_buf));
// upload kernel argument
std::cout << "upload kernel argument" << std::endl;
// allocate staging buffer
{
auto buf_ptr = (int*)vx_host_ptr(staging_buf);
std::cout << "allocate staging buffer" << std::endl;
uint32_t staging_buf_size = std::max<uint32_t>(src_buf_size,
std::max<uint32_t>(dst_buf_size,
sizeof(kernel_arg_t)));
staging_buf.resize(staging_buf_size);
}
// upload kernel argument
{
std::cout << "upload kernel argument" << std::endl;
auto buf_ptr = staging_buf.data();
memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));
RT_CHECK(vx_copy_to_dev(staging_buf, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
RT_CHECK(vx_copy_to_dev(device, KERNEL_ARG_DEV_MEM_ADDR, staging_buf.data(), sizeof(kernel_arg_t)));
}
// upload source buffer
{
auto buf_ptr = (int32_t*)vx_host_ptr(staging_buf);
for (uint32_t i = 0; i < num_points; ++i) {
buf_ptr[i] = src_data.at(i);
}
std::cout << "upload source buffer" << std::endl;
auto buf_ptr = staging_buf.data();
memcpy(buf_ptr, src_data.data(), num_points * sizeof(TYPE));
RT_CHECK(vx_copy_to_dev(device, kernel_arg.src_addr, staging_buf.data(), src_buf_size));
}
std::cout << "upload source buffer" << std::endl;
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_addr, src_buf_size, 0));
// clear destination buffer
{
auto buf_ptr = (int32_t*)vx_host_ptr(staging_buf);
std::cout << "clear destination buffer" << std::endl;
auto buf_ptr = (int32_t*)staging_buf.data();
for (uint32_t i = 0; i < num_points; ++i) {
buf_ptr[i] = 0xdeadbeef;
}
}
RT_CHECK(vx_copy_to_dev(device, kernel_arg.dst_addr, staging_buf.data(), dst_buf_size));
}
std::cout << "clear destination buffer" << std::endl;
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, dst_buf_size, 0));
// run tests
std::cout << "run tests" << std::endl;