Vortex 2.0 changes:
+ Microarchitecture optimizations
+ 64-bit support
+ Xilinx FPGA support
+ LLVM-16 support
+ Refactoring and quality control fixes
This commit is contained in:
@@ -1,109 +1,49 @@
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <unordered_set>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
#include <vector>
|
||||
#include <vortex.h>
|
||||
#include <VX_config.h>
|
||||
#include "testcases.h"
|
||||
#include "common.h"
|
||||
|
||||
// Runtime status check.
//
// Evaluates `_expr` exactly once and stores the result as an int status code.
// A zero status is treated as success and execution continues after the macro.
// Any other status is reported with printf (the expression text plus the
// returned value), then cleanup() is called and the process exits with -1.
//
// The do { ... } while (false) wrapper makes the expansion a single statement,
// so the macro can be used as the body of an unbraced if/else.
#define RT_CHECK(_expr)                                       \
  do {                                                        \
    int _ret = (_expr);                                       \
    if (0 == _ret)                                            \
      break;                                                  \
    printf("Error: '%s' returned %d!\n", #_expr, _ret);       \
    cleanup();                                                \
    exit(-1);                                                 \
  } while (false)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class TestMngr {
|
||||
public:
|
||||
TestMngr() {
|
||||
this->add_test("iadd", new Test_IADD());
|
||||
this->add_test("imul", new Test_IMUL());
|
||||
this->add_test("idiv", new Test_IDIV());
|
||||
this->add_test("idiv-mul", new Test_IDIV_MUL());
|
||||
#ifdef EXT_F_ENABLE
|
||||
this->add_test("fadd", new Test_FADD());
|
||||
this->add_test("fsub", new Test_FSUB());
|
||||
this->add_test("fmul", new Test_FMUL());
|
||||
this->add_test("fmadd", new Test_FMADD());
|
||||
this->add_test("fmsub", new Test_FMSUB());
|
||||
this->add_test("fnmadd", new Test_FNMADD());
|
||||
this->add_test("fnmsub", new Test_FNMSUB());
|
||||
this->add_test("fnmadd-madd", new Test_FNMADD_MADD());
|
||||
this->add_test("fdiv", new Test_FDIV());
|
||||
this->add_test("fdiv2", new Test_FDIV2());
|
||||
this->add_test("fsqrt", new Test_FSQRT());
|
||||
this->add_test("ftoi", new Test_FTOI());
|
||||
this->add_test("ftou", new Test_FTOU());
|
||||
this->add_test("itof", new Test_ITOF());
|
||||
this->add_test("utof", new Test_UTOF());
|
||||
#endif
|
||||
}
|
||||
|
||||
~TestMngr() {
|
||||
for (size_t i = 0; i < _tests.size(); ++i) {
|
||||
delete _tests[i];
|
||||
}
|
||||
}
|
||||
|
||||
const std::string& get_name(int testid) const {
|
||||
return _names.at(testid);
|
||||
}
|
||||
|
||||
ITestCase* get_test(int testid) const {
|
||||
return _tests.at(testid);
|
||||
}
|
||||
|
||||
void add_test(const char* name, ITestCase* test) {
|
||||
_names.push_back(name);
|
||||
_tests.push_back(test);
|
||||
}
|
||||
|
||||
size_t size() const {
|
||||
return _tests.size();
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<std::string> _names;
|
||||
std::vector<ITestCase*> _tests;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// File-scope state shared by parse_args(), cleanup() and main().
// NOTE(review): this span contains two generations of the same declarations
// interleaved (several names below are defined twice with different types or
// initializers). Presumably one set supersedes the other — confirm which set
// is current before building.
TestMngr testMngr;
|
||||
TestSuite* testSuite = nullptr;
|
||||
const char* kernel_file = "kernel.bin";
|
||||
// NOTE(review): `count` is defined twice with the same initializer.
int count = 0;
|
||||
int count = 0;
|
||||
// Test ids collected from the -t and -x options in parse_args().
std::unordered_set<int> included;
|
||||
std::unordered_set<int> excluded;
|
||||
int testid_s = 0;
|
||||
// NOTE(review): `testid_e` is defined twice; main() replaces a value of 0
// with testSuite->size() - 1 after the suite is created.
int testid_e = (testMngr.size() - 1);
|
||||
int testid_e = 0;
|
||||
bool stop_on_error = true;
|
||||
|
||||
// NOTE(review): `device`, the four `*_buf` names and `kernel_arg` each appear
// twice below: once as vx_buffer_h handles / uninitialized kernel_arg, once as
// std::vector<uint8_t> staging buffers / value-initialized kernel_arg.
vx_device_h device = nullptr;
|
||||
vx_buffer_h arg_buf = nullptr;
|
||||
vx_buffer_h src1_buf = nullptr;
|
||||
vx_buffer_h src2_buf = nullptr;
|
||||
vx_buffer_h dst_buf = nullptr;
|
||||
kernel_arg_t kernel_arg;
|
||||
vx_device_h device = nullptr;
|
||||
std::vector<uint8_t> arg_buf;
|
||||
std::vector<uint8_t> src1_buf;
|
||||
std::vector<uint8_t> src2_buf;
|
||||
std::vector<uint8_t> dst_buf;
|
||||
kernel_arg_t kernel_arg = {};
|
||||
|
||||
// Prints the program banner and the command-line synopsis to std::cout.
// The synopsis lists the -x (excluded tests) option so that it matches the
// option string handled by parse_args().
static void show_usage() {
  std::cout << "Vortex Test." << std::endl;
  std::cout << "Usage: [-t<testid>: selected test] [-s<testid>: start test] [-e<testid>: end test] [-x<testid>: excluded tests]" << std::endl;
  std::cout << " [-k<kernel>] [-n<words>] [-c] [-h: help]" << std::endl;
}
|
||||
|
||||
static void parse_args(int argc, char **argv) {
|
||||
int c;
|
||||
while ((c = getopt(argc, argv, "n:t:s:e:k:ch?")) != -1) {
|
||||
while ((c = getopt(argc, argv, "n:t:x:s:e:k:ch?")) != -1) {
|
||||
switch (c) {
|
||||
case 'n':
|
||||
count = atoi(optarg);
|
||||
break;
|
||||
case 't':
|
||||
testid_s = atoi(optarg);
|
||||
testid_e = atoi(optarg);
|
||||
included.insert(atoi(optarg));
|
||||
break;
|
||||
case 'x':
|
||||
excluded.insert(atoi(optarg));
|
||||
break;
|
||||
case 's':
|
||||
testid_s = atoi(optarg);
|
||||
@@ -130,17 +70,8 @@ static void parse_args(int argc, char **argv) {
|
||||
}
|
||||
|
||||
void cleanup() {
|
||||
if (arg_buf) {
|
||||
vx_buf_free(arg_buf);
|
||||
}
|
||||
if (src1_buf) {
|
||||
vx_buf_free(src1_buf);
|
||||
}
|
||||
if (src2_buf) {
|
||||
vx_buf_free(src2_buf);
|
||||
}
|
||||
if (dst_buf) {
|
||||
vx_buf_free(dst_buf);
|
||||
if (testSuite) {
|
||||
delete testSuite;
|
||||
}
|
||||
if (device) {
|
||||
vx_mem_free(device, kernel_arg.src0_addr);
|
||||
@@ -152,7 +83,6 @@ void cleanup() {
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
int exitcode = 0;
|
||||
size_t value;
|
||||
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
@@ -171,12 +101,12 @@ int main(int argc, char *argv[]) {
|
||||
std::cout << "open device connection" << std::endl;
|
||||
RT_CHECK(vx_dev_open(&device));
|
||||
|
||||
uint64_t max_cores, max_warps, max_threads;
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));
|
||||
uint64_t num_cores, num_warps, num_threads;
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_CORES, &num_cores));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_WARPS, &num_warps));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_THREADS, &num_threads));
|
||||
|
||||
int num_tasks = max_cores * max_warps * max_threads;
|
||||
int num_tasks = num_cores * num_warps * num_threads;
|
||||
int num_points = count * num_tasks;
|
||||
size_t buf_size = num_points * sizeof(uint32_t);
|
||||
|
||||
@@ -188,59 +118,69 @@ int main(int argc, char *argv[]) {
|
||||
RT_CHECK(vx_upload_kernel_file(device, kernel_file));
|
||||
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||
kernel_arg.src0_addr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||
kernel_arg.src1_addr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||
kernel_arg.dst_addr = value;
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src0_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src1_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.dst_addr));
|
||||
|
||||
kernel_arg.num_tasks = num_tasks;
|
||||
kernel_arg.task_size = count;
|
||||
|
||||
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_addr << std::dec << std::endl;
|
||||
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_addr << std::dec << std::endl;
|
||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::dec << std::endl;
|
||||
std::cout << "dev_src0=0x" << std::hex << kernel_arg.src0_addr << std::dec << std::endl;
|
||||
std::cout << "dev_src1=0x" << std::hex << kernel_arg.src1_addr << std::dec << std::endl;
|
||||
std::cout << "dev_dst=0x" << std::hex << kernel_arg.dst_addr << std::dec << std::endl;
|
||||
|
||||
// allocate shared memory
|
||||
std::cout << "allocate shared memory" << std::endl;
|
||||
RT_CHECK(vx_buf_alloc(device, sizeof(kernel_arg_t), &arg_buf));
|
||||
RT_CHECK(vx_buf_alloc(device, buf_size, &src1_buf));
|
||||
RT_CHECK(vx_buf_alloc(device, buf_size, &src2_buf));
|
||||
RT_CHECK(vx_buf_alloc(device, buf_size, &dst_buf));
|
||||
// allocate staging buffer
|
||||
std::cout << "allocate staging buffer" << std::endl;
|
||||
arg_buf.resize(sizeof(kernel_arg_t));
|
||||
src1_buf.resize(buf_size);
|
||||
src2_buf.resize(buf_size);
|
||||
dst_buf.resize(buf_size);
|
||||
|
||||
// allocate test suite
|
||||
testSuite = new TestSuite(device);
|
||||
if (testid_e == 0) {
|
||||
testid_e = (testSuite->size() - 1);
|
||||
}
|
||||
// execute tests
|
||||
for (int t = testid_s; t <= testid_e; ++t) {
|
||||
auto name = testMngr.get_name(t);
|
||||
auto test = testMngr.get_test(t);
|
||||
if (!included.empty()) {
|
||||
if (included.count(t) == 0)
|
||||
continue;
|
||||
}
|
||||
if (!excluded.empty()) {
|
||||
if (excluded.count(t) != 0)
|
||||
continue;
|
||||
}
|
||||
auto test = testSuite->get_test(t);
|
||||
auto name = test->name();
|
||||
|
||||
std::cout << "Test" << t << ": " << name << std::endl;
|
||||
|
||||
// upload kernel argument
|
||||
std::cout << "upload kernel argument" << std::endl;
|
||||
kernel_arg.testid = t;
|
||||
memcpy((void*)vx_host_ptr(arg_buf), &kernel_arg, sizeof(kernel_arg_t));
|
||||
RT_CHECK(vx_copy_to_dev(arg_buf, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
|
||||
memcpy(arg_buf.data(), &kernel_arg, sizeof(kernel_arg_t));
|
||||
RT_CHECK(vx_copy_to_dev(device, KERNEL_ARG_DEV_MEM_ADDR, arg_buf.data(), sizeof(kernel_arg_t)));
|
||||
|
||||
// get test arguments
|
||||
std::cout << "get test arguments" << std::endl;
|
||||
test->setup(num_points, (void*)vx_host_ptr(src1_buf), (void*)vx_host_ptr(src2_buf));
|
||||
RT_CHECK(test->setup(num_points, (void*)src1_buf.data(), (void*)src2_buf.data()));
|
||||
|
||||
// upload source buffer0
|
||||
std::cout << "upload source buffer0" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(src1_buf, kernel_arg.src0_addr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(device, kernel_arg.src0_addr, src1_buf.data(), buf_size));
|
||||
|
||||
// upload source buffer1
|
||||
std::cout << "upload source buffer1" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(src2_buf, kernel_arg.src1_addr, buf_size, 0));
|
||||
std::cout << "upload source buffer1" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(device, kernel_arg.src1_addr, src2_buf.data(), buf_size));
|
||||
|
||||
// clear destination buffer
|
||||
std::cout << "clear destination buffer" << std::endl;
|
||||
for (int i = 0; i < num_points; ++i) {
|
||||
((uint32_t*)vx_host_ptr(dst_buf))[i] = 0xdeadbeef;
|
||||
((uint32_t*)dst_buf.data())[i] = 0xdeadbeef;
|
||||
}
|
||||
RT_CHECK(vx_copy_to_dev(dst_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(device, kernel_arg.dst_addr, dst_buf.data(), buf_size));
|
||||
|
||||
// start device
|
||||
std::cout << "start device" << std::endl;
|
||||
@@ -248,18 +188,15 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
// wait for completion
|
||||
std::cout << "wait for completion" << std::endl;
|
||||
RT_CHECK(vx_ready_wait(device, MAX_TIMEOUT));
|
||||
RT_CHECK(vx_ready_wait(device, VX_MAX_TIMEOUT));
|
||||
|
||||
// download destination buffer
|
||||
std::cout << "download destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_from_dev(dst_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_from_dev(device, dst_buf.data(), kernel_arg.dst_addr, buf_size));
|
||||
|
||||
// verify destination
|
||||
std::cout << "verify test result" << std::endl;
|
||||
int errors = test->verify(num_points,
|
||||
(void*)vx_host_ptr(dst_buf),
|
||||
(void*)vx_host_ptr(src1_buf),
|
||||
(void*)vx_host_ptr(src2_buf));
|
||||
int errors = test->verify(num_points, dst_buf.data(), src1_buf.data(), src2_buf.data());
|
||||
if (errors != 0) {
|
||||
std::cout << "found " << std::dec << errors << " errors!" << std::endl;
|
||||
std::cout << "Test" << t << "-" << name << " FAILED!" << std::endl << std::flush;
|
||||
|
||||
Reference in New Issue
Block a user