refactoring device memory allocation and cleanup

This commit is contained in:
Blaise Tine
2022-01-28 21:57:16 -05:00
parent 29df0da8b5
commit f7887d8720
49 changed files with 875 additions and 373 deletions

View File

@@ -7,9 +7,9 @@ typedef struct {
uint32_t testid;
uint32_t num_tasks;
uint32_t task_size;
uint32_t src0_ptr;
uint32_t src1_ptr;
uint32_t dst_ptr;
uint32_t src0_addr;
uint32_t src1_addr;
uint32_t dst_addr;
} kernel_arg_t;
#endif

View File

@@ -13,9 +13,9 @@ inline float __ieee754_sqrtf (float x) {
void kernel_iadd(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
int32_t* src0_ptr = (int32_t*)arg->src0_addr;
int32_t* src1_ptr = (int32_t*)arg->src1_addr;
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@@ -28,9 +28,9 @@ void kernel_iadd(int task_id, kernel_arg_t* arg) {
void kernel_imul(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
int32_t* src0_ptr = (int32_t*)arg->src0_addr;
int32_t* src1_ptr = (int32_t*)arg->src1_addr;
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@@ -43,9 +43,9 @@ void kernel_imul(int task_id, kernel_arg_t* arg) {
void kernel_idiv(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
int32_t* src0_ptr = (int32_t*)arg->src0_addr;
int32_t* src1_ptr = (int32_t*)arg->src1_addr;
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@@ -58,9 +58,9 @@ void kernel_idiv(int task_id, kernel_arg_t* arg) {
void kernel_idiv_mul(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
int32_t* src0_ptr = (int32_t*)arg->src0_addr;
int32_t* src1_ptr = (int32_t*)arg->src1_addr;
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@@ -75,9 +75,9 @@ void kernel_idiv_mul(int task_id, kernel_arg_t* arg) {
void kernel_fadd(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
float* dst_ptr = (float*)arg->dst_ptr;
float* src0_ptr = (float*)arg->src0_addr;
float* src1_ptr = (float*)arg->src1_addr;
float* dst_ptr = (float*)arg->dst_addr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@@ -90,9 +90,9 @@ void kernel_fadd(int task_id, kernel_arg_t* arg) {
void kernel_fsub(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
float* dst_ptr = (float*)arg->dst_ptr;
float* src0_ptr = (float*)arg->src0_addr;
float* src1_ptr = (float*)arg->src1_addr;
float* dst_ptr = (float*)arg->dst_addr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@@ -105,9 +105,9 @@ void kernel_fsub(int task_id, kernel_arg_t* arg) {
void kernel_fmul(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
float* dst_ptr = (float*)arg->dst_ptr;
float* src0_ptr = (float*)arg->src0_addr;
float* src1_ptr = (float*)arg->src1_addr;
float* dst_ptr = (float*)arg->dst_addr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@@ -120,9 +120,9 @@ void kernel_fmul(int task_id, kernel_arg_t* arg) {
void kernel_fmadd(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
float* dst_ptr = (float*)arg->dst_ptr;
float* src0_ptr = (float*)arg->src0_addr;
float* src1_ptr = (float*)arg->src1_addr;
float* dst_ptr = (float*)arg->dst_addr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@@ -135,9 +135,9 @@ void kernel_fmadd(int task_id, kernel_arg_t* arg) {
void kernel_fmsub(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
float* dst_ptr = (float*)arg->dst_ptr;
float* src0_ptr = (float*)arg->src0_addr;
float* src1_ptr = (float*)arg->src1_addr;
float* dst_ptr = (float*)arg->dst_addr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@@ -150,9 +150,9 @@ void kernel_fmsub(int task_id, kernel_arg_t* arg) {
void kernel_fnmadd(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
float* dst_ptr = (float*)arg->dst_ptr;
float* src0_ptr = (float*)arg->src0_addr;
float* src1_ptr = (float*)arg->src1_addr;
float* dst_ptr = (float*)arg->dst_addr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@@ -165,9 +165,9 @@ void kernel_fnmadd(int task_id, kernel_arg_t* arg) {
void kernel_fnmsub(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
float* dst_ptr = (float*)arg->dst_ptr;
float* src0_ptr = (float*)arg->src0_addr;
float* src1_ptr = (float*)arg->src1_addr;
float* dst_ptr = (float*)arg->dst_addr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@@ -180,9 +180,9 @@ void kernel_fnmsub(int task_id, kernel_arg_t* arg) {
void kernel_fnmadd_madd(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
float* dst_ptr = (float*)arg->dst_ptr;
float* src0_ptr = (float*)arg->src0_addr;
float* src1_ptr = (float*)arg->src1_addr;
float* dst_ptr = (float*)arg->dst_addr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@@ -197,9 +197,9 @@ void kernel_fnmadd_madd(int task_id, kernel_arg_t* arg) {
void kernel_fdiv(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
float* dst_ptr = (float*)arg->dst_ptr;
float* src0_ptr = (float*)arg->src0_addr;
float* src1_ptr = (float*)arg->src1_addr;
float* dst_ptr = (float*)arg->dst_addr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@@ -212,9 +212,9 @@ void kernel_fdiv(int task_id, kernel_arg_t* arg) {
void kernel_fdiv2(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
float* dst_ptr = (float*)arg->dst_ptr;
float* src0_ptr = (float*)arg->src0_addr;
float* src1_ptr = (float*)arg->src1_addr;
float* dst_ptr = (float*)arg->dst_addr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@@ -229,9 +229,9 @@ void kernel_fdiv2(int task_id, kernel_arg_t* arg) {
void kernel_fsqrt(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
float* dst_ptr = (float*)arg->dst_ptr;
float* src0_ptr = (float*)arg->src0_addr;
float* src1_ptr = (float*)arg->src1_addr;
float* dst_ptr = (float*)arg->dst_addr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@@ -244,9 +244,9 @@ void kernel_fsqrt(int task_id, kernel_arg_t* arg) {
void kernel_ftoi(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
float* src0_ptr = (float*)arg->src0_addr;
float* src1_ptr = (float*)arg->src1_addr;
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@@ -260,9 +260,9 @@ void kernel_ftoi(int task_id, kernel_arg_t* arg) {
void kernel_ftou(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
uint32_t* dst_ptr = (uint32_t*)arg->dst_ptr;
float* src0_ptr = (float*)arg->src0_addr;
float* src1_ptr = (float*)arg->src1_addr;
uint32_t* dst_ptr = (uint32_t*)arg->dst_addr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@@ -276,9 +276,9 @@ void kernel_ftou(int task_id, kernel_arg_t* arg) {
void kernel_itof(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
float* dst_ptr = (float*)arg->dst_ptr;
int32_t* src0_ptr = (int32_t*)arg->src0_addr;
int32_t* src1_ptr = (int32_t*)arg->src1_addr;
float* dst_ptr = (float*)arg->dst_addr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@@ -292,9 +292,9 @@ void kernel_itof(int task_id, kernel_arg_t* arg) {
void kernel_utof(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
float* dst_ptr = (float*)arg->dst_ptr;
int32_t* src0_ptr = (int32_t*)arg->src0_addr;
int32_t* src1_ptr = (int32_t*)arg->src1_addr;
float* dst_ptr = (float*)arg->dst_addr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {

View File

@@ -87,6 +87,7 @@ vx_buffer_h arg_buf = nullptr;
vx_buffer_h src1_buf = nullptr;
vx_buffer_h src2_buf = nullptr;
vx_buffer_h dst_buf = nullptr;
kernel_arg_t kernel_arg;
static void show_usage() {
std::cout << "Vortex Test." << std::endl;
@@ -130,26 +131,28 @@ static void parse_args(int argc, char **argv) {
void cleanup() {
if (arg_buf) {
vx_buf_release(arg_buf);
vx_buf_free(arg_buf);
}
if (src1_buf) {
vx_buf_release(src1_buf);
vx_buf_free(src1_buf);
}
if (src2_buf) {
vx_buf_release(src2_buf);
vx_buf_free(src2_buf);
}
if (dst_buf) {
vx_buf_release(dst_buf);
vx_buf_free(dst_buf);
}
if (device) {
vx_mem_free(device, kernel_arg.src0_addr);
vx_mem_free(device, kernel_arg.src1_addr);
vx_mem_free(device, kernel_arg.dst_addr);
vx_dev_close(device);
}
}
int main(int argc, char *argv[]) {
int exitcode = 0;
size_t value;
kernel_arg_t kernel_arg;
size_t value;
// parse command arguments
parse_args(argc, argv);
@@ -187,26 +190,26 @@ int main(int argc, char *argv[]) {
// allocate device memory
std::cout << "allocate device memory" << std::endl;
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
kernel_arg.src0_ptr = value;
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
kernel_arg.src1_ptr = value;
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
kernel_arg.dst_ptr = value;
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
kernel_arg.src0_addr = value;
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
kernel_arg.src1_addr = value;
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
kernel_arg.dst_addr = value;
kernel_arg.num_tasks = num_tasks;
kernel_arg.task_size = count;
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::dec << std::endl;
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::dec << std::endl;
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::dec << std::endl;
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_addr << std::dec << std::endl;
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_addr << std::dec << std::endl;
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::dec << std::endl;
// allocate shared memory
std::cout << "allocate shared memory" << std::endl;
RT_CHECK(vx_alloc_shared_mem(device, sizeof(kernel_arg_t), &arg_buf));
RT_CHECK(vx_alloc_shared_mem(device, buf_size, &src1_buf));
RT_CHECK(vx_alloc_shared_mem(device, buf_size, &src2_buf));
RT_CHECK(vx_alloc_shared_mem(device, buf_size, &dst_buf));
RT_CHECK(vx_buf_alloc(device, sizeof(kernel_arg_t), &arg_buf));
RT_CHECK(vx_buf_alloc(device, buf_size, &src1_buf));
RT_CHECK(vx_buf_alloc(device, buf_size, &src2_buf));
RT_CHECK(vx_buf_alloc(device, buf_size, &dst_buf));
for (int t = testid_s; t <= testid_e; ++t) {
auto name = testMngr.get_name(t);
@@ -226,18 +229,18 @@ int main(int argc, char *argv[]) {
// upload source buffer0
std::cout << "upload source buffer0" << std::endl;
RT_CHECK(vx_copy_to_dev(src1_buf, kernel_arg.src0_ptr, buf_size, 0));
RT_CHECK(vx_copy_to_dev(src1_buf, kernel_arg.src0_addr, buf_size, 0));
// upload source buffer1
std::cout << "upload source buffer1" << std::endl;
RT_CHECK(vx_copy_to_dev(src2_buf, kernel_arg.src1_ptr, buf_size, 0));
RT_CHECK(vx_copy_to_dev(src2_buf, kernel_arg.src1_addr, buf_size, 0));
// clear destination buffer
std::cout << "clear destination buffer" << std::endl;
for (int i = 0; i < num_points; ++i) {
((uint32_t*)vx_host_ptr(dst_buf))[i] = 0xdeadbeef;
}
RT_CHECK(vx_copy_to_dev(dst_buf, kernel_arg.dst_ptr, buf_size, 0));
RT_CHECK(vx_copy_to_dev(dst_buf, kernel_arg.dst_addr, buf_size, 0));
// start device
std::cout << "start device" << std::endl;
@@ -249,7 +252,7 @@ int main(int argc, char *argv[]) {
// download destination buffer
std::cout << "download destination buffer" << std::endl;
RT_CHECK(vx_copy_from_dev(dst_buf, kernel_arg.dst_ptr, buf_size, 0));
RT_CHECK(vx_copy_from_dev(dst_buf, kernel_arg.dst_addr, buf_size, 0));
// verify destination
std::cout << "verify test result" << std::endl;