refactoring device memory allocation and cleanup
This commit is contained in:
@@ -7,9 +7,9 @@ typedef struct {
|
||||
uint32_t testid;
|
||||
uint32_t num_tasks;
|
||||
uint32_t task_size;
|
||||
uint32_t src0_ptr;
|
||||
uint32_t src1_ptr;
|
||||
uint32_t dst_ptr;
|
||||
uint32_t src0_addr;
|
||||
uint32_t src1_addr;
|
||||
uint32_t dst_addr;
|
||||
} kernel_arg_t;
|
||||
|
||||
#endif
|
||||
@@ -13,9 +13,9 @@ inline float __ieee754_sqrtf (float x) {
|
||||
|
||||
void kernel_iadd(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
|
||||
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
|
||||
int32_t* src0_ptr = (int32_t*)arg->src0_addr;
|
||||
int32_t* src1_ptr = (int32_t*)arg->src1_addr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -28,9 +28,9 @@ void kernel_iadd(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_imul(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
|
||||
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
|
||||
int32_t* src0_ptr = (int32_t*)arg->src0_addr;
|
||||
int32_t* src1_ptr = (int32_t*)arg->src1_addr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -43,9 +43,9 @@ void kernel_imul(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_idiv(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
|
||||
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
|
||||
int32_t* src0_ptr = (int32_t*)arg->src0_addr;
|
||||
int32_t* src1_ptr = (int32_t*)arg->src1_addr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -58,9 +58,9 @@ void kernel_idiv(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_idiv_mul(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
|
||||
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
|
||||
int32_t* src0_ptr = (int32_t*)arg->src0_addr;
|
||||
int32_t* src1_ptr = (int32_t*)arg->src1_addr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -75,9 +75,9 @@ void kernel_idiv_mul(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_fadd(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
float* src0_ptr = (float*)arg->src0_ptr;
|
||||
float* src1_ptr = (float*)arg->src1_ptr;
|
||||
float* dst_ptr = (float*)arg->dst_ptr;
|
||||
float* src0_ptr = (float*)arg->src0_addr;
|
||||
float* src1_ptr = (float*)arg->src1_addr;
|
||||
float* dst_ptr = (float*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -90,9 +90,9 @@ void kernel_fadd(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_fsub(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
float* src0_ptr = (float*)arg->src0_ptr;
|
||||
float* src1_ptr = (float*)arg->src1_ptr;
|
||||
float* dst_ptr = (float*)arg->dst_ptr;
|
||||
float* src0_ptr = (float*)arg->src0_addr;
|
||||
float* src1_ptr = (float*)arg->src1_addr;
|
||||
float* dst_ptr = (float*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -105,9 +105,9 @@ void kernel_fsub(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_fmul(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
float* src0_ptr = (float*)arg->src0_ptr;
|
||||
float* src1_ptr = (float*)arg->src1_ptr;
|
||||
float* dst_ptr = (float*)arg->dst_ptr;
|
||||
float* src0_ptr = (float*)arg->src0_addr;
|
||||
float* src1_ptr = (float*)arg->src1_addr;
|
||||
float* dst_ptr = (float*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -120,9 +120,9 @@ void kernel_fmul(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_fmadd(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
float* src0_ptr = (float*)arg->src0_ptr;
|
||||
float* src1_ptr = (float*)arg->src1_ptr;
|
||||
float* dst_ptr = (float*)arg->dst_ptr;
|
||||
float* src0_ptr = (float*)arg->src0_addr;
|
||||
float* src1_ptr = (float*)arg->src1_addr;
|
||||
float* dst_ptr = (float*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -135,9 +135,9 @@ void kernel_fmadd(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_fmsub(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
float* src0_ptr = (float*)arg->src0_ptr;
|
||||
float* src1_ptr = (float*)arg->src1_ptr;
|
||||
float* dst_ptr = (float*)arg->dst_ptr;
|
||||
float* src0_ptr = (float*)arg->src0_addr;
|
||||
float* src1_ptr = (float*)arg->src1_addr;
|
||||
float* dst_ptr = (float*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -150,9 +150,9 @@ void kernel_fmsub(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_fnmadd(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
float* src0_ptr = (float*)arg->src0_ptr;
|
||||
float* src1_ptr = (float*)arg->src1_ptr;
|
||||
float* dst_ptr = (float*)arg->dst_ptr;
|
||||
float* src0_ptr = (float*)arg->src0_addr;
|
||||
float* src1_ptr = (float*)arg->src1_addr;
|
||||
float* dst_ptr = (float*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -165,9 +165,9 @@ void kernel_fnmadd(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_fnmsub(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
float* src0_ptr = (float*)arg->src0_ptr;
|
||||
float* src1_ptr = (float*)arg->src1_ptr;
|
||||
float* dst_ptr = (float*)arg->dst_ptr;
|
||||
float* src0_ptr = (float*)arg->src0_addr;
|
||||
float* src1_ptr = (float*)arg->src1_addr;
|
||||
float* dst_ptr = (float*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -180,9 +180,9 @@ void kernel_fnmsub(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_fnmadd_madd(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
float* src0_ptr = (float*)arg->src0_ptr;
|
||||
float* src1_ptr = (float*)arg->src1_ptr;
|
||||
float* dst_ptr = (float*)arg->dst_ptr;
|
||||
float* src0_ptr = (float*)arg->src0_addr;
|
||||
float* src1_ptr = (float*)arg->src1_addr;
|
||||
float* dst_ptr = (float*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -197,9 +197,9 @@ void kernel_fnmadd_madd(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_fdiv(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
float* src0_ptr = (float*)arg->src0_ptr;
|
||||
float* src1_ptr = (float*)arg->src1_ptr;
|
||||
float* dst_ptr = (float*)arg->dst_ptr;
|
||||
float* src0_ptr = (float*)arg->src0_addr;
|
||||
float* src1_ptr = (float*)arg->src1_addr;
|
||||
float* dst_ptr = (float*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -212,9 +212,9 @@ void kernel_fdiv(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_fdiv2(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
float* src0_ptr = (float*)arg->src0_ptr;
|
||||
float* src1_ptr = (float*)arg->src1_ptr;
|
||||
float* dst_ptr = (float*)arg->dst_ptr;
|
||||
float* src0_ptr = (float*)arg->src0_addr;
|
||||
float* src1_ptr = (float*)arg->src1_addr;
|
||||
float* dst_ptr = (float*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -229,9 +229,9 @@ void kernel_fdiv2(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_fsqrt(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
float* src0_ptr = (float*)arg->src0_ptr;
|
||||
float* src1_ptr = (float*)arg->src1_ptr;
|
||||
float* dst_ptr = (float*)arg->dst_ptr;
|
||||
float* src0_ptr = (float*)arg->src0_addr;
|
||||
float* src1_ptr = (float*)arg->src1_addr;
|
||||
float* dst_ptr = (float*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -244,9 +244,9 @@ void kernel_fsqrt(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_ftoi(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
float* src0_ptr = (float*)arg->src0_ptr;
|
||||
float* src1_ptr = (float*)arg->src1_ptr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
|
||||
float* src0_ptr = (float*)arg->src0_addr;
|
||||
float* src1_ptr = (float*)arg->src1_addr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -260,9 +260,9 @@ void kernel_ftoi(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_ftou(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
float* src0_ptr = (float*)arg->src0_ptr;
|
||||
float* src1_ptr = (float*)arg->src1_ptr;
|
||||
uint32_t* dst_ptr = (uint32_t*)arg->dst_ptr;
|
||||
float* src0_ptr = (float*)arg->src0_addr;
|
||||
float* src1_ptr = (float*)arg->src1_addr;
|
||||
uint32_t* dst_ptr = (uint32_t*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -276,9 +276,9 @@ void kernel_ftou(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_itof(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
|
||||
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
|
||||
float* dst_ptr = (float*)arg->dst_ptr;
|
||||
int32_t* src0_ptr = (int32_t*)arg->src0_addr;
|
||||
int32_t* src1_ptr = (int32_t*)arg->src1_addr;
|
||||
float* dst_ptr = (float*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -292,9 +292,9 @@ void kernel_itof(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_utof(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
|
||||
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
|
||||
float* dst_ptr = (float*)arg->dst_ptr;
|
||||
int32_t* src0_ptr = (int32_t*)arg->src0_addr;
|
||||
int32_t* src1_ptr = (int32_t*)arg->src1_addr;
|
||||
float* dst_ptr = (float*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
|
||||
@@ -87,6 +87,7 @@ vx_buffer_h arg_buf = nullptr;
|
||||
vx_buffer_h src1_buf = nullptr;
|
||||
vx_buffer_h src2_buf = nullptr;
|
||||
vx_buffer_h dst_buf = nullptr;
|
||||
kernel_arg_t kernel_arg;
|
||||
|
||||
static void show_usage() {
|
||||
std::cout << "Vortex Test." << std::endl;
|
||||
@@ -130,26 +131,28 @@ static void parse_args(int argc, char **argv) {
|
||||
|
||||
void cleanup() {
|
||||
if (arg_buf) {
|
||||
vx_buf_release(arg_buf);
|
||||
vx_buf_free(arg_buf);
|
||||
}
|
||||
if (src1_buf) {
|
||||
vx_buf_release(src1_buf);
|
||||
vx_buf_free(src1_buf);
|
||||
}
|
||||
if (src2_buf) {
|
||||
vx_buf_release(src2_buf);
|
||||
vx_buf_free(src2_buf);
|
||||
}
|
||||
if (dst_buf) {
|
||||
vx_buf_release(dst_buf);
|
||||
vx_buf_free(dst_buf);
|
||||
}
|
||||
if (device) {
|
||||
vx_mem_free(device, kernel_arg.src0_addr);
|
||||
vx_mem_free(device, kernel_arg.src1_addr);
|
||||
vx_mem_free(device, kernel_arg.dst_addr);
|
||||
vx_dev_close(device);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
int exitcode = 0;
|
||||
size_t value;
|
||||
kernel_arg_t kernel_arg;
|
||||
size_t value;
|
||||
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
@@ -187,26 +190,26 @@ int main(int argc, char *argv[]) {
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.src0_ptr = value;
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.src1_ptr = value;
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.dst_ptr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||
kernel_arg.src0_addr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||
kernel_arg.src1_addr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||
kernel_arg.dst_addr = value;
|
||||
|
||||
kernel_arg.num_tasks = num_tasks;
|
||||
kernel_arg.task_size = count;
|
||||
|
||||
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::dec << std::endl;
|
||||
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::dec << std::endl;
|
||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::dec << std::endl;
|
||||
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_addr << std::dec << std::endl;
|
||||
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_addr << std::dec << std::endl;
|
||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::dec << std::endl;
|
||||
|
||||
// allocate shared memory
|
||||
std::cout << "allocate shared memory" << std::endl;
|
||||
RT_CHECK(vx_alloc_shared_mem(device, sizeof(kernel_arg_t), &arg_buf));
|
||||
RT_CHECK(vx_alloc_shared_mem(device, buf_size, &src1_buf));
|
||||
RT_CHECK(vx_alloc_shared_mem(device, buf_size, &src2_buf));
|
||||
RT_CHECK(vx_alloc_shared_mem(device, buf_size, &dst_buf));
|
||||
RT_CHECK(vx_buf_alloc(device, sizeof(kernel_arg_t), &arg_buf));
|
||||
RT_CHECK(vx_buf_alloc(device, buf_size, &src1_buf));
|
||||
RT_CHECK(vx_buf_alloc(device, buf_size, &src2_buf));
|
||||
RT_CHECK(vx_buf_alloc(device, buf_size, &dst_buf));
|
||||
|
||||
for (int t = testid_s; t <= testid_e; ++t) {
|
||||
auto name = testMngr.get_name(t);
|
||||
@@ -226,18 +229,18 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
// upload source buffer0
|
||||
std::cout << "upload source buffer0" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(src1_buf, kernel_arg.src0_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(src1_buf, kernel_arg.src0_addr, buf_size, 0));
|
||||
|
||||
// upload source buffer1
|
||||
std::cout << "upload source buffer1" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(src2_buf, kernel_arg.src1_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(src2_buf, kernel_arg.src1_addr, buf_size, 0));
|
||||
|
||||
// clear destination buffer
|
||||
std::cout << "clear destination buffer" << std::endl;
|
||||
for (int i = 0; i < num_points; ++i) {
|
||||
((uint32_t*)vx_host_ptr(dst_buf))[i] = 0xdeadbeef;
|
||||
}
|
||||
RT_CHECK(vx_copy_to_dev(dst_buf, kernel_arg.dst_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(dst_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||
|
||||
// start device
|
||||
std::cout << "start device" << std::endl;
|
||||
@@ -249,7 +252,7 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
// download destination buffer
|
||||
std::cout << "download destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_from_dev(dst_buf, kernel_arg.dst_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_from_dev(dst_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||
|
||||
// verify destination
|
||||
std::cout << "verify test result" << std::endl;
|
||||
|
||||
Reference in New Issue
Block a user