diff --git a/tests/regression/diverge/Makefile b/tests/regression/diverge/Makefile
new file mode 100644
index 00000000..6c531257
--- /dev/null
+++ b/tests/regression/diverge/Makefile
@@ -0,0 +1,79 @@
+# Builds the "diverge" regression test: the RISC-V device kernel
+# (kernel.elf / kernel.bin / kernel.dump) and the host program $(PROJECT).
+
+RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain
+VORTEX_DRV_PATH ?= $(realpath ../../../driver)
+VORTEX_RT_PATH ?= $(realpath ../../../runtime)
+
+OPTS ?= -n16
+
+VX_CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc
+VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
+VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
+VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
+
+VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
+VX_CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw
+
+VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
+
+VX_SRCS = kernel.c
+
+#CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -pedantic -Wfatal-errors
+CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
+
+CXXFLAGS += -I$(VORTEX_DRV_PATH)/include
+
+LDFLAGS += -L$(VORTEX_DRV_PATH)/stub -lvortex
+
+PROJECT = diverge
+
+SRCS = main.cpp
+
+# None of these targets produces a file of the same name.
+.PHONY: all run-simx run-fpga run-asesim run-vlsim run-rtlsim clean clean-all
+
+all: $(PROJECT) kernel.bin kernel.dump
+
+kernel.dump: kernel.elf
+	$(VX_DP) -D kernel.elf > kernel.dump
+
+kernel.bin: kernel.elf
+	$(VX_CP) -O binary kernel.elf kernel.bin
+
+kernel.elf: $(VX_SRCS)
+	$(VX_CC) $(VX_CFLAGS) $(VX_SRCS) $(VX_LDFLAGS) -o kernel.elf
+
+$(PROJECT): $(SRCS)
+	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
+
+# Each run-* target runs the host program with a different $(VORTEX_DRV_PATH)
+# subdirectory on LD_LIBRARY_PATH. POCL_RT_PATH is not defined in this Makefile.
+run-simx: $(PROJECT) kernel.bin
+	LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
+
+run-fpga: $(PROJECT) kernel.bin
+	LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
+
+run-asesim: $(PROJECT) kernel.bin
+	LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
+
+run-vlsim: $(PROJECT) kernel.bin
+	LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
+
+run-rtlsim: $(PROJECT) kernel.bin
+	LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
+
+.depend: $(SRCS)
+	$(CXX) $(CXXFLAGS) -MM $^ > .depend;
+
+clean:
+	rm -rf $(PROJECT) *.o .depend
+
+clean-all: clean
+	rm -rf *.elf *.bin *.dump
+
+# Do not (re)generate .depend when the only goals are cleaning goals.
+ifeq (,$(filter clean clean-all,$(MAKECMDGOALS)))
+    -include .depend
+endif
\ No newline at end of file
diff --git a/tests/regression/diverge/common.h b/tests/regression/diverge/common.h
new file mode 100644
index 00000000..73247b2c
--- /dev/null
+++ b/tests/regression/diverge/common.h
@@ -0,0 +1,18 @@
+#ifndef _COMMON_H_
+#define _COMMON_H_
+
+// Definitions shared by the host program (main.cpp) and the device kernel (kernel.c).
+
+#include <stdint.h>
+
+// Device address at which the host stores the kernel_arg_t and the kernel reads it.
+#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
+
+// Kernel arguments; buffer locations are carried as 32-bit device addresses.
+struct kernel_arg_t {
+  uint32_t num_points; // number of elements; the kernel spawns one task per element
+  uint32_t src_ptr;    // device address of the input buffer
+  uint32_t dst_ptr;    // device address of the output buffer
+};
+
+#endif
\ No newline at end of file
diff --git a/tests/regression/diverge/kernel.bin b/tests/regression/diverge/kernel.bin
new file mode 100755
index 00000000..24d87892
Binary files /dev/null and b/tests/regression/diverge/kernel.bin differ
diff --git a/tests/regression/diverge/kernel.c b/tests/regression/diverge/kernel.c
new file mode 100644
index 00000000..a71e516d
--- /dev/null
+++ b/tests/regression/diverge/kernel.c
@@ -0,0 +1,54 @@
+#include <stdint.h>
+#include <vx_intrinsics.h>
+#include <vx_spawn.h>
+#include "common.h"
+
+// Branch-divergence regression kernel. __if/__else/__endif are not defined in
+// this file; presumably they are supplied by the runtime headers included above.
+
+// Per-task body: loads src[task_id], adds a constant for every branch taken and
+// stores the sum to dst[task_id]. gen_ref_data() in main.cpp mirrors this logic.
+void kernel_body(int task_id, void* arg) {
+  struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
+  int32_t* src_ptr = (int32_t*)_arg->src_ptr;
+  int32_t* dst_ptr = (int32_t*)_arg->dst_ptr;
+
+  int value = src_ptr[task_id];
+
+  // none taken: only task_id == 0x7fffffff could satisfy the condition
+  __if (task_id >= 0x7fffffff) {
+    value = 0;
+  }__else {
+    value += 2;
+  }__endif
+
+  // diverge: nested branches split task ids 0, 1, 2 and >2 four ways
+  __if (task_id > 1) {
+    __if (task_id > 2) {
+      value += 6;
+    }__else {
+      value += 5;
+    }__endif
+  }__else {
+    __if (task_id > 0) {
+      value += 4;
+    }__else {
+      value += 3;
+    }__endif
+  }__endif
+
+  // all taken: true for every non-negative task id
+  __if (task_id >= 0) {
+    value += 7;
+  }__else {
+    value = 0;
+  }__endif
+
+  dst_ptr[task_id] = value;
+}
+
+// Entry point: reads the argument block at KERNEL_ARG_DEV_MEM_ADDR and spawns num_points tasks.
+void main() {
+  struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
+  vx_spawn_tasks(arg->num_points, kernel_body, arg);
+}
\ No newline at end of file
diff --git a/tests/regression/diverge/kernel.dump b/tests/regression/diverge/kernel.dump
new file mode 100644
index 00000000..ebe275d7
--- /dev/null
+++ b/tests/regression/diverge/kernel.dump
@@ -0,0 +1,719 @@
+
+kernel.elf: file format elf32-littleriscv
+
+
+Disassembly of section .init:
+
+80000000 <_start>:
+80000000: 00000597 auipc a1,0x0
+80000004: 14458593 addi a1,a1,324 # 80000144
+80000008: fc102573 csrr a0,0xfc1
+8000000c: 00b5106b 0xb5106b
+80000010: 134000ef jal ra,80000144
+80000014: 00100513 li a0,1
+80000018: 0005006b 0x5006b
+8000001c: 00002517 auipc a0,0x2
+80000020: d9050513 addi a0,a0,-624 # 80001dac
+80000024: 00002617 auipc a2,0x2
+80000028: e0860613 addi a2,a2,-504 # 80001e2c <__BSS_END__>
+8000002c: 40a60633 sub a2,a2,a0
+80000030: 00000593 li a1,0
+80000034: 6a8000ef jal ra,800006dc
+80000038: 00000517 auipc a0,0x0
+8000003c: 5ac50513 addi a0,a0,1452 # 800005e4 <__libc_fini_array>
+80000040: 55c000ef jal ra,8000059c
+80000044: 5fc000ef jal ra,80000640 <__libc_init_array>
+80000048: 008000ef jal ra,80000050
+8000004c: 5640006f j 800005b0 + +Disassembly of section .text: + +80000050
: +80000050: 7ffff7b7 lui a5,0x7ffff +80000054: 0007a503 lw a0,0(a5) # 7ffff000 <__stack_size+0x7fffec00> +80000058: 800005b7 lui a1,0x80000 +8000005c: 7ffff637 lui a2,0x7ffff +80000060: 08058593 addi a1,a1,128 # 80000080 <__stack_top+0x81000080> +80000064: 1dc0006f j 80000240 + +80000068 : +80000068: 00000793 li a5,0 +8000006c: 00078863 beqz a5,8000007c +80000070: 80000537 lui a0,0x80000 +80000074: 5e450513 addi a0,a0,1508 # 800005e4 <__stack_top+0x810005e4> +80000078: 5240006f j 8000059c +8000007c: 00008067 ret + +80000080 : +80000080: 0045a683 lw a3,4(a1) +80000084: 80000737 lui a4,0x80000 +80000088: 00251813 slli a6,a0,0x2 +8000008c: 00170793 addi a5,a4,1 # 80000001 <__stack_top+0x81000001> +80000090: 01068633 add a2,a3,a6 +80000094: 00f507b3 add a5,a0,a5 +80000098: 0085a683 lw a3,8(a1) +8000009c: 0017b793 seqz a5,a5 +800000a0: 00062583 lw a1,0(a2) # 7ffff000 <__stack_size+0x7fffec00> +800000a4: 0007a06b 0x7a06b +800000a8: fff74713 not a4,a4 +800000ac: 00000613 li a2,0 +800000b0: 00e50463 beq a0,a4,800000b8 +800000b4: 00258613 addi a2,a1,2 +800000b8: 0000306b 0x306b +800000bc: 00252713 slti a4,a0,2 +800000c0: 00174713 xori a4,a4,1 +800000c4: 0007206b 0x7206b +800000c8: 00100793 li a5,1 +800000cc: 04a7d863 bge a5,a0,8000011c +800000d0: 00352793 slti a5,a0,3 +800000d4: 0017c793 xori a5,a5,1 +800000d8: 0007a06b 0x7a06b +800000dc: ffe50793 addi a5,a0,-2 +800000e0: 00f037b3 snez a5,a5 +800000e4: 00c787b3 add a5,a5,a2 +800000e8: 00578793 addi a5,a5,5 +800000ec: 0000306b 0x306b +800000f0: 0000306b 0x306b +800000f4: fff54713 not a4,a0 +800000f8: 01f75713 srli a4,a4,0x1f +800000fc: 0007206b 0x7206b +80000100: 00000713 li a4,0 +80000104: 00054463 bltz a0,8000010c +80000108: 00778713 addi a4,a5,7 +8000010c: 0000306b 0x306b +80000110: 010687b3 add a5,a3,a6 +80000114: 00e7a023 sw a4,0(a5) +80000118: 00008067 ret +8000011c: fff50793 addi a5,a0,-1 +80000120: 0017b793 seqz a5,a5 +80000124: 0007a06b 0x7a06b +80000128: 00c787b3 add a5,a5,a2 +8000012c: 00378793 addi a5,a5,3 
+80000130: 0000306b 0x306b +80000134: fbdff06f j 800000f0 + +80000138 <_exit>: +80000138: 250000ef jal ra,80000388 +8000013c: 00000513 li a0,0 +80000140: 0005006b 0x5006b + +80000144 : +80000144: fc002573 csrr a0,0xfc0 +80000148: 0005006b 0x5006b +8000014c: 00002197 auipc gp,0x2 +80000150: 03418193 addi gp,gp,52 # 80002180 <__global_pointer> +80000154: 7f000117 auipc sp,0x7f000 +80000158: eac10113 addi sp,sp,-340 # ff000000 <__stack_top> +8000015c: 40000593 li a1,1024 +80000160: cc102673 csrr a2,0xcc1 +80000164: 02c585b3 mul a1,a1,a2 +80000168: 40b10133 sub sp,sp,a1 +8000016c: cc3026f3 csrr a3,0xcc3 +80000170: 00068663 beqz a3,8000017c +80000174: 00000513 li a0,0 +80000178: 0005006b 0x5006b + +8000017c : +8000017c: 00008067 ret + +80000180 : +80000180: fe010113 addi sp,sp,-32 +80000184: 00112e23 sw ra,28(sp) +80000188: 00812c23 sw s0,24(sp) +8000018c: 00912a23 sw s1,20(sp) +80000190: 01212823 sw s2,16(sp) +80000194: 01312623 sw s3,12(sp) +80000198: fc0027f3 csrr a5,0xfc0 +8000019c: 0007806b 0x7806b +800001a0: cc5026f3 csrr a3,0xcc5 +800001a4: cc3029f3 csrr s3,0xcc3 +800001a8: cc002773 csrr a4,0xcc0 +800001ac: fc002673 csrr a2,0xfc0 +800001b0: 800027b7 lui a5,0x80002 +800001b4: 00269693 slli a3,a3,0x2 +800001b8: dac78793 addi a5,a5,-596 # 80001dac <__stack_top+0x81001dac> +800001bc: 00d787b3 add a5,a5,a3 +800001c0: 0007a483 lw s1,0(a5) +800001c4: 0104a403 lw s0,16(s1) +800001c8: 00c4a683 lw a3,12(s1) +800001cc: 0089a933 slt s2,s3,s0 +800001d0: 00040793 mv a5,s0 +800001d4: 00d90933 add s2,s2,a3 +800001d8: 03368433 mul s0,a3,s3 +800001dc: 00f9d463 bge s3,a5,800001e4 +800001e0: 00098793 mv a5,s3 +800001e4: 00f40433 add s0,s0,a5 +800001e8: 0084a683 lw a3,8(s1) +800001ec: 02c40433 mul s0,s0,a2 +800001f0: 02e907b3 mul a5,s2,a4 +800001f4: 00d40433 add s0,s0,a3 +800001f8: 00f40433 add s0,s0,a5 +800001fc: 00890933 add s2,s2,s0 +80000200: 01245e63 bge s0,s2,8000021c +80000204: 0004a783 lw a5,0(s1) +80000208: 0044a583 lw a1,4(s1) +8000020c: 00040513 mv a0,s0 +80000210: 
00140413 addi s0,s0,1 +80000214: 000780e7 jalr a5 +80000218: fe8916e3 bne s2,s0,80000204 +8000021c: 0019b993 seqz s3,s3 +80000220: 0009806b 0x9806b +80000224: 01c12083 lw ra,28(sp) +80000228: 01812403 lw s0,24(sp) +8000022c: 01412483 lw s1,20(sp) +80000230: 01012903 lw s2,16(sp) +80000234: 00c12983 lw s3,12(sp) +80000238: 02010113 addi sp,sp,32 +8000023c: 00008067 ret + +80000240 : +80000240: fc010113 addi sp,sp,-64 +80000244: 02112e23 sw ra,60(sp) +80000248: 02812c23 sw s0,56(sp) +8000024c: 02912a23 sw s1,52(sp) +80000250: 03212823 sw s2,48(sp) +80000254: 03312623 sw s3,44(sp) +80000258: fc2026f3 csrr a3,0xfc2 +8000025c: fc102873 csrr a6,0xfc1 +80000260: fc002473 csrr s0,0xfc0 +80000264: cc5027f3 csrr a5,0xcc5 +80000268: 01f00713 li a4,31 +8000026c: 0cf74463 blt a4,a5,80000334 +80000270: 030408b3 mul a7,s0,a6 +80000274: 00100713 li a4,1 +80000278: 00a8d463 bge a7,a0,80000280 +8000027c: 03154733 div a4,a0,a7 +80000280: 0ce6c863 blt a3,a4,80000350 +80000284: 0ae7d863 bge a5,a4,80000334 +80000288: fff68693 addi a3,a3,-1 +8000028c: 02e54333 div t1,a0,a4 +80000290: 00030893 mv a7,t1 +80000294: 00f69663 bne a3,a5,800002a0 +80000298: 02e56533 rem a0,a0,a4 +8000029c: 006508b3 add a7,a0,t1 +800002a0: 0288c4b3 div s1,a7,s0 +800002a4: 0288e933 rem s2,a7,s0 +800002a8: 0b04ca63 blt s1,a6,8000035c +800002ac: 00100693 li a3,1 +800002b0: 0304c733 div a4,s1,a6 +800002b4: 00070663 beqz a4,800002c0 +800002b8: 00070693 mv a3,a4 +800002bc: 0304e733 rem a4,s1,a6 +800002c0: 800029b7 lui s3,0x80002 +800002c4: dac98993 addi s3,s3,-596 # 80001dac <__stack_top+0x81001dac> +800002c8: 00e12e23 sw a4,28(sp) +800002cc: 00c10713 addi a4,sp,12 +800002d0: 00b12623 sw a1,12(sp) +800002d4: 00c12823 sw a2,16(sp) +800002d8: 00d12c23 sw a3,24(sp) +800002dc: 02f30333 mul t1,t1,a5 +800002e0: 00279793 slli a5,a5,0x2 +800002e4: 00f987b3 add a5,s3,a5 +800002e8: 00e7a023 sw a4,0(a5) +800002ec: 00612a23 sw t1,20(sp) +800002f0: 06904c63 bgtz s1,80000368 +800002f4: 04090063 beqz s2,80000334 +800002f8: 02848433 
mul s0,s1,s0 +800002fc: 00812a23 sw s0,20(sp) +80000300: 0009006b 0x9006b +80000304: cc5027f3 csrr a5,0xcc5 +80000308: cc202573 csrr a0,0xcc2 +8000030c: 00279793 slli a5,a5,0x2 +80000310: 00f989b3 add s3,s3,a5 +80000314: 0009a783 lw a5,0(s3) +80000318: 0087a683 lw a3,8(a5) +8000031c: 0007a703 lw a4,0(a5) +80000320: 0047a583 lw a1,4(a5) +80000324: 00d50533 add a0,a0,a3 +80000328: 000700e7 jalr a4 +8000032c: 00100793 li a5,1 +80000330: 0007806b 0x7806b +80000334: 03c12083 lw ra,60(sp) +80000338: 03812403 lw s0,56(sp) +8000033c: 03412483 lw s1,52(sp) +80000340: 03012903 lw s2,48(sp) +80000344: 02c12983 lw s3,44(sp) +80000348: 04010113 addi sp,sp,64 +8000034c: 00008067 ret +80000350: 00068713 mv a4,a3 +80000354: f2e7cae3 blt a5,a4,80000288 +80000358: fddff06f j 80000334 +8000035c: 00000713 li a4,0 +80000360: 00100693 li a3,1 +80000364: f5dff06f j 800002c0 +80000368: 00048713 mv a4,s1 +8000036c: 00985463 bge a6,s1,80000374 +80000370: 00080713 mv a4,a6 +80000374: 800007b7 lui a5,0x80000 +80000378: 18078793 addi a5,a5,384 # 80000180 <__stack_top+0x81000180> +8000037c: 00f7106b 0xf7106b +80000380: e01ff0ef jal ra,80000180 +80000384: f71ff06f j 800002f4 + +80000388 : +80000388: cc5027f3 csrr a5,0xcc5 +8000038c: 00ff0737 lui a4,0xff0 +80000390: 00e787b3 add a5,a5,a4 +80000394: 00879793 slli a5,a5,0x8 +80000398: b0002773 csrr a4,mcycle +8000039c: 00e7a023 sw a4,0(a5) +800003a0: b0102773 csrr a4,0xb01 +800003a4: 00e7a223 sw a4,4(a5) +800003a8: b0202773 csrr a4,minstret +800003ac: 00e7a423 sw a4,8(a5) +800003b0: b0302773 csrr a4,mhpmcounter3 +800003b4: 00e7a623 sw a4,12(a5) +800003b8: b0402773 csrr a4,mhpmcounter4 +800003bc: 00e7a823 sw a4,16(a5) +800003c0: b0502773 csrr a4,mhpmcounter5 +800003c4: 00e7aa23 sw a4,20(a5) +800003c8: b0602773 csrr a4,mhpmcounter6 +800003cc: 00e7ac23 sw a4,24(a5) +800003d0: b0702773 csrr a4,mhpmcounter7 +800003d4: 00e7ae23 sw a4,28(a5) +800003d8: b0802773 csrr a4,mhpmcounter8 +800003dc: 02e7a023 sw a4,32(a5) +800003e0: b0902773 csrr a4,mhpmcounter9 
+800003e4: 02e7a223 sw a4,36(a5) +800003e8: b0a02773 csrr a4,mhpmcounter10 +800003ec: 02e7a423 sw a4,40(a5) +800003f0: b0b02773 csrr a4,mhpmcounter11 +800003f4: 02e7a623 sw a4,44(a5) +800003f8: b0c02773 csrr a4,mhpmcounter12 +800003fc: 02e7a823 sw a4,48(a5) +80000400: b0d02773 csrr a4,mhpmcounter13 +80000404: 02e7aa23 sw a4,52(a5) +80000408: b0e02773 csrr a4,mhpmcounter14 +8000040c: 02e7ac23 sw a4,56(a5) +80000410: b0f02773 csrr a4,mhpmcounter15 +80000414: 02e7ae23 sw a4,60(a5) +80000418: b1002773 csrr a4,mhpmcounter16 +8000041c: 04e7a023 sw a4,64(a5) +80000420: b1102773 csrr a4,mhpmcounter17 +80000424: 04e7a223 sw a4,68(a5) +80000428: b1202773 csrr a4,mhpmcounter18 +8000042c: 04e7a423 sw a4,72(a5) +80000430: b1302773 csrr a4,mhpmcounter19 +80000434: 04e7a623 sw a4,76(a5) +80000438: b1402773 csrr a4,mhpmcounter20 +8000043c: 04e7a823 sw a4,80(a5) +80000440: b1502773 csrr a4,mhpmcounter21 +80000444: 04e7aa23 sw a4,84(a5) +80000448: b1602773 csrr a4,mhpmcounter22 +8000044c: 04e7ac23 sw a4,88(a5) +80000450: b1702773 csrr a4,mhpmcounter23 +80000454: 04e7ae23 sw a4,92(a5) +80000458: b1802773 csrr a4,mhpmcounter24 +8000045c: 06e7a023 sw a4,96(a5) +80000460: b1902773 csrr a4,mhpmcounter25 +80000464: 06e7a223 sw a4,100(a5) +80000468: b1a02773 csrr a4,mhpmcounter26 +8000046c: 06e7a423 sw a4,104(a5) +80000470: b1b02773 csrr a4,mhpmcounter27 +80000474: 06e7a623 sw a4,108(a5) +80000478: b1c02773 csrr a4,mhpmcounter28 +8000047c: 06e7a823 sw a4,112(a5) +80000480: b1d02773 csrr a4,mhpmcounter29 +80000484: 06e7aa23 sw a4,116(a5) +80000488: b1e02773 csrr a4,mhpmcounter30 +8000048c: 06e7ac23 sw a4,120(a5) +80000490: b1f02773 csrr a4,mhpmcounter31 +80000494: 06e7ae23 sw a4,124(a5) +80000498: b8002773 csrr a4,mcycleh +8000049c: 08e7a023 sw a4,128(a5) +800004a0: b8102773 csrr a4,0xb81 +800004a4: 08e7a223 sw a4,132(a5) +800004a8: b8202773 csrr a4,minstreth +800004ac: 08e7a423 sw a4,136(a5) +800004b0: b8302773 csrr a4,mhpmcounter3h +800004b4: 08e7a623 sw a4,140(a5) +800004b8: b8402773 
csrr a4,mhpmcounter4h +800004bc: 08e7a823 sw a4,144(a5) +800004c0: b8502773 csrr a4,mhpmcounter5h +800004c4: 08e7aa23 sw a4,148(a5) +800004c8: b8602773 csrr a4,mhpmcounter6h +800004cc: 08e7ac23 sw a4,152(a5) +800004d0: b8702773 csrr a4,mhpmcounter7h +800004d4: 08e7ae23 sw a4,156(a5) +800004d8: b8802773 csrr a4,mhpmcounter8h +800004dc: 0ae7a023 sw a4,160(a5) +800004e0: b8902773 csrr a4,mhpmcounter9h +800004e4: 0ae7a223 sw a4,164(a5) +800004e8: b8a02773 csrr a4,mhpmcounter10h +800004ec: 0ae7a423 sw a4,168(a5) +800004f0: b8b02773 csrr a4,mhpmcounter11h +800004f4: 0ae7a623 sw a4,172(a5) +800004f8: b8c02773 csrr a4,mhpmcounter12h +800004fc: 0ae7a823 sw a4,176(a5) +80000500: b8d02773 csrr a4,mhpmcounter13h +80000504: 0ae7aa23 sw a4,180(a5) +80000508: b8e02773 csrr a4,mhpmcounter14h +8000050c: 0ae7ac23 sw a4,184(a5) +80000510: b8f02773 csrr a4,mhpmcounter15h +80000514: 0ae7ae23 sw a4,188(a5) +80000518: b9002773 csrr a4,mhpmcounter16h +8000051c: 0ce7a023 sw a4,192(a5) +80000520: b9102773 csrr a4,mhpmcounter17h +80000524: 0ce7a223 sw a4,196(a5) +80000528: b9202773 csrr a4,mhpmcounter18h +8000052c: 0ce7a423 sw a4,200(a5) +80000530: b9302773 csrr a4,mhpmcounter19h +80000534: 0ce7a623 sw a4,204(a5) +80000538: b9402773 csrr a4,mhpmcounter20h +8000053c: 0ce7a823 sw a4,208(a5) +80000540: b9502773 csrr a4,mhpmcounter21h +80000544: 0ce7aa23 sw a4,212(a5) +80000548: b9602773 csrr a4,mhpmcounter22h +8000054c: 0ce7ac23 sw a4,216(a5) +80000550: b9702773 csrr a4,mhpmcounter23h +80000554: 0ce7ae23 sw a4,220(a5) +80000558: b9802773 csrr a4,mhpmcounter24h +8000055c: 0ee7a023 sw a4,224(a5) +80000560: b9902773 csrr a4,mhpmcounter25h +80000564: 0ee7a223 sw a4,228(a5) +80000568: b9a02773 csrr a4,mhpmcounter26h +8000056c: 0ee7a423 sw a4,232(a5) +80000570: b9b02773 csrr a4,mhpmcounter27h +80000574: 0ee7a623 sw a4,236(a5) +80000578: b9c02773 csrr a4,mhpmcounter28h +8000057c: 0ee7a823 sw a4,240(a5) +80000580: b9d02773 csrr a4,mhpmcounter29h +80000584: 0ee7aa23 sw a4,244(a5) +80000588: b9e02773 
csrr a4,mhpmcounter30h +8000058c: 0ee7ac23 sw a4,248(a5) +80000590: b9f02773 csrr a4,mhpmcounter31h +80000594: 0ee7ae23 sw a4,252(a5) +80000598: 00008067 ret + +8000059c : +8000059c: 00050593 mv a1,a0 +800005a0: 00000693 li a3,0 +800005a4: 00000613 li a2,0 +800005a8: 00000513 li a0,0 +800005ac: 20c0006f j 800007b8 <__register_exitproc> + +800005b0 : +800005b0: ff010113 addi sp,sp,-16 +800005b4: 00000593 li a1,0 +800005b8: 00812423 sw s0,8(sp) +800005bc: 00112623 sw ra,12(sp) +800005c0: 00050413 mv s0,a0 +800005c4: 290000ef jal ra,80000854 <__call_exitprocs> +800005c8: 800027b7 lui a5,0x80002 +800005cc: da87a503 lw a0,-600(a5) # 80001da8 <__stack_top+0x81001da8> +800005d0: 03c52783 lw a5,60(a0) +800005d4: 00078463 beqz a5,800005dc +800005d8: 000780e7 jalr a5 +800005dc: 00040513 mv a0,s0 +800005e0: b59ff0ef jal ra,80000138 <_exit> + +800005e4 <__libc_fini_array>: +800005e4: ff010113 addi sp,sp,-16 +800005e8: 00812423 sw s0,8(sp) +800005ec: 800027b7 lui a5,0x80002 +800005f0: 80002437 lui s0,0x80002 +800005f4: 97c40413 addi s0,s0,-1668 # 8000197c <__stack_top+0x8100197c> +800005f8: 97c78793 addi a5,a5,-1668 # 8000197c <__stack_top+0x8100197c> +800005fc: 408787b3 sub a5,a5,s0 +80000600: 00912223 sw s1,4(sp) +80000604: 00112623 sw ra,12(sp) +80000608: 4027d493 srai s1,a5,0x2 +8000060c: 02048063 beqz s1,8000062c <__libc_fini_array+0x48> +80000610: ffc78793 addi a5,a5,-4 +80000614: 00878433 add s0,a5,s0 +80000618: 00042783 lw a5,0(s0) +8000061c: fff48493 addi s1,s1,-1 +80000620: ffc40413 addi s0,s0,-4 +80000624: 000780e7 jalr a5 +80000628: fe0498e3 bnez s1,80000618 <__libc_fini_array+0x34> +8000062c: 00c12083 lw ra,12(sp) +80000630: 00812403 lw s0,8(sp) +80000634: 00412483 lw s1,4(sp) +80000638: 01010113 addi sp,sp,16 +8000063c: 00008067 ret + +80000640 <__libc_init_array>: +80000640: ff010113 addi sp,sp,-16 +80000644: 00812423 sw s0,8(sp) +80000648: 01212023 sw s2,0(sp) +8000064c: 80002437 lui s0,0x80002 +80000650: 80002937 lui s2,0x80002 +80000654: 97840793 addi 
a5,s0,-1672 # 80001978 <__stack_top+0x81001978> +80000658: 97890913 addi s2,s2,-1672 # 80001978 <__stack_top+0x81001978> +8000065c: 40f90933 sub s2,s2,a5 +80000660: 00112623 sw ra,12(sp) +80000664: 00912223 sw s1,4(sp) +80000668: 40295913 srai s2,s2,0x2 +8000066c: 02090063 beqz s2,8000068c <__libc_init_array+0x4c> +80000670: 97840413 addi s0,s0,-1672 +80000674: 00000493 li s1,0 +80000678: 00042783 lw a5,0(s0) +8000067c: 00148493 addi s1,s1,1 +80000680: 00440413 addi s0,s0,4 +80000684: 000780e7 jalr a5 +80000688: fe9918e3 bne s2,s1,80000678 <__libc_init_array+0x38> +8000068c: 80002437 lui s0,0x80002 +80000690: 80002937 lui s2,0x80002 +80000694: 97840793 addi a5,s0,-1672 # 80001978 <__stack_top+0x81001978> +80000698: 97c90913 addi s2,s2,-1668 # 8000197c <__stack_top+0x8100197c> +8000069c: 40f90933 sub s2,s2,a5 +800006a0: 40295913 srai s2,s2,0x2 +800006a4: 02090063 beqz s2,800006c4 <__libc_init_array+0x84> +800006a8: 97840413 addi s0,s0,-1672 +800006ac: 00000493 li s1,0 +800006b0: 00042783 lw a5,0(s0) +800006b4: 00148493 addi s1,s1,1 +800006b8: 00440413 addi s0,s0,4 +800006bc: 000780e7 jalr a5 +800006c0: fe9918e3 bne s2,s1,800006b0 <__libc_init_array+0x70> +800006c4: 00c12083 lw ra,12(sp) +800006c8: 00812403 lw s0,8(sp) +800006cc: 00412483 lw s1,4(sp) +800006d0: 00012903 lw s2,0(sp) +800006d4: 01010113 addi sp,sp,16 +800006d8: 00008067 ret + +800006dc : +800006dc: 00f00313 li t1,15 +800006e0: 00050713 mv a4,a0 +800006e4: 02c37e63 bgeu t1,a2,80000720 +800006e8: 00f77793 andi a5,a4,15 +800006ec: 0a079063 bnez a5,8000078c +800006f0: 08059263 bnez a1,80000774 +800006f4: ff067693 andi a3,a2,-16 +800006f8: 00f67613 andi a2,a2,15 +800006fc: 00e686b3 add a3,a3,a4 +80000700: 00b72023 sw a1,0(a4) # ff0000 <__stack_size+0xfefc00> +80000704: 00b72223 sw a1,4(a4) +80000708: 00b72423 sw a1,8(a4) +8000070c: 00b72623 sw a1,12(a4) +80000710: 01070713 addi a4,a4,16 +80000714: fed766e3 bltu a4,a3,80000700 +80000718: 00061463 bnez a2,80000720 +8000071c: 00008067 ret +80000720: 40c306b3 
sub a3,t1,a2 +80000724: 00269693 slli a3,a3,0x2 +80000728: 00000297 auipc t0,0x0 +8000072c: 005686b3 add a3,a3,t0 +80000730: 00c68067 jr 12(a3) +80000734: 00b70723 sb a1,14(a4) +80000738: 00b706a3 sb a1,13(a4) +8000073c: 00b70623 sb a1,12(a4) +80000740: 00b705a3 sb a1,11(a4) +80000744: 00b70523 sb a1,10(a4) +80000748: 00b704a3 sb a1,9(a4) +8000074c: 00b70423 sb a1,8(a4) +80000750: 00b703a3 sb a1,7(a4) +80000754: 00b70323 sb a1,6(a4) +80000758: 00b702a3 sb a1,5(a4) +8000075c: 00b70223 sb a1,4(a4) +80000760: 00b701a3 sb a1,3(a4) +80000764: 00b70123 sb a1,2(a4) +80000768: 00b700a3 sb a1,1(a4) +8000076c: 00b70023 sb a1,0(a4) +80000770: 00008067 ret +80000774: 0ff5f593 andi a1,a1,255 +80000778: 00859693 slli a3,a1,0x8 +8000077c: 00d5e5b3 or a1,a1,a3 +80000780: 01059693 slli a3,a1,0x10 +80000784: 00d5e5b3 or a1,a1,a3 +80000788: f6dff06f j 800006f4 +8000078c: 00279693 slli a3,a5,0x2 +80000790: 00000297 auipc t0,0x0 +80000794: 005686b3 add a3,a3,t0 +80000798: 00008293 mv t0,ra +8000079c: fa0680e7 jalr -96(a3) +800007a0: 00028093 mv ra,t0 +800007a4: ff078793 addi a5,a5,-16 +800007a8: 40f70733 sub a4,a4,a5 +800007ac: 00f60633 add a2,a2,a5 +800007b0: f6c378e3 bgeu t1,a2,80000720 +800007b4: f3dff06f j 800006f0 + +800007b8 <__register_exitproc>: +800007b8: 800027b7 lui a5,0x80002 +800007bc: da87a703 lw a4,-600(a5) # 80001da8 <__stack_top+0x81001da8> +800007c0: 14872783 lw a5,328(a4) +800007c4: 04078c63 beqz a5,8000081c <__register_exitproc+0x64> +800007c8: 0047a703 lw a4,4(a5) +800007cc: 01f00813 li a6,31 +800007d0: 06e84e63 blt a6,a4,8000084c <__register_exitproc+0x94> +800007d4: 00271813 slli a6,a4,0x2 +800007d8: 02050663 beqz a0,80000804 <__register_exitproc+0x4c> +800007dc: 01078333 add t1,a5,a6 +800007e0: 08c32423 sw a2,136(t1) +800007e4: 1887a883 lw a7,392(a5) +800007e8: 00100613 li a2,1 +800007ec: 00e61633 sll a2,a2,a4 +800007f0: 00c8e8b3 or a7,a7,a2 +800007f4: 1917a423 sw a7,392(a5) +800007f8: 10d32423 sw a3,264(t1) +800007fc: 00200693 li a3,2 +80000800: 02d50463 beq 
a0,a3,80000828 <__register_exitproc+0x70> +80000804: 00170713 addi a4,a4,1 +80000808: 00e7a223 sw a4,4(a5) +8000080c: 010787b3 add a5,a5,a6 +80000810: 00b7a423 sw a1,8(a5) +80000814: 00000513 li a0,0 +80000818: 00008067 ret +8000081c: 14c70793 addi a5,a4,332 +80000820: 14f72423 sw a5,328(a4) +80000824: fa5ff06f j 800007c8 <__register_exitproc+0x10> +80000828: 18c7a683 lw a3,396(a5) +8000082c: 00170713 addi a4,a4,1 +80000830: 00e7a223 sw a4,4(a5) +80000834: 00c6e633 or a2,a3,a2 +80000838: 18c7a623 sw a2,396(a5) +8000083c: 010787b3 add a5,a5,a6 +80000840: 00b7a423 sw a1,8(a5) +80000844: 00000513 li a0,0 +80000848: 00008067 ret +8000084c: fff00513 li a0,-1 +80000850: 00008067 ret + +80000854 <__call_exitprocs>: +80000854: fd010113 addi sp,sp,-48 +80000858: 800027b7 lui a5,0x80002 +8000085c: 01412c23 sw s4,24(sp) +80000860: da87aa03 lw s4,-600(a5) # 80001da8 <__stack_top+0x81001da8> +80000864: 03212023 sw s2,32(sp) +80000868: 02112623 sw ra,44(sp) +8000086c: 148a2903 lw s2,328(s4) +80000870: 02812423 sw s0,40(sp) +80000874: 02912223 sw s1,36(sp) +80000878: 01312e23 sw s3,28(sp) +8000087c: 01512a23 sw s5,20(sp) +80000880: 01612823 sw s6,16(sp) +80000884: 01712623 sw s7,12(sp) +80000888: 01812423 sw s8,8(sp) +8000088c: 04090063 beqz s2,800008cc <__call_exitprocs+0x78> +80000890: 00050b13 mv s6,a0 +80000894: 00058b93 mv s7,a1 +80000898: 00100a93 li s5,1 +8000089c: fff00993 li s3,-1 +800008a0: 00492483 lw s1,4(s2) +800008a4: fff48413 addi s0,s1,-1 +800008a8: 02044263 bltz s0,800008cc <__call_exitprocs+0x78> +800008ac: 00249493 slli s1,s1,0x2 +800008b0: 009904b3 add s1,s2,s1 +800008b4: 040b8463 beqz s7,800008fc <__call_exitprocs+0xa8> +800008b8: 1044a783 lw a5,260(s1) +800008bc: 05778063 beq a5,s7,800008fc <__call_exitprocs+0xa8> +800008c0: fff40413 addi s0,s0,-1 +800008c4: ffc48493 addi s1,s1,-4 +800008c8: ff3416e3 bne s0,s3,800008b4 <__call_exitprocs+0x60> +800008cc: 02c12083 lw ra,44(sp) +800008d0: 02812403 lw s0,40(sp) +800008d4: 02412483 lw s1,36(sp) +800008d8: 
02012903 lw s2,32(sp) +800008dc: 01c12983 lw s3,28(sp) +800008e0: 01812a03 lw s4,24(sp) +800008e4: 01412a83 lw s5,20(sp) +800008e8: 01012b03 lw s6,16(sp) +800008ec: 00c12b83 lw s7,12(sp) +800008f0: 00812c03 lw s8,8(sp) +800008f4: 03010113 addi sp,sp,48 +800008f8: 00008067 ret +800008fc: 00492783 lw a5,4(s2) +80000900: 0044a683 lw a3,4(s1) +80000904: fff78793 addi a5,a5,-1 +80000908: 04878e63 beq a5,s0,80000964 <__call_exitprocs+0x110> +8000090c: 0004a223 sw zero,4(s1) +80000910: fa0688e3 beqz a3,800008c0 <__call_exitprocs+0x6c> +80000914: 18892783 lw a5,392(s2) +80000918: 008a9733 sll a4,s5,s0 +8000091c: 00492c03 lw s8,4(s2) +80000920: 00f777b3 and a5,a4,a5 +80000924: 02079263 bnez a5,80000948 <__call_exitprocs+0xf4> +80000928: 000680e7 jalr a3 +8000092c: 00492703 lw a4,4(s2) +80000930: 148a2783 lw a5,328(s4) +80000934: 01871463 bne a4,s8,8000093c <__call_exitprocs+0xe8> +80000938: f8f904e3 beq s2,a5,800008c0 <__call_exitprocs+0x6c> +8000093c: f80788e3 beqz a5,800008cc <__call_exitprocs+0x78> +80000940: 00078913 mv s2,a5 +80000944: f5dff06f j 800008a0 <__call_exitprocs+0x4c> +80000948: 18c92783 lw a5,396(s2) +8000094c: 0844a583 lw a1,132(s1) +80000950: 00f77733 and a4,a4,a5 +80000954: 00071c63 bnez a4,8000096c <__call_exitprocs+0x118> +80000958: 000b0513 mv a0,s6 +8000095c: 000680e7 jalr a3 +80000960: fcdff06f j 8000092c <__call_exitprocs+0xd8> +80000964: 00892223 sw s0,4(s2) +80000968: fa9ff06f j 80000910 <__call_exitprocs+0xbc> +8000096c: 00058513 mv a0,a1 +80000970: 000680e7 jalr a3 +80000974: fb9ff06f j 8000092c <__call_exitprocs+0xd8> + +Disassembly of section .init_array: + +80001978 <__init_array_start>: +80001978: 0068 addi a0,sp,12 +8000197a: 8000 0x8000 + +Disassembly of section .data: + +80001980 : +80001980: 0000 unimp +80001982: 0000 unimp +80001984: 1c6c addi a1,sp,572 +80001986: 8000 0x8000 +80001988: 1cd4 addi a3,sp,628 +8000198a: 8000 0x8000 +8000198c: 1d3c addi a5,sp,696 +8000198e: 8000 0x8000 + ... 
+80001a28: 0001 nop +80001a2a: 0000 unimp +80001a2c: 0000 unimp +80001a2e: 0000 unimp +80001a30: 330e fld ft6,224(sp) +80001a32: abcd j 80002024 <__BSS_END__+0x1f8> +80001a34: 1234 addi a3,sp,296 +80001a36: e66d bnez a2,80001b20 +80001a38: deec sw a1,124(a3) +80001a3a: 0005 c.nop 1 +80001a3c: 0000000b 0xb + ... + +Disassembly of section .sdata: + +80001da8 <_global_impure_ptr>: +80001da8: 1980 addi s0,sp,240 +80001daa: 8000 0x8000 + +Disassembly of section .bss: + +80001dac : + ... + +Disassembly of section .comment: + +00000000 <.comment>: + 0: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm + 4: 2820 fld fs0,80(s0) + 6: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm + a: 3920 fld fs0,112(a0) + c: 322e fld ft4,232(sp) + e: 302e fld ft0,232(sp) + ... + +Disassembly of section .riscv.attributes: + +00000000 <.riscv.attributes>: + 0: 2541 jal 680 <__stack_size+0x280> + 2: 0000 unimp + 4: 7200 flw fs0,32(a2) + 6: 7369 lui t1,0xffffa + 8: 01007663 bgeu zero,a6,14 <__stack_usage+0x14> + c: 0000001b 0x1b + 10: 1004 addi s1,sp,32 + 12: 7205 lui tp,0xfffe1 + 14: 3376 fld ft6,376(sp) + 16: 6932 flw fs2,12(sp) + 18: 7032 flw ft0,44(sp) + 1a: 5f30 lw a2,120(a4) + 1c: 326d jal fffff9c6 <__stack_top+0xfff9c6> + 1e: 3070 fld fa2,224(s0) + 20: 665f 7032 0030 0x307032665f diff --git a/tests/regression/diverge/kernel.elf b/tests/regression/diverge/kernel.elf new file mode 100755 index 00000000..0d8a60f6 Binary files /dev/null and b/tests/regression/diverge/kernel.elf differ diff --git a/tests/regression/diverge/main.cpp b/tests/regression/diverge/main.cpp new file mode 100644 index 00000000..7b27760c --- /dev/null +++ b/tests/regression/diverge/main.cpp @@ -0,0 +1,248 @@ +#include +#include +#include +#include +#include +#include "common.h" + +#define RT_CHECK(_expr) \ + do { \ + int _ret = _expr; \ + if (0 == _ret) \ + break; \ + printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ + cleanup(); \ + exit(-1); \ + } while (false) + 
+///////////////////////////////////////////////////////////////////////////////
+
+const char* kernel_file = "kernel.bin"; // device binary to upload; override with -k
+uint32_t count = 0;                     // point count from -n; main() promotes 0 to 1
+
+std::vector<int> src_data; // random kernel inputs
+std::vector<int> ref_data; // expected outputs, computed on the host
+
+vx_device_h device = nullptr;      // closed by cleanup()
+vx_buffer_h staging_buf = nullptr; // shared buffer used to stage copies; released by cleanup()
+// Prints the command-line synopsis to stdout.
+static void show_usage() {
+  std::cout << "Vortex Test." << std::endl;
+  std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl;
+}
+// Parses -n (point count) and -k (kernel file); -h and -? print the usage text and exit.
+static void parse_args(int argc, char **argv) {
+  int c;
+  while ((c = getopt(argc, argv, "n:k:h?")) != -1) {
+    switch (c) {
+    case 'n':
+      count = (uint32_t)std::max(0, atoi(optarg)); // a negative value would wrap to a huge count
+      break;
+    case 'k':
+      kernel_file = optarg;
+      break;
+    case 'h':
+    case '?': {
+      show_usage();
+      exit(0);
+    } break;
+    default:
+      show_usage();
+      exit(-1);
+    }
+  }
+}
+// Releases the staging buffer and closes the device if they were acquired; RT_CHECK calls this before exiting.
+void cleanup() {
+  if (staging_buf) {
+    vx_buf_release(staging_buf);
+  }
+  if (device) {
+    vx_dev_close(device);
+  }
+}
+// Fills src_data with num_points values drawn from std::rand() (seeded in main()).
+void gen_input_data(uint32_t num_points) {
+  src_data.resize(num_points);
+
+  for (uint32_t i = 0; i < src_data.size(); ++i) {
+    int value = std::rand();
+    src_data[i] = value;
+  }
+}
+// Computes the expected output for every point into ref_data. The branch structure
+// replicates kernel_body() in kernel.c, with the loop index standing in for task_id.
+void gen_ref_data(uint32_t num_points) {
+  ref_data.resize(num_points);
+
+  for (int i = 0; i < (int)ref_data.size(); ++i) {
+    int value = src_data.at(i);
+
+    // none taken
+    if (i >= 0x7fffffff) {
+      value = 0;
+    } else {
+      value += 2;
+    }
+
+    // diverge
+    if (i > 1) {
+      if (i > 2) {
+        value += 6;
+      } else {
+        value += 5;
+      }
+    } else {
+      if (i > 0) {
+        value += 4;
+      } else {
+        value += 3;
+      }
+    }
+
+    // all taken
+    if (i >= 0) {
+      value += 7;
+    } else {
+      value = 0;
+    }
+
+    ref_data[i] = value;
+  }
+}
+// Starts the device, waits for it, downloads buf_size bytes from kernel_arg.dst_ptr and
+// compares the first num_points words with ref_data. Returns 0 on a match, 1 otherwise.
+int run_test(const kernel_arg_t& kernel_arg,
+             uint32_t buf_size,
+             uint32_t num_points) {
+  // start device
+  std::cout << "start device" << std::endl;
+  RT_CHECK(vx_start(device));
+
+  // wait for completion
+  std::cout << "wait for completion" << std::endl;
+  RT_CHECK(vx_ready_wait(device, -1));
+
+  // download destination buffer
+  std::cout << "download destination buffer" << std::endl;
+  RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
+
+  // verify result
+  std::cout << "verify result" << std::endl;
+  {
+    int errors = 0;
+    auto buf_ptr = (int32_t*)vx_host_ptr(staging_buf);
+    for (uint32_t i = 0; i < num_points; ++i) {
+      int ref = ref_data.at(i);
+      int cur = buf_ptr[i];
+      if (cur != ref) {
+        std::cout << "error at result #" << std::dec << i
+                  << std::hex << ": actual 0x" << cur << ", expected 0x" << ref << std::endl;
+        ++errors;
+      }
+    }
+    if (errors != 0) {
+      std::cout << "Found " << std::dec << errors << " errors!" << std::endl;
+      std::cout << "FAILED!" << std::endl;
+      return 1;
+    }
+  }
+
+  return 0;
+}
+// Host driver: builds input and reference data, uploads kernel, arguments and buffers, then runs and verifies.
+int main(int argc, char *argv[]) {
+  size_t value = 0; // receives each device address returned by vx_alloc_dev_mem()
+  kernel_arg_t kernel_arg;
+
+  // parse command arguments
+  parse_args(argc, argv);
+
+  if (count == 0) {
+    count = 1;
+  }
+
+  std::srand(50);
+
+  // open device connection
+  std::cout << "open device connection" << std::endl;
+  RT_CHECK(vx_dev_open(&device));
+
+  uint32_t num_points = count;
+
+  // generate input data
+  gen_input_data(num_points);
+
+  // generate reference data
+  gen_ref_data(num_points);
+
+  uint32_t src_buf_size = src_data.size() * sizeof(int32_t);
+  uint32_t dst_buf_size = ref_data.size() * sizeof(int32_t);
+
+  std::cout << "number of points: " << num_points << std::endl;
+  std::cout << "buffer size: " << dst_buf_size << " bytes" << std::endl;
+
+  // upload program
+  std::cout << "upload program" << std::endl;
+  RT_CHECK(vx_upload_kernel_file(device, kernel_file));
+
+  // allocate device memory
+  std::cout << "allocate device memory" << std::endl;
+
+  RT_CHECK(vx_alloc_dev_mem(device, src_buf_size, &value));
+  kernel_arg.src_ptr = value;
+  RT_CHECK(vx_alloc_dev_mem(device, dst_buf_size, &value));
+  kernel_arg.dst_ptr = value;
+
+  kernel_arg.num_points = num_points;
+
+  std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl;
+  std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
+
+  // allocate shared memory: one staging buffer large enough for every transfer below
+  std::cout << "allocate shared memory" << std::endl;
+  uint32_t staging_buf_size = std::max<uint32_t>(src_buf_size,
+                                std::max<uint32_t>(dst_buf_size,
+                                  sizeof(kernel_arg_t)));
+  RT_CHECK(vx_alloc_shared_mem(device, staging_buf_size, &staging_buf));
+
+  // upload kernel argument
+  std::cout << "upload kernel argument" << std::endl;
+  {
+    auto buf_ptr = (int*)vx_host_ptr(staging_buf);
+    memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));
+    RT_CHECK(vx_copy_to_dev(staging_buf, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
+  }
+
+  // upload source buffer
+  {
+    auto buf_ptr = (int32_t*)vx_host_ptr(staging_buf);
+    for (uint32_t i = 0; i < num_points; ++i) {
+      buf_ptr[i] = src_data.at(i);
+    }
+  }
+  std::cout << "upload source buffer" << std::endl;
+  RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_ptr, src_buf_size, 0));
+
+  // clear destination buffer: a sentinel pattern makes untouched outputs fail verification
+  {
+    auto buf_ptr = (int32_t*)vx_host_ptr(staging_buf);
+    for (uint32_t i = 0; i < num_points; ++i) {
+      buf_ptr[i] = 0xdeadbeef;
+    }
+  }
+  std::cout << "clear destination buffer" << std::endl;
+  RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, dst_buf_size, 0));
+
+  // run tests
+  std::cout << "run tests" << std::endl;
+  RT_CHECK(run_test(kernel_arg, dst_buf_size, num_points));
+
+  // cleanup
+  std::cout << "cleanup" << std::endl;
+  cleanup();
+
+  std::cout << "PASSED!" << std::endl;
+
+  return 0;
+}
\ No newline at end of file