This commit is contained in:
Blaise Tine
2021-06-13 21:44:46 -04:00
640 changed files with 394599 additions and 653711 deletions

View File

@@ -1,4 +1,4 @@
all: stub rtlsim simx opae tests
all: stub rtlsim simx opae
# Recurse into the stub/ directory and build its default goal.
stub:
$(MAKE) -C stub
@@ -12,14 +12,10 @@ rtlsim:
# Recurse into the simx/ directory and build its default goal.
simx:
$(MAKE) -C simx
# Recurse into the tests/ directory and build its default goal.
tests:
$(MAKE) -C tests
# Run the "clean" goal of each sub-directory in turn.
clean:
$(MAKE) clean -C stub
$(MAKE) clean -C opae
$(MAKE) clean -C rtlsim
$(MAKE) clean -C simx
$(MAKE) clean -C tests
.PHONY: all stub opae rtlsim simx tests clean
.PHONY: all stub opae rtlsim simx clean

View File

@@ -1,28 +0,0 @@
# Dispatches a goal to every test sub-directory, in the order listed.
# Each sub-directory is expected to provide its own Makefile.
TESTS := basic demo dogfood mstress io_addr

# None of these goals produces a file of the same name; declaring them phony
# keeps them working even if a file or directory called e.g. "clean" exists.
.PHONY: all run clean clean-all

# Build every test (default goal of each sub-directory).
all:
	for t in $(TESTS); do $(MAKE) -C $$t || exit $$?; done

# Run every test through its run-vlsim goal.
run:
	for t in $(TESTS); do $(MAKE) -C $$t run-vlsim || exit $$?; done

# Run the clean goal of every test.
clean:
	for t in $(TESTS); do $(MAKE) -C $$t clean || exit $$?; done

# Run the clean-all goal of every test.
clean-all:
	for t in $(TESTS); do $(MAKE) -C $$t clean-all || exit $$?; done

View File

@@ -1,69 +0,0 @@
# Builds the "basic" test: a host program ($(PROJECT)) linked against
# libvortex, plus a RISC-V kernel image (kernel.elf / kernel.bin / kernel.dump).

# Toolchain and runtime locations; override from the command line or environment.
RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain
VORTEX_RT_PATH ?= $(wildcard ../../../runtime)

# Command-line arguments handed to the test program by the run-* goals.
OPTS ?= -n256

# RISC-V cross tools used for the kernel image.
VX_CC  = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc
VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
VX_DP  = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
VX_CP  = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy

VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
VX_CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw

VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a

VX_SRCS = kernel.c

# Host build flags. Alternative optimized setting:
#CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors

CXXFLAGS += -I../../include

LDFLAGS +=

PROJECT = basic

SRCS = main.cpp

# Remove a target whose recipe failed, so a truncated redirect output
# (kernel.dump, .depend) is never mistaken for an up-to-date file.
.DELETE_ON_ERROR:

# Goals that are commands rather than files.
.PHONY: all run-fpga run-asesim run-vlsim run-rtlsim run-simx clean clean-all

all: $(PROJECT) kernel.bin kernel.dump

# Disassembly listing of the kernel image.
kernel.dump: kernel.elf
	$(VX_DP) -D $< > $@

# Raw binary image of the kernel.
kernel.bin: kernel.elf
	$(VX_CP) -O binary $< $@

# Kernel compiled and linked with the RISC-V toolchain.
kernel.elf: $(VX_SRCS)
	$(VX_CC) $(VX_CFLAGS) $(VX_SRCS) $(VX_LDFLAGS) -o $@

# Host test program.
$(PROJECT): $(SRCS)
	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../stub -lvortex -o $@

# Each run-* goal puts a different driver directory first on LD_LIBRARY_PATH
# before launching the same test program.
run-fpga: $(PROJECT)
	LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

run-asesim: $(PROJECT)
	ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

run-vlsim: $(PROJECT)
	ASE_LOG=0 LD_LIBRARY_PATH=../../opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

run-rtlsim: $(PROJECT)
	LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

run-simx: $(PROJECT)
	LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

# Header dependencies of the host sources, as emitted by the compiler (-MM).
.depend: $(SRCS)
	$(CXX) $(CXXFLAGS) -MM $^ > $@

# Remove host build products.
clean:
	rm -rf $(PROJECT) *.o .depend

# Additionally remove the kernel images.
clean-all: clean
	rm -rf *.elf *.bin *.dump

# Do not (re)generate .depend when the only thing requested is a cleanup.
ifeq ($(filter clean clean-all,$(MAKECMDGOALS)),)
-include .depend
endif

View File

@@ -1,12 +0,0 @@
#ifndef _COMMON_H_
#define _COMMON_H_

/* uint32_t is used below; include it here so this header compiles on its own
   regardless of what the including file pulled in first. */
#include <stdint.h>

/* Fixed device address at which the host writes the kernel_arg_t and from
   which the kernel reads it. */
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000

/* Arguments handed from the host program to the device kernel. */
struct kernel_arg_t {
  uint32_t count;   /* number of 32-bit elements each core copies */
  uint32_t src_ptr; /* device address of the source buffer */
  uint32_t dst_ptr; /* device address of the destination buffer */
};

#endif

Binary file not shown.

View File

@@ -1,16 +0,0 @@
#include <stdint.h>
#include <vx_intrinsics.h>
#include "common.h"
/* Kernel entry point: copies `count` 32-bit words from the source buffer to
   the destination buffer, starting at element index vx_core_id() * count.
   The arguments are read from the fixed address KERNEL_ARG_DEV_MEM_ADDR. */
void main() {
  struct kernel_arg_t* args = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;

  uint32_t total = args->count;
  int32_t* from  = (int32_t*)args->src_ptr;
  int32_t* to    = (int32_t*)args->dst_ptr;

  /* First element index handled by this core. */
  uint32_t index = vx_core_id() * total;

  uint32_t remaining = total;
  while (remaining != 0) {
    to[index] = from[index];
    ++index;
    --remaining;
  }
}

View File

@@ -1,545 +0,0 @@
kernel.elf: file format elf32-littleriscv
Disassembly of section .init:
80000000 <_start>:
80000000: 00000597 auipc a1,0x0
80000004: 0c058593 addi a1,a1,192 # 800000c0 <vx_set_sp>
80000008: fc102573 csrr a0,0xfc1
8000000c: 00b5106b 0xb5106b
80000010: 0b0000ef jal ra,800000c0 <vx_set_sp>
80000014: 00100513 li a0,1
80000018: 0005006b 0x5006b
8000001c: 00002517 auipc a0,0x2
80000020: b0050513 addi a0,a0,-1280 # 80001b1c <__BSS_END__>
80000024: 00002617 auipc a2,0x2
80000028: af860613 addi a2,a2,-1288 # 80001b1c <__BSS_END__>
8000002c: 40a60633 sub a2,a2,a0
80000030: 00000593 li a1,0
80000034: 41c000ef jal ra,80000450 <memset>
80000038: 00000517 auipc a0,0x0
8000003c: 32050513 addi a0,a0,800 # 80000358 <__libc_fini_array>
80000040: 2d0000ef jal ra,80000310 <atexit>
80000044: 370000ef jal ra,800003b4 <__libc_init_array>
80000048: 008000ef jal ra,80000050 <main>
8000004c: 2d80006f j 80000324 <exit>
Disassembly of section .text:
80000050 <main>:
80000050: 7ffff7b7 lui a5,0x7ffff
80000054: 0007a703 lw a4,0(a5) # 7ffff000 <__stack_size+0x7fffec00>
80000058: 0047a683 lw a3,4(a5)
8000005c: 0087a583 lw a1,8(a5)
80000060: cc5027f3 csrr a5,0xcc5
80000064: 02e787b3 mul a5,a5,a4
80000068: 02070863 beqz a4,80000098 <main+0x48>
8000006c: 00f70733 add a4,a4,a5
80000070: 00271713 slli a4,a4,0x2
80000074: 00279793 slli a5,a5,0x2
80000078: 00d787b3 add a5,a5,a3
8000007c: 00d70733 add a4,a4,a3
80000080: 40d585b3 sub a1,a1,a3
80000084: 0007a603 lw a2,0(a5)
80000088: 00f586b3 add a3,a1,a5
8000008c: 00478793 addi a5,a5,4
80000090: 00c6a023 sw a2,0(a3)
80000094: fef718e3 bne a4,a5,80000084 <main+0x34>
80000098: 00008067 ret
8000009c <register_fini>:
8000009c: 00000793 li a5,0
800000a0: 00078863 beqz a5,800000b0 <register_fini+0x14>
800000a4: 80000537 lui a0,0x80000
800000a8: 35850513 addi a0,a0,856 # 80000358 <__stack_top+0x81000358>
800000ac: 2640006f j 80000310 <atexit>
800000b0: 00008067 ret
800000b4 <_exit>:
800000b4: 048000ef jal ra,800000fc <vx_perf_dump>
800000b8: 00000513 li a0,0
800000bc: 0005006b 0x5006b
800000c0 <vx_set_sp>:
800000c0: fc002573 csrr a0,0xfc0
800000c4: 0005006b 0x5006b
800000c8: 00002197 auipc gp,0x2
800000cc: e2818193 addi gp,gp,-472 # 80001ef0 <__global_pointer>
800000d0: 7f000117 auipc sp,0x7f000
800000d4: f3010113 addi sp,sp,-208 # ff000000 <__stack_top>
800000d8: 40000593 li a1,1024
800000dc: cc102673 csrr a2,0xcc1
800000e0: 02c585b3 mul a1,a1,a2
800000e4: 40b10133 sub sp,sp,a1
800000e8: cc3026f3 csrr a3,0xcc3
800000ec: 00068663 beqz a3,800000f8 <RETURN>
800000f0: 00000513 li a0,0
800000f4: 0005006b 0x5006b
800000f8 <RETURN>:
800000f8: 00008067 ret
800000fc <vx_perf_dump>:
800000fc: cc5027f3 csrr a5,0xcc5
80000100: 00ff0737 lui a4,0xff0
80000104: 00e787b3 add a5,a5,a4
80000108: 00879793 slli a5,a5,0x8
8000010c: b0002773 csrr a4,mcycle
80000110: 00e7a023 sw a4,0(a5)
80000114: b0102773 csrr a4,0xb01
80000118: 00e7a223 sw a4,4(a5)
8000011c: b0202773 csrr a4,minstret
80000120: 00e7a423 sw a4,8(a5)
80000124: b0302773 csrr a4,mhpmcounter3
80000128: 00e7a623 sw a4,12(a5)
8000012c: b0402773 csrr a4,mhpmcounter4
80000130: 00e7a823 sw a4,16(a5)
80000134: b0502773 csrr a4,mhpmcounter5
80000138: 00e7aa23 sw a4,20(a5)
8000013c: b0602773 csrr a4,mhpmcounter6
80000140: 00e7ac23 sw a4,24(a5)
80000144: b0702773 csrr a4,mhpmcounter7
80000148: 00e7ae23 sw a4,28(a5)
8000014c: b0802773 csrr a4,mhpmcounter8
80000150: 02e7a023 sw a4,32(a5)
80000154: b0902773 csrr a4,mhpmcounter9
80000158: 02e7a223 sw a4,36(a5)
8000015c: b0a02773 csrr a4,mhpmcounter10
80000160: 02e7a423 sw a4,40(a5)
80000164: b0b02773 csrr a4,mhpmcounter11
80000168: 02e7a623 sw a4,44(a5)
8000016c: b0c02773 csrr a4,mhpmcounter12
80000170: 02e7a823 sw a4,48(a5)
80000174: b0d02773 csrr a4,mhpmcounter13
80000178: 02e7aa23 sw a4,52(a5)
8000017c: b0e02773 csrr a4,mhpmcounter14
80000180: 02e7ac23 sw a4,56(a5)
80000184: b0f02773 csrr a4,mhpmcounter15
80000188: 02e7ae23 sw a4,60(a5)
8000018c: b1002773 csrr a4,mhpmcounter16
80000190: 04e7a023 sw a4,64(a5)
80000194: b1102773 csrr a4,mhpmcounter17
80000198: 04e7a223 sw a4,68(a5)
8000019c: b1202773 csrr a4,mhpmcounter18
800001a0: 04e7a423 sw a4,72(a5)
800001a4: b1302773 csrr a4,mhpmcounter19
800001a8: 04e7a623 sw a4,76(a5)
800001ac: b1402773 csrr a4,mhpmcounter20
800001b0: 04e7a823 sw a4,80(a5)
800001b4: b1502773 csrr a4,mhpmcounter21
800001b8: 04e7aa23 sw a4,84(a5)
800001bc: b1602773 csrr a4,mhpmcounter22
800001c0: 04e7ac23 sw a4,88(a5)
800001c4: b1702773 csrr a4,mhpmcounter23
800001c8: 04e7ae23 sw a4,92(a5)
800001cc: b1802773 csrr a4,mhpmcounter24
800001d0: 06e7a023 sw a4,96(a5)
800001d4: b1902773 csrr a4,mhpmcounter25
800001d8: 06e7a223 sw a4,100(a5)
800001dc: b1a02773 csrr a4,mhpmcounter26
800001e0: 06e7a423 sw a4,104(a5)
800001e4: b1b02773 csrr a4,mhpmcounter27
800001e8: 06e7a623 sw a4,108(a5)
800001ec: b1c02773 csrr a4,mhpmcounter28
800001f0: 06e7a823 sw a4,112(a5)
800001f4: b1d02773 csrr a4,mhpmcounter29
800001f8: 06e7aa23 sw a4,116(a5)
800001fc: b1e02773 csrr a4,mhpmcounter30
80000200: 06e7ac23 sw a4,120(a5)
80000204: b1f02773 csrr a4,mhpmcounter31
80000208: 06e7ae23 sw a4,124(a5)
8000020c: b8002773 csrr a4,mcycleh
80000210: 08e7a023 sw a4,128(a5)
80000214: b8102773 csrr a4,0xb81
80000218: 08e7a223 sw a4,132(a5)
8000021c: b8202773 csrr a4,minstreth
80000220: 08e7a423 sw a4,136(a5)
80000224: b8302773 csrr a4,mhpmcounter3h
80000228: 08e7a623 sw a4,140(a5)
8000022c: b8402773 csrr a4,mhpmcounter4h
80000230: 08e7a823 sw a4,144(a5)
80000234: b8502773 csrr a4,mhpmcounter5h
80000238: 08e7aa23 sw a4,148(a5)
8000023c: b8602773 csrr a4,mhpmcounter6h
80000240: 08e7ac23 sw a4,152(a5)
80000244: b8702773 csrr a4,mhpmcounter7h
80000248: 08e7ae23 sw a4,156(a5)
8000024c: b8802773 csrr a4,mhpmcounter8h
80000250: 0ae7a023 sw a4,160(a5)
80000254: b8902773 csrr a4,mhpmcounter9h
80000258: 0ae7a223 sw a4,164(a5)
8000025c: b8a02773 csrr a4,mhpmcounter10h
80000260: 0ae7a423 sw a4,168(a5)
80000264: b8b02773 csrr a4,mhpmcounter11h
80000268: 0ae7a623 sw a4,172(a5)
8000026c: b8c02773 csrr a4,mhpmcounter12h
80000270: 0ae7a823 sw a4,176(a5)
80000274: b8d02773 csrr a4,mhpmcounter13h
80000278: 0ae7aa23 sw a4,180(a5)
8000027c: b8e02773 csrr a4,mhpmcounter14h
80000280: 0ae7ac23 sw a4,184(a5)
80000284: b8f02773 csrr a4,mhpmcounter15h
80000288: 0ae7ae23 sw a4,188(a5)
8000028c: b9002773 csrr a4,mhpmcounter16h
80000290: 0ce7a023 sw a4,192(a5)
80000294: b9102773 csrr a4,mhpmcounter17h
80000298: 0ce7a223 sw a4,196(a5)
8000029c: b9202773 csrr a4,mhpmcounter18h
800002a0: 0ce7a423 sw a4,200(a5)
800002a4: b9302773 csrr a4,mhpmcounter19h
800002a8: 0ce7a623 sw a4,204(a5)
800002ac: b9402773 csrr a4,mhpmcounter20h
800002b0: 0ce7a823 sw a4,208(a5)
800002b4: b9502773 csrr a4,mhpmcounter21h
800002b8: 0ce7aa23 sw a4,212(a5)
800002bc: b9602773 csrr a4,mhpmcounter22h
800002c0: 0ce7ac23 sw a4,216(a5)
800002c4: b9702773 csrr a4,mhpmcounter23h
800002c8: 0ce7ae23 sw a4,220(a5)
800002cc: b9802773 csrr a4,mhpmcounter24h
800002d0: 0ee7a023 sw a4,224(a5)
800002d4: b9902773 csrr a4,mhpmcounter25h
800002d8: 0ee7a223 sw a4,228(a5)
800002dc: b9a02773 csrr a4,mhpmcounter26h
800002e0: 0ee7a423 sw a4,232(a5)
800002e4: b9b02773 csrr a4,mhpmcounter27h
800002e8: 0ee7a623 sw a4,236(a5)
800002ec: b9c02773 csrr a4,mhpmcounter28h
800002f0: 0ee7a823 sw a4,240(a5)
800002f4: b9d02773 csrr a4,mhpmcounter29h
800002f8: 0ee7aa23 sw a4,244(a5)
800002fc: b9e02773 csrr a4,mhpmcounter30h
80000300: 0ee7ac23 sw a4,248(a5)
80000304: b9f02773 csrr a4,mhpmcounter31h
80000308: 0ee7ae23 sw a4,252(a5)
8000030c: 00008067 ret
80000310 <atexit>:
80000310: 00050593 mv a1,a0
80000314: 00000693 li a3,0
80000318: 00000613 li a2,0
8000031c: 00000513 li a0,0
80000320: 20c0006f j 8000052c <__register_exitproc>
80000324 <exit>:
80000324: ff010113 addi sp,sp,-16
80000328: 00000593 li a1,0
8000032c: 00812423 sw s0,8(sp)
80000330: 00112623 sw ra,12(sp)
80000334: 00050413 mv s0,a0
80000338: 290000ef jal ra,800005c8 <__call_exitprocs>
8000033c: 800027b7 lui a5,0x80002
80000340: b187a503 lw a0,-1256(a5) # 80001b18 <__stack_top+0x81001b18>
80000344: 03c52783 lw a5,60(a0)
80000348: 00078463 beqz a5,80000350 <exit+0x2c>
8000034c: 000780e7 jalr a5
80000350: 00040513 mv a0,s0
80000354: d61ff0ef jal ra,800000b4 <_exit>
80000358 <__libc_fini_array>:
80000358: ff010113 addi sp,sp,-16
8000035c: 00812423 sw s0,8(sp)
80000360: 800017b7 lui a5,0x80001
80000364: 80001437 lui s0,0x80001
80000368: 6f040413 addi s0,s0,1776 # 800016f0 <__stack_top+0x810016f0>
8000036c: 6f078793 addi a5,a5,1776 # 800016f0 <__stack_top+0x810016f0>
80000370: 408787b3 sub a5,a5,s0
80000374: 00912223 sw s1,4(sp)
80000378: 00112623 sw ra,12(sp)
8000037c: 4027d493 srai s1,a5,0x2
80000380: 02048063 beqz s1,800003a0 <__libc_fini_array+0x48>
80000384: ffc78793 addi a5,a5,-4
80000388: 00878433 add s0,a5,s0
8000038c: 00042783 lw a5,0(s0)
80000390: fff48493 addi s1,s1,-1
80000394: ffc40413 addi s0,s0,-4
80000398: 000780e7 jalr a5
8000039c: fe0498e3 bnez s1,8000038c <__libc_fini_array+0x34>
800003a0: 00c12083 lw ra,12(sp)
800003a4: 00812403 lw s0,8(sp)
800003a8: 00412483 lw s1,4(sp)
800003ac: 01010113 addi sp,sp,16
800003b0: 00008067 ret
800003b4 <__libc_init_array>:
800003b4: ff010113 addi sp,sp,-16
800003b8: 00812423 sw s0,8(sp)
800003bc: 01212023 sw s2,0(sp)
800003c0: 80001437 lui s0,0x80001
800003c4: 80001937 lui s2,0x80001
800003c8: 6ec40793 addi a5,s0,1772 # 800016ec <__stack_top+0x810016ec>
800003cc: 6ec90913 addi s2,s2,1772 # 800016ec <__stack_top+0x810016ec>
800003d0: 40f90933 sub s2,s2,a5
800003d4: 00112623 sw ra,12(sp)
800003d8: 00912223 sw s1,4(sp)
800003dc: 40295913 srai s2,s2,0x2
800003e0: 02090063 beqz s2,80000400 <__libc_init_array+0x4c>
800003e4: 6ec40413 addi s0,s0,1772
800003e8: 00000493 li s1,0
800003ec: 00042783 lw a5,0(s0)
800003f0: 00148493 addi s1,s1,1
800003f4: 00440413 addi s0,s0,4
800003f8: 000780e7 jalr a5
800003fc: fe9918e3 bne s2,s1,800003ec <__libc_init_array+0x38>
80000400: 80001437 lui s0,0x80001
80000404: 80001937 lui s2,0x80001
80000408: 6ec40793 addi a5,s0,1772 # 800016ec <__stack_top+0x810016ec>
8000040c: 6f090913 addi s2,s2,1776 # 800016f0 <__stack_top+0x810016f0>
80000410: 40f90933 sub s2,s2,a5
80000414: 40295913 srai s2,s2,0x2
80000418: 02090063 beqz s2,80000438 <__libc_init_array+0x84>
8000041c: 6ec40413 addi s0,s0,1772
80000420: 00000493 li s1,0
80000424: 00042783 lw a5,0(s0)
80000428: 00148493 addi s1,s1,1
8000042c: 00440413 addi s0,s0,4
80000430: 000780e7 jalr a5
80000434: fe9918e3 bne s2,s1,80000424 <__libc_init_array+0x70>
80000438: 00c12083 lw ra,12(sp)
8000043c: 00812403 lw s0,8(sp)
80000440: 00412483 lw s1,4(sp)
80000444: 00012903 lw s2,0(sp)
80000448: 01010113 addi sp,sp,16
8000044c: 00008067 ret
80000450 <memset>:
80000450: 00f00313 li t1,15
80000454: 00050713 mv a4,a0
80000458: 02c37e63 bgeu t1,a2,80000494 <memset+0x44>
8000045c: 00f77793 andi a5,a4,15
80000460: 0a079063 bnez a5,80000500 <memset+0xb0>
80000464: 08059263 bnez a1,800004e8 <memset+0x98>
80000468: ff067693 andi a3,a2,-16
8000046c: 00f67613 andi a2,a2,15
80000470: 00e686b3 add a3,a3,a4
80000474: 00b72023 sw a1,0(a4) # ff0000 <__stack_size+0xfefc00>
80000478: 00b72223 sw a1,4(a4)
8000047c: 00b72423 sw a1,8(a4)
80000480: 00b72623 sw a1,12(a4)
80000484: 01070713 addi a4,a4,16
80000488: fed766e3 bltu a4,a3,80000474 <memset+0x24>
8000048c: 00061463 bnez a2,80000494 <memset+0x44>
80000490: 00008067 ret
80000494: 40c306b3 sub a3,t1,a2
80000498: 00269693 slli a3,a3,0x2
8000049c: 00000297 auipc t0,0x0
800004a0: 005686b3 add a3,a3,t0
800004a4: 00c68067 jr 12(a3)
800004a8: 00b70723 sb a1,14(a4)
800004ac: 00b706a3 sb a1,13(a4)
800004b0: 00b70623 sb a1,12(a4)
800004b4: 00b705a3 sb a1,11(a4)
800004b8: 00b70523 sb a1,10(a4)
800004bc: 00b704a3 sb a1,9(a4)
800004c0: 00b70423 sb a1,8(a4)
800004c4: 00b703a3 sb a1,7(a4)
800004c8: 00b70323 sb a1,6(a4)
800004cc: 00b702a3 sb a1,5(a4)
800004d0: 00b70223 sb a1,4(a4)
800004d4: 00b701a3 sb a1,3(a4)
800004d8: 00b70123 sb a1,2(a4)
800004dc: 00b700a3 sb a1,1(a4)
800004e0: 00b70023 sb a1,0(a4)
800004e4: 00008067 ret
800004e8: 0ff5f593 andi a1,a1,255
800004ec: 00859693 slli a3,a1,0x8
800004f0: 00d5e5b3 or a1,a1,a3
800004f4: 01059693 slli a3,a1,0x10
800004f8: 00d5e5b3 or a1,a1,a3
800004fc: f6dff06f j 80000468 <memset+0x18>
80000500: 00279693 slli a3,a5,0x2
80000504: 00000297 auipc t0,0x0
80000508: 005686b3 add a3,a3,t0
8000050c: 00008293 mv t0,ra
80000510: fa0680e7 jalr -96(a3)
80000514: 00028093 mv ra,t0
80000518: ff078793 addi a5,a5,-16
8000051c: 40f70733 sub a4,a4,a5
80000520: 00f60633 add a2,a2,a5
80000524: f6c378e3 bgeu t1,a2,80000494 <memset+0x44>
80000528: f3dff06f j 80000464 <memset+0x14>
8000052c <__register_exitproc>:
8000052c: 800027b7 lui a5,0x80002
80000530: b187a703 lw a4,-1256(a5) # 80001b18 <__stack_top+0x81001b18>
80000534: 14872783 lw a5,328(a4)
80000538: 04078c63 beqz a5,80000590 <__register_exitproc+0x64>
8000053c: 0047a703 lw a4,4(a5)
80000540: 01f00813 li a6,31
80000544: 06e84e63 blt a6,a4,800005c0 <__register_exitproc+0x94>
80000548: 00271813 slli a6,a4,0x2
8000054c: 02050663 beqz a0,80000578 <__register_exitproc+0x4c>
80000550: 01078333 add t1,a5,a6
80000554: 08c32423 sw a2,136(t1)
80000558: 1887a883 lw a7,392(a5)
8000055c: 00100613 li a2,1
80000560: 00e61633 sll a2,a2,a4
80000564: 00c8e8b3 or a7,a7,a2
80000568: 1917a423 sw a7,392(a5)
8000056c: 10d32423 sw a3,264(t1)
80000570: 00200693 li a3,2
80000574: 02d50463 beq a0,a3,8000059c <__register_exitproc+0x70>
80000578: 00170713 addi a4,a4,1
8000057c: 00e7a223 sw a4,4(a5)
80000580: 010787b3 add a5,a5,a6
80000584: 00b7a423 sw a1,8(a5)
80000588: 00000513 li a0,0
8000058c: 00008067 ret
80000590: 14c70793 addi a5,a4,332
80000594: 14f72423 sw a5,328(a4)
80000598: fa5ff06f j 8000053c <__register_exitproc+0x10>
8000059c: 18c7a683 lw a3,396(a5)
800005a0: 00170713 addi a4,a4,1
800005a4: 00e7a223 sw a4,4(a5)
800005a8: 00c6e633 or a2,a3,a2
800005ac: 18c7a623 sw a2,396(a5)
800005b0: 010787b3 add a5,a5,a6
800005b4: 00b7a423 sw a1,8(a5)
800005b8: 00000513 li a0,0
800005bc: 00008067 ret
800005c0: fff00513 li a0,-1
800005c4: 00008067 ret
800005c8 <__call_exitprocs>:
800005c8: fd010113 addi sp,sp,-48
800005cc: 800027b7 lui a5,0x80002
800005d0: 01412c23 sw s4,24(sp)
800005d4: b187aa03 lw s4,-1256(a5) # 80001b18 <__stack_top+0x81001b18>
800005d8: 03212023 sw s2,32(sp)
800005dc: 02112623 sw ra,44(sp)
800005e0: 148a2903 lw s2,328(s4)
800005e4: 02812423 sw s0,40(sp)
800005e8: 02912223 sw s1,36(sp)
800005ec: 01312e23 sw s3,28(sp)
800005f0: 01512a23 sw s5,20(sp)
800005f4: 01612823 sw s6,16(sp)
800005f8: 01712623 sw s7,12(sp)
800005fc: 01812423 sw s8,8(sp)
80000600: 04090063 beqz s2,80000640 <__call_exitprocs+0x78>
80000604: 00050b13 mv s6,a0
80000608: 00058b93 mv s7,a1
8000060c: 00100a93 li s5,1
80000610: fff00993 li s3,-1
80000614: 00492483 lw s1,4(s2)
80000618: fff48413 addi s0,s1,-1
8000061c: 02044263 bltz s0,80000640 <__call_exitprocs+0x78>
80000620: 00249493 slli s1,s1,0x2
80000624: 009904b3 add s1,s2,s1
80000628: 040b8463 beqz s7,80000670 <__call_exitprocs+0xa8>
8000062c: 1044a783 lw a5,260(s1)
80000630: 05778063 beq a5,s7,80000670 <__call_exitprocs+0xa8>
80000634: fff40413 addi s0,s0,-1
80000638: ffc48493 addi s1,s1,-4
8000063c: ff3416e3 bne s0,s3,80000628 <__call_exitprocs+0x60>
80000640: 02c12083 lw ra,44(sp)
80000644: 02812403 lw s0,40(sp)
80000648: 02412483 lw s1,36(sp)
8000064c: 02012903 lw s2,32(sp)
80000650: 01c12983 lw s3,28(sp)
80000654: 01812a03 lw s4,24(sp)
80000658: 01412a83 lw s5,20(sp)
8000065c: 01012b03 lw s6,16(sp)
80000660: 00c12b83 lw s7,12(sp)
80000664: 00812c03 lw s8,8(sp)
80000668: 03010113 addi sp,sp,48
8000066c: 00008067 ret
80000670: 00492783 lw a5,4(s2)
80000674: 0044a683 lw a3,4(s1)
80000678: fff78793 addi a5,a5,-1
8000067c: 04878e63 beq a5,s0,800006d8 <__call_exitprocs+0x110>
80000680: 0004a223 sw zero,4(s1)
80000684: fa0688e3 beqz a3,80000634 <__call_exitprocs+0x6c>
80000688: 18892783 lw a5,392(s2)
8000068c: 008a9733 sll a4,s5,s0
80000690: 00492c03 lw s8,4(s2)
80000694: 00f777b3 and a5,a4,a5
80000698: 02079263 bnez a5,800006bc <__call_exitprocs+0xf4>
8000069c: 000680e7 jalr a3
800006a0: 00492703 lw a4,4(s2)
800006a4: 148a2783 lw a5,328(s4)
800006a8: 01871463 bne a4,s8,800006b0 <__call_exitprocs+0xe8>
800006ac: f8f904e3 beq s2,a5,80000634 <__call_exitprocs+0x6c>
800006b0: f80788e3 beqz a5,80000640 <__call_exitprocs+0x78>
800006b4: 00078913 mv s2,a5
800006b8: f5dff06f j 80000614 <__call_exitprocs+0x4c>
800006bc: 18c92783 lw a5,396(s2)
800006c0: 0844a583 lw a1,132(s1)
800006c4: 00f77733 and a4,a4,a5
800006c8: 00071c63 bnez a4,800006e0 <__call_exitprocs+0x118>
800006cc: 000b0513 mv a0,s6
800006d0: 000680e7 jalr a3
800006d4: fcdff06f j 800006a0 <__call_exitprocs+0xd8>
800006d8: 00892223 sw s0,4(s2)
800006dc: fa9ff06f j 80000684 <__call_exitprocs+0xbc>
800006e0: 00058513 mv a0,a1
800006e4: 000680e7 jalr a3
800006e8: fb9ff06f j 800006a0 <__call_exitprocs+0xd8>
Disassembly of section .init_array:
800016ec <__init_array_start>:
800016ec: 009c addi a5,sp,64
800016ee: 8000 0x8000
Disassembly of section .data:
800016f0 <impure_data>:
800016f0: 0000 unimp
800016f2: 0000 unimp
800016f4: 19dc addi a5,sp,244
800016f6: 8000 0x8000
800016f8: 1a44 addi s1,sp,308
800016fa: 8000 0x8000
800016fc: 1aac addi a1,sp,376
800016fe: 8000 0x8000
...
80001798: 0001 nop
8000179a: 0000 unimp
8000179c: 0000 unimp
8000179e: 0000 unimp
800017a0: 330e fld ft6,224(sp)
800017a2: abcd j 80001d94 <__BSS_END__+0x278>
800017a4: 1234 addi a3,sp,296
800017a6: e66d bnez a2,80001890 <impure_data+0x1a0>
800017a8: deec sw a1,124(a3)
800017aa: 0005 c.nop 1
800017ac: 0000000b 0xb
...
Disassembly of section .sdata:
80001b18 <_global_impure_ptr>:
80001b18: 16f0 addi a2,sp,876
80001b1a: 8000 0x8000
Disassembly of section .comment:
00000000 <.comment>:
0: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm
4: 2820 fld fs0,80(s0)
6: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm
a: 3920 fld fs0,112(a0)
c: 322e fld ft4,232(sp)
e: 302e fld ft0,232(sp)
...
Disassembly of section .riscv.attributes:
00000000 <.riscv.attributes>:
0: 2541 jal 680 <__stack_size+0x280>
2: 0000 unimp
4: 7200 flw fs0,32(a2)
6: 7369 lui t1,0xffffa
8: 01007663 bgeu zero,a6,14 <__stack_usage+0x14>
c: 0000001b 0x1b
10: 1004 addi s1,sp,32
12: 7205 lui tp,0xfffe1
14: 3376 fld ft6,376(sp)
16: 6932 flw fs2,12(sp)
18: 7032 flw ft0,44(sp)
1a: 5f30 lw a2,120(a4)
1c: 326d jal fffff9c6 <__stack_top+0xfff9c6>
1e: 3070 fld fa2,224(s0)
20: 665f 7032 0030 0x307032665f

Binary file not shown.

View File

@@ -1,153 +0,0 @@
#include <iostream>
#include <assert.h>
// Upper bound on core ids: kernel_run_once() returns immediately for core_id >= this value.
#define NUM_CORES_MAX 32
// Smaller of two values. Function-like macro: an argument may be evaluated
// twice, so do not pass expressions with side effects.
#define MIN(a, b) ((a) < (b) ? (a) : (b))
// Launch context shared by the scheduling routines in this file.
// Only num_groups and global_offset are read by the code visible here; the
// remaining fields are carried along unused.
struct context_t {
// Number of work-groups along each of the three dimensions (x, y, z).
uint32_t num_groups[3];
// Per-dimension offset added to the work-group coordinates when printing ids.
uint32_t global_offset[3];
// Not referenced in this file.
uint32_t local_size[3];
// printf_* fields: not referenced in this file.
char * printf_buffer;
uint32_t *printf_buffer_position;
uint32_t printf_buffer_capacity;
// Not referenced in this file.
uint32_t work_dim;
};
// Pointer to a work-group function taking an argument block, the launch
// context and the three group coordinates. In this file it is only used as
// the type of wspawn_args_t::pfn, which kernel_run_once() sets to NULL.
typedef void (*vx_pocl_workgroup_func) (
const void * /* args */,
const struct context_t * /* context */,
uint32_t /* group_x */,
uint32_t /* group_y */,
uint32_t /* group_z */
);
// Parameters handed to the spawn callbacks for one core.
typedef struct {
// Launch context (work-group grid dimensions and offsets).
struct context_t * ctx;
// Work-group function and its argument block; both are NULL in this file.
vx_pocl_workgroup_func pfn;
const void * args;
// Linear id of the first work-group to process.
int offset;
// Base number of work-groups each thread processes (see kernel_spawn_callback).
int N;
// Warps with wid < R process one extra work-group per thread.
int R;
} wspawn_args_t;
void kernel_spawn_callback(int core_id, int NW, int NT, int nW, wspawn_args_t* p_wspawn_args) {
assert(nW <= NW);
for (int wid = 0; wid < nW; ++wid) {
for (int tid = 0; tid < NT; ++tid) {
int wK = (p_wspawn_args->N * wid) + MIN(p_wspawn_args->R, wid);
int tK = p_wspawn_args->N + (wid < p_wspawn_args->R);
int offset = p_wspawn_args->offset + (wK * NT) + (tid * tK);
int X = p_wspawn_args->ctx->num_groups[0];
int Y = p_wspawn_args->ctx->num_groups[1];
int XY = X * Y;
for (int wg_id = offset, N = wg_id + tK; wg_id < N; ++wg_id) {
int k = wg_id / XY;
int wg_2d = wg_id - k * XY;
int j = wg_2d / X;
int i = wg_2d - j * X;
int gid0 = p_wspawn_args->ctx->global_offset[0] + i;
int gid1 = p_wspawn_args->ctx->global_offset[1] + j;
int gid2 = p_wspawn_args->ctx->global_offset[2] + k;
printf("c%d w%d t%d: g={%d, %d, %d}\n", core_id, wid, tid, gid0, gid1, gid2);
}
}
}
}
void kernel_spawn_remaining_callback(int core_id, int NW, int NT, int wid, int nT, wspawn_args_t* p_wspawn_args) {
assert(wid < NW);
assert(nT <= NT);
for (int t = 0; t < nT; ++t) {
int tid = core_id * NW * NT + wid * NT + t;
int wg_id = p_wspawn_args->offset + tid;
int X = p_wspawn_args->ctx->num_groups[0];
int Y = p_wspawn_args->ctx->num_groups[1];
int XY = X * Y;
int k = wg_id / XY;
int wg_2d = wg_id - k * XY;
int j = wg_2d / X;
int i = wg_2d - j * X;
int gid0 = p_wspawn_args->ctx->global_offset[0] + i;
int gid1 = p_wspawn_args->ctx->global_offset[1] + j;
int gid2 = p_wspawn_args->ctx->global_offset[2] + k;
printf("c%d w%d t%d: g={%d, %d, %d}\n", core_id, wid, tid, gid0, gid1, gid2);
}
}
// Works out which work-groups core `core_id` is responsible for and hands
// them to the spawn callbacks, which print the resulting group ids.
//   ctx      launch context; num_groups gives the grid size
//   NC       number of cores available
//   NW, NT   warps per core / threads per warp
//   core_id  core being simulated; cores that are not needed return at once
void kernel_run_once(context_t* ctx, int NC, int NW, int NT, int core_id) {
  // total number of WGs
  int X = ctx->num_groups[0];
  int Y = ctx->num_groups[1];
  int Z = ctx->num_groups[2];
  int Q = X * Y * Z;

  // current core id
  if (core_id >= NUM_CORES_MAX)
    return;

  // calculate necessary active cores
  int WT = NW * NT;
  int nC = (Q > WT) ? (Q / WT) : 1;
  int nc = MIN(nC, NC);
  if (core_id >= nc)
    return; // terminate extra cores

  // number of workgroups per core
  int wgs_per_core = Q / nc;
  int wgs_per_core0 = wgs_per_core;
  // The remainder goes to the last *active* core (nc-1). Comparing against
  // NC-1 would drop it whenever fewer than NC cores are active, because that
  // core has already returned above.
  if (core_id == (nc-1)) {
    int QC_r = Q - (nc * wgs_per_core0);
    wgs_per_core0 += QC_r; // last core executes remaining WGs
  }

  // number of workgroups per warp
  int nW = wgs_per_core0 / NT;              // total warps per core
  int rT = wgs_per_core0 - (nW * NT);       // remaining threads
  int fW = (nW >= NW) ? (nW / NW) : 0;      // full warps iterations
  int rW = (fW != 0) ? (nW - fW * NW) : 0;  // remaining full warps
  if (0 == fW)
    fW = 1;

  //--
  wspawn_args_t wspawn_args = { ctx, NULL, NULL, core_id * wgs_per_core, fW, rW };

  //--
  if (nW >= 1) {
    int nw = MIN(nW, NW);
    kernel_spawn_callback(core_id, NW, NT, nw, &wspawn_args);
  }

  //--
  if (rT != 0) {
    // NOTE(review): this offset is relative to the core's own share, while
    // the callback adds core_id * NW * NT to it - confirm this is intended
    // when wgs_per_core differs from NW * NT.
    wspawn_args.offset = wgs_per_core0 - rT;
    kernel_spawn_remaining_callback(core_id, NW, NT, 0, rT, &wspawn_args);
  }
}
// Simulates a launch of an X x Y x Z work-group grid on NC cores with NW
// warps of NT threads each by calling kernel_run_once() for every core id,
// then terminates the process with exit status 0.
void kernel_run(int X, int Y, int Z, int NC, int NW, int NT) {
  context_t ctx;

  const int groups[3] = { X, Y, Z };
  for (int dim = 0; dim < 3; ++dim) {
    ctx.num_groups[dim] = groups[dim];
  }
  for (int dim = 0; dim < 3; ++dim) {
    ctx.global_offset[dim] = 0;
  }

  int cid = 0;
  while (cid < NC) {
    kernel_run_once(&ctx, NC, NW, NT, cid);
    ++cid;
  }

  exit (0);
}

View File

@@ -1,288 +0,0 @@
#include <iostream>
#include <unistd.h>
#include <string.h>
#include <vortex.h>
#include <chrono>
#include "common.h"
#include "kernel_scheduler.h"
// Evaluates _expr once as an int status. A zero result is success; any other
// value prints the failing expression and its result, calls cleanup() and
// terminates the process with exit(-1).
#define RT_CHECK(_expr)                                         \
  do {                                                          \
    int _ret = _expr;                                           \
    if (_ret != 0) {                                            \
      printf("Error: '%s' returned %d!\n", #_expr, (int)_ret);  \
      cleanup();                                                \
      exit(-1);                                                 \
    }                                                           \
  } while (false)
///////////////////////////////////////////////////////////////////////////////
// Path of the kernel image uploaded to the device; the "-k" option overrides it.
const char* kernel_file = "kernel.bin";
// Test selector set by "-t": 0 runs the memcopy test, 1 the kernel test, -1 (default) both.
int test = -1;
// Element count set by "-n"; main() replaces a value of 0 with 1.
uint32_t count = 0;
// Device handle filled in by vx_dev_open(); closed by cleanup().
vx_device_h device = nullptr;
// Staging buffer obtained from vx_alloc_shared_mem(); released by cleanup().
vx_buffer_h staging_buf = nullptr;
// Prints the program banner and the supported command-line options to stdout.
static void show_usage() {
  std::cout << "Vortex Driver Test." << std::endl
            << "Usage: [-t testno][-k: kernel][-n words][-h: help]" << std::endl;
}
// Parses the command line with getopt() and stores the results in the
// file-level settings:
//   -n <words>  -> count
//   -t <testno> -> test
//   -k <file>   -> kernel_file
//   -h, -?      -> print usage and exit(0)
// Any other value returned by getopt() prints usage and exits with -1.
static void parse_args(int argc, char **argv) {
  int opt;
  while ((opt = getopt(argc, argv, "n:t:k:h?")) != -1) {
    if (opt == 'n') {
      count = atoi(optarg);
    } else if (opt == 't') {
      test = atoi(optarg);
    } else if (opt == 'k') {
      kernel_file = optarg;
    } else if (opt == 'h' || opt == '?') {
      show_usage();
      exit(0);
    } else {
      show_usage();
      exit(-1);
    }
  }
}
// Releases the staging buffer and closes the device connection, skipping
// whichever of the two has not been set up yet.
void cleanup() {
  if (staging_buf != nullptr)
    vx_buf_release(staging_buf);

  if (device != nullptr)
    vx_dev_close(device);
}
// Derives the i-th test pattern from `value`: the value shifted left by the
// shift amount, OR-ed with the low bits of the original value that the shift
// vacated.
//
// The shift amount is i reduced modulo 64 and the mask is built in 64-bit
// arithmetic. Callers pass i well beyond 63 (one pattern per 64-bit word of
// the buffer); shifting an int by >= 31 or a uint64_t by >= 64 is undefined
// behaviour, so both are avoided here. Results for 0 <= i <= 30 are
// unchanged.
uint64_t shuffle(int i, uint64_t value) {
  const unsigned shift = static_cast<unsigned>(i) & 63u;
  const uint64_t low_mask = (static_cast<uint64_t>(1) << shift) - 1;
  return (value << shift) | (value & low_mask);
}
// Round-trips a generated pattern through device memory and checks it.
//   dev_addr    device address written to and read back from
//   value       seed passed to shuffle() to generate each 64-bit word
//   num_blocks  number of 64-byte blocks to transfer
// Returns 0 when every word read back matches, 1 otherwise. A failing
// runtime call inside RT_CHECK terminates the process instead of returning.
// Timings are printed only when verification succeeds.
int run_memcopy_test(uint32_t dev_addr, uint64_t value, int num_blocks) {
int errors = 0;
auto time_start = std::chrono::high_resolution_clock::now();
// number of 64-bit words in the transfer (64 bytes per block, 8 bytes per word)
int num_blocks_8 = (64 * num_blocks) / 8;
// update source buffer
for (int i = 0; i < num_blocks_8; ++i) {
((uint64_t*)vx_host_ptr(staging_buf))[i] = shuffle(i, value);
}
/*for (int i = 0; i < num_blocks; ++i) {
std::cout << "data[" << i << "]=0x";
for (int j = 7; j >= 0; --j) {
std::cout << std::hex << ((uint64_t*)vx_host_ptr(staging_buf))[i * 8 +j];
}
std::cout << std::endl;
}*/
// write source buffer to local memory
std::cout << "write source buffer to local memory" << std::endl;
auto t0 = std::chrono::high_resolution_clock::now();
RT_CHECK(vx_copy_to_dev(staging_buf, dev_addr, 64 * num_blocks, 0));
auto t1 = std::chrono::high_resolution_clock::now();
// clear destination buffer
// (the same staging buffer is reused, so stale data cannot pass verification)
for (int i = 0; i < num_blocks_8; ++i) {
((uint64_t*)vx_host_ptr(staging_buf))[i] = 0;
}
// read destination buffer from local memory
std::cout << "read destination buffer from local memory" << std::endl;
auto t2 = std::chrono::high_resolution_clock::now();
RT_CHECK(vx_copy_from_dev(staging_buf, dev_addr, 64 * num_blocks, 0));
auto t3 = std::chrono::high_resolution_clock::now();
// verify result
std::cout << "verify result" << std::endl;
for (int i = 0; i < num_blocks_8; ++i) {
auto curr = ((uint64_t*)vx_host_ptr(staging_buf))[i];
auto ref = shuffle(i, value);
if (curr != ref) {
std::cout << "error at 0x" << std::hex << (dev_addr + 8 * i)
<< ": actual 0x" << curr << ", expected 0x" << ref << std::endl;
++errors;
}
}
if (errors != 0) {
std::cout << "Found " << std::dec << errors << " errors!" << std::endl;
std::cout << "FAILED!" << std::endl;
return 1;
}
auto time_end = std::chrono::high_resolution_clock::now();
// report upload (t0..t1), download (t2..t3) and overall durations in whole milliseconds
double elapsed;
elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(t1 - t0).count();
printf("upload time: %lg ms\n", elapsed);
elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(t3 - t2).count();
printf("download time: %lg ms\n", elapsed);
elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(time_end - time_start).count();
printf("Total elapsed time: %lg ms\n", elapsed);
return 0;
}
// Uploads a source buffer, runs the device kernel and checks the destination
// buffer it produced.
//   kernel_arg  supplies the device addresses src_ptr and dst_ptr
//   buf_size    number of bytes transferred in each copy
//   num_points  number of 32-bit elements filled and verified
// Returns 0 when every element read back equals its index, 1 otherwise. A
// failing runtime call inside RT_CHECK terminates the process instead of
// returning. Timings are printed only when verification succeeds.
int run_kernel_test(const kernel_arg_t& kernel_arg,
uint32_t buf_size,
uint32_t num_points) {
int errors = 0;
auto time_start = std::chrono::high_resolution_clock::now();
// update source buffer
// (element i holds the value i)
{
auto buf_ptr = (int32_t*)vx_host_ptr(staging_buf);
for (uint32_t i = 0; i < num_points; ++i) {
buf_ptr[i] = i;
}
}
std::cout << "upload source buffer" << std::endl;
auto t0 = std::chrono::high_resolution_clock::now();
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_ptr, buf_size, 0));
auto t1 = std::chrono::high_resolution_clock::now();
// clear destination buffer
// (filled with a marker value that differs from the expected results)
{
auto buf_ptr = (int32_t*)vx_host_ptr(staging_buf);
for (uint32_t i = 0; i < num_points; ++i) {
buf_ptr[i] = 0xdeadbeef;
}
}
std::cout << "clear destination buffer" << std::endl;
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
// start device
std::cout << "start execution" << std::endl;
auto t2 = std::chrono::high_resolution_clock::now();
RT_CHECK(vx_start(device));
RT_CHECK(vx_ready_wait(device, -1));
auto t3 = std::chrono::high_resolution_clock::now();
// read destination buffer from local memory
std::cout << "read destination buffer from local memory" << std::endl;
auto t4 = std::chrono::high_resolution_clock::now();
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
auto t5 = std::chrono::high_resolution_clock::now();
// verify result
std::cout << "verify result" << std::endl;
for (uint32_t i = 0; i < num_points; ++i) {
int32_t curr = ((int32_t*)vx_host_ptr(staging_buf))[i];
int32_t ref = i;
if (curr != ref) {
std::cout << "error at result #" << std::dec << i
<< std::hex << ": actual 0x" << curr << ", expected 0x" << ref << std::endl;
++errors;
}
}
if (errors != 0) {
std::cout << "Found " << std::dec << errors << " errors!" << std::endl;
std::cout << "FAILED!" << std::endl;
return 1;
}
auto time_end = std::chrono::high_resolution_clock::now();
// report upload (t0..t1), execution (t2..t3), download (t4..t5) and overall durations in whole milliseconds
double elapsed;
elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(t1 - t0).count();
printf("upload time: %lg ms\n", elapsed);
elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(t3 - t2).count();
printf("execute time: %lg ms\n", elapsed);
elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(t5 - t4).count();
printf("download time: %lg ms\n", elapsed);
elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(time_end - time_start).count();
printf("Total elapsed time: %lg ms\n", elapsed);
return 0;
}
// Test driver entry point: parses the options, opens the device, allocates
// the source/destination device buffers and the staging buffer, then runs
// the memcopy test and/or the kernel test as selected by `test`.
// Returns 0 after printing "Test PASSED"; any failing step goes through
// RT_CHECK, which cleans up and exits with -1.
int main(int argc, char *argv[]) {
size_t value;
kernel_arg_t kernel_arg;
// parse command arguments
parse_args(argc, argv);
// always transfer at least one element
if (count == 0) {
count = 1;
}
//kernel_run(count, 1, 1, test, 4, 4);
// open device connection
std::cout << "open device connection" << std::endl;
RT_CHECK(vx_dev_open(&device));
// NOTE(review): max_cores is queried but not used afterwards in this function.
unsigned max_cores;
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
// buffer size is the element payload rounded up to a whole number of 64-byte blocks
uint32_t num_points = 1 * count;
uint32_t num_blocks = (num_points * sizeof(int32_t) + 63) / 64;
uint32_t buf_size = num_blocks * 64;
std::cout << "number of points: " << num_points << std::endl;
std::cout << "buffer size: " << buf_size << " bytes" << std::endl;
// allocate device memory
// NOTE(review): these two allocations are not explicitly released in this file; cleanup() only releases staging_buf and closes the device.
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
kernel_arg.src_ptr = value;
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
kernel_arg.dst_ptr = value;
kernel_arg.count = count;
std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl;
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
// allocate shared memory
// (large enough for either a data buffer or the kernel argument block)
std::cout << "allocate shared memory" << std::endl;
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &staging_buf));
// run tests
if (0 == test || -1 == test) {
std::cout << "run memcopy test" << std::endl;
RT_CHECK(run_memcopy_test(kernel_arg.src_ptr, 0x0badf00d40ff40ff, num_blocks));
}
if (1 == test || -1 == test) {
// upload program
std::cout << "upload program" << std::endl;
RT_CHECK(vx_upload_kernel_file(device, kernel_file));
// upload kernel argument
// (staged through the shared buffer, then copied to the fixed argument address)
std::cout << "upload kernel argument" << std::endl;
{
auto buf_ptr = (void*)vx_host_ptr(staging_buf);
memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));
RT_CHECK(vx_copy_to_dev(staging_buf, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
}
std::cout << "run kernel test" << std::endl;
RT_CHECK(run_kernel_test(kernel_arg, buf_size, num_points));
}
// cleanup
std::cout << "cleanup" << std::endl;
cleanup();
std::cout << "Test PASSED" << std::endl;
return 0;
}

View File

@@ -1,67 +0,0 @@
# Build and run rules for the "demo" driver test.
#
# Two artefacts are produced:
#   * $(PROJECT)   - host executable built from $(SRCS) with the host $(CXX)
#   * kernel.elf   - device kernel built from $(VX_SRCS) with the RISC-V
#                    toolchain, plus kernel.bin (raw image) and kernel.dump
#                    (disassembly) derived from it
#
# The run-* targets start the host executable with a different directory
# prepended to LD_LIBRARY_PATH.

# Remove a target whose recipe failed. Without this, a failed
# "objdump > kernel.dump" leaves a truncated file that looks up to date.
.DELETE_ON_ERROR:

RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain
VORTEX_RT_PATH ?= $(wildcard ../../../runtime)

# Arguments handed to the host executable by the run-* targets.
OPTS ?= -n64

VX_CC  = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc
VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
VX_DP  = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
VX_CP  = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy

VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
VX_CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw

VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a

VX_SRCS = kernel.c

#CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I../../include

PROJECT = demo

SRCS = main.cpp

# These names are commands, not files; declaring them phony keeps a stray
# file called e.g. "clean" from silently disabling the target.
.PHONY: all run-fpga run-asesim run-vlsim run-rtlsim run-simx clean clean-all

all: $(PROJECT) kernel.bin kernel.dump

# Disassembly of the kernel image.
kernel.dump: kernel.elf
	$(VX_DP) -D $< > $@

# Raw binary image extracted from the ELF file.
kernel.bin: kernel.elf
	$(VX_CP) -O binary $< $@

kernel.elf: $(VX_SRCS)
	$(VX_CC) $(VX_CFLAGS) $^ $(VX_LDFLAGS) -o $@

# Host executable, linked against libvortex from ../../stub.
$(PROJECT): $(SRCS)
	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../stub -lvortex -o $@

run-fpga: $(PROJECT)
	LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

run-asesim: $(PROJECT)
	ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

run-vlsim: $(PROJECT)
	LD_LIBRARY_PATH=../../opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

run-rtlsim: $(PROJECT)
	LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

run-simx: $(PROJECT)
	LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

# Header dependencies of the host sources, as emitted by the compiler.
.depend: $(SRCS)
	$(CXX) $(CXXFLAGS) -MM $^ > $@

# Removes the host build products only.
clean:
	rm -rf $(PROJECT) *.o .depend

# Additionally removes the kernel build products.
clean-all: clean
	rm -rf *.elf *.bin *.dump

# Skip the dependency file for both cleaning goals; including it would make
# "make clean-all" regenerate .depend only to delete it straight away.
ifeq ($(filter clean clean-all,$(MAKECMDGOALS)),)
-include .depend
endif

View File

@@ -1,14 +0,0 @@
#ifndef _COMMON_H_
#define _COMMON_H_

// The struct below uses uint32_t. Including the definition here makes the
// header self-contained instead of relying on the includer's include order.
#include <stdint.h>

// Device address from which the kernel's main() reads its kernel_arg_t; the
// host program copies the argument block to this address before starting the
// device.
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000

// Argument block shared between the host program and the device kernel.
// Every field is 32 bits wide; the *_ptr fields hold device addresses that
// the kernel casts to int32_t pointers.
struct kernel_arg_t {
  uint32_t num_tasks;  // number of tasks handed to vx_spawn_tasks()
  uint32_t task_size;  // number of elements each task processes
  uint32_t src0_ptr;   // device address of the first input buffer
  uint32_t src1_ptr;   // device address of the second input buffer
  uint32_t dst_ptr;    // device address of the output buffer
};

#endif

Binary file not shown.

View File

@@ -1,23 +0,0 @@
#include <stdint.h>
#include <vx_intrinsics.h>
#include <vx_spawn.h>
#include "common.h"
void kernel_body(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
int32_t* dst_ptr = (int32_t*)_arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
dst_ptr[offset+i] = src0_ptr[offset+i] + src1_ptr[offset+i];
}
}
// Kernel entry point: reads the argument block from its fixed device address
// and spawns num_tasks tasks, each of which runs kernel_body with that block.
void main() {
  struct kernel_arg_t* kargs = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
  uint32_t task_count = kargs->num_tasks;
  vx_spawn_tasks(task_count, kernel_body, kargs);
}

View File

@@ -1,695 +0,0 @@
kernel.elf: file format elf32-littleriscv
Disassembly of section .init:
80000000 <_start>:
80000000: 00000597 auipc a1,0x0
80000004: 0e458593 addi a1,a1,228 # 800000e4 <vx_set_sp>
80000008: fc102573 csrr a0,0xfc1
8000000c: 00b5106b 0xb5106b
80000010: 0d4000ef jal ra,800000e4 <vx_set_sp>
80000014: 00100513 li a0,1
80000018: 0005006b 0x5006b
8000001c: 00002517 auipc a0,0x2
80000020: d3050513 addi a0,a0,-720 # 80001d4c <g_wspawn_args>
80000024: 00002617 auipc a2,0x2
80000028: da860613 addi a2,a2,-600 # 80001dcc <__BSS_END__>
8000002c: 40a60633 sub a2,a2,a0
80000030: 00000593 li a1,0
80000034: 648000ef jal ra,8000067c <memset>
80000038: 00000517 auipc a0,0x0
8000003c: 54c50513 addi a0,a0,1356 # 80000584 <__libc_fini_array>
80000040: 4fc000ef jal ra,8000053c <atexit>
80000044: 59c000ef jal ra,800005e0 <__libc_init_array>
80000048: 008000ef jal ra,80000050 <main>
8000004c: 5040006f j 80000550 <exit>
Disassembly of section .text:
80000050 <main>:
80000050: 7ffff7b7 lui a5,0x7ffff
80000054: 0007a503 lw a0,0(a5) # 7ffff000 <__stack_size+0x7fffec00>
80000058: 800005b7 lui a1,0x80000
8000005c: 7ffff637 lui a2,0x7ffff
80000060: 08058593 addi a1,a1,128 # 80000080 <__stack_top+0x81000080>
80000064: 17c0006f j 800001e0 <vx_spawn_tasks>
80000068 <register_fini>:
80000068: 00000793 li a5,0
8000006c: 00078863 beqz a5,8000007c <register_fini+0x14>
80000070: 80000537 lui a0,0x80000
80000074: 58450513 addi a0,a0,1412 # 80000584 <__stack_top+0x81000584>
80000078: 4c40006f j 8000053c <atexit>
8000007c: 00008067 ret
80000080 <kernel_body>:
80000080: 0045a683 lw a3,4(a1)
80000084: 0085a603 lw a2,8(a1)
80000088: 00c5a703 lw a4,12(a1)
8000008c: 02d50533 mul a0,a0,a3
80000090: 0105a803 lw a6,16(a1)
80000094: 04068063 beqz a3,800000d4 <kernel_body+0x54>
80000098: 00a686b3 add a3,a3,a0
8000009c: 00269693 slli a3,a3,0x2
800000a0: 00251513 slli a0,a0,0x2
800000a4: 00c507b3 add a5,a0,a2
800000a8: 00c686b3 add a3,a3,a2
800000ac: 40c80833 sub a6,a6,a2
800000b0: 40c70533 sub a0,a4,a2
800000b4: 00f50733 add a4,a0,a5
800000b8: 0007a583 lw a1,0(a5)
800000bc: 00072703 lw a4,0(a4)
800000c0: 00f80633 add a2,a6,a5
800000c4: 00478793 addi a5,a5,4
800000c8: 00b70733 add a4,a4,a1
800000cc: 00e62023 sw a4,0(a2) # 7ffff000 <__stack_size+0x7fffec00>
800000d0: fef692e3 bne a3,a5,800000b4 <kernel_body+0x34>
800000d4: 00008067 ret
800000d8 <_exit>:
800000d8: 250000ef jal ra,80000328 <vx_perf_dump>
800000dc: 00000513 li a0,0
800000e0: 0005006b 0x5006b
800000e4 <vx_set_sp>:
800000e4: fc002573 csrr a0,0xfc0
800000e8: 0005006b 0x5006b
800000ec: 00002197 auipc gp,0x2
800000f0: 03418193 addi gp,gp,52 # 80002120 <__global_pointer>
800000f4: 7f000117 auipc sp,0x7f000
800000f8: f0c10113 addi sp,sp,-244 # ff000000 <__stack_top>
800000fc: 40000593 li a1,1024
80000100: cc102673 csrr a2,0xcc1
80000104: 02c585b3 mul a1,a1,a2
80000108: 40b10133 sub sp,sp,a1
8000010c: cc3026f3 csrr a3,0xcc3
80000110: 00068663 beqz a3,8000011c <RETURN>
80000114: 00000513 li a0,0
80000118: 0005006b 0x5006b
8000011c <RETURN>:
8000011c: 00008067 ret
80000120 <spawn_tasks_callback>:
80000120: fe010113 addi sp,sp,-32
80000124: 00112e23 sw ra,28(sp)
80000128: 00812c23 sw s0,24(sp)
8000012c: 00912a23 sw s1,20(sp)
80000130: 01212823 sw s2,16(sp)
80000134: 01312623 sw s3,12(sp)
80000138: fc0027f3 csrr a5,0xfc0
8000013c: 0007806b 0x7806b
80000140: cc5026f3 csrr a3,0xcc5
80000144: cc3029f3 csrr s3,0xcc3
80000148: cc002773 csrr a4,0xcc0
8000014c: fc002673 csrr a2,0xfc0
80000150: 800027b7 lui a5,0x80002
80000154: 00269693 slli a3,a3,0x2
80000158: d4c78793 addi a5,a5,-692 # 80001d4c <__stack_top+0x81001d4c>
8000015c: 00d787b3 add a5,a5,a3
80000160: 0007a483 lw s1,0(a5)
80000164: 0104a403 lw s0,16(s1)
80000168: 00c4a683 lw a3,12(s1)
8000016c: 0089a933 slt s2,s3,s0
80000170: 00040793 mv a5,s0
80000174: 00d90933 add s2,s2,a3
80000178: 03368433 mul s0,a3,s3
8000017c: 00f9d463 bge s3,a5,80000184 <spawn_tasks_callback+0x64>
80000180: 00098793 mv a5,s3
80000184: 00f40433 add s0,s0,a5
80000188: 0084a683 lw a3,8(s1)
8000018c: 02c40433 mul s0,s0,a2
80000190: 02e907b3 mul a5,s2,a4
80000194: 00d40433 add s0,s0,a3
80000198: 00f40433 add s0,s0,a5
8000019c: 00890933 add s2,s2,s0
800001a0: 01245e63 bge s0,s2,800001bc <spawn_tasks_callback+0x9c>
800001a4: 0004a783 lw a5,0(s1)
800001a8: 0044a583 lw a1,4(s1)
800001ac: 00040513 mv a0,s0
800001b0: 00140413 addi s0,s0,1
800001b4: 000780e7 jalr a5
800001b8: fe8916e3 bne s2,s0,800001a4 <spawn_tasks_callback+0x84>
800001bc: 0019b993 seqz s3,s3
800001c0: 0009806b 0x9806b
800001c4: 01c12083 lw ra,28(sp)
800001c8: 01812403 lw s0,24(sp)
800001cc: 01412483 lw s1,20(sp)
800001d0: 01012903 lw s2,16(sp)
800001d4: 00c12983 lw s3,12(sp)
800001d8: 02010113 addi sp,sp,32
800001dc: 00008067 ret
800001e0 <vx_spawn_tasks>:
800001e0: fc010113 addi sp,sp,-64
800001e4: 02112e23 sw ra,60(sp)
800001e8: 02812c23 sw s0,56(sp)
800001ec: 02912a23 sw s1,52(sp)
800001f0: 03212823 sw s2,48(sp)
800001f4: 03312623 sw s3,44(sp)
800001f8: fc2026f3 csrr a3,0xfc2
800001fc: fc102873 csrr a6,0xfc1
80000200: fc002473 csrr s0,0xfc0
80000204: cc5027f3 csrr a5,0xcc5
80000208: 01f00713 li a4,31
8000020c: 0cf74463 blt a4,a5,800002d4 <vx_spawn_tasks+0xf4>
80000210: 030408b3 mul a7,s0,a6
80000214: 00100713 li a4,1
80000218: 00a8d463 bge a7,a0,80000220 <vx_spawn_tasks+0x40>
8000021c: 03154733 div a4,a0,a7
80000220: 0ce6c863 blt a3,a4,800002f0 <vx_spawn_tasks+0x110>
80000224: 0ae7d863 bge a5,a4,800002d4 <vx_spawn_tasks+0xf4>
80000228: fff68693 addi a3,a3,-1
8000022c: 02e54333 div t1,a0,a4
80000230: 00030893 mv a7,t1
80000234: 00f69663 bne a3,a5,80000240 <vx_spawn_tasks+0x60>
80000238: 02e56533 rem a0,a0,a4
8000023c: 006508b3 add a7,a0,t1
80000240: 0288c4b3 div s1,a7,s0
80000244: 0288e933 rem s2,a7,s0
80000248: 0b04ca63 blt s1,a6,800002fc <vx_spawn_tasks+0x11c>
8000024c: 00100693 li a3,1
80000250: 0304c733 div a4,s1,a6
80000254: 00070663 beqz a4,80000260 <vx_spawn_tasks+0x80>
80000258: 00070693 mv a3,a4
8000025c: 0304e733 rem a4,s1,a6
80000260: 800029b7 lui s3,0x80002
80000264: d4c98993 addi s3,s3,-692 # 80001d4c <__stack_top+0x81001d4c>
80000268: 00e12e23 sw a4,28(sp)
8000026c: 00c10713 addi a4,sp,12
80000270: 00b12623 sw a1,12(sp)
80000274: 00c12823 sw a2,16(sp)
80000278: 00d12c23 sw a3,24(sp)
8000027c: 02f30333 mul t1,t1,a5
80000280: 00279793 slli a5,a5,0x2
80000284: 00f987b3 add a5,s3,a5
80000288: 00e7a023 sw a4,0(a5)
8000028c: 00612a23 sw t1,20(sp)
80000290: 06904c63 bgtz s1,80000308 <vx_spawn_tasks+0x128>
80000294: 04090063 beqz s2,800002d4 <vx_spawn_tasks+0xf4>
80000298: 02848433 mul s0,s1,s0
8000029c: 00812a23 sw s0,20(sp)
800002a0: 0009006b 0x9006b
800002a4: cc5027f3 csrr a5,0xcc5
800002a8: cc202573 csrr a0,0xcc2
800002ac: 00279793 slli a5,a5,0x2
800002b0: 00f989b3 add s3,s3,a5
800002b4: 0009a783 lw a5,0(s3)
800002b8: 0087a683 lw a3,8(a5)
800002bc: 0007a703 lw a4,0(a5)
800002c0: 0047a583 lw a1,4(a5)
800002c4: 00d50533 add a0,a0,a3
800002c8: 000700e7 jalr a4
800002cc: 00100793 li a5,1
800002d0: 0007806b 0x7806b
800002d4: 03c12083 lw ra,60(sp)
800002d8: 03812403 lw s0,56(sp)
800002dc: 03412483 lw s1,52(sp)
800002e0: 03012903 lw s2,48(sp)
800002e4: 02c12983 lw s3,44(sp)
800002e8: 04010113 addi sp,sp,64
800002ec: 00008067 ret
800002f0: 00068713 mv a4,a3
800002f4: f2e7cae3 blt a5,a4,80000228 <vx_spawn_tasks+0x48>
800002f8: fddff06f j 800002d4 <vx_spawn_tasks+0xf4>
800002fc: 00000713 li a4,0
80000300: 00100693 li a3,1
80000304: f5dff06f j 80000260 <vx_spawn_tasks+0x80>
80000308: 00048713 mv a4,s1
8000030c: 00985463 bge a6,s1,80000314 <vx_spawn_tasks+0x134>
80000310: 00080713 mv a4,a6
80000314: 800007b7 lui a5,0x80000
80000318: 12078793 addi a5,a5,288 # 80000120 <__stack_top+0x81000120>
8000031c: 00f7106b 0xf7106b
80000320: e01ff0ef jal ra,80000120 <spawn_tasks_callback>
80000324: f71ff06f j 80000294 <vx_spawn_tasks+0xb4>
80000328 <vx_perf_dump>:
80000328: cc5027f3 csrr a5,0xcc5
8000032c: 00ff0737 lui a4,0xff0
80000330: 00e787b3 add a5,a5,a4
80000334: 00879793 slli a5,a5,0x8
80000338: b0002773 csrr a4,mcycle
8000033c: 00e7a023 sw a4,0(a5)
80000340: b0102773 csrr a4,0xb01
80000344: 00e7a223 sw a4,4(a5)
80000348: b0202773 csrr a4,minstret
8000034c: 00e7a423 sw a4,8(a5)
80000350: b0302773 csrr a4,mhpmcounter3
80000354: 00e7a623 sw a4,12(a5)
80000358: b0402773 csrr a4,mhpmcounter4
8000035c: 00e7a823 sw a4,16(a5)
80000360: b0502773 csrr a4,mhpmcounter5
80000364: 00e7aa23 sw a4,20(a5)
80000368: b0602773 csrr a4,mhpmcounter6
8000036c: 00e7ac23 sw a4,24(a5)
80000370: b0702773 csrr a4,mhpmcounter7
80000374: 00e7ae23 sw a4,28(a5)
80000378: b0802773 csrr a4,mhpmcounter8
8000037c: 02e7a023 sw a4,32(a5)
80000380: b0902773 csrr a4,mhpmcounter9
80000384: 02e7a223 sw a4,36(a5)
80000388: b0a02773 csrr a4,mhpmcounter10
8000038c: 02e7a423 sw a4,40(a5)
80000390: b0b02773 csrr a4,mhpmcounter11
80000394: 02e7a623 sw a4,44(a5)
80000398: b0c02773 csrr a4,mhpmcounter12
8000039c: 02e7a823 sw a4,48(a5)
800003a0: b0d02773 csrr a4,mhpmcounter13
800003a4: 02e7aa23 sw a4,52(a5)
800003a8: b0e02773 csrr a4,mhpmcounter14
800003ac: 02e7ac23 sw a4,56(a5)
800003b0: b0f02773 csrr a4,mhpmcounter15
800003b4: 02e7ae23 sw a4,60(a5)
800003b8: b1002773 csrr a4,mhpmcounter16
800003bc: 04e7a023 sw a4,64(a5)
800003c0: b1102773 csrr a4,mhpmcounter17
800003c4: 04e7a223 sw a4,68(a5)
800003c8: b1202773 csrr a4,mhpmcounter18
800003cc: 04e7a423 sw a4,72(a5)
800003d0: b1302773 csrr a4,mhpmcounter19
800003d4: 04e7a623 sw a4,76(a5)
800003d8: b1402773 csrr a4,mhpmcounter20
800003dc: 04e7a823 sw a4,80(a5)
800003e0: b1502773 csrr a4,mhpmcounter21
800003e4: 04e7aa23 sw a4,84(a5)
800003e8: b1602773 csrr a4,mhpmcounter22
800003ec: 04e7ac23 sw a4,88(a5)
800003f0: b1702773 csrr a4,mhpmcounter23
800003f4: 04e7ae23 sw a4,92(a5)
800003f8: b1802773 csrr a4,mhpmcounter24
800003fc: 06e7a023 sw a4,96(a5)
80000400: b1902773 csrr a4,mhpmcounter25
80000404: 06e7a223 sw a4,100(a5)
80000408: b1a02773 csrr a4,mhpmcounter26
8000040c: 06e7a423 sw a4,104(a5)
80000410: b1b02773 csrr a4,mhpmcounter27
80000414: 06e7a623 sw a4,108(a5)
80000418: b1c02773 csrr a4,mhpmcounter28
8000041c: 06e7a823 sw a4,112(a5)
80000420: b1d02773 csrr a4,mhpmcounter29
80000424: 06e7aa23 sw a4,116(a5)
80000428: b1e02773 csrr a4,mhpmcounter30
8000042c: 06e7ac23 sw a4,120(a5)
80000430: b1f02773 csrr a4,mhpmcounter31
80000434: 06e7ae23 sw a4,124(a5)
80000438: b8002773 csrr a4,mcycleh
8000043c: 08e7a023 sw a4,128(a5)
80000440: b8102773 csrr a4,0xb81
80000444: 08e7a223 sw a4,132(a5)
80000448: b8202773 csrr a4,minstreth
8000044c: 08e7a423 sw a4,136(a5)
80000450: b8302773 csrr a4,mhpmcounter3h
80000454: 08e7a623 sw a4,140(a5)
80000458: b8402773 csrr a4,mhpmcounter4h
8000045c: 08e7a823 sw a4,144(a5)
80000460: b8502773 csrr a4,mhpmcounter5h
80000464: 08e7aa23 sw a4,148(a5)
80000468: b8602773 csrr a4,mhpmcounter6h
8000046c: 08e7ac23 sw a4,152(a5)
80000470: b8702773 csrr a4,mhpmcounter7h
80000474: 08e7ae23 sw a4,156(a5)
80000478: b8802773 csrr a4,mhpmcounter8h
8000047c: 0ae7a023 sw a4,160(a5)
80000480: b8902773 csrr a4,mhpmcounter9h
80000484: 0ae7a223 sw a4,164(a5)
80000488: b8a02773 csrr a4,mhpmcounter10h
8000048c: 0ae7a423 sw a4,168(a5)
80000490: b8b02773 csrr a4,mhpmcounter11h
80000494: 0ae7a623 sw a4,172(a5)
80000498: b8c02773 csrr a4,mhpmcounter12h
8000049c: 0ae7a823 sw a4,176(a5)
800004a0: b8d02773 csrr a4,mhpmcounter13h
800004a4: 0ae7aa23 sw a4,180(a5)
800004a8: b8e02773 csrr a4,mhpmcounter14h
800004ac: 0ae7ac23 sw a4,184(a5)
800004b0: b8f02773 csrr a4,mhpmcounter15h
800004b4: 0ae7ae23 sw a4,188(a5)
800004b8: b9002773 csrr a4,mhpmcounter16h
800004bc: 0ce7a023 sw a4,192(a5)
800004c0: b9102773 csrr a4,mhpmcounter17h
800004c4: 0ce7a223 sw a4,196(a5)
800004c8: b9202773 csrr a4,mhpmcounter18h
800004cc: 0ce7a423 sw a4,200(a5)
800004d0: b9302773 csrr a4,mhpmcounter19h
800004d4: 0ce7a623 sw a4,204(a5)
800004d8: b9402773 csrr a4,mhpmcounter20h
800004dc: 0ce7a823 sw a4,208(a5)
800004e0: b9502773 csrr a4,mhpmcounter21h
800004e4: 0ce7aa23 sw a4,212(a5)
800004e8: b9602773 csrr a4,mhpmcounter22h
800004ec: 0ce7ac23 sw a4,216(a5)
800004f0: b9702773 csrr a4,mhpmcounter23h
800004f4: 0ce7ae23 sw a4,220(a5)
800004f8: b9802773 csrr a4,mhpmcounter24h
800004fc: 0ee7a023 sw a4,224(a5)
80000500: b9902773 csrr a4,mhpmcounter25h
80000504: 0ee7a223 sw a4,228(a5)
80000508: b9a02773 csrr a4,mhpmcounter26h
8000050c: 0ee7a423 sw a4,232(a5)
80000510: b9b02773 csrr a4,mhpmcounter27h
80000514: 0ee7a623 sw a4,236(a5)
80000518: b9c02773 csrr a4,mhpmcounter28h
8000051c: 0ee7a823 sw a4,240(a5)
80000520: b9d02773 csrr a4,mhpmcounter29h
80000524: 0ee7aa23 sw a4,244(a5)
80000528: b9e02773 csrr a4,mhpmcounter30h
8000052c: 0ee7ac23 sw a4,248(a5)
80000530: b9f02773 csrr a4,mhpmcounter31h
80000534: 0ee7ae23 sw a4,252(a5)
80000538: 00008067 ret
8000053c <atexit>:
8000053c: 00050593 mv a1,a0
80000540: 00000693 li a3,0
80000544: 00000613 li a2,0
80000548: 00000513 li a0,0
8000054c: 20c0006f j 80000758 <__register_exitproc>
80000550 <exit>:
80000550: ff010113 addi sp,sp,-16
80000554: 00000593 li a1,0
80000558: 00812423 sw s0,8(sp)
8000055c: 00112623 sw ra,12(sp)
80000560: 00050413 mv s0,a0
80000564: 290000ef jal ra,800007f4 <__call_exitprocs>
80000568: 800027b7 lui a5,0x80002
8000056c: d487a503 lw a0,-696(a5) # 80001d48 <__stack_top+0x81001d48>
80000570: 03c52783 lw a5,60(a0)
80000574: 00078463 beqz a5,8000057c <exit+0x2c>
80000578: 000780e7 jalr a5
8000057c: 00040513 mv a0,s0
80000580: b59ff0ef jal ra,800000d8 <_exit>
80000584 <__libc_fini_array>:
80000584: ff010113 addi sp,sp,-16
80000588: 00812423 sw s0,8(sp)
8000058c: 800027b7 lui a5,0x80002
80000590: 80002437 lui s0,0x80002
80000594: 91c40413 addi s0,s0,-1764 # 8000191c <__stack_top+0x8100191c>
80000598: 91c78793 addi a5,a5,-1764 # 8000191c <__stack_top+0x8100191c>
8000059c: 408787b3 sub a5,a5,s0
800005a0: 00912223 sw s1,4(sp)
800005a4: 00112623 sw ra,12(sp)
800005a8: 4027d493 srai s1,a5,0x2
800005ac: 02048063 beqz s1,800005cc <__libc_fini_array+0x48>
800005b0: ffc78793 addi a5,a5,-4
800005b4: 00878433 add s0,a5,s0
800005b8: 00042783 lw a5,0(s0)
800005bc: fff48493 addi s1,s1,-1
800005c0: ffc40413 addi s0,s0,-4
800005c4: 000780e7 jalr a5
800005c8: fe0498e3 bnez s1,800005b8 <__libc_fini_array+0x34>
800005cc: 00c12083 lw ra,12(sp)
800005d0: 00812403 lw s0,8(sp)
800005d4: 00412483 lw s1,4(sp)
800005d8: 01010113 addi sp,sp,16
800005dc: 00008067 ret
800005e0 <__libc_init_array>:
800005e0: ff010113 addi sp,sp,-16
800005e4: 00812423 sw s0,8(sp)
800005e8: 01212023 sw s2,0(sp)
800005ec: 80002437 lui s0,0x80002
800005f0: 80002937 lui s2,0x80002
800005f4: 91840793 addi a5,s0,-1768 # 80001918 <__stack_top+0x81001918>
800005f8: 91890913 addi s2,s2,-1768 # 80001918 <__stack_top+0x81001918>
800005fc: 40f90933 sub s2,s2,a5
80000600: 00112623 sw ra,12(sp)
80000604: 00912223 sw s1,4(sp)
80000608: 40295913 srai s2,s2,0x2
8000060c: 02090063 beqz s2,8000062c <__libc_init_array+0x4c>
80000610: 91840413 addi s0,s0,-1768
80000614: 00000493 li s1,0
80000618: 00042783 lw a5,0(s0)
8000061c: 00148493 addi s1,s1,1
80000620: 00440413 addi s0,s0,4
80000624: 000780e7 jalr a5
80000628: fe9918e3 bne s2,s1,80000618 <__libc_init_array+0x38>
8000062c: 80002437 lui s0,0x80002
80000630: 80002937 lui s2,0x80002
80000634: 91840793 addi a5,s0,-1768 # 80001918 <__stack_top+0x81001918>
80000638: 91c90913 addi s2,s2,-1764 # 8000191c <__stack_top+0x8100191c>
8000063c: 40f90933 sub s2,s2,a5
80000640: 40295913 srai s2,s2,0x2
80000644: 02090063 beqz s2,80000664 <__libc_init_array+0x84>
80000648: 91840413 addi s0,s0,-1768
8000064c: 00000493 li s1,0
80000650: 00042783 lw a5,0(s0)
80000654: 00148493 addi s1,s1,1
80000658: 00440413 addi s0,s0,4
8000065c: 000780e7 jalr a5
80000660: fe9918e3 bne s2,s1,80000650 <__libc_init_array+0x70>
80000664: 00c12083 lw ra,12(sp)
80000668: 00812403 lw s0,8(sp)
8000066c: 00412483 lw s1,4(sp)
80000670: 00012903 lw s2,0(sp)
80000674: 01010113 addi sp,sp,16
80000678: 00008067 ret
8000067c <memset>:
8000067c: 00f00313 li t1,15
80000680: 00050713 mv a4,a0
80000684: 02c37e63 bgeu t1,a2,800006c0 <memset+0x44>
80000688: 00f77793 andi a5,a4,15
8000068c: 0a079063 bnez a5,8000072c <memset+0xb0>
80000690: 08059263 bnez a1,80000714 <memset+0x98>
80000694: ff067693 andi a3,a2,-16
80000698: 00f67613 andi a2,a2,15
8000069c: 00e686b3 add a3,a3,a4
800006a0: 00b72023 sw a1,0(a4) # ff0000 <__stack_size+0xfefc00>
800006a4: 00b72223 sw a1,4(a4)
800006a8: 00b72423 sw a1,8(a4)
800006ac: 00b72623 sw a1,12(a4)
800006b0: 01070713 addi a4,a4,16
800006b4: fed766e3 bltu a4,a3,800006a0 <memset+0x24>
800006b8: 00061463 bnez a2,800006c0 <memset+0x44>
800006bc: 00008067 ret
800006c0: 40c306b3 sub a3,t1,a2
800006c4: 00269693 slli a3,a3,0x2
800006c8: 00000297 auipc t0,0x0
800006cc: 005686b3 add a3,a3,t0
800006d0: 00c68067 jr 12(a3)
800006d4: 00b70723 sb a1,14(a4)
800006d8: 00b706a3 sb a1,13(a4)
800006dc: 00b70623 sb a1,12(a4)
800006e0: 00b705a3 sb a1,11(a4)
800006e4: 00b70523 sb a1,10(a4)
800006e8: 00b704a3 sb a1,9(a4)
800006ec: 00b70423 sb a1,8(a4)
800006f0: 00b703a3 sb a1,7(a4)
800006f4: 00b70323 sb a1,6(a4)
800006f8: 00b702a3 sb a1,5(a4)
800006fc: 00b70223 sb a1,4(a4)
80000700: 00b701a3 sb a1,3(a4)
80000704: 00b70123 sb a1,2(a4)
80000708: 00b700a3 sb a1,1(a4)
8000070c: 00b70023 sb a1,0(a4)
80000710: 00008067 ret
80000714: 0ff5f593 andi a1,a1,255
80000718: 00859693 slli a3,a1,0x8
8000071c: 00d5e5b3 or a1,a1,a3
80000720: 01059693 slli a3,a1,0x10
80000724: 00d5e5b3 or a1,a1,a3
80000728: f6dff06f j 80000694 <memset+0x18>
8000072c: 00279693 slli a3,a5,0x2
80000730: 00000297 auipc t0,0x0
80000734: 005686b3 add a3,a3,t0
80000738: 00008293 mv t0,ra
8000073c: fa0680e7 jalr -96(a3)
80000740: 00028093 mv ra,t0
80000744: ff078793 addi a5,a5,-16
80000748: 40f70733 sub a4,a4,a5
8000074c: 00f60633 add a2,a2,a5
80000750: f6c378e3 bgeu t1,a2,800006c0 <memset+0x44>
80000754: f3dff06f j 80000690 <memset+0x14>
80000758 <__register_exitproc>:
80000758: 800027b7 lui a5,0x80002
8000075c: d487a703 lw a4,-696(a5) # 80001d48 <__stack_top+0x81001d48>
80000760: 14872783 lw a5,328(a4)
80000764: 04078c63 beqz a5,800007bc <__register_exitproc+0x64>
80000768: 0047a703 lw a4,4(a5)
8000076c: 01f00813 li a6,31
80000770: 06e84e63 blt a6,a4,800007ec <__register_exitproc+0x94>
80000774: 00271813 slli a6,a4,0x2
80000778: 02050663 beqz a0,800007a4 <__register_exitproc+0x4c>
8000077c: 01078333 add t1,a5,a6
80000780: 08c32423 sw a2,136(t1)
80000784: 1887a883 lw a7,392(a5)
80000788: 00100613 li a2,1
8000078c: 00e61633 sll a2,a2,a4
80000790: 00c8e8b3 or a7,a7,a2
80000794: 1917a423 sw a7,392(a5)
80000798: 10d32423 sw a3,264(t1)
8000079c: 00200693 li a3,2
800007a0: 02d50463 beq a0,a3,800007c8 <__register_exitproc+0x70>
800007a4: 00170713 addi a4,a4,1
800007a8: 00e7a223 sw a4,4(a5)
800007ac: 010787b3 add a5,a5,a6
800007b0: 00b7a423 sw a1,8(a5)
800007b4: 00000513 li a0,0
800007b8: 00008067 ret
800007bc: 14c70793 addi a5,a4,332
800007c0: 14f72423 sw a5,328(a4)
800007c4: fa5ff06f j 80000768 <__register_exitproc+0x10>
800007c8: 18c7a683 lw a3,396(a5)
800007cc: 00170713 addi a4,a4,1
800007d0: 00e7a223 sw a4,4(a5)
800007d4: 00c6e633 or a2,a3,a2
800007d8: 18c7a623 sw a2,396(a5)
800007dc: 010787b3 add a5,a5,a6
800007e0: 00b7a423 sw a1,8(a5)
800007e4: 00000513 li a0,0
800007e8: 00008067 ret
800007ec: fff00513 li a0,-1
800007f0: 00008067 ret
800007f4 <__call_exitprocs>:
800007f4: fd010113 addi sp,sp,-48
800007f8: 800027b7 lui a5,0x80002
800007fc: 01412c23 sw s4,24(sp)
80000800: d487aa03 lw s4,-696(a5) # 80001d48 <__stack_top+0x81001d48>
80000804: 03212023 sw s2,32(sp)
80000808: 02112623 sw ra,44(sp)
8000080c: 148a2903 lw s2,328(s4)
80000810: 02812423 sw s0,40(sp)
80000814: 02912223 sw s1,36(sp)
80000818: 01312e23 sw s3,28(sp)
8000081c: 01512a23 sw s5,20(sp)
80000820: 01612823 sw s6,16(sp)
80000824: 01712623 sw s7,12(sp)
80000828: 01812423 sw s8,8(sp)
8000082c: 04090063 beqz s2,8000086c <__call_exitprocs+0x78>
80000830: 00050b13 mv s6,a0
80000834: 00058b93 mv s7,a1
80000838: 00100a93 li s5,1
8000083c: fff00993 li s3,-1
80000840: 00492483 lw s1,4(s2)
80000844: fff48413 addi s0,s1,-1
80000848: 02044263 bltz s0,8000086c <__call_exitprocs+0x78>
8000084c: 00249493 slli s1,s1,0x2
80000850: 009904b3 add s1,s2,s1
80000854: 040b8463 beqz s7,8000089c <__call_exitprocs+0xa8>
80000858: 1044a783 lw a5,260(s1)
8000085c: 05778063 beq a5,s7,8000089c <__call_exitprocs+0xa8>
80000860: fff40413 addi s0,s0,-1
80000864: ffc48493 addi s1,s1,-4
80000868: ff3416e3 bne s0,s3,80000854 <__call_exitprocs+0x60>
8000086c: 02c12083 lw ra,44(sp)
80000870: 02812403 lw s0,40(sp)
80000874: 02412483 lw s1,36(sp)
80000878: 02012903 lw s2,32(sp)
8000087c: 01c12983 lw s3,28(sp)
80000880: 01812a03 lw s4,24(sp)
80000884: 01412a83 lw s5,20(sp)
80000888: 01012b03 lw s6,16(sp)
8000088c: 00c12b83 lw s7,12(sp)
80000890: 00812c03 lw s8,8(sp)
80000894: 03010113 addi sp,sp,48
80000898: 00008067 ret
8000089c: 00492783 lw a5,4(s2)
800008a0: 0044a683 lw a3,4(s1)
800008a4: fff78793 addi a5,a5,-1
800008a8: 04878e63 beq a5,s0,80000904 <__call_exitprocs+0x110>
800008ac: 0004a223 sw zero,4(s1)
800008b0: fa0688e3 beqz a3,80000860 <__call_exitprocs+0x6c>
800008b4: 18892783 lw a5,392(s2)
800008b8: 008a9733 sll a4,s5,s0
800008bc: 00492c03 lw s8,4(s2)
800008c0: 00f777b3 and a5,a4,a5
800008c4: 02079263 bnez a5,800008e8 <__call_exitprocs+0xf4>
800008c8: 000680e7 jalr a3
800008cc: 00492703 lw a4,4(s2)
800008d0: 148a2783 lw a5,328(s4)
800008d4: 01871463 bne a4,s8,800008dc <__call_exitprocs+0xe8>
800008d8: f8f904e3 beq s2,a5,80000860 <__call_exitprocs+0x6c>
800008dc: f80788e3 beqz a5,8000086c <__call_exitprocs+0x78>
800008e0: 00078913 mv s2,a5
800008e4: f5dff06f j 80000840 <__call_exitprocs+0x4c>
800008e8: 18c92783 lw a5,396(s2)
800008ec: 0844a583 lw a1,132(s1)
800008f0: 00f77733 and a4,a4,a5
800008f4: 00071c63 bnez a4,8000090c <__call_exitprocs+0x118>
800008f8: 000b0513 mv a0,s6
800008fc: 000680e7 jalr a3
80000900: fcdff06f j 800008cc <__call_exitprocs+0xd8>
80000904: 00892223 sw s0,4(s2)
80000908: fa9ff06f j 800008b0 <__call_exitprocs+0xbc>
8000090c: 00058513 mv a0,a1
80000910: 000680e7 jalr a3
80000914: fb9ff06f j 800008cc <__call_exitprocs+0xd8>
Disassembly of section .init_array:
80001918 <__init_array_start>:
80001918: 0068 addi a0,sp,12
8000191a: 8000 0x8000
Disassembly of section .data:
80001920 <impure_data>:
80001920: 0000 unimp
80001922: 0000 unimp
80001924: 1c0c addi a1,sp,560
80001926: 8000 0x8000
80001928: 1c74 addi a3,sp,572
8000192a: 8000 0x8000
8000192c: 1cdc addi a5,sp,628
8000192e: 8000 0x8000
...
800019c8: 0001 nop
800019ca: 0000 unimp
800019cc: 0000 unimp
800019ce: 0000 unimp
800019d0: 330e fld ft6,224(sp)
800019d2: abcd j 80001fc4 <__BSS_END__+0x1f8>
800019d4: 1234 addi a3,sp,296
800019d6: e66d bnez a2,80001ac0 <impure_data+0x1a0>
800019d8: deec sw a1,124(a3)
800019da: 0005 c.nop 1
800019dc: 0000000b 0xb
...
Disassembly of section .sdata:
80001d48 <_global_impure_ptr>:
80001d48: 1920 addi s0,sp,184
80001d4a: 8000 0x8000
Disassembly of section .bss:
80001d4c <g_wspawn_args>:
...
Disassembly of section .comment:
00000000 <.comment>:
0: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm
4: 2820 fld fs0,80(s0)
6: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm
a: 3920 fld fs0,112(a0)
c: 322e fld ft4,232(sp)
e: 302e fld ft0,232(sp)
...
Disassembly of section .riscv.attributes:
00000000 <.riscv.attributes>:
0: 2541 jal 680 <__stack_size+0x280>
2: 0000 unimp
4: 7200 flw fs0,32(a2)
6: 7369 lui t1,0xffffa
8: 01007663 bgeu zero,a6,14 <__stack_usage+0x14>
c: 0000001b 0x1b
10: 1004 addi s1,sp,32
12: 7205 lui tp,0xfffe1
14: 3376 fld ft6,376(sp)
16: 6932 flw fs2,12(sp)
18: 7032 flw ft0,44(sp)
1a: 5f30 lw a2,120(a4)
1c: 326d jal fffff9c6 <__stack_top+0xfff9c6>
1e: 3070 fld fa2,224(s0)
20: 665f 7032 0030 0x307032665f

Binary file not shown.

View File

@@ -1,202 +0,0 @@
#include <iostream>
#include <unistd.h>
#include <string.h>
#include <vortex.h>
#include "common.h"
// Evaluates `_expr` once as an int status code. Zero is success and the
// macro falls through. Any other value is reported through printf together
// with the stringified expression, after which cleanup() runs and the
// process exits with status -1.
#define RT_CHECK(_expr)                                          \
  do {                                                           \
    int _ret = _expr;                                            \
    if (0 != _ret) {                                             \
      printf("Error: '%s' returned %d!\n", #_expr, (int)_ret);   \
      cleanup();                                                 \
      exit(-1);                                                  \
    }                                                            \
  } while (false)
///////////////////////////////////////////////////////////////////////////////
// Path of the kernel image uploaded to the device; the -k option replaces it.
const char* kernel_file = "kernel.bin";
// Value of the -n option; main() substitutes 1 when it is 0 and passes it to
// the kernel as the per-task element count.
uint32_t count = 0;
// Device handle opened in main() and closed by cleanup().
vx_device_h device = nullptr;
// Shared staging buffer allocated in main() and released by cleanup().
vx_buffer_h staging_buf = nullptr;
// Writes the program banner and the list of supported options to stdout.
static void show_usage() {
  std::cout << "Vortex Driver Test." << std::endl
            << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl;
}
// Reads the command-line options into the file-level settings.
//   -n <words>  stored in `count` (converted with atoi)
//   -k <file>   stored in `kernel_file`
//   -h, -?      print the usage text and exit with status 0
// Any other value returned by getopt prints the usage text and exits with
// status -1.
static void parse_args(int argc, char **argv) {
  const char* const opt_spec = "n:k:h?";
  for (int opt = getopt(argc, argv, opt_spec); opt != -1;
       opt = getopt(argc, argv, opt_spec)) {
    if (opt == 'n') {
      count = atoi(optarg);
    } else if (opt == 'k') {
      kernel_file = optarg;
    } else if (opt == 'h' || opt == '?') {
      show_usage();
      exit(0);
    } else {
      show_usage();
      exit(-1);
    }
  }
}
// Releases the staging buffer and then closes the device, skipping whichever
// handle is still null. RT_CHECK calls this before exiting on an error, so it
// may run when only some of the resources have been acquired.
void cleanup() {
  if (staging_buf != nullptr)
    vx_buf_release(staging_buf);

  if (device != nullptr)
    vx_dev_close(device);
}
// Runs the uploaded kernel and checks its output.
//
// kernel_arg : argument block; only dst_ptr is read here
// buf_size   : number of bytes copied back from the destination buffer
// num_points : number of 32-bit results to verify
//
// Element i of the result is expected to equal i + i. Every mismatch is
// printed. Returns 0 when all results match and 1 otherwise; a failing
// runtime call is handled by RT_CHECK.
int run_test(const kernel_arg_t& kernel_arg,
             uint32_t buf_size,
             uint32_t num_points) {
  // start device
  std::cout << "start device" << std::endl;
  RT_CHECK(vx_start(device));

  // wait for completion
  std::cout << "wait for completion" << std::endl;
  RT_CHECK(vx_ready_wait(device, -1));

  // download destination buffer
  std::cout << "download destination buffer" << std::endl;
  RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));

  // verify result
  std::cout << "verify result" << std::endl;
  const int32_t* results = (int32_t*)vx_host_ptr(staging_buf);
  int mismatches = 0;
  for (uint32_t idx = 0; idx < num_points; ++idx) {
    const int actual = results[idx];
    const int expected = idx + idx;
    if (actual == expected)
      continue;
    std::cout << "error at result #" << std::dec << idx
              << std::hex << ": actual 0x" << actual << ", expected 0x" << expected << std::endl;
    ++mismatches;
  }

  if (mismatches == 0)
    return 0;

  std::cout << "Found " << std::dec << mismatches << " errors!" << std::endl;
  std::cout << "FAILED!" << std::endl;
  return 1;
}
// Host entry point for the demo driver test.
//
// Sequence: parse the command line, open the device and query its core,
// warp and thread limits, upload the kernel image, allocate two source
// buffers and one destination buffer in device memory, upload the kernel
// argument block and the buffer contents through a shared staging buffer,
// run the test, then release resources through cleanup().
//
// Every runtime call goes through RT_CHECK, which exits the process with
// status -1 on a non-zero return. Returns 0 when the test passes.
int main(int argc, char *argv[]) {
// receives the address produced by each vx_alloc_dev_mem() call
size_t value;
kernel_arg_t kernel_arg;
// parse command arguments
parse_args(argc, argv);
// a count of 0 is replaced by 1 so that every task has at least one element
if (count == 0) {
count = 1;
}
// open device connection
std::cout << "open device connection" << std::endl;
RT_CHECK(vx_dev_open(&device));
unsigned max_cores, max_warps, max_threads;
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));
// one task per (core, warp, thread) combination, `count` elements per task
uint32_t num_tasks = max_cores * max_warps * max_threads;
uint32_t num_points = count * num_tasks;
uint32_t buf_size = num_points * sizeof(int32_t);
std::cout << "number of points: " << num_points << std::endl;
std::cout << "buffer size: " << buf_size << " bytes" << std::endl;
// upload program
std::cout << "upload program" << std::endl;
RT_CHECK(vx_upload_kernel_file(device, kernel_file));
// allocate device memory
std::cout << "allocate device memory" << std::endl;
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
kernel_arg.src0_ptr = value;
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
kernel_arg.src1_ptr = value;
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
kernel_arg.dst_ptr = value;
kernel_arg.num_tasks = num_tasks;
kernel_arg.task_size = count;
// std::hex stays set on std::cout after these lines
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::endl;
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::endl;
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
// allocate shared memory
// the staging buffer carries both the data buffers and the kernel argument
// block, so it is sized for the larger of the two
std::cout << "allocate shared memory" << std::endl;
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &staging_buf));
// upload kernel argument
// copy the argument struct into the staging buffer, then transfer it to the
// fixed device address KERNEL_ARG_DEV_MEM_ADDR
std::cout << "upload kernel argument" << std::endl;
{
auto buf_ptr = (int*)vx_host_ptr(staging_buf);
memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));
RT_CHECK(vx_copy_to_dev(staging_buf, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
}
// upload source buffer0
// src0[i] = i - 1; added to src1[i] = i + 1 this gives the i + i that
// run_test() expects
{
auto buf_ptr = (int32_t*)vx_host_ptr(staging_buf);
for (uint32_t i = 0; i < num_points; ++i) {
buf_ptr[i] = i-1;
}
}
std::cout << "upload source buffer0" << std::endl;
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src0_ptr, buf_size, 0));
// upload source buffer1
{
auto buf_ptr = (int32_t*)vx_host_ptr(staging_buf);
for (uint32_t i = 0; i < num_points; ++i) {
buf_ptr[i] = i+1;
}
}
std::cout << "upload source buffer1" << std::endl;
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src1_ptr, buf_size, 0));
// clear destination buffer
// every element is pre-filled with the 0xdeadbeef pattern; presumably so
// that elements the kernel never writes fail verification -- confirm
{
auto buf_ptr = (int32_t*)vx_host_ptr(staging_buf);
for (uint32_t i = 0; i < num_points; ++i) {
buf_ptr[i] = 0xdeadbeef;
}
}
std::cout << "clear destination buffer" << std::endl;
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
// run tests
// a non-zero return from run_test() makes RT_CHECK exit with status -1
std::cout << "run tests" << std::endl;
RT_CHECK(run_test(kernel_arg, buf_size, num_points));
// cleanup
std::cout << "cleanup" << std::endl;
cleanup();
std::cout << "PASSED!" << std::endl;
return 0;
}

View File

@@ -1,69 +0,0 @@
# Build rules for the "dogfood" driver test.
# Produces the host executable $(PROJECT) from $(SRCS) and the device kernel
# (kernel.elf -> kernel.bin / kernel.dump) from $(VX_SRCS) with the RISC-V
# toolchain. The run-* targets launch the host executable against one of the
# driver back-ends by pointing LD_LIBRARY_PATH at it.

# Remove a target whose recipe failed. kernel.dump and .depend are written via
# shell redirection, which creates the file even when the command fails.
.DELETE_ON_ERROR:

RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain
VORTEX_RT_PATH ?= $(wildcard ../../../runtime)

# Command-line options passed to the test executable by the run-* targets.
OPTS ?= -n64

VX_CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc
VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy

VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
VX_CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw

VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
VX_LDFLAGS += -lm

VX_SRCS = kernel.c

#CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I../../include -I../../../hw

PROJECT = dogfood

SRCS = main.cpp

# These targets name commands, not files; a stray file called "clean" or "all"
# must not make them look up to date.
.PHONY: all run-fpga run-asesim run-vlsim run-rtlsim run-simx clean clean-all

all: $(PROJECT) kernel.bin kernel.dump

kernel.dump: kernel.elf
	$(VX_DP) -d -r -t $< > $@

kernel.bin: kernel.elf
	$(VX_CP) -O binary $< $@

kernel.elf: $(VX_SRCS)
	$(VX_CC) $(VX_CFLAGS) $(VX_SRCS) $(VX_LDFLAGS) -o $@

$(PROJECT): $(SRCS)
	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../stub -lvortex -o $@

run-fpga: $(PROJECT)
	LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

run-asesim: $(PROJECT)
	ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

run-vlsim: $(PROJECT)
	ASE_LOG=0 LD_LIBRARY_PATH=../../opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

run-rtlsim: $(PROJECT)
	LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

run-simx: $(PROJECT)
	LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

.depend: $(SRCS)
	$(CXX) $(CXXFLAGS) -MM $^ > $@

clean:
	rm -rf $(PROJECT) *.o .depend

clean-all: clean
	rm -rf *.elf *.bin *.dump

# Pull in the generated header dependencies unless every requested goal is a
# cleaning goal; otherwise "make clean-all" would regenerate .depend just to
# delete it again.
ifeq ($(MAKECMDGOALS),)
-include .depend
else ifneq ($(filter-out clean clean-all,$(MAKECMDGOALS)),)
-include .depend
endif

View File

@@ -1,15 +0,0 @@
#ifndef _COMMON_H_
#define _COMMON_H_

// uint32_t is used below; include it here so the header does not depend on
// what the including file happened to include first.
#include <stdint.h>

// Device address at which the host stores the kernel_arg_t block and from
// which the device kernel reads it.
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000

// Argument block shared between the host driver and the device kernel.
struct kernel_arg_t {
  uint32_t testid;     // index of the test kernel to run
  uint32_t num_tasks;  // number of tasks to spawn
  uint32_t task_size;  // number of elements processed by each task
  uint32_t src0_ptr;   // device address of the first source buffer
  uint32_t src1_ptr;   // device address of the second source buffer
  uint32_t dst_ptr;    // device address of the destination buffer
};

#endif

Binary file not shown.

View File

@@ -1,353 +0,0 @@
#include <stdint.h>
#include <math.h>
#include <vx_intrinsics.h>
#include <vx_spawn.h>
#include "common.h"
// Signature shared by every test kernel: task_id selects the slice of the
// buffers to process, arg points to the kernel_arg_t block.
typedef void (*PFN_Kernel)(int task_id, void* arg);
// Single-precision square root computed with the fsqrt.s instruction.
// x is used as both the input operand and the output operand of the asm.
// NOTE(review): the name matches a libm-internal symbol; presumably this is
// meant to keep the square root on the hardware instruction - confirm.
inline float __ieee754_sqrtf (float x) {
asm ("fsqrt.s %0, %1" : "=f" (x) : "f" (x));
return x;
}
void kernel_iadd(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
int32_t* dst_ptr = (int32_t*)_arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
int32_t a = src0_ptr[offset+i];
int32_t b = src1_ptr[offset+i];
int32_t c = a + b;
dst_ptr[offset+i] = c;
}
}
void kernel_imul(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
int32_t* dst_ptr = (int32_t*)_arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
int32_t a = src0_ptr[offset+i];
int32_t b = src1_ptr[offset+i];
int32_t c = a * b;
dst_ptr[offset+i] = c;
}
}
void kernel_idiv(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
int32_t* dst_ptr = (int32_t*)_arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
int32_t a = src0_ptr[offset+i];
int32_t b = src1_ptr[offset+i];
int32_t c = a / b;
dst_ptr[offset+i] = c;
}
}
void kernel_idiv_mul(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
int32_t* dst_ptr = (int32_t*)_arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
int32_t a = src0_ptr[offset+i];
int32_t b = src1_ptr[offset+i];
int32_t c = a / b;
int32_t d = a * b;
int32_t e = c + d;
dst_ptr[offset+i] = e;
}
}
void kernel_fadd(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
float* src0_ptr = (float*)_arg->src0_ptr;
float* src1_ptr = (float*)_arg->src1_ptr;
float* dst_ptr = (float*)_arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
float a = src0_ptr[offset+i];
float b = src1_ptr[offset+i];
float c = a + b;
dst_ptr[offset+i] = c;
}
}
void kernel_fsub(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
float* src0_ptr = (float*)_arg->src0_ptr;
float* src1_ptr = (float*)_arg->src1_ptr;
float* dst_ptr = (float*)_arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
float a = src0_ptr[offset+i];
float b = src1_ptr[offset+i];
float c = a - b;
dst_ptr[offset+i] = c;
}
}
void kernel_fmul(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
float* src0_ptr = (float*)_arg->src0_ptr;
float* src1_ptr = (float*)_arg->src1_ptr;
float* dst_ptr = (float*)_arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
float a = src0_ptr[offset+i];
float b = src1_ptr[offset+i];
float c = a * b;
dst_ptr[offset+i] = c;
}
}
void kernel_fmadd(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
float* src0_ptr = (float*)_arg->src0_ptr;
float* src1_ptr = (float*)_arg->src1_ptr;
float* dst_ptr = (float*)_arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
float a = src0_ptr[offset+i];
float b = src1_ptr[offset+i];
float c = a * b + b;
dst_ptr[offset+i] = c;
}
}
void kernel_fmsub(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
float* src0_ptr = (float*)_arg->src0_ptr;
float* src1_ptr = (float*)_arg->src1_ptr;
float* dst_ptr = (float*)_arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
float a = src0_ptr[offset+i];
float b = src1_ptr[offset+i];
float c = a * b - b;
dst_ptr[offset+i] = c;
}
}
void kernel_fnmadd(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
float* src0_ptr = (float*)_arg->src0_ptr;
float* src1_ptr = (float*)_arg->src1_ptr;
float* dst_ptr = (float*)_arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
float a = src0_ptr[offset+i];
float b = src1_ptr[offset+i];
float c =-a * b - b;
dst_ptr[offset+i] = c;
}
}
void kernel_fnmsub(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
float* src0_ptr = (float*)_arg->src0_ptr;
float* src1_ptr = (float*)_arg->src1_ptr;
float* dst_ptr = (float*)_arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
float a = src0_ptr[offset+i];
float b = src1_ptr[offset+i];
float c =-a * b + b;
dst_ptr[offset+i] = c;
}
}
void kernel_fnmadd_madd(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
float* src0_ptr = (float*)_arg->src0_ptr;
float* src1_ptr = (float*)_arg->src1_ptr;
float* dst_ptr = (float*)_arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
float a = src0_ptr[offset+i];
float b = src1_ptr[offset+i];
float c =-a * b - b;
float d = a * b + b;
float e = c + d;
dst_ptr[offset+i] = e;
}
}
void kernel_fdiv(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
float* src0_ptr = (float*)_arg->src0_ptr;
float* src1_ptr = (float*)_arg->src1_ptr;
float* dst_ptr = (float*)_arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
float a = src0_ptr[offset+i];
float b = src1_ptr[offset+i];
float c = a / b;
dst_ptr[offset+i] = c;
}
}
void kernel_fdiv2(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
float* src0_ptr = (float*)_arg->src0_ptr;
float* src1_ptr = (float*)_arg->src1_ptr;
float* dst_ptr = (float*)_arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
float a = src0_ptr[offset+i];
float b = src1_ptr[offset+i];
float c = a / b;
float d = b / a;
float e = c + d;
dst_ptr[offset+i] = e;
}
}
void kernel_fsqrt(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
float* src0_ptr = (float*)_arg->src0_ptr;
float* src1_ptr = (float*)_arg->src1_ptr;
float* dst_ptr = (float*)_arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
float a = src0_ptr[offset+i];
float b = src1_ptr[offset+i];
float c = __ieee754_sqrtf(a * b);
dst_ptr[offset+i] = c;
}
}
void kernel_ftoi(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
float* src0_ptr = (float*)_arg->src0_ptr;
float* src1_ptr = (float*)_arg->src1_ptr;
int32_t* dst_ptr = (int32_t*)_arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
float a = src0_ptr[offset+i];
float b = src1_ptr[offset+i];
float c = a + b;
int32_t d = (int32_t)c;
dst_ptr[offset+i] = d;
}
}
void kernel_ftou(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
float* src0_ptr = (float*)_arg->src0_ptr;
float* src1_ptr = (float*)_arg->src1_ptr;
uint32_t* dst_ptr = (uint32_t*)_arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
float a = src0_ptr[offset+i];
float b = src1_ptr[offset+i];
float c = a + b;
uint32_t d = (uint32_t)c;
dst_ptr[offset+i] = d;
}
}
void kernel_itof(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
float* dst_ptr = (float*)_arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
int32_t a = src0_ptr[offset+i];
int32_t b = src1_ptr[offset+i];
int32_t c = a + b;
float d = (float)c;
dst_ptr[offset+i] = d;
}
}
void kernel_utof(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
float* dst_ptr = (float*)_arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
int32_t a = src0_ptr[offset+i];
int32_t b = src1_ptr[offset+i];
uint32_t c = a + b;
float d = (float)c;
dst_ptr[offset+i] = d;
}
}
// Dispatch table of test kernels, indexed by kernel_arg_t::testid.
// NOTE(review): the order presumably has to match the order in which the host
// registers its test cases - verify against the host driver when editing.
static const PFN_Kernel sc_tests[] = {
kernel_iadd,
kernel_imul,
kernel_idiv,
kernel_idiv_mul,
kernel_fadd,
kernel_fsub,
kernel_fmul,
kernel_fmadd,
kernel_fmsub,
kernel_fnmadd,
kernel_fnmsub,
kernel_fnmadd_madd,
kernel_fdiv,
kernel_fdiv2,
kernel_fsqrt,
kernel_ftoi,
kernel_ftou,
kernel_itof,
kernel_utof,
};
void main() {
struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
vx_spawn_tasks(arg->num_tasks, sc_tests[arg->testid], arg);
}

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@@ -1,278 +0,0 @@
#include <iostream>
#include <vector>
#include <unistd.h>
#include <string.h>
#include <vortex.h>
#include <VX_config.h>
#include "testcases.h"
#include "common.h"
// Evaluates a runtime call once; if it returns non-zero, prints the failing
// expression and its return code, calls cleanup() and terminates the process
// with exit(-1). The do/while(false) wrapper makes the macro usable as a
// single statement.
#define RT_CHECK(_expr) \
do { \
int _ret = _expr; \
if (0 == _ret) \
break; \
printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \
cleanup(); \
exit(-1); \
} while (false)
///////////////////////////////////////////////////////////////////////////////
class TestMngr {
public:
TestMngr() {
this->add_test("iadd", new Test_IADD());
this->add_test("imul", new Test_IMUL());
this->add_test("idiv", new Test_IDIV());
this->add_test("idiv-mul", new Test_IDIV_MUL());
#ifdef EXT_F_ENABLE
this->add_test("fadd", new Test_FADD());
this->add_test("fsub", new Test_FSUB());
this->add_test("fmul", new Test_FMUL());
this->add_test("fmadd", new Test_FMADD());
this->add_test("fmsub", new Test_FMSUB());
this->add_test("fnmadd", new Test_FNMADD());
this->add_test("fnmsub", new Test_FNMSUB());
this->add_test("fnmadd-madd", new Test_FNMADD_MADD());
this->add_test("fdiv", new Test_FDIV());
this->add_test("fdiv2", new Test_FDIV2());
this->add_test("fsqrt", new Test_FSQRT());
this->add_test("ftoi", new Test_FTOI());
this->add_test("ftou", new Test_FTOU());
this->add_test("itof", new Test_ITOF());
this->add_test("utof", new Test_UTOF());
#endif
}
~TestMngr() {
for (size_t i = 0; i < _tests.size(); ++i) {
delete _tests[i];
}
}
const std::string& get_name(int testid) const {
return _names.at(testid);
}
ITestCase* get_test(int testid) const {
return _tests.at(testid);
}
void add_test(const char* name, ITestCase* test) {
_names.push_back(name);
_tests.push_back(test);
}
size_t size() const {
return _tests.size();
}
private:
std::vector<std::string> _names;
std::vector<ITestCase*> _tests;
};
///////////////////////////////////////////////////////////////////////////////
// Global state shared by parse_args(), cleanup() and main().
TestMngr testMngr;
// kernel binary to upload (-k)
const char* kernel_file = "kernel.bin";
// elements per task (-n); main() replaces 0 with 1
int count = 0;
// inclusive range of test ids to run (-s / -e, or -t for a single test)
int testid_s = 0;
// defaults to the last registered test; testMngr is defined above in this
// file, so it is already constructed when this initializer runs
int testid_e = (testMngr.size() - 1);
// cleared by -c so that the run continues after a failing test
bool stop_on_error = true;
// device and buffer handles, released by cleanup()
vx_device_h device = nullptr;
vx_buffer_h arg_buf = nullptr;
vx_buffer_h src1_buf = nullptr;
vx_buffer_h src2_buf = nullptr;
vx_buffer_h dst_buf = nullptr;
// Prints the program banner followed by the command-line synopsis.
static void show_usage() {
  std::cout << "Vortex Driver Test." << std::endl
            << "Usage: [-t:testid] [-s:testid] [-e:testid] [-k: kernel] [-n words] [-c] [-h: help]" << std::endl;
}
// Parses the command line into the global settings.
//   -n <count>  elements per task
//   -t <id>     run a single test (sets both ends of the range)
//   -s <id>     first test id
//   -e <id>     last test id
//   -k <file>   kernel binary
//   -c          continue after a failing test
//   -h, -?      print the usage text and exit with status 0
// Any other value returned by getopt prints the usage text and exits with -1.
static void parse_args(int argc, char **argv) {
  static const char* const options = "n:t:s:e:k:ch?";
  for (int opt = getopt(argc, argv, options); opt != -1;
       opt = getopt(argc, argv, options)) {
    if (opt == 'n') {
      count = atoi(optarg);
    } else if (opt == 't') {
      testid_s = atoi(optarg);
      testid_e = atoi(optarg);
    } else if (opt == 's') {
      testid_s = atoi(optarg);
    } else if (opt == 'e') {
      testid_e = atoi(optarg);
    } else if (opt == 'k') {
      kernel_file = optarg;
    } else if (opt == 'c') {
      stop_on_error = false;
    } else {
      const bool help_requested = (opt == 'h') || (opt == '?');
      show_usage();
      exit(help_requested ? 0 : -1);
    }
  }
}
void cleanup() {
if (arg_buf) {
vx_buf_release(arg_buf);
}
if (src1_buf) {
vx_buf_release(src1_buf);
}
if (src2_buf) {
vx_buf_release(src2_buf);
}
if (dst_buf) {
vx_buf_release(dst_buf);
}
if (device) {
vx_dev_close(device);
}
}
// Host entry point of the dogfood test.
// Opens the device, allocates device and shared buffers sized from the device
// capabilities, then for each selected test id: uploads the argument block and
// the test's input data, runs the kernel, downloads the result and verifies it
// against the host reference.
// Returns 0 when every test passed and 1 when a test failed in continue mode
// (-c). A failing test without -c exits with status 1; a failing runtime call
// exits through RT_CHECK; an invalid test id range returns -1.
int main(int argc, char *argv[]) {
  int exitcode = 0;
  size_t value;
  kernel_arg_t kernel_arg;

  // parse command arguments
  parse_args(argc, argv);

  // a count of zero (the value left when -n is absent or 0) means one element per task
  if (count == 0) {
    count = 1;
  }

  // Reject ids outside the registered range before touching the device:
  // get_name()/get_test() would otherwise throw an uncaught std::out_of_range
  // in the middle of the run. An empty range (start > end) runs no test.
  if (testid_s <= testid_e
   && (testid_s < 0 || testid_e >= (int)testMngr.size())) {
    std::cout << "Error: test ids must be within 0 - " << std::dec
              << ((int)testMngr.size() - 1) << std::endl;
    return -1;
  }

  std::cout << std::dec;

  std::cout << "test ids: " << testid_s << " - " << testid_e << std::endl;
  std::cout << "workitem size: " << count << std::endl;
  std::cout << "using kernel: " << kernel_file << std::endl;

  // open device connection
  std::cout << "open device connection" << std::endl;
  RT_CHECK(vx_dev_open(&device));

  unsigned max_cores, max_warps, max_threads;
  RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
  RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
  RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));

  // one task per (core, warp, thread) triple; each task handles 'count' elements
  int num_tasks = max_cores * max_warps * max_threads;
  int num_points = count * num_tasks;
  size_t buf_size = num_points * sizeof(uint32_t);

  std::cout << "number of points: " << num_points << std::endl;
  std::cout << "buffer size: " << buf_size << " bytes" << std::endl;

  // upload program
  std::cout << "upload kernel" << std::endl;
  RT_CHECK(vx_upload_kernel_file(device, kernel_file));

  // allocate device memory
  std::cout << "allocate device memory" << std::endl;
  RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
  kernel_arg.src0_ptr = value;
  RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
  kernel_arg.src1_ptr = value;
  RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
  kernel_arg.dst_ptr = value;

  kernel_arg.num_tasks = num_tasks;
  kernel_arg.task_size = count;

  std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::dec << std::endl;
  std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::dec << std::endl;
  std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::dec << std::endl;

  // allocate shared memory
  std::cout << "allocate shared memory" << std::endl;
  RT_CHECK(vx_alloc_shared_mem(device, sizeof(kernel_arg_t), &arg_buf));
  RT_CHECK(vx_alloc_shared_mem(device, buf_size, &src1_buf));
  RT_CHECK(vx_alloc_shared_mem(device, buf_size, &src2_buf));
  RT_CHECK(vx_alloc_shared_mem(device, buf_size, &dst_buf));

  for (int t = testid_s; t <= testid_e; ++t) {
    const auto& name = testMngr.get_name(t);
    auto test = testMngr.get_test(t);

    std::cout << "Test" << t << ": " << name << std::endl;

    // upload kernel argument
    std::cout << "upload kernel argument" << std::endl;
    kernel_arg.testid = t;
    memcpy((void*)vx_host_ptr(arg_buf), &kernel_arg, sizeof(kernel_arg_t));
    RT_CHECK(vx_copy_to_dev(arg_buf, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));

    // get test arguments
    std::cout << "get test arguments" << std::endl;
    test->setup(num_points, (void*)vx_host_ptr(src1_buf), (void*)vx_host_ptr(src2_buf));

    // upload source buffer0
    std::cout << "upload source buffer0" << std::endl;
    RT_CHECK(vx_copy_to_dev(src1_buf, kernel_arg.src0_ptr, buf_size, 0));

    // upload source buffer1
    std::cout << "upload source buffer1" << std::endl;
    RT_CHECK(vx_copy_to_dev(src2_buf, kernel_arg.src1_ptr, buf_size, 0));

    // clear destination buffer
    // 0xdeadbeef is a sentinel pattern, so elements the kernel never wrote stand out
    std::cout << "clear destination buffer" << std::endl;
    for (int i = 0; i < num_points; ++i) {
      ((uint32_t*)vx_host_ptr(dst_buf))[i] = 0xdeadbeef;
    }
    RT_CHECK(vx_copy_to_dev(dst_buf, kernel_arg.dst_ptr, buf_size, 0));

    // start device
    std::cout << "start device" << std::endl;
    RT_CHECK(vx_start(device));

    // wait for completion
    std::cout << "wait for completion" << std::endl;
    RT_CHECK(vx_ready_wait(device, -1));

    // download destination buffer
    std::cout << "download destination buffer" << std::endl;
    RT_CHECK(vx_copy_from_dev(dst_buf, kernel_arg.dst_ptr, buf_size, 0));

    // verify destination
    std::cout << "verify test result" << std::endl;
    int errors = test->verify(num_points,
                              (void*)vx_host_ptr(dst_buf),
                              (void*)vx_host_ptr(src1_buf),
                              (void*)vx_host_ptr(src2_buf));
    if (errors != 0) {
      std::cout << "found " << std::dec << errors << " errors!" << std::endl;
      std::cout << "Test" << t << "-" << name << " FAILED!" << std::endl << std::flush;
      if (stop_on_error) {
        cleanup();
        exit(1);
      }
      // continue mode: remember the failure and move on to the next test
      exitcode = 1;
    } else {
      std::cout << "Test" << t << "-" << name << " PASSED!" << std::endl << std::flush;
    }
  }

  // cleanup
  std::cout << "cleanup" << std::endl;
  cleanup();

  return exitcode;
}

View File

@@ -1,600 +0,0 @@
#pragma once
#include <iostream>
#include <math.h>
#include <limits>
// Overlays a float with its raw integer representation and with a bit-field
// view of its fields, so the bits of a float can be read as an int.
// NOTE(review): the bit-field view assumes a 32-bit float whose mantissa sits
// in the low 23 bits; bit-field layout is implementation-defined - confirm for
// the host compiler.
union Float_t {
float f;
int i;
struct {
uint32_t man : 23;
uint32_t exp : 8;
uint32_t sign : 1;
} parts;
};
// Rounds x to 'precision' decimal digits (8 by default) and returns the
// result as a float.
inline float fround(float x, int32_t precision = 8) {
  const auto scale = std::pow(10, precision);
  const auto scaled = std::round(x * scale);
  return scaled / scale;
}
// Relative comparison: a and b are considered equal when |a - b| does not
// exceed float epsilon scaled by the larger magnitude and by 'ulp'.
// Prints a diagnostic line and returns false otherwise.
inline bool almost_equal_eps(float a, float b, int ulp = 128) {
  const auto magnitude = std::max(fabs(a), fabs(b));
  const auto tolerance = std::numeric_limits<float>::epsilon() * (magnitude * ulp);
  const auto delta = fabs(a - b);
  if (!(delta > tolerance)) {
    return true;
  }
  std::cout << "*** almost_equal_eps: d=" << delta << ", eps=" << tolerance << std::endl;
  return false;
}
// Compares a and b by the distance between their raw integer representations.
// Returns true when that distance is at most 'ulp'; otherwise prints a
// diagnostic line and returns false.
inline bool almost_equal_ulp(float a, float b, int32_t ulp = 6) {
  Float_t fa{a}, fb{b};
  // Subtract in a wider type: for operands of opposite sign the int
  // difference can overflow, which is undefined behaviour.
  long long d = (long long)fa.i - (long long)fb.i;
  if (d < 0) {
    d = -d;
  }
  if (d > ulp) {
    // Switch back to decimal after the hex dump; std::hex is sticky and would
    // otherwise turn every later integer printed on std::cout into hex.
    std::cout << "*** almost_equal_ulp: a=" << a << ", b=" << b << ", ulp=" << d << ", ia=" << std::hex << fa.i << ", ib=" << fb.i << std::dec << std::endl;
    return false;
  }
  return true;
}
// Float comparison used by the test cases: exact equality first, then the
// representation-distance check. The epsilon-based check is not used.
inline bool almost_equal(float a, float b) {
  return (a == b) || almost_equal_ulp(a, b);
}
// Interface implemented by every host-side test case.
class ITestCase {
public:
  ITestCase() = default;
  virtual ~ITestCase() = default;

  // Fills the two source buffers with n input elements each.
  virtual void setup(int n, void* src1, void* src2) = 0;

  // Checks the n results in dst against values recomputed from src1 and src2
  // and returns the number of mismatches.
  virtual int verify(int n, void* dst, const void* src1, const void* src2) = 0;
};
// Host reference for the integer addition kernel.
class Test_IADD : public ITestCase {
public:
  // Inputs: a[i] = n/2 - i, b[i] = n/2 + i.
  void setup(int n, void* src1, void* src2) override {
    int32_t* const lhs = static_cast<int32_t*>(src1);
    int32_t* const rhs = static_cast<int32_t*>(src2);
    const int half = n / 2;
    for (int k = 0; k < n; ++k) {
      lhs[k] = half - k;
      rhs[k] = half + k;
    }
  }

  // Expects dst[i] == a[i] + b[i]; returns the number of mismatches.
  int verify(int n, void* dst, const void* src1, const void* src2) override {
    const int32_t* lhs = static_cast<const int32_t*>(src1);
    const int32_t* rhs = static_cast<const int32_t*>(src2);
    const int32_t* out = static_cast<const int32_t*>(dst);
    int mismatches = 0;
    for (int k = 0; k < n; ++k) {
      const int32_t ref = lhs[k] + rhs[k];
      if (out[k] == ref)
        continue;
      std::cout << "error at result #" << k << ": expected " << ref << ", actual " << out[k] << ", a=" << lhs[k] << ", b=" << rhs[k] << std::endl;
      ++mismatches;
    }
    return mismatches;
  }
};
// Host reference for the integer multiplication kernel.
class Test_IMUL : public ITestCase {
public:
  // Inputs: a[i] = n/2 - i, b[i] = n/2 + i.
  void setup(int n, void* src1, void* src2) override {
    int32_t* const lhs = static_cast<int32_t*>(src1);
    int32_t* const rhs = static_cast<int32_t*>(src2);
    const int half = n / 2;
    for (int k = 0; k < n; ++k) {
      lhs[k] = half - k;
      rhs[k] = half + k;
    }
  }

  // Expects dst[i] == a[i] * b[i]; returns the number of mismatches.
  int verify(int n, void* dst, const void* src1, const void* src2) override {
    const int32_t* lhs = static_cast<const int32_t*>(src1);
    const int32_t* rhs = static_cast<const int32_t*>(src2);
    const int32_t* out = static_cast<const int32_t*>(dst);
    int mismatches = 0;
    for (int k = 0; k < n; ++k) {
      const int32_t ref = lhs[k] * rhs[k];
      if (out[k] == ref)
        continue;
      std::cout << "error at result #" << k << ": expected " << ref << ", actual " << out[k] << ", a=" << lhs[k] << ", b=" << rhs[k] << std::endl;
      ++mismatches;
    }
    return mismatches;
  }
};
// Host reference for the integer division kernel.
class Test_IDIV : public ITestCase {
public:
  // Inputs: a[i] = n/2 - i, b[i] = n/2 + i.
  void setup(int n, void* src1, void* src2) override {
    auto a = (int32_t*)src1;
    auto b = (int32_t*)src2;
    for (int i = 0; i < n; ++i) {
      a[i] = n/2 - i;
      b[i] = n/2 + i;
    }
  }

  // Expects dst[i] == a[i] / b[i]; returns the number of mismatches.
  int verify(int n, void* dst, const void* src1, const void* src2) override {
    int errors = 0;
    auto a = (const int32_t*)src1;
    auto b = (const int32_t*)src2;
    auto c = (const int32_t*)dst;
    for (int i = 0; i < n; ++i) {
      // With n == 1 the only divisor is 0, and dividing by it on the host
      // would crash the test program. The RISC-V DIV instruction does not
      // trap on a zero divisor; it returns -1, so use that as the reference.
      auto ref = (b[i] != 0) ? (a[i] / b[i]) : -1;
      if (c[i] != ref) {
        std::cout << "error at result #" << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
        ++errors;
      }
    }
    return errors;
  }
};
// Host reference for the combined integer divide/multiply kernel.
class Test_IDIV_MUL : public ITestCase {
public:
  // Inputs: a[i] = n/2 - i, b[i] = n/2 + i.
  void setup(int n, void* src1, void* src2) override {
    auto a = (int32_t*)src1;
    auto b = (int32_t*)src2;
    for (int i = 0; i < n; ++i) {
      a[i] = n/2 - i;
      b[i] = n/2 + i;
    }
  }

  // Expects dst[i] == a[i] / b[i] + a[i] * b[i]; returns the number of
  // mismatches.
  int verify(int n, void* dst, const void* src1, const void* src2) override {
    int errors = 0;
    auto a = (const int32_t*)src1;
    auto b = (const int32_t*)src2;
    auto c = (const int32_t*)dst;
    for (int i = 0; i < n; ++i) {
      // With n == 1 the only divisor is 0, and dividing by it on the host
      // would crash the test program. The RISC-V DIV instruction does not
      // trap on a zero divisor; it returns -1, so use that as the quotient.
      auto x = (b[i] != 0) ? (a[i] / b[i]) : -1;
      auto y = a[i] * b[i];
      auto ref = x + y;
      if (c[i] != ref) {
        std::cout << "error at result #" << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
        ++errors;
      }
    }
    return errors;
  }
};
// Host reference for the float addition kernel.
class Test_FADD : public ITestCase {
public:
  // Inputs: a[i] = fround((n - i) / n), b[i] = fround((n + i) / n).
  void setup(int n, void* src1, void* src2) override {
    float* const lhs = static_cast<float*>(src1);
    float* const rhs = static_cast<float*>(src2);
    for (int k = 0; k < n; ++k) {
      lhs[k] = fround((n - k) * (1.0f/n));
      rhs[k] = fround((n + k) * (1.0f/n));
    }
  }

  // Expects dst[i] to be almost equal to a[i] + b[i]; returns the number of
  // mismatches.
  int verify(int n, void* dst, const void* src1, const void* src2) override {
    const float* lhs = static_cast<const float*>(src1);
    const float* rhs = static_cast<const float*>(src2);
    const float* out = static_cast<const float*>(dst);
    int mismatches = 0;
    for (int k = 0; k < n; ++k) {
      const float ref = lhs[k] + rhs[k];
      if (almost_equal(out[k], ref))
        continue;
      std::cout << "error at result #" << k << ": expected " << ref << ", actual " << out[k] << ", a=" << lhs[k] << ", b=" << rhs[k] << std::endl;
      ++mismatches;
    }
    return mismatches;
  }
};
// Host reference for the float subtraction kernel.
class Test_FSUB : public ITestCase {
public:
  // Inputs: a[i] = fround((n - i) / n), b[i] = fround((n + i) / n).
  void setup(int n, void* src1, void* src2) override {
    float* const lhs = static_cast<float*>(src1);
    float* const rhs = static_cast<float*>(src2);
    for (int k = 0; k < n; ++k) {
      lhs[k] = fround((n - k) * (1.0f/n));
      rhs[k] = fround((n + k) * (1.0f/n));
    }
  }

  // Expects dst[i] to be almost equal to a[i] - b[i]; returns the number of
  // mismatches.
  int verify(int n, void* dst, const void* src1, const void* src2) override {
    const float* lhs = static_cast<const float*>(src1);
    const float* rhs = static_cast<const float*>(src2);
    const float* out = static_cast<const float*>(dst);
    int mismatches = 0;
    for (int k = 0; k < n; ++k) {
      const float ref = lhs[k] - rhs[k];
      if (almost_equal(out[k], ref))
        continue;
      std::cout << "error at result #" << k << ": expected " << ref << ", actual " << out[k] << ", a=" << lhs[k] << ", b=" << rhs[k] << std::endl;
      ++mismatches;
    }
    return mismatches;
  }
};
// Host reference for the float multiplication kernel.
class Test_FMUL : public ITestCase {
public:
  // Inputs: a[i] = fround((n - i) / n), b[i] = fround((n + i) / n).
  void setup(int n, void* src1, void* src2) override {
    float* const lhs = static_cast<float*>(src1);
    float* const rhs = static_cast<float*>(src2);
    for (int k = 0; k < n; ++k) {
      lhs[k] = fround((n - k) * (1.0f/n));
      rhs[k] = fround((n + k) * (1.0f/n));
    }
  }

  // Expects dst[i] to be almost equal to a[i] * b[i]; returns the number of
  // mismatches.
  int verify(int n, void* dst, const void* src1, const void* src2) override {
    const float* lhs = static_cast<const float*>(src1);
    const float* rhs = static_cast<const float*>(src2);
    const float* out = static_cast<const float*>(dst);
    int mismatches = 0;
    for (int k = 0; k < n; ++k) {
      const float ref = lhs[k] * rhs[k];
      if (almost_equal(out[k], ref))
        continue;
      std::cout << "error at result #" << k << ": expected " << ref << ", actual " << out[k] << ", a=" << lhs[k] << ", b=" << rhs[k] << std::endl;
      ++mismatches;
    }
    return mismatches;
  }
};
// Host reference for the float multiply-add kernel.
class Test_FMADD : public ITestCase {
public:
  // Inputs: a[i] = fround((n - i) / n), b[i] = fround((n + i) / n).
  void setup(int n, void* src1, void* src2) override {
    float* const lhs = static_cast<float*>(src1);
    float* const rhs = static_cast<float*>(src2);
    for (int k = 0; k < n; ++k) {
      lhs[k] = fround((n - k) * (1.0f/n));
      rhs[k] = fround((n + k) * (1.0f/n));
    }
  }

  // Expects dst[i] to be almost equal to a[i] * b[i] + b[i]; returns the
  // number of mismatches.
  int verify(int n, void* dst, const void* src1, const void* src2) override {
    const float* lhs = static_cast<const float*>(src1);
    const float* rhs = static_cast<const float*>(src2);
    const float* out = static_cast<const float*>(dst);
    int mismatches = 0;
    for (int k = 0; k < n; ++k) {
      const float ref = lhs[k] * rhs[k] + rhs[k];
      if (almost_equal(out[k], ref))
        continue;
      std::cout << "error at result #" << k << ": expected " << ref << ", actual " << out[k] << ", a=" << lhs[k] << ", b=" << rhs[k] << std::endl;
      ++mismatches;
    }
    return mismatches;
  }
};
// Host reference for the float multiply-subtract kernel.
class Test_FMSUB : public ITestCase {
public:
  // Inputs: a[i] = fround((n - i) / n), b[i] = fround((n + i) / n).
  void setup(int n, void* src1, void* src2) override {
    float* const lhs = static_cast<float*>(src1);
    float* const rhs = static_cast<float*>(src2);
    for (int k = 0; k < n; ++k) {
      lhs[k] = fround((n - k) * (1.0f/n));
      rhs[k] = fround((n + k) * (1.0f/n));
    }
  }

  // Expects dst[i] to be almost equal to a[i] * b[i] - b[i]; returns the
  // number of mismatches.
  int verify(int n, void* dst, const void* src1, const void* src2) override {
    const float* lhs = static_cast<const float*>(src1);
    const float* rhs = static_cast<const float*>(src2);
    const float* out = static_cast<const float*>(dst);
    int mismatches = 0;
    for (int k = 0; k < n; ++k) {
      const float ref = lhs[k] * rhs[k] - rhs[k];
      if (almost_equal(out[k], ref))
        continue;
      std::cout << "error at result #" << k << ": expected " << ref << ", actual " << out[k] << ", a=" << lhs[k] << ", b=" << rhs[k] << std::endl;
      ++mismatches;
    }
    return mismatches;
  }
};
// Host reference for the negated multiply-add kernel.
class Test_FNMADD : public ITestCase {
public:
  // Inputs: a[i] = fround((n - i) / n), b[i] = fround((n + i) / n).
  void setup(int n, void* src1, void* src2) override {
    float* const lhs = static_cast<float*>(src1);
    float* const rhs = static_cast<float*>(src2);
    for (int k = 0; k < n; ++k) {
      lhs[k] = fround((n - k) * (1.0f/n));
      rhs[k] = fround((n + k) * (1.0f/n));
    }
  }

  // Expects dst[i] to be almost equal to -a[i] * b[i] - b[i]; returns the
  // number of mismatches.
  int verify(int n, void* dst, const void* src1, const void* src2) override {
    const float* lhs = static_cast<const float*>(src1);
    const float* rhs = static_cast<const float*>(src2);
    const float* out = static_cast<const float*>(dst);
    int mismatches = 0;
    for (int k = 0; k < n; ++k) {
      const float ref = -lhs[k] * rhs[k] - rhs[k];
      if (almost_equal(out[k], ref))
        continue;
      std::cout << "error at result #" << k << ": expected " << ref << ", actual " << out[k] << ", a=" << lhs[k] << ", b=" << rhs[k] << std::endl;
      ++mismatches;
    }
    return mismatches;
  }
};
// Host reference for the negated multiply-subtract kernel.
class Test_FNMSUB : public ITestCase {
public:
  // Inputs: a[i] = fround((n - i) / n), b[i] = fround((n + i) / n).
  void setup(int n, void* src1, void* src2) override {
    float* const lhs = static_cast<float*>(src1);
    float* const rhs = static_cast<float*>(src2);
    for (int k = 0; k < n; ++k) {
      lhs[k] = fround((n - k) * (1.0f/n));
      rhs[k] = fround((n + k) * (1.0f/n));
    }
  }

  // Expects dst[i] to be almost equal to -a[i] * b[i] + b[i]; returns the
  // number of mismatches.
  int verify(int n, void* dst, const void* src1, const void* src2) override {
    const float* lhs = static_cast<const float*>(src1);
    const float* rhs = static_cast<const float*>(src2);
    const float* out = static_cast<const float*>(dst);
    int mismatches = 0;
    for (int k = 0; k < n; ++k) {
      const float ref = -lhs[k] * rhs[k] + rhs[k];
      if (almost_equal(out[k], ref))
        continue;
      std::cout << "error at result #" << k << ": expected " << ref << ", actual " << out[k] << ", a=" << lhs[k] << ", b=" << rhs[k] << std::endl;
      ++mismatches;
    }
    return mismatches;
  }
};
// Host reference for the combined negated multiply-add / multiply-add kernel.
class Test_FNMADD_MADD : public ITestCase {
public:
  // Inputs: a[i] = fround((n - i) / n), b[i] = fround((n + i) / n).
  void setup(int n, void* src1, void* src2) override {
    float* const lhs = static_cast<float*>(src1);
    float* const rhs = static_cast<float*>(src2);
    for (int k = 0; k < n; ++k) {
      lhs[k] = fround((n - k) * (1.0f/n));
      rhs[k] = fround((n + k) * (1.0f/n));
    }
  }

  // Expects dst[i] to be almost equal to (-a*b - b) + (a*b + b); returns the
  // number of mismatches.
  int verify(int n, void* dst, const void* src1, const void* src2) override {
    const float* lhs = static_cast<const float*>(src1);
    const float* rhs = static_cast<const float*>(src2);
    const float* out = static_cast<const float*>(dst);
    int mismatches = 0;
    for (int k = 0; k < n; ++k) {
      const float negated = -lhs[k] * rhs[k] - rhs[k];
      const float plain = lhs[k] * rhs[k] + rhs[k];
      const float ref = negated + plain;
      if (almost_equal(out[k], ref))
        continue;
      std::cout << "error at result #" << k << ": expected " << ref << ", actual " << out[k] << ", a=" << lhs[k] << ", b=" << rhs[k] << std::endl;
      ++mismatches;
    }
    return mismatches;
  }
};
// Host reference for the float division kernel.
class Test_FDIV : public ITestCase {
public:
  // Inputs: a[i] = fround((n - i) / n), b[i] = fround((n + i) / n).
  void setup(int n, void* src1, void* src2) override {
    float* const lhs = static_cast<float*>(src1);
    float* const rhs = static_cast<float*>(src2);
    for (int k = 0; k < n; ++k) {
      lhs[k] = fround((n - k) * (1.0f/n));
      rhs[k] = fround((n + k) * (1.0f/n));
    }
  }

  // Expects dst[i] to be almost equal to a[i] / b[i]; returns the number of
  // mismatches.
  int verify(int n, void* dst, const void* src1, const void* src2) override {
    const float* lhs = static_cast<const float*>(src1);
    const float* rhs = static_cast<const float*>(src2);
    const float* out = static_cast<const float*>(dst);
    int mismatches = 0;
    for (int k = 0; k < n; ++k) {
      const float ref = lhs[k] / rhs[k];
      if (almost_equal(out[k], ref))
        continue;
      std::cout << "error at result #" << k << ": expected " << ref << ", actual " << out[k] << ", a=" << lhs[k] << ", b=" << rhs[k] << std::endl;
      ++mismatches;
    }
    return mismatches;
  }
};
// Host reference for the double float division kernel.
class Test_FDIV2 : public ITestCase {
public:
  // Inputs: a[i] = fround((n - i) / n), b[i] = fround((n + i) / n).
  void setup(int n, void* src1, void* src2) override {
    float* const lhs = static_cast<float*>(src1);
    float* const rhs = static_cast<float*>(src2);
    for (int k = 0; k < n; ++k) {
      lhs[k] = fround((n - k) * (1.0f/n));
      rhs[k] = fround((n + k) * (1.0f/n));
    }
  }

  // Expects dst[i] to be almost equal to a[i] / b[i] + b[i] / a[i]; returns
  // the number of mismatches.
  int verify(int n, void* dst, const void* src1, const void* src2) override {
    const float* lhs = static_cast<const float*>(src1);
    const float* rhs = static_cast<const float*>(src2);
    const float* out = static_cast<const float*>(dst);
    int mismatches = 0;
    for (int k = 0; k < n; ++k) {
      const float forward = lhs[k] / rhs[k];
      const float inverse = rhs[k] / lhs[k];
      const float ref = forward + inverse;
      if (almost_equal(out[k], ref))
        continue;
      std::cout << "error at result #" << k << ": expected " << ref << ", actual " << out[k] << ", a=" << lhs[k] << ", b=" << rhs[k] << std::endl;
      ++mismatches;
    }
    return mismatches;
  }
};
// Host reference for the float square-root kernel.
class Test_FSQRT : public ITestCase {
public:
  // Inputs: a[i] = b[i] = 1 + (i % 64).
  void setup(int n, void* src1, void* src2) override {
    float* const lhs = static_cast<float*>(src1);
    float* const rhs = static_cast<float*>(src2);
    for (int k = 0; k < n; ++k) {
      const float operand = 1.0f + (k % 64);
      lhs[k] = operand;
      rhs[k] = operand;
    }
  }

  // Expects dst[i] to be almost equal to sqrt(a[i] * b[i]); returns the number
  // of mismatches.
  int verify(int n, void* dst, const void* src1, const void* src2) override {
    const float* lhs = static_cast<const float*>(src1);
    const float* rhs = static_cast<const float*>(src2);
    const float* out = static_cast<const float*>(dst);
    int mismatches = 0;
    for (int k = 0; k < n; ++k) {
      const auto ref = sqrt(lhs[k] * rhs[k]);
      if (almost_equal(out[k], ref))
        continue;
      std::cout << "error at result #" << k << ": expected " << ref << ", actual " << out[k] << ", a=" << lhs[k] << ", b=" << rhs[k] << std::endl;
      ++mismatches;
    }
    return mismatches;
  }
};
// Host reference for the float-to-signed-int conversion kernel.
class Test_FTOI : public ITestCase {
public:
  // Inputs: a[i] = b[i] = fround((n/2 - i) + i/n), which covers positive and
  // negative values.
  void setup(int n, void* src1, void* src2) override {
    float* const lhs = static_cast<float*>(src1);
    float* const rhs = static_cast<float*>(src2);
    for (int k = 0; k < n; ++k) {
      const float operand = fround((n/2 - k) + (float(k)/n));
      lhs[k] = operand;
      rhs[k] = operand;
    }
  }

  // Expects dst[i] == (int32_t)(a[i] + b[i]); returns the number of
  // mismatches.
  int verify(int n, void* dst, const void* src1, const void* src2) override {
    const float* lhs = static_cast<const float*>(src1);
    const float* rhs = static_cast<const float*>(src2);
    const int32_t* out = static_cast<const int32_t*>(dst);
    int mismatches = 0;
    for (int k = 0; k < n; ++k) {
      const float sum = lhs[k] + rhs[k];
      const int32_t ref = (int32_t)sum;
      if (out[k] == ref)
        continue;
      std::cout << "error at result #" << k << ": expected " << ref << ", actual " << out[k] << ", a=" << lhs[k] << ", b=" << rhs[k] << std::endl;
      ++mismatches;
    }
    return mismatches;
  }
};
// Host-side half of the FTOU test case: float operands, unsigned 32-bit integer
// results. The reference is (uint32_t)(a[i] + b[i]) and must match exactly.
// fround() is a helper defined elsewhere in this file.
class Test_FTOU : public ITestCase {
public:
  // Writes fround(k + k/n) into both src1[k] and src2[k] for k in [0, n).
  void setup(int n, void* src1, void* src2) override {
    float* const lhs = static_cast<float*>(src1);
    float* const rhs = static_cast<float*>(src2);
    for (int k = 0; k < n; ++k) {
      lhs[k] = fround(k + (float(k)/n));
      rhs[k] = fround(k + (float(k)/n));
    }
  }

  // Compares each of the n unsigned results in dst with the host reference and
  // logs every mismatch to std::cout. Returns the number of mismatches.
  int verify(int n, void* dst, const void* src1, const void* src2) override {
    const float* const lhs = static_cast<const float*>(src1);
    const float* const rhs = static_cast<const float*>(src2);
    uint32_t* const out = static_cast<uint32_t*>(dst);
    int mismatches = 0;
    for (int k = 0; k < n; ++k) {
      const auto sum = lhs[k] + rhs[k];
      const auto expected = static_cast<uint32_t>(sum);
      if (out[k] == expected)
        continue;
      std::cout << "error at result #" << k << ": expected " << expected << ", actual " << out[k] << ", a=" << lhs[k] << ", b=" << rhs[k] << std::endl;
      ++mismatches;
    }
    return mismatches;
  }
};
// Host-side half of the ITOF test case: signed 32-bit integer operands, float
// results. The reference is (float)(a[i] + b[i]).
// almost_equal() is a helper defined elsewhere in this file.
class Test_ITOF : public ITestCase {
public:
  // Writes n/2 - k into both src1[k] and src2[k], so the inputs span negative
  // and positive integers.
  void setup(int n, void* src1, void* src2) override {
    int32_t* const lhs = static_cast<int32_t*>(src1);
    int32_t* const rhs = static_cast<int32_t*>(src2);
    for (int k = 0; k < n; ++k) {
      lhs[k] = n/2 - k;
      rhs[k] = n/2 - k;
    }
  }

  // Compares each of the n float results in dst with the host reference and
  // logs every mismatch to std::cout. Returns the number of mismatches.
  int verify(int n, void* dst, const void* src1, const void* src2) override {
    const int32_t* const lhs = static_cast<const int32_t*>(src1);
    const int32_t* const rhs = static_cast<const int32_t*>(src2);
    float* const out = static_cast<float*>(dst);
    int mismatches = 0;
    for (int k = 0; k < n; ++k) {
      const auto sum = lhs[k] + rhs[k];
      const auto expected = static_cast<float>(sum);
      if (almost_equal(out[k], expected))
        continue;
      std::cout << "error at result #" << k << ": expected " << expected << ", actual " << out[k] << ", a=" << lhs[k] << ", b=" << rhs[k] << std::endl;
      ++mismatches;
    }
    return mismatches;
  }
};
// Host-side half of the UTOF test case: unsigned 32-bit integer operands, float
// results. The reference is (float)(a[i] + b[i]).
// almost_equal() is a helper defined elsewhere in this file.
class Test_UTOF : public ITestCase {
public:
  // Writes k into both src1[k] and src2[k] for k in [0, n).
  void setup(int n, void* src1, void* src2) override {
    uint32_t* const lhs = static_cast<uint32_t*>(src1);
    uint32_t* const rhs = static_cast<uint32_t*>(src2);
    for (int k = 0; k < n; ++k) {
      lhs[k] = k;
      rhs[k] = k;
    }
  }

  // Compares each of the n float results in dst with the host reference and
  // logs every mismatch to std::cout. Returns the number of mismatches.
  int verify(int n, void* dst, const void* src1, const void* src2) override {
    const uint32_t* const lhs = static_cast<const uint32_t*>(src1);
    const uint32_t* const rhs = static_cast<const uint32_t*>(src2);
    float* const out = static_cast<float*>(dst);
    int mismatches = 0;
    for (int k = 0; k < n; ++k) {
      const auto sum = lhs[k] + rhs[k];
      const auto expected = static_cast<float>(sum);
      if (almost_equal(out[k], expected))
        continue;
      std::cout << "error at result #" << k << ": expected " << expected << ", actual " << out[k] << ", a=" << lhs[k] << ", b=" << rhs[k] << std::endl;
      ++mismatches;
    }
    return mismatches;
  }
};

View File

@@ -1,67 +0,0 @@
# Build/run rules for the io_addr test: a host executable ($(PROJECT)) built
# from $(SRCS), plus a RISC-V kernel image (kernel.elf/.bin/.dump) built from
# $(VX_SRCS) with the cross toolchain.

# If a recipe fails, delete its target. kernel.dump and .depend are written via
# shell redirection, so without this a failed command would leave a truncated
# file that looks up to date.
.DELETE_ON_ERROR:

RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain
VORTEX_RT_PATH ?= $(wildcard ../../../runtime)

# Arguments passed to the host executable by the run-* targets.
OPTS ?= -n1

VX_CC  = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc
VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
VX_DP  = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
VX_CP  = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy

VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
VX_CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw

VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a

VX_SRCS = kernel.c

# Optimized host build (alternative to the debug flags below):
#CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors

CXXFLAGS += -I../../include -I$(VORTEX_RT_PATH)/../hw

PROJECT = io_addr

SRCS = main.cpp

# None of these names are files; declare them phony so a stray file called
# "clean" or "all" cannot silently disable the target.
.PHONY: all run-fpga run-asesim run-vlsim run-rtlsim run-simx clean clean-all

all: $(PROJECT) kernel.bin kernel.dump

# Full disassembly of the kernel image.
kernel.dump: kernel.elf
	$(VX_DP) -D $< > $@

# Raw binary image extracted from the ELF.
kernel.bin: kernel.elf
	$(VX_CP) -O binary $< $@

kernel.elf: $(VX_SRCS)
	$(VX_CC) $(VX_CFLAGS) $^ $(VX_LDFLAGS) -o $@

# The host program links against libvortex from ../../stub; each run-* target
# below puts a different directory first on LD_LIBRARY_PATH at run time.
$(PROJECT): $(SRCS)
	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../stub -lvortex -o $@

run-fpga: $(PROJECT)
	LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

run-asesim: $(PROJECT)
	ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

run-vlsim: $(PROJECT)
	LD_LIBRARY_PATH=../../opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

run-rtlsim: $(PROJECT)
	LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

run-simx: $(PROJECT)
	LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

# Header dependencies of the host sources, as emitted by the compiler (-MM).
.depend: $(SRCS)
	$(CXX) $(CXXFLAGS) -MM $^ > $@

# Removes the host build products only.
clean:
	rm -rf $(PROJECT) *.o .depend

# Additionally removes the kernel images.
clean-all: clean
	rm -rf *.elf *.bin *.dump

# Skip dependency generation when a cleaning goal was requested; otherwise make
# would rebuild .depend only for clean/clean-all to delete it again.
ifeq ($(filter clean clean-all,$(MAKECMDGOALS)),)
-include .depend
endif

View File

@@ -1,12 +0,0 @@
/*
 * Definitions shared between the host program and the device kernel of the
 * io_addr test.
 */
#ifndef _COMMON_H_
#define _COMMON_H_

/* uint32_t is used below; include it here so the header compiles regardless
 * of what the including file pulled in beforehand. */
#include <stdint.h>

/* Device address at which the host writes the kernel_arg_t and from which the
 * kernel's main() reads it. */
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000

/* Argument block handed from the host to the kernel. */
struct kernel_arg_t {
  uint32_t num_points; /* number of tasks to spawn / elements to process */
  uint32_t src_ptr;    /* device address of the source array (32-bit address words) */
  uint32_t dst_ptr;    /* device address of the destination array */
};

#endif

Binary file not shown.

View File

@@ -1,19 +0,0 @@
#include <stdint.h>
#include <vx_intrinsics.h>
#include <vx_spawn.h>
#include "common.h"
/*
 * Per-task kernel body. Entry task_id of the source array holds an address;
 * the 32-bit word stored at that address is copied into entry task_id of the
 * destination array.
 *
 * task_id: index of this task.
 * arg:     pointer to the struct kernel_arg_t supplied by main().
 */
void kernel_body(int task_id, void* arg) {
  struct kernel_arg_t* params = (struct kernel_arg_t*)(arg);
  uint32_t* addr_table = (uint32_t*)params->src_ptr;
  uint32_t* results    = (uint32_t*)params->dst_ptr;

  /* The source entry is itself an address: load through it. */
  int32_t* target = (int32_t*)(addr_table[task_id]);
  results[task_id] = *target;
}
/*
 * Kernel entry point: reads the argument block from its fixed device address
 * and spawns num_points tasks, each running kernel_body with that block.
 */
void main() {
  struct kernel_arg_t* kargs = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
  vx_spawn_tasks(kargs->num_points, kernel_body, kargs);
}

View File

@@ -1,682 +0,0 @@
kernel.elf: file format elf32-littleriscv
Disassembly of section .init:
80000000 <_start>:
80000000: 00000597 auipc a1,0x0
80000004: 0b058593 addi a1,a1,176 # 800000b0 <vx_set_sp>
80000008: fc102573 csrr a0,0xfc1
8000000c: 00b5106b 0xb5106b
80000010: 0a0000ef jal ra,800000b0 <vx_set_sp>
80000014: 00100513 li a0,1
80000018: 0005006b 0x5006b
8000001c: 00002517 auipc a0,0x2
80000020: cf850513 addi a0,a0,-776 # 80001d14 <g_wspawn_args>
80000024: 00002617 auipc a2,0x2
80000028: d7060613 addi a2,a2,-656 # 80001d94 <__BSS_END__>
8000002c: 40a60633 sub a2,a2,a0
80000030: 00000593 li a1,0
80000034: 614000ef jal ra,80000648 <memset>
80000038: 00000517 auipc a0,0x0
8000003c: 51850513 addi a0,a0,1304 # 80000550 <__libc_fini_array>
80000040: 4c8000ef jal ra,80000508 <atexit>
80000044: 568000ef jal ra,800005ac <__libc_init_array>
80000048: 008000ef jal ra,80000050 <main>
8000004c: 4d00006f j 8000051c <exit>
Disassembly of section .text:
80000050 <main>:
80000050: 7ffff7b7 lui a5,0x7ffff
80000054: 0007a503 lw a0,0(a5) # 7ffff000 <__stack_size+0x7fffec00>
80000058: 800005b7 lui a1,0x80000
8000005c: 7ffff637 lui a2,0x7ffff
80000060: 08058593 addi a1,a1,128 # 80000080 <__stack_top+0x81000080>
80000064: 1480006f j 800001ac <vx_spawn_tasks>
80000068 <register_fini>:
80000068: 00000793 li a5,0
8000006c: 00078863 beqz a5,8000007c <register_fini+0x14>
80000070: 80000537 lui a0,0x80000
80000074: 55050513 addi a0,a0,1360 # 80000550 <__stack_top+0x81000550>
80000078: 4900006f j 80000508 <atexit>
8000007c: 00008067 ret
80000080 <kernel_body>:
80000080: 0045a783 lw a5,4(a1)
80000084: 00251513 slli a0,a0,0x2
80000088: 00a787b3 add a5,a5,a0
8000008c: 0007a703 lw a4,0(a5)
80000090: 0085a783 lw a5,8(a1)
80000094: 00072703 lw a4,0(a4)
80000098: 00a78533 add a0,a5,a0
8000009c: 00e52023 sw a4,0(a0)
800000a0: 00008067 ret
800000a4 <_exit>:
800000a4: 250000ef jal ra,800002f4 <vx_perf_dump>
800000a8: 00000513 li a0,0
800000ac: 0005006b 0x5006b
800000b0 <vx_set_sp>:
800000b0: fc002573 csrr a0,0xfc0
800000b4: 0005006b 0x5006b
800000b8: 00002197 auipc gp,0x2
800000bc: 03018193 addi gp,gp,48 # 800020e8 <__global_pointer>
800000c0: 7f000117 auipc sp,0x7f000
800000c4: f4010113 addi sp,sp,-192 # ff000000 <__stack_top>
800000c8: 40000593 li a1,1024
800000cc: cc102673 csrr a2,0xcc1
800000d0: 02c585b3 mul a1,a1,a2
800000d4: 40b10133 sub sp,sp,a1
800000d8: cc3026f3 csrr a3,0xcc3
800000dc: 00068663 beqz a3,800000e8 <RETURN>
800000e0: 00000513 li a0,0
800000e4: 0005006b 0x5006b
800000e8 <RETURN>:
800000e8: 00008067 ret
800000ec <spawn_tasks_callback>:
800000ec: fe010113 addi sp,sp,-32
800000f0: 00112e23 sw ra,28(sp)
800000f4: 00812c23 sw s0,24(sp)
800000f8: 00912a23 sw s1,20(sp)
800000fc: 01212823 sw s2,16(sp)
80000100: 01312623 sw s3,12(sp)
80000104: fc0027f3 csrr a5,0xfc0
80000108: 0007806b 0x7806b
8000010c: cc5026f3 csrr a3,0xcc5
80000110: cc3029f3 csrr s3,0xcc3
80000114: cc002773 csrr a4,0xcc0
80000118: fc002673 csrr a2,0xfc0
8000011c: 800027b7 lui a5,0x80002
80000120: 00269693 slli a3,a3,0x2
80000124: d1478793 addi a5,a5,-748 # 80001d14 <__stack_top+0x81001d14>
80000128: 00d787b3 add a5,a5,a3
8000012c: 0007a483 lw s1,0(a5)
80000130: 0104a403 lw s0,16(s1)
80000134: 00c4a683 lw a3,12(s1)
80000138: 0089a933 slt s2,s3,s0
8000013c: 00040793 mv a5,s0
80000140: 00d90933 add s2,s2,a3
80000144: 03368433 mul s0,a3,s3
80000148: 00f9d463 bge s3,a5,80000150 <spawn_tasks_callback+0x64>
8000014c: 00098793 mv a5,s3
80000150: 00f40433 add s0,s0,a5
80000154: 0084a683 lw a3,8(s1)
80000158: 02c40433 mul s0,s0,a2
8000015c: 02e907b3 mul a5,s2,a4
80000160: 00d40433 add s0,s0,a3
80000164: 00f40433 add s0,s0,a5
80000168: 00890933 add s2,s2,s0
8000016c: 01245e63 bge s0,s2,80000188 <spawn_tasks_callback+0x9c>
80000170: 0004a783 lw a5,0(s1)
80000174: 0044a583 lw a1,4(s1)
80000178: 00040513 mv a0,s0
8000017c: 00140413 addi s0,s0,1
80000180: 000780e7 jalr a5
80000184: fe8916e3 bne s2,s0,80000170 <spawn_tasks_callback+0x84>
80000188: 0019b993 seqz s3,s3
8000018c: 0009806b 0x9806b
80000190: 01c12083 lw ra,28(sp)
80000194: 01812403 lw s0,24(sp)
80000198: 01412483 lw s1,20(sp)
8000019c: 01012903 lw s2,16(sp)
800001a0: 00c12983 lw s3,12(sp)
800001a4: 02010113 addi sp,sp,32
800001a8: 00008067 ret
800001ac <vx_spawn_tasks>:
800001ac: fc010113 addi sp,sp,-64
800001b0: 02112e23 sw ra,60(sp)
800001b4: 02812c23 sw s0,56(sp)
800001b8: 02912a23 sw s1,52(sp)
800001bc: 03212823 sw s2,48(sp)
800001c0: 03312623 sw s3,44(sp)
800001c4: fc2026f3 csrr a3,0xfc2
800001c8: fc102873 csrr a6,0xfc1
800001cc: fc002473 csrr s0,0xfc0
800001d0: cc5027f3 csrr a5,0xcc5
800001d4: 01f00713 li a4,31
800001d8: 0cf74463 blt a4,a5,800002a0 <vx_spawn_tasks+0xf4>
800001dc: 030408b3 mul a7,s0,a6
800001e0: 00100713 li a4,1
800001e4: 00a8d463 bge a7,a0,800001ec <vx_spawn_tasks+0x40>
800001e8: 03154733 div a4,a0,a7
800001ec: 0ce6c863 blt a3,a4,800002bc <vx_spawn_tasks+0x110>
800001f0: 0ae7d863 bge a5,a4,800002a0 <vx_spawn_tasks+0xf4>
800001f4: fff68693 addi a3,a3,-1
800001f8: 02e54333 div t1,a0,a4
800001fc: 00030893 mv a7,t1
80000200: 00f69663 bne a3,a5,8000020c <vx_spawn_tasks+0x60>
80000204: 02e56533 rem a0,a0,a4
80000208: 006508b3 add a7,a0,t1
8000020c: 0288c4b3 div s1,a7,s0
80000210: 0288e933 rem s2,a7,s0
80000214: 0b04ca63 blt s1,a6,800002c8 <vx_spawn_tasks+0x11c>
80000218: 00100693 li a3,1
8000021c: 0304c733 div a4,s1,a6
80000220: 00070663 beqz a4,8000022c <vx_spawn_tasks+0x80>
80000224: 00070693 mv a3,a4
80000228: 0304e733 rem a4,s1,a6
8000022c: 800029b7 lui s3,0x80002
80000230: d1498993 addi s3,s3,-748 # 80001d14 <__stack_top+0x81001d14>
80000234: 00e12e23 sw a4,28(sp)
80000238: 00c10713 addi a4,sp,12
8000023c: 00b12623 sw a1,12(sp)
80000240: 00c12823 sw a2,16(sp)
80000244: 00d12c23 sw a3,24(sp)
80000248: 02f30333 mul t1,t1,a5
8000024c: 00279793 slli a5,a5,0x2
80000250: 00f987b3 add a5,s3,a5
80000254: 00e7a023 sw a4,0(a5)
80000258: 00612a23 sw t1,20(sp)
8000025c: 06904c63 bgtz s1,800002d4 <vx_spawn_tasks+0x128>
80000260: 04090063 beqz s2,800002a0 <vx_spawn_tasks+0xf4>
80000264: 02848433 mul s0,s1,s0
80000268: 00812a23 sw s0,20(sp)
8000026c: 0009006b 0x9006b
80000270: cc5027f3 csrr a5,0xcc5
80000274: cc202573 csrr a0,0xcc2
80000278: 00279793 slli a5,a5,0x2
8000027c: 00f989b3 add s3,s3,a5
80000280: 0009a783 lw a5,0(s3)
80000284: 0087a683 lw a3,8(a5)
80000288: 0007a703 lw a4,0(a5)
8000028c: 0047a583 lw a1,4(a5)
80000290: 00d50533 add a0,a0,a3
80000294: 000700e7 jalr a4
80000298: 00100793 li a5,1
8000029c: 0007806b 0x7806b
800002a0: 03c12083 lw ra,60(sp)
800002a4: 03812403 lw s0,56(sp)
800002a8: 03412483 lw s1,52(sp)
800002ac: 03012903 lw s2,48(sp)
800002b0: 02c12983 lw s3,44(sp)
800002b4: 04010113 addi sp,sp,64
800002b8: 00008067 ret
800002bc: 00068713 mv a4,a3
800002c0: f2e7cae3 blt a5,a4,800001f4 <vx_spawn_tasks+0x48>
800002c4: fddff06f j 800002a0 <vx_spawn_tasks+0xf4>
800002c8: 00000713 li a4,0
800002cc: 00100693 li a3,1
800002d0: f5dff06f j 8000022c <vx_spawn_tasks+0x80>
800002d4: 00048713 mv a4,s1
800002d8: 00985463 bge a6,s1,800002e0 <vx_spawn_tasks+0x134>
800002dc: 00080713 mv a4,a6
800002e0: 800007b7 lui a5,0x80000
800002e4: 0ec78793 addi a5,a5,236 # 800000ec <__stack_top+0x810000ec>
800002e8: 00f7106b 0xf7106b
800002ec: e01ff0ef jal ra,800000ec <spawn_tasks_callback>
800002f0: f71ff06f j 80000260 <vx_spawn_tasks+0xb4>
800002f4 <vx_perf_dump>:
800002f4: cc5027f3 csrr a5,0xcc5
800002f8: 00ff0737 lui a4,0xff0
800002fc: 00e787b3 add a5,a5,a4
80000300: 00879793 slli a5,a5,0x8
80000304: b0002773 csrr a4,mcycle
80000308: 00e7a023 sw a4,0(a5)
8000030c: b0102773 csrr a4,0xb01
80000310: 00e7a223 sw a4,4(a5)
80000314: b0202773 csrr a4,minstret
80000318: 00e7a423 sw a4,8(a5)
8000031c: b0302773 csrr a4,mhpmcounter3
80000320: 00e7a623 sw a4,12(a5)
80000324: b0402773 csrr a4,mhpmcounter4
80000328: 00e7a823 sw a4,16(a5)
8000032c: b0502773 csrr a4,mhpmcounter5
80000330: 00e7aa23 sw a4,20(a5)
80000334: b0602773 csrr a4,mhpmcounter6
80000338: 00e7ac23 sw a4,24(a5)
8000033c: b0702773 csrr a4,mhpmcounter7
80000340: 00e7ae23 sw a4,28(a5)
80000344: b0802773 csrr a4,mhpmcounter8
80000348: 02e7a023 sw a4,32(a5)
8000034c: b0902773 csrr a4,mhpmcounter9
80000350: 02e7a223 sw a4,36(a5)
80000354: b0a02773 csrr a4,mhpmcounter10
80000358: 02e7a423 sw a4,40(a5)
8000035c: b0b02773 csrr a4,mhpmcounter11
80000360: 02e7a623 sw a4,44(a5)
80000364: b0c02773 csrr a4,mhpmcounter12
80000368: 02e7a823 sw a4,48(a5)
8000036c: b0d02773 csrr a4,mhpmcounter13
80000370: 02e7aa23 sw a4,52(a5)
80000374: b0e02773 csrr a4,mhpmcounter14
80000378: 02e7ac23 sw a4,56(a5)
8000037c: b0f02773 csrr a4,mhpmcounter15
80000380: 02e7ae23 sw a4,60(a5)
80000384: b1002773 csrr a4,mhpmcounter16
80000388: 04e7a023 sw a4,64(a5)
8000038c: b1102773 csrr a4,mhpmcounter17
80000390: 04e7a223 sw a4,68(a5)
80000394: b1202773 csrr a4,mhpmcounter18
80000398: 04e7a423 sw a4,72(a5)
8000039c: b1302773 csrr a4,mhpmcounter19
800003a0: 04e7a623 sw a4,76(a5)
800003a4: b1402773 csrr a4,mhpmcounter20
800003a8: 04e7a823 sw a4,80(a5)
800003ac: b1502773 csrr a4,mhpmcounter21
800003b0: 04e7aa23 sw a4,84(a5)
800003b4: b1602773 csrr a4,mhpmcounter22
800003b8: 04e7ac23 sw a4,88(a5)
800003bc: b1702773 csrr a4,mhpmcounter23
800003c0: 04e7ae23 sw a4,92(a5)
800003c4: b1802773 csrr a4,mhpmcounter24
800003c8: 06e7a023 sw a4,96(a5)
800003cc: b1902773 csrr a4,mhpmcounter25
800003d0: 06e7a223 sw a4,100(a5)
800003d4: b1a02773 csrr a4,mhpmcounter26
800003d8: 06e7a423 sw a4,104(a5)
800003dc: b1b02773 csrr a4,mhpmcounter27
800003e0: 06e7a623 sw a4,108(a5)
800003e4: b1c02773 csrr a4,mhpmcounter28
800003e8: 06e7a823 sw a4,112(a5)
800003ec: b1d02773 csrr a4,mhpmcounter29
800003f0: 06e7aa23 sw a4,116(a5)
800003f4: b1e02773 csrr a4,mhpmcounter30
800003f8: 06e7ac23 sw a4,120(a5)
800003fc: b1f02773 csrr a4,mhpmcounter31
80000400: 06e7ae23 sw a4,124(a5)
80000404: b8002773 csrr a4,mcycleh
80000408: 08e7a023 sw a4,128(a5)
8000040c: b8102773 csrr a4,0xb81
80000410: 08e7a223 sw a4,132(a5)
80000414: b8202773 csrr a4,minstreth
80000418: 08e7a423 sw a4,136(a5)
8000041c: b8302773 csrr a4,mhpmcounter3h
80000420: 08e7a623 sw a4,140(a5)
80000424: b8402773 csrr a4,mhpmcounter4h
80000428: 08e7a823 sw a4,144(a5)
8000042c: b8502773 csrr a4,mhpmcounter5h
80000430: 08e7aa23 sw a4,148(a5)
80000434: b8602773 csrr a4,mhpmcounter6h
80000438: 08e7ac23 sw a4,152(a5)
8000043c: b8702773 csrr a4,mhpmcounter7h
80000440: 08e7ae23 sw a4,156(a5)
80000444: b8802773 csrr a4,mhpmcounter8h
80000448: 0ae7a023 sw a4,160(a5)
8000044c: b8902773 csrr a4,mhpmcounter9h
80000450: 0ae7a223 sw a4,164(a5)
80000454: b8a02773 csrr a4,mhpmcounter10h
80000458: 0ae7a423 sw a4,168(a5)
8000045c: b8b02773 csrr a4,mhpmcounter11h
80000460: 0ae7a623 sw a4,172(a5)
80000464: b8c02773 csrr a4,mhpmcounter12h
80000468: 0ae7a823 sw a4,176(a5)
8000046c: b8d02773 csrr a4,mhpmcounter13h
80000470: 0ae7aa23 sw a4,180(a5)
80000474: b8e02773 csrr a4,mhpmcounter14h
80000478: 0ae7ac23 sw a4,184(a5)
8000047c: b8f02773 csrr a4,mhpmcounter15h
80000480: 0ae7ae23 sw a4,188(a5)
80000484: b9002773 csrr a4,mhpmcounter16h
80000488: 0ce7a023 sw a4,192(a5)
8000048c: b9102773 csrr a4,mhpmcounter17h
80000490: 0ce7a223 sw a4,196(a5)
80000494: b9202773 csrr a4,mhpmcounter18h
80000498: 0ce7a423 sw a4,200(a5)
8000049c: b9302773 csrr a4,mhpmcounter19h
800004a0: 0ce7a623 sw a4,204(a5)
800004a4: b9402773 csrr a4,mhpmcounter20h
800004a8: 0ce7a823 sw a4,208(a5)
800004ac: b9502773 csrr a4,mhpmcounter21h
800004b0: 0ce7aa23 sw a4,212(a5)
800004b4: b9602773 csrr a4,mhpmcounter22h
800004b8: 0ce7ac23 sw a4,216(a5)
800004bc: b9702773 csrr a4,mhpmcounter23h
800004c0: 0ce7ae23 sw a4,220(a5)
800004c4: b9802773 csrr a4,mhpmcounter24h
800004c8: 0ee7a023 sw a4,224(a5)
800004cc: b9902773 csrr a4,mhpmcounter25h
800004d0: 0ee7a223 sw a4,228(a5)
800004d4: b9a02773 csrr a4,mhpmcounter26h
800004d8: 0ee7a423 sw a4,232(a5)
800004dc: b9b02773 csrr a4,mhpmcounter27h
800004e0: 0ee7a623 sw a4,236(a5)
800004e4: b9c02773 csrr a4,mhpmcounter28h
800004e8: 0ee7a823 sw a4,240(a5)
800004ec: b9d02773 csrr a4,mhpmcounter29h
800004f0: 0ee7aa23 sw a4,244(a5)
800004f4: b9e02773 csrr a4,mhpmcounter30h
800004f8: 0ee7ac23 sw a4,248(a5)
800004fc: b9f02773 csrr a4,mhpmcounter31h
80000500: 0ee7ae23 sw a4,252(a5)
80000504: 00008067 ret
80000508 <atexit>:
80000508: 00050593 mv a1,a0
8000050c: 00000693 li a3,0
80000510: 00000613 li a2,0
80000514: 00000513 li a0,0
80000518: 20c0006f j 80000724 <__register_exitproc>
8000051c <exit>:
8000051c: ff010113 addi sp,sp,-16
80000520: 00000593 li a1,0
80000524: 00812423 sw s0,8(sp)
80000528: 00112623 sw ra,12(sp)
8000052c: 00050413 mv s0,a0
80000530: 290000ef jal ra,800007c0 <__call_exitprocs>
80000534: 800027b7 lui a5,0x80002
80000538: d107a503 lw a0,-752(a5) # 80001d10 <__stack_top+0x81001d10>
8000053c: 03c52783 lw a5,60(a0)
80000540: 00078463 beqz a5,80000548 <exit+0x2c>
80000544: 000780e7 jalr a5
80000548: 00040513 mv a0,s0
8000054c: b59ff0ef jal ra,800000a4 <_exit>
80000550 <__libc_fini_array>:
80000550: ff010113 addi sp,sp,-16
80000554: 00812423 sw s0,8(sp)
80000558: 800027b7 lui a5,0x80002
8000055c: 80002437 lui s0,0x80002
80000560: 8e840413 addi s0,s0,-1816 # 800018e8 <__stack_top+0x810018e8>
80000564: 8e878793 addi a5,a5,-1816 # 800018e8 <__stack_top+0x810018e8>
80000568: 408787b3 sub a5,a5,s0
8000056c: 00912223 sw s1,4(sp)
80000570: 00112623 sw ra,12(sp)
80000574: 4027d493 srai s1,a5,0x2
80000578: 02048063 beqz s1,80000598 <__libc_fini_array+0x48>
8000057c: ffc78793 addi a5,a5,-4
80000580: 00878433 add s0,a5,s0
80000584: 00042783 lw a5,0(s0)
80000588: fff48493 addi s1,s1,-1
8000058c: ffc40413 addi s0,s0,-4
80000590: 000780e7 jalr a5
80000594: fe0498e3 bnez s1,80000584 <__libc_fini_array+0x34>
80000598: 00c12083 lw ra,12(sp)
8000059c: 00812403 lw s0,8(sp)
800005a0: 00412483 lw s1,4(sp)
800005a4: 01010113 addi sp,sp,16
800005a8: 00008067 ret
800005ac <__libc_init_array>:
800005ac: ff010113 addi sp,sp,-16
800005b0: 00812423 sw s0,8(sp)
800005b4: 01212023 sw s2,0(sp)
800005b8: 80002437 lui s0,0x80002
800005bc: 80002937 lui s2,0x80002
800005c0: 8e440793 addi a5,s0,-1820 # 800018e4 <__stack_top+0x810018e4>
800005c4: 8e490913 addi s2,s2,-1820 # 800018e4 <__stack_top+0x810018e4>
800005c8: 40f90933 sub s2,s2,a5
800005cc: 00112623 sw ra,12(sp)
800005d0: 00912223 sw s1,4(sp)
800005d4: 40295913 srai s2,s2,0x2
800005d8: 02090063 beqz s2,800005f8 <__libc_init_array+0x4c>
800005dc: 8e440413 addi s0,s0,-1820
800005e0: 00000493 li s1,0
800005e4: 00042783 lw a5,0(s0)
800005e8: 00148493 addi s1,s1,1
800005ec: 00440413 addi s0,s0,4
800005f0: 000780e7 jalr a5
800005f4: fe9918e3 bne s2,s1,800005e4 <__libc_init_array+0x38>
800005f8: 80002437 lui s0,0x80002
800005fc: 80002937 lui s2,0x80002
80000600: 8e440793 addi a5,s0,-1820 # 800018e4 <__stack_top+0x810018e4>
80000604: 8e890913 addi s2,s2,-1816 # 800018e8 <__stack_top+0x810018e8>
80000608: 40f90933 sub s2,s2,a5
8000060c: 40295913 srai s2,s2,0x2
80000610: 02090063 beqz s2,80000630 <__libc_init_array+0x84>
80000614: 8e440413 addi s0,s0,-1820
80000618: 00000493 li s1,0
8000061c: 00042783 lw a5,0(s0)
80000620: 00148493 addi s1,s1,1
80000624: 00440413 addi s0,s0,4
80000628: 000780e7 jalr a5
8000062c: fe9918e3 bne s2,s1,8000061c <__libc_init_array+0x70>
80000630: 00c12083 lw ra,12(sp)
80000634: 00812403 lw s0,8(sp)
80000638: 00412483 lw s1,4(sp)
8000063c: 00012903 lw s2,0(sp)
80000640: 01010113 addi sp,sp,16
80000644: 00008067 ret
80000648 <memset>:
80000648: 00f00313 li t1,15
8000064c: 00050713 mv a4,a0
80000650: 02c37e63 bgeu t1,a2,8000068c <memset+0x44>
80000654: 00f77793 andi a5,a4,15
80000658: 0a079063 bnez a5,800006f8 <memset+0xb0>
8000065c: 08059263 bnez a1,800006e0 <memset+0x98>
80000660: ff067693 andi a3,a2,-16
80000664: 00f67613 andi a2,a2,15
80000668: 00e686b3 add a3,a3,a4
8000066c: 00b72023 sw a1,0(a4) # ff0000 <__stack_size+0xfefc00>
80000670: 00b72223 sw a1,4(a4)
80000674: 00b72423 sw a1,8(a4)
80000678: 00b72623 sw a1,12(a4)
8000067c: 01070713 addi a4,a4,16
80000680: fed766e3 bltu a4,a3,8000066c <memset+0x24>
80000684: 00061463 bnez a2,8000068c <memset+0x44>
80000688: 00008067 ret
8000068c: 40c306b3 sub a3,t1,a2
80000690: 00269693 slli a3,a3,0x2
80000694: 00000297 auipc t0,0x0
80000698: 005686b3 add a3,a3,t0
8000069c: 00c68067 jr 12(a3)
800006a0: 00b70723 sb a1,14(a4)
800006a4: 00b706a3 sb a1,13(a4)
800006a8: 00b70623 sb a1,12(a4)
800006ac: 00b705a3 sb a1,11(a4)
800006b0: 00b70523 sb a1,10(a4)
800006b4: 00b704a3 sb a1,9(a4)
800006b8: 00b70423 sb a1,8(a4)
800006bc: 00b703a3 sb a1,7(a4)
800006c0: 00b70323 sb a1,6(a4)
800006c4: 00b702a3 sb a1,5(a4)
800006c8: 00b70223 sb a1,4(a4)
800006cc: 00b701a3 sb a1,3(a4)
800006d0: 00b70123 sb a1,2(a4)
800006d4: 00b700a3 sb a1,1(a4)
800006d8: 00b70023 sb a1,0(a4)
800006dc: 00008067 ret
800006e0: 0ff5f593 andi a1,a1,255
800006e4: 00859693 slli a3,a1,0x8
800006e8: 00d5e5b3 or a1,a1,a3
800006ec: 01059693 slli a3,a1,0x10
800006f0: 00d5e5b3 or a1,a1,a3
800006f4: f6dff06f j 80000660 <memset+0x18>
800006f8: 00279693 slli a3,a5,0x2
800006fc: 00000297 auipc t0,0x0
80000700: 005686b3 add a3,a3,t0
80000704: 00008293 mv t0,ra
80000708: fa0680e7 jalr -96(a3)
8000070c: 00028093 mv ra,t0
80000710: ff078793 addi a5,a5,-16
80000714: 40f70733 sub a4,a4,a5
80000718: 00f60633 add a2,a2,a5
8000071c: f6c378e3 bgeu t1,a2,8000068c <memset+0x44>
80000720: f3dff06f j 8000065c <memset+0x14>
80000724 <__register_exitproc>:
80000724: 800027b7 lui a5,0x80002
80000728: d107a703 lw a4,-752(a5) # 80001d10 <__stack_top+0x81001d10>
8000072c: 14872783 lw a5,328(a4)
80000730: 04078c63 beqz a5,80000788 <__register_exitproc+0x64>
80000734: 0047a703 lw a4,4(a5)
80000738: 01f00813 li a6,31
8000073c: 06e84e63 blt a6,a4,800007b8 <__register_exitproc+0x94>
80000740: 00271813 slli a6,a4,0x2
80000744: 02050663 beqz a0,80000770 <__register_exitproc+0x4c>
80000748: 01078333 add t1,a5,a6
8000074c: 08c32423 sw a2,136(t1)
80000750: 1887a883 lw a7,392(a5)
80000754: 00100613 li a2,1
80000758: 00e61633 sll a2,a2,a4
8000075c: 00c8e8b3 or a7,a7,a2
80000760: 1917a423 sw a7,392(a5)
80000764: 10d32423 sw a3,264(t1)
80000768: 00200693 li a3,2
8000076c: 02d50463 beq a0,a3,80000794 <__register_exitproc+0x70>
80000770: 00170713 addi a4,a4,1
80000774: 00e7a223 sw a4,4(a5)
80000778: 010787b3 add a5,a5,a6
8000077c: 00b7a423 sw a1,8(a5)
80000780: 00000513 li a0,0
80000784: 00008067 ret
80000788: 14c70793 addi a5,a4,332
8000078c: 14f72423 sw a5,328(a4)
80000790: fa5ff06f j 80000734 <__register_exitproc+0x10>
80000794: 18c7a683 lw a3,396(a5)
80000798: 00170713 addi a4,a4,1
8000079c: 00e7a223 sw a4,4(a5)
800007a0: 00c6e633 or a2,a3,a2
800007a4: 18c7a623 sw a2,396(a5)
800007a8: 010787b3 add a5,a5,a6
800007ac: 00b7a423 sw a1,8(a5)
800007b0: 00000513 li a0,0
800007b4: 00008067 ret
800007b8: fff00513 li a0,-1
800007bc: 00008067 ret
800007c0 <__call_exitprocs>:
800007c0: fd010113 addi sp,sp,-48
800007c4: 800027b7 lui a5,0x80002
800007c8: 01412c23 sw s4,24(sp)
800007cc: d107aa03 lw s4,-752(a5) # 80001d10 <__stack_top+0x81001d10>
800007d0: 03212023 sw s2,32(sp)
800007d4: 02112623 sw ra,44(sp)
800007d8: 148a2903 lw s2,328(s4)
800007dc: 02812423 sw s0,40(sp)
800007e0: 02912223 sw s1,36(sp)
800007e4: 01312e23 sw s3,28(sp)
800007e8: 01512a23 sw s5,20(sp)
800007ec: 01612823 sw s6,16(sp)
800007f0: 01712623 sw s7,12(sp)
800007f4: 01812423 sw s8,8(sp)
800007f8: 04090063 beqz s2,80000838 <__call_exitprocs+0x78>
800007fc: 00050b13 mv s6,a0
80000800: 00058b93 mv s7,a1
80000804: 00100a93 li s5,1
80000808: fff00993 li s3,-1
8000080c: 00492483 lw s1,4(s2)
80000810: fff48413 addi s0,s1,-1
80000814: 02044263 bltz s0,80000838 <__call_exitprocs+0x78>
80000818: 00249493 slli s1,s1,0x2
8000081c: 009904b3 add s1,s2,s1
80000820: 040b8463 beqz s7,80000868 <__call_exitprocs+0xa8>
80000824: 1044a783 lw a5,260(s1)
80000828: 05778063 beq a5,s7,80000868 <__call_exitprocs+0xa8>
8000082c: fff40413 addi s0,s0,-1
80000830: ffc48493 addi s1,s1,-4
80000834: ff3416e3 bne s0,s3,80000820 <__call_exitprocs+0x60>
80000838: 02c12083 lw ra,44(sp)
8000083c: 02812403 lw s0,40(sp)
80000840: 02412483 lw s1,36(sp)
80000844: 02012903 lw s2,32(sp)
80000848: 01c12983 lw s3,28(sp)
8000084c: 01812a03 lw s4,24(sp)
80000850: 01412a83 lw s5,20(sp)
80000854: 01012b03 lw s6,16(sp)
80000858: 00c12b83 lw s7,12(sp)
8000085c: 00812c03 lw s8,8(sp)
80000860: 03010113 addi sp,sp,48
80000864: 00008067 ret
80000868: 00492783 lw a5,4(s2)
8000086c: 0044a683 lw a3,4(s1)
80000870: fff78793 addi a5,a5,-1
80000874: 04878e63 beq a5,s0,800008d0 <__call_exitprocs+0x110>
80000878: 0004a223 sw zero,4(s1)
8000087c: fa0688e3 beqz a3,8000082c <__call_exitprocs+0x6c>
80000880: 18892783 lw a5,392(s2)
80000884: 008a9733 sll a4,s5,s0
80000888: 00492c03 lw s8,4(s2)
8000088c: 00f777b3 and a5,a4,a5
80000890: 02079263 bnez a5,800008b4 <__call_exitprocs+0xf4>
80000894: 000680e7 jalr a3
80000898: 00492703 lw a4,4(s2)
8000089c: 148a2783 lw a5,328(s4)
800008a0: 01871463 bne a4,s8,800008a8 <__call_exitprocs+0xe8>
800008a4: f8f904e3 beq s2,a5,8000082c <__call_exitprocs+0x6c>
800008a8: f80788e3 beqz a5,80000838 <__call_exitprocs+0x78>
800008ac: 00078913 mv s2,a5
800008b0: f5dff06f j 8000080c <__call_exitprocs+0x4c>
800008b4: 18c92783 lw a5,396(s2)
800008b8: 0844a583 lw a1,132(s1)
800008bc: 00f77733 and a4,a4,a5
800008c0: 00071c63 bnez a4,800008d8 <__call_exitprocs+0x118>
800008c4: 000b0513 mv a0,s6
800008c8: 000680e7 jalr a3
800008cc: fcdff06f j 80000898 <__call_exitprocs+0xd8>
800008d0: 00892223 sw s0,4(s2)
800008d4: fa9ff06f j 8000087c <__call_exitprocs+0xbc>
800008d8: 00058513 mv a0,a1
800008dc: 000680e7 jalr a3
800008e0: fb9ff06f j 80000898 <__call_exitprocs+0xd8>
Disassembly of section .init_array:
800018e4 <__init_array_start>:
800018e4: 0068 addi a0,sp,12
800018e6: 8000 0x8000
Disassembly of section .data:
800018e8 <impure_data>:
800018e8: 0000 unimp
800018ea: 0000 unimp
800018ec: 1bd4 addi a3,sp,500
800018ee: 8000 0x8000
800018f0: 1c3c addi a5,sp,568
800018f2: 8000 0x8000
800018f4: 1ca4 addi s1,sp,632
800018f6: 8000 0x8000
...
80001990: 0001 nop
80001992: 0000 unimp
80001994: 0000 unimp
80001996: 0000 unimp
80001998: 330e fld ft6,224(sp)
8000199a: abcd j 80001f8c <__BSS_END__+0x1f8>
8000199c: 1234 addi a3,sp,296
8000199e: e66d bnez a2,80001a88 <impure_data+0x1a0>
800019a0: deec sw a1,124(a3)
800019a2: 0005 c.nop 1
800019a4: 0000000b 0xb
...
Disassembly of section .sdata:
80001d10 <_global_impure_ptr>:
80001d10: 18e8 addi a0,sp,124
80001d12: 8000 0x8000
Disassembly of section .bss:
80001d14 <g_wspawn_args>:
...
Disassembly of section .comment:
00000000 <.comment>:
0: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm
4: 2820 fld fs0,80(s0)
6: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm
a: 3920 fld fs0,112(a0)
c: 322e fld ft4,232(sp)
e: 302e fld ft0,232(sp)
...
Disassembly of section .riscv.attributes:
00000000 <.riscv.attributes>:
0: 2541 jal 680 <__stack_size+0x280>
2: 0000 unimp
4: 7200 flw fs0,32(a2)
6: 7369 lui t1,0xffffa
8: 01007663 bgeu zero,a6,14 <__stack_usage+0x14>
c: 0000001b 0x1b
10: 1004 addi s1,sp,32
12: 7205 lui tp,0xfffe1
14: 3376 fld ft6,376(sp)
16: 6932 flw fs2,12(sp)
18: 7032 flw ft0,44(sp)
1a: 5f30 lw a2,120(a4)
1c: 326d jal fffff9c6 <__stack_top+0xfff9c6>
1e: 3070 fld fa2,224(s0)
20: 665f 7032 0030 0x307032665f

Binary file not shown.

View File

@@ -1,246 +0,0 @@
#include <iostream>
#include <unistd.h>
#include <string.h>
#include <vortex.h>
#include <vector>
#include <VX_config.h>
#include "common.h"
// Number of 32-bit test words uploaded to each tested memory region; also the
// modulus used when generating source addresses and reference values.
#define NUM_ADDRS 16
// Evaluates _expr once as an int. A zero result means success; any other value
// prints the failing expression and its result, calls cleanup() and terminates
// the process with exit(-1). Wrapped in do/while(false) so it behaves as a
// single statement.
#define RT_CHECK(_expr) \
do { \
int _ret = _expr; \
if (0 == _ret) \
break; \
printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \
cleanup(); \
exit(-1); \
} while (false)
///////////////////////////////////////////////////////////////////////////////
// Path of the kernel image to upload; overridden by the -k option.
const char* kernel_file = "kernel.bin";
// Value of the -n option; main() treats 0 as 1 and multiplies it by the number
// of hardware tasks to get the number of points.
uint32_t count = 0;
// Device address of the NUM_ADDRS-word block allocated with vx_alloc_dev_mem.
size_t usr_test_mem;
// Per-point addresses the kernel dereferences (filled by gen_input_data).
std::vector<uint32_t> src_data;
// Per-point expected values (filled by gen_ref_data).
std::vector<int32_t> ref_data;
// Device and staging-buffer handles; released by cleanup() when non-null.
vx_device_h device = nullptr;
vx_buffer_h staging_buf = nullptr;
// Prints the program banner and the supported command-line options.
static void show_usage() {
  std::cout << "Vortex Driver Test." << std::endl
            << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl;
}
// Parses the command line with getopt:
//   -n <value>  stored in the global `count` (converted with atoi)
//   -k <file>   stored in the global `kernel_file`
//   -h or '?'   print usage and exit(0)
// Any other value returned by getopt prints usage and exits with -1.
static void parse_args(int argc, char **argv) {
  for (;;) {
    const int opt = getopt(argc, argv, "n:k:h?");
    if (opt == -1)
      break;

    if (opt == 'n') {
      count = atoi(optarg);
    } else if (opt == 'k') {
      kernel_file = optarg;
    } else if (opt == 'h' || opt == '?') {
      show_usage();
      exit(0);
    } else {
      show_usage();
      exit(-1);
    }
  }
}
// Releases the staging buffer and closes the device, skipping whichever handle
// is still null. Also invoked by RT_CHECK before it exits on error.
void cleanup() {
  if (staging_buf != nullptr)
    vx_buf_release(staging_buf);

  if (device != nullptr)
    vx_dev_close(device);
}
// Fills src_data with num_points device addresses for the kernel to read from.
//
// For point i, the word index is j = i % NUM_ADDRS. The address is
// base + j * sizeof(uint32_t), where base is usr_test_mem when j equals the
// current selector k and IO_BASE_ADDR otherwise. The selector is recomputed at
// every fourth point as (i + u) % NUM_ADDRS, with u counting how many times it
// has been recomputed so far.
//
// Each generated address is logged to std::cout. The stream is switched back
// to decimal at the end of every line so the hex formatting used for the
// address does not leak into whatever the caller prints next.
void gen_input_data(uint32_t num_points) {
  src_data.resize(num_points);

  uint32_t u = 0, k = 0;
  for (uint32_t i = 0; i < num_points; ++i) {
    if (0 == (i % 4)) {
      k = (i + u) % NUM_ADDRS;
      ++u;
    }
    uint32_t j = i % NUM_ADDRS;
    // Narrowed to 32 bits on purpose: src_data holds 32-bit address words.
    uint32_t v = ((j == k) ? usr_test_mem : IO_BASE_ADDR) + j * sizeof(uint32_t);
    src_data[i] = v;
    std::cout << std::dec << i << "," << k << ": value=0x" << std::hex << v
              << std::dec << std::endl;
  }
}
// Fills ref_data with the expected result for each point: the square of the
// point's word index (i % NUM_ADDRS).
void gen_ref_data(uint32_t num_points) {
  ref_data.resize(num_points);

  uint32_t point = 0;
  for (auto& expected : ref_data) {
    const uint32_t slot = point % NUM_ADDRS;
    expected = slot * slot;
    ++point;
  }
}
// Runs the uploaded kernel and validates its output.
//
// Starts the device, waits for completion, copies buf_size bytes from the
// destination buffer (kernel_arg.dst_ptr) into the staging buffer, then
// compares the first num_points words against ref_data, logging each mismatch.
// Returns 0 when every value matches and 1 otherwise. A failing runtime call
// terminates the process through RT_CHECK.
int run_test(const kernel_arg_t& kernel_arg,
             uint32_t buf_size,
             uint32_t num_points) {
  // start device
  std::cout << "start device" << std::endl;
  RT_CHECK(vx_start(device));

  // wait for completion
  std::cout << "wait for completion" << std::endl;
  RT_CHECK(vx_ready_wait(device, -1));

  // download destination buffer
  std::cout << "download destination buffer" << std::endl;
  RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));

  // verify result
  std::cout << "verify result" << std::endl;
  int mismatches = 0;
  const int32_t* results = (int32_t*)vx_host_ptr(staging_buf);
  for (uint32_t idx = 0; idx < num_points; ++idx) {
    const int expected = ref_data.at(idx);
    const int actual = results[idx];
    if (actual == expected)
      continue;
    std::cout << "error at result #" << std::dec << idx
              << std::hex << ": actual 0x" << actual << ", expected 0x" << expected << std::endl;
    ++mismatches;
  }

  if (mismatches == 0)
    return 0;

  std::cout << "Found " << std::dec << mismatches << " errors!" << std::endl;
  std::cout << "FAILED!" << std::endl;
  return 1;
}
// Host entry point of the test.
//
// Sequence: parse the command line, open the device, size the problem from the
// device capabilities, generate input/reference data, upload the kernel, its
// argument block, the test address table and the source buffer, pre-fill the
// destination buffer, run the kernel and verify the result via run_test().
//
// Returns 0 on success. Any failing runtime call goes through RT_CHECK.
int main(int argc, char *argv[]) {
  size_t value;
  kernel_arg_t kernel_arg;

  // parse command arguments
  parse_args(argc, argv);

  // an unspecified (or zero) count falls back to one point per task
  if (count == 0) {
    count = 1;
  }

  // constant seed: every run generates the same data
  std::srand(50);

  // open device connection
  std::cout << "open device connection" << std::endl;
  RT_CHECK(vx_dev_open(&device));

  unsigned max_cores, max_warps, max_threads;
  RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
  RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
  RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));

  uint32_t num_tasks = max_cores * max_warps * max_threads;
  uint32_t num_points = count * num_tasks;

  // size in bytes of the test address table (NUM_ADDRS 32-bit words)
  const uint32_t addr_buf_size = NUM_ADDRS * sizeof(uint32_t);
  RT_CHECK(vx_alloc_dev_mem(device, addr_buf_size, &usr_test_mem));

  // generate input data
  gen_input_data(num_points);

  // generate reference data
  gen_ref_data(num_points);

  uint32_t src_buf_size = src_data.size() * sizeof(int32_t);
  uint32_t dst_buf_size = src_data.size() * sizeof(int32_t);

  std::cout << "number of points: " << num_points << std::endl;
  std::cout << "buffer size: " << dst_buf_size << " bytes" << std::endl;

  // upload program
  std::cout << "upload program" << std::endl;
  RT_CHECK(vx_upload_kernel_file(device, kernel_file));

  // allocate device memory
  std::cout << "allocate device memory" << std::endl;
  RT_CHECK(vx_alloc_dev_mem(device, src_buf_size, &value));
  kernel_arg.src_ptr = value;
  RT_CHECK(vx_alloc_dev_mem(device, dst_buf_size, &value));
  kernel_arg.dst_ptr = value;
  kernel_arg.num_points = num_points;

  std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl;
  std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;

  // allocate shared memory
  // The staging buffer is reused for every transfer below, so it must be as
  // large as the biggest one. That includes the NUM_ADDRS-word address table:
  // leaving it out lets the fill loop write past the end of the buffer
  // whenever the table is larger than the source/destination buffers.
  std::cout << "allocate shared memory" << std::endl;
  uint32_t staging_buf_size = std::max<uint32_t>(
      std::max<uint32_t>(src_buf_size, dst_buf_size),
      std::max<uint32_t>(addr_buf_size, sizeof(kernel_arg_t)));
  RT_CHECK(vx_alloc_shared_mem(device, staging_buf_size, &staging_buf));

  // upload kernel argument
  std::cout << "upload kernel argument" << std::endl;
  {
    auto buf_ptr = (int*)vx_host_ptr(staging_buf);
    memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));
    RT_CHECK(vx_copy_to_dev(staging_buf, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
  }

  // upload test address data: entry i holds i*i
  {
    auto buf_ptr = (int32_t*)vx_host_ptr(staging_buf);
    for (uint32_t i = 0; i < NUM_ADDRS; ++i) {
      buf_ptr[i] = i * i;
    }
  }
  // the same table is written to the fixed address 0xFF000000 and to the
  // block allocated in usr_test_mem
  RT_CHECK(vx_copy_to_dev(staging_buf, 0xFF000000, addr_buf_size, 0));
  RT_CHECK(vx_copy_to_dev(staging_buf, usr_test_mem, addr_buf_size, 0));

  // upload source buffer
  {
    auto buf_ptr = (int32_t*)vx_host_ptr(staging_buf);
    for (uint32_t i = 0; i < num_points; ++i) {
      buf_ptr[i] = src_data.at(i);
    }
  }
  std::cout << "upload source buffer" << std::endl;
  RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_ptr, src_buf_size, 0));

  // clear destination buffer: fill it with a recognizable marker value
  {
    auto buf_ptr = (int32_t*)vx_host_ptr(staging_buf);
    for (uint32_t i = 0; i < num_points; ++i) {
      buf_ptr[i] = 0xdeadbeef;
    }
  }
  std::cout << "clear destination buffer" << std::endl;
  RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, dst_buf_size, 0));

  // run tests
  std::cout << "run tests" << std::endl;
  RT_CHECK(run_test(kernel_arg, dst_buf_size, num_points));

  // cleanup
  std::cout << "cleanup" << std::endl;
  cleanup();

  std::cout << "PASSED!" << std::endl;

  return 0;
}

View File

@@ -1,67 +0,0 @@
# Builds the host program $(PROJECT) from $(SRCS) with the host C++ compiler
# and the device kernel (kernel.elf / kernel.bin / kernel.dump) from $(VX_SRCS)
# with the RISC-V toolchain, and provides run-* targets that launch the host
# program against the different driver libraries.

RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain
VORTEX_RT_PATH ?= $(wildcard ../../../runtime)

# Command-line options passed to the host program by the run-* targets.
OPTS ?= -n64

VX_CC  = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc
VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
VX_DP  = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
VX_CP  = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy

VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
VX_CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw

VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a

VX_SRCS = kernel.c

# Optimized alternative to the debug flags below:
#CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors

CXXFLAGS += -I../../include

PROJECT = mstress

SRCS = main.cpp

# Remove a target whose recipe failed (e.g. a truncated kernel.dump) so it is
# not mistaken for an up-to-date file on the next run.
.DELETE_ON_ERROR:

all: $(PROJECT) kernel.bin kernel.dump

kernel.dump: kernel.elf
	$(VX_DP) -D $< > $@

kernel.bin: kernel.elf
	$(VX_CP) -O binary $< $@

kernel.elf: $(VX_SRCS)
	$(VX_CC) $(VX_CFLAGS) $^ $(VX_LDFLAGS) -o $@

# $(SRCS) is named explicitly (not $^) because .depend adds the headers as
# extra prerequisites of this target; they must not reach the command line.
$(PROJECT): $(SRCS)
	$(CXX) $(CXXFLAGS) $(SRCS) $(LDFLAGS) -L../../stub -lvortex -o $@

run-fpga: $(PROJECT)
	LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

run-asesim: $(PROJECT)
	ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

run-vlsim: $(PROJECT)
	LD_LIBRARY_PATH=../../opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

run-rtlsim: $(PROJECT)
	LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

run-simx: $(PROJECT)
	LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

# Header dependencies of the host sources. -MT makes them prerequisites of
# $(PROJECT); without it the generated rule names main.o, which nothing here
# builds, so header changes would never trigger a rebuild.
.depend: $(SRCS)
	$(CXX) $(CXXFLAGS) -MM -MT $(PROJECT) $^ > $@

clean:
	rm -rf $(PROJECT) *.o .depend

clean-all: clean
	rm -rf *.elf *.bin *.dump

# None of these names is a file produced by its recipe.
.PHONY: all run-fpga run-asesim run-vlsim run-rtlsim run-simx clean clean-all

# Load the generated dependencies unless every requested goal is a clean goal
# (otherwise .depend would be regenerated only to be deleted again).
ifeq ($(MAKECMDGOALS),)
-include .depend
else ifneq ($(filter-out clean clean-all,$(MAKECMDGOALS)),)
-include .depend
endif

View File

@@ -1,17 +0,0 @@
#ifndef _COMMON_H_
#define _COMMON_H_

// uint32_t is used below; include it here so the header is self-contained
// instead of relying on the includer's include order.
#include <stdint.h>

// Device address at which the kernel expects to find its kernel_arg_t block.
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000

// Number of indirect loads combined into each output element.
#define NUM_LOADS 8

// Argument block shared between the host program and the device kernel.
struct kernel_arg_t {
  uint32_t num_tasks; // number of tasks the kernel spawns
  uint32_t size;      // NOTE(review): not read by the kernel shown alongside this header - confirm meaning with the host code
  uint32_t stride;    // number of output elements produced by each task
  uint32_t addr_ptr;  // device address of the uint32_t index table
  uint32_t src_ptr;   // device address of the float source array
  uint32_t dst_ptr;   // device address of the float destination array
};

#endif

Binary file not shown.

View File

@@ -1,29 +0,0 @@
#include <stdint.h>
#include <vx_intrinsics.h>
#include <vx_spawn.h>
#include "common.h"
// Per-task body: task `task_id` produces `stride` consecutive output elements
// starting at task_id * stride. Each element is accumulated over NUM_LOADS
// source values selected through the index table.
//
// task_id: index of the task being executed.
// arg:     pointer to the kernel_arg_t block.
//
// NOTE(review): the accumulator starts at 0.0f and is only ever multiplied;
// confirm whether 1.0f (or an addition) was intended. Any change must be
// mirrored in the host-side reference computation.
void kernel_body(int task_id, void* arg) {
  struct kernel_arg_t* params = (struct kernel_arg_t*)(arg);

  uint32_t  per_task = params->stride;
  uint32_t* indices  = (uint32_t*)params->addr_ptr;
  float*    input    = (float*)params->src_ptr;
  float*    output   = (float*)params->dst_ptr;

  // first output element owned by this task
  uint32_t out = task_id * per_task;

  for (uint32_t done = 0; done < per_task; ++done, ++out) {
    float acc = 0.0f;
    // entries out .. out+NUM_LOADS-1 of the index table select the inputs
    uint32_t load = 0;
    while (load < NUM_LOADS) {
      acc *= input[indices[out + load]];
      ++load;
    }
    output[out] = acc;
  }
}
// Kernel entry point: reads the argument block from its fixed device address
// and spawns num_tasks instances of kernel_body with that block as argument.
void main() {
  struct kernel_arg_t* params = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
  uint32_t task_count = params->num_tasks;
  vx_spawn_tasks(task_count, kernel_body, params);
}

View File

@@ -1,731 +0,0 @@
kernel.elf: file format elf32-littleriscv
Disassembly of section .init:
80000000 <_start>:
80000000: 00000597 auipc a1,0x0
80000004: 17458593 addi a1,a1,372 # 80000174 <vx_set_sp>
80000008: fc102573 csrr a0,0xfc1
8000000c: 00b5106b 0xb5106b
80000010: 164000ef jal ra,80000174 <vx_set_sp>
80000014: 00100513 li a0,1
80000018: 0005006b 0x5006b
8000001c: 00002517 auipc a0,0x2
80000020: dc050513 addi a0,a0,-576 # 80001ddc <g_wspawn_args>
80000024: 00002617 auipc a2,0x2
80000028: e3860613 addi a2,a2,-456 # 80001e5c <__BSS_END__>
8000002c: 40a60633 sub a2,a2,a0
80000030: 00000593 li a1,0
80000034: 6d8000ef jal ra,8000070c <memset>
80000038: 00000517 auipc a0,0x0
8000003c: 5dc50513 addi a0,a0,1500 # 80000614 <__libc_fini_array>
80000040: 58c000ef jal ra,800005cc <atexit>
80000044: 62c000ef jal ra,80000670 <__libc_init_array>
80000048: 008000ef jal ra,80000050 <main>
8000004c: 5940006f j 800005e0 <exit>
Disassembly of section .text:
80000050 <main>:
80000050: 7ffff7b7 lui a5,0x7ffff
80000054: 0007a503 lw a0,0(a5) # 7ffff000 <__stack_size+0x7fffec00>
80000058: 800005b7 lui a1,0x80000
8000005c: 7ffff637 lui a2,0x7ffff
80000060: 08058593 addi a1,a1,128 # 80000080 <__stack_top+0x81000080>
80000064: 20c0006f j 80000270 <vx_spawn_tasks>
80000068 <register_fini>:
80000068: 00000793 li a5,0
8000006c: 00078863 beqz a5,8000007c <register_fini+0x14>
80000070: 80000537 lui a0,0x80000
80000074: 61450513 addi a0,a0,1556 # 80000614 <__stack_top+0x81000614>
80000078: 5540006f j 800005cc <atexit>
8000007c: 00008067 ret
80000080 <kernel_body>:
80000080: 0085a783 lw a5,8(a1)
80000084: 00c5a603 lw a2,12(a1)
80000088: 0105a703 lw a4,16(a1)
8000008c: 02f506b3 mul a3,a0,a5
80000090: 0145a883 lw a7,20(a1)
80000094: 0c078863 beqz a5,80000164 <kernel_body+0xe4>
80000098: 00d78833 add a6,a5,a3
8000009c: f0000653 fmv.w.x fa2,zero
800000a0: 00269693 slli a3,a3,0x2
800000a4: 00281813 slli a6,a6,0x2
800000a8: 00c686b3 add a3,a3,a2
800000ac: 00c80833 add a6,a6,a2
800000b0: 40c888b3 sub a7,a7,a2
800000b4: 0006a583 lw a1,0(a3)
800000b8: 0086a603 lw a2,8(a3)
800000bc: 00c6a503 lw a0,12(a3)
800000c0: 00259593 slli a1,a1,0x2
800000c4: 00b705b3 add a1,a4,a1
800000c8: 0005a787 flw fa5,0(a1)
800000cc: 0046a583 lw a1,4(a3)
800000d0: 00261613 slli a2,a2,0x2
800000d4: 10f677d3 fmul.s fa5,fa2,fa5
800000d8: 00259593 slli a1,a1,0x2
800000dc: 00b705b3 add a1,a4,a1
800000e0: 0005a687 flw fa3,0(a1)
800000e4: 00c70633 add a2,a4,a2
800000e8: 00062707 flw fa4,0(a2) # 7ffff000 <__stack_size+0x7fffec00>
800000ec: 10d7f7d3 fmul.s fa5,fa5,fa3
800000f0: 00251513 slli a0,a0,0x2
800000f4: 00a70533 add a0,a4,a0
800000f8: 0106a583 lw a1,16(a3)
800000fc: 0146a603 lw a2,20(a3)
80000100: 10e7f7d3 fmul.s fa5,fa5,fa4
80000104: 00052707 flw fa4,0(a0)
80000108: 00259593 slli a1,a1,0x2
8000010c: 00b705b3 add a1,a4,a1
80000110: 0005a687 flw fa3,0(a1)
80000114: 10e7f7d3 fmul.s fa5,fa5,fa4
80000118: 00261613 slli a2,a2,0x2
8000011c: 00c70633 add a2,a4,a2
80000120: 00062707 flw fa4,0(a2)
80000124: 0186a583 lw a1,24(a3)
80000128: 10d7f7d3 fmul.s fa5,fa5,fa3
8000012c: 01c6a603 lw a2,28(a3)
80000130: 00259593 slli a1,a1,0x2
80000134: 00b705b3 add a1,a4,a1
80000138: 00261613 slli a2,a2,0x2
8000013c: 10e7f7d3 fmul.s fa5,fa5,fa4
80000140: 0005a707 flw fa4,0(a1)
80000144: 00c70633 add a2,a4,a2
80000148: 00d887b3 add a5,a7,a3
8000014c: 00468693 addi a3,a3,4
80000150: 10e7f7d3 fmul.s fa5,fa5,fa4
80000154: 00062707 flw fa4,0(a2)
80000158: 10f777d3 fmul.s fa5,fa4,fa5
8000015c: 00f7a027 fsw fa5,0(a5)
80000160: f4d81ae3 bne a6,a3,800000b4 <kernel_body+0x34>
80000164: 00008067 ret
80000168 <_exit>:
80000168: 250000ef jal ra,800003b8 <vx_perf_dump>
8000016c: 00000513 li a0,0
80000170: 0005006b 0x5006b
80000174 <vx_set_sp>:
80000174: fc002573 csrr a0,0xfc0
80000178: 0005006b 0x5006b
8000017c: 00002197 auipc gp,0x2
80000180: 03418193 addi gp,gp,52 # 800021b0 <__global_pointer>
80000184: 7f000117 auipc sp,0x7f000
80000188: e7c10113 addi sp,sp,-388 # ff000000 <__stack_top>
8000018c: 40000593 li a1,1024
80000190: cc102673 csrr a2,0xcc1
80000194: 02c585b3 mul a1,a1,a2
80000198: 40b10133 sub sp,sp,a1
8000019c: cc3026f3 csrr a3,0xcc3
800001a0: 00068663 beqz a3,800001ac <RETURN>
800001a4: 00000513 li a0,0
800001a8: 0005006b 0x5006b
800001ac <RETURN>:
800001ac: 00008067 ret
800001b0 <spawn_tasks_callback>:
800001b0: fe010113 addi sp,sp,-32
800001b4: 00112e23 sw ra,28(sp)
800001b8: 00812c23 sw s0,24(sp)
800001bc: 00912a23 sw s1,20(sp)
800001c0: 01212823 sw s2,16(sp)
800001c4: 01312623 sw s3,12(sp)
800001c8: fc0027f3 csrr a5,0xfc0
800001cc: 0007806b 0x7806b
800001d0: cc5026f3 csrr a3,0xcc5
800001d4: cc3029f3 csrr s3,0xcc3
800001d8: cc002773 csrr a4,0xcc0
800001dc: fc002673 csrr a2,0xfc0
800001e0: 800027b7 lui a5,0x80002
800001e4: 00269693 slli a3,a3,0x2
800001e8: ddc78793 addi a5,a5,-548 # 80001ddc <__stack_top+0x81001ddc>
800001ec: 00d787b3 add a5,a5,a3
800001f0: 0007a483 lw s1,0(a5)
800001f4: 0104a403 lw s0,16(s1)
800001f8: 00c4a683 lw a3,12(s1)
800001fc: 0089a933 slt s2,s3,s0
80000200: 00040793 mv a5,s0
80000204: 00d90933 add s2,s2,a3
80000208: 03368433 mul s0,a3,s3
8000020c: 00f9d463 bge s3,a5,80000214 <spawn_tasks_callback+0x64>
80000210: 00098793 mv a5,s3
80000214: 00f40433 add s0,s0,a5
80000218: 0084a683 lw a3,8(s1)
8000021c: 02c40433 mul s0,s0,a2
80000220: 02e907b3 mul a5,s2,a4
80000224: 00d40433 add s0,s0,a3
80000228: 00f40433 add s0,s0,a5
8000022c: 00890933 add s2,s2,s0
80000230: 01245e63 bge s0,s2,8000024c <spawn_tasks_callback+0x9c>
80000234: 0004a783 lw a5,0(s1)
80000238: 0044a583 lw a1,4(s1)
8000023c: 00040513 mv a0,s0
80000240: 00140413 addi s0,s0,1
80000244: 000780e7 jalr a5
80000248: fe8916e3 bne s2,s0,80000234 <spawn_tasks_callback+0x84>
8000024c: 0019b993 seqz s3,s3
80000250: 0009806b 0x9806b
80000254: 01c12083 lw ra,28(sp)
80000258: 01812403 lw s0,24(sp)
8000025c: 01412483 lw s1,20(sp)
80000260: 01012903 lw s2,16(sp)
80000264: 00c12983 lw s3,12(sp)
80000268: 02010113 addi sp,sp,32
8000026c: 00008067 ret
80000270 <vx_spawn_tasks>:
80000270: fc010113 addi sp,sp,-64
80000274: 02112e23 sw ra,60(sp)
80000278: 02812c23 sw s0,56(sp)
8000027c: 02912a23 sw s1,52(sp)
80000280: 03212823 sw s2,48(sp)
80000284: 03312623 sw s3,44(sp)
80000288: fc2026f3 csrr a3,0xfc2
8000028c: fc102873 csrr a6,0xfc1
80000290: fc002473 csrr s0,0xfc0
80000294: cc5027f3 csrr a5,0xcc5
80000298: 01f00713 li a4,31
8000029c: 0cf74463 blt a4,a5,80000364 <vx_spawn_tasks+0xf4>
800002a0: 030408b3 mul a7,s0,a6
800002a4: 00100713 li a4,1
800002a8: 00a8d463 bge a7,a0,800002b0 <vx_spawn_tasks+0x40>
800002ac: 03154733 div a4,a0,a7
800002b0: 0ce6c863 blt a3,a4,80000380 <vx_spawn_tasks+0x110>
800002b4: 0ae7d863 bge a5,a4,80000364 <vx_spawn_tasks+0xf4>
800002b8: fff68693 addi a3,a3,-1
800002bc: 02e54333 div t1,a0,a4
800002c0: 00030893 mv a7,t1
800002c4: 00f69663 bne a3,a5,800002d0 <vx_spawn_tasks+0x60>
800002c8: 02e56533 rem a0,a0,a4
800002cc: 006508b3 add a7,a0,t1
800002d0: 0288c4b3 div s1,a7,s0
800002d4: 0288e933 rem s2,a7,s0
800002d8: 0b04ca63 blt s1,a6,8000038c <vx_spawn_tasks+0x11c>
800002dc: 00100693 li a3,1
800002e0: 0304c733 div a4,s1,a6
800002e4: 00070663 beqz a4,800002f0 <vx_spawn_tasks+0x80>
800002e8: 00070693 mv a3,a4
800002ec: 0304e733 rem a4,s1,a6
800002f0: 800029b7 lui s3,0x80002
800002f4: ddc98993 addi s3,s3,-548 # 80001ddc <__stack_top+0x81001ddc>
800002f8: 00e12e23 sw a4,28(sp)
800002fc: 00c10713 addi a4,sp,12
80000300: 00b12623 sw a1,12(sp)
80000304: 00c12823 sw a2,16(sp)
80000308: 00d12c23 sw a3,24(sp)
8000030c: 02f30333 mul t1,t1,a5
80000310: 00279793 slli a5,a5,0x2
80000314: 00f987b3 add a5,s3,a5
80000318: 00e7a023 sw a4,0(a5)
8000031c: 00612a23 sw t1,20(sp)
80000320: 06904c63 bgtz s1,80000398 <vx_spawn_tasks+0x128>
80000324: 04090063 beqz s2,80000364 <vx_spawn_tasks+0xf4>
80000328: 02848433 mul s0,s1,s0
8000032c: 00812a23 sw s0,20(sp)
80000330: 0009006b 0x9006b
80000334: cc5027f3 csrr a5,0xcc5
80000338: cc202573 csrr a0,0xcc2
8000033c: 00279793 slli a5,a5,0x2
80000340: 00f989b3 add s3,s3,a5
80000344: 0009a783 lw a5,0(s3)
80000348: 0087a683 lw a3,8(a5)
8000034c: 0007a703 lw a4,0(a5)
80000350: 0047a583 lw a1,4(a5)
80000354: 00d50533 add a0,a0,a3
80000358: 000700e7 jalr a4
8000035c: 00100793 li a5,1
80000360: 0007806b 0x7806b
80000364: 03c12083 lw ra,60(sp)
80000368: 03812403 lw s0,56(sp)
8000036c: 03412483 lw s1,52(sp)
80000370: 03012903 lw s2,48(sp)
80000374: 02c12983 lw s3,44(sp)
80000378: 04010113 addi sp,sp,64
8000037c: 00008067 ret
80000380: 00068713 mv a4,a3
80000384: f2e7cae3 blt a5,a4,800002b8 <vx_spawn_tasks+0x48>
80000388: fddff06f j 80000364 <vx_spawn_tasks+0xf4>
8000038c: 00000713 li a4,0
80000390: 00100693 li a3,1
80000394: f5dff06f j 800002f0 <vx_spawn_tasks+0x80>
80000398: 00048713 mv a4,s1
8000039c: 00985463 bge a6,s1,800003a4 <vx_spawn_tasks+0x134>
800003a0: 00080713 mv a4,a6
800003a4: 800007b7 lui a5,0x80000
800003a8: 1b078793 addi a5,a5,432 # 800001b0 <__stack_top+0x810001b0>
800003ac: 00f7106b 0xf7106b
800003b0: e01ff0ef jal ra,800001b0 <spawn_tasks_callback>
800003b4: f71ff06f j 80000324 <vx_spawn_tasks+0xb4>
800003b8 <vx_perf_dump>:
800003b8: cc5027f3 csrr a5,0xcc5
800003bc: 00ff0737 lui a4,0xff0
800003c0: 00e787b3 add a5,a5,a4
800003c4: 00879793 slli a5,a5,0x8
800003c8: b0002773 csrr a4,mcycle
800003cc: 00e7a023 sw a4,0(a5)
800003d0: b0102773 csrr a4,0xb01
800003d4: 00e7a223 sw a4,4(a5)
800003d8: b0202773 csrr a4,minstret
800003dc: 00e7a423 sw a4,8(a5)
800003e0: b0302773 csrr a4,mhpmcounter3
800003e4: 00e7a623 sw a4,12(a5)
800003e8: b0402773 csrr a4,mhpmcounter4
800003ec: 00e7a823 sw a4,16(a5)
800003f0: b0502773 csrr a4,mhpmcounter5
800003f4: 00e7aa23 sw a4,20(a5)
800003f8: b0602773 csrr a4,mhpmcounter6
800003fc: 00e7ac23 sw a4,24(a5)
80000400: b0702773 csrr a4,mhpmcounter7
80000404: 00e7ae23 sw a4,28(a5)
80000408: b0802773 csrr a4,mhpmcounter8
8000040c: 02e7a023 sw a4,32(a5)
80000410: b0902773 csrr a4,mhpmcounter9
80000414: 02e7a223 sw a4,36(a5)
80000418: b0a02773 csrr a4,mhpmcounter10
8000041c: 02e7a423 sw a4,40(a5)
80000420: b0b02773 csrr a4,mhpmcounter11
80000424: 02e7a623 sw a4,44(a5)
80000428: b0c02773 csrr a4,mhpmcounter12
8000042c: 02e7a823 sw a4,48(a5)
80000430: b0d02773 csrr a4,mhpmcounter13
80000434: 02e7aa23 sw a4,52(a5)
80000438: b0e02773 csrr a4,mhpmcounter14
8000043c: 02e7ac23 sw a4,56(a5)
80000440: b0f02773 csrr a4,mhpmcounter15
80000444: 02e7ae23 sw a4,60(a5)
80000448: b1002773 csrr a4,mhpmcounter16
8000044c: 04e7a023 sw a4,64(a5)
80000450: b1102773 csrr a4,mhpmcounter17
80000454: 04e7a223 sw a4,68(a5)
80000458: b1202773 csrr a4,mhpmcounter18
8000045c: 04e7a423 sw a4,72(a5)
80000460: b1302773 csrr a4,mhpmcounter19
80000464: 04e7a623 sw a4,76(a5)
80000468: b1402773 csrr a4,mhpmcounter20
8000046c: 04e7a823 sw a4,80(a5)
80000470: b1502773 csrr a4,mhpmcounter21
80000474: 04e7aa23 sw a4,84(a5)
80000478: b1602773 csrr a4,mhpmcounter22
8000047c: 04e7ac23 sw a4,88(a5)
80000480: b1702773 csrr a4,mhpmcounter23
80000484: 04e7ae23 sw a4,92(a5)
80000488: b1802773 csrr a4,mhpmcounter24
8000048c: 06e7a023 sw a4,96(a5)
80000490: b1902773 csrr a4,mhpmcounter25
80000494: 06e7a223 sw a4,100(a5)
80000498: b1a02773 csrr a4,mhpmcounter26
8000049c: 06e7a423 sw a4,104(a5)
800004a0: b1b02773 csrr a4,mhpmcounter27
800004a4: 06e7a623 sw a4,108(a5)
800004a8: b1c02773 csrr a4,mhpmcounter28
800004ac: 06e7a823 sw a4,112(a5)
800004b0: b1d02773 csrr a4,mhpmcounter29
800004b4: 06e7aa23 sw a4,116(a5)
800004b8: b1e02773 csrr a4,mhpmcounter30
800004bc: 06e7ac23 sw a4,120(a5)
800004c0: b1f02773 csrr a4,mhpmcounter31
800004c4: 06e7ae23 sw a4,124(a5)
800004c8: b8002773 csrr a4,mcycleh
800004cc: 08e7a023 sw a4,128(a5)
800004d0: b8102773 csrr a4,0xb81
800004d4: 08e7a223 sw a4,132(a5)
800004d8: b8202773 csrr a4,minstreth
800004dc: 08e7a423 sw a4,136(a5)
800004e0: b8302773 csrr a4,mhpmcounter3h
800004e4: 08e7a623 sw a4,140(a5)
800004e8: b8402773 csrr a4,mhpmcounter4h
800004ec: 08e7a823 sw a4,144(a5)
800004f0: b8502773 csrr a4,mhpmcounter5h
800004f4: 08e7aa23 sw a4,148(a5)
800004f8: b8602773 csrr a4,mhpmcounter6h
800004fc: 08e7ac23 sw a4,152(a5)
80000500: b8702773 csrr a4,mhpmcounter7h
80000504: 08e7ae23 sw a4,156(a5)
80000508: b8802773 csrr a4,mhpmcounter8h
8000050c: 0ae7a023 sw a4,160(a5)
80000510: b8902773 csrr a4,mhpmcounter9h
80000514: 0ae7a223 sw a4,164(a5)
80000518: b8a02773 csrr a4,mhpmcounter10h
8000051c: 0ae7a423 sw a4,168(a5)
80000520: b8b02773 csrr a4,mhpmcounter11h
80000524: 0ae7a623 sw a4,172(a5)
80000528: b8c02773 csrr a4,mhpmcounter12h
8000052c: 0ae7a823 sw a4,176(a5)
80000530: b8d02773 csrr a4,mhpmcounter13h
80000534: 0ae7aa23 sw a4,180(a5)
80000538: b8e02773 csrr a4,mhpmcounter14h
8000053c: 0ae7ac23 sw a4,184(a5)
80000540: b8f02773 csrr a4,mhpmcounter15h
80000544: 0ae7ae23 sw a4,188(a5)
80000548: b9002773 csrr a4,mhpmcounter16h
8000054c: 0ce7a023 sw a4,192(a5)
80000550: b9102773 csrr a4,mhpmcounter17h
80000554: 0ce7a223 sw a4,196(a5)
80000558: b9202773 csrr a4,mhpmcounter18h
8000055c: 0ce7a423 sw a4,200(a5)
80000560: b9302773 csrr a4,mhpmcounter19h
80000564: 0ce7a623 sw a4,204(a5)
80000568: b9402773 csrr a4,mhpmcounter20h
8000056c: 0ce7a823 sw a4,208(a5)
80000570: b9502773 csrr a4,mhpmcounter21h
80000574: 0ce7aa23 sw a4,212(a5)
80000578: b9602773 csrr a4,mhpmcounter22h
8000057c: 0ce7ac23 sw a4,216(a5)
80000580: b9702773 csrr a4,mhpmcounter23h
80000584: 0ce7ae23 sw a4,220(a5)
80000588: b9802773 csrr a4,mhpmcounter24h
8000058c: 0ee7a023 sw a4,224(a5)
80000590: b9902773 csrr a4,mhpmcounter25h
80000594: 0ee7a223 sw a4,228(a5)
80000598: b9a02773 csrr a4,mhpmcounter26h
8000059c: 0ee7a423 sw a4,232(a5)
800005a0: b9b02773 csrr a4,mhpmcounter27h
800005a4: 0ee7a623 sw a4,236(a5)
800005a8: b9c02773 csrr a4,mhpmcounter28h
800005ac: 0ee7a823 sw a4,240(a5)
800005b0: b9d02773 csrr a4,mhpmcounter29h
800005b4: 0ee7aa23 sw a4,244(a5)
800005b8: b9e02773 csrr a4,mhpmcounter30h
800005bc: 0ee7ac23 sw a4,248(a5)
800005c0: b9f02773 csrr a4,mhpmcounter31h
800005c4: 0ee7ae23 sw a4,252(a5)
800005c8: 00008067 ret
800005cc <atexit>:
800005cc: 00050593 mv a1,a0
800005d0: 00000693 li a3,0
800005d4: 00000613 li a2,0
800005d8: 00000513 li a0,0
800005dc: 20c0006f j 800007e8 <__register_exitproc>
800005e0 <exit>:
800005e0: ff010113 addi sp,sp,-16
800005e4: 00000593 li a1,0
800005e8: 00812423 sw s0,8(sp)
800005ec: 00112623 sw ra,12(sp)
800005f0: 00050413 mv s0,a0
800005f4: 290000ef jal ra,80000884 <__call_exitprocs>
800005f8: 800027b7 lui a5,0x80002
800005fc: dd87a503 lw a0,-552(a5) # 80001dd8 <__stack_top+0x81001dd8>
80000600: 03c52783 lw a5,60(a0)
80000604: 00078463 beqz a5,8000060c <exit+0x2c>
80000608: 000780e7 jalr a5
8000060c: 00040513 mv a0,s0
80000610: b59ff0ef jal ra,80000168 <_exit>
80000614 <__libc_fini_array>:
80000614: ff010113 addi sp,sp,-16
80000618: 00812423 sw s0,8(sp)
8000061c: 800027b7 lui a5,0x80002
80000620: 80002437 lui s0,0x80002
80000624: 9ac40413 addi s0,s0,-1620 # 800019ac <__stack_top+0x810019ac>
80000628: 9ac78793 addi a5,a5,-1620 # 800019ac <__stack_top+0x810019ac>
8000062c: 408787b3 sub a5,a5,s0
80000630: 00912223 sw s1,4(sp)
80000634: 00112623 sw ra,12(sp)
80000638: 4027d493 srai s1,a5,0x2
8000063c: 02048063 beqz s1,8000065c <__libc_fini_array+0x48>
80000640: ffc78793 addi a5,a5,-4
80000644: 00878433 add s0,a5,s0
80000648: 00042783 lw a5,0(s0)
8000064c: fff48493 addi s1,s1,-1
80000650: ffc40413 addi s0,s0,-4
80000654: 000780e7 jalr a5
80000658: fe0498e3 bnez s1,80000648 <__libc_fini_array+0x34>
8000065c: 00c12083 lw ra,12(sp)
80000660: 00812403 lw s0,8(sp)
80000664: 00412483 lw s1,4(sp)
80000668: 01010113 addi sp,sp,16
8000066c: 00008067 ret
80000670 <__libc_init_array>:
80000670: ff010113 addi sp,sp,-16
80000674: 00812423 sw s0,8(sp)
80000678: 01212023 sw s2,0(sp)
8000067c: 80002437 lui s0,0x80002
80000680: 80002937 lui s2,0x80002
80000684: 9a840793 addi a5,s0,-1624 # 800019a8 <__stack_top+0x810019a8>
80000688: 9a890913 addi s2,s2,-1624 # 800019a8 <__stack_top+0x810019a8>
8000068c: 40f90933 sub s2,s2,a5
80000690: 00112623 sw ra,12(sp)
80000694: 00912223 sw s1,4(sp)
80000698: 40295913 srai s2,s2,0x2
8000069c: 02090063 beqz s2,800006bc <__libc_init_array+0x4c>
800006a0: 9a840413 addi s0,s0,-1624
800006a4: 00000493 li s1,0
800006a8: 00042783 lw a5,0(s0)
800006ac: 00148493 addi s1,s1,1
800006b0: 00440413 addi s0,s0,4
800006b4: 000780e7 jalr a5
800006b8: fe9918e3 bne s2,s1,800006a8 <__libc_init_array+0x38>
800006bc: 80002437 lui s0,0x80002
800006c0: 80002937 lui s2,0x80002
800006c4: 9a840793 addi a5,s0,-1624 # 800019a8 <__stack_top+0x810019a8>
800006c8: 9ac90913 addi s2,s2,-1620 # 800019ac <__stack_top+0x810019ac>
800006cc: 40f90933 sub s2,s2,a5
800006d0: 40295913 srai s2,s2,0x2
800006d4: 02090063 beqz s2,800006f4 <__libc_init_array+0x84>
800006d8: 9a840413 addi s0,s0,-1624
800006dc: 00000493 li s1,0
800006e0: 00042783 lw a5,0(s0)
800006e4: 00148493 addi s1,s1,1
800006e8: 00440413 addi s0,s0,4
800006ec: 000780e7 jalr a5
800006f0: fe9918e3 bne s2,s1,800006e0 <__libc_init_array+0x70>
800006f4: 00c12083 lw ra,12(sp)
800006f8: 00812403 lw s0,8(sp)
800006fc: 00412483 lw s1,4(sp)
80000700: 00012903 lw s2,0(sp)
80000704: 01010113 addi sp,sp,16
80000708: 00008067 ret
8000070c <memset>:
8000070c: 00f00313 li t1,15
80000710: 00050713 mv a4,a0
80000714: 02c37e63 bgeu t1,a2,80000750 <memset+0x44>
80000718: 00f77793 andi a5,a4,15
8000071c: 0a079063 bnez a5,800007bc <memset+0xb0>
80000720: 08059263 bnez a1,800007a4 <memset+0x98>
80000724: ff067693 andi a3,a2,-16
80000728: 00f67613 andi a2,a2,15
8000072c: 00e686b3 add a3,a3,a4
80000730: 00b72023 sw a1,0(a4) # ff0000 <__stack_size+0xfefc00>
80000734: 00b72223 sw a1,4(a4)
80000738: 00b72423 sw a1,8(a4)
8000073c: 00b72623 sw a1,12(a4)
80000740: 01070713 addi a4,a4,16
80000744: fed766e3 bltu a4,a3,80000730 <memset+0x24>
80000748: 00061463 bnez a2,80000750 <memset+0x44>
8000074c: 00008067 ret
80000750: 40c306b3 sub a3,t1,a2
80000754: 00269693 slli a3,a3,0x2
80000758: 00000297 auipc t0,0x0
8000075c: 005686b3 add a3,a3,t0
80000760: 00c68067 jr 12(a3)
80000764: 00b70723 sb a1,14(a4)
80000768: 00b706a3 sb a1,13(a4)
8000076c: 00b70623 sb a1,12(a4)
80000770: 00b705a3 sb a1,11(a4)
80000774: 00b70523 sb a1,10(a4)
80000778: 00b704a3 sb a1,9(a4)
8000077c: 00b70423 sb a1,8(a4)
80000780: 00b703a3 sb a1,7(a4)
80000784: 00b70323 sb a1,6(a4)
80000788: 00b702a3 sb a1,5(a4)
8000078c: 00b70223 sb a1,4(a4)
80000790: 00b701a3 sb a1,3(a4)
80000794: 00b70123 sb a1,2(a4)
80000798: 00b700a3 sb a1,1(a4)
8000079c: 00b70023 sb a1,0(a4)
800007a0: 00008067 ret
800007a4: 0ff5f593 andi a1,a1,255
800007a8: 00859693 slli a3,a1,0x8
800007ac: 00d5e5b3 or a1,a1,a3
800007b0: 01059693 slli a3,a1,0x10
800007b4: 00d5e5b3 or a1,a1,a3
800007b8: f6dff06f j 80000724 <memset+0x18>
800007bc: 00279693 slli a3,a5,0x2
800007c0: 00000297 auipc t0,0x0
800007c4: 005686b3 add a3,a3,t0
800007c8: 00008293 mv t0,ra
800007cc: fa0680e7 jalr -96(a3)
800007d0: 00028093 mv ra,t0
800007d4: ff078793 addi a5,a5,-16
800007d8: 40f70733 sub a4,a4,a5
800007dc: 00f60633 add a2,a2,a5
800007e0: f6c378e3 bgeu t1,a2,80000750 <memset+0x44>
800007e4: f3dff06f j 80000720 <memset+0x14>
800007e8 <__register_exitproc>:
800007e8: 800027b7 lui a5,0x80002
800007ec: dd87a703 lw a4,-552(a5) # 80001dd8 <__stack_top+0x81001dd8>
800007f0: 14872783 lw a5,328(a4)
800007f4: 04078c63 beqz a5,8000084c <__register_exitproc+0x64>
800007f8: 0047a703 lw a4,4(a5)
800007fc: 01f00813 li a6,31
80000800: 06e84e63 blt a6,a4,8000087c <__register_exitproc+0x94>
80000804: 00271813 slli a6,a4,0x2
80000808: 02050663 beqz a0,80000834 <__register_exitproc+0x4c>
8000080c: 01078333 add t1,a5,a6
80000810: 08c32423 sw a2,136(t1)
80000814: 1887a883 lw a7,392(a5)
80000818: 00100613 li a2,1
8000081c: 00e61633 sll a2,a2,a4
80000820: 00c8e8b3 or a7,a7,a2
80000824: 1917a423 sw a7,392(a5)
80000828: 10d32423 sw a3,264(t1)
8000082c: 00200693 li a3,2
80000830: 02d50463 beq a0,a3,80000858 <__register_exitproc+0x70>
80000834: 00170713 addi a4,a4,1
80000838: 00e7a223 sw a4,4(a5)
8000083c: 010787b3 add a5,a5,a6
80000840: 00b7a423 sw a1,8(a5)
80000844: 00000513 li a0,0
80000848: 00008067 ret
8000084c: 14c70793 addi a5,a4,332
80000850: 14f72423 sw a5,328(a4)
80000854: fa5ff06f j 800007f8 <__register_exitproc+0x10>
80000858: 18c7a683 lw a3,396(a5)
8000085c: 00170713 addi a4,a4,1
80000860: 00e7a223 sw a4,4(a5)
80000864: 00c6e633 or a2,a3,a2
80000868: 18c7a623 sw a2,396(a5)
8000086c: 010787b3 add a5,a5,a6
80000870: 00b7a423 sw a1,8(a5)
80000874: 00000513 li a0,0
80000878: 00008067 ret
8000087c: fff00513 li a0,-1
80000880: 00008067 ret
80000884 <__call_exitprocs>:
80000884: fd010113 addi sp,sp,-48
80000888: 800027b7 lui a5,0x80002
8000088c: 01412c23 sw s4,24(sp)
80000890: dd87aa03 lw s4,-552(a5) # 80001dd8 <__stack_top+0x81001dd8>
80000894: 03212023 sw s2,32(sp)
80000898: 02112623 sw ra,44(sp)
8000089c: 148a2903 lw s2,328(s4)
800008a0: 02812423 sw s0,40(sp)
800008a4: 02912223 sw s1,36(sp)
800008a8: 01312e23 sw s3,28(sp)
800008ac: 01512a23 sw s5,20(sp)
800008b0: 01612823 sw s6,16(sp)
800008b4: 01712623 sw s7,12(sp)
800008b8: 01812423 sw s8,8(sp)
800008bc: 04090063 beqz s2,800008fc <__call_exitprocs+0x78>
800008c0: 00050b13 mv s6,a0
800008c4: 00058b93 mv s7,a1
800008c8: 00100a93 li s5,1
800008cc: fff00993 li s3,-1
800008d0: 00492483 lw s1,4(s2)
800008d4: fff48413 addi s0,s1,-1
800008d8: 02044263 bltz s0,800008fc <__call_exitprocs+0x78>
800008dc: 00249493 slli s1,s1,0x2
800008e0: 009904b3 add s1,s2,s1
800008e4: 040b8463 beqz s7,8000092c <__call_exitprocs+0xa8>
800008e8: 1044a783 lw a5,260(s1)
800008ec: 05778063 beq a5,s7,8000092c <__call_exitprocs+0xa8>
800008f0: fff40413 addi s0,s0,-1
800008f4: ffc48493 addi s1,s1,-4
800008f8: ff3416e3 bne s0,s3,800008e4 <__call_exitprocs+0x60>
800008fc: 02c12083 lw ra,44(sp)
80000900: 02812403 lw s0,40(sp)
80000904: 02412483 lw s1,36(sp)
80000908: 02012903 lw s2,32(sp)
8000090c: 01c12983 lw s3,28(sp)
80000910: 01812a03 lw s4,24(sp)
80000914: 01412a83 lw s5,20(sp)
80000918: 01012b03 lw s6,16(sp)
8000091c: 00c12b83 lw s7,12(sp)
80000920: 00812c03 lw s8,8(sp)
80000924: 03010113 addi sp,sp,48
80000928: 00008067 ret
8000092c: 00492783 lw a5,4(s2)
80000930: 0044a683 lw a3,4(s1)
80000934: fff78793 addi a5,a5,-1
80000938: 04878e63 beq a5,s0,80000994 <__call_exitprocs+0x110>
8000093c: 0004a223 sw zero,4(s1)
80000940: fa0688e3 beqz a3,800008f0 <__call_exitprocs+0x6c>
80000944: 18892783 lw a5,392(s2)
80000948: 008a9733 sll a4,s5,s0
8000094c: 00492c03 lw s8,4(s2)
80000950: 00f777b3 and a5,a4,a5
80000954: 02079263 bnez a5,80000978 <__call_exitprocs+0xf4>
80000958: 000680e7 jalr a3
8000095c: 00492703 lw a4,4(s2)
80000960: 148a2783 lw a5,328(s4)
80000964: 01871463 bne a4,s8,8000096c <__call_exitprocs+0xe8>
80000968: f8f904e3 beq s2,a5,800008f0 <__call_exitprocs+0x6c>
8000096c: f80788e3 beqz a5,800008fc <__call_exitprocs+0x78>
80000970: 00078913 mv s2,a5
80000974: f5dff06f j 800008d0 <__call_exitprocs+0x4c>
80000978: 18c92783 lw a5,396(s2)
8000097c: 0844a583 lw a1,132(s1)
80000980: 00f77733 and a4,a4,a5
80000984: 00071c63 bnez a4,8000099c <__call_exitprocs+0x118>
80000988: 000b0513 mv a0,s6
8000098c: 000680e7 jalr a3
80000990: fcdff06f j 8000095c <__call_exitprocs+0xd8>
80000994: 00892223 sw s0,4(s2)
80000998: fa9ff06f j 80000940 <__call_exitprocs+0xbc>
8000099c: 00058513 mv a0,a1
800009a0: 000680e7 jalr a3
800009a4: fb9ff06f j 8000095c <__call_exitprocs+0xd8>
Disassembly of section .init_array:
800019a8 <__init_array_start>:
800019a8: 0068 addi a0,sp,12
800019aa: 8000 0x8000
Disassembly of section .data:
800019b0 <impure_data>:
800019b0: 0000 unimp
800019b2: 0000 unimp
800019b4: 1c9c addi a5,sp,624
800019b6: 8000 0x8000
800019b8: 1d04 addi s1,sp,688
800019ba: 8000 0x8000
800019bc: 1d6c addi a1,sp,700
800019be: 8000 0x8000
...
80001a58: 0001 nop
80001a5a: 0000 unimp
80001a5c: 0000 unimp
80001a5e: 0000 unimp
80001a60: 330e fld ft6,224(sp)
80001a62: abcd j 80002054 <__BSS_END__+0x1f8>
80001a64: 1234 addi a3,sp,296
80001a66: e66d bnez a2,80001b50 <impure_data+0x1a0>
80001a68: deec sw a1,124(a3)
80001a6a: 0005 c.nop 1
80001a6c: 0000000b 0xb
...
Disassembly of section .sdata:
80001dd8 <_global_impure_ptr>:
80001dd8: 19b0 addi a2,sp,248
80001dda: 8000 0x8000
Disassembly of section .bss:
80001ddc <g_wspawn_args>:
...
Disassembly of section .comment:
00000000 <.comment>:
0: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm
4: 2820 fld fs0,80(s0)
6: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm
a: 3920 fld fs0,112(a0)
c: 322e fld ft4,232(sp)
e: 302e fld ft0,232(sp)
...
Disassembly of section .riscv.attributes:
00000000 <.riscv.attributes>:
0: 2541 jal 680 <__stack_size+0x280>
2: 0000 unimp
4: 7200 flw fs0,32(a2)
6: 7369 lui t1,0xffffa
8: 01007663 bgeu zero,a6,14 <__stack_usage+0x14>
c: 0000001b 0x1b
10: 1004 addi s1,sp,32
12: 7205 lui tp,0xfffe1
14: 3376 fld ft6,376(sp)
16: 6932 flw fs2,12(sp)
18: 7032 flw ft0,44(sp)
1a: 5f30 lw a2,120(a4)
1c: 326d jal fffff9c6 <__stack_top+0xfff9c6>
1e: 3070 fld fa2,224(s0)
20: 665f 7032 0030 0x307032665f

Binary file not shown.

View File

@@ -1,293 +0,0 @@
#include <iostream>
#include <unistd.h>
#include <string.h>
#include <vortex.h>
#include "common.h"
#include <assert.h>
#include <limits>
#include <math.h>
#include <vector>
// Evaluates _expr once; a non-zero result is reported on stdout together with
// the expression text, then cleanup() is called and the process exits with -1.
// A zero result falls through silently.
#define RT_CHECK(_expr)                                         \
  do {                                                          \
    int _ret = _expr;                                           \
    if (0 != _ret) {                                            \
      printf("Error: '%s' returned %d!\n", #_expr, (int)_ret);  \
      cleanup();                                                \
      exit(-1);                                                 \
    }                                                           \
  } while (false)
///////////////////////////////////////////////////////////////////////////////
// Overlays a float with an int and with a three-field bit-field view
// (23-bit man, 8-bit exp, 1-bit sign) of the same storage.
// NOTE(review): bit-field allocation order is implementation-defined; the
// man/exp/sign order presumably matches the IEEE-754 single-precision layout
// on the host compiler - confirm before relying on `parts`.
union Float_t {
// value as a float
float f;
// same storage viewed as an int
int i;
struct {
uint32_t man : 23;
uint32_t exp : 8;
uint32_t sign : 1;
} parts;
};
// Rounds x to `precision` decimal digits: scales by 10^precision, rounds to
// the nearest integer with std::round, and scales back.
inline float fround(float x, int32_t precision = 8) {
  const auto scale = std::pow(10, precision);
  const auto scaled = std::round(x * scale);
  return scaled / scale;
}
// Relative comparison: a and b are accepted when |a - b| does not exceed
// float epsilon scaled by the larger magnitude and by `ulp`.
// Prints a diagnostic line and returns false otherwise.
inline bool almost_equal_eps(float a, float b, int ulp = 128) {
  auto magnitude = std::max(fabs(a), fabs(b));
  auto eps = std::numeric_limits<float>::epsilon() * (magnitude * ulp);
  auto d = fabs(a - b);
  // written as !(d > eps) so that an unordered (NaN) comparison is accepted,
  // exactly like the plain `if (d > eps)` rejection test
  if (!(d > eps)) {
    return true;
  }
  std::cout << "*** almost_equal_eps: d=" << d << ", eps=" << eps << std::endl;
  return false;
}
// Compares two floats by the distance between their raw 32-bit patterns.
//
// a, b: values to compare.
// ulp:  largest accepted distance between the two bit patterns.
//
// Returns true when the distance is at most `ulp`; otherwise prints a
// diagnostic line (leaving std::cout in hex mode) and returns false.
//
// The distance is computed in 64 bits: subtracting the patterns of two values
// with opposite signs overflows a 32-bit int, which made e.g. 1.0f and -1.0f
// compare as equal. The bit patterns are obtained with memcpy rather than by
// reading an inactive union member.
inline bool almost_equal_ulp(float a, float b, int32_t ulp = 6) {
  static_assert(sizeof(float) == sizeof(int32_t), "float must be 32 bits wide");
  int32_t ia, ib;
  memcpy(&ia, &a, sizeof(ia));
  memcpy(&ib, &b, sizeof(ib));
  int64_t d = static_cast<int64_t>(ia) - static_cast<int64_t>(ib);
  if (d < 0) {
    d = -d;
  }
  if (d > ulp) {
    std::cout << "*** almost_equal_ulp: a=" << a << ", b=" << b << ", ulp=" << d << ", ia=" << std::hex << ia << ", ib=" << ib << std::endl;
    return false;
  }
  return true;
}
// Float comparison used by the result check: exact equality is accepted
// immediately, everything else is decided by almost_equal_ulp().
// (The epsilon-based almost_equal_eps() check is not used here.)
inline bool almost_equal(float a, float b) {
  return (a == b) || almost_equal_ulp(a, b);
}
///////////////////////////////////////////////////////////////////////////////
// Kernel image to upload; overridden by the -k option.
const char* kernel_file = "kernel.bin";
// Value of the -n option; 0 until set on the command line.
uint32_t count = 0;
// Random source values, filled by gen_input_data().
std::vector<float> test_data;
// Random indices into test_data, filled by gen_input_data().
std::vector<uint32_t> addr_table;
// Device and staging-buffer handles; released by cleanup() when non-null.
vx_device_h device = nullptr;
vx_buffer_h staging_buf = nullptr;
// Prints the program banner and the supported command-line options.
static void show_usage() {
  std::cout << "Vortex Driver Test." << std::endl
            << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl;
}
// Parses the command line with getopt:
//   -n <value>  stored in `count` (converted with atoi)
//   -k <file>   stored in `kernel_file`
//   -h / '?'    print usage and exit with status 0
// Any other value returned by getopt prints usage and exits with status -1.
static void parse_args(int argc, char **argv) {
  for (;;) {
    const int opt = getopt(argc, argv, "n:k:h?");
    if (opt == -1) {
      break;
    }
    if (opt == 'n') {
      count = atoi(optarg);
    } else if (opt == 'k') {
      kernel_file = optarg;
    } else if (opt == 'h' || opt == '?') {
      show_usage();
      exit(0);
    } else {
      show_usage();
      exit(-1);
    }
  }
}
// Releases the staging buffer and then closes the device; each step is
// skipped when the corresponding handle is still null.
void cleanup() {
  if (staging_buf != nullptr) {
    vx_buf_release(staging_buf);
  }
  if (device != nullptr) {
    vx_dev_close(device);
  }
}
// Fills the global test vectors with pseudo-random data drawn from std::rand().
//
// test_data  gets num_points floats in [0, 1].
// addr_table gets num_points + NUM_LOADS - 1 indices, each a valid index into
//            test_data.
//
// num_points: number of output elements the test will compute; expected to be
//             non-zero (the assert below fires otherwise).
void gen_input_data(uint32_t num_points) {
  test_data.resize(num_points);
  addr_table.resize(num_points + NUM_LOADS - 1);

  for (uint32_t i = 0; i < num_points; ++i) {
    float r = static_cast<float>(std::rand()) / RAND_MAX;
    test_data[i] = r;
  }

  for (uint32_t i = 0; i < addr_table.size(); ++i) {
    float r = static_cast<float>(std::rand()) / RAND_MAX;
    uint32_t index = static_cast<uint32_t>(r * num_points);
    // r can be exactly 1.0f (and r * num_points can round up), which would
    // yield index == num_points; clamp to the last valid element.
    if (index >= num_points && num_points != 0) {
      index = num_points - 1;
    }
    assert(index < num_points);
    addr_table[i] = index;
  }
}
// Runs the uploaded kernel once and checks its output.
// Steps: start the device, wait for completion, copy dst_buf_size bytes from
// kernel_arg.dst_ptr into the staging buffer, then compare each of the
// num_points results against a host-side reference.
// Returns 0 when every result matches, 1 otherwise (after printing the
// mismatches and "FAILED!").
int run_test(const kernel_arg_t& kernel_arg,
             uint32_t dst_buf_size,
             uint32_t num_points) {
  // start device
  std::cout << "start device" << std::endl;
  RT_CHECK(vx_start(device));

  // wait for completion
  std::cout << "wait for completion" << std::endl;
  RT_CHECK(vx_ready_wait(device, -1));

  // download destination buffer
  std::cout << "download destination buffer" << std::endl;
  RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, dst_buf_size, 0));

  // verify result
  std::cout << "verify result" << std::endl;

  int mismatches = 0;
  auto results = (float*)vx_host_ptr(staging_buf);

  for (uint32_t point = 0; point < num_points; ++point) {
    // NOTE(review): the reference starts at 0 and is multiplied, so it stays
    // 0 for finite inputs; presumably this mirrors the device kernel - confirm.
    float expected = 0.0f;
    for (uint32_t load = 0; load < NUM_LOADS; ++load) {
      // bounds-checked lookups: addr_table selects which sample to read
      uint32_t slot = addr_table.at(point + load);
      expected *= test_data.at(slot);
    }

    float actual = results[point];
    if (almost_equal(actual, expected))
      continue;

    std::cout << "error at result #" << std::dec << point
              << ": actual " << actual << ", expected " << expected << std::endl;
    ++mismatches;
  }

  if (mismatches == 0)
    return 0;

  std::cout << "Found " << std::dec << mismatches << " errors!" << std::endl;
  std::cout << "FAILED!" << std::endl;
  return 1;
}
// Test entry point.
// Parses the options, opens the device, generates random input tables,
// uploads the kernel, its argument block and the input buffers through a
// single shared staging buffer, runs the kernel and verifies the output.
// Returns 0 after printing "PASSED!"; failures are handled by RT_CHECK.
int main(int argc, char *argv[]) {
  size_t value;
  // zero-initialize: the whole struct is memcpy'd to the device below, so no
  // field or padding byte should be left indeterminate
  kernel_arg_t kernel_arg = {};

  // parse command arguments
  parse_args(argc, argv);

  if (count == 0) {
    count = 1;
  }

  // fixed seed keeps the generated data reproducible between runs
  std::srand(50);

  // open device connection
  std::cout << "open device connection" << std::endl;
  RT_CHECK(vx_dev_open(&device));

  unsigned max_cores, max_warps, max_threads;
  RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
  RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
  RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));

  uint32_t num_tasks = max_cores * max_warps * max_threads;
  uint32_t num_points = count * num_tasks;

  // generate input data
  gen_input_data(num_points);

  uint32_t addr_buf_size = addr_table.size() * sizeof(uint32_t);
  uint32_t src_buf_size = test_data.size() * sizeof(float);
  uint32_t dst_buf_size = test_data.size() * sizeof(float);

  std::cout << "number of points: " << num_points << std::endl;
  std::cout << "buffer size: " << dst_buf_size << " bytes" << std::endl;

  // upload program
  std::cout << "upload program" << std::endl;
  RT_CHECK(vx_upload_kernel_file(device, kernel_file));

  // allocate device memory
  std::cout << "allocate device memory" << std::endl;
  RT_CHECK(vx_alloc_dev_mem(device, addr_buf_size, &value));
  kernel_arg.addr_ptr = value;
  RT_CHECK(vx_alloc_dev_mem(device, src_buf_size, &value));
  kernel_arg.src_ptr = value;
  RT_CHECK(vx_alloc_dev_mem(device, dst_buf_size, &value));
  kernel_arg.dst_ptr = value;

  kernel_arg.num_tasks = num_tasks;
  kernel_arg.stride = count;

  std::cout << "dev_addr=" << std::hex << kernel_arg.addr_ptr << std::endl;
  std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl;
  std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;

  // allocate shared memory
  // one staging buffer is reused for every transfer, so size it for the
  // largest payload
  std::cout << "allocate shared memory" << std::endl;
  uint32_t staging_buf_size = std::max<uint32_t>(src_buf_size,
                                std::max<uint32_t>(addr_buf_size,
                                  std::max<uint32_t>(dst_buf_size,
                                    sizeof(kernel_arg_t))));
  RT_CHECK(vx_alloc_shared_mem(device, staging_buf_size, &staging_buf));

  // upload kernel argument
  std::cout << "upload kernel argument" << std::endl;
  {
    auto buf_ptr = (int*)vx_host_ptr(staging_buf);
    memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));
    RT_CHECK(vx_copy_to_dev(staging_buf, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
  }

  // upload source buffer0
  {
    auto buf_ptr = (uint32_t*)vx_host_ptr(staging_buf);
    for (uint32_t i = 0; i < addr_table.size(); ++i) {
      buf_ptr[i] = addr_table.at(i);
    }
  }
  std::cout << "upload address buffer" << std::endl;
  RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.addr_ptr, addr_buf_size, 0));

  // upload source buffer1
  {
    // stage through a float pointer: writing through an integer pointer
    // would value-convert every sample in [0, 1] to 0 or 1 instead of
    // copying its floating-point representation
    auto buf_ptr = (float*)vx_host_ptr(staging_buf);
    for (uint32_t i = 0; i < test_data.size(); ++i) {
      buf_ptr[i] = test_data.at(i);
    }
  }
  std::cout << "upload source buffer" << std::endl;
  RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_ptr, src_buf_size, 0));

  // clear destination buffer
  {
    // fill with a recognizable marker pattern before the kernel runs
    auto buf_ptr = (uint32_t*)vx_host_ptr(staging_buf);
    for (uint32_t i = 0; i < test_data.size(); ++i) {
      buf_ptr[i] = 0xdeadbeef;
    }
  }
  std::cout << "clear destination buffer" << std::endl;
  RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, dst_buf_size, 0));

  // run tests
  std::cout << "run tests" << std::endl;
  RT_CHECK(run_test(kernel_arg, dst_buf_size, num_points));

  // cleanup
  std::cout << "cleanup" << std::endl;
  cleanup();

  std::cout << "PASSED!" << std::endl;

  return 0;
}