diff --git a/benchmarks/opencl/guassian/.depend b/benchmarks/opencl/guassian/.depend new file mode 100644 index 00000000..7006e94e --- /dev/null +++ b/benchmarks/opencl/guassian/.depend @@ -0,0 +1,12 @@ +main.o: main.cc gaussianElim.h clutils.h \ + /opt/pocl/runtime/include/CL/cl.h \ + /opt/pocl/runtime/include/CL/cl_version.h \ + /opt/pocl/runtime/include/CL/cl_platform.h \ + /opt/pocl/runtime/include/CL/opencl.h \ + /opt/pocl/runtime/include/CL/cl_gl.h \ + /opt/pocl/runtime/include/CL/cl_gl_ext.h \ + /opt/pocl/runtime/include/CL/cl_ext.h +clutils.o: clutils.cpp /opt/pocl/runtime/include/CL/cl.h \ + /opt/pocl/runtime/include/CL/cl_version.h \ + /opt/pocl/runtime/include/CL/cl_platform.h clutils.h utils.h +utils.o: utils.cpp utils.h diff --git a/benchmarks/opencl/guassian/guassian b/benchmarks/opencl/guassian/guassian new file mode 100755 index 00000000..be28fb83 Binary files /dev/null and b/benchmarks/opencl/guassian/guassian differ diff --git a/benchmarks/opencl/nearn/.depend b/benchmarks/opencl/nearn/.depend new file mode 100644 index 00000000..083b7b77 --- /dev/null +++ b/benchmarks/opencl/nearn/.depend @@ -0,0 +1,11 @@ +main.o: main.cc nearestNeighbor.h /opt/pocl/runtime/include/CL/opencl.h \ + /opt/pocl/runtime/include/CL/cl.h \ + /opt/pocl/runtime/include/CL/cl_version.h \ + /opt/pocl/runtime/include/CL/cl_platform.h \ + /opt/pocl/runtime/include/CL/cl_gl.h \ + /opt/pocl/runtime/include/CL/cl_gl_ext.h \ + /opt/pocl/runtime/include/CL/cl_ext.h clutils.h +clutils.o: clutils.cpp /opt/pocl/runtime/include/CL/cl.h \ + /opt/pocl/runtime/include/CL/cl_version.h \ + /opt/pocl/runtime/include/CL/cl_platform.h clutils.h utils.h +utils.o: utils.cpp utils.h diff --git a/benchmarks/opencl/nearn/nearn b/benchmarks/opencl/nearn/nearn new file mode 100755 index 00000000..e302749a Binary files /dev/null and b/benchmarks/opencl/nearn/nearn differ diff --git a/benchmarks/opencl/saxpy/.depend b/benchmarks/opencl/saxpy/.depend new file mode 100644 index 00000000..1960f024 --- /dev/null +++ b/benchmarks/opencl/saxpy/.depend @@ -0,0 +1,3 @@ +main.o: main.cc /opt/pocl/runtime/include/CL/cl.h \ + /opt/pocl/runtime/include/CL/cl_version.h \ + /opt/pocl/runtime/include/CL/cl_platform.h diff --git a/benchmarks/opencl/saxpy/saxpy b/benchmarks/opencl/saxpy/saxpy new file mode 100755 index 00000000..7e11e504 Binary files /dev/null and b/benchmarks/opencl/saxpy/saxpy differ diff --git a/benchmarks/opencl/sfilter/.depend b/benchmarks/opencl/sfilter/.depend new file mode 100644 index 00000000..1960f024 --- /dev/null +++ b/benchmarks/opencl/sfilter/.depend @@ -0,0 +1,3 @@ +main.o: main.cc /opt/pocl/runtime/include/CL/cl.h \ + /opt/pocl/runtime/include/CL/cl_version.h \ + /opt/pocl/runtime/include/CL/cl_platform.h diff --git a/benchmarks/opencl/sfilter/sfilter b/benchmarks/opencl/sfilter/sfilter new file mode 100755 index 00000000..dfafd2bf Binary files /dev/null and b/benchmarks/opencl/sfilter/sfilter differ diff --git a/benchmarks/opencl/sgemm/.depend b/benchmarks/opencl/sgemm/.depend new file mode 100644 index 00000000..632ededc --- /dev/null +++ b/benchmarks/opencl/sgemm/.depend @@ -0,0 +1,7 @@ +main.o: main.cc /opt/pocl/runtime/include/CL/opencl.h \ + /opt/pocl/runtime/include/CL/cl.h \ + /opt/pocl/runtime/include/CL/cl_version.h \ + /opt/pocl/runtime/include/CL/cl_platform.h \ + /opt/pocl/runtime/include/CL/cl_gl.h \ + /opt/pocl/runtime/include/CL/cl_gl_ext.h \ + /opt/pocl/runtime/include/CL/cl_ext.h diff --git a/benchmarks/opencl/sgemm/sgemm b/benchmarks/opencl/sgemm/sgemm new file mode 100755 index 00000000..6a5dc24b Binary files /dev/null and b/benchmarks/opencl/sgemm/sgemm differ diff --git a/benchmarks/opencl/vecadd/.depend b/benchmarks/opencl/vecadd/.depend new file mode 100644 index 00000000..632ededc --- /dev/null +++ b/benchmarks/opencl/vecadd/.depend @@ -0,0 +1,7 @@ +main.o: main.cc /opt/pocl/runtime/include/CL/opencl.h \ + /opt/pocl/runtime/include/CL/cl.h \ + /opt/pocl/runtime/include/CL/cl_version.h \ + /opt/pocl/runtime/include/CL/cl_platform.h \ + /opt/pocl/runtime/include/CL/cl_gl.h \ + /opt/pocl/runtime/include/CL/cl_gl_ext.h \ + /opt/pocl/runtime/include/CL/cl_ext.h diff --git a/driver/opae/.depend b/driver/opae/.depend new file mode 100644 index 00000000..33a007b5 --- /dev/null +++ b/driver/opae/.depend @@ -0,0 +1,4 @@ +vortex.o: vortex.cpp ../include/vortex.h ../../hw/VX_config.h \ + vortex_afu.h +vx_utils.o: ../common/vx_utils.cpp ../include/vortex.h \ + ../../hw/VX_config.h diff --git a/driver/opae/vlsim/libopae-c-vlsim.so b/driver/opae/vlsim/libopae-c-vlsim.so new file mode 100755 index 00000000..39527d15 Binary files /dev/null and b/driver/opae/vlsim/libopae-c-vlsim.so differ diff --git a/driver/opae/vlsim/libvortex.so b/driver/opae/vlsim/libvortex.so new file mode 100755 index 00000000..a7fea0da Binary files /dev/null and b/driver/opae/vlsim/libvortex.so differ diff --git a/driver/stub/libvortex.so b/driver/stub/libvortex.so new file mode 100755 index 00000000..0c50c54c Binary files /dev/null and b/driver/stub/libvortex.so differ diff --git a/driver/tests/tex_demo/.depend b/driver/tests/tex_demo/.depend new file mode 100644 index 00000000..1647eae7 --- /dev/null +++ b/driver/tests/tex_demo/.depend @@ -0,0 +1 @@ +demo.o: demo.cpp ../../include/vortex.h common.h diff --git a/driver/tests/tex_demo/Makefile b/driver/tests/tex_demo/Makefile new file mode 100644 index 00000000..8ca0c6c8 --- /dev/null +++ b/driver/tests/tex_demo/Makefile @@ -0,0 +1,67 @@ +RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain +VORTEX_RT_PATH ?= $(wildcard ../../../runtime) + +OPTS ?= -n64 + +VX_CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc +VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++ +VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump +VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy + +VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections +VX_CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw + +VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a + +VX_SRCS = kernel.c + +#CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -pedantic -Wfatal-errors +CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors + +CXXFLAGS += -I../../include + +PROJECT = demo + +SRCS = demo.cpp + +all: $(PROJECT) kernel.bin kernel.dump + +kernel.dump: kernel.elf + $(VX_DP) -D kernel.elf > kernel.dump + +kernel.bin: kernel.elf + $(VX_CP) -O binary kernel.elf kernel.bin + +kernel.elf: $(VX_SRCS) + $(VX_CC) $(VX_CFLAGS) $(VX_SRCS) $(VX_LDFLAGS) -o kernel.elf + +$(PROJECT): $(SRCS) + $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../stub -lvortex -o $@ + +run-fpga: $(PROJECT) + LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) + +run-asesim: $(PROJECT) + ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) + +run-vlsim: $(PROJECT) + LD_LIBRARY_PATH=../../opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) + +run-rtlsim: $(PROJECT) + LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) + +run-simx: $(PROJECT) + LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) + +.depend: $(SRCS) + $(CXX) $(CXXFLAGS) -MM $^ > .depend; + +clean: + rm -rf $(PROJECT) *.o .depend + +clean-all: clean + rm -rf *.elf *.bin *.dump + +ifneq ($(MAKECMDGOALS),clean) + -include .depend +endif \ No newline at end of file diff --git a/driver/tests/tex_demo/common.h b/driver/tests/tex_demo/common.h new file mode 100644 index 00000000..d6540ae1 --- /dev/null +++ b/driver/tests/tex_demo/common.h @@ -0,0 +1,14 @@ +#ifndef _COMMON_H_ +#define _COMMON_H_ + +#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000 + +struct kernel_arg_t { + uint32_t num_tasks; + uint32_t task_size; + uint32_t src0_ptr; + uint32_t src1_ptr; + uint32_t dst_ptr; +}; + +#endif \ No newline at end of file diff --git a/driver/tests/tex_demo/demo b/driver/tests/tex_demo/demo new file mode 100755 index 00000000..faa176c1 Binary files /dev/null and b/driver/tests/tex_demo/demo differ diff --git a/driver/tests/tex_demo/demo.cpp b/driver/tests/tex_demo/demo.cpp new file mode 100644 index 00000000..a28d675d --- /dev/null +++ b/driver/tests/tex_demo/demo.cpp @@ -0,0 +1,203 @@ +#include +#include +#include +#include +#include "common.h" + +#define RT_CHECK(_expr) \ + do { \ + int _ret = _expr; \ + if (0 == _ret) \ + break; \ + printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ + cleanup(); \ + exit(-1); \ + } while (false) + +/////////////////////////////////////////////////////////////////////////////// + +const char* kernel_file = "kernel.bin"; +uint32_t count = 0; + +vx_device_h device = nullptr; +vx_buffer_h buffer = nullptr; + +static void show_usage() { + std::cout << "Vortex Driver Test." << std::endl; + std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl; +} + +static void parse_args(int argc, char **argv) { + int c; + while ((c = getopt(argc, argv, "n:k:h?")) != -1) { + switch (c) { + case 'n': + count = atoi(optarg); + break; + case 'k': + kernel_file = optarg; + break; + case 'h': + case '?': { + show_usage(); + exit(0); + } break; + default: + show_usage(); + exit(-1); + } + } +} + +void cleanup() { + if (buffer) { + vx_buf_release(buffer); + } + if (device) { + vx_dev_close(device); + } +} + +int run_test(const kernel_arg_t& kernel_arg, + uint32_t buf_size, + uint32_t num_points) { + // start device + std::cout << "start device" << std::endl; + RT_CHECK(vx_start(device)); + + // wait for completion + std::cout << "wait for completion" << std::endl; + RT_CHECK(vx_ready_wait(device, -1)); + + // download destination buffer + std::cout << "download destination buffer" << std::endl; + RT_CHECK(vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0)); + + // verify result + std::cout << "verify result" << std::endl; + { + int errors = 0; + auto buf_ptr = (int32_t*)vx_host_ptr(buffer); + for (uint32_t i = 0; i < num_points; ++i) { + int ref = 0xFAAF; + int cur = buf_ptr[i]; + if (cur != ref) { + std::cout << "error at result #" << i + << ": actual 0x" << cur << ", expected 0x" << ref << std::endl; + ++errors; + } + } + if (errors != 0) { + std::cout << "Found " << std::dec << errors << " errors!" << std::endl; + std::cout << "FAILED!" << std::endl; + return 1; + } + } + + return 0; +} + +int main(int argc, char *argv[]) { + size_t value; + kernel_arg_t kernel_arg; + + // parse command arguments + parse_args(argc, argv); + + if (count == 0) { + count = 1; + } + + // open device connection + std::cout << "open device connection" << std::endl; + RT_CHECK(vx_dev_open(&device)); + + unsigned max_cores, max_warps, max_threads; + RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores)); + RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps)); + RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads)); + + // uint32_t num_tasks = max_cores * max_warps * max_threads; + uint32_t num_tasks = 1; + uint32_t num_points = count * num_tasks; + uint32_t buf_size = num_points * sizeof(uint32_t); + + std::cout << "number of points: " << num_points << std::endl; + std::cout << "buffer size: " << buf_size << " bytes" << std::endl; + + // upload program + std::cout << "upload program" << std::endl; + RT_CHECK(vx_upload_kernel_file(device, kernel_file)); + + // allocate device memory + std::cout << "allocate device memory" << std::endl; + + RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); + kernel_arg.src0_ptr = value; + RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); + kernel_arg.src1_ptr = value; + RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); + kernel_arg.dst_ptr = value; + + kernel_arg.num_tasks = num_tasks; + kernel_arg.task_size = count; + + std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::endl; + std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::endl; + std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl; + + // allocate shared memory + std::cout << "allocate shared memory" << std::endl; + uint32_t alloc_size = std::max(buf_size, sizeof(kernel_arg_t)); + RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &buffer)); + + // upload kernel argument + std::cout << "upload kernel argument" << std::endl; + { + auto buf_ptr = (int*)vx_host_ptr(buffer); + memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t)); + RT_CHECK(vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0)); + } + + // upload source buffer0 + { + auto buf_ptr = (int32_t*)vx_host_ptr(buffer); + for (uint32_t i = 0; i < num_points; ++i) { + buf_ptr[i] = i-1; + } + } + std::cout << "upload source buffer0" << std::endl; + RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src0_ptr, buf_size, 0)); + + // upload source buffer1 + { + auto buf_ptr = (int32_t*)vx_host_ptr(buffer); + for (uint32_t i = 0; i < num_points; ++i) { + buf_ptr[i] = i+1; + } + } + std::cout << "upload source buffer1" << std::endl; + RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src1_ptr, buf_size, 0)); + + // clear destination buffer + { + auto buf_ptr = (int32_t*)vx_host_ptr(buffer); + for (uint32_t i = 0; i < num_points; ++i) { + buf_ptr[i] = 0xdeadbeef; + } + } + std::cout << "clear destination buffer" << std::endl; + RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.dst_ptr, buf_size, 0)); + + // run tests + std::cout << "run tests" << std::endl; + RT_CHECK(run_test(kernel_arg, buf_size, num_points)); + + // cleanup + std::cout << "cleanup" << std::endl; + cleanup(); + + std::cout << "PASSED!" << std::endl; + + return 0; +} \ No newline at end of file diff --git a/driver/tests/tex_demo/kernel.bin b/driver/tests/tex_demo/kernel.bin new file mode 100755 index 00000000..71e9dd03 Binary files /dev/null and b/driver/tests/tex_demo/kernel.bin differ diff --git a/driver/tests/tex_demo/kernel.c b/driver/tests/tex_demo/kernel.c new file mode 100644 index 00000000..c142eefb --- /dev/null +++ b/driver/tests/tex_demo/kernel.c @@ -0,0 +1,29 @@ +#include +#include +#include +#include "common.h" + +void kernel_body(int task_id, void* arg) { + struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg); + uint32_t count = _arg->task_size; + // int32_t* src0_ptr = (int32_t*)_arg->src0_ptr; + // int32_t* src1_ptr = (int32_t*)_arg->src1_ptr; + int32_t* dst_ptr = (int32_t*)_arg->dst_ptr; + + unsigned lod = 1; + unsigned u = 1; + unsigned v = 1; + unsigned t = 1; + + uint32_t offset = task_id * count; + + for (uint32_t i = 0; i < count; ++i) { + // dst_ptr[offset+i] = src0_ptr[offset+i] + src1_ptr[offset+i]; + dst_ptr[offset+i] = vx_tex(t, u, v, lod); + } +} + +int main() { + struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR; + vx_spawn_tasks(arg->num_tasks, kernel_body, arg); +} \ No newline at end of file diff --git a/driver/tests/tex_demo/kernel.dump b/driver/tests/tex_demo/kernel.dump new file mode 100644 index 00000000..da75481c --- /dev/null +++ b/driver/tests/tex_demo/kernel.dump @@ -0,0 +1,571 @@ + +kernel.elf: file format elf32-littleriscv + + +Disassembly of section .init: + +80000000 <_start>: +80000000: 00000597 auipc a1,0x0 +80000004: 0f458593 addi a1,a1,244 # 800000f4 +80000008: fc102573 csrr a0,0xfc1 +8000000c: 00b5106b 0xb5106b +80000010: 0e4000ef jal ra,800000f4 +80000014: 00100513 li a0,1 +80000018: 0005006b 0x5006b +8000001c: 00002517 auipc a0,0x2 +80000020: b4050513 addi a0,a0,-1216 # 80001b5c +80000024: 00002617 auipc a2,0x2 +80000028: bb860613 addi a2,a2,-1096 # 80001bdc <__BSS_END__> +8000002c: 40a60633 sub a2,a2,a0 +80000030: 00000593 li a1,0 +80000034: 458000ef jal ra,8000048c +80000038: 00000517 auipc a0,0x0 +8000003c: 35c50513 addi a0,a0,860 # 80000394 <__libc_fini_array> +80000040: 30c000ef jal ra,8000034c +80000044: 3ac000ef jal ra,800003f0 <__libc_init_array> +80000048: 008000ef jal ra,80000050
+8000004c: 3140006f j 80000360 + +Disassembly of section .text: + +80000050
: +80000050: 7ffff7b7 lui a5,0x7ffff +80000054: 0007a503 lw a0,0(a5) # 7ffff000 <__stack_size+0x7fffec00> +80000058: 800005b7 lui a1,0x80000 +8000005c: 7ffff637 lui a2,0x7ffff +80000060: 08058593 addi a1,a1,128 # 80000080 <__stack_top+0x81000080> +80000064: 18c0006f j 800001f0 + +80000068 : +80000068: 00000793 li a5,0 +8000006c: 00078863 beqz a5,8000007c +80000070: 80000537 lui a0,0x80000 +80000074: 39450513 addi a0,a0,916 # 80000394 <__stack_top+0x81000394> +80000078: 2d40006f j 8000034c +8000007c: 00008067 ret + +80000080 : +80000080: 0045a783 lw a5,4(a1) +80000084: 0105a703 lw a4,16(a1) +80000088: 02f50533 mul a0,a0,a5 +8000008c: 04078e63 beqz a5,800000e8 +80000090: ff010113 addi sp,sp,-16 +80000094: 00912223 sw s1,4(sp) +80000098: 00a784b3 add s1,a5,a0 +8000009c: 00249493 slli s1,s1,0x2 +800000a0: 00251513 slli a0,a0,0x2 +800000a4: 00812423 sw s0,8(sp) +800000a8: 00112623 sw ra,12(sp) +800000ac: 00e50433 add s0,a0,a4 +800000b0: 00e484b3 add s1,s1,a4 +800000b4: 00100693 li a3,1 +800000b8: 00100613 li a2,1 +800000bc: 00100593 li a1,1 +800000c0: 00100513 li a0,1 +800000c4: 274000ef jal ra,80000338 +800000c8: 00a42023 sw a0,0(s0) +800000cc: 00440413 addi s0,s0,4 +800000d0: fe8492e3 bne s1,s0,800000b4 +800000d4: 00c12083 lw ra,12(sp) +800000d8: 00812403 lw s0,8(sp) +800000dc: 00412483 lw s1,4(sp) +800000e0: 01010113 addi sp,sp,16 +800000e4: 00008067 ret +800000e8: 00008067 ret + +800000ec <_exit>: +800000ec: 00000513 li a0,0 +800000f0: 0005006b 0x5006b + +800000f4 : +800000f4: fc002573 csrr a0,0xfc0 +800000f8: 0005006b 0x5006b +800000fc: 00002197 auipc gp,0x2 +80000100: e3418193 addi gp,gp,-460 # 80001f30 <__global_pointer> +80000104: 7f000117 auipc sp,0x7f000 +80000108: efc10113 addi sp,sp,-260 # ff000000 <__stack_top> +8000010c: 40000593 li a1,1024 +80000110: cc102673 csrr a2,0xcc1 +80000114: 02c585b3 mul a1,a1,a2 +80000118: 40b10133 sub sp,sp,a1 +8000011c: cc3026f3 csrr a3,0xcc3 +80000120: 00068663 beqz a3,8000012c +80000124: 00000513 li a0,0 +80000128: 0005006b 0x5006b + +8000012c : +8000012c: 00008067 ret + +80000130 : +80000130: fe010113 addi sp,sp,-32 +80000134: 00112e23 sw ra,28(sp) +80000138: 00812c23 sw s0,24(sp) +8000013c: 00912a23 sw s1,20(sp) +80000140: 01212823 sw s2,16(sp) +80000144: 01312623 sw s3,12(sp) +80000148: fc0027f3 csrr a5,0xfc0 +8000014c: 0007806b 0x7806b +80000150: cc5026f3 csrr a3,0xcc5 +80000154: cc3029f3 csrr s3,0xcc3 +80000158: cc002773 csrr a4,0xcc0 +8000015c: fc002673 csrr a2,0xfc0 +80000160: 800027b7 lui a5,0x80002 +80000164: 00269693 slli a3,a3,0x2 +80000168: b5c78793 addi a5,a5,-1188 # 80001b5c <__stack_top+0x81001b5c> +8000016c: 00d787b3 add a5,a5,a3 +80000170: 0007a483 lw s1,0(a5) +80000174: 0104a403 lw s0,16(s1) +80000178: 00c4a683 lw a3,12(s1) +8000017c: 0089a933 slt s2,s3,s0 +80000180: 00040793 mv a5,s0 +80000184: 00d90933 add s2,s2,a3 +80000188: 03368433 mul s0,a3,s3 +8000018c: 00f9d463 bge s3,a5,80000194 +80000190: 00098793 mv a5,s3 +80000194: 00f40433 add s0,s0,a5 +80000198: 0084a683 lw a3,8(s1) +8000019c: 02c40433 mul s0,s0,a2 +800001a0: 02e907b3 mul a5,s2,a4 +800001a4: 00d40433 add s0,s0,a3 +800001a8: 00f40433 add s0,s0,a5 +800001ac: 00890933 add s2,s2,s0 +800001b0: 01245e63 bge s0,s2,800001cc +800001b4: 0004a783 lw a5,0(s1) +800001b8: 0044a583 lw a1,4(s1) +800001bc: 00040513 mv a0,s0 +800001c0: 00140413 addi s0,s0,1 +800001c4: 000780e7 jalr a5 +800001c8: fe8916e3 bne s2,s0,800001b4 +800001cc: 0019b993 seqz s3,s3 +800001d0: 0009806b 0x9806b +800001d4: 01c12083 lw ra,28(sp) +800001d8: 01812403 lw s0,24(sp) +800001dc: 01412483 lw s1,20(sp) +800001e0: 01012903 lw s2,16(sp) +800001e4: 00c12983 lw s3,12(sp) +800001e8: 02010113 addi sp,sp,32 +800001ec: 00008067 ret + +800001f0 : +800001f0: fc010113 addi sp,sp,-64 +800001f4: 02112e23 sw ra,60(sp) +800001f8: 02812c23 sw s0,56(sp) +800001fc: 02912a23 sw s1,52(sp) +80000200: 03212823 sw s2,48(sp) +80000204: 03312623 sw s3,44(sp) +80000208: fc2026f3 csrr a3,0xfc2 +8000020c: fc102873 csrr a6,0xfc1 +80000210: fc002473 csrr s0,0xfc0 +80000214: cc5027f3 csrr a5,0xcc5 +80000218: 01f00713 li a4,31 +8000021c: 0cf74463 blt a4,a5,800002e4 +80000220: 030408b3 mul a7,s0,a6 +80000224: 00100713 li a4,1 +80000228: 00a8d463 bge a7,a0,80000230 +8000022c: 03154733 div a4,a0,a7 +80000230: 0ce6c863 blt a3,a4,80000300 +80000234: 0ae7d863 bge a5,a4,800002e4 +80000238: fff68693 addi a3,a3,-1 +8000023c: 02e54333 div t1,a0,a4 +80000240: 00030893 mv a7,t1 +80000244: 00f69663 bne a3,a5,80000250 +80000248: 02e56533 rem a0,a0,a4 +8000024c: 006508b3 add a7,a0,t1 +80000250: 0288c4b3 div s1,a7,s0 +80000254: 0288e933 rem s2,a7,s0 +80000258: 0b04ca63 blt s1,a6,8000030c +8000025c: 00100693 li a3,1 +80000260: 0304c733 div a4,s1,a6 +80000264: 00070663 beqz a4,80000270 +80000268: 00070693 mv a3,a4 +8000026c: 0304e733 rem a4,s1,a6 +80000270: 800029b7 lui s3,0x80002 +80000274: b5c98993 addi s3,s3,-1188 # 80001b5c <__stack_top+0x81001b5c> +80000278: 00e12e23 sw a4,28(sp) +8000027c: 00c10713 addi a4,sp,12 +80000280: 00b12623 sw a1,12(sp) +80000284: 00c12823 sw a2,16(sp) +80000288: 00d12c23 sw a3,24(sp) +8000028c: 02f30333 mul t1,t1,a5 +80000290: 00279793 slli a5,a5,0x2 +80000294: 00f987b3 add a5,s3,a5 +80000298: 00e7a023 sw a4,0(a5) +8000029c: 00612a23 sw t1,20(sp) +800002a0: 06904c63 bgtz s1,80000318 +800002a4: 04090063 beqz s2,800002e4 +800002a8: 02848433 mul s0,s1,s0 +800002ac: 00812a23 sw s0,20(sp) +800002b0: 0009006b 0x9006b +800002b4: cc5027f3 csrr a5,0xcc5 +800002b8: cc202573 csrr a0,0xcc2 +800002bc: 00279793 slli a5,a5,0x2 +800002c0: 00f989b3 add s3,s3,a5 +800002c4: 0009a783 lw a5,0(s3) +800002c8: 0087a683 lw a3,8(a5) +800002cc: 0007a703 lw a4,0(a5) +800002d0: 0047a583 lw a1,4(a5) +800002d4: 00d50533 add a0,a0,a3 +800002d8: 000700e7 jalr a4 +800002dc: 00100793 li a5,1 +800002e0: 0007806b 0x7806b +800002e4: 03c12083 lw ra,60(sp) +800002e8: 03812403 lw s0,56(sp) +800002ec: 03412483 lw s1,52(sp) +800002f0: 03012903 lw s2,48(sp) +800002f4: 02c12983 lw s3,44(sp) +800002f8: 04010113 addi sp,sp,64 +800002fc: 00008067 ret +80000300: 00068713 mv a4,a3 +80000304: f2e7cae3 blt a5,a4,80000238 +80000308: fddff06f j 800002e4 +8000030c: 00000713 li a4,0 +80000310: 00100693 li a3,1 +80000314: f5dff06f j 80000270 +80000318: 00048713 mv a4,s1 +8000031c: 00985463 bge a6,s1,80000324 +80000320: 00080713 mv a4,a6 +80000324: 800007b7 lui a5,0x80000 +80000328: 13078793 addi a5,a5,304 # 80000130 <__stack_top+0x81000130> +8000032c: 00f7106b 0xf7106b +80000330: e01ff0ef jal ra,80000130 +80000334: f71ff06f j 800002a4 + +80000338 : +80000338: 00869693 slli a3,a3,0x8 +8000033c: 00a6e6b3 or a3,a3,a0 +80000340: 00000513 li a0,0 +80000344: 6ac5d56b 0x6ac5d56b +80000348: 00008067 ret + +8000034c : +8000034c: 00050593 mv a1,a0 +80000350: 00000693 li a3,0 +80000354: 00000613 li a2,0 +80000358: 00000513 li a0,0 +8000035c: 20c0006f j 80000568 <__register_exitproc> + +80000360 : +80000360: ff010113 addi sp,sp,-16 +80000364: 00000593 li a1,0 +80000368: 00812423 sw s0,8(sp) +8000036c: 00112623 sw ra,12(sp) +80000370: 00050413 mv s0,a0 +80000374: 290000ef jal ra,80000604 <__call_exitprocs> +80000378: 800027b7 lui a5,0x80002 +8000037c: b587a503 lw a0,-1192(a5) # 80001b58 <__stack_top+0x81001b58> +80000380: 03c52783 lw a5,60(a0) +80000384: 00078463 beqz a5,8000038c +80000388: 000780e7 jalr a5 +8000038c: 00040513 mv a0,s0 +80000390: d5dff0ef jal ra,800000ec <_exit> + +80000394 <__libc_fini_array>: +80000394: ff010113 addi sp,sp,-16 +80000398: 00812423 sw s0,8(sp) +8000039c: 800017b7 lui a5,0x80001 +800003a0: 80001437 lui s0,0x80001 +800003a4: 72c40413 addi s0,s0,1836 # 8000172c <__stack_top+0x8100172c> +800003a8: 72c78793 addi a5,a5,1836 # 8000172c <__stack_top+0x8100172c> +800003ac: 408787b3 sub a5,a5,s0 +800003b0: 00912223 sw s1,4(sp) +800003b4: 00112623 sw ra,12(sp) +800003b8: 4027d493 srai s1,a5,0x2 +800003bc: 02048063 beqz s1,800003dc <__libc_fini_array+0x48> +800003c0: ffc78793 addi a5,a5,-4 +800003c4: 00878433 add s0,a5,s0 +800003c8: 00042783 lw a5,0(s0) +800003cc: fff48493 addi s1,s1,-1 +800003d0: ffc40413 addi s0,s0,-4 +800003d4: 000780e7 jalr a5 +800003d8: fe0498e3 bnez s1,800003c8 <__libc_fini_array+0x34> +800003dc: 00c12083 lw ra,12(sp) +800003e0: 00812403 lw s0,8(sp) +800003e4: 00412483 lw s1,4(sp) +800003e8: 01010113 addi sp,sp,16 +800003ec: 00008067 ret + +800003f0 <__libc_init_array>: +800003f0: ff010113 addi sp,sp,-16 +800003f4: 00812423 sw s0,8(sp) +800003f8: 01212023 sw s2,0(sp) +800003fc: 80001437 lui s0,0x80001 +80000400: 80001937 lui s2,0x80001 +80000404: 72840793 addi a5,s0,1832 # 80001728 <__stack_top+0x81001728> +80000408: 72890913 addi s2,s2,1832 # 80001728 <__stack_top+0x81001728> +8000040c: 40f90933 sub s2,s2,a5 +80000410: 00112623 sw ra,12(sp) +80000414: 00912223 sw s1,4(sp) +80000418: 40295913 srai s2,s2,0x2 +8000041c: 02090063 beqz s2,8000043c <__libc_init_array+0x4c> +80000420: 72840413 addi s0,s0,1832 +80000424: 00000493 li s1,0 +80000428: 00042783 lw a5,0(s0) +8000042c: 00148493 addi s1,s1,1 +80000430: 00440413 addi s0,s0,4 +80000434: 000780e7 jalr a5 +80000438: fe9918e3 bne s2,s1,80000428 <__libc_init_array+0x38> +8000043c: 80001437 lui s0,0x80001 +80000440: 80001937 lui s2,0x80001 +80000444: 72840793 addi a5,s0,1832 # 80001728 <__stack_top+0x81001728> +80000448: 72c90913 addi s2,s2,1836 # 8000172c <__stack_top+0x8100172c> +8000044c: 40f90933 sub s2,s2,a5 +80000450: 40295913 srai s2,s2,0x2 +80000454: 02090063 beqz s2,80000474 <__libc_init_array+0x84> +80000458: 72840413 addi s0,s0,1832 +8000045c: 00000493 li s1,0 +80000460: 00042783 lw a5,0(s0) +80000464: 00148493 addi s1,s1,1 +80000468: 00440413 addi s0,s0,4 +8000046c: 000780e7 jalr a5 +80000470: fe9918e3 bne s2,s1,80000460 <__libc_init_array+0x70> +80000474: 00c12083 lw ra,12(sp) +80000478: 00812403 lw s0,8(sp) +8000047c: 00412483 lw s1,4(sp) +80000480: 00012903 lw s2,0(sp) +80000484: 01010113 addi sp,sp,16 +80000488: 00008067 ret + +8000048c : +8000048c: 00f00313 li t1,15 +80000490: 00050713 mv a4,a0 +80000494: 02c37e63 bgeu t1,a2,800004d0 +80000498: 00f77793 andi a5,a4,15 +8000049c: 0a079063 bnez a5,8000053c +800004a0: 08059263 bnez a1,80000524 +800004a4: ff067693 andi a3,a2,-16 +800004a8: 00f67613 andi a2,a2,15 +800004ac: 00e686b3 add a3,a3,a4 +800004b0: 00b72023 sw a1,0(a4) +800004b4: 00b72223 sw a1,4(a4) +800004b8: 00b72423 sw a1,8(a4) +800004bc: 00b72623 sw a1,12(a4) +800004c0: 01070713 addi a4,a4,16 +800004c4: fed766e3 bltu a4,a3,800004b0 +800004c8: 00061463 bnez a2,800004d0 +800004cc: 00008067 ret +800004d0: 40c306b3 sub a3,t1,a2 +800004d4: 00269693 slli a3,a3,0x2 +800004d8: 00000297 auipc t0,0x0 +800004dc: 005686b3 add a3,a3,t0 +800004e0: 00c68067 jr 12(a3) +800004e4: 00b70723 sb a1,14(a4) +800004e8: 00b706a3 sb a1,13(a4) +800004ec: 00b70623 sb a1,12(a4) +800004f0: 00b705a3 sb a1,11(a4) +800004f4: 00b70523 sb a1,10(a4) +800004f8: 00b704a3 sb a1,9(a4) +800004fc: 00b70423 sb a1,8(a4) +80000500: 00b703a3 sb a1,7(a4) +80000504: 00b70323 sb a1,6(a4) +80000508: 00b702a3 sb a1,5(a4) +8000050c: 00b70223 sb a1,4(a4) +80000510: 00b701a3 sb a1,3(a4) +80000514: 00b70123 sb a1,2(a4) +80000518: 00b700a3 sb a1,1(a4) +8000051c: 00b70023 sb a1,0(a4) +80000520: 00008067 ret +80000524: 0ff5f593 andi a1,a1,255 +80000528: 00859693 slli a3,a1,0x8 +8000052c: 00d5e5b3 or a1,a1,a3 +80000530: 01059693 slli a3,a1,0x10 +80000534: 00d5e5b3 or a1,a1,a3 +80000538: f6dff06f j 800004a4 +8000053c: 00279693 slli a3,a5,0x2 +80000540: 00000297 auipc t0,0x0 +80000544: 005686b3 add a3,a3,t0 +80000548: 00008293 mv t0,ra +8000054c: fa0680e7 jalr -96(a3) +80000550: 00028093 mv ra,t0 +80000554: ff078793 addi a5,a5,-16 +80000558: 40f70733 sub a4,a4,a5 +8000055c: 00f60633 add a2,a2,a5 +80000560: f6c378e3 bgeu t1,a2,800004d0 +80000564: f3dff06f j 800004a0 + +80000568 <__register_exitproc>: +80000568: 800027b7 lui a5,0x80002 +8000056c: b587a703 lw a4,-1192(a5) # 80001b58 <__stack_top+0x81001b58> +80000570: 14872783 lw a5,328(a4) +80000574: 04078c63 beqz a5,800005cc <__register_exitproc+0x64> +80000578: 0047a703 lw a4,4(a5) +8000057c: 01f00813 li a6,31 +80000580: 06e84e63 blt a6,a4,800005fc <__register_exitproc+0x94> +80000584: 00271813 slli a6,a4,0x2 +80000588: 02050663 beqz a0,800005b4 <__register_exitproc+0x4c> +8000058c: 01078333 add t1,a5,a6 +80000590: 08c32423 sw a2,136(t1) +80000594: 1887a883 lw a7,392(a5) +80000598: 00100613 li a2,1 +8000059c: 00e61633 sll a2,a2,a4 +800005a0: 00c8e8b3 or a7,a7,a2 +800005a4: 1917a423 sw a7,392(a5) +800005a8: 10d32423 sw a3,264(t1) +800005ac: 00200693 li a3,2 +800005b0: 02d50463 beq a0,a3,800005d8 <__register_exitproc+0x70> +800005b4: 00170713 addi a4,a4,1 +800005b8: 00e7a223 sw a4,4(a5) +800005bc: 010787b3 add a5,a5,a6 +800005c0: 00b7a423 sw a1,8(a5) +800005c4: 00000513 li a0,0 +800005c8: 00008067 ret +800005cc: 14c70793 addi a5,a4,332 +800005d0: 14f72423 sw a5,328(a4) +800005d4: fa5ff06f j 80000578 <__register_exitproc+0x10> +800005d8: 18c7a683 lw a3,396(a5) +800005dc: 00170713 addi a4,a4,1 +800005e0: 00e7a223 sw a4,4(a5) +800005e4: 00c6e633 or a2,a3,a2 +800005e8: 18c7a623 sw a2,396(a5) +800005ec: 010787b3 add a5,a5,a6 +800005f0: 00b7a423 sw a1,8(a5) +800005f4: 00000513 li a0,0 +800005f8: 00008067 ret +800005fc: fff00513 li a0,-1 +80000600: 00008067 ret + +80000604 <__call_exitprocs>: +80000604: fd010113 addi sp,sp,-48 +80000608: 800027b7 lui a5,0x80002 +8000060c: 01412c23 sw s4,24(sp) +80000610: b587aa03 lw s4,-1192(a5) # 80001b58 <__stack_top+0x81001b58> +80000614: 03212023 sw s2,32(sp) +80000618: 02112623 sw ra,44(sp) +8000061c: 148a2903 lw s2,328(s4) +80000620: 02812423 sw s0,40(sp) +80000624: 02912223 sw s1,36(sp) +80000628: 01312e23 sw s3,28(sp) +8000062c: 01512a23 sw s5,20(sp) +80000630: 01612823 sw s6,16(sp) +80000634: 01712623 sw s7,12(sp) +80000638: 01812423 sw s8,8(sp) +8000063c: 04090063 beqz s2,8000067c <__call_exitprocs+0x78> +80000640: 00050b13 mv s6,a0 +80000644: 00058b93 mv s7,a1 +80000648: 00100a93 li s5,1 +8000064c: fff00993 li s3,-1 +80000650: 00492483 lw s1,4(s2) +80000654: fff48413 addi s0,s1,-1 +80000658: 02044263 bltz s0,8000067c <__call_exitprocs+0x78> +8000065c: 00249493 slli s1,s1,0x2 +80000660: 009904b3 add s1,s2,s1 +80000664: 040b8463 beqz s7,800006ac <__call_exitprocs+0xa8> +80000668: 1044a783 lw a5,260(s1) +8000066c: 05778063 beq a5,s7,800006ac <__call_exitprocs+0xa8> +80000670: fff40413 addi s0,s0,-1 +80000674: ffc48493 addi s1,s1,-4 +80000678: ff3416e3 bne s0,s3,80000664 <__call_exitprocs+0x60> +8000067c: 02c12083 lw ra,44(sp) +80000680: 02812403 lw s0,40(sp) +80000684: 02412483 lw s1,36(sp) +80000688: 02012903 lw s2,32(sp) +8000068c: 01c12983 lw s3,28(sp) +80000690: 01812a03 lw s4,24(sp) +80000694: 01412a83 lw s5,20(sp) +80000698: 01012b03 lw s6,16(sp) +8000069c: 00c12b83 lw s7,12(sp) +800006a0: 00812c03 lw s8,8(sp) +800006a4: 03010113 addi sp,sp,48 +800006a8: 00008067 ret +800006ac: 00492783 lw a5,4(s2) +800006b0: 0044a683 lw a3,4(s1) +800006b4: fff78793 addi a5,a5,-1 +800006b8: 04878e63 beq a5,s0,80000714 <__call_exitprocs+0x110> +800006bc: 0004a223 sw zero,4(s1) +800006c0: fa0688e3 beqz a3,80000670 <__call_exitprocs+0x6c> +800006c4: 18892783 lw a5,392(s2) +800006c8: 008a9733 sll a4,s5,s0 +800006cc: 00492c03 lw s8,4(s2) +800006d0: 00f777b3 and a5,a4,a5 +800006d4: 02079263 bnez a5,800006f8 <__call_exitprocs+0xf4> +800006d8: 000680e7 jalr a3 +800006dc: 00492703 lw a4,4(s2) +800006e0: 148a2783 lw a5,328(s4) +800006e4: 01871463 bne a4,s8,800006ec <__call_exitprocs+0xe8> +800006e8: f8f904e3 beq s2,a5,80000670 <__call_exitprocs+0x6c> +800006ec: f80788e3 beqz a5,8000067c <__call_exitprocs+0x78> +800006f0: 00078913 mv s2,a5 +800006f4: f5dff06f j 80000650 <__call_exitprocs+0x4c> +800006f8: 18c92783 lw a5,396(s2) +800006fc: 0844a583 lw a1,132(s1) +80000700: 00f77733 and a4,a4,a5 +80000704: 00071c63 bnez a4,8000071c <__call_exitprocs+0x118> +80000708: 000b0513 mv a0,s6 +8000070c: 000680e7 jalr a3 +80000710: fcdff06f j 800006dc <__call_exitprocs+0xd8> +80000714: 00892223 sw s0,4(s2) +80000718: fa9ff06f j 800006c0 <__call_exitprocs+0xbc> +8000071c: 00058513 mv a0,a1 +80000720: 000680e7 jalr a3 +80000724: fb9ff06f j 800006dc <__call_exitprocs+0xd8> + +Disassembly of section .init_array: + +80001728 <__init_array_start>: +80001728: 0068 addi a0,sp,12 +8000172a: 8000 0x8000 + +Disassembly of section .data: + +80001730 : +80001730: 0000 unimp +80001732: 0000 unimp +80001734: 1a1c addi a5,sp,304 +80001736: 8000 0x8000 +80001738: 1a84 addi s1,sp,368 +8000173a: 8000 0x8000 +8000173c: 1aec addi a1,sp,380 +8000173e: 8000 0x8000 + ... +800017d8: 0001 nop +800017da: 0000 unimp +800017dc: 0000 unimp +800017de: 0000 unimp +800017e0: 330e fld ft6,224(sp) +800017e2: abcd j 80001dd4 <__BSS_END__+0x1f8> +800017e4: 1234 addi a3,sp,296 +800017e6: e66d bnez a2,800018d0 +800017e8: deec sw a1,124(a3) +800017ea: 0005 c.nop 1 +800017ec: 0000000b 0xb + ... + +Disassembly of section .sdata: + +80001b58 <_global_impure_ptr>: +80001b58: 1730 addi a2,sp,936 +80001b5a: 8000 0x8000 + +Disassembly of section .bss: + +80001b5c : + ... + +Disassembly of section .comment: + +00000000 <.comment>: + 0: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm + 4: 2820 fld fs0,80(s0) + 6: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm + a: 3920 fld fs0,112(a0) + c: 322e fld ft4,232(sp) + e: 302e fld ft0,232(sp) + ... + +Disassembly of section .riscv.attributes: + +00000000 <.riscv.attributes>: + 0: 2541 jal 680 <__stack_size+0x280> + 2: 0000 unimp + 4: 7200 flw fs0,32(a2) + 6: 7369 lui t1,0xffffa + 8: 01007663 bgeu zero,a6,14 <__stack_usage+0x14> + c: 0000001b 0x1b + 10: 1004 addi s1,sp,32 + 12: 7205 lui tp,0xfffe1 + 14: 3376 fld ft6,376(sp) + 16: 6932 flw fs2,12(sp) + 18: 7032 flw ft0,44(sp) + 1a: 5f30 lw a2,120(a4) + 1c: 326d jal fffff9c6 <__stack_top+0xfff9c6> + 1e: 3070 fld fa2,224(s0) + 20: 665f 7032 0030 0x307032665f diff --git a/driver/tests/tex_demo/kernel.elf b/driver/tests/tex_demo/kernel.elf new file mode 100755 index 00000000..b2e5502f Binary files /dev/null and b/driver/tests/tex_demo/kernel.elf differ diff --git a/hw/VX_config.h b/hw/VX_config.h new file mode 100644 index 00000000..f967463e --- /dev/null +++ b/hw/VX_config.h @@ -0,0 +1,412 @@ +// auto-generated by gen_config.py. DO NOT EDIT +// Generated at 2021-03-12 17:51:37.263369 + +#ifndef VX_USER_CONFIG +#define VX_USER_CONFIG + + +#endif +// auto-generated by gen_config.py. DO NOT EDIT +// Generated at 2021-03-12 17:51:37.265050 + +// Translated from VX_config.vh: + +#ifndef VX_CONFIG +#define VX_CONFIG + + + +#ifndef NUM_CLUSTERS +#define NUM_CLUSTERS 1 +#endif + +#ifndef NUM_CORES +#define NUM_CORES 1 +#endif + +#ifndef NUM_WARPS +#define NUM_WARPS 4 +#endif + +#ifndef NUM_THREADS +#define NUM_THREADS 4 +#endif + +#ifndef NUM_BARRIERS +#define NUM_BARRIERS 4 +#endif + +#ifndef L2_ENABLE +#define L2_ENABLE 0 +#endif + +#ifndef L3_ENABLE +#define L3_ENABLE 0 +#endif + +#ifndef SM_ENABLE +#define SM_ENABLE 1 +#endif + +#ifndef GLOBAL_BLOCK_SIZE +#define GLOBAL_BLOCK_SIZE 64 +#endif + +#ifndef L1_BLOCK_SIZE +#define L1_BLOCK_SIZE (NUM_THREADS * 4) +#endif + +#ifndef STARTUP_ADDR +#define STARTUP_ADDR 0x80000000 +#endif + +#ifndef IO_BUS_BASE_ADDR +#define IO_BUS_BASE_ADDR 0xFF000000 +#endif + +#ifndef SHARED_MEM_BASE_ADDR +#define SHARED_MEM_BASE_ADDR IO_BUS_BASE_ADDR +#endif + +#ifndef SHARED_MEM_BASE_ADDR_ALIGN +#define SHARED_MEM_BASE_ADDR_ALIGN 64 +#endif + +#ifndef IO_BUS_ADDR_COUT +#define IO_BUS_ADDR_COUT 0xFFFFFFFC +#endif + +#ifndef FRAME_BUFFER_BASE_ADDR +#define FRAME_BUFFER_BASE_ADDR 0xFF000000 +#endif + +#ifndef FRAME_BUFFER_WIDTH +#define FRAME_BUFFER_WIDTH 1920 +#endif + +#ifndef FRAME_BUFFER_HEIGHT +#define FRAME_BUFFER_HEIGHT 1080 +#endif + +#define FRAME_BUFFER_SIZE (FRAME_BUFFER_WIDTH * FRAME_BUFFER_HEIGHT) + +#ifndef EXT_M_DISABLE +#define EXT_M_ENABLE +#endif + +#ifndef EXT_F_DISABLE +#define EXT_F_ENABLE +#endif + +// Device identification +#define VENDOR_ID 0 +#define ARCHITECTURE_ID 0 +#define IMPLEMENTATION_ID 0 + +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LATENCY_IMUL +#define LATENCY_IMUL 3 +#endif + +#ifndef LATENCY_FNCP +#define LATENCY_FNCP 2 +#endif + +#ifndef LATENCY_FMA +#define LATENCY_FMA 4 +#endif + +#ifndef LATENCY_FDIV +#ifdef ALTERA_S10 +#define LATENCY_FDIV 34 +#else +#define LATENCY_FDIV 15 +#endif +#endif + +#ifndef LATENCY_FSQRT +#ifdef ALTERA_S10 +#define LATENCY_FSQRT 25 +#else +#define LATENCY_FSQRT 10 +#endif +#endif + +#ifndef LATENCY_FDIVSQRT +#define LATENCY_FDIVSQRT 32 +#endif + +#ifndef LATENCY_FCVT +#define LATENCY_FCVT 4 +#endif + +// CSR Addresses ////////////////////////////////////////////////////////////// + +// User Floating-Point CSRs +#define CSR_FFLAGS 0x001 +#define CSR_FRM 0x002 +#define CSR_FCSR 0x003 + +#define CSR_SATP 0x180 + +#define CSR_PMPCFG0 0x3A0 +#define CSR_PMPADDR0 0x3B0 + +#define CSR_MSTATUS 0x300 +#define CSR_MISA 0x301 +#define CSR_MEDELEG 0x302 +#define CSR_MIDELEG 0x303 +#define CSR_MIE 0x304 +#define CSR_MTVEC 0x305 + +#define CSR_MEPC 0x341 + +// Machine Counter/Timers +#define CSR_CYCLE 0xC00 +#define CSR_CYCLE_H 0xC80 +#define CSR_INSTRET 0xC02 +#define CSR_INSTRET_H 0xC82 + +// Machine Performance-monitoring counters +// PERF: pipeline +#define CSR_MPM_IBUF_ST 0xB03 +#define CSR_MPM_IBUF_ST_H 0xB83 +#define CSR_MPM_SCRB_ST 0xB04 +#define CSR_MPM_SCRB_ST_H 0xB84 +#define CSR_MPM_ALU_ST 0xB05 +#define CSR_MPM_ALU_ST_H 0xB85 +#define CSR_MPM_LSU_ST 0xB06 +#define CSR_MPM_LSU_ST_H 0xB86 +#define CSR_MPM_CSR_ST 0xB07 +#define CSR_MPM_CSR_ST_H 0xB87 +#define CSR_MPM_FPU_ST 0xB08 +#define CSR_MPM_FPU_ST_H 0xB88 +#define CSR_MPM_GPU_ST 0xB09 +#define CSR_MPM_GPU_ST_H 0xB89 +// PERF: icache +#define CSR_MPM_ICACHE_READS 0xB0A // total reads +#define CSR_MPM_ICACHE_READS_H 0xB8A +#define CSR_MPM_ICACHE_MISS_R 0xB0B // total misses +#define CSR_MPM_ICACHE_MISS_R_H 0xB8B +#define CSR_MPM_ICACHE_PIPE_ST 0xB0C // pipeline stalls +#define CSR_MPM_ICACHE_PIPE_ST_H 0xB8C +#define CSR_MPM_ICACHE_CRSP_ST 0xB0D // core response stalls +#define CSR_MPM_ICACHE_CRSP_ST_H 0xB8D +// PERF: dcache +#define CSR_MPM_DCACHE_READS 0xB0E // total reads +#define CSR_MPM_DCACHE_READS_H 0xB8E +#define CSR_MPM_DCACHE_WRITES 0xB0F // total writes +#define CSR_MPM_DCACHE_WRITES_H 0xB8F +#define CSR_MPM_DCACHE_MISS_R 0xB10 // read misses +#define CSR_MPM_DCACHE_MISS_R_H 0xB90 +#define CSR_MPM_DCACHE_MISS_W 0xB11 // write misses +#define CSR_MPM_DCACHE_MISS_W_H 0xB91 +#define CSR_MPM_DCACHE_BANK_ST 0xB12 // bank conflicts stalls +#define CSR_MPM_DCACHE_BANK_ST_H 0xB92 +#define CSR_MPM_DCACHE_MSHR_ST 0xB13 // MSHR stalls +#define CSR_MPM_DCACHE_MSHR_ST_H 0xB93 +#define CSR_MPM_DCACHE_PIPE_ST 0xB14 // pipeline stalls +#define CSR_MPM_DCACHE_PIPE_ST_H 0xB94 +#define CSR_MPM_DCACHE_CRSP_ST 0xB15 // core response stalls +#define CSR_MPM_DCACHE_CRSP_ST_H 0xB95 +// PERF: smem +#define CSR_MPM_SMEM_READS 0xB16 // total reads +#define CSR_MPM_SMEM_READS_H 0xB96 +#define CSR_MPM_SMEM_WRITES 0xB17 // total writes +#define CSR_MPM_SMEM_WRITES_H 0xB97 +#define CSR_MPM_SMEM_BANK_ST 0xB18 // bank conflicts stalls +#define CSR_MPM_SMEM_BANK_ST_H 0xB98 +// PERF: memory +#define CSR_MPM_DRAM_READS 0xB19 // dram reads +#define CSR_MPM_DRAM_READS_H 0xB99 +#define CSR_MPM_DRAM_WRITES 0xB1A // dram writes +#define CSR_MPM_DRAM_WRITES_H 0xB9A +#define CSR_MPM_DRAM_ST 0xB1B // dram request stalls +#define CSR_MPM_DRAM_ST_H 0xB9B +#define CSR_MPM_DRAM_LAT 0xB1C // dram latency (total) +#define CSR_MPM_DRAM_LAT_H 0xB9C + +// Machine Information Registers +#define CSR_MVENDORID 0xF11 +#define CSR_MARCHID 0xF12 +#define CSR_MIMPID 0xF13 +#define CSR_MHARTID 0xF14 + +// User SIMT CSRs +#define CSR_WTID 0xCC0 +#define CSR_LTID 0xCC1 +#define CSR_GTID 0xCC2 +#define CSR_LWID 0xCC3 +#define CSR_GWID CSR_MHARTID +#define CSR_GCID 0xCC5 + +// Machine SIMT CSRs +#define CSR_NT 0xFC0 +#define CSR_NW 0xFC1 +#define CSR_NC 0xFC2 + +// Pipeline Queues //////////////////////////////////////////////////////////// + +// Size of LSU Request Queue +#ifndef LSUQ_SIZE +#define LSUQ_SIZE 8 +#endif + +// Size of FPU Request Queue +#ifndef FPUQ_SIZE +#define FPUQ_SIZE 8 +#endif + +// Icache Configurable Knobs ////////////////////////////////////////////////// + +// Size of cache in bytes +#ifndef ICACHE_SIZE +#define ICACHE_SIZE 16384 +#endif + +// Core Request Queue Size +#ifndef ICREQ_SIZE +#define ICREQ_SIZE 4 +#endif + +// Miss Handling Register Size +#ifndef IMSHR_SIZE +#define IMSHR_SIZE NUM_WARPS +#endif + +// DRAM Request Queue Size +#ifndef IDREQ_SIZE +#define IDREQ_SIZE 4 +#endif + +// DRAM Response Queue Size +#ifndef IDRSQ_SIZE +#define IDRSQ_SIZE 4 +#endif + +// Dcache Configurable Knobs ////////////////////////////////////////////////// + +// Size of cache in bytes +#ifndef DCACHE_SIZE +#define DCACHE_SIZE 16384 +#endif + +// Number of banks +#ifndef DNUM_BANKS +#define DNUM_BANKS NUM_THREADS +#endif + +// Number of bank ports +#ifndef DNUM_PORTS +#define DNUM_PORTS 1 +#endif + +// Core Request Queue Size +#ifndef DCREQ_SIZE +#define DCREQ_SIZE 4 +#endif + +// Miss Handling Register Size +#ifndef DMSHR_SIZE +#define DMSHR_SIZE LSUQ_SIZE +#endif + +// DRAM Request Queue Size +#ifndef DDREQ_SIZE +#define DDREQ_SIZE 4 +#endif + +// DRAM Response Queue Size +#ifndef DDRSQ_SIZE +#define DDRSQ_SIZE MAX(4, (DNUM_BANKS * 2)) +#endif + +// SM Configurable Knobs ////////////////////////////////////////////////////// + +// per thread stack size +#ifndef STACK_SIZE +#define STACK_SIZE 1024 +#endif + +// Size of cache in bytes +#ifndef SMEM_SIZE +#define SMEM_SIZE (STACK_SIZE * NUM_WARPS * NUM_THREADS) +#endif + +// Number of banks +#ifndef SNUM_BANKS +#define SNUM_BANKS NUM_THREADS +#endif + +// Core Request Queue Size +#ifndef SCREQ_SIZE +#define SCREQ_SIZE 4 +#endif + +// L2cache Configurable Knobs ///////////////////////////////////////////////// + +// Size of cache in bytes +#ifndef L2CACHE_SIZE +#define L2CACHE_SIZE 65536 +#endif + +// Number of banks +#ifndef L2NUM_BANKS +#define L2NUM_BANKS MIN(NUM_CORES, 4) +#endif + +// Core Request Queue Size +#ifndef L2CREQ_SIZE +#define L2CREQ_SIZE 4 +#endif + +// Miss Handling Register Size +#ifndef L2MSHR_SIZE +#define L2MSHR_SIZE 16 +#endif + +// DRAM Request Queue Size +#ifndef L2DREQ_SIZE +#define L2DREQ_SIZE 4 +#endif + +// DRAM Response Queue Size +#ifndef L2DRSQ_SIZE +#define L2DRSQ_SIZE MAX(4, (L2NUM_BANKS * 2)) +#endif + +// L3cache Configurable Knobs ///////////////////////////////////////////////// + +// Size of cache in bytes +#ifndef L3CACHE_SIZE +#define L3CACHE_SIZE 131072 +#endif + +// Number of banks +#ifndef L3NUM_BANKS +#define L3NUM_BANKS MIN(NUM_CLUSTERS, 4) +#endif + +// Core Request Queue Size +#ifndef L3CREQ_SIZE +#define L3CREQ_SIZE 4 +#endif + +// Miss Handling Register Size +#ifndef L3MSHR_SIZE +#define L3MSHR_SIZE 16 +#endif + +// DRAM Request Queue Size +#ifndef L3DREQ_SIZE +#define L3DREQ_SIZE 4 +#endif + +// DRAM Response Queue Size +#ifndef L3DRSQ_SIZE +#define L3DRSQ_SIZE MAX(4, (L3NUM_BANKS * 2)) +#endif + +#endif + diff --git a/hw/rtl/VX_commit.v b/hw/rtl/VX_commit.v index d8a8bb59..b4b99c4c 100644 --- a/hw/rtl/VX_commit.v +++ b/hw/rtl/VX_commit.v @@ -73,13 +73,14 @@ module VX_commit #( .ld_commit_if (ld_commit_if), .csr_commit_if (csr_commit_if), .fpu_commit_if (fpu_commit_if), + .gpu_commit_if (gpu_commit_if), .writeback_if (writeback_if) ); - // store and gpu commits don't writeback + // store doesn't writeback assign st_commit_if.ready = 1'b1; - assign gpu_commit_if.ready = 1'b1; + // assign gpu_commit_if.ready = 1'b1; `ifdef DBG_PRINT_PIPELINE always @(posedge clk) begin diff --git a/hw/rtl/VX_decode.v b/hw/rtl/VX_decode.v index f68ec116..7ddc8000 100644 --- a/hw/rtl/VX_decode.v +++ b/hw/rtl/VX_decode.v @@ -358,6 +358,13 @@ module VX_decode #( use_rs2 = 1; is_wstall = 1; end + 3'h5: begin + op_type = `OP_BITS'(`GPU_TEX); + use_rd = 1; + use_rs1 = 1; + use_rs2 = 1; + use_rs3 = 1; + end default:; endcase end diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 66afd80e..db1be050 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -52,6 +52,8 @@ `define INST_GPU 7'b1101011 +`define INST_TEX 7'b0101011 + /////////////////////////////////////////////////////////////////////////////// `define FRM_RNE 3'b000 // round to nearest even @@ -182,6 +184,7 @@ `define GPU_SPLIT 3'h2 `define GPU_JOIN 3'h3 `define GPU_BAR 3'h4 +`define GPU_TEX 3'h5 `define GPU_OTHER 3'h7 `define GPU_BITS 3 `define GPU_OP(x) x[`GPU_BITS-1:0] @@ -381,6 +384,17 @@ `define XDRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH+`CLOG2(2)) +////////////////////////// Texture Unit Configurable Knobs ////////////////////////////// +`define MADDRW 8 +`define MAXWTW 8 +`define MAXHTW 8 +`define MAXFTW 8 +`define MAXFMW 8 +`define MAXAMW 8 +`define TAGW 8 +`define DATAW 32 +//////////////////////////////////////////////////////////////////////////////////////// + `include "VX_types.vh" `endif diff --git a/hw/rtl/VX_gpu_unit.v b/hw/rtl/VX_gpu_unit.v index 9d0615bb..dadf50c7 100644 --- a/hw/rtl/VX_gpu_unit.v +++ b/hw/rtl/VX_gpu_unit.v @@ -25,10 +25,14 @@ module VX_gpu_unit #( gpu_barrier_t barrier; gpu_split_t split; + VX_tex_req_if tex_req_if; + VX_tex_rsp_if tex_rsp_if; + wire is_wspawn = (gpu_req_if.op_type == `GPU_WSPAWN); wire is_tmc = (gpu_req_if.op_type == `GPU_TMC); wire is_split = (gpu_req_if.op_type == `GPU_SPLIT); wire is_bar = (gpu_req_if.op_type == `GPU_BAR); + wire is_tex = (gpu_req_if.op_type == `GPU_TEX); // tmc @@ -41,7 +45,7 @@ module VX_gpu_unit #( // wspawn - wire [31:0] wspawn_pc = gpu_req_if.rs2_data; + wire [31:0] wspawn_pc = gpu_req_if.rs2_data[0]; wire [`NUM_WARPS-1:0] wspawn_wmask; for (genvar i = 0; i < `NUM_WARPS; i++) begin assign wspawn_wmask[i] = (i < gpu_req_if.rs1_data[0]); @@ -71,21 +75,48 @@ module VX_gpu_unit #( assign barrier.valid = is_bar; assign barrier.id = gpu_req_if.rs1_data[0][`NB_BITS-1:0]; - assign barrier.size_m1 = (`NW_BITS)'(gpu_req_if.rs2_data - 1); + assign barrier.size_m1 = (`NW_BITS)'(gpu_req_if.rs2_data[0] - 1); + + // texture + assign tex_req_if.valid = is_tex; + + for (genvar i = 0; i < `NUM_THREADS; i++) begin + assign tex_req_if.u[i] = gpu_req_if.rs1_data[i]; + assign tex_req_if.v[i] = gpu_req_if.rs2_data[i]; + assign tex_req_if.lod_t[i] = gpu_req_if.rs3_data[i]; + end + + `UNUSED_VAR (tex_req_if.u) + `UNUSED_VAR (tex_req_if.v) + `UNUSED_VAR (tex_req_if.valid) + `UNUSED_VAR (tex_req_if.lod_t) + + + VX_tex_unit #( + .CORE_ID(CORE_ID) + ) texture_unit ( + .clk (clk), + .reset (reset), + + .tex_req_if (tex_req_if), + .tex_rsp_if (tex_rsp_if) + ); + + assign gpu_req_if.valid = is_tex; + assign gpu_req_if.wb = tex_rsp_if.ready; // output - wire stall = ~gpu_commit_if.ready && gpu_commit_if.valid; VX_pipe_register #( - .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + `GPU_TMC_SIZE + `GPU_WSPAWN_SIZE + `GPU_SPLIT_SIZE + `GPU_BARRIER_SIZE), + .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + `GPU_TMC_SIZE + `GPU_WSPAWN_SIZE + `GPU_SPLIT_SIZE + `GPU_BARRIER_SIZE + (`NUM_THREADS * 32)), .RESETW (1) ) pipe_reg ( .clk (clk), .reset (reset), .enable (!stall), - .data_in ({gpu_req_if.valid, gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.rd, gpu_req_if.wb, tmc, wspawn, split, barrier}), - .data_out ({gpu_commit_if.valid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.rd, gpu_commit_if.wb, warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier}) + .data_in ({gpu_req_if.valid, gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, tex_rsp_if.data, gpu_req_if.rd, gpu_req_if.wb, tmc, wspawn, split, barrier}), + .data_out ({gpu_commit_if.valid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.data, gpu_commit_if.rd, gpu_commit_if.wb, warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier}) ); assign gpu_commit_if.eop = 1'b1; @@ -101,7 +132,7 @@ module VX_gpu_unit #( `SCOPE_ASSIGN (gpu_req_tmask, gpu_req_if.tmask); `SCOPE_ASSIGN (gpu_req_op_type, gpu_req_if.op_type); `SCOPE_ASSIGN (gpu_req_rs1, gpu_req_if.rs1_data[0]); - `SCOPE_ASSIGN (gpu_req_rs2, gpu_req_if.rs2_data); + `SCOPE_ASSIGN (gpu_req_rs2, gpu_req_if.rs2_data[0]); `SCOPE_ASSIGN (gpu_rsp_valid, warp_ctl_if.valid); `SCOPE_ASSIGN (gpu_rsp_wid, warp_ctl_if.wid); `SCOPE_ASSIGN (gpu_rsp_tmc, warp_ctl_if.tmc); diff --git a/hw/rtl/VX_instr_demux.v b/hw/rtl/VX_instr_demux.v index 36164b94..68b5123d 100644 --- a/hw/rtl/VX_instr_demux.v +++ b/hw/rtl/VX_instr_demux.v @@ -111,14 +111,14 @@ module VX_instr_demux ( wire gpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_GPU); VX_skid_buffer #( - .DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32)) + .DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)) //update number of bits ) gpu_buffer ( .clk (clk), .reset (reset), .valid_in (gpu_req_valid), .ready_in (gpu_req_ready), - .data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `GPU_OP(execute_if.op_type), execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data[0]}), - .data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.rs1_data, gpu_req_if.rs2_data}), + .data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `GPU_OP(execute_if.op_type), execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}), + .data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.rs1_data, gpu_req_if.rs2_data, gpu_req_if.rs3_data}), .valid_out (gpu_req_if.valid), .ready_out (gpu_req_if.ready) ); diff --git a/hw/rtl/VX_writeback.v b/hw/rtl/VX_writeback.v index 4dee992f..985a3ebf 100644 --- a/hw/rtl/VX_writeback.v +++ b/hw/rtl/VX_writeback.v @@ -11,6 +11,7 @@ module VX_writeback #( VX_commit_if ld_commit_if, VX_commit_if csr_commit_if, VX_commit_if fpu_commit_if, + VX_commit_if gpu_commit_if, // outputs VX_writeback_if writeback_if @@ -22,6 +23,7 @@ module VX_writeback #( wire fpu_valid = fpu_commit_if.valid && fpu_commit_if.wb; wire csr_valid = csr_commit_if.valid && csr_commit_if.wb; wire alu_valid = alu_commit_if.valid && alu_commit_if.wb; + wire gpu_valid = gpu_commit_if.valid && gpu_commit_if.wb; wire wb_valid; wire [`NW_BITS-1:0] wb_wid; @@ -34,37 +36,44 @@ module VX_writeback #( assign wb_valid = ld_valid | fpu_valid | csr_valid | - alu_valid; + alu_valid | + gpu_valid; assign wb_wid = ld_valid ? ld_commit_if.wid : fpu_valid ? fpu_commit_if.wid : csr_valid ? csr_commit_if.wid : - /*alu_valid ?*/ alu_commit_if.wid; + alu_valid ? alu_commit_if.wid : + /*gpu_valid*/ gpu_commit_if.wid; assign wb_PC = ld_valid ? ld_commit_if.PC : fpu_valid ? fpu_commit_if.PC : csr_valid ? csr_commit_if.PC : - /*alu_valid ?*/ alu_commit_if.PC; - + alu_valid ? alu_commit_if.PC : + /*gpu_valid*/ gpu_commit_if.PC; + assign wb_tmask = ld_valid ? ld_commit_if.tmask : fpu_valid ? fpu_commit_if.tmask : csr_valid ? csr_commit_if.tmask : - /*alu_valid ?*/ alu_commit_if.tmask; + alu_valid ? alu_commit_if.tmask : + /*gpu_valid*/ gpu_commit_if.tmask; assign wb_rd = ld_valid ? ld_commit_if.rd : fpu_valid ? fpu_commit_if.rd : csr_valid ? csr_commit_if.rd : - /*alu_valid ?*/ alu_commit_if.rd; + alu_valid ? alu_commit_if.rd : + /*gpu_valid*/ gpu_commit_if.rd; assign wb_data = ld_valid ? ld_commit_if.data : fpu_valid ? fpu_commit_if.data : csr_valid ? csr_commit_if.data : - /*alu_valid ?*/ alu_commit_if.data; + alu_valid ? alu_commit_if.data : + /*gpu_valid*/ gpu_commit_if.data; assign wb_eop = ld_valid ? ld_commit_if.eop : fpu_valid ? fpu_commit_if.eop : csr_valid ? csr_commit_if.eop : - /*alu_valid ?*/ alu_commit_if.eop; + alu_valid ? alu_commit_if.eop : + /*gpu_valid*/ gpu_commit_if.eop; wire stall = ~writeback_if.ready && writeback_if.valid; @@ -82,7 +91,9 @@ module VX_writeback #( assign ld_commit_if.ready = !stall; assign fpu_commit_if.ready = !stall && !ld_valid; assign csr_commit_if.ready = !stall && !ld_valid && !fpu_valid; - assign alu_commit_if.ready = !stall && !ld_valid && !fpu_valid && !csr_valid; + assign alu_commit_if.ready = !stall && !ld_valid && !fpu_valid && !csr_valid; + // if not TEX instruction, no writeback and commit is ready + assign gpu_commit_if.ready = (!stall && !ld_valid && !fpu_valid && !csr_valid && !alu_valid) || !gpu_commit_if.wb ; // special workaround to get RISC-V tests Pass/Fail status reg [31:0] last_wb_value [`NUM_REGS-1:0] /* verilator public */; diff --git a/hw/rtl/interfaces/VX_gpu_req_if.v b/hw/rtl/interfaces/VX_gpu_req_if.v index 5f024ae9..8ce8663d 100644 --- a/hw/rtl/interfaces/VX_gpu_req_if.v +++ b/hw/rtl/interfaces/VX_gpu_req_if.v @@ -13,7 +13,8 @@ interface VX_gpu_req_if(); wire [31:0] next_PC; wire [`GPU_BITS-1:0] op_type; wire [`NUM_THREADS-1:0][31:0] rs1_data; - wire [31:0] rs2_data; + wire [`NUM_THREADS-1:0][31:0] rs2_data; + wire [`NUM_THREADS-1:0][31:0] rs3_data; wire [`NR_BITS-1:0] rd; wire wb; diff --git a/hw/rtl/interfaces/VX_tex_req_if.v b/hw/rtl/interfaces/VX_tex_req_if.v new file mode 100644 index 00000000..1d3fdf58 --- /dev/null +++ b/hw/rtl/interfaces/VX_tex_req_if.v @@ -0,0 +1,24 @@ +`ifndef VX_TEX_REQ_IF +`define VX_TEX_REQ_IF + +`include "VX_define.vh" + +interface VX_tex_req_if (); + wire valid; + wire [`NUM_THREADS-1:0][31:0] u; + wire [`NUM_THREADS-1:0][31:0] v; + wire [`NUM_THREADS-1:0][31:0] lod_t; + // wire [`MADDRW-1:0] addr; + // wire [`MAXWTW-1:0] width; + // wire [`MAXHTW-1:0] height; + // wire [`MAXFTW-1:0] format; + // wire [`MAXFMW-1:0] filter; + // wire [`MAXAMW-1:0] clamp; + // wire [`TAGW-1:0] tag; + // wire ready; + +endinterface +`endif + + + \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_tex_rsp_if.v b/hw/rtl/interfaces/VX_tex_rsp_if.v new file mode 100644 index 00000000..3ca929d5 --- /dev/null +++ b/hw/rtl/interfaces/VX_tex_rsp_if.v @@ -0,0 +1,14 @@ +`ifndef VX_TEX_RSP_IF +`define VX_TEX_RSP_IF + +`include "VX_define.vh" + +interface VX_tex_rsp_if (); + // wire valid; + // wire [`TAGW-1:0] tag; + wire [`NUM_THREADS-1:0][31:0] data; + wire ready; +endinterface +`endif + + diff --git a/hw/rtl/tex_unit/VX_tex_unit.v b/hw/rtl/tex_unit/VX_tex_unit.v index a7c38cfe..d026a562 100644 --- a/hw/rtl/tex_unit/VX_tex_unit.v +++ b/hw/rtl/tex_unit/VX_tex_unit.v @@ -1,50 +1,55 @@ `include "VX_platform.vh" +`include "VX_define.vh" -module VX_tex_unit #( - parameter TADDRW = 32, - parameter MADDRW = 32, - parameter DATAW = 32, - parameter MAXWTW = 8, - parameter MAXHTW = 8, - parameter MAXFTW = 2, - parameter MAXFMW = 1, - parameter MAXAMW = 2, - parameter TAGW = 16, - - parameter NUMCRQS = 32 +module VX_tex_unit #( + parameter CORE_ID = 0 ) ( - input wire clk, - input wire reset, + input wire clk, + input wire reset, + // Inputs + VX_tex_req_if tex_req_if, - // Texture Request - input wire tex_req_valid, - input wire [TADDRW-1:0] tex_req_u, - input wire [TADDRW-1:0] tex_req_v, - input wire [MADDRW-1:0] tex_req_addr, - input wire [MAXWTW-1:0] tex_req_width, - input wire [MAXHTW-1:0] tex_req_height, - input wire [MAXFTW-1:0] tex_req_format, - input wire [MAXFMW-1:0] tex_req_filter, - input wire [MAXAMW-1:0] tex_req_clamp, - input wire [TAGW-1:0] tex_req_tag, - output wire tex_req_ready, + // Outputs + VX_tex_rsp_if tex_rsp_if + // VX_commit_if gpu_commit_if + // // Texture Request + // input wire tex_req_valid, + // input wire [`TADDRW-1:0] tex_req_u, + // input wire [`TADDRW-1:0] tex_req_v, + // input wire [`MADDRW-1:0] tex_req_addr, + // input wire [`MAXWTW-1:0] tex_req_width, + // input wire [`MAXHTW-1:0] tex_req_height, + // input wire [`MAXFTW-1:0] tex_req_format, + // input wire [`MAXFMW-1:0] tex_req_filter, + // input wire [`MAXAMW-1:0] tex_req_clamp, + // input wire [`TAGW-1:0] tex_req_tag, + // output wire tex_req_ready, - // Texture Response - output wire tex_rsp_valid, - output wire [TAGW-1:0] tex_rsp_tag, - input wire [DATAW-1:0] tex_rsp_data, - input wire tex_rsp_ready, + // // Texture Response + // output wire tex_rsp_valid, + // output wire [`TAGW-1:0] tex_rsp_tag, + // input wire [`DATAW-1:0] tex_rsp_data, + // input wire tex_rsp_ready, // Cache Request - output wire [NUMCRQS-1:0] cache_req_valids, - output wire [NUMCRQS-1:0][MADDRW-1:0] cache_req_addrs, - input wire cache_req_ready, + // output wire [NUMCRQS-1:0] cache_req_valids, + // output wire [NUMCRQS-1:0][MADDRW-1:0] cache_req_addrs, + // input wire cache_req_ready, // Cache Response - input wire cache_rsp_valid, - input wire [MADDRW-1:0] cache_rsp_addr, - input wire [DATAW-1:0] cache_rsp_data, - output wire cache_rsp_ready + // input wire cache_rsp_valid, + // input wire [MADDRW-1:0] cache_rsp_addr, + // input wire [DATAW-1:0] cache_rsp_data, + // output wire cache_rsp_ready ); + `UNUSED_VAR (clk) + `UNUSED_VAR (reset) + + for (genvar i = 0; i < `NUM_THREADS; i++) begin + assign tex_rsp_if.data[i] = 32'hFAAF; + end + + assign tex_rsp_if.ready = 1'b1; + endmodule \ No newline at end of file diff --git a/hw/simulate/Makefile b/hw/simulate/Makefile index c30d91b1..b468ea62 100644 --- a/hw/simulate/Makefile +++ b/hw/simulate/Makefile @@ -36,8 +36,9 @@ RTL_DIR=../rtl DPI_DIR=../dpi FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(DPI_DIR) -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src -RTL_INCLUDE = -I$(RTL_DIR)/ -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(RTL_DIR)/simulate $(FPU_INCLUDE) - +TEX_INCLUDE = -I$(RTL_DIR)/tex_unit +RTL_INCLUDE = -I$(RTL_DIR)/ -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(RTL_DIR)/simulate $(FPU_INCLUDE) $(TEX_INCLUDE) + SRCS = simulator.cpp testbench.cpp SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp diff --git a/runtime/Makefile b/runtime/Makefile index ba9d1366..40b96463 100644 --- a/runtime/Makefile +++ b/runtime/Makefile @@ -10,7 +10,7 @@ CFLAGS += -I./include -I../hw PROJECT = libvortexrt -SRCS = ./src/vx_start.S ./src/vx_print.S ./src/vx_print.c ./src/vx_spawn.c +SRCS = ./src/vx_start.S ./src/vx_print.S ./src/vx_print.c ./src/vx_spawn.c ./src/vx_tex.c OBJS := $(addsuffix .o, $(notdir $(SRCS))) diff --git a/runtime/include/vx_intrinsics.h b/runtime/include/vx_intrinsics.h index 962a28fd..48e27851 100644 --- a/runtime/include/vx_intrinsics.h +++ b/runtime/include/vx_intrinsics.h @@ -109,6 +109,14 @@ inline int vx_num_instrs() { return result; } +// Texture load instruction +inline int vx_tex_ld(unsigned t, unsigned u, unsigned v, unsigned lod_t) { + lod_t = (lod_t << 8) | t; + int result; + asm volatile (".insn r4 0x6b, 5, 1, %0, %1, %2, %3" :: "r"(result), "r"(u), "r"(v), "r"(lod_t)); + return result; +} + #define __if(b) vx_split(b); \ if (b) diff --git a/runtime/include/vx_tex.h b/runtime/include/vx_tex.h new file mode 100644 index 00000000..002d2de2 --- /dev/null +++ b/runtime/include/vx_tex.h @@ -0,0 +1,17 @@ +#ifndef VX_API_H +#define VX_API_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +int vx_tex(unsigned t, unsigned u, unsigned v, unsigned lod); + +#ifdef __cplusplus +} +#endif + +#endif \ No newline at end of file diff --git a/runtime/libvortexrt.a b/runtime/libvortexrt.a new file mode 100644 index 00000000..e762c866 Binary files /dev/null and b/runtime/libvortexrt.a differ diff --git a/runtime/libvortexrt.dump b/runtime/libvortexrt.dump new file mode 100644 index 00000000..7f1de08a --- /dev/null +++ b/runtime/libvortexrt.dump @@ -0,0 +1,1073 @@ +In archive libvortexrt.a: + +vx_start.S.o: file format elf32-littleriscv + + +Disassembly of section .text: + +00000000 <_exit>: + 0: 00000513 li a0,0 + 4: 0005006b 0x5006b + +00000008 : + 8: fc002573 csrr a0,0xfc0 + c: 0005006b 0x5006b + 10: 00000197 auipc gp,0x0 + 14: 00018193 mv gp,gp + 18: 00000117 auipc sp,0x0 + 1c: 00010113 mv sp,sp + 20: 00000597 auipc a1,0x0 + 24: 00058593 mv a1,a1 + 28: cc102673 csrr a2,0xcc1 + 2c: 02c585b3 mul a1,a1,a2 + 30: 40b10133 sub sp,sp,a1 + 34: cc3026f3 csrr a3,0xcc3 + 38: 00068663 beqz a3,44 + 3c: 00000513 li a0,0 + 40: 0005006b 0x5006b + +00000044 : + 44: 00008067 ret + +Disassembly of section .data: + +00000000 <__dso_handle>: + 0: 0000 unimp + ... + +Disassembly of section .init: + +00000000 <_start>: + 0: 00000597 auipc a1,0x0 + 4: 00058593 mv a1,a1 + 8: fc102573 csrr a0,0xfc1 + c: 00b5106b 0xb5106b + 10: ff9ff0ef jal ra,8 <_start+0x8> + 14: 00100513 li a0,1 + 18: 0005006b 0x5006b + 1c: 00000517 auipc a0,0x0 + 20: 00050513 mv a0,a0 + 24: 00000617 auipc a2,0x0 + 28: 00060613 mv a2,a2 + 2c: 40a60633 sub a2,a2,a0 + 30: 00000593 li a1,0 + 34: 00000097 auipc ra,0x0 + 38: 000080e7 jalr ra # 34 <_start+0x34> + 3c: 00000517 auipc a0,0x0 + 40: 00050513 mv a0,a0 + 44: 00000097 auipc ra,0x0 + 48: 000080e7 jalr ra # 44 <_start+0x44> + 4c: 00000097 auipc ra,0x0 + 50: 000080e7 jalr ra # 4c <_start+0x4c> + 54: 00000097 auipc ra,0x0 + 58: 000080e7 jalr ra # 54 <_start+0x54> + 5c: 00000317 auipc t1,0x0 + 60: 00030067 jr t1 # 5c <_start+0x5c> + +Disassembly of section .riscv.attributes: + +00000000 <.riscv.attributes>: + 0: 2341 jal 580 + 2: 0000 unimp + 4: 7200 flw fs0,32(a2) + 6: 7369 lui t1,0xffffa + 8: 01007663 bgeu zero,a6,14 <.riscv.attributes+0x14> + c: 0019 c.nop 6 + e: 0000 unimp + 10: 7205 lui tp,0xfffe1 + 12: 3376 fld ft6,376(sp) + 14: 6932 flw fs2,12(sp) + 16: 7032 flw ft0,44(sp) + 18: 5f30 lw a2,120(a4) + 1a: 326d jal fffff9c4 + 1c: 3070 fld fa2,224(s0) + 1e: 665f 7032 0030 0x307032665f + +vx_print.S.o: file format elf32-littleriscv + + +Disassembly of section .text: + +00000000 : + 0: 00000297 auipc t0,0x0 + 4: 00028293 mv t0,t0 + 8: 0002a283 lw t0,0(t0) # 0 + c: cc202373 csrr t1,0xcc2 + 10: 01031313 slli t1,t1,0x10 + 14: 00a36333 or t1,t1,a0 + 18: 0062a023 sw t1,0(t0) + 1c: 00008067 ret + +Disassembly of section .data: + +00000000 : + 0: fffc fsw fa5,124(a5) + 2: ffff 0xffff + +Disassembly of section .riscv.attributes: + +00000000 <.riscv.attributes>: + 0: 2341 jal 580 + 2: 0000 unimp + 4: 7200 flw fs0,32(a2) + 6: 7369 lui t1,0xffffa + 8: 01007663 bgeu zero,a6,14 <.riscv.attributes+0x14> + c: 0019 c.nop 6 + e: 0000 unimp + 10: 7205 lui tp,0xfffe1 + 12: 3376 fld ft6,376(sp) + 14: 6932 flw fs2,12(sp) + 16: 7032 flw ft0,44(sp) + 18: 5f30 lw a2,120(a4) + 1a: 326d jal fffff9c4 + 1c: 3070 fld fa2,224(s0) + 1e: 665f 7032 0030 0x307032665f + +vx_print.c.o: file format elf32-littleriscv + + +Disassembly of section .text.vx_vprintf: + +00000000 : + 0: 22050063 beqz a0,220 <.L24> + 4: f5010113 addi sp,sp,-176 + 8: 0a812423 sw s0,168(sp) + c: 09312e23 sw s3,156(sp) + 10: 0a112623 sw ra,172(sp) + 14: 0a912223 sw s1,164(sp) + 18: 0b212023 sw s2,160(sp) + 1c: 09412c23 sw s4,152(sp) + 20: 09512a23 sw s5,148(sp) + 24: 09612823 sw s6,144(sp) + 28: 09712623 sw s7,140(sp) + 2c: 00050993 mv s3,a0 + 30: 00054503 lbu a0,0(a0) + 34: 00198413 addi s0,s3,1 + 38: 1e050063 beqz a0,218 <.L25> + 3c: 000034b7 lui s1,0x3 + 40: 00000ab7 lui s5,0x0 + 44: 00058a13 mv s4,a1 + 48: 02500913 li s2,37 + 4c: 80948493 addi s1,s1,-2039 # 2809 <.L24+0x25e9> + 50: 000a8a93 mv s5,s5 + +00000054 <.L23>: + 54: 05250663 beq a0,s2,a0 <.L38> + 58: 00000097 auipc ra,0x0 + 5c: 000080e7 jalr ra # 58 <.L23+0x4> + 60: 00044503 lbu a0,0(s0) + 64: 00140b13 addi s6,s0,1 + 68: 000b0413 mv s0,s6 + 6c: fe0514e3 bnez a0,54 <.L23> + +00000070 <.L40>: + 70: 41340533 sub a0,s0,s3 + +00000074 <.L1>: + 74: 0ac12083 lw ra,172(sp) + 78: 0a812403 lw s0,168(sp) + 7c: 0a412483 lw s1,164(sp) + 80: 0a012903 lw s2,160(sp) + 84: 09c12983 lw s3,156(sp) + 88: 09812a03 lw s4,152(sp) + 8c: 09412a83 lw s5,148(sp) + 90: 09012b03 lw s6,144(sp) + 94: 08c12b83 lw s7,140(sp) + 98: 0b010113 addi sp,sp,176 + 9c: 00008067 ret + +000000a0 <.L38>: + a0: 00040b13 mv s6,s0 + a4: 00d00613 li a2,13 + a8: 00100693 li a3,1 + +000000ac <.L6>: + ac: 000b4703 lbu a4,0(s6) + b0: 000b0513 mv a0,s6 + b4: 001b0b13 addi s6,s6,1 + b8: fe070793 addi a5,a4,-32 + bc: 0ff7f793 andi a5,a5,255 + c0: 00f66863 bltu a2,a5,d0 <.L5> + c4: 00f697b3 sll a5,a3,a5 + c8: 0097f7b3 and a5,a5,s1 + cc: fe0790e3 bnez a5,ac <.L6> + +000000d0 <.L5>: + d0: 02a00793 li a5,42 + d4: 00f70c63 beq a4,a5,ec <.L7> + d8: 00a00613 li a2,10 + dc: 04010593 addi a1,sp,64 + e0: 00000097 auipc ra,0x0 + e4: 000080e7 jalr ra # e0 <.L5+0x10> + e8: 04012b03 lw s6,64(sp) + +000000ec <.L7>: + ec: 000b4783 lbu a5,0(s6) + f0: 02e00713 li a4,46 + f4: 001b0513 addi a0,s6,1 + f8: 0ee78263 beq a5,a4,1dc <.L39> + +000000fc <.L8>: + fc: fb478793 addi a5,a5,-76 + 100: 0ff7f793 andi a5,a5,255 + 104: 02e00713 li a4,46 + 108: 02f76463 bltu a4,a5,130 <.L10> + 10c: 00279793 slli a5,a5,0x2 + 110: 015787b3 add a5,a5,s5 + 114: 0007a783 lw a5,0(a5) + 118: 00078067 jr a5 + +0000011c <.L14>: + 11c: 001b4703 lbu a4,1(s6) + 120: 06800793 li a5,104 + 124: 08f70e63 beq a4,a5,1c0 <.L16> + +00000128 <.L11>: + 128: 00050b13 mv s6,a0 + 12c: 00150513 addi a0,a0,1 + +00000130 <.L10>: + 130: 05210023 sb s2,64(sp) + 134: 40850533 sub a0,a0,s0 + 138: 08a05e63 blez a0,1d4 <.L26> + +0000013c <.L41>: + 13c: 00040793 mv a5,s0 + 140: 04110713 addi a4,sp,65 + 144: 00a40633 add a2,s0,a0 + +00000148 <.L18>: + 148: 0007c683 lbu a3,0(a5) + 14c: 00178793 addi a5,a5,1 + 150: 00170713 addi a4,a4,1 + 154: fed70fa3 sb a3,-1(a4) + 158: fec798e3 bne a5,a2,148 <.L18> + 15c: 00150513 addi a0,a0,1 + +00000160 <.L17>: + 160: 08010793 addi a5,sp,128 + 164: 00a787b3 add a5,a5,a0 + 168: 000a0693 mv a3,s4 + 16c: 04010613 addi a2,sp,64 + 170: 10000593 li a1,256 + 174: 00010513 mv a0,sp + 178: fc078023 sb zero,-64(a5) + 17c: 00000097 auipc ra,0x0 + 180: 000080e7 jalr ra # 17c <.L17+0x1c> + 184: 00010413 mv s0,sp + 188: 00a10bb3 add s7,sp,a0 + 18c: 00a05c63 blez a0,1a4 <.L22> + +00000190 <.L21>: + 190: 00044503 lbu a0,0(s0) + 194: 00140413 addi s0,s0,1 + 198: 00000097 auipc ra,0x0 + 19c: 000080e7 jalr ra # 198 <.L21+0x8> + 1a0: fe8b98e3 bne s7,s0,190 <.L21> + +000001a4 <.L22>: + 1a4: 001b4503 lbu a0,1(s6) + 1a8: 002b0413 addi s0,s6,2 + 1ac: ea0514e3 bnez a0,54 <.L23> + 1b0: ec1ff06f j 70 <.L40> + +000001b4 <.L13>: + 1b4: 001b4703 lbu a4,1(s6) + 1b8: 06c00793 li a5,108 + 1bc: f6f716e3 bne a4,a5,128 <.L11> + +000001c0 <.L16>: + 1c0: 003b0513 addi a0,s6,3 + 1c4: 05210023 sb s2,64(sp) + 1c8: 40850533 sub a0,a0,s0 + 1cc: 002b0b13 addi s6,s6,2 + 1d0: f6a046e3 bgtz a0,13c <.L41> + +000001d4 <.L26>: + 1d4: 00100513 li a0,1 + 1d8: f89ff06f j 160 <.L17> + +000001dc <.L39>: + 1dc: 001b4703 lbu a4,1(s6) + 1e0: 02a00793 li a5,42 + 1e4: 00f71a63 bne a4,a5,1f8 <.L9> + 1e8: 002b4783 lbu a5,2(s6) + 1ec: 003b0513 addi a0,s6,3 + 1f0: 002b0b13 addi s6,s6,2 + 1f4: f09ff06f j fc <.L8> + +000001f8 <.L9>: + 1f8: 00a00613 li a2,10 + 1fc: 04010593 addi a1,sp,64 + 200: 00000097 auipc ra,0x0 + 204: 000080e7 jalr ra # 200 <.L9+0x8> + 208: 04012b03 lw s6,64(sp) + 20c: 000b4783 lbu a5,0(s6) + 210: 001b0513 addi a0,s6,1 + 214: ee9ff06f j fc <.L8> + +00000218 <.L25>: + 218: 00100513 li a0,1 + 21c: e59ff06f j 74 <.L1> + +00000220 <.L24>: + 220: fff00513 li a0,-1 + 224: 00008067 ret + +Disassembly of section .rodata.vx_vprintf: + +00000000 <.L12>: + ... + +Disassembly of section .text.vx_printf: + +00000000 : + 0: fc010113 addi sp,sp,-64 + 4: 02410313 addi t1,sp,36 + 8: 02b12223 sw a1,36(sp) + c: 00030593 mv a1,t1 + 10: 00112e23 sw ra,28(sp) + 14: 02c12423 sw a2,40(sp) + 18: 02d12623 sw a3,44(sp) + 1c: 02e12823 sw a4,48(sp) + 20: 02f12a23 sw a5,52(sp) + 24: 03012c23 sw a6,56(sp) + 28: 03112e23 sw a7,60(sp) + 2c: 00612623 sw t1,12(sp) + 30: 00000097 auipc ra,0x0 + 34: 000080e7 jalr ra # 30 + 38: 01c12083 lw ra,28(sp) + 3c: 04010113 addi sp,sp,64 + 40: 00008067 ret + +Disassembly of section .text.vx_prints: + +00000000 : + 0: ff010113 addi sp,sp,-16 + 4: 00812423 sw s0,8(sp) + 8: 00112623 sw ra,12(sp) + c: 00050413 mv s0,a0 + 10: 00054503 lbu a0,0(a0) + 14: 00050e63 beqz a0,30 <.L44> + 18: 00140413 addi s0,s0,1 + +0000001c <.L46>: + 1c: 00140413 addi s0,s0,1 + 20: 00000097 auipc ra,0x0 + 24: 000080e7 jalr ra # 20 <.L46+0x4> + 28: fff44503 lbu a0,-1(s0) + 2c: fe0518e3 bnez a0,1c <.L46> + +00000030 <.L44>: + 30: 00c12083 lw ra,12(sp) + 34: 00812403 lw s0,8(sp) + 38: 01010113 addi sp,sp,16 + 3c: 00008067 ret + +Disassembly of section .text.vx_printx: + +00000000 : + 0: ff010113 addi sp,sp,-16 + 4: 00912223 sw s1,4(sp) + 8: 00112623 sw ra,12(sp) + c: 00812423 sw s0,8(sp) + 10: 01212023 sw s2,0(sp) + 14: 00f00793 li a5,15 + 18: 00050493 mv s1,a0 + 1c: 06a7f063 bgeu a5,a0,7c <.L63> + 20: 00000937 lui s2,0x0 + 24: 00000693 li a3,0 + 28: 02000413 li s0,32 + 2c: 00090913 mv s2,s2 + +00000030 <.L53>: + 30: ffc40413 addi s0,s0,-4 + 34: 0084d7b3 srl a5,s1,s0 + 38: 00f7f793 andi a5,a5,15 + 3c: 00f90733 add a4,s2,a5 + 40: 00079463 bnez a5,48 <.L55> + 44: 00068a63 beqz a3,58 <.L56> + +00000048 <.L55>: + 48: 00074503 lbu a0,0(a4) + 4c: 00000097 auipc ra,0x0 + 50: 000080e7 jalr ra # 4c <.L55+0x4> + 54: 00100693 li a3,1 + +00000058 <.L56>: + 58: fc041ce3 bnez s0,30 <.L53> + 5c: 00812403 lw s0,8(sp) + 60: 00c12083 lw ra,12(sp) + 64: 00412483 lw s1,4(sp) + 68: 00012903 lw s2,0(sp) + 6c: 00a00513 li a0,10 + 70: 01010113 addi sp,sp,16 + 74: 00000317 auipc t1,0x0 + 78: 00030067 jr t1 # 74 <.L56+0x1c> + +0000007c <.L63>: + 7c: 000007b7 lui a5,0x0 + 80: 00078793 mv a5,a5 + 84: 00a784b3 add s1,a5,a0 + 88: 0004c503 lbu a0,0(s1) + 8c: 00000097 auipc ra,0x0 + 90: 000080e7 jalr ra # 8c <.L63+0x10> + 94: 00812403 lw s0,8(sp) + 98: 00c12083 lw ra,12(sp) + 9c: 00412483 lw s1,4(sp) + a0: 00012903 lw s2,0(sp) + a4: 00a00513 li a0,10 + a8: 01010113 addi sp,sp,16 + ac: 00000317 auipc t1,0x0 + b0: 00030067 jr t1 # ac <.L63+0x30> + +Disassembly of section .text.vx_printv: + +00000000 : + 0: ff010113 addi sp,sp,-16 + 4: 00812423 sw s0,8(sp) + 8: 00912223 sw s1,4(sp) + c: 00112623 sw ra,12(sp) + 10: 01212023 sw s2,0(sp) + 14: 00050413 mv s0,a0 + 18: 00054503 lbu a0,0(a0) + 1c: 00058493 mv s1,a1 + 20: 00050e63 beqz a0,3c <.L66> + 24: 00140413 addi s0,s0,1 + +00000028 <.L67>: + 28: 00140413 addi s0,s0,1 + 2c: 00000097 auipc ra,0x0 + 30: 000080e7 jalr ra # 2c <.L67+0x4> + 34: fff44503 lbu a0,-1(s0) + 38: fe0518e3 bnez a0,28 <.L67> + +0000003c <.L66>: + 3c: 00f00793 li a5,15 + 40: 00000693 li a3,0 + 44: 02000413 li s0,32 + 48: 0497fc63 bgeu a5,s1,a0 <.L82> + 4c: 00000937 lui s2,0x0 + 50: 00090913 mv s2,s2 + +00000054 <.L68>: + 54: ffc40413 addi s0,s0,-4 + 58: 0084d7b3 srl a5,s1,s0 + 5c: 00f7f793 andi a5,a5,15 + 60: 00f90733 add a4,s2,a5 + 64: 00079463 bnez a5,6c <.L70> + 68: 00068a63 beqz a3,7c <.L71> + +0000006c <.L70>: + 6c: 00074503 lbu a0,0(a4) + 70: 00000097 auipc ra,0x0 + 74: 000080e7 jalr ra # 70 <.L70+0x4> + 78: 00100693 li a3,1 + +0000007c <.L71>: + 7c: fc041ce3 bnez s0,54 <.L68> + 80: 00812403 lw s0,8(sp) + 84: 00c12083 lw ra,12(sp) + 88: 00412483 lw s1,4(sp) + 8c: 00012903 lw s2,0(sp) + 90: 00a00513 li a0,10 + 94: 01010113 addi sp,sp,16 + 98: 00000317 auipc t1,0x0 + 9c: 00030067 jr t1 # 98 <.L71+0x1c> + +000000a0 <.L82>: + a0: 000007b7 lui a5,0x0 + a4: 00078793 mv a5,a5 + a8: 009784b3 add s1,a5,s1 + ac: 0004c503 lbu a0,0(s1) + b0: 00000097 auipc ra,0x0 + b4: 000080e7 jalr ra # b0 <.L82+0x10> + b8: 00812403 lw s0,8(sp) + bc: 00c12083 lw ra,12(sp) + c0: 00412483 lw s1,4(sp) + c4: 00012903 lw s2,0(sp) + c8: 00a00513 li a0,10 + cc: 01010113 addi sp,sp,16 + d0: 00000317 auipc t1,0x0 + d4: 00030067 jr t1 # d0 <.L82+0x30> + +Disassembly of section .rodata.hextoa: + +00000000 : + 0: 3130 fld fa2,96(a0) + 2: 3332 fld ft6,296(sp) + 4: 3534 fld fa3,104(a0) + 6: 3736 fld fa4,360(sp) + 8: 3938 fld fa4,112(a0) + a: 6261 lui tp,0x18 + c: 66656463 bltu a0,t1,674 <.L24+0x454> + ... + +Disassembly of section .comment: + +00000000 <.comment>: + 0: 4700 lw s0,8(a4) + 2: 203a4343 fmadd.s ft6,fs4,ft3,ft4,rmm + 6: 4728 lw a0,72(a4) + 8: 554e lw a0,240(sp) + a: 2029 jal 14 + c: 2e39 jal 32a <.L24+0x10a> + e: 2e32 fld ft8,264(sp) + 10: 0030 addi a2,sp,8 + +Disassembly of section .riscv.attributes: + +00000000 <.riscv.attributes>: + 0: 2541 jal 680 <.L24+0x460> + 2: 0000 unimp + 4: 7200 flw fs0,32(a2) + 6: 7369 lui t1,0xffffa + 8: 01007663 bgeu zero,a6,14 <.riscv.attributes+0x14> + c: 0000001b 0x1b + 10: 1004 addi s1,sp,32 + 12: 7205 lui tp,0xfffe1 + 14: 3376 fld ft6,376(sp) + 16: 6932 flw fs2,12(sp) + 18: 7032 flw ft0,44(sp) + 1a: 5f30 lw a2,120(a4) + 1c: 326d jal fffff9c6 <.L24+0xfffff7a6> + 1e: 3070 fld fa2,224(s0) + 20: 665f 7032 0030 0x307032665f + +vx_spawn.c.o: file format elf32-littleriscv + + +Disassembly of section .text.spawn_tasks_callback: + +00000000 : + 0: fe010113 addi sp,sp,-32 + 4: 00112e23 sw ra,28(sp) + 8: 00812c23 sw s0,24(sp) + c: 00912a23 sw s1,20(sp) + 10: 01212823 sw s2,16(sp) + 14: 01312623 sw s3,12(sp) + 18: fc0027f3 csrr a5,0xfc0 + 1c: 0007806b 0x7806b + 20: cc5026f3 csrr a3,0xcc5 + 24: cc3029f3 csrr s3,0xcc3 + 28: cc002773 csrr a4,0xcc0 + 2c: fc002673 csrr a2,0xfc0 + 30: 000007b7 lui a5,0x0 + 34: 00269693 slli a3,a3,0x2 + 38: 00078793 mv a5,a5 + 3c: 00d787b3 add a5,a5,a3 + 40: 0007a483 lw s1,0(a5) # 0 + 44: 0104a403 lw s0,16(s1) + 48: 00c4a683 lw a3,12(s1) + 4c: 0089a933 slt s2,s3,s0 + 50: 00040793 mv a5,s0 + 54: 00d90933 add s2,s2,a3 + 58: 03368433 mul s0,a3,s3 + 5c: 00f9d463 bge s3,a5,64 <.L2> + 60: 00098793 mv a5,s3 + +00000064 <.L2>: + 64: 00f40433 add s0,s0,a5 + 68: 0084a683 lw a3,8(s1) + 6c: 02c40433 mul s0,s0,a2 + 70: 02e907b3 mul a5,s2,a4 + 74: 00d40433 add s0,s0,a3 + 78: 00f40433 add s0,s0,a5 + 7c: 00890933 add s2,s2,s0 + 80: 01245e63 bge s0,s2,9c <.L3> + +00000084 <.L4>: + 84: 0004a783 lw a5,0(s1) + 88: 0044a583 lw a1,4(s1) + 8c: 00040513 mv a0,s0 + 90: 00140413 addi s0,s0,1 + 94: 000780e7 jalr a5 + 98: fe8916e3 bne s2,s0,84 <.L4> + +0000009c <.L3>: + 9c: 0019b993 seqz s3,s3 + a0: 0009806b 0x9806b + a4: 01c12083 lw ra,28(sp) + a8: 01812403 lw s0,24(sp) + ac: 01412483 lw s1,20(sp) + b0: 01012903 lw s2,16(sp) + b4: 00c12983 lw s3,12(sp) + b8: 02010113 addi sp,sp,32 + bc: 00008067 ret + +Disassembly of section .text.spawn_kernel_callback: + +00000000 : + 0: fe010113 addi sp,sp,-32 + 4: 00112e23 sw ra,28(sp) + 8: 00812c23 sw s0,24(sp) + c: 00912a23 sw s1,20(sp) + 10: 01212823 sw s2,16(sp) + 14: 01312623 sw s3,12(sp) + 18: 01412423 sw s4,8(sp) + 1c: 01512223 sw s5,4(sp) + 20: fc0027f3 csrr a5,0xfc0 + 24: 0007806b 0x7806b + 28: cc5026f3 csrr a3,0xcc5 + 2c: cc302973 csrr s2,0xcc3 + 30: cc002773 csrr a4,0xcc0 + 34: fc002673 csrr a2,0xfc0 + 38: 000007b7 lui a5,0x0 + 3c: 00269693 slli a3,a3,0x2 + 40: 00078793 mv a5,a5 + 44: 00d787b3 add a5,a5,a3 + 48: 0007a403 lw s0,0(a5) # 0 + 4c: 01442483 lw s1,20(s0) + 50: 01042683 lw a3,16(s0) + 54: 00992ab3 slt s5,s2,s1 + 58: 00048793 mv a5,s1 + 5c: 00da8ab3 add s5,s5,a3 + 60: 032684b3 mul s1,a3,s2 + 64: 00f95463 bge s2,a5,6c <.L9> + 68: 00090793 mv a5,s2 + +0000006c <.L9>: + 6c: 00f484b3 add s1,s1,a5 + 70: 00042583 lw a1,0(s0) + 74: 00c42683 lw a3,12(s0) + 78: 0005a983 lw s3,0(a1) + 7c: 0045aa03 lw s4,4(a1) + 80: 02c484b3 mul s1,s1,a2 + 84: 02ea87b3 mul a5,s5,a4 + 88: 00d484b3 add s1,s1,a3 + 8c: 00f484b3 add s1,s1,a5 + 90: 009a8ab3 add s5,s5,s1 + 94: 03498a33 mul s4,s3,s4 + 98: 0754c063 blt s1,s5,f8 <.L15> + 9c: 0800006f j 11c <.L10> + +000000a0 <.L17>: + a0: 01a44703 lbu a4,26(s0) + a4: 01944683 lbu a3,25(s0) + a8: 40e4d733 sra a4,s1,a4 + ac: 034707b3 mul a5,a4,s4 + b0: 40f487b3 sub a5,s1,a5 + b4: 06068063 beqz a3,114 <.L13> + +000000b8 <.L18>: + b8: 01b44683 lbu a3,27(s0) + bc: 40d7d6b3 sra a3,a5,a3 + +000000c0 <.L14>: + c0: 033688b3 mul a7,a3,s3 + c4: 0145ae03 lw t3,20(a1) + c8: 0105a303 lw t1,16(a1) + cc: 00c5a603 lw a2,12(a1) + d0: 00442803 lw a6,4(s0) + d4: 00842503 lw a0,8(s0) + d8: 00148493 addi s1,s1,1 + dc: 01c70733 add a4,a4,t3 + e0: 006686b3 add a3,a3,t1 + e4: 411787b3 sub a5,a5,a7 + e8: 00c78633 add a2,a5,a2 + ec: 000800e7 jalr a6 + f0: 029a8663 beq s5,s1,11c <.L10> + f4: 00042583 lw a1,0(s0) + +000000f8 <.L15>: + f8: 01844783 lbu a5,24(s0) + fc: fa0792e3 bnez a5,a0 <.L17> + 100: 0344c733 div a4,s1,s4 + 104: 01944683 lbu a3,25(s0) + 108: 034707b3 mul a5,a4,s4 + 10c: 40f487b3 sub a5,s1,a5 + 110: fa0694e3 bnez a3,b8 <.L18> + +00000114 <.L13>: + 114: 0337c6b3 div a3,a5,s3 + 118: fa9ff06f j c0 <.L14> + +0000011c <.L10>: + 11c: 00193913 seqz s2,s2 + 120: 0009006b 0x9006b + 124: 01c12083 lw ra,28(sp) + 128: 01812403 lw s0,24(sp) + 12c: 01412483 lw s1,20(sp) + 130: 01012903 lw s2,16(sp) + 134: 00c12983 lw s3,12(sp) + 138: 00812a03 lw s4,8(sp) + 13c: 00412a83 lw s5,4(sp) + 140: 02010113 addi sp,sp,32 + 144: 00008067 ret + +Disassembly of section .text.spawn_remaining_tasks_callback: + +00000000 : + 0: ff010113 addi sp,sp,-16 + 4: 00112623 sw ra,12(sp) + 8: 0005006b 0x5006b + c: cc502773 csrr a4,0xcc5 + 10: cc202573 csrr a0,0xcc2 + 14: 000007b7 lui a5,0x0 + 18: 00271713 slli a4,a4,0x2 + 1c: 00078793 mv a5,a5 + 20: 00e787b3 add a5,a5,a4 + 24: 0007a783 lw a5,0(a5) # 0 + 28: 0087a683 lw a3,8(a5) + 2c: 0007a703 lw a4,0(a5) + 30: 0047a583 lw a1,4(a5) + 34: 00d50533 add a0,a0,a3 + 38: 000700e7 jalr a4 + 3c: 00100793 li a5,1 + 40: 0007806b 0x7806b + 44: 00c12083 lw ra,12(sp) + 48: 01010113 addi sp,sp,16 + 4c: 00008067 ret + +Disassembly of section .text.vx_spawn_tasks: + +00000000 : + 0: fc010113 addi sp,sp,-64 + 4: 02112e23 sw ra,60(sp) + 8: 02812c23 sw s0,56(sp) + c: 02912a23 sw s1,52(sp) + 10: 03212823 sw s2,48(sp) + 14: 03312623 sw s3,44(sp) + 18: fc2026f3 csrr a3,0xfc2 + 1c: fc102873 csrr a6,0xfc1 + 20: fc002473 csrr s0,0xfc0 + 24: cc5027f3 csrr a5,0xcc5 + 28: 01f00713 li a4,31 + 2c: 0cf74463 blt a4,a5,f4 <.L21> + 30: 030408b3 mul a7,s0,a6 + 34: 00100713 li a4,1 + 38: 00a8d463 bge a7,a0,40 <.L23> + 3c: 03154733 div a4,a0,a7 + +00000040 <.L23>: + 40: 0ce6c863 blt a3,a4,110 <.L39> + 44: 0ae7d863 bge a5,a4,f4 <.L21> + +00000048 <.L41>: + 48: fff68693 addi a3,a3,-1 + 4c: 02e54333 div t1,a0,a4 + 50: 00030893 mv a7,t1 + 54: 00f69663 bne a3,a5,60 <.L25> + 58: 02e56533 rem a0,a0,a4 + 5c: 006508b3 add a7,a0,t1 + +00000060 <.L25>: + 60: 0288c4b3 div s1,a7,s0 + 64: 0288e933 rem s2,a7,s0 + 68: 0b04ca63 blt s1,a6,11c <.L32> + 6c: 00100693 li a3,1 + 70: 0304c733 div a4,s1,a6 + 74: 00070663 beqz a4,80 <.L26> + 78: 00070693 mv a3,a4 + 7c: 0304e733 rem a4,s1,a6 + +00000080 <.L26>: + 80: 000009b7 lui s3,0x0 + 84: 00098993 mv s3,s3 + 88: 00e12e23 sw a4,28(sp) + 8c: 00c10713 addi a4,sp,12 + 90: 00b12623 sw a1,12(sp) + 94: 00c12823 sw a2,16(sp) + 98: 00d12c23 sw a3,24(sp) + 9c: 02f30333 mul t1,t1,a5 + a0: 00279793 slli a5,a5,0x2 + a4: 00f987b3 add a5,s3,a5 + a8: 00e7a023 sw a4,0(a5) + ac: 00612a23 sw t1,20(sp) + b0: 06904c63 bgtz s1,128 <.L40> + +000000b4 <.L27>: + b4: 04090063 beqz s2,f4 <.L21> + b8: 02848433 mul s0,s1,s0 + bc: 00812a23 sw s0,20(sp) + c0: 0009006b 0x9006b + c4: cc5027f3 csrr a5,0xcc5 + c8: cc202573 csrr a0,0xcc2 + cc: 00279793 slli a5,a5,0x2 + d0: 00f989b3 add s3,s3,a5 + d4: 0009a783 lw a5,0(s3) # 0 + d8: 0087a683 lw a3,8(a5) + dc: 0007a703 lw a4,0(a5) + e0: 0047a583 lw a1,4(a5) + e4: 00d50533 add a0,a0,a3 + e8: 000700e7 jalr a4 + ec: 00100793 li a5,1 + f0: 0007806b 0x7806b + +000000f4 <.L21>: + f4: 03c12083 lw ra,60(sp) + f8: 03812403 lw s0,56(sp) + fc: 03412483 lw s1,52(sp) + 100: 03012903 lw s2,48(sp) + 104: 02c12983 lw s3,44(sp) + 108: 04010113 addi sp,sp,64 + 10c: 00008067 ret + +00000110 <.L39>: + 110: 00068713 mv a4,a3 + 114: f2e7cae3 blt a5,a4,48 <.L41> + 118: fddff06f j f4 <.L21> + +0000011c <.L32>: + 11c: 00000713 li a4,0 + 120: 00100693 li a3,1 + 124: f5dff06f j 80 <.L26> + +00000128 <.L40>: + 128: 00048713 mv a4,s1 + 12c: 00985463 bge a6,s1,134 <.L28> + 130: 00080713 mv a4,a6 + +00000134 <.L28>: + 134: 000007b7 lui a5,0x0 + 138: 00078793 mv a5,a5 + 13c: 00f7106b 0xf7106b + 140: 00000097 auipc ra,0x0 + 144: 000080e7 jalr ra # 140 <.L28+0xc> + 148: f6dff06f j b4 <.L27> + +Disassembly of section .text.vx_spawn_kernel: + +00000000 : + 0: fc010113 addi sp,sp,-64 + 4: 02112e23 sw ra,60(sp) + 8: 02812c23 sw s0,56(sp) + c: 02912a23 sw s1,52(sp) + 10: 03212823 sw s2,48(sp) + 14: 03312623 sw s3,44(sp) + 18: fc2028f3 csrr a7,0xfc2 + 1c: fc102373 csrr t1,0xfc1 + 20: fc002473 csrr s0,0xfc0 + 24: cc5027f3 csrr a5,0xcc5 + 28: 01f00713 li a4,31 + 2c: 0ef74663 blt a4,a5,118 <.L42> + 30: 00052e03 lw t3,0(a0) + 34: 00452683 lw a3,4(a0) + 38: 00852803 lw a6,8(a0) + 3c: 02830eb3 mul t4,t1,s0 + 40: 00100713 li a4,1 + 44: 02de06b3 mul a3,t3,a3 + 48: 03068833 mul a6,a3,a6 + 4c: 010ed463 bge t4,a6,54 <.L44> + 50: 03d84733 div a4,a6,t4 + +00000054 <.L44>: + 54: 0ee8c063 blt a7,a4,134 <.L64> + 58: 0ce7d063 bge a5,a4,118 <.L42> + +0000005c <.L67>: + 5c: fff88893 addi a7,a7,-1 + 60: 02e84eb3 div t4,a6,a4 + 64: 000e8493 mv s1,t4 + 68: 00f89663 bne a7,a5,74 <.L46> + 6c: 02e86733 rem a4,a6,a4 + 70: 01d704b3 add s1,a4,t4 + +00000074 <.L46>: + 74: 0284c933 div s2,s1,s0 + 78: 0284e4b3 rem s1,s1,s0 + 7c: 0c694263 blt s2,t1,140 <.L57> + 80: 00100293 li t0,1 + 84: 02694833 div a6,s2,t1 + 88: 00080663 beqz a6,94 <.L47> + 8c: 00080293 mv t0,a6 + 90: 02696833 rem a6,s2,t1 + +00000094 <.L47>: + 94: d006f7d3 fcvt.s.w fa5,a3 + 98: fff68f93 addi t6,a3,-1 + 9c: fffe0f13 addi t5,t3,-1 + a0: 000009b7 lui s3,0x0 + a4: 00dff6b3 and a3,t6,a3 + a8: 00098993 mv s3,s3 + ac: 0016b693 seqz a3,a3 + b0: 00a12223 sw a0,4(sp) + b4: 00b12423 sw a1,8(sp) + b8: 00c12623 sw a2,12(sp) + bc: 00512a23 sw t0,20(sp) + c0: 01012c23 sw a6,24(sp) + c4: 00d10e23 sb a3,28(sp) + c8: 02fe8733 mul a4,t4,a5 + cc: e0078ed3 fmv.x.w t4,fa5 + d0: d00e77d3 fcvt.s.w fa5,t3 + d4: 00279793 slli a5,a5,0x2 + d8: 01cf7e33 and t3,t5,t3 + dc: e00788d3 fmv.x.w a7,fa5 + e0: 417ede93 srai t4,t4,0x17 + e4: 001e3e13 seqz t3,t3 + e8: 4178d893 srai a7,a7,0x17 + ec: f81e8e93 addi t4,t4,-127 + f0: f8188893 addi a7,a7,-127 + f4: 00f987b3 add a5,s3,a5 + f8: 00e12823 sw a4,16(sp) + fc: 00410713 addi a4,sp,4 + 100: 01c10ea3 sb t3,29(sp) + 104: 01d10f23 sb t4,30(sp) + 108: 01110fa3 sb a7,31(sp) + 10c: 00e7a023 sw a4,0(a5) # 0 + 110: 03204e63 bgtz s2,14c <.L65> + 114: 04049e63 bnez s1,170 <.L66> + +00000118 <.L42>: + 118: 03c12083 lw ra,60(sp) + 11c: 03812403 lw s0,56(sp) + 120: 03412483 lw s1,52(sp) + 124: 03012903 lw s2,48(sp) + 128: 02c12983 lw s3,44(sp) + 12c: 04010113 addi sp,sp,64 + 130: 00008067 ret + +00000134 <.L64>: + 134: 00088713 mv a4,a7 + 138: f2e7c2e3 blt a5,a4,5c <.L67> + 13c: fddff06f j 118 <.L42> + +00000140 <.L57>: + 140: 00000813 li a6,0 + 144: 00100293 li t0,1 + 148: f4dff06f j 94 <.L47> + +0000014c <.L65>: + 14c: 00090713 mv a4,s2 + 150: 01235463 bge t1,s2,158 <.L49> + 154: 00030713 mv a4,t1 + +00000158 <.L49>: + 158: 000007b7 lui a5,0x0 + 15c: 00078793 mv a5,a5 + 160: 00f7106b 0xf7106b + 164: 00000097 auipc ra,0x0 + 168: 000080e7 jalr ra # 164 <.L49+0xc> + 16c: fa0486e3 beqz s1,118 <.L42> + +00000170 <.L66>: + 170: 02890433 mul s0,s2,s0 + 174: 00812823 sw s0,16(sp) + 178: 0004806b 0x4806b + 17c: cc502773 csrr a4,0xcc5 + 180: cc2027f3 csrr a5,0xcc2 + 184: 00271713 slli a4,a4,0x2 + 188: 00e989b3 add s3,s3,a4 + 18c: 0009a503 lw a0,0(s3) # 0 + 190: 00052583 lw a1,0(a0) + 194: 00c52683 lw a3,12(a0) + 198: 01854703 lbu a4,24(a0) + 19c: 0005a803 lw a6,0(a1) + 1a0: 0045a603 lw a2,4(a1) + 1a4: 00d787b3 add a5,a5,a3 + 1a8: 02c80633 mul a2,a6,a2 + 1ac: 06070e63 beqz a4,228 <.L50> + 1b0: 01a54703 lbu a4,26(a0) + 1b4: 40e7d733 sra a4,a5,a4 + +000001b8 <.L51>: + 1b8: 01954683 lbu a3,25(a0) + 1bc: 02e60633 mul a2,a2,a4 + 1c0: 40c787b3 sub a5,a5,a2 + 1c4: 04068e63 beqz a3,220 <.L52> + 1c8: 01b54883 lbu a7,27(a0) + 1cc: 4117d8b3 sra a7,a5,a7 + +000001d0 <.L53>: + 1d0: 03180833 mul a6,a6,a7 + 1d4: 0145ae03 lw t3,20(a1) + 1d8: 0105a683 lw a3,16(a1) + 1dc: 00c5a603 lw a2,12(a1) + 1e0: 00452303 lw t1,4(a0) + 1e4: 00852503 lw a0,8(a0) + 1e8: 01c70733 add a4,a4,t3 + 1ec: 00d886b3 add a3,a7,a3 + 1f0: 410787b3 sub a5,a5,a6 + 1f4: 00c78633 add a2,a5,a2 + 1f8: 000300e7 jalr t1 + 1fc: 00100793 li a5,1 + 200: 0007806b 0x7806b + 204: 03c12083 lw ra,60(sp) + 208: 03812403 lw s0,56(sp) + 20c: 03412483 lw s1,52(sp) + 210: 03012903 lw s2,48(sp) + 214: 02c12983 lw s3,44(sp) + 218: 04010113 addi sp,sp,64 + 21c: 00008067 ret + +00000220 <.L52>: + 220: 0307c8b3 div a7,a5,a6 + 224: fadff06f j 1d0 <.L53> + +00000228 <.L50>: + 228: 02c7c733 div a4,a5,a2 + 22c: f8dff06f j 1b8 <.L51> + +Disassembly of section .comment: + +00000000 <.comment>: + 0: 4700 lw s0,8(a4) + 2: 203a4343 fmadd.s ft6,fs4,ft3,ft4,rmm + 6: 4728 lw a0,72(a4) + 8: 554e lw a0,240(sp) + a: 2029 jal 14 + c: 2e39 jal 32a <.L50+0x102> + e: 2e32 fld ft8,264(sp) + 10: 0030 addi a2,sp,8 + +Disassembly of section .riscv.attributes: + +00000000 <.riscv.attributes>: + 0: 2541 jal 680 <.L50+0x458> + 2: 0000 unimp + 4: 7200 flw fs0,32(a2) + 6: 7369 lui t1,0xffffa + 8: 01007663 bgeu zero,a6,14 <.riscv.attributes+0x14> + c: 0000001b 0x1b + 10: 1004 addi s1,sp,32 + 12: 7205 lui tp,0xfffe1 + 14: 3376 fld ft6,376(sp) + 16: 6932 flw fs2,12(sp) + 18: 7032 flw ft0,44(sp) + 1a: 5f30 lw a2,120(a4) + 1c: 326d jal fffff9c6 <.L50+0xfffff79e> + 1e: 3070 fld fa2,224(s0) + 20: 665f 7032 0030 0x307032665f + +vx_tex.c.o: file format elf32-littleriscv + + +Disassembly of section .text.vx_tex: + +00000000 : + 0: 00869693 slli a3,a3,0x8 + 4: 00a6e6b3 or a3,a3,a0 + 8: 00000513 li a0,0 + c: 6ac5d56b 0x6ac5d56b + 10: 00008067 ret + +Disassembly of section .comment: + +00000000 <.comment>: + 0: 4700 lw s0,8(a4) + 2: 203a4343 fmadd.s ft6,fs4,ft3,ft4,rmm + 6: 4728 lw a0,72(a4) + 8: 554e lw a0,240(sp) + a: 2029 jal 14 + c: 2e39 jal 32a + e: 2e32 fld ft8,264(sp) + 10: 0030 addi a2,sp,8 + +Disassembly of section .riscv.attributes: + +00000000 <.riscv.attributes>: + 0: 2541 jal 680 + 2: 0000 unimp + 4: 7200 flw fs0,32(a2) + 6: 7369 lui t1,0xffffa + 8: 01007663 bgeu zero,a6,14 + c: 0000001b 0x1b + 10: 1004 addi s1,sp,32 + 12: 7205 lui tp,0xfffe1 + 14: 3376 fld ft6,376(sp) + 16: 6932 flw fs2,12(sp) + 18: 7032 flw ft0,44(sp) + 1a: 5f30 lw a2,120(a4) + 1c: 326d jal fffff9c6 + 1e: 3070 fld fa2,224(s0) + 20: 665f 7032 0030 0x307032665f diff --git a/runtime/src/vx_tex.S b/runtime/src/vx_tex.S new file mode 100644 index 00000000..3345916a --- /dev/null +++ b/runtime/src/vx_tex.S @@ -0,0 +1,9 @@ +#include + +@ .type vx_tex_ld, @function +@ .global vx_tex_ld +@ vx_tex_ld: +@ slli a1,a1,0x8 +@ or a1,a1,a0 +@ .word 0x5ae7952b +@ ret \ No newline at end of file diff --git a/runtime/src/vx_tex.c b/runtime/src/vx_tex.c new file mode 100644 index 00000000..2a7ac2a1 --- /dev/null +++ b/runtime/src/vx_tex.c @@ -0,0 +1,13 @@ +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define NUM_CORES_MAX 32 + +int vx_tex(unsigned t, unsigned u, unsigned v, unsigned lod){ + return vx_tex_ld(t,u,v,lod); +} diff --git a/runtime/vx_print.S.o b/runtime/vx_print.S.o new file mode 100644 index 00000000..fb1c1352 Binary files /dev/null and b/runtime/vx_print.S.o differ diff --git a/runtime/vx_print.c.o b/runtime/vx_print.c.o new file mode 100644 index 00000000..809e45b1 Binary files /dev/null and b/runtime/vx_print.c.o differ diff --git a/runtime/vx_spawn.c.o b/runtime/vx_spawn.c.o new file mode 100644 index 00000000..57381a4a Binary files /dev/null and b/runtime/vx_spawn.c.o differ diff --git a/runtime/vx_start.S.o b/runtime/vx_start.S.o new file mode 100644 index 00000000..f96091b9 Binary files /dev/null and b/runtime/vx_start.S.o differ diff --git a/runtime/vx_tex.c.o b/runtime/vx_tex.c.o new file mode 100644 index 00000000..69ffadd0 Binary files /dev/null and b/runtime/vx_tex.c.o differ diff --git a/simX/simX b/simX/simX new file mode 100755 index 00000000..cd3eb523 Binary files /dev/null and b/simX/simX differ