diff --git a/driver/tests/tex_demo/.depend b/driver/tests/tex_demo/.depend index 1647eae7..f4820fca 100644 --- a/driver/tests/tex_demo/.depend +++ b/driver/tests/tex_demo/.depend @@ -1 +1,2 @@ -demo.o: demo.cpp ../../include/vortex.h common.h +demo.o: demo.cpp ../../include/vortex.h common.h utils.h +utils.o: utils.cpp utils.h diff --git a/driver/tests/tex_demo/Makefile b/driver/tests/tex_demo/Makefile index 8ca0c6c8..6a600c3c 100644 --- a/driver/tests/tex_demo/Makefile +++ b/driver/tests/tex_demo/Makefile @@ -1,7 +1,7 @@ RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain VORTEX_RT_PATH ?= $(wildcard ../../../runtime) -OPTS ?= -n64 +#OPTS ?= -s1.0 VX_CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++ @@ -22,7 +22,7 @@ CXXFLAGS += -I../../include PROJECT = demo -SRCS = demo.cpp +SRCS = demo.cpp utils.cpp all: $(PROJECT) kernel.bin kernel.dump diff --git a/driver/tests/tex_demo/common.h b/driver/tests/tex_demo/common.h index d24a889c..9de20206 100644 --- a/driver/tests/tex_demo/common.h +++ b/driver/tests/tex_demo/common.h @@ -1,15 +1,14 @@ #ifndef _COMMON_H_ #define _COMMON_H_ - -#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000 - -// #include - struct kernel_arg_t { uint32_t num_tasks; - uint32_t task_size; - uint32_t src0_ptr; - uint32_t src1_ptr; + uint32_t src_width; + uint32_t src_height; + uint32_t src_pitch; + uint32_t dst_width; + uint32_t dst_height; + uint32_t dst_pitch; + uint32_t src_ptr; uint32_t dst_ptr; }; diff --git a/driver/tests/tex_demo/demo b/driver/tests/tex_demo/demo index 69c2c6e8..aee45cbc 100755 Binary files a/driver/tests/tex_demo/demo and b/driver/tests/tex_demo/demo differ diff --git a/driver/tests/tex_demo/demo.cpp b/driver/tests/tex_demo/demo.cpp index a28d675d..4329c825 100644 --- a/driver/tests/tex_demo/demo.cpp +++ b/driver/tests/tex_demo/demo.cpp @@ -1,8 +1,11 @@ #include +#include #include #include +#include #include #include "common.h" +#include "utils.h" #define RT_CHECK(_expr) \ do { \ @@ -17,22 +20,30 @@ /////////////////////////////////////////////////////////////////////////////// const char* kernel_file = "kernel.bin"; -uint32_t count = 0; +const char* input_file = "sample.tga"; +const char* output_file = "output.tga"; +float scale = 1.0f; vx_device_h device = nullptr; vx_buffer_h buffer = nullptr; static void show_usage() { - std::cout << "Vortex Driver Test." << std::endl; - std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl; + std::cout << "Vortex Texture Test." << std::endl; + std::cout << "Usage: [-k: kernel] [-i image] [-o image] [-s scale] [-h: help]" << std::endl; } static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { + while ((c = getopt(argc, argv, "i:o:k:h?")) != -1) { switch (c) { - case 'n': - count = atoi(optarg); + case 'i': + input_file = optarg; + break; + case 'o': + output_file = optarg; + break; + case 's': + scale = std::stof(optarg, NULL); break; case 'k': kernel_file = optarg; @@ -58,9 +69,7 @@ void cleanup() { } } -int run_test(const kernel_arg_t& kernel_arg, - uint32_t buf_size, - uint32_t num_points) { +int run_test(const kernel_arg_t& kernel_arg, uint32_t buf_size, uint32_t width, uint32_t height, uint32_t dst_bpp) { // start device std::cout << "start device" << std::endl; RT_CHECK(vx_start(device)); @@ -73,40 +82,36 @@ int run_test(const kernel_arg_t& kernel_arg, std::cout << "download destination buffer" << std::endl; RT_CHECK(vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0)); - // verify result - std::cout << "verify result" << std::endl; - { - int errors = 0; - auto buf_ptr = (int32_t*)vx_host_ptr(buffer); - for (uint32_t i = 0; i < num_points; ++i) { - int ref = 0xFAAF; - int cur = buf_ptr[i]; - if (cur != ref) { - std::cout << "error at result #" << i - << ": actual 0x" << cur << ", expected 0x" << ref << std::endl; - ++errors; - } - } - if (errors != 0) { - std::cout << "Found " << std::dec << errors << " errors!" << std::endl; - std::cout << "FAILED!" << std::endl; - return 1; - } - } + std::vector dst_pixels(buf_size); + auto buf_ptr = (int8_t*)vx_host_ptr(buffer); + for (uint32_t i = 0; i < buf_size; ++i) { + dst_pixels[i] = buf_ptr[i]; + } + + // save output image + std::cout << "save output image" << std::endl; + RT_CHECK(SaveTGA(output_file, dst_pixels, width, height, dst_bpp)); return 0; } int main(int argc, char *argv[]) { - size_t value; kernel_arg_t kernel_arg; - + std::vector src_pixels; + uint32_t src_width; + uint32_t src_height; + uint32_t src_bpp; + // parse command arguments parse_args(argc, argv); - if (count == 0) { - count = 1; - } + RT_CHECK(LoadTGA(input_file, src_pixels, &src_width, &src_height, &src_bpp)); + uint32_t src_bufsize = src_bpp * src_width * src_height; + + uint32_t dst_width = (uint32_t)(src_width * scale); + uint32_t dst_height = (uint32_t)(src_height * scale); + uint32_t dst_bpp = 4; + uint32_t dst_bufsize = dst_bpp * dst_width * dst_height; // open device connection std::cout << "open device connection" << std::endl; @@ -117,13 +122,11 @@ int main(int argc, char *argv[]) { RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps)); RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads)); - // uint32_t num_tasks = max_cores * max_warps * max_threads; - uint32_t num_tasks = 1; - uint32_t num_points = count * num_tasks; - uint32_t buf_size = num_points * sizeof(uint32_t); + uint32_t num_tasks = max_cores * max_warps * max_threads; - std::cout << "number of points: " << num_points << std::endl; - std::cout << "buffer size: " << buf_size << " bytes" << std::endl; + std::cout << "number of tasks: " << num_tasks << std::endl; + std::cout << "source buffer: width=" << src_width << ", heigth=" << src_height << ", size=" << src_bufsize << " bytes" << std::endl; + std::cout << "destination buffer: width=" << dst_width << ", heigth=" << dst_height << ", size=" << dst_bufsize << " bytes" << std::endl; // upload program std::cout << "upload program" << std::endl; @@ -131,67 +134,63 @@ int main(int argc, char *argv[]) { // allocate device memory std::cout << "allocate device memory" << std::endl; + size_t arg_addr, src_addr, dst_addr; + RT_CHECK(vx_alloc_dev_mem(device, sizeof(kernel_arg_t), &arg_addr)); + RT_CHECK(vx_alloc_dev_mem(device, src_bufsize, &src_addr)); + RT_CHECK(vx_alloc_dev_mem(device, dst_bufsize, &dst_addr)); - RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); - kernel_arg.src0_ptr = value; - RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); - kernel_arg.src1_ptr = value; - RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); - kernel_arg.dst_ptr = value; + assert(arg_addr == ALLOC_BASE_ADDR); - kernel_arg.num_tasks = num_tasks; - kernel_arg.task_size = count; + std::cout << "arg_addr=" << std::hex << arg_addr << std::endl; + std::cout << "src_addr=" << std::hex << src_addr << std::endl; + std::cout << "dst_addr=" << std::hex << dst_addr << std::endl; - std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::endl; - std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::endl; - std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl; - - // allocate shared memory + // allocate staging shared memory std::cout << "allocate shared memory" << std::endl; - uint32_t alloc_size = std::max(buf_size, sizeof(kernel_arg_t)); + uint32_t alloc_size = std::max(sizeof(kernel_arg_t), std::max(src_bufsize, dst_bufsize)); RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &buffer)); // upload kernel argument std::cout << "upload kernel argument" << std::endl; { + kernel_arg.num_tasks = std::min(num_tasks, dst_height); + kernel_arg.src_width = src_width; + kernel_arg.src_height = src_height; + kernel_arg.src_pitch = src_bpp * src_width * src_height; + kernel_arg.dst_width = dst_width; + kernel_arg.dst_height = dst_height; + kernel_arg.dst_pitch = dst_bpp * dst_width * dst_height; + kernel_arg.src_ptr = src_addr; + kernel_arg.dst_ptr = dst_addr; + auto buf_ptr = (int*)vx_host_ptr(buffer); memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t)); - RT_CHECK(vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0)); + RT_CHECK(vx_copy_to_dev(buffer, arg_addr, sizeof(kernel_arg_t), 0)); } // upload source buffer0 - { - auto buf_ptr = (int32_t*)vx_host_ptr(buffer); - for (uint32_t i = 0; i < num_points; ++i) { - buf_ptr[i] = i-1; - } - } std::cout << "upload source buffer0" << std::endl; - RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src0_ptr, buf_size, 0)); - - // upload source buffer1 - { - auto buf_ptr = (int32_t*)vx_host_ptr(buffer); - for (uint32_t i = 0; i < num_points; ++i) { - buf_ptr[i] = i+1; - } + { + auto buf_ptr = (int8_t*)vx_host_ptr(buffer); + for (uint32_t i = 0; i < src_bufsize; ++i) { + buf_ptr[i] = src_pixels[i]; + } + RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src_ptr, src_bufsize, 0)); } - std::cout << "upload source buffer1" << std::endl; - RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src1_ptr, buf_size, 0)); // clear destination buffer - { - auto buf_ptr = (int32_t*)vx_host_ptr(buffer); - for (uint32_t i = 0; i < num_points; ++i) { - buf_ptr[i] = 0xdeadbeef; - } - } std::cout << "clear destination buffer" << std::endl; - RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.dst_ptr, buf_size, 0)); + { + auto buf_ptr = (int32_t*)vx_host_ptr(buffer); + for (uint32_t i = 0; i < (dst_bufsize/4); ++i) { + buf_ptr[i] = 0xdeadbeef; + } + RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.dst_ptr, dst_bufsize, 0)); + } // run tests std::cout << "run tests" << std::endl; - RT_CHECK(run_test(kernel_arg, buf_size, num_points)); + RT_CHECK(run_test(kernel_arg, dst_bufsize, dst_width, dst_height, dst_bpp)); // cleanup std::cout << "cleanup" << std::endl; diff --git a/driver/tests/tex_demo/kernel.bin b/driver/tests/tex_demo/kernel.bin index 71e9dd03..c992ca17 100755 Binary files a/driver/tests/tex_demo/kernel.bin and b/driver/tests/tex_demo/kernel.bin differ diff --git a/driver/tests/tex_demo/kernel.c b/driver/tests/tex_demo/kernel.c index 9a936c1d..1e36b359 100644 --- a/driver/tests/tex_demo/kernel.c +++ b/driver/tests/tex_demo/kernel.c @@ -2,30 +2,56 @@ #include #include #include "common.h" +struct tile_arg_t { + struct kernel_arg_t karg; + uint32_t tile_width; + uint32_t tile_height; + float deltaX; + float deltaY; +}; void kernel_body(int task_id, void* arg) { - struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg); - uint32_t count = _arg->task_size; - // int32_t* src0_ptr = (int32_t*)_arg->src0_ptr; - // int32_t* src1_ptr = (int32_t*)_arg->src1_ptr; - int32_t* dst_ptr = (int32_t*)_arg->dst_ptr; + struct tile_arg_t* _arg = (struct tile_arg_t*)(arg); + + uint32_t xoffset = task_id * _arg->tile_width; + uint32_t yoffset = task_id * _arg->tile_height; + uint32_t* dst_ptr = (uint32_t*)_arg->karg.dst_ptr + xoffset + yoffset * _arg->karg.dst_pitch; - unsigned lod = 1; - unsigned u = 1; - unsigned v = 1; - unsigned t = 1; + float fu = xoffset * _arg->deltaX; + float fv = yoffset * _arg->deltaY; - // vx_csr_set(_arg->device_ptr, 0, 0xfd0, 0xffd); - - uint32_t offset = task_id * count; - - for (uint32_t i = 0; i < count; ++i) { - // dst_ptr[offset+i] = src0_ptr[offset+i] + src1_ptr[offset+i]; - dst_ptr[offset+i] = vx_tex(t, u, v, lod); + for (uint32_t y = 0; y < _arg->tile_height; ++y) { + for (uint32_t x = 0; x < _arg->tile_width; ++x) { + int32_t u = (int32_t)(fu * (1<<28)); + int32_t v = (int32_t)(fv * (1<<28)); + dst_ptr[x] = vx_tex(0, u, v, 0); + fu += _arg->deltaX; + } + dst_ptr += _arg->karg.dst_pitch; + fv += _arg->deltaY; } } int main() { - struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR; - vx_spawn_tasks(arg->num_tasks, kernel_body, arg); + struct kernel_arg_t* arg = (struct kernel_arg_t*)0x0; + + // configure texture unit + vx_csr_write(CSR_TEX0_ADDR, arg->src_ptr); + vx_csr_write(CSR_TEX0_FORMAT, 0); + vx_csr_write(CSR_TEX0_WIDTH, arg->src_width); + vx_csr_write(CSR_TEX0_HEIGHT, arg->src_height); + vx_csr_write(CSR_TEX0_PITCH, arg->src_pitch); + vx_csr_write(CSR_TEX0_WRAP_U, 0); + vx_csr_write(CSR_TEX0_WRAP_V, 0); + vx_csr_write(CSR_TEX0_MIN_FILTER, 0); + vx_csr_write(CSR_TEX0_MAX_FILTER, 0); + + struct tile_arg_t targ; + targ.karg = *arg; + targ.tile_width = arg->dst_width; + targ.tile_height = (arg->dst_height + arg->num_tasks - 1) / arg->num_tasks; + targ.deltaX = 1.0f / arg->dst_width; + targ.deltaY = 1.0f / arg->dst_height; + + vx_spawn_tasks(arg->num_tasks, kernel_body, targ); } \ No newline at end of file diff --git a/driver/tests/tex_demo/kernel.dump b/driver/tests/tex_demo/kernel.dump index da75481c..b9f88ac4 100644 --- a/driver/tests/tex_demo/kernel.dump +++ b/driver/tests/tex_demo/kernel.dump @@ -6,539 +6,360 @@ Disassembly of section .init: 80000000 <_start>: 80000000: 00000597 auipc a1,0x0 -80000004: 0f458593 addi a1,a1,244 # 800000f4 +80000004: 07858593 addi a1,a1,120 # 80000078 80000008: fc102573 csrr a0,0xfc1 8000000c: 00b5106b 0xb5106b -80000010: 0e4000ef jal ra,800000f4 +80000010: 068000ef jal ra,80000078 80000014: 00100513 li a0,1 80000018: 0005006b 0x5006b 8000001c: 00002517 auipc a0,0x2 -80000020: b4050513 addi a0,a0,-1216 # 80001b5c +80000020: 8a850513 addi a0,a0,-1880 # 800018c4 <__BSS_END__> 80000024: 00002617 auipc a2,0x2 -80000028: bb860613 addi a2,a2,-1096 # 80001bdc <__BSS_END__> +80000028: 8a060613 addi a2,a2,-1888 # 800018c4 <__BSS_END__> 8000002c: 40a60633 sub a2,a2,a0 80000030: 00000593 li a1,0 -80000034: 458000ef jal ra,8000048c +80000034: 1c0000ef jal ra,800001f4 80000038: 00000517 auipc a0,0x0 -8000003c: 35c50513 addi a0,a0,860 # 80000394 <__libc_fini_array> -80000040: 30c000ef jal ra,8000034c -80000044: 3ac000ef jal ra,800003f0 <__libc_init_array> +8000003c: 0c450513 addi a0,a0,196 # 800000fc <__libc_fini_array> +80000040: 074000ef jal ra,800000b4 +80000044: 114000ef jal ra,80000158 <__libc_init_array> 80000048: 008000ef jal ra,80000050
-8000004c: 3140006f j 80000360 +8000004c: 07c0006f j 800000c8 Disassembly of section .text: 80000050
: -80000050: 7ffff7b7 lui a5,0x7ffff -80000054: 0007a503 lw a0,0(a5) # 7ffff000 <__stack_size+0x7fffec00> -80000058: 800005b7 lui a1,0x80000 -8000005c: 7ffff637 lui a2,0x7ffff -80000060: 08058593 addi a1,a1,128 # 80000080 <__stack_top+0x81000080> -80000064: 18c0006f j 800001f0 +80000050: 01c02783 lw a5,28(zero) # 1c <__stack_usage+0x1c> +80000054: 00100073 ebreak -80000068 : -80000068: 00000793 li a5,0 -8000006c: 00078863 beqz a5,8000007c -80000070: 80000537 lui a0,0x80000 -80000074: 39450513 addi a0,a0,916 # 80000394 <__stack_top+0x81000394> -80000078: 2d40006f j 8000034c -8000007c: 00008067 ret +80000058 : +80000058: 00000793 li a5,0 +8000005c: 00078863 beqz a5,8000006c +80000060: 80000537 lui a0,0x80000 +80000064: 0fc50513 addi a0,a0,252 # 800000fc <__stack_top+0x810000fc> +80000068: 04c0006f j 800000b4 +8000006c: 00008067 ret -80000080 : -80000080: 0045a783 lw a5,4(a1) -80000084: 0105a703 lw a4,16(a1) -80000088: 02f50533 mul a0,a0,a5 -8000008c: 04078e63 beqz a5,800000e8 -80000090: ff010113 addi sp,sp,-16 -80000094: 00912223 sw s1,4(sp) -80000098: 00a784b3 add s1,a5,a0 -8000009c: 00249493 slli s1,s1,0x2 -800000a0: 00251513 slli a0,a0,0x2 -800000a4: 00812423 sw s0,8(sp) -800000a8: 00112623 sw ra,12(sp) -800000ac: 00e50433 add s0,a0,a4 -800000b0: 00e484b3 add s1,s1,a4 -800000b4: 00100693 li a3,1 -800000b8: 00100613 li a2,1 -800000bc: 00100593 li a1,1 -800000c0: 00100513 li a0,1 -800000c4: 274000ef jal ra,80000338 -800000c8: 00a42023 sw a0,0(s0) -800000cc: 00440413 addi s0,s0,4 -800000d0: fe8492e3 bne s1,s0,800000b4 -800000d4: 00c12083 lw ra,12(sp) -800000d8: 00812403 lw s0,8(sp) -800000dc: 00412483 lw s1,4(sp) -800000e0: 01010113 addi sp,sp,16 -800000e4: 00008067 ret -800000e8: 00008067 ret +80000070 <_exit>: +80000070: 00000513 li a0,0 +80000074: 0005006b 0x5006b -800000ec <_exit>: -800000ec: 00000513 li a0,0 -800000f0: 0005006b 0x5006b +80000078 : +80000078: fc002573 csrr a0,0xfc0 +8000007c: 0005006b 0x5006b +80000080: 00002197 auipc gp,0x2 +80000084: c1818193 addi gp,gp,-1000 # 80001c98 <__global_pointer> +80000088: 7f000117 auipc sp,0x7f000 +8000008c: f7810113 addi sp,sp,-136 # ff000000 <__stack_top> +80000090: 40000593 li a1,1024 +80000094: cc102673 csrr a2,0xcc1 +80000098: 02c585b3 mul a1,a1,a2 +8000009c: 40b10133 sub sp,sp,a1 +800000a0: cc3026f3 csrr a3,0xcc3 +800000a4: 00068663 beqz a3,800000b0 +800000a8: 00000513 li a0,0 +800000ac: 0005006b 0x5006b -800000f4 : -800000f4: fc002573 csrr a0,0xfc0 -800000f8: 0005006b 0x5006b -800000fc: 00002197 auipc gp,0x2 -80000100: e3418193 addi gp,gp,-460 # 80001f30 <__global_pointer> -80000104: 7f000117 auipc sp,0x7f000 -80000108: efc10113 addi sp,sp,-260 # ff000000 <__stack_top> -8000010c: 40000593 li a1,1024 -80000110: cc102673 csrr a2,0xcc1 -80000114: 02c585b3 mul a1,a1,a2 -80000118: 40b10133 sub sp,sp,a1 -8000011c: cc3026f3 csrr a3,0xcc3 -80000120: 00068663 beqz a3,8000012c -80000124: 00000513 li a0,0 -80000128: 0005006b 0x5006b +800000b0 : +800000b0: 00008067 ret -8000012c : -8000012c: 00008067 ret +800000b4 : +800000b4: 00050593 mv a1,a0 +800000b8: 00000693 li a3,0 +800000bc: 00000613 li a2,0 +800000c0: 00000513 li a0,0 +800000c4: 20c0006f j 800002d0 <__register_exitproc> -80000130 : -80000130: fe010113 addi sp,sp,-32 -80000134: 00112e23 sw ra,28(sp) -80000138: 00812c23 sw s0,24(sp) -8000013c: 00912a23 sw s1,20(sp) -80000140: 01212823 sw s2,16(sp) -80000144: 01312623 sw s3,12(sp) -80000148: fc0027f3 csrr a5,0xfc0 -8000014c: 0007806b 0x7806b -80000150: cc5026f3 csrr a3,0xcc5 -80000154: cc3029f3 csrr s3,0xcc3 -80000158: cc002773 csrr a4,0xcc0 -8000015c: fc002673 csrr a2,0xfc0 -80000160: 800027b7 lui a5,0x80002 -80000164: 00269693 slli a3,a3,0x2 -80000168: b5c78793 addi a5,a5,-1188 # 80001b5c <__stack_top+0x81001b5c> -8000016c: 00d787b3 add a5,a5,a3 -80000170: 0007a483 lw s1,0(a5) -80000174: 0104a403 lw s0,16(s1) -80000178: 00c4a683 lw a3,12(s1) -8000017c: 0089a933 slt s2,s3,s0 -80000180: 00040793 mv a5,s0 -80000184: 00d90933 add s2,s2,a3 -80000188: 03368433 mul s0,a3,s3 -8000018c: 00f9d463 bge s3,a5,80000194 -80000190: 00098793 mv a5,s3 -80000194: 00f40433 add s0,s0,a5 -80000198: 0084a683 lw a3,8(s1) -8000019c: 02c40433 mul s0,s0,a2 -800001a0: 02e907b3 mul a5,s2,a4 -800001a4: 00d40433 add s0,s0,a3 -800001a8: 00f40433 add s0,s0,a5 -800001ac: 00890933 add s2,s2,s0 -800001b0: 01245e63 bge s0,s2,800001cc -800001b4: 0004a783 lw a5,0(s1) -800001b8: 0044a583 lw a1,4(s1) -800001bc: 00040513 mv a0,s0 -800001c0: 00140413 addi s0,s0,1 -800001c4: 000780e7 jalr a5 -800001c8: fe8916e3 bne s2,s0,800001b4 -800001cc: 0019b993 seqz s3,s3 -800001d0: 0009806b 0x9806b -800001d4: 01c12083 lw ra,28(sp) -800001d8: 01812403 lw s0,24(sp) -800001dc: 01412483 lw s1,20(sp) -800001e0: 01012903 lw s2,16(sp) -800001e4: 00c12983 lw s3,12(sp) -800001e8: 02010113 addi sp,sp,32 -800001ec: 00008067 ret +800000c8 : +800000c8: ff010113 addi sp,sp,-16 +800000cc: 00000593 li a1,0 +800000d0: 00812423 sw s0,8(sp) +800000d4: 00112623 sw ra,12(sp) +800000d8: 00050413 mv s0,a0 +800000dc: 290000ef jal ra,8000036c <__call_exitprocs> +800000e0: 800027b7 lui a5,0x80002 +800000e4: 8c07a503 lw a0,-1856(a5) # 800018c0 <__stack_top+0x810018c0> +800000e8: 03c52783 lw a5,60(a0) +800000ec: 00078463 beqz a5,800000f4 +800000f0: 000780e7 jalr a5 +800000f4: 00040513 mv a0,s0 +800000f8: f79ff0ef jal ra,80000070 <_exit> -800001f0 : -800001f0: fc010113 addi sp,sp,-64 -800001f4: 02112e23 sw ra,60(sp) -800001f8: 02812c23 sw s0,56(sp) -800001fc: 02912a23 sw s1,52(sp) -80000200: 03212823 sw s2,48(sp) -80000204: 03312623 sw s3,44(sp) -80000208: fc2026f3 csrr a3,0xfc2 -8000020c: fc102873 csrr a6,0xfc1 -80000210: fc002473 csrr s0,0xfc0 -80000214: cc5027f3 csrr a5,0xcc5 -80000218: 01f00713 li a4,31 -8000021c: 0cf74463 blt a4,a5,800002e4 -80000220: 030408b3 mul a7,s0,a6 -80000224: 00100713 li a4,1 -80000228: 00a8d463 bge a7,a0,80000230 -8000022c: 03154733 div a4,a0,a7 -80000230: 0ce6c863 blt a3,a4,80000300 -80000234: 0ae7d863 bge a5,a4,800002e4 -80000238: fff68693 addi a3,a3,-1 -8000023c: 02e54333 div t1,a0,a4 -80000240: 00030893 mv a7,t1 -80000244: 00f69663 bne a3,a5,80000250 -80000248: 02e56533 rem a0,a0,a4 -8000024c: 006508b3 add a7,a0,t1 -80000250: 0288c4b3 div s1,a7,s0 -80000254: 0288e933 rem s2,a7,s0 -80000258: 0b04ca63 blt s1,a6,8000030c -8000025c: 00100693 li a3,1 -80000260: 0304c733 div a4,s1,a6 -80000264: 00070663 beqz a4,80000270 -80000268: 00070693 mv a3,a4 -8000026c: 0304e733 rem a4,s1,a6 -80000270: 800029b7 lui s3,0x80002 -80000274: b5c98993 addi s3,s3,-1188 # 80001b5c <__stack_top+0x81001b5c> -80000278: 00e12e23 sw a4,28(sp) -8000027c: 00c10713 addi a4,sp,12 -80000280: 00b12623 sw a1,12(sp) -80000284: 00c12823 sw a2,16(sp) -80000288: 00d12c23 sw a3,24(sp) -8000028c: 02f30333 mul t1,t1,a5 -80000290: 00279793 slli a5,a5,0x2 -80000294: 00f987b3 add a5,s3,a5 -80000298: 00e7a023 sw a4,0(a5) -8000029c: 00612a23 sw t1,20(sp) -800002a0: 06904c63 bgtz s1,80000318 -800002a4: 04090063 beqz s2,800002e4 -800002a8: 02848433 mul s0,s1,s0 -800002ac: 00812a23 sw s0,20(sp) -800002b0: 0009006b 0x9006b -800002b4: cc5027f3 csrr a5,0xcc5 -800002b8: cc202573 csrr a0,0xcc2 -800002bc: 00279793 slli a5,a5,0x2 -800002c0: 00f989b3 add s3,s3,a5 -800002c4: 0009a783 lw a5,0(s3) -800002c8: 0087a683 lw a3,8(a5) -800002cc: 0007a703 lw a4,0(a5) -800002d0: 0047a583 lw a1,4(a5) -800002d4: 00d50533 add a0,a0,a3 -800002d8: 000700e7 jalr a4 -800002dc: 00100793 li a5,1 -800002e0: 0007806b 0x7806b -800002e4: 03c12083 lw ra,60(sp) -800002e8: 03812403 lw s0,56(sp) -800002ec: 03412483 lw s1,52(sp) -800002f0: 03012903 lw s2,48(sp) -800002f4: 02c12983 lw s3,44(sp) -800002f8: 04010113 addi sp,sp,64 -800002fc: 00008067 ret -80000300: 00068713 mv a4,a3 -80000304: f2e7cae3 blt a5,a4,80000238 -80000308: fddff06f j 800002e4 -8000030c: 00000713 li a4,0 -80000310: 00100693 li a3,1 -80000314: f5dff06f j 80000270 -80000318: 00048713 mv a4,s1 -8000031c: 00985463 bge a6,s1,80000324 -80000320: 00080713 mv a4,a6 -80000324: 800007b7 lui a5,0x80000 -80000328: 13078793 addi a5,a5,304 # 80000130 <__stack_top+0x81000130> -8000032c: 00f7106b 0xf7106b -80000330: e01ff0ef jal ra,80000130 -80000334: f71ff06f j 800002a4 +800000fc <__libc_fini_array>: +800000fc: ff010113 addi sp,sp,-16 +80000100: 00812423 sw s0,8(sp) +80000104: 800017b7 lui a5,0x80001 +80000108: 80001437 lui s0,0x80001 +8000010c: 49440413 addi s0,s0,1172 # 80001494 <__stack_top+0x81001494> +80000110: 49478793 addi a5,a5,1172 # 80001494 <__stack_top+0x81001494> +80000114: 408787b3 sub a5,a5,s0 +80000118: 00912223 sw s1,4(sp) +8000011c: 00112623 sw ra,12(sp) +80000120: 4027d493 srai s1,a5,0x2 +80000124: 02048063 beqz s1,80000144 <__libc_fini_array+0x48> +80000128: ffc78793 addi a5,a5,-4 +8000012c: 00878433 add s0,a5,s0 +80000130: 00042783 lw a5,0(s0) +80000134: fff48493 addi s1,s1,-1 +80000138: ffc40413 addi s0,s0,-4 +8000013c: 000780e7 jalr a5 +80000140: fe0498e3 bnez s1,80000130 <__libc_fini_array+0x34> +80000144: 00c12083 lw ra,12(sp) +80000148: 00812403 lw s0,8(sp) +8000014c: 00412483 lw s1,4(sp) +80000150: 01010113 addi sp,sp,16 +80000154: 00008067 ret -80000338 : -80000338: 00869693 slli a3,a3,0x8 -8000033c: 00a6e6b3 or a3,a3,a0 -80000340: 00000513 li a0,0 -80000344: 6ac5d56b 0x6ac5d56b -80000348: 00008067 ret +80000158 <__libc_init_array>: +80000158: ff010113 addi sp,sp,-16 +8000015c: 00812423 sw s0,8(sp) +80000160: 01212023 sw s2,0(sp) +80000164: 80001437 lui s0,0x80001 +80000168: 80001937 lui s2,0x80001 +8000016c: 49040793 addi a5,s0,1168 # 80001490 <__stack_top+0x81001490> +80000170: 49090913 addi s2,s2,1168 # 80001490 <__stack_top+0x81001490> +80000174: 40f90933 sub s2,s2,a5 +80000178: 00112623 sw ra,12(sp) +8000017c: 00912223 sw s1,4(sp) +80000180: 40295913 srai s2,s2,0x2 +80000184: 02090063 beqz s2,800001a4 <__libc_init_array+0x4c> +80000188: 49040413 addi s0,s0,1168 +8000018c: 00000493 li s1,0 +80000190: 00042783 lw a5,0(s0) +80000194: 00148493 addi s1,s1,1 +80000198: 00440413 addi s0,s0,4 +8000019c: 000780e7 jalr a5 +800001a0: fe9918e3 bne s2,s1,80000190 <__libc_init_array+0x38> +800001a4: 80001437 lui s0,0x80001 +800001a8: 80001937 lui s2,0x80001 +800001ac: 49040793 addi a5,s0,1168 # 80001490 <__stack_top+0x81001490> +800001b0: 49490913 addi s2,s2,1172 # 80001494 <__stack_top+0x81001494> +800001b4: 40f90933 sub s2,s2,a5 +800001b8: 40295913 srai s2,s2,0x2 +800001bc: 02090063 beqz s2,800001dc <__libc_init_array+0x84> +800001c0: 49040413 addi s0,s0,1168 +800001c4: 00000493 li s1,0 +800001c8: 00042783 lw a5,0(s0) +800001cc: 00148493 addi s1,s1,1 +800001d0: 00440413 addi s0,s0,4 +800001d4: 000780e7 jalr a5 +800001d8: fe9918e3 bne s2,s1,800001c8 <__libc_init_array+0x70> +800001dc: 00c12083 lw ra,12(sp) +800001e0: 00812403 lw s0,8(sp) +800001e4: 00412483 lw s1,4(sp) +800001e8: 00012903 lw s2,0(sp) +800001ec: 01010113 addi sp,sp,16 +800001f0: 00008067 ret -8000034c : -8000034c: 00050593 mv a1,a0 -80000350: 00000693 li a3,0 -80000354: 00000613 li a2,0 -80000358: 00000513 li a0,0 -8000035c: 20c0006f j 80000568 <__register_exitproc> +800001f4 : +800001f4: 00f00313 li t1,15 +800001f8: 00050713 mv a4,a0 +800001fc: 02c37e63 bgeu t1,a2,80000238 +80000200: 00f77793 andi a5,a4,15 +80000204: 0a079063 bnez a5,800002a4 +80000208: 08059263 bnez a1,8000028c +8000020c: ff067693 andi a3,a2,-16 +80000210: 00f67613 andi a2,a2,15 +80000214: 00e686b3 add a3,a3,a4 +80000218: 00b72023 sw a1,0(a4) +8000021c: 00b72223 sw a1,4(a4) +80000220: 00b72423 sw a1,8(a4) +80000224: 00b72623 sw a1,12(a4) +80000228: 01070713 addi a4,a4,16 +8000022c: fed766e3 bltu a4,a3,80000218 +80000230: 00061463 bnez a2,80000238 +80000234: 00008067 ret +80000238: 40c306b3 sub a3,t1,a2 +8000023c: 00269693 slli a3,a3,0x2 +80000240: 00000297 auipc t0,0x0 +80000244: 005686b3 add a3,a3,t0 +80000248: 00c68067 jr 12(a3) +8000024c: 00b70723 sb a1,14(a4) +80000250: 00b706a3 sb a1,13(a4) +80000254: 00b70623 sb a1,12(a4) +80000258: 00b705a3 sb a1,11(a4) +8000025c: 00b70523 sb a1,10(a4) +80000260: 00b704a3 sb a1,9(a4) +80000264: 00b70423 sb a1,8(a4) +80000268: 00b703a3 sb a1,7(a4) +8000026c: 00b70323 sb a1,6(a4) +80000270: 00b702a3 sb a1,5(a4) +80000274: 00b70223 sb a1,4(a4) +80000278: 00b701a3 sb a1,3(a4) +8000027c: 00b70123 sb a1,2(a4) +80000280: 00b700a3 sb a1,1(a4) +80000284: 00b70023 sb a1,0(a4) +80000288: 00008067 ret +8000028c: 0ff5f593 andi a1,a1,255 +80000290: 00859693 slli a3,a1,0x8 +80000294: 00d5e5b3 or a1,a1,a3 +80000298: 01059693 slli a3,a1,0x10 +8000029c: 00d5e5b3 or a1,a1,a3 +800002a0: f6dff06f j 8000020c +800002a4: 00279693 slli a3,a5,0x2 +800002a8: 00000297 auipc t0,0x0 +800002ac: 005686b3 add a3,a3,t0 +800002b0: 00008293 mv t0,ra +800002b4: fa0680e7 jalr -96(a3) +800002b8: 00028093 mv ra,t0 +800002bc: ff078793 addi a5,a5,-16 +800002c0: 40f70733 sub a4,a4,a5 +800002c4: 00f60633 add a2,a2,a5 +800002c8: f6c378e3 bgeu t1,a2,80000238 +800002cc: f3dff06f j 80000208 -80000360 : -80000360: ff010113 addi sp,sp,-16 -80000364: 00000593 li a1,0 -80000368: 00812423 sw s0,8(sp) -8000036c: 00112623 sw ra,12(sp) -80000370: 00050413 mv s0,a0 -80000374: 290000ef jal ra,80000604 <__call_exitprocs> -80000378: 800027b7 lui a5,0x80002 -8000037c: b587a503 lw a0,-1192(a5) # 80001b58 <__stack_top+0x81001b58> -80000380: 03c52783 lw a5,60(a0) -80000384: 00078463 beqz a5,8000038c -80000388: 000780e7 jalr a5 -8000038c: 00040513 mv a0,s0 -80000390: d5dff0ef jal ra,800000ec <_exit> +800002d0 <__register_exitproc>: +800002d0: 800027b7 lui a5,0x80002 +800002d4: 8c07a703 lw a4,-1856(a5) # 800018c0 <__stack_top+0x810018c0> +800002d8: 14872783 lw a5,328(a4) +800002dc: 04078c63 beqz a5,80000334 <__register_exitproc+0x64> +800002e0: 0047a703 lw a4,4(a5) +800002e4: 01f00813 li a6,31 +800002e8: 06e84e63 blt a6,a4,80000364 <__register_exitproc+0x94> +800002ec: 00271813 slli a6,a4,0x2 +800002f0: 02050663 beqz a0,8000031c <__register_exitproc+0x4c> +800002f4: 01078333 add t1,a5,a6 +800002f8: 08c32423 sw a2,136(t1) +800002fc: 1887a883 lw a7,392(a5) +80000300: 00100613 li a2,1 +80000304: 00e61633 sll a2,a2,a4 +80000308: 00c8e8b3 or a7,a7,a2 +8000030c: 1917a423 sw a7,392(a5) +80000310: 10d32423 sw a3,264(t1) +80000314: 00200693 li a3,2 +80000318: 02d50463 beq a0,a3,80000340 <__register_exitproc+0x70> +8000031c: 00170713 addi a4,a4,1 +80000320: 00e7a223 sw a4,4(a5) +80000324: 010787b3 add a5,a5,a6 +80000328: 00b7a423 sw a1,8(a5) +8000032c: 00000513 li a0,0 +80000330: 00008067 ret +80000334: 14c70793 addi a5,a4,332 +80000338: 14f72423 sw a5,328(a4) +8000033c: fa5ff06f j 800002e0 <__register_exitproc+0x10> +80000340: 18c7a683 lw a3,396(a5) +80000344: 00170713 addi a4,a4,1 +80000348: 00e7a223 sw a4,4(a5) +8000034c: 00c6e633 or a2,a3,a2 +80000350: 18c7a623 sw a2,396(a5) +80000354: 010787b3 add a5,a5,a6 +80000358: 00b7a423 sw a1,8(a5) +8000035c: 00000513 li a0,0 +80000360: 00008067 ret +80000364: fff00513 li a0,-1 +80000368: 00008067 ret -80000394 <__libc_fini_array>: -80000394: ff010113 addi sp,sp,-16 -80000398: 00812423 sw s0,8(sp) -8000039c: 800017b7 lui a5,0x80001 -800003a0: 80001437 lui s0,0x80001 -800003a4: 72c40413 addi s0,s0,1836 # 8000172c <__stack_top+0x8100172c> -800003a8: 72c78793 addi a5,a5,1836 # 8000172c <__stack_top+0x8100172c> -800003ac: 408787b3 sub a5,a5,s0 -800003b0: 00912223 sw s1,4(sp) -800003b4: 00112623 sw ra,12(sp) -800003b8: 4027d493 srai s1,a5,0x2 -800003bc: 02048063 beqz s1,800003dc <__libc_fini_array+0x48> -800003c0: ffc78793 addi a5,a5,-4 -800003c4: 00878433 add s0,a5,s0 -800003c8: 00042783 lw a5,0(s0) -800003cc: fff48493 addi s1,s1,-1 -800003d0: ffc40413 addi s0,s0,-4 -800003d4: 000780e7 jalr a5 -800003d8: fe0498e3 bnez s1,800003c8 <__libc_fini_array+0x34> -800003dc: 00c12083 lw ra,12(sp) -800003e0: 00812403 lw s0,8(sp) -800003e4: 00412483 lw s1,4(sp) -800003e8: 01010113 addi sp,sp,16 -800003ec: 00008067 ret - -800003f0 <__libc_init_array>: -800003f0: ff010113 addi sp,sp,-16 -800003f4: 00812423 sw s0,8(sp) -800003f8: 01212023 sw s2,0(sp) -800003fc: 80001437 lui s0,0x80001 -80000400: 80001937 lui s2,0x80001 -80000404: 72840793 addi a5,s0,1832 # 80001728 <__stack_top+0x81001728> -80000408: 72890913 addi s2,s2,1832 # 80001728 <__stack_top+0x81001728> -8000040c: 40f90933 sub s2,s2,a5 -80000410: 00112623 sw ra,12(sp) -80000414: 00912223 sw s1,4(sp) -80000418: 40295913 srai s2,s2,0x2 -8000041c: 02090063 beqz s2,8000043c <__libc_init_array+0x4c> -80000420: 72840413 addi s0,s0,1832 -80000424: 00000493 li s1,0 -80000428: 00042783 lw a5,0(s0) -8000042c: 00148493 addi s1,s1,1 -80000430: 00440413 addi s0,s0,4 -80000434: 000780e7 jalr a5 -80000438: fe9918e3 bne s2,s1,80000428 <__libc_init_array+0x38> -8000043c: 80001437 lui s0,0x80001 -80000440: 80001937 lui s2,0x80001 -80000444: 72840793 addi a5,s0,1832 # 80001728 <__stack_top+0x81001728> -80000448: 72c90913 addi s2,s2,1836 # 8000172c <__stack_top+0x8100172c> -8000044c: 40f90933 sub s2,s2,a5 -80000450: 40295913 srai s2,s2,0x2 -80000454: 02090063 beqz s2,80000474 <__libc_init_array+0x84> -80000458: 72840413 addi s0,s0,1832 -8000045c: 00000493 li s1,0 -80000460: 00042783 lw a5,0(s0) -80000464: 00148493 addi s1,s1,1 -80000468: 00440413 addi s0,s0,4 -8000046c: 000780e7 jalr a5 -80000470: fe9918e3 bne s2,s1,80000460 <__libc_init_array+0x70> -80000474: 00c12083 lw ra,12(sp) -80000478: 00812403 lw s0,8(sp) -8000047c: 00412483 lw s1,4(sp) -80000480: 00012903 lw s2,0(sp) -80000484: 01010113 addi sp,sp,16 -80000488: 00008067 ret - -8000048c : -8000048c: 00f00313 li t1,15 -80000490: 00050713 mv a4,a0 -80000494: 02c37e63 bgeu t1,a2,800004d0 -80000498: 00f77793 andi a5,a4,15 -8000049c: 0a079063 bnez a5,8000053c -800004a0: 08059263 bnez a1,80000524 -800004a4: ff067693 andi a3,a2,-16 -800004a8: 00f67613 andi a2,a2,15 -800004ac: 00e686b3 add a3,a3,a4 -800004b0: 00b72023 sw a1,0(a4) -800004b4: 00b72223 sw a1,4(a4) -800004b8: 00b72423 sw a1,8(a4) -800004bc: 00b72623 sw a1,12(a4) -800004c0: 01070713 addi a4,a4,16 -800004c4: fed766e3 bltu a4,a3,800004b0 -800004c8: 00061463 bnez a2,800004d0 -800004cc: 00008067 ret -800004d0: 40c306b3 sub a3,t1,a2 -800004d4: 00269693 slli a3,a3,0x2 -800004d8: 00000297 auipc t0,0x0 -800004dc: 005686b3 add a3,a3,t0 -800004e0: 00c68067 jr 12(a3) -800004e4: 00b70723 sb a1,14(a4) -800004e8: 00b706a3 sb a1,13(a4) -800004ec: 00b70623 sb a1,12(a4) -800004f0: 00b705a3 sb a1,11(a4) -800004f4: 00b70523 sb a1,10(a4) -800004f8: 00b704a3 sb a1,9(a4) -800004fc: 00b70423 sb a1,8(a4) -80000500: 00b703a3 sb a1,7(a4) -80000504: 00b70323 sb a1,6(a4) -80000508: 00b702a3 sb a1,5(a4) -8000050c: 00b70223 sb a1,4(a4) -80000510: 00b701a3 sb a1,3(a4) -80000514: 00b70123 sb a1,2(a4) -80000518: 00b700a3 sb a1,1(a4) -8000051c: 00b70023 sb a1,0(a4) -80000520: 00008067 ret -80000524: 0ff5f593 andi a1,a1,255 -80000528: 00859693 slli a3,a1,0x8 -8000052c: 00d5e5b3 or a1,a1,a3 -80000530: 01059693 slli a3,a1,0x10 -80000534: 00d5e5b3 or a1,a1,a3 -80000538: f6dff06f j 800004a4 -8000053c: 00279693 slli a3,a5,0x2 -80000540: 00000297 auipc t0,0x0 -80000544: 005686b3 add a3,a3,t0 -80000548: 00008293 mv t0,ra -8000054c: fa0680e7 jalr -96(a3) -80000550: 00028093 mv ra,t0 -80000554: ff078793 addi a5,a5,-16 -80000558: 40f70733 sub a4,a4,a5 -8000055c: 00f60633 add a2,a2,a5 -80000560: f6c378e3 bgeu t1,a2,800004d0 -80000564: f3dff06f j 800004a0 - -80000568 <__register_exitproc>: -80000568: 800027b7 lui a5,0x80002 -8000056c: b587a703 lw a4,-1192(a5) # 80001b58 <__stack_top+0x81001b58> -80000570: 14872783 lw a5,328(a4) -80000574: 04078c63 beqz a5,800005cc <__register_exitproc+0x64> -80000578: 0047a703 lw a4,4(a5) -8000057c: 01f00813 li a6,31 -80000580: 06e84e63 blt a6,a4,800005fc <__register_exitproc+0x94> -80000584: 00271813 slli a6,a4,0x2 -80000588: 02050663 beqz a0,800005b4 <__register_exitproc+0x4c> -8000058c: 01078333 add t1,a5,a6 -80000590: 08c32423 sw a2,136(t1) -80000594: 1887a883 lw a7,392(a5) -80000598: 00100613 li a2,1 -8000059c: 00e61633 sll a2,a2,a4 -800005a0: 00c8e8b3 or a7,a7,a2 -800005a4: 1917a423 sw a7,392(a5) -800005a8: 10d32423 sw a3,264(t1) -800005ac: 00200693 li a3,2 -800005b0: 02d50463 beq a0,a3,800005d8 <__register_exitproc+0x70> -800005b4: 00170713 addi a4,a4,1 -800005b8: 00e7a223 sw a4,4(a5) -800005bc: 010787b3 add a5,a5,a6 -800005c0: 00b7a423 sw a1,8(a5) -800005c4: 00000513 li a0,0 -800005c8: 00008067 ret -800005cc: 14c70793 addi a5,a4,332 -800005d0: 14f72423 sw a5,328(a4) -800005d4: fa5ff06f j 80000578 <__register_exitproc+0x10> -800005d8: 18c7a683 lw a3,396(a5) -800005dc: 00170713 addi a4,a4,1 -800005e0: 00e7a223 sw a4,4(a5) -800005e4: 00c6e633 or a2,a3,a2 -800005e8: 18c7a623 sw a2,396(a5) -800005ec: 010787b3 add a5,a5,a6 -800005f0: 00b7a423 sw a1,8(a5) -800005f4: 00000513 li a0,0 -800005f8: 00008067 ret -800005fc: fff00513 li a0,-1 -80000600: 00008067 ret - -80000604 <__call_exitprocs>: -80000604: fd010113 addi sp,sp,-48 -80000608: 800027b7 lui a5,0x80002 -8000060c: 01412c23 sw s4,24(sp) -80000610: b587aa03 lw s4,-1192(a5) # 80001b58 <__stack_top+0x81001b58> -80000614: 03212023 sw s2,32(sp) -80000618: 02112623 sw ra,44(sp) -8000061c: 148a2903 lw s2,328(s4) -80000620: 02812423 sw s0,40(sp) -80000624: 02912223 sw s1,36(sp) -80000628: 01312e23 sw s3,28(sp) -8000062c: 01512a23 sw s5,20(sp) -80000630: 01612823 sw s6,16(sp) -80000634: 01712623 sw s7,12(sp) -80000638: 01812423 sw s8,8(sp) -8000063c: 04090063 beqz s2,8000067c <__call_exitprocs+0x78> -80000640: 00050b13 mv s6,a0 -80000644: 00058b93 mv s7,a1 -80000648: 00100a93 li s5,1 -8000064c: fff00993 li s3,-1 -80000650: 00492483 lw s1,4(s2) -80000654: fff48413 addi s0,s1,-1 -80000658: 02044263 bltz s0,8000067c <__call_exitprocs+0x78> -8000065c: 00249493 slli s1,s1,0x2 -80000660: 009904b3 add s1,s2,s1 -80000664: 040b8463 beqz s7,800006ac <__call_exitprocs+0xa8> -80000668: 1044a783 lw a5,260(s1) -8000066c: 05778063 beq a5,s7,800006ac <__call_exitprocs+0xa8> -80000670: fff40413 addi s0,s0,-1 -80000674: ffc48493 addi s1,s1,-4 -80000678: ff3416e3 bne s0,s3,80000664 <__call_exitprocs+0x60> -8000067c: 02c12083 lw ra,44(sp) -80000680: 02812403 lw s0,40(sp) -80000684: 02412483 lw s1,36(sp) -80000688: 02012903 lw s2,32(sp) -8000068c: 01c12983 lw s3,28(sp) -80000690: 01812a03 lw s4,24(sp) -80000694: 01412a83 lw s5,20(sp) -80000698: 01012b03 lw s6,16(sp) -8000069c: 00c12b83 lw s7,12(sp) -800006a0: 00812c03 lw s8,8(sp) -800006a4: 03010113 addi sp,sp,48 -800006a8: 00008067 ret -800006ac: 00492783 lw a5,4(s2) -800006b0: 0044a683 lw a3,4(s1) -800006b4: fff78793 addi a5,a5,-1 -800006b8: 04878e63 beq a5,s0,80000714 <__call_exitprocs+0x110> -800006bc: 0004a223 sw zero,4(s1) -800006c0: fa0688e3 beqz a3,80000670 <__call_exitprocs+0x6c> -800006c4: 18892783 lw a5,392(s2) -800006c8: 008a9733 sll a4,s5,s0 -800006cc: 00492c03 lw s8,4(s2) -800006d0: 00f777b3 and a5,a4,a5 -800006d4: 02079263 bnez a5,800006f8 <__call_exitprocs+0xf4> -800006d8: 000680e7 jalr a3 -800006dc: 00492703 lw a4,4(s2) -800006e0: 148a2783 lw a5,328(s4) -800006e4: 01871463 bne a4,s8,800006ec <__call_exitprocs+0xe8> -800006e8: f8f904e3 beq s2,a5,80000670 <__call_exitprocs+0x6c> -800006ec: f80788e3 beqz a5,8000067c <__call_exitprocs+0x78> -800006f0: 00078913 mv s2,a5 -800006f4: f5dff06f j 80000650 <__call_exitprocs+0x4c> -800006f8: 18c92783 lw a5,396(s2) -800006fc: 0844a583 lw a1,132(s1) -80000700: 00f77733 and a4,a4,a5 -80000704: 00071c63 bnez a4,8000071c <__call_exitprocs+0x118> -80000708: 000b0513 mv a0,s6 -8000070c: 000680e7 jalr a3 -80000710: fcdff06f j 800006dc <__call_exitprocs+0xd8> -80000714: 00892223 sw s0,4(s2) -80000718: fa9ff06f j 800006c0 <__call_exitprocs+0xbc> -8000071c: 00058513 mv a0,a1 -80000720: 000680e7 jalr a3 -80000724: fb9ff06f j 800006dc <__call_exitprocs+0xd8> +8000036c <__call_exitprocs>: +8000036c: fd010113 addi sp,sp,-48 +80000370: 800027b7 lui a5,0x80002 +80000374: 01412c23 sw s4,24(sp) +80000378: 8c07aa03 lw s4,-1856(a5) # 800018c0 <__stack_top+0x810018c0> +8000037c: 03212023 sw s2,32(sp) +80000380: 02112623 sw ra,44(sp) +80000384: 148a2903 lw s2,328(s4) +80000388: 02812423 sw s0,40(sp) +8000038c: 02912223 sw s1,36(sp) +80000390: 01312e23 sw s3,28(sp) +80000394: 01512a23 sw s5,20(sp) +80000398: 01612823 sw s6,16(sp) +8000039c: 01712623 sw s7,12(sp) +800003a0: 01812423 sw s8,8(sp) +800003a4: 04090063 beqz s2,800003e4 <__call_exitprocs+0x78> +800003a8: 00050b13 mv s6,a0 +800003ac: 00058b93 mv s7,a1 +800003b0: 00100a93 li s5,1 +800003b4: fff00993 li s3,-1 +800003b8: 00492483 lw s1,4(s2) +800003bc: fff48413 addi s0,s1,-1 +800003c0: 02044263 bltz s0,800003e4 <__call_exitprocs+0x78> +800003c4: 00249493 slli s1,s1,0x2 +800003c8: 009904b3 add s1,s2,s1 +800003cc: 040b8463 beqz s7,80000414 <__call_exitprocs+0xa8> +800003d0: 1044a783 lw a5,260(s1) +800003d4: 05778063 beq a5,s7,80000414 <__call_exitprocs+0xa8> +800003d8: fff40413 addi s0,s0,-1 +800003dc: ffc48493 addi s1,s1,-4 +800003e0: ff3416e3 bne s0,s3,800003cc <__call_exitprocs+0x60> +800003e4: 02c12083 lw ra,44(sp) +800003e8: 02812403 lw s0,40(sp) +800003ec: 02412483 lw s1,36(sp) +800003f0: 02012903 lw s2,32(sp) +800003f4: 01c12983 lw s3,28(sp) +800003f8: 01812a03 lw s4,24(sp) +800003fc: 01412a83 lw s5,20(sp) +80000400: 01012b03 lw s6,16(sp) +80000404: 00c12b83 lw s7,12(sp) +80000408: 00812c03 lw s8,8(sp) +8000040c: 03010113 addi sp,sp,48 +80000410: 00008067 ret +80000414: 00492783 lw a5,4(s2) +80000418: 0044a683 lw a3,4(s1) +8000041c: fff78793 addi a5,a5,-1 +80000420: 04878e63 beq a5,s0,8000047c <__call_exitprocs+0x110> +80000424: 0004a223 sw zero,4(s1) +80000428: fa0688e3 beqz a3,800003d8 <__call_exitprocs+0x6c> +8000042c: 18892783 lw a5,392(s2) +80000430: 008a9733 sll a4,s5,s0 +80000434: 00492c03 lw s8,4(s2) +80000438: 00f777b3 and a5,a4,a5 +8000043c: 02079263 bnez a5,80000460 <__call_exitprocs+0xf4> +80000440: 000680e7 jalr a3 +80000444: 00492703 lw a4,4(s2) +80000448: 148a2783 lw a5,328(s4) +8000044c: 01871463 bne a4,s8,80000454 <__call_exitprocs+0xe8> +80000450: f8f904e3 beq s2,a5,800003d8 <__call_exitprocs+0x6c> +80000454: f80788e3 beqz a5,800003e4 <__call_exitprocs+0x78> +80000458: 00078913 mv s2,a5 +8000045c: f5dff06f j 800003b8 <__call_exitprocs+0x4c> +80000460: 18c92783 lw a5,396(s2) +80000464: 0844a583 lw a1,132(s1) +80000468: 00f77733 and a4,a4,a5 +8000046c: 00071c63 bnez a4,80000484 <__call_exitprocs+0x118> +80000470: 000b0513 mv a0,s6 +80000474: 000680e7 jalr a3 +80000478: fcdff06f j 80000444 <__call_exitprocs+0xd8> +8000047c: 00892223 sw s0,4(s2) +80000480: fa9ff06f j 80000428 <__call_exitprocs+0xbc> +80000484: 00058513 mv a0,a1 +80000488: 000680e7 jalr a3 +8000048c: fb9ff06f j 80000444 <__call_exitprocs+0xd8> Disassembly of section .init_array: -80001728 <__init_array_start>: -80001728: 0068 addi a0,sp,12 -8000172a: 8000 0x8000 +80001490 <__init_array_start>: +80001490: 0058 addi a4,sp,4 +80001492: 8000 0x8000 Disassembly of section .data: -80001730 : -80001730: 0000 unimp -80001732: 0000 unimp -80001734: 1a1c addi a5,sp,304 -80001736: 8000 0x8000 -80001738: 1a84 addi s1,sp,368 -8000173a: 8000 0x8000 -8000173c: 1aec addi a1,sp,380 -8000173e: 8000 0x8000 +80001498 : +80001498: 0000 unimp +8000149a: 0000 unimp +8000149c: 1784 addi s1,sp,992 +8000149e: 8000 0x8000 +800014a0: 17ec addi a1,sp,1004 +800014a2: 8000 0x8000 +800014a4: 1854 addi a3,sp,52 +800014a6: 8000 0x8000 ... -800017d8: 0001 nop -800017da: 0000 unimp -800017dc: 0000 unimp -800017de: 0000 unimp -800017e0: 330e fld ft6,224(sp) -800017e2: abcd j 80001dd4 <__BSS_END__+0x1f8> -800017e4: 1234 addi a3,sp,296 -800017e6: e66d bnez a2,800018d0 -800017e8: deec sw a1,124(a3) -800017ea: 0005 c.nop 1 -800017ec: 0000000b 0xb +80001540: 0001 nop +80001542: 0000 unimp +80001544: 0000 unimp +80001546: 0000 unimp +80001548: 330e fld ft6,224(sp) +8000154a: abcd j 80001b3c <__BSS_END__+0x278> +8000154c: 1234 addi a3,sp,296 +8000154e: e66d bnez a2,80001638 +80001550: deec sw a1,124(a3) +80001552: 0005 c.nop 1 +80001554: 0000000b 0xb ... Disassembly of section .sdata: -80001b58 <_global_impure_ptr>: -80001b58: 1730 addi a2,sp,936 -80001b5a: 8000 0x8000 - -Disassembly of section .bss: - -80001b5c : - ... +800018c0 <_global_impure_ptr>: +800018c0: 1498 addi a4,sp,608 +800018c2: 8000 0x8000 Disassembly of section .comment: diff --git a/driver/tests/tex_demo/kernel.elf b/driver/tests/tex_demo/kernel.elf index b2e5502f..a928fbde 100755 Binary files a/driver/tests/tex_demo/kernel.elf and b/driver/tests/tex_demo/kernel.elf differ diff --git a/driver/tests/tex_demo/sample.tga b/driver/tests/tex_demo/sample.tga new file mode 100755 index 00000000..1b04c925 Binary files /dev/null and b/driver/tests/tex_demo/sample.tga differ diff --git a/driver/tests/tex_demo/utils.cpp b/driver/tests/tex_demo/utils.cpp new file mode 100644 index 00000000..9b36c510 --- /dev/null +++ b/driver/tests/tex_demo/utils.cpp @@ -0,0 +1,115 @@ +#include "utils.h" +#include + +struct __attribute__((__packed__)) tga_header_t { + int8_t idlength; + int8_t colormaptype; + int8_t imagetype; + int16_t colormaporigin; + int16_t colormaplength; + int8_t colormapdepth; + int16_t xoffset; + int16_t yoffset; + int16_t width; + int16_t height; + int8_t bitsperpixel; + int8_t imagedescriptor; +}; + +int LoadTGA(const char *filename, + std::vector &pixels, + uint32_t *width, + uint32_t *height, + uint32_t *bpp) { + std::ifstream ifs(filename, std::ios::in | std::ios::binary); + if (!ifs.is_open()) { + std::cerr << "couldn't open file: " << filename << "!" << std::endl; + return -1; + } + + tga_header_t header; + ifs.read(reinterpret_cast(&header), sizeof(tga_header_t)); + if (ifs.fail()) { + std::cerr << "invalid TGA file header!" << std::endl; + return -1; + } + + if (header.imagetype != 2) { + std::cerr << "unsupported TGA encoding format!" << std::endl; + return -1; + } + + ifs.seekg(header.idlength, std::ios::cur); // skip string + if (ifs.fail()) { + std::cerr << "invalid TGA file!" << std::endl; + return -1; + } + + switch (header.bitsperpixel) { + case 24: + case 32: { + auto stride = header.bitsperpixel / 8; + auto pitch = header.width * stride; + pixels.resize(header.height * pitch); + + // we are going to load the pixel data line by line + for (int y = 0; y < header.height; ++y) { + // Read current line of pixels + auto line = pixels.data() + y * pitch; + ifs.read(reinterpret_cast(line), pitch); + if (ifs.fail()) { + std::cerr << "invalid TGA file!" << std::endl; + return -1; + } + + // Because the TGA is BGR instead of RGB, we must swap RG components + for (int i = 0; i < pitch; i += stride) { + auto tmp = line[i]; + line[i] = line[i + 2]; + line[i + 2] = tmp; + } + } + break; + } + default: + std::cerr << "unsupported TGA bitsperpixel!" << std::endl; + return -1; + } + + *width = header.width; + *height = header.height; + *bpp = header.bitsperpixel / 8; + + return 0; +} + +int SaveTGA(const char *filename, + const std::vector &pixels, + uint32_t width, + uint32_t height, + uint32_t bpp) { + std::ofstream ofs(filename, std::ios::out | std::ios::binary); + if (!ofs.is_open()) { + std::cerr << "couldn't create file: " << filename << "!" << std::endl; + return -1; + } + + tga_header_t header; + header.idlength = 0; + header.colormaptype = 0; // no palette + header.imagetype = 2; // color mapped data + header.colormaporigin = 0; + header.colormaplength = 0; + header.colormapdepth = 0; + header.xoffset = 0; + header.yoffset = 0; + header.width = width; + header.height = height; + header.bitsperpixel = bpp * 8; + header.imagedescriptor = 0; + + ofs.write(reinterpret_cast(&header), sizeof(tga_header_t)); + ofs.write((const char*)pixels.data(), pixels.size()); + + return 0; +} \ No newline at end of file diff --git a/driver/tests/tex_demo/utils.h b/driver/tests/tex_demo/utils.h new file mode 100644 index 00000000..b1d15e3b --- /dev/null +++ b/driver/tests/tex_demo/utils.h @@ -0,0 +1,15 @@ +#include +#include +#include + +int LoadTGA(const char *filename, + std::vector &pixels, + uint32_t *width, + uint32_t *height, + uint32_t *bpp); + +int SaveTGA(const char *filename, + const std::vector &pixels, + uint32_t width, + uint32_t height, + uint32_t bpp); \ No newline at end of file diff --git a/hw/VX_config.h b/hw/VX_config.h index 05339ece..1be339ed 100644 --- a/hw/VX_config.h +++ b/hw/VX_config.h @@ -253,7 +253,7 @@ #define CSR_TEX0_FORMAT CSR_TEX_BEGIN + 0x1 #define CSR_TEX0_WIDTH CSR_TEX_BEGIN + 0x2 #define CSR_TEX0_HEIGHT CSR_TEX_BEGIN + 0x3 -#define CSR_TEX0_STRIDE CSR_TEX_BEGIN + 0x4 +#define CSR_TEX0_PITCH CSR_TEX_BEGIN + 0x4 #define CSR_TEX0_WRAP_U CSR_TEX_BEGIN + 0x5 #define CSR_TEX0_WRAP_V CSR_TEX_BEGIN + 0x6 #define CSR_TEX0_MIN_FILTER CSR_TEX_BEGIN + 0x7 @@ -264,7 +264,7 @@ #define CSR_TEX1_FORMAT CSR_TEX_BEGIN + 0xA #define CSR_TEX1_WIDTH CSR_TEX_BEGIN + 0xB #define CSR_TEX1_HEIGHT CSR_TEX_BEGIN + 0xC -#define CSR_TEX1_STRIDE CSR_TEX_BEGIN + 0xD +#define CSR_TEX1_PITCH CSR_TEX_BEGIN + 0xD #define CSR_TEX1_WRAP_U CSR_TEX_BEGIN + 0xE #define CSR_TEX1_WRAP_V CSR_TEX_BEGIN + 0xF #define CSR_TEX1_MIN_FILTER CSR_TEX_BEGIN + 0x10 diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 2fc873fc..897e9f4b 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -238,30 +238,33 @@ `define CSR_NC 12'hFC2 ////////// Texture Unit CSRs ///////////// + `define CSR_TEX_BEGIN 12'hFD0 + // Unit 1 -`define CSR_TEX0_ADDR `CSR_TEX_BEGIN -`define CSR_TEX0_FORMAT `CSR_TEX_BEGIN + 12'h1 -`define CSR_TEX0_WIDTH `CSR_TEX_BEGIN + 12'h2 -`define CSR_TEX0_HEIGHT `CSR_TEX_BEGIN + 12'h3 -`define CSR_TEX0_STRIDE `CSR_TEX_BEGIN + 12'h4 -`define CSR_TEX0_WRAP_U `CSR_TEX_BEGIN + 12'h5 -`define CSR_TEX0_WRAP_V `CSR_TEX_BEGIN + 12'h6 -`define CSR_TEX0_MIN_FILTER `CSR_TEX_BEGIN + 12'h7 -`define CSR_TEX0_MAX_FILTER `CSR_TEX_BEGIN + 12'h8 +`define CSR_TEX0_ADDR `CSR_TEX_BEGIN +`define CSR_TEX0_FORMAT `CSR_TEX_BEGIN + 12'h1 +`define CSR_TEX0_WIDTH `CSR_TEX_BEGIN + 12'h2 +`define CSR_TEX0_HEIGHT `CSR_TEX_BEGIN + 12'h3 +`define CSR_TEX0_PITCH `CSR_TEX_BEGIN + 12'h4 +`define CSR_TEX0_WRAP_U `CSR_TEX_BEGIN + 12'h5 +`define CSR_TEX0_WRAP_V `CSR_TEX_BEGIN + 12'h6 +`define CSR_TEX0_MIN_FILTER `CSR_TEX_BEGIN + 12'h7 +`define CSR_TEX0_MAX_FILTER `CSR_TEX_BEGIN + 12'h8 // Unit 2 -`define CSR_TEX1_ADDR `CSR_TEX_BEGIN + 12'h9 -`define CSR_TEX1_FORMAT `CSR_TEX_BEGIN + 12'hA -`define CSR_TEX1_WIDTH `CSR_TEX_BEGIN + 12'hB -`define CSR_TEX1_HEIGHT `CSR_TEX_BEGIN + 12'hC -`define CSR_TEX1_STRIDE `CSR_TEX_BEGIN + 12'hD -`define CSR_TEX1_WRAP_U `CSR_TEX_BEGIN + 12'hE -`define CSR_TEX1_WRAP_V `CSR_TEX_BEGIN + 12'hF -`define CSR_TEX1_MIN_FILTER `CSR_TEX_BEGIN + 12'h10 -`define CSR_TEX1_MAX_FILTER `CSR_TEX_BEGIN + 12'h11 +`define CSR_TEX1_ADDR `CSR_TEX_BEGIN + 12'h9 +`define CSR_TEX1_FORMAT `CSR_TEX_BEGIN + 12'hA +`define CSR_TEX1_WIDTH `CSR_TEX_BEGIN + 12'hB +`define CSR_TEX1_HEIGHT `CSR_TEX_BEGIN + 12'hC +`define CSR_TEX1_PITCH `CSR_TEX_BEGIN + 12'hD +`define CSR_TEX1_WRAP_U `CSR_TEX_BEGIN + 12'hE +`define CSR_TEX1_WRAP_V `CSR_TEX_BEGIN + 12'hF +`define CSR_TEX1_MIN_FILTER `CSR_TEX_BEGIN + 12'h10 +`define CSR_TEX1_MAX_FILTER `CSR_TEX_BEGIN + 12'h11 `define CSR_TEX_END `CSR_TEX1_MAX_FILTER + // Pipeline Queues //////////////////////////////////////////////////////////// // Size of LSU Request Queue diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index e8b7668a..d70841a2 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -389,9 +389,7 @@ `define MAXHTW 8 `define MAXFTW 8 `define MAXFMW 8 -`define MAXAMW 8 -`define TAGW 8 -`define DATAW 32 +`define MAXAMW 8 //////////////////////////////////////////////////////////////////////////////////////// `include "VX_types.vh" diff --git a/hw/rtl/tex_unit/VX_tex_unit.v b/hw/rtl/tex_unit/VX_tex_unit.v index 1485e32c..08ecbcd3 100644 --- a/hw/rtl/tex_unit/VX_tex_unit.v +++ b/hw/rtl/tex_unit/VX_tex_unit.v @@ -55,7 +55,7 @@ module VX_tex_unit #( `CSR_TEX0_FORMAT : tex_format[0] <= tex_csr_if.write_data; `CSR_TEX0_WIDTH : tex_width[0] <= tex_csr_if.write_data; `CSR_TEX0_HEIGHT : tex_height[0] <= tex_csr_if.write_data; - `CSR_TEX0_STRIDE : tex_stride[0] <= tex_csr_if.write_data; + `CSR_TEX0_PITCH : tex_stride[0] <= tex_csr_if.write_data; `CSR_TEX0_WRAP_U : tex_wrap_u[0] <= tex_csr_if.write_data; `CSR_TEX0_WRAP_V : tex_wrap_v[0] <= tex_csr_if.write_data; `CSR_TEX0_MIN_FILTER : tex_min_filter[0] <= tex_csr_if.write_data; @@ -65,7 +65,7 @@ module VX_tex_unit #( `CSR_TEX1_FORMAT : tex_format[1] <= tex_csr_if.write_data; `CSR_TEX1_WIDTH : tex_width[1] <= tex_csr_if.write_data; `CSR_TEX1_HEIGHT : tex_height[1] <= tex_csr_if.write_data; - `CSR_TEX1_STRIDE : tex_stride[1] <= tex_csr_if.write_data; + `CSR_TEX1_PITCH : tex_stride[1] <= tex_csr_if.write_data; `CSR_TEX1_WRAP_U : tex_wrap_u[1] <= tex_csr_if.write_data; `CSR_TEX1_WRAP_V : tex_wrap_v[1] <= tex_csr_if.write_data; `CSR_TEX1_MIN_FILTER : tex_min_filter[1] <= tex_csr_if.write_data; @@ -88,7 +88,7 @@ module VX_tex_unit #( assign rsp_PC = tex_req_if.PC; assign rsp_rd = tex_req_if.rd; assign rsp_wb = tex_req_if.wb; - assign rsp_data = {`NUM_THREADS{32'hFAAF}}; // dummy color value + assign rsp_data = {`NUM_THREADS{32'hFF0000FF}}; // dummy blue value // output assign stall_out = ~tex_rsp_if.ready && tex_rsp_if.valid; @@ -116,7 +116,7 @@ module VX_tex_unit #( $display("%t: core%0d-tex_csr: csr_tex0_format, csr_data=%0h", $time, CORE_ID, tex_format[0]); $display("%t: core%0d-tex_csr: csr_tex0_width, csr_data=%0h", $time, CORE_ID, tex_width[0]); $display("%t: core%0d-tex_csr: csr_tex0_height, csr_data=%0h", $time, CORE_ID, tex_height[0]); - $display("%t: core%0d-tex_csr: csr_tex0_stride, csr_data=%0h", $time, CORE_ID, tex_stride[0]); + $display("%t: core%0d-tex_csr: CSR_TEX0_PITCH, csr_data=%0h", $time, CORE_ID, tex_stride[0]); $display("%t: core%0d-tex_csr: csr_tex0_wrap_u, csr_data=%0h", $time, CORE_ID, tex_wrap_u[0]); $display("%t: core%0d-tex_csr: csr_tex0_wrap_v, csr_data=%0h", $time, CORE_ID, tex_wrap_v[0]); $display("%t: core%0d-tex_csr: csr_tex0_min_filter, csr_data=%0h", $time, CORE_ID, tex_min_filter[0]); diff --git a/runtime/include/vx_intrinsics.h b/runtime/include/vx_intrinsics.h index 48e27851..67644ff0 100644 --- a/runtime/include/vx_intrinsics.h +++ b/runtime/include/vx_intrinsics.h @@ -5,7 +5,52 @@ #ifdef __cplusplus extern "C" { + #endif +#ifdef __ASSEMBLY__ +#define __ASM_STR(x) x +#else +#define __ASM_STR(x) #x +#endif + +#define vx_csr_swap(csr, val) ({ \ + unsigned long __v = (unsigned long)(val); \ + __asm__ __volatile__ ("csrrw %0, " __ASM_STR(csr) ", %1" : "=r" (__v) : "rK" (__v) : "memory"); \ + __v; \ +}) + +#define vx_csr_read(csr) ({ \ + register unsigned long __v; \ + __asm__ __volatile__ ("csrr %0, " __ASM_STR(csr) : "=r" (__v) :: "memory"); \ + __v; \ +}) + +#define vx_csr_write(csr, val) ({ \ + unsigned long __v = (unsigned long)(val); \ + __asm__ __volatile__ ("csrw " __ASM_STR(csr) ", %0" :: "rK" (__v) : "memory"); \ +}) + +#define vx_csr_read_set(csr, val) ({ \ + unsigned long __v = (unsigned long)(val); \ + __asm__ __volatile__ ("csrrs %0, " __ASM_STR(csr) ", %1" : "=r" (__v) : "rK" (__v) : "memory"); \ + __v; \ +}) + +#define vx_csr_set(csr, val) ({ \ + unsigned long __v = (unsigned long)(val); \ + __asm__ __volatile__ ("csrs " __ASM_STR(csr) ", %0" :: "rK" (__v) : "memory"); \ +}) + +#define vx_csr_read_clear(csr, val) ({ \ + unsigned long __v = (unsigned long)(val); \ + __asm__ __volatile__ ("csrrc %0, " __ASM_STR(csr) ", %1" : "=r" (__v) : "rK" (__v) : "memory"); \ + __v; \ +}) + +#define vx_csr_clear(csr, val) ({ \ + unsigned long __v = (unsigned long)(val); \ + __asm__ __volatile__ ("csrc " __ASM_STR(csr) ", %0" :: "rK" (__v) : "memory"); \ +}) // Set thread mask inline void vx_tmc(unsigned num_threads) {