diff --git a/benchmarks/opencl/guassian/Fan1.dump b/benchmarks/opencl/guassian/Fan1.dump index b9aef532..5c2d2060 100644 --- a/benchmarks/opencl/guassian/Fan1.dump +++ b/benchmarks/opencl/guassian/Fan1.dump @@ -1,42 +1,42 @@ -/tmp/pocl_vortex_kernel-35-3a-eb-b3-c9.elf: file format ELF32-riscv +/tmp/pocl_vortex_kernel-4a-12-5b-81-37.elf: file format ELF32-riscv Disassembly of section .init: 80000000 _start: 80000000: 97 05 00 00 auipc a1, 0 -80000004: 93 85 05 68 addi a1, a1, 1664 +80000004: 93 85 05 70 addi a1, a1, 1792 80000008: 73 25 10 fc csrr a0, 4033 8000000c: 6b 10 b5 00 -80000010: ef 00 00 67 jal 1648 +80000010: ef 00 00 6f jal 1776 80000014: 13 05 10 00 addi a0, zero, 1 80000018: 6b 00 05 00 -8000001c: 17 25 00 00 auipc a0, 2 -80000020: 13 05 05 f3 addi a0, a0, -208 -80000024: 17 26 00 00 auipc a2, 2 -80000028: 13 06 86 f4 addi a2, a2, -184 +8000001c: 17 15 00 00 auipc a0, 1 +80000020: 13 05 85 41 addi a0, a0, 1048 +80000024: 17 16 00 00 auipc a2, 1 +80000028: 13 06 06 45 addi a2, a2, 1104 8000002c: 33 06 a6 40 sub a2, a2, a0 80000030: 93 05 00 00 mv a1, zero -80000034: ef 00 90 04 jal 2120 +80000034: ef 00 90 0c jal 2248 80000038: 17 05 00 00 auipc a0, 0 -8000003c: 13 05 c5 74 addi a0, a0, 1868 -80000040: ef 00 c0 6f jal 1788 -80000044: ef 00 c0 79 jal 1948 -80000048: ef 00 c0 38 jal 908 -8000004c: 6f 00 40 70 j 1796 +8000003c: 13 05 c5 7c addi a0, a0, 1996 +80000040: ef 00 c0 77 jal 1916 +80000044: ef 00 d0 01 jal 2076 +80000048: ef 00 c0 40 jal 1036 +8000004c: 6f 00 40 78 j 1924 Disassembly of section .text: 80000050 register_fini: 80000050: 93 07 00 00 mv a5, zero 80000054: 63 88 07 00 beqz a5, 16 -80000058: 37 05 00 80 lui a0, 524288 -8000005c: 13 05 45 78 addi a0, a0, 1924 -80000060: 6f 00 c0 6d j 1756 +80000058: 37 15 00 80 lui a0, 524289 +8000005c: 13 05 45 80 addi a0, a0, -2044 +80000060: 6f 00 c0 75 j 1884 80000064: 67 80 00 00 ret -80000068 kernel_spawn_run_warp: +80000068 kernel_spawn_callback: 80000068: 13 01 01 fd addi sp, sp, -48 8000006c: 23 26 11 02 sw ra, 44(sp) 80000070: 23 24 81 02 sw s0, 40(sp) @@ -48,190 +48,190 @@ Disassembly of section .text: 80000088: 23 28 61 01 sw s6, 16(sp) 8000008c: 23 26 71 01 sw s7, 12(sp) 80000090: 23 24 81 01 sw s8, 8(sp) -80000094: ef 00 00 68 jal 1664 -80000098: ef 00 c0 62 jal 1580 -8000009c: ef 00 00 67 jal 1648 +80000094: ef 00 00 70 jal 1792 +80000098: ef 00 c0 6a jal 1708 +8000009c: ef 00 00 6f jal 1776 800000a0: 93 04 05 00 mv s1, a0 -800000a4: ef 00 00 64 jal 1600 +800000a4: ef 00 00 6c jal 1728 800000a8: 93 09 05 00 mv s3, a0 -800000ac: ef 00 80 64 jal 1608 +800000ac: ef 00 80 6c jal 1736 800000b0: 13 09 05 00 mv s2, a0 -800000b4: ef 00 00 66 jal 1632 -800000b8: b7 25 00 80 lui a1, 524290 -800000bc: 93 85 c5 f4 addi a1, a1, -180 +800000b4: ef 00 00 6e jal 1760 +800000b8: b7 15 00 80 lui a1, 524289 +800000bc: 93 85 45 43 addi a1, a1, 1076 800000c0: 13 96 24 00 slli a2, s1, 2 800000c4: b3 05 b6 00 add a1, a2, a1 -800000c8: 03 ab 05 00 lw s6, 0(a1) -800000cc: 83 25 4b 01 lw a1, 20(s6) -800000d0: 03 26 0b 01 lw a2, 16(s6) -800000d4: 93 86 05 00 mv a3, a1 -800000d8: 63 c4 35 01 blt a1, s3, 8 -800000dc: 93 86 09 00 mv a3, s3 -800000e0: b3 a5 b9 00 slt a1, s3, a1 -800000e4: 33 07 b6 00 add a4, a2, a1 -800000e8: 93 05 10 00 addi a1, zero, 1 -800000ec: 63 4a b7 08 blt a4, a1, 148 -800000f0: 83 25 0b 00 lw a1, 0(s6) -800000f4: 83 aa 05 00 lw s5, 0(a1) -800000f8: 83 a7 45 00 lw a5, 4(a1) -800000fc: 83 24 cb 00 lw s1, 12(s6) -80000100: 33 8a 57 03 mul s4, a5, s5 -80000104: 13 0c f7 ff addi s8, a4, -1 -80000108: 33 86 c9 02 mul a2, s3, a2 -8000010c: 33 86 c6 00 add a2, a3, a2 -80000110: 33 05 c5 02 mul a0, a0, a2 -80000114: 33 85 a4 00 add a0, s1, a0 -80000118: 33 06 e9 02 mul a2, s2, a4 -8000011c: b3 04 c5 00 add s1, a0, a2 -80000120: 33 09 f0 40 neg s2, a5 -80000124: b3 0b 40 41 neg s7, s4 -80000128: 33 c6 44 03 div a2, s1, s4 +800000c8: 03 ac 05 00 lw s8, 0(a1) +800000cc: 83 26 4c 01 lw a3, 20(s8) +800000d0: 83 25 0c 01 lw a1, 16(s8) +800000d4: 13 86 06 00 mv a2, a3 +800000d8: 63 c4 36 01 blt a3, s3, 8 +800000dc: 13 86 09 00 mv a2, s3 +800000e0: b3 a6 d9 00 slt a3, s3, a3 +800000e4: b3 86 d5 00 add a3, a1, a3 +800000e8: 13 07 10 00 addi a4, zero, 1 +800000ec: 63 c8 e6 08 blt a3, a4, 144 +800000f0: 33 87 35 03 mul a4, a1, s3 +800000f4: 83 25 0c 00 lw a1, 0(s8) +800000f8: 33 06 e6 00 add a2, a2, a4 +800000fc: 03 27 cc 00 lw a4, 12(s8) +80000100: 33 05 a6 02 mul a0, a2, a0 +80000104: 03 aa 05 00 lw s4, 0(a1) +80000108: 03 a6 45 00 lw a2, 4(a1) +8000010c: 33 05 e5 00 add a0, a0, a4 +80000110: 33 87 26 03 mul a4, a3, s2 +80000114: 33 04 e5 00 add s0, a0, a4 +80000118: 33 09 46 03 mul s2, a2, s4 +8000011c: b3 0a d4 00 add s5, s0, a3 +80000120: 33 0b c0 40 neg s6, a2 +80000124: b3 0b 20 41 neg s7, s2 +80000128: 33 46 24 03 div a2, s0, s2 8000012c: 33 85 cb 02 mul a0, s7, a2 -80000130: 33 85 a4 00 add a0, s1, a0 -80000134: b3 46 55 03 div a3, a0, s5 +80000130: 33 05 a4 00 add a0, s0, a0 +80000134: b3 46 45 03 div a3, a0, s4 80000138: 03 a5 c5 00 lw a0, 12(a1) -8000013c: 33 07 c9 02 mul a4, s2, a2 +8000013c: 33 07 cb 02 mul a4, s6, a2 80000140: 33 07 d7 40 sub a4, a4, a3 -80000144: 33 87 ea 02 mul a4, s5, a4 +80000144: 33 07 ea 02 mul a4, s4, a4 80000148: 33 08 e5 00 add a6, a0, a4 8000014c: 03 a7 05 01 lw a4, 16(a1) -80000150: 03 a4 45 01 lw s0, 20(a1) -80000154: 83 27 4b 00 lw a5, 4(s6) -80000158: 03 25 8b 00 lw a0, 8(s6) +80000150: 83 a4 45 01 lw s1, 20(a1) +80000154: 83 27 4c 00 lw a5, 4(s8) +80000158: 03 25 8c 00 lw a0, 8(s8) 8000015c: b3 06 d7 00 add a3, a4, a3 -80000160: 33 07 c4 00 add a4, s0, a2 -80000164: 33 86 04 01 add a2, s1, a6 +80000160: 33 87 c4 00 add a4, s1, a2 +80000164: 33 06 04 01 add a2, s0, a6 80000168: e7 80 07 00 jalr a5 -8000016c: 63 0a 0c 00 beqz s8, 20 -80000170: 83 25 0b 00 lw a1, 0(s6) -80000174: 13 0c fc ff addi s8, s8, -1 -80000178: 93 84 14 00 addi s1, s1, 1 -8000017c: 6f f0 df fa j -84 -80000180: 13 b5 19 00 seqz a0, s3 -80000184: 03 2c 81 00 lw s8, 8(sp) -80000188: 83 2b c1 00 lw s7, 12(sp) -8000018c: 03 2b 01 01 lw s6, 16(sp) -80000190: 83 2a 41 01 lw s5, 20(sp) -80000194: 03 2a 81 01 lw s4, 24(sp) -80000198: 83 29 c1 01 lw s3, 28(sp) -8000019c: 03 29 01 02 lw s2, 32(sp) -800001a0: 83 24 41 02 lw s1, 36(sp) -800001a4: 03 24 81 02 lw s0, 40(sp) -800001a8: 83 20 c1 02 lw ra, 44(sp) -800001ac: 13 01 01 03 addi sp, sp, 48 -800001b0: 6f 00 40 51 j 1300 +8000016c: 13 04 14 00 addi s0, s0, 1 +80000170: 63 56 54 01 bge s0, s5, 12 +80000174: 83 25 0c 00 lw a1, 0(s8) +80000178: 6f f0 1f fb j -80 +8000017c: 13 b5 19 00 seqz a0, s3 +80000180: 03 2c 81 00 lw s8, 8(sp) +80000184: 83 2b c1 00 lw s7, 12(sp) +80000188: 03 2b 01 01 lw s6, 16(sp) +8000018c: 83 2a 41 01 lw s5, 20(sp) +80000190: 03 2a 81 01 lw s4, 24(sp) +80000194: 83 29 c1 01 lw s3, 28(sp) +80000198: 03 29 01 02 lw s2, 32(sp) +8000019c: 83 24 41 02 lw s1, 36(sp) +800001a0: 03 24 81 02 lw s0, 40(sp) +800001a4: 83 20 c1 02 lw ra, 44(sp) +800001a8: 13 01 01 03 addi sp, sp, 48 +800001ac: 6f 00 80 59 j 1432 -800001b4 kernel_spawn_run_threads: -800001b4: 13 01 01 ff addi sp, sp, -16 -800001b8: 23 26 11 00 sw ra, 12(sp) -800001bc: 23 24 81 00 sw s0, 8(sp) -800001c0: ef 00 40 50 jal 1284 -800001c4: ef 00 80 54 jal 1352 -800001c8: 13 04 05 00 mv s0, a0 -800001cc: ef 00 80 53 jal 1336 -800001d0: b7 25 00 80 lui a1, 524290 -800001d4: 93 85 c5 f4 addi a1, a1, -180 -800001d8: 13 16 24 00 slli a2, s0, 2 -800001dc: b3 05 b6 00 add a1, a2, a1 -800001e0: 03 a6 05 00 lw a2, 0(a1) -800001e4: 83 25 06 00 lw a1, 0(a2) -800001e8: 83 26 c6 00 lw a3, 12(a2) -800001ec: 03 a7 05 00 lw a4, 0(a1) -800001f0: 83 a7 45 00 lw a5, 4(a1) -800001f4: 33 85 a6 00 add a0, a3, a0 -800001f8: b3 86 e7 02 mul a3, a5, a4 -800001fc: b3 47 d5 02 div a5, a0, a3 -80000200: b3 86 d7 02 mul a3, a5, a3 -80000204: 03 a4 c5 00 lw s0, 12(a1) -80000208: 33 05 d5 40 sub a0, a0, a3 -8000020c: b3 46 e5 02 div a3, a0, a4 -80000210: 33 88 e6 02 mul a6, a3, a4 -80000214: b3 08 a4 00 add a7, s0, a0 -80000218: 03 a7 05 01 lw a4, 16(a1) -8000021c: 03 a4 45 01 lw s0, 20(a1) -80000220: 83 22 46 00 lw t0, 4(a2) -80000224: 03 25 86 00 lw a0, 8(a2) -80000228: 33 86 08 41 sub a2, a7, a6 -8000022c: b3 06 d7 00 add a3, a4, a3 -80000230: 33 07 f4 00 add a4, s0, a5 -80000234: e7 80 02 00 jalr t0 -80000238: 13 05 10 00 addi a0, zero, 1 -8000023c: 03 24 81 00 lw s0, 8(sp) -80000240: 83 20 c1 00 lw ra, 12(sp) -80000244: 13 01 01 01 addi sp, sp, 16 -80000248: 6f 00 c0 47 j 1148 +800001b0 kernel_spawn_remaining_callback: +800001b0: 13 01 01 ff addi sp, sp, -16 +800001b4: 23 26 11 00 sw ra, 12(sp) +800001b8: 23 24 81 00 sw s0, 8(sp) +800001bc: ef 00 80 58 jal 1416 +800001c0: ef 00 c0 5c jal 1484 +800001c4: 13 04 05 00 mv s0, a0 +800001c8: ef 00 c0 5b jal 1468 +800001cc: b7 15 00 80 lui a1, 524289 +800001d0: 93 85 45 43 addi a1, a1, 1076 +800001d4: 13 16 24 00 slli a2, s0, 2 +800001d8: b3 05 b6 00 add a1, a2, a1 +800001dc: 03 a6 05 00 lw a2, 0(a1) +800001e0: 83 25 06 00 lw a1, 0(a2) +800001e4: 83 26 c6 00 lw a3, 12(a2) +800001e8: 03 a7 05 00 lw a4, 0(a1) +800001ec: 83 a7 45 00 lw a5, 4(a1) +800001f0: 33 85 a6 00 add a0, a3, a0 +800001f4: b3 86 e7 02 mul a3, a5, a4 +800001f8: b3 47 d5 02 div a5, a0, a3 +800001fc: b3 86 d7 02 mul a3, a5, a3 +80000200: 03 a4 c5 00 lw s0, 12(a1) +80000204: 33 05 d5 40 sub a0, a0, a3 +80000208: b3 46 e5 02 div a3, a0, a4 +8000020c: 33 88 e6 02 mul a6, a3, a4 +80000210: b3 08 a4 00 add a7, s0, a0 +80000214: 03 a7 05 01 lw a4, 16(a1) +80000218: 03 a4 45 01 lw s0, 20(a1) +8000021c: 83 22 46 00 lw t0, 4(a2) +80000220: 03 25 86 00 lw a0, 8(a2) +80000224: 33 86 08 41 sub a2, a7, a6 +80000228: b3 06 d7 00 add a3, a4, a3 +8000022c: 33 07 f4 00 add a4, s0, a5 +80000230: e7 80 02 00 jalr t0 +80000234: 13 05 10 00 addi a0, zero, 1 +80000238: 03 24 81 00 lw s0, 8(sp) +8000023c: 83 20 c1 00 lw ra, 12(sp) +80000240: 13 01 01 01 addi sp, sp, 16 +80000244: 6f 00 00 50 j 1280 -8000024c kernel_spawn: -8000024c: 13 01 01 fc addi sp, sp, -64 -80000250: 23 2e 11 02 sw ra, 60(sp) -80000254: 23 2c 81 02 sw s0, 56(sp) -80000258: 23 2a 91 02 sw s1, 52(sp) -8000025c: 23 28 21 03 sw s2, 48(sp) -80000260: 23 26 31 03 sw s3, 44(sp) -80000264: 23 24 41 03 sw s4, 40(sp) -80000268: 23 22 51 03 sw s5, 36(sp) -8000026c: 23 20 61 03 sw s6, 32(sp) -80000270: 23 2e 71 01 sw s7, 28(sp) -80000274: 23 2c 81 01 sw s8, 24(sp) -80000278: 93 04 05 00 mv s1, a0 -8000027c: 83 2b 05 00 lw s7, 0(a0) -80000280: 03 24 45 00 lw s0, 4(a0) -80000284: 03 2c 85 00 lw s8, 8(a0) -80000288: 13 09 06 00 mv s2, a2 -8000028c: 93 89 05 00 mv s3, a1 -80000290: ef 00 40 49 jal 1172 -80000294: 13 0b 05 00 mv s6, a0 -80000298: ef 00 40 48 jal 1156 -8000029c: 13 0a 05 00 mv s4, a0 -800002a0: ef 00 40 47 jal 1140 -800002a4: 93 0a 05 00 mv s5, a0 -800002a8: ef 00 40 46 jal 1124 -800002ac: 93 05 70 00 addi a1, zero, 7 -800002b0: 63 ca a5 0e blt a1, a0, 244 -800002b4: b3 05 74 03 mul a1, s0, s7 -800002b8: 33 86 85 03 mul a2, a1, s8 -800002bc: b3 85 4a 03 mul a1, s5, s4 -800002c0: 93 06 10 00 addi a3, zero, 1 -800002c4: 63 c8 c5 00 blt a1, a2, 16 -800002c8: 63 da 66 01 bge a3, s6, 20 -800002cc: 63 4c d5 00 blt a0, a3, 24 -800002d0: 6f 00 40 0d j 212 -800002d4: b3 46 b6 02 div a3, a2, a1 -800002d8: e3 ca 66 ff blt a3, s6, -12 -800002dc: 93 06 0b 00 mv a3, s6 -800002e0: 63 52 d5 0c bge a0, a3, 196 -800002e4: 13 07 fb ff addi a4, s6, -1 -800002e8: b3 45 d6 02 div a1, a2, a3 -800002ec: 63 0e e5 00 beq a0, a4, 28 -800002f0: 13 06 00 00 mv a2, zero -800002f4: 33 0b b6 00 add s6, a2, a1 -800002f8: 33 46 5b 03 div a2, s6, s5 -800002fc: 93 06 00 00 mv a3, zero -80000300: 63 50 46 03 bge a2, s4, 32 -80000304: 6f 00 00 02 j 32 -80000308: b3 86 d5 02 mul a3, a1, a3 -8000030c: 33 06 d6 40 sub a2, a2, a3 -80000310: 33 0b b6 00 add s6, a2, a1 -80000314: 33 46 5b 03 div a2, s6, s5 -80000318: 93 06 00 00 mv a3, zero -8000031c: 63 44 46 01 blt a2, s4, 8 -80000320: b3 46 46 03 div a3, a2, s4 -80000324: 13 07 00 00 mv a4, zero -80000328: 93 07 10 00 addi a5, zero, 1 -8000032c: 63 88 06 00 beqz a3, 16 -80000330: 33 87 46 03 mul a4, a3, s4 -80000334: 33 07 e6 40 sub a4, a2, a4 -80000338: 93 87 06 00 mv a5, a3 -8000033c: 33 04 56 03 mul s0, a2, s5 +80000248 kernel_spawn: +80000248: 13 01 01 fc addi sp, sp, -64 +8000024c: 23 2e 11 02 sw ra, 60(sp) +80000250: 23 2c 81 02 sw s0, 56(sp) +80000254: 23 2a 91 02 sw s1, 52(sp) +80000258: 23 28 21 03 sw s2, 48(sp) +8000025c: 23 26 31 03 sw s3, 44(sp) +80000260: 23 24 41 03 sw s4, 40(sp) +80000264: 23 22 51 03 sw s5, 36(sp) +80000268: 23 20 61 03 sw s6, 32(sp) +8000026c: 23 2e 71 01 sw s7, 28(sp) +80000270: 23 2c 81 01 sw s8, 24(sp) +80000274: 93 04 05 00 mv s1, a0 +80000278: 83 2b 05 00 lw s7, 0(a0) +8000027c: 03 24 45 00 lw s0, 4(a0) +80000280: 03 2c 85 00 lw s8, 8(a0) +80000284: 13 09 06 00 mv s2, a2 +80000288: 93 89 05 00 mv s3, a1 +8000028c: ef 00 80 51 jal 1304 +80000290: 13 0b 05 00 mv s6, a0 +80000294: ef 00 80 50 jal 1288 +80000298: 13 0a 05 00 mv s4, a0 +8000029c: ef 00 80 4f jal 1272 +800002a0: 93 0a 05 00 mv s5, a0 +800002a4: ef 00 80 4e jal 1256 +800002a8: 93 05 f0 00 addi a1, zero, 15 +800002ac: 63 cc a5 16 blt a1, a0, 376 +800002b0: b3 05 74 03 mul a1, s0, s7 +800002b4: 33 86 85 03 mul a2, a1, s8 +800002b8: b3 85 4a 03 mul a1, s5, s4 +800002bc: 93 06 10 00 addi a3, zero, 1 +800002c0: 63 c8 c5 00 blt a1, a2, 16 +800002c4: 63 da 66 01 bge a3, s6, 20 +800002c8: 63 4c d5 00 blt a0, a3, 24 +800002cc: 6f 00 80 15 j 344 +800002d0: b3 46 b6 02 div a3, a2, a1 +800002d4: e3 ca 66 ff blt a3, s6, -12 +800002d8: 93 06 0b 00 mv a3, s6 +800002dc: 63 54 d5 14 bge a0, a3, 328 +800002e0: 13 07 fb ff addi a4, s6, -1 +800002e4: b3 45 d6 02 div a1, a2, a3 +800002e8: 63 0e e5 00 beq a0, a4, 28 +800002ec: 13 06 00 00 mv a2, zero +800002f0: b3 06 b6 00 add a3, a2, a1 +800002f4: 33 c6 56 03 div a2, a3, s5 +800002f8: 13 07 00 00 mv a4, zero +800002fc: 63 50 46 03 bge a2, s4, 32 +80000300: 6f 00 00 02 j 32 +80000304: b3 86 d5 02 mul a3, a1, a3 +80000308: 33 06 d6 40 sub a2, a2, a3 +8000030c: b3 06 b6 00 add a3, a2, a1 +80000310: 33 c6 56 03 div a2, a3, s5 +80000314: 13 07 00 00 mv a4, zero +80000318: 63 44 46 01 blt a2, s4, 8 +8000031c: 33 47 46 03 div a4, a2, s4 +80000320: 93 07 00 00 mv a5, zero +80000324: b3 0a 56 03 mul s5, a2, s5 +80000328: 13 04 10 00 addi s0, zero, 1 +8000032c: 63 08 07 00 beqz a4, 16 +80000330: b3 07 47 03 mul a5, a4, s4 +80000334: b3 07 f6 40 sub a5, a2, a5 +80000338: 13 04 07 00 mv s0, a4 +8000033c: 33 8b 56 41 sub s6, a3, s5 80000340: 23 20 91 00 sw s1, 0(sp) 80000344: 23 22 31 01 sw s3, 4(sp) 80000348: 23 24 21 01 sw s2, 8(sp) 8000034c: b3 85 a5 02 mul a1, a1, a0 80000350: 23 26 b1 00 sw a1, 12(sp) -80000354: 23 28 f1 00 sw a5, 16(sp) -80000358: 23 2a e1 00 sw a4, 20(sp) -8000035c: b7 25 00 80 lui a1, 524290 -80000360: 93 85 c5 f4 addi a1, a1, -180 +80000354: 23 28 81 00 sw s0, 16(sp) +80000358: 23 2a f1 00 sw a5, 20(sp) +8000035c: b7 15 00 80 lui a1, 524289 +80000360: 93 85 45 43 addi a1, a1, 1076 80000364: 13 15 25 00 slli a0, a0, 2 80000368: 33 05 b5 00 add a0, a0, a1 8000036c: 93 05 01 00 mv a1, sp @@ -243,589 +243,621 @@ Disassembly of section .text: 80000384: 37 05 00 80 lui a0, 524288 80000388: 93 05 85 06 addi a1, a0, 104 8000038c: 13 05 06 00 mv a0, a2 -80000390: ef 00 c0 32 jal 812 +80000390: ef 00 c0 3a jal 940 80000394: ef f0 5f cd jal -812 -80000398: 63 06 8b 00 beq s6, s0, 12 -8000039c: 23 26 81 00 sw s0, 12(sp) -800003a0: ef f0 9f cc jal -824 -800003a4: 03 2c 81 01 lw s8, 24(sp) -800003a8: 83 2b c1 01 lw s7, 28(sp) -800003ac: 03 2b 01 02 lw s6, 32(sp) -800003b0: 83 2a 41 02 lw s5, 36(sp) -800003b4: 03 2a 81 02 lw s4, 40(sp) -800003b8: 83 29 c1 02 lw s3, 44(sp) -800003bc: 03 29 01 03 lw s2, 48(sp) -800003c0: 83 24 41 03 lw s1, 52(sp) -800003c4: 03 24 81 03 lw s0, 56(sp) -800003c8: 83 20 c1 03 lw ra, 60(sp) -800003cc: 13 01 01 04 addi sp, sp, 64 -800003d0: 67 80 00 00 ret +80000398: 63 06 0b 08 beqz s6, 140 +8000039c: 23 26 51 01 sw s5, 12(sp) +800003a0: 13 05 0b 00 mv a0, s6 +800003a4: ef 00 00 3a jal 928 +800003a8: ef 00 40 3e jal 996 +800003ac: 13 04 05 00 mv s0, a0 +800003b0: ef 00 40 3d jal 980 +800003b4: b7 15 00 80 lui a1, 524289 +800003b8: 93 85 45 43 addi a1, a1, 1076 +800003bc: 13 16 24 00 slli a2, s0, 2 +800003c0: b3 05 b6 00 add a1, a2, a1 +800003c4: 03 a6 05 00 lw a2, 0(a1) +800003c8: 83 25 06 00 lw a1, 0(a2) +800003cc: 83 26 c6 00 lw a3, 12(a2) +800003d0: 03 a7 05 00 lw a4, 0(a1) +800003d4: 83 a7 45 00 lw a5, 4(a1) +800003d8: 33 85 a6 00 add a0, a3, a0 +800003dc: b3 86 e7 02 mul a3, a5, a4 +800003e0: b3 47 d5 02 div a5, a0, a3 +800003e4: b3 86 d7 02 mul a3, a5, a3 +800003e8: 83 a4 c5 00 lw s1, 12(a1) +800003ec: 33 05 d5 40 sub a0, a0, a3 +800003f0: b3 46 e5 02 div a3, a0, a4 +800003f4: 33 88 e6 02 mul a6, a3, a4 +800003f8: b3 84 a4 00 add s1, s1, a0 +800003fc: 03 a4 05 01 lw s0, 16(a1) +80000400: 03 a7 45 01 lw a4, 20(a1) +80000404: 83 28 46 00 lw a7, 4(a2) +80000408: 03 25 86 00 lw a0, 8(a2) +8000040c: 33 86 04 41 sub a2, s1, a6 +80000410: b3 06 d4 00 add a3, s0, a3 +80000414: 33 07 f7 00 add a4, a4, a5 +80000418: e7 80 08 00 jalr a7 +8000041c: 13 05 10 00 addi a0, zero, 1 +80000420: ef 00 40 32 jal 804 +80000424: 03 2c 81 01 lw s8, 24(sp) +80000428: 83 2b c1 01 lw s7, 28(sp) +8000042c: 03 2b 01 02 lw s6, 32(sp) +80000430: 83 2a 41 02 lw s5, 36(sp) +80000434: 03 2a 81 02 lw s4, 40(sp) +80000438: 83 29 c1 02 lw s3, 44(sp) +8000043c: 03 29 01 03 lw s2, 48(sp) +80000440: 83 24 41 03 lw s1, 52(sp) +80000444: 03 24 81 03 lw s0, 56(sp) +80000448: 83 20 c1 03 lw ra, 60(sp) +8000044c: 13 01 01 04 addi sp, sp, 64 +80000450: 67 80 00 00 ret -800003d4 main: -800003d4: 13 01 01 ff addi sp, sp, -16 -800003d8: 23 26 11 00 sw ra, 12(sp) -800003dc: 37 05 00 80 lui a0, 524288 -800003e0: 93 05 05 4e addi a1, a0, 1248 -800003e4: 37 05 ff 7f lui a0, 524272 -800003e8: 13 06 45 03 addi a2, a0, 52 -800003ec: 37 05 ff 7f lui a0, 524272 -800003f0: ef f0 df e5 jal -420 -800003f4: 13 05 00 00 mv a0, zero -800003f8: 83 20 c1 00 lw ra, 12(sp) -800003fc: 13 01 01 01 addi sp, sp, 16 -80000400: 67 80 00 00 ret +80000454 main: +80000454: 13 01 01 ff addi sp, sp, -16 +80000458: 23 26 11 00 sw ra, 12(sp) +8000045c: 37 05 00 80 lui a0, 524288 +80000460: 93 05 05 56 addi a1, a0, 1376 +80000464: 37 05 ff 7f lui a0, 524272 +80000468: 13 06 45 03 addi a2, a0, 52 +8000046c: 37 05 ff 7f lui a0, 524272 +80000470: ef f0 9f dd jal -552 +80000474: 13 05 00 00 mv a0, zero +80000478: 83 20 c1 00 lw ra, 12(sp) +8000047c: 13 01 01 01 addi sp, sp, 16 +80000480: 67 80 00 00 ret -80000404 _pocl_kernel_Fan1: -80000404: 13 01 01 ff addi sp, sp, -16 -80000408: 23 26 11 00 sw ra, 12(sp) -8000040c: 23 24 81 00 sw s0, 8(sp) -80000410: 23 22 91 00 sw s1, 4(sp) -80000414: 13 04 01 01 addi s0, sp, 16 -80000418: 13 71 c1 ff andi sp, sp, -4 -8000041c: 93 08 00 00 mv a7, zero -80000420: 83 ae 87 01 lw t4, 24(a5) -80000424: 83 a2 c7 01 lw t0, 28(a5) -80000428: 03 a3 07 02 lw t1, 32(a5) -8000042c: 03 ae c7 00 lw t3, 12(a5) -80000430: b3 8f 0e 03 mul t6, t4, a6 -80000434: 13 16 27 00 slli a2, a4, 2 -80000438: 33 88 c5 00 add a6, a1, a2 -8000043c: b3 07 d7 02 mul a5, a4, a3 -80000440: 93 97 27 00 slli a5, a5, 2 -80000444: 33 08 f8 00 add a6, a6, a5 -80000448: 93 47 f7 ff not a5, a4 -8000044c: 33 8f d7 00 add t5, a5, a3 -80000450: 33 07 ee 00 add a4, t3, a4 -80000454: 33 07 f7 01 add a4, a4, t6 -80000458: 13 07 17 00 addi a4, a4, 1 -8000045c: 33 87 e6 02 mul a4, a3, a4 -80000460: 13 17 27 00 slli a4, a4, 2 -80000464: b3 03 c7 00 add t2, a4, a2 -80000468: 93 94 26 00 slli s1, a3, 2 -8000046c: 33 07 fe 01 add a4, t3, t6 -80000470: 6f 00 c0 00 j 12 -80000474: 93 88 18 00 addi a7, a7, 1 -80000478: 63 f8 68 04 bgeu a7, t1, 80 -8000047c: 13 0e 00 00 mv t3, zero -80000480: 6f 00 c0 00 j 12 -80000484: 13 0e 1e 00 addi t3, t3, 1 -80000488: e3 76 5e fe bgeu t3, t0, -20 -8000048c: 13 06 00 00 mv a2, zero -80000490: 93 87 03 00 mv a5, t2 -80000494: 6f 00 00 01 j 16 -80000498: 13 06 16 00 addi a2, a2, 1 -8000049c: b3 87 97 00 add a5, a5, s1 -800004a0: e3 72 d6 ff bgeu a2, t4, -28 -800004a4: b3 06 c7 00 add a3, a4, a2 -800004a8: e3 d8 e6 ff bge a3, t5, -16 -800004ac: b3 86 f5 00 add a3, a1, a5 -800004b0: 07 a0 06 00 flw ft0, 0(a3) -800004b4: 87 20 08 00 flw ft1, 0(a6) -800004b8: 53 70 10 18 fdiv.s ft0, ft0, ft1 -800004bc: b3 06 f5 00 add a3, a0, a5 -800004c0: 27 a0 06 00 fsw ft0, 0(a3) -800004c4: 6f f0 5f fd j -44 -800004c8: 13 01 04 ff addi sp, s0, -16 -800004cc: 83 24 41 00 lw s1, 4(sp) -800004d0: 03 24 81 00 lw s0, 8(sp) -800004d4: 83 20 c1 00 lw ra, 12(sp) -800004d8: 13 01 01 01 addi sp, sp, 16 -800004dc: 67 80 00 00 ret +80000484 _pocl_kernel_Fan1: +80000484: 13 01 01 ff addi sp, sp, -16 +80000488: 23 26 11 00 sw ra, 12(sp) +8000048c: 23 24 81 00 sw s0, 8(sp) +80000490: 23 22 91 00 sw s1, 4(sp) +80000494: 13 04 01 01 addi s0, sp, 16 +80000498: 13 71 c1 ff andi sp, sp, -4 +8000049c: 93 08 00 00 mv a7, zero +800004a0: 83 ae 87 01 lw t4, 24(a5) +800004a4: 83 a2 c7 01 lw t0, 28(a5) +800004a8: 03 a3 07 02 lw t1, 32(a5) +800004ac: 03 ae c7 00 lw t3, 12(a5) +800004b0: b3 8f 0e 03 mul t6, t4, a6 +800004b4: 13 16 27 00 slli a2, a4, 2 +800004b8: 33 88 c5 00 add a6, a1, a2 +800004bc: b3 07 d7 02 mul a5, a4, a3 +800004c0: 93 97 27 00 slli a5, a5, 2 +800004c4: 33 08 f8 00 add a6, a6, a5 +800004c8: 93 47 f7 ff not a5, a4 +800004cc: 33 8f d7 00 add t5, a5, a3 +800004d0: 33 07 ee 00 add a4, t3, a4 +800004d4: 33 07 f7 01 add a4, a4, t6 +800004d8: 13 07 17 00 addi a4, a4, 1 +800004dc: 33 87 e6 02 mul a4, a3, a4 +800004e0: 13 17 27 00 slli a4, a4, 2 +800004e4: b3 03 c7 00 add t2, a4, a2 +800004e8: 93 94 26 00 slli s1, a3, 2 +800004ec: 33 07 fe 01 add a4, t3, t6 +800004f0: 6f 00 c0 00 j 12 +800004f4: 93 88 18 00 addi a7, a7, 1 +800004f8: 63 f8 68 04 bgeu a7, t1, 80 +800004fc: 13 0e 00 00 mv t3, zero +80000500: 6f 00 c0 00 j 12 +80000504: 13 0e 1e 00 addi t3, t3, 1 +80000508: e3 76 5e fe bgeu t3, t0, -20 +8000050c: 13 06 00 00 mv a2, zero +80000510: 93 87 03 00 mv a5, t2 +80000514: 6f 00 00 01 j 16 +80000518: 13 06 16 00 addi a2, a2, 1 +8000051c: b3 87 97 00 add a5, a5, s1 +80000520: e3 72 d6 ff bgeu a2, t4, -28 +80000524: b3 06 c7 00 add a3, a4, a2 +80000528: e3 d8 e6 ff bge a3, t5, -16 +8000052c: b3 86 f5 00 add a3, a1, a5 +80000530: 07 a0 06 00 flw ft0, 0(a3) +80000534: 87 20 08 00 flw ft1, 0(a6) +80000538: 53 70 10 18 fdiv.s ft0, ft0, ft1 +8000053c: b3 06 f5 00 add a3, a0, a5 +80000540: 27 a0 06 00 fsw ft0, 0(a3) +80000544: 6f f0 5f fd j -44 +80000548: 13 01 04 ff addi sp, s0, -16 +8000054c: 83 24 41 00 lw s1, 4(sp) +80000550: 03 24 81 00 lw s0, 8(sp) +80000554: 83 20 c1 00 lw ra, 12(sp) +80000558: 13 01 01 01 addi sp, sp, 16 +8000055c: 67 80 00 00 ret -800004e0 _pocl_kernel_Fan1_workgroup: -800004e0: 13 08 00 00 mv a6, zero -800004e4: 83 26 05 00 lw a3, 0(a0) -800004e8: 03 27 45 00 lw a4, 4(a0) -800004ec: 83 27 c5 00 lw a5, 12(a0) -800004f0: 03 25 05 01 lw a0, 16(a0) -800004f4: 83 a3 06 00 lw t2, 0(a3) -800004f8: 83 2e 07 00 lw t4, 0(a4) -800004fc: 83 a6 07 00 lw a3, 0(a5) -80000500: 03 25 05 00 lw a0, 0(a0) -80000504: 83 af 85 01 lw t6, 24(a1) -80000508: 83 a2 c5 01 lw t0, 28(a1) -8000050c: 83 a8 05 02 lw a7, 32(a1) -80000510: 03 ae c5 00 lw t3, 12(a1) -80000514: 33 87 cf 02 mul a4, t6, a2 -80000518: 93 15 25 00 slli a1, a0, 2 -8000051c: 33 83 be 00 add t1, t4, a1 -80000520: 33 06 d5 02 mul a2, a0, a3 -80000524: 13 16 26 00 slli a2, a2, 2 -80000528: 33 0f c3 00 add t5, t1, a2 -8000052c: 13 46 f5 ff not a2, a0 -80000530: 33 86 c6 00 add a2, a3, a2 -80000534: 33 05 ae 00 add a0, t3, a0 -80000538: 33 05 e5 00 add a0, a0, a4 -8000053c: 13 05 15 00 addi a0, a0, 1 -80000540: 33 85 a6 02 mul a0, a3, a0 -80000544: 13 15 25 00 slli a0, a0, 2 -80000548: 33 03 b5 00 add t1, a0, a1 -8000054c: 93 96 26 00 slli a3, a3, 2 -80000550: 33 05 ee 00 add a0, t3, a4 -80000554: 6f 00 c0 00 j 12 -80000558: 13 08 18 00 addi a6, a6, 1 -8000055c: 63 78 18 05 bgeu a6, a7, 80 -80000560: 13 0e 00 00 mv t3, zero -80000564: 6f 00 c0 00 j 12 -80000568: 13 0e 1e 00 addi t3, t3, 1 -8000056c: e3 76 5e fe bgeu t3, t0, -20 -80000570: 13 07 00 00 mv a4, zero -80000574: 93 05 03 00 mv a1, t1 -80000578: 6f 00 00 01 j 16 -8000057c: 13 07 17 00 addi a4, a4, 1 -80000580: b3 85 d5 00 add a1, a1, a3 -80000584: e3 72 f7 ff bgeu a4, t6, -28 -80000588: b3 07 e5 00 add a5, a0, a4 -8000058c: e3 d8 c7 fe bge a5, a2, -16 -80000590: b3 87 be 00 add a5, t4, a1 -80000594: 07 a0 07 00 flw ft0, 0(a5) -80000598: 87 20 0f 00 flw ft1, 0(t5) -8000059c: 53 70 10 18 fdiv.s ft0, ft0, ft1 -800005a0: b3 87 b3 00 add a5, t2, a1 -800005a4: 27 a0 07 00 fsw ft0, 0(a5) -800005a8: 6f f0 5f fd j -44 -800005ac: 67 80 00 00 ret +80000560 _pocl_kernel_Fan1_workgroup: +80000560: 13 08 00 00 mv a6, zero +80000564: 83 26 05 00 lw a3, 0(a0) +80000568: 03 27 45 00 lw a4, 4(a0) +8000056c: 83 27 c5 00 lw a5, 12(a0) +80000570: 03 25 05 01 lw a0, 16(a0) +80000574: 83 a3 06 00 lw t2, 0(a3) +80000578: 83 2e 07 00 lw t4, 0(a4) +8000057c: 83 a6 07 00 lw a3, 0(a5) +80000580: 03 25 05 00 lw a0, 0(a0) +80000584: 83 af 85 01 lw t6, 24(a1) +80000588: 83 a2 c5 01 lw t0, 28(a1) +8000058c: 83 a8 05 02 lw a7, 32(a1) +80000590: 03 ae c5 00 lw t3, 12(a1) +80000594: 33 87 cf 02 mul a4, t6, a2 +80000598: 93 15 25 00 slli a1, a0, 2 +8000059c: 33 83 be 00 add t1, t4, a1 +800005a0: 33 06 d5 02 mul a2, a0, a3 +800005a4: 13 16 26 00 slli a2, a2, 2 +800005a8: 33 0f c3 00 add t5, t1, a2 +800005ac: 13 46 f5 ff not a2, a0 +800005b0: 33 86 c6 00 add a2, a3, a2 +800005b4: 33 05 ae 00 add a0, t3, a0 +800005b8: 33 05 e5 00 add a0, a0, a4 +800005bc: 13 05 15 00 addi a0, a0, 1 +800005c0: 33 85 a6 02 mul a0, a3, a0 +800005c4: 13 15 25 00 slli a0, a0, 2 +800005c8: 33 03 b5 00 add t1, a0, a1 +800005cc: 93 96 26 00 slli a3, a3, 2 +800005d0: 33 05 ee 00 add a0, t3, a4 +800005d4: 6f 00 c0 00 j 12 +800005d8: 13 08 18 00 addi a6, a6, 1 +800005dc: 63 78 18 05 bgeu a6, a7, 80 +800005e0: 13 0e 00 00 mv t3, zero +800005e4: 6f 00 c0 00 j 12 +800005e8: 13 0e 1e 00 addi t3, t3, 1 +800005ec: e3 76 5e fe bgeu t3, t0, -20 +800005f0: 13 07 00 00 mv a4, zero +800005f4: 93 05 03 00 mv a1, t1 +800005f8: 6f 00 00 01 j 16 +800005fc: 13 07 17 00 addi a4, a4, 1 +80000600: b3 85 d5 00 add a1, a1, a3 +80000604: e3 72 f7 ff bgeu a4, t6, -28 +80000608: b3 07 e5 00 add a5, a0, a4 +8000060c: e3 d8 c7 fe bge a5, a2, -16 +80000610: b3 87 be 00 add a5, t4, a1 +80000614: 07 a0 07 00 flw ft0, 0(a5) +80000618: 87 20 0f 00 flw ft1, 0(t5) +8000061c: 53 70 10 18 fdiv.s ft0, ft0, ft1 +80000620: b3 87 b3 00 add a5, t2, a1 +80000624: 27 a0 07 00 fsw ft0, 0(a5) +80000628: 6f f0 5f fd j -44 +8000062c: 67 80 00 00 ret -800005b0 _pocl_kernel_Fan1_workgroup_fast: -800005b0: 13 08 00 00 mv a6, zero -800005b4: 83 26 c5 00 lw a3, 12(a0) -800005b8: 03 27 05 01 lw a4, 16(a0) -800005bc: 83 23 05 00 lw t2, 0(a0) -800005c0: 83 2e 45 00 lw t4, 4(a0) -800005c4: 03 a5 06 00 lw a0, 0(a3) -800005c8: 83 26 07 00 lw a3, 0(a4) -800005cc: 83 af 85 01 lw t6, 24(a1) -800005d0: 83 a2 c5 01 lw t0, 28(a1) -800005d4: 83 a8 05 02 lw a7, 32(a1) -800005d8: 03 ae c5 00 lw t3, 12(a1) -800005dc: 33 87 cf 02 mul a4, t6, a2 -800005e0: 93 95 26 00 slli a1, a3, 2 -800005e4: 33 83 be 00 add t1, t4, a1 -800005e8: 33 86 a6 02 mul a2, a3, a0 -800005ec: 13 16 26 00 slli a2, a2, 2 -800005f0: 33 0f c3 00 add t5, t1, a2 -800005f4: 13 c6 f6 ff not a2, a3 -800005f8: 33 06 c5 00 add a2, a0, a2 -800005fc: b3 06 de 00 add a3, t3, a3 -80000600: b3 86 e6 00 add a3, a3, a4 -80000604: 93 86 16 00 addi a3, a3, 1 -80000608: b3 06 d5 02 mul a3, a0, a3 -8000060c: 93 96 26 00 slli a3, a3, 2 -80000610: 33 83 b6 00 add t1, a3, a1 -80000614: 93 16 25 00 slli a3, a0, 2 -80000618: 33 07 ee 00 add a4, t3, a4 -8000061c: 6f 00 c0 00 j 12 -80000620: 13 08 18 00 addi a6, a6, 1 -80000624: 63 78 18 05 bgeu a6, a7, 80 -80000628: 13 0e 00 00 mv t3, zero -8000062c: 6f 00 c0 00 j 12 -80000630: 13 0e 1e 00 addi t3, t3, 1 -80000634: e3 76 5e fe bgeu t3, t0, -20 -80000638: 13 05 00 00 mv a0, zero -8000063c: 93 05 03 00 mv a1, t1 -80000640: 6f 00 00 01 j 16 -80000644: 13 05 15 00 addi a0, a0, 1 -80000648: b3 85 d5 00 add a1, a1, a3 -8000064c: e3 72 f5 ff bgeu a0, t6, -28 -80000650: b3 07 a7 00 add a5, a4, a0 -80000654: e3 d8 c7 fe bge a5, a2, -16 -80000658: b3 87 be 00 add a5, t4, a1 -8000065c: 07 a0 07 00 flw ft0, 0(a5) -80000660: 87 20 0f 00 flw ft1, 0(t5) -80000664: 53 70 10 18 fdiv.s ft0, ft0, ft1 -80000668: b3 87 b3 00 add a5, t2, a1 -8000066c: 27 a0 07 00 fsw ft0, 0(a5) -80000670: 6f f0 5f fd j -44 -80000674: 67 80 00 00 ret +80000630 _pocl_kernel_Fan1_workgroup_fast: +80000630: 13 08 00 00 mv a6, zero +80000634: 83 26 c5 00 lw a3, 12(a0) +80000638: 03 27 05 01 lw a4, 16(a0) +8000063c: 83 23 05 00 lw t2, 0(a0) +80000640: 83 2e 45 00 lw t4, 4(a0) +80000644: 03 a5 06 00 lw a0, 0(a3) +80000648: 83 26 07 00 lw a3, 0(a4) +8000064c: 83 af 85 01 lw t6, 24(a1) +80000650: 83 a2 c5 01 lw t0, 28(a1) +80000654: 83 a8 05 02 lw a7, 32(a1) +80000658: 03 ae c5 00 lw t3, 12(a1) +8000065c: 33 87 cf 02 mul a4, t6, a2 +80000660: 93 95 26 00 slli a1, a3, 2 +80000664: 33 83 be 00 add t1, t4, a1 +80000668: 33 86 a6 02 mul a2, a3, a0 +8000066c: 13 16 26 00 slli a2, a2, 2 +80000670: 33 0f c3 00 add t5, t1, a2 +80000674: 13 c6 f6 ff not a2, a3 +80000678: 33 06 c5 00 add a2, a0, a2 +8000067c: b3 06 de 00 add a3, t3, a3 +80000680: b3 86 e6 00 add a3, a3, a4 +80000684: 93 86 16 00 addi a3, a3, 1 +80000688: b3 06 d5 02 mul a3, a0, a3 +8000068c: 93 96 26 00 slli a3, a3, 2 +80000690: 33 83 b6 00 add t1, a3, a1 +80000694: 93 16 25 00 slli a3, a0, 2 +80000698: 33 07 ee 00 add a4, t3, a4 +8000069c: 6f 00 c0 00 j 12 +800006a0: 13 08 18 00 addi a6, a6, 1 +800006a4: 63 78 18 05 bgeu a6, a7, 80 +800006a8: 13 0e 00 00 mv t3, zero +800006ac: 6f 00 c0 00 j 12 +800006b0: 13 0e 1e 00 addi t3, t3, 1 +800006b4: e3 76 5e fe bgeu t3, t0, -20 +800006b8: 13 05 00 00 mv a0, zero +800006bc: 93 05 03 00 mv a1, t1 +800006c0: 6f 00 00 01 j 16 +800006c4: 13 05 15 00 addi a0, a0, 1 +800006c8: b3 85 d5 00 add a1, a1, a3 +800006cc: e3 72 f5 ff bgeu a0, t6, -28 +800006d0: b3 07 a7 00 add a5, a4, a0 +800006d4: e3 d8 c7 fe bge a5, a2, -16 +800006d8: b3 87 be 00 add a5, t4, a1 +800006dc: 07 a0 07 00 flw ft0, 0(a5) +800006e0: 87 20 0f 00 flw ft1, 0(t5) +800006e4: 53 70 10 18 fdiv.s ft0, ft0, ft1 +800006e8: b3 87 b3 00 add a5, t2, a1 +800006ec: 27 a0 07 00 fsw ft0, 0(a5) +800006f0: 6f f0 5f fd j -44 +800006f4: 67 80 00 00 ret -80000678 _exit: -80000678: 13 05 00 00 mv a0, zero -8000067c: 6b 00 05 00 +800006f8 _exit: +800006f8: 13 05 00 00 mv a0, zero +800006fc: 6b 00 05 00 -80000680 vx_set_sp: -80000680: 73 25 00 fc csrr a0, 4032 -80000684: 6b 00 05 00 -80000688: 97 21 00 00 auipc gp, 2 -8000068c: 93 81 81 c9 addi gp, gp, -872 -80000690: 17 01 00 7f auipc sp, 520192 -80000694: 13 01 01 97 addi sp, sp, -1680 -80000698: 93 05 00 40 addi a1, zero, 1024 -8000069c: 73 26 10 cc csrr a2, 3265 -800006a0: b3 85 c5 02 mul a1, a1, a2 -800006a4: 33 01 b1 40 sub sp, sp, a1 -800006a8: f3 26 30 cc csrr a3, 3267 -800006ac: 63 86 06 00 beqz a3, 12 -800006b0: 13 05 00 00 mv a0, zero -800006b4: 6b 00 05 00 +80000700 vx_set_sp: +80000700: 73 25 00 fc csrr a0, 4032 +80000704: 6b 00 05 00 +80000708: 97 11 00 00 auipc gp, 1 +8000070c: 93 81 01 10 addi gp, gp, 256 +80000710: 17 01 00 7f auipc sp, 520192 +80000714: 13 01 01 8f addi sp, sp, -1808 +80000718: 93 05 00 40 addi a1, zero, 1024 +8000071c: 73 26 10 cc csrr a2, 3265 +80000720: b3 85 c5 02 mul a1, a1, a2 +80000724: 33 01 b1 40 sub sp, sp, a1 +80000728: f3 26 30 cc csrr a3, 3267 +8000072c: 63 86 06 00 beqz a3, 12 +80000730: 13 05 00 00 mv a0, zero +80000734: 6b 00 05 00 -800006b8 RETURN: -800006b8: 67 80 00 00 ret - -800006bc vx_wspawn: -800006bc: 6b 10 b5 00 -800006c0: 67 80 00 00 ret - -800006c4 vx_tmc: -800006c4: 6b 00 05 00 -800006c8: 67 80 00 00 ret - -800006cc vx_barrier: -800006cc: 6b 40 b5 00 -800006d0: 67 80 00 00 ret - -800006d4 vx_split: -800006d4: 6b 20 05 00 -800006d8: 67 80 00 00 ret - -800006dc vx_join: -800006dc: 6b 30 00 00 -800006e0: 67 80 00 00 ret - -800006e4 vx_warp_id: -800006e4: 73 25 30 cc csrr a0, 3267 -800006e8: 67 80 00 00 ret - -800006ec vx_warp_gid: -800006ec: 73 25 40 f1 csrr a0, mhartid -800006f0: 67 80 00 00 ret - -800006f4 vx_thread_id: -800006f4: 73 25 00 cc csrr a0, 3264 -800006f8: 67 80 00 00 ret - -800006fc vx_thread_lid: -800006fc: 73 25 10 cc csrr a0, 3265 -80000700: 67 80 00 00 ret - -80000704 vx_thread_gid: -80000704: 73 25 20 cc csrr a0, 3266 -80000708: 67 80 00 00 ret - -8000070c vx_core_id: -8000070c: 73 25 50 cc csrr a0, 3269 -80000710: 67 80 00 00 ret - -80000714 vx_num_threads: -80000714: 73 25 00 fc csrr a0, 4032 -80000718: 67 80 00 00 ret - -8000071c vx_num_warps: -8000071c: 73 25 10 fc csrr a0, 4033 -80000720: 67 80 00 00 ret - -80000724 vx_num_cores: -80000724: 73 25 20 fc csrr a0, 4034 -80000728: 67 80 00 00 ret - -8000072c vx_num_cycles: -8000072c: 73 25 00 b0 csrr a0, mcycle -80000730: 67 80 00 00 ret - -80000734 vx_num_instrs: -80000734: 73 25 20 b0 csrr a0, minstret +80000738 RETURN: 80000738: 67 80 00 00 ret -8000073c atexit: -8000073c: 93 05 05 00 mv a1, a0 -80000740: 93 06 00 00 mv a3, zero -80000744: 13 06 00 00 mv a2, zero -80000748: 13 05 00 00 mv a0, zero -8000074c: 6f 00 c0 20 j 524 +8000073c vx_wspawn: +8000073c: 6b 10 b5 00 +80000740: 67 80 00 00 ret -80000750 exit: -80000750: 13 01 01 ff addi sp, sp, -16 -80000754: 93 05 00 00 mv a1, zero -80000758: 23 24 81 00 sw s0, 8(sp) -8000075c: 23 26 11 00 sw ra, 12(sp) -80000760: 13 04 05 00 mv s0, a0 -80000764: ef 00 00 29 jal 656 -80000768: b7 27 00 80 lui a5, 524290 -8000076c: 03 a5 87 f4 lw a0, -184(a5) -80000770: 83 27 c5 03 lw a5, 60(a0) -80000774: 63 84 07 00 beqz a5, 8 -80000778: e7 80 07 00 jalr a5 -8000077c: 13 05 04 00 mv a0, s0 -80000780: ef f0 9f ef jal -264 +80000744 vx_tmc: +80000744: 6b 00 05 00 +80000748: 67 80 00 00 ret -80000784 __libc_fini_array: -80000784: 13 01 01 ff addi sp, sp, -16 -80000788: 23 24 81 00 sw s0, 8(sp) -8000078c: b7 27 00 80 lui a5, 524290 -80000790: 37 24 00 80 lui s0, 524290 -80000794: 13 04 c4 b1 addi s0, s0, -1252 -80000798: 93 87 c7 b1 addi a5, a5, -1252 -8000079c: b3 87 87 40 sub a5, a5, s0 -800007a0: 23 22 91 00 sw s1, 4(sp) -800007a4: 23 26 11 00 sw ra, 12(sp) -800007a8: 93 d4 27 40 srai s1, a5, 2 -800007ac: 63 80 04 02 beqz s1, 32 -800007b0: 93 87 c7 ff addi a5, a5, -4 -800007b4: 33 84 87 00 add s0, a5, s0 -800007b8: 83 27 04 00 lw a5, 0(s0) -800007bc: 93 84 f4 ff addi s1, s1, -1 -800007c0: 13 04 c4 ff addi s0, s0, -4 -800007c4: e7 80 07 00 jalr a5 -800007c8: e3 98 04 fe bnez s1, -16 -800007cc: 83 20 c1 00 lw ra, 12(sp) -800007d0: 03 24 81 00 lw s0, 8(sp) -800007d4: 83 24 41 00 lw s1, 4(sp) -800007d8: 13 01 01 01 addi sp, sp, 16 -800007dc: 67 80 00 00 ret +8000074c vx_barrier: +8000074c: 6b 40 b5 00 +80000750: 67 80 00 00 ret -800007e0 __libc_init_array: -800007e0: 13 01 01 ff addi sp, sp, -16 -800007e4: 23 24 81 00 sw s0, 8(sp) -800007e8: 23 20 21 01 sw s2, 0(sp) -800007ec: 37 24 00 80 lui s0, 524290 -800007f0: 37 29 00 80 lui s2, 524290 -800007f4: 93 07 84 b1 addi a5, s0, -1256 -800007f8: 13 09 89 b1 addi s2, s2, -1256 -800007fc: 33 09 f9 40 sub s2, s2, a5 -80000800: 23 26 11 00 sw ra, 12(sp) -80000804: 23 22 91 00 sw s1, 4(sp) -80000808: 13 59 29 40 srai s2, s2, 2 -8000080c: 63 00 09 02 beqz s2, 32 -80000810: 13 04 84 b1 addi s0, s0, -1256 -80000814: 93 04 00 00 mv s1, zero -80000818: 83 27 04 00 lw a5, 0(s0) -8000081c: 93 84 14 00 addi s1, s1, 1 -80000820: 13 04 44 00 addi s0, s0, 4 -80000824: e7 80 07 00 jalr a5 -80000828: e3 18 99 fe bne s2, s1, -16 -8000082c: 37 24 00 80 lui s0, 524290 -80000830: 37 29 00 80 lui s2, 524290 -80000834: 93 07 84 b1 addi a5, s0, -1256 -80000838: 13 09 c9 b1 addi s2, s2, -1252 -8000083c: 33 09 f9 40 sub s2, s2, a5 -80000840: 13 59 29 40 srai s2, s2, 2 -80000844: 63 00 09 02 beqz s2, 32 -80000848: 13 04 84 b1 addi s0, s0, -1256 -8000084c: 93 04 00 00 mv s1, zero -80000850: 83 27 04 00 lw a5, 0(s0) -80000854: 93 84 14 00 addi s1, s1, 1 -80000858: 13 04 44 00 addi s0, s0, 4 -8000085c: e7 80 07 00 jalr a5 -80000860: e3 18 99 fe bne s2, s1, -16 -80000864: 83 20 c1 00 lw ra, 12(sp) -80000868: 03 24 81 00 lw s0, 8(sp) -8000086c: 83 24 41 00 lw s1, 4(sp) -80000870: 03 29 01 00 lw s2, 0(sp) -80000874: 13 01 01 01 addi sp, sp, 16 -80000878: 67 80 00 00 ret +80000754 vx_split: +80000754: 6b 20 05 00 +80000758: 67 80 00 00 ret -8000087c memset: -8000087c: 13 03 f0 00 addi t1, zero, 15 -80000880: 13 07 05 00 mv a4, a0 -80000884: 63 7e c3 02 bgeu t1, a2, 60 -80000888: 93 77 f7 00 andi a5, a4, 15 -8000088c: 63 90 07 0a bnez a5, 160 -80000890: 63 92 05 08 bnez a1, 132 -80000894: 93 76 06 ff andi a3, a2, -16 -80000898: 13 76 f6 00 andi a2, a2, 15 -8000089c: b3 86 e6 00 add a3, a3, a4 -800008a0: 23 20 b7 00 sw a1, 0(a4) -800008a4: 23 22 b7 00 sw a1, 4(a4) -800008a8: 23 24 b7 00 sw a1, 8(a4) -800008ac: 23 26 b7 00 sw a1, 12(a4) -800008b0: 13 07 07 01 addi a4, a4, 16 -800008b4: e3 66 d7 fe bltu a4, a3, -20 -800008b8: 63 14 06 00 bnez a2, 8 -800008bc: 67 80 00 00 ret -800008c0: b3 06 c3 40 sub a3, t1, a2 -800008c4: 93 96 26 00 slli a3, a3, 2 -800008c8: 97 02 00 00 auipc t0, 0 -800008cc: b3 86 56 00 add a3, a3, t0 -800008d0: 67 80 c6 00 jr 12(a3) -800008d4: 23 07 b7 00 sb a1, 14(a4) -800008d8: a3 06 b7 00 sb a1, 13(a4) -800008dc: 23 06 b7 00 sb a1, 12(a4) -800008e0: a3 05 b7 00 sb a1, 11(a4) -800008e4: 23 05 b7 00 sb a1, 10(a4) -800008e8: a3 04 b7 00 sb a1, 9(a4) -800008ec: 23 04 b7 00 sb a1, 8(a4) -800008f0: a3 03 b7 00 sb a1, 7(a4) -800008f4: 23 03 b7 00 sb a1, 6(a4) -800008f8: a3 02 b7 00 sb a1, 5(a4) -800008fc: 23 02 b7 00 sb a1, 4(a4) -80000900: a3 01 b7 00 sb a1, 3(a4) -80000904: 23 01 b7 00 sb a1, 2(a4) -80000908: a3 00 b7 00 sb a1, 1(a4) -8000090c: 23 00 b7 00 sb a1, 0(a4) -80000910: 67 80 00 00 ret -80000914: 93 f5 f5 0f andi a1, a1, 255 -80000918: 93 96 85 00 slli a3, a1, 8 -8000091c: b3 e5 d5 00 or a1, a1, a3 -80000920: 93 96 05 01 slli a3, a1, 16 -80000924: b3 e5 d5 00 or a1, a1, a3 -80000928: 6f f0 df f6 j -148 -8000092c: 93 96 27 00 slli a3, a5, 2 -80000930: 97 02 00 00 auipc t0, 0 -80000934: b3 86 56 00 add a3, a3, t0 -80000938: 93 82 00 00 mv t0, ra -8000093c: e7 80 06 fa jalr -96(a3) -80000940: 93 80 02 00 mv ra, t0 -80000944: 93 87 07 ff addi a5, a5, -16 -80000948: 33 07 f7 40 sub a4, a4, a5 -8000094c: 33 06 f6 00 add a2, a2, a5 -80000950: e3 78 c3 f6 bgeu t1, a2, -144 -80000954: 6f f0 df f3 j -196 +8000075c vx_join: +8000075c: 6b 30 00 00 +80000760: 67 80 00 00 ret -80000958 __register_exitproc: -80000958: b7 27 00 80 lui a5, 524290 -8000095c: 03 a7 87 f4 lw a4, -184(a5) -80000960: 83 27 87 14 lw a5, 328(a4) -80000964: 63 8c 07 04 beqz a5, 88 -80000968: 03 a7 47 00 lw a4, 4(a5) -8000096c: 13 08 f0 01 addi a6, zero, 31 -80000970: 63 4e e8 06 blt a6, a4, 124 -80000974: 13 18 27 00 slli a6, a4, 2 -80000978: 63 06 05 02 beqz a0, 44 -8000097c: 33 83 07 01 add t1, a5, a6 -80000980: 23 24 c3 08 sw a2, 136(t1) -80000984: 83 a8 87 18 lw a7, 392(a5) -80000988: 13 06 10 00 addi a2, zero, 1 -8000098c: 33 16 e6 00 sll a2, a2, a4 -80000990: b3 e8 c8 00 or a7, a7, a2 -80000994: 23 a4 17 19 sw a7, 392(a5) -80000998: 23 24 d3 10 sw a3, 264(t1) -8000099c: 93 06 20 00 addi a3, zero, 2 -800009a0: 63 04 d5 02 beq a0, a3, 40 -800009a4: 13 07 17 00 addi a4, a4, 1 -800009a8: 23 a2 e7 00 sw a4, 4(a5) -800009ac: b3 87 07 01 add a5, a5, a6 -800009b0: 23 a4 b7 00 sw a1, 8(a5) -800009b4: 13 05 00 00 mv a0, zero -800009b8: 67 80 00 00 ret -800009bc: 93 07 c7 14 addi a5, a4, 332 -800009c0: 23 24 f7 14 sw a5, 328(a4) -800009c4: 6f f0 5f fa j -92 -800009c8: 83 a6 c7 18 lw a3, 396(a5) -800009cc: 13 07 17 00 addi a4, a4, 1 -800009d0: 23 a2 e7 00 sw a4, 4(a5) -800009d4: 33 e6 c6 00 or a2, a3, a2 -800009d8: 23 a6 c7 18 sw a2, 396(a5) -800009dc: b3 87 07 01 add a5, a5, a6 -800009e0: 23 a4 b7 00 sw a1, 8(a5) -800009e4: 13 05 00 00 mv a0, zero -800009e8: 67 80 00 00 ret -800009ec: 13 05 f0 ff addi a0, zero, -1 -800009f0: 67 80 00 00 ret +80000764 vx_warp_id: +80000764: 73 25 30 cc csrr a0, 3267 +80000768: 67 80 00 00 ret -800009f4 __call_exitprocs: -800009f4: 13 01 01 fd addi sp, sp, -48 -800009f8: b7 27 00 80 lui a5, 524290 -800009fc: 23 2c 41 01 sw s4, 24(sp) -80000a00: 03 aa 87 f4 lw s4, -184(a5) -80000a04: 23 20 21 03 sw s2, 32(sp) -80000a08: 23 26 11 02 sw ra, 44(sp) -80000a0c: 03 29 8a 14 lw s2, 328(s4) -80000a10: 23 24 81 02 sw s0, 40(sp) -80000a14: 23 22 91 02 sw s1, 36(sp) -80000a18: 23 2e 31 01 sw s3, 28(sp) -80000a1c: 23 2a 51 01 sw s5, 20(sp) -80000a20: 23 28 61 01 sw s6, 16(sp) -80000a24: 23 26 71 01 sw s7, 12(sp) -80000a28: 23 24 81 01 sw s8, 8(sp) -80000a2c: 63 00 09 04 beqz s2, 64 -80000a30: 13 0b 05 00 mv s6, a0 -80000a34: 93 8b 05 00 mv s7, a1 -80000a38: 93 0a 10 00 addi s5, zero, 1 -80000a3c: 93 09 f0 ff addi s3, zero, -1 -80000a40: 83 24 49 00 lw s1, 4(s2) -80000a44: 13 84 f4 ff addi s0, s1, -1 -80000a48: 63 42 04 02 bltz s0, 36 -80000a4c: 93 94 24 00 slli s1, s1, 2 -80000a50: b3 04 99 00 add s1, s2, s1 -80000a54: 63 84 0b 04 beqz s7, 72 -80000a58: 83 a7 44 10 lw a5, 260(s1) -80000a5c: 63 80 77 05 beq a5, s7, 64 -80000a60: 13 04 f4 ff addi s0, s0, -1 -80000a64: 93 84 c4 ff addi s1, s1, -4 -80000a68: e3 16 34 ff bne s0, s3, -20 -80000a6c: 83 20 c1 02 lw ra, 44(sp) -80000a70: 03 24 81 02 lw s0, 40(sp) -80000a74: 83 24 41 02 lw s1, 36(sp) -80000a78: 03 29 01 02 lw s2, 32(sp) -80000a7c: 83 29 c1 01 lw s3, 28(sp) -80000a80: 03 2a 81 01 lw s4, 24(sp) -80000a84: 83 2a 41 01 lw s5, 20(sp) -80000a88: 03 2b 01 01 lw s6, 16(sp) -80000a8c: 83 2b c1 00 lw s7, 12(sp) -80000a90: 03 2c 81 00 lw s8, 8(sp) -80000a94: 13 01 01 03 addi sp, sp, 48 -80000a98: 67 80 00 00 ret -80000a9c: 83 27 49 00 lw a5, 4(s2) -80000aa0: 83 a6 44 00 lw a3, 4(s1) -80000aa4: 93 87 f7 ff addi a5, a5, -1 -80000aa8: 63 8e 87 04 beq a5, s0, 92 -80000aac: 23 a2 04 00 sw zero, 4(s1) -80000ab0: e3 88 06 fa beqz a3, -80 -80000ab4: 83 27 89 18 lw a5, 392(s2) -80000ab8: 33 97 8a 00 sll a4, s5, s0 -80000abc: 03 2c 49 00 lw s8, 4(s2) -80000ac0: b3 77 f7 00 and a5, a4, a5 -80000ac4: 63 92 07 02 bnez a5, 36 -80000ac8: e7 80 06 00 jalr a3 -80000acc: 03 27 49 00 lw a4, 4(s2) -80000ad0: 83 27 8a 14 lw a5, 328(s4) -80000ad4: 63 14 87 01 bne a4, s8, 8 -80000ad8: e3 04 f9 f8 beq s2, a5, -120 -80000adc: e3 88 07 f8 beqz a5, -112 -80000ae0: 13 89 07 00 mv s2, a5 -80000ae4: 6f f0 df f5 j -164 -80000ae8: 83 27 c9 18 lw a5, 396(s2) -80000aec: 83 a5 44 08 lw a1, 132(s1) -80000af0: 33 77 f7 00 and a4, a4, a5 -80000af4: 63 1c 07 00 bnez a4, 24 -80000af8: 13 05 0b 00 mv a0, s6 -80000afc: e7 80 06 00 jalr a3 -80000b00: 6f f0 df fc j -52 -80000b04: 23 22 89 00 sw s0, 4(s2) -80000b08: 6f f0 9f fa j -88 -80000b0c: 13 85 05 00 mv a0, a1 -80000b10: e7 80 06 00 jalr a3 -80000b14: 6f f0 9f fb j -72 +8000076c vx_warp_gid: +8000076c: 73 25 40 f1 csrr a0, mhartid +80000770: 67 80 00 00 ret + +80000774 vx_thread_id: +80000774: 73 25 00 cc csrr a0, 3264 +80000778: 67 80 00 00 ret + +8000077c vx_thread_lid: +8000077c: 73 25 10 cc csrr a0, 3265 +80000780: 67 80 00 00 ret + +80000784 vx_thread_gid: +80000784: 73 25 20 cc csrr a0, 3266 +80000788: 67 80 00 00 ret + +8000078c vx_core_id: +8000078c: 73 25 50 cc csrr a0, 3269 +80000790: 67 80 00 00 ret + +80000794 vx_num_threads: +80000794: 73 25 00 fc csrr a0, 4032 +80000798: 67 80 00 00 ret + +8000079c vx_num_warps: +8000079c: 73 25 10 fc csrr a0, 4033 +800007a0: 67 80 00 00 ret + +800007a4 vx_num_cores: +800007a4: 73 25 20 fc csrr a0, 4034 +800007a8: 67 80 00 00 ret + +800007ac vx_num_cycles: +800007ac: 73 25 00 b0 csrr a0, mcycle +800007b0: 67 80 00 00 ret + +800007b4 vx_num_instrs: +800007b4: 73 25 20 b0 csrr a0, minstret +800007b8: 67 80 00 00 ret + +800007bc atexit: +800007bc: 93 05 05 00 mv a1, a0 +800007c0: 93 06 00 00 mv a3, zero +800007c4: 13 06 00 00 mv a2, zero +800007c8: 13 05 00 00 mv a0, zero +800007cc: 6f 00 c0 20 j 524 + +800007d0 exit: +800007d0: 13 01 01 ff addi sp, sp, -16 +800007d4: 93 05 00 00 mv a1, zero +800007d8: 23 24 81 00 sw s0, 8(sp) +800007dc: 23 26 11 00 sw ra, 12(sp) +800007e0: 13 04 05 00 mv s0, a0 +800007e4: ef 00 00 29 jal 656 +800007e8: b7 17 00 80 lui a5, 524289 +800007ec: 03 a5 07 43 lw a0, 1072(a5) +800007f0: 83 27 c5 03 lw a5, 60(a0) +800007f4: 63 84 07 00 beqz a5, 8 +800007f8: e7 80 07 00 jalr a5 +800007fc: 13 05 04 00 mv a0, s0 +80000800: ef f0 9f ef jal -264 + +80000804 __libc_fini_array: +80000804: 13 01 01 ff addi sp, sp, -16 +80000808: 23 24 81 00 sw s0, 8(sp) +8000080c: b7 17 00 80 lui a5, 524289 +80000810: 37 14 00 80 lui s0, 524289 +80000814: 13 04 44 00 addi s0, s0, 4 +80000818: 93 87 47 00 addi a5, a5, 4 +8000081c: b3 87 87 40 sub a5, a5, s0 +80000820: 23 22 91 00 sw s1, 4(sp) +80000824: 23 26 11 00 sw ra, 12(sp) +80000828: 93 d4 27 40 srai s1, a5, 2 +8000082c: 63 80 04 02 beqz s1, 32 +80000830: 93 87 c7 ff addi a5, a5, -4 +80000834: 33 84 87 00 add s0, a5, s0 +80000838: 83 27 04 00 lw a5, 0(s0) +8000083c: 93 84 f4 ff addi s1, s1, -1 +80000840: 13 04 c4 ff addi s0, s0, -4 +80000844: e7 80 07 00 jalr a5 +80000848: e3 98 04 fe bnez s1, -16 +8000084c: 83 20 c1 00 lw ra, 12(sp) +80000850: 03 24 81 00 lw s0, 8(sp) +80000854: 83 24 41 00 lw s1, 4(sp) +80000858: 13 01 01 01 addi sp, sp, 16 +8000085c: 67 80 00 00 ret + +80000860 __libc_init_array: +80000860: 13 01 01 ff addi sp, sp, -16 +80000864: 23 24 81 00 sw s0, 8(sp) +80000868: 23 20 21 01 sw s2, 0(sp) +8000086c: 37 14 00 80 lui s0, 524289 +80000870: 37 19 00 80 lui s2, 524289 +80000874: 93 07 04 00 mv a5, s0 +80000878: 13 09 09 00 mv s2, s2 +8000087c: 33 09 f9 40 sub s2, s2, a5 +80000880: 23 26 11 00 sw ra, 12(sp) +80000884: 23 22 91 00 sw s1, 4(sp) +80000888: 13 59 29 40 srai s2, s2, 2 +8000088c: 63 00 09 02 beqz s2, 32 +80000890: 13 04 04 00 mv s0, s0 +80000894: 93 04 00 00 mv s1, zero +80000898: 83 27 04 00 lw a5, 0(s0) +8000089c: 93 84 14 00 addi s1, s1, 1 +800008a0: 13 04 44 00 addi s0, s0, 4 +800008a4: e7 80 07 00 jalr a5 +800008a8: e3 18 99 fe bne s2, s1, -16 +800008ac: 37 14 00 80 lui s0, 524289 +800008b0: 37 19 00 80 lui s2, 524289 +800008b4: 93 07 04 00 mv a5, s0 +800008b8: 13 09 49 00 addi s2, s2, 4 +800008bc: 33 09 f9 40 sub s2, s2, a5 +800008c0: 13 59 29 40 srai s2, s2, 2 +800008c4: 63 00 09 02 beqz s2, 32 +800008c8: 13 04 04 00 mv s0, s0 +800008cc: 93 04 00 00 mv s1, zero +800008d0: 83 27 04 00 lw a5, 0(s0) +800008d4: 93 84 14 00 addi s1, s1, 1 +800008d8: 13 04 44 00 addi s0, s0, 4 +800008dc: e7 80 07 00 jalr a5 +800008e0: e3 18 99 fe bne s2, s1, -16 +800008e4: 83 20 c1 00 lw ra, 12(sp) +800008e8: 03 24 81 00 lw s0, 8(sp) +800008ec: 83 24 41 00 lw s1, 4(sp) +800008f0: 03 29 01 00 lw s2, 0(sp) +800008f4: 13 01 01 01 addi sp, sp, 16 +800008f8: 67 80 00 00 ret + +800008fc memset: +800008fc: 13 03 f0 00 addi t1, zero, 15 +80000900: 13 07 05 00 mv a4, a0 +80000904: 63 7e c3 02 bgeu t1, a2, 60 +80000908: 93 77 f7 00 andi a5, a4, 15 +8000090c: 63 90 07 0a bnez a5, 160 +80000910: 63 92 05 08 bnez a1, 132 +80000914: 93 76 06 ff andi a3, a2, -16 +80000918: 13 76 f6 00 andi a2, a2, 15 +8000091c: b3 86 e6 00 add a3, a3, a4 +80000920: 23 20 b7 00 sw a1, 0(a4) +80000924: 23 22 b7 00 sw a1, 4(a4) +80000928: 23 24 b7 00 sw a1, 8(a4) +8000092c: 23 26 b7 00 sw a1, 12(a4) +80000930: 13 07 07 01 addi a4, a4, 16 +80000934: e3 66 d7 fe bltu a4, a3, -20 +80000938: 63 14 06 00 bnez a2, 8 +8000093c: 67 80 00 00 ret +80000940: b3 06 c3 40 sub a3, t1, a2 +80000944: 93 96 26 00 slli a3, a3, 2 +80000948: 97 02 00 00 auipc t0, 0 +8000094c: b3 86 56 00 add a3, a3, t0 +80000950: 67 80 c6 00 jr 12(a3) +80000954: 23 07 b7 00 sb a1, 14(a4) +80000958: a3 06 b7 00 sb a1, 13(a4) +8000095c: 23 06 b7 00 sb a1, 12(a4) +80000960: a3 05 b7 00 sb a1, 11(a4) +80000964: 23 05 b7 00 sb a1, 10(a4) +80000968: a3 04 b7 00 sb a1, 9(a4) +8000096c: 23 04 b7 00 sb a1, 8(a4) +80000970: a3 03 b7 00 sb a1, 7(a4) +80000974: 23 03 b7 00 sb a1, 6(a4) +80000978: a3 02 b7 00 sb a1, 5(a4) +8000097c: 23 02 b7 00 sb a1, 4(a4) +80000980: a3 01 b7 00 sb a1, 3(a4) +80000984: 23 01 b7 00 sb a1, 2(a4) +80000988: a3 00 b7 00 sb a1, 1(a4) +8000098c: 23 00 b7 00 sb a1, 0(a4) +80000990: 67 80 00 00 ret +80000994: 93 f5 f5 0f andi a1, a1, 255 +80000998: 93 96 85 00 slli a3, a1, 8 +8000099c: b3 e5 d5 00 or a1, a1, a3 +800009a0: 93 96 05 01 slli a3, a1, 16 +800009a4: b3 e5 d5 00 or a1, a1, a3 +800009a8: 6f f0 df f6 j -148 +800009ac: 93 96 27 00 slli a3, a5, 2 +800009b0: 97 02 00 00 auipc t0, 0 +800009b4: b3 86 56 00 add a3, a3, t0 +800009b8: 93 82 00 00 mv t0, ra +800009bc: e7 80 06 fa jalr -96(a3) +800009c0: 93 80 02 00 mv ra, t0 +800009c4: 93 87 07 ff addi a5, a5, -16 +800009c8: 33 07 f7 40 sub a4, a4, a5 +800009cc: 33 06 f6 00 add a2, a2, a5 +800009d0: e3 78 c3 f6 bgeu t1, a2, -144 +800009d4: 6f f0 df f3 j -196 + +800009d8 __register_exitproc: +800009d8: b7 17 00 80 lui a5, 524289 +800009dc: 03 a7 07 43 lw a4, 1072(a5) +800009e0: 83 27 87 14 lw a5, 328(a4) +800009e4: 63 8c 07 04 beqz a5, 88 +800009e8: 03 a7 47 00 lw a4, 4(a5) +800009ec: 13 08 f0 01 addi a6, zero, 31 +800009f0: 63 4e e8 06 blt a6, a4, 124 +800009f4: 13 18 27 00 slli a6, a4, 2 +800009f8: 63 06 05 02 beqz a0, 44 +800009fc: 33 83 07 01 add t1, a5, a6 +80000a00: 23 24 c3 08 sw a2, 136(t1) +80000a04: 83 a8 87 18 lw a7, 392(a5) +80000a08: 13 06 10 00 addi a2, zero, 1 +80000a0c: 33 16 e6 00 sll a2, a2, a4 +80000a10: b3 e8 c8 00 or a7, a7, a2 +80000a14: 23 a4 17 19 sw a7, 392(a5) +80000a18: 23 24 d3 10 sw a3, 264(t1) +80000a1c: 93 06 20 00 addi a3, zero, 2 +80000a20: 63 04 d5 02 beq a0, a3, 40 +80000a24: 13 07 17 00 addi a4, a4, 1 +80000a28: 23 a2 e7 00 sw a4, 4(a5) +80000a2c: b3 87 07 01 add a5, a5, a6 +80000a30: 23 a4 b7 00 sw a1, 8(a5) +80000a34: 13 05 00 00 mv a0, zero +80000a38: 67 80 00 00 ret +80000a3c: 93 07 c7 14 addi a5, a4, 332 +80000a40: 23 24 f7 14 sw a5, 328(a4) +80000a44: 6f f0 5f fa j -92 +80000a48: 83 a6 c7 18 lw a3, 396(a5) +80000a4c: 13 07 17 00 addi a4, a4, 1 +80000a50: 23 a2 e7 00 sw a4, 4(a5) +80000a54: 33 e6 c6 00 or a2, a3, a2 +80000a58: 23 a6 c7 18 sw a2, 396(a5) +80000a5c: b3 87 07 01 add a5, a5, a6 +80000a60: 23 a4 b7 00 sw a1, 8(a5) +80000a64: 13 05 00 00 mv a0, zero +80000a68: 67 80 00 00 ret +80000a6c: 13 05 f0 ff addi a0, zero, -1 +80000a70: 67 80 00 00 ret + +80000a74 __call_exitprocs: +80000a74: 13 01 01 fd addi sp, sp, -48 +80000a78: b7 17 00 80 lui a5, 524289 +80000a7c: 23 2c 41 01 sw s4, 24(sp) +80000a80: 03 aa 07 43 lw s4, 1072(a5) +80000a84: 23 20 21 03 sw s2, 32(sp) +80000a88: 23 26 11 02 sw ra, 44(sp) +80000a8c: 03 29 8a 14 lw s2, 328(s4) +80000a90: 23 24 81 02 sw s0, 40(sp) +80000a94: 23 22 91 02 sw s1, 36(sp) +80000a98: 23 2e 31 01 sw s3, 28(sp) +80000a9c: 23 2a 51 01 sw s5, 20(sp) +80000aa0: 23 28 61 01 sw s6, 16(sp) +80000aa4: 23 26 71 01 sw s7, 12(sp) +80000aa8: 23 24 81 01 sw s8, 8(sp) +80000aac: 63 00 09 04 beqz s2, 64 +80000ab0: 13 0b 05 00 mv s6, a0 +80000ab4: 93 8b 05 00 mv s7, a1 +80000ab8: 93 0a 10 00 addi s5, zero, 1 +80000abc: 93 09 f0 ff addi s3, zero, -1 +80000ac0: 83 24 49 00 lw s1, 4(s2) +80000ac4: 13 84 f4 ff addi s0, s1, -1 +80000ac8: 63 42 04 02 bltz s0, 36 +80000acc: 93 94 24 00 slli s1, s1, 2 +80000ad0: b3 04 99 00 add s1, s2, s1 +80000ad4: 63 84 0b 04 beqz s7, 72 +80000ad8: 83 a7 44 10 lw a5, 260(s1) +80000adc: 63 80 77 05 beq a5, s7, 64 +80000ae0: 13 04 f4 ff addi s0, s0, -1 +80000ae4: 93 84 c4 ff addi s1, s1, -4 +80000ae8: e3 16 34 ff bne s0, s3, -20 +80000aec: 83 20 c1 02 lw ra, 44(sp) +80000af0: 03 24 81 02 lw s0, 40(sp) +80000af4: 83 24 41 02 lw s1, 36(sp) +80000af8: 03 29 01 02 lw s2, 32(sp) +80000afc: 83 29 c1 01 lw s3, 28(sp) +80000b00: 03 2a 81 01 lw s4, 24(sp) +80000b04: 83 2a 41 01 lw s5, 20(sp) +80000b08: 03 2b 01 01 lw s6, 16(sp) +80000b0c: 83 2b c1 00 lw s7, 12(sp) +80000b10: 03 2c 81 00 lw s8, 8(sp) +80000b14: 13 01 01 03 addi sp, sp, 48 +80000b18: 67 80 00 00 ret +80000b1c: 83 27 49 00 lw a5, 4(s2) +80000b20: 83 a6 44 00 lw a3, 4(s1) +80000b24: 93 87 f7 ff addi a5, a5, -1 +80000b28: 63 8e 87 04 beq a5, s0, 92 +80000b2c: 23 a2 04 00 sw zero, 4(s1) +80000b30: e3 88 06 fa beqz a3, -80 +80000b34: 83 27 89 18 lw a5, 392(s2) +80000b38: 33 97 8a 00 sll a4, s5, s0 +80000b3c: 03 2c 49 00 lw s8, 4(s2) +80000b40: b3 77 f7 00 and a5, a4, a5 +80000b44: 63 92 07 02 bnez a5, 36 +80000b48: e7 80 06 00 jalr a3 +80000b4c: 03 27 49 00 lw a4, 4(s2) +80000b50: 83 27 8a 14 lw a5, 328(s4) +80000b54: 63 14 87 01 bne a4, s8, 8 +80000b58: e3 04 f9 f8 beq s2, a5, -120 +80000b5c: e3 88 07 f8 beqz a5, -112 +80000b60: 13 89 07 00 mv s2, a5 +80000b64: 6f f0 df f5 j -164 +80000b68: 83 27 c9 18 lw a5, 396(s2) +80000b6c: 83 a5 44 08 lw a1, 132(s1) +80000b70: 33 77 f7 00 and a4, a4, a5 +80000b74: 63 1c 07 00 bnez a4, 24 +80000b78: 13 05 0b 00 mv a0, s6 +80000b7c: e7 80 06 00 jalr a3 +80000b80: 6f f0 df fc j -52 +80000b84: 23 22 89 00 sw s0, 4(s2) +80000b88: 6f f0 9f fa j -88 +80000b8c: 13 85 05 00 mv a0, a1 +80000b90: e7 80 06 00 jalr a3 +80000b94: 6f f0 9f fb j -72 Disassembly of section .init_array: -80001b18 __preinit_array_start: -80001b18: 50 00 -80001b1a: 00 80 +80001000 __preinit_array_start: +80001000: 50 00 +80001002: 00 80 Disassembly of section .data: -80001b20 impure_data: -80001b20: 00 00 -80001b22: 00 00 -80001b24: 0c 1e -80001b26: 00 80 -80001b28: 74 1e -80001b2a: 00 80 -80001b2c: dc 1e -80001b2e: 00 80 +80001008 impure_data: +80001008: 00 00 +8000100a: 00 00 +8000100c: f4 12 +8000100e: 00 80 +80001010: 5c 13 +80001012: 00 80 +80001014: c4 13 +80001016: 00 80 ... -80001bc8: 01 00 -80001bca: 00 00 -80001bcc: 00 00 -80001bce: 00 00 -80001bd0: 0e 33 -80001bd2: cd ab -80001bd4: 34 12 -80001bd6: 6d e6 -80001bd8: ec de -80001bda: 05 00 -80001bdc: 0b 00 00 00 +800010b0: 01 00 +800010b2: 00 00 +800010b4: 00 00 +800010b6: 00 00 +800010b8: 0e 33 +800010ba: cd ab +800010bc: 34 12 +800010be: 6d e6 +800010c0: ec de +800010c2: 05 00 +800010c4: 0b 00 00 00 ... Disassembly of section .sdata: -80001f48 _global_impure_ptr: -80001f48: 20 1b -80001f4a: 00 80 +80001430 _global_impure_ptr: +80001430: 08 10 +80001432: 00 80 Disassembly of section .bss: -80001f4c g_wspawn_args: +80001434 g_wspawn_args: ... Disassembly of section .comment: @@ -921,28 +953,28 @@ Disassembly of section .symtab: 2c: 03 00 02 00 lb zero, 0(tp) 30: 00 00 32: 00 00 - 34: 18 1b + 34: 00 10 36: 00 80 38: 00 00 3a: 00 00 3c: 03 00 03 00 lb zero, 0(t1) 40: 00 00 42: 00 00 - 44: 20 1b + 44: 08 10 46: 00 80 48: 00 00 4a: 00 00 4c: 03 00 04 00 lb zero, 0(s0) 50: 00 00 52: 00 00 - 54: 48 1f + 54: 30 14 56: 00 80 58: 00 00 5a: 00 00 5c: 03 00 05 00 lb zero, 0(a0) 60: 00 00 62: 00 00 - 64: 4c 1f + 64: 34 14 66: 00 80 68: 00 00 6a: 00 00 @@ -958,7 +990,7 @@ Disassembly of section .symtab: 9e: f1 ff a0: 0e 00 a2: 00 00 - a4: b8 06 + a4: 38 07 a6: 00 80 a8: 00 00 aa: 00 00 @@ -1015,7 +1047,7 @@ Disassembly of section .symtab: 14e: f1 ff 150: 85 00 152: 00 00 - 154: 20 1b + 154: 08 10 156: 00 80 158: 28 04 15a: 00 00 @@ -1026,49 +1058,49 @@ Disassembly of section .symtab: 16e: f1 ff 170: 91 00 172: 00 00 - 174: 1c 1b + 174: 04 10 176: 00 80 178: 00 00 17a: 00 00 17c: 00 00 17e: 03 00 a2 00 lb zero, 10(tp) 182: 00 00 - 184: 1c 1b + 184: 04 10 186: 00 80 188: 00 00 18a: 00 00 18c: 00 00 18e: 03 00 b5 00 lb zero, 11(a0) 192: 00 00 - 194: 1c 1b + 194: 04 10 196: 00 80 198: 00 00 19a: 00 00 19c: 00 00 19e: 03 00 c6 00 lb zero, 12(a2) 1a2: 00 00 - 1a4: 18 1b + 1a4: 00 10 1a6: 00 80 1a8: 00 00 1aa: 00 00 1ac: 00 00 1ae: 03 00 da 00 lb zero, 13(s4) 1b2: 00 00 - 1b4: 18 1b + 1b4: 00 10 1b6: 00 80 1b8: 00 00 1ba: 00 00 1bc: 00 00 1be: 03 00 ed 00 lb zero, 14(s10) 1c2: 00 00 - 1c4: 18 1b + 1c4: 00 10 1c6: 00 80 1c8: 00 00 1ca: 00 00 1cc: 00 00 1ce: 03 00 03 01 lb zero, 16(t1) 1d2: 00 00 - 1d4: 0c 07 + 1d4: 8c 07 1d6: 00 80 1d8: 00 00 1da: 00 00 @@ -1081,7 +1113,7 @@ Disassembly of section .symtab: 1ee: f1 ff 1f0: 1c 01 1f2: 00 00 - 1f4: bc 06 + 1f4: 3c 07 1f6: 00 80 1f8: 00 00 1fa: 00 00 @@ -1089,46 +1121,46 @@ Disassembly of section .symtab: 1fe: 02 00 200: 26 01 202: 00 00 - 204: 68 00 - 206: 00 80 - 208: 4c 01 + 204: 00 04 + 206: 00 00 + 208: 00 00 20a: 00 00 - 20c: 12 00 - 20e: 02 00 - 210: 3c 01 - 212: 00 00 - 214: 00 04 - 216: 00 00 - 218: 00 00 + 20c: 10 00 + 20e: f1 ff + 210: 33 01 00 00 add sp, zero, zero + 214: 34 14 + 216: 00 80 + 218: 40 00 21a: 00 00 - 21c: 10 00 - 21e: f1 ff - 220: 49 01 + 21c: 11 00 + 21e: 06 00 + 220: 41 01 222: 00 00 - 224: 4c 1f + 224: 44 07 226: 00 80 - 228: 20 00 + 228: 00 00 22a: 00 00 - 22c: 11 00 - 22e: 06 00 - 230: 57 01 00 00 - 234: c4 06 + 22c: 12 00 + 22e: 02 00 + 230: 48 01 + 232: 00 00 + 234: 30 14 236: 00 80 238: 00 00 23a: 00 00 - 23c: 12 00 - 23e: 02 00 - 240: 5e 01 + 23c: 10 00 + 23e: 05 00 + 240: 58 01 242: 00 00 - 244: 48 1f + 244: 68 00 246: 00 80 - 248: 00 00 + 248: 48 01 24a: 00 00 - 24c: 10 00 - 24e: 05 00 + 24c: 12 00 + 24e: 02 00 250: 6e 01 252: 00 00 - 254: e0 04 + 254: 60 05 256: 00 80 258: d0 00 25a: 00 00 @@ -1136,21 +1168,21 @@ Disassembly of section .symtab: 25e: 02 00 260: 8a 01 262: 00 00 - 264: 20 23 + 264: 08 18 266: 00 80 268: 00 00 26a: 00 00 26c: 10 00 26e: f1 ff 270: 9b 01 00 00 - 274: dc 06 + 274: 5c 07 276: 00 80 278: 00 00 27a: 00 00 27c: 12 00 27e: 02 00 280: a3 01 00 00 sb zero, 3(zero) - 284: 1c 07 + 284: 9c 07 286: 00 80 288: 00 00 28a: 00 00 @@ -1158,15 +1190,15 @@ Disassembly of section .symtab: 28e: 02 00 290: b0 01 292: 00 00 - 294: 4c 02 + 294: 48 02 296: 00 80 - 298: 88 01 + 298: 0c 02 29a: 00 00 29c: 12 00 29e: 02 00 2a0: bd 01 2a2: 00 00 - 2a4: d4 06 + 2a4: 54 07 2a6: 00 80 2a8: 00 00 2aa: 00 00 @@ -1174,7 +1206,7 @@ Disassembly of section .symtab: 2ae: 02 00 2b0: c6 01 2b2: 00 00 - 2b4: 48 1f + 2b4: 30 14 2b6: 00 80 2b8: 04 00 2ba: 00 00 @@ -1182,14 +1214,14 @@ Disassembly of section .symtab: 2be: 05 00 2c0: d9 01 2c2: 00 00 - 2c4: e0 07 + 2c4: 60 08 2c6: 00 80 2c8: 9c 00 2ca: 00 00 2cc: 12 00 2ce: 02 00 2d0: eb 01 00 00 - 2d4: 14 07 + 2d4: 94 07 2d6: 00 80 2d8: 00 00 2da: 00 00 @@ -1197,7 +1229,7 @@ Disassembly of section .symtab: 2de: 02 00 2e0: fa 01 2e2: 00 00 - 2e4: e4 06 + 2e4: 64 07 2e6: 00 80 2e8: 00 00 2ea: 00 00 @@ -1205,7 +1237,7 @@ Disassembly of section .symtab: 2ee: 02 00 2f0: 05 02 2f2: 00 00 - 2f4: f4 06 + 2f4: 74 07 2f6: 00 80 2f8: 00 00 2fa: 00 00 @@ -1213,7 +1245,7 @@ Disassembly of section .symtab: 2fe: 02 00 300: 12 02 302: 00 00 - 304: 84 07 + 304: 04 08 306: 00 80 308: 5c 00 30a: 00 00 @@ -1229,7 +1261,7 @@ Disassembly of section .symtab: 31e: f1 ff 320: 30 02 322: 00 00 - 324: 04 04 + 324: 84 04 326: 00 80 328: dc 00 32a: 00 00 @@ -1237,7 +1269,7 @@ Disassembly of section .symtab: 32e: 02 00 330: 42 02 332: 00 00 - 334: 80 06 + 334: 00 07 336: 00 80 338: 00 00 33a: 00 00 @@ -1245,14 +1277,14 @@ Disassembly of section .symtab: 33e: 02 00 340: 4c 02 342: 00 00 - 344: cc 06 + 344: 4c 07 346: 00 80 348: 00 00 34a: 00 00 34c: 12 00 34e: 02 00 350: 57 02 00 00 - 354: f4 09 + 354: 74 0a 356: 00 80 358: 24 01 35a: 00 00 @@ -1268,7 +1300,7 @@ Disassembly of section .symtab: 36e: 01 00 370: 68 02 372: 00 00 - 374: 58 09 + 374: d8 09 376: 00 80 378: 9c 00 37a: 00 00 @@ -1276,7 +1308,7 @@ Disassembly of section .symtab: 37e: 02 00 380: 7c 02 382: 00 00 - 384: 6c 1f + 384: 74 14 386: 00 80 388: 00 00 38a: 00 00 @@ -1284,7 +1316,7 @@ Disassembly of section .symtab: 38e: 06 00 390: 88 02 392: 00 00 - 394: 4c 1f + 394: 34 14 396: 00 80 398: 00 00 39a: 00 00 @@ -1292,14 +1324,14 @@ Disassembly of section .symtab: 39e: 06 00 3a0: 94 02 3a2: 00 00 - 3a4: 7c 08 + 3a4: fc 08 3a6: 00 80 3a8: dc 00 3aa: 00 00 3ac: 12 00 3ae: 02 00 3b0: 9b 02 00 00 - 3b4: d4 03 + 3b4: 54 04 3b6: 00 80 3b8: 30 00 3ba: 00 00 @@ -1307,7 +1339,7 @@ Disassembly of section .symtab: 3be: 02 00 3c0: a0 02 3c2: 00 00 - 3c4: 2c 07 + 3c4: ac 07 3c6: 00 80 3c8: 00 00 3ca: 00 00 @@ -1315,7 +1347,7 @@ Disassembly of section .symtab: 3ce: 02 00 3d0: ae 02 3d2: 00 00 - 3d4: 3c 07 + 3d4: bc 07 3d6: 00 80 3d8: 14 00 3da: 00 00 @@ -1323,14 +1355,14 @@ Disassembly of section .symtab: 3de: 02 00 3e0: b5 02 3e2: 00 00 - 3e4: 04 07 + 3e4: 84 07 3e6: 00 80 3e8: 00 00 3ea: 00 00 3ec: 12 00 3ee: 02 00 3f0: c3 02 00 00 fmadd.s ft5, ft0, ft0, ft0, rne - 3f4: 24 07 + 3f4: a4 07 3f6: 00 80 3f8: 00 00 3fa: 00 00 @@ -1338,7 +1370,7 @@ Disassembly of section .symtab: 3fe: 02 00 400: d0 02 402: 00 00 - 404: ec 06 + 404: 6c 07 406: 00 80 408: 00 00 40a: 00 00 @@ -1346,70 +1378,67 @@ Disassembly of section .symtab: 40e: 02 00 410: dc 02 412: 00 00 - 414: b4 01 + 414: 08 10 416: 00 80 - 418: 98 00 + 418: 00 00 41a: 00 00 - 41c: 12 00 - 41e: 02 00 - 420: f5 02 - 422: 00 00 - 424: 20 1b + 41c: 10 00 + 41e: 04 00 + 420: eb 02 00 00 + 424: 30 06 426: 00 80 - 428: 00 00 + 428: c8 00 42a: 00 00 - 42c: 10 00 - 42e: 04 00 - 430: 04 03 + 42c: 12 00 + 42e: 02 00 + 430: 0c 03 432: 00 00 - 434: b0 05 + 434: 34 14 436: 00 80 - 438: c8 00 + 438: 00 00 43a: 00 00 - 43c: 12 00 - 43e: 02 00 - 440: 25 03 + 43c: 10 00 + 43e: 05 00 + 440: 9d 00 442: 00 00 - 444: 4c 1f + 444: 74 14 446: 00 80 448: 00 00 44a: 00 00 44c: 10 00 - 44e: 05 00 - 450: 9d 00 + 44e: 06 00 + 450: 42 03 452: 00 00 - 454: 6c 1f + 454: d0 07 456: 00 80 - 458: 00 00 + 458: 34 00 45a: 00 00 - 45c: 10 00 - 45e: 06 00 - 460: 3b 03 00 00 - 464: 50 07 + 45c: 12 00 + 45e: 02 00 + 460: 13 03 00 00 mv t1, zero + 464: b0 01 466: 00 80 - 468: 34 00 + 468: 98 00 46a: 00 00 46c: 12 00 46e: 02 00 - 470: 2c 03 - 472: 00 00 - 474: fc 06 + 470: 33 03 00 00 add t1, zero, zero + 474: 7c 07 476: 00 80 478: 00 00 47a: 00 00 47c: 12 00 47e: 02 00 - 480: 3a 03 + 480: 41 03 482: 00 00 - 484: 78 06 + 484: f8 06 486: 00 80 488: 00 00 48a: 00 00 48c: 12 00 48e: 02 00 - 490: 40 03 - 492: 00 00 - 494: 34 07 + 490: 47 03 00 00 fmsub.s ft6, ft0, ft0, ft0, rne + 494: b4 07 496: 00 80 498: 00 00 49a: 00 00 @@ -1449,12 +1478,12 @@ Disassembly of section .strtab: 3e: 5f 6b 65 72 42: 6e 65 44: 6c 2d - 46: 62 30 - 48: 2d 35 - 4a: 61 2d - 4c: 63 38 2d 30 - 50: 63 2d 62 32 - 54: 2e 63 + 46: 37 32 2d 62 lui tp, 402131 + 4a: 62 2d + 4c: 62 61 + 4e: 2d 63 + 50: 30 2d + 52: 37 64 2e 63 lui s0, 406246 56: 00 70 58: 61 72 5a: 61 6c @@ -1539,31 +1568,32 @@ Disassembly of section .strtab: 11e: 5f 77 73 70 122: 61 77 124: 6e 00 - 126: 6b 65 72 6e - 12a: 65 6c - 12c: 5f 73 70 61 - 130: 77 6e 5f 72 - 134: 75 6e - 136: 5f 77 61 72 - 13a: 70 00 - 13c: 5f 5f 73 74 - 140: 61 63 - 142: 6b 5f 73 69 - 146: 7a 65 - 148: 00 67 - 14a: 5f 77 73 70 - 14e: 61 77 - 150: 6e 5f - 152: 61 72 - 154: 67 73 00 76 - 158: 78 5f - 15a: 74 6d - 15c: 63 00 5f 5f beq t5, s5, 1504 - 160: 53 44 41 54 - 164: 41 5f - 166: 42 45 - 168: 47 49 4e 5f - 16c: 5f 00 5f 70 + 126: 5f 5f 73 74 + 12a: 61 63 + 12c: 6b 5f 73 69 + 130: 7a 65 + 132: 00 67 + 134: 5f 77 73 70 + 138: 61 77 + 13a: 6e 5f + 13c: 61 72 + 13e: 67 73 00 76 + 142: 78 5f + 144: 74 6d + 146: 63 00 5f 5f beq t5, s5, 1504 + 14a: 53 44 41 54 + 14e: 41 5f + 150: 42 45 + 152: 47 49 4e 5f + 156: 5f 00 6b 65 + 15a: 72 6e + 15c: 65 6c + 15e: 5f 73 70 61 + 162: 77 6e 5f 63 + 166: 61 6c + 168: 6c 62 + 16a: 61 63 + 16c: 6b 00 5f 70 170: 6f 63 6c 5f jal t1, 812534 174: 6b 65 72 6e 178: 65 6c @@ -1698,47 +1728,54 @@ Disassembly of section .strtab: 2d2: 5f 77 61 72 2d6: 70 5f 2d8: 67 69 64 00 - 2dc: 6b 65 72 6e - 2e0: 65 6c - 2e2: 5f 73 70 61 - 2e6: 77 6e 5f 72 - 2ea: 75 6e - 2ec: 5f 74 68 72 - 2f0: 65 61 - 2f2: 64 73 - 2f4: 00 5f - 2f6: 5f 44 41 54 - 2fa: 41 5f - 2fc: 42 45 - 2fe: 47 49 4e 5f - 302: 5f 00 5f 70 - 306: 6f 63 6c 5f jal t1, 812534 - 30a: 6b 65 72 6e - 30e: 65 6c - 310: 5f 46 61 6e - 314: 31 5f - 316: 77 6f 72 6b - 31a: 67 72 6f 75 - 31e: 70 5f - 320: 66 61 - 322: 73 74 00 5f csrrci s0, 1520, 0 - 326: 65 64 - 328: 61 74 - 32a: 61 00 - 32c: 76 78 - 32e: 5f 74 68 72 - 332: 65 61 - 334: 64 5f - 336: 6c 69 - 338: 64 00 - 33a: 5f 65 78 69 - 33e: 74 00 - 340: 76 78 - 342: 5f 6e 75 6d - 346: 5f 69 6e 73 - 34a: 74 72 - 34c: 73 - 34d: 00 + 2dc: 5f 5f 44 41 + 2e0: 54 41 + 2e2: 5f 42 45 47 + 2e6: 49 4e + 2e8: 5f 5f 00 5f + 2ec: 70 6f + 2ee: 63 6c 5f 6b bltu t5, s5, 1720 + 2f2: 65 72 + 2f4: 6e 65 + 2f6: 6c 5f + 2f8: 46 61 + 2fa: 6e 31 + 2fc: 5f 77 6f 72 + 300: 6b 67 72 6f + 304: 75 70 + 306: 5f 66 61 73 + 30a: 74 00 + 30c: 5f 65 64 61 + 310: 74 61 + 312: 00 6b + 314: 65 72 + 316: 6e 65 + 318: 6c 5f + 31a: 73 70 61 77 csrci 1910, 2 + 31e: 6e 5f + 320: 72 65 + 322: 6d 61 + 324: 69 6e + 326: 69 6e + 328: 67 5f 63 61 + 32c: 6c 6c + 32e: 62 61 + 330: 63 6b 00 76 bltu zero, zero, 1910 + 334: 78 5f + 336: 74 68 + 338: 72 65 + 33a: 61 64 + 33c: 5f 6c 69 64 + 340: 00 5f + 342: 65 78 + 344: 69 74 + 346: 00 76 + 348: 78 5f + 34a: 6e 75 + 34c: 6d 5f + 34e: 69 6e + 350: 73 74 72 73 csrrci s0, 1847, 4 + 354: 00 Disassembly of section .shstrtab: diff --git a/benchmarks/opencl/guassian/Fan2.dump b/benchmarks/opencl/guassian/Fan2.dump index 56ab3aa3..ae030757 100644 --- a/benchmarks/opencl/guassian/Fan2.dump +++ b/benchmarks/opencl/guassian/Fan2.dump @@ -1,30 +1,30 @@ -/tmp/pocl_vortex_kernel-b9-14-7c-e4-ed.elf: file format ELF32-riscv +/tmp/pocl_vortex_kernel-55-6b-f9-64-73.elf: file format ELF32-riscv Disassembly of section .init: 80000000 _start: 80000000: 97 15 00 00 auipc a1, 1 -80000004: 93 85 c5 9d addi a1, a1, -1572 +80000004: 93 85 c5 a5 addi a1, a1, -1444 80000008: 73 25 10 fc csrr a0, 4033 8000000c: 6b 10 b5 00 -80000010: ef 00 d0 1c jal 2508 +80000010: ef 00 d0 24 jal 2636 80000014: 13 05 10 00 addi a0, zero, 1 80000018: 6b 00 05 00 8000001c: 17 15 00 00 auipc a0, 1 80000020: 13 05 85 41 addi a0, a0, 1048 80000024: 17 16 00 00 auipc a2, 1 -80000028: 13 06 06 43 addi a2, a2, 1072 +80000028: 13 06 06 45 addi a2, a2, 1104 8000002c: 33 06 a6 40 sub a2, a2, a0 80000030: 93 05 00 00 mv a1, zero -80000034: ef 00 50 3a jal 2980 +80000034: ef 00 50 42 jal 3108 80000038: 17 15 00 00 auipc a0, 1 -8000003c: 13 05 85 aa addi a0, a0, -1368 -80000040: ef 00 90 25 jal 2648 -80000044: ef 00 90 2f jal 2808 -80000048: ef 00 c0 38 jal 908 -8000004c: 6f 00 10 26 j 2656 +8000003c: 13 05 85 b2 addi a0, a0, -1240 +80000040: ef 00 90 2d jal 2776 +80000044: ef 00 90 37 jal 2936 +80000048: ef 00 c0 40 jal 1036 +8000004c: 6f 00 10 2e j 2784 Disassembly of section .text: @@ -32,11 +32,11 @@ Disassembly of section .text: 80000050: 93 07 00 00 mv a5, zero 80000054: 63 88 07 00 beqz a5, 16 80000058: 37 15 00 80 lui a0, 524289 -8000005c: 13 05 05 ae addi a0, a0, -1312 -80000060: 6f 00 90 23 j 2616 +8000005c: 13 05 05 b6 addi a0, a0, -1184 +80000060: 6f 00 90 2b j 2744 80000064: 67 80 00 00 ret -80000068 kernel_spawn_run_warp: +80000068 kernel_spawn_callback: 80000068: 13 01 01 fd addi sp, sp, -48 8000006c: 23 26 11 02 sw ra, 44(sp) 80000070: 23 24 81 02 sw s0, 40(sp) @@ -48,188 +48,188 @@ Disassembly of section .text: 80000088: 23 28 61 01 sw s6, 16(sp) 8000008c: 23 26 71 01 sw s7, 12(sp) 80000090: 23 24 81 01 sw s8, 8(sp) -80000094: ef 00 d0 1d jal 2524 -80000098: ef 00 90 18 jal 2440 -8000009c: ef 00 d0 1c jal 2508 +80000094: ef 00 d0 25 jal 2652 +80000098: ef 00 90 20 jal 2568 +8000009c: ef 00 d0 24 jal 2636 800000a0: 93 04 05 00 mv s1, a0 -800000a4: ef 00 d0 19 jal 2460 +800000a4: ef 00 d0 21 jal 2588 800000a8: 93 09 05 00 mv s3, a0 -800000ac: ef 00 50 1a jal 2468 +800000ac: ef 00 50 22 jal 2596 800000b0: 13 09 05 00 mv s2, a0 -800000b4: ef 00 d0 1b jal 2492 +800000b4: ef 00 d0 23 jal 2620 800000b8: b7 15 00 80 lui a1, 524289 800000bc: 93 85 45 43 addi a1, a1, 1076 800000c0: 13 96 24 00 slli a2, s1, 2 800000c4: b3 05 b6 00 add a1, a2, a1 -800000c8: 03 ab 05 00 lw s6, 0(a1) -800000cc: 83 25 4b 01 lw a1, 20(s6) -800000d0: 03 26 0b 01 lw a2, 16(s6) -800000d4: 93 86 05 00 mv a3, a1 -800000d8: 63 c4 35 01 blt a1, s3, 8 -800000dc: 93 86 09 00 mv a3, s3 -800000e0: b3 a5 b9 00 slt a1, s3, a1 -800000e4: 33 07 b6 00 add a4, a2, a1 -800000e8: 93 05 10 00 addi a1, zero, 1 -800000ec: 63 4a b7 08 blt a4, a1, 148 -800000f0: 83 25 0b 00 lw a1, 0(s6) -800000f4: 83 aa 05 00 lw s5, 0(a1) -800000f8: 83 a7 45 00 lw a5, 4(a1) -800000fc: 83 24 cb 00 lw s1, 12(s6) -80000100: 33 8a 57 03 mul s4, a5, s5 -80000104: 13 0c f7 ff addi s8, a4, -1 -80000108: 33 86 c9 02 mul a2, s3, a2 -8000010c: 33 86 c6 00 add a2, a3, a2 -80000110: 33 05 c5 02 mul a0, a0, a2 -80000114: 33 85 a4 00 add a0, s1, a0 -80000118: 33 06 e9 02 mul a2, s2, a4 -8000011c: b3 04 c5 00 add s1, a0, a2 -80000120: 33 09 f0 40 neg s2, a5 -80000124: b3 0b 40 41 neg s7, s4 -80000128: 33 c6 44 03 div a2, s1, s4 +800000c8: 03 ac 05 00 lw s8, 0(a1) +800000cc: 83 26 4c 01 lw a3, 20(s8) +800000d0: 83 25 0c 01 lw a1, 16(s8) +800000d4: 13 86 06 00 mv a2, a3 +800000d8: 63 c4 36 01 blt a3, s3, 8 +800000dc: 13 86 09 00 mv a2, s3 +800000e0: b3 a6 d9 00 slt a3, s3, a3 +800000e4: b3 86 d5 00 add a3, a1, a3 +800000e8: 13 07 10 00 addi a4, zero, 1 +800000ec: 63 c8 e6 08 blt a3, a4, 144 +800000f0: 33 87 35 03 mul a4, a1, s3 +800000f4: 83 25 0c 00 lw a1, 0(s8) +800000f8: 33 06 e6 00 add a2, a2, a4 +800000fc: 03 27 cc 00 lw a4, 12(s8) +80000100: 33 05 a6 02 mul a0, a2, a0 +80000104: 03 aa 05 00 lw s4, 0(a1) +80000108: 03 a6 45 00 lw a2, 4(a1) +8000010c: 33 05 e5 00 add a0, a0, a4 +80000110: 33 87 26 03 mul a4, a3, s2 +80000114: 33 04 e5 00 add s0, a0, a4 +80000118: 33 09 46 03 mul s2, a2, s4 +8000011c: b3 0a d4 00 add s5, s0, a3 +80000120: 33 0b c0 40 neg s6, a2 +80000124: b3 0b 20 41 neg s7, s2 +80000128: 33 46 24 03 div a2, s0, s2 8000012c: 33 85 cb 02 mul a0, s7, a2 -80000130: 33 85 a4 00 add a0, s1, a0 -80000134: b3 46 55 03 div a3, a0, s5 +80000130: 33 05 a4 00 add a0, s0, a0 +80000134: b3 46 45 03 div a3, a0, s4 80000138: 03 a5 c5 00 lw a0, 12(a1) -8000013c: 33 07 c9 02 mul a4, s2, a2 +8000013c: 33 07 cb 02 mul a4, s6, a2 80000140: 33 07 d7 40 sub a4, a4, a3 -80000144: 33 87 ea 02 mul a4, s5, a4 +80000144: 33 07 ea 02 mul a4, s4, a4 80000148: 33 08 e5 00 add a6, a0, a4 8000014c: 03 a7 05 01 lw a4, 16(a1) -80000150: 03 a4 45 01 lw s0, 20(a1) -80000154: 83 27 4b 00 lw a5, 4(s6) -80000158: 03 25 8b 00 lw a0, 8(s6) +80000150: 83 a4 45 01 lw s1, 20(a1) +80000154: 83 27 4c 00 lw a5, 4(s8) +80000158: 03 25 8c 00 lw a0, 8(s8) 8000015c: b3 06 d7 00 add a3, a4, a3 -80000160: 33 07 c4 00 add a4, s0, a2 -80000164: 33 86 04 01 add a2, s1, a6 +80000160: 33 87 c4 00 add a4, s1, a2 +80000164: 33 06 04 01 add a2, s0, a6 80000168: e7 80 07 00 jalr a5 -8000016c: 63 0a 0c 00 beqz s8, 20 -80000170: 83 25 0b 00 lw a1, 0(s6) -80000174: 13 0c fc ff addi s8, s8, -1 -80000178: 93 84 14 00 addi s1, s1, 1 -8000017c: 6f f0 df fa j -84 -80000180: 13 b5 19 00 seqz a0, s3 -80000184: 03 2c 81 00 lw s8, 8(sp) -80000188: 83 2b c1 00 lw s7, 12(sp) -8000018c: 03 2b 01 01 lw s6, 16(sp) -80000190: 83 2a 41 01 lw s5, 20(sp) -80000194: 03 2a 81 01 lw s4, 24(sp) -80000198: 83 29 c1 01 lw s3, 28(sp) -8000019c: 03 29 01 02 lw s2, 32(sp) -800001a0: 83 24 41 02 lw s1, 36(sp) -800001a4: 03 24 81 02 lw s0, 40(sp) -800001a8: 83 20 c1 02 lw ra, 44(sp) -800001ac: 13 01 01 03 addi sp, sp, 48 -800001b0: 6f 00 10 07 j 2160 +8000016c: 13 04 14 00 addi s0, s0, 1 +80000170: 63 56 54 01 bge s0, s5, 12 +80000174: 83 25 0c 00 lw a1, 0(s8) +80000178: 6f f0 1f fb j -80 +8000017c: 13 b5 19 00 seqz a0, s3 +80000180: 03 2c 81 00 lw s8, 8(sp) +80000184: 83 2b c1 00 lw s7, 12(sp) +80000188: 03 2b 01 01 lw s6, 16(sp) +8000018c: 83 2a 41 01 lw s5, 20(sp) +80000190: 03 2a 81 01 lw s4, 24(sp) +80000194: 83 29 c1 01 lw s3, 28(sp) +80000198: 03 29 01 02 lw s2, 32(sp) +8000019c: 83 24 41 02 lw s1, 36(sp) +800001a0: 03 24 81 02 lw s0, 40(sp) +800001a4: 83 20 c1 02 lw ra, 44(sp) +800001a8: 13 01 01 03 addi sp, sp, 48 +800001ac: 6f 00 50 0f j 2292 -800001b4 kernel_spawn_run_threads: -800001b4: 13 01 01 ff addi sp, sp, -16 -800001b8: 23 26 11 00 sw ra, 12(sp) -800001bc: 23 24 81 00 sw s0, 8(sp) -800001c0: ef 00 10 06 jal 2144 -800001c4: ef 00 50 0a jal 2212 -800001c8: 13 04 05 00 mv s0, a0 -800001cc: ef 00 50 09 jal 2196 -800001d0: b7 15 00 80 lui a1, 524289 -800001d4: 93 85 45 43 addi a1, a1, 1076 -800001d8: 13 16 24 00 slli a2, s0, 2 -800001dc: b3 05 b6 00 add a1, a2, a1 -800001e0: 03 a6 05 00 lw a2, 0(a1) -800001e4: 83 25 06 00 lw a1, 0(a2) -800001e8: 83 26 c6 00 lw a3, 12(a2) -800001ec: 03 a7 05 00 lw a4, 0(a1) -800001f0: 83 a7 45 00 lw a5, 4(a1) -800001f4: 33 85 a6 00 add a0, a3, a0 -800001f8: b3 86 e7 02 mul a3, a5, a4 -800001fc: b3 47 d5 02 div a5, a0, a3 -80000200: b3 86 d7 02 mul a3, a5, a3 -80000204: 03 a4 c5 00 lw s0, 12(a1) -80000208: 33 05 d5 40 sub a0, a0, a3 -8000020c: b3 46 e5 02 div a3, a0, a4 -80000210: 33 88 e6 02 mul a6, a3, a4 -80000214: b3 08 a4 00 add a7, s0, a0 -80000218: 03 a7 05 01 lw a4, 16(a1) -8000021c: 03 a4 45 01 lw s0, 20(a1) -80000220: 83 22 46 00 lw t0, 4(a2) -80000224: 03 25 86 00 lw a0, 8(a2) -80000228: 33 86 08 41 sub a2, a7, a6 -8000022c: b3 06 d7 00 add a3, a4, a3 -80000230: 33 07 f4 00 add a4, s0, a5 -80000234: e7 80 02 00 jalr t0 -80000238: 13 05 10 00 addi a0, zero, 1 -8000023c: 03 24 81 00 lw s0, 8(sp) -80000240: 83 20 c1 00 lw ra, 12(sp) -80000244: 13 01 01 01 addi sp, sp, 16 -80000248: 6f 00 80 7d j 2008 +800001b0 kernel_spawn_remaining_callback: +800001b0: 13 01 01 ff addi sp, sp, -16 +800001b4: 23 26 11 00 sw ra, 12(sp) +800001b8: 23 24 81 00 sw s0, 8(sp) +800001bc: ef 00 50 0e jal 2276 +800001c0: ef 00 90 12 jal 2344 +800001c4: 13 04 05 00 mv s0, a0 +800001c8: ef 00 90 11 jal 2328 +800001cc: b7 15 00 80 lui a1, 524289 +800001d0: 93 85 45 43 addi a1, a1, 1076 +800001d4: 13 16 24 00 slli a2, s0, 2 +800001d8: b3 05 b6 00 add a1, a2, a1 +800001dc: 03 a6 05 00 lw a2, 0(a1) +800001e0: 83 25 06 00 lw a1, 0(a2) +800001e4: 83 26 c6 00 lw a3, 12(a2) +800001e8: 03 a7 05 00 lw a4, 0(a1) +800001ec: 83 a7 45 00 lw a5, 4(a1) +800001f0: 33 85 a6 00 add a0, a3, a0 +800001f4: b3 86 e7 02 mul a3, a5, a4 +800001f8: b3 47 d5 02 div a5, a0, a3 +800001fc: b3 86 d7 02 mul a3, a5, a3 +80000200: 03 a4 c5 00 lw s0, 12(a1) +80000204: 33 05 d5 40 sub a0, a0, a3 +80000208: b3 46 e5 02 div a3, a0, a4 +8000020c: 33 88 e6 02 mul a6, a3, a4 +80000210: b3 08 a4 00 add a7, s0, a0 +80000214: 03 a7 05 01 lw a4, 16(a1) +80000218: 03 a4 45 01 lw s0, 20(a1) +8000021c: 83 22 46 00 lw t0, 4(a2) +80000220: 03 25 86 00 lw a0, 8(a2) +80000224: 33 86 08 41 sub a2, a7, a6 +80000228: b3 06 d7 00 add a3, a4, a3 +8000022c: 33 07 f4 00 add a4, s0, a5 +80000230: e7 80 02 00 jalr t0 +80000234: 13 05 10 00 addi a0, zero, 1 +80000238: 03 24 81 00 lw s0, 8(sp) +8000023c: 83 20 c1 00 lw ra, 12(sp) +80000240: 13 01 01 01 addi sp, sp, 16 +80000244: 6f 00 d0 05 j 2140 -8000024c kernel_spawn: -8000024c: 13 01 01 fc addi sp, sp, -64 -80000250: 23 2e 11 02 sw ra, 60(sp) -80000254: 23 2c 81 02 sw s0, 56(sp) -80000258: 23 2a 91 02 sw s1, 52(sp) -8000025c: 23 28 21 03 sw s2, 48(sp) -80000260: 23 26 31 03 sw s3, 44(sp) -80000264: 23 24 41 03 sw s4, 40(sp) -80000268: 23 22 51 03 sw s5, 36(sp) -8000026c: 23 20 61 03 sw s6, 32(sp) -80000270: 23 2e 71 01 sw s7, 28(sp) -80000274: 23 2c 81 01 sw s8, 24(sp) -80000278: 93 04 05 00 mv s1, a0 -8000027c: 83 2b 05 00 lw s7, 0(a0) -80000280: 03 24 45 00 lw s0, 4(a0) -80000284: 03 2c 85 00 lw s8, 8(a0) -80000288: 13 09 06 00 mv s2, a2 -8000028c: 93 89 05 00 mv s3, a1 -80000290: ef 00 00 7f jal 2032 -80000294: 13 0b 05 00 mv s6, a0 -80000298: ef 00 00 7e jal 2016 -8000029c: 13 0a 05 00 mv s4, a0 -800002a0: ef 00 00 7d jal 2000 -800002a4: 93 0a 05 00 mv s5, a0 -800002a8: ef 00 00 7c jal 1984 -800002ac: 93 05 70 00 addi a1, zero, 7 -800002b0: 63 ca a5 0e blt a1, a0, 244 -800002b4: b3 05 74 03 mul a1, s0, s7 -800002b8: 33 86 85 03 mul a2, a1, s8 -800002bc: b3 85 4a 03 mul a1, s5, s4 -800002c0: 93 06 10 00 addi a3, zero, 1 -800002c4: 63 c8 c5 00 blt a1, a2, 16 -800002c8: 63 da 66 01 bge a3, s6, 20 -800002cc: 63 4c d5 00 blt a0, a3, 24 -800002d0: 6f 00 40 0d j 212 -800002d4: b3 46 b6 02 div a3, a2, a1 -800002d8: e3 ca 66 ff blt a3, s6, -12 -800002dc: 93 06 0b 00 mv a3, s6 -800002e0: 63 52 d5 0c bge a0, a3, 196 -800002e4: 13 07 fb ff addi a4, s6, -1 -800002e8: b3 45 d6 02 div a1, a2, a3 -800002ec: 63 0e e5 00 beq a0, a4, 28 -800002f0: 13 06 00 00 mv a2, zero -800002f4: 33 0b b6 00 add s6, a2, a1 -800002f8: 33 46 5b 03 div a2, s6, s5 -800002fc: 93 06 00 00 mv a3, zero -80000300: 63 50 46 03 bge a2, s4, 32 -80000304: 6f 00 00 02 j 32 -80000308: b3 86 d5 02 mul a3, a1, a3 -8000030c: 33 06 d6 40 sub a2, a2, a3 -80000310: 33 0b b6 00 add s6, a2, a1 -80000314: 33 46 5b 03 div a2, s6, s5 -80000318: 93 06 00 00 mv a3, zero -8000031c: 63 44 46 01 blt a2, s4, 8 -80000320: b3 46 46 03 div a3, a2, s4 -80000324: 13 07 00 00 mv a4, zero -80000328: 93 07 10 00 addi a5, zero, 1 -8000032c: 63 88 06 00 beqz a3, 16 -80000330: 33 87 46 03 mul a4, a3, s4 -80000334: 33 07 e6 40 sub a4, a2, a4 -80000338: 93 87 06 00 mv a5, a3 -8000033c: 33 04 56 03 mul s0, a2, s5 +80000248 kernel_spawn: +80000248: 13 01 01 fc addi sp, sp, -64 +8000024c: 23 2e 11 02 sw ra, 60(sp) +80000250: 23 2c 81 02 sw s0, 56(sp) +80000254: 23 2a 91 02 sw s1, 52(sp) +80000258: 23 28 21 03 sw s2, 48(sp) +8000025c: 23 26 31 03 sw s3, 44(sp) +80000260: 23 24 41 03 sw s4, 40(sp) +80000264: 23 22 51 03 sw s5, 36(sp) +80000268: 23 20 61 03 sw s6, 32(sp) +8000026c: 23 2e 71 01 sw s7, 28(sp) +80000270: 23 2c 81 01 sw s8, 24(sp) +80000274: 93 04 05 00 mv s1, a0 +80000278: 83 2b 05 00 lw s7, 0(a0) +8000027c: 03 24 45 00 lw s0, 4(a0) +80000280: 03 2c 85 00 lw s8, 8(a0) +80000284: 13 09 06 00 mv s2, a2 +80000288: 93 89 05 00 mv s3, a1 +8000028c: ef 00 50 07 jal 2164 +80000290: 13 0b 05 00 mv s6, a0 +80000294: ef 00 50 06 jal 2148 +80000298: 13 0a 05 00 mv s4, a0 +8000029c: ef 00 50 05 jal 2132 +800002a0: 93 0a 05 00 mv s5, a0 +800002a4: ef 00 50 04 jal 2116 +800002a8: 93 05 f0 00 addi a1, zero, 15 +800002ac: 63 cc a5 16 blt a1, a0, 376 +800002b0: b3 05 74 03 mul a1, s0, s7 +800002b4: 33 86 85 03 mul a2, a1, s8 +800002b8: b3 85 4a 03 mul a1, s5, s4 +800002bc: 93 06 10 00 addi a3, zero, 1 +800002c0: 63 c8 c5 00 blt a1, a2, 16 +800002c4: 63 da 66 01 bge a3, s6, 20 +800002c8: 63 4c d5 00 blt a0, a3, 24 +800002cc: 6f 00 80 15 j 344 +800002d0: b3 46 b6 02 div a3, a2, a1 +800002d4: e3 ca 66 ff blt a3, s6, -12 +800002d8: 93 06 0b 00 mv a3, s6 +800002dc: 63 54 d5 14 bge a0, a3, 328 +800002e0: 13 07 fb ff addi a4, s6, -1 +800002e4: b3 45 d6 02 div a1, a2, a3 +800002e8: 63 0e e5 00 beq a0, a4, 28 +800002ec: 13 06 00 00 mv a2, zero +800002f0: b3 06 b6 00 add a3, a2, a1 +800002f4: 33 c6 56 03 div a2, a3, s5 +800002f8: 13 07 00 00 mv a4, zero +800002fc: 63 50 46 03 bge a2, s4, 32 +80000300: 6f 00 00 02 j 32 +80000304: b3 86 d5 02 mul a3, a1, a3 +80000308: 33 06 d6 40 sub a2, a2, a3 +8000030c: b3 06 b6 00 add a3, a2, a1 +80000310: 33 c6 56 03 div a2, a3, s5 +80000314: 13 07 00 00 mv a4, zero +80000318: 63 44 46 01 blt a2, s4, 8 +8000031c: 33 47 46 03 div a4, a2, s4 +80000320: 93 07 00 00 mv a5, zero +80000324: b3 0a 56 03 mul s5, a2, s5 +80000328: 13 04 10 00 addi s0, zero, 1 +8000032c: 63 08 07 00 beqz a4, 16 +80000330: b3 07 47 03 mul a5, a4, s4 +80000334: b3 07 f6 40 sub a5, a2, a5 +80000338: 13 04 07 00 mv s0, a4 +8000033c: 33 8b 56 41 sub s6, a3, s5 80000340: 23 20 91 00 sw s1, 0(sp) 80000344: 23 22 31 01 sw s3, 4(sp) 80000348: 23 24 21 01 sw s2, 8(sp) 8000034c: b3 85 a5 02 mul a1, a1, a0 80000350: 23 26 b1 00 sw a1, 12(sp) -80000354: 23 28 f1 00 sw a5, 16(sp) -80000358: 23 2a e1 00 sw a4, 20(sp) +80000354: 23 28 81 00 sw s0, 16(sp) +80000358: 23 2a f1 00 sw a5, 20(sp) 8000035c: b7 15 00 80 lui a1, 524289 80000360: 93 85 45 43 addi a1, a1, 1076 80000364: 13 15 25 00 slli a0, a0, 2 @@ -243,763 +243,795 @@ Disassembly of section .text: 80000384: 37 05 00 80 lui a0, 524288 80000388: 93 05 85 06 addi a1, a0, 104 8000038c: 13 05 06 00 mv a0, a2 -80000390: ef 00 80 68 jal 1672 +80000390: ef 00 80 70 jal 1800 80000394: ef f0 5f cd jal -812 -80000398: 63 06 8b 00 beq s6, s0, 12 -8000039c: 23 26 81 00 sw s0, 12(sp) -800003a0: ef f0 9f cc jal -824 -800003a4: 03 2c 81 01 lw s8, 24(sp) -800003a8: 83 2b c1 01 lw s7, 28(sp) -800003ac: 03 2b 01 02 lw s6, 32(sp) -800003b0: 83 2a 41 02 lw s5, 36(sp) -800003b4: 03 2a 81 02 lw s4, 40(sp) -800003b8: 83 29 c1 02 lw s3, 44(sp) -800003bc: 03 29 01 03 lw s2, 48(sp) -800003c0: 83 24 41 03 lw s1, 52(sp) -800003c4: 03 24 81 03 lw s0, 56(sp) -800003c8: 83 20 c1 03 lw ra, 60(sp) -800003cc: 13 01 01 04 addi sp, sp, 64 -800003d0: 67 80 00 00 ret +80000398: 63 06 0b 08 beqz s6, 140 +8000039c: 23 26 51 01 sw s5, 12(sp) +800003a0: 13 05 0b 00 mv a0, s6 +800003a4: ef 00 c0 6f jal 1788 +800003a8: ef 00 00 74 jal 1856 +800003ac: 13 04 05 00 mv s0, a0 +800003b0: ef 00 00 73 jal 1840 +800003b4: b7 15 00 80 lui a1, 524289 +800003b8: 93 85 45 43 addi a1, a1, 1076 +800003bc: 13 16 24 00 slli a2, s0, 2 +800003c0: b3 05 b6 00 add a1, a2, a1 +800003c4: 03 a6 05 00 lw a2, 0(a1) +800003c8: 83 25 06 00 lw a1, 0(a2) +800003cc: 83 26 c6 00 lw a3, 12(a2) +800003d0: 03 a7 05 00 lw a4, 0(a1) +800003d4: 83 a7 45 00 lw a5, 4(a1) +800003d8: 33 85 a6 00 add a0, a3, a0 +800003dc: b3 86 e7 02 mul a3, a5, a4 +800003e0: b3 47 d5 02 div a5, a0, a3 +800003e4: b3 86 d7 02 mul a3, a5, a3 +800003e8: 83 a4 c5 00 lw s1, 12(a1) +800003ec: 33 05 d5 40 sub a0, a0, a3 +800003f0: b3 46 e5 02 div a3, a0, a4 +800003f4: 33 88 e6 02 mul a6, a3, a4 +800003f8: b3 84 a4 00 add s1, s1, a0 +800003fc: 03 a4 05 01 lw s0, 16(a1) +80000400: 03 a7 45 01 lw a4, 20(a1) +80000404: 83 28 46 00 lw a7, 4(a2) +80000408: 03 25 86 00 lw a0, 8(a2) +8000040c: 33 86 04 41 sub a2, s1, a6 +80000410: b3 06 d4 00 add a3, s0, a3 +80000414: 33 07 f7 00 add a4, a4, a5 +80000418: e7 80 08 00 jalr a7 +8000041c: 13 05 10 00 addi a0, zero, 1 +80000420: ef 00 00 68 jal 1664 +80000424: 03 2c 81 01 lw s8, 24(sp) +80000428: 83 2b c1 01 lw s7, 28(sp) +8000042c: 03 2b 01 02 lw s6, 32(sp) +80000430: 83 2a 41 02 lw s5, 36(sp) +80000434: 03 2a 81 02 lw s4, 40(sp) +80000438: 83 29 c1 02 lw s3, 44(sp) +8000043c: 03 29 01 03 lw s2, 48(sp) +80000440: 83 24 41 03 lw s1, 52(sp) +80000444: 03 24 81 03 lw s0, 56(sp) +80000448: 83 20 c1 03 lw ra, 60(sp) +8000044c: 13 01 01 04 addi sp, sp, 64 +80000450: 67 80 00 00 ret -800003d4 main: -800003d4: 13 01 01 ff addi sp, sp, -16 -800003d8: 23 26 11 00 sw ra, 12(sp) -800003dc: 37 05 00 80 lui a0, 524288 -800003e0: 93 05 05 5f addi a1, a0, 1520 -800003e4: 37 05 ff 7f lui a0, 524272 -800003e8: 13 06 45 03 addi a2, a0, 52 -800003ec: 37 05 ff 7f lui a0, 524272 -800003f0: ef f0 df e5 jal -420 -800003f4: 13 05 00 00 mv a0, zero -800003f8: 83 20 c1 00 lw ra, 12(sp) -800003fc: 13 01 01 01 addi sp, sp, 16 -80000400: 67 80 00 00 ret +80000454 main: +80000454: 13 01 01 ff addi sp, sp, -16 +80000458: 23 26 11 00 sw ra, 12(sp) +8000045c: 37 05 00 80 lui a0, 524288 +80000460: 93 05 05 67 addi a1, a0, 1648 +80000464: 37 05 ff 7f lui a0, 524272 +80000468: 13 06 45 03 addi a2, a0, 52 +8000046c: 37 05 ff 7f lui a0, 524272 +80000470: ef f0 9f dd jal -552 +80000474: 13 05 00 00 mv a0, zero +80000478: 83 20 c1 00 lw ra, 12(sp) +8000047c: 13 01 01 01 addi sp, sp, 16 +80000480: 67 80 00 00 ret -80000404 _pocl_kernel_Fan2: -80000404: 13 01 01 fd addi sp, sp, -48 -80000408: 23 26 11 02 sw ra, 44(sp) -8000040c: 23 24 81 02 sw s0, 40(sp) -80000410: 23 22 91 02 sw s1, 36(sp) -80000414: 23 20 21 03 sw s2, 32(sp) -80000418: 23 2e 31 01 sw s3, 28(sp) -8000041c: 23 2c 41 01 sw s4, 24(sp) -80000420: 23 2a 51 01 sw s5, 20(sp) -80000424: 23 28 61 01 sw s6, 16(sp) -80000428: 23 26 71 01 sw s7, 12(sp) -8000042c: 23 24 81 01 sw s8, 8(sp) -80000430: 23 22 91 01 sw s9, 4(sp) -80000434: 23 20 a1 01 sw s10, 0(sp) -80000438: 13 04 01 03 addi s0, sp, 48 -8000043c: 13 71 c1 ff andi sp, sp, -4 -80000440: 93 02 00 00 mv t0, zero -80000444: 03 a3 87 01 lw t1, 24(a5) -80000448: 83 a3 c7 01 lw t2, 28(a5) -8000044c: 03 ae 07 02 lw t3, 32(a5) -80000450: 03 aa 07 01 lw s4, 16(a5) -80000454: 03 a9 c7 00 lw s2, 12(a5) -80000458: b3 09 03 03 mul s3, t1, a6 -8000045c: b3 87 13 03 mul a5, t2, a7 -80000460: 33 08 fa 00 add a6, s4, a5 -80000464: 93 44 f7 ff not s1, a4 -80000468: b3 8f d4 00 add t6, s1, a3 -8000046c: b3 88 e6 40 sub a7, a3, a4 -80000470: b3 0e d7 02 mul t4, a4, a3 -80000474: 93 14 27 00 slli s1, a4, 2 -80000478: 33 0f 96 00 add t5, a2, s1 -8000047c: b3 04 ea 00 add s1, s4, a4 -80000480: 33 8a f4 00 add s4, s1, a5 -80000484: b3 04 e9 00 add s1, s2, a4 -80000488: b3 84 34 01 add s1, s1, s3 -8000048c: 13 8b 14 00 addi s6, s1, 1 -80000490: b3 87 66 03 mul a5, a3, s6 -80000494: b3 04 fa 00 add s1, s4, a5 -80000498: 93 94 24 00 slli s1, s1, 2 -8000049c: b3 0c 39 01 add s9, s2, s3 -800004a0: 33 89 95 00 add s2, a1, s1 -800004a4: 93 96 26 00 slli a3, a3, 2 -800004a8: b3 07 f7 00 add a5, a4, a5 -800004ac: 93 97 27 00 slli a5, a5, 2 -800004b0: b3 0a f5 00 add s5, a0, a5 -800004b4: 93 17 2b 00 slli a5, s6, 2 -800004b8: 33 0a f6 00 add s4, a2, a5 -800004bc: b3 09 95 00 add s3, a0, s1 -800004c0: 6f 00 c0 00 j 12 -800004c4: 93 82 12 00 addi t0, t0, 1 -800004c8: 63 f6 c2 0f bgeu t0, t3, 236 -800004cc: 13 0b 00 00 mv s6, zero -800004d0: 93 8b 09 00 mv s7, s3 -800004d4: 13 0d 09 00 mv s10, s2 -800004d8: 6f 00 40 01 j 20 -800004dc: 13 0b 1b 00 addi s6, s6, 1 -800004e0: 13 0d 4d 00 addi s10, s10, 4 -800004e4: 93 8b 4b 00 addi s7, s7, 4 -800004e8: e3 7e 7b fc bgeu s6, t2, -36 -800004ec: 33 05 68 01 add a0, a6, s6 -800004f0: e3 56 15 ff bge a0, a7, -20 -800004f4: 33 06 e5 00 add a2, a0, a4 -800004f8: 33 06 d6 01 add a2, a2, t4 -800004fc: 13 16 26 00 slli a2, a2, 2 -80000500: 33 8c c5 00 add s8, a1, a2 -80000504: 63 04 05 04 beqz a0, 72 -80000508: 13 05 00 00 mv a0, zero -8000050c: 13 06 00 00 mv a2, zero -80000510: 6f 00 00 01 j 16 -80000514: 13 06 16 00 addi a2, a2, 1 -80000518: 33 05 d5 00 add a0, a0, a3 -8000051c: e3 70 66 fc bgeu a2, t1, -64 -80000520: b3 87 cc 00 add a5, s9, a2 -80000524: e3 d8 f7 ff bge a5, t6, -16 -80000528: b3 87 aa 00 add a5, s5, a0 -8000052c: 07 a0 07 00 flw ft0, 0(a5) -80000530: 87 20 0c 00 flw ft1, 0(s8) -80000534: b3 07 ad 00 add a5, s10, a0 -80000538: 07 a1 07 00 flw ft2, 0(a5) -8000053c: 53 70 10 10 fmul.s ft0, ft0, ft1 -80000540: 53 70 01 08 fsub.s ft0, ft2, ft0 -80000544: 27 a0 07 00 fsw ft0, 0(a5) -80000548: 6f f0 df fc j -52 -8000054c: 13 06 00 00 mv a2, zero -80000550: 13 05 00 00 mv a0, zero -80000554: 93 07 0a 00 mv a5, s4 +80000484 _pocl_kernel_Fan2: +80000484: 13 01 01 fd addi sp, sp, -48 +80000488: 23 26 11 02 sw ra, 44(sp) +8000048c: 23 24 81 02 sw s0, 40(sp) +80000490: 23 22 91 02 sw s1, 36(sp) +80000494: 23 20 21 03 sw s2, 32(sp) +80000498: 23 2e 31 01 sw s3, 28(sp) +8000049c: 23 2c 41 01 sw s4, 24(sp) +800004a0: 23 2a 51 01 sw s5, 20(sp) +800004a4: 23 28 61 01 sw s6, 16(sp) +800004a8: 23 26 71 01 sw s7, 12(sp) +800004ac: 23 24 81 01 sw s8, 8(sp) +800004b0: 23 22 91 01 sw s9, 4(sp) +800004b4: 23 20 a1 01 sw s10, 0(sp) +800004b8: 13 04 01 03 addi s0, sp, 48 +800004bc: 13 71 c1 ff andi sp, sp, -4 +800004c0: 93 02 00 00 mv t0, zero +800004c4: 03 a3 87 01 lw t1, 24(a5) +800004c8: 83 a3 c7 01 lw t2, 28(a5) +800004cc: 03 ae 07 02 lw t3, 32(a5) +800004d0: 03 aa 07 01 lw s4, 16(a5) +800004d4: 03 a9 c7 00 lw s2, 12(a5) +800004d8: b3 09 03 03 mul s3, t1, a6 +800004dc: b3 87 13 03 mul a5, t2, a7 +800004e0: 33 08 fa 00 add a6, s4, a5 +800004e4: 93 44 f7 ff not s1, a4 +800004e8: b3 8f d4 00 add t6, s1, a3 +800004ec: b3 88 e6 40 sub a7, a3, a4 +800004f0: b3 0e d7 02 mul t4, a4, a3 +800004f4: 93 14 27 00 slli s1, a4, 2 +800004f8: 33 0f 96 00 add t5, a2, s1 +800004fc: b3 04 ea 00 add s1, s4, a4 +80000500: 33 8a f4 00 add s4, s1, a5 +80000504: b3 04 e9 00 add s1, s2, a4 +80000508: b3 84 34 01 add s1, s1, s3 +8000050c: 13 8b 14 00 addi s6, s1, 1 +80000510: b3 87 66 03 mul a5, a3, s6 +80000514: b3 04 fa 00 add s1, s4, a5 +80000518: 93 94 24 00 slli s1, s1, 2 +8000051c: b3 0c 39 01 add s9, s2, s3 +80000520: 33 89 95 00 add s2, a1, s1 +80000524: 93 96 26 00 slli a3, a3, 2 +80000528: b3 07 f7 00 add a5, a4, a5 +8000052c: 93 97 27 00 slli a5, a5, 2 +80000530: b3 0a f5 00 add s5, a0, a5 +80000534: 93 17 2b 00 slli a5, s6, 2 +80000538: 33 0a f6 00 add s4, a2, a5 +8000053c: b3 09 95 00 add s3, a0, s1 +80000540: 6f 00 c0 00 j 12 +80000544: 93 82 12 00 addi t0, t0, 1 +80000548: 63 f6 c2 0f bgeu t0, t3, 236 +8000054c: 13 0b 00 00 mv s6, zero +80000550: 93 8b 09 00 mv s7, s3 +80000554: 13 0d 09 00 mv s10, s2 80000558: 6f 00 40 01 j 20 -8000055c: 13 05 15 00 addi a0, a0, 1 -80000560: 93 87 47 00 addi a5, a5, 4 -80000564: 33 06 d6 00 add a2, a2, a3 -80000568: e3 7a 65 f6 bgeu a0, t1, -140 -8000056c: b3 84 ac 00 add s1, s9, a0 -80000570: e3 d6 f4 ff bge s1, t6, -20 -80000574: b3 84 ca 00 add s1, s5, a2 -80000578: 07 a0 04 00 flw ft0, 0(s1) -8000057c: 87 20 0c 00 flw ft1, 0(s8) -80000580: b3 04 cd 00 add s1, s10, a2 -80000584: 07 a1 04 00 flw ft2, 0(s1) -80000588: 53 70 10 10 fmul.s ft0, ft0, ft1 -8000058c: 53 70 01 08 fsub.s ft0, ft2, ft0 -80000590: 27 a0 04 00 fsw ft0, 0(s1) -80000594: b3 84 cb 00 add s1, s7, a2 -80000598: 07 a0 04 00 flw ft0, 0(s1) -8000059c: 87 20 0f 00 flw ft1, 0(t5) -800005a0: 07 a1 07 00 flw ft2, 0(a5) -800005a4: 53 70 10 10 fmul.s ft0, ft0, ft1 -800005a8: 53 70 01 08 fsub.s ft0, ft2, ft0 -800005ac: 27 a0 07 00 fsw ft0, 0(a5) -800005b0: 6f f0 df fa j -84 -800005b4: 13 01 04 fd addi sp, s0, -48 -800005b8: 03 2d 01 00 lw s10, 0(sp) -800005bc: 83 2c 41 00 lw s9, 4(sp) -800005c0: 03 2c 81 00 lw s8, 8(sp) -800005c4: 83 2b c1 00 lw s7, 12(sp) -800005c8: 03 2b 01 01 lw s6, 16(sp) -800005cc: 83 2a 41 01 lw s5, 20(sp) -800005d0: 03 2a 81 01 lw s4, 24(sp) -800005d4: 83 29 c1 01 lw s3, 28(sp) -800005d8: 03 29 01 02 lw s2, 32(sp) -800005dc: 83 24 41 02 lw s1, 36(sp) -800005e0: 03 24 81 02 lw s0, 40(sp) -800005e4: 83 20 c1 02 lw ra, 44(sp) -800005e8: 13 01 01 03 addi sp, sp, 48 -800005ec: 67 80 00 00 ret +8000055c: 13 0b 1b 00 addi s6, s6, 1 +80000560: 13 0d 4d 00 addi s10, s10, 4 +80000564: 93 8b 4b 00 addi s7, s7, 4 +80000568: e3 7e 7b fc bgeu s6, t2, -36 +8000056c: 33 05 68 01 add a0, a6, s6 +80000570: e3 56 15 ff bge a0, a7, -20 +80000574: 33 06 e5 00 add a2, a0, a4 +80000578: 33 06 d6 01 add a2, a2, t4 +8000057c: 13 16 26 00 slli a2, a2, 2 +80000580: 33 8c c5 00 add s8, a1, a2 +80000584: 63 04 05 04 beqz a0, 72 +80000588: 13 05 00 00 mv a0, zero +8000058c: 13 06 00 00 mv a2, zero +80000590: 6f 00 00 01 j 16 +80000594: 13 06 16 00 addi a2, a2, 1 +80000598: 33 05 d5 00 add a0, a0, a3 +8000059c: e3 70 66 fc bgeu a2, t1, -64 +800005a0: b3 87 cc 00 add a5, s9, a2 +800005a4: e3 d8 f7 ff bge a5, t6, -16 +800005a8: b3 87 aa 00 add a5, s5, a0 +800005ac: 07 a0 07 00 flw ft0, 0(a5) +800005b0: 87 20 0c 00 flw ft1, 0(s8) +800005b4: b3 07 ad 00 add a5, s10, a0 +800005b8: 07 a1 07 00 flw ft2, 0(a5) +800005bc: 53 70 10 10 fmul.s ft0, ft0, ft1 +800005c0: 53 70 01 08 fsub.s ft0, ft2, ft0 +800005c4: 27 a0 07 00 fsw ft0, 0(a5) +800005c8: 6f f0 df fc j -52 +800005cc: 13 06 00 00 mv a2, zero +800005d0: 13 05 00 00 mv a0, zero +800005d4: 93 07 0a 00 mv a5, s4 +800005d8: 6f 00 40 01 j 20 +800005dc: 13 05 15 00 addi a0, a0, 1 +800005e0: 93 87 47 00 addi a5, a5, 4 +800005e4: 33 06 d6 00 add a2, a2, a3 +800005e8: e3 7a 65 f6 bgeu a0, t1, -140 +800005ec: b3 84 ac 00 add s1, s9, a0 +800005f0: e3 d6 f4 ff bge s1, t6, -20 +800005f4: b3 84 ca 00 add s1, s5, a2 +800005f8: 07 a0 04 00 flw ft0, 0(s1) +800005fc: 87 20 0c 00 flw ft1, 0(s8) +80000600: b3 04 cd 00 add s1, s10, a2 +80000604: 07 a1 04 00 flw ft2, 0(s1) +80000608: 53 70 10 10 fmul.s ft0, ft0, ft1 +8000060c: 53 70 01 08 fsub.s ft0, ft2, ft0 +80000610: 27 a0 04 00 fsw ft0, 0(s1) +80000614: b3 84 cb 00 add s1, s7, a2 +80000618: 07 a0 04 00 flw ft0, 0(s1) +8000061c: 87 20 0f 00 flw ft1, 0(t5) +80000620: 07 a1 07 00 flw ft2, 0(a5) +80000624: 53 70 10 10 fmul.s ft0, ft0, ft1 +80000628: 53 70 01 08 fsub.s ft0, ft2, ft0 +8000062c: 27 a0 07 00 fsw ft0, 0(a5) +80000630: 6f f0 df fa j -84 +80000634: 13 01 04 fd addi sp, s0, -48 +80000638: 03 2d 01 00 lw s10, 0(sp) +8000063c: 83 2c 41 00 lw s9, 4(sp) +80000640: 03 2c 81 00 lw s8, 8(sp) +80000644: 83 2b c1 00 lw s7, 12(sp) +80000648: 03 2b 01 01 lw s6, 16(sp) +8000064c: 83 2a 41 01 lw s5, 20(sp) +80000650: 03 2a 81 01 lw s4, 24(sp) +80000654: 83 29 c1 01 lw s3, 28(sp) +80000658: 03 29 01 02 lw s2, 32(sp) +8000065c: 83 24 41 02 lw s1, 36(sp) +80000660: 03 24 81 02 lw s0, 40(sp) +80000664: 83 20 c1 02 lw ra, 44(sp) +80000668: 13 01 01 03 addi sp, sp, 48 +8000066c: 67 80 00 00 ret -800005f0 _pocl_kernel_Fan2_workgroup: -800005f0: 13 01 01 fd addi sp, sp, -48 -800005f4: 23 26 81 02 sw s0, 44(sp) -800005f8: 23 24 91 02 sw s1, 40(sp) -800005fc: 23 22 21 03 sw s2, 36(sp) -80000600: 23 20 31 03 sw s3, 32(sp) -80000604: 23 2e 41 01 sw s4, 28(sp) -80000608: 23 2c 51 01 sw s5, 24(sp) -8000060c: 23 2a 61 01 sw s6, 20(sp) -80000610: 23 28 71 01 sw s7, 16(sp) -80000614: 23 26 81 01 sw s8, 12(sp) -80000618: 23 24 91 01 sw s9, 8(sp) -8000061c: 03 27 05 00 lw a4, 0(a0) -80000620: 13 08 00 00 mv a6, zero -80000624: 03 29 07 00 lw s2, 0(a4) -80000628: 03 27 45 00 lw a4, 4(a0) -8000062c: 83 24 85 00 lw s1, 8(a0) -80000630: 03 24 c5 00 lw s0, 12(a0) -80000634: 03 25 05 01 lw a0, 16(a0) -80000638: 83 28 07 00 lw a7, 0(a4) -8000063c: 83 a9 04 00 lw s3, 0(s1) -80000640: 03 24 04 00 lw s0, 0(s0) -80000644: 03 23 05 00 lw t1, 0(a0) -80000648: 83 ac 85 01 lw s9, 24(a1) -8000064c: 03 ae c5 01 lw t3, 28(a1) -80000650: 83 a2 05 02 lw t0, 32(a1) -80000654: 03 a5 05 01 lw a0, 16(a1) -80000658: 83 ae c5 00 lw t4, 12(a1) -8000065c: b3 85 cc 02 mul a1, s9, a2 -80000660: b3 06 de 02 mul a3, t3, a3 -80000664: 33 0f d5 00 add t5, a0, a3 -80000668: 13 46 f3 ff not a2, t1 -8000066c: 33 06 c4 00 add a2, s0, a2 -80000670: b3 0f 64 40 sub t6, s0, t1 -80000674: b3 03 83 02 mul t2, t1, s0 -80000678: 93 14 23 00 slli s1, t1, 2 -8000067c: 33 8a 99 00 add s4, s3, s1 -80000680: 33 05 65 00 add a0, a0, t1 -80000684: 33 05 d5 00 add a0, a0, a3 -80000688: b3 86 6e 00 add a3, t4, t1 -8000068c: b3 86 b6 00 add a3, a3, a1 -80000690: 93 86 16 00 addi a3, a3, 1 -80000694: b3 04 d4 02 mul s1, s0, a3 -80000698: 33 05 95 00 add a0, a0, s1 -8000069c: 93 17 25 00 slli a5, a0, 2 -800006a0: 33 85 be 00 add a0, t4, a1 -800006a4: b3 8e f8 00 add t4, a7, a5 -800006a8: 93 15 24 00 slli a1, s0, 2 -800006ac: 33 04 93 00 add s0, t1, s1 -800006b0: 13 14 24 00 slli s0, s0, 2 -800006b4: b3 0a 89 00 add s5, s2, s0 -800006b8: 93 96 26 00 slli a3, a3, 2 -800006bc: b3 89 d9 00 add s3, s3, a3 -800006c0: 33 09 f9 00 add s2, s2, a5 -800006c4: 6f 00 c0 00 j 12 -800006c8: 13 08 18 00 addi a6, a6, 1 -800006cc: 63 76 58 0e bgeu a6, t0, 236 -800006d0: 13 0b 00 00 mv s6, zero -800006d4: 93 0b 09 00 mv s7, s2 -800006d8: 93 87 0e 00 mv a5, t4 -800006dc: 6f 00 40 01 j 20 -800006e0: 13 0b 1b 00 addi s6, s6, 1 -800006e4: 93 87 47 00 addi a5, a5, 4 -800006e8: 93 8b 4b 00 addi s7, s7, 4 -800006ec: e3 7e cb fd bgeu s6, t3, -36 -800006f0: b3 06 6f 01 add a3, t5, s6 -800006f4: e3 d6 f6 ff bge a3, t6, -20 -800006f8: 33 84 66 00 add s0, a3, t1 -800006fc: 33 04 74 00 add s0, s0, t2 -80000700: 13 14 24 00 slli s0, s0, 2 -80000704: 33 8c 88 00 add s8, a7, s0 -80000708: 63 84 06 04 beqz a3, 72 -8000070c: 93 06 00 00 mv a3, zero -80000710: 13 04 00 00 mv s0, zero -80000714: 6f 00 00 01 j 16 -80000718: 13 04 14 00 addi s0, s0, 1 -8000071c: b3 86 b6 00 add a3, a3, a1 -80000720: e3 70 94 fd bgeu s0, s9, -64 -80000724: 33 07 85 00 add a4, a0, s0 -80000728: e3 58 c7 fe bge a4, a2, -16 -8000072c: 33 87 da 00 add a4, s5, a3 -80000730: 07 20 07 00 flw ft0, 0(a4) -80000734: 87 20 0c 00 flw ft1, 0(s8) -80000738: 33 87 d7 00 add a4, a5, a3 -8000073c: 07 21 07 00 flw ft2, 0(a4) -80000740: 53 70 10 10 fmul.s ft0, ft0, ft1 -80000744: 53 70 01 08 fsub.s ft0, ft2, ft0 -80000748: 27 20 07 00 fsw ft0, 0(a4) -8000074c: 6f f0 df fc j -52 -80000750: 93 04 00 00 mv s1, zero -80000754: 13 04 00 00 mv s0, zero -80000758: 93 86 09 00 mv a3, s3 +80000670 _pocl_kernel_Fan2_workgroup: +80000670: 13 01 01 fd addi sp, sp, -48 +80000674: 23 26 81 02 sw s0, 44(sp) +80000678: 23 24 91 02 sw s1, 40(sp) +8000067c: 23 22 21 03 sw s2, 36(sp) +80000680: 23 20 31 03 sw s3, 32(sp) +80000684: 23 2e 41 01 sw s4, 28(sp) +80000688: 23 2c 51 01 sw s5, 24(sp) +8000068c: 23 2a 61 01 sw s6, 20(sp) +80000690: 23 28 71 01 sw s7, 16(sp) +80000694: 23 26 81 01 sw s8, 12(sp) +80000698: 23 24 91 01 sw s9, 8(sp) +8000069c: 03 27 05 00 lw a4, 0(a0) +800006a0: 13 08 00 00 mv a6, zero +800006a4: 03 29 07 00 lw s2, 0(a4) +800006a8: 03 27 45 00 lw a4, 4(a0) +800006ac: 83 24 85 00 lw s1, 8(a0) +800006b0: 03 24 c5 00 lw s0, 12(a0) +800006b4: 03 25 05 01 lw a0, 16(a0) +800006b8: 83 28 07 00 lw a7, 0(a4) +800006bc: 83 a9 04 00 lw s3, 0(s1) +800006c0: 03 24 04 00 lw s0, 0(s0) +800006c4: 03 23 05 00 lw t1, 0(a0) +800006c8: 83 ac 85 01 lw s9, 24(a1) +800006cc: 03 ae c5 01 lw t3, 28(a1) +800006d0: 83 a2 05 02 lw t0, 32(a1) +800006d4: 03 a5 05 01 lw a0, 16(a1) +800006d8: 83 ae c5 00 lw t4, 12(a1) +800006dc: b3 85 cc 02 mul a1, s9, a2 +800006e0: b3 06 de 02 mul a3, t3, a3 +800006e4: 33 0f d5 00 add t5, a0, a3 +800006e8: 13 46 f3 ff not a2, t1 +800006ec: 33 06 c4 00 add a2, s0, a2 +800006f0: b3 0f 64 40 sub t6, s0, t1 +800006f4: b3 03 83 02 mul t2, t1, s0 +800006f8: 93 14 23 00 slli s1, t1, 2 +800006fc: 33 8a 99 00 add s4, s3, s1 +80000700: 33 05 65 00 add a0, a0, t1 +80000704: 33 05 d5 00 add a0, a0, a3 +80000708: b3 86 6e 00 add a3, t4, t1 +8000070c: b3 86 b6 00 add a3, a3, a1 +80000710: 93 86 16 00 addi a3, a3, 1 +80000714: b3 04 d4 02 mul s1, s0, a3 +80000718: 33 05 95 00 add a0, a0, s1 +8000071c: 93 17 25 00 slli a5, a0, 2 +80000720: 33 85 be 00 add a0, t4, a1 +80000724: b3 8e f8 00 add t4, a7, a5 +80000728: 93 15 24 00 slli a1, s0, 2 +8000072c: 33 04 93 00 add s0, t1, s1 +80000730: 13 14 24 00 slli s0, s0, 2 +80000734: b3 0a 89 00 add s5, s2, s0 +80000738: 93 96 26 00 slli a3, a3, 2 +8000073c: b3 89 d9 00 add s3, s3, a3 +80000740: 33 09 f9 00 add s2, s2, a5 +80000744: 6f 00 c0 00 j 12 +80000748: 13 08 18 00 addi a6, a6, 1 +8000074c: 63 76 58 0e bgeu a6, t0, 236 +80000750: 13 0b 00 00 mv s6, zero +80000754: 93 0b 09 00 mv s7, s2 +80000758: 93 87 0e 00 mv a5, t4 8000075c: 6f 00 40 01 j 20 -80000760: 13 04 14 00 addi s0, s0, 1 -80000764: 93 86 46 00 addi a3, a3, 4 -80000768: b3 84 b4 00 add s1, s1, a1 -8000076c: e3 7a 94 f7 bgeu s0, s9, -140 -80000770: 33 07 85 00 add a4, a0, s0 -80000774: e3 56 c7 fe bge a4, a2, -20 -80000778: 33 87 9a 00 add a4, s5, s1 -8000077c: 07 20 07 00 flw ft0, 0(a4) -80000780: 87 20 0c 00 flw ft1, 0(s8) -80000784: 33 87 97 00 add a4, a5, s1 -80000788: 07 21 07 00 flw ft2, 0(a4) -8000078c: 53 70 10 10 fmul.s ft0, ft0, ft1 -80000790: 53 70 01 08 fsub.s ft0, ft2, ft0 -80000794: 27 20 07 00 fsw ft0, 0(a4) -80000798: 33 87 9b 00 add a4, s7, s1 -8000079c: 07 20 07 00 flw ft0, 0(a4) -800007a0: 87 20 0a 00 flw ft1, 0(s4) -800007a4: 07 a1 06 00 flw ft2, 0(a3) -800007a8: 53 70 10 10 fmul.s ft0, ft0, ft1 -800007ac: 53 70 01 08 fsub.s ft0, ft2, ft0 -800007b0: 27 a0 06 00 fsw ft0, 0(a3) -800007b4: 6f f0 df fa j -84 -800007b8: 83 2c 81 00 lw s9, 8(sp) -800007bc: 03 2c c1 00 lw s8, 12(sp) -800007c0: 83 2b 01 01 lw s7, 16(sp) -800007c4: 03 2b 41 01 lw s6, 20(sp) -800007c8: 83 2a 81 01 lw s5, 24(sp) -800007cc: 03 2a c1 01 lw s4, 28(sp) -800007d0: 83 29 01 02 lw s3, 32(sp) -800007d4: 03 29 41 02 lw s2, 36(sp) -800007d8: 83 24 81 02 lw s1, 40(sp) -800007dc: 03 24 c1 02 lw s0, 44(sp) -800007e0: 13 01 01 03 addi sp, sp, 48 -800007e4: 67 80 00 00 ret +80000760: 13 0b 1b 00 addi s6, s6, 1 +80000764: 93 87 47 00 addi a5, a5, 4 +80000768: 93 8b 4b 00 addi s7, s7, 4 +8000076c: e3 7e cb fd bgeu s6, t3, -36 +80000770: b3 06 6f 01 add a3, t5, s6 +80000774: e3 d6 f6 ff bge a3, t6, -20 +80000778: 33 84 66 00 add s0, a3, t1 +8000077c: 33 04 74 00 add s0, s0, t2 +80000780: 13 14 24 00 slli s0, s0, 2 +80000784: 33 8c 88 00 add s8, a7, s0 +80000788: 63 84 06 04 beqz a3, 72 +8000078c: 93 06 00 00 mv a3, zero +80000790: 13 04 00 00 mv s0, zero +80000794: 6f 00 00 01 j 16 +80000798: 13 04 14 00 addi s0, s0, 1 +8000079c: b3 86 b6 00 add a3, a3, a1 +800007a0: e3 70 94 fd bgeu s0, s9, -64 +800007a4: 33 07 85 00 add a4, a0, s0 +800007a8: e3 58 c7 fe bge a4, a2, -16 +800007ac: 33 87 da 00 add a4, s5, a3 +800007b0: 07 20 07 00 flw ft0, 0(a4) +800007b4: 87 20 0c 00 flw ft1, 0(s8) +800007b8: 33 87 d7 00 add a4, a5, a3 +800007bc: 07 21 07 00 flw ft2, 0(a4) +800007c0: 53 70 10 10 fmul.s ft0, ft0, ft1 +800007c4: 53 70 01 08 fsub.s ft0, ft2, ft0 +800007c8: 27 20 07 00 fsw ft0, 0(a4) +800007cc: 6f f0 df fc j -52 +800007d0: 93 04 00 00 mv s1, zero +800007d4: 13 04 00 00 mv s0, zero +800007d8: 93 86 09 00 mv a3, s3 +800007dc: 6f 00 40 01 j 20 +800007e0: 13 04 14 00 addi s0, s0, 1 +800007e4: 93 86 46 00 addi a3, a3, 4 +800007e8: b3 84 b4 00 add s1, s1, a1 +800007ec: e3 7a 94 f7 bgeu s0, s9, -140 +800007f0: 33 07 85 00 add a4, a0, s0 +800007f4: e3 56 c7 fe bge a4, a2, -20 +800007f8: 33 87 9a 00 add a4, s5, s1 +800007fc: 07 20 07 00 flw ft0, 0(a4) +80000800: 87 20 0c 00 flw ft1, 0(s8) +80000804: 33 87 97 00 add a4, a5, s1 +80000808: 07 21 07 00 flw ft2, 0(a4) +8000080c: 53 70 10 10 fmul.s ft0, ft0, ft1 +80000810: 53 70 01 08 fsub.s ft0, ft2, ft0 +80000814: 27 20 07 00 fsw ft0, 0(a4) +80000818: 33 87 9b 00 add a4, s7, s1 +8000081c: 07 20 07 00 flw ft0, 0(a4) +80000820: 87 20 0a 00 flw ft1, 0(s4) +80000824: 07 a1 06 00 flw ft2, 0(a3) +80000828: 53 70 10 10 fmul.s ft0, ft0, ft1 +8000082c: 53 70 01 08 fsub.s ft0, ft2, ft0 +80000830: 27 a0 06 00 fsw ft0, 0(a3) +80000834: 6f f0 df fa j -84 +80000838: 83 2c 81 00 lw s9, 8(sp) +8000083c: 03 2c c1 00 lw s8, 12(sp) +80000840: 83 2b 01 01 lw s7, 16(sp) +80000844: 03 2b 41 01 lw s6, 20(sp) +80000848: 83 2a 81 01 lw s5, 24(sp) +8000084c: 03 2a c1 01 lw s4, 28(sp) +80000850: 83 29 01 02 lw s3, 32(sp) +80000854: 03 29 41 02 lw s2, 36(sp) +80000858: 83 24 81 02 lw s1, 40(sp) +8000085c: 03 24 c1 02 lw s0, 44(sp) +80000860: 13 01 01 03 addi sp, sp, 48 +80000864: 67 80 00 00 ret -800007e8 _pocl_kernel_Fan2_workgroup_fast: -800007e8: 13 01 01 fd addi sp, sp, -48 -800007ec: 23 26 81 02 sw s0, 44(sp) -800007f0: 23 24 91 02 sw s1, 40(sp) -800007f4: 23 22 21 03 sw s2, 36(sp) -800007f8: 23 20 31 03 sw s3, 32(sp) -800007fc: 23 2e 41 01 sw s4, 28(sp) -80000800: 23 2c 51 01 sw s5, 24(sp) -80000804: 23 2a 61 01 sw s6, 20(sp) -80000808: 23 28 71 01 sw s7, 16(sp) -8000080c: 23 26 81 01 sw s8, 12(sp) -80000810: 23 24 91 01 sw s9, 8(sp) -80000814: 13 08 00 00 mv a6, zero -80000818: 03 29 05 00 lw s2, 0(a0) -8000081c: 03 27 c5 00 lw a4, 12(a0) -80000820: 83 27 05 01 lw a5, 16(a0) -80000824: 83 28 45 00 lw a7, 4(a0) -80000828: 83 29 85 00 lw s3, 8(a0) -8000082c: 83 24 07 00 lw s1, 0(a4) -80000830: 03 a3 07 00 lw t1, 0(a5) -80000834: 83 ac 85 01 lw s9, 24(a1) -80000838: 03 ae c5 01 lw t3, 28(a1) -8000083c: 83 a2 05 02 lw t0, 32(a1) -80000840: 83 a7 05 01 lw a5, 16(a1) -80000844: 83 ae c5 00 lw t4, 12(a1) -80000848: b3 85 cc 02 mul a1, s9, a2 -8000084c: b3 06 de 02 mul a3, t3, a3 -80000850: 33 8f d7 00 add t5, a5, a3 -80000854: 13 46 f3 ff not a2, t1 -80000858: 33 86 c4 00 add a2, s1, a2 -8000085c: b3 8f 64 40 sub t6, s1, t1 -80000860: b3 03 93 02 mul t2, t1, s1 -80000864: 13 15 23 00 slli a0, t1, 2 -80000868: 33 8a a9 00 add s4, s3, a0 -8000086c: 33 85 67 00 add a0, a5, t1 -80000870: b3 07 d5 00 add a5, a0, a3 -80000874: b3 86 6e 00 add a3, t4, t1 -80000878: b3 86 b6 00 add a3, a3, a1 -8000087c: 93 86 16 00 addi a3, a3, 1 -80000880: 33 85 d4 02 mul a0, s1, a3 -80000884: b3 87 a7 00 add a5, a5, a0 -80000888: 13 94 27 00 slli s0, a5, 2 -8000088c: b3 87 be 00 add a5, t4, a1 -80000890: b3 8e 88 00 add t4, a7, s0 -80000894: 93 95 24 00 slli a1, s1, 2 -80000898: 33 05 a3 00 add a0, t1, a0 -8000089c: 13 15 25 00 slli a0, a0, 2 -800008a0: b3 0a a9 00 add s5, s2, a0 -800008a4: 13 95 26 00 slli a0, a3, 2 -800008a8: b3 89 a9 00 add s3, s3, a0 -800008ac: 33 09 89 00 add s2, s2, s0 -800008b0: 6f 00 c0 00 j 12 -800008b4: 13 08 18 00 addi a6, a6, 1 -800008b8: 63 76 58 0e bgeu a6, t0, 236 -800008bc: 13 0b 00 00 mv s6, zero -800008c0: 93 0b 09 00 mv s7, s2 -800008c4: 13 85 0e 00 mv a0, t4 -800008c8: 6f 00 40 01 j 20 -800008cc: 13 0b 1b 00 addi s6, s6, 1 -800008d0: 13 05 45 00 addi a0, a0, 4 -800008d4: 93 8b 4b 00 addi s7, s7, 4 -800008d8: e3 7e cb fd bgeu s6, t3, -36 -800008dc: b3 06 6f 01 add a3, t5, s6 -800008e0: e3 d6 f6 ff bge a3, t6, -20 -800008e4: 33 84 66 00 add s0, a3, t1 -800008e8: 33 04 74 00 add s0, s0, t2 -800008ec: 13 14 24 00 slli s0, s0, 2 -800008f0: 33 8c 88 00 add s8, a7, s0 -800008f4: 63 84 06 04 beqz a3, 72 -800008f8: 93 06 00 00 mv a3, zero -800008fc: 13 04 00 00 mv s0, zero -80000900: 6f 00 00 01 j 16 -80000904: 13 04 14 00 addi s0, s0, 1 -80000908: b3 86 b6 00 add a3, a3, a1 -8000090c: e3 70 94 fd bgeu s0, s9, -64 -80000910: 33 87 87 00 add a4, a5, s0 -80000914: e3 58 c7 fe bge a4, a2, -16 -80000918: 33 87 da 00 add a4, s5, a3 -8000091c: 07 20 07 00 flw ft0, 0(a4) -80000920: 87 20 0c 00 flw ft1, 0(s8) -80000924: 33 07 d5 00 add a4, a0, a3 -80000928: 07 21 07 00 flw ft2, 0(a4) -8000092c: 53 70 10 10 fmul.s ft0, ft0, ft1 -80000930: 53 70 01 08 fsub.s ft0, ft2, ft0 -80000934: 27 20 07 00 fsw ft0, 0(a4) -80000938: 6f f0 df fc j -52 -8000093c: 93 04 00 00 mv s1, zero -80000940: 13 04 00 00 mv s0, zero -80000944: 93 86 09 00 mv a3, s3 +80000868 _pocl_kernel_Fan2_workgroup_fast: +80000868: 13 01 01 fd addi sp, sp, -48 +8000086c: 23 26 81 02 sw s0, 44(sp) +80000870: 23 24 91 02 sw s1, 40(sp) +80000874: 23 22 21 03 sw s2, 36(sp) +80000878: 23 20 31 03 sw s3, 32(sp) +8000087c: 23 2e 41 01 sw s4, 28(sp) +80000880: 23 2c 51 01 sw s5, 24(sp) +80000884: 23 2a 61 01 sw s6, 20(sp) +80000888: 23 28 71 01 sw s7, 16(sp) +8000088c: 23 26 81 01 sw s8, 12(sp) +80000890: 23 24 91 01 sw s9, 8(sp) +80000894: 13 08 00 00 mv a6, zero +80000898: 03 29 05 00 lw s2, 0(a0) +8000089c: 03 27 c5 00 lw a4, 12(a0) +800008a0: 83 27 05 01 lw a5, 16(a0) +800008a4: 83 28 45 00 lw a7, 4(a0) +800008a8: 83 29 85 00 lw s3, 8(a0) +800008ac: 83 24 07 00 lw s1, 0(a4) +800008b0: 03 a3 07 00 lw t1, 0(a5) +800008b4: 83 ac 85 01 lw s9, 24(a1) +800008b8: 03 ae c5 01 lw t3, 28(a1) +800008bc: 83 a2 05 02 lw t0, 32(a1) +800008c0: 83 a7 05 01 lw a5, 16(a1) +800008c4: 83 ae c5 00 lw t4, 12(a1) +800008c8: b3 85 cc 02 mul a1, s9, a2 +800008cc: b3 06 de 02 mul a3, t3, a3 +800008d0: 33 8f d7 00 add t5, a5, a3 +800008d4: 13 46 f3 ff not a2, t1 +800008d8: 33 86 c4 00 add a2, s1, a2 +800008dc: b3 8f 64 40 sub t6, s1, t1 +800008e0: b3 03 93 02 mul t2, t1, s1 +800008e4: 13 15 23 00 slli a0, t1, 2 +800008e8: 33 8a a9 00 add s4, s3, a0 +800008ec: 33 85 67 00 add a0, a5, t1 +800008f0: b3 07 d5 00 add a5, a0, a3 +800008f4: b3 86 6e 00 add a3, t4, t1 +800008f8: b3 86 b6 00 add a3, a3, a1 +800008fc: 93 86 16 00 addi a3, a3, 1 +80000900: 33 85 d4 02 mul a0, s1, a3 +80000904: b3 87 a7 00 add a5, a5, a0 +80000908: 13 94 27 00 slli s0, a5, 2 +8000090c: b3 87 be 00 add a5, t4, a1 +80000910: b3 8e 88 00 add t4, a7, s0 +80000914: 93 95 24 00 slli a1, s1, 2 +80000918: 33 05 a3 00 add a0, t1, a0 +8000091c: 13 15 25 00 slli a0, a0, 2 +80000920: b3 0a a9 00 add s5, s2, a0 +80000924: 13 95 26 00 slli a0, a3, 2 +80000928: b3 89 a9 00 add s3, s3, a0 +8000092c: 33 09 89 00 add s2, s2, s0 +80000930: 6f 00 c0 00 j 12 +80000934: 13 08 18 00 addi a6, a6, 1 +80000938: 63 76 58 0e bgeu a6, t0, 236 +8000093c: 13 0b 00 00 mv s6, zero +80000940: 93 0b 09 00 mv s7, s2 +80000944: 13 85 0e 00 mv a0, t4 80000948: 6f 00 40 01 j 20 -8000094c: 13 04 14 00 addi s0, s0, 1 -80000950: 93 86 46 00 addi a3, a3, 4 -80000954: b3 84 b4 00 add s1, s1, a1 -80000958: e3 7a 94 f7 bgeu s0, s9, -140 -8000095c: 33 87 87 00 add a4, a5, s0 -80000960: e3 56 c7 fe bge a4, a2, -20 -80000964: 33 87 9a 00 add a4, s5, s1 -80000968: 07 20 07 00 flw ft0, 0(a4) -8000096c: 87 20 0c 00 flw ft1, 0(s8) -80000970: 33 07 95 00 add a4, a0, s1 -80000974: 07 21 07 00 flw ft2, 0(a4) -80000978: 53 70 10 10 fmul.s ft0, ft0, ft1 -8000097c: 53 70 01 08 fsub.s ft0, ft2, ft0 -80000980: 27 20 07 00 fsw ft0, 0(a4) -80000984: 33 87 9b 00 add a4, s7, s1 -80000988: 07 20 07 00 flw ft0, 0(a4) -8000098c: 87 20 0a 00 flw ft1, 0(s4) -80000990: 07 a1 06 00 flw ft2, 0(a3) -80000994: 53 70 10 10 fmul.s ft0, ft0, ft1 -80000998: 53 70 01 08 fsub.s ft0, ft2, ft0 -8000099c: 27 a0 06 00 fsw ft0, 0(a3) -800009a0: 6f f0 df fa j -84 -800009a4: 83 2c 81 00 lw s9, 8(sp) -800009a8: 03 2c c1 00 lw s8, 12(sp) -800009ac: 83 2b 01 01 lw s7, 16(sp) -800009b0: 03 2b 41 01 lw s6, 20(sp) -800009b4: 83 2a 81 01 lw s5, 24(sp) -800009b8: 03 2a c1 01 lw s4, 28(sp) -800009bc: 83 29 01 02 lw s3, 32(sp) -800009c0: 03 29 41 02 lw s2, 36(sp) -800009c4: 83 24 81 02 lw s1, 40(sp) -800009c8: 03 24 c1 02 lw s0, 44(sp) -800009cc: 13 01 01 03 addi sp, sp, 48 -800009d0: 67 80 00 00 ret +8000094c: 13 0b 1b 00 addi s6, s6, 1 +80000950: 13 05 45 00 addi a0, a0, 4 +80000954: 93 8b 4b 00 addi s7, s7, 4 +80000958: e3 7e cb fd bgeu s6, t3, -36 +8000095c: b3 06 6f 01 add a3, t5, s6 +80000960: e3 d6 f6 ff bge a3, t6, -20 +80000964: 33 84 66 00 add s0, a3, t1 +80000968: 33 04 74 00 add s0, s0, t2 +8000096c: 13 14 24 00 slli s0, s0, 2 +80000970: 33 8c 88 00 add s8, a7, s0 +80000974: 63 84 06 04 beqz a3, 72 +80000978: 93 06 00 00 mv a3, zero +8000097c: 13 04 00 00 mv s0, zero +80000980: 6f 00 00 01 j 16 +80000984: 13 04 14 00 addi s0, s0, 1 +80000988: b3 86 b6 00 add a3, a3, a1 +8000098c: e3 70 94 fd bgeu s0, s9, -64 +80000990: 33 87 87 00 add a4, a5, s0 +80000994: e3 58 c7 fe bge a4, a2, -16 +80000998: 33 87 da 00 add a4, s5, a3 +8000099c: 07 20 07 00 flw ft0, 0(a4) +800009a0: 87 20 0c 00 flw ft1, 0(s8) +800009a4: 33 07 d5 00 add a4, a0, a3 +800009a8: 07 21 07 00 flw ft2, 0(a4) +800009ac: 53 70 10 10 fmul.s ft0, ft0, ft1 +800009b0: 53 70 01 08 fsub.s ft0, ft2, ft0 +800009b4: 27 20 07 00 fsw ft0, 0(a4) +800009b8: 6f f0 df fc j -52 +800009bc: 93 04 00 00 mv s1, zero +800009c0: 13 04 00 00 mv s0, zero +800009c4: 93 86 09 00 mv a3, s3 +800009c8: 6f 00 40 01 j 20 +800009cc: 13 04 14 00 addi s0, s0, 1 +800009d0: 93 86 46 00 addi a3, a3, 4 +800009d4: b3 84 b4 00 add s1, s1, a1 +800009d8: e3 7a 94 f7 bgeu s0, s9, -140 +800009dc: 33 87 87 00 add a4, a5, s0 +800009e0: e3 56 c7 fe bge a4, a2, -20 +800009e4: 33 87 9a 00 add a4, s5, s1 +800009e8: 07 20 07 00 flw ft0, 0(a4) +800009ec: 87 20 0c 00 flw ft1, 0(s8) +800009f0: 33 07 95 00 add a4, a0, s1 +800009f4: 07 21 07 00 flw ft2, 0(a4) +800009f8: 53 70 10 10 fmul.s ft0, ft0, ft1 +800009fc: 53 70 01 08 fsub.s ft0, ft2, ft0 +80000a00: 27 20 07 00 fsw ft0, 0(a4) +80000a04: 33 87 9b 00 add a4, s7, s1 +80000a08: 07 20 07 00 flw ft0, 0(a4) +80000a0c: 87 20 0a 00 flw ft1, 0(s4) +80000a10: 07 a1 06 00 flw ft2, 0(a3) +80000a14: 53 70 10 10 fmul.s ft0, ft0, ft1 +80000a18: 53 70 01 08 fsub.s ft0, ft2, ft0 +80000a1c: 27 a0 06 00 fsw ft0, 0(a3) +80000a20: 6f f0 df fa j -84 +80000a24: 83 2c 81 00 lw s9, 8(sp) +80000a28: 03 2c c1 00 lw s8, 12(sp) +80000a2c: 83 2b 01 01 lw s7, 16(sp) +80000a30: 03 2b 41 01 lw s6, 20(sp) +80000a34: 83 2a 81 01 lw s5, 24(sp) +80000a38: 03 2a c1 01 lw s4, 28(sp) +80000a3c: 83 29 01 02 lw s3, 32(sp) +80000a40: 03 29 41 02 lw s2, 36(sp) +80000a44: 83 24 81 02 lw s1, 40(sp) +80000a48: 03 24 c1 02 lw s0, 44(sp) +80000a4c: 13 01 01 03 addi sp, sp, 48 +80000a50: 67 80 00 00 ret -800009d4 _exit: -800009d4: 13 05 00 00 mv a0, zero -800009d8: 6b 00 05 00 +80000a54 _exit: +80000a54: 13 05 00 00 mv a0, zero +80000a58: 6b 00 05 00 -800009dc vx_set_sp: -800009dc: 73 25 00 fc csrr a0, 4032 -800009e0: 6b 00 05 00 -800009e4: 97 11 00 00 auipc gp, 1 -800009e8: 93 81 41 e2 addi gp, gp, -476 -800009ec: 17 f1 ff 7e auipc sp, 520191 -800009f0: 13 01 41 61 addi sp, sp, 1556 -800009f4: 93 05 00 40 addi a1, zero, 1024 -800009f8: 73 26 10 cc csrr a2, 3265 -800009fc: b3 85 c5 02 mul a1, a1, a2 -80000a00: 33 01 b1 40 sub sp, sp, a1 -80000a04: f3 26 30 cc csrr a3, 3267 -80000a08: 63 86 06 00 beqz a3, 12 -80000a0c: 13 05 00 00 mv a0, zero -80000a10: 6b 00 05 00 +80000a5c vx_set_sp: +80000a5c: 73 25 00 fc csrr a0, 4032 +80000a60: 6b 00 05 00 +80000a64: 97 11 00 00 auipc gp, 1 +80000a68: 93 81 41 da addi gp, gp, -604 +80000a6c: 17 f1 ff 7e auipc sp, 520191 +80000a70: 13 01 41 59 addi sp, sp, 1428 +80000a74: 93 05 00 40 addi a1, zero, 1024 +80000a78: 73 26 10 cc csrr a2, 3265 +80000a7c: b3 85 c5 02 mul a1, a1, a2 +80000a80: 33 01 b1 40 sub sp, sp, a1 +80000a84: f3 26 30 cc csrr a3, 3267 +80000a88: 63 86 06 00 beqz a3, 12 +80000a8c: 13 05 00 00 mv a0, zero +80000a90: 6b 00 05 00 -80000a14 RETURN: -80000a14: 67 80 00 00 ret - -80000a18 vx_wspawn: -80000a18: 6b 10 b5 00 -80000a1c: 67 80 00 00 ret - -80000a20 vx_tmc: -80000a20: 6b 00 05 00 -80000a24: 67 80 00 00 ret - -80000a28 vx_barrier: -80000a28: 6b 40 b5 00 -80000a2c: 67 80 00 00 ret - -80000a30 vx_split: -80000a30: 6b 20 05 00 -80000a34: 67 80 00 00 ret - -80000a38 vx_join: -80000a38: 6b 30 00 00 -80000a3c: 67 80 00 00 ret - -80000a40 vx_warp_id: -80000a40: 73 25 30 cc csrr a0, 3267 -80000a44: 67 80 00 00 ret - -80000a48 vx_warp_gid: -80000a48: 73 25 40 f1 csrr a0, mhartid -80000a4c: 67 80 00 00 ret - -80000a50 vx_thread_id: -80000a50: 73 25 00 cc csrr a0, 3264 -80000a54: 67 80 00 00 ret - -80000a58 vx_thread_lid: -80000a58: 73 25 10 cc csrr a0, 3265 -80000a5c: 67 80 00 00 ret - -80000a60 vx_thread_gid: -80000a60: 73 25 20 cc csrr a0, 3266 -80000a64: 67 80 00 00 ret - -80000a68 vx_core_id: -80000a68: 73 25 50 cc csrr a0, 3269 -80000a6c: 67 80 00 00 ret - -80000a70 vx_num_threads: -80000a70: 73 25 00 fc csrr a0, 4032 -80000a74: 67 80 00 00 ret - -80000a78 vx_num_warps: -80000a78: 73 25 10 fc csrr a0, 4033 -80000a7c: 67 80 00 00 ret - -80000a80 vx_num_cores: -80000a80: 73 25 20 fc csrr a0, 4034 -80000a84: 67 80 00 00 ret - -80000a88 vx_num_cycles: -80000a88: 73 25 00 b0 csrr a0, mcycle -80000a8c: 67 80 00 00 ret - -80000a90 vx_num_instrs: -80000a90: 73 25 20 b0 csrr a0, minstret +80000a94 RETURN: 80000a94: 67 80 00 00 ret -80000a98 atexit: -80000a98: 93 05 05 00 mv a1, a0 -80000a9c: 93 06 00 00 mv a3, zero -80000aa0: 13 06 00 00 mv a2, zero -80000aa4: 13 05 00 00 mv a0, zero -80000aa8: 6f 00 c0 20 j 524 +80000a98 vx_wspawn: +80000a98: 6b 10 b5 00 +80000a9c: 67 80 00 00 ret -80000aac exit: -80000aac: 13 01 01 ff addi sp, sp, -16 -80000ab0: 93 05 00 00 mv a1, zero -80000ab4: 23 24 81 00 sw s0, 8(sp) -80000ab8: 23 26 11 00 sw ra, 12(sp) -80000abc: 13 04 05 00 mv s0, a0 -80000ac0: ef 00 00 29 jal 656 -80000ac4: b7 17 00 80 lui a5, 524289 -80000ac8: 03 a5 07 43 lw a0, 1072(a5) -80000acc: 83 27 c5 03 lw a5, 60(a0) -80000ad0: 63 84 07 00 beqz a5, 8 -80000ad4: e7 80 07 00 jalr a5 -80000ad8: 13 05 04 00 mv a0, s0 -80000adc: ef f0 9f ef jal -264 +80000aa0 vx_tmc: +80000aa0: 6b 00 05 00 +80000aa4: 67 80 00 00 ret -80000ae0 __libc_fini_array: -80000ae0: 13 01 01 ff addi sp, sp, -16 -80000ae4: 23 24 81 00 sw s0, 8(sp) -80000ae8: b7 17 00 80 lui a5, 524289 -80000aec: 37 14 00 80 lui s0, 524289 -80000af0: 13 04 44 00 addi s0, s0, 4 -80000af4: 93 87 47 00 addi a5, a5, 4 -80000af8: b3 87 87 40 sub a5, a5, s0 -80000afc: 23 22 91 00 sw s1, 4(sp) -80000b00: 23 26 11 00 sw ra, 12(sp) -80000b04: 93 d4 27 40 srai s1, a5, 2 -80000b08: 63 80 04 02 beqz s1, 32 -80000b0c: 93 87 c7 ff addi a5, a5, -4 -80000b10: 33 84 87 00 add s0, a5, s0 -80000b14: 83 27 04 00 lw a5, 0(s0) -80000b18: 93 84 f4 ff addi s1, s1, -1 -80000b1c: 13 04 c4 ff addi s0, s0, -4 -80000b20: e7 80 07 00 jalr a5 -80000b24: e3 98 04 fe bnez s1, -16 -80000b28: 83 20 c1 00 lw ra, 12(sp) -80000b2c: 03 24 81 00 lw s0, 8(sp) -80000b30: 83 24 41 00 lw s1, 4(sp) -80000b34: 13 01 01 01 addi sp, sp, 16 -80000b38: 67 80 00 00 ret +80000aa8 vx_barrier: +80000aa8: 6b 40 b5 00 +80000aac: 67 80 00 00 ret -80000b3c __libc_init_array: -80000b3c: 13 01 01 ff addi sp, sp, -16 -80000b40: 23 24 81 00 sw s0, 8(sp) -80000b44: 23 20 21 01 sw s2, 0(sp) -80000b48: 37 14 00 80 lui s0, 524289 -80000b4c: 37 19 00 80 lui s2, 524289 -80000b50: 93 07 04 00 mv a5, s0 -80000b54: 13 09 09 00 mv s2, s2 -80000b58: 33 09 f9 40 sub s2, s2, a5 -80000b5c: 23 26 11 00 sw ra, 12(sp) -80000b60: 23 22 91 00 sw s1, 4(sp) -80000b64: 13 59 29 40 srai s2, s2, 2 -80000b68: 63 00 09 02 beqz s2, 32 -80000b6c: 13 04 04 00 mv s0, s0 -80000b70: 93 04 00 00 mv s1, zero -80000b74: 83 27 04 00 lw a5, 0(s0) -80000b78: 93 84 14 00 addi s1, s1, 1 -80000b7c: 13 04 44 00 addi s0, s0, 4 -80000b80: e7 80 07 00 jalr a5 -80000b84: e3 18 99 fe bne s2, s1, -16 -80000b88: 37 14 00 80 lui s0, 524289 -80000b8c: 37 19 00 80 lui s2, 524289 -80000b90: 93 07 04 00 mv a5, s0 -80000b94: 13 09 49 00 addi s2, s2, 4 -80000b98: 33 09 f9 40 sub s2, s2, a5 -80000b9c: 13 59 29 40 srai s2, s2, 2 -80000ba0: 63 00 09 02 beqz s2, 32 -80000ba4: 13 04 04 00 mv s0, s0 -80000ba8: 93 04 00 00 mv s1, zero -80000bac: 83 27 04 00 lw a5, 0(s0) -80000bb0: 93 84 14 00 addi s1, s1, 1 -80000bb4: 13 04 44 00 addi s0, s0, 4 -80000bb8: e7 80 07 00 jalr a5 -80000bbc: e3 18 99 fe bne s2, s1, -16 -80000bc0: 83 20 c1 00 lw ra, 12(sp) -80000bc4: 03 24 81 00 lw s0, 8(sp) -80000bc8: 83 24 41 00 lw s1, 4(sp) -80000bcc: 03 29 01 00 lw s2, 0(sp) -80000bd0: 13 01 01 01 addi sp, sp, 16 -80000bd4: 67 80 00 00 ret +80000ab0 vx_split: +80000ab0: 6b 20 05 00 +80000ab4: 67 80 00 00 ret -80000bd8 memset: -80000bd8: 13 03 f0 00 addi t1, zero, 15 -80000bdc: 13 07 05 00 mv a4, a0 -80000be0: 63 7e c3 02 bgeu t1, a2, 60 -80000be4: 93 77 f7 00 andi a5, a4, 15 -80000be8: 63 90 07 0a bnez a5, 160 -80000bec: 63 92 05 08 bnez a1, 132 -80000bf0: 93 76 06 ff andi a3, a2, -16 -80000bf4: 13 76 f6 00 andi a2, a2, 15 -80000bf8: b3 86 e6 00 add a3, a3, a4 -80000bfc: 23 20 b7 00 sw a1, 0(a4) -80000c00: 23 22 b7 00 sw a1, 4(a4) -80000c04: 23 24 b7 00 sw a1, 8(a4) -80000c08: 23 26 b7 00 sw a1, 12(a4) -80000c0c: 13 07 07 01 addi a4, a4, 16 -80000c10: e3 66 d7 fe bltu a4, a3, -20 -80000c14: 63 14 06 00 bnez a2, 8 -80000c18: 67 80 00 00 ret -80000c1c: b3 06 c3 40 sub a3, t1, a2 -80000c20: 93 96 26 00 slli a3, a3, 2 -80000c24: 97 02 00 00 auipc t0, 0 -80000c28: b3 86 56 00 add a3, a3, t0 -80000c2c: 67 80 c6 00 jr 12(a3) -80000c30: 23 07 b7 00 sb a1, 14(a4) -80000c34: a3 06 b7 00 sb a1, 13(a4) -80000c38: 23 06 b7 00 sb a1, 12(a4) -80000c3c: a3 05 b7 00 sb a1, 11(a4) -80000c40: 23 05 b7 00 sb a1, 10(a4) -80000c44: a3 04 b7 00 sb a1, 9(a4) -80000c48: 23 04 b7 00 sb a1, 8(a4) -80000c4c: a3 03 b7 00 sb a1, 7(a4) -80000c50: 23 03 b7 00 sb a1, 6(a4) -80000c54: a3 02 b7 00 sb a1, 5(a4) -80000c58: 23 02 b7 00 sb a1, 4(a4) -80000c5c: a3 01 b7 00 sb a1, 3(a4) -80000c60: 23 01 b7 00 sb a1, 2(a4) -80000c64: a3 00 b7 00 sb a1, 1(a4) -80000c68: 23 00 b7 00 sb a1, 0(a4) -80000c6c: 67 80 00 00 ret -80000c70: 93 f5 f5 0f andi a1, a1, 255 -80000c74: 93 96 85 00 slli a3, a1, 8 -80000c78: b3 e5 d5 00 or a1, a1, a3 -80000c7c: 93 96 05 01 slli a3, a1, 16 -80000c80: b3 e5 d5 00 or a1, a1, a3 -80000c84: 6f f0 df f6 j -148 -80000c88: 93 96 27 00 slli a3, a5, 2 -80000c8c: 97 02 00 00 auipc t0, 0 -80000c90: b3 86 56 00 add a3, a3, t0 -80000c94: 93 82 00 00 mv t0, ra -80000c98: e7 80 06 fa jalr -96(a3) -80000c9c: 93 80 02 00 mv ra, t0 -80000ca0: 93 87 07 ff addi a5, a5, -16 -80000ca4: 33 07 f7 40 sub a4, a4, a5 -80000ca8: 33 06 f6 00 add a2, a2, a5 -80000cac: e3 78 c3 f6 bgeu t1, a2, -144 -80000cb0: 6f f0 df f3 j -196 +80000ab8 vx_join: +80000ab8: 6b 30 00 00 +80000abc: 67 80 00 00 ret -80000cb4 __register_exitproc: -80000cb4: b7 17 00 80 lui a5, 524289 -80000cb8: 03 a7 07 43 lw a4, 1072(a5) -80000cbc: 83 27 87 14 lw a5, 328(a4) -80000cc0: 63 8c 07 04 beqz a5, 88 -80000cc4: 03 a7 47 00 lw a4, 4(a5) -80000cc8: 13 08 f0 01 addi a6, zero, 31 -80000ccc: 63 4e e8 06 blt a6, a4, 124 -80000cd0: 13 18 27 00 slli a6, a4, 2 -80000cd4: 63 06 05 02 beqz a0, 44 -80000cd8: 33 83 07 01 add t1, a5, a6 -80000cdc: 23 24 c3 08 sw a2, 136(t1) -80000ce0: 83 a8 87 18 lw a7, 392(a5) -80000ce4: 13 06 10 00 addi a2, zero, 1 -80000ce8: 33 16 e6 00 sll a2, a2, a4 -80000cec: b3 e8 c8 00 or a7, a7, a2 -80000cf0: 23 a4 17 19 sw a7, 392(a5) -80000cf4: 23 24 d3 10 sw a3, 264(t1) -80000cf8: 93 06 20 00 addi a3, zero, 2 -80000cfc: 63 04 d5 02 beq a0, a3, 40 -80000d00: 13 07 17 00 addi a4, a4, 1 -80000d04: 23 a2 e7 00 sw a4, 4(a5) -80000d08: b3 87 07 01 add a5, a5, a6 -80000d0c: 23 a4 b7 00 sw a1, 8(a5) -80000d10: 13 05 00 00 mv a0, zero -80000d14: 67 80 00 00 ret -80000d18: 93 07 c7 14 addi a5, a4, 332 -80000d1c: 23 24 f7 14 sw a5, 328(a4) -80000d20: 6f f0 5f fa j -92 -80000d24: 83 a6 c7 18 lw a3, 396(a5) -80000d28: 13 07 17 00 addi a4, a4, 1 -80000d2c: 23 a2 e7 00 sw a4, 4(a5) -80000d30: 33 e6 c6 00 or a2, a3, a2 -80000d34: 23 a6 c7 18 sw a2, 396(a5) -80000d38: b3 87 07 01 add a5, a5, a6 -80000d3c: 23 a4 b7 00 sw a1, 8(a5) -80000d40: 13 05 00 00 mv a0, zero -80000d44: 67 80 00 00 ret -80000d48: 13 05 f0 ff addi a0, zero, -1 -80000d4c: 67 80 00 00 ret +80000ac0 vx_warp_id: +80000ac0: 73 25 30 cc csrr a0, 3267 +80000ac4: 67 80 00 00 ret -80000d50 __call_exitprocs: -80000d50: 13 01 01 fd addi sp, sp, -48 -80000d54: b7 17 00 80 lui a5, 524289 -80000d58: 23 2c 41 01 sw s4, 24(sp) -80000d5c: 03 aa 07 43 lw s4, 1072(a5) -80000d60: 23 20 21 03 sw s2, 32(sp) -80000d64: 23 26 11 02 sw ra, 44(sp) -80000d68: 03 29 8a 14 lw s2, 328(s4) -80000d6c: 23 24 81 02 sw s0, 40(sp) -80000d70: 23 22 91 02 sw s1, 36(sp) -80000d74: 23 2e 31 01 sw s3, 28(sp) -80000d78: 23 2a 51 01 sw s5, 20(sp) -80000d7c: 23 28 61 01 sw s6, 16(sp) -80000d80: 23 26 71 01 sw s7, 12(sp) -80000d84: 23 24 81 01 sw s8, 8(sp) -80000d88: 63 00 09 04 beqz s2, 64 -80000d8c: 13 0b 05 00 mv s6, a0 -80000d90: 93 8b 05 00 mv s7, a1 -80000d94: 93 0a 10 00 addi s5, zero, 1 -80000d98: 93 09 f0 ff addi s3, zero, -1 -80000d9c: 83 24 49 00 lw s1, 4(s2) -80000da0: 13 84 f4 ff addi s0, s1, -1 -80000da4: 63 42 04 02 bltz s0, 36 -80000da8: 93 94 24 00 slli s1, s1, 2 -80000dac: b3 04 99 00 add s1, s2, s1 -80000db0: 63 84 0b 04 beqz s7, 72 -80000db4: 83 a7 44 10 lw a5, 260(s1) -80000db8: 63 80 77 05 beq a5, s7, 64 -80000dbc: 13 04 f4 ff addi s0, s0, -1 -80000dc0: 93 84 c4 ff addi s1, s1, -4 -80000dc4: e3 16 34 ff bne s0, s3, -20 -80000dc8: 83 20 c1 02 lw ra, 44(sp) -80000dcc: 03 24 81 02 lw s0, 40(sp) -80000dd0: 83 24 41 02 lw s1, 36(sp) -80000dd4: 03 29 01 02 lw s2, 32(sp) -80000dd8: 83 29 c1 01 lw s3, 28(sp) -80000ddc: 03 2a 81 01 lw s4, 24(sp) -80000de0: 83 2a 41 01 lw s5, 20(sp) -80000de4: 03 2b 01 01 lw s6, 16(sp) -80000de8: 83 2b c1 00 lw s7, 12(sp) -80000dec: 03 2c 81 00 lw s8, 8(sp) -80000df0: 13 01 01 03 addi sp, sp, 48 -80000df4: 67 80 00 00 ret -80000df8: 83 27 49 00 lw a5, 4(s2) -80000dfc: 83 a6 44 00 lw a3, 4(s1) -80000e00: 93 87 f7 ff addi a5, a5, -1 -80000e04: 63 8e 87 04 beq a5, s0, 92 -80000e08: 23 a2 04 00 sw zero, 4(s1) -80000e0c: e3 88 06 fa beqz a3, -80 -80000e10: 83 27 89 18 lw a5, 392(s2) -80000e14: 33 97 8a 00 sll a4, s5, s0 -80000e18: 03 2c 49 00 lw s8, 4(s2) -80000e1c: b3 77 f7 00 and a5, a4, a5 -80000e20: 63 92 07 02 bnez a5, 36 -80000e24: e7 80 06 00 jalr a3 -80000e28: 03 27 49 00 lw a4, 4(s2) -80000e2c: 83 27 8a 14 lw a5, 328(s4) -80000e30: 63 14 87 01 bne a4, s8, 8 -80000e34: e3 04 f9 f8 beq s2, a5, -120 -80000e38: e3 88 07 f8 beqz a5, -112 -80000e3c: 13 89 07 00 mv s2, a5 -80000e40: 6f f0 df f5 j -164 -80000e44: 83 27 c9 18 lw a5, 396(s2) -80000e48: 83 a5 44 08 lw a1, 132(s1) -80000e4c: 33 77 f7 00 and a4, a4, a5 -80000e50: 63 1c 07 00 bnez a4, 24 -80000e54: 13 05 0b 00 mv a0, s6 -80000e58: e7 80 06 00 jalr a3 -80000e5c: 6f f0 df fc j -52 -80000e60: 23 22 89 00 sw s0, 4(s2) -80000e64: 6f f0 9f fa j -88 -80000e68: 13 85 05 00 mv a0, a1 -80000e6c: e7 80 06 00 jalr a3 -80000e70: 6f f0 9f fb j -72 +80000ac8 vx_warp_gid: +80000ac8: 73 25 40 f1 csrr a0, mhartid +80000acc: 67 80 00 00 ret + +80000ad0 vx_thread_id: +80000ad0: 73 25 00 cc csrr a0, 3264 +80000ad4: 67 80 00 00 ret + +80000ad8 vx_thread_lid: +80000ad8: 73 25 10 cc csrr a0, 3265 +80000adc: 67 80 00 00 ret + +80000ae0 vx_thread_gid: +80000ae0: 73 25 20 cc csrr a0, 3266 +80000ae4: 67 80 00 00 ret + +80000ae8 vx_core_id: +80000ae8: 73 25 50 cc csrr a0, 3269 +80000aec: 67 80 00 00 ret + +80000af0 vx_num_threads: +80000af0: 73 25 00 fc csrr a0, 4032 +80000af4: 67 80 00 00 ret + +80000af8 vx_num_warps: +80000af8: 73 25 10 fc csrr a0, 4033 +80000afc: 67 80 00 00 ret + +80000b00 vx_num_cores: +80000b00: 73 25 20 fc csrr a0, 4034 +80000b04: 67 80 00 00 ret + +80000b08 vx_num_cycles: +80000b08: 73 25 00 b0 csrr a0, mcycle +80000b0c: 67 80 00 00 ret + +80000b10 vx_num_instrs: +80000b10: 73 25 20 b0 csrr a0, minstret +80000b14: 67 80 00 00 ret + +80000b18 atexit: +80000b18: 93 05 05 00 mv a1, a0 +80000b1c: 93 06 00 00 mv a3, zero +80000b20: 13 06 00 00 mv a2, zero +80000b24: 13 05 00 00 mv a0, zero +80000b28: 6f 00 c0 20 j 524 + +80000b2c exit: +80000b2c: 13 01 01 ff addi sp, sp, -16 +80000b30: 93 05 00 00 mv a1, zero +80000b34: 23 24 81 00 sw s0, 8(sp) +80000b38: 23 26 11 00 sw ra, 12(sp) +80000b3c: 13 04 05 00 mv s0, a0 +80000b40: ef 00 00 29 jal 656 +80000b44: b7 17 00 80 lui a5, 524289 +80000b48: 03 a5 07 43 lw a0, 1072(a5) +80000b4c: 83 27 c5 03 lw a5, 60(a0) +80000b50: 63 84 07 00 beqz a5, 8 +80000b54: e7 80 07 00 jalr a5 +80000b58: 13 05 04 00 mv a0, s0 +80000b5c: ef f0 9f ef jal -264 + +80000b60 __libc_fini_array: +80000b60: 13 01 01 ff addi sp, sp, -16 +80000b64: 23 24 81 00 sw s0, 8(sp) +80000b68: b7 17 00 80 lui a5, 524289 +80000b6c: 37 14 00 80 lui s0, 524289 +80000b70: 13 04 44 00 addi s0, s0, 4 +80000b74: 93 87 47 00 addi a5, a5, 4 +80000b78: b3 87 87 40 sub a5, a5, s0 +80000b7c: 23 22 91 00 sw s1, 4(sp) +80000b80: 23 26 11 00 sw ra, 12(sp) +80000b84: 93 d4 27 40 srai s1, a5, 2 +80000b88: 63 80 04 02 beqz s1, 32 +80000b8c: 93 87 c7 ff addi a5, a5, -4 +80000b90: 33 84 87 00 add s0, a5, s0 +80000b94: 83 27 04 00 lw a5, 0(s0) +80000b98: 93 84 f4 ff addi s1, s1, -1 +80000b9c: 13 04 c4 ff addi s0, s0, -4 +80000ba0: e7 80 07 00 jalr a5 +80000ba4: e3 98 04 fe bnez s1, -16 +80000ba8: 83 20 c1 00 lw ra, 12(sp) +80000bac: 03 24 81 00 lw s0, 8(sp) +80000bb0: 83 24 41 00 lw s1, 4(sp) +80000bb4: 13 01 01 01 addi sp, sp, 16 +80000bb8: 67 80 00 00 ret + +80000bbc __libc_init_array: +80000bbc: 13 01 01 ff addi sp, sp, -16 +80000bc0: 23 24 81 00 sw s0, 8(sp) +80000bc4: 23 20 21 01 sw s2, 0(sp) +80000bc8: 37 14 00 80 lui s0, 524289 +80000bcc: 37 19 00 80 lui s2, 524289 +80000bd0: 93 07 04 00 mv a5, s0 +80000bd4: 13 09 09 00 mv s2, s2 +80000bd8: 33 09 f9 40 sub s2, s2, a5 +80000bdc: 23 26 11 00 sw ra, 12(sp) +80000be0: 23 22 91 00 sw s1, 4(sp) +80000be4: 13 59 29 40 srai s2, s2, 2 +80000be8: 63 00 09 02 beqz s2, 32 +80000bec: 13 04 04 00 mv s0, s0 +80000bf0: 93 04 00 00 mv s1, zero +80000bf4: 83 27 04 00 lw a5, 0(s0) +80000bf8: 93 84 14 00 addi s1, s1, 1 +80000bfc: 13 04 44 00 addi s0, s0, 4 +80000c00: e7 80 07 00 jalr a5 +80000c04: e3 18 99 fe bne s2, s1, -16 +80000c08: 37 14 00 80 lui s0, 524289 +80000c0c: 37 19 00 80 lui s2, 524289 +80000c10: 93 07 04 00 mv a5, s0 +80000c14: 13 09 49 00 addi s2, s2, 4 +80000c18: 33 09 f9 40 sub s2, s2, a5 +80000c1c: 13 59 29 40 srai s2, s2, 2 +80000c20: 63 00 09 02 beqz s2, 32 +80000c24: 13 04 04 00 mv s0, s0 +80000c28: 93 04 00 00 mv s1, zero +80000c2c: 83 27 04 00 lw a5, 0(s0) +80000c30: 93 84 14 00 addi s1, s1, 1 +80000c34: 13 04 44 00 addi s0, s0, 4 +80000c38: e7 80 07 00 jalr a5 +80000c3c: e3 18 99 fe bne s2, s1, -16 +80000c40: 83 20 c1 00 lw ra, 12(sp) +80000c44: 03 24 81 00 lw s0, 8(sp) +80000c48: 83 24 41 00 lw s1, 4(sp) +80000c4c: 03 29 01 00 lw s2, 0(sp) +80000c50: 13 01 01 01 addi sp, sp, 16 +80000c54: 67 80 00 00 ret + +80000c58 memset: +80000c58: 13 03 f0 00 addi t1, zero, 15 +80000c5c: 13 07 05 00 mv a4, a0 +80000c60: 63 7e c3 02 bgeu t1, a2, 60 +80000c64: 93 77 f7 00 andi a5, a4, 15 +80000c68: 63 90 07 0a bnez a5, 160 +80000c6c: 63 92 05 08 bnez a1, 132 +80000c70: 93 76 06 ff andi a3, a2, -16 +80000c74: 13 76 f6 00 andi a2, a2, 15 +80000c78: b3 86 e6 00 add a3, a3, a4 +80000c7c: 23 20 b7 00 sw a1, 0(a4) +80000c80: 23 22 b7 00 sw a1, 4(a4) +80000c84: 23 24 b7 00 sw a1, 8(a4) +80000c88: 23 26 b7 00 sw a1, 12(a4) +80000c8c: 13 07 07 01 addi a4, a4, 16 +80000c90: e3 66 d7 fe bltu a4, a3, -20 +80000c94: 63 14 06 00 bnez a2, 8 +80000c98: 67 80 00 00 ret +80000c9c: b3 06 c3 40 sub a3, t1, a2 +80000ca0: 93 96 26 00 slli a3, a3, 2 +80000ca4: 97 02 00 00 auipc t0, 0 +80000ca8: b3 86 56 00 add a3, a3, t0 +80000cac: 67 80 c6 00 jr 12(a3) +80000cb0: 23 07 b7 00 sb a1, 14(a4) +80000cb4: a3 06 b7 00 sb a1, 13(a4) +80000cb8: 23 06 b7 00 sb a1, 12(a4) +80000cbc: a3 05 b7 00 sb a1, 11(a4) +80000cc0: 23 05 b7 00 sb a1, 10(a4) +80000cc4: a3 04 b7 00 sb a1, 9(a4) +80000cc8: 23 04 b7 00 sb a1, 8(a4) +80000ccc: a3 03 b7 00 sb a1, 7(a4) +80000cd0: 23 03 b7 00 sb a1, 6(a4) +80000cd4: a3 02 b7 00 sb a1, 5(a4) +80000cd8: 23 02 b7 00 sb a1, 4(a4) +80000cdc: a3 01 b7 00 sb a1, 3(a4) +80000ce0: 23 01 b7 00 sb a1, 2(a4) +80000ce4: a3 00 b7 00 sb a1, 1(a4) +80000ce8: 23 00 b7 00 sb a1, 0(a4) +80000cec: 67 80 00 00 ret +80000cf0: 93 f5 f5 0f andi a1, a1, 255 +80000cf4: 93 96 85 00 slli a3, a1, 8 +80000cf8: b3 e5 d5 00 or a1, a1, a3 +80000cfc: 93 96 05 01 slli a3, a1, 16 +80000d00: b3 e5 d5 00 or a1, a1, a3 +80000d04: 6f f0 df f6 j -148 +80000d08: 93 96 27 00 slli a3, a5, 2 +80000d0c: 97 02 00 00 auipc t0, 0 +80000d10: b3 86 56 00 add a3, a3, t0 +80000d14: 93 82 00 00 mv t0, ra +80000d18: e7 80 06 fa jalr -96(a3) +80000d1c: 93 80 02 00 mv ra, t0 +80000d20: 93 87 07 ff addi a5, a5, -16 +80000d24: 33 07 f7 40 sub a4, a4, a5 +80000d28: 33 06 f6 00 add a2, a2, a5 +80000d2c: e3 78 c3 f6 bgeu t1, a2, -144 +80000d30: 6f f0 df f3 j -196 + +80000d34 __register_exitproc: +80000d34: b7 17 00 80 lui a5, 524289 +80000d38: 03 a7 07 43 lw a4, 1072(a5) +80000d3c: 83 27 87 14 lw a5, 328(a4) +80000d40: 63 8c 07 04 beqz a5, 88 +80000d44: 03 a7 47 00 lw a4, 4(a5) +80000d48: 13 08 f0 01 addi a6, zero, 31 +80000d4c: 63 4e e8 06 blt a6, a4, 124 +80000d50: 13 18 27 00 slli a6, a4, 2 +80000d54: 63 06 05 02 beqz a0, 44 +80000d58: 33 83 07 01 add t1, a5, a6 +80000d5c: 23 24 c3 08 sw a2, 136(t1) +80000d60: 83 a8 87 18 lw a7, 392(a5) +80000d64: 13 06 10 00 addi a2, zero, 1 +80000d68: 33 16 e6 00 sll a2, a2, a4 +80000d6c: b3 e8 c8 00 or a7, a7, a2 +80000d70: 23 a4 17 19 sw a7, 392(a5) +80000d74: 23 24 d3 10 sw a3, 264(t1) +80000d78: 93 06 20 00 addi a3, zero, 2 +80000d7c: 63 04 d5 02 beq a0, a3, 40 +80000d80: 13 07 17 00 addi a4, a4, 1 +80000d84: 23 a2 e7 00 sw a4, 4(a5) +80000d88: b3 87 07 01 add a5, a5, a6 +80000d8c: 23 a4 b7 00 sw a1, 8(a5) +80000d90: 13 05 00 00 mv a0, zero +80000d94: 67 80 00 00 ret +80000d98: 93 07 c7 14 addi a5, a4, 332 +80000d9c: 23 24 f7 14 sw a5, 328(a4) +80000da0: 6f f0 5f fa j -92 +80000da4: 83 a6 c7 18 lw a3, 396(a5) +80000da8: 13 07 17 00 addi a4, a4, 1 +80000dac: 23 a2 e7 00 sw a4, 4(a5) +80000db0: 33 e6 c6 00 or a2, a3, a2 +80000db4: 23 a6 c7 18 sw a2, 396(a5) +80000db8: b3 87 07 01 add a5, a5, a6 +80000dbc: 23 a4 b7 00 sw a1, 8(a5) +80000dc0: 13 05 00 00 mv a0, zero +80000dc4: 67 80 00 00 ret +80000dc8: 13 05 f0 ff addi a0, zero, -1 +80000dcc: 67 80 00 00 ret + +80000dd0 __call_exitprocs: +80000dd0: 13 01 01 fd addi sp, sp, -48 +80000dd4: b7 17 00 80 lui a5, 524289 +80000dd8: 23 2c 41 01 sw s4, 24(sp) +80000ddc: 03 aa 07 43 lw s4, 1072(a5) +80000de0: 23 20 21 03 sw s2, 32(sp) +80000de4: 23 26 11 02 sw ra, 44(sp) +80000de8: 03 29 8a 14 lw s2, 328(s4) +80000dec: 23 24 81 02 sw s0, 40(sp) +80000df0: 23 22 91 02 sw s1, 36(sp) +80000df4: 23 2e 31 01 sw s3, 28(sp) +80000df8: 23 2a 51 01 sw s5, 20(sp) +80000dfc: 23 28 61 01 sw s6, 16(sp) +80000e00: 23 26 71 01 sw s7, 12(sp) +80000e04: 23 24 81 01 sw s8, 8(sp) +80000e08: 63 00 09 04 beqz s2, 64 +80000e0c: 13 0b 05 00 mv s6, a0 +80000e10: 93 8b 05 00 mv s7, a1 +80000e14: 93 0a 10 00 addi s5, zero, 1 +80000e18: 93 09 f0 ff addi s3, zero, -1 +80000e1c: 83 24 49 00 lw s1, 4(s2) +80000e20: 13 84 f4 ff addi s0, s1, -1 +80000e24: 63 42 04 02 bltz s0, 36 +80000e28: 93 94 24 00 slli s1, s1, 2 +80000e2c: b3 04 99 00 add s1, s2, s1 +80000e30: 63 84 0b 04 beqz s7, 72 +80000e34: 83 a7 44 10 lw a5, 260(s1) +80000e38: 63 80 77 05 beq a5, s7, 64 +80000e3c: 13 04 f4 ff addi s0, s0, -1 +80000e40: 93 84 c4 ff addi s1, s1, -4 +80000e44: e3 16 34 ff bne s0, s3, -20 +80000e48: 83 20 c1 02 lw ra, 44(sp) +80000e4c: 03 24 81 02 lw s0, 40(sp) +80000e50: 83 24 41 02 lw s1, 36(sp) +80000e54: 03 29 01 02 lw s2, 32(sp) +80000e58: 83 29 c1 01 lw s3, 28(sp) +80000e5c: 03 2a 81 01 lw s4, 24(sp) +80000e60: 83 2a 41 01 lw s5, 20(sp) +80000e64: 03 2b 01 01 lw s6, 16(sp) +80000e68: 83 2b c1 00 lw s7, 12(sp) +80000e6c: 03 2c 81 00 lw s8, 8(sp) +80000e70: 13 01 01 03 addi sp, sp, 48 +80000e74: 67 80 00 00 ret +80000e78: 83 27 49 00 lw a5, 4(s2) +80000e7c: 83 a6 44 00 lw a3, 4(s1) +80000e80: 93 87 f7 ff addi a5, a5, -1 +80000e84: 63 8e 87 04 beq a5, s0, 92 +80000e88: 23 a2 04 00 sw zero, 4(s1) +80000e8c: e3 88 06 fa beqz a3, -80 +80000e90: 83 27 89 18 lw a5, 392(s2) +80000e94: 33 97 8a 00 sll a4, s5, s0 +80000e98: 03 2c 49 00 lw s8, 4(s2) +80000e9c: b3 77 f7 00 and a5, a4, a5 +80000ea0: 63 92 07 02 bnez a5, 36 +80000ea4: e7 80 06 00 jalr a3 +80000ea8: 03 27 49 00 lw a4, 4(s2) +80000eac: 83 27 8a 14 lw a5, 328(s4) +80000eb0: 63 14 87 01 bne a4, s8, 8 +80000eb4: e3 04 f9 f8 beq s2, a5, -120 +80000eb8: e3 88 07 f8 beqz a5, -112 +80000ebc: 13 89 07 00 mv s2, a5 +80000ec0: 6f f0 df f5 j -164 +80000ec4: 83 27 c9 18 lw a5, 396(s2) +80000ec8: 83 a5 44 08 lw a1, 132(s1) +80000ecc: 33 77 f7 00 and a4, a4, a5 +80000ed0: 63 1c 07 00 bnez a4, 24 +80000ed4: 13 05 0b 00 mv a0, s6 +80000ed8: e7 80 06 00 jalr a3 +80000edc: 6f f0 df fc j -52 +80000ee0: 23 22 89 00 sw s0, 4(s2) +80000ee4: 6f f0 9f fa j -88 +80000ee8: 13 85 05 00 mv a0, a1 +80000eec: e7 80 06 00 jalr a3 +80000ef0: 6f f0 9f fb j -72 Disassembly of section .init_array: @@ -1173,7 +1205,7 @@ Disassembly of section .symtab: 9e: f1 ff a0: 0e 00 a2: 00 00 - a4: 14 0a + a4: 94 0a a6: 00 80 a8: 00 00 aa: 00 00 @@ -1283,7 +1315,7 @@ Disassembly of section .symtab: 1cc: 00 00 1ce: 03 00 03 01 lb zero, 16(t1) 1d2: 00 00 - 1d4: 68 0a + 1d4: e8 0a 1d6: 00 80 1d8: 00 00 1da: 00 00 @@ -1296,7 +1328,7 @@ Disassembly of section .symtab: 1ee: f1 ff 1f0: 1c 01 1f2: 00 00 - 1f4: 18 0a + 1f4: 98 0a 1f6: 00 80 1f8: 00 00 1fa: 00 00 @@ -1304,43 +1336,43 @@ Disassembly of section .symtab: 1fe: 02 00 200: 26 01 202: 00 00 - 204: 68 00 - 206: 00 80 - 208: 4c 01 + 204: 00 04 + 206: 00 00 + 208: 00 00 20a: 00 00 - 20c: 12 00 - 20e: 02 00 - 210: 3c 01 - 212: 00 00 - 214: 00 04 - 216: 00 00 - 218: 00 00 + 20c: 10 00 + 20e: f1 ff + 210: 33 01 00 00 add sp, zero, zero + 214: 34 14 + 216: 00 80 + 218: 40 00 21a: 00 00 - 21c: 10 00 - 21e: f1 ff - 220: 49 01 + 21c: 11 00 + 21e: 06 00 + 220: 41 01 222: 00 00 - 224: 34 14 + 224: a0 0a 226: 00 80 - 228: 20 00 + 228: 00 00 22a: 00 00 - 22c: 11 00 - 22e: 06 00 - 230: 57 01 00 00 - 234: 20 0a + 22c: 12 00 + 22e: 02 00 + 230: 48 01 + 232: 00 00 + 234: 30 14 236: 00 80 238: 00 00 23a: 00 00 - 23c: 12 00 - 23e: 02 00 - 240: 5e 01 + 23c: 10 00 + 23e: 05 00 + 240: 58 01 242: 00 00 - 244: 30 14 + 244: 68 00 246: 00 80 - 248: 00 00 + 248: 48 01 24a: 00 00 - 24c: 10 00 - 24e: 05 00 + 24c: 12 00 + 24e: 02 00 250: 6e 01 252: 00 00 254: 08 18 @@ -1350,14 +1382,14 @@ Disassembly of section .symtab: 25c: 10 00 25e: f1 ff 260: 7f 01 00 00 - 264: 38 0a + 264: b8 0a 266: 00 80 268: 00 00 26a: 00 00 26c: 12 00 26e: 02 00 270: 87 01 00 00 - 274: e8 07 + 274: 68 08 276: 00 80 278: ec 01 27a: 00 00 @@ -1365,7 +1397,7 @@ Disassembly of section .symtab: 27e: 02 00 280: a8 01 282: 00 00 - 284: 78 0a + 284: f8 0a 286: 00 80 288: 00 00 28a: 00 00 @@ -1373,15 +1405,15 @@ Disassembly of section .symtab: 28e: 02 00 290: b5 01 292: 00 00 - 294: 4c 02 + 294: 48 02 296: 00 80 - 298: 88 01 + 298: 0c 02 29a: 00 00 29c: 12 00 29e: 02 00 2a0: c2 01 2a2: 00 00 - 2a4: 30 0a + 2a4: b0 0a 2a6: 00 80 2a8: 00 00 2aa: 00 00 @@ -1396,7 +1428,7 @@ Disassembly of section .symtab: 2be: 05 00 2c0: de 01 2c2: 00 00 - 2c4: 3c 0b + 2c4: bc 0b 2c6: 00 80 2c8: 9c 00 2ca: 00 00 @@ -1404,14 +1436,14 @@ Disassembly of section .symtab: 2ce: 02 00 2d0: f0 01 2d2: 00 00 - 2d4: 70 0a + 2d4: f0 0a 2d6: 00 80 2d8: 00 00 2da: 00 00 2dc: 12 00 2de: 02 00 2e0: ff 01 00 00 - 2e4: 40 0a + 2e4: c0 0a 2e6: 00 80 2e8: 00 00 2ea: 00 00 @@ -1419,14 +1451,14 @@ Disassembly of section .symtab: 2ee: 02 00 2f0: 0a 02 2f2: 00 00 - 2f4: 50 0a + 2f4: d0 0a 2f6: 00 80 2f8: 00 00 2fa: 00 00 2fc: 12 00 2fe: 02 00 300: 17 02 00 00 auipc tp, 0 - 304: e0 0a + 304: 60 0b 306: 00 80 308: 5c 00 30a: 00 00 @@ -1442,21 +1474,21 @@ Disassembly of section .symtab: 31e: f1 ff 320: 35 02 322: 00 00 - 324: dc 09 + 324: 5c 0a 326: 00 80 328: 00 00 32a: 00 00 32c: 12 00 32e: 02 00 330: 3f 02 00 00 - 334: f0 05 + 334: 70 06 336: 00 80 338: f8 01 33a: 00 00 33c: 12 00 33e: 02 00 340: 5b 02 00 00 - 344: 28 0a + 344: a8 0a 346: 00 80 348: 00 00 34a: 00 00 @@ -1464,7 +1496,7 @@ Disassembly of section .symtab: 34e: 02 00 350: 66 02 352: 00 00 - 354: 50 0d + 354: d0 0d 356: 00 80 358: 24 01 35a: 00 00 @@ -1479,14 +1511,14 @@ Disassembly of section .symtab: 36c: 12 00 36e: 01 00 370: 77 02 00 00 - 374: b4 0c + 374: 34 0d 376: 00 80 378: 9c 00 37a: 00 00 37c: 12 00 37e: 02 00 380: 8b 02 00 00 - 384: 54 14 + 384: 74 14 386: 00 80 388: 00 00 38a: 00 00 @@ -1500,7 +1532,7 @@ Disassembly of section .symtab: 39c: 10 00 39e: 06 00 3a0: a3 02 00 00 sb zero, 5(zero) - 3a4: d8 0b + 3a4: 58 0c 3a6: 00 80 3a8: dc 00 3aa: 00 00 @@ -1508,14 +1540,14 @@ Disassembly of section .symtab: 3ae: 02 00 3b0: aa 02 3b2: 00 00 - 3b4: d4 03 + 3b4: 54 04 3b6: 00 80 3b8: 30 00 3ba: 00 00 3bc: 12 00 3be: 02 00 3c0: af 02 00 00 - 3c4: 88 0a + 3c4: 08 0b 3c6: 00 80 3c8: 00 00 3ca: 00 00 @@ -1523,7 +1555,7 @@ Disassembly of section .symtab: 3ce: 02 00 3d0: bd 02 3d2: 00 00 - 3d4: 98 0a + 3d4: 18 0b 3d6: 00 80 3d8: 14 00 3da: 00 00 @@ -1531,7 +1563,7 @@ Disassembly of section .symtab: 3de: 02 00 3e0: c4 02 3e2: 00 00 - 3e4: 60 0a + 3e4: e0 0a 3e6: 00 80 3e8: 00 00 3ea: 00 00 @@ -1539,84 +1571,83 @@ Disassembly of section .symtab: 3ee: 02 00 3f0: d2 02 3f2: 00 00 - 3f4: 80 0a + 3f4: 00 0b 3f6: 00 80 3f8: 00 00 3fa: 00 00 3fc: 12 00 3fe: 02 00 400: df 02 00 00 - 404: 48 0a + 404: c8 0a 406: 00 80 408: 00 00 40a: 00 00 40c: 12 00 40e: 02 00 410: eb 02 00 00 - 414: b4 01 + 414: 08 10 416: 00 80 - 418: 98 00 + 418: 00 00 41a: 00 00 - 41c: 12 00 - 41e: 02 00 - 420: 04 03 + 41c: 10 00 + 41e: 04 00 + 420: fa 02 422: 00 00 - 424: 08 10 + 424: 34 14 426: 00 80 428: 00 00 42a: 00 00 42c: 10 00 - 42e: 04 00 - 430: 13 03 00 00 mv t1, zero - 434: 34 14 + 42e: 05 00 + 430: 9d 00 + 432: 00 00 + 434: 74 14 436: 00 80 438: 00 00 43a: 00 00 43c: 10 00 - 43e: 05 00 - 440: 9d 00 + 43e: 06 00 + 440: 30 03 442: 00 00 - 444: 54 14 + 444: 2c 0b 446: 00 80 - 448: 00 00 + 448: 34 00 44a: 00 00 - 44c: 10 00 - 44e: 06 00 - 450: 29 03 + 44c: 12 00 + 44e: 02 00 + 450: 01 03 452: 00 00 - 454: ac 0a + 454: b0 01 456: 00 80 - 458: 34 00 + 458: 98 00 45a: 00 00 45c: 12 00 45e: 02 00 - 460: 1a 03 + 460: 21 03 462: 00 00 - 464: 58 0a + 464: d8 0a 466: 00 80 468: 00 00 46a: 00 00 46c: 12 00 46e: 02 00 - 470: 28 03 - 472: 00 00 - 474: d4 09 + 470: 2f 03 00 00 + 474: 54 0a 476: 00 80 478: 00 00 47a: 00 00 47c: 12 00 47e: 02 00 - 480: 2e 03 + 480: 35 03 482: 00 00 - 484: 04 04 + 484: 84 04 486: 00 80 488: ec 01 48a: 00 00 48c: 12 00 48e: 02 00 - 490: 40 03 - 492: 00 00 - 494: 90 0a + 490: 47 03 00 00 fmsub.s ft6, ft0, ft0, ft0, rne + 494: 10 0b 496: 00 80 498: 00 00 49a: 00 00 @@ -1656,12 +1687,13 @@ Disassembly of section .strtab: 3e: 5f 6b 65 72 42: 6e 65 44: 6c 2d - 46: 36 64 - 48: 2d 36 - 4a: 39 2d - 4c: 37 35 2d 35 lui a0, 217811 - 50: 32 2d - 52: 38 31 + 46: 39 34 + 48: 2d 62 + 4a: 61 2d + 4c: 36 30 + 4e: 2d 35 + 50: 65 2d + 52: 31 38 54: 2e 63 56: 00 70 58: 61 72 @@ -1747,31 +1779,32 @@ Disassembly of section .strtab: 11e: 5f 77 73 70 122: 61 77 124: 6e 00 - 126: 6b 65 72 6e - 12a: 65 6c - 12c: 5f 73 70 61 - 130: 77 6e 5f 72 - 134: 75 6e - 136: 5f 77 61 72 - 13a: 70 00 - 13c: 5f 5f 73 74 - 140: 61 63 - 142: 6b 5f 73 69 - 146: 7a 65 - 148: 00 67 - 14a: 5f 77 73 70 - 14e: 61 77 - 150: 6e 5f - 152: 61 72 - 154: 67 73 00 76 - 158: 78 5f - 15a: 74 6d - 15c: 63 00 5f 5f beq t5, s5, 1504 - 160: 53 44 41 54 - 164: 41 5f - 166: 42 45 - 168: 47 49 4e 5f - 16c: 5f 00 5f 5f + 126: 5f 5f 73 74 + 12a: 61 63 + 12c: 6b 5f 73 69 + 130: 7a 65 + 132: 00 67 + 134: 5f 77 73 70 + 138: 61 77 + 13a: 6e 5f + 13c: 61 72 + 13e: 67 73 00 76 + 142: 78 5f + 144: 74 6d + 146: 63 00 5f 5f beq t5, s5, 1504 + 14a: 53 44 41 54 + 14e: 41 5f + 150: 42 45 + 152: 47 49 4e 5f + 156: 5f 00 6b 65 + 15a: 72 6e + 15c: 65 6c + 15e: 5f 73 70 61 + 162: 77 6e 5f 63 + 166: 61 6c + 168: 6c 62 + 16a: 61 63 + 16c: 6b 00 5f 5f 170: 67 6c 6f 62 174: 61 6c 176: 5f 70 6f 69 @@ -1913,46 +1946,51 @@ Disassembly of section .strtab: 2e0: 78 5f 2e2: 77 61 72 70 2e6: 5f 67 69 64 - 2ea: 00 6b - 2ec: 65 72 - 2ee: 6e 65 - 2f0: 6c 5f - 2f2: 73 70 61 77 csrci 1910, 2 - 2f6: 6e 5f - 2f8: 72 75 - 2fa: 6e 5f - 2fc: 74 68 - 2fe: 72 65 - 300: 61 64 - 302: 73 00 5f 5f - 306: 44 41 - 308: 54 41 - 30a: 5f 42 45 47 - 30e: 49 4e - 310: 5f 5f 00 5f - 314: 65 64 - 316: 61 74 - 318: 61 00 - 31a: 76 78 - 31c: 5f 74 68 72 - 320: 65 61 - 322: 64 5f - 324: 6c 69 - 326: 64 00 - 328: 5f 65 78 69 - 32c: 74 00 - 32e: 5f 70 6f 63 - 332: 6c 5f - 334: 6b 65 72 6e - 338: 65 6c - 33a: 5f 46 61 6e - 33e: 32 00 - 340: 76 78 - 342: 5f 6e 75 6d - 346: 5f 69 6e 73 - 34a: 74 72 - 34c: 73 - 34d: 00 + 2ea: 00 5f + 2ec: 5f 44 41 54 + 2f0: 41 5f + 2f2: 42 45 + 2f4: 47 49 4e 5f + 2f8: 5f 00 5f 65 + 2fc: 64 61 + 2fe: 74 61 + 300: 00 6b + 302: 65 72 + 304: 6e 65 + 306: 6c 5f + 308: 73 70 61 77 csrci 1910, 2 + 30c: 6e 5f + 30e: 72 65 + 310: 6d 61 + 312: 69 6e + 314: 69 6e + 316: 67 5f 63 61 + 31a: 6c 6c + 31c: 62 61 + 31e: 63 6b 00 76 bltu zero, zero, 1910 + 322: 78 5f + 324: 74 68 + 326: 72 65 + 328: 61 64 + 32a: 5f 6c 69 64 + 32e: 00 5f + 330: 65 78 + 332: 69 74 + 334: 00 5f + 336: 70 6f + 338: 63 6c 5f 6b bltu t5, s5, 1720 + 33c: 65 72 + 33e: 6e 65 + 340: 6c 5f + 342: 46 61 + 344: 6e 32 + 346: 00 76 + 348: 78 5f + 34a: 6e 75 + 34c: 6d 5f + 34e: 69 6e + 350: 73 74 72 73 csrrci s0, 1847, 4 + 354: 00 Disassembly of section .shstrtab: diff --git a/benchmarks/opencl/guassian/kernel.pocl b/benchmarks/opencl/guassian/kernel.pocl index 229ae02d..799118d5 100644 Binary files a/benchmarks/opencl/guassian/kernel.pocl and b/benchmarks/opencl/guassian/kernel.pocl differ diff --git a/benchmarks/opencl/nearn/NearestNeighbor.dump b/benchmarks/opencl/nearn/NearestNeighbor.dump index a3fb938c..48e83fef 100644 --- a/benchmarks/opencl/nearn/NearestNeighbor.dump +++ b/benchmarks/opencl/nearn/NearestNeighbor.dump @@ -1,30 +1,30 @@ -/tmp/pocl_vortex_kernel-ee-0e-f1-33-1d.elf: file format ELF32-riscv +/tmp/pocl_vortex_kernel-1e-61-ae-14-72.elf: file format ELF32-riscv Disassembly of section .init: 80000000 _start: 80000000: 97 15 00 00 auipc a1, 1 -80000004: 93 85 45 80 addi a1, a1, -2044 +80000004: 93 85 45 88 addi a1, a1, -1916 80000008: 73 25 10 fc csrr a0, 4033 8000000c: 6b 10 b5 00 -80000010: ef 00 40 7f jal 2036 +80000010: ef 00 50 07 jal 2164 80000014: 13 05 10 00 addi a0, zero, 1 80000018: 6b 00 05 00 8000001c: 17 15 00 00 auipc a0, 1 80000020: 13 05 c5 41 addi a0, a0, 1052 80000024: 17 16 00 00 auipc a2, 1 -80000028: 13 06 86 43 addi a2, a2, 1080 +80000028: 13 06 86 45 addi a2, a2, 1112 8000002c: 33 06 a6 40 sub a2, a2, a0 80000030: 93 05 00 00 mv a1, zero -80000034: ef 00 90 35 jal 2904 +80000034: ef 00 90 3d jal 3032 80000038: 17 15 00 00 auipc a0, 1 -8000003c: 13 05 c5 a5 addi a0, a0, -1444 -80000040: ef 00 10 20 jal 2560 -80000044: ef 00 d0 2a jal 2732 -80000048: ef 00 c0 38 jal 908 -8000004c: 6f 00 50 21 j 2580 +8000003c: 13 05 c5 ad addi a0, a0, -1316 +80000040: ef 00 10 28 jal 2688 +80000044: ef 00 d0 32 jal 2860 +80000048: ef 00 c0 40 jal 1036 +8000004c: 6f 00 50 29 j 2708 Disassembly of section .text: @@ -32,11 +32,11 @@ Disassembly of section .text: 80000050: 93 07 00 00 mv a5, zero 80000054: 63 88 07 00 beqz a5, 16 80000058: 37 15 00 80 lui a0, 524289 -8000005c: 13 05 45 a9 addi a0, a0, -1388 -80000060: 6f 00 10 1e j 2528 +8000005c: 13 05 45 b1 addi a0, a0, -1260 +80000060: 6f 00 10 26 j 2656 80000064: 67 80 00 00 ret -80000068 kernel_spawn_run_warp: +80000068 kernel_spawn_callback: 80000068: 13 01 01 fd addi sp, sp, -48 8000006c: 23 26 11 02 sw ra, 44(sp) 80000070: 23 24 81 02 sw s0, 40(sp) @@ -48,188 +48,188 @@ Disassembly of section .text: 80000088: 23 28 61 01 sw s6, 16(sp) 8000008c: 23 26 71 01 sw s7, 12(sp) 80000090: 23 24 81 01 sw s8, 8(sp) -80000094: ef 00 50 00 jal 2052 -80000098: ef 00 00 7b jal 1968 -8000009c: ef 00 40 7f jal 2036 +80000094: ef 00 50 08 jal 2180 +80000098: ef 00 10 03 jal 2096 +8000009c: ef 00 50 07 jal 2164 800000a0: 93 04 05 00 mv s1, a0 -800000a4: ef 00 40 7c jal 1988 +800000a4: ef 00 50 04 jal 2116 800000a8: 93 09 05 00 mv s3, a0 -800000ac: ef 00 c0 7c jal 1996 +800000ac: ef 00 d0 04 jal 2124 800000b0: 13 09 05 00 mv s2, a0 -800000b4: ef 00 40 7e jal 2020 +800000b4: ef 00 50 06 jal 2148 800000b8: b7 15 00 80 lui a1, 524289 800000bc: 93 85 c5 43 addi a1, a1, 1084 800000c0: 13 96 24 00 slli a2, s1, 2 800000c4: b3 05 b6 00 add a1, a2, a1 -800000c8: 03 ab 05 00 lw s6, 0(a1) -800000cc: 83 25 4b 01 lw a1, 20(s6) -800000d0: 03 26 0b 01 lw a2, 16(s6) -800000d4: 93 86 05 00 mv a3, a1 -800000d8: 63 c4 35 01 blt a1, s3, 8 -800000dc: 93 86 09 00 mv a3, s3 -800000e0: b3 a5 b9 00 slt a1, s3, a1 -800000e4: 33 07 b6 00 add a4, a2, a1 -800000e8: 93 05 10 00 addi a1, zero, 1 -800000ec: 63 4a b7 08 blt a4, a1, 148 -800000f0: 83 25 0b 00 lw a1, 0(s6) -800000f4: 83 aa 05 00 lw s5, 0(a1) -800000f8: 83 a7 45 00 lw a5, 4(a1) -800000fc: 83 24 cb 00 lw s1, 12(s6) -80000100: 33 8a 57 03 mul s4, a5, s5 -80000104: 13 0c f7 ff addi s8, a4, -1 -80000108: 33 86 c9 02 mul a2, s3, a2 -8000010c: 33 86 c6 00 add a2, a3, a2 -80000110: 33 05 c5 02 mul a0, a0, a2 -80000114: 33 85 a4 00 add a0, s1, a0 -80000118: 33 06 e9 02 mul a2, s2, a4 -8000011c: b3 04 c5 00 add s1, a0, a2 -80000120: 33 09 f0 40 neg s2, a5 -80000124: b3 0b 40 41 neg s7, s4 -80000128: 33 c6 44 03 div a2, s1, s4 +800000c8: 03 ac 05 00 lw s8, 0(a1) +800000cc: 83 26 4c 01 lw a3, 20(s8) +800000d0: 83 25 0c 01 lw a1, 16(s8) +800000d4: 13 86 06 00 mv a2, a3 +800000d8: 63 c4 36 01 blt a3, s3, 8 +800000dc: 13 86 09 00 mv a2, s3 +800000e0: b3 a6 d9 00 slt a3, s3, a3 +800000e4: b3 86 d5 00 add a3, a1, a3 +800000e8: 13 07 10 00 addi a4, zero, 1 +800000ec: 63 c8 e6 08 blt a3, a4, 144 +800000f0: 33 87 35 03 mul a4, a1, s3 +800000f4: 83 25 0c 00 lw a1, 0(s8) +800000f8: 33 06 e6 00 add a2, a2, a4 +800000fc: 03 27 cc 00 lw a4, 12(s8) +80000100: 33 05 a6 02 mul a0, a2, a0 +80000104: 03 aa 05 00 lw s4, 0(a1) +80000108: 03 a6 45 00 lw a2, 4(a1) +8000010c: 33 05 e5 00 add a0, a0, a4 +80000110: 33 87 26 03 mul a4, a3, s2 +80000114: 33 04 e5 00 add s0, a0, a4 +80000118: 33 09 46 03 mul s2, a2, s4 +8000011c: b3 0a d4 00 add s5, s0, a3 +80000120: 33 0b c0 40 neg s6, a2 +80000124: b3 0b 20 41 neg s7, s2 +80000128: 33 46 24 03 div a2, s0, s2 8000012c: 33 85 cb 02 mul a0, s7, a2 -80000130: 33 85 a4 00 add a0, s1, a0 -80000134: b3 46 55 03 div a3, a0, s5 +80000130: 33 05 a4 00 add a0, s0, a0 +80000134: b3 46 45 03 div a3, a0, s4 80000138: 03 a5 c5 00 lw a0, 12(a1) -8000013c: 33 07 c9 02 mul a4, s2, a2 +8000013c: 33 07 cb 02 mul a4, s6, a2 80000140: 33 07 d7 40 sub a4, a4, a3 -80000144: 33 87 ea 02 mul a4, s5, a4 +80000144: 33 07 ea 02 mul a4, s4, a4 80000148: 33 08 e5 00 add a6, a0, a4 8000014c: 03 a7 05 01 lw a4, 16(a1) -80000150: 03 a4 45 01 lw s0, 20(a1) -80000154: 83 27 4b 00 lw a5, 4(s6) -80000158: 03 25 8b 00 lw a0, 8(s6) +80000150: 83 a4 45 01 lw s1, 20(a1) +80000154: 83 27 4c 00 lw a5, 4(s8) +80000158: 03 25 8c 00 lw a0, 8(s8) 8000015c: b3 06 d7 00 add a3, a4, a3 -80000160: 33 07 c4 00 add a4, s0, a2 -80000164: 33 86 04 01 add a2, s1, a6 +80000160: 33 87 c4 00 add a4, s1, a2 +80000164: 33 06 04 01 add a2, s0, a6 80000168: e7 80 07 00 jalr a5 -8000016c: 63 0a 0c 00 beqz s8, 20 -80000170: 83 25 0b 00 lw a1, 0(s6) -80000174: 13 0c fc ff addi s8, s8, -1 -80000178: 93 84 14 00 addi s1, s1, 1 -8000017c: 6f f0 df fa j -84 -80000180: 13 b5 19 00 seqz a0, s3 -80000184: 03 2c 81 00 lw s8, 8(sp) -80000188: 83 2b c1 00 lw s7, 12(sp) -8000018c: 03 2b 01 01 lw s6, 16(sp) -80000190: 83 2a 41 01 lw s5, 20(sp) -80000194: 03 2a 81 01 lw s4, 24(sp) -80000198: 83 29 c1 01 lw s3, 28(sp) -8000019c: 03 29 01 02 lw s2, 32(sp) -800001a0: 83 24 41 02 lw s1, 36(sp) -800001a4: 03 24 81 02 lw s0, 40(sp) -800001a8: 83 20 c1 02 lw ra, 44(sp) -800001ac: 13 01 01 03 addi sp, sp, 48 -800001b0: 6f 00 80 69 j 1688 +8000016c: 13 04 14 00 addi s0, s0, 1 +80000170: 63 56 54 01 bge s0, s5, 12 +80000174: 83 25 0c 00 lw a1, 0(s8) +80000178: 6f f0 1f fb j -80 +8000017c: 13 b5 19 00 seqz a0, s3 +80000180: 03 2c 81 00 lw s8, 8(sp) +80000184: 83 2b c1 00 lw s7, 12(sp) +80000188: 03 2b 01 01 lw s6, 16(sp) +8000018c: 83 2a 41 01 lw s5, 20(sp) +80000190: 03 2a 81 01 lw s4, 24(sp) +80000194: 83 29 c1 01 lw s3, 28(sp) +80000198: 03 29 01 02 lw s2, 32(sp) +8000019c: 83 24 41 02 lw s1, 36(sp) +800001a0: 03 24 81 02 lw s0, 40(sp) +800001a4: 83 20 c1 02 lw ra, 44(sp) +800001a8: 13 01 01 03 addi sp, sp, 48 +800001ac: 6f 00 c0 71 j 1820 -800001b4 kernel_spawn_run_threads: -800001b4: 13 01 01 ff addi sp, sp, -16 -800001b8: 23 26 11 00 sw ra, 12(sp) -800001bc: 23 24 81 00 sw s0, 8(sp) -800001c0: ef 00 80 68 jal 1672 -800001c4: ef 00 c0 6c jal 1740 -800001c8: 13 04 05 00 mv s0, a0 -800001cc: ef 00 c0 6b jal 1724 -800001d0: b7 15 00 80 lui a1, 524289 -800001d4: 93 85 c5 43 addi a1, a1, 1084 -800001d8: 13 16 24 00 slli a2, s0, 2 -800001dc: b3 05 b6 00 add a1, a2, a1 -800001e0: 03 a6 05 00 lw a2, 0(a1) -800001e4: 83 25 06 00 lw a1, 0(a2) -800001e8: 83 26 c6 00 lw a3, 12(a2) -800001ec: 03 a7 05 00 lw a4, 0(a1) -800001f0: 83 a7 45 00 lw a5, 4(a1) -800001f4: 33 85 a6 00 add a0, a3, a0 -800001f8: b3 86 e7 02 mul a3, a5, a4 -800001fc: b3 47 d5 02 div a5, a0, a3 -80000200: b3 86 d7 02 mul a3, a5, a3 -80000204: 03 a4 c5 00 lw s0, 12(a1) -80000208: 33 05 d5 40 sub a0, a0, a3 -8000020c: b3 46 e5 02 div a3, a0, a4 -80000210: 33 88 e6 02 mul a6, a3, a4 -80000214: b3 08 a4 00 add a7, s0, a0 -80000218: 03 a7 05 01 lw a4, 16(a1) -8000021c: 03 a4 45 01 lw s0, 20(a1) -80000220: 83 22 46 00 lw t0, 4(a2) -80000224: 03 25 86 00 lw a0, 8(a2) -80000228: 33 86 08 41 sub a2, a7, a6 -8000022c: b3 06 d7 00 add a3, a4, a3 -80000230: 33 07 f4 00 add a4, s0, a5 -80000234: e7 80 02 00 jalr t0 -80000238: 13 05 10 00 addi a0, zero, 1 -8000023c: 03 24 81 00 lw s0, 8(sp) -80000240: 83 20 c1 00 lw ra, 12(sp) -80000244: 13 01 01 01 addi sp, sp, 16 -80000248: 6f 00 00 60 j 1536 +800001b0 kernel_spawn_remaining_callback: +800001b0: 13 01 01 ff addi sp, sp, -16 +800001b4: 23 26 11 00 sw ra, 12(sp) +800001b8: 23 24 81 00 sw s0, 8(sp) +800001bc: ef 00 c0 70 jal 1804 +800001c0: ef 00 00 75 jal 1872 +800001c4: 13 04 05 00 mv s0, a0 +800001c8: ef 00 00 74 jal 1856 +800001cc: b7 15 00 80 lui a1, 524289 +800001d0: 93 85 c5 43 addi a1, a1, 1084 +800001d4: 13 16 24 00 slli a2, s0, 2 +800001d8: b3 05 b6 00 add a1, a2, a1 +800001dc: 03 a6 05 00 lw a2, 0(a1) +800001e0: 83 25 06 00 lw a1, 0(a2) +800001e4: 83 26 c6 00 lw a3, 12(a2) +800001e8: 03 a7 05 00 lw a4, 0(a1) +800001ec: 83 a7 45 00 lw a5, 4(a1) +800001f0: 33 85 a6 00 add a0, a3, a0 +800001f4: b3 86 e7 02 mul a3, a5, a4 +800001f8: b3 47 d5 02 div a5, a0, a3 +800001fc: b3 86 d7 02 mul a3, a5, a3 +80000200: 03 a4 c5 00 lw s0, 12(a1) +80000204: 33 05 d5 40 sub a0, a0, a3 +80000208: b3 46 e5 02 div a3, a0, a4 +8000020c: 33 88 e6 02 mul a6, a3, a4 +80000210: b3 08 a4 00 add a7, s0, a0 +80000214: 03 a7 05 01 lw a4, 16(a1) +80000218: 03 a4 45 01 lw s0, 20(a1) +8000021c: 83 22 46 00 lw t0, 4(a2) +80000220: 03 25 86 00 lw a0, 8(a2) +80000224: 33 86 08 41 sub a2, a7, a6 +80000228: b3 06 d7 00 add a3, a4, a3 +8000022c: 33 07 f4 00 add a4, s0, a5 +80000230: e7 80 02 00 jalr t0 +80000234: 13 05 10 00 addi a0, zero, 1 +80000238: 03 24 81 00 lw s0, 8(sp) +8000023c: 83 20 c1 00 lw ra, 12(sp) +80000240: 13 01 01 01 addi sp, sp, 16 +80000244: 6f 00 40 68 j 1668 -8000024c kernel_spawn: -8000024c: 13 01 01 fc addi sp, sp, -64 -80000250: 23 2e 11 02 sw ra, 60(sp) -80000254: 23 2c 81 02 sw s0, 56(sp) -80000258: 23 2a 91 02 sw s1, 52(sp) -8000025c: 23 28 21 03 sw s2, 48(sp) -80000260: 23 26 31 03 sw s3, 44(sp) -80000264: 23 24 41 03 sw s4, 40(sp) -80000268: 23 22 51 03 sw s5, 36(sp) -8000026c: 23 20 61 03 sw s6, 32(sp) -80000270: 23 2e 71 01 sw s7, 28(sp) -80000274: 23 2c 81 01 sw s8, 24(sp) -80000278: 93 04 05 00 mv s1, a0 -8000027c: 83 2b 05 00 lw s7, 0(a0) -80000280: 03 24 45 00 lw s0, 4(a0) -80000284: 03 2c 85 00 lw s8, 8(a0) -80000288: 13 09 06 00 mv s2, a2 -8000028c: 93 89 05 00 mv s3, a1 -80000290: ef 00 80 61 jal 1560 -80000294: 13 0b 05 00 mv s6, a0 -80000298: ef 00 80 60 jal 1544 -8000029c: 13 0a 05 00 mv s4, a0 -800002a0: ef 00 80 5f jal 1528 -800002a4: 93 0a 05 00 mv s5, a0 -800002a8: ef 00 80 5e jal 1512 -800002ac: 93 05 70 00 addi a1, zero, 7 -800002b0: 63 ca a5 0e blt a1, a0, 244 -800002b4: b3 05 74 03 mul a1, s0, s7 -800002b8: 33 86 85 03 mul a2, a1, s8 -800002bc: b3 85 4a 03 mul a1, s5, s4 -800002c0: 93 06 10 00 addi a3, zero, 1 -800002c4: 63 c8 c5 00 blt a1, a2, 16 -800002c8: 63 da 66 01 bge a3, s6, 20 -800002cc: 63 4c d5 00 blt a0, a3, 24 -800002d0: 6f 00 40 0d j 212 -800002d4: b3 46 b6 02 div a3, a2, a1 -800002d8: e3 ca 66 ff blt a3, s6, -12 -800002dc: 93 06 0b 00 mv a3, s6 -800002e0: 63 52 d5 0c bge a0, a3, 196 -800002e4: 13 07 fb ff addi a4, s6, -1 -800002e8: b3 45 d6 02 div a1, a2, a3 -800002ec: 63 0e e5 00 beq a0, a4, 28 -800002f0: 13 06 00 00 mv a2, zero -800002f4: 33 0b b6 00 add s6, a2, a1 -800002f8: 33 46 5b 03 div a2, s6, s5 -800002fc: 93 06 00 00 mv a3, zero -80000300: 63 50 46 03 bge a2, s4, 32 -80000304: 6f 00 00 02 j 32 -80000308: b3 86 d5 02 mul a3, a1, a3 -8000030c: 33 06 d6 40 sub a2, a2, a3 -80000310: 33 0b b6 00 add s6, a2, a1 -80000314: 33 46 5b 03 div a2, s6, s5 -80000318: 93 06 00 00 mv a3, zero -8000031c: 63 44 46 01 blt a2, s4, 8 -80000320: b3 46 46 03 div a3, a2, s4 -80000324: 13 07 00 00 mv a4, zero -80000328: 93 07 10 00 addi a5, zero, 1 -8000032c: 63 88 06 00 beqz a3, 16 -80000330: 33 87 46 03 mul a4, a3, s4 -80000334: 33 07 e6 40 sub a4, a2, a4 -80000338: 93 87 06 00 mv a5, a3 -8000033c: 33 04 56 03 mul s0, a2, s5 +80000248 kernel_spawn: +80000248: 13 01 01 fc addi sp, sp, -64 +8000024c: 23 2e 11 02 sw ra, 60(sp) +80000250: 23 2c 81 02 sw s0, 56(sp) +80000254: 23 2a 91 02 sw s1, 52(sp) +80000258: 23 28 21 03 sw s2, 48(sp) +8000025c: 23 26 31 03 sw s3, 44(sp) +80000260: 23 24 41 03 sw s4, 40(sp) +80000264: 23 22 51 03 sw s5, 36(sp) +80000268: 23 20 61 03 sw s6, 32(sp) +8000026c: 23 2e 71 01 sw s7, 28(sp) +80000270: 23 2c 81 01 sw s8, 24(sp) +80000274: 93 04 05 00 mv s1, a0 +80000278: 83 2b 05 00 lw s7, 0(a0) +8000027c: 03 24 45 00 lw s0, 4(a0) +80000280: 03 2c 85 00 lw s8, 8(a0) +80000284: 13 09 06 00 mv s2, a2 +80000288: 93 89 05 00 mv s3, a1 +8000028c: ef 00 c0 69 jal 1692 +80000290: 13 0b 05 00 mv s6, a0 +80000294: ef 00 c0 68 jal 1676 +80000298: 13 0a 05 00 mv s4, a0 +8000029c: ef 00 c0 67 jal 1660 +800002a0: 93 0a 05 00 mv s5, a0 +800002a4: ef 00 c0 66 jal 1644 +800002a8: 93 05 f0 00 addi a1, zero, 15 +800002ac: 63 cc a5 16 blt a1, a0, 376 +800002b0: b3 05 74 03 mul a1, s0, s7 +800002b4: 33 86 85 03 mul a2, a1, s8 +800002b8: b3 85 4a 03 mul a1, s5, s4 +800002bc: 93 06 10 00 addi a3, zero, 1 +800002c0: 63 c8 c5 00 blt a1, a2, 16 +800002c4: 63 da 66 01 bge a3, s6, 20 +800002c8: 63 4c d5 00 blt a0, a3, 24 +800002cc: 6f 00 80 15 j 344 +800002d0: b3 46 b6 02 div a3, a2, a1 +800002d4: e3 ca 66 ff blt a3, s6, -12 +800002d8: 93 06 0b 00 mv a3, s6 +800002dc: 63 54 d5 14 bge a0, a3, 328 +800002e0: 13 07 fb ff addi a4, s6, -1 +800002e4: b3 45 d6 02 div a1, a2, a3 +800002e8: 63 0e e5 00 beq a0, a4, 28 +800002ec: 13 06 00 00 mv a2, zero +800002f0: b3 06 b6 00 add a3, a2, a1 +800002f4: 33 c6 56 03 div a2, a3, s5 +800002f8: 13 07 00 00 mv a4, zero +800002fc: 63 50 46 03 bge a2, s4, 32 +80000300: 6f 00 00 02 j 32 +80000304: b3 86 d5 02 mul a3, a1, a3 +80000308: 33 06 d6 40 sub a2, a2, a3 +8000030c: b3 06 b6 00 add a3, a2, a1 +80000310: 33 c6 56 03 div a2, a3, s5 +80000314: 13 07 00 00 mv a4, zero +80000318: 63 44 46 01 blt a2, s4, 8 +8000031c: 33 47 46 03 div a4, a2, s4 +80000320: 93 07 00 00 mv a5, zero +80000324: b3 0a 56 03 mul s5, a2, s5 +80000328: 13 04 10 00 addi s0, zero, 1 +8000032c: 63 08 07 00 beqz a4, 16 +80000330: b3 07 47 03 mul a5, a4, s4 +80000334: b3 07 f6 40 sub a5, a2, a5 +80000338: 13 04 07 00 mv s0, a4 +8000033c: 33 8b 56 41 sub s6, a3, s5 80000340: 23 20 91 00 sw s1, 0(sp) 80000344: 23 22 31 01 sw s3, 4(sp) 80000348: 23 24 21 01 sw s2, 8(sp) 8000034c: b3 85 a5 02 mul a1, a1, a0 80000350: 23 26 b1 00 sw a1, 12(sp) -80000354: 23 28 f1 00 sw a5, 16(sp) -80000358: 23 2a e1 00 sw a4, 20(sp) +80000354: 23 28 81 00 sw s0, 16(sp) +80000358: 23 2a f1 00 sw a5, 20(sp) 8000035c: b7 15 00 80 lui a1, 524289 80000360: 93 85 c5 43 addi a1, a1, 1084 80000364: 13 15 25 00 slli a0, a0, 2 @@ -243,752 +243,784 @@ Disassembly of section .text: 80000384: 37 05 00 80 lui a0, 524288 80000388: 93 05 85 06 addi a1, a0, 104 8000038c: 13 05 06 00 mv a0, a2 -80000390: ef 00 00 4b jal 1200 +80000390: ef 00 00 53 jal 1328 80000394: ef f0 5f cd jal -812 -80000398: 63 06 8b 00 beq s6, s0, 12 -8000039c: 23 26 81 00 sw s0, 12(sp) -800003a0: ef f0 9f cc jal -824 -800003a4: 03 2c 81 01 lw s8, 24(sp) -800003a8: 83 2b c1 01 lw s7, 28(sp) -800003ac: 03 2b 01 02 lw s6, 32(sp) -800003b0: 83 2a 41 02 lw s5, 36(sp) -800003b4: 03 2a 81 02 lw s4, 40(sp) -800003b8: 83 29 c1 02 lw s3, 44(sp) -800003bc: 03 29 01 03 lw s2, 48(sp) -800003c0: 83 24 41 03 lw s1, 52(sp) -800003c4: 03 24 81 03 lw s0, 56(sp) -800003c8: 83 20 c1 03 lw ra, 60(sp) -800003cc: 13 01 01 04 addi sp, sp, 64 -800003d0: 67 80 00 00 ret +80000398: 63 06 0b 08 beqz s6, 140 +8000039c: 23 26 51 01 sw s5, 12(sp) +800003a0: 13 05 0b 00 mv a0, s6 +800003a4: ef 00 40 52 jal 1316 +800003a8: ef 00 80 56 jal 1384 +800003ac: 13 04 05 00 mv s0, a0 +800003b0: ef 00 80 55 jal 1368 +800003b4: b7 15 00 80 lui a1, 524289 +800003b8: 93 85 c5 43 addi a1, a1, 1084 +800003bc: 13 16 24 00 slli a2, s0, 2 +800003c0: b3 05 b6 00 add a1, a2, a1 +800003c4: 03 a6 05 00 lw a2, 0(a1) +800003c8: 83 25 06 00 lw a1, 0(a2) +800003cc: 83 26 c6 00 lw a3, 12(a2) +800003d0: 03 a7 05 00 lw a4, 0(a1) +800003d4: 83 a7 45 00 lw a5, 4(a1) +800003d8: 33 85 a6 00 add a0, a3, a0 +800003dc: b3 86 e7 02 mul a3, a5, a4 +800003e0: b3 47 d5 02 div a5, a0, a3 +800003e4: b3 86 d7 02 mul a3, a5, a3 +800003e8: 83 a4 c5 00 lw s1, 12(a1) +800003ec: 33 05 d5 40 sub a0, a0, a3 +800003f0: b3 46 e5 02 div a3, a0, a4 +800003f4: 33 88 e6 02 mul a6, a3, a4 +800003f8: b3 84 a4 00 add s1, s1, a0 +800003fc: 03 a4 05 01 lw s0, 16(a1) +80000400: 03 a7 45 01 lw a4, 20(a1) +80000404: 83 28 46 00 lw a7, 4(a2) +80000408: 03 25 86 00 lw a0, 8(a2) +8000040c: 33 86 04 41 sub a2, s1, a6 +80000410: b3 06 d4 00 add a3, s0, a3 +80000414: 33 07 f7 00 add a4, a4, a5 +80000418: e7 80 08 00 jalr a7 +8000041c: 13 05 10 00 addi a0, zero, 1 +80000420: ef 00 80 4a jal 1192 +80000424: 03 2c 81 01 lw s8, 24(sp) +80000428: 83 2b c1 01 lw s7, 28(sp) +8000042c: 03 2b 01 02 lw s6, 32(sp) +80000430: 83 2a 41 02 lw s5, 36(sp) +80000434: 03 2a 81 02 lw s4, 40(sp) +80000438: 83 29 c1 02 lw s3, 44(sp) +8000043c: 03 29 01 03 lw s2, 48(sp) +80000440: 83 24 41 03 lw s1, 52(sp) +80000444: 03 24 81 03 lw s0, 56(sp) +80000448: 83 20 c1 03 lw ra, 60(sp) +8000044c: 13 01 01 04 addi sp, sp, 64 +80000450: 67 80 00 00 ret -800003d4 main: -800003d4: 13 01 01 ff addi sp, sp, -16 -800003d8: 23 26 11 00 sw ra, 12(sp) -800003dc: 37 05 00 80 lui a0, 524288 -800003e0: 93 05 c5 56 addi a1, a0, 1388 -800003e4: 37 05 ff 7f lui a0, 524272 -800003e8: 13 06 45 03 addi a2, a0, 52 -800003ec: 37 05 ff 7f lui a0, 524272 -800003f0: ef f0 df e5 jal -420 -800003f4: 13 05 00 00 mv a0, zero -800003f8: 83 20 c1 00 lw ra, 12(sp) -800003fc: 13 01 01 01 addi sp, sp, 16 -80000400: 67 80 00 00 ret +80000454 main: +80000454: 13 01 01 ff addi sp, sp, -16 +80000458: 23 26 11 00 sw ra, 12(sp) +8000045c: 37 05 00 80 lui a0, 524288 +80000460: 93 05 c5 5e addi a1, a0, 1516 +80000464: 37 05 ff 7f lui a0, 524272 +80000468: 13 06 45 03 addi a2, a0, 52 +8000046c: 37 05 ff 7f lui a0, 524272 +80000470: ef f0 9f dd jal -552 +80000474: 13 05 00 00 mv a0, zero +80000478: 83 20 c1 00 lw ra, 12(sp) +8000047c: 13 01 01 01 addi sp, sp, 16 +80000480: 67 80 00 00 ret -80000404 _Z8_cl_sqrtf: -80000404: 13 01 01 ff addi sp, sp, -16 -80000408: 23 26 11 00 sw ra, 12(sp) -8000040c: 23 24 81 00 sw s0, 8(sp) -80000410: 13 04 01 01 addi s0, sp, 16 -80000414: 03 24 81 00 lw s0, 8(sp) -80000418: 83 20 c1 00 lw ra, 12(sp) -8000041c: 13 01 01 01 addi sp, sp, 16 -80000420: 17 03 00 00 auipc t1, 0 -80000424: 67 00 03 4a jr 1184(t1) +80000484 _Z8_cl_sqrtf: +80000484: 13 01 01 ff addi sp, sp, -16 +80000488: 23 26 11 00 sw ra, 12(sp) +8000048c: 23 24 81 00 sw s0, 8(sp) +80000490: 13 04 01 01 addi s0, sp, 16 +80000494: 03 24 81 00 lw s0, 8(sp) +80000498: 83 20 c1 00 lw ra, 12(sp) +8000049c: 13 01 01 01 addi sp, sp, 16 +800004a0: 17 03 00 00 auipc t1, 0 +800004a4: 67 00 03 4a jr 1184(t1) -80000428 _pocl_kernel_NearestNeighbor: -80000428: 13 01 01 fc addi sp, sp, -64 -8000042c: 23 2e 11 02 sw ra, 60(sp) -80000430: 23 2c 81 02 sw s0, 56(sp) -80000434: 23 2a 91 02 sw s1, 52(sp) -80000438: 23 28 21 03 sw s2, 48(sp) -8000043c: 23 26 31 03 sw s3, 44(sp) -80000440: 23 24 41 03 sw s4, 40(sp) -80000444: 23 22 51 03 sw s5, 36(sp) -80000448: 23 20 61 03 sw s6, 32(sp) -8000044c: 23 2e 71 01 sw s7, 28(sp) -80000450: 23 2c 81 01 sw s8, 24(sp) -80000454: 23 2a 91 01 sw s9, 20(sp) -80000458: 23 28 a1 01 sw s10, 16(sp) -8000045c: 23 26 b1 01 sw s11, 12(sp) -80000460: 27 24 81 00 fsw fs0, 8(sp) -80000464: 27 22 91 00 fsw fs1, 4(sp) -80000468: 13 04 01 04 addi s0, sp, 64 -8000046c: 13 71 c1 ff andi sp, sp, -4 -80000470: 53 84 b5 20 fmv.s fs0, fa1 -80000474: d3 04 a5 20 fmv.s fs1, fa0 -80000478: 93 09 06 00 mv s3, a2 -8000047c: 13 09 00 00 mv s2, zero -80000480: 03 ab 86 01 lw s6, 24(a3) -80000484: 03 a6 c6 00 lw a2, 12(a3) -80000488: 03 aa c6 01 lw s4, 28(a3) -8000048c: 83 a6 06 02 lw a3, 32(a3) -80000490: 23 20 d1 00 sw a3, 0(sp) -80000494: b3 06 eb 02 mul a3, s6, a4 -80000498: 33 0d d6 00 add s10, a2, a3 -8000049c: 13 16 3d 00 slli a2, s10, 3 -800004a0: 33 05 c5 00 add a0, a0, a2 -800004a4: 13 16 2d 00 slli a2, s10, 2 -800004a8: b3 8b c5 00 add s7, a1, a2 -800004ac: 93 0a 45 00 addi s5, a0, 4 -800004b0: 6f 00 00 01 j 16 -800004b4: 13 09 19 00 addi s2, s2, 1 -800004b8: 03 25 01 00 lw a0, 0(sp) -800004bc: 63 74 a9 06 bgeu s2, a0, 104 -800004c0: 93 0c 00 00 mv s9, zero -800004c4: 6f 00 c0 00 j 12 -800004c8: 93 8c 1c 00 addi s9, s9, 1 -800004cc: e3 f4 4c ff bgeu s9, s4, -24 -800004d0: 93 04 00 00 mv s1, zero -800004d4: 93 8d 0b 00 mv s11, s7 -800004d8: 13 8c 0a 00 mv s8, s5 -800004dc: 6f 00 40 01 j 20 -800004e0: 93 84 14 00 addi s1, s1, 1 -800004e4: 13 0c 8c 00 addi s8, s8, 8 -800004e8: 93 8d 4d 00 addi s11, s11, 4 -800004ec: e3 fe 64 fd bgeu s1, s6, -36 -800004f0: 33 05 9d 00 add a0, s10, s1 -800004f4: e3 56 35 ff bge a0, s3, -20 -800004f8: 07 20 cc ff flw ft0, -4(s8) -800004fc: 87 20 0c 00 flw ft1, 0(s8) -80000500: 53 f0 04 08 fsub.s ft0, fs1, ft0 -80000504: d3 70 14 08 fsub.s ft1, fs0, ft1 -80000508: d3 f0 10 10 fmul.s ft1, ft1, ft1 -8000050c: 53 70 00 10 fmul.s ft0, ft0, ft0 -80000510: 53 75 10 00 fadd.s fa0, ft0, ft1 -80000514: 97 00 00 00 auipc ra, 0 -80000518: e7 80 00 ef jalr -272(ra) -8000051c: 27 a0 ad 00 fsw fa0, 0(s11) -80000520: 6f f0 1f fc j -64 -80000524: 13 01 04 fc addi sp, s0, -64 -80000528: 87 24 41 00 flw fs1, 4(sp) -8000052c: 07 24 81 00 flw fs0, 8(sp) -80000530: 83 2d c1 00 lw s11, 12(sp) -80000534: 03 2d 01 01 lw s10, 16(sp) -80000538: 83 2c 41 01 lw s9, 20(sp) -8000053c: 03 2c 81 01 lw s8, 24(sp) -80000540: 83 2b c1 01 lw s7, 28(sp) -80000544: 03 2b 01 02 lw s6, 32(sp) -80000548: 83 2a 41 02 lw s5, 36(sp) -8000054c: 03 2a 81 02 lw s4, 40(sp) -80000550: 83 29 c1 02 lw s3, 44(sp) -80000554: 03 29 01 03 lw s2, 48(sp) -80000558: 83 24 41 03 lw s1, 52(sp) -8000055c: 03 24 81 03 lw s0, 56(sp) -80000560: 83 20 c1 03 lw ra, 60(sp) -80000564: 13 01 01 04 addi sp, sp, 64 -80000568: 67 80 00 00 ret +800004a8 _pocl_kernel_NearestNeighbor: +800004a8: 13 01 01 fc addi sp, sp, -64 +800004ac: 23 2e 11 02 sw ra, 60(sp) +800004b0: 23 2c 81 02 sw s0, 56(sp) +800004b4: 23 2a 91 02 sw s1, 52(sp) +800004b8: 23 28 21 03 sw s2, 48(sp) +800004bc: 23 26 31 03 sw s3, 44(sp) +800004c0: 23 24 41 03 sw s4, 40(sp) +800004c4: 23 22 51 03 sw s5, 36(sp) +800004c8: 23 20 61 03 sw s6, 32(sp) +800004cc: 23 2e 71 01 sw s7, 28(sp) +800004d0: 23 2c 81 01 sw s8, 24(sp) +800004d4: 23 2a 91 01 sw s9, 20(sp) +800004d8: 23 28 a1 01 sw s10, 16(sp) +800004dc: 23 26 b1 01 sw s11, 12(sp) +800004e0: 27 24 81 00 fsw fs0, 8(sp) +800004e4: 27 22 91 00 fsw fs1, 4(sp) +800004e8: 13 04 01 04 addi s0, sp, 64 +800004ec: 13 71 c1 ff andi sp, sp, -4 +800004f0: 53 84 b5 20 fmv.s fs0, fa1 +800004f4: d3 04 a5 20 fmv.s fs1, fa0 +800004f8: 93 09 06 00 mv s3, a2 +800004fc: 13 09 00 00 mv s2, zero +80000500: 03 ab 86 01 lw s6, 24(a3) +80000504: 03 a6 c6 00 lw a2, 12(a3) +80000508: 03 aa c6 01 lw s4, 28(a3) +8000050c: 83 a6 06 02 lw a3, 32(a3) +80000510: 23 20 d1 00 sw a3, 0(sp) +80000514: b3 06 eb 02 mul a3, s6, a4 +80000518: 33 0d d6 00 add s10, a2, a3 +8000051c: 13 16 3d 00 slli a2, s10, 3 +80000520: 33 05 c5 00 add a0, a0, a2 +80000524: 13 16 2d 00 slli a2, s10, 2 +80000528: b3 8b c5 00 add s7, a1, a2 +8000052c: 93 0a 45 00 addi s5, a0, 4 +80000530: 6f 00 00 01 j 16 +80000534: 13 09 19 00 addi s2, s2, 1 +80000538: 03 25 01 00 lw a0, 0(sp) +8000053c: 63 74 a9 06 bgeu s2, a0, 104 +80000540: 93 0c 00 00 mv s9, zero +80000544: 6f 00 c0 00 j 12 +80000548: 93 8c 1c 00 addi s9, s9, 1 +8000054c: e3 f4 4c ff bgeu s9, s4, -24 +80000550: 93 04 00 00 mv s1, zero +80000554: 93 8d 0b 00 mv s11, s7 +80000558: 13 8c 0a 00 mv s8, s5 +8000055c: 6f 00 40 01 j 20 +80000560: 93 84 14 00 addi s1, s1, 1 +80000564: 13 0c 8c 00 addi s8, s8, 8 +80000568: 93 8d 4d 00 addi s11, s11, 4 +8000056c: e3 fe 64 fd bgeu s1, s6, -36 +80000570: 33 05 9d 00 add a0, s10, s1 +80000574: e3 56 35 ff bge a0, s3, -20 +80000578: 07 20 cc ff flw ft0, -4(s8) +8000057c: 87 20 0c 00 flw ft1, 0(s8) +80000580: 53 f0 04 08 fsub.s ft0, fs1, ft0 +80000584: d3 70 14 08 fsub.s ft1, fs0, ft1 +80000588: d3 f0 10 10 fmul.s ft1, ft1, ft1 +8000058c: 53 70 00 10 fmul.s ft0, ft0, ft0 +80000590: 53 75 10 00 fadd.s fa0, ft0, ft1 +80000594: 97 00 00 00 auipc ra, 0 +80000598: e7 80 00 ef jalr -272(ra) +8000059c: 27 a0 ad 00 fsw fa0, 0(s11) +800005a0: 6f f0 1f fc j -64 +800005a4: 13 01 04 fc addi sp, s0, -64 +800005a8: 87 24 41 00 flw fs1, 4(sp) +800005ac: 07 24 81 00 flw fs0, 8(sp) +800005b0: 83 2d c1 00 lw s11, 12(sp) +800005b4: 03 2d 01 01 lw s10, 16(sp) +800005b8: 83 2c 41 01 lw s9, 20(sp) +800005bc: 03 2c 81 01 lw s8, 24(sp) +800005c0: 83 2b c1 01 lw s7, 28(sp) +800005c4: 03 2b 01 02 lw s6, 32(sp) +800005c8: 83 2a 41 02 lw s5, 36(sp) +800005cc: 03 2a 81 02 lw s4, 40(sp) +800005d0: 83 29 c1 02 lw s3, 44(sp) +800005d4: 03 29 01 03 lw s2, 48(sp) +800005d8: 83 24 41 03 lw s1, 52(sp) +800005dc: 03 24 81 03 lw s0, 56(sp) +800005e0: 83 20 c1 03 lw ra, 60(sp) +800005e4: 13 01 01 04 addi sp, sp, 64 +800005e8: 67 80 00 00 ret -8000056c _pocl_kernel_NearestNeighbor_workgroup: -8000056c: 13 01 01 fc addi sp, sp, -64 -80000570: 23 2e 11 02 sw ra, 60(sp) -80000574: 23 2c 81 02 sw s0, 56(sp) -80000578: 23 2a 91 02 sw s1, 52(sp) -8000057c: 23 28 21 03 sw s2, 48(sp) -80000580: 23 26 31 03 sw s3, 44(sp) -80000584: 23 24 41 03 sw s4, 40(sp) -80000588: 23 22 51 03 sw s5, 36(sp) -8000058c: 23 20 61 03 sw s6, 32(sp) -80000590: 23 2e 71 01 sw s7, 28(sp) -80000594: 23 2c 81 01 sw s8, 24(sp) -80000598: 23 2a 91 01 sw s9, 20(sp) -8000059c: 23 28 a1 01 sw s10, 16(sp) -800005a0: 23 26 b1 01 sw s11, 12(sp) -800005a4: 27 24 81 00 fsw fs0, 8(sp) -800005a8: 27 22 91 00 fsw fs1, 4(sp) -800005ac: 83 26 05 00 lw a3, 0(a0) -800005b0: 13 09 00 00 mv s2, zero -800005b4: 83 a6 06 00 lw a3, 0(a3) -800005b8: 03 27 45 00 lw a4, 4(a0) -800005bc: 83 27 85 00 lw a5, 8(a0) -800005c0: 83 24 c5 00 lw s1, 12(a0) -800005c4: 03 25 05 01 lw a0, 16(a0) -800005c8: 03 27 07 00 lw a4, 0(a4) -800005cc: 83 aa 07 00 lw s5, 0(a5) -800005d0: 07 a4 04 00 flw fs0, 0(s1) -800005d4: 87 24 05 00 flw fs1, 0(a0) -800005d8: 83 ac 85 01 lw s9, 24(a1) -800005dc: 03 a5 c5 00 lw a0, 12(a1) -800005e0: 83 a9 c5 01 lw s3, 28(a1) -800005e4: 03 aa 05 02 lw s4, 32(a1) -800005e8: b3 85 cc 02 mul a1, s9, a2 -800005ec: 33 0d b5 00 add s10, a0, a1 -800005f0: 13 15 3d 00 slli a0, s10, 3 -800005f4: 33 85 a6 00 add a0, a3, a0 -800005f8: 93 15 2d 00 slli a1, s10, 2 -800005fc: 33 0b b7 00 add s6, a4, a1 -80000600: 93 0b 45 00 addi s7, a0, 4 -80000604: 6f 00 c0 00 j 12 -80000608: 13 09 19 00 addi s2, s2, 1 -8000060c: 63 74 49 07 bgeu s2, s4, 104 -80000610: 13 0c 00 00 mv s8, zero -80000614: 6f 00 c0 00 j 12 -80000618: 13 0c 1c 00 addi s8, s8, 1 -8000061c: e3 76 3c ff bgeu s8, s3, -20 -80000620: 13 04 00 00 mv s0, zero -80000624: 93 0d 0b 00 mv s11, s6 -80000628: 93 84 0b 00 mv s1, s7 -8000062c: 6f 00 40 01 j 20 -80000630: 13 04 14 00 addi s0, s0, 1 -80000634: 93 84 84 00 addi s1, s1, 8 -80000638: 93 8d 4d 00 addi s11, s11, 4 -8000063c: e3 7e 94 fd bgeu s0, s9, -36 -80000640: 33 05 8d 00 add a0, s10, s0 -80000644: e3 56 55 ff bge a0, s5, -20 -80000648: 07 a0 c4 ff flw ft0, -4(s1) -8000064c: 87 a0 04 00 flw ft1, 0(s1) -80000650: 53 70 04 08 fsub.s ft0, fs0, ft0 -80000654: d3 f0 14 08 fsub.s ft1, fs1, ft1 -80000658: d3 f0 10 10 fmul.s ft1, ft1, ft1 -8000065c: 53 70 00 10 fmul.s ft0, ft0, ft0 -80000660: 53 75 10 00 fadd.s fa0, ft0, ft1 -80000664: 97 00 00 00 auipc ra, 0 -80000668: e7 80 00 da jalr -608(ra) -8000066c: 27 a0 ad 00 fsw fa0, 0(s11) -80000670: 6f f0 1f fc j -64 -80000674: 87 24 41 00 flw fs1, 4(sp) -80000678: 07 24 81 00 flw fs0, 8(sp) -8000067c: 83 2d c1 00 lw s11, 12(sp) -80000680: 03 2d 01 01 lw s10, 16(sp) -80000684: 83 2c 41 01 lw s9, 20(sp) -80000688: 03 2c 81 01 lw s8, 24(sp) -8000068c: 83 2b c1 01 lw s7, 28(sp) -80000690: 03 2b 01 02 lw s6, 32(sp) -80000694: 83 2a 41 02 lw s5, 36(sp) -80000698: 03 2a 81 02 lw s4, 40(sp) -8000069c: 83 29 c1 02 lw s3, 44(sp) -800006a0: 03 29 01 03 lw s2, 48(sp) -800006a4: 83 24 41 03 lw s1, 52(sp) -800006a8: 03 24 81 03 lw s0, 56(sp) -800006ac: 83 20 c1 03 lw ra, 60(sp) -800006b0: 13 01 01 04 addi sp, sp, 64 -800006b4: 67 80 00 00 ret +800005ec _pocl_kernel_NearestNeighbor_workgroup: +800005ec: 13 01 01 fc addi sp, sp, -64 +800005f0: 23 2e 11 02 sw ra, 60(sp) +800005f4: 23 2c 81 02 sw s0, 56(sp) +800005f8: 23 2a 91 02 sw s1, 52(sp) +800005fc: 23 28 21 03 sw s2, 48(sp) +80000600: 23 26 31 03 sw s3, 44(sp) +80000604: 23 24 41 03 sw s4, 40(sp) +80000608: 23 22 51 03 sw s5, 36(sp) +8000060c: 23 20 61 03 sw s6, 32(sp) +80000610: 23 2e 71 01 sw s7, 28(sp) +80000614: 23 2c 81 01 sw s8, 24(sp) +80000618: 23 2a 91 01 sw s9, 20(sp) +8000061c: 23 28 a1 01 sw s10, 16(sp) +80000620: 23 26 b1 01 sw s11, 12(sp) +80000624: 27 24 81 00 fsw fs0, 8(sp) +80000628: 27 22 91 00 fsw fs1, 4(sp) +8000062c: 83 26 05 00 lw a3, 0(a0) +80000630: 13 09 00 00 mv s2, zero +80000634: 83 a6 06 00 lw a3, 0(a3) +80000638: 03 27 45 00 lw a4, 4(a0) +8000063c: 83 27 85 00 lw a5, 8(a0) +80000640: 83 24 c5 00 lw s1, 12(a0) +80000644: 03 25 05 01 lw a0, 16(a0) +80000648: 03 27 07 00 lw a4, 0(a4) +8000064c: 83 aa 07 00 lw s5, 0(a5) +80000650: 07 a4 04 00 flw fs0, 0(s1) +80000654: 87 24 05 00 flw fs1, 0(a0) +80000658: 83 ac 85 01 lw s9, 24(a1) +8000065c: 03 a5 c5 00 lw a0, 12(a1) +80000660: 83 a9 c5 01 lw s3, 28(a1) +80000664: 03 aa 05 02 lw s4, 32(a1) +80000668: b3 85 cc 02 mul a1, s9, a2 +8000066c: 33 0d b5 00 add s10, a0, a1 +80000670: 13 15 3d 00 slli a0, s10, 3 +80000674: 33 85 a6 00 add a0, a3, a0 +80000678: 93 15 2d 00 slli a1, s10, 2 +8000067c: 33 0b b7 00 add s6, a4, a1 +80000680: 93 0b 45 00 addi s7, a0, 4 +80000684: 6f 00 c0 00 j 12 +80000688: 13 09 19 00 addi s2, s2, 1 +8000068c: 63 74 49 07 bgeu s2, s4, 104 +80000690: 13 0c 00 00 mv s8, zero +80000694: 6f 00 c0 00 j 12 +80000698: 13 0c 1c 00 addi s8, s8, 1 +8000069c: e3 76 3c ff bgeu s8, s3, -20 +800006a0: 13 04 00 00 mv s0, zero +800006a4: 93 0d 0b 00 mv s11, s6 +800006a8: 93 84 0b 00 mv s1, s7 +800006ac: 6f 00 40 01 j 20 +800006b0: 13 04 14 00 addi s0, s0, 1 +800006b4: 93 84 84 00 addi s1, s1, 8 +800006b8: 93 8d 4d 00 addi s11, s11, 4 +800006bc: e3 7e 94 fd bgeu s0, s9, -36 +800006c0: 33 05 8d 00 add a0, s10, s0 +800006c4: e3 56 55 ff bge a0, s5, -20 +800006c8: 07 a0 c4 ff flw ft0, -4(s1) +800006cc: 87 a0 04 00 flw ft1, 0(s1) +800006d0: 53 70 04 08 fsub.s ft0, fs0, ft0 +800006d4: d3 f0 14 08 fsub.s ft1, fs1, ft1 +800006d8: d3 f0 10 10 fmul.s ft1, ft1, ft1 +800006dc: 53 70 00 10 fmul.s ft0, ft0, ft0 +800006e0: 53 75 10 00 fadd.s fa0, ft0, ft1 +800006e4: 97 00 00 00 auipc ra, 0 +800006e8: e7 80 00 da jalr -608(ra) +800006ec: 27 a0 ad 00 fsw fa0, 0(s11) +800006f0: 6f f0 1f fc j -64 +800006f4: 87 24 41 00 flw fs1, 4(sp) +800006f8: 07 24 81 00 flw fs0, 8(sp) +800006fc: 83 2d c1 00 lw s11, 12(sp) +80000700: 03 2d 01 01 lw s10, 16(sp) +80000704: 83 2c 41 01 lw s9, 20(sp) +80000708: 03 2c 81 01 lw s8, 24(sp) +8000070c: 83 2b c1 01 lw s7, 28(sp) +80000710: 03 2b 01 02 lw s6, 32(sp) +80000714: 83 2a 41 02 lw s5, 36(sp) +80000718: 03 2a 81 02 lw s4, 40(sp) +8000071c: 83 29 c1 02 lw s3, 44(sp) +80000720: 03 29 01 03 lw s2, 48(sp) +80000724: 83 24 41 03 lw s1, 52(sp) +80000728: 03 24 81 03 lw s0, 56(sp) +8000072c: 83 20 c1 03 lw ra, 60(sp) +80000730: 13 01 01 04 addi sp, sp, 64 +80000734: 67 80 00 00 ret -800006b8 _pocl_kernel_NearestNeighbor_workgroup_fast: -800006b8: 13 01 01 fc addi sp, sp, -64 -800006bc: 23 2e 11 02 sw ra, 60(sp) -800006c0: 23 2c 81 02 sw s0, 56(sp) -800006c4: 23 2a 91 02 sw s1, 52(sp) -800006c8: 23 28 21 03 sw s2, 48(sp) -800006cc: 23 26 31 03 sw s3, 44(sp) -800006d0: 23 24 41 03 sw s4, 40(sp) -800006d4: 23 22 51 03 sw s5, 36(sp) -800006d8: 23 20 61 03 sw s6, 32(sp) -800006dc: 23 2e 71 01 sw s7, 28(sp) -800006e0: 23 2c 81 01 sw s8, 24(sp) -800006e4: 23 2a 91 01 sw s9, 20(sp) -800006e8: 23 28 a1 01 sw s10, 16(sp) -800006ec: 23 26 b1 01 sw s11, 12(sp) -800006f0: 27 24 81 00 fsw fs0, 8(sp) -800006f4: 27 22 91 00 fsw fs1, 4(sp) -800006f8: 13 09 00 00 mv s2, zero -800006fc: 83 26 05 00 lw a3, 0(a0) -80000700: 03 27 85 00 lw a4, 8(a0) -80000704: 83 27 c5 00 lw a5, 12(a0) -80000708: 83 24 05 01 lw s1, 16(a0) -8000070c: 03 25 45 00 lw a0, 4(a0) -80000710: 83 2a 07 00 lw s5, 0(a4) -80000714: 07 a4 07 00 flw fs0, 0(a5) -80000718: 87 a4 04 00 flw fs1, 0(s1) -8000071c: 83 ac 85 01 lw s9, 24(a1) -80000720: 03 a7 c5 00 lw a4, 12(a1) -80000724: 83 a9 c5 01 lw s3, 28(a1) -80000728: 03 aa 05 02 lw s4, 32(a1) -8000072c: b3 85 cc 02 mul a1, s9, a2 -80000730: 33 0d b7 00 add s10, a4, a1 -80000734: 93 15 3d 00 slli a1, s10, 3 -80000738: b3 85 b6 00 add a1, a3, a1 -8000073c: 13 16 2d 00 slli a2, s10, 2 -80000740: 33 0b c5 00 add s6, a0, a2 -80000744: 93 8b 45 00 addi s7, a1, 4 -80000748: 6f 00 c0 00 j 12 -8000074c: 13 09 19 00 addi s2, s2, 1 -80000750: 63 74 49 07 bgeu s2, s4, 104 -80000754: 13 0c 00 00 mv s8, zero -80000758: 6f 00 c0 00 j 12 -8000075c: 13 0c 1c 00 addi s8, s8, 1 -80000760: e3 76 3c ff bgeu s8, s3, -20 -80000764: 13 04 00 00 mv s0, zero -80000768: 93 0d 0b 00 mv s11, s6 -8000076c: 93 84 0b 00 mv s1, s7 -80000770: 6f 00 40 01 j 20 -80000774: 13 04 14 00 addi s0, s0, 1 -80000778: 93 84 84 00 addi s1, s1, 8 -8000077c: 93 8d 4d 00 addi s11, s11, 4 -80000780: e3 7e 94 fd bgeu s0, s9, -36 -80000784: 33 05 8d 00 add a0, s10, s0 -80000788: e3 56 55 ff bge a0, s5, -20 -8000078c: 07 a0 c4 ff flw ft0, -4(s1) -80000790: 87 a0 04 00 flw ft1, 0(s1) -80000794: 53 70 04 08 fsub.s ft0, fs0, ft0 -80000798: d3 f0 14 08 fsub.s ft1, fs1, ft1 -8000079c: d3 f0 10 10 fmul.s ft1, ft1, ft1 -800007a0: 53 70 00 10 fmul.s ft0, ft0, ft0 -800007a4: 53 75 10 00 fadd.s fa0, ft0, ft1 -800007a8: 97 00 00 00 auipc ra, 0 -800007ac: e7 80 c0 c5 jalr -932(ra) -800007b0: 27 a0 ad 00 fsw fa0, 0(s11) -800007b4: 6f f0 1f fc j -64 -800007b8: 87 24 41 00 flw fs1, 4(sp) -800007bc: 07 24 81 00 flw fs0, 8(sp) -800007c0: 83 2d c1 00 lw s11, 12(sp) -800007c4: 03 2d 01 01 lw s10, 16(sp) -800007c8: 83 2c 41 01 lw s9, 20(sp) -800007cc: 03 2c 81 01 lw s8, 24(sp) -800007d0: 83 2b c1 01 lw s7, 28(sp) -800007d4: 03 2b 01 02 lw s6, 32(sp) -800007d8: 83 2a 41 02 lw s5, 36(sp) -800007dc: 03 2a 81 02 lw s4, 40(sp) -800007e0: 83 29 c1 02 lw s3, 44(sp) -800007e4: 03 29 01 03 lw s2, 48(sp) -800007e8: 83 24 41 03 lw s1, 52(sp) -800007ec: 03 24 81 03 lw s0, 56(sp) -800007f0: 83 20 c1 03 lw ra, 60(sp) -800007f4: 13 01 01 04 addi sp, sp, 64 -800007f8: 67 80 00 00 ret +80000738 _pocl_kernel_NearestNeighbor_workgroup_fast: +80000738: 13 01 01 fc addi sp, sp, -64 +8000073c: 23 2e 11 02 sw ra, 60(sp) +80000740: 23 2c 81 02 sw s0, 56(sp) +80000744: 23 2a 91 02 sw s1, 52(sp) +80000748: 23 28 21 03 sw s2, 48(sp) +8000074c: 23 26 31 03 sw s3, 44(sp) +80000750: 23 24 41 03 sw s4, 40(sp) +80000754: 23 22 51 03 sw s5, 36(sp) +80000758: 23 20 61 03 sw s6, 32(sp) +8000075c: 23 2e 71 01 sw s7, 28(sp) +80000760: 23 2c 81 01 sw s8, 24(sp) +80000764: 23 2a 91 01 sw s9, 20(sp) +80000768: 23 28 a1 01 sw s10, 16(sp) +8000076c: 23 26 b1 01 sw s11, 12(sp) +80000770: 27 24 81 00 fsw fs0, 8(sp) +80000774: 27 22 91 00 fsw fs1, 4(sp) +80000778: 13 09 00 00 mv s2, zero +8000077c: 83 26 05 00 lw a3, 0(a0) +80000780: 03 27 85 00 lw a4, 8(a0) +80000784: 83 27 c5 00 lw a5, 12(a0) +80000788: 83 24 05 01 lw s1, 16(a0) +8000078c: 03 25 45 00 lw a0, 4(a0) +80000790: 83 2a 07 00 lw s5, 0(a4) +80000794: 07 a4 07 00 flw fs0, 0(a5) +80000798: 87 a4 04 00 flw fs1, 0(s1) +8000079c: 83 ac 85 01 lw s9, 24(a1) +800007a0: 03 a7 c5 00 lw a4, 12(a1) +800007a4: 83 a9 c5 01 lw s3, 28(a1) +800007a8: 03 aa 05 02 lw s4, 32(a1) +800007ac: b3 85 cc 02 mul a1, s9, a2 +800007b0: 33 0d b7 00 add s10, a4, a1 +800007b4: 93 15 3d 00 slli a1, s10, 3 +800007b8: b3 85 b6 00 add a1, a3, a1 +800007bc: 13 16 2d 00 slli a2, s10, 2 +800007c0: 33 0b c5 00 add s6, a0, a2 +800007c4: 93 8b 45 00 addi s7, a1, 4 +800007c8: 6f 00 c0 00 j 12 +800007cc: 13 09 19 00 addi s2, s2, 1 +800007d0: 63 74 49 07 bgeu s2, s4, 104 +800007d4: 13 0c 00 00 mv s8, zero +800007d8: 6f 00 c0 00 j 12 +800007dc: 13 0c 1c 00 addi s8, s8, 1 +800007e0: e3 76 3c ff bgeu s8, s3, -20 +800007e4: 13 04 00 00 mv s0, zero +800007e8: 93 0d 0b 00 mv s11, s6 +800007ec: 93 84 0b 00 mv s1, s7 +800007f0: 6f 00 40 01 j 20 +800007f4: 13 04 14 00 addi s0, s0, 1 +800007f8: 93 84 84 00 addi s1, s1, 8 +800007fc: 93 8d 4d 00 addi s11, s11, 4 +80000800: e3 7e 94 fd bgeu s0, s9, -36 +80000804: 33 05 8d 00 add a0, s10, s0 +80000808: e3 56 55 ff bge a0, s5, -20 +8000080c: 07 a0 c4 ff flw ft0, -4(s1) +80000810: 87 a0 04 00 flw ft1, 0(s1) +80000814: 53 70 04 08 fsub.s ft0, fs0, ft0 +80000818: d3 f0 14 08 fsub.s ft1, fs1, ft1 +8000081c: d3 f0 10 10 fmul.s ft1, ft1, ft1 +80000820: 53 70 00 10 fmul.s ft0, ft0, ft0 +80000824: 53 75 10 00 fadd.s fa0, ft0, ft1 +80000828: 97 00 00 00 auipc ra, 0 +8000082c: e7 80 c0 c5 jalr -932(ra) +80000830: 27 a0 ad 00 fsw fa0, 0(s11) +80000834: 6f f0 1f fc j -64 +80000838: 87 24 41 00 flw fs1, 4(sp) +8000083c: 07 24 81 00 flw fs0, 8(sp) +80000840: 83 2d c1 00 lw s11, 12(sp) +80000844: 03 2d 01 01 lw s10, 16(sp) +80000848: 83 2c 41 01 lw s9, 20(sp) +8000084c: 03 2c 81 01 lw s8, 24(sp) +80000850: 83 2b c1 01 lw s7, 28(sp) +80000854: 03 2b 01 02 lw s6, 32(sp) +80000858: 83 2a 41 02 lw s5, 36(sp) +8000085c: 03 2a 81 02 lw s4, 40(sp) +80000860: 83 29 c1 02 lw s3, 44(sp) +80000864: 03 29 01 03 lw s2, 48(sp) +80000868: 83 24 41 03 lw s1, 52(sp) +8000086c: 03 24 81 03 lw s0, 56(sp) +80000870: 83 20 c1 03 lw ra, 60(sp) +80000874: 13 01 01 04 addi sp, sp, 64 +80000878: 67 80 00 00 ret -800007fc _exit: -800007fc: 13 05 00 00 mv a0, zero -80000800: 6b 00 05 00 +8000087c _exit: +8000087c: 13 05 00 00 mv a0, zero +80000880: 6b 00 05 00 -80000804 vx_set_sp: -80000804: 73 25 00 fc csrr a0, 4032 -80000808: 6b 00 05 00 -8000080c: 97 11 00 00 auipc gp, 1 -80000810: 93 81 c1 ff addi gp, gp, -4 -80000814: 17 f1 ff 7e auipc sp, 520191 -80000818: 13 01 c1 7e addi sp, sp, 2028 -8000081c: 93 05 00 40 addi a1, zero, 1024 -80000820: 73 26 10 cc csrr a2, 3265 -80000824: b3 85 c5 02 mul a1, a1, a2 -80000828: 33 01 b1 40 sub sp, sp, a1 -8000082c: f3 26 30 cc csrr a3, 3267 -80000830: 63 86 06 00 beqz a3, 12 -80000834: 13 05 00 00 mv a0, zero -80000838: 6b 00 05 00 +80000884 vx_set_sp: +80000884: 73 25 00 fc csrr a0, 4032 +80000888: 6b 00 05 00 +8000088c: 97 11 00 00 auipc gp, 1 +80000890: 93 81 c1 f7 addi gp, gp, -132 +80000894: 17 f1 ff 7e auipc sp, 520191 +80000898: 13 01 c1 76 addi sp, sp, 1900 +8000089c: 93 05 00 40 addi a1, zero, 1024 +800008a0: 73 26 10 cc csrr a2, 3265 +800008a4: b3 85 c5 02 mul a1, a1, a2 +800008a8: 33 01 b1 40 sub sp, sp, a1 +800008ac: f3 26 30 cc csrr a3, 3267 +800008b0: 63 86 06 00 beqz a3, 12 +800008b4: 13 05 00 00 mv a0, zero +800008b8: 6b 00 05 00 -8000083c RETURN: -8000083c: 67 80 00 00 ret - -80000840 vx_wspawn: -80000840: 6b 10 b5 00 -80000844: 67 80 00 00 ret - -80000848 vx_tmc: -80000848: 6b 00 05 00 -8000084c: 67 80 00 00 ret - -80000850 vx_barrier: -80000850: 6b 40 b5 00 -80000854: 67 80 00 00 ret - -80000858 vx_split: -80000858: 6b 20 05 00 -8000085c: 67 80 00 00 ret - -80000860 vx_join: -80000860: 6b 30 00 00 -80000864: 67 80 00 00 ret - -80000868 vx_warp_id: -80000868: 73 25 30 cc csrr a0, 3267 -8000086c: 67 80 00 00 ret - -80000870 vx_warp_gid: -80000870: 73 25 40 f1 csrr a0, mhartid -80000874: 67 80 00 00 ret - -80000878 vx_thread_id: -80000878: 73 25 00 cc csrr a0, 3264 -8000087c: 67 80 00 00 ret - -80000880 vx_thread_lid: -80000880: 73 25 10 cc csrr a0, 3265 -80000884: 67 80 00 00 ret - -80000888 vx_thread_gid: -80000888: 73 25 20 cc csrr a0, 3266 -8000088c: 67 80 00 00 ret - -80000890 vx_core_id: -80000890: 73 25 50 cc csrr a0, 3269 -80000894: 67 80 00 00 ret - -80000898 vx_num_threads: -80000898: 73 25 00 fc csrr a0, 4032 -8000089c: 67 80 00 00 ret - -800008a0 vx_num_warps: -800008a0: 73 25 10 fc csrr a0, 4033 -800008a4: 67 80 00 00 ret - -800008a8 vx_num_cores: -800008a8: 73 25 20 fc csrr a0, 4034 -800008ac: 67 80 00 00 ret - -800008b0 vx_num_cycles: -800008b0: 73 25 00 b0 csrr a0, mcycle -800008b4: 67 80 00 00 ret - -800008b8 vx_num_instrs: -800008b8: 73 25 20 b0 csrr a0, minstret +800008bc RETURN: 800008bc: 67 80 00 00 ret -800008c0 sqrtf: -800008c0: 13 01 01 fe addi sp, sp, -32 -800008c4: 27 26 81 00 fsw fs0, 12(sp) -800008c8: 23 2e 11 00 sw ra, 28(sp) -800008cc: 53 04 a5 20 fmv.s fs0, fa0 -800008d0: 27 24 91 00 fsw fs1, 8(sp) -800008d4: ef 00 00 06 jal 96 -800008d8: b7 17 00 80 lui a5, 524289 -800008dc: 03 a7 87 43 lw a4, 1080(a5) -800008e0: 93 07 f0 ff addi a5, zero, -1 -800008e4: 63 0c f7 00 beq a4, a5, 24 -800008e8: d3 27 84 a0 feq.s a5, fs0, fs0 -800008ec: 63 88 07 00 beqz a5, 16 -800008f0: d3 04 00 f0 fmv.w.x fs1, zero -800008f4: d3 17 94 a0 flt.s a5, fs0, fs1 -800008f8: 63 9c 07 00 bnez a5, 24 -800008fc: 83 20 c1 01 lw ra, 28(sp) -80000900: 07 24 c1 00 flw fs0, 12(sp) -80000904: 87 24 81 00 flw fs1, 8(sp) -80000908: 13 01 01 02 addi sp, sp, 32 +800008c0 vx_wspawn: +800008c0: 6b 10 b5 00 +800008c4: 67 80 00 00 ret + +800008c8 vx_tmc: +800008c8: 6b 00 05 00 +800008cc: 67 80 00 00 ret + +800008d0 vx_barrier: +800008d0: 6b 40 b5 00 +800008d4: 67 80 00 00 ret + +800008d8 vx_split: +800008d8: 6b 20 05 00 +800008dc: 67 80 00 00 ret + +800008e0 vx_join: +800008e0: 6b 30 00 00 +800008e4: 67 80 00 00 ret + +800008e8 vx_warp_id: +800008e8: 73 25 30 cc csrr a0, 3267 +800008ec: 67 80 00 00 ret + +800008f0 vx_warp_gid: +800008f0: 73 25 40 f1 csrr a0, mhartid +800008f4: 67 80 00 00 ret + +800008f8 vx_thread_id: +800008f8: 73 25 00 cc csrr a0, 3264 +800008fc: 67 80 00 00 ret + +80000900 vx_thread_lid: +80000900: 73 25 10 cc csrr a0, 3265 +80000904: 67 80 00 00 ret + +80000908 vx_thread_gid: +80000908: 73 25 20 cc csrr a0, 3266 8000090c: 67 80 00 00 ret -80000910: ef 00 40 14 jal 324 -80000914: 83 20 c1 01 lw ra, 28(sp) -80000918: 93 07 10 02 addi a5, zero, 33 -8000091c: 23 20 f5 00 sw a5, 0(a0) -80000920: 53 f5 94 18 fdiv.s fa0, fs1, fs1 -80000924: 07 24 c1 00 flw fs0, 12(sp) -80000928: 87 24 81 00 flw fs1, 8(sp) -8000092c: 13 01 01 02 addi sp, sp, 32 -80000930: 67 80 00 00 ret -80000934 __ieee754_sqrtf: -80000934: d3 06 05 e0 fmv.x.w a3, fa0 -80000938: 37 07 80 7f lui a4, 522240 -8000093c: 93 97 16 00 slli a5, a3, 1 -80000940: 93 d7 17 00 srli a5, a5, 1 -80000944: 63 f2 e7 0c bgeu a5, a4, 196 -80000948: 53 05 05 e0 fmv.x.w a0, fa0 -8000094c: 63 8a 07 0a beqz a5, 180 -80000950: 93 87 06 00 mv a5, a3 -80000954: 63 c6 06 0c bltz a3, 204 -80000958: 33 76 d7 00 and a2, a4, a3 -8000095c: 13 d7 76 41 srai a4, a3, 23 -80000960: 63 14 06 02 bnez a2, 40 -80000964: 37 06 80 00 lui a2, 2048 -80000968: b3 76 d6 00 and a3, a2, a3 -8000096c: 63 94 06 0c bnez a3, 200 -80000970: 93 97 17 00 slli a5, a5, 1 -80000974: 93 95 87 00 slli a1, a5, 8 -80000978: 13 86 06 00 mv a2, a3 -8000097c: 93 86 16 00 addi a3, a3, 1 -80000980: e3 d8 05 fe bgez a1, -16 -80000984: 33 07 c7 40 sub a4, a4, a2 -80000988: b7 06 80 00 lui a3, 2048 -8000098c: 13 86 f6 ff addi a2, a3, -1 -80000990: b3 f7 c7 00 and a5, a5, a2 -80000994: 13 07 17 f8 addi a4, a4, -127 -80000998: b3 e6 d7 00 or a3, a5, a3 -8000099c: 13 76 17 00 andi a2, a4, 1 -800009a0: 93 97 16 00 slli a5, a3, 1 -800009a4: 63 1a 06 06 bnez a2, 116 -800009a8: 13 58 17 40 srai a6, a4, 1 -800009ac: 93 06 90 01 addi a3, zero, 25 -800009b0: 13 05 00 00 mv a0, zero -800009b4: 93 05 00 00 mv a1, zero -800009b8: 37 07 00 01 lui a4, 4096 -800009bc: 33 86 e5 00 add a2, a1, a4 -800009c0: 93 86 f6 ff addi a3, a3, -1 -800009c4: 63 c8 c7 00 blt a5, a2, 16 -800009c8: b3 05 e6 00 add a1, a2, a4 -800009cc: b3 87 c7 40 sub a5, a5, a2 -800009d0: 33 05 e5 00 add a0, a0, a4 -800009d4: 93 97 17 00 slli a5, a5, 1 -800009d8: 13 57 17 00 srli a4, a4, 1 -800009dc: e3 90 06 fe bnez a3, -32 -800009e0: 63 86 07 00 beqz a5, 12 -800009e4: 13 05 15 00 addi a0, a0, 1 -800009e8: 13 75 e5 ff andi a0, a0, -2 -800009ec: 13 55 15 40 srai a0, a0, 1 -800009f0: b7 07 00 3f lui a5, 258048 -800009f4: 33 05 f5 00 add a0, a0, a5 -800009f8: 13 17 78 01 slli a4, a6, 23 -800009fc: 33 05 a7 00 add a0, a4, a0 -80000a00: 53 05 05 f0 fmv.w.x fa0, a0 -80000a04: 67 80 00 00 ret -80000a08: c3 77 a5 50 fmadd.s fa5, fa0, fa0, fa0 -80000a0c: 53 85 07 e0 fmv.x.w a0, fa5 -80000a10: 53 05 05 f0 fmv.w.x fa0, a0 -80000a14: 67 80 00 00 ret -80000a18: 93 97 26 00 slli a5, a3, 2 -80000a1c: 6f f0 df f8 j -116 -80000a20: d3 77 a5 08 fsub.s fa5, fa0, fa0 -80000a24: d3 f7 f7 18 fdiv.s fa5, fa5, fa5 -80000a28: 53 85 07 e0 fmv.x.w a0, fa5 -80000a2c: 53 05 05 f0 fmv.w.x fa0, a0 -80000a30: 67 80 00 00 ret -80000a34: 13 06 f0 ff addi a2, zero, -1 -80000a38: 33 07 c7 40 sub a4, a4, a2 -80000a3c: 6f f0 df f4 j -180 +80000910 vx_core_id: +80000910: 73 25 50 cc csrr a0, 3269 +80000914: 67 80 00 00 ret -80000a40 atexit: -80000a40: 93 05 05 00 mv a1, a0 -80000a44: 93 06 00 00 mv a3, zero -80000a48: 13 06 00 00 mv a2, zero -80000a4c: 13 05 00 00 mv a0, zero -80000a50: 6f 00 80 21 j 536 +80000918 vx_num_threads: +80000918: 73 25 00 fc csrr a0, 4032 +8000091c: 67 80 00 00 ret -80000a54 __errno: -80000a54: b7 17 00 80 lui a5, 524289 -80000a58: 03 a5 47 43 lw a0, 1076(a5) -80000a5c: 67 80 00 00 ret +80000920 vx_num_warps: +80000920: 73 25 10 fc csrr a0, 4033 +80000924: 67 80 00 00 ret -80000a60 exit: -80000a60: 13 01 01 ff addi sp, sp, -16 -80000a64: 93 05 00 00 mv a1, zero -80000a68: 23 24 81 00 sw s0, 8(sp) -80000a6c: 23 26 11 00 sw ra, 12(sp) -80000a70: 13 04 05 00 mv s0, a0 -80000a74: ef 00 00 29 jal 656 -80000a78: b7 17 00 80 lui a5, 524289 -80000a7c: 03 a5 07 43 lw a0, 1072(a5) -80000a80: 83 27 c5 03 lw a5, 60(a0) -80000a84: 63 84 07 00 beqz a5, 8 -80000a88: e7 80 07 00 jalr a5 -80000a8c: 13 05 04 00 mv a0, s0 -80000a90: ef f0 df d6 jal -660 +80000928 vx_num_cores: +80000928: 73 25 20 fc csrr a0, 4034 +8000092c: 67 80 00 00 ret -80000a94 __libc_fini_array: -80000a94: 13 01 01 ff addi sp, sp, -16 -80000a98: 23 24 81 00 sw s0, 8(sp) -80000a9c: b7 17 00 80 lui a5, 524289 -80000aa0: 37 14 00 80 lui s0, 524289 -80000aa4: 13 04 44 00 addi s0, s0, 4 -80000aa8: 93 87 47 00 addi a5, a5, 4 -80000aac: b3 87 87 40 sub a5, a5, s0 -80000ab0: 23 22 91 00 sw s1, 4(sp) -80000ab4: 23 26 11 00 sw ra, 12(sp) -80000ab8: 93 d4 27 40 srai s1, a5, 2 -80000abc: 63 80 04 02 beqz s1, 32 -80000ac0: 93 87 c7 ff addi a5, a5, -4 -80000ac4: 33 84 87 00 add s0, a5, s0 -80000ac8: 83 27 04 00 lw a5, 0(s0) -80000acc: 93 84 f4 ff addi s1, s1, -1 -80000ad0: 13 04 c4 ff addi s0, s0, -4 -80000ad4: e7 80 07 00 jalr a5 -80000ad8: e3 98 04 fe bnez s1, -16 -80000adc: 83 20 c1 00 lw ra, 12(sp) -80000ae0: 03 24 81 00 lw s0, 8(sp) -80000ae4: 83 24 41 00 lw s1, 4(sp) -80000ae8: 13 01 01 01 addi sp, sp, 16 -80000aec: 67 80 00 00 ret +80000930 vx_num_cycles: +80000930: 73 25 00 b0 csrr a0, mcycle +80000934: 67 80 00 00 ret -80000af0 __libc_init_array: -80000af0: 13 01 01 ff addi sp, sp, -16 -80000af4: 23 24 81 00 sw s0, 8(sp) -80000af8: 23 20 21 01 sw s2, 0(sp) -80000afc: 37 14 00 80 lui s0, 524289 -80000b00: 37 19 00 80 lui s2, 524289 -80000b04: 93 07 04 00 mv a5, s0 -80000b08: 13 09 09 00 mv s2, s2 -80000b0c: 33 09 f9 40 sub s2, s2, a5 -80000b10: 23 26 11 00 sw ra, 12(sp) -80000b14: 23 22 91 00 sw s1, 4(sp) -80000b18: 13 59 29 40 srai s2, s2, 2 -80000b1c: 63 00 09 02 beqz s2, 32 -80000b20: 13 04 04 00 mv s0, s0 -80000b24: 93 04 00 00 mv s1, zero -80000b28: 83 27 04 00 lw a5, 0(s0) -80000b2c: 93 84 14 00 addi s1, s1, 1 -80000b30: 13 04 44 00 addi s0, s0, 4 -80000b34: e7 80 07 00 jalr a5 -80000b38: e3 18 99 fe bne s2, s1, -16 -80000b3c: 37 14 00 80 lui s0, 524289 -80000b40: 37 19 00 80 lui s2, 524289 -80000b44: 93 07 04 00 mv a5, s0 -80000b48: 13 09 49 00 addi s2, s2, 4 -80000b4c: 33 09 f9 40 sub s2, s2, a5 -80000b50: 13 59 29 40 srai s2, s2, 2 -80000b54: 63 00 09 02 beqz s2, 32 -80000b58: 13 04 04 00 mv s0, s0 -80000b5c: 93 04 00 00 mv s1, zero -80000b60: 83 27 04 00 lw a5, 0(s0) -80000b64: 93 84 14 00 addi s1, s1, 1 -80000b68: 13 04 44 00 addi s0, s0, 4 -80000b6c: e7 80 07 00 jalr a5 -80000b70: e3 18 99 fe bne s2, s1, -16 -80000b74: 83 20 c1 00 lw ra, 12(sp) -80000b78: 03 24 81 00 lw s0, 8(sp) -80000b7c: 83 24 41 00 lw s1, 4(sp) -80000b80: 03 29 01 00 lw s2, 0(sp) -80000b84: 13 01 01 01 addi sp, sp, 16 -80000b88: 67 80 00 00 ret +80000938 vx_num_instrs: +80000938: 73 25 20 b0 csrr a0, minstret +8000093c: 67 80 00 00 ret -80000b8c memset: -80000b8c: 13 03 f0 00 addi t1, zero, 15 -80000b90: 13 07 05 00 mv a4, a0 -80000b94: 63 7e c3 02 bgeu t1, a2, 60 -80000b98: 93 77 f7 00 andi a5, a4, 15 -80000b9c: 63 90 07 0a bnez a5, 160 -80000ba0: 63 92 05 08 bnez a1, 132 -80000ba4: 93 76 06 ff andi a3, a2, -16 -80000ba8: 13 76 f6 00 andi a2, a2, 15 -80000bac: b3 86 e6 00 add a3, a3, a4 -80000bb0: 23 20 b7 00 sw a1, 0(a4) -80000bb4: 23 22 b7 00 sw a1, 4(a4) -80000bb8: 23 24 b7 00 sw a1, 8(a4) -80000bbc: 23 26 b7 00 sw a1, 12(a4) -80000bc0: 13 07 07 01 addi a4, a4, 16 -80000bc4: e3 66 d7 fe bltu a4, a3, -20 -80000bc8: 63 14 06 00 bnez a2, 8 -80000bcc: 67 80 00 00 ret -80000bd0: b3 06 c3 40 sub a3, t1, a2 -80000bd4: 93 96 26 00 slli a3, a3, 2 -80000bd8: 97 02 00 00 auipc t0, 0 -80000bdc: b3 86 56 00 add a3, a3, t0 -80000be0: 67 80 c6 00 jr 12(a3) -80000be4: 23 07 b7 00 sb a1, 14(a4) -80000be8: a3 06 b7 00 sb a1, 13(a4) -80000bec: 23 06 b7 00 sb a1, 12(a4) -80000bf0: a3 05 b7 00 sb a1, 11(a4) -80000bf4: 23 05 b7 00 sb a1, 10(a4) -80000bf8: a3 04 b7 00 sb a1, 9(a4) -80000bfc: 23 04 b7 00 sb a1, 8(a4) -80000c00: a3 03 b7 00 sb a1, 7(a4) -80000c04: 23 03 b7 00 sb a1, 6(a4) -80000c08: a3 02 b7 00 sb a1, 5(a4) -80000c0c: 23 02 b7 00 sb a1, 4(a4) -80000c10: a3 01 b7 00 sb a1, 3(a4) -80000c14: 23 01 b7 00 sb a1, 2(a4) -80000c18: a3 00 b7 00 sb a1, 1(a4) -80000c1c: 23 00 b7 00 sb a1, 0(a4) -80000c20: 67 80 00 00 ret -80000c24: 93 f5 f5 0f andi a1, a1, 255 -80000c28: 93 96 85 00 slli a3, a1, 8 -80000c2c: b3 e5 d5 00 or a1, a1, a3 -80000c30: 93 96 05 01 slli a3, a1, 16 -80000c34: b3 e5 d5 00 or a1, a1, a3 -80000c38: 6f f0 df f6 j -148 -80000c3c: 93 96 27 00 slli a3, a5, 2 -80000c40: 97 02 00 00 auipc t0, 0 -80000c44: b3 86 56 00 add a3, a3, t0 -80000c48: 93 82 00 00 mv t0, ra -80000c4c: e7 80 06 fa jalr -96(a3) -80000c50: 93 80 02 00 mv ra, t0 -80000c54: 93 87 07 ff addi a5, a5, -16 -80000c58: 33 07 f7 40 sub a4, a4, a5 -80000c5c: 33 06 f6 00 add a2, a2, a5 -80000c60: e3 78 c3 f6 bgeu t1, a2, -144 -80000c64: 6f f0 df f3 j -196 +80000940 sqrtf: +80000940: 13 01 01 fe addi sp, sp, -32 +80000944: 27 26 81 00 fsw fs0, 12(sp) +80000948: 23 2e 11 00 sw ra, 28(sp) +8000094c: 53 04 a5 20 fmv.s fs0, fa0 +80000950: 27 24 91 00 fsw fs1, 8(sp) +80000954: ef 00 00 06 jal 96 +80000958: b7 17 00 80 lui a5, 524289 +8000095c: 03 a7 87 43 lw a4, 1080(a5) +80000960: 93 07 f0 ff addi a5, zero, -1 +80000964: 63 0c f7 00 beq a4, a5, 24 +80000968: d3 27 84 a0 feq.s a5, fs0, fs0 +8000096c: 63 88 07 00 beqz a5, 16 +80000970: d3 04 00 f0 fmv.w.x fs1, zero +80000974: d3 17 94 a0 flt.s a5, fs0, fs1 +80000978: 63 9c 07 00 bnez a5, 24 +8000097c: 83 20 c1 01 lw ra, 28(sp) +80000980: 07 24 c1 00 flw fs0, 12(sp) +80000984: 87 24 81 00 flw fs1, 8(sp) +80000988: 13 01 01 02 addi sp, sp, 32 +8000098c: 67 80 00 00 ret +80000990: ef 00 40 14 jal 324 +80000994: 83 20 c1 01 lw ra, 28(sp) +80000998: 93 07 10 02 addi a5, zero, 33 +8000099c: 23 20 f5 00 sw a5, 0(a0) +800009a0: 53 f5 94 18 fdiv.s fa0, fs1, fs1 +800009a4: 07 24 c1 00 flw fs0, 12(sp) +800009a8: 87 24 81 00 flw fs1, 8(sp) +800009ac: 13 01 01 02 addi sp, sp, 32 +800009b0: 67 80 00 00 ret -80000c68 __register_exitproc: -80000c68: b7 17 00 80 lui a5, 524289 -80000c6c: 03 a7 07 43 lw a4, 1072(a5) -80000c70: 83 27 87 14 lw a5, 328(a4) -80000c74: 63 8c 07 04 beqz a5, 88 -80000c78: 03 a7 47 00 lw a4, 4(a5) -80000c7c: 13 08 f0 01 addi a6, zero, 31 -80000c80: 63 4e e8 06 blt a6, a4, 124 -80000c84: 13 18 27 00 slli a6, a4, 2 -80000c88: 63 06 05 02 beqz a0, 44 -80000c8c: 33 83 07 01 add t1, a5, a6 -80000c90: 23 24 c3 08 sw a2, 136(t1) -80000c94: 83 a8 87 18 lw a7, 392(a5) -80000c98: 13 06 10 00 addi a2, zero, 1 -80000c9c: 33 16 e6 00 sll a2, a2, a4 -80000ca0: b3 e8 c8 00 or a7, a7, a2 -80000ca4: 23 a4 17 19 sw a7, 392(a5) -80000ca8: 23 24 d3 10 sw a3, 264(t1) -80000cac: 93 06 20 00 addi a3, zero, 2 -80000cb0: 63 04 d5 02 beq a0, a3, 40 -80000cb4: 13 07 17 00 addi a4, a4, 1 -80000cb8: 23 a2 e7 00 sw a4, 4(a5) -80000cbc: b3 87 07 01 add a5, a5, a6 -80000cc0: 23 a4 b7 00 sw a1, 8(a5) -80000cc4: 13 05 00 00 mv a0, zero -80000cc8: 67 80 00 00 ret -80000ccc: 93 07 c7 14 addi a5, a4, 332 -80000cd0: 23 24 f7 14 sw a5, 328(a4) -80000cd4: 6f f0 5f fa j -92 -80000cd8: 83 a6 c7 18 lw a3, 396(a5) -80000cdc: 13 07 17 00 addi a4, a4, 1 -80000ce0: 23 a2 e7 00 sw a4, 4(a5) -80000ce4: 33 e6 c6 00 or a2, a3, a2 -80000ce8: 23 a6 c7 18 sw a2, 396(a5) -80000cec: b3 87 07 01 add a5, a5, a6 -80000cf0: 23 a4 b7 00 sw a1, 8(a5) -80000cf4: 13 05 00 00 mv a0, zero -80000cf8: 67 80 00 00 ret -80000cfc: 13 05 f0 ff addi a0, zero, -1 -80000d00: 67 80 00 00 ret +800009b4 __ieee754_sqrtf: +800009b4: d3 06 05 e0 fmv.x.w a3, fa0 +800009b8: 37 07 80 7f lui a4, 522240 +800009bc: 93 97 16 00 slli a5, a3, 1 +800009c0: 93 d7 17 00 srli a5, a5, 1 +800009c4: 63 f2 e7 0c bgeu a5, a4, 196 +800009c8: 53 05 05 e0 fmv.x.w a0, fa0 +800009cc: 63 8a 07 0a beqz a5, 180 +800009d0: 93 87 06 00 mv a5, a3 +800009d4: 63 c6 06 0c bltz a3, 204 +800009d8: 33 76 d7 00 and a2, a4, a3 +800009dc: 13 d7 76 41 srai a4, a3, 23 +800009e0: 63 14 06 02 bnez a2, 40 +800009e4: 37 06 80 00 lui a2, 2048 +800009e8: b3 76 d6 00 and a3, a2, a3 +800009ec: 63 94 06 0c bnez a3, 200 +800009f0: 93 97 17 00 slli a5, a5, 1 +800009f4: 93 95 87 00 slli a1, a5, 8 +800009f8: 13 86 06 00 mv a2, a3 +800009fc: 93 86 16 00 addi a3, a3, 1 +80000a00: e3 d8 05 fe bgez a1, -16 +80000a04: 33 07 c7 40 sub a4, a4, a2 +80000a08: b7 06 80 00 lui a3, 2048 +80000a0c: 13 86 f6 ff addi a2, a3, -1 +80000a10: b3 f7 c7 00 and a5, a5, a2 +80000a14: 13 07 17 f8 addi a4, a4, -127 +80000a18: b3 e6 d7 00 or a3, a5, a3 +80000a1c: 13 76 17 00 andi a2, a4, 1 +80000a20: 93 97 16 00 slli a5, a3, 1 +80000a24: 63 1a 06 06 bnez a2, 116 +80000a28: 13 58 17 40 srai a6, a4, 1 +80000a2c: 93 06 90 01 addi a3, zero, 25 +80000a30: 13 05 00 00 mv a0, zero +80000a34: 93 05 00 00 mv a1, zero +80000a38: 37 07 00 01 lui a4, 4096 +80000a3c: 33 86 e5 00 add a2, a1, a4 +80000a40: 93 86 f6 ff addi a3, a3, -1 +80000a44: 63 c8 c7 00 blt a5, a2, 16 +80000a48: b3 05 e6 00 add a1, a2, a4 +80000a4c: b3 87 c7 40 sub a5, a5, a2 +80000a50: 33 05 e5 00 add a0, a0, a4 +80000a54: 93 97 17 00 slli a5, a5, 1 +80000a58: 13 57 17 00 srli a4, a4, 1 +80000a5c: e3 90 06 fe bnez a3, -32 +80000a60: 63 86 07 00 beqz a5, 12 +80000a64: 13 05 15 00 addi a0, a0, 1 +80000a68: 13 75 e5 ff andi a0, a0, -2 +80000a6c: 13 55 15 40 srai a0, a0, 1 +80000a70: b7 07 00 3f lui a5, 258048 +80000a74: 33 05 f5 00 add a0, a0, a5 +80000a78: 13 17 78 01 slli a4, a6, 23 +80000a7c: 33 05 a7 00 add a0, a4, a0 +80000a80: 53 05 05 f0 fmv.w.x fa0, a0 +80000a84: 67 80 00 00 ret +80000a88: c3 77 a5 50 fmadd.s fa5, fa0, fa0, fa0 +80000a8c: 53 85 07 e0 fmv.x.w a0, fa5 +80000a90: 53 05 05 f0 fmv.w.x fa0, a0 +80000a94: 67 80 00 00 ret +80000a98: 93 97 26 00 slli a5, a3, 2 +80000a9c: 6f f0 df f8 j -116 +80000aa0: d3 77 a5 08 fsub.s fa5, fa0, fa0 +80000aa4: d3 f7 f7 18 fdiv.s fa5, fa5, fa5 +80000aa8: 53 85 07 e0 fmv.x.w a0, fa5 +80000aac: 53 05 05 f0 fmv.w.x fa0, a0 +80000ab0: 67 80 00 00 ret +80000ab4: 13 06 f0 ff addi a2, zero, -1 +80000ab8: 33 07 c7 40 sub a4, a4, a2 +80000abc: 6f f0 df f4 j -180 -80000d04 __call_exitprocs: -80000d04: 13 01 01 fd addi sp, sp, -48 -80000d08: b7 17 00 80 lui a5, 524289 -80000d0c: 23 2c 41 01 sw s4, 24(sp) -80000d10: 03 aa 07 43 lw s4, 1072(a5) -80000d14: 23 20 21 03 sw s2, 32(sp) -80000d18: 23 26 11 02 sw ra, 44(sp) -80000d1c: 03 29 8a 14 lw s2, 328(s4) -80000d20: 23 24 81 02 sw s0, 40(sp) -80000d24: 23 22 91 02 sw s1, 36(sp) -80000d28: 23 2e 31 01 sw s3, 28(sp) -80000d2c: 23 2a 51 01 sw s5, 20(sp) -80000d30: 23 28 61 01 sw s6, 16(sp) -80000d34: 23 26 71 01 sw s7, 12(sp) -80000d38: 23 24 81 01 sw s8, 8(sp) -80000d3c: 63 00 09 04 beqz s2, 64 -80000d40: 13 0b 05 00 mv s6, a0 -80000d44: 93 8b 05 00 mv s7, a1 -80000d48: 93 0a 10 00 addi s5, zero, 1 -80000d4c: 93 09 f0 ff addi s3, zero, -1 -80000d50: 83 24 49 00 lw s1, 4(s2) -80000d54: 13 84 f4 ff addi s0, s1, -1 -80000d58: 63 42 04 02 bltz s0, 36 -80000d5c: 93 94 24 00 slli s1, s1, 2 -80000d60: b3 04 99 00 add s1, s2, s1 -80000d64: 63 84 0b 04 beqz s7, 72 -80000d68: 83 a7 44 10 lw a5, 260(s1) -80000d6c: 63 80 77 05 beq a5, s7, 64 -80000d70: 13 04 f4 ff addi s0, s0, -1 -80000d74: 93 84 c4 ff addi s1, s1, -4 -80000d78: e3 16 34 ff bne s0, s3, -20 -80000d7c: 83 20 c1 02 lw ra, 44(sp) -80000d80: 03 24 81 02 lw s0, 40(sp) -80000d84: 83 24 41 02 lw s1, 36(sp) -80000d88: 03 29 01 02 lw s2, 32(sp) -80000d8c: 83 29 c1 01 lw s3, 28(sp) -80000d90: 03 2a 81 01 lw s4, 24(sp) -80000d94: 83 2a 41 01 lw s5, 20(sp) -80000d98: 03 2b 01 01 lw s6, 16(sp) -80000d9c: 83 2b c1 00 lw s7, 12(sp) -80000da0: 03 2c 81 00 lw s8, 8(sp) -80000da4: 13 01 01 03 addi sp, sp, 48 -80000da8: 67 80 00 00 ret -80000dac: 83 27 49 00 lw a5, 4(s2) -80000db0: 83 a6 44 00 lw a3, 4(s1) -80000db4: 93 87 f7 ff addi a5, a5, -1 -80000db8: 63 8e 87 04 beq a5, s0, 92 -80000dbc: 23 a2 04 00 sw zero, 4(s1) -80000dc0: e3 88 06 fa beqz a3, -80 -80000dc4: 83 27 89 18 lw a5, 392(s2) -80000dc8: 33 97 8a 00 sll a4, s5, s0 -80000dcc: 03 2c 49 00 lw s8, 4(s2) -80000dd0: b3 77 f7 00 and a5, a4, a5 -80000dd4: 63 92 07 02 bnez a5, 36 -80000dd8: e7 80 06 00 jalr a3 -80000ddc: 03 27 49 00 lw a4, 4(s2) -80000de0: 83 27 8a 14 lw a5, 328(s4) -80000de4: 63 14 87 01 bne a4, s8, 8 -80000de8: e3 04 f9 f8 beq s2, a5, -120 -80000dec: e3 88 07 f8 beqz a5, -112 -80000df0: 13 89 07 00 mv s2, a5 -80000df4: 6f f0 df f5 j -164 -80000df8: 83 27 c9 18 lw a5, 396(s2) -80000dfc: 83 a5 44 08 lw a1, 132(s1) -80000e00: 33 77 f7 00 and a4, a4, a5 -80000e04: 63 1c 07 00 bnez a4, 24 -80000e08: 13 05 0b 00 mv a0, s6 -80000e0c: e7 80 06 00 jalr a3 -80000e10: 6f f0 df fc j -52 -80000e14: 23 22 89 00 sw s0, 4(s2) -80000e18: 6f f0 9f fa j -88 -80000e1c: 13 85 05 00 mv a0, a1 -80000e20: e7 80 06 00 jalr a3 -80000e24: 6f f0 9f fb j -72 +80000ac0 atexit: +80000ac0: 93 05 05 00 mv a1, a0 +80000ac4: 93 06 00 00 mv a3, zero +80000ac8: 13 06 00 00 mv a2, zero +80000acc: 13 05 00 00 mv a0, zero +80000ad0: 6f 00 80 21 j 536 + +80000ad4 __errno: +80000ad4: b7 17 00 80 lui a5, 524289 +80000ad8: 03 a5 47 43 lw a0, 1076(a5) +80000adc: 67 80 00 00 ret + +80000ae0 exit: +80000ae0: 13 01 01 ff addi sp, sp, -16 +80000ae4: 93 05 00 00 mv a1, zero +80000ae8: 23 24 81 00 sw s0, 8(sp) +80000aec: 23 26 11 00 sw ra, 12(sp) +80000af0: 13 04 05 00 mv s0, a0 +80000af4: ef 00 00 29 jal 656 +80000af8: b7 17 00 80 lui a5, 524289 +80000afc: 03 a5 07 43 lw a0, 1072(a5) +80000b00: 83 27 c5 03 lw a5, 60(a0) +80000b04: 63 84 07 00 beqz a5, 8 +80000b08: e7 80 07 00 jalr a5 +80000b0c: 13 05 04 00 mv a0, s0 +80000b10: ef f0 df d6 jal -660 + +80000b14 __libc_fini_array: +80000b14: 13 01 01 ff addi sp, sp, -16 +80000b18: 23 24 81 00 sw s0, 8(sp) +80000b1c: b7 17 00 80 lui a5, 524289 +80000b20: 37 14 00 80 lui s0, 524289 +80000b24: 13 04 44 00 addi s0, s0, 4 +80000b28: 93 87 47 00 addi a5, a5, 4 +80000b2c: b3 87 87 40 sub a5, a5, s0 +80000b30: 23 22 91 00 sw s1, 4(sp) +80000b34: 23 26 11 00 sw ra, 12(sp) +80000b38: 93 d4 27 40 srai s1, a5, 2 +80000b3c: 63 80 04 02 beqz s1, 32 +80000b40: 93 87 c7 ff addi a5, a5, -4 +80000b44: 33 84 87 00 add s0, a5, s0 +80000b48: 83 27 04 00 lw a5, 0(s0) +80000b4c: 93 84 f4 ff addi s1, s1, -1 +80000b50: 13 04 c4 ff addi s0, s0, -4 +80000b54: e7 80 07 00 jalr a5 +80000b58: e3 98 04 fe bnez s1, -16 +80000b5c: 83 20 c1 00 lw ra, 12(sp) +80000b60: 03 24 81 00 lw s0, 8(sp) +80000b64: 83 24 41 00 lw s1, 4(sp) +80000b68: 13 01 01 01 addi sp, sp, 16 +80000b6c: 67 80 00 00 ret + +80000b70 __libc_init_array: +80000b70: 13 01 01 ff addi sp, sp, -16 +80000b74: 23 24 81 00 sw s0, 8(sp) +80000b78: 23 20 21 01 sw s2, 0(sp) +80000b7c: 37 14 00 80 lui s0, 524289 +80000b80: 37 19 00 80 lui s2, 524289 +80000b84: 93 07 04 00 mv a5, s0 +80000b88: 13 09 09 00 mv s2, s2 +80000b8c: 33 09 f9 40 sub s2, s2, a5 +80000b90: 23 26 11 00 sw ra, 12(sp) +80000b94: 23 22 91 00 sw s1, 4(sp) +80000b98: 13 59 29 40 srai s2, s2, 2 +80000b9c: 63 00 09 02 beqz s2, 32 +80000ba0: 13 04 04 00 mv s0, s0 +80000ba4: 93 04 00 00 mv s1, zero +80000ba8: 83 27 04 00 lw a5, 0(s0) +80000bac: 93 84 14 00 addi s1, s1, 1 +80000bb0: 13 04 44 00 addi s0, s0, 4 +80000bb4: e7 80 07 00 jalr a5 +80000bb8: e3 18 99 fe bne s2, s1, -16 +80000bbc: 37 14 00 80 lui s0, 524289 +80000bc0: 37 19 00 80 lui s2, 524289 +80000bc4: 93 07 04 00 mv a5, s0 +80000bc8: 13 09 49 00 addi s2, s2, 4 +80000bcc: 33 09 f9 40 sub s2, s2, a5 +80000bd0: 13 59 29 40 srai s2, s2, 2 +80000bd4: 63 00 09 02 beqz s2, 32 +80000bd8: 13 04 04 00 mv s0, s0 +80000bdc: 93 04 00 00 mv s1, zero +80000be0: 83 27 04 00 lw a5, 0(s0) +80000be4: 93 84 14 00 addi s1, s1, 1 +80000be8: 13 04 44 00 addi s0, s0, 4 +80000bec: e7 80 07 00 jalr a5 +80000bf0: e3 18 99 fe bne s2, s1, -16 +80000bf4: 83 20 c1 00 lw ra, 12(sp) +80000bf8: 03 24 81 00 lw s0, 8(sp) +80000bfc: 83 24 41 00 lw s1, 4(sp) +80000c00: 03 29 01 00 lw s2, 0(sp) +80000c04: 13 01 01 01 addi sp, sp, 16 +80000c08: 67 80 00 00 ret + +80000c0c memset: +80000c0c: 13 03 f0 00 addi t1, zero, 15 +80000c10: 13 07 05 00 mv a4, a0 +80000c14: 63 7e c3 02 bgeu t1, a2, 60 +80000c18: 93 77 f7 00 andi a5, a4, 15 +80000c1c: 63 90 07 0a bnez a5, 160 +80000c20: 63 92 05 08 bnez a1, 132 +80000c24: 93 76 06 ff andi a3, a2, -16 +80000c28: 13 76 f6 00 andi a2, a2, 15 +80000c2c: b3 86 e6 00 add a3, a3, a4 +80000c30: 23 20 b7 00 sw a1, 0(a4) +80000c34: 23 22 b7 00 sw a1, 4(a4) +80000c38: 23 24 b7 00 sw a1, 8(a4) +80000c3c: 23 26 b7 00 sw a1, 12(a4) +80000c40: 13 07 07 01 addi a4, a4, 16 +80000c44: e3 66 d7 fe bltu a4, a3, -20 +80000c48: 63 14 06 00 bnez a2, 8 +80000c4c: 67 80 00 00 ret +80000c50: b3 06 c3 40 sub a3, t1, a2 +80000c54: 93 96 26 00 slli a3, a3, 2 +80000c58: 97 02 00 00 auipc t0, 0 +80000c5c: b3 86 56 00 add a3, a3, t0 +80000c60: 67 80 c6 00 jr 12(a3) +80000c64: 23 07 b7 00 sb a1, 14(a4) +80000c68: a3 06 b7 00 sb a1, 13(a4) +80000c6c: 23 06 b7 00 sb a1, 12(a4) +80000c70: a3 05 b7 00 sb a1, 11(a4) +80000c74: 23 05 b7 00 sb a1, 10(a4) +80000c78: a3 04 b7 00 sb a1, 9(a4) +80000c7c: 23 04 b7 00 sb a1, 8(a4) +80000c80: a3 03 b7 00 sb a1, 7(a4) +80000c84: 23 03 b7 00 sb a1, 6(a4) +80000c88: a3 02 b7 00 sb a1, 5(a4) +80000c8c: 23 02 b7 00 sb a1, 4(a4) +80000c90: a3 01 b7 00 sb a1, 3(a4) +80000c94: 23 01 b7 00 sb a1, 2(a4) +80000c98: a3 00 b7 00 sb a1, 1(a4) +80000c9c: 23 00 b7 00 sb a1, 0(a4) +80000ca0: 67 80 00 00 ret +80000ca4: 93 f5 f5 0f andi a1, a1, 255 +80000ca8: 93 96 85 00 slli a3, a1, 8 +80000cac: b3 e5 d5 00 or a1, a1, a3 +80000cb0: 93 96 05 01 slli a3, a1, 16 +80000cb4: b3 e5 d5 00 or a1, a1, a3 +80000cb8: 6f f0 df f6 j -148 +80000cbc: 93 96 27 00 slli a3, a5, 2 +80000cc0: 97 02 00 00 auipc t0, 0 +80000cc4: b3 86 56 00 add a3, a3, t0 +80000cc8: 93 82 00 00 mv t0, ra +80000ccc: e7 80 06 fa jalr -96(a3) +80000cd0: 93 80 02 00 mv ra, t0 +80000cd4: 93 87 07 ff addi a5, a5, -16 +80000cd8: 33 07 f7 40 sub a4, a4, a5 +80000cdc: 33 06 f6 00 add a2, a2, a5 +80000ce0: e3 78 c3 f6 bgeu t1, a2, -144 +80000ce4: 6f f0 df f3 j -196 + +80000ce8 __register_exitproc: +80000ce8: b7 17 00 80 lui a5, 524289 +80000cec: 03 a7 07 43 lw a4, 1072(a5) +80000cf0: 83 27 87 14 lw a5, 328(a4) +80000cf4: 63 8c 07 04 beqz a5, 88 +80000cf8: 03 a7 47 00 lw a4, 4(a5) +80000cfc: 13 08 f0 01 addi a6, zero, 31 +80000d00: 63 4e e8 06 blt a6, a4, 124 +80000d04: 13 18 27 00 slli a6, a4, 2 +80000d08: 63 06 05 02 beqz a0, 44 +80000d0c: 33 83 07 01 add t1, a5, a6 +80000d10: 23 24 c3 08 sw a2, 136(t1) +80000d14: 83 a8 87 18 lw a7, 392(a5) +80000d18: 13 06 10 00 addi a2, zero, 1 +80000d1c: 33 16 e6 00 sll a2, a2, a4 +80000d20: b3 e8 c8 00 or a7, a7, a2 +80000d24: 23 a4 17 19 sw a7, 392(a5) +80000d28: 23 24 d3 10 sw a3, 264(t1) +80000d2c: 93 06 20 00 addi a3, zero, 2 +80000d30: 63 04 d5 02 beq a0, a3, 40 +80000d34: 13 07 17 00 addi a4, a4, 1 +80000d38: 23 a2 e7 00 sw a4, 4(a5) +80000d3c: b3 87 07 01 add a5, a5, a6 +80000d40: 23 a4 b7 00 sw a1, 8(a5) +80000d44: 13 05 00 00 mv a0, zero +80000d48: 67 80 00 00 ret +80000d4c: 93 07 c7 14 addi a5, a4, 332 +80000d50: 23 24 f7 14 sw a5, 328(a4) +80000d54: 6f f0 5f fa j -92 +80000d58: 83 a6 c7 18 lw a3, 396(a5) +80000d5c: 13 07 17 00 addi a4, a4, 1 +80000d60: 23 a2 e7 00 sw a4, 4(a5) +80000d64: 33 e6 c6 00 or a2, a3, a2 +80000d68: 23 a6 c7 18 sw a2, 396(a5) +80000d6c: b3 87 07 01 add a5, a5, a6 +80000d70: 23 a4 b7 00 sw a1, 8(a5) +80000d74: 13 05 00 00 mv a0, zero +80000d78: 67 80 00 00 ret +80000d7c: 13 05 f0 ff addi a0, zero, -1 +80000d80: 67 80 00 00 ret + +80000d84 __call_exitprocs: +80000d84: 13 01 01 fd addi sp, sp, -48 +80000d88: b7 17 00 80 lui a5, 524289 +80000d8c: 23 2c 41 01 sw s4, 24(sp) +80000d90: 03 aa 07 43 lw s4, 1072(a5) +80000d94: 23 20 21 03 sw s2, 32(sp) +80000d98: 23 26 11 02 sw ra, 44(sp) +80000d9c: 03 29 8a 14 lw s2, 328(s4) +80000da0: 23 24 81 02 sw s0, 40(sp) +80000da4: 23 22 91 02 sw s1, 36(sp) +80000da8: 23 2e 31 01 sw s3, 28(sp) +80000dac: 23 2a 51 01 sw s5, 20(sp) +80000db0: 23 28 61 01 sw s6, 16(sp) +80000db4: 23 26 71 01 sw s7, 12(sp) +80000db8: 23 24 81 01 sw s8, 8(sp) +80000dbc: 63 00 09 04 beqz s2, 64 +80000dc0: 13 0b 05 00 mv s6, a0 +80000dc4: 93 8b 05 00 mv s7, a1 +80000dc8: 93 0a 10 00 addi s5, zero, 1 +80000dcc: 93 09 f0 ff addi s3, zero, -1 +80000dd0: 83 24 49 00 lw s1, 4(s2) +80000dd4: 13 84 f4 ff addi s0, s1, -1 +80000dd8: 63 42 04 02 bltz s0, 36 +80000ddc: 93 94 24 00 slli s1, s1, 2 +80000de0: b3 04 99 00 add s1, s2, s1 +80000de4: 63 84 0b 04 beqz s7, 72 +80000de8: 83 a7 44 10 lw a5, 260(s1) +80000dec: 63 80 77 05 beq a5, s7, 64 +80000df0: 13 04 f4 ff addi s0, s0, -1 +80000df4: 93 84 c4 ff addi s1, s1, -4 +80000df8: e3 16 34 ff bne s0, s3, -20 +80000dfc: 83 20 c1 02 lw ra, 44(sp) +80000e00: 03 24 81 02 lw s0, 40(sp) +80000e04: 83 24 41 02 lw s1, 36(sp) +80000e08: 03 29 01 02 lw s2, 32(sp) +80000e0c: 83 29 c1 01 lw s3, 28(sp) +80000e10: 03 2a 81 01 lw s4, 24(sp) +80000e14: 83 2a 41 01 lw s5, 20(sp) +80000e18: 03 2b 01 01 lw s6, 16(sp) +80000e1c: 83 2b c1 00 lw s7, 12(sp) +80000e20: 03 2c 81 00 lw s8, 8(sp) +80000e24: 13 01 01 03 addi sp, sp, 48 +80000e28: 67 80 00 00 ret +80000e2c: 83 27 49 00 lw a5, 4(s2) +80000e30: 83 a6 44 00 lw a3, 4(s1) +80000e34: 93 87 f7 ff addi a5, a5, -1 +80000e38: 63 8e 87 04 beq a5, s0, 92 +80000e3c: 23 a2 04 00 sw zero, 4(s1) +80000e40: e3 88 06 fa beqz a3, -80 +80000e44: 83 27 89 18 lw a5, 392(s2) +80000e48: 33 97 8a 00 sll a4, s5, s0 +80000e4c: 03 2c 49 00 lw s8, 4(s2) +80000e50: b3 77 f7 00 and a5, a4, a5 +80000e54: 63 92 07 02 bnez a5, 36 +80000e58: e7 80 06 00 jalr a3 +80000e5c: 03 27 49 00 lw a4, 4(s2) +80000e60: 83 27 8a 14 lw a5, 328(s4) +80000e64: 63 14 87 01 bne a4, s8, 8 +80000e68: e3 04 f9 f8 beq s2, a5, -120 +80000e6c: e3 88 07 f8 beqz a5, -112 +80000e70: 13 89 07 00 mv s2, a5 +80000e74: 6f f0 df f5 j -164 +80000e78: 83 27 c9 18 lw a5, 396(s2) +80000e7c: 83 a5 44 08 lw a1, 132(s1) +80000e80: 33 77 f7 00 and a4, a4, a5 +80000e84: 63 1c 07 00 bnez a4, 24 +80000e88: 13 05 0b 00 mv a0, s6 +80000e8c: e7 80 06 00 jalr a3 +80000e90: 6f f0 df fc j -52 +80000e94: 23 22 89 00 sw s0, 4(s2) +80000e98: 6f f0 9f fa j -88 +80000e9c: 13 85 05 00 mv a0, a1 +80000ea0: e7 80 06 00 jalr a3 +80000ea4: 6f f0 9f fb j -72 Disassembly of section .init_array: @@ -1178,7 +1210,7 @@ Disassembly of section .symtab: ae: f1 ff b0: 0e 00 b2: 00 00 - b4: 3c 08 + b4: bc 08 b6: 00 80 b8: 00 00 ba: 00 00 @@ -1206,7 +1238,7 @@ Disassembly of section .symtab: fc: 04 00 fe: f1 ff 100: 63 00 00 00 beqz zero, 0 - 104: 04 04 + 104: 84 04 106: 00 80 108: 24 00 10a: 00 00 @@ -1311,7 +1343,7 @@ Disassembly of section .symtab: 21c: 00 00 21e: 03 00 2c 01 lb zero, 18(s8) 222: 00 00 - 224: 90 08 + 224: 10 09 226: 00 80 228: 00 00 22a: 00 00 @@ -1323,61 +1355,62 @@ Disassembly of section .symtab: 23e: f1 ff 240: 45 01 242: 00 00 - 244: 40 08 + 244: c0 08 246: 00 80 248: 00 00 24a: 00 00 24c: 12 00 24e: 02 00 250: 4f 01 00 00 fnmadd.s ft2, ft0, ft0, ft0, rne - 254: 54 0a + 254: d4 0a 256: 00 80 258: 0c 00 25a: 00 00 25c: 12 00 25e: 02 00 260: 57 01 00 00 - 264: 68 00 - 266: 00 80 - 268: 4c 01 + 264: 00 04 + 266: 00 00 + 268: 00 00 26a: 00 00 - 26c: 12 00 - 26e: 02 00 - 270: 6d 01 + 26c: 10 00 + 26e: f1 ff + 270: 64 01 272: 00 00 - 274: 00 04 - 276: 00 00 - 278: 00 00 + 274: 3c 14 + 276: 00 80 + 278: 40 00 27a: 00 00 - 27c: 10 00 - 27e: f1 ff - 280: 7a 01 + 27c: 11 00 + 27e: 07 00 72 01 282: 00 00 - 284: 3c 14 + 284: c8 08 286: 00 80 - 288: 20 00 + 288: 00 00 28a: 00 00 - 28c: 11 00 - 28e: 07 00 88 01 + 28c: 12 00 + 28e: 02 00 + 290: 79 01 292: 00 00 - 294: 48 08 + 294: 30 14 296: 00 80 298: 00 00 29a: 00 00 - 29c: 12 00 - 29e: 02 00 - 2a0: 8f 01 00 00 - 2a4: 30 14 + 29c: 10 00 + 29e: 05 00 + 2a0: b8 02 + 2a2: 00 00 + 2a4: 40 09 2a6: 00 80 - 2a8: 00 00 + 2a8: 74 00 2aa: 00 00 - 2ac: 10 00 - 2ae: 05 00 - 2b0: b8 02 + 2ac: 12 00 + 2ae: 02 00 + 2b0: 89 01 2b2: 00 00 - 2b4: c0 08 + 2b4: 68 00 2b6: 00 80 - 2b8: 74 00 + 2b8: 48 01 2ba: 00 00 2bc: 12 00 2be: 02 00 @@ -1390,7 +1423,7 @@ Disassembly of section .symtab: 2ce: f1 ff 2d0: b0 01 2d2: 00 00 - 2d4: 60 08 + 2d4: e0 08 2d6: 00 80 2d8: 00 00 2da: 00 00 @@ -1398,7 +1431,7 @@ Disassembly of section .symtab: 2de: 02 00 2e0: b8 01 2e2: 00 00 - 2e4: a0 08 + 2e4: 20 09 2e6: 00 80 2e8: 00 00 2ea: 00 00 @@ -1406,15 +1439,15 @@ Disassembly of section .symtab: 2ee: 02 00 2f0: c5 01 2f2: 00 00 - 2f4: 4c 02 + 2f4: 48 02 2f6: 00 80 - 2f8: 88 01 + 2f8: 0c 02 2fa: 00 00 2fc: 12 00 2fe: 02 00 300: d2 01 302: 00 00 - 304: 58 08 + 304: d8 08 306: 00 80 308: 00 00 30a: 00 00 @@ -1436,7 +1469,7 @@ Disassembly of section .symtab: 32e: 05 00 330: fe 01 332: 00 00 - 334: f0 0a + 334: 70 0b 336: 00 80 338: 9c 00 33a: 00 00 @@ -1444,14 +1477,14 @@ Disassembly of section .symtab: 33e: 02 00 340: 10 02 342: 00 00 - 344: 98 08 + 344: 18 09 346: 00 80 348: 00 00 34a: 00 00 34c: 12 00 34e: 02 00 350: 1f 02 00 00 - 354: 68 08 + 354: e8 08 356: 00 80 358: 00 00 35a: 00 00 @@ -1459,14 +1492,14 @@ Disassembly of section .symtab: 35e: 02 00 360: 2a 02 362: 00 00 - 364: 78 08 + 364: f8 08 366: 00 80 368: 00 00 36a: 00 00 36c: 12 00 36e: 02 00 370: 37 02 00 00 lui tp, 0 - 374: 94 0a + 374: 14 0b 376: 00 80 378: 5c 00 37a: 00 00 @@ -1482,14 +1515,14 @@ Disassembly of section .symtab: 38e: f1 ff 390: 55 02 392: 00 00 - 394: 04 08 + 394: 84 08 396: 00 80 398: 00 00 39a: 00 00 39c: 12 00 39e: 02 00 3a0: 5f 02 00 00 - 3a4: 50 08 + 3a4: d0 08 3a6: 00 80 3a8: 00 00 3aa: 00 00 @@ -1497,7 +1530,7 @@ Disassembly of section .symtab: 3ae: 02 00 3b0: 6a 02 3b2: 00 00 - 3b4: 04 0d + 3b4: 84 0d 3b6: 00 80 3b8: 24 01 3ba: 00 00 @@ -1512,14 +1545,14 @@ Disassembly of section .symtab: 3cc: 12 00 3ce: 01 00 3d0: 7b 02 00 00 - 3d4: 68 0c + 3d4: e8 0c 3d6: 00 80 3d8: 9c 00 3da: 00 00 3dc: 12 00 3de: 02 00 3e0: 8f 02 00 00 - 3e4: 5c 14 + 3e4: 7c 14 3e6: 00 80 3e8: 00 00 3ea: 00 00 @@ -1533,7 +1566,7 @@ Disassembly of section .symtab: 3fc: 10 00 3fe: 06 00 400: a7 02 00 00 - 404: 8c 0b + 404: 0c 0c 406: 00 80 408: dc 00 40a: 00 00 @@ -1541,7 +1574,7 @@ Disassembly of section .symtab: 40e: 02 00 410: ae 02 412: 00 00 - 414: 34 09 + 414: b4 09 416: 00 80 418: 0c 01 41a: 00 00 @@ -1549,21 +1582,21 @@ Disassembly of section .symtab: 41e: 02 00 420: be 02 422: 00 00 - 424: d4 03 + 424: 54 04 426: 00 80 428: 30 00 42a: 00 00 42c: 12 00 42e: 02 00 430: c3 02 00 00 fmadd.s ft5, ft0, ft0, ft0, rne - 434: b8 06 + 434: 38 07 436: 00 80 438: 44 01 43a: 00 00 43c: 12 00 43e: 02 00 440: ef 02 00 00 jal t0, 0 - 444: b0 08 + 444: 30 09 446: 00 80 448: 00 00 44a: 00 00 @@ -1571,7 +1604,7 @@ Disassembly of section .symtab: 44e: 02 00 450: fd 02 452: 00 00 - 454: 40 0a + 454: c0 0a 456: 00 80 458: 14 00 45a: 00 00 @@ -1587,7 +1620,7 @@ Disassembly of section .symtab: 46e: 05 00 470: 04 03 472: 00 00 - 474: 88 08 + 474: 08 09 476: 00 80 478: 00 00 47a: 00 00 @@ -1595,91 +1628,91 @@ Disassembly of section .symtab: 47e: 02 00 480: 12 03 482: 00 00 - 484: a8 08 + 484: 28 09 486: 00 80 488: 00 00 48a: 00 00 48c: 12 00 48e: 02 00 490: 1f 03 00 00 - 494: 70 08 + 494: f0 08 496: 00 80 498: 00 00 49a: 00 00 49c: 12 00 49e: 02 00 4a0: 2b 03 00 00 - 4a4: b4 01 + 4a4: ec 05 4a6: 00 80 - 4a8: 98 00 + 4a8: 4c 01 4aa: 00 00 4ac: 12 00 4ae: 02 00 - 4b0: 44 03 + 4b0: 52 03 4b2: 00 00 - 4b4: 6c 05 + 4b4: 08 10 4b6: 00 80 - 4b8: 4c 01 + 4b8: 00 00 4ba: 00 00 - 4bc: 12 00 - 4be: 02 00 - 4c0: 6b 03 00 00 - 4c4: 08 10 + 4bc: 10 00 + 4be: 04 00 + 4c0: 61 03 + 4c2: 00 00 + 4c4: 38 14 4c6: 00 80 4c8: 00 00 4ca: 00 00 4cc: 10 00 - 4ce: 04 00 - 4d0: 7a 03 + 4ce: 05 00 + 4d0: c6 00 4d2: 00 00 - 4d4: 38 14 + 4d4: 7c 14 4d6: 00 80 4d8: 00 00 4da: 00 00 4dc: 10 00 - 4de: 05 00 - 4e0: c6 00 + 4de: 07 00 68 03 4e2: 00 00 - 4e4: 5c 14 + 4e4: a8 04 4e6: 00 80 - 4e8: 00 00 + 4e8: 44 01 4ea: 00 00 - 4ec: 10 00 - 4ee: 07 00 81 03 + 4ec: 12 00 + 4ee: 02 00 + 4f0: b4 03 4f2: 00 00 - 4f4: 28 04 + 4f4: e0 0a 4f6: 00 80 - 4f8: 44 01 + 4f8: 34 00 4fa: 00 00 4fc: 12 00 4fe: 02 00 - 500: ad 03 + 500: 85 03 502: 00 00 - 504: 60 0a + 504: b0 01 506: 00 80 - 508: 34 00 + 508: 98 00 50a: 00 00 50c: 12 00 50e: 02 00 - 510: 9e 03 + 510: a5 03 512: 00 00 - 514: 80 08 + 514: 00 09 516: 00 80 518: 00 00 51a: 00 00 51c: 12 00 51e: 02 00 - 520: ac 03 - 522: 00 00 - 524: fc 07 + 520: b3 03 00 00 add t2, zero, zero + 524: 7c 08 526: 00 80 528: 00 00 52a: 00 00 52c: 12 00 52e: 02 00 - 530: b2 03 + 530: b9 03 532: 00 00 - 534: b8 08 + 534: 38 09 536: 00 80 538: 00 00 53a: 00 00 @@ -1719,13 +1752,13 @@ Disassembly of section .strtab: 3e: 5f 6b 65 72 42: 6e 65 44: 6c 2d - 46: 64 32 - 48: 2d 66 - 4a: 65 2d - 4c: 30 31 - 4e: 2d 38 - 50: 62 2d - 52: 32 39 + 46: 36 35 + 48: 2d 62 + 4a: 62 2d + 4c: 30 66 + 4e: 2d 37 + 50: 34 2d + 52: 62 31 54: 2e 63 56: 00 70 58: 61 72 @@ -1823,32 +1856,31 @@ Disassembly of section .strtab: 14c: 77 6e 00 5f 150: 5f 65 72 72 154: 6e 6f - 156: 00 6b - 158: 65 72 - 15a: 6e 65 - 15c: 6c 5f - 15e: 73 70 61 77 csrci 1910, 2 - 162: 6e 5f - 164: 72 75 - 166: 6e 5f - 168: 77 61 72 70 - 16c: 00 5f - 16e: 5f 73 74 61 - 172: 63 6b 5f 73 bltu t5, s5, 1846 - 176: 69 7a - 178: 65 00 - 17a: 67 5f 77 73 - 17e: 70 61 - 180: 77 6e 5f 61 - 184: 72 67 - 186: 73 00 76 78 - 18a: 5f 74 6d 63 - 18e: 00 5f - 190: 5f 53 44 41 - 194: 54 41 - 196: 5f 42 45 47 - 19a: 49 4e - 19c: 5f 5f 00 5f + 156: 00 5f + 158: 5f 73 74 61 + 15c: 63 6b 5f 73 bltu t5, s5, 1846 + 160: 69 7a + 162: 65 00 + 164: 67 5f 77 73 + 168: 70 61 + 16a: 77 6e 5f 61 + 16e: 72 67 + 170: 73 00 76 78 + 174: 5f 74 6d 63 + 178: 00 5f + 17a: 5f 53 44 41 + 17e: 54 41 + 180: 5f 42 45 47 + 184: 49 4e + 186: 5f 5f 00 6b + 18a: 65 72 + 18c: 6e 65 + 18e: 6c 5f + 190: 73 70 61 77 csrci 1910, 2 + 194: 6e 5f + 196: 63 61 6c 6c bltu s8, t1, 1730 + 19a: 62 61 + 19c: 63 6b 00 5f bltu zero, a6, 1526 1a0: 5f 67 6c 6f 1a4: 62 61 1a6: 6c 5f @@ -1994,65 +2026,68 @@ Disassembly of section .strtab: 320: 78 5f 322: 77 61 72 70 326: 5f 67 69 64 - 32a: 00 6b - 32c: 65 72 - 32e: 6e 65 - 330: 6c 5f - 332: 73 70 61 77 csrci 1910, 2 - 336: 6e 5f - 338: 72 75 - 33a: 6e 5f - 33c: 74 68 - 33e: 72 65 - 340: 61 64 - 342: 73 00 5f 70 - 346: 6f 63 6c 5f jal t1, 812534 - 34a: 6b 65 72 6e - 34e: 65 6c - 350: 5f 4e 65 61 - 354: 72 65 - 356: 73 74 4e 65 csrrci s0, 1620, 28 - 35a: 69 67 - 35c: 68 62 - 35e: 6f 72 5f 77 jal tp, 1015668 - 362: 6f 72 6b 67 jal tp, 751222 - 366: 72 6f - 368: 75 70 - 36a: 00 5f - 36c: 5f 44 41 54 - 370: 41 5f - 372: 42 45 - 374: 47 49 4e 5f - 378: 5f 00 5f 65 - 37c: 64 61 - 37e: 74 61 - 380: 00 5f - 382: 70 6f - 384: 63 6c 5f 6b bltu t5, s5, 1720 - 388: 65 72 - 38a: 6e 65 - 38c: 6c 5f - 38e: 4e 65 - 390: 61 72 - 392: 65 73 - 394: 74 4e - 396: 65 69 - 398: 67 68 62 6f - 39c: 72 00 - 39e: 76 78 - 3a0: 5f 74 68 72 - 3a4: 65 61 - 3a6: 64 5f - 3a8: 6c 69 - 3aa: 64 00 - 3ac: 5f 65 78 69 - 3b0: 74 00 - 3b2: 76 78 - 3b4: 5f 6e 75 6d - 3b8: 5f 69 6e 73 - 3bc: 74 72 - 3be: 73 - 3bf: 00 + 32a: 00 5f + 32c: 70 6f + 32e: 63 6c 5f 6b bltu t5, s5, 1720 + 332: 65 72 + 334: 6e 65 + 336: 6c 5f + 338: 4e 65 + 33a: 61 72 + 33c: 65 73 + 33e: 74 4e + 340: 65 69 + 342: 67 68 62 6f + 346: 72 5f + 348: 77 6f 72 6b + 34c: 67 72 6f 75 + 350: 70 00 + 352: 5f 5f 44 41 + 356: 54 41 + 358: 5f 42 45 47 + 35c: 49 4e + 35e: 5f 5f 00 5f + 362: 65 64 + 364: 61 74 + 366: 61 00 + 368: 5f 70 6f 63 + 36c: 6c 5f + 36e: 6b 65 72 6e + 372: 65 6c + 374: 5f 4e 65 61 + 378: 72 65 + 37a: 73 74 4e 65 csrrci s0, 1620, 28 + 37e: 69 67 + 380: 68 62 + 382: 6f 72 00 6b jal tp, 30384 + 386: 65 72 + 388: 6e 65 + 38a: 6c 5f + 38c: 73 70 61 77 csrci 1910, 2 + 390: 6e 5f + 392: 72 65 + 394: 6d 61 + 396: 69 6e + 398: 69 6e + 39a: 67 5f 63 61 + 39e: 6c 6c + 3a0: 62 61 + 3a2: 63 6b 00 76 bltu zero, zero, 1910 + 3a6: 78 5f + 3a8: 74 68 + 3aa: 72 65 + 3ac: 61 64 + 3ae: 5f 6c 69 64 + 3b2: 00 5f + 3b4: 65 78 + 3b6: 69 74 + 3b8: 00 76 + 3ba: 78 5f + 3bc: 6e 75 + 3be: 6d 5f + 3c0: 69 6e + 3c2: 73 74 72 73 csrrci s0, 1847, 4 + 3c6: 00 Disassembly of section .shstrtab: diff --git a/benchmarks/opencl/nearn/kernel.pocl b/benchmarks/opencl/nearn/kernel.pocl index 6bfd086c..7cd6e666 100644 Binary files a/benchmarks/opencl/nearn/kernel.pocl and b/benchmarks/opencl/nearn/kernel.pocl differ diff --git a/benchmarks/opencl/saxpy/kernel.pocl b/benchmarks/opencl/saxpy/kernel.pocl index 77760b82..0672abde 100644 Binary files a/benchmarks/opencl/saxpy/kernel.pocl and b/benchmarks/opencl/saxpy/kernel.pocl differ diff --git a/benchmarks/opencl/saxpy/saxpy.dump b/benchmarks/opencl/saxpy/saxpy.dump index 11035981..1e8b2af7 100644 --- a/benchmarks/opencl/saxpy/saxpy.dump +++ b/benchmarks/opencl/saxpy/saxpy.dump @@ -1,30 +1,30 @@ -/tmp/pocl_vortex_kernel-93-6c-36-8e-a9.elf: file format ELF32-riscv +/tmp/pocl_vortex_kernel-cd-81-06-70-1c.elf: file format ELF32-riscv Disassembly of section .init: 80000000 _start: 80000000: 97 05 00 00 auipc a1, 0 -80000004: 93 85 85 5a addi a1, a1, 1448 +80000004: 93 85 85 62 addi a1, a1, 1576 80000008: 73 25 10 fc csrr a0, 4033 8000000c: 6b 10 b5 00 -80000010: ef 00 80 59 jal 1432 +80000010: ef 00 80 61 jal 1560 80000014: 13 05 10 00 addi a0, zero, 1 80000018: 6b 00 05 00 8000001c: 17 25 00 00 auipc a0, 2 -80000020: 13 05 85 e5 addi a0, a0, -424 +80000020: 13 05 85 ed addi a0, a0, -296 80000024: 17 26 00 00 auipc a2, 2 -80000028: 13 06 06 e7 addi a2, a2, -400 +80000028: 13 06 06 f1 addi a2, a2, -240 8000002c: 33 06 a6 40 sub a2, a2, a0 80000030: 93 05 00 00 mv a1, zero -80000034: ef 00 00 77 jal 1904 +80000034: ef 00 00 7f jal 2032 80000038: 17 05 00 00 auipc a0, 0 -8000003c: 13 05 45 67 addi a0, a0, 1652 -80000040: ef 00 40 62 jal 1572 -80000044: ef 00 40 6c jal 1732 -80000048: ef 00 c0 38 jal 908 -8000004c: 6f 00 c0 62 j 1580 +8000003c: 13 05 45 6f addi a0, a0, 1780 +80000040: ef 00 40 6a jal 1700 +80000044: ef 00 40 74 jal 1860 +80000048: ef 00 c0 40 jal 1036 +8000004c: 6f 00 c0 6a j 1708 Disassembly of section .text: @@ -32,11 +32,11 @@ Disassembly of section .text: 80000050: 93 07 00 00 mv a5, zero 80000054: 63 88 07 00 beqz a5, 16 80000058: 37 05 00 80 lui a0, 524288 -8000005c: 13 05 c5 6a addi a0, a0, 1708 -80000060: 6f 00 40 60 j 1540 +8000005c: 13 05 c5 72 addi a0, a0, 1836 +80000060: 6f 00 40 68 j 1668 80000064: 67 80 00 00 ret -80000068 kernel_spawn_run_warp: +80000068 kernel_spawn_callback: 80000068: 13 01 01 fd addi sp, sp, -48 8000006c: 23 26 11 02 sw ra, 44(sp) 80000070: 23 24 81 02 sw s0, 40(sp) @@ -48,190 +48,190 @@ Disassembly of section .text: 80000088: 23 28 61 01 sw s6, 16(sp) 8000008c: 23 26 71 01 sw s7, 12(sp) 80000090: 23 24 81 01 sw s8, 8(sp) -80000094: ef 00 80 5a jal 1448 -80000098: ef 00 40 55 jal 1364 -8000009c: ef 00 80 59 jal 1432 +80000094: ef 00 80 62 jal 1576 +80000098: ef 00 40 5d jal 1492 +8000009c: ef 00 80 61 jal 1560 800000a0: 93 04 05 00 mv s1, a0 -800000a4: ef 00 80 56 jal 1384 +800000a4: ef 00 80 5e jal 1512 800000a8: 93 09 05 00 mv s3, a0 -800000ac: ef 00 00 57 jal 1392 +800000ac: ef 00 00 5f jal 1520 800000b0: 13 09 05 00 mv s2, a0 -800000b4: ef 00 80 58 jal 1416 +800000b4: ef 00 80 60 jal 1544 800000b8: b7 25 00 80 lui a1, 524290 -800000bc: 93 85 45 e7 addi a1, a1, -396 +800000bc: 93 85 45 ef addi a1, a1, -268 800000c0: 13 96 24 00 slli a2, s1, 2 800000c4: b3 05 b6 00 add a1, a2, a1 -800000c8: 03 ab 05 00 lw s6, 0(a1) -800000cc: 83 25 4b 01 lw a1, 20(s6) -800000d0: 03 26 0b 01 lw a2, 16(s6) -800000d4: 93 86 05 00 mv a3, a1 -800000d8: 63 c4 35 01 blt a1, s3, 8 -800000dc: 93 86 09 00 mv a3, s3 -800000e0: b3 a5 b9 00 slt a1, s3, a1 -800000e4: 33 07 b6 00 add a4, a2, a1 -800000e8: 93 05 10 00 addi a1, zero, 1 -800000ec: 63 4a b7 08 blt a4, a1, 148 -800000f0: 83 25 0b 00 lw a1, 0(s6) -800000f4: 83 aa 05 00 lw s5, 0(a1) -800000f8: 83 a7 45 00 lw a5, 4(a1) -800000fc: 83 24 cb 00 lw s1, 12(s6) -80000100: 33 8a 57 03 mul s4, a5, s5 -80000104: 13 0c f7 ff addi s8, a4, -1 -80000108: 33 86 c9 02 mul a2, s3, a2 -8000010c: 33 86 c6 00 add a2, a3, a2 -80000110: 33 05 c5 02 mul a0, a0, a2 -80000114: 33 85 a4 00 add a0, s1, a0 -80000118: 33 06 e9 02 mul a2, s2, a4 -8000011c: b3 04 c5 00 add s1, a0, a2 -80000120: 33 09 f0 40 neg s2, a5 -80000124: b3 0b 40 41 neg s7, s4 -80000128: 33 c6 44 03 div a2, s1, s4 +800000c8: 03 ac 05 00 lw s8, 0(a1) +800000cc: 83 26 4c 01 lw a3, 20(s8) +800000d0: 83 25 0c 01 lw a1, 16(s8) +800000d4: 13 86 06 00 mv a2, a3 +800000d8: 63 c4 36 01 blt a3, s3, 8 +800000dc: 13 86 09 00 mv a2, s3 +800000e0: b3 a6 d9 00 slt a3, s3, a3 +800000e4: b3 86 d5 00 add a3, a1, a3 +800000e8: 13 07 10 00 addi a4, zero, 1 +800000ec: 63 c8 e6 08 blt a3, a4, 144 +800000f0: 33 87 35 03 mul a4, a1, s3 +800000f4: 83 25 0c 00 lw a1, 0(s8) +800000f8: 33 06 e6 00 add a2, a2, a4 +800000fc: 03 27 cc 00 lw a4, 12(s8) +80000100: 33 05 a6 02 mul a0, a2, a0 +80000104: 03 aa 05 00 lw s4, 0(a1) +80000108: 03 a6 45 00 lw a2, 4(a1) +8000010c: 33 05 e5 00 add a0, a0, a4 +80000110: 33 87 26 03 mul a4, a3, s2 +80000114: 33 04 e5 00 add s0, a0, a4 +80000118: 33 09 46 03 mul s2, a2, s4 +8000011c: b3 0a d4 00 add s5, s0, a3 +80000120: 33 0b c0 40 neg s6, a2 +80000124: b3 0b 20 41 neg s7, s2 +80000128: 33 46 24 03 div a2, s0, s2 8000012c: 33 85 cb 02 mul a0, s7, a2 -80000130: 33 85 a4 00 add a0, s1, a0 -80000134: b3 46 55 03 div a3, a0, s5 +80000130: 33 05 a4 00 add a0, s0, a0 +80000134: b3 46 45 03 div a3, a0, s4 80000138: 03 a5 c5 00 lw a0, 12(a1) -8000013c: 33 07 c9 02 mul a4, s2, a2 +8000013c: 33 07 cb 02 mul a4, s6, a2 80000140: 33 07 d7 40 sub a4, a4, a3 -80000144: 33 87 ea 02 mul a4, s5, a4 +80000144: 33 07 ea 02 mul a4, s4, a4 80000148: 33 08 e5 00 add a6, a0, a4 8000014c: 03 a7 05 01 lw a4, 16(a1) -80000150: 03 a4 45 01 lw s0, 20(a1) -80000154: 83 27 4b 00 lw a5, 4(s6) -80000158: 03 25 8b 00 lw a0, 8(s6) +80000150: 83 a4 45 01 lw s1, 20(a1) +80000154: 83 27 4c 00 lw a5, 4(s8) +80000158: 03 25 8c 00 lw a0, 8(s8) 8000015c: b3 06 d7 00 add a3, a4, a3 -80000160: 33 07 c4 00 add a4, s0, a2 -80000164: 33 86 04 01 add a2, s1, a6 +80000160: 33 87 c4 00 add a4, s1, a2 +80000164: 33 06 04 01 add a2, s0, a6 80000168: e7 80 07 00 jalr a5 -8000016c: 63 0a 0c 00 beqz s8, 20 -80000170: 83 25 0b 00 lw a1, 0(s6) -80000174: 13 0c fc ff addi s8, s8, -1 -80000178: 93 84 14 00 addi s1, s1, 1 -8000017c: 6f f0 df fa j -84 -80000180: 13 b5 19 00 seqz a0, s3 -80000184: 03 2c 81 00 lw s8, 8(sp) -80000188: 83 2b c1 00 lw s7, 12(sp) -8000018c: 03 2b 01 01 lw s6, 16(sp) -80000190: 83 2a 41 01 lw s5, 20(sp) -80000194: 03 2a 81 01 lw s4, 24(sp) -80000198: 83 29 c1 01 lw s3, 28(sp) -8000019c: 03 29 01 02 lw s2, 32(sp) -800001a0: 83 24 41 02 lw s1, 36(sp) -800001a4: 03 24 81 02 lw s0, 40(sp) -800001a8: 83 20 c1 02 lw ra, 44(sp) -800001ac: 13 01 01 03 addi sp, sp, 48 -800001b0: 6f 00 c0 43 j 1084 +8000016c: 13 04 14 00 addi s0, s0, 1 +80000170: 63 56 54 01 bge s0, s5, 12 +80000174: 83 25 0c 00 lw a1, 0(s8) +80000178: 6f f0 1f fb j -80 +8000017c: 13 b5 19 00 seqz a0, s3 +80000180: 03 2c 81 00 lw s8, 8(sp) +80000184: 83 2b c1 00 lw s7, 12(sp) +80000188: 03 2b 01 01 lw s6, 16(sp) +8000018c: 83 2a 41 01 lw s5, 20(sp) +80000190: 03 2a 81 01 lw s4, 24(sp) +80000194: 83 29 c1 01 lw s3, 28(sp) +80000198: 03 29 01 02 lw s2, 32(sp) +8000019c: 83 24 41 02 lw s1, 36(sp) +800001a0: 03 24 81 02 lw s0, 40(sp) +800001a4: 83 20 c1 02 lw ra, 44(sp) +800001a8: 13 01 01 03 addi sp, sp, 48 +800001ac: 6f 00 00 4c j 1216 -800001b4 kernel_spawn_run_threads: -800001b4: 13 01 01 ff addi sp, sp, -16 -800001b8: 23 26 11 00 sw ra, 12(sp) -800001bc: 23 24 81 00 sw s0, 8(sp) -800001c0: ef 00 c0 42 jal 1068 -800001c4: ef 00 00 47 jal 1136 -800001c8: 13 04 05 00 mv s0, a0 -800001cc: ef 00 00 46 jal 1120 -800001d0: b7 25 00 80 lui a1, 524290 -800001d4: 93 85 45 e7 addi a1, a1, -396 -800001d8: 13 16 24 00 slli a2, s0, 2 -800001dc: b3 05 b6 00 add a1, a2, a1 -800001e0: 03 a6 05 00 lw a2, 0(a1) -800001e4: 83 25 06 00 lw a1, 0(a2) -800001e8: 83 26 c6 00 lw a3, 12(a2) -800001ec: 03 a7 05 00 lw a4, 0(a1) -800001f0: 83 a7 45 00 lw a5, 4(a1) -800001f4: 33 85 a6 00 add a0, a3, a0 -800001f8: b3 86 e7 02 mul a3, a5, a4 -800001fc: b3 47 d5 02 div a5, a0, a3 -80000200: b3 86 d7 02 mul a3, a5, a3 -80000204: 03 a4 c5 00 lw s0, 12(a1) -80000208: 33 05 d5 40 sub a0, a0, a3 -8000020c: b3 46 e5 02 div a3, a0, a4 -80000210: 33 88 e6 02 mul a6, a3, a4 -80000214: b3 08 a4 00 add a7, s0, a0 -80000218: 03 a7 05 01 lw a4, 16(a1) -8000021c: 03 a4 45 01 lw s0, 20(a1) -80000220: 83 22 46 00 lw t0, 4(a2) -80000224: 03 25 86 00 lw a0, 8(a2) -80000228: 33 86 08 41 sub a2, a7, a6 -8000022c: b3 06 d7 00 add a3, a4, a3 -80000230: 33 07 f4 00 add a4, s0, a5 -80000234: e7 80 02 00 jalr t0 -80000238: 13 05 10 00 addi a0, zero, 1 -8000023c: 03 24 81 00 lw s0, 8(sp) -80000240: 83 20 c1 00 lw ra, 12(sp) -80000244: 13 01 01 01 addi sp, sp, 16 -80000248: 6f 00 40 3a j 932 +800001b0 kernel_spawn_remaining_callback: +800001b0: 13 01 01 ff addi sp, sp, -16 +800001b4: 23 26 11 00 sw ra, 12(sp) +800001b8: 23 24 81 00 sw s0, 8(sp) +800001bc: ef 00 00 4b jal 1200 +800001c0: ef 00 40 4f jal 1268 +800001c4: 13 04 05 00 mv s0, a0 +800001c8: ef 00 40 4e jal 1252 +800001cc: b7 25 00 80 lui a1, 524290 +800001d0: 93 85 45 ef addi a1, a1, -268 +800001d4: 13 16 24 00 slli a2, s0, 2 +800001d8: b3 05 b6 00 add a1, a2, a1 +800001dc: 03 a6 05 00 lw a2, 0(a1) +800001e0: 83 25 06 00 lw a1, 0(a2) +800001e4: 83 26 c6 00 lw a3, 12(a2) +800001e8: 03 a7 05 00 lw a4, 0(a1) +800001ec: 83 a7 45 00 lw a5, 4(a1) +800001f0: 33 85 a6 00 add a0, a3, a0 +800001f4: b3 86 e7 02 mul a3, a5, a4 +800001f8: b3 47 d5 02 div a5, a0, a3 +800001fc: b3 86 d7 02 mul a3, a5, a3 +80000200: 03 a4 c5 00 lw s0, 12(a1) +80000204: 33 05 d5 40 sub a0, a0, a3 +80000208: b3 46 e5 02 div a3, a0, a4 +8000020c: 33 88 e6 02 mul a6, a3, a4 +80000210: b3 08 a4 00 add a7, s0, a0 +80000214: 03 a7 05 01 lw a4, 16(a1) +80000218: 03 a4 45 01 lw s0, 20(a1) +8000021c: 83 22 46 00 lw t0, 4(a2) +80000220: 03 25 86 00 lw a0, 8(a2) +80000224: 33 86 08 41 sub a2, a7, a6 +80000228: b3 06 d7 00 add a3, a4, a3 +8000022c: 33 07 f4 00 add a4, s0, a5 +80000230: e7 80 02 00 jalr t0 +80000234: 13 05 10 00 addi a0, zero, 1 +80000238: 03 24 81 00 lw s0, 8(sp) +8000023c: 83 20 c1 00 lw ra, 12(sp) +80000240: 13 01 01 01 addi sp, sp, 16 +80000244: 6f 00 80 42 j 1064 -8000024c kernel_spawn: -8000024c: 13 01 01 fc addi sp, sp, -64 -80000250: 23 2e 11 02 sw ra, 60(sp) -80000254: 23 2c 81 02 sw s0, 56(sp) -80000258: 23 2a 91 02 sw s1, 52(sp) -8000025c: 23 28 21 03 sw s2, 48(sp) -80000260: 23 26 31 03 sw s3, 44(sp) -80000264: 23 24 41 03 sw s4, 40(sp) -80000268: 23 22 51 03 sw s5, 36(sp) -8000026c: 23 20 61 03 sw s6, 32(sp) -80000270: 23 2e 71 01 sw s7, 28(sp) -80000274: 23 2c 81 01 sw s8, 24(sp) -80000278: 93 04 05 00 mv s1, a0 -8000027c: 83 2b 05 00 lw s7, 0(a0) -80000280: 03 24 45 00 lw s0, 4(a0) -80000284: 03 2c 85 00 lw s8, 8(a0) -80000288: 13 09 06 00 mv s2, a2 -8000028c: 93 89 05 00 mv s3, a1 -80000290: ef 00 c0 3b jal 956 -80000294: 13 0b 05 00 mv s6, a0 -80000298: ef 00 c0 3a jal 940 -8000029c: 13 0a 05 00 mv s4, a0 -800002a0: ef 00 c0 39 jal 924 -800002a4: 93 0a 05 00 mv s5, a0 -800002a8: ef 00 c0 38 jal 908 -800002ac: 93 05 70 00 addi a1, zero, 7 -800002b0: 63 ca a5 0e blt a1, a0, 244 -800002b4: b3 05 74 03 mul a1, s0, s7 -800002b8: 33 86 85 03 mul a2, a1, s8 -800002bc: b3 85 4a 03 mul a1, s5, s4 -800002c0: 93 06 10 00 addi a3, zero, 1 -800002c4: 63 c8 c5 00 blt a1, a2, 16 -800002c8: 63 da 66 01 bge a3, s6, 20 -800002cc: 63 4c d5 00 blt a0, a3, 24 -800002d0: 6f 00 40 0d j 212 -800002d4: b3 46 b6 02 div a3, a2, a1 -800002d8: e3 ca 66 ff blt a3, s6, -12 -800002dc: 93 06 0b 00 mv a3, s6 -800002e0: 63 52 d5 0c bge a0, a3, 196 -800002e4: 13 07 fb ff addi a4, s6, -1 -800002e8: b3 45 d6 02 div a1, a2, a3 -800002ec: 63 0e e5 00 beq a0, a4, 28 -800002f0: 13 06 00 00 mv a2, zero -800002f4: 33 0b b6 00 add s6, a2, a1 -800002f8: 33 46 5b 03 div a2, s6, s5 -800002fc: 93 06 00 00 mv a3, zero -80000300: 63 50 46 03 bge a2, s4, 32 -80000304: 6f 00 00 02 j 32 -80000308: b3 86 d5 02 mul a3, a1, a3 -8000030c: 33 06 d6 40 sub a2, a2, a3 -80000310: 33 0b b6 00 add s6, a2, a1 -80000314: 33 46 5b 03 div a2, s6, s5 -80000318: 93 06 00 00 mv a3, zero -8000031c: 63 44 46 01 blt a2, s4, 8 -80000320: b3 46 46 03 div a3, a2, s4 -80000324: 13 07 00 00 mv a4, zero -80000328: 93 07 10 00 addi a5, zero, 1 -8000032c: 63 88 06 00 beqz a3, 16 -80000330: 33 87 46 03 mul a4, a3, s4 -80000334: 33 07 e6 40 sub a4, a2, a4 -80000338: 93 87 06 00 mv a5, a3 -8000033c: 33 04 56 03 mul s0, a2, s5 +80000248 kernel_spawn: +80000248: 13 01 01 fc addi sp, sp, -64 +8000024c: 23 2e 11 02 sw ra, 60(sp) +80000250: 23 2c 81 02 sw s0, 56(sp) +80000254: 23 2a 91 02 sw s1, 52(sp) +80000258: 23 28 21 03 sw s2, 48(sp) +8000025c: 23 26 31 03 sw s3, 44(sp) +80000260: 23 24 41 03 sw s4, 40(sp) +80000264: 23 22 51 03 sw s5, 36(sp) +80000268: 23 20 61 03 sw s6, 32(sp) +8000026c: 23 2e 71 01 sw s7, 28(sp) +80000270: 23 2c 81 01 sw s8, 24(sp) +80000274: 93 04 05 00 mv s1, a0 +80000278: 83 2b 05 00 lw s7, 0(a0) +8000027c: 03 24 45 00 lw s0, 4(a0) +80000280: 03 2c 85 00 lw s8, 8(a0) +80000284: 13 09 06 00 mv s2, a2 +80000288: 93 89 05 00 mv s3, a1 +8000028c: ef 00 00 44 jal 1088 +80000290: 13 0b 05 00 mv s6, a0 +80000294: ef 00 00 43 jal 1072 +80000298: 13 0a 05 00 mv s4, a0 +8000029c: ef 00 00 42 jal 1056 +800002a0: 93 0a 05 00 mv s5, a0 +800002a4: ef 00 00 41 jal 1040 +800002a8: 93 05 f0 00 addi a1, zero, 15 +800002ac: 63 cc a5 16 blt a1, a0, 376 +800002b0: b3 05 74 03 mul a1, s0, s7 +800002b4: 33 86 85 03 mul a2, a1, s8 +800002b8: b3 85 4a 03 mul a1, s5, s4 +800002bc: 93 06 10 00 addi a3, zero, 1 +800002c0: 63 c8 c5 00 blt a1, a2, 16 +800002c4: 63 da 66 01 bge a3, s6, 20 +800002c8: 63 4c d5 00 blt a0, a3, 24 +800002cc: 6f 00 80 15 j 344 +800002d0: b3 46 b6 02 div a3, a2, a1 +800002d4: e3 ca 66 ff blt a3, s6, -12 +800002d8: 93 06 0b 00 mv a3, s6 +800002dc: 63 54 d5 14 bge a0, a3, 328 +800002e0: 13 07 fb ff addi a4, s6, -1 +800002e4: b3 45 d6 02 div a1, a2, a3 +800002e8: 63 0e e5 00 beq a0, a4, 28 +800002ec: 13 06 00 00 mv a2, zero +800002f0: b3 06 b6 00 add a3, a2, a1 +800002f4: 33 c6 56 03 div a2, a3, s5 +800002f8: 13 07 00 00 mv a4, zero +800002fc: 63 50 46 03 bge a2, s4, 32 +80000300: 6f 00 00 02 j 32 +80000304: b3 86 d5 02 mul a3, a1, a3 +80000308: 33 06 d6 40 sub a2, a2, a3 +8000030c: b3 06 b6 00 add a3, a2, a1 +80000310: 33 c6 56 03 div a2, a3, s5 +80000314: 13 07 00 00 mv a4, zero +80000318: 63 44 46 01 blt a2, s4, 8 +8000031c: 33 47 46 03 div a4, a2, s4 +80000320: 93 07 00 00 mv a5, zero +80000324: b3 0a 56 03 mul s5, a2, s5 +80000328: 13 04 10 00 addi s0, zero, 1 +8000032c: 63 08 07 00 beqz a4, 16 +80000330: b3 07 47 03 mul a5, a4, s4 +80000334: b3 07 f6 40 sub a5, a2, a5 +80000338: 13 04 07 00 mv s0, a4 +8000033c: 33 8b 56 41 sub s6, a3, s5 80000340: 23 20 91 00 sw s1, 0(sp) 80000344: 23 22 31 01 sw s3, 4(sp) 80000348: 23 24 21 01 sw s2, 8(sp) 8000034c: b3 85 a5 02 mul a1, a1, a0 80000350: 23 26 b1 00 sw a1, 12(sp) -80000354: 23 28 f1 00 sw a5, 16(sp) -80000358: 23 2a e1 00 sw a4, 20(sp) +80000354: 23 28 81 00 sw s0, 16(sp) +80000358: 23 2a f1 00 sw a5, 20(sp) 8000035c: b7 25 00 80 lui a1, 524290 -80000360: 93 85 45 e7 addi a1, a1, -396 +80000360: 93 85 45 ef addi a1, a1, -268 80000364: 13 15 25 00 slli a0, a0, 2 80000368: 33 05 b5 00 add a0, a0, a1 8000036c: 93 05 01 00 mv a1, sp @@ -243,535 +243,567 @@ Disassembly of section .text: 80000384: 37 05 00 80 lui a0, 524288 80000388: 93 05 85 06 addi a1, a0, 104 8000038c: 13 05 06 00 mv a0, a2 -80000390: ef 00 40 25 jal 596 +80000390: ef 00 40 2d jal 724 80000394: ef f0 5f cd jal -812 -80000398: 63 06 8b 00 beq s6, s0, 12 -8000039c: 23 26 81 00 sw s0, 12(sp) -800003a0: ef f0 9f cc jal -824 -800003a4: 03 2c 81 01 lw s8, 24(sp) -800003a8: 83 2b c1 01 lw s7, 28(sp) -800003ac: 03 2b 01 02 lw s6, 32(sp) -800003b0: 83 2a 41 02 lw s5, 36(sp) -800003b4: 03 2a 81 02 lw s4, 40(sp) -800003b8: 83 29 c1 02 lw s3, 44(sp) -800003bc: 03 29 01 03 lw s2, 48(sp) -800003c0: 83 24 41 03 lw s1, 52(sp) -800003c4: 03 24 81 03 lw s0, 56(sp) -800003c8: 83 20 c1 03 lw ra, 60(sp) -800003cc: 13 01 01 04 addi sp, sp, 64 -800003d0: 67 80 00 00 ret +80000398: 63 06 0b 08 beqz s6, 140 +8000039c: 23 26 51 01 sw s5, 12(sp) +800003a0: 13 05 0b 00 mv a0, s6 +800003a4: ef 00 80 2c jal 712 +800003a8: ef 00 c0 30 jal 780 +800003ac: 13 04 05 00 mv s0, a0 +800003b0: ef 00 c0 2f jal 764 +800003b4: b7 25 00 80 lui a1, 524290 +800003b8: 93 85 45 ef addi a1, a1, -268 +800003bc: 13 16 24 00 slli a2, s0, 2 +800003c0: b3 05 b6 00 add a1, a2, a1 +800003c4: 03 a6 05 00 lw a2, 0(a1) +800003c8: 83 25 06 00 lw a1, 0(a2) +800003cc: 83 26 c6 00 lw a3, 12(a2) +800003d0: 03 a7 05 00 lw a4, 0(a1) +800003d4: 83 a7 45 00 lw a5, 4(a1) +800003d8: 33 85 a6 00 add a0, a3, a0 +800003dc: b3 86 e7 02 mul a3, a5, a4 +800003e0: b3 47 d5 02 div a5, a0, a3 +800003e4: b3 86 d7 02 mul a3, a5, a3 +800003e8: 83 a4 c5 00 lw s1, 12(a1) +800003ec: 33 05 d5 40 sub a0, a0, a3 +800003f0: b3 46 e5 02 div a3, a0, a4 +800003f4: 33 88 e6 02 mul a6, a3, a4 +800003f8: b3 84 a4 00 add s1, s1, a0 +800003fc: 03 a4 05 01 lw s0, 16(a1) +80000400: 03 a7 45 01 lw a4, 20(a1) +80000404: 83 28 46 00 lw a7, 4(a2) +80000408: 03 25 86 00 lw a0, 8(a2) +8000040c: 33 86 04 41 sub a2, s1, a6 +80000410: b3 06 d4 00 add a3, s0, a3 +80000414: 33 07 f7 00 add a4, a4, a5 +80000418: e7 80 08 00 jalr a7 +8000041c: 13 05 10 00 addi a0, zero, 1 +80000420: ef 00 c0 24 jal 588 +80000424: 03 2c 81 01 lw s8, 24(sp) +80000428: 83 2b c1 01 lw s7, 28(sp) +8000042c: 03 2b 01 02 lw s6, 32(sp) +80000430: 83 2a 41 02 lw s5, 36(sp) +80000434: 03 2a 81 02 lw s4, 40(sp) +80000438: 83 29 c1 02 lw s3, 44(sp) +8000043c: 03 29 01 03 lw s2, 48(sp) +80000440: 83 24 41 03 lw s1, 52(sp) +80000444: 03 24 81 03 lw s0, 56(sp) +80000448: 83 20 c1 03 lw ra, 60(sp) +8000044c: 13 01 01 04 addi sp, sp, 64 +80000450: 67 80 00 00 ret -800003d4 main: -800003d4: 13 01 01 ff addi sp, sp, -16 -800003d8: 23 26 11 00 sw ra, 12(sp) -800003dc: 37 05 00 80 lui a0, 524288 -800003e0: 93 05 85 49 addi a1, a0, 1176 -800003e4: 37 05 ff 7f lui a0, 524272 -800003e8: 13 06 45 03 addi a2, a0, 52 -800003ec: 37 05 ff 7f lui a0, 524272 -800003f0: ef f0 df e5 jal -420 -800003f4: 13 05 00 00 mv a0, zero -800003f8: 83 20 c1 00 lw ra, 12(sp) -800003fc: 13 01 01 01 addi sp, sp, 16 -80000400: 67 80 00 00 ret +80000454 main: +80000454: 13 01 01 ff addi sp, sp, -16 +80000458: 23 26 11 00 sw ra, 12(sp) +8000045c: 37 05 00 80 lui a0, 524288 +80000460: 93 05 85 51 addi a1, a0, 1304 +80000464: 37 05 ff 7f lui a0, 524272 +80000468: 13 06 45 03 addi a2, a0, 52 +8000046c: 37 05 ff 7f lui a0, 524272 +80000470: ef f0 9f dd jal -552 +80000474: 13 05 00 00 mv a0, zero +80000478: 83 20 c1 00 lw ra, 12(sp) +8000047c: 13 01 01 01 addi sp, sp, 16 +80000480: 67 80 00 00 ret -80000404 _pocl_kernel_saxpy: -80000404: 13 01 01 ff addi sp, sp, -16 -80000408: 23 26 11 00 sw ra, 12(sp) -8000040c: 23 24 81 00 sw s0, 8(sp) -80000410: 13 04 01 01 addi s0, sp, 16 -80000414: 13 71 c1 ff andi sp, sp, -4 -80000418: 03 27 86 01 lw a4, 24(a2) -8000041c: 83 27 c6 00 lw a5, 12(a2) -80000420: 93 08 00 00 mv a7, zero -80000424: b3 06 d7 02 mul a3, a4, a3 -80000428: b3 86 d7 00 add a3, a5, a3 -8000042c: 83 22 c6 01 lw t0, 28(a2) -80000430: 03 28 06 02 lw a6, 32(a2) -80000434: 13 96 26 00 slli a2, a3, 2 -80000438: 33 83 c5 00 add t1, a1, a2 -8000043c: b3 06 c5 00 add a3, a0, a2 -80000440: 13 06 00 00 mv a2, zero -80000444: 93 07 00 00 mv a5, zero -80000448: 13 85 06 00 mv a0, a3 -8000044c: 93 05 03 00 mv a1, t1 -80000450: 07 20 05 00 flw ft0, 0(a0) -80000454: 87 a0 05 00 flw ft1, 0(a1) -80000458: 53 70 a0 10 fmul.s ft0, ft0, fa0 -8000045c: 53 70 10 00 fadd.s ft0, ft0, ft1 -80000460: 27 a0 05 00 fsw ft0, 0(a1) -80000464: 93 87 17 00 addi a5, a5, 1 -80000468: 93 85 45 00 addi a1, a1, 4 -8000046c: 13 05 45 00 addi a0, a0, 4 -80000470: e3 e0 e7 fe bltu a5, a4, -32 -80000474: 13 06 16 00 addi a2, a2, 1 -80000478: e3 66 56 fc bltu a2, t0, -52 -8000047c: 93 88 18 00 addi a7, a7, 1 -80000480: e3 e0 08 fd bltu a7, a6, -64 -80000484: 13 01 04 ff addi sp, s0, -16 -80000488: 03 24 81 00 lw s0, 8(sp) -8000048c: 83 20 c1 00 lw ra, 12(sp) -80000490: 13 01 01 01 addi sp, sp, 16 -80000494: 67 80 00 00 ret +80000484 _pocl_kernel_saxpy: +80000484: 13 01 01 ff addi sp, sp, -16 +80000488: 23 26 11 00 sw ra, 12(sp) +8000048c: 23 24 81 00 sw s0, 8(sp) +80000490: 13 04 01 01 addi s0, sp, 16 +80000494: 13 71 c1 ff andi sp, sp, -4 +80000498: 03 27 86 01 lw a4, 24(a2) +8000049c: 83 27 c6 00 lw a5, 12(a2) +800004a0: 93 08 00 00 mv a7, zero +800004a4: b3 06 d7 02 mul a3, a4, a3 +800004a8: b3 86 d7 00 add a3, a5, a3 +800004ac: 83 22 c6 01 lw t0, 28(a2) +800004b0: 03 28 06 02 lw a6, 32(a2) +800004b4: 13 96 26 00 slli a2, a3, 2 +800004b8: 33 83 c5 00 add t1, a1, a2 +800004bc: b3 06 c5 00 add a3, a0, a2 +800004c0: 13 06 00 00 mv a2, zero +800004c4: 93 07 00 00 mv a5, zero +800004c8: 13 85 06 00 mv a0, a3 +800004cc: 93 05 03 00 mv a1, t1 +800004d0: 07 20 05 00 flw ft0, 0(a0) +800004d4: 87 a0 05 00 flw ft1, 0(a1) +800004d8: 53 70 a0 10 fmul.s ft0, ft0, fa0 +800004dc: 53 70 10 00 fadd.s ft0, ft0, ft1 +800004e0: 27 a0 05 00 fsw ft0, 0(a1) +800004e4: 93 87 17 00 addi a5, a5, 1 +800004e8: 93 85 45 00 addi a1, a1, 4 +800004ec: 13 05 45 00 addi a0, a0, 4 +800004f0: e3 e0 e7 fe bltu a5, a4, -32 +800004f4: 13 06 16 00 addi a2, a2, 1 +800004f8: e3 66 56 fc bltu a2, t0, -52 +800004fc: 93 88 18 00 addi a7, a7, 1 +80000500: e3 e0 08 fd bltu a7, a6, -64 +80000504: 13 01 04 ff addi sp, s0, -16 +80000508: 03 24 81 00 lw s0, 8(sp) +8000050c: 83 20 c1 00 lw ra, 12(sp) +80000510: 13 01 01 01 addi sp, sp, 16 +80000514: 67 80 00 00 ret -80000498 _pocl_kernel_saxpy_workgroup: -80000498: 83 26 85 00 lw a3, 8(a0) -8000049c: 93 08 00 00 mv a7, zero -800004a0: 03 27 05 00 lw a4, 0(a0) -800004a4: 83 27 45 00 lw a5, 4(a0) -800004a8: 07 a0 06 00 flw ft0, 0(a3) -800004ac: 03 a5 85 01 lw a0, 24(a1) -800004b0: 83 a6 c5 00 lw a3, 12(a1) -800004b4: 03 27 07 00 lw a4, 0(a4) -800004b8: 83 a7 07 00 lw a5, 0(a5) -800004bc: 33 06 c5 02 mul a2, a0, a2 -800004c0: 33 86 c6 00 add a2, a3, a2 -800004c4: 83 a2 c5 01 lw t0, 28(a1) -800004c8: 03 a8 05 02 lw a6, 32(a1) -800004cc: 93 15 26 00 slli a1, a2, 2 -800004d0: 33 83 b7 00 add t1, a5, a1 -800004d4: 33 06 b7 00 add a2, a4, a1 -800004d8: 93 05 00 00 mv a1, zero -800004dc: 93 06 00 00 mv a3, zero -800004e0: 93 07 06 00 mv a5, a2 -800004e4: 13 07 03 00 mv a4, t1 -800004e8: 87 a0 07 00 flw ft1, 0(a5) -800004ec: 07 21 07 00 flw ft2, 0(a4) -800004f0: d3 f0 00 10 fmul.s ft1, ft1, ft0 -800004f4: d3 f0 20 00 fadd.s ft1, ft1, ft2 -800004f8: 27 20 17 00 fsw ft1, 0(a4) -800004fc: 93 86 16 00 addi a3, a3, 1 -80000500: 13 07 47 00 addi a4, a4, 4 -80000504: 93 87 47 00 addi a5, a5, 4 -80000508: e3 e0 a6 fe bltu a3, a0, -32 -8000050c: 93 85 15 00 addi a1, a1, 1 -80000510: e3 e6 55 fc bltu a1, t0, -52 -80000514: 93 88 18 00 addi a7, a7, 1 -80000518: e3 e0 08 fd bltu a7, a6, -64 -8000051c: 67 80 00 00 ret - -80000520 _pocl_kernel_saxpy_workgroup_fast: -80000520: 83 26 85 00 lw a3, 8(a0) -80000524: 93 08 00 00 mv a7, zero +80000518 _pocl_kernel_saxpy_workgroup: +80000518: 83 26 85 00 lw a3, 8(a0) +8000051c: 93 08 00 00 mv a7, zero +80000520: 03 27 05 00 lw a4, 0(a0) +80000524: 83 27 45 00 lw a5, 4(a0) 80000528: 07 a0 06 00 flw ft0, 0(a3) -8000052c: 03 a7 85 01 lw a4, 24(a1) +8000052c: 03 a5 85 01 lw a0, 24(a1) 80000530: 83 a6 c5 00 lw a3, 12(a1) -80000534: 83 27 05 00 lw a5, 0(a0) -80000538: 03 25 45 00 lw a0, 4(a0) -8000053c: 33 06 c7 02 mul a2, a4, a2 +80000534: 03 27 07 00 lw a4, 0(a4) +80000538: 83 a7 07 00 lw a5, 0(a5) +8000053c: 33 06 c5 02 mul a2, a0, a2 80000540: 33 86 c6 00 add a2, a3, a2 80000544: 83 a2 c5 01 lw t0, 28(a1) 80000548: 03 a8 05 02 lw a6, 32(a1) 8000054c: 93 15 26 00 slli a1, a2, 2 -80000550: 33 03 b5 00 add t1, a0, a1 -80000554: 33 85 b7 00 add a0, a5, a1 +80000550: 33 83 b7 00 add t1, a5, a1 +80000554: 33 06 b7 00 add a2, a4, a1 80000558: 93 05 00 00 mv a1, zero 8000055c: 93 06 00 00 mv a3, zero -80000560: 93 07 05 00 mv a5, a0 -80000564: 13 06 03 00 mv a2, t1 +80000560: 93 07 06 00 mv a5, a2 +80000564: 13 07 03 00 mv a4, t1 80000568: 87 a0 07 00 flw ft1, 0(a5) -8000056c: 07 21 06 00 flw ft2, 0(a2) +8000056c: 07 21 07 00 flw ft2, 0(a4) 80000570: d3 f0 00 10 fmul.s ft1, ft1, ft0 80000574: d3 f0 20 00 fadd.s ft1, ft1, ft2 -80000578: 27 20 16 00 fsw ft1, 0(a2) +80000578: 27 20 17 00 fsw ft1, 0(a4) 8000057c: 93 86 16 00 addi a3, a3, 1 -80000580: 13 06 46 00 addi a2, a2, 4 +80000580: 13 07 47 00 addi a4, a4, 4 80000584: 93 87 47 00 addi a5, a5, 4 -80000588: e3 e0 e6 fe bltu a3, a4, -32 +80000588: e3 e0 a6 fe bltu a3, a0, -32 8000058c: 93 85 15 00 addi a1, a1, 1 80000590: e3 e6 55 fc bltu a1, t0, -52 80000594: 93 88 18 00 addi a7, a7, 1 80000598: e3 e0 08 fd bltu a7, a6, -64 8000059c: 67 80 00 00 ret -800005a0 _exit: -800005a0: 13 05 00 00 mv a0, zero -800005a4: 6b 00 05 00 +800005a0 _pocl_kernel_saxpy_workgroup_fast: +800005a0: 83 26 85 00 lw a3, 8(a0) +800005a4: 93 08 00 00 mv a7, zero +800005a8: 07 a0 06 00 flw ft0, 0(a3) +800005ac: 03 a7 85 01 lw a4, 24(a1) +800005b0: 83 a6 c5 00 lw a3, 12(a1) +800005b4: 83 27 05 00 lw a5, 0(a0) +800005b8: 03 25 45 00 lw a0, 4(a0) +800005bc: 33 06 c7 02 mul a2, a4, a2 +800005c0: 33 86 c6 00 add a2, a3, a2 +800005c4: 83 a2 c5 01 lw t0, 28(a1) +800005c8: 03 a8 05 02 lw a6, 32(a1) +800005cc: 93 15 26 00 slli a1, a2, 2 +800005d0: 33 03 b5 00 add t1, a0, a1 +800005d4: 33 85 b7 00 add a0, a5, a1 +800005d8: 93 05 00 00 mv a1, zero +800005dc: 93 06 00 00 mv a3, zero +800005e0: 93 07 05 00 mv a5, a0 +800005e4: 13 06 03 00 mv a2, t1 +800005e8: 87 a0 07 00 flw ft1, 0(a5) +800005ec: 07 21 06 00 flw ft2, 0(a2) +800005f0: d3 f0 00 10 fmul.s ft1, ft1, ft0 +800005f4: d3 f0 20 00 fadd.s ft1, ft1, ft2 +800005f8: 27 20 16 00 fsw ft1, 0(a2) +800005fc: 93 86 16 00 addi a3, a3, 1 +80000600: 13 06 46 00 addi a2, a2, 4 +80000604: 93 87 47 00 addi a5, a5, 4 +80000608: e3 e0 e6 fe bltu a3, a4, -32 +8000060c: 93 85 15 00 addi a1, a1, 1 +80000610: e3 e6 55 fc bltu a1, t0, -52 +80000614: 93 88 18 00 addi a7, a7, 1 +80000618: e3 e0 08 fd bltu a7, a6, -64 +8000061c: 67 80 00 00 ret -800005a8 vx_set_sp: -800005a8: 73 25 00 fc csrr a0, 4032 -800005ac: 6b 00 05 00 -800005b0: 97 21 00 00 auipc gp, 2 -800005b4: 93 81 81 c9 addi gp, gp, -872 -800005b8: 17 01 00 7f auipc sp, 520192 -800005bc: 13 01 81 a4 addi sp, sp, -1464 -800005c0: 93 05 00 40 addi a1, zero, 1024 -800005c4: 73 26 10 cc csrr a2, 3265 -800005c8: b3 85 c5 02 mul a1, a1, a2 -800005cc: 33 01 b1 40 sub sp, sp, a1 -800005d0: f3 26 30 cc csrr a3, 3267 -800005d4: 63 86 06 00 beqz a3, 12 -800005d8: 13 05 00 00 mv a0, zero -800005dc: 6b 00 05 00 +80000620 _exit: +80000620: 13 05 00 00 mv a0, zero +80000624: 6b 00 05 00 -800005e0 RETURN: -800005e0: 67 80 00 00 ret +80000628 vx_set_sp: +80000628: 73 25 00 fc csrr a0, 4032 +8000062c: 6b 00 05 00 +80000630: 97 21 00 00 auipc gp, 2 +80000634: 93 81 81 c9 addi gp, gp, -872 +80000638: 17 01 00 7f auipc sp, 520192 +8000063c: 13 01 81 9c addi sp, sp, -1592 +80000640: 93 05 00 40 addi a1, zero, 1024 +80000644: 73 26 10 cc csrr a2, 3265 +80000648: b3 85 c5 02 mul a1, a1, a2 +8000064c: 33 01 b1 40 sub sp, sp, a1 +80000650: f3 26 30 cc csrr a3, 3267 +80000654: 63 86 06 00 beqz a3, 12 +80000658: 13 05 00 00 mv a0, zero +8000065c: 6b 00 05 00 -800005e4 vx_wspawn: -800005e4: 6b 10 b5 00 -800005e8: 67 80 00 00 ret - -800005ec vx_tmc: -800005ec: 6b 00 05 00 -800005f0: 67 80 00 00 ret - -800005f4 vx_barrier: -800005f4: 6b 40 b5 00 -800005f8: 67 80 00 00 ret - -800005fc vx_split: -800005fc: 6b 20 05 00 -80000600: 67 80 00 00 ret - -80000604 vx_join: -80000604: 6b 30 00 00 -80000608: 67 80 00 00 ret - -8000060c vx_warp_id: -8000060c: 73 25 30 cc csrr a0, 3267 -80000610: 67 80 00 00 ret - -80000614 vx_warp_gid: -80000614: 73 25 40 f1 csrr a0, mhartid -80000618: 67 80 00 00 ret - -8000061c vx_thread_id: -8000061c: 73 25 00 cc csrr a0, 3264 -80000620: 67 80 00 00 ret - -80000624 vx_thread_lid: -80000624: 73 25 10 cc csrr a0, 3265 -80000628: 67 80 00 00 ret - -8000062c vx_thread_gid: -8000062c: 73 25 20 cc csrr a0, 3266 -80000630: 67 80 00 00 ret - -80000634 vx_core_id: -80000634: 73 25 50 cc csrr a0, 3269 -80000638: 67 80 00 00 ret - -8000063c vx_num_threads: -8000063c: 73 25 00 fc csrr a0, 4032 -80000640: 67 80 00 00 ret - -80000644 vx_num_warps: -80000644: 73 25 10 fc csrr a0, 4033 -80000648: 67 80 00 00 ret - -8000064c vx_num_cores: -8000064c: 73 25 20 fc csrr a0, 4034 -80000650: 67 80 00 00 ret - -80000654 vx_num_cycles: -80000654: 73 25 00 b0 csrr a0, mcycle -80000658: 67 80 00 00 ret - -8000065c vx_num_instrs: -8000065c: 73 25 20 b0 csrr a0, minstret +80000660 RETURN: 80000660: 67 80 00 00 ret -80000664 atexit: -80000664: 93 05 05 00 mv a1, a0 -80000668: 93 06 00 00 mv a3, zero -8000066c: 13 06 00 00 mv a2, zero -80000670: 13 05 00 00 mv a0, zero -80000674: 6f 00 c0 20 j 524 +80000664 vx_wspawn: +80000664: 6b 10 b5 00 +80000668: 67 80 00 00 ret -80000678 exit: -80000678: 13 01 01 ff addi sp, sp, -16 -8000067c: 93 05 00 00 mv a1, zero -80000680: 23 24 81 00 sw s0, 8(sp) -80000684: 23 26 11 00 sw ra, 12(sp) -80000688: 13 04 05 00 mv s0, a0 -8000068c: ef 00 00 29 jal 656 -80000690: b7 27 00 80 lui a5, 524290 -80000694: 03 a5 07 e7 lw a0, -400(a5) -80000698: 83 27 c5 03 lw a5, 60(a0) -8000069c: 63 84 07 00 beqz a5, 8 -800006a0: e7 80 07 00 jalr a5 -800006a4: 13 05 04 00 mv a0, s0 -800006a8: ef f0 9f ef jal -264 +8000066c vx_tmc: +8000066c: 6b 00 05 00 +80000670: 67 80 00 00 ret -800006ac __libc_fini_array: -800006ac: 13 01 01 ff addi sp, sp, -16 -800006b0: 23 24 81 00 sw s0, 8(sp) -800006b4: b7 27 00 80 lui a5, 524290 -800006b8: 37 24 00 80 lui s0, 524290 -800006bc: 13 04 44 a4 addi s0, s0, -1468 -800006c0: 93 87 47 a4 addi a5, a5, -1468 -800006c4: b3 87 87 40 sub a5, a5, s0 -800006c8: 23 22 91 00 sw s1, 4(sp) -800006cc: 23 26 11 00 sw ra, 12(sp) -800006d0: 93 d4 27 40 srai s1, a5, 2 -800006d4: 63 80 04 02 beqz s1, 32 -800006d8: 93 87 c7 ff addi a5, a5, -4 -800006dc: 33 84 87 00 add s0, a5, s0 -800006e0: 83 27 04 00 lw a5, 0(s0) -800006e4: 93 84 f4 ff addi s1, s1, -1 -800006e8: 13 04 c4 ff addi s0, s0, -4 -800006ec: e7 80 07 00 jalr a5 -800006f0: e3 98 04 fe bnez s1, -16 -800006f4: 83 20 c1 00 lw ra, 12(sp) -800006f8: 03 24 81 00 lw s0, 8(sp) -800006fc: 83 24 41 00 lw s1, 4(sp) -80000700: 13 01 01 01 addi sp, sp, 16 -80000704: 67 80 00 00 ret +80000674 vx_barrier: +80000674: 6b 40 b5 00 +80000678: 67 80 00 00 ret -80000708 __libc_init_array: -80000708: 13 01 01 ff addi sp, sp, -16 -8000070c: 23 24 81 00 sw s0, 8(sp) -80000710: 23 20 21 01 sw s2, 0(sp) -80000714: 37 24 00 80 lui s0, 524290 -80000718: 37 29 00 80 lui s2, 524290 -8000071c: 93 07 04 a4 addi a5, s0, -1472 -80000720: 13 09 09 a4 addi s2, s2, -1472 -80000724: 33 09 f9 40 sub s2, s2, a5 -80000728: 23 26 11 00 sw ra, 12(sp) -8000072c: 23 22 91 00 sw s1, 4(sp) -80000730: 13 59 29 40 srai s2, s2, 2 -80000734: 63 00 09 02 beqz s2, 32 -80000738: 13 04 04 a4 addi s0, s0, -1472 -8000073c: 93 04 00 00 mv s1, zero -80000740: 83 27 04 00 lw a5, 0(s0) -80000744: 93 84 14 00 addi s1, s1, 1 -80000748: 13 04 44 00 addi s0, s0, 4 -8000074c: e7 80 07 00 jalr a5 -80000750: e3 18 99 fe bne s2, s1, -16 -80000754: 37 24 00 80 lui s0, 524290 -80000758: 37 29 00 80 lui s2, 524290 -8000075c: 93 07 04 a4 addi a5, s0, -1472 -80000760: 13 09 49 a4 addi s2, s2, -1468 -80000764: 33 09 f9 40 sub s2, s2, a5 -80000768: 13 59 29 40 srai s2, s2, 2 -8000076c: 63 00 09 02 beqz s2, 32 -80000770: 13 04 04 a4 addi s0, s0, -1472 -80000774: 93 04 00 00 mv s1, zero -80000778: 83 27 04 00 lw a5, 0(s0) -8000077c: 93 84 14 00 addi s1, s1, 1 -80000780: 13 04 44 00 addi s0, s0, 4 -80000784: e7 80 07 00 jalr a5 -80000788: e3 18 99 fe bne s2, s1, -16 -8000078c: 83 20 c1 00 lw ra, 12(sp) -80000790: 03 24 81 00 lw s0, 8(sp) -80000794: 83 24 41 00 lw s1, 4(sp) -80000798: 03 29 01 00 lw s2, 0(sp) -8000079c: 13 01 01 01 addi sp, sp, 16 -800007a0: 67 80 00 00 ret +8000067c vx_split: +8000067c: 6b 20 05 00 +80000680: 67 80 00 00 ret -800007a4 memset: -800007a4: 13 03 f0 00 addi t1, zero, 15 -800007a8: 13 07 05 00 mv a4, a0 -800007ac: 63 7e c3 02 bgeu t1, a2, 60 -800007b0: 93 77 f7 00 andi a5, a4, 15 -800007b4: 63 90 07 0a bnez a5, 160 -800007b8: 63 92 05 08 bnez a1, 132 -800007bc: 93 76 06 ff andi a3, a2, -16 -800007c0: 13 76 f6 00 andi a2, a2, 15 -800007c4: b3 86 e6 00 add a3, a3, a4 -800007c8: 23 20 b7 00 sw a1, 0(a4) -800007cc: 23 22 b7 00 sw a1, 4(a4) -800007d0: 23 24 b7 00 sw a1, 8(a4) -800007d4: 23 26 b7 00 sw a1, 12(a4) -800007d8: 13 07 07 01 addi a4, a4, 16 -800007dc: e3 66 d7 fe bltu a4, a3, -20 -800007e0: 63 14 06 00 bnez a2, 8 -800007e4: 67 80 00 00 ret -800007e8: b3 06 c3 40 sub a3, t1, a2 -800007ec: 93 96 26 00 slli a3, a3, 2 -800007f0: 97 02 00 00 auipc t0, 0 -800007f4: b3 86 56 00 add a3, a3, t0 -800007f8: 67 80 c6 00 jr 12(a3) -800007fc: 23 07 b7 00 sb a1, 14(a4) -80000800: a3 06 b7 00 sb a1, 13(a4) -80000804: 23 06 b7 00 sb a1, 12(a4) -80000808: a3 05 b7 00 sb a1, 11(a4) -8000080c: 23 05 b7 00 sb a1, 10(a4) -80000810: a3 04 b7 00 sb a1, 9(a4) -80000814: 23 04 b7 00 sb a1, 8(a4) -80000818: a3 03 b7 00 sb a1, 7(a4) -8000081c: 23 03 b7 00 sb a1, 6(a4) -80000820: a3 02 b7 00 sb a1, 5(a4) -80000824: 23 02 b7 00 sb a1, 4(a4) -80000828: a3 01 b7 00 sb a1, 3(a4) -8000082c: 23 01 b7 00 sb a1, 2(a4) -80000830: a3 00 b7 00 sb a1, 1(a4) -80000834: 23 00 b7 00 sb a1, 0(a4) -80000838: 67 80 00 00 ret -8000083c: 93 f5 f5 0f andi a1, a1, 255 -80000840: 93 96 85 00 slli a3, a1, 8 -80000844: b3 e5 d5 00 or a1, a1, a3 -80000848: 93 96 05 01 slli a3, a1, 16 -8000084c: b3 e5 d5 00 or a1, a1, a3 -80000850: 6f f0 df f6 j -148 -80000854: 93 96 27 00 slli a3, a5, 2 -80000858: 97 02 00 00 auipc t0, 0 -8000085c: b3 86 56 00 add a3, a3, t0 -80000860: 93 82 00 00 mv t0, ra -80000864: e7 80 06 fa jalr -96(a3) -80000868: 93 80 02 00 mv ra, t0 -8000086c: 93 87 07 ff addi a5, a5, -16 -80000870: 33 07 f7 40 sub a4, a4, a5 -80000874: 33 06 f6 00 add a2, a2, a5 -80000878: e3 78 c3 f6 bgeu t1, a2, -144 -8000087c: 6f f0 df f3 j -196 +80000684 vx_join: +80000684: 6b 30 00 00 +80000688: 67 80 00 00 ret -80000880 __register_exitproc: -80000880: b7 27 00 80 lui a5, 524290 -80000884: 03 a7 07 e7 lw a4, -400(a5) -80000888: 83 27 87 14 lw a5, 328(a4) -8000088c: 63 8c 07 04 beqz a5, 88 -80000890: 03 a7 47 00 lw a4, 4(a5) -80000894: 13 08 f0 01 addi a6, zero, 31 -80000898: 63 4e e8 06 blt a6, a4, 124 -8000089c: 13 18 27 00 slli a6, a4, 2 -800008a0: 63 06 05 02 beqz a0, 44 -800008a4: 33 83 07 01 add t1, a5, a6 -800008a8: 23 24 c3 08 sw a2, 136(t1) -800008ac: 83 a8 87 18 lw a7, 392(a5) -800008b0: 13 06 10 00 addi a2, zero, 1 -800008b4: 33 16 e6 00 sll a2, a2, a4 -800008b8: b3 e8 c8 00 or a7, a7, a2 -800008bc: 23 a4 17 19 sw a7, 392(a5) -800008c0: 23 24 d3 10 sw a3, 264(t1) -800008c4: 93 06 20 00 addi a3, zero, 2 -800008c8: 63 04 d5 02 beq a0, a3, 40 -800008cc: 13 07 17 00 addi a4, a4, 1 -800008d0: 23 a2 e7 00 sw a4, 4(a5) -800008d4: b3 87 07 01 add a5, a5, a6 -800008d8: 23 a4 b7 00 sw a1, 8(a5) -800008dc: 13 05 00 00 mv a0, zero -800008e0: 67 80 00 00 ret -800008e4: 93 07 c7 14 addi a5, a4, 332 -800008e8: 23 24 f7 14 sw a5, 328(a4) -800008ec: 6f f0 5f fa j -92 -800008f0: 83 a6 c7 18 lw a3, 396(a5) -800008f4: 13 07 17 00 addi a4, a4, 1 -800008f8: 23 a2 e7 00 sw a4, 4(a5) -800008fc: 33 e6 c6 00 or a2, a3, a2 -80000900: 23 a6 c7 18 sw a2, 396(a5) -80000904: b3 87 07 01 add a5, a5, a6 -80000908: 23 a4 b7 00 sw a1, 8(a5) -8000090c: 13 05 00 00 mv a0, zero -80000910: 67 80 00 00 ret -80000914: 13 05 f0 ff addi a0, zero, -1 -80000918: 67 80 00 00 ret +8000068c vx_warp_id: +8000068c: 73 25 30 cc csrr a0, 3267 +80000690: 67 80 00 00 ret -8000091c __call_exitprocs: -8000091c: 13 01 01 fd addi sp, sp, -48 -80000920: b7 27 00 80 lui a5, 524290 -80000924: 23 2c 41 01 sw s4, 24(sp) -80000928: 03 aa 07 e7 lw s4, -400(a5) -8000092c: 23 20 21 03 sw s2, 32(sp) -80000930: 23 26 11 02 sw ra, 44(sp) -80000934: 03 29 8a 14 lw s2, 328(s4) -80000938: 23 24 81 02 sw s0, 40(sp) -8000093c: 23 22 91 02 sw s1, 36(sp) -80000940: 23 2e 31 01 sw s3, 28(sp) -80000944: 23 2a 51 01 sw s5, 20(sp) -80000948: 23 28 61 01 sw s6, 16(sp) -8000094c: 23 26 71 01 sw s7, 12(sp) -80000950: 23 24 81 01 sw s8, 8(sp) -80000954: 63 00 09 04 beqz s2, 64 -80000958: 13 0b 05 00 mv s6, a0 -8000095c: 93 8b 05 00 mv s7, a1 -80000960: 93 0a 10 00 addi s5, zero, 1 -80000964: 93 09 f0 ff addi s3, zero, -1 -80000968: 83 24 49 00 lw s1, 4(s2) -8000096c: 13 84 f4 ff addi s0, s1, -1 -80000970: 63 42 04 02 bltz s0, 36 -80000974: 93 94 24 00 slli s1, s1, 2 -80000978: b3 04 99 00 add s1, s2, s1 -8000097c: 63 84 0b 04 beqz s7, 72 -80000980: 83 a7 44 10 lw a5, 260(s1) -80000984: 63 80 77 05 beq a5, s7, 64 -80000988: 13 04 f4 ff addi s0, s0, -1 -8000098c: 93 84 c4 ff addi s1, s1, -4 -80000990: e3 16 34 ff bne s0, s3, -20 -80000994: 83 20 c1 02 lw ra, 44(sp) -80000998: 03 24 81 02 lw s0, 40(sp) -8000099c: 83 24 41 02 lw s1, 36(sp) -800009a0: 03 29 01 02 lw s2, 32(sp) -800009a4: 83 29 c1 01 lw s3, 28(sp) -800009a8: 03 2a 81 01 lw s4, 24(sp) -800009ac: 83 2a 41 01 lw s5, 20(sp) -800009b0: 03 2b 01 01 lw s6, 16(sp) -800009b4: 83 2b c1 00 lw s7, 12(sp) -800009b8: 03 2c 81 00 lw s8, 8(sp) -800009bc: 13 01 01 03 addi sp, sp, 48 -800009c0: 67 80 00 00 ret -800009c4: 83 27 49 00 lw a5, 4(s2) -800009c8: 83 a6 44 00 lw a3, 4(s1) -800009cc: 93 87 f7 ff addi a5, a5, -1 -800009d0: 63 8e 87 04 beq a5, s0, 92 -800009d4: 23 a2 04 00 sw zero, 4(s1) -800009d8: e3 88 06 fa beqz a3, -80 -800009dc: 83 27 89 18 lw a5, 392(s2) -800009e0: 33 97 8a 00 sll a4, s5, s0 -800009e4: 03 2c 49 00 lw s8, 4(s2) -800009e8: b3 77 f7 00 and a5, a4, a5 -800009ec: 63 92 07 02 bnez a5, 36 -800009f0: e7 80 06 00 jalr a3 -800009f4: 03 27 49 00 lw a4, 4(s2) -800009f8: 83 27 8a 14 lw a5, 328(s4) -800009fc: 63 14 87 01 bne a4, s8, 8 -80000a00: e3 04 f9 f8 beq s2, a5, -120 -80000a04: e3 88 07 f8 beqz a5, -112 -80000a08: 13 89 07 00 mv s2, a5 -80000a0c: 6f f0 df f5 j -164 -80000a10: 83 27 c9 18 lw a5, 396(s2) -80000a14: 83 a5 44 08 lw a1, 132(s1) -80000a18: 33 77 f7 00 and a4, a4, a5 -80000a1c: 63 1c 07 00 bnez a4, 24 -80000a20: 13 05 0b 00 mv a0, s6 -80000a24: e7 80 06 00 jalr a3 -80000a28: 6f f0 df fc j -52 -80000a2c: 23 22 89 00 sw s0, 4(s2) -80000a30: 6f f0 9f fa j -88 -80000a34: 13 85 05 00 mv a0, a1 -80000a38: e7 80 06 00 jalr a3 -80000a3c: 6f f0 9f fb j -72 +80000694 vx_warp_gid: +80000694: 73 25 40 f1 csrr a0, mhartid +80000698: 67 80 00 00 ret + +8000069c vx_thread_id: +8000069c: 73 25 00 cc csrr a0, 3264 +800006a0: 67 80 00 00 ret + +800006a4 vx_thread_lid: +800006a4: 73 25 10 cc csrr a0, 3265 +800006a8: 67 80 00 00 ret + +800006ac vx_thread_gid: +800006ac: 73 25 20 cc csrr a0, 3266 +800006b0: 67 80 00 00 ret + +800006b4 vx_core_id: +800006b4: 73 25 50 cc csrr a0, 3269 +800006b8: 67 80 00 00 ret + +800006bc vx_num_threads: +800006bc: 73 25 00 fc csrr a0, 4032 +800006c0: 67 80 00 00 ret + +800006c4 vx_num_warps: +800006c4: 73 25 10 fc csrr a0, 4033 +800006c8: 67 80 00 00 ret + +800006cc vx_num_cores: +800006cc: 73 25 20 fc csrr a0, 4034 +800006d0: 67 80 00 00 ret + +800006d4 vx_num_cycles: +800006d4: 73 25 00 b0 csrr a0, mcycle +800006d8: 67 80 00 00 ret + +800006dc vx_num_instrs: +800006dc: 73 25 20 b0 csrr a0, minstret +800006e0: 67 80 00 00 ret + +800006e4 atexit: +800006e4: 93 05 05 00 mv a1, a0 +800006e8: 93 06 00 00 mv a3, zero +800006ec: 13 06 00 00 mv a2, zero +800006f0: 13 05 00 00 mv a0, zero +800006f4: 6f 00 c0 20 j 524 + +800006f8 exit: +800006f8: 13 01 01 ff addi sp, sp, -16 +800006fc: 93 05 00 00 mv a1, zero +80000700: 23 24 81 00 sw s0, 8(sp) +80000704: 23 26 11 00 sw ra, 12(sp) +80000708: 13 04 05 00 mv s0, a0 +8000070c: ef 00 00 29 jal 656 +80000710: b7 27 00 80 lui a5, 524290 +80000714: 03 a5 07 ef lw a0, -272(a5) +80000718: 83 27 c5 03 lw a5, 60(a0) +8000071c: 63 84 07 00 beqz a5, 8 +80000720: e7 80 07 00 jalr a5 +80000724: 13 05 04 00 mv a0, s0 +80000728: ef f0 9f ef jal -264 + +8000072c __libc_fini_array: +8000072c: 13 01 01 ff addi sp, sp, -16 +80000730: 23 24 81 00 sw s0, 8(sp) +80000734: b7 27 00 80 lui a5, 524290 +80000738: 37 24 00 80 lui s0, 524290 +8000073c: 13 04 44 ac addi s0, s0, -1340 +80000740: 93 87 47 ac addi a5, a5, -1340 +80000744: b3 87 87 40 sub a5, a5, s0 +80000748: 23 22 91 00 sw s1, 4(sp) +8000074c: 23 26 11 00 sw ra, 12(sp) +80000750: 93 d4 27 40 srai s1, a5, 2 +80000754: 63 80 04 02 beqz s1, 32 +80000758: 93 87 c7 ff addi a5, a5, -4 +8000075c: 33 84 87 00 add s0, a5, s0 +80000760: 83 27 04 00 lw a5, 0(s0) +80000764: 93 84 f4 ff addi s1, s1, -1 +80000768: 13 04 c4 ff addi s0, s0, -4 +8000076c: e7 80 07 00 jalr a5 +80000770: e3 98 04 fe bnez s1, -16 +80000774: 83 20 c1 00 lw ra, 12(sp) +80000778: 03 24 81 00 lw s0, 8(sp) +8000077c: 83 24 41 00 lw s1, 4(sp) +80000780: 13 01 01 01 addi sp, sp, 16 +80000784: 67 80 00 00 ret + +80000788 __libc_init_array: +80000788: 13 01 01 ff addi sp, sp, -16 +8000078c: 23 24 81 00 sw s0, 8(sp) +80000790: 23 20 21 01 sw s2, 0(sp) +80000794: 37 24 00 80 lui s0, 524290 +80000798: 37 29 00 80 lui s2, 524290 +8000079c: 93 07 04 ac addi a5, s0, -1344 +800007a0: 13 09 09 ac addi s2, s2, -1344 +800007a4: 33 09 f9 40 sub s2, s2, a5 +800007a8: 23 26 11 00 sw ra, 12(sp) +800007ac: 23 22 91 00 sw s1, 4(sp) +800007b0: 13 59 29 40 srai s2, s2, 2 +800007b4: 63 00 09 02 beqz s2, 32 +800007b8: 13 04 04 ac addi s0, s0, -1344 +800007bc: 93 04 00 00 mv s1, zero +800007c0: 83 27 04 00 lw a5, 0(s0) +800007c4: 93 84 14 00 addi s1, s1, 1 +800007c8: 13 04 44 00 addi s0, s0, 4 +800007cc: e7 80 07 00 jalr a5 +800007d0: e3 18 99 fe bne s2, s1, -16 +800007d4: 37 24 00 80 lui s0, 524290 +800007d8: 37 29 00 80 lui s2, 524290 +800007dc: 93 07 04 ac addi a5, s0, -1344 +800007e0: 13 09 49 ac addi s2, s2, -1340 +800007e4: 33 09 f9 40 sub s2, s2, a5 +800007e8: 13 59 29 40 srai s2, s2, 2 +800007ec: 63 00 09 02 beqz s2, 32 +800007f0: 13 04 04 ac addi s0, s0, -1344 +800007f4: 93 04 00 00 mv s1, zero +800007f8: 83 27 04 00 lw a5, 0(s0) +800007fc: 93 84 14 00 addi s1, s1, 1 +80000800: 13 04 44 00 addi s0, s0, 4 +80000804: e7 80 07 00 jalr a5 +80000808: e3 18 99 fe bne s2, s1, -16 +8000080c: 83 20 c1 00 lw ra, 12(sp) +80000810: 03 24 81 00 lw s0, 8(sp) +80000814: 83 24 41 00 lw s1, 4(sp) +80000818: 03 29 01 00 lw s2, 0(sp) +8000081c: 13 01 01 01 addi sp, sp, 16 +80000820: 67 80 00 00 ret + +80000824 memset: +80000824: 13 03 f0 00 addi t1, zero, 15 +80000828: 13 07 05 00 mv a4, a0 +8000082c: 63 7e c3 02 bgeu t1, a2, 60 +80000830: 93 77 f7 00 andi a5, a4, 15 +80000834: 63 90 07 0a bnez a5, 160 +80000838: 63 92 05 08 bnez a1, 132 +8000083c: 93 76 06 ff andi a3, a2, -16 +80000840: 13 76 f6 00 andi a2, a2, 15 +80000844: b3 86 e6 00 add a3, a3, a4 +80000848: 23 20 b7 00 sw a1, 0(a4) +8000084c: 23 22 b7 00 sw a1, 4(a4) +80000850: 23 24 b7 00 sw a1, 8(a4) +80000854: 23 26 b7 00 sw a1, 12(a4) +80000858: 13 07 07 01 addi a4, a4, 16 +8000085c: e3 66 d7 fe bltu a4, a3, -20 +80000860: 63 14 06 00 bnez a2, 8 +80000864: 67 80 00 00 ret +80000868: b3 06 c3 40 sub a3, t1, a2 +8000086c: 93 96 26 00 slli a3, a3, 2 +80000870: 97 02 00 00 auipc t0, 0 +80000874: b3 86 56 00 add a3, a3, t0 +80000878: 67 80 c6 00 jr 12(a3) +8000087c: 23 07 b7 00 sb a1, 14(a4) +80000880: a3 06 b7 00 sb a1, 13(a4) +80000884: 23 06 b7 00 sb a1, 12(a4) +80000888: a3 05 b7 00 sb a1, 11(a4) +8000088c: 23 05 b7 00 sb a1, 10(a4) +80000890: a3 04 b7 00 sb a1, 9(a4) +80000894: 23 04 b7 00 sb a1, 8(a4) +80000898: a3 03 b7 00 sb a1, 7(a4) +8000089c: 23 03 b7 00 sb a1, 6(a4) +800008a0: a3 02 b7 00 sb a1, 5(a4) +800008a4: 23 02 b7 00 sb a1, 4(a4) +800008a8: a3 01 b7 00 sb a1, 3(a4) +800008ac: 23 01 b7 00 sb a1, 2(a4) +800008b0: a3 00 b7 00 sb a1, 1(a4) +800008b4: 23 00 b7 00 sb a1, 0(a4) +800008b8: 67 80 00 00 ret +800008bc: 93 f5 f5 0f andi a1, a1, 255 +800008c0: 93 96 85 00 slli a3, a1, 8 +800008c4: b3 e5 d5 00 or a1, a1, a3 +800008c8: 93 96 05 01 slli a3, a1, 16 +800008cc: b3 e5 d5 00 or a1, a1, a3 +800008d0: 6f f0 df f6 j -148 +800008d4: 93 96 27 00 slli a3, a5, 2 +800008d8: 97 02 00 00 auipc t0, 0 +800008dc: b3 86 56 00 add a3, a3, t0 +800008e0: 93 82 00 00 mv t0, ra +800008e4: e7 80 06 fa jalr -96(a3) +800008e8: 93 80 02 00 mv ra, t0 +800008ec: 93 87 07 ff addi a5, a5, -16 +800008f0: 33 07 f7 40 sub a4, a4, a5 +800008f4: 33 06 f6 00 add a2, a2, a5 +800008f8: e3 78 c3 f6 bgeu t1, a2, -144 +800008fc: 6f f0 df f3 j -196 + +80000900 __register_exitproc: +80000900: b7 27 00 80 lui a5, 524290 +80000904: 03 a7 07 ef lw a4, -272(a5) +80000908: 83 27 87 14 lw a5, 328(a4) +8000090c: 63 8c 07 04 beqz a5, 88 +80000910: 03 a7 47 00 lw a4, 4(a5) +80000914: 13 08 f0 01 addi a6, zero, 31 +80000918: 63 4e e8 06 blt a6, a4, 124 +8000091c: 13 18 27 00 slli a6, a4, 2 +80000920: 63 06 05 02 beqz a0, 44 +80000924: 33 83 07 01 add t1, a5, a6 +80000928: 23 24 c3 08 sw a2, 136(t1) +8000092c: 83 a8 87 18 lw a7, 392(a5) +80000930: 13 06 10 00 addi a2, zero, 1 +80000934: 33 16 e6 00 sll a2, a2, a4 +80000938: b3 e8 c8 00 or a7, a7, a2 +8000093c: 23 a4 17 19 sw a7, 392(a5) +80000940: 23 24 d3 10 sw a3, 264(t1) +80000944: 93 06 20 00 addi a3, zero, 2 +80000948: 63 04 d5 02 beq a0, a3, 40 +8000094c: 13 07 17 00 addi a4, a4, 1 +80000950: 23 a2 e7 00 sw a4, 4(a5) +80000954: b3 87 07 01 add a5, a5, a6 +80000958: 23 a4 b7 00 sw a1, 8(a5) +8000095c: 13 05 00 00 mv a0, zero +80000960: 67 80 00 00 ret +80000964: 93 07 c7 14 addi a5, a4, 332 +80000968: 23 24 f7 14 sw a5, 328(a4) +8000096c: 6f f0 5f fa j -92 +80000970: 83 a6 c7 18 lw a3, 396(a5) +80000974: 13 07 17 00 addi a4, a4, 1 +80000978: 23 a2 e7 00 sw a4, 4(a5) +8000097c: 33 e6 c6 00 or a2, a3, a2 +80000980: 23 a6 c7 18 sw a2, 396(a5) +80000984: b3 87 07 01 add a5, a5, a6 +80000988: 23 a4 b7 00 sw a1, 8(a5) +8000098c: 13 05 00 00 mv a0, zero +80000990: 67 80 00 00 ret +80000994: 13 05 f0 ff addi a0, zero, -1 +80000998: 67 80 00 00 ret + +8000099c __call_exitprocs: +8000099c: 13 01 01 fd addi sp, sp, -48 +800009a0: b7 27 00 80 lui a5, 524290 +800009a4: 23 2c 41 01 sw s4, 24(sp) +800009a8: 03 aa 07 ef lw s4, -272(a5) +800009ac: 23 20 21 03 sw s2, 32(sp) +800009b0: 23 26 11 02 sw ra, 44(sp) +800009b4: 03 29 8a 14 lw s2, 328(s4) +800009b8: 23 24 81 02 sw s0, 40(sp) +800009bc: 23 22 91 02 sw s1, 36(sp) +800009c0: 23 2e 31 01 sw s3, 28(sp) +800009c4: 23 2a 51 01 sw s5, 20(sp) +800009c8: 23 28 61 01 sw s6, 16(sp) +800009cc: 23 26 71 01 sw s7, 12(sp) +800009d0: 23 24 81 01 sw s8, 8(sp) +800009d4: 63 00 09 04 beqz s2, 64 +800009d8: 13 0b 05 00 mv s6, a0 +800009dc: 93 8b 05 00 mv s7, a1 +800009e0: 93 0a 10 00 addi s5, zero, 1 +800009e4: 93 09 f0 ff addi s3, zero, -1 +800009e8: 83 24 49 00 lw s1, 4(s2) +800009ec: 13 84 f4 ff addi s0, s1, -1 +800009f0: 63 42 04 02 bltz s0, 36 +800009f4: 93 94 24 00 slli s1, s1, 2 +800009f8: b3 04 99 00 add s1, s2, s1 +800009fc: 63 84 0b 04 beqz s7, 72 +80000a00: 83 a7 44 10 lw a5, 260(s1) +80000a04: 63 80 77 05 beq a5, s7, 64 +80000a08: 13 04 f4 ff addi s0, s0, -1 +80000a0c: 93 84 c4 ff addi s1, s1, -4 +80000a10: e3 16 34 ff bne s0, s3, -20 +80000a14: 83 20 c1 02 lw ra, 44(sp) +80000a18: 03 24 81 02 lw s0, 40(sp) +80000a1c: 83 24 41 02 lw s1, 36(sp) +80000a20: 03 29 01 02 lw s2, 32(sp) +80000a24: 83 29 c1 01 lw s3, 28(sp) +80000a28: 03 2a 81 01 lw s4, 24(sp) +80000a2c: 83 2a 41 01 lw s5, 20(sp) +80000a30: 03 2b 01 01 lw s6, 16(sp) +80000a34: 83 2b c1 00 lw s7, 12(sp) +80000a38: 03 2c 81 00 lw s8, 8(sp) +80000a3c: 13 01 01 03 addi sp, sp, 48 +80000a40: 67 80 00 00 ret +80000a44: 83 27 49 00 lw a5, 4(s2) +80000a48: 83 a6 44 00 lw a3, 4(s1) +80000a4c: 93 87 f7 ff addi a5, a5, -1 +80000a50: 63 8e 87 04 beq a5, s0, 92 +80000a54: 23 a2 04 00 sw zero, 4(s1) +80000a58: e3 88 06 fa beqz a3, -80 +80000a5c: 83 27 89 18 lw a5, 392(s2) +80000a60: 33 97 8a 00 sll a4, s5, s0 +80000a64: 03 2c 49 00 lw s8, 4(s2) +80000a68: b3 77 f7 00 and a5, a4, a5 +80000a6c: 63 92 07 02 bnez a5, 36 +80000a70: e7 80 06 00 jalr a3 +80000a74: 03 27 49 00 lw a4, 4(s2) +80000a78: 83 27 8a 14 lw a5, 328(s4) +80000a7c: 63 14 87 01 bne a4, s8, 8 +80000a80: e3 04 f9 f8 beq s2, a5, -120 +80000a84: e3 88 07 f8 beqz a5, -112 +80000a88: 13 89 07 00 mv s2, a5 +80000a8c: 6f f0 df f5 j -164 +80000a90: 83 27 c9 18 lw a5, 396(s2) +80000a94: 83 a5 44 08 lw a1, 132(s1) +80000a98: 33 77 f7 00 and a4, a4, a5 +80000a9c: 63 1c 07 00 bnez a4, 24 +80000aa0: 13 05 0b 00 mv a0, s6 +80000aa4: e7 80 06 00 jalr a3 +80000aa8: 6f f0 df fc j -52 +80000aac: 23 22 89 00 sw s0, 4(s2) +80000ab0: 6f f0 9f fa j -88 +80000ab4: 13 85 05 00 mv a0, a1 +80000ab8: e7 80 06 00 jalr a3 +80000abc: 6f f0 9f fb j -72 Disassembly of section .init_array: -80001a40 __preinit_array_start: -80001a40: 50 00 -80001a42: 00 80 +80001ac0 __preinit_array_start: +80001ac0: 50 00 +80001ac2: 00 80 Disassembly of section .data: -80001a48 impure_data: -80001a48: 00 00 -80001a4a: 00 00 -80001a4c: 34 1d -80001a4e: 00 80 -80001a50: 9c 1d -80001a52: 00 80 -80001a54: 04 1e -80001a56: 00 80 +80001ac8 impure_data: +80001ac8: 00 00 +80001aca: 00 00 +80001acc: b4 1d +80001ace: 00 80 +80001ad0: 1c 1e +80001ad2: 00 80 +80001ad4: 84 1e +80001ad6: 00 80 ... -80001af0: 01 00 -80001af2: 00 00 -80001af4: 00 00 -80001af6: 00 00 -80001af8: 0e 33 -80001afa: cd ab -80001afc: 34 12 -80001afe: 6d e6 -80001b00: ec de -80001b02: 05 00 -80001b04: 0b 00 00 00 +80001b70: 01 00 +80001b72: 00 00 +80001b74: 00 00 +80001b76: 00 00 +80001b78: 0e 33 +80001b7a: cd ab +80001b7c: 34 12 +80001b7e: 6d e6 +80001b80: ec de +80001b82: 05 00 +80001b84: 0b 00 00 00 ... Disassembly of section .sdata: -80001e70 _global_impure_ptr: -80001e70: 48 1a -80001e72: 00 80 +80001ef0 _global_impure_ptr: +80001ef0: c8 1a +80001ef2: 00 80 Disassembly of section .bss: -80001e74 g_wspawn_args: +80001ef4 g_wspawn_args: ... Disassembly of section .comment: @@ -867,28 +899,28 @@ Disassembly of section .symtab: 2c: 03 00 02 00 lb zero, 0(tp) 30: 00 00 32: 00 00 - 34: 40 1a + 34: c0 1a 36: 00 80 38: 00 00 3a: 00 00 3c: 03 00 03 00 lb zero, 0(t1) 40: 00 00 42: 00 00 - 44: 48 1a + 44: c8 1a 46: 00 80 48: 00 00 4a: 00 00 4c: 03 00 04 00 lb zero, 0(s0) 50: 00 00 52: 00 00 - 54: 70 1e + 54: f0 1e 56: 00 80 58: 00 00 5a: 00 00 5c: 03 00 05 00 lb zero, 0(a0) 60: 00 00 62: 00 00 - 64: 74 1e + 64: f4 1e 66: 00 80 68: 00 00 6a: 00 00 @@ -904,7 +936,7 @@ Disassembly of section .symtab: 9e: f1 ff a0: 0e 00 a2: 00 00 - a4: e0 05 + a4: 60 06 a6: 00 80 a8: 00 00 aa: 00 00 @@ -961,7 +993,7 @@ Disassembly of section .symtab: 14e: f1 ff 150: 85 00 152: 00 00 - 154: 48 1a + 154: c8 1a 156: 00 80 158: 28 04 15a: 00 00 @@ -972,49 +1004,49 @@ Disassembly of section .symtab: 16e: f1 ff 170: 91 00 172: 00 00 - 174: 44 1a + 174: c4 1a 176: 00 80 178: 00 00 17a: 00 00 17c: 00 00 17e: 03 00 a2 00 lb zero, 10(tp) 182: 00 00 - 184: 44 1a + 184: c4 1a 186: 00 80 188: 00 00 18a: 00 00 18c: 00 00 18e: 03 00 b5 00 lb zero, 11(a0) 192: 00 00 - 194: 44 1a + 194: c4 1a 196: 00 80 198: 00 00 19a: 00 00 19c: 00 00 19e: 03 00 c6 00 lb zero, 12(a2) 1a2: 00 00 - 1a4: 40 1a + 1a4: c0 1a 1a6: 00 80 1a8: 00 00 1aa: 00 00 1ac: 00 00 1ae: 03 00 da 00 lb zero, 13(s4) 1b2: 00 00 - 1b4: 40 1a + 1b4: c0 1a 1b6: 00 80 1b8: 00 00 1ba: 00 00 1bc: 00 00 1be: 03 00 ed 00 lb zero, 14(s10) 1c2: 00 00 - 1c4: 40 1a + 1c4: c0 1a 1c6: 00 80 1c8: 00 00 1ca: 00 00 1cc: 00 00 1ce: 03 00 03 01 lb zero, 16(t1) 1d2: 00 00 - 1d4: 34 06 + 1d4: b4 06 1d6: 00 80 1d8: 00 00 1da: 00 00 @@ -1027,7 +1059,7 @@ Disassembly of section .symtab: 1ee: f1 ff 1f0: 1c 01 1f2: 00 00 - 1f4: e4 05 + 1f4: 64 06 1f6: 00 80 1f8: 00 00 1fa: 00 00 @@ -1035,60 +1067,60 @@ Disassembly of section .symtab: 1fe: 02 00 200: 26 01 202: 00 00 - 204: 68 00 - 206: 00 80 - 208: 4c 01 + 204: 00 04 + 206: 00 00 + 208: 00 00 20a: 00 00 - 20c: 12 00 - 20e: 02 00 - 210: 3c 01 - 212: 00 00 - 214: 00 04 - 216: 00 00 - 218: 00 00 + 20c: 10 00 + 20e: f1 ff + 210: 33 01 00 00 add sp, zero, zero + 214: f4 1e + 216: 00 80 + 218: 40 00 21a: 00 00 - 21c: 10 00 - 21e: f1 ff - 220: 49 01 + 21c: 11 00 + 21e: 06 00 + 220: 41 01 222: 00 00 - 224: 74 1e + 224: 6c 06 226: 00 80 - 228: 20 00 + 228: 00 00 22a: 00 00 - 22c: 11 00 - 22e: 06 00 - 230: 57 01 00 00 - 234: ec 05 + 22c: 12 00 + 22e: 02 00 + 230: 48 01 + 232: 00 00 + 234: f0 1e 236: 00 80 238: 00 00 23a: 00 00 - 23c: 12 00 - 23e: 02 00 - 240: 5e 01 + 23c: 10 00 + 23e: 05 00 + 240: 58 01 242: 00 00 - 244: 70 1e + 244: 68 00 246: 00 80 - 248: 00 00 + 248: 48 01 24a: 00 00 - 24c: 10 00 - 24e: 05 00 + 24c: 12 00 + 24e: 02 00 250: 6e 01 252: 00 00 - 254: 48 22 + 254: c8 22 256: 00 80 258: 00 00 25a: 00 00 25c: 10 00 25e: f1 ff 260: 7f 01 00 00 - 264: 04 06 + 264: 84 06 266: 00 80 268: 00 00 26a: 00 00 26c: 12 00 26e: 02 00 270: 87 01 00 00 - 274: 44 06 + 274: c4 06 276: 00 80 278: 00 00 27a: 00 00 @@ -1096,15 +1128,15 @@ Disassembly of section .symtab: 27e: 02 00 280: 94 01 282: 00 00 - 284: 4c 02 + 284: 48 02 286: 00 80 - 288: 88 01 + 288: 0c 02 28a: 00 00 28c: 12 00 28e: 02 00 290: a1 01 292: 00 00 - 294: fc 05 + 294: 7c 06 296: 00 80 298: 00 00 29a: 00 00 @@ -1112,7 +1144,7 @@ Disassembly of section .symtab: 29e: 02 00 2a0: aa 01 2a2: 00 00 - 2a4: 70 1e + 2a4: f0 1e 2a6: 00 80 2a8: 04 00 2aa: 00 00 @@ -1120,14 +1152,14 @@ Disassembly of section .symtab: 2ae: 05 00 2b0: bd 01 2b2: 00 00 - 2b4: 08 07 + 2b4: 88 07 2b6: 00 80 2b8: 9c 00 2ba: 00 00 2bc: 12 00 2be: 02 00 2c0: cf 01 00 00 fnmadd.s ft3, ft0, ft0, ft0, rne - 2c4: 3c 06 + 2c4: bc 06 2c6: 00 80 2c8: 00 00 2ca: 00 00 @@ -1135,7 +1167,7 @@ Disassembly of section .symtab: 2ce: 02 00 2d0: de 01 2d2: 00 00 - 2d4: 0c 06 + 2d4: 8c 06 2d6: 00 80 2d8: 00 00 2da: 00 00 @@ -1143,7 +1175,7 @@ Disassembly of section .symtab: 2de: 02 00 2e0: e9 01 2e2: 00 00 - 2e4: 1c 06 + 2e4: 9c 06 2e6: 00 80 2e8: 00 00 2ea: 00 00 @@ -1151,7 +1183,7 @@ Disassembly of section .symtab: 2ee: 02 00 2f0: f6 01 2f2: 00 00 - 2f4: ac 06 + 2f4: 2c 07 2f6: 00 80 2f8: 5c 00 2fa: 00 00 @@ -1159,7 +1191,7 @@ Disassembly of section .symtab: 2fe: 02 00 300: 08 02 302: 00 00 - 304: 04 04 + 304: 84 04 306: 00 80 308: 94 00 30a: 00 00 @@ -1173,7 +1205,7 @@ Disassembly of section .symtab: 31c: 10 00 31e: f1 ff 320: 27 02 00 00 - 324: a8 05 + 324: 28 06 326: 00 80 328: 00 00 32a: 00 00 @@ -1181,7 +1213,7 @@ Disassembly of section .symtab: 32e: 02 00 330: 31 02 332: 00 00 - 334: f4 05 + 334: 74 06 336: 00 80 338: 00 00 33a: 00 00 @@ -1189,7 +1221,7 @@ Disassembly of section .symtab: 33e: 02 00 340: 3c 02 342: 00 00 - 344: 1c 09 + 344: 9c 09 346: 00 80 348: 24 01 34a: 00 00 @@ -1205,7 +1237,7 @@ Disassembly of section .symtab: 35e: 01 00 360: 4d 02 362: 00 00 - 364: 80 08 + 364: 00 09 366: 00 80 368: 9c 00 36a: 00 00 @@ -1213,7 +1245,7 @@ Disassembly of section .symtab: 36e: 02 00 370: 61 02 372: 00 00 - 374: 94 1e + 374: 34 1f 376: 00 80 378: 00 00 37a: 00 00 @@ -1221,7 +1253,7 @@ Disassembly of section .symtab: 37e: 06 00 380: 6d 02 382: 00 00 - 384: 74 1e + 384: f4 1e 386: 00 80 388: 00 00 38a: 00 00 @@ -1229,7 +1261,7 @@ Disassembly of section .symtab: 38e: 06 00 390: 79 02 392: 00 00 - 394: a4 07 + 394: 24 08 396: 00 80 398: dc 00 39a: 00 00 @@ -1237,7 +1269,7 @@ Disassembly of section .symtab: 39e: 02 00 3a0: 80 02 3a2: 00 00 - 3a4: d4 03 + 3a4: 54 04 3a6: 00 80 3a8: 30 00 3aa: 00 00 @@ -1245,14 +1277,14 @@ Disassembly of section .symtab: 3ae: 02 00 3b0: 85 02 3b2: 00 00 - 3b4: 20 05 + 3b4: a0 05 3b6: 00 80 3b8: 80 00 3ba: 00 00 3bc: 12 00 3be: 02 00 3c0: a7 02 00 00 - 3c4: 54 06 + 3c4: d4 06 3c6: 00 80 3c8: 00 00 3ca: 00 00 @@ -1260,7 +1292,7 @@ Disassembly of section .symtab: 3ce: 02 00 3d0: b5 02 3d2: 00 00 - 3d4: 64 06 + 3d4: e4 06 3d6: 00 80 3d8: 14 00 3da: 00 00 @@ -1268,7 +1300,7 @@ Disassembly of section .symtab: 3de: 02 00 3e0: bc 02 3e2: 00 00 - 3e4: 2c 06 + 3e4: ac 06 3e6: 00 80 3e8: 00 00 3ea: 00 00 @@ -1276,83 +1308,84 @@ Disassembly of section .symtab: 3ee: 02 00 3f0: ca 02 3f2: 00 00 - 3f4: 4c 06 + 3f4: cc 06 3f6: 00 80 3f8: 00 00 3fa: 00 00 3fc: 12 00 3fe: 02 00 400: d7 02 00 00 - 404: 14 06 + 404: 94 06 406: 00 80 408: 00 00 40a: 00 00 40c: 12 00 40e: 02 00 410: e3 02 00 00 beqz zero, 2052 - 414: b4 01 + 414: c8 1a 416: 00 80 - 418: 98 00 + 418: 00 00 41a: 00 00 - 41c: 12 00 - 41e: 02 00 - 420: fc 02 + 41c: 10 00 + 41e: 04 00 + 420: f2 02 422: 00 00 - 424: 48 1a + 424: f4 1e 426: 00 80 428: 00 00 42a: 00 00 42c: 10 00 - 42e: 04 00 - 430: 0b 03 00 00 - 434: 74 1e + 42e: 05 00 + 430: 9d 00 + 432: 00 00 + 434: 34 1f 436: 00 80 438: 00 00 43a: 00 00 43c: 10 00 - 43e: 05 00 - 440: 9d 00 + 43e: 06 00 + 440: 28 03 442: 00 00 - 444: 94 1e + 444: f8 06 446: 00 80 - 448: 00 00 + 448: 34 00 44a: 00 00 - 44c: 10 00 - 44e: 06 00 - 450: 21 03 + 44c: 12 00 + 44e: 02 00 + 450: f9 02 452: 00 00 - 454: 78 06 + 454: b0 01 456: 00 80 - 458: 34 00 + 458: 98 00 45a: 00 00 45c: 12 00 45e: 02 00 - 460: 12 03 + 460: 19 03 462: 00 00 - 464: 24 06 + 464: a4 06 466: 00 80 468: 00 00 46a: 00 00 46c: 12 00 46e: 02 00 - 470: 20 03 - 472: 00 00 - 474: a0 05 + 470: 27 03 00 00 + 474: 20 06 476: 00 80 478: 00 00 47a: 00 00 47c: 12 00 47e: 02 00 - 480: 26 03 + 480: 2d 03 482: 00 00 - 484: 98 04 + 484: 18 05 486: 00 80 488: 88 00 48a: 00 00 48c: 12 00 48e: 02 00 - 490: 43 03 00 00 fmadd.s ft6, ft0, ft0, ft0, rne - 494: 5c 06 + 490: 4a 03 + 492: 00 00 + 494: dc 06 496: 00 80 498: 00 00 49a: 00 00 @@ -1392,13 +1425,14 @@ Disassembly of section .strtab: 3e: 5f 6b 65 72 42: 6e 65 44: 6c 2d - 46: 62 38 - 48: 2d 65 - 4a: 32 2d - 4c: 32 32 - 4e: 2d 34 + 46: 61 30 + 48: 2d 38 + 4a: 65 2d + 4c: 38 62 + 4e: 2d 64 50: 39 2d - 52: 63 34 2e 63 + 52: 32 66 + 54: 2e 63 56: 00 70 58: 61 72 5a: 61 6c @@ -1483,31 +1517,32 @@ Disassembly of section .strtab: 11e: 5f 77 73 70 122: 61 77 124: 6e 00 - 126: 6b 65 72 6e - 12a: 65 6c - 12c: 5f 73 70 61 - 130: 77 6e 5f 72 - 134: 75 6e - 136: 5f 77 61 72 - 13a: 70 00 - 13c: 5f 5f 73 74 - 140: 61 63 - 142: 6b 5f 73 69 - 146: 7a 65 - 148: 00 67 - 14a: 5f 77 73 70 - 14e: 61 77 - 150: 6e 5f - 152: 61 72 - 154: 67 73 00 76 - 158: 78 5f - 15a: 74 6d - 15c: 63 00 5f 5f beq t5, s5, 1504 - 160: 53 44 41 54 - 164: 41 5f - 166: 42 45 - 168: 47 49 4e 5f - 16c: 5f 00 5f 5f + 126: 5f 5f 73 74 + 12a: 61 63 + 12c: 6b 5f 73 69 + 130: 7a 65 + 132: 00 67 + 134: 5f 77 73 70 + 138: 61 77 + 13a: 6e 5f + 13c: 61 72 + 13e: 67 73 00 76 + 142: 78 5f + 144: 74 6d + 146: 63 00 5f 5f beq t5, s5, 1504 + 14a: 53 44 41 54 + 14e: 41 5f + 150: 42 45 + 152: 47 49 4e 5f + 156: 5f 00 6b 65 + 15a: 72 6e + 15c: 65 6c + 15e: 5f 73 70 61 + 162: 77 6e 5f 63 + 166: 61 6c + 168: 6c 62 + 16a: 61 63 + 16c: 6b 00 5f 5f 170: 67 6c 6f 62 174: 61 6c 176: 5f 70 6f 69 @@ -1645,50 +1680,53 @@ Disassembly of section .strtab: 2d8: 78 5f 2da: 77 61 72 70 2de: 5f 67 69 64 - 2e2: 00 6b - 2e4: 65 72 - 2e6: 6e 65 - 2e8: 6c 5f - 2ea: 73 70 61 77 csrci 1910, 2 - 2ee: 6e 5f - 2f0: 72 75 - 2f2: 6e 5f - 2f4: 74 68 - 2f6: 72 65 - 2f8: 61 64 - 2fa: 73 00 5f 5f - 2fe: 44 41 - 300: 54 41 - 302: 5f 42 45 47 - 306: 49 4e - 308: 5f 5f 00 5f - 30c: 65 64 - 30e: 61 74 - 310: 61 00 - 312: 76 78 - 314: 5f 74 68 72 - 318: 65 61 - 31a: 64 5f - 31c: 6c 69 - 31e: 64 00 - 320: 5f 65 78 69 - 324: 74 00 - 326: 5f 70 6f 63 - 32a: 6c 5f - 32c: 6b 65 72 6e - 330: 65 6c - 332: 5f 73 61 78 - 336: 70 79 - 338: 5f 77 6f 72 - 33c: 6b 67 72 6f - 340: 75 70 - 342: 00 76 - 344: 78 5f - 346: 6e 75 - 348: 6d 5f - 34a: 69 6e - 34c: 73 74 72 73 csrrci s0, 1847, 4 - 350: 00 + 2e2: 00 5f + 2e4: 5f 44 41 54 + 2e8: 41 5f + 2ea: 42 45 + 2ec: 47 49 4e 5f + 2f0: 5f 00 5f 65 + 2f4: 64 61 + 2f6: 74 61 + 2f8: 00 6b + 2fa: 65 72 + 2fc: 6e 65 + 2fe: 6c 5f + 300: 73 70 61 77 csrci 1910, 2 + 304: 6e 5f + 306: 72 65 + 308: 6d 61 + 30a: 69 6e + 30c: 69 6e + 30e: 67 5f 63 61 + 312: 6c 6c + 314: 62 61 + 316: 63 6b 00 76 bltu zero, zero, 1910 + 31a: 78 5f + 31c: 74 68 + 31e: 72 65 + 320: 61 64 + 322: 5f 6c 69 64 + 326: 00 5f + 328: 65 78 + 32a: 69 74 + 32c: 00 5f + 32e: 70 6f + 330: 63 6c 5f 6b bltu t5, s5, 1720 + 334: 65 72 + 336: 6e 65 + 338: 6c 5f + 33a: 73 61 78 70 csrrsi sp, 1799, 16 + 33e: 79 5f + 340: 77 6f 72 6b + 344: 67 72 6f 75 + 348: 70 00 + 34a: 76 78 + 34c: 5f 6e 75 6d + 350: 5f 69 6e 73 + 354: 74 72 + 356: 73 + 357: 00 Disassembly of section .shstrtab: diff --git a/benchmarks/opencl/sfilter/kernel.pocl b/benchmarks/opencl/sfilter/kernel.pocl index be37873f..a300113e 100644 Binary files a/benchmarks/opencl/sfilter/kernel.pocl and b/benchmarks/opencl/sfilter/kernel.pocl differ diff --git a/benchmarks/opencl/sfilter/sfilter.dump b/benchmarks/opencl/sfilter/sfilter.dump index 044d345d..8f334c0e 100644 --- a/benchmarks/opencl/sfilter/sfilter.dump +++ b/benchmarks/opencl/sfilter/sfilter.dump @@ -1,30 +1,30 @@ -/tmp/pocl_vortex_kernel-de-06-b5-48-79.elf: file format ELF32-riscv +/tmp/pocl_vortex_kernel-10-e3-85-d7-4f.elf: file format ELF32-riscv Disassembly of section .init: 80000000 _start: 80000000: 97 15 00 00 auipc a1, 1 -80000004: 93 85 85 b3 addi a1, a1, -1224 +80000004: 93 85 85 bb addi a1, a1, -1096 80000008: 73 25 10 fc csrr a0, 4033 8000000c: 6b 10 b5 00 -80000010: ef 00 90 32 jal 2856 +80000010: ef 00 90 3a jal 2984 80000014: 13 05 10 00 addi a0, zero, 1 80000018: 6b 00 05 00 -8000001c: 17 15 00 00 auipc a0, 1 -80000020: 13 05 85 41 addi a0, a0, 1048 -80000024: 17 16 00 00 auipc a2, 1 -80000028: 13 06 06 43 addi a2, a2, 1072 +8000001c: 17 25 00 00 auipc a0, 2 +80000020: 13 05 85 46 addi a0, a0, 1128 +80000024: 17 26 00 00 auipc a2, 2 +80000028: 13 06 06 4a addi a2, a2, 1184 8000002c: 33 06 a6 40 sub a2, a2, a0 80000030: 93 05 00 00 mv a1, zero -80000034: ef 00 10 50 jal 3328 +80000034: ef 00 10 58 jal 3456 80000038: 17 15 00 00 auipc a0, 1 -8000003c: 13 05 45 c0 addi a0, a0, -1020 -80000040: ef 00 50 3b jal 2996 -80000044: ef 00 50 45 jal 3156 -80000048: ef 00 c0 38 jal 908 -8000004c: 6f 00 d0 3b j 3004 +8000003c: 13 05 45 c8 addi a0, a0, -892 +80000040: ef 00 50 43 jal 3124 +80000044: ef 00 50 4d jal 3284 +80000048: ef 00 c0 40 jal 1036 +8000004c: 6f 00 d0 43 j 3132 Disassembly of section .text: @@ -32,11 +32,11 @@ Disassembly of section .text: 80000050: 93 07 00 00 mv a5, zero 80000054: 63 88 07 00 beqz a5, 16 80000058: 37 15 00 80 lui a0, 524289 -8000005c: 13 05 c5 c3 addi a0, a0, -964 -80000060: 6f 00 50 39 j 2964 +8000005c: 13 05 c5 cb addi a0, a0, -836 +80000060: 6f 00 50 41 j 3092 80000064: 67 80 00 00 ret -80000068 kernel_spawn_run_warp: +80000068 kernel_spawn_callback: 80000068: 13 01 01 fd addi sp, sp, -48 8000006c: 23 26 11 02 sw ra, 44(sp) 80000070: 23 24 81 02 sw s0, 40(sp) @@ -48,190 +48,190 @@ Disassembly of section .text: 80000088: 23 28 61 01 sw s6, 16(sp) 8000008c: 23 26 71 01 sw s7, 12(sp) 80000090: 23 24 81 01 sw s8, 8(sp) -80000094: ef 00 90 33 jal 2872 -80000098: ef 00 50 2e jal 2788 -8000009c: ef 00 90 32 jal 2856 +80000094: ef 00 90 3b jal 3000 +80000098: ef 00 50 36 jal 2916 +8000009c: ef 00 90 3a jal 2984 800000a0: 93 04 05 00 mv s1, a0 -800000a4: ef 00 90 2f jal 2808 +800000a4: ef 00 90 37 jal 2936 800000a8: 93 09 05 00 mv s3, a0 -800000ac: ef 00 10 30 jal 2816 +800000ac: ef 00 10 38 jal 2944 800000b0: 13 09 05 00 mv s2, a0 -800000b4: ef 00 90 31 jal 2840 -800000b8: b7 15 00 80 lui a1, 524289 -800000bc: 93 85 45 43 addi a1, a1, 1076 +800000b4: ef 00 90 39 jal 2968 +800000b8: b7 25 00 80 lui a1, 524290 +800000bc: 93 85 45 48 addi a1, a1, 1156 800000c0: 13 96 24 00 slli a2, s1, 2 800000c4: b3 05 b6 00 add a1, a2, a1 -800000c8: 03 ab 05 00 lw s6, 0(a1) -800000cc: 83 25 4b 01 lw a1, 20(s6) -800000d0: 03 26 0b 01 lw a2, 16(s6) -800000d4: 93 86 05 00 mv a3, a1 -800000d8: 63 c4 35 01 blt a1, s3, 8 -800000dc: 93 86 09 00 mv a3, s3 -800000e0: b3 a5 b9 00 slt a1, s3, a1 -800000e4: 33 07 b6 00 add a4, a2, a1 -800000e8: 93 05 10 00 addi a1, zero, 1 -800000ec: 63 4a b7 08 blt a4, a1, 148 -800000f0: 83 25 0b 00 lw a1, 0(s6) -800000f4: 83 aa 05 00 lw s5, 0(a1) -800000f8: 83 a7 45 00 lw a5, 4(a1) -800000fc: 83 24 cb 00 lw s1, 12(s6) -80000100: 33 8a 57 03 mul s4, a5, s5 -80000104: 13 0c f7 ff addi s8, a4, -1 -80000108: 33 86 c9 02 mul a2, s3, a2 -8000010c: 33 86 c6 00 add a2, a3, a2 -80000110: 33 05 c5 02 mul a0, a0, a2 -80000114: 33 85 a4 00 add a0, s1, a0 -80000118: 33 06 e9 02 mul a2, s2, a4 -8000011c: b3 04 c5 00 add s1, a0, a2 -80000120: 33 09 f0 40 neg s2, a5 -80000124: b3 0b 40 41 neg s7, s4 -80000128: 33 c6 44 03 div a2, s1, s4 +800000c8: 03 ac 05 00 lw s8, 0(a1) +800000cc: 83 26 4c 01 lw a3, 20(s8) +800000d0: 83 25 0c 01 lw a1, 16(s8) +800000d4: 13 86 06 00 mv a2, a3 +800000d8: 63 c4 36 01 blt a3, s3, 8 +800000dc: 13 86 09 00 mv a2, s3 +800000e0: b3 a6 d9 00 slt a3, s3, a3 +800000e4: b3 86 d5 00 add a3, a1, a3 +800000e8: 13 07 10 00 addi a4, zero, 1 +800000ec: 63 c8 e6 08 blt a3, a4, 144 +800000f0: 33 87 35 03 mul a4, a1, s3 +800000f4: 83 25 0c 00 lw a1, 0(s8) +800000f8: 33 06 e6 00 add a2, a2, a4 +800000fc: 03 27 cc 00 lw a4, 12(s8) +80000100: 33 05 a6 02 mul a0, a2, a0 +80000104: 03 aa 05 00 lw s4, 0(a1) +80000108: 03 a6 45 00 lw a2, 4(a1) +8000010c: 33 05 e5 00 add a0, a0, a4 +80000110: 33 87 26 03 mul a4, a3, s2 +80000114: 33 04 e5 00 add s0, a0, a4 +80000118: 33 09 46 03 mul s2, a2, s4 +8000011c: b3 0a d4 00 add s5, s0, a3 +80000120: 33 0b c0 40 neg s6, a2 +80000124: b3 0b 20 41 neg s7, s2 +80000128: 33 46 24 03 div a2, s0, s2 8000012c: 33 85 cb 02 mul a0, s7, a2 -80000130: 33 85 a4 00 add a0, s1, a0 -80000134: b3 46 55 03 div a3, a0, s5 +80000130: 33 05 a4 00 add a0, s0, a0 +80000134: b3 46 45 03 div a3, a0, s4 80000138: 03 a5 c5 00 lw a0, 12(a1) -8000013c: 33 07 c9 02 mul a4, s2, a2 +8000013c: 33 07 cb 02 mul a4, s6, a2 80000140: 33 07 d7 40 sub a4, a4, a3 -80000144: 33 87 ea 02 mul a4, s5, a4 +80000144: 33 07 ea 02 mul a4, s4, a4 80000148: 33 08 e5 00 add a6, a0, a4 8000014c: 03 a7 05 01 lw a4, 16(a1) -80000150: 03 a4 45 01 lw s0, 20(a1) -80000154: 83 27 4b 00 lw a5, 4(s6) -80000158: 03 25 8b 00 lw a0, 8(s6) +80000150: 83 a4 45 01 lw s1, 20(a1) +80000154: 83 27 4c 00 lw a5, 4(s8) +80000158: 03 25 8c 00 lw a0, 8(s8) 8000015c: b3 06 d7 00 add a3, a4, a3 -80000160: 33 07 c4 00 add a4, s0, a2 -80000164: 33 86 04 01 add a2, s1, a6 +80000160: 33 87 c4 00 add a4, s1, a2 +80000164: 33 06 04 01 add a2, s0, a6 80000168: e7 80 07 00 jalr a5 -8000016c: 63 0a 0c 00 beqz s8, 20 -80000170: 83 25 0b 00 lw a1, 0(s6) -80000174: 13 0c fc ff addi s8, s8, -1 -80000178: 93 84 14 00 addi s1, s1, 1 -8000017c: 6f f0 df fa j -84 -80000180: 13 b5 19 00 seqz a0, s3 -80000184: 03 2c 81 00 lw s8, 8(sp) -80000188: 83 2b c1 00 lw s7, 12(sp) -8000018c: 03 2b 01 01 lw s6, 16(sp) -80000190: 83 2a 41 01 lw s5, 20(sp) -80000194: 03 2a 81 01 lw s4, 24(sp) -80000198: 83 29 c1 01 lw s3, 28(sp) -8000019c: 03 29 01 02 lw s2, 32(sp) -800001a0: 83 24 41 02 lw s1, 36(sp) -800001a4: 03 24 81 02 lw s0, 40(sp) -800001a8: 83 20 c1 02 lw ra, 44(sp) -800001ac: 13 01 01 03 addi sp, sp, 48 -800001b0: 6f 00 d0 1c j 2508 +8000016c: 13 04 14 00 addi s0, s0, 1 +80000170: 63 56 54 01 bge s0, s5, 12 +80000174: 83 25 0c 00 lw a1, 0(s8) +80000178: 6f f0 1f fb j -80 +8000017c: 13 b5 19 00 seqz a0, s3 +80000180: 03 2c 81 00 lw s8, 8(sp) +80000184: 83 2b c1 00 lw s7, 12(sp) +80000188: 03 2b 01 01 lw s6, 16(sp) +8000018c: 83 2a 41 01 lw s5, 20(sp) +80000190: 03 2a 81 01 lw s4, 24(sp) +80000194: 83 29 c1 01 lw s3, 28(sp) +80000198: 03 29 01 02 lw s2, 32(sp) +8000019c: 83 24 41 02 lw s1, 36(sp) +800001a0: 03 24 81 02 lw s0, 40(sp) +800001a4: 83 20 c1 02 lw ra, 44(sp) +800001a8: 13 01 01 03 addi sp, sp, 48 +800001ac: 6f 00 10 25 j 2640 -800001b4 kernel_spawn_run_threads: -800001b4: 13 01 01 ff addi sp, sp, -16 -800001b8: 23 26 11 00 sw ra, 12(sp) -800001bc: 23 24 81 00 sw s0, 8(sp) -800001c0: ef 00 d0 1b jal 2492 -800001c4: ef 00 10 20 jal 2560 -800001c8: 13 04 05 00 mv s0, a0 -800001cc: ef 00 10 1f jal 2544 -800001d0: b7 15 00 80 lui a1, 524289 -800001d4: 93 85 45 43 addi a1, a1, 1076 -800001d8: 13 16 24 00 slli a2, s0, 2 -800001dc: b3 05 b6 00 add a1, a2, a1 -800001e0: 03 a6 05 00 lw a2, 0(a1) -800001e4: 83 25 06 00 lw a1, 0(a2) -800001e8: 83 26 c6 00 lw a3, 12(a2) -800001ec: 03 a7 05 00 lw a4, 0(a1) -800001f0: 83 a7 45 00 lw a5, 4(a1) -800001f4: 33 85 a6 00 add a0, a3, a0 -800001f8: b3 86 e7 02 mul a3, a5, a4 -800001fc: b3 47 d5 02 div a5, a0, a3 -80000200: b3 86 d7 02 mul a3, a5, a3 -80000204: 03 a4 c5 00 lw s0, 12(a1) -80000208: 33 05 d5 40 sub a0, a0, a3 -8000020c: b3 46 e5 02 div a3, a0, a4 -80000210: 33 88 e6 02 mul a6, a3, a4 -80000214: b3 08 a4 00 add a7, s0, a0 -80000218: 03 a7 05 01 lw a4, 16(a1) -8000021c: 03 a4 45 01 lw s0, 20(a1) -80000220: 83 22 46 00 lw t0, 4(a2) -80000224: 03 25 86 00 lw a0, 8(a2) -80000228: 33 86 08 41 sub a2, a7, a6 -8000022c: b3 06 d7 00 add a3, a4, a3 -80000230: 33 07 f4 00 add a4, s0, a5 -80000234: e7 80 02 00 jalr t0 -80000238: 13 05 10 00 addi a0, zero, 1 -8000023c: 03 24 81 00 lw s0, 8(sp) -80000240: 83 20 c1 00 lw ra, 12(sp) -80000244: 13 01 01 01 addi sp, sp, 16 -80000248: 6f 00 50 13 j 2356 +800001b0 kernel_spawn_remaining_callback: +800001b0: 13 01 01 ff addi sp, sp, -16 +800001b4: 23 26 11 00 sw ra, 12(sp) +800001b8: 23 24 81 00 sw s0, 8(sp) +800001bc: ef 00 10 24 jal 2624 +800001c0: ef 00 50 28 jal 2692 +800001c4: 13 04 05 00 mv s0, a0 +800001c8: ef 00 50 27 jal 2676 +800001cc: b7 25 00 80 lui a1, 524290 +800001d0: 93 85 45 48 addi a1, a1, 1156 +800001d4: 13 16 24 00 slli a2, s0, 2 +800001d8: b3 05 b6 00 add a1, a2, a1 +800001dc: 03 a6 05 00 lw a2, 0(a1) +800001e0: 83 25 06 00 lw a1, 0(a2) +800001e4: 83 26 c6 00 lw a3, 12(a2) +800001e8: 03 a7 05 00 lw a4, 0(a1) +800001ec: 83 a7 45 00 lw a5, 4(a1) +800001f0: 33 85 a6 00 add a0, a3, a0 +800001f4: b3 86 e7 02 mul a3, a5, a4 +800001f8: b3 47 d5 02 div a5, a0, a3 +800001fc: b3 86 d7 02 mul a3, a5, a3 +80000200: 03 a4 c5 00 lw s0, 12(a1) +80000204: 33 05 d5 40 sub a0, a0, a3 +80000208: b3 46 e5 02 div a3, a0, a4 +8000020c: 33 88 e6 02 mul a6, a3, a4 +80000210: b3 08 a4 00 add a7, s0, a0 +80000214: 03 a7 05 01 lw a4, 16(a1) +80000218: 03 a4 45 01 lw s0, 20(a1) +8000021c: 83 22 46 00 lw t0, 4(a2) +80000220: 03 25 86 00 lw a0, 8(a2) +80000224: 33 86 08 41 sub a2, a7, a6 +80000228: b3 06 d7 00 add a3, a4, a3 +8000022c: 33 07 f4 00 add a4, s0, a5 +80000230: e7 80 02 00 jalr t0 +80000234: 13 05 10 00 addi a0, zero, 1 +80000238: 03 24 81 00 lw s0, 8(sp) +8000023c: 83 20 c1 00 lw ra, 12(sp) +80000240: 13 01 01 01 addi sp, sp, 16 +80000244: 6f 00 90 1b j 2488 -8000024c kernel_spawn: -8000024c: 13 01 01 fc addi sp, sp, -64 -80000250: 23 2e 11 02 sw ra, 60(sp) -80000254: 23 2c 81 02 sw s0, 56(sp) -80000258: 23 2a 91 02 sw s1, 52(sp) -8000025c: 23 28 21 03 sw s2, 48(sp) -80000260: 23 26 31 03 sw s3, 44(sp) -80000264: 23 24 41 03 sw s4, 40(sp) -80000268: 23 22 51 03 sw s5, 36(sp) -8000026c: 23 20 61 03 sw s6, 32(sp) -80000270: 23 2e 71 01 sw s7, 28(sp) -80000274: 23 2c 81 01 sw s8, 24(sp) -80000278: 93 04 05 00 mv s1, a0 -8000027c: 83 2b 05 00 lw s7, 0(a0) -80000280: 03 24 45 00 lw s0, 4(a0) -80000284: 03 2c 85 00 lw s8, 8(a0) -80000288: 13 09 06 00 mv s2, a2 -8000028c: 93 89 05 00 mv s3, a1 -80000290: ef 00 d0 14 jal 2380 -80000294: 13 0b 05 00 mv s6, a0 -80000298: ef 00 d0 13 jal 2364 -8000029c: 13 0a 05 00 mv s4, a0 -800002a0: ef 00 d0 12 jal 2348 -800002a4: 93 0a 05 00 mv s5, a0 -800002a8: ef 00 d0 11 jal 2332 -800002ac: 93 05 70 00 addi a1, zero, 7 -800002b0: 63 ca a5 0e blt a1, a0, 244 -800002b4: b3 05 74 03 mul a1, s0, s7 -800002b8: 33 86 85 03 mul a2, a1, s8 -800002bc: b3 85 4a 03 mul a1, s5, s4 -800002c0: 93 06 10 00 addi a3, zero, 1 -800002c4: 63 c8 c5 00 blt a1, a2, 16 -800002c8: 63 da 66 01 bge a3, s6, 20 -800002cc: 63 4c d5 00 blt a0, a3, 24 -800002d0: 6f 00 40 0d j 212 -800002d4: b3 46 b6 02 div a3, a2, a1 -800002d8: e3 ca 66 ff blt a3, s6, -12 -800002dc: 93 06 0b 00 mv a3, s6 -800002e0: 63 52 d5 0c bge a0, a3, 196 -800002e4: 13 07 fb ff addi a4, s6, -1 -800002e8: b3 45 d6 02 div a1, a2, a3 -800002ec: 63 0e e5 00 beq a0, a4, 28 -800002f0: 13 06 00 00 mv a2, zero -800002f4: 33 0b b6 00 add s6, a2, a1 -800002f8: 33 46 5b 03 div a2, s6, s5 -800002fc: 93 06 00 00 mv a3, zero -80000300: 63 50 46 03 bge a2, s4, 32 -80000304: 6f 00 00 02 j 32 -80000308: b3 86 d5 02 mul a3, a1, a3 -8000030c: 33 06 d6 40 sub a2, a2, a3 -80000310: 33 0b b6 00 add s6, a2, a1 -80000314: 33 46 5b 03 div a2, s6, s5 -80000318: 93 06 00 00 mv a3, zero -8000031c: 63 44 46 01 blt a2, s4, 8 -80000320: b3 46 46 03 div a3, a2, s4 -80000324: 13 07 00 00 mv a4, zero -80000328: 93 07 10 00 addi a5, zero, 1 -8000032c: 63 88 06 00 beqz a3, 16 -80000330: 33 87 46 03 mul a4, a3, s4 -80000334: 33 07 e6 40 sub a4, a2, a4 -80000338: 93 87 06 00 mv a5, a3 -8000033c: 33 04 56 03 mul s0, a2, s5 +80000248 kernel_spawn: +80000248: 13 01 01 fc addi sp, sp, -64 +8000024c: 23 2e 11 02 sw ra, 60(sp) +80000250: 23 2c 81 02 sw s0, 56(sp) +80000254: 23 2a 91 02 sw s1, 52(sp) +80000258: 23 28 21 03 sw s2, 48(sp) +8000025c: 23 26 31 03 sw s3, 44(sp) +80000260: 23 24 41 03 sw s4, 40(sp) +80000264: 23 22 51 03 sw s5, 36(sp) +80000268: 23 20 61 03 sw s6, 32(sp) +8000026c: 23 2e 71 01 sw s7, 28(sp) +80000270: 23 2c 81 01 sw s8, 24(sp) +80000274: 93 04 05 00 mv s1, a0 +80000278: 83 2b 05 00 lw s7, 0(a0) +8000027c: 03 24 45 00 lw s0, 4(a0) +80000280: 03 2c 85 00 lw s8, 8(a0) +80000284: 13 09 06 00 mv s2, a2 +80000288: 93 89 05 00 mv s3, a1 +8000028c: ef 00 10 1d jal 2512 +80000290: 13 0b 05 00 mv s6, a0 +80000294: ef 00 10 1c jal 2496 +80000298: 13 0a 05 00 mv s4, a0 +8000029c: ef 00 10 1b jal 2480 +800002a0: 93 0a 05 00 mv s5, a0 +800002a4: ef 00 10 1a jal 2464 +800002a8: 93 05 f0 00 addi a1, zero, 15 +800002ac: 63 cc a5 16 blt a1, a0, 376 +800002b0: b3 05 74 03 mul a1, s0, s7 +800002b4: 33 86 85 03 mul a2, a1, s8 +800002b8: b3 85 4a 03 mul a1, s5, s4 +800002bc: 93 06 10 00 addi a3, zero, 1 +800002c0: 63 c8 c5 00 blt a1, a2, 16 +800002c4: 63 da 66 01 bge a3, s6, 20 +800002c8: 63 4c d5 00 blt a0, a3, 24 +800002cc: 6f 00 80 15 j 344 +800002d0: b3 46 b6 02 div a3, a2, a1 +800002d4: e3 ca 66 ff blt a3, s6, -12 +800002d8: 93 06 0b 00 mv a3, s6 +800002dc: 63 54 d5 14 bge a0, a3, 328 +800002e0: 13 07 fb ff addi a4, s6, -1 +800002e4: b3 45 d6 02 div a1, a2, a3 +800002e8: 63 0e e5 00 beq a0, a4, 28 +800002ec: 13 06 00 00 mv a2, zero +800002f0: b3 06 b6 00 add a3, a2, a1 +800002f4: 33 c6 56 03 div a2, a3, s5 +800002f8: 13 07 00 00 mv a4, zero +800002fc: 63 50 46 03 bge a2, s4, 32 +80000300: 6f 00 00 02 j 32 +80000304: b3 86 d5 02 mul a3, a1, a3 +80000308: 33 06 d6 40 sub a2, a2, a3 +8000030c: b3 06 b6 00 add a3, a2, a1 +80000310: 33 c6 56 03 div a2, a3, s5 +80000314: 13 07 00 00 mv a4, zero +80000318: 63 44 46 01 blt a2, s4, 8 +8000031c: 33 47 46 03 div a4, a2, s4 +80000320: 93 07 00 00 mv a5, zero +80000324: b3 0a 56 03 mul s5, a2, s5 +80000328: 13 04 10 00 addi s0, zero, 1 +8000032c: 63 08 07 00 beqz a4, 16 +80000330: b3 07 47 03 mul a5, a4, s4 +80000334: b3 07 f6 40 sub a5, a2, a5 +80000338: 13 04 07 00 mv s0, a4 +8000033c: 33 8b 56 41 sub s6, a3, s5 80000340: 23 20 91 00 sw s1, 0(sp) 80000344: 23 22 31 01 sw s3, 4(sp) 80000348: 23 24 21 01 sw s2, 8(sp) 8000034c: b3 85 a5 02 mul a1, a1, a0 80000350: 23 26 b1 00 sw a1, 12(sp) -80000354: 23 28 f1 00 sw a5, 16(sp) -80000358: 23 2a e1 00 sw a4, 20(sp) -8000035c: b7 15 00 80 lui a1, 524289 -80000360: 93 85 45 43 addi a1, a1, 1076 +80000354: 23 28 81 00 sw s0, 16(sp) +80000358: 23 2a f1 00 sw a5, 20(sp) +8000035c: b7 25 00 80 lui a1, 524290 +80000360: 93 85 45 48 addi a1, a1, 1156 80000364: 13 15 25 00 slli a0, a0, 2 80000368: 33 05 b5 00 add a0, a0, a1 8000036c: 93 05 01 00 mv a1, sp @@ -243,891 +243,923 @@ Disassembly of section .text: 80000384: 37 05 00 80 lui a0, 524288 80000388: 93 05 85 06 addi a1, a0, 104 8000038c: 13 05 06 00 mv a0, a2 -80000390: ef 00 40 7e jal 2020 +80000390: ef 00 50 06 jal 2148 80000394: ef f0 5f cd jal -812 -80000398: 63 06 8b 00 beq s6, s0, 12 -8000039c: 23 26 81 00 sw s0, 12(sp) -800003a0: ef f0 9f cc jal -824 -800003a4: 03 2c 81 01 lw s8, 24(sp) -800003a8: 83 2b c1 01 lw s7, 28(sp) -800003ac: 03 2b 01 02 lw s6, 32(sp) -800003b0: 83 2a 41 02 lw s5, 36(sp) -800003b4: 03 2a 81 02 lw s4, 40(sp) -800003b8: 83 29 c1 02 lw s3, 44(sp) -800003bc: 03 29 01 03 lw s2, 48(sp) -800003c0: 83 24 41 03 lw s1, 52(sp) -800003c4: 03 24 81 03 lw s0, 56(sp) -800003c8: 83 20 c1 03 lw ra, 60(sp) -800003cc: 13 01 01 04 addi sp, sp, 64 -800003d0: 67 80 00 00 ret +80000398: 63 06 0b 08 beqz s6, 140 +8000039c: 23 26 51 01 sw s5, 12(sp) +800003a0: 13 05 0b 00 mv a0, s6 +800003a4: ef 00 90 05 jal 2136 +800003a8: ef 00 d0 09 jal 2204 +800003ac: 13 04 05 00 mv s0, a0 +800003b0: ef 00 d0 08 jal 2188 +800003b4: b7 25 00 80 lui a1, 524290 +800003b8: 93 85 45 48 addi a1, a1, 1156 +800003bc: 13 16 24 00 slli a2, s0, 2 +800003c0: b3 05 b6 00 add a1, a2, a1 +800003c4: 03 a6 05 00 lw a2, 0(a1) +800003c8: 83 25 06 00 lw a1, 0(a2) +800003cc: 83 26 c6 00 lw a3, 12(a2) +800003d0: 03 a7 05 00 lw a4, 0(a1) +800003d4: 83 a7 45 00 lw a5, 4(a1) +800003d8: 33 85 a6 00 add a0, a3, a0 +800003dc: b3 86 e7 02 mul a3, a5, a4 +800003e0: b3 47 d5 02 div a5, a0, a3 +800003e4: b3 86 d7 02 mul a3, a5, a3 +800003e8: 83 a4 c5 00 lw s1, 12(a1) +800003ec: 33 05 d5 40 sub a0, a0, a3 +800003f0: b3 46 e5 02 div a3, a0, a4 +800003f4: 33 88 e6 02 mul a6, a3, a4 +800003f8: b3 84 a4 00 add s1, s1, a0 +800003fc: 03 a4 05 01 lw s0, 16(a1) +80000400: 03 a7 45 01 lw a4, 20(a1) +80000404: 83 28 46 00 lw a7, 4(a2) +80000408: 03 25 86 00 lw a0, 8(a2) +8000040c: 33 86 04 41 sub a2, s1, a6 +80000410: b3 06 d4 00 add a3, s0, a3 +80000414: 33 07 f7 00 add a4, a4, a5 +80000418: e7 80 08 00 jalr a7 +8000041c: 13 05 10 00 addi a0, zero, 1 +80000420: ef 00 c0 7d jal 2012 +80000424: 03 2c 81 01 lw s8, 24(sp) +80000428: 83 2b c1 01 lw s7, 28(sp) +8000042c: 03 2b 01 02 lw s6, 32(sp) +80000430: 83 2a 41 02 lw s5, 36(sp) +80000434: 03 2a 81 02 lw s4, 40(sp) +80000438: 83 29 c1 02 lw s3, 44(sp) +8000043c: 03 29 01 03 lw s2, 48(sp) +80000440: 83 24 41 03 lw s1, 52(sp) +80000444: 03 24 81 03 lw s0, 56(sp) +80000448: 83 20 c1 03 lw ra, 60(sp) +8000044c: 13 01 01 04 addi sp, sp, 64 +80000450: 67 80 00 00 ret -800003d4 main: -800003d4: 13 01 01 ff addi sp, sp, -16 -800003d8: 23 26 11 00 sw ra, 12(sp) -800003dc: 37 05 00 80 lui a0, 524288 -800003e0: 93 05 85 63 addi a1, a0, 1592 -800003e4: 37 05 ff 7f lui a0, 524272 -800003e8: 13 06 45 03 addi a2, a0, 52 -800003ec: 37 05 ff 7f lui a0, 524272 -800003f0: ef f0 df e5 jal -420 -800003f4: 13 05 00 00 mv a0, zero -800003f8: 83 20 c1 00 lw ra, 12(sp) -800003fc: 13 01 01 01 addi sp, sp, 16 -80000400: 67 80 00 00 ret +80000454 main: +80000454: 13 01 01 ff addi sp, sp, -16 +80000458: 23 26 11 00 sw ra, 12(sp) +8000045c: 37 05 00 80 lui a0, 524288 +80000460: 93 05 85 6b addi a1, a0, 1720 +80000464: 37 05 ff 7f lui a0, 524272 +80000468: 13 06 45 03 addi a2, a0, 52 +8000046c: 37 05 ff 7f lui a0, 524272 +80000470: ef f0 9f dd jal -552 +80000474: 13 05 00 00 mv a0, zero +80000478: 83 20 c1 00 lw ra, 12(sp) +8000047c: 13 01 01 01 addi sp, sp, 16 +80000480: 67 80 00 00 ret -80000404 _pocl_kernel_sfilter: -80000404: 13 01 01 fc addi sp, sp, -64 -80000408: 23 2e 11 02 sw ra, 60(sp) -8000040c: 23 2c 81 02 sw s0, 56(sp) -80000410: 23 2a 91 02 sw s1, 52(sp) -80000414: 23 28 21 03 sw s2, 48(sp) -80000418: 23 26 31 03 sw s3, 44(sp) -8000041c: 23 24 41 03 sw s4, 40(sp) -80000420: 23 22 51 03 sw s5, 36(sp) -80000424: 23 20 61 03 sw s6, 32(sp) -80000428: 23 2e 71 01 sw s7, 28(sp) -8000042c: 23 2c 81 01 sw s8, 24(sp) -80000430: 23 2a 91 01 sw s9, 20(sp) -80000434: 23 28 a1 01 sw s10, 16(sp) -80000438: 23 26 b1 01 sw s11, 12(sp) -8000043c: 13 04 01 04 addi s0, sp, 64 -80000440: 13 71 c1 ff andi sp, sp, -4 -80000444: 93 02 00 00 mv t0, zero -80000448: 53 00 07 f0 fmv.w.x ft0, a4 -8000044c: 83 ab 87 01 lw s7, 24(a5) -80000450: 83 a3 c7 01 lw t2, 28(a5) -80000454: 83 a6 c7 00 lw a3, 12(a5) -80000458: 03 a7 07 02 lw a4, 32(a5) -8000045c: 23 24 e1 00 sw a4, 8(sp) -80000460: 03 a7 07 01 lw a4, 16(a5) -80000464: b3 87 0b 03 mul a5, s7, a6 -80000468: 33 8c f6 00 add s8, a3, a5 -8000046c: b3 86 13 03 mul a3, t2, a7 -80000470: b3 08 d7 00 add a7, a4, a3 -80000474: 93 86 18 00 addi a3, a7, 1 -80000478: b3 86 c6 02 mul a3, a3, a2 -8000047c: b3 06 dc 00 add a3, s8, a3 -80000480: 93 96 26 00 slli a3, a3, 2 -80000484: b3 06 d5 00 add a3, a0, a3 -80000488: 23 22 d1 00 sw a3, 4(sp) -8000048c: 93 1f 26 00 slli t6, a2, 2 -80000490: b3 86 c8 02 mul a3, a7, a2 -80000494: b3 06 dc 00 add a3, s8, a3 -80000498: 93 96 26 00 slli a3, a3, 2 -8000049c: b3 85 d5 00 add a1, a1, a3 -800004a0: 23 20 b1 00 sw a1, 0(sp) -800004a4: 33 0e d5 00 add t3, a0, a3 -800004a8: 93 85 f8 ff addi a1, a7, -1 -800004ac: b3 85 c5 02 mul a1, a1, a2 -800004b0: b3 05 bc 00 add a1, s8, a1 -800004b4: 93 95 25 00 slli a1, a1, 2 -800004b8: 33 0f b5 00 add t5, a0, a1 -800004bc: 13 0b 00 00 mv s6, zero -800004c0: 13 09 0f 00 mv s2, t5 -800004c4: 93 0e 0e 00 mv t4, t3 -800004c8: 03 2a 01 00 lw s4, 0(sp) -800004cc: 83 2a 41 00 lw s5, 4(sp) -800004d0: 93 06 00 00 mv a3, zero -800004d4: b3 85 68 01 add a1, a7, s6 -800004d8: 13 87 f5 ff addi a4, a1, -1 -800004dc: b3 0c c7 02 mul s9, a4, a2 -800004e0: 33 8d c5 02 mul s10, a1, a2 -800004e4: 93 85 15 00 addi a1, a1, 1 -800004e8: b3 8d c5 02 mul s11, a1, a2 -800004ec: 93 00 09 00 mv ra, s2 -800004f0: 93 89 0e 00 mv s3, t4 -800004f4: 93 05 0a 00 mv a1, s4 -800004f8: 13 87 0a 00 mv a4, s5 -800004fc: b3 07 dc 00 add a5, s8, a3 -80000500: 13 83 f7 ff addi t1, a5, -1 -80000504: 33 08 93 01 add a6, t1, s9 -80000508: 93 14 28 00 slli s1, a6, 2 -8000050c: b3 04 95 00 add s1, a0, s1 -80000510: 87 a0 04 00 flw ft1, 0(s1) -80000514: d3 f0 a0 10 fmul.s ft1, ft1, fa0 -80000518: 07 a1 00 00 flw ft2, 0(ra) -8000051c: 93 87 17 00 addi a5, a5, 1 -80000520: b3 84 97 01 add s1, a5, s9 -80000524: 93 94 24 00 slli s1, s1, 2 -80000528: b3 04 95 00 add s1, a0, s1 -8000052c: 87 a1 04 00 flw ft3, 0(s1) -80000530: b3 04 a3 01 add s1, t1, s10 -80000534: 93 94 24 00 slli s1, s1, 2 -80000538: b3 04 95 00 add s1, a0, s1 -8000053c: 07 a2 04 00 flw ft4, 0(s1) -80000540: 87 a2 09 00 flw ft5, 0(s3) -80000544: 53 71 b1 10 fmul.s ft2, ft2, fa1 -80000548: d3 f1 c1 10 fmul.s ft3, ft3, fa2 -8000054c: 53 72 d2 10 fmul.s ft4, ft4, fa3 -80000550: d3 f2 e2 10 fmul.s ft5, ft5, fa4 -80000554: b3 84 a7 01 add s1, a5, s10 -80000558: 93 94 24 00 slli s1, s1, 2 -8000055c: b3 04 95 00 add s1, a0, s1 -80000560: 07 a3 04 00 flw ft6, 0(s1) -80000564: b3 04 b3 01 add s1, t1, s11 -80000568: 93 94 24 00 slli s1, s1, 2 -8000056c: b3 04 95 00 add s1, a0, s1 -80000570: 87 a3 04 00 flw ft7, 0(s1) -80000574: 07 2e 07 00 flw ft8, 0(a4) -80000578: b3 87 b7 01 add a5, a5, s11 -8000057c: 93 97 27 00 slli a5, a5, 2 -80000580: b3 07 f5 00 add a5, a0, a5 -80000584: 87 ae 07 00 flw ft9, 0(a5) -80000588: 53 73 f3 10 fmul.s ft6, ft6, fa5 -8000058c: d3 f3 03 11 fmul.s ft7, ft7, fa6 -80000590: 53 7e 1e 11 fmul.s ft8, ft8, fa7 -80000594: d3 fe 0e 10 fmul.s ft9, ft9, ft0 -80000598: d3 f0 20 00 fadd.s ft1, ft1, ft2 -8000059c: d3 f0 30 00 fadd.s ft1, ft1, ft3 -800005a0: d3 f0 40 00 fadd.s ft1, ft1, ft4 -800005a4: d3 f0 50 00 fadd.s ft1, ft1, ft5 -800005a8: d3 f0 60 00 fadd.s ft1, ft1, ft6 -800005ac: d3 f0 70 00 fadd.s ft1, ft1, ft7 -800005b0: d3 f0 c0 01 fadd.s ft1, ft1, ft8 -800005b4: d3 f0 d0 01 fadd.s ft1, ft1, ft9 -800005b8: 27 a0 15 00 fsw ft1, 0(a1) -800005bc: 93 86 16 00 addi a3, a3, 1 -800005c0: 13 07 47 00 addi a4, a4, 4 -800005c4: 93 85 45 00 addi a1, a1, 4 -800005c8: 93 89 49 00 addi s3, s3, 4 -800005cc: 93 80 40 00 addi ra, ra, 4 -800005d0: e3 e6 76 f3 bltu a3, s7, -212 -800005d4: 13 0b 1b 00 addi s6, s6, 1 -800005d8: b3 8a fa 01 add s5, s5, t6 -800005dc: 33 0a fa 01 add s4, s4, t6 -800005e0: b3 8e fe 01 add t4, t4, t6 -800005e4: 33 09 f9 01 add s2, s2, t6 -800005e8: e3 64 7b ee bltu s6, t2, -280 -800005ec: 93 82 12 00 addi t0, t0, 1 -800005f0: 83 25 81 00 lw a1, 8(sp) -800005f4: e3 e4 b2 ec bltu t0, a1, -312 -800005f8: 13 01 04 fc addi sp, s0, -64 -800005fc: 83 2d c1 00 lw s11, 12(sp) -80000600: 03 2d 01 01 lw s10, 16(sp) -80000604: 83 2c 41 01 lw s9, 20(sp) -80000608: 03 2c 81 01 lw s8, 24(sp) -8000060c: 83 2b c1 01 lw s7, 28(sp) -80000610: 03 2b 01 02 lw s6, 32(sp) -80000614: 83 2a 41 02 lw s5, 36(sp) -80000618: 03 2a 81 02 lw s4, 40(sp) -8000061c: 83 29 c1 02 lw s3, 44(sp) -80000620: 03 29 01 03 lw s2, 48(sp) -80000624: 83 24 41 03 lw s1, 52(sp) -80000628: 03 24 81 03 lw s0, 56(sp) -8000062c: 83 20 c1 03 lw ra, 60(sp) -80000630: 13 01 01 04 addi sp, sp, 64 -80000634: 67 80 00 00 ret +80000484 _pocl_kernel_sfilter: +80000484: 13 01 01 fc addi sp, sp, -64 +80000488: 23 2e 11 02 sw ra, 60(sp) +8000048c: 23 2c 81 02 sw s0, 56(sp) +80000490: 23 2a 91 02 sw s1, 52(sp) +80000494: 23 28 21 03 sw s2, 48(sp) +80000498: 23 26 31 03 sw s3, 44(sp) +8000049c: 23 24 41 03 sw s4, 40(sp) +800004a0: 23 22 51 03 sw s5, 36(sp) +800004a4: 23 20 61 03 sw s6, 32(sp) +800004a8: 23 2e 71 01 sw s7, 28(sp) +800004ac: 23 2c 81 01 sw s8, 24(sp) +800004b0: 23 2a 91 01 sw s9, 20(sp) +800004b4: 23 28 a1 01 sw s10, 16(sp) +800004b8: 23 26 b1 01 sw s11, 12(sp) +800004bc: 13 04 01 04 addi s0, sp, 64 +800004c0: 13 71 c1 ff andi sp, sp, -4 +800004c4: 93 02 00 00 mv t0, zero +800004c8: 53 00 07 f0 fmv.w.x ft0, a4 +800004cc: 83 ab 87 01 lw s7, 24(a5) +800004d0: 83 a3 c7 01 lw t2, 28(a5) +800004d4: 83 a6 c7 00 lw a3, 12(a5) +800004d8: 03 a7 07 02 lw a4, 32(a5) +800004dc: 23 24 e1 00 sw a4, 8(sp) +800004e0: 03 a7 07 01 lw a4, 16(a5) +800004e4: b3 87 0b 03 mul a5, s7, a6 +800004e8: 33 8c f6 00 add s8, a3, a5 +800004ec: b3 86 13 03 mul a3, t2, a7 +800004f0: b3 08 d7 00 add a7, a4, a3 +800004f4: 93 86 18 00 addi a3, a7, 1 +800004f8: b3 86 c6 02 mul a3, a3, a2 +800004fc: b3 06 dc 00 add a3, s8, a3 +80000500: 93 96 26 00 slli a3, a3, 2 +80000504: b3 06 d5 00 add a3, a0, a3 +80000508: 23 22 d1 00 sw a3, 4(sp) +8000050c: 93 1f 26 00 slli t6, a2, 2 +80000510: b3 86 c8 02 mul a3, a7, a2 +80000514: b3 06 dc 00 add a3, s8, a3 +80000518: 93 96 26 00 slli a3, a3, 2 +8000051c: b3 85 d5 00 add a1, a1, a3 +80000520: 23 20 b1 00 sw a1, 0(sp) +80000524: 33 0e d5 00 add t3, a0, a3 +80000528: 93 85 f8 ff addi a1, a7, -1 +8000052c: b3 85 c5 02 mul a1, a1, a2 +80000530: b3 05 bc 00 add a1, s8, a1 +80000534: 93 95 25 00 slli a1, a1, 2 +80000538: 33 0f b5 00 add t5, a0, a1 +8000053c: 13 0b 00 00 mv s6, zero +80000540: 13 09 0f 00 mv s2, t5 +80000544: 93 0e 0e 00 mv t4, t3 +80000548: 03 2a 01 00 lw s4, 0(sp) +8000054c: 83 2a 41 00 lw s5, 4(sp) +80000550: 93 06 00 00 mv a3, zero +80000554: b3 85 68 01 add a1, a7, s6 +80000558: 13 87 f5 ff addi a4, a1, -1 +8000055c: b3 0c c7 02 mul s9, a4, a2 +80000560: 33 8d c5 02 mul s10, a1, a2 +80000564: 93 85 15 00 addi a1, a1, 1 +80000568: b3 8d c5 02 mul s11, a1, a2 +8000056c: 93 00 09 00 mv ra, s2 +80000570: 93 89 0e 00 mv s3, t4 +80000574: 93 05 0a 00 mv a1, s4 +80000578: 13 87 0a 00 mv a4, s5 +8000057c: b3 07 dc 00 add a5, s8, a3 +80000580: 13 83 f7 ff addi t1, a5, -1 +80000584: 33 08 93 01 add a6, t1, s9 +80000588: 93 14 28 00 slli s1, a6, 2 +8000058c: b3 04 95 00 add s1, a0, s1 +80000590: 87 a0 04 00 flw ft1, 0(s1) +80000594: d3 f0 a0 10 fmul.s ft1, ft1, fa0 +80000598: 07 a1 00 00 flw ft2, 0(ra) +8000059c: 93 87 17 00 addi a5, a5, 1 +800005a0: b3 84 97 01 add s1, a5, s9 +800005a4: 93 94 24 00 slli s1, s1, 2 +800005a8: b3 04 95 00 add s1, a0, s1 +800005ac: 87 a1 04 00 flw ft3, 0(s1) +800005b0: b3 04 a3 01 add s1, t1, s10 +800005b4: 93 94 24 00 slli s1, s1, 2 +800005b8: b3 04 95 00 add s1, a0, s1 +800005bc: 07 a2 04 00 flw ft4, 0(s1) +800005c0: 87 a2 09 00 flw ft5, 0(s3) +800005c4: 53 71 b1 10 fmul.s ft2, ft2, fa1 +800005c8: d3 f1 c1 10 fmul.s ft3, ft3, fa2 +800005cc: 53 72 d2 10 fmul.s ft4, ft4, fa3 +800005d0: d3 f2 e2 10 fmul.s ft5, ft5, fa4 +800005d4: b3 84 a7 01 add s1, a5, s10 +800005d8: 93 94 24 00 slli s1, s1, 2 +800005dc: b3 04 95 00 add s1, a0, s1 +800005e0: 07 a3 04 00 flw ft6, 0(s1) +800005e4: b3 04 b3 01 add s1, t1, s11 +800005e8: 93 94 24 00 slli s1, s1, 2 +800005ec: b3 04 95 00 add s1, a0, s1 +800005f0: 87 a3 04 00 flw ft7, 0(s1) +800005f4: 07 2e 07 00 flw ft8, 0(a4) +800005f8: b3 87 b7 01 add a5, a5, s11 +800005fc: 93 97 27 00 slli a5, a5, 2 +80000600: b3 07 f5 00 add a5, a0, a5 +80000604: 87 ae 07 00 flw ft9, 0(a5) +80000608: 53 73 f3 10 fmul.s ft6, ft6, fa5 +8000060c: d3 f3 03 11 fmul.s ft7, ft7, fa6 +80000610: 53 7e 1e 11 fmul.s ft8, ft8, fa7 +80000614: d3 fe 0e 10 fmul.s ft9, ft9, ft0 +80000618: d3 f0 20 00 fadd.s ft1, ft1, ft2 +8000061c: d3 f0 30 00 fadd.s ft1, ft1, ft3 +80000620: d3 f0 40 00 fadd.s ft1, ft1, ft4 +80000624: d3 f0 50 00 fadd.s ft1, ft1, ft5 +80000628: d3 f0 60 00 fadd.s ft1, ft1, ft6 +8000062c: d3 f0 70 00 fadd.s ft1, ft1, ft7 +80000630: d3 f0 c0 01 fadd.s ft1, ft1, ft8 +80000634: d3 f0 d0 01 fadd.s ft1, ft1, ft9 +80000638: 27 a0 15 00 fsw ft1, 0(a1) +8000063c: 93 86 16 00 addi a3, a3, 1 +80000640: 13 07 47 00 addi a4, a4, 4 +80000644: 93 85 45 00 addi a1, a1, 4 +80000648: 93 89 49 00 addi s3, s3, 4 +8000064c: 93 80 40 00 addi ra, ra, 4 +80000650: e3 e6 76 f3 bltu a3, s7, -212 +80000654: 13 0b 1b 00 addi s6, s6, 1 +80000658: b3 8a fa 01 add s5, s5, t6 +8000065c: 33 0a fa 01 add s4, s4, t6 +80000660: b3 8e fe 01 add t4, t4, t6 +80000664: 33 09 f9 01 add s2, s2, t6 +80000668: e3 64 7b ee bltu s6, t2, -280 +8000066c: 93 82 12 00 addi t0, t0, 1 +80000670: 83 25 81 00 lw a1, 8(sp) +80000674: e3 e4 b2 ec bltu t0, a1, -312 +80000678: 13 01 04 fc addi sp, s0, -64 +8000067c: 83 2d c1 00 lw s11, 12(sp) +80000680: 03 2d 01 01 lw s10, 16(sp) +80000684: 83 2c 41 01 lw s9, 20(sp) +80000688: 03 2c 81 01 lw s8, 24(sp) +8000068c: 83 2b c1 01 lw s7, 28(sp) +80000690: 03 2b 01 02 lw s6, 32(sp) +80000694: 83 2a 41 02 lw s5, 36(sp) +80000698: 03 2a 81 02 lw s4, 40(sp) +8000069c: 83 29 c1 02 lw s3, 44(sp) +800006a0: 03 29 01 03 lw s2, 48(sp) +800006a4: 83 24 41 03 lw s1, 52(sp) +800006a8: 03 24 81 03 lw s0, 56(sp) +800006ac: 83 20 c1 03 lw ra, 60(sp) +800006b0: 13 01 01 04 addi sp, sp, 64 +800006b4: 67 80 00 00 ret -80000638 _pocl_kernel_sfilter_workgroup: -80000638: 13 01 01 fc addi sp, sp, -64 -8000063c: 23 2e 11 02 sw ra, 60(sp) -80000640: 23 2c 81 02 sw s0, 56(sp) -80000644: 23 2a 91 02 sw s1, 52(sp) -80000648: 23 28 21 03 sw s2, 48(sp) -8000064c: 23 26 31 03 sw s3, 44(sp) -80000650: 23 24 41 03 sw s4, 40(sp) -80000654: 23 22 51 03 sw s5, 36(sp) -80000658: 23 20 61 03 sw s6, 32(sp) -8000065c: 23 2e 71 01 sw s7, 28(sp) -80000660: 23 2c 81 01 sw s8, 24(sp) -80000664: 23 2a 91 01 sw s9, 20(sp) -80000668: 23 28 a1 01 sw s10, 16(sp) -8000066c: 23 26 b1 01 sw s11, 12(sp) -80000670: 13 08 00 00 mv a6, zero -80000674: 03 27 05 00 lw a4, 0(a0) -80000678: 83 24 45 00 lw s1, 4(a0) -8000067c: 03 24 85 00 lw s0, 8(a0) -80000680: 83 28 c5 00 lw a7, 12(a0) -80000684: 03 23 07 00 lw t1, 0(a4) -80000688: 83 a3 04 00 lw t2, 0(s1) -8000068c: 83 2f 04 00 lw t6, 0(s0) -80000690: 07 a0 08 00 flw ft0, 0(a7) -80000694: 83 24 05 01 lw s1, 16(a0) -80000698: 03 24 45 01 lw s0, 20(a0) -8000069c: 03 27 85 01 lw a4, 24(a0) -800006a0: 83 28 c5 01 lw a7, 28(a0) -800006a4: 87 a0 04 00 flw ft1, 0(s1) -800006a8: 07 21 04 00 flw ft2, 0(s0) -800006ac: 87 21 07 00 flw ft3, 0(a4) -800006b0: 07 a2 08 00 flw ft4, 0(a7) -800006b4: 03 27 05 02 lw a4, 32(a0) -800006b8: 83 24 45 02 lw s1, 36(a0) -800006bc: 03 24 85 02 lw s0, 40(a0) -800006c0: 03 25 c5 02 lw a0, 44(a0) -800006c4: 87 22 07 00 flw ft5, 0(a4) -800006c8: 07 a3 04 00 flw ft6, 0(s1) -800006cc: 87 23 04 00 flw ft7, 0(s0) -800006d0: 07 25 05 00 flw fa0, 0(a0) -800006d4: 03 ac 85 01 lw s8, 24(a1) -800006d8: 83 a8 c5 01 lw a7, 28(a1) -800006dc: 03 a5 c5 00 lw a0, 12(a1) -800006e0: 03 a7 05 02 lw a4, 32(a1) -800006e4: 23 24 e1 00 sw a4, 8(sp) -800006e8: 83 a5 05 01 lw a1, 16(a1) -800006ec: 33 06 cc 02 mul a2, s8, a2 -800006f0: b3 0c c5 00 add s9, a0, a2 -800006f4: 33 85 d8 02 mul a0, a7, a3 -800006f8: 33 8f a5 00 add t5, a1, a0 -800006fc: 13 05 1f 00 addi a0, t5, 1 -80000700: 33 05 f5 03 mul a0, a0, t6 -80000704: 33 85 ac 00 add a0, s9, a0 -80000708: 13 15 25 00 slli a0, a0, 2 -8000070c: 33 05 a3 00 add a0, t1, a0 -80000710: 23 22 a1 00 sw a0, 4(sp) -80000714: 13 99 2f 00 slli s2, t6, 2 -80000718: 33 05 ff 03 mul a0, t5, t6 -8000071c: 33 85 ac 00 add a0, s9, a0 -80000720: 13 15 25 00 slli a0, a0, 2 -80000724: b3 83 a3 00 add t2, t2, a0 -80000728: 33 0e a3 00 add t3, t1, a0 -8000072c: 13 05 ff ff addi a0, t5, -1 -80000730: 33 05 f5 03 mul a0, a0, t6 -80000734: 33 85 ac 00 add a0, s9, a0 -80000738: 13 15 25 00 slli a0, a0, 2 -8000073c: b3 0e a3 00 add t4, t1, a0 -80000740: 93 0b 00 00 mv s7, zero -80000744: 93 89 0e 00 mv s3, t4 -80000748: 13 0a 0e 00 mv s4, t3 -8000074c: 93 8a 03 00 mv s5, t2 -80000750: 03 2b 41 00 lw s6, 4(sp) -80000754: 13 07 00 00 mv a4, zero -80000758: 33 05 7f 01 add a0, t5, s7 -8000075c: 93 05 f5 ff addi a1, a0, -1 -80000760: 33 8d f5 03 mul s10, a1, t6 -80000764: b3 8d af 02 mul s11, t6, a0 -80000768: 13 05 15 00 addi a0, a0, 1 -8000076c: b3 00 f5 03 mul ra, a0, t6 -80000770: 13 86 09 00 mv a2, s3 -80000774: 13 05 0a 00 mv a0, s4 -80000778: 93 85 0a 00 mv a1, s5 -8000077c: 13 04 0b 00 mv s0, s6 -80000780: b3 84 ec 00 add s1, s9, a4 -80000784: 93 86 f4 ff addi a3, s1, -1 -80000788: b3 82 a6 01 add t0, a3, s10 -8000078c: 93 97 22 00 slli a5, t0, 2 -80000790: b3 07 f3 00 add a5, t1, a5 -80000794: 87 a5 07 00 flw fa1, 0(a5) -80000798: d3 75 b0 10 fmul.s fa1, ft0, fa1 -8000079c: 07 26 06 00 flw fa2, 0(a2) -800007a0: 93 87 14 00 addi a5, s1, 1 -800007a4: b3 84 a7 01 add s1, a5, s10 -800007a8: 93 94 24 00 slli s1, s1, 2 -800007ac: b3 04 93 00 add s1, t1, s1 -800007b0: 87 a6 04 00 flw fa3, 0(s1) -800007b4: b3 84 b6 01 add s1, a3, s11 -800007b8: 93 94 24 00 slli s1, s1, 2 -800007bc: b3 04 93 00 add s1, t1, s1 -800007c0: 07 a7 04 00 flw fa4, 0(s1) -800007c4: 87 27 05 00 flw fa5, 0(a0) -800007c8: 53 f6 c0 10 fmul.s fa2, ft1, fa2 -800007cc: d3 76 d1 10 fmul.s fa3, ft2, fa3 -800007d0: 53 f7 e1 10 fmul.s fa4, ft3, fa4 -800007d4: d3 77 f2 10 fmul.s fa5, ft4, fa5 -800007d8: b3 84 b7 01 add s1, a5, s11 -800007dc: 93 94 24 00 slli s1, s1, 2 -800007e0: b3 04 93 00 add s1, t1, s1 -800007e4: 07 a8 04 00 flw fa6, 0(s1) -800007e8: b3 86 16 00 add a3, a3, ra -800007ec: 93 96 26 00 slli a3, a3, 2 -800007f0: b3 06 d3 00 add a3, t1, a3 -800007f4: 87 a8 06 00 flw fa7, 0(a3) -800007f8: 07 2e 04 00 flw ft8, 0(s0) -800007fc: b3 86 17 00 add a3, a5, ra -80000800: 93 96 26 00 slli a3, a3, 2 -80000804: b3 06 d3 00 add a3, t1, a3 -80000808: 87 ae 06 00 flw ft9, 0(a3) -8000080c: 53 f8 02 11 fmul.s fa6, ft5, fa6 -80000810: d3 78 13 11 fmul.s fa7, ft6, fa7 -80000814: 53 fe c3 11 fmul.s ft8, ft7, ft8 -80000818: d3 7e d5 11 fmul.s ft9, fa0, ft9 -8000081c: d3 f5 c5 00 fadd.s fa1, fa1, fa2 -80000820: d3 f5 d5 00 fadd.s fa1, fa1, fa3 -80000824: d3 f5 e5 00 fadd.s fa1, fa1, fa4 -80000828: d3 f5 f5 00 fadd.s fa1, fa1, fa5 -8000082c: d3 f5 05 01 fadd.s fa1, fa1, fa6 -80000830: d3 f5 15 01 fadd.s fa1, fa1, fa7 -80000834: d3 f5 c5 01 fadd.s fa1, fa1, ft8 -80000838: d3 f5 d5 01 fadd.s fa1, fa1, ft9 -8000083c: 27 a0 b5 00 fsw fa1, 0(a1) -80000840: 13 07 17 00 addi a4, a4, 1 -80000844: 13 04 44 00 addi s0, s0, 4 -80000848: 93 85 45 00 addi a1, a1, 4 -8000084c: 13 05 45 00 addi a0, a0, 4 -80000850: 13 06 46 00 addi a2, a2, 4 -80000854: e3 66 87 f3 bltu a4, s8, -212 -80000858: 93 8b 1b 00 addi s7, s7, 1 -8000085c: 33 0b 2b 01 add s6, s6, s2 -80000860: b3 8a 2a 01 add s5, s5, s2 -80000864: 33 0a 2a 01 add s4, s4, s2 -80000868: b3 89 29 01 add s3, s3, s2 -8000086c: e3 e4 1b ef bltu s7, a7, -280 -80000870: 13 08 18 00 addi a6, a6, 1 -80000874: 03 25 81 00 lw a0, 8(sp) -80000878: e3 64 a8 ec bltu a6, a0, -312 -8000087c: 83 2d c1 00 lw s11, 12(sp) -80000880: 03 2d 01 01 lw s10, 16(sp) -80000884: 83 2c 41 01 lw s9, 20(sp) -80000888: 03 2c 81 01 lw s8, 24(sp) -8000088c: 83 2b c1 01 lw s7, 28(sp) -80000890: 03 2b 01 02 lw s6, 32(sp) -80000894: 83 2a 41 02 lw s5, 36(sp) -80000898: 03 2a 81 02 lw s4, 40(sp) -8000089c: 83 29 c1 02 lw s3, 44(sp) -800008a0: 03 29 01 03 lw s2, 48(sp) -800008a4: 83 24 41 03 lw s1, 52(sp) -800008a8: 03 24 81 03 lw s0, 56(sp) -800008ac: 83 20 c1 03 lw ra, 60(sp) -800008b0: 13 01 01 04 addi sp, sp, 64 -800008b4: 67 80 00 00 ret +800006b8 _pocl_kernel_sfilter_workgroup: +800006b8: 13 01 01 fc addi sp, sp, -64 +800006bc: 23 2e 11 02 sw ra, 60(sp) +800006c0: 23 2c 81 02 sw s0, 56(sp) +800006c4: 23 2a 91 02 sw s1, 52(sp) +800006c8: 23 28 21 03 sw s2, 48(sp) +800006cc: 23 26 31 03 sw s3, 44(sp) +800006d0: 23 24 41 03 sw s4, 40(sp) +800006d4: 23 22 51 03 sw s5, 36(sp) +800006d8: 23 20 61 03 sw s6, 32(sp) +800006dc: 23 2e 71 01 sw s7, 28(sp) +800006e0: 23 2c 81 01 sw s8, 24(sp) +800006e4: 23 2a 91 01 sw s9, 20(sp) +800006e8: 23 28 a1 01 sw s10, 16(sp) +800006ec: 23 26 b1 01 sw s11, 12(sp) +800006f0: 13 08 00 00 mv a6, zero +800006f4: 03 27 05 00 lw a4, 0(a0) +800006f8: 83 24 45 00 lw s1, 4(a0) +800006fc: 03 24 85 00 lw s0, 8(a0) +80000700: 83 28 c5 00 lw a7, 12(a0) +80000704: 03 23 07 00 lw t1, 0(a4) +80000708: 83 a3 04 00 lw t2, 0(s1) +8000070c: 83 2f 04 00 lw t6, 0(s0) +80000710: 07 a0 08 00 flw ft0, 0(a7) +80000714: 83 24 05 01 lw s1, 16(a0) +80000718: 03 24 45 01 lw s0, 20(a0) +8000071c: 03 27 85 01 lw a4, 24(a0) +80000720: 83 28 c5 01 lw a7, 28(a0) +80000724: 87 a0 04 00 flw ft1, 0(s1) +80000728: 07 21 04 00 flw ft2, 0(s0) +8000072c: 87 21 07 00 flw ft3, 0(a4) +80000730: 07 a2 08 00 flw ft4, 0(a7) +80000734: 03 27 05 02 lw a4, 32(a0) +80000738: 83 24 45 02 lw s1, 36(a0) +8000073c: 03 24 85 02 lw s0, 40(a0) +80000740: 03 25 c5 02 lw a0, 44(a0) +80000744: 87 22 07 00 flw ft5, 0(a4) +80000748: 07 a3 04 00 flw ft6, 0(s1) +8000074c: 87 23 04 00 flw ft7, 0(s0) +80000750: 07 25 05 00 flw fa0, 0(a0) +80000754: 03 ac 85 01 lw s8, 24(a1) +80000758: 83 a8 c5 01 lw a7, 28(a1) +8000075c: 03 a5 c5 00 lw a0, 12(a1) +80000760: 03 a7 05 02 lw a4, 32(a1) +80000764: 23 24 e1 00 sw a4, 8(sp) +80000768: 83 a5 05 01 lw a1, 16(a1) +8000076c: 33 06 cc 02 mul a2, s8, a2 +80000770: b3 0c c5 00 add s9, a0, a2 +80000774: 33 85 d8 02 mul a0, a7, a3 +80000778: 33 8f a5 00 add t5, a1, a0 +8000077c: 13 05 1f 00 addi a0, t5, 1 +80000780: 33 05 f5 03 mul a0, a0, t6 +80000784: 33 85 ac 00 add a0, s9, a0 +80000788: 13 15 25 00 slli a0, a0, 2 +8000078c: 33 05 a3 00 add a0, t1, a0 +80000790: 23 22 a1 00 sw a0, 4(sp) +80000794: 13 99 2f 00 slli s2, t6, 2 +80000798: 33 05 ff 03 mul a0, t5, t6 +8000079c: 33 85 ac 00 add a0, s9, a0 +800007a0: 13 15 25 00 slli a0, a0, 2 +800007a4: b3 83 a3 00 add t2, t2, a0 +800007a8: 33 0e a3 00 add t3, t1, a0 +800007ac: 13 05 ff ff addi a0, t5, -1 +800007b0: 33 05 f5 03 mul a0, a0, t6 +800007b4: 33 85 ac 00 add a0, s9, a0 +800007b8: 13 15 25 00 slli a0, a0, 2 +800007bc: b3 0e a3 00 add t4, t1, a0 +800007c0: 93 0b 00 00 mv s7, zero +800007c4: 93 89 0e 00 mv s3, t4 +800007c8: 13 0a 0e 00 mv s4, t3 +800007cc: 93 8a 03 00 mv s5, t2 +800007d0: 03 2b 41 00 lw s6, 4(sp) +800007d4: 13 07 00 00 mv a4, zero +800007d8: 33 05 7f 01 add a0, t5, s7 +800007dc: 93 05 f5 ff addi a1, a0, -1 +800007e0: 33 8d f5 03 mul s10, a1, t6 +800007e4: b3 8d af 02 mul s11, t6, a0 +800007e8: 13 05 15 00 addi a0, a0, 1 +800007ec: b3 00 f5 03 mul ra, a0, t6 +800007f0: 13 86 09 00 mv a2, s3 +800007f4: 13 05 0a 00 mv a0, s4 +800007f8: 93 85 0a 00 mv a1, s5 +800007fc: 13 04 0b 00 mv s0, s6 +80000800: b3 84 ec 00 add s1, s9, a4 +80000804: 93 86 f4 ff addi a3, s1, -1 +80000808: b3 82 a6 01 add t0, a3, s10 +8000080c: 93 97 22 00 slli a5, t0, 2 +80000810: b3 07 f3 00 add a5, t1, a5 +80000814: 87 a5 07 00 flw fa1, 0(a5) +80000818: d3 75 b0 10 fmul.s fa1, ft0, fa1 +8000081c: 07 26 06 00 flw fa2, 0(a2) +80000820: 93 87 14 00 addi a5, s1, 1 +80000824: b3 84 a7 01 add s1, a5, s10 +80000828: 93 94 24 00 slli s1, s1, 2 +8000082c: b3 04 93 00 add s1, t1, s1 +80000830: 87 a6 04 00 flw fa3, 0(s1) +80000834: b3 84 b6 01 add s1, a3, s11 +80000838: 93 94 24 00 slli s1, s1, 2 +8000083c: b3 04 93 00 add s1, t1, s1 +80000840: 07 a7 04 00 flw fa4, 0(s1) +80000844: 87 27 05 00 flw fa5, 0(a0) +80000848: 53 f6 c0 10 fmul.s fa2, ft1, fa2 +8000084c: d3 76 d1 10 fmul.s fa3, ft2, fa3 +80000850: 53 f7 e1 10 fmul.s fa4, ft3, fa4 +80000854: d3 77 f2 10 fmul.s fa5, ft4, fa5 +80000858: b3 84 b7 01 add s1, a5, s11 +8000085c: 93 94 24 00 slli s1, s1, 2 +80000860: b3 04 93 00 add s1, t1, s1 +80000864: 07 a8 04 00 flw fa6, 0(s1) +80000868: b3 86 16 00 add a3, a3, ra +8000086c: 93 96 26 00 slli a3, a3, 2 +80000870: b3 06 d3 00 add a3, t1, a3 +80000874: 87 a8 06 00 flw fa7, 0(a3) +80000878: 07 2e 04 00 flw ft8, 0(s0) +8000087c: b3 86 17 00 add a3, a5, ra +80000880: 93 96 26 00 slli a3, a3, 2 +80000884: b3 06 d3 00 add a3, t1, a3 +80000888: 87 ae 06 00 flw ft9, 0(a3) +8000088c: 53 f8 02 11 fmul.s fa6, ft5, fa6 +80000890: d3 78 13 11 fmul.s fa7, ft6, fa7 +80000894: 53 fe c3 11 fmul.s ft8, ft7, ft8 +80000898: d3 7e d5 11 fmul.s ft9, fa0, ft9 +8000089c: d3 f5 c5 00 fadd.s fa1, fa1, fa2 +800008a0: d3 f5 d5 00 fadd.s fa1, fa1, fa3 +800008a4: d3 f5 e5 00 fadd.s fa1, fa1, fa4 +800008a8: d3 f5 f5 00 fadd.s fa1, fa1, fa5 +800008ac: d3 f5 05 01 fadd.s fa1, fa1, fa6 +800008b0: d3 f5 15 01 fadd.s fa1, fa1, fa7 +800008b4: d3 f5 c5 01 fadd.s fa1, fa1, ft8 +800008b8: d3 f5 d5 01 fadd.s fa1, fa1, ft9 +800008bc: 27 a0 b5 00 fsw fa1, 0(a1) +800008c0: 13 07 17 00 addi a4, a4, 1 +800008c4: 13 04 44 00 addi s0, s0, 4 +800008c8: 93 85 45 00 addi a1, a1, 4 +800008cc: 13 05 45 00 addi a0, a0, 4 +800008d0: 13 06 46 00 addi a2, a2, 4 +800008d4: e3 66 87 f3 bltu a4, s8, -212 +800008d8: 93 8b 1b 00 addi s7, s7, 1 +800008dc: 33 0b 2b 01 add s6, s6, s2 +800008e0: b3 8a 2a 01 add s5, s5, s2 +800008e4: 33 0a 2a 01 add s4, s4, s2 +800008e8: b3 89 29 01 add s3, s3, s2 +800008ec: e3 e4 1b ef bltu s7, a7, -280 +800008f0: 13 08 18 00 addi a6, a6, 1 +800008f4: 03 25 81 00 lw a0, 8(sp) +800008f8: e3 64 a8 ec bltu a6, a0, -312 +800008fc: 83 2d c1 00 lw s11, 12(sp) +80000900: 03 2d 01 01 lw s10, 16(sp) +80000904: 83 2c 41 01 lw s9, 20(sp) +80000908: 03 2c 81 01 lw s8, 24(sp) +8000090c: 83 2b c1 01 lw s7, 28(sp) +80000910: 03 2b 01 02 lw s6, 32(sp) +80000914: 83 2a 41 02 lw s5, 36(sp) +80000918: 03 2a 81 02 lw s4, 40(sp) +8000091c: 83 29 c1 02 lw s3, 44(sp) +80000920: 03 29 01 03 lw s2, 48(sp) +80000924: 83 24 41 03 lw s1, 52(sp) +80000928: 03 24 81 03 lw s0, 56(sp) +8000092c: 83 20 c1 03 lw ra, 60(sp) +80000930: 13 01 01 04 addi sp, sp, 64 +80000934: 67 80 00 00 ret -800008b8 _pocl_kernel_sfilter_workgroup_fast: -800008b8: 13 01 01 fc addi sp, sp, -64 -800008bc: 23 2e 11 02 sw ra, 60(sp) -800008c0: 23 2c 81 02 sw s0, 56(sp) -800008c4: 23 2a 91 02 sw s1, 52(sp) -800008c8: 23 28 21 03 sw s2, 48(sp) -800008cc: 23 26 31 03 sw s3, 44(sp) -800008d0: 23 24 41 03 sw s4, 40(sp) -800008d4: 23 22 51 03 sw s5, 36(sp) -800008d8: 23 20 61 03 sw s6, 32(sp) -800008dc: 23 2e 71 01 sw s7, 28(sp) -800008e0: 23 2c 81 01 sw s8, 24(sp) -800008e4: 23 2a 91 01 sw s9, 20(sp) -800008e8: 23 28 a1 01 sw s10, 16(sp) -800008ec: 23 26 b1 01 sw s11, 12(sp) -800008f0: 13 08 00 00 mv a6, zero -800008f4: 03 27 85 00 lw a4, 8(a0) -800008f8: 83 24 c5 00 lw s1, 12(a0) -800008fc: 03 23 05 00 lw t1, 0(a0) -80000900: 83 23 45 00 lw t2, 4(a0) -80000904: 83 2f 07 00 lw t6, 0(a4) -80000908: 07 a0 04 00 flw ft0, 0(s1) -8000090c: 03 27 05 01 lw a4, 16(a0) -80000910: 83 24 45 01 lw s1, 20(a0) -80000914: 03 24 85 01 lw s0, 24(a0) -80000918: 83 28 c5 01 lw a7, 28(a0) -8000091c: 87 20 07 00 flw ft1, 0(a4) -80000920: 07 a1 04 00 flw ft2, 0(s1) -80000924: 87 21 04 00 flw ft3, 0(s0) -80000928: 07 a2 08 00 flw ft4, 0(a7) -8000092c: 03 27 05 02 lw a4, 32(a0) -80000930: 83 24 45 02 lw s1, 36(a0) -80000934: 03 24 85 02 lw s0, 40(a0) -80000938: 03 25 c5 02 lw a0, 44(a0) -8000093c: 87 22 07 00 flw ft5, 0(a4) -80000940: 07 a3 04 00 flw ft6, 0(s1) -80000944: 87 23 04 00 flw ft7, 0(s0) -80000948: 07 25 05 00 flw fa0, 0(a0) -8000094c: 03 ac 85 01 lw s8, 24(a1) -80000950: 83 a8 c5 01 lw a7, 28(a1) -80000954: 03 a5 c5 00 lw a0, 12(a1) -80000958: 03 a7 05 02 lw a4, 32(a1) -8000095c: 23 24 e1 00 sw a4, 8(sp) -80000960: 83 a5 05 01 lw a1, 16(a1) -80000964: 33 06 cc 02 mul a2, s8, a2 -80000968: b3 0c c5 00 add s9, a0, a2 -8000096c: 33 85 d8 02 mul a0, a7, a3 -80000970: 33 8f a5 00 add t5, a1, a0 -80000974: 13 05 1f 00 addi a0, t5, 1 -80000978: 33 05 f5 03 mul a0, a0, t6 -8000097c: 33 85 ac 00 add a0, s9, a0 -80000980: 13 15 25 00 slli a0, a0, 2 -80000984: 33 05 a3 00 add a0, t1, a0 -80000988: 23 22 a1 00 sw a0, 4(sp) -8000098c: 13 99 2f 00 slli s2, t6, 2 -80000990: 33 05 ff 03 mul a0, t5, t6 -80000994: 33 85 ac 00 add a0, s9, a0 -80000998: 13 15 25 00 slli a0, a0, 2 -8000099c: b3 83 a3 00 add t2, t2, a0 -800009a0: 33 0e a3 00 add t3, t1, a0 -800009a4: 13 05 ff ff addi a0, t5, -1 -800009a8: 33 05 f5 03 mul a0, a0, t6 -800009ac: 33 85 ac 00 add a0, s9, a0 -800009b0: 13 15 25 00 slli a0, a0, 2 -800009b4: b3 0e a3 00 add t4, t1, a0 -800009b8: 93 0b 00 00 mv s7, zero -800009bc: 93 89 0e 00 mv s3, t4 -800009c0: 13 0a 0e 00 mv s4, t3 -800009c4: 93 8a 03 00 mv s5, t2 -800009c8: 03 2b 41 00 lw s6, 4(sp) -800009cc: 13 07 00 00 mv a4, zero -800009d0: 33 05 7f 01 add a0, t5, s7 -800009d4: 93 05 f5 ff addi a1, a0, -1 -800009d8: 33 8d f5 03 mul s10, a1, t6 -800009dc: b3 8d af 02 mul s11, t6, a0 -800009e0: 13 05 15 00 addi a0, a0, 1 -800009e4: b3 00 f5 03 mul ra, a0, t6 -800009e8: 13 86 09 00 mv a2, s3 -800009ec: 13 05 0a 00 mv a0, s4 -800009f0: 93 85 0a 00 mv a1, s5 -800009f4: 13 04 0b 00 mv s0, s6 -800009f8: b3 84 ec 00 add s1, s9, a4 -800009fc: 93 86 f4 ff addi a3, s1, -1 -80000a00: b3 82 a6 01 add t0, a3, s10 -80000a04: 93 97 22 00 slli a5, t0, 2 -80000a08: b3 07 f3 00 add a5, t1, a5 -80000a0c: 87 a5 07 00 flw fa1, 0(a5) -80000a10: d3 75 b0 10 fmul.s fa1, ft0, fa1 -80000a14: 07 26 06 00 flw fa2, 0(a2) -80000a18: 93 87 14 00 addi a5, s1, 1 -80000a1c: b3 84 a7 01 add s1, a5, s10 -80000a20: 93 94 24 00 slli s1, s1, 2 -80000a24: b3 04 93 00 add s1, t1, s1 -80000a28: 87 a6 04 00 flw fa3, 0(s1) -80000a2c: b3 84 b6 01 add s1, a3, s11 -80000a30: 93 94 24 00 slli s1, s1, 2 -80000a34: b3 04 93 00 add s1, t1, s1 -80000a38: 07 a7 04 00 flw fa4, 0(s1) -80000a3c: 87 27 05 00 flw fa5, 0(a0) -80000a40: 53 f6 c0 10 fmul.s fa2, ft1, fa2 -80000a44: d3 76 d1 10 fmul.s fa3, ft2, fa3 -80000a48: 53 f7 e1 10 fmul.s fa4, ft3, fa4 -80000a4c: d3 77 f2 10 fmul.s fa5, ft4, fa5 -80000a50: b3 84 b7 01 add s1, a5, s11 -80000a54: 93 94 24 00 slli s1, s1, 2 -80000a58: b3 04 93 00 add s1, t1, s1 -80000a5c: 07 a8 04 00 flw fa6, 0(s1) -80000a60: b3 86 16 00 add a3, a3, ra -80000a64: 93 96 26 00 slli a3, a3, 2 -80000a68: b3 06 d3 00 add a3, t1, a3 -80000a6c: 87 a8 06 00 flw fa7, 0(a3) -80000a70: 07 2e 04 00 flw ft8, 0(s0) -80000a74: b3 86 17 00 add a3, a5, ra -80000a78: 93 96 26 00 slli a3, a3, 2 -80000a7c: b3 06 d3 00 add a3, t1, a3 -80000a80: 87 ae 06 00 flw ft9, 0(a3) -80000a84: 53 f8 02 11 fmul.s fa6, ft5, fa6 -80000a88: d3 78 13 11 fmul.s fa7, ft6, fa7 -80000a8c: 53 fe c3 11 fmul.s ft8, ft7, ft8 -80000a90: d3 7e d5 11 fmul.s ft9, fa0, ft9 -80000a94: d3 f5 c5 00 fadd.s fa1, fa1, fa2 -80000a98: d3 f5 d5 00 fadd.s fa1, fa1, fa3 -80000a9c: d3 f5 e5 00 fadd.s fa1, fa1, fa4 -80000aa0: d3 f5 f5 00 fadd.s fa1, fa1, fa5 -80000aa4: d3 f5 05 01 fadd.s fa1, fa1, fa6 -80000aa8: d3 f5 15 01 fadd.s fa1, fa1, fa7 -80000aac: d3 f5 c5 01 fadd.s fa1, fa1, ft8 -80000ab0: d3 f5 d5 01 fadd.s fa1, fa1, ft9 -80000ab4: 27 a0 b5 00 fsw fa1, 0(a1) -80000ab8: 13 07 17 00 addi a4, a4, 1 -80000abc: 13 04 44 00 addi s0, s0, 4 -80000ac0: 93 85 45 00 addi a1, a1, 4 -80000ac4: 13 05 45 00 addi a0, a0, 4 -80000ac8: 13 06 46 00 addi a2, a2, 4 -80000acc: e3 66 87 f3 bltu a4, s8, -212 -80000ad0: 93 8b 1b 00 addi s7, s7, 1 -80000ad4: 33 0b 2b 01 add s6, s6, s2 -80000ad8: b3 8a 2a 01 add s5, s5, s2 -80000adc: 33 0a 2a 01 add s4, s4, s2 -80000ae0: b3 89 29 01 add s3, s3, s2 -80000ae4: e3 e4 1b ef bltu s7, a7, -280 -80000ae8: 13 08 18 00 addi a6, a6, 1 -80000aec: 03 25 81 00 lw a0, 8(sp) -80000af0: e3 64 a8 ec bltu a6, a0, -312 -80000af4: 83 2d c1 00 lw s11, 12(sp) -80000af8: 03 2d 01 01 lw s10, 16(sp) -80000afc: 83 2c 41 01 lw s9, 20(sp) -80000b00: 03 2c 81 01 lw s8, 24(sp) -80000b04: 83 2b c1 01 lw s7, 28(sp) -80000b08: 03 2b 01 02 lw s6, 32(sp) -80000b0c: 83 2a 41 02 lw s5, 36(sp) -80000b10: 03 2a 81 02 lw s4, 40(sp) -80000b14: 83 29 c1 02 lw s3, 44(sp) -80000b18: 03 29 01 03 lw s2, 48(sp) -80000b1c: 83 24 41 03 lw s1, 52(sp) -80000b20: 03 24 81 03 lw s0, 56(sp) -80000b24: 83 20 c1 03 lw ra, 60(sp) -80000b28: 13 01 01 04 addi sp, sp, 64 -80000b2c: 67 80 00 00 ret +80000938 _pocl_kernel_sfilter_workgroup_fast: +80000938: 13 01 01 fc addi sp, sp, -64 +8000093c: 23 2e 11 02 sw ra, 60(sp) +80000940: 23 2c 81 02 sw s0, 56(sp) +80000944: 23 2a 91 02 sw s1, 52(sp) +80000948: 23 28 21 03 sw s2, 48(sp) +8000094c: 23 26 31 03 sw s3, 44(sp) +80000950: 23 24 41 03 sw s4, 40(sp) +80000954: 23 22 51 03 sw s5, 36(sp) +80000958: 23 20 61 03 sw s6, 32(sp) +8000095c: 23 2e 71 01 sw s7, 28(sp) +80000960: 23 2c 81 01 sw s8, 24(sp) +80000964: 23 2a 91 01 sw s9, 20(sp) +80000968: 23 28 a1 01 sw s10, 16(sp) +8000096c: 23 26 b1 01 sw s11, 12(sp) +80000970: 13 08 00 00 mv a6, zero +80000974: 03 27 85 00 lw a4, 8(a0) +80000978: 83 24 c5 00 lw s1, 12(a0) +8000097c: 03 23 05 00 lw t1, 0(a0) +80000980: 83 23 45 00 lw t2, 4(a0) +80000984: 83 2f 07 00 lw t6, 0(a4) +80000988: 07 a0 04 00 flw ft0, 0(s1) +8000098c: 03 27 05 01 lw a4, 16(a0) +80000990: 83 24 45 01 lw s1, 20(a0) +80000994: 03 24 85 01 lw s0, 24(a0) +80000998: 83 28 c5 01 lw a7, 28(a0) +8000099c: 87 20 07 00 flw ft1, 0(a4) +800009a0: 07 a1 04 00 flw ft2, 0(s1) +800009a4: 87 21 04 00 flw ft3, 0(s0) +800009a8: 07 a2 08 00 flw ft4, 0(a7) +800009ac: 03 27 05 02 lw a4, 32(a0) +800009b0: 83 24 45 02 lw s1, 36(a0) +800009b4: 03 24 85 02 lw s0, 40(a0) +800009b8: 03 25 c5 02 lw a0, 44(a0) +800009bc: 87 22 07 00 flw ft5, 0(a4) +800009c0: 07 a3 04 00 flw ft6, 0(s1) +800009c4: 87 23 04 00 flw ft7, 0(s0) +800009c8: 07 25 05 00 flw fa0, 0(a0) +800009cc: 03 ac 85 01 lw s8, 24(a1) +800009d0: 83 a8 c5 01 lw a7, 28(a1) +800009d4: 03 a5 c5 00 lw a0, 12(a1) +800009d8: 03 a7 05 02 lw a4, 32(a1) +800009dc: 23 24 e1 00 sw a4, 8(sp) +800009e0: 83 a5 05 01 lw a1, 16(a1) +800009e4: 33 06 cc 02 mul a2, s8, a2 +800009e8: b3 0c c5 00 add s9, a0, a2 +800009ec: 33 85 d8 02 mul a0, a7, a3 +800009f0: 33 8f a5 00 add t5, a1, a0 +800009f4: 13 05 1f 00 addi a0, t5, 1 +800009f8: 33 05 f5 03 mul a0, a0, t6 +800009fc: 33 85 ac 00 add a0, s9, a0 +80000a00: 13 15 25 00 slli a0, a0, 2 +80000a04: 33 05 a3 00 add a0, t1, a0 +80000a08: 23 22 a1 00 sw a0, 4(sp) +80000a0c: 13 99 2f 00 slli s2, t6, 2 +80000a10: 33 05 ff 03 mul a0, t5, t6 +80000a14: 33 85 ac 00 add a0, s9, a0 +80000a18: 13 15 25 00 slli a0, a0, 2 +80000a1c: b3 83 a3 00 add t2, t2, a0 +80000a20: 33 0e a3 00 add t3, t1, a0 +80000a24: 13 05 ff ff addi a0, t5, -1 +80000a28: 33 05 f5 03 mul a0, a0, t6 +80000a2c: 33 85 ac 00 add a0, s9, a0 +80000a30: 13 15 25 00 slli a0, a0, 2 +80000a34: b3 0e a3 00 add t4, t1, a0 +80000a38: 93 0b 00 00 mv s7, zero +80000a3c: 93 89 0e 00 mv s3, t4 +80000a40: 13 0a 0e 00 mv s4, t3 +80000a44: 93 8a 03 00 mv s5, t2 +80000a48: 03 2b 41 00 lw s6, 4(sp) +80000a4c: 13 07 00 00 mv a4, zero +80000a50: 33 05 7f 01 add a0, t5, s7 +80000a54: 93 05 f5 ff addi a1, a0, -1 +80000a58: 33 8d f5 03 mul s10, a1, t6 +80000a5c: b3 8d af 02 mul s11, t6, a0 +80000a60: 13 05 15 00 addi a0, a0, 1 +80000a64: b3 00 f5 03 mul ra, a0, t6 +80000a68: 13 86 09 00 mv a2, s3 +80000a6c: 13 05 0a 00 mv a0, s4 +80000a70: 93 85 0a 00 mv a1, s5 +80000a74: 13 04 0b 00 mv s0, s6 +80000a78: b3 84 ec 00 add s1, s9, a4 +80000a7c: 93 86 f4 ff addi a3, s1, -1 +80000a80: b3 82 a6 01 add t0, a3, s10 +80000a84: 93 97 22 00 slli a5, t0, 2 +80000a88: b3 07 f3 00 add a5, t1, a5 +80000a8c: 87 a5 07 00 flw fa1, 0(a5) +80000a90: d3 75 b0 10 fmul.s fa1, ft0, fa1 +80000a94: 07 26 06 00 flw fa2, 0(a2) +80000a98: 93 87 14 00 addi a5, s1, 1 +80000a9c: b3 84 a7 01 add s1, a5, s10 +80000aa0: 93 94 24 00 slli s1, s1, 2 +80000aa4: b3 04 93 00 add s1, t1, s1 +80000aa8: 87 a6 04 00 flw fa3, 0(s1) +80000aac: b3 84 b6 01 add s1, a3, s11 +80000ab0: 93 94 24 00 slli s1, s1, 2 +80000ab4: b3 04 93 00 add s1, t1, s1 +80000ab8: 07 a7 04 00 flw fa4, 0(s1) +80000abc: 87 27 05 00 flw fa5, 0(a0) +80000ac0: 53 f6 c0 10 fmul.s fa2, ft1, fa2 +80000ac4: d3 76 d1 10 fmul.s fa3, ft2, fa3 +80000ac8: 53 f7 e1 10 fmul.s fa4, ft3, fa4 +80000acc: d3 77 f2 10 fmul.s fa5, ft4, fa5 +80000ad0: b3 84 b7 01 add s1, a5, s11 +80000ad4: 93 94 24 00 slli s1, s1, 2 +80000ad8: b3 04 93 00 add s1, t1, s1 +80000adc: 07 a8 04 00 flw fa6, 0(s1) +80000ae0: b3 86 16 00 add a3, a3, ra +80000ae4: 93 96 26 00 slli a3, a3, 2 +80000ae8: b3 06 d3 00 add a3, t1, a3 +80000aec: 87 a8 06 00 flw fa7, 0(a3) +80000af0: 07 2e 04 00 flw ft8, 0(s0) +80000af4: b3 86 17 00 add a3, a5, ra +80000af8: 93 96 26 00 slli a3, a3, 2 +80000afc: b3 06 d3 00 add a3, t1, a3 +80000b00: 87 ae 06 00 flw ft9, 0(a3) +80000b04: 53 f8 02 11 fmul.s fa6, ft5, fa6 +80000b08: d3 78 13 11 fmul.s fa7, ft6, fa7 +80000b0c: 53 fe c3 11 fmul.s ft8, ft7, ft8 +80000b10: d3 7e d5 11 fmul.s ft9, fa0, ft9 +80000b14: d3 f5 c5 00 fadd.s fa1, fa1, fa2 +80000b18: d3 f5 d5 00 fadd.s fa1, fa1, fa3 +80000b1c: d3 f5 e5 00 fadd.s fa1, fa1, fa4 +80000b20: d3 f5 f5 00 fadd.s fa1, fa1, fa5 +80000b24: d3 f5 05 01 fadd.s fa1, fa1, fa6 +80000b28: d3 f5 15 01 fadd.s fa1, fa1, fa7 +80000b2c: d3 f5 c5 01 fadd.s fa1, fa1, ft8 +80000b30: d3 f5 d5 01 fadd.s fa1, fa1, ft9 +80000b34: 27 a0 b5 00 fsw fa1, 0(a1) +80000b38: 13 07 17 00 addi a4, a4, 1 +80000b3c: 13 04 44 00 addi s0, s0, 4 +80000b40: 93 85 45 00 addi a1, a1, 4 +80000b44: 13 05 45 00 addi a0, a0, 4 +80000b48: 13 06 46 00 addi a2, a2, 4 +80000b4c: e3 66 87 f3 bltu a4, s8, -212 +80000b50: 93 8b 1b 00 addi s7, s7, 1 +80000b54: 33 0b 2b 01 add s6, s6, s2 +80000b58: b3 8a 2a 01 add s5, s5, s2 +80000b5c: 33 0a 2a 01 add s4, s4, s2 +80000b60: b3 89 29 01 add s3, s3, s2 +80000b64: e3 e4 1b ef bltu s7, a7, -280 +80000b68: 13 08 18 00 addi a6, a6, 1 +80000b6c: 03 25 81 00 lw a0, 8(sp) +80000b70: e3 64 a8 ec bltu a6, a0, -312 +80000b74: 83 2d c1 00 lw s11, 12(sp) +80000b78: 03 2d 01 01 lw s10, 16(sp) +80000b7c: 83 2c 41 01 lw s9, 20(sp) +80000b80: 03 2c 81 01 lw s8, 24(sp) +80000b84: 83 2b c1 01 lw s7, 28(sp) +80000b88: 03 2b 01 02 lw s6, 32(sp) +80000b8c: 83 2a 41 02 lw s5, 36(sp) +80000b90: 03 2a 81 02 lw s4, 40(sp) +80000b94: 83 29 c1 02 lw s3, 44(sp) +80000b98: 03 29 01 03 lw s2, 48(sp) +80000b9c: 83 24 41 03 lw s1, 52(sp) +80000ba0: 03 24 81 03 lw s0, 56(sp) +80000ba4: 83 20 c1 03 lw ra, 60(sp) +80000ba8: 13 01 01 04 addi sp, sp, 64 +80000bac: 67 80 00 00 ret -80000b30 _exit: -80000b30: 13 05 00 00 mv a0, zero -80000b34: 6b 00 05 00 +80000bb0 _exit: +80000bb0: 13 05 00 00 mv a0, zero +80000bb4: 6b 00 05 00 -80000b38 vx_set_sp: -80000b38: 73 25 00 fc csrr a0, 4032 -80000b3c: 6b 00 05 00 -80000b40: 97 11 00 00 auipc gp, 1 -80000b44: 93 81 81 cc addi gp, gp, -824 -80000b48: 17 f1 ff 7e auipc sp, 520191 -80000b4c: 13 01 81 4b addi sp, sp, 1208 -80000b50: 93 05 00 40 addi a1, zero, 1024 -80000b54: 73 26 10 cc csrr a2, 3265 -80000b58: b3 85 c5 02 mul a1, a1, a2 -80000b5c: 33 01 b1 40 sub sp, sp, a1 -80000b60: f3 26 30 cc csrr a3, 3267 -80000b64: 63 86 06 00 beqz a3, 12 -80000b68: 13 05 00 00 mv a0, zero -80000b6c: 6b 00 05 00 +80000bb8 vx_set_sp: +80000bb8: 73 25 00 fc csrr a0, 4032 +80000bbc: 6b 00 05 00 +80000bc0: 97 21 00 00 auipc gp, 2 +80000bc4: 93 81 81 c9 addi gp, gp, -872 +80000bc8: 17 f1 ff 7e auipc sp, 520191 +80000bcc: 13 01 81 43 addi sp, sp, 1080 +80000bd0: 93 05 00 40 addi a1, zero, 1024 +80000bd4: 73 26 10 cc csrr a2, 3265 +80000bd8: b3 85 c5 02 mul a1, a1, a2 +80000bdc: 33 01 b1 40 sub sp, sp, a1 +80000be0: f3 26 30 cc csrr a3, 3267 +80000be4: 63 86 06 00 beqz a3, 12 +80000be8: 13 05 00 00 mv a0, zero +80000bec: 6b 00 05 00 -80000b70 RETURN: -80000b70: 67 80 00 00 ret - -80000b74 vx_wspawn: -80000b74: 6b 10 b5 00 -80000b78: 67 80 00 00 ret - -80000b7c vx_tmc: -80000b7c: 6b 00 05 00 -80000b80: 67 80 00 00 ret - -80000b84 vx_barrier: -80000b84: 6b 40 b5 00 -80000b88: 67 80 00 00 ret - -80000b8c vx_split: -80000b8c: 6b 20 05 00 -80000b90: 67 80 00 00 ret - -80000b94 vx_join: -80000b94: 6b 30 00 00 -80000b98: 67 80 00 00 ret - -80000b9c vx_warp_id: -80000b9c: 73 25 30 cc csrr a0, 3267 -80000ba0: 67 80 00 00 ret - -80000ba4 vx_warp_gid: -80000ba4: 73 25 40 f1 csrr a0, mhartid -80000ba8: 67 80 00 00 ret - -80000bac vx_thread_id: -80000bac: 73 25 00 cc csrr a0, 3264 -80000bb0: 67 80 00 00 ret - -80000bb4 vx_thread_lid: -80000bb4: 73 25 10 cc csrr a0, 3265 -80000bb8: 67 80 00 00 ret - -80000bbc vx_thread_gid: -80000bbc: 73 25 20 cc csrr a0, 3266 -80000bc0: 67 80 00 00 ret - -80000bc4 vx_core_id: -80000bc4: 73 25 50 cc csrr a0, 3269 -80000bc8: 67 80 00 00 ret - -80000bcc vx_num_threads: -80000bcc: 73 25 00 fc csrr a0, 4032 -80000bd0: 67 80 00 00 ret - -80000bd4 vx_num_warps: -80000bd4: 73 25 10 fc csrr a0, 4033 -80000bd8: 67 80 00 00 ret - -80000bdc vx_num_cores: -80000bdc: 73 25 20 fc csrr a0, 4034 -80000be0: 67 80 00 00 ret - -80000be4 vx_num_cycles: -80000be4: 73 25 00 b0 csrr a0, mcycle -80000be8: 67 80 00 00 ret - -80000bec vx_num_instrs: -80000bec: 73 25 20 b0 csrr a0, minstret +80000bf0 RETURN: 80000bf0: 67 80 00 00 ret -80000bf4 atexit: -80000bf4: 93 05 05 00 mv a1, a0 -80000bf8: 93 06 00 00 mv a3, zero -80000bfc: 13 06 00 00 mv a2, zero -80000c00: 13 05 00 00 mv a0, zero -80000c04: 6f 00 c0 20 j 524 +80000bf4 vx_wspawn: +80000bf4: 6b 10 b5 00 +80000bf8: 67 80 00 00 ret -80000c08 exit: -80000c08: 13 01 01 ff addi sp, sp, -16 -80000c0c: 93 05 00 00 mv a1, zero -80000c10: 23 24 81 00 sw s0, 8(sp) -80000c14: 23 26 11 00 sw ra, 12(sp) -80000c18: 13 04 05 00 mv s0, a0 -80000c1c: ef 00 00 29 jal 656 -80000c20: b7 17 00 80 lui a5, 524289 -80000c24: 03 a5 07 43 lw a0, 1072(a5) -80000c28: 83 27 c5 03 lw a5, 60(a0) -80000c2c: 63 84 07 00 beqz a5, 8 -80000c30: e7 80 07 00 jalr a5 -80000c34: 13 05 04 00 mv a0, s0 -80000c38: ef f0 9f ef jal -264 +80000bfc vx_tmc: +80000bfc: 6b 00 05 00 +80000c00: 67 80 00 00 ret -80000c3c __libc_fini_array: -80000c3c: 13 01 01 ff addi sp, sp, -16 -80000c40: 23 24 81 00 sw s0, 8(sp) -80000c44: b7 17 00 80 lui a5, 524289 -80000c48: 37 14 00 80 lui s0, 524289 -80000c4c: 13 04 44 00 addi s0, s0, 4 -80000c50: 93 87 47 00 addi a5, a5, 4 -80000c54: b3 87 87 40 sub a5, a5, s0 -80000c58: 23 22 91 00 sw s1, 4(sp) -80000c5c: 23 26 11 00 sw ra, 12(sp) -80000c60: 93 d4 27 40 srai s1, a5, 2 -80000c64: 63 80 04 02 beqz s1, 32 -80000c68: 93 87 c7 ff addi a5, a5, -4 -80000c6c: 33 84 87 00 add s0, a5, s0 -80000c70: 83 27 04 00 lw a5, 0(s0) -80000c74: 93 84 f4 ff addi s1, s1, -1 -80000c78: 13 04 c4 ff addi s0, s0, -4 -80000c7c: e7 80 07 00 jalr a5 -80000c80: e3 98 04 fe bnez s1, -16 -80000c84: 83 20 c1 00 lw ra, 12(sp) -80000c88: 03 24 81 00 lw s0, 8(sp) -80000c8c: 83 24 41 00 lw s1, 4(sp) -80000c90: 13 01 01 01 addi sp, sp, 16 -80000c94: 67 80 00 00 ret +80000c04 vx_barrier: +80000c04: 6b 40 b5 00 +80000c08: 67 80 00 00 ret -80000c98 __libc_init_array: -80000c98: 13 01 01 ff addi sp, sp, -16 -80000c9c: 23 24 81 00 sw s0, 8(sp) -80000ca0: 23 20 21 01 sw s2, 0(sp) -80000ca4: 37 14 00 80 lui s0, 524289 -80000ca8: 37 19 00 80 lui s2, 524289 -80000cac: 93 07 04 00 mv a5, s0 -80000cb0: 13 09 09 00 mv s2, s2 -80000cb4: 33 09 f9 40 sub s2, s2, a5 -80000cb8: 23 26 11 00 sw ra, 12(sp) -80000cbc: 23 22 91 00 sw s1, 4(sp) -80000cc0: 13 59 29 40 srai s2, s2, 2 -80000cc4: 63 00 09 02 beqz s2, 32 -80000cc8: 13 04 04 00 mv s0, s0 -80000ccc: 93 04 00 00 mv s1, zero -80000cd0: 83 27 04 00 lw a5, 0(s0) -80000cd4: 93 84 14 00 addi s1, s1, 1 -80000cd8: 13 04 44 00 addi s0, s0, 4 -80000cdc: e7 80 07 00 jalr a5 -80000ce0: e3 18 99 fe bne s2, s1, -16 -80000ce4: 37 14 00 80 lui s0, 524289 -80000ce8: 37 19 00 80 lui s2, 524289 -80000cec: 93 07 04 00 mv a5, s0 -80000cf0: 13 09 49 00 addi s2, s2, 4 -80000cf4: 33 09 f9 40 sub s2, s2, a5 -80000cf8: 13 59 29 40 srai s2, s2, 2 -80000cfc: 63 00 09 02 beqz s2, 32 -80000d00: 13 04 04 00 mv s0, s0 -80000d04: 93 04 00 00 mv s1, zero -80000d08: 83 27 04 00 lw a5, 0(s0) -80000d0c: 93 84 14 00 addi s1, s1, 1 -80000d10: 13 04 44 00 addi s0, s0, 4 -80000d14: e7 80 07 00 jalr a5 -80000d18: e3 18 99 fe bne s2, s1, -16 -80000d1c: 83 20 c1 00 lw ra, 12(sp) -80000d20: 03 24 81 00 lw s0, 8(sp) -80000d24: 83 24 41 00 lw s1, 4(sp) -80000d28: 03 29 01 00 lw s2, 0(sp) -80000d2c: 13 01 01 01 addi sp, sp, 16 -80000d30: 67 80 00 00 ret +80000c0c vx_split: +80000c0c: 6b 20 05 00 +80000c10: 67 80 00 00 ret -80000d34 memset: -80000d34: 13 03 f0 00 addi t1, zero, 15 -80000d38: 13 07 05 00 mv a4, a0 -80000d3c: 63 7e c3 02 bgeu t1, a2, 60 -80000d40: 93 77 f7 00 andi a5, a4, 15 -80000d44: 63 90 07 0a bnez a5, 160 -80000d48: 63 92 05 08 bnez a1, 132 -80000d4c: 93 76 06 ff andi a3, a2, -16 -80000d50: 13 76 f6 00 andi a2, a2, 15 -80000d54: b3 86 e6 00 add a3, a3, a4 -80000d58: 23 20 b7 00 sw a1, 0(a4) -80000d5c: 23 22 b7 00 sw a1, 4(a4) -80000d60: 23 24 b7 00 sw a1, 8(a4) -80000d64: 23 26 b7 00 sw a1, 12(a4) -80000d68: 13 07 07 01 addi a4, a4, 16 -80000d6c: e3 66 d7 fe bltu a4, a3, -20 -80000d70: 63 14 06 00 bnez a2, 8 -80000d74: 67 80 00 00 ret -80000d78: b3 06 c3 40 sub a3, t1, a2 -80000d7c: 93 96 26 00 slli a3, a3, 2 -80000d80: 97 02 00 00 auipc t0, 0 -80000d84: b3 86 56 00 add a3, a3, t0 -80000d88: 67 80 c6 00 jr 12(a3) -80000d8c: 23 07 b7 00 sb a1, 14(a4) -80000d90: a3 06 b7 00 sb a1, 13(a4) -80000d94: 23 06 b7 00 sb a1, 12(a4) -80000d98: a3 05 b7 00 sb a1, 11(a4) -80000d9c: 23 05 b7 00 sb a1, 10(a4) -80000da0: a3 04 b7 00 sb a1, 9(a4) -80000da4: 23 04 b7 00 sb a1, 8(a4) -80000da8: a3 03 b7 00 sb a1, 7(a4) -80000dac: 23 03 b7 00 sb a1, 6(a4) -80000db0: a3 02 b7 00 sb a1, 5(a4) -80000db4: 23 02 b7 00 sb a1, 4(a4) -80000db8: a3 01 b7 00 sb a1, 3(a4) -80000dbc: 23 01 b7 00 sb a1, 2(a4) -80000dc0: a3 00 b7 00 sb a1, 1(a4) -80000dc4: 23 00 b7 00 sb a1, 0(a4) -80000dc8: 67 80 00 00 ret -80000dcc: 93 f5 f5 0f andi a1, a1, 255 -80000dd0: 93 96 85 00 slli a3, a1, 8 -80000dd4: b3 e5 d5 00 or a1, a1, a3 -80000dd8: 93 96 05 01 slli a3, a1, 16 -80000ddc: b3 e5 d5 00 or a1, a1, a3 -80000de0: 6f f0 df f6 j -148 -80000de4: 93 96 27 00 slli a3, a5, 2 -80000de8: 97 02 00 00 auipc t0, 0 -80000dec: b3 86 56 00 add a3, a3, t0 -80000df0: 93 82 00 00 mv t0, ra -80000df4: e7 80 06 fa jalr -96(a3) -80000df8: 93 80 02 00 mv ra, t0 -80000dfc: 93 87 07 ff addi a5, a5, -16 -80000e00: 33 07 f7 40 sub a4, a4, a5 -80000e04: 33 06 f6 00 add a2, a2, a5 -80000e08: e3 78 c3 f6 bgeu t1, a2, -144 -80000e0c: 6f f0 df f3 j -196 +80000c14 vx_join: +80000c14: 6b 30 00 00 +80000c18: 67 80 00 00 ret -80000e10 __register_exitproc: -80000e10: b7 17 00 80 lui a5, 524289 -80000e14: 03 a7 07 43 lw a4, 1072(a5) -80000e18: 83 27 87 14 lw a5, 328(a4) -80000e1c: 63 8c 07 04 beqz a5, 88 -80000e20: 03 a7 47 00 lw a4, 4(a5) -80000e24: 13 08 f0 01 addi a6, zero, 31 -80000e28: 63 4e e8 06 blt a6, a4, 124 -80000e2c: 13 18 27 00 slli a6, a4, 2 -80000e30: 63 06 05 02 beqz a0, 44 -80000e34: 33 83 07 01 add t1, a5, a6 -80000e38: 23 24 c3 08 sw a2, 136(t1) -80000e3c: 83 a8 87 18 lw a7, 392(a5) -80000e40: 13 06 10 00 addi a2, zero, 1 -80000e44: 33 16 e6 00 sll a2, a2, a4 -80000e48: b3 e8 c8 00 or a7, a7, a2 -80000e4c: 23 a4 17 19 sw a7, 392(a5) -80000e50: 23 24 d3 10 sw a3, 264(t1) -80000e54: 93 06 20 00 addi a3, zero, 2 -80000e58: 63 04 d5 02 beq a0, a3, 40 -80000e5c: 13 07 17 00 addi a4, a4, 1 -80000e60: 23 a2 e7 00 sw a4, 4(a5) -80000e64: b3 87 07 01 add a5, a5, a6 -80000e68: 23 a4 b7 00 sw a1, 8(a5) -80000e6c: 13 05 00 00 mv a0, zero -80000e70: 67 80 00 00 ret -80000e74: 93 07 c7 14 addi a5, a4, 332 -80000e78: 23 24 f7 14 sw a5, 328(a4) -80000e7c: 6f f0 5f fa j -92 -80000e80: 83 a6 c7 18 lw a3, 396(a5) -80000e84: 13 07 17 00 addi a4, a4, 1 -80000e88: 23 a2 e7 00 sw a4, 4(a5) -80000e8c: 33 e6 c6 00 or a2, a3, a2 -80000e90: 23 a6 c7 18 sw a2, 396(a5) -80000e94: b3 87 07 01 add a5, a5, a6 -80000e98: 23 a4 b7 00 sw a1, 8(a5) -80000e9c: 13 05 00 00 mv a0, zero -80000ea0: 67 80 00 00 ret -80000ea4: 13 05 f0 ff addi a0, zero, -1 -80000ea8: 67 80 00 00 ret +80000c1c vx_warp_id: +80000c1c: 73 25 30 cc csrr a0, 3267 +80000c20: 67 80 00 00 ret -80000eac __call_exitprocs: -80000eac: 13 01 01 fd addi sp, sp, -48 -80000eb0: b7 17 00 80 lui a5, 524289 -80000eb4: 23 2c 41 01 sw s4, 24(sp) -80000eb8: 03 aa 07 43 lw s4, 1072(a5) -80000ebc: 23 20 21 03 sw s2, 32(sp) -80000ec0: 23 26 11 02 sw ra, 44(sp) -80000ec4: 03 29 8a 14 lw s2, 328(s4) -80000ec8: 23 24 81 02 sw s0, 40(sp) -80000ecc: 23 22 91 02 sw s1, 36(sp) -80000ed0: 23 2e 31 01 sw s3, 28(sp) -80000ed4: 23 2a 51 01 sw s5, 20(sp) -80000ed8: 23 28 61 01 sw s6, 16(sp) -80000edc: 23 26 71 01 sw s7, 12(sp) -80000ee0: 23 24 81 01 sw s8, 8(sp) -80000ee4: 63 00 09 04 beqz s2, 64 -80000ee8: 13 0b 05 00 mv s6, a0 -80000eec: 93 8b 05 00 mv s7, a1 -80000ef0: 93 0a 10 00 addi s5, zero, 1 -80000ef4: 93 09 f0 ff addi s3, zero, -1 -80000ef8: 83 24 49 00 lw s1, 4(s2) -80000efc: 13 84 f4 ff addi s0, s1, -1 -80000f00: 63 42 04 02 bltz s0, 36 -80000f04: 93 94 24 00 slli s1, s1, 2 -80000f08: b3 04 99 00 add s1, s2, s1 -80000f0c: 63 84 0b 04 beqz s7, 72 -80000f10: 83 a7 44 10 lw a5, 260(s1) -80000f14: 63 80 77 05 beq a5, s7, 64 -80000f18: 13 04 f4 ff addi s0, s0, -1 -80000f1c: 93 84 c4 ff addi s1, s1, -4 -80000f20: e3 16 34 ff bne s0, s3, -20 -80000f24: 83 20 c1 02 lw ra, 44(sp) -80000f28: 03 24 81 02 lw s0, 40(sp) -80000f2c: 83 24 41 02 lw s1, 36(sp) -80000f30: 03 29 01 02 lw s2, 32(sp) -80000f34: 83 29 c1 01 lw s3, 28(sp) -80000f38: 03 2a 81 01 lw s4, 24(sp) -80000f3c: 83 2a 41 01 lw s5, 20(sp) -80000f40: 03 2b 01 01 lw s6, 16(sp) -80000f44: 83 2b c1 00 lw s7, 12(sp) -80000f48: 03 2c 81 00 lw s8, 8(sp) -80000f4c: 13 01 01 03 addi sp, sp, 48 -80000f50: 67 80 00 00 ret -80000f54: 83 27 49 00 lw a5, 4(s2) -80000f58: 83 a6 44 00 lw a3, 4(s1) -80000f5c: 93 87 f7 ff addi a5, a5, -1 -80000f60: 63 8e 87 04 beq a5, s0, 92 -80000f64: 23 a2 04 00 sw zero, 4(s1) -80000f68: e3 88 06 fa beqz a3, -80 -80000f6c: 83 27 89 18 lw a5, 392(s2) -80000f70: 33 97 8a 00 sll a4, s5, s0 -80000f74: 03 2c 49 00 lw s8, 4(s2) -80000f78: b3 77 f7 00 and a5, a4, a5 -80000f7c: 63 92 07 02 bnez a5, 36 -80000f80: e7 80 06 00 jalr a3 -80000f84: 03 27 49 00 lw a4, 4(s2) -80000f88: 83 27 8a 14 lw a5, 328(s4) -80000f8c: 63 14 87 01 bne a4, s8, 8 -80000f90: e3 04 f9 f8 beq s2, a5, -120 -80000f94: e3 88 07 f8 beqz a5, -112 -80000f98: 13 89 07 00 mv s2, a5 -80000f9c: 6f f0 df f5 j -164 -80000fa0: 83 27 c9 18 lw a5, 396(s2) -80000fa4: 83 a5 44 08 lw a1, 132(s1) -80000fa8: 33 77 f7 00 and a4, a4, a5 -80000fac: 63 1c 07 00 bnez a4, 24 -80000fb0: 13 05 0b 00 mv a0, s6 -80000fb4: e7 80 06 00 jalr a3 -80000fb8: 6f f0 df fc j -52 -80000fbc: 23 22 89 00 sw s0, 4(s2) -80000fc0: 6f f0 9f fa j -88 -80000fc4: 13 85 05 00 mv a0, a1 -80000fc8: e7 80 06 00 jalr a3 -80000fcc: 6f f0 9f fb j -72 +80000c24 vx_warp_gid: +80000c24: 73 25 40 f1 csrr a0, mhartid +80000c28: 67 80 00 00 ret + +80000c2c vx_thread_id: +80000c2c: 73 25 00 cc csrr a0, 3264 +80000c30: 67 80 00 00 ret + +80000c34 vx_thread_lid: +80000c34: 73 25 10 cc csrr a0, 3265 +80000c38: 67 80 00 00 ret + +80000c3c vx_thread_gid: +80000c3c: 73 25 20 cc csrr a0, 3266 +80000c40: 67 80 00 00 ret + +80000c44 vx_core_id: +80000c44: 73 25 50 cc csrr a0, 3269 +80000c48: 67 80 00 00 ret + +80000c4c vx_num_threads: +80000c4c: 73 25 00 fc csrr a0, 4032 +80000c50: 67 80 00 00 ret + +80000c54 vx_num_warps: +80000c54: 73 25 10 fc csrr a0, 4033 +80000c58: 67 80 00 00 ret + +80000c5c vx_num_cores: +80000c5c: 73 25 20 fc csrr a0, 4034 +80000c60: 67 80 00 00 ret + +80000c64 vx_num_cycles: +80000c64: 73 25 00 b0 csrr a0, mcycle +80000c68: 67 80 00 00 ret + +80000c6c vx_num_instrs: +80000c6c: 73 25 20 b0 csrr a0, minstret +80000c70: 67 80 00 00 ret + +80000c74 atexit: +80000c74: 93 05 05 00 mv a1, a0 +80000c78: 93 06 00 00 mv a3, zero +80000c7c: 13 06 00 00 mv a2, zero +80000c80: 13 05 00 00 mv a0, zero +80000c84: 6f 00 c0 20 j 524 + +80000c88 exit: +80000c88: 13 01 01 ff addi sp, sp, -16 +80000c8c: 93 05 00 00 mv a1, zero +80000c90: 23 24 81 00 sw s0, 8(sp) +80000c94: 23 26 11 00 sw ra, 12(sp) +80000c98: 13 04 05 00 mv s0, a0 +80000c9c: ef 00 00 29 jal 656 +80000ca0: b7 27 00 80 lui a5, 524290 +80000ca4: 03 a5 07 48 lw a0, 1152(a5) +80000ca8: 83 27 c5 03 lw a5, 60(a0) +80000cac: 63 84 07 00 beqz a5, 8 +80000cb0: e7 80 07 00 jalr a5 +80000cb4: 13 05 04 00 mv a0, s0 +80000cb8: ef f0 9f ef jal -264 + +80000cbc __libc_fini_array: +80000cbc: 13 01 01 ff addi sp, sp, -16 +80000cc0: 23 24 81 00 sw s0, 8(sp) +80000cc4: b7 27 00 80 lui a5, 524290 +80000cc8: 37 24 00 80 lui s0, 524290 +80000ccc: 13 04 44 05 addi s0, s0, 84 +80000cd0: 93 87 47 05 addi a5, a5, 84 +80000cd4: b3 87 87 40 sub a5, a5, s0 +80000cd8: 23 22 91 00 sw s1, 4(sp) +80000cdc: 23 26 11 00 sw ra, 12(sp) +80000ce0: 93 d4 27 40 srai s1, a5, 2 +80000ce4: 63 80 04 02 beqz s1, 32 +80000ce8: 93 87 c7 ff addi a5, a5, -4 +80000cec: 33 84 87 00 add s0, a5, s0 +80000cf0: 83 27 04 00 lw a5, 0(s0) +80000cf4: 93 84 f4 ff addi s1, s1, -1 +80000cf8: 13 04 c4 ff addi s0, s0, -4 +80000cfc: e7 80 07 00 jalr a5 +80000d00: e3 98 04 fe bnez s1, -16 +80000d04: 83 20 c1 00 lw ra, 12(sp) +80000d08: 03 24 81 00 lw s0, 8(sp) +80000d0c: 83 24 41 00 lw s1, 4(sp) +80000d10: 13 01 01 01 addi sp, sp, 16 +80000d14: 67 80 00 00 ret + +80000d18 __libc_init_array: +80000d18: 13 01 01 ff addi sp, sp, -16 +80000d1c: 23 24 81 00 sw s0, 8(sp) +80000d20: 23 20 21 01 sw s2, 0(sp) +80000d24: 37 24 00 80 lui s0, 524290 +80000d28: 37 29 00 80 lui s2, 524290 +80000d2c: 93 07 04 05 addi a5, s0, 80 +80000d30: 13 09 09 05 addi s2, s2, 80 +80000d34: 33 09 f9 40 sub s2, s2, a5 +80000d38: 23 26 11 00 sw ra, 12(sp) +80000d3c: 23 22 91 00 sw s1, 4(sp) +80000d40: 13 59 29 40 srai s2, s2, 2 +80000d44: 63 00 09 02 beqz s2, 32 +80000d48: 13 04 04 05 addi s0, s0, 80 +80000d4c: 93 04 00 00 mv s1, zero +80000d50: 83 27 04 00 lw a5, 0(s0) +80000d54: 93 84 14 00 addi s1, s1, 1 +80000d58: 13 04 44 00 addi s0, s0, 4 +80000d5c: e7 80 07 00 jalr a5 +80000d60: e3 18 99 fe bne s2, s1, -16 +80000d64: 37 24 00 80 lui s0, 524290 +80000d68: 37 29 00 80 lui s2, 524290 +80000d6c: 93 07 04 05 addi a5, s0, 80 +80000d70: 13 09 49 05 addi s2, s2, 84 +80000d74: 33 09 f9 40 sub s2, s2, a5 +80000d78: 13 59 29 40 srai s2, s2, 2 +80000d7c: 63 00 09 02 beqz s2, 32 +80000d80: 13 04 04 05 addi s0, s0, 80 +80000d84: 93 04 00 00 mv s1, zero +80000d88: 83 27 04 00 lw a5, 0(s0) +80000d8c: 93 84 14 00 addi s1, s1, 1 +80000d90: 13 04 44 00 addi s0, s0, 4 +80000d94: e7 80 07 00 jalr a5 +80000d98: e3 18 99 fe bne s2, s1, -16 +80000d9c: 83 20 c1 00 lw ra, 12(sp) +80000da0: 03 24 81 00 lw s0, 8(sp) +80000da4: 83 24 41 00 lw s1, 4(sp) +80000da8: 03 29 01 00 lw s2, 0(sp) +80000dac: 13 01 01 01 addi sp, sp, 16 +80000db0: 67 80 00 00 ret + +80000db4 memset: +80000db4: 13 03 f0 00 addi t1, zero, 15 +80000db8: 13 07 05 00 mv a4, a0 +80000dbc: 63 7e c3 02 bgeu t1, a2, 60 +80000dc0: 93 77 f7 00 andi a5, a4, 15 +80000dc4: 63 90 07 0a bnez a5, 160 +80000dc8: 63 92 05 08 bnez a1, 132 +80000dcc: 93 76 06 ff andi a3, a2, -16 +80000dd0: 13 76 f6 00 andi a2, a2, 15 +80000dd4: b3 86 e6 00 add a3, a3, a4 +80000dd8: 23 20 b7 00 sw a1, 0(a4) +80000ddc: 23 22 b7 00 sw a1, 4(a4) +80000de0: 23 24 b7 00 sw a1, 8(a4) +80000de4: 23 26 b7 00 sw a1, 12(a4) +80000de8: 13 07 07 01 addi a4, a4, 16 +80000dec: e3 66 d7 fe bltu a4, a3, -20 +80000df0: 63 14 06 00 bnez a2, 8 +80000df4: 67 80 00 00 ret +80000df8: b3 06 c3 40 sub a3, t1, a2 +80000dfc: 93 96 26 00 slli a3, a3, 2 +80000e00: 97 02 00 00 auipc t0, 0 +80000e04: b3 86 56 00 add a3, a3, t0 +80000e08: 67 80 c6 00 jr 12(a3) +80000e0c: 23 07 b7 00 sb a1, 14(a4) +80000e10: a3 06 b7 00 sb a1, 13(a4) +80000e14: 23 06 b7 00 sb a1, 12(a4) +80000e18: a3 05 b7 00 sb a1, 11(a4) +80000e1c: 23 05 b7 00 sb a1, 10(a4) +80000e20: a3 04 b7 00 sb a1, 9(a4) +80000e24: 23 04 b7 00 sb a1, 8(a4) +80000e28: a3 03 b7 00 sb a1, 7(a4) +80000e2c: 23 03 b7 00 sb a1, 6(a4) +80000e30: a3 02 b7 00 sb a1, 5(a4) +80000e34: 23 02 b7 00 sb a1, 4(a4) +80000e38: a3 01 b7 00 sb a1, 3(a4) +80000e3c: 23 01 b7 00 sb a1, 2(a4) +80000e40: a3 00 b7 00 sb a1, 1(a4) +80000e44: 23 00 b7 00 sb a1, 0(a4) +80000e48: 67 80 00 00 ret +80000e4c: 93 f5 f5 0f andi a1, a1, 255 +80000e50: 93 96 85 00 slli a3, a1, 8 +80000e54: b3 e5 d5 00 or a1, a1, a3 +80000e58: 93 96 05 01 slli a3, a1, 16 +80000e5c: b3 e5 d5 00 or a1, a1, a3 +80000e60: 6f f0 df f6 j -148 +80000e64: 93 96 27 00 slli a3, a5, 2 +80000e68: 97 02 00 00 auipc t0, 0 +80000e6c: b3 86 56 00 add a3, a3, t0 +80000e70: 93 82 00 00 mv t0, ra +80000e74: e7 80 06 fa jalr -96(a3) +80000e78: 93 80 02 00 mv ra, t0 +80000e7c: 93 87 07 ff addi a5, a5, -16 +80000e80: 33 07 f7 40 sub a4, a4, a5 +80000e84: 33 06 f6 00 add a2, a2, a5 +80000e88: e3 78 c3 f6 bgeu t1, a2, -144 +80000e8c: 6f f0 df f3 j -196 + +80000e90 __register_exitproc: +80000e90: b7 27 00 80 lui a5, 524290 +80000e94: 03 a7 07 48 lw a4, 1152(a5) +80000e98: 83 27 87 14 lw a5, 328(a4) +80000e9c: 63 8c 07 04 beqz a5, 88 +80000ea0: 03 a7 47 00 lw a4, 4(a5) +80000ea4: 13 08 f0 01 addi a6, zero, 31 +80000ea8: 63 4e e8 06 blt a6, a4, 124 +80000eac: 13 18 27 00 slli a6, a4, 2 +80000eb0: 63 06 05 02 beqz a0, 44 +80000eb4: 33 83 07 01 add t1, a5, a6 +80000eb8: 23 24 c3 08 sw a2, 136(t1) +80000ebc: 83 a8 87 18 lw a7, 392(a5) +80000ec0: 13 06 10 00 addi a2, zero, 1 +80000ec4: 33 16 e6 00 sll a2, a2, a4 +80000ec8: b3 e8 c8 00 or a7, a7, a2 +80000ecc: 23 a4 17 19 sw a7, 392(a5) +80000ed0: 23 24 d3 10 sw a3, 264(t1) +80000ed4: 93 06 20 00 addi a3, zero, 2 +80000ed8: 63 04 d5 02 beq a0, a3, 40 +80000edc: 13 07 17 00 addi a4, a4, 1 +80000ee0: 23 a2 e7 00 sw a4, 4(a5) +80000ee4: b3 87 07 01 add a5, a5, a6 +80000ee8: 23 a4 b7 00 sw a1, 8(a5) +80000eec: 13 05 00 00 mv a0, zero +80000ef0: 67 80 00 00 ret +80000ef4: 93 07 c7 14 addi a5, a4, 332 +80000ef8: 23 24 f7 14 sw a5, 328(a4) +80000efc: 6f f0 5f fa j -92 +80000f00: 83 a6 c7 18 lw a3, 396(a5) +80000f04: 13 07 17 00 addi a4, a4, 1 +80000f08: 23 a2 e7 00 sw a4, 4(a5) +80000f0c: 33 e6 c6 00 or a2, a3, a2 +80000f10: 23 a6 c7 18 sw a2, 396(a5) +80000f14: b3 87 07 01 add a5, a5, a6 +80000f18: 23 a4 b7 00 sw a1, 8(a5) +80000f1c: 13 05 00 00 mv a0, zero +80000f20: 67 80 00 00 ret +80000f24: 13 05 f0 ff addi a0, zero, -1 +80000f28: 67 80 00 00 ret + +80000f2c __call_exitprocs: +80000f2c: 13 01 01 fd addi sp, sp, -48 +80000f30: b7 27 00 80 lui a5, 524290 +80000f34: 23 2c 41 01 sw s4, 24(sp) +80000f38: 03 aa 07 48 lw s4, 1152(a5) +80000f3c: 23 20 21 03 sw s2, 32(sp) +80000f40: 23 26 11 02 sw ra, 44(sp) +80000f44: 03 29 8a 14 lw s2, 328(s4) +80000f48: 23 24 81 02 sw s0, 40(sp) +80000f4c: 23 22 91 02 sw s1, 36(sp) +80000f50: 23 2e 31 01 sw s3, 28(sp) +80000f54: 23 2a 51 01 sw s5, 20(sp) +80000f58: 23 28 61 01 sw s6, 16(sp) +80000f5c: 23 26 71 01 sw s7, 12(sp) +80000f60: 23 24 81 01 sw s8, 8(sp) +80000f64: 63 00 09 04 beqz s2, 64 +80000f68: 13 0b 05 00 mv s6, a0 +80000f6c: 93 8b 05 00 mv s7, a1 +80000f70: 93 0a 10 00 addi s5, zero, 1 +80000f74: 93 09 f0 ff addi s3, zero, -1 +80000f78: 83 24 49 00 lw s1, 4(s2) +80000f7c: 13 84 f4 ff addi s0, s1, -1 +80000f80: 63 42 04 02 bltz s0, 36 +80000f84: 93 94 24 00 slli s1, s1, 2 +80000f88: b3 04 99 00 add s1, s2, s1 +80000f8c: 63 84 0b 04 beqz s7, 72 +80000f90: 83 a7 44 10 lw a5, 260(s1) +80000f94: 63 80 77 05 beq a5, s7, 64 +80000f98: 13 04 f4 ff addi s0, s0, -1 +80000f9c: 93 84 c4 ff addi s1, s1, -4 +80000fa0: e3 16 34 ff bne s0, s3, -20 +80000fa4: 83 20 c1 02 lw ra, 44(sp) +80000fa8: 03 24 81 02 lw s0, 40(sp) +80000fac: 83 24 41 02 lw s1, 36(sp) +80000fb0: 03 29 01 02 lw s2, 32(sp) +80000fb4: 83 29 c1 01 lw s3, 28(sp) +80000fb8: 03 2a 81 01 lw s4, 24(sp) +80000fbc: 83 2a 41 01 lw s5, 20(sp) +80000fc0: 03 2b 01 01 lw s6, 16(sp) +80000fc4: 83 2b c1 00 lw s7, 12(sp) +80000fc8: 03 2c 81 00 lw s8, 8(sp) +80000fcc: 13 01 01 03 addi sp, sp, 48 +80000fd0: 67 80 00 00 ret +80000fd4: 83 27 49 00 lw a5, 4(s2) +80000fd8: 83 a6 44 00 lw a3, 4(s1) +80000fdc: 93 87 f7 ff addi a5, a5, -1 +80000fe0: 63 8e 87 04 beq a5, s0, 92 +80000fe4: 23 a2 04 00 sw zero, 4(s1) +80000fe8: e3 88 06 fa beqz a3, -80 +80000fec: 83 27 89 18 lw a5, 392(s2) +80000ff0: 33 97 8a 00 sll a4, s5, s0 +80000ff4: 03 2c 49 00 lw s8, 4(s2) +80000ff8: b3 77 f7 00 and a5, a4, a5 +80000ffc: 63 92 07 02 bnez a5, 36 +80001000: e7 80 06 00 jalr a3 +80001004: 03 27 49 00 lw a4, 4(s2) +80001008: 83 27 8a 14 lw a5, 328(s4) +8000100c: 63 14 87 01 bne a4, s8, 8 +80001010: e3 04 f9 f8 beq s2, a5, -120 +80001014: e3 88 07 f8 beqz a5, -112 +80001018: 13 89 07 00 mv s2, a5 +8000101c: 6f f0 df f5 j -164 +80001020: 83 27 c9 18 lw a5, 396(s2) +80001024: 83 a5 44 08 lw a1, 132(s1) +80001028: 33 77 f7 00 and a4, a4, a5 +8000102c: 63 1c 07 00 bnez a4, 24 +80001030: 13 05 0b 00 mv a0, s6 +80001034: e7 80 06 00 jalr a3 +80001038: 6f f0 df fc j -52 +8000103c: 23 22 89 00 sw s0, 4(s2) +80001040: 6f f0 9f fa j -88 +80001044: 13 85 05 00 mv a0, a1 +80001048: e7 80 06 00 jalr a3 +8000104c: 6f f0 9f fb j -72 Disassembly of section .init_array: -80001000 __preinit_array_start: -80001000: 50 00 -80001002: 00 80 +80002050 __preinit_array_start: +80002050: 50 00 +80002052: 00 80 Disassembly of section .data: -80001008 impure_data: -80001008: 00 00 -8000100a: 00 00 -8000100c: f4 12 -8000100e: 00 80 -80001010: 5c 13 -80001012: 00 80 -80001014: c4 13 -80001016: 00 80 +80002058 impure_data: +80002058: 00 00 +8000205a: 00 00 +8000205c: 44 23 +8000205e: 00 80 +80002060: ac 23 +80002062: 00 80 +80002064: 14 24 +80002066: 00 80 ... -800010b0: 01 00 -800010b2: 00 00 -800010b4: 00 00 -800010b6: 00 00 -800010b8: 0e 33 -800010ba: cd ab -800010bc: 34 12 -800010be: 6d e6 -800010c0: ec de -800010c2: 05 00 -800010c4: 0b 00 00 00 +80002100: 01 00 +80002102: 00 00 +80002104: 00 00 +80002106: 00 00 +80002108: 0e 33 +8000210a: cd ab +8000210c: 34 12 +8000210e: 6d e6 +80002110: ec de +80002112: 05 00 +80002114: 0b 00 00 00 ... Disassembly of section .sdata: -80001430 _global_impure_ptr: -80001430: 08 10 -80001432: 00 80 +80002480 _global_impure_ptr: +80002480: 58 20 +80002482: 00 80 Disassembly of section .bss: -80001434 g_wspawn_args: +80002484 g_wspawn_args: ... Disassembly of section .comment: @@ -1223,28 +1255,28 @@ Disassembly of section .symtab: 2c: 03 00 02 00 lb zero, 0(tp) 30: 00 00 32: 00 00 - 34: 00 10 + 34: 50 20 36: 00 80 38: 00 00 3a: 00 00 3c: 03 00 03 00 lb zero, 0(t1) 40: 00 00 42: 00 00 - 44: 08 10 + 44: 58 20 46: 00 80 48: 00 00 4a: 00 00 4c: 03 00 04 00 lb zero, 0(s0) 50: 00 00 52: 00 00 - 54: 30 14 + 54: 80 24 56: 00 80 58: 00 00 5a: 00 00 5c: 03 00 05 00 lb zero, 0(a0) 60: 00 00 62: 00 00 - 64: 34 14 + 64: 84 24 66: 00 80 68: 00 00 6a: 00 00 @@ -1260,7 +1292,7 @@ Disassembly of section .symtab: 9e: f1 ff a0: 0e 00 a2: 00 00 - a4: 70 0b + a4: f0 0b a6: 00 80 a8: 00 00 aa: 00 00 @@ -1317,7 +1349,7 @@ Disassembly of section .symtab: 14e: f1 ff 150: 85 00 152: 00 00 - 154: 08 10 + 154: 58 20 156: 00 80 158: 28 04 15a: 00 00 @@ -1328,49 +1360,49 @@ Disassembly of section .symtab: 16e: f1 ff 170: 91 00 172: 00 00 - 174: 04 10 + 174: 54 20 176: 00 80 178: 00 00 17a: 00 00 17c: 00 00 17e: 03 00 a2 00 lb zero, 10(tp) 182: 00 00 - 184: 04 10 + 184: 54 20 186: 00 80 188: 00 00 18a: 00 00 18c: 00 00 18e: 03 00 b5 00 lb zero, 11(a0) 192: 00 00 - 194: 04 10 + 194: 54 20 196: 00 80 198: 00 00 19a: 00 00 19c: 00 00 19e: 03 00 c6 00 lb zero, 12(a2) 1a2: 00 00 - 1a4: 00 10 + 1a4: 50 20 1a6: 00 80 1a8: 00 00 1aa: 00 00 1ac: 00 00 1ae: 03 00 da 00 lb zero, 13(s4) 1b2: 00 00 - 1b4: 00 10 + 1b4: 50 20 1b6: 00 80 1b8: 00 00 1ba: 00 00 1bc: 00 00 1be: 03 00 ed 00 lb zero, 14(s10) 1c2: 00 00 - 1c4: 00 10 + 1c4: 50 20 1c6: 00 80 1c8: 00 00 1ca: 00 00 1cc: 00 00 1ce: 03 00 03 01 lb zero, 16(t1) 1d2: 00 00 - 1d4: c4 0b + 1d4: 44 0c 1d6: 00 80 1d8: 00 00 1da: 00 00 @@ -1383,7 +1415,7 @@ Disassembly of section .symtab: 1ee: f1 ff 1f0: 1c 01 1f2: 00 00 - 1f4: 74 0b + 1f4: f4 0b 1f6: 00 80 1f8: 00 00 1fa: 00 00 @@ -1391,60 +1423,60 @@ Disassembly of section .symtab: 1fe: 02 00 200: 26 01 202: 00 00 - 204: 68 00 - 206: 00 80 - 208: 4c 01 + 204: 00 04 + 206: 00 00 + 208: 00 00 20a: 00 00 - 20c: 12 00 - 20e: 02 00 - 210: 3c 01 - 212: 00 00 - 214: 00 04 - 216: 00 00 - 218: 00 00 + 20c: 10 00 + 20e: f1 ff + 210: 33 01 00 00 add sp, zero, zero + 214: 84 24 + 216: 00 80 + 218: 40 00 21a: 00 00 - 21c: 10 00 - 21e: f1 ff - 220: 49 01 + 21c: 11 00 + 21e: 06 00 + 220: 41 01 222: 00 00 - 224: 34 14 + 224: fc 0b 226: 00 80 - 228: 20 00 + 228: 00 00 22a: 00 00 - 22c: 11 00 - 22e: 06 00 - 230: 57 01 00 00 - 234: 7c 0b + 22c: 12 00 + 22e: 02 00 + 230: 48 01 + 232: 00 00 + 234: 80 24 236: 00 80 238: 00 00 23a: 00 00 - 23c: 12 00 - 23e: 02 00 - 240: 5e 01 + 23c: 10 00 + 23e: 05 00 + 240: 58 01 242: 00 00 - 244: 30 14 + 244: 68 00 246: 00 80 - 248: 00 00 + 248: 48 01 24a: 00 00 - 24c: 10 00 - 24e: 05 00 + 24c: 12 00 + 24e: 02 00 250: 6e 01 252: 00 00 - 254: 08 18 + 254: 58 28 256: 00 80 258: 00 00 25a: 00 00 25c: 10 00 25e: f1 ff 260: 7f 01 00 00 - 264: 94 0b + 264: 14 0c 266: 00 80 268: 00 00 26a: 00 00 26c: 12 00 26e: 02 00 270: 87 01 00 00 - 274: d4 0b + 274: 54 0c 276: 00 80 278: 00 00 27a: 00 00 @@ -1452,15 +1484,15 @@ Disassembly of section .symtab: 27e: 02 00 280: 94 01 282: 00 00 - 284: 4c 02 + 284: 48 02 286: 00 80 - 288: 88 01 + 288: 0c 02 28a: 00 00 28c: 12 00 28e: 02 00 290: a1 01 292: 00 00 - 294: 8c 0b + 294: 0c 0c 296: 00 80 298: 00 00 29a: 00 00 @@ -1468,7 +1500,7 @@ Disassembly of section .symtab: 29e: 02 00 2a0: aa 01 2a2: 00 00 - 2a4: 30 14 + 2a4: 80 24 2a6: 00 80 2a8: 04 00 2aa: 00 00 @@ -1476,14 +1508,14 @@ Disassembly of section .symtab: 2ae: 05 00 2b0: bd 01 2b2: 00 00 - 2b4: 98 0c + 2b4: 18 0d 2b6: 00 80 2b8: 9c 00 2ba: 00 00 2bc: 12 00 2be: 02 00 2c0: cf 01 00 00 fnmadd.s ft3, ft0, ft0, ft0, rne - 2c4: cc 0b + 2c4: 4c 0c 2c6: 00 80 2c8: 00 00 2ca: 00 00 @@ -1491,7 +1523,7 @@ Disassembly of section .symtab: 2ce: 02 00 2d0: de 01 2d2: 00 00 - 2d4: 9c 0b + 2d4: 1c 0c 2d6: 00 80 2d8: 00 00 2da: 00 00 @@ -1499,7 +1531,7 @@ Disassembly of section .symtab: 2de: 02 00 2e0: e9 01 2e2: 00 00 - 2e4: ac 0b + 2e4: 2c 0c 2e6: 00 80 2e8: 00 00 2ea: 00 00 @@ -1507,7 +1539,7 @@ Disassembly of section .symtab: 2ee: 02 00 2f0: f6 01 2f2: 00 00 - 2f4: 3c 0c + 2f4: bc 0c 2f6: 00 80 2f8: 5c 00 2fa: 00 00 @@ -1523,7 +1555,7 @@ Disassembly of section .symtab: 30e: f1 ff 310: 14 02 312: 00 00 - 314: 38 0b + 314: b8 0b 316: 00 80 318: 00 00 31a: 00 00 @@ -1531,7 +1563,7 @@ Disassembly of section .symtab: 31e: 02 00 320: 1e 02 322: 00 00 - 324: 84 0b + 324: 04 0c 326: 00 80 328: 00 00 32a: 00 00 @@ -1539,7 +1571,7 @@ Disassembly of section .symtab: 32e: 02 00 330: 29 02 332: 00 00 - 334: ac 0e + 334: 2c 0f 336: 00 80 338: 24 01 33a: 00 00 @@ -1555,7 +1587,7 @@ Disassembly of section .symtab: 34e: 01 00 350: 3a 02 352: 00 00 - 354: 10 0e + 354: 90 0e 356: 00 80 358: 9c 00 35a: 00 00 @@ -1563,28 +1595,28 @@ Disassembly of section .symtab: 35e: 02 00 360: 4e 02 362: 00 00 - 364: 04 04 + 364: 84 04 366: 00 80 368: 34 02 36a: 00 00 36c: 12 00 36e: 02 00 370: 63 02 00 00 beqz zero, 4 - 374: 54 14 + 374: c4 24 376: 00 80 378: 00 00 37a: 00 00 37c: 10 00 37e: 06 00 380: 6f 02 00 00 jal tp, 0 - 384: 34 14 + 384: 84 24 386: 00 80 388: 00 00 38a: 00 00 38c: 10 00 38e: 06 00 390: 7b 02 00 00 - 394: 34 0d + 394: b4 0d 396: 00 80 398: dc 00 39a: 00 00 @@ -1592,14 +1624,14 @@ Disassembly of section .symtab: 39e: 02 00 3a0: 82 02 3a2: 00 00 - 3a4: d4 03 + 3a4: 54 04 3a6: 00 80 3a8: 30 00 3aa: 00 00 3ac: 12 00 3ae: 02 00 3b0: 87 02 00 00 - 3b4: 38 06 + 3b4: b8 06 3b6: 00 80 3b8: 80 02 3ba: 00 00 @@ -1607,7 +1639,7 @@ Disassembly of section .symtab: 3be: 02 00 3c0: a6 02 3c2: 00 00 - 3c4: e4 0b + 3c4: 64 0c 3c6: 00 80 3c8: 00 00 3ca: 00 00 @@ -1615,14 +1647,14 @@ Disassembly of section .symtab: 3ce: 02 00 3d0: b4 02 3d2: 00 00 - 3d4: f4 0b + 3d4: 74 0c 3d6: 00 80 3d8: 14 00 3da: 00 00 3dc: 12 00 3de: 02 00 3e0: bb 02 00 00 - 3e4: bc 0b + 3e4: 3c 0c 3e6: 00 80 3e8: 00 00 3ea: 00 00 @@ -1630,7 +1662,7 @@ Disassembly of section .symtab: 3ee: 02 00 3f0: c9 02 3f2: 00 00 - 3f4: dc 0b + 3f4: 5c 0c 3f6: 00 80 3f8: 00 00 3fa: 00 00 @@ -1638,7 +1670,7 @@ Disassembly of section .symtab: 3fe: 02 00 400: d6 02 402: 00 00 - 404: a4 0b + 404: 24 0c 406: 00 80 408: 00 00 40a: 00 00 @@ -1646,69 +1678,70 @@ Disassembly of section .symtab: 40e: 02 00 410: e2 02 412: 00 00 - 414: b4 01 + 414: 58 20 416: 00 80 - 418: 98 00 + 418: 00 00 41a: 00 00 - 41c: 12 00 - 41e: 02 00 - 420: fb 02 00 00 - 424: 08 10 + 41c: 10 00 + 41e: 04 00 + 420: f1 02 + 422: 00 00 + 424: 84 24 426: 00 80 428: 00 00 42a: 00 00 42c: 10 00 - 42e: 04 00 - 430: 0a 03 + 42e: 05 00 + 430: 9d 00 432: 00 00 - 434: 34 14 + 434: c4 24 436: 00 80 438: 00 00 43a: 00 00 43c: 10 00 - 43e: 05 00 - 440: 9d 00 - 442: 00 00 - 444: 54 14 + 43e: 06 00 + 440: 27 03 00 00 + 444: 88 0c 446: 00 80 - 448: 00 00 + 448: 34 00 44a: 00 00 - 44c: 10 00 - 44e: 06 00 - 450: 20 03 + 44c: 12 00 + 44e: 02 00 + 450: f8 02 452: 00 00 - 454: 08 0c + 454: b0 01 456: 00 80 - 458: 34 00 + 458: 98 00 45a: 00 00 45c: 12 00 45e: 02 00 - 460: 11 03 + 460: 18 03 462: 00 00 - 464: b4 0b + 464: 34 0c 466: 00 80 468: 00 00 46a: 00 00 46c: 12 00 46e: 02 00 - 470: 1f 03 00 00 - 474: 30 0b + 470: 26 03 + 472: 00 00 + 474: b0 0b 476: 00 80 478: 00 00 47a: 00 00 47c: 12 00 47e: 02 00 - 480: 25 03 + 480: 2c 03 482: 00 00 - 484: b8 08 + 484: 38 09 486: 00 80 488: 78 02 48a: 00 00 48c: 12 00 48e: 02 00 - 490: 49 03 + 490: 50 03 492: 00 00 - 494: ec 0b + 494: 6c 0c 496: 00 80 498: 00 00 49a: 00 00 @@ -1748,13 +1781,12 @@ Disassembly of section .strtab: 3e: 5f 6b 65 72 42: 6e 65 44: 6c 2d - 46: 34 34 - 48: 2d 30 - 4a: 34 2d - 4c: 33 35 2d 66 - 50: 62 2d - 52: 65 31 - 54: 2e 63 + 46: 36 31 + 48: 2d 64 + 4a: 61 2d + 4c: 63 34 2d 64 + 50: 34 2d + 52: 37 63 2e 63 lui t1, 406246 56: 00 70 58: 61 72 5a: 61 6c @@ -1839,31 +1871,32 @@ Disassembly of section .strtab: 11e: 5f 77 73 70 122: 61 77 124: 6e 00 - 126: 6b 65 72 6e - 12a: 65 6c - 12c: 5f 73 70 61 - 130: 77 6e 5f 72 - 134: 75 6e - 136: 5f 77 61 72 - 13a: 70 00 - 13c: 5f 5f 73 74 - 140: 61 63 - 142: 6b 5f 73 69 - 146: 7a 65 - 148: 00 67 - 14a: 5f 77 73 70 - 14e: 61 77 - 150: 6e 5f - 152: 61 72 - 154: 67 73 00 76 - 158: 78 5f - 15a: 74 6d - 15c: 63 00 5f 5f beq t5, s5, 1504 - 160: 53 44 41 54 - 164: 41 5f - 166: 42 45 - 168: 47 49 4e 5f - 16c: 5f 00 5f 5f + 126: 5f 5f 73 74 + 12a: 61 63 + 12c: 6b 5f 73 69 + 130: 7a 65 + 132: 00 67 + 134: 5f 77 73 70 + 138: 61 77 + 13a: 6e 5f + 13c: 61 72 + 13e: 67 73 00 76 + 142: 78 5f + 144: 74 6d + 146: 63 00 5f 5f beq t5, s5, 1504 + 14a: 53 44 41 54 + 14e: 41 5f + 150: 42 45 + 152: 47 49 4e 5f + 156: 5f 00 6b 65 + 15a: 72 6e + 15c: 65 6c + 15e: 5f 73 70 61 + 162: 77 6e 5f 63 + 166: 61 6c + 168: 6c 62 + 16a: 61 63 + 16c: 6b 00 5f 5f 170: 67 6c 6f 62 174: 61 6c 176: 5f 70 6f 69 @@ -2000,51 +2033,51 @@ Disassembly of section .strtab: 2d8: 5f 77 61 72 2dc: 70 5f 2de: 67 69 64 00 - 2e2: 6b 65 72 6e - 2e6: 65 6c - 2e8: 5f 73 70 61 - 2ec: 77 6e 5f 72 - 2f0: 75 6e - 2f2: 5f 74 68 72 - 2f6: 65 61 - 2f8: 64 73 - 2fa: 00 5f - 2fc: 5f 44 41 54 - 300: 41 5f - 302: 42 45 - 304: 47 49 4e 5f - 308: 5f 00 5f 65 - 30c: 64 61 - 30e: 74 61 - 310: 00 76 - 312: 78 5f - 314: 74 68 - 316: 72 65 - 318: 61 64 - 31a: 5f 6c 69 64 - 31e: 00 5f - 320: 65 78 - 322: 69 74 - 324: 00 5f - 326: 70 6f - 328: 63 6c 5f 6b bltu t5, s5, 1720 - 32c: 65 72 - 32e: 6e 65 + 2e2: 5f 5f 44 41 + 2e6: 54 41 + 2e8: 5f 42 45 47 + 2ec: 49 4e + 2ee: 5f 5f 00 5f + 2f2: 65 64 + 2f4: 61 74 + 2f6: 61 00 + 2f8: 6b 65 72 6e + 2fc: 65 6c + 2fe: 5f 73 70 61 + 302: 77 6e 5f 72 + 306: 65 6d + 308: 61 69 + 30a: 6e 69 + 30c: 6e 67 + 30e: 5f 63 61 6c + 312: 6c 62 + 314: 61 63 + 316: 6b 00 76 78 + 31a: 5f 74 68 72 + 31e: 65 61 + 320: 64 5f + 322: 6c 69 + 324: 64 00 + 326: 5f 65 78 69 + 32a: 74 00 + 32c: 5f 70 6f 63 330: 6c 5f - 332: 73 66 69 6c csrrsi a2, 1734, 18 - 336: 74 65 - 338: 72 5f - 33a: 77 6f 72 6b - 33e: 67 72 6f 75 - 342: 70 5f - 344: 66 61 - 346: 73 74 00 76 csrrci s0, 1888, 0 - 34a: 78 5f - 34c: 6e 75 - 34e: 6d 5f - 350: 69 6e - 352: 73 74 72 73 csrrci s0, 1847, 4 - 356: 00 + 332: 6b 65 72 6e + 336: 65 6c + 338: 5f 73 66 69 + 33c: 6c 74 + 33e: 65 72 + 340: 5f 77 6f 72 + 344: 6b 67 72 6f + 348: 75 70 + 34a: 5f 66 61 73 + 34e: 74 00 + 350: 76 78 + 352: 5f 6e 75 6d + 356: 5f 69 6e 73 + 35a: 74 72 + 35c: 73 + 35d: 00 Disassembly of section .shstrtab: diff --git a/benchmarks/opencl/sgemm/kernel.pocl b/benchmarks/opencl/sgemm/kernel.pocl index c565830f..5c276444 100644 Binary files a/benchmarks/opencl/sgemm/kernel.pocl and b/benchmarks/opencl/sgemm/kernel.pocl differ diff --git a/benchmarks/opencl/sgemm/sgemm.dump b/benchmarks/opencl/sgemm/sgemm.dump index 87c1df79..360612ca 100644 --- a/benchmarks/opencl/sgemm/sgemm.dump +++ b/benchmarks/opencl/sgemm/sgemm.dump @@ -1,30 +1,30 @@ -/tmp/pocl_vortex_kernel-41-fe-25-b2-35.elf: file format ELF32-riscv +/tmp/pocl_vortex_kernel-58-c3-55-9d-28.elf: file format ELF32-riscv Disassembly of section .init: 80000000 _start: 80000000: 97 15 00 00 auipc a1, 1 -80000004: 93 85 05 84 addi a1, a1, -1984 +80000004: 93 85 05 8c addi a1, a1, -1856 80000008: 73 25 10 fc csrr a0, 4033 8000000c: 6b 10 b5 00 -80000010: ef 00 10 03 jal 2096 +80000010: ef 00 10 0b jal 2224 80000014: 13 05 10 00 addi a0, zero, 1 80000018: 6b 00 05 00 8000001c: 17 15 00 00 auipc a0, 1 80000020: 13 05 45 42 addi a0, a0, 1060 80000024: 17 16 00 00 auipc a2, 1 -80000028: 13 06 c6 43 addi a2, a2, 1084 +80000028: 13 06 c6 45 addi a2, a2, 1116 8000002c: 33 06 a6 40 sub a2, a2, a0 80000030: 93 05 00 00 mv a1, zero -80000034: ef 00 90 20 jal 2568 +80000034: ef 00 90 28 jal 2696 80000038: 17 15 00 00 auipc a0, 1 -8000003c: 13 05 c5 90 addi a0, a0, -1780 -80000040: ef 00 d0 0b jal 2236 -80000044: ef 00 d0 15 jal 2396 -80000048: ef 00 c0 38 jal 908 -8000004c: 6f 00 50 0c j 2244 +8000003c: 13 05 c5 98 addi a0, a0, -1652 +80000040: ef 00 d0 13 jal 2364 +80000044: ef 00 d0 1d jal 2524 +80000048: ef 00 c0 40 jal 1036 +8000004c: 6f 00 50 14 j 2372 Disassembly of section .text: @@ -32,11 +32,11 @@ Disassembly of section .text: 80000050: 93 07 00 00 mv a5, zero 80000054: 63 88 07 00 beqz a5, 16 80000058: 37 15 00 80 lui a0, 524289 -8000005c: 13 05 45 94 addi a0, a0, -1724 -80000060: 6f 00 d0 09 j 2204 +8000005c: 13 05 45 9c addi a0, a0, -1596 +80000060: 6f 00 d0 11 j 2332 80000064: 67 80 00 00 ret -80000068 kernel_spawn_run_warp: +80000068 kernel_spawn_callback: 80000068: 13 01 01 fd addi sp, sp, -48 8000006c: 23 26 11 02 sw ra, 44(sp) 80000070: 23 24 81 02 sw s0, 40(sp) @@ -48,188 +48,188 @@ Disassembly of section .text: 80000088: 23 28 61 01 sw s6, 16(sp) 8000008c: 23 26 71 01 sw s7, 12(sp) 80000090: 23 24 81 01 sw s8, 8(sp) -80000094: ef 00 10 04 jal 2112 -80000098: ef 00 c0 7e jal 2028 -8000009c: ef 00 10 03 jal 2096 +80000094: ef 00 10 0c jal 2240 +80000098: ef 00 d0 06 jal 2156 +8000009c: ef 00 10 0b jal 2224 800000a0: 93 04 05 00 mv s1, a0 -800000a4: ef 00 10 00 jal 2048 +800000a4: ef 00 10 08 jal 2176 800000a8: 93 09 05 00 mv s3, a0 -800000ac: ef 00 90 00 jal 2056 +800000ac: ef 00 90 08 jal 2184 800000b0: 13 09 05 00 mv s2, a0 -800000b4: ef 00 10 02 jal 2080 +800000b4: ef 00 10 0a jal 2208 800000b8: b7 15 00 80 lui a1, 524289 800000bc: 93 85 05 44 addi a1, a1, 1088 800000c0: 13 96 24 00 slli a2, s1, 2 800000c4: b3 05 b6 00 add a1, a2, a1 -800000c8: 03 ab 05 00 lw s6, 0(a1) -800000cc: 83 25 4b 01 lw a1, 20(s6) -800000d0: 03 26 0b 01 lw a2, 16(s6) -800000d4: 93 86 05 00 mv a3, a1 -800000d8: 63 c4 35 01 blt a1, s3, 8 -800000dc: 93 86 09 00 mv a3, s3 -800000e0: b3 a5 b9 00 slt a1, s3, a1 -800000e4: 33 07 b6 00 add a4, a2, a1 -800000e8: 93 05 10 00 addi a1, zero, 1 -800000ec: 63 4a b7 08 blt a4, a1, 148 -800000f0: 83 25 0b 00 lw a1, 0(s6) -800000f4: 83 aa 05 00 lw s5, 0(a1) -800000f8: 83 a7 45 00 lw a5, 4(a1) -800000fc: 83 24 cb 00 lw s1, 12(s6) -80000100: 33 8a 57 03 mul s4, a5, s5 -80000104: 13 0c f7 ff addi s8, a4, -1 -80000108: 33 86 c9 02 mul a2, s3, a2 -8000010c: 33 86 c6 00 add a2, a3, a2 -80000110: 33 05 c5 02 mul a0, a0, a2 -80000114: 33 85 a4 00 add a0, s1, a0 -80000118: 33 06 e9 02 mul a2, s2, a4 -8000011c: b3 04 c5 00 add s1, a0, a2 -80000120: 33 09 f0 40 neg s2, a5 -80000124: b3 0b 40 41 neg s7, s4 -80000128: 33 c6 44 03 div a2, s1, s4 +800000c8: 03 ac 05 00 lw s8, 0(a1) +800000cc: 83 26 4c 01 lw a3, 20(s8) +800000d0: 83 25 0c 01 lw a1, 16(s8) +800000d4: 13 86 06 00 mv a2, a3 +800000d8: 63 c4 36 01 blt a3, s3, 8 +800000dc: 13 86 09 00 mv a2, s3 +800000e0: b3 a6 d9 00 slt a3, s3, a3 +800000e4: b3 86 d5 00 add a3, a1, a3 +800000e8: 13 07 10 00 addi a4, zero, 1 +800000ec: 63 c8 e6 08 blt a3, a4, 144 +800000f0: 33 87 35 03 mul a4, a1, s3 +800000f4: 83 25 0c 00 lw a1, 0(s8) +800000f8: 33 06 e6 00 add a2, a2, a4 +800000fc: 03 27 cc 00 lw a4, 12(s8) +80000100: 33 05 a6 02 mul a0, a2, a0 +80000104: 03 aa 05 00 lw s4, 0(a1) +80000108: 03 a6 45 00 lw a2, 4(a1) +8000010c: 33 05 e5 00 add a0, a0, a4 +80000110: 33 87 26 03 mul a4, a3, s2 +80000114: 33 04 e5 00 add s0, a0, a4 +80000118: 33 09 46 03 mul s2, a2, s4 +8000011c: b3 0a d4 00 add s5, s0, a3 +80000120: 33 0b c0 40 neg s6, a2 +80000124: b3 0b 20 41 neg s7, s2 +80000128: 33 46 24 03 div a2, s0, s2 8000012c: 33 85 cb 02 mul a0, s7, a2 -80000130: 33 85 a4 00 add a0, s1, a0 -80000134: b3 46 55 03 div a3, a0, s5 +80000130: 33 05 a4 00 add a0, s0, a0 +80000134: b3 46 45 03 div a3, a0, s4 80000138: 03 a5 c5 00 lw a0, 12(a1) -8000013c: 33 07 c9 02 mul a4, s2, a2 +8000013c: 33 07 cb 02 mul a4, s6, a2 80000140: 33 07 d7 40 sub a4, a4, a3 -80000144: 33 87 ea 02 mul a4, s5, a4 +80000144: 33 07 ea 02 mul a4, s4, a4 80000148: 33 08 e5 00 add a6, a0, a4 8000014c: 03 a7 05 01 lw a4, 16(a1) -80000150: 03 a4 45 01 lw s0, 20(a1) -80000154: 83 27 4b 00 lw a5, 4(s6) -80000158: 03 25 8b 00 lw a0, 8(s6) +80000150: 83 a4 45 01 lw s1, 20(a1) +80000154: 83 27 4c 00 lw a5, 4(s8) +80000158: 03 25 8c 00 lw a0, 8(s8) 8000015c: b3 06 d7 00 add a3, a4, a3 -80000160: 33 07 c4 00 add a4, s0, a2 -80000164: 33 86 04 01 add a2, s1, a6 +80000160: 33 87 c4 00 add a4, s1, a2 +80000164: 33 06 04 01 add a2, s0, a6 80000168: e7 80 07 00 jalr a5 -8000016c: 63 0a 0c 00 beqz s8, 20 -80000170: 83 25 0b 00 lw a1, 0(s6) -80000174: 13 0c fc ff addi s8, s8, -1 -80000178: 93 84 14 00 addi s1, s1, 1 -8000017c: 6f f0 df fa j -84 -80000180: 13 b5 19 00 seqz a0, s3 -80000184: 03 2c 81 00 lw s8, 8(sp) -80000188: 83 2b c1 00 lw s7, 12(sp) -8000018c: 03 2b 01 01 lw s6, 16(sp) -80000190: 83 2a 41 01 lw s5, 20(sp) -80000194: 03 2a 81 01 lw s4, 24(sp) -80000198: 83 29 c1 01 lw s3, 28(sp) -8000019c: 03 29 01 02 lw s2, 32(sp) -800001a0: 83 24 41 02 lw s1, 36(sp) -800001a4: 03 24 81 02 lw s0, 40(sp) -800001a8: 83 20 c1 02 lw ra, 44(sp) -800001ac: 13 01 01 03 addi sp, sp, 48 -800001b0: 6f 00 40 6d j 1748 +8000016c: 13 04 14 00 addi s0, s0, 1 +80000170: 63 56 54 01 bge s0, s5, 12 +80000174: 83 25 0c 00 lw a1, 0(s8) +80000178: 6f f0 1f fb j -80 +8000017c: 13 b5 19 00 seqz a0, s3 +80000180: 03 2c 81 00 lw s8, 8(sp) +80000184: 83 2b c1 00 lw s7, 12(sp) +80000188: 03 2b 01 01 lw s6, 16(sp) +8000018c: 83 2a 41 01 lw s5, 20(sp) +80000190: 03 2a 81 01 lw s4, 24(sp) +80000194: 83 29 c1 01 lw s3, 28(sp) +80000198: 03 29 01 02 lw s2, 32(sp) +8000019c: 83 24 41 02 lw s1, 36(sp) +800001a0: 03 24 81 02 lw s0, 40(sp) +800001a4: 83 20 c1 02 lw ra, 44(sp) +800001a8: 13 01 01 03 addi sp, sp, 48 +800001ac: 6f 00 80 75 j 1880 -800001b4 kernel_spawn_run_threads: -800001b4: 13 01 01 ff addi sp, sp, -16 -800001b8: 23 26 11 00 sw ra, 12(sp) -800001bc: 23 24 81 00 sw s0, 8(sp) -800001c0: ef 00 40 6c jal 1732 -800001c4: ef 00 80 70 jal 1800 -800001c8: 13 04 05 00 mv s0, a0 -800001cc: ef 00 80 6f jal 1784 -800001d0: b7 15 00 80 lui a1, 524289 -800001d4: 93 85 05 44 addi a1, a1, 1088 -800001d8: 13 16 24 00 slli a2, s0, 2 -800001dc: b3 05 b6 00 add a1, a2, a1 -800001e0: 03 a6 05 00 lw a2, 0(a1) -800001e4: 83 25 06 00 lw a1, 0(a2) -800001e8: 83 26 c6 00 lw a3, 12(a2) -800001ec: 03 a7 05 00 lw a4, 0(a1) -800001f0: 83 a7 45 00 lw a5, 4(a1) -800001f4: 33 85 a6 00 add a0, a3, a0 -800001f8: b3 86 e7 02 mul a3, a5, a4 -800001fc: b3 47 d5 02 div a5, a0, a3 -80000200: b3 86 d7 02 mul a3, a5, a3 -80000204: 03 a4 c5 00 lw s0, 12(a1) -80000208: 33 05 d5 40 sub a0, a0, a3 -8000020c: b3 46 e5 02 div a3, a0, a4 -80000210: 33 88 e6 02 mul a6, a3, a4 -80000214: b3 08 a4 00 add a7, s0, a0 -80000218: 03 a7 05 01 lw a4, 16(a1) -8000021c: 03 a4 45 01 lw s0, 20(a1) -80000220: 83 22 46 00 lw t0, 4(a2) -80000224: 03 25 86 00 lw a0, 8(a2) -80000228: 33 86 08 41 sub a2, a7, a6 -8000022c: b3 06 d7 00 add a3, a4, a3 -80000230: 33 07 f4 00 add a4, s0, a5 -80000234: e7 80 02 00 jalr t0 -80000238: 13 05 10 00 addi a0, zero, 1 -8000023c: 03 24 81 00 lw s0, 8(sp) -80000240: 83 20 c1 00 lw ra, 12(sp) -80000244: 13 01 01 01 addi sp, sp, 16 -80000248: 6f 00 c0 63 j 1596 +800001b0 kernel_spawn_remaining_callback: +800001b0: 13 01 01 ff addi sp, sp, -16 +800001b4: 23 26 11 00 sw ra, 12(sp) +800001b8: 23 24 81 00 sw s0, 8(sp) +800001bc: ef 00 80 74 jal 1864 +800001c0: ef 00 c0 78 jal 1932 +800001c4: 13 04 05 00 mv s0, a0 +800001c8: ef 00 c0 77 jal 1916 +800001cc: b7 15 00 80 lui a1, 524289 +800001d0: 93 85 05 44 addi a1, a1, 1088 +800001d4: 13 16 24 00 slli a2, s0, 2 +800001d8: b3 05 b6 00 add a1, a2, a1 +800001dc: 03 a6 05 00 lw a2, 0(a1) +800001e0: 83 25 06 00 lw a1, 0(a2) +800001e4: 83 26 c6 00 lw a3, 12(a2) +800001e8: 03 a7 05 00 lw a4, 0(a1) +800001ec: 83 a7 45 00 lw a5, 4(a1) +800001f0: 33 85 a6 00 add a0, a3, a0 +800001f4: b3 86 e7 02 mul a3, a5, a4 +800001f8: b3 47 d5 02 div a5, a0, a3 +800001fc: b3 86 d7 02 mul a3, a5, a3 +80000200: 03 a4 c5 00 lw s0, 12(a1) +80000204: 33 05 d5 40 sub a0, a0, a3 +80000208: b3 46 e5 02 div a3, a0, a4 +8000020c: 33 88 e6 02 mul a6, a3, a4 +80000210: b3 08 a4 00 add a7, s0, a0 +80000214: 03 a7 05 01 lw a4, 16(a1) +80000218: 03 a4 45 01 lw s0, 20(a1) +8000021c: 83 22 46 00 lw t0, 4(a2) +80000220: 03 25 86 00 lw a0, 8(a2) +80000224: 33 86 08 41 sub a2, a7, a6 +80000228: b3 06 d7 00 add a3, a4, a3 +8000022c: 33 07 f4 00 add a4, s0, a5 +80000230: e7 80 02 00 jalr t0 +80000234: 13 05 10 00 addi a0, zero, 1 +80000238: 03 24 81 00 lw s0, 8(sp) +8000023c: 83 20 c1 00 lw ra, 12(sp) +80000240: 13 01 01 01 addi sp, sp, 16 +80000244: 6f 00 00 6c j 1728 -8000024c kernel_spawn: -8000024c: 13 01 01 fc addi sp, sp, -64 -80000250: 23 2e 11 02 sw ra, 60(sp) -80000254: 23 2c 81 02 sw s0, 56(sp) -80000258: 23 2a 91 02 sw s1, 52(sp) -8000025c: 23 28 21 03 sw s2, 48(sp) -80000260: 23 26 31 03 sw s3, 44(sp) -80000264: 23 24 41 03 sw s4, 40(sp) -80000268: 23 22 51 03 sw s5, 36(sp) -8000026c: 23 20 61 03 sw s6, 32(sp) -80000270: 23 2e 71 01 sw s7, 28(sp) -80000274: 23 2c 81 01 sw s8, 24(sp) -80000278: 93 04 05 00 mv s1, a0 -8000027c: 83 2b 05 00 lw s7, 0(a0) -80000280: 03 24 45 00 lw s0, 4(a0) -80000284: 03 2c 85 00 lw s8, 8(a0) -80000288: 13 09 06 00 mv s2, a2 -8000028c: 93 89 05 00 mv s3, a1 -80000290: ef 00 40 65 jal 1620 -80000294: 13 0b 05 00 mv s6, a0 -80000298: ef 00 40 64 jal 1604 -8000029c: 13 0a 05 00 mv s4, a0 -800002a0: ef 00 40 63 jal 1588 -800002a4: 93 0a 05 00 mv s5, a0 -800002a8: ef 00 40 62 jal 1572 -800002ac: 93 05 70 00 addi a1, zero, 7 -800002b0: 63 ca a5 0e blt a1, a0, 244 -800002b4: b3 05 74 03 mul a1, s0, s7 -800002b8: 33 86 85 03 mul a2, a1, s8 -800002bc: b3 85 4a 03 mul a1, s5, s4 -800002c0: 93 06 10 00 addi a3, zero, 1 -800002c4: 63 c8 c5 00 blt a1, a2, 16 -800002c8: 63 da 66 01 bge a3, s6, 20 -800002cc: 63 4c d5 00 blt a0, a3, 24 -800002d0: 6f 00 40 0d j 212 -800002d4: b3 46 b6 02 div a3, a2, a1 -800002d8: e3 ca 66 ff blt a3, s6, -12 -800002dc: 93 06 0b 00 mv a3, s6 -800002e0: 63 52 d5 0c bge a0, a3, 196 -800002e4: 13 07 fb ff addi a4, s6, -1 -800002e8: b3 45 d6 02 div a1, a2, a3 -800002ec: 63 0e e5 00 beq a0, a4, 28 -800002f0: 13 06 00 00 mv a2, zero -800002f4: 33 0b b6 00 add s6, a2, a1 -800002f8: 33 46 5b 03 div a2, s6, s5 -800002fc: 93 06 00 00 mv a3, zero -80000300: 63 50 46 03 bge a2, s4, 32 -80000304: 6f 00 00 02 j 32 -80000308: b3 86 d5 02 mul a3, a1, a3 -8000030c: 33 06 d6 40 sub a2, a2, a3 -80000310: 33 0b b6 00 add s6, a2, a1 -80000314: 33 46 5b 03 div a2, s6, s5 -80000318: 93 06 00 00 mv a3, zero -8000031c: 63 44 46 01 blt a2, s4, 8 -80000320: b3 46 46 03 div a3, a2, s4 -80000324: 13 07 00 00 mv a4, zero -80000328: 93 07 10 00 addi a5, zero, 1 -8000032c: 63 88 06 00 beqz a3, 16 -80000330: 33 87 46 03 mul a4, a3, s4 -80000334: 33 07 e6 40 sub a4, a2, a4 -80000338: 93 87 06 00 mv a5, a3 -8000033c: 33 04 56 03 mul s0, a2, s5 +80000248 kernel_spawn: +80000248: 13 01 01 fc addi sp, sp, -64 +8000024c: 23 2e 11 02 sw ra, 60(sp) +80000250: 23 2c 81 02 sw s0, 56(sp) +80000254: 23 2a 91 02 sw s1, 52(sp) +80000258: 23 28 21 03 sw s2, 48(sp) +8000025c: 23 26 31 03 sw s3, 44(sp) +80000260: 23 24 41 03 sw s4, 40(sp) +80000264: 23 22 51 03 sw s5, 36(sp) +80000268: 23 20 61 03 sw s6, 32(sp) +8000026c: 23 2e 71 01 sw s7, 28(sp) +80000270: 23 2c 81 01 sw s8, 24(sp) +80000274: 93 04 05 00 mv s1, a0 +80000278: 83 2b 05 00 lw s7, 0(a0) +8000027c: 03 24 45 00 lw s0, 4(a0) +80000280: 03 2c 85 00 lw s8, 8(a0) +80000284: 13 09 06 00 mv s2, a2 +80000288: 93 89 05 00 mv s3, a1 +8000028c: ef 00 80 6d jal 1752 +80000290: 13 0b 05 00 mv s6, a0 +80000294: ef 00 80 6c jal 1736 +80000298: 13 0a 05 00 mv s4, a0 +8000029c: ef 00 80 6b jal 1720 +800002a0: 93 0a 05 00 mv s5, a0 +800002a4: ef 00 80 6a jal 1704 +800002a8: 93 05 f0 00 addi a1, zero, 15 +800002ac: 63 cc a5 16 blt a1, a0, 376 +800002b0: b3 05 74 03 mul a1, s0, s7 +800002b4: 33 86 85 03 mul a2, a1, s8 +800002b8: b3 85 4a 03 mul a1, s5, s4 +800002bc: 93 06 10 00 addi a3, zero, 1 +800002c0: 63 c8 c5 00 blt a1, a2, 16 +800002c4: 63 da 66 01 bge a3, s6, 20 +800002c8: 63 4c d5 00 blt a0, a3, 24 +800002cc: 6f 00 80 15 j 344 +800002d0: b3 46 b6 02 div a3, a2, a1 +800002d4: e3 ca 66 ff blt a3, s6, -12 +800002d8: 93 06 0b 00 mv a3, s6 +800002dc: 63 54 d5 14 bge a0, a3, 328 +800002e0: 13 07 fb ff addi a4, s6, -1 +800002e4: b3 45 d6 02 div a1, a2, a3 +800002e8: 63 0e e5 00 beq a0, a4, 28 +800002ec: 13 06 00 00 mv a2, zero +800002f0: b3 06 b6 00 add a3, a2, a1 +800002f4: 33 c6 56 03 div a2, a3, s5 +800002f8: 13 07 00 00 mv a4, zero +800002fc: 63 50 46 03 bge a2, s4, 32 +80000300: 6f 00 00 02 j 32 +80000304: b3 86 d5 02 mul a3, a1, a3 +80000308: 33 06 d6 40 sub a2, a2, a3 +8000030c: b3 06 b6 00 add a3, a2, a1 +80000310: 33 c6 56 03 div a2, a3, s5 +80000314: 13 07 00 00 mv a4, zero +80000318: 63 44 46 01 blt a2, s4, 8 +8000031c: 33 47 46 03 div a4, a2, s4 +80000320: 93 07 00 00 mv a5, zero +80000324: b3 0a 56 03 mul s5, a2, s5 +80000328: 13 04 10 00 addi s0, zero, 1 +8000032c: 63 08 07 00 beqz a4, 16 +80000330: b3 07 47 03 mul a5, a4, s4 +80000334: b3 07 f6 40 sub a5, a2, a5 +80000338: 13 04 07 00 mv s0, a4 +8000033c: 33 8b 56 41 sub s6, a3, s5 80000340: 23 20 91 00 sw s1, 0(sp) 80000344: 23 22 31 01 sw s3, 4(sp) 80000348: 23 24 21 01 sw s2, 8(sp) 8000034c: b3 85 a5 02 mul a1, a1, a0 80000350: 23 26 b1 00 sw a1, 12(sp) -80000354: 23 28 f1 00 sw a5, 16(sp) -80000358: 23 2a e1 00 sw a4, 20(sp) +80000354: 23 28 81 00 sw s0, 16(sp) +80000358: 23 2a f1 00 sw a5, 20(sp) 8000035c: b7 15 00 80 lui a1, 524289 80000360: 93 85 05 44 addi a1, a1, 1088 80000364: 13 15 25 00 slli a0, a0, 2 @@ -243,660 +243,692 @@ Disassembly of section .text: 80000384: 37 05 00 80 lui a0, 524288 80000388: 93 05 85 06 addi a1, a0, 104 8000038c: 13 05 06 00 mv a0, a2 -80000390: ef 00 c0 4e jal 1260 +80000390: ef 00 c0 56 jal 1388 80000394: ef f0 5f cd jal -812 -80000398: 63 06 8b 00 beq s6, s0, 12 -8000039c: 23 26 81 00 sw s0, 12(sp) -800003a0: ef f0 9f cc jal -824 -800003a4: 03 2c 81 01 lw s8, 24(sp) -800003a8: 83 2b c1 01 lw s7, 28(sp) -800003ac: 03 2b 01 02 lw s6, 32(sp) -800003b0: 83 2a 41 02 lw s5, 36(sp) -800003b4: 03 2a 81 02 lw s4, 40(sp) -800003b8: 83 29 c1 02 lw s3, 44(sp) -800003bc: 03 29 01 03 lw s2, 48(sp) -800003c0: 83 24 41 03 lw s1, 52(sp) -800003c4: 03 24 81 03 lw s0, 56(sp) -800003c8: 83 20 c1 03 lw ra, 60(sp) -800003cc: 13 01 01 04 addi sp, sp, 64 -800003d0: 67 80 00 00 ret +80000398: 63 06 0b 08 beqz s6, 140 +8000039c: 23 26 51 01 sw s5, 12(sp) +800003a0: 13 05 0b 00 mv a0, s6 +800003a4: ef 00 00 56 jal 1376 +800003a8: ef 00 40 5a jal 1444 +800003ac: 13 04 05 00 mv s0, a0 +800003b0: ef 00 40 59 jal 1428 +800003b4: b7 15 00 80 lui a1, 524289 +800003b8: 93 85 05 44 addi a1, a1, 1088 +800003bc: 13 16 24 00 slli a2, s0, 2 +800003c0: b3 05 b6 00 add a1, a2, a1 +800003c4: 03 a6 05 00 lw a2, 0(a1) +800003c8: 83 25 06 00 lw a1, 0(a2) +800003cc: 83 26 c6 00 lw a3, 12(a2) +800003d0: 03 a7 05 00 lw a4, 0(a1) +800003d4: 83 a7 45 00 lw a5, 4(a1) +800003d8: 33 85 a6 00 add a0, a3, a0 +800003dc: b3 86 e7 02 mul a3, a5, a4 +800003e0: b3 47 d5 02 div a5, a0, a3 +800003e4: b3 86 d7 02 mul a3, a5, a3 +800003e8: 83 a4 c5 00 lw s1, 12(a1) +800003ec: 33 05 d5 40 sub a0, a0, a3 +800003f0: b3 46 e5 02 div a3, a0, a4 +800003f4: 33 88 e6 02 mul a6, a3, a4 +800003f8: b3 84 a4 00 add s1, s1, a0 +800003fc: 03 a4 05 01 lw s0, 16(a1) +80000400: 03 a7 45 01 lw a4, 20(a1) +80000404: 83 28 46 00 lw a7, 4(a2) +80000408: 03 25 86 00 lw a0, 8(a2) +8000040c: 33 86 04 41 sub a2, s1, a6 +80000410: b3 06 d4 00 add a3, s0, a3 +80000414: 33 07 f7 00 add a4, a4, a5 +80000418: e7 80 08 00 jalr a7 +8000041c: 13 05 10 00 addi a0, zero, 1 +80000420: ef 00 40 4e jal 1252 +80000424: 03 2c 81 01 lw s8, 24(sp) +80000428: 83 2b c1 01 lw s7, 28(sp) +8000042c: 03 2b 01 02 lw s6, 32(sp) +80000430: 83 2a 41 02 lw s5, 36(sp) +80000434: 03 2a 81 02 lw s4, 40(sp) +80000438: 83 29 c1 02 lw s3, 44(sp) +8000043c: 03 29 01 03 lw s2, 48(sp) +80000440: 83 24 41 03 lw s1, 52(sp) +80000444: 03 24 81 03 lw s0, 56(sp) +80000448: 83 20 c1 03 lw ra, 60(sp) +8000044c: 13 01 01 04 addi sp, sp, 64 +80000450: 67 80 00 00 ret -800003d4 main: -800003d4: 13 01 01 ff addi sp, sp, -16 -800003d8: 23 26 11 00 sw ra, 12(sp) -800003dc: 37 05 00 80 lui a0, 524288 -800003e0: 93 05 c5 56 addi a1, a0, 1388 -800003e4: 37 05 ff 7f lui a0, 524272 -800003e8: 13 06 45 03 addi a2, a0, 52 -800003ec: 37 05 ff 7f lui a0, 524272 -800003f0: ef f0 df e5 jal -420 -800003f4: 13 05 00 00 mv a0, zero -800003f8: 83 20 c1 00 lw ra, 12(sp) -800003fc: 13 01 01 01 addi sp, sp, 16 -80000400: 67 80 00 00 ret +80000454 main: +80000454: 13 01 01 ff addi sp, sp, -16 +80000458: 23 26 11 00 sw ra, 12(sp) +8000045c: 37 05 00 80 lui a0, 524288 +80000460: 93 05 c5 5e addi a1, a0, 1516 +80000464: 37 05 ff 7f lui a0, 524272 +80000468: 13 06 45 03 addi a2, a0, 52 +8000046c: 37 05 ff 7f lui a0, 524272 +80000470: ef f0 9f dd jal -552 +80000474: 13 05 00 00 mv a0, zero +80000478: 83 20 c1 00 lw ra, 12(sp) +8000047c: 13 01 01 01 addi sp, sp, 16 +80000480: 67 80 00 00 ret -80000404 _pocl_kernel_sgemm: -80000404: 13 01 01 fe addi sp, sp, -32 -80000408: 23 2e 11 00 sw ra, 28(sp) -8000040c: 23 2c 81 00 sw s0, 24(sp) -80000410: 23 2a 91 00 sw s1, 20(sp) -80000414: 23 28 21 01 sw s2, 16(sp) -80000418: 23 26 31 01 sw s3, 12(sp) -8000041c: 23 24 41 01 sw s4, 8(sp) -80000420: 23 22 51 01 sw s5, 4(sp) -80000424: 23 20 61 01 sw s6, 0(sp) -80000428: 13 04 01 02 addi s0, sp, 32 -8000042c: 13 71 c1 ff andi sp, sp, -4 -80000430: 93 08 00 00 mv a7, zero -80000434: 83 22 87 01 lw t0, 24(a4) -80000438: 03 23 c7 01 lw t1, 28(a4) -8000043c: 83 24 c7 00 lw s1, 12(a4) -80000440: 83 23 07 02 lw t2, 32(a4) -80000444: 03 27 07 01 lw a4, 16(a4) -80000448: b3 87 f2 02 mul a5, t0, a5 -8000044c: 33 89 f4 00 add s2, s1, a5 -80000450: b3 07 03 03 mul a5, t1, a6 -80000454: b3 0e f7 00 add t4, a4, a5 -80000458: 33 87 d6 03 mul a4, a3, t4 -8000045c: b3 07 e9 00 add a5, s2, a4 -80000460: 93 97 27 00 slli a5, a5, 2 -80000464: 33 08 f6 00 add a6, a2, a5 -80000468: 13 17 27 00 slli a4, a4, 2 -8000046c: 33 8e e5 00 add t3, a1, a4 -80000470: b7 15 00 80 lui a1, 524289 -80000474: 93 85 45 43 addi a1, a1, 1076 -80000478: 07 a0 05 00 flw ft0, 0(a1) -8000047c: 93 15 29 00 slli a1, s2, 2 -80000480: 33 0f b5 00 add t5, a0, a1 -80000484: 93 95 26 00 slli a1, a3, 2 -80000488: 6f 00 c0 00 j 12 -8000048c: 93 88 18 00 addi a7, a7, 1 -80000490: 63 f8 78 0a bgeu a7, t2, 176 -80000494: 63 5e d0 06 blez a3, 124 -80000498: 93 0f 00 00 mv t6, zero -8000049c: 93 09 0e 00 mv s3, t3 -800004a0: 93 07 00 00 mv a5, zero -800004a4: 33 85 fe 01 add a0, t4, t6 -800004a8: 33 0a d5 02 mul s4, a0, a3 -800004ac: 13 0b 0f 00 mv s6, t5 -800004b0: b3 0a f9 00 add s5, s2, a5 -800004b4: 13 07 0b 00 mv a4, s6 -800004b8: 13 85 09 00 mv a0, s3 -800004bc: 93 84 06 00 mv s1, a3 -800004c0: d3 00 00 20 fmv.s ft1, ft0 -800004c4: 07 21 07 00 flw ft2, 0(a4) -800004c8: 87 21 05 00 flw ft3, 0(a0) -800004cc: 53 71 31 10 fmul.s ft2, ft2, ft3 -800004d0: d3 70 11 00 fadd.s ft1, ft2, ft1 -800004d4: 93 84 f4 ff addi s1, s1, -1 -800004d8: 13 05 45 00 addi a0, a0, 4 -800004dc: 33 07 b7 00 add a4, a4, a1 -800004e0: e3 92 04 fe bnez s1, -28 -800004e4: 33 85 4a 01 add a0, s5, s4 -800004e8: 13 15 25 00 slli a0, a0, 2 -800004ec: 33 05 a6 00 add a0, a2, a0 -800004f0: 27 20 15 00 fsw ft1, 0(a0) -800004f4: 93 87 17 00 addi a5, a5, 1 -800004f8: 13 0b 4b 00 addi s6, s6, 4 -800004fc: e3 ea 57 fa bltu a5, t0, -76 -80000500: 93 8f 1f 00 addi t6, t6, 1 -80000504: b3 89 b9 00 add s3, s3, a1 -80000508: e3 ec 6f f8 bltu t6, t1, -104 -8000050c: 6f f0 1f f8 j -128 -80000510: 13 05 00 00 mv a0, zero -80000514: 93 04 08 00 mv s1, a6 -80000518: 93 07 00 00 mv a5, zero -8000051c: 13 87 04 00 mv a4, s1 -80000520: 23 20 07 00 sw zero, 0(a4) -80000524: 93 87 17 00 addi a5, a5, 1 -80000528: 13 07 47 00 addi a4, a4, 4 -8000052c: e3 ea 57 fe bltu a5, t0, -12 -80000530: 13 05 15 00 addi a0, a0, 1 -80000534: b3 84 b4 00 add s1, s1, a1 -80000538: e3 60 65 fe bltu a0, t1, -32 -8000053c: 6f f0 1f f5 j -176 -80000540: 13 01 04 fe addi sp, s0, -32 -80000544: 03 2b 01 00 lw s6, 0(sp) -80000548: 83 2a 41 00 lw s5, 4(sp) -8000054c: 03 2a 81 00 lw s4, 8(sp) -80000550: 83 29 c1 00 lw s3, 12(sp) -80000554: 03 29 01 01 lw s2, 16(sp) -80000558: 83 24 41 01 lw s1, 20(sp) -8000055c: 03 24 81 01 lw s0, 24(sp) -80000560: 83 20 c1 01 lw ra, 28(sp) -80000564: 13 01 01 02 addi sp, sp, 32 -80000568: 67 80 00 00 ret +80000484 _pocl_kernel_sgemm: +80000484: 13 01 01 fe addi sp, sp, -32 +80000488: 23 2e 11 00 sw ra, 28(sp) +8000048c: 23 2c 81 00 sw s0, 24(sp) +80000490: 23 2a 91 00 sw s1, 20(sp) +80000494: 23 28 21 01 sw s2, 16(sp) +80000498: 23 26 31 01 sw s3, 12(sp) +8000049c: 23 24 41 01 sw s4, 8(sp) +800004a0: 23 22 51 01 sw s5, 4(sp) +800004a4: 23 20 61 01 sw s6, 0(sp) +800004a8: 13 04 01 02 addi s0, sp, 32 +800004ac: 13 71 c1 ff andi sp, sp, -4 +800004b0: 93 08 00 00 mv a7, zero +800004b4: 83 22 87 01 lw t0, 24(a4) +800004b8: 03 23 c7 01 lw t1, 28(a4) +800004bc: 83 24 c7 00 lw s1, 12(a4) +800004c0: 83 23 07 02 lw t2, 32(a4) +800004c4: 03 27 07 01 lw a4, 16(a4) +800004c8: b3 87 f2 02 mul a5, t0, a5 +800004cc: 33 89 f4 00 add s2, s1, a5 +800004d0: b3 07 03 03 mul a5, t1, a6 +800004d4: b3 0e f7 00 add t4, a4, a5 +800004d8: 33 87 d6 03 mul a4, a3, t4 +800004dc: b3 07 e9 00 add a5, s2, a4 +800004e0: 93 97 27 00 slli a5, a5, 2 +800004e4: 33 08 f6 00 add a6, a2, a5 +800004e8: 13 17 27 00 slli a4, a4, 2 +800004ec: 33 8e e5 00 add t3, a1, a4 +800004f0: b7 15 00 80 lui a1, 524289 +800004f4: 93 85 45 43 addi a1, a1, 1076 +800004f8: 07 a0 05 00 flw ft0, 0(a1) +800004fc: 93 15 29 00 slli a1, s2, 2 +80000500: 33 0f b5 00 add t5, a0, a1 +80000504: 93 95 26 00 slli a1, a3, 2 +80000508: 6f 00 c0 00 j 12 +8000050c: 93 88 18 00 addi a7, a7, 1 +80000510: 63 f8 78 0a bgeu a7, t2, 176 +80000514: 63 5e d0 06 blez a3, 124 +80000518: 93 0f 00 00 mv t6, zero +8000051c: 93 09 0e 00 mv s3, t3 +80000520: 93 07 00 00 mv a5, zero +80000524: 33 85 fe 01 add a0, t4, t6 +80000528: 33 0a d5 02 mul s4, a0, a3 +8000052c: 13 0b 0f 00 mv s6, t5 +80000530: b3 0a f9 00 add s5, s2, a5 +80000534: 13 07 0b 00 mv a4, s6 +80000538: 13 85 09 00 mv a0, s3 +8000053c: 93 84 06 00 mv s1, a3 +80000540: d3 00 00 20 fmv.s ft1, ft0 +80000544: 07 21 07 00 flw ft2, 0(a4) +80000548: 87 21 05 00 flw ft3, 0(a0) +8000054c: 53 71 31 10 fmul.s ft2, ft2, ft3 +80000550: d3 70 11 00 fadd.s ft1, ft2, ft1 +80000554: 93 84 f4 ff addi s1, s1, -1 +80000558: 13 05 45 00 addi a0, a0, 4 +8000055c: 33 07 b7 00 add a4, a4, a1 +80000560: e3 92 04 fe bnez s1, -28 +80000564: 33 85 4a 01 add a0, s5, s4 +80000568: 13 15 25 00 slli a0, a0, 2 +8000056c: 33 05 a6 00 add a0, a2, a0 +80000570: 27 20 15 00 fsw ft1, 0(a0) +80000574: 93 87 17 00 addi a5, a5, 1 +80000578: 13 0b 4b 00 addi s6, s6, 4 +8000057c: e3 ea 57 fa bltu a5, t0, -76 +80000580: 93 8f 1f 00 addi t6, t6, 1 +80000584: b3 89 b9 00 add s3, s3, a1 +80000588: e3 ec 6f f8 bltu t6, t1, -104 +8000058c: 6f f0 1f f8 j -128 +80000590: 13 05 00 00 mv a0, zero +80000594: 93 04 08 00 mv s1, a6 +80000598: 93 07 00 00 mv a5, zero +8000059c: 13 87 04 00 mv a4, s1 +800005a0: 23 20 07 00 sw zero, 0(a4) +800005a4: 93 87 17 00 addi a5, a5, 1 +800005a8: 13 07 47 00 addi a4, a4, 4 +800005ac: e3 ea 57 fe bltu a5, t0, -12 +800005b0: 13 05 15 00 addi a0, a0, 1 +800005b4: b3 84 b4 00 add s1, s1, a1 +800005b8: e3 60 65 fe bltu a0, t1, -32 +800005bc: 6f f0 1f f5 j -176 +800005c0: 13 01 04 fe addi sp, s0, -32 +800005c4: 03 2b 01 00 lw s6, 0(sp) +800005c8: 83 2a 41 00 lw s5, 4(sp) +800005cc: 03 2a 81 00 lw s4, 8(sp) +800005d0: 83 29 c1 00 lw s3, 12(sp) +800005d4: 03 29 01 01 lw s2, 16(sp) +800005d8: 83 24 41 01 lw s1, 20(sp) +800005dc: 03 24 81 01 lw s0, 24(sp) +800005e0: 83 20 c1 01 lw ra, 28(sp) +800005e4: 13 01 01 02 addi sp, sp, 32 +800005e8: 67 80 00 00 ret -8000056c _pocl_kernel_sgemm_workgroup: -8000056c: 13 01 01 fe addi sp, sp, -32 -80000570: 23 2e 81 00 sw s0, 28(sp) -80000574: 23 2c 91 00 sw s1, 24(sp) -80000578: 23 2a 21 01 sw s2, 20(sp) -8000057c: 23 28 31 01 sw s3, 16(sp) -80000580: 23 26 41 01 sw s4, 12(sp) -80000584: 23 24 51 01 sw s5, 8(sp) -80000588: 13 08 00 00 mv a6, zero -8000058c: 03 27 05 00 lw a4, 0(a0) -80000590: 83 27 45 00 lw a5, 4(a0) -80000594: 83 24 85 00 lw s1, 8(a0) -80000598: 03 25 c5 00 lw a0, 12(a0) -8000059c: 03 27 07 00 lw a4, 0(a4) -800005a0: 83 a7 07 00 lw a5, 0(a5) -800005a4: 83 af 04 00 lw t6, 0(s1) -800005a8: 83 29 05 00 lw s3, 0(a0) -800005ac: 03 aa 85 01 lw s4, 24(a1) -800005b0: 83 a8 c5 01 lw a7, 28(a1) -800005b4: 03 a5 c5 00 lw a0, 12(a1) -800005b8: 83 a2 05 02 lw t0, 32(a1) -800005bc: 83 a5 05 01 lw a1, 16(a1) -800005c0: 33 06 ca 02 mul a2, s4, a2 -800005c4: 33 09 c5 00 add s2, a0, a2 -800005c8: 33 85 d8 02 mul a0, a7, a3 -800005cc: 33 8e a5 00 add t3, a1, a0 -800005d0: 33 85 c9 03 mul a0, s3, t3 -800005d4: b3 05 a9 00 add a1, s2, a0 -800005d8: 93 95 25 00 slli a1, a1, 2 -800005dc: 33 83 bf 00 add t1, t6, a1 -800005e0: 13 15 25 00 slli a0, a0, 2 -800005e4: b3 83 a7 00 add t2, a5, a0 -800005e8: 37 15 00 80 lui a0, 524289 -800005ec: 13 05 85 43 addi a0, a0, 1080 -800005f0: 07 20 05 00 flw ft0, 0(a0) -800005f4: 13 15 29 00 slli a0, s2, 2 -800005f8: b3 0e a7 00 add t4, a4, a0 -800005fc: 13 96 29 00 slli a2, s3, 2 -80000600: 6f 00 c0 00 j 12 -80000604: 13 08 18 00 addi a6, a6, 1 -80000608: 63 78 58 0a bgeu a6, t0, 176 -8000060c: 63 5e 30 07 blez s3, 124 -80000610: 13 0f 00 00 mv t5, zero -80000614: 93 8a 03 00 mv s5, t2 -80000618: 13 05 00 00 mv a0, zero -8000061c: b3 05 ee 01 add a1, t3, t5 -80000620: 33 84 35 03 mul s0, a1, s3 -80000624: 93 87 0e 00 mv a5, t4 -80000628: b3 05 a9 00 add a1, s2, a0 -8000062c: 93 84 07 00 mv s1, a5 -80000630: 93 86 0a 00 mv a3, s5 -80000634: 13 87 09 00 mv a4, s3 -80000638: d3 00 00 20 fmv.s ft1, ft0 -8000063c: 07 a1 04 00 flw ft2, 0(s1) -80000640: 87 a1 06 00 flw ft3, 0(a3) -80000644: 53 71 31 10 fmul.s ft2, ft2, ft3 -80000648: d3 70 11 00 fadd.s ft1, ft2, ft1 -8000064c: 13 07 f7 ff addi a4, a4, -1 -80000650: 93 86 46 00 addi a3, a3, 4 -80000654: b3 84 c4 00 add s1, s1, a2 -80000658: e3 12 07 fe bnez a4, -28 -8000065c: b3 85 85 00 add a1, a1, s0 -80000660: 93 95 25 00 slli a1, a1, 2 -80000664: b3 85 bf 00 add a1, t6, a1 -80000668: 27 a0 15 00 fsw ft1, 0(a1) -8000066c: 13 05 15 00 addi a0, a0, 1 -80000670: 93 87 47 00 addi a5, a5, 4 -80000674: e3 6a 45 fb bltu a0, s4, -76 -80000678: 13 0f 1f 00 addi t5, t5, 1 -8000067c: b3 8a ca 00 add s5, s5, a2 -80000680: e3 6c 1f f9 bltu t5, a7, -104 -80000684: 6f f0 1f f8 j -128 -80000688: 13 05 00 00 mv a0, zero -8000068c: 93 06 03 00 mv a3, t1 -80000690: 93 05 00 00 mv a1, zero -80000694: 13 87 06 00 mv a4, a3 -80000698: 23 20 07 00 sw zero, 0(a4) -8000069c: 93 85 15 00 addi a1, a1, 1 -800006a0: 13 07 47 00 addi a4, a4, 4 -800006a4: e3 ea 45 ff bltu a1, s4, -12 -800006a8: 13 05 15 00 addi a0, a0, 1 -800006ac: b3 86 c6 00 add a3, a3, a2 -800006b0: e3 60 15 ff bltu a0, a7, -32 -800006b4: 6f f0 1f f5 j -176 -800006b8: 83 2a 81 00 lw s5, 8(sp) -800006bc: 03 2a c1 00 lw s4, 12(sp) -800006c0: 83 29 01 01 lw s3, 16(sp) -800006c4: 03 29 41 01 lw s2, 20(sp) -800006c8: 83 24 81 01 lw s1, 24(sp) -800006cc: 03 24 c1 01 lw s0, 28(sp) -800006d0: 13 01 01 02 addi sp, sp, 32 -800006d4: 67 80 00 00 ret +800005ec _pocl_kernel_sgemm_workgroup: +800005ec: 13 01 01 fe addi sp, sp, -32 +800005f0: 23 2e 81 00 sw s0, 28(sp) +800005f4: 23 2c 91 00 sw s1, 24(sp) +800005f8: 23 2a 21 01 sw s2, 20(sp) +800005fc: 23 28 31 01 sw s3, 16(sp) +80000600: 23 26 41 01 sw s4, 12(sp) +80000604: 23 24 51 01 sw s5, 8(sp) +80000608: 13 08 00 00 mv a6, zero +8000060c: 03 27 05 00 lw a4, 0(a0) +80000610: 83 27 45 00 lw a5, 4(a0) +80000614: 83 24 85 00 lw s1, 8(a0) +80000618: 03 25 c5 00 lw a0, 12(a0) +8000061c: 03 27 07 00 lw a4, 0(a4) +80000620: 83 a7 07 00 lw a5, 0(a5) +80000624: 83 af 04 00 lw t6, 0(s1) +80000628: 83 29 05 00 lw s3, 0(a0) +8000062c: 03 aa 85 01 lw s4, 24(a1) +80000630: 83 a8 c5 01 lw a7, 28(a1) +80000634: 03 a5 c5 00 lw a0, 12(a1) +80000638: 83 a2 05 02 lw t0, 32(a1) +8000063c: 83 a5 05 01 lw a1, 16(a1) +80000640: 33 06 ca 02 mul a2, s4, a2 +80000644: 33 09 c5 00 add s2, a0, a2 +80000648: 33 85 d8 02 mul a0, a7, a3 +8000064c: 33 8e a5 00 add t3, a1, a0 +80000650: 33 85 c9 03 mul a0, s3, t3 +80000654: b3 05 a9 00 add a1, s2, a0 +80000658: 93 95 25 00 slli a1, a1, 2 +8000065c: 33 83 bf 00 add t1, t6, a1 +80000660: 13 15 25 00 slli a0, a0, 2 +80000664: b3 83 a7 00 add t2, a5, a0 +80000668: 37 15 00 80 lui a0, 524289 +8000066c: 13 05 85 43 addi a0, a0, 1080 +80000670: 07 20 05 00 flw ft0, 0(a0) +80000674: 13 15 29 00 slli a0, s2, 2 +80000678: b3 0e a7 00 add t4, a4, a0 +8000067c: 13 96 29 00 slli a2, s3, 2 +80000680: 6f 00 c0 00 j 12 +80000684: 13 08 18 00 addi a6, a6, 1 +80000688: 63 78 58 0a bgeu a6, t0, 176 +8000068c: 63 5e 30 07 blez s3, 124 +80000690: 13 0f 00 00 mv t5, zero +80000694: 93 8a 03 00 mv s5, t2 +80000698: 13 05 00 00 mv a0, zero +8000069c: b3 05 ee 01 add a1, t3, t5 +800006a0: 33 84 35 03 mul s0, a1, s3 +800006a4: 93 87 0e 00 mv a5, t4 +800006a8: b3 05 a9 00 add a1, s2, a0 +800006ac: 93 84 07 00 mv s1, a5 +800006b0: 93 86 0a 00 mv a3, s5 +800006b4: 13 87 09 00 mv a4, s3 +800006b8: d3 00 00 20 fmv.s ft1, ft0 +800006bc: 07 a1 04 00 flw ft2, 0(s1) +800006c0: 87 a1 06 00 flw ft3, 0(a3) +800006c4: 53 71 31 10 fmul.s ft2, ft2, ft3 +800006c8: d3 70 11 00 fadd.s ft1, ft2, ft1 +800006cc: 13 07 f7 ff addi a4, a4, -1 +800006d0: 93 86 46 00 addi a3, a3, 4 +800006d4: b3 84 c4 00 add s1, s1, a2 +800006d8: e3 12 07 fe bnez a4, -28 +800006dc: b3 85 85 00 add a1, a1, s0 +800006e0: 93 95 25 00 slli a1, a1, 2 +800006e4: b3 85 bf 00 add a1, t6, a1 +800006e8: 27 a0 15 00 fsw ft1, 0(a1) +800006ec: 13 05 15 00 addi a0, a0, 1 +800006f0: 93 87 47 00 addi a5, a5, 4 +800006f4: e3 6a 45 fb bltu a0, s4, -76 +800006f8: 13 0f 1f 00 addi t5, t5, 1 +800006fc: b3 8a ca 00 add s5, s5, a2 +80000700: e3 6c 1f f9 bltu t5, a7, -104 +80000704: 6f f0 1f f8 j -128 +80000708: 13 05 00 00 mv a0, zero +8000070c: 93 06 03 00 mv a3, t1 +80000710: 93 05 00 00 mv a1, zero +80000714: 13 87 06 00 mv a4, a3 +80000718: 23 20 07 00 sw zero, 0(a4) +8000071c: 93 85 15 00 addi a1, a1, 1 +80000720: 13 07 47 00 addi a4, a4, 4 +80000724: e3 ea 45 ff bltu a1, s4, -12 +80000728: 13 05 15 00 addi a0, a0, 1 +8000072c: b3 86 c6 00 add a3, a3, a2 +80000730: e3 60 15 ff bltu a0, a7, -32 +80000734: 6f f0 1f f5 j -176 +80000738: 83 2a 81 00 lw s5, 8(sp) +8000073c: 03 2a c1 00 lw s4, 12(sp) +80000740: 83 29 01 01 lw s3, 16(sp) +80000744: 03 29 41 01 lw s2, 20(sp) +80000748: 83 24 81 01 lw s1, 24(sp) +8000074c: 03 24 c1 01 lw s0, 28(sp) +80000750: 13 01 01 02 addi sp, sp, 32 +80000754: 67 80 00 00 ret -800006d8 _pocl_kernel_sgemm_workgroup_fast: -800006d8: 13 01 01 fe addi sp, sp, -32 -800006dc: 23 2e 81 00 sw s0, 28(sp) -800006e0: 23 2c 91 00 sw s1, 24(sp) -800006e4: 23 2a 21 01 sw s2, 20(sp) -800006e8: 23 28 31 01 sw s3, 16(sp) -800006ec: 23 26 41 01 sw s4, 12(sp) -800006f0: 23 24 51 01 sw s5, 8(sp) -800006f4: 13 08 00 00 mv a6, zero -800006f8: 03 27 c5 00 lw a4, 12(a0) -800006fc: 83 27 05 00 lw a5, 0(a0) -80000700: 83 24 45 00 lw s1, 4(a0) -80000704: 83 2f 85 00 lw t6, 8(a0) -80000708: 83 29 07 00 lw s3, 0(a4) -8000070c: 03 aa 85 01 lw s4, 24(a1) -80000710: 83 a8 c5 01 lw a7, 28(a1) -80000714: 03 a5 c5 00 lw a0, 12(a1) -80000718: 83 a2 05 02 lw t0, 32(a1) -8000071c: 83 a5 05 01 lw a1, 16(a1) -80000720: 33 06 ca 02 mul a2, s4, a2 -80000724: 33 09 c5 00 add s2, a0, a2 -80000728: 33 85 d8 02 mul a0, a7, a3 -8000072c: 33 8e a5 00 add t3, a1, a0 -80000730: 33 85 c9 03 mul a0, s3, t3 -80000734: b3 05 a9 00 add a1, s2, a0 -80000738: 93 95 25 00 slli a1, a1, 2 -8000073c: 33 83 bf 00 add t1, t6, a1 -80000740: 13 15 25 00 slli a0, a0, 2 -80000744: b3 83 a4 00 add t2, s1, a0 -80000748: 37 15 00 80 lui a0, 524289 -8000074c: 13 05 c5 43 addi a0, a0, 1084 -80000750: 07 20 05 00 flw ft0, 0(a0) -80000754: 13 15 29 00 slli a0, s2, 2 -80000758: b3 8e a7 00 add t4, a5, a0 -8000075c: 13 96 29 00 slli a2, s3, 2 -80000760: 6f 00 c0 00 j 12 -80000764: 13 08 18 00 addi a6, a6, 1 -80000768: 63 78 58 0a bgeu a6, t0, 176 -8000076c: 63 5e 30 07 blez s3, 124 -80000770: 13 0f 00 00 mv t5, zero -80000774: 93 8a 03 00 mv s5, t2 -80000778: 13 05 00 00 mv a0, zero -8000077c: b3 05 ee 01 add a1, t3, t5 -80000780: 33 84 35 03 mul s0, a1, s3 -80000784: 93 87 0e 00 mv a5, t4 -80000788: b3 05 a9 00 add a1, s2, a0 -8000078c: 93 84 07 00 mv s1, a5 -80000790: 93 86 0a 00 mv a3, s5 -80000794: 13 87 09 00 mv a4, s3 -80000798: d3 00 00 20 fmv.s ft1, ft0 -8000079c: 07 a1 04 00 flw ft2, 0(s1) -800007a0: 87 a1 06 00 flw ft3, 0(a3) -800007a4: 53 71 31 10 fmul.s ft2, ft2, ft3 -800007a8: d3 70 11 00 fadd.s ft1, ft2, ft1 -800007ac: 13 07 f7 ff addi a4, a4, -1 -800007b0: 93 86 46 00 addi a3, a3, 4 -800007b4: b3 84 c4 00 add s1, s1, a2 -800007b8: e3 12 07 fe bnez a4, -28 -800007bc: b3 85 85 00 add a1, a1, s0 -800007c0: 93 95 25 00 slli a1, a1, 2 -800007c4: b3 85 bf 00 add a1, t6, a1 -800007c8: 27 a0 15 00 fsw ft1, 0(a1) -800007cc: 13 05 15 00 addi a0, a0, 1 -800007d0: 93 87 47 00 addi a5, a5, 4 -800007d4: e3 6a 45 fb bltu a0, s4, -76 -800007d8: 13 0f 1f 00 addi t5, t5, 1 -800007dc: b3 8a ca 00 add s5, s5, a2 -800007e0: e3 6c 1f f9 bltu t5, a7, -104 -800007e4: 6f f0 1f f8 j -128 -800007e8: 13 05 00 00 mv a0, zero -800007ec: 93 05 03 00 mv a1, t1 -800007f0: 93 06 00 00 mv a3, zero -800007f4: 13 87 05 00 mv a4, a1 -800007f8: 23 20 07 00 sw zero, 0(a4) -800007fc: 93 86 16 00 addi a3, a3, 1 -80000800: 13 07 47 00 addi a4, a4, 4 -80000804: e3 ea 46 ff bltu a3, s4, -12 -80000808: 13 05 15 00 addi a0, a0, 1 -8000080c: b3 85 c5 00 add a1, a1, a2 -80000810: e3 60 15 ff bltu a0, a7, -32 -80000814: 6f f0 1f f5 j -176 -80000818: 83 2a 81 00 lw s5, 8(sp) -8000081c: 03 2a c1 00 lw s4, 12(sp) -80000820: 83 29 01 01 lw s3, 16(sp) -80000824: 03 29 41 01 lw s2, 20(sp) -80000828: 83 24 81 01 lw s1, 24(sp) -8000082c: 03 24 c1 01 lw s0, 28(sp) -80000830: 13 01 01 02 addi sp, sp, 32 -80000834: 67 80 00 00 ret +80000758 _pocl_kernel_sgemm_workgroup_fast: +80000758: 13 01 01 fe addi sp, sp, -32 +8000075c: 23 2e 81 00 sw s0, 28(sp) +80000760: 23 2c 91 00 sw s1, 24(sp) +80000764: 23 2a 21 01 sw s2, 20(sp) +80000768: 23 28 31 01 sw s3, 16(sp) +8000076c: 23 26 41 01 sw s4, 12(sp) +80000770: 23 24 51 01 sw s5, 8(sp) +80000774: 13 08 00 00 mv a6, zero +80000778: 03 27 c5 00 lw a4, 12(a0) +8000077c: 83 27 05 00 lw a5, 0(a0) +80000780: 83 24 45 00 lw s1, 4(a0) +80000784: 83 2f 85 00 lw t6, 8(a0) +80000788: 83 29 07 00 lw s3, 0(a4) +8000078c: 03 aa 85 01 lw s4, 24(a1) +80000790: 83 a8 c5 01 lw a7, 28(a1) +80000794: 03 a5 c5 00 lw a0, 12(a1) +80000798: 83 a2 05 02 lw t0, 32(a1) +8000079c: 83 a5 05 01 lw a1, 16(a1) +800007a0: 33 06 ca 02 mul a2, s4, a2 +800007a4: 33 09 c5 00 add s2, a0, a2 +800007a8: 33 85 d8 02 mul a0, a7, a3 +800007ac: 33 8e a5 00 add t3, a1, a0 +800007b0: 33 85 c9 03 mul a0, s3, t3 +800007b4: b3 05 a9 00 add a1, s2, a0 +800007b8: 93 95 25 00 slli a1, a1, 2 +800007bc: 33 83 bf 00 add t1, t6, a1 +800007c0: 13 15 25 00 slli a0, a0, 2 +800007c4: b3 83 a4 00 add t2, s1, a0 +800007c8: 37 15 00 80 lui a0, 524289 +800007cc: 13 05 c5 43 addi a0, a0, 1084 +800007d0: 07 20 05 00 flw ft0, 0(a0) +800007d4: 13 15 29 00 slli a0, s2, 2 +800007d8: b3 8e a7 00 add t4, a5, a0 +800007dc: 13 96 29 00 slli a2, s3, 2 +800007e0: 6f 00 c0 00 j 12 +800007e4: 13 08 18 00 addi a6, a6, 1 +800007e8: 63 78 58 0a bgeu a6, t0, 176 +800007ec: 63 5e 30 07 blez s3, 124 +800007f0: 13 0f 00 00 mv t5, zero +800007f4: 93 8a 03 00 mv s5, t2 +800007f8: 13 05 00 00 mv a0, zero +800007fc: b3 05 ee 01 add a1, t3, t5 +80000800: 33 84 35 03 mul s0, a1, s3 +80000804: 93 87 0e 00 mv a5, t4 +80000808: b3 05 a9 00 add a1, s2, a0 +8000080c: 93 84 07 00 mv s1, a5 +80000810: 93 86 0a 00 mv a3, s5 +80000814: 13 87 09 00 mv a4, s3 +80000818: d3 00 00 20 fmv.s ft1, ft0 +8000081c: 07 a1 04 00 flw ft2, 0(s1) +80000820: 87 a1 06 00 flw ft3, 0(a3) +80000824: 53 71 31 10 fmul.s ft2, ft2, ft3 +80000828: d3 70 11 00 fadd.s ft1, ft2, ft1 +8000082c: 13 07 f7 ff addi a4, a4, -1 +80000830: 93 86 46 00 addi a3, a3, 4 +80000834: b3 84 c4 00 add s1, s1, a2 +80000838: e3 12 07 fe bnez a4, -28 +8000083c: b3 85 85 00 add a1, a1, s0 +80000840: 93 95 25 00 slli a1, a1, 2 +80000844: b3 85 bf 00 add a1, t6, a1 +80000848: 27 a0 15 00 fsw ft1, 0(a1) +8000084c: 13 05 15 00 addi a0, a0, 1 +80000850: 93 87 47 00 addi a5, a5, 4 +80000854: e3 6a 45 fb bltu a0, s4, -76 +80000858: 13 0f 1f 00 addi t5, t5, 1 +8000085c: b3 8a ca 00 add s5, s5, a2 +80000860: e3 6c 1f f9 bltu t5, a7, -104 +80000864: 6f f0 1f f8 j -128 +80000868: 13 05 00 00 mv a0, zero +8000086c: 93 05 03 00 mv a1, t1 +80000870: 93 06 00 00 mv a3, zero +80000874: 13 87 05 00 mv a4, a1 +80000878: 23 20 07 00 sw zero, 0(a4) +8000087c: 93 86 16 00 addi a3, a3, 1 +80000880: 13 07 47 00 addi a4, a4, 4 +80000884: e3 ea 46 ff bltu a3, s4, -12 +80000888: 13 05 15 00 addi a0, a0, 1 +8000088c: b3 85 c5 00 add a1, a1, a2 +80000890: e3 60 15 ff bltu a0, a7, -32 +80000894: 6f f0 1f f5 j -176 +80000898: 83 2a 81 00 lw s5, 8(sp) +8000089c: 03 2a c1 00 lw s4, 12(sp) +800008a0: 83 29 01 01 lw s3, 16(sp) +800008a4: 03 29 41 01 lw s2, 20(sp) +800008a8: 83 24 81 01 lw s1, 24(sp) +800008ac: 03 24 c1 01 lw s0, 28(sp) +800008b0: 13 01 01 02 addi sp, sp, 32 +800008b4: 67 80 00 00 ret -80000838 _exit: -80000838: 13 05 00 00 mv a0, zero -8000083c: 6b 00 05 00 +800008b8 _exit: +800008b8: 13 05 00 00 mv a0, zero +800008bc: 6b 00 05 00 -80000840 vx_set_sp: -80000840: 73 25 00 fc csrr a0, 4032 -80000844: 6b 00 05 00 -80000848: 97 11 00 00 auipc gp, 1 -8000084c: 93 81 01 fc addi gp, gp, -64 -80000850: 17 f1 ff 7e auipc sp, 520191 -80000854: 13 01 01 7b addi sp, sp, 1968 -80000858: 93 05 00 40 addi a1, zero, 1024 -8000085c: 73 26 10 cc csrr a2, 3265 -80000860: b3 85 c5 02 mul a1, a1, a2 -80000864: 33 01 b1 40 sub sp, sp, a1 -80000868: f3 26 30 cc csrr a3, 3267 -8000086c: 63 86 06 00 beqz a3, 12 -80000870: 13 05 00 00 mv a0, zero -80000874: 6b 00 05 00 +800008c0 vx_set_sp: +800008c0: 73 25 00 fc csrr a0, 4032 +800008c4: 6b 00 05 00 +800008c8: 97 11 00 00 auipc gp, 1 +800008cc: 93 81 01 f4 addi gp, gp, -192 +800008d0: 17 f1 ff 7e auipc sp, 520191 +800008d4: 13 01 01 73 addi sp, sp, 1840 +800008d8: 93 05 00 40 addi a1, zero, 1024 +800008dc: 73 26 10 cc csrr a2, 3265 +800008e0: b3 85 c5 02 mul a1, a1, a2 +800008e4: 33 01 b1 40 sub sp, sp, a1 +800008e8: f3 26 30 cc csrr a3, 3267 +800008ec: 63 86 06 00 beqz a3, 12 +800008f0: 13 05 00 00 mv a0, zero +800008f4: 6b 00 05 00 -80000878 RETURN: -80000878: 67 80 00 00 ret - -8000087c vx_wspawn: -8000087c: 6b 10 b5 00 -80000880: 67 80 00 00 ret - -80000884 vx_tmc: -80000884: 6b 00 05 00 -80000888: 67 80 00 00 ret - -8000088c vx_barrier: -8000088c: 6b 40 b5 00 -80000890: 67 80 00 00 ret - -80000894 vx_split: -80000894: 6b 20 05 00 -80000898: 67 80 00 00 ret - -8000089c vx_join: -8000089c: 6b 30 00 00 -800008a0: 67 80 00 00 ret - -800008a4 vx_warp_id: -800008a4: 73 25 30 cc csrr a0, 3267 -800008a8: 67 80 00 00 ret - -800008ac vx_warp_gid: -800008ac: 73 25 40 f1 csrr a0, mhartid -800008b0: 67 80 00 00 ret - -800008b4 vx_thread_id: -800008b4: 73 25 00 cc csrr a0, 3264 -800008b8: 67 80 00 00 ret - -800008bc vx_thread_lid: -800008bc: 73 25 10 cc csrr a0, 3265 -800008c0: 67 80 00 00 ret - -800008c4 vx_thread_gid: -800008c4: 73 25 20 cc csrr a0, 3266 -800008c8: 67 80 00 00 ret - -800008cc vx_core_id: -800008cc: 73 25 50 cc csrr a0, 3269 -800008d0: 67 80 00 00 ret - -800008d4 vx_num_threads: -800008d4: 73 25 00 fc csrr a0, 4032 -800008d8: 67 80 00 00 ret - -800008dc vx_num_warps: -800008dc: 73 25 10 fc csrr a0, 4033 -800008e0: 67 80 00 00 ret - -800008e4 vx_num_cores: -800008e4: 73 25 20 fc csrr a0, 4034 -800008e8: 67 80 00 00 ret - -800008ec vx_num_cycles: -800008ec: 73 25 00 b0 csrr a0, mcycle -800008f0: 67 80 00 00 ret - -800008f4 vx_num_instrs: -800008f4: 73 25 20 b0 csrr a0, minstret +800008f8 RETURN: 800008f8: 67 80 00 00 ret -800008fc atexit: -800008fc: 93 05 05 00 mv a1, a0 -80000900: 93 06 00 00 mv a3, zero -80000904: 13 06 00 00 mv a2, zero -80000908: 13 05 00 00 mv a0, zero -8000090c: 6f 00 c0 20 j 524 +800008fc vx_wspawn: +800008fc: 6b 10 b5 00 +80000900: 67 80 00 00 ret -80000910 exit: -80000910: 13 01 01 ff addi sp, sp, -16 -80000914: 93 05 00 00 mv a1, zero -80000918: 23 24 81 00 sw s0, 8(sp) -8000091c: 23 26 11 00 sw ra, 12(sp) -80000920: 13 04 05 00 mv s0, a0 -80000924: ef 00 00 29 jal 656 -80000928: b7 17 00 80 lui a5, 524289 -8000092c: 03 a5 07 43 lw a0, 1072(a5) -80000930: 83 27 c5 03 lw a5, 60(a0) -80000934: 63 84 07 00 beqz a5, 8 -80000938: e7 80 07 00 jalr a5 -8000093c: 13 05 04 00 mv a0, s0 -80000940: ef f0 9f ef jal -264 +80000904 vx_tmc: +80000904: 6b 00 05 00 +80000908: 67 80 00 00 ret -80000944 __libc_fini_array: -80000944: 13 01 01 ff addi sp, sp, -16 -80000948: 23 24 81 00 sw s0, 8(sp) -8000094c: b7 17 00 80 lui a5, 524289 -80000950: 37 14 00 80 lui s0, 524289 -80000954: 13 04 44 00 addi s0, s0, 4 -80000958: 93 87 47 00 addi a5, a5, 4 -8000095c: b3 87 87 40 sub a5, a5, s0 -80000960: 23 22 91 00 sw s1, 4(sp) -80000964: 23 26 11 00 sw ra, 12(sp) -80000968: 93 d4 27 40 srai s1, a5, 2 -8000096c: 63 80 04 02 beqz s1, 32 -80000970: 93 87 c7 ff addi a5, a5, -4 -80000974: 33 84 87 00 add s0, a5, s0 -80000978: 83 27 04 00 lw a5, 0(s0) -8000097c: 93 84 f4 ff addi s1, s1, -1 -80000980: 13 04 c4 ff addi s0, s0, -4 -80000984: e7 80 07 00 jalr a5 -80000988: e3 98 04 fe bnez s1, -16 -8000098c: 83 20 c1 00 lw ra, 12(sp) -80000990: 03 24 81 00 lw s0, 8(sp) -80000994: 83 24 41 00 lw s1, 4(sp) -80000998: 13 01 01 01 addi sp, sp, 16 -8000099c: 67 80 00 00 ret +8000090c vx_barrier: +8000090c: 6b 40 b5 00 +80000910: 67 80 00 00 ret -800009a0 __libc_init_array: -800009a0: 13 01 01 ff addi sp, sp, -16 -800009a4: 23 24 81 00 sw s0, 8(sp) -800009a8: 23 20 21 01 sw s2, 0(sp) -800009ac: 37 14 00 80 lui s0, 524289 -800009b0: 37 19 00 80 lui s2, 524289 -800009b4: 93 07 04 00 mv a5, s0 -800009b8: 13 09 09 00 mv s2, s2 -800009bc: 33 09 f9 40 sub s2, s2, a5 -800009c0: 23 26 11 00 sw ra, 12(sp) -800009c4: 23 22 91 00 sw s1, 4(sp) -800009c8: 13 59 29 40 srai s2, s2, 2 -800009cc: 63 00 09 02 beqz s2, 32 -800009d0: 13 04 04 00 mv s0, s0 -800009d4: 93 04 00 00 mv s1, zero -800009d8: 83 27 04 00 lw a5, 0(s0) -800009dc: 93 84 14 00 addi s1, s1, 1 -800009e0: 13 04 44 00 addi s0, s0, 4 -800009e4: e7 80 07 00 jalr a5 -800009e8: e3 18 99 fe bne s2, s1, -16 -800009ec: 37 14 00 80 lui s0, 524289 -800009f0: 37 19 00 80 lui s2, 524289 -800009f4: 93 07 04 00 mv a5, s0 -800009f8: 13 09 49 00 addi s2, s2, 4 -800009fc: 33 09 f9 40 sub s2, s2, a5 -80000a00: 13 59 29 40 srai s2, s2, 2 -80000a04: 63 00 09 02 beqz s2, 32 -80000a08: 13 04 04 00 mv s0, s0 -80000a0c: 93 04 00 00 mv s1, zero -80000a10: 83 27 04 00 lw a5, 0(s0) -80000a14: 93 84 14 00 addi s1, s1, 1 -80000a18: 13 04 44 00 addi s0, s0, 4 -80000a1c: e7 80 07 00 jalr a5 -80000a20: e3 18 99 fe bne s2, s1, -16 -80000a24: 83 20 c1 00 lw ra, 12(sp) -80000a28: 03 24 81 00 lw s0, 8(sp) -80000a2c: 83 24 41 00 lw s1, 4(sp) -80000a30: 03 29 01 00 lw s2, 0(sp) -80000a34: 13 01 01 01 addi sp, sp, 16 -80000a38: 67 80 00 00 ret +80000914 vx_split: +80000914: 6b 20 05 00 +80000918: 67 80 00 00 ret -80000a3c memset: -80000a3c: 13 03 f0 00 addi t1, zero, 15 -80000a40: 13 07 05 00 mv a4, a0 -80000a44: 63 7e c3 02 bgeu t1, a2, 60 -80000a48: 93 77 f7 00 andi a5, a4, 15 -80000a4c: 63 90 07 0a bnez a5, 160 -80000a50: 63 92 05 08 bnez a1, 132 -80000a54: 93 76 06 ff andi a3, a2, -16 -80000a58: 13 76 f6 00 andi a2, a2, 15 -80000a5c: b3 86 e6 00 add a3, a3, a4 -80000a60: 23 20 b7 00 sw a1, 0(a4) -80000a64: 23 22 b7 00 sw a1, 4(a4) -80000a68: 23 24 b7 00 sw a1, 8(a4) -80000a6c: 23 26 b7 00 sw a1, 12(a4) -80000a70: 13 07 07 01 addi a4, a4, 16 -80000a74: e3 66 d7 fe bltu a4, a3, -20 -80000a78: 63 14 06 00 bnez a2, 8 -80000a7c: 67 80 00 00 ret -80000a80: b3 06 c3 40 sub a3, t1, a2 -80000a84: 93 96 26 00 slli a3, a3, 2 -80000a88: 97 02 00 00 auipc t0, 0 -80000a8c: b3 86 56 00 add a3, a3, t0 -80000a90: 67 80 c6 00 jr 12(a3) -80000a94: 23 07 b7 00 sb a1, 14(a4) -80000a98: a3 06 b7 00 sb a1, 13(a4) -80000a9c: 23 06 b7 00 sb a1, 12(a4) -80000aa0: a3 05 b7 00 sb a1, 11(a4) -80000aa4: 23 05 b7 00 sb a1, 10(a4) -80000aa8: a3 04 b7 00 sb a1, 9(a4) -80000aac: 23 04 b7 00 sb a1, 8(a4) -80000ab0: a3 03 b7 00 sb a1, 7(a4) -80000ab4: 23 03 b7 00 sb a1, 6(a4) -80000ab8: a3 02 b7 00 sb a1, 5(a4) -80000abc: 23 02 b7 00 sb a1, 4(a4) -80000ac0: a3 01 b7 00 sb a1, 3(a4) -80000ac4: 23 01 b7 00 sb a1, 2(a4) -80000ac8: a3 00 b7 00 sb a1, 1(a4) -80000acc: 23 00 b7 00 sb a1, 0(a4) -80000ad0: 67 80 00 00 ret -80000ad4: 93 f5 f5 0f andi a1, a1, 255 -80000ad8: 93 96 85 00 slli a3, a1, 8 -80000adc: b3 e5 d5 00 or a1, a1, a3 -80000ae0: 93 96 05 01 slli a3, a1, 16 -80000ae4: b3 e5 d5 00 or a1, a1, a3 -80000ae8: 6f f0 df f6 j -148 -80000aec: 93 96 27 00 slli a3, a5, 2 -80000af0: 97 02 00 00 auipc t0, 0 -80000af4: b3 86 56 00 add a3, a3, t0 -80000af8: 93 82 00 00 mv t0, ra -80000afc: e7 80 06 fa jalr -96(a3) -80000b00: 93 80 02 00 mv ra, t0 -80000b04: 93 87 07 ff addi a5, a5, -16 -80000b08: 33 07 f7 40 sub a4, a4, a5 -80000b0c: 33 06 f6 00 add a2, a2, a5 -80000b10: e3 78 c3 f6 bgeu t1, a2, -144 -80000b14: 6f f0 df f3 j -196 +8000091c vx_join: +8000091c: 6b 30 00 00 +80000920: 67 80 00 00 ret -80000b18 __register_exitproc: -80000b18: b7 17 00 80 lui a5, 524289 -80000b1c: 03 a7 07 43 lw a4, 1072(a5) -80000b20: 83 27 87 14 lw a5, 328(a4) -80000b24: 63 8c 07 04 beqz a5, 88 -80000b28: 03 a7 47 00 lw a4, 4(a5) -80000b2c: 13 08 f0 01 addi a6, zero, 31 -80000b30: 63 4e e8 06 blt a6, a4, 124 -80000b34: 13 18 27 00 slli a6, a4, 2 -80000b38: 63 06 05 02 beqz a0, 44 -80000b3c: 33 83 07 01 add t1, a5, a6 -80000b40: 23 24 c3 08 sw a2, 136(t1) -80000b44: 83 a8 87 18 lw a7, 392(a5) -80000b48: 13 06 10 00 addi a2, zero, 1 -80000b4c: 33 16 e6 00 sll a2, a2, a4 -80000b50: b3 e8 c8 00 or a7, a7, a2 -80000b54: 23 a4 17 19 sw a7, 392(a5) -80000b58: 23 24 d3 10 sw a3, 264(t1) -80000b5c: 93 06 20 00 addi a3, zero, 2 -80000b60: 63 04 d5 02 beq a0, a3, 40 -80000b64: 13 07 17 00 addi a4, a4, 1 -80000b68: 23 a2 e7 00 sw a4, 4(a5) -80000b6c: b3 87 07 01 add a5, a5, a6 -80000b70: 23 a4 b7 00 sw a1, 8(a5) -80000b74: 13 05 00 00 mv a0, zero -80000b78: 67 80 00 00 ret -80000b7c: 93 07 c7 14 addi a5, a4, 332 -80000b80: 23 24 f7 14 sw a5, 328(a4) -80000b84: 6f f0 5f fa j -92 -80000b88: 83 a6 c7 18 lw a3, 396(a5) -80000b8c: 13 07 17 00 addi a4, a4, 1 -80000b90: 23 a2 e7 00 sw a4, 4(a5) -80000b94: 33 e6 c6 00 or a2, a3, a2 -80000b98: 23 a6 c7 18 sw a2, 396(a5) -80000b9c: b3 87 07 01 add a5, a5, a6 -80000ba0: 23 a4 b7 00 sw a1, 8(a5) -80000ba4: 13 05 00 00 mv a0, zero -80000ba8: 67 80 00 00 ret -80000bac: 13 05 f0 ff addi a0, zero, -1 -80000bb0: 67 80 00 00 ret +80000924 vx_warp_id: +80000924: 73 25 30 cc csrr a0, 3267 +80000928: 67 80 00 00 ret -80000bb4 __call_exitprocs: -80000bb4: 13 01 01 fd addi sp, sp, -48 -80000bb8: b7 17 00 80 lui a5, 524289 -80000bbc: 23 2c 41 01 sw s4, 24(sp) -80000bc0: 03 aa 07 43 lw s4, 1072(a5) -80000bc4: 23 20 21 03 sw s2, 32(sp) -80000bc8: 23 26 11 02 sw ra, 44(sp) -80000bcc: 03 29 8a 14 lw s2, 328(s4) -80000bd0: 23 24 81 02 sw s0, 40(sp) -80000bd4: 23 22 91 02 sw s1, 36(sp) -80000bd8: 23 2e 31 01 sw s3, 28(sp) -80000bdc: 23 2a 51 01 sw s5, 20(sp) -80000be0: 23 28 61 01 sw s6, 16(sp) -80000be4: 23 26 71 01 sw s7, 12(sp) -80000be8: 23 24 81 01 sw s8, 8(sp) -80000bec: 63 00 09 04 beqz s2, 64 -80000bf0: 13 0b 05 00 mv s6, a0 -80000bf4: 93 8b 05 00 mv s7, a1 -80000bf8: 93 0a 10 00 addi s5, zero, 1 -80000bfc: 93 09 f0 ff addi s3, zero, -1 -80000c00: 83 24 49 00 lw s1, 4(s2) -80000c04: 13 84 f4 ff addi s0, s1, -1 -80000c08: 63 42 04 02 bltz s0, 36 -80000c0c: 93 94 24 00 slli s1, s1, 2 -80000c10: b3 04 99 00 add s1, s2, s1 -80000c14: 63 84 0b 04 beqz s7, 72 -80000c18: 83 a7 44 10 lw a5, 260(s1) -80000c1c: 63 80 77 05 beq a5, s7, 64 -80000c20: 13 04 f4 ff addi s0, s0, -1 -80000c24: 93 84 c4 ff addi s1, s1, -4 -80000c28: e3 16 34 ff bne s0, s3, -20 -80000c2c: 83 20 c1 02 lw ra, 44(sp) -80000c30: 03 24 81 02 lw s0, 40(sp) -80000c34: 83 24 41 02 lw s1, 36(sp) -80000c38: 03 29 01 02 lw s2, 32(sp) -80000c3c: 83 29 c1 01 lw s3, 28(sp) -80000c40: 03 2a 81 01 lw s4, 24(sp) -80000c44: 83 2a 41 01 lw s5, 20(sp) -80000c48: 03 2b 01 01 lw s6, 16(sp) -80000c4c: 83 2b c1 00 lw s7, 12(sp) -80000c50: 03 2c 81 00 lw s8, 8(sp) -80000c54: 13 01 01 03 addi sp, sp, 48 -80000c58: 67 80 00 00 ret -80000c5c: 83 27 49 00 lw a5, 4(s2) -80000c60: 83 a6 44 00 lw a3, 4(s1) -80000c64: 93 87 f7 ff addi a5, a5, -1 -80000c68: 63 8e 87 04 beq a5, s0, 92 -80000c6c: 23 a2 04 00 sw zero, 4(s1) -80000c70: e3 88 06 fa beqz a3, -80 -80000c74: 83 27 89 18 lw a5, 392(s2) -80000c78: 33 97 8a 00 sll a4, s5, s0 -80000c7c: 03 2c 49 00 lw s8, 4(s2) -80000c80: b3 77 f7 00 and a5, a4, a5 -80000c84: 63 92 07 02 bnez a5, 36 -80000c88: e7 80 06 00 jalr a3 -80000c8c: 03 27 49 00 lw a4, 4(s2) -80000c90: 83 27 8a 14 lw a5, 328(s4) -80000c94: 63 14 87 01 bne a4, s8, 8 -80000c98: e3 04 f9 f8 beq s2, a5, -120 -80000c9c: e3 88 07 f8 beqz a5, -112 -80000ca0: 13 89 07 00 mv s2, a5 -80000ca4: 6f f0 df f5 j -164 -80000ca8: 83 27 c9 18 lw a5, 396(s2) -80000cac: 83 a5 44 08 lw a1, 132(s1) -80000cb0: 33 77 f7 00 and a4, a4, a5 -80000cb4: 63 1c 07 00 bnez a4, 24 -80000cb8: 13 05 0b 00 mv a0, s6 -80000cbc: e7 80 06 00 jalr a3 -80000cc0: 6f f0 df fc j -52 -80000cc4: 23 22 89 00 sw s0, 4(s2) -80000cc8: 6f f0 9f fa j -88 -80000ccc: 13 85 05 00 mv a0, a1 -80000cd0: e7 80 06 00 jalr a3 -80000cd4: 6f f0 9f fb j -72 +8000092c vx_warp_gid: +8000092c: 73 25 40 f1 csrr a0, mhartid +80000930: 67 80 00 00 ret + +80000934 vx_thread_id: +80000934: 73 25 00 cc csrr a0, 3264 +80000938: 67 80 00 00 ret + +8000093c vx_thread_lid: +8000093c: 73 25 10 cc csrr a0, 3265 +80000940: 67 80 00 00 ret + +80000944 vx_thread_gid: +80000944: 73 25 20 cc csrr a0, 3266 +80000948: 67 80 00 00 ret + +8000094c vx_core_id: +8000094c: 73 25 50 cc csrr a0, 3269 +80000950: 67 80 00 00 ret + +80000954 vx_num_threads: +80000954: 73 25 00 fc csrr a0, 4032 +80000958: 67 80 00 00 ret + +8000095c vx_num_warps: +8000095c: 73 25 10 fc csrr a0, 4033 +80000960: 67 80 00 00 ret + +80000964 vx_num_cores: +80000964: 73 25 20 fc csrr a0, 4034 +80000968: 67 80 00 00 ret + +8000096c vx_num_cycles: +8000096c: 73 25 00 b0 csrr a0, mcycle +80000970: 67 80 00 00 ret + +80000974 vx_num_instrs: +80000974: 73 25 20 b0 csrr a0, minstret +80000978: 67 80 00 00 ret + +8000097c atexit: +8000097c: 93 05 05 00 mv a1, a0 +80000980: 93 06 00 00 mv a3, zero +80000984: 13 06 00 00 mv a2, zero +80000988: 13 05 00 00 mv a0, zero +8000098c: 6f 00 c0 20 j 524 + +80000990 exit: +80000990: 13 01 01 ff addi sp, sp, -16 +80000994: 93 05 00 00 mv a1, zero +80000998: 23 24 81 00 sw s0, 8(sp) +8000099c: 23 26 11 00 sw ra, 12(sp) +800009a0: 13 04 05 00 mv s0, a0 +800009a4: ef 00 00 29 jal 656 +800009a8: b7 17 00 80 lui a5, 524289 +800009ac: 03 a5 07 43 lw a0, 1072(a5) +800009b0: 83 27 c5 03 lw a5, 60(a0) +800009b4: 63 84 07 00 beqz a5, 8 +800009b8: e7 80 07 00 jalr a5 +800009bc: 13 05 04 00 mv a0, s0 +800009c0: ef f0 9f ef jal -264 + +800009c4 __libc_fini_array: +800009c4: 13 01 01 ff addi sp, sp, -16 +800009c8: 23 24 81 00 sw s0, 8(sp) +800009cc: b7 17 00 80 lui a5, 524289 +800009d0: 37 14 00 80 lui s0, 524289 +800009d4: 13 04 44 00 addi s0, s0, 4 +800009d8: 93 87 47 00 addi a5, a5, 4 +800009dc: b3 87 87 40 sub a5, a5, s0 +800009e0: 23 22 91 00 sw s1, 4(sp) +800009e4: 23 26 11 00 sw ra, 12(sp) +800009e8: 93 d4 27 40 srai s1, a5, 2 +800009ec: 63 80 04 02 beqz s1, 32 +800009f0: 93 87 c7 ff addi a5, a5, -4 +800009f4: 33 84 87 00 add s0, a5, s0 +800009f8: 83 27 04 00 lw a5, 0(s0) +800009fc: 93 84 f4 ff addi s1, s1, -1 +80000a00: 13 04 c4 ff addi s0, s0, -4 +80000a04: e7 80 07 00 jalr a5 +80000a08: e3 98 04 fe bnez s1, -16 +80000a0c: 83 20 c1 00 lw ra, 12(sp) +80000a10: 03 24 81 00 lw s0, 8(sp) +80000a14: 83 24 41 00 lw s1, 4(sp) +80000a18: 13 01 01 01 addi sp, sp, 16 +80000a1c: 67 80 00 00 ret + +80000a20 __libc_init_array: +80000a20: 13 01 01 ff addi sp, sp, -16 +80000a24: 23 24 81 00 sw s0, 8(sp) +80000a28: 23 20 21 01 sw s2, 0(sp) +80000a2c: 37 14 00 80 lui s0, 524289 +80000a30: 37 19 00 80 lui s2, 524289 +80000a34: 93 07 04 00 mv a5, s0 +80000a38: 13 09 09 00 mv s2, s2 +80000a3c: 33 09 f9 40 sub s2, s2, a5 +80000a40: 23 26 11 00 sw ra, 12(sp) +80000a44: 23 22 91 00 sw s1, 4(sp) +80000a48: 13 59 29 40 srai s2, s2, 2 +80000a4c: 63 00 09 02 beqz s2, 32 +80000a50: 13 04 04 00 mv s0, s0 +80000a54: 93 04 00 00 mv s1, zero +80000a58: 83 27 04 00 lw a5, 0(s0) +80000a5c: 93 84 14 00 addi s1, s1, 1 +80000a60: 13 04 44 00 addi s0, s0, 4 +80000a64: e7 80 07 00 jalr a5 +80000a68: e3 18 99 fe bne s2, s1, -16 +80000a6c: 37 14 00 80 lui s0, 524289 +80000a70: 37 19 00 80 lui s2, 524289 +80000a74: 93 07 04 00 mv a5, s0 +80000a78: 13 09 49 00 addi s2, s2, 4 +80000a7c: 33 09 f9 40 sub s2, s2, a5 +80000a80: 13 59 29 40 srai s2, s2, 2 +80000a84: 63 00 09 02 beqz s2, 32 +80000a88: 13 04 04 00 mv s0, s0 +80000a8c: 93 04 00 00 mv s1, zero +80000a90: 83 27 04 00 lw a5, 0(s0) +80000a94: 93 84 14 00 addi s1, s1, 1 +80000a98: 13 04 44 00 addi s0, s0, 4 +80000a9c: e7 80 07 00 jalr a5 +80000aa0: e3 18 99 fe bne s2, s1, -16 +80000aa4: 83 20 c1 00 lw ra, 12(sp) +80000aa8: 03 24 81 00 lw s0, 8(sp) +80000aac: 83 24 41 00 lw s1, 4(sp) +80000ab0: 03 29 01 00 lw s2, 0(sp) +80000ab4: 13 01 01 01 addi sp, sp, 16 +80000ab8: 67 80 00 00 ret + +80000abc memset: +80000abc: 13 03 f0 00 addi t1, zero, 15 +80000ac0: 13 07 05 00 mv a4, a0 +80000ac4: 63 7e c3 02 bgeu t1, a2, 60 +80000ac8: 93 77 f7 00 andi a5, a4, 15 +80000acc: 63 90 07 0a bnez a5, 160 +80000ad0: 63 92 05 08 bnez a1, 132 +80000ad4: 93 76 06 ff andi a3, a2, -16 +80000ad8: 13 76 f6 00 andi a2, a2, 15 +80000adc: b3 86 e6 00 add a3, a3, a4 +80000ae0: 23 20 b7 00 sw a1, 0(a4) +80000ae4: 23 22 b7 00 sw a1, 4(a4) +80000ae8: 23 24 b7 00 sw a1, 8(a4) +80000aec: 23 26 b7 00 sw a1, 12(a4) +80000af0: 13 07 07 01 addi a4, a4, 16 +80000af4: e3 66 d7 fe bltu a4, a3, -20 +80000af8: 63 14 06 00 bnez a2, 8 +80000afc: 67 80 00 00 ret +80000b00: b3 06 c3 40 sub a3, t1, a2 +80000b04: 93 96 26 00 slli a3, a3, 2 +80000b08: 97 02 00 00 auipc t0, 0 +80000b0c: b3 86 56 00 add a3, a3, t0 +80000b10: 67 80 c6 00 jr 12(a3) +80000b14: 23 07 b7 00 sb a1, 14(a4) +80000b18: a3 06 b7 00 sb a1, 13(a4) +80000b1c: 23 06 b7 00 sb a1, 12(a4) +80000b20: a3 05 b7 00 sb a1, 11(a4) +80000b24: 23 05 b7 00 sb a1, 10(a4) +80000b28: a3 04 b7 00 sb a1, 9(a4) +80000b2c: 23 04 b7 00 sb a1, 8(a4) +80000b30: a3 03 b7 00 sb a1, 7(a4) +80000b34: 23 03 b7 00 sb a1, 6(a4) +80000b38: a3 02 b7 00 sb a1, 5(a4) +80000b3c: 23 02 b7 00 sb a1, 4(a4) +80000b40: a3 01 b7 00 sb a1, 3(a4) +80000b44: 23 01 b7 00 sb a1, 2(a4) +80000b48: a3 00 b7 00 sb a1, 1(a4) +80000b4c: 23 00 b7 00 sb a1, 0(a4) +80000b50: 67 80 00 00 ret +80000b54: 93 f5 f5 0f andi a1, a1, 255 +80000b58: 93 96 85 00 slli a3, a1, 8 +80000b5c: b3 e5 d5 00 or a1, a1, a3 +80000b60: 93 96 05 01 slli a3, a1, 16 +80000b64: b3 e5 d5 00 or a1, a1, a3 +80000b68: 6f f0 df f6 j -148 +80000b6c: 93 96 27 00 slli a3, a5, 2 +80000b70: 97 02 00 00 auipc t0, 0 +80000b74: b3 86 56 00 add a3, a3, t0 +80000b78: 93 82 00 00 mv t0, ra +80000b7c: e7 80 06 fa jalr -96(a3) +80000b80: 93 80 02 00 mv ra, t0 +80000b84: 93 87 07 ff addi a5, a5, -16 +80000b88: 33 07 f7 40 sub a4, a4, a5 +80000b8c: 33 06 f6 00 add a2, a2, a5 +80000b90: e3 78 c3 f6 bgeu t1, a2, -144 +80000b94: 6f f0 df f3 j -196 + +80000b98 __register_exitproc: +80000b98: b7 17 00 80 lui a5, 524289 +80000b9c: 03 a7 07 43 lw a4, 1072(a5) +80000ba0: 83 27 87 14 lw a5, 328(a4) +80000ba4: 63 8c 07 04 beqz a5, 88 +80000ba8: 03 a7 47 00 lw a4, 4(a5) +80000bac: 13 08 f0 01 addi a6, zero, 31 +80000bb0: 63 4e e8 06 blt a6, a4, 124 +80000bb4: 13 18 27 00 slli a6, a4, 2 +80000bb8: 63 06 05 02 beqz a0, 44 +80000bbc: 33 83 07 01 add t1, a5, a6 +80000bc0: 23 24 c3 08 sw a2, 136(t1) +80000bc4: 83 a8 87 18 lw a7, 392(a5) +80000bc8: 13 06 10 00 addi a2, zero, 1 +80000bcc: 33 16 e6 00 sll a2, a2, a4 +80000bd0: b3 e8 c8 00 or a7, a7, a2 +80000bd4: 23 a4 17 19 sw a7, 392(a5) +80000bd8: 23 24 d3 10 sw a3, 264(t1) +80000bdc: 93 06 20 00 addi a3, zero, 2 +80000be0: 63 04 d5 02 beq a0, a3, 40 +80000be4: 13 07 17 00 addi a4, a4, 1 +80000be8: 23 a2 e7 00 sw a4, 4(a5) +80000bec: b3 87 07 01 add a5, a5, a6 +80000bf0: 23 a4 b7 00 sw a1, 8(a5) +80000bf4: 13 05 00 00 mv a0, zero +80000bf8: 67 80 00 00 ret +80000bfc: 93 07 c7 14 addi a5, a4, 332 +80000c00: 23 24 f7 14 sw a5, 328(a4) +80000c04: 6f f0 5f fa j -92 +80000c08: 83 a6 c7 18 lw a3, 396(a5) +80000c0c: 13 07 17 00 addi a4, a4, 1 +80000c10: 23 a2 e7 00 sw a4, 4(a5) +80000c14: 33 e6 c6 00 or a2, a3, a2 +80000c18: 23 a6 c7 18 sw a2, 396(a5) +80000c1c: b3 87 07 01 add a5, a5, a6 +80000c20: 23 a4 b7 00 sw a1, 8(a5) +80000c24: 13 05 00 00 mv a0, zero +80000c28: 67 80 00 00 ret +80000c2c: 13 05 f0 ff addi a0, zero, -1 +80000c30: 67 80 00 00 ret + +80000c34 __call_exitprocs: +80000c34: 13 01 01 fd addi sp, sp, -48 +80000c38: b7 17 00 80 lui a5, 524289 +80000c3c: 23 2c 41 01 sw s4, 24(sp) +80000c40: 03 aa 07 43 lw s4, 1072(a5) +80000c44: 23 20 21 03 sw s2, 32(sp) +80000c48: 23 26 11 02 sw ra, 44(sp) +80000c4c: 03 29 8a 14 lw s2, 328(s4) +80000c50: 23 24 81 02 sw s0, 40(sp) +80000c54: 23 22 91 02 sw s1, 36(sp) +80000c58: 23 2e 31 01 sw s3, 28(sp) +80000c5c: 23 2a 51 01 sw s5, 20(sp) +80000c60: 23 28 61 01 sw s6, 16(sp) +80000c64: 23 26 71 01 sw s7, 12(sp) +80000c68: 23 24 81 01 sw s8, 8(sp) +80000c6c: 63 00 09 04 beqz s2, 64 +80000c70: 13 0b 05 00 mv s6, a0 +80000c74: 93 8b 05 00 mv s7, a1 +80000c78: 93 0a 10 00 addi s5, zero, 1 +80000c7c: 93 09 f0 ff addi s3, zero, -1 +80000c80: 83 24 49 00 lw s1, 4(s2) +80000c84: 13 84 f4 ff addi s0, s1, -1 +80000c88: 63 42 04 02 bltz s0, 36 +80000c8c: 93 94 24 00 slli s1, s1, 2 +80000c90: b3 04 99 00 add s1, s2, s1 +80000c94: 63 84 0b 04 beqz s7, 72 +80000c98: 83 a7 44 10 lw a5, 260(s1) +80000c9c: 63 80 77 05 beq a5, s7, 64 +80000ca0: 13 04 f4 ff addi s0, s0, -1 +80000ca4: 93 84 c4 ff addi s1, s1, -4 +80000ca8: e3 16 34 ff bne s0, s3, -20 +80000cac: 83 20 c1 02 lw ra, 44(sp) +80000cb0: 03 24 81 02 lw s0, 40(sp) +80000cb4: 83 24 41 02 lw s1, 36(sp) +80000cb8: 03 29 01 02 lw s2, 32(sp) +80000cbc: 83 29 c1 01 lw s3, 28(sp) +80000cc0: 03 2a 81 01 lw s4, 24(sp) +80000cc4: 83 2a 41 01 lw s5, 20(sp) +80000cc8: 03 2b 01 01 lw s6, 16(sp) +80000ccc: 83 2b c1 00 lw s7, 12(sp) +80000cd0: 03 2c 81 00 lw s8, 8(sp) +80000cd4: 13 01 01 03 addi sp, sp, 48 +80000cd8: 67 80 00 00 ret +80000cdc: 83 27 49 00 lw a5, 4(s2) +80000ce0: 83 a6 44 00 lw a3, 4(s1) +80000ce4: 93 87 f7 ff addi a5, a5, -1 +80000ce8: 63 8e 87 04 beq a5, s0, 92 +80000cec: 23 a2 04 00 sw zero, 4(s1) +80000cf0: e3 88 06 fa beqz a3, -80 +80000cf4: 83 27 89 18 lw a5, 392(s2) +80000cf8: 33 97 8a 00 sll a4, s5, s0 +80000cfc: 03 2c 49 00 lw s8, 4(s2) +80000d00: b3 77 f7 00 and a5, a4, a5 +80000d04: 63 92 07 02 bnez a5, 36 +80000d08: e7 80 06 00 jalr a3 +80000d0c: 03 27 49 00 lw a4, 4(s2) +80000d10: 83 27 8a 14 lw a5, 328(s4) +80000d14: 63 14 87 01 bne a4, s8, 8 +80000d18: e3 04 f9 f8 beq s2, a5, -120 +80000d1c: e3 88 07 f8 beqz a5, -112 +80000d20: 13 89 07 00 mv s2, a5 +80000d24: 6f f0 df f5 j -164 +80000d28: 83 27 c9 18 lw a5, 396(s2) +80000d2c: 83 a5 44 08 lw a1, 132(s1) +80000d30: 33 77 f7 00 and a4, a4, a5 +80000d34: 63 1c 07 00 bnez a4, 24 +80000d38: 13 05 0b 00 mv a0, s6 +80000d3c: e7 80 06 00 jalr a3 +80000d40: 6f f0 df fc j -52 +80000d44: 23 22 89 00 sw s0, 4(s2) +80000d48: 6f f0 9f fa j -88 +80000d4c: 13 85 05 00 mv a0, a1 +80000d50: e7 80 06 00 jalr a3 +80000d54: 6f f0 9f fb j -72 Disassembly of section .init_array: @@ -1071,7 +1103,7 @@ Disassembly of section .symtab: 9e: f1 ff a0: 0e 00 a2: 00 00 - a4: 78 08 + a4: f8 08 a6: 00 80 a8: 00 00 aa: 00 00 @@ -1181,7 +1213,7 @@ Disassembly of section .symtab: 1cc: 00 00 1ce: 03 00 03 01 lb zero, 16(t1) 1d2: 00 00 - 1d4: cc 08 + 1d4: 4c 09 1d6: 00 80 1d8: 00 00 1da: 00 00 @@ -1194,7 +1226,7 @@ Disassembly of section .symtab: 1ee: f1 ff 1f0: 1c 01 1f2: 00 00 - 1f4: 7c 08 + 1f4: fc 08 1f6: 00 80 1f8: 00 00 1fa: 00 00 @@ -1202,50 +1234,51 @@ Disassembly of section .symtab: 1fe: 02 00 200: 26 01 202: 00 00 - 204: 68 00 - 206: 00 80 - 208: 4c 01 + 204: 00 04 + 206: 00 00 + 208: 00 00 20a: 00 00 - 20c: 12 00 - 20e: 02 00 - 210: 3c 01 - 212: 00 00 - 214: 00 04 - 216: 00 00 - 218: 00 00 + 20c: 10 00 + 20e: f1 ff + 210: 33 01 00 00 add sp, zero, zero + 214: 40 14 + 216: 00 80 + 218: 40 00 21a: 00 00 - 21c: 10 00 - 21e: f1 ff - 220: 49 01 + 21c: 11 00 + 21e: 06 00 + 220: 41 01 222: 00 00 - 224: 40 14 + 224: 04 09 226: 00 80 - 228: 20 00 + 228: 00 00 22a: 00 00 - 22c: 11 00 - 22e: 06 00 - 230: 57 01 00 00 - 234: 84 08 + 22c: 12 00 + 22e: 02 00 + 230: 48 01 + 232: 00 00 + 234: ec 05 236: 00 80 - 238: 00 00 + 238: 6c 01 23a: 00 00 23c: 12 00 23e: 02 00 - 240: 5e 01 + 240: 65 01 242: 00 00 - 244: 6c 05 + 244: 30 14 246: 00 80 - 248: 6c 01 + 248: 00 00 24a: 00 00 - 24c: 12 00 - 24e: 02 00 - 250: 7b 01 00 00 - 254: 30 14 + 24c: 10 00 + 24e: 05 00 + 250: 75 01 + 252: 00 00 + 254: 68 00 256: 00 80 - 258: 00 00 + 258: 48 01 25a: 00 00 - 25c: 10 00 - 25e: 05 00 + 25c: 12 00 + 25e: 02 00 260: 8b 01 00 00 264: 08 18 266: 00 80 @@ -1255,7 +1288,7 @@ Disassembly of section .symtab: 26e: f1 ff 270: 9c 01 272: 00 00 - 274: 9c 08 + 274: 1c 09 276: 00 80 278: 00 00 27a: 00 00 @@ -1263,7 +1296,7 @@ Disassembly of section .symtab: 27e: 02 00 280: a4 01 282: 00 00 - 284: dc 08 + 284: 5c 09 286: 00 80 288: 00 00 28a: 00 00 @@ -1271,15 +1304,15 @@ Disassembly of section .symtab: 28e: 02 00 290: b1 01 292: 00 00 - 294: 4c 02 + 294: 48 02 296: 00 80 - 298: 88 01 + 298: 0c 02 29a: 00 00 29c: 12 00 29e: 02 00 2a0: be 01 2a2: 00 00 - 2a4: 94 08 + 2a4: 14 09 2a6: 00 80 2a8: 00 00 2aa: 00 00 @@ -1294,7 +1327,7 @@ Disassembly of section .symtab: 2be: 05 00 2c0: da 01 2c2: 00 00 - 2c4: a0 09 + 2c4: 20 0a 2c6: 00 80 2c8: 9c 00 2ca: 00 00 @@ -1302,14 +1335,14 @@ Disassembly of section .symtab: 2ce: 02 00 2d0: ec 01 2d2: 00 00 - 2d4: d4 08 + 2d4: 54 09 2d6: 00 80 2d8: 00 00 2da: 00 00 2dc: 12 00 2de: 02 00 2e0: fb 01 00 00 - 2e4: a4 08 + 2e4: 24 09 2e6: 00 80 2e8: 00 00 2ea: 00 00 @@ -1317,14 +1350,14 @@ Disassembly of section .symtab: 2ee: 02 00 2f0: 06 02 2f2: 00 00 - 2f4: b4 08 + 2f4: 34 09 2f6: 00 80 2f8: 00 00 2fa: 00 00 2fc: 12 00 2fe: 02 00 300: 13 02 00 00 mv tp, zero - 304: 44 09 + 304: c4 09 306: 00 80 308: 5c 00 30a: 00 00 @@ -1340,14 +1373,14 @@ Disassembly of section .symtab: 31e: f1 ff 320: 31 02 322: 00 00 - 324: 40 08 + 324: c0 08 326: 00 80 328: 00 00 32a: 00 00 32c: 12 00 32e: 02 00 330: 3b 02 00 00 - 334: 8c 08 + 334: 0c 09 336: 00 80 338: 00 00 33a: 00 00 @@ -1355,7 +1388,7 @@ Disassembly of section .symtab: 33e: 02 00 340: 46 02 342: 00 00 - 344: b4 0b + 344: 34 0c 346: 00 80 348: 24 01 34a: 00 00 @@ -1370,14 +1403,14 @@ Disassembly of section .symtab: 35c: 12 00 35e: 01 00 360: 57 02 00 00 - 364: 18 0b + 364: 98 0b 366: 00 80 368: 9c 00 36a: 00 00 36c: 12 00 36e: 02 00 370: 6b 02 00 00 - 374: 60 14 + 374: 80 14 376: 00 80 378: 00 00 37a: 00 00 @@ -1391,7 +1424,7 @@ Disassembly of section .symtab: 38c: 10 00 38e: 06 00 390: 83 02 00 00 lb t0, 0(zero) - 394: d8 06 + 394: 58 07 396: 00 80 398: 60 01 39a: 00 00 @@ -1399,7 +1432,7 @@ Disassembly of section .symtab: 39e: 02 00 3a0: a5 02 3a2: 00 00 - 3a4: 3c 0a + 3a4: bc 0a 3a6: 00 80 3a8: dc 00 3aa: 00 00 @@ -1407,7 +1440,7 @@ Disassembly of section .symtab: 3ae: 02 00 3b0: ac 02 3b2: 00 00 - 3b4: d4 03 + 3b4: 54 04 3b6: 00 80 3b8: 30 00 3ba: 00 00 @@ -1415,14 +1448,14 @@ Disassembly of section .symtab: 3be: 02 00 3c0: b1 02 3c2: 00 00 - 3c4: ec 08 + 3c4: 6c 09 3c6: 00 80 3c8: 00 00 3ca: 00 00 3cc: 12 00 3ce: 02 00 3d0: bf 02 00 00 - 3d4: fc 08 + 3d4: 7c 09 3d6: 00 80 3d8: 14 00 3da: 00 00 @@ -1430,7 +1463,7 @@ Disassembly of section .symtab: 3de: 02 00 3e0: c6 02 3e2: 00 00 - 3e4: c4 08 + 3e4: 44 09 3e6: 00 80 3e8: 00 00 3ea: 00 00 @@ -1438,7 +1471,7 @@ Disassembly of section .symtab: 3ee: 02 00 3f0: d4 02 3f2: 00 00 - 3f4: e4 08 + 3f4: 64 09 3f6: 00 80 3f8: 00 00 3fa: 00 00 @@ -1446,7 +1479,7 @@ Disassembly of section .symtab: 3fe: 02 00 400: e1 02 402: 00 00 - 404: ac 08 + 404: 2c 09 406: 00 80 408: 00 00 40a: 00 00 @@ -1454,70 +1487,69 @@ Disassembly of section .symtab: 40e: 02 00 410: ed 02 412: 00 00 - 414: b4 01 + 414: 08 10 416: 00 80 - 418: 98 00 + 418: 00 00 41a: 00 00 - 41c: 12 00 - 41e: 02 00 - 420: 06 03 + 41c: 10 00 + 41e: 04 00 + 420: fc 02 422: 00 00 - 424: 08 10 + 424: 40 14 426: 00 80 428: 00 00 42a: 00 00 42c: 10 00 - 42e: 04 00 - 430: 15 03 + 42e: 05 00 + 430: 9d 00 432: 00 00 - 434: 40 14 + 434: 80 14 436: 00 80 438: 00 00 43a: 00 00 43c: 10 00 - 43e: 05 00 - 440: 9d 00 + 43e: 06 00 + 440: 45 03 442: 00 00 - 444: 60 14 + 444: 90 09 446: 00 80 - 448: 00 00 + 448: 34 00 44a: 00 00 - 44c: 10 00 - 44e: 06 00 - 450: 3e 03 - 452: 00 00 - 454: 10 09 + 44c: 12 00 + 44e: 02 00 + 450: 03 03 00 00 lb t1, 0(zero) + 454: b0 01 456: 00 80 - 458: 34 00 + 458: 98 00 45a: 00 00 45c: 12 00 45e: 02 00 - 460: 1c 03 - 462: 00 00 - 464: bc 08 + 460: 23 03 00 00 sb zero, 6(zero) + 464: 3c 09 466: 00 80 468: 00 00 46a: 00 00 46c: 12 00 46e: 02 00 - 470: 2a 03 + 470: 31 03 472: 00 00 - 474: 04 04 + 474: 84 04 476: 00 80 478: 68 01 47a: 00 00 47c: 12 00 47e: 02 00 - 480: 3d 03 + 480: 44 03 482: 00 00 - 484: 38 08 + 484: b8 08 486: 00 80 488: 00 00 48a: 00 00 48c: 12 00 48e: 02 00 - 490: 43 03 00 00 fmadd.s ft6, ft0, ft0, ft0, rne - 494: f4 08 + 490: 4a 03 + 492: 00 00 + 494: 74 09 496: 00 80 498: 00 00 49a: 00 00 @@ -1557,13 +1589,12 @@ Disassembly of section .strtab: 3e: 5f 6b 65 72 42: 6e 65 44: 6c 2d - 46: 65 34 - 48: 2d 38 - 4a: 33 2d 65 39 - 4e: 2d 33 - 50: 36 2d - 52: 65 62 - 54: 2e 63 + 46: 64 62 + 48: 2d 33 + 4a: 65 2d + 4c: 33 61 2d 63 + 50: 62 2d + 52: 33 30 2e 63 56: 00 70 58: 61 72 5a: 61 6c @@ -1648,40 +1679,41 @@ Disassembly of section .strtab: 11e: 5f 77 73 70 122: 61 77 124: 6e 00 - 126: 6b 65 72 6e - 12a: 65 6c - 12c: 5f 73 70 61 - 130: 77 6e 5f 72 - 134: 75 6e - 136: 5f 77 61 72 - 13a: 70 00 - 13c: 5f 5f 73 74 - 140: 61 63 - 142: 6b 5f 73 69 - 146: 7a 65 - 148: 00 67 - 14a: 5f 77 73 70 - 14e: 61 77 - 150: 6e 5f - 152: 61 72 - 154: 67 73 00 76 - 158: 78 5f - 15a: 74 6d - 15c: 63 00 5f 70 beq t5, t0, 1792 - 160: 6f 63 6c 5f jal t1, 812534 - 164: 6b 65 72 6e - 168: 65 6c - 16a: 5f 73 67 65 - 16e: 6d 6d - 170: 5f 77 6f 72 - 174: 6b 67 72 6f - 178: 75 70 - 17a: 00 5f - 17c: 5f 53 44 41 - 180: 54 41 - 182: 5f 42 45 47 - 186: 49 4e - 188: 5f 5f 00 5f + 126: 5f 5f 73 74 + 12a: 61 63 + 12c: 6b 5f 73 69 + 130: 7a 65 + 132: 00 67 + 134: 5f 77 73 70 + 138: 61 77 + 13a: 6e 5f + 13c: 61 72 + 13e: 67 73 00 76 + 142: 78 5f + 144: 74 6d + 146: 63 00 5f 70 beq t5, t0, 1792 + 14a: 6f 63 6c 5f jal t1, 812534 + 14e: 6b 65 72 6e + 152: 65 6c + 154: 5f 73 67 65 + 158: 6d 6d + 15a: 5f 77 6f 72 + 15e: 6b 67 72 6f + 162: 75 70 + 164: 00 5f + 166: 5f 53 44 41 + 16a: 54 41 + 16c: 5f 42 45 47 + 170: 49 4e + 172: 5f 5f 00 6b + 176: 65 72 + 178: 6e 65 + 17a: 6c 5f + 17c: 73 70 61 77 csrci 1910, 2 + 180: 6e 5f + 182: 63 61 6c 6c bltu s8, t1, 1730 + 186: 62 61 + 188: 63 6b 00 5f bltu zero, a6, 1526 18c: 5f 67 6c 6f 190: 62 61 192: 6c 5f @@ -1812,48 +1844,49 @@ Disassembly of section .strtab: 2e2: 78 5f 2e4: 77 61 72 70 2e8: 5f 67 69 64 - 2ec: 00 6b - 2ee: 65 72 - 2f0: 6e 65 - 2f2: 6c 5f - 2f4: 73 70 61 77 csrci 1910, 2 - 2f8: 6e 5f - 2fa: 72 75 - 2fc: 6e 5f - 2fe: 74 68 - 300: 72 65 - 302: 61 64 - 304: 73 00 5f 5f - 308: 44 41 - 30a: 54 41 - 30c: 5f 42 45 47 - 310: 49 4e - 312: 5f 5f 00 5f - 316: 65 64 - 318: 61 74 - 31a: 61 00 - 31c: 76 78 - 31e: 5f 74 68 72 - 322: 65 61 - 324: 64 5f - 326: 6c 69 - 328: 64 00 - 32a: 5f 70 6f 63 - 32e: 6c 5f - 330: 6b 65 72 6e - 334: 65 6c - 336: 5f 73 67 65 - 33a: 6d 6d - 33c: 00 5f - 33e: 65 78 - 340: 69 74 - 342: 00 76 - 344: 78 5f - 346: 6e 75 - 348: 6d 5f - 34a: 69 6e - 34c: 73 74 72 73 csrrci s0, 1847, 4 - 350: 00 + 2ec: 00 5f + 2ee: 5f 44 41 54 + 2f2: 41 5f + 2f4: 42 45 + 2f6: 47 49 4e 5f + 2fa: 5f 00 5f 65 + 2fe: 64 61 + 300: 74 61 + 302: 00 6b + 304: 65 72 + 306: 6e 65 + 308: 6c 5f + 30a: 73 70 61 77 csrci 1910, 2 + 30e: 6e 5f + 310: 72 65 + 312: 6d 61 + 314: 69 6e + 316: 69 6e + 318: 67 5f 63 61 + 31c: 6c 6c + 31e: 62 61 + 320: 63 6b 00 76 bltu zero, zero, 1910 + 324: 78 5f + 326: 74 68 + 328: 72 65 + 32a: 61 64 + 32c: 5f 6c 69 64 + 330: 00 5f + 332: 70 6f + 334: 63 6c 5f 6b bltu t5, s5, 1720 + 338: 65 72 + 33a: 6e 65 + 33c: 6c 5f + 33e: 73 67 65 6d csrrsi a4, 1750, 10 + 342: 6d 00 + 344: 5f 65 78 69 + 348: 74 00 + 34a: 76 78 + 34c: 5f 6e 75 6d + 350: 5f 69 6e 73 + 354: 74 72 + 356: 73 + 357: 00 Disassembly of section .shstrtab: diff --git a/benchmarks/opencl/vecadd/kernel.pocl b/benchmarks/opencl/vecadd/kernel.pocl index 15160e74..ad3437b9 100644 Binary files a/benchmarks/opencl/vecadd/kernel.pocl and b/benchmarks/opencl/vecadd/kernel.pocl differ diff --git a/benchmarks/opencl/vecadd/vecadd.dump b/benchmarks/opencl/vecadd/vecadd.dump index 8e67e988..998c987d 100644 --- a/benchmarks/opencl/vecadd/vecadd.dump +++ b/benchmarks/opencl/vecadd/vecadd.dump @@ -1,30 +1,30 @@ -/tmp/pocl_vortex_kernel-48-6e-0d-ed-d9.elf: file format ELF32-riscv +/tmp/pocl_vortex_kernel-10-e6-f3-c4-7d.elf: file format ELF32-riscv Disassembly of section .init: 80000000 _start: 80000000: 97 05 00 00 auipc a1, 0 -80000004: 93 85 c5 5b addi a1, a1, 1468 +80000004: 93 85 c5 63 addi a1, a1, 1596 80000008: 73 25 10 fc csrr a0, 4033 8000000c: 6b 10 b5 00 -80000010: ef 00 c0 5a jal 1452 +80000010: ef 00 c0 62 jal 1580 80000014: 13 05 10 00 addi a0, zero, 1 80000018: 6b 00 05 00 8000001c: 17 25 00 00 auipc a0, 2 -80000020: 13 05 85 e6 addi a0, a0, -408 +80000020: 13 05 85 ee addi a0, a0, -280 80000024: 17 26 00 00 auipc a2, 2 -80000028: 13 06 06 e8 addi a2, a2, -384 +80000028: 13 06 06 f2 addi a2, a2, -224 8000002c: 33 06 a6 40 sub a2, a2, a0 80000030: 93 05 00 00 mv a1, zero -80000034: ef 00 40 78 jal 1924 +80000034: ef 00 50 00 jal 2052 80000038: 17 05 00 00 auipc a0, 0 -8000003c: 13 05 85 68 addi a0, a0, 1672 -80000040: ef 00 80 63 jal 1592 -80000044: ef 00 80 6d jal 1752 -80000048: ef 00 c0 38 jal 908 -8000004c: 6f 00 00 64 j 1600 +8000003c: 13 05 85 70 addi a0, a0, 1800 +80000040: ef 00 80 6b jal 1720 +80000044: ef 00 80 75 jal 1880 +80000048: ef 00 c0 40 jal 1036 +8000004c: 6f 00 00 6c j 1728 Disassembly of section .text: @@ -32,11 +32,11 @@ Disassembly of section .text: 80000050: 93 07 00 00 mv a5, zero 80000054: 63 88 07 00 beqz a5, 16 80000058: 37 05 00 80 lui a0, 524288 -8000005c: 13 05 05 6c addi a0, a0, 1728 -80000060: 6f 00 80 61 j 1560 +8000005c: 13 05 05 74 addi a0, a0, 1856 +80000060: 6f 00 80 69 j 1688 80000064: 67 80 00 00 ret -80000068 kernel_spawn_run_warp: +80000068 kernel_spawn_callback: 80000068: 13 01 01 fd addi sp, sp, -48 8000006c: 23 26 11 02 sw ra, 44(sp) 80000070: 23 24 81 02 sw s0, 40(sp) @@ -48,190 +48,190 @@ Disassembly of section .text: 80000088: 23 28 61 01 sw s6, 16(sp) 8000008c: 23 26 71 01 sw s7, 12(sp) 80000090: 23 24 81 01 sw s8, 8(sp) -80000094: ef 00 c0 5b jal 1468 -80000098: ef 00 80 56 jal 1384 -8000009c: ef 00 c0 5a jal 1452 +80000094: ef 00 c0 63 jal 1596 +80000098: ef 00 80 5e jal 1512 +8000009c: ef 00 c0 62 jal 1580 800000a0: 93 04 05 00 mv s1, a0 -800000a4: ef 00 c0 57 jal 1404 +800000a4: ef 00 c0 5f jal 1532 800000a8: 93 09 05 00 mv s3, a0 -800000ac: ef 00 40 58 jal 1412 +800000ac: ef 00 40 60 jal 1540 800000b0: 13 09 05 00 mv s2, a0 -800000b4: ef 00 c0 59 jal 1436 +800000b4: ef 00 c0 61 jal 1564 800000b8: b7 25 00 80 lui a1, 524290 -800000bc: 93 85 45 e8 addi a1, a1, -380 +800000bc: 93 85 45 f0 addi a1, a1, -252 800000c0: 13 96 24 00 slli a2, s1, 2 800000c4: b3 05 b6 00 add a1, a2, a1 -800000c8: 03 ab 05 00 lw s6, 0(a1) -800000cc: 83 25 4b 01 lw a1, 20(s6) -800000d0: 03 26 0b 01 lw a2, 16(s6) -800000d4: 93 86 05 00 mv a3, a1 -800000d8: 63 c4 35 01 blt a1, s3, 8 -800000dc: 93 86 09 00 mv a3, s3 -800000e0: b3 a5 b9 00 slt a1, s3, a1 -800000e4: 33 07 b6 00 add a4, a2, a1 -800000e8: 93 05 10 00 addi a1, zero, 1 -800000ec: 63 4a b7 08 blt a4, a1, 148 -800000f0: 83 25 0b 00 lw a1, 0(s6) -800000f4: 83 aa 05 00 lw s5, 0(a1) -800000f8: 83 a7 45 00 lw a5, 4(a1) -800000fc: 83 24 cb 00 lw s1, 12(s6) -80000100: 33 8a 57 03 mul s4, a5, s5 -80000104: 13 0c f7 ff addi s8, a4, -1 -80000108: 33 86 c9 02 mul a2, s3, a2 -8000010c: 33 86 c6 00 add a2, a3, a2 -80000110: 33 05 c5 02 mul a0, a0, a2 -80000114: 33 85 a4 00 add a0, s1, a0 -80000118: 33 06 e9 02 mul a2, s2, a4 -8000011c: b3 04 c5 00 add s1, a0, a2 -80000120: 33 09 f0 40 neg s2, a5 -80000124: b3 0b 40 41 neg s7, s4 -80000128: 33 c6 44 03 div a2, s1, s4 +800000c8: 03 ac 05 00 lw s8, 0(a1) +800000cc: 83 26 4c 01 lw a3, 20(s8) +800000d0: 83 25 0c 01 lw a1, 16(s8) +800000d4: 13 86 06 00 mv a2, a3 +800000d8: 63 c4 36 01 blt a3, s3, 8 +800000dc: 13 86 09 00 mv a2, s3 +800000e0: b3 a6 d9 00 slt a3, s3, a3 +800000e4: b3 86 d5 00 add a3, a1, a3 +800000e8: 13 07 10 00 addi a4, zero, 1 +800000ec: 63 c8 e6 08 blt a3, a4, 144 +800000f0: 33 87 35 03 mul a4, a1, s3 +800000f4: 83 25 0c 00 lw a1, 0(s8) +800000f8: 33 06 e6 00 add a2, a2, a4 +800000fc: 03 27 cc 00 lw a4, 12(s8) +80000100: 33 05 a6 02 mul a0, a2, a0 +80000104: 03 aa 05 00 lw s4, 0(a1) +80000108: 03 a6 45 00 lw a2, 4(a1) +8000010c: 33 05 e5 00 add a0, a0, a4 +80000110: 33 87 26 03 mul a4, a3, s2 +80000114: 33 04 e5 00 add s0, a0, a4 +80000118: 33 09 46 03 mul s2, a2, s4 +8000011c: b3 0a d4 00 add s5, s0, a3 +80000120: 33 0b c0 40 neg s6, a2 +80000124: b3 0b 20 41 neg s7, s2 +80000128: 33 46 24 03 div a2, s0, s2 8000012c: 33 85 cb 02 mul a0, s7, a2 -80000130: 33 85 a4 00 add a0, s1, a0 -80000134: b3 46 55 03 div a3, a0, s5 +80000130: 33 05 a4 00 add a0, s0, a0 +80000134: b3 46 45 03 div a3, a0, s4 80000138: 03 a5 c5 00 lw a0, 12(a1) -8000013c: 33 07 c9 02 mul a4, s2, a2 +8000013c: 33 07 cb 02 mul a4, s6, a2 80000140: 33 07 d7 40 sub a4, a4, a3 -80000144: 33 87 ea 02 mul a4, s5, a4 +80000144: 33 07 ea 02 mul a4, s4, a4 80000148: 33 08 e5 00 add a6, a0, a4 8000014c: 03 a7 05 01 lw a4, 16(a1) -80000150: 03 a4 45 01 lw s0, 20(a1) -80000154: 83 27 4b 00 lw a5, 4(s6) -80000158: 03 25 8b 00 lw a0, 8(s6) +80000150: 83 a4 45 01 lw s1, 20(a1) +80000154: 83 27 4c 00 lw a5, 4(s8) +80000158: 03 25 8c 00 lw a0, 8(s8) 8000015c: b3 06 d7 00 add a3, a4, a3 -80000160: 33 07 c4 00 add a4, s0, a2 -80000164: 33 86 04 01 add a2, s1, a6 +80000160: 33 87 c4 00 add a4, s1, a2 +80000164: 33 06 04 01 add a2, s0, a6 80000168: e7 80 07 00 jalr a5 -8000016c: 63 0a 0c 00 beqz s8, 20 -80000170: 83 25 0b 00 lw a1, 0(s6) -80000174: 13 0c fc ff addi s8, s8, -1 -80000178: 93 84 14 00 addi s1, s1, 1 -8000017c: 6f f0 df fa j -84 -80000180: 13 b5 19 00 seqz a0, s3 -80000184: 03 2c 81 00 lw s8, 8(sp) -80000188: 83 2b c1 00 lw s7, 12(sp) -8000018c: 03 2b 01 01 lw s6, 16(sp) -80000190: 83 2a 41 01 lw s5, 20(sp) -80000194: 03 2a 81 01 lw s4, 24(sp) -80000198: 83 29 c1 01 lw s3, 28(sp) -8000019c: 03 29 01 02 lw s2, 32(sp) -800001a0: 83 24 41 02 lw s1, 36(sp) -800001a4: 03 24 81 02 lw s0, 40(sp) -800001a8: 83 20 c1 02 lw ra, 44(sp) -800001ac: 13 01 01 03 addi sp, sp, 48 -800001b0: 6f 00 00 45 j 1104 +8000016c: 13 04 14 00 addi s0, s0, 1 +80000170: 63 56 54 01 bge s0, s5, 12 +80000174: 83 25 0c 00 lw a1, 0(s8) +80000178: 6f f0 1f fb j -80 +8000017c: 13 b5 19 00 seqz a0, s3 +80000180: 03 2c 81 00 lw s8, 8(sp) +80000184: 83 2b c1 00 lw s7, 12(sp) +80000188: 03 2b 01 01 lw s6, 16(sp) +8000018c: 83 2a 41 01 lw s5, 20(sp) +80000190: 03 2a 81 01 lw s4, 24(sp) +80000194: 83 29 c1 01 lw s3, 28(sp) +80000198: 03 29 01 02 lw s2, 32(sp) +8000019c: 83 24 41 02 lw s1, 36(sp) +800001a0: 03 24 81 02 lw s0, 40(sp) +800001a4: 83 20 c1 02 lw ra, 44(sp) +800001a8: 13 01 01 03 addi sp, sp, 48 +800001ac: 6f 00 40 4d j 1236 -800001b4 kernel_spawn_run_threads: -800001b4: 13 01 01 ff addi sp, sp, -16 -800001b8: 23 26 11 00 sw ra, 12(sp) -800001bc: 23 24 81 00 sw s0, 8(sp) -800001c0: ef 00 00 44 jal 1088 -800001c4: ef 00 40 48 jal 1156 -800001c8: 13 04 05 00 mv s0, a0 -800001cc: ef 00 40 47 jal 1140 -800001d0: b7 25 00 80 lui a1, 524290 -800001d4: 93 85 45 e8 addi a1, a1, -380 -800001d8: 13 16 24 00 slli a2, s0, 2 -800001dc: b3 05 b6 00 add a1, a2, a1 -800001e0: 03 a6 05 00 lw a2, 0(a1) -800001e4: 83 25 06 00 lw a1, 0(a2) -800001e8: 83 26 c6 00 lw a3, 12(a2) -800001ec: 03 a7 05 00 lw a4, 0(a1) -800001f0: 83 a7 45 00 lw a5, 4(a1) -800001f4: 33 85 a6 00 add a0, a3, a0 -800001f8: b3 86 e7 02 mul a3, a5, a4 -800001fc: b3 47 d5 02 div a5, a0, a3 -80000200: b3 86 d7 02 mul a3, a5, a3 -80000204: 03 a4 c5 00 lw s0, 12(a1) -80000208: 33 05 d5 40 sub a0, a0, a3 -8000020c: b3 46 e5 02 div a3, a0, a4 -80000210: 33 88 e6 02 mul a6, a3, a4 -80000214: b3 08 a4 00 add a7, s0, a0 -80000218: 03 a7 05 01 lw a4, 16(a1) -8000021c: 03 a4 45 01 lw s0, 20(a1) -80000220: 83 22 46 00 lw t0, 4(a2) -80000224: 03 25 86 00 lw a0, 8(a2) -80000228: 33 86 08 41 sub a2, a7, a6 -8000022c: b3 06 d7 00 add a3, a4, a3 -80000230: 33 07 f4 00 add a4, s0, a5 -80000234: e7 80 02 00 jalr t0 -80000238: 13 05 10 00 addi a0, zero, 1 -8000023c: 03 24 81 00 lw s0, 8(sp) -80000240: 83 20 c1 00 lw ra, 12(sp) -80000244: 13 01 01 01 addi sp, sp, 16 -80000248: 6f 00 80 3b j 952 +800001b0 kernel_spawn_remaining_callback: +800001b0: 13 01 01 ff addi sp, sp, -16 +800001b4: 23 26 11 00 sw ra, 12(sp) +800001b8: 23 24 81 00 sw s0, 8(sp) +800001bc: ef 00 40 4c jal 1220 +800001c0: ef 00 80 50 jal 1288 +800001c4: 13 04 05 00 mv s0, a0 +800001c8: ef 00 80 4f jal 1272 +800001cc: b7 25 00 80 lui a1, 524290 +800001d0: 93 85 45 f0 addi a1, a1, -252 +800001d4: 13 16 24 00 slli a2, s0, 2 +800001d8: b3 05 b6 00 add a1, a2, a1 +800001dc: 03 a6 05 00 lw a2, 0(a1) +800001e0: 83 25 06 00 lw a1, 0(a2) +800001e4: 83 26 c6 00 lw a3, 12(a2) +800001e8: 03 a7 05 00 lw a4, 0(a1) +800001ec: 83 a7 45 00 lw a5, 4(a1) +800001f0: 33 85 a6 00 add a0, a3, a0 +800001f4: b3 86 e7 02 mul a3, a5, a4 +800001f8: b3 47 d5 02 div a5, a0, a3 +800001fc: b3 86 d7 02 mul a3, a5, a3 +80000200: 03 a4 c5 00 lw s0, 12(a1) +80000204: 33 05 d5 40 sub a0, a0, a3 +80000208: b3 46 e5 02 div a3, a0, a4 +8000020c: 33 88 e6 02 mul a6, a3, a4 +80000210: b3 08 a4 00 add a7, s0, a0 +80000214: 03 a7 05 01 lw a4, 16(a1) +80000218: 03 a4 45 01 lw s0, 20(a1) +8000021c: 83 22 46 00 lw t0, 4(a2) +80000220: 03 25 86 00 lw a0, 8(a2) +80000224: 33 86 08 41 sub a2, a7, a6 +80000228: b3 06 d7 00 add a3, a4, a3 +8000022c: 33 07 f4 00 add a4, s0, a5 +80000230: e7 80 02 00 jalr t0 +80000234: 13 05 10 00 addi a0, zero, 1 +80000238: 03 24 81 00 lw s0, 8(sp) +8000023c: 83 20 c1 00 lw ra, 12(sp) +80000240: 13 01 01 01 addi sp, sp, 16 +80000244: 6f 00 c0 43 j 1084 -8000024c kernel_spawn: -8000024c: 13 01 01 fc addi sp, sp, -64 -80000250: 23 2e 11 02 sw ra, 60(sp) -80000254: 23 2c 81 02 sw s0, 56(sp) -80000258: 23 2a 91 02 sw s1, 52(sp) -8000025c: 23 28 21 03 sw s2, 48(sp) -80000260: 23 26 31 03 sw s3, 44(sp) -80000264: 23 24 41 03 sw s4, 40(sp) -80000268: 23 22 51 03 sw s5, 36(sp) -8000026c: 23 20 61 03 sw s6, 32(sp) -80000270: 23 2e 71 01 sw s7, 28(sp) -80000274: 23 2c 81 01 sw s8, 24(sp) -80000278: 93 04 05 00 mv s1, a0 -8000027c: 83 2b 05 00 lw s7, 0(a0) -80000280: 03 24 45 00 lw s0, 4(a0) -80000284: 03 2c 85 00 lw s8, 8(a0) -80000288: 13 09 06 00 mv s2, a2 -8000028c: 93 89 05 00 mv s3, a1 -80000290: ef 00 00 3d jal 976 -80000294: 13 0b 05 00 mv s6, a0 -80000298: ef 00 00 3c jal 960 -8000029c: 13 0a 05 00 mv s4, a0 -800002a0: ef 00 00 3b jal 944 -800002a4: 93 0a 05 00 mv s5, a0 -800002a8: ef 00 00 3a jal 928 -800002ac: 93 05 70 00 addi a1, zero, 7 -800002b0: 63 ca a5 0e blt a1, a0, 244 -800002b4: b3 05 74 03 mul a1, s0, s7 -800002b8: 33 86 85 03 mul a2, a1, s8 -800002bc: b3 85 4a 03 mul a1, s5, s4 -800002c0: 93 06 10 00 addi a3, zero, 1 -800002c4: 63 c8 c5 00 blt a1, a2, 16 -800002c8: 63 da 66 01 bge a3, s6, 20 -800002cc: 63 4c d5 00 blt a0, a3, 24 -800002d0: 6f 00 40 0d j 212 -800002d4: b3 46 b6 02 div a3, a2, a1 -800002d8: e3 ca 66 ff blt a3, s6, -12 -800002dc: 93 06 0b 00 mv a3, s6 -800002e0: 63 52 d5 0c bge a0, a3, 196 -800002e4: 13 07 fb ff addi a4, s6, -1 -800002e8: b3 45 d6 02 div a1, a2, a3 -800002ec: 63 0e e5 00 beq a0, a4, 28 -800002f0: 13 06 00 00 mv a2, zero -800002f4: 33 0b b6 00 add s6, a2, a1 -800002f8: 33 46 5b 03 div a2, s6, s5 -800002fc: 93 06 00 00 mv a3, zero -80000300: 63 50 46 03 bge a2, s4, 32 -80000304: 6f 00 00 02 j 32 -80000308: b3 86 d5 02 mul a3, a1, a3 -8000030c: 33 06 d6 40 sub a2, a2, a3 -80000310: 33 0b b6 00 add s6, a2, a1 -80000314: 33 46 5b 03 div a2, s6, s5 -80000318: 93 06 00 00 mv a3, zero -8000031c: 63 44 46 01 blt a2, s4, 8 -80000320: b3 46 46 03 div a3, a2, s4 -80000324: 13 07 00 00 mv a4, zero -80000328: 93 07 10 00 addi a5, zero, 1 -8000032c: 63 88 06 00 beqz a3, 16 -80000330: 33 87 46 03 mul a4, a3, s4 -80000334: 33 07 e6 40 sub a4, a2, a4 -80000338: 93 87 06 00 mv a5, a3 -8000033c: 33 04 56 03 mul s0, a2, s5 +80000248 kernel_spawn: +80000248: 13 01 01 fc addi sp, sp, -64 +8000024c: 23 2e 11 02 sw ra, 60(sp) +80000250: 23 2c 81 02 sw s0, 56(sp) +80000254: 23 2a 91 02 sw s1, 52(sp) +80000258: 23 28 21 03 sw s2, 48(sp) +8000025c: 23 26 31 03 sw s3, 44(sp) +80000260: 23 24 41 03 sw s4, 40(sp) +80000264: 23 22 51 03 sw s5, 36(sp) +80000268: 23 20 61 03 sw s6, 32(sp) +8000026c: 23 2e 71 01 sw s7, 28(sp) +80000270: 23 2c 81 01 sw s8, 24(sp) +80000274: 93 04 05 00 mv s1, a0 +80000278: 83 2b 05 00 lw s7, 0(a0) +8000027c: 03 24 45 00 lw s0, 4(a0) +80000280: 03 2c 85 00 lw s8, 8(a0) +80000284: 13 09 06 00 mv s2, a2 +80000288: 93 89 05 00 mv s3, a1 +8000028c: ef 00 40 45 jal 1108 +80000290: 13 0b 05 00 mv s6, a0 +80000294: ef 00 40 44 jal 1092 +80000298: 13 0a 05 00 mv s4, a0 +8000029c: ef 00 40 43 jal 1076 +800002a0: 93 0a 05 00 mv s5, a0 +800002a4: ef 00 40 42 jal 1060 +800002a8: 93 05 f0 00 addi a1, zero, 15 +800002ac: 63 cc a5 16 blt a1, a0, 376 +800002b0: b3 05 74 03 mul a1, s0, s7 +800002b4: 33 86 85 03 mul a2, a1, s8 +800002b8: b3 85 4a 03 mul a1, s5, s4 +800002bc: 93 06 10 00 addi a3, zero, 1 +800002c0: 63 c8 c5 00 blt a1, a2, 16 +800002c4: 63 da 66 01 bge a3, s6, 20 +800002c8: 63 4c d5 00 blt a0, a3, 24 +800002cc: 6f 00 80 15 j 344 +800002d0: b3 46 b6 02 div a3, a2, a1 +800002d4: e3 ca 66 ff blt a3, s6, -12 +800002d8: 93 06 0b 00 mv a3, s6 +800002dc: 63 54 d5 14 bge a0, a3, 328 +800002e0: 13 07 fb ff addi a4, s6, -1 +800002e4: b3 45 d6 02 div a1, a2, a3 +800002e8: 63 0e e5 00 beq a0, a4, 28 +800002ec: 13 06 00 00 mv a2, zero +800002f0: b3 06 b6 00 add a3, a2, a1 +800002f4: 33 c6 56 03 div a2, a3, s5 +800002f8: 13 07 00 00 mv a4, zero +800002fc: 63 50 46 03 bge a2, s4, 32 +80000300: 6f 00 00 02 j 32 +80000304: b3 86 d5 02 mul a3, a1, a3 +80000308: 33 06 d6 40 sub a2, a2, a3 +8000030c: b3 06 b6 00 add a3, a2, a1 +80000310: 33 c6 56 03 div a2, a3, s5 +80000314: 13 07 00 00 mv a4, zero +80000318: 63 44 46 01 blt a2, s4, 8 +8000031c: 33 47 46 03 div a4, a2, s4 +80000320: 93 07 00 00 mv a5, zero +80000324: b3 0a 56 03 mul s5, a2, s5 +80000328: 13 04 10 00 addi s0, zero, 1 +8000032c: 63 08 07 00 beqz a4, 16 +80000330: b3 07 47 03 mul a5, a4, s4 +80000334: b3 07 f6 40 sub a5, a2, a5 +80000338: 13 04 07 00 mv s0, a4 +8000033c: 33 8b 56 41 sub s6, a3, s5 80000340: 23 20 91 00 sw s1, 0(sp) 80000344: 23 22 31 01 sw s3, 4(sp) 80000348: 23 24 21 01 sw s2, 8(sp) 8000034c: b3 85 a5 02 mul a1, a1, a0 80000350: 23 26 b1 00 sw a1, 12(sp) -80000354: 23 28 f1 00 sw a5, 16(sp) -80000358: 23 2a e1 00 sw a4, 20(sp) +80000354: 23 28 81 00 sw s0, 16(sp) +80000358: 23 2a f1 00 sw a5, 20(sp) 8000035c: b7 25 00 80 lui a1, 524290 -80000360: 93 85 45 e8 addi a1, a1, -380 +80000360: 93 85 45 f0 addi a1, a1, -252 80000364: 13 15 25 00 slli a0, a0, 2 80000368: 33 05 b5 00 add a0, a0, a1 8000036c: 93 05 01 00 mv a1, sp @@ -243,540 +243,572 @@ Disassembly of section .text: 80000384: 37 05 00 80 lui a0, 524288 80000388: 93 05 85 06 addi a1, a0, 104 8000038c: 13 05 06 00 mv a0, a2 -80000390: ef 00 80 26 jal 616 +80000390: ef 00 80 2e jal 744 80000394: ef f0 5f cd jal -812 -80000398: 63 06 8b 00 beq s6, s0, 12 -8000039c: 23 26 81 00 sw s0, 12(sp) -800003a0: ef f0 9f cc jal -824 -800003a4: 03 2c 81 01 lw s8, 24(sp) -800003a8: 83 2b c1 01 lw s7, 28(sp) -800003ac: 03 2b 01 02 lw s6, 32(sp) -800003b0: 83 2a 41 02 lw s5, 36(sp) -800003b4: 03 2a 81 02 lw s4, 40(sp) -800003b8: 83 29 c1 02 lw s3, 44(sp) -800003bc: 03 29 01 03 lw s2, 48(sp) -800003c0: 83 24 41 03 lw s1, 52(sp) -800003c4: 03 24 81 03 lw s0, 56(sp) -800003c8: 83 20 c1 03 lw ra, 60(sp) -800003cc: 13 01 01 04 addi sp, sp, 64 -800003d0: 67 80 00 00 ret +80000398: 63 06 0b 08 beqz s6, 140 +8000039c: 23 26 51 01 sw s5, 12(sp) +800003a0: 13 05 0b 00 mv a0, s6 +800003a4: ef 00 c0 2d jal 732 +800003a8: ef 00 00 32 jal 800 +800003ac: 13 04 05 00 mv s0, a0 +800003b0: ef 00 00 31 jal 784 +800003b4: b7 25 00 80 lui a1, 524290 +800003b8: 93 85 45 f0 addi a1, a1, -252 +800003bc: 13 16 24 00 slli a2, s0, 2 +800003c0: b3 05 b6 00 add a1, a2, a1 +800003c4: 03 a6 05 00 lw a2, 0(a1) +800003c8: 83 25 06 00 lw a1, 0(a2) +800003cc: 83 26 c6 00 lw a3, 12(a2) +800003d0: 03 a7 05 00 lw a4, 0(a1) +800003d4: 83 a7 45 00 lw a5, 4(a1) +800003d8: 33 85 a6 00 add a0, a3, a0 +800003dc: b3 86 e7 02 mul a3, a5, a4 +800003e0: b3 47 d5 02 div a5, a0, a3 +800003e4: b3 86 d7 02 mul a3, a5, a3 +800003e8: 83 a4 c5 00 lw s1, 12(a1) +800003ec: 33 05 d5 40 sub a0, a0, a3 +800003f0: b3 46 e5 02 div a3, a0, a4 +800003f4: 33 88 e6 02 mul a6, a3, a4 +800003f8: b3 84 a4 00 add s1, s1, a0 +800003fc: 03 a4 05 01 lw s0, 16(a1) +80000400: 03 a7 45 01 lw a4, 20(a1) +80000404: 83 28 46 00 lw a7, 4(a2) +80000408: 03 25 86 00 lw a0, 8(a2) +8000040c: 33 86 04 41 sub a2, s1, a6 +80000410: b3 06 d4 00 add a3, s0, a3 +80000414: 33 07 f7 00 add a4, a4, a5 +80000418: e7 80 08 00 jalr a7 +8000041c: 13 05 10 00 addi a0, zero, 1 +80000420: ef 00 00 26 jal 608 +80000424: 03 2c 81 01 lw s8, 24(sp) +80000428: 83 2b c1 01 lw s7, 28(sp) +8000042c: 03 2b 01 02 lw s6, 32(sp) +80000430: 83 2a 41 02 lw s5, 36(sp) +80000434: 03 2a 81 02 lw s4, 40(sp) +80000438: 83 29 c1 02 lw s3, 44(sp) +8000043c: 03 29 01 03 lw s2, 48(sp) +80000440: 83 24 41 03 lw s1, 52(sp) +80000444: 03 24 81 03 lw s0, 56(sp) +80000448: 83 20 c1 03 lw ra, 60(sp) +8000044c: 13 01 01 04 addi sp, sp, 64 +80000450: 67 80 00 00 ret -800003d4 main: -800003d4: 13 01 01 ff addi sp, sp, -16 -800003d8: 23 26 11 00 sw ra, 12(sp) -800003dc: 37 05 00 80 lui a0, 524288 -800003e0: 93 05 05 4a addi a1, a0, 1184 -800003e4: 37 05 ff 7f lui a0, 524272 -800003e8: 13 06 45 03 addi a2, a0, 52 -800003ec: 37 05 ff 7f lui a0, 524272 -800003f0: ef f0 df e5 jal -420 -800003f4: 13 05 00 00 mv a0, zero -800003f8: 83 20 c1 00 lw ra, 12(sp) -800003fc: 13 01 01 01 addi sp, sp, 16 -80000400: 67 80 00 00 ret +80000454 main: +80000454: 13 01 01 ff addi sp, sp, -16 +80000458: 23 26 11 00 sw ra, 12(sp) +8000045c: 37 05 00 80 lui a0, 524288 +80000460: 93 05 05 52 addi a1, a0, 1312 +80000464: 37 05 ff 7f lui a0, 524272 +80000468: 13 06 45 03 addi a2, a0, 52 +8000046c: 37 05 ff 7f lui a0, 524272 +80000470: ef f0 9f dd jal -552 +80000474: 13 05 00 00 mv a0, zero +80000478: 83 20 c1 00 lw ra, 12(sp) +8000047c: 13 01 01 01 addi sp, sp, 16 +80000480: 67 80 00 00 ret -80000404 _pocl_kernel_vecadd: -80000404: 13 01 01 ff addi sp, sp, -16 -80000408: 23 26 11 00 sw ra, 12(sp) -8000040c: 23 24 81 00 sw s0, 8(sp) -80000410: 13 04 01 01 addi s0, sp, 16 -80000414: 13 71 c1 ff andi sp, sp, -4 -80000418: 83 a7 86 01 lw a5, 24(a3) -8000041c: 83 a8 c6 00 lw a7, 12(a3) -80000420: 13 08 00 00 mv a6, zero -80000424: 33 87 e7 02 mul a4, a5, a4 -80000428: 33 87 e8 00 add a4, a7, a4 -8000042c: 83 a2 c6 01 lw t0, 28(a3) -80000430: 83 a8 06 02 lw a7, 32(a3) -80000434: 93 16 27 00 slli a3, a4, 2 -80000438: 33 03 d6 00 add t1, a2, a3 -8000043c: b3 83 d5 00 add t2, a1, a3 -80000440: 33 0e d5 00 add t3, a0, a3 -80000444: 93 06 00 00 mv a3, zero -80000448: 13 07 00 00 mv a4, zero -8000044c: 13 05 0e 00 mv a0, t3 -80000450: 93 85 03 00 mv a1, t2 -80000454: 13 06 03 00 mv a2, t1 -80000458: 07 20 05 00 flw ft0, 0(a0) -8000045c: 87 a0 05 00 flw ft1, 0(a1) -80000460: 53 70 10 00 fadd.s ft0, ft0, ft1 -80000464: 27 20 06 00 fsw ft0, 0(a2) -80000468: 13 07 17 00 addi a4, a4, 1 -8000046c: 13 06 46 00 addi a2, a2, 4 -80000470: 93 85 45 00 addi a1, a1, 4 -80000474: 13 05 45 00 addi a0, a0, 4 -80000478: e3 60 f7 fe bltu a4, a5, -32 -8000047c: 93 86 16 00 addi a3, a3, 1 -80000480: e3 e4 56 fc bltu a3, t0, -56 -80000484: 13 08 18 00 addi a6, a6, 1 -80000488: e3 6e 18 fb bltu a6, a7, -68 -8000048c: 13 01 04 ff addi sp, s0, -16 -80000490: 03 24 81 00 lw s0, 8(sp) -80000494: 83 20 c1 00 lw ra, 12(sp) -80000498: 13 01 01 01 addi sp, sp, 16 -8000049c: 67 80 00 00 ret +80000484 _pocl_kernel_vecadd: +80000484: 13 01 01 ff addi sp, sp, -16 +80000488: 23 26 11 00 sw ra, 12(sp) +8000048c: 23 24 81 00 sw s0, 8(sp) +80000490: 13 04 01 01 addi s0, sp, 16 +80000494: 13 71 c1 ff andi sp, sp, -4 +80000498: 83 a7 86 01 lw a5, 24(a3) +8000049c: 83 a8 c6 00 lw a7, 12(a3) +800004a0: 13 08 00 00 mv a6, zero +800004a4: 33 87 e7 02 mul a4, a5, a4 +800004a8: 33 87 e8 00 add a4, a7, a4 +800004ac: 83 a2 c6 01 lw t0, 28(a3) +800004b0: 83 a8 06 02 lw a7, 32(a3) +800004b4: 93 16 27 00 slli a3, a4, 2 +800004b8: 33 03 d6 00 add t1, a2, a3 +800004bc: b3 83 d5 00 add t2, a1, a3 +800004c0: 33 0e d5 00 add t3, a0, a3 +800004c4: 93 06 00 00 mv a3, zero +800004c8: 13 07 00 00 mv a4, zero +800004cc: 13 05 0e 00 mv a0, t3 +800004d0: 93 85 03 00 mv a1, t2 +800004d4: 13 06 03 00 mv a2, t1 +800004d8: 07 20 05 00 flw ft0, 0(a0) +800004dc: 87 a0 05 00 flw ft1, 0(a1) +800004e0: 53 70 10 00 fadd.s ft0, ft0, ft1 +800004e4: 27 20 06 00 fsw ft0, 0(a2) +800004e8: 13 07 17 00 addi a4, a4, 1 +800004ec: 13 06 46 00 addi a2, a2, 4 +800004f0: 93 85 45 00 addi a1, a1, 4 +800004f4: 13 05 45 00 addi a0, a0, 4 +800004f8: e3 60 f7 fe bltu a4, a5, -32 +800004fc: 93 86 16 00 addi a3, a3, 1 +80000500: e3 e4 56 fc bltu a3, t0, -56 +80000504: 13 08 18 00 addi a6, a6, 1 +80000508: e3 6e 18 fb bltu a6, a7, -68 +8000050c: 13 01 04 ff addi sp, s0, -16 +80000510: 03 24 81 00 lw s0, 8(sp) +80000514: 83 20 c1 00 lw ra, 12(sp) +80000518: 13 01 01 01 addi sp, sp, 16 +8000051c: 67 80 00 00 ret -800004a0 _pocl_kernel_vecadd_workgroup: -800004a0: 83 26 05 00 lw a3, 0(a0) -800004a4: 93 08 00 00 mv a7, zero -800004a8: 03 ae 06 00 lw t3, 0(a3) -800004ac: 03 27 45 00 lw a4, 4(a0) -800004b0: 83 27 85 00 lw a5, 8(a0) -800004b4: 03 a5 85 01 lw a0, 24(a1) -800004b8: 83 a6 c5 00 lw a3, 12(a1) -800004bc: 03 27 07 00 lw a4, 0(a4) -800004c0: 83 a7 07 00 lw a5, 0(a5) -800004c4: 33 06 c5 02 mul a2, a0, a2 -800004c8: 33 86 c6 00 add a2, a3, a2 -800004cc: 83 a2 c5 01 lw t0, 28(a1) -800004d0: 03 a8 05 02 lw a6, 32(a1) -800004d4: 93 15 26 00 slli a1, a2, 2 -800004d8: 33 83 b7 00 add t1, a5, a1 -800004dc: b3 03 b7 00 add t2, a4, a1 -800004e0: 33 0e be 00 add t3, t3, a1 -800004e4: 93 06 00 00 mv a3, zero -800004e8: 13 06 00 00 mv a2, zero -800004ec: 93 05 0e 00 mv a1, t3 -800004f0: 93 87 03 00 mv a5, t2 -800004f4: 13 07 03 00 mv a4, t1 -800004f8: 07 a0 05 00 flw ft0, 0(a1) -800004fc: 87 a0 07 00 flw ft1, 0(a5) -80000500: 53 70 10 00 fadd.s ft0, ft0, ft1 -80000504: 27 20 07 00 fsw ft0, 0(a4) -80000508: 13 06 16 00 addi a2, a2, 1 -8000050c: 13 07 47 00 addi a4, a4, 4 -80000510: 93 87 47 00 addi a5, a5, 4 -80000514: 93 85 45 00 addi a1, a1, 4 -80000518: e3 60 a6 fe bltu a2, a0, -32 -8000051c: 93 86 16 00 addi a3, a3, 1 -80000520: e3 e4 56 fc bltu a3, t0, -56 -80000524: 93 88 18 00 addi a7, a7, 1 -80000528: e3 ee 08 fb bltu a7, a6, -68 -8000052c: 67 80 00 00 ret +80000520 _pocl_kernel_vecadd_workgroup: +80000520: 83 26 05 00 lw a3, 0(a0) +80000524: 93 08 00 00 mv a7, zero +80000528: 03 ae 06 00 lw t3, 0(a3) +8000052c: 03 27 45 00 lw a4, 4(a0) +80000530: 83 27 85 00 lw a5, 8(a0) +80000534: 03 a5 85 01 lw a0, 24(a1) +80000538: 83 a6 c5 00 lw a3, 12(a1) +8000053c: 03 27 07 00 lw a4, 0(a4) +80000540: 83 a7 07 00 lw a5, 0(a5) +80000544: 33 06 c5 02 mul a2, a0, a2 +80000548: 33 86 c6 00 add a2, a3, a2 +8000054c: 83 a2 c5 01 lw t0, 28(a1) +80000550: 03 a8 05 02 lw a6, 32(a1) +80000554: 93 15 26 00 slli a1, a2, 2 +80000558: 33 83 b7 00 add t1, a5, a1 +8000055c: b3 03 b7 00 add t2, a4, a1 +80000560: 33 0e be 00 add t3, t3, a1 +80000564: 93 06 00 00 mv a3, zero +80000568: 13 06 00 00 mv a2, zero +8000056c: 93 05 0e 00 mv a1, t3 +80000570: 93 87 03 00 mv a5, t2 +80000574: 13 07 03 00 mv a4, t1 +80000578: 07 a0 05 00 flw ft0, 0(a1) +8000057c: 87 a0 07 00 flw ft1, 0(a5) +80000580: 53 70 10 00 fadd.s ft0, ft0, ft1 +80000584: 27 20 07 00 fsw ft0, 0(a4) +80000588: 13 06 16 00 addi a2, a2, 1 +8000058c: 13 07 47 00 addi a4, a4, 4 +80000590: 93 87 47 00 addi a5, a5, 4 +80000594: 93 85 45 00 addi a1, a1, 4 +80000598: e3 60 a6 fe bltu a2, a0, -32 +8000059c: 93 86 16 00 addi a3, a3, 1 +800005a0: e3 e4 56 fc bltu a3, t0, -56 +800005a4: 93 88 18 00 addi a7, a7, 1 +800005a8: e3 ee 08 fb bltu a7, a6, -68 +800005ac: 67 80 00 00 ret -80000530 _pocl_kernel_vecadd_workgroup_fast: -80000530: 93 08 00 00 mv a7, zero -80000534: 03 2e 05 00 lw t3, 0(a0) -80000538: 03 a7 85 01 lw a4, 24(a1) -8000053c: 83 a7 c5 00 lw a5, 12(a1) -80000540: 83 26 45 00 lw a3, 4(a0) -80000544: 03 25 85 00 lw a0, 8(a0) -80000548: 33 06 c7 02 mul a2, a4, a2 -8000054c: 33 86 c7 00 add a2, a5, a2 -80000550: 83 a2 c5 01 lw t0, 28(a1) -80000554: 03 a8 05 02 lw a6, 32(a1) -80000558: 93 15 26 00 slli a1, a2, 2 -8000055c: 33 03 b5 00 add t1, a0, a1 -80000560: b3 83 b6 00 add t2, a3, a1 -80000564: 33 0e be 00 add t3, t3, a1 -80000568: 93 06 00 00 mv a3, zero -8000056c: 13 05 00 00 mv a0, zero -80000570: 93 05 0e 00 mv a1, t3 -80000574: 93 87 03 00 mv a5, t2 -80000578: 13 06 03 00 mv a2, t1 -8000057c: 07 a0 05 00 flw ft0, 0(a1) -80000580: 87 a0 07 00 flw ft1, 0(a5) -80000584: 53 70 10 00 fadd.s ft0, ft0, ft1 -80000588: 27 20 06 00 fsw ft0, 0(a2) -8000058c: 13 05 15 00 addi a0, a0, 1 -80000590: 13 06 46 00 addi a2, a2, 4 -80000594: 93 87 47 00 addi a5, a5, 4 -80000598: 93 85 45 00 addi a1, a1, 4 -8000059c: e3 60 e5 fe bltu a0, a4, -32 -800005a0: 93 86 16 00 addi a3, a3, 1 -800005a4: e3 e4 56 fc bltu a3, t0, -56 -800005a8: 93 88 18 00 addi a7, a7, 1 -800005ac: e3 ee 08 fb bltu a7, a6, -68 -800005b0: 67 80 00 00 ret - -800005b4 _exit: -800005b4: 13 05 00 00 mv a0, zero -800005b8: 6b 00 05 00 - -800005bc vx_set_sp: -800005bc: 73 25 00 fc csrr a0, 4032 -800005c0: 6b 00 05 00 -800005c4: 97 21 00 00 auipc gp, 2 -800005c8: 93 81 41 c9 addi gp, gp, -876 -800005cc: 17 01 00 7f auipc sp, 520192 -800005d0: 13 01 41 a3 addi sp, sp, -1484 -800005d4: 93 05 00 40 addi a1, zero, 1024 -800005d8: 73 26 10 cc csrr a2, 3265 -800005dc: b3 85 c5 02 mul a1, a1, a2 -800005e0: 33 01 b1 40 sub sp, sp, a1 -800005e4: f3 26 30 cc csrr a3, 3267 -800005e8: 63 86 06 00 beqz a3, 12 +800005b0 _pocl_kernel_vecadd_workgroup_fast: +800005b0: 93 08 00 00 mv a7, zero +800005b4: 03 2e 05 00 lw t3, 0(a0) +800005b8: 03 a7 85 01 lw a4, 24(a1) +800005bc: 83 a7 c5 00 lw a5, 12(a1) +800005c0: 83 26 45 00 lw a3, 4(a0) +800005c4: 03 25 85 00 lw a0, 8(a0) +800005c8: 33 06 c7 02 mul a2, a4, a2 +800005cc: 33 86 c7 00 add a2, a5, a2 +800005d0: 83 a2 c5 01 lw t0, 28(a1) +800005d4: 03 a8 05 02 lw a6, 32(a1) +800005d8: 93 15 26 00 slli a1, a2, 2 +800005dc: 33 03 b5 00 add t1, a0, a1 +800005e0: b3 83 b6 00 add t2, a3, a1 +800005e4: 33 0e be 00 add t3, t3, a1 +800005e8: 93 06 00 00 mv a3, zero 800005ec: 13 05 00 00 mv a0, zero -800005f0: 6b 00 05 00 +800005f0: 93 05 0e 00 mv a1, t3 +800005f4: 93 87 03 00 mv a5, t2 +800005f8: 13 06 03 00 mv a2, t1 +800005fc: 07 a0 05 00 flw ft0, 0(a1) +80000600: 87 a0 07 00 flw ft1, 0(a5) +80000604: 53 70 10 00 fadd.s ft0, ft0, ft1 +80000608: 27 20 06 00 fsw ft0, 0(a2) +8000060c: 13 05 15 00 addi a0, a0, 1 +80000610: 13 06 46 00 addi a2, a2, 4 +80000614: 93 87 47 00 addi a5, a5, 4 +80000618: 93 85 45 00 addi a1, a1, 4 +8000061c: e3 60 e5 fe bltu a0, a4, -32 +80000620: 93 86 16 00 addi a3, a3, 1 +80000624: e3 e4 56 fc bltu a3, t0, -56 +80000628: 93 88 18 00 addi a7, a7, 1 +8000062c: e3 ee 08 fb bltu a7, a6, -68 +80000630: 67 80 00 00 ret -800005f4 RETURN: -800005f4: 67 80 00 00 ret +80000634 _exit: +80000634: 13 05 00 00 mv a0, zero +80000638: 6b 00 05 00 -800005f8 vx_wspawn: -800005f8: 6b 10 b5 00 -800005fc: 67 80 00 00 ret +8000063c vx_set_sp: +8000063c: 73 25 00 fc csrr a0, 4032 +80000640: 6b 00 05 00 +80000644: 97 21 00 00 auipc gp, 2 +80000648: 93 81 41 c9 addi gp, gp, -876 +8000064c: 17 01 00 7f auipc sp, 520192 +80000650: 13 01 41 9b addi sp, sp, -1612 +80000654: 93 05 00 40 addi a1, zero, 1024 +80000658: 73 26 10 cc csrr a2, 3265 +8000065c: b3 85 c5 02 mul a1, a1, a2 +80000660: 33 01 b1 40 sub sp, sp, a1 +80000664: f3 26 30 cc csrr a3, 3267 +80000668: 63 86 06 00 beqz a3, 12 +8000066c: 13 05 00 00 mv a0, zero +80000670: 6b 00 05 00 -80000600 vx_tmc: -80000600: 6b 00 05 00 -80000604: 67 80 00 00 ret - -80000608 vx_barrier: -80000608: 6b 40 b5 00 -8000060c: 67 80 00 00 ret - -80000610 vx_split: -80000610: 6b 20 05 00 -80000614: 67 80 00 00 ret - -80000618 vx_join: -80000618: 6b 30 00 00 -8000061c: 67 80 00 00 ret - -80000620 vx_warp_id: -80000620: 73 25 30 cc csrr a0, 3267 -80000624: 67 80 00 00 ret - -80000628 vx_warp_gid: -80000628: 73 25 40 f1 csrr a0, mhartid -8000062c: 67 80 00 00 ret - -80000630 vx_thread_id: -80000630: 73 25 00 cc csrr a0, 3264 -80000634: 67 80 00 00 ret - -80000638 vx_thread_lid: -80000638: 73 25 10 cc csrr a0, 3265 -8000063c: 67 80 00 00 ret - -80000640 vx_thread_gid: -80000640: 73 25 20 cc csrr a0, 3266 -80000644: 67 80 00 00 ret - -80000648 vx_core_id: -80000648: 73 25 50 cc csrr a0, 3269 -8000064c: 67 80 00 00 ret - -80000650 vx_num_threads: -80000650: 73 25 00 fc csrr a0, 4032 -80000654: 67 80 00 00 ret - -80000658 vx_num_warps: -80000658: 73 25 10 fc csrr a0, 4033 -8000065c: 67 80 00 00 ret - -80000660 vx_num_cores: -80000660: 73 25 20 fc csrr a0, 4034 -80000664: 67 80 00 00 ret - -80000668 vx_num_cycles: -80000668: 73 25 00 b0 csrr a0, mcycle -8000066c: 67 80 00 00 ret - -80000670 vx_num_instrs: -80000670: 73 25 20 b0 csrr a0, minstret +80000674 RETURN: 80000674: 67 80 00 00 ret -80000678 atexit: -80000678: 93 05 05 00 mv a1, a0 -8000067c: 93 06 00 00 mv a3, zero -80000680: 13 06 00 00 mv a2, zero -80000684: 13 05 00 00 mv a0, zero -80000688: 6f 00 c0 20 j 524 +80000678 vx_wspawn: +80000678: 6b 10 b5 00 +8000067c: 67 80 00 00 ret -8000068c exit: -8000068c: 13 01 01 ff addi sp, sp, -16 -80000690: 93 05 00 00 mv a1, zero -80000694: 23 24 81 00 sw s0, 8(sp) -80000698: 23 26 11 00 sw ra, 12(sp) -8000069c: 13 04 05 00 mv s0, a0 -800006a0: ef 00 00 29 jal 656 -800006a4: b7 27 00 80 lui a5, 524290 -800006a8: 03 a5 07 e8 lw a0, -384(a5) -800006ac: 83 27 c5 03 lw a5, 60(a0) -800006b0: 63 84 07 00 beqz a5, 8 -800006b4: e7 80 07 00 jalr a5 -800006b8: 13 05 04 00 mv a0, s0 -800006bc: ef f0 9f ef jal -264 +80000680 vx_tmc: +80000680: 6b 00 05 00 +80000684: 67 80 00 00 ret -800006c0 __libc_fini_array: -800006c0: 13 01 01 ff addi sp, sp, -16 -800006c4: 23 24 81 00 sw s0, 8(sp) -800006c8: b7 27 00 80 lui a5, 524290 -800006cc: 37 24 00 80 lui s0, 524290 -800006d0: 13 04 84 a5 addi s0, s0, -1448 -800006d4: 93 87 87 a5 addi a5, a5, -1448 -800006d8: b3 87 87 40 sub a5, a5, s0 -800006dc: 23 22 91 00 sw s1, 4(sp) -800006e0: 23 26 11 00 sw ra, 12(sp) -800006e4: 93 d4 27 40 srai s1, a5, 2 -800006e8: 63 80 04 02 beqz s1, 32 -800006ec: 93 87 c7 ff addi a5, a5, -4 -800006f0: 33 84 87 00 add s0, a5, s0 -800006f4: 83 27 04 00 lw a5, 0(s0) -800006f8: 93 84 f4 ff addi s1, s1, -1 -800006fc: 13 04 c4 ff addi s0, s0, -4 -80000700: e7 80 07 00 jalr a5 -80000704: e3 98 04 fe bnez s1, -16 -80000708: 83 20 c1 00 lw ra, 12(sp) -8000070c: 03 24 81 00 lw s0, 8(sp) -80000710: 83 24 41 00 lw s1, 4(sp) -80000714: 13 01 01 01 addi sp, sp, 16 -80000718: 67 80 00 00 ret +80000688 vx_barrier: +80000688: 6b 40 b5 00 +8000068c: 67 80 00 00 ret -8000071c __libc_init_array: -8000071c: 13 01 01 ff addi sp, sp, -16 -80000720: 23 24 81 00 sw s0, 8(sp) -80000724: 23 20 21 01 sw s2, 0(sp) -80000728: 37 24 00 80 lui s0, 524290 -8000072c: 37 29 00 80 lui s2, 524290 -80000730: 93 07 44 a5 addi a5, s0, -1452 -80000734: 13 09 49 a5 addi s2, s2, -1452 -80000738: 33 09 f9 40 sub s2, s2, a5 -8000073c: 23 26 11 00 sw ra, 12(sp) -80000740: 23 22 91 00 sw s1, 4(sp) -80000744: 13 59 29 40 srai s2, s2, 2 -80000748: 63 00 09 02 beqz s2, 32 -8000074c: 13 04 44 a5 addi s0, s0, -1452 -80000750: 93 04 00 00 mv s1, zero -80000754: 83 27 04 00 lw a5, 0(s0) -80000758: 93 84 14 00 addi s1, s1, 1 -8000075c: 13 04 44 00 addi s0, s0, 4 -80000760: e7 80 07 00 jalr a5 -80000764: e3 18 99 fe bne s2, s1, -16 -80000768: 37 24 00 80 lui s0, 524290 -8000076c: 37 29 00 80 lui s2, 524290 -80000770: 93 07 44 a5 addi a5, s0, -1452 -80000774: 13 09 89 a5 addi s2, s2, -1448 -80000778: 33 09 f9 40 sub s2, s2, a5 -8000077c: 13 59 29 40 srai s2, s2, 2 -80000780: 63 00 09 02 beqz s2, 32 -80000784: 13 04 44 a5 addi s0, s0, -1452 -80000788: 93 04 00 00 mv s1, zero -8000078c: 83 27 04 00 lw a5, 0(s0) -80000790: 93 84 14 00 addi s1, s1, 1 -80000794: 13 04 44 00 addi s0, s0, 4 -80000798: e7 80 07 00 jalr a5 -8000079c: e3 18 99 fe bne s2, s1, -16 -800007a0: 83 20 c1 00 lw ra, 12(sp) -800007a4: 03 24 81 00 lw s0, 8(sp) -800007a8: 83 24 41 00 lw s1, 4(sp) -800007ac: 03 29 01 00 lw s2, 0(sp) -800007b0: 13 01 01 01 addi sp, sp, 16 -800007b4: 67 80 00 00 ret +80000690 vx_split: +80000690: 6b 20 05 00 +80000694: 67 80 00 00 ret -800007b8 memset: -800007b8: 13 03 f0 00 addi t1, zero, 15 -800007bc: 13 07 05 00 mv a4, a0 -800007c0: 63 7e c3 02 bgeu t1, a2, 60 -800007c4: 93 77 f7 00 andi a5, a4, 15 -800007c8: 63 90 07 0a bnez a5, 160 -800007cc: 63 92 05 08 bnez a1, 132 -800007d0: 93 76 06 ff andi a3, a2, -16 -800007d4: 13 76 f6 00 andi a2, a2, 15 -800007d8: b3 86 e6 00 add a3, a3, a4 -800007dc: 23 20 b7 00 sw a1, 0(a4) -800007e0: 23 22 b7 00 sw a1, 4(a4) -800007e4: 23 24 b7 00 sw a1, 8(a4) -800007e8: 23 26 b7 00 sw a1, 12(a4) -800007ec: 13 07 07 01 addi a4, a4, 16 -800007f0: e3 66 d7 fe bltu a4, a3, -20 -800007f4: 63 14 06 00 bnez a2, 8 -800007f8: 67 80 00 00 ret -800007fc: b3 06 c3 40 sub a3, t1, a2 -80000800: 93 96 26 00 slli a3, a3, 2 -80000804: 97 02 00 00 auipc t0, 0 -80000808: b3 86 56 00 add a3, a3, t0 -8000080c: 67 80 c6 00 jr 12(a3) -80000810: 23 07 b7 00 sb a1, 14(a4) -80000814: a3 06 b7 00 sb a1, 13(a4) -80000818: 23 06 b7 00 sb a1, 12(a4) -8000081c: a3 05 b7 00 sb a1, 11(a4) -80000820: 23 05 b7 00 sb a1, 10(a4) -80000824: a3 04 b7 00 sb a1, 9(a4) -80000828: 23 04 b7 00 sb a1, 8(a4) -8000082c: a3 03 b7 00 sb a1, 7(a4) -80000830: 23 03 b7 00 sb a1, 6(a4) -80000834: a3 02 b7 00 sb a1, 5(a4) -80000838: 23 02 b7 00 sb a1, 4(a4) -8000083c: a3 01 b7 00 sb a1, 3(a4) -80000840: 23 01 b7 00 sb a1, 2(a4) -80000844: a3 00 b7 00 sb a1, 1(a4) -80000848: 23 00 b7 00 sb a1, 0(a4) -8000084c: 67 80 00 00 ret -80000850: 93 f5 f5 0f andi a1, a1, 255 -80000854: 93 96 85 00 slli a3, a1, 8 -80000858: b3 e5 d5 00 or a1, a1, a3 -8000085c: 93 96 05 01 slli a3, a1, 16 -80000860: b3 e5 d5 00 or a1, a1, a3 -80000864: 6f f0 df f6 j -148 -80000868: 93 96 27 00 slli a3, a5, 2 -8000086c: 97 02 00 00 auipc t0, 0 -80000870: b3 86 56 00 add a3, a3, t0 -80000874: 93 82 00 00 mv t0, ra -80000878: e7 80 06 fa jalr -96(a3) -8000087c: 93 80 02 00 mv ra, t0 -80000880: 93 87 07 ff addi a5, a5, -16 -80000884: 33 07 f7 40 sub a4, a4, a5 -80000888: 33 06 f6 00 add a2, a2, a5 -8000088c: e3 78 c3 f6 bgeu t1, a2, -144 -80000890: 6f f0 df f3 j -196 +80000698 vx_join: +80000698: 6b 30 00 00 +8000069c: 67 80 00 00 ret -80000894 __register_exitproc: -80000894: b7 27 00 80 lui a5, 524290 -80000898: 03 a7 07 e8 lw a4, -384(a5) -8000089c: 83 27 87 14 lw a5, 328(a4) -800008a0: 63 8c 07 04 beqz a5, 88 -800008a4: 03 a7 47 00 lw a4, 4(a5) -800008a8: 13 08 f0 01 addi a6, zero, 31 -800008ac: 63 4e e8 06 blt a6, a4, 124 -800008b0: 13 18 27 00 slli a6, a4, 2 -800008b4: 63 06 05 02 beqz a0, 44 -800008b8: 33 83 07 01 add t1, a5, a6 -800008bc: 23 24 c3 08 sw a2, 136(t1) -800008c0: 83 a8 87 18 lw a7, 392(a5) -800008c4: 13 06 10 00 addi a2, zero, 1 -800008c8: 33 16 e6 00 sll a2, a2, a4 -800008cc: b3 e8 c8 00 or a7, a7, a2 -800008d0: 23 a4 17 19 sw a7, 392(a5) -800008d4: 23 24 d3 10 sw a3, 264(t1) -800008d8: 93 06 20 00 addi a3, zero, 2 -800008dc: 63 04 d5 02 beq a0, a3, 40 -800008e0: 13 07 17 00 addi a4, a4, 1 -800008e4: 23 a2 e7 00 sw a4, 4(a5) -800008e8: b3 87 07 01 add a5, a5, a6 -800008ec: 23 a4 b7 00 sw a1, 8(a5) -800008f0: 13 05 00 00 mv a0, zero -800008f4: 67 80 00 00 ret -800008f8: 93 07 c7 14 addi a5, a4, 332 -800008fc: 23 24 f7 14 sw a5, 328(a4) -80000900: 6f f0 5f fa j -92 -80000904: 83 a6 c7 18 lw a3, 396(a5) -80000908: 13 07 17 00 addi a4, a4, 1 -8000090c: 23 a2 e7 00 sw a4, 4(a5) -80000910: 33 e6 c6 00 or a2, a3, a2 -80000914: 23 a6 c7 18 sw a2, 396(a5) -80000918: b3 87 07 01 add a5, a5, a6 -8000091c: 23 a4 b7 00 sw a1, 8(a5) -80000920: 13 05 00 00 mv a0, zero -80000924: 67 80 00 00 ret -80000928: 13 05 f0 ff addi a0, zero, -1 -8000092c: 67 80 00 00 ret +800006a0 vx_warp_id: +800006a0: 73 25 30 cc csrr a0, 3267 +800006a4: 67 80 00 00 ret -80000930 __call_exitprocs: -80000930: 13 01 01 fd addi sp, sp, -48 -80000934: b7 27 00 80 lui a5, 524290 -80000938: 23 2c 41 01 sw s4, 24(sp) -8000093c: 03 aa 07 e8 lw s4, -384(a5) -80000940: 23 20 21 03 sw s2, 32(sp) -80000944: 23 26 11 02 sw ra, 44(sp) -80000948: 03 29 8a 14 lw s2, 328(s4) -8000094c: 23 24 81 02 sw s0, 40(sp) -80000950: 23 22 91 02 sw s1, 36(sp) -80000954: 23 2e 31 01 sw s3, 28(sp) -80000958: 23 2a 51 01 sw s5, 20(sp) -8000095c: 23 28 61 01 sw s6, 16(sp) -80000960: 23 26 71 01 sw s7, 12(sp) -80000964: 23 24 81 01 sw s8, 8(sp) -80000968: 63 00 09 04 beqz s2, 64 -8000096c: 13 0b 05 00 mv s6, a0 -80000970: 93 8b 05 00 mv s7, a1 -80000974: 93 0a 10 00 addi s5, zero, 1 -80000978: 93 09 f0 ff addi s3, zero, -1 -8000097c: 83 24 49 00 lw s1, 4(s2) -80000980: 13 84 f4 ff addi s0, s1, -1 -80000984: 63 42 04 02 bltz s0, 36 -80000988: 93 94 24 00 slli s1, s1, 2 -8000098c: b3 04 99 00 add s1, s2, s1 -80000990: 63 84 0b 04 beqz s7, 72 -80000994: 83 a7 44 10 lw a5, 260(s1) -80000998: 63 80 77 05 beq a5, s7, 64 -8000099c: 13 04 f4 ff addi s0, s0, -1 -800009a0: 93 84 c4 ff addi s1, s1, -4 -800009a4: e3 16 34 ff bne s0, s3, -20 -800009a8: 83 20 c1 02 lw ra, 44(sp) -800009ac: 03 24 81 02 lw s0, 40(sp) -800009b0: 83 24 41 02 lw s1, 36(sp) -800009b4: 03 29 01 02 lw s2, 32(sp) -800009b8: 83 29 c1 01 lw s3, 28(sp) -800009bc: 03 2a 81 01 lw s4, 24(sp) -800009c0: 83 2a 41 01 lw s5, 20(sp) -800009c4: 03 2b 01 01 lw s6, 16(sp) -800009c8: 83 2b c1 00 lw s7, 12(sp) -800009cc: 03 2c 81 00 lw s8, 8(sp) -800009d0: 13 01 01 03 addi sp, sp, 48 -800009d4: 67 80 00 00 ret -800009d8: 83 27 49 00 lw a5, 4(s2) -800009dc: 83 a6 44 00 lw a3, 4(s1) -800009e0: 93 87 f7 ff addi a5, a5, -1 -800009e4: 63 8e 87 04 beq a5, s0, 92 -800009e8: 23 a2 04 00 sw zero, 4(s1) -800009ec: e3 88 06 fa beqz a3, -80 -800009f0: 83 27 89 18 lw a5, 392(s2) -800009f4: 33 97 8a 00 sll a4, s5, s0 -800009f8: 03 2c 49 00 lw s8, 4(s2) -800009fc: b3 77 f7 00 and a5, a4, a5 -80000a00: 63 92 07 02 bnez a5, 36 -80000a04: e7 80 06 00 jalr a3 -80000a08: 03 27 49 00 lw a4, 4(s2) -80000a0c: 83 27 8a 14 lw a5, 328(s4) -80000a10: 63 14 87 01 bne a4, s8, 8 -80000a14: e3 04 f9 f8 beq s2, a5, -120 -80000a18: e3 88 07 f8 beqz a5, -112 -80000a1c: 13 89 07 00 mv s2, a5 -80000a20: 6f f0 df f5 j -164 -80000a24: 83 27 c9 18 lw a5, 396(s2) -80000a28: 83 a5 44 08 lw a1, 132(s1) -80000a2c: 33 77 f7 00 and a4, a4, a5 -80000a30: 63 1c 07 00 bnez a4, 24 -80000a34: 13 05 0b 00 mv a0, s6 -80000a38: e7 80 06 00 jalr a3 -80000a3c: 6f f0 df fc j -52 -80000a40: 23 22 89 00 sw s0, 4(s2) -80000a44: 6f f0 9f fa j -88 -80000a48: 13 85 05 00 mv a0, a1 -80000a4c: e7 80 06 00 jalr a3 -80000a50: 6f f0 9f fb j -72 +800006a8 vx_warp_gid: +800006a8: 73 25 40 f1 csrr a0, mhartid +800006ac: 67 80 00 00 ret + +800006b0 vx_thread_id: +800006b0: 73 25 00 cc csrr a0, 3264 +800006b4: 67 80 00 00 ret + +800006b8 vx_thread_lid: +800006b8: 73 25 10 cc csrr a0, 3265 +800006bc: 67 80 00 00 ret + +800006c0 vx_thread_gid: +800006c0: 73 25 20 cc csrr a0, 3266 +800006c4: 67 80 00 00 ret + +800006c8 vx_core_id: +800006c8: 73 25 50 cc csrr a0, 3269 +800006cc: 67 80 00 00 ret + +800006d0 vx_num_threads: +800006d0: 73 25 00 fc csrr a0, 4032 +800006d4: 67 80 00 00 ret + +800006d8 vx_num_warps: +800006d8: 73 25 10 fc csrr a0, 4033 +800006dc: 67 80 00 00 ret + +800006e0 vx_num_cores: +800006e0: 73 25 20 fc csrr a0, 4034 +800006e4: 67 80 00 00 ret + +800006e8 vx_num_cycles: +800006e8: 73 25 00 b0 csrr a0, mcycle +800006ec: 67 80 00 00 ret + +800006f0 vx_num_instrs: +800006f0: 73 25 20 b0 csrr a0, minstret +800006f4: 67 80 00 00 ret + +800006f8 atexit: +800006f8: 93 05 05 00 mv a1, a0 +800006fc: 93 06 00 00 mv a3, zero +80000700: 13 06 00 00 mv a2, zero +80000704: 13 05 00 00 mv a0, zero +80000708: 6f 00 c0 20 j 524 + +8000070c exit: +8000070c: 13 01 01 ff addi sp, sp, -16 +80000710: 93 05 00 00 mv a1, zero +80000714: 23 24 81 00 sw s0, 8(sp) +80000718: 23 26 11 00 sw ra, 12(sp) +8000071c: 13 04 05 00 mv s0, a0 +80000720: ef 00 00 29 jal 656 +80000724: b7 27 00 80 lui a5, 524290 +80000728: 03 a5 07 f0 lw a0, -256(a5) +8000072c: 83 27 c5 03 lw a5, 60(a0) +80000730: 63 84 07 00 beqz a5, 8 +80000734: e7 80 07 00 jalr a5 +80000738: 13 05 04 00 mv a0, s0 +8000073c: ef f0 9f ef jal -264 + +80000740 __libc_fini_array: +80000740: 13 01 01 ff addi sp, sp, -16 +80000744: 23 24 81 00 sw s0, 8(sp) +80000748: b7 27 00 80 lui a5, 524290 +8000074c: 37 24 00 80 lui s0, 524290 +80000750: 13 04 84 ad addi s0, s0, -1320 +80000754: 93 87 87 ad addi a5, a5, -1320 +80000758: b3 87 87 40 sub a5, a5, s0 +8000075c: 23 22 91 00 sw s1, 4(sp) +80000760: 23 26 11 00 sw ra, 12(sp) +80000764: 93 d4 27 40 srai s1, a5, 2 +80000768: 63 80 04 02 beqz s1, 32 +8000076c: 93 87 c7 ff addi a5, a5, -4 +80000770: 33 84 87 00 add s0, a5, s0 +80000774: 83 27 04 00 lw a5, 0(s0) +80000778: 93 84 f4 ff addi s1, s1, -1 +8000077c: 13 04 c4 ff addi s0, s0, -4 +80000780: e7 80 07 00 jalr a5 +80000784: e3 98 04 fe bnez s1, -16 +80000788: 83 20 c1 00 lw ra, 12(sp) +8000078c: 03 24 81 00 lw s0, 8(sp) +80000790: 83 24 41 00 lw s1, 4(sp) +80000794: 13 01 01 01 addi sp, sp, 16 +80000798: 67 80 00 00 ret + +8000079c __libc_init_array: +8000079c: 13 01 01 ff addi sp, sp, -16 +800007a0: 23 24 81 00 sw s0, 8(sp) +800007a4: 23 20 21 01 sw s2, 0(sp) +800007a8: 37 24 00 80 lui s0, 524290 +800007ac: 37 29 00 80 lui s2, 524290 +800007b0: 93 07 44 ad addi a5, s0, -1324 +800007b4: 13 09 49 ad addi s2, s2, -1324 +800007b8: 33 09 f9 40 sub s2, s2, a5 +800007bc: 23 26 11 00 sw ra, 12(sp) +800007c0: 23 22 91 00 sw s1, 4(sp) +800007c4: 13 59 29 40 srai s2, s2, 2 +800007c8: 63 00 09 02 beqz s2, 32 +800007cc: 13 04 44 ad addi s0, s0, -1324 +800007d0: 93 04 00 00 mv s1, zero +800007d4: 83 27 04 00 lw a5, 0(s0) +800007d8: 93 84 14 00 addi s1, s1, 1 +800007dc: 13 04 44 00 addi s0, s0, 4 +800007e0: e7 80 07 00 jalr a5 +800007e4: e3 18 99 fe bne s2, s1, -16 +800007e8: 37 24 00 80 lui s0, 524290 +800007ec: 37 29 00 80 lui s2, 524290 +800007f0: 93 07 44 ad addi a5, s0, -1324 +800007f4: 13 09 89 ad addi s2, s2, -1320 +800007f8: 33 09 f9 40 sub s2, s2, a5 +800007fc: 13 59 29 40 srai s2, s2, 2 +80000800: 63 00 09 02 beqz s2, 32 +80000804: 13 04 44 ad addi s0, s0, -1324 +80000808: 93 04 00 00 mv s1, zero +8000080c: 83 27 04 00 lw a5, 0(s0) +80000810: 93 84 14 00 addi s1, s1, 1 +80000814: 13 04 44 00 addi s0, s0, 4 +80000818: e7 80 07 00 jalr a5 +8000081c: e3 18 99 fe bne s2, s1, -16 +80000820: 83 20 c1 00 lw ra, 12(sp) +80000824: 03 24 81 00 lw s0, 8(sp) +80000828: 83 24 41 00 lw s1, 4(sp) +8000082c: 03 29 01 00 lw s2, 0(sp) +80000830: 13 01 01 01 addi sp, sp, 16 +80000834: 67 80 00 00 ret + +80000838 memset: +80000838: 13 03 f0 00 addi t1, zero, 15 +8000083c: 13 07 05 00 mv a4, a0 +80000840: 63 7e c3 02 bgeu t1, a2, 60 +80000844: 93 77 f7 00 andi a5, a4, 15 +80000848: 63 90 07 0a bnez a5, 160 +8000084c: 63 92 05 08 bnez a1, 132 +80000850: 93 76 06 ff andi a3, a2, -16 +80000854: 13 76 f6 00 andi a2, a2, 15 +80000858: b3 86 e6 00 add a3, a3, a4 +8000085c: 23 20 b7 00 sw a1, 0(a4) +80000860: 23 22 b7 00 sw a1, 4(a4) +80000864: 23 24 b7 00 sw a1, 8(a4) +80000868: 23 26 b7 00 sw a1, 12(a4) +8000086c: 13 07 07 01 addi a4, a4, 16 +80000870: e3 66 d7 fe bltu a4, a3, -20 +80000874: 63 14 06 00 bnez a2, 8 +80000878: 67 80 00 00 ret +8000087c: b3 06 c3 40 sub a3, t1, a2 +80000880: 93 96 26 00 slli a3, a3, 2 +80000884: 97 02 00 00 auipc t0, 0 +80000888: b3 86 56 00 add a3, a3, t0 +8000088c: 67 80 c6 00 jr 12(a3) +80000890: 23 07 b7 00 sb a1, 14(a4) +80000894: a3 06 b7 00 sb a1, 13(a4) +80000898: 23 06 b7 00 sb a1, 12(a4) +8000089c: a3 05 b7 00 sb a1, 11(a4) +800008a0: 23 05 b7 00 sb a1, 10(a4) +800008a4: a3 04 b7 00 sb a1, 9(a4) +800008a8: 23 04 b7 00 sb a1, 8(a4) +800008ac: a3 03 b7 00 sb a1, 7(a4) +800008b0: 23 03 b7 00 sb a1, 6(a4) +800008b4: a3 02 b7 00 sb a1, 5(a4) +800008b8: 23 02 b7 00 sb a1, 4(a4) +800008bc: a3 01 b7 00 sb a1, 3(a4) +800008c0: 23 01 b7 00 sb a1, 2(a4) +800008c4: a3 00 b7 00 sb a1, 1(a4) +800008c8: 23 00 b7 00 sb a1, 0(a4) +800008cc: 67 80 00 00 ret +800008d0: 93 f5 f5 0f andi a1, a1, 255 +800008d4: 93 96 85 00 slli a3, a1, 8 +800008d8: b3 e5 d5 00 or a1, a1, a3 +800008dc: 93 96 05 01 slli a3, a1, 16 +800008e0: b3 e5 d5 00 or a1, a1, a3 +800008e4: 6f f0 df f6 j -148 +800008e8: 93 96 27 00 slli a3, a5, 2 +800008ec: 97 02 00 00 auipc t0, 0 +800008f0: b3 86 56 00 add a3, a3, t0 +800008f4: 93 82 00 00 mv t0, ra +800008f8: e7 80 06 fa jalr -96(a3) +800008fc: 93 80 02 00 mv ra, t0 +80000900: 93 87 07 ff addi a5, a5, -16 +80000904: 33 07 f7 40 sub a4, a4, a5 +80000908: 33 06 f6 00 add a2, a2, a5 +8000090c: e3 78 c3 f6 bgeu t1, a2, -144 +80000910: 6f f0 df f3 j -196 + +80000914 __register_exitproc: +80000914: b7 27 00 80 lui a5, 524290 +80000918: 03 a7 07 f0 lw a4, -256(a5) +8000091c: 83 27 87 14 lw a5, 328(a4) +80000920: 63 8c 07 04 beqz a5, 88 +80000924: 03 a7 47 00 lw a4, 4(a5) +80000928: 13 08 f0 01 addi a6, zero, 31 +8000092c: 63 4e e8 06 blt a6, a4, 124 +80000930: 13 18 27 00 slli a6, a4, 2 +80000934: 63 06 05 02 beqz a0, 44 +80000938: 33 83 07 01 add t1, a5, a6 +8000093c: 23 24 c3 08 sw a2, 136(t1) +80000940: 83 a8 87 18 lw a7, 392(a5) +80000944: 13 06 10 00 addi a2, zero, 1 +80000948: 33 16 e6 00 sll a2, a2, a4 +8000094c: b3 e8 c8 00 or a7, a7, a2 +80000950: 23 a4 17 19 sw a7, 392(a5) +80000954: 23 24 d3 10 sw a3, 264(t1) +80000958: 93 06 20 00 addi a3, zero, 2 +8000095c: 63 04 d5 02 beq a0, a3, 40 +80000960: 13 07 17 00 addi a4, a4, 1 +80000964: 23 a2 e7 00 sw a4, 4(a5) +80000968: b3 87 07 01 add a5, a5, a6 +8000096c: 23 a4 b7 00 sw a1, 8(a5) +80000970: 13 05 00 00 mv a0, zero +80000974: 67 80 00 00 ret +80000978: 93 07 c7 14 addi a5, a4, 332 +8000097c: 23 24 f7 14 sw a5, 328(a4) +80000980: 6f f0 5f fa j -92 +80000984: 83 a6 c7 18 lw a3, 396(a5) +80000988: 13 07 17 00 addi a4, a4, 1 +8000098c: 23 a2 e7 00 sw a4, 4(a5) +80000990: 33 e6 c6 00 or a2, a3, a2 +80000994: 23 a6 c7 18 sw a2, 396(a5) +80000998: b3 87 07 01 add a5, a5, a6 +8000099c: 23 a4 b7 00 sw a1, 8(a5) +800009a0: 13 05 00 00 mv a0, zero +800009a4: 67 80 00 00 ret +800009a8: 13 05 f0 ff addi a0, zero, -1 +800009ac: 67 80 00 00 ret + +800009b0 __call_exitprocs: +800009b0: 13 01 01 fd addi sp, sp, -48 +800009b4: b7 27 00 80 lui a5, 524290 +800009b8: 23 2c 41 01 sw s4, 24(sp) +800009bc: 03 aa 07 f0 lw s4, -256(a5) +800009c0: 23 20 21 03 sw s2, 32(sp) +800009c4: 23 26 11 02 sw ra, 44(sp) +800009c8: 03 29 8a 14 lw s2, 328(s4) +800009cc: 23 24 81 02 sw s0, 40(sp) +800009d0: 23 22 91 02 sw s1, 36(sp) +800009d4: 23 2e 31 01 sw s3, 28(sp) +800009d8: 23 2a 51 01 sw s5, 20(sp) +800009dc: 23 28 61 01 sw s6, 16(sp) +800009e0: 23 26 71 01 sw s7, 12(sp) +800009e4: 23 24 81 01 sw s8, 8(sp) +800009e8: 63 00 09 04 beqz s2, 64 +800009ec: 13 0b 05 00 mv s6, a0 +800009f0: 93 8b 05 00 mv s7, a1 +800009f4: 93 0a 10 00 addi s5, zero, 1 +800009f8: 93 09 f0 ff addi s3, zero, -1 +800009fc: 83 24 49 00 lw s1, 4(s2) +80000a00: 13 84 f4 ff addi s0, s1, -1 +80000a04: 63 42 04 02 bltz s0, 36 +80000a08: 93 94 24 00 slli s1, s1, 2 +80000a0c: b3 04 99 00 add s1, s2, s1 +80000a10: 63 84 0b 04 beqz s7, 72 +80000a14: 83 a7 44 10 lw a5, 260(s1) +80000a18: 63 80 77 05 beq a5, s7, 64 +80000a1c: 13 04 f4 ff addi s0, s0, -1 +80000a20: 93 84 c4 ff addi s1, s1, -4 +80000a24: e3 16 34 ff bne s0, s3, -20 +80000a28: 83 20 c1 02 lw ra, 44(sp) +80000a2c: 03 24 81 02 lw s0, 40(sp) +80000a30: 83 24 41 02 lw s1, 36(sp) +80000a34: 03 29 01 02 lw s2, 32(sp) +80000a38: 83 29 c1 01 lw s3, 28(sp) +80000a3c: 03 2a 81 01 lw s4, 24(sp) +80000a40: 83 2a 41 01 lw s5, 20(sp) +80000a44: 03 2b 01 01 lw s6, 16(sp) +80000a48: 83 2b c1 00 lw s7, 12(sp) +80000a4c: 03 2c 81 00 lw s8, 8(sp) +80000a50: 13 01 01 03 addi sp, sp, 48 +80000a54: 67 80 00 00 ret +80000a58: 83 27 49 00 lw a5, 4(s2) +80000a5c: 83 a6 44 00 lw a3, 4(s1) +80000a60: 93 87 f7 ff addi a5, a5, -1 +80000a64: 63 8e 87 04 beq a5, s0, 92 +80000a68: 23 a2 04 00 sw zero, 4(s1) +80000a6c: e3 88 06 fa beqz a3, -80 +80000a70: 83 27 89 18 lw a5, 392(s2) +80000a74: 33 97 8a 00 sll a4, s5, s0 +80000a78: 03 2c 49 00 lw s8, 4(s2) +80000a7c: b3 77 f7 00 and a5, a4, a5 +80000a80: 63 92 07 02 bnez a5, 36 +80000a84: e7 80 06 00 jalr a3 +80000a88: 03 27 49 00 lw a4, 4(s2) +80000a8c: 83 27 8a 14 lw a5, 328(s4) +80000a90: 63 14 87 01 bne a4, s8, 8 +80000a94: e3 04 f9 f8 beq s2, a5, -120 +80000a98: e3 88 07 f8 beqz a5, -112 +80000a9c: 13 89 07 00 mv s2, a5 +80000aa0: 6f f0 df f5 j -164 +80000aa4: 83 27 c9 18 lw a5, 396(s2) +80000aa8: 83 a5 44 08 lw a1, 132(s1) +80000aac: 33 77 f7 00 and a4, a4, a5 +80000ab0: 63 1c 07 00 bnez a4, 24 +80000ab4: 13 05 0b 00 mv a0, s6 +80000ab8: e7 80 06 00 jalr a3 +80000abc: 6f f0 df fc j -52 +80000ac0: 23 22 89 00 sw s0, 4(s2) +80000ac4: 6f f0 9f fa j -88 +80000ac8: 13 85 05 00 mv a0, a1 +80000acc: e7 80 06 00 jalr a3 +80000ad0: 6f f0 9f fb j -72 Disassembly of section .init_array: -80001a54 __preinit_array_start: -80001a54: 50 00 -80001a56: 00 80 +80001ad4 __preinit_array_start: +80001ad4: 50 00 +80001ad6: 00 80 Disassembly of section .data: -80001a58 impure_data: -80001a58: 00 00 -80001a5a: 00 00 -80001a5c: 44 1d -80001a5e: 00 80 -80001a60: ac 1d -80001a62: 00 80 -80001a64: 14 1e -80001a66: 00 80 +80001ad8 impure_data: +80001ad8: 00 00 +80001ada: 00 00 +80001adc: c4 1d +80001ade: 00 80 +80001ae0: 2c 1e +80001ae2: 00 80 +80001ae4: 94 1e +80001ae6: 00 80 ... -80001b00: 01 00 -80001b02: 00 00 -80001b04: 00 00 -80001b06: 00 00 -80001b08: 0e 33 -80001b0a: cd ab -80001b0c: 34 12 -80001b0e: 6d e6 -80001b10: ec de -80001b12: 05 00 -80001b14: 0b 00 00 00 +80001b80: 01 00 +80001b82: 00 00 +80001b84: 00 00 +80001b86: 00 00 +80001b88: 0e 33 +80001b8a: cd ab +80001b8c: 34 12 +80001b8e: 6d e6 +80001b90: ec de +80001b92: 05 00 +80001b94: 0b 00 00 00 ... Disassembly of section .sdata: -80001e80 _global_impure_ptr: -80001e80: 58 1a -80001e82: 00 80 +80001f00 _global_impure_ptr: +80001f00: d8 1a +80001f02: 00 80 Disassembly of section .bss: -80001e84 g_wspawn_args: +80001f04 g_wspawn_args: ... Disassembly of section .comment: @@ -872,28 +904,28 @@ Disassembly of section .symtab: 2c: 03 00 02 00 lb zero, 0(tp) 30: 00 00 32: 00 00 - 34: 54 1a + 34: d4 1a 36: 00 80 38: 00 00 3a: 00 00 3c: 03 00 03 00 lb zero, 0(t1) 40: 00 00 42: 00 00 - 44: 58 1a + 44: d8 1a 46: 00 80 48: 00 00 4a: 00 00 4c: 03 00 04 00 lb zero, 0(s0) 50: 00 00 52: 00 00 - 54: 80 1e + 54: 00 1f 56: 00 80 58: 00 00 5a: 00 00 5c: 03 00 05 00 lb zero, 0(a0) 60: 00 00 62: 00 00 - 64: 84 1e + 64: 04 1f 66: 00 80 68: 00 00 6a: 00 00 @@ -909,7 +941,7 @@ Disassembly of section .symtab: 9e: f1 ff a0: 0e 00 a2: 00 00 - a4: f4 05 + a4: 74 06 a6: 00 80 a8: 00 00 aa: 00 00 @@ -966,7 +998,7 @@ Disassembly of section .symtab: 14e: f1 ff 150: 85 00 152: 00 00 - 154: 58 1a + 154: d8 1a 156: 00 80 158: 28 04 15a: 00 00 @@ -977,7 +1009,7 @@ Disassembly of section .symtab: 16e: f1 ff 170: 91 00 172: 00 00 - 174: 58 1a + 174: d8 1a 176: 00 80 178: 00 00 17a: 00 00 @@ -985,7 +1017,7 @@ Disassembly of section .symtab: 17e: 04 00 180: a2 00 182: 00 00 - 184: 58 1a + 184: d8 1a 186: 00 80 188: 00 00 18a: 00 00 @@ -993,35 +1025,35 @@ Disassembly of section .symtab: 18e: 04 00 190: b5 00 192: 00 00 - 194: 58 1a + 194: d8 1a 196: 00 80 198: 00 00 19a: 00 00 19c: 00 00 19e: 03 00 c6 00 lb zero, 12(a2) 1a2: 00 00 - 1a4: 54 1a + 1a4: d4 1a 1a6: 00 80 1a8: 00 00 1aa: 00 00 1ac: 00 00 1ae: 03 00 da 00 lb zero, 13(s4) 1b2: 00 00 - 1b4: 54 1a + 1b4: d4 1a 1b6: 00 80 1b8: 00 00 1ba: 00 00 1bc: 00 00 1be: 03 00 ed 00 lb zero, 14(s10) 1c2: 00 00 - 1c4: 54 1a + 1c4: d4 1a 1c6: 00 80 1c8: 00 00 1ca: 00 00 1cc: 00 00 1ce: 03 00 03 01 lb zero, 16(t1) 1d2: 00 00 - 1d4: 48 06 + 1d4: c8 06 1d6: 00 80 1d8: 00 00 1da: 00 00 @@ -1034,7 +1066,7 @@ Disassembly of section .symtab: 1ee: f1 ff 1f0: 1c 01 1f2: 00 00 - 1f4: f8 05 + 1f4: 78 06 1f6: 00 80 1f8: 00 00 1fa: 00 00 @@ -1042,7 +1074,7 @@ Disassembly of section .symtab: 1fe: 02 00 200: 26 01 202: 00 00 - 204: 04 04 + 204: 84 04 206: 00 80 208: 9c 00 20a: 00 00 @@ -1050,60 +1082,60 @@ Disassembly of section .symtab: 20e: 02 00 210: 3a 01 212: 00 00 - 214: 68 00 - 216: 00 80 - 218: 4c 01 + 214: 00 04 + 216: 00 00 + 218: 00 00 21a: 00 00 - 21c: 12 00 - 21e: 02 00 - 220: 50 01 - 222: 00 00 - 224: 00 04 - 226: 00 00 - 228: 00 00 + 21c: 10 00 + 21e: f1 ff + 220: 47 01 00 00 fmsub.s ft2, ft0, ft0, ft0, rne + 224: 04 1f + 226: 00 80 + 228: 40 00 22a: 00 00 - 22c: 10 00 - 22e: f1 ff - 230: 5d 01 + 22c: 11 00 + 22e: 06 00 + 230: 55 01 232: 00 00 - 234: 84 1e + 234: 80 06 236: 00 80 - 238: 20 00 + 238: 00 00 23a: 00 00 - 23c: 11 00 - 23e: 06 00 - 240: 6b 01 00 00 - 244: 00 06 + 23c: 12 00 + 23e: 02 00 + 240: 5c 01 + 242: 00 00 + 244: 00 1f 246: 00 80 248: 00 00 24a: 00 00 - 24c: 12 00 - 24e: 02 00 - 250: 72 01 + 24c: 10 00 + 24e: 05 00 + 250: 6c 01 252: 00 00 - 254: 80 1e + 254: 68 00 256: 00 80 - 258: 00 00 + 258: 48 01 25a: 00 00 - 25c: 10 00 - 25e: 05 00 + 25c: 12 00 + 25e: 02 00 260: 82 01 262: 00 00 - 264: 58 22 + 264: d8 22 266: 00 80 268: 00 00 26a: 00 00 26c: 10 00 26e: f1 ff 270: 93 01 00 00 mv gp, zero - 274: 18 06 + 274: 98 06 276: 00 80 278: 00 00 27a: 00 00 27c: 12 00 27e: 02 00 280: 9b 01 00 00 - 284: 58 06 + 284: d8 06 286: 00 80 288: 00 00 28a: 00 00 @@ -1111,15 +1143,15 @@ Disassembly of section .symtab: 28e: 02 00 290: a8 01 292: 00 00 - 294: 4c 02 + 294: 48 02 296: 00 80 - 298: 88 01 + 298: 0c 02 29a: 00 00 29c: 12 00 29e: 02 00 2a0: b5 01 2a2: 00 00 - 2a4: 10 06 + 2a4: 90 06 2a6: 00 80 2a8: 00 00 2aa: 00 00 @@ -1127,7 +1159,7 @@ Disassembly of section .symtab: 2ae: 02 00 2b0: be 01 2b2: 00 00 - 2b4: 80 1e + 2b4: 00 1f 2b6: 00 80 2b8: 04 00 2ba: 00 00 @@ -1135,14 +1167,14 @@ Disassembly of section .symtab: 2be: 05 00 2c0: d1 01 2c2: 00 00 - 2c4: 1c 07 + 2c4: 9c 07 2c6: 00 80 2c8: 9c 00 2ca: 00 00 2cc: 12 00 2ce: 02 00 2d0: e3 01 00 00 beqz zero, 2050 - 2d4: 50 06 + 2d4: d0 06 2d6: 00 80 2d8: 00 00 2da: 00 00 @@ -1150,7 +1182,7 @@ Disassembly of section .symtab: 2de: 02 00 2e0: f2 01 2e2: 00 00 - 2e4: 20 06 + 2e4: a0 06 2e6: 00 80 2e8: 00 00 2ea: 00 00 @@ -1158,7 +1190,7 @@ Disassembly of section .symtab: 2ee: 02 00 2f0: fd 01 2f2: 00 00 - 2f4: 30 06 + 2f4: b0 06 2f6: 00 80 2f8: 00 00 2fa: 00 00 @@ -1166,7 +1198,7 @@ Disassembly of section .symtab: 2fe: 02 00 300: 0a 02 302: 00 00 - 304: c0 06 + 304: 40 07 306: 00 80 308: 5c 00 30a: 00 00 @@ -1182,7 +1214,7 @@ Disassembly of section .symtab: 31e: f1 ff 320: 28 02 322: 00 00 - 324: bc 05 + 324: 3c 06 326: 00 80 328: 00 00 32a: 00 00 @@ -1190,7 +1222,7 @@ Disassembly of section .symtab: 32e: 02 00 330: 32 02 332: 00 00 - 334: 08 06 + 334: 88 06 336: 00 80 338: 00 00 33a: 00 00 @@ -1198,7 +1230,7 @@ Disassembly of section .symtab: 33e: 02 00 340: 3d 02 342: 00 00 - 344: 30 09 + 344: b0 09 346: 00 80 348: 24 01 34a: 00 00 @@ -1213,7 +1245,7 @@ Disassembly of section .symtab: 35e: 01 00 360: 4e 02 362: 00 00 - 364: 94 08 + 364: 14 09 366: 00 80 368: 9c 00 36a: 00 00 @@ -1221,7 +1253,7 @@ Disassembly of section .symtab: 36e: 02 00 370: 62 02 372: 00 00 - 374: a4 1e + 374: 44 1f 376: 00 80 378: 00 00 37a: 00 00 @@ -1229,7 +1261,7 @@ Disassembly of section .symtab: 37e: 06 00 380: 6e 02 382: 00 00 - 384: 84 1e + 384: 04 1f 386: 00 80 388: 00 00 38a: 00 00 @@ -1237,7 +1269,7 @@ Disassembly of section .symtab: 38e: 06 00 390: 7a 02 392: 00 00 - 394: b8 07 + 394: 38 08 396: 00 80 398: dc 00 39a: 00 00 @@ -1245,7 +1277,7 @@ Disassembly of section .symtab: 39e: 02 00 3a0: 81 02 3a2: 00 00 - 3a4: d4 03 + 3a4: 54 04 3a6: 00 80 3a8: 30 00 3aa: 00 00 @@ -1253,7 +1285,7 @@ Disassembly of section .symtab: 3ae: 02 00 3b0: 86 02 3b2: 00 00 - 3b4: 30 05 + 3b4: b0 05 3b6: 00 80 3b8: 84 00 3ba: 00 00 @@ -1261,14 +1293,14 @@ Disassembly of section .symtab: 3be: 02 00 3c0: a9 02 3c2: 00 00 - 3c4: 68 06 + 3c4: e8 06 3c6: 00 80 3c8: 00 00 3ca: 00 00 3cc: 12 00 3ce: 02 00 3d0: b7 02 00 00 lui t0, 0 - 3d4: 78 06 + 3d4: f8 06 3d6: 00 80 3d8: 14 00 3da: 00 00 @@ -1276,7 +1308,7 @@ Disassembly of section .symtab: 3de: 02 00 3e0: be 02 3e2: 00 00 - 3e4: 40 06 + 3e4: c0 06 3e6: 00 80 3e8: 00 00 3ea: 00 00 @@ -1284,7 +1316,7 @@ Disassembly of section .symtab: 3ee: 02 00 3f0: cc 02 3f2: 00 00 - 3f4: 60 06 + 3f4: e0 06 3f6: 00 80 3f8: 00 00 3fa: 00 00 @@ -1292,76 +1324,76 @@ Disassembly of section .symtab: 3fe: 02 00 400: d9 02 402: 00 00 - 404: a0 04 + 404: 20 05 406: 00 80 408: 90 00 40a: 00 00 40c: 12 00 40e: 02 00 410: f7 02 00 00 - 414: 28 06 + 414: a8 06 416: 00 80 418: 00 00 41a: 00 00 41c: 12 00 41e: 02 00 420: 03 03 00 00 lb t1, 0(zero) - 424: b4 01 + 424: d8 1a 426: 00 80 - 428: 98 00 + 428: 00 00 42a: 00 00 - 42c: 12 00 - 42e: 02 00 - 430: 1c 03 + 42c: 10 00 + 42e: 04 00 + 430: 12 03 432: 00 00 - 434: 58 1a + 434: 04 1f 436: 00 80 438: 00 00 43a: 00 00 43c: 10 00 - 43e: 04 00 - 440: 2b 03 00 00 - 444: 84 1e + 43e: 05 00 + 440: 9d 00 + 442: 00 00 + 444: 44 1f 446: 00 80 448: 00 00 44a: 00 00 44c: 10 00 - 44e: 05 00 - 450: 9d 00 + 44e: 06 00 + 450: 48 03 452: 00 00 - 454: a4 1e + 454: 0c 07 456: 00 80 - 458: 00 00 + 458: 34 00 45a: 00 00 - 45c: 10 00 - 45e: 06 00 - 460: 41 03 + 45c: 12 00 + 45e: 02 00 + 460: 19 03 462: 00 00 - 464: 8c 06 + 464: b0 01 466: 00 80 - 468: 34 00 + 468: 98 00 46a: 00 00 46c: 12 00 46e: 02 00 - 470: 32 03 + 470: 39 03 472: 00 00 - 474: 38 06 + 474: b8 06 476: 00 80 478: 00 00 47a: 00 00 47c: 12 00 47e: 02 00 - 480: 40 03 - 482: 00 00 - 484: b4 05 + 480: 47 03 00 00 fmsub.s ft6, ft0, ft0, ft0, rne + 484: 34 06 486: 00 80 488: 00 00 48a: 00 00 48c: 12 00 48e: 02 00 - 490: 46 03 + 490: 4d 03 492: 00 00 - 494: 70 06 + 494: f0 06 496: 00 80 498: 00 00 49a: 00 00 @@ -1401,11 +1433,12 @@ Disassembly of section .strtab: 3e: 5f 6b 65 72 42: 6e 65 44: 6c 2d - 46: 65 37 - 48: 2d 36 - 4a: 37 2d 32 64 lui s10, 410402 - 4e: 2d 63 - 50: 37 2d 65 39 lui s10, 235090 + 46: 31 62 + 48: 2d 37 + 4a: 31 2d + 4c: 63 66 2d 66 bltu s10, sp, 1644 + 50: 66 2d + 52: 34 32 54: 2e 63 56: 00 70 58: 61 72 @@ -1498,31 +1531,32 @@ Disassembly of section .strtab: 132: 5f 76 65 63 136: 61 64 138: 64 00 - 13a: 6b 65 72 6e - 13e: 65 6c - 140: 5f 73 70 61 - 144: 77 6e 5f 72 - 148: 75 6e - 14a: 5f 77 61 72 - 14e: 70 00 - 150: 5f 5f 73 74 - 154: 61 63 - 156: 6b 5f 73 69 - 15a: 7a 65 - 15c: 00 67 - 15e: 5f 77 73 70 - 162: 61 77 - 164: 6e 5f - 166: 61 72 - 168: 67 73 00 76 - 16c: 78 5f - 16e: 74 6d - 170: 63 00 5f 5f beq t5, s5, 1504 - 174: 53 44 41 54 - 178: 41 5f - 17a: 42 45 - 17c: 47 49 4e 5f - 180: 5f 00 5f 5f + 13a: 5f 5f 73 74 + 13e: 61 63 + 140: 6b 5f 73 69 + 144: 7a 65 + 146: 00 67 + 148: 5f 77 73 70 + 14c: 61 77 + 14e: 6e 5f + 150: 61 72 + 152: 67 73 00 76 + 156: 78 5f + 158: 74 6d + 15a: 63 00 5f 5f beq t5, s5, 1504 + 15e: 53 44 41 54 + 162: 41 5f + 164: 42 45 + 166: 47 49 4e 5f + 16a: 5f 00 6b 65 + 16e: 72 6e + 170: 65 6c + 172: 5f 73 70 61 + 176: 77 6e 5f 63 + 17a: 61 6c + 17c: 6c 62 + 17e: 61 63 + 180: 6b 00 5f 5f 184: 67 6c 6f 62 188: 61 6c 18a: 5f 70 6f 69 @@ -1666,40 +1700,43 @@ Disassembly of section .strtab: 2f8: 78 5f 2fa: 77 61 72 70 2fe: 5f 67 69 64 - 302: 00 6b - 304: 65 72 - 306: 6e 65 - 308: 6c 5f - 30a: 73 70 61 77 csrci 1910, 2 - 30e: 6e 5f - 310: 72 75 - 312: 6e 5f - 314: 74 68 - 316: 72 65 - 318: 61 64 - 31a: 73 00 5f 5f - 31e: 44 41 - 320: 54 41 - 322: 5f 42 45 47 - 326: 49 4e - 328: 5f 5f 00 5f - 32c: 65 64 - 32e: 61 74 - 330: 61 00 - 332: 76 78 - 334: 5f 74 68 72 - 338: 65 61 - 33a: 64 5f - 33c: 6c 69 - 33e: 64 00 - 340: 5f 65 78 69 - 344: 74 00 - 346: 76 78 - 348: 5f 6e 75 6d - 34c: 5f 69 6e 73 - 350: 74 72 - 352: 73 - 353: 00 + 302: 00 5f + 304: 5f 44 41 54 + 308: 41 5f + 30a: 42 45 + 30c: 47 49 4e 5f + 310: 5f 00 5f 65 + 314: 64 61 + 316: 74 61 + 318: 00 6b + 31a: 65 72 + 31c: 6e 65 + 31e: 6c 5f + 320: 73 70 61 77 csrci 1910, 2 + 324: 6e 5f + 326: 72 65 + 328: 6d 61 + 32a: 69 6e + 32c: 69 6e + 32e: 67 5f 63 61 + 332: 6c 6c + 334: 62 61 + 336: 63 6b 00 76 bltu zero, zero, 1910 + 33a: 78 5f + 33c: 74 68 + 33e: 72 65 + 340: 61 64 + 342: 5f 6c 69 64 + 346: 00 5f + 348: 65 78 + 34a: 69 74 + 34c: 00 76 + 34e: 78 5f + 350: 6e 75 + 352: 6d 5f + 354: 69 6e + 356: 73 74 72 73 csrrci s0, 1847, 4 + 35a: 00 Disassembly of section .shstrtab: