implementing gpu library

This commit is contained in:
felsabbagh3
2019-02-14 01:54:16 -05:00
parent c3c3cb0b45
commit 39003073f9
14 changed files with 9745 additions and 432 deletions

View File

@@ -0,0 +1,294 @@
gpgpu_test.elf: file format elf32-littleriscv
Disassembly of section .text:
# main: calls initiate_stack, then createWarps(a0=2, a1=8, a2=0x80000040),
# and returns 0 in a0.
# 0x80000040 is the address of <mat> in this listing; the "<N+0x...>" text
# objdump printed beside the address computation is nearest-symbol arithmetic,
# not a reference to N.
# NOTE(review): initiate_stack ends with sp = 0x7ffff010 (lui sp,0x7ffff then
# addi sp,sp,16), so the ra/s0 reloads at 80000030/80000034 do not read the
# slots written at 80000004/80000008 -- confirm whether control is expected
# to reach this epilogue at all.
80000000 <main>:
80000000: ff010113 addi sp,sp,-16
80000004: 00112623 sw ra,12(sp)
80000008: 00812423 sw s0,8(sp)
8000000c: 01010413 addi s0,sp,16
80000010: 2a8000ef jal ra,800002b8 <initiate_stack>
80000014: 800007b7 lui a5,0x80000
# a2 = 0x80000000 + 64 = 0x80000040 (address of <mat>)
80000018: 04078613 addi a2,a5,64 # 80000040 <N+0xfeffff80>
8000001c: 00800593 li a1,8
80000020: 00200513 li a0,2
80000024: 22c000ef jal ra,80000250 <createWarps>
80000028: 00000793 li a5,0
8000002c: 00078513 mv a0,a5 # return value 0
80000030: 00c12083 lw ra,12(sp)
80000034: 00812403 lw s0,8(sp)
80000038: 01010113 addi sp,sp,16
8000003c: 00008067 ret
# mat(a0): computes i = get_tid() + (a0 << 3) and stores z[i] = x[i] + y[i]
# on 32-bit words, with x = 0x81000040, y = 0x81000080, z = 0x81000000
# (the <x>, <y>, <z> symbols of this listing). Despite the name, the visible
# code is an element-wise add. main passes this function's address as the
# third argument to createWarps.
# Stack slots: -36(s0) = incoming a0, -20(s0) = get_tid() result,
# -24(s0) = element index i.
# The "<N+0x...>" annotations objdump printed below are nearest-symbol
# arithmetic only; N itself is not accessed here.
80000040 <mat>:
80000040: fd010113 addi sp,sp,-48
80000044: 02112623 sw ra,44(sp)
80000048: 02812423 sw s0,40(sp)
8000004c: 03010413 addi s0,sp,48
80000050: fca42e23 sw a0,-36(s0)
80000054: 244000ef jal ra,80000298 <get_tid>
80000058: fea42623 sw a0,-20(s0)
8000005c: fdc42783 lw a5,-36(s0)
80000060: 00379793 slli a5,a5,0x3 # a5 = a0 * 8
80000064: fec42703 lw a4,-20(s0)
80000068: 00f707b3 add a5,a4,a5 # i = tid + a0 * 8
8000006c: fef42423 sw a5,-24(s0)
80000070: 00000013 nop
80000074: 00000013 nop
80000078: 00000013 nop
8000007c: 00000013 nop
80000080: 810007b7 lui a5,0x81000
80000084: fe842703 lw a4,-24(s0)
80000088: 00271713 slli a4,a4,0x2 # byte offset = i * 4
# a5 = 0x81000040 (base of <x>)
8000008c: 04078793 addi a5,a5,64 # 81000040 <N+0xffffff80>
80000090: 00f707b3 add a5,a4,a5
80000094: 0007a703 lw a4,0(a5) # a4 = x[i]
80000098: 810007b7 lui a5,0x81000
8000009c: fe842683 lw a3,-24(s0)
800000a0: 00269693 slli a3,a3,0x2
# a5 = 0x81000080 (base of <y>)
800000a4: 08078793 addi a5,a5,128 # 81000080 <N+0xffffffc0>
800000a8: 00f687b3 add a5,a3,a5
800000ac: 0007a783 lw a5,0(a5) # a5 = y[i]
800000b0: 00f70733 add a4,a4,a5 # a4 = x[i] + y[i]
800000b4: 810007b7 lui a5,0x81000
800000b8: fe842683 lw a3,-24(s0)
800000bc: 00269693 slli a3,a3,0x2
800000c0: 00078793 mv a5,a5
800000c4: 00f687b3 add a5,a3,a5
# store the sum to z[i] (base 0x81000000)
800000c8: 00e7a023 sw a4,0(a5) # 81000000 <N+0xffffff40>
800000cc: 00000013 nop
800000d0: 02c12083 lw ra,44(sp)
800000d4: 02812403 lw s0,40(sp)
800000d8: 03010113 addi sp,sp,48
800000dc: 00008067 ret
# set_wid(a0): writes a0 to CSR number 0xe, going through the local slot at
# -20(s0). Returns nothing.
# NOTE(review): presumably CSR 0xe holds the warp id (from the function
# name) -- confirm against the hardware's CSR map.
800000e0 <set_wid>:
800000e0: fe010113 addi sp,sp,-32
800000e4: 00812e23 sw s0,28(sp)
800000e8: 02010413 addi s0,sp,32
800000ec: fea42623 sw a0,-20(s0)
800000f0: fec42783 lw a5,-20(s0)
800000f4: 00e79073 csrw 0xe,a5 # CSR[0xe] = a0
800000f8: 00000013 nop
800000fc: 01c12403 lw s0,28(sp)
80000100: 02010113 addi sp,sp,32
80000104: 00008067 ret
# set_func(a0): writes a0 to CSR number 0xf, going through the local slot at
# -20(s0). Returns nothing.
# NOTE(review): presumably CSR 0xf holds a function pointer (from the
# function name) -- confirm against the hardware's CSR map.
80000108 <set_func>:
80000108: fe010113 addi sp,sp,-32
8000010c: 00812e23 sw s0,28(sp)
80000110: 02010413 addi s0,sp,32
80000114: fea42623 sw a0,-20(s0)
80000118: fec42783 lw a5,-20(s0)
8000011c: 00f79073 csrw 0xf,a5 # CSR[0xf] = a0
80000120: 00000013 nop
80000124: 01c12403 lw s0,28(sp)
80000128: 02010113 addi sp,sp,32
8000012c: 00008067 ret
# get_func(): reads CSR number 0xf and returns the value in a0. This is the
# same CSR that set_func writes.
80000130 <get_func>:
80000130: fe010113 addi sp,sp,-32
80000134: 00812e23 sw s0,28(sp)
80000138: 02010413 addi s0,sp,32
8000013c: 00f027f3 csrr a5,0xf # a5 = CSR[0xf]
80000140: fef42623 sw a5,-20(s0)
80000144: fec42783 lw a5,-20(s0)
80000148: 00078513 mv a0,a5 # return value
8000014c: 01c12403 lw s0,28(sp)
80000150: 02010113 addi sp,sp,32
80000154: 00008067 ret
# get_wid(): reads CSR number 0xe and returns the value in a0. This is the
# same CSR that set_wid writes.
80000158 <get_wid>:
80000158: fe010113 addi sp,sp,-32
8000015c: 00812e23 sw s0,28(sp)
80000160: 02010413 addi s0,sp,32
80000164: 00e027f3 csrr a5,0xe # a5 = CSR[0xe]
80000168: fef42623 sw a5,-20(s0)
8000016c: fec42783 lw a5,-20(s0)
80000170: 00078513 mv a0,a5 # return value
80000174: 01c12403 lw s0,28(sp)
80000178: 02010113 addi sp,sp,32
8000017c: 00008067 ret
# createThreads(a0, a1, a2): the arguments are spilled to -36(s0), -40(s0)
# and -44(s0) respectively; the loop counter i lives at -20(s0).
#   1. Save sp in t5, then for i = 1 while i < a0 (unsigned): t1 = i,
#      sp -= 256, execute the raw word 0x0003506b, i += 1.
#   2. Restore sp from t5, set t1 = 0, t6 = a2, s11 = a0, a0 = a1, execute
#      the raw word 0x01bfe0eb, then ecall.
# objdump has no mnemonic for the two raw words (both have low opcode bits
# 0x6b), so they are shown as bare hex.
# NOTE(review): presumably these are GPU-extension instructions (one per
# extra thread with a 256-byte stack stride, then a jump/call through t6)
# -- confirm against the target ISA definition.
# NOTE(review): s10 is saved and restored but not otherwise touched in the
# visible code; s11 is overwritten at 800001e0 and restored in the epilogue.
80000180 <createThreads>:
80000180: fd010113 addi sp,sp,-48
80000184: 02812623 sw s0,44(sp)
80000188: 03a12423 sw s10,40(sp)
8000018c: 03b12223 sw s11,36(sp)
80000190: 03010413 addi s0,sp,48
80000194: fca42e23 sw a0,-36(s0)
80000198: fcb42c23 sw a1,-40(s0)
8000019c: fcc42a23 sw a2,-44(s0)
800001a0: 00010f13 mv t5,sp # remember sp; restored at 800001d4
800001a4: 00100793 li a5,1
800001a8: fef42623 sw a5,-20(s0) # i = 1
800001ac: 01c0006f j 800001c8 <createThreads+0x48>
800001b0: fec42303 lw t1,-20(s0) # loop body: t1 = i
800001b4: f0010113 addi sp,sp,-256
800001b8: 0003506b 0x3506b
800001bc: fec42783 lw a5,-20(s0)
800001c0: 00178793 addi a5,a5,1
800001c4: fef42623 sw a5,-20(s0) # i += 1
800001c8: fec42703 lw a4,-20(s0) # loop test
800001cc: fdc42783 lw a5,-36(s0)
800001d0: fef760e3 bltu a4,a5,800001b0 <createThreads+0x30>
800001d4: 000f0113 mv sp,t5
800001d8: 00000313 li t1,0
800001dc: fd442f83 lw t6,-44(s0) # t6 = original a2
800001e0: fdc42d83 lw s11,-36(s0) # s11 = original a0
800001e4: fd842503 lw a0,-40(s0) # a0 = original a1
800001e8: 01bfe0eb 0x1bfe0eb
800001ec: 00000073 ecall
800001f0: 00000013 nop
800001f4: 02c12403 lw s0,44(sp)
800001f8: 02812d03 lw s10,40(sp)
800001fc: 02412d83 lw s11,36(sp)
80000200: 03010113 addi sp,sp,48
80000204: 00008067 ret
# wspawn(a0, a1, a2): spills the three arguments to the frame and reloads
# them into a0/a1/a2 unchanged, sets t1 = 0x80000180 (the address of
# <createThreads> in this listing), then executes the raw word 0x0003006b,
# which objdump cannot decode (low opcode bits 0x6b).
# NOTE(review): presumably a GPU-extension "spawn warp" instruction taking
# the entry point in t1 -- confirm against the target ISA definition.
# No call to wspawn is visible in this listing.
80000208 <wspawn>:
80000208: fe010113 addi sp,sp,-32
8000020c: 00812e23 sw s0,28(sp)
80000210: 02010413 addi s0,sp,32
80000214: fea42623 sw a0,-20(s0)
80000218: feb42423 sw a1,-24(s0)
8000021c: fec42223 sw a2,-28(s0)
80000220: fec42503 lw a0,-20(s0)
80000224: fe842583 lw a1,-24(s0)
80000228: fe442783 lw a5,-28(s0)
8000022c: 00078613 mv a2,a5
80000230: 800007b7 lui a5,0x80000
# a5 = 0x80000180 (address of <createThreads>)
80000234: 18078793 addi a5,a5,384 # 80000180 <N+0xff0000c0>
80000238: 00078313 mv t1,a5
8000023c: 0003006b 0x3006b
80000240: 00000013 nop
80000244: 01c12403 lw s0,28(sp)
80000248: 02010113 addi sp,sp,32
8000024c: 00008067 ret
# createWarps(a0, a1, a2): calls createThreads(a1, 0, a2) and then executes
# ecall before the normal epilogue.
# NOTE(review): the first argument (a0) is stored at -20(s0) but never read
# back, so the value main passes (2) has no effect in the visible code.
80000250 <createWarps>:
80000250: fe010113 addi sp,sp,-32
80000254: 00112e23 sw ra,28(sp)
80000258: 00812c23 sw s0,24(sp)
8000025c: 02010413 addi s0,sp,32
80000260: fea42623 sw a0,-20(s0)
80000264: feb42423 sw a1,-24(s0)
80000268: fec42223 sw a2,-28(s0)
8000026c: fe442783 lw a5,-28(s0)
80000270: 00078613 mv a2,a5 # arg 3 = incoming a2
80000274: 00000593 li a1,0 # arg 2 = 0
80000278: fe842503 lw a0,-24(s0) # arg 1 = incoming a1
8000027c: f05ff0ef jal ra,80000180 <createThreads>
80000280: 00000073 ecall
80000284: 00000013 nop
80000288: 01c12083 lw ra,28(sp)
8000028c: 01812403 lw s0,24(sp)
80000290: 02010113 addi sp,sp,32
80000294: 00008067 ret
# get_tid(): returns the current contents of a5 in a0.
# NOTE(review): a5 is never written inside this function -- the only
# instruction between prologue and return-value move is a nop -- so the
# result is whatever the caller (or hardware) left in a5. Presumably the nop
# is a placeholder for a thread-id read; confirm against the C source.
80000298 <get_tid>:
80000298: ff010113 addi sp,sp,-16
8000029c: 00812623 sw s0,12(sp)
800002a0: 01010413 addi s0,sp,16
800002a4: 00000013 nop
800002a8: 00078513 mv a0,a5 # a5 not initialised in this function
800002ac: 00c12403 lw s0,12(sp)
800002b0: 01010113 addi sp,sp,16
800002b4: 00008067 ret
# initiate_stack(): overwrites sp with 0x7ffff000 (lui sp,0x7ffff) in the
# middle of an ordinary prologue/epilogue pair.
# Consequences visible in the code:
#   - s0 is saved at 12(old sp - 16) but reloaded from 12(new sp), i.e. from
#     0x7ffff00c, so the caller's s0 is not restored.
#   - The function returns with sp = 0x7ffff010, not the caller's sp.
# NOTE(review): callers that later access their own frame through sp (main
# does) will read different memory -- confirm this is intended.
800002b8 <initiate_stack>:
800002b8: ff010113 addi sp,sp,-16
800002bc: 00812623 sw s0,12(sp)
800002c0: 01010413 addi s0,sp,16
800002c4: 7ffff137 lui sp,0x7ffff # sp = 0x7ffff000
800002c8: 00000013 nop
800002cc: 00c12403 lw s0,12(sp) # 7ffff00c <main-0xff4>
800002d0: 01010113 addi sp,sp,16
800002d4: 00008067 ret
Disassembly of section .bss:
# z: .bss area starting at 0x81000000; objdump prints "..." for its
# contents. <mat> stores 32-bit sums to 0x81000000 + 4*i. The next symbol,
# <x>, starts at 0x81000040.
81000000 <z>:
...
Disassembly of section .data:
# x: initialised data, not code. objdump disassembled .data, so the
# mnemonics below (nop, unimp, c.slli, lb, ...) are meaningless renderings
# of data bytes. <mat> reads this area as 32-bit little-endian words; read
# that way, 0x81000040..0x8100007f holds 16 words:
#   1, 5, 10, 0, 3, 1, 1, 2, 8, 7, 8, 7, 5, 7, 7, 9
81000040 <x>:
81000040: 0001 nop
81000042: 0000 unimp
81000044: 0005 c.nop 1
81000046: 0000 unimp
81000048: 000a c.slli zero,0x2
8100004a: 0000 unimp
8100004c: 0000 unimp
8100004e: 0000 unimp
81000050: 00000003 lb zero,0(zero) # 0 <main-0x80000000>
81000054: 0001 nop
81000056: 0000 unimp
81000058: 0001 nop
8100005a: 0000 unimp
8100005c: 0002 c.slli64 zero
8100005e: 0000 unimp
81000060: 0008 0x8
81000062: 0000 unimp
81000064: 00000007 0x7
81000068: 0008 0x8
8100006a: 0000 unimp
8100006c: 00000007 0x7
81000070: 0005 c.nop 1
81000072: 0000 unimp
81000074: 00000007 0x7
81000078: 00000007 0x7
8100007c: 0009 c.nop 2
...
# y: initialised data, not code. objdump disassembled .data, so the
# mnemonics below are meaningless renderings of data bytes. <mat> reads this
# area as 32-bit little-endian words; read that way,
# 0x81000080..0x810000bf holds 16 words:
#   0, 2, 2, 0, 5, 0, 1, 1, 4, 2, 2, 0, 3, 2, 3, 2
81000080 <y>:
81000080: 0000 unimp
81000082: 0000 unimp
81000084: 0002 c.slli64 zero
81000086: 0000 unimp
81000088: 0002 c.slli64 zero
8100008a: 0000 unimp
8100008c: 0000 unimp
8100008e: 0000 unimp
81000090: 0005 c.nop 1
81000092: 0000 unimp
81000094: 0000 unimp
81000096: 0000 unimp
81000098: 0001 nop
8100009a: 0000 unimp
8100009c: 0001 nop
8100009e: 0000 unimp
810000a0: 0004 0x4
810000a2: 0000 unimp
810000a4: 0002 c.slli64 zero
810000a6: 0000 unimp
810000a8: 0002 c.slli64 zero
810000aa: 0000 unimp
810000ac: 0000 unimp
810000ae: 0000 unimp
810000b0: 00000003 lb zero,0(zero) # 0 <main-0x80000000>
810000b4: 0002 c.slli64 zero
810000b6: 0000 unimp
810000b8: 00000003 lb zero,0(zero) # 0 <main-0x80000000>
810000bc: 0002 c.slli64 zero
...
Disassembly of section .sdata:
# N: data in .sdata at 0x810000c0; the first half-word is 0x0010 (16) and
# objdump elides the bytes that follow. No instruction in this listing
# loads from this address; the "<N+0x...>" annotations in .text are objdump
# nearest-symbol arithmetic only.
810000c0 <N>:
810000c0: 0010 0x10
...
Disassembly of section .comment:
# .comment: not code. Read in address order (little-endian), the bytes below
# are the ASCII string "GCC: (GNU) 8.2.0"; the floating-point mnemonics are
# objdump decoding that text as instructions.
82000000 <.comment>:
82000000: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm
82000004: 2820 fld fs0,80(s0)
82000006: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm
8200000a: 3820 fld fs0,112(s0)
8200000c: 322e fld ft4,232(sp)
8200000e: 302e fld ft0,232(sp)
...