diff --git a/driver/tests/tex_demo/common.h b/driver/tests/tex_demo/common.h index 2584cf7d..9e16ff38 100644 --- a/driver/tests/tex_demo/common.h +++ b/driver/tests/tex_demo/common.h @@ -9,7 +9,7 @@ struct kernel_arg_t { uint32_t src_height; uint32_t src_stride; uint32_t src_pitch; - uint32_t src_ptr; + uint8_t src_ptr; uint32_t dst_width; uint32_t dst_height; uint32_t dst_stride; diff --git a/driver/tests/tex_demo/demo b/driver/tests/tex_demo/demo index bb7902c0..b50fd169 100755 Binary files a/driver/tests/tex_demo/demo and b/driver/tests/tex_demo/demo differ diff --git a/driver/tests/tex_demo/football.tga b/driver/tests/tex_demo/football.tga new file mode 100644 index 00000000..a5544ec7 Binary files /dev/null and b/driver/tests/tex_demo/football.tga differ diff --git a/driver/tests/tex_demo/kernel.bin b/driver/tests/tex_demo/kernel.bin index d740e548..61f6bc26 100755 Binary files a/driver/tests/tex_demo/kernel.bin and b/driver/tests/tex_demo/kernel.bin differ diff --git a/driver/tests/tex_demo/kernel.c b/driver/tests/tex_demo/kernel.c index 87a2159f..1c83fcdc 100644 --- a/driver/tests/tex_demo/kernel.c +++ b/driver/tests/tex_demo/kernel.c @@ -16,18 +16,19 @@ struct tile_arg_t { void kernel_body(int task_id, void* arg) { struct tile_arg_t* _arg = (struct tile_arg_t*)(arg); - uint32_t xoffset = task_id * _arg->tile_width; + uint32_t xoffset = 0; uint32_t yoffset = task_id * _arg->tile_height; - uint32_t* dst_ptr = (uint32_t*)_arg->karg.dst_ptr + xoffset + yoffset * _arg->karg.dst_pitch; + uint8_t* dst_ptr = (uint8_t*)(_arg->karg.dst_ptr + xoffset * _arg->karg.dst_stride + yoffset * _arg->karg.dst_pitch); float fu = xoffset * _arg->deltaX; float fv = yoffset * _arg->deltaY; for (uint32_t y = 0; y < _arg->tile_height; ++y) { + uint32_t* dst_row = (uint32_t*)dst_ptr; for (uint32_t x = 0; x < _arg->tile_width; ++x) { int32_t u = (int32_t)(fu * (1<<20)); int32_t v = (int32_t)(fv * (1<<20)); - dst_ptr[x] = vx_tex(0, u, v, 0x0); + dst_row[x] = vx_tex(0, u, v, 0x0); fu += _arg->deltaX; } dst_ptr += _arg->karg.dst_pitch; diff --git a/driver/tests/tex_demo/kernel.dump b/driver/tests/tex_demo/kernel.dump index f1179c11..99a7fd04 100644 --- a/driver/tests/tex_demo/kernel.dump +++ b/driver/tests/tex_demo/kernel.dump @@ -6,48 +6,48 @@ Disassembly of section .init: 80000000 <_start>: 80000000: 00000597 auipc a1,0x0 -80000004: 23c58593 addi a1,a1,572 # 8000023c +80000004: 22c58593 addi a1,a1,556 # 8000022c 80000008: fc102573 csrr a0,0xfc1 8000000c: 00b5106b 0xb5106b -80000010: 22c000ef jal ra,8000023c +80000010: 21c000ef jal ra,8000022c 80000014: 00100513 li a0,1 80000018: 0005006b 0x5006b 8000001c: 00002517 auipc a0,0x2 -80000020: dc850513 addi a0,a0,-568 # 80001de4 +80000020: db850513 addi a0,a0,-584 # 80001dd4 80000024: 00002617 auipc a2,0x2 -80000028: e4060613 addi a2,a2,-448 # 80001e64 <__BSS_END__> +80000028: e3060613 addi a2,a2,-464 # 80001e54 <__BSS_END__> 8000002c: 40a60633 sub a2,a2,a0 80000030: 00000593 li a1,0 -80000034: 5d8000ef jal ra,8000060c +80000034: 5c8000ef jal ra,800005fc 80000038: 00000517 auipc a0,0x0 -8000003c: 4dc50513 addi a0,a0,1244 # 80000514 <__libc_fini_array> -80000040: 48c000ef jal ra,800004cc -80000044: 52c000ef jal ra,80000570 <__libc_init_array> +8000003c: 4cc50513 addi a0,a0,1228 # 80000504 <__libc_fini_array> +80000040: 47c000ef jal ra,800004bc +80000044: 51c000ef jal ra,80000560 <__libc_init_array> 80000048: 008000ef jal ra,80000050
-8000004c: 4940006f j 800004e0 +8000004c: 4840006f j 800004d0 Disassembly of section .text: 80000050
: 80000050: fb010113 addi sp,sp,-80 80000054: 04812423 sw s0,72(sp) -80000058: 7ffff437 lui s0,0x7ffff -8000005c: 04112623 sw ra,76(sp) -80000060: 04912223 sw s1,68(sp) -80000064: 01442783 lw a5,20(s0) # 7ffff014 <__stack_size+0x7fffec14> +80000058: 04112623 sw ra,76(sp) +8000005c: 04912223 sw s1,68(sp) +80000060: 7ffff437 lui s0,0x7ffff +80000064: 01444783 lbu a5,20(s0) # 7ffff014 <__stack_size+0x7fffec14> 80000068: fd079073 csrw 0xfd0,a5 8000006c: fd405073 csrwi 0xfd4,0 80000070: 00442503 lw a0,4(s0) 80000074: 01f00493 li s1,31 80000078: 00151513 slli a0,a0,0x1 8000007c: fff50513 addi a0,a0,-1 -80000080: 400000ef jal ra,80000480 <__clzsi2> +80000080: 3f0000ef jal ra,80000470 <__clzsi2> 80000084: 40a48533 sub a0,s1,a0 80000088: fd551073 csrw 0xfd5,a0 8000008c: 00842503 lw a0,8(s0) 80000090: 00151513 slli a0,a0,0x1 80000094: fff50513 addi a0,a0,-1 -80000098: 3e8000ef jal ra,80000480 <__clzsi2> +80000098: 3d8000ef jal ra,80000470 <__clzsi2> 8000009c: 40a484b3 sub s1,s1,a0 800000a0: fd649073 csrw 0xfd6,s1 800000a4: fd105073 csrwi 0xfd1,0 @@ -77,7 +77,7 @@ Disassembly of section .text: 80000104: 02f12623 sw a5,44(sp) 80000108: 01842783 lw a5,24(s0) 8000010c: 80002737 lui a4,0x80002 -80000110: ddc72707 flw fa4,-548(a4) # 80001ddc <__stack_top+0x81001ddc> +80000110: dcc72707 flw fa4,-564(a4) # 80001dcc <__stack_top+0x81001dcc> 80000114: 02f12823 sw a5,48(sp) 80000118: 00042503 lw a0,0(s0) 8000011c: 01c42703 lw a4,28(s0) @@ -94,7 +94,7 @@ Disassembly of section .text: 80000148: 02f12c27 fsw fa5,56(sp) 8000014c: 02f12a23 sw a5,52(sp) 80000150: 02e12e27 fsw fa4,60(sp) -80000154: 1e4000ef jal ra,80000338 +80000154: 1d4000ef jal ra,80000328 80000158: 04c12083 lw ra,76(sp) 8000015c: 04812403 lw s0,72(sp) 80000160: 04412483 lw s1,68(sp) @@ -105,513 +105,517 @@ Disassembly of section .text: 8000016c: 00000793 li a5,0 80000170: 00078863 beqz a5,80000180 80000174: 80000537 lui a0,0x80000 -80000178: 51450513 addi a0,a0,1300 # 80000514 <__stack_top+0x81000514> -8000017c: 3500006f j 800004cc +80000178: 50450513 addi a0,a0,1284 # 80000504 <__stack_top+0x81000504> +8000017c: 3400006f j 800004bc 80000180: 00008067 ret 80000184 : -80000184: 0305a683 lw a3,48(a1) -80000188: 02c5a783 lw a5,44(a1) -8000018c: 0245a603 lw a2,36(a1) -80000190: 02d50733 mul a4,a0,a3 -80000194: 0285a303 lw t1,40(a1) -80000198: 0345a687 flw fa3,52(a1) -8000019c: 0385a507 flw fa0,56(a1) -800001a0: 00000893 li a7,0 -800001a4: 02a78533 mul a0,a5,a0 -800001a8: d01775d3 fcvt.s.wu fa1,a4 +80000184: 0305a703 lw a4,48(a1) +80000188: 0245a683 lw a3,36(a1) +8000018c: 0285a603 lw a2,40(a1) +80000190: 02e507b3 mul a5,a0,a4 +80000194: 0345a687 flw fa3,52(a1) +80000198: 0385a507 flw fa0,56(a1) +8000019c: f00007d3 fmv.w.x fa5,zero +800001a0: 10f6f7d3 fmul.s fa5,fa3,fa5 +800001a4: 02f68833 mul a6,a3,a5 +800001a8: d017f5d3 fcvt.s.wu fa1,a5 800001ac: 10a5f5d3 fmul.s fa1,fa1,fa0 -800001b0: 02e60833 mul a6,a2,a4 -800001b4: d01577d3 fcvt.s.wu fa5,a0 -800001b8: 10d7f7d3 fmul.s fa5,fa5,fa3 -800001bc: 00a80833 add a6,a6,a0 -800001c0: 00281813 slli a6,a6,0x2 -800001c4: 00680833 add a6,a6,t1 -800001c8: 06068463 beqz a3,80000230 -800001cc: 80002737 lui a4,0x80002 -800001d0: dd872007 flw ft0,-552(a4) # 80001dd8 <__stack_top+0x81001dd8> -800001d4: 00000513 li a0,0 -800001d8: 20000653 fmv.s fa2,ft0 -800001dc: 04078063 beqz a5,8000021c -800001e0: 1005f753 fmul.s fa4,fa1,ft0 -800001e4: 00080693 mv a3,a6 -800001e8: 00000713 li a4,0 -800001ec: c0071653 fcvt.w.s a2,fa4,rtz -800001f0: 10c7f753 fmul.s fa4,fa5,fa2 -800001f4: c00717d3 fcvt.w.s a5,fa4,rtz -800001f8: 50c7d7eb 0x50c7d7eb -800001fc: 00f6a023 sw a5,0(a3) -80000200: 02c5a783 lw a5,44(a1) -80000204: 00170713 addi a4,a4,1 -80000208: 00d7f7d3 fadd.s fa5,fa5,fa3 -8000020c: 00468693 addi a3,a3,4 -80000210: fef760e3 bltu a4,a5,800001f0 -80000214: 0245a603 lw a2,36(a1) -80000218: 0305a683 lw a3,48(a1) -8000021c: 00261713 slli a4,a2,0x2 -80000220: 00188893 addi a7,a7,1 -80000224: 00a5f5d3 fadd.s fa1,fa1,fa0 -80000228: 00e80833 add a6,a6,a4 -8000022c: fad8e8e3 bltu a7,a3,800001dc -80000230: 00008067 ret +800001b0: 00c80833 add a6,a6,a2 +800001b4: 06070663 beqz a4,80000220 +800001b8: 800027b7 lui a5,0x80002 +800001bc: dc87a007 flw ft0,-568(a5) # 80001dc8 <__stack_top+0x81001dc8> +800001c0: 02c5a783 lw a5,44(a1) +800001c4: 00000893 li a7,0 +800001c8: 20000653 fmv.s fa2,ft0 +800001cc: 00000513 li a0,0 +800001d0: 04078063 beqz a5,80000210 +800001d4: 1005f753 fmul.s fa4,fa1,ft0 +800001d8: 00080693 mv a3,a6 +800001dc: 00000713 li a4,0 +800001e0: c0071653 fcvt.w.s a2,fa4,rtz +800001e4: 10c7f753 fmul.s fa4,fa5,fa2 +800001e8: c00717d3 fcvt.w.s a5,fa4,rtz +800001ec: 50c7d7eb 0x50c7d7eb +800001f0: 00f6a023 sw a5,0(a3) +800001f4: 02c5a783 lw a5,44(a1) +800001f8: 00170713 addi a4,a4,1 +800001fc: 00d7f7d3 fadd.s fa5,fa5,fa3 +80000200: 00468693 addi a3,a3,4 +80000204: fef760e3 bltu a4,a5,800001e4 +80000208: 0245a683 lw a3,36(a1) +8000020c: 0305a703 lw a4,48(a1) +80000210: 00188893 addi a7,a7,1 +80000214: 00a5f5d3 fadd.s fa1,fa1,fa0 +80000218: 00d80833 add a6,a6,a3 +8000021c: fae8eae3 bltu a7,a4,800001d0 +80000220: 00008067 ret -80000234 <_exit>: -80000234: 00000513 li a0,0 -80000238: 0005006b 0x5006b +80000224 <_exit>: +80000224: 00000513 li a0,0 +80000228: 0005006b 0x5006b -8000023c : -8000023c: fc002573 csrr a0,0xfc0 -80000240: 0005006b 0x5006b -80000244: 00002197 auipc gp,0x2 -80000248: f6c18193 addi gp,gp,-148 # 800021b0 <__global_pointer> -8000024c: 7f000117 auipc sp,0x7f000 -80000250: db410113 addi sp,sp,-588 # ff000000 <__stack_top> -80000254: 40000593 li a1,1024 -80000258: cc102673 csrr a2,0xcc1 -8000025c: 02c585b3 mul a1,a1,a2 -80000260: 40b10133 sub sp,sp,a1 -80000264: cc3026f3 csrr a3,0xcc3 -80000268: 00068663 beqz a3,80000274 -8000026c: 00000513 li a0,0 -80000270: 0005006b 0x5006b +8000022c : +8000022c: fc002573 csrr a0,0xfc0 +80000230: 0005006b 0x5006b +80000234: 00002197 auipc gp,0x2 +80000238: f6c18193 addi gp,gp,-148 # 800021a0 <__global_pointer> +8000023c: 7f000117 auipc sp,0x7f000 +80000240: dc410113 addi sp,sp,-572 # ff000000 <__stack_top> +80000244: 40000593 li a1,1024 +80000248: cc102673 csrr a2,0xcc1 +8000024c: 02c585b3 mul a1,a1,a2 +80000250: 40b10133 sub sp,sp,a1 +80000254: cc3026f3 csrr a3,0xcc3 +80000258: 00068663 beqz a3,80000264 +8000025c: 00000513 li a0,0 +80000260: 0005006b 0x5006b -80000274 : -80000274: 00008067 ret +80000264 : +80000264: 00008067 ret -80000278 : -80000278: fe010113 addi sp,sp,-32 -8000027c: 00112e23 sw ra,28(sp) -80000280: 00812c23 sw s0,24(sp) -80000284: 00912a23 sw s1,20(sp) -80000288: 01212823 sw s2,16(sp) -8000028c: 01312623 sw s3,12(sp) -80000290: fc0027f3 csrr a5,0xfc0 -80000294: 0007806b 0x7806b -80000298: cc5026f3 csrr a3,0xcc5 -8000029c: cc3029f3 csrr s3,0xcc3 -800002a0: cc002773 csrr a4,0xcc0 -800002a4: fc002673 csrr a2,0xfc0 -800002a8: 800027b7 lui a5,0x80002 -800002ac: 00269693 slli a3,a3,0x2 -800002b0: de478793 addi a5,a5,-540 # 80001de4 <__stack_top+0x81001de4> -800002b4: 00d787b3 add a5,a5,a3 -800002b8: 0007a483 lw s1,0(a5) -800002bc: 0104a403 lw s0,16(s1) -800002c0: 00c4a683 lw a3,12(s1) -800002c4: 0089a933 slt s2,s3,s0 -800002c8: 00040793 mv a5,s0 -800002cc: 00d90933 add s2,s2,a3 -800002d0: 03368433 mul s0,a3,s3 -800002d4: 00f9d463 bge s3,a5,800002dc -800002d8: 00098793 mv a5,s3 -800002dc: 00f40433 add s0,s0,a5 -800002e0: 0084a683 lw a3,8(s1) -800002e4: 02c40433 mul s0,s0,a2 -800002e8: 02e907b3 mul a5,s2,a4 -800002ec: 00d40433 add s0,s0,a3 -800002f0: 00f40433 add s0,s0,a5 -800002f4: 00890933 add s2,s2,s0 -800002f8: 01245e63 bge s0,s2,80000314 -800002fc: 0004a783 lw a5,0(s1) -80000300: 0044a583 lw a1,4(s1) -80000304: 00040513 mv a0,s0 -80000308: 00140413 addi s0,s0,1 -8000030c: 000780e7 jalr a5 -80000310: fe8916e3 bne s2,s0,800002fc -80000314: 0019b993 seqz s3,s3 -80000318: 0009806b 0x9806b -8000031c: 01c12083 lw ra,28(sp) -80000320: 01812403 lw s0,24(sp) -80000324: 01412483 lw s1,20(sp) -80000328: 01012903 lw s2,16(sp) -8000032c: 00c12983 lw s3,12(sp) -80000330: 02010113 addi sp,sp,32 -80000334: 00008067 ret +80000268 : +80000268: fe010113 addi sp,sp,-32 +8000026c: 00112e23 sw ra,28(sp) +80000270: 00812c23 sw s0,24(sp) +80000274: 00912a23 sw s1,20(sp) +80000278: 01212823 sw s2,16(sp) +8000027c: 01312623 sw s3,12(sp) +80000280: fc0027f3 csrr a5,0xfc0 +80000284: 0007806b 0x7806b +80000288: cc5026f3 csrr a3,0xcc5 +8000028c: cc3029f3 csrr s3,0xcc3 +80000290: cc002773 csrr a4,0xcc0 +80000294: fc002673 csrr a2,0xfc0 +80000298: 800027b7 lui a5,0x80002 +8000029c: 00269693 slli a3,a3,0x2 +800002a0: dd478793 addi a5,a5,-556 # 80001dd4 <__stack_top+0x81001dd4> +800002a4: 00d787b3 add a5,a5,a3 +800002a8: 0007a483 lw s1,0(a5) +800002ac: 0104a403 lw s0,16(s1) +800002b0: 00c4a683 lw a3,12(s1) +800002b4: 0089a933 slt s2,s3,s0 +800002b8: 00040793 mv a5,s0 +800002bc: 00d90933 add s2,s2,a3 +800002c0: 03368433 mul s0,a3,s3 +800002c4: 00f9d463 bge s3,a5,800002cc +800002c8: 00098793 mv a5,s3 +800002cc: 00f40433 add s0,s0,a5 +800002d0: 0084a683 lw a3,8(s1) +800002d4: 02c40433 mul s0,s0,a2 +800002d8: 02e907b3 mul a5,s2,a4 +800002dc: 00d40433 add s0,s0,a3 +800002e0: 00f40433 add s0,s0,a5 +800002e4: 00890933 add s2,s2,s0 +800002e8: 01245e63 bge s0,s2,80000304 +800002ec: 0004a783 lw a5,0(s1) +800002f0: 0044a583 lw a1,4(s1) +800002f4: 00040513 mv a0,s0 +800002f8: 00140413 addi s0,s0,1 +800002fc: 000780e7 jalr a5 +80000300: fe8916e3 bne s2,s0,800002ec +80000304: 0019b993 seqz s3,s3 +80000308: 0009806b 0x9806b +8000030c: 01c12083 lw ra,28(sp) +80000310: 01812403 lw s0,24(sp) +80000314: 01412483 lw s1,20(sp) +80000318: 01012903 lw s2,16(sp) +8000031c: 00c12983 lw s3,12(sp) +80000320: 02010113 addi sp,sp,32 +80000324: 00008067 ret -80000338 : -80000338: fc010113 addi sp,sp,-64 -8000033c: 02112e23 sw ra,60(sp) -80000340: 02812c23 sw s0,56(sp) -80000344: 02912a23 sw s1,52(sp) -80000348: 03212823 sw s2,48(sp) -8000034c: 03312623 sw s3,44(sp) -80000350: fc2026f3 csrr a3,0xfc2 -80000354: fc102873 csrr a6,0xfc1 -80000358: fc002473 csrr s0,0xfc0 -8000035c: cc5027f3 csrr a5,0xcc5 -80000360: 01f00713 li a4,31 -80000364: 0cf74463 blt a4,a5,8000042c -80000368: 030408b3 mul a7,s0,a6 -8000036c: 00100713 li a4,1 -80000370: 00a8d463 bge a7,a0,80000378 -80000374: 03154733 div a4,a0,a7 -80000378: 0ce6c863 blt a3,a4,80000448 -8000037c: 0ae7d863 bge a5,a4,8000042c -80000380: fff68693 addi a3,a3,-1 -80000384: 02e54333 div t1,a0,a4 -80000388: 00030893 mv a7,t1 -8000038c: 00f69663 bne a3,a5,80000398 -80000390: 02e56533 rem a0,a0,a4 -80000394: 006508b3 add a7,a0,t1 -80000398: 0288c4b3 div s1,a7,s0 -8000039c: 0288e933 rem s2,a7,s0 -800003a0: 0b04ca63 blt s1,a6,80000454 -800003a4: 00100693 li a3,1 -800003a8: 0304c733 div a4,s1,a6 -800003ac: 00070663 beqz a4,800003b8 -800003b0: 00070693 mv a3,a4 -800003b4: 0304e733 rem a4,s1,a6 -800003b8: 800029b7 lui s3,0x80002 -800003bc: de498993 addi s3,s3,-540 # 80001de4 <__stack_top+0x81001de4> -800003c0: 00e12e23 sw a4,28(sp) -800003c4: 00c10713 addi a4,sp,12 -800003c8: 00b12623 sw a1,12(sp) -800003cc: 00c12823 sw a2,16(sp) -800003d0: 00d12c23 sw a3,24(sp) -800003d4: 02f30333 mul t1,t1,a5 -800003d8: 00279793 slli a5,a5,0x2 -800003dc: 00f987b3 add a5,s3,a5 -800003e0: 00e7a023 sw a4,0(a5) -800003e4: 00612a23 sw t1,20(sp) -800003e8: 06904c63 bgtz s1,80000460 -800003ec: 04090063 beqz s2,8000042c -800003f0: 02848433 mul s0,s1,s0 -800003f4: 00812a23 sw s0,20(sp) -800003f8: 0009006b 0x9006b -800003fc: cc5027f3 csrr a5,0xcc5 -80000400: cc202573 csrr a0,0xcc2 -80000404: 00279793 slli a5,a5,0x2 -80000408: 00f989b3 add s3,s3,a5 -8000040c: 0009a783 lw a5,0(s3) -80000410: 0087a683 lw a3,8(a5) -80000414: 0007a703 lw a4,0(a5) -80000418: 0047a583 lw a1,4(a5) -8000041c: 00d50533 add a0,a0,a3 -80000420: 000700e7 jalr a4 -80000424: 00100793 li a5,1 -80000428: 0007806b 0x7806b -8000042c: 03c12083 lw ra,60(sp) -80000430: 03812403 lw s0,56(sp) -80000434: 03412483 lw s1,52(sp) -80000438: 03012903 lw s2,48(sp) -8000043c: 02c12983 lw s3,44(sp) -80000440: 04010113 addi sp,sp,64 -80000444: 00008067 ret -80000448: 00068713 mv a4,a3 -8000044c: f2e7cae3 blt a5,a4,80000380 -80000450: fddff06f j 8000042c -80000454: 00000713 li a4,0 -80000458: 00100693 li a3,1 -8000045c: f5dff06f j 800003b8 -80000460: 00048713 mv a4,s1 -80000464: 00985463 bge a6,s1,8000046c -80000468: 00080713 mv a4,a6 -8000046c: 800007b7 lui a5,0x80000 -80000470: 27878793 addi a5,a5,632 # 80000278 <__stack_top+0x81000278> -80000474: 00f7106b 0xf7106b -80000478: e01ff0ef jal ra,80000278 -8000047c: f71ff06f j 800003ec +80000328 : +80000328: fc010113 addi sp,sp,-64 +8000032c: 02112e23 sw ra,60(sp) +80000330: 02812c23 sw s0,56(sp) +80000334: 02912a23 sw s1,52(sp) +80000338: 03212823 sw s2,48(sp) +8000033c: 03312623 sw s3,44(sp) +80000340: fc2026f3 csrr a3,0xfc2 +80000344: fc102873 csrr a6,0xfc1 +80000348: fc002473 csrr s0,0xfc0 +8000034c: cc5027f3 csrr a5,0xcc5 +80000350: 01f00713 li a4,31 +80000354: 0cf74463 blt a4,a5,8000041c +80000358: 030408b3 mul a7,s0,a6 +8000035c: 00100713 li a4,1 +80000360: 00a8d463 bge a7,a0,80000368 +80000364: 03154733 div a4,a0,a7 +80000368: 0ce6c863 blt a3,a4,80000438 +8000036c: 0ae7d863 bge a5,a4,8000041c +80000370: fff68693 addi a3,a3,-1 +80000374: 02e54333 div t1,a0,a4 +80000378: 00030893 mv a7,t1 +8000037c: 00f69663 bne a3,a5,80000388 +80000380: 02e56533 rem a0,a0,a4 +80000384: 006508b3 add a7,a0,t1 +80000388: 0288c4b3 div s1,a7,s0 +8000038c: 0288e933 rem s2,a7,s0 +80000390: 0b04ca63 blt s1,a6,80000444 +80000394: 00100693 li a3,1 +80000398: 0304c733 div a4,s1,a6 +8000039c: 00070663 beqz a4,800003a8 +800003a0: 00070693 mv a3,a4 +800003a4: 0304e733 rem a4,s1,a6 +800003a8: 800029b7 lui s3,0x80002 +800003ac: dd498993 addi s3,s3,-556 # 80001dd4 <__stack_top+0x81001dd4> +800003b0: 00e12e23 sw a4,28(sp) +800003b4: 00c10713 addi a4,sp,12 +800003b8: 00b12623 sw a1,12(sp) +800003bc: 00c12823 sw a2,16(sp) +800003c0: 00d12c23 sw a3,24(sp) +800003c4: 02f30333 mul t1,t1,a5 +800003c8: 00279793 slli a5,a5,0x2 +800003cc: 00f987b3 add a5,s3,a5 +800003d0: 00e7a023 sw a4,0(a5) +800003d4: 00612a23 sw t1,20(sp) +800003d8: 06904c63 bgtz s1,80000450 +800003dc: 04090063 beqz s2,8000041c +800003e0: 02848433 mul s0,s1,s0 +800003e4: 00812a23 sw s0,20(sp) +800003e8: 0009006b 0x9006b +800003ec: cc5027f3 csrr a5,0xcc5 +800003f0: cc202573 csrr a0,0xcc2 +800003f4: 00279793 slli a5,a5,0x2 +800003f8: 00f989b3 add s3,s3,a5 +800003fc: 0009a783 lw a5,0(s3) +80000400: 0087a683 lw a3,8(a5) +80000404: 0007a703 lw a4,0(a5) +80000408: 0047a583 lw a1,4(a5) +8000040c: 00d50533 add a0,a0,a3 +80000410: 000700e7 jalr a4 +80000414: 00100793 li a5,1 +80000418: 0007806b 0x7806b +8000041c: 03c12083 lw ra,60(sp) +80000420: 03812403 lw s0,56(sp) +80000424: 03412483 lw s1,52(sp) +80000428: 03012903 lw s2,48(sp) +8000042c: 02c12983 lw s3,44(sp) +80000430: 04010113 addi sp,sp,64 +80000434: 00008067 ret +80000438: 00068713 mv a4,a3 +8000043c: f2e7cae3 blt a5,a4,80000370 +80000440: fddff06f j 8000041c +80000444: 00000713 li a4,0 +80000448: 00100693 li a3,1 +8000044c: f5dff06f j 800003a8 +80000450: 00048713 mv a4,s1 +80000454: 00985463 bge a6,s1,8000045c +80000458: 00080713 mv a4,a6 +8000045c: 800007b7 lui a5,0x80000 +80000460: 26878793 addi a5,a5,616 # 80000268 <__stack_top+0x81000268> +80000464: 00f7106b 0xf7106b +80000468: e01ff0ef jal ra,80000268 +8000046c: f71ff06f j 800003dc -80000480 <__clzsi2>: -80000480: 000107b7 lui a5,0x10 -80000484: 02f57a63 bgeu a0,a5,800004b8 <__clzsi2+0x38> -80000488: 0ff00793 li a5,255 -8000048c: 00a7b7b3 sltu a5,a5,a0 -80000490: 00379793 slli a5,a5,0x3 -80000494: 80001737 lui a4,0x80001 -80000498: 02000693 li a3,32 -8000049c: 40f686b3 sub a3,a3,a5 -800004a0: 00f55533 srl a0,a0,a5 -800004a4: 8a870793 addi a5,a4,-1880 # 800008a8 <__stack_top+0x810008a8> -800004a8: 00a78533 add a0,a5,a0 -800004ac: 00054503 lbu a0,0(a0) -800004b0: 40a68533 sub a0,a3,a0 -800004b4: 00008067 ret -800004b8: 01000737 lui a4,0x1000 -800004bc: 01000793 li a5,16 -800004c0: fce56ae3 bltu a0,a4,80000494 <__clzsi2+0x14> -800004c4: 01800793 li a5,24 -800004c8: fcdff06f j 80000494 <__clzsi2+0x14> +80000470 <__clzsi2>: +80000470: 000107b7 lui a5,0x10 +80000474: 02f57a63 bgeu a0,a5,800004a8 <__clzsi2+0x38> +80000478: 0ff00793 li a5,255 +8000047c: 00a7b7b3 sltu a5,a5,a0 +80000480: 00379793 slli a5,a5,0x3 +80000484: 80001737 lui a4,0x80001 +80000488: 02000693 li a3,32 +8000048c: 40f686b3 sub a3,a3,a5 +80000490: 00f55533 srl a0,a0,a5 +80000494: 89870793 addi a5,a4,-1896 # 80000898 <__stack_top+0x81000898> +80000498: 00a78533 add a0,a5,a0 +8000049c: 00054503 lbu a0,0(a0) +800004a0: 40a68533 sub a0,a3,a0 +800004a4: 00008067 ret +800004a8: 01000737 lui a4,0x1000 +800004ac: 01000793 li a5,16 +800004b0: fce56ae3 bltu a0,a4,80000484 <__clzsi2+0x14> +800004b4: 01800793 li a5,24 +800004b8: fcdff06f j 80000484 <__clzsi2+0x14> -800004cc : -800004cc: 00050593 mv a1,a0 -800004d0: 00000693 li a3,0 -800004d4: 00000613 li a2,0 -800004d8: 00000513 li a0,0 -800004dc: 20c0006f j 800006e8 <__register_exitproc> +800004bc : +800004bc: 00050593 mv a1,a0 +800004c0: 00000693 li a3,0 +800004c4: 00000613 li a2,0 +800004c8: 00000513 li a0,0 +800004cc: 20c0006f j 800006d8 <__register_exitproc> -800004e0 : -800004e0: ff010113 addi sp,sp,-16 -800004e4: 00000593 li a1,0 -800004e8: 00812423 sw s0,8(sp) -800004ec: 00112623 sw ra,12(sp) -800004f0: 00050413 mv s0,a0 -800004f4: 290000ef jal ra,80000784 <__call_exitprocs> -800004f8: 800027b7 lui a5,0x80002 -800004fc: de07a503 lw a0,-544(a5) # 80001de0 <__stack_top+0x81001de0> -80000500: 03c52783 lw a5,60(a0) -80000504: 00078463 beqz a5,8000050c -80000508: 000780e7 jalr a5 -8000050c: 00040513 mv a0,s0 -80000510: d25ff0ef jal ra,80000234 <_exit> +800004d0 : +800004d0: ff010113 addi sp,sp,-16 +800004d4: 00000593 li a1,0 +800004d8: 00812423 sw s0,8(sp) +800004dc: 00112623 sw ra,12(sp) +800004e0: 00050413 mv s0,a0 +800004e4: 290000ef jal ra,80000774 <__call_exitprocs> +800004e8: 800027b7 lui a5,0x80002 +800004ec: dd07a503 lw a0,-560(a5) # 80001dd0 <__stack_top+0x81001dd0> +800004f0: 03c52783 lw a5,60(a0) +800004f4: 00078463 beqz a5,800004fc +800004f8: 000780e7 jalr a5 +800004fc: 00040513 mv a0,s0 +80000500: d25ff0ef jal ra,80000224 <_exit> -80000514 <__libc_fini_array>: -80000514: ff010113 addi sp,sp,-16 -80000518: 00812423 sw s0,8(sp) -8000051c: 800027b7 lui a5,0x80002 -80000520: 80002437 lui s0,0x80002 -80000524: 9ac40413 addi s0,s0,-1620 # 800019ac <__stack_top+0x810019ac> -80000528: 9ac78793 addi a5,a5,-1620 # 800019ac <__stack_top+0x810019ac> -8000052c: 408787b3 sub a5,a5,s0 -80000530: 00912223 sw s1,4(sp) -80000534: 00112623 sw ra,12(sp) -80000538: 4027d493 srai s1,a5,0x2 -8000053c: 02048063 beqz s1,8000055c <__libc_fini_array+0x48> -80000540: ffc78793 addi a5,a5,-4 -80000544: 00878433 add s0,a5,s0 -80000548: 00042783 lw a5,0(s0) -8000054c: fff48493 addi s1,s1,-1 -80000550: ffc40413 addi s0,s0,-4 -80000554: 000780e7 jalr a5 -80000558: fe0498e3 bnez s1,80000548 <__libc_fini_array+0x34> -8000055c: 00c12083 lw ra,12(sp) -80000560: 00812403 lw s0,8(sp) -80000564: 00412483 lw s1,4(sp) -80000568: 01010113 addi sp,sp,16 -8000056c: 00008067 ret +80000504 <__libc_fini_array>: +80000504: ff010113 addi sp,sp,-16 +80000508: 00812423 sw s0,8(sp) +8000050c: 800027b7 lui a5,0x80002 +80000510: 80002437 lui s0,0x80002 +80000514: 99c40413 addi s0,s0,-1636 # 8000199c <__stack_top+0x8100199c> +80000518: 99c78793 addi a5,a5,-1636 # 8000199c <__stack_top+0x8100199c> +8000051c: 408787b3 sub a5,a5,s0 +80000520: 00912223 sw s1,4(sp) +80000524: 00112623 sw ra,12(sp) +80000528: 4027d493 srai s1,a5,0x2 +8000052c: 02048063 beqz s1,8000054c <__libc_fini_array+0x48> +80000530: ffc78793 addi a5,a5,-4 +80000534: 00878433 add s0,a5,s0 +80000538: 00042783 lw a5,0(s0) +8000053c: fff48493 addi s1,s1,-1 +80000540: ffc40413 addi s0,s0,-4 +80000544: 000780e7 jalr a5 +80000548: fe0498e3 bnez s1,80000538 <__libc_fini_array+0x34> +8000054c: 00c12083 lw ra,12(sp) +80000550: 00812403 lw s0,8(sp) +80000554: 00412483 lw s1,4(sp) +80000558: 01010113 addi sp,sp,16 +8000055c: 00008067 ret -80000570 <__libc_init_array>: -80000570: ff010113 addi sp,sp,-16 -80000574: 00812423 sw s0,8(sp) -80000578: 01212023 sw s2,0(sp) -8000057c: 80002437 lui s0,0x80002 -80000580: 80002937 lui s2,0x80002 -80000584: 9a840793 addi a5,s0,-1624 # 800019a8 <__stack_top+0x810019a8> -80000588: 9a890913 addi s2,s2,-1624 # 800019a8 <__stack_top+0x810019a8> -8000058c: 40f90933 sub s2,s2,a5 -80000590: 00112623 sw ra,12(sp) -80000594: 00912223 sw s1,4(sp) -80000598: 40295913 srai s2,s2,0x2 -8000059c: 02090063 beqz s2,800005bc <__libc_init_array+0x4c> -800005a0: 9a840413 addi s0,s0,-1624 -800005a4: 00000493 li s1,0 -800005a8: 00042783 lw a5,0(s0) -800005ac: 00148493 addi s1,s1,1 -800005b0: 00440413 addi s0,s0,4 -800005b4: 000780e7 jalr a5 -800005b8: fe9918e3 bne s2,s1,800005a8 <__libc_init_array+0x38> -800005bc: 80002437 lui s0,0x80002 -800005c0: 80002937 lui s2,0x80002 -800005c4: 9a840793 addi a5,s0,-1624 # 800019a8 <__stack_top+0x810019a8> -800005c8: 9ac90913 addi s2,s2,-1620 # 800019ac <__stack_top+0x810019ac> -800005cc: 40f90933 sub s2,s2,a5 -800005d0: 40295913 srai s2,s2,0x2 -800005d4: 02090063 beqz s2,800005f4 <__libc_init_array+0x84> -800005d8: 9a840413 addi s0,s0,-1624 -800005dc: 00000493 li s1,0 -800005e0: 00042783 lw a5,0(s0) -800005e4: 00148493 addi s1,s1,1 -800005e8: 00440413 addi s0,s0,4 -800005ec: 000780e7 jalr a5 -800005f0: fe9918e3 bne s2,s1,800005e0 <__libc_init_array+0x70> -800005f4: 00c12083 lw ra,12(sp) -800005f8: 00812403 lw s0,8(sp) -800005fc: 00412483 lw s1,4(sp) -80000600: 00012903 lw s2,0(sp) -80000604: 01010113 addi sp,sp,16 -80000608: 00008067 ret +80000560 <__libc_init_array>: +80000560: ff010113 addi sp,sp,-16 +80000564: 00812423 sw s0,8(sp) +80000568: 01212023 sw s2,0(sp) +8000056c: 80002437 lui s0,0x80002 +80000570: 80002937 lui s2,0x80002 +80000574: 99840793 addi a5,s0,-1640 # 80001998 <__stack_top+0x81001998> +80000578: 99890913 addi s2,s2,-1640 # 80001998 <__stack_top+0x81001998> +8000057c: 40f90933 sub s2,s2,a5 +80000580: 00112623 sw ra,12(sp) +80000584: 00912223 sw s1,4(sp) +80000588: 40295913 srai s2,s2,0x2 +8000058c: 02090063 beqz s2,800005ac <__libc_init_array+0x4c> +80000590: 99840413 addi s0,s0,-1640 +80000594: 00000493 li s1,0 +80000598: 00042783 lw a5,0(s0) +8000059c: 00148493 addi s1,s1,1 +800005a0: 00440413 addi s0,s0,4 +800005a4: 000780e7 jalr a5 +800005a8: fe9918e3 bne s2,s1,80000598 <__libc_init_array+0x38> +800005ac: 80002437 lui s0,0x80002 +800005b0: 80002937 lui s2,0x80002 +800005b4: 99840793 addi a5,s0,-1640 # 80001998 <__stack_top+0x81001998> +800005b8: 99c90913 addi s2,s2,-1636 # 8000199c <__stack_top+0x8100199c> +800005bc: 40f90933 sub s2,s2,a5 +800005c0: 40295913 srai s2,s2,0x2 +800005c4: 02090063 beqz s2,800005e4 <__libc_init_array+0x84> +800005c8: 99840413 addi s0,s0,-1640 +800005cc: 00000493 li s1,0 +800005d0: 00042783 lw a5,0(s0) +800005d4: 00148493 addi s1,s1,1 +800005d8: 00440413 addi s0,s0,4 +800005dc: 000780e7 jalr a5 +800005e0: fe9918e3 bne s2,s1,800005d0 <__libc_init_array+0x70> +800005e4: 00c12083 lw ra,12(sp) +800005e8: 00812403 lw s0,8(sp) +800005ec: 00412483 lw s1,4(sp) +800005f0: 00012903 lw s2,0(sp) +800005f4: 01010113 addi sp,sp,16 +800005f8: 00008067 ret -8000060c : -8000060c: 00f00313 li t1,15 -80000610: 00050713 mv a4,a0 -80000614: 02c37e63 bgeu t1,a2,80000650 -80000618: 00f77793 andi a5,a4,15 -8000061c: 0a079063 bnez a5,800006bc -80000620: 08059263 bnez a1,800006a4 -80000624: ff067693 andi a3,a2,-16 -80000628: 00f67613 andi a2,a2,15 -8000062c: 00e686b3 add a3,a3,a4 -80000630: 00b72023 sw a1,0(a4) # 1000000 <__stack_size+0xfffc00> -80000634: 00b72223 sw a1,4(a4) -80000638: 00b72423 sw a1,8(a4) -8000063c: 00b72623 sw a1,12(a4) -80000640: 01070713 addi a4,a4,16 -80000644: fed766e3 bltu a4,a3,80000630 -80000648: 00061463 bnez a2,80000650 -8000064c: 00008067 ret -80000650: 40c306b3 sub a3,t1,a2 -80000654: 00269693 slli a3,a3,0x2 -80000658: 00000297 auipc t0,0x0 -8000065c: 005686b3 add a3,a3,t0 -80000660: 00c68067 jr 12(a3) -80000664: 00b70723 sb a1,14(a4) -80000668: 00b706a3 sb a1,13(a4) -8000066c: 00b70623 sb a1,12(a4) -80000670: 00b705a3 sb a1,11(a4) -80000674: 00b70523 sb a1,10(a4) -80000678: 00b704a3 sb a1,9(a4) -8000067c: 00b70423 sb a1,8(a4) -80000680: 00b703a3 sb a1,7(a4) -80000684: 00b70323 sb a1,6(a4) -80000688: 00b702a3 sb a1,5(a4) -8000068c: 00b70223 sb a1,4(a4) -80000690: 00b701a3 sb a1,3(a4) -80000694: 00b70123 sb a1,2(a4) -80000698: 00b700a3 sb a1,1(a4) -8000069c: 00b70023 sb a1,0(a4) -800006a0: 00008067 ret -800006a4: 0ff5f593 andi a1,a1,255 -800006a8: 00859693 slli a3,a1,0x8 -800006ac: 00d5e5b3 or a1,a1,a3 -800006b0: 01059693 slli a3,a1,0x10 -800006b4: 00d5e5b3 or a1,a1,a3 -800006b8: f6dff06f j 80000624 -800006bc: 00279693 slli a3,a5,0x2 -800006c0: 00000297 auipc t0,0x0 -800006c4: 005686b3 add a3,a3,t0 -800006c8: 00008293 mv t0,ra -800006cc: fa0680e7 jalr -96(a3) -800006d0: 00028093 mv ra,t0 -800006d4: ff078793 addi a5,a5,-16 -800006d8: 40f70733 sub a4,a4,a5 -800006dc: 00f60633 add a2,a2,a5 -800006e0: f6c378e3 bgeu t1,a2,80000650 -800006e4: f3dff06f j 80000620 +800005fc : +800005fc: 00f00313 li t1,15 +80000600: 00050713 mv a4,a0 +80000604: 02c37e63 bgeu t1,a2,80000640 +80000608: 00f77793 andi a5,a4,15 +8000060c: 0a079063 bnez a5,800006ac +80000610: 08059263 bnez a1,80000694 +80000614: ff067693 andi a3,a2,-16 +80000618: 00f67613 andi a2,a2,15 +8000061c: 00e686b3 add a3,a3,a4 +80000620: 00b72023 sw a1,0(a4) # 1000000 <__stack_size+0xfffc00> +80000624: 00b72223 sw a1,4(a4) +80000628: 00b72423 sw a1,8(a4) +8000062c: 00b72623 sw a1,12(a4) +80000630: 01070713 addi a4,a4,16 +80000634: fed766e3 bltu a4,a3,80000620 +80000638: 00061463 bnez a2,80000640 +8000063c: 00008067 ret +80000640: 40c306b3 sub a3,t1,a2 +80000644: 00269693 slli a3,a3,0x2 +80000648: 00000297 auipc t0,0x0 +8000064c: 005686b3 add a3,a3,t0 +80000650: 00c68067 jr 12(a3) +80000654: 00b70723 sb a1,14(a4) +80000658: 00b706a3 sb a1,13(a4) +8000065c: 00b70623 sb a1,12(a4) +80000660: 00b705a3 sb a1,11(a4) +80000664: 00b70523 sb a1,10(a4) +80000668: 00b704a3 sb a1,9(a4) +8000066c: 00b70423 sb a1,8(a4) +80000670: 00b703a3 sb a1,7(a4) +80000674: 00b70323 sb a1,6(a4) +80000678: 00b702a3 sb a1,5(a4) +8000067c: 00b70223 sb a1,4(a4) +80000680: 00b701a3 sb a1,3(a4) +80000684: 00b70123 sb a1,2(a4) +80000688: 00b700a3 sb a1,1(a4) +8000068c: 00b70023 sb a1,0(a4) +80000690: 00008067 ret +80000694: 0ff5f593 andi a1,a1,255 +80000698: 00859693 slli a3,a1,0x8 +8000069c: 00d5e5b3 or a1,a1,a3 +800006a0: 01059693 slli a3,a1,0x10 +800006a4: 00d5e5b3 or a1,a1,a3 +800006a8: f6dff06f j 80000614 +800006ac: 00279693 slli a3,a5,0x2 +800006b0: 00000297 auipc t0,0x0 +800006b4: 005686b3 add a3,a3,t0 +800006b8: 00008293 mv t0,ra +800006bc: fa0680e7 jalr -96(a3) +800006c0: 00028093 mv ra,t0 +800006c4: ff078793 addi a5,a5,-16 +800006c8: 40f70733 sub a4,a4,a5 +800006cc: 00f60633 add a2,a2,a5 +800006d0: f6c378e3 bgeu t1,a2,80000640 +800006d4: f3dff06f j 80000610 -800006e8 <__register_exitproc>: -800006e8: 800027b7 lui a5,0x80002 -800006ec: de07a703 lw a4,-544(a5) # 80001de0 <__stack_top+0x81001de0> -800006f0: 14872783 lw a5,328(a4) -800006f4: 04078c63 beqz a5,8000074c <__register_exitproc+0x64> -800006f8: 0047a703 lw a4,4(a5) -800006fc: 01f00813 li a6,31 -80000700: 06e84e63 blt a6,a4,8000077c <__register_exitproc+0x94> -80000704: 00271813 slli a6,a4,0x2 -80000708: 02050663 beqz a0,80000734 <__register_exitproc+0x4c> -8000070c: 01078333 add t1,a5,a6 -80000710: 08c32423 sw a2,136(t1) -80000714: 1887a883 lw a7,392(a5) -80000718: 00100613 li a2,1 -8000071c: 00e61633 sll a2,a2,a4 -80000720: 00c8e8b3 or a7,a7,a2 -80000724: 1917a423 sw a7,392(a5) -80000728: 10d32423 sw a3,264(t1) -8000072c: 00200693 li a3,2 -80000730: 02d50463 beq a0,a3,80000758 <__register_exitproc+0x70> -80000734: 00170713 addi a4,a4,1 -80000738: 00e7a223 sw a4,4(a5) -8000073c: 010787b3 add a5,a5,a6 -80000740: 00b7a423 sw a1,8(a5) -80000744: 00000513 li a0,0 -80000748: 00008067 ret -8000074c: 14c70793 addi a5,a4,332 -80000750: 14f72423 sw a5,328(a4) -80000754: fa5ff06f j 800006f8 <__register_exitproc+0x10> -80000758: 18c7a683 lw a3,396(a5) -8000075c: 00170713 addi a4,a4,1 -80000760: 00e7a223 sw a4,4(a5) -80000764: 00c6e633 or a2,a3,a2 -80000768: 18c7a623 sw a2,396(a5) -8000076c: 010787b3 add a5,a5,a6 -80000770: 00b7a423 sw a1,8(a5) -80000774: 00000513 li a0,0 -80000778: 00008067 ret -8000077c: fff00513 li a0,-1 -80000780: 00008067 ret +800006d8 <__register_exitproc>: +800006d8: 800027b7 lui a5,0x80002 +800006dc: dd07a703 lw a4,-560(a5) # 80001dd0 <__stack_top+0x81001dd0> +800006e0: 14872783 lw a5,328(a4) +800006e4: 04078c63 beqz a5,8000073c <__register_exitproc+0x64> +800006e8: 0047a703 lw a4,4(a5) +800006ec: 01f00813 li a6,31 +800006f0: 06e84e63 blt a6,a4,8000076c <__register_exitproc+0x94> +800006f4: 00271813 slli a6,a4,0x2 +800006f8: 02050663 beqz a0,80000724 <__register_exitproc+0x4c> +800006fc: 01078333 add t1,a5,a6 +80000700: 08c32423 sw a2,136(t1) +80000704: 1887a883 lw a7,392(a5) +80000708: 00100613 li a2,1 +8000070c: 00e61633 sll a2,a2,a4 +80000710: 00c8e8b3 or a7,a7,a2 +80000714: 1917a423 sw a7,392(a5) +80000718: 10d32423 sw a3,264(t1) +8000071c: 00200693 li a3,2 +80000720: 02d50463 beq a0,a3,80000748 <__register_exitproc+0x70> +80000724: 00170713 addi a4,a4,1 +80000728: 00e7a223 sw a4,4(a5) +8000072c: 010787b3 add a5,a5,a6 +80000730: 00b7a423 sw a1,8(a5) +80000734: 00000513 li a0,0 +80000738: 00008067 ret +8000073c: 14c70793 addi a5,a4,332 +80000740: 14f72423 sw a5,328(a4) +80000744: fa5ff06f j 800006e8 <__register_exitproc+0x10> +80000748: 18c7a683 lw a3,396(a5) +8000074c: 00170713 addi a4,a4,1 +80000750: 00e7a223 sw a4,4(a5) +80000754: 00c6e633 or a2,a3,a2 +80000758: 18c7a623 sw a2,396(a5) +8000075c: 010787b3 add a5,a5,a6 +80000760: 00b7a423 sw a1,8(a5) +80000764: 00000513 li a0,0 +80000768: 00008067 ret +8000076c: fff00513 li a0,-1 +80000770: 00008067 ret -80000784 <__call_exitprocs>: -80000784: fd010113 addi sp,sp,-48 -80000788: 800027b7 lui a5,0x80002 -8000078c: 01412c23 sw s4,24(sp) -80000790: de07aa03 lw s4,-544(a5) # 80001de0 <__stack_top+0x81001de0> -80000794: 03212023 sw s2,32(sp) -80000798: 02112623 sw ra,44(sp) -8000079c: 148a2903 lw s2,328(s4) -800007a0: 02812423 sw s0,40(sp) -800007a4: 02912223 sw s1,36(sp) -800007a8: 01312e23 sw s3,28(sp) -800007ac: 01512a23 sw s5,20(sp) -800007b0: 01612823 sw s6,16(sp) -800007b4: 01712623 sw s7,12(sp) -800007b8: 01812423 sw s8,8(sp) -800007bc: 04090063 beqz s2,800007fc <__call_exitprocs+0x78> -800007c0: 00050b13 mv s6,a0 -800007c4: 00058b93 mv s7,a1 -800007c8: 00100a93 li s5,1 -800007cc: fff00993 li s3,-1 -800007d0: 00492483 lw s1,4(s2) -800007d4: fff48413 addi s0,s1,-1 -800007d8: 02044263 bltz s0,800007fc <__call_exitprocs+0x78> -800007dc: 00249493 slli s1,s1,0x2 -800007e0: 009904b3 add s1,s2,s1 -800007e4: 040b8463 beqz s7,8000082c <__call_exitprocs+0xa8> -800007e8: 1044a783 lw a5,260(s1) -800007ec: 05778063 beq a5,s7,8000082c <__call_exitprocs+0xa8> -800007f0: fff40413 addi s0,s0,-1 -800007f4: ffc48493 addi s1,s1,-4 -800007f8: ff3416e3 bne s0,s3,800007e4 <__call_exitprocs+0x60> -800007fc: 02c12083 lw ra,44(sp) -80000800: 02812403 lw s0,40(sp) -80000804: 02412483 lw s1,36(sp) -80000808: 02012903 lw s2,32(sp) -8000080c: 01c12983 lw s3,28(sp) -80000810: 01812a03 lw s4,24(sp) -80000814: 01412a83 lw s5,20(sp) -80000818: 01012b03 lw s6,16(sp) -8000081c: 00c12b83 lw s7,12(sp) -80000820: 00812c03 lw s8,8(sp) -80000824: 03010113 addi sp,sp,48 -80000828: 00008067 ret -8000082c: 00492783 lw a5,4(s2) -80000830: 0044a683 lw a3,4(s1) -80000834: fff78793 addi a5,a5,-1 -80000838: 04878e63 beq a5,s0,80000894 <__call_exitprocs+0x110> -8000083c: 0004a223 sw zero,4(s1) -80000840: fa0688e3 beqz a3,800007f0 <__call_exitprocs+0x6c> -80000844: 18892783 lw a5,392(s2) -80000848: 008a9733 sll a4,s5,s0 -8000084c: 00492c03 lw s8,4(s2) -80000850: 00f777b3 and a5,a4,a5 -80000854: 02079263 bnez a5,80000878 <__call_exitprocs+0xf4> -80000858: 000680e7 jalr a3 -8000085c: 00492703 lw a4,4(s2) -80000860: 148a2783 lw a5,328(s4) -80000864: 01871463 bne a4,s8,8000086c <__call_exitprocs+0xe8> -80000868: f8f904e3 beq s2,a5,800007f0 <__call_exitprocs+0x6c> -8000086c: f80788e3 beqz a5,800007fc <__call_exitprocs+0x78> -80000870: 00078913 mv s2,a5 -80000874: f5dff06f j 800007d0 <__call_exitprocs+0x4c> -80000878: 18c92783 lw a5,396(s2) -8000087c: 0844a583 lw a1,132(s1) -80000880: 00f77733 and a4,a4,a5 -80000884: 00071c63 bnez a4,8000089c <__call_exitprocs+0x118> -80000888: 000b0513 mv a0,s6 -8000088c: 000680e7 jalr a3 -80000890: fcdff06f j 8000085c <__call_exitprocs+0xd8> -80000894: 00892223 sw s0,4(s2) -80000898: fa9ff06f j 80000840 <__call_exitprocs+0xbc> -8000089c: 00058513 mv a0,a1 -800008a0: 000680e7 jalr a3 -800008a4: fb9ff06f j 8000085c <__call_exitprocs+0xd8> +80000774 <__call_exitprocs>: +80000774: fd010113 addi sp,sp,-48 +80000778: 800027b7 lui a5,0x80002 +8000077c: 01412c23 sw s4,24(sp) +80000780: dd07aa03 lw s4,-560(a5) # 80001dd0 <__stack_top+0x81001dd0> +80000784: 03212023 sw s2,32(sp) +80000788: 02112623 sw ra,44(sp) +8000078c: 148a2903 lw s2,328(s4) +80000790: 02812423 sw s0,40(sp) +80000794: 02912223 sw s1,36(sp) +80000798: 01312e23 sw s3,28(sp) +8000079c: 01512a23 sw s5,20(sp) +800007a0: 01612823 sw s6,16(sp) +800007a4: 01712623 sw s7,12(sp) +800007a8: 01812423 sw s8,8(sp) +800007ac: 04090063 beqz s2,800007ec <__call_exitprocs+0x78> +800007b0: 00050b13 mv s6,a0 +800007b4: 00058b93 mv s7,a1 +800007b8: 00100a93 li s5,1 +800007bc: fff00993 li s3,-1 +800007c0: 00492483 lw s1,4(s2) +800007c4: fff48413 addi s0,s1,-1 +800007c8: 02044263 bltz s0,800007ec <__call_exitprocs+0x78> +800007cc: 00249493 slli s1,s1,0x2 +800007d0: 009904b3 add s1,s2,s1 +800007d4: 040b8463 beqz s7,8000081c <__call_exitprocs+0xa8> +800007d8: 1044a783 lw a5,260(s1) +800007dc: 05778063 beq a5,s7,8000081c <__call_exitprocs+0xa8> +800007e0: fff40413 addi s0,s0,-1 +800007e4: ffc48493 addi s1,s1,-4 +800007e8: ff3416e3 bne s0,s3,800007d4 <__call_exitprocs+0x60> +800007ec: 02c12083 lw ra,44(sp) +800007f0: 02812403 lw s0,40(sp) +800007f4: 02412483 lw s1,36(sp) +800007f8: 02012903 lw s2,32(sp) +800007fc: 01c12983 lw s3,28(sp) +80000800: 01812a03 lw s4,24(sp) +80000804: 01412a83 lw s5,20(sp) +80000808: 01012b03 lw s6,16(sp) +8000080c: 00c12b83 lw s7,12(sp) +80000810: 00812c03 lw s8,8(sp) +80000814: 03010113 addi sp,sp,48 +80000818: 00008067 ret +8000081c: 00492783 lw a5,4(s2) +80000820: 0044a683 lw a3,4(s1) +80000824: fff78793 addi a5,a5,-1 +80000828: 04878e63 beq a5,s0,80000884 <__call_exitprocs+0x110> +8000082c: 0004a223 sw zero,4(s1) +80000830: fa0688e3 beqz a3,800007e0 <__call_exitprocs+0x6c> +80000834: 18892783 lw a5,392(s2) +80000838: 008a9733 sll a4,s5,s0 +8000083c: 00492c03 lw s8,4(s2) +80000840: 00f777b3 and a5,a4,a5 +80000844: 02079263 bnez a5,80000868 <__call_exitprocs+0xf4> +80000848: 000680e7 jalr a3 +8000084c: 00492703 lw a4,4(s2) +80000850: 148a2783 lw a5,328(s4) +80000854: 01871463 bne a4,s8,8000085c <__call_exitprocs+0xe8> +80000858: f8f904e3 beq s2,a5,800007e0 <__call_exitprocs+0x6c> +8000085c: f80788e3 beqz a5,800007ec <__call_exitprocs+0x78> +80000860: 00078913 mv s2,a5 +80000864: f5dff06f j 800007c0 <__call_exitprocs+0x4c> +80000868: 18c92783 lw a5,396(s2) +8000086c: 0844a583 lw a1,132(s1) +80000870: 00f77733 and a4,a4,a5 +80000874: 00071c63 bnez a4,8000088c <__call_exitprocs+0x118> +80000878: 000b0513 mv a0,s6 +8000087c: 000680e7 jalr a3 +80000880: fcdff06f j 8000084c <__call_exitprocs+0xd8> +80000884: 00892223 sw s0,4(s2) +80000888: fa9ff06f j 80000830 <__call_exitprocs+0xbc> +8000088c: 00058513 mv a0,a1 +80000890: 000680e7 jalr a3 +80000894: fb9ff06f j 8000084c <__call_exitprocs+0xd8> Disassembly of section .rodata: -800008a8 <__clz_tab>: -800008a8: 0100 addi s0,sp,128 -800008aa: 0202 c.slli64 tp -800008ac: 03030303 lb t1,48(t1) -800008b0: 0404 addi s1,sp,512 -800008b2: 0404 addi s1,sp,512 -800008b4: 0404 addi s1,sp,512 -800008b6: 0404 addi s1,sp,512 -800008b8: 0505 addi a0,a0,1 -800008ba: 0505 addi a0,a0,1 -800008bc: 0505 addi a0,a0,1 -800008be: 0505 addi a0,a0,1 -800008c0: 0505 addi a0,a0,1 -800008c2: 0505 addi a0,a0,1 -800008c4: 0505 addi a0,a0,1 -800008c6: 0505 addi a0,a0,1 +80000898 <__clz_tab>: +80000898: 0100 addi s0,sp,128 +8000089a: 0202 c.slli64 tp +8000089c: 03030303 lb t1,48(t1) +800008a0: 0404 addi s1,sp,512 +800008a2: 0404 addi s1,sp,512 +800008a4: 0404 addi s1,sp,512 +800008a6: 0404 addi s1,sp,512 +800008a8: 0505 addi a0,a0,1 +800008aa: 0505 addi a0,a0,1 +800008ac: 0505 addi a0,a0,1 +800008ae: 0505 addi a0,a0,1 +800008b0: 0505 addi a0,a0,1 +800008b2: 0505 addi a0,a0,1 +800008b4: 0505 addi a0,a0,1 +800008b6: 0505 addi a0,a0,1 +800008b8: 0606 slli a2,a2,0x1 +800008ba: 0606 slli a2,a2,0x1 +800008bc: 0606 slli a2,a2,0x1 +800008be: 0606 slli a2,a2,0x1 +800008c0: 0606 slli a2,a2,0x1 +800008c2: 0606 slli a2,a2,0x1 +800008c4: 0606 slli a2,a2,0x1 +800008c6: 0606 slli a2,a2,0x1 800008c8: 0606 slli a2,a2,0x1 800008ca: 0606 slli a2,a2,0x1 800008cc: 0606 slli a2,a2,0x1 @@ -620,14 +624,10 @@ Disassembly of section .rodata: 800008d2: 0606 slli a2,a2,0x1 800008d4: 0606 slli a2,a2,0x1 800008d6: 0606 slli a2,a2,0x1 -800008d8: 0606 slli a2,a2,0x1 -800008da: 0606 slli a2,a2,0x1 -800008dc: 0606 slli a2,a2,0x1 -800008de: 0606 slli a2,a2,0x1 -800008e0: 0606 slli a2,a2,0x1 -800008e2: 0606 slli a2,a2,0x1 -800008e4: 0606 slli a2,a2,0x1 -800008e6: 0606 slli a2,a2,0x1 +800008d8: 07070707 0x7070707 +800008dc: 07070707 0x7070707 +800008e0: 07070707 0x7070707 +800008e4: 07070707 0x7070707 800008e8: 07070707 0x7070707 800008ec: 07070707 0x7070707 800008f0: 07070707 0x7070707 @@ -640,10 +640,14 @@ Disassembly of section .rodata: 8000090c: 07070707 0x7070707 80000910: 07070707 0x7070707 80000914: 07070707 0x7070707 -80000918: 07070707 0x7070707 -8000091c: 07070707 0x7070707 -80000920: 07070707 0x7070707 -80000924: 07070707 0x7070707 +80000918: 0808 addi a0,sp,16 +8000091a: 0808 addi a0,sp,16 +8000091c: 0808 addi a0,sp,16 +8000091e: 0808 addi a0,sp,16 +80000920: 0808 addi a0,sp,16 +80000922: 0808 addi a0,sp,16 +80000924: 0808 addi a0,sp,16 +80000926: 0808 addi a0,sp,16 80000928: 0808 addi a0,sp,16 8000092a: 0808 addi a0,sp,16 8000092c: 0808 addi a0,sp,16 @@ -700,61 +704,53 @@ Disassembly of section .rodata: 80000992: 0808 addi a0,sp,16 80000994: 0808 addi a0,sp,16 80000996: 0808 addi a0,sp,16 -80000998: 0808 addi a0,sp,16 -8000099a: 0808 addi a0,sp,16 -8000099c: 0808 addi a0,sp,16 -8000099e: 0808 addi a0,sp,16 -800009a0: 0808 addi a0,sp,16 -800009a2: 0808 addi a0,sp,16 -800009a4: 0808 addi a0,sp,16 -800009a6: 0808 addi a0,sp,16 Disassembly of section .init_array: -800019a8 <__init_array_start>: -800019a8: 016c addi a1,sp,140 -800019aa: 8000 0x8000 +80001998 <__init_array_start>: +80001998: 016c addi a1,sp,140 +8000199a: 8000 0x8000 Disassembly of section .data: -800019b0 : -800019b0: 0000 unimp -800019b2: 0000 unimp -800019b4: 1c9c addi a5,sp,624 -800019b6: 8000 0x8000 -800019b8: 1d04 addi s1,sp,688 -800019ba: 8000 0x8000 -800019bc: 1d6c addi a1,sp,700 -800019be: 8000 0x8000 +800019a0 : +800019a0: 0000 unimp +800019a2: 0000 unimp +800019a4: 1c8c addi a1,sp,624 +800019a6: 8000 0x8000 +800019a8: 1cf4 addi a3,sp,636 +800019aa: 8000 0x8000 +800019ac: 1d5c addi a5,sp,692 +800019ae: 8000 0x8000 ... -80001a58: 0001 nop -80001a5a: 0000 unimp -80001a5c: 0000 unimp -80001a5e: 0000 unimp -80001a60: 330e fld ft6,224(sp) -80001a62: abcd j 80002054 <__BSS_END__+0x1f0> -80001a64: 1234 addi a3,sp,296 -80001a66: e66d bnez a2,80001b50 -80001a68: deec sw a1,124(a3) -80001a6a: 0005 c.nop 1 -80001a6c: 0000000b 0xb +80001a48: 0001 nop +80001a4a: 0000 unimp +80001a4c: 0000 unimp +80001a4e: 0000 unimp +80001a50: 330e fld ft6,224(sp) +80001a52: abcd j 80002044 <__BSS_END__+0x1f0> +80001a54: 1234 addi a3,sp,296 +80001a56: e66d bnez a2,80001b40 +80001a58: deec sw a1,124(a3) +80001a5a: 0005 c.nop 1 +80001a5c: 0000000b 0xb ... Disassembly of section .sdata: -80001dd8 <__SDATA_BEGIN__>: -80001dd8: 0000 unimp -80001dda: 4980 lw s0,16(a1) -80001ddc: 0000 unimp -80001dde: 3f80 fld fs0,56(a5) +80001dc8 <__SDATA_BEGIN__>: +80001dc8: 0000 unimp +80001dca: 4980 lw s0,16(a1) +80001dcc: 0000 unimp +80001dce: 3f80 fld fs0,56(a5) -80001de0 <_global_impure_ptr>: -80001de0: 19b0 addi a2,sp,248 -80001de2: 8000 0x8000 +80001dd0 <_global_impure_ptr>: +80001dd0: 19a0 addi s0,sp,248 +80001dd2: 8000 0x8000 Disassembly of section .bss: -80001de4 : +80001dd4 : ... Disassembly of section .comment: @@ -798,7 +794,7 @@ Disassembly of section .debug_aranges: a: 0004 0x4 c: 0000 unimp e: 0000 unimp - 10: 0480 addi s0,sp,576 + 10: 0470 addi a2,sp,524 12: 8000 0x8000 14: 004c addi a1,sp,4 ... @@ -824,7 +820,7 @@ Disassembly of section .debug_info: 12: 0004 0x4 14: 5900 lw s0,48(a0) 16: 0006 c.slli zero,0x1 - 18: 8000 0x8000 + 18: 7000 flw fs0,32(s0) 1a: 0004 0x4 1c: 4c80 lw s0,24(s1) 1e: 0000 unimp @@ -2072,7 +2068,7 @@ Disassembly of section .debug_info: b80: 02c2 slli t0,t0,0x10 b82: 2501 jal 1182 <__stack_size+0xd82> b84: 0000 unimp - b86: 8000 0x8000 + b86: 7000 flw fs0,32(s0) b88: 0004 0x4 b8a: 4c80 lw s0,24(s1) b8c: 0000 unimp @@ -3317,7 +3313,7 @@ Disassembly of section .debug_info: 1736: b20d j 1058 <__stack_size+0xc58> 1738: 0f02 c.slli64 t5 173a: 0305 addi t1,t1,1 - 173c: 08a8 addi a0,sp,88 + 173c: 0898 addi a4,sp,80 173e: 8000 0x8000 ... @@ -3913,7 +3909,7 @@ Disassembly of section .debug_line: 224: 0500 addi s0,sp,640 226: 0001 nop 228: 0205 addi tp,tp,1 - 22a: 0480 addi s0,sp,576 + 22a: 0470 addi a2,sp,524 22c: 8000 0x8000 22e: 0105c203 lbu tp,16(a1) 232: 0305 addi t1,t1,1 @@ -4193,7 +4189,7 @@ Disassembly of section .debug_frame: 12: 0000 unimp 14: 0000 unimp 16: 0000 unimp - 18: 0480 addi s0,sp,576 + 18: 0470 addi a2,sp,524 1a: 8000 0x8000 1c: 004c addi a1,sp,4 ... diff --git a/driver/tests/tex_demo/kernel.elf b/driver/tests/tex_demo/kernel.elf index ea2de947..2024bf95 100755 Binary files a/driver/tests/tex_demo/kernel.elf and b/driver/tests/tex_demo/kernel.elf differ diff --git a/driver/tests/tex_demo/main.cpp b/driver/tests/tex_demo/main.cpp index 41665918..d7c98fe5 100644 --- a/driver/tests/tex_demo/main.cpp +++ b/driver/tests/tex_demo/main.cpp @@ -20,7 +20,7 @@ /////////////////////////////////////////////////////////////////////////////// const char* kernel_file = "kernel.bin"; -const char* input_file = "sample.tga"; +const char* input_file = "palette.tga"; const char* output_file = "output.tga"; float scale = 1.0f; @@ -69,7 +69,7 @@ void cleanup() { } } -int run_test(const kernel_arg_t& kernel_arg, uint32_t buf_size, uint32_t width, uint32_t height, uint32_t dst_bpp) { +int run_test(const kernel_arg_t& kernel_arg, uint32_t buf_size, uint32_t width, uint32_t height, uint32_t bpp) { // start device std::cout << "start device" << std::endl; RT_CHECK(vx_start(device)); @@ -83,14 +83,15 @@ int run_test(const kernel_arg_t& kernel_arg, uint32_t buf_size, uint32_t width, RT_CHECK(vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0)); std::vector dst_pixels(buf_size); - auto buf_ptr = (int8_t*)vx_host_ptr(buffer); + auto buf_ptr = (uint8_t*)vx_host_ptr(buffer); for (uint32_t i = 0; i < buf_size; ++i) { dst_pixels[i] = buf_ptr[i]; } // save output image std::cout << "save output image" << std::endl; - RT_CHECK(SaveTGA(output_file, dst_pixels, width, height, dst_bpp)); + dump_image(dst_pixels, width, height, bpp); + RT_CHECK(SaveTGA(output_file, dst_pixels, width, height, bpp)); return 0; } @@ -106,6 +107,7 @@ int main(int argc, char *argv[]) { parse_args(argc, argv); RT_CHECK(LoadTGA(input_file, src_pixels, &src_width, &src_height, &src_bpp)); + dump_image(src_pixels, src_width, src_height, src_bpp); uint32_t src_bufsize = src_bpp * src_width * src_height; uint32_t dst_width = (uint32_t)(src_width * scale); @@ -122,9 +124,9 @@ int main(int argc, char *argv[]) { RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps)); RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads)); - uint32_t num_tasks = max_cores * max_warps * max_threads; + uint32_t num_tasks = max_cores * max_warps * max_threads / 4; - std::cout << "number of tasks: " << num_tasks << std::endl; + std::cout << "number of tasks: " << std::dec << num_tasks << std::endl; std::cout << "source buffer: width=" << src_width << ", heigth=" << src_height << ", size=" << src_bufsize << " bytes" << std::endl; std::cout << "destination buffer: width=" << dst_width << ", heigth=" << dst_height << ", size=" << dst_bufsize << " bytes" << std::endl; @@ -138,8 +140,8 @@ int main(int argc, char *argv[]) { RT_CHECK(vx_alloc_dev_mem(device, src_bufsize, &src_addr)); RT_CHECK(vx_alloc_dev_mem(device, dst_bufsize, &dst_addr)); - std::cout << "src_addr=" << std::hex << src_addr << std::endl; - std::cout << "dst_addr=" << std::hex << dst_addr << std::endl; + std::cout << "src_addr=0x" << std::hex << src_addr << std::endl; + std::cout << "dst_addr=0x" << std::hex << dst_addr << std::endl; // allocate staging shared memory std::cout << "allocate shared memory" << std::endl; @@ -154,13 +156,13 @@ int main(int argc, char *argv[]) { kernel_arg.src_width = src_width; kernel_arg.src_height = src_height; kernel_arg.src_stride = src_bpp; - kernel_arg.src_pitch = src_bpp * src_width * src_height; + kernel_arg.src_pitch = src_bpp * src_width; kernel_arg.src_ptr = src_addr; kernel_arg.dst_width = dst_width; kernel_arg.dst_height = dst_height; kernel_arg.dst_stride = dst_bpp; - kernel_arg.dst_pitch = dst_bpp * dst_width * dst_height; + kernel_arg.dst_pitch = dst_bpp * dst_width; kernel_arg.dst_ptr = dst_addr; auto buf_ptr = (int*)vx_host_ptr(buffer); diff --git a/driver/tests/tex_demo/palette.tga b/driver/tests/tex_demo/palette.tga new file mode 100644 index 00000000..2653f445 Binary files /dev/null and b/driver/tests/tex_demo/palette.tga differ diff --git a/driver/tests/tex_demo/sample.tga b/driver/tests/tex_demo/sample.tga deleted file mode 100755 index 1b04c925..00000000 Binary files a/driver/tests/tex_demo/sample.tga and /dev/null differ diff --git a/driver/tests/tex_demo/toad.tga b/driver/tests/tex_demo/toad.tga new file mode 100644 index 00000000..1903c5c3 Binary files /dev/null and b/driver/tests/tex_demo/toad.tga differ diff --git a/driver/tests/tex_demo/utils.cpp b/driver/tests/tex_demo/utils.cpp index 9b36c510..45d691d9 100644 --- a/driver/tests/tex_demo/utils.cpp +++ b/driver/tests/tex_demo/utils.cpp @@ -1,5 +1,6 @@ #include "utils.h" #include +#include struct __attribute__((__packed__)) tga_header_t { int8_t idlength; @@ -108,8 +109,35 @@ int SaveTGA(const char *filename, header.bitsperpixel = bpp * 8; header.imagedescriptor = 0; - ofs.write(reinterpret_cast(&header), sizeof(tga_header_t)); - ofs.write((const char*)pixels.data(), pixels.size()); + ofs.write(reinterpret_cast(&header), sizeof(tga_header_t)); + + uint32_t pitch = bpp * width; + const uint8_t* pixel_bytes = pixels.data() + (height - 1) * pitch; + for (uint32_t y = 0; y < height; ++y) { + const uint8_t* pixel_row = pixel_bytes; + for (uint32_t x = 0; x < width; ++x) { + ofs.write((const char*)pixel_row, bpp); + pixel_row += bpp; + } + pixel_bytes -= pitch; + } return 0; +} + +void dump_image(const std::vector& pixels, uint32_t width, uint32_t height, uint32_t bpp) { + assert(width * height * bpp == pixels.size()); + const uint8_t* pixel_bytes = pixels.data(); + for (uint32_t y = 0; y < height; ++y) { + for (uint32_t x = 0; x < width; ++x) { + uint32_t pixel32 = 0; + for (uint32_t b = 0; b < bpp; ++b) { + uint32_t pixel8 = *pixel_bytes++; + pixel32 |= pixel8 << (b * 8); + } + if (x) std::cout << ", "; + std::cout << std::hex << pixel32; + } + std::cout << std::endl; + } } \ No newline at end of file diff --git a/driver/tests/tex_demo/utils.h b/driver/tests/tex_demo/utils.h index b1d15e3b..24794a8f 100644 --- a/driver/tests/tex_demo/utils.h +++ b/driver/tests/tex_demo/utils.h @@ -12,4 +12,6 @@ int SaveTGA(const char *filename, const std::vector &pixels, uint32_t width, uint32_t height, - uint32_t bpp); \ No newline at end of file + uint32_t bpp); + +void dump_image(const std::vector& pixels, uint32_t width, uint32_t height, uint32_t bpp); \ No newline at end of file diff --git a/hw/rtl/VX_issue.v b/hw/rtl/VX_issue.v index 37300527..5f50e106 100644 --- a/hw/rtl/VX_issue.v +++ b/hw/rtl/VX_issue.v @@ -183,19 +183,46 @@ module VX_issue #( `ifdef DBG_PRINT_PIPELINE always @(posedge clk) begin if (alu_req_if.valid && alu_req_if.ready) begin - $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=ALU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, alu_req_if.wid, alu_req_if.PC, alu_req_if.tmask, alu_req_if.rd, alu_req_if.rs1_data, alu_req_if.rs2_data); + $write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=ALU, tmask=%b, rd=%0d, rs1_data=", + $time, CORE_ID, alu_req_if.wid, alu_req_if.PC, alu_req_if.tmask, alu_req_if.rd); + `PRINT_ARRAY1D(alu_req_if.rs1_data, `NUM_THREADS); + $write(", rs2_data="); + `PRINT_ARRAY1D(alu_req_if.rs2_data, `NUM_THREADS); + $write("\n"); end if (lsu_req_if.valid && lsu_req_if.ready) begin - $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rd=%0d, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, lsu_req_if.wid, lsu_req_if.PC, lsu_req_if.tmask, lsu_req_if.rd, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data); + $write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rd=%0d, offset=%0h, addr=", + $time, CORE_ID, lsu_req_if.wid, lsu_req_if.PC, lsu_req_if.tmask, lsu_req_if.rd, lsu_req_if.offset); + `PRINT_ARRAY1D(lsu_req_if.base_addr, `NUM_THREADS); + $write(", data="); + `PRINT_ARRAY1D(lsu_req_if.store_data, `NUM_THREADS); + $write("\n"); end if (csr_req_if.valid && csr_req_if.ready) begin - $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, rs1_data=%0h", $time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.addr, csr_req_if.rs1_data); + $write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, rs1_data=", + $time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.addr); + `PRINT_ARRAY1D(csr_req_if.rs1_data, `NUM_THREADS); + $write("\n"); end if (fpu_req_if.valid && fpu_req_if.ready) begin - $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, fpu_req_if.wid, fpu_req_if.PC, fpu_req_if.tmask, fpu_req_if.rd, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data); + $write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rd=%0d, rs1_data=", + $time, CORE_ID, fpu_req_if.wid, fpu_req_if.PC, fpu_req_if.tmask, fpu_req_if.rd); + `PRINT_ARRAY1D(fpu_req_if.rs1_data, `NUM_THREADS); + $write(", rs2_data="); + `PRINT_ARRAY1D(fpu_req_if.rs2_data, `NUM_THREADS); + $write(", rs3_data="); + `PRINT_ARRAY1D(fpu_req_if.rs3_data, `NUM_THREADS); + $write("\n"); end if (gpu_req_if.valid && gpu_req_if.ready) begin - $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, gpu_req_if.wid, gpu_req_if.PC, gpu_req_if.tmask, gpu_req_if.rd, gpu_req_if.rs1_data, gpu_req_if.rs2_data, gpu_req_if.rs3_data); + $write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rd=%0d, rs1_data=", + $time, CORE_ID, gpu_req_if.wid, gpu_req_if.PC, gpu_req_if.tmask, gpu_req_if.rd); + `PRINT_ARRAY1D(gpu_req_if.rs1_data, `NUM_THREADS); + $write(", rs2_data="); + `PRINT_ARRAY1D(gpu_req_if.rs2_data, `NUM_THREADS); + $write(", rs3_data="); + `PRINT_ARRAY1D(gpu_req_if.rs3_data, `NUM_THREADS); + $write("\n"); end end `endif diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index ca1fbc79..bcf1dafe 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -81,4 +81,24 @@ `define LTRIM(x, s) x[s-1:0] +`define PRINT_ARRAY1D(a, m) \ + $write("{"); \ + for (integer i = (m-1); i >= 0; --i) begin \ + if (i != (m-1)) $write(", "); \ + $write("0x%0h", a[i]); \ + end \ + $write("}"); \ + +`define PRINT_ARRAY2D(a, m, n) \ + $write("{"); \ + for (integer i = n-1; i >= 0; --i) begin \ + $write("{"); \ + for (integer j = (m-1); j >= 0; --j) begin \ + if (j != (m-1)) $write(", "); \ + $write("0x%0h", a[i][j]); \ + end \ + $write("}"); \ + end \ + $write("}") + `endif \ No newline at end of file diff --git a/hw/rtl/tex_unit/VX_tex_addr.v b/hw/rtl/tex_unit/VX_tex_addr.v index 98ac31c5..1a798213 100644 --- a/hw/rtl/tex_unit/VX_tex_addr.v +++ b/hw/rtl/tex_unit/VX_tex_addr.v @@ -151,4 +151,15 @@ module VX_tex_addr #( assign ready_in = ~stall_out; + `ifdef DBG_PRINT_TEX + always @(posedge clk) begin + if (mem_req_valid && mem_req_ready) begin + $write("%t: core%0d-tex-addr: wid=%0d, PC=%0h, tmask=%b, filter=%0d, tride=%0d, addr=", + $time, CORE_ID, mem_req_wid, mem_req_PC, mem_req_tmask, mem_req_filter, mem_req_stride); + `PRINT_ARRAY2D(mem_req_addr, 4, `NUM_THREADS); + $write("\n"); + end + end +`endif + endmodule \ No newline at end of file diff --git a/hw/rtl/tex_unit/VX_tex_bilerp.v b/hw/rtl/tex_unit/VX_tex_bilerp.v index 35d6ec47..0388dbfe 100644 --- a/hw/rtl/tex_unit/VX_tex_bilerp.v +++ b/hw/rtl/tex_unit/VX_tex_bilerp.v @@ -41,7 +41,6 @@ module VX_tex_bilerp #( `UNUSED_VAR (V_lerp[63:56]) - always @(*) begin if (color_enable[3]==1'b1) //R sampled_r[31:24] = V_lerp[55:48]; diff --git a/hw/rtl/tex_unit/VX_tex_memory.v b/hw/rtl/tex_unit/VX_tex_memory.v index e7573b1c..1b5f4fff 100644 --- a/hw/rtl/tex_unit/VX_tex_memory.v +++ b/hw/rtl/tex_unit/VX_tex_memory.v @@ -166,7 +166,8 @@ module VX_tex_memory #( // Dcache Response reg [3:0][`NUM_THREADS-1:0][31:0] rsp_texels, rsp_texels_n; - reg [`NUM_THREADS-1:0][31:0] rsp_cur_data; + wire [`NUM_THREADS-1:0][3:0][31:0] rsp_texels_qual; + reg [`NUM_THREADS-1:0][31:0] rsp_data_qual; reg [RSP_CTR_W-1:0] rsp_rem_ctr; wire [`NUM_THREADS-1:0] rsp_cur_tmask; wire [RSP_CTR_W-1:0] rsp_max_cnt; @@ -187,8 +188,9 @@ module VX_tex_memory #( assign rsp_max_cnt = $countones(q_req_tmask) * (q_req_filter ? 4 : 1); - for (genvar i = 0; i < `NUM_THREADS; i++) begin - wire [31:0] src_data = (i == 0 || rsp_texel_dup) ? dcache_rsp_if.data[0] : dcache_rsp_if.data[i]; + for (genvar i = 0; i < `NUM_THREADS; i++) begin + wire [31:0] src_mask = {32{dcache_rsp_if.valid[i]}}; + wire [31:0] src_data = ((i == 0 || rsp_texel_dup) ? dcache_rsp_if.data[0] : dcache_rsp_if.data[i]) & src_mask; reg [31:0] rsp_data_shifted; always @(*) begin @@ -199,16 +201,16 @@ module VX_tex_memory #( always @(*) begin case (q_req_stride) - 0: rsp_cur_data[i] = 32'(rsp_data_shifted[7:0]); - 1: rsp_cur_data[i] = 32'(rsp_data_shifted[15:0]); - default: rsp_cur_data[i] = rsp_data_shifted; + 0: rsp_data_qual[i] = 32'(rsp_data_shifted[7:0]); + 1: rsp_data_qual[i] = 32'(rsp_data_shifted[15:0]); + default: rsp_data_qual[i] = rsp_data_shifted; endcase end end always @(*) begin rsp_texels_n = rsp_texels; - rsp_texels_n[rsp_texel_idx] |= rsp_cur_data; + rsp_texels_n[rsp_texel_idx] |= rsp_data_qual; end always @(posedge clk) begin @@ -231,6 +233,12 @@ module VX_tex_memory #( end end + for (genvar i = 0; i < `NUM_THREADS; ++i) begin + for (genvar j = 0; j < 4; ++j) begin + assign rsp_texels_qual[i][j] = rsp_texels_n[j][i]; + end + end + wire stall_out = rsp_valid && ~rsp_ready; wire rsp_texels_done = dcache_rsp_fire && (rsp_rem_ctr == RSP_CTR_W'(rsp_cur_cnt)); @@ -244,8 +252,8 @@ module VX_tex_memory #( .clk (clk), .reset (reset), .enable (~stall_out), - .data_in ({rsp_texels_done, q_req_wid, q_req_tmask, q_req_PC, q_req_filter, rsp_texels_n, q_req_info}), - .data_out ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_filter, rsp_data, rsp_info}) + .data_in ({rsp_texels_done, q_req_wid, q_req_tmask, q_req_PC, q_req_filter, rsp_texels_qual, q_req_info}), + .data_out ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_filter, rsp_data, rsp_info}) ); // Can accept new cache response? @@ -254,12 +262,22 @@ module VX_tex_memory #( `ifdef DBG_PRINT_TEX always @(posedge clk) begin if ((| dcache_req_fire)) begin - $display("%t: T$%0d Rd Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, is_dup=%b", - $time, CORE_ID, q_req_wid, q_req_PC, dcache_req_fire, req_texel_addr, dcache_req_if.tag, req_texel_dup); + $write("%t: core%0d-tex-cache-req: wid=%0d, PC=%0h, tmask=%b, tag=%0h, addr=", + $time, CORE_ID, q_req_wid, q_req_PC, dcache_req_fire, dcache_req_if.tag); + `PRINT_ARRAY1D(req_texel_addr, `NUM_THREADS); + $write(", is_dup=%b\n", req_texel_dup); end if (dcache_rsp_fire) begin - $display("%t: T$%0d Rsp: valid=%b, wid=%0d, PC=%0h, tag=%0h, data=%0h, is_dup=%b", - $time, CORE_ID, dcache_rsp_if.valid, rsp_wid, rsp_PC, dcache_rsp_if.tag, dcache_rsp_if.data, rsp_texel_dup); + $write("%t: core%0d-tex-cache-rsp: wid=%0d, PC=%0h, tmask=%b, tag=%0h, data=", + $time, CORE_ID, q_req_wid, q_req_PC, dcache_rsp_if.valid, dcache_rsp_if.tag); + `PRINT_ARRAY1D(rsp_data_qual, `NUM_THREADS); + $write("\n"); + end + if (rsp_valid && rsp_ready) begin + $write("%t: core%0d-tex-mem-rsp: wid=%0d, PC=%0h, filter=%0d, data=", + $time, CORE_ID, rsp_wid, rsp_PC, rsp_filter); + `PRINT_ARRAY2D(rsp_data, 4, `NUM_THREADS); + $write("\n"); end end `endif diff --git a/hw/rtl/tex_unit/VX_tex_sampler.v b/hw/rtl/tex_unit/VX_tex_sampler.v index 594f12c6..9bfb16b0 100644 --- a/hw/rtl/tex_unit/VX_tex_sampler.v +++ b/hw/rtl/tex_unit/VX_tex_sampler.v @@ -38,7 +38,7 @@ module VX_tex_sampler #( wire stall_out; - for (genvar i = 0; i<`NUM_THREADS ;i++ ) begin + for (genvar i = 0; i < `NUM_THREADS; i++) begin wire [3:0][63:0] formatted_data; wire [`NUM_COLOR_CHANNEL-1:0] color_enable; @@ -57,24 +57,23 @@ module VX_tex_sampler #( VX_tex_bilerp #( .CORE_ID (CORE_ID) ) tex_bilerp ( - .blendU(req_u[i][`BLEND_FRAC_64-1:0]), //blendU - .blendV(req_v[i][`BLEND_FRAC_64-1:0]), //blendV + .blendU (req_u[i][`BLEND_FRAC_64-1:0]), + .blendV (req_v[i][`BLEND_FRAC_64-1:0]), - .color_enable(color_enable), - .texels(formatted_data), - - .sampled_data(req_data_bilerp[i]) + .color_enable (color_enable), + .texels (formatted_data), + + .sampled_data (req_data_bilerp[i]) ); end - for (genvar i = 0;i<`NUM_THREADS ;i++ ) begin + for (genvar i = 0; i < `NUM_THREADS; i++) begin assign req_data[i] = (req_filter == `TEX_FILTER_BITS'(0)) ? req_texels[i][0] : req_data_bilerp[i]; end - assign stall_out = ~rsp_ready; - assign req_ready = rsp_ready; - + assign stall_out = rsp_valid && ~rsp_ready; + VX_pipe_register #( .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)), .RESETW (1) @@ -86,4 +85,7 @@ module VX_tex_sampler #( .data_out ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data}) ); + // can accept new request? + assign req_ready = ~stall_out; + endmodule \ No newline at end of file diff --git a/hw/rtl/tex_unit/VX_tex_unit.v b/hw/rtl/tex_unit/VX_tex_unit.v index 69b3b35b..094c595e 100644 --- a/hw/rtl/tex_unit/VX_tex_unit.v +++ b/hw/rtl/tex_unit/VX_tex_unit.v @@ -41,16 +41,28 @@ module VX_tex_unit #( always @(posedge clk) begin if (tex_csr_if.write_enable) begin case (tex_csr_if.write_addr) - `CSR_TEX_ADDR(i) : tex_baddr[i] <= tex_csr_if.write_data[`TEX_ADDR_BITS-1:0]; - `CSR_TEX_FORMAT(i) : tex_format[i] <= tex_csr_if.write_data[`TEX_FORMAT_BITS-1:0]; - `CSR_TEX_WRAP(i) : begin + `CSR_TEX_ADDR(i) : begin + tex_baddr[i] <= tex_csr_if.write_data[`TEX_ADDR_BITS-1:0]; + end + `CSR_TEX_FORMAT(i) : begin + tex_format[i] <= tex_csr_if.write_data[`TEX_FORMAT_BITS-1:0]; + end + `CSR_TEX_WRAP(i) : begin tex_wrap_u[i] <= tex_csr_if.write_data[0 +: `TEX_WRAP_BITS]; tex_wrap_v[i] <= tex_csr_if.write_data[`TEX_WRAP_BITS +: `TEX_WRAP_BITS]; end - `CSR_TEX_FILTER(i) : tex_filter[i] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0]; - `CSR_TEX_MIPOFF(i) : tex_mipoff[i][mip_level] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0]; - `CSR_TEX_WIDTH(i) : tex_width[i][mip_level] <= tex_csr_if.write_data[`TEX_WIDTH_BITS-1:0]; - `CSR_TEX_HEIGHT(i) : tex_height[i][mip_level] <= tex_csr_if.write_data[`TEX_HEIGHT_BITS-1:0]; + `CSR_TEX_FILTER(i) : begin + tex_filter[i] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0]; + end + `CSR_TEX_MIPOFF(i) : begin + tex_mipoff[i][mip_level] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0]; + end + `CSR_TEX_WIDTH(i) : begin + tex_width[i][mip_level] <= tex_csr_if.write_data[`TEX_WIDTH_BITS-1:0]; + end + `CSR_TEX_HEIGHT(i) : begin + tex_height[i][mip_level] <= tex_csr_if.write_data[`TEX_HEIGHT_BITS-1:0]; + end default: assert(tex_csr_if.write_addr >= `CSR_TEX_BEGIN(0) && tex_csr_if.write_addr < `CSR_TEX_BEGIN(`CSR_TEX_STATES)); @@ -212,22 +224,34 @@ module VX_tex_unit #( ); `ifdef DBG_PRINT_TEX - for (genvar i = 0; i < `NUM_TEX_UNITS; ++i) begin - always @(posedge clk) begin + for (genvar i = 0; i < `NUM_TEX_UNITS; ++i) begin + always @(posedge clk) begin if (tex_csr_if.write_enable && (tex_csr_if.write_addr >= `CSR_TEX_BEGIN(i) && tex_csr_if.write_addr < `CSR_TEX_BEGIN(i+1))) begin - $display("%t: core%0d-tex_unit: tex%0d_addr=%0h", $time, CORE_ID, i, tex_baddr[i]); - $display("%t: core%0d-tex_unit: tex%0d_format=%0h", $time, CORE_ID, i, tex_format[i]); - $display("%t: core%0d-tex_unit: tex%0d_wrap_u=%0h", $time, CORE_ID, i, tex_wrap_u[i]); - $display("%t: core%0d-tex_unit: tex%0d_wrap_v=%0h", $time, CORE_ID, i, tex_wrap_v[i]); - $display("%t: core%0d-tex_unit: tex%0d_filter=%0h", $time, CORE_ID, i, tex_filter[i]); - $display("%t: core%0d-tex_unit: tex%0d_mipoff[0]=%0h", $time, CORE_ID, i, tex_mipoff[i][0]); - $display("%t: core%0d-tex_unit: tex%0d_width[0]=%0h", $time, CORE_ID, i, tex_width[i][0]); - $display("%t: core%0d-tex_unit: tex%0d_height[0]=%0h", $time, CORE_ID, i, tex_height[i][0]); + $display("%t: core%0d-tex-csr: tex%0d_addr=%0h", $time, CORE_ID, i, tex_baddr[i]); + $display("%t: core%0d-tex-csr: tex%0d_format=%0h", $time, CORE_ID, i, tex_format[i]); + $display("%t: core%0d-tex-csr: tex%0d_wrap_u=%0h", $time, CORE_ID, i, tex_wrap_u[i]); + $display("%t: core%0d-tex-csr: tex%0d_wrap_v=%0h", $time, CORE_ID, i, tex_wrap_v[i]); + $display("%t: core%0d-tex-csr: tex%0d_filter=%0h", $time, CORE_ID, i, tex_filter[i]); + $display("%t: core%0d-tex-csr: tex%0d_mipoff[0]=%0h", $time, CORE_ID, i, tex_mipoff[i][0]); + $display("%t: core%0d-tex-csr: tex%0d_width[0]=%0h", $time, CORE_ID, i, tex_width[i][0]); + $display("%t: core%0d-tex-csr: tex%0d_height[0]=%0h", $time, CORE_ID, i, tex_height[i][0]); end end end + always @(posedge clk) begin + if (tex_req_if.valid && tex_req_if.ready) begin + $display("%t: core%0d-tex-req: wid=%0d, PC=%0h, tmask=%b, unit=%0d, u=%0h, v=%0h, lod=%0h", + $time, CORE_ID, tex_req_if.wid, tex_req_if.PC, tex_req_if.tmask, tex_req_if.unit, tex_req_if.u, tex_req_if.v, tex_req_if.lod); + end + if (tex_rsp_if.valid && tex_rsp_if.ready) begin + $write("%t: core%0d-tex-rsp: wid=%0d, PC=%0h, tmask=%b, data=", + $time, CORE_ID, tex_rsp_if.wid, tex_rsp_if.PC, tex_rsp_if.tmask); + `PRINT_ARRAY1D(tex_rsp_if.data, `NUM_THREADS); + $write("\n"); + end + end `endif endmodule \ No newline at end of file