From bbb237391962f50bf60523dd8ceb2f1599b7cf89 Mon Sep 17 00:00:00 2001 From: felsabbagh3 Date: Fri, 1 Nov 2019 21:53:37 -0400 Subject: [PATCH] Intrinsics: tests for TMC+Control Divergence --- rtl/VX_execute_unit.v | 12 +- rtl/VX_fetch.v | 1 + rtl/VX_gpgpu_inst.v | 3 +- rtl/VX_warp_scheduler.v | 18 +- rtl/cache/VX_Cache_Bank.v | 19 +- rtl/interfaces/VX_warp_ctl_inter.v | 1 + rtl/modelsim/vortex_dpi.cpp | 2 +- runtime/intrinsics/vx_intrinsics.h | 10 + runtime/intrinsics/vx_intrinsics.s | 1 + runtime/startup/vx_start.s | 78 +++-- runtime/vortex_runtime.dump | 502 +++++++++++++++++++---------- runtime/vortex_runtime.elf | Bin 9548 -> 9644 bytes runtime/vortex_runtime.hex | 112 ++++--- runtime/vx_main.c | 89 ++++- 14 files changed, 588 insertions(+), 260 deletions(-) diff --git a/rtl/VX_execute_unit.v b/rtl/VX_execute_unit.v index aecc38dd..56be1bdb 100644 --- a/rtl/VX_execute_unit.v +++ b/rtl/VX_execute_unit.v @@ -60,15 +60,15 @@ module VX_execute_unit ( endgenerate - wire [$clog2(`NT)-1:0] branch_use_index; - wire branch_found_valid; + wire [$clog2(`NT)-1:0] jal_branch_use_index; + wire jal_branch_found_valid; VX_generic_priority_encoder #(.N(`NT)) choose_alu_result( .valids(VX_exec_unit_req.valid), - .index (branch_use_index), - .found (branch_found_valid) + .index (jal_branch_use_index), + .found (jal_branch_found_valid) ); - wire[31:0] branch_use_alu_result = alu_result[branch_use_index]; + wire[31:0] branch_use_alu_result = alu_result[jal_branch_use_index]; reg temp_branch_dir; always @(*) @@ -104,7 +104,7 @@ module VX_execute_unit ( // Jal rsp assign VX_jal_rsp.jal = in_jal; - assign VX_jal_rsp.jal_dest = $signed(in_a_reg_data[0]) + $signed(in_jal_offset); + assign VX_jal_rsp.jal_dest = $signed(in_a_reg_data[jal_branch_use_index]) + $signed(in_jal_offset); assign VX_jal_rsp.jal_warp_num = VX_exec_unit_req.warp_num; // Branch rsp diff --git a/rtl/VX_fetch.v b/rtl/VX_fetch.v index f584f0dc..5c101c6f 100644 --- a/rtl/VX_fetch.v +++ b/rtl/VX_fetch.v @@ -57,6 +57,7 @@ module VX_fetch ( // Split .is_split (VX_warp_ctl.is_split), + .dont_split (VX_warp_ctl.dont_split), .split_new_mask (VX_warp_ctl.split_new_mask), .split_later_mask (VX_warp_ctl.split_later_mask), .split_save_pc (VX_warp_ctl.split_save_pc), diff --git a/rtl/VX_gpgpu_inst.v b/rtl/VX_gpgpu_inst.v index 6069be63..01a50515 100644 --- a/rtl/VX_gpgpu_inst.v +++ b/rtl/VX_gpgpu_inst.v @@ -71,7 +71,8 @@ module VX_gpgpu_inst ( // wire[`NW_M1:0] num_valids = $countones(curr_valids); - assign VX_warp_ctl.is_split = is_split && (num_valids > 1) && (split_new_use_mask != 0) && (split_new_use_mask != {`NT{1'b1}}); + assign VX_warp_ctl.is_split = is_split && (num_valids > 1); + assign VX_warp_ctl.dont_split = VX_warp_ctl.is_split && ((split_new_use_mask == 0) || (split_new_use_mask == {`NT{1'b1}})); assign VX_warp_ctl.split_new_mask = split_new_use_mask; assign VX_warp_ctl.split_later_mask = split_new_later_mask; assign VX_warp_ctl.split_save_pc = VX_gpu_inst_req.pc_next; diff --git a/rtl/VX_warp_scheduler.v b/rtl/VX_warp_scheduler.v index 016e9c91..5837135a 100644 --- a/rtl/VX_warp_scheduler.v +++ b/rtl/VX_warp_scheduler.v @@ -29,6 +29,7 @@ module VX_warp_scheduler ( // Split input wire is_split, + input wire dont_split, input wire[`NT_M1:0] split_new_mask, input wire[`NT_M1:0] split_later_mask, input wire[31:0] split_save_pc, @@ -104,6 +105,8 @@ module VX_warp_scheduler ( reg[`NW-1:0] total_barrier_stall; + reg didnt_split; + /* verilator lint_off UNUSED */ // wire[$clog2(`NW):0] num_active; /* verilator lint_on UNUSED */ @@ -122,6 +125,7 @@ module VX_warp_scheduler ( visible_active[0] <= 1; // Activating first warp thread_masks[0] <= 1; // Activating first thread in first warp warp_stalled <= 0; + didnt_split <= 0; // total_barrier_stall = 0; for (curr_w_help = 1; curr_w_help < `NW; curr_w_help=curr_w_help+1) begin warp_pcs[curr_w_help] <= 0; @@ -148,14 +152,20 @@ module VX_warp_scheduler ( end else if (ctm) begin thread_masks[ctm_warp_num] <= ctm_mask; warp_stalled[ctm_warp_num] <= 0; - end else if (is_join) begin + end else if (is_join && !didnt_split) begin if (!join_fall) begin warp_pcs[join_warp_num] <= join_pc; end thread_masks[join_warp_num] <= join_tm; + didnt_split <= 0; end else if (is_split) begin warp_stalled[split_warp_num] <= 0; - thread_masks[split_warp_num] <= split_new_mask; + if (!dont_split) begin + thread_masks[split_warp_num] <= split_new_mask; + didnt_split <= 0; + end else begin + didnt_split <= 1; + end end if (whalt) begin @@ -243,9 +253,9 @@ module VX_warp_scheduler ( wire correct_warp_s = (curr_warp == split_warp_num); wire correct_warp_j = (curr_warp == join_warp_num); - wire push = is_split && correct_warp_s; + wire push = (is_split && !dont_split) && correct_warp_s; wire pop = is_join && correct_warp_j; - VX_generic_stack #(.WIDTH(1+32+`NT), .DEPTH($clog2(`NT))) ipdom_stack( + VX_generic_stack #(.WIDTH(1+32+`NT), .DEPTH($clog2(`NT)+1)) ipdom_stack( .clk (clk), .reset(reset), .push (push), diff --git a/rtl/cache/VX_Cache_Bank.v b/rtl/cache/VX_Cache_Bank.v index 1ffafc0a..58163593 100644 --- a/rtl/cache/VX_Cache_Bank.v +++ b/rtl/cache/VX_Cache_Bank.v @@ -149,6 +149,23 @@ module VX_Cache_Bank wire[31:0] lhu_data = (data_unQual & 32'hFFFF); wire[31:0] lw_data = (data_unQual); + + wire[31:0] sw_data = writedata; + + wire[31:0] sb_data = b1 ? {{16{1'b0}}, writedata[7:0], { 8{1'b0}}} : + b2 ? {{ 8{1'b0}}, writedata[7:0], {16{1'b0}}} : + b3 ? {{ 0{1'b0}}, writedata[7:0], {24{1'b0}}} : + writedata; + + wire[31:0] sh_data = b2 ? {writedata[15:0], {16{1'b0}}} : writedata; + + + + wire[31:0] use_write_data = sb ? sb_data : + sh ? sh_data : + sw_data; + + wire[31:0] data_Qual = lb ? lb_data : lh ? lh_data : lhu ? lhu_data : @@ -177,7 +194,7 @@ module VX_Cache_Bank // assign we[g] = (normal_write || (write_from_mem)) ? 1'b1 : 1'b0; - assign data_write[g] = write_from_mem ? fetched_writedata[g] : writedata; + assign data_write[g] = write_from_mem ? fetched_writedata[g] : use_write_data; assign way_to_update = write_from_mem ? evicted_way : update_way; end diff --git a/rtl/interfaces/VX_warp_ctl_inter.v b/rtl/interfaces/VX_warp_ctl_inter.v index 958f585b..53dec2a1 100644 --- a/rtl/interfaces/VX_warp_ctl_inter.v +++ b/rtl/interfaces/VX_warp_ctl_inter.v @@ -23,6 +23,7 @@ interface VX_warp_ctl_inter (); wire[$clog2(`NW):0] num_warps; wire is_split; + wire dont_split; wire[`NW_M1:0] split_warp_num; wire[`NT_M1:0] split_new_mask; wire[`NT_M1:0] split_later_mask; diff --git a/rtl/modelsim/vortex_dpi.cpp b/rtl/modelsim/vortex_dpi.cpp index f17f006b..5fe66cf6 100644 --- a/rtl/modelsim/vortex_dpi.cpp +++ b/rtl/modelsim/vortex_dpi.cpp @@ -202,7 +202,7 @@ void io_handler(bool clk, bool io_valid, unsigned io_data) void gracefulExit(int cycles) { - fprintf(stderr, "\n*********************\n\n"); + fprintf(stderr, "*********************\n\n"); fprintf(stderr, "DPI Cycle Num: %d\tVerilog Cycle Num: %d\n", num_cycles, cycles); } diff --git a/runtime/intrinsics/vx_intrinsics.h b/runtime/intrinsics/vx_intrinsics.h index 0231786f..8459789e 100644 --- a/runtime/intrinsics/vx_intrinsics.h +++ b/runtime/intrinsics/vx_intrinsics.h @@ -22,3 +22,13 @@ unsigned vx_threadID(void); // Get hardware warp ID unsigned vx_warpID(void); + + + +#define __if(b) vx_split(b); \ + if (b) \ + +#define __else else + + +#define __endif vx_join(); \ No newline at end of file diff --git a/runtime/intrinsics/vx_intrinsics.s b/runtime/intrinsics/vx_intrinsics.s index 5a37c183..c2852ca7 100644 --- a/runtime/intrinsics/vx_intrinsics.s +++ b/runtime/intrinsics/vx_intrinsics.s @@ -27,6 +27,7 @@ vx_barrier: .global vx_split vx_split: .word 0x0005206b # split a0 + ret .type vx_join, @function .global vx_join diff --git a/runtime/startup/vx_start.s b/runtime/startup/vx_start.s index 1719a506..cf585b33 100644 --- a/runtime/startup/vx_start.s +++ b/runtime/startup/vx_start.s @@ -3,40 +3,64 @@ .type _start, @function .global _start _start: - li a0, 4 - .word 0x0005006b # tmc a0 - csrr a1,0x20 - slli a1, a1, 2 + # li a0, 4 + # .word 0x0005006b # tmc a0 + # csrr a1,0x20 + # slli a1, a1, 2 - la a2, 0x20000000 - add a2, a2, a1 - sw a1, 0(a2) + # la a2, 0x20000000 + # add a2, a2, a1 + # sw a1, 0(a2) - la a2, 0x40000000 - add a2, a2, a1 - li a3, 5 - sw a3, 0(a2) + # la a2, 0x40000000 + # add a2, a2, a1 + # li a3, 5 + # sw a3, 0(a2) - la a2, 0x80000000 - add a2, a2, a1 - li a3, 7 - sw a3, 0(a2) + # la a2, 0x80000000 + # add a2, a2, a1 + # li a3, 7 + # sw a3, 0(a2) - la a2, 0x60000000 - add a2, a2, a1 - li a3, 7 - sw a3, 0(a2) + # la a2, 0x60000000 + # add a2, a2, a1 + # li a3, 7 + # sw a3, 0(a2) - la a2, 0x20000000 - add a2, a2, a1 - lw a4, 0(a2) - li a0, 0 - .word 0x0005006b # tmc a0 - ########################## - # lui sp, 0x7ffff - # jal main + # la a2, 0x20000000 + # add a2, a2, a1 + # lw a4, 0(a2) # li a0, 0 # .word 0x0005006b # tmc a0 + ########################## + # li a0, 4 + # .word 0x0005006b # tmc 4 + # csrr a1,0x20 # get tid + # slli a1, a1, 10 # multiply tid by 1024 + # lui sp, 0x6ffff # load base sp + # sub sp, sp, a1 # sub sp - (1024*tid) + + # la a2, 0x20000000 + # csrr a3,0x20 # get tid + # slli a3, a3, 2 + # add a2, a2, a3 + # sw a3, 0(a2) + # lw a4, 0(a2) + + # li a0, 0 + # .word 0x0005006b # tmc a0 + ########################## + li a0, 4 + .word 0x0005006b # tmc 4 + csrr a1,0x20 # get tid + slli a1, a1, 10 # multiply tid by 1024 + lui sp, 0x6ffff # load base sp + sub sp, sp, a1 # sub sp - (1024*tid) + li a0, 4 + .word 0x0005006b # tmc 4 + jal main + li a0, 0 + .word 0x0005006b # tmc a0 diff --git a/runtime/vortex_runtime.dump b/runtime/vortex_runtime.dump index 871f1252..c7208dbf 100644 --- a/runtime/vortex_runtime.dump +++ b/runtime/vortex_runtime.dump @@ -8,140 +8,288 @@ Disassembly of section .text: 80000000: 00400513 li a0,4 80000004: 0005006b 0x5006b 80000008: 020025f3 csrr a1,0x20 -8000000c: 00259593 slli a1,a1,0x2 -80000010: 20000637 lui a2,0x20000 -80000014: 00b60633 add a2,a2,a1 -80000018: 00b62023 sw a1,0(a2) # 20000000 <_start-0x60000000> -8000001c: 40000637 lui a2,0x40000 -80000020: 00b60633 add a2,a2,a1 -80000024: 00500693 li a3,5 -80000028: 00d62023 sw a3,0(a2) # 40000000 <_start-0x40000000> -8000002c: 80000637 lui a2,0x80000 -80000030: 00b60633 add a2,a2,a1 -80000034: 00700693 li a3,7 -80000038: 00d62023 sw a3,0(a2) # 80000000 -8000003c: 60000637 lui a2,0x60000 -80000040: 00b60633 add a2,a2,a1 -80000044: 00700693 li a3,7 -80000048: 00d62023 sw a3,0(a2) # 60000000 <_start-0x20000000> -8000004c: 20000637 lui a2,0x20000 -80000050: 00b60633 add a2,a2,a1 -80000054: 00062703 lw a4,0(a2) # 20000000 <_start-0x60000000> -80000058: 00000513 li a0,0 -8000005c: 0005006b 0x5006b +8000000c: 00a59593 slli a1,a1,0xa +80000010: 6ffff137 lui sp,0x6ffff +80000014: 40b10133 sub sp,sp,a1 +80000018: 00400513 li a0,4 +8000001c: 0005006b 0x5006b +80000020: 380000ef jal ra,800003a0
+80000024: 00000513 li a0,0 +80000028: 0005006b 0x5006b -80000060 : -80000060: 00b5106b 0xb5106b -80000064: 00008067 ret +8000002c : +8000002c: 00b5106b 0xb5106b +80000030: 00008067 ret -80000068 : -80000068: 0005006b 0x5006b -8000006c: 00008067 ret +80000034 : +80000034: 0005006b 0x5006b +80000038: 00008067 ret -80000070 : -80000070: 00b5406b 0xb5406b -80000074: 00008067 ret +8000003c : +8000003c: 00b5406b 0xb5406b +80000040: 00008067 ret -80000078 : -80000078: 0005206b 0x5206b +80000044 : +80000044: 0005206b 0x5206b +80000048: 00008067 ret -8000007c : -8000007c: 0000306b 0x306b -80000080: 00008067 ret +8000004c : +8000004c: 0000306b 0x306b +80000050: 00008067 ret -80000084 : -80000084: 02102573 csrr a0,0x21 -80000088: 00008067 ret +80000054 : +80000054: 02102573 csrr a0,0x21 +80000058: 00008067 ret -8000008c : -8000008c: 02002573 csrr a0,0x20 +8000005c : +8000005c: 02002573 csrr a0,0x20 +80000060: 00008067 ret + +80000064 : +80000064: ff410113 addi sp,sp,-12 # 6fffeff4 <_start-0x1000100c> +80000068: 00112023 sw ra,0(sp) +8000006c: 00b12223 sw a1,4(sp) + +80000070 : +80000070: 00054583 lbu a1,0(a0) +80000074: 00058863 beqz a1,80000084 +80000078: 01c000ef jal ra,80000094 +8000007c: 00150513 addi a0,a0,1 +80000080: ff1ff06f j 80000070 + +80000084 : +80000084: 00012083 lw ra,0(sp) +80000088: 00412583 lw a1,4(sp) +8000008c: 00c10113 addi sp,sp,12 80000090: 00008067 ret -80000094 : -80000094: ff410113 addi sp,sp,-12 -80000098: 00112023 sw ra,0(sp) -8000009c: 00b12223 sw a1,4(sp) +80000094 : +80000094: 000102b7 lui t0,0x10 +80000098: 00b2a023 sw a1,0(t0) # 10000 <_start-0x7fff0000> +8000009c: 00008067 ret -800000a0 : -800000a0: 00054583 lbu a1,0(a0) -800000a4: 00058863 beqz a1,800000b4 -800000a8: 01c000ef jal ra,800000c4 -800000ac: 00150513 addi a0,a0,1 -800000b0: ff1ff06f j 800000a0 +800000a0 : +800000a0: fe010113 addi sp,sp,-32 +800000a4: 00112e23 sw ra,28(sp) +800000a8: 00812c23 sw s0,24(sp) +800000ac: 02010413 addi s0,sp,32 +800000b0: fea42623 sw a0,-20(s0) +800000b4: 810007b7 lui a5,0x81000 +800000b8: fec42703 lw a4,-20(s0) +800000bc: 00271713 slli a4,a4,0x2 +800000c0: 0ac78793 addi a5,a5,172 # 810000ac +800000c4: 00f707b3 add a5,a4,a5 +800000c8: 0007a783 lw a5,0(a5) +800000cc: 00078513 mv a0,a5 +800000d0: f95ff0ef jal ra,80000064 +800000d4: 00000013 nop +800000d8: 01c12083 lw ra,28(sp) +800000dc: 01812403 lw s0,24(sp) +800000e0: 02010113 addi sp,sp,32 +800000e4: 00008067 ret -800000b4 : -800000b4: 00012083 lw ra,0(sp) -800000b8: 00412583 lw a1,4(sp) -800000bc: 00c10113 addi sp,sp,12 -800000c0: 00008067 ret +800000e8 : +800000e8: fe010113 addi sp,sp,-32 +800000ec: 00112e23 sw ra,28(sp) +800000f0: 00812c23 sw s0,24(sp) +800000f4: 02010413 addi s0,sp,32 +800000f8: fea42623 sw a0,-20(s0) +800000fc: feb42423 sw a1,-24(s0) +80000100: fec42503 lw a0,-20(s0) +80000104: f61ff0ef jal ra,80000064 +80000108: fe842503 lw a0,-24(s0) +8000010c: f95ff0ef jal ra,800000a0 +80000110: 810007b7 lui a5,0x81000 +80000114: 04078513 addi a0,a5,64 # 81000040 +80000118: f4dff0ef jal ra,80000064 +8000011c: 00000013 nop +80000120: 01c12083 lw ra,28(sp) +80000124: 01812403 lw s0,24(sp) +80000128: 02010113 addi sp,sp,32 +8000012c: 00008067 ret -800000c4 : -800000c4: 000102b7 lui t0,0x10 -800000c8: 00b2a023 sw a1,0(t0) # 10000 <_start-0x7fff0000> -800000cc: 00008067 ret - -800000d0 : -800000d0: fe010113 addi sp,sp,-32 -800000d4: 00112e23 sw ra,28(sp) -800000d8: 00812c23 sw s0,24(sp) -800000dc: 02010413 addi s0,sp,32 -800000e0: fea42623 sw a0,-20(s0) -800000e4: 810007b7 lui a5,0x81000 -800000e8: fec42703 lw a4,-20(s0) -800000ec: 00271713 slli a4,a4,0x2 -800000f0: 08478793 addi a5,a5,132 # 81000084 -800000f4: 00f707b3 add a5,a4,a5 -800000f8: 0007a783 lw a5,0(a5) -800000fc: 00078513 mv a0,a5 -80000100: f95ff0ef jal ra,80000094 -80000104: 00000013 nop -80000108: 01c12083 lw ra,28(sp) -8000010c: 01812403 lw s0,24(sp) -80000110: 02010113 addi sp,sp,32 -80000114: 00008067 ret - -80000118 : -80000118: fe010113 addi sp,sp,-32 -8000011c: 00112e23 sw ra,28(sp) -80000120: 00812c23 sw s0,24(sp) -80000124: 02010413 addi s0,sp,32 -80000128: fea42623 sw a0,-20(s0) -8000012c: feb42423 sw a1,-24(s0) -80000130: fec42503 lw a0,-20(s0) -80000134: f61ff0ef jal ra,80000094 -80000138: fe842503 lw a0,-24(s0) -8000013c: f95ff0ef jal ra,800000d0 +80000130 : +80000130: fe010113 addi sp,sp,-32 +80000134: 00112e23 sw ra,28(sp) +80000138: 00812c23 sw s0,24(sp) +8000013c: 02010413 addi s0,sp,32 80000140: 810007b7 lui a5,0x81000 -80000144: 04078513 addi a0,a5,64 # 81000040 -80000148: f4dff0ef jal ra,80000094 -8000014c: 00000013 nop -80000150: 01c12083 lw ra,28(sp) -80000154: 01812403 lw s0,24(sp) -80000158: 02010113 addi sp,sp,32 -8000015c: 00008067 ret +80000144: 08478513 addi a0,a5,132 # 81000084 +80000148: f1dff0ef jal ra,80000064 +8000014c: 00400513 li a0,4 +80000150: ee5ff0ef jal ra,80000034 +80000154: f09ff0ef jal ra,8000005c +80000158: fea42623 sw a0,-20(s0) +8000015c: fec42703 lw a4,-20(s0) +80000160: 810007b7 lui a5,0x81000 +80000164: fec42683 lw a3,-20(s0) +80000168: 00269693 slli a3,a3,0x2 +8000016c: 12c78793 addi a5,a5,300 # 8100012c +80000170: 00f687b3 add a5,a3,a5 +80000174: 00e7a023 sw a4,0(a5) +80000178: 00100513 li a0,1 +8000017c: eb9ff0ef jal ra,80000034 +80000180: 810007b7 lui a5,0x81000 +80000184: 12c7a783 lw a5,300(a5) # 8100012c +80000188: 00078513 mv a0,a5 +8000018c: f15ff0ef jal ra,800000a0 +80000190: 810007b7 lui a5,0x81000 +80000194: 09078513 addi a0,a5,144 # 81000090 +80000198: ecdff0ef jal ra,80000064 +8000019c: 810007b7 lui a5,0x81000 +800001a0: 12c78793 addi a5,a5,300 # 8100012c +800001a4: 0047a783 lw a5,4(a5) +800001a8: 00078513 mv a0,a5 +800001ac: ef5ff0ef jal ra,800000a0 +800001b0: 810007b7 lui a5,0x81000 +800001b4: 09078513 addi a0,a5,144 # 81000090 +800001b8: eadff0ef jal ra,80000064 +800001bc: 810007b7 lui a5,0x81000 +800001c0: 12c78793 addi a5,a5,300 # 8100012c +800001c4: 0087a783 lw a5,8(a5) +800001c8: 00078513 mv a0,a5 +800001cc: ed5ff0ef jal ra,800000a0 +800001d0: 810007b7 lui a5,0x81000 +800001d4: 09078513 addi a0,a5,144 # 81000090 +800001d8: e8dff0ef jal ra,80000064 +800001dc: 810007b7 lui a5,0x81000 +800001e0: 12c78793 addi a5,a5,300 # 8100012c +800001e4: 00c7a783 lw a5,12(a5) +800001e8: 00078513 mv a0,a5 +800001ec: eb5ff0ef jal ra,800000a0 +800001f0: 810007b7 lui a5,0x81000 +800001f4: 09078513 addi a0,a5,144 # 81000090 +800001f8: e6dff0ef jal ra,80000064 +800001fc: 00000013 nop +80000200: 01c12083 lw ra,28(sp) +80000204: 01812403 lw s0,24(sp) +80000208: 02010113 addi sp,sp,32 +8000020c: 00008067 ret -80000160
: -80000160: fe010113 addi sp,sp,-32 -80000164: 00112e23 sw ra,28(sp) -80000168: 00812c23 sw s0,24(sp) -8000016c: 02010413 addi s0,sp,32 -80000170: 00400513 li a0,4 -80000174: ef5ff0ef jal ra,80000068 -80000178: f15ff0ef jal ra,8000008c -8000017c: fea42623 sw a0,-20(s0) -80000180: fec42703 lw a4,-20(s0) -80000184: 810007b7 lui a5,0x81000 -80000188: fec42683 lw a3,-20(s0) -8000018c: 00269693 slli a3,a3,0x2 -80000190: 10478793 addi a5,a5,260 # 81000104 -80000194: 00f687b3 add a5,a3,a5 -80000198: 00e7a023 sw a4,0(a5) -8000019c: 00000793 li a5,0 -800001a0: 00078513 mv a0,a5 -800001a4: 01c12083 lw ra,28(sp) -800001a8: 01812403 lw s0,24(sp) -800001ac: 02010113 addi sp,sp,32 -800001b0: 00008067 ret +80000210 : +80000210: fe010113 addi sp,sp,-32 +80000214: 00112e23 sw ra,28(sp) +80000218: 00812c23 sw s0,24(sp) +8000021c: 02010413 addi s0,sp,32 +80000220: e3dff0ef jal ra,8000005c +80000224: fea42623 sw a0,-20(s0) +80000228: fec42783 lw a5,-20(s0) +8000022c: 0027b793 sltiu a5,a5,2 +80000230: fef405a3 sb a5,-21(s0) +80000234: feb44783 lbu a5,-21(s0) +80000238: 00078513 mv a0,a5 +8000023c: e09ff0ef jal ra,80000044 +80000240: feb44783 lbu a5,-21(s0) +80000244: 06078463 beqz a5,800002ac +80000248: fec42783 lw a5,-20(s0) +8000024c: 0017b793 seqz a5,a5 +80000250: fef40523 sb a5,-22(s0) +80000254: fea44783 lbu a5,-22(s0) +80000258: 00078513 mv a0,a5 +8000025c: de9ff0ef jal ra,80000044 +80000260: fea44783 lbu a5,-22(s0) +80000264: 02078263 beqz a5,80000288 +80000268: 810007b7 lui a5,0x81000 +8000026c: fec42703 lw a4,-20(s0) +80000270: 00271713 slli a4,a4,0x2 +80000274: 12c78793 addi a5,a5,300 # 8100012c +80000278: 00f707b3 add a5,a4,a5 +8000027c: 00a00713 li a4,10 +80000280: 00e7a023 sw a4,0(a5) +80000284: 0200006f j 800002a4 +80000288: 810007b7 lui a5,0x81000 +8000028c: fec42703 lw a4,-20(s0) +80000290: 00271713 slli a4,a4,0x2 +80000294: 12c78793 addi a5,a5,300 # 8100012c +80000298: 00f707b3 add a5,a4,a5 +8000029c: 00b00713 li a4,11 +800002a0: 00e7a023 sw a4,0(a5) +800002a4: da9ff0ef jal ra,8000004c +800002a8: 0640006f j 8000030c +800002ac: fec42783 lw a5,-20(s0) +800002b0: 0037b793 sltiu a5,a5,3 +800002b4: fef404a3 sb a5,-23(s0) +800002b8: fe944783 lbu a5,-23(s0) +800002bc: 00078513 mv a0,a5 +800002c0: d85ff0ef jal ra,80000044 +800002c4: fe944783 lbu a5,-23(s0) +800002c8: 02078263 beqz a5,800002ec +800002cc: 810007b7 lui a5,0x81000 +800002d0: fec42703 lw a4,-20(s0) +800002d4: 00271713 slli a4,a4,0x2 +800002d8: 12c78793 addi a5,a5,300 # 8100012c +800002dc: 00f707b3 add a5,a4,a5 +800002e0: 00c00713 li a4,12 +800002e4: 00e7a023 sw a4,0(a5) +800002e8: 0200006f j 80000308 +800002ec: 810007b7 lui a5,0x81000 +800002f0: fec42703 lw a4,-20(s0) +800002f4: 00271713 slli a4,a4,0x2 +800002f8: 12c78793 addi a5,a5,300 # 8100012c +800002fc: 00f707b3 add a5,a4,a5 +80000300: 00d00713 li a4,13 +80000304: 00e7a023 sw a4,0(a5) +80000308: d45ff0ef jal ra,8000004c +8000030c: d41ff0ef jal ra,8000004c +80000310: 810007b7 lui a5,0x81000 +80000314: 12c7a783 lw a5,300(a5) # 8100012c +80000318: 00078513 mv a0,a5 +8000031c: d85ff0ef jal ra,800000a0 +80000320: 810007b7 lui a5,0x81000 +80000324: 09078513 addi a0,a5,144 # 81000090 +80000328: d3dff0ef jal ra,80000064 +8000032c: 810007b7 lui a5,0x81000 +80000330: 12c78793 addi a5,a5,300 # 8100012c +80000334: 0047a783 lw a5,4(a5) +80000338: 00078513 mv a0,a5 +8000033c: d65ff0ef jal ra,800000a0 +80000340: 810007b7 lui a5,0x81000 +80000344: 09078513 addi a0,a5,144 # 81000090 +80000348: d1dff0ef jal ra,80000064 +8000034c: 810007b7 lui a5,0x81000 +80000350: 12c78793 addi a5,a5,300 # 8100012c +80000354: 0087a783 lw a5,8(a5) +80000358: 00078513 mv a0,a5 +8000035c: d45ff0ef jal ra,800000a0 +80000360: 810007b7 lui a5,0x81000 +80000364: 09078513 addi a0,a5,144 # 81000090 +80000368: cfdff0ef jal ra,80000064 +8000036c: 810007b7 lui a5,0x81000 +80000370: 12c78793 addi a5,a5,300 # 8100012c +80000374: 00c7a783 lw a5,12(a5) +80000378: 00078513 mv a0,a5 +8000037c: d25ff0ef jal ra,800000a0 +80000380: 810007b7 lui a5,0x81000 +80000384: 09078513 addi a0,a5,144 # 81000090 +80000388: cddff0ef jal ra,80000064 +8000038c: 00000013 nop +80000390: 01c12083 lw ra,28(sp) +80000394: 01812403 lw s0,24(sp) +80000398: 02010113 addi sp,sp,32 +8000039c: 00008067 ret + +800003a0
: +800003a0: ff010113 addi sp,sp,-16 +800003a4: 00112623 sw ra,12(sp) +800003a8: 00812423 sw s0,8(sp) +800003ac: 01010413 addi s0,sp,16 +800003b0: 00100513 li a0,1 +800003b4: c81ff0ef jal ra,80000034 +800003b8: d79ff0ef jal ra,80000130 +800003bc: 810007b7 lui a5,0x81000 +800003c0: 09478513 addi a0,a5,148 # 81000094 +800003c4: ca1ff0ef jal ra,80000064 +800003c8: 00400513 li a0,4 +800003cc: c69ff0ef jal ra,80000034 +800003d0: e41ff0ef jal ra,80000210 +800003d4: 00100513 li a0,1 +800003d8: c5dff0ef jal ra,80000034 +800003dc: 00000793 li a5,0 +800003e0: 00078513 mv a0,a5 +800003e4: 00c12083 lw ra,12(sp) +800003e8: 00812403 lw s0,8(sp) +800003ec: 01010113 addi sp,sp,16 +800003f0: 00008067 ret Disassembly of section .rodata: @@ -168,7 +316,7 @@ Disassembly of section .rodata: 8100002a: 0000 unimp 8100002c: 0062 c.slli zero,0x18 8100002e: 0000 unimp -81000030: 00000063 beqz zero,81000030 +81000030: 00000063 beqz zero,81000030 81000034: 0064 addi s1,sp,12 81000036: 0000 unimp 81000038: 0065 c.nop 25 @@ -199,86 +347,104 @@ Disassembly of section .rodata: 8100006e: 0000 unimp 81000070: 0062 c.slli zero,0x18 81000072: 0000 unimp -81000074: 00000063 beqz zero,81000074 +81000074: 00000063 beqz zero,81000074 81000078: 0064 addi s1,sp,12 8100007a: 0000 unimp 8100007c: 0065 c.nop 25 8100007e: 0000 unimp 81000080: 0066 c.slli zero,0x19 +81000082: 0000 unimp +81000084: 6574 flw fa3,76(a0) +81000086: 745f7473 csrrci s0,0x745,30 +8100008a: 636d lui t1,0x1b +8100008c: 000a c.slli zero,0x2 +8100008e: 0000 unimp +81000090: 000a c.slli zero,0x2 +81000092: 0000 unimp +81000094: 6e32 flw ft8,12(sp) +81000096: 7765 lui a4,0xffff9 +81000098: 7420 flw fs0,104(s0) +8100009a: 7365 lui t1,0xffff9 +8100009c: 5f74 lw a3,124(a4) +8100009e: 6964 flw fs1,84(a0) +810000a0: 6576 flw fa0,92(sp) +810000a2: 6772 flw fa4,28(sp) +810000a4: 6e65 lui t3,0x19 +810000a6: 000a6563 bltu s4,zero,810000b0 Disassembly of section .data: -81000084 : -81000084: 0000 unimp -81000086: 8100 0x8100 -81000088: 0004 0x4 -8100008a: 8100 0x8100 -8100008c: 0008 0x8 -8100008e: 8100 0x8100 -81000090: 000c 0xc -81000092: 8100 0x8100 -81000094: 0010 0x10 -81000096: 8100 0x8100 -81000098: 0014 0x14 -8100009a: 8100 0x8100 -8100009c: 0018 0x18 -8100009e: 8100 0x8100 -810000a0: 001c 0x1c -810000a2: 8100 0x8100 -810000a4: 0020 addi s0,sp,8 -810000a6: 8100 0x8100 -810000a8: 0024 addi s1,sp,8 -810000aa: 8100 0x8100 -810000ac: 0028 addi a0,sp,8 +810000ac : +810000ac: 0000 unimp 810000ae: 8100 0x8100 -810000b0: 002c addi a1,sp,8 +810000b0: 0004 0x4 810000b2: 8100 0x8100 -810000b4: 0030 addi a2,sp,8 +810000b4: 0008 0x8 810000b6: 8100 0x8100 -810000b8: 0034 addi a3,sp,8 +810000b8: 000c 0xc 810000ba: 8100 0x8100 -810000bc: 0038 addi a4,sp,8 +810000bc: 0010 0x10 810000be: 8100 0x8100 -810000c0: 003c addi a5,sp,8 +810000c0: 0014 0x14 810000c2: 8100 0x8100 - -810000c4 : -810000c4: 0044 addi s1,sp,4 +810000c4: 0018 0x18 810000c6: 8100 0x8100 -810000c8: 0048 addi a0,sp,4 +810000c8: 001c 0x1c 810000ca: 8100 0x8100 -810000cc: 004c addi a1,sp,4 +810000cc: 0020 addi s0,sp,8 810000ce: 8100 0x8100 -810000d0: 0050 addi a2,sp,4 +810000d0: 0024 addi s1,sp,8 810000d2: 8100 0x8100 -810000d4: 0054 addi a3,sp,4 +810000d4: 0028 addi a0,sp,8 810000d6: 8100 0x8100 -810000d8: 0058 addi a4,sp,4 +810000d8: 002c addi a1,sp,8 810000da: 8100 0x8100 -810000dc: 005c addi a5,sp,4 +810000dc: 0030 addi a2,sp,8 810000de: 8100 0x8100 -810000e0: 0060 addi s0,sp,12 +810000e0: 0034 addi a3,sp,8 810000e2: 8100 0x8100 -810000e4: 0064 addi s1,sp,12 +810000e4: 0038 addi a4,sp,8 810000e6: 8100 0x8100 -810000e8: 0068 addi a0,sp,12 +810000e8: 003c addi a5,sp,8 810000ea: 8100 0x8100 -810000ec: 006c addi a1,sp,12 + +810000ec : +810000ec: 0044 addi s1,sp,4 810000ee: 8100 0x8100 -810000f0: 0070 addi a2,sp,12 +810000f0: 0048 addi a0,sp,4 810000f2: 8100 0x8100 -810000f4: 0074 addi a3,sp,12 +810000f4: 004c addi a1,sp,4 810000f6: 8100 0x8100 -810000f8: 0078 addi a4,sp,12 +810000f8: 0050 addi a2,sp,4 810000fa: 8100 0x8100 -810000fc: 007c addi a5,sp,12 +810000fc: 0054 addi a3,sp,4 810000fe: 8100 0x8100 -81000100: 0080 addi s0,sp,64 +81000100: 0058 addi a4,sp,4 81000102: 8100 0x8100 +81000104: 005c addi a5,sp,4 +81000106: 8100 0x8100 +81000108: 0060 addi s0,sp,12 +8100010a: 8100 0x8100 +8100010c: 0064 addi s1,sp,12 +8100010e: 8100 0x8100 +81000110: 0068 addi a0,sp,12 +81000112: 8100 0x8100 +81000114: 006c addi a1,sp,12 +81000116: 8100 0x8100 +81000118: 0070 addi a2,sp,12 +8100011a: 8100 0x8100 +8100011c: 0074 addi a3,sp,12 +8100011e: 8100 0x8100 +81000120: 0078 addi a4,sp,12 +81000122: 8100 0x8100 +81000124: 007c addi a5,sp,12 +81000126: 8100 0x8100 +81000128: 0080 addi s0,sp,64 +8100012a: 8100 0x8100 Disassembly of section .bss: -81000104 : +8100012c : ... Disassembly of section .comment: diff --git a/runtime/vortex_runtime.elf b/runtime/vortex_runtime.elf index c6deec571a8b8fdded2a771c34f3a55985f3b304..3e9e39fae827fc2361b6bdfafb94a869b0093962 100644 GIT binary patch delta 1215 zcmah|Ur19?82`?$*P9}A=Z%D3_UB%=mqAg1K?(~-GxN!M=s_7Wmx-yhhDLwb)+Rm$ z+L3&U87e}IPz-z$Dy&5%nMp}O3PoC3n=tZi_MLn0wo4%0arwUA`Tf4{JLh-LUN5T7pI+MuAj`1AloX;^n=9tgXyAGfOsmhswKOm2; zgT#xdP5e~($yBA(FRe;MRxKz*q^}#IQcZx5Dz>hdlnzkVSolZ#kW@ zEVGTq3YLd`D+bHn6)h`x{@%d9E%F7+)rPvIZ;E`u@+Cw4f&>6D>l4B$Q6dpyYCjVJ zz+^{um_b&eAe9iJT@K3&bmfCq?8Wb8d8u8n#B730H-oy$wt`V$g!HEB!sEE-FCD4lTtX^A*icZ8w4LFFn(SWZZ z9yZ`OVvhmeM!bQz9wX~F0$>N1L2w_5QG?(i;#zvsZaLXzP%SVO2NB$w+GYvkAp(&# z9e={uY|yVG7EKTtB=o8Mcxh98W79V11Z!Gj)^@T131FdbRJn)aPbV5E)E0lev zEJy8%gFK*@6^S-06+AOLNxKxejG5-L;S`liDk5>wImOAlacz-)S7ct40k)~RTIK}> Ipbj(XFJ5bjkN^Mx delta 587 zcmZ9IKS&%w6vn^Vf2YX>_p%8{Ah@?Pw^v$NSOj4eK|NChk|Is;1Wgnha|96)+|?5s z18N3#4pCFYPI< zuZimRY~YvyuC9l!`1>pU?S@{gH@;;ME%@~Yeepp{efHtfKgi8XPzG=oGgxp>gsvjt z2%#NbJ9hS9HPAl0RoZh;b5$S$FP(S13|h>$=Qe>bCP1K@&l-Ktw>^q$gCwWpHJ#_3 zxq2$|jvf95k