fixed global object sharing between cores

This commit is contained in:
Blaise Tine
2020-12-24 19:36:07 -05:00
parent 703a861fe9
commit 4f689c4ce9
46 changed files with 6710 additions and 6792 deletions

View File

@@ -52,7 +52,7 @@ clean:
rm -rf $(PROJECT) *.o .depend rm -rf $(PROJECT) *.o .depend
clean-all: clean clean-all: clean
rm *.pocl *.dump rm -rf *.pocl *.dump
ifneq ($(MAKECMDGOALS),clean) ifneq ($(MAKECMDGOALS),clean)
-include .depend -include .depend

View File

@@ -52,7 +52,7 @@ clean:
rm -rf $(PROJECT) *.o .depend rm -rf $(PROJECT) *.o .depend
clean-all: clean clean-all: clean
rm *.pocl *.dump rm -rf *.pocl *.dump
ifneq ($(MAKECMDGOALS),clean) ifneq ($(MAKECMDGOALS),clean)
-include .depend -include .depend

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -55,7 +55,7 @@ clean:
rm -rf $(PROJECT) *.o .depend rm -rf $(PROJECT) *.o .depend
clean-all: clean clean-all: clean
rm *.pocl *.dump rm -rf *.pocl *.dump
ifneq ($(MAKECMDGOALS),clean) ifneq ($(MAKECMDGOALS),clean)
-include .depend -include .depend

View File

@@ -52,7 +52,7 @@ clean:
rm -rf $(PROJECT) *.o .depend rm -rf $(PROJECT) *.o .depend
clean-all: clean clean-all: clean
rm *.pocl *.dump rm -rf *.pocl *.dump
ifneq ($(MAKECMDGOALS),clean) ifneq ($(MAKECMDGOALS),clean)
-include .depend -include .depend

View File

@@ -55,7 +55,7 @@ clean:
rm -rf $(PROJECT) *.o .depend rm -rf $(PROJECT) *.o .depend
clean-all: clean clean-all: clean
rm *.pocl *.dump rm -rf *.pocl *.dump
ifneq ($(MAKECMDGOALS),clean) ifneq ($(MAKECMDGOALS),clean)
-include .depend -include .depend

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@@ -54,7 +54,7 @@ clean:
rm -rf $(PROJECT) *.o .depend rm -rf $(PROJECT) *.o .depend
clean-all: clean clean-all: clean
rm *.pocl *.dump rm -rf *.pocl *.dump
ifneq ($(MAKECMDGOALS),clean) ifneq ($(MAKECMDGOALS),clean)
-include .depend -include .depend

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@@ -54,7 +54,7 @@ clean:
rm -rf $(PROJECT) *.o .depend rm -rf $(PROJECT) *.o .depend
clean-all: clean clean-all: clean
rm *.pocl *.dump rm -rf *.pocl *.dump
ifneq ($(MAKECMDGOALS),clean) ifneq ($(MAKECMDGOALS),clean)
-include .depend -include .depend

File diff suppressed because it is too large Load Diff

View File

@@ -54,7 +54,7 @@ clean:
rm -rf $(PROJECT) *.o .depend rm -rf $(PROJECT) *.o .depend
clean-all: clean clean-all: clean
rm *.pocl *.dump rm -rf *.pocl *.dump
ifneq ($(MAKECMDGOALS),clean) ifneq ($(MAKECMDGOALS),clean)
-include .depend -include .depend

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@@ -52,7 +52,7 @@ clean:
rm -rf $(PROJECT) *.o .depend rm -rf $(PROJECT) *.o .depend
clean-all: clean clean-all: clean
rm *.pocl *.dump rm -rf *.pocl *.dump
ifneq ($(MAKECMDGOALS),clean) ifneq ($(MAKECMDGOALS),clean)
-include .depend -include .depend

View File

@@ -54,7 +54,7 @@ clean:
rm -rf $(PROJECT) *.o .depend rm -rf $(PROJECT) *.o .depend
clean-all: clean clean-all: clean
rm *.pocl *.dump rm -rf *.pocl *.dump
ifneq ($(MAKECMDGOALS),clean) ifneq ($(MAKECMDGOALS),clean)
-include .depend -include .depend

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@@ -61,8 +61,8 @@ run-simx: $(PROJECT)
clean: clean:
rm -rf $(PROJECT) *.o .depend rm -rf $(PROJECT) *.o .depend
clean-all: clean-all: clean
rm -rf $(PROJECT) *.o *.elf *.bin *.dump .depend rm -rf *.elf *.bin *.dump
ifneq ($(MAKECMDGOALS),clean) ifneq ($(MAKECMDGOALS),clean)
-include .depend -include .depend

Binary file not shown.

View File

@@ -390,7 +390,7 @@ Disassembly of section .text:
80000518: 00492703 lw a4,4(s2) 80000518: 00492703 lw a4,4(s2)
8000051c: 148a2783 lw a5,328(s4) 8000051c: 148a2783 lw a5,328(s4)
80000520: 01871463 bne a4,s8,80000528 <__call_exitprocs+0xe4> 80000520: 01871463 bne a4,s8,80000528 <__call_exitprocs+0xe4>
80000524: f92784e3 beq a5,s2,800004ac <__call_exitprocs+0x68> 80000524: f8f904e3 beq s2,a5,800004ac <__call_exitprocs+0x68>
80000528: f80788e3 beqz a5,800004b8 <__call_exitprocs+0x74> 80000528: f80788e3 beqz a5,800004b8 <__call_exitprocs+0x74>
8000052c: 00078913 mv s2,a5 8000052c: 00078913 mv s2,a5
80000530: f5dff06f j 8000048c <__call_exitprocs+0x48> 80000530: f5dff06f j 8000048c <__call_exitprocs+0x48>
@@ -450,20 +450,21 @@ Disassembly of section .comment:
0: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm 0: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm
4: 2820 fld fs0,80(s0) 4: 2820 fld fs0,80(s0)
6: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm 6: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm
a: 3120 fld fs0,96(a0) a: 3920 fld fs0,112(a0)
c: 2e30 fld fa2,88(a2) c: 322e fld ft4,232(sp)
e: 2e32 fld ft8,264(sp) e: 302e fld ft0,232(sp)
10: 0030 addi a2,sp,8 ...
Disassembly of section .riscv.attributes: Disassembly of section .riscv.attributes:
00000000 <.riscv.attributes>: 00000000 <.riscv.attributes>:
0: 2941 jal 490 <_start-0x7ffffb70> 0: 2541 jal 680 <_start-0x7ffff980>
2: 0000 unimp 2: 0000 unimp
4: 7200 flw fs0,32(a2) 4: 7200 flw fs0,32(a2)
6: 7369 lui t1,0xffffa 6: 7369 lui t1,0xffffa
8: 01007663 bgeu zero,a6,14 <_start-0x7fffffec> 8: 01007663 bgeu zero,a6,14 <_start-0x7fffffec>
c: 001f 0000 1004 0x10040000001f c: 0000001b 0x1b
10: 1004 addi s1,sp,32
12: 7205 lui tp,0xfffe1 12: 7205 lui tp,0xfffe1
14: 3376 fld ft6,376(sp) 14: 3376 fld ft6,376(sp)
16: 6932 flw fs2,12(sp) 16: 6932 flw fs2,12(sp)
@@ -472,5 +473,3 @@ Disassembly of section .riscv.attributes:
1c: 326d jal fffff9c6 <__global_pointer$+0x7fffdc5e> 1c: 326d jal fffff9c6 <__global_pointer$+0x7fffdc5e>
1e: 3070 fld fa2,224(s0) 1e: 3070 fld fa2,224(s0)
20: 665f 7032 0030 0x307032665f 20: 665f 7032 0030 0x307032665f
26: 0108 addi a0,sp,128
28: 0b0a slli s6,s6,0x2

Binary file not shown.

View File

@@ -59,8 +59,8 @@ run-simx: $(PROJECT)
clean: clean:
rm -rf $(PROJECT) *.o .depend rm -rf $(PROJECT) *.o .depend
clean-all: clean-all: clean
rm -rf $(PROJECT) *.o *.elf *.bin *.dump .depend rm -rf *.elf *.bin *.dump
ifneq ($(MAKECMDGOALS),clean) ifneq ($(MAKECMDGOALS),clean)
-include .depend -include .depend

Binary file not shown.

View File

@@ -461,7 +461,7 @@ Disassembly of section .text:
8000061c: 00492703 lw a4,4(s2) 8000061c: 00492703 lw a4,4(s2)
80000620: 148a2783 lw a5,328(s4) 80000620: 148a2783 lw a5,328(s4)
80000624: 01871463 bne a4,s8,8000062c <__call_exitprocs+0xe4> 80000624: 01871463 bne a4,s8,8000062c <__call_exitprocs+0xe4>
80000628: f92784e3 beq a5,s2,800005b0 <__call_exitprocs+0x68> 80000628: f8f904e3 beq s2,a5,800005b0 <__call_exitprocs+0x68>
8000062c: f80788e3 beqz a5,800005bc <__call_exitprocs+0x74> 8000062c: f80788e3 beqz a5,800005bc <__call_exitprocs+0x74>
80000630: 00078913 mv s2,a5 80000630: 00078913 mv s2,a5
80000634: f5dff06f j 80000590 <__call_exitprocs+0x48> 80000634: f5dff06f j 80000590 <__call_exitprocs+0x48>
@@ -527,20 +527,21 @@ Disassembly of section .comment:
0: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm 0: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm
4: 2820 fld fs0,80(s0) 4: 2820 fld fs0,80(s0)
6: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm 6: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm
a: 3120 fld fs0,96(a0) a: 3920 fld fs0,112(a0)
c: 2e30 fld fa2,88(a2) c: 322e fld ft4,232(sp)
e: 2e32 fld ft8,264(sp) e: 302e fld ft0,232(sp)
10: 0030 addi a2,sp,8 ...
Disassembly of section .riscv.attributes: Disassembly of section .riscv.attributes:
00000000 <.riscv.attributes>: 00000000 <.riscv.attributes>:
0: 2941 jal 490 <_start-0x7ffffb70> 0: 2541 jal 680 <_start-0x7ffff980>
2: 0000 unimp 2: 0000 unimp
4: 7200 flw fs0,32(a2) 4: 7200 flw fs0,32(a2)
6: 7369 lui t1,0xffffa 6: 7369 lui t1,0xffffa
8: 01007663 bgeu zero,a6,14 <_start-0x7fffffec> 8: 01007663 bgeu zero,a6,14 <_start-0x7fffffec>
c: 001f 0000 1004 0x10040000001f c: 0000001b 0x1b
10: 1004 addi s1,sp,32
12: 7205 lui tp,0xfffe1 12: 7205 lui tp,0xfffe1
14: 3376 fld ft6,376(sp) 14: 3376 fld ft6,376(sp)
16: 6932 flw fs2,12(sp) 16: 6932 flw fs2,12(sp)
@@ -549,5 +550,3 @@ Disassembly of section .riscv.attributes:
1c: 326d jal fffff9c6 <__global_pointer$+0x7fffdb56> 1c: 326d jal fffff9c6 <__global_pointer$+0x7fffdb56>
1e: 3070 fld fa2,224(s0) 1e: 3070 fld fa2,224(s0)
20: 665f 7032 0030 0x307032665f 20: 665f 7032 0030 0x307032665f
26: 0108 addi a0,sp,128
28: 0b0a slli s6,s6,0x2

Binary file not shown.

View File

@@ -60,8 +60,8 @@ run-simx: $(PROJECT)
clean: clean:
rm -rf $(PROJECT) *.o .depend rm -rf $(PROJECT) *.o .depend
clean-all: clean-all: clean
rm -rf $(PROJECT) *.o *.elf *.bin *.dump .depend rm -rf *.elf *.bin *.dump
ifneq ($(MAKECMDGOALS),clean) ifneq ($(MAKECMDGOALS),clean)
-include .depend -include .depend

Binary file not shown.

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@@ -6,35 +6,61 @@
extern "C" { extern "C" {
#endif #endif
#define NUM_CORES_MAX 8
typedef struct { typedef struct {
func_t function; func_t function;
void * arguments; void * arguments;
int nthreads; int nthreads;
} spawn_t; } spawn_t;
spawn_t* g_spawn = NULL; spawn_t* g_spawn[NUM_CORES_MAX];
void spawn_warp_runonce() { void spawn_warp_all() {
// active all threads // active all threads
vx_tmc(g_spawn->nthreads); int num_threads = vx_num_threads();
vx_tmc(num_threads);
int core_id = vx_core_id();
spawn_t* p_spawn = g_spawn[core_id];
// call user routine // call user routine
g_spawn->function(g_spawn->arguments); p_spawn->function(p_spawn->arguments);
// resume single-thread execution on exit // resume single-warp execution on exit
int wid = vx_warp_id();
unsigned tmask = (0 == wid) ? 0x1 : 0x0;
vx_tmc(tmask);
}
void spawn_warp_threads(int num_threads) {
// active all threads
vx_tmc(num_threads);
int core_id = vx_core_id();
spawn_t* p_spawn = g_spawn[core_id];
// call user routine
p_spawn->function(p_spawn->arguments);
// resume single-warp execution on exit
int wid = vx_warp_id(); int wid = vx_warp_id();
unsigned tmask = (0 == wid) ? 0x1 : 0x0; unsigned tmask = (0 == wid) ? 0x1 : 0x0;
vx_tmc(tmask); vx_tmc(tmask);
} }
void vx_spawn_warps(int num_warps, int num_threads, func_t func_ptr , void * args) { void vx_spawn_warps(int num_warps, int num_threads, func_t func_ptr , void * args) {
spawn_t spawn = { func_ptr, args, num_threads }; int core_id = vx_core_id();
g_spawn = &spawn; if (core_id >= NUM_CORES_MAX)
return;
spawn_t spawn = { func_ptr, args, num_threads };
g_spawn[core_id] = &spawn;
if (num_warps > 1) { if (num_warps > 1) {
vx_wspawn(num_warps, (unsigned)spawn_warp_runonce); vx_wspawn(num_warps, (unsigned)spawn_warp_all);
} }
spawn_warp_runonce(); spawn_warp_threads(num_threads);
} }
#ifdef __cplusplus #ifdef __cplusplus

View File

@@ -4,24 +4,37 @@
.global _start .global _start
.type _start, @function .type _start, @function
_start: _start:
# execute stack initialization on all warps
la a1, vx_set_sp la a1, vx_set_sp
csrr a0, CSR_NW # get num warps csrr a0, CSR_NW # get num warps
.word 0x00b5106b # wspawn a0(numWarps), a1(PC SPAWN) .word 0x00b5106b # wspawn a0, a1
jal vx_set_sp jal vx_set_sp
# return back to single thread execution
li a0, 1 li a0, 1
.word 0x0005006b # back to single thread .word 0x0005006b # tmc a0
# Initialize global pointerp
# call __cxx_global_var_init
# Clear the bss segment # Clear the bss segment
la a0, _edata la a0, _edata
la a2, _end la a2, _end
sub a2, a2, a0 sub a2, a2, a0
li a1, 0 li a1, 0
call memset call memset
la a0, __libc_fini_array # Register global termination functions
call atexit # to be called upon exit # Register global termination functions
call __libc_init_array # Run global initialization functions la a0, __libc_fini_array
# to be called upon exit
call atexit
# Run global initialization functions
call __libc_init_array
# call main program routine
call main call main
# call exit routine
tail exit tail exit
.size _start, .-_start .size _start, .-_start
@@ -29,34 +42,39 @@ _start:
.type _exit, @function .type _exit, @function
.global _exit .global _exit
_exit: _exit:
# disable all threads in current warp
li a0, 0 li a0, 0
.word 0x0005006b # disable all threads .word 0x0005006b # tmc a0
.section .text .section .text
.type vx_set_sp, @function .type vx_set_sp, @function
.global vx_set_sp .global vx_set_sp
vx_set_sp: vx_set_sp:
# activate all threads
csrr a0, CSR_NT # get num threads csrr a0, CSR_NT # get num threads
.word 0x0005006b # activate all threads .word 0x0005006b # set thread mask
# set global pointer register
.option push .option push
.option norelax .option norelax
1:auipc gp, %pcrel_hi(__global_pointer$) la gp, __global_pointer$
addi gp, gp, %pcrel_lo(1b)
.option pop .option pop
# allocate stack region for a threads on the processor
# set stack pointer
csrr a1, CSR_GTID # get global thread id csrr a1, CSR_GTID # get global thread id
slli a1, a1, 10 # multiply by 1024 slli a1, a1, 10 # multiply by 1024
csrr a2, CSR_LTID # get local thread id csrr a2, CSR_LTID # get local thread id
slli a2, a2, 2 # multiply by 4 slli a2, a2, 2 # multiply by 4
lui sp, (SHARED_MEM_BASE_ADDR>>12) # load base sp la sp, __stack_top$ # load stack base address
sub sp, sp, a1 # sub thread block sub sp, sp, a1 # sub thread block
add sp, sp, a2 # reduce addr collision for perf add sp, sp, a2 # reduce addr collision for perf
csrr a3, CSR_LWID # get wid # disable active warps except warp0
csrr a3, CSR_LWID # get local wid
beqz a3, RETURN beqz a3, RETURN
li a0, 0 li a0, 0
.word 0x0005006b # tmc 0 .word 0x0005006b # tmc a0
RETURN: RETURN:
ret ret

View File

@@ -271,11 +271,11 @@ Disassembly of section .text:
80000374: 00008067 ret 80000374: 00008067 ret
80000378 <vx_num_cycles>: 80000378 <vx_num_cycles>:
80000378: c0002573 rdcycle a0 80000378: b0002573 csrr a0,mcycle
8000037c: 00008067 ret 8000037c: 00008067 ret
80000380 <vx_num_instrs>: 80000380 <vx_num_instrs>:
80000380: c0202573 rdinstret a0 80000380: b0202573 csrr a0,minstret
80000384: 00008067 ret 80000384: 00008067 ret
80000388 <vx_vprintf>: 80000388 <vx_vprintf>:

Binary file not shown.

View File

@@ -55,7 +55,7 @@
:100348007325000267800000732520026780000083 :100348007325000267800000732520026780000083
:100358007325400267800000732550026780000003 :100358007325400267800000732550026780000003
:1003680073256002678000007325700267800000B3 :1003680073256002678000007325700267800000B3
:10037800732500C067800000732520C067800000D7 :10037800732500B067800000732520B067800000F7
:1003880063060520130101F52324810A232E310970 :1003880063060520130101F52324810A232E310970
:100398002326110A2322910A2320210B232C410909 :100398002326110A2322910A2320210B232C410909
:1003A800232A510923286109232671099309050085 :1003A800232A510923286109232671099309050085

View File

@@ -132,11 +132,11 @@ Disassembly of section .text:
80000158: 00008067 ret 80000158: 00008067 ret
8000015c <vx_num_cycles>: 8000015c <vx_num_cycles>:
8000015c: c0002573 rdcycle a0 8000015c: b0002573 csrr a0,mcycle
80000160: 00008067 ret 80000160: 00008067 ret
80000164 <vx_num_instrs>: 80000164 <vx_num_instrs>:
80000164: c0202573 rdinstret a0 80000164: b0202573 csrr a0,minstret
80000168: 00008067 ret 80000168: 00008067 ret
8000016c <vx_vprintf>: 8000016c <vx_vprintf>:

Binary file not shown.

View File

@@ -21,7 +21,7 @@
:1001280067800000732500026780000073252002A5 :1001280067800000732500026780000073252002A5
:100138006780000073254002678000007325500225 :100138006780000073254002678000007325500225
:1001480067800000732560026780000073257002D5 :1001480067800000732560026780000073257002D5
:1001580067800000732500C067800000732520C0F9 :1001580067800000732500B067800000732520B019
:100168006780000063060520130101F52324810A36 :100168006780000063060520130101F52324810A36
:10017800232E31092326110A2322910A2320210B39 :10017800232E31092326110A2322910A2320210B39
:10018800232C4109232A51092328610923267109AF :10018800232C4109232A51092328610923267109AF

View File

@@ -407,11 +407,11 @@ Disassembly of section .text:
80000574: 00008067 ret 80000574: 00008067 ret
80000578 <vx_num_cycles>: 80000578 <vx_num_cycles>:
80000578: c0002573 rdcycle a0 80000578: b0002573 csrr a0,mcycle
8000057c: 00008067 ret 8000057c: 00008067 ret
80000580 <vx_num_instrs>: 80000580 <vx_num_instrs>:
80000580: c0202573 rdinstret a0 80000580: b0202573 csrr a0,minstret
80000584: 00008067 ret 80000584: 00008067 ret
80000588 <vx_vprintf>: 80000588 <vx_vprintf>:

Binary file not shown.

View File

@@ -87,7 +87,7 @@
:100548007325000267800000732520026780000081 :100548007325000267800000732520026780000081
:100558007325400267800000732550026780000001 :100558007325400267800000732550026780000001
:1005680073256002678000007325700267800000B1 :1005680073256002678000007325700267800000B1
:10057800732500C067800000732520C067800000D5 :10057800732500B067800000732520B067800000F5
:1005880063060520130101F52324810A232E31096E :1005880063060520130101F52324810A232E31096E
:100598002326110A2322910A2320210B232C410907 :100598002326110A2322910A2320210B232C410907
:1005A800232A510923286109232671099309050083 :1005A800232A510923286109232671099309050083