fixed global object sharing between cores
This commit is contained in:
@@ -6,35 +6,61 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define NUM_CORES_MAX 8
|
||||
|
||||
typedef struct {
|
||||
func_t function;
|
||||
void * arguments;
|
||||
int nthreads;
|
||||
} spawn_t;
|
||||
|
||||
spawn_t* g_spawn = NULL;
|
||||
spawn_t* g_spawn[NUM_CORES_MAX];
|
||||
|
||||
void spawn_warp_runonce() {
|
||||
void spawn_warp_all() {
|
||||
// active all threads
|
||||
vx_tmc(g_spawn->nthreads);
|
||||
int num_threads = vx_num_threads();
|
||||
vx_tmc(num_threads);
|
||||
|
||||
int core_id = vx_core_id();
|
||||
spawn_t* p_spawn = g_spawn[core_id];
|
||||
|
||||
// call user routine
|
||||
g_spawn->function(g_spawn->arguments);
|
||||
p_spawn->function(p_spawn->arguments);
|
||||
|
||||
// resume single-thread execution on exit
|
||||
// resume single-warp execution on exit
|
||||
int wid = vx_warp_id();
|
||||
unsigned tmask = (0 == wid) ? 0x1 : 0x0;
|
||||
vx_tmc(tmask);
|
||||
}
|
||||
|
||||
void spawn_warp_threads(int num_threads) {
|
||||
// active all threads
|
||||
vx_tmc(num_threads);
|
||||
|
||||
int core_id = vx_core_id();
|
||||
spawn_t* p_spawn = g_spawn[core_id];
|
||||
|
||||
// call user routine
|
||||
p_spawn->function(p_spawn->arguments);
|
||||
|
||||
// resume single-warp execution on exit
|
||||
int wid = vx_warp_id();
|
||||
unsigned tmask = (0 == wid) ? 0x1 : 0x0;
|
||||
vx_tmc(tmask);
|
||||
}
|
||||
|
||||
void vx_spawn_warps(int num_warps, int num_threads, func_t func_ptr , void * args) {
|
||||
spawn_t spawn = { func_ptr, args, num_threads };
|
||||
g_spawn = &spawn;
|
||||
int core_id = vx_core_id();
|
||||
if (core_id >= NUM_CORES_MAX)
|
||||
return;
|
||||
|
||||
spawn_t spawn = { func_ptr, args, num_threads };
|
||||
g_spawn[core_id] = &spawn;
|
||||
|
||||
if (num_warps > 1) {
|
||||
vx_wspawn(num_warps, (unsigned)spawn_warp_runonce);
|
||||
vx_wspawn(num_warps, (unsigned)spawn_warp_all);
|
||||
}
|
||||
spawn_warp_runonce();
|
||||
spawn_warp_threads(num_threads);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
@@ -4,24 +4,37 @@
|
||||
.global _start
|
||||
.type _start, @function
|
||||
_start:
|
||||
|
||||
# execute stack initialization on all warps
|
||||
la a1, vx_set_sp
|
||||
csrr a0, CSR_NW # get num warps
|
||||
.word 0x00b5106b # wspawn a0(numWarps), a1(PC SPAWN)
|
||||
.word 0x00b5106b # wspawn a0, a1
|
||||
jal vx_set_sp
|
||||
|
||||
# return back to single thread execution
|
||||
li a0, 1
|
||||
.word 0x0005006b # back to single thread
|
||||
# Initialize global pointerp
|
||||
# call __cxx_global_var_init
|
||||
.word 0x0005006b # tmc a0
|
||||
|
||||
# Clear the bss segment
|
||||
la a0, _edata
|
||||
la a2, _end
|
||||
sub a2, a2, a0
|
||||
li a1, 0
|
||||
call memset
|
||||
la a0, __libc_fini_array # Register global termination functions
|
||||
call atexit # to be called upon exit
|
||||
call __libc_init_array # Run global initialization functions
|
||||
|
||||
# Register global termination functions
|
||||
la a0, __libc_fini_array
|
||||
|
||||
# to be called upon exit
|
||||
call atexit
|
||||
|
||||
# Run global initialization functions
|
||||
call __libc_init_array
|
||||
|
||||
# call main program routine
|
||||
call main
|
||||
|
||||
# call exit routine
|
||||
tail exit
|
||||
.size _start, .-_start
|
||||
|
||||
@@ -29,34 +42,39 @@ _start:
|
||||
.type _exit, @function
|
||||
.global _exit
|
||||
_exit:
|
||||
# disable all threads in current warp
|
||||
li a0, 0
|
||||
.word 0x0005006b # disable all threads
|
||||
.word 0x0005006b # tmc a0
|
||||
|
||||
.section .text
|
||||
.type vx_set_sp, @function
|
||||
.global vx_set_sp
|
||||
vx_set_sp:
|
||||
# activate all threads
|
||||
csrr a0, CSR_NT # get num threads
|
||||
.word 0x0005006b # activate all threads
|
||||
.word 0x0005006b # set thread mask
|
||||
|
||||
# set global pointer register
|
||||
.option push
|
||||
.option norelax
|
||||
1:auipc gp, %pcrel_hi(__global_pointer$)
|
||||
addi gp, gp, %pcrel_lo(1b)
|
||||
la gp, __global_pointer$
|
||||
.option pop
|
||||
|
||||
# allocate stack region for a threads on the processor
|
||||
# set stack pointer
|
||||
csrr a1, CSR_GTID # get global thread id
|
||||
slli a1, a1, 10 # multiply by 1024
|
||||
csrr a2, CSR_LTID # get local thread id
|
||||
slli a2, a2, 2 # multiply by 4
|
||||
lui sp, (SHARED_MEM_BASE_ADDR>>12) # load base sp
|
||||
la sp, __stack_top$ # load stack base address
|
||||
sub sp, sp, a1 # sub thread block
|
||||
add sp, sp, a2 # reduce addr collision for perf
|
||||
|
||||
csrr a3, CSR_LWID # get wid
|
||||
# disable active warps except warp0
|
||||
csrr a3, CSR_LWID # get local wid
|
||||
beqz a3, RETURN
|
||||
li a0, 0
|
||||
.word 0x0005006b # tmc 0
|
||||
.word 0x0005006b # tmc a0
|
||||
RETURN:
|
||||
ret
|
||||
|
||||
|
||||
@@ -271,11 +271,11 @@ Disassembly of section .text:
|
||||
80000374: 00008067 ret
|
||||
|
||||
80000378 <vx_num_cycles>:
|
||||
80000378: c0002573 rdcycle a0
|
||||
80000378: b0002573 csrr a0,mcycle
|
||||
8000037c: 00008067 ret
|
||||
|
||||
80000380 <vx_num_instrs>:
|
||||
80000380: c0202573 rdinstret a0
|
||||
80000380: b0202573 csrr a0,minstret
|
||||
80000384: 00008067 ret
|
||||
|
||||
80000388 <vx_vprintf>:
|
||||
|
||||
Binary file not shown.
@@ -55,7 +55,7 @@
|
||||
:100348007325000267800000732520026780000083
|
||||
:100358007325400267800000732550026780000003
|
||||
:1003680073256002678000007325700267800000B3
|
||||
:10037800732500C067800000732520C067800000D7
|
||||
:10037800732500B067800000732520B067800000F7
|
||||
:1003880063060520130101F52324810A232E310970
|
||||
:100398002326110A2322910A2320210B232C410909
|
||||
:1003A800232A510923286109232671099309050085
|
||||
|
||||
@@ -132,11 +132,11 @@ Disassembly of section .text:
|
||||
80000158: 00008067 ret
|
||||
|
||||
8000015c <vx_num_cycles>:
|
||||
8000015c: c0002573 rdcycle a0
|
||||
8000015c: b0002573 csrr a0,mcycle
|
||||
80000160: 00008067 ret
|
||||
|
||||
80000164 <vx_num_instrs>:
|
||||
80000164: c0202573 rdinstret a0
|
||||
80000164: b0202573 csrr a0,minstret
|
||||
80000168: 00008067 ret
|
||||
|
||||
8000016c <vx_vprintf>:
|
||||
|
||||
Binary file not shown.
@@ -21,7 +21,7 @@
|
||||
:1001280067800000732500026780000073252002A5
|
||||
:100138006780000073254002678000007325500225
|
||||
:1001480067800000732560026780000073257002D5
|
||||
:1001580067800000732500C067800000732520C0F9
|
||||
:1001580067800000732500B067800000732520B019
|
||||
:100168006780000063060520130101F52324810A36
|
||||
:10017800232E31092326110A2322910A2320210B39
|
||||
:10018800232C4109232A51092328610923267109AF
|
||||
|
||||
@@ -407,11 +407,11 @@ Disassembly of section .text:
|
||||
80000574: 00008067 ret
|
||||
|
||||
80000578 <vx_num_cycles>:
|
||||
80000578: c0002573 rdcycle a0
|
||||
80000578: b0002573 csrr a0,mcycle
|
||||
8000057c: 00008067 ret
|
||||
|
||||
80000580 <vx_num_instrs>:
|
||||
80000580: c0202573 rdinstret a0
|
||||
80000580: b0202573 csrr a0,minstret
|
||||
80000584: 00008067 ret
|
||||
|
||||
80000588 <vx_vprintf>:
|
||||
|
||||
Binary file not shown.
@@ -87,7 +87,7 @@
|
||||
:100548007325000267800000732520026780000081
|
||||
:100558007325400267800000732550026780000001
|
||||
:1005680073256002678000007325700267800000B1
|
||||
:10057800732500C067800000732520C067800000D5
|
||||
:10057800732500B067800000732520B067800000F5
|
||||
:1005880063060520130101F52324810A232E31096E
|
||||
:100598002326110A2322910A2320210B232C410907
|
||||
:1005A800232A510923286109232671099309050083
|
||||
|
||||
Reference in New Issue
Block a user