multicore fix
This commit is contained in:
@@ -35,26 +35,50 @@ vx_join:
|
||||
.type vx_warp_id, @function
|
||||
.global vx_warp_id
|
||||
vx_warp_id:
|
||||
csrr a0, CSR_LWID # read warp index
|
||||
csrr a0, CSR_LWID
|
||||
ret
|
||||
|
||||
.type vx_warp_gid, @function
|
||||
.global vx_warp_gid
|
||||
vx_warp_gid:
|
||||
csrr a0, CSR_GWID # read warp index
|
||||
csrr a0, CSR_GWID
|
||||
ret
|
||||
|
||||
.type vx_thread_id, @function
|
||||
.global vx_thread_id
|
||||
vx_thread_id:
|
||||
csrr a0, CSR_LTID # read thread index
|
||||
csrr a0, CSR_LTID
|
||||
ret
|
||||
|
||||
.type vx_thread_gid, @function
|
||||
.global vx_thread_gid
|
||||
vx_thread_gid:
|
||||
csrr a0, CSR_GTID # read thread index
|
||||
csrr a0, CSR_GTID
|
||||
ret
|
||||
|
||||
.type vx_core_id, @function
|
||||
.global vx_core_id
|
||||
vx_core_id:
|
||||
csrr a0, CSR_GCID
|
||||
ret
|
||||
|
||||
.type vx_num_threads, @function
|
||||
.global vx_num_threads
|
||||
vx_num_threads:
|
||||
csrr a0, CSR_NT
|
||||
ret
|
||||
|
||||
.type vx_num_warps, @function
|
||||
.global vx_num_warps
|
||||
vx_num_warps:
|
||||
csrr a0, CSR_NW
|
||||
ret
|
||||
|
||||
.type vx_num_cores, @function
|
||||
.global vx_num_cores
|
||||
vx_num_cores:
|
||||
csrr a0, CSR_NC
|
||||
ret
|
||||
|
||||
.type vx_num_cycles, @function
|
||||
.global vx_num_cycles
|
||||
|
||||
@@ -8,37 +8,49 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
// Spawn warps
|
||||
void vx_wspawn(unsigned numWarps, unsigned PC_spawn);
|
||||
void vx_wspawn(int numWarps, int PC_spawn);
|
||||
|
||||
// Set thread mask
|
||||
void vx_tmc(unsigned numThreads);
|
||||
void vx_tmc(int numThreads);
|
||||
|
||||
// Warp Barrier
|
||||
void vx_barrier(unsigned barriedID, unsigned numWarps);
|
||||
void vx_barrier(int barriedID, int numWarps);
|
||||
|
||||
// Split on a predicate
|
||||
void vx_split(unsigned predicate);
|
||||
void vx_split(int predicate);
|
||||
|
||||
// Join
|
||||
void vx_join(void);
|
||||
void vx_join();
|
||||
|
||||
// Return the warp thread index
|
||||
unsigned vx_thread_id(void);
|
||||
// Return the warp's unique thread id
|
||||
int vx_thread_id();
|
||||
|
||||
// Return the core warp index
|
||||
unsigned vx_warp_id(void);
|
||||
// Return the core's unique warp id
|
||||
int vx_warp_id();
|
||||
|
||||
// Return processor unique thread id
|
||||
unsigned vx_thread_gid(void);
|
||||
// Return processor unique core id
|
||||
int vx_core_id();
|
||||
|
||||
// Return processor unique warp id
|
||||
unsigned vx_warp_gid(void);
|
||||
// Return processor global thread id
|
||||
int vx_thread_gid();
|
||||
|
||||
// Return the number of cycles
|
||||
unsigned vx_num_cycles(void);
|
||||
// Return processor global warp id
|
||||
int vx_warp_gid();
|
||||
|
||||
// Return the number of instructions
|
||||
unsigned vx_num_instrs(void);
|
||||
// Return the number of threads in a warp
|
||||
int vx_num_threads();
|
||||
|
||||
// Return the number of warps in a core
|
||||
int vx_num_warps();
|
||||
|
||||
// Return the number of cores in the processor
|
||||
int vx_num_cores();
|
||||
|
||||
// Return the number of cycles
|
||||
int vx_num_cycles();
|
||||
|
||||
// Return the number of instructions
|
||||
int vx_num_instrs();
|
||||
|
||||
#define __if(b) vx_split(b); \
|
||||
if (b)
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
.type _start, @function
|
||||
_start:
|
||||
la a1, vx_set_sp
|
||||
li a0, NUM_WARPS # activate all warps
|
||||
csrr a0, CSR_NW # get num warps
|
||||
.word 0x00b5106b # wspawn a0(numWarps), a1(PC SPAWN)
|
||||
jal vx_set_sp
|
||||
li a0, 1
|
||||
@@ -32,12 +32,11 @@ _exit:
|
||||
li a0, 0
|
||||
.word 0x0005006b # disable all threads
|
||||
|
||||
|
||||
.section .text
|
||||
.type vx_set_sp, @function
|
||||
.global vx_set_sp
|
||||
vx_set_sp:
|
||||
li a0, NUM_THREADS
|
||||
csrr a0, CSR_NT # get num threads
|
||||
.word 0x0005006b # activate all threads
|
||||
|
||||
.option push
|
||||
@@ -46,10 +45,10 @@ vx_set_sp:
|
||||
addi gp, gp, %pcrel_lo(1b)
|
||||
.option pop
|
||||
|
||||
csrr a1, CSR_GTID # get gtid
|
||||
slli a1, a1, 10 # multiply tid by 1024
|
||||
csrr a2, CSR_LTID # get tid
|
||||
slli a2, a2, 2 # multiply tid by 4
|
||||
csrr a1, CSR_GTID # get global thread id
|
||||
slli a1, a1, 10 # multiply by 1024
|
||||
csrr a2, CSR_LTID # get local thread id
|
||||
slli a2, a2, 2 # multiply by 4
|
||||
lui sp, STACK_BASE_ADDR # load base sp
|
||||
sub sp, sp, a1 # sub thread block
|
||||
add sp, sp, a2 # reduce addr collision for perf
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user