runtime update
This commit is contained in:
@@ -20,7 +20,7 @@ typedef struct {
|
||||
} wspawn_tasks_args_t;
|
||||
|
||||
typedef struct {
|
||||
struct context_t * ctx;
|
||||
context_t * ctx;
|
||||
vx_spawn_kernel_cb callback;
|
||||
void * arg;
|
||||
int offset;
|
||||
@@ -44,10 +44,7 @@ inline int fast_log2(int x) {
|
||||
return (*(int*)(&f)>>23) - 127;
|
||||
}
|
||||
|
||||
static void spawn_tasks_callback() {
|
||||
// activate all threads
|
||||
vx_tmc(-1);
|
||||
|
||||
static void __attribute__ ((noinline)) spawn_tasks_all_stub() {
|
||||
int core_id = vx_core_id();
|
||||
int wid = vx_warp_id();
|
||||
int tid = vx_thread_id();
|
||||
@@ -65,15 +62,9 @@ static void spawn_tasks_callback() {
|
||||
|
||||
// wait for all warps to complete
|
||||
vx_barrier(0, p_wspawn_args->NW);
|
||||
|
||||
// set warp0 to single-threaded and stop other warps
|
||||
vx_tmc(0 == wid);
|
||||
}
|
||||
|
||||
void spawn_remaining_tasks_callback(int thread_mask) {
|
||||
// activate threads
|
||||
vx_tmc(thread_mask);
|
||||
|
||||
static void __attribute__ ((noinline)) spawn_tasks_rem_stub() {
|
||||
int core_id = vx_core_id();
|
||||
int tid = vx_thread_gid();
|
||||
|
||||
@@ -81,6 +72,26 @@ void spawn_remaining_tasks_callback(int thread_mask) {
|
||||
|
||||
int task_id = p_wspawn_args->offset + tid;
|
||||
(p_wspawn_args->callback)(task_id, p_wspawn_args->arg);
|
||||
}
|
||||
|
||||
static void spawn_tasks_all_cb() {
|
||||
// activate all threads
|
||||
vx_tmc(-1);
|
||||
|
||||
// call stub routine
|
||||
spawn_tasks_all_stub();
|
||||
|
||||
// set warp0 to single-threaded and stop other warps
|
||||
int wid = vx_warp_id();
|
||||
vx_tmc(0 == wid);
|
||||
}
|
||||
|
||||
static void spawn_tasks_rem_cb(int thread_mask) {
|
||||
// activate threads
|
||||
vx_tmc(thread_mask);
|
||||
|
||||
// call stub routine
|
||||
spawn_tasks_rem_stub();
|
||||
|
||||
// back to single-threaded
|
||||
vx_tmc(1);
|
||||
@@ -128,24 +139,21 @@ void vx_spawn_tasks(int num_tasks, vx_spawn_tasks_cb callback , void * arg) {
|
||||
if (nW >= 1) {
|
||||
int nw = MIN(nW, NW);
|
||||
wspawn_args.NW = nw;
|
||||
vx_wspawn(nw, spawn_tasks_callback);
|
||||
spawn_tasks_callback();
|
||||
vx_wspawn(nw, spawn_tasks_all_cb);
|
||||
spawn_tasks_all_cb();
|
||||
}
|
||||
|
||||
//--
|
||||
if (rT != 0) {
|
||||
wspawn_args.offset = tasks_per_core0 - rT;
|
||||
int tmask = (1 << rT) - 1;
|
||||
spawn_remaining_tasks_callback(tmask);
|
||||
spawn_tasks_rem_cb(tmask);
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static void spawn_kernel_callback() {
|
||||
// activate all threads
|
||||
vx_tmc(-1);
|
||||
|
||||
static void __attribute__ ((noinline)) spawn_kernel_all_stub() {
|
||||
int core_id = vx_core_id();
|
||||
int wid = vx_warp_id();
|
||||
int tid = vx_thread_id();
|
||||
@@ -176,15 +184,9 @@ static void spawn_kernel_callback() {
|
||||
|
||||
// wait for all warps to complete
|
||||
vx_barrier(0, p_wspawn_args->NW);
|
||||
|
||||
// set warp0 to single-threaded and stop other warps
|
||||
vx_tmc(0 == wid);
|
||||
}
|
||||
|
||||
static void spawn_kernel_remaining_callback(int thread_mask) {
|
||||
// activate threads
|
||||
vx_tmc(thread_mask);
|
||||
|
||||
static void __attribute__ ((noinline)) spawn_kernel_rem_stub() {
|
||||
int core_id = vx_core_id();
|
||||
int tid = vx_thread_gid();
|
||||
|
||||
@@ -206,12 +208,32 @@ static void spawn_kernel_remaining_callback(int thread_mask) {
|
||||
int gid2 = p_wspawn_args->ctx->global_offset[2] + k;
|
||||
|
||||
(p_wspawn_args->callback)(p_wspawn_args->arg, p_wspawn_args->ctx, gid0, gid1, gid2);
|
||||
}
|
||||
|
||||
static void spawn_kernel_all_cb() {
|
||||
// activate all threads
|
||||
vx_tmc(-1);
|
||||
|
||||
// call stub routine
|
||||
spawn_kernel_all_stub();
|
||||
|
||||
// set warp0 to single-threaded and stop other warps
|
||||
int wid = vx_warp_id();
|
||||
vx_tmc(0 == wid);
|
||||
}
|
||||
|
||||
static void spawn_kernel_rem_cb(int thread_mask) {
|
||||
// activate threads
|
||||
vx_tmc(thread_mask);
|
||||
|
||||
// call stub routine
|
||||
spawn_kernel_rem_stub();
|
||||
|
||||
// back to single-threaded
|
||||
vx_tmc(1);
|
||||
}
|
||||
|
||||
void vx_spawn_kernel(struct context_t * ctx, vx_spawn_kernel_cb callback, void * arg) {
|
||||
void vx_spawn_kernel(context_t * ctx, vx_spawn_kernel_cb callback, void * arg) {
|
||||
// total number of WGs
|
||||
int X = ctx->num_groups[0];
|
||||
int Y = ctx->num_groups[1];
|
||||
@@ -268,15 +290,15 @@ void vx_spawn_kernel(struct context_t * ctx, vx_spawn_kernel_cb callback, void *
|
||||
if (nW >= 1) {
|
||||
int nw = MIN(nW, NW);
|
||||
wspawn_args.NW = nw;
|
||||
vx_wspawn(nw, spawn_kernel_callback);
|
||||
spawn_kernel_callback();
|
||||
vx_wspawn(nw, spawn_kernel_all_cb);
|
||||
spawn_kernel_all_cb();
|
||||
}
|
||||
|
||||
//--
|
||||
if (rT != 0) {
|
||||
wspawn_args.offset = wgs_per_core0 - rT;
|
||||
int tmask = (1 << rT) - 1;
|
||||
spawn_kernel_remaining_callback(tmask);
|
||||
spawn_kernel_rem_cb(tmask);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user