vx_spawn: Add spawn_tasks_contiguous_all_stub
Spawns tasks so that the threads in a warp see contiguous thread_ids, unlike the original variant, where each thread was allocated a range of thread_ids spanning the number of batches. E.g., in a 4-thread config, instead of mapping IDs (0,2,4,6)->(1,3,5,7), map (0,1,2,3)->(4,5,6,7). TODO: remaining logic is not implemented yet.
This commit is contained in:
@@ -74,6 +74,27 @@ static void __attribute__ ((noinline)) spawn_tasks_all_stub() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void __attribute__ ((noinline)) spawn_tasks_contiguous_all_stub() {
|
||||||
|
int NT = vx_num_threads();
|
||||||
|
int NW = vx_num_warps();
|
||||||
|
int cid = vx_core_id();
|
||||||
|
int wid = vx_warp_id();
|
||||||
|
int tid = vx_thread_id();
|
||||||
|
|
||||||
|
wspawn_tasks_args_t* p_wspawn_args = (wspawn_tasks_args_t*)g_wspawn_args[cid];
|
||||||
|
|
||||||
|
// FIXME: handle RW
|
||||||
|
int waves = p_wspawn_args->NWs;
|
||||||
|
int offset = p_wspawn_args->offset + (NT * wid + tid);
|
||||||
|
|
||||||
|
vx_spawn_tasks_cb callback = p_wspawn_args->callback;
|
||||||
|
void* arg = p_wspawn_args->arg;
|
||||||
|
for (int wave_id = 0; wave_id < waves; ++wave_id) {
|
||||||
|
int task_id = offset + (wave_id * NT * NW);
|
||||||
|
callback(task_id, arg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void __attribute__ ((noinline)) spawn_tasks_rem_stub() {
|
static void __attribute__ ((noinline)) spawn_tasks_rem_stub() {
|
||||||
int cid = vx_core_id();
|
int cid = vx_core_id();
|
||||||
int tid = vx_thread_id();
|
int tid = vx_thread_id();
|
||||||
@@ -88,7 +109,8 @@ static void __attribute__ ((noinline)) spawn_tasks_all_cb() {
|
|||||||
vx_tmc(-1);
|
vx_tmc(-1);
|
||||||
|
|
||||||
// call stub routine
|
// call stub routine
|
||||||
spawn_tasks_all_stub();
|
// spawn_tasks_all_stub();
|
||||||
|
spawn_tasks_contiguous_all_stub();
|
||||||
|
|
||||||
// disable warp
|
// disable warp
|
||||||
vx_tmc_zero();
|
vx_tmc_zero();
|
||||||
@@ -141,7 +163,7 @@ void vx_spawn_tasks(int num_tasks, vx_spawn_tasks_cb callback , void * arg) {
|
|||||||
vx_tmc(-1);
|
vx_tmc(-1);
|
||||||
|
|
||||||
// call stub routine
|
// call stub routine
|
||||||
spawn_tasks_all_stub();
|
spawn_tasks_contiguous_all_stub();
|
||||||
|
|
||||||
// back to single-threaded
|
// back to single-threaded
|
||||||
vx_tmc_one();
|
vx_tmc_one();
|
||||||
|
|||||||
Reference in New Issue
Block a user