vx_spawn.c: Implement spawn_tasks_cluster_rem_stub
This commit is contained in:
@@ -102,14 +102,15 @@ static void __attribute__ ((noinline)) spawn_tasks_cluster_all_stub() {
|
|||||||
int wid = vx_warp_id();
|
int wid = vx_warp_id();
|
||||||
int tid = vx_thread_id();
|
int tid = vx_thread_id();
|
||||||
|
|
||||||
const int core_id_in_cluster = vx_core_id() % CORES_PER_CLUSTER;
|
const int core_id_in_cluster = cid % CORES_PER_CLUSTER;
|
||||||
const int cluster_wid = CORES_PER_CLUSTER * wid + core_id_in_cluster;
|
// round-robin warp_id allocation across cores in cluster
|
||||||
|
const int wid_in_cluster = CORES_PER_CLUSTER * wid + core_id_in_cluster;
|
||||||
|
|
||||||
wspawn_tasks_args_t* p_wspawn_args = (wspawn_tasks_args_t*)g_wspawn_args[cid];
|
wspawn_tasks_args_t* p_wspawn_args = (wspawn_tasks_args_t*)g_wspawn_args[cid];
|
||||||
|
|
||||||
// FIXME: handle RW
|
// FIXME: handle RW
|
||||||
int waves = p_wspawn_args->NWs;
|
int waves = p_wspawn_args->NWs;
|
||||||
int offset = p_wspawn_args->offset + (NT * cluster_wid + tid);
|
int offset = p_wspawn_args->offset + (NT * wid_in_cluster + tid);
|
||||||
|
|
||||||
vx_spawn_tasks_cb callback = p_wspawn_args->callback;
|
vx_spawn_tasks_cb callback = p_wspawn_args->callback;
|
||||||
void* arg = p_wspawn_args->arg;
|
void* arg = p_wspawn_args->arg;
|
||||||
@@ -128,6 +129,25 @@ static void __attribute__ ((noinline)) spawn_tasks_rem_stub() {
|
|||||||
(p_wspawn_args->callback)(task_id, p_wspawn_args->arg);
|
(p_wspawn_args->callback)(task_id, p_wspawn_args->arg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void __attribute__ ((noinline)) spawn_tasks_cluster_rem_stub() {
|
||||||
|
int NT = vx_num_threads();
|
||||||
|
int cid = vx_core_id();
|
||||||
|
int tid = vx_thread_id();
|
||||||
|
int wid = vx_warp_id();
|
||||||
|
|
||||||
|
const int core_id_in_cluster = cid % CORES_PER_CLUSTER;
|
||||||
|
// round-robin warp_id allocation across cores in cluster
|
||||||
|
const int wid_in_cluster = CORES_PER_CLUSTER * wid + core_id_in_cluster;
|
||||||
|
|
||||||
|
wspawn_tasks_args_t* p_wspawn_args = (wspawn_tasks_args_t*)g_wspawn_args[cid];
|
||||||
|
// FIXME: This assumes that all cores but the last one are working with full
|
||||||
|
// warps, and only the last core has a partially-filled warp.
|
||||||
|
int offset = p_wspawn_args->offset + (NT * wid_in_cluster + tid);
|
||||||
|
|
||||||
|
int task_id = offset;
|
||||||
|
(p_wspawn_args->callback)(task_id, p_wspawn_args->arg);
|
||||||
|
}
|
||||||
|
|
||||||
static void __attribute__ ((noinline)) spawn_tasks_cluster_all_cb() {
|
static void __attribute__ ((noinline)) spawn_tasks_cluster_all_cb() {
|
||||||
// activate all threads
|
// activate all threads
|
||||||
vx_tmc(-1);
|
vx_tmc(-1);
|
||||||
@@ -224,8 +244,7 @@ void vx_spawn_tasks_cluster(int num_tasks, vx_spawn_tasks_cb callback, void *arg
|
|||||||
vx_tmc(tmask);
|
vx_tmc(tmask);
|
||||||
|
|
||||||
// call stub routine
|
// call stub routine
|
||||||
// FIXME: unimplemented for cluster!
|
spawn_tasks_cluster_rem_stub();
|
||||||
spawn_tasks_rem_stub();
|
|
||||||
|
|
||||||
// back to single-threaded
|
// back to single-threaded
|
||||||
vx_tmc_one();
|
vx_tmc_one();
|
||||||
|
|||||||
Reference in New Issue
Block a user