vx_spawn.c: Handle num_clusters > 1
WIP: still assumes num_tasks is divisible by num_cluster
This commit is contained in:
@@ -186,9 +186,11 @@ static void __attribute__ ((noinline)) spawn_tasks_all_cb() {
|
|||||||
// core has to enable to fulfill an entire grid of computation.
|
// core has to enable to fulfill an entire grid of computation.
|
||||||
void vx_spawn_tasks_cluster(int num_tasks, vx_spawn_tasks_cb callback, void *arg) {
|
void vx_spawn_tasks_cluster(int num_tasks, vx_spawn_tasks_cb callback, void *arg) {
|
||||||
// device specs
|
// device specs
|
||||||
int NC = vx_num_cores();
|
const int NC = vx_num_cores();
|
||||||
int NW = vx_num_warps();
|
const int NW = vx_num_warps();
|
||||||
int NT = vx_num_threads();
|
const int NT = vx_num_threads();
|
||||||
|
// NOTE: assumes NC is divisible by CORES_PER_CLUSTER
|
||||||
|
const int num_cluster = NC / CORES_PER_CLUSTER;
|
||||||
|
|
||||||
// current core id
|
// current core id
|
||||||
int core_id = vx_core_id();
|
int core_id = vx_core_id();
|
||||||
@@ -206,8 +208,8 @@ void vx_spawn_tasks_cluster(int num_tasks, vx_spawn_tasks_cb callback, void *arg
|
|||||||
if (core_id >= num_active_cores)
|
if (core_id >= num_active_cores)
|
||||||
return; // terminate extra cores
|
return; // terminate extra cores
|
||||||
|
|
||||||
// FIXME: HARDCODES 1 CLUSTER!
|
// FIXME: assumes num_tasks is divisible by num_cluster; the integer division truncates any remainder
|
||||||
const int num_tasks_this_cluster = num_tasks;
|
const int num_tasks_this_cluster = num_tasks / num_cluster;
|
||||||
const int num_full_warps = num_tasks_this_cluster / NT;
|
const int num_full_warps = num_tasks_this_cluster / NT;
|
||||||
const int rem_threads_in_last_warp = num_tasks_this_cluster % NT;
|
const int rem_threads_in_last_warp = num_tasks_this_cluster % NT;
|
||||||
// const int num_warps = (num_tasks_this_cluster + (NT - 1)) / NT;
|
// const int num_warps = (num_tasks_this_cluster + (NT - 1)) / NT;
|
||||||
|
|||||||
Reference in New Issue
Block a user