mcexec: bind to CPUs according to ikc_map

This commit is contained in:
Balazs Gerofi
2017-02-11 18:15:47 +09:00
parent 610463ff39
commit 2d72042021
3 changed files with 148 additions and 54 deletions

View File

@@ -86,6 +86,9 @@ struct get_cpu_set_arg {
size_t cpu_set_size; // Size in bytes size_t cpu_set_size; // Size in bytes
int *target_core; int *target_core;
int *mcexec_linux_numa; // NUMA domain to bind mcexec to int *mcexec_linux_numa; // NUMA domain to bind mcexec to
void *mcexec_cpu_set;
size_t mcexec_cpu_set_size; // Size in bytes
int *ikc_mapped;
}; };
#define PLD_CPU_SET_MAX_CPUS 1024 #define PLD_CPU_SET_MAX_CPUS 1024

View File

@@ -505,8 +505,9 @@ static long mcexec_get_cpuset(ihk_os_t os, unsigned long arg)
int cpu, cpus_assigned, cpus_to_assign, cpu_prev; int cpu, cpus_assigned, cpus_to_assign, cpu_prev;
int ret = 0; int ret = 0;
int mcexec_linux_numa; int mcexec_linux_numa;
cpumask_t cpus_used; cpumask_t *mcexec_cpu_set = NULL;
cpumask_t cpus_to_use; cpumask_t *cpus_used = NULL;
cpumask_t *cpus_to_use = NULL;
struct mcctrl_per_proc_data *ppd; struct mcctrl_per_proc_data *ppd;
struct process_list_item *pli; struct process_list_item *pli;
struct process_list_item *pli_next = NULL; struct process_list_item *pli_next = NULL;
@@ -619,11 +620,20 @@ static long mcexec_get_cpuset(ihk_os_t os, unsigned long arg)
kfree(pli); kfree(pli);
cpus_to_assign = udp->cpu_info->n_cpus / req.nr_processes; cpus_to_assign = udp->cpu_info->n_cpus / req.nr_processes;
memcpy(&cpus_used, &pe->cpus_used, sizeof(cpumask_t)); cpus_used = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
memset(&cpus_to_use, 0, sizeof(cpus_to_use)); cpus_to_use = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
mcexec_cpu_set = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
if (!cpus_used || !cpus_to_use || !mcexec_cpu_set) {
printk("%s: error: allocating cpu masks\n", __FUNCTION__);
ret = -ENOMEM;
goto put_and_unlock_out;
}
memcpy(cpus_used, &pe->cpus_used, sizeof(cpumask_t));
memset(cpus_to_use, 0, sizeof(cpumask_t));
memset(mcexec_cpu_set, 0, sizeof(cpumask_t));
/* Find the first unused CPU */ /* Find the first unused CPU */
cpu = cpumask_next_zero(-1, &cpus_used); cpu = cpumask_next_zero(-1, cpus_used);
if (cpu >= udp->cpu_info->n_cpus) { if (cpu >= udp->cpu_info->n_cpus) {
printk("%s: error: no more CPUs available\n", printk("%s: error: no more CPUs available\n",
__FUNCTION__); __FUNCTION__);
@@ -632,11 +642,17 @@ static long mcexec_get_cpuset(ihk_os_t os, unsigned long arg)
} }
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0) #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0)
cpumask_set_cpu(cpu, &cpus_used); cpumask_set_cpu(cpu, cpus_used);
cpumask_set_cpu(cpu, &cpus_to_use); cpumask_set_cpu(cpu, cpus_to_use);
if (udp->cpu_info->ikc_mapped) {
cpumask_set_cpu(udp->cpu_info->ikc_map[cpu], mcexec_cpu_set);
}
#else #else
cpu_set(cpu, cpus_used); cpu_set(cpu, *cpus_used);
cpu_set(cpu, cpus_to_use); cpu_set(cpu, *cpus_to_use);
if (udp->cpu_info->ikc_mapped) {
cpu_set(udp->cpu_info->ikc_map[cpu], *mcexec_cpu_set);
}
#endif #endif
cpu_prev = cpu; cpu_prev = cpu;
dprintk("%s: CPU %d assigned (first)\n", __FUNCTION__, cpu); dprintk("%s: CPU %d assigned (first)\n", __FUNCTION__, cpu);
@@ -666,16 +682,24 @@ static long mcexec_get_cpuset(ihk_os_t os, unsigned long arg)
list_for_each_entry(cache_top, &cpu_top->cache_list, chain) { list_for_each_entry(cache_top, &cpu_top->cache_list, chain) {
for_each_cpu(cpu, &cache_top->shared_cpu_map) { for_each_cpu(cpu, &cache_top->shared_cpu_map) {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0) #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0)
if (!cpumask_test_cpu(cpu, &cpus_used)) { if (!cpumask_test_cpu(cpu, cpus_used)) {
#else #else
if (!cpu_isset(cpu, cpus_used)) { if (!cpu_isset(cpu, *cpus_used)) {
#endif #endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0) #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0)
cpumask_set_cpu(cpu, &cpus_used); cpumask_set_cpu(cpu, cpus_used);
cpumask_set_cpu(cpu, &cpus_to_use); cpumask_set_cpu(cpu, cpus_to_use);
if (udp->cpu_info->ikc_mapped) {
cpumask_set_cpu(udp->cpu_info->ikc_map[cpu],
mcexec_cpu_set);
}
#else #else
cpu_set(cpu, cpus_used); cpu_set(cpu, *cpus_used);
cpu_set(cpu, cpus_to_use); cpu_set(cpu, *cpus_to_use);
if (udp->cpu_info->ikc_mapped) {
cpu_set(udp->cpu_info->ikc_map[cpu],
*mcexec_cpu_set);
}
#endif #endif
cpu_prev = cpu; cpu_prev = cpu;
dprintk("%s: CPU %d assigned (same cache L%lu)\n", dprintk("%s: CPU %d assigned (same cache L%lu)\n",
@@ -689,7 +713,7 @@ static long mcexec_get_cpuset(ihk_os_t os, unsigned long arg)
node = linux_numa_2_mckernel_numa(udp, node = linux_numa_2_mckernel_numa(udp,
cpu_to_node(mckernel_cpu_2_linux_cpu(udp, cpu_prev))); cpu_to_node(mckernel_cpu_2_linux_cpu(udp, cpu_prev)));
for_each_cpu_not(cpu, &cpus_used) { for_each_cpu_not(cpu, cpus_used) {
/* Invalid CPU? */ /* Invalid CPU? */
if (cpu >= udp->cpu_info->n_cpus) if (cpu >= udp->cpu_info->n_cpus)
break; break;
@@ -698,11 +722,19 @@ static long mcexec_get_cpuset(ihk_os_t os, unsigned long arg)
if (node == linux_numa_2_mckernel_numa(udp, if (node == linux_numa_2_mckernel_numa(udp,
cpu_to_node(mckernel_cpu_2_linux_cpu(udp, cpu)))) { cpu_to_node(mckernel_cpu_2_linux_cpu(udp, cpu)))) {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0) #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0)
cpumask_set_cpu(cpu, &cpus_used); cpumask_set_cpu(cpu, cpus_used);
cpumask_set_cpu(cpu, &cpus_to_use); cpumask_set_cpu(cpu, cpus_to_use);
if (udp->cpu_info->ikc_mapped) {
cpumask_set_cpu(udp->cpu_info->ikc_map[cpu],
mcexec_cpu_set);
}
#else #else
cpu_set(cpu, cpus_used); cpu_set(cpu, *cpus_used);
cpu_set(cpu, cpus_to_use); cpu_set(cpu, *cpus_to_use);
if (udp->cpu_info->ikc_mapped) {
cpu_set(udp->cpu_info->ikc_map[cpu],
*mcexec_cpu_set);
}
#endif #endif
cpu_prev = cpu; cpu_prev = cpu;
dprintk("%s: CPU %d assigned (same NUMA)\n", dprintk("%s: CPU %d assigned (same NUMA)\n",
@@ -712,7 +744,7 @@ static long mcexec_get_cpuset(ihk_os_t os, unsigned long arg)
} }
/* No CPU? Simply find the next unused one */ /* No CPU? Simply find the next unused one */
cpu = cpumask_next_zero(-1, &cpus_used); cpu = cpumask_next_zero(-1, cpus_used);
if (cpu >= udp->cpu_info->n_cpus) { if (cpu >= udp->cpu_info->n_cpus) {
printk("%s: error: no more CPUs available\n", printk("%s: error: no more CPUs available\n",
__FUNCTION__); __FUNCTION__);
@@ -721,11 +753,17 @@ static long mcexec_get_cpuset(ihk_os_t os, unsigned long arg)
} }
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0) #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0)
cpumask_set_cpu(cpu, &cpus_used); cpumask_set_cpu(cpu, cpus_used);
cpumask_set_cpu(cpu, &cpus_to_use); cpumask_set_cpu(cpu, cpus_to_use);
if (udp->cpu_info->ikc_mapped) {
cpumask_set_cpu(udp->cpu_info->ikc_map[cpu], mcexec_cpu_set);
}
#else #else
cpu_set(cpu, cpus_used); cpu_set(cpu, *cpus_used);
cpu_set(cpu, cpus_to_use); cpu_set(cpu, *cpus_to_use);
if (udp->cpu_info->ikc_mapped) {
cpu_set(udp->cpu_info->ikc_map[cpu], *mcexec_cpu_set);
}
#endif #endif
cpu_prev = cpu; cpu_prev = cpu;
dprintk("%s: CPU %d assigned (unused)\n", dprintk("%s: CPU %d assigned (unused)\n",
@@ -736,16 +774,16 @@ next_cpu:
} }
/* Found all cores, let user know */ /* Found all cores, let user know */
if (copy_to_user(req.cpu_set, &cpus_to_use, if (copy_to_user(req.cpu_set, cpus_to_use,
(req.cpu_set_size < sizeof(cpus_to_use) ? (req.cpu_set_size < sizeof(cpumask_t) ?
req.cpu_set_size : sizeof(cpus_to_use)))) { req.cpu_set_size : sizeof(cpumask_t)))) {
printk("%s: error copying mask to user\n", __FUNCTION__); printk("%s: error copying mask to user\n", __FUNCTION__);
ret = -EINVAL; ret = -EINVAL;
goto put_and_unlock_out; goto put_and_unlock_out;
} }
/* Copy IKC target core and mcexec Linux NUMA id */ /* Copy IKC target core */
cpu = cpumask_next(-1, &cpus_to_use); cpu = cpumask_next(-1, cpus_to_use);
if (copy_to_user(req.target_core, &cpu, sizeof(cpu))) { if (copy_to_user(req.target_core, &cpu, sizeof(cpu))) {
printk("%s: error copying target core to user\n", printk("%s: error copying target core to user\n",
__FUNCTION__); __FUNCTION__);
@@ -753,6 +791,7 @@ next_cpu:
goto put_and_unlock_out; goto put_and_unlock_out;
} }
/* mcexec NUMA to bind to */
mcexec_linux_numa = cpu_to_node(mckernel_cpu_2_linux_cpu(udp, cpu)); mcexec_linux_numa = cpu_to_node(mckernel_cpu_2_linux_cpu(udp, cpu));
if (copy_to_user(req.mcexec_linux_numa, &mcexec_linux_numa, if (copy_to_user(req.mcexec_linux_numa, &mcexec_linux_numa,
sizeof(mcexec_linux_numa))) { sizeof(mcexec_linux_numa))) {
@@ -762,12 +801,32 @@ next_cpu:
goto put_and_unlock_out; goto put_and_unlock_out;
} }
/* mcexec cpu_set to bind to if user requested */
if (req.mcexec_cpu_set && udp->cpu_info->ikc_mapped) {
int ikc_mapped = 1;
if (copy_to_user(req.mcexec_cpu_set, mcexec_cpu_set,
(req.mcexec_cpu_set_size < sizeof(cpumask_t) ?
req.mcexec_cpu_set_size : sizeof(cpumask_t)))) {
printk("%s: error copying mcexec CPU set to user\n", __FUNCTION__);
ret = -EINVAL;
goto put_and_unlock_out;
}
if (copy_to_user(req.ikc_mapped, &ikc_mapped,
sizeof(ikc_mapped))) {
printk("%s: error copying ikc_mapped\n", __FUNCTION__);
ret = -EINVAL;
goto put_and_unlock_out;
}
}
/* Save in per-process structure */ /* Save in per-process structure */
memcpy(&ppd->cpu_set, &cpus_to_use, sizeof(cpumask_t)); memcpy(&ppd->cpu_set, cpus_to_use, sizeof(cpumask_t));
ppd->ikc_target_cpu = cpu; ppd->ikc_target_cpu = cpu;
/* Commit used cores to OS structure */ /* Commit used cores to OS structure */
memcpy(&pe->cpus_used, &cpus_used, sizeof(cpus_used)); memcpy(&pe->cpus_used, cpus_used, sizeof(*cpus_used));
/* Reset if last process */ /* Reset if last process */
if (pe->nr_processes_left == 0) { if (pe->nr_processes_left == 0) {
@@ -790,6 +849,9 @@ next_cpu:
ret = 0; ret = 0;
put_and_unlock_out: put_and_unlock_out:
kfree(cpus_to_use);
kfree(cpus_used);
kfree(mcexec_cpu_set);
mcctrl_put_per_proc_data(ppd); mcctrl_put_per_proc_data(ppd);
mutex_unlock(&pe->lock); mutex_unlock(&pe->lock);

View File

@@ -1657,12 +1657,19 @@ int main(int argc, char **argv)
if (nr_processes > 0) { if (nr_processes > 0) {
struct get_cpu_set_arg cpu_set_arg; struct get_cpu_set_arg cpu_set_arg;
int mcexec_linux_numa = 0; int mcexec_linux_numa = 0;
int ikc_mapped = 0;
cpu_set_t mcexec_cpu_set;
CPU_ZERO(&mcexec_cpu_set);
cpu_set_arg.cpu_set = (void *)&desc->cpu_set; cpu_set_arg.cpu_set = (void *)&desc->cpu_set;
cpu_set_arg.cpu_set_size = sizeof(desc->cpu_set); cpu_set_arg.cpu_set_size = sizeof(desc->cpu_set);
cpu_set_arg.nr_processes = nr_processes; cpu_set_arg.nr_processes = nr_processes;
cpu_set_arg.target_core = &target_core; cpu_set_arg.target_core = &target_core;
cpu_set_arg.mcexec_linux_numa = &mcexec_linux_numa; cpu_set_arg.mcexec_linux_numa = &mcexec_linux_numa;
cpu_set_arg.mcexec_cpu_set = &mcexec_cpu_set;
cpu_set_arg.mcexec_cpu_set_size = sizeof(mcexec_cpu_set);
cpu_set_arg.ikc_mapped = &ikc_mapped;
if (ioctl(fd, MCEXEC_UP_GET_CPUSET, (void *)&cpu_set_arg) != 0) { if (ioctl(fd, MCEXEC_UP_GET_CPUSET, (void *)&cpu_set_arg) != 0) {
perror("getting CPU set for partitioned execution"); perror("getting CPU set for partitioned execution");
@@ -1672,32 +1679,54 @@ int main(int argc, char **argv)
desc->cpu = target_core; desc->cpu = target_core;
/* This call may not succeed, but that is fine */ /* Bind to CPU cores where the LWK process' IKC target maps to */
if (numa_run_on_node(mcexec_linux_numa) < 0) { if (ikc_mapped) {
__dprint("%s: WARNING: couldn't bind to NUMA %d\n", /* This call may not succeed, but that is fine */
__FUNCTION__, mcexec_linux_numa); if (sched_setaffinity(0, sizeof(mcexec_cpu_set),
} &mcexec_cpu_set) < 0) {
#ifdef DEBUG __dprint("%s: WARNING: couldn't bind to mcexec_cpu_set\n",
else { __FUNCTION__);
cpu_set_t cpuset;
char affinity[BUFSIZ];
CPU_ZERO(&cpuset);
if ((sched_getaffinity(0, sizeof(cpu_set_t), &cpuset)) != 0) {
perror("Error sched_getaffinity");
exit(1);
} }
#ifdef DEBUG
affinity[0] = '\0'; else {
for (i = 0; i < 512; i++) { int i;
if (CPU_ISSET(i, &cpuset) == 1) { for (i = 0; i < numa_num_possible_cpus(); ++i) {
sprintf(affinity, "%s %d", affinity, i); if (CPU_ISSET(i, &mcexec_cpu_set)) {
__dprint("%s: PID %d bound to CPU %d\n",
__FUNCTION__, getpid(), i);
}
} }
} }
__dprint("%s: PID: %d affinity: %s\n", #endif // DEBUG
__FUNCTION__, getpid(), affinity); }
else {
/* This call may not succeed, but that is fine */
if (numa_run_on_node(mcexec_linux_numa) < 0) {
__dprint("%s: WARNING: couldn't bind to NUMA %d\n",
__FUNCTION__, mcexec_linux_numa);
}
#ifdef DEBUG
else {
cpu_set_t cpuset;
char affinity[BUFSIZ];
CPU_ZERO(&cpuset);
if ((sched_getaffinity(0, sizeof(cpu_set_t), &cpuset)) != 0) {
perror("Error sched_getaffinity");
exit(1);
}
affinity[0] = '\0';
for (i = 0; i < 512; i++) {
if (CPU_ISSET(i, &cpuset) == 1) {
sprintf(affinity, "%s %d", affinity, i);
}
}
__dprint("%s: PID: %d affinity: %s\n",
__FUNCTION__, getpid(), affinity);
}
#endif // DEBUG
} }
#endif
} }
desc->mpol_flags = 0; desc->mpol_flags = 0;