mcctrl: refactor prepare_image into new generic ikc send&wait

Many ikc messages expecting a reply use wait_event_interruptible
incorrectly, freeing memory that could still be used on the other side.

This commit implements a generic ikc send and wait helper that helps
with memory management and ownership properly:
 - if the message succeeds and a reply comes back normally, the memory
is freed by the caller as usual
 - if the wait fails (signal before the reply comes or timeout) then the
memory is set as owner by ikc and will be free when the reply comes back
later
 - if the reply never comes, the memory is freed at shutdown when
destroying ikc channels

Refs: #1076
Change-Id: I7f348d9029a6ad56ba9a50c836105ec39fa14943
This commit is contained in:
Dominique Martinet
2018-06-21 19:12:59 +09:00
committed by Masamichi Takagi
parent ec202a1ca9
commit b939ca9370
6 changed files with 165 additions and 77 deletions

View File

@@ -111,10 +111,8 @@ typedef unsigned long __cpu_set_unit;
struct program_load_desc { struct program_load_desc {
int num_sections; int num_sections;
int status;
int cpu; int cpu;
int pid; int pid;
int err;
int stack_prot; int stack_prot;
int pgid; int pgid;
int cred[8]; int cred[8];

View File

@@ -107,6 +107,7 @@ static long mcexec_prepare_image(ihk_os_t os,
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
struct mcctrl_per_proc_data *ppd = NULL; struct mcctrl_per_proc_data *ppd = NULL;
int num_sections; int num_sections;
int free_ikc_pointers = 1;
desc = kmalloc(sizeof(*desc), GFP_KERNEL); desc = kmalloc(sizeof(*desc), GFP_KERNEL);
if (!desc) { if (!desc) {
@@ -140,9 +141,9 @@ static long mcexec_prepare_image(ihk_os_t os,
goto put_and_free_out; goto put_and_free_out;
} }
pdesc = kmalloc(sizeof(struct program_load_desc) + pdesc = kmalloc(sizeof(struct program_load_desc) +
sizeof(struct program_image_section) sizeof(struct program_image_section) * num_sections,
* num_sections, GFP_KERNEL); GFP_KERNEL);
memcpy(pdesc, desc, sizeof(struct program_load_desc)); memcpy(pdesc, desc, sizeof(struct program_load_desc));
if (copy_from_user(pdesc->sections, udesc->sections, if (copy_from_user(pdesc->sections, udesc->sections,
@@ -167,10 +168,10 @@ static long mcexec_prepare_image(ihk_os_t os,
ret = -EFAULT; ret = -EFAULT;
goto put_and_free_out; goto put_and_free_out;
} }
envs = kmalloc(pdesc->envs_len, GFP_KERNEL); envs = kmalloc(pdesc->envs_len, GFP_KERNEL);
if (copy_from_user(envs, pdesc->envs, pdesc->envs_len)) { if (copy_from_user(envs, pdesc->envs, pdesc->envs_len)) {
ret = -EFAULT; ret = -EFAULT;
goto put_and_free_out; goto put_and_free_out;
} }
@@ -187,36 +188,24 @@ static long mcexec_prepare_image(ihk_os_t os,
dprintk("# of sections: %d\n", pdesc->num_sections); dprintk("# of sections: %d\n", pdesc->num_sections);
dprintk("%p (%lx)\n", pdesc, isp.arg); dprintk("%p (%lx)\n", pdesc, isp.arg);
pdesc->status = 0;
mb();
ret = mcctrl_ikc_send(os, pdesc->cpu, &isp);
if(ret < 0) {
printk("%s: ERROR mcctrl_ikc_send: %d\n", __FUNCTION__, ret);
goto put_and_free_out;
}
ret = wait_event_interruptible(ppd->wq_prepare, pdesc->status); ret = mcctrl_ikc_send_wait(os, pdesc->cpu, &isp, 0, &free_ikc_pointers,
3, pdesc, args, envs);
if (ret < 0) { if (ret < 0) {
printk("%s: ERROR after wait: %d\n", __FUNCTION__, ret); /* either send or remote prepare_process failed */
goto put_and_free_out;
}
if (pdesc->err < 0) {
ret = pdesc->err;
goto put_and_free_out; goto put_and_free_out;
} }
/* Update rpgtable */ /* Update rpgtable */
ppd->rpgtable = pdesc->rpgtable; ppd->rpgtable = pdesc->rpgtable;
if (copy_to_user(udesc, pdesc, sizeof(struct program_load_desc) + if (copy_to_user(udesc, pdesc, sizeof(struct program_load_desc) +
sizeof(struct program_image_section) * num_sections)) { sizeof(struct program_image_section) * num_sections)) {
ret = -EFAULT; ret = -EFAULT;
goto put_and_free_out; goto put_and_free_out;
} }
dprintk("%s: pid %d, rpgtable: 0x%lx added\n", dprintk("%s: pid %d, rpgtable: 0x%lx added\n",
__FUNCTION__, ppd->pid, ppd->rpgtable); __FUNCTION__, ppd->pid, ppd->rpgtable);
ret = 0; ret = 0;
@@ -224,10 +213,12 @@ static long mcexec_prepare_image(ihk_os_t os,
put_and_free_out: put_and_free_out:
mcctrl_put_per_proc_data(ppd); mcctrl_put_per_proc_data(ppd);
free_out: free_out:
kfree(args); if (free_ikc_pointers) {
kfree(pdesc); kfree(args);
kfree(envs); kfree(pdesc);
kfree(desc); kfree(envs);
kfree(desc);
}
return ret; return ret;
} }
@@ -1691,7 +1682,6 @@ int mcexec_create_per_process_data(ihk_os_t os)
INIT_LIST_HEAD(&ppd->wq_list); INIT_LIST_HEAD(&ppd->wq_list);
INIT_LIST_HEAD(&ppd->wq_req_list); INIT_LIST_HEAD(&ppd->wq_req_list);
INIT_LIST_HEAD(&ppd->wq_list_exact); INIT_LIST_HEAD(&ppd->wq_list_exact);
init_waitqueue_head(&ppd->wq_prepare);
init_waitqueue_head(&ppd->wq_procfs); init_waitqueue_head(&ppd->wq_procfs);
spin_lock_init(&ppd->wq_list_lock); spin_lock_init(&ppd->wq_list_lock);
memset(&ppd->cpu_set, 0, sizeof(cpumask_t)); memset(&ppd->cpu_set, 0, sizeof(cpumask_t));
@@ -2925,28 +2915,6 @@ long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg,
return -EINVAL; return -EINVAL;
} }
void mcexec_prepare_ack(ihk_os_t os, unsigned long arg, int err)
{
struct program_load_desc *desc = phys_to_virt(arg);
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
struct mcctrl_per_proc_data *ppd = NULL;
ppd = mcctrl_get_per_proc_data(usrdata, desc->pid);
if (!ppd) {
printk("%s: ERROR: no per process data for PID %d\n",
__FUNCTION__, desc->pid);
return;
}
desc->err = err;
desc->status = 1;
mb();
wake_up_all(&ppd->wq_prepare);
mcctrl_put_per_proc_data(ppd);
}
/* Per-CPU register manipulation functions */ /* Per-CPU register manipulation functions */
struct mcctrl_os_cpu_response { struct mcctrl_os_cpu_response {
int done; int done;

View File

@@ -49,7 +49,6 @@
//struct mcctrl_channel *channels; //struct mcctrl_channel *channels;
void mcexec_prepare_ack(ihk_os_t os, unsigned long arg, int err);
static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ihk_ikc_channel_desc *c); static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ihk_ikc_channel_desc *c);
int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet); int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet);
void sig_done(unsigned long arg, int err); void sig_done(unsigned long arg, int err);
@@ -58,6 +57,119 @@ void mcctrl_os_read_write_cpu_response(ihk_os_t os,
struct ikc_scd_packet *pisp); struct ikc_scd_packet *pisp);
void mcctrl_eventfd(ihk_os_t os, struct ikc_scd_packet *pisp); void mcctrl_eventfd(ihk_os_t os, struct ikc_scd_packet *pisp);
struct mcctrl_wakeup_desc {
int status;
int err;
wait_queue_head_t wq;
struct list_head chain;
int free_addrs_count;
void *free_addrs[];
};
/* Assumes usrdata->wakeup_descs_lock taken */
static void mcctrl_wakeup_desc_cleanup(ihk_os_t os,
struct mcctrl_wakeup_desc *desc)
{
int i;
list_del(&desc->chain);
for (i = 0; i < desc->free_addrs_count; i++) {
kfree(desc->free_addrs[i]);
}
}
static void mcctrl_wakeup_cb(ihk_os_t os, struct ikc_scd_packet *packet)
{
struct mcctrl_wakeup_desc *desc = packet->reply;
WRITE_ONCE(desc->err, packet->err);
/*
* Check if the other side is still waiting, and signal it we're done.
*
* Setting status needs to be done last because the other side could
* wake up opportunistically between this set and the wake_up call.
*
* If the other side is no longer waiting, free the memory that was
* left for us.
*/
if (cmpxchg(&desc->status, 0, 1)) {
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
unsigned long flags;
spin_lock_irqsave(&usrdata->wakeup_descs_lock, flags);
mcctrl_wakeup_desc_cleanup(os, desc);
spin_unlock_irqrestore(&usrdata->wakeup_descs_lock, flags);
return;
}
wake_up_interruptible(&desc->wq);
}
int mcctrl_ikc_send_wait(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp,
long int timeout, int *do_frees, int free_addrs_count, ...)
{
int ret, i;
va_list ap;
struct mcctrl_wakeup_desc *desc;
*do_frees = 1;
desc = kmalloc(sizeof(struct mcctrl_wakeup_desc) +
(free_addrs_count + 1) * sizeof(void *),
GFP_KERNEL);
if (!desc) {
pr_warn("%s: Could not allocate wakeup descriptor", __func__);
return -ENOMEM;
}
pisp->reply = desc;
va_start(ap, free_addrs_count);
for (i = 0; i < free_addrs_count; i++) {
desc->free_addrs[i] = va_arg(ap, void*);
}
desc->free_addrs[free_addrs_count] = desc;
va_end(ap);
desc->free_addrs_count = free_addrs_count + 1;
init_waitqueue_head(&desc->wq);
WRITE_ONCE(desc->err, 0);
WRITE_ONCE(desc->status, 0);
ret = mcctrl_ikc_send(os, cpu, pisp);
if (ret < 0) {
pr_warn("%s: mcctrl_ikc_send failed: %d\n", __func__, ret);
kfree(desc);
return ret;
}
if (timeout) {
ret = wait_event_interruptible_timeout(desc->wq,
desc->status, timeout);
} else {
ret = wait_event_interruptible(desc->wq, desc->status);
}
/*
* Check if wait aborted (signal..) or timed out, and notify
* the callback it will need to free things for us
*/
if (!cmpxchg(&desc->status, 0, 1)) {
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
unsigned long flags;
spin_lock_irqsave(&usrdata->wakeup_descs_lock, flags);
list_add(&desc->chain, &usrdata->wakeup_descs_list);
spin_unlock_irqrestore(&usrdata->wakeup_descs_lock, flags);
*do_frees = 0;
return ret < 0 ? ret : -ETIME;
}
ret = READ_ONCE(desc->err);
kfree(desc);
return ret;
}
/* XXX: this runs in atomic context! */ /* XXX: this runs in atomic context! */
static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
void *__packet, void *__os) void *__packet, void *__os)
@@ -72,11 +184,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
break; break;
case SCD_MSG_PREPARE_PROCESS_ACKED: case SCD_MSG_PREPARE_PROCESS_ACKED:
mcexec_prepare_ack(__os, pisp->arg, 0); mcctrl_wakeup_cb(__os, pisp);
break;
case SCD_MSG_PREPARE_PROCESS_NACKED:
mcexec_prepare_ack(__os, pisp->arg, pisp->err);
break; break;
case SCD_MSG_SYSCALL_ONESIDE: case SCD_MSG_SYSCALL_ONESIDE:
@@ -157,10 +265,15 @@ static int dummy_packet_handler(struct ihk_ikc_channel_desc *c,
int mcctrl_ikc_send(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp) int mcctrl_ikc_send(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp)
{ {
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); struct mcctrl_usrdata *usrdata;
if (cpu < 0 || os == NULL || usrdata == NULL || if (cpu < 0 || os == NULL) {
cpu >= usrdata->num_channels || !usrdata->channels[cpu].c) { return -EINVAL;
}
usrdata = ihk_host_os_get_usrdata(os);
if (usrdata == NULL || cpu >= usrdata->num_channels ||
!usrdata->channels[cpu].c) {
return -EINVAL; return -EINVAL;
} }
return ihk_ikc_send(usrdata->channels[cpu].c, pisp, 0); return ihk_ikc_send(usrdata->channels[cpu].c, pisp, 0);
@@ -354,6 +467,8 @@ int prepare_ikc_channels(ihk_os_t os)
mutex_init(&usrdata->part_exec.lock); mutex_init(&usrdata->part_exec.lock);
INIT_LIST_HEAD(&usrdata->part_exec.pli_list); INIT_LIST_HEAD(&usrdata->part_exec.pli_list);
usrdata->part_exec.nr_processes = -1; usrdata->part_exec.nr_processes = -1;
INIT_LIST_HEAD(&usrdata->wakeup_descs_list);
spin_lock_init(&usrdata->wakeup_descs_lock);
return 0; return 0;
@@ -375,7 +490,9 @@ void __destroy_ikc_channel(ihk_os_t os, struct mcctrl_channel *pmc)
void destroy_ikc_channels(ihk_os_t os) void destroy_ikc_channels(ihk_os_t os)
{ {
int i; int i;
unsigned long flags;
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
struct mcctrl_wakeup_desc *mwd_entry, *mwd_next;
if (!usrdata) { if (!usrdata) {
printk("%s: WARNING: no mcctrl_usrdata found\n", __FUNCTION__); printk("%s: WARNING: no mcctrl_usrdata found\n", __FUNCTION__);
@@ -395,6 +512,12 @@ void destroy_ikc_channels(ihk_os_t os)
ihk_ikc_destroy_channel(usrdata->ikc2linux[i]); ihk_ikc_destroy_channel(usrdata->ikc2linux[i]);
} }
} }
spin_lock_irqsave(&usrdata->wakeup_descs_lock, flags);
list_for_each_entry_safe(mwd_entry, mwd_next,
&usrdata->wakeup_descs_list, chain) {
mcctrl_wakeup_desc_cleanup(os, mwd_entry);
}
spin_unlock_irqrestore(&usrdata->wakeup_descs_lock, flags);
kfree(usrdata->channels); kfree(usrdata->channels);
kfree(usrdata->ikc2linux); kfree(usrdata->ikc2linux);

View File

@@ -48,7 +48,6 @@
#define SCD_MSG_PREPARE_PROCESS 0x1 #define SCD_MSG_PREPARE_PROCESS 0x1
#define SCD_MSG_PREPARE_PROCESS_ACKED 0x2 #define SCD_MSG_PREPARE_PROCESS_ACKED 0x2
#define SCD_MSG_PREPARE_PROCESS_NACKED 0x7
#define SCD_MSG_SCHEDULE_PROCESS 0x3 #define SCD_MSG_SCHEDULE_PROCESS 0x3
#define SCD_MSG_WAKE_UP_SYSCALL_THREAD 0x14 #define SCD_MSG_WAKE_UP_SYSCALL_THREAD 0x14
@@ -128,6 +127,7 @@ enum mcctrl_os_cpu_operation {
struct ikc_scd_packet { struct ikc_scd_packet {
int msg; int msg;
int err; int err;
void *reply;
union { union {
/* for traditional SCD_MSG_* */ /* for traditional SCD_MSG_* */
struct { struct {
@@ -163,9 +163,10 @@ struct ikc_scd_packet {
int eventfd_type; int eventfd_type;
}; };
}; };
char padding[12]; char padding[8];
}; };
struct mcctrl_priv { struct mcctrl_priv {
ihk_os_t os; ihk_os_t os;
struct program_load_desc *desc; struct program_load_desc *desc;
@@ -231,7 +232,6 @@ struct mcctrl_per_proc_data {
struct list_head wq_list_exact; /* These requests come from IKC IRQ handler targeting a particular thread */ struct list_head wq_list_exact; /* These requests come from IKC IRQ handler targeting a particular thread */
ihk_spinlock_t wq_list_lock; ihk_spinlock_t wq_list_lock;
wait_queue_head_t wq_prepare;
wait_queue_head_t wq_procfs; wait_queue_head_t wq_procfs;
struct list_head per_thread_data_hash[MCCTRL_PER_THREAD_DATA_HASH_SIZE]; struct list_head per_thread_data_hash[MCCTRL_PER_THREAD_DATA_HASH_SIZE];
@@ -342,6 +342,8 @@ struct mcctrl_usrdata {
wait_queue_head_t wq_procfs; wait_queue_head_t wq_procfs;
struct list_head per_proc_data_hash[MCCTRL_PER_PROC_DATA_HASH_SIZE]; struct list_head per_proc_data_hash[MCCTRL_PER_PROC_DATA_HASH_SIZE];
rwlock_t per_proc_data_hash_lock[MCCTRL_PER_PROC_DATA_HASH_SIZE]; rwlock_t per_proc_data_hash_lock[MCCTRL_PER_PROC_DATA_HASH_SIZE];
struct list_head wakeup_descs_list;
spinlock_t wakeup_descs_lock;
void **keys; void **keys;
struct sysfsm_data sysfsm_data; struct sysfsm_data sysfsm_data;
@@ -367,6 +369,10 @@ int mcctrl_ikc_send(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp);
int mcctrl_ikc_send_msg(ihk_os_t os, int cpu, int msg, int ref, unsigned long arg); int mcctrl_ikc_send_msg(ihk_os_t os, int cpu, int msg, int ref, unsigned long arg);
int mcctrl_ikc_is_valid_thread(ihk_os_t os, int cpu); int mcctrl_ikc_is_valid_thread(ihk_os_t os, int cpu);
/* ikc query-and-wait helper */
int mcctrl_ikc_send_wait(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp,
long int timeout, int *do_frees, int free_addrs_count, ...);
ihk_os_t osnum_to_os(int n); ihk_os_t osnum_to_os(int n);
/* syscall.c */ /* syscall.c */

View File

@@ -597,14 +597,9 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
case SCD_MSG_PREPARE_PROCESS: case SCD_MSG_PREPARE_PROCESS:
if((rc = process_msg_prepare_process(packet->arg)) == 0){ pckt.err = process_msg_prepare_process(packet->arg);
pckt.msg = SCD_MSG_PREPARE_PROCESS_ACKED; pckt.msg = SCD_MSG_PREPARE_PROCESS_ACKED;
pckt.err = 0; pckt.reply = packet->reply;
}
else{
pckt.msg = SCD_MSG_PREPARE_PROCESS_NACKED;
pckt.err = rc;
}
pckt.ref = packet->ref; pckt.ref = packet->ref;
pckt.arg = packet->arg; pckt.arg = packet->arg;
syscall_channel_send(resp_channel, &pckt); syscall_channel_send(resp_channel, &pckt);

View File

@@ -30,7 +30,6 @@
#define SCD_MSG_PREPARE_PROCESS 0x1 #define SCD_MSG_PREPARE_PROCESS 0x1
#define SCD_MSG_PREPARE_PROCESS_ACKED 0x2 #define SCD_MSG_PREPARE_PROCESS_ACKED 0x2
#define SCD_MSG_PREPARE_PROCESS_NACKED 0x7
#define SCD_MSG_SCHEDULE_PROCESS 0x3 #define SCD_MSG_SCHEDULE_PROCESS 0x3
#define SCD_MSG_WAKE_UP_SYSCALL_THREAD 0x14 #define SCD_MSG_WAKE_UP_SYSCALL_THREAD 0x14
@@ -168,10 +167,8 @@ typedef unsigned long __cpu_set_unit;
struct program_load_desc { struct program_load_desc {
int num_sections; int num_sections;
int status;
int cpu; int cpu;
int pid; int pid;
int err;
int stack_prot; int stack_prot;
int pgid; int pgid;
int cred[8]; int cred[8];
@@ -241,6 +238,7 @@ enum mcctrl_os_cpu_operation {
struct ikc_scd_packet { struct ikc_scd_packet {
int msg; int msg;
int err; int err;
void *reply;
union { union {
/* for traditional SCD_MSG_* */ /* for traditional SCD_MSG_* */
struct { struct {
@@ -276,7 +274,7 @@ struct ikc_scd_packet {
int eventfd_type; int eventfd_type;
}; };
}; };
char padding[12]; char padding[8];
}; };
#define IHK_SCD_REQ_THREAD_SPINNING 0 #define IHK_SCD_REQ_THREAD_SPINNING 0