mcctrl: thread pool based system call offload handling

Balazs Gerofi
2016-08-08 19:43:05 +09:00
parent 5fbeee953a
commit fb84d4ef11
8 changed files with 395 additions and 222 deletions


@@ -110,6 +110,13 @@ struct program_load_desc {
};
struct syscall_request {
/* TID of requesting thread */
int rtid;
/*
* TID of the target thread. A remote page fault response needs to
* designate the thread that must serve the request; 0 indicates any
* thread from the pool.
*/
int ttid;
unsigned long valid;
unsigned long number;
unsigned long args[6];
@@ -129,6 +136,10 @@ struct syscall_load_desc {
};
struct syscall_response {
/* TID of the thread that requested the service */
int ttid;
/* TID of the mcexec thread that is serving or has served the request */
int stid;
unsigned long status;
long ret;
unsigned long fault_address;


@@ -83,6 +83,7 @@ static long mcexec_prepare_image(ihk_os_t os,
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
unsigned long flags;
struct mcctrl_per_proc_data *ppd = NULL;
int i;
if (copy_from_user(&desc, udesc,
sizeof(struct program_load_desc))) {
@@ -156,6 +157,14 @@ static long mcexec_prepare_image(ihk_os_t os,
ppd->pid = pdesc->pid;
ppd->rpgtable = pdesc->rpgtable;
INIT_LIST_HEAD(&ppd->wq_list);
INIT_LIST_HEAD(&ppd->wq_list_exact);
spin_lock_init(&ppd->wq_list_lock);
for (i = 0; i < MCCTRL_PER_THREAD_DATA_HASH_SIZE; ++i) {
INIT_LIST_HEAD(&ppd->per_thread_data_hash[i]);
rwlock_init(&ppd->per_thread_data_hash_lock[i]);
}
flags = ihk_ikc_spinlock_lock(&usrdata->per_proc_list_lock);
list_add_tail(&ppd->list, &usrdata->per_proc_list);
@@ -417,42 +426,115 @@ static long mcexec_get_cpu(ihk_os_t os)
return info->n_cpus;
}
int mcexec_syscall(struct mcctrl_channel *c, int pid, unsigned long arg)
struct mcctrl_per_proc_data *mcctrl_get_per_proc_data(
struct mcctrl_usrdata *ud,
int pid)
{
struct wait_queue_head_list_node *wqhln = NULL;
struct wait_queue_head_list_node *wqhln_iter;
struct mcctrl_per_proc_data *ppd = NULL, *ppd_iter;
unsigned long flags;
/* Look up per-process wait queue head with pid */
flags = ihk_ikc_spinlock_lock(&c->wq_list_lock);
list_for_each_entry(wqhln_iter, &c->wq_list, list) {
if (wqhln_iter->pid == pid) {
wqhln = wqhln_iter;
/* Look up per-process structure */
flags = ihk_ikc_spinlock_lock(&ud->per_proc_list_lock);
list_for_each_entry(ppd_iter, &ud->per_proc_list, list) {
if (ppd_iter->pid == pid) {
ppd = ppd_iter;
break;
}
}
ihk_ikc_spinlock_unlock(&ud->per_proc_list_lock, flags);
return ppd;
}
/*
* Called indirectly from the IKC message handler.
*/
int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet)
{
struct wait_queue_head_list_node *wqhln = NULL;
struct wait_queue_head_list_node *wqhln_iter;
struct wait_queue_head_list_node *wqhln_alloc = NULL;
struct mcctrl_channel *c = ud->channels + packet->ref;
int pid = packet->pid;
unsigned long flags;
struct mcctrl_per_proc_data *ppd;
if (!wqhln) {
retry_alloc:
wqhln = kmalloc(sizeof(*wqhln), GFP_ATOMIC);
if (!wqhln) {
printk("WARNING: coudln't alloc wait queue head, retrying..\n");
goto retry_alloc;
}
wqhln->pid = pid;
wqhln->req = 0;
init_waitqueue_head(&wqhln->wq_syscall);
list_add_tail(&wqhln->list, &c->wq_list);
wqhln_alloc = kmalloc(sizeof(*wqhln), GFP_KERNEL);
if (!wqhln_alloc) {
printk("WARNING: coudln't alloc wait queue head, retrying..\n");
goto retry_alloc;
}
/* Look up per-process structure */
ppd = mcctrl_get_per_proc_data(ud, pid);
if (!ppd) {
kprintf("%s: ERROR: no per-process structure for PID %d??\n",
__FUNCTION__, task_tgid_vnr(current));
return 0;
}
dprintk("%s: (packet_handler) rtid: %d, ttid: %d, sys nr: %d\n",
__FUNCTION__,
c->param.request_va->rtid,
c->param.request_va->ttid,
c->param.request_va->number);
/*
* Three scenarios are possible:
* - Find the designated thread if req->ttid is specified.
* - Find any available thread if req->ttid is zero.
* - Add a request element if no threads are available.
*/
flags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock);
/* Is this a request for a specific thread? See if it's waiting */
if (c->param.request_va->ttid) {
list_for_each_entry(wqhln_iter, &ppd->wq_list_exact, list) {
if (c->param.request_va->ttid != task_pid_vnr(wqhln_iter->task))
continue;
wqhln = wqhln_iter;
break;
}
if (!wqhln) {
printk("%s: WARNING: no target thread found for exact request??\n",
__FUNCTION__);
}
}
/* Is there any thread available? */
else {
list_for_each_entry(wqhln_iter, &ppd->wq_list, list) {
if (wqhln_iter->task && !wqhln_iter->req) {
wqhln = wqhln_iter;
break;
}
}
}
/* If no match found, add request */
if (!wqhln) {
wqhln = wqhln_alloc;
wqhln->req = 0;
wqhln->task = NULL;
init_waitqueue_head(&wqhln->wq_syscall);
list_add_tail(&wqhln->list, &ppd->wq_list);
}
else {
kfree(wqhln_alloc);
}
memcpy(&wqhln->packet, packet, sizeof(*packet));
wqhln->req = 1;
wake_up(&wqhln->wq_syscall);
ihk_ikc_spinlock_unlock(&c->wq_list_lock, flags);
ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, flags);
return 0;
}
/*
* Called from an mcexec thread via ioctl().
*/
int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req)
{
struct syscall_wait_desc swd;
@@ -462,8 +544,18 @@ int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req)
struct wait_queue_head_list_node *wqhln_iter;
int ret = 0;
unsigned long irqflags;
//printk("mcexec_wait_syscall swd=%p req=%p size=%d\n", &swd, req, sizeof(swd.cpu));
struct mcctrl_per_proc_data *ppd;
/* Look up per-process structure */
ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current));
if (!ppd) {
kprintf("%s: ERROR: no per-process structure for PID %d??\n",
__FUNCTION__, task_tgid_vnr(current));
return -EINVAL;
}
//printk("mcexec_wait_syscall swd=%p req=%p size=%d\n", &swd, req, sizeof(swd.cpu));
if (copy_from_user(&swd, req, sizeof(swd))) {
return -EFAULT;
}
@@ -471,16 +563,15 @@ int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req)
if (swd.cpu >= usrdata->num_channels)
return -EINVAL;
c = get_peer_channel(usrdata, current);
c = (struct mcctrl_channel *)mcctrl_get_per_thread_data(ppd, current);
if (c) {
printk("mcexec_wait_syscall:already registered. task %p ch %p\n",
current, c);
return -EBUSY;
}
c = usrdata->channels + swd.cpu;
retry:
/* Prepare per-process wait queue head */
/* Prepare per-thread wait queue head or find a valid request */
retry_alloc:
wqhln = kmalloc(sizeof(*wqhln), GFP_KERNEL);
if (!wqhln) {
@@ -488,35 +579,48 @@ retry_alloc:
goto retry_alloc;
}
wqhln->pid = swd.pid;
wqhln->task = current;
wqhln->req = 0;
init_waitqueue_head(&wqhln->wq_syscall);
irqflags = ihk_ikc_spinlock_lock(&c->wq_list_lock);
/* First see if there is one wait queue already */
list_for_each_entry(wqhln_iter, &c->wq_list, list) {
if (wqhln_iter->pid == task_tgid_vnr(current)) {
irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock);
/* First see if there is a valid request already that is not yet taken */
list_for_each_entry(wqhln_iter, &ppd->wq_list, list) {
if (wqhln_iter->task == NULL && wqhln_iter->req) {
kfree(wqhln);
wqhln = wqhln_iter;
wqhln->task = current;
list_del(&wqhln->list);
break;
}
}
list_add_tail(&wqhln->list, &c->wq_list);
ihk_ikc_spinlock_unlock(&c->wq_list_lock, irqflags);
ret = wait_event_interruptible(wqhln->wq_syscall, wqhln->req);
/* No valid request? Wait for one.. */
if (wqhln->req == 0) {
list_add_tail(&wqhln->list, &ppd->wq_list);
ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, irqflags);
ret = wait_event_interruptible(wqhln->wq_syscall, wqhln->req);
/* Remove per-thread wait queue head */
irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock);
list_del(&wqhln->list);
}
ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, irqflags);
/* Remove per-process wait queue head */
irqflags = ihk_ikc_spinlock_lock(&c->wq_list_lock);
list_del(&wqhln->list);
ihk_ikc_spinlock_unlock(&c->wq_list_lock, irqflags);
if (ret && !wqhln->req) {
kfree(wqhln);
return -EINTR;
}
/* Channel is determined by request */
dprintk("%s: tid: %d request from CPU %d\n",
__FUNCTION__, task_pid_vnr(current), wqhln->packet.ref);
c = usrdata->channels + wqhln->packet.ref;
kfree(wqhln);
#if 0
if (c->param.request_va->number == 61 &&
c->param.request_va->args[0] == swd.pid) {
@@ -528,6 +632,7 @@ retry_alloc:
return -EINTR;
}
#endif
mb();
if (!c->param.request_va->valid) {
@@ -543,18 +648,27 @@ retry_alloc:
dprintk("SC #%lx, %lx\n",
c->param.request_va->number,
c->param.request_va->args[0]);
register_peer_channel(usrdata, current, c);
if (mcctrl_add_per_thread_data(ppd, current, c) < 0) {
kprintf("%s: error adding per-thread data\n", __FUNCTION__);
return -EINVAL;
}
if (__do_in_kernel_syscall(os, c, c->param.request_va)) {
if (copy_to_user(&req->sr, c->param.request_va,
sizeof(struct syscall_request))) {
deregister_peer_channel(usrdata, current, c);
if (mcctrl_delete_per_thread_data(ppd, current) < 0) {
kprintf("%s: error deleting per-thread data\n", __FUNCTION__);
return -EINVAL;
}
return -EFAULT;
}
return 0;
}
deregister_peer_channel(usrdata, current, c);
if (mcctrl_delete_per_thread_data(ppd, current) < 0) {
kprintf("%s: error deleting per-thread data\n", __FUNCTION__);
return -EINVAL;
}
goto retry;
}
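
For orientation, the userspace half of the pool is a set of mcexec threads that each block in this ioctl until mcexec_syscall() hands them a request. The sketch below shows such a worker loop; the MCEXEC_UP_WAIT_SYSCALL command name and serve_request() are illustrative assumptions, not part of this diff, and struct syscall_wait_desc is presumed to come from the shared uprotocol header.

#include <string.h>
#include <sys/ioctl.h>

/* Hypothetical mcexec pool worker: the ioctl command name and
 * serve_request() are assumptions for illustration only. */
static void *syscall_offload_worker(void *arg)
{
	int fd = *(int *)arg;			/* mcctrl device fd */
	struct syscall_wait_desc swd;

	for (;;) {
		memset(&swd, 0, sizeof(swd));

		/* Blocks in mcexec_wait_syscall() until a request is handed
		 * to this thread, or an unserved one is already queued */
		if (ioctl(fd, MCEXEC_UP_WAIT_SYSCALL, &swd) != 0)
			continue;		/* e.g. EINTR: retry */

		/* Serve swd.sr, then report back via mcexec_ret_syscall() */
		serve_request(fd, &swd.sr);
	}
	return NULL;
}

Each worker that finishes a request re-enters the ioctl, which is what makes the pool elastic: a request with ttid == 0 goes to whichever thread happens to be waiting.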
@@ -675,6 +789,7 @@ long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg)
struct syscall_ret_desc ret;
struct mcctrl_channel *mc;
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
struct mcctrl_per_proc_data *ppd;
#if 0
ihk_dma_channel_t channel;
struct ihk_dma_request request;
@@ -688,13 +803,25 @@ long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg)
if (copy_from_user(&ret, arg, sizeof(struct syscall_ret_desc))) {
return -EFAULT;
}
mc = usrdata->channels + ret.cpu;
if (!mc) {
/* Look up per-process structure */
ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current));
if (!ppd) {
kprintf("%s: ERROR: no per-process structure for PID %d??\n",
__FUNCTION__, task_tgid_vnr(current));
return -EINVAL;
}
deregister_peer_channel(usrdata, current, mc);
mc = (struct mcctrl_channel *)mcctrl_get_per_thread_data(ppd, current);
if (!mc) {
kprintf("%s: ERROR: no peer channel registerred??\n", __FUNCTION__);
return -EINVAL;
}
mcctrl_delete_per_thread_data(ppd, current);
mc->param.response_va->ret = ret.ret;
mc->param.response_va->stid = task_pid_vnr(current);
if (ret.size > 0) {
/* Host => Accel. Write is fast. */
@@ -876,6 +1003,34 @@ int mcexec_close_exec(ihk_os_t os)
struct mckernel_exec_file *mcef = NULL;
int found = 0;
int os_ind = ihk_host_os_get_index(os);
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
unsigned long flags;
struct mcctrl_per_proc_data *ppd = NULL, *ppd_iter;
ppd = NULL;
flags = ihk_ikc_spinlock_lock(&usrdata->per_proc_list_lock);
list_for_each_entry(ppd_iter, &usrdata->per_proc_list, list) {
if (ppd_iter->pid == task_tgid_vnr(current)) {
ppd = ppd_iter;
break;
}
}
if (ppd) {
list_del(&ppd->list);
dprintk("pid: %d, tid: %d: rpgtable for %d (0x%lx) removed\n",
task_tgid_vnr(current), current->pid, ppd->pid, ppd->rpgtable);
kfree(ppd);
}
else {
printk("WARNING: no per process data for pid %d ?\n",
task_tgid_vnr(current));
}
ihk_ikc_spinlock_unlock(&usrdata->per_proc_list_lock, flags);
if (os_ind < 0) {
return EINVAL;


@@ -40,7 +40,7 @@
void mcexec_prepare_ack(ihk_os_t os, unsigned long arg, int err);
static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ihk_ikc_channel_desc *c);
int mcexec_syscall(struct mcctrl_channel *c, int pid, unsigned long arg);
int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet);
void sig_done(unsigned long arg, int err);
/* XXX: this runs in atomic context! */
@@ -64,7 +64,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
break;
case SCD_MSG_SYSCALL_ONESIDE:
mcexec_syscall(usrdata->channels + pisp->ref, pisp->pid, pisp->arg);
mcexec_syscall(usrdata, pisp);
break;
case SCD_MSG_PROCFS_ANSWER:
@@ -263,9 +263,6 @@ static int connect_handler(struct ihk_ikc_channel_info *param)
}
param->packet_handler = syscall_packet_handler;
INIT_LIST_HEAD(&usrdata->channels[cpu].wq_list);
spin_lock_init(&usrdata->channels[cpu].wq_list_lock);
usrdata->channels[cpu].c = c;
kprintf("syscall: MC CPU %d connected. c=%p\n", cpu, c);
@@ -284,9 +281,6 @@ static int connect_handler2(struct ihk_ikc_channel_info *param)
param->packet_handler = syscall_packet_handler;
INIT_LIST_HEAD(&usrdata->channels[cpu].wq_list);
spin_lock_init(&usrdata->channels[cpu].wq_list_lock);
usrdata->channels[cpu].c = c;
kprintf("syscall: MC CPU %d connected. c=%p\n", cpu, c);
@@ -313,7 +307,6 @@ int prepare_ikc_channels(ihk_os_t os)
{
struct ihk_cpu_info *info;
struct mcctrl_usrdata *usrdata;
int error;
usrdata = kzalloc(sizeof(struct mcctrl_usrdata), GFP_KERNEL);
usrdata->mcctrl_doorbell_va = (void *)__get_free_page(GFP_KERNEL);
@@ -351,11 +344,6 @@ int prepare_ikc_channels(ihk_os_t os)
INIT_LIST_HEAD(&usrdata->cpu_topology_list);
INIT_LIST_HEAD(&usrdata->node_topology_list);
error = init_peer_channel_registry(usrdata);
if (error) {
return error;
}
return 0;
}
@@ -394,7 +382,6 @@ void destroy_ikc_channels(ihk_os_t os)
}
free_page((unsigned long)usrdata->mcctrl_doorbell_va);
destroy_peer_channel_registry(usrdata);
kfree(usrdata->channels);
kfree(usrdata);
}


@@ -41,6 +41,7 @@
#include <ikc/master.h>
#include <ihk/msr.h>
#include <linux/semaphore.h>
#include <linux/rwlock.h>
#include <linux/threads.h>
#include "sysfs.h"
@@ -154,8 +155,11 @@ struct syscall_params {
struct wait_queue_head_list_node {
struct list_head list;
wait_queue_head_t wq_syscall;
int pid;
struct task_struct *task;
/* Denotes an exclusive wait for requester TID rtid */
int rtid;
int req;
struct ikc_scd_packet packet;
};
struct mcctrl_channel {
@@ -163,15 +167,29 @@ struct mcctrl_channel {
struct syscall_params param;
struct ikc_scd_init_param init;
void *dma_buf;
struct list_head wq_list;
ihk_spinlock_t wq_list_lock;
};
struct mcctrl_per_thread_data {
struct list_head hash;
struct task_struct *task;
void *data;
};
#define MCCTRL_PER_THREAD_DATA_HASH_SHIFT 8
#define MCCTRL_PER_THREAD_DATA_HASH_SIZE (1 << MCCTRL_PER_THREAD_DATA_HASH_SHIFT)
#define MCCTRL_PER_THREAD_DATA_HASH_MASK (MCCTRL_PER_THREAD_DATA_HASH_SIZE - 1)
struct mcctrl_per_proc_data {
struct list_head list;
int pid;
unsigned long rpgtable; /* per process, not per OS */
struct list_head wq_list;
struct list_head wq_list_exact;
ihk_spinlock_t wq_list_lock;
struct list_head per_thread_data_hash[MCCTRL_PER_THREAD_DATA_HASH_SIZE];
rwlock_t per_thread_data_hash_lock[MCCTRL_PER_THREAD_DATA_HASH_SIZE];
};
struct sysfsm_req {
@@ -273,12 +291,16 @@ int mcctrl_ikc_is_valid_thread(ihk_os_t os, int cpu);
ihk_os_t osnum_to_os(int n);
/* syscall.c */
int init_peer_channel_registry(struct mcctrl_usrdata *ud);
void destroy_peer_channel_registry(struct mcctrl_usrdata *ud);
int register_peer_channel(struct mcctrl_usrdata *ud, void *key, struct mcctrl_channel *ch);
int deregister_peer_channel(struct mcctrl_usrdata *ud, void *key, struct mcctrl_channel *ch);
struct mcctrl_channel *get_peer_channel(struct mcctrl_usrdata *ud, void *key);
int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall_request *sc);
struct mcctrl_per_proc_data *mcctrl_get_per_proc_data(
struct mcctrl_usrdata *ud,
int pid);
int mcctrl_add_per_thread_data(struct mcctrl_per_proc_data* ppd,
struct task_struct *task, void *data);
int mcctrl_delete_per_thread_data(struct mcctrl_per_proc_data* ppd,
struct task_struct *task);
struct mcctrl_per_thread_data *mcctrl_get_per_thread_data(
struct mcctrl_per_proc_data *ppd, struct task_struct *task);
#define PROCFS_NAME_MAX 1000
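
As a worked example of the bucket selection these helpers rely on: the task_struct pointer is shifted right by four to drop its low alignment bits before being masked with the 256-entry table size. The address below is invented purely for illustration.

/* Illustrative only: the task address is made up. */
struct task_struct *task = (struct task_struct *)0xffff880123456780UL;
int hash = (((uint64_t)task >> 4) & MCCTRL_PER_THREAD_DATA_HASH_MASK);
/* 0xffff880123456780 >> 4 = 0xffff88012345678;
 * 0x...78 & 0xff = 0x78, i.e. bucket 120 of 256 */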


@@ -84,88 +84,96 @@ static void print_dma_lastreq(void)
}
#endif
int init_peer_channel_registry(struct mcctrl_usrdata *ud)
int mcctrl_add_per_thread_data(struct mcctrl_per_proc_data* ppd,
struct task_struct *task, void *data)
{
ud->keys = kzalloc(sizeof(void *) * ud->num_channels, GFP_KERNEL);
if (!ud->keys) {
printk("Error: cannot allocate usrdata.keys[].\n");
return -ENOMEM;
struct mcctrl_per_thread_data *ptd_iter, *ptd = NULL;
struct mcctrl_per_thread_data *ptd_alloc = NULL;
int hash = (((uint64_t)task >> 4) & MCCTRL_PER_THREAD_DATA_HASH_MASK);
int ret = 0;
unsigned long flags;
ptd_alloc = kmalloc(sizeof(*ptd), GFP_ATOMIC);
if (!ptd_alloc) {
kprintf("%s: error allocate per thread data\n", __FUNCTION__);
ret = -ENOMEM;
goto out_noalloc;
}
return 0;
}
void destroy_peer_channel_registry(struct mcctrl_usrdata *ud)
{
kfree(ud->keys);
ud->keys = NULL;
return;
}
int register_peer_channel(struct mcctrl_usrdata *ud, void *key, struct mcctrl_channel *ch)
{
int cpu;
cpu = ch - ud->channels;
if ((cpu < 0) || (ud->num_channels <= cpu)) {
printk("register_peer_channel(%p,%p,%p):"
"not a syscall channel. cpu=%d\n",
ud, key, ch, cpu);
return -EINVAL;
}
if (ud->keys[cpu] != NULL) {
printk("register_peer_channel(%p,%p,%p):"
"already registered. cpu=%d\n",
ud, key, ch, cpu);
/*
* When mcexec receives a signal it may exit without calling
* deregister_peer_channel(), so a replacement registration
* must be allowed here.
*/
#if 0
return -EBUSY;
#endif
}
ud->keys[cpu] = key;
return 0;
}
int deregister_peer_channel(struct mcctrl_usrdata *ud, void *key, struct mcctrl_channel *ch)
{
int cpu;
cpu = ch - ud->channels;
if ((cpu < 0) || (ud->num_channels <= cpu)) {
printk("deregister_peer_channel(%p,%p,%p):"
"not a syscall channel. cpu=%d\n",
ud, key, ch, cpu);
return -EINVAL;
}
if (ud->keys[cpu] && (ud->keys[cpu] != key)) {
printk("deregister_peer_channel(%p,%p,%p):"
"not registered. cpu=%d\n",
ud, key, ch, cpu);
return -EBUSY;
}
ud->keys[cpu] = NULL;
return 0;
}
struct mcctrl_channel *get_peer_channel(struct mcctrl_usrdata *ud, void *key)
{
int cpu;
for (cpu = 0; cpu < ud->num_channels; ++cpu) {
if (ud->keys[cpu] == key) {
return &ud->channels[cpu];
/* Check if data for this thread exists and add if not */
write_lock_irqsave(&ppd->per_thread_data_hash_lock[hash], flags);
list_for_each_entry(ptd_iter, &ppd->per_thread_data_hash[hash], hash) {
if (ptd_iter->task == task) {
ptd = ptd_iter;
break;
}
}
return NULL;
if (ptd) {
ret = -EBUSY;
kfree(ptd_alloc);
goto out;
}
ptd = ptd_alloc;
ptd->task = task;
ptd->data = data;
list_add_tail(&ptd->hash, &ppd->per_thread_data_hash[hash]);
out:
write_unlock_irqrestore(&ppd->per_thread_data_hash_lock[hash], flags);
out_noalloc:
return ret;
}
int mcctrl_delete_per_thread_data(struct mcctrl_per_proc_data* ppd,
struct task_struct *task)
{
struct mcctrl_per_thread_data *ptd_iter, *ptd = NULL;
int hash = (((uint64_t)task >> 4) & MCCTRL_PER_THREAD_DATA_HASH_MASK);
int ret = 0;
unsigned long flags;
/* Check if data for this thread exists and delete it */
write_lock_irqsave(&ppd->per_thread_data_hash_lock[hash], flags);
list_for_each_entry(ptd_iter, &ppd->per_thread_data_hash[hash], hash) {
if (ptd_iter->task == task) {
ptd = ptd_iter;
break;
}
}
if (!ptd) {
ret = -EINVAL;
goto out;
}
list_del(&ptd->hash);
kfree(ptd);
out:
write_unlock_irqrestore(&ppd->per_thread_data_hash_lock[hash], flags);
return ret;
}
struct mcctrl_per_thread_data *mcctrl_get_per_thread_data(struct mcctrl_per_proc_data *ppd, struct task_struct *task)
{
struct mcctrl_per_thread_data *ptd_iter, *ptd = NULL;
int hash = (((uint64_t)task >> 4) & MCCTRL_PER_THREAD_DATA_HASH_MASK);
unsigned long flags;
/* Check if data for this thread exists and return it */
read_lock_irqsave(&ppd->per_thread_data_hash_lock[hash], flags);
list_for_each_entry(ptd_iter, &ppd->per_thread_data_hash[hash], hash) {
if (ptd_iter->task == task) {
ptd = ptd_iter;
break;
}
}
read_unlock_irqrestore(&ppd->per_thread_data_hash_lock[hash], flags);
return ptd ? ptd->data : NULL;
}
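
Taken together, the three helpers above form a small keyed store with the task_struct pointer as key. Condensed from the call sites in this commit (error handling elided; ppd, c and current as in the surrounding code):

/* Bind the serving channel to this thread for the duration of a
 * request (-EBUSY if one is already bound), look it up from another
 * path, and drop it once the request is answered (-EINVAL if absent). */
if (mcctrl_add_per_thread_data(ppd, current, c) < 0)
	return -EINVAL;

c = (struct mcctrl_channel *)mcctrl_get_per_thread_data(ppd, current);

mcctrl_delete_per_thread_data(ppd, current);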
#if 1 /* x86 depend, host OS side */
@@ -238,10 +246,23 @@ static int remote_page_fault(struct mcctrl_usrdata *usrdata, void *fault_addr, u
struct syscall_request *req;
struct syscall_response *resp;
int error;
struct wait_queue_head_list_node *wqhln;
unsigned long irqflags;
struct mcctrl_per_proc_data *ppd;
dprintk("remote_page_fault(%p,%p,%llx)\n", usrdata, fault_addr, reason);
dprintk("%s: tid: %d, fault_addr: %p\n",
__FUNCTION__, task_pid_vnr(current), fault_addr);
/* Look up per-process structure */
ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current));
channel = get_peer_channel(usrdata, current);
if (!ppd) {
kprintf("%s: ERROR: no per-process structure for PID %d??\n",
__FUNCTION__, task_tgid_vnr(current));
return -EINVAL;
}
channel = (struct mcctrl_channel *)mcctrl_get_per_thread_data(ppd, current);
if (!channel) {
error = -ENOENT;
printk("remote_page_fault(%p,%p,%llx):channel not found. %d\n",
@@ -252,10 +273,28 @@ static int remote_page_fault(struct mcctrl_usrdata *usrdata, void *fault_addr, u
req = channel->param.request_va;
resp = channel->param.response_va;
/* request page fault */
retry_alloc:
wqhln = kmalloc(sizeof(*wqhln), GFP_KERNEL);
if (!wqhln) {
printk("WARNING: coudln't alloc wait queue head, retrying..\n");
goto retry_alloc;
}
/* Prepare per-thread wait queue head */
wqhln->task = current;
wqhln->req = 0;
init_waitqueue_head(&wqhln->wq_syscall);
irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock);
/* Add to exact list */
list_add_tail(&wqhln->list, &ppd->wq_list_exact);
ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, irqflags);
/* Request page fault */
resp->ret = -EFAULT;
resp->fault_address = (unsigned long)fault_addr;
resp->fault_reason = reason;
resp->stid = task_pid_vnr(current);
#define STATUS_PAGER_COMPLETED 1
#define STATUS_PAGE_FAULT 3
@@ -264,43 +303,18 @@ static int remote_page_fault(struct mcctrl_usrdata *usrdata, void *fault_addr, u
resp->status = STATUS_PAGE_FAULT;
for (;;) {
struct wait_queue_head_list_node *wqhln;
struct wait_queue_head_list_node *wqhln_iter;
unsigned long irqflags;
retry_alloc:
wqhln = kmalloc(sizeof(*wqhln), GFP_KERNEL);
if (!wqhln) {
printk("WARNING: coudln't alloc wait queue head, retrying..\n");
goto retry_alloc;
}
/* Prepare per-process wait queue head */
wqhln->pid = task_tgid_vnr(current);
wqhln->req = 0;
init_waitqueue_head(&wqhln->wq_syscall);
irqflags = ihk_ikc_spinlock_lock(&channel->wq_list_lock);
/* First see if there is a wait queue already */
list_for_each_entry(wqhln_iter, &channel->wq_list, list) {
if (wqhln_iter->pid == task_tgid_vnr(current)) {
kfree(wqhln);
wqhln = wqhln_iter;
list_del(&wqhln->list);
break;
}
}
list_add_tail(&wqhln->list, &channel->wq_list);
ihk_ikc_spinlock_unlock(&channel->wq_list_lock, irqflags);
dprintk("%s: tid: %d, fault_addr: %p SLEEPING\n",
__FUNCTION__, task_pid_vnr(current), fault_addr);
/* wait for response */
error = wait_event_interruptible(wqhln->wq_syscall, wqhln->req);
/* Remove per-process wait queue head */
irqflags = ihk_ikc_spinlock_lock(&channel->wq_list_lock);
/* Remove per-thread wait queue head */
irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock);
list_del(&wqhln->list);
ihk_ikc_spinlock_unlock(&channel->wq_list_lock, irqflags);
ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, irqflags);
kfree(wqhln);
dprintk("%s: tid: %d, fault_addr: %p WOKEN UP\n",
__FUNCTION__, task_pid_vnr(current), fault_addr);
if (error) {
printk("remote_page_fault:interrupted. %d\n", error);
@@ -472,26 +486,18 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
#if USE_VM_INSERT_PFN
size_t pix;
#endif
struct mcctrl_per_proc_data *ppd, *ppd_iter;
unsigned long flags;
struct mcctrl_per_proc_data *ppd;
dprintk("mcctrl:page fault:flags %#x pgoff %#lx va %p page %p\n",
vmf->flags, vmf->pgoff, vmf->virtual_address, vmf->page);
ppd = NULL;
flags = ihk_ikc_spinlock_lock(&usrdata->per_proc_list_lock);
list_for_each_entry(ppd_iter, &usrdata->per_proc_list, list) {
if (ppd_iter->pid == task_tgid_vnr(current)) {
ppd = ppd_iter;
break;
}
}
ihk_ikc_spinlock_unlock(&usrdata->per_proc_list_lock, flags);
/* Look up per-process structure */
ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current));
if (!ppd) {
printk("ERROR: no per process data for pid %d\n", task_tgid_vnr(current));
return VM_FAULT_SIGBUS;
kprintf("%s: ERROR: no per-process structure for PID %d??\n",
__FUNCTION__, task_tgid_vnr(current));
return -EINVAL;
}
for (try = 1; ; ++try) {
@@ -1711,33 +1717,6 @@ int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall
break;
case __NR_exit_group: {
unsigned long flags;
struct mcctrl_per_proc_data *ppd = NULL, *ppd_iter;
ppd = NULL;
flags = ihk_ikc_spinlock_lock(&usrdata->per_proc_list_lock);
list_for_each_entry(ppd_iter, &usrdata->per_proc_list, list) {
if (ppd_iter->pid == task_tgid_vnr(current)) {
ppd = ppd_iter;
break;
}
}
if (ppd) {
list_del(&ppd->list);
dprintk("pid: %d, tid: %d: rpgtable for %d (0x%lx) removed\n",
task_tgid_vnr(current), current->pid, ppd->pid, ppd->rpgtable);
kfree(ppd);
}
else {
printk("WARNING: no per process data for pid %d ?\n",
task_tgid_vnr(current));
}
ihk_ikc_spinlock_unlock(&usrdata->per_proc_list_lock, flags);
/* Make sure the user space handler will be called as well */
error = -ENOSYS;


@@ -1870,13 +1870,13 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
sig = 0;
term = 0;
do_syscall_return(fd, cpu, 0, 0, 0, 0, 0);
/* Drop executable file */
if ((ret = ioctl(fd, MCEXEC_UP_CLOSE_EXEC)) != 0) {
fprintf(stderr, "WARNING: close_exec() couldn't find exec file?\n");
}
do_syscall_return(fd, cpu, 0, 0, 0, 0, 0);
__dprintf("__NR_exit/__NR_exit_group: %ld (cpu_id: %d)\n",
w.sr.args[0], cpu);
if(w.sr.number == __NR_exit_group){


@@ -210,12 +210,23 @@ struct ikc_scd_init_param {
};
struct syscall_request {
/* TID of requesting thread */
int rtid;
/*
* TID of the target thread. A remote page fault response needs to
* designate the thread that must serve the request; 0 indicates any
* thread from the pool.
*/
int ttid;
unsigned long valid;
unsigned long number;
unsigned long args[6];
};
struct syscall_response {
/* TID of the thread that requested the service */
int ttid;
/* TID of the mcexec thread that is serving the request */
int stid;
unsigned long status;
long ret;
unsigned long fault_address;


@@ -227,6 +227,10 @@ long do_syscall(struct syscall_request *req, int cpu, int pid)
scp = &get_cpu_local_var(cpu)->scp;
}
res = scp->response_va;
/* The current thread is the requester and any thread from
* the pool may serve the request */
req->rtid = cpu_local_var(current)->tid;
req->ttid = 0;
send_syscall(req, cpu, pid);
@@ -281,6 +285,10 @@ long do_syscall(struct syscall_request *req, int cpu, int pid)
#define PAGER_RESUME_PAGE_FAULT 0x0101
req2.args[0] = PAGER_RESUME_PAGE_FAULT;
req2.args[1] = error;
/* The current thread is the requester and only the waiting thread
* may serve the request */
req2.rtid = cpu_local_var(current)->tid;
req2.ttid = res->stid;
send_syscall(&req2, cpu, pid);
}