From d7bc947a02479eb4c57aad1cba0c7634d6a9cdb9 Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Tue, 9 Aug 2016 16:49:42 +0900 Subject: [PATCH] mcctrl: redesign mcctrl_channels for IKC packet based syscall offloading --- executer/kernel/mcctrl/control.c | 185 ++++++----------- executer/kernel/mcctrl/ikc.c | 24 ++- executer/kernel/mcctrl/mcctrl.h | 8 +- executer/kernel/mcctrl/procfs.c | 3 +- executer/kernel/mcctrl/syscall.c | 346 ++++++++----------------------- executer/kernel/mcctrl/sysfs.c | 3 +- kernel/host.c | 42 ++-- kernel/include/syscall.h | 46 ++-- kernel/syscall.c | 52 ++--- 9 files changed, 244 insertions(+), 465 deletions(-) diff --git a/executer/kernel/mcctrl/control.c b/executer/kernel/mcctrl/control.c index c9893406..0ec4db1d 100644 --- a/executer/kernel/mcctrl/control.c +++ b/executer/kernel/mcctrl/control.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -125,30 +126,30 @@ static long mcexec_prepare_image(ihk_os_t os, } pdesc->args = (void*)virt_to_phys(args); - printk("args: 0x%lX\n", (unsigned long)pdesc->args); - printk("argc: %ld\n", *(long *)args); + dprintk("args: 0x%lX\n", (unsigned long)pdesc->args); + dprintk("argc: %ld\n", *(long *)args); pdesc->envs = (void*)virt_to_phys(envs); - printk("envs: 0x%lX\n", (unsigned long)pdesc->envs); - printk("envc: %ld\n", *(long *)envs); + dprintk("envs: 0x%lX\n", (unsigned long)pdesc->envs); + dprintk("envc: %ld\n", *(long *)envs); isp.msg = SCD_MSG_PREPARE_PROCESS; isp.ref = pdesc->cpu; isp.arg = virt_to_phys(pdesc); - printk("# of sections: %d\n", pdesc->num_sections); - printk("%p (%lx)\n", pdesc, isp.arg); + dprintk("# of sections: %d\n", pdesc->num_sections); + dprintk("%p (%lx)\n", pdesc, isp.arg); pdesc->status = 0; mcctrl_ikc_send(os, pdesc->cpu, &isp); - wait_event_interruptible(usrdata->wq_prepare, pdesc->status); + while (wait_event_interruptible(usrdata->wq_prepare, pdesc->status) != 0); if(pdesc->err < 0){ ret = pdesc->err; goto free_out; } - ppd = 
kmalloc(sizeof(*ppd), GFP_ATOMIC); + ppd = kmalloc(sizeof(*ppd), GFP_KERNEL); if (!ppd) { printk("ERROR: allocating per process data\n"); ret = -ENOMEM; @@ -170,15 +171,15 @@ static long mcexec_prepare_image(ihk_os_t os, list_add_tail(&ppd->list, &usrdata->per_proc_list); ihk_ikc_spinlock_unlock(&usrdata->per_proc_list_lock, flags); - dprintk("pid %d, rpgtable: 0x%lx added\n", - ppd->pid, ppd->rpgtable); - if (copy_to_user(udesc, pdesc, sizeof(struct program_load_desc) + sizeof(struct program_image_section) * desc.num_sections)) { ret = -EFAULT; goto free_out; } + dprintk("%s: pid %d, rpgtable: 0x%lx added\n", + __FUNCTION__, ppd->pid, ppd->rpgtable); + ret = 0; free_out: @@ -454,7 +455,6 @@ int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet) struct wait_queue_head_list_node *wqhln = NULL; struct wait_queue_head_list_node *wqhln_iter; struct wait_queue_head_list_node *wqhln_alloc = NULL; - struct mcctrl_channel *c = ud->channels + packet->ref; int pid = packet->pid; unsigned long flags; struct mcctrl_per_proc_data *ppd; @@ -477,9 +477,9 @@ retry_alloc: dprintk("%s: (packet_handler) rtid: %d, ttid: %d, sys nr: %d\n", __FUNCTION__, - c->param.request_va->rtid, - c->param.request_va->ttid, - c->param.request_va->number); + packet->req.rtid, + packet->req.ttid, + packet->req.number); /* * Three scenarios are possible: * - Find the designated thread if req->ttid is specified. @@ -489,9 +489,9 @@ retry_alloc: flags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock); /* Is this a request for a specific thread? 
See if it's waiting */ - if (c->param.request_va->ttid) { + if (packet->req.ttid) { list_for_each_entry(wqhln_iter, &ppd->wq_list_exact, list) { - if (c->param.request_va->ttid != task_pid_vnr(wqhln_iter->task)) + if (packet->req.ttid != task_pid_vnr(wqhln_iter->task)) continue; wqhln = wqhln_iter; @@ -524,7 +524,7 @@ retry_alloc: kfree(wqhln_alloc); } - memcpy(&wqhln->packet, packet, sizeof(*packet)); + wqhln->packet = packet; wqhln->req = 1; wake_up(&wqhln->wq_syscall); ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, flags); @@ -537,8 +537,7 @@ retry_alloc: */ int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req) { - struct syscall_wait_desc swd; - struct mcctrl_channel *c; + struct ikc_scd_packet *packet; struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); struct wait_queue_head_list_node *wqhln; struct wait_queue_head_list_node *wqhln_iter; @@ -555,18 +554,10 @@ int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req) return -EINVAL; } - //printk("mcexec_wait_syscall swd=%p req=%p size=%d\n", &swd, req, sizeof(swd.cpu)); - if (copy_from_user(&swd, req, sizeof(swd))) { - return -EFAULT; - } - - if (swd.cpu >= usrdata->num_channels) - return -EINVAL; - - c = (struct mcctrl_channel *)mcctrl_get_per_thread_data(ppd, current); - if (c) { - printk("mcexec_wait_syscall:already registered. 
task %p ch %p\n", - current, c); + packet = (struct ikc_scd_packet *)mcctrl_get_per_thread_data(ppd, current); + if (packet) { + printk("%s: ERROR: packet %p is already registered for thread %d\n", + __FUNCTION__, packet, task_pid_vnr(current)); return -EBUSY; } @@ -613,49 +604,44 @@ retry_alloc: return -EINTR; } - /* Channel is determined by request */ - dprintk("%s: tid: %d request from CPU %d\n", - __FUNCTION__, task_pid_vnr(current), wqhln->packet.ref); - - c = usrdata->channels + wqhln->packet.ref; + packet = wqhln->packet; kfree(wqhln); -#if 0 - if (c->param.request_va->number == 61 && - c->param.request_va->args[0] == swd.pid) { - - dprintk("pid: %d, tid: %d: SC %d, swd.cpu: %d, WARNING: wait4() for self?\n", - task_tgid_vnr(current), - task_pid_vnr(current); - c->param.request_va->number, - swd.cpu); - - return -EINTR; - } -#endif + dprintk("%s: tid: %d request from CPU %d\n", + __FUNCTION__, task_pid_vnr(current), packet->ref); mb(); - if (!c->param.request_va->valid) { - printk("mcexec_wait_syscall:stray wakeup pid: %d, tid: %d: SC %d, swd.cpu: %d\n", + if (!packet->req.valid) { + printk("%s: ERROR: stray wakeup pid: %d, tid: %d: SC %lu\n", + __FUNCTION__, task_tgid_vnr(current), task_pid_vnr(current), - c->param.request_va->number, - swd.cpu); + packet->req.number); + kfree(packet); goto retry; } - c->param.request_va->valid = 0; /* ack */ - dprintk("SC #%lx, %lx\n", - c->param.request_va->number, - c->param.request_va->args[0]); - if (mcctrl_add_per_thread_data(ppd, current, c) < 0) { + packet->req.valid = 0; /* ack */ + dprintk("%s: system call: %d, args[0]: %lu, args[1]: %lu, args[2]: %lu, " + "args[3]: %lu, args[4]: %lu, args[5]: %lu\n", + __FUNCTION__, + packet->req.number, + packet->req.args[0], + packet->req.args[1], + packet->req.args[2], + packet->req.args[3], + packet->req.args[4], + packet->req.args[5]); + + if (mcctrl_add_per_thread_data(ppd, current, packet) < 0) { kprintf("%s: error adding per-thread data\n", __FUNCTION__); return -EINVAL; 
} - if (__do_in_kernel_syscall(os, c, c->param.request_va)) { - if (copy_to_user(&req->sr, c->param.request_va, + if (__do_in_kernel_syscall(os, packet)) { + if (copy_to_user(&req->sr, &packet->req, sizeof(struct syscall_request))) { + if (mcctrl_delete_per_thread_data(ppd, current) < 0) { kprintf("%s: error deleting per-thread data\n", __FUNCTION__); return -EINVAL; @@ -753,33 +739,6 @@ long mcexec_load_syscall(ihk_os_t os, struct syscall_load_desc *__user arg) #endif ihk_device_unmap_memory(ihk_os_to_dev(os), phys, desc.size); - -/* - ihk_dma_channel_t channel; - struct ihk_dma_request request; - unsigned long dma_status = 0; - - channel = ihk_device_get_dma_channel(ihk_os_to_dev(os), 0); - if (!channel) { - return -EINVAL; - } - - memset(&request, 0, sizeof(request)); - request.src_os = os; - request.src_phys = desc.src; - request.dest_os = NULL; - request.dest_phys = desc.dest; - request.size = desc.size; - request.notify = (void *)virt_to_phys(&dma_status); - request.priv = (void *)1; - - ihk_dma_request(channel, &request); - - while (!dma_status) { - mb(); - udelay(1); - } -*/ return 0; } @@ -787,18 +746,9 @@ long mcexec_load_syscall(ihk_os_t os, struct syscall_load_desc *__user arg) long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg) { struct syscall_ret_desc ret; - struct mcctrl_channel *mc; + struct ikc_scd_packet *packet; struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); struct mcctrl_per_proc_data *ppd; -#if 0 - ihk_dma_channel_t channel; - struct ihk_dma_request request; - - channel = ihk_device_get_dma_channel(ihk_os_to_dev(os), 0); - if (!channel) { - return -EINVAL; - } -#endif if (copy_from_user(&ret, arg, sizeof(struct syscall_ret_desc))) { return -EFAULT; @@ -812,62 +762,43 @@ long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg) return -EINVAL; } - mc = (struct mcctrl_channel *)mcctrl_get_per_thread_data(ppd, current); - if (!mc) { - kprintf("%s: ERROR: no peer channel registerred??\n", 
__FUNCTION__); + packet = (struct ikc_scd_packet *)mcctrl_get_per_thread_data(ppd, current); + if (!packet) { + kprintf("%s: ERROR: no packet registered for TID %d\n", + __FUNCTION__, task_pid_vnr(current)); return -EINVAL; } mcctrl_delete_per_thread_data(ppd, current); - mc->param.response_va->ret = ret.ret; - mc->param.response_va->stid = task_pid_vnr(current); - if (ret.size > 0) { /* Host => Accel. Write is fast. */ unsigned long phys; void *rpm; - phys = ihk_device_map_memory(ihk_os_to_dev(os), ret.dest, - ret.size); + phys = ihk_device_map_memory(ihk_os_to_dev(os), ret.dest, ret.size); #ifdef CONFIG_MIC rpm = ioremap_wc(phys, ret.size); #else rpm = ihk_device_map_virtual(ihk_os_to_dev(os), phys, ret.size, NULL, 0); #endif - if (copy_from_user(rpm, (void *__user)ret.src, ret.size)) { return -EFAULT; } - mb(); - mc->param.response_va->status = 1; - #ifdef CONFIG_MIC iounmap(rpm); #else ihk_device_unmap_virtual(ihk_os_to_dev(os), rpm, ret.size); #endif ihk_device_unmap_memory(ihk_os_to_dev(os), phys, ret.size); + } -/* - memset(&request, 0, sizeof(request)); - request.src_os = NULL; - request.src_phys = ret.src; - request.dest_os = os; - request.dest_phys = ret.dest; - request.size = ret.size; - request.notify_os = os; - request.notify = (void *)mc->param.response_rpa; - request.priv = (void *)1; - - ihk_dma_request(channel, &request); -*/ - } else { - mb(); - mc->param.response_va->status = 1; - } + __return_syscall(os, packet, ret.ret, task_pid_vnr(current)); + + /* Free packet */ + kfree(packet); return 0; } diff --git a/executer/kernel/mcctrl/ikc.c b/executer/kernel/mcctrl/ikc.c index 00e201e9..36199950 100644 --- a/executer/kernel/mcctrl/ikc.c +++ b/executer/kernel/mcctrl/ikc.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "mcctrl.h" #ifdef ATTACHED_MIC #include @@ -49,8 +50,9 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, { struct ikc_scd_packet *pisp = __packet; struct mcctrl_usrdata *usrdata = 
ihk_host_os_get_usrdata(__os); + int msg = pisp->msg; - switch (pisp->msg) { + switch (msg) { case SCD_MSG_INIT_CHANNEL: mcctrl_ikc_init(__os, pisp->ref, pisp->arg, c); break; @@ -108,6 +110,14 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, pisp->err, pisp->arg); break; } + + /* + * SCD_MSG_SYSCALL_ONESIDE holds the packet and frees it in + * mcexec_ret_syscall(), for the rest, free it here. + */ + if (msg != SCD_MSG_SYSCALL_ONESIDE) { + kfree(pisp); + } return 0; } @@ -144,8 +154,6 @@ int mcctrl_ikc_set_recv_cpu(ihk_os_t os, int cpu) ihk_ikc_channel_set_cpu(usrdata->channels[cpu].c, ihk_ikc_get_processor_id()); - kprintf("Setting the target to %d\n", - ihk_ikc_get_processor_id()); return 0; } @@ -191,12 +199,13 @@ static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ih #endif pmc->param.request_va = - (void *)__get_free_pages(GFP_ATOMIC, + (void *)__get_free_pages(in_interrupt() ? GFP_ATOMIC : GFP_KERNEL, REQUEST_SHIFT - PAGE_SHIFT); pmc->param.request_pa = virt_to_phys(pmc->param.request_va); pmc->param.doorbell_va = usrdata->mcctrl_doorbell_va; pmc->param.doorbell_pa = usrdata->mcctrl_doorbell_pa; - pmc->param.post_va = (void *)__get_free_page(GFP_ATOMIC); + pmc->param.post_va = (void *)__get_free_page(in_interrupt() ? + GFP_ATOMIC : GFP_KERNEL); pmc->param.post_pa = virt_to_phys(pmc->param.post_va); memset(pmc->param.doorbell_va, 0, PAGE_SIZE); memset(pmc->param.request_va, 0, PAGE_SIZE); @@ -216,8 +225,9 @@ static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ih PAGE_SIZE, NULL, 0); #endif - pmc->dma_buf = (void *)__get_free_pages(GFP_ATOMIC, - DMA_PIN_SHIFT - PAGE_SHIFT); + pmc->dma_buf = (void *)__get_free_pages(in_interrupt() ? 
+ GFP_ATOMIC : GFP_KERNEL, + DMA_PIN_SHIFT - PAGE_SHIFT); rpm->request_page = pmc->param.request_pa; rpm->doorbell_page = pmc->param.doorbell_pa; diff --git a/executer/kernel/mcctrl/mcctrl.h b/executer/kernel/mcctrl/mcctrl.h index 4d46e54d..c572fcee 100644 --- a/executer/kernel/mcctrl/mcctrl.h +++ b/executer/kernel/mcctrl/mcctrl.h @@ -113,6 +113,8 @@ struct ikc_scd_packet { int pid; int padding; unsigned long arg; + struct syscall_request req; + unsigned long resp_pa; }; /* for SCD_MSG_SYSFS_* */ @@ -159,7 +161,7 @@ struct wait_queue_head_list_node { /* Denotes an exclusive wait for requester TID rtid */ int rtid; int req; - struct ikc_scd_packet packet; + struct ikc_scd_packet *packet; }; struct mcctrl_channel { @@ -291,7 +293,7 @@ int mcctrl_ikc_is_valid_thread(ihk_os_t os, int cpu); ihk_os_t osnum_to_os(int n); /* syscall.c */ -int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall_request *sc); +int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet); struct mcctrl_per_proc_data *mcctrl_get_per_proc_data( struct mcctrl_usrdata *ud, int pid); @@ -301,6 +303,8 @@ int mcctrl_delete_per_thread_data(struct mcctrl_per_proc_data* ppd, struct task_struct *task); struct mcctrl_per_thread_data *mcctrl_get_per_thread_data( struct mcctrl_per_proc_data *ppd, struct task_struct *task); +void __return_syscall(ihk_os_t os, struct ikc_scd_packet *packet, + int ret, int stid); #define PROCFS_NAME_MAX 1000 diff --git a/executer/kernel/mcctrl/procfs.c b/executer/kernel/mcctrl/procfs.c index cbc4470f..36278bd8 100644 --- a/executer/kernel/mcctrl/procfs.c +++ b/executer/kernel/mcctrl/procfs.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "mcctrl.h" #include #include @@ -748,7 +749,7 @@ int procfsm_packet_handler(void *os, int msg, int pid, unsigned long arg) { struct procfs_work *work = NULL; - work = kzalloc(sizeof(*work), GFP_ATOMIC); + work = kzalloc(sizeof(*work), GFP_KERNEL); if (!work) { printk("%s: kzalloc failed\n", 
__FUNCTION__); return -1; diff --git a/executer/kernel/mcctrl/syscall.c b/executer/kernel/mcctrl/syscall.c index 3a9b1e09..074bdafd 100644 --- a/executer/kernel/mcctrl/syscall.c +++ b/executer/kernel/mcctrl/syscall.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -93,7 +94,7 @@ int mcctrl_add_per_thread_data(struct mcctrl_per_proc_data* ppd, int ret = 0; unsigned long flags; - ptd_alloc = kmalloc(sizeof(*ptd), GFP_ATOMIC); + ptd_alloc = kmalloc(sizeof(*ptd), GFP_KERNEL); if (!ptd_alloc) { kprintf("%s: error allocate per thread data\n", __FUNCTION__); ret = -ENOMEM; @@ -242,16 +243,17 @@ out: static int remote_page_fault(struct mcctrl_usrdata *usrdata, void *fault_addr, uint64_t reason) { - struct mcctrl_channel *channel; + struct ikc_scd_packet *packet; struct syscall_request *req; struct syscall_response *resp; int error; struct wait_queue_head_list_node *wqhln; unsigned long irqflags; struct mcctrl_per_proc_data *ppd; + unsigned long phys; - dprintk("%s: tid: %d, fault_addr: %p\n", - __FUNCTION__, task_pid_vnr(current), fault_addr); + dprintk("%s: tid: %d, fault_addr: %lu, reason: %lu\n", + __FUNCTION__, task_pid_vnr(current), fault_addr, reason); /* Look up per-process structure */ ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current)); @@ -262,16 +264,21 @@ static int remote_page_fault(struct mcctrl_usrdata *usrdata, void *fault_addr, u return -EINVAL; } - channel = (struct mcctrl_channel *)mcctrl_get_per_thread_data(ppd, current); - if (!channel) { + packet = (struct ikc_scd_packet *)mcctrl_get_per_thread_data(ppd, current); + if (!packet) { error = -ENOENT; - printk("remote_page_fault(%p,%p,%llx):channel not found. 
%d\n", - usrdata, fault_addr, reason, error); - goto out; + printk("%s: no packet registered for TID %d\n", + __FUNCTION__, task_pid_vnr(current)); + goto out_no_unmap; } - req = channel->param.request_va; - resp = channel->param.response_va; + req = &packet->req; + + /* XXX: we need to map response structure here.. */ + phys = ihk_device_map_memory(ihk_os_to_dev(usrdata->os), + packet->resp_pa, sizeof(*resp)); + resp = ihk_device_map_virtual(ihk_os_to_dev(usrdata->os), + phys, sizeof(*resp), NULL, 0); retry_alloc: wqhln = kmalloc(sizeof(*wqhln), GFP_KERNEL); @@ -312,14 +319,35 @@ retry_alloc: irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock); list_del(&wqhln->list); ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, irqflags); - kfree(wqhln); + dprintk("%s: tid: %d, fault_addr: %p WOKEN UP\n", __FUNCTION__, task_pid_vnr(current), fault_addr); if (error) { + kfree(wqhln); printk("remote_page_fault:interrupted. %d\n", error); goto out; } + else { + /* Update packet reference */ + packet = wqhln->packet; + req = &packet->req; + { + unsigned long phys2; + struct syscall_response *resp2; + phys2 = ihk_device_map_memory(ihk_os_to_dev(usrdata->os), + packet->resp_pa, sizeof(*resp)); + resp2 = ihk_device_map_virtual(ihk_os_to_dev(usrdata->os), + phys2, sizeof(*resp), NULL, 0); + + if (resp != resp2) { + resp = resp2; + phys = phys2; + printk("%s: updated new remote PA for resp\n", __FUNCTION__); + } + } + } + if (!req->valid) { printk("remote_page_fault:not valid\n"); } @@ -337,21 +365,29 @@ retry_alloc: resp->ret = pager_call(usrdata->os, (void *)req); mb(); resp->status = STATUS_PAGER_COMPLETED; - continue; + break; + //continue; } else { error = req->args[1]; if (error) { printk("remote_page_fault:response %d\n", error); + kfree(wqhln); goto out; } } break; } + kfree(wqhln); error = 0; out: - dprintk("remote_page_fault(%p,%p,%llx): %d\n", usrdata, fault_addr, reason, error); + ihk_device_unmap_virtual(ihk_os_to_dev(usrdata->os), resp, sizeof(*resp)); + 
ihk_device_unmap_memory(ihk_os_to_dev(usrdata->os), phys, sizeof(*resp)); + +out_no_unmap: + dprintk("%s: tid: %d, fault_addr: %lu, reason: %lu, error: %d\n", + __FUNCTION__, task_pid_vnr(current), fault_addr, reason, error); return error; } @@ -403,8 +439,9 @@ static int rus_page_hash_insert(struct page *page) { int ret = 0; struct rus_page *rp; + unsigned long flags; - spin_lock(&rus_page_hash_lock); + spin_lock_irqsave(&rus_page_hash_lock, flags); rp = _rus_page_hash_lookup(page); if (!rp) { @@ -431,7 +468,7 @@ static int rus_page_hash_insert(struct page *page) out: - spin_unlock(&rus_page_hash_lock); + spin_unlock_irqrestore(&rus_page_hash_lock, flags); return ret; } @@ -440,8 +477,9 @@ void rus_page_hash_put_pages(void) int i; struct rus_page *rp_iter; struct rus_page *rp_iter_next; + unsigned long flags; - spin_lock(&rus_page_hash_lock); + spin_lock_irqsave(&rus_page_hash_lock, flags); for (i = 0; i < RUS_PAGE_HASH_SIZE; ++i) { @@ -454,7 +492,7 @@ void rus_page_hash_put_pages(void) } } - spin_unlock(&rus_page_hash_lock); + spin_unlock_irqrestore(&rus_page_hash_lock, flags); } @@ -631,237 +669,6 @@ reserve_user_space_common(struct mcctrl_usrdata *usrdata, unsigned long start, u return start; } -//unsigned long last_thread_exec = 0; - -#ifndef DO_USER_MODE -static struct { - long (*do_sys_open)(int, const char __user *, int, int); - long (*sys_lseek)(unsigned int, off_t, unsigned int); - long (*sys_read)(unsigned int, char __user *, size_t); - long (*sys_write)(unsigned int, const char __user *, size_t); -} syscalls; - -void -mcctrl_syscall_init(void) -{ - printk("mcctrl_syscall_init\n"); - syscalls.do_sys_open = (void *)kallsyms_lookup_name("do_sys_open"); - syscalls.sys_lseek = (void *)kallsyms_lookup_name("sys_lseek"); - syscalls.sys_read = (void *)kallsyms_lookup_name("sys_read"); - syscalls.sys_write = (void *)kallsyms_lookup_name("sys_write"); - printk("syscalls.do_sys_open=%lx\n", (long)syscalls.do_sys_open); - printk("syscalls.sys_lseek=%lx\n", 
(long)syscalls.sys_lseek); - printk("syscalls.sys_read=%lx\n", (long)syscalls.sys_read); - printk("syscalls.sys_write=%lx\n", (long)syscalls.sys_write); -} - -static int do_async_copy(ihk_os_t os, unsigned long dest, unsigned long src, - unsigned long size, unsigned int inbound) -{ - struct ihk_dma_request request; - ihk_dma_channel_t channel; - unsigned long asize = ALIGN_WAIT_BUF(size); - - channel = ihk_device_get_dma_channel(ihk_os_to_dev(os), 0); - if (!channel) { - return -EINVAL; - } - - memset(&request, 0, sizeof(request)); - request.src_os = inbound ? os : NULL; - request.src_phys = src; - request.dest_os = inbound ? NULL : os; - request.dest_phys = dest; - request.size = size; - request.notify = (void *)(inbound ? dest + asize : src + asize); - request.priv = (void *)1; - - *(unsigned long *)phys_to_virt((unsigned long)request.notify) = 0; -#ifdef SC_DEBUG - last_request = request; -#endif - - ihk_dma_request(channel, &request); - - return 0; -} - -//int mcctrl_dma_abort; - -static void async_wait(ihk_os_t os, unsigned char *p, int size) -{ - int asize = ALIGN_WAIT_BUF(size); - unsigned long long s, w; - struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); - - rdtscll(s); - while (!p[asize]) { - mb(); - cpu_relax(); - rdtscll(w); - if (w > s + 1024UL * 1024 * 1024 * 10) { - printk("DMA Timed out : %p (%p + %d) => %d\n", - p + asize, p, size, p[asize]); -#ifdef SC_DEBUG - print_dma_lastreq(); -#endif - usrdata->mcctrl_dma_abort = 1; - return; - } - } -} - -static void clear_wait(unsigned char *p, int size) -{ - //int asize = ALIGN_WAIT_BUF(size); - p[size] = 0; -} - -static unsigned long translate_remote_va(struct mcctrl_channel *c, - unsigned long rva) -{ - int i, n; - struct syscall_post *p; - - p = c->param.post_va; - - n = (int)p->v[0]; - if (n < 0 || n >= PAGE_SIZE / sizeof(struct syscall_post)) { - return -EINVAL; - } - for (i = 0; i < n; i++) { - if (p[i + 1].v[0] != 1) { - continue; - } - if (rva >= p[i + 1].v[1] && rva < p[i + 1].v[2]) { 
- return p[i + 1].v[3] + (rva - p[i + 1].v[1]); - } - } - - return -EFAULT; -} - -//extern struct mcctrl_channel *channels; - -#if 0 -int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, - struct syscall_request *sc) -{ - int ret; - mm_segment_t fs; - unsigned long pa; - struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); - - switch (sc->number) { - case 0: /* read */ - case 1024: - if (sc->number & 1024) { - sc->args[1] = translate_remote_va(c, sc->args[1]); - if ((long)sc->args[1] < 0) { - __return_syscall(c, -EFAULT); - return 0; - } - } - - clear_wait(c->dma_buf, sc->args[2]); - fs = get_fs(); - set_fs(KERNEL_DS); - ret = syscalls.sys_read(sc->args[0], c->dma_buf, sc->args[2]); - if (ret > 0) { - do_async_copy(os, sc->args[1], virt_to_phys(c->dma_buf), - sc->args[2], 0); - set_fs(fs); - - async_wait(os, c->dma_buf, sc->args[2]); - } - __return_syscall(c, ret); - return 0; - - case 1: /* write */ - case 1025: - if (sc->number & 1024) { - sc->args[1] = translate_remote_va(c, sc->args[1]); - if ((long)sc->args[1] < 0) { - __return_syscall(c, -EFAULT); - return 0; - } - } - - clear_wait(c->dma_buf, sc->args[2]); - do_async_copy(os, virt_to_phys(c->dma_buf), sc->args[1], - sc->args[2], 1); - fs = get_fs(); - set_fs(KERNEL_DS); - async_wait(os, c->dma_buf, sc->args[2]); - - ret = syscalls.sys_write(sc->args[0], c->dma_buf, sc->args[2]); - set_fs(fs); - - __return_syscall(c, ret); - return 0; - - case 2: /* open */ - case 1026: - if (sc->number & 1024) { - sc->args[0] = translate_remote_va(c, sc->args[0]); - if ((long)sc->args[0] < 0) { - __return_syscall(c, -EFAULT); - return 0; - } - } - - clear_wait(c->dma_buf, 256); - do_async_copy(os, virt_to_phys(c->dma_buf), sc->args[0], - 256, 1); - fs = get_fs(); - set_fs(KERNEL_DS); - async_wait(os, c->dma_buf, 256); - - ret = syscalls.do_sys_open(AT_FDCWD, c->dma_buf, sc->args[1], - sc->args[2]); - set_fs(fs); - - __return_syscall(c, ret); - return 0; - - case 3: /* Close */ - ret = 
sys_close(sc->args[0]); - __return_syscall(c, ret); - return 0; - - case 8: /* lseek */ - ret = syscalls.sys_lseek(sc->args[0], sc->args[1], sc->args[2]); - __return_syscall(c, ret); - return 0; - - case 56: /* Clone */ - usrdata->last_thread_exec++; - if (mcctrl_ikc_is_valid_thread(usrdata->last_thread_exec)) { - printk("Clone notification: %lx\n", sc->args[0]); - if (channels[usrdata->last_thread_exec].param.post_va) { - memcpy(usrdata->channels[usrdata->last_thread_exec].param.post_va, - c->param.post_va, PAGE_SIZE); - } - mcctrl_ikc_send_msg(usrdata->last_thread_exec, - SCD_MSG_SCHEDULE_PROCESS, - usrdata->last_thread_exec, sc->args[0]); - } - - __return_syscall(c, 0); - return 0; - - default: - if (sc->number & 1024) { - __return_syscall(c, -EFAULT); - return 0; - } else { - return -ENOSYS; - } - } -} -#endif -#endif /* !DO_USER_MODE */ - struct pager { struct list_head list; struct inode * inode; @@ -1480,11 +1287,25 @@ static long pager_call(ihk_os_t os, struct syscall_request *req) return ret; } -static void __return_syscall(struct mcctrl_channel *c, int ret) +void __return_syscall(ihk_os_t os, struct ikc_scd_packet *packet, + int ret, int stid) { - c->param.response_va->ret = ret; + unsigned long phys; + struct syscall_response *res; + + phys = ihk_device_map_memory(ihk_os_to_dev(os), + packet->resp_pa, sizeof(*res)); + res = ihk_device_map_virtual(ihk_os_to_dev(os), + phys, sizeof(*res), NULL, 0); + + /* Map response structure and notify offloading thread */ + res->ret = ret; + res->stid = stid; mb(); - c->param.response_va->status = 1; + res->status = 1; + + ihk_device_unmap_virtual(ihk_os_to_dev(os), res, sizeof(*res)); + ihk_device_unmap_memory(ihk_os_to_dev(os), phys, sizeof(*res)); } static int remap_user_space(uintptr_t rva, size_t len, int prot) @@ -1673,13 +1494,14 @@ fail: #define SCHED_CHECK_SAME_OWNER 0x01 #define SCHED_CHECK_ROOT 0x02 -int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall_request *sc) +int 
__do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet) { + struct syscall_request *sc = &packet->req; int error; long ret = -1; struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); - dprintk("__do_in_kernel_syscall(%p,%p,%ld %lx)\n", os, c, sc->number, sc->args[0]); + dprintk("%s: system call: %d\n", __FUNCTION__, sc->args[0]); switch (sc->number) { case __NR_mmap: ret = pager_call(os, sc); @@ -1690,8 +1512,9 @@ int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall if (sc->args[2]) { unsigned long flags; struct mcctrl_per_proc_data *ppd = NULL; + int i; - ppd = kmalloc(sizeof(*ppd), GFP_ATOMIC); + ppd = kmalloc(sizeof(*ppd), GFP_KERNEL); if (!ppd) { printk("ERROR: allocating per process data\n"); error = -ENOMEM; @@ -1700,6 +1523,14 @@ int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall ppd->pid = task_tgid_vnr(current); ppd->rpgtable = sc->args[2]; + INIT_LIST_HEAD(&ppd->wq_list); + INIT_LIST_HEAD(&ppd->wq_list_exact); + spin_lock_init(&ppd->wq_list_lock); + + for (i = 0; i < MCCTRL_PER_THREAD_DATA_HASH_SIZE; ++i) { + INIT_LIST_HEAD(&ppd->per_thread_data_hash[i]); + rwlock_init(&ppd->per_thread_data_hash_lock[i]); + } flags = ihk_ikc_spinlock_lock(&usrdata->per_proc_list_lock); list_add_tail(&ppd->list, &usrdata->per_proc_list); @@ -1799,10 +1630,11 @@ sched_setparam_out: break; } - __return_syscall(c, ret); + __return_syscall(os, packet, ret, 0); error = 0; out: - dprintk("__do_in_kernel_syscall(%p,%p,%ld %lx): %d %ld\n", os, c, sc->number, sc->args[0], error, ret); + dprintk("%s: system call: %d, error: %d, ret: %ld\n", + __FUNCTION__, sc->number, sc->args[0], error, ret); return error; } diff --git a/executer/kernel/mcctrl/sysfs.c b/executer/kernel/mcctrl/sysfs.c index b446864a..0610862c 100644 --- a/executer/kernel/mcctrl/sysfs.c +++ b/executer/kernel/mcctrl/sysfs.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "mcctrl.h" #include "sysfs_msg.h" @@ -1887,7 +1888,7 @@ 
sysfsm_packet_handler(void *os, int msg, int err, long arg1, long arg2) { struct sysfs_work *work = NULL; - work = kzalloc(sizeof(*work), GFP_ATOMIC); + work = kzalloc(sizeof(*work), GFP_KERNEL); if (!work) { eprintk("mcctrl:sysfsm_packet_handler:kzalloc failed\n"); return; diff --git a/kernel/host.c b/kernel/host.c index 60f67834..d9d5763a 100644 --- a/kernel/host.c +++ b/kernel/host.c @@ -570,12 +570,14 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, } *sp, info; unsigned long pp; int cpuid; + int ret = 0; switch (packet->msg) { case SCD_MSG_INIT_CHANNEL_ACKED: dkprintf("SCD_MSG_INIT_CHANNEL_ACKED\n"); process_msg_init_acked(c, packet->arg); - return 0; + ret = 0; + break; case SCD_MSG_PREPARE_PROCESS: @@ -598,13 +600,15 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, pckt.arg = packet->arg; syscall_channel_send(c, &pckt); - return 0; + ret = 0; + break; case SCD_MSG_SCHEDULE_PROCESS: cpuid = obtain_clone_cpuid(); if(cpuid == -1){ kprintf("No CPU available\n"); - return -1; + ret = -1; + break; } dkprintf("SCD_MSG_SCHEDULE_PROCESS: %lx\n", packet->arg); thread = (struct thread *)packet->arg; @@ -618,7 +622,9 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, runq_add_thread(thread, cpuid); //cpu_local_var(next) = (struct thread *)packet->arg; - return 0; + ret = 0; + break; + case SCD_MSG_SEND_SIGNAL: pp = ihk_mc_map_memory(NULL, packet->arg, sizeof(struct mcctrl_signal)); sp = (struct mcctrl_signal *)ihk_mc_map_virtual(pp, 1, PTATTR_WRITABLE | PTATTR_ACTIVE); @@ -633,18 +639,25 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, rc = do_kill(NULL, info.pid, info.tid, info.sig, &info.info, 0); kprintf("SCD_MSG_SEND_SIGNAL: do_kill(pid=%d, tid=%d, sig=%d)=%d\n", info.pid, info.tid, info.sig, rc); - return 0; + ret = 0; + break; + case SCD_MSG_PROCFS_REQUEST: process_procfs_request(packet->arg); - return 0; + ret = 0; + break; + case SCD_MSG_CLEANUP_PROCESS: 
dkprintf("SCD_MSG_CLEANUP_PROCESS pid=%d\n", packet->pid); terminate_host(packet->pid); - return 0; + ret = 0; + break; + case SCD_MSG_DEBUG_LOG: dkprintf("SCD_MSG_DEBUG_LOG code=%lx\n", packet->arg); debug_log(packet->arg); - return 0; + ret = 0; + break; case SCD_MSG_SYSFS_REQ_SHOW: case SCD_MSG_SYSFS_REQ_STORE: @@ -652,7 +665,8 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, sysfss_packet_handler(c, packet->msg, packet->err, packet->sysfs_arg1, packet->sysfs_arg2, packet->sysfs_arg3); - return 0; + ret = 0; + break; case SCD_MSG_GET_CPU_MAPPING: req_get_cpu_mapping(packet->arg); @@ -660,17 +674,21 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, pckt.msg = SCD_MSG_REPLY_GET_CPU_MAPPING; pckt.arg = packet->arg; syscall_channel_send(c, &pckt); - return 0; + ret = 0; + break; default: kprintf("syscall_pakcet_handler:unknown message " "(%d.%d.%d.%d.%d.%#lx)\n", packet->msg, packet->ref, packet->osnum, packet->pid, packet->err, packet->arg); - return 0; + ret = 0; + break; } - return 0; + + kfree(packet); + return ret; } void init_host_syscall_channel(void) diff --git a/kernel/include/syscall.h b/kernel/include/syscall.h index 6d0ccfa0..111edf07 100644 --- a/kernel/include/syscall.h +++ b/kernel/include/syscall.h @@ -117,28 +117,6 @@ struct user_desc { unsigned int lm:1; }; -struct ikc_scd_packet { - int msg; - int err; - union { - /* for traditional SCD_MSG_* */ - struct { - int ref; - int osnum; - int pid; - int padding; - unsigned long arg; - }; - - /* for SCD_MSG_SYSFS_* */ - struct { - long sysfs_arg1; - long sysfs_arg2; - long sysfs_arg3; - }; - }; -}; - struct program_image_section { unsigned long vaddr; unsigned long len; @@ -222,6 +200,30 @@ struct syscall_request { unsigned long args[6]; }; +struct ikc_scd_packet { + int msg; + int err; + union { + /* for traditional SCD_MSG_* */ + struct { + int ref; + int osnum; + int pid; + int padding; + unsigned long arg; + struct syscall_request req; + unsigned long resp_pa; + 
}; + + /* for SCD_MSG_SYSFS_* */ + struct { + long sysfs_arg1; + long sysfs_arg2; + long sysfs_arg3; + }; + }; +}; + struct syscall_response { /* TID of the thread that requested the service */ int ttid; diff --git a/kernel/syscall.c b/kernel/syscall.c index dfe90328..43116fa5 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -127,11 +127,9 @@ int prepare_process_ranges_args_envs(struct thread *thread, static void do_mod_exit(int status); #endif -static void send_syscall(struct syscall_request *req, int cpu, int pid) +static void send_syscall(struct syscall_request *req, int cpu, int pid, struct syscall_response *res) { struct ikc_scd_packet packet; - struct syscall_response *res; - struct syscall_params *scp; struct ihk_ikc_channel_desc *syscall_channel; int ret; @@ -140,7 +138,6 @@ static void send_syscall(struct syscall_request *req, int cpu, int pid) req->number == __NR_kill){ // interrupt syscall extern int num_processors; - scp = &get_cpu_local_var(0)->scp2; syscall_channel = get_cpu_local_var(0)->syscall_channel2; /* XXX: is this really going to work if multiple processes @@ -152,34 +149,22 @@ static void send_syscall(struct syscall_request *req, int cpu, int pid) pid = req->args[1]; } else{ - scp = &get_cpu_local_var(cpu)->scp; syscall_channel = get_cpu_local_var(cpu)->syscall_channel; } - res = scp->response_va; res->status = 0; req->valid = 0; -#ifdef USE_DMA - memcpy_async(scp->request_pa, - virt_to_phys(req), sizeof(*req), 0, &fin); - - memcpy_async_wait(&scp->post_fin); - scp->post_va->v[0] = scp->post_idx; - memcpy_async_wait(&fin); -#else - memcpy(scp->request_va, req, sizeof(*req)); -#endif + memcpy(&packet.req, req, sizeof(*req)); barrier(); - scp->request_va->valid = 1; - *(unsigned int *)scp->doorbell_va = cpu + 1; + packet.req.valid = 1; #ifdef SYSCALL_BY_IKC packet.msg = SCD_MSG_SYSCALL_ONESIDE; packet.ref = cpu; packet.pid = pid ? 
pid : cpu_local_var(current)->proc->pid; - packet.arg = scp->request_rpa; + packet.resp_pa = virt_to_phys(res); dkprintf("send syscall, nr: %d, pid: %d\n", req->number, packet.pid); ret = ihk_ikc_send(syscall_channel, &packet, 0); @@ -193,9 +178,8 @@ ihk_spinlock_t syscall_lock; long do_syscall(struct syscall_request *req, int cpu, int pid) { - struct syscall_response *res; + struct syscall_response res; struct syscall_request req2 IHK_DMA_ALIGN; - struct syscall_params *scp; int error; long rc; int islock = 0; @@ -219,20 +203,15 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) if(req->number == __NR_exit_group || req->number == __NR_gettid || req->number == __NR_kill){ // interrupt syscall - scp = &get_cpu_local_var(0)->scp2; islock = 1; irqstate = ihk_mc_spinlock_lock(&syscall_lock); } - else{ - scp = &get_cpu_local_var(cpu)->scp; - } - res = scp->response_va; /* The current thread is the requester and any thread from * the pool may serve the request */ req->rtid = cpu_local_var(current)->tid; req->ttid = 0; - send_syscall(req, cpu, pid); + send_syscall(req, cpu, pid, &res); dkprintf("%s: syscall num: %d waiting for Linux.. 
\n", __FUNCTION__, req->number); @@ -240,8 +219,8 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) #define STATUS_IN_PROGRESS 0 #define STATUS_COMPLETED 1 #define STATUS_PAGE_FAULT 3 - while (res->status != STATUS_COMPLETED) { - while (res->status == STATUS_IN_PROGRESS) { + while (res.status != STATUS_COMPLETED) { + while (res.status == STATUS_IN_PROGRESS) { struct cpu_local_var *v; int call_schedule = 0; long runq_irqstate; @@ -270,15 +249,16 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) if (call_schedule) { schedule(); ++thread->in_syscall_offload; + v->wait_in_syscall = NULL; } } - if (res->status == STATUS_PAGE_FAULT) { + if (res.status == STATUS_PAGE_FAULT) { dkprintf("STATUS_PAGE_FAULT in syscall, pid: %d\n", cpu_local_var(current)->proc->pid); error = page_fault_process_vm(thread->vm, - (void *)res->fault_address, - res->fault_reason|PF_POPULATE); + (void *)res.fault_address, + res.fault_reason|PF_POPULATE); /* send result */ req2.number = __NR_mmap; @@ -288,16 +268,16 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) /* The current thread is the requester and only the waiting thread * may serve the request */ req2.rtid = cpu_local_var(current)->tid; - req2.ttid = res->stid; + req2.ttid = res.stid; - send_syscall(&req2, cpu, pid); + send_syscall(&req2, cpu, pid, &res); } } dkprintf("%s: syscall num: %d got host reply: %d \n", - __FUNCTION__, req->number, res->ret); + __FUNCTION__, req->number, res.ret); - rc = res->ret; + rc = res.ret; if(islock){ ihk_mc_spinlock_unlock(&syscall_lock, irqstate); }