From 6aae35cb3d90ad111f7b046395e03197a769b03c Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Tue, 2 Aug 2016 16:59:04 +0900 Subject: [PATCH 01/42] process: transfer TIDs in bulk and reuse them locally --- executer/user/mcexec.c | 33 +++++++++++++++++++++ kernel/host.c | 4 +-- kernel/include/process.h | 10 +++++++ kernel/process.c | 22 ++++++++++++-- kernel/syscall.c | 62 +++++++++++++++++++++++++++++++++++++--- 5 files changed, 122 insertions(+), 9 deletions(-) diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index d7cbd68e..cec60856 100644 --- a/executer/user/mcexec.c +++ b/executer/user/mcexec.c @@ -1944,6 +1944,39 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock) thread_data[oldcpuid].remote_tid = wtid; } + /* + * Number of TIDs and the remote physical address where TIDs are + * expected are passed in arg 4 and 5, respectively. + */ + if (w.sr.args[4] > 0) { + struct remote_transfer trans; + int i = 0; + int *tids = malloc(sizeof(int) * w.sr.args[4]); + if (!tids) { + fprintf(stderr, "__NR_gettid(): error allocating TIDs\n"); + goto gettid_out; + } + + for (i = 0; i < ncpu && i < w.sr.args[4]; ++i) { + tids[i] = thread_data[i].tid; + } + + for (; i < w.sr.args[4]; ++i) { /* zero-fill the unused tail; the buffer holds args[4] ints, not ncpu */ + tids[i] = 0; + } + + trans.userp = (void*)tids; + trans.rphys = w.sr.args[5]; + trans.size = sizeof(int) * w.sr.args[4]; + trans.direction = MCEXEC_UP_TRANSFER_TO_REMOTE; + + if (ioctl(fd, MCEXEC_UP_TRANSFER, &trans) != 0) { + fprintf(stderr, "__NR_gettid(): error transfering TIDs\n"); + } + + free(tids); + } +gettid_out: do_syscall_return(fd, cpu, thread_data[newcpuid].remote_tid, 0, 0, 0, 0); break; } diff --git a/kernel/host.c b/kernel/host.c index ace510db..60f67834 100644 --- a/kernel/host.c +++ b/kernel/host.c @@ -521,8 +521,6 @@ static void syscall_channel_send(struct ihk_ikc_channel_desc *c, } extern unsigned long do_kill(struct thread *, int, int, int, struct siginfo *, int ptracecont); -extern void settid(struct thread *proc, int mode, int newcpuid, int oldcpuid); - 
extern void process_procfs_request(unsigned long rarg); extern int memcheckall(); extern int freecheck(int runcount); @@ -612,7 +610,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, thread = (struct thread *)packet->arg; proc = thread->proc; - settid(thread, 0, cpuid, -1); + settid(thread, 0, cpuid, -1, 0, NULL); proc->status = PS_RUNNING; thread->status = PS_RUNNING; chain_thread(thread); diff --git a/kernel/include/process.h b/kernel/include/process.h index 7196b2cc..4ad055f4 100644 --- a/kernel/include/process.h +++ b/kernel/include/process.h @@ -348,6 +348,11 @@ struct sig_pending { typedef void pgio_func_t(void *arg); +struct mcexec_tid { + int tid; + struct thread *thread; +}; + /* Represents a node in the process fork tree, it may exist even after the * corresponding process exited due to references from the parent and/or * children and is used for implementing wait/waitpid without having a @@ -362,6 +367,9 @@ struct process { // threads and children struct list_head threads_list; mcs_rwlock_lock_t threads_lock; // lock for threads_list + /* TID set of proxy process */ + struct mcexec_tid *tids; + int nr_tids; /* The ptracing process behave as the parent of the ptraced process after using PTRACE_ATTACH except getppid. So we save it here. 
*/ @@ -678,5 +686,7 @@ void chain_thread(struct thread *); void proc_init(); void set_timer(); struct sig_pending *hassigpending(struct thread *thread); +void settid(struct thread *thread, int mode, int newcpuid, int oldcpuid, + int nr_tids, int *tids); #endif diff --git a/kernel/process.c b/kernel/process.c index 2e5570de..e82e1e2a 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -53,7 +53,6 @@ static int copy_user_ranges(struct process_vm *vm, struct process_vm *orgvm); extern void release_fp_regs(struct thread *proc); extern void save_fp_regs(struct thread *proc); extern void restore_fp_regs(struct thread *proc); -void settid(struct thread *proc, int mode, int newcpuid, int oldcpuid); extern void __runq_add_proc(struct thread *proc, int cpu_id); extern void terminate_host(int pid); extern void lapic_timer_enable(unsigned int clocks); @@ -2062,6 +2061,7 @@ release_process(struct process *proc) mcs_rwlock_writer_unlock(&parent->children_lock, &lock); } + if (proc->tids) kfree(proc->tids); kfree(proc); } @@ -2167,6 +2167,23 @@ release_sigcommon(struct sig_common *sigcommon) kfree(sigcommon); } +/* + * Release the TID from the process' TID set corresponding to this thread. + * NOTE: threads_lock must be held. 
+ */ +void __release_tid(struct process *proc, struct thread *thread) { + int i; + + for (i = 0; i < proc->nr_tids; ++i) { + if (proc->tids[i].thread != thread) continue; + + proc->tids[i].thread = NULL; + dkprintf("%s: tid %d has been released by %p\n", + __FUNCTION__, thread->tid, thread); + break; + } +} + void destroy_thread(struct thread *thread) { struct sig_pending *pending; @@ -2183,6 +2200,7 @@ void destroy_thread(struct thread *thread) mcs_rwlock_writer_lock(&proc->threads_lock, &lock); list_del(&thread->siblings_list); + __release_tid(proc, thread); mcs_rwlock_writer_unlock(&proc->threads_lock, &lock); cpu_clear(thread->cpu_id, &thread->vm->address_space->cpu_set, @@ -2522,7 +2540,7 @@ static void do_migrate(void) v->flags |= CPU_FLAG_NEED_RESCHED; ihk_mc_interrupt_cpu(get_x86_cpu_local_variable(cpu_id)->apic_id, 0xd1); double_rq_unlock(cur_v, v, irqstate); - settid(req->thread, 2, cpu_id, old_cpu_id); + //settid(req->thread, 2, cpu_id, old_cpu_id, 0, NULL); ack: waitq_wakeup(&req->wq); diff --git a/kernel/syscall.c b/kernel/syscall.c index 864a6499..5d31da8b 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -1478,8 +1478,8 @@ SYSCALL_DECLARE(getppid) return thread->proc->ppid_parent->pid; } -void -settid(struct thread *thread, int mode, int newcpuid, int oldcpuid) +void settid(struct thread *thread, int mode, int newcpuid, int oldcpuid, + int nr_tids, int *tids) { struct syscall_request request IHK_DMA_ALIGN; unsigned long rc; @@ -1489,6 +1489,12 @@ settid(struct thread *thread, int mode, int newcpuid, int oldcpuid) request.args[1] = thread->proc->pid; request.args[2] = newcpuid; request.args[3] = oldcpuid; + /* + * If nr_tids is non-zero, tids should point to an array of ints + * where the thread ids of the mcexec process are expected. 
+ */ + request.args[4] = nr_tids; + request.args[5] = virt_to_phys(tids); rc = do_syscall(&request, ihk_mc_get_processor_id(), thread->proc->pid); if (mode != 2) { thread->tid = rc; @@ -1893,7 +1899,55 @@ unsigned long do_fork(int clone_flags, unsigned long newsp, &new->vm->address_space->cpu_set_lock); if (clone_flags & CLONE_VM) { - settid(new, 1, cpuid, -1); + int *tids = NULL; + int i; + struct mcs_rwlock_node_irqsave lock; + + mcs_rwlock_writer_lock(&newproc->threads_lock, &lock); + /* Obtain mcexec TIDs if not known yet */ + if (!newproc->nr_tids) { + tids = kmalloc(sizeof(int) * num_processors, IHK_MC_AP_NOWAIT); + if (!tids) { + mcs_rwlock_writer_unlock(&newproc->threads_lock, &lock); + release_cpuid(cpuid); + return -ENOMEM; + } + + newproc->tids = kmalloc(sizeof(struct mcexec_tid) * num_processors, IHK_MC_AP_NOWAIT); + if (!newproc->tids) { + mcs_rwlock_writer_unlock(&newproc->threads_lock, &lock); + kfree(tids); + release_cpuid(cpuid); + return -ENOMEM; + } + + settid(new, 1, cpuid, -1, num_processors, tids); + + for (i = 0; (i < num_processors) && tids[i]; ++i) { + dkprintf("%s: tid[%d]: %d\n", __FUNCTION__, i, tids[i]); + newproc->tids[i].tid = tids[i]; + newproc->tids[i].thread = NULL; + ++newproc->nr_tids; + } + } + + /* Find an unused TID */ + for (i = 0; i < newproc->nr_tids; ++i) { + if (!newproc->tids[i].thread) { + newproc->tids[i].thread = new; + new->tid = newproc->tids[i].tid; + dkprintf("%s: tid %d assigned to %p\n", __FUNCTION__, new->tid, new); + break; + } + } + + /* TODO: spawn more mcexec threads */ + if (!new->tid) { + kprintf("%s: no more TIDs available\n", __FUNCTION__); + panic(""); + } + + mcs_rwlock_writer_unlock(&newproc->threads_lock, &lock); } /* fork() a new process on the host */ else { @@ -1913,7 +1967,7 @@ unsigned long do_fork(int clone_flags, unsigned long newsp, } /* In a single threaded process TID equals to PID */ - settid(new, 0, cpuid, -1); + settid(new, 0, cpuid, -1, 0, NULL); new->vm->address_space->pids[0] = new->proc->pid; 
dkprintf("fork(): new pid: %d\n", new->proc->pid); From 76981bcc1850f7d9a960978dc08d1e5c42fbb8e8 Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Thu, 4 Aug 2016 15:22:40 +0900 Subject: [PATCH 02/42] mcctrl: move procfs TID processing into dedicated work queue --- executer/kernel/mcctrl/ikc.c | 5 +--- executer/kernel/mcctrl/mcctrl.h | 1 + executer/kernel/mcctrl/procfs.c | 51 +++++++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 4 deletions(-) diff --git a/executer/kernel/mcctrl/ikc.c b/executer/kernel/mcctrl/ikc.c index d5493db0..df854d52 100644 --- a/executer/kernel/mcctrl/ikc.c +++ b/executer/kernel/mcctrl/ikc.c @@ -88,11 +88,8 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, break; case SCD_MSG_PROCFS_TID_CREATE: - add_tid_entry(ihk_host_os_get_index(__os), pisp->pid, pisp->arg); - break; - case SCD_MSG_PROCFS_TID_DELETE: - delete_tid_entry(ihk_host_os_get_index(__os), pisp->pid, pisp->arg); + return procfsm_packet_handler(__os, pisp->msg, pisp->pid, pisp->arg); break; case SCD_MSG_GET_VDSO_INFO: diff --git a/executer/kernel/mcctrl/mcctrl.h b/executer/kernel/mcctrl/mcctrl.h index cbf483f9..9f60326a 100644 --- a/executer/kernel/mcctrl/mcctrl.h +++ b/executer/kernel/mcctrl/mcctrl.h @@ -301,6 +301,7 @@ struct procfs_file { }; void procfs_answer(unsigned int arg, int err); +int procfsm_packet_handler(void *os, int msg, int pid, unsigned long arg); void add_tid_entry(int osnum, int pid, int tid); void add_pid_entry(int osnum, int pid); void delete_tid_entry(int osnum, int pid, int tid); diff --git a/executer/kernel/mcctrl/procfs.c b/executer/kernel/mcctrl/procfs.c index 42b25e8e..cbc4470f 100644 --- a/executer/kernel/mcctrl/procfs.c +++ b/executer/kernel/mcctrl/procfs.c @@ -713,6 +713,57 @@ mckernel_procfs_lseek(struct file *file, loff_t offset, int orig) return file->f_pos; } +struct procfs_work { + void *os; + int msg; + int pid; + unsigned long arg; + struct work_struct work; +}; + +static void procfsm_work_main(struct 
work_struct *work0) +{ + struct procfs_work *work = container_of(work0, struct procfs_work, work); + + switch (work->msg) { + case SCD_MSG_PROCFS_TID_CREATE: + add_tid_entry(ihk_host_os_get_index(work->os), work->pid, work->arg); + break; + + case SCD_MSG_PROCFS_TID_DELETE: + delete_tid_entry(ihk_host_os_get_index(work->os), work->pid, work->arg); + break; + + default: + printk("%s: unknown work: msg: %d, pid: %d, arg: %lu)\n", + __FUNCTION__, work->msg, work->pid, work->arg); + break; + } + + kfree(work); + return; +} + +int procfsm_packet_handler(void *os, int msg, int pid, unsigned long arg) +{ + struct procfs_work *work = NULL; + + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) { + printk("%s: kzalloc failed\n", __FUNCTION__); + return -1; + } + + work->os = os; + work->msg = msg; + work->pid = pid; + work->arg = arg; + INIT_WORK(&work->work, &procfsm_work_main); + + schedule_work(&work->work); + return 0; +} + static const struct file_operations mckernel_forward_ro = { .llseek = mckernel_procfs_lseek, .read = mckernel_procfs_read, From 689da07ac6414ffa91c987df6b0d301676b2249f Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Sat, 6 Aug 2016 08:52:14 +0900 Subject: [PATCH 03/42] ihk_mc_ikc_init_first_local(): hold ref to master channel --- arch/x86/kernel/mikc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/mikc.c b/arch/x86/kernel/mikc.c index cfde96da..eff3616d 100644 --- a/arch/x86/kernel/mikc.c +++ b/arch/x86/kernel/mikc.c @@ -38,7 +38,7 @@ int ihk_mc_ikc_init_first_local(struct ihk_ikc_channel_desc *channel, arch_master_channel_packet_handler = packet_handler; ihk_ikc_init_desc(channel, IKC_OS_HOST, 0, rq, wq, - ihk_ikc_master_channel_packet_handler); + ihk_ikc_master_channel_packet_handler, channel); ihk_ikc_enable_channel(channel); /* Set boot parameter */ From 4cefb4333fd8a60b9c5f994bffa52bb4d131a7b0 Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Sat, 6 Aug 2016 08:54:55 +0900 Subject: [PATCH 04/42] 
mcctrl: use atomic malloc in IRQ context --- executer/kernel/mcctrl/ikc.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/executer/kernel/mcctrl/ikc.c b/executer/kernel/mcctrl/ikc.c index df854d52..2d513517 100644 --- a/executer/kernel/mcctrl/ikc.c +++ b/executer/kernel/mcctrl/ikc.c @@ -43,6 +43,7 @@ static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ih int mcexec_syscall(struct mcctrl_channel *c, int pid, unsigned long arg); void sig_done(unsigned long arg, int err); +/* XXX: this runs in atomic context! */ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, void *__packet, void *__os) { @@ -89,7 +90,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, case SCD_MSG_PROCFS_TID_CREATE: case SCD_MSG_PROCFS_TID_DELETE: - return procfsm_packet_handler(__os, pisp->msg, pisp->pid, pisp->arg); + procfsm_packet_handler(__os, pisp->msg, pisp->pid, pisp->arg); break; case SCD_MSG_GET_VDSO_INFO: @@ -190,12 +191,12 @@ static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ih #endif pmc->param.request_va = - (void *)__get_free_pages(GFP_KERNEL, + (void *)__get_free_pages(GFP_ATOMIC, REQUEST_SHIFT - PAGE_SHIFT); pmc->param.request_pa = virt_to_phys(pmc->param.request_va); pmc->param.doorbell_va = usrdata->mcctrl_doorbell_va; pmc->param.doorbell_pa = usrdata->mcctrl_doorbell_pa; - pmc->param.post_va = (void *)__get_free_page(GFP_KERNEL); + pmc->param.post_va = (void *)__get_free_page(GFP_ATOMIC); pmc->param.post_pa = virt_to_phys(pmc->param.post_va); memset(pmc->param.doorbell_va, 0, PAGE_SIZE); memset(pmc->param.request_va, 0, PAGE_SIZE); @@ -215,7 +216,7 @@ static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ih PAGE_SIZE, NULL, 0); #endif - pmc->dma_buf = (void *)__get_free_pages(GFP_KERNEL, + pmc->dma_buf = (void *)__get_free_pages(GFP_ATOMIC, DMA_PIN_SHIFT - PAGE_SHIFT); rpm->request_page = pmc->param.request_pa; From 
5fbeee953ae17160dce6eb9f8c192823e2c37d98 Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Sun, 7 Aug 2016 20:55:36 +0900 Subject: [PATCH 05/42] mcctrl: clean up syscall offload wait code --- executer/kernel/mcctrl/control.c | 99 +++++++------------------------- 1 file changed, 21 insertions(+), 78 deletions(-) diff --git a/executer/kernel/mcctrl/control.c b/executer/kernel/mcctrl/control.c index ba1aab7d..e1994b2b 100644 --- a/executer/kernel/mcctrl/control.c +++ b/executer/kernel/mcctrl/control.c @@ -453,13 +453,6 @@ retry_alloc: return 0; } -#ifndef DO_USER_MODE -// static int remaining_job, base_cpu, job_pos; -#endif - -// extern int num_channels; -// extern int mcctrl_dma_abort; - int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req) { struct syscall_wait_desc swd; @@ -469,9 +462,6 @@ int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req) struct wait_queue_head_list_node *wqhln_iter; int ret = 0; unsigned long irqflags; -#ifndef DO_USER_MODE - unsigned long s, w, d; -#endif //printk("mcexec_wait_syscall swd=%p req=%p size=%d\n", &swd, req, sizeof(swd.cpu)); if (copy_from_user(&swd, req, sizeof(swd))) { @@ -489,7 +479,6 @@ int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req) } c = usrdata->channels + swd.cpu; -#ifdef DO_USER_MODE retry: /* Prepare per-process wait queue head */ retry_alloc: @@ -517,7 +506,6 @@ retry_alloc: ihk_ikc_spinlock_unlock(&c->wq_list_lock, irqflags); ret = wait_event_interruptible(wqhln->wq_syscall, wqhln->req); - /* Remove per-process wait queue head */ irqflags = ihk_ikc_spinlock_lock(&c->wq_list_lock); @@ -541,79 +529,34 @@ retry_alloc: return -EINTR; } -#if 1 mb(); if (!c->param.request_va->valid) { -printk("mcexec_wait_syscall:stray wakeup\n"); + printk("mcexec_wait_syscall:stray wakeup pid: %d, tid: %d: SC %d, swd.cpu: %d\n", + task_tgid_vnr(current), + task_pid_vnr(current), + c->param.request_va->number, + swd.cpu); goto retry; } -#endif -#else - while (1) { - c = 
usrdata->channels + swd.cpu; - ihk_get_tsc(s); - if (!usrdata->remaining_job) { - while (!(*c->param.doorbell_va)) { - mb(); - cpu_relax(); - ihk_get_tsc(w); - if (w > s + 1024UL * 1024 * 1024 * 10) { - return -EINTR; - } - } - d = (*c->param.doorbell_va) - 1; - *c->param.doorbell_va = 0; - if (d < 0 || d >= usrdata->num_channels) { - d = 0; - } - usrdata->base_cpu = d; - usrdata->job_pos = 0; - usrdata->remaining_job = 1; - } else { - usrdata->job_pos++; + c->param.request_va->valid = 0; /* ack */ + dprintk("SC #%lx, %lx\n", + c->param.request_va->number, + c->param.request_va->args[0]); + register_peer_channel(usrdata, current, c); + + if (__do_in_kernel_syscall(os, c, c->param.request_va)) { + if (copy_to_user(&req->sr, c->param.request_va, + sizeof(struct syscall_request))) { + deregister_peer_channel(usrdata, current, c); + return -EFAULT; } - - for (; usrdata->job_pos < usrdata->num_channels; usrdata->job_pos++) { - if (base_cpu + job_pos >= num_channels) { - c = usrdata->channels + - (usrdata->base_cpu + usrdata->job_pos - usrdata->num_channels); - } else { - c = usrdata->channels + usrdata->base_cpu + usrdata->job_pos; - } - if (!c) { - continue; - } - if (c->param.request_va && - c->param.request_va->valid) { -#endif - c->param.request_va->valid = 0; /* ack */ - dprintk("SC #%lx, %lx\n", - c->param.request_va->number, - c->param.request_va->args[0]); - register_peer_channel(usrdata, current, c); - if (__do_in_kernel_syscall(os, c, c->param.request_va)) { - if (copy_to_user(&req->sr, c->param.request_va, - sizeof(struct syscall_request))) { - deregister_peer_channel(usrdata, current, c); - return -EFAULT; - } - return 0; - } - deregister_peer_channel(usrdata, current, c); -#ifdef DO_USER_MODE - goto retry; -#endif -#ifndef DO_USER_MODE - if (usrdata->mcctrl_dma_abort) { - return -2; - } - } - } - usrdata->remaining_job = 0; + return 0; } -#endif - return 0; + + deregister_peer_channel(usrdata, current, c); + + goto retry; } long mcexec_pin_region(ihk_os_t 
os, unsigned long *__user arg) From fb84d4ef11cca398f7ac61f6c24950c88033e760 Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Mon, 8 Aug 2016 19:43:05 +0900 Subject: [PATCH 06/42] mcctrl: thread pool based system call offload handling --- executer/include/uprotocol.h | 11 ++ executer/kernel/mcctrl/control.c | 241 +++++++++++++++++++++----- executer/kernel/mcctrl/ikc.c | 17 +- executer/kernel/mcctrl/mcctrl.h | 40 ++++- executer/kernel/mcctrl/syscall.c | 285 ++++++++++++++----------------- executer/user/mcexec.c | 4 +- kernel/include/syscall.h | 11 ++ kernel/syscall.c | 8 + 8 files changed, 395 insertions(+), 222 deletions(-) diff --git a/executer/include/uprotocol.h b/executer/include/uprotocol.h index 6d297bdf..e8dfe0ec 100644 --- a/executer/include/uprotocol.h +++ b/executer/include/uprotocol.h @@ -110,6 +110,13 @@ struct program_load_desc { }; struct syscall_request { + /* TID of requesting thread */ + int rtid; + /* + * TID of target thread. Remote page fault response needs to designate the + * thread that must serve the request, 0 indicates any thread from the pool + */ + int ttid; unsigned long valid; unsigned long number; unsigned long args[6]; @@ -129,6 +136,10 @@ struct syscall_load_desc { }; struct syscall_response { + /* TID of the thread that requested the service */ + int ttid; + /* TID of the mcexec thread that is serving or has served the request */ + int stid; unsigned long status; long ret; unsigned long fault_address; diff --git a/executer/kernel/mcctrl/control.c b/executer/kernel/mcctrl/control.c index e1994b2b..c9893406 100644 --- a/executer/kernel/mcctrl/control.c +++ b/executer/kernel/mcctrl/control.c @@ -83,6 +83,7 @@ static long mcexec_prepare_image(ihk_os_t os, struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); unsigned long flags; struct mcctrl_per_proc_data *ppd = NULL; + int i; if (copy_from_user(&desc, udesc, sizeof(struct program_load_desc))) { @@ -156,6 +157,14 @@ static long mcexec_prepare_image(ihk_os_t os, ppd->pid = 
pdesc->pid; ppd->rpgtable = pdesc->rpgtable; + INIT_LIST_HEAD(&ppd->wq_list); + INIT_LIST_HEAD(&ppd->wq_list_exact); + spin_lock_init(&ppd->wq_list_lock); + + for (i = 0; i < MCCTRL_PER_THREAD_DATA_HASH_SIZE; ++i) { + INIT_LIST_HEAD(&ppd->per_thread_data_hash[i]); + rwlock_init(&ppd->per_thread_data_hash_lock[i]); + } flags = ihk_ikc_spinlock_lock(&usrdata->per_proc_list_lock); list_add_tail(&ppd->list, &usrdata->per_proc_list); @@ -417,42 +426,115 @@ static long mcexec_get_cpu(ihk_os_t os) return info->n_cpus; } -int mcexec_syscall(struct mcctrl_channel *c, int pid, unsigned long arg) +struct mcctrl_per_proc_data *mcctrl_get_per_proc_data( + struct mcctrl_usrdata *ud, + int pid) { - struct wait_queue_head_list_node *wqhln = NULL; - struct wait_queue_head_list_node *wqhln_iter; + struct mcctrl_per_proc_data *ppd = NULL, *ppd_iter; unsigned long flags; - /* Look up per-process wait queue head with pid */ - flags = ihk_ikc_spinlock_lock(&c->wq_list_lock); - list_for_each_entry(wqhln_iter, &c->wq_list, list) { - if (wqhln_iter->pid == pid) { - wqhln = wqhln_iter; + /* Look up per-process structure */ + flags = ihk_ikc_spinlock_lock(&ud->per_proc_list_lock); + list_for_each_entry(ppd_iter, &ud->per_proc_list, list) { + if (ppd_iter->pid == pid) { + ppd = ppd_iter; break; } } + ihk_ikc_spinlock_unlock(&ud->per_proc_list_lock, flags); + + return ppd; +} + +/* + * Called indirectly from the IKC message handler. 
+ */ +int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet) +{ + struct wait_queue_head_list_node *wqhln = NULL; + struct wait_queue_head_list_node *wqhln_iter; + struct wait_queue_head_list_node *wqhln_alloc = NULL; + struct mcctrl_channel *c = ud->channels + packet->ref; + int pid = packet->pid; + unsigned long flags; + struct mcctrl_per_proc_data *ppd; - if (!wqhln) { retry_alloc: - wqhln = kmalloc(sizeof(*wqhln), GFP_ATOMIC); - if (!wqhln) { - printk("WARNING: coudln't alloc wait queue head, retrying..\n"); - goto retry_alloc; - } - - wqhln->pid = pid; - wqhln->req = 0; - init_waitqueue_head(&wqhln->wq_syscall); - list_add_tail(&wqhln->list, &c->wq_list); + wqhln_alloc = kmalloc(sizeof(*wqhln), GFP_ATOMIC); /* reached from the IKC packet handler: must not sleep */ + if (!wqhln_alloc) { + printk("WARNING: coudln't alloc wait queue head, retrying..\n"); + goto retry_alloc; } + /* Look up per-process structure */ + ppd = mcctrl_get_per_proc_data(ud, pid); + + if (!ppd) { + kprintf("%s: ERROR: no per-process structure for PID %d??\n", __FUNCTION__, pid); + kfree(wqhln_alloc); + return 0; + } + + dprintk("%s: (packet_handler) rtid: %d, ttid: %d, sys nr: %d\n", + __FUNCTION__, + c->param.request_va->rtid, + c->param.request_va->ttid, + c->param.request_va->number); + /* + * Three scenarios are possible: + * - Find the designated thread if req->ttid is specified. + * - Find any available thread if req->ttid is zero. + * - Add a request element if no threads are available. + */ + flags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock); + + /* Is this a request for a specific thread? See if it's waiting */ + if (c->param.request_va->ttid) { + list_for_each_entry(wqhln_iter, &ppd->wq_list_exact, list) { + if (c->param.request_va->ttid != task_pid_vnr(wqhln_iter->task)) + continue; + + wqhln = wqhln_iter; + break; + } + if (!wqhln) { + printk("%s: WARNING: no target thread found for exact request??\n", + __FUNCTION__); + } + } + /* Is there any thread available? 
*/ + else { + list_for_each_entry(wqhln_iter, &ppd->wq_list, list) { + if (wqhln_iter->task && !wqhln_iter->req) { + wqhln = wqhln_iter; + break; + } + } + } + + /* If no match found, add request */ + if (!wqhln) { + wqhln = wqhln_alloc; + wqhln->req = 0; + wqhln->task = NULL; + init_waitqueue_head(&wqhln->wq_syscall); + list_add_tail(&wqhln->list, &ppd->wq_list); + } + else { + kfree(wqhln_alloc); + } + + memcpy(&wqhln->packet, packet, sizeof(*packet)); wqhln->req = 1; wake_up(&wqhln->wq_syscall); - ihk_ikc_spinlock_unlock(&c->wq_list_lock, flags); + ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, flags); return 0; } +/* + * Called from an mcexec thread via ioctl(). + */ int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req) { struct syscall_wait_desc swd; @@ -462,8 +544,18 @@ int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req) struct wait_queue_head_list_node *wqhln_iter; int ret = 0; unsigned long irqflags; - -//printk("mcexec_wait_syscall swd=%p req=%p size=%d\n", &swd, req, sizeof(swd.cpu)); + struct mcctrl_per_proc_data *ppd; + + /* Look up per-process structure */ + ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current)); + + if (!ppd) { + kprintf("%s: ERROR: no per-process structure for PID %d??\n", + __FUNCTION__, task_tgid_vnr(current)); + return -EINVAL; + } + + //printk("mcexec_wait_syscall swd=%p req=%p size=%d\n", &swd, req, sizeof(swd.cpu)); if (copy_from_user(&swd, req, sizeof(swd))) { return -EFAULT; } @@ -471,16 +563,15 @@ int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req) if (swd.cpu >= usrdata->num_channels) return -EINVAL; - c = get_peer_channel(usrdata, current); + c = (struct mcctrl_channel *)mcctrl_get_per_thread_data(ppd, current); if (c) { printk("mcexec_wait_syscall:already registered. 
task %p ch %p\n", current, c); return -EBUSY; } - c = usrdata->channels + swd.cpu; retry: - /* Prepare per-process wait queue head */ + /* Prepare per-thread wait queue head or find a valid request */ retry_alloc: wqhln = kmalloc(sizeof(*wqhln), GFP_KERNEL); if (!wqhln) { @@ -488,35 +579,48 @@ retry_alloc: goto retry_alloc; } - wqhln->pid = swd.pid; + wqhln->task = current; wqhln->req = 0; init_waitqueue_head(&wqhln->wq_syscall); - irqflags = ihk_ikc_spinlock_lock(&c->wq_list_lock); - /* First see if there is one wait queue already */ - list_for_each_entry(wqhln_iter, &c->wq_list, list) { - if (wqhln_iter->pid == task_tgid_vnr(current)) { + irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock); + /* First see if there is a valid request already that is not yet taken */ + list_for_each_entry(wqhln_iter, &ppd->wq_list, list) { + if (wqhln_iter->task == NULL && wqhln_iter->req) { kfree(wqhln); wqhln = wqhln_iter; + wqhln->task = current; list_del(&wqhln->list); break; } } - list_add_tail(&wqhln->list, &c->wq_list); - ihk_ikc_spinlock_unlock(&c->wq_list_lock, irqflags); - ret = wait_event_interruptible(wqhln->wq_syscall, wqhln->req); + /* No valid request? Wait for one.. 
*/ + if (wqhln->req == 0) { + list_add_tail(&wqhln->list, &ppd->wq_list); + ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, irqflags); + + ret = wait_event_interruptible(wqhln->wq_syscall, wqhln->req); + + /* Remove per-thread wait queue head */ + irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock); + list_del(&wqhln->list); + } + ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, irqflags); - /* Remove per-process wait queue head */ - irqflags = ihk_ikc_spinlock_lock(&c->wq_list_lock); - list_del(&wqhln->list); - ihk_ikc_spinlock_unlock(&c->wq_list_lock, irqflags); if (ret && !wqhln->req) { kfree(wqhln); return -EINTR; } + + /* Channel is determined by request */ + dprintk("%s: tid: %d request from CPU %d\n", + __FUNCTION__, task_pid_vnr(current), wqhln->packet.ref); + + c = usrdata->channels + wqhln->packet.ref; kfree(wqhln); +#if 0 if (c->param.request_va->number == 61 && c->param.request_va->args[0] == swd.pid) { @@ -528,6 +632,7 @@ retry_alloc: return -EINTR; } +#endif mb(); if (!c->param.request_va->valid) { @@ -543,18 +648,27 @@ retry_alloc: dprintk("SC #%lx, %lx\n", c->param.request_va->number, c->param.request_va->args[0]); - register_peer_channel(usrdata, current, c); + if (mcctrl_add_per_thread_data(ppd, current, c) < 0) { + kprintf("%s: error adding per-thread data\n", __FUNCTION__); + return -EINVAL; + } if (__do_in_kernel_syscall(os, c, c->param.request_va)) { if (copy_to_user(&req->sr, c->param.request_va, sizeof(struct syscall_request))) { - deregister_peer_channel(usrdata, current, c); + if (mcctrl_delete_per_thread_data(ppd, current) < 0) { + kprintf("%s: error deleting per-thread data\n", __FUNCTION__); + return -EINVAL; + } return -EFAULT; } return 0; } - deregister_peer_channel(usrdata, current, c); + if (mcctrl_delete_per_thread_data(ppd, current) < 0) { + kprintf("%s: error deleting per-thread data\n", __FUNCTION__); + return -EINVAL; + } goto retry; } @@ -675,6 +789,7 @@ long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg) struct 
syscall_ret_desc ret; struct mcctrl_channel *mc; struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); + struct mcctrl_per_proc_data *ppd; #if 0 ihk_dma_channel_t channel; struct ihk_dma_request request; @@ -688,13 +803,25 @@ long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg) if (copy_from_user(&ret, arg, sizeof(struct syscall_ret_desc))) { return -EFAULT; } - mc = usrdata->channels + ret.cpu; - if (!mc) { + + /* Look up per-process structure */ + ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current)); + if (!ppd) { + kprintf("%s: ERROR: no per-process structure for PID %d??\n", + __FUNCTION__, task_tgid_vnr(current)); return -EINVAL; } - deregister_peer_channel(usrdata, current, mc); + + mc = (struct mcctrl_channel *)mcctrl_get_per_thread_data(ppd, current); + if (!mc) { + kprintf("%s: ERROR: no peer channel registerred??\n", __FUNCTION__); + return -EINVAL; + } + + mcctrl_delete_per_thread_data(ppd, current); mc->param.response_va->ret = ret.ret; + mc->param.response_va->stid = task_pid_vnr(current); if (ret.size > 0) { /* Host => Accel. Write is fast. 
*/ @@ -876,6 +1003,34 @@ int mcexec_close_exec(ihk_os_t os) struct mckernel_exec_file *mcef = NULL; int found = 0; int os_ind = ihk_host_os_get_index(os); + struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); + unsigned long flags; + struct mcctrl_per_proc_data *ppd = NULL, *ppd_iter; + + ppd = NULL; + flags = ihk_ikc_spinlock_lock(&usrdata->per_proc_list_lock); + + list_for_each_entry(ppd_iter, &usrdata->per_proc_list, list) { + if (ppd_iter->pid == task_tgid_vnr(current)) { + ppd = ppd_iter; + break; + } + } + + if (ppd) { + list_del(&ppd->list); + + dprintk("pid: %d, tid: %d: rpgtable for %d (0x%lx) removed\n", + task_tgid_vnr(current), current->pid, ppd->pid, ppd->rpgtable); + + kfree(ppd); + } + else { + printk("WARNING: no per process data for pid %d ?\n", + task_tgid_vnr(current)); + } + + ihk_ikc_spinlock_unlock(&usrdata->per_proc_list_lock, flags); if (os_ind < 0) { return EINVAL; diff --git a/executer/kernel/mcctrl/ikc.c b/executer/kernel/mcctrl/ikc.c index 2d513517..00e201e9 100644 --- a/executer/kernel/mcctrl/ikc.c +++ b/executer/kernel/mcctrl/ikc.c @@ -40,7 +40,7 @@ void mcexec_prepare_ack(ihk_os_t os, unsigned long arg, int err); static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ihk_ikc_channel_desc *c); -int mcexec_syscall(struct mcctrl_channel *c, int pid, unsigned long arg); +int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet); void sig_done(unsigned long arg, int err); /* XXX: this runs in atomic context! 
*/ @@ -64,7 +64,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, break; case SCD_MSG_SYSCALL_ONESIDE: - mcexec_syscall(usrdata->channels + pisp->ref, pisp->pid, pisp->arg); + mcexec_syscall(usrdata, pisp); break; case SCD_MSG_PROCFS_ANSWER: @@ -263,9 +263,6 @@ static int connect_handler(struct ihk_ikc_channel_info *param) } param->packet_handler = syscall_packet_handler; - INIT_LIST_HEAD(&usrdata->channels[cpu].wq_list); - spin_lock_init(&usrdata->channels[cpu].wq_list_lock); - usrdata->channels[cpu].c = c; kprintf("syscall: MC CPU %d connected. c=%p\n", cpu, c); @@ -284,9 +281,6 @@ static int connect_handler2(struct ihk_ikc_channel_info *param) param->packet_handler = syscall_packet_handler; - INIT_LIST_HEAD(&usrdata->channels[cpu].wq_list); - spin_lock_init(&usrdata->channels[cpu].wq_list_lock); - usrdata->channels[cpu].c = c; kprintf("syscall: MC CPU %d connected. c=%p\n", cpu, c); @@ -313,7 +307,6 @@ int prepare_ikc_channels(ihk_os_t os) { struct ihk_cpu_info *info; struct mcctrl_usrdata *usrdata; - int error; usrdata = kzalloc(sizeof(struct mcctrl_usrdata), GFP_KERNEL); usrdata->mcctrl_doorbell_va = (void *)__get_free_page(GFP_KERNEL); @@ -351,11 +344,6 @@ int prepare_ikc_channels(ihk_os_t os) INIT_LIST_HEAD(&usrdata->cpu_topology_list); INIT_LIST_HEAD(&usrdata->node_topology_list); - error = init_peer_channel_registry(usrdata); - if (error) { - return error; - } - return 0; } @@ -394,7 +382,6 @@ void destroy_ikc_channels(ihk_os_t os) } free_page((unsigned long)usrdata->mcctrl_doorbell_va); - destroy_peer_channel_registry(usrdata); kfree(usrdata->channels); kfree(usrdata); } diff --git a/executer/kernel/mcctrl/mcctrl.h b/executer/kernel/mcctrl/mcctrl.h index 9f60326a..4d46e54d 100644 --- a/executer/kernel/mcctrl/mcctrl.h +++ b/executer/kernel/mcctrl/mcctrl.h @@ -41,6 +41,7 @@ #include #include #include +#include #include #include "sysfs.h" @@ -154,8 +155,11 @@ struct syscall_params { struct wait_queue_head_list_node { struct list_head 
list; wait_queue_head_t wq_syscall; - int pid; + struct task_struct *task; + /* Denotes an exclusive wait for requester TID rtid */ + int rtid; int req; + struct ikc_scd_packet packet; }; struct mcctrl_channel { @@ -163,15 +167,29 @@ struct mcctrl_channel { struct syscall_params param; struct ikc_scd_init_param init; void *dma_buf; - - struct list_head wq_list; - ihk_spinlock_t wq_list_lock; }; +struct mcctrl_per_thread_data { + struct list_head hash; + struct task_struct *task; + void *data; +}; + +#define MCCTRL_PER_THREAD_DATA_HASH_SHIFT 8 +#define MCCTRL_PER_THREAD_DATA_HASH_SIZE (1 << MCCTRL_PER_THREAD_DATA_HASH_SHIFT) +#define MCCTRL_PER_THREAD_DATA_HASH_MASK (MCCTRL_PER_THREAD_DATA_HASH_SIZE - 1) + struct mcctrl_per_proc_data { struct list_head list; int pid; unsigned long rpgtable; /* per process, not per OS */ + + struct list_head wq_list; + struct list_head wq_list_exact; + ihk_spinlock_t wq_list_lock; + + struct list_head per_thread_data_hash[MCCTRL_PER_THREAD_DATA_HASH_SIZE]; + rwlock_t per_thread_data_hash_lock[MCCTRL_PER_THREAD_DATA_HASH_SIZE]; }; struct sysfsm_req { @@ -273,12 +291,16 @@ int mcctrl_ikc_is_valid_thread(ihk_os_t os, int cpu); ihk_os_t osnum_to_os(int n); /* syscall.c */ -int init_peer_channel_registry(struct mcctrl_usrdata *ud); -void destroy_peer_channel_registry(struct mcctrl_usrdata *ud); -int register_peer_channel(struct mcctrl_usrdata *ud, void *key, struct mcctrl_channel *ch); -int deregister_peer_channel(struct mcctrl_usrdata *ud, void *key, struct mcctrl_channel *ch); -struct mcctrl_channel *get_peer_channel(struct mcctrl_usrdata *ud, void *key); int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall_request *sc); +struct mcctrl_per_proc_data *mcctrl_get_per_proc_data( + struct mcctrl_usrdata *ud, + int pid); +int mcctrl_add_per_thread_data(struct mcctrl_per_proc_data* ppd, + struct task_struct *task, void *data); +int mcctrl_delete_per_thread_data(struct mcctrl_per_proc_data* ppd, + struct task_struct 
*task); +struct mcctrl_per_thread_data *mcctrl_get_per_thread_data( + struct mcctrl_per_proc_data *ppd, struct task_struct *task); #define PROCFS_NAME_MAX 1000 diff --git a/executer/kernel/mcctrl/syscall.c b/executer/kernel/mcctrl/syscall.c index 1b028ae1..3a9b1e09 100644 --- a/executer/kernel/mcctrl/syscall.c +++ b/executer/kernel/mcctrl/syscall.c @@ -84,88 +84,96 @@ static void print_dma_lastreq(void) } #endif -int init_peer_channel_registry(struct mcctrl_usrdata *ud) +int mcctrl_add_per_thread_data(struct mcctrl_per_proc_data* ppd, + struct task_struct *task, void *data) { - ud->keys = kzalloc(sizeof(void *) * ud->num_channels, GFP_KERNEL); - if (!ud->keys) { - printk("Error: cannot allocate usrdata.keys[].\n"); - return -ENOMEM; + struct mcctrl_per_thread_data *ptd_iter, *ptd = NULL; + struct mcctrl_per_thread_data *ptd_alloc = NULL; + int hash = (((uint64_t)task >> 4) & MCCTRL_PER_THREAD_DATA_HASH_MASK); + int ret = 0; + unsigned long flags; + + ptd_alloc = kmalloc(sizeof(*ptd), GFP_ATOMIC); + if (!ptd_alloc) { + kprintf("%s: error allocate per thread data\n", __FUNCTION__); + ret = -ENOMEM; + goto out_noalloc; } - return 0; -} - -void destroy_peer_channel_registry(struct mcctrl_usrdata *ud) -{ - kfree(ud->keys); - ud->keys = NULL; - return; -} - -int register_peer_channel(struct mcctrl_usrdata *ud, void *key, struct mcctrl_channel *ch) -{ - int cpu; - - cpu = ch - ud->channels; - if ((cpu < 0) || (ud->num_channels <= cpu)) { - printk("register_peer_channel(%p,%p,%p):" - "not a syscall channel. cpu=%d\n", - ud, key, ch, cpu); - return -EINVAL; - } - - if (ud->keys[cpu] != NULL) { - printk("register_peer_channel(%p,%p,%p):" - "already registered. cpu=%d\n", - ud, key, ch, cpu); - /* - * When mcexec receives a signal, - * it may be finished without doing deregister_peer_channel(). - * Therefore a substitute registration is necessary. 
- */ -#if 0 - return -EBUSY; -#endif - } - - ud->keys[cpu] = key; - return 0; -} - -int deregister_peer_channel(struct mcctrl_usrdata *ud, void *key, struct mcctrl_channel *ch) -{ - int cpu; - - cpu = ch - ud->channels; - if ((cpu < 0) || (ud->num_channels <= cpu)) { - printk("deregister_peer_channel(%p,%p,%p):" - "not a syscall channel. cpu=%d\n", - ud, key, ch, cpu); - return -EINVAL; - } - - if (ud->keys[cpu] && (ud->keys[cpu] != key)) { - printk("deregister_peer_channel(%p,%p,%p):" - "not registered. cpu=%d\n", - ud, key, ch, cpu); - return -EBUSY; - } - - ud->keys[cpu] = NULL; - return 0; -} - -struct mcctrl_channel *get_peer_channel(struct mcctrl_usrdata *ud, void *key) -{ - int cpu; - - for (cpu = 0; cpu < ud->num_channels; ++cpu) { - if (ud->keys[cpu] == key) { - return &ud->channels[cpu]; + /* Check if data for this thread exists and add if not */ + write_lock_irqsave(&ppd->per_thread_data_hash_lock[hash], flags); + list_for_each_entry(ptd_iter, &ppd->per_thread_data_hash[hash], hash) { + if (ptd_iter->task == task) { + ptd = ptd_iter; + break; } } - return NULL; + if (ptd) { + ret = -EBUSY; + kfree(ptd_alloc); + goto out; + } + + ptd = ptd_alloc; + ptd->task = task; + ptd->data = data; + list_add_tail(&ptd->hash, &ppd->per_thread_data_hash[hash]); + +out: + write_unlock_irqrestore(&ppd->per_thread_data_hash_lock[hash], flags); +out_noalloc: + return ret; +} + +int mcctrl_delete_per_thread_data(struct mcctrl_per_proc_data* ppd, + struct task_struct *task) +{ + struct mcctrl_per_thread_data *ptd_iter, *ptd = NULL; + int hash = (((uint64_t)task >> 4) & MCCTRL_PER_THREAD_DATA_HASH_MASK); + int ret = 0; + unsigned long flags; + + /* Check if data for this thread exists and delete it */ + write_lock_irqsave(&ppd->per_thread_data_hash_lock[hash], flags); + list_for_each_entry(ptd_iter, &ppd->per_thread_data_hash[hash], hash) { + if (ptd_iter->task == task) { + ptd = ptd_iter; + break; + } + } + + if (!ptd) { + ret = -EINVAL; + goto out; + } + + 
list_del(&ptd->hash); + kfree(ptd); + +out: + write_unlock_irqrestore(&ppd->per_thread_data_hash_lock[hash], flags); + return ret; +} + +struct mcctrl_per_thread_data *mcctrl_get_per_thread_data(struct mcctrl_per_proc_data *ppd, struct task_struct *task) +{ + struct mcctrl_per_thread_data *ptd_iter, *ptd = NULL; + int hash = (((uint64_t)task >> 4) & MCCTRL_PER_THREAD_DATA_HASH_MASK); + unsigned long flags; + + /* Check if data for this thread exists and return it */ + read_lock_irqsave(&ppd->per_thread_data_hash_lock[hash], flags); + + list_for_each_entry(ptd_iter, &ppd->per_thread_data_hash[hash], hash) { + if (ptd_iter->task == task) { + ptd = ptd_iter; + break; + } + } + + read_unlock_irqrestore(&ppd->per_thread_data_hash_lock[hash], flags); + return ptd ? ptd->data : NULL; } #if 1 /* x86 depend, host OS side */ @@ -238,10 +246,23 @@ static int remote_page_fault(struct mcctrl_usrdata *usrdata, void *fault_addr, u struct syscall_request *req; struct syscall_response *resp; int error; + struct wait_queue_head_list_node *wqhln; + unsigned long irqflags; + struct mcctrl_per_proc_data *ppd; - dprintk("remote_page_fault(%p,%p,%llx)\n", usrdata, fault_addr, reason); + dprintk("%s: tid: %d, fault_addr: %p\n", + __FUNCTION__, task_pid_vnr(current), fault_addr); + + /* Look up per-process structure */ + ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current)); - channel = get_peer_channel(usrdata, current); + if (!ppd) { + kprintf("%s: ERROR: no per-process structure for PID %d??\n", + __FUNCTION__, task_tgid_vnr(current)); + return -EINVAL; + } + + channel = (struct mcctrl_channel *)mcctrl_get_per_thread_data(ppd, current); if (!channel) { error = -ENOENT; printk("remote_page_fault(%p,%p,%llx):channel not found. 
%d\n", @@ -252,10 +273,28 @@ static int remote_page_fault(struct mcctrl_usrdata *usrdata, void *fault_addr, u req = channel->param.request_va; resp = channel->param.response_va; - /* request page fault */ +retry_alloc: + wqhln = kmalloc(sizeof(*wqhln), GFP_KERNEL); + if (!wqhln) { + printk("WARNING: coudln't alloc wait queue head, retrying..\n"); + goto retry_alloc; + } + + /* Prepare per-thread wait queue head */ + wqhln->task = current; + wqhln->req = 0; + init_waitqueue_head(&wqhln->wq_syscall); + + irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock); + /* Add to exact list */ + list_add_tail(&wqhln->list, &ppd->wq_list_exact); + ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, irqflags); + + /* Request page fault */ resp->ret = -EFAULT; resp->fault_address = (unsigned long)fault_addr; resp->fault_reason = reason; + resp->stid = task_pid_vnr(current); #define STATUS_PAGER_COMPLETED 1 #define STATUS_PAGE_FAULT 3 @@ -264,43 +303,18 @@ static int remote_page_fault(struct mcctrl_usrdata *usrdata, void *fault_addr, u resp->status = STATUS_PAGE_FAULT; for (;;) { - struct wait_queue_head_list_node *wqhln; - struct wait_queue_head_list_node *wqhln_iter; - unsigned long irqflags; - -retry_alloc: - wqhln = kmalloc(sizeof(*wqhln), GFP_KERNEL); - if (!wqhln) { - printk("WARNING: coudln't alloc wait queue head, retrying..\n"); - goto retry_alloc; - } - - /* Prepare per-process wait queue head */ - wqhln->pid = task_tgid_vnr(current); - wqhln->req = 0; - init_waitqueue_head(&wqhln->wq_syscall); - - irqflags = ihk_ikc_spinlock_lock(&channel->wq_list_lock); - /* First see if there is a wait queue already */ - list_for_each_entry(wqhln_iter, &channel->wq_list, list) { - if (wqhln_iter->pid == task_tgid_vnr(current)) { - kfree(wqhln); - wqhln = wqhln_iter; - list_del(&wqhln->list); - break; - } - } - list_add_tail(&wqhln->list, &channel->wq_list); - ihk_ikc_spinlock_unlock(&channel->wq_list_lock, irqflags); - + dprintk("%s: tid: %d, fault_addr: %p SLEEPING\n", + __FUNCTION__, 
task_pid_vnr(current), fault_addr); /* wait for response */ error = wait_event_interruptible(wqhln->wq_syscall, wqhln->req); - - /* Remove per-process wait queue head */ - irqflags = ihk_ikc_spinlock_lock(&channel->wq_list_lock); + + /* Remove per-thread wait queue head */ + irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock); list_del(&wqhln->list); - ihk_ikc_spinlock_unlock(&channel->wq_list_lock, irqflags); + ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, irqflags); kfree(wqhln); + dprintk("%s: tid: %d, fault_addr: %p WOKEN UP\n", + __FUNCTION__, task_pid_vnr(current), fault_addr); if (error) { printk("remote_page_fault:interrupted. %d\n", error); @@ -472,26 +486,18 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) #if USE_VM_INSERT_PFN size_t pix; #endif - struct mcctrl_per_proc_data *ppd, *ppd_iter; - unsigned long flags; + struct mcctrl_per_proc_data *ppd; dprintk("mcctrl:page fault:flags %#x pgoff %#lx va %p page %p\n", vmf->flags, vmf->pgoff, vmf->virtual_address, vmf->page); - ppd = NULL; - flags = ihk_ikc_spinlock_lock(&usrdata->per_proc_list_lock); - - list_for_each_entry(ppd_iter, &usrdata->per_proc_list, list) { - if (ppd_iter->pid == task_tgid_vnr(current)) { - ppd = ppd_iter; - break; - } - } - ihk_ikc_spinlock_unlock(&usrdata->per_proc_list_lock, flags); + /* Look up per-process structure */ + ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current)); if (!ppd) { - printk("ERROR: no per process data for pid %d\n", task_tgid_vnr(current)); - return VM_FAULT_SIGBUS; + kprintf("%s: ERROR: no per-process structure for PID %d??\n", + __FUNCTION__, task_tgid_vnr(current)); + return -EINVAL; } for (try = 1; ; ++try) { @@ -1711,33 +1717,6 @@ int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall break; case __NR_exit_group: { - unsigned long flags; - struct mcctrl_per_proc_data *ppd = NULL, *ppd_iter; - - ppd = NULL; - flags = ihk_ikc_spinlock_lock(&usrdata->per_proc_list_lock); - - 
list_for_each_entry(ppd_iter, &usrdata->per_proc_list, list) { - if (ppd_iter->pid == task_tgid_vnr(current)) { - ppd = ppd_iter; - break; - } - } - - if (ppd) { - list_del(&ppd->list); - - dprintk("pid: %d, tid: %d: rpgtable for %d (0x%lx) removed\n", - task_tgid_vnr(current), current->pid, ppd->pid, ppd->rpgtable); - - kfree(ppd); - } - else { - printk("WARNING: no per process data for pid %d ?\n", - task_tgid_vnr(current)); - } - - ihk_ikc_spinlock_unlock(&usrdata->per_proc_list_lock, flags); /* Make sure the user space handler will be called as well */ error = -ENOSYS; diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index cec60856..5ae855b9 100644 --- a/executer/user/mcexec.c +++ b/executer/user/mcexec.c @@ -1870,13 +1870,13 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock) sig = 0; term = 0; + do_syscall_return(fd, cpu, 0, 0, 0, 0, 0); + /* Drop executable file */ if ((ret = ioctl(fd, MCEXEC_UP_CLOSE_EXEC)) != 0) { fprintf(stderr, "WARNING: close_exec() couldn't find exec file?\n"); } - do_syscall_return(fd, cpu, 0, 0, 0, 0, 0); - __dprintf("__NR_exit/__NR_exit_group: %ld (cpu_id: %d)\n", w.sr.args[0], cpu); if(w.sr.number == __NR_exit_group){ diff --git a/kernel/include/syscall.h b/kernel/include/syscall.h index 4aaf3244..6d0ccfa0 100644 --- a/kernel/include/syscall.h +++ b/kernel/include/syscall.h @@ -210,12 +210,23 @@ struct ikc_scd_init_param { }; struct syscall_request { + /* TID of requesting thread */ + int rtid; + /* + * TID of target thread. 
Remote page fault response needs to designate the + * thread that must serve the request, 0 indicates any thread from the pool + */ + int ttid; unsigned long valid; unsigned long number; unsigned long args[6]; }; struct syscall_response { + /* TID of the thread that requested the service */ + int ttid; + /* TID of the mcexec thread that is serving the request */ + int stid; unsigned long status; long ret; unsigned long fault_address; diff --git a/kernel/syscall.c b/kernel/syscall.c index 5d31da8b..dfe90328 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -227,6 +227,10 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) scp = &get_cpu_local_var(cpu)->scp; } res = scp->response_va; + /* The current thread is the requester and any thread from + * the pool may serve the request */ + req->rtid = cpu_local_var(current)->tid; + req->ttid = 0; send_syscall(req, cpu, pid); @@ -281,6 +285,10 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) #define PAGER_RESUME_PAGE_FAULT 0x0101 req2.args[0] = PAGER_RESUME_PAGE_FAULT; req2.args[1] = error; + /* The current thread is the requester and only the waiting thread + * may serve the request */ + req2.rtid = cpu_local_var(current)->tid; + req2.ttid = res->stid; send_syscall(&req2, cpu, pid); } From d7bc947a02479eb4c57aad1cba0c7634d6a9cdb9 Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Tue, 9 Aug 2016 16:49:42 +0900 Subject: [PATCH 07/42] mcctrl: redesign mcctrl_channels for IKC packet based syscall offloading --- executer/kernel/mcctrl/control.c | 185 ++++++----------- executer/kernel/mcctrl/ikc.c | 24 ++- executer/kernel/mcctrl/mcctrl.h | 8 +- executer/kernel/mcctrl/procfs.c | 3 +- executer/kernel/mcctrl/syscall.c | 346 ++++++++----------------------- executer/kernel/mcctrl/sysfs.c | 3 +- kernel/host.c | 42 ++-- kernel/include/syscall.h | 46 ++-- kernel/syscall.c | 52 ++--- 9 files changed, 244 insertions(+), 465 deletions(-) diff --git a/executer/kernel/mcctrl/control.c 
b/executer/kernel/mcctrl/control.c index c9893406..0ec4db1d 100644 --- a/executer/kernel/mcctrl/control.c +++ b/executer/kernel/mcctrl/control.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -125,30 +126,30 @@ static long mcexec_prepare_image(ihk_os_t os, } pdesc->args = (void*)virt_to_phys(args); - printk("args: 0x%lX\n", (unsigned long)pdesc->args); - printk("argc: %ld\n", *(long *)args); + dprintk("args: 0x%lX\n", (unsigned long)pdesc->args); + dprintk("argc: %ld\n", *(long *)args); pdesc->envs = (void*)virt_to_phys(envs); - printk("envs: 0x%lX\n", (unsigned long)pdesc->envs); - printk("envc: %ld\n", *(long *)envs); + dprintk("envs: 0x%lX\n", (unsigned long)pdesc->envs); + dprintk("envc: %ld\n", *(long *)envs); isp.msg = SCD_MSG_PREPARE_PROCESS; isp.ref = pdesc->cpu; isp.arg = virt_to_phys(pdesc); - printk("# of sections: %d\n", pdesc->num_sections); - printk("%p (%lx)\n", pdesc, isp.arg); + dprintk("# of sections: %d\n", pdesc->num_sections); + dprintk("%p (%lx)\n", pdesc, isp.arg); pdesc->status = 0; mcctrl_ikc_send(os, pdesc->cpu, &isp); - wait_event_interruptible(usrdata->wq_prepare, pdesc->status); + while (wait_event_interruptible(usrdata->wq_prepare, pdesc->status) != 0); if(pdesc->err < 0){ ret = pdesc->err; goto free_out; } - ppd = kmalloc(sizeof(*ppd), GFP_ATOMIC); + ppd = kmalloc(sizeof(*ppd), GFP_KERNEL); if (!ppd) { printk("ERROR: allocating per process data\n"); ret = -ENOMEM; @@ -170,15 +171,15 @@ static long mcexec_prepare_image(ihk_os_t os, list_add_tail(&ppd->list, &usrdata->per_proc_list); ihk_ikc_spinlock_unlock(&usrdata->per_proc_list_lock, flags); - dprintk("pid %d, rpgtable: 0x%lx added\n", - ppd->pid, ppd->rpgtable); - if (copy_to_user(udesc, pdesc, sizeof(struct program_load_desc) + sizeof(struct program_image_section) * desc.num_sections)) { ret = -EFAULT; goto free_out; } + dprintk("%s: pid %d, rpgtable: 0x%lx added\n", + __FUNCTION__, ppd->pid, ppd->rpgtable); + ret = 0; free_out: @@ -454,7 
+455,6 @@ int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet) struct wait_queue_head_list_node *wqhln = NULL; struct wait_queue_head_list_node *wqhln_iter; struct wait_queue_head_list_node *wqhln_alloc = NULL; - struct mcctrl_channel *c = ud->channels + packet->ref; int pid = packet->pid; unsigned long flags; struct mcctrl_per_proc_data *ppd; @@ -477,9 +477,9 @@ retry_alloc: dprintk("%s: (packet_handler) rtid: %d, ttid: %d, sys nr: %d\n", __FUNCTION__, - c->param.request_va->rtid, - c->param.request_va->ttid, - c->param.request_va->number); + packet->req.rtid, + packet->req.ttid, + packet->req.number); /* * Three scenarios are possible: * - Find the designated thread if req->ttid is specified. @@ -489,9 +489,9 @@ retry_alloc: flags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock); /* Is this a request for a specific thread? See if it's waiting */ - if (c->param.request_va->ttid) { + if (packet->req.ttid) { list_for_each_entry(wqhln_iter, &ppd->wq_list_exact, list) { - if (c->param.request_va->ttid != task_pid_vnr(wqhln_iter->task)) + if (packet->req.ttid != task_pid_vnr(wqhln_iter->task)) continue; wqhln = wqhln_iter; @@ -524,7 +524,7 @@ retry_alloc: kfree(wqhln_alloc); } - memcpy(&wqhln->packet, packet, sizeof(*packet)); + wqhln->packet = packet; wqhln->req = 1; wake_up(&wqhln->wq_syscall); ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, flags); @@ -537,8 +537,7 @@ retry_alloc: */ int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req) { - struct syscall_wait_desc swd; - struct mcctrl_channel *c; + struct ikc_scd_packet *packet; struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); struct wait_queue_head_list_node *wqhln; struct wait_queue_head_list_node *wqhln_iter; @@ -555,18 +554,10 @@ int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req) return -EINVAL; } - //printk("mcexec_wait_syscall swd=%p req=%p size=%d\n", &swd, req, sizeof(swd.cpu)); - if (copy_from_user(&swd, req, sizeof(swd))) { - return 
-EFAULT; - } - - if (swd.cpu >= usrdata->num_channels) - return -EINVAL; - - c = (struct mcctrl_channel *)mcctrl_get_per_thread_data(ppd, current); - if (c) { - printk("mcexec_wait_syscall:already registered. task %p ch %p\n", - current, c); + packet = (struct ikc_scd_packet *)mcctrl_get_per_thread_data(ppd, current); + if (packet) { + printk("%s: ERROR: packet %p is already registered for thread %d\n", + __FUNCTION__, packet, task_pid_vnr(current)); return -EBUSY; } @@ -613,49 +604,44 @@ retry_alloc: return -EINTR; } - /* Channel is determined by request */ - dprintk("%s: tid: %d request from CPU %d\n", - __FUNCTION__, task_pid_vnr(current), wqhln->packet.ref); - - c = usrdata->channels + wqhln->packet.ref; + packet = wqhln->packet; kfree(wqhln); -#if 0 - if (c->param.request_va->number == 61 && - c->param.request_va->args[0] == swd.pid) { - - dprintk("pid: %d, tid: %d: SC %d, swd.cpu: %d, WARNING: wait4() for self?\n", - task_tgid_vnr(current), - task_pid_vnr(current); - c->param.request_va->number, - swd.cpu); - - return -EINTR; - } -#endif + dprintk("%s: tid: %d request from CPU %d\n", + __FUNCTION__, task_pid_vnr(current), packet->ref); mb(); - if (!c->param.request_va->valid) { - printk("mcexec_wait_syscall:stray wakeup pid: %d, tid: %d: SC %d, swd.cpu: %d\n", + if (!packet->req.valid) { + printk("%s: ERROR: stray wakeup pid: %d, tid: %d: SC %lu\n", + __FUNCTION__, task_tgid_vnr(current), task_pid_vnr(current), - c->param.request_va->number, - swd.cpu); + packet->req.number); + kfree(packet); goto retry; } - c->param.request_va->valid = 0; /* ack */ - dprintk("SC #%lx, %lx\n", - c->param.request_va->number, - c->param.request_va->args[0]); - if (mcctrl_add_per_thread_data(ppd, current, c) < 0) { + packet->req.valid = 0; /* ack */ + dprintk("%s: system call: %d, args[0]: %lu, args[1]: %lu, args[2]: %lu, " + "args[3]: %lu, args[4]: %lu, args[5]: %lu\n", + __FUNCTION__, + packet->req.number, + packet->req.args[0], + packet->req.args[1], + packet->req.args[2], + 
packet->req.args[3], + packet->req.args[4], + packet->req.args[5]); + + if (mcctrl_add_per_thread_data(ppd, current, packet) < 0) { kprintf("%s: error adding per-thread data\n", __FUNCTION__); return -EINVAL; } - if (__do_in_kernel_syscall(os, c, c->param.request_va)) { - if (copy_to_user(&req->sr, c->param.request_va, + if (__do_in_kernel_syscall(os, packet)) { + if (copy_to_user(&req->sr, &packet->req, sizeof(struct syscall_request))) { + if (mcctrl_delete_per_thread_data(ppd, current) < 0) { kprintf("%s: error deleting per-thread data\n", __FUNCTION__); return -EINVAL; @@ -753,33 +739,6 @@ long mcexec_load_syscall(ihk_os_t os, struct syscall_load_desc *__user arg) #endif ihk_device_unmap_memory(ihk_os_to_dev(os), phys, desc.size); - -/* - ihk_dma_channel_t channel; - struct ihk_dma_request request; - unsigned long dma_status = 0; - - channel = ihk_device_get_dma_channel(ihk_os_to_dev(os), 0); - if (!channel) { - return -EINVAL; - } - - memset(&request, 0, sizeof(request)); - request.src_os = os; - request.src_phys = desc.src; - request.dest_os = NULL; - request.dest_phys = desc.dest; - request.size = desc.size; - request.notify = (void *)virt_to_phys(&dma_status); - request.priv = (void *)1; - - ihk_dma_request(channel, &request); - - while (!dma_status) { - mb(); - udelay(1); - } -*/ return 0; } @@ -787,18 +746,9 @@ long mcexec_load_syscall(ihk_os_t os, struct syscall_load_desc *__user arg) long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg) { struct syscall_ret_desc ret; - struct mcctrl_channel *mc; + struct ikc_scd_packet *packet; struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); struct mcctrl_per_proc_data *ppd; -#if 0 - ihk_dma_channel_t channel; - struct ihk_dma_request request; - - channel = ihk_device_get_dma_channel(ihk_os_to_dev(os), 0); - if (!channel) { - return -EINVAL; - } -#endif if (copy_from_user(&ret, arg, sizeof(struct syscall_ret_desc))) { return -EFAULT; @@ -812,62 +762,43 @@ long mcexec_ret_syscall(ihk_os_t 
os, struct syscall_ret_desc *__user arg) return -EINVAL; } - mc = (struct mcctrl_channel *)mcctrl_get_per_thread_data(ppd, current); - if (!mc) { - kprintf("%s: ERROR: no peer channel registerred??\n", __FUNCTION__); + packet = (struct ikc_scd_packet *)mcctrl_get_per_thread_data(ppd, current); + if (!packet) { + kprintf("%s: ERROR: no packet registered for TID %d\n", + __FUNCTION__, task_pid_vnr(current)); return -EINVAL; } mcctrl_delete_per_thread_data(ppd, current); - mc->param.response_va->ret = ret.ret; - mc->param.response_va->stid = task_pid_vnr(current); - if (ret.size > 0) { /* Host => Accel. Write is fast. */ unsigned long phys; void *rpm; - phys = ihk_device_map_memory(ihk_os_to_dev(os), ret.dest, - ret.size); + phys = ihk_device_map_memory(ihk_os_to_dev(os), ret.dest, ret.size); #ifdef CONFIG_MIC rpm = ioremap_wc(phys, ret.size); #else rpm = ihk_device_map_virtual(ihk_os_to_dev(os), phys, ret.size, NULL, 0); #endif - if (copy_from_user(rpm, (void *__user)ret.src, ret.size)) { return -EFAULT; } - mb(); - mc->param.response_va->status = 1; - #ifdef CONFIG_MIC iounmap(rpm); #else ihk_device_unmap_virtual(ihk_os_to_dev(os), rpm, ret.size); #endif ihk_device_unmap_memory(ihk_os_to_dev(os), phys, ret.size); + } -/* - memset(&request, 0, sizeof(request)); - request.src_os = NULL; - request.src_phys = ret.src; - request.dest_os = os; - request.dest_phys = ret.dest; - request.size = ret.size; - request.notify_os = os; - request.notify = (void *)mc->param.response_rpa; - request.priv = (void *)1; - - ihk_dma_request(channel, &request); -*/ - } else { - mb(); - mc->param.response_va->status = 1; - } + __return_syscall(os, packet, ret.ret, task_pid_vnr(current)); + + /* Free packet */ + kfree(packet); return 0; } diff --git a/executer/kernel/mcctrl/ikc.c b/executer/kernel/mcctrl/ikc.c index 00e201e9..36199950 100644 --- a/executer/kernel/mcctrl/ikc.c +++ b/executer/kernel/mcctrl/ikc.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "mcctrl.h" #ifdef 
ATTACHED_MIC #include @@ -49,8 +50,9 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, { struct ikc_scd_packet *pisp = __packet; struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(__os); + int msg = pisp->msg; - switch (pisp->msg) { + switch (msg) { case SCD_MSG_INIT_CHANNEL: mcctrl_ikc_init(__os, pisp->ref, pisp->arg, c); break; @@ -108,6 +110,14 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, pisp->err, pisp->arg); break; } + + /* + * SCD_MSG_SYSCALL_ONESIDE holds the packet and frees is it + * mcexec_ret_syscall(), for the rest, free it here. + */ + if (msg != SCD_MSG_SYSCALL_ONESIDE) { + kfree(pisp); + } return 0; } @@ -144,8 +154,6 @@ int mcctrl_ikc_set_recv_cpu(ihk_os_t os, int cpu) ihk_ikc_channel_set_cpu(usrdata->channels[cpu].c, ihk_ikc_get_processor_id()); - kprintf("Setting the target to %d\n", - ihk_ikc_get_processor_id()); return 0; } @@ -191,12 +199,13 @@ static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ih #endif pmc->param.request_va = - (void *)__get_free_pages(GFP_ATOMIC, + (void *)__get_free_pages(in_interrupt() ? GFP_ATOMIC : GFP_KERNEL, REQUEST_SHIFT - PAGE_SHIFT); pmc->param.request_pa = virt_to_phys(pmc->param.request_va); pmc->param.doorbell_va = usrdata->mcctrl_doorbell_va; pmc->param.doorbell_pa = usrdata->mcctrl_doorbell_pa; - pmc->param.post_va = (void *)__get_free_page(GFP_ATOMIC); + pmc->param.post_va = (void *)__get_free_page(in_interrupt() ? + GFP_ATOMIC : GFP_KERNEL); pmc->param.post_pa = virt_to_phys(pmc->param.post_va); memset(pmc->param.doorbell_va, 0, PAGE_SIZE); memset(pmc->param.request_va, 0, PAGE_SIZE); @@ -216,8 +225,9 @@ static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ih PAGE_SIZE, NULL, 0); #endif - pmc->dma_buf = (void *)__get_free_pages(GFP_ATOMIC, - DMA_PIN_SHIFT - PAGE_SHIFT); + pmc->dma_buf = (void *)__get_free_pages(in_interrupt() ? 
+ GFP_ATOMIC : GFP_KERNEL, + DMA_PIN_SHIFT - PAGE_SHIFT); rpm->request_page = pmc->param.request_pa; rpm->doorbell_page = pmc->param.doorbell_pa; diff --git a/executer/kernel/mcctrl/mcctrl.h b/executer/kernel/mcctrl/mcctrl.h index 4d46e54d..c572fcee 100644 --- a/executer/kernel/mcctrl/mcctrl.h +++ b/executer/kernel/mcctrl/mcctrl.h @@ -113,6 +113,8 @@ struct ikc_scd_packet { int pid; int padding; unsigned long arg; + struct syscall_request req; + unsigned long resp_pa; }; /* for SCD_MSG_SYSFS_* */ @@ -159,7 +161,7 @@ struct wait_queue_head_list_node { /* Denotes an exclusive wait for requester TID rtid */ int rtid; int req; - struct ikc_scd_packet packet; + struct ikc_scd_packet *packet; }; struct mcctrl_channel { @@ -291,7 +293,7 @@ int mcctrl_ikc_is_valid_thread(ihk_os_t os, int cpu); ihk_os_t osnum_to_os(int n); /* syscall.c */ -int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall_request *sc); +int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet); struct mcctrl_per_proc_data *mcctrl_get_per_proc_data( struct mcctrl_usrdata *ud, int pid); @@ -301,6 +303,8 @@ int mcctrl_delete_per_thread_data(struct mcctrl_per_proc_data* ppd, struct task_struct *task); struct mcctrl_per_thread_data *mcctrl_get_per_thread_data( struct mcctrl_per_proc_data *ppd, struct task_struct *task); +void __return_syscall(ihk_os_t os, struct ikc_scd_packet *packet, + int ret, int stid); #define PROCFS_NAME_MAX 1000 diff --git a/executer/kernel/mcctrl/procfs.c b/executer/kernel/mcctrl/procfs.c index cbc4470f..36278bd8 100644 --- a/executer/kernel/mcctrl/procfs.c +++ b/executer/kernel/mcctrl/procfs.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "mcctrl.h" #include #include @@ -748,7 +749,7 @@ int procfsm_packet_handler(void *os, int msg, int pid, unsigned long arg) { struct procfs_work *work = NULL; - work = kzalloc(sizeof(*work), GFP_ATOMIC); + work = kzalloc(sizeof(*work), GFP_KERNEL); if (!work) { printk("%s: kzalloc failed\n", 
__FUNCTION__); return -1; diff --git a/executer/kernel/mcctrl/syscall.c b/executer/kernel/mcctrl/syscall.c index 3a9b1e09..074bdafd 100644 --- a/executer/kernel/mcctrl/syscall.c +++ b/executer/kernel/mcctrl/syscall.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -93,7 +94,7 @@ int mcctrl_add_per_thread_data(struct mcctrl_per_proc_data* ppd, int ret = 0; unsigned long flags; - ptd_alloc = kmalloc(sizeof(*ptd), GFP_ATOMIC); + ptd_alloc = kmalloc(sizeof(*ptd), GFP_KERNEL); if (!ptd_alloc) { kprintf("%s: error allocate per thread data\n", __FUNCTION__); ret = -ENOMEM; @@ -242,16 +243,17 @@ out: static int remote_page_fault(struct mcctrl_usrdata *usrdata, void *fault_addr, uint64_t reason) { - struct mcctrl_channel *channel; + struct ikc_scd_packet *packet; struct syscall_request *req; struct syscall_response *resp; int error; struct wait_queue_head_list_node *wqhln; unsigned long irqflags; struct mcctrl_per_proc_data *ppd; + unsigned long phys; - dprintk("%s: tid: %d, fault_addr: %p\n", - __FUNCTION__, task_pid_vnr(current), fault_addr); + dprintk("%s: tid: %d, fault_addr: %lu, reason: %lu\n", + __FUNCTION__, task_pid_vnr(current), fault_addr, reason); /* Look up per-process structure */ ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current)); @@ -262,16 +264,21 @@ static int remote_page_fault(struct mcctrl_usrdata *usrdata, void *fault_addr, u return -EINVAL; } - channel = (struct mcctrl_channel *)mcctrl_get_per_thread_data(ppd, current); - if (!channel) { + packet = (struct ikc_scd_packet *)mcctrl_get_per_thread_data(ppd, current); + if (!packet) { error = -ENOENT; - printk("remote_page_fault(%p,%p,%llx):channel not found. 
%d\n", - usrdata, fault_addr, reason, error); - goto out; + printk("%s: no packet registered for TID %d\n", + __FUNCTION__, task_pid_vnr(current)); + goto out_no_unmap; } - req = channel->param.request_va; - resp = channel->param.response_va; + req = &packet->req; + + /* XXX: we need to map response structure here.. */ + phys = ihk_device_map_memory(ihk_os_to_dev(usrdata->os), + packet->resp_pa, sizeof(*resp)); + resp = ihk_device_map_virtual(ihk_os_to_dev(usrdata->os), + phys, sizeof(*resp), NULL, 0); retry_alloc: wqhln = kmalloc(sizeof(*wqhln), GFP_KERNEL); @@ -312,14 +319,35 @@ retry_alloc: irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock); list_del(&wqhln->list); ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, irqflags); - kfree(wqhln); + dprintk("%s: tid: %d, fault_addr: %p WOKEN UP\n", __FUNCTION__, task_pid_vnr(current), fault_addr); if (error) { + kfree(wqhln); printk("remote_page_fault:interrupted. %d\n", error); goto out; } + else { + /* Update packet reference */ + packet = wqhln->packet; + req = &packet->req; + { + unsigned long phys2; + struct syscall_response *resp2; + phys2 = ihk_device_map_memory(ihk_os_to_dev(usrdata->os), + packet->resp_pa, sizeof(*resp)); + resp2 = ihk_device_map_virtual(ihk_os_to_dev(usrdata->os), + phys2, sizeof(*resp), NULL, 0); + + if (resp != resp2) { + resp = resp2; + phys = phys2; + printk("%s: updated new remote PA for resp\n", __FUNCTION__); + } + } + } + if (!req->valid) { printk("remote_page_fault:not valid\n"); } @@ -337,21 +365,29 @@ retry_alloc: resp->ret = pager_call(usrdata->os, (void *)req); mb(); resp->status = STATUS_PAGER_COMPLETED; - continue; + break; + //continue; } else { error = req->args[1]; if (error) { printk("remote_page_fault:response %d\n", error); + kfree(wqhln); goto out; } } break; } + kfree(wqhln); error = 0; out: - dprintk("remote_page_fault(%p,%p,%llx): %d\n", usrdata, fault_addr, reason, error); + ihk_device_unmap_virtual(ihk_os_to_dev(usrdata->os), resp, sizeof(*resp)); + 
ihk_device_unmap_memory(ihk_os_to_dev(usrdata->os), phys, sizeof(*resp)); + +out_no_unmap: + dprintk("%s: tid: %d, fault_addr: %lu, reason: %lu, error: %d\n", + __FUNCTION__, task_pid_vnr(current), fault_addr, reason, error); return error; } @@ -403,8 +439,9 @@ static int rus_page_hash_insert(struct page *page) { int ret = 0; struct rus_page *rp; + unsigned long flags; - spin_lock(&rus_page_hash_lock); + spin_lock_irqsave(&rus_page_hash_lock, flags); rp = _rus_page_hash_lookup(page); if (!rp) { @@ -431,7 +468,7 @@ static int rus_page_hash_insert(struct page *page) out: - spin_unlock(&rus_page_hash_lock); + spin_unlock_irqrestore(&rus_page_hash_lock, flags); return ret; } @@ -440,8 +477,9 @@ void rus_page_hash_put_pages(void) int i; struct rus_page *rp_iter; struct rus_page *rp_iter_next; + unsigned long flags; - spin_lock(&rus_page_hash_lock); + spin_lock_irqsave(&rus_page_hash_lock, flags); for (i = 0; i < RUS_PAGE_HASH_SIZE; ++i) { @@ -454,7 +492,7 @@ void rus_page_hash_put_pages(void) } } - spin_unlock(&rus_page_hash_lock); + spin_unlock_irqrestore(&rus_page_hash_lock, flags); } @@ -631,237 +669,6 @@ reserve_user_space_common(struct mcctrl_usrdata *usrdata, unsigned long start, u return start; } -//unsigned long last_thread_exec = 0; - -#ifndef DO_USER_MODE -static struct { - long (*do_sys_open)(int, const char __user *, int, int); - long (*sys_lseek)(unsigned int, off_t, unsigned int); - long (*sys_read)(unsigned int, char __user *, size_t); - long (*sys_write)(unsigned int, const char __user *, size_t); -} syscalls; - -void -mcctrl_syscall_init(void) -{ - printk("mcctrl_syscall_init\n"); - syscalls.do_sys_open = (void *)kallsyms_lookup_name("do_sys_open"); - syscalls.sys_lseek = (void *)kallsyms_lookup_name("sys_lseek"); - syscalls.sys_read = (void *)kallsyms_lookup_name("sys_read"); - syscalls.sys_write = (void *)kallsyms_lookup_name("sys_write"); - printk("syscalls.do_sys_open=%lx\n", (long)syscalls.do_sys_open); - printk("syscalls.sys_lseek=%lx\n", 
(long)syscalls.sys_lseek); - printk("syscalls.sys_read=%lx\n", (long)syscalls.sys_read); - printk("syscalls.sys_write=%lx\n", (long)syscalls.sys_write); -} - -static int do_async_copy(ihk_os_t os, unsigned long dest, unsigned long src, - unsigned long size, unsigned int inbound) -{ - struct ihk_dma_request request; - ihk_dma_channel_t channel; - unsigned long asize = ALIGN_WAIT_BUF(size); - - channel = ihk_device_get_dma_channel(ihk_os_to_dev(os), 0); - if (!channel) { - return -EINVAL; - } - - memset(&request, 0, sizeof(request)); - request.src_os = inbound ? os : NULL; - request.src_phys = src; - request.dest_os = inbound ? NULL : os; - request.dest_phys = dest; - request.size = size; - request.notify = (void *)(inbound ? dest + asize : src + asize); - request.priv = (void *)1; - - *(unsigned long *)phys_to_virt((unsigned long)request.notify) = 0; -#ifdef SC_DEBUG - last_request = request; -#endif - - ihk_dma_request(channel, &request); - - return 0; -} - -//int mcctrl_dma_abort; - -static void async_wait(ihk_os_t os, unsigned char *p, int size) -{ - int asize = ALIGN_WAIT_BUF(size); - unsigned long long s, w; - struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); - - rdtscll(s); - while (!p[asize]) { - mb(); - cpu_relax(); - rdtscll(w); - if (w > s + 1024UL * 1024 * 1024 * 10) { - printk("DMA Timed out : %p (%p + %d) => %d\n", - p + asize, p, size, p[asize]); -#ifdef SC_DEBUG - print_dma_lastreq(); -#endif - usrdata->mcctrl_dma_abort = 1; - return; - } - } -} - -static void clear_wait(unsigned char *p, int size) -{ - //int asize = ALIGN_WAIT_BUF(size); - p[size] = 0; -} - -static unsigned long translate_remote_va(struct mcctrl_channel *c, - unsigned long rva) -{ - int i, n; - struct syscall_post *p; - - p = c->param.post_va; - - n = (int)p->v[0]; - if (n < 0 || n >= PAGE_SIZE / sizeof(struct syscall_post)) { - return -EINVAL; - } - for (i = 0; i < n; i++) { - if (p[i + 1].v[0] != 1) { - continue; - } - if (rva >= p[i + 1].v[1] && rva < p[i + 1].v[2]) { 
- return p[i + 1].v[3] + (rva - p[i + 1].v[1]); - } - } - - return -EFAULT; -} - -//extern struct mcctrl_channel *channels; - -#if 0 -int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, - struct syscall_request *sc) -{ - int ret; - mm_segment_t fs; - unsigned long pa; - struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); - - switch (sc->number) { - case 0: /* read */ - case 1024: - if (sc->number & 1024) { - sc->args[1] = translate_remote_va(c, sc->args[1]); - if ((long)sc->args[1] < 0) { - __return_syscall(c, -EFAULT); - return 0; - } - } - - clear_wait(c->dma_buf, sc->args[2]); - fs = get_fs(); - set_fs(KERNEL_DS); - ret = syscalls.sys_read(sc->args[0], c->dma_buf, sc->args[2]); - if (ret > 0) { - do_async_copy(os, sc->args[1], virt_to_phys(c->dma_buf), - sc->args[2], 0); - set_fs(fs); - - async_wait(os, c->dma_buf, sc->args[2]); - } - __return_syscall(c, ret); - return 0; - - case 1: /* write */ - case 1025: - if (sc->number & 1024) { - sc->args[1] = translate_remote_va(c, sc->args[1]); - if ((long)sc->args[1] < 0) { - __return_syscall(c, -EFAULT); - return 0; - } - } - - clear_wait(c->dma_buf, sc->args[2]); - do_async_copy(os, virt_to_phys(c->dma_buf), sc->args[1], - sc->args[2], 1); - fs = get_fs(); - set_fs(KERNEL_DS); - async_wait(os, c->dma_buf, sc->args[2]); - - ret = syscalls.sys_write(sc->args[0], c->dma_buf, sc->args[2]); - set_fs(fs); - - __return_syscall(c, ret); - return 0; - - case 2: /* open */ - case 1026: - if (sc->number & 1024) { - sc->args[0] = translate_remote_va(c, sc->args[0]); - if ((long)sc->args[0] < 0) { - __return_syscall(c, -EFAULT); - return 0; - } - } - - clear_wait(c->dma_buf, 256); - do_async_copy(os, virt_to_phys(c->dma_buf), sc->args[0], - 256, 1); - fs = get_fs(); - set_fs(KERNEL_DS); - async_wait(os, c->dma_buf, 256); - - ret = syscalls.do_sys_open(AT_FDCWD, c->dma_buf, sc->args[1], - sc->args[2]); - set_fs(fs); - - __return_syscall(c, ret); - return 0; - - case 3: /* Close */ - ret = 
sys_close(sc->args[0]); - __return_syscall(c, ret); - return 0; - - case 8: /* lseek */ - ret = syscalls.sys_lseek(sc->args[0], sc->args[1], sc->args[2]); - __return_syscall(c, ret); - return 0; - - case 56: /* Clone */ - usrdata->last_thread_exec++; - if (mcctrl_ikc_is_valid_thread(usrdata->last_thread_exec)) { - printk("Clone notification: %lx\n", sc->args[0]); - if (channels[usrdata->last_thread_exec].param.post_va) { - memcpy(usrdata->channels[usrdata->last_thread_exec].param.post_va, - c->param.post_va, PAGE_SIZE); - } - mcctrl_ikc_send_msg(usrdata->last_thread_exec, - SCD_MSG_SCHEDULE_PROCESS, - usrdata->last_thread_exec, sc->args[0]); - } - - __return_syscall(c, 0); - return 0; - - default: - if (sc->number & 1024) { - __return_syscall(c, -EFAULT); - return 0; - } else { - return -ENOSYS; - } - } -} -#endif -#endif /* !DO_USER_MODE */ - struct pager { struct list_head list; struct inode * inode; @@ -1480,11 +1287,25 @@ static long pager_call(ihk_os_t os, struct syscall_request *req) return ret; } -static void __return_syscall(struct mcctrl_channel *c, int ret) +void __return_syscall(ihk_os_t os, struct ikc_scd_packet *packet, + int ret, int stid) { - c->param.response_va->ret = ret; + unsigned long phys; + struct syscall_response *res; + + phys = ihk_device_map_memory(ihk_os_to_dev(os), + packet->resp_pa, sizeof(*res)); + res = ihk_device_map_virtual(ihk_os_to_dev(os), + phys, sizeof(*res), NULL, 0); + + /* Map response structure and notify offloading thread */ + res->ret = ret; + res->stid = stid; mb(); - c->param.response_va->status = 1; + res->status = 1; + + ihk_device_unmap_virtual(ihk_os_to_dev(os), res, sizeof(*res)); + ihk_device_unmap_memory(ihk_os_to_dev(os), phys, sizeof(*res)); } static int remap_user_space(uintptr_t rva, size_t len, int prot) @@ -1673,13 +1494,14 @@ fail: #define SCHED_CHECK_SAME_OWNER 0x01 #define SCHED_CHECK_ROOT 0x02 -int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall_request *sc) +int 
__do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet) { + struct syscall_request *sc = &packet->req; int error; long ret = -1; struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); - dprintk("__do_in_kernel_syscall(%p,%p,%ld %lx)\n", os, c, sc->number, sc->args[0]); + dprintk("%s: system call: %d\n", __FUNCTION__, sc->args[0]); switch (sc->number) { case __NR_mmap: ret = pager_call(os, sc); @@ -1690,8 +1512,9 @@ int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall if (sc->args[2]) { unsigned long flags; struct mcctrl_per_proc_data *ppd = NULL; + int i; - ppd = kmalloc(sizeof(*ppd), GFP_ATOMIC); + ppd = kmalloc(sizeof(*ppd), GFP_KERNEL); if (!ppd) { printk("ERROR: allocating per process data\n"); error = -ENOMEM; @@ -1700,6 +1523,14 @@ int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall ppd->pid = task_tgid_vnr(current); ppd->rpgtable = sc->args[2]; + INIT_LIST_HEAD(&ppd->wq_list); + INIT_LIST_HEAD(&ppd->wq_list_exact); + spin_lock_init(&ppd->wq_list_lock); + + for (i = 0; i < MCCTRL_PER_THREAD_DATA_HASH_SIZE; ++i) { + INIT_LIST_HEAD(&ppd->per_thread_data_hash[i]); + rwlock_init(&ppd->per_thread_data_hash_lock[i]); + } flags = ihk_ikc_spinlock_lock(&usrdata->per_proc_list_lock); list_add_tail(&ppd->list, &usrdata->per_proc_list); @@ -1799,10 +1630,11 @@ sched_setparam_out: break; } - __return_syscall(c, ret); + __return_syscall(os, packet, ret, 0); error = 0; out: - dprintk("__do_in_kernel_syscall(%p,%p,%ld %lx): %d %ld\n", os, c, sc->number, sc->args[0], error, ret); + dprintk("%s: system call: %d, error: %d, ret: %ld\n", + __FUNCTION__, sc->number, sc->args[0], error, ret); return error; } diff --git a/executer/kernel/mcctrl/sysfs.c b/executer/kernel/mcctrl/sysfs.c index b446864a..0610862c 100644 --- a/executer/kernel/mcctrl/sysfs.c +++ b/executer/kernel/mcctrl/sysfs.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "mcctrl.h" #include "sysfs_msg.h" @@ -1887,7 +1888,7 @@ 
sysfsm_packet_handler(void *os, int msg, int err, long arg1, long arg2) { struct sysfs_work *work = NULL; - work = kzalloc(sizeof(*work), GFP_ATOMIC); + work = kzalloc(sizeof(*work), GFP_KERNEL); if (!work) { eprintk("mcctrl:sysfsm_packet_handler:kzalloc failed\n"); return; diff --git a/kernel/host.c b/kernel/host.c index 60f67834..d9d5763a 100644 --- a/kernel/host.c +++ b/kernel/host.c @@ -570,12 +570,14 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, } *sp, info; unsigned long pp; int cpuid; + int ret = 0; switch (packet->msg) { case SCD_MSG_INIT_CHANNEL_ACKED: dkprintf("SCD_MSG_INIT_CHANNEL_ACKED\n"); process_msg_init_acked(c, packet->arg); - return 0; + ret = 0; + break; case SCD_MSG_PREPARE_PROCESS: @@ -598,13 +600,15 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, pckt.arg = packet->arg; syscall_channel_send(c, &pckt); - return 0; + ret = 0; + break; case SCD_MSG_SCHEDULE_PROCESS: cpuid = obtain_clone_cpuid(); if(cpuid == -1){ kprintf("No CPU available\n"); - return -1; + ret = -1; + break; } dkprintf("SCD_MSG_SCHEDULE_PROCESS: %lx\n", packet->arg); thread = (struct thread *)packet->arg; @@ -618,7 +622,9 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, runq_add_thread(thread, cpuid); //cpu_local_var(next) = (struct thread *)packet->arg; - return 0; + ret = 0; + break; + case SCD_MSG_SEND_SIGNAL: pp = ihk_mc_map_memory(NULL, packet->arg, sizeof(struct mcctrl_signal)); sp = (struct mcctrl_signal *)ihk_mc_map_virtual(pp, 1, PTATTR_WRITABLE | PTATTR_ACTIVE); @@ -633,18 +639,25 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, rc = do_kill(NULL, info.pid, info.tid, info.sig, &info.info, 0); kprintf("SCD_MSG_SEND_SIGNAL: do_kill(pid=%d, tid=%d, sig=%d)=%d\n", info.pid, info.tid, info.sig, rc); - return 0; + ret = 0; + break; + case SCD_MSG_PROCFS_REQUEST: process_procfs_request(packet->arg); - return 0; + ret = 0; + break; + case SCD_MSG_CLEANUP_PROCESS: 
dkprintf("SCD_MSG_CLEANUP_PROCESS pid=%d\n", packet->pid); terminate_host(packet->pid); - return 0; + ret = 0; + break; + case SCD_MSG_DEBUG_LOG: dkprintf("SCD_MSG_DEBUG_LOG code=%lx\n", packet->arg); debug_log(packet->arg); - return 0; + ret = 0; + break; case SCD_MSG_SYSFS_REQ_SHOW: case SCD_MSG_SYSFS_REQ_STORE: @@ -652,7 +665,8 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, sysfss_packet_handler(c, packet->msg, packet->err, packet->sysfs_arg1, packet->sysfs_arg2, packet->sysfs_arg3); - return 0; + ret = 0; + break; case SCD_MSG_GET_CPU_MAPPING: req_get_cpu_mapping(packet->arg); @@ -660,17 +674,21 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, pckt.msg = SCD_MSG_REPLY_GET_CPU_MAPPING; pckt.arg = packet->arg; syscall_channel_send(c, &pckt); - return 0; + ret = 0; + break; default: kprintf("syscall_pakcet_handler:unknown message " "(%d.%d.%d.%d.%d.%#lx)\n", packet->msg, packet->ref, packet->osnum, packet->pid, packet->err, packet->arg); - return 0; + ret = 0; + break; } - return 0; + + kfree(packet); + return ret; } void init_host_syscall_channel(void) diff --git a/kernel/include/syscall.h b/kernel/include/syscall.h index 6d0ccfa0..111edf07 100644 --- a/kernel/include/syscall.h +++ b/kernel/include/syscall.h @@ -117,28 +117,6 @@ struct user_desc { unsigned int lm:1; }; -struct ikc_scd_packet { - int msg; - int err; - union { - /* for traditional SCD_MSG_* */ - struct { - int ref; - int osnum; - int pid; - int padding; - unsigned long arg; - }; - - /* for SCD_MSG_SYSFS_* */ - struct { - long sysfs_arg1; - long sysfs_arg2; - long sysfs_arg3; - }; - }; -}; - struct program_image_section { unsigned long vaddr; unsigned long len; @@ -222,6 +200,30 @@ struct syscall_request { unsigned long args[6]; }; +struct ikc_scd_packet { + int msg; + int err; + union { + /* for traditional SCD_MSG_* */ + struct { + int ref; + int osnum; + int pid; + int padding; + unsigned long arg; + struct syscall_request req; + unsigned long resp_pa; + 
}; + + /* for SCD_MSG_SYSFS_* */ + struct { + long sysfs_arg1; + long sysfs_arg2; + long sysfs_arg3; + }; + }; +}; + struct syscall_response { /* TID of the thread that requested the service */ int ttid; diff --git a/kernel/syscall.c b/kernel/syscall.c index dfe90328..43116fa5 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -127,11 +127,9 @@ int prepare_process_ranges_args_envs(struct thread *thread, static void do_mod_exit(int status); #endif -static void send_syscall(struct syscall_request *req, int cpu, int pid) +static void send_syscall(struct syscall_request *req, int cpu, int pid, struct syscall_response *res) { struct ikc_scd_packet packet; - struct syscall_response *res; - struct syscall_params *scp; struct ihk_ikc_channel_desc *syscall_channel; int ret; @@ -140,7 +138,6 @@ static void send_syscall(struct syscall_request *req, int cpu, int pid) req->number == __NR_kill){ // interrupt syscall extern int num_processors; - scp = &get_cpu_local_var(0)->scp2; syscall_channel = get_cpu_local_var(0)->syscall_channel2; /* XXX: is this really going to work if multiple processes @@ -152,34 +149,22 @@ static void send_syscall(struct syscall_request *req, int cpu, int pid) pid = req->args[1]; } else{ - scp = &get_cpu_local_var(cpu)->scp; syscall_channel = get_cpu_local_var(cpu)->syscall_channel; } - res = scp->response_va; res->status = 0; req->valid = 0; -#ifdef USE_DMA - memcpy_async(scp->request_pa, - virt_to_phys(req), sizeof(*req), 0, &fin); - - memcpy_async_wait(&scp->post_fin); - scp->post_va->v[0] = scp->post_idx; - memcpy_async_wait(&fin); -#else - memcpy(scp->request_va, req, sizeof(*req)); -#endif + memcpy(&packet.req, req, sizeof(*req)); barrier(); - scp->request_va->valid = 1; - *(unsigned int *)scp->doorbell_va = cpu + 1; + packet.req.valid = 1; #ifdef SYSCALL_BY_IKC packet.msg = SCD_MSG_SYSCALL_ONESIDE; packet.ref = cpu; packet.pid = pid ? 
pid : cpu_local_var(current)->proc->pid; - packet.arg = scp->request_rpa; + packet.resp_pa = virt_to_phys(res); dkprintf("send syscall, nr: %d, pid: %d\n", req->number, packet.pid); ret = ihk_ikc_send(syscall_channel, &packet, 0); @@ -193,9 +178,8 @@ ihk_spinlock_t syscall_lock; long do_syscall(struct syscall_request *req, int cpu, int pid) { - struct syscall_response *res; + struct syscall_response res; struct syscall_request req2 IHK_DMA_ALIGN; - struct syscall_params *scp; int error; long rc; int islock = 0; @@ -219,20 +203,15 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) if(req->number == __NR_exit_group || req->number == __NR_gettid || req->number == __NR_kill){ // interrupt syscall - scp = &get_cpu_local_var(0)->scp2; islock = 1; irqstate = ihk_mc_spinlock_lock(&syscall_lock); } - else{ - scp = &get_cpu_local_var(cpu)->scp; - } - res = scp->response_va; /* The current thread is the requester and any thread from * the pool may serve the request */ req->rtid = cpu_local_var(current)->tid; req->ttid = 0; - send_syscall(req, cpu, pid); + send_syscall(req, cpu, pid, &res); dkprintf("%s: syscall num: %d waiting for Linux.. 
\n", __FUNCTION__, req->number); @@ -240,8 +219,8 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) #define STATUS_IN_PROGRESS 0 #define STATUS_COMPLETED 1 #define STATUS_PAGE_FAULT 3 - while (res->status != STATUS_COMPLETED) { - while (res->status == STATUS_IN_PROGRESS) { + while (res.status != STATUS_COMPLETED) { + while (res.status == STATUS_IN_PROGRESS) { struct cpu_local_var *v; int call_schedule = 0; long runq_irqstate; @@ -270,15 +249,16 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) if (call_schedule) { schedule(); ++thread->in_syscall_offload; + v->wait_in_syscall = NULL; } } - if (res->status == STATUS_PAGE_FAULT) { + if (res.status == STATUS_PAGE_FAULT) { dkprintf("STATUS_PAGE_FAULT in syscall, pid: %d\n", cpu_local_var(current)->proc->pid); error = page_fault_process_vm(thread->vm, - (void *)res->fault_address, - res->fault_reason|PF_POPULATE); + (void *)res.fault_address, + res.fault_reason|PF_POPULATE); /* send result */ req2.number = __NR_mmap; @@ -288,16 +268,16 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) /* The current thread is the requester and only the waiting thread * may serve the request */ req2.rtid = cpu_local_var(current)->tid; - req2.ttid = res->stid; + req2.ttid = res.stid; - send_syscall(&req2, cpu, pid); + send_syscall(&req2, cpu, pid, &res); } } dkprintf("%s: syscall num: %d got host reply: %d \n", - __FUNCTION__, req->number, res->ret); + __FUNCTION__, req->number, res.ret); - rc = res->ret; + rc = res.ret; if(islock){ ihk_mc_spinlock_unlock(&syscall_lock, irqstate); } From ec521feb153118bc868c0e03db5fed5171c2153b Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Tue, 9 Aug 2016 17:16:47 +0900 Subject: [PATCH 08/42] do_syscall(): remove invalid reference --- kernel/syscall.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/syscall.c b/kernel/syscall.c index 43116fa5..5c443d99 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -249,7 +249,6 @@ long do_syscall(struct 
syscall_request *req, int cpu, int pid) if (call_schedule) { schedule(); ++thread->in_syscall_offload; - v->wait_in_syscall = NULL; } } From daca522d259c36f2285e3d45025ea10319bb3537 Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Fri, 12 Aug 2016 10:14:16 +0900 Subject: [PATCH 09/42] mcctrl: move kmalloc/kfree of wait queue head out of fast path --- executer/kernel/mcctrl/control.c | 46 ++++++++++++++++---------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/executer/kernel/mcctrl/control.c b/executer/kernel/mcctrl/control.c index 0ec4db1d..c6e1f3ae 100644 --- a/executer/kernel/mcctrl/control.c +++ b/executer/kernel/mcctrl/control.c @@ -459,13 +459,6 @@ int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet) unsigned long flags; struct mcctrl_per_proc_data *ppd; -retry_alloc: - wqhln_alloc = kmalloc(sizeof(*wqhln), GFP_KERNEL); - if (!wqhln_alloc) { - printk("WARNING: coudln't alloc wait queue head, retrying..\n"); - goto retry_alloc; - } - /* Look up per-process structure */ ppd = mcctrl_get_per_proc_data(ud, pid); @@ -514,15 +507,19 @@ retry_alloc: /* If no match found, add request */ if (!wqhln) { +retry_alloc: + wqhln_alloc = kmalloc(sizeof(*wqhln), GFP_ATOMIC); + if (!wqhln_alloc) { + printk("WARNING: coudln't alloc wait queue head, retrying..\n"); + goto retry_alloc; + } + wqhln = wqhln_alloc; wqhln->req = 0; wqhln->task = NULL; init_waitqueue_head(&wqhln->wq_syscall); list_add_tail(&wqhln->list, &ppd->wq_list); } - else { - kfree(wqhln_alloc); - } wqhln->packet = packet; wqhln->req = 1; @@ -539,7 +536,7 @@ int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req) { struct ikc_scd_packet *packet; struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); - struct wait_queue_head_list_node *wqhln; + struct wait_queue_head_list_node *wqhln = NULL; struct wait_queue_head_list_node *wqhln_iter; int ret = 0; unsigned long irqflags; @@ -563,22 +560,10 @@ int mcexec_wait_syscall(ihk_os_t os, struct 
syscall_wait_desc *__user req) retry: /* Prepare per-thread wait queue head or find a valid request */ -retry_alloc: - wqhln = kmalloc(sizeof(*wqhln), GFP_KERNEL); - if (!wqhln) { - printk("WARNING: coudln't alloc wait queue head, retrying..\n"); - goto retry_alloc; - } - - wqhln->task = current; - wqhln->req = 0; - init_waitqueue_head(&wqhln->wq_syscall); - irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock); /* First see if there is a valid request already that is not yet taken */ list_for_each_entry(wqhln_iter, &ppd->wq_list, list) { if (wqhln_iter->task == NULL && wqhln_iter->req) { - kfree(wqhln); wqhln = wqhln_iter; wqhln->task = current; list_del(&wqhln->list); @@ -586,6 +571,19 @@ retry_alloc: } } + if (!wqhln) { +retry_alloc: + wqhln = kmalloc(sizeof(*wqhln), GFP_ATOMIC); + if (!wqhln) { + printk("WARNING: coudln't alloc wait queue head, retrying..\n"); + goto retry_alloc; + } + + wqhln->task = current; + wqhln->req = 0; + init_waitqueue_head(&wqhln->wq_syscall); + } + /* No valid request? Wait for one.. 
*/ if (wqhln->req == 0) { list_add_tail(&wqhln->list, &ppd->wq_list); @@ -601,11 +599,13 @@ retry_alloc: if (ret && !wqhln->req) { kfree(wqhln); + wqhln = NULL; return -EINTR; } packet = wqhln->packet; kfree(wqhln); + wqhln = NULL; dprintk("%s: tid: %d request from CPU %d\n", __FUNCTION__, task_pid_vnr(current), packet->ref); From a01ae910514439a4d74b01fe911132f4dd3582d7 Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Fri, 12 Aug 2016 12:26:14 +0900 Subject: [PATCH 10/42] mcctrl: use IKC packet pools --- executer/kernel/mcctrl/control.c | 9 +++++++-- executer/kernel/mcctrl/ikc.c | 2 +- kernel/host.c | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/executer/kernel/mcctrl/control.c b/executer/kernel/mcctrl/control.c index c6e1f3ae..9d4f937c 100644 --- a/executer/kernel/mcctrl/control.c +++ b/executer/kernel/mcctrl/control.c @@ -617,7 +617,8 @@ retry_alloc: task_tgid_vnr(current), task_pid_vnr(current), packet->req.number); - kfree(packet); + ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet, + (usrdata->channels + packet->ref)->c); goto retry; } @@ -651,6 +652,9 @@ retry_alloc: return 0; } + ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet, + (usrdata->channels + packet->ref)->c); + if (mcctrl_delete_per_thread_data(ppd, current) < 0) { kprintf("%s: error deleting per-thread data\n", __FUNCTION__); return -EINVAL; @@ -798,7 +802,8 @@ long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg) __return_syscall(os, packet, ret.ret, task_pid_vnr(current)); /* Free packet */ - kfree(packet); + ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet, + (usrdata->channels + packet->ref)->c); return 0; } diff --git a/executer/kernel/mcctrl/ikc.c b/executer/kernel/mcctrl/ikc.c index 36199950..0103a266 100644 --- a/executer/kernel/mcctrl/ikc.c +++ b/executer/kernel/mcctrl/ikc.c @@ -116,7 +116,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, * mcexec_ret_syscall(), for the rest, free it here. 
*/ if (msg != SCD_MSG_SYSCALL_ONESIDE) { - kfree(pisp); + ihk_ikc_release_packet((struct ihk_ikc_free_packet *)__packet, c); } return 0; } diff --git a/kernel/host.c b/kernel/host.c index d9d5763a..2f4f9138 100644 --- a/kernel/host.c +++ b/kernel/host.c @@ -687,7 +687,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, } - kfree(packet); + ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet, c); return ret; } From f4155cc9e833105e05624c224d900ccbbbdb5c46 Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Fri, 12 Aug 2016 12:27:04 +0900 Subject: [PATCH 11/42] mcstop+release-smp-x86.sh: fix OS instance discovery bug --- arch/x86/tools/mcstop+release-smp-x86.sh.in | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/tools/mcstop+release-smp-x86.sh.in b/arch/x86/tools/mcstop+release-smp-x86.sh.in index bf1b4962..e26c2e46 100644 --- a/arch/x86/tools/mcstop+release-smp-x86.sh.in +++ b/arch/x86/tools/mcstop+release-smp-x86.sh.in @@ -20,10 +20,12 @@ cpus="" if [ "`lsmod | grep ihk_smp_x86`" == "" ]; then exit; fi # Destroy all LWK instances +if ls /dev/mcos* 1>/dev/null 2>&1; then for i in /dev/mcos*; do ind=`echo $i|cut -c10-`; if ! ${SBINDIR}/ihkconfig 0 destroy $ind; then echo "error: destroying LWK instance $ind failed"; exit; fi done +fi # Query IHK-SMP resources and release them if ! 
${SBINDIR}/ihkconfig 0 query cpu > /dev/null; then echo "error: querying cpus"; exit; fi From e3c7c9b890bb29c13849f37e8e58715c04a26b81 Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Fri, 12 Aug 2016 21:52:13 +0900 Subject: [PATCH 12/42] mcctrl: separate waiting threads and pending requests --- executer/kernel/mcctrl/control.c | 23 +++++++++++------------ executer/kernel/mcctrl/mcctrl.h | 3 ++- executer/kernel/mcctrl/syscall.c | 5 +++-- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/executer/kernel/mcctrl/control.c b/executer/kernel/mcctrl/control.c index 9d4f937c..be9f2e3e 100644 --- a/executer/kernel/mcctrl/control.c +++ b/executer/kernel/mcctrl/control.c @@ -159,6 +159,7 @@ static long mcexec_prepare_image(ihk_os_t os, ppd->pid = pdesc->pid; ppd->rpgtable = pdesc->rpgtable; INIT_LIST_HEAD(&ppd->wq_list); + INIT_LIST_HEAD(&ppd->wq_req_list); INIT_LIST_HEAD(&ppd->wq_list_exact); spin_lock_init(&ppd->wq_list_lock); @@ -427,7 +428,7 @@ static long mcexec_get_cpu(ihk_os_t os) return info->n_cpus; } -struct mcctrl_per_proc_data *mcctrl_get_per_proc_data( +inline struct mcctrl_per_proc_data *mcctrl_get_per_proc_data( struct mcctrl_usrdata *ud, int pid) { @@ -462,7 +463,7 @@ int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet) /* Look up per-process structure */ ppd = mcctrl_get_per_proc_data(ud, pid); - if (!ppd) { + if (unlikely(!ppd)) { kprintf("%s: ERROR: no per-process structure for PID %d??\n", __FUNCTION__, task_tgid_vnr(current)); return 0; @@ -482,7 +483,7 @@ int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet) flags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock); /* Is this a request for a specific thread? 
See if it's waiting */ - if (packet->req.ttid) { + if (unlikely(packet->req.ttid)) { list_for_each_entry(wqhln_iter, &ppd->wq_list_exact, list) { if (packet->req.ttid != task_pid_vnr(wqhln_iter->task)) continue; @@ -505,8 +506,8 @@ int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet) } } - /* If no match found, add request */ - if (!wqhln) { + /* If no match found, add request to pending request list */ + if (unlikely(!wqhln)) { retry_alloc: wqhln_alloc = kmalloc(sizeof(*wqhln), GFP_ATOMIC); if (!wqhln_alloc) { @@ -518,7 +519,7 @@ retry_alloc: wqhln->req = 0; wqhln->task = NULL; init_waitqueue_head(&wqhln->wq_syscall); - list_add_tail(&wqhln->list, &ppd->wq_list); + list_add_tail(&wqhln->list, &ppd->wq_req_list); } wqhln->packet = packet; @@ -545,7 +546,7 @@ int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req) /* Look up per-process structure */ ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current)); - if (!ppd) { + if (unlikely(!ppd)) { kprintf("%s: ERROR: no per-process structure for PID %d??\n", __FUNCTION__, task_tgid_vnr(current)); return -EINVAL; @@ -562,7 +563,7 @@ retry: /* Prepare per-thread wait queue head or find a valid request */ irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock); /* First see if there is a valid request already that is not yet taken */ - list_for_each_entry(wqhln_iter, &ppd->wq_list, list) { + list_for_each_entry(wqhln_iter, &ppd->wq_req_list, list) { if (wqhln_iter->task == NULL && wqhln_iter->req) { wqhln = wqhln_iter; wqhln->task = current; @@ -582,11 +583,9 @@ retry_alloc: wqhln->task = current; wqhln->req = 0; init_waitqueue_head(&wqhln->wq_syscall); - } - /* No valid request? Wait for one.. */ - if (wqhln->req == 0) { - list_add_tail(&wqhln->list, &ppd->wq_list); + /* Wait for a request.. 
*/ + list_add(&wqhln->list, &ppd->wq_list); ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, irqflags); ret = wait_event_interruptible(wqhln->wq_syscall, wqhln->req); diff --git a/executer/kernel/mcctrl/mcctrl.h b/executer/kernel/mcctrl/mcctrl.h index c572fcee..5b440cc0 100644 --- a/executer/kernel/mcctrl/mcctrl.h +++ b/executer/kernel/mcctrl/mcctrl.h @@ -187,6 +187,7 @@ struct mcctrl_per_proc_data { unsigned long rpgtable; /* per process, not per OS */ struct list_head wq_list; + struct list_head wq_req_list; struct list_head wq_list_exact; ihk_spinlock_t wq_list_lock; @@ -294,7 +295,7 @@ ihk_os_t osnum_to_os(int n); /* syscall.c */ int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet); -struct mcctrl_per_proc_data *mcctrl_get_per_proc_data( +inline struct mcctrl_per_proc_data *mcctrl_get_per_proc_data( struct mcctrl_usrdata *ud, int pid); int mcctrl_add_per_thread_data(struct mcctrl_per_proc_data* ppd, diff --git a/executer/kernel/mcctrl/syscall.c b/executer/kernel/mcctrl/syscall.c index 074bdafd..75124715 100644 --- a/executer/kernel/mcctrl/syscall.c +++ b/executer/kernel/mcctrl/syscall.c @@ -94,7 +94,7 @@ int mcctrl_add_per_thread_data(struct mcctrl_per_proc_data* ppd, int ret = 0; unsigned long flags; - ptd_alloc = kmalloc(sizeof(*ptd), GFP_KERNEL); + ptd_alloc = kmalloc(sizeof(*ptd), GFP_ATOMIC); if (!ptd_alloc) { kprintf("%s: error allocate per thread data\n", __FUNCTION__); ret = -ENOMEM; @@ -110,7 +110,7 @@ int mcctrl_add_per_thread_data(struct mcctrl_per_proc_data* ppd, } } - if (ptd) { + if (unlikely(ptd)) { ret = -EBUSY; kfree(ptd_alloc); goto out; @@ -1524,6 +1524,7 @@ int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet) ppd->pid = task_tgid_vnr(current); ppd->rpgtable = sc->args[2]; INIT_LIST_HEAD(&ppd->wq_list); + INIT_LIST_HEAD(&ppd->wq_req_list); INIT_LIST_HEAD(&ppd->wq_list_exact); spin_lock_init(&ppd->wq_list_lock); From 0884e3d5436d5df1c973d1bb13d418d003b287a3 Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Sun, 
14 Aug 2016 11:16:40 +0900 Subject: [PATCH 13/42] IHK-IKC: map queue in McKernel as cacheable --- arch/x86/kernel/include/arch-memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/include/arch-memory.h b/arch/x86/kernel/include/arch-memory.h index 2279720e..47ebbb40 100644 --- a/arch/x86/kernel/include/arch-memory.h +++ b/arch/x86/kernel/include/arch-memory.h @@ -318,5 +318,5 @@ extern unsigned long ap_trampoline; #define AP_TRAMPOLINE_SIZE 0x2000 /* Local is cachable */ -#define IHK_IKC_QUEUE_PT_ATTR (PTATTR_NO_EXECUTE | PTATTR_WRITABLE | PTATTR_UNCACHABLE) +#define IHK_IKC_QUEUE_PT_ATTR (PTATTR_NO_EXECUTE | PTATTR_WRITABLE) #endif From 45e51fcc070aa6ee2da4b12518eb758c282c530f Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Sun, 14 Aug 2016 11:29:02 +0900 Subject: [PATCH 14/42] mcctrl: fix padding for 128bytes SCD message --- executer/kernel/mcctrl/mcctrl.h | 2 +- kernel/include/syscall.h | 2 +- kernel/syscall.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/executer/kernel/mcctrl/mcctrl.h b/executer/kernel/mcctrl/mcctrl.h index 5b440cc0..f8362117 100644 --- a/executer/kernel/mcctrl/mcctrl.h +++ b/executer/kernel/mcctrl/mcctrl.h @@ -111,7 +111,6 @@ struct ikc_scd_packet { int ref; int osnum; int pid; - int padding; unsigned long arg; struct syscall_request req; unsigned long resp_pa; @@ -124,6 +123,7 @@ struct ikc_scd_packet { long sysfs_arg3; }; }; + char padding[12]; }; struct mcctrl_priv { diff --git a/kernel/include/syscall.h b/kernel/include/syscall.h index 111edf07..1a107854 100644 --- a/kernel/include/syscall.h +++ b/kernel/include/syscall.h @@ -209,7 +209,6 @@ struct ikc_scd_packet { int ref; int osnum; int pid; - int padding; unsigned long arg; struct syscall_request req; unsigned long resp_pa; @@ -222,6 +221,7 @@ struct ikc_scd_packet { long sysfs_arg3; }; }; + char padding[12]; }; struct syscall_response { diff --git a/kernel/syscall.c b/kernel/syscall.c index 5c443d99..dae81b9c 100644 --- 
a/kernel/syscall.c +++ b/kernel/syscall.c @@ -129,7 +129,7 @@ static void do_mod_exit(int status); static void send_syscall(struct syscall_request *req, int cpu, int pid, struct syscall_response *res) { - struct ikc_scd_packet packet; + struct ikc_scd_packet packet IHK_DMA_ALIGN; struct ihk_ikc_channel_desc *syscall_channel; int ret; From 3fa3920bb397236ee1357ce02055bf35f267bbd6 Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Sun, 14 Aug 2016 11:30:17 +0900 Subject: [PATCH 15/42] fix a couple of debug msgs --- arch/x86/kernel/perfctr.c | 2 +- kernel/init.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/perfctr.c b/arch/x86/kernel/perfctr.c index b2c2cd09..1b1ab99d 100644 --- a/arch/x86/kernel/perfctr.c +++ b/arch/x86/kernel/perfctr.c @@ -105,7 +105,7 @@ static int set_perfctr_x86_direct(int counter, int mode, unsigned int value) wrmsr(MSR_IA32_PERFEVTSEL0 + counter, value); //kprintf("wrmsr: %d <= %x\n", MSR_PERF_GLOBAL_CTRL, 0); - kprintf("wrmsr: %d <= %x\n", MSR_IA32_PERFEVTSEL0 + counter, value); + //kprintf("wrmsr: %d <= %x\n", MSR_IA32_PERFEVTSEL0 + counter, value); return 0; } diff --git a/kernel/init.c b/kernel/init.c index 9f82ca01..0f73ef17 100644 --- a/kernel/init.c +++ b/kernel/init.c @@ -371,7 +371,7 @@ int main(void) } kmsg_init(mode); - kputs("MCK started.\n"); + kputs("IHK/McKernel started.\n"); arch_init(); @@ -393,7 +393,7 @@ int main(void) futex_init(); - kputs("MCK/IHK booted.\n"); + kputs("IHK/McKernel booted.\n"); #ifdef DCFA_KMOD mc_cmd_client_init(); From fbbaaf5b540903ff06a799eac18427f9d8bbbf9e Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Sun, 14 Aug 2016 14:28:21 +0900 Subject: [PATCH 16/42] mcctrl: use GFP_ATOMIC in atomic context --- executer/kernel/mcctrl/procfs.c | 2 +- executer/kernel/mcctrl/syscall.c | 8 ++++---- executer/kernel/mcctrl/sysfs.c | 2 +- kernel/syscall.c | 4 +++- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/executer/kernel/mcctrl/procfs.c 
b/executer/kernel/mcctrl/procfs.c index 36278bd8..96243fc4 100644 --- a/executer/kernel/mcctrl/procfs.c +++ b/executer/kernel/mcctrl/procfs.c @@ -749,7 +749,7 @@ int procfsm_packet_handler(void *os, int msg, int pid, unsigned long arg) { struct procfs_work *work = NULL; - work = kzalloc(sizeof(*work), GFP_KERNEL); + work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) { printk("%s: kzalloc failed\n", __FUNCTION__); return -1; diff --git a/executer/kernel/mcctrl/syscall.c b/executer/kernel/mcctrl/syscall.c index 75124715..0042d323 100644 --- a/executer/kernel/mcctrl/syscall.c +++ b/executer/kernel/mcctrl/syscall.c @@ -281,7 +281,7 @@ static int remote_page_fault(struct mcctrl_usrdata *usrdata, void *fault_addr, u phys, sizeof(*resp), NULL, 0); retry_alloc: - wqhln = kmalloc(sizeof(*wqhln), GFP_KERNEL); + wqhln = kmalloc(sizeof(*wqhln), GFP_ATOMIC); if (!wqhln) { printk("WARNING: coudln't alloc wait queue head, retrying..\n"); goto retry_alloc; @@ -779,7 +779,7 @@ static int pager_req_create(ihk_os_t os, int fd, uintptr_t result_pa) up(&pager_sem); - newpager = kzalloc(sizeof(*newpager), GFP_KERNEL); + newpager = kzalloc(sizeof(*newpager), GFP_ATOMIC); if (!newpager) { error = -ENOMEM; printk("pager_req_create(%d,%lx):kzalloc failed. %d\n", fd, (long)result_pa, error); @@ -1035,7 +1035,7 @@ static int pager_req_map(ihk_os_t os, int fd, size_t len, off_t off, uintptr_t phys; dprintk("pager_req_map(%p,%d,%lx,%lx,%lx)\n", os, fd, len, off, result_rpa); - pager = kzalloc(sizeof(*pager), GFP_KERNEL); + pager = kzalloc(sizeof(*pager), GFP_ATOMIC); if (!pager) { error = -ENOMEM; printk("pager_req_map(%p,%d,%lx,%lx,%lx):kzalloc failed. 
%d\n", os, fd, len, off, result_rpa, error); @@ -1514,7 +1514,7 @@ int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet) struct mcctrl_per_proc_data *ppd = NULL; int i; - ppd = kmalloc(sizeof(*ppd), GFP_KERNEL); + ppd = kmalloc(sizeof(*ppd), GFP_ATOMIC); if (!ppd) { printk("ERROR: allocating per process data\n"); error = -ENOMEM; diff --git a/executer/kernel/mcctrl/sysfs.c b/executer/kernel/mcctrl/sysfs.c index 0610862c..230a4996 100644 --- a/executer/kernel/mcctrl/sysfs.c +++ b/executer/kernel/mcctrl/sysfs.c @@ -1888,7 +1888,7 @@ sysfsm_packet_handler(void *os, int msg, int err, long arg1, long arg2) { struct sysfs_work *work = NULL; - work = kzalloc(sizeof(*work), GFP_KERNEL); + work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) { eprintk("mcctrl:sysfsm_packet_handler:kzalloc failed\n"); return; diff --git a/kernel/syscall.c b/kernel/syscall.c index dae81b9c..8d4b22ca 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -190,6 +190,9 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) dkprintf("SC(%d)[%3d] sending syscall\n", ihk_mc_get_processor_id(), req->number); + + irqstate = 0; /* for avoidance of warning */ + barrier(); if(req->number != __NR_exit_group){ if(proc->nohost && // host is down @@ -199,7 +202,6 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) ++thread->in_syscall_offload; } - irqstate = 0; /* for avoidance of warning */ if(req->number == __NR_exit_group || req->number == __NR_gettid || req->number == __NR_kill){ // interrupt syscall From e4239f18859c2a7870df821b15b97c8b2fd69aeb Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Sun, 14 Aug 2016 14:29:10 +0900 Subject: [PATCH 17/42] mcexec: use 16 threads initially in offload handler pool --- executer/user/mcexec.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index 5ae855b9..9bd388e4 100644 --- a/executer/user/mcexec.c +++ b/executer/user/mcexec.c 
@@ -868,7 +868,10 @@ struct thread_data_s { pthread_mutex_t *lock; pthread_barrier_t *init_ready; } *thread_data; + int ncpu; +int n_threads; + pid_t master_tid; pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; @@ -1106,9 +1109,9 @@ void init_worker_threads(int fd) int i; pthread_mutex_init(&lock, NULL); - pthread_barrier_init(&init_ready, NULL, ncpu + 2); + pthread_barrier_init(&init_ready, NULL, n_threads + 2); - for (i = 0; i <= ncpu; ++i) { + for (i = 0; i <= n_threads; ++i) { int ret; thread_data[i].fd = fd; @@ -1518,6 +1521,19 @@ int main(int argc, char **argv) return 1; } + n_threads = ncpu; + if (ncpu > 16) { + n_threads = 16; + } + + /* + * XXX: keep thread_data ncpu sized despite that there are only + * n_threads worker threads in the pool so that signaling code + * keeps working. + * + * TODO: fix signaling code to be independent of TIDs. + * TODO: implement dynamic thread pool resizing. + */ thread_data = (struct thread_data_s *)malloc(sizeof(struct thread_data_s) * (ncpu + 1)); memset(thread_data, '\0', sizeof(struct thread_data_s) * (ncpu + 1)); @@ -1602,7 +1618,7 @@ int main(int argc, char **argv) return 1; } - for (i = 0; i <= ncpu; ++i) { + for (i = 0; i <= n_threads; ++i) { pthread_join(thread_data[i].thread_id, NULL); } @@ -1666,13 +1682,13 @@ do_generic_syscall( static void kill_thread(unsigned long cpu) { - if(cpu >= 0 && cpu < ncpu){ + if(cpu >= 0 && cpu < n_threads){ pthread_kill(thread_data[cpu].thread_id, LOCALSIG); } else{ int i; - for (i = 0; i < ncpu; ++i) { + for (i = 0; i < n_threads; ++i) { pthread_kill(thread_data[i].thread_id, LOCALSIG); } } From 6ff2d4abe705521ce2d23ce50ca4620cae68cc3d Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Mon, 15 Aug 2016 13:47:57 +0900 Subject: [PATCH 18/42] mcctrl: store per-process data in hash table --- executer/kernel/mcctrl/control.c | 100 ++++++++++++++++++++++--------- executer/kernel/mcctrl/ikc.c | 7 ++- executer/kernel/mcctrl/mcctrl.h | 21 +++++-- executer/kernel/mcctrl/syscall.c | 10 ++-- 4
files changed, 99 insertions(+), 39 deletions(-) diff --git a/executer/kernel/mcctrl/control.c b/executer/kernel/mcctrl/control.c index be9f2e3e..a95dd1d6 100644 --- a/executer/kernel/mcctrl/control.c +++ b/executer/kernel/mcctrl/control.c @@ -82,7 +82,6 @@ static long mcexec_prepare_image(ihk_os_t os, void *args, *envs; long ret = 0; struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); - unsigned long flags; struct mcctrl_per_proc_data *ppd = NULL; int i; @@ -168,9 +167,11 @@ static long mcexec_prepare_image(ihk_os_t os, rwlock_init(&ppd->per_thread_data_hash_lock[i]); } - flags = ihk_ikc_spinlock_lock(&usrdata->per_proc_list_lock); - list_add_tail(&ppd->list, &usrdata->per_proc_list); - ihk_ikc_spinlock_unlock(&usrdata->per_proc_list_lock, flags); + if (mcctrl_add_per_proc_data(usrdata, ppd->pid, ppd) < 0) { + printk("%s: error adding per process data\n", __FUNCTION__); + ret = -EINVAL; + goto free_out; + } if (copy_to_user(udesc, pdesc, sizeof(struct program_load_desc) + sizeof(struct program_image_section) * desc.num_sections)) { @@ -184,6 +185,10 @@ static long mcexec_prepare_image(ihk_os_t os, ret = 0; free_out: + /* Only free ppd if error */ + if (ret != 0 && ppd) { + kfree(ppd); + } kfree(args); kfree(pdesc); kfree(envs); @@ -428,23 +433,75 @@ static long mcexec_get_cpu(ihk_os_t os) return info->n_cpus; } -inline struct mcctrl_per_proc_data *mcctrl_get_per_proc_data( - struct mcctrl_usrdata *ud, - int pid) +int mcctrl_add_per_proc_data(struct mcctrl_usrdata *ud, int pid, + struct mcctrl_per_proc_data *ppd) { - struct mcctrl_per_proc_data *ppd = NULL, *ppd_iter; + struct mcctrl_per_proc_data *ppd_iter; + int hash = (pid & MCCTRL_PER_PROC_DATA_HASH_MASK); + int ret = 0; unsigned long flags; - /* Look up per-process structure */ - flags = ihk_ikc_spinlock_lock(&ud->per_proc_list_lock); - list_for_each_entry(ppd_iter, &ud->per_proc_list, list) { + /* Check if data for this process exists and add if not */ +
write_lock_irqsave(&ud->per_proc_data_hash_lock[hash], flags); + list_for_each_entry(ppd_iter, &ud->per_proc_data_hash[hash], hash) { + if (ppd_iter->pid == pid) { + ret = -EBUSY; + goto out; + } + } + + list_add_tail(&ppd->hash, &ud->per_proc_data_hash[hash]); + +out: + write_unlock_irqrestore(&ud->per_proc_data_hash_lock[hash], flags); + return ret; +} + +int mcctrl_delete_per_proc_data(struct mcctrl_usrdata *ud, int pid) +{ + struct mcctrl_per_proc_data *ppd_iter, *ppd = NULL; + int hash = (pid & MCCTRL_PER_PROC_DATA_HASH_MASK); + int ret = 0; + unsigned long flags; + + write_lock_irqsave(&ud->per_proc_data_hash_lock[hash], flags); + list_for_each_entry(ppd_iter, &ud->per_proc_data_hash[hash], hash) { if (ppd_iter->pid == pid) { ppd = ppd_iter; break; } } - ihk_ikc_spinlock_unlock(&ud->per_proc_list_lock, flags); + if (!ppd) { + ret = -EINVAL; + goto out; + } + + list_del(&ppd->hash); + +out: + write_unlock_irqrestore(&ud->per_proc_data_hash_lock[hash], flags); + return ret; +} + +inline struct mcctrl_per_proc_data *mcctrl_get_per_proc_data( + struct mcctrl_usrdata *ud, int pid) +{ + struct mcctrl_per_proc_data *ppd_iter, *ppd = NULL; + int hash = (pid & MCCTRL_PER_PROC_DATA_HASH_MASK); + unsigned long flags; + + /* Check if data for this process exists and return it */ + read_lock_irqsave(&ud->per_proc_data_hash_lock[hash], flags); + + list_for_each_entry(ppd_iter, &ud->per_proc_data_hash[hash], hash) { + if (ppd_iter->pid == pid) { + ppd = ppd_iter; + break; + } + } + + read_unlock_irqrestore(&ud->per_proc_data_hash_lock[hash], flags); return ppd; } @@ -939,23 +996,14 @@ int mcexec_close_exec(ihk_os_t os) int found = 0; int os_ind = ihk_host_os_get_index(os); struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); - unsigned long flags; - struct mcctrl_per_proc_data *ppd = NULL, *ppd_iter; + struct mcctrl_per_proc_data *ppd = NULL; - ppd = NULL; - flags = ihk_ikc_spinlock_lock(&usrdata->per_proc_list_lock); - - list_for_each_entry(ppd_iter, 
&usrdata->per_proc_list, list) { - if (ppd_iter->pid == task_tgid_vnr(current)) { - ppd = ppd_iter; - break; - } - } + ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current)); if (ppd) { - list_del(&ppd->list); + mcctrl_delete_per_proc_data(usrdata, ppd->pid); - dprintk("pid: %d, tid: %d: rpgtable for %d (0x%lx) removed\n", + dprintk("pid: %d, tid: %d: rpgtable for %d (0x%lx) removed\n", task_tgid_vnr(current), current->pid, ppd->pid, ppd->rpgtable); kfree(ppd); @@ -965,8 +1013,6 @@ int mcexec_close_exec(ihk_os_t os) task_tgid_vnr(current)); } - ihk_ikc_spinlock_unlock(&usrdata->per_proc_list_lock, flags); - if (os_ind < 0) { return EINVAL; } diff --git a/executer/kernel/mcctrl/ikc.c b/executer/kernel/mcctrl/ikc.c index 0103a266..a05f45b8 100644 --- a/executer/kernel/mcctrl/ikc.c +++ b/executer/kernel/mcctrl/ikc.c @@ -317,6 +317,7 @@ int prepare_ikc_channels(ihk_os_t os) { struct ihk_cpu_info *info; struct mcctrl_usrdata *usrdata; + int i; usrdata = kzalloc(sizeof(struct mcctrl_usrdata), GFP_KERNEL); usrdata->mcctrl_doorbell_va = (void *)__get_free_page(GFP_KERNEL); @@ -348,8 +349,10 @@ int prepare_ikc_channels(ihk_os_t os) memcpy(&usrdata->listen_param2, &listen_param2, sizeof listen_param2); ihk_ikc_listen_port(os, &usrdata->listen_param2); - INIT_LIST_HEAD(&usrdata->per_proc_list); - spin_lock_init(&usrdata->per_proc_list_lock); + for (i = 0; i < MCCTRL_PER_PROC_DATA_HASH_SIZE; ++i) { + INIT_LIST_HEAD(&usrdata->per_proc_data_hash[i]); + rwlock_init(&usrdata->per_proc_data_hash_lock[i]); + } INIT_LIST_HEAD(&usrdata->cpu_topology_list); INIT_LIST_HEAD(&usrdata->node_topology_list); diff --git a/executer/kernel/mcctrl/mcctrl.h b/executer/kernel/mcctrl/mcctrl.h index f8362117..0a4965fb 100644 --- a/executer/kernel/mcctrl/mcctrl.h +++ b/executer/kernel/mcctrl/mcctrl.h @@ -182,7 +182,7 @@ struct mcctrl_per_thread_data { #define MCCTRL_PER_THREAD_DATA_HASH_MASK (MCCTRL_PER_THREAD_DATA_HASH_SIZE - 1) struct mcctrl_per_proc_data { - struct list_head list; + 
struct list_head hash; int pid; unsigned long rpgtable; /* per process, not per OS */ @@ -251,6 +251,10 @@ struct node_topology { #define CPU_LONGS (((NR_CPUS) + (BITS_PER_LONG) - 1) / (BITS_PER_LONG)) +#define MCCTRL_PER_PROC_DATA_HASH_SHIFT 7 +#define MCCTRL_PER_PROC_DATA_HASH_SIZE (1 << MCCTRL_PER_PROC_DATA_HASH_SHIFT) +#define MCCTRL_PER_PROC_DATA_HASH_MASK (MCCTRL_PER_PROC_DATA_HASH_SIZE - 1) + struct mcctrl_usrdata { struct ihk_ikc_listen_param listen_param; struct ihk_ikc_listen_param listen_param2; @@ -266,8 +270,9 @@ struct mcctrl_usrdata { unsigned long last_thread_exec; wait_queue_head_t wq_prepare; - struct list_head per_proc_list; - ihk_spinlock_t per_proc_list_lock; + struct list_head per_proc_data_hash[MCCTRL_PER_PROC_DATA_HASH_SIZE]; + rwlock_t per_proc_data_hash_lock[MCCTRL_PER_PROC_DATA_HASH_SIZE]; + void **keys; struct sysfsm_data sysfsm_data; unsigned long cpu_online[CPU_LONGS]; @@ -295,15 +300,19 @@ ihk_os_t osnum_to_os(int n); /* syscall.c */ int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet); +int mcctrl_add_per_proc_data(struct mcctrl_usrdata *ud, int pid, + struct mcctrl_per_proc_data *ppd); +int mcctrl_delete_per_proc_data(struct mcctrl_usrdata *ud, int pid); inline struct mcctrl_per_proc_data *mcctrl_get_per_proc_data( - struct mcctrl_usrdata *ud, - int pid); + struct mcctrl_usrdata *ud, int pid); + int mcctrl_add_per_thread_data(struct mcctrl_per_proc_data* ppd, struct task_struct *task, void *data); int mcctrl_delete_per_thread_data(struct mcctrl_per_proc_data* ppd, struct task_struct *task); -struct mcctrl_per_thread_data *mcctrl_get_per_thread_data( +inline struct mcctrl_per_thread_data *mcctrl_get_per_thread_data( struct mcctrl_per_proc_data *ppd, struct task_struct *task); + void __return_syscall(ihk_os_t os, struct ikc_scd_packet *packet, int ret, int stid); diff --git a/executer/kernel/mcctrl/syscall.c b/executer/kernel/mcctrl/syscall.c index 0042d323..25cee740 100644 --- a/executer/kernel/mcctrl/syscall.c +++ 
b/executer/kernel/mcctrl/syscall.c @@ -1510,7 +1510,6 @@ int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet) case __NR_munmap: /* Set new remote page table if not zero */ if (sc->args[2]) { - unsigned long flags; struct mcctrl_per_proc_data *ppd = NULL; int i; @@ -1533,9 +1532,12 @@ int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet) rwlock_init(&ppd->per_thread_data_hash_lock[i]); } - flags = ihk_ikc_spinlock_lock(&usrdata->per_proc_list_lock); - list_add_tail(&ppd->list, &usrdata->per_proc_list); - ihk_ikc_spinlock_unlock(&usrdata->per_proc_list_lock, flags); + if (mcctrl_add_per_proc_data(usrdata, ppd->pid, ppd) < 0) { + printk("%s: error adding per process data\n", __FUNCTION__); + error = -EBUSY; + kfree(ppd); + goto out; + } dprintk("pid: %d, rpgtable: 0x%lx added\n", ppd->pid, ppd->rpgtable); From 5e9957da0fbcd0b06fbcd9864763724cf6f368b2 Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Tue, 16 Aug 2016 08:53:41 +0900 Subject: [PATCH 19/42] syscall_response: introduction of req_thread_status field --- executer/include/uprotocol.h | 5 +++++ kernel/include/syscall.h | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/executer/include/uprotocol.h b/executer/include/uprotocol.h index e8dfe0ec..6c94ad86 100644 --- a/executer/include/uprotocol.h +++ b/executer/include/uprotocol.h @@ -135,12 +135,17 @@ struct syscall_load_desc { unsigned long size; }; +#define IHK_SCD_REQ_THREAD_SPINNING 0 +#define IHK_SCD_REQ_THREAD_TO_BE_WOKEN 1 +#define IHK_SCD_REQ_THREAD_DESCHEDULED 2 + struct syscall_response { /* TID of the thread that requested the service */ int ttid; /* TID of the mcexec thread that is serving or has served the request */ int stid; unsigned long status; + unsigned long req_thread_status; long ret; unsigned long fault_address; unsigned long fault_reason; diff --git a/kernel/include/syscall.h b/kernel/include/syscall.h index 1a107854..4d878290 100644 --- a/kernel/include/syscall.h +++ b/kernel/include/syscall.h @@ 
-224,12 +224,17 @@ struct ikc_scd_packet { char padding[12]; }; +#define IHK_SCD_REQ_THREAD_SPINNING 0 +#define IHK_SCD_REQ_THREAD_TO_BE_WOKEN 1 +#define IHK_SCD_REQ_THREAD_DESCHEDULED 2 + struct syscall_response { /* TID of the thread that requested the service */ int ttid; /* TID of the mcexec thread that is serving the request */ int stid; unsigned long status; + unsigned long req_thread_status; long ret; unsigned long fault_address; unsigned long fault_reason; From c897a56c346e62c825d2eab4febd50f1931ab4e3 Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Tue, 16 Aug 2016 08:56:05 +0900 Subject: [PATCH 20/42] __notify_syscall_requester(): use CAS or IKC to notify syscall completion --- executer/kernel/mcctrl/mcctrl.h | 6 ++++ executer/kernel/mcctrl/syscall.c | 57 +++++++++++++++++++++++++++++++- kernel/include/syscall.h | 6 ++++ 3 files changed, 68 insertions(+), 1 deletion(-) diff --git a/executer/kernel/mcctrl/mcctrl.h b/executer/kernel/mcctrl/mcctrl.h index 0a4965fb..6983d440 100644 --- a/executer/kernel/mcctrl/mcctrl.h +++ b/executer/kernel/mcctrl/mcctrl.h @@ -49,6 +49,7 @@ #define SCD_MSG_PREPARE_PROCESS_ACKED 0x2 #define SCD_MSG_PREPARE_PROCESS_NACKED 0x7 #define SCD_MSG_SCHEDULE_PROCESS 0x3 +#define SCD_MSG_WAKE_UP_SYSCALL_THREAD 0x14 #define SCD_MSG_INIT_CHANNEL 0x5 #define SCD_MSG_INIT_CHANNEL_ACKED 0x6 @@ -122,6 +123,11 @@ struct ikc_scd_packet { long sysfs_arg2; long sysfs_arg3; }; + + /* SCD_MSG_WAKE_UP_SYSCALL_THREAD */ + struct { + int ttid; + }; }; char padding[12]; }; diff --git a/executer/kernel/mcctrl/syscall.c b/executer/kernel/mcctrl/syscall.c index 25cee740..8df0771c 100644 --- a/executer/kernel/mcctrl/syscall.c +++ b/executer/kernel/mcctrl/syscall.c @@ -241,6 +241,43 @@ out: } #endif +static int __notify_syscall_requester(ihk_os_t os, struct ikc_scd_packet *packet, + struct syscall_response *res) +{ + struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); + struct ihk_ikc_channel_desc *c = (usrdata->channels + packet->ref)->c; + struct
ikc_scd_packet r_packet; + int ret = 0; + + /* If spinning, no need for IKC message */ + if (__sync_bool_compare_and_swap(&res->req_thread_status, + IHK_SCD_REQ_THREAD_SPINNING, + IHK_SCD_REQ_THREAD_TO_BE_WOKEN)) { + dprintk("%s: no need to send IKC message for PID %d\n", + __FUNCTION__, packet->pid); + return ret; + } + + /* The thread is not spinning any more, make sure it's descheduled */ + if (!__sync_bool_compare_and_swap(&res->req_thread_status, + IHK_SCD_REQ_THREAD_DESCHEDULED, + IHK_SCD_REQ_THREAD_TO_BE_WOKEN)) { + printk("%s: WARNING: inconsistent requester status, " + "pid: %d, req status: %lu, syscall nr: %lu\n", + __FUNCTION__, packet->pid, + res->req_thread_status, packet->req.number); + dump_stack(); + + return -EINVAL; + } + + r_packet.msg = SCD_MSG_WAKE_UP_SYSCALL_THREAD; + r_packet.ttid = packet->req.rtid; + ret = ihk_ikc_send(c, &r_packet, 0); + + return ret; +} + static int remote_page_fault(struct mcctrl_usrdata *usrdata, void *fault_addr, uint64_t reason) { struct ikc_scd_packet *packet; @@ -274,7 +311,7 @@ static int remote_page_fault(struct mcctrl_usrdata *usrdata, void *fault_addr, u req = &packet->req; - /* XXX: we need to map response structure here.. 
*/ + /* Map response structure */ phys = ihk_device_map_memory(ihk_os_to_dev(usrdata->os), packet->resp_pa, sizeof(*resp)); resp = ihk_device_map_virtual(ihk_os_to_dev(usrdata->os), @@ -306,6 +343,12 @@ retry_alloc: #define STATUS_PAGER_COMPLETED 1 #define STATUS_PAGE_FAULT 3 req->valid = 0; + + if (__notify_syscall_requester(usrdata->os, packet, resp) < 0) { + printk("%s: WARNING: failed to notify PID %d\n", + __FUNCTION__, packet->pid); + } + mb(); resp->status = STATUS_PAGE_FAULT; @@ -363,6 +406,12 @@ retry_alloc: #define PAGER_REQ_RESUME 0x0101 else if (req->args[0] != PAGER_REQ_RESUME) { resp->ret = pager_call(usrdata->os, (void *)req); + + if (__notify_syscall_requester(usrdata->os, packet, resp) < 0) { + printk("%s: WARNING: failed to notify PID %d\n", + __FUNCTION__, packet->pid); + } + mb(); resp->status = STATUS_PAGER_COMPLETED; break; @@ -1301,6 +1350,12 @@ void __return_syscall(ihk_os_t os, struct ikc_scd_packet *packet, /* Map response structure and notify offloading thread */ res->ret = ret; res->stid = stid; + + if (__notify_syscall_requester(os, packet, res) < 0) { + printk("%s: WARNING: failed to notify PID %d\n", + __FUNCTION__, packet->pid); + } + mb(); res->status = 1; diff --git a/kernel/include/syscall.h b/kernel/include/syscall.h index 4d878290..ae4c0c7f 100644 --- a/kernel/include/syscall.h +++ b/kernel/include/syscall.h @@ -31,6 +31,7 @@ #define SCD_MSG_PREPARE_PROCESS_ACKED 0x2 #define SCD_MSG_PREPARE_PROCESS_NACKED 0x7 #define SCD_MSG_SCHEDULE_PROCESS 0x3 +#define SCD_MSG_WAKE_UP_SYSCALL_THREAD 0x14 #define SCD_MSG_INIT_CHANNEL 0x5 #define SCD_MSG_INIT_CHANNEL_ACKED 0x6 @@ -220,6 +221,11 @@ struct ikc_scd_packet { long sysfs_arg2; long sysfs_arg3; }; + + /* SCD_MSG_WAKE_UP_SYSCALL_THREAD */ + struct { + int ttid; + }; }; char padding[12]; }; From 3aa06444f42df644d594c371c51664e140a3523b Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Tue, 16 Aug 2016 08:58:22 +0900 Subject: [PATCH 21/42] do_syscall(): allow descheduling threads in offloaded
syscalls if CPU core oversubscribed --- kernel/host.c | 21 +++++++++++++++ kernel/include/process.h | 3 +++ kernel/process.c | 9 ++----- kernel/syscall.c | 56 ++++++++++++++++++++++++++++------------ 4 files changed, 65 insertions(+), 24 deletions(-) diff --git a/kernel/host.c b/kernel/host.c index 2f4f9138..1013ebfe 100644 --- a/kernel/host.c +++ b/kernel/host.c @@ -559,6 +559,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, struct ikc_scd_packet *packet = __packet; struct ikc_scd_packet pckt; int rc; + struct mcs_rwlock_node_irqsave lock; struct thread *thread; struct process *proc; struct mcctrl_signal { @@ -625,6 +626,26 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, ret = 0; break; + /* + * Used for syscall offload reply message to explicitly schedule in + * the waiting thread + */ + case SCD_MSG_WAKE_UP_SYSCALL_THREAD: + thread = find_thread(0, packet->ttid, &lock); + if (!thread) { + kprintf("%s: WARNING: no thread for SCD reply? TID: %d\n", + __FUNCTION__, packet->ttid); + ret = -EINVAL; + break; + } + thread_unlock(thread, &lock); + + dkprintf("%s: SCD_MSG_WAKE_UP_SYSCALL_THREAD: waking up tid %d\n", + __FUNCTION__, packet->ttid); + waitq_wakeup(&thread->scd_wq); + ret = 0; + break; + case SCD_MSG_SEND_SIGNAL: pp = ihk_mc_map_memory(NULL, packet->arg, sizeof(struct mcctrl_signal)); sp = (struct mcctrl_signal *)ihk_mc_map_virtual(pp, 1, PTATTR_WRITABLE | PTATTR_ACTIVE); diff --git a/kernel/include/process.h b/kernel/include/process.h index 4ad055f4..bd10f5dd 100644 --- a/kernel/include/process.h +++ b/kernel/include/process.h @@ -566,6 +566,9 @@ struct thread { struct itimerval itimer_prof; struct timespec itimer_virtual_value; struct timespec itimer_prof_value; + + /* Syscall offload wait queue head */ + struct waitq scd_wq; }; struct process_vm { diff --git a/kernel/process.c b/kernel/process.c index e82e1e2a..cf0e2089 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -2576,13 +2576,8 @@ void 
schedule(void) struct thread *last; if (cpu_local_var(no_preempt)) { - dkprintf("no schedule() while no preemption! \n"); - return; - } - - if (cpu_local_var(current) - && cpu_local_var(current)->in_syscall_offload) { - dkprintf("no schedule() while syscall offload!\n"); + kprintf("%s: WARNING can't schedule() while no preemption, cnt: %d\n", + __FUNCTION__, cpu_local_var(no_preempt)); return; } diff --git a/kernel/syscall.c b/kernel/syscall.c index 8d4b22ca..a7985728 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -186,6 +186,7 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) unsigned long irqstate; struct thread *thread = cpu_local_var(current); struct process *proc = thread->proc; + DECLARE_WAITQ_ENTRY(scd_wq_entry, thread); dkprintf("SC(%d)[%3d] sending syscall\n", ihk_mc_get_processor_id(), @@ -212,7 +213,7 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) * the pool may serve the request */ req->rtid = cpu_local_var(current)->tid; req->ttid = 0; - + res.req_thread_status = IHK_SCD_REQ_THREAD_SPINNING; send_syscall(req, cpu, pid, &res); dkprintf("%s: syscall num: %d waiting for Linux.. \n", @@ -224,36 +225,55 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) while (res.status != STATUS_COMPLETED) { while (res.status == STATUS_IN_PROGRESS) { struct cpu_local_var *v; - int call_schedule = 0; + int do_schedule = 0; long runq_irqstate; + unsigned long flags; + + DECLARE_WAITQ_ENTRY(scd_wq_entry, cpu_local_var(current)); cpu_pause(); - /* XXX: Intel MPI + Intel OpenMP situation: - * While the MPI helper thread waits in a poll() call the OpenMP master - * thread is iterating through the CPU cores using setaffinity(). - * Unless we give a chance to it on this core the two threads seem to - * hang in deadlock. If the new thread would make a system call on this - * core we would be in trouble. 
For now, allow it, but in the future - * we should have syscall channels for each thread instead of per core, - * or we should multiplex syscall threads in mcexec */ + /* Spin if not preemptable */ + if (cpu_local_var(no_preempt) || !thread->tid) { + continue; + } + + /* Spin by default, but if re-schedule is requested let + * the other thread run */ runq_irqstate = ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock)); v = get_this_cpu_local_var(); if (v->flags & CPU_FLAG_NEED_RESCHED) { - call_schedule = 1; - --thread->in_syscall_offload; + do_schedule = 1; } ihk_mc_spinlock_unlock(&v->runq_lock, runq_irqstate); - if (call_schedule) { - schedule(); - ++thread->in_syscall_offload; + if (!do_schedule) { + continue; } + + flags = cpu_disable_interrupt_save(); + + /* Try to sleep until notified */ + if (__sync_bool_compare_and_swap(&res.req_thread_status, + IHK_SCD_REQ_THREAD_SPINNING, + IHK_SCD_REQ_THREAD_DESCHEDULED)) { + + dkprintf("%s: tid %d waiting for syscall reply...\n", + __FUNCTION__, thread->tid); + waitq_init(&thread->scd_wq); + waitq_prepare_to_wait(&thread->scd_wq, &scd_wq_entry, + PS_INTERRUPTIBLE); + cpu_restore_interrupt(flags); + schedule(); + waitq_finish_wait(&thread->scd_wq, &scd_wq_entry); + } + + cpu_restore_interrupt(flags); } - + if (res.status == STATUS_PAGE_FAULT) { dkprintf("STATUS_PAGE_FAULT in syscall, pid: %d\n", cpu_local_var(current)->proc->pid); @@ -271,6 +291,7 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) req2.rtid = cpu_local_var(current)->tid; req2.ttid = res.stid; + res.req_thread_status = IHK_SCD_REQ_THREAD_SPINNING; send_syscall(&req2, cpu, pid, &res); } } @@ -809,7 +830,8 @@ terminate(int rc, int sig) release_thread(mythread); release_process_vm(vm); schedule(); - // no return + kprintf("%s: ERROR: returned from terminate() -> schedule()\n", __FUNCTION__); + panic("panic"); } void From f584e2ec250bc283ba00ba0bdad7a52252678c1b Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Tue, 16 Aug 2016 
09:20:55 +0900 Subject: [PATCH 22/42] increase kernel stack size and eliminate unused waitq declaration in do_syscall() --- kernel/include/process.h | 2 +- kernel/syscall.c | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/include/process.h b/kernel/include/process.h index bd10f5dd..a8d8895e 100644 --- a/kernel/include/process.h +++ b/kernel/include/process.h @@ -160,7 +160,7 @@ #endif #define USER_STACK_NR_PAGES 8192 -#define KERNEL_STACK_NR_PAGES 25 +#define KERNEL_STACK_NR_PAGES 32 #define NOPHYS ((uintptr_t)-1) diff --git a/kernel/syscall.c b/kernel/syscall.c index a7985728..2d067c67 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -186,7 +186,6 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) unsigned long irqstate; struct thread *thread = cpu_local_var(current); struct process *proc = thread->proc; - DECLARE_WAITQ_ENTRY(scd_wq_entry, thread); dkprintf("SC(%d)[%3d] sending syscall\n", ihk_mc_get_processor_id(), @@ -228,7 +227,6 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) int do_schedule = 0; long runq_irqstate; unsigned long flags; - DECLARE_WAITQ_ENTRY(scd_wq_entry, cpu_local_var(current)); cpu_pause(); From 4410e702d9b421799e0142ee4d3640d0f04297c8 Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Tue, 16 Aug 2016 14:17:59 +0900 Subject: [PATCH 23/42] devobj: fix memory leak for device file mapping --- kernel/devobj.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/devobj.c b/kernel/devobj.c index cbaf2a0a..3225454a 100644 --- a/kernel/devobj.c +++ b/kernel/devobj.c @@ -166,6 +166,8 @@ static void devobj_release(struct memobj *memobj) struct devobj *obj = to_devobj(memobj); struct devobj *free_obj = NULL; uintptr_t handle; + const size_t pfn_npages = + (obj->npages / (PAGE_SIZE / sizeof(uintptr_t))) + 1; dkprintf("devobj_release(%p %lx)\n", obj, obj->handle); @@ -194,7 +196,7 @@ static void devobj_release(struct memobj *memobj) } if (obj->pfn_table) { - 
free_pages(obj->pfn_table, 1); + free_pages(obj->pfn_table, pfn_npages); } kfree(free_obj); } From 73cf93727b1d984a3de6207c94f3a78b24e8470e Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Tue, 16 Aug 2016 14:18:58 +0900 Subject: [PATCH 24/42] clone(): use CAS for TID allocation --- kernel/syscall.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/syscall.c b/kernel/syscall.c index 2d067c67..fa99d581 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -1941,9 +1941,13 @@ unsigned long do_fork(int clone_flags, unsigned long newsp, } /* Find an unused TID */ +retry_tid: for (i = 0; i < newproc->nr_tids; ++i) { if (!newproc->tids[i].thread) { - newproc->tids[i].thread = new; + if (!__sync_bool_compare_and_swap( + &newproc->tids[i].thread, NULL, new)) { + goto retry_tid; + } new->tid = newproc->tids[i].tid; dkprintf("%s: tid %d assigned to %p\n", __FUNCTION__, new->tid, new); break; From 1a207e19c2f0320ca97b31d21c4abc6d42aaf123 Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Wed, 17 Aug 2016 13:55:36 +0900 Subject: [PATCH 25/42] clean up a couple of debug messages --- arch/x86/kernel/memory.c | 2 ++ kernel/mem.c | 5 +++-- kernel/process.c | 4 ++++ kernel/syscall.c | 12 ++++++++++-- 4 files changed, 19 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/memory.c b/arch/x86/kernel/memory.c index 79aac667..b2a362cb 100644 --- a/arch/x86/kernel/memory.c +++ b/arch/x86/kernel/memory.c @@ -1111,6 +1111,7 @@ static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base, if (!(old & PFL1_FILEOFF) && args->free_physical) { if (page && page_unmap(page)) { ihk_mc_free_pages(phys_to_virt(phys), 1); + dkprintf("%s: freeing regular page at 0x%lx\n", __FUNCTION__, base); } args->vm->currss -= PTL1_SIZE; } @@ -1159,6 +1160,7 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base, if (!(old & PFL2_FILEOFF) && args->free_physical) { if (page && page_unmap(page)) { ihk_mc_free_pages(phys_to_virt(phys), PTL2_SIZE/PTL1_SIZE); + 
dkprintf("%s: freeing large page at 0x%lx\n", __FUNCTION__, base); } args->vm->currss -= PTL2_SIZE; } diff --git a/kernel/mem.c b/kernel/mem.c index 2df09209..6824e628 100644 --- a/kernel/mem.c +++ b/kernel/mem.c @@ -425,8 +425,9 @@ static void page_allocator_init(void) ihk_mc_reserve_arch_pages(pa_start, pa_end, reserve_pages); - kprintf("Available pages: %ld pages\n", - ihk_pagealloc_count(pa_allocator)); + kprintf("Available memory: %ld bytes in %ld pages\n", + (ihk_pagealloc_count(pa_allocator) * PAGE_SIZE), + ihk_pagealloc_count(pa_allocator)); /* Notify the ihk to use my page allocator */ ihk_mc_set_page_allocator(&allocator); diff --git a/kernel/process.c b/kernel/process.c index cf0e2089..fb1c30e0 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -1533,6 +1533,8 @@ retry: kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):cannot allocate new page. %d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error); goto out; } + dkprintf("%s: clearing 0x%lx:%lu\n", + __FUNCTION__, pgaddr, pgsize); memset(virt, 0, pgsize); phys = virt_to_phys(virt); page_map(phys_to_page(phys)); @@ -1565,6 +1567,8 @@ retry: kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):cannot allocate copy page. 
%d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error); goto out; } + dkprintf("%s: copying 0x%lx:%lu\n", + __FUNCTION__, pgaddr, pgsize); memcpy(virt, phys_to_virt(phys), pgsize); phys = virt_to_phys(virt); diff --git a/kernel/syscall.c b/kernel/syscall.c index fa99d581..159f21ff 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -930,6 +930,8 @@ static int do_munmap(void *addr, size_t len) } } finish_free_pages_pending(); + dkprintf("%s: 0x%lx:%lu, error: %ld\n", + __FUNCTION__, addr, len, error); return error; } @@ -1171,6 +1173,8 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot, error = -ENOMEM; goto out; } + dkprintf("%s: 0x%x:%lu allocated %d pages, p2align: %lx\n", + __FUNCTION__, addr, len, npages, p2align); phys = virt_to_phys(p); } else if (flags & MAP_SHARED) { @@ -1255,8 +1259,12 @@ out: if (memobj) { memobj_release(memobj); } - dkprintf("do_mmap(%lx,%lx,%x,%x,%d,%lx): %ld %lx\n", - addr0, len0, prot, flags, fd, off0, error, addr); + dkprintf("%s: 0x%lx:%8lu, (req: 0x%lx:%lu), prot: %x, flags: %x, " + "fd: %d, off: %lu, error: %ld, addr: 0x%lx\n", + __FUNCTION__, + addr, len, addr0, len0, prot, flags, + fd, off0, error, addr); + return (!error)? addr: error; } From 9efd568e072d21dea3bd3ce313f439315f59a76b Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Wed, 17 Aug 2016 14:00:05 +0900 Subject: [PATCH 26/42] do_mmap(): simplify demand paging flags; avoid zeroobj and allocate pages directly --- kernel/syscall.c | 15 ++++----------- kernel/zeroobj.c | 4 ++++ 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/kernel/syscall.c b/kernel/syscall.c index 159f21ff..bd9eec9f 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -1079,25 +1079,18 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot, vrflags |= PROT_TO_VR_FLAG(prot); vrflags |= (flags & MAP_PRIVATE)? VR_PRIVATE: 0; vrflags |= (flags & MAP_LOCKED)? 
VR_LOCKED: 0; + vrflags |= VR_DEMAND_PAGING; if (flags & MAP_ANONYMOUS) { - if (0) { - /* dummy */ + if (!anon_on_demand) { + populated_mapping = 1; } #ifdef USE_NOCACHE_MMAP #define X_MAP_NOCACHE MAP_32BIT else if (flags & X_MAP_NOCACHE) { + vrflags &= ~VR_DEMAND_PAGING; vrflags |= VR_IO_NOCACHE; } #endif - else { - vrflags |= VR_DEMAND_PAGING; - if (!anon_on_demand) { - populated_mapping = 1; - } - } - } - else { - vrflags |= VR_DEMAND_PAGING; } if (flags & (MAP_POPULATE | MAP_LOCKED)) { diff --git a/kernel/zeroobj.c b/kernel/zeroobj.c index c8fc5c2a..a70a89f2 100644 --- a/kernel/zeroobj.c +++ b/kernel/zeroobj.c @@ -172,6 +172,10 @@ static int zeroobj_get_page(struct memobj *memobj, off_t off, int p2align, struct zeroobj *obj = to_zeroobj(memobj); struct page *page; + /* Don't bother about zero page, page fault handler will + * allocate and clear pages */ + return 0; + dkprintf("zeroobj_get_page(%p,%#lx,%d,%p)\n", memobj, off, p2align, physp); if (off & ~PAGE_MASK) { From 01d2ea1605624176c0172c195c19c91fbc3fbb64 Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Wed, 17 Aug 2016 15:08:30 +0900 Subject: [PATCH 27/42] do_munmap(): do TLB flush per address in remote_tlb_flush_cpu_mask() --- kernel/mem.c | 7 +++++++ kernel/syscall.c | 2 -- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/mem.c b/kernel/mem.c index 6824e628..1fc11a10 100644 --- a/kernel/mem.c +++ b/kernel/mem.c @@ -265,6 +265,13 @@ void remote_flush_tlb_cpumask(struct process_vm *vm, unsigned long tsc; tsc = rdtsc() + 12884901888; /* 1.2GHz =>10 sec */ #endif + if (flush_entry->addr) { + flush_tlb_single(flush_entry->addr & PAGE_MASK); + } + /* Zero address denotes full TLB flush */ + else { + flush_tlb(); + } /* Wait for all cores */ while (ihk_atomic_read(&flush_entry->pending) != 0) { diff --git a/kernel/syscall.c b/kernel/syscall.c index bd9eec9f..90ce79c1 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -917,8 +917,6 @@ static int do_munmap(void *addr, size_t len) 
begin_free_pages_pending(); error = remove_process_memory_range(cpu_local_var(current)->vm, (intptr_t)addr, (intptr_t)addr+len, &ro_freed); - // XXX: TLB flush - flush_tlb(); if (error || !ro_freed) { clear_host_pte((uintptr_t)addr, len); } From f5857cfc9e6f12f5677d34311aa2b4ba61a11e70 Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Wed, 17 Aug 2016 18:02:05 +0900 Subject: [PATCH 28/42] MM: use ihk_mc_{alloc/free}_pages() everywhere and fix free_pages() on kmalloc()ed object bug --- arch/x86/kernel/memory.c | 13 ++++--------- kernel/devobj.c | 6 +++--- kernel/host.c | 13 +++++++------ kernel/process.c | 21 +++++++++++---------- kernel/sysfs.c | 26 +++++++++++++------------- lib/include/ihk/mm.h | 2 +- 6 files changed, 39 insertions(+), 42 deletions(-) diff --git a/arch/x86/kernel/memory.c b/arch/x86/kernel/memory.c index b2a362cb..e9b70a45 100644 --- a/arch/x86/kernel/memory.c +++ b/arch/x86/kernel/memory.c @@ -23,6 +23,7 @@ #include #include #include +#include #define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) #define ekprintf(...) 
kprintf(__VA_ARGS__) @@ -84,20 +85,14 @@ void ihk_mc_free_pages(void *p, int npages) pa_ops->free_page(p, npages); } -void *ihk_mc_allocate(int size, enum ihk_mc_ap_flag flag) +void *ihk_mc_allocate(int size, int flag) { - if (pa_ops && pa_ops->alloc) - return pa_ops->alloc(size, flag); - else - return ihk_mc_alloc_pages(1, flag); + return kmalloc(size, IHK_MC_AP_NOWAIT); } void ihk_mc_free(void *p) { - if (pa_ops && pa_ops->free) - return pa_ops->free(p); - else - return ihk_mc_free_pages(p, 1); + kfree(p); } void *get_last_early_heap(void) diff --git a/kernel/devobj.c b/kernel/devobj.c index 3225454a..737d42f7 100644 --- a/kernel/devobj.c +++ b/kernel/devobj.c @@ -99,7 +99,7 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp } memset(obj, 0, sizeof(*obj)); - obj->pfn_table = allocate_pages(pfn_npages, IHK_MC_AP_NOWAIT); + obj->pfn_table = ihk_mc_alloc_pages(pfn_npages, IHK_MC_AP_NOWAIT); if (!obj->pfn_table) { error = -ENOMEM; kprintf("%s: error: fd: %d, len: %lu, off: %lu allocating PFN failed.\n", @@ -141,7 +141,7 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp out: if (obj) { if (obj->pfn_table) { - free_pages(obj->pfn_table, pfn_npages); + ihk_mc_free_pages(obj->pfn_table, pfn_npages); } kfree(obj); } @@ -196,7 +196,7 @@ static void devobj_release(struct memobj *memobj) } if (obj->pfn_table) { - free_pages(obj->pfn_table, pfn_npages); + ihk_mc_free_pages(obj->pfn_table, pfn_npages); } kfree(free_obj); } diff --git a/kernel/host.c b/kernel/host.c index 1013ebfe..3444a349 100644 --- a/kernel/host.c +++ b/kernel/host.c @@ -375,8 +375,9 @@ static int process_msg_prepare_process(unsigned long rphys) n = p->num_sections; dkprintf("# of sections: %d\n", n); - if((pn = ihk_mc_allocate(sizeof(struct program_load_desc) - + sizeof(struct program_image_section) * n, IHK_MC_AP_NOWAIT)) == NULL){ + if((pn = kmalloc(sizeof(struct program_load_desc) + + sizeof(struct program_image_section) * n, + 
IHK_MC_AP_NOWAIT)) == NULL){ ihk_mc_unmap_virtual(p, npages, 0); ihk_mc_unmap_memory(NULL, phys, sz); return -ENOMEM; @@ -385,7 +386,7 @@ static int process_msg_prepare_process(unsigned long rphys) + sizeof(struct program_image_section) * n); if((thread = create_thread(p->entry)) == NULL){ - ihk_mc_free(pn); + kfree(pn); ihk_mc_unmap_virtual(p, npages, 1); ihk_mc_unmap_memory(NULL, phys, sz); return -ENOMEM; @@ -435,7 +436,7 @@ static int process_msg_prepare_process(unsigned long rphys) dkprintf("new process : %p [%d] / table : %p\n", proc, proc->pid, vm->address_space->page_table); - ihk_mc_free(pn); + kfree(pn); ihk_mc_unmap_virtual(p, npages, 1); ihk_mc_unmap_memory(NULL, phys, sz); @@ -443,7 +444,7 @@ static int process_msg_prepare_process(unsigned long rphys) return 0; err: - ihk_mc_free(pn); + kfree(pn); ihk_mc_unmap_virtual(p, npages, 1); ihk_mc_unmap_memory(NULL, phys, sz); destroy_thread(thread); @@ -452,7 +453,7 @@ err: static void process_msg_init(struct ikc_scd_init_param *pcp, struct syscall_params *lparam) { - lparam->response_va = allocate_pages(RESPONSE_PAGE_COUNT, 0); + lparam->response_va = ihk_mc_alloc_pages(RESPONSE_PAGE_COUNT, 0); lparam->response_pa = virt_to_phys(lparam->response_va); pcp->request_page = 0; diff --git a/kernel/process.c b/kernel/process.c index fb1c30e0..7ed1f98a 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -739,7 +739,7 @@ int join_process_memory_range(struct process_vm *vm, memobj_release(merging->memobj); } list_del(&merging->list); - ihk_mc_free(merging); + kfree(merging); error = 0; out: @@ -835,8 +835,9 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range) if (range->memobj) { memobj_release(range->memobj); } + list_del(&range->list); - ihk_mc_free(range); + kfree(range); dkprintf("free_process_memory_range(%p,%lx-%lx): 0\n", vm, start0, end0); @@ -1888,14 +1889,14 @@ unsigned long extend_process_region(struct process_vm *vm, aligned_end = (aligned_end + (LARGE_PAGE_SIZE - 1)) & 
LARGE_PAGE_MASK; /* Fill in the gap between old_aligned_end and aligned_end * with regular pages */ - if((p = allocate_pages((aligned_end - old_aligned_end) >> PAGE_SHIFT, + if((p = ihk_mc_alloc_pages((aligned_end - old_aligned_end) >> PAGE_SHIFT, IHK_MC_AP_NOWAIT)) == NULL){ return end; } if((rc = add_process_memory_range(vm, old_aligned_end, aligned_end, virt_to_phys(p), flag, LARGE_PAGE_SHIFT)) != 0){ - free_pages(p, (aligned_end - old_aligned_end) >> PAGE_SHIFT); + ihk_mc_free_pages(p, (aligned_end - old_aligned_end) >> PAGE_SHIFT); return end; } @@ -1908,7 +1909,7 @@ unsigned long extend_process_region(struct process_vm *vm, (LARGE_PAGE_SIZE - 1)) & LARGE_PAGE_MASK; address = aligned_new_end; - if((p = allocate_pages((aligned_new_end - aligned_end + LARGE_PAGE_SIZE) >> PAGE_SHIFT, + if((p = ihk_mc_alloc_pages((aligned_new_end - aligned_end + LARGE_PAGE_SIZE) >> PAGE_SHIFT, IHK_MC_AP_NOWAIT)) == NULL){ return end; } @@ -1916,16 +1917,16 @@ unsigned long extend_process_region(struct process_vm *vm, p_aligned = ((unsigned long)p + (LARGE_PAGE_SIZE - 1)) & LARGE_PAGE_MASK; if (p_aligned > (unsigned long)p) { - free_pages(p, (p_aligned - (unsigned long)p) >> PAGE_SHIFT); + ihk_mc_free_pages(p, (p_aligned - (unsigned long)p) >> PAGE_SHIFT); } - free_pages( + ihk_mc_free_pages( (void *)(p_aligned + aligned_new_end - aligned_end), (LARGE_PAGE_SIZE - (p_aligned - (unsigned long)p)) >> PAGE_SHIFT); if((rc = add_process_memory_range(vm, aligned_end, aligned_new_end, virt_to_phys((void *)p_aligned), flag, LARGE_PAGE_SHIFT)) != 0){ - free_pages(p, (aligned_new_end - aligned_end + LARGE_PAGE_SIZE) >> PAGE_SHIFT); + ihk_mc_free_pages(p, (aligned_new_end - aligned_end + LARGE_PAGE_SIZE) >> PAGE_SHIFT); return end; } @@ -1943,7 +1944,7 @@ unsigned long extend_process_region(struct process_vm *vm, p=0; }else{ - p = allocate_pages((aligned_new_end - aligned_end) >> PAGE_SHIFT, IHK_MC_AP_NOWAIT); + p = ihk_mc_alloc_pages((aligned_new_end - aligned_end) >> PAGE_SHIFT, 
IHK_MC_AP_NOWAIT); if (!p) { return end; @@ -1952,7 +1953,7 @@ unsigned long extend_process_region(struct process_vm *vm, if((rc = add_process_memory_range(vm, aligned_end, aligned_new_end, (p==0?0:virt_to_phys(p)), flag, NULL, 0, PAGE_SHIFT)) != 0){ - free_pages(p, (aligned_new_end - aligned_end) >> PAGE_SHIFT); + ihk_mc_free_pages(p, (aligned_new_end - aligned_end) >> PAGE_SHIFT); return end; } diff --git a/kernel/sysfs.c b/kernel/sysfs.c index a924e531..dc97e58d 100644 --- a/kernel/sysfs.c +++ b/kernel/sysfs.c @@ -75,7 +75,7 @@ sysfs_createf(struct sysfs_ops *ops, void *instance, int mode, dkprintf("sysfs_createf(%p,%p,%#o,%s,...)\n", ops, instance, mode, fmt); - param = allocate_pages(1, IHK_MC_AP_NOWAIT); + param = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT); if (!param) { error = -ENOMEM; ekprintf("sysfs_createf:allocate_pages failed. %d\n", error); @@ -134,7 +134,7 @@ sysfs_createf(struct sysfs_ops *ops, void *instance, int mode, error = 0; out: if (param) { - free_pages(param, 1); + ihk_mc_free_pages(param, 1); } if (error) { ekprintf("sysfs_createf(%p,%p,%#o,%s,...): %d\n", @@ -156,7 +156,7 @@ sysfs_mkdirf(sysfs_handle_t *dirhp, const char *fmt, ...) dkprintf("sysfs_mkdirf(%p,%s,...)\n", dirhp, fmt); - param = allocate_pages(1, IHK_MC_AP_NOWAIT); + param = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT); if (!param) { error = -ENOMEM; ekprintf("sysfs_mkdirf:allocate_pages failed. %d\n", error); @@ -208,7 +208,7 @@ sysfs_mkdirf(sysfs_handle_t *dirhp, const char *fmt, ...) out: if (param) { - free_pages(param, 1); + ihk_mc_free_pages(param, 1); } if (error) { ekprintf("sysfs_mkdirf(%p,%s,...): %d\n", dirhp, fmt, error); @@ -229,7 +229,7 @@ sysfs_symlinkf(sysfs_handle_t targeth, const char *fmt, ...) dkprintf("sysfs_symlinkf(%#lx,%s,...)\n", targeth.handle, fmt); - param = allocate_pages(1, IHK_MC_AP_NOWAIT); + param = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT); if (!param) { error = -ENOMEM; ekprintf("sysfs_symlinkf:allocate_pages failed. 
%d\n", error); @@ -279,7 +279,7 @@ sysfs_symlinkf(sysfs_handle_t targeth, const char *fmt, ...) error = 0; out: if (param) { - free_pages(param, 1); + ihk_mc_free_pages(param, 1); } if (error) { ekprintf("sysfs_symlinkf(%#lx,%s,...): %d\n", @@ -301,7 +301,7 @@ sysfs_lookupf(sysfs_handle_t *objhp, const char *fmt, ...) dkprintf("sysfs_lookupf(%p,%s,...)\n", objhp, fmt); - param = allocate_pages(1, IHK_MC_AP_NOWAIT); + param = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT); if (!param) { error = -ENOMEM; ekprintf("sysfs_lookupf:allocate_pages failed. %d\n", error); @@ -353,7 +353,7 @@ sysfs_lookupf(sysfs_handle_t *objhp, const char *fmt, ...) out: if (param) { - free_pages(param, 1); + ihk_mc_free_pages(param, 1); } if (error) { ekprintf("sysfs_lookupf(%p,%s,...): %d\n", objhp, fmt, error); @@ -374,7 +374,7 @@ sysfs_unlinkf(int flags, const char *fmt, ...) dkprintf("sysfs_unlinkf(%#x,%s,...)\n", flags, fmt); - param = allocate_pages(1, IHK_MC_AP_NOWAIT); + param = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT); if (!param) { error = -ENOMEM; ekprintf("sysfs_unlinkf:allocate_pages failed. %d\n", error); @@ -423,7 +423,7 @@ sysfs_unlinkf(int flags, const char *fmt, ...) error = 0; out: if (param) { - free_pages(param, 1); + ihk_mc_free_pages(param, 1); } if (error) { ekprintf("sysfs_unlinkf(%#x,%s,...): %d\n", flags, fmt, error); @@ -601,14 +601,14 @@ sysfs_init(void) } sysfs_data_bufsize = PAGE_SIZE; - sysfs_data_buf = allocate_pages(1, IHK_MC_AP_NOWAIT); + sysfs_data_buf = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT); if (!sysfs_data_buf) { error = -ENOMEM; ekprintf("sysfs_init:allocate_pages(buf) failed. %d\n", error); goto out; } - param = allocate_pages(1, IHK_MC_AP_NOWAIT); + param = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT); if (!param) { error = -ENOMEM; ekprintf("sysfs_init:allocate_pages(param) failed. 
%d\n", @@ -644,7 +644,7 @@ sysfs_init(void) error = 0; out: if (param) { - free_pages(param, 1); + ihk_mc_free_pages(param, 1); } if (error) { ekprintf("sysfs_init(): %d\n", error); diff --git a/lib/include/ihk/mm.h b/lib/include/ihk/mm.h index cf2957a0..52a3c554 100644 --- a/lib/include/ihk/mm.h +++ b/lib/include/ihk/mm.h @@ -103,7 +103,7 @@ void ihk_mc_clean_micpa(void); void *ihk_mc_alloc_aligned_pages(int npages, int p2align, enum ihk_mc_ap_flag flag); void *ihk_mc_alloc_pages(int npages, enum ihk_mc_ap_flag flag); void ihk_mc_free_pages(void *p, int npages); -void *ihk_mc_allocate(int size, enum ihk_mc_ap_flag flag); +void *ihk_mc_allocate(int size, int flag); void ihk_mc_free(void *p); void *arch_alloc_page(enum ihk_mc_ap_flag flag); From 5fd68eae54b187649147dd261947b0437c4b0ad7 Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Thu, 18 Aug 2016 07:31:25 +0900 Subject: [PATCH 29/42] PF handler: fix up various error msgs --- arch/x86/kernel/cpu.c | 9 ++++----- kernel/debug.c | 1 + kernel/mem.c | 7 +++---- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu.c b/arch/x86/kernel/cpu.c index d86aecbd..9befbd40 100644 --- a/arch/x86/kernel/cpu.c +++ b/arch/x86/kernel/cpu.c @@ -1036,9 +1036,8 @@ unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs) unsigned long error = ((struct x86_user_context *)regs)->gpr.error; irqflags = kprintf_lock(); - dkprintf("[%d] Page fault for 0x%lX\n", - ihk_mc_get_processor_id(), address); - dkprintf("%s for %s access in %s mode (reserved bit %s set), " + __kprintf("Page fault for 0x%lx\n", address); + __kprintf("%s for %s access in %s mode (reserved bit %s set), " "it %s an instruction fetch\n", (error & PF_PROT ? "protection fault" : "no page found"), (error & PF_WRITE ? 
"write" : "read"), @@ -1050,14 +1049,14 @@ unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs) list_for_each_entry(range, &vm->vm_range_list, list) { if (range->start <= address && range->end > address) { found = 1; - dkprintf("address is in range, flag: 0x%X! \n", + __kprintf("address is in range, flag: 0x%lx\n", range->flag); ihk_mc_pt_print_pte(vm->address_space->page_table, (void*)address); break; } } if (!found) { - dkprintf("address is out of range! \n"); + __kprintf("address is out of range! \n"); } kprintf_unlock(irqflags); diff --git a/kernel/debug.c b/kernel/debug.c index 6c9a8214..1061b6fc 100644 --- a/kernel/debug.c +++ b/kernel/debug.c @@ -110,6 +110,7 @@ int __kprintf(const char *format, ...) char buf[KPRINTF_LOCAL_BUF_LEN]; /* Copy into the local buf */ + len = sprintf(buf, "[%3d]: ", ihk_mc_get_processor_id()); va_start(va, format); len += vsnprintf(buf + len, KPRINTF_LOCAL_BUF_LEN - len - 2, format, va); va_end(va); diff --git a/kernel/mem.c b/kernel/mem.c index 1fc11a10..f0e4c12b 100644 --- a/kernel/mem.c +++ b/kernel/mem.c @@ -342,10 +342,9 @@ static void page_fault_handler(void *fault_addr, uint64_t reason, void *regs) // no return } - kprintf("[%d]page_fault_handler(%p,%lx,%p):" - "fault vm failed. 
%d, TID: %d\n", - ihk_mc_get_processor_id(), fault_addr, - reason, regs, error, thread->tid); + kprintf("%s fault VM failed for TID: %d, addr: 0x%lx, " + "reason: %d, error: %d\n", + thread->tid, fault_addr, reason, error); unhandled_page_fault(thread, fault_addr, regs); preempt_enable(); memset(&info, '\0', sizeof info); From bd6a2c23119e3009f6f950ebbdb45351e946c013 Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Thu, 18 Aug 2016 07:32:31 +0900 Subject: [PATCH 30/42] sys_mmap(): correct initial address check --- arch/x86/kernel/syscall.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/syscall.c b/arch/x86/kernel/syscall.c index a37b2d64..d6819a18 100644 --- a/arch/x86/kernel/syscall.c +++ b/arch/x86/kernel/syscall.c @@ -1434,9 +1434,8 @@ SYSCALL_DECLARE(mmap) goto out; } - if ((addr < region->user_start) - || (region->user_end <= addr) - || ((region->user_end - addr) < len)) { + if ((flags & MAP_FIXED) && ((addr < region->user_start) + || (region->user_end <= addr))) { ekprintf("sys_mmap(%lx,%lx,%x,%x,%x,%lx):ENOMEM\n", addr0, len0, prot, flags0, fd, off0); error = -ENOMEM; From 7ebc34ddccf343029273c88fd3f8fa8b40976bf2 Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Thu, 18 Aug 2016 14:31:52 +0900 Subject: [PATCH 31/42] do_fork(): fix tids memory leak; additional sanity checks --- arch/x86/kernel/memory.c | 8 ++++++++ kernel/host.c | 5 +++++ kernel/include/cls.h | 1 + kernel/mem.c | 11 +++++++---- kernel/process.c | 1 - kernel/syscall.c | 2 ++ 6 files changed, 23 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/memory.c b/arch/x86/kernel/memory.c index e9b70a45..782df6c2 100644 --- a/arch/x86/kernel/memory.c +++ b/arch/x86/kernel/memory.c @@ -87,11 +87,19 @@ void ihk_mc_free_pages(void *p, int npages) void *ihk_mc_allocate(int size, int flag) { + if (!cpu_local_var(kmalloc_initialized)) { + kprintf("%s: error, kmalloc not yet initialized\n", __FUNCTION__); + return NULL; + } return kmalloc(size, 
IHK_MC_AP_NOWAIT); } void ihk_mc_free(void *p) { + if (!cpu_local_var(kmalloc_initialized)) { + kprintf("%s: error, kmalloc not yet initialized\n", __FUNCTION__); + return; + } kfree(p); } diff --git a/kernel/host.c b/kernel/host.c index 3444a349..5cec52e2 100644 --- a/kernel/host.c +++ b/kernel/host.c @@ -373,6 +373,11 @@ static int process_msg_prepare_process(unsigned long rphys) } n = p->num_sections; + if (n > 16) { + kprintf("%s: ERROR: more ELF sections than 16??\n", + __FUNCTION__); + return -ENOMEM; + } dkprintf("# of sections: %d\n", n); if((pn = kmalloc(sizeof(struct program_load_desc) diff --git a/kernel/include/cls.h b/kernel/include/cls.h index 58532c08..d2521b11 100644 --- a/kernel/include/cls.h +++ b/kernel/include/cls.h @@ -73,6 +73,7 @@ struct cpu_local_var { int in_interrupt; int no_preempt; int timer_enabled; + int kmalloc_initialized; } __attribute__((aligned(64))); diff --git a/kernel/mem.c b/kernel/mem.c index f0e4c12b..fc8f109d 100644 --- a/kernel/mem.c +++ b/kernel/mem.c @@ -664,8 +664,8 @@ static struct alloc *allochash[HASHNUM]; static struct location *lochash[HASHNUM]; static ihk_spinlock_t alloclock; int runcount; -static unsigned char *page; -static int space; +static unsigned char *page = NULL; +static int space = 0; static void *dalloc(unsigned long size) { @@ -896,14 +896,17 @@ void kmalloc_init(void) h->size = 0; register_kmalloc(); + v->kmalloc_initialized = 1; memdebug = find_command_line("memdebug"); for (i = 0; i < HASHNUM; i++) { allochash[i] = NULL; lochash[i] = NULL; } - page = allocate_pages(16, IHK_MC_AP_NOWAIT); - space = 16 * 4096; + if (!page) { + page = allocate_pages(16, IHK_MC_AP_NOWAIT); + space = 16 * 4096; + } ihk_mc_spinlock_init(&alloclock); } diff --git a/kernel/process.c b/kernel/process.c index 7ed1f98a..12e43eb8 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -963,7 +963,6 @@ enum ihk_mc_pt_attribute common_vrflag_to_ptattr(unsigned long flag, uint64_t fa return attr; } -/* XXX: インデントを揃える必要がある */ int 
add_process_memory_range(struct process_vm *vm, unsigned long start, unsigned long end, unsigned long phys, unsigned long flag, diff --git a/kernel/syscall.c b/kernel/syscall.c index 90ce79c1..9688f871 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -1937,6 +1937,8 @@ unsigned long do_fork(int clone_flags, unsigned long newsp, newproc->tids[i].thread = NULL; ++newproc->nr_tids; } + + kfree(tids); } /* Find an unused TID */ From 82ae6d7458e5bba18c1d9d72c890b05fb4b506f0 Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Thu, 18 Aug 2016 14:52:05 +0900 Subject: [PATCH 32/42] query_free_mem_interrupt_handler(): report number of free pages as kmsg --- kernel/mem.c | 13 +++++++++---- kernel/syscall.c | 8 ++++---- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/kernel/mem.c b/kernel/mem.c index fc8f109d..c267f33c 100644 --- a/kernel/mem.c +++ b/kernel/mem.c @@ -156,13 +156,18 @@ void sbox_write(int offset, unsigned int value); static void query_free_mem_interrupt_handler(void *priv) { -#ifdef ATTACHED_MIC - dkprintf("query free mem handler!\n"); - + extern int runcount; int pages = ihk_pagealloc_query_free(pa_allocator); - dkprintf("free pages: %d\n", pages); + kprintf("McKernel free pages: %d\n", pages); + if (find_command_line("memdebug")) { + memcheckall(); + freecheck(runcount); + runcount++; + } + +#ifdef ATTACHED_MIC sbox_write(SBOX_SCRATCH0, pages); sbox_write(SBOX_SCRATCH1, 1); #endif diff --git a/kernel/syscall.c b/kernel/syscall.c index 9688f871..b3279588 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -1201,10 +1201,10 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot, error = add_process_memory_range(thread->vm, addr, addr+len, phys, vrflags, memobj, off, pgshift); if (error) { - ekprintf("do_mmap:add_process_memory_range" - "(%p,%lx,%lx,%lx,%lx,%d) failed %d\n", - thread->vm, addr, addr+len, - virt_to_phys(p), vrflags, pgshift, error); + kprintf("%s: add_process_memory_range failed for 0x%lx:%lu" + " flags: %lx, 
vrflags: %lx, pgshift: %d, error: %d\n", + __FUNCTION__, addr, addr+len, + flags, vrflags, pgshift, error); goto out; } From 3b60a95f1367b8741dbab53dcebbacea359d8399 Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Thu, 18 Aug 2016 21:41:23 +0900 Subject: [PATCH 33/42] kmalloc()/kfree() re-implementation --- kernel/host.c | 10 - kernel/include/cls.h | 15 +- kernel/include/kmalloc.h | 2 - kernel/mem.c | 527 +++++++++++++-------------------------- 4 files changed, 188 insertions(+), 366 deletions(-) diff --git a/kernel/host.c b/kernel/host.c index 5cec52e2..271bff3d 100644 --- a/kernel/host.c +++ b/kernel/host.c @@ -528,9 +528,6 @@ static void syscall_channel_send(struct ihk_ikc_channel_desc *c, extern unsigned long do_kill(struct thread *, int, int, int, struct siginfo *, int ptracecont); extern void process_procfs_request(unsigned long rarg); -extern int memcheckall(); -extern int freecheck(int runcount); -extern int runcount; extern void terminate_host(int pid); extern void debug_log(long); @@ -588,13 +585,6 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, case SCD_MSG_PREPARE_PROCESS: - if (find_command_line("memdebug")) { - memcheckall(); - if (runcount) - freecheck(runcount); - runcount++; - } - if((rc = process_msg_prepare_process(packet->arg)) == 0){ pckt.msg = SCD_MSG_PREPARE_PROCESS_ACKED; pckt.err = 0; diff --git a/kernel/include/cls.h b/kernel/include/cls.h index d2521b11..d7bad237 100644 --- a/kernel/include/cls.h +++ b/kernel/include/cls.h @@ -19,11 +19,13 @@ * CPU Local Storage (cls) */ -struct malloc_header { - unsigned int check; +struct kmalloc_header { + unsigned int front_magic; unsigned int cpu_id; - struct malloc_header *next; - unsigned long size; + struct list_head list; + int size; /* The size of this chunk without the header */ + unsigned int end_magic; + /* 32 bytes */ }; #include @@ -38,8 +40,9 @@ extern ihk_spinlock_t cpu_status_lock; struct cpu_local_var { /* malloc */ - struct malloc_header free_list; - struct 
malloc_header *remote_free_list; + struct list_head free_list; + struct list_head remote_free_list; + ihk_spinlock_t remote_free_list_lock; struct thread idle; struct process idle_proc; diff --git a/kernel/include/kmalloc.h b/kernel/include/kmalloc.h index 6f523ec8..b17d1211 100644 --- a/kernel/include/kmalloc.h +++ b/kernel/include/kmalloc.h @@ -32,8 +32,6 @@ void *_kmalloc(int size, enum ihk_mc_ap_flag flag, char *file, int line); void _kfree(void *ptr, char *file, int line); void *__kmalloc(int size, enum ihk_mc_ap_flag flag); void __kfree(void *ptr); -void *___kmalloc(int size, enum ihk_mc_ap_flag flag); -void ___kfree(void *ptr); int _memcheck(void *ptr, char *msg, char *file, int line, int free); int memcheckall(); diff --git a/kernel/mem.c b/kernel/mem.c index c267f33c..65e74fbe 100644 --- a/kernel/mem.c +++ b/kernel/mem.c @@ -156,17 +156,10 @@ void sbox_write(int offset, unsigned int value); static void query_free_mem_interrupt_handler(void *priv) { - extern int runcount; int pages = ihk_pagealloc_query_free(pa_allocator); kprintf("McKernel free pages: %d\n", pages); - if (find_command_line("memdebug")) { - memcheckall(); - freecheck(runcount); - runcount++; - } - #ifdef ATTACHED_MIC sbox_write(SBOX_SCRATCH0, pages); sbox_write(SBOX_SCRATCH1, 1); @@ -519,6 +512,9 @@ static void page_init(void) static char *memdebug = NULL; +static void *___kmalloc(int size, enum ihk_mc_ap_flag flag); +static void ___kfree(void *ptr); + void register_kmalloc(void) { if(memdebug){ @@ -648,60 +644,25 @@ void mem_init(void) } } -struct location { - struct location *next; - int line; - int cnt; - char file[0]; -}; -struct alloc { - struct alloc *next; - struct malloc_header *p; - struct location *loc; - int size; - int runcount; -}; - -#define HASHNUM 129 - -static struct alloc *allochash[HASHNUM]; -static struct location *lochash[HASHNUM]; -static ihk_spinlock_t alloclock; -int runcount; -static unsigned char *page = NULL; -static int space = 0; - -static void *dalloc(unsigned 
long size) +void kmalloc_init(void) { - void *r; - static int pos = 0; - unsigned long irqstate; + struct cpu_local_var *v = get_this_cpu_local_var(); - irqstate = ihk_mc_spinlock_lock(&alloclock); - size = (size + 7) & 0xfffffffffffffff8L; - if (pos + size > space) { - page = allocate_pages(1, IHK_MC_AP_NOWAIT); - space = 4096; - pos = 0; - } - r = page + pos; - pos += size; - ihk_mc_spinlock_unlock(&alloclock, irqstate); + register_kmalloc(); - return r; + INIT_LIST_HEAD(&v->free_list); + INIT_LIST_HEAD(&v->remote_free_list); + ihk_mc_spinlock_init(&v->remote_free_list_lock); + + v->kmalloc_initialized = 1; } + +/* Top level routines called from macro */ void *_kmalloc(int size, enum ihk_mc_ap_flag flag, char *file, int line) { - char *r = ___kmalloc(size, flag); - struct malloc_header *h; - unsigned long hash; - char *t; - struct location *lp; - struct alloc *ap; - unsigned long alcsize; - unsigned long chksize; + void *r = ___kmalloc(size, flag); if (!memdebug) return r; @@ -709,177 +670,22 @@ void *_kmalloc(int size, enum ihk_mc_ap_flag flag, char *file, int line) if (!r) return r; - h = ((struct malloc_header *)r) - 1; - alcsize = h->size * sizeof(struct malloc_header); - chksize = alcsize - size; - memset(r + size, '\x5a', chksize); + /* TODO: kmalloc() debug code */ - for (hash = 0, t = file; *t; t++) { - hash <<= 1; - hash += *t; - } - hash += line; - hash %= HASHNUM; - for (lp = lochash[hash]; lp; lp = lp->next) - if (lp->line == line && - !strcmp(lp->file, file)) - break; - if (!lp) { - lp = dalloc(sizeof(struct location) + strlen(file) + 1); - memset(lp, '\0', sizeof(struct location)); - lp->line = line; - strcpy(lp->file, file); - do { - lp->next = lochash[hash]; - } while (!compare_and_swap(lochash + hash, (unsigned long)lp->next, (unsigned long)lp)); - } - - hash = (unsigned long)h % HASHNUM; - do { - for (ap = allochash[hash]; ap; ap = ap->next) - if (!ap->p) - break; - } while (ap && !compare_and_swap(&ap->p, 0UL, (unsigned long)h)); - if (!ap) { - 
ap = dalloc(sizeof(struct alloc)); - memset(ap, '\0', sizeof(struct alloc)); - ap->p = h; - do { - ap->next = allochash[hash]; - } while (!compare_and_swap(allochash + hash, (unsigned long)ap->next, (unsigned long)ap)); - } - - ap->loc = lp; - ap->size = size; - ap->runcount = runcount; - - return r; -} - -int _memcheck(void *ptr, char *msg, char *file, int line, int flags) -{ - struct malloc_header *h = ((struct malloc_header *)ptr) - 1; - struct malloc_header *next; - unsigned long hash = (unsigned long)h % HASHNUM; - struct alloc *ap; - static unsigned long check = 0x5a5a5a5a5a5a5a5aUL; - unsigned long alcsize; - unsigned long chksize; - - - if (h->check != 0x5a5a5a5a) { - int i; - unsigned long max = 0; - unsigned long cur = (unsigned long)h; - struct alloc *maxap = NULL; - - for (i = 0; i < HASHNUM; i++) - for (ap = allochash[i]; ap; ap = ap->next) - if ((unsigned long)ap->p < cur && - (unsigned long)ap->p > max) { - max = (unsigned long)ap->p; - maxap = ap; - } - - kprintf("%s: detect buffer overrun, alc=%s:%d size=%ld h=%p, s=%ld\n", msg, maxap->loc->file, maxap->loc->line, maxap->size, maxap->p, maxap->p->size); - kprintf("broken header: h=%p next=%p size=%ld cpu_id=%d\n", h, h->next, h->size, h->cpu_id); - } - - for (ap = allochash[hash]; ap; ap = ap->next) - if (ap->p == h) - break; - if (!ap) { - if(file) - kprintf("%s: address not found, %s:%d p=%p\n", msg, file, line, ptr); - else - kprintf("%s: address not found p=%p\n", msg, ptr); - return 1; - } - - alcsize = h->size * sizeof(struct malloc_header); - chksize = alcsize - ap->size; - if (chksize > 8) - chksize = 8; - next = (struct malloc_header *)((char *)ptr + alcsize); - - if (next->check != 0x5a5a5a5a || - memcmp((char *)ptr + ap->size, &check, chksize)) { - unsigned long buf = 0x5a5a5a5a5a5a5a5aUL; - unsigned char *p; - unsigned char *q; - memcpy(&buf, (char *)ptr + ap->size, chksize); - p = (unsigned char *)&(next->check); - q = (unsigned char *)&buf; - - if (file) - kprintf("%s: broken, %s:%d 
alc=%s:%d %02x%02x%02x%02x%02x%02x%02x%02x %02x%02x%02x%02x size=%ld\n", msg, file, line, ap->loc->file, ap->loc->line, q[0], q[1], q[2], q[3], q[4], q[5], q[6], q[7], p[0], p[1], p[2], p[3], ap->size); - else - kprintf("%s: broken, alc=%s:%d %02x%02x%02x%02x%02x%02x%02x%02x %02x%02x%02x%02x size=%ld\n", msg, ap->loc->file, ap->loc->line, q[0], q[1], q[2], q[3], q[4], q[5], q[6], q[7], p[0], p[1], p[2], p[3], ap->size); - - - if (next->check != 0x5a5a5a5a) - kprintf("next->HEADER: next=%p size=%ld cpu_id=%d\n", next->next, next->size, next->cpu_id); - - return 1; - } - - if(flags & 1){ - ap->p = NULL; - ap->loc = NULL; - ap->size = 0; - } - return 0; -} - -int memcheckall() -{ - int i; - struct alloc *ap; - int r = 0; - - for(i = 0; i < HASHNUM; i++) - for(ap = allochash[i]; ap; ap = ap->next) - if(ap->p) - r |= _memcheck(ap->p + 1, "memcheck", NULL, 0, 2); - return r; -} - -int freecheck(int runcount) -{ - int i; - struct alloc *ap; - struct location *lp; - int r = 0; - - for (i = 0; i < HASHNUM; i++) - for (lp = lochash[i]; lp; lp = lp->next) - lp->cnt = 0; - - for (i = 0; i < HASHNUM; i++) - for (ap = allochash[i]; ap; ap = ap->next) - if (ap->p && ap->runcount == runcount) { - ap->loc->cnt++; - r++; - } - - if (r) { - kprintf("memory leak?\n"); - for (i = 0; i < HASHNUM; i++) - for (lp = lochash[i]; lp; lp = lp->next) - if (lp->cnt) - kprintf(" alc=%s:%d cnt=%d\n", lp->file, lp->line, lp->cnt); - } return r; } void _kfree(void *ptr, char *file, int line) { - if (memdebug) - _memcheck(ptr, "KFREE", file, line, 1); + if (memdebug) { + /* TODO: kfree() debug code */ + } + ___kfree(ptr); } +/* Redirection routines registered in alloc structure */ void *__kmalloc(int size, enum ihk_mc_ap_flag flag) { return kmalloc(size, flag); @@ -890,163 +696,188 @@ void __kfree(void *ptr) kfree(ptr); } -void kmalloc_init(void) + +static void ___kmalloc_insert_chunk(struct list_head *free_list, + struct kmalloc_header *chunk) { - struct cpu_local_var *v = get_this_cpu_local_var(); 
- struct malloc_header *h = &v->free_list; - int i; + struct kmalloc_header *chunk_iter, *next_chunk = NULL; - h->check = 0x5a5a5a5a; - h->next = &v->free_list; - h->size = 0; - - register_kmalloc(); - v->kmalloc_initialized = 1; - - memdebug = find_command_line("memdebug"); - for (i = 0; i < HASHNUM; i++) { - allochash[i] = NULL; - lochash[i] = NULL; - } - if (!page) { - page = allocate_pages(16, IHK_MC_AP_NOWAIT); - space = 16 * 4096; - } - ihk_mc_spinlock_init(&alloclock); -} - -void ____kfree(struct cpu_local_var *v, struct malloc_header *p) -{ - struct malloc_header *h = &v->free_list; - int combined = 0; - - h = h->next; - - while ((p < h || p > h->next) && h != &v->free_list) { - h = h->next; - } - - if (h + h->size + 1 == p && h->size != 0) { - combined = 1; - h->size += p->size + 1; - h->check = 0x5a5a5a5a; - } - if (h->next == p + p->size + 1 && h->next->size != 0) { - if (combined) { - h->check = 0x5a5a5a5a; - h->size += h->next->size + 1; - h->next = h->next->next; - } else { - p->check = 0x5a5a5a5a; - p->size += h->next->size + 1; - p->next = h->next->next; - h->next = p; + /* Find out where to insert */ + list_for_each_entry(chunk_iter, free_list, list) { + if ((void *)chunk < (void *)chunk_iter) { + next_chunk = chunk_iter; + break; } - } else if (!combined) { - p->next = h->next; - h->next = p; } + + /* Add in front of next */ + if (next_chunk) { + list_add_tail(&chunk->list, &next_chunk->list); + } + /* Add after the head */ + else { + list_add(&chunk->list, free_list); + } + + return; } -void *___kmalloc(int size, enum ihk_mc_ap_flag flag) +static void ___kmalloc_init_chunk(struct kmalloc_header *h, int size) { - struct cpu_local_var *v = get_this_cpu_local_var(); - struct malloc_header *h = &v->free_list, *prev, *p; - int u, req_page; + h->size = size; + h->front_magic = 0x5c5c5c5c; + h->end_magic = 0x6d6d6d6d; + h->cpu_id = ihk_mc_get_processor_id(); +} - p = (struct malloc_header *)xchg8((unsigned long *)&v->remote_free_list, 0L); - while(p){ - 
struct malloc_header *n = p->next; - ____kfree(v, p); - p = n; +static void ___kmalloc_consolidate_list(struct list_head *list) +{ + struct kmalloc_header *chunk_iter, *chunk, *next_chunk; + +reiterate: + chunk_iter = NULL; + chunk = NULL; + + list_for_each_entry(next_chunk, list, list) { + + if (chunk_iter && (((void *)chunk_iter + sizeof(struct kmalloc_header) + + chunk_iter->size) == (void *)next_chunk)) { + chunk = chunk_iter; + break; + } + + chunk_iter = next_chunk; } - if (size >= PAGE_SIZE * 4) { + if (!chunk) { + return; + } + + chunk->size += (next_chunk->size + sizeof(struct kmalloc_header)); + list_del(&next_chunk->list); + goto reiterate; +} + + +/* Actual low-level allocation routines */ +static void *___kmalloc(int size, enum ihk_mc_ap_flag flag) +{ + struct kmalloc_header *chunk_iter, *tmp; + struct kmalloc_header *chunk = NULL; + unsigned long irqflags; + int npages; + unsigned long kmalloc_irq_flags = cpu_disable_interrupt_save(); + + /* + * 32 bytes aligned size, as this leaves us at cache line boundary + * (including the header) even for the smallest allocations + */ + if ((size % 32) != 0) size = ((size + 31) & ~((int)32-1)); + + /* Clean up remotely deallocated chunks */ + irqflags = ihk_mc_spinlock_lock(&cpu_local_var(remote_free_list_lock)); + list_for_each_entry_safe(chunk, tmp, + &cpu_local_var(remote_free_list), list) { + + list_del(&chunk->list); + ___kmalloc_insert_chunk(&cpu_local_var(free_list), chunk); + } + ihk_mc_spinlock_unlock(&cpu_local_var(remote_free_list_lock), irqflags); + + chunk = NULL; + /* Find a chunk that is big enough */ + list_for_each_entry(chunk_iter, &cpu_local_var(free_list), list) { + if (chunk_iter->size >= size) { + chunk = chunk_iter; + break; + } + } + +split_and_return: + /* Did we find one? */ + if (chunk) { + /* Do we need to split it? 
Only if there is enough space for + * another header and some actual content */ + if (chunk->size > (size + sizeof(struct kmalloc_header))) { + struct kmalloc_header *leftover; + + leftover = (struct kmalloc_header *) + ((void *)chunk + sizeof(struct kmalloc_header) + size); + ___kmalloc_init_chunk(leftover, + (chunk->size - size - sizeof(struct kmalloc_header))); + list_add(&leftover->list, &chunk->list); + chunk->size = size; + } + + list_del(&chunk->list); + ___kmalloc_consolidate_list(&cpu_local_var(free_list)); + cpu_restore_interrupt(kmalloc_irq_flags); + return ((void *)chunk + sizeof(struct kmalloc_header)); + } + + + /* Allocate new memory and add it to free list */ + npages = (size + sizeof(struct kmalloc_header) + (PAGE_SIZE - 1)) + >> PAGE_SHIFT; + chunk = ihk_mc_alloc_pages(npages, flag); + + if (!chunk) { + cpu_restore_interrupt(kmalloc_irq_flags); return NULL; } - u = (size + sizeof(*h) - 1) / sizeof(*h); + ___kmalloc_init_chunk(chunk, + (npages * PAGE_SIZE - sizeof(struct kmalloc_header))); + ___kmalloc_insert_chunk(&cpu_local_var(free_list), chunk); - prev = h; - h = h->next; - - while (1) { - if (h == &v->free_list) { - req_page = ((u + 2) * sizeof(*h) + PAGE_SIZE - 1) - >> PAGE_SHIFT; - - h = allocate_pages(req_page, flag); - if(h == NULL) { - kprintf("kmalloc(%#x,%#x): out of memory\n", size, flag); - return NULL; - } - h->check = 0x5a5a5a5a; - prev->next = h; - h->size = (req_page * PAGE_SIZE) / sizeof(*h) - 2; - /* Guard entry */ - p = h + h->size + 1; - p->check = 0x5a5a5a5a; - p->next = &v->free_list; - p->size = 0; - h->next = p; - } - - if (h->size >= u) { - if (h->size == u || h->size == u + 1) { - prev->next = h->next; - h->cpu_id = ihk_mc_get_processor_id(); - - return h + 1; - } else { /* Divide */ - h->size -= u + 1; - - p = h + h->size + 1; - p->check = 0x5a5a5a5a; - p->size = u; - p->cpu_id = ihk_mc_get_processor_id(); - - return p + 1; - } - } - prev = h; - h = h->next; - } + goto split_and_return; } -void ___kfree(void *ptr) 
+static void ___kfree(void *ptr) { - struct malloc_header *p = (struct malloc_header *)ptr; - struct cpu_local_var *v = get_cpu_local_var((--p)->cpu_id); + struct kmalloc_header *chunk = + (struct kmalloc_header*)(ptr - sizeof(struct kmalloc_header)); + unsigned long kmalloc_irq_flags = cpu_disable_interrupt_save(); - if(p->cpu_id == ihk_mc_get_processor_id()){ - ____kfree(v, p); + /* Sanity check */ + if (chunk->front_magic != 0x5c5c5c5c || chunk->end_magic != 0x6d6d6d6d) { + kprintf("%s: memory corruption at address 0x%p\n", __FUNCTION__, ptr); + panic("panic"); } - else{ - unsigned long oldval; - unsigned long newval; - unsigned long rval; - do{ - p->next = v->remote_free_list; - oldval = (unsigned long)p->next; - newval = (unsigned long)p; - rval = atomic_cmpxchg8( - (unsigned long *)&v->remote_free_list, - oldval, newval); - }while(rval != oldval); + + /* Does this chunk belong to this CPU? */ + if (chunk->cpu_id == ihk_mc_get_processor_id()) { + + ___kmalloc_insert_chunk(&cpu_local_var(free_list), chunk); + ___kmalloc_consolidate_list(&cpu_local_var(free_list)); } + else { + struct cpu_local_var *v = get_cpu_local_var(chunk->cpu_id); + unsigned long irqflags; + + irqflags = ihk_mc_spinlock_lock(&v->remote_free_list_lock); + list_add(&chunk->list, &v->remote_free_list); + ihk_mc_spinlock_unlock(&v->remote_free_list_lock, irqflags); + } + + cpu_restore_interrupt(kmalloc_irq_flags); } -void print_free_list(void) + +void ___kmalloc_print_free_list(struct list_head *list) { - struct cpu_local_var *v = get_this_cpu_local_var(); - struct malloc_header *h = &v->free_list; + struct kmalloc_header *chunk_iter; + unsigned long irqflags = kprintf_lock(); - h = h->next; - - kprintf("free_list : \n"); - while (h != &v->free_list) { - kprintf(" %p : %p, %d ->\n", h, h->next, h->size); - h = h->next; + __kprintf("%s: [ \n", __FUNCTION__); + list_for_each_entry(chunk_iter, &cpu_local_var(free_list), list) { + __kprintf("%s: 0x%lx:%d (VA PFN: %lu, off: %lu)\n", __FUNCTION__, + 
(unsigned long)chunk_iter, + chunk_iter->size, + (unsigned long)chunk_iter >> PAGE_SHIFT, + (unsigned long)chunk_iter % PAGE_SIZE); } - kprintf("\n"); + __kprintf("%s: ] \n", __FUNCTION__); + kprintf_unlock(irqflags); } + From b9439947a7ac33ee3a6a60dadbaadf35797c6e14 Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Fri, 19 Aug 2016 11:52:00 +0900 Subject: [PATCH 34/42] kmalloc(): re-implementation of memory leak tracking --- kernel/include/kmalloc.h | 1 + kernel/mem.c | 277 ++++++++++++++++++++++++++++++++++++--- kernel/process.c | 2 + 3 files changed, 264 insertions(+), 16 deletions(-) diff --git a/kernel/include/kmalloc.h b/kernel/include/kmalloc.h index b17d1211..c91cd0cc 100644 --- a/kernel/include/kmalloc.h +++ b/kernel/include/kmalloc.h @@ -36,5 +36,6 @@ void __kfree(void *ptr); int _memcheck(void *ptr, char *msg, char *file, int line, int free); int memcheckall(); int freecheck(int runcount); +void kmalloc_consolidate_free_list(void); #endif diff --git a/kernel/mem.c b/kernel/mem.c index 65e74fbe..4955fe38 100644 --- a/kernel/mem.c +++ b/kernel/mem.c @@ -160,6 +160,12 @@ static void query_free_mem_interrupt_handler(void *priv) kprintf("McKernel free pages: %d\n", pages); + if (find_command_line("memdebug")) { + extern void kmalloc_memcheck(void); + + kmalloc_memcheck(); + } + #ifdef ATTACHED_MIC sbox_write(SBOX_SCRATCH0, pages); sbox_write(SBOX_SCRATCH1, 1); @@ -644,6 +650,36 @@ void mem_init(void) } } +#define KMALLOC_TRACK_HASH_SHIFT (8) +#define KMALLOC_TRACK_HASH_SIZE (1 << KMALLOC_TRACK_HASH_SHIFT) +#define KMALLOC_TRACK_HASH_MASK (KMALLOC_TRACK_HASH_SIZE - 1) + +struct list_head kmalloc_track_hash[KMALLOC_TRACK_HASH_SIZE]; +ihk_spinlock_t kmalloc_track_hash_locks[KMALLOC_TRACK_HASH_SIZE]; + +struct list_head kmalloc_addr_hash[KMALLOC_TRACK_HASH_SIZE]; +ihk_spinlock_t kmalloc_addr_hash_locks[KMALLOC_TRACK_HASH_SIZE]; + +int kmalloc_track_initialized = 0; +int kmalloc_runcount = 0; + +struct kmalloc_track_addr_entry { + void *addr; + int runcount; + 
struct list_head list; /* track_entry's list */ + struct kmalloc_track_entry *entry; + struct list_head hash; /* address hash */ +}; + +struct kmalloc_track_entry { + char *file; + int line; + int size; + ihk_atomic_t alloc_count; + struct list_head hash; + struct list_head addr_list; + ihk_spinlock_t addr_list_lock; +}; void kmalloc_init(void) { @@ -656,12 +692,57 @@ void kmalloc_init(void) ihk_mc_spinlock_init(&v->remote_free_list_lock); v->kmalloc_initialized = 1; + + if (!kmalloc_track_initialized) { + int i; + + memdebug = find_command_line("memdebug"); + + kmalloc_track_initialized = 1; + for (i = 0; i < KMALLOC_TRACK_HASH_SIZE; ++i) { + ihk_mc_spinlock_init(&kmalloc_track_hash_locks[i]); + INIT_LIST_HEAD(&kmalloc_track_hash[i]); + ihk_mc_spinlock_init(&kmalloc_addr_hash_locks[i]); + INIT_LIST_HEAD(&kmalloc_addr_hash[i]); + } + } } +/* NOTE: Hash lock must be held */ +struct kmalloc_track_entry *__kmalloc_track_find_entry( + int size, char *file, int line) +{ + struct kmalloc_track_entry *entry_iter, *entry = NULL; + int hash = (strlen(file) + line + size) & KMALLOC_TRACK_HASH_MASK; + + list_for_each_entry(entry_iter, &kmalloc_track_hash[hash], hash) { + if (!strcmp(entry_iter->file, file) && + entry_iter->size == size && + entry_iter->line == line) { + entry = entry_iter; + break; + } + } + + if (entry) { + dkprintf("%s found entry %s:%d size: %d\n", __FUNCTION__, + file, line, size); + } + else { + dkprintf("%s couldn't find entry %s:%d size: %d\n", __FUNCTION__, + file, line, size); + } + + return entry; +} /* Top level routines called from macro */ void *_kmalloc(int size, enum ihk_mc_ap_flag flag, char *file, int line) { + unsigned long irqflags; + struct kmalloc_track_entry *entry; + struct kmalloc_track_addr_entry *addr_entry; + int hash, addr_hash; void *r = ___kmalloc(size, flag); if (!memdebug) @@ -670,21 +751,176 @@ void *_kmalloc(int size, enum ihk_mc_ap_flag flag, char *file, int line) if (!r) return r; - /* TODO: kmalloc() debug code */ + hash = 
(strlen(file) + line + size) & KMALLOC_TRACK_HASH_MASK; + irqflags = ihk_mc_spinlock_lock(&kmalloc_track_hash_locks[hash]); + entry = __kmalloc_track_find_entry(size, file, line); + if (!entry) { + entry = ___kmalloc(sizeof(*entry), IHK_MC_AP_NOWAIT); + if (!entry) { + kprintf("%s: ERROR: allocating tracking entry\n"); + goto out; + } + + entry->line = line; + entry->size = size; + ihk_atomic_set(&entry->alloc_count, 0); + ihk_mc_spinlock_init(&entry->addr_list_lock); + INIT_LIST_HEAD(&entry->addr_list); + + entry->file = ___kmalloc(strlen(file) + 1, IHK_MC_AP_NOWAIT); + if (!entry->file) { + kprintf("%s: ERROR: allocating file string\n"); + ___kfree(entry); + ihk_mc_spinlock_unlock(&kmalloc_track_hash_locks[hash], irqflags); + goto out; + } + + strcpy(entry->file, file); + entry->file[strlen(file)] = 0; + list_add(&entry->hash, &kmalloc_track_hash[hash]); + dkprintf("%s entry %s:%d size: %d added\n", __FUNCTION__, + file, line, size); + } + ihk_mc_spinlock_unlock(&kmalloc_track_hash_locks[hash], irqflags); + + ihk_atomic_inc(&entry->alloc_count); + + /* Add new addr entry for this allocation entry */ + addr_entry = ___kmalloc(sizeof(*addr_entry), IHK_MC_AP_NOWAIT); + if (!addr_entry) { + kprintf("%s: ERROR: allocating addr entry\n"); + goto out; + } + + addr_entry->addr = r; + addr_entry->runcount = kmalloc_runcount; + addr_entry->entry = entry; + + irqflags = ihk_mc_spinlock_lock(&entry->addr_list_lock); + list_add(&addr_entry->list, &entry->addr_list); + ihk_mc_spinlock_unlock(&entry->addr_list_lock, irqflags); + + /* Add addr entry to address hash */ + addr_hash = ((unsigned long)r >> 5) & KMALLOC_TRACK_HASH_MASK; + irqflags = ihk_mc_spinlock_lock(&kmalloc_addr_hash_locks[addr_hash]); + list_add(&addr_entry->hash, &kmalloc_addr_hash[addr_hash]); + ihk_mc_spinlock_unlock(&kmalloc_addr_hash_locks[addr_hash], irqflags); + + dkprintf("%s addr_entry %p added\n", __FUNCTION__, r); + +out: return r; } void _kfree(void *ptr, char *file, int line) { - if (memdebug) { - 
/* TODO: kfree() debug code */ + unsigned long irqflags; + struct kmalloc_track_entry *entry; + struct kmalloc_track_addr_entry *addr_entry_iter, *addr_entry = NULL; + int hash; + + if (!memdebug) { + goto out; } + hash = ((unsigned long)ptr >> 5) & KMALLOC_TRACK_HASH_MASK; + irqflags = ihk_mc_spinlock_lock(&kmalloc_addr_hash_locks[hash]); + list_for_each_entry(addr_entry_iter, + &kmalloc_addr_hash[hash], hash) { + if (addr_entry_iter->addr == ptr) { + addr_entry = addr_entry_iter; + break; + } + } + + if (addr_entry) { + list_del(&addr_entry->hash); + } + ihk_mc_spinlock_unlock(&kmalloc_addr_hash_locks[hash], irqflags); + + if (!addr_entry) { + kprintf("%s: ERROR: kfree()ing invalid pointer\n", __FUNCTION__); + panic("panic"); + } + + entry = addr_entry->entry; + + irqflags = ihk_mc_spinlock_lock(&entry->addr_list_lock); + list_del(&addr_entry->list); + ihk_mc_spinlock_unlock(&entry->addr_list_lock, irqflags); + + dkprintf("%s addr_entry %p removed\n", __FUNCTION__, addr_entry->addr); + ___kfree(addr_entry); + + /* Do we need to remove tracking entry as well? 
*/ + if (!ihk_atomic_dec_and_test(&entry->alloc_count)) { + goto out; + } + + hash = (strlen(entry->file) + entry->line + entry->size) & + KMALLOC_TRACK_HASH_MASK; + irqflags = ihk_mc_spinlock_lock(&kmalloc_track_hash_locks[hash]); + list_del(&entry->hash); + ihk_mc_spinlock_unlock(&kmalloc_track_hash_locks[hash], irqflags); + + dkprintf("%s entry %s:%d size: %d removed\n", __FUNCTION__, + entry->file, entry->line, entry->size); + ___kfree(entry->file); + ___kfree(entry); + +out: ___kfree(ptr); } +void kmalloc_memcheck(void) +{ + int i; + unsigned long irqflags; + struct kmalloc_track_entry *entry = NULL; + + for (i = 0; i < KMALLOC_TRACK_HASH_SIZE; ++i) { + irqflags = ihk_mc_spinlock_lock(&kmalloc_track_hash_locks[i]); + list_for_each_entry(entry, &kmalloc_track_hash[i], hash) { + struct kmalloc_track_addr_entry *addr_entry = NULL; + int cnt = 0; + + ihk_mc_spinlock_lock_noirq(&entry->addr_list_lock); + list_for_each_entry(addr_entry, &entry->addr_list, list) { + + dkprintf("%s memory leak: %p @ %s:%d size: %d runcount: %d\n", + __FUNCTION__, + addr_entry->addr, + entry->file, + entry->line, + entry->size, + addr_entry->runcount); + + if (kmalloc_runcount != addr_entry->runcount) + continue; + + cnt++; + } + ihk_mc_spinlock_unlock_noirq(&entry->addr_list_lock); + + if (!cnt) + continue; + + kprintf("%s memory leak: %s:%d size: %d cnt: %d, runcount: %d\n", + __FUNCTION__, + entry->file, + entry->line, + entry->size, + cnt, + kmalloc_runcount); + } + ihk_mc_spinlock_unlock(&kmalloc_track_hash_locks[i], irqflags); + } + + ++kmalloc_runcount; +} + /* Redirection routines registered in alloc structure */ void *__kmalloc(int size, enum ihk_mc_ap_flag flag) { @@ -759,12 +995,32 @@ reiterate: } +void kmalloc_consolidate_free_list(void) +{ + struct kmalloc_header *chunk, *tmp; + unsigned long irqflags = + ihk_mc_spinlock_lock(&cpu_local_var(remote_free_list_lock)); + + /* Clean up remotely deallocated chunks */ + list_for_each_entry_safe(chunk, tmp, + 
&cpu_local_var(remote_free_list), list) { + + list_del(&chunk->list); + ___kmalloc_insert_chunk(&cpu_local_var(free_list), chunk); + } + + /* Free list lock ensures IRQs are disabled */ + ___kmalloc_consolidate_list(&cpu_local_var(free_list)); + + ihk_mc_spinlock_unlock(&cpu_local_var(remote_free_list_lock), irqflags); +} + + /* Actual low-level allocation routines */ static void *___kmalloc(int size, enum ihk_mc_ap_flag flag) { - struct kmalloc_header *chunk_iter, *tmp; + struct kmalloc_header *chunk_iter; struct kmalloc_header *chunk = NULL; - unsigned long irqflags; int npages; unsigned long kmalloc_irq_flags = cpu_disable_interrupt_save(); @@ -774,16 +1030,6 @@ static void *___kmalloc(int size, enum ihk_mc_ap_flag flag) */ if ((size % 32) != 0) size = ((size + 31) & ~((int)32-1)); - /* Clean up remotely deallocated chunks */ - irqflags = ihk_mc_spinlock_lock(&cpu_local_var(remote_free_list_lock)); - list_for_each_entry_safe(chunk, tmp, - &cpu_local_var(remote_free_list), list) { - - list_del(&chunk->list); - ___kmalloc_insert_chunk(&cpu_local_var(free_list), chunk); - } - ihk_mc_spinlock_unlock(&cpu_local_var(remote_free_list_lock), irqflags); - chunk = NULL; /* Find a chunk that is big enough */ list_for_each_entry(chunk_iter, &cpu_local_var(free_list), list) { @@ -810,7 +1056,6 @@ split_and_return: } list_del(&chunk->list); - ___kmalloc_consolidate_list(&cpu_local_var(free_list)); cpu_restore_interrupt(kmalloc_irq_flags); return ((void *)chunk + sizeof(struct kmalloc_header)); } diff --git a/kernel/process.c b/kernel/process.c index 12e43eb8..c1554ec2 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -2342,6 +2342,8 @@ static void idle(void) } if (v->status == CPU_STATUS_IDLE || v->status == CPU_STATUS_RESERVED) { + /* No work to do? 
Consolidate the kmalloc free list */ + kmalloc_consolidate_free_list(); cpu_safe_halt(); } else { From a7ee3f531b71d99e177b725aecda379a9a49ac09 Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Fri, 19 Aug 2016 11:52:44 +0900 Subject: [PATCH 35/42] sched_setaffinity(): error handling for invalid input --- arch/x86/kernel/memory.c | 3 +++ kernel/syscall.c | 6 +++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/memory.c b/arch/x86/kernel/memory.c index 782df6c2..08c23cee 100644 --- a/arch/x86/kernel/memory.c +++ b/arch/x86/kernel/memory.c @@ -2278,6 +2278,9 @@ int read_process_vm(struct process_vm *vm, void *kdst, const void *usrc, size_t reason = PF_USER; /* page not present */ for (addr = ustart & PAGE_MASK; addr < uend; addr += PAGE_SIZE) { + if (!addr) + return -EINVAL; + error = page_fault_process_vm(vm, (void *)addr, reason); if (error) { kprintf("%s: error: PF for %p failed\n", __FUNCTION__, addr); diff --git a/kernel/syscall.c b/kernel/syscall.c index b3279588..1395d434 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -5674,6 +5674,10 @@ SYSCALL_DECLARE(sched_setaffinity) int empty_set = 1; extern int num_processors; + if (!u_cpu_set) { + return -EINVAL; + } + if (sizeof(k_cpu_set) > len) { memset(&k_cpu_set, 0, sizeof(k_cpu_set)); } @@ -5681,7 +5685,7 @@ SYSCALL_DECLARE(sched_setaffinity) len = MIN2(len, sizeof(k_cpu_set)); if (copy_from_user(&k_cpu_set, u_cpu_set, len)) { - kprintf("%s: error: copy_from_user failed for %p:%d\n", __FUNCTION__, u_cpu_set, len); + dkprintf("%s: error: copy_from_user failed for %p:%d\n", __FUNCTION__, u_cpu_set, len); return -EFAULT; } From d550bced7895ecf2820f8018996ab6056e3fa791 Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Fri, 19 Aug 2016 12:51:28 +0900 Subject: [PATCH 36/42] kmalloc(): use macros to define size alignment --- kernel/mem.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/kernel/mem.c b/kernel/mem.c index 4955fe38..1041d4cc 100644 --- 
a/kernel/mem.c +++ b/kernel/mem.c @@ -1015,6 +1015,9 @@ void kmalloc_consolidate_free_list(void) ihk_mc_spinlock_unlock(&cpu_local_var(remote_free_list_lock), irqflags); } +#define KMALLOC_MIN_SHIFT (5) +#define KMALLOC_MIN_SIZE (1 << KMALLOC_TRACK_HASH_SHIFT) +#define KMALLOC_MIN_MASK (KMALLOC_MIN_SIZE - 1) /* Actual low-level allocation routines */ static void *___kmalloc(int size, enum ihk_mc_ap_flag flag) @@ -1024,11 +1027,10 @@ static void *___kmalloc(int size, enum ihk_mc_ap_flag flag) int npages; unsigned long kmalloc_irq_flags = cpu_disable_interrupt_save(); - /* - * 32 bytes aligned size, as this leaves us at cache line boundary - * (including the header) even for the smallest allocations - */ - if ((size % 32) != 0) size = ((size + 31) & ~((int)32-1)); + /* KMALLOC_MIN_SIZE bytes aligned size. */ + if (size & KMALLOC_MIN_MASK) { + size = ((size + KMALLOC_MIN_SIZE - 1) & ~(KMALLOC_MIN_MASK)); + } chunk = NULL; /* Find a chunk that is big enough */ From bfbc94dfb048f56e6bec7f207d176be41662b5ff Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Fri, 2 Sep 2016 15:04:35 +0900 Subject: [PATCH 37/42] mcctrl+mcexec: fix per-proc data allocation for fork() --- executer/kernel/mcctrl/control.c | 80 ++++++++++++++++++++------------ executer/kernel/mcctrl/syscall.c | 32 +++---------- executer/user/mcexec.c | 3 +- kernel/syscall.c | 2 +- 4 files changed, 61 insertions(+), 56 deletions(-) diff --git a/executer/kernel/mcctrl/control.c b/executer/kernel/mcctrl/control.c index a95dd1d6..ca8fca9e 100644 --- a/executer/kernel/mcctrl/control.c +++ b/executer/kernel/mcctrl/control.c @@ -83,7 +83,6 @@ static long mcexec_prepare_image(ihk_os_t os, long ret = 0; struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); struct mcctrl_per_proc_data *ppd = NULL; - int i; if (copy_from_user(&desc, udesc, sizeof(struct program_load_desc))) { @@ -148,30 +147,15 @@ static long mcexec_prepare_image(ihk_os_t os, goto free_out; } - ppd = kmalloc(sizeof(*ppd), GFP_KERNEL); + ppd = 
mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current)); if (!ppd) { - printk("ERROR: allocating per process data\n"); - ret = -ENOMEM; - goto free_out; - } - - ppd->pid = pdesc->pid; - ppd->rpgtable = pdesc->rpgtable; - INIT_LIST_HEAD(&ppd->wq_list); - INIT_LIST_HEAD(&ppd->wq_req_list); - INIT_LIST_HEAD(&ppd->wq_list_exact); - spin_lock_init(&ppd->wq_list_lock); - - for (i = 0; i < MCCTRL_PER_THREAD_DATA_HASH_SIZE; ++i) { - INIT_LIST_HEAD(&ppd->per_thread_data_hash[i]); - rwlock_init(&ppd->per_thread_data_hash_lock[i]); - } - - if (mcctrl_add_per_proc_data(usrdata, ppd->pid, ppd) < 0) { - printk("%s: error adding per process data\n", __FUNCTION__); + printk("ERROR: no per process data for PID %d\n", task_tgid_vnr(current)); ret = -EINVAL; goto free_out; } + + /* Update rpgtable */ + ppd->rpgtable = pdesc->rpgtable; if (copy_to_user(udesc, pdesc, sizeof(struct program_load_desc) + sizeof(struct program_image_section) * desc.num_sections)) { @@ -185,10 +169,6 @@ static long mcexec_prepare_image(ihk_os_t os, ret = 0; free_out: - /* Only free ppd if error */ - if (ret != 0 && ppd) { - kfree(ppd); - } kfree(args); kfree(pdesc); kfree(envs); @@ -924,14 +904,53 @@ int mcexec_open_exec(ihk_os_t os, char * __user filename) int retval; int os_ind = ihk_host_os_get_index(os); char *pathbuf, *fullpath; + struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); + struct mcctrl_per_proc_data *ppd = NULL; + int i; if (os_ind < 0) { return EINVAL; } + ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current)); + + if (!ppd) { + ppd = kmalloc(sizeof(*ppd), GFP_KERNEL); + if (!ppd) { + printk("ERROR: allocating per process data\n"); + return -ENOMEM; + } + + ppd->pid = task_tgid_vnr(current); + /* + * XXX: rpgtable will be updated in __do_in_kernel_syscall() + * under case __NR_munmap + */ + INIT_LIST_HEAD(&ppd->wq_list); + INIT_LIST_HEAD(&ppd->wq_req_list); + INIT_LIST_HEAD(&ppd->wq_list_exact); + spin_lock_init(&ppd->wq_list_lock); + + for (i = 0; i < 
MCCTRL_PER_THREAD_DATA_HASH_SIZE; ++i) { + INIT_LIST_HEAD(&ppd->per_thread_data_hash[i]); + rwlock_init(&ppd->per_thread_data_hash_lock[i]); + } + + if (mcctrl_add_per_proc_data(usrdata, ppd->pid, ppd) < 0) { + printk("%s: error adding per process data\n", __FUNCTION__); + retval = EINVAL; + goto out_free_ppd; + } + } + else { + /* Only deallocate in case of an error if we added it above */ + ppd = NULL; + } + pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); if (!pathbuf) { - return ENOMEM; + retval = ENOMEM; + goto out_error_drop_ppd; } file = open_exec(filename); @@ -963,7 +982,7 @@ int mcexec_open_exec(ihk_os_t os, char * __user filename) break; } } - + /* Add new exec file to the list */ mcef->os = os; mcef->pid = task_tgid_vnr(current); @@ -980,12 +999,15 @@ int mcexec_open_exec(ihk_os_t os, char * __user filename) kfree(pathbuf); return 0; - + out_put_file: fput(file); - out_error_free: kfree(pathbuf); +out_error_drop_ppd: + if (ppd) mcctrl_delete_per_proc_data(usrdata, ppd->pid); +out_free_ppd: + if (ppd) kfree(ppd); return -retval; } diff --git a/executer/kernel/mcctrl/syscall.c b/executer/kernel/mcctrl/syscall.c index 8df0771c..8bd45a71 100644 --- a/executer/kernel/mcctrl/syscall.c +++ b/executer/kernel/mcctrl/syscall.c @@ -1566,36 +1566,18 @@ int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet) /* Set new remote page table if not zero */ if (sc->args[2]) { struct mcctrl_per_proc_data *ppd = NULL; - int i; - ppd = kmalloc(sizeof(*ppd), GFP_ATOMIC); - if (!ppd) { - printk("ERROR: allocating per process data\n"); - error = -ENOMEM; - goto out; + ppd = mcctrl_get_per_proc_data(usrdata, sc->args[3]); + if (unlikely(!ppd)) { + kprintf("%s: ERROR: no per-process structure for PID %d??\n", + __FUNCTION__, task_tgid_vnr(current)); + return -1; } - ppd->pid = task_tgid_vnr(current); ppd->rpgtable = sc->args[2]; - INIT_LIST_HEAD(&ppd->wq_list); - INIT_LIST_HEAD(&ppd->wq_req_list); - INIT_LIST_HEAD(&ppd->wq_list_exact); - 
spin_lock_init(&ppd->wq_list_lock); - for (i = 0; i < MCCTRL_PER_THREAD_DATA_HASH_SIZE; ++i) { - INIT_LIST_HEAD(&ppd->per_thread_data_hash[i]); - rwlock_init(&ppd->per_thread_data_hash_lock[i]); - } - - if (mcctrl_add_per_proc_data(usrdata, ppd->pid, ppd) < 0) { - printk("%s: error adding per process data\n", __FUNCTION__); - error = -EBUSY; - kfree(ppd); - goto out; - } - - dprintk("pid: %d, rpgtable: 0x%lx added\n", - ppd->pid, ppd->rpgtable); + dprintk("%s: pid: %d, rpgtable: 0x%lx updated\n", + __FUNCTION__, ppd->pid, ppd->rpgtable); } ret = clear_pte_range(sc->args[0], sc->args[1]); diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index 9bd388e4..0e23a95f 100644 --- a/executer/user/mcexec.c +++ b/executer/user/mcexec.c @@ -2088,7 +2088,6 @@ gettid_out: /* Reinit signals and syscall threads */ init_sigaction(); - init_worker_threads(fd); __dprintf("pid(%d): signals and syscall threads OK\n", getpid()); @@ -2102,6 +2101,8 @@ gettid_out: goto fork_child_sync_pipe; } + init_worker_threads(fd); + fork_child_sync_pipe: sem_post(&fs->sem); if (fs->status) diff --git a/kernel/syscall.c b/kernel/syscall.c index 1395d434..9e9cd2f3 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -1981,7 +1981,7 @@ retry_tid: } /* In a single threaded process TID equals to PID */ - settid(new, 0, cpuid, -1, 0, NULL); + new->tid = newproc->pid; new->vm->address_space->pids[0] = new->proc->pid; dkprintf("fork(): new pid: %d\n", new->proc->pid); From 84665ff699faec15c010218cc26b6a9327ec00ae Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Sun, 4 Sep 2016 10:59:50 +0900 Subject: [PATCH 38/42] do_page_fault_process_vm(): fix error msg format that could cause another PF --- kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/process.c b/kernel/process.c index c1554ec2..9c9dfe6f 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -1649,7 +1649,7 @@ static int do_page_fault_process_vm(struct process_vm *vm, void *fault_addr0, ui 
"access denied. %d\n", ihk_mc_get_processor_id(), vm, fault_addr0, reason, error); - kprintf("%s: reason: %s%s%s%s%s%s%s%s\n", __FUNCTION__, + kprintf("%s: reason: %s%s%s%s%s%s%s\n", __FUNCTION__, (reason & PF_PROT) ? "PF_PROT " : "", (reason & PF_WRITE) ? "PF_WRITE " : "", (reason & PF_USER) ? "PF_USER " : "", From 673deadf373a9318bb013a999670b65c08dfd4bf Mon Sep 17 00:00:00 2001 From: Tomoki Shirasawa Date: Mon, 12 Sep 2016 15:40:06 +0900 Subject: [PATCH 39/42] fix syscall return type --- executer/kernel/mcctrl/mcctrl.h | 2 +- executer/kernel/mcctrl/syscall.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/executer/kernel/mcctrl/mcctrl.h b/executer/kernel/mcctrl/mcctrl.h index 6983d440..9273597f 100644 --- a/executer/kernel/mcctrl/mcctrl.h +++ b/executer/kernel/mcctrl/mcctrl.h @@ -320,7 +320,7 @@ inline struct mcctrl_per_thread_data *mcctrl_get_per_thread_data( struct mcctrl_per_proc_data *ppd, struct task_struct *task); void __return_syscall(ihk_os_t os, struct ikc_scd_packet *packet, - int ret, int stid); + long ret, int stid); #define PROCFS_NAME_MAX 1000 diff --git a/executer/kernel/mcctrl/syscall.c b/executer/kernel/mcctrl/syscall.c index 8bd45a71..86fb4a1f 100644 --- a/executer/kernel/mcctrl/syscall.c +++ b/executer/kernel/mcctrl/syscall.c @@ -1337,7 +1337,7 @@ static long pager_call(ihk_os_t os, struct syscall_request *req) } void __return_syscall(ihk_os_t os, struct ikc_scd_packet *packet, - int ret, int stid) + long ret, int stid) { unsigned long phys; struct syscall_response *res; From 419f5e495b28f1babe75770e01b6e8e617b70adf Mon Sep 17 00:00:00 2001 From: Tomoki Shirasawa Date: Mon, 12 Sep 2016 15:40:33 +0900 Subject: [PATCH 40/42] set*[ug]id: propagate credentials to thread pool --- executer/user/mcexec.c | 47 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index 0e23a95f..32a1b8a5 100644 --- a/executer/user/mcexec.c +++ 
b/executer/user/mcexec.c @@ -2360,6 +2360,53 @@ return_execve2: do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); break; + case __NR_setresuid: + ret = setresuid(w.sr.args[0], w.sr.args[1], w.sr.args[2]); + if(ret == -1) + ret = -errno; + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); + break; + + case __NR_setreuid: + ret = setreuid(w.sr.args[0], w.sr.args[1]); + if(ret == -1) + ret = -errno; + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); + break; + + case __NR_setuid: + ret = setuid(w.sr.args[0]); + if(ret == -1) + ret = -errno; + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); + break; + + case __NR_setresgid: + ret = setresgid(w.sr.args[0], w.sr.args[1], w.sr.args[2]); + if(ret == -1) + ret = -errno; + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); + break; + + case __NR_setregid: + ret = setregid(w.sr.args[0], w.sr.args[1]); + if(ret == -1) + ret = -errno; + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); + break; + + case __NR_setgid: + ret = setgid(w.sr.args[0]); + if(ret == -1) + ret = -errno; + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); + break; + + case __NR_setfsgid: + ret = setfsgid(w.sr.args[0]); + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); + break; + case __NR_close: if(w.sr.args[0] == fd) ret = -EBADF; From 9390fe5d2c799960e8699ee0de84e710adcd1ca1 Mon Sep 17 00:00:00 2001 From: Tomoki Shirasawa Date: Mon, 12 Sep 2016 15:43:29 +0900 Subject: [PATCH 41/42] signal: send signal to thread using thread-id. 
not cpu-id --- arch/x86/kernel/syscall.c | 6 +++--- executer/user/mcexec.c | 20 +++++++++++--------- kernel/syscall.c | 7 ++++--- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/syscall.c b/arch/x86/kernel/syscall.c index d6819a18..d1a00371 100644 --- a/arch/x86/kernel/syscall.c +++ b/arch/x86/kernel/syscall.c @@ -291,7 +291,7 @@ SYSCALL_DECLARE(rt_sigreturn) extern struct cpu_local_var *clv; extern unsigned long do_kill(struct thread *thread, int pid, int tid, int sig, struct siginfo *info, int ptracecont); -extern void interrupt_syscall(int all, int pid); +extern void interrupt_syscall(int pid, int tid); extern int num_processors; #define RFLAGS_MASK (RFLAGS_CF | RFLAGS_PF | RFLAGS_AF | RFLAGS_ZF | \ @@ -1287,7 +1287,7 @@ done: cpu_restore_interrupt(irqstate); if (doint && !(mask & tthread->sigmask.__val[0])) { - int cpuid = tthread->cpu_id; + int tid = tthread->tid; int pid = tproc->pid; int status = tthread->status; @@ -1298,7 +1298,7 @@ done: } if(!tthread->proc->nohost) - interrupt_syscall(pid, cpuid); + interrupt_syscall(pid, tid); if (status != PS_RUNNING) { if(sig == SIGKILL){ diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index 32a1b8a5..52bdc84b 100644 --- a/executer/user/mcexec.c +++ b/executer/user/mcexec.c @@ -882,7 +882,7 @@ static void *main_loop_thread_func(void *arg) struct thread_data_s *td = (struct thread_data_s *)arg; td->tid = gettid(); - td->remote_tid = (int)td->tid; + td->remote_tid = -1; pthread_barrier_wait(&init_ready); td->ret = main_loop(td->fd, td->cpu, td->lock); @@ -1680,16 +1680,14 @@ do_generic_syscall( } static void -kill_thread(unsigned long cpu) +kill_thread(unsigned long tid) { - if(cpu >= 0 && cpu < n_threads){ - pthread_kill(thread_data[cpu].thread_id, LOCALSIG); - } - else{ - int i; + int i; - for (i = 0; i < n_threads; ++i) { + for (i = 0; i < n_threads; ++i) { + if(thread_data[i].remote_tid == tid){ pthread_kill(thread_data[i].thread_id, LOCALSIG); + break; } } } @@ -1848,6 
+1846,8 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock) //pthread_mutex_lock(lock); + thread_data[cpu].remote_tid = w.sr.rtid; + switch (w.sr.number) { case __NR_open: ret = do_strncpy_from_user(fd, pathbuf, (void *)w.sr.args[0], PATH_MAX); @@ -2440,7 +2440,9 @@ return_execve2: break; } - + + thread_data[cpu].remote_tid = -1; + //pthread_mutex_unlock(lock); } __dprint("timed out.\n"); diff --git a/kernel/syscall.c b/kernel/syscall.c index 9e9cd2f3..8d05afa2 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -847,14 +847,15 @@ terminate_host(int pid) } void -interrupt_syscall(int pid, int cpuid) +interrupt_syscall(int pid, int tid) { - dkprintf("interrupt_syscall,target pid=%d,target cpuid=%d\n", pid, cpuid); + dkprintf("interrupt_syscall,target pid=%d,target tid=%d\n", pid, tid); ihk_mc_user_context_t ctx; long lerror; +kprintf("interrupt_syscall pid=%d tid=%d\n", pid, tid); ihk_mc_syscall_arg0(&ctx) = pid; - ihk_mc_syscall_arg1(&ctx) = cpuid; + ihk_mc_syscall_arg1(&ctx) = tid; lerror = syscall_generic_forwarding(__NR_kill, &ctx); if (lerror) { From e28725884f329fb3560a89e4464f1341fb255463 Mon Sep 17 00:00:00 2001 From: Tomoki Shirasawa Date: Mon, 19 Sep 2016 17:29:41 +0900 Subject: [PATCH 42/42] fix debug print --- kernel/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/mem.c b/kernel/mem.c index 1041d4cc..c87d43ba 100644 --- a/kernel/mem.c +++ b/kernel/mem.c @@ -347,7 +347,7 @@ static void page_fault_handler(void *fault_addr, uint64_t reason, void *regs) } kprintf("%s fault VM failed for TID: %d, addr: 0x%lx, " - "reason: %d, error: %d\n", + "reason: %d, error: %d\n", __FUNCTION__, thread->tid, fault_addr, reason, error); unhandled_page_fault(thread, fault_addr, regs); preempt_enable();