diff --git a/executer/kernel/control.c b/executer/kernel/control.c index f3f7e19f..0c52eda3 100644 --- a/executer/kernel/control.c +++ b/executer/kernel/control.c @@ -358,7 +358,6 @@ retry_alloc: kfree(wqhln); wqhln = wqhln_iter; list_del(&wqhln->list); - printk("DEBUG: wait queue head was already available in syscall wait\n"); break; } } diff --git a/executer/kernel/syscall.c b/executer/kernel/syscall.c index 682e23bc..1770d7b2 100644 --- a/executer/kernel/syscall.c +++ b/executer/kernel/syscall.c @@ -1126,10 +1126,11 @@ static void clear_pte_range(uintptr_t start, uintptr_t len) } if (addr < end) { zap_vma_ptes(vma, addr, end-addr); + dprintk("clear_pte_range() 0x%lx - 0x%lx OK\n", + vma->vm_start, vma->vm_end); } addr = end; } - up_read(&mm->mmap_sem); return; } @@ -1138,6 +1139,7 @@ int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall { int error; long ret = -1; + struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); dprintk("__do_in_kernel_syscall(%p,%p,%ld %lx)\n", os, c, sc->number, sc->args[0]); switch (sc->number) { @@ -1146,6 +1148,11 @@ int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall break; case __NR_munmap: + /* Set new remote page table if not zero */ + if (sc->args[2]) { + usrdata->rpgtable = sc->args[2]; + } + clear_pte_range(sc->args[0], sc->args[1]); ret = 0; break; diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index 8beddb0d..7f942174 100644 --- a/executer/user/mcexec.c +++ b/executer/user/mcexec.c @@ -1093,6 +1093,55 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock) break; } #endif + case __NR_fork: { + int child; + + child = fork(); + + switch (child) { + /* Error */ + case -1: + do_syscall_return(fd, cpu, -1, 0, 0, 0, 0); + break; + + /* Child process */ + case 0: { + int i; + + /* Reopen device fd */ + close(fd); + fd = open(dev, O_RDWR); + if (fd < 0) { + /* TODO: tell parent something went wrong? 
*/ + fprintf(stderr, "ERROR: opening %s\n", dev); + return 1; + } + + /* Reinit signals and syscall threads */ + init_sigaction(); + init_worker_threads(fd); + + __dprintf("pid(%d): signals and syscall threads OK\n", + getpid()); + + /* TODO: does the forked thread run in a pthread context? */ + for (i = 0; i <= ncpu; ++i) { + pthread_join(thread_data[i].thread_id, NULL); + } + + return 0; + } + + /* Parent */ + default: + + do_syscall_return(fd, cpu, child, 0, 0, 0, 0); + break; + } + + break; + } + default: ret = do_generic_syscall(&w); do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); diff --git a/kernel/include/process.h b/kernel/include/process.h index 41a25e7c..3a08853b 100644 --- a/kernel/include/process.h +++ b/kernel/include/process.h @@ -157,8 +157,8 @@ struct process_vm { struct process *create_process(unsigned long user_pc); -struct process *clone_process(struct process *org, - unsigned long pc, unsigned long sp); +struct process *clone_process(struct process *org, unsigned long pc, + unsigned long sp, int clone_flags); void destroy_process(struct process *proc); void hold_process(struct process *proc); void free_process(struct process *proc); diff --git a/kernel/include/syscall.h b/kernel/include/syscall.h index 6a8c86ed..bb945e1c 100644 --- a/kernel/include/syscall.h +++ b/kernel/include/syscall.h @@ -206,9 +206,9 @@ struct syscall_params { SYSCALL_ARG_##a2(2); SYSCALL_ARG_##a3(3); \ SYSCALL_ARG_##a4(4); SYSCALL_ARG_##a5(5); -#define SYSCALL_FOOTER return do_syscall(&request, ctx, ihk_mc_get_processor_id()) +#define SYSCALL_FOOTER return do_syscall(&request, ctx, ihk_mc_get_processor_id(), 0) -extern long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx, int cpu); +extern long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx, int cpu, int pid); extern int obtain_clone_cpuid(); extern long syscall_generic_forwarding(int n, ihk_mc_user_context_t *ctx); diff --git a/kernel/process.c b/kernel/process.c index 
ed853403..c128d4da 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -47,6 +47,10 @@ #define KERNEL_STACK_NR_PAGES 24 extern long do_arch_prctl(unsigned long code, unsigned long address); +static void insert_vm_range_list(struct process_vm *vm, + struct vm_range *newrange); +static int copy_user_ranges(struct process *proc, struct process *org); +enum ihk_mc_pt_attribute vrflag_to_ptattr(unsigned long flag); static int init_process_vm(struct process *owner, struct process_vm *vm) { @@ -105,6 +109,7 @@ struct process *create_process(unsigned long user_pc) proc->vm = (struct process_vm *)(proc + 1); if(init_process_vm(proc, proc->vm) != 0){ + kfree(proc->sigshared); kfree(proc->sighandler); ihk_mc_free_pages(proc, KERNEL_STACK_NR_PAGES); return NULL; @@ -117,11 +122,12 @@ struct process *create_process(unsigned long user_pc) } struct process *clone_process(struct process *org, unsigned long pc, - unsigned long sp) + unsigned long sp, int clone_flags) { struct process *proc; - if((proc = ihk_mc_alloc_pages(KERNEL_STACK_NR_PAGES, IHK_MC_AP_NOWAIT)) == NULL){ + if ((proc = ihk_mc_alloc_pages(KERNEL_STACK_NR_PAGES, + IHK_MC_AP_NOWAIT)) == NULL) { return NULL; } @@ -136,24 +142,199 @@ struct process *clone_process(struct process *org, unsigned long pc, memcpy(proc->uctx, org->uctx, sizeof(*org->uctx)); ihk_mc_modify_user_context(proc->uctx, IHK_UCR_STACK_POINTER, sp); ihk_mc_modify_user_context(proc->uctx, IHK_UCR_PROGRAM_COUNTER, pc); - - ihk_atomic_inc(&org->vm->refcount); - proc->vm = org->vm; + proc->rlimit_stack = org->rlimit_stack; - proc->sighandler = org->sighandler; - ihk_atomic_inc(&org->sighandler->use); + /* TODO: do this check properly! 
+ * fork() */ + if (clone_flags == 0x1200011) { + dkprintf("fork(): sighandler\n"); + proc->sighandler = kmalloc(sizeof(struct sig_handler), + IHK_MC_AP_NOWAIT); + + if (!proc->sighandler) { + goto err_free_proc; + } - proc->sigshared = org->sigshared; - ihk_atomic_inc(&org->sigshared->use); + dkprintf("fork(): sigshared\n"); + proc->sigshared = kmalloc(sizeof(struct sig_shared), IHK_MC_AP_NOWAIT); + + if (!proc->sigshared) { + goto err_free_sighandler; + } - ihk_mc_spinlock_init(&proc->sigpendinglock); - INIT_LIST_HEAD(&proc->sigpending); + memset(proc->sighandler, '\0', sizeof(struct sig_handler)); + ihk_atomic_set(&proc->sighandler->use, 1); + ihk_mc_spinlock_init(&proc->sighandler->lock); + ihk_atomic_set(&proc->sigshared->use, 1); + ihk_mc_spinlock_init(&proc->sigshared->lock); + INIT_LIST_HEAD(&proc->sigshared->sigpending); + ihk_mc_spinlock_init(&proc->sigpendinglock); + INIT_LIST_HEAD(&proc->sigpending); + + proc->vm = (struct process_vm *)(proc + 1); + + dkprintf("fork(): init_process_vm()\n"); + if (init_process_vm(proc, proc->vm) != 0) { + goto err_free_sigshared; + } + + memcpy(&proc->vm->region, &org->vm->region, sizeof(struct vm_regions)); + + dkprintf("fork(): copy_user_ranges()\n"); + /* Copy user-space mappings. + * TODO: do this with COW later? 
*/ + if (copy_user_ranges(proc, org) != 0) { + goto err_free_sigshared; + } + + dkprintf("fork(): copy_user_ranges() OK\n"); + } + /* clone() */ + else { + ihk_atomic_inc(&org->vm->refcount); + proc->vm = org->vm; + + proc->sighandler = org->sighandler; + ihk_atomic_inc(&org->sighandler->use); + + proc->sigshared = org->sigshared; + ihk_atomic_inc(&org->sigshared->use); + + ihk_mc_spinlock_init(&proc->sigpendinglock); + INIT_LIST_HEAD(&proc->sigpending); + } ihk_mc_spinlock_init(&proc->spin_sleep_lock); proc->spin_sleep = 0; return proc; + +err_free_sigshared: + kfree(proc->sigshared); + +err_free_sighandler: + kfree(proc->sighandler); /* kmalloc()ed above, so it must not be handed to the page allocator */ + +err_free_proc: + ihk_mc_free_pages(proc, KERNEL_STACK_NR_PAGES); + + return NULL; +} + +static int copy_user_ranges(struct process *proc, struct process *org) +{ + struct vm_range *src_range; + struct vm_range *range; + + ihk_mc_spinlock_lock_noirq(&org->vm->memory_range_lock); + + /* Iterate original process' vm_range list and take a copy one-by-one */ + list_for_each_entry(src_range, &org->vm->vm_range_list, list) { + void *ptepgaddr; + size_t ptepgsize; + int ptep2align; + void *pg_vaddr; + size_t pgsize; + void *vaddr; + int p2align; + enum ihk_mc_pt_attribute attr; + pte_t *ptep; + + range = kmalloc(sizeof(struct vm_range), IHK_MC_AP_NOWAIT); + if (!range) { + goto err_rollback; + } + + INIT_LIST_HEAD(&range->list); + range->start = src_range->start; + range->end = src_range->end; + range->flag = src_range->flag; + range->memobj = src_range->memobj; + range->objoff = src_range->objoff; + if (range->memobj) { + memobj_ref(range->memobj); + } + + /* Copy actual mappings */ + vaddr = (void *)range->start; + while ((unsigned long)vaddr < range->end) { + /* Get source PTE */ + ptep = ihk_mc_pt_lookup_pte(org->vm->page_table, vaddr, + &ptepgaddr, &ptepgsize, &ptep2align); + + if (!ptep || pte_is_null(ptep) || !pte_is_present(ptep)) { + vaddr += PAGE_SIZE; + continue; + } + + 
dkprintf("copy_user_ranges(): 0x%lx PTE found\n", vaddr); + + /* Page size */ + if (arch_get_smaller_page_size(NULL, -1, &ptepgsize, + &ptep2align)) { + + kprintf("ERROR: copy_user_ranges() " + "(%p,%lx-%lx %lx,%lx):" + "get pgsize failed\n", org->vm, + range->start, range->end, + range->flag, vaddr); + + goto err_free_range_rollback; + } + + pgsize = ptepgsize; + p2align = ptep2align; + dkprintf("copy_user_ranges(): page size: %d\n", pgsize); + + /* Get physical page */ + pg_vaddr = ihk_mc_alloc_aligned_pages(1, p2align, IHK_MC_AP_NOWAIT); + + if (!pg_vaddr) { + kprintf("ERROR: copy_user_ranges() allocating new page\n"); + goto err_free_range_rollback; + } + dkprintf("copy_user_ranges(): phys page allocated\n", pgsize); + + /* Copy content */ + memcpy(pg_vaddr, vaddr, pgsize); + dkprintf("copy_user_ranges(): memcpy OK\n", pgsize); + + /* Set up new PTE */ + attr = vrflag_to_ptattr(range->flag); + if (ihk_mc_pt_set_range(proc->vm->page_table, vaddr, + vaddr + pgsize, virt_to_phys(pg_vaddr), attr)) { + kprintf("ERROR: copy_user_ranges() " + "(%p,%lx-%lx %lx,%lx):" + "set range failed.\n", + org->vm, range->start, range->end, + range->flag, vaddr); + + goto err_free_range_rollback; + } + dkprintf("copy_user_ranges(): new PTE set\n", pgsize); + + vaddr += pgsize; + } + + insert_vm_range_list(proc->vm, range); + } + + ihk_mc_spinlock_unlock_noirq(&org->vm->memory_range_lock); + + return 0; + +err_free_range_rollback: + kfree(range); + +err_rollback: + + /* TODO: implement rollback */ + + + ihk_mc_spinlock_unlock_noirq(&org->vm->memory_range_lock); + + return -1; } int update_process_page_table(struct process *process, @@ -549,10 +730,9 @@ int add_process_memory_range(struct process *process, range->start, range->end, range->end - range->start, range->flag); } else { - dkprintf("range: 0x%lX - 0x%lX => 0x%lX - 0x%lX (%ld) [%lx]\n", - range->start, range->end, range->phys, range->phys + - range->end - range->start, range->end - range->start, - range->flag); + 
dkprintf("range: 0x%lX - 0x%lX (%ld) [%lx]\n", + range->start, range->end, range->end - range->start, + range->flag); } if (flag & VR_REMOTE) { diff --git a/kernel/syscall.c b/kernel/syscall.c index 4c99860d..4e971356 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -101,7 +101,7 @@ int copy_to_user(struct process *, void *, const void *, size_t); static void do_mod_exit(int status); #endif -static void send_syscall(struct syscall_request *req, int cpu) +static void send_syscall(struct syscall_request *req, int cpu, int pid) { struct ikc_scd_packet packet; struct syscall_response *res; @@ -147,7 +147,7 @@ static void send_syscall(struct syscall_request *req, int cpu) #ifdef SYSCALL_BY_IKC packet.msg = SCD_MSG_SYSCALL_ONESIDE; packet.ref = cpu; - packet.pid = cpu_local_var(current)->pid; + packet.pid = pid ? pid : cpu_local_var(current)->pid; packet.arg = scp->request_rpa; dkprintf("send syscall, nr: %d, pid: %d\n", req->number, packet.pid); ihk_ikc_send(syscall_channel, &packet, 0); @@ -155,7 +155,8 @@ static void send_syscall(struct syscall_request *req, int cpu) } -long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx, int cpu) +long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx, + int cpu, int pid) { struct syscall_response *res; struct syscall_request req2 IHK_DMA_ALIGN; @@ -175,7 +176,7 @@ long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx, int cpu } res = scp->response_va; - send_syscall(req, cpu); + send_syscall(req, cpu, pid); dkprintf("SC(%d)[%3d] waiting for host.. 
\n", ihk_mc_get_processor_id(), @@ -202,7 +203,7 @@ long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx, int cpu req2.args[0] = PAGER_RESUME_PAGE_FAULT; req2.args[1] = error; - send_syscall(&req2, cpu); + send_syscall(&req2, cpu, pid); } } @@ -237,7 +238,7 @@ terminate(int rc, int sig, ihk_mc_user_context_t *ctx) /* XXX: send SIGKILL to all threads in this process */ flush_process_memory(proc); /* temporary hack */ - do_syscall(&request, ctx, ihk_mc_get_processor_id()); + do_syscall(&request, ctx, ihk_mc_get_processor_id(), 0); #define IS_DETACHED_PROCESS(proc) (1) /* should be implemented in the future */ proc->status = PS_ZOMBIE; @@ -282,7 +283,7 @@ SYSCALL_DECLARE(exit_group) /* XXX: send SIGKILL to all threads in this process */ - do_syscall(&request, ctx, ihk_mc_get_processor_id()); + do_syscall(&request, ctx, ihk_mc_get_processor_id(), 0); #define IS_DETACHED_PROCESS(proc) (1) /* should be implemented in the future */ proc->status = PS_ZOMBIE; @@ -306,6 +307,8 @@ static void clear_host_pte(uintptr_t addr, size_t len) ihk_mc_syscall_arg0(&ctx) = addr; ihk_mc_syscall_arg1(&ctx) = len; + /* NOTE: 3rd parameter denotes new rpgtable of host process (if not zero) */ + ihk_mc_syscall_arg2(&ctx) = 0; lerror = syscall_generic_forwarding(__NR_munmap, &ctx); if (lerror) { @@ -961,30 +964,57 @@ SYSCALL_DECLARE(clone) ihk_mc_user_context_t ctx1; struct syscall_request request1 IHK_DMA_ALIGN; - if(clone_flags == 0x1200011){ - // fork() - return -EOPNOTSUPP; - } - - dkprintf("[%d] clone(): stack_pointr: 0x%lX\n", - ihk_mc_get_processor_id(), - (unsigned long)ihk_mc_syscall_arg1(ctx)); + dkprintf("clone(): stack_pointr passed in: 0x%lX, stack pointer of caller: 0x%lx\n", + (unsigned long)ihk_mc_syscall_arg1(ctx), + (unsigned long)ihk_mc_syscall_sp(ctx)); cpuid = obtain_clone_cpuid(); new = clone_process(cpu_local_var(current), ihk_mc_syscall_pc(ctx), - ihk_mc_syscall_arg1(ctx)); + ihk_mc_syscall_arg1(ctx) ? 
ihk_mc_syscall_arg1(ctx) : + ihk_mc_syscall_sp(ctx), + clone_flags); if (!new) { return -ENOMEM; } -// /* Allocate new pid */ -// new->pid = ihk_atomic_inc_return(&pid_cnt); + /* fork() a new process on the host */ + /* TODO: do this check properly! */ + if (clone_flags == 0x1200011) { + request1.number = __NR_fork; + new->pid = do_syscall(&request1, &ctx1, ihk_mc_get_processor_id(), 0); + + if (new->pid == -1) { + kprintf("ERROR: forking host process\n"); + + /* TODO: clean-up new */ + return -EFAULT; + } - new->pid = cpu_local_var(current)->pid; + dkprintf("fork(): new pid: %d\n", new->pid); + /* clear user space PTEs and set new rpgtable so that consequent + * page faults will look up the right mappings */ + request1.number = __NR_munmap; + request1.args[0] = new->vm->region.user_start; + request1.args[1] = new->vm->region.user_end - + new->vm->region.user_start; + /* 3rd parameter denotes new rpgtable of host process */ + request1.args[2] = virt_to_phys(new->vm->page_table); + + dkprintf("fork(): requesting PTE clear and rpgtable (0x%lx) update\n", + request1.args[2]); + + if (do_syscall(&request1, &ctx1, ihk_mc_get_processor_id(), new->pid)) { + kprintf("ERROR: clearing PTEs in host process\n"); + } + } + else { + new->pid = cpu_local_var(current)->pid; + } + request1.number = __NR_gettid; - new->tid = do_syscall(&request1, &ctx1, cpuid); + new->tid = do_syscall(&request1, &ctx1, cpuid, new->pid); if (clone_flags & CLONE_PARENT_SETTID) { dkprintf("clone_flags & CLONE_PARENT_SETTID: 0x%lX\n", @@ -1000,6 +1030,20 @@ SYSCALL_DECLARE(clone) new->thread.clear_child_tid = (int*)ihk_mc_syscall_arg3(ctx); } + if (clone_flags & CLONE_CHILD_SETTID) { + unsigned long phys; + dkprintf("clone_flags & CLONE_CHILD_SETTID: 0x%lX\n", + (unsigned long)ihk_mc_syscall_arg3(ctx)); + + if (ihk_mc_pt_virt_to_phys(new->vm->page_table, + (void *)ihk_mc_syscall_arg3(ctx), &phys)) { + kprintf("ERROR: looking up physical addr for child process\n"); + return -EFAULT; + } + + 
*((int*)phys_to_virt(phys)) = new->tid; + } + if (clone_flags & CLONE_SETTLS) { dkprintf("clone_flags & CLONE_SETTLS: 0x%lX\n", (unsigned long)ihk_mc_syscall_arg4(ctx)); @@ -1485,7 +1529,7 @@ SYSCALL_DECLARE(futex) request.args[0] = __phys; - int r = do_syscall(&request, ctx, ihk_mc_get_processor_id()); + int r = do_syscall(&request, ctx, ihk_mc_get_processor_id(), 0); if (r < 0) { return -EFAULT;