diff --git a/executer/kernel/syscall.c b/executer/kernel/syscall.c index 55a6de80..5115ce71 100644 --- a/executer/kernel/syscall.c +++ b/executer/kernel/syscall.c @@ -256,6 +256,18 @@ out: return error; } +/* + * remap_pfn_range() may set VM_PFN_AT_MMAP. + * VM_PFN_AT_MMAP causes the following problems: + * + * 1) vm_pgoff is changed. As a result, the i_mmap tree is corrupted. + * 2) Duplicate free_memtype() calls occur. + * + * These problems may be solved in linux-3.7. + * Until they are fixed, vm_insert_pfn() is used instead. + */ +#define USE_VM_INSERT_PFN 1 + static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { struct mcctrl_usrdata * usrdata = vma->vm_file->private_data; @@ -268,6 +280,9 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) unsigned long pgsize; unsigned long rva; unsigned long pfn; +#if USE_VM_INSERT_PFN + size_t pix; +#endif dprintk("mcctrl:page fault:flags %#x pgoff %#lx va %p page %p\n", vmf->flags, vmf->pgoff, vmf->virtual_address, vmf->page); @@ -303,7 +318,16 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) phys = ihk_device_map_memory(dev, rpa, pgsize); pfn = phys >> PAGE_SHIFT; - error = remap_pfn_range(vma, rva, pfn, pgsize, PAGE_SHARED); +#if USE_VM_INSERT_PFN + for (pix = 0; pix < (pgsize / PAGE_SIZE); ++pix) { + error = vm_insert_pfn(vma, rva+(pix*PAGE_SIZE), pfn+pix); + if (error) { + break; + } + } +#else + error = remap_pfn_range(vma, rva, pfn, pgsize, vma->vm_page_prot); +#endif ihk_device_unmap_memory(dev, phys, pgsize); if (error) { printk("mcctrl:page fault:flags %#x pgoff %#lx va %p page %p\n", @@ -898,32 +922,67 @@ static void __return_syscall(struct mcctrl_channel *c, int ret) c->param.response_va->status = 1; } -static void clear_pte_range(uintptr_t addr, uintptr_t len) +static int remap_user_space(uintptr_t rva, size_t len, int prot) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; + struct file *file; uintptr_t start; + pgoff_t pgoff; 
+ uintptr_t map; + + dprintk("remap_user_space(%lx,%lx,%x)\n", rva, len, prot); + down_write(&mm->mmap_sem); + vma = find_vma(mm, rva); + if (!vma || (rva < vma->vm_start)) { + printk("remap_user_space(%lx,%lx,%x):find_vma failed. %p %lx %lx\n", + rva, len, prot, vma, + (vma)? vma->vm_start: -1, + (vma)? vma->vm_end: 0); + up_write(&mm->mmap_sem); + map = -ENOMEM; + goto out; + } + + file = vma->vm_file; + start = rva; + pgoff = vma->vm_pgoff + ((rva - vma->vm_start) >> PAGE_SHIFT); + + map = do_mmap_pgoff(file, start, len, + prot, MAP_FIXED|MAP_SHARED, pgoff); + up_write(&mm->mmap_sem); +out: + dprintk("remap_user_space(%lx,%lx,%x): %lx (%ld)\n", + rva, len, prot, (long)map, (long)map); + return (IS_ERR_VALUE(map))? (int)map: 0; +} + +static void clear_pte_range(uintptr_t start, uintptr_t len) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + uintptr_t addr; uintptr_t end; down_read(&mm->mmap_sem); - vma = find_vma(mm, 0); - if (!vma) { - printk("clear_pte_range(%lx,%lx):find_vma(0) failed\n", - addr, len); - up_read(&mm->mmap_sem); - return; - } + addr = start; + while (addr < (start + len)) { + vma = find_vma(mm, addr); + if (!vma) { + break; + } - start = addr; - end = addr + len; - if (start < vma->vm_start) { - start = vma->vm_start; - } - if (vma->vm_end < end) { - end = vma->vm_end; - } - if (start < end) { - zap_vma_ptes(vma, start, end-start); + if (addr < vma->vm_start) { + addr = vma->vm_start; + } + end = start + len; + if (vma->vm_end < end) { + end = vma->vm_end; + } + if (addr < end) { + zap_vma_ptes(vma, addr, end-addr); + } + addr = end; } up_read(&mm->mmap_sem); @@ -946,6 +1005,10 @@ int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall ret = 0; break; + case __NR_mprotect: + ret = remap_user_space(sc->args[0], sc->args[1], sc->args[2]); + break; + default: error = -ENOSYS; goto out; diff --git a/kernel/include/process.h b/kernel/include/process.h index 006c67f5..7b1b8c17 100644 --- 
a/kernel/include/process.h +++ b/kernel/include/process.h @@ -140,8 +140,8 @@ int add_process_memory_range(struct process *process, unsigned long start, unsigned long end, unsigned long phys, unsigned long flag, struct memobj *memobj, off_t objoff); -int remove_process_memory_range( - struct process *process, unsigned long start, unsigned long end); +int remove_process_memory_range(struct process *process, unsigned long start, + unsigned long end, int *ro_freedp); int split_process_memory_range(struct process *process, struct vm_range *range, uintptr_t addr, struct vm_range **splitp); int join_process_memory_range(struct process *process, struct vm_range *surviving, diff --git a/kernel/process.c b/kernel/process.c index 236a2348..5632ec4c 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -361,13 +361,14 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range) } int remove_process_memory_range(struct process *process, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, int *ro_freedp) { struct process_vm * const vm = process->vm; struct vm_range *range; struct vm_range *next; int error; struct vm_range *freerange; + int ro_freed = 0; dkprintf("remove_process_memory_range(%p,%lx,%lx)\n", process, start, end); @@ -401,6 +402,10 @@ int remove_process_memory_range(struct process *process, } } + if (!(freerange->flag & VR_PROT_WRITE)) { + ro_freed = 1; + } + error = free_process_memory_range(process->vm, freerange); if (error) { ekprintf("remove_process_memory_range(%p,%lx,%lx):" @@ -411,8 +416,11 @@ int remove_process_memory_range(struct process *process, } - dkprintf("remove_process_memory_range(%p,%lx,%lx): 0\n", - process, start, end); + if (ro_freedp) { + *ro_freedp = ro_freed; + } + dkprintf("remove_process_memory_range(%p,%lx,%lx): 0 %d\n", + process, start, end, ro_freed); return 0; } diff --git a/kernel/syscall.c b/kernel/syscall.c index 32eeb08a..d39209ab 100644 --- a/kernel/syscall.c +++ 
b/kernel/syscall.c @@ -274,15 +274,47 @@ static void clear_host_pte(uintptr_t addr, size_t len) return; } +static int set_host_vma(uintptr_t addr, size_t len, int prot) +{ + ihk_mc_user_context_t ctx; + long lerror; + + ihk_mc_syscall_arg0(&ctx) = addr; + ihk_mc_syscall_arg1(&ctx) = len; + ihk_mc_syscall_arg2(&ctx) = prot; + + lerror = syscall_generic_forwarding(__NR_mprotect, &ctx); + if (lerror) { + kprintf("set_host_vma(%lx,%lx,%x) failed. %ld\n", + addr, len, prot, lerror); + goto out; + } + + lerror = 0; +out: + return (int)lerror; +} + static int do_munmap(void *addr, size_t len) { int error; + int ro_freed; begin_free_pages_pending(); error = remove_process_memory_range(cpu_local_var(current), - (intptr_t)addr, (intptr_t)addr+len); + (intptr_t)addr, (intptr_t)addr+len, &ro_freed); // XXX: TLB flush flush_tlb(); + if (error || !ro_freed) { + clear_host_pte((uintptr_t)addr, len); + } + else { + error = set_host_vma((uintptr_t)addr, len, PROT_READ|PROT_WRITE); + if (error) { + kprintf("sys_munmap:set_host_vma failed. 
%d\n", error); + /* through */ + } + } finish_free_pages_pending(); return error; } @@ -376,10 +408,10 @@ SYSCALL_DECLARE(mmap) void *p = NULL; int vrflags; intptr_t phys; - int unmapped = 0; struct memobj *memobj = NULL; int maxprot; int denied; + int ro_vma_mapped = 0; dkprintf("[%d]sys_mmap(%lx,%lx,%x,%x,%d,%lx)\n", ihk_mc_get_processor_id(), @@ -419,7 +451,7 @@ SYSCALL_DECLARE(mmap) ekprintf("sys_mmap(%lx,%lx,%x,%x,%x,%lx):EINVAL\n", addr0, len0, prot, flags, fd, off); error = -EINVAL; - goto out; + goto out2; } /* check not supported requests */ @@ -429,7 +461,7 @@ SYSCALL_DECLARE(mmap) addr0, len0, prot, flags, fd, off, (flags & ~(supported_flags | ignored_flags))); error = -EINVAL; - goto out; + goto out2; } if ((flags & MAP_SHARED) && !(flags & MAP_ANONYMOUS)) { @@ -437,19 +469,17 @@ SYSCALL_DECLARE(mmap) addr0, len0, prot, flags, fd, off, (flags & ~(supported_flags | ignored_flags))); error = -EINVAL; - goto out; + goto out2; } ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock); if (flags & MAP_FIXED) { /* clear specified address range */ - unmapped = 1; error = do_munmap((void *)addr, len); if (error) { ekprintf("sys_mmap:do_munmap(%lx,%lx) failed. %d\n", addr, len, error); - ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); goto out; } } @@ -459,7 +489,6 @@ SYSCALL_DECLARE(mmap) if (error) { ekprintf("sys_mmap:search_free_space(%lx,%lx) failed. %d\n", len, region->map_end, error); - ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); goto out; } region->map_end = addr + len; @@ -487,13 +516,22 @@ SYSCALL_DECLARE(mmap) vrflags |= VR_DEMAND_PAGING; } + if (!(prot & PROT_WRITE)) { + error = set_host_vma(addr, len, PROT_READ); + if (error) { + kprintf("sys_mmap:set_host_vma failed. %d\n", error); + goto out; + } + + ro_vma_mapped = 1; + } + phys = 0; maxprot = PROT_READ | PROT_WRITE | PROT_EXEC; if (!(flags & MAP_ANONYMOUS)) { error = fileobj_create(fd, &memobj, &maxprot); if (error) { ekprintf("sys_mmap:fileobj_create failed. 
%d\n", error); - ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); goto out; } } @@ -511,7 +549,6 @@ SYSCALL_DECLARE(mmap) if (p == NULL) { ekprintf("sys_mmap:allocate_pages(%d,%d) failed.\n", npages, p2align); - ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); error = -ENOMEM; goto out; } @@ -524,7 +561,6 @@ SYSCALL_DECLARE(mmap) denied = prot & ~maxprot; if (denied) { ekprintf("sys_mmap:denied %x. %x %x\n", denied, prot, maxprot); - ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); error = (denied == PROT_EXEC)? -EPERM: -EACCES; goto out; } @@ -536,26 +572,27 @@ SYSCALL_DECLARE(mmap) "(%p,%lx,%lx,%lx,%lx) failed %d\n", proc, addr, addr+len, virt_to_phys(p), vrflags, error); - ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); goto out; } - ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); - error = 0; p = NULL; memobj = NULL; + ro_vma_mapped = 0; out: + if (ro_vma_mapped) { + (void)set_host_vma(addr, len, PROT_READ|PROT_WRITE); + } + ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); + +out2: if (p) { ihk_mc_free_pages(p, npages); } if (memobj) { memobj_release(memobj); } - if (unmapped) { - clear_host_pte(addr, len); - } dkprintf("[%d]sys_mmap(%lx,%lx,%x,%x,%d,%lx): %ld %lx\n", ihk_mc_get_processor_id(), addr0, len0, prot, flags, fd, off, error, addr); @@ -588,7 +625,6 @@ SYSCALL_DECLARE(munmap) ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock); error = do_munmap((void *)addr, len); ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); - clear_host_pte(addr, len); out: dkprintf("[%d]sys_munmap(%lx,%lx): %d\n", @@ -612,6 +648,7 @@ SYSCALL_DECLARE(mprotect) struct vm_range *changed; const unsigned long protflags = PROT_TO_VR_FLAG(prot); unsigned long denied; + int ro_changed = 0; dkprintf("[%d]sys_mprotect(%lx,%lx,%x)\n", ihk_mc_get_processor_id(), start, len0, prot); @@ -636,7 +673,6 @@ SYSCALL_DECLARE(mprotect) } ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock); - 
begin_free_pages_pending(); #if 0 /* check contiguous map */ @@ -719,6 +755,10 @@ SYSCALL_DECLARE(mprotect) } } + if ((range->flag ^ protflags) & VR_PROT_WRITE) { + ro_changed = 1; + } + error = change_prot_process_memory_range(proc, range, protflags); if (error) { ekprintf("sys_mprotect(%lx,%lx,%x):change failed. %d\n", @@ -744,7 +784,13 @@ SYSCALL_DECLARE(mprotect) out: // XXX: TLB flush flush_tlb(); - finish_free_pages_pending(); + if (ro_changed && !error) { + error = set_host_vma(start, len, prot & (PROT_READ|PROT_WRITE)); + if (error) { + kprintf("sys_mprotect:set_host_vma failed. %d\n", error); + /* through */ + } + } ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); dkprintf("[%d]sys_mprotect(%lx,%lx,%x): %d\n", ihk_mc_get_processor_id(), start, len0, prot, error);