diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index ad135b64..6f232b3a 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -115,7 +115,7 @@ arch_clear_host_user_space()
 
 	/* XXX: might be unnecessary */
 	clear_host_pte(th->vm->region.user_start,
-			(th->vm->region.user_end - th->vm->region.user_start));
+			(th->vm->region.user_end - th->vm->region.user_start), 0);
 
 	return 0;
 }
diff --git a/arch/x86_64/kernel/syscall.c b/arch/x86_64/kernel/syscall.c
index 602dcf1c..aec4baae 100644
--- a/arch/x86_64/kernel/syscall.c
+++ b/arch/x86_64/kernel/syscall.c
@@ -154,7 +154,7 @@ arch_clear_host_user_space()
 
 	/* XXX: might be unnecessary */
 	clear_host_pte(th->vm->region.user_start,
-			(th->vm->region.user_end - th->vm->region.user_start));
+			(th->vm->region.user_end - th->vm->region.user_start), 0);
 
 	return 0;
 }
diff --git a/kernel/include/process.h b/kernel/include/process.h
index 32fb8805..668ac5c9 100644
--- a/kernel/include/process.h
+++ b/kernel/include/process.h
@@ -750,7 +750,7 @@ struct process_vm {
 	// 2. addition of process page table (allocate_pages, update_process_page_table)
 	// note that physical memory allocator (ihk_mc_alloc_pages, ihk_pagealloc_alloc)
 	// is protected by its own lock (see ihk/manycore/generic/page_alloc.c)
-	unsigned long is_memory_range_lock_taken;
+	int is_memory_range_lock_taken;
 	/* #986: Fix deadlock between do_page_fault_process_vm() and set_host_vma() */
 
 	ihk_atomic_t refcount;
diff --git a/kernel/include/syscall.h b/kernel/include/syscall.h
index c01d57d1..b54e5f06 100644
--- a/kernel/include/syscall.h
+++ b/kernel/include/syscall.h
@@ -486,7 +486,7 @@ void set_cputime(enum set_cputime_mode mode);
 int do_munmap(void *addr, size_t len, int holding_memory_range_lock);
 intptr_t do_mmap(uintptr_t addr0, size_t len0, int prot, int flags,
 		int fd, off_t off0);
-void clear_host_pte(uintptr_t addr, size_t len);
+void clear_host_pte(uintptr_t addr, size_t len, int holding_memory_range_lock);
 typedef int32_t key_t;
 int do_shmget(key_t key, size_t size, int shmflg);
 struct process_vm;
diff --git a/kernel/process.c b/kernel/process.c
index 6b08b55e..afbfb1fe 100644
--- a/kernel/process.c
+++ b/kernel/process.c
@@ -2168,19 +2168,11 @@ static int do_page_fault_process_vm(struct process_vm *vm, void *fault_addr0, ui
 	dkprintf("[%d]do_page_fault_process_vm(%p,%lx,%lx)\n",
 		 ihk_mc_get_processor_id(), vm, fault_addr0, reason);
 
-	if (!thread->vm->is_memory_range_lock_taken) {
-		/* For the case where is_memory_range_lock_taken is incremented after memory_range_lock is taken.
-		 */
-		while (1) {
-			if (thread->vm->is_memory_range_lock_taken) {
-				goto skip;
-			}
-			if (ihk_mc_spinlock_trylock_noirq(&vm->memory_range_lock)) {
-				locked = 1;
-				break;
-			}
-		}
+	if (thread->vm->is_memory_range_lock_taken == -1 ||
+	    thread->vm->is_memory_range_lock_taken != ihk_mc_get_processor_id()) {
+		ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
+		locked = 1;
 	} else {
-skip:;
 		dkprintf("%s: INFO: skip locking of memory_range_lock,pid=%d,tid=%d\n",
 			 __func__, thread->proc->pid, thread->tid);
 	}
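Note on the hunk above: the old code tested a plain boolean and spun on ihk_mc_spinlock_trylock_noirq(), so the fault handler could skip a memory_range_lock that was actually held by a *different* CPU. Recording the owner's processor id instead means the lock is skipped only when this CPU itself published ownership in clear_host_pte()/set_host_vma() (see the kernel/syscall.c hunks below). What follows is a minimal stand-alone sketch of that check, not McKernel code: pthread_mutex_* stands in for ihk_mc_spinlock_*_noirq(), my_cpu_id() for ihk_mc_get_processor_id(), and the patch's `== -1` arm is folded into the `!=` comparison, since -1 never equals a processor id.

/* Hypothetical stand-alone sketch of the owner-CPU check (not McKernel code). */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t memory_range_lock = PTHREAD_MUTEX_INITIALIZER;
static int is_memory_range_lock_taken = -1;	/* owning CPU id, -1 = unowned */

static int my_cpu_id(void)	/* stand-in for ihk_mc_get_processor_id() */
{
	return 0;
}

static void handle_page_fault(void)
{
	int locked = 0;

	/* Skip the lock only if this very CPU already owns it; a boolean
	 * cannot distinguish "owned by me" from "owned by another CPU",
	 * which is what the old trylock loop got wrong. */
	if (is_memory_range_lock_taken != my_cpu_id()) {
		pthread_mutex_lock(&memory_range_lock);
		locked = 1;
	}

	/* ... look up the faulting vm_range and map the page ... */

	if (locked) {
		pthread_mutex_unlock(&memory_range_lock);
	}
}

int main(void)
{
	handle_page_fault();	/* lock unowned: taken and released here */

	/* Simulate re-entry on the owning CPU: no second lock attempt. */
	pthread_mutex_lock(&memory_range_lock);
	is_memory_range_lock_taken = my_cpu_id();
	handle_page_fault();	/* skips locking, so no self-deadlock */
	is_memory_range_lock_taken = -1;
	pthread_mutex_unlock(&memory_range_lock);

	printf("done\n");
	return 0;
}

The sentinel is -1 rather than 0 because 0 is itself a valid processor id; that is also why the process.h hunk changes the field from unsigned long to int.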
diff --git a/kernel/syscall.c b/kernel/syscall.c
index 7d4964ea..514285ee 100644
--- a/kernel/syscall.c
+++ b/kernel/syscall.c
@@ -1448,17 +1448,28 @@ SYSCALL_DECLARE(exit_group)
 	return 0;
 }
 
-void clear_host_pte(uintptr_t addr, size_t len)
+void clear_host_pte(uintptr_t addr, size_t len, int holding_memory_range_lock)
 {
 	ihk_mc_user_context_t ctx;
 	long lerror;
+	struct thread *thread = cpu_local_var(current);
 
 	ihk_mc_syscall_arg0(&ctx) = addr;
 	ihk_mc_syscall_arg1(&ctx) = len;
 	/* NOTE: 3rd parameter denotes new rpgtable of host process (if not zero) */
 	ihk_mc_syscall_arg2(&ctx) = 0;
 
+	/* #986: Let remote page fault code skip
+	   read-locking memory_range_lock. It's safe because other writers are warded off
+	   until the remote PF handling code calls up_write(&current->mm->mmap_sem) and
+	   vm_range is consistent when calling this function. */
+	if (holding_memory_range_lock) {
+		thread->vm->is_memory_range_lock_taken = ihk_mc_get_processor_id();
+	}
 	lerror = syscall_generic_forwarding(__NR_munmap, &ctx);
+	if (holding_memory_range_lock) {
+		thread->vm->is_memory_range_lock_taken = -1;
+	}
 	if (lerror) {
 		kprintf("clear_host_pte failed. %ld\n", lerror);
 	}
@@ -1477,11 +1488,11 @@ static int set_host_vma(uintptr_t addr, size_t len, int prot, int holding_memory
 	dkprintf("%s: offloading __NR_mprotect\n", __FUNCTION__);
 
 	/* #986: Let remote page fault code skip
-	   read-locking memory_range_lock. It's safe because other writers are warded off 
+	   read-locking memory_range_lock. It's safe because other writers are warded off
 	   until the remote PF handling code calls up_write(&current->mm->mmap_sem) and
 	   vm_range is consistent when calling this function. */
 	if (holding_memory_range_lock) {
-		thread->vm->is_memory_range_lock_taken = 1;
+		thread->vm->is_memory_range_lock_taken = ihk_mc_get_processor_id();
 	}
 	lerror = syscall_generic_forwarding(__NR_mprotect, &ctx);
 	if (lerror) {
@@ -1493,7 +1504,7 @@ static int set_host_vma(uintptr_t addr, size_t len, int prot, int holding_memory
 	lerror = 0;
out:
 	if (holding_memory_range_lock) {
-		thread->vm->is_memory_range_lock_taken = 0;
+		thread->vm->is_memory_range_lock_taken = -1;
 	}
 	return (int)lerror;
 }
@@ -1507,7 +1518,7 @@ int do_munmap(void *addr, size_t len, int holding_memory_range_lock)
 	error = remove_process_memory_range(cpu_local_var(current)->vm,
 			(intptr_t)addr, (intptr_t)addr+len, &ro_freed);
 	if (error || !ro_freed) {
-		clear_host_pte((uintptr_t)addr, len);
+		clear_host_pte((uintptr_t)addr, len, holding_memory_range_lock);
 	} else {
 		error = set_host_vma((uintptr_t)addr, len, PROT_READ |
 				PROT_WRITE | PROT_EXEC, holding_memory_range_lock);
@@ -8055,7 +8066,7 @@ SYSCALL_DECLARE(remap_file_pages)
 				start0, size, prot, pgoff, flags, error);
 		goto out;
 	}
-	clear_host_pte(start, size);	/* XXX: workaround */
+	clear_host_pte(start, size, 1 /* memory range lock */);	/* XXX: workaround */
 
 	if (range->flag & VR_LOCKED) {
 		need_populate = 1;
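On the writer side, clear_host_pte() now follows the same publish/clear protocol that set_host_vma() already used: the owner id is published immediately before syscall_generic_forwarding() and reset to -1 right after it returns, so the window in which a remote page fault may skip memory_range_lock covers exactly the forwarded host call. Below is a hedged sketch of that protocol with hypothetical stand-ins: forward_to_host() for syscall_generic_forwarding() and my_cpu_id() for ihk_mc_get_processor_id().

/* Hypothetical sketch of the writer-side protocol (not McKernel code). */
#include <stdio.h>

static int is_memory_range_lock_taken = -1;	/* owning CPU id, -1 = unowned */

static int my_cpu_id(void)	/* ihk_mc_get_processor_id() stand-in */
{
	return 3;
}

static long forward_to_host(void)	/* syscall_generic_forwarding() stand-in */
{
	/* A remote page fault handled on this CPU may run here; it sees
	 * is_memory_range_lock_taken == my_cpu_id() and skips the lock. */
	return 0;
}

static long host_munmap_sketch(int holding_memory_range_lock)
{
	long lerror;

	if (holding_memory_range_lock) {
		is_memory_range_lock_taken = my_cpu_id();	/* open skip window */
	}
	lerror = forward_to_host();
	if (holding_memory_range_lock) {
		is_memory_range_lock_taken = -1;	/* close skip window */
	}
	return lerror;
}

int main(void)
{
	printf("lerror=%ld\n", host_munmap_sketch(1));
	return 0;
}

Callers such as do_munmap() thread their existing holding_memory_range_lock argument through, remap_file_pages() passes 1 because it runs under the memory range lock, and the arch_clear_host_user_space() call sites pass 0 since they do not hold memory_range_lock.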