munmap: fix deadlock with remote pagefault on vm range lock

Add similar protection to clear_host_pte than to set_host_vma (see #986)

Also make the page fault handler only skip taking lock if the munmap
happened on the same cpu id

Change-Id: I6d9e68e8f8905b20bb2ccfa72848e04fe6404ab6
This commit is contained in:
Dominique Martinet
2019-03-28 14:01:15 +09:00
committed by Balazs Gerofi
parent 621533bbd3
commit a563d780c1
6 changed files with 25 additions and 22 deletions

View File

@@ -1448,17 +1448,28 @@ SYSCALL_DECLARE(exit_group)
return 0;
}
void clear_host_pte(uintptr_t addr, size_t len)
void clear_host_pte(uintptr_t addr, size_t len, int holding_memory_range_lock)
{
ihk_mc_user_context_t ctx;
long lerror;
struct thread *thread = cpu_local_var(current);
ihk_mc_syscall_arg0(&ctx) = addr;
ihk_mc_syscall_arg1(&ctx) = len;
/* NOTE: 3rd parameter denotes new rpgtable of host process (if not zero) */
ihk_mc_syscall_arg2(&ctx) = 0;
/* #986: Let remote page fault code skip
read-locking memory_range_lock. It's safe because other writers are warded off
until the remote PF handling code calls up_write(&current->mm->mmap_sem) and
vm_range is consistent when calling this function. */
if (holding_memory_range_lock) {
thread->vm->is_memory_range_lock_taken = ihk_mc_get_processor_id();
}
lerror = syscall_generic_forwarding(__NR_munmap, &ctx);
if (holding_memory_range_lock) {
thread->vm->is_memory_range_lock_taken = -1;
}
if (lerror) {
kprintf("clear_host_pte failed. %ld\n", lerror);
}
@@ -1477,11 +1488,11 @@ static int set_host_vma(uintptr_t addr, size_t len, int prot, int holding_memory
dkprintf("%s: offloading __NR_mprotect\n", __FUNCTION__);
/* #986: Let remote page fault code skip
read-locking memory_range_lock. It's safe because other writers are warded off
read-locking memory_range_lock. It's safe because other writers are warded off
until the remote PF handling code calls up_write(&current->mm->mmap_sem) and
vm_range is consistent when calling this function. */
if (holding_memory_range_lock) {
thread->vm->is_memory_range_lock_taken = 1;
thread->vm->is_memory_range_lock_taken = ihk_mc_get_processor_id();
}
lerror = syscall_generic_forwarding(__NR_mprotect, &ctx);
if (lerror) {
@@ -1493,7 +1504,7 @@ static int set_host_vma(uintptr_t addr, size_t len, int prot, int holding_memory
lerror = 0;
out:
if (holding_memory_range_lock) {
thread->vm->is_memory_range_lock_taken = 0;
thread->vm->is_memory_range_lock_taken = -1;
}
return (int)lerror;
}
@@ -1507,7 +1518,7 @@ int do_munmap(void *addr, size_t len, int holding_memory_range_lock)
error = remove_process_memory_range(cpu_local_var(current)->vm,
(intptr_t)addr, (intptr_t)addr+len, &ro_freed);
if (error || !ro_freed) {
clear_host_pte((uintptr_t)addr, len);
clear_host_pte((uintptr_t)addr, len, holding_memory_range_lock);
}
else {
error = set_host_vma((uintptr_t)addr, len, PROT_READ | PROT_WRITE | PROT_EXEC, holding_memory_range_lock);
@@ -8055,7 +8066,7 @@ SYSCALL_DECLARE(remap_file_pages)
start0, size, prot, pgoff, flags, error);
goto out;
}
clear_host_pte(start, size); /* XXX: workaround */
clear_host_pte(start, size, 1 /* memory range lock */); /* XXX: workaround */
if (range->flag & VR_LOCKED) {
need_populate = 1;