Fix deadlock involving mmap_sem and memory_range_lock

Change-Id: I187246271163e708af6542c057d0a8dfde5b211e
Fujitsu: TEMP_FIX_1
Refs: #986
Author: Masamichi Takagi
Date:   2018-09-03 13:54:40 +09:00
Commit: 567dcd3846
Parent: b080e0f301

4 changed files with 52 additions and 19 deletions


@@ -715,6 +715,8 @@ struct process_vm {
 	// 2. addition of process page table (allocate_pages, update_process_page_table)
 	// note that physical memory allocator (ihk_mc_alloc_pages, ihk_pagealloc_alloc)
 	// is protected by its own lock (see ihk/manycore/generic/page_alloc.c)
+	unsigned long is_memory_range_lock_taken;
+	/* #986: Fix deadlock between do_page_fault_process_vm() and set_host_vma() */
 	ihk_atomic_t refcount;
 	int exiting;


@@ -462,7 +462,7 @@ static inline unsigned long timespec_to_jiffy(const struct timespec *ats)
 void reset_cputime(void);
 void set_cputime(int mode);
-int do_munmap(void *addr, size_t len);
+int do_munmap(void *addr, size_t len, int holding_memory_range_lock);
 intptr_t do_mmap(intptr_t addr0, size_t len0, int prot, int flags, int fd,
 		off_t off0);
 void clear_host_pte(uintptr_t addr, size_t len);


@@ -1952,11 +1952,28 @@ static int do_page_fault_process_vm(struct process_vm *vm, void *fault_addr0, ui
 	int error;
 	const uintptr_t fault_addr = (uintptr_t)fault_addr0;
 	struct vm_range *range;
+	struct thread *thread = cpu_local_var(current);
+	int locked = 0;
 
 	dkprintf("[%d]do_page_fault_process_vm(%p,%lx,%lx)\n",
 			ihk_mc_get_processor_id(), vm, fault_addr0, reason);
 
-	ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
+	if (!thread->vm->is_memory_range_lock_taken) {
+		/* Re-check: is_memory_range_lock_taken may become set after memory_range_lock is taken. */
+		while (1) {
+			if (thread->vm->is_memory_range_lock_taken) {
+				goto skip;
+			}
+			if (ihk_mc_spinlock_trylock_noirq(&vm->memory_range_lock)) {
+				locked = 1;
+				break;
+			}
+		}
+	} else {
+	skip:;
+		dkprintf("%s: INFO: skip locking of memory_range_lock,pid=%d,tid=%d\n",
+			__func__, thread->proc->pid, thread->tid);
+	}
 
 	if (vm->exiting) {
 		error = -ECANCELED;
@@ -2065,7 +2082,9 @@ static int do_page_fault_process_vm(struct process_vm *vm, void *fault_addr0, ui
 	error = 0;
 out:
-	ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
+	if (locked) {
+		ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
+	}
 
 	dkprintf("[%d]do_page_fault_process_vm(%p,%lx,%lx): %d\n",
 			ihk_mc_get_processor_id(), vm, fault_addr0,
 			reason, error);
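
For reference, the locking rule the hunks above introduce can be read in isolation. The following is a minimal, compilable sketch, not part of the commit, with POSIX spinlocks and hypothetical names (struct vm, fault_on_range()) standing in for the McKernel types: the fault path takes memory_range_lock via trylock unless the per-VM flag says the lock is already held by a thread that is itself blocked in an offloaded host call.

/* Sketch only: page-fault side of the skip-lock protocol (hypothetical names). */
#include <pthread.h>
#include <stdio.h>

struct vm {                              /* stand-in for struct process_vm */
	pthread_spinlock_t memory_range_lock;
	volatile unsigned long is_memory_range_lock_taken;
};

static void fault_on_range(struct vm *vm)
{
	int locked = 0;

	if (!vm->is_memory_range_lock_taken) {
		/* Re-check the flag on every spin: it may become set after the
		 * holder took memory_range_lock but before we observed it. */
		while (1) {
			if (vm->is_memory_range_lock_taken)
				break;                    /* holder is blocked on our behalf: skip */
			if (pthread_spin_trylock(&vm->memory_range_lock) == 0) {
				locked = 1;               /* we own the lock ourselves */
				break;
			}
		}
	}

	if (!locked)
		printf("skip locking memory_range_lock\n");

	/* ... walk the VM ranges and resolve the fault here ... */

	if (locked)
		pthread_spin_unlock(&vm->memory_range_lock);
}

int main(void)
{
	struct vm vm = { .is_memory_range_lock_taken = 0 };

	pthread_spin_init(&vm.memory_range_lock, PTHREAD_PROCESS_PRIVATE);
	fault_on_range(&vm);                 /* uncontended: takes and releases the lock */
	pthread_spin_destroy(&vm.memory_range_lock);
	return 0;
}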


@@ -1276,15 +1276,24 @@ void clear_host_pte(uintptr_t addr, size_t len)
 	return;
 }
 
-static int set_host_vma(uintptr_t addr, size_t len, int prot)
+static int set_host_vma(uintptr_t addr, size_t len, int prot, int holding_memory_range_lock)
 {
 	ihk_mc_user_context_t ctx;
 	long lerror;
+	struct thread *thread = cpu_local_var(current);
 
 	ihk_mc_syscall_arg0(&ctx) = addr;
 	ihk_mc_syscall_arg1(&ctx) = len;
 	ihk_mc_syscall_arg2(&ctx) = prot;
 
+	dkprintf("%s: offloading __NR_mprotect\n", __FUNCTION__);
+	/* #986: Let the remote page fault code skip read-locking
+	   memory_range_lock. This is safe because other writers are warded
+	   off until the remote PF handling code calls up_write(&current->mm->mmap_sem),
+	   so vm_range stays consistent while this function runs. */
+	if (holding_memory_range_lock) {
+		thread->vm->is_memory_range_lock_taken = 1;
+	}
 	lerror = syscall_generic_forwarding(__NR_mprotect, &ctx);
 	if (lerror) {
 		kprintf("set_host_vma(%lx,%lx,%x) failed. %ld\n",
@@ -1294,10 +1303,13 @@ static int set_host_vma(uintptr_t addr, size_t len, int prot)
 	lerror = 0;
 out:
+	if (holding_memory_range_lock) {
+		thread->vm->is_memory_range_lock_taken = 0;
+	}
 	return (int)lerror;
 }
 
-int do_munmap(void *addr, size_t len)
+int do_munmap(void *addr, size_t len, int holding_memory_range_lock)
 {
 	int error;
 	int ro_freed;
@@ -1309,7 +1321,7 @@ int do_munmap(void *addr, size_t len)
 		clear_host_pte((uintptr_t)addr, len);
 	}
 	else {
-		error = set_host_vma((uintptr_t)addr, len, PROT_READ|PROT_WRITE);
+		error = set_host_vma((uintptr_t)addr, len, PROT_READ|PROT_WRITE, holding_memory_range_lock);
 		if (error) {
 			kprintf("sys_munmap:set_host_vma failed. %d\n", error);
 			/* through */
@@ -1445,7 +1457,7 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
 	if (flags & MAP_FIXED) {
 		/* clear specified address range */
-		error = do_munmap((void *)addr, len);
+		error = do_munmap((void *)addr, len, 1/* holding memory_range_lock */);
 		if (error) {
 			ekprintf("do_mmap:do_munmap(%lx,%lx) failed. %d\n",
 					addr, len, error);
@@ -1487,7 +1499,7 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
 	}
 
 	if (!(prot & PROT_WRITE)) {
-		error = set_host_vma(addr, len, PROT_READ);
+		error = set_host_vma(addr, len, PROT_READ, 1/* holding memory_range_lock */);
 		if (error) {
 			kprintf("do_mmap:set_host_vma failed. %d\n", error);
 			goto out;
@@ -1689,7 +1701,7 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
 out:
 	if (ro_vma_mapped) {
-		(void)set_host_vma(addr, len, PROT_READ|PROT_WRITE);
+		(void)set_host_vma(addr, len, PROT_READ|PROT_WRITE, 1/* holding memory_range_lock */);
 	}
 	ihk_mc_spinlock_unlock_noirq(&thread->vm->memory_range_lock);
@@ -1760,7 +1772,7 @@ SYSCALL_DECLARE(munmap)
 	}
 
 	ihk_mc_spinlock_lock_noirq(&thread->vm->memory_range_lock);
-	error = do_munmap((void *)addr, len);
+	error = do_munmap((void *)addr, len, 1/* holding memory_range_lock */);
 	ihk_mc_spinlock_unlock_noirq(&thread->vm->memory_range_lock);
 
 out:
@@ -1899,7 +1911,7 @@ out:
 	// XXX: TLB flush
 	flush_tlb();
 	if (ro_changed && !error) {
-		error = set_host_vma(start, len, prot & (PROT_READ|PROT_WRITE));
+		error = set_host_vma(start, len, prot & (PROT_READ|PROT_WRITE), 1/* holding memory_range_lock */);
 		if (error) {
 			kprintf("sys_mprotect:set_host_vma failed. %d\n", error);
 			/* through */
@@ -2149,7 +2161,7 @@ static void munmap_all(void)
 		addr = (void *)range->start;
 		size = range->end - range->start;
-		error = do_munmap(addr, size);
+		error = do_munmap(addr, size, 1/* holding memory_range_lock */);
 		if (error) {
 			kprintf("munmap_all():do_munmap(%p,%lx) failed. %d\n",
 				addr, size, error);
@@ -4987,7 +4999,7 @@ SYSCALL_DECLARE(shmat)
 	vrflags |= VRFLAG_PROT_TO_MAXPROT(vrflags);
 	if (!(prot & PROT_WRITE)) {
-		error = set_host_vma(addr, len, PROT_READ);
+		error = set_host_vma(addr, len, PROT_READ, 1/* holding memory_range_lock */);
 		if (error) {
 			ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
 			shmobj_list_unlock();
@@ -5002,7 +5014,7 @@ SYSCALL_DECLARE(shmat)
 			vrflags, &obj->memobj, 0, obj->pgshift, NULL);
 	if (error) {
 		if (!(prot & PROT_WRITE)) {
-			(void)set_host_vma(addr, len, PROT_READ|PROT_WRITE);
+			(void)set_host_vma(addr, len, PROT_READ|PROT_WRITE, 1/* holding memory_range_lock */);
 		}
 		memobj_release(&obj->memobj);
 		ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
@@ -5297,7 +5309,7 @@ SYSCALL_DECLARE(shmdt)
 		return -EINVAL;
 	}
 
-	error = do_munmap((void *)range->start, (range->end - range->start));
+	error = do_munmap((void *)range->start, (range->end - range->start), 1/* holding memory_range_lock */);
 	if (error) {
 		ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
 		dkprintf("shmdt(%p): %d\n", shmaddr, error);
@@ -7825,7 +7837,7 @@ SYSCALL_DECLARE(mremap)
 	/* do the remap */
 	if (need_relocate) {
 		if (flags & MREMAP_FIXED) {
-			error = do_munmap((void *)newstart, newsize);
+			error = do_munmap((void *)newstart, newsize, 1/* holding memory_range_lock */);
 			if (error) {
 				ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
 						"fixed:munmap failed. %d\n",
@@ -7872,7 +7884,7 @@ SYSCALL_DECLARE(mremap)
 			goto out;
 		}
 
-		error = do_munmap((void *)oldstart, oldsize);
+		error = do_munmap((void *)oldstart, oldsize, 1/* holding memory_range_lock */);
 		if (error) {
 			ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
 					"relocate:munmap failed. %d\n",
@@ -7883,7 +7895,7 @@ SYSCALL_DECLARE(mremap)
 		}
 	}
 	else if (newsize < oldsize) {
-		error = do_munmap((void *)newend, (oldend - newend));
+		error = do_munmap((void *)newend, (oldend - newend), 1/* holding memory_range_lock */);
 		if (error) {
 			ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
 					"shrink:munmap failed. %d\n",