munmap: fix deadlock with remote pagefault on vm range lock

Add the same protection to clear_host_pte as to set_host_vma (see #986)

Also make the page fault handler skip taking the lock only if the munmap
happened on the same CPU
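
In outline, the fix: the thread that already holds memory_range_lock records its CPU id
before forwarding munmap/mprotect to the host, and the remote page fault handler skips
taking the lock only when that recorded owner is the faulting CPU itself. Below is a
standalone sketch of that hand-off; the struct, helpers, and main() driver are stand-ins
for illustration, not the real McKernel definitions.

/* Standalone sketch of the ownership hand-off (stand-ins, not McKernel API). */
#include <stdio.h>

struct process_vm {
	int is_memory_range_lock_taken;	/* CPU id of the lock holder, -1 if none */
};

static int current_cpu;			/* stand-in for ihk_mc_get_processor_id() */
static int my_cpu(void) { return current_cpu; }

/* Writer side: mark ownership around the forwarded host syscall. */
static void forward_munmap(struct process_vm *vm, int holding_memory_range_lock)
{
	if (holding_memory_range_lock)
		vm->is_memory_range_lock_taken = my_cpu();
	/* ... the real code forwards __NR_munmap to the host here, which can
	   bounce a remote page fault back onto this very CPU ... */
	if (holding_memory_range_lock)
		vm->is_memory_range_lock_taken = -1;
}

/* Fault side: take memory_range_lock unless the recorded owner is us. */
static int fault_must_lock(const struct process_vm *vm)
{
	return vm->is_memory_range_lock_taken != my_cpu();
}

int main(void)
{
	struct process_vm vm = { .is_memory_range_lock_taken = -1 };

	printf("no owner      -> take lock? %d\n", fault_must_lock(&vm)); /* 1 */

	vm.is_memory_range_lock_taken = my_cpu(); /* munmap in flight on CPU 0 */
	printf("owner == self -> take lock? %d\n", fault_must_lock(&vm)); /* 0 */

	current_cpu = 1;                          /* same fault, another CPU */
	printf("owner != self -> take lock? %d\n", fault_must_lock(&vm)); /* 1 */

	current_cpu = 0;
	forward_munmap(&vm, 1);                   /* marker restored to -1 */
	printf("after forward -> take lock? %d\n", fault_must_lock(&vm)); /* 1 */
	return 0;
}

With the old boolean flag, a fault on any CPU skipped the lock while any forwarded
munmap/mprotect was in flight; keying the marker to a CPU id keeps the skip path for
the genuine self-deadlock case only.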

Change-Id: I6d9e68e8f8905b20bb2ccfa72848e04fe6404ab6
Dominique Martinet, 2019-03-28 14:01:15 +09:00
committed by Balazs Gerofi
parent 621533bbd3
commit a563d780c1
6 changed files with 25 additions and 22 deletions


@@ -115,7 +115,7 @@ arch_clear_host_user_space()
 	/* XXX: might be unnecessary */
 	clear_host_pte(th->vm->region.user_start,
-		(th->vm->region.user_end - th->vm->region.user_start));
+		(th->vm->region.user_end - th->vm->region.user_start), 0);
 	return 0;
 }


@@ -154,7 +154,7 @@ arch_clear_host_user_space()
 	/* XXX: might be unnecessary */
 	clear_host_pte(th->vm->region.user_start,
-		(th->vm->region.user_end - th->vm->region.user_start));
+		(th->vm->region.user_end - th->vm->region.user_start), 0);
 	return 0;
 }


@@ -750,7 +750,7 @@ struct process_vm {
 	// 2. addition of process page table (allocate_pages, update_process_page_table)
 	// note that physical memory allocator (ihk_mc_alloc_pages, ihk_pagealloc_alloc)
 	// is protected by its own lock (see ihk/manycore/generic/page_alloc.c)
-	unsigned long is_memory_range_lock_taken;
+	int is_memory_range_lock_taken;
 	/* #986: Fix deadlock between do_page_fault_process_vm() and set_host_vma() */
 	ihk_atomic_t refcount;
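
The type change above reflects a change of meaning, not just width: the field used to be
a 0/1 "lock is taken" flag and now holds the owning CPU id, with -1 for "no owner", hence
the signed int. In sketch form (the named sentinel is hypothetical; the patch itself
writes the literal -1):

/* Hypothetical named sentinel; the patch writes the literal -1. */
#define MEMORY_RANGE_LOCK_UNOWNED (-1)

struct process_vm_excerpt {		/* sketch, not the real struct */
	int is_memory_range_lock_taken;	/* owning CPU id, or
					 * MEMORY_RANGE_LOCK_UNOWNED */
};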


@@ -486,7 +486,7 @@ void set_cputime(enum set_cputime_mode mode);
 int do_munmap(void *addr, size_t len, int holding_memory_range_lock);
 intptr_t do_mmap(uintptr_t addr0, size_t len0, int prot, int flags, int fd,
 	off_t off0);
-void clear_host_pte(uintptr_t addr, size_t len);
+void clear_host_pte(uintptr_t addr, size_t len, int holding_memory_range_lock);
 typedef int32_t key_t;
 int do_shmget(key_t key, size_t size, int shmflg);
 struct process_vm;


@@ -2168,19 +2168,11 @@ static int do_page_fault_process_vm(struct process_vm *vm, void *fault_addr0, ui
 	dkprintf("[%d]do_page_fault_process_vm(%p,%lx,%lx)\n",
 		ihk_mc_get_processor_id(), vm, fault_addr0, reason);
 
-	if (!thread->vm->is_memory_range_lock_taken) {
-		/* For the case where is_memory_range_lock_taken is incremented after memory_range_lock is taken. */
-		while (1) {
-			if (thread->vm->is_memory_range_lock_taken) {
-				goto skip;
-			}
-			if (ihk_mc_spinlock_trylock_noirq(&vm->memory_range_lock)) {
-				locked = 1;
-				break;
-			}
-		}
+	if (thread->vm->is_memory_range_lock_taken == -1 ||
+	    thread->vm->is_memory_range_lock_taken != ihk_mc_get_processor_id()) {
+		ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
+		locked = 1;
 	} else {
-	skip:;
 		dkprintf("%s: INFO: skip locking of memory_range_lock,pid=%d,tid=%d\n",
 			__func__, thread->proc->pid, thread->tid);
 	}
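
Two things change in this hunk: the old trylock spin with its goto skip escape is gone,
so a fault on another CPU now simply blocks on memory_range_lock; and the skip path
narrows from "someone set the flag" to "the recorded owner is this CPU". Note the == -1
test is subsumed by the inequality, since -1 never equals a valid CPU id; a condensed,
equivalent reading of the new predicate (editorial, not part of the patch):

/* Equivalent form of the new condition, assuming CPU ids are >= 0. */
int need_lock = (thread->vm->is_memory_range_lock_taken !=
		 ihk_mc_get_processor_id());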


@@ -1448,17 +1448,28 @@ SYSCALL_DECLARE(exit_group)
 	return 0;
 }
 
-void clear_host_pte(uintptr_t addr, size_t len)
+void clear_host_pte(uintptr_t addr, size_t len, int holding_memory_range_lock)
 {
 	ihk_mc_user_context_t ctx;
 	long lerror;
+	struct thread *thread = cpu_local_var(current);
 
 	ihk_mc_syscall_arg0(&ctx) = addr;
 	ihk_mc_syscall_arg1(&ctx) = len;
 	/* NOTE: 3rd parameter denotes new rpgtable of host process (if not zero) */
 	ihk_mc_syscall_arg2(&ctx) = 0;
 
+	/* #986: Let remote page fault code skip
+	   read-locking memory_range_lock. It's safe because other writers are warded off
+	   until the remote PF handling code calls up_write(&current->mm->mmap_sem) and
+	   vm_range is consistent when calling this function. */
+	if (holding_memory_range_lock) {
+		thread->vm->is_memory_range_lock_taken = ihk_mc_get_processor_id();
+	}
 	lerror = syscall_generic_forwarding(__NR_munmap, &ctx);
+	if (holding_memory_range_lock) {
+		thread->vm->is_memory_range_lock_taken = -1;
+	}
 	if (lerror) {
 		kprintf("clear_host_pte failed. %ld\n", lerror);
 	}
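
clear_host_pte() now brackets the forwarded __NR_munmap exactly as set_host_vma() below
brackets __NR_mprotect. The shared pattern could be factored into a helper along these
lines (a hypothetical refactoring, not part of the patch; it ignores set_host_vma()'s
out: label, which resets the marker only after its error handling):

/* Hypothetical helper for the pattern shared by clear_host_pte() and
 * set_host_vma(); identifiers follow the patch, the helper itself is new. */
static long syscall_forwarding_marked(struct thread *thread, int nr,
				      ihk_mc_user_context_t *ctx,
				      int holding_memory_range_lock)
{
	long lerror;

	if (holding_memory_range_lock)
		thread->vm->is_memory_range_lock_taken =
			ihk_mc_get_processor_id();
	lerror = syscall_generic_forwarding(nr, ctx);
	if (holding_memory_range_lock)
		thread->vm->is_memory_range_lock_taken = -1;
	return lerror;
}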
@@ -1477,11 +1488,11 @@ static int set_host_vma(uintptr_t addr, size_t len, int prot, int holding_memory
 	dkprintf("%s: offloading __NR_mprotect\n", __FUNCTION__);
 
 	/* #986: Let remote page fault code skip
 	   read-locking memory_range_lock. It's safe because other writers are warded off
 	   until the remote PF handling code calls up_write(&current->mm->mmap_sem) and
 	   vm_range is consistent when calling this function. */
 	if (holding_memory_range_lock) {
-		thread->vm->is_memory_range_lock_taken = 1;
+		thread->vm->is_memory_range_lock_taken = ihk_mc_get_processor_id();
 	}
 	lerror = syscall_generic_forwarding(__NR_mprotect, &ctx);
 	if (lerror) {
@@ -1493,7 +1504,7 @@ static int set_host_vma(uintptr_t addr, size_t len, int prot, int holding_memory
 	lerror = 0;
 out:
 	if (holding_memory_range_lock) {
-		thread->vm->is_memory_range_lock_taken = 0;
+		thread->vm->is_memory_range_lock_taken = -1;
 	}
 	return (int)lerror;
 }
@@ -1507,7 +1518,7 @@ int do_munmap(void *addr, size_t len, int holding_memory_range_lock)
 	error = remove_process_memory_range(cpu_local_var(current)->vm,
 			(intptr_t)addr, (intptr_t)addr+len, &ro_freed);
 	if (error || !ro_freed) {
-		clear_host_pte((uintptr_t)addr, len);
+		clear_host_pte((uintptr_t)addr, len, holding_memory_range_lock);
 	}
 	else {
 		error = set_host_vma((uintptr_t)addr, len, PROT_READ | PROT_WRITE | PROT_EXEC, holding_memory_range_lock);
@@ -8055,7 +8066,7 @@ SYSCALL_DECLARE(remap_file_pages)
 			start0, size, prot, pgoff, flags, error);
 		goto out;
 	}
-	clear_host_pte(start, size); /* XXX: workaround */
+	clear_host_pte(start, size, 1 /* memory range lock */); /* XXX: workaround */
 	if (range->flag & VR_LOCKED) {
 		need_populate = 1;