munmap: fix deadlock with remote pagefault on vm range lock
Add similar protection to clear_host_pte than to set_host_vma (see #986) Also make the page fault handler only skip taking lock if the munmap happened on the same cpu id Change-Id: I6d9e68e8f8905b20bb2ccfa72848e04fe6404ab6
This commit is contained in:
committed by
Balazs Gerofi
parent
621533bbd3
commit
a563d780c1
@ -115,7 +115,7 @@ arch_clear_host_user_space()
|
|||||||
|
|
||||||
/* XXX: might be unnecessary */
|
/* XXX: might be unnecessary */
|
||||||
clear_host_pte(th->vm->region.user_start,
|
clear_host_pte(th->vm->region.user_start,
|
||||||
(th->vm->region.user_end - th->vm->region.user_start));
|
(th->vm->region.user_end - th->vm->region.user_start), 0);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -154,7 +154,7 @@ arch_clear_host_user_space()
|
|||||||
|
|
||||||
/* XXX: might be unnecessary */
|
/* XXX: might be unnecessary */
|
||||||
clear_host_pte(th->vm->region.user_start,
|
clear_host_pte(th->vm->region.user_start,
|
||||||
(th->vm->region.user_end - th->vm->region.user_start));
|
(th->vm->region.user_end - th->vm->region.user_start), 0);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -750,7 +750,7 @@ struct process_vm {
|
|||||||
// 2. addition of process page table (allocate_pages, update_process_page_table)
|
// 2. addition of process page table (allocate_pages, update_process_page_table)
|
||||||
// note that physical memory allocator (ihk_mc_alloc_pages, ihk_pagealloc_alloc)
|
// note that physical memory allocator (ihk_mc_alloc_pages, ihk_pagealloc_alloc)
|
||||||
// is protected by its own lock (see ihk/manycore/generic/page_alloc.c)
|
// is protected by its own lock (see ihk/manycore/generic/page_alloc.c)
|
||||||
unsigned long is_memory_range_lock_taken;
|
int is_memory_range_lock_taken;
|
||||||
/* #986: Fix deadlock between do_page_fault_process_vm() and set_host_vma() */
|
/* #986: Fix deadlock between do_page_fault_process_vm() and set_host_vma() */
|
||||||
|
|
||||||
ihk_atomic_t refcount;
|
ihk_atomic_t refcount;
|
||||||
|
|||||||
@ -486,7 +486,7 @@ void set_cputime(enum set_cputime_mode mode);
|
|||||||
int do_munmap(void *addr, size_t len, int holding_memory_range_lock);
|
int do_munmap(void *addr, size_t len, int holding_memory_range_lock);
|
||||||
intptr_t do_mmap(uintptr_t addr0, size_t len0, int prot, int flags, int fd,
|
intptr_t do_mmap(uintptr_t addr0, size_t len0, int prot, int flags, int fd,
|
||||||
off_t off0);
|
off_t off0);
|
||||||
void clear_host_pte(uintptr_t addr, size_t len);
|
void clear_host_pte(uintptr_t addr, size_t len, int holding_memory_range_lock);
|
||||||
typedef int32_t key_t;
|
typedef int32_t key_t;
|
||||||
int do_shmget(key_t key, size_t size, int shmflg);
|
int do_shmget(key_t key, size_t size, int shmflg);
|
||||||
struct process_vm;
|
struct process_vm;
|
||||||
|
|||||||
@ -2168,19 +2168,11 @@ static int do_page_fault_process_vm(struct process_vm *vm, void *fault_addr0, ui
|
|||||||
dkprintf("[%d]do_page_fault_process_vm(%p,%lx,%lx)\n",
|
dkprintf("[%d]do_page_fault_process_vm(%p,%lx,%lx)\n",
|
||||||
ihk_mc_get_processor_id(), vm, fault_addr0, reason);
|
ihk_mc_get_processor_id(), vm, fault_addr0, reason);
|
||||||
|
|
||||||
if (!thread->vm->is_memory_range_lock_taken) {
|
if (thread->vm->is_memory_range_lock_taken == -1 ||
|
||||||
/* For the case where is_memory_range_lock_taken is incremented after memory_range_lock is taken. */
|
thread->vm->is_memory_range_lock_taken != ihk_mc_get_processor_id()) {
|
||||||
while (1) {
|
ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
|
||||||
if (thread->vm->is_memory_range_lock_taken) {
|
locked = 1;
|
||||||
goto skip;
|
|
||||||
}
|
|
||||||
if (ihk_mc_spinlock_trylock_noirq(&vm->memory_range_lock)) {
|
|
||||||
locked = 1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
skip:;
|
|
||||||
dkprintf("%s: INFO: skip locking of memory_range_lock,pid=%d,tid=%d\n",
|
dkprintf("%s: INFO: skip locking of memory_range_lock,pid=%d,tid=%d\n",
|
||||||
__func__, thread->proc->pid, thread->tid);
|
__func__, thread->proc->pid, thread->tid);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1448,17 +1448,28 @@ SYSCALL_DECLARE(exit_group)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void clear_host_pte(uintptr_t addr, size_t len)
|
void clear_host_pte(uintptr_t addr, size_t len, int holding_memory_range_lock)
|
||||||
{
|
{
|
||||||
ihk_mc_user_context_t ctx;
|
ihk_mc_user_context_t ctx;
|
||||||
long lerror;
|
long lerror;
|
||||||
|
struct thread *thread = cpu_local_var(current);
|
||||||
|
|
||||||
ihk_mc_syscall_arg0(&ctx) = addr;
|
ihk_mc_syscall_arg0(&ctx) = addr;
|
||||||
ihk_mc_syscall_arg1(&ctx) = len;
|
ihk_mc_syscall_arg1(&ctx) = len;
|
||||||
/* NOTE: 3rd parameter denotes new rpgtable of host process (if not zero) */
|
/* NOTE: 3rd parameter denotes new rpgtable of host process (if not zero) */
|
||||||
ihk_mc_syscall_arg2(&ctx) = 0;
|
ihk_mc_syscall_arg2(&ctx) = 0;
|
||||||
|
|
||||||
|
/* #986: Let remote page fault code skip
|
||||||
|
read-locking memory_range_lock. It's safe because other writers are warded off
|
||||||
|
until the remote PF handling code calls up_write(¤t->mm->mmap_sem) and
|
||||||
|
vm_range is consistent when calling this function. */
|
||||||
|
if (holding_memory_range_lock) {
|
||||||
|
thread->vm->is_memory_range_lock_taken = ihk_mc_get_processor_id();
|
||||||
|
}
|
||||||
lerror = syscall_generic_forwarding(__NR_munmap, &ctx);
|
lerror = syscall_generic_forwarding(__NR_munmap, &ctx);
|
||||||
|
if (holding_memory_range_lock) {
|
||||||
|
thread->vm->is_memory_range_lock_taken = -1;
|
||||||
|
}
|
||||||
if (lerror) {
|
if (lerror) {
|
||||||
kprintf("clear_host_pte failed. %ld\n", lerror);
|
kprintf("clear_host_pte failed. %ld\n", lerror);
|
||||||
}
|
}
|
||||||
@ -1477,11 +1488,11 @@ static int set_host_vma(uintptr_t addr, size_t len, int prot, int holding_memory
|
|||||||
|
|
||||||
dkprintf("%s: offloading __NR_mprotect\n", __FUNCTION__);
|
dkprintf("%s: offloading __NR_mprotect\n", __FUNCTION__);
|
||||||
/* #986: Let remote page fault code skip
|
/* #986: Let remote page fault code skip
|
||||||
read-locking memory_range_lock. It's safe because other writers are warded off
|
read-locking memory_range_lock. It's safe because other writers are warded off
|
||||||
until the remote PF handling code calls up_write(¤t->mm->mmap_sem) and
|
until the remote PF handling code calls up_write(¤t->mm->mmap_sem) and
|
||||||
vm_range is consistent when calling this function. */
|
vm_range is consistent when calling this function. */
|
||||||
if (holding_memory_range_lock) {
|
if (holding_memory_range_lock) {
|
||||||
thread->vm->is_memory_range_lock_taken = 1;
|
thread->vm->is_memory_range_lock_taken = ihk_mc_get_processor_id();
|
||||||
}
|
}
|
||||||
lerror = syscall_generic_forwarding(__NR_mprotect, &ctx);
|
lerror = syscall_generic_forwarding(__NR_mprotect, &ctx);
|
||||||
if (lerror) {
|
if (lerror) {
|
||||||
@ -1493,7 +1504,7 @@ static int set_host_vma(uintptr_t addr, size_t len, int prot, int holding_memory
|
|||||||
lerror = 0;
|
lerror = 0;
|
||||||
out:
|
out:
|
||||||
if (holding_memory_range_lock) {
|
if (holding_memory_range_lock) {
|
||||||
thread->vm->is_memory_range_lock_taken = 0;
|
thread->vm->is_memory_range_lock_taken = -1;
|
||||||
}
|
}
|
||||||
return (int)lerror;
|
return (int)lerror;
|
||||||
}
|
}
|
||||||
@ -1507,7 +1518,7 @@ int do_munmap(void *addr, size_t len, int holding_memory_range_lock)
|
|||||||
error = remove_process_memory_range(cpu_local_var(current)->vm,
|
error = remove_process_memory_range(cpu_local_var(current)->vm,
|
||||||
(intptr_t)addr, (intptr_t)addr+len, &ro_freed);
|
(intptr_t)addr, (intptr_t)addr+len, &ro_freed);
|
||||||
if (error || !ro_freed) {
|
if (error || !ro_freed) {
|
||||||
clear_host_pte((uintptr_t)addr, len);
|
clear_host_pte((uintptr_t)addr, len, holding_memory_range_lock);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
error = set_host_vma((uintptr_t)addr, len, PROT_READ | PROT_WRITE | PROT_EXEC, holding_memory_range_lock);
|
error = set_host_vma((uintptr_t)addr, len, PROT_READ | PROT_WRITE | PROT_EXEC, holding_memory_range_lock);
|
||||||
@ -8055,7 +8066,7 @@ SYSCALL_DECLARE(remap_file_pages)
|
|||||||
start0, size, prot, pgoff, flags, error);
|
start0, size, prot, pgoff, flags, error);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
clear_host_pte(start, size); /* XXX: workaround */
|
clear_host_pte(start, size, 1 /* memory range lock */); /* XXX: workaround */
|
||||||
|
|
||||||
if (range->flag & VR_LOCKED) {
|
if (range->flag & VR_LOCKED) {
|
||||||
need_populate = 1;
|
need_populate = 1;
|
||||||
|
|||||||
Reference in New Issue
Block a user