Fix deadlock involving mmap_sem and memory_range_lock

Change-Id: I187246271163e708af6542c057d0a8dfde5b211e
Fujitsu: TEMP_FIX_1
Refs: #986

Author: Masamichi Takagi
Date:   2018-09-03 13:54:40 +09:00
Commit: 567dcd3846 (parent: b080e0f301)

4 changed files with 52 additions and 19 deletions
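
In short, as the in-diff comments describe, this is a lock-order cycle across the McKernel/host boundary: set_host_vma() forwards __NR_mprotect to the host while holding memory_range_lock, and the host-side mprotect needs mmap_sem; meanwhile the remote page-fault handling path, which already holds mmap_sem for writing on the host, ends up in do_page_fault_process_vm() waiting for memory_range_lock. The fix breaks the cycle with a per-vm flag, is_memory_range_lock_taken: the lock holder raises it for the duration of the forwarded call, and the page-fault path skips taking the lock while it is set. Minimal sketches of both sides follow the corresponding hunks below.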

----- file 1 of 4 -----

@@ -715,6 +715,8 @@ struct process_vm {
 	// 2. addition of process page table (allocate_pages, update_process_page_table)
 	// note that physical memory allocator (ihk_mc_alloc_pages, ihk_pagealloc_alloc)
 	// is protected by its own lock (see ihk/manycore/generic/page_alloc.c)
+	unsigned long is_memory_range_lock_taken;
+	/* #986: Fix deadlock between do_page_fault_process_vm() and set_host_vma() */
 	ihk_atomic_t refcount;
 	int exiting;
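
The new field is a hint, not a lock: it is nonzero only while a thread that already holds memory_range_lock is blocked in a forwarded host syscall (it is set in set_host_vma() below), and do_page_fault_process_vm() reads it to decide whether taking memory_range_lock would deadlock against that holder.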

----- file 2 of 4 -----

@@ -462,7 +462,7 @@ static inline unsigned long timespec_to_jiffy(const struct timespec *ats)
 void reset_cputime(void);
 void set_cputime(int mode);
-int do_munmap(void *addr, size_t len);
+int do_munmap(void *addr, size_t len, int holding_memory_range_lock);
 intptr_t do_mmap(intptr_t addr0, size_t len0, int prot, int flags, int fd,
 	off_t off0);
 void clear_host_pte(uintptr_t addr, size_t len);
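
Every do_munmap() caller updated in this commit passes 1 for the new argument, since each takes memory_range_lock before the call (see the hunks in file 4 of 4 below); a caller that did not hold the lock would pass 0, so that set_host_vma() leaves the flag alone.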

----- file 3 of 4 -----

@@ -1952,11 +1952,28 @@ static int do_page_fault_process_vm(struct process_vm *vm, void *fault_addr0, uint64_t reason)
 	int error;
 	const uintptr_t fault_addr = (uintptr_t)fault_addr0;
 	struct vm_range *range;
+	struct thread *thread = cpu_local_var(current);
+	int locked = 0;

 	dkprintf("[%d]do_page_fault_process_vm(%p,%lx,%lx)\n",
 		ihk_mc_get_processor_id(), vm, fault_addr0, reason);

-	ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
+	if (!thread->vm->is_memory_range_lock_taken) {
+		/* Handle the case where is_memory_range_lock_taken is set after memory_range_lock has been taken. */
+		while (1) {
+			if (thread->vm->is_memory_range_lock_taken) {
+				goto skip;
+			}
+			if (ihk_mc_spinlock_trylock_noirq(&vm->memory_range_lock)) {
+				locked = 1;
+				break;
+			}
+		}
+	} else {
+	skip:;
+		dkprintf("%s: INFO: skip locking of memory_range_lock,pid=%d,tid=%d\n",
+			__func__, thread->proc->pid, thread->tid);
+	}

 	if (vm->exiting) {
 		error = -ECANCELED;
@@ -2065,7 +2082,9 @@ static int do_page_fault_process_vm(struct process_vm *vm, void *fault_addr0, uint64_t reason)
 	error = 0;
 out:
-	ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
+	if (locked) {
+		ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
+	}

 	dkprintf("[%d]do_page_fault_process_vm(%p,%lx,%lx): %d\n",
 		ihk_mc_get_processor_id(), vm, fault_addr0,
 		reason, error);
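
Why a retry loop instead of a single check-then-trylock: the flag can become set after the forwarding thread has already taken memory_range_lock, in which case a bare trylock spin would never succeed; re-checking the flag on every iteration turns that case into the skip path. Below is a minimal user-space sketch of this lock-or-skip pattern, assuming C11 atomics; try_lock() and lock_or_skip() are illustrative stand-ins, and only the two variable names mirror the diff.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_flag memory_range_lock = ATOMIC_FLAG_INIT;  /* stand-in spinlock */
static atomic_ulong is_memory_range_lock_taken;

static bool try_lock(void)
{
	return !atomic_flag_test_and_set(&memory_range_lock);
}

/* Mirrors the fault path: returns 1 if the lock was taken (caller must
 * unlock it later), 0 if locking was skipped because the current holder
 * is forwarding a syscall on our behalf. */
static int lock_or_skip(void)
{
	for (;;) {
		if (atomic_load(&is_memory_range_lock_taken))
			return 0;               /* the "goto skip" path */
		if (try_lock())
			return 1;               /* the "locked = 1" path */
	}
}

int main(void)
{
	/* Simulate a holder that locked first, then raised the flag: a bare
	 * trylock spin would hang here; the flag re-check returns instead. */
	atomic_flag_test_and_set(&memory_range_lock);
	atomic_store(&is_memory_range_lock_taken, 1);
	printf("locked=%d (expect 0: skip)\n", lock_or_skip());
	return 0;
}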

----- file 4 of 4 -----

@@ -1276,15 +1276,24 @@ void clear_host_pte(uintptr_t addr, size_t len)
 	return;
 }

-static int set_host_vma(uintptr_t addr, size_t len, int prot)
+static int set_host_vma(uintptr_t addr, size_t len, int prot, int holding_memory_range_lock)
 {
 	ihk_mc_user_context_t ctx;
 	long lerror;
+	struct thread *thread = cpu_local_var(current);

 	ihk_mc_syscall_arg0(&ctx) = addr;
 	ihk_mc_syscall_arg1(&ctx) = len;
 	ihk_mc_syscall_arg2(&ctx) = prot;
 	dkprintf("%s: offloading __NR_mprotect\n", __FUNCTION__);

+	/* #986: Let the remote page-fault code skip read-locking
+	   memory_range_lock. This is safe because other writers are warded
+	   off until the remote PF handling code calls
+	   up_write(&current->mm->mmap_sem), and vm_range is consistent when
+	   this function is called. */
+	if (holding_memory_range_lock) {
+		thread->vm->is_memory_range_lock_taken = 1;
+	}
+
 	lerror = syscall_generic_forwarding(__NR_mprotect, &ctx);
 	if (lerror) {
 		kprintf("set_host_vma(%lx,%lx,%x) failed. %ld\n",
@@ -1294,10 +1303,13 @@ static int set_host_vma(uintptr_t addr, size_t len, int prot)
 	lerror = 0;
 out:
+	if (holding_memory_range_lock) {
+		thread->vm->is_memory_range_lock_taken = 0;
+	}
 	return (int)lerror;
 }

-int do_munmap(void *addr, size_t len)
+int do_munmap(void *addr, size_t len, int holding_memory_range_lock)
 {
 	int error;
 	int ro_freed;
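
The writer side brackets only the forwarded call: the flag is raised after memory_range_lock is already held and cleared again on the way out, so the page-fault path can observe it only while the host call is in flight. A matching sketch, under the same stand-in assumptions as the previous one; forward_to_host() is hypothetical and stands in for syscall_generic_forwarding(__NR_mprotect, ...).

#include <stdatomic.h>

static atomic_ulong is_memory_range_lock_taken;   /* shared with the PF path */

/* Hypothetical; stands in for the forwarded host syscall, during which
 * remote page faults may arrive and take the skip path. */
static long forward_to_host(void)
{
	return 0;
}

/* The caller is expected to hold memory_range_lock whenever it passes
 * holding_memory_range_lock != 0, exactly like set_host_vma() above. */
static int set_host_vma_sketch(int holding_memory_range_lock)
{
	long lerror;

	if (holding_memory_range_lock)
		atomic_store(&is_memory_range_lock_taken, 1);

	lerror = forward_to_host();

	if (holding_memory_range_lock)
		atomic_store(&is_memory_range_lock_taken, 0);

	return (int)lerror;
}

int main(void)
{
	return set_host_vma_sketch(1);
}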
@@ -1309,7 +1321,7 @@ int do_munmap(void *addr, size_t len, int holding_memory_range_lock)
 		clear_host_pte((uintptr_t)addr, len);
 	}
 	else {
-		error = set_host_vma((uintptr_t)addr, len, PROT_READ|PROT_WRITE);
+		error = set_host_vma((uintptr_t)addr, len, PROT_READ|PROT_WRITE, holding_memory_range_lock);
 		if (error) {
 			kprintf("sys_munmap:set_host_vma failed. %d\n", error);
 			/* through */
@@ -1445,7 +1457,7 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
 	if (flags & MAP_FIXED) {
 		/* clear specified address range */
-		error = do_munmap((void *)addr, len);
+		error = do_munmap((void *)addr, len, 1/* holding memory_range_lock */);
 		if (error) {
 			ekprintf("do_mmap:do_munmap(%lx,%lx) failed. %d\n",
 				addr, len, error);
@@ -1487,7 +1499,7 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
 	}

 	if (!(prot & PROT_WRITE)) {
-		error = set_host_vma(addr, len, PROT_READ);
+		error = set_host_vma(addr, len, PROT_READ, 1/* holding memory_range_lock */);
 		if (error) {
 			kprintf("do_mmap:set_host_vma failed. %d\n", error);
 			goto out;
@@ -1689,7 +1701,7 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
 out:
 	if (ro_vma_mapped) {
-		(void)set_host_vma(addr, len, PROT_READ|PROT_WRITE);
+		(void)set_host_vma(addr, len, PROT_READ|PROT_WRITE, 1/* holding memory_range_lock */);
 	}
 	ihk_mc_spinlock_unlock_noirq(&thread->vm->memory_range_lock);
@@ -1760,7 +1772,7 @@ SYSCALL_DECLARE(munmap)
 	}

 	ihk_mc_spinlock_lock_noirq(&thread->vm->memory_range_lock);
-	error = do_munmap((void *)addr, len);
+	error = do_munmap((void *)addr, len, 1/* holding memory_range_lock */);
 	ihk_mc_spinlock_unlock_noirq(&thread->vm->memory_range_lock);
 out:
@@ -1899,7 +1911,7 @@ out:
 	// XXX: TLB flush
 	flush_tlb();
 	if (ro_changed && !error) {
-		error = set_host_vma(start, len, prot & (PROT_READ|PROT_WRITE));
+		error = set_host_vma(start, len, prot & (PROT_READ|PROT_WRITE), 1/* holding memory_range_lock */);
 		if (error) {
 			kprintf("sys_mprotect:set_host_vma failed. %d\n", error);
 			/* through */
@@ -2149,7 +2161,7 @@ static void munmap_all(void)
 		addr = (void *)range->start;
 		size = range->end - range->start;
-		error = do_munmap(addr, size);
+		error = do_munmap(addr, size, 1/* holding memory_range_lock */);
 		if (error) {
 			kprintf("munmap_all():do_munmap(%p,%lx) failed. %d\n",
 				addr, size, error);
@@ -4987,7 +4999,7 @@ SYSCALL_DECLARE(shmat)
 	vrflags |= VRFLAG_PROT_TO_MAXPROT(vrflags);
 	if (!(prot & PROT_WRITE)) {
-		error = set_host_vma(addr, len, PROT_READ);
+		error = set_host_vma(addr, len, PROT_READ, 1/* holding memory_range_lock */);
 		if (error) {
 			ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
 			shmobj_list_unlock();
@@ -5002,7 +5014,7 @@ SYSCALL_DECLARE(shmat)
 			vrflags, &obj->memobj, 0, obj->pgshift, NULL);
 	if (error) {
 		if (!(prot & PROT_WRITE)) {
-			(void)set_host_vma(addr, len, PROT_READ|PROT_WRITE);
+			(void)set_host_vma(addr, len, PROT_READ|PROT_WRITE, 1/* holding memory_range_lock */);
 		}
 		memobj_release(&obj->memobj);
 		ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
@@ -5297,7 +5309,7 @@ SYSCALL_DECLARE(shmdt)
 		return -EINVAL;
 	}

-	error = do_munmap((void *)range->start, (range->end - range->start));
+	error = do_munmap((void *)range->start, (range->end - range->start), 1/* holding memory_range_lock */);
 	if (error) {
 		ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
 		dkprintf("shmdt(%p): %d\n", shmaddr, error);
@@ -7825,7 +7837,7 @@ SYSCALL_DECLARE(mremap)
 	/* do the remap */
 	if (need_relocate) {
 		if (flags & MREMAP_FIXED) {
-			error = do_munmap((void *)newstart, newsize);
+			error = do_munmap((void *)newstart, newsize, 1/* holding memory_range_lock */);
 			if (error) {
 				ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
 					"fixed:munmap failed. %d\n",
@@ -7872,7 +7884,7 @@ SYSCALL_DECLARE(mremap)
 			goto out;
 		}

-		error = do_munmap((void *)oldstart, oldsize);
+		error = do_munmap((void *)oldstart, oldsize, 1/* holding memory_range_lock */);
 		if (error) {
 			ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
 				"relocate:munmap failed. %d\n",
@@ -7883,7 +7895,7 @@ SYSCALL_DECLARE(mremap)
 		}
 	}
 	else if (newsize < oldsize) {
-		error = do_munmap((void *)newend, (oldend - newend));
+		error = do_munmap((void *)newend, (oldend - newend), 1/* holding memory_range_lock */);
 		if (error) {
 			ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
 				"shrink:munmap failed. %d\n",