support msync() system call. refs #382

In this version, msync(2) writes back only the pages that the calling process itself has modified. Modifications made by other processes are not written back.
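
For illustration only (not part of this commit), a minimal user-space caller of this path might look like the sketch below; the file path, the 4096-byte page size, and the mapping length are assumptions.

/* Hedged sketch: map a shared file, dirty one page, and ask the kernel
 * to write back this process's modifications with msync(2). */
#include <fcntl.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	const size_t len = 2 * 4096;   /* assumed page size */
	int fd = open("/tmp/example.dat", O_RDWR | O_CREAT, 0600);
	if (fd < 0 || ftruncate(fd, len) < 0)
		return 1;

	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		return 1;

	memcpy(p, "hello", 5);          /* dirties only the first page    */
	msync(p, len, MS_SYNC);         /* writes back the dirty page(s)  */

	munmap(p, len);
	close(fd);
	return 0;
}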
NAKAMURA Gou
2014-07-09 14:19:26 +09:00
parent 6485578a7f
commit 16af976a71
8 changed files with 356 additions and 0 deletions

View File

@ -204,6 +204,12 @@ static inline off_t pte_get_off(pte_t *ptep, size_t pgsize)
return (off_t)(*ptep & PAGE_MASK);
}
static inline void pte_make_null(pte_t *ptep, size_t pgsize)
{
*ptep = PTE_NULL;
return;
}
static inline void pte_make_fileoff(off_t off,
enum ihk_mc_pt_attribute ptattr, size_t pgsize, pte_t *ptep)
{
@ -235,6 +241,36 @@ static inline void pte_xchg(pte_t *ptep, pte_t *valp)
#define pte_xchg(p,vp) do { *(vp) = xchg((p), *(vp)); } while (0)
#endif
static inline void pte_clear_dirty(pte_t *ptep, size_t pgsize)
{
uint64_t mask;
switch (pgsize) {
default: /* through */
case PTL1_SIZE: mask = ~PFL1_DIRTY; break;
case PTL2_SIZE: mask = ~PFL2_DIRTY; break;
case PTL3_SIZE: mask = ~PFL3_DIRTY; break;
}
asm volatile ("lock andq %0,%1" :: "r"(mask), "m"(*ptep));
return;
}
static inline void pte_set_dirty(pte_t *ptep, size_t pgsize)
{
uint64_t mask;
switch (pgsize) {
default: /* through */
case PTL1_SIZE: mask = PFL1_DIRTY; break;
case PTL2_SIZE: mask = PFL2_DIRTY; break;
case PTL3_SIZE: mask = PFL3_DIRTY; break;
}
asm volatile ("lock orq %0,%1" :: "r"(mask), "m"(*ptep));
return;
}
struct page_table;
void set_pte(pte_t *ppte, unsigned long phys, enum ihk_mc_pt_attribute attr);
pte_t *get_pte(struct page_table *pt, void *virt, enum ihk_mc_pt_attribute attr);

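As an aside, pte_clear_dirty() and pte_set_dirty() above update the hardware dirty bit with a lock-prefixed read-modify-write. A rough analogue of the same operation, written as a hedged sketch with GCC's __atomic builtins (PFL_DIRTY here is an assumed stand-in for the per-level PFL1/PFL2/PFL3_DIRTY masks), would be:

#include <stdint.h>

typedef uint64_t pte_t;

/* Assumed stand-in for the per-level dirty masks:
 * bit 6 is the dirty bit in an x86-64 PTE. */
#define PFL_DIRTY ((pte_t)1 << 6)

/* Atomically clear the dirty bit, like the "lock andq" above. */
static inline void pte_clear_dirty_sketch(pte_t *ptep)
{
	__atomic_fetch_and(ptep, ~PFL_DIRTY, __ATOMIC_SEQ_CST);
}

/* Atomically set the dirty bit, like the "lock orq" above. */
static inline void pte_set_dirty_sketch(pte_t *ptep)
{
	__atomic_fetch_or(ptep, PFL_DIRTY, __ATOMIC_SEQ_CST);
}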
View File

@ -41,6 +41,7 @@ SYSCALL_DELEGATED(20, writev)
SYSCALL_DELEGATED(21, access)
SYSCALL_HANDLED(24, sched_yield)
SYSCALL_HANDLED(25, mremap)
SYSCALL_HANDLED(26, msync)
SYSCALL_HANDLED(28, madvise)
SYSCALL_HANDLED(34, pause)
SYSCALL_HANDLED(39, getpid)

View File

@ -46,6 +46,7 @@ static memobj_ref_func_t fileobj_ref;
static memobj_get_page_func_t fileobj_get_page;
static memobj_copy_page_func_t fileobj_copy_page;
static memobj_flush_page_func_t fileobj_flush_page;
static memobj_invalidate_page_func_t fileobj_invalidate_page;
static struct memobj_ops fileobj_ops = {
.release = &fileobj_release,
@ -53,6 +54,7 @@ static struct memobj_ops fileobj_ops = {
.get_page = &fileobj_get_page,
.copy_page = &fileobj_copy_page,
.flush_page = &fileobj_flush_page,
.invalidate_page = &fileobj_invalidate_page,
};
static struct fileobj *to_fileobj(struct memobj *memobj)
@ -577,3 +579,33 @@ static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys,
memobj_lock(&obj->memobj);
return 0;
}
static int fileobj_invalidate_page(struct memobj *memobj, uintptr_t phys,
size_t pgsize)
{
struct fileobj *obj = to_fileobj(memobj);
int error;
struct page *page;
dkprintf("fileobj_invalidate_page(%p,%#lx,%#lx)\n",
memobj, phys, pgsize);
if (!(page = phys_to_page(phys))
|| !(page = page_list_lookup(obj, page->offset))) {
error = 0;
goto out;
}
if (ihk_atomic_read(&page->count) == 1) {
if (page_unmap(page)) {
ihk_mc_free_pages(phys_to_virt(phys),
pgsize/PAGE_SIZE);
}
}
error = 0;
out:
dkprintf("fileobj_invalidate_page(%p,%#lx,%#lx):%d\n",
memobj, phys, pgsize, error);
return error;
}

View File

@ -37,6 +37,7 @@ typedef void memobj_ref_func_t(struct memobj *obj);
typedef int memobj_get_page_func_t(struct memobj *obj, off_t off, int p2align, uintptr_t *physp);
typedef uintptr_t memobj_copy_page_func_t(struct memobj *obj, uintptr_t orgphys, int p2align);
typedef int memobj_flush_page_func_t(struct memobj *obj, uintptr_t phys, size_t pgsize);
typedef int memobj_invalidate_page_func_t(struct memobj *obj, uintptr_t phys, size_t pgsize);
struct memobj_ops {
memobj_release_func_t * release;
@ -44,6 +45,7 @@ struct memobj_ops {
memobj_get_page_func_t * get_page;
memobj_copy_page_func_t * copy_page;
memobj_flush_page_func_t * flush_page;
memobj_invalidate_page_func_t * invalidate_page;
};
static inline void memobj_release(struct memobj *obj)
@ -86,6 +88,15 @@ static inline int memobj_flush_page(struct memobj *obj, uintptr_t phys, size_t p
return 0;
}
static inline int memobj_invalidate_page(struct memobj *obj, uintptr_t phys,
size_t pgsize)
{
if (obj->ops->invalidate_page) {
return (*obj->ops->invalidate_page)(obj, phys, pgsize);
}
return 0;
}
static inline void memobj_lock(struct memobj *obj)
{
ihk_mc_spinlock_lock_noirq(&obj->lock);

View File

@ -69,4 +69,11 @@
#define MREMAP_MAYMOVE 0x01
#define MREMAP_FIXED 0x02
/*
* for msync()
*/
#define MS_ASYNC 0x01
#define MS_INVALIDATE 0x02
#define MS_SYNC 0x04
#endif /* HEADER_MMAN_H */

View File

@ -405,6 +405,10 @@ int change_prot_process_memory_range(
unsigned long newflag);
int remap_process_memory_range(struct process_vm *vm, struct vm_range *range,
uintptr_t start, uintptr_t end, off_t off);
int sync_process_memory_range(struct process_vm *vm, struct vm_range *range,
uintptr_t start, uintptr_t end);
int invalidate_process_memory_range(struct process_vm *vm,
struct vm_range *range, uintptr_t start, uintptr_t end);
struct vm_range *lookup_process_memory_range(
struct process_vm *vm, uintptr_t start, uintptr_t end);
struct vm_range *next_process_memory_range(

View File

@ -1210,6 +1210,154 @@ out:
return error;
}
struct sync_args {
struct memobj *memobj;
};
static int sync_one_page(void *arg0, page_table_t pt, pte_t *ptep,
void *pgaddr, size_t pgsize)
{
struct sync_args *args = arg0;
int error;
uintptr_t phys;
dkprintf("sync_one_page(%p,%p,%p %#lx,%p,%#lx)\n",
arg0, pt, ptep, *ptep, pgaddr, pgsize);
if (pte_is_null(ptep) || pte_is_fileoff(ptep, pgsize)
|| !pte_is_dirty(ptep, pgsize)) {
error = 0;
goto out;
}
pte_clear_dirty(ptep, pgsize);
flush_tlb_single((uintptr_t)pgaddr); /* XXX: TLB flush */
phys = pte_get_phys(ptep);
error = memobj_flush_page(args->memobj, phys, pgsize);
if (error) {
ekprintf("sync_one_page(%p,%p,%p %#lx,%p,%#lx):"
"flush failed. %d\n",
arg0, pt, ptep, *ptep, pgaddr, pgsize, error);
pte_set_dirty(ptep, pgsize);
goto out;
}
error = 0;
out:
dkprintf("sync_one_page(%p,%p,%p %#lx,%p,%#lx):%d\n",
arg0, pt, ptep, *ptep, pgaddr, pgsize, error);
return error;
}
int sync_process_memory_range(struct process_vm *vm, struct vm_range *range,
uintptr_t start, uintptr_t end)
{
int error;
struct sync_args args;
dkprintf("sync_process_memory_range(%p,%p,%#lx,%#lx)\n",
vm, range, start, end);
args.memobj = range->memobj;
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
memobj_lock(range->memobj);
error = visit_pte_range(vm->page_table, (void *)start, (void *)end,
VPTEF_SKIP_NULL, &sync_one_page, &args);
memobj_unlock(range->memobj);
ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
if (error) {
ekprintf("sync_process_memory_range(%p,%p,%#lx,%#lx):"
"visit failed%d\n",
vm, range, start, end, error);
goto out;
}
out:
dkprintf("sync_process_memory_range(%p,%p,%#lx,%#lx):%d\n",
vm, range, start, end, error);
return error;
}
struct invalidate_args {
struct vm_range *range;
};
static int invalidate_one_page(void *arg0, page_table_t pt, pte_t *ptep,
void *pgaddr, size_t pgsize)
{
struct invalidate_args *args = arg0;
struct vm_range *range = args->range;
int error;
uintptr_t phys;
struct page *page;
off_t linear_off;
pte_t apte;
dkprintf("invalidate_one_page(%p,%p,%p %#lx,%p,%#lx)\n",
arg0, pt, ptep, *ptep, pgaddr, pgsize);
if (pte_is_null(ptep) || pte_is_fileoff(ptep, pgsize)) {
error = 0;
goto out;
}
phys = pte_get_phys(ptep);
page = phys_to_page(phys);
linear_off = range->objoff + ((uintptr_t)pgaddr - range->start);
if (page && (page->offset == linear_off)) {
pte_make_null(&apte, pgsize);
}
else {
pte_make_fileoff(page->offset, 0, pgsize, &apte);
}
pte_xchg(ptep, &apte);
flush_tlb_single((uintptr_t)pgaddr); /* XXX: TLB flush */
if (page && page_unmap(page)) {
panic("invalidate_one_page");
}
error = memobj_invalidate_page(range->memobj, phys, pgsize);
if (error) {
ekprintf("invalidate_one_page(%p,%p,%p %#lx,%p,%#lx):"
"invalidate failed. %d\n",
arg0, pt, ptep, *ptep, pgaddr, pgsize, error);
goto out;
}
error = 0;
out:
dkprintf("invalidate_one_page(%p,%p,%p %#lx,%p,%#lx):%d\n",
arg0, pt, ptep, *ptep, pgaddr, pgsize, error);
return error;
}
int invalidate_process_memory_range(struct process_vm *vm,
struct vm_range *range, uintptr_t start, uintptr_t end)
{
int error;
struct invalidate_args args;
dkprintf("invalidate_process_memory_range(%p,%p,%#lx,%#lx)\n",
vm, range, start, end);
args.range = range;
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
memobj_lock(range->memobj);
error = visit_pte_range(vm->page_table, (void *)start, (void *)end,
VPTEF_SKIP_NULL, &invalidate_one_page, &args);
memobj_unlock(range->memobj);
ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
if (error) {
ekprintf("invalidate_process_memory_range(%p,%p,%#lx,%#lx):"
"visit failed%d\n",
vm, range, start, end, error);
goto out;
}
out:
dkprintf("invalidate_process_memory_range(%p,%p,%#lx,%#lx):%d\n",
vm, range, start, end, error);
return error;
}
static int page_fault_process_memory_range(struct process_vm *vm, struct vm_range *range, uintptr_t fault_addr, uint64_t reason)
{
int error;

View File

@ -3935,6 +3935,123 @@ out:
return ret;
}
SYSCALL_DECLARE(msync)
{
const uintptr_t start0 = ihk_mc_syscall_arg0(ctx);
const size_t len0 = ihk_mc_syscall_arg1(ctx);
const int flags = ihk_mc_syscall_arg2(ctx);
const size_t len = (len0 + PAGE_SIZE - 1) & PAGE_MASK;
const uintptr_t start = start0;
const uintptr_t end = start + len;
struct process *proc = cpu_local_var(current);
struct process_vm *vm = proc->vm;
int error;
uintptr_t addr;
struct vm_range *range;
uintptr_t s;
uintptr_t e;
dkprintf("sys_msync(%#lx,%#lx,%#x)\n", start0, len0, flags);
ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
if ((start0 & ~PAGE_MASK)
|| (flags & ~(MS_ASYNC|MS_INVALIDATE|MS_SYNC))
|| ((flags & MS_ASYNC) && (flags & MS_SYNC))) {
error = -EINVAL;
ekprintf("sys_msync(%#lx,%#lx,%#x):invalid args. %d\n",
start0, len0, flags, error);
goto out;
}
if (end < start) {
error = -ENOMEM;
ekprintf("sys_msync(%#lx,%#lx,%#x):invalid args. %d\n",
start0, len0, flags, error);
goto out;
}
/* check ranges */
range = NULL;
for (addr = start; addr < end; addr = range->end) {
if (!range) {
range = lookup_process_memory_range(vm, addr,
addr+PAGE_SIZE);
}
else {
range = next_process_memory_range(vm, range);
}
if (!range || (addr < range->start)) {
error = -ENOMEM;
ekprintf("sys_msync(%#lx,%#lx,%#x):"
"invalid VMR %d %#lx-%#lx %#lx\n",
start0, len0, flags, error,
range?range->start:0,
range?range->end:0,
range?range->flag:0);
goto out;
}
if ((flags & MS_INVALIDATE) && (range->flag & VR_LOCKED)) {
error = -EBUSY;
ekprintf("sys_msync(%#lx,%#lx,%#x):"
"locked VMR %d %#lx-%#lx %#lx\n",
start0, len0, flags, error,
range->start, range->end, range->flag);
goto out;
}
}
/* do the sync */
range = NULL;
for (addr = start; addr < end; addr = range->end) {
if (!range) {
range = lookup_process_memory_range(vm, addr,
addr+PAGE_SIZE);
}
else {
range = next_process_memory_range(vm, range);
}
if ((range->flag & VR_PRIVATE) || !range->memobj
|| !memobj_has_pager(range->memobj)) {
dkprintf("sys_msync(%#lx,%#lx,%#x):"
"unsyncable VMR %d %#lx-%#lx %#lx\n",
start0, len0, flags, error,
range->start, range->end, range->flag);
/* nothing to do */
continue;
}
s = addr;
e = (range->end < end)? range->end: end;
if (flags & (MS_ASYNC | MS_SYNC)) {
error = sync_process_memory_range(vm, range, s, e);
if (error) {
ekprintf("sys_msync(%#lx,%#lx,%#x):sync failed. %d\n",
start0, len0, flags, error);
goto out;
}
}
if (flags & MS_INVALIDATE) {
error = invalidate_process_memory_range(
vm, range, s, e);
if (error) {
ekprintf("sys_msync(%#lx,%#lx,%#x):"
"invalidate failed. %d\n",
start0, len0, flags, error);
goto out;
}
}
}
error = 0;
out:
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
dkprintf("sys_msync(%#lx,%#lx,%#x):%d\n", start0, len0, flags, error);
return error;
} /* sys_msync() */
SYSCALL_DECLARE(getcpu)
{
const uintptr_t cpup = ihk_mc_syscall_arg0(ctx);