support msync() system call. refs #382
In this version, msync(2) writes back only the pages that the calling process itself has modified; modifications made by other processes are not written.
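For reference, a minimal user-space sketch of the call path this commit serves: map a file MAP_SHARED, dirty a page, then write it back with msync(2). The file name "testfile.dat" and the error handling are illustrative assumptions, not part of the commit; the file must already exist and be at least one page long.

/* Illustrative msync(2) usage sketch (not part of this commit).
 * Maps a file MAP_SHARED, dirties the first page, and writes it
 * back to the file with MS_SYNC before unmapping. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        const size_t len = 4096;
        int fd = open("testfile.dat", O_RDWR);  /* hypothetical test file */
        if (fd < 0) {
                perror("open");
                return 1;
        }

        char *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED) {
                perror("mmap");
                close(fd);
                return 1;
        }

        memcpy(p, "hello", 5);                  /* dirty the first page */

        /* Only pages dirtied by this process are written back. */
        if (msync(p, len, MS_SYNC) != 0) {
                perror("msync");
        }

        munmap(p, len);
        close(fd);
        return 0;
}

With this commit, MS_SYNC and MS_ASYNC drive sync_process_memory_range() over the dirty pages of the calling process, and MS_INVALIDATE additionally drops the mapped pages via invalidate_process_memory_range().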
@@ -204,6 +204,12 @@ static inline off_t pte_get_off(pte_t *ptep, size_t pgsize)
        return (off_t)(*ptep & PAGE_MASK);
}

static inline void pte_make_null(pte_t *ptep, size_t pgsize)
{
        *ptep = PTE_NULL;
        return;
}

static inline void pte_make_fileoff(off_t off,
                enum ihk_mc_pt_attribute ptattr, size_t pgsize, pte_t *ptep)
{
@@ -235,6 +241,36 @@ static inline void pte_xchg(pte_t *ptep, pte_t *valp)
#define pte_xchg(p,vp) do { *(vp) = xchg((p), *(vp)); } while (0)
#endif

static inline void pte_clear_dirty(pte_t *ptep, size_t pgsize)
{
        uint64_t mask;

        switch (pgsize) {
        default: /* through */
        case PTL1_SIZE: mask = ~PFL1_DIRTY; break;
        case PTL2_SIZE: mask = ~PFL2_DIRTY; break;
        case PTL3_SIZE: mask = ~PFL3_DIRTY; break;
        }

        asm volatile ("lock andq %0,%1" :: "r"(mask), "m"(*ptep));
        return;
}

static inline void pte_set_dirty(pte_t *ptep, size_t pgsize)
{
        uint64_t mask;

        switch (pgsize) {
        default: /* through */
        case PTL1_SIZE: mask = PFL1_DIRTY; break;
        case PTL2_SIZE: mask = PFL2_DIRTY; break;
        case PTL3_SIZE: mask = PFL3_DIRTY; break;
        }

        asm volatile ("lock orq %0,%1" :: "r"(mask), "m"(*ptep));
        return;
}

struct page_table;
void set_pte(pte_t *ppte, unsigned long phys, enum ihk_mc_pt_attribute attr);
pte_t *get_pte(struct page_table *pt, void *virt, enum ihk_mc_pt_attribute attr);

@@ -41,6 +41,7 @@ SYSCALL_DELEGATED(20, writev)
SYSCALL_DELEGATED(21, access)
SYSCALL_HANDLED(24, sched_yield)
SYSCALL_HANDLED(25, mremap)
SYSCALL_HANDLED(26, msync)
SYSCALL_HANDLED(28, madvise)
SYSCALL_HANDLED(34, pause)
SYSCALL_HANDLED(39, getpid)

@@ -46,6 +46,7 @@ static memobj_ref_func_t fileobj_ref;
static memobj_get_page_func_t fileobj_get_page;
static memobj_copy_page_func_t fileobj_copy_page;
static memobj_flush_page_func_t fileobj_flush_page;
static memobj_invalidate_page_func_t fileobj_invalidate_page;

static struct memobj_ops fileobj_ops = {
        .release = &fileobj_release,
@@ -53,6 +54,7 @@ static struct memobj_ops fileobj_ops = {
        .get_page = &fileobj_get_page,
        .copy_page = &fileobj_copy_page,
        .flush_page = &fileobj_flush_page,
        .invalidate_page = &fileobj_invalidate_page,
};

static struct fileobj *to_fileobj(struct memobj *memobj)
@@ -577,3 +579,33 @@ static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys,
        memobj_lock(&obj->memobj);
        return 0;
}

static int fileobj_invalidate_page(struct memobj *memobj, uintptr_t phys,
                size_t pgsize)
{
        struct fileobj *obj = to_fileobj(memobj);
        int error;
        struct page *page;

        dkprintf("fileobj_invalidate_page(%p,%#lx,%#lx)\n",
                        memobj, phys, pgsize);

        if (!(page = phys_to_page(phys))
                        || !(page = page_list_lookup(obj, page->offset))) {
                error = 0;
                goto out;
        }

        if (ihk_atomic_read(&page->count) == 1) {
                if (page_unmap(page)) {
                        ihk_mc_free_pages(phys_to_virt(phys),
                                        pgsize/PAGE_SIZE);
                }
        }

        error = 0;
out:
        dkprintf("fileobj_invalidate_page(%p,%#lx,%#lx):%d\n",
                        memobj, phys, pgsize, error);
        return error;
}

@@ -37,6 +37,7 @@ typedef void memobj_ref_func_t(struct memobj *obj);
typedef int memobj_get_page_func_t(struct memobj *obj, off_t off, int p2align, uintptr_t *physp);
typedef uintptr_t memobj_copy_page_func_t(struct memobj *obj, uintptr_t orgphys, int p2align);
typedef int memobj_flush_page_func_t(struct memobj *obj, uintptr_t phys, size_t pgsize);
typedef int memobj_invalidate_page_func_t(struct memobj *obj, uintptr_t phys, size_t pgsize);

struct memobj_ops {
        memobj_release_func_t * release;
@@ -44,6 +45,7 @@ struct memobj_ops {
        memobj_get_page_func_t * get_page;
        memobj_copy_page_func_t * copy_page;
        memobj_flush_page_func_t * flush_page;
        memobj_invalidate_page_func_t * invalidate_page;
};

static inline void memobj_release(struct memobj *obj)
@@ -86,6 +88,15 @@ static inline int memobj_flush_page(struct memobj *obj, uintptr_t phys, size_t p
        return 0;
}

static inline int memobj_invalidate_page(struct memobj *obj, uintptr_t phys,
                size_t pgsize)
{
        if (obj->ops->invalidate_page) {
                return (*obj->ops->invalidate_page)(obj, phys, pgsize);
        }
        return 0;
}

static inline void memobj_lock(struct memobj *obj)
{
        ihk_mc_spinlock_lock_noirq(&obj->lock);

@@ -69,4 +69,11 @@
#define MREMAP_MAYMOVE 0x01
#define MREMAP_FIXED 0x02

/*
 * for msync()
 */
#define MS_ASYNC 0x01
#define MS_INVALIDATE 0x02
#define MS_SYNC 0x04

#endif /* HEADER_MMAN_H */

@@ -405,6 +405,10 @@ int change_prot_process_memory_range(
                unsigned long newflag);
int remap_process_memory_range(struct process_vm *vm, struct vm_range *range,
                uintptr_t start, uintptr_t end, off_t off);
int sync_process_memory_range(struct process_vm *vm, struct vm_range *range,
                uintptr_t start, uintptr_t end);
int invalidate_process_memory_range(struct process_vm *vm,
                struct vm_range *range, uintptr_t start, uintptr_t end);
struct vm_range *lookup_process_memory_range(
                struct process_vm *vm, uintptr_t start, uintptr_t end);
struct vm_range *next_process_memory_range(

kernel/process.c
@@ -1210,6 +1210,154 @@ out:
        return error;
}

struct sync_args {
        struct memobj *memobj;
};

static int sync_one_page(void *arg0, page_table_t pt, pte_t *ptep,
                void *pgaddr, size_t pgsize)
{
        struct sync_args *args = arg0;
        int error;
        uintptr_t phys;

        dkprintf("sync_one_page(%p,%p,%p %#lx,%p,%#lx)\n",
                        arg0, pt, ptep, *ptep, pgaddr, pgsize);
        if (pte_is_null(ptep) || pte_is_fileoff(ptep, pgsize)
                        || !pte_is_dirty(ptep, pgsize)) {
                error = 0;
                goto out;
        }

        pte_clear_dirty(ptep, pgsize);
        flush_tlb_single((uintptr_t)pgaddr);    /* XXX: TLB flush */

        phys = pte_get_phys(ptep);
        error = memobj_flush_page(args->memobj, phys, pgsize);
        if (error) {
                ekprintf("sync_one_page(%p,%p,%p %#lx,%p,%#lx):"
                                "flush failed. %d\n",
                                arg0, pt, ptep, *ptep, pgaddr, pgsize, error);
                pte_set_dirty(ptep, pgsize);
                goto out;
        }

        error = 0;
out:
        dkprintf("sync_one_page(%p,%p,%p %#lx,%p,%#lx):%d\n",
                        arg0, pt, ptep, *ptep, pgaddr, pgsize, error);
        return error;
}

int sync_process_memory_range(struct process_vm *vm, struct vm_range *range,
                uintptr_t start, uintptr_t end)
{
        int error;
        struct sync_args args;

        dkprintf("sync_process_memory_range(%p,%p,%#lx,%#lx)\n",
                        vm, range, start, end);
        args.memobj = range->memobj;

        ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
        memobj_lock(range->memobj);
        error = visit_pte_range(vm->page_table, (void *)start, (void *)end,
                        VPTEF_SKIP_NULL, &sync_one_page, &args);
        memobj_unlock(range->memobj);
        ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
        if (error) {
                ekprintf("sync_process_memory_range(%p,%p,%#lx,%#lx):"
                                "visit failed%d\n",
                                vm, range, start, end, error);
                goto out;
        }
out:
        dkprintf("sync_process_memory_range(%p,%p,%#lx,%#lx):%d\n",
                        vm, range, start, end, error);
        return error;
}

struct invalidate_args {
        struct vm_range *range;
};

static int invalidate_one_page(void *arg0, page_table_t pt, pte_t *ptep,
                void *pgaddr, size_t pgsize)
{
        struct invalidate_args *args = arg0;
        struct vm_range *range = args->range;
        int error;
        uintptr_t phys;
        struct page *page;
        off_t linear_off;
        pte_t apte;

        dkprintf("invalidate_one_page(%p,%p,%p %#lx,%p,%#lx)\n",
                        arg0, pt, ptep, *ptep, pgaddr, pgsize);
        if (pte_is_null(ptep) || pte_is_fileoff(ptep, pgsize)) {
                error = 0;
                goto out;
        }

        phys = pte_get_phys(ptep);
        page = phys_to_page(phys);
        linear_off = range->objoff + ((uintptr_t)pgaddr - range->start);
        if (page && (page->offset == linear_off)) {
                pte_make_null(&apte, pgsize);
        }
        else {
                pte_make_fileoff(page->offset, 0, pgsize, &apte);
        }
        pte_xchg(ptep, &apte);
        flush_tlb_single((uintptr_t)pgaddr);    /* XXX: TLB flush */

        if (page && page_unmap(page)) {
                panic("invalidate_one_page");
        }

        error = memobj_invalidate_page(range->memobj, phys, pgsize);
        if (error) {
                ekprintf("invalidate_one_page(%p,%p,%p %#lx,%p,%#lx):"
                                "invalidate failed. %d\n",
                                arg0, pt, ptep, *ptep, pgaddr, pgsize, error);
                goto out;
        }

        error = 0;
out:
        dkprintf("invalidate_one_page(%p,%p,%p %#lx,%p,%#lx):%d\n",
                        arg0, pt, ptep, *ptep, pgaddr, pgsize, error);
        return error;
}

int invalidate_process_memory_range(struct process_vm *vm,
                struct vm_range *range, uintptr_t start, uintptr_t end)
{
        int error;
        struct invalidate_args args;

        dkprintf("invalidate_process_memory_range(%p,%p,%#lx,%#lx)\n",
                        vm, range, start, end);
        args.range = range;

        ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
        memobj_lock(range->memobj);
        error = visit_pte_range(vm->page_table, (void *)start, (void *)end,
                        VPTEF_SKIP_NULL, &invalidate_one_page, &args);
        memobj_unlock(range->memobj);
        ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
        if (error) {
                ekprintf("invalidate_process_memory_range(%p,%p,%#lx,%#lx):"
                                "visit failed%d\n",
                                vm, range, start, end, error);
                goto out;
        }
out:
        dkprintf("invalidate_process_memory_range(%p,%p,%#lx,%#lx):%d\n",
                        vm, range, start, end, error);
        return error;
}

static int page_fault_process_memory_range(struct process_vm *vm, struct vm_range *range, uintptr_t fault_addr, uint64_t reason)
{
        int error;

kernel/syscall.c
@@ -3935,6 +3935,123 @@ out:
        return ret;
}

SYSCALL_DECLARE(msync)
{
        const uintptr_t start0 = ihk_mc_syscall_arg0(ctx);
        const size_t len0 = ihk_mc_syscall_arg1(ctx);
        const int flags = ihk_mc_syscall_arg2(ctx);
        const size_t len = (len0 + PAGE_SIZE - 1) & PAGE_MASK;
        const uintptr_t start = start0;
        const uintptr_t end = start + len;
        struct process *proc = cpu_local_var(current);
        struct process_vm *vm = proc->vm;
        int error;
        uintptr_t addr;
        struct vm_range *range;
        uintptr_t s;
        uintptr_t e;

        dkprintf("sys_msync(%#lx,%#lx,%#x)\n", start0, len0, flags);
        ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);

        if ((start0 & ~PAGE_MASK)
                        || (flags & ~(MS_ASYNC|MS_INVALIDATE|MS_SYNC))
                        || ((flags & MS_ASYNC) && (flags & MS_SYNC))) {
                error = -EINVAL;
                ekprintf("sys_msync(%#lx,%#lx,%#x):invalid args. %d\n",
                                start0, len0, flags, error);
                goto out;
        }
        if (end < start) {
                error = -ENOMEM;
                ekprintf("sys_msync(%#lx,%#lx,%#x):invalid args. %d\n",
                                start0, len0, flags, error);
                goto out;
        }

        /* check ranges */
        range = NULL;
        for (addr = start; addr < end; addr = range->end) {
                if (!range) {
                        range = lookup_process_memory_range(vm, addr,
                                        addr+PAGE_SIZE);
                }
                else {
                        range = next_process_memory_range(vm, range);
                }

                if (!range || (addr < range->start)) {
                        error = -ENOMEM;
                        ekprintf("sys_msync(%#lx,%#lx,%#x):"
                                        "invalid VMR %d %#lx-%#lx %#lx\n",
                                        start0, len0, flags, error,
                                        range?range->start:0,
                                        range?range->end:0,
                                        range?range->flag:0);
                        goto out;
                }
                if ((flags & MS_INVALIDATE) && (range->flag & VR_LOCKED)) {
                        error = -EBUSY;
                        ekprintf("sys_msync(%#lx,%#lx,%#x):"
                                        "locked VMR %d %#lx-%#lx %#lx\n",
                                        start0, len0, flags, error,
                                        range->start, range->end, range->flag);
                        goto out;
                }
        }

        /* do the sync */
        range = NULL;
        for (addr = start; addr < end; addr = range->end) {
                if (!range) {
                        range = lookup_process_memory_range(vm, addr,
                                        addr+PAGE_SIZE);
                }
                else {
                        range = next_process_memory_range(vm, range);
                }

                if ((range->flag & VR_PRIVATE) || !range->memobj
                                || !memobj_has_pager(range->memobj)) {
                        dkprintf("sys_msync(%#lx,%#lx,%#x):"
                                        "unsyncable VMR %d %#lx-%#lx %#lx\n",
                                        start0, len0, flags, error,
                                        range->start, range->end, range->flag);
                        /* nothing to do */
                        continue;
                }

                s = addr;
                e = (range->end < end)? range->end: end;

                if (flags & (MS_ASYNC | MS_SYNC)) {
                        error = sync_process_memory_range(vm, range, s, e);
                        if (error) {
                                ekprintf("sys_msync(%#lx,%#lx,%#x):sync failed. %d\n",
                                                start0, len0, flags, error);
                                goto out;
                        }
                }

                if (flags & MS_INVALIDATE) {
                        error = invalidate_process_memory_range(
                                        vm, range, s, e);
                        if (error) {
                                ekprintf("sys_msync(%#lx,%#lx,%#x):"
                                                "invalidate failed. %d\n",
                                                start0, len0, flags, error);
                                goto out;
                        }
                }
        }

        error = 0;
out:
        ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
        dkprintf("sys_msync(%#lx,%#lx,%#x):%d\n", start0, len0, flags, error);
        return error;
} /* sys_msync() */

SYSCALL_DECLARE(getcpu)
{
        const uintptr_t cpup = ihk_mc_syscall_arg0(ctx);