add shared mapped file (in progress)

implemented:
- Pages can be shared between maps.
- A change made to a map is written to the file, at munmap().

not yet implemented:
- VM operations during page I/O execution.
  This is because page I/O is currently executed while holding the VM's lock.
- Page I/O that never changes the file size under any circumstances.
  When munmap() races with truncate(), the file size may currently be
  changed incorrectly.
This commit is contained in:
NAKAMURA Gou
2013-11-06 17:17:56 +09:00
parent d35140ab0b
commit bbbc6e1570
8 changed files with 153 additions and 24 deletions

View File

@ -814,6 +814,8 @@ static int split_large_page(pte_t *ptep)
/*
 * Arguments handed through the page-table walker to the clear_range_l*()
 * callbacks.
 */
struct clear_range_args {
	/* Non-zero: the callbacks take the "free physical page" path. */
	int free_physical;
	/* Explicit filler between the int and the pointer member below. */
	uint8_t padding[4];
	/*
	 * Memory object whose dirty pages are flushed while clearing;
	 * NULL disables flushing.
	 */
	struct memobj *memobj;
};
static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base,
@ -822,13 +824,18 @@ static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base,
struct clear_range_args *args = args0;
uint64_t phys;
struct page *page;
pte_t old;
if (*ptep == PTE_NULL) {
return -ENOENT;
}
phys = *ptep & PT_PHYSMASK;
*ptep = PTE_NULL;
old = xchg(ptep, PTE_NULL);
if ((old & PFL1_DIRTY) && args->memobj) {
memobj_flush_page(args->memobj, phys, PTL1_SIZE);
}
if (args->free_physical) {
page = phys_to_page(phys);
@ -848,6 +855,7 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base,
struct page_table *pt;
int error;
struct page *page;
pte_t old;
if (*ptep == PTE_NULL) {
return -ENOENT;
@ -869,7 +877,11 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base,
if (*ptep & PFL2_SIZE) {
phys = *ptep & PT_PHYSMASK;
*ptep = PTE_NULL;
old = xchg(ptep, PTE_NULL);
if ((old & PFL2_DIRTY) && args->memobj) {
memobj_flush_page(args->memobj, phys, PTL2_SIZE);
}
if (args->free_physical) {
page = phys_to_page(phys);
@ -922,7 +934,7 @@ static int clear_range_l4(void *args0, pte_t *ptep, uint64_t base,
}
static int clear_range(struct page_table *pt, uintptr_t start, uintptr_t end,
int free_physical)
int free_physical, struct memobj *memobj)
{
int error;
struct clear_range_args args;
@ -935,6 +947,8 @@ static int clear_range(struct page_table *pt, uintptr_t start, uintptr_t end,
}
args.free_physical = free_physical;
args.memobj = memobj;
error = walk_pte_l4(pt, 0, start, end, &clear_range_l4, &args);
return error;
}
@ -943,14 +957,14 @@ int ihk_mc_pt_clear_range(page_table_t pt, void *start, void *end)
{
#define KEEP_PHYSICAL 0
return clear_range(pt, (uintptr_t)start, (uintptr_t)end,
KEEP_PHYSICAL);
KEEP_PHYSICAL, NULL);
}
int ihk_mc_pt_free_range(page_table_t pt, void *start, void *end)
int ihk_mc_pt_free_range(page_table_t pt, void *start, void *end, struct memobj *memobj)
{
#define FREE_PHYSICAL 1
return clear_range(pt, (uintptr_t)start, (uintptr_t)end,
FREE_PHYSICAL);
FREE_PHYSICAL, memobj);
}
struct change_attr_args {
@ -1284,7 +1298,7 @@ int set_range_l1(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start,
error = -EBUSY;
ekprintf("set_range_l1(%lx,%lx,%lx):page exists. %d %lx\n",
base, start, end, error, *ptep);
(void)clear_range(args->pt, start, base, KEEP_PHYSICAL);
(void)clear_range(args->pt, start, base, KEEP_PHYSICAL, NULL);
goto out;
}
@ -1332,7 +1346,7 @@ int set_range_l2(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start,
"__alloc_new_pt failed. %d %lx\n",
base, start, end, error, *ptep);
(void)clear_range(args->pt, start, base,
KEEP_PHYSICAL);
KEEP_PHYSICAL, NULL);
goto out;
}
@ -1343,7 +1357,7 @@ int set_range_l2(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start,
ekprintf("set_range_l2(%lx,%lx,%lx):"
"page exists. %d %lx\n",
base, start, end, error, *ptep);
(void)clear_range(args->pt, start, base, KEEP_PHYSICAL);
(void)clear_range(args->pt, start, base, KEEP_PHYSICAL, NULL);
goto out;
}
else {
@ -1400,7 +1414,7 @@ int set_range_l3(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start,
"__alloc_new_pt failed. %d %lx\n",
base, start, end, error, *ptep);
(void)clear_range(args->pt, start, base,
KEEP_PHYSICAL);
KEEP_PHYSICAL, NULL);
goto out;
}
*ptep = virt_to_phys(pt) | PFL3_PDIR_ATTR;
@ -1410,7 +1424,7 @@ int set_range_l3(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start,
ekprintf("set_range_l3(%lx,%lx,%lx):"
"page exists. %d %lx\n",
base, start, end, error, *ptep);
(void)clear_range(args->pt, start, base, KEEP_PHYSICAL);
(void)clear_range(args->pt, start, base, KEEP_PHYSICAL, NULL);
goto out;
}
else {
@ -1449,7 +1463,7 @@ int set_range_l4(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start,
"__alloc_new_pt failed. %d %lx\n",
base, start, end, error, *ptep);
(void)clear_range(args->pt, start, base,
KEEP_PHYSICAL);
KEEP_PHYSICAL, NULL);
goto out;
}
*ptep = virt_to_phys(pt) | PFL4_PDIR_ATTR;

View File

@ -885,6 +885,84 @@ out:
return ss;
}
/*
 * Handle a PAGER_REQ_WRITE request: write `size` bytes found at remote
 * physical address `rpa` into the file behind pager `handle`, starting at
 * file offset `off`.
 *
 * The write is clipped to the file size sampled at entry; a request that
 * starts at or beyond that size writes nothing and returns 0.
 *
 * Returns the number of bytes written, or a negative errno:
 *   - the down_interruptible() result if the wait for pager_sem fails,
 *   - -EBADF  if no pager matches `handle` or it has no writable file,
 *   - -ENOMEM if the remote page could not be mapped into kernel space,
 *   - the vfs_write() error otherwise.
 */
static int pager_req_write(ihk_os_t os, uintptr_t handle, off_t off, size_t size, uintptr_t rpa)
{
	ssize_t ss;
	struct pager *pager;
	struct file *file = NULL;
	uintptr_t phys = -1;
	ihk_device_t dev = ihk_os_to_dev(os);
	void *buf = NULL;
	mm_segment_t fs;
	loff_t pos;
	loff_t fsize;
	size_t len;

	dprintk("pager_req_write(%lx,%lx,%lx,%lx)\n", handle, off, size, rpa);

	ss = down_interruptible(&pager_sem);
	if (ss) {
		printk("pager_req_write(%lx,%lx,%lx,%lx): signaled. %ld\n", handle, off, size, rpa, ss);
		goto out;
	}

	/* The handle is the address of the pager structure itself. */
	list_for_each_entry(pager, &pager_list, list) {
		if ((uintptr_t)pager == handle) {
			file = pager->rwfile;
			break;
		}
	}
	if (file) {
		/* Take a reference so the file stays valid after pager_sem is dropped. */
		get_file(file);
	}
	up(&pager_sem);

	if (!file) {
		ss = -EBADF;
		printk("pager_req_write(%lx,%lx,%lx,%lx):pager not found. %ld\n", handle, off, size, rpa, ss);
		goto out;
	}

	/*
	 * XXX: Is there a way to keep working at roughly the vfs_write()
	 * level while avoiding any update of the file size?
	 */
	fsize = i_size_read(file->f_mapping->host);
	if (off >= fsize) {
		/* Entirely beyond end of file: nothing to write back. */
		ss = 0;
		goto out;
	}

	phys = ihk_device_map_memory(dev, rpa, size);
	buf = ihk_device_map_virtual(dev, phys, size, NULL, 0);
	if (!buf) {
		/*
		 * Without a kernel mapping there is nothing to hand to
		 * vfs_write(); under KERNEL_DS a NULL buffer would be
		 * dereferenced directly.
		 */
		ss = -ENOMEM;
		printk("pager_req_write(%lx,%lx,%lx,%lx):map failed. %ld\n", handle, off, size, rpa, ss);
		goto out;
	}

	/* Clip the write so that it does not extend the file. */
	pos = off;
	len = size;
	if ((off + size) > fsize) {
		len = fsize - off;
	}

	/* buf is a kernel address, so lift the user-space address limit. */
	fs = get_fs();
	set_fs(KERNEL_DS);
	ss = vfs_write(file, buf, len, &pos);
	set_fs(fs);
	if (ss < 0) {
		printk("pager_req_write(%lx,%lx,%lx,%lx):pwrite failed. %ld\n", handle, off, size, rpa, ss);
		goto out;
	}

out:
	if (buf) {
		ihk_device_unmap_virtual(dev, buf, size);
	}
	if (phys != (uintptr_t)-1) {
		ihk_device_unmap_memory(dev, phys, size);
	}
	if (file) {
		fput(file);
	}
	dprintk("pager_req_write(%lx,%lx,%lx,%lx): %ld\n", handle, off, size, rpa, ss);
	return ss;
}
static long pager_call(ihk_os_t os, struct syscall_request *req)
{
long ret;
@ -894,6 +972,7 @@ static long pager_call(ihk_os_t os, struct syscall_request *req)
#define PAGER_REQ_CREATE 0x0001
#define PAGER_REQ_RELEASE 0x0002
#define PAGER_REQ_READ 0x0003
#define PAGER_REQ_WRITE 0x0004
case PAGER_REQ_CREATE:
ret = pager_req_create(os, req->args[1], req->args[2]);
break;
@ -906,6 +985,10 @@ static long pager_call(ihk_os_t os, struct syscall_request *req)
ret = pager_req_read(os, req->args[1], req->args[2], req->args[3], req->args[4]);
break;
case PAGER_REQ_WRITE:
ret = pager_req_write(os, req->args[1], req->args[2], req->args[3], req->args[4]);
break;
default:
ret = -ENOSYS;
break;

View File

@ -33,12 +33,14 @@ static memobj_release_func_t fileobj_release;
static memobj_ref_func_t fileobj_ref;
static memobj_get_page_func_t fileobj_get_page;
static memobj_copy_page_func_t fileobj_copy_page;
static memobj_flush_page_func_t fileobj_flush_page;
/* Operation table binding the generic memobj interface to the fileobj handlers. */
static struct memobj_ops fileobj_ops = {
	.release    = fileobj_release,
	.ref        = fileobj_ref,
	.get_page   = fileobj_get_page,
	.copy_page  = fileobj_copy_page,
	.flush_page = fileobj_flush_page,
};
static struct fileobj *to_fileobj(struct memobj *memobj)
@ -528,3 +530,31 @@ out:
memobj, orgpa, p2align, newpa);
return newpa;
}
/*
 * flush_page handler for file-backed memory objects.
 *
 * Forwards a PAGER_REQ_WRITE request (carried on the __NR_mmap syscall
 * number) asking for `pgsize` bytes at physical address `phys` to be
 * written at the file offset recorded in the page's descriptor.
 *
 * The memobj lock is released for the duration of the forwarded request
 * and re-acquired before returning. A result other than `pgsize` is only
 * reported through dkprintf(); the function always returns 0.
 */
static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys,
		size_t pgsize)
{
	struct fileobj *fobj = to_fileobj(memobj);
	struct page *pg = phys_to_page(phys);
	ihk_mc_user_context_t uctx;
	ssize_t written;

	memobj_unlock(&fobj->memobj);

	/* Build the pager request: opcode, pager handle, offset, length, source. */
	ihk_mc_syscall_arg0(&uctx) = PAGER_REQ_WRITE;
	ihk_mc_syscall_arg1(&uctx) = fobj->handle;
	ihk_mc_syscall_arg2(&uctx) = pg->offset;
	ihk_mc_syscall_arg3(&uctx) = pgsize;
	ihk_mc_syscall_arg4(&uctx) = phys;

	written = syscall_generic_forwarding(__NR_mmap, &uctx);
	if (written != pgsize) {
		dkprintf("fileobj_flush_page(%p,%lx,%lx): %ld (%lx)\n",
				memobj, phys, pgsize, written, written);
		/* fall through: a short or failed write is not reported to the caller */
	}

	memobj_lock(&fobj->memobj);
	return 0;
}

View File

@ -15,12 +15,14 @@ typedef void memobj_release_func_t(struct memobj *obj);
typedef void memobj_ref_func_t(struct memobj *obj);
typedef int memobj_get_page_func_t(struct memobj *obj, off_t off, int p2align, uintptr_t *physp);
typedef uintptr_t memobj_copy_page_func_t(struct memobj *obj, uintptr_t orgphys, int p2align);
typedef int memobj_flush_page_func_t(struct memobj *obj, uintptr_t phys, size_t pgsize);
/*
 * Per-type operation table of a memory object; the memobj_*() inline
 * wrappers dispatch through these pointers.
 */
struct memobj_ops {
	memobj_release_func_t		*release;
	memobj_ref_func_t		*ref;
	memobj_get_page_func_t		*get_page;
	memobj_copy_page_func_t		*copy_page;
	memobj_flush_page_func_t	*flush_page;
};
static inline void memobj_release(struct memobj *obj)
@ -45,6 +47,11 @@ static inline uintptr_t memobj_copy_page(struct memobj *obj,
return (*obj->ops->copy_page)(obj, orgphys, p2align);
}
static inline int memobj_flush_page(struct memobj *obj, uintptr_t phys, size_t pgsize)
{
return (*obj->ops->flush_page)(obj, phys, pgsize);
}
static inline void memobj_lock(struct memobj *obj)
{
ihk_mc_spinlock_lock_noirq(&obj->lock);

View File

@ -7,6 +7,7 @@ enum pager_op {
PAGER_REQ_CREATE = 0x0001,
PAGER_REQ_RELEASE = 0x0002,
PAGER_REQ_READ = 0x0003,
PAGER_REQ_WRITE = 0x0004,
};
/*

View File

@ -324,15 +324,15 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range)
memobj_lock(range->memobj);
}
error = ihk_mc_pt_free_range(vm->page_table,
(void *)start, (void *)end);
(void *)start, (void *)end, range->memobj);
if (range->memobj) {
memobj_unlock(range->memobj);
}
ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
if (error && (error != -ENOENT)) {
ekprintf("free_process_memory_range(%p,%lx-%lx):"
"ihk_mc_pt_free_range(%lx-%lx) failed. %d\n",
vm, start0, end0, start, end, error);
"ihk_mc_pt_free_range(%lx-%lx,%p) failed. %d\n",
vm, start0, end0, start, end, range->memobj, error);
/* through */
}
}

View File

@ -464,14 +464,6 @@ SYSCALL_DECLARE(mmap)
goto out2;
}
if ((flags & MAP_SHARED) && !(flags & MAP_ANONYMOUS)) {
ekprintf("sys_mmap(%lx,%lx,%x,%x,%x,%lx):NYI:shared mapped file%lx\n",
addr0, len0, prot, flags, fd, off,
(flags & ~(supported_flags | ignored_flags)));
error = -EINVAL;
goto out2;
}
ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock);
if (flags & MAP_FIXED) {

View File

@ -4,6 +4,8 @@
#include <ihk/types.h>
#include <memory.h>
struct memobj;
enum ihk_mc_gma_type {
IHK_MC_GMA_MAP_START,
IHK_MC_GMA_MAP_END,
@ -96,7 +98,7 @@ int ihk_mc_pt_change_page(page_table_t pt, void *virt,
int ihk_mc_pt_clear_page(page_table_t pt, void *virt);
int ihk_mc_pt_clear_large_page(page_table_t pt, void *virt);
int ihk_mc_pt_clear_range(page_table_t pt, void *start, void *end);
int ihk_mc_pt_free_range(page_table_t pt, void *start, void *end);
int ihk_mc_pt_free_range(page_table_t pt, void *start, void *end, struct memobj *memobj);
int ihk_mc_pt_change_attr_range(page_table_t pt, void *start, void *end,
enum ihk_mc_pt_attribute clrattr,
enum ihk_mc_pt_attribute setattr);