shmobj: Support large page
Mixing page sizes is allowed by shmobj.

Change-Id: Ic48b71da2db6ce3f68fa3dbc8ad5ae96347d6018
Refs: #1381
Refs: #1458
This commit is contained in:
committed by
Masamichi Takagi
parent
4b66373813
commit
9a60997ea0
@ -17,6 +17,7 @@
|
||||
#include <ihk/types.h>
|
||||
#include <ihk/atomic.h>
|
||||
#include <ihk/lock.h>
|
||||
#include <ihk/mm.h>
|
||||
#include <errno.h>
|
||||
#include <list.h>
|
||||
#include <pager.h>
|
||||
@ -61,6 +62,8 @@ typedef uintptr_t memobj_copy_page_func_t(struct memobj *obj, uintptr_t orgphys,
|
||||
typedef int memobj_flush_page_func_t(struct memobj *obj, uintptr_t phys, size_t pgsize);
|
||||
typedef int memobj_invalidate_page_func_t(struct memobj *obj, uintptr_t phys, size_t pgsize);
|
||||
typedef int memobj_lookup_page_func_t(struct memobj *obj, off_t off, int p2align, uintptr_t *physp, unsigned long *flag);
|
||||
typedef int memobj_update_page_func_t(struct memobj *obj, page_table_t pt,
|
||||
struct page *orig_page, void *vaddr);
|
||||
|
||||
struct memobj_ops {
|
||||
memobj_free_func_t *free;
|
||||
@ -69,6 +72,7 @@ struct memobj_ops {
|
||||
memobj_flush_page_func_t *flush_page;
|
||||
memobj_invalidate_page_func_t *invalidate_page;
|
||||
memobj_lookup_page_func_t *lookup_page;
|
||||
memobj_update_page_func_t *update_page;
|
||||
};
|
||||
|
||||
static inline int memobj_ref(struct memobj *obj)
|
||||
@ -131,6 +135,15 @@ static inline int memobj_lookup_page(struct memobj *obj, off_t off,
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
static inline int memobj_update_page(struct memobj *obj, page_table_t pt,
|
||||
struct page *orig_page, void *vaddr)
|
||||
{
|
||||
if (obj->ops->update_page) {
|
||||
return (*obj->ops->update_page)(obj, pt, orig_page, vaddr);
|
||||
}
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
static inline int memobj_has_pager(struct memobj *obj)
|
||||
{
|
||||
return !!(obj->flags & MF_HAS_PAGER);
|
||||
|
||||
@ -23,6 +23,7 @@ struct page {
|
||||
ihk_atomic_t count;
|
||||
ihk_atomic64_t mapped;
|
||||
off_t offset;
|
||||
int pgshift; /* Using by shmobj */
|
||||
};
|
||||
|
||||
/* mode */
|
||||
|
||||
@ -107,5 +107,6 @@ void shmobj_list_unlock(void);
|
||||
int shmobj_create_indexed(struct shmid_ds *ds, struct shmobj **objp);
|
||||
void shmlock_user_free(struct shmlock_user *user);
|
||||
int shmlock_user_get(uid_t ruid, struct shmlock_user **userp);
|
||||
struct shmobj *to_shmobj(struct memobj *memobj);
|
||||
|
||||
#endif /* HEADER_SHM_H */
|
||||
|
||||
16
kernel/mem.c
16
kernel/mem.c
@ -45,6 +45,7 @@
|
||||
#include <sysfs.h>
|
||||
#include <ihk/debug.h>
|
||||
#include <bootparam.h>
|
||||
#include <memobj.h>
|
||||
|
||||
//#define DEBUG_PRINT_MEM
|
||||
|
||||
@ -2712,3 +2713,18 @@ int ihk_mc_get_mem_user_page(void *arg0, page_table_t pt, pte_t *ptep, void *pga
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int is_splitable(struct page *page, uint32_t memobj_flags)
|
||||
{
|
||||
int ret = 1;
|
||||
|
||||
if (page && (page_is_in_memobj(page)
|
||||
|| page_is_multi_mapped(page))) {
|
||||
if (memobj_flags & MF_SHM) {
|
||||
goto out;
|
||||
}
|
||||
ret = 0;
|
||||
}
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -33,6 +33,7 @@
|
||||
#include <timer.h>
|
||||
#include <mman.h>
|
||||
#include <xpmem.h>
|
||||
#include <shm.h>
|
||||
#include <rusage_private.h>
|
||||
#include <ihk/monitor.h>
|
||||
#include <ihk/debug.h>
|
||||
@ -914,20 +915,19 @@ int split_process_memory_range(struct process_vm *vm, struct vm_range *range,
|
||||
{
|
||||
int error;
|
||||
struct vm_range *newrange = NULL;
|
||||
unsigned long page_mask;
|
||||
|
||||
dkprintf("split_process_memory_range(%p,%lx-%lx,%lx,%p)\n",
|
||||
vm, range->start, range->end, addr, splitp);
|
||||
|
||||
if (range->pgshift != 0) {
|
||||
page_mask = (1 << range->pgshift) - 1;
|
||||
if (addr & page_mask) {
|
||||
if (addr & ((1UL << range->pgshift) - 1)) {
|
||||
/* split addr is not aligned */
|
||||
range->pgshift = 0;
|
||||
}
|
||||
}
|
||||
|
||||
error = ihk_mc_pt_split(vm->address_space->page_table, vm, (void *)addr);
|
||||
error = ihk_mc_pt_split(vm->address_space->page_table, vm,
|
||||
range, (void *)addr);
|
||||
if (error) {
|
||||
ekprintf("split_process_memory_range:"
|
||||
"ihk_mc_pt_split failed. %d\n", error);
|
||||
@ -935,6 +935,37 @@ int split_process_memory_range(struct process_vm *vm, struct vm_range *range,
|
||||
}
|
||||
// memory_stat_rss_add() is called in child-node, i.e. ihk_mc_pt_split() to deal with L3->L2 case
|
||||
|
||||
if (range->memobj && range->memobj->flags & MF_SHM) {
|
||||
/* Target range is shared memory */
|
||||
uintptr_t _phys = 0;
|
||||
struct page *page = NULL;
|
||||
unsigned long page_mask;
|
||||
|
||||
/* Lookup the page split target */
|
||||
error = memobj_lookup_page(range->memobj,
|
||||
range->objoff + addr - range->start,
|
||||
0, &_phys, NULL);
|
||||
if (error && error != -ENOENT) {
|
||||
ekprintf("%s: memobj_lookup_page failed. %d\n",
|
||||
__func__, error);
|
||||
goto out;
|
||||
}
|
||||
page = phys_to_page(_phys);
|
||||
|
||||
if (page) {
|
||||
page_mask = ~((1UL << page->pgshift) - 1);
|
||||
/* Update existing page */
|
||||
error = memobj_update_page(range->memobj,
|
||||
vm->address_space->page_table, page,
|
||||
(void *)(addr & page_mask));
|
||||
if (error) {
|
||||
ekprintf("%s: memobj_update_page failed. %d\n",
|
||||
__func__, error);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
newrange = kmalloc(sizeof(struct vm_range), IHK_MC_AP_NOWAIT);
|
||||
if (!newrange) {
|
||||
ekprintf("split_process_memory_range(%p,%lx-%lx,%lx,%p):"
|
||||
@ -1865,7 +1896,9 @@ int invalidate_process_memory_range(struct process_vm *vm,
|
||||
if (ptep && pte_is_contiguous(ptep)) {
|
||||
if (!page_is_contiguous_head(ptep, pgsize)) {
|
||||
// start pte is not contiguous head
|
||||
error = split_contiguous_pages(ptep, pgsize);
|
||||
error = split_contiguous_pages(ptep, pgsize,
|
||||
range->memobj ?
|
||||
range->memobj->flags : 0);
|
||||
if (error) {
|
||||
ihk_spinlock_t *page_table_lock;
|
||||
|
||||
@ -1883,7 +1916,9 @@ int invalidate_process_memory_range(struct process_vm *vm,
|
||||
if (ptep && pte_is_contiguous(ptep)) {
|
||||
if (!page_is_contiguous_tail(ptep, pgsize)) {
|
||||
// end pte is not contiguous tail
|
||||
error = split_contiguous_pages(ptep, pgsize);
|
||||
error = split_contiguous_pages(ptep, pgsize,
|
||||
range->memobj ?
|
||||
range->memobj->flags : 0);
|
||||
if (error) {
|
||||
ihk_spinlock_t *page_table_lock;
|
||||
|
||||
|
||||
105
kernel/shmobj.c
105
kernel/shmobj.c
@ -32,14 +32,16 @@ static memobj_free_func_t shmobj_free;
|
||||
static memobj_get_page_func_t shmobj_get_page;
|
||||
static memobj_invalidate_page_func_t shmobj_invalidate_page;
|
||||
static memobj_lookup_page_func_t shmobj_lookup_page;
|
||||
static memobj_update_page_func_t shmobj_update_page;
|
||||
|
||||
static struct memobj_ops shmobj_ops = {
|
||||
.free = &shmobj_free,
|
||||
.get_page = &shmobj_get_page,
|
||||
.lookup_page = &shmobj_lookup_page,
|
||||
.update_page = &shmobj_update_page,
|
||||
};
|
||||
|
||||
/*
 * Convert a generic memobj pointer back to its containing shmobj.
 * The cast is valid only if struct shmobj embeds struct memobj as its
 * first member — presumably so; confirm against the definition in shm.h.
 *
 * External linkage: declared in shm.h so callers outside shmobj.c
 * (e.g. the split path in process memory handling) can use it.
 */
struct shmobj *to_shmobj(struct memobj *memobj)
{
	return (struct shmobj *)memobj;
}
||||
@ -86,7 +88,8 @@ static struct page *page_list_lookup(struct shmobj *obj, off_t off)
|
||||
struct page *page;
|
||||
|
||||
list_for_each_entry(page, &obj->page_list, list) {
|
||||
if (page->offset == off) {
|
||||
if (page->offset <= off &&
|
||||
off < page->offset + (1UL << page->pgshift)) {
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
@ -245,7 +248,6 @@ static void shmobj_destroy(struct shmobj *obj)
|
||||
}
|
||||
|
||||
/* zap page_list */
|
||||
npages = (size_t)1 << (obj->pgshift - PAGE_SHIFT);
|
||||
for (;;) {
|
||||
struct page *page;
|
||||
void *page_va;
|
||||
@ -259,6 +261,7 @@ static void shmobj_destroy(struct shmobj *obj)
|
||||
page_list_remove(obj, page);
|
||||
phys = page_to_phys(page);
|
||||
page_va = phys_to_virt(phys);
|
||||
npages = (size_t)1 << (page->pgshift - PAGE_SHIFT);
|
||||
|
||||
if (ihk_atomic_read(&page->count) != 1) {
|
||||
kprintf("%s: WARNING: page count for phys 0x%lx is invalid\n",
|
||||
@ -270,8 +273,8 @@ static void shmobj_destroy(struct shmobj *obj)
|
||||
* (3) terminate() --> ... --> free_process_memory_range()
|
||||
*/
|
||||
|
||||
size_t free_pgsize = 1UL << obj->pgshift;
|
||||
size_t free_size = 1UL << obj->pgshift;
|
||||
size_t free_pgsize = 1UL << page->pgshift;
|
||||
size_t free_size = 1UL << page->pgshift;
|
||||
|
||||
ihk_mc_free_pages_user(page_va, npages);
|
||||
dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n",
|
||||
@ -369,12 +372,6 @@ static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align,
|
||||
memobj, off, p2align, physp, error);
|
||||
goto out;
|
||||
}
|
||||
if (p2align != (obj->pgshift - PAGE_SHIFT)) {
|
||||
error = -ENOMEM;
|
||||
ekprintf("shmobj_get_page(%p,%#lx,%d,%p):pgsize mismatch. %d\n",
|
||||
memobj, off, p2align, physp, error);
|
||||
goto out;
|
||||
}
|
||||
if (obj->real_segsz <= off) {
|
||||
error = -ERANGE;
|
||||
ekprintf("shmobj_get_page(%p,%#lx,%d,%p):beyond the end. %d\n",
|
||||
@ -416,6 +413,7 @@ static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align,
|
||||
memset(virt, 0, npages*PAGE_SIZE);
|
||||
page->mode = PM_MAPPED;
|
||||
page->offset = off;
|
||||
page->pgshift = p2align + PAGE_SHIFT;
|
||||
|
||||
/* Page contents should survive over unmap */
|
||||
ihk_atomic_set(&page->count, 1);
|
||||
@ -460,24 +458,12 @@ static int shmobj_lookup_page(struct memobj *memobj, off_t off, int p2align,
|
||||
memobj, off, p2align, physp, error);
|
||||
goto out;
|
||||
}
|
||||
if (p2align != (obj->pgshift - PAGE_SHIFT)) {
|
||||
error = -ENOMEM;
|
||||
ekprintf("shmobj_lookup_page(%p,%#lx,%d,%p):pgsize mismatch. %d\n",
|
||||
memobj, off, p2align, physp, error);
|
||||
goto out;
|
||||
}
|
||||
if (obj->real_segsz <= off) {
|
||||
error = -ERANGE;
|
||||
ekprintf("shmobj_lookup_page(%p,%#lx,%d,%p):beyond the end. %d\n",
|
||||
memobj, off, p2align, physp, error);
|
||||
goto out;
|
||||
}
|
||||
if ((obj->real_segsz - off) < (PAGE_SIZE << p2align)) {
|
||||
error = -ENOSPC;
|
||||
ekprintf("shmobj_lookup_page(%p,%#lx,%d,%p):too large. %d\n",
|
||||
memobj, off, p2align, physp, error);
|
||||
goto out;
|
||||
}
|
||||
|
||||
page_list_lock(obj);
|
||||
page = page_list_lookup(obj, off);
|
||||
@ -501,3 +487,76 @@ out:
|
||||
memobj, off, p2align, physp, error, phys);
|
||||
return error;
|
||||
} /* shmobj_lookup_page() */
|
||||
|
||||
/*
 * shmobj_update_page - fix up shmobj page descriptors after a large
 * page mapping has been split in the page table.
 *
 * Shrinks @orig_page's pgshift to match the (now smaller) pte that maps
 * @vaddr, then walks the rest of the original large-page extent pte by
 * pte, creating a page descriptor for each sub-page (inheriting mode,
 * offset, count and mapped from @orig_page) and inserting it into the
 * object's page_list.
 *
 * @memobj: shared-memory object owning the page
 * @pt: page table that was already split for @vaddr
 * @orig_page: descriptor of the original (large) page
 * @vaddr: virtual address of the start of the original page
 * Returns 0 on success, -ENOENT on bad arguments or missing pte.
 *
 * Fix vs. previous version: three dkprintf calls had format/argument
 * mismatches (missing __func__ for %s, missing error for %d), which is
 * undefined behavior per the C standard's fprintf argument rules.
 */
static int shmobj_update_page(struct memobj *memobj, page_table_t pt,
		struct page *orig_page, void *vaddr)
{
	struct shmobj *obj = to_shmobj(memobj);
	int error;
	pte_t *pte;
	size_t pte_size, orig_pgsize, page_off;
	struct page *page;
	int p2align;
	uintptr_t base_phys, phys;

	dkprintf("%s(%p,%p,%p,%p)\n", __func__, memobj, pt, orig_page, vaddr);
	memobj_ref(&obj->memobj);

	if (!pt || !orig_page || !vaddr) {
		error = -ENOENT;
		dkprintf("%s(%p,%p,%p,%p): invalid argument. %d\n", __func__,
				memobj, pt, orig_page, vaddr, error);
		goto out;
	}
	base_phys = page_to_phys(orig_page);
	pte = ihk_mc_pt_lookup_pte(pt, vaddr, 0, NULL, &pte_size, &p2align);
	if (!pte) {
		error = -ENOENT;
		dkprintf("%s(%p,%p,%p,%p): pte not found. %d\n",
				__func__, memobj, pt, orig_page, vaddr, error);
		goto out;
	}

	orig_pgsize = (1UL << orig_page->pgshift);

	/* The original descriptor now covers only the first (smaller) pte */
	orig_page->pgshift = p2align + PAGE_SHIFT;

	/* Create descriptors for the remainder of the old extent, pte by pte */
	page_off = pte_size;
	while (page_off < orig_pgsize) {
		/* void* arithmetic relies on the GCC extension (kernel style) */
		pte = ihk_mc_pt_lookup_pte(pt, vaddr + page_off, 0, NULL,
				&pte_size, &p2align);
		if (!pte) {
			error = -ENOENT;
			dkprintf("%s(%p,%p,%p,%p): pte not found. %d\n",
					__func__, memobj, pt, orig_page,
					vaddr, error);
			goto out;
		}

		phys = base_phys + page_off;
		page = phys_to_page_insert_hash(phys);

		page->mode = orig_page->mode;
		page->offset = orig_page->offset + page_off;
		page->pgshift = p2align + PAGE_SHIFT;

		/* Sub-pages inherit the original page's reference counts */
		ihk_atomic_set(&page->count,
				ihk_atomic_read(&orig_page->count));
		ihk_atomic64_set(&page->mapped,
				ihk_atomic64_read(&orig_page->mapped));
		page_list_insert(obj, page);

		page_off += pte_size;
	}

	error = 0;

out:
	memobj_unref(&obj->memobj);
	dkprintf("%s(%p,%p,%p,%p):%d\n", __func__,
			memobj, pt, orig_page, vaddr, error);
	return error;
} /* shmobj_update_page() */
|
||||
|
||||
@ -1714,7 +1714,8 @@ do_mmap(const uintptr_t addr0, const size_t len0, const int prot,
|
||||
}
|
||||
p2align = pgshift - PAGE_SHIFT;
|
||||
}
|
||||
else if ((flags & MAP_PRIVATE) && (flags & MAP_ANONYMOUS)
|
||||
else if (((flags & (MAP_PRIVATE | MAP_SHARED))
|
||||
&& (flags & MAP_ANONYMOUS))
|
||||
&& !proc->thp_disable) {
|
||||
pgshift = 0; /* transparent huge page */
|
||||
p2align = PAGE_P2ALIGN;
|
||||
@ -1950,7 +1951,7 @@ do_mmap(const uintptr_t addr0, const size_t len0, const int prot,
|
||||
memset(&ads, 0, sizeof(ads));
|
||||
ads.shm_segsz = len;
|
||||
ads.shm_perm.mode = SHM_DEST;
|
||||
ads.init_pgshift = PAGE_SHIFT;
|
||||
ads.init_pgshift = PAGE_SHIFT + p2align;
|
||||
error = shmobj_create(&ads, &memobj);
|
||||
if (error) {
|
||||
ekprintf("do_mmap:shmobj_create failed. %d\n", error);
|
||||
|
||||
Reference in New Issue
Block a user