From 8eb3bf3559f948c127b84e7fe60d8d7008dad2b0 Mon Sep 17 00:00:00 2001
From: Balazs Gerofi
Date: Sun, 11 Sep 2016 08:25:38 -0400
Subject: [PATCH] physical page management: eliminate the static page frame
 array and maintain page structures dynamically, covering only file mappings.
 Use a hash table for address <-> page structure conversion.

---
 kernel/fileobj.c      |  15 +++--
 kernel/include/page.h |   4 +-
 kernel/mem.c          | 150 +++++++++++++++++++++++++++---------------
 kernel/process.c      |   4 +-
 kernel/shmobj.c       |   2 +-
 kernel/zeroobj.c      |   2 +-
 6 files changed, 115 insertions(+), 62 deletions(-)

diff --git a/kernel/fileobj.c b/kernel/fileobj.c
index c976673f..2480491f 100644
--- a/kernel/fileobj.c
+++ b/kernel/fileobj.c
@@ -430,7 +430,7 @@ static int fileobj_get_page(struct memobj *memobj, off_t off, int p2align, uintp
 		goto out;
 	}
 	phys = virt_to_phys(virt);
-	page = phys_to_page(phys);
+	page = phys_to_page_insert_hash(phys);
 	if (page->mode != PM_NONE) {
 		panic("fileobj_get_page:invalid new page");
 	}
@@ -502,10 +502,10 @@ static uintptr_t fileobj_copy_page(
 
 	memobj_lock(memobj);
 	for (;;) {
-		if (orgpage->mode != PM_MAPPED) {
+		if (!orgpage || orgpage->mode != PM_MAPPED) {
 			kprintf("fileobj_copy_page(%p,%lx,%d):"
 					"invalid cow page. %x\n",
-					memobj, orgpa, p2align, orgpage->mode);
+					memobj, orgpa, p2align, orgpage ? orgpage->mode : 0);
 			panic("fileobj_copy_page:invalid cow page");
 		}
 		count = ihk_atomic_read(&orgpage->count);
@@ -527,7 +527,9 @@ static uintptr_t fileobj_copy_page(
 		memcpy(newkva, orgkva, pgsize);
 		ihk_atomic_dec(&orgpage->count);
 		newpa = virt_to_phys(newkva);
-		page_map(phys_to_page(newpa));
+		if (phys_to_page(newpa)) {
+			page_map(phys_to_page(newpa));
+		}
 		newkva = NULL;	/* avoid ihk_mc_free_pages() */
 		break;
 	}
@@ -563,6 +565,11 @@ static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys,
 	ssize_t ss;
 
 	page = phys_to_page(phys);
+	if (!page) {
+		kprintf("%s: warning: tried to flush non-existent page for phys addr: 0x%lx\n",
+				__FUNCTION__, phys);
+		return 0;
+	}
 	memobj_unlock(&obj->memobj);
 
 	ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_WRITE;
diff --git a/kernel/include/page.h b/kernel/include/page.h
index b9142e29..3f5835d8 100644
--- a/kernel/include/page.h
+++ b/kernel/include/page.h
@@ -17,8 +17,9 @@
 
 struct page {
 	struct list_head	list;
+	struct list_head	hash;
 	uint8_t			mode;
-	uint8_t			padding[3];
+	uint64_t		phys;
 	ihk_atomic_t		count;
 	off_t			offset;
 };
@@ -38,6 +39,7 @@ enum page_mode {
 struct page *phys_to_page(uintptr_t phys);
 uintptr_t page_to_phys(struct page *page);
 int page_unmap(struct page *page);
+struct page *phys_to_page_insert_hash(uint64_t phys);
 
 void begin_free_pages_pending(void);
 void finish_free_pages_pending(void);
diff --git a/kernel/mem.c b/kernel/mem.c
index ebcd0f99..5f4ef85c 100644
--- a/kernel/mem.c
+++ b/kernel/mem.c
@@ -50,7 +50,6 @@
 
 static struct ihk_page_allocator_desc *pa_allocator;
 static unsigned long pa_start, pa_end;
-static struct page *pa_pages;
 
 extern void unhandled_page_fault(struct thread *, void *, void *);
 extern int interrupt_from_user(void *);
@@ -99,17 +98,16 @@ static void free_pages(void *va, int npages)
 	struct page *page;
 
 	page = phys_to_page(virt_to_phys(va));
-	if (!page) {
-		panic("free_pages:struct page not found");
-	}
-	if (page->mode != PM_NONE) {
-		panic("free_pages:not PM_NONE");
-	}
-	if (pendings->next != NULL) {
-		page->mode = PM_PENDING_FREE;
-		page->offset = npages;
-		list_add_tail(&page->list, pendings);
-		return;
+	if (page) {
+		if (page->mode != PM_NONE) {
+			panic("free_pages:not PM_NONE");
+		}
+		if (pendings->next != NULL) {
+			page->mode = PM_PENDING_FREE;
+			page->offset = npages;
+			list_add_tail(&page->list, pendings);
+			return;
+		}
 	}
 
 	ihk_pagealloc_free(pa_allocator, virt_to_phys(va), npages);
@@ -392,16 +390,11 @@ out:
 	return;
 }
 
-static void page_allocator_init(void)
+static void page_allocator_init(uint64_t start, uint64_t end, int initial)
 {
 	unsigned long page_map_pa, pages;
 	void *page_map;
 	unsigned int i;
-	uint64_t start;
-	uint64_t end;
-
-	start = ihk_mc_get_memory_address(IHK_MC_GMA_AVAIL_START, 0);
-	end = ihk_mc_get_memory_address(IHK_MC_GMA_AVAIL_END, 0);
 
 	start &= PAGE_MASK;
 	pa_start = start & LARGE_PAGE_MASK;
@@ -475,32 +468,94 @@ static void numa_init(void)
 	}
 }
 
+#define PHYS_PAGE_HASH_SHIFT	(10)
+#define PHYS_PAGE_HASH_SIZE	(1 << PHYS_PAGE_HASH_SHIFT)
+#define PHYS_PAGE_HASH_MASK	(PHYS_PAGE_HASH_SIZE - 1)
+
+/*
+ * The page hash only tracks pages that are mapped in non-anonymous
+ * mappings and thus it is initially empty.
+ */
+struct list_head page_hash[PHYS_PAGE_HASH_SIZE];
+ihk_spinlock_t page_hash_locks[PHYS_PAGE_HASH_SIZE];
+
+static void page_init(void)
+{
+	int i;
+
+	for (i = 0; i < PHYS_PAGE_HASH_SIZE; ++i) {
+		ihk_mc_spinlock_init(&page_hash_locks[i]);
+		INIT_LIST_HEAD(&page_hash[i]);
+	}
+
+	return;
+}
+
+/* XXX: the corresponding page_hash_locks entry must be held */
+static struct page *__phys_to_page(uintptr_t phys)
+{
+	int hash = (phys >> PAGE_SHIFT) & PHYS_PAGE_HASH_MASK;
+	struct page *page_iter, *page = NULL;
+
+	list_for_each_entry(page_iter, &page_hash[hash], hash) {
+		if (page_iter->phys == phys) {
+			page = page_iter;
+			break;
+		}
+	}
+
+	return page;
+}
 
 struct page *phys_to_page(uintptr_t phys)
 {
-	int64_t ix;
+	int hash = (phys >> PAGE_SHIFT) & PHYS_PAGE_HASH_MASK;
+	struct page *page = NULL;
+	unsigned long irqflags;
 
-	if ((phys < pa_start) || (pa_end <= phys)) {
-		return NULL;
-	}
+	irqflags = ihk_mc_spinlock_lock(&page_hash_locks[hash]);
+	page = __phys_to_page(phys);
+	ihk_mc_spinlock_unlock(&page_hash_locks[hash], irqflags);
 
-	ix = (phys - pa_start) >> PAGE_SHIFT;
-	return &pa_pages[ix];
+	return page;
 }
 
 uintptr_t page_to_phys(struct page *page)
 {
-	int64_t ix;
-	uintptr_t phys;
+	return page ? page->phys : 0;
+}
 
-	ix = page - pa_pages;
-	phys = pa_start + (ix << PAGE_SHIFT);
-	if ((phys < pa_start) || (pa_end <= phys)) {
-		ekprintf("page_to_phys(%p):not a pa_pages[]:%p %lx-%lx\n",
-				page, pa_pages, pa_start, pa_end);
-		panic("page_to_phys");
+/*
+ * Allocate a page structure and add it to the hash if it doesn't exist yet.
+ * NOTE: page->count is zero for new pages and the caller is responsible
+ * for increasing it.
+ */
+struct page *phys_to_page_insert_hash(uint64_t phys)
+{
+	int hash = (phys >> PAGE_SHIFT) & PHYS_PAGE_HASH_MASK;
+	struct page *page = NULL;
+	unsigned long irqflags;
+
+	irqflags = ihk_mc_spinlock_lock(&page_hash_locks[hash]);
+	page = __phys_to_page(phys);
+	if (!page) {
+		page = kmalloc(sizeof(*page), IHK_MC_AP_CRITICAL);
+		if (!page) {
+			kprintf("%s: error allocating page\n", __FUNCTION__);
+			goto out;
+		}
+
+		list_add(&page->hash, &page_hash[hash]);
+		page->phys = phys;
+		page->mode = PM_NONE;
+		INIT_LIST_HEAD(&page->list);
+		ihk_atomic_set(&page->count, 0);
 	}
-	return phys;
+
+out:
+	ihk_mc_spinlock_unlock(&page_hash_locks[hash], irqflags);
+
+	return page;
 }
 
 int page_unmap(struct page *page)
 {
@@ -513,35 +568,19 @@ int page_unmap(struct page *page)
 		return 0;
 	}
 
-	/* no mapping exist */
+	/* No mapping exists. TODO: is this check still needed?
 	if (page->mode != PM_MAPPED) {
 		return 1;
 	}
+	*/
 
-	list_del(&page->list);
+	list_del(&page->hash);
 	page->mode = PM_NONE;
 	dkprintf("page_unmap(%p %x %d): 1\n", page, page->mode, page->count);
+	kfree(page);
 	return 1;
 }
 
-static void page_init(void)
-{
-	size_t npages;
-	size_t allocsize;
-	size_t allocpages;
-
-	if (sizeof(ihk_atomic_t) != sizeof(uint32_t)) {
-		panic("sizeof(ihk_atomic_t) is not 32 bit");
-	}
-	npages = (pa_end - pa_start) >> PAGE_SHIFT;
-	allocsize = sizeof(struct page) * npages;
-	allocpages = (allocsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
-
-	pa_pages = ihk_mc_alloc_pages(allocpages, IHK_MC_AP_CRITICAL);
-	memset(pa_pages, 0, allocsize);
-	return;
-}
-
 static char *memdebug = NULL;
 
 static void *___kmalloc(int size, enum ihk_mc_ap_flag flag);
@@ -667,7 +706,10 @@ void ihk_mc_clean_micpa(void){
 
 void mem_init(void)
 {
 	numa_init();
-	page_allocator_init();
+	page_allocator_init(
+		ihk_mc_get_memory_address(IHK_MC_GMA_AVAIL_START, 0),
+		ihk_mc_get_memory_address(IHK_MC_GMA_AVAIL_END, 0), 1);
+	page_init();
 
 	/* Prepare the kernel virtual map space */
diff --git a/kernel/process.c b/kernel/process.c
index f5b755f4..ed9904c6 100644
--- a/kernel/process.c
+++ b/kernel/process.c
@@ -1542,7 +1542,9 @@ retry:
 					__FUNCTION__, pgaddr, pgsize);
 			memset(virt, 0, pgsize);
 			phys = virt_to_phys(virt);
-			page_map(phys_to_page(phys));
+			if (phys_to_page(phys)) {
+				page_map(phys_to_page(phys));
+			}
 		}
 	}
 	else {
diff --git a/kernel/shmobj.c b/kernel/shmobj.c
index bd1bb7fd..f87b9b7d 100644
--- a/kernel/shmobj.c
+++ b/kernel/shmobj.c
@@ -404,7 +404,7 @@ static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align,
 		goto out;
 	}
 	phys = virt_to_phys(virt);
-	page = phys_to_page(phys);
+	page = phys_to_page_insert_hash(phys);
 	if (page->mode != PM_NONE) {
 		fkprintf("shmobj_get_page(%p,%#lx,%d,%p):"
 				"page %p %#lx %d %d %#lx\n",
diff --git a/kernel/zeroobj.c b/kernel/zeroobj.c
index a70a89f2..04f1b8e6 100644
--- a/kernel/zeroobj.c
+++ b/kernel/zeroobj.c
@@ -112,7 +112,7 @@ static int alloc_zeroobj(void)
 		goto out;
 	}
 	phys = virt_to_phys(virt);
-	page = phys_to_page(phys);
+	page = phys_to_page_insert_hash(phys);
 	if (page->mode != PM_NONE) {
 		fkprintf("alloc_zeroobj():"
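
Reviewer note (below the last hunk, where apply tools ignore trailing text): the following is a minimal user-space sketch of the bucket-hash scheme this patch introduces, for experimenting with the lookup/insert logic outside McKernel. It is an illustration under stated assumptions, not the kernel implementation: pthread mutexes stand in for ihk_mc_spinlock_*, an intrusive singly-linked chain stands in for list_head, PAGE_SHIFT is fixed at 12, and the helper name page_hash_index() is hypothetical.

	/* Sketch of per-bucket hashed phys -> struct page lookup.
	 * Assumptions: pthread mutexes replace ihk_mc_spinlock_*,
	 * a singly-linked chain replaces list_head. */
	#include <pthread.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>

	#define PAGE_SHIFT		12
	#define PHYS_PAGE_HASH_SHIFT	10
	#define PHYS_PAGE_HASH_SIZE	(1 << PHYS_PAGE_HASH_SHIFT)
	#define PHYS_PAGE_HASH_MASK	(PHYS_PAGE_HASH_SIZE - 1)

	struct page {
		struct page *hash_next;	/* chain within one bucket */
		uint64_t phys;		/* physical address, the hash key */
		int count;		/* reference count, caller-managed */
	};

	static struct page *page_hash[PHYS_PAGE_HASH_SIZE];
	static pthread_mutex_t page_hash_locks[PHYS_PAGE_HASH_SIZE];

	static void page_init(void)
	{
		for (int i = 0; i < PHYS_PAGE_HASH_SIZE; ++i)
			pthread_mutex_init(&page_hash_locks[i], NULL);
	}

	/* Same bucket function as the patch: hash on the page frame number. */
	static int page_hash_index(uint64_t phys)
	{
		return (phys >> PAGE_SHIFT) & PHYS_PAGE_HASH_MASK;
	}

	/* Lookup; NULL means the page is untracked (e.g. anonymous memory). */
	static struct page *phys_to_page(uint64_t phys)
	{
		int hash = page_hash_index(phys);
		struct page *p;

		pthread_mutex_lock(&page_hash_locks[hash]);
		for (p = page_hash[hash]; p; p = p->hash_next)
			if (p->phys == phys)
				break;
		pthread_mutex_unlock(&page_hash_locks[hash]);
		return p;
	}

	/* Lookup-or-insert under a single bucket lock, mirroring
	 * phys_to_page_insert_hash(): the check and the insertion must
	 * happen atomically or two CPUs could insert duplicate entries. */
	static struct page *phys_to_page_insert_hash(uint64_t phys)
	{
		int hash = page_hash_index(phys);
		struct page *p;

		pthread_mutex_lock(&page_hash_locks[hash]);
		for (p = page_hash[hash]; p; p = p->hash_next)
			if (p->phys == phys)
				goto out;
		p = calloc(1, sizeof(*p));	/* count starts at zero */
		if (p) {
			p->phys = phys;
			p->hash_next = page_hash[hash];
			page_hash[hash] = p;
		}
	out:
		pthread_mutex_unlock(&page_hash_locks[hash]);
		return p;
	}

	int main(void)
	{
		page_init();
		struct page *a = phys_to_page_insert_hash(0x200000);
		struct page *b = phys_to_page(0x200000);
		printf("inserted %p, looked up %p, untracked lookup %p\n",
		       (void *)a, (void *)b, (void *)phys_to_page(0x300000));
		return 0;
	}

Hashing on the page frame number keeps consecutive pages in different buckets, and per-bucket locks mean concurrent faults on unrelated mappings rarely contend. Note that callers of phys_to_page() must now tolerate a NULL result, since only file, shmem, and zero-object pages are tracked; that is exactly why the patch adds NULL checks at the page_map() and flush call sites.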