From 5fc0d90b7d4573b279b264d1d9b1f512daa7bcd4 Mon Sep 17 00:00:00 2001
From: NAKAMURA Gou
Date: Tue, 11 Jun 2013 16:34:51 +0900
Subject: [PATCH] add ihk_mc_pt_clear_range()

Add ihk_mc_pt_clear_range(pt, start, end), which clears the present
page-table entries covering [start, end) in one walk of the four-level
table, and call it from remove_process_region() instead of looping over
ihk_mc_pt_clear_page() one PAGE_SIZE step at a time.

ihk_mc_pt_clear_range() returns:
  0        at least one present entry was cleared
  -ENOENT  no present entry was found in the range
  -EINVAL  end <= start, start >= USER_END, end > USER_END, or start/end
           is not LARGE_PAGE_SIZE-aligned and lies inside (not at the
           base of) a present page; the table is left untouched
  -ERANGE  clear_range_l2() met a present 2MiB entry that is only
           partly inside the range; entries cleared before that point
           stay cleared

Notes:
- A present L2 entry that points to an L1 table lying wholly inside the
  range is zeroed and the L1 table is handed to arch_free_page().  A
  partly covered L1 table only has its in-range entries zeroed.
- L3 and L4 entries are never cleared and the tables they point to are
  never freed.  clear_range_l3() treats every present L3 entry as a
  pointer to an L2 table.
- The new code only writes page-table entries; it does not invalidate
  the TLB.
- remove_process_region() does not check the return value yet (see the
  XXX comment in the hunk).
---
 arch/x86/kernel/memory.c | 234 +++++++++++++++++++++++++++++++++++++++
 kernel/process.c         |  10 +-
 lib/include/ihk/mm.h     |   1 +
 3 files changed, 239 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/memory.c b/arch/x86/kernel/memory.c
index 580aa18d..db904c92 100644
--- a/arch/x86/kernel/memory.c
+++ b/arch/x86/kernel/memory.c
@@ -213,6 +213,12 @@ static unsigned long attr_to_l1attr(enum ihk_mc_pt_attribute attr)
 	l2i = ((virt) >> PTL2_SHIFT) & (PT_ENTRIES - 1); \
 	l1i = ((virt) >> PTL1_SHIFT) & (PT_ENTRIES - 1)
 
+#define GET_INDICES_VIRT(l4i, l3i, l2i, l1i) \
+	( ((uint64_t)(l4i) << PTL4_SHIFT) \
+	| ((uint64_t)(l3i) << PTL3_SHIFT) \
+	| ((uint64_t)(l2i) << PTL2_SHIFT) \
+	| ((uint64_t)(l1i) << PTL1_SHIFT) \
+	)
 
 void set_pte(pte_t *ppte, unsigned long phys, int attr)
 {
@@ -626,6 +632,234 @@ int ihk_mc_pt_clear_large_page(page_table_t pt, void *virt)
 	return __clear_pt_page(pt, virt, 1);
 }
 
+static int clear_range_l1(struct page_table *pt, uint64_t base, uint64_t start, uint64_t end)
+{
+	int six;
+	int eix;
+	int ret;
+	int i;
+
+	six = (start <= base)? 0: (start - base) >> PTL1_SHIFT;
+	eix = ((base + PTL2_SIZE) <= end)? PT_ENTRIES
+		: ((end - base) + (PTL1_SIZE - 1)) >> PTL1_SHIFT;
+
+	ret = -ENOENT;
+	for (i = six; i < eix; ++i) {
+		if (!(pt->entry[i] & PFL1_PRESENT)) {
+			continue;
+		}
+
+		pt->entry[i] = 0;
+		ret = 0;
+	}
+
+	return ret;
+}
+
+static int clear_range_l2(struct page_table *pt, uint64_t base, uint64_t start, uint64_t end)
+{
+	int six;
+	int eix;
+	int ret;
+	int i;
+	uint64_t off;
+	struct page_table *q;
+	int error;
+
+	six = (start <= base)? 0: (start - base) >> PTL2_SHIFT;
+	eix = ((base + PTL3_SIZE) <= end)? PT_ENTRIES
+		: ((end - base) + (PTL2_SIZE - 1)) >> PTL2_SHIFT;
+
+	ret = -ENOENT;
+	for (i = six; i < eix; ++i) {
+		if (!(pt->entry[i] & PFL2_PRESENT)) {
+			continue;
+		}
+
+		off = i * PTL2_SIZE;
+
+		if (pt->entry[i] & PFL2_SIZE) {
+			if (((base + off) < start) || (end < (base + off + PTL2_SIZE))) {
+				kprintf("clear_range_l2(%p,%lx,%lx,%lx):"
+						"not a 2MiB page boundary\n",
+						pt, base, start, end);
+				ret = -ERANGE;
+				break;
+			}
+
+			pt->entry[i] = 0;
+			ret = 0;
+			continue;
+		}
+
+		q = phys_to_virt(pt->entry[i] & PT_PHYSMASK);
+
+		if ((start <= (base + off)) && ((base + off + PTL2_SIZE) <= end)) {
+			pt->entry[i] = 0;
+			ret = 0;
+			arch_free_page(q);
+		}
+		else {
+			error = clear_range_l1(q, base+off, start, end);
+			if (!error) {
+				ret = 0;
+			}
+			else if (error != -ENOENT) {
+				ret = error;
+				break;
+			}
+		}
+	}
+
+	return ret;
+}
+
+static int clear_range_l3(struct page_table *pt, uint64_t base, uint64_t start, uint64_t end)
+{
+	int six;
+	int eix;
+	int ret;
+	int i;
+	int error;
+	struct page_table *q;
+
+	six = (start <= base)? 0: (start - base) >> PTL3_SHIFT;
+	eix = ((base + PTL4_SIZE) <= end)? PT_ENTRIES
+		: ((end - base) + (PTL3_SIZE - 1)) >> PTL3_SHIFT;
+
+	ret = -ENOENT;
+	for (i = six; i < eix; ++i) {
+		if (!(pt->entry[i] & PFL3_PRESENT)) {
+			continue;
+		}
+
+		q = phys_to_virt(pt->entry[i] & PT_PHYSMASK);
+		error = clear_range_l2(q, base+(i*PTL3_SIZE), start, end);
+		if (!error) {
+			ret = 0;
+		}
+		else if (error != -ENOENT) {
+			ret = error;
+			break;
+		}
+	}
+
+	return ret;
+}
+
+static int clear_range_l4(struct page_table *pt, uint64_t base, uint64_t start, uint64_t end)
+{
+	int six;
+	int eix;
+	int ret;
+	int i;
+	int error;
+	struct page_table *q;
+
+	six = (start <= base)? 0: (start - base) >> PTL4_SHIFT;
+	eix = ((end - base) + (PTL4_SIZE - 1)) >> PTL4_SHIFT;
+	if ((eix <= 0) || (PT_ENTRIES < eix)) {
+		eix = PT_ENTRIES;
+	}
+
+	ret = -ENOENT;
+	for (i = six; i < eix; ++i) {
+		if (!(pt->entry[i] & PFL4_PRESENT)) {
+			continue;
+		}
+
+		q = phys_to_virt(pt->entry[i] & PT_PHYSMASK);
+		error = clear_range_l3(q, base+(i*PTL4_SIZE), start, end);
+		if (!error) {
+			ret = 0;
+		}
+		else if (error != -ENOENT) {
+			ret = error;
+			break;
+		}
+	}
+
+	return ret;
+}
+
+static int lookup_pte(struct page_table *pt, void *virt, pte_t **ptep, void **pgbasep, uint64_t *pgsizep)
+{
+	int l4idx, l3idx, l2idx, l1idx;
+
+	GET_VIRT_INDICES((uint64_t)virt, l4idx, l3idx, l2idx, l1idx);
+
+	if (!(pt->entry[l4idx] & PFL4_PRESENT)) {
+		return -ENOENT;
+	}
+
+	pt = phys_to_virt(pt->entry[l4idx] & PT_PHYSMASK);
+	if (!(pt->entry[l3idx] & PFL3_PRESENT)) {
+		return -ENOENT;
+	}
+
+	pt = phys_to_virt(pt->entry[l3idx] & PT_PHYSMASK);
+	if (!(pt->entry[l2idx] & PFL2_PRESENT) || (pt->entry[l2idx] & PFL2_SIZE)) {
+		*ptep = &pt->entry[l2idx];
+		*pgbasep = (void *)GET_INDICES_VIRT(l4idx, l3idx, l2idx, 0);
+		*pgsizep = PTL2_SIZE;
+		return 0;
+	}
+
+	pt = phys_to_virt(pt->entry[l2idx] & PT_PHYSMASK);
+	*ptep = &pt->entry[l1idx];
+	*pgbasep = (void *)GET_INDICES_VIRT(l4idx, l3idx, l2idx, l1idx);
+	*pgsizep = PTL1_SIZE;
+
+	return 0;
+}
+
+static int is_middle_of_the_page(struct page_table *pt, void *virt)
+{
+	int error;
+	pte_t *pte;
+	void *pgbase;
+	uint64_t pgsize;
+
+	error = lookup_pte(pt, virt, &pte, &pgbase, &pgsize);
+	if (error) {
+		return 0;
+	}
+
+	if (!(*pte & PF_PRESENT)) {
+		return 0;
+	}
+
+	return pgbase != virt;
+}
+
+int ihk_mc_pt_clear_range(page_table_t pt, void *start0, void *end0)
+{
+	const uint64_t start = (uint64_t)start0;
+	const uint64_t end = (uint64_t)end0;
+	int error;
+
+	if ((USER_END <= start) || (USER_END < end) || (end <= start)) {
+		kprintf("ihk_mc_pt_clear_range(%p,%p,%p):invalid start and/or end.\n",
+				pt, start0, end0);
+		return -EINVAL;
+	}
+
+	if (((start % LARGE_PAGE_SIZE) != 0) && is_middle_of_the_page(pt, start0)) {
+		kprintf("ihk_mc_pt_clear_range(%p,%p,%p):start0 is not a page boundary\n",
+				pt, start0, end0);
+		return -EINVAL;
+	}
+
+	if (((end % LARGE_PAGE_SIZE) != 0) && is_middle_of_the_page(pt, end0)) {
+		kprintf("ihk_mc_pt_clear_range(%p,%p,%p):end0 is not a page boundary\n",
+				pt, start0, end0);
+		return -EINVAL;
+	}
+
+	error = clear_range_l4(pt, 0, start, end);
+	return error;
+}
+
 void load_page_table(struct page_table *pt)
 {
 	unsigned long pt_addr;
diff --git a/kernel/process.c b/kernel/process.c
index 55297e32..88db018a 100644
--- a/kernel/process.c
+++ b/kernel/process.c
@@ -475,13 +475,11 @@ int remove_process_region(struct process *proc,
 		return -EINVAL;
 	}
 
-	ihk_mc_spinlock_lock_noirq(&proc->vm->page_table_lock);
+	ihk_mc_spinlock_lock_noirq(&proc->vm->page_table_lock);
 	/* We defer freeing to the time of exit */
-	while (start < end) {
-		ihk_mc_pt_clear_page(proc->vm->page_table, (void *)start);
-		start += PAGE_SIZE;
-	}
-	ihk_mc_spinlock_unlock_noirq(&proc->vm->page_table_lock);
+	// XXX: check error
+	ihk_mc_pt_clear_range(proc->vm->page_table, (void *)start, (void *)end);
+	ihk_mc_spinlock_unlock_noirq(&proc->vm->page_table_lock);
 
 	return 0;
 }
diff --git a/lib/include/ihk/mm.h b/lib/include/ihk/mm.h
index b619181c..76de6e68 100644
--- a/lib/include/ihk/mm.h
+++ b/lib/include/ihk/mm.h
@@ -92,6 +92,7 @@ int ihk_mc_pt_change_page(page_table_t pt, void *virt,
                           enum ihk_mc_pt_attribute);
 int ihk_mc_pt_clear_page(page_table_t pt, void *virt);
 int ihk_mc_pt_clear_large_page(page_table_t pt, void *virt);
+int ihk_mc_pt_clear_range(page_table_t pt, void *start, void *end);
 int ihk_mc_pt_prepare_map(page_table_t pt, void *virt, unsigned long size,
                           enum ihk_mc_pt_prepare_flag);
 