diff --git a/arch/x86/kernel/include/arch-memory.h b/arch/x86/kernel/include/arch-memory.h index 65ca4710..52352f1d 100644 --- a/arch/x86/kernel/include/arch-memory.h +++ b/arch/x86/kernel/include/arch-memory.h @@ -47,38 +47,38 @@ /* mask of the physical address of the entry to the page table */ #define PT_PHYSMASK (((1UL << 52) - 1) & PAGE_MASK) -#define PF_PRESENT 0x01 /* entry is valid */ -#define PF_SIZE 0x80 /* entry points large page */ +#define PF_PRESENT ((pte_t)0x01) /* entry is valid */ +#define PF_SIZE ((pte_t)0x80) /* entry points large page */ -#define PFL4_PRESENT 0x01 -#define PFL4_WRITABLE 0x02 -#define PFL4_USER 0x04 +#define PFL4_PRESENT ((pte_t)0x01) +#define PFL4_WRITABLE ((pte_t)0x02) +#define PFL4_USER ((pte_t)0x04) -#define PFL3_PRESENT 0x01 -#define PFL3_WRITABLE 0x02 -#define PFL3_USER 0x04 -#define PFL3_ACCESSED 0x20 -#define PFL3_DIRTY 0x40 -#define PFL3_SIZE 0x80 /* Used in 1G page */ -#define PFL3_GLOBAL 0x100 +#define PFL3_PRESENT ((pte_t)0x01) +#define PFL3_WRITABLE ((pte_t)0x02) +#define PFL3_USER ((pte_t)0x04) +#define PFL3_ACCESSED ((pte_t)0x20) +#define PFL3_DIRTY ((pte_t)0x40) +#define PFL3_SIZE ((pte_t)0x80) /* Used in 1G page */ +#define PFL3_GLOBAL ((pte_t)0x100) -#define PFL2_PRESENT 0x01 -#define PFL2_WRITABLE 0x02 -#define PFL2_USER 0x04 -#define PFL2_ACCESSED 0x20 -#define PFL2_DIRTY 0x40 -#define PFL2_SIZE 0x80 /* Used in 2M page */ -#define PFL2_GLOBAL 0x100 -#define PFL2_PWT 0x08 -#define PFL2_PCD 0x10 +#define PFL2_PRESENT ((pte_t)0x01) +#define PFL2_WRITABLE ((pte_t)0x02) +#define PFL2_USER ((pte_t)0x04) +#define PFL2_PWT ((pte_t)0x08) +#define PFL2_PCD ((pte_t)0x10) +#define PFL2_ACCESSED ((pte_t)0x20) +#define PFL2_DIRTY ((pte_t)0x40) +#define PFL2_SIZE ((pte_t)0x80) /* Used in 2M page */ +#define PFL2_GLOBAL ((pte_t)0x100) -#define PFL1_PRESENT 0x01 -#define PFL1_WRITABLE 0x02 -#define PFL1_USER 0x04 -#define PFL1_ACCESSED 0x20 -#define PFL1_DIRTY 0x40 -#define PFL1_PWT 0x08 -#define PFL1_PCD 0x10 +#define PFL1_PRESENT ((pte_t)0x01) +#define PFL1_WRITABLE ((pte_t)0x02) +#define PFL1_USER ((pte_t)0x04) +#define PFL1_PWT ((pte_t)0x08) +#define PFL1_PCD ((pte_t)0x10) +#define PFL1_ACCESSED ((pte_t)0x20) +#define PFL1_DIRTY ((pte_t)0x40) /* We allow user programs to access all the memory */ #define PFL4_KERN_ATTR (PFL4_PRESENT | PFL4_WRITABLE) @@ -103,6 +103,8 @@ enum ihk_mc_pt_attribute { typedef unsigned long pte_t; +#define PTE_NULL ((pte_t)0) + struct page_table; void set_pte(pte_t *ppte, unsigned long phys, int attr); pte_t *get_pte(struct page_table *pt, void *virt, int attr); diff --git a/arch/x86/kernel/memory.c b/arch/x86/kernel/memory.c index 315cf3ba..0c8cfc4e 100644 --- a/arch/x86/kernel/memory.c +++ b/arch/x86/kernel/memory.c @@ -8,6 +8,8 @@ #include #include +#define ekprintf(...) kprintf(__VA_ARGS__) + static char *last_page; extern char _head[], _end[]; @@ -762,6 +764,30 @@ static int walk_pte_l4(struct page_table *pt, uint64_t base, uint64_t start, return ret; } +static int split_large_page(pte_t *ptep) +{ + struct page_table *pt; + uint64_t phys; + pte_t attr; + int i; + + pt = __alloc_new_pt(IHK_MC_AP_NOWAIT); + if (pt == NULL) { + ekprintf("split_large_page:__alloc_new_pt failed\n"); + return -ENOMEM; + } + + phys = *ptep & PT_PHYSMASK; + attr = *ptep & ~PFL2_SIZE; + + for (i = 0; i < PT_ENTRIES; ++i) { + pt->entry[i] = (phys + (i * PTL1_SIZE)) | attr; + } + + *ptep = (virt_to_phys(pt) & PT_PHYSMASK) | PFL2_PDIR_ATTR; + return 0; +} + struct clear_range_args { int free_physical; }; @@ -771,7 +797,7 @@ static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base, uint64_t star struct clear_range_args *args = args0; uint64_t phys; - if (!(*ptep & PFL1_PRESENT)) { + if (*ptep == PTE_NULL) { return -ENOENT; } @@ -792,20 +818,27 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base, uint64_t star struct page_table *pt; int error; - if (!(*ptep & PFL2_PRESENT)) { + if (*ptep == PTE_NULL) { return -ENOENT; } - if (*ptep & PFL2_SIZE) { - if ((base < start) || (end < (base + PTL2_SIZE))) { - kprintf("clear_range_l2(%p,%p,%lx,%lx,%lx):" - "not a 2MiB page boundary\n", - args0, ptep, base, start, end); - return -ERANGE; + if ((*ptep & PFL2_SIZE) + && ((base < start) || (end < (base + PTL2_SIZE)))) { + error = split_large_page(ptep); + if (error) { + ekprintf("clear_range_l2(%p,%p,%lx,%lx,%lx):" + "split failed. %d\n", + args0, ptep, base, start, end, error); + return error; } + if (*ptep & PFL2_SIZE) { + panic("clear_range_l2:split"); + } + } + if (*ptep & PFL2_SIZE) { phys = *ptep & PT_PHYSMASK; - *ptep = 0; + *ptep = PTE_NULL; if (args->free_physical) { ihk_mc_free_pages(phys_to_virt(phys), @@ -822,7 +855,7 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base, uint64_t star } if ((start <= base) && ((base + PTL2_SIZE) <= end)) { - *ptep = 0; + *ptep = PTE_NULL; arch_free_page(pt); } @@ -833,7 +866,7 @@ static int clear_range_l3(void *args0, pte_t *ptep, uint64_t base, uint64_t star { struct page_table *pt; - if (!(*ptep & PFL3_PRESENT)) { + if (*ptep == PTE_NULL) { return -ENOENT; } @@ -845,7 +878,7 @@ static int clear_range_l4(void *args0, pte_t *ptep, uint64_t base, uint64_t star { struct page_table *pt; - if (!(*ptep & PFL4_PRESENT)) { + if (*ptep == PTE_NULL) { return -ENOENT; } @@ -853,72 +886,6 @@ static int clear_range_l4(void *args0, pte_t *ptep, uint64_t base, uint64_t star return walk_pte_l3(pt, base, start, end, &clear_range_l3, args0); } -static int lookup_pte(struct page_table *pt, void *virt, pte_t **ptep, void **pgbasep, uint64_t *pgsizep) -{ - int l4idx, l3idx, l2idx, l1idx; - - GET_VIRT_INDICES((uint64_t)virt, l4idx, l3idx, l2idx, l1idx); - - if (!(pt->entry[l4idx] & PFL4_PRESENT)) { - return -ENOENT; - } - - pt = phys_to_virt(pt->entry[l4idx] & PT_PHYSMASK); - if (!(pt->entry[l3idx] & PFL3_PRESENT)) { - return -ENOENT; - } - - pt = phys_to_virt(pt->entry[l3idx] & PT_PHYSMASK); - if (!(pt->entry[l2idx] & PFL2_PRESENT) || (pt->entry[l2idx] & PFL2_SIZE)) { - *ptep = &pt->entry[l2idx]; - *pgbasep = (void *)GET_INDICES_VIRT(l4idx, l3idx, l2idx, 0); - *pgsizep = PTL2_SIZE; - return 0; - } - - pt = phys_to_virt(pt->entry[l2idx] & PT_PHYSMASK); - *ptep = &pt->entry[l1idx]; - *pgbasep = (void *)GET_INDICES_VIRT(l4idx, l3idx, l2idx, l1idx); - *pgsizep = PTL1_SIZE; - - return 0; -} - -#ifdef USE_LARGE_PAGES -static int split_large_page(struct page_table *pt, intptr_t virt) -{ - int error; - pte_t *ptep; - void *pgbase; - uint64_t pgsize; - struct page_table *q; - uint64_t phys; - pte_t attr; - int i; - - error = lookup_pte(pt, (void *)virt, &ptep, &pgbase, &pgsize); - if (error || !(*ptep & PF_PRESENT) || (pgsize == PAGE_SIZE)) { - return 0; - } - - q = __alloc_new_pt(IHK_MC_AP_NOWAIT); - if (q == NULL) { - kprintf("split_large_page:__alloc_new_pt failed\n"); - return -ENOMEM; - } - - phys = *ptep & PT_PHYSMASK; - attr = *ptep & (PFL2_PRESENT | PFL2_WRITABLE | PFL2_USER | PFL2_PWT | PFL2_PCD); - - for (i = 0; i < PT_ENTRIES; ++i) { - q->entry[i] = (phys + (i * PTL1_SIZE)) | attr; - } - - *ptep = (virt_to_phys(q) & PT_PHYSMASK) | PFL2_PDIR_ATTR; - return 0; -} -#endif /* USE_LARGE_PAGES */ - static int clear_range(page_table_t pt, void *start0, void *end0, int free_physical) { const uint64_t start = (uint64_t)start0; @@ -927,31 +894,11 @@ static int clear_range(page_table_t pt, void *start0, void *end0, int free_physi struct clear_range_args args; if ((USER_END <= start) || (USER_END < end) || (end <= start)) { - kprintf("clear_range(%p,%p,%p,%x):invalid start and/or end.\n", + ekprintf("clear_range(%p,%p,%p,%x):invalid start and/or end.\n", pt, start0, end0, free_physical); return -EINVAL; } -#ifdef USE_LARGE_PAGES - if (start & (LARGE_PAGE_SIZE - 1)) { - error = split_large_page(pt, start); - if (error) { - kprintf("clear_range(%p,%p,%p,%x):split_large_page(%lx) failed. %d\n", - pt, start0, end0, free_physical, start, error); - return error; - } - } - - if (end & (LARGE_PAGE_SIZE - 1)) { - error = split_large_page(pt, end); - if (error) { - kprintf("clear_range(%p,%p,%p,%x):split_large_page(%lx) failed. %d\n", - pt, start0, end0, free_physical, end, error); - return error; - } - } -#endif /* USE_LARGE_PAGES */ - args.free_physical = free_physical; error = walk_pte_l4(pt, 0, start, end, &clear_range_l4, &args); return error; @@ -969,6 +916,223 @@ int ihk_mc_pt_free_range(page_table_t pt, void *start0, void *end0) return clear_range(pt, start0, end0, FREE_PHYSICAL); } +struct change_attr_args { + pte_t clrpte; + pte_t setpte; +}; + +static int change_attr_range_l1(void *arg0, pte_t *ptep, uint64_t base, + uint64_t start, uint64_t end) +{ + struct change_attr_args *args = arg0; + + if (*ptep == PTE_NULL) { + return -ENOENT; + } + + *ptep = (*ptep & ~args->clrpte) | args->setpte; + return 0; +} + +static int change_attr_range_l2(void *arg0, pte_t *ptep, uint64_t base, + uint64_t start, uint64_t end) +{ + struct change_attr_args *args = arg0; + int error; + struct page_table *pt; + + if (*ptep == PTE_NULL) { + return -ENOENT; + } + + if ((*ptep & PFL2_SIZE) + && ((base < start) || (end < (base + PTL2_SIZE)))) { + error = split_large_page(ptep); + if (error) { + ekprintf("change_attr_range_l2(%p,%p,%lx,%lx,%lx):" + "split failed. %d\n", + arg0, ptep, base, start, end, error); + return error; + } + if (*ptep & PFL2_SIZE) { + panic("change_attr_range_l2:split"); + } + } + + if (*ptep & PFL2_SIZE) { + *ptep = (*ptep & ~args->clrpte) | args->setpte; + return 0; + } + + pt = phys_to_virt(*ptep & PT_PHYSMASK); + return walk_pte_l1(pt, base, start, end, &change_attr_range_l1, arg0); +} + +static int change_attr_range_l3(void *arg0, pte_t *ptep, uint64_t base, + uint64_t start, uint64_t end) +{ + struct page_table *pt; + + if (*ptep == PTE_NULL) { + return -ENOENT; + } + + pt = phys_to_virt(*ptep & PT_PHYSMASK); + return walk_pte_l2(pt, base, start, end, &change_attr_range_l2, arg0); +} + +static int change_attr_range_l4(void *arg0, pte_t *ptep, uint64_t base, + uint64_t start, uint64_t end) +{ + struct page_table *pt; + + if (*ptep == PTE_NULL) { + return -ENOENT; + } + + pt = phys_to_virt(*ptep & PT_PHYSMASK); + return walk_pte_l3(pt, base, start, end, &change_attr_range_l3, arg0); +} + +int ihk_mc_pt_change_attr_range(page_table_t pt, void *start0, void *end0, + enum ihk_mc_pt_attribute clrattr, + enum ihk_mc_pt_attribute setattr) +{ + const intptr_t start = (intptr_t)start0; + const intptr_t end = (intptr_t)end0; + struct change_attr_args args; + + args.clrpte = attr_to_l1attr(clrattr); + args.setpte = attr_to_l1attr(setattr); + return walk_pte_l4(pt, 0, start, end, &change_attr_range_l4, &args); +} + +static int alloc_range_l1(void *arg0, pte_t *ptep, uint64_t base, + uint64_t start, uint64_t end) +{ + enum ihk_mc_pt_attribute *attrp = arg0; + void *vp; + + if (*ptep == PTE_NULL) { + /* not mapped */ + vp = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT); + if (vp == NULL) { + return -ENOMEM; + } + memset(vp, 0, PTL1_SIZE); + + *ptep = virt_to_phys(vp) | attr_to_l1attr(*attrp); + } + else if (!(*ptep & PFL1_PRESENT)) { + kprintf("alloc_range_l1(%p,%p,%lx,%lx,%lx):inactive %lx\n", + arg0, ptep, base, start, end, *ptep); + return -EBUSY; + } + + return 0; +} + +static int alloc_range_l2(void *arg0, pte_t *ptep, uint64_t base, + uint64_t start, uint64_t end) +{ + struct page_table *pt; +#ifdef USE_LARGE_PAGES + enum ihk_mc_pt_attribute *attrp = arg0; + void *vp; +#endif /* USE_LARGE_PAGES */ + + if (*ptep != PTE_NULL) { + if (!(*ptep & PFL2_PRESENT)) { + kprintf("alloc_range_l2(%p,%p,%lx,%lx,%lx):inactive %lx\n", + arg0, ptep, base, start, end, *ptep); + return -EBUSY; + } + else if (*ptep & PFL2_SIZE) { + return 0; + } + + pt = phys_to_virt(*ptep & PT_PHYSMASK); + } + else { +#ifdef USE_LARGE_PAGES + if ((start <= base) && ((base + PTL2_SIZE) <= end)) { + vp = ihk_mc_alloc_aligned_pages(LARGE_PAGE_SIZE/PAGE_SIZE, + LARGE_PAGE_P2ALIGN, IHK_MC_AP_NOWAIT); + if (vp != NULL) { + memset(vp, 0, PTL2_SIZE); + + *ptep = virt_to_phys(vp) + | attr_to_l2attr(*attrp | PTATTR_LARGEPAGE); + return 0; + } + } +#endif /* USE_LARGE_PAGES */ + pt = __alloc_new_pt(IHK_MC_AP_NOWAIT); + if (pt == NULL) { + return -ENOMEM; + } + + *ptep = virt_to_phys(pt) | PFL2_PDIR_ATTR; + } + + return walk_pte_l1(pt, base, start, end, &alloc_range_l1, arg0); +} + +static int alloc_range_l3(void *arg0, pte_t *ptep, uint64_t base, + uint64_t start, uint64_t end) +{ + struct page_table *pt; + + if (*ptep != PTE_NULL) { + if (!(*ptep & PFL3_PRESENT)) { + kprintf("alloc_range_l3(%p,%p,%lx,%lx,%lx):inactive %lx\n", + arg0, ptep, base, start, end, *ptep); + panic("alloc_range_l3:inactive"); + } + pt = phys_to_virt(*ptep & PT_PHYSMASK); + } + else { + pt = __alloc_new_pt(IHK_MC_AP_NOWAIT); + if (pt == NULL) { + return -ENOMEM; + } + *ptep = virt_to_phys(pt) | PFL3_PDIR_ATTR; + } + + return walk_pte_l2(pt, base, start, end, &alloc_range_l2, arg0); +} + +static int alloc_range_l4(void *arg0, pte_t *ptep, uint64_t base, + uint64_t start, uint64_t end) +{ + struct page_table *pt; + + if (*ptep != PTE_NULL) { + if (!(*ptep & PFL4_PRESENT)) { + kprintf("alloc_range_l4(%p,%p,%lx,%lx,%lx):inactive %lx\n", + arg0, ptep, base, start, end, *ptep); + panic("alloc_range_l4:inactive"); + } + pt = phys_to_virt(*ptep & PT_PHYSMASK); + } + else { + pt = __alloc_new_pt(IHK_MC_AP_NOWAIT); + if (pt == NULL) { + return -ENOMEM; + } + *ptep = virt_to_phys(pt) | PFL4_PDIR_ATTR; + } + + return walk_pte_l3(pt, base, start, end, &alloc_range_l3, arg0); +} + +int ihk_mc_pt_alloc_range(page_table_t pt, void *start, void *end, + enum ihk_mc_pt_attribute attr) +{ + return walk_pte_l4(pt, 0, (intptr_t)start, (intptr_t)end, + &alloc_range_l4, &attr); +} + void load_page_table(struct page_table *pt) { unsigned long pt_addr; diff --git a/kernel/include/process.h b/kernel/include/process.h index 46d3b036..d4e298c4 100644 --- a/kernel/include/process.h +++ b/kernel/include/process.h @@ -120,8 +120,17 @@ int add_process_memory_range(struct process *process, unsigned long phys, unsigned long flag); int remove_process_memory_range( struct process *process, unsigned long start, unsigned long end); +int split_process_memory_range(struct process *process, + struct vm_range *range, uintptr_t addr, struct vm_range **splitp); +int join_process_memory_range(struct process *process, struct vm_range *surviving, + struct vm_range *merging); +int change_prot_process_memory_range( + struct process *process, struct vm_range *range, + unsigned long newflag); struct vm_range *lookup_process_memory_range( - struct process *proc, uintptr_t start, uintptr_t end); + struct process_vm *vm, uintptr_t start, uintptr_t end); +struct vm_range *next_process_memory_range( + struct process_vm *vm, struct vm_range *range); int remove_process_region(struct process *proc, unsigned long start, unsigned long end); struct program_load_desc; diff --git a/kernel/process.c b/kernel/process.c index 7509a14c..25318870 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -13,9 +13,11 @@ //#define DEBUG_PRINT_PROCESS #ifdef DEBUG_PRINT_PROCESS -#define dkprintf kprintf +#define dkprintf(...) kprintf(__VA_ARGS__) +#define ekprintf(...) kprintf(__VA_ARGS__) #else #define dkprintf(...) +#define ekprintf(...) kprintf(__VA_ARGS__) #endif @@ -114,9 +116,6 @@ struct process *clone_process(struct process *org, unsigned long pc, return proc; } -extern void __host_update_process_range(struct process *process, - struct vm_range *range); - int update_process_page_table(struct process *process, struct vm_range *range, uint64_t phys, enum ihk_mc_pt_attribute flag) @@ -190,6 +189,72 @@ err: return -ENOMEM; } +int split_process_memory_range(struct process *proc, struct vm_range *range, + uintptr_t addr, struct vm_range **splitp) +{ + int error; + struct vm_range *newrange = NULL; + + dkprintf("split_process_memory_range(%p,%lx-%lx,%lx,%p)\n", + proc, range->start, range->end, addr, splitp); + + newrange = kmalloc(sizeof(struct vm_range), IHK_MC_AP_NOWAIT); + if (!newrange) { + ekprintf("split_process_memory_range(%p,%lx-%lx,%lx,%p):" + "kmalloc failed\n", + proc, range->start, range->end, addr, splitp); + error = -ENOMEM; + goto out; + } + + newrange->start = addr; + newrange->end = range->end; + newrange->flag = range->flag; + + range->end = addr; + + list_add(&newrange->list, &range->list); + + error = 0; + if (splitp != NULL) { + *splitp = newrange; + } + +out: + dkprintf("split_process_memory_range(%p,%lx-%lx,%lx,%p): %d %p %lx-%lx\n", + proc, range->start, range->end, addr, splitp, + error, newrange, + newrange? newrange->start: 0, newrange? newrange->end: 0); + return error; +} + +int join_process_memory_range(struct process *proc, + struct vm_range *surviving, struct vm_range *merging) +{ + int error; + + dkprintf("join_process_memory_range(%p,%lx-%lx,%lx-%lx)\n", + proc, surviving->start, surviving->end, + merging->start, merging->end); + + if ((surviving->end != merging->start) + || (surviving->flag != merging->flag)) { + error = -EINVAL; + goto out; + } + + surviving->end = merging->end; + + list_del(&merging->list); + ihk_mc_free(merging); + + error = 0; +out: + dkprintf("join_process_memory_range(%p,%lx-%lx,%p): %d\n", + proc, surviving->start, surviving->end, merging, error); + return error; +} + int remove_process_memory_range(struct process *process, unsigned long start, unsigned long end) { struct process_vm * const vm = process->vm; @@ -287,12 +352,65 @@ int remove_process_memory_range(struct process *process, unsigned long start, un return 0; } +static void insert_vm_range_list(struct process_vm *vm, struct vm_range *newrange) +{ + struct list_head *next; + struct vm_range *range; + + next = &vm->vm_range_list; + list_for_each_entry(range, &vm->vm_range_list, list) { + if ((newrange->start < range->end) && (range->start < newrange->end)) { + ekprintf("insert_vm_range_list(%p,%lx-%lx %lx):overlap %lx-%lx %lx\n", + vm, newrange->start, newrange->end, newrange->flag, + range->start, range->end, range->flag); + panic("insert_vm_range_list\n"); + } + + if (newrange->end <= range->start) { + next = &range->list; + break; + } + } + + list_add_tail(&newrange->list, next); + return; +} + +enum ihk_mc_pt_attribute vrflag_to_ptattr(unsigned long flag) +{ + enum ihk_mc_pt_attribute attr; + + attr = PTATTR_USER | PTATTR_FOR_USER; + + if (flag & VR_REMOTE) { + attr |= IHK_PTA_REMOTE; + } + else if (flag & VR_IO_NOCACHE) { + attr |= PTATTR_UNCACHABLE; + } + + if ((flag & VR_PROT_MASK) != VR_PROT_NONE) { + attr |= PTATTR_ACTIVE; + } + + if (flag & VR_PROT_WRITE) { + attr |= PTATTR_WRITABLE; + } + + return attr; +} + + int add_process_memory_range(struct process *process, unsigned long start, unsigned long end, unsigned long phys, unsigned long flag) { struct vm_range *range; int rc; +#if 0 + extern void __host_update_process_range(struct process *process, + struct vm_range *range); +#endif if ((start < process->vm->region.user_start) || (process->vm->region.user_end < end)) { @@ -330,6 +448,8 @@ int add_process_memory_range(struct process *process, //demand paging no need to update process table now kprintf("demand paging do not update process page table\n"); rc = 0; + } else if ((range->flag & VR_PROT_MASK) == VR_PROT_NONE) { + rc = 0; } else { rc = update_process_page_table(process, range, phys, 0); } @@ -344,7 +464,7 @@ int add_process_memory_range(struct process *process, } #endif - list_add_tail(&range->list, &process->vm->vm_range_list); + insert_vm_range_list(process->vm, range); /* Clear content! */ if (!(flag & (VR_REMOTE | VR_DEMAND_PAGING)) @@ -355,21 +475,114 @@ int add_process_memory_range(struct process *process, return 0; } -struct vm_range *lookup_process_memory_range(struct process *proc, uintptr_t start, uintptr_t end) +struct vm_range *lookup_process_memory_range( + struct process_vm *vm, uintptr_t start, uintptr_t end) { - struct vm_range *range; + struct vm_range *range = NULL; + + dkprintf("lookup_process_memory_range(%p,%lx,%lx)\n", vm, start, end); if (end <= start) { - return NULL; + goto out; } - list_for_each_entry(range, &proc->vm->vm_range_list, list) { + list_for_each_entry(range, &vm->vm_range_list, list) { + if (end <= range->start) { + break; + } if ((start < range->end) && (range->start < end)) { - return range; + goto out; } } - return NULL; + range = NULL; +out: + dkprintf("lookup_process_memory_range(%p,%lx,%lx): %p %lx-%lx\n", + vm, start, end, range, + range? range->start: 0, range? range->end: 0); + return range; +} + +struct vm_range *next_process_memory_range( + struct process_vm *vm, struct vm_range *range) +{ + struct vm_range *next; + + dkprintf("next_process_memory_range(%p,%lx-%lx)\n", + vm, range->start, range->end); + + if (list_is_last(&range->list, &vm->vm_range_list)) { + next = NULL; + } + else { + next = list_entry(range->list.next, struct vm_range, list); + } + + dkprintf("next_process_memory_range(%p,%lx-%lx): %p %lx-%lx\n", + vm, range->start, range->end, next, + next? next->start: 0, next? next->end: 0); + return next; +} + +int change_prot_process_memory_range(struct process *proc, + struct vm_range *range, unsigned long protflag) +{ + unsigned long newflag; + int error; + enum ihk_mc_pt_attribute oldattr; + enum ihk_mc_pt_attribute newattr; + enum ihk_mc_pt_attribute clrattr; + enum ihk_mc_pt_attribute setattr; + + dkprintf("change_prot_process_memory_range(%p,%lx-%lx,%lx)\n", + proc, range->start, range->end, protflag); + + newflag = (range->flag & ~VR_PROT_MASK) | (protflag & VR_PROT_MASK); + if (range->flag == newflag) { + /* nothing to do */ + error = 0; + goto out; + } + + oldattr = vrflag_to_ptattr(range->flag); + newattr = vrflag_to_ptattr(newflag); + + clrattr = oldattr & ~newattr; + setattr = newattr & ~oldattr; + + ihk_mc_spinlock_lock_noirq(&proc->vm->page_table_lock); + error = ihk_mc_pt_change_attr_range(proc->vm->page_table, + (void *)range->start, (void *)range->end, + clrattr, setattr); + ihk_mc_spinlock_unlock_noirq(&proc->vm->page_table_lock); + if (error && (error != -ENOENT)) { + ekprintf("change_prot_process_memory_range(%p,%lx-%lx,%lx):" + "ihk_mc_pt_change_attr_range failed: %d\n", + proc, range->start, range->end, protflag, error); + goto out; + } + + if (((range->flag & VR_PROT_MASK) == PROT_NONE) + && !(range->flag & VR_DEMAND_PAGING)) { + ihk_mc_spinlock_lock_noirq(&proc->vm->page_table_lock); + error = ihk_mc_pt_alloc_range(proc->vm->page_table, + (void *)range->start, (void *)range->end, + newattr); + ihk_mc_spinlock_unlock_noirq(&proc->vm->page_table_lock); + if (error) { + ekprintf("change_prot_process_memory_range(%p,%lx-%lx,%lx):" + "ihk_mc_pt_alloc_range failed: %d\n", + proc, range->start, range->end, protflag, error); + goto out; + } + } + + range->flag = newflag; + error = 0; +out: + dkprintf("change_prot_process_memory_range(%p,%lx-%lx,%lx): %d\n", + proc, range->start, range->end, protflag, error); + return error; } int init_process_stack(struct process *process, struct program_load_desc *pn, @@ -544,7 +757,6 @@ int remove_process_region(struct process *proc, return 0; } -extern void print_free_list(void); void free_process_memory(struct process *proc) { struct vm_range *range, *next; diff --git a/kernel/syscall.c b/kernel/syscall.c index 8a9f4aff..3a5654bb 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -205,7 +205,7 @@ static int search_free_space(size_t len, intptr_t hint, intptr_t *addrp) goto out; } - range = lookup_process_memory_range(proc, addr, addr+len); + range = lookup_process_memory_range(proc->vm, addr, addr+len); if (range == NULL) { break; } @@ -437,17 +437,145 @@ SYSCALL_DECLARE(munmap) const size_t len = ihk_mc_syscall_arg1(ctx); int error; + dkprintf("[%d]sys_munmap(%lx,%lx)\n", + ihk_mc_get_processor_id(), addr, len); + ihk_mc_spinlock_lock_noirq(&cpu_local_var(current)->vm->memory_range_lock); error = do_munmap(addr, len); ihk_mc_spinlock_unlock_noirq(&cpu_local_var(current)->vm->memory_range_lock); + dkprintf("[%d]sys_munmap(%lx,%lx): %d\n", + ihk_mc_get_processor_id(), addr, len, error); return error; } SYSCALL_DECLARE(mprotect) { - dkprintf("mprotect returns 0\n"); - return 0; + const intptr_t start = ihk_mc_syscall_arg0(ctx); + const size_t len0 = ihk_mc_syscall_arg1(ctx); + const int prot = ihk_mc_syscall_arg2(ctx); + struct process *proc = cpu_local_var(current); + struct vm_regions *region = &proc->vm->region; + size_t len; + intptr_t end; + struct vm_range *first; + intptr_t addr; + struct vm_range *range; + int error; + struct vm_range *changed; + const unsigned long protflags = PROT_TO_VR_FLAG(prot); + + dkprintf("[%d]sys_mprotect(%lx,%lx,%x)\n", + ihk_mc_get_processor_id(), start, len0, prot); + + len = (len0 + PAGE_SIZE - 1) & PAGE_MASK; + end = start + len; + + /* check arguments */ + if ((start & (PAGE_SIZE - 1)) + || (start < region->user_start) + || (region->user_end <= start) + || (len > (region->user_end - region->user_start) + || ((region->user_end - len) < start))) { + ekprintf("[%d]sys_mprotect(%lx,%lx,%x): -EINVAL\n", + ihk_mc_get_processor_id(), start, len0, prot); + return -EINVAL; + } + + if (len == 0) { + /* nothing to do */ + return 0; + } + + ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock); + + /* check contiguous map */ + first = NULL; + for (addr = start; addr < end; addr = range->end) { + if (first == NULL) { + range = lookup_process_memory_range(proc->vm, start, start+PAGE_SIZE); + first = range; + } + else { + range = next_process_memory_range(proc->vm, range); + } + + if ((range == NULL) || (addr < range->start)) { + /* not contiguous */ + ekprintf("sys_mprotect(%lx,%lx,%x):not contiguous\n", + start, len0, prot); + error = -ENOMEM; + goto out; + } + + if (range->flag & (VR_REMOTE | VR_RESERVED | VR_IO_NOCACHE)) { + ekprintf("sys_mprotect(%lx,%lx,%x):cannot change\n", + start, len0, prot); + error = -EINVAL; + goto out; + } + } + + /* do the mprotect */ + changed = NULL; + for (addr = start; addr < end; addr = changed->end) { + if (changed == NULL) { + range = first; + } + else { + range = next_process_memory_range(proc->vm, changed); + } + if (range == NULL) { + ekprintf("sys_mprotect(%lx,%lx,%x):next(%lx) failed.\n", + start, len0, prot, + (changed)? changed->end: -1); + panic("sys_mprotect:next\n"); + } + + if (range->start < addr) { + error = split_process_memory_range(proc, range, addr, &range); + if (error) { + ekprintf("sys_mprotect(%lx,%lx,%x):split failed. %d\n", + start, len0, prot, error); + goto out; + } + } + if (end < range->end) { + error = split_process_memory_range(proc, range, end, NULL); + if (error) { + ekprintf("sys_mprotect(%lx,%lx,%x):split failed. %d\n", + start, len0, prot, error); + goto out; + } + } + + error = change_prot_process_memory_range(proc, range, protflags); + if (error) { + ekprintf("sys_mprotect(%lx,%lx,%x):change failed. %d\n", + start, len0, prot, error); + goto out; + } + + if (changed == NULL) { + changed = range; + } + else { + error = join_process_memory_range(proc, changed, range); + if (error) { + ekprintf("sys_mprotect(%lx,%lx,%x):join failed. %d\n", + start, len0, prot, error); + changed = range; + /* through */ + } + } + } + + error = 0; +out: + ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); + dkprintf("[%d]sys_mprotect(%lx,%lx,%x): %d\n", + ihk_mc_get_processor_id(), start, len0, prot, error); + return error; } SYSCALL_DECLARE(brk) diff --git a/lib/include/ihk/mm.h b/lib/include/ihk/mm.h index a3f53335..fee5f442 100644 --- a/lib/include/ihk/mm.h +++ b/lib/include/ihk/mm.h @@ -95,6 +95,11 @@ int ihk_mc_pt_clear_page(page_table_t pt, void *virt); int ihk_mc_pt_clear_large_page(page_table_t pt, void *virt); int ihk_mc_pt_clear_range(page_table_t pt, void *start, void *end); int ihk_mc_pt_free_range(page_table_t pt, void *start, void *end); +int ihk_mc_pt_change_attr_range(page_table_t pt, void *start, void *end, + enum ihk_mc_pt_attribute clrattr, + enum ihk_mc_pt_attribute setattr); +int ihk_mc_pt_alloc_range(page_table_t pt, void *start, void *end, + enum ihk_mc_pt_attribute attr); int ihk_mc_pt_prepare_map(page_table_t pt, void *virt, unsigned long size, enum ihk_mc_pt_prepare_flag);