diff --git a/arch/x86/kernel/include/arch/rusage.h b/arch/x86/kernel/include/arch/rusage.h new file mode 100644 index 00000000..6ee49f5f --- /dev/null +++ b/arch/x86/kernel/include/arch/rusage.h @@ -0,0 +1,58 @@ +#ifndef ARCH_RUSAGE_H_INCLUDED +#define ARCH_RUSAGE_H_INCLUDED + +#define DEBUG_RUSAGE + +#define IHK_OS_PGSIZE_4KB 0 +#define IHK_OS_PGSIZE_2MB 1 +#define IHK_OS_PGSIZE_1GB 2 + +extern struct ihk_os_monitor *monitor; + +extern int sprintf(char * buf, const char *fmt, ...); + +#define DEBUG_ARCH_RUSAGE +#ifdef DEBUG_ARCH_RUSAGE +#define dprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + kprintf("%s,%s", __FUNCTION__, msg); \ + } while (0) +#define eprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + kprintf("%s,%s", __FUNCTION__, msg); \ + } while (0) +#else +#define dprintf(...) do { } while (0) +#define eprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + kprintf("%s,%s", __FUNCTION__, msg); \ + } while (0) +#endif + +static inline int rusage_pgsize_to_pgtype(size_t pgsize) +{ + int ret = IHK_OS_PGSIZE_4KB; + switch (pgsize) { + case PTL1_SIZE: + ret = IHK_OS_PGSIZE_4KB; + break; + case PTL2_SIZE: + ret = IHK_OS_PGSIZE_2MB; + break; + case PTL3_SIZE: + ret = IHK_OS_PGSIZE_1GB; + break; + default: + eprintf("unknown pgsize=%ld\n", pgsize); + break; + } + return ret; +} + +#endif /* !defined(ARCH_RUSAGE_H_INCLUDED) */ diff --git a/arch/x86/kernel/include/ihk/atomic.h b/arch/x86/kernel/include/ihk/atomic.h index 250556a6..6346cfd2 100644 --- a/arch/x86/kernel/include/ihk/atomic.h +++ b/arch/x86/kernel/include/ihk/atomic.h @@ -114,6 +114,11 @@ static inline long ihk_atomic64_read(const ihk_atomic64_t *v) return *(volatile long *)&(v)->counter64; } +static inline void ihk_atomic64_set(ihk_atomic64_t *v, long i) +{ + v->counter64 = i; +} + static inline void ihk_atomic64_inc(ihk_atomic64_t *v) { asm volatile ("lock incq %0" : "+m"(v->counter64)); diff --git a/arch/x86/kernel/memory.c
b/arch/x86/kernel/memory.c index fa67bd1f..38081030 100644 --- a/arch/x86/kernel/memory.c +++ b/arch/x86/kernel/memory.c @@ -24,9 +24,17 @@ #include #include #include +#include -#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) -#define ekprintf(...) kprintf(__VA_ARGS__) +//#define DEBUG + +#ifdef DEBUG +#define dkprintf(...) do { kprintf(__VA_ARGS__); } while (0) +#define ekprintf(...) do { kprintf(__VA_ARGS__); } while (0) +#else +#define dkprintf(...) do { } while (0) +#define ekprintf(...) do { kprintf(__VA_ARGS__); } while (0) +#endif static char *last_page; extern char _head[], _end[]; @@ -902,11 +910,24 @@ static int split_large_page(pte_t *ptep, size_t pgsize) } } pt->entry[i] = pte; + switch(pgsize) { + case PTL3_SIZE: + dkprintf("%lx+,%s: calling memory_stat_rss_add(),size=%ld,pgsize=%ld\n", pte_is_fileoff(ptep, pgsize) ? pte_get_off(&pte, pgsize) : pte_get_phys(&pte), __FUNCTION__, PTL2_SIZE, PTL2_SIZE); + memory_stat_rss_add(PTL2_SIZE, PTL2_SIZE); + break; + case PTL2_SIZE: + dkprintf("%lx+,%s: calling memory_stat_rss_add(),size=%ld,pgsize=%ld\n", pte_is_fileoff(ptep, pgsize) ? pte_get_off(&pte, pgsize) : pte_get_phys(&pte), __FUNCTION__, PTL1_SIZE, PTL1_SIZE); + memory_stat_rss_add(PTL1_SIZE, PTL1_SIZE); + break; + } pte += pgsize / PT_ENTRIES; } *ptep = (virt_to_phys(pt) & PT_PHYSMASK) | PFL2_PDIR_ATTR; + dkprintf("%lx-,%s: calling memory_stat_rss_sub(),size=%ld,pgsize=%ld\n", phys_base, __FUNCTION__, pgsize, pgsize); + memory_stat_rss_sub(pgsize, pgsize); + /* Do not do this check for large pages as they don't come from the zeroobj * and are not actually mapped. 
* TODO: clean up zeroobj as we don't really need it, anonymous mappings @@ -1106,6 +1127,8 @@ static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base, struct page *page; pte_t old; + //dkprintf("%s: %lx,%lx,%lx\n", __FUNCTION__, base, start, end); + if (*ptep == PTE_NULL) { return -ENOENT; } @@ -1119,17 +1142,37 @@ static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base, page = phys_to_page(phys); } + if (page) { + dkprintf("%s: page=%p,is_in_memobj=%d,(old & PFL1_DIRTY)=%lx,memobj=%p,args->memobj->flags=%x\n", __FUNCTION__, page, page_is_in_memobj(page), (old & PFL1_DIRTY), args->memobj, args->memobj ? args->memobj->flags : -1); + } if (page && page_is_in_memobj(page) && (old & PFL1_DIRTY) && (args->memobj) && !(args->memobj->flags & MF_ZEROFILL)) { memobj_flush_page(args->memobj, phys, PTL1_SIZE); } - if (!(old & PFL1_FILEOFF) && args->free_physical) { - if (!page || (page && page_unmap(page))) { - ihk_mc_free_pages_user(phys_to_virt(phys), 1); - dkprintf("%s: freeing regular page at 0x%lx\n", __FUNCTION__, base); + if (!(old & PFL1_FILEOFF)) { + if(args->free_physical) { + if (!page) { + /* Anonymous || !XPMEM attach */ + if (!args->memobj || !(args->memobj->flags & MF_XPMEM)) { + ihk_mc_free_pages_user(phys_to_virt(phys), 1); + dkprintf("%s: freeing regular page at 0x%lx\n", __FUNCTION__, base); + dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", pte_get_phys(&old), __FUNCTION__, pte_get_phys(&old), PTL1_SIZE, PTL1_SIZE); + memory_stat_rss_sub(PTL1_SIZE, PTL1_SIZE); + } else { + dkprintf("%s: XPMEM attach,phys=%lx\n", __FUNCTION__, phys); + } + } else if (page_unmap(page)) { + ihk_mc_free_pages_user(phys_to_virt(phys), 1); + dkprintf("%s: freeing file-backed page at 0x%lx\n", __FUNCTION__, base); + /* Track page->count for !MF_PREMAP pages */ + dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", pte_get_phys(&old), __FUNCTION__, pte_get_phys(&old), PTL1_SIZE, PTL1_SIZE); + 
rusage_memory_stat_sub(args->memobj, PTL1_SIZE, PTL1_SIZE); + } + args->vm->currss -= PTL1_SIZE; + } else { + dkprintf("%s: !calling memory_stat_rss_sub(),virt=%lx,phys=%lx\n", __FUNCTION__, base, pte_get_phys(&old)); } - args->vm->currss -= PTL1_SIZE; } return 0; @@ -1145,6 +1188,8 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base, struct page *page; pte_t old; + //dkprintf("%s: %lx,%lx,%lx\n", __FUNCTION__, base, start, end); + if (*ptep == PTE_NULL) { return -ENOENT; } @@ -1172,13 +1217,29 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base, memobj_flush_page(args->memobj, phys, PTL2_SIZE); } - if (!(old & PFL2_FILEOFF) && args->free_physical) { - if (!page || (page && page_unmap(page))) { - ihk_mc_free_pages_user(phys_to_virt(phys), + if (!(old & PFL2_FILEOFF)) { + if(args->free_physical) { + if (!page) { + /* Anonymous || !XPMEM attach */ + if (!args->memobj || !(args->memobj->flags & MF_XPMEM)) { + ihk_mc_free_pages_user(phys_to_virt(phys), + PTL2_SIZE/PTL1_SIZE); + dkprintf("%s: freeing large page at 0x%lx\n", __FUNCTION__, base); + dkprintf("%lx-,%s: memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", pte_get_phys(&old),__FUNCTION__, pte_get_phys(&old), PTL2_SIZE, PTL2_SIZE); + memory_stat_rss_sub(PTL2_SIZE, PTL2_SIZE); + } else { + dkprintf("%s: XPMEM attach,phys=%lx\n", __FUNCTION__, phys); + } + } else if (page_unmap(page)) { + ihk_mc_free_pages_user(phys_to_virt(phys), PTL2_SIZE/PTL1_SIZE); - dkprintf("%s: freeing large page at 0x%lx\n", __FUNCTION__, base); + dkprintf("%s: having unmapped page-struct, freeing large page at 0x%lx\n", __FUNCTION__, base); + /* Track page->count for !MF_PREMAP pages */ + dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", pte_get_phys(&old), __FUNCTION__, pte_get_phys(&old), PTL2_SIZE, PTL2_SIZE); + rusage_memory_stat_sub(args->memobj, PTL2_SIZE, PTL2_SIZE); + } + args->vm->currss -= PTL2_SIZE; } - args->vm->currss -= PTL2_SIZE; } return 0; @@ -1209,6 
+1270,8 @@ static int clear_range_l3(void *args0, pte_t *ptep, uint64_t base, struct page *page; struct page_table *pt; + //dkprintf("%s: %lx,%lx,%lx\n", __FUNCTION__, base, start, end); + if (*ptep == PTE_NULL) { return -ENOENT; } @@ -1236,12 +1299,29 @@ static int clear_range_l3(void *args0, pte_t *ptep, uint64_t base, memobj_flush_page(args->memobj, phys, PTL3_SIZE); } - if (!(old & PFL3_FILEOFF) && args->free_physical) { - if (!page || (page && page_unmap(page))) { - ihk_mc_free_pages_user(phys_to_virt(phys), + dkprintf("%s: phys=%lx\n", __FUNCTION__, pte_get_phys(&old)); + + if (!(old & PFL3_FILEOFF)) { + if(args->free_physical) { + if (!page) { + /* Anonymous || !XPMEM attach */ + if (!args->memobj || !(args->memobj->flags & MF_XPMEM)) { + ihk_mc_free_pages_user(phys_to_virt(phys), + PTL3_SIZE/PTL1_SIZE); + dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", pte_get_phys(&old), __FUNCTION__, pte_get_phys(&old), PTL3_SIZE, PTL3_SIZE); + memory_stat_rss_sub(PTL3_SIZE, PTL3_SIZE); + } else { + dkprintf("%s: XPMEM attach,phys=%lx\n", __FUNCTION__, phys); + } + } else if (page_unmap(page)) { + ihk_mc_free_pages_user(phys_to_virt(phys), PTL3_SIZE/PTL1_SIZE); + /* Track page->count for !MF_PREMAP pages */ + dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", pte_get_phys(&old), __FUNCTION__, pte_get_phys(&old), PTL3_SIZE, PTL3_SIZE); + rusage_memory_stat_sub(args->memobj, PTL3_SIZE, PTL3_SIZE); + } + args->vm->currss -= PTL3_SIZE; } - args->vm->currss -= PTL3_SIZE; } return 0; @@ -1267,6 +1347,8 @@ static int clear_range_l4(void *args0, pte_t *ptep, uint64_t base, { struct page_table *pt; + //dkprintf("%s: %lx,%lx,%lx\n", __FUNCTION__, base, start, end); + if (*ptep == PTE_NULL) { return -ENOENT; } @@ -1284,6 +1366,9 @@ static int clear_range(struct page_table *pt, struct process_vm *vm, int error; struct clear_range_args args; + dkprintf("%s: %p,%lx,%lx,%d,%p\n", +
__FUNCTION__, pt, start, end, free_physical, memobj); + if ((start < vm->region.user_start) || (vm->region.user_end < end) || (end <= start)) { @@ -1530,6 +1615,7 @@ struct set_range_args { int pgshift; uintptr_t diff; struct process_vm *vm; + struct vm_range *range; /* To find pages we don't need to call memory_stat_rss_add() */ }; int set_range_l1(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start, @@ -1553,6 +1639,13 @@ int set_range_l1(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start, *ptep = phys | attr_to_l1attr(args->attr); error = 0; + // call memory_stat_rss_add() here because pgshift is resolved here + if (rusage_memory_stat_add(args->range, phys, PTL1_SIZE, PTL1_SIZE)) { + dkprintf("%lx+,%s: calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n", phys, __FUNCTION__, base, phys, PTL1_SIZE, PTL1_SIZE); + } else { + dkprintf("%s: !calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n", __FUNCTION__, base, phys, PTL1_SIZE, PTL1_SIZE); + } + out: dkprintf("set_range_l1(%lx,%lx,%lx): %d %lx\n", base, start, end, error, *ptep); @@ -1584,6 +1677,12 @@ retry: dkprintf("set_range_l2(%lx,%lx,%lx):" "2MiB page. %d %lx\n", base, start, end, error, *ptep); + // call memory_stat_rss_add() here because pgshift is resolved here + if (rusage_memory_stat_add(args->range, phys, PTL2_SIZE, PTL2_SIZE)) { + dkprintf("%lx+,%s: calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n", phys, __FUNCTION__, base, phys, PTL2_SIZE, PTL2_SIZE); + } else { + dkprintf("%s: !calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n", __FUNCTION__, base, phys, PTL2_SIZE, PTL2_SIZE); + } goto out; } @@ -1666,6 +1765,13 @@ retry: dkprintf("set_range_l3(%lx,%lx,%lx):" "1GiB page. 
%d %lx\n", base, start, end, error, *ptep); + + // Call memory_stat_rss_add() here because pgshift is resolved here + if (rusage_memory_stat_add(args->range, phys, PTL3_SIZE, PTL3_SIZE)) { + dkprintf("%lx+,%s: calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n", phys, __FUNCTION__, base, phys, PTL3_SIZE, PTL3_SIZE); + } else { + dkprintf("%s: !calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n", __FUNCTION__, base, phys, PTL3_SIZE, PTL3_SIZE); + } goto out; } @@ -1783,13 +1889,13 @@ out: int ihk_mc_pt_set_range(page_table_t pt, struct process_vm *vm, void *start, void *end, uintptr_t phys, enum ihk_mc_pt_attribute attr, - int pgshift) + int pgshift, struct vm_range *range) { int error; struct set_range_args args; - dkprintf("ihk_mc_pt_set_range(%p,%p,%p,%lx,%x)\n", - pt, start, end, phys, attr); + dkprintf("ihk_mc_pt_set_range(%p,%p,%p,%lx,%x,%d,%lx-%lx)\n", + pt, start, end, phys, attr, pgshift, range->start, range->end); args.pt = pt; args.phys = phys; @@ -1797,6 +1903,7 @@ int ihk_mc_pt_set_range(page_table_t pt, struct process_vm *vm, void *start, args.diff = (uintptr_t)start ^ phys; args.vm = vm; args.pgshift = pgshift; + args.range = range; error = walk_pte_l4(pt, 0, (uintptr_t)start, (uintptr_t)end, &set_range_l4, &args); @@ -1935,8 +2042,8 @@ int arch_get_smaller_page_size(void *args, size_t cursize, size_t *newsizep, if (p2alignp) *p2alignp = p2align; out: - dkprintf("arch_get_smaller_page_size(%p,%lx): %d %lx %d\n", - args, cursize, error, newsize, p2align); + /*dkprintf("arch_get_smaller_page_size(%p,%lx): %d %lx %d\n", + args, cursize, error, newsize, p2align);*/ return error; } @@ -1959,6 +2066,7 @@ struct move_args { uintptr_t src; uintptr_t dest; struct process_vm *vm; + struct vm_range *range; }; static int move_one_page(void *arg0, page_table_t pt, pte_t *ptep, @@ -1990,7 +2098,7 @@ static int move_one_page(void *arg0, page_table_t pt, pte_t *ptep, attr = apte & ~PT_PHYSMASK; error = ihk_mc_pt_set_range(pt, 
args->vm, (void *)dest, - (void *)(dest + pgsize), phys, attr, pgshift); + (void *)(dest + pgsize), phys, attr, pgshift, args->range); if (error) { kprintf("move_one_page(%p,%p,%p %#lx,%p,%d):" "set failed. %d\n", @@ -2006,7 +2114,7 @@ out: } int move_pte_range(page_table_t pt, struct process_vm *vm, - void *src, void *dest, size_t size) + void *src, void *dest, size_t size, struct vm_range *range) { int error; struct move_args args; @@ -2015,6 +2123,7 @@ int move_pte_range(page_table_t pt, struct process_vm *vm, args.src = (uintptr_t)src; args.dest = (uintptr_t)dest; args.vm = vm; + args.range = range; error = visit_pte_range(pt, src, src+size, 0, VPTEF_SKIP_NULL, &move_one_page, &args); diff --git a/arch/x86/kernel/syscall.c b/arch/x86/kernel/syscall.c index 2260b665..c68e14ca 100644 --- a/arch/x86/kernel/syscall.c +++ b/arch/x86/kernel/syscall.c @@ -1366,7 +1366,7 @@ SYSCALL_DECLARE(mmap) struct thread *thread = cpu_local_var(current); struct vm_regions *region = &thread->vm->region; int error; - intptr_t addr; + intptr_t addr = 0; size_t len; int flags = flags0; size_t pgsize; @@ -1469,7 +1469,7 @@ SYSCALL_DECLARE(shmget) const key_t key = ihk_mc_syscall_arg0(ctx); const size_t size = ihk_mc_syscall_arg1(ctx); const int shmflg0 = ihk_mc_syscall_arg2(ctx); - int shmid; + int shmid = -EINVAL; int error; int shmflg = shmflg0; @@ -1732,6 +1732,7 @@ int arch_map_vdso(struct process_vm *vm) enum ihk_mc_pt_attribute attr; int error; int i; + struct vm_range *range; dkprintf("arch_map_vdso()\n"); if (container_size <= 0) { @@ -1750,7 +1751,7 @@ int arch_map_vdso(struct process_vm *vm) vrflags |= VR_PROT_READ | VR_PROT_EXEC; vrflags |= VRFLAG_PROT_TO_MAXPROT(vrflags); error = add_process_memory_range(vm, (intptr_t)s, (intptr_t)e, - NOPHYS, vrflags, NULL, 0, PAGE_SHIFT, NULL); + NOPHYS, vrflags, NULL, 0, PAGE_SHIFT, &range); if (error) { ekprintf("ERROR: adding memory range for vdso. 
%d\n", error); goto out; @@ -1762,7 +1763,7 @@ int arch_map_vdso(struct process_vm *vm) s = vm->vdso_addr + (i * PAGE_SIZE); e = s + PAGE_SIZE; error = ihk_mc_pt_set_range(pt, vm, s, e, - vdso.vdso_physlist[i], attr, 0); + vdso.vdso_physlist[i], attr, 0, range); if (error) { ekprintf("ihk_mc_pt_set_range failed. %d\n", error); goto out; @@ -1782,7 +1783,7 @@ int arch_map_vdso(struct process_vm *vm) vrflags |= VR_PROT_READ; vrflags |= VRFLAG_PROT_TO_MAXPROT(vrflags); error = add_process_memory_range(vm, (intptr_t)s, (intptr_t)e, - NOPHYS, vrflags, NULL, 0, PAGE_SHIFT, NULL); + NOPHYS, vrflags, NULL, 0, PAGE_SHIFT, &range); if (error) { ekprintf("ERROR: adding memory range for vvar. %d\n", error); goto out; @@ -1794,7 +1795,7 @@ int arch_map_vdso(struct process_vm *vm) e = s + PAGE_SIZE; attr = PTATTR_ACTIVE | PTATTR_USER | PTATTR_NO_EXECUTE; error = ihk_mc_pt_set_range(pt, vm, s, e, - vdso.vvar_phys, attr, 0); + vdso.vvar_phys, attr, 0, range); if (error) { ekprintf("ihk_mc_pt_set_range failed. %d\n", error); goto out; @@ -1805,7 +1806,7 @@ int arch_map_vdso(struct process_vm *vm) e = s + PAGE_SIZE; attr = PTATTR_ACTIVE | PTATTR_USER | PTATTR_NO_EXECUTE | PTATTR_UNCACHABLE; error = ihk_mc_pt_set_range(pt, vm, s, e, - vdso.hpet_phys, attr, 0); + vdso.hpet_phys, attr, 0, range); if (error) { ekprintf("ihk_mc_pt_set_range failed. %d\n", error); goto out; @@ -1816,7 +1817,7 @@ int arch_map_vdso(struct process_vm *vm) e = s + PAGE_SIZE; attr = PTATTR_ACTIVE | PTATTR_USER | PTATTR_NO_EXECUTE; error = ihk_mc_pt_set_range(pt, vm, s, e, - vdso.pvti_phys, attr, 0); + vdso.pvti_phys, attr, 0, range); if (error) { ekprintf("ihk_mc_pt_set_range failed. 
%d\n", error); goto out; diff --git a/executer/kernel/mcctrl/syscall.c b/executer/kernel/mcctrl/syscall.c index d87fafea..5fd22936 100644 --- a/executer/kernel/mcctrl/syscall.c +++ b/executer/kernel/mcctrl/syscall.c @@ -1057,6 +1057,9 @@ enum { MF_REG_FILE = 0x1000, MF_DEV_FILE = 0x2000, MF_PREMAP = 0x8000, + MF_XPMEM = 0x10000, /* To identify XPMEM attachment pages for rusage accounting */ + MF_ZEROOBJ = 0x20000, /* To identify pages of anonymous, on-demand paging ranges for rusage accounting */ + MF_SHM = 0x40000, MF_END }; diff --git a/kernel/devobj.c b/kernel/devobj.c index 70a23e2f..d364b4ed 100644 --- a/kernel/devobj.c +++ b/kernel/devobj.c @@ -34,6 +34,7 @@ #include #include #include +#include //#define DEBUG_PRINT_DEVOBJ @@ -199,6 +200,7 @@ static void devobj_release(struct memobj *memobj) } if (obj->pfn_table) { + // Don't call memory_stat_rss_sub() because devobj related pages don't reside in main memory ihk_mc_free_pages(obj->pfn_table, pfn_npages); } kfree(free_obj); @@ -268,6 +270,7 @@ static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintpt memobj_lock(&obj->memobj); obj->pfn_table[ix] = pfn; + // Don't call memory_stat_rss_add() because devobj related pages don't reside in main memory } memobj_unlock(&obj->memobj); diff --git a/kernel/fileobj.c b/kernel/fileobj.c index 4c38da96..0b1ae04e 100644 --- a/kernel/fileobj.c +++ b/kernel/fileobj.c @@ -25,9 +25,17 @@ #include #include #include +#include +//#define DEBUG_PRINT_FILEOBJ + +#ifdef DEBUG_PRINT_FILEOBJ +#define dkprintf(...) do { if (1) kprintf(__VA_ARGS__); } while (0) +#define ekprintf(...) kprintf(__VA_ARGS__) +#else #define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) #define ekprintf(...) 
kprintf(__VA_ARGS__) +#endif mcs_rwlock_lock_t fileobj_list_lock; static LIST_HEAD(fileobj_list); @@ -262,6 +270,9 @@ int fileobj_create(int fd, struct memobj **objp, int *maxprotp) __FUNCTION__, j); goto error_cleanup; } + // Track change in memobj->pages[] for MF_PREMAP pages (MPOL_SHM_PREMAP case) + dkprintf("%lx+,%s: MF_PREMAP&&MPOL_SHM_PREMAP,memory_stat_rss_add,phys=%lx,size=%ld,pgsize=%ld\n", virt_to_phys(mo->pages[j]), __FUNCTION__, virt_to_phys(mo->pages[j]), PAGE_SIZE, PAGE_SIZE); + rusage_memory_stat_mapped_file_add(PAGE_SIZE, PAGE_SIZE); memset(mo->pages[j], 0, PAGE_SIZE); @@ -357,23 +368,31 @@ static void fileobj_release(struct memobj *memobj) for (;;) { struct page *page; void *page_va; + uintptr_t phys; page = fileobj_page_hash_first(obj); if (!page) { break; } __fileobj_page_hash_remove(page); - page_va = phys_to_virt(page_to_phys(page)); + phys = page_to_phys(page); + page_va = phys_to_virt(phys); + /* Count must be one because set to one on the first get_page() invoking fileobj_do_pageio and + incremented by the second get_page() reaping the pageio and decremented by clear_range(). + */ if (ihk_atomic_read(&page->count) != 1) { - kprintf("%s: WARNING: page count %d for phys 0x%lx is invalid, flags: 0x%lx\n", - __FUNCTION__, + kprintf("%s: WARNING: page count is %d for phys 0x%lx is invalid, flags: 0x%lx\n", + __FUNCTION__, ihk_atomic_read(&page->count), page->phys, to_memobj(free_obj)->flags); } else if (page_unmap(page)) { ihk_mc_free_pages_user(page_va, 1); + /* Track change in page->count for !MF_PREMAP pages. It is decremented here or in clear_range() */ + dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", phys, __FUNCTION__, phys, PAGE_SIZE, PAGE_SIZE); + rusage_memory_stat_mapped_file_sub(PAGE_SIZE, PAGE_SIZE); } #if 0 count = ihk_atomic_sub_return(1, &page->count); @@ -398,10 +417,17 @@ static void fileobj_release(struct memobj *memobj) /* Pre-mapped? 
*/ if (to_memobj(free_obj)->flags & MF_PREMAP) { int i; - for (i = 0; i < to_memobj(free_obj)->nr_pages; ++i) { - if (to_memobj(free_obj)->pages[i]) + if (to_memobj(free_obj)->pages[i]) { + dkprintf("%s: pages[i]=%p\n", __FUNCTION__, i, to_memobj(free_obj)->pages[i]); + // Track change in fileobj->pages[] for MF_PREMAP pages + // Note that page_unmap() isn't called for MF_PREMAP in + // free_process_memory_range() --> ihk_mc_pt_free_range() + dkprintf("%lx-,%s: memory_stat_rss_sub,phys=%lx,size=%ld,pgsize=%ld\n", + virt_to_phys(to_memobj(free_obj)->pages[i]), __FUNCTION__, virt_to_phys(to_memobj(free_obj)->pages[i]), PAGE_SIZE, PAGE_SIZE); + rusage_memory_stat_mapped_file_sub(PAGE_SIZE, PAGE_SIZE); ihk_mc_free_pages_user(to_memobj(free_obj)->pages[i], 1); + } } kfree(to_memobj(free_obj)->pages); @@ -531,7 +557,7 @@ out: } static int fileobj_get_page(struct memobj *memobj, off_t off, - int p2align, uintptr_t *physp, unsigned long *pflag) + int p2align, uintptr_t *physp, unsigned long *pflag) { struct thread *proc = cpu_local_var(current); struct fileobj *obj = to_fileobj(memobj); @@ -577,6 +603,9 @@ static int fileobj_get_page(struct memobj *memobj, off_t off, else { dkprintf("%s: MF_ZEROFILL: off: %lu -> 0x%lx allocated\n", __FUNCTION__, off, virt_to_phys(virt)); + // Track change in memobj->pages[] for MF_PREMAP pages (!MPOL_SHM_PREMAP case) + dkprintf("%lx+,%s: MF_PREMAP&&!MPOL_SHM_PREMAP,memory_stat_rss_add,phys=%lx,size=%ld,pgsize=%ld\n", virt_to_phys(virt), __FUNCTION__, virt_to_phys(virt), PAGE_SIZE, PAGE_SIZE); + rusage_memory_stat_mapped_file_add(PAGE_SIZE, PAGE_SIZE); } } @@ -608,7 +637,6 @@ static int fileobj_get_page(struct memobj *memobj, off_t off, virt = ihk_mc_alloc_pages_user(npages, IHK_MC_AP_NOWAIT | (to_memobj(obj)->flags & MF_ZEROFILL) ? 
IHK_MC_AP_USER : 0); - if (!virt) { error = -ENOMEM; kprintf("fileobj_get_page(%p,%lx,%x,%p):" @@ -619,11 +647,16 @@ static int fileobj_get_page(struct memobj *memobj, off_t off, } phys = virt_to_phys(virt); page = phys_to_page_insert_hash(phys); + // Track change in page->count for !MF_PREMAP pages. + // Add when setting the PTE for a page with count of one in ihk_mc_pt_set_range(). + dkprintf("%s: phys_to_page_insert_hash(),phys=%lx,virt=%lx,size=%lx,pgsize=%lx\n", __FUNCTION__, phys, virt, npages * PAGE_SIZE, PAGE_SIZE); + if (page->mode != PM_NONE) { panic("fileobj_get_page:invalid new page"); } page->offset = off; ihk_atomic_set(&page->count, 1); + ihk_atomic64_set(&page->mapped, 0); __fileobj_page_hash_insert(obj, page, hash); page->mode = PM_WILL_PAGEIO; } @@ -646,6 +679,7 @@ static int fileobj_get_page(struct memobj *memobj, off_t off, } else if (page->mode == PM_DONE_PAGEIO) { page->mode = PM_MAPPED; + dkprintf("%s: PM_DONE_PAGEIO-->PM_MAPPED,obj=%lx,off=%lx,phys=%lx\n", __FUNCTION__, obj, off, page_to_phys(page)); } else if (page->mode == PM_PAGEIO_EOF) { error = -ERANGE; @@ -657,6 +691,7 @@ static int fileobj_get_page(struct memobj *memobj, off_t off, } ihk_atomic_inc(&page->count); + dkprintf("%s: mode=%d,count=%d,obj=%lx,off=%lx,phys=%lx\n", __FUNCTION__, page->mode, page->count, obj, off, page_to_phys(page)); error = 0; *physp = page_to_phys(page); @@ -684,6 +719,7 @@ static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys, ihk_mc_user_context_t ctx; ssize_t ss; + dkprintf("%s: phys=%lx,to_memobj(obj)->flags=%x,memobj->flags=%x,page=%p\n", __FUNCTION__, phys, to_memobj(obj)->flags, memobj->flags, phys_to_page(phys)); if (to_memobj(obj)->flags & MF_ZEROFILL) { return 0; } @@ -698,6 +734,7 @@ static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys, __FUNCTION__, phys); return 0; } + memobj_unlock(&obj->memobj); ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_WRITE; @@ -706,6 +743,7 @@ static int fileobj_flush_page(struct memobj *memobj, 
uintptr_t phys, ihk_mc_syscall_arg3(&ctx) = pgsize; ihk_mc_syscall_arg4(&ctx) = phys; + dkprintf("%s: syscall_generic_forwarding\n", __FUNCTION__); ss = syscall_generic_forwarding(__NR_mmap, &ctx); if (ss != pgsize) { dkprintf("fileobj_flush_page(%p,%lx,%lx): %ld (%lx)\n", diff --git a/kernel/host.c b/kernel/host.c index 2994356e..29ef63df 100644 --- a/kernel/host.c +++ b/kernel/host.c @@ -32,6 +32,7 @@ #include #include #include +#include //#define DEBUG_PRINT_HOST @@ -155,10 +156,10 @@ int prepare_process_ranges_args_envs(struct thread *thread, ptattr = arch_vrflag_to_ptattr(range->flag, PF_POPULATE, NULL); error = ihk_mc_pt_set_range(vm->address_space->page_table, vm, - (void *)range->start, - (void *)range->start + (range_npages * PAGE_SIZE), - up, ptattr, - range->pgshift); + (void *)range->start, + (void *)range->start + (range_npages * PAGE_SIZE), + up, ptattr, + range->pgshift, range); if (error) { kprintf("%s: ihk_mc_pt_set_range failed. %d\n", @@ -167,6 +168,8 @@ int prepare_process_ranges_args_envs(struct thread *thread, goto err; } + // memory_stat_rss_add() is called in ihk_mc_pt_set_range() + p->sections[i].remote_pa = up; /* TODO: Maybe we need flag */ @@ -237,6 +240,7 @@ int prepare_process_ranges_args_envs(struct thread *thread, kprintf("%s: error: adding memory range for heap\n", __FUNCTION__); goto err; } + // heap: Add when memory_stat_rss_add() is called in downstream, i.e. add_process_memory_range() vm->region.brk_end_allocated = vm->region.brk_end + proc->heap_extension; @@ -260,12 +264,15 @@ int prepare_process_ranges_args_envs(struct thread *thread, } args_envs_p = virt_to_phys(args_envs); + dkprintf("%s: args_envs: %d pages\n", + __FUNCTION__, ARGENV_PAGE_COUNT); if(add_process_memory_range(vm, addr, e, args_envs_p, flags, NULL, 0, PAGE_SHIFT, NULL) != 0){ ihk_mc_free_pages_user(args_envs, ARGENV_PAGE_COUNT); kprintf("ERROR: adding memory range for args/envs\n"); goto err; } + // memory_stat_rss_add() is called in downstream, i.e. 
add_process_memory_range() dkprintf("args_envs mapping\n"); diff --git a/kernel/include/memobj.h b/kernel/include/memobj.h index fd932856..496e3d4a 100644 --- a/kernel/include/memobj.h +++ b/kernel/include/memobj.h @@ -37,6 +37,9 @@ enum { MF_REG_FILE = 0x1000, MF_DEV_FILE = 0x2000, MF_PREMAP = 0x8000, + MF_XPMEM = 0x10000, /* To identify XPMEM attachment pages for rusage accounting */ + MF_ZEROOBJ = 0x20000, /* To identify pages of anonymous, on-demand paging ranges for rusage accounting */ + MF_SHM = 0x40000, MF_HOST_RELEASED = 0x80000000, MF_END }; diff --git a/kernel/include/page.h b/kernel/include/page.h index 3f5835d8..617bdf2d 100644 --- a/kernel/include/page.h +++ b/kernel/include/page.h @@ -21,6 +21,7 @@ struct page { uint8_t mode; uint64_t phys; ihk_atomic_t count; + ihk_atomic64_t mapped; off_t offset; }; diff --git a/kernel/include/rusage.h b/kernel/include/rusage.h index e31ad1f6..c96ef9ae 100644 --- a/kernel/include/rusage.h +++ b/kernel/include/rusage.h @@ -2,7 +2,10 @@ #define __RUSAGE_H #include +#include #include +#include +#include #ifdef ENABLE_RUSAGE #define RUSAGE_MEM_LIMIT (2 * 1024 * 1024) // 2MB @@ -23,9 +26,9 @@ rusage_rss_add(unsigned long size) unsigned long retval; newval = __sync_add_and_fetch(&monitor->rusage_rss_current, size); - oldval = monitor->rusage_rss_max; + oldval = monitor->rusage_memory_max_usage; while (newval > oldval) { - retval = __sync_val_compare_and_swap(&monitor->rusage_rss_max, + retval = __sync_val_compare_and_swap(&monitor->rusage_memory_max_usage, oldval, newval); if (retval == oldval) { break; @@ -40,6 +43,88 @@ rusage_rss_sub(unsigned long size) __sync_sub_and_fetch(&monitor->rusage_rss_current, size); } +static inline void memory_stat_rss_add(unsigned long size, int pgsize) +{ + ihk_atomic_add_long(size, &monitor->rusage_memory_stat_rss[rusage_pgsize_to_pgtype(pgsize)]); +} + +static inline void memory_stat_rss_sub(unsigned long size, int pgsize) +{ + ihk_atomic_add_long(-size, 
&monitor->rusage_memory_stat_rss[rusage_pgsize_to_pgtype(pgsize)]); +} + +static inline void rusage_memory_stat_mapped_file_add(unsigned long size, int pgsize) +{ + ihk_atomic_add_long(size, &monitor->rusage_memory_stat_mapped_file[rusage_pgsize_to_pgtype(pgsize)]); +} + +static inline void rusage_memory_stat_mapped_file_sub(unsigned long size, int pgsize) +{ + ihk_atomic_add_long(-size, &monitor->rusage_memory_stat_mapped_file[rusage_pgsize_to_pgtype(pgsize)]); +} + +static inline int rusage_memory_stat_add(struct vm_range *range, uintptr_t phys, unsigned long size, int pgsize) +{ + /* Is it resident in main memory? */ + if (range->flag & (VR_REMOTE | VR_IO_NOCACHE | VR_RESERVED)) { + return 0; + } + /* Is it anonymous and pre-paging? */ + if (!range->memobj) { + memory_stat_rss_add(size, pgsize); + return 1; + } + /* Is it devobj or (fileobj and pre-map) or xpmem attachment? */ + if ((range->memobj->flags & MF_DEV_FILE) || + (range->memobj->flags & MF_PREMAP) || + (range->memobj->flags & MF_XPMEM) + ) { + return 0; + } + /* Is it anonymous and demand-paging? */ + if (range->memobj->flags & MF_ZEROOBJ) { + memory_stat_rss_add(size, pgsize); + return 1; + } + + struct page *page = phys_to_page(phys); + + /* Is It file map and cow page? */ + if ((range->memobj->flags & (MF_DEV_FILE | MF_REG_FILE)) && + !page) { + //kprintf("%s: cow,phys=%lx\n", __FUNCTION__, phys); + memory_stat_rss_add(size, pgsize); + return 1; + } + + /* Is it a sharable page? */ + if (!page) { + kprintf("%s: WARNING !page,phys=%lx\n", __FUNCTION__, phys); + return 0; + } + /* Is this the first attempt to map the sharable page? 
*/ + if(__sync_bool_compare_and_swap(&page->mapped.counter64, 0, 1)) { + if(range->memobj->flags & MF_SHM) { + memory_stat_rss_add(size, pgsize); + } else { + rusage_memory_stat_mapped_file_add(size, pgsize); + } + return 1; + } else { + return 0; + } + return 0; +} + +static inline void rusage_memory_stat_sub(struct memobj *memobj, unsigned long size, int pgsize) +{ + if(memobj->flags & MF_SHM) { + memory_stat_rss_sub(size, pgsize); + } else { + rusage_memory_stat_mapped_file_sub(size, pgsize); + } +} + static inline void rusage_kmem_add(unsigned long size) { @@ -47,11 +132,11 @@ rusage_kmem_add(unsigned long size) unsigned long oldval; unsigned long retval; - newval = __sync_add_and_fetch(&monitor->rusage_kmem_usage, size); - oldval = monitor->rusage_kmem_max_usage; + newval = __sync_add_and_fetch(&monitor->rusage_memory_kmem_usage, size); + oldval = monitor->rusage_memory_kmem_max_usage; while (newval > oldval) { retval = __sync_val_compare_and_swap( - &monitor->rusage_kmem_max_usage, + &monitor->rusage_memory_kmem_max_usage, oldval, newval); if (retval == oldval) { break; @@ -63,13 +148,13 @@ rusage_kmem_add(unsigned long size) static inline void rusage_kmem_sub(unsigned long size) { - __sync_sub_and_fetch(&monitor->rusage_kmem_usage, size); + __sync_sub_and_fetch(&monitor->rusage_memory_kmem_usage, size); } static inline void rusage_numa_add(int numa_id, unsigned long size) { - __sync_add_and_fetch(monitor->rusage_numa_stat + numa_id, size); + __sync_add_and_fetch(monitor->rusage_memory_numa_stat + numa_id, size); rusage_rss_add(size); } @@ -77,7 +162,7 @@ static inline void rusage_numa_sub(int numa_id, unsigned long size) { rusage_rss_sub(size); - __sync_sub_and_fetch(monitor->rusage_numa_stat + numa_id, size); + __sync_sub_and_fetch(monitor->rusage_memory_numa_stat + numa_id, size); } static inline void @@ -163,6 +248,31 @@ rusage_rss_sub(unsigned long size) { } +static inline void memory_stat_rss_add(unsigned long size, size_t pgsize) +{ +} + +static inline 
void memory_stat_rss_sub(unsigned long size, size_t pgsize) +{ +} + +static inline void rusage_memory_stat_mapped_file_add(unsigned long size, int pgsize) +{ +} + +static inline void rusage_memory_stat_mapped_file_sub(unsigned long size, int pgsize) +{ +} + +static inline int rusage_memory_stat_add(struct vm_range *range, uintptr_t phys, unsigned long size, int pgsize) +{ + return 0; +} + +static inline void rusage_memory_stat_sub(struct memobj *memobj, unsigned long size, int pgsize) +{ +} + static inline void rusage_numa_add(int numa_id, unsigned long size) { diff --git a/kernel/include/shm.h b/kernel/include/shm.h index 95e81264..4071e578 100644 --- a/kernel/include/shm.h +++ b/kernel/include/shm.h @@ -32,7 +32,7 @@ enum { SHM_EXEC = 0100000, /* for shm_mode */ - SHM_DEST = 01000, + SHM_DEST = 01000, /* Marked for destruction */ SHM_LOCKED = 02000, /* for cmd of shmctl() */ diff --git a/kernel/include/xpmem_private.h b/kernel/include/xpmem_private.h index cfc89e79..fca9d6b6 100644 --- a/kernel/include/xpmem_private.h +++ b/kernel/include/xpmem_private.h @@ -318,6 +318,8 @@ static inline struct xpmem_thread_group *__xpmem_tg_ref_by_tgid( tgid, return_destroying); index = xpmem_tg_hashtable_index(tgid); + XPMEM_DEBUG("xpmem_my_part=%p\n", xpmem_my_part); + XPMEM_DEBUG("xpmem_my_part->tg_hashtable=%p\n", xpmem_my_part->tg_hashtable); mcs_rwlock_reader_lock(&xpmem_my_part->tg_hashtable[index].lock, &lock); tg = __xpmem_tg_ref_by_tgid_nolock_internal(tgid, index, return_destroying); diff --git a/kernel/init.c b/kernel/init.c index cb97f956..091b9122 100644 --- a/kernel/init.c +++ b/kernel/init.c @@ -255,7 +255,6 @@ static void monitor_init() monitor = ihk_mc_alloc_pages(z, IHK_MC_AP_CRITICAL); memset(monitor, 0, z * PAGE_SIZE); monitor->num_processors = num_processors; - monitor->num_numa_nodes = ihk_mc_get_nr_numa_nodes(); monitor->ns_per_tsc = ihk_mc_get_ns_per_tsc(); phys = virt_to_phys(monitor); ihk_set_monitor(phys, sizeof(struct ihk_os_monitor) + diff --git 
a/kernel/process.c b/kernel/process.c index 537f7787..f34a5100 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -440,6 +440,31 @@ clone_thread(struct thread *org, unsigned long pc, unsigned long sp, goto err_free_proc; } + /* Copy mckfd list + FIXME: Replace list manipulation with list_add() etc. */ + long irqstate = ihk_mc_spinlock_lock(&proc->mckfd_lock); + struct mckfd *cur; + for (cur = org->proc->mckfd; cur; cur = cur->next) { + struct mckfd *mckfd = kmalloc(sizeof(struct mckfd), IHK_MC_AP_NOWAIT); + if(!mckfd) { + release_address_space(asp); + kfree(proc->vm); + kfree(proc); + goto err_free_proc; + } + memcpy(mckfd, cur, sizeof(struct mckfd)); + + if (proc->mckfd == NULL) { + proc->mckfd = mckfd; + mckfd->next = NULL; + } + else { + mckfd->next = proc->mckfd; + proc->mckfd = mckfd; + } + } + ihk_mc_spinlock_unlock(&proc->mckfd_lock, irqstate); + thread->vm->vdso_addr = org->vm->vdso_addr; thread->vm->vvar_addr = org->vm->vvar_addr; thread->proc->maxrss = org->proc->maxrss; @@ -532,6 +557,7 @@ ptrace_traceme(void) struct copy_args { struct process_vm *new_vm; unsigned long new_vrflag; + struct vm_range *range; /* out */ intptr_t fault_addr; @@ -591,12 +617,13 @@ static int copy_user_pte(void *arg0, page_table_t src_pt, pte_t *src_ptep, void } error = ihk_mc_pt_set_range(args->new_vm->address_space->page_table, - args->new_vm, pgaddr, pgaddr+pgsize, phys, attr, - pgshift); + args->new_vm, pgaddr, pgaddr + pgsize, phys, attr, + pgshift, args->range); if (error) { args->fault_addr = (intptr_t)pgaddr; goto out; } + // fork/clone case: memory_stat_rss_add() is called in ihk_mc_pt_set_range() dkprintf("copy_user_pte(): new PTE set\n"); error = 0; @@ -655,7 +682,8 @@ static int copy_user_ranges(struct process_vm *vm, struct process_vm *orgvm) args.new_vrflag = range->flag; args.new_vm = vm; args.fault_addr = -1; - + args.range = range; + error = visit_pte_range(orgvm->address_space->page_table, (void *)range->start, (void *)range->end, range->pgshift, 
VPTEF_SKIP_NULL, @@ -670,6 +698,7 @@ static int copy_user_ranges(struct process_vm *vm, struct process_vm *orgvm) } goto err_free_range_rollback; } + // memory_stat_rss_add() is called in child-node, i.e. copy_user_pte() insert_vm_range_list(vm, range); } @@ -702,13 +731,13 @@ int update_process_page_table(struct process_vm *vm, attr = arch_vrflag_to_ptattr(range->flag, PF_POPULATE, NULL); flags = ihk_mc_spinlock_lock(&vm->page_table_lock); error = ihk_mc_pt_set_range(vm->address_space->page_table, vm, - (void *)range->start, (void *)range->end, phys, attr, - range->pgshift); + (void *)range->start, (void *)range->end, phys, attr, + range->pgshift, range); if (error) { kprintf("update_process_page_table:ihk_mc_pt_set_range failed. %d\n", error); goto out; } - + // memory_stat_rss_add() is called in ihk_mc_pt_set_range() error = 0; out: ihk_mc_spinlock_unlock(&vm->page_table_lock, flags); @@ -730,6 +759,7 @@ int split_process_memory_range(struct process_vm *vm, struct vm_range *range, "ihk_mc_pt_split failed. %d\n", error); goto out; } + // memory_stat_rss_add() is called in child-node, i.e. ihk_mc_pt_split() to deal with L3->L2 case newrange = kmalloc(sizeof(struct vm_range), IHK_MC_AP_NOWAIT); if (!newrange) { @@ -872,6 +902,8 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range) } } + dkprintf("%s: vm=%p,range=%p,%lx-%lx\n", __FUNCTION__, vm, range, range->start, range->end); + ihk_mc_spinlock_lock_noirq(&vm->page_table_lock); if (range->memobj) { memobj_lock(range->memobj); @@ -889,8 +921,11 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range) vm, start0, end0, start, end, range->memobj, error); /* through */ } + // memory_stat_rss_sub() is called downstream, i.e. 
ihk_mc_pt_free_range() to deal with empty PTE } else { + // memory_stat_rss_sub() isn't called because free_physical is set to zero in clear_range() + dkprintf("%s,memory_stat_rss_sub() isn't called, VR_REMOTE | VR_IO_NOCACHE | VR_RESERVED case, %lx-%lx\n", __FUNCTION__, start, end); ihk_mc_spinlock_lock_noirq(&vm->page_table_lock); error = ihk_mc_pt_clear_range(vm->address_space->page_table, vm, (void *)start, (void *)end); @@ -1100,6 +1135,7 @@ int add_process_memory_range(struct process_vm *vm, struct memobj *memobj, off_t offset, int pgshift, struct vm_range **rp) { + dkprintf("%s: start=%lx,end=%lx,phys=%lx,flag=%lx\n", __FUNCTION__, start, end, phys, flag); struct vm_range *range; int rc; @@ -1147,6 +1183,7 @@ int add_process_memory_range(struct process_vm *vm, } else { rc = update_process_page_table(vm, range, phys, 0); + // memory_stat_rss_add() is called in ihk_mc_pt_set_range() } if (rc != 0) { @@ -1394,6 +1431,8 @@ static int remap_one_page(void *arg0, page_table_t pt, pte_t *ptep, page = phys_to_page(phys); if (page && page_unmap(page)) { ihk_mc_free_pages_user(phys_to_virt(phys), pgsize/PAGE_SIZE); + dkprintf("%lx-,%s: calling memory_stat_rss_sub(),size=%ld,pgsize=%ld\n", phys, __FUNCTION__, pgsize, pgsize); + rusage_memory_stat_sub(args->memobj, pgsize, pgsize); } error = 0; @@ -1565,6 +1604,7 @@ static int invalidate_one_page(void *arg0, page_table_t pt, pte_t *ptep, arg0, pt, ptep, *ptep, pgaddr, pgshift, error); goto out; } + // memory_stat_rss_sub() is called in downstream, i.e. shmobj_invalidate_page() error = 0; out: @@ -1596,6 +1636,8 @@ int invalidate_process_memory_range(struct process_vm *vm, vm, range, start, end, error); goto out; } + // memory_stat_rss_sub() is called downstream, i.e. 
invalidate_one_page() to deal with empty PTEs + out: dkprintf("invalidate_process_memory_range(%p,%p,%#lx,%#lx):%d\n", vm, range, start, end, error); @@ -1636,6 +1678,7 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang goto out; } /*****/ + dkprintf("%s: pgaddr=%lx,range->start=%lx,range->end=%lx,pgaddr+pgsize=%lx\n", __FUNCTION__, pgaddr, range->start, range->end, pgaddr + pgsize); while (((uintptr_t)pgaddr < range->start) || (range->end < ((uintptr_t)pgaddr + pgsize))) { ptep = NULL; @@ -1647,6 +1690,7 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang pgaddr = (void *)(fault_addr & ~(pgsize - 1)); } /*****/ + dkprintf("%s: ptep=%lx,pte_is_null=%d,pte_is_fileoff=%d\n", __FUNCTION__, ptep, ptep ? pte_is_null(ptep) : -1, ptep ? pte_is_fileoff(ptep, pgsize) : -1); if (!ptep || pte_is_null(ptep) || pte_is_fileoff(ptep, pgsize)) { phys = NOPHYS; if (range->memobj) { @@ -1659,7 +1703,7 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang off = pte_get_off(ptep, pgsize); } error = memobj_get_page(range->memobj, off, p2align, - &phys, &memobj_flag); + &phys, &memobj_flag); if (error) { struct memobj *obj; @@ -1671,6 +1715,7 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang goto out; } } + // memory_stat_rss_add() is called downstream, i.e. 
memobj_get_page() to check page->count } if (phys == NOPHYS) { void *virt = NULL; @@ -1704,6 +1749,10 @@ retry: memset(virt, 0, pgsize); phys = virt_to_phys(virt); if (phys_to_page(phys)) { + dkprintf("%s: NOPHYS,phys=%lx,vmr(%lx-%lx),flag=%x,fa=%lx,reason=%x\n", + __FUNCTION__, page_to_phys(page), + range->start, range->end, range->flag, fault_addr, reason); + page_map(phys_to_page(phys)); } } @@ -1730,6 +1779,10 @@ retry: void *virt; size_t npages; + if (!page) { + kprintf("%s: WARNING: cow on non-struct-page-managed page\n", __FUNCTION__); + } + npages = pgsize / PAGE_SIZE; virt = ihk_mc_alloc_aligned_pages_user(npages, p2align, IHK_MC_AP_NOWAIT); @@ -1738,34 +1791,54 @@ retry: kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):cannot allocate copy page. %d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error); goto out; } - dkprintf("%s: copying 0x%lx:%lu\n", - __FUNCTION__, pgaddr, pgsize); + dkprintf("%s: cow,copying virt:%lx<-%lx,phys:%lx<-%lx,pgsize=%lu\n", + __FUNCTION__, virt, phys_to_virt(phys), virt_to_phys(virt), phys, pgsize); memcpy(virt, phys_to_virt(phys), pgsize); - phys = virt_to_phys(virt); - if (page) { - page_unmap(page); + /* Call rusage_memory_stat_add() because remote page fault may create a page not pointed-to by PTE */ + if(rusage_memory_stat_add(range, phys, pgsize, pgsize)) { + dkprintf("%lx+,%s: remote page fault + cow, calling memory_stat_rss_add(),pgsize=%ld\n", + phys, __FUNCTION__, pgsize); } + if (page) { + if (page_unmap(page)) { + dkprintf("%lx-,%s: cow,calling memory_stat_rss_sub(),size=%ld,pgsize=%ld\n", phys, __FUNCTION__, pgsize, pgsize); + rusage_memory_stat_sub(range->memobj, pgsize, pgsize); + } + } + phys = virt_to_phys(virt); page = phys_to_page(phys); } } /*****/ if (ptep) { + if(rusage_memory_stat_add(range, phys, pgsize, pgsize)) { + /* on-demand paging, phys pages are obtained by ihk_mc_alloc_aligned_pages_user() or get_page() */ + dkprintf("%lx+,%s: (on-demand paging && first map) 
|| cow,calling memory_stat_rss_add(),phys=%lx,pgsize=%ld\n", + phys, __FUNCTION__, phys, pgsize); + } else { + dkprintf("%s: !calling memory_stat_rss_add(),phys=%lx,pgsize=%ld\n", + __FUNCTION__, phys, pgsize); + } + + dkprintf("%s: attr=%x\n", __FUNCTION__, attr); error = ihk_mc_pt_set_pte(vm->address_space->page_table, ptep, pgsize, phys, attr); if (error) { kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):set_pte failed. %d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error); goto out; } + dkprintf("%s: non-NULL pte,page=%lx,page_is_in_memobj=%d,page->count=%d\n", __FUNCTION__, page, page ? page_is_in_memobj(page) : 0, page ? ihk_atomic_read(&page->count) : 0); } else { error = ihk_mc_pt_set_range(vm->address_space->page_table, vm, pgaddr, pgaddr + pgsize, phys, - attr, range->pgshift); + attr, range->pgshift, range); if (error) { kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):set_range failed. %d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error); goto out; } + // memory_stat_rss_add() is called in downstream with !memobj check } flush_tlb_single(fault_addr); vm->currss += pgsize; @@ -1778,7 +1851,14 @@ retry: out: ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock); if (page) { - page_unmap(page); + /* Unmap stray struct page */ + dkprintf("%s: out,phys=%lx,vmr(%lx-%lx),flag=%x,fa=%lx,reason=%x\n", + __FUNCTION__, page_to_phys(page), + range->start, range->end, range->flag, fault_addr, reason); + if (page_unmap(page)) { + dkprintf("%lx-,%s: out,calling memory_stat_rss_sub(),size=%ld,pgsize=%ld\n", page_to_phys(page), __FUNCTION__, pgsize, pgsize); + rusage_memory_stat_sub(range->memobj, pgsize, pgsize); + } } dkprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx): %d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error); return error; @@ -1947,6 +2027,7 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn, unsigned long at_rand; 
struct process *proc = thread->proc; unsigned long ap_flag; + struct vm_range *range; /* Create stack range */ end = STACK_TOP(&thread->vm->region) & LARGE_PAGE_MASK; @@ -1991,17 +2072,18 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn, vrflag |= VR_MAXPROT_READ | VR_MAXPROT_WRITE | VR_MAXPROT_EXEC; #define NOPHYS ((uintptr_t)-1) if ((rc = add_process_memory_range(thread->vm, start, end, NOPHYS, - vrflag, NULL, 0, LARGE_PAGE_SHIFT, NULL)) != 0) { + vrflag, NULL, 0, LARGE_PAGE_SHIFT, &range)) != 0) { ihk_mc_free_pages_user(stack, minsz >> PAGE_SHIFT); return rc; } /* Map physical pages for initial stack frame */ error = ihk_mc_pt_set_range(thread->vm->address_space->page_table, - thread->vm, (void *)(end - minsz), - (void *)end, virt_to_phys(stack), - arch_vrflag_to_ptattr(vrflag, PF_POPULATE, NULL), - LARGE_PAGE_SHIFT); + thread->vm, (void *)(end - minsz), + (void *)end, virt_to_phys(stack), + arch_vrflag_to_ptattr(vrflag, PF_POPULATE, NULL), + LARGE_PAGE_SHIFT, range + ); if (error) { kprintf("init_process_stack:" @@ -2011,6 +2093,8 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn, return error; } + // memory_stat_rss_add() is called in ihk_mc_pt_set_range(); + /* set up initial stack frame */ p = (unsigned long *)(stack + minsz); s_ind = -1; @@ -2112,8 +2196,9 @@ unsigned long extend_process_region(struct process_vm *vm, ihk_mc_free_pages_user(p, (new_end_allocated - end_allocated) >> PAGE_SHIFT); return end_allocated; } + // memory_stat_rss_add() is called in add_process_memory_range() - dkprintf("%s: new_end_allocated: 0x%lu, align_size: %lu, align_mask: %lx\n", + dkprintf("%s: new_end_allocated: 0x%lx, align_size: %lu, align_mask: %lx\n", __FUNCTION__, new_end_allocated, align_size, align_mask); return new_end_allocated; @@ -2134,6 +2219,9 @@ int remove_process_region(struct process_vm *vm, (void *)start, (void *)end); ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock); + // memory_stat_rss_sub() isn't 
called because this execution path is no longer reached + dkprintf("%s: memory_stat_rss_sub() isn't called,start=%lx,end=%lx\n", __FUNCTION__, start, end); + return 0; } diff --git a/kernel/shmobj.c b/kernel/shmobj.c index 99fe1e2a..c6fabf5b 100644 --- a/kernel/shmobj.c +++ b/kernel/shmobj.c @@ -22,6 +22,7 @@ #include #include #include +#include #define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) #define ekprintf(...) kprintf(__VA_ARGS__) @@ -179,6 +180,7 @@ int shmobj_create(struct shmid_ds *ds, struct memobj **objp) memset(obj, 0, sizeof(*obj)); obj->memobj.ops = &shmobj_ops; + obj->memobj.flags = MF_SHM; obj->memobj.size = ds->shm_segsz; obj->ds = *ds; obj->ds.shm_perm.seq = the_seq++; @@ -242,13 +244,15 @@ void shmobj_destroy(struct shmobj *obj) for (;;) { struct page *page; void *page_va; + uintptr_t phys; page = page_list_first(obj); if (!page) { break; } page_list_remove(obj, page); - page_va = phys_to_virt(page_to_phys(page)); + phys = page_to_phys(page); + page_va = phys_to_virt(phys); if (ihk_atomic_read(&page->count) != 1) { kprintf("%s: WARNING: page count for phys 0x%lx is invalid\n", @@ -257,6 +261,10 @@ void shmobj_destroy(struct shmobj *obj) if (page_unmap(page)) { ihk_mc_free_pages_user(page_va, npages); + /* Track change in page->count for shmobj. + It is decremented in here or shmobj_invalidate() or clear_range(). 
*/ + dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", phys, __FUNCTION__, phys, npages * PAGE_SIZE, PAGE_SIZE); + memory_stat_rss_sub(npages * PAGE_SIZE, PAGE_SIZE); } #if 0 dkprintf("shmobj_destroy(%p):" @@ -366,7 +374,7 @@ static void shmobj_ref(struct memobj *memobj) } static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align, - uintptr_t *physp, unsigned long *pflag) + uintptr_t *physp, unsigned long *pflag) { struct shmobj *obj = to_shmobj(memobj); int error; @@ -417,6 +425,9 @@ static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align, } phys = virt_to_phys(virt); page = phys_to_page_insert_hash(phys); + /* Track change in page->count for shmobj. + Add when setting the PTE for a page with count of one in ihk_mc_pt_set_range(). */ + if (page->mode != PM_NONE) { fkprintf("shmobj_get_page(%p,%#lx,%d,%p):" "page %p %#lx %d %d %#lx\n", @@ -429,6 +440,7 @@ static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align, page->mode = PM_MAPPED; page->offset = off; ihk_atomic_set(&page->count, 1); + ihk_atomic64_set(&page->mapped, 0); page_list_insert(obj, page); virt = NULL; dkprintf("shmobj_get_page(%p,%#lx,%d,%p):alloc page. %p %#lx\n", @@ -469,6 +481,10 @@ static int shmobj_invalidate_page(struct memobj *memobj, uintptr_t phys, if (page_unmap(page)) { ihk_mc_free_pages_user(phys_to_virt(phys), pgsize/PAGE_SIZE); + /* Track change in page->count for shmobj. + It is decremented in here or shmobj_destroy() or clear_range(). 
*/ + dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", phys, __FUNCTION__, phys, pgsize, PAGE_SIZE); + memory_stat_rss_sub(pgsize, PAGE_SIZE); } } diff --git a/kernel/syscall.c b/kernel/syscall.c index b0fdc008..ca738f90 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -289,6 +289,7 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) dkprintf("STATUS_PAGE_FAULT in syscall, pid: %d\n", cpu_local_var(current)->proc->pid); + dkprintf("remote page fault,va=%lx,reason=%x\n", res.fault_address, res.fault_reason|PF_POPULATE); error = page_fault_process_vm(thread->vm, (void *)res.fault_address, res.fault_reason|PF_POPULATE); @@ -966,6 +967,14 @@ void terminate(int rc, int sig) dkprintf("terminate,pid=%d\n", proc->pid); + /* rusage debug */ + for(i = 0; i < IHK_MAX_NUM_PGSIZES; i++) { + dkprintf("memory_stat_rss[%d]=%ld\n", i, monitor->rusage_memory_stat_rss[i]); + } + for(i = 0; i < IHK_MAX_NUM_PGSIZES; i++) { + dkprintf("memory_stat_mapped_file[%d]=%ld\n", i, monitor->rusage_memory_stat_mapped_file[i]); + } + #ifdef DCFA_KMOD do_mod_exit(rc); #endif @@ -1413,6 +1422,7 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot, } } else if (flags & MAP_SHARED) { + dkprintf("%s: MAP_SHARED,flags=%x,len=%ld\n", __FUNCTION__, flags, len); memset(&ads, 0, sizeof(ads)); ads.shm_segsz = len; ads.shm_perm.mode = SHM_DEST; @@ -1424,6 +1434,7 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot, } } else { + dkprintf("%s: anon&demand-paging\n", __FUNCTION__); error = zeroobj_create(&memobj); if (error) { ekprintf("do_mmap:zeroobj_create failed. 
%d\n", error); @@ -1473,19 +1484,21 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot, for (i = 0; i < memobj->nr_pages; ++i) { error = ihk_mc_pt_set_range(proc->vm->address_space->page_table, - proc->vm, - (void *)range->start + (i * PAGE_SIZE), - (void *)range->start + (i * PAGE_SIZE) + - PAGE_SIZE, - virt_to_phys(memobj->pages[i]), - ptattr, - PAGE_SHIFT); + proc->vm, + (void *)range->start + (i * PAGE_SIZE), + (void *)range->start + (i * PAGE_SIZE) + + PAGE_SIZE, + virt_to_phys(memobj->pages[i]), + ptattr, + PAGE_SHIFT, + range); if (error) { kprintf("%s: ERROR: mapping %d page of pre-mapped file\n", __FUNCTION__, i); } } dkprintf("%s: memobj 0x%lx pre-mapped\n", __FUNCTION__, memobj); + // fileobj && MF_PREMAP && MPOL_SHM_PREMAP case: memory_stat_rss_add() is called in fileobj_create() } /* else if (memobj->flags & MF_REG_FILE) { @@ -1765,8 +1778,6 @@ SYSCALL_DECLARE(brk) extend_process_region(cpu_local_var(current)->vm, region->brk_end_allocated, address, vrflag); ihk_mc_spinlock_unlock_noirq(&cpu_local_var(current)->vm->memory_range_lock); - dkprintf("SC(%d)[sys_brk] brk_end set to %lx\n", - ihk_mc_get_processor_id(), region->brk_end); if (old_brk_end_allocated == region->brk_end_allocated) { r = old_brk_end_allocated; @@ -1775,6 +1786,8 @@ SYSCALL_DECLARE(brk) region->brk_end = address; r = region->brk_end; + dkprintf("SC(%d)[sys_brk] brk_end set to %lx\n", + ihk_mc_get_processor_id(), region->brk_end); out: return r; @@ -2856,7 +2869,7 @@ SYSCALL_DECLARE(ioctl) ihk_mc_spinlock_unlock(&proc->mckfd_lock, irqstate); if(fdp && fdp->ioctl_cb){ -//kprintf("ioctl: found system fd %d\n", fd); + //kprintf("ioctl: found system fd %d\n", fd); rc = fdp->ioctl_cb(fdp, ctx); } else{ @@ -7334,8 +7347,8 @@ SYSCALL_DECLARE(mremap) size = (oldsize < newsize)? 
oldsize: newsize; ihk_mc_spinlock_lock_noirq(&vm->page_table_lock); error = move_pte_range(vm->address_space->page_table, vm, - (void *)oldstart, (void *)newstart, - size); + (void *)oldstart, (void *)newstart, + size, range); ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock); if (error) { ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):" diff --git a/kernel/xpmem.c b/kernel/xpmem.c index 1093261a..9daaff1b 100644 --- a/kernel/xpmem.c +++ b/kernel/xpmem.c @@ -1098,6 +1098,8 @@ static int xpmem_attach( XPMEM_DEBUG("do_mmap(): vaddr=0x%lx, size=0x%lx, prot_flags=0x%lx, " "flags=0x%lx, fd=%d, offset=0x%lx", vaddr, size, prot_flags, flags, mckfd->fd, offset); + /* The new range uses on-demand paging and is associated with shmobj because of + MAP_ANONYMOUS && !MAP_PRIVATE && MAP_SHARED */ at_vaddr = do_mmap(vaddr, size, prot_flags, flags, mckfd->fd, offset); if (IS_ERR((void *)(uintptr_t)at_vaddr)) { ret = at_vaddr; @@ -1110,13 +1112,21 @@ static int xpmem_attach( vmr = lookup_process_memory_range(vm, at_vaddr, at_vaddr + 1); + /* To identify pages of XPMEM attachment for rusage accounting */ + if(vmr->memobj) { + vmr->memobj->flags |= MF_XPMEM; + } else { + ekprintf("%s: vmr->memobj equals to NULL\n", __FUNCTION__); + } + ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); if (!vmr) { ret = -ENOENT; goto out_2; } - vmr->private_data = att; + vmr->private_data = att; + att->at_vmr = vmr; @@ -1199,6 +1209,8 @@ static int xpmem_detach( xpmem_unpin_pages(ap->seg, vm, att->at_vaddr, att->at_size); range->private_data = NULL; + /* range->memobj is released in xpmem_vm_munmap() --> xpmem_remove_process_range() --> + xpmem_free_process_memory_range() */ mcs_rwlock_writer_unlock(&att->at_lock, &at_lock); @@ -1416,6 +1428,8 @@ static void xpmem_detach_att( xpmem_unpin_pages(ap->seg, vm, att->at_vaddr, att->at_size); range->private_data = NULL; + /* range->memobj is released in xpmem_vm_munmap() --> xpmem_remove_process_range() --> + xpmem_free_process_memory_range() */ 
att->flags &= ~XPMEM_FLAG_VALIDPTEs; @@ -1677,7 +1691,8 @@ int xpmem_remove_process_memory_range( } remaining_vmr->private_data = NULL; - + /* This function is always followed by xpmem_free_process_memory_range() + which in turn calls memobj_release() */ remaining_vaddr = att->at_vaddr; } @@ -1699,6 +1714,8 @@ int xpmem_remove_process_memory_range( att->at_size = remaining_vmr->end - remaining_vmr->start; vmr->private_data = NULL; + /* This function is always followed by [xpmem_]free_process_memory_range() + which in turn calls memobj_release() */ out: mcs_rwlock_writer_unlock(&att->at_lock, &at_lock); @@ -1910,17 +1927,19 @@ static int xpmem_remap_pte( __FUNCTION__, ret); goto out; } + // memory_stat_rss_add() is called by the process hosting the memory area } else { ret = ihk_mc_pt_set_range(vm->address_space->page_table, vm, att_pgaddr, att_pgaddr + att_pgsize, seg_phys, att_attr, - vmr->pgshift); + vmr->pgshift, vmr); if (ret) { ret = -EFAULT; ekprintf("%s: ERROR: ihk_mc_pt_set_range() failed %d\n", __FUNCTION__, ret); goto out; } + // memory_stat_rss_add() is called by the process hosting the memory area } out: diff --git a/kernel/zeroobj.c b/kernel/zeroobj.c index d914462c..cf49c14d 100644 --- a/kernel/zeroobj.c +++ b/kernel/zeroobj.c @@ -102,6 +102,7 @@ static int alloc_zeroobj(void) memset(obj, 0, sizeof(*obj)); obj->memobj.ops = &zeroobj_ops; + obj->memobj.flags = MF_ZEROOBJ; obj->memobj.size = 0; page_list_init(obj); ihk_mc_spinlock_init(&obj->memobj.lock); @@ -127,6 +128,7 @@ static int alloc_zeroobj(void) page->mode = PM_MAPPED; page->offset = 0; ihk_atomic_set(&page->count, 1); + ihk_atomic64_set(&page->mapped, 0); page_list_insert(obj, page); virt = NULL; diff --git a/lib/include/ihk/debug.h b/lib/include/ihk/debug.h index 6b64ccc7..92ad0cbc 100644 --- a/lib/include/ihk/debug.h +++ b/lib/include/ihk/debug.h @@ -15,6 +15,7 @@ #include #include +#include struct ihk_kmsg_buf { int tail; diff --git a/lib/include/ihk/mm.h b/lib/include/ihk/mm.h index 
66db1fe8..2da99210 100644 --- a/lib/include/ihk/mm.h +++ b/lib/include/ihk/mm.h @@ -22,6 +22,7 @@ struct memobj; struct process_vm; +struct vm_range; enum ihk_mc_gma_type { IHK_MC_GMA_MAP_START, @@ -178,7 +179,7 @@ int ihk_mc_pt_change_attr_range(page_table_t pt, void *start, void *end, pte_t *ihk_mc_pt_lookup_pte(page_table_t pt, void *virt, int pgshift, void **pgbasep, size_t *pgsizep, int *p2alignp); int ihk_mc_pt_set_range(page_table_t pt, struct process_vm *vm, void *start, void *end, uintptr_t phys, enum ihk_mc_pt_attribute attr, - int pgshift); + int pgshift, struct vm_range *range); int ihk_mc_pt_set_pte(page_table_t pt, pte_t *ptep, size_t pgsize, uintptr_t phys, enum ihk_mc_pt_attribute attr); int ihk_mc_pt_prepare_map(page_table_t pt, void *virt, unsigned long size, enum ihk_mc_pt_prepare_flag); @@ -189,7 +190,7 @@ typedef int pte_visitor_t(void *arg, page_table_t pt, pte_t *ptep, int visit_pte_range(page_table_t pt, void *start, void *end, int pgshift, enum visit_pte_flag flags, pte_visitor_t *funcp, void *arg); int move_pte_range(page_table_t pt, struct process_vm *vm, - void *src, void *dest, size_t size); + void *src, void *dest, size_t size, struct vm_range *range); struct page_table *ihk_mc_pt_create(ihk_mc_ap_flag ap_flag); /* XXX: proper use of struct page_table and page_table_t is unknown */ diff --git a/lib/include/ihk/rusage.h b/lib/include/ihk/rusage.h index c69331a0..83eecb48 100644 --- a/lib/include/ihk/rusage.h +++ b/lib/include/ihk/rusage.h @@ -1,45 +1,8 @@ #ifndef __IHK_RUSAGE_H #define __IHK_RUSAGE_H -struct ihk_os_cpu_monitor { - int status; -#define IHK_OS_MONITOR_NOT_BOOT 0 -#define IHK_OS_MONITOR_IDLE 1 -#define IHK_OS_MONITOR_USER 2 -#define IHK_OS_MONITOR_KERNEL 3 -#define IHK_OS_MONITOR_KERNEL_HEAVY 4 -#define IHK_OS_MONITOR_KERNEL_OFFLOAD 5 -#define IHK_OS_MONITOR_KERNEL_FREEZING 8 -#define IHK_OS_MONITOR_KERNEL_FROZEN 9 -#define IHK_OS_MONITOR_KERNEL_THAW 10 -#define IHK_OS_MONITOR_PANIC 99 - int status_bak; - unsigned long 
counter; - unsigned long ocounter; - unsigned long user_tsc; - unsigned long system_tsc; -}; - -struct ihk_os_monitor { - unsigned long rusage_max_num_threads; - unsigned long rusage_num_threads; - unsigned long rusage_rss_max; - long rusage_rss_current; - unsigned long rusage_kmem_usage; - unsigned long rusage_kmem_max_usage; - unsigned long rusage_hugetlb_usage; - unsigned long rusage_hugetlb_max_usage; - unsigned long rusage_total_memory; - unsigned long rusage_total_memory_usage; - unsigned long rusage_total_memory_max_usage; - unsigned long num_numa_nodes; - unsigned long num_processors; - unsigned long ns_per_tsc; - unsigned long reserve[128]; - unsigned long rusage_numa_stat[1024]; - - struct ihk_os_cpu_monitor cpu[0]; -}; +#include +#include enum RUSAGE_MEMBER { RUSAGE_RSS,