From 3fe225792968b61f8326894c35c9447b914c9ff8 Mon Sep 17 00:00:00 2001 From: Katsukura Date: Wed, 15 Mar 2017 23:22:51 +0900 Subject: [PATCH] create rusage branch. --- arch/x86/kernel/cpu.c | 21 ++ arch/x86/kernel/include/ihk/atomic.h | 21 ++ arch/x86/kernel/interrupt.S | 29 ++ arch/x86/kernel/vsyscall.c | 1 + kernel/Makefile.build.in | 2 +- kernel/ap.c | 15 +- kernel/cls.c | 6 + kernel/freeze.c | 54 ++++ kernel/host.c | 2 +- kernel/include/cls.h | 21 ++ kernel/include/init.h | 2 + kernel/include/process.h | 5 + kernel/include/rusage.h | 81 ++++++ kernel/include/syscall.h | 24 ++ kernel/init.c | 54 +++- kernel/mem.c | 17 +- kernel/process.c | 100 ++++++- kernel/procfs.c | 2 +- kernel/rusage.c | 389 +++++++++++++++++++++++++++ kernel/syscall.c | 119 ++++++-- lib/abort.c | 9 + lib/include/ihk/mm.h | 3 + lib/include/ihk/rusage.h | 76 ++++++ 23 files changed, 1013 insertions(+), 40 deletions(-) create mode 100644 kernel/freeze.c create mode 100644 kernel/include/rusage.h create mode 100644 kernel/rusage.c create mode 100644 lib/include/ihk/rusage.h diff --git a/arch/x86/kernel/cpu.c b/arch/x86/kernel/cpu.c index cfd79887..ab473941 100644 --- a/arch/x86/kernel/cpu.c +++ b/arch/x86/kernel/cpu.c @@ -1813,4 +1813,25 @@ int running_on_kvm(void) { return 0; } +void +mod_nmi_ctx(void *nmi_ctx, void (*func)()) +{ + unsigned long *l = nmi_ctx; + int i; + unsigned long flags; + +struct x86_cpu_local_variables *v; + +if(!ihk_mc_get_processor_id()) { +v = get_x86_this_cpu_local(); +} + asm volatile("pushf; pop %0" : "=r"(flags) : : "memory", "cc"); + for (i = 0; i < 22; i++) + l[i] = l[i + 5]; + l[i++] = (unsigned long)func; // return address + l[i++] = 0x20; // KERNEL CS + l[i++] = flags & ~RFLAGS_IF; // rflags (disable interrupt) + l[i++] = (unsigned long)(l + 27); // ols rsp + l[i++] = 0x28; // KERNEL DS +} /*** end of file ***/ diff --git a/arch/x86/kernel/include/ihk/atomic.h b/arch/x86/kernel/include/ihk/atomic.h index 8e68d29d..250556a6 100644 --- a/arch/x86/kernel/include/ihk/atomic.h +++ b/arch/x86/kernel/include/ihk/atomic.h @@ -215,4 +215,25 @@ static inline unsigned long atomic_cmpxchg4(unsigned int *addr, return oldval; } +static inline void ihk_atomic_add_long(long i, long *v) { + asm volatile("lock addq %1,%0" + : "+m" (*v) + : "ir" (i)); +} +static inline void ihk_atomic_add_ulong(long i, unsigned long *v) { + asm volatile("lock addq %1,%0" + : "+m" (*v) + : "ir" (i)); +} + +static inline unsigned long ihk_atomic_add_long_return(long i, long *v) { + long __i; + + __i = i; + asm volatile("lock xaddq %0, %1" + : "+r" (i), "+m" (*v) + : : "memory"); + return i + __i; +} + #endif diff --git a/arch/x86/kernel/interrupt.S b/arch/x86/kernel/interrupt.S index 3537468b..d6765e80 100644 --- a/arch/x86/kernel/interrupt.S +++ b/arch/x86/kernel/interrupt.S @@ -130,11 +130,40 @@ general_protection_exception: addq $8, %rsp iretq +.global __freeze +__freeze: + PUSH_ALL_REGS + callq freeze + POP_ALL_REGS + iretq + .globl nmi nmi: #define PANICED 232 #define PANIC_REGS 240 movq %rax,%gs:PANIC_REGS+0x00 + movq %rsp,%gs:PANIC_REGS+0x08 + + movl nmi_mode(%rip),%eax + cmp $1,%rax + je 1f + cmp $2,%rax + jne 3f +1: + cld + movq %gs:PANIC_REGS+0x00,%rax + PUSH_ALL_REGS + subq $40, %rsp + movq %rsp,%gs:PANIC_REGS+0x10 + movq %rsp, %rdi + call freeze_thaw + cmpq $0, %rax + jnz 2f + addq $40, %rsp +2: + POP_ALL_REGS + iretq +3: movq %rbx,%gs:PANIC_REGS+0x08 movq %rcx,%gs:PANIC_REGS+0x10 movq %rdx,%gs:PANIC_REGS+0x18 diff --git a/arch/x86/kernel/vsyscall.c b/arch/x86/kernel/vsyscall.c index 1813a455..12e2d2cb 100644 --- a/arch/x86/kernel/vsyscall.c +++ b/arch/x86/kernel/vsyscall.c @@ -17,6 +17,7 @@ * make sure that these are position-independent codes. */ +#include #include #include #include diff --git a/kernel/Makefile.build.in b/kernel/Makefile.build.in index c3537e58..28334e10 100644 --- a/kernel/Makefile.build.in +++ b/kernel/Makefile.build.in @@ -3,7 +3,7 @@ SRC=$(VPATH) IHKDIR=$(IHKBASE)/$(TARGETDIR) OBJS = init.o mem.o debug.o mikc.o listeners.o ap.o syscall.o cls.o host.o OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o shmobj.o -OBJS += zeroobj.o procfs.o devobj.o sysfs.o xpmem.o +OBJS += zeroobj.o procfs.o devobj.o sysfs.o xpmem.o rusage.o freeze.o DEPSRCS=$(wildcard $(SRC)/*.c) CFLAGS += -I$(SRC)/include -D__KERNEL__ -g -fno-omit-frame-pointer -fno-inline -fno-inline-small-functions diff --git a/kernel/ap.c b/kernel/ap.c index 7931d935..799a05f8 100644 --- a/kernel/ap.c +++ b/kernel/ap.c @@ -25,15 +25,18 @@ #include #include #include +#include +#include +#include //#define DEBUG_PRINT_AP #ifdef DEBUG_PRINT_AP -#define dkprintf(...) kprintf(__VA_ARGS__) -#define ekprintf(...) kprintf(__VA_ARGS__) +#define dkprintf(...) kprintf(__VA_ARGS__) +#define ekprintf(...) kprintf(__VA_ARGS__) #else #define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) -#define ekprintf(...) kprintf(__VA_ARGS__) +#define ekprintf(...) kprintf(__VA_ARGS__) #endif int num_processors = 1; @@ -41,6 +44,8 @@ static volatile int ap_stop = 1; mcs_lock_node_t ap_syscall_semaphore; +extern struct ihk_os_monitor *monitor; + static void ap_wait(void) { init_tick(); @@ -117,6 +122,10 @@ void ap_init(void) num_processors++; } kprintf("BSP: booted %d AP CPUs\n", cpu_info->ncpus - 1); +#ifdef ENABLE_RUSAGE + rusage_num_threads = 0; + rusage_max_num_threads = 0; +#endif } #include diff --git a/kernel/cls.c b/kernel/cls.c index 1818321a..e1696a69 100644 --- a/kernel/cls.c +++ b/kernel/cls.c @@ -24,16 +24,22 @@ extern int num_processors; struct cpu_local_var *clv; int cpu_local_var_initialized = 0; +extern struct ihk_os_monitor *monitor; void cpu_local_var_init(void) { int z; + int i; z = sizeof(struct cpu_local_var) * num_processors; z = (z + PAGE_SIZE - 1) >> PAGE_SHIFT; clv = ihk_mc_alloc_pages(z, IHK_MC_AP_CRITICAL); memset(clv, 0, z * PAGE_SIZE); + + for(i = 0; i < num_processors; i++) + clv[i].monitor = monitor + i; + cpu_local_var_initialized = 1; } diff --git a/kernel/freeze.c b/kernel/freeze.c new file mode 100644 index 00000000..25a21bc0 --- /dev/null +++ b/kernel/freeze.c @@ -0,0 +1,54 @@ +#include +#include +#include +#include +#include + +extern int nmi_mode; +extern mod_nmi_ctx(void *, void(*)()); +extern void lapic_ack(); +extern void __freeze(); + +void +freeze() +{ + struct ihk_os_monitor *monitor = cpu_local_var(monitor); + + monitor->status_bak = monitor->status; + monitor->status = IHK_OS_MONITOR_KERNEL_FROZEN; + while (monitor->status == IHK_OS_MONITOR_KERNEL_FROZEN) + cpu_halt(); + monitor->status = monitor->status_bak; +} + +long +freeze_thaw(void *nmi_ctx) +{ + struct ihk_os_monitor *monitor = cpu_local_var(monitor); + + if (nmi_mode == 1) { + if (monitor->status != IHK_OS_MONITOR_KERNEL_FROZEN) { +#if 1 + mod_nmi_ctx(nmi_ctx, __freeze); + return 1; +#else + unsigned long flags; + + flags = cpu_disable_interrupt_save(); + monitor->status_bak = monitor->status; + monitor->status = IHK_OS_MONITOR_KERNEL_FROZEN; + lapic_ack(); + while (monitor->status == IHK_OS_MONITOR_KERNEL_FROZEN) + cpu_halt(); + monitor->status = monitor->status_bak; + cpu_restore_interrupt(flags); +#endif + } + } + else if(nmi_mode == 2) { + if (monitor->status == IHK_OS_MONITOR_KERNEL_FROZEN) { + monitor->status = IHK_OS_MONITOR_KERNEL_THAW; + } + } + return 0; +} diff --git a/kernel/host.c b/kernel/host.c index cec24368..081280c1 100644 --- a/kernel/host.c +++ b/kernel/host.c @@ -23,8 +23,8 @@ #include #include #include -#include #include +#include #include #include #include diff --git a/kernel/include/cls.h b/kernel/include/cls.h index 786a3d28..d91cf4e6 100644 --- a/kernel/include/cls.h +++ b/kernel/include/cls.h @@ -28,6 +28,26 @@ struct kmalloc_header { /* 32 bytes */ }; +#define IHK_OS_MONITOR_NOT_BOOT 0 +#define IHK_OS_MONITOR_IDLE 1 +#define IHK_OS_MONITOR_USER 2 +#define IHK_OS_MONITOR_KERNEL 3 +#define IHK_OS_MONITOR_KERNEL_HEAVY 4 +#define IHK_OS_MONITOR_KERNEL_OFFLOAD 5 +#define IHK_OS_MONITOR_KERNEL_FREEZING 8 +#define IHK_OS_MONITOR_KERNEL_FROZEN 9 +#define IHK_OS_MONITOR_KERNEL_THAW 10 +#define IHK_OS_MONITOR_PANIC 99 + +struct ihk_os_monitor { + int status; + int status_bak; + unsigned long counter; + unsigned long ocounter; + unsigned long user_tsc; + unsigned long system_tsc; +}; + #include #define CPU_STATUS_DISABLE (0) #define CPU_STATUS_IDLE (1) @@ -75,6 +95,7 @@ struct cpu_local_var { int no_preempt; int timer_enabled; int kmalloc_initialized; + struct ihk_os_monitor *monitor; } __attribute__((aligned(64))); diff --git a/kernel/include/init.h b/kernel/include/init.h index f0b84cf2..60729375 100644 --- a/kernel/include/init.h +++ b/kernel/include/init.h @@ -29,6 +29,8 @@ extern void init_host_syscall_channel2(void); extern void sched_init(void); extern void pc_ap_init(void); extern void cpu_sysfs_setup(void); +extern void rusage_sysfs_setup(void); +extern void status_sysfs_setup(void); extern char *find_command_line(char *name); diff --git a/kernel/include/process.h b/kernel/include/process.h index b80287e7..afd35b1a 100644 --- a/kernel/include/process.h +++ b/kernel/include/process.h @@ -664,9 +664,14 @@ struct thread { struct sig_pending *ptrace_sendsig; // cpu time + /* struct timespec stime; struct timespec utime; struct timespec btime; + */ + unsigned long system_tsc; + unsigned long user_tsc; + unsigned long base_tsc; int times_update; int in_kernel; diff --git a/kernel/include/rusage.h b/kernel/include/rusage.h new file mode 100644 index 00000000..7500d1a1 --- /dev/null +++ b/kernel/include/rusage.h @@ -0,0 +1,81 @@ +#ifndef __RUSAGE_H +#define __RUSAGE_H + +#define ENABLE_RUSAGE + +#define RUSAGE_DEFAULT_SIZE 10 + +enum RUSAGE_MEMBER { + RUSAGE_RSS, + RUSAGE_CACHE, + RUSAGE_RSS_HUGE, + RUSAGE_MAPPED_FILE, + RUSAGE_MAX_USAGE, + RUSAGE_KMEM_USAGE, + RUSAGE_KMAX_USAGE, + RUSAGE_NUM_NUMA_NODES, + RUSAGE_NUMA_STAT, + RUSAGE_HUGETLB , + RUSAGE_HUGETLB_MAX , + RUSAGE_STAT_SYSTEM , + RUSAGE_STAT_USER , + RUSAGE_USAGE , + RUSAGE_USAGE_PER_CPU , + RUSAGE_NUM_THREADS , + RUSAGE_MAX_NUM_THREADS +}; + +struct r_data{ + unsigned long pid; + unsigned long rss; + unsigned long cache; + unsigned long rss_huge; + unsigned long mapped_file; + unsigned long max_usage; + unsigned long kmem_usage; + unsigned long kmax_usage; + unsigned long hugetlb; + unsigned long hugetlb_max; + unsigned long stat_system; + unsigned long stat_user; + unsigned long usage; + struct r_data *next; +} ; +typedef struct r_data rusage_data; + +rusage_data *rdata[RUSAGE_DEFAULT_SIZE]; +unsigned long rusage_max_num_threads; +unsigned long rusage_num_threads; + +enum ihk_os_status { + IHK_STATUS_INACTIVE, + IHK_STATUS_BOOTING, + IHK_STATUS_RUNNING, + IHK_STATUS_SHUTDOWN, + IHK_STATUS_PANIC, + IHK_STATUS_HUNGUP, + IHK_STATUS_FREEZING, + IHK_STATUS_FROZEN, +}; + +enum ihk_os_status os_status; +unsigned long sys_delegate_count; +enum sys_delegate_state_enum { + ENTER_KERNEL, + EXIT_KERNEL, +}; +enum sys_delegate_state_enum sys_delegate_state; + +unsigned long rusage_rss[sizeof(cpu_set_t)/8]; +unsigned long rusage_rss_max; +long rusage_rss_current; +unsigned long rusage_kmem_usage; +unsigned long rusage_kmem_max_usage; +unsigned long rusage_hugetlb_usage; +unsigned long rusage_hugetlb_max_usage; +unsigned long rusage_numa_stat[1024]; +unsigned long rusage_max_memory; + +#define RUSAGE_MEM_LIMIT 2000000 + +#endif diff --git a/kernel/include/syscall.h b/kernel/include/syscall.h index 9cf81012..49a7b409 100644 --- a/kernel/include/syscall.h +++ b/kernel/include/syscall.h @@ -73,6 +73,7 @@ /* #define SCD_MSG_SYSFS_RESP_CLEANUP 0x43 */ #define SCD_MSG_PROCFS_TID_CREATE 0x44 #define SCD_MSG_PROCFS_TID_DELETE 0x45 +#define SCD_MSG_EVENT_SIGNAL 0x46 /* Cloning flags. */ # define CSIGNAL 0x000000ff /* Signal mask to be sent at exit. */ @@ -374,6 +375,29 @@ struct tod_data_s { }; extern struct tod_data_s tod_data; /* residing in arch-dependent file */ +static inline void tsc_to_ts(unsigned long tsc, struct timespec *ts) +{ + time_t sec_delta; + long ns_delta; + + sec_delta = tsc / tod_data.clocks_per_sec; + ns_delta = NS_PER_SEC * (tsc % tod_data.clocks_per_sec) + / tod_data.clocks_per_sec; + /* calc. of ns_delta overflows if clocks_per_sec exceeds 18.44 GHz */ + + ts->tv_sec = sec_delta; + ts->tv_nsec = ns_delta; + if (ts->tv_nsec >= NS_PER_SEC) { + ts->tv_nsec -= NS_PER_SEC; + ++ts->tv_sec; + } +} + +static inline unsigned long timespec_to_jiffy(const struct timespec *ats) +{ + return ats->tv_sec * 100 + ats->tv_nsec / 10000000; +} + void reset_cputime(); void set_cputime(int mode); int do_munmap(void *addr, size_t len); diff --git a/kernel/init.c b/kernel/init.c index 7b58efe2..37428d13 100644 --- a/kernel/init.c +++ b/kernel/init.c @@ -31,6 +31,7 @@ #include #include #include +#include //#define IOCTL_FUNC_EXTENSION #ifdef IOCTL_FUNC_EXTENSION @@ -239,6 +240,31 @@ static void time_init(void) return; } +struct ihk_os_monitor *monitor; + +static void monitor_init() +{ + int z; + unsigned long phys; + + z = sizeof(struct ihk_os_monitor) * num_processors; + z = (z + PAGE_SIZE -1) >> PAGE_SHIFT; + monitor = ihk_mc_alloc_pages(z, IHK_MC_AP_CRITICAL); + memset(monitor, 0, z * PAGE_SIZE); + phys = virt_to_phys(monitor); + ihk_set_monitor(phys, sizeof(struct ihk_os_monitor) * num_processors); +} + +int nmi_mode; + +static void nmi_init() +{ + unsigned long phys; + + phys = virt_to_phys(&nmi_mode); + ihk_set_nmi_mode_addr(phys); +} + static void rest_init(void) { handler_init(); @@ -250,7 +276,9 @@ static void rest_init(void) //pc_test(); ap_init(); + monitor_init(); cpu_local_var_init(); + nmi_init(); time_init(); kmalloc_init(); @@ -320,6 +348,10 @@ static void setup_remote_snooping_samples(void) static void populate_sysfs(void) { cpu_sysfs_setup(); +#ifdef ENABLE_RUSAGE + rusage_sysfs_setup(); + status_sysfs_setup(); +#endif //setup_remote_snooping_samples(); } /* populate_sysfs() */ @@ -361,6 +393,21 @@ int main(void) char *ptr; int mode = 0; +#ifdef ENABLE_RUSAGE + int i; + os_status = IHK_STATUS_INACTIVE; + rusage_hugetlb_usage = 0; + rusage_hugetlb_max_usage = 0; + for (i = 0; i < sizeof(cpu_set_t)/8; i++) { + rusage_rss[i] = 0; + } + for (i = 0; i < 1024; i++) { + rusage_numa_stat[i] = 0; + } + rusage_rss_current = 0; + rusage_rss_max = 0; +#endif + ptr = find_command_line("ksyslogd="); if (ptr) { mode = ptr[9] - 0x30; @@ -369,7 +416,9 @@ int main(void) kmsg_init(mode); kputs("IHK/McKernel started.\n"); - +#ifdef ENABLE_RUSAGE + os_status = IHK_STATUS_BOOTING; +#endif ihk_set_kmsg(virt_to_phys(&kmsg_buf), IHK_KMSG_SIZE); arch_init(); @@ -392,6 +441,9 @@ int main(void) futex_init(); kputs("IHK/McKernel booted.\n"); +#ifdef ENABLE_RUSAGE + os_status = IHK_STATUS_RUNNING; +#endif #ifdef DCFA_KMOD mc_cmd_client_init(); diff --git a/kernel/mem.c b/kernel/mem.c index d9617c8f..e1a198d5 100644 --- a/kernel/mem.c +++ b/kernel/mem.c @@ -37,6 +37,7 @@ #include #include #include +#include //#define DEBUG_PRINT_MEM @@ -537,6 +538,9 @@ static void *mckernel_allocate_aligned_pages_node(int npages, int p2align, __FUNCTION__, ihk_mc_get_numa_id(), npages, node); +#ifdef ENABLE_RUSAGE + rusage_numa_stat[ihk_mc_get_numa_id()] += npages * PAGE_SIZE; +#endif break; } } @@ -582,6 +586,9 @@ distance_based: ihk_mc_get_numa_id(), npages, memory_nodes[node].nodes_by_distance[i].id); +#ifdef ENABLE_RUSAGE + rusage_numa_stat[ihk_mc_get_numa_id()] += npages * PAGE_SIZE; +#endif break; } } @@ -602,7 +609,9 @@ order_based: &memory_nodes[(node + i) % ihk_mc_get_nr_numa_nodes()].allocators, list) { pa = ihk_pagealloc_alloc(pa_allocator, npages, p2align); - +#ifdef ENABLE_RUSAGE + rusage_numa_stat[ihk_mc_get_numa_id()] += npages * PAGE_SIZE; +#endif if (pa) break; } @@ -634,6 +643,9 @@ static void __mckernel_free_pages_in_allocator(void *va, int npages) if (pa_start >= pa_allocator->start && pa_end <= pa_allocator->end) { ihk_pagealloc_free(pa_allocator, pa_start, npages); +#ifdef ENABLE_RUSAGE + rusage_numa_stat[i] -= npages * PAGE_SIZE; +#endif return; } } @@ -1051,6 +1063,9 @@ static void numa_init(void) ihk_pagealloc_count(allocator) * PAGE_SIZE, ihk_pagealloc_count(allocator), numa_id); +#ifdef ENABLE_RUSAGE + rusage_max_memory = ihk_pagealloc_count(allocator) * PAGE_SIZE; +#endif } } diff --git a/kernel/process.c b/kernel/process.c index 7607b8d5..50f8061a 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -31,6 +31,7 @@ #include #include #include +#include //#define DEBUG_PRINT_PROCESS @@ -65,6 +66,7 @@ extern void procfs_create_thread(struct thread *); extern void procfs_delete_thread(struct thread *); extern void perf_start(struct mc_perf_event *event); extern void perf_reset(struct mc_perf_event *event); +extern void event_signal(); struct list_head resource_set_list; mcs_rwlock_lock_t resource_set_lock; @@ -328,7 +330,25 @@ struct thread *create_thread(unsigned long user_pc, ihk_mc_spinlock_init(&thread->spin_sleep_lock); thread->spin_sleep = 0; - +#ifdef ENABLE_RUSAGE + { + int processor_id; + unsigned long curr; + processor_id = ihk_mc_get_processor_id(); + rusage_rss[processor_id] += KERNEL_STACK_NR_PAGES * PAGE_SIZE; + curr = ihk_atomic_add_long_return ( KERNEL_STACK_NR_PAGES * PAGE_SIZE, &rusage_rss_current); + if (rusage_rss_max < curr) { + atomic_cmpxchg8(&rusage_rss_max, rusage_rss_max, curr); + } + if (rusage_max_memory - curr < RUSAGE_MEM_LIMIT) { + event_signal(); + } + ihk_atomic_add_ulong ( 1, &rusage_num_threads); + if (rusage_max_num_threads < rusage_num_threads) { + atomic_cmpxchg8(&rusage_max_num_threads, rusage_max_num_threads, rusage_num_threads); + } + } +#endif return thread; err: @@ -475,6 +495,29 @@ clone_thread(struct thread *org, unsigned long pc, unsigned long sp, ihk_mc_spinlock_init(&thread->spin_sleep_lock); thread->spin_sleep = 0; + +#ifdef ENABLE_RUSAGE + { + int processor_id; + long curr; + processor_id = ihk_mc_get_processor_id(); + rusage_rss[processor_id] += KERNEL_STACK_NR_PAGES * PAGE_SIZE; + curr = ihk_atomic_add_long_return (KERNEL_STACK_NR_PAGES * PAGE_SIZE, &rusage_rss_current); + if (rusage_rss_max < curr) { + atomic_cmpxchg8(&rusage_rss_max, rusage_rss_max, curr); + } + if (rusage_max_memory - curr < RUSAGE_MEM_LIMIT) { + event_signal(); + } + + ihk_atomic_add_ulong ( 1, &rusage_num_threads); + + if (rusage_max_num_threads < rusage_num_threads) { + atomic_cmpxchg8(&rusage_max_num_threads, rusage_max_num_threads, rusage_num_threads); + } + } +#endif + #ifdef TRACK_SYSCALLS thread->track_syscalls = org->track_syscalls; #endif @@ -1951,6 +1994,24 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn, end + sizeof(unsigned long) * s_ind); thread->vm->region.stack_end = end; thread->vm->region.stack_start = start; + +#ifdef ENABLE_RUSAGE +{ + int processor_id; + long curr; + + processor_id = ihk_mc_get_processor_id(); + rusage_rss[processor_id] += (minsz >> PAGE_SHIFT) * PAGE_SIZE; + curr = ihk_atomic_add_long_return ((minsz >> PAGE_SHIFT) * PAGE_SIZE, &rusage_rss_current); + if (rusage_rss_max < curr) { + atomic_cmpxchg8(&rusage_rss_max, rusage_rss_max, curr); + } + if (rusage_max_memory - curr < RUSAGE_MEM_LIMIT) { + event_signal(); + } +} +#endif + return 0; } @@ -2054,7 +2115,21 @@ unsigned long extend_process_region(struct process_vm *vm, ihk_mc_free_pages(p, (aligned_new_end - aligned_end) >> PAGE_SHIFT); return end; } - +#ifdef ENABLE_RUSAGE +{ + int processor_id; + long curr; + processor_id = ihk_mc_get_processor_id(); + rusage_rss[processor_id] += ((aligned_new_end - aligned_end) >> PAGE_SHIFT) * PAGE_SIZE; + curr = ihk_atomic_add_long_return (((aligned_new_end - aligned_end) >> PAGE_SHIFT) * PAGE_SIZE, &rusage_rss_current); + if (rusage_rss_max < curr) { + atomic_cmpxchg8(&rusage_rss_max, rusage_rss_max, curr); + } + if (rusage_max_memory - curr < RUSAGE_MEM_LIMIT) { + event_signal(); + } +} +#endif return address; } @@ -2336,6 +2411,16 @@ void destroy_thread(struct thread *thread) release_sigcommon(thread->sigcommon); +#ifdef ENABLE_RUSAGE +{ + int processor_id; + processor_id = ihk_mc_get_processor_id(); + rusage_rss[processor_id] -= KERNEL_STACK_NR_PAGES * PAGE_SIZE; + ihk_atomic_add_long_return(KERNEL_STACK_NR_PAGES * PAGE_SIZE * (-1) , &rusage_rss_current); + ihk_atomic_add_ulong ( -1, &rusage_num_threads); +} +#endif + ihk_mc_free_pages(thread, KERNEL_STACK_NR_PAGES); } @@ -2343,14 +2428,17 @@ void release_thread(struct thread *thread) { struct process_vm *vm; struct mcs_rwlock_node lock; + struct timespec ats; if (!ihk_atomic_dec_and_test(&thread->refcount)) { return; } mcs_rwlock_writer_lock_noirq(&thread->proc->update_lock, &lock); - ts_add(&thread->proc->stime, &thread->stime); - ts_add(&thread->proc->utime, &thread->utime); + tsc_to_ts(thread->system_tsc, &ats); + ts_add(&thread->proc->stime, &ats); + tsc_to_ts(thread->user_tsc, &ats); + ts_add(&thread->proc->utime, &ats); mcs_rwlock_writer_unlock_noirq(&thread->proc->update_lock, &lock); vm = thread->vm; @@ -2398,6 +2486,7 @@ static void do_migrate(void); static void idle(void) { struct cpu_local_var *v = get_this_cpu_local_var(); + struct ihk_os_monitor *monitor = v->monitor; /* Release runq_lock before starting the idle loop. * See comments at release_runq_lock(). @@ -2458,8 +2547,11 @@ static void idle(void) v->status == CPU_STATUS_RESERVED) { /* No work to do? Consolidate the kmalloc free list */ kmalloc_consolidate_free_list(); + monitor->status = IHK_OS_MONITOR_IDLE; cpu_local_var(current)->status = PS_INTERRUPTIBLE; cpu_safe_halt(); + monitor->status = IHK_OS_MONITOR_KERNEL; + monitor->counter++; cpu_local_var(current)->status = PS_RUNNING; } else { diff --git a/kernel/procfs.c b/kernel/procfs.c index 133edc89..4d5d4c7d 100644 --- a/kernel/procfs.c +++ b/kernel/procfs.c @@ -17,8 +17,8 @@ #include #include #include -#include #include +#include #include #include #include diff --git a/kernel/rusage.c b/kernel/rusage.c new file mode 100644 index 00000000..397a4f83 --- /dev/null +++ b/kernel/rusage.c @@ -0,0 +1,389 @@ +/** + * \file rusage.c + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +//#define DEBUG_PRINT_AP + +#ifdef DEBUG_PRINT_AP +#define dkprintf(...) kprintf(__VA_ARGS__) +#define ekprintf(...) kprintf(__VA_ARGS__) +#else +#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) +#define ekprintf(...) kprintf(__VA_ARGS__) +#endif + +extern int num_processors ; +static volatile int ap_stop = 1; + +mcs_lock_node_t ap_syscall_semaphore; + +extern struct ihk_os_monitor *monitor; + + +#ifdef ENABLE_RUSAGE +/* count total rss */ +unsigned long count_rss () { + int i; + unsigned long val = 0; + for(i = 0; i < sizeof(cpu_set_t)/8; i++){ + val += rusage_rss[i]; + } + return val; +} + +/* count total cache */ +unsigned long count_cache () { + return 0; +} + +/* count total rss_huge */ +unsigned long count_rss_huge () { + return 0; +} + +/* count total mapped_file */ +unsigned long count_mapped_file () { + return 0; +} + +/* count total max_usage */ +unsigned long count_max_usage() { + return rusage_rss_max; +} + +/* count total kmem_usage */ +unsigned long count_kmem_usage() { + return 0; +} + +/* count total kmax_usage */ +unsigned long count_kmem_max_usage() { + return 0; +} + +#endif + +#include +#include + +#ifdef ENABLE_RUSAGE +char* strcat_rusage(char *s1, char *s2) { + int i; + int j; + for (i = 0; s1[i] != '\0'; i++); //skip chars. + for (j = 0; s2[j] != '\0'; j++) { + s1[i+j] = s2[j]; + } + s1[i+j] = '\0'; + return s1; +} + +static ssize_t +show_rusage_memory_data(struct sysfs_ops *ops, void *instance, void *buf, size_t size) +{ + return snprintf(buf, size, "rss %lu\ncache %lu\nrss_huge %lu\nmapped_file %lu\n", + count_rss(), + count_cache(), + count_rss_huge(), + count_mapped_file() + ); +} + +static ssize_t +show_rusage_memory_max_usage_data(struct sysfs_ops *ops, void *instance, void *buf, size_t size) +{ + return snprintf(buf,size,"%lu\n",count_max_usage()); +} + +static ssize_t +show_rusage_memory_kmem_usage_data(struct sysfs_ops *ops, void *instance, void *buf, size_t size) +{ + return snprintf(buf,size,"%lu\n",count_kmem_usage()); +} + +static ssize_t +show_rusage_memory_kmem_max_usage_data(struct sysfs_ops *ops, void *instance, void *buf, size_t size) +{ + return snprintf(buf,size,"%lu\n",count_kmem_max_usage()); +} + +static ssize_t +show_rusage_num_numa_nodes_data(struct sysfs_ops *ops, void *instance, void *buf, size_t size) +{ + return snprintf(buf,size,"%d\n",ihk_mc_get_nr_numa_nodes()); +} + +static ssize_t +show_rusage_memory_numa_stat_data(struct sysfs_ops *ops, void *instance, void *buf, size_t size) +{ + int i; + int num_numa; + char tmp_buf1[1024]; + char tmp_buf2[1024]; + unsigned long total = 0; + memset(tmp_buf1, 0, 1024); + num_numa = ihk_mc_get_nr_numa_nodes(); + + for (i = 0; i < num_numa; i++) { + total += rusage_numa_stat[i]; + } + sprintf(tmp_buf1, "total=%lu ", total); + + for (i = 0; i < num_numa; i++) { + sprintf(tmp_buf2, "N%d=%lu ", i, rusage_numa_stat[i]); + strcat_rusage(tmp_buf1, tmp_buf2); + memset(tmp_buf2, 0, 1024); + } + return snprintf(buf, size, "%s\n", tmp_buf1); +} + +static ssize_t +show_rusage_hugetlb_usage_data(struct sysfs_ops *ops, void *instance, void *buf, size_t size) +{ + return snprintf(buf, size, "%lu\n", rusage_hugetlb_usage); +} + +static ssize_t +show_rusage_hugetlb_max_usage_data(struct sysfs_ops *ops, void *instance, void *buf, size_t size) +{ + return snprintf(buf, size, "%lu\n", rusage_hugetlb_max_usage); +} +static ssize_t +show_rusage_cpuacct_stat_data(struct sysfs_ops *ops, void *instance, void *buf, size_t size) +{ + struct timespec uts; + struct timespec sts; + int i; + int r = 0; + + uts.tv_sec = 0; + uts.tv_nsec = 0; + sts.tv_sec = 0; + sts.tv_nsec = 0; + if (monitor) + for (i = 0; i < num_processors; i++) { + struct timespec ats; + + tsc_to_ts(monitor[i].user_tsc, &ats); + ts_add(&uts, &ats); + tsc_to_ts(monitor[i].system_tsc, &ats); + ts_add(&sts, &ats); + } + r = snprintf(buf, size, "user %lu\n", timespec_to_jiffy(&uts)); + r += snprintf(strchr(buf, '\0'), size - r, "system %lu\n", + timespec_to_jiffy(&sts)); + return r; +} +static ssize_t +show_rusage_cpuacct_usage_data(struct sysfs_ops *ops, void *instance, void *buf, size_t size) +{ + struct timespec uts; + int i; + int r = 0; + + uts.tv_sec = 0; + uts.tv_nsec = 0; + if (monitor) + for (i = 0; i < num_processors; i++) { + struct timespec ats; + + tsc_to_ts(monitor[i].user_tsc + monitor[i].system_tsc, + &ats); + ts_add(&uts, &ats); + } + if (uts.tv_sec) + r = snprintf(buf, size, "%lu%09lu\n", uts.tv_sec, uts.tv_nsec); + else + r = snprintf(buf, size, "%lu\n", uts.tv_nsec); + return r; +} + + +static ssize_t +show_rusage_cpuacct_usage_percpu_data(struct sysfs_ops *ops, void *instance, void *buf, size_t size) +{ + struct timespec uts; + int i; + int r = 0; + + ((char *)buf)[0] = '\0'; + for (i = 0; i < num_processors; i++) { + if (monitor) { + tsc_to_ts(monitor[i].user_tsc + monitor[i].system_tsc, + &uts); + } + else { + uts.tv_sec = 0; + uts.tv_nsec = 0; + } + if (uts.tv_sec) + r += snprintf(strchr(buf, '\0'), size - r, + "%lu%09lu ", uts.tv_sec, uts.tv_nsec); + else + r += snprintf(strchr(buf, '\0'), size - r, + "%lu ", uts.tv_nsec); + } + ((char *)buf)[r - 1] = '\n'; + return r; +} + +/* callback funciton of rusage(threads) sysfs */ +static ssize_t +show_rusage_num_threads_data(struct sysfs_ops *ops, void *instance, void *buf, size_t size) +{ + return snprintf(buf, size, "%lu\n", rusage_num_threads); +} + +/* callback funciton of rusage(max threads) sysfs */ +static ssize_t +show_rusage_max_num_threads_data(struct sysfs_ops *ops, void *instance, void *buf, size_t size) +{ + return snprintf(buf, size, "%lu\n", rusage_max_num_threads); +} + +/* definition of sysfs ops */ +struct sysfs_ops show_rusage_memory = { + .show = &show_rusage_memory_data, +}; +struct sysfs_ops show_rusage_memory_max_usage = { + .show = &show_rusage_memory_max_usage_data, +}; +struct sysfs_ops show_rusage_memory_kmem_usage = { + .show = &show_rusage_memory_kmem_usage_data, +}; +struct sysfs_ops show_rusage_memory_kmem_max_usage = { + .show = &show_rusage_memory_kmem_max_usage_data, +}; +struct sysfs_ops show_rusage_num_numa_nodes = { + .show = &show_rusage_num_numa_nodes_data, +}; +struct sysfs_ops show_rusage_memory_numa_stat = { + .show = &show_rusage_memory_numa_stat_data, +}; +struct sysfs_ops show_rusage_hugetlb_usage = { + .show = &show_rusage_hugetlb_usage_data, +}; +struct sysfs_ops show_rusage_hugetlb_max_usage = { + .show = &show_rusage_hugetlb_max_usage_data, +}; +struct sysfs_ops show_rusage_cpuacct_stat = { + .show = &show_rusage_cpuacct_stat_data, +}; +struct sysfs_ops show_rusage_cpuacct_usage = { + .show = &show_rusage_cpuacct_usage_data, +}; +struct sysfs_ops show_rusage_num_threads = { + .show = &show_rusage_num_threads_data, +}; +struct sysfs_ops show_rusage_cpuacct_usage_percpu = { + .show = &show_rusage_cpuacct_usage_percpu_data, +}; +struct sysfs_ops show_rusage_max_num_threads = { + .show = &show_rusage_max_num_threads_data, +}; + +/* create sysfs files for rusage. */ +void rusage_sysfs_setup(void) { + int error; + error = sysfs_createf(&show_rusage_memory, &rdata, 0444, + "/sys/fs/cgroup/memory/memory.stat"); + if (error) { + panic("rusage_sysfs_setup:sysfs_createf() failed\n"); + } + error = sysfs_createf(&show_rusage_memory_max_usage, &rdata, 0444, + "/sys/fs/cgroup/memory/memory.max_usage_in_bytes"); + if (error) { + panic("rusage_sysfs_setup:sysfs_createf() failed\n"); + } + error = sysfs_createf(&show_rusage_memory_kmem_usage, &rdata, 0444, + "/sys/fs/cgroup/memory/memory.kmem.usage_in_bytes"); + if (error) { + panic("rusage_sysfs_setup:sysfs_createf() failed\n"); + } + error = sysfs_createf(&show_rusage_memory_kmem_max_usage, &rdata, 0444, + "/sys/fs/cgroup/memory/memory.kmem.max_usage_in_bytes"); + if (error) { + panic("rusage_sysfs_setup:sysfs_createf() failed\n"); + } + error = sysfs_createf(&show_rusage_num_numa_nodes, &rdata, 0444, + "/sys/fs/cgroup/cpu/num_numa_nodes.txt"); + if (error) { + panic("rusage_sysfs_setup:sysfs_createf() failed\n"); + } + error = sysfs_createf(&show_rusage_memory_numa_stat, &rdata, 0444, + "/sys/fs/cgroup/memory/memory.numa_stat"); + if (error) { + panic("rusage_sysfs_setup:sysfs_createf() failed\n"); + } + error = sysfs_createf(&show_rusage_hugetlb_usage, &rdata, 0444, + "/sys/fs/cgroup/hugetlb/hugetlb.1GB.usage_in_bytes"); + if (error) { + panic("rusage_sysfs_setup:sysfs_createf() failed\n"); + } + error = sysfs_createf(&show_rusage_hugetlb_max_usage, &rdata, 0444, + "/sys/fs/cgroup/hugetlb/hugetlb.1GB.max_usage_in_bytes"); + if (error) { + panic("rusage_sysfs_setup:sysfs_createf() failed\n"); + } + error = sysfs_createf(&show_rusage_cpuacct_stat, &rdata, 0444, + "/sys/fs/cgroup/cpuacct/cpuacct.stat"); + if (error) { + panic("rusage_sysfs_setup:sysfs_createf() failed\n"); + } + error = sysfs_createf(&show_rusage_cpuacct_usage, &rdata, 0444, + "/sys/fs/cgroup/cpuacct/cpuacct.usage"); + if (error) { + panic("rusage_sysfs_setup:sysfs_createf() failed\n"); + } + error = sysfs_createf(&show_rusage_cpuacct_usage_percpu, &rdata, 0444, + "/sys/fs/cgroup/cpuacct/cpuacct.usage_percpu"); + if (error) { + panic("rusage_sysfs_setup:sysfs_createf() failed\n"); + } + error = sysfs_createf(&show_rusage_num_threads, &rdata, 0444, + "/sys/fs/cgroup/num_threads"); + if (error) { + panic("rusage_sysfs_setup:sysfs_createf() failed\n"); + } + error = sysfs_createf(&show_rusage_max_num_threads, &rdata, 0444, + "/sys/fs/cgroup/max_num_threads"); + if (error) { + panic("rusage_sysfs_setup:sysfs_createf() failed\n"); + } +} + +/* callback funciton of os_status sysfs */ +static ssize_t +show_ihk_status_data(struct sysfs_ops *ops, void *instance, void *buf, size_t size) +{ + return snprintf(buf, size, "%d\n", os_status); +} + +struct sysfs_ops show_ihk_status = { + .show = &show_ihk_status_data, +}; + +/* create sysfs files for monitoring status.*/ +void status_sysfs_setup(void) { + int error; + error = sysfs_createf(&show_ihk_status, &rdata, 0444, + "/sys/fs/cgroup/mck_status"); + if (error) { + panic("status_sysfs_setup:sysfs_createf() failed\n"); + } +} +#endif + diff --git a/kernel/syscall.c b/kernel/syscall.c index ef1ef983..ccbed07a 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -55,6 +55,7 @@ #include #include #include +#include /* Headers taken from kitten LWK */ #include @@ -389,6 +390,9 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) long rc; struct thread *thread = cpu_local_var(current); struct process *proc = thread->proc; + struct ihk_os_monitor *monitor = cpu_local_var(monitor); + int mstatus = 0; + #ifdef TRACK_SYSCALLS uint64_t t_s; t_s = rdtsc(); @@ -398,6 +402,9 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) ihk_mc_get_processor_id(), req->number); + mstatus = monitor->status; + monitor->status = IHK_OS_MONITOR_KERNEL_OFFLOAD; + barrier(); if(req->number != __NR_exit_group){ @@ -520,6 +527,8 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) } #endif // TRACK_SYSCALLS + monitor->status = mstatus; + monitor->counter++; return rc; } @@ -957,6 +966,7 @@ terminate(int rc, int sig) mcs_rwlock_writer_unlock(&proc->threads_lock, &lock); vm = proc->vm; + free_all_process_memory_range(vm); if (proc->saved_cmdline) { kfree(proc->saved_cmdline); @@ -1100,6 +1110,18 @@ terminate_host(int pid) do_kill(cpu_local_var(current), pid, -1, SIGKILL, NULL, 0); } +void +event_signal() +{ + struct ihk_ikc_channel_desc *syscall_channel; + struct ikc_scd_packet pckt; + + syscall_channel = get_cpu_local_var(0)->syscall_channel2; + memset(&pckt, '\0', sizeof pckt); + pckt.msg = SCD_MSG_EVENT_SIGNAL; + ihk_ikc_send(syscall_channel, &pckt, 0); +} + void interrupt_syscall(int pid, int tid) { @@ -2405,11 +2427,13 @@ SYSCALL_DECLARE(set_tid_address) return cpu_local_var(current)->proc->pid; } +/* static unsigned long timespec_to_jiffy(const struct timespec *ats) { return ats->tv_sec * 100 + ats->tv_nsec / 10000000; } +*/ SYSCALL_DECLARE(times) { @@ -2425,8 +2449,10 @@ SYSCALL_DECLARE(times) struct process *proc = thread->proc; struct timespec ats; - mytms.tms_utime = timespec_to_jiffy(&thread->utime); - mytms.tms_stime = timespec_to_jiffy(&thread->stime); + tsc_to_ts(thread->user_tsc, &ats); + mytms.tms_utime = timespec_to_jiffy(&ats); + tsc_to_ts(thread->system_tsc, &ats); + mytms.tms_stime = timespec_to_jiffy(&ats); ats.tv_sec = proc->utime.tv_sec; ats.tv_nsec = proc->utime.tv_nsec; ts_add(&ats, &proc->utime_children); @@ -3533,6 +3559,9 @@ SYSCALL_DECLARE(rt_sigtimedwait) int sig; struct timespec ats; struct timespec ets; + struct ihk_os_monitor *monitor = cpu_local_var(monitor); + + monitor->status = IHK_OS_MONITOR_KERNEL_HEAVY; if (sigsetsize > sizeof(sigset_t)) return -EINVAL; @@ -3688,6 +3717,9 @@ do_sigsuspend(struct thread *thread, const sigset_t *set) struct list_head *head; mcs_rwlock_lock_t *lock; struct mcs_rwlock_node_irqsave mcs_rw_node; + struct ihk_os_monitor *monitor = cpu_local_var(monitor); + + monitor->status = IHK_OS_MONITOR_KERNEL_HEAVY; wset = set->__val[0]; wset &= ~__sigmask(SIGKILL); @@ -4796,7 +4828,10 @@ SYSCALL_DECLARE(futex) uint32_t *uaddr2 = (uint32_t *)ihk_mc_syscall_arg4(ctx); uint32_t val3 = (uint32_t)ihk_mc_syscall_arg5(ctx); int flags = op; - + struct ihk_os_monitor *monitor = cpu_local_var(monitor); + + monitor->status = IHK_OS_MONITOR_KERNEL_HEAVY; + /* Cross-address space futex? */ if (op & FUTEX_PRIVATE_FLAG) { fshared = 0; @@ -4907,6 +4942,9 @@ SYSCALL_DECLARE(exit) if(nproc == 1){ // process has only one thread terminate(exit_status, 0); +#ifdef ENABLE_RUSAGE + rusage_num_threads--; +#endif return 0; } @@ -4933,6 +4971,9 @@ SYSCALL_DECLARE(exit) if(proc->status == PS_EXITED){ mcs_rwlock_reader_unlock(&proc->threads_lock, &lock); terminate(exit_status, 0); +#ifdef ENABLE_RUSAGE + rusage_num_threads--; +#endif return 0; } thread->status = PS_EXITED; @@ -4941,6 +4982,9 @@ SYSCALL_DECLARE(exit) release_thread(thread); schedule(); +#ifdef ENABLE_RUSAGE + rusage_num_threads--; +#endif return 0; } @@ -5065,6 +5109,7 @@ SYSCALL_DECLARE(getrusage) struct timespec utime; struct timespec stime; struct mcs_rwlock_node lock; + struct timespec ats; if(who != RUSAGE_SELF && who != RUSAGE_CHILDREN && @@ -5096,8 +5141,10 @@ SYSCALL_DECLARE(getrusage) list_for_each_entry(child, &proc->threads_list, siblings_list){ while(!child->times_update) cpu_pause(); - ts_add(&utime, &child->utime); - ts_add(&stime, &child->stime); + tsc_to_ts(child->user_tsc, &ats); + ts_add(&utime, &ats); + tsc_to_ts(child->system_tsc, &ats); + ts_add(&stime, &ats); } mcs_rwlock_reader_unlock_noirq(&proc->threads_lock, &lock); ts_to_tv(&kusage.ru_utime, &utime); @@ -5106,14 +5153,18 @@ SYSCALL_DECLARE(getrusage) kusage.ru_maxrss = proc->maxrss / 1024; } else if(who == RUSAGE_CHILDREN){ - ts_to_tv(&kusage.ru_utime, &proc->utime_children); - ts_to_tv(&kusage.ru_stime, &proc->stime_children); + tsc_to_ts(thread->user_tsc, &ats); + ts_to_tv(&kusage.ru_utime, &ats); + tsc_to_ts(thread->system_tsc, &ats); + ts_to_tv(&kusage.ru_stime, &ats); kusage.ru_maxrss = proc->maxrss_children / 1024; } else if(who == RUSAGE_THREAD){ - ts_to_tv(&kusage.ru_utime, &thread->utime); - ts_to_tv(&kusage.ru_stime, &thread->stime); + tsc_to_ts(thread->user_tsc, &ats); + ts_to_tv(&kusage.ru_utime, &ats); + tsc_to_ts(thread->system_tsc, &ats); + ts_to_tv(&kusage.ru_stime, &ats); kusage.ru_maxrss = proc->maxrss / 1024; } @@ -6449,10 +6500,11 @@ SYSCALL_DECLARE(clock_gettime) ats.tv_nsec = proc->utime.tv_nsec; ts_add(&ats, &proc->stime); list_for_each_entry(child, &proc->threads_list, siblings_list){ + struct timespec wts; while(!child->times_update) cpu_pause(); - ts_add(&ats, &child->utime); - ts_add(&ats, &child->stime); + tsc_to_ts(child->user_tsc + child->system_tsc, &wts); + ts_add(&ats, &wts); } mcs_rwlock_reader_unlock_noirq(&proc->threads_lock, &lock); return copy_to_user(ts, &ats, sizeof ats); @@ -6460,9 +6512,7 @@ SYSCALL_DECLARE(clock_gettime) else if(clock_id == CLOCK_THREAD_CPUTIME_ID){ struct thread *thread = cpu_local_var(current); - ats.tv_sec = thread->utime.tv_sec; - ats.tv_nsec = thread->utime.tv_nsec; - ts_add(&ats, &thread->stime); + tsc_to_ts(thread->user_tsc + thread->system_tsc, &ats); return copy_to_user(ts, &ats, sizeof ats); } @@ -6565,6 +6615,9 @@ SYSCALL_DECLARE(nanosleep) struct timespec *tv = (struct timespec *)ihk_mc_syscall_arg0(ctx); struct timespec *rem = (struct timespec *)ihk_mc_syscall_arg1(ctx); struct syscall_request request IHK_DMA_ALIGN; + struct ihk_os_monitor *monitor = cpu_local_var(monitor); + + monitor->status = IHK_OS_MONITOR_KERNEL_HEAVY; /* Do it locally if supported */ if (gettime_local_support) { @@ -8479,8 +8532,7 @@ reset_cputime() if(!(thread = cpu_local_var(current))) return; - thread->btime.tv_sec = 0; - thread->btime.tv_nsec = 0; + thread->base_tsc = 0; } /** @@ -8492,8 +8544,9 @@ void set_cputime(int mode) { struct thread *thread; - struct timespec ats; + unsigned long tsc; struct cpu_local_var *v; + struct ihk_os_monitor *monitor; if(clv == NULL) return; @@ -8501,38 +8554,48 @@ set_cputime(int mode) v = get_this_cpu_local_var(); if(!(thread = v->current)) return; + if(thread == &v->idle) + return; + monitor = v->monitor; + if(mode == 0){ + monitor->status = IHK_OS_MONITOR_USER; + } + else if(mode == 1){ + monitor->counter++; + monitor->status = IHK_OS_MONITOR_KERNEL; + } if(!gettime_local_support){ thread->times_update = 1; return; } - calculate_time_from_tsc(&ats); - if(thread->btime.tv_sec != 0 && thread->btime.tv_nsec != 0){ + tsc = rdtsc(); + if(thread->base_tsc != 0){ + unsigned long dtsc = tsc - thread->base_tsc; struct timespec dts; - dts.tv_sec = ats.tv_sec; - dts.tv_nsec = ats.tv_nsec; - ts_sub(&dts, &thread->btime); + tsc_to_ts(dtsc, &dts); if(mode == 1){ - ts_add(&thread->utime, &dts); + thread->user_tsc += dtsc; + monitor->user_tsc += dtsc; ts_add(&thread->itimer_virtual_value, &dts); ts_add(&thread->itimer_prof_value, &dts); } else{ - ts_add(&thread->stime, &dts); + thread->system_tsc += dtsc; + monitor->system_tsc += dtsc; ts_add(&thread->itimer_prof_value, &dts); } } if(mode == 2){ - thread->btime.tv_sec = 0; - thread->btime.tv_nsec = 0; + thread->base_tsc = 0; } else{ - thread->btime.tv_sec = ats.tv_sec; - thread->btime.tv_nsec = ats.tv_nsec; + thread->base_tsc = tsc; } + thread->times_update = 1; thread->in_kernel = mode; diff --git a/lib/abort.c b/lib/abort.c index 0ed14610..3868e90b 100644 --- a/lib/abort.c +++ b/lib/abort.c @@ -1,8 +1,17 @@ #include #include +#include +#include + +extern struct cpu_local_var *clv; void panic(const char *msg) { + if (clv) { + struct ihk_os_monitor *monitor = cpu_local_var(monitor); + + monitor->status = IHK_OS_MONITOR_PANIC; + } cpu_disable_interrupt(); kprintf(msg); diff --git a/lib/include/ihk/mm.h b/lib/include/ihk/mm.h index b1cd80d6..8c58a420 100644 --- a/lib/include/ihk/mm.h +++ b/lib/include/ihk/mm.h @@ -198,6 +198,9 @@ void remote_flush_tlb_cpumask(struct process_vm *vm, int ihk_set_kmsg(unsigned long addr, unsigned long size); char *ihk_get_kargs(); +int ihk_set_monitor(unsigned long addr, unsigned long size); +int ihk_set_nmi_mode_addr(unsigned long addr); + extern void (*__tlb_flush_handler)(int vector); struct tlb_flush_entry { diff --git a/lib/include/ihk/rusage.h b/lib/include/ihk/rusage.h new file mode 100644 index 00000000..331a30a3 --- /dev/null +++ b/lib/include/ihk/rusage.h @@ -0,0 +1,76 @@ +#ifndef __RUSAGE_H +#define __RUSAGE_H + +#define ENABLE_RUSAGE + +#define RUSAGE_DEFAULT_SIZE 10 + +enum RUSAGE_MEMBER { + RUSAGE_RSS, + RUSAGE_CACHE, + RUSAGE_RSS_HUGE, + RUSAGE_MAPPED_FILE, + RUSAGE_MAX_USAGE, + RUSAGE_KMEM_USAGE, + RUSAGE_KMAX_USAGE, + RUSAGE_NUM_NUMA_NODES, + RUSAGE_NUMA_STAT, + RUSAGE_HUGETLB , + RUSAGE_HUGETLB_MAX , + RUSAGE_STAT_SYSTEM , + RUSAGE_STAT_USER , + RUSAGE_USAGE , + RUSAGE_USAGE_PER_CPU , + RUSAGE_NUM_THREADS , + RUSAGE_MAX_NUM_THREADS +}; + +struct r_data{ + unsigned long pid; + unsigned long rss; + unsigned long cache; + unsigned long rss_huge; + unsigned long mapped_file; + unsigned long max_usage; + unsigned long kmem_usage; + unsigned long kmax_usage; + unsigned long hugetlb; + unsigned long hugetlb_max; + unsigned long stat_system; + unsigned long stat_user; + unsigned long usage; + struct r_data *next; +} ; +typedef struct r_data rusage_data; + +rusage_data *rdata[RUSAGE_DEFAULT_SIZE]; +unsigned long rusage_max_num_threads; +unsigned long rusage_num_threads; + +enum ihk_os_status { + IHK_STATUS_INACTIVE, + IHK_STATUS_BOOTING, + IHK_STATUS_RUNNING, + IHK_STATUS_SHUTDOWN, + IHK_STATUS_PANIC, + IHK_STATUS_HUNGUP, + IHK_STATUS_FREEZING, + IHK_STATUS_FROZEN, +}; + +enum ihk_os_status os_status; +unsigned long sys_delegate_count; +enum sys_delegate_state_enum { + ENTER_KERNEL, + EXIT_KERNEL, +}; +enum sys_delegate_state_enum sys_delegate_state; + +unsigned long rusage_kmem_usage; +unsigned long rusage_kmem_max_usage; +unsigned long rusage_hugetlb_usage; +unsigned long rusage_hugetlb_max_usage; +unsigned long rusage_usage_per_cpu[sizeof(cpu_set_t)/8]; +unsigned long rusage_numa_stat[1024]; + +#endif