diff --git a/kernel/include/profile.h b/kernel/include/profile.h
index 5d1cd2ca..fbd3cfa9 100644
--- a/kernel/include/profile.h
+++ b/kernel/include/profile.h
@@ -10,6 +10,7 @@
 #define PROFILE_EVENT_MIN PROFILE_OFFLOAD_MAX
 
 #define __NR_profile 701
+#define PROF_JOB 0x40000000
 #define PROF_PROC 0x80000000
 #define PROF_CLEAR 0x01
 #define PROF_ON 0x02
@@ -21,13 +22,13 @@ struct profile_event {
 	uint64_t tsc;
 };
 
-/* 
+/*
  * The layout of profile events is as follows:
  * [0,PROFILE_SYSCALL_MAX) - syscalls
  * [PROFILE_SYSCALL_MAX,PROFILE_OFFLOAD_MAX) - syscall offloads
  * [PROFILE_OFFLOAD_MAX,PROFILE_EVENT_MAX) - general events
  *
- * XXX: Make sure to fill in prof_event_names in profile.c 
+ * XXX: Make sure to fill in prof_event_names in profile.c
  * for each added profiled event.
  */
 enum profile_event_type {
@@ -43,7 +44,9 @@ enum profile_event_type profile_syscall2offload(enum profile_event_type sc);
 void profile_event_add(enum profile_event_type type, uint64_t tsc);
 void profile_print_thread_stats(struct thread *thread);
 void profile_print_proc_stats(struct process *proc);
+void profile_print_job_stats(struct process *proc);
 void profile_accumulate_events(struct thread *thread, struct process *proc);
+int profile_accumulate_and_print_job_events(struct process *proc);
 int profile_alloc_events(struct thread *thread);
 void profile_dealloc_thread_events(struct thread *thread);
 void profile_dealloc_proc_events(struct process *proc);
diff --git a/kernel/process.c b/kernel/process.c
index c8a2e363..97b21fe4 100644
--- a/kernel/process.c
+++ b/kernel/process.c
@@ -2218,7 +2218,12 @@ release_process(struct process *proc)
 	if (proc->tids) kfree(proc->tids);
 #ifdef PROFILE_ENABLE
-	profile_print_proc_stats(proc);
+	if (proc->nr_processes) {
+		profile_accumulate_and_print_job_events(proc);
+	}
+	else {
+		profile_print_proc_stats(proc);
+	}
 	profile_dealloc_proc_events(proc);
 #endif // PROFILE_ENABLE
 	kfree(proc);
diff --git a/kernel/profile.c b/kernel/profile.c
index 0472d01c..7025de6b 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -51,6 +51,13 @@ char *profile_event_names[] =
 	""
 };
 
+mcs_lock_node_t job_profile_lock = {0, NULL};
+struct profile_event *job_profile_events = NULL;
+int job_nr_processes = -1;
+int job_nr_processes_left = -1;
+
+
+
 enum profile_event_type profile_syscall2offload(enum profile_event_type sc)
 {
 	return (PROFILE_SYSCALL_MAX + sc);
@@ -85,6 +92,8 @@ void profile_print_thread_stats(struct thread *thread)
 	int i;
 	unsigned long flags;
 
+	if (!thread->profile_events) return;
+
 	flags = kprintf_lock();
 
 	for (i = 0; i < PROFILE_SYSCALL_MAX; ++i) {
@@ -134,6 +143,8 @@ void profile_print_proc_stats(struct process *proc)
 	int i;
 	unsigned long flags;
 
+	if (!proc->profile_events) return;
+
 	flags = kprintf_lock();
 
 	for (i = 0; i < PROFILE_SYSCALL_MAX; ++i) {
@@ -177,17 +188,124 @@ void profile_print_proc_stats(struct process *proc)
 	kprintf_unlock(flags);
 }
 
+int profile_accumulate_and_print_job_events(struct process *proc)
+{
+	int i;
+	unsigned long flags;
+	struct mcs_lock_node mcs_node;
+
+	mcs_lock_lock(&job_profile_lock, &mcs_node);
+
+	/* First process? */
+	if (job_nr_processes == -1) {
+		job_nr_processes = proc->nr_processes;
+		job_nr_processes_left = proc->nr_processes;
+	}
+
+	--job_nr_processes_left;
+
+	/* Allocate event counters */
+	if (!job_profile_events) {
+
+		job_profile_events = kmalloc(sizeof(*job_profile_events) *
+				PROFILE_EVENT_MAX, IHK_MC_AP_NOWAIT);
+
+		if (!job_profile_events) {
+			kprintf("%s: ERROR: allocating job profile counters\n",
+					__FUNCTION__);
+			mcs_lock_unlock(&job_profile_lock, &mcs_node);
+			return -ENOMEM;
+		}
+
+		memset(job_profile_events, 0,
+				sizeof(*job_profile_events) * PROFILE_EVENT_MAX);
+	}
+
+	/* Accumulate process */
+	for (i = 0; i < PROFILE_EVENT_MAX; ++i) {
+		if (!proc->profile_events[i].tsc)
+			continue;
+
+		job_profile_events[i].tsc += proc->profile_events[i].tsc;
+		job_profile_events[i].cnt += proc->profile_events[i].cnt;
+		proc->profile_events[i].tsc = 0;
+		proc->profile_events[i].cnt = 0;
+	}
+
+	/* Last process? */
+	if (job_nr_processes_left == 0) {
+		flags = kprintf_lock();
+
+		for (i = 0; i < PROFILE_SYSCALL_MAX; ++i) {
+			if (!job_profile_events[i].cnt &&
+				!job_profile_events[i + PROFILE_SYSCALL_MAX].cnt)
+				continue;
+
+			__kprintf("JOB: (%2d) (%3d,%20s): %6u %6lukC offl: %6u %6lukC\n",
+				job_nr_processes,
+				i,
+				syscall_name[i],
+				job_profile_events[i].cnt,
+				(job_profile_events[i].tsc /
+				 (job_profile_events[i].cnt ?
+				  job_profile_events[i].cnt : 1))
+				/ 1000,
+				job_profile_events[i + PROFILE_SYSCALL_MAX].cnt,
+				(job_profile_events[i + PROFILE_SYSCALL_MAX].tsc /
+				 (job_profile_events[i + PROFILE_SYSCALL_MAX].cnt ?
+				  job_profile_events[i + PROFILE_SYSCALL_MAX].cnt : 1))
+				/ 1000
+				);
+
+			job_profile_events[i].tsc = 0;
+			job_profile_events[i].cnt = 0;
+		}
+
+		for (i = PROFILE_EVENT_MIN; i < PROFILE_EVENT_MAX; ++i) {
+
+			if (!job_profile_events[i].cnt)
+				continue;
+
+			__kprintf("JOB: (%2d) (%3d,%20s): %6u %6lukC \n",
+				job_nr_processes,
+				i,
+				profile_event_names[i - PROFILE_EVENT_MIN],
+				job_profile_events[i].cnt,
+				(job_profile_events[i].tsc /
+				 (job_profile_events[i].cnt ?
+				  job_profile_events[i].cnt : 1))
+				/ 1000);
+
+			job_profile_events[i].tsc = 0;
+			job_profile_events[i].cnt = 0;
+		}
+
+		kprintf_unlock(flags);
+
+		/* Reset job process indicators */
+		job_nr_processes = -1;
+		job_nr_processes_left = -1;
+	}
+
+	mcs_lock_unlock(&job_profile_lock, &mcs_node);
+
+	return 0;
+}
+
 void profile_accumulate_events(struct thread *thread, struct process *proc)
 {
 	int i;
 	struct mcs_lock_node mcs_node;
 
+	if (!thread->profile_events || !proc->profile_events) return;
+
 	mcs_lock_lock(&proc->profile_lock, &mcs_node);
 
 	for (i = 0; i < PROFILE_EVENT_MAX; ++i) {
 		proc->profile_events[i].tsc += thread->profile_events[i].tsc;
 		proc->profile_events[i].cnt += thread->profile_events[i].cnt;
+		thread->profile_events[i].tsc = 0;
+		thread->profile_events[i].cnt = 0;
 	}
 
 	mcs_lock_unlock(&proc->profile_lock, &mcs_node);
@@ -198,17 +316,19 @@ int profile_alloc_events(struct thread *thread)
 	struct process *proc = thread->proc;
 	struct mcs_lock_node mcs_node;
 
-	thread->profile_events = kmalloc(sizeof(*thread->profile_events) *
-			PROFILE_EVENT_MAX, IHK_MC_AP_NOWAIT);
-	if (!thread->profile_events) {
-		kprintf("%s: ERROR: allocating thread private profile counters\n",
-				__FUNCTION__);
-		return -ENOMEM;
-	}
+	if (!thread->profile_events) {
+		thread->profile_events = kmalloc(sizeof(*thread->profile_events) *
+				PROFILE_EVENT_MAX, IHK_MC_AP_NOWAIT);
 
-	memset(thread->profile_events, 0,
-			sizeof(*thread->profile_events) * PROFILE_EVENT_MAX);
+		if (!thread->profile_events) {
+			kprintf("%s: ERROR: allocating thread private profile counters\n",
+					__FUNCTION__);
+			return -ENOMEM;
+		}
+
+		memset(thread->profile_events, 0,
+				sizeof(*thread->profile_events) * PROFILE_EVENT_MAX);
+	}
 
 	mcs_lock_lock(&proc->profile_lock, &mcs_node);
 	if (!proc->profile_events) {
@@ -218,6 +338,7 @@ int profile_alloc_events(struct thread *thread)
 		if (!proc->profile_events) {
 			kprintf("%s: ERROR: allocating proc private profile counters\n",
 					__FUNCTION__);
+			mcs_lock_unlock(&proc->profile_lock, &mcs_node);
 			return -ENOMEM;
 		}
@@ -242,6 +363,8 @@ void profile_dealloc_proc_events(struct process *proc)
 
 void static profile_clear_thread(struct thread *thread)
 {
+	if (!thread->profile_events) return;
+
 	memset(thread->profile_events, 0,
 		sizeof(*thread->profile_events) * PROFILE_EVENT_MAX);
 }
@@ -249,18 +372,56 @@ void static profile_clear_thread(struct thread *thread)
 int do_profile(int flag)
 {
 	struct thread *thread = cpu_local_var(current);
+	struct process *proc = thread->proc;
 
-	/* Process level? */
-	if (flag & PROF_PROC) {
+	/* Job level? */
+	if (flag & PROF_JOB) {
 		if (flag & PROF_PRINT) {
-			profile_print_proc_stats(thread->proc);
+			struct mcs_rwlock_node lock;
+			struct thread *_thread;
+
+			/* Accumulate events from all threads to process level */
+			mcs_rwlock_reader_lock_noirq(&proc->threads_lock, &lock);
+			list_for_each_entry(_thread, &proc->threads_list,
+					siblings_list) {
+				profile_accumulate_events(_thread, proc);
+			}
+			mcs_rwlock_reader_unlock_noirq(&proc->threads_lock, &lock);
+
+			/* Accumulate events to job level */
+			return profile_accumulate_and_print_job_events(proc);
+		}
+	}
+	/* Process level? */
+	else if (flag & PROF_PROC) {
+		struct mcs_rwlock_node lock;
+		struct thread *_thread;
+
+		/* Accumulate events from all threads */
+		mcs_rwlock_reader_lock_noirq(&proc->threads_lock, &lock);
+
+		list_for_each_entry(_thread, &proc->threads_list,
+				siblings_list) {
+			if (flag & PROF_PRINT) {
+				profile_accumulate_events(_thread, proc);
+			}
+
+			if (flag & PROF_CLEAR) {
+				profile_clear_thread(_thread);
+			}
+
+			if (flag & PROF_ON) {
+				_thread->profile = 1;
+			}
+			else if (flag & PROF_OFF) {
+				_thread->profile = 0;
+			}
 		}
 
-		if (flag & PROF_ON) {
-			thread->profile = 1;
-		}
-		else if (flag & PROF_OFF) {
-			thread->profile = 0;
+		mcs_rwlock_reader_unlock_noirq(&proc->threads_lock, &lock);
+
+		if (flag & PROF_PRINT) {
+			profile_print_proc_stats(proc);
 		}
 	}
 	/* Thread level */
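
Usage sketch (not part of the patch): one way a per-rank program might drive the new job-level aggregation from user space. This assumes the profile syscall takes the flag word as its only argument, that kernel/include/profile.h is usable from user space for the __NR_profile number and PROF_* flags, and that PROF_OFF/PROF_PRINT keep the values defined in the unmodified part of that header (not visible in this excerpt):

	#include <stdint.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include "profile.h"	/* __NR_profile and the PROF_* flags (assumed reachable) */

	int main(void)
	{
		/* Turn profiling on for every thread of this process */
		syscall(__NR_profile, PROF_PROC | PROF_ON);

		/* ... per-rank workload ... */

		syscall(__NR_profile, PROF_PROC | PROF_OFF);

		/*
		 * Fold this process' counters into the job-wide counters;
		 * the last process of the job to report prints the "JOB:" totals.
		 */
		syscall(__NR_profile, PROF_JOB | PROF_PRINT);

		return 0;
	}

The same aggregation also happens without the explicit PROF_JOB call when a multi-process job exits, since release_process() now routes through profile_accumulate_and_print_job_events() whenever proc->nr_processes is set.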