profile: better time breakdown and exclusion of idle cycles

This commit is contained in:
Balazs Gerofi
2017-07-20 17:36:34 +09:00
parent 64c2e437c6
commit 992a292c08
5 changed files with 160 additions and 30 deletions

View File

@ -788,6 +788,15 @@ static int futex_wait(uint32_t __user *uaddr, int fshared,
if (!bitset) if (!bitset)
return -EINVAL; return -EINVAL;
#ifdef PROFILE_ENABLE
if (cpu_local_var(current)->profile &&
cpu_local_var(current)->profile_start_ts) {
cpu_local_var(current)->profile_elapsed_ts +=
(rdtsc() - cpu_local_var(current)->profile_start_ts);
cpu_local_var(current)->profile_start_ts = 0;
}
#endif
q.bitset = bitset; q.bitset = bitset;
q.requeue_pi_key = NULL; q.requeue_pi_key = NULL;
@ -822,6 +831,11 @@ retry:
out_put_key: out_put_key:
put_futex_key(fshared, &q.key); put_futex_key(fshared, &q.key);
out: out:
#ifdef PROFILE_ENABLE
if (cpu_local_var(current)->profile) {
cpu_local_var(current)->profile_start_ts = rdtsc();
}
#endif
return ret; return ret;
} }

View File

@ -554,6 +554,7 @@ struct process {
int profile; int profile;
mcs_lock_node_t profile_lock; mcs_lock_node_t profile_lock;
struct profile_event *profile_events; struct profile_event *profile_events;
unsigned long profile_elapsed_ts;
#endif // PROFILE_ENABLE #endif // PROFILE_ENABLE
int nr_processes; /* For partitioned execution */ int nr_processes; /* For partitioned execution */
}; };
@ -631,6 +632,8 @@ struct thread {
#ifdef PROFILE_ENABLE #ifdef PROFILE_ENABLE
int profile; int profile;
struct profile_event *profile_events; struct profile_event *profile_events;
unsigned long profile_start_ts;
unsigned long profile_elapsed_ts;
#endif // PROFILE_ENABLE #endif // PROFILE_ENABLE
// signal // signal

View File

@ -2966,6 +2966,19 @@ redo:
perf_start(next->proc->monitoring_event); perf_start(next->proc->monitoring_event);
} }
} }
#ifdef PROFILE_ENABLE
if (prev->profile && prev->profile_start_ts != 0) {
prev->profile_elapsed_ts +=
(rdtsc() - prev->profile_start_ts);
prev->profile_start_ts = 0;
}
if (next->profile && next->profile_start_ts == 0) {
next->profile_start_ts = rdtsc();
}
#endif
if (prev) { if (prev) {
last = ihk_mc_switch_context(&prev->ctx, &next->ctx, prev); last = ihk_mc_switch_context(&prev->ctx, &next->ctx, prev);
} }

View File

@ -74,6 +74,7 @@ mcs_lock_node_t job_profile_lock = {0, NULL};
struct profile_event *job_profile_events = NULL; struct profile_event *job_profile_events = NULL;
int job_nr_processes = -1; int job_nr_processes = -1;
int job_nr_processes_left = -1; int job_nr_processes_left = -1;
unsigned long job_elapsed_ts;
@ -111,16 +112,26 @@ void profile_print_thread_stats(struct thread *thread)
int i; int i;
unsigned long flags; unsigned long flags;
if (!thread->profile_events) return; if (!thread->profile_events)
return;
/* Not yet accumulated period? */
if (thread->profile_start_ts) {
thread->profile_elapsed_ts += (rdtsc() - thread->profile_start_ts);
}
flags = kprintf_lock(); flags = kprintf_lock();
__kprintf("TID: %4d elapsed cycles (excluding idle): %luk\n",
thread->tid,
thread->profile_elapsed_ts / 1000);
for (i = 0; i < PROFILE_SYSCALL_MAX; ++i) { for (i = 0; i < PROFILE_SYSCALL_MAX; ++i) {
if (!thread->profile_events[i].cnt && if (!thread->profile_events[i].cnt &&
!thread->profile_events[i + PROFILE_SYSCALL_MAX].cnt) !thread->profile_events[i + PROFILE_SYSCALL_MAX].cnt)
continue; continue;
__kprintf("TID: %4d (%3d,%20s): %6u %6luk offl: %6u %6luk\n", __kprintf("TID: %4d (%3d,%20s): %6u %6luk offl: %6u %6luk (%2d.%2d%%)\n",
thread->tid, thread->tid,
i, i,
syscall_name[i], syscall_name[i],
@ -133,7 +144,13 @@ void profile_print_thread_stats(struct thread *thread)
(thread->profile_events[i + PROFILE_SYSCALL_MAX].tsc / (thread->profile_events[i + PROFILE_SYSCALL_MAX].tsc /
(thread->profile_events[i + PROFILE_SYSCALL_MAX].cnt ? (thread->profile_events[i + PROFILE_SYSCALL_MAX].cnt ?
thread->profile_events[i + PROFILE_SYSCALL_MAX].cnt : 1)) thread->profile_events[i + PROFILE_SYSCALL_MAX].cnt : 1))
/ 1000 / 1000,
(thread->profile_events[i].tsc ?
thread->profile_events[i].tsc * 100
/ thread->profile_elapsed_ts : 0),
(thread->profile_events[i].tsc ?
(thread->profile_events[i].tsc * 10000
/ thread->profile_elapsed_ts) % 100 : 0)
); );
} }
@ -149,7 +166,14 @@ void profile_print_thread_stats(struct thread *thread)
(thread->profile_events[i].tsc / (thread->profile_events[i].tsc /
(thread->profile_events[i].cnt ? (thread->profile_events[i].cnt ?
thread->profile_events[i].cnt : 1)) thread->profile_events[i].cnt : 1))
/ 1000); / 1000,
(thread->profile_events[i].tsc ?
thread->profile_events[i].tsc * 100
/ thread->profile_elapsed_ts : 0),
(thread->profile_events[i].tsc ?
(thread->profile_events[i].tsc * 10000
/ thread->profile_elapsed_ts) % 100 : 0)
);
} }
@ -161,16 +185,20 @@ void profile_print_proc_stats(struct process *proc)
int i; int i;
unsigned long flags; unsigned long flags;
if (!proc->profile_events) return; if (!proc->profile_events || !proc->profile_elapsed_ts)
return;
flags = kprintf_lock(); flags = kprintf_lock();
__kprintf("PID: %4d elapsed cycles for all threads (excluding idle): %luk\n",
proc->pid,
proc->profile_elapsed_ts / 1000);
for (i = 0; i < PROFILE_SYSCALL_MAX; ++i) { for (i = 0; i < PROFILE_SYSCALL_MAX; ++i) {
if (!proc->profile_events[i].cnt && if (!proc->profile_events[i].cnt &&
!proc->profile_events[i + PROFILE_SYSCALL_MAX].cnt) !proc->profile_events[i + PROFILE_SYSCALL_MAX].cnt)
continue; continue;
__kprintf("PID: %4d (%3d,%20s): %6u %6luk offl: %6u %6luk\n", __kprintf("PID: %4d (%3d,%20s): %6u %6luk offl: %6u %6luk (%2d.%2d%%)\n",
proc->pid, proc->pid,
i, i,
syscall_name[i], syscall_name[i],
@ -183,7 +211,13 @@ void profile_print_proc_stats(struct process *proc)
(proc->profile_events[i + PROFILE_SYSCALL_MAX].tsc / (proc->profile_events[i + PROFILE_SYSCALL_MAX].tsc /
(proc->profile_events[i + PROFILE_SYSCALL_MAX].cnt ? (proc->profile_events[i + PROFILE_SYSCALL_MAX].cnt ?
proc->profile_events[i + PROFILE_SYSCALL_MAX].cnt : 1)) proc->profile_events[i + PROFILE_SYSCALL_MAX].cnt : 1))
/ 1000 / 1000,
(proc->profile_events[i].tsc ?
proc->profile_events[i].tsc * 100
/ proc->profile_elapsed_ts : 0),
(proc->profile_events[i].tsc ?
(proc->profile_events[i].tsc * 10000
/ proc->profile_elapsed_ts) % 100 : 0)
); );
} }
@ -199,7 +233,16 @@ void profile_print_proc_stats(struct process *proc)
(proc->profile_events[i].tsc / (proc->profile_events[i].tsc /
(proc->profile_events[i].cnt ? (proc->profile_events[i].cnt ?
proc->profile_events[i].cnt : 1)) proc->profile_events[i].cnt : 1))
/ 1000); / 1000,
(proc->profile_events[i].tsc &&
proc->profile_elapsed_ts ?
proc->profile_events[i].tsc * 100
/ proc->profile_elapsed_ts : 0),
(proc->profile_events[i].tsc &&
proc->profile_elapsed_ts ?
(proc->profile_events[i].tsc * 10000
/ proc->profile_elapsed_ts) % 100 : 0)
);
} }
kprintf_unlock(flags); kprintf_unlock(flags);
@ -217,6 +260,7 @@ int profile_accumulate_and_print_job_events(struct process *proc)
if (job_nr_processes == -1) { if (job_nr_processes == -1) {
job_nr_processes = proc->nr_processes; job_nr_processes = proc->nr_processes;
job_nr_processes_left = proc->nr_processes; job_nr_processes_left = proc->nr_processes;
job_elapsed_ts = 0;
} }
--job_nr_processes_left; --job_nr_processes_left;
@ -248,16 +292,21 @@ int profile_accumulate_and_print_job_events(struct process *proc)
proc->profile_events[i].cnt = 0; proc->profile_events[i].cnt = 0;
} }
job_elapsed_ts += proc->profile_elapsed_ts;
/* Last process? */ /* Last process? */
if (job_nr_processes_left == 0) { if (job_nr_processes_left == 0) {
flags = kprintf_lock(); flags = kprintf_lock();
__kprintf("JOB: (%2d) elapsed cycles for all threads (excluding idle): %luk\n",
job_nr_processes,
job_elapsed_ts / 1000);
for (i = 0; i < PROFILE_SYSCALL_MAX; ++i) { for (i = 0; i < PROFILE_SYSCALL_MAX; ++i) {
if (!job_profile_events[i].cnt && if (!job_profile_events[i].cnt &&
!job_profile_events[i + PROFILE_SYSCALL_MAX].cnt) !job_profile_events[i + PROFILE_SYSCALL_MAX].cnt)
continue; continue;
__kprintf("JOB: (%2d) (%3d,%20s): %6u %6luk offl: %6u %6luk\n", __kprintf("JOB: (%2d) (%3d,%20s): %6u %6luk offl: %6u %6luk (%2d.%2d%%)\n",
job_nr_processes, job_nr_processes,
i, i,
syscall_name[i], syscall_name[i],
@ -270,7 +319,13 @@ int profile_accumulate_and_print_job_events(struct process *proc)
(job_profile_events[i + PROFILE_SYSCALL_MAX].tsc / (job_profile_events[i + PROFILE_SYSCALL_MAX].tsc /
(job_profile_events[i + PROFILE_SYSCALL_MAX].cnt ? (job_profile_events[i + PROFILE_SYSCALL_MAX].cnt ?
job_profile_events[i + PROFILE_SYSCALL_MAX].cnt : 1)) job_profile_events[i + PROFILE_SYSCALL_MAX].cnt : 1))
/ 1000 / 1000,
(job_profile_events[i].tsc ?
job_profile_events[i].tsc * 100
/ job_elapsed_ts : 0),
(job_profile_events[i].tsc ?
(job_profile_events[i].tsc * 10000
/ job_elapsed_ts) % 100 : 0)
); );
job_profile_events[i].tsc = 0; job_profile_events[i].tsc = 0;
@ -302,6 +357,7 @@ int profile_accumulate_and_print_job_events(struct process *proc)
/* Reset job process indicators */ /* Reset job process indicators */
job_nr_processes = -1; job_nr_processes = -1;
job_nr_processes_left = -1; job_nr_processes_left = -1;
job_elapsed_ts = 0;
} }
mcs_lock_unlock(&job_profile_lock, &mcs_node); mcs_lock_unlock(&job_profile_lock, &mcs_node);
@ -326,6 +382,12 @@ void profile_accumulate_events(struct thread *thread,
thread->profile_events[i].cnt = 0; thread->profile_events[i].cnt = 0;
} }
proc->profile_elapsed_ts += thread->profile_elapsed_ts;
if (thread->profile_start_ts) {
proc->profile_elapsed_ts +=
(rdtsc() - thread->profile_start_ts);
}
mcs_lock_unlock(&proc->profile_lock, &mcs_node); mcs_lock_unlock(&proc->profile_lock, &mcs_node);
} }
@ -381,6 +443,7 @@ void profile_dealloc_proc_events(struct process *proc)
void static profile_clear_process(struct process *proc) void static profile_clear_process(struct process *proc)
{ {
proc->profile_elapsed_ts = 0;
if (!proc->profile_events) return; if (!proc->profile_events) return;
memset(proc->profile_events, 0, memset(proc->profile_events, 0,
@ -389,6 +452,8 @@ void static profile_clear_process(struct process *proc)
void static profile_clear_thread(struct thread *thread) void static profile_clear_thread(struct thread *thread)
{ {
thread->profile_start_ts = 0;
thread->profile_elapsed_ts = 0;
if (!thread->profile_events) return; if (!thread->profile_events) return;
memset(thread->profile_events, 0, memset(thread->profile_events, 0,
@ -399,6 +464,7 @@ int do_profile(int flag)
{ {
struct thread *thread = cpu_local_var(current); struct thread *thread = cpu_local_var(current);
struct process *proc = thread->proc; struct process *proc = thread->proc;
unsigned long now_ts = rdtsc();
/* Job level? */ /* Job level? */
if (flag & PROF_JOB) { if (flag & PROF_JOB) {
@ -444,7 +510,14 @@ int do_profile(int flag)
_thread->profile = 1; _thread->profile = 1;
} }
else if (flag & PROF_OFF) { else if (flag & PROF_OFF) {
_thread->profile = 0; if (_thread->profile) {
_thread->profile = 0;
if (_thread->profile_start_ts) {
_thread->profile_elapsed_ts +=
(now_ts - _thread->profile_start_ts);
}
_thread->profile_start_ts = 0;
}
} }
} }
@ -460,7 +533,9 @@ int do_profile(int flag)
/* Make sure future threads profile as well */ /* Make sure future threads profile as well */
if (flag & PROF_ON) { if (flag & PROF_ON) {
proc->profile = 1; if (!proc->profile) {
proc->profile = 1;
}
} }
else if (flag & PROF_OFF) { else if (flag & PROF_OFF) {
proc->profile = 0; proc->profile = 0;
@ -476,13 +551,28 @@ int do_profile(int flag)
if (flag & PROF_CLEAR) { if (flag & PROF_CLEAR) {
profile_clear_thread(thread); profile_clear_thread(thread);
/* If profiling, reset start and elapsed */
if (thread->profile) {
thread->profile_start_ts = 0;
thread->profile_elapsed_ts = 0;
}
} }
if (flag & PROF_ON) { if (flag & PROF_ON) {
thread->profile = 1; if (!thread->profile) {
thread->profile = 1;
thread->profile_start_ts = 0;
}
} }
else if (flag & PROF_OFF) { else if (flag & PROF_OFF) {
thread->profile = 0; if (thread->profile) {
thread->profile = 0;
if (thread->profile_start_ts) {
thread->profile_elapsed_ts +=
(now_ts - thread->profile_start_ts);
}
thread->profile_start_ts = 0;
}
} }
} }

View File

@ -9307,9 +9307,6 @@ set_cputime(int mode)
long syscall(int num, ihk_mc_user_context_t *ctx) long syscall(int num, ihk_mc_user_context_t *ctx)
{ {
long l; long l;
#ifdef PROFILE_ENABLE
uint64_t t_s;
#endif // PROFILE_ENABLE
struct thread *thread = cpu_local_var(current); struct thread *thread = cpu_local_var(current);
#ifdef DISABLE_SCHED_YIELD #ifdef DISABLE_SCHED_YIELD
@ -9317,6 +9314,14 @@ long syscall(int num, ihk_mc_user_context_t *ctx)
#endif // DISABLE_SCHED_YIELD #endif // DISABLE_SCHED_YIELD
set_cputime(1); set_cputime(1);
#ifdef PROFILE_ENABLE
if (thread->profile && thread->profile_start_ts) {
unsigned long ts = rdtsc();
thread->profile_elapsed_ts += (ts - thread->profile_start_ts);
thread->profile_start_ts = ts;
}
#endif // PROFILE_ENABLE
if(cpu_local_var(current)->proc->status == PS_EXITED && if(cpu_local_var(current)->proc->status == PS_EXITED &&
(num != __NR_exit && num != __NR_exit_group)){ (num != __NR_exit && num != __NR_exit_group)){
check_signal(-EINVAL, NULL, 0); check_signal(-EINVAL, NULL, 0);
@ -9356,10 +9361,6 @@ long syscall(int num, ihk_mc_user_context_t *ctx)
#endif #endif
dkprintf("\n"); dkprintf("\n");
#ifdef PROFILE_ENABLE
t_s = rdtsc();
#endif // PROFILE_ENABLE
if ((0 <= num) && (num < (sizeof(syscall_table) / sizeof(syscall_table[0]))) if ((0 <= num) && (num < (sizeof(syscall_table) / sizeof(syscall_table[0])))
&& (syscall_table[num] != NULL)) { && (syscall_table[num] != NULL)) {
l = syscall_table[num](num, ctx); l = syscall_table[num](num, ctx);
@ -9381,13 +9382,22 @@ long syscall(int num, ihk_mc_user_context_t *ctx)
} }
#ifdef PROFILE_ENABLE #ifdef PROFILE_ENABLE
if (num < PROFILE_SYSCALL_MAX) { if (thread->profile) {
profile_event_add(num, (rdtsc() - t_s)); unsigned long ts = rdtsc();
}
else { /*
if (num != __NR_profile) { * futex_wait() and schedule() will internally reset
dkprintf("%s: syscall > %d ?? : %d\n", * thread->profile_start_ts so that actual wait time
__FUNCTION__, PROFILE_SYSCALL_MAX, num); * is not accounted for.
*/
if (num < PROFILE_SYSCALL_MAX) {
profile_event_add(num, (ts - thread->profile_start_ts));
}
else {
if (num != __NR_profile) {
dkprintf("%s: syscall > %d ?? : %d\n",
__FUNCTION__, PROFILE_SYSCALL_MAX, num);
}
} }
} }
#endif // PROFILE_ENABLE #endif // PROFILE_ENABLE