From 3328ce03d918d5711d99233bcbc8478945827769 Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Thu, 14 Nov 2019 09:15:56 +0900 Subject: [PATCH] Record pthread routine address in clone(), keep helper threads on caller CPU core (workaround for Fugaku) Change-Id: I29d1589e430dc1396558cdf3df4d068c27173612 --- arch/arm64/kernel/syscall.c | 39 ++++++++++-- executer/user/libsched_yield.c | 108 +++++++++++++++++++++++++++++++++ kernel/host.c | 1 + kernel/include/process.h | 2 + kernel/syscall.c | 58 ++++++++++++++++-- 5 files changed, 199 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 962a694c..4e28bd64 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -57,15 +57,34 @@ extern int num_processors; int obtain_clone_cpuid(cpu_set_t *cpu_set, int use_last) { int min_queue_len = -1; - int cpu, min_cpu = -1, uti_cpu = -1; + int cpu, min_cpu = -1; +#if 0 + int uti_cpu = -1; +#endif unsigned long irqstate = 0; + int start, end, step; + + if (use_last) { + start = num_processors - 1; + end = -1; + step = -1; + } + else { + start = 0; + end = num_processors; + step = 1; + } + if (!cpu_local_var(current)->proc->nr_processes) { irqstate = ihk_mc_spinlock_lock(&runq_reservation_lock); } + else { + irqstate = cpu_disable_interrupt_save(); + } /* Find the first allowed core with the shortest run queue */ - for (cpu = 0; cpu < num_processors; ++cpu) { + for (cpu = start; cpu != end; cpu += step) { struct cpu_local_var *v; if (!CPU_ISSET(cpu, cpu_set)) @@ -76,11 +95,14 @@ int obtain_clone_cpuid(cpu_set_t *cpu_set, int use_last) dkprintf("%s: cpu=%d,runq_len=%d,runq_reserved=%d\n", __func__, cpu, v->runq_len, v->runq_reserved); if (min_queue_len == -1 || - v->runq_len + v->runq_reserved < min_queue_len) { - min_queue_len = v->runq_len + v->runq_reserved; + //v->runq_len + v->runq_reserved < min_queue_len) { + v->runq_len < min_queue_len) { + //min_queue_len = v->runq_len + v->runq_reserved; + 
min_queue_len = v->runq_len; min_cpu = cpu; } +#if 0 /* Record the last tie CPU */ if (min_cpu != cpu && v->runq_len + v->runq_reserved == min_queue_len) { @@ -89,14 +111,15 @@ int obtain_clone_cpuid(cpu_set_t *cpu_set, int use_last) dkprintf("%s: cpu=%d,runq_len=%d,runq_reserved=%d,min_cpu=%d,uti_cpu=%d\n", __func__, cpu, v->runq_len, v->runq_reserved, min_cpu, uti_cpu); +#else ihk_mc_spinlock_unlock_noirq(&v->runq_lock); -#if 0 if (min_queue_len == 0) break; #endif } +#if 0 min_cpu = use_last ? uti_cpu : min_cpu; if (min_cpu != -1) { if (get_cpu_local_var(min_cpu)->status != CPU_STATUS_RESERVED) @@ -105,10 +128,16 @@ int obtain_clone_cpuid(cpu_set_t *cpu_set, int use_last) __sync_fetch_and_add(&get_cpu_local_var(min_cpu)->runq_reserved, 1); } +#else + __sync_fetch_and_add(&get_cpu_local_var(min_cpu)->runq_reserved, 1); +#endif if (!cpu_local_var(current)->proc->nr_processes) { ihk_mc_spinlock_unlock(&runq_reservation_lock, irqstate); } + else { + cpu_restore_interrupt(irqstate); + } return min_cpu; } diff --git a/executer/user/libsched_yield.c b/executer/user/libsched_yield.c index b69af142..302edfb4 100644 --- a/executer/user/libsched_yield.c +++ b/executer/user/libsched_yield.c @@ -1,6 +1,7 @@ #define _GNU_SOURCE #include #include +#include #include #include #include @@ -27,3 +28,110 @@ int sched_yield(void) return 0; } + +#undef pthread_create + +typedef int (*__pthread_create_fn)(pthread_t *thread, + const pthread_attr_t *attr, + void *(*start_routine) (void *), + void *arg); + +static __pthread_create_fn orig_pthread_create = 0; + + +int pthread_create(pthread_t *thread, + const pthread_attr_t *attr, + void *(*start_routine) (void *), + void *arg) +{ + if (!orig_pthread_create) { + orig_pthread_create = + (__pthread_create_fn)dlsym(RTLD_NEXT, "pthread_create"); + } + + /* CLONE_VM and newsp == parent_tidptr implies pthread start routine addr */ + syscall(__NR_clone, CLONE_VM, start_routine, start_routine, 0, 0, 0); + + return orig_pthread_create(thread, 
attr, start_routine, arg); +} + +#if 0 +#define PROCMAPS_MAX_LEN 131072 + +char *addr_to_lib(void *addr, unsigned long *offset_in_lib) +{ + char maps_path[PATH_MAX]; + char buf[PROCMAPS_MAX_LEN]; + int fd; + void *start, *end; + char perms[4]; + unsigned long offset; + unsigned long dev[2]; + int inode; + char path[PATH_MAX]; + char *line; + + sprintf(maps_path,"/proc/self/maps"); + fd = open(maps_path, O_RDONLY); + if (fd < 0) { + fprintf(stderr,"error: cannot open the memory maps, %s\n", + strerror(errno)); + return NULL; + } + + memset(buf, 0, PROCMAPS_MAX_LEN); + read(fd, buf, PROCMAPS_MAX_LEN); + line = strtok(buf, "\n"); + while (line) { + memset(path, 0, sizeof(path)); + sscanf(line, "%012lx-%012lx %4s %lx %lx:%lx %d\t\t\t%[^\n]", + &start, &end, perms, &offset, &dev[0], &dev[1], &inode, path); + + if (start <= addr && end > addr) { + close(fd); + if (offset_in_lib) + *offset_in_lib = (unsigned long)(addr - start); + return strlen(path) > 0 ? strdup(path) : NULL; + } + + line = strtok(NULL, "\n"); + } + + close(fd); + return NULL; +} + +int pthread_create(pthread_t *thread, + const pthread_attr_t *attr, + void *(*start_routine) (void *), + void *arg) +{ + char *lib = NULL; + int util_thread = 1; + unsigned long offset; + + if (!orig_pthread_create) { + orig_pthread_create = + (__pthread_create_fn)dlsym(RTLD_NEXT, "pthread_create"); + } + + lib = addr_to_lib(start_routine, &offset); + if (lib) + printf("%s: 0x%lx is in %s @ 0x%lx\n", + __func__, start_routine, lib, offset); + + if (lib && (strstr(lib, "iomp") || strstr(lib, "psm"))) { + util_thread = 0; + } + + if (util_thread) { + /* McKernel util_indicate_clone() */ + syscall(731); + } + + if (lib) + free(lib); + + return orig_pthread_create(thread, attr, start_routine, arg); +} +#endif diff --git a/kernel/host.c b/kernel/host.c index 45f3c77d..8c2caf14 100644 --- a/kernel/host.c +++ b/kernel/host.c @@ -500,6 +500,7 @@ static int process_msg_prepare_process(unsigned long rphys) ihk_mc_unmap_memory(NULL, 
phys, sz); return -ENOMEM; } + sprintf(thread->pthread_routine, "%s", "[main]"); proc = thread->proc; vm = thread->vm; diff --git a/kernel/include/process.h b/kernel/include/process.h index 2f5300ef..512b0c03 100644 --- a/kernel/include/process.h +++ b/kernel/include/process.h @@ -608,6 +608,7 @@ struct thread { // thread info int cpu_id; int tid; + char pthread_routine[PATH_MAX + 64]; int status; // PS_RUNNING -> PS_EXITED (-> ZOMBIE / ptrace) // | ^ ^ // | | | @@ -717,6 +718,7 @@ struct thread { /* Syscall offload wait queue head */ struct waitq scd_wq; + unsigned long clone_pthread_start_routine; int uti_state; int mod_clone; struct uti_attr *mod_clone_arg; diff --git a/kernel/syscall.c b/kernel/syscall.c index 424ee9ea..cb4bd60f 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -2757,6 +2757,9 @@ unsigned long do_fork(int clone_flags, unsigned long newsp, const struct ihk_mc_cpu_info *cpu_info = ihk_mc_get_cpu_info(); #endif int err = 0; + unsigned long clone_pthread_start_routine = 0; + struct vm_range *range = NULL; + int helper_thread = 0; dkprintf("%s,flags=%08x,newsp=%lx,ptidptr=%lx," "ctidptr=%lx,tls=%lx,curpc=%lx,cursp=%lx", @@ -2766,6 +2769,18 @@ unsigned long do_fork(int clone_flags, unsigned long newsp, dkprintf("do_fork(): stack_pointr passed in: 0x%lX, stack pointer of caller: 0x%lx\n", newsp, cursp); + /* CLONE_VM and newsp == parent_tidptr implies pthread start routine addr */ + if ((clone_flags & CLONE_VM) && newsp == parent_tidptr) { + old->clone_pthread_start_routine = parent_tidptr; + dkprintf("%s: clone_pthread_start_routine: 0x%lx\n", __func__, + old->clone_pthread_start_routine); + return 0; + } + + /* Clear pthread routine addr regardless of whether we succeed */ + clone_pthread_start_routine = old->clone_pthread_start_routine; + old->clone_pthread_start_routine = 0; + parent_cpuid = old->cpu_id; if (((clone_flags & CLONE_VM) && !(clone_flags & CLONE_THREAD)) || (!(clone_flags & CLONE_VM) && (clone_flags & CLONE_THREAD))) { @@ -2821,10 
+2836,34 @@ unsigned long do_fork(int clone_flags, unsigned long newsp, } } - cpuid = obtain_clone_cpuid(&old->cpu_set, old->mod_clone == SPAWN_TO_REMOTE && oldproc->uti_use_last_cpu); - if (cpuid == -1) { - kprintf("do_fork,core not available\n"); - return -EAGAIN; + if (clone_pthread_start_routine) { + ihk_rwspinlock_read_lock_noirq(&old->vm->memory_range_lock); + range = lookup_process_memory_range(old->vm, + clone_pthread_start_routine, + clone_pthread_start_routine + 1); + ihk_rwspinlock_read_unlock_noirq(&old->vm->memory_range_lock); + + if (range && range->memobj && range->memobj->path) { + if (!strstr(range->memobj->path, "omp.so")) { + helper_thread = 1; + } + dkprintf("clone(): %s thread from %s\n", + helper_thread ? "helper" : "compute", + range->memobj->path); + } + } + + if (helper_thread) { + cpuid = ihk_mc_get_processor_id(); + //cpuid = obtain_clone_cpuid(&oldproc->cpu_set, 1); + } + else { + cpuid = obtain_clone_cpuid(&oldproc->cpu_set, + (old->mod_clone == SPAWN_TO_REMOTE && oldproc->uti_use_last_cpu)); + if (cpuid == -1) { + kprintf("do_fork,core not available\n"); + return -EAGAIN; + } } new = clone_thread(old, curpc, @@ -2835,6 +2874,17 @@ unsigned long do_fork(int clone_flags, unsigned long newsp, goto release_cpuid; } + if (clone_pthread_start_routine && + range && range->memobj && range->memobj->path) { + + sprintf(new->pthread_routine, "0x%lx @ %s", + clone_pthread_start_routine, + range->memobj->path); + } + else { + sprintf(new->pthread_routine, "%s", "[unknown]"); + } + newproc = new->proc; cpu_set(cpuid, &new->vm->address_space->cpu_set,