Record pthread routine address in clone(), keep helper threads on caller CPU core (workaround for Fugaku)

Change-Id: I29d1589e430dc1396558cdf3df4d068c27173612
This commit is contained in:
Balazs Gerofi
2019-11-14 09:15:56 +09:00
committed by Masamichi Takagi
parent 97b107f61c
commit 3328ce03d9
5 changed files with 199 additions and 9 deletions

View File

@ -57,15 +57,34 @@ extern int num_processors;
int obtain_clone_cpuid(cpu_set_t *cpu_set, int use_last)
{
int min_queue_len = -1;
int cpu, min_cpu = -1, uti_cpu = -1;
int cpu, min_cpu = -1;
#if 0
int uti_cpu = -1;
#endif
unsigned long irqstate = 0;
int start, end, step;
if (use_last) {
start = num_processors - 1;
end = -1;
step = -1;
}
else {
start = 0;
end = num_processors;
step = 1;
}
if (!cpu_local_var(current)->proc->nr_processes) {
irqstate = ihk_mc_spinlock_lock(&runq_reservation_lock);
}
else {
irqstate = cpu_disable_interrupt_save();
}
/* Find the first allowed core with the shortest run queue */
for (cpu = 0; cpu < num_processors; ++cpu) {
for (cpu = start; cpu != end; cpu += step) {
struct cpu_local_var *v;
if (!CPU_ISSET(cpu, cpu_set))
@ -76,11 +95,14 @@ int obtain_clone_cpuid(cpu_set_t *cpu_set, int use_last)
dkprintf("%s: cpu=%d,runq_len=%d,runq_reserved=%d\n",
__func__, cpu, v->runq_len, v->runq_reserved);
if (min_queue_len == -1 ||
v->runq_len + v->runq_reserved < min_queue_len) {
min_queue_len = v->runq_len + v->runq_reserved;
//v->runq_len + v->runq_reserved < min_queue_len) {
v->runq_len < min_queue_len) {
//min_queue_len = v->runq_len + v->runq_reserved;
min_queue_len = v->runq_len;
min_cpu = cpu;
}
#if 0
/* Record the last tie CPU */
if (min_cpu != cpu &&
v->runq_len + v->runq_reserved == min_queue_len) {
@ -89,14 +111,15 @@ int obtain_clone_cpuid(cpu_set_t *cpu_set, int use_last)
dkprintf("%s: cpu=%d,runq_len=%d,runq_reserved=%d,min_cpu=%d,uti_cpu=%d\n",
__func__, cpu, v->runq_len, v->runq_reserved,
min_cpu, uti_cpu);
#else
ihk_mc_spinlock_unlock_noirq(&v->runq_lock);
#if 0
if (min_queue_len == 0)
break;
#endif
}
#if 0
min_cpu = use_last ? uti_cpu : min_cpu;
if (min_cpu != -1) {
if (get_cpu_local_var(min_cpu)->status != CPU_STATUS_RESERVED)
@ -105,10 +128,16 @@ int obtain_clone_cpuid(cpu_set_t *cpu_set, int use_last)
__sync_fetch_and_add(&get_cpu_local_var(min_cpu)->runq_reserved,
1);
}
#else
__sync_fetch_and_add(&get_cpu_local_var(min_cpu)->runq_reserved, 1);
#endif
if (!cpu_local_var(current)->proc->nr_processes) {
ihk_mc_spinlock_unlock(&runq_reservation_lock, irqstate);
}
else {
cpu_restore_interrupt(irqstate);
}
return min_cpu;
}

View File

@ -1,6 +1,7 @@
#define _GNU_SOURCE
#include <dlfcn.h>
#include <errno.h>
#include <sys/time.h>
#include <sys/syscall.h>
#include <sched.h>
#include <unistd.h>
#include <stdlib.h>
@ -27,3 +28,110 @@ int sched_yield(void)
return 0;
}
/*
 * Drop any macro definition of pthread_create before this file defines
 * a function with that name.
 */
#undef pthread_create
/* Function pointer type with the same signature as pthread_create(). */
typedef int (*__pthread_create_fn)(pthread_t *thread,
const pthread_attr_t *attr,
void *(*start_routine) (void *),
void *arg);
/*
 * Cached pointer to the next pthread_create in symbol lookup order.
 * Zero until the pthread_create() defined in this file resolves it
 * with dlsym(RTLD_NEXT, "pthread_create").
 */
static __pthread_create_fn orig_pthread_create = 0;
/**
 * pthread_create() - interposed wrapper that announces the thread's start
 * routine to the kernel before creating the thread.
 * @thread: forwarded unchanged to the underlying pthread_create.
 * @attr: forwarded unchanged to the underlying pthread_create.
 * @start_routine: thread entry point; its address is also passed to the
 *                 kernel via the clone() hint below.
 * @arg: forwarded unchanged to the underlying pthread_create.
 *
 * The underlying implementation is looked up once with
 * dlsym(RTLD_NEXT, "pthread_create") and cached in orig_pthread_create.
 *
 * Return: the underlying pthread_create's return value, or ENOSYS when
 * no underlying pthread_create can be resolved.
 */
int pthread_create(pthread_t *thread,
		const pthread_attr_t *attr,
		void *(*start_routine) (void *),
		void *arg)
{
	if (!orig_pthread_create) {
		orig_pthread_create =
			(__pthread_create_fn)dlsym(RTLD_NEXT, "pthread_create");
		/*
		 * dlsym() returns NULL when the symbol is not found.  Bail
		 * out before issuing the hint so that no start routine
		 * address is announced for a thread that is never created.
		 */
		if (!orig_pthread_create) {
			return ENOSYS;
		}
	}

	/*
	 * CLONE_VM and newsp == parent_tidptr implies pthread start routine
	 * addr.  The result is deliberately ignored: the call only carries
	 * a hint.
	 * NOTE(review): this relies on the kernel recognizing the
	 * convention; confirm this library is never preloaded on a kernel
	 * that would execute it as a real clone().
	 */
	(void)syscall(__NR_clone, CLONE_VM, start_routine, start_routine,
			0, 0, 0);

	return orig_pthread_create(thread, attr, start_routine, arg);
}
#if 0
#define PROCMAPS_MAX_LEN 131072
/**
 * addr_to_lib() - find the file backing the mapping that contains @addr.
 * @addr: address to look up in this process's memory map.
 * @offset_in_lib: if non-NULL, receives @addr minus the start address of
 *                 the containing mapping.  It is written whenever a
 *                 containing mapping is found, even if that mapping has
 *                 no path.
 *
 * Reads /proc/self/maps one line at a time, so the size of the maps file
 * is not limited by a fixed buffer.
 *
 * Return: a heap-allocated copy of the mapping's path, which the caller
 * must free(); NULL if the maps file cannot be opened, no mapping
 * contains @addr, the containing mapping has no path, or the copy cannot
 * be allocated.
 */
char *addr_to_lib(void *addr, unsigned long *offset_in_lib)
{
	/* Room for the fixed fields of one maps line plus a full path */
	char line[PATH_MAX + 128];
	const unsigned long target = (unsigned long)addr;
	FILE *fp;

	fp = fopen("/proc/self/maps", "r");
	if (!fp) {
		fprintf(stderr,"error: cannot open the memory maps, %s\n",
			strerror(errno));
		return NULL;
	}

	while (fgets(line, sizeof(line), fp)) {
		unsigned long start, end;
		int path_pos = -1;
		size_t len;
		char *copy;

		/*
		 * Only the address range is stored.  The remaining fields
		 * (perms, offset, dev, inode) are matched but discarded, and
		 * %n records where the path starts.  path_pos stays -1
		 * unless every field before it matched.
		 */
		if (sscanf(line, "%lx-%lx %*4s %*lx %*x:%*x %*lu %n",
			   &start, &end, &path_pos) != 2 || path_pos < 0) {
			continue;
		}

		if (target < start || target >= end) {
			continue;
		}

		fclose(fp);

		if (offset_in_lib) {
			*offset_in_lib = target - start;
		}

		/* The path, if any, runs up to the trailing newline */
		len = strcspn(line + path_pos, "\n");
		if (len == 0) {
			return NULL;
		}

		copy = malloc(len + 1);
		if (!copy) {
			return NULL;
		}
		memcpy(copy, line + path_pos, len);
		copy[len] = '\0';
		return copy;
	}

	fclose(fp);
	return NULL;
}
/**
 * pthread_create() - interposed wrapper that flags utility threads.
 * @thread: forwarded unchanged to the underlying pthread_create.
 * @attr: forwarded unchanged to the underlying pthread_create.
 * @start_routine: thread entry point; the file it is mapped from decides
 *                 whether the new thread counts as a utility thread.
 * @arg: forwarded unchanged to the underlying pthread_create.
 *
 * A thread is treated as a utility thread unless its start routine lies
 * in a mapping whose path contains "iomp" or "psm".  For utility threads
 * the McKernel util_indicate_clone() system call is issued before the
 * thread is created.
 *
 * Return: the underlying pthread_create's return value, or ENOSYS when
 * no underlying pthread_create can be resolved.
 */
int pthread_create(pthread_t *thread,
		const pthread_attr_t *attr,
		void *(*start_routine) (void *),
		void *arg)
{
	/* McKernel util_indicate_clone() */
	enum { NR_UTIL_INDICATE_CLONE = 731 };
	char *lib;
	int util_thread = 1;
	unsigned long offset = 0;

	if (!orig_pthread_create) {
		orig_pthread_create =
			(__pthread_create_fn)dlsym(RTLD_NEXT, "pthread_create");
		/* dlsym() returns NULL when the symbol is not found */
		if (!orig_pthread_create) {
			return ENOSYS;
		}
	}

	lib = addr_to_lib((void *)start_routine, &offset);
	if (lib) {
		/* Cast: %lx takes an unsigned long, not a function pointer */
		printf("%s: 0x%lx is in %s @ 0x%lx\n",
			__func__, (unsigned long)start_routine, lib, offset);

		if (strstr(lib, "iomp") || strstr(lib, "psm")) {
			util_thread = 0;
		}
	}
	/* free(NULL) is a no-op, so no guard is needed */
	free(lib);

	if (util_thread) {
		syscall(NR_UTIL_INDICATE_CLONE);
	}

	return orig_pthread_create(thread, attr, start_routine, arg);
}
#endif

View File

@ -500,6 +500,7 @@ static int process_msg_prepare_process(unsigned long rphys)
ihk_mc_unmap_memory(NULL, phys, sz);
return -ENOMEM;
}
sprintf(thread->pthread_routine, "%s", "[main]");
proc = thread->proc;
vm = thread->vm;

View File

@ -608,6 +608,7 @@ struct thread {
// thread info
int cpu_id;
int tid;
char pthread_routine[PATH_MAX + 64];
int status; // PS_RUNNING -> PS_EXITED (-> ZOMBIE / ptrace)
// | ^ ^
// | | |
@ -717,6 +718,7 @@ struct thread {
/* Syscall offload wait queue head */
struct waitq scd_wq;
unsigned long clone_pthread_start_routine;
int uti_state;
int mod_clone;
struct uti_attr *mod_clone_arg;

View File

@ -2757,6 +2757,9 @@ unsigned long do_fork(int clone_flags, unsigned long newsp,
const struct ihk_mc_cpu_info *cpu_info = ihk_mc_get_cpu_info();
#endif
int err = 0;
unsigned long clone_pthread_start_routine = 0;
struct vm_range *range = NULL;
int helper_thread = 0;
dkprintf("%s,flags=%08x,newsp=%lx,ptidptr=%lx,"
"ctidptr=%lx,tls=%lx,curpc=%lx,cursp=%lx",
@ -2766,6 +2769,18 @@ unsigned long do_fork(int clone_flags, unsigned long newsp,
dkprintf("do_fork(): stack_pointr passed in: 0x%lX, stack pointer of caller: 0x%lx\n",
newsp, cursp);
/* CLONE_VM and newsp == parent_tidptr implies pthread start routine addr */
if ((clone_flags & CLONE_VM) && newsp == parent_tidptr) {
old->clone_pthread_start_routine = parent_tidptr;
dkprintf("%s: clone_pthread_start_routine: 0x%lx\n", __func__,
old->clone_pthread_start_routine);
return 0;
}
/* Clear pthread routine addr regardless of whether we succeed */
clone_pthread_start_routine = old->clone_pthread_start_routine;
old->clone_pthread_start_routine = 0;
parent_cpuid = old->cpu_id;
if (((clone_flags & CLONE_VM) && !(clone_flags & CLONE_THREAD)) ||
(!(clone_flags & CLONE_VM) && (clone_flags & CLONE_THREAD))) {
@ -2821,10 +2836,34 @@ unsigned long do_fork(int clone_flags, unsigned long newsp,
}
}
cpuid = obtain_clone_cpuid(&old->cpu_set, old->mod_clone == SPAWN_TO_REMOTE && oldproc->uti_use_last_cpu);
if (cpuid == -1) {
kprintf("do_fork,core not available\n");
return -EAGAIN;
if (clone_pthread_start_routine) {
ihk_rwspinlock_read_lock_noirq(&old->vm->memory_range_lock);
range = lookup_process_memory_range(old->vm,
clone_pthread_start_routine,
clone_pthread_start_routine + 1);
ihk_rwspinlock_read_unlock_noirq(&old->vm->memory_range_lock);
if (range && range->memobj && range->memobj->path) {
if (!strstr(range->memobj->path, "omp.so")) {
helper_thread = 1;
}
dkprintf("clone(): %s thread from %s\n",
helper_thread ? "helper" : "compute",
range->memobj->path);
}
}
if (helper_thread) {
cpuid = ihk_mc_get_processor_id();
//cpuid = obtain_clone_cpuid(&oldproc->cpu_set, 1);
}
else {
cpuid = obtain_clone_cpuid(&oldproc->cpu_set,
(old->mod_clone == SPAWN_TO_REMOTE && oldproc->uti_use_last_cpu));
if (cpuid == -1) {
kprintf("do_fork,core not available\n");
return -EAGAIN;
}
}
new = clone_thread(old, curpc,
@ -2835,6 +2874,17 @@ unsigned long do_fork(int clone_flags, unsigned long newsp,
goto release_cpuid;
}
if (clone_pthread_start_routine &&
range && range->memobj && range->memobj->path) {
sprintf(new->pthread_routine, "0x%lx @ %s",
clone_pthread_start_routine,
range->memobj->path);
}
else {
sprintf(new->pthread_routine, "%s", "[unknown]");
}
newproc = new->proc;
cpu_set(cpuid, &new->vm->address_space->cpu_set,