uti: Workaround not to share CPU with OpenMP threads

* Assign uti thread to the last idle CPU so that it's not shared with
  an OpenMP thread

Change-Id: Ia42cae056ce81fde9b6dab6286b39a52f3c9e172
This commit is contained in:
Masamichi Takagi
2018-09-03 18:08:03 +09:00
parent dbba7dea18
commit 5cb8a1f10f
8 changed files with 34 additions and 14 deletions

View File

@ -58,7 +58,7 @@ static int cpuid_head = 1;
extern int num_processors;
int obtain_clone_cpuid(cpu_set_t *cpu_set) {
int obtain_clone_cpuid(cpu_set_t *cpu_set, int use_last) {
int min_queue_len = -1;
int i, min_cpu = -1;

View File

@ -90,11 +90,11 @@ static ptrdiff_t vdso_offset;
extern int num_processors;
int obtain_clone_cpuid(cpu_set_t *cpu_set) {
int obtain_clone_cpuid(cpu_set_t *cpu_set, int use_last) {
int min_queue_len = -1;
int cpu, min_cpu = -1;
int cpu, min_cpu = -1, uti_cpu = -1;
unsigned long irqstate;
irqstate = ihk_mc_spinlock_lock(&runq_reservation_lock);
/* Find the first allowed core with the shortest run queue */
for (cpu = 0; cpu < num_processors; ++cpu) {
@ -109,12 +109,20 @@ int obtain_clone_cpuid(cpu_set_t *cpu_set) {
min_queue_len = v->runq_len + v->runq_reserved;
min_cpu = cpu;
}
ihk_mc_spinlock_unlock_noirq(&v->runq_lock);
/* Record the last tie CPU */
if (min_cpu != cpu && v->runq_len + v->runq_reserved == min_queue_len) {
uti_cpu = cpu;
}
dkprintf("%s: cpu=%d,runq_len=%d,runq_reserved=%d,min_cpu=%d,uti_cpu=%d\n", __FUNCTION__, cpu, v->runq_len, v->runq_reserved, min_cpu, uti_cpu);
ihk_mc_spinlock_unlock_noirq(&v->runq_lock);
#if 0
if (min_queue_len == 0)
break;
#endif
}
min_cpu = use_last ? uti_cpu : min_cpu;
if (min_cpu != -1) {
if (get_cpu_local_var(min_cpu)->status != CPU_STATUS_RESERVED)
get_cpu_local_var(min_cpu)->status = CPU_STATUS_RESERVED;

View File

@ -143,6 +143,7 @@ struct program_load_desc {
long stack_premap;
unsigned long mpol_bind_mask;
int uti_thread_rank; /* N-th clone() spawns a thread on Linux CPU */
int uti_use_last_cpu; /* Work-around not to share CPU with OpenMP thread */
int nr_processes;
int process_rank;
char shell_path[SHELL_PATH_MAX_LEN];

View File

@ -234,6 +234,7 @@ static long stack_max = -1;
static struct rlimit rlim_stack;
static char *mpol_bind_nodes = NULL;
static int uti_thread_rank = 0;
static int uti_use_last_cpu = 0;
/* Partitioned execution (e.g., for MPI) */
static int nr_processes = 0;
@ -1794,6 +1795,12 @@ static struct option mcexec_options[] = {
.flag = NULL,
.val = 'u',
},
{
.name = "uti-use-last-cpu",
.has_arg = no_argument,
.flag = &uti_use_last_cpu,
.val = 1,
},
/* end */
{ NULL, 0, NULL, 0, },
};
@ -2610,6 +2617,7 @@ int main(int argc, char **argv)
}
desc->uti_thread_rank = uti_thread_rank;
desc->uti_use_last_cpu = uti_use_last_cpu;
/* user_start and user_end are set by this call */
if (ioctl(fd, MCEXEC_UP_PREPARE_IMAGE, (unsigned long)desc) != 0) {

View File

@ -506,6 +506,7 @@ static int process_msg_prepare_process(unsigned long rphys)
}
proc->uti_thread_rank = pn->uti_thread_rank;
proc->uti_use_last_cpu = pn->uti_use_last_cpu;
#ifdef PROFILE_ENABLE
proc->profile = pn->profile;
@ -612,7 +613,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
case SCD_MSG_SCHEDULE_PROCESS:
thread = (struct thread *)packet->arg;
cpuid = obtain_clone_cpuid(&thread->cpu_set);
cpuid = obtain_clone_cpuid(&thread->cpu_set, 0);
if (cpuid == -1) {
kprintf("No CPU available\n");
ret = -1;

View File

@ -275,7 +275,7 @@ extern struct list_head resource_set_list;
extern mcs_rwlock_lock_t resource_set_lock;
extern int idle_halt;
extern int allow_oversubscribe;
extern ihk_spinlock_t runq_reservation_lock; /* To serialize runq reservations for competeing fork()s */
extern ihk_spinlock_t runq_reservation_lock; /* mutex for cpuid reservation (clv->runq_reserved) */
struct process_hash {
struct list_head list[HASH_SIZE];
@ -557,6 +557,7 @@ struct process {
unsigned long heap_extension;
unsigned long mpol_bind_mask;
int uti_thread_rank; /* Spawn on Linux CPU when clone_count reaches this */
int uti_use_last_cpu; /* Work-around not to share CPU with OpenMP thread */
int clone_count;
// perf_event

View File

@ -201,6 +201,7 @@ struct program_load_desc {
long stack_premap;
unsigned long mpol_bind_mask;
int uti_thread_rank; /* N-th clone() spawns a thread on Linux CPU */
int uti_use_last_cpu; /* Work-around not to share CPU with OpenMP thread */
int nr_processes;
int process_rank;
char shell_path[SHELL_PATH_MAX_LEN];
@ -350,7 +351,7 @@ struct syscall_post {
#define SYSCALL_FOOTER return do_syscall(&request, ihk_mc_get_processor_id(), 0)
extern long do_syscall(struct syscall_request *req, int cpu, int pid);
int obtain_clone_cpuid(cpu_set_t *cpu_set);
int obtain_clone_cpuid(cpu_set_t *cpu_set, int use_last);
extern long syscall_generic_forwarding(int n, ihk_mc_user_context_t *ctx);
#define DECLARATOR(number,name) __NR_##name = number,

View File

@ -2445,12 +2445,6 @@ unsigned long do_fork(int clone_flags, unsigned long newsp,
return -EINVAL;
}
cpuid = obtain_clone_cpuid(&old->cpu_set);
if (cpuid == -1) {
kprintf("do_fork,core not available\n");
return -EAGAIN;
}
/* N-th creation put the new on Linux CPU. It's turned off when zero is
set to uti_thread_rank. */
if (oldproc->uti_thread_rank) {
@ -2463,6 +2457,12 @@ unsigned long do_fork(int clone_flags, unsigned long newsp,
}
}
cpuid = obtain_clone_cpuid(&old->cpu_set, old->mod_clone == SPAWN_TO_REMOTE && oldproc->uti_use_last_cpu);
if (cpuid == -1) {
kprintf("do_fork,core not available\n");
return -EAGAIN;
}
new = clone_thread(old, curpc,
newsp ? newsp : cursp, clone_flags);