uti: Workaround not to share CPU with OpenMP threads
* Assign the uti thread to the last idle CPU so that it is not shared with an OpenMP thread.

Change-Id: Ia42cae056ce81fde9b6dab6286b39a52f3c9e172
This commit is contained in:
@ -58,7 +58,7 @@ static int cpuid_head = 1;
|
|||||||
|
|
||||||
extern int num_processors;
|
extern int num_processors;
|
||||||
|
|
||||||
int obtain_clone_cpuid(cpu_set_t *cpu_set) {
|
int obtain_clone_cpuid(cpu_set_t *cpu_set, int use_last) {
|
||||||
int min_queue_len = -1;
|
int min_queue_len = -1;
|
||||||
int i, min_cpu = -1;
|
int i, min_cpu = -1;
|
||||||
|
|
||||||
|
|||||||
@ -90,9 +90,9 @@ static ptrdiff_t vdso_offset;
|
|||||||
|
|
||||||
extern int num_processors;
|
extern int num_processors;
|
||||||
|
|
||||||
int obtain_clone_cpuid(cpu_set_t *cpu_set) {
|
int obtain_clone_cpuid(cpu_set_t *cpu_set, int use_last) {
|
||||||
int min_queue_len = -1;
|
int min_queue_len = -1;
|
||||||
int cpu, min_cpu = -1;
|
int cpu, min_cpu = -1, uti_cpu = -1;
|
||||||
unsigned long irqstate;
|
unsigned long irqstate;
|
||||||
|
|
||||||
irqstate = ihk_mc_spinlock_lock(&runq_reservation_lock);
|
irqstate = ihk_mc_spinlock_lock(&runq_reservation_lock);
|
||||||
@ -109,12 +109,20 @@ int obtain_clone_cpuid(cpu_set_t *cpu_set) {
|
|||||||
min_queue_len = v->runq_len + v->runq_reserved;
|
min_queue_len = v->runq_len + v->runq_reserved;
|
||||||
min_cpu = cpu;
|
min_cpu = cpu;
|
||||||
}
|
}
|
||||||
ihk_mc_spinlock_unlock_noirq(&v->runq_lock);
|
|
||||||
|
|
||||||
|
/* Record the last tie CPU */
|
||||||
|
if (min_cpu != cpu && v->runq_len + v->runq_reserved == min_queue_len) {
|
||||||
|
uti_cpu = cpu;
|
||||||
|
}
|
||||||
|
dkprintf("%s: cpu=%d,runq_len=%d,runq_reserved=%d,min_cpu=%d,uti_cpu=%d\n", __FUNCTION__, cpu, v->runq_len, v->runq_reserved, min_cpu, uti_cpu);
|
||||||
|
ihk_mc_spinlock_unlock_noirq(&v->runq_lock);
|
||||||
|
#if 0
|
||||||
if (min_queue_len == 0)
|
if (min_queue_len == 0)
|
||||||
break;
|
break;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
min_cpu = use_last ? uti_cpu : min_cpu;
|
||||||
if (min_cpu != -1) {
|
if (min_cpu != -1) {
|
||||||
if (get_cpu_local_var(min_cpu)->status != CPU_STATUS_RESERVED)
|
if (get_cpu_local_var(min_cpu)->status != CPU_STATUS_RESERVED)
|
||||||
get_cpu_local_var(min_cpu)->status = CPU_STATUS_RESERVED;
|
get_cpu_local_var(min_cpu)->status = CPU_STATUS_RESERVED;
|
||||||
|
|||||||
@ -143,6 +143,7 @@ struct program_load_desc {
|
|||||||
long stack_premap;
|
long stack_premap;
|
||||||
unsigned long mpol_bind_mask;
|
unsigned long mpol_bind_mask;
|
||||||
int uti_thread_rank; /* N-th clone() spawns a thread on Linux CPU */
|
int uti_thread_rank; /* N-th clone() spawns a thread on Linux CPU */
|
||||||
|
int uti_use_last_cpu; /* Work-around not to share CPU with OpenMP thread */
|
||||||
int nr_processes;
|
int nr_processes;
|
||||||
int process_rank;
|
int process_rank;
|
||||||
char shell_path[SHELL_PATH_MAX_LEN];
|
char shell_path[SHELL_PATH_MAX_LEN];
|
||||||
|
|||||||
@ -234,6 +234,7 @@ static long stack_max = -1;
|
|||||||
static struct rlimit rlim_stack;
|
static struct rlimit rlim_stack;
|
||||||
static char *mpol_bind_nodes = NULL;
|
static char *mpol_bind_nodes = NULL;
|
||||||
static int uti_thread_rank = 0;
|
static int uti_thread_rank = 0;
|
||||||
|
static int uti_use_last_cpu = 0;
|
||||||
|
|
||||||
/* Partitioned execution (e.g., for MPI) */
|
/* Partitioned execution (e.g., for MPI) */
|
||||||
static int nr_processes = 0;
|
static int nr_processes = 0;
|
||||||
@ -1794,6 +1795,12 @@ static struct option mcexec_options[] = {
|
|||||||
.flag = NULL,
|
.flag = NULL,
|
||||||
.val = 'u',
|
.val = 'u',
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
.name = "uti-use-last-cpu",
|
||||||
|
.has_arg = no_argument,
|
||||||
|
.flag = &uti_use_last_cpu,
|
||||||
|
.val = 1,
|
||||||
|
},
|
||||||
/* end */
|
/* end */
|
||||||
{ NULL, 0, NULL, 0, },
|
{ NULL, 0, NULL, 0, },
|
||||||
};
|
};
|
||||||
@ -2610,6 +2617,7 @@ int main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
desc->uti_thread_rank = uti_thread_rank;
|
desc->uti_thread_rank = uti_thread_rank;
|
||||||
|
desc->uti_use_last_cpu = uti_use_last_cpu;
|
||||||
|
|
||||||
/* user_start and user_end are set by this call */
|
/* user_start and user_end are set by this call */
|
||||||
if (ioctl(fd, MCEXEC_UP_PREPARE_IMAGE, (unsigned long)desc) != 0) {
|
if (ioctl(fd, MCEXEC_UP_PREPARE_IMAGE, (unsigned long)desc) != 0) {
|
||||||
|
|||||||
@ -506,6 +506,7 @@ static int process_msg_prepare_process(unsigned long rphys)
|
|||||||
}
|
}
|
||||||
|
|
||||||
proc->uti_thread_rank = pn->uti_thread_rank;
|
proc->uti_thread_rank = pn->uti_thread_rank;
|
||||||
|
proc->uti_use_last_cpu = pn->uti_use_last_cpu;
|
||||||
|
|
||||||
#ifdef PROFILE_ENABLE
|
#ifdef PROFILE_ENABLE
|
||||||
proc->profile = pn->profile;
|
proc->profile = pn->profile;
|
||||||
@ -612,7 +613,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
|
|||||||
case SCD_MSG_SCHEDULE_PROCESS:
|
case SCD_MSG_SCHEDULE_PROCESS:
|
||||||
thread = (struct thread *)packet->arg;
|
thread = (struct thread *)packet->arg;
|
||||||
|
|
||||||
cpuid = obtain_clone_cpuid(&thread->cpu_set);
|
cpuid = obtain_clone_cpuid(&thread->cpu_set, 0);
|
||||||
if (cpuid == -1) {
|
if (cpuid == -1) {
|
||||||
kprintf("No CPU available\n");
|
kprintf("No CPU available\n");
|
||||||
ret = -1;
|
ret = -1;
|
||||||
|
|||||||
@ -275,7 +275,7 @@ extern struct list_head resource_set_list;
|
|||||||
extern mcs_rwlock_lock_t resource_set_lock;
|
extern mcs_rwlock_lock_t resource_set_lock;
|
||||||
extern int idle_halt;
|
extern int idle_halt;
|
||||||
extern int allow_oversubscribe;
|
extern int allow_oversubscribe;
|
||||||
extern ihk_spinlock_t runq_reservation_lock; /* To serialize runq reservations for competeing fork()s */
|
extern ihk_spinlock_t runq_reservation_lock; /* mutex for cpuid reservation (clv->runq_reserved) */
|
||||||
|
|
||||||
struct process_hash {
|
struct process_hash {
|
||||||
struct list_head list[HASH_SIZE];
|
struct list_head list[HASH_SIZE];
|
||||||
@ -557,6 +557,7 @@ struct process {
|
|||||||
unsigned long heap_extension;
|
unsigned long heap_extension;
|
||||||
unsigned long mpol_bind_mask;
|
unsigned long mpol_bind_mask;
|
||||||
int uti_thread_rank; /* Spawn on Linux CPU when clone_count reaches this */
|
int uti_thread_rank; /* Spawn on Linux CPU when clone_count reaches this */
|
||||||
|
int uti_use_last_cpu; /* Work-around not to share CPU with OpenMP thread */
|
||||||
int clone_count;
|
int clone_count;
|
||||||
|
|
||||||
// perf_event
|
// perf_event
|
||||||
|
|||||||
@ -201,6 +201,7 @@ struct program_load_desc {
|
|||||||
long stack_premap;
|
long stack_premap;
|
||||||
unsigned long mpol_bind_mask;
|
unsigned long mpol_bind_mask;
|
||||||
int uti_thread_rank; /* N-th clone() spawns a thread on Linux CPU */
|
int uti_thread_rank; /* N-th clone() spawns a thread on Linux CPU */
|
||||||
|
int uti_use_last_cpu; /* Work-around not to share CPU with OpenMP thread */
|
||||||
int nr_processes;
|
int nr_processes;
|
||||||
int process_rank;
|
int process_rank;
|
||||||
char shell_path[SHELL_PATH_MAX_LEN];
|
char shell_path[SHELL_PATH_MAX_LEN];
|
||||||
@ -350,7 +351,7 @@ struct syscall_post {
|
|||||||
#define SYSCALL_FOOTER return do_syscall(&request, ihk_mc_get_processor_id(), 0)
|
#define SYSCALL_FOOTER return do_syscall(&request, ihk_mc_get_processor_id(), 0)
|
||||||
|
|
||||||
extern long do_syscall(struct syscall_request *req, int cpu, int pid);
|
extern long do_syscall(struct syscall_request *req, int cpu, int pid);
|
||||||
int obtain_clone_cpuid(cpu_set_t *cpu_set);
|
int obtain_clone_cpuid(cpu_set_t *cpu_set, int use_last);
|
||||||
extern long syscall_generic_forwarding(int n, ihk_mc_user_context_t *ctx);
|
extern long syscall_generic_forwarding(int n, ihk_mc_user_context_t *ctx);
|
||||||
|
|
||||||
#define DECLARATOR(number,name) __NR_##name = number,
|
#define DECLARATOR(number,name) __NR_##name = number,
|
||||||
|
|||||||
@ -2445,12 +2445,6 @@ unsigned long do_fork(int clone_flags, unsigned long newsp,
|
|||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
cpuid = obtain_clone_cpuid(&old->cpu_set);
|
|
||||||
if (cpuid == -1) {
|
|
||||||
kprintf("do_fork,core not available\n");
|
|
||||||
return -EAGAIN;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* N-th creation put the new on Linux CPU. It's turned off when zero is
|
/* N-th creation put the new on Linux CPU. It's turned off when zero is
|
||||||
set to uti_thread_rank. */
|
set to uti_thread_rank. */
|
||||||
if (oldproc->uti_thread_rank) {
|
if (oldproc->uti_thread_rank) {
|
||||||
@ -2463,6 +2457,12 @@ unsigned long do_fork(int clone_flags, unsigned long newsp,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cpuid = obtain_clone_cpuid(&old->cpu_set, old->mod_clone == SPAWN_TO_REMOTE && oldproc->uti_use_last_cpu);
|
||||||
|
if (cpuid == -1) {
|
||||||
|
kprintf("do_fork,core not available\n");
|
||||||
|
return -EAGAIN;
|
||||||
|
}
|
||||||
|
|
||||||
new = clone_thread(old, curpc,
|
new = clone_thread(old, curpc,
|
||||||
newsp ? newsp : cursp, clone_flags);
|
newsp ? newsp : cursp, clone_flags);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user