do_syscall(): allow descheduling threads in offloaded syscalls if the CPU core is oversubscribed

This commit is contained in:
Balazs Gerofi
2016-08-16 08:58:22 +09:00
parent c897a56c34
commit 3aa06444f4
4 changed files with 65 additions and 24 deletions

View File

@ -559,6 +559,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
struct ikc_scd_packet *packet = __packet;
struct ikc_scd_packet pckt;
int rc;
struct mcs_rwlock_node_irqsave lock;
struct thread *thread;
struct process *proc;
struct mcctrl_signal {
@ -625,6 +626,26 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
ret = 0;
break;
/*
* Used for syscall offload reply message to explicitly schedule in
* the waiting thread
*/
case SCD_MSG_WAKE_UP_SYSCALL_THREAD:
thread = find_thread(0, packet->ttid, &lock);
if (!thread) {
kprintf("%s: WARNING: no thread for SCD reply? TID: %d\n",
__FUNCTION__, packet->ttid);
ret = -EINVAL;
break;
}
thread_unlock(thread, &lock);
dkprintf("%s: SCD_MSG_WAKE_UP_SYSCALL_THREAD: waking up tid %d\n",
__FUNCTION__, packet->ttid);
waitq_wakeup(&thread->scd_wq);
ret = 0;
break;
case SCD_MSG_SEND_SIGNAL:
pp = ihk_mc_map_memory(NULL, packet->arg, sizeof(struct mcctrl_signal));
sp = (struct mcctrl_signal *)ihk_mc_map_virtual(pp, 1, PTATTR_WRITABLE | PTATTR_ACTIVE);

View File

@ -566,6 +566,9 @@ struct thread {
struct itimerval itimer_prof;
struct timespec itimer_virtual_value;
struct timespec itimer_prof_value;
/* Syscall offload wait queue head */
struct waitq scd_wq;
};
struct process_vm {

View File

@ -2576,13 +2576,8 @@ void schedule(void)
struct thread *last;
if (cpu_local_var(no_preempt)) {
dkprintf("no schedule() while no preemption! \n");
return;
}
if (cpu_local_var(current)
&& cpu_local_var(current)->in_syscall_offload) {
dkprintf("no schedule() while syscall offload!\n");
kprintf("%s: WARNING can't schedule() while no preemption, cnt: %d\n",
__FUNCTION__, cpu_local_var(no_preempt));
return;
}

View File

@ -186,6 +186,7 @@ long do_syscall(struct syscall_request *req, int cpu, int pid)
unsigned long irqstate;
struct thread *thread = cpu_local_var(current);
struct process *proc = thread->proc;
DECLARE_WAITQ_ENTRY(scd_wq_entry, thread);
dkprintf("SC(%d)[%3d] sending syscall\n",
ihk_mc_get_processor_id(),
@ -212,7 +213,7 @@ long do_syscall(struct syscall_request *req, int cpu, int pid)
* the pool may serve the request */
req->rtid = cpu_local_var(current)->tid;
req->ttid = 0;
res.req_thread_status = IHK_SCD_REQ_THREAD_SPINNING;
send_syscall(req, cpu, pid, &res);
dkprintf("%s: syscall num: %d waiting for Linux.. \n",
@ -224,36 +225,55 @@ long do_syscall(struct syscall_request *req, int cpu, int pid)
while (res.status != STATUS_COMPLETED) {
while (res.status == STATUS_IN_PROGRESS) {
struct cpu_local_var *v;
int call_schedule = 0;
int do_schedule = 0;
long runq_irqstate;
unsigned long flags;
DECLARE_WAITQ_ENTRY(scd_wq_entry, cpu_local_var(current));
cpu_pause();
/* XXX: Intel MPI + Intel OpenMP situation:
* While the MPI helper thread waits in a poll() call, the OpenMP master
* thread iterates through the CPU cores using setaffinity().
* Unless we give it a chance to run on this core, the two threads seem
* to deadlock. If the new thread were to make a system call on this
* core we would be in trouble. For now, allow it, but in the future
* we should have syscall channels per thread instead of per core,
* or we should multiplex syscall threads in mcexec. */
/* Spin if not preemptable */
if (cpu_local_var(no_preempt) || !thread->tid) {
continue;
}
/* Spin by default, but if re-schedule is requested let
* the other thread run */
runq_irqstate =
ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock));
v = get_this_cpu_local_var();
if (v->flags & CPU_FLAG_NEED_RESCHED) {
call_schedule = 1;
--thread->in_syscall_offload;
do_schedule = 1;
}
ihk_mc_spinlock_unlock(&v->runq_lock, runq_irqstate);
if (call_schedule) {
schedule();
++thread->in_syscall_offload;
if (!do_schedule) {
continue;
}
flags = cpu_disable_interrupt_save();
/* Try to sleep until notified */
if (__sync_bool_compare_and_swap(&res.req_thread_status,
IHK_SCD_REQ_THREAD_SPINNING,
IHK_SCD_REQ_THREAD_DESCHEDULED)) {
dkprintf("%s: tid %d waiting for syscall reply...\n",
__FUNCTION__, thread->tid);
waitq_init(&thread->scd_wq);
waitq_prepare_to_wait(&thread->scd_wq, &scd_wq_entry,
PS_INTERRUPTIBLE);
cpu_restore_interrupt(flags);
schedule();
waitq_finish_wait(&thread->scd_wq, &scd_wq_entry);
}
cpu_restore_interrupt(flags);
}
if (res.status == STATUS_PAGE_FAULT) {
dkprintf("STATUS_PAGE_FAULT in syscall, pid: %d\n",
cpu_local_var(current)->proc->pid);
@ -271,6 +291,7 @@ long do_syscall(struct syscall_request *req, int cpu, int pid)
req2.rtid = cpu_local_var(current)->tid;
req2.ttid = res.stid;
res.req_thread_status = IHK_SCD_REQ_THREAD_SPINNING;
send_syscall(&req2, cpu, pid, &res);
}
}
@ -809,7 +830,8 @@ terminate(int rc, int sig)
release_thread(mythread);
release_process_vm(vm);
schedule();
// no return
kprintf("%s: ERROR: returned from terminate() -> schedule()\n", __FUNCTION__);
panic("panic");
}
void