From 3aa06444f42df644d594c371c51664e140a3523b Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Tue, 16 Aug 2016 08:58:22 +0900 Subject: [PATCH] do_syscall(): allow descheduling threads in offloaded syscalls if CPU core oversubscribed --- kernel/host.c | 21 +++++++++++++++ kernel/include/process.h | 3 +++ kernel/process.c | 9 ++----- kernel/syscall.c | 56 ++++++++++++++++++++++++++++------------ 4 files changed, 65 insertions(+), 24 deletions(-) diff --git a/kernel/host.c b/kernel/host.c index 2f4f9138..1013ebfe 100644 --- a/kernel/host.c +++ b/kernel/host.c @@ -559,6 +559,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, struct ikc_scd_packet *packet = __packet; struct ikc_scd_packet pckt; int rc; + struct mcs_rwlock_node_irqsave lock; struct thread *thread; struct process *proc; struct mcctrl_signal { @@ -625,6 +626,26 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, ret = 0; break; + /* + * Used for syscall offload reply message to explicitly schedule in + * the waiting thread + */ + case SCD_MSG_WAKE_UP_SYSCALL_THREAD: + thread = find_thread(0, packet->ttid, &lock); + if (!thread) { + kprintf("%s: WARNING: no thread for SCD reply? TID: %d\n", + __FUNCTION__, packet->ttid); + ret = -EINVAL; + break; + } + thread_unlock(thread, &lock); + + dkprintf("%s: SCD_MSG_WAKE_UP_SYSCALL_THREAD: waking up tid %d\n", + __FUNCTION__, packet->ttid); + waitq_wakeup(&thread->scd_wq); + ret = 0; + break; + case SCD_MSG_SEND_SIGNAL: pp = ihk_mc_map_memory(NULL, packet->arg, sizeof(struct mcctrl_signal)); sp = (struct mcctrl_signal *)ihk_mc_map_virtual(pp, 1, PTATTR_WRITABLE | PTATTR_ACTIVE); diff --git a/kernel/include/process.h b/kernel/include/process.h index 4ad055f4..bd10f5dd 100644 --- a/kernel/include/process.h +++ b/kernel/include/process.h @@ -566,6 +566,9 @@ struct thread { struct itimerval itimer_prof; struct timespec itimer_virtual_value; struct timespec itimer_prof_value; + + /* Syscall offload wait queue head */ + struct waitq scd_wq; }; struct process_vm { diff --git a/kernel/process.c b/kernel/process.c index e82e1e2a..cf0e2089 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -2576,13 +2576,8 @@ void schedule(void) struct thread *last; if (cpu_local_var(no_preempt)) { - dkprintf("no schedule() while no preemption! \n"); - return; - } - - if (cpu_local_var(current) - && cpu_local_var(current)->in_syscall_offload) { - dkprintf("no schedule() while syscall offload!\n"); + kprintf("%s: WARNING can't schedule() while no preemption, cnt: %d\n", + __FUNCTION__, cpu_local_var(no_preempt)); return; } diff --git a/kernel/syscall.c b/kernel/syscall.c index 8d4b22ca..a7985728 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -186,6 +186,7 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) unsigned long irqstate; struct thread *thread = cpu_local_var(current); struct process *proc = thread->proc; + DECLARE_WAITQ_ENTRY(scd_wq_entry, thread); dkprintf("SC(%d)[%3d] sending syscall\n", ihk_mc_get_processor_id(), @@ -212,7 +213,7 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) * the pool may serve the request */ req->rtid = cpu_local_var(current)->tid; req->ttid = 0; - + res.req_thread_status = IHK_SCD_REQ_THREAD_SPINNING; send_syscall(req, cpu, pid, &res); dkprintf("%s: syscall num: %d waiting for Linux.. \n", @@ -224,36 +225,55 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) while (res.status != STATUS_COMPLETED) { while (res.status == STATUS_IN_PROGRESS) { struct cpu_local_var *v; - int call_schedule = 0; + int do_schedule = 0; long runq_irqstate; + unsigned long flags; + + DECLARE_WAITQ_ENTRY(scd_wq_entry, cpu_local_var(current)); cpu_pause(); - /* XXX: Intel MPI + Intel OpenMP situation: - * While the MPI helper thread waits in a poll() call the OpenMP master - * thread is iterating through the CPU cores using setaffinity(). - * Unless we give a chance to it on this core the two threads seem to - * hang in deadlock. If the new thread would make a system call on this - * core we would be in trouble. For now, allow it, but in the future - * we should have syscall channels for each thread instead of per core, - * or we should multiplex syscall threads in mcexec */ + /* Spin if not preemptable */ + if (cpu_local_var(no_preempt) || !thread->tid) { + continue; + } + + /* Spin by default, but if re-schedule is requested let + * the other thread run */ runq_irqstate = ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock)); v = get_this_cpu_local_var(); if (v->flags & CPU_FLAG_NEED_RESCHED) { - call_schedule = 1; - --thread->in_syscall_offload; + do_schedule = 1; } ihk_mc_spinlock_unlock(&v->runq_lock, runq_irqstate); - if (call_schedule) { - schedule(); - ++thread->in_syscall_offload; + if (!do_schedule) { + continue; } + + flags = cpu_disable_interrupt_save(); + + /* Try to sleep until notified */ + if (__sync_bool_compare_and_swap(&res.req_thread_status, + IHK_SCD_REQ_THREAD_SPINNING, + IHK_SCD_REQ_THREAD_DESCHEDULED)) { + + dkprintf("%s: tid %d waiting for syscall reply...\n", + __FUNCTION__, thread->tid); + waitq_init(&thread->scd_wq); + waitq_prepare_to_wait(&thread->scd_wq, &scd_wq_entry, + PS_INTERRUPTIBLE); + cpu_restore_interrupt(flags); + schedule(); + waitq_finish_wait(&thread->scd_wq, &scd_wq_entry); + } + + cpu_restore_interrupt(flags); } - + if (res.status == STATUS_PAGE_FAULT) { dkprintf("STATUS_PAGE_FAULT in syscall, pid: %d\n", cpu_local_var(current)->proc->pid); @@ -271,6 +291,7 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) req2.rtid = cpu_local_var(current)->tid; req2.ttid = res.stid; + res.req_thread_status = IHK_SCD_REQ_THREAD_SPINNING; send_syscall(&req2, cpu, pid, &res); } } @@ -809,7 +830,8 @@ terminate(int rc, int sig) release_thread(mythread); release_process_vm(vm); schedule(); - // no return + kprintf("%s: ERROR: returned from terminate() -> schedule()\n", __FUNCTION__); + panic("panic"); } void