support to utility thread offloading

This commit is contained in:
Tomoki Shirasawa
2017-06-27 13:27:09 +09:00
parent 2afc9d37d1
commit 07efb3ab9a
19 changed files with 2077 additions and 134 deletions

View File

@ -153,5 +153,8 @@ SYSCALL_HANDLED(700, get_cpu_id)
#ifdef PROFILE_ENABLE #ifdef PROFILE_ENABLE
SYSCALL_HANDLED(__NR_profile, profile) SYSCALL_HANDLED(__NR_profile, profile)
#endif // PROFILE_ENABLE #endif // PROFILE_ENABLE
SYSCALL_HANDLED(730, util_migrate_inter_kernel)
SYSCALL_HANDLED(731, util_indicate_clone)
SYSCALL_HANDLED(732, get_system)
/**** End of File ****/ /**** End of File ****/

View File

@ -239,6 +239,7 @@ enter_user_mode:
movq $0, %rdi movq $0, %rdi
movq %rsp, %rsi movq %rsp, %rsi
call check_signal call check_signal
call utilthr_migrate
movq $0, %rdi movq $0, %rdi
call set_cputime call set_cputime
POP_ALL_REGS POP_ALL_REGS

View File

@ -259,7 +259,7 @@ SYSCALL_DECLARE(rt_sigreturn)
extern struct cpu_local_var *clv; extern struct cpu_local_var *clv;
extern unsigned long do_kill(struct thread *thread, int pid, int tid, int sig, struct siginfo *info, int ptracecont); extern unsigned long do_kill(struct thread *thread, int pid, int tid, int sig, struct siginfo *info, int ptracecont);
extern void interrupt_syscall(int pid, int tid); extern void interrupt_syscall(struct thread *, int sig);
extern int num_processors; extern int num_processors;
#define RFLAGS_MASK (RFLAGS_CF | RFLAGS_PF | RFLAGS_AF | RFLAGS_ZF | \ #define RFLAGS_MASK (RFLAGS_CF | RFLAGS_PF | RFLAGS_AF | RFLAGS_ZF | \
@ -1230,6 +1230,12 @@ done:
return 0; return 0;
} }
if (tthread->thread_offloaded) {
interrupt_syscall(tthread, sig);
release_thread(tthread);
return 0;
}
doint = 0; doint = 0;
mcs_rwlock_writer_lock_noirq(savelock, &mcs_rw_node); mcs_rwlock_writer_lock_noirq(savelock, &mcs_rw_node);
@ -1275,8 +1281,6 @@ done:
cpu_restore_interrupt(irqstate); cpu_restore_interrupt(irqstate);
if (doint && !(mask & tthread->sigmask.__val[0])) { if (doint && !(mask & tthread->sigmask.__val[0])) {
int tid = tthread->tid;
int pid = tproc->pid;
int status = tthread->status; int status = tthread->status;
if (thread != tthread) { if (thread != tthread) {
@ -1286,7 +1290,7 @@ done:
} }
if(!tthread->proc->nohost) if(!tthread->proc->nohost)
interrupt_syscall(pid, tid); interrupt_syscall(tthread, 0);
if (status != PS_RUNNING) { if (status != PS_RUNNING) {
if(sig == SIGKILL){ if(sig == SIGKILL){
@ -1826,4 +1830,61 @@ out:
return error; return error;
} /* arch_map_vdso() */ } /* arch_map_vdso() */
void
save_uctx(void *uctx, struct x86_user_context *regs)
{
struct trans_uctx {
volatile int cond;
int fregsize;
unsigned long rax;
unsigned long rbx;
unsigned long rcx;
unsigned long rdx;
unsigned long rsi;
unsigned long rdi;
unsigned long rbp;
unsigned long r8;
unsigned long r9;
unsigned long r10;
unsigned long r11;
unsigned long r12;
unsigned long r13;
unsigned long r14;
unsigned long r15;
unsigned long rflags;
unsigned long rip;
unsigned long rsp;
unsigned long fs;
} *ctx = uctx;
if (!regs) {
asm ("movq %%gs:(%1),%0" : "=r"(regs) :
"r"(offsetof(struct x86_cpu_local_variables, tss.rsp0)));
regs--;
}
ctx->cond = 0;
ctx->rax = regs->gpr.rax;
ctx->rbx = regs->gpr.rbx;
ctx->rcx = regs->gpr.rcx;
ctx->rdx = regs->gpr.rdx;
ctx->rsi = regs->gpr.rsi;
ctx->rdi = regs->gpr.rdi;
ctx->rbp = regs->gpr.rbp;
ctx->r8 = regs->gpr.r8;
ctx->r9 = regs->gpr.r9;
ctx->r10 = regs->gpr.r10;
ctx->r11 = regs->gpr.r11;
ctx->r12 = regs->gpr.r12;
ctx->r13 = regs->gpr.r13;
ctx->r14 = regs->gpr.r14;
ctx->r15 = regs->gpr.r15;
ctx->rflags = regs->gpr.rflags;
ctx->rsp = regs->gpr.rsp;
ctx->rip = regs->gpr.rip;
ihk_mc_arch_get_special_register(IHK_ASR_X86_FS, &ctx->fs);
ctx->fregsize = 0;
}
/*** End of File ***/ /*** End of File ***/

3
configure vendored
View File

@ -4583,7 +4583,7 @@ fi
ac_config_headers="$ac_config_headers config.h" ac_config_headers="$ac_config_headers config.h"
ac_config_files="$ac_config_files Makefile executer/user/Makefile executer/kernel/mcctrl/Makefile executer/kernel/mcctrl/arch/x86_64/Makefile executer/kernel/mcoverlayfs/Makefile executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/Makefile executer/kernel/mcoverlayfs/linux-4.0.9/Makefile executer/kernel/mcoverlayfs/linux-4.6.7/Makefile kernel/Makefile kernel/Makefile.build arch/x86/tools/mcreboot-attached-mic.sh arch/x86/tools/mcshutdown-attached-mic.sh arch/x86/tools/mcreboot-builtin-x86.sh arch/x86/tools/mcreboot-smp-x86.sh arch/x86/tools/mcstop+release-smp-x86.sh arch/x86/tools/eclair-dump-backtrace.exp arch/x86/tools/mcshutdown-builtin-x86.sh arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in arch/x86/tools/irqbalance_mck.service arch/x86/tools/irqbalance_mck.in" ac_config_files="$ac_config_files Makefile executer/user/Makefile executer/user/arch/x86_64/Makefile executer/kernel/mcctrl/Makefile executer/kernel/mcctrl/arch/x86_64/Makefile executer/kernel/mcoverlayfs/Makefile executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/Makefile executer/kernel/mcoverlayfs/linux-4.0.9/Makefile executer/kernel/mcoverlayfs/linux-4.6.7/Makefile kernel/Makefile kernel/Makefile.build arch/x86/tools/mcreboot-attached-mic.sh arch/x86/tools/mcshutdown-attached-mic.sh arch/x86/tools/mcreboot-builtin-x86.sh arch/x86/tools/mcreboot-smp-x86.sh arch/x86/tools/mcstop+release-smp-x86.sh arch/x86/tools/eclair-dump-backtrace.exp arch/x86/tools/mcshutdown-builtin-x86.sh arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in arch/x86/tools/irqbalance_mck.service arch/x86/tools/irqbalance_mck.in"
if test "x$enable_dcfa" = xyes; then : if test "x$enable_dcfa" = xyes; then :
@ -5284,6 +5284,7 @@ do
"config.h") CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;; "config.h") CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;;
"Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;;
"executer/user/Makefile") CONFIG_FILES="$CONFIG_FILES executer/user/Makefile" ;; "executer/user/Makefile") CONFIG_FILES="$CONFIG_FILES executer/user/Makefile" ;;
"executer/user/arch/x86_64/Makefile") CONFIG_FILES="$CONFIG_FILES executer/user/arch/x86_64/Makefile" ;;
"executer/kernel/mcctrl/Makefile") CONFIG_FILES="$CONFIG_FILES executer/kernel/mcctrl/Makefile" ;; "executer/kernel/mcctrl/Makefile") CONFIG_FILES="$CONFIG_FILES executer/kernel/mcctrl/Makefile" ;;
"executer/kernel/mcctrl/arch/x86_64/Makefile") CONFIG_FILES="$CONFIG_FILES executer/kernel/mcctrl/arch/x86_64/Makefile" ;; "executer/kernel/mcctrl/arch/x86_64/Makefile") CONFIG_FILES="$CONFIG_FILES executer/kernel/mcctrl/arch/x86_64/Makefile" ;;
"executer/kernel/mcoverlayfs/Makefile") CONFIG_FILES="$CONFIG_FILES executer/kernel/mcoverlayfs/Makefile" ;; "executer/kernel/mcoverlayfs/Makefile") CONFIG_FILES="$CONFIG_FILES executer/kernel/mcoverlayfs/Makefile" ;;

View File

@ -356,6 +356,7 @@ AC_CONFIG_HEADERS([config.h])
AC_CONFIG_FILES([ AC_CONFIG_FILES([
Makefile Makefile
executer/user/Makefile executer/user/Makefile
executer/user/arch/x86_64/Makefile
executer/kernel/mcctrl/Makefile executer/kernel/mcctrl/Makefile
executer/kernel/mcctrl/arch/x86_64/Makefile executer/kernel/mcctrl/arch/x86_64/Makefile
executer/kernel/mcoverlayfs/Makefile executer/kernel/mcoverlayfs/Makefile

View File

@ -55,6 +55,16 @@
#define MCEXEC_UP_SYS_UMOUNT 0x30a02915 #define MCEXEC_UP_SYS_UMOUNT 0x30a02915
#define MCEXEC_UP_SYS_UNSHARE 0x30a02916 #define MCEXEC_UP_SYS_UNSHARE 0x30a02916
#define MCEXEC_UP_UTIL_THREAD1 0x30a02920
#define MCEXEC_UP_UTIL_THREAD2 0x30a02921
#define MCEXEC_UP_SIG_THREAD 0x30a02922
#define MCEXEC_UP_SWITCH_THREAD 0x30a02923
#define MCEXEC_UP_SYSCALL_THREAD 0x30a02924
#define MCEXEC_UP_TERMINATE_THREAD 0x30a02925
#define MCEXEC_UP_COPY_FROM_MCK 0x30a03000
#define MCEXEC_UP_COPY_TO_MCK 0x30a03001
#define MCEXEC_UP_DEBUG_LOG 0x40000000 #define MCEXEC_UP_DEBUG_LOG 0x40000000
#define MCEXEC_UP_TRANSFER_TO_REMOTE 0 #define MCEXEC_UP_TRANSFER_TO_REMOTE 0

View File

@ -196,3 +196,65 @@ out:
ihk_device_unmap_memory(dev, vdso_pa, sizeof(*vdso)); ihk_device_unmap_memory(dev, vdso_pa, sizeof(*vdso));
return; return;
} /* get_vdso_info() */ } /* get_vdso_info() */
/*
 * Return the 8-byte value stored at offset 0xaf80 of the gs segment,
 * cast to a pointer.
 *
 * NOTE(review): 0xaf80 is a hard-coded offset; presumably it is the host
 * kernel's per-CPU slot holding the saved user stack pointer, which
 * depends on the exact kernel build -- confirm before relying on it.
 */
void *
get_user_sp(void)
{
unsigned long usp;
asm volatile("movq %%gs:0xaf80, %0" : "=r" (usp));
return (void *)usp;
}
/*
 * Store @usp into the 8-byte slot at offset 0xaf80 of the gs segment
 * (the same slot get_user_sp() reads).
 *
 * NOTE(review): the extern declaration used by the ioctl code spells the
 * parameter as unsigned long rather than void * -- the two prototypes
 * should be unified in a shared header.
 */
void
set_user_sp(void *usp)
{
asm volatile("movq %0, %%gs:0xaf80" :: "r" (usp));
}
/*
 * Register context exchanged for utility-thread offloading.
 *
 * The field order is the same as the function-local struct trans_uctx
 * that save_uctx() fills in on the McKernel side; the two definitions
 * must be kept in sync by hand because they do not share a header.
 * In this file only the ->fs member is accessed (save_fs_ctx() and
 * get_fs_ctx()).
 */
struct trans_uctx {
volatile int cond;
int fregsize;
unsigned long rax;
unsigned long rbx;
unsigned long rcx;
unsigned long rdx;
unsigned long rsi;
unsigned long rdi;
unsigned long rbp;
unsigned long r8;
unsigned long r9;
unsigned long r10;
unsigned long r11;
unsigned long r12;
unsigned long r13;
unsigned long r14;
unsigned long r15;
unsigned long rflags;
unsigned long rip;
unsigned long rsp;
unsigned long fs;
};
/*
 * Write @fs into the MSR_FS_BASE model-specific register of the
 * executing CPU.
 */
void
restore_fs(unsigned long fs)
{
wrmsrl(MSR_FS_BASE, fs);
}
/*
 * Read MSR_FS_BASE on the executing CPU and record it in the ->fs member
 * of the struct trans_uctx that @ctx points to.
 */
void
save_fs_ctx(void *ctx)
{
	struct trans_uctx *dst = ctx;
	unsigned long fs_base;

	rdmsrl(MSR_FS_BASE, fs_base);
	dst->fs = fs_base;
}
/*
 * Return the ->fs member of the struct trans_uctx that @ctx points to.
 */
unsigned long
get_fs_ctx(void *ctx)
{
	return ((struct trans_uctx *)ctx)->fs;
}

View File

@ -38,6 +38,9 @@
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <asm/delay.h> #include <asm/delay.h>
#include <asm/io.h> #include <asm/io.h>
#include <linux/kallsyms.h>
#include <linux/syscalls.h>
#include <trace/events/sched.h>
#include "../../../config.h" #include "../../../config.h"
#include "mcctrl.h" #include "mcctrl.h"
#include <ihk/ihk_host_user.h> #include <ihk/ihk_host_user.h>
@ -85,6 +88,10 @@ int (*mcctrl_sys_umount)(char *dir_name, int flags) = sys_umount;
//extern struct mcctrl_channel *channels; //extern struct mcctrl_channel *channels;
int mcctrl_ikc_set_recv_cpu(ihk_os_t os, int cpu); int mcctrl_ikc_set_recv_cpu(ihk_os_t os, int cpu);
/*
 * Forward a system call issued by an offloaded host thread back to
 * McKernel.  The return type is long to match the function definition;
 * declaring it as int here made the two declarations incompatible.
 */
long syscall_backward(struct mcctrl_usrdata *, int, unsigned long, unsigned long,
                      unsigned long, unsigned long, unsigned long,
                      unsigned long, unsigned long *);
long mcexec_switch_thread(ihk_os_t os, unsigned long code, struct file *file);
static long mcexec_prepare_image(ihk_os_t os, static long mcexec_prepare_image(ihk_os_t os,
struct program_load_desc * __user udesc) struct program_load_desc * __user udesc)
@ -305,13 +312,38 @@ int mcexec_transfer_image(ihk_os_t os, struct remote_transfer *__user upt)
#endif #endif
} }
//extern unsigned long last_thread_exec; struct mcos_handler_info {
struct release_handler_info {
int pid; int pid;
int cpu; int cpu;
struct mcctrl_usrdata *ud;
struct file *file;
}; };
struct mcos_handler_info;
static struct host_thread *host_threads;
DEFINE_RWLOCK(host_thread_lock);
/*
 * One entry per host (Linux) thread registered through
 * mcexec_util_thread2().  Entries form a singly linked list headed by
 * host_threads and protected by host_thread_lock.
 */
struct host_thread {
/* Next entry in the host_threads list (NULL terminates it). */
struct host_thread *next;
/* Per-file handler info; release_handler() resets this to NULL. */
struct mcos_handler_info *handler;
/* task_tgid_vnr() of the registering thread. */
int pid;
/* task_pid_vnr() of the registering thread. */
int tid;
/* Value of get_user_sp() at registration; handed to set_user_sp()
 * by mcexec_switch_thread(). */
unsigned long usp;
/* FS value taken from the "lctx" context; restored by
 * mcexec_sig_thread() when its argument is non-zero. */
unsigned long lfs;
/* FS value taken from the "rctx" context; restored by
 * mcexec_sig_thread() when its argument is zero. */
unsigned long rfs;
};
/*
 * Allocate and initialise a handler-info record for @file on @os.
 *
 * The record is zero-filled, then ->ud is set to the OS user data and
 * ->file to @file; the caller fills in ->pid / ->cpu.
 *
 * Returns the new record, or NULL when the allocation fails (the
 * original code passed the unchecked kmalloc() result to memset()).
 * Ownership passes to the caller.
 */
struct mcos_handler_info *new_mcos_handler_info(ihk_os_t os, struct file *file)
{
	struct mcos_handler_info *info;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info)
		return NULL;

	info->ud = ihk_host_os_get_usrdata(os);
	info->file = file;
	return info;
}
static long mcexec_debug_log(ihk_os_t os, unsigned long arg) static long mcexec_debug_log(ihk_os_t os, unsigned long arg)
{ {
struct ikc_scd_packet isp; struct ikc_scd_packet isp;
@ -326,11 +358,43 @@ static long mcexec_debug_log(ihk_os_t os, unsigned long arg)
int mcexec_close_exec(ihk_os_t os); int mcexec_close_exec(ihk_os_t os);
int mcexec_destroy_per_process_data(ihk_os_t os); int mcexec_destroy_per_process_data(ihk_os_t os);
#if 0
static unsigned long mod_sys_call_table(int num, unsigned long func)
{
static unsigned long *sys_call_table = NULL;
unsigned long oldval;
if (!sys_call_table) {
sys_call_table =
(unsigned long *)kallsyms_lookup_name("sys_call_table");
if (!sys_call_table) {
printk("sys_call_table not found\n");
return -ENOENT;
}
}
oldval = sys_call_table[num];
if (func && sys_call_table[num] != func) {
sys_call_table[num] = func;
}
return oldval;
}
#endif
static void release_handler(ihk_os_t os, void *param) static void release_handler(ihk_os_t os, void *param)
{ {
struct release_handler_info *info = param; struct mcos_handler_info *info = param;
struct ikc_scd_packet isp; struct ikc_scd_packet isp;
int os_ind = ihk_host_os_get_index(os); int os_ind = ihk_host_os_get_index(os);
unsigned long flags;
struct host_thread *thread;
write_lock_irqsave(&host_thread_lock, flags);
for (thread = host_threads; thread; thread = thread->next) {
if (thread->handler == info) {
thread->handler = NULL;
}
}
write_unlock_irqrestore(&host_thread_lock, flags);
mcexec_close_exec(os); mcexec_close_exec(os);
@ -356,14 +420,15 @@ static long mcexec_newprocess(ihk_os_t os,
struct file *file) struct file *file)
{ {
struct newprocess_desc desc; struct newprocess_desc desc;
struct release_handler_info *info; struct mcos_handler_info *info;
if (copy_from_user(&desc, udesc, sizeof(struct newprocess_desc))) { if (copy_from_user(&desc, udesc, sizeof(struct newprocess_desc))) {
return -EFAULT; return -EFAULT;
} }
info = kmalloc(sizeof(struct release_handler_info), GFP_KERNEL); info = new_mcos_handler_info(os, file);
info->pid = desc.pid; info->pid = desc.pid;
ihk_os_register_release_handler(file, release_handler, info); ihk_os_register_release_handler(file, release_handler, info);
ihk_os_set_mcos_private_data(file, info);
return 0; return 0;
} }
@ -375,7 +440,7 @@ static long mcexec_start_image(ihk_os_t os,
struct ikc_scd_packet isp; struct ikc_scd_packet isp;
struct mcctrl_channel *c; struct mcctrl_channel *c;
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
struct release_handler_info *info; struct mcos_handler_info *info;
desc = kmalloc(sizeof(*desc), GFP_KERNEL); desc = kmalloc(sizeof(*desc), GFP_KERNEL);
if (!desc) { if (!desc) {
@ -390,10 +455,11 @@ static long mcexec_start_image(ihk_os_t os,
return -EFAULT; return -EFAULT;
} }
info = kmalloc(sizeof(struct release_handler_info), GFP_KERNEL); info = new_mcos_handler_info(os, file);
info->pid = desc->pid; info->pid = desc->pid;
info->cpu = desc->cpu; info->cpu = desc->cpu;
ihk_os_register_release_handler(file, release_handler, info); ihk_os_register_release_handler(file, release_handler, info);
ihk_os_set_mcos_private_data(file, info);
c = usrdata->channels + desc->cpu; c = usrdata->channels + desc->cpu;
@ -937,7 +1003,6 @@ void mcctrl_put_per_proc_data(struct mcctrl_per_proc_data *ppd)
for (i = 0; i < MCCTRL_PER_THREAD_DATA_HASH_SIZE; i++) { for (i = 0; i < MCCTRL_PER_THREAD_DATA_HASH_SIZE; i++) {
struct mcctrl_per_thread_data *ptd; struct mcctrl_per_thread_data *ptd;
struct mcctrl_per_thread_data *next; struct mcctrl_per_thread_data *next;
struct ikc_scd_packet *packet;
list_for_each_entry_safe(ptd, next, list_for_each_entry_safe(ptd, next,
ppd->per_thread_data_hash + i, hash) { ppd->per_thread_data_hash + i, hash) {
@ -1207,6 +1272,7 @@ retry_alloc:
ret = -EINVAL;; ret = -EINVAL;;
goto put_ppd_out; goto put_ppd_out;
} }
req->cpu = packet->ref;
ret = 0; ret = 0;
goto put_ppd_out; goto put_ppd_out;
@ -2015,6 +2081,551 @@ void mcctrl_perf_ack(ihk_os_t os, struct ikc_scd_packet *packet)
} }
extern void *get_user_sp(void);
extern void set_user_sp(unsigned long);
extern void restore_fs(unsigned long fs);
extern void save_fs_ctx(void *);
extern unsigned long get_fs_ctx(void *);
/*
 * MCEXEC_UP_UTIL_THREAD1 handler.
 *
 * @arg points to a user-space array of six pointer-sized slots:
 *   [0] address of the remote context page (passed to
 *       ihk_device_map_memory())
 *   [1] user buffer that receives a PAGE_SIZE copy of that page
 *   [3] output slot: receives the address of the current task_struct
 *   [4], [5] two values written back into the first two words of the
 *       remote context page
 *
 * Returns 0, or -EFAULT when a user copy fails.  The write-back into the
 * remote page and the unmapping are performed even if copying to user
 * space failed.
 */
long
mcexec_util_thread1(ihk_os_t os, unsigned long arg, struct file *file)
{
	void **__user uparam = (void ** __user)arg;
	void *kparam[6];
	unsigned long task_addr = (unsigned long)current;
	unsigned long remote_pa;
	unsigned long mapped_pa;
	unsigned long release_addr;
	unsigned long release_len;
	void *__user user_copy;
	void *remote;
	int rc = 0;

	if (copy_from_user(kparam, uparam, sizeof(void *) * 6)) {
		return -EFAULT;
	}

	remote_pa = (unsigned long)kparam[0];
	user_copy = (void *__user)kparam[1];
	release_addr = (unsigned long)kparam[4];
	release_len = (unsigned long)kparam[5];

	/* Map the remote context page into the host kernel. */
	mapped_pa = ihk_device_map_memory(ihk_os_to_dev(os), remote_pa,
	                                  PAGE_SIZE);
#ifdef CONFIG_MIC
	remote = ioremap_wc(mapped_pa, PAGE_SIZE);
#else
	remote = ihk_device_map_virtual(ihk_os_to_dev(os), mapped_pa,
	                                PAGE_SIZE, NULL, 0);
#endif

	/* Second copy is attempted only when the first one succeeded. */
	if (copy_to_user(user_copy, remote, PAGE_SIZE)) {
		rc = -EFAULT;
	}
	else if (copy_to_user((unsigned long *)(uparam + 3), &task_addr,
	                      sizeof(unsigned long))) {
		rc = -EFAULT;
	}

	/* Reuse the head of the remote page to pass the two values back. */
	((unsigned long *)remote)[0] = release_addr;
	((unsigned long *)remote)[1] = release_len;

#ifdef CONFIG_MIC
	iounmap(remote);
#else
	ihk_device_unmap_virtual(ihk_os_to_dev(os), remote, PAGE_SIZE);
#endif
	ihk_device_unmap_memory(ihk_os_to_dev(os), mapped_pa, PAGE_SIZE);

	return rc;
}
#if 0
static struct {
unsigned long org_futex;
unsigned long org_brk;
unsigned long org_clone;
unsigned long org_fork;
unsigned long org_vfork;
unsigned long org_gettid;
unsigned long org_mmap;
unsigned long org_munmap;
unsigned long org_mprotect;
unsigned long org_mremap;
unsigned long org_execve;
unsigned long org_exit_group;
unsigned long org_exit;
} org_syscalls;
#endif
/*
 * Find the host_threads entry whose pid/tid match the calling task.
 *
 * The list is walked under the read side of host_thread_lock; the lock
 * is dropped before returning, so the returned pointer (or NULL when no
 * entry matches) is not protected against concurrent removal.
 */
static inline struct host_thread *get_host_thread(void)
{
	const int tgid = task_tgid_vnr(current);
	const int self = task_pid_vnr(current);
	struct host_thread *it;
	unsigned long flags;

	read_lock_irqsave(&host_thread_lock, flags);
	it = host_threads;
	while (it && !(it->pid == tgid && it->tid == self)) {
		it = it->next;
	}
	read_unlock_irqrestore(&host_thread_lock, flags);

	return it;
}
#if 0
#define DEF_SYSCALL(f, v, n) \
static asmlinkage unsigned long f(unsigned long p1, unsigned long p2, \
unsigned long p3, unsigned long p4, unsigned long p5, \
unsigned long p6)\
{\
struct host_thread *thread = get_host_thread();\
\
if (thread) {\
unsigned long ret;\
int rc;\
\
rc = syscall_backward(thread->handler->ud, n, p1, p2, p3, p4, \
p5, p6, &ret);\
if (rc < 0)\
return rc;\
return ret;\
}\
\
return ((asmlinkage unsigned long (*)(unsigned long, unsigned long,\
unsigned long, unsigned long, unsigned long, unsigned long))\
org_syscalls.v)(p1, p2, p3, p4, p5, p6);\
}
#define BAD_SYSCALL(f, v) \
static asmlinkage unsigned long f(unsigned long p1, unsigned long p2, \
unsigned long p3, unsigned long p4, unsigned long p5, \
unsigned long p6)\
{\
struct host_thread *thread = get_host_thread();\
\
if (thread) {\
return -ENOSYS;\
}\
\
return ((asmlinkage unsigned long (*)(unsigned long, unsigned long,\
unsigned long, unsigned long, unsigned long, unsigned long))\
org_syscalls.v)(p1, p2, p3, p4, p5, p6);\
}
DEF_SYSCALL(mod_futex, org_futex, __NR_futex)
DEF_SYSCALL(mod_brk, org_brk, __NR_brk)
DEF_SYSCALL(mod_gettid, org_gettid, __NR_gettid)
DEF_SYSCALL(mod_mmap, org_mmap, __NR_mmap)
DEF_SYSCALL(mod_munmap, org_munmap, __NR_munmap)
DEF_SYSCALL(mod_mremap, org_mremap, __NR_mremap)
DEF_SYSCALL(mod_mprotect, org_mprotect, __NR_mprotect)
BAD_SYSCALL(mod_clone, org_clone)
BAD_SYSCALL(mod_fork, org_fork)
BAD_SYSCALL(mod_vfork, org_vfork)
BAD_SYSCALL(mod_execve, org_execve)
static asmlinkage unsigned long mod_exit(int exit_status)
{
struct host_thread *thread = get_host_thread();
if (thread) {
unsigned long code = (exit_status & 255) << 8;
ihk_os_t os = thread->handler->ud->os;
struct file *file = thread->handler->file;
mcexec_switch_thread(os, code, file);
return 0;
}
return ((asmlinkage unsigned long (*)(int))
org_syscalls.org_exit)(exit_status);
}
static asmlinkage unsigned long mod_exit_group(int exit_status)
{
struct host_thread *thread = get_host_thread();
if (thread) {
unsigned long code = (exit_status & 255) << 8;
ihk_os_t os = thread->handler->ud->os;
struct file *file = thread->handler->file;
code |= 0x100000000;
mcexec_switch_thread(os, code, file);
return 0;
}
return ((asmlinkage unsigned long (*)(int))
org_syscalls.org_exit_group)(exit_status);
}
static void save_syscalls(void)
{
#define SAVE_SYSCALL(v, f, n) \
do { \
unsigned long org; \
if (org_syscalls.v == 0L && \
(org = mod_sys_call_table(n, 0L)) != (unsigned long)f) \
org_syscalls.v = org; \
} while (0)
SAVE_SYSCALL(org_futex, mod_futex, __NR_futex);
SAVE_SYSCALL(org_brk, mod_brk, __NR_brk);
SAVE_SYSCALL(org_clone, mod_clone, __NR_clone);
SAVE_SYSCALL(org_fork, mod_fork, __NR_fork);
SAVE_SYSCALL(org_vfork, mod_vfork, __NR_vfork);
SAVE_SYSCALL(org_gettid, mod_gettid, __NR_gettid);
SAVE_SYSCALL(org_mmap, mod_mmap, __NR_mmap);
SAVE_SYSCALL(org_munmap, mod_munmap, __NR_munmap);
SAVE_SYSCALL(org_mprotect, mod_mprotect, __NR_mprotect);
SAVE_SYSCALL(org_mremap, mod_mremap, __NR_mremap);
SAVE_SYSCALL(org_execve, mod_execve, __NR_execve);
SAVE_SYSCALL(org_exit_group, mod_exit_group, __NR_exit_group);
SAVE_SYSCALL(org_exit, mod_exit, __NR_exit);
}
static void mod_syscalls(void)
{
#define MOD_SYSCALL(f, n) \
do { \
mod_sys_call_table(n, (unsigned long)f); \
} while (0)
MOD_SYSCALL(mod_futex, __NR_futex);
MOD_SYSCALL(mod_brk, __NR_brk);
MOD_SYSCALL(mod_clone, __NR_clone);
MOD_SYSCALL(mod_fork, __NR_fork);
MOD_SYSCALL(mod_vfork, __NR_vfork);
MOD_SYSCALL(mod_gettid, __NR_gettid);
MOD_SYSCALL(mod_mmap, __NR_mmap);
MOD_SYSCALL(mod_munmap, __NR_munmap);
MOD_SYSCALL(mod_mprotect, __NR_mprotect);
MOD_SYSCALL(mod_mremap, __NR_mremap);
MOD_SYSCALL(mod_execve, __NR_execve);
MOD_SYSCALL(mod_exit_group, __NR_exit_group);
MOD_SYSCALL(mod_exit, __NR_exit);
}
static void restore_syscalls(void)
{
#define RESTORE_SYSCALL(v, n) \
do { \
mod_sys_call_table(n, org_syscalls.v); \
} while (0)
RESTORE_SYSCALL(org_futex, __NR_futex);
RESTORE_SYSCALL(org_brk, __NR_brk);
RESTORE_SYSCALL(org_clone, __NR_clone);
RESTORE_SYSCALL(org_fork, __NR_fork);
RESTORE_SYSCALL(org_vfork, __NR_vfork);
RESTORE_SYSCALL(org_gettid, __NR_gettid);
RESTORE_SYSCALL(org_mmap, __NR_mmap);
RESTORE_SYSCALL(org_munmap, __NR_munmap);
RESTORE_SYSCALL(org_mprotect, __NR_mprotect);
RESTORE_SYSCALL(org_mremap, __NR_mremap);
RESTORE_SYSCALL(org_execve, __NR_execve);
RESTORE_SYSCALL(org_exit_group, __NR_exit_group);
RESTORE_SYSCALL(org_exit, __NR_exit);
}
static void process_exit_prober(void *data, struct task_struct *tsk)
{
struct mcos_handler_info *info;
unsigned long flags;
struct host_thread *thread;
struct host_thread *prev;
int pid = task_tgid_vnr(tsk);
int tid = task_pid_vnr(tsk);
int code;
struct ikc_scd_packet *packet;
struct mcctrl_usrdata *usrdata = NULL;
struct mcctrl_per_proc_data *ppd = NULL;
if (!host_threads) {
return;
}
write_lock_irqsave(&host_thread_lock, flags);
for (prev = NULL, thread = host_threads; thread;
prev = thread, thread = thread->next)
if(thread->pid == pid && thread->tid == tid)
break;
if (!thread) {
write_unlock_irqrestore(&host_thread_lock, flags);
return;
}
info = thread->handler;
if (!info)
goto err;
usrdata = info->ud;
code = tsk->exit_code;
ppd = mcctrl_get_per_proc_data(usrdata, pid);
if (!ppd) {
kprintf("%s: ERROR: no packet registered for TID %d\n",
__FUNCTION__, task_pid_vnr(current));
goto err;
}
packet = (struct ikc_scd_packet *)mcctrl_get_per_thread_data(ppd, tsk);
if (!packet) {
goto err;
}
mcctrl_delete_per_thread_data(ppd, tsk);
__return_syscall(usrdata->os, packet, code, tid);
ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet,
(usrdata->channels + packet->ref)->c);
err:
if (ppd)
mcctrl_put_per_proc_data(ppd);
if (prev)
prev->next = thread->next;
else
host_threads = thread->next;
write_unlock_irqrestore(&host_thread_lock, flags);
kfree(thread);
read_lock_irqsave(&host_thread_lock, flags);
if (!host_threads) {
restore_syscalls();
unregister_trace_sched_process_exit(process_exit_prober, NULL);
}
read_unlock_irqrestore(&host_thread_lock, flags);
}
#endif
/*
 * MCEXEC_UP_UTIL_THREAD2 handler: register the calling host thread in
 * the host_threads list.
 *
 * @arg points to a user-space array of pointer-sized slots; slot [1] is
 * the remote context ("rctx") and slot [2] the local context ("lctx").
 * The current FS base is saved into lctx, and the FS values of both
 * contexts plus the current user stack pointer are recorded in the new
 * list entry.
 *
 * Fixes over the previous version:
 *  - the parameter array is fetched with copy_from_user() instead of
 *    indexing the __user pointer directly;
 *  - the allocation is checked, and done before any side effect.
 *
 * Returns 0 on success, -EFAULT if the parameter array cannot be read,
 * -ENOMEM if the list entry cannot be allocated.
 */
long
mcexec_util_thread2(ihk_os_t os, unsigned long arg, struct file *file)
{
	void *usp = get_user_sp();
	struct mcos_handler_info *info;
	struct host_thread *thread;
	unsigned long flags;
	void **__user uparam = (void **__user)arg;
	void *param[3];
	void *__user rctx;
	void *__user lctx;

	if (copy_from_user(param, uparam, sizeof(param))) {
		return -EFAULT;
	}
	rctx = (void *__user)param[1];
	lctx = (void *__user)param[2];

	thread = kzalloc(sizeof(*thread), GFP_KERNEL);
	if (!thread) {
		return -ENOMEM;
	}

	/*
	 * NOTE(review): save_fs_ctx()/get_fs_ctx() receive these user-space
	 * addresses unchanged and appear to dereference them directly --
	 * confirm, and convert them to copy_{from,to}_user() based accessors.
	 */
	save_fs_ctx(lctx);
	info = ihk_os_get_mcos_private_data(file);

	thread->pid = task_tgid_vnr(current);
	thread->tid = task_pid_vnr(current);
	thread->usp = (unsigned long)usp;
	thread->lfs = get_fs_ctx(lctx);
	thread->rfs = get_fs_ctx(rctx);
	thread->handler = info;

	write_lock_irqsave(&host_thread_lock, flags);
#if 0
	if (!host_threads) {
		save_syscalls();
		register_trace_sched_process_exit(process_exit_prober, NULL);
	}
#endif
	/* Push onto the head of the list. */
	thread->next = host_threads;
	host_threads = thread;
	write_unlock_irqrestore(&host_thread_lock, flags);
#if 0
	mod_syscalls();
#endif
	return 0;
}
/*
 * MCEXEC_UP_SIG_THREAD handler: switch the FS base of the calling
 * thread between its two recorded values.
 *
 * A non-zero @arg restores the entry's ->lfs value, zero restores
 * ->rfs.  Returns 0 on success, or -EINVAL when the calling thread has
 * no entry in host_threads.
 */
long
mcexec_sig_thread(ihk_os_t os, unsigned long arg, struct file *file)
{
	/* Same pid/tid lookup under the read lock as before. */
	struct host_thread *self = get_host_thread();

	if (!self) {
		return -EINVAL;
	}

	restore_fs(arg ? self->lfs : self->rfs);
	return 0;
}
/*
 * MCEXEC_UP_SWITCH_THREAD handler: finish offloading for the calling
 * host thread.
 *
 * The thread's entry is looked up in host_threads by tid, the pending
 * syscall packet registered for the current task (if any) is answered
 * with @code and released, the entry is unlinked and freed, and the
 * user stack pointer recorded at registration is reinstated.
 *
 * Fix over the previous version: the saved stack pointer is read from
 * the entry *before* kfree(); it used to be read from freed memory.
 *
 * Returns 0 once the entry has been removed (also when no per-process
 * data or packet was found), or -EINVAL when the calling thread has no
 * entry.
 */
long
mcexec_switch_thread(ihk_os_t os, unsigned long code, struct file *file)
{
	int tid = task_pid_vnr(current);
	int pid = task_tgid_vnr(current);
	unsigned long flags;
	unsigned long usp;
	struct host_thread *thread;
	struct host_thread *prev;
	struct ikc_scd_packet *packet;
	struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
	struct mcctrl_per_proc_data *ppd;

	write_lock_irqsave(&host_thread_lock, flags);
	for (prev = NULL, thread = host_threads; thread;
	     prev = thread, thread = thread->next)
		if(thread->tid == tid)
			break;

	if (!thread) {
		write_unlock_irqrestore(&host_thread_lock, flags);
		return -EINVAL;
	}

	ppd = mcctrl_get_per_proc_data(usrdata, pid);
	if (!ppd) {
		kprintf("%s: ERROR: no per-process structure for PID %d??\n",
		        __FUNCTION__, task_tgid_vnr(current));
		goto err;
	}
	packet = (struct ikc_scd_packet *)mcctrl_get_per_thread_data(ppd,
	                                                             current);
	if (!packet) {
		kprintf("%s: ERROR: no packet registered for TID %d\n",
		        __FUNCTION__, tid);
		goto err;
	}
	mcctrl_delete_per_thread_data(ppd, current);
	__return_syscall(usrdata->os, packet, code, tid);
	ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet,
	                       (usrdata->channels + packet->ref)->c);
err:
	if(ppd)
		mcctrl_put_per_proc_data(ppd);

	/* Unlink the entry while still holding the write lock. */
	if (prev)
		prev->next = thread->next;
	else
		host_threads = thread->next;
	write_unlock_irqrestore(&host_thread_lock, flags);

	/* Take what is still needed from the entry before freeing it. */
	usp = thread->usp;
	kfree(thread);
#if 0
	read_lock_irqsave(&host_thread_lock, flags);
	if (!host_threads) {
		restore_syscalls();
		unregister_trace_sched_process_exit(process_exit_prober, NULL);
	}
	read_unlock_irqrestore(&host_thread_lock, flags);
#endif
	set_user_sp(usp);
	return 0;
}
/*
 * MCEXEC_UP_TERMINATE_THREAD handler: tear down the host_threads entry
 * of another thread.
 *
 * @param is the ioctl argument: a user-space array of four unsigned
 * longs holding
 *   [0] pid, [1] tid, [2] value returned to McKernel for the pending
 *   syscall, [3] task_struct address used as the per-thread-data key.
 *
 * Fix over the previous version: the array is fetched with
 * copy_from_user() instead of being dereferenced directly in kernel
 * context.
 *
 * NOTE(review): slot [3] is a kernel pointer supplied by user space and
 * is handed to mcctrl_get_per_thread_data()/
 * mcctrl_delete_per_thread_data() without validation -- confirm that
 * those helpers only use it as a lookup key.
 *
 * Returns 0 once the entry has been removed, -EFAULT if the parameter
 * array cannot be read, -EINVAL if no entry with the given tid exists.
 */
long
mcexec_terminate_thread(ihk_os_t os, unsigned long *param, struct file *file)
{
	unsigned long kparam[4];
	int pid;
	int tid;
	struct task_struct *tsk;
	unsigned long flags;
	struct host_thread *thread;
	struct host_thread *prev;
	struct ikc_scd_packet *packet;
	struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
	struct mcctrl_per_proc_data *ppd;

	if (copy_from_user(kparam, (unsigned long __user *)param,
	                   sizeof(kparam))) {
		return -EFAULT;
	}
	pid = kparam[0];
	tid = kparam[1];
	tsk = (struct task_struct *)kparam[3];

	printk("mcexec_terminate_thread\n");
	write_lock_irqsave(&host_thread_lock, flags);
	for (prev = NULL, thread = host_threads; thread;
	     prev = thread, thread = thread->next) {
		printk("thread tid=%d\n", thread->tid);
		if(thread->tid == tid)
			break;
	}

	if (!thread) {
		write_unlock_irqrestore(&host_thread_lock, flags);
		printk("mcexec_terminate_thread no thread pid=%d tid=%d\n", pid, tid);
		return -EINVAL;
	}

	ppd = mcctrl_get_per_proc_data(usrdata, pid);
	if (!ppd) {
		kprintf("%s: ERROR: no per-process structure for PID %d??\n",
		        __FUNCTION__, pid);
		goto err;
	}
	packet = (struct ikc_scd_packet *)mcctrl_get_per_thread_data(ppd, tsk);
	if (!packet) {
		kprintf("%s: ERROR: no packet registered for TID %d\n",
		        __FUNCTION__, tid);
		goto err;
	}
	mcctrl_delete_per_thread_data(ppd, tsk);
	__return_syscall(usrdata->os, packet, kparam[2], tid);
	ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet,
	                       (usrdata->channels + packet->ref)->c);
err:
	if(ppd)
		mcctrl_put_per_proc_data(ppd);

	/* Unlink the entry while still holding the write lock. */
	if (prev)
		prev->next = thread->next;
	else
		host_threads = thread->next;
	write_unlock_irqrestore(&host_thread_lock, flags);
	kfree(thread);
	return 0;
}
/*
 * MCEXEC_UP_SYSCALL_THREAD handler: execute one system call on behalf
 * of an offloaded host thread.
 *
 * @arg points to a user-space record { number, args[6], ret }.
 * exit and exit_group are turned into a call to mcexec_switch_thread()
 * with the low 8 bits of args[0] shifted into bits 8..15 (bit 32 is
 * additionally set for exit_group); this path always returns 0.  Any
 * other call is forwarded through syscall_backward(); its result is
 * written to the record's ret field and its status is returned.
 *
 * Returns -EFAULT when the record cannot be read or ret cannot be
 * written.
 */
long
mcexec_syscall_thread(ihk_os_t os, unsigned long arg, struct file *file)
{
	struct syscall_struct {
		int number;
		unsigned long args[6];
		unsigned long ret;
	};
	struct syscall_struct __user *ureq =
		(struct syscall_struct __user *)arg;
	struct syscall_struct req;
	int is_exit_group;
	int status;

	if (copy_from_user(&req, ureq, sizeof req)) {
		return -EFAULT;
	}

	is_exit_group = (req.number == __NR_exit_group);
	if (is_exit_group || req.number == __NR_exit) {
		unsigned long code = (req.args[0] & 255) << 8;

		if (is_exit_group) {
			code |= 0x100000000L;
		}
		mcexec_switch_thread(os, code, file);
		return 0;
	}

	status = syscall_backward(ihk_host_os_get_usrdata(os), req.number,
	                          req.args[0], req.args[1], req.args[2],
	                          req.args[3], req.args[4], req.args[5],
	                          &req.ret);

	if (copy_to_user(&ureq->ret, &req.ret, sizeof(unsigned long))) {
		return -EFAULT;
	}
	return status;
}
/*
 * MCEXEC_UP_COPY_FROM_MCK handler: copy memory identified by a physical
 * address into a user buffer.
 *
 * @arg is the ioctl argument: a user-space array of three unsigned
 * longs { destination user address, source physical address, length }.
 *
 * Fix over the previous version: the array is fetched with
 * copy_from_user() instead of being dereferenced directly in kernel
 * context.
 *
 * NOTE(review): the physical address and length come from user space
 * and are not range-checked here, so the caller can read any memory
 * reachable through phys_to_virt() -- confirm that access to this ioctl
 * is suitably restricted or add validation.
 *
 * Returns 0 on success, -EFAULT if either copy fails.
 */
long
mcexec_copy_from_mck(ihk_os_t os, unsigned long *arg)
{
	unsigned long karg[3];
	void __user *to;
	void *from;
	unsigned long len;

	if (copy_from_user(karg, (unsigned long __user *)arg, sizeof(karg))) {
		return -EFAULT;
	}
	to = (void __user *)karg[0];
	from = phys_to_virt(karg[1]);
	len = karg[2];

	if (copy_to_user(to, from, len)) {
		return -EFAULT;
	}
	return 0;
}
/*
 * MCEXEC_UP_COPY_TO_MCK handler: copy a user buffer into memory
 * identified by a physical address.
 *
 * @arg is the ioctl argument: a user-space array of three unsigned
 * longs { destination physical address, source user address, length }.
 *
 * Fix over the previous version: the array is fetched with
 * copy_from_user() instead of being dereferenced directly in kernel
 * context.
 *
 * NOTE(review): the physical address and length come from user space
 * and are not range-checked here, so the caller can overwrite any
 * memory reachable through phys_to_virt() -- confirm that access to
 * this ioctl is suitably restricted or add validation.
 *
 * Returns 0 on success, -EFAULT if either copy fails.
 */
long
mcexec_copy_to_mck(ihk_os_t os, unsigned long *arg)
{
	unsigned long karg[3];
	void *to;
	void __user *from;
	unsigned long len;

	if (copy_from_user(karg, (unsigned long __user *)arg, sizeof(karg))) {
		return -EFAULT;
	}
	to = phys_to_virt(karg[0]);
	from = (void __user *)karg[1];
	len = karg[2];

	if (copy_from_user(to, from, len)) {
		return -EFAULT;
	}
	return 0;
}
long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg, long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg,
struct file *file) struct file *file)
{ {
@ -2087,6 +2698,30 @@ long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg,
case MCEXEC_UP_SYS_UNSHARE: case MCEXEC_UP_SYS_UNSHARE:
return mcexec_sys_unshare((struct sys_unshare_desc *)arg); return mcexec_sys_unshare((struct sys_unshare_desc *)arg);
case MCEXEC_UP_UTIL_THREAD1:
return mcexec_util_thread1(os, arg, file);
case MCEXEC_UP_UTIL_THREAD2:
return mcexec_util_thread2(os, arg, file);
case MCEXEC_UP_SIG_THREAD:
return mcexec_sig_thread(os, arg, file);
case MCEXEC_UP_SWITCH_THREAD:
return mcexec_switch_thread(os, arg, file);
case MCEXEC_UP_SYSCALL_THREAD:
return mcexec_syscall_thread(os, arg, file);
case MCEXEC_UP_TERMINATE_THREAD:
return mcexec_terminate_thread(os, (unsigned long *)arg, file);
case MCEXEC_UP_COPY_FROM_MCK:
return mcexec_copy_from_mck(os, (unsigned long *)arg);
case MCEXEC_UP_COPY_TO_MCK:
return mcexec_copy_to_mck(os, (unsigned long *)arg);
case MCEXEC_UP_DEBUG_LOG: case MCEXEC_UP_DEBUG_LOG:
return mcexec_debug_log(os, arg); return mcexec_debug_log(os, arg);

View File

@ -81,7 +81,15 @@ static struct ihk_os_user_call_handler mcctrl_uchs[] = {
{ .request = MCEXEC_UP_SYS_MOUNT, .func = mcctrl_ioctl }, { .request = MCEXEC_UP_SYS_MOUNT, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_SYS_UMOUNT, .func = mcctrl_ioctl }, { .request = MCEXEC_UP_SYS_UMOUNT, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_SYS_UNSHARE, .func = mcctrl_ioctl }, { .request = MCEXEC_UP_SYS_UNSHARE, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_UTIL_THREAD1, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_UTIL_THREAD2, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_SIG_THREAD, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_SWITCH_THREAD, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_SYSCALL_THREAD, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_TERMINATE_THREAD, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_DEBUG_LOG, .func = mcctrl_ioctl }, { .request = MCEXEC_UP_DEBUG_LOG, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_COPY_FROM_MCK, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_COPY_TO_MCK, .func = mcctrl_ioctl },
{ .request = IHK_OS_AUX_PERF_NUM, .func = mcctrl_ioctl }, { .request = IHK_OS_AUX_PERF_NUM, .func = mcctrl_ioctl },
{ .request = IHK_OS_AUX_PERF_SET, .func = mcctrl_ioctl }, { .request = IHK_OS_AUX_PERF_SET, .func = mcctrl_ioctl },
{ .request = IHK_OS_AUX_PERF_GET, .func = mcctrl_ioctl }, { .request = IHK_OS_AUX_PERF_GET, .func = mcctrl_ioctl },

View File

@ -278,6 +278,174 @@ static int __notify_syscall_requester(ihk_os_t os, struct ikc_scd_packet *packet
return ret; return ret;
} }
/*
 * syscall_backward() - post a system call request to the remote syscall
 * requester on behalf of the calling Linux thread and sleep until a reply
 * arrives.
 *
 * @usrdata: mcctrl per-OS data (provides the OS handle and per-process data)
 * @num:     system call number placed in the request
 * @arg1..@arg6: system call arguments placed in the request
 * @ret:     receives req->args[1] of the reply when the reply is a
 *           PAGER_REQ_RESUME message; left untouched otherwise
 *
 * The request is built on the stack and handed over by physical address
 * through resp->fault_address, with resp->status set to STATUS_SYSCALL.
 *
 * Returns 0 on success, -EINVAL when the caller has no per-process data,
 * -ENOENT when no packet is registered for the calling thread, -EIO when
 * the reply is not an __NR_mmap request, or the non-zero result of
 * wait_event_interruptible() when the sleep was interrupted.
 *
 * NOTE(review): when the reply is a pager request other than
 * PAGER_REQ_RESUME, it is served once via pager_call() and 0 is returned
 * without writing *ret -- confirm that callers expect this.
 */
long syscall_backward(struct mcctrl_usrdata *usrdata, int num,
		unsigned long arg1, unsigned long arg2,
		unsigned long arg3, unsigned long arg4,
		unsigned long arg5, unsigned long arg6,
		unsigned long *ret)
{
	struct ikc_scd_packet *packet;
	struct syscall_request *req;
	struct syscall_response *resp;
	unsigned long syscall_ret;
	struct wait_queue_head_list_node *wqhln;
	unsigned long irqflags;
	struct mcctrl_per_proc_data *ppd;
	unsigned long phys;
	struct syscall_request _request[2];
	struct syscall_request *request;

	/*
	 * Two slots are reserved so that one of them never straddles a page
	 * boundary: if the first slot starts and ends on different pages,
	 * use the second one.
	 */
	if (((unsigned long)_request ^ (unsigned long)(_request + 1)) &
	    ~(PAGE_SIZE -1))
		request = _request + 1;
	else
		request = _request;

	request->number = num;
	request->args[0] = arg1;
	request->args[1] = arg2;
	request->args[2] = arg3;
	request->args[3] = arg4;
	request->args[4] = arg5;
	request->args[5] = arg6;

	/* Look up per-process structure */
	ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current));
	if (!ppd) {
		kprintf("%s: ERROR: no per-process structure for PID %d??\n",
			__FUNCTION__, task_tgid_vnr(current));
		return -EINVAL;
	}

	packet = (struct ikc_scd_packet *)mcctrl_get_per_thread_data(ppd, current);
	if (!packet) {
		syscall_ret = -ENOENT;
		printk("%s: no packet registered for TID %d\n",
		       __FUNCTION__, task_pid_vnr(current));
		goto out_put_ppd;
	}

	req = &packet->req;

	/* Map response structure */
	phys = ihk_device_map_memory(ihk_os_to_dev(usrdata->os),
				     packet->resp_pa, sizeof(*resp));
	resp = ihk_device_map_virtual(ihk_os_to_dev(usrdata->os),
				      phys, sizeof(*resp), NULL, 0);

retry_alloc:
	wqhln = kmalloc(sizeof(*wqhln), GFP_ATOMIC);
	if (!wqhln) {
		printk("WARNING: coudln't alloc wait queue head, retrying..\n");
		goto retry_alloc;
	}

	/* Prepare per-thread wait queue head */
	wqhln->task = current;
	/* Save the TID explicitly, because mcexec_syscall(), where the request
	 * will be matched, is in IRQ context and can't call task_pid_vnr() */
	wqhln->rtid = task_pid_vnr(current);
	wqhln->req = 0;
	init_waitqueue_head(&wqhln->wq_syscall);

	irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock);
	/* Add to exact list */
	list_add_tail(&wqhln->list, &ppd->wq_list_exact);
	ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, irqflags);

	resp->stid = task_pid_vnr(current);
	/* The request is passed by physical address in fault_address. */
	resp->fault_address = virt_to_phys(request);

#define STATUS_IN_PROGRESS	0
#define STATUS_SYSCALL		4
	req->valid = 0;

	if (__notify_syscall_requester(usrdata->os, packet, resp) < 0) {
		printk("%s: WARNING: failed to notify PID %d\n",
		       __FUNCTION__, packet->pid);
	}

	mb();
	resp->status = STATUS_SYSCALL;

	dprintk("%s: tid: %d, syscall: %d SLEEPING\n",
		__FUNCTION__, task_pid_vnr(current), num);
	/* wait for response */
	syscall_ret = wait_event_interruptible(wqhln->wq_syscall, wqhln->req);

	/* Remove per-thread wait queue head */
	irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock);
	list_del(&wqhln->list);
	ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, irqflags);

	dprintk("%s: tid: %d, syscall: %d WOKEN UP\n",
		__FUNCTION__, task_pid_vnr(current), num);

	if (syscall_ret) {
		kfree(wqhln);
		goto out;
	}
	else {
		unsigned long phys2;
		struct syscall_response *resp2;

		/* Update packet reference */
		packet = wqhln->packet;
		/*
		 * The wait queue node is not needed past this point; release
		 * it here so that every later exit path (including the -EIO
		 * one below) is leak-free.
		 */
		kfree(wqhln);
		req = &packet->req;
		phys2 = ihk_device_map_memory(ihk_os_to_dev(usrdata->os),
					      packet->resp_pa, sizeof(*resp));
		resp2 = ihk_device_map_virtual(ihk_os_to_dev(usrdata->os),
					       phys2, sizeof(*resp), NULL, 0);

		if (resp != resp2) {
			resp = resp2;
			phys = phys2;
			printk("%s: updated new remote PA for resp\n", __FUNCTION__);
		}
	}

	if (!req->valid) {
		printk("%s:not valid\n", __FUNCTION__);
	}
	req->valid = 0;

	/* check result */
	if (req->number != __NR_mmap) {
		printk("%s:unexpected response. %lx %lx\n",
		       __FUNCTION__, req->number, req->args[0]);
		syscall_ret = -EIO;
		goto out;
	}
#define PAGER_REQ_RESUME	0x0101
	else if (req->args[0] != PAGER_REQ_RESUME) {
		/* A pager request arrived instead of the resume: serve it. */
		resp->ret = pager_call(usrdata->os, (void *)req);

		if (__notify_syscall_requester(usrdata->os, packet, resp) < 0) {
			printk("%s: WARNING: failed to notify PID %d\n",
			       __FUNCTION__, packet->pid);
		}
		mb();
	}
	else {
		/* Resume message: args[1] carries the value for the caller. */
		*ret = req->args[1];
	}

	syscall_ret = 0;
out:
	ihk_device_unmap_virtual(ihk_os_to_dev(usrdata->os), resp, sizeof(*resp));
	ihk_device_unmap_memory(ihk_os_to_dev(usrdata->os), phys, sizeof(*resp));

out_put_ppd:
	dprintk("%s: tid: %d, syscall: %d, syscall_ret: %ld\n",
		__FUNCTION__, task_pid_vnr(current), num, (long)syscall_ret);

	mcctrl_put_per_proc_data(ppd);
	return syscall_ret;
}
static int remote_page_fault(struct mcctrl_usrdata *usrdata, void *fault_addr, uint64_t reason) static int remote_page_fault(struct mcctrl_usrdata *usrdata, void *fault_addr, uint64_t reason)
{ {
struct ikc_scd_packet *packet; struct ikc_scd_packet *packet;
@ -598,7 +766,7 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
dprintk("mcctrl:page fault:flags %#x pgoff %#lx va %p page %p\n", dprintk("mcctrl:page fault:flags %#x pgoff %#lx va %p page %p\n",
vmf->flags, vmf->pgoff, vmf->virtual_address, vmf->page); vmf->flags, vmf->pgoff, vmf->virtual_address, vmf->page);
/* Look up per-process structure */ /* Look up per-process structure */
ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current)); ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current));
if (!ppd) { if (!ppd) {
@ -608,6 +776,8 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
if (!ppd) { if (!ppd) {
kprintf("%s: ERROR: no per-process structure for PID %d??\n", kprintf("%s: ERROR: no per-process structure for PID %d??\n",
__FUNCTION__, task_tgid_vnr(current)); __FUNCTION__, task_tgid_vnr(current));
printk("mcctrl:page fault:flags %#x pgoff %#lx va %p page %p\n",
vmf->flags, vmf->pgoff, vmf->virtual_address, vmf->page);
return -EINVAL; return -EINVAL;
} }
@ -759,11 +929,11 @@ reserve_user_space_common(struct mcctrl_usrdata *usrdata, unsigned long start, u
original = override_creds(promoted); original = override_creds(promoted);
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) #if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
start = vm_mmap_pgoff(file, start, end, start = vm_mmap_pgoff(file, start, end, PROT_READ|PROT_WRITE|PROT_EXEC,
PROT_READ|PROT_WRITE, MAP_FIXED|MAP_SHARED, 0); MAP_FIXED|MAP_SHARED, 0);
#else #else
start = vm_mmap(file, start, end, start = vm_mmap(file, start, end, PROT_READ|PROT_WRITE|PROT_EXEC,
PROT_READ|PROT_WRITE, MAP_FIXED|MAP_SHARED, 0); MAP_FIXED|MAP_SHARED, 0);
#endif #endif
revert_creds(original); revert_creds(original);

View File

@ -1,18 +1,22 @@
CC=@CC@ CC=@CC@
BINDIR=@BINDIR@ BINDIR=@BINDIR@
prefix=@prefix@
exec_prefix=@exec_prefix@
LIBDIR=@libdir@
MCKERNEL_LIBDIR=@MCKERNEL_LIBDIR@ MCKERNEL_LIBDIR=@MCKERNEL_LIBDIR@
KDIR ?= @KDIR@ KDIR ?= @KDIR@
CFLAGS=-Wall -O -I. CFLAGS=-Wall -O -I. -Iarch/${ARCH}
VPATH=@abs_srcdir@ VPATH=@abs_srcdir@
TARGET=mcexec libsched_yield TARGET=mcexec libsched_yield
@uncomment_if_ENABLE_MEMDUMP@TARGET+=eclair @uncomment_if_ENABLE_MEMDUMP@TARGET+=eclair
LIBS=@LIBS@ LIBS=@LIBS@
ARCH=@ARCH@
IHKDIR ?= $(VPATH)/../../../ihk/linux/include/ IHKDIR ?= $(VPATH)/../../../ihk/linux/include/
all: $(TARGET) all: $(TARGET)
mcexec: mcexec.c mcexec: mcexec.c libmcexec.a
$(CC) -I${KDIR} $(CFLAGS) $(EXTRA_CFLAGS) -fPIE -pie -lrt -lnuma -pthread -o $@ $^ $(EXTRA_OBJS) $(CC) -I${KDIR} $(CFLAGS) $(EXTRA_CFLAGS) -DLIBDIR=\"$(LIBDIR)\" -fPIE -pie -L. -lmcexec -lrt -lnuma -pthread -o $@ $^ $(EXTRA_OBJS)
eclair: eclair.c eclair: eclair.c
$(CC) $(CFLAGS) -I${IHKDIR} -o $@ $^ $(LIBS) $(CC) $(CFLAGS) -I${IHKDIR} -o $@ $^ $(LIBS)
@ -20,12 +24,17 @@ eclair: eclair.c
libsched_yield: libsched_yield.c libsched_yield: libsched_yield.c
$(CC) -shared -fPIC -Wl,-soname,sched_yield.so.1 -o libsched_yield.so.1.0.0 $^ -lc -ldl $(CC) -shared -fPIC -Wl,-soname,sched_yield.so.1 -o libsched_yield.so.1.0.0 $^ -lc -ldl
clean: libmcexec.a::
(cd arch/${ARCH}; make)
clean::
(cd arch/${ARCH}; make clean)
$(RM) $(TARGET) *.o $(RM) $(TARGET) *.o
.PHONY: all clean install .PHONY: all clean install
install: install::
(cd arch/${ARCH}; make install)
mkdir -p -m 755 $(BINDIR) mkdir -p -m 755 $(BINDIR)
install -m 755 mcexec $(BINDIR) install -m 755 mcexec $(BINDIR)
mkdir -p -m 755 $(MCKERNEL_LIBDIR) mkdir -p -m 755 $(MCKERNEL_LIBDIR)

View File

@ -0,0 +1,113 @@
#ifndef ARCH_ARGS_H
#define ARCH_ARGS_H

/*
 * x86_64 accessors for the register set of a ptrace-stopped thread.
 *
 * The system call number is read from orig_rax, the return value from rax,
 * and arguments 1..6 from rdi, rsi, rdx, r10, r8 and r9.
 *
 * The header pulls in what it uses so that it can be included on its own.
 */
#include <stddef.h>
#include <sys/ptrace.h>
#include <sys/user.h>

typedef struct user_regs_struct syscall_args;

/* Read all general purpose registers of thread `pid`; returns ptrace()'s result. */
static inline int
get_syscall_args(int pid, syscall_args *args)
{
	return ptrace(PTRACE_GETREGS, pid, NULL, args);
}

/* Write all general purpose registers of thread `pid`; returns ptrace()'s result. */
static inline int
set_syscall_args(int pid, syscall_args *args)
{
	return ptrace(PTRACE_SETREGS, pid, NULL, args);
}

/* System call number (orig_rax). */
static inline unsigned long
get_syscall_number(syscall_args *args)
{
	return args->orig_rax;
}

/* System call return value (rax). */
static inline unsigned long
get_syscall_return(syscall_args *args)
{
	return args->rax;
}

/* Argument 1 (rdi). */
static inline unsigned long
get_syscall_arg1(syscall_args *args)
{
	return args->rdi;
}

/* Argument 2 (rsi). */
static inline unsigned long
get_syscall_arg2(syscall_args *args)
{
	return args->rsi;
}

/* Argument 3 (rdx). */
static inline unsigned long
get_syscall_arg3(syscall_args *args)
{
	return args->rdx;
}

/* Argument 4 (r10). */
static inline unsigned long
get_syscall_arg4(syscall_args *args)
{
	return args->r10;
}

/* Argument 5 (r8). */
static inline unsigned long
get_syscall_arg5(syscall_args *args)
{
	return args->r8;
}

/* Argument 6 (r9). */
static inline unsigned long
get_syscall_arg6(syscall_args *args)
{
	return args->r9;
}

/* Set the system call number (orig_rax). */
static inline void
set_syscall_number(syscall_args *args, unsigned long value)
{
	args->orig_rax = value;
}

/* Set the system call return value (rax). */
static inline void
set_syscall_return(syscall_args *args, unsigned long value)
{
	args->rax = value;
}

/* Set argument 1 (rdi). */
static inline void
set_syscall_arg1(syscall_args *args, unsigned long value)
{
	args->rdi = value;
}

/* Set argument 2 (rsi). */
static inline void
set_syscall_arg2(syscall_args *args, unsigned long value)
{
	args->rsi = value;
}

/* Set argument 3 (rdx). */
static inline void
set_syscall_arg3(syscall_args *args, unsigned long value)
{
	args->rdx = value;
}

/* Set argument 4 (r10). */
static inline void
set_syscall_arg4(syscall_args *args, unsigned long value)
{
	args->r10 = value;
}

/* Set argument 5 (r8). */
static inline void
set_syscall_arg5(syscall_args *args, unsigned long value)
{
	args->r8 = value;
}

/* Set argument 6 (r9). */
static inline void
set_syscall_arg6(syscall_args *args, unsigned long value)
{
	args->r9 = value;
}

#endif

View File

@ -0,0 +1,149 @@
/*
switch_ctx: save the caller's registers, issue ioctl(fd, cmd, param) and,
when the ioctl returns 0, continue execution in another register context.

C prototype (archdep.h):
int switch_ctx(int fd, unsigned long cmd, void **param, void *lctx, void *rctx);

arg: rdi, rsi, rdx, rcx, r8, r9
ret: rax
rax syscall number
syscall: (rax:num) rdi rsi rdx r10 r8 r9 (rcx:ret addr)
fd, cmd, param
rdi: fd
rsi: cmd
rdx: param
rcx: save area
r8: new thread context

Layout used for both the save area (rcx) and the new context (r8):
0x00 written as 0 on save     0x08 rax   0x10 rbx   0x18 rcx
0x20 rdx   0x28 rsi   0x30 rdi   0x38 rbp
0x40 r8    0x48 r9    0x50 r10   0x58 r11
0x60 r12   0x68 r13   0x70 r14   0x78 r15
0x80 rflags   0x88 return address (top of stack at entry)   0x90 rsp
0x98 value handed to arch_prctl(ARCH_SET_FS); it is only read here,
     presumably filled in by the ioctl handler (TODO confirm)

Return paths:
- ioctl result in the range 0xfffff001..0xffffffff (unsigned compare on eax):
  restore the caller's registers from the save area and return -1.
- ioctl result non-zero otherwise: restore the caller's registers from the
  save area and return that result.
- ioctl result 0: load FS and all registers from the new context, switch to
  its stack and jump to the address held in its rcx slot with rax = 0.
*/
.global switch_ctx
switch_ctx:
/* Save the caller's registers into the save area (rcx). */
movq $0,0x00(%rcx)
movq %rax,0x8(%rcx)
movq %rbx,0x10(%rcx)
movq %rcx,0x18(%rcx)
movq %rdx,0x20(%rcx)
movq %rsi,0x28(%rcx)
movq %rdi,0x30(%rcx)
movq %rbp,0x38(%rcx)
movq %r8,0x40(%rcx)
movq %r9,0x48(%rcx)
movq %r10,0x50(%rcx)
movq %r11,0x58(%rcx)
movq %r12,0x60(%rcx)
movq %r13,0x68(%rcx)
movq %r14,0x70(%rcx)
movq %r15,0x78(%rcx)
/* rflags is read through the stack; rax is used as scratch from here on. */
pushfq
popq %rax
movq %rax,0x80(%rcx)
/* Return address of this call (top of stack) and the stack pointer. */
movq 0x00(%rsp),%rax
movq %rax,0x88(%rcx)
movq %rsp,0x90(%rcx)
/* Keep the save area pointer in r10 (4th syscall argument register) and
   stash rcx, r8 and rax on the stack across the system call. */
movq %rcx,%r10
pushq %rcx
pushq %r8
pushq %rax
mov $0x10,%eax /* ioctl */
syscall
/* Label 3 is not referenced by the code shown in this file. */
3:
/* Drop the stashed rax, then recover r8 (new context) and rcx; rcx is then
   reloaded from r10, which still holds the save area pointer. */
popq %r8
popq %r8
popq %rcx
movq %r10,%rcx
/* Unsigned compare: eax >= 0xfffff001 is treated as an error return. */
cmp $0xfffffffffffff001,%eax
jae 1f
/* Any other non-zero result: go back to the caller with that value. */
test %eax,%eax
jnz 2f
/* Result 0: switch to the new context (r8). First set the FS base taken
   from offset 0x98 of the new context. */
pushq %rax
movq $158,%rax /* arch_prctl */
movq $0x1002,%rdi /* ARCH_SET_FS */
movq 0x98(%r8),%rsi
syscall
popq %rax
/* Load the registers of the new context; r8 itself is loaded last because
   it is the base pointer for all these loads. */
movq 0x10(%r8),%rbx
movq 0x18(%r8),%rcx
movq 0x20(%r8),%rdx
movq 0x28(%r8),%rsi
movq 0x30(%r8),%rdi
movq 0x38(%r8),%rbp
movq 0x48(%r8),%r9
movq 0x50(%r8),%r10
movq 0x58(%r8),%r11
movq 0x60(%r8),%r12
movq 0x68(%r8),%r13
movq 0x70(%r8),%r14
movq 0x78(%r8),%r15
/* Restore rflags through the (still old) stack. */
movq 0x80(%r8),%rax
pushq %rax
popfq
/* Switch to the new context's stack. */
movq 0x90(%r8),%rsp
// movq 0x8(%r8),%rax /* for interrupts */
movq 0x40(%r8),%r8
movq $0,%rax /* ioctl return */
/* Jump to the address that was loaded into rcx from the new context. */
pushq %rcx
retq
1:
/* Error return: report -1 (as a 32-bit int in eax). */
mov $0xffffffffffffffff,%eax
2:
/* Back to the caller: restore the FS base from offset 0x98 of the save
   area, preserving the return value in rax across the system call. */
pushq %rax
movq $158,%rax /* arch_prctl */
movq $0x1002,%rdi /* ARCH_SET_FS */
movq 0x98(%rcx),%rsi
syscall
popq %rax
/* Reload the saved registers; rax is left alone (it is the return value)
   and rsp is already back at its entry value. */
movq 0x10(%rcx),%rbx
movq 0x28(%rcx),%rsi
movq 0x30(%rcx),%rdi
movq 0x38(%rcx),%rbp
movq 0x40(%rcx),%r8
movq 0x48(%rcx),%r9
movq 0x50(%rcx),%r10
movq 0x58(%rcx),%r11
movq 0x60(%rcx),%r12
movq 0x68(%rcx),%r13
movq 0x70(%rcx),%r14
movq 0x78(%rcx),%r15
/* rflags goes through rdx, so rdx and rcx (the base pointer) come last. */
movq 0x80(%rcx),%rdx
pushq %rdx
popfq
movq 0x20(%rcx),%rdx
movq 0x18(%rcx),%rcx
retq
/*
64-bit atomic compare-and-swap.

arg: rdi, rsi, rdx, rcx, r8, r9
ret: rax
unsigned long
compare_and_swap(unsigned long *addr, unsigned long old, unsigned long new);
rdi: addr
rsi: old
rdx: new
RET: old value

The value returned in rax is the content of *addr observed by the locked
cmpxchg: it equals `old` when the swap took place, otherwise it is the
current value and *addr is left unchanged.
*/
.global compare_and_swap
compare_and_swap:
/* cmpxchg compares against rax, so the expected value goes there first. */
movq %rsi,%rax
lock
cmpxchgq %rdx,0(%rdi)
retq
/*
32-bit atomic compare-and-swap.

unsigned int
compare_and_swap_int(unsigned int *addr, unsigned int old, unsigned int new);
rdi: addr
esi: old
edx: new
ret: old value

The value returned in eax is the content of *addr observed by the locked
cmpxchg: it equals `old` when the swap took place, otherwise it is the
current value and *addr is left unchanged.
*/
.global compare_and_swap_int
compare_and_swap_int:
/* cmpxchg compares against eax, so the expected value goes there first. */
movl %esi,%eax
lock
cmpxchgl %edx,0(%rdi)
retq

3
executer/user/archdep.h Normal file
View File

@ -0,0 +1,3 @@
#ifndef ARCHDEP_H
#define ARCHDEP_H

/*
 * Architecture dependent helpers implemented in assembly.
 */

/*
 * Issue ioctl(fd, cmd, param) after saving the caller's registers in lctx;
 * when the ioctl returns 0, execution continues in the register context
 * described by rctx instead of returning to the caller.
 */
extern int switch_ctx(int fd, unsigned long cmd, void **param, void *lctx, void *rctx);

/*
 * Atomically replace *addr with `new` if it currently equals `old`.
 * Returns the value found in *addr (equal to `old` when the swap happened).
 */
extern unsigned long compare_and_swap(unsigned long *addr, unsigned long old, unsigned long new);

/* 32-bit variant of compare_and_swap(). */
extern unsigned int compare_and_swap_int(unsigned int *addr, unsigned int old, unsigned int new);

#endif /* ARCHDEP_H */

View File

@ -56,6 +56,7 @@
#include <sys/wait.h> #include <sys/wait.h>
#include <dirent.h> #include <dirent.h>
#include <sys/syscall.h> #include <sys/syscall.h>
#include <sys/ptrace.h>
#include <pthread.h> #include <pthread.h>
#include <semaphore.h> #include <semaphore.h>
#include <signal.h> #include <signal.h>
@ -63,8 +64,12 @@
#include <sys/mount.h> #include <sys/mount.h>
#include <include/generated/uapi/linux/version.h> #include <include/generated/uapi/linux/version.h>
#include <sys/user.h> #include <sys/user.h>
#include <sys/prctl.h>
#include <asm/prctl.h>
#include "../include/uprotocol.h" #include "../include/uprotocol.h"
#include <getopt.h> #include <getopt.h>
#include "archdep.h"
#include "arch_args.h"
#include "../../config.h" #include "../../config.h"
#include <numa.h> #include <numa.h>
#include <numaif.h> #include <numaif.h>
@ -85,6 +90,8 @@
__VA_ARGS__);fflush(stderr);} __VA_ARGS__);fflush(stderr);}
#endif #endif
#undef DEBUG_UTI
#ifdef USE_SYSCALL_MOD_CALL #ifdef USE_SYSCALL_MOD_CALL
extern int mc_cmd_server_init(); extern int mc_cmd_server_init();
extern void mc_cmd_server_exit(); extern void mc_cmd_server_exit();
@ -131,6 +138,13 @@ struct sigfd {
struct sigfd *sigfdtop; struct sigfd *sigfdtop;
/*
 * Parameter block passed to ioctl(MCEXEC_UP_SYSCALL_THREAD): a system call
 * number, its six arguments and a slot for the result.
 * create_tracer() keeps unused blocks on a free list by storing the link
 * pointer at the very start of the block (over `number`).
 */
struct syscall_struct {
int number;
unsigned long args[6];
unsigned long ret;
};
#ifdef NCCS #ifdef NCCS
#undef NCCS #undef NCCS
#endif #endif
@ -145,7 +159,42 @@ struct kernel_termios {
cc_t c_cc[NCCS]; /* control characters */ cc_t c_cc[NCCS]; /* control characters */
}; };
int main_loop(int fd, int cpu, pthread_mutex_t *lock); #define UTI_FLAG_NUMA_SET (1ULL<<1) /* Indicates NUMA_SET is specified */
#define UTI_FLAG_SAME_NUMA_DOMAIN (1ULL<<2)
#define UTI_FLAG_DIFFERENT_NUMA_DOMAIN (1ULL<<3)
#define UTI_FLAG_SAME_L1 (1ULL<<4)
#define UTI_FLAG_SAME_L2 (1ULL<<5)
#define UTI_FLAG_SAME_L3 (1ULL<<6)
#define UTI_FLAG_DIFFERENT_L1 (1ULL<<7)
#define UTI_FLAG_DIFFERENT_L2 (1ULL<<8)
#define UTI_FLAG_DIFFERENT_L3 (1ULL<<9)
#define UTI_FLAG_EXCLUSIVE_CPU (1ULL<<10)
#define UTI_FLAG_CPU_INTENSIVE (1ULL<<11)
#define UTI_FLAG_HIGH_PRIORITY (1ULL<<12)
#define UTI_FLAG_NON_COOPERATIVE (1ULL<<13)
/* Linux default value is used */
#define UTI_MAX_NUMA_DOMAINS (1024)
/* Utility thread attributes: a NUMA bitmap plus UTI_FLAG_* hint bits. */
typedef struct uti_attr {
/* UTI_CPU_SET environmental variable is used to denote the preferred
location of utility thread */
/* Bitmap of UTI_MAX_NUMA_DOMAINS bits, rounded up to whole 64-bit words
   (presumably one bit per NUMA domain -- TODO confirm). */
uint64_t numa_set[(UTI_MAX_NUMA_DOMAINS + sizeof(uint64_t) * 8 - 1) /
(sizeof(uint64_t) * 8)];
uint64_t flags; /* Representing location and behavior hints by bitmap */
} uti_attr_t;
/*
 * Attribute block that util_thread_setaffinity() copies in with
 * ioctl(MCEXEC_UP_COPY_FROM_MCK).
 */
struct kuti_attr {
/* NOTE(review): presumably the CPU id of the thread that requested the
   utility thread -- confirm against the code that fills this structure. */
long parent_cpuid;
struct uti_attr attr;
};
struct thread_data_s;
int main_loop(struct thread_data_s *);
static int mcosid; static int mcosid;
static int fd; static int fd;
@ -188,6 +237,11 @@ pid_t gettid(void)
return syscall(SYS_gettid); return syscall(SYS_gettid);
} }
/* Thin wrapper around the raw tgkill system call; returns syscall()'s result. */
int tgkill(int tgid, int tid, int sig)
{
	long rc;

	rc = syscall(SYS_tgkill, tgid, tid, sig);
	return rc;
}
struct program_load_desc *load_elf(FILE *fp, char **interp_pathp) struct program_load_desc *load_elf(FILE *fp, char **interp_pathp)
{ {
Elf64_Ehdr hdr; Elf64_Ehdr hdr;
@ -893,13 +947,15 @@ int flatten_strings(int nr_strings, char *first, char **strings, char **flat)
//#define NUM_HANDLER_THREADS 248 //#define NUM_HANDLER_THREADS 248
struct thread_data_s { struct thread_data_s {
struct thread_data_s *next;
pthread_t thread_id; pthread_t thread_id;
int fd;
int cpu; int cpu;
int ret; int ret;
pid_t tid; pid_t tid;
int terminate; int terminate;
int remote_tid; int remote_tid;
int remote_cpu;
int joined;
pthread_mutex_t *lock; pthread_mutex_t *lock;
pthread_barrier_t *init_ready; pthread_barrier_t *init_ready;
} *thread_data; } *thread_data;
@ -918,8 +974,9 @@ static void *main_loop_thread_func(void *arg)
td->tid = gettid(); td->tid = gettid();
td->remote_tid = -1; td->remote_tid = -1;
pthread_barrier_wait(&init_ready); if (td->init_ready)
td->ret = main_loop(td->fd, td->cpu, td->lock); pthread_barrier_wait(td->init_ready);
td->ret = main_loop(td);
return NULL; return NULL;
} }
@ -929,54 +986,91 @@ static void *main_loop_thread_func(void *arg)
void void
sendsig(int sig, siginfo_t *siginfo, void *context) sendsig(int sig, siginfo_t *siginfo, void *context)
{ {
pid_t pid = getpid(); pid_t pid;
pid_t tid = gettid(); pid_t tid;
int remote_tid; int remote_tid;
int i;
int cpu; int cpu;
struct signal_desc sigdesc; struct signal_desc sigdesc;
struct thread_data_s *tp;
int localthread;
if(siginfo->si_pid == pid && localthread = ioctl(fd, MCEXEC_UP_SIG_THREAD, 1);
siginfo->si_signo == LOCALSIG) pid = getpid();
return; tid = gettid();
if (siginfo->si_pid == pid &&
siginfo->si_signo == LOCALSIG)
goto out;
if(siginfo->si_signo == SIGCHLD) if (siginfo->si_signo == SIGCHLD)
return; goto out;
for(i = 0; i < ncpu; i++){ for (tp = thread_data; tp; tp = tp->next) {
if(siginfo->si_pid == pid && if (siginfo->si_pid == pid &&
thread_data[i].tid == tid){ tp->tid == tid) {
if(thread_data[i].terminate) if (tp->terminate)
return; goto out;
break; break;
} }
if(siginfo->si_pid != pid && if (siginfo->si_pid != pid &&
thread_data[i].remote_tid == tid){ tp->remote_tid == tid) {
if(thread_data[i].terminate) if (tp->terminate)
return; goto out;
break; break;
} }
} }
if(i != ncpu){ if (tp) {
remote_tid = thread_data[i].remote_tid; remote_tid = tp->remote_tid;
cpu = thread_data[i].cpu; cpu = tp->remote_cpu;
} }
else{ else {
cpu = 0; cpu = 0;
remote_tid = -1; remote_tid = -1;
} }
memset(&sigdesc, '\0', sizeof sigdesc); if (localthread) {
sigdesc.cpu = cpu; memset(&sigdesc, '\0', sizeof sigdesc);
sigdesc.pid = (int)pid; sigdesc.cpu = cpu;
sigdesc.tid = remote_tid; sigdesc.pid = (int)pid;
sigdesc.sig = sig; sigdesc.tid = remote_tid;
memcpy(&sigdesc.info, siginfo, 128); sigdesc.sig = sig;
if (ioctl(fd, MCEXEC_UP_SEND_SIGNAL, &sigdesc) != 0) { memcpy(&sigdesc.info, siginfo, 128);
perror("send_signal"); if (ioctl(fd, MCEXEC_UP_SEND_SIGNAL, &sigdesc) != 0) {
close(fd); close(fd);
exit(1); exit(1);
}
} }
else {
struct syscall_struct param;
int rc;
param.number = SYS_rt_sigaction;
param.args[0] = sig;
rc = ioctl(fd, MCEXEC_UP_SYSCALL_THREAD, &param);
if (rc == -1);
else if (param.ret == (unsigned long)SIG_IGN);
else if (param.ret == (unsigned long)SIG_DFL) {
if (sig != SIGCHLD && sig != SIGURG && sig != SIGCONT) {
signal(sig, SIG_DFL);
kill(getpid(), sig);
for(;;)
sleep(1);
#if 0
ioctl(fd, MCEXEC_UP_SWITCH_THREAD,
0x100000000 | sig);
pthread_exit(NULL);
#endif
}
}
else {
ioctl(fd, MCEXEC_UP_SIG_THREAD, 0);
((void (*)(int, siginfo_t *, void *))param.ret)(sig,
siginfo, context);
ioctl(fd, MCEXEC_UP_SIG_THREAD, 1);
}
}
out:
if (!localthread)
ioctl(fd, MCEXEC_UP_SIG_THREAD, 0);
} }
long long
@ -1137,7 +1231,29 @@ void init_sigaction(void)
sigaction(i, &act, NULL); sigaction(i, &act, NULL);
} }
} }
} }
static int max_cpuid;
/*
 * Allocate a thread_data_s entry, push it on the head of the global
 * thread_data list and start a worker running main_loop_thread_func() on it.
 *
 * @init_ready: barrier the new thread waits on before entering its main
 *              loop, or NULL to start immediately.
 *
 * Returns 0 on success, ENOMEM when the entry cannot be allocated, or the
 * error number returned by pthread_create().
 *
 * NOTE(review): neither max_cpuid nor the list head is updated under a
 * lock here; confirm that callers cannot run concurrently.
 */
static int
create_worker_thread(pthread_barrier_t *init_ready)
{
	struct thread_data_s *tp;
	int ret;

	tp = calloc(1, sizeof(*tp));
	if (!tp)
		return ENOMEM;

	tp->cpu = max_cpuid++;
	tp->lock = &lock;
	tp->init_ready = init_ready;
	tp->terminate = 0;
	tp->next = thread_data;
	thread_data = tp;

	ret = pthread_create(&tp->thread_id, NULL,
			     &main_loop_thread_func, tp);
	if (ret) {
		/*
		 * No thread exists for this entry, so thread_id is not
		 * valid.  The entry stays on the list (other threads may
		 * already be walking it) but is flagged so that list walkers
		 * skip it and nobody calls pthread_join() on it.
		 */
		tp->remote_tid = -1;
		tp->terminate = 1;
		tp->joined = 1;
	}
	return ret;
}
void init_worker_threads(int fd) void init_worker_threads(int fd)
{ {
@ -1146,19 +1262,12 @@ void init_worker_threads(int fd)
pthread_mutex_init(&lock, NULL); pthread_mutex_init(&lock, NULL);
pthread_barrier_init(&init_ready, NULL, n_threads + 2); pthread_barrier_init(&init_ready, NULL, n_threads + 2);
max_cpuid = 0;
for (i = 0; i <= n_threads; ++i) { for (i = 0; i <= n_threads; ++i) {
int ret; int ret = create_worker_thread(&init_ready);
thread_data[i].fd = fd; if (ret) {
thread_data[i].cpu = i; printf("ERROR: creating syscall threads(%d)\n", ret);
thread_data[i].lock = &lock;
thread_data[i].init_ready = &init_ready;
thread_data[i].terminate = 0;
ret = pthread_create(&thread_data[i].thread_id, NULL,
&main_loop_thread_func, &thread_data[i]);
if (ret < 0) {
printf("ERROR: creating syscall threads\n");
exit(1); exit(1);
} }
} }
@ -1438,13 +1547,44 @@ void bind_mount_recursive(const char *root, char *prefix)
} }
#endif #endif
/*
 * Join every worker on the thread_data list.  The list is walked again from
 * its head after any pass that joined at least one thread, and the function
 * returns once a full pass finds every entry already marked as joined.
 */
static void
join_all_threads()
{
	int pending = 1;

	while (pending) {
		struct thread_data_s *cur = thread_data;

		pending = 0;
		while (cur) {
			if (!cur->joined) {
				pending = 1;
				pthread_join(cur->thread_id, NULL);
				cur->joined = 1;
			}
			cur = cur->next;
		}
	}
}
static int
opendev()
{
int f;
sprintf(dev, "/dev/mcos%d", mcosid);
/* Open OS chardev for ioctl() */
f = open(dev, O_RDWR);
if (f < 0) {
fprintf(stderr, "Error: Failed to open %s.\n", dev);
return -1;
}
fd = f;
return fd;
}
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
// int fd;
#if 0
int fdm;
long r;
#endif
struct program_load_desc *desc; struct program_load_desc *desc;
int envs_len; int envs_len;
char *envs; char *envs;
@ -1460,6 +1600,7 @@ int main(int argc, char **argv)
char path[1024]; char path[1024];
char *shell = NULL; char *shell = NULL;
char shell_path[1024]; char shell_path[1024];
int num = 0;
#ifdef USE_SYSCALL_MOD_CALL #ifdef USE_SYSCALL_MOD_CALL
__glob_argc = argc; __glob_argc = argc;
@ -1521,30 +1662,19 @@ int main(int argc, char **argv)
/* Determine OS device */ /* Determine OS device */
if (isdigit(*argv[optind])) { if (isdigit(*argv[optind])) {
mcosid = atoi(argv[optind]); num = atoi(argv[optind]);
++optind; ++optind;
} }
sprintf(dev, "/dev/mcos%d", mcosid);
/* No more arguments? */ /* No more arguments? */
if (optind >= argc) { if (optind >= argc) {
print_usage(argv); print_usage(argv);
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
__dprintf("target_core: %d, device: %s, command: ", target_core, dev); mcosid = num;
for (i = optind; i < argc; ++i) { if (opendev() == -1)
__dprintf("%s ", argv[i]); exit(EXIT_FAILURE);
}
__dprintf("%s", "\n");
/* Open OS chardev for ioctl() */
fd = open(dev, O_RDWR);
if (fd < 0) {
fprintf(stderr, "Error: Failed to open %s.\n", dev);
return 1;
}
if (disable_sched_yield) { if (disable_sched_yield) {
char sched_yield_lib_path[PATH_MAX]; char sched_yield_lib_path[PATH_MAX];
@ -1567,7 +1697,6 @@ int main(int argc, char **argv)
/* Collect environment variables */ /* Collect environment variables */
envs_len = flatten_strings(-1, NULL, environ, &envs); envs_len = flatten_strings(-1, NULL, environ, &envs);
envs = envs;
#ifdef ENABLE_MCOVERLAYFS #ifdef ENABLE_MCOVERLAYFS
__dprintf("mcoverlay enable\n"); __dprintf("mcoverlay enable\n");
@ -1798,12 +1927,14 @@ int main(int argc, char **argv)
* TODO: fix signaling code to be independent of TIDs. * TODO: fix signaling code to be independent of TIDs.
* TODO: implement dynaic thread pool resizing. * TODO: implement dynaic thread pool resizing.
*/ */
#if 0
thread_data = (struct thread_data_s *)malloc(sizeof(struct thread_data_s) * (ncpu + 1)); thread_data = (struct thread_data_s *)malloc(sizeof(struct thread_data_s) * (ncpu + 1));
if (!thread_data) { if (!thread_data) {
fprintf(stderr, "error: allocating thread pool data\n"); fprintf(stderr, "error: allocating thread pool data\n");
return 1; return 1;
} }
memset(thread_data, '\0', sizeof(struct thread_data_s) * (ncpu + 1)); memset(thread_data, '\0', sizeof(struct thread_data_s) * (ncpu + 1));
#endif
#if 0 #if 0
fdm = open("/dev/fmem", O_RDWR); fdm = open("/dev/fmem", O_RDWR);
@ -1991,9 +2122,7 @@ int main(int argc, char **argv)
return 1; return 1;
} }
for (i = 0; i <= n_threads; ++i) { join_all_threads();
pthread_join(thread_data[i].thread_id, NULL);
}
return 0; return 0;
} }
@ -2145,18 +2274,339 @@ out:
} }
static void static void
kill_thread(unsigned long tid) kill_thread(unsigned long tid, int sig)
{ {
int i; struct thread_data_s *tp;
for (i = 0; i <= n_threads; ++i) { if (sig == 0)
if(thread_data[i].remote_tid == tid){ sig = LOCALSIG;
pthread_kill(thread_data[i].thread_id, LOCALSIG);
for (tp = thread_data; tp; tp = tp->next) {
if (tp->remote_tid == tid) {
pthread_kill(tp->thread_id, sig);
break; break;
} }
} }
} }
/* Return 1 when both addresses lie in the same page, 0 otherwise. */
static int
samepage(void *a, void *b)
{
	/* Bits that differ between the two addresses. */
	unsigned long diff = (unsigned long)a ^ (unsigned long)b;

	/* Same page <=> no difference above the page offset bits. */
	return !(diff & PAGE_MASK);
}
#ifdef DEBUG_UTI
/* Per system call number counters, filled in by create_tracer(). */
long syscalls[512];

/* Print every non-zero counter in `syscalls` to stderr. */
static void
debug_sig(int s)
{
	int nr = 0;

	while (nr < 512) {
		if (syscalls[nr] != 0) {
			fprintf(stderr, "syscall %d called %ld\n", nr,
				syscalls[nr]);
		}
		nr++;
	}
}
#endif
/*
 * create_tracer() - start a detached process that ptrace-attaches to the
 * calling thread and rewrites selected system calls of that thread.
 *
 * @wp:      shared work page; slots 1..10 of it (viewed as an array of
 *           struct syscall_struct) are used as parameter blocks
 * @mck_tid: value reported to the traced thread for gettid()
 * @key:     opaque value passed back in MCEXEC_UP_TERMINATE_THREAD
 *
 * In the calling process: returns 0 once the tracer reported that it is
 * attached; -1 when pipe() or fork() fails; -ENOMEM when the intermediate
 * child did not exit cleanly; -ETIMEDOUT when the tracer did not report
 * within one second; -errno when select() fails; -EAGAIN when the
 * notification byte could not be read.
 * The tracer process never returns from this function; it calls exit().
 */
static int
create_tracer(void *wp, int mck_tid, unsigned long key)
{
int pid = getpid();
int tid = gettid();
int pfd[2];
int tpid;
int rc;
int st;
int sig = 0;
int i;
struct syscall_struct *param_top = NULL;
struct syscall_struct *param;
unsigned long code = 0;
int exited = 0;
int mode = 0;
/* Pipe used by the tracer to report that it is attached. */
if (pipe(pfd) == -1)
return -1;
tpid = fork();
if (tpid) {
/* Original process: wait for the intermediate child, then for the
   tracer's notification byte. */
struct timeval tv;
fd_set rfd;
/* NOTE(review): both pipe ends are still open on this return path. */
if (tpid == -1)
return -1;
close(pfd[1]);
while ((rc = waitpid(tpid, &st, 0)) == -1 && errno == EINTR);
/* NOTE(review): pfd[0] is still open on this return path. */
if (rc == -1 || !WIFEXITED(st) || WEXITSTATUS(st)) {
fprintf(stderr, "waitpid rc=%d st=%08x\n", rc, st);
return -ENOMEM;
}
/* Give the tracer one second to write its byte. */
FD_ZERO(&rfd);
FD_SET(pfd[0], &rfd);
tv.tv_sec = 1;
tv.tv_usec = 0;
while ((rc = select(pfd[0] + 1, &rfd, NULL, NULL, &tv)) == -1 &&
errno == EINTR);
if (rc == 0) {
close(pfd[0]);
return -ETIMEDOUT;
}
if (rc == -1) {
close(pfd[0]);
return -errno;
}
rc = read(pfd[0], &st, 1);
close(pfd[0]);
if (rc != 1) {
return -EAGAIN;
}
return 0;
}
/* Intermediate child: fork the tracer and exit immediately, so that the
   waitpid() above completes (presumably also so that the tracer is not a
   child of the traced process -- TODO confirm). */
close(pfd[0]);
tpid = fork();
if (tpid) {
if (tpid == -1) {
fprintf(stderr, "fork errno=%d\n", errno);
exit(1);
}
exit(0);
}
/* Tracer process from here on. `tid` is the thread that called
   create_tracer(). */
if (ptrace(PTRACE_ATTACH, tid, 0, 0) == -1) {
fprintf(stderr, "PTRACE_ATTACH errno=%d\n", errno);
exit(1);
}
/* Wait for the stop caused by the attach (result is not checked). */
waitpid(-1, &st, __WALL);
/* With TRACESYSGOOD, syscall stops are reported with bit 0x80 set in the
   stop signal, which the main loop below tests for. */
if (ptrace(PTRACE_SETOPTIONS, tid, 0, PTRACE_O_TRACESYSGOOD) == -1) {
fprintf(stderr, "PTRACE_SETOPTIONS errno=%d\n", errno);
exit(1);
}
/* Tell the waiting process that the tracer is attached. */
write(pfd[1], " ", 1);
close(pfd[1]);
/* Close every descriptor except the device fd and stderr, then open
   /dev/null twice (expected to land on descriptors 0 and 1). */
for (i = 0; i < 4096; i++)
if (i != fd && i != 2)
close(i);
open("/dev/null", O_RDONLY);
open("/dev/null", O_WRONLY);
// open("/dev/null", O_WRONLY);
/* Build a free list of ten parameter blocks in the work page; the link
   pointer is stored at the start of each block. Slot 0 is not used as a
   block; its first long is cleared below. */
for (i = 1; i <= 10; i++) {
param = (struct syscall_struct *)wp + i;
*(void **)param = param_top;
param_top = param;
}
memset(wp, '\0', sizeof(long));
fprintf(stderr, "tracer PID=%d\n", getpid());
#ifdef DEBUG_UTI
fprintf(stderr, "tracer PID=%d\n", getpid());
signal(SIGINT, debug_sig);
#endif
for (;;) {
/* Resume the thread until its next syscall stop or signal, delivering
   the signal recorded at the previous stop (if any). */
ptrace(PTRACE_SYSCALL, tid, 0, sig);
sig = 0;
waitpid(-1, &st, __WALL);
if (WIFEXITED(st) || WIFSIGNALED(st)) {
/* The thread is gone: report {pid, tid, code, key} to the driver.
   Bit 32 of code is set when exit_group was seen or the thread was
   killed by a signal. */
unsigned long term_param[4];
term_param[0] = pid;
term_param[1] = tid;
term_param[3] = key;
code = st;
if (exited == 2 || // exit_group
WIFSIGNALED(st)) {
code |= 0x0000000100000000;
}
term_param[2] = code;
ioctl(fd, MCEXEC_UP_TERMINATE_THREAD, term_param);
break;
}
if (!WIFSTOPPED(st)) {
continue;
}
if (WSTOPSIG(st) & 0x80) { // syscall
syscall_args args;
get_syscall_args(tid, &args);
#ifdef DEBUG_UTI
if (get_syscall_return(&args) == -ENOSYS) {
if (get_syscall_number(&args) >= 0 &&
get_syscall_number(&args) < 512) {
syscalls[get_syscall_number(&args)]++;
}
}
#endif
/* A return register equal to -ENOSYS is used throughout this loop as
   the marker for a syscall-entry stop. An entry into
   ioctl(fd, MCEXEC_UP_SIG_THREAD, x) updates `mode` with x. */
if (get_syscall_number(&args) == __NR_ioctl &&
get_syscall_return(&args) == -ENOSYS &&
get_syscall_arg1(&args) == fd &&
get_syscall_arg2(&args) == MCEXEC_UP_SIG_THREAD) {
mode = get_syscall_arg3(&args);
}
/* While mode is non-zero, system calls are passed through untouched. */
if (mode) {
continue;
}
switch (get_syscall_number(&args)) {
case __NR_gettid:
/* Replace the call: the number is set to -1 and the return register
   is preloaded with mck_tid. */
set_syscall_number(&args, -1);
set_syscall_return(&args, mck_tid);
set_syscall_args(tid, &args);
continue;
/* These calls are redirected to the driver (code after the switch). */
case __NR_futex:
case __NR_brk:
case __NR_mmap:
case __NR_munmap:
case __NR_mprotect:
case __NR_mremap:
break;
/* exit_group adds 2 to `exited` (via the fall-through), exit adds 1. */
case __NR_exit_group:
exited++;
case __NR_exit:
exited++;
continue;
/* Process/thread creation and exec are blocked by invalidating the
   system call number. */
case __NR_clone:
case __NR_fork:
case __NR_vfork:
case __NR_execve:
set_syscall_number(&args, -1);
set_syscall_args(tid, &args);
continue;
case __NR_ioctl:
/* Not a syscall-entry stop of a redirected call (its parameter block
   lies in the work page): put the original arguments back into the
   registers, report param->ret as the result and return the block to
   the free list. */
param = (struct syscall_struct *)
get_syscall_arg3(&args);
if (get_syscall_return(&args) != -ENOSYS &&
get_syscall_arg1(&args) == fd &&
get_syscall_arg2(&args) ==
MCEXEC_UP_SYSCALL_THREAD &&
samepage(wp, param)) {
set_syscall_arg1(&args, param->args[0]);
set_syscall_arg2(&args, param->args[1]);
set_syscall_arg3(&args, param->args[2]);
set_syscall_arg4(&args, param->args[3]);
set_syscall_arg5(&args, param->args[4]);
set_syscall_arg6(&args, param->args[5]);
set_syscall_return(&args, param->ret);
*(void **)param = param_top;
param_top = param;
set_syscall_args(tid, &args);
}
continue;
default:
continue;
}
/* Redirect the call: take a parameter block from the free list, record
   the original number and arguments in it and turn the system call
   into ioctl(fd, MCEXEC_UP_SYSCALL_THREAD, param). */
param = param_top;
if (!param) {
/* No free block: invalidate the call and preload -ENOMEM. */
set_syscall_number(&args, -1);
set_syscall_return(&args, -ENOMEM);
}
else {
param_top = *(void **)param;
param->number = get_syscall_number(&args);
param->args[0] = get_syscall_arg1(&args);
param->args[1] = get_syscall_arg2(&args);
param->args[2] = get_syscall_arg3(&args);
param->args[3] = get_syscall_arg4(&args);
param->args[4] = get_syscall_arg5(&args);
param->args[5] = get_syscall_arg6(&args);
param->ret = -EINVAL;
set_syscall_number(&args, __NR_ioctl);
set_syscall_arg1(&args, fd);
set_syscall_arg2(&args,
MCEXEC_UP_SYSCALL_THREAD);
set_syscall_arg3(&args, (unsigned long)param);
}
set_syscall_args(tid, &args);
}
else { // signal
/* Remember the signal so that the next PTRACE_SYSCALL delivers it. */
sig = WSTOPSIG(st) & 0x7f;
}
}
#ifdef DEBUG_UTI
fprintf(stderr, "offloaded thread called these syscalls\n");
debug_sig(0);
#endif
exit(0);
}
/*
 * Copy the struct kuti_attr located at address `pattr` into a local buffer
 * using ioctl(MCEXEC_UP_COPY_FROM_MCK).  The copied attributes are not used
 * any further and a failing ioctl is ignored.
 */
static void
util_thread_setaffinity(unsigned long pattr)
{
	struct kuti_attr kattr;
	/* { destination, source, length } */
	unsigned long copy_req[3] = {
		(unsigned long)&kattr,
		pattr,
		sizeof kattr,
	};

	(void)ioctl(fd, MCEXEC_UP_COPY_FROM_MCK, copy_req);
}
/*
 * util_thread() - turn the calling worker thread into a utility thread.
 *
 * @uctx_pa:    passed to the driver as param[0] of MCEXEC_UP_UTIL_THREAD1
 * @remote_tid: value the tracer reports to this thread for gettid()
 * @pattr:      address of the attribute block to fetch, or 0 for none
 *
 * Maps three shared pages (work page, local context, remote context),
 * registers them with MCEXEC_UP_UTIL_THREAD1, starts a new worker thread
 * and a tracer process, then switches context via switch_ctx().  If
 * switch_ctx() comes back, the thread terminates with pthread_exit().
 *
 * Returns a negative error number when setup fails before the context
 * switch; it does not return otherwise.
 */
static long
util_thread(unsigned long uctx_pa, int remote_tid, unsigned long pattr)
{
	void *lctx;
	void *rctx;
	void *wp;
	void *param[6];
	int rc = 0;
	int err;

	wp = mmap(NULL, PAGE_SIZE * 3, PROT_READ | PROT_WRITE,
		  MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	if (wp == MAP_FAILED) {
		/* Nothing was mapped, so there is nothing to unmap. */
		return -errno;
	}

	lctx = (char *)wp + PAGE_SIZE;
	rctx = (char *)lctx + PAGE_SIZE;

	param[0] = (void *)uctx_pa;
	param[1] = rctx;
	param[2] = lctx;
	/* param[3] is read back after the ioctl and passed to the tracer as
	 * its key; start from a defined value. */
	param[3] = NULL;
	param[4] = wp;
	param[5] = (void *)(PAGE_SIZE * 3);
	if ((rc = ioctl(fd, MCEXEC_UP_UTIL_THREAD1, param)) == -1) {
		/* Save errno before fprintf() has a chance to change it. */
		err = errno;
		fprintf(stderr, "util_thread1: %d errno=%d\n", rc, err);
		rc = -err;
		goto out;
	}

	/* Failing to start another worker is not fatal for this thread, but
	 * it should not go unnoticed. */
	rc = create_worker_thread(NULL);
	if (rc) {
		fprintf(stderr, "util_thread: create_worker_thread failed (%d)\n",
			rc);
	}

	if ((rc = create_tracer(wp, remote_tid, (unsigned long)param[3]))) {
		err = errno;
		fprintf(stderr, "create tracer %d\n", rc);
		/*
		 * create_tracer() returns either -1 with errno set, or a
		 * negative error number of its own.  Only translate the
		 * former; never turn a failure into 0.
		 */
		if (rc == -1 && err)
			rc = -err;
		goto out;
	}

	if (pattr) {
		util_thread_setaffinity(pattr);
	}

	if ((rc = switch_ctx(fd, MCEXEC_UP_UTIL_THREAD2, param, lctx, rctx))
	    < 0) {
		fprintf(stderr, "util_thread2: %d\n", rc);
	}
	fprintf(stderr, "return from util_thread2 rc=%d\n", rc);
	pthread_exit(NULL);

out:
	munmap(wp, PAGE_SIZE * 3);
	return rc;
}
static long do_strncpy_from_user(int fd, void *dest, void *src, unsigned long n) static long do_strncpy_from_user(int fd, void *dest, void *src, unsigned long n)
{ {
struct strncpy_from_user_desc desc; struct strncpy_from_user_desc desc;
@ -2291,7 +2741,7 @@ chgpath(char *in, char *buf)
return fn; return fn;
} }
int main_loop(int fd, int cpu, pthread_mutex_t *lock) int main_loop(struct thread_data_s *my_thread)
{ {
struct syscall_wait_desc w; struct syscall_wait_desc w;
long ret; long ret;
@ -2301,6 +2751,7 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
struct timespec tv; struct timespec tv;
char pathbuf[PATH_MAX]; char pathbuf[PATH_MAX];
char tmpbuf[PATH_MAX]; char tmpbuf[PATH_MAX];
int cpu = my_thread->cpu;
memset(&w, '\0', sizeof w); memset(&w, '\0', sizeof w);
w.cpu = cpu; w.cpu = cpu;
@ -2318,7 +2769,8 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
//pthread_mutex_lock(lock); //pthread_mutex_lock(lock);
thread_data[cpu].remote_tid = w.sr.rtid; my_thread->remote_tid = w.sr.rtid;
my_thread->remote_cpu = w.cpu;
switch (w.sr.number) { switch (w.sr.number) {
case __NR_open: case __NR_open:
@ -2350,7 +2802,7 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
break; break;
case __NR_kill: // interrupt syscall case __NR_kill: // interrupt syscall
kill_thread(w.sr.args[1]); kill_thread(w.sr.args[1], w.sr.args[2]);
do_syscall_return(fd, cpu, 0, 0, 0, 0, 0); do_syscall_return(fd, cpu, 0, 0, 0, 0, 0);
break; break;
case __NR_exit: case __NR_exit:
@ -2423,6 +2875,7 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
*/ */
if (w.sr.args[4] > 0) { if (w.sr.args[4] > 0) {
struct remote_transfer trans; struct remote_transfer trans;
struct thread_data_s *tp;
int i = 0; int i = 0;
int *tids = malloc(sizeof(int) * w.sr.args[4]); int *tids = malloc(sizeof(int) * w.sr.args[4]);
if (!tids) { if (!tids) {
@ -2430,8 +2883,11 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
goto gettid_out; goto gettid_out;
} }
for (i = 0; i < ncpu && i < w.sr.args[4]; ++i) { for (tp = thread_data; tp && i < w.sr.args[4];
tids[i] = thread_data[i].tid; tp = tp->next) {
if (tp->joined || tp->terminate)
continue;
tids[i++] = tp->tid;
} }
for (; i < ncpu; ++i) { for (; i < ncpu; ++i) {
@ -2528,14 +2984,13 @@ gettid_out:
/* Child process */ /* Child process */
case 0: { case 0: {
int i;
int ret = 1; int ret = 1;
struct newprocess_desc npdesc; struct newprocess_desc npdesc;
ischild = 1; ischild = 1;
/* Reopen device fd */ /* Reopen device fd */
close(fd); close(fd);
fd = open(dev, O_RDWR); fd = opendev();
if (fd < 0) { if (fd < 0) {
fs->status = -errno; fs->status = -errno;
fprintf(stderr, "ERROR: opening %s\n", dev); fprintf(stderr, "ERROR: opening %s\n", dev);
@ -2586,9 +3041,7 @@ fork_child_sync_pipe:
ioctl(fd, MCEXEC_UP_NEW_PROCESS, &npdesc); ioctl(fd, MCEXEC_UP_NEW_PROCESS, &npdesc);
/* TODO: does the forked thread run in a pthread context? */ /* TODO: does the forked thread run in a pthread context? */
for (i = 0; i <= ncpu; ++i) { join_all_threads();
pthread_join(thread_data[i].thread_id, NULL);
}
return ret; return ret;
} }
@ -2622,11 +3075,11 @@ fork_child_sync_pipe:
munmap(fs, sizeof(struct fork_sync)); munmap(fs, sizeof(struct fork_sync));
fork_err: fork_err:
pthread_mutex_lock(&fork_sync_mutex); pthread_mutex_lock(&fork_sync_mutex);
for(fp = fork_sync_top, fb = NULL; fp; fb = fp, fp = fp->next) for (fp = fork_sync_top, fb = NULL; fp; fb = fp, fp = fp->next)
if(fp == fsc) if (fp == fsc)
break; break;
if(fp){ if (fp) {
if(fb) if (fb)
fb->next = fsc->next; fb->next = fsc->next;
else else
fork_sync_top = fsc->next; fork_sync_top = fsc->next;
@ -2645,13 +3098,13 @@ fork_err:
opt = WEXITED | (options & WNOWAIT); opt = WEXITED | (options & WNOWAIT);
memset(&info, '\0', sizeof info); memset(&info, '\0', sizeof info);
while((ret = waitid(P_PID, pid, &info, opt)) == -1 && while ((ret = waitid(P_PID, pid, &info, opt)) == -1 &&
errno == EINTR); errno == EINTR);
if(ret == 0){ if (ret == 0) {
ret = info.si_pid; ret = info.si_pid;
} }
if(ret != pid) { if (ret != pid) {
fprintf(stderr, "ERROR: waiting for %lu rc=%d errno=%d\n", w.sr.args[0], ret, errno); fprintf(stderr, "ERROR: waiting for %lu rc=%d errno=%d\n", w.sr.args[0], ret, errno);
} }
@ -2934,6 +3387,21 @@ return_execve2:
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
break; break;
case __NR_sched_setaffinity:
if (w.sr.args[0] == 0) {
ret = util_thread(w.sr.args[1], w.sr.rtid,
w.sr.args[2]);
}
else {
ret = munmap((void *)w.sr.args[1],
w.sr.args[2]);
if(ret == -1)fprintf(stderr, "munmap rc=%ld errno=%d addr=%p size=%d\n", ret, errno, (void *)w.sr.args[1], (int)w.sr.args[2]);
if (ret == -1)
ret = -errno;
}
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
break;
default: default:
ret = do_generic_syscall(&w); ret = do_generic_syscall(&w);
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
@ -2941,7 +3409,7 @@ return_execve2:
} }
thread_data[cpu].remote_tid = -1; my_thread->remote_tid = -1;
//pthread_mutex_unlock(lock); //pthread_mutex_unlock(lock);
} }

View File

@ -231,6 +231,10 @@ enum mpol_rebind_step {
#define MPOL_F_MOF (1 << 3) /* this policy wants migrate on fault */ #define MPOL_F_MOF (1 << 3) /* this policy wants migrate on fault */
#define MPOL_F_MORON (1 << 4) /* Migrate On pte_numa Reference On Node */ #define MPOL_F_MORON (1 << 4) /* Migrate On pte_numa Reference On Node */
#define SPAWN_TO_LOCAL 0
#define SPAWN_TO_REMOTE 1
#define SPAWNING_TO_REMOTE 1001
#include <waitq.h> #include <waitq.h>
#include <futex.h> #include <futex.h>
@ -667,6 +671,11 @@ struct thread {
/* Syscall offload wait queue head */ /* Syscall offload wait queue head */
struct waitq scd_wq; struct waitq scd_wq;
int thread_offloaded;
int mod_clone;
struct uti_attr *mod_clone_arg;
int parent_cpuid;
}; };
#define VM_RANGE_CACHE_SIZE 4 #define VM_RANGE_CACHE_SIZE 4

View File

@ -517,4 +517,34 @@ struct perf_ctrl_desc {
}; };
}; };
}; };
/*
 * Bit flags stored in uti_attr.flags. Bit 0 is not assigned; the first
 * defined flag is bit 1.
 */
#define UTI_FLAG_NUMA_SET (1ULL<<1) /* Indicates NUMA_SET is specified */
#define UTI_FLAG_SAME_NUMA_DOMAIN (1ULL<<2)
#define UTI_FLAG_DIFFERENT_NUMA_DOMAIN (1ULL<<3)
#define UTI_FLAG_SAME_L1 (1ULL<<4)
#define UTI_FLAG_SAME_L2 (1ULL<<5)
#define UTI_FLAG_SAME_L3 (1ULL<<6)
#define UTI_FLAG_DIFFERENT_L1 (1ULL<<7)
#define UTI_FLAG_DIFFERENT_L2 (1ULL<<8)
#define UTI_FLAG_DIFFERENT_L3 (1ULL<<9)
#define UTI_FLAG_EXCLUSIVE_CPU (1ULL<<10)
#define UTI_FLAG_CPU_INTENSIVE (1ULL<<11)
#define UTI_FLAG_HIGH_PRIORITY (1ULL<<12)
#define UTI_FLAG_NON_COOPERATIVE (1ULL<<13)
/* Linux default value is used */
#define UTI_MAX_NUMA_DOMAINS (1024)
/*
 * Attributes describing where and how a utility thread should run.
 * The structure is copied between user and kernel space as a flat,
 * fixed-size object (it contains no pointers).
 */
typedef struct uti_attr {
/* The UTI_CPU_SET environment variable is used to denote the preferred
location of the utility thread */
/* Bitmap holding UTI_MAX_NUMA_DOMAINS bits, rounded up to whole 64-bit
words (presumably one bit per NUMA domain; confirm against the code
that sets and reads it) */
uint64_t numa_set[(UTI_MAX_NUMA_DOMAINS + sizeof(uint64_t) * 8 - 1) /
(sizeof(uint64_t) * 8)];
uint64_t flags; /* Representing location and behavior hints by bitmap */
} uti_attr_t;
#endif #endif

View File

@ -2864,11 +2864,16 @@ redo:
} else { } else {
/* Pick a new running process or one that has a pending signal */ /* Pick a new running process or one that has a pending signal */
list_for_each_entry_safe(thread, tmp, &(v->runq), sched_list) { list_for_each_entry_safe(thread, tmp, &(v->runq), sched_list) {
if (thread->status == PS_RUNNING || if (thread->status == PS_RUNNING &&
(thread->status == PS_INTERRUPTIBLE && hassigpending(thread))) { thread->mod_clone == SPAWNING_TO_REMOTE){
next = thread; next = thread;
break; break;
} }
if (thread->status == PS_RUNNING ||
(thread->status == PS_INTERRUPTIBLE && hassigpending(thread))) {
if(!next)
next = thread;
}
} }
/* No process? Run idle.. */ /* No process? Run idle.. */

View File

@ -215,10 +215,11 @@ long do_syscall(struct syscall_request *req, int cpu, int pid)
dkprintf("%s: syscall num: %d waiting for Linux.. \n", dkprintf("%s: syscall num: %d waiting for Linux.. \n",
__FUNCTION__, req->number); __FUNCTION__, req->number);
#define STATUS_IN_PROGRESS 0 #define STATUS_IN_PROGRESS 0
#define STATUS_COMPLETED 1 #define STATUS_COMPLETED 1
#define STATUS_PAGE_FAULT 3 #define STATUS_PAGE_FAULT 3
#define STATUS_SYACALL 4
while (res.status != STATUS_COMPLETED) { while (res.status != STATUS_COMPLETED) {
while (res.status == STATUS_IN_PROGRESS) { while (res.status == STATUS_IN_PROGRESS) {
struct cpu_local_var *v; struct cpu_local_var *v;
@ -290,6 +291,75 @@ long do_syscall(struct syscall_request *req, int cpu, int pid)
res.req_thread_status = IHK_SCD_REQ_THREAD_SPINNING; res.req_thread_status = IHK_SCD_REQ_THREAD_SPINNING;
send_syscall(&req2, cpu, pid, &res); send_syscall(&req2, cpu, pid, &res);
} }
if (res.status == STATUS_SYACALL) {
struct syscall_request *requestp;
struct syscall_request request;
int num;
ihk_mc_user_context_t ctx;
int ns;
unsigned long syscall_ret;
unsigned long phys;
phys = ihk_mc_map_memory(NULL, res.fault_address,
sizeof(struct syscall_request));
requestp = ihk_mc_map_virtual(phys, 1,
PTATTR_WRITABLE | PTATTR_ACTIVE);
memcpy(&request, requestp, sizeof request);
ihk_mc_unmap_virtual(requestp, 1, 1);
ihk_mc_unmap_memory(NULL, phys,
sizeof(struct syscall_request));
num = request.number;
if (num == __NR_rt_sigaction) {
int sig = request.args[0];
struct thread *thread = cpu_local_var(current);
sig--;
if (sig < 0 || sig >= _NSIG)
syscall_ret = -EINVAL;
else
syscall_ret = (unsigned long)thread->
sigcommon->action[sig].
sa.sa_handler;
}
else {
ns = (sizeof syscall_table /
sizeof syscall_table[0]);
if (num >= 0 && num < ns &&
syscall_table[num]) {
ihk_mc_syscall_arg0(&ctx) =
request.args[0];
ihk_mc_syscall_arg1(&ctx) =
request.args[1];
ihk_mc_syscall_arg2(&ctx) =
request.args[2];
ihk_mc_syscall_arg3(&ctx) =
request.args[3];
ihk_mc_syscall_arg4(&ctx) =
request.args[4];
ihk_mc_syscall_arg5(&ctx) =
request.args[5];
syscall_ret = syscall_table[num](num,
&ctx);
}
else
syscall_ret = -ENOSYS;
}
/* send result */
req2.number = __NR_mmap;
#define PAGER_RESUME_PAGE_FAULT 0x0101
req2.args[0] = PAGER_RESUME_PAGE_FAULT;
req2.args[1] = syscall_ret;
/* The current thread is the requester and only the waiting thread
* may serve the request */
req2.rtid = cpu_local_var(current)->tid;
req2.ttid = res.stid;
res.req_thread_status = IHK_SCD_REQ_THREAD_SPINNING;
send_syscall(&req2, cpu, pid, &res);
}
} }
dkprintf("%s: syscall num: %d got host reply: %d \n", dkprintf("%s: syscall num: %d got host reply: %d \n",
@ -299,6 +369,7 @@ long do_syscall(struct syscall_request *req, int cpu, int pid)
if(req->number != __NR_exit_group){ if(req->number != __NR_exit_group){
--thread->in_syscall_offload; --thread->in_syscall_offload;
if(req->number == __NR_sched_setaffinity)kprintf("do_syscall 2 offload=%d\n", thread->in_syscall_offload);
} }
/* -ERESTARTSYS indicates that the proxy process is gone /* -ERESTARTSYS indicates that the proxy process is gone
@ -941,15 +1012,16 @@ event_signal()
} }
void void
interrupt_syscall(int pid, int tid) interrupt_syscall(struct thread *thread, int sig)
{ {
dkprintf("interrupt_syscall,target pid=%d,target tid=%d\n", pid, tid);
ihk_mc_user_context_t ctx; ihk_mc_user_context_t ctx;
long lerror; long lerror;
dkprintf("interrupt_syscall pid=%d tid=%d\n", pid, tid); dkprintf("interrupt_syscall pid=%d tid=%d sig=%d\n", thread->proc->pid,
ihk_mc_syscall_arg0(&ctx) = pid; thread->tid, sig);
ihk_mc_syscall_arg1(&ctx) = tid; ihk_mc_syscall_arg0(&ctx) = thread->proc->pid;
ihk_mc_syscall_arg1(&ctx) = thread->tid;
ihk_mc_syscall_arg2(&ctx) = sig;
lerror = syscall_generic_forwarding(__NR_kill, &ctx); lerror = syscall_generic_forwarding(__NR_kill, &ctx);
if (lerror) { if (lerror) {
@ -2044,6 +2116,7 @@ unsigned long do_fork(int clone_flags, unsigned long newsp,
unsigned long cursp) unsigned long cursp)
{ {
int cpuid; int cpuid;
int parent_cpuid;
struct thread *old = cpu_local_var(current); struct thread *old = cpu_local_var(current);
struct process *oldproc = old->proc; struct process *oldproc = old->proc;
struct process *newproc; struct process *newproc;
@ -2057,7 +2130,8 @@ unsigned long do_fork(int clone_flags, unsigned long newsp,
dkprintf("do_fork(): stack_pointr passed in: 0x%lX, stack pointer of caller: 0x%lx\n", dkprintf("do_fork(): stack_pointr passed in: 0x%lX, stack pointer of caller: 0x%lx\n",
newsp, cursp); newsp, cursp);
parent_cpuid = old->cpu_id;
if (((clone_flags & CLONE_VM) && !(clone_flags & CLONE_THREAD)) || if (((clone_flags & CLONE_VM) && !(clone_flags & CLONE_THREAD)) ||
(!(clone_flags & CLONE_VM) && (clone_flags & CLONE_THREAD))) { (!(clone_flags & CLONE_VM) && (clone_flags & CLONE_THREAD))) {
kprintf("clone(): ERROR: CLONE_VM and CLONE_THREAD should be set together\n"); kprintf("clone(): ERROR: CLONE_VM and CLONE_THREAD should be set together\n");
@ -2249,9 +2323,14 @@ retry_tid:
new->tlsblock_base = old->tlsblock_base; new->tlsblock_base = old->tlsblock_base;
} }
new->parent_cpuid = parent_cpuid;
ihk_mc_syscall_ret(new->uctx) = 0; ihk_mc_syscall_ret(new->uctx) = 0;
new->status = PS_RUNNING; new->status = PS_RUNNING;
if (old->mod_clone == SPAWN_TO_REMOTE) {
new->mod_clone = SPAWNING_TO_REMOTE;
}
chain_thread(new); chain_thread(new);
if (!(clone_flags & CLONE_VM)) { if (!(clone_flags & CLONE_VM)) {
newproc->status = PS_RUNNING; newproc->status = PS_RUNNING;
@ -4800,14 +4879,16 @@ SYSCALL_DECLARE(futex)
return ret; return ret;
} }
SYSCALL_DECLARE(exit) static void
do_exit(int code)
{ {
struct thread *thread = cpu_local_var(current); struct thread *thread = cpu_local_var(current);
struct thread *child; struct thread *child;
struct process *proc = thread->proc; struct process *proc = thread->proc;
struct mcs_rwlock_node_irqsave lock; struct mcs_rwlock_node_irqsave lock;
int nproc; int nproc;
int exit_status = (int)ihk_mc_syscall_arg0(ctx); int exit_status = (code >> 8) & 255;
int sig = code & 255;
dkprintf("sys_exit,pid=%d\n", proc->pid); dkprintf("sys_exit,pid=%d\n", proc->pid);
@ -4819,11 +4900,11 @@ SYSCALL_DECLARE(exit)
mcs_rwlock_reader_unlock(&proc->threads_lock, &lock); mcs_rwlock_reader_unlock(&proc->threads_lock, &lock);
if(nproc == 1){ // process has only one thread if(nproc == 1){ // process has only one thread
terminate(exit_status, 0); terminate(exit_status, sig);
#ifdef ENABLE_RUSAGE #ifdef ENABLE_RUSAGE
rusage_num_threads--; rusage_num_threads--;
#endif #endif
return 0; return;
} }
#ifdef DCFA_KMOD #ifdef DCFA_KMOD
@ -4852,7 +4933,7 @@ SYSCALL_DECLARE(exit)
#ifdef ENABLE_RUSAGE #ifdef ENABLE_RUSAGE
rusage_num_threads--; rusage_num_threads--;
#endif #endif
return 0; return;
} }
thread->status = PS_EXITED; thread->status = PS_EXITED;
sync_child_event(thread->proc->monitoring_event); sync_child_event(thread->proc->monitoring_event);
@ -4864,6 +4945,14 @@ SYSCALL_DECLARE(exit)
rusage_num_threads--; rusage_num_threads--;
#endif #endif
return;
}
SYSCALL_DECLARE(exit)
{
int exit_status = (int)ihk_mc_syscall_arg0(ctx);
do_exit(exit_status << 8);
return 0; return 0;
} }
@ -6053,7 +6142,6 @@ SYSCALL_DECLARE(sched_setaffinity)
struct thread *thread; struct thread *thread;
int cpu_id; int cpu_id;
int empty_set = 1; int empty_set = 1;
extern int num_processors;
if (!u_cpu_set) { if (!u_cpu_set) {
return -EFAULT; return -EFAULT;
@ -8412,6 +8500,123 @@ SYSCALL_DECLARE(pmc_reset)
return ihk_mc_perfctr_reset(counter); return ihk_mc_perfctr_reset(counter);
} }
extern void save_uctx(void *, void *);
/*
 * util_thread - offload the current thread to the host side.
 *
 * Saves the user context into a freshly allocated page, then issues a
 * __NR_sched_setaffinity request through do_syscall() with args[0] == 0,
 * args[1] == physical address of the context page and args[2] == physical
 * address of an optional attribute block (0 when arg is NULL).
 * thread_offloaded is set for the duration of that request.
 *
 * arg: attributes already residing in kernel memory, or NULL. When given
 *      they are copied next to the thread's parent_cpuid into a local
 *      kuti_attr whose physical address is handed to the host.
 *
 * Returns -ENOMEM when the context page cannot be allocated; otherwise
 * the value returned by the first do_syscall() (truncated to int).
 */
int
util_thread(struct uti_attr *arg)
{
volatile unsigned long *context;
unsigned long pcontext;
struct syscall_request request IHK_DMA_ALIGN;
long rc;
struct thread *thread = cpu_local_var(current);
unsigned long free_address;
unsigned long free_size;
/* Attribute block as passed to the host: parent CPU id followed by the
 * caller supplied attributes. */
struct kuti_attr {
long parent_cpuid;
struct uti_attr attr;
} kattr;
kprintf("util_thread called\n");
/* One page used to exchange the user context with the host. */
context = (volatile unsigned long *)ihk_mc_alloc_pages(1,
IHK_MC_AP_NOWAIT);
if (!context) {
return -ENOMEM;
}
pcontext = virt_to_phys((void *)context);
save_uctx((void *)context, NULL);
/* First request: args[0] == 0 selects the offload operation. */
request.number = __NR_sched_setaffinity;
request.args[0] = 0;
request.args[1] = pcontext;
request.args[2] = 0;
if (arg) {
memcpy(&kattr.attr, arg, sizeof(struct uti_attr));
kattr.parent_cpuid = thread->parent_cpuid;
/* kattr lives on this stack frame; it stays valid because this
 * function does not return until do_syscall() completes. */
request.args[2] = virt_to_phys(&kattr);
}
/* Flag the thread as offloaded only while the request is outstanding. */
thread->thread_offloaded = 1;
rc = do_syscall(&request, ihk_mc_get_processor_id(), 0);
thread->thread_offloaded = 0;
/* NOTE(review): the first two words of the context page are read back
 * as an address/size pair, presumably filled in by the host while
 * serving the request; confirm against the host-side handler. */
free_address = context[0];
free_size = context[1];
ihk_mc_free_pages((void *)context, 1);
if (rc >= 0) {
/* Low 7 bits carry a signal number and bits 8-15 an exit status
 * (see the terminate() arguments below); bit 32 presumably marks
 * exit_group, per the original comment. */
if (rc & 0x10000007f) { // exit_group || signal
thread->proc->nohost = 1;
terminate((rc >> 8) & 255, rc & 255);
}
else {
/* Second request: args[0] == 1 passes the address/size pair
 * read from the context page back to the host, then the
 * thread exits with the returned code. */
request.number = __NR_sched_setaffinity;
request.args[0] = 1;
request.args[1] = free_address;
request.args[2] = free_size;
do_syscall(&request, ihk_mc_get_processor_id(), 0);
do_exit(rc);
}
}
return rc;
}
/*
 * utilthr_migrate - offload the current thread if it is marked for it.
 *
 * Does nothing unless the current thread's mod_clone is
 * SPAWNING_TO_REMOTE. In that case the mark is reset to SPAWN_TO_LOCAL
 * first, so the offload is attempted only once, and util_thread() is
 * called with the thread's stored attributes. The result of
 * util_thread() is discarded.
 */
void
utilthr_migrate()
{
	struct thread *self = cpu_local_var(current);

	if (self->mod_clone != SPAWNING_TO_REMOTE) {
		return;
	}

	self->mod_clone = SPAWN_TO_LOCAL;
	util_thread(self->mod_clone_arg);
}
/*
 * util_migrate_inter_kernel - system call entry that offloads the calling
 * thread through util_thread().
 *
 * arg0: user pointer to a struct uti_attr, or NULL for no attributes.
 *
 * Returns -EFAULT if the attributes cannot be copied from user space,
 * otherwise whatever util_thread() returns.
 */
SYSCALL_DECLARE(util_migrate_inter_kernel)
{
	struct uti_attr *uattr = (void *)ihk_mc_syscall_arg0(ctx);
	struct uti_attr kattr;

	/* No attributes supplied: offload with defaults. */
	if (!uattr) {
		return util_thread(NULL);
	}

	/* Take a kernel-side snapshot of the user supplied attributes. */
	if (copy_from_user(&kattr, uattr, sizeof(struct uti_attr))) {
		return -EFAULT;
	}

	return util_thread(&kattr);
}
SYSCALL_DECLARE(util_indicate_clone)
{
int mod = (int)ihk_mc_syscall_arg0(ctx);
struct uti_attr *arg = (void *)ihk_mc_syscall_arg1(ctx);
struct thread *thread = cpu_local_var(current);
struct uti_attr *kattr = NULL;
if (mod != SPAWN_TO_LOCAL &&
mod != SPAWN_TO_REMOTE)
return -EINVAL;
if (arg) {
kattr = kmalloc(sizeof(struct uti_attr), IHK_MC_AP_NOWAIT);
if (copy_from_user(kattr, arg, sizeof(struct uti_attr))) {
kfree(kattr);
return -EFAULT;
}
}
thread->mod_clone = mod;
if (thread->mod_clone_arg) {
kfree(thread->mod_clone_arg);
thread->mod_clone_arg = NULL;
}
if (kattr) {
thread->mod_clone_arg = kattr;
}
return 0;
}
/*
 * get_system - system call handler registered as number 732.
 * Ignores its arguments and always returns 0.
 */
SYSCALL_DECLARE(get_system)
{
return 0;
}
void void
reset_cputime() reset_cputime()
{ {