uti: Call into McKernel futex()
(1) Masquerade clv
(2) Fix timeout
(3) Let the mcexec thread with the same tid as the McKernel thread migrating
to Linux handle the migration request
(4) Call create_tracer() before creating proxy related objects
Change-Id: I6b2689b70db49827f10aa7d5a4c581aa81319b55
This commit is contained in:
@ -62,6 +62,7 @@
|
||||
#define MCEXEC_UP_TERMINATE_THREAD 0x30a02925
|
||||
#define MCEXEC_UP_GET_NUM_POOL_THREADS 0x30a02926
|
||||
#define MCEXEC_UP_UTI_ATTR 0x30a02927
|
||||
#define MCEXEC_UP_UNMAP_PSEUDO_FILEMAP 0x30a02928
|
||||
|
||||
#define MCEXEC_UP_DEBUG_LOG 0x40000000
|
||||
|
||||
|
||||
@ -283,6 +283,8 @@ struct mcos_handler_info {
|
||||
int cpu;
|
||||
struct mcctrl_usrdata *ud;
|
||||
struct file *file;
|
||||
unsigned long user_start;
|
||||
unsigned long user_end;
|
||||
};
|
||||
|
||||
struct mcos_handler_info;
|
||||
@ -420,6 +422,8 @@ static long mcexec_start_image(ihk_os_t os,
|
||||
|
||||
info->pid = desc->pid;
|
||||
info->cpu = desc->cpu;
|
||||
info->user_start = desc->user_start;
|
||||
info->user_end = desc->user_end;
|
||||
ihk_os_register_release_handler(file, release_handler, info);
|
||||
ihk_os_set_mcos_private_data(file, info);
|
||||
|
||||
@ -1198,9 +1202,28 @@ int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet)
|
||||
wqhln = wqhln_iter;
|
||||
break;
|
||||
}
|
||||
if (!wqhln) {
|
||||
printk("%s: WARNING: no target thread found for exact request??\n",
|
||||
__FUNCTION__);
|
||||
/* Find the mcexec thread with the same tid as the requesting McKernel thread
|
||||
and let it handle the migrate-to-Linux request */
|
||||
if (packet->req.number == __NR_sched_setaffinity && packet->req.args[0] == 0) {
|
||||
list_for_each_entry(wqhln_iter, &ppd->wq_list, list) {
|
||||
if (packet->req.ttid == wqhln_iter->rtid) {
|
||||
if (!wqhln_iter->task) {
|
||||
printk("%s: ERROR: wqhln_iter->task=%p,rtid=%d,&ppd->wq_list_lock=%p\n", __FUNCTION__, wqhln_iter->task, wqhln_iter->rtid, &ppd->wq_list_lock);
|
||||
} else if(wqhln_iter->req) {
|
||||
/* list_del() is called after woken-up */
|
||||
dprintk("%s: INFO: target thread is busy, wqhln_iter->req=%d,rtid=%d,&ppd->wq_list_lock=%p\n", __FUNCTION__, wqhln_iter->req, wqhln_iter->rtid, &ppd->wq_list_lock);
|
||||
} else {
|
||||
wqhln = wqhln_iter;
|
||||
dprintk("%s: uti, worker with tid of %d found in wq_list\n", __FUNCTION__, packet->req.ttid);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (!wqhln) {
|
||||
printk("%s: WARNING: no target thread (tid=%d) found for exact request??\n",
|
||||
__FUNCTION__, packet->req.ttid);
|
||||
}
|
||||
}
|
||||
}
|
||||
/* Is there any thread available? */
|
||||
@ -1225,6 +1248,12 @@ retry_alloc:
|
||||
wqhln = wqhln_alloc;
|
||||
wqhln->req = 0;
|
||||
wqhln->task = NULL;
|
||||
/* Let the mcexec thread to handle migrate-to-Linux request in mcexec_wait_syscall() after finishing the current task */
|
||||
if (packet->req.number == __NR_sched_setaffinity && packet->req.args[0] == 0) {
|
||||
wqhln->rtid = packet->req.ttid;
|
||||
} else {
|
||||
wqhln->rtid = 0;
|
||||
}
|
||||
init_waitqueue_head(&wqhln->wq_syscall);
|
||||
list_add_tail(&wqhln->list, &ppd->wq_req_list);
|
||||
}
|
||||
@ -1272,16 +1301,27 @@ int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req)
|
||||
retry:
|
||||
/* Prepare per-thread wait queue head or find a valid request */
|
||||
irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock);
|
||||
|
||||
/* Handle migrate-to-Linux request if any */
|
||||
list_for_each_entry(wqhln_iter, &ppd->wq_req_list, list) {
|
||||
if (wqhln_iter->rtid == task_pid_vnr(current)) {
|
||||
wqhln = wqhln_iter;
|
||||
wqhln->task = current;
|
||||
list_del(&wqhln->list);
|
||||
goto found;
|
||||
}
|
||||
}
|
||||
|
||||
/* First see if there is a valid request already that is not yet taken */
|
||||
list_for_each_entry(wqhln_iter, &ppd->wq_req_list, list) {
|
||||
if (wqhln_iter->task == NULL && wqhln_iter->req) {
|
||||
if (!wqhln_iter->rtid && wqhln_iter->task == NULL && wqhln_iter->req) {
|
||||
wqhln = wqhln_iter;
|
||||
wqhln->task = current;
|
||||
list_del(&wqhln->list);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
found:
|
||||
if (!wqhln) {
|
||||
retry_alloc:
|
||||
wqhln = kmalloc(sizeof(*wqhln), GFP_ATOMIC);
|
||||
@ -1293,6 +1333,8 @@ retry_alloc:
|
||||
wqhln->task = current;
|
||||
wqhln->req = 0;
|
||||
wqhln->packet = NULL;
|
||||
/* Let mcexec_syscall() find the mcexec thread to handle migrate-to-Linux request */
|
||||
wqhln->rtid = task_pid_vnr(current);
|
||||
init_waitqueue_head(&wqhln->wq_syscall);
|
||||
|
||||
list_add(&wqhln->list, &ppd->wq_list);
|
||||
@ -2354,6 +2396,8 @@ mcexec_util_thread2(ihk_os_t os, unsigned long arg, struct file *file)
|
||||
void **__user param = (void **__user )arg;
|
||||
void *__user rctx = (void *__user)param[1];
|
||||
void *__user lctx = (void *__user)param[2];
|
||||
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
|
||||
struct mcctrl_per_proc_data *ppd;
|
||||
|
||||
save_fs_ctx(lctx);
|
||||
info = ihk_os_get_mcos_private_data(file);
|
||||
@ -2451,34 +2495,148 @@ err:
|
||||
kfree(thread);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * MCEXEC_UP_UNMAP_PSEUDO_FILEMAP handler: clear the page-table entries of
 * the calling process covering the user range recorded in the handler info
 * attached to @file (user_start/user_end are filled in by
 * mcexec_start_image()).
 *
 * @os:   IHK OS instance (not used by this handler).
 * @file: opened device file whose McKernel private data holds the
 *        struct mcos_handler_info.
 *
 * Returns the result of clear_pte_range(), or -EINVAL when no handler info
 * is attached to @file or the recorded range is inverted.
 */
long mcexec_unmap_pseudo_filemap(ihk_os_t os, struct file *file)
{
	struct mcos_handler_info *info;

	info = ihk_os_get_mcos_private_data(file);
	if (!info) {
		/* The ioctl can be issued before any handler info is attached */
		printk("%s: ERROR: no handler info attached to file\n", __FUNCTION__);
		return -EINVAL;
	}

	if (info->user_end < info->user_start) {
		/* An inverted range would wrap the unsigned length below */
		printk("%s: ERROR: invalid range %p-%p\n", __FUNCTION__,
		       (void *)info->user_start, (void *)info->user_end);
		return -EINVAL;
	}

	dprintk("%s: clear_pte_range %p-%p\n", __FUNCTION__, (void*)info->user_start, (void*)info->user_end);

	return clear_pte_range(info->user_start,
			       info->user_end - info->user_start);
}
|
||||
|
||||
long
|
||||
mcexec_syscall_thread(ihk_os_t os, unsigned long arg, struct file *file)
|
||||
/*
 * Pointer to McKernel's futex entry point. It starts out NULL and is filled
 * in on first use by mcexec_syscall_thread() through
 * ihk_os_get_special_address(IHK_SPADDR_MCKERNEL_DO_FUTEX).
 *
 * Besides the syscall number and its six arguments, the callee receives the
 * copy of the McKernel clv, a struct uti_futex_resp to be woken through, and
 * Linux-side callbacks for waiting, printing and reading the clock.
 */
static long (*mckernel_do_futex)(int n,
				 unsigned long arg0, unsigned long arg1,
				 unsigned long arg2, unsigned long arg3,
				 unsigned long arg4, unsigned long arg5,
				 unsigned long _uti_clv,
				 void *uti_futex_resp,
				 void *_linux_wait_event,
				 void *_linux_printk,
				 void *_linux_clock_gettime);
|
||||
|
||||
long uti_wait_event(void *_resp, unsigned long nsec_timeout) {
|
||||
struct uti_futex_resp *resp = _resp;
|
||||
if (nsec_timeout) {
|
||||
return wait_event_interruptible_timeout(resp->wq, resp->done, nsecs_to_jiffies(nsec_timeout));
|
||||
} else {
|
||||
return wait_event_interruptible(resp->wq, resp->done);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * printf-style wrapper around vprintk(); handed to McKernel's futex code as
 * its printing callback.
 *
 * @fmt: format string, followed by the matching arguments.
 *
 * Returns the value returned by vprintk().
 */
int uti_printk(const char *fmt, ...)
{
	va_list ap;
	int written;

	va_start(ap, fmt);
	written = vprintk(fmt, ap);
	va_end(ap);

	return written;
}
|
||||
|
||||
/*
 * Clock-reading callback handed to McKernel's futex code.
 *
 * @clk_id: CLOCK_REALTIME or CLOCK_MONOTONIC.
 * @tp:     receives the current time of the selected clock.
 *
 * Returns 0 on success, or -EINVAL for any other clock id (in which case
 * @tp is left untouched).
 */
int uti_clock_gettime(clockid_t clk_id, struct timespec *tp)
{
	struct timespec64 now;

	dprintk("%s: clk_id=%x,REALTIME=%x,MONOTONIC=%x\n", __FUNCTION__, clk_id, CLOCK_REALTIME, CLOCK_MONOTONIC);

	if (clk_id == CLOCK_REALTIME) {
		getnstimeofday64(&now);
		tp->tv_sec = now.tv_sec;
		tp->tv_nsec = now.tv_nsec;
		dprintk("%s: CLOCK_REALTIME,%ld.%09ld\n", __FUNCTION__, tp->tv_sec, tp->tv_nsec);
		return 0;
	}

	if (clk_id == CLOCK_MONOTONIC) {
		/* Do not use getrawmonotonic() because it returns different value than clock_gettime() */
		ktime_get_ts64(&now);
		tp->tv_sec = now.tv_sec;
		tp->tv_nsec = now.tv_nsec;
		dprintk("%s: CLOCK_MONOTONIC,%ld.%09ld\n", __FUNCTION__, tp->tv_sec, tp->tv_nsec);
		return 0;
	}

	return -EINVAL;
}
|
||||
|
||||
/*
 * MCEXEC_UP_SYSCALL_THREAD handler: execute a system call on behalf of a
 * thread that has migrated to Linux.
 *
 * The request is read from the user-space syscall_struct at @arg.
 *  - futex requests are served by calling McKernel's futex entry point
 *    directly (resolved once via IHK_SPADDR_MCKERNEL_DO_FUTEX), passing the
 *    copy of the McKernel clv plus Linux-side wait/printk/clock callbacks;
 *  - every other request is forwarded through syscall_backward().
 * The result is written back to the ret field of the user structure.
 *
 * @os:   IHK OS instance.
 * @arg:  user pointer to the request (struct syscall_struct layout below).
 * @file: opened device file (not used by this handler).
 *
 * Returns the value produced by the futex call or by syscall_backward(),
 * -EFAULT when the user structure cannot be read or written, or -EINVAL
 * when the McKernel futex entry point cannot be resolved.
 */
long mcexec_syscall_thread(ihk_os_t os, unsigned long arg, struct file *file)
{
	struct syscall_struct {
		int number;
		unsigned long args[6];
		unsigned long ret;
		unsigned long uti_clv; /* copy of a clv in McKernel */
	};
	struct syscall_struct param;
	struct syscall_struct __user *uparam =
		(struct syscall_struct __user *)arg;
	long rc;

	if (copy_from_user(&param, uparam, sizeof param)) {
		return -EFAULT;
	}

	if (param.number == __NR_futex) {
		/* Lives on this stack frame; only valid while we are in here */
		struct uti_futex_resp resp = {
			.done = 0
		};
		init_waitqueue_head(&resp.wq);

		if (!mckernel_do_futex) {
			if (ihk_os_get_special_address(os, IHK_SPADDR_MCKERNEL_DO_FUTEX,
						       (unsigned long *)&mckernel_do_futex,
						       NULL) ||
			    !mckernel_do_futex) {
				/* Never call through an unresolved pointer */
				kprintf("%s: ihk_os_get_special_address failed\n", __FUNCTION__);
				return -EINVAL;
			}
			dprintk("%s: mckernel_do_futex=%p\n", __FUNCTION__, mckernel_do_futex);
		}

		rc = (*mckernel_do_futex)(param.number,
					  param.args[0], param.args[1], param.args[2],
					  param.args[3], param.args[4], param.args[5],
					  param.uti_clv, (void *)&resp,
					  (void *)uti_wait_event,
					  (void *)uti_printk,
					  (void *)uti_clock_gettime);
		param.ret = rc;
	} else {
		dprintk("%s: syscall_backward, SC %d, tid %d\n", __FUNCTION__, param.number, task_tgid_vnr(current));
		rc = syscall_backward(ihk_host_os_get_usrdata(os), param.number,
				      param.args[0], param.args[1], param.args[2],
				      param.args[3], param.args[4], param.args[5],
				      &param.ret);
	}

	if (copy_to_user(&uparam->ret, &param.ret, sizeof(unsigned long))) {
		return -EFAULT;
	}
	return rc;
}
|
||||
|
||||
void mcctrl_futex_wake(struct ikc_scd_packet *pisp)
|
||||
{
|
||||
struct uti_futex_resp *resp;
|
||||
|
||||
/* Guard the access to pisp->futex.resp, which is dead out of mcexec_syscall_thread() */
|
||||
if (*pisp->futex.spin_sleep == 0) {
|
||||
dprintk("%s: DEBUG: woken up by someone else\n", __FUNCTION__);
|
||||
return;
|
||||
}
|
||||
|
||||
resp = pisp->futex.resp;
|
||||
if (!resp) {
|
||||
kprintf("%s: ERROR: pisp->futex.resp is NULL\n", __FUNCTION__);
|
||||
return;
|
||||
}
|
||||
|
||||
if (*pisp->futex.spin_sleep == 0) {
|
||||
kprintf("%s: ERROR: resp is dead\n", __FUNCTION__);
|
||||
return;
|
||||
}
|
||||
|
||||
resp->done = 1;
|
||||
wake_up_interruptible(&resp->wq);
|
||||
}
|
||||
|
||||
|
||||
static struct ihk_cache_topology *
|
||||
cache_topo_search(struct ihk_cpu_topology *cpu_topo, int level)
|
||||
{
|
||||
@ -2838,6 +2996,9 @@ long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg,
|
||||
case MCEXEC_UP_TERMINATE_THREAD:
|
||||
return mcexec_terminate_thread(os, (unsigned long *)arg, file);
|
||||
|
||||
case MCEXEC_UP_UNMAP_PSEUDO_FILEMAP:
|
||||
return mcexec_unmap_pseudo_filemap(os, file);
|
||||
|
||||
case MCEXEC_UP_GET_NUM_POOL_THREADS:
|
||||
return mcctrl_get_num_pool_threads(os);
|
||||
|
||||
|
||||
@ -90,6 +90,7 @@ static struct ihk_os_user_call_handler mcctrl_uchs[] = {
|
||||
{ .request = MCEXEC_UP_TERMINATE_THREAD, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_GET_NUM_POOL_THREADS, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_UTI_ATTR, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_UNMAP_PSEUDO_FILEMAP, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_DEBUG_LOG, .func = mcctrl_ioctl },
|
||||
{ .request = IHK_OS_AUX_PERF_NUM, .func = mcctrl_ioctl },
|
||||
{ .request = IHK_OS_AUX_PERF_SET, .func = mcctrl_ioctl },
|
||||
|
||||
@ -52,6 +52,8 @@
|
||||
static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ihk_ikc_channel_desc *c);
|
||||
int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet);
|
||||
void sig_done(unsigned long arg, int err);
|
||||
void mcctrl_perf_ack(ihk_os_t os, struct ikc_scd_packet *packet);
|
||||
void mcctrl_futex_wake(struct ikc_scd_packet *pisp);
|
||||
void mcctrl_os_read_write_cpu_response(ihk_os_t os,
|
||||
struct ikc_scd_packet *pisp);
|
||||
void mcctrl_eventfd(ihk_os_t os, struct ikc_scd_packet *pisp);
|
||||
@ -221,6 +223,10 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
|
||||
mcctrl_eventfd(__os, pisp);
|
||||
break;
|
||||
|
||||
case SCD_MSG_FUTEX_WAKE:
|
||||
mcctrl_futex_wake(pisp);
|
||||
break;
|
||||
|
||||
default:
|
||||
printk(KERN_ERR "mcctrl:syscall_packet_handler:"
|
||||
"unknown message (%d.%d.%d.%d.%d.%#lx)\n",
|
||||
|
||||
@ -102,6 +102,8 @@
|
||||
#define SCD_MSG_CPU_RW_REG 0x52
|
||||
#define SCD_MSG_CPU_RW_REG_RESP 0x53
|
||||
|
||||
#define SCD_MSG_FUTEX_WAKE 0x60
|
||||
|
||||
#define DMA_PIN_SHIFT 21
|
||||
|
||||
#define DO_USER_MODE
|
||||
@ -126,6 +128,12 @@ enum mcctrl_os_cpu_operation {
|
||||
MCCTRL_OS_CPU_MAX_OP
|
||||
};
|
||||
|
||||
/* Used to wake-up a Linux thread futex_wait()-ing */
|
||||
struct uti_futex_resp {
|
||||
int done;
|
||||
wait_queue_head_t wq;
|
||||
};
|
||||
|
||||
struct ikc_scd_packet {
|
||||
int msg;
|
||||
int err;
|
||||
@ -164,6 +172,12 @@ struct ikc_scd_packet {
|
||||
struct {
|
||||
int eventfd_type;
|
||||
};
|
||||
|
||||
/* SCD_MSG_FUTEX_WAKE */
|
||||
struct {
|
||||
void *resp;
|
||||
int *spin_sleep; /* 1: waiting in linux_wait_event() 0: woken up by someone else */
|
||||
} futex;
|
||||
};
|
||||
char padding[8];
|
||||
};
|
||||
@ -465,6 +479,7 @@ inline struct mcctrl_per_thread_data *mcctrl_get_per_thread_data(
|
||||
|
||||
void __return_syscall(ihk_os_t os, struct ikc_scd_packet *packet,
|
||||
long ret, int stid);
|
||||
int clear_pte_range(uintptr_t start, uintptr_t len);
|
||||
|
||||
int mcctrl_os_alive(void);
|
||||
|
||||
|
||||
@ -475,7 +475,7 @@ out_put_ppd:
|
||||
return syscall_ret;
|
||||
}
|
||||
|
||||
static int remote_page_fault(struct mcctrl_usrdata *usrdata, void *fault_addr, uint64_t reason)
|
||||
int remote_page_fault(struct mcctrl_usrdata *usrdata, void *fault_addr, uint64_t reason)
|
||||
{
|
||||
struct ikc_scd_packet *packet;
|
||||
struct ikc_scd_packet *free_packet = NULL;
|
||||
@ -1999,7 +1999,7 @@ out:
|
||||
return (IS_ERR_VALUE(map))? (int)map: 0;
|
||||
}
|
||||
|
||||
static int clear_pte_range(uintptr_t start, uintptr_t len)
|
||||
int clear_pte_range(uintptr_t start, uintptr_t len)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
struct vm_area_struct *vma;
|
||||
|
||||
@ -67,6 +67,12 @@ get_syscall_arg6(syscall_args *args)
|
||||
return args->r9;
|
||||
}
|
||||
|
||||
static inline unsigned long
|
||||
get_syscall_rip(syscall_args *args)
|
||||
{
|
||||
return args->rip;
|
||||
}
|
||||
|
||||
static inline void
|
||||
set_syscall_number(syscall_args *args, unsigned long value)
|
||||
{
|
||||
|
||||
@ -192,6 +192,7 @@ struct syscall_struct {
|
||||
int number;
|
||||
unsigned long args[6];
|
||||
unsigned long ret;
|
||||
unsigned long uti_clv; /* copy of a clv in McKernel */
|
||||
};
|
||||
|
||||
#ifdef NCCS
|
||||
@ -1951,6 +1952,20 @@ static void ld_preload_init()
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * Information handed from the thread migrating to Linux to the tracer
 * process, so that the tracer can masquerade as the tracee.
 */
struct uti_desc {
	void *wp;		/* work area; the tracer carves syscall_struct slots out of it */
	int mck_tid;		/* McKernel tid, returned for the tracee's gettid() */
	unsigned long key;	/* passed along with MCEXEC_UP_TERMINATE_THREAD */
	int pid;		/* Used as the id of tracee when issuing MCEXEC_UP_TERMINATE_THREAD */
	int tid;		/* Used as the id of tracee when issuing MCEXEC_UP_TERMINATE_THREAD */
	unsigned long uti_clv;	/* copied into each forwarded syscall_struct */
	sem_t arg;		/* posted once the fields above have been filled in */
	sem_t attach;
};
|
||||
|
||||
/* Forks the tracer process; defined further down in this file. */
static int create_tracer();

/* Pipe the tracer writes to once attached; the tracee blocks reading it. */
int uti_pfd[2];

/* Shared anonymous work area mapped in main() before the tracer is forked. */
void *uti_wp;

/* Shared descriptor through which the tracee passes its ids to the tracer. */
struct uti_desc *uti_desc;
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int ret = 0;
|
||||
@ -2129,6 +2144,31 @@ int main(int argc, char **argv)
|
||||
if (opendev() == -1)
|
||||
exit(EXIT_FAILURE);
|
||||
|
||||
/* Perform mmap() before fork() in create_tracer() */
|
||||
uti_wp = mmap(NULL, PAGE_SIZE * 3, PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
|
||||
if (uti_wp == (void *)-1) {
|
||||
exit(1);
|
||||
}
|
||||
uti_desc = mmap(NULL, sizeof(struct uti_desc), PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
|
||||
if (uti_desc == (void *)-1) {
|
||||
exit(1);
|
||||
}
|
||||
sem_init(&uti_desc->arg, 1, 0);
|
||||
sem_init(&uti_desc->attach, 1, 0);
|
||||
#if 1
|
||||
/* Create tracer before any proxy VMAs are attached */
|
||||
if ((error = pipe(uti_pfd)) == -1) {
|
||||
fprintf(stderr, "%s: pipe returned %d\n", __FUNCTION__, error);
|
||||
return -1;
|
||||
}
|
||||
if ((error = create_tracer())) {
|
||||
fprintf(stderr, "%s: create tracer returned %d\n", __FUNCTION__, error);
|
||||
return error;
|
||||
}
|
||||
#endif
|
||||
|
||||
ld_preload_init();
|
||||
|
||||
#ifdef ADD_ENVS_OPTION
|
||||
@ -2853,11 +2893,8 @@ debug_sig(int s)
|
||||
#endif
|
||||
|
||||
static int
|
||||
create_tracer(void *wp, int mck_tid, unsigned long key)
|
||||
create_tracer()
|
||||
{
|
||||
int pid = getpid();
|
||||
int tid = gettid();
|
||||
int pfd[2];
|
||||
int tpid;
|
||||
int rc;
|
||||
int st;
|
||||
@ -2868,44 +2905,42 @@ create_tracer(void *wp, int mck_tid, unsigned long key)
|
||||
unsigned long code = 0;
|
||||
int exited = 0;
|
||||
int mode = 0;
|
||||
//struct tracer_desc desc;
|
||||
unsigned long buf;
|
||||
|
||||
if (pipe(pfd) == -1)
|
||||
return -1;
|
||||
tpid = fork();
|
||||
if (tpid) {
|
||||
struct timeval tv;
|
||||
fd_set rfd;
|
||||
|
||||
if (tpid == -1)
|
||||
return -1;
|
||||
close(pfd[1]);
|
||||
close(uti_pfd[1]);
|
||||
while ((rc = waitpid(tpid, &st, 0)) == -1 && errno == EINTR);
|
||||
if (rc == -1 || !WIFEXITED(st) || WEXITSTATUS(st)) {
|
||||
fprintf(stderr, "waitpid rc=%d st=%08x\n", rc, st);
|
||||
return -ENOMEM;
|
||||
}
|
||||
#if 0
|
||||
struct timeval tv;
|
||||
fd_set rfd;
|
||||
FD_ZERO(&rfd);
|
||||
FD_SET(pfd[0], &rfd);
|
||||
FD_SET(uti_pfd[0], &rfd);
|
||||
tv.tv_sec = 1;
|
||||
tv.tv_usec = 0;
|
||||
while ((rc = select(pfd[0] + 1, &rfd, NULL, NULL, &tv)) == -1 &&
|
||||
while ((rc = select(uti_pfd[0] + 1, &rfd, NULL, NULL, &tv)) == -1 &&
|
||||
errno == EINTR);
|
||||
if (rc == 0) {
|
||||
close(pfd[0]);
|
||||
fprintf(stderr, "%s: select timed out\n", __FUNCTION__);
|
||||
close(uti_pfd[0]);
|
||||
return -ETIMEDOUT;
|
||||
}
|
||||
if (rc == -1) {
|
||||
close(pfd[0]);
|
||||
fprintf(stderr, "%s: select errno=%d\n", __FUNCTION__, errno);
|
||||
close(uti_pfd[0]);
|
||||
return -errno;
|
||||
}
|
||||
rc = read(pfd[0], &st, 1);
|
||||
close(pfd[0]);
|
||||
if (rc != 1) {
|
||||
return -EAGAIN;
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
close(pfd[0]);
|
||||
close(uti_pfd[0]);
|
||||
tpid = fork();
|
||||
if (tpid) {
|
||||
if (tpid == -1) {
|
||||
@ -2914,17 +2949,42 @@ create_tracer(void *wp, int mck_tid, unsigned long key)
|
||||
}
|
||||
exit(0);
|
||||
}
|
||||
if (ptrace(PTRACE_ATTACH, tid, 0, 0) == -1) {
|
||||
|
||||
#if 0
|
||||
/* Reopen device because one process must be managed by one opened-device */
|
||||
close(fd);
|
||||
fd = opendev();
|
||||
if (fd < 0) {
|
||||
fprintf(stderr, "%s: ERROR: opendev returned %d\n", __FUNCTION__, errno);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (ioctl(fd, MCEXEC_UP_CREATE_PPD) != 0) {
|
||||
fprintf(stderr, "%s: ERROR: MCEXEC_UP_CREATE_PPD returned %d\n", __FUNCTION__, errno);
|
||||
exit(1);
|
||||
}
|
||||
#endif
|
||||
|
||||
sem_wait(&uti_desc->arg);
|
||||
//close(uti_pfd[0]);
|
||||
|
||||
if (ptrace(PTRACE_ATTACH, uti_desc->tid, 0, 0) == -1) {
|
||||
fprintf(stderr, "PTRACE_ATTACH errno=%d\n", errno);
|
||||
exit(1);
|
||||
}
|
||||
waitpid(-1, &st, __WALL);
|
||||
if (ptrace(PTRACE_SETOPTIONS, tid, 0, PTRACE_O_TRACESYSGOOD) == -1) {
|
||||
if (ptrace(PTRACE_SETOPTIONS, uti_desc->tid, 0, PTRACE_O_TRACESYSGOOD) == -1) {
|
||||
fprintf(stderr, "PTRACE_SETOPTIONS errno=%d\n", errno);
|
||||
exit(1);
|
||||
}
|
||||
write(pfd[1], " ", 1);
|
||||
close(pfd[1]);
|
||||
|
||||
/* Wake up tracee so that it can context-switch to McKernel code */
|
||||
rc = write(uti_pfd[1], &buf, sizeof(unsigned long));
|
||||
if (rc != sizeof(unsigned long)) {
|
||||
fprintf(stderr, "%s: write returned %d\n", __FUNCTION__, rc);
|
||||
exit(1);
|
||||
}
|
||||
close(uti_pfd[1]);
|
||||
|
||||
for (i = 0; i < 4096; i++)
|
||||
if (i != fd
|
||||
@ -2940,33 +3000,42 @@ create_tracer(void *wp, int mck_tid, unsigned long key)
|
||||
#endif
|
||||
|
||||
for (i = 1; i <= 10; i++) {
|
||||
param = (struct syscall_struct *)wp + i;
|
||||
param = (struct syscall_struct *)uti_desc->wp + i;
|
||||
*(void **)param = param_top;
|
||||
param_top = param;
|
||||
}
|
||||
memset(wp, '\0', sizeof(long));
|
||||
memset(uti_desc->wp, '\0', sizeof(long));
|
||||
|
||||
#ifdef DEBUG_UTI
|
||||
fprintf(stderr, "tracer PID=%d\n", getpid());
|
||||
signal(SIGINT, debug_sig);
|
||||
#endif
|
||||
for (;;) {
|
||||
ptrace(PTRACE_SYSCALL, tid, 0, sig);
|
||||
ptrace(PTRACE_SYSCALL, uti_desc->tid, 0, sig);
|
||||
sig = 0;
|
||||
waitpid(-1, &st, __WALL);
|
||||
if (WIFEXITED(st) || WIFSIGNALED(st)) {
|
||||
unsigned long term_param[4];
|
||||
|
||||
term_param[0] = pid;
|
||||
term_param[1] = tid;
|
||||
term_param[3] = key;
|
||||
term_param[0] = uti_desc->pid;
|
||||
term_param[1] = uti_desc->tid;
|
||||
term_param[3] = uti_desc->key;
|
||||
code = st;
|
||||
if (exited == 2 || // exit_group
|
||||
WIFSIGNALED(st)) {
|
||||
code |= 0x0000000100000000;
|
||||
}
|
||||
term_param[2] = code;
|
||||
ioctl(fd, MCEXEC_UP_TERMINATE_THREAD, term_param);
|
||||
if (ioctl(fd, MCEXEC_UP_TERMINATE_THREAD, term_param) != 0) {
|
||||
fprintf(stderr, "%s: ERROR: MCEXEC_UP_TERMINATE_THREAD returned %d\n", __FUNCTION__, errno);
|
||||
}
|
||||
__dprintf("%s: WIFEXITED=%d,WIFSIGNALED=%d,WTERMSIG=%d,exited=%d\n", __FUNCTION__, WIFEXITED(st), WIFSIGNALED(st), WTERMSIG(st), exited);
|
||||
#if 0
|
||||
if (ptrace(PTRACE_DETACH, uti_desc->tid, 0, WIFSIGNALED(st) ? WTERMSIG(st) : 0) && errno != ESRCH) {
|
||||
fprintf(stderr, "PTRACE_DETACH errno=%d\n", errno);
|
||||
exit(1);
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
if (!WIFSTOPPED(st)) {
|
||||
@ -2975,7 +3044,7 @@ create_tracer(void *wp, int mck_tid, unsigned long key)
|
||||
if (WSTOPSIG(st) & 0x80) { // syscall
|
||||
syscall_args args;
|
||||
|
||||
get_syscall_args(tid, &args);
|
||||
get_syscall_args(uti_desc->tid, &args);
|
||||
|
||||
#ifdef DEBUG_UTI
|
||||
if (get_syscall_return(&args) == -ENOSYS) {
|
||||
@ -3000,8 +3069,8 @@ create_tracer(void *wp, int mck_tid, unsigned long key)
|
||||
switch (get_syscall_number(&args)) {
|
||||
case __NR_gettid:
|
||||
set_syscall_number(&args, -1);
|
||||
set_syscall_return(&args, mck_tid);
|
||||
set_syscall_args(tid, &args);
|
||||
set_syscall_return(&args, uti_desc->mck_tid);
|
||||
set_syscall_args(uti_desc->tid, &args);
|
||||
continue;
|
||||
case __NR_futex:
|
||||
case __NR_brk:
|
||||
@ -3029,7 +3098,7 @@ create_tracer(void *wp, int mck_tid, unsigned long key)
|
||||
#endif /* POSTK_DEBUG_ARCH_DEP_78 */
|
||||
case __NR_execve:
|
||||
set_syscall_number(&args, -1);
|
||||
set_syscall_args(tid, &args);
|
||||
set_syscall_args(uti_desc->tid, &args);
|
||||
continue;
|
||||
case __NR_ioctl:
|
||||
param = (struct syscall_struct *)
|
||||
@ -3038,7 +3107,7 @@ create_tracer(void *wp, int mck_tid, unsigned long key)
|
||||
get_syscall_arg1(&args) == fd &&
|
||||
get_syscall_arg2(&args) ==
|
||||
MCEXEC_UP_SYSCALL_THREAD &&
|
||||
samepage(wp, param)) {
|
||||
samepage(uti_desc->wp, param)) {
|
||||
set_syscall_arg1(&args, param->args[0]);
|
||||
set_syscall_arg2(&args, param->args[1]);
|
||||
set_syscall_arg3(&args, param->args[2]);
|
||||
@ -3048,7 +3117,7 @@ create_tracer(void *wp, int mck_tid, unsigned long key)
|
||||
set_syscall_return(&args, param->ret);
|
||||
*(void **)param = param_top;
|
||||
param_top = param;
|
||||
set_syscall_args(tid, &args);
|
||||
set_syscall_args(uti_desc->tid, &args);
|
||||
}
|
||||
continue;
|
||||
default:
|
||||
@ -3068,6 +3137,7 @@ create_tracer(void *wp, int mck_tid, unsigned long key)
|
||||
param->args[3] = get_syscall_arg4(&args);
|
||||
param->args[4] = get_syscall_arg5(&args);
|
||||
param->args[5] = get_syscall_arg6(&args);
|
||||
param->uti_clv = uti_desc->uti_clv;
|
||||
param->ret = -EINVAL;
|
||||
set_syscall_number(&args, __NR_ioctl);
|
||||
set_syscall_arg1(&args, fd);
|
||||
@ -3075,7 +3145,7 @@ create_tracer(void *wp, int mck_tid, unsigned long key)
|
||||
MCEXEC_UP_SYSCALL_THREAD);
|
||||
set_syscall_arg3(&args, (unsigned long)param);
|
||||
}
|
||||
set_syscall_args(tid, &args);
|
||||
set_syscall_args(uti_desc->tid, &args);
|
||||
}
|
||||
else { // signal
|
||||
sig = WSTOPSIG(st) & 0x7f;
|
||||
@ -3083,45 +3153,47 @@ create_tracer(void *wp, int mck_tid, unsigned long key)
|
||||
}
|
||||
|
||||
#ifdef DEBUG_UTI
|
||||
fprintf(stderr, "offloaded thread called these syscalls\n");
|
||||
debug_sig(0);
|
||||
//fprintf(stderr, "offloaded thread called these syscalls\n");
|
||||
//debug_sig(0);
|
||||
#endif
|
||||
|
||||
exit(0);
|
||||
}
|
||||
|
||||
static long
|
||||
util_thread(unsigned long uctx_pa, int remote_tid, unsigned long pattr)
|
||||
util_thread(struct thread_data_s *my_thread, unsigned long uctx_pa, int remote_tid, unsigned long pattr, unsigned long uti_clv)
|
||||
{
|
||||
void *lctx;
|
||||
void *rctx;
|
||||
void *wp;
|
||||
void *param[6];
|
||||
int rc = 0;
|
||||
unsigned long buf;
|
||||
|
||||
#ifdef POSTK_DEBUG_ARCH_DEP_35
|
||||
wp = mmap(NULL, page_size * 3, PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
|
||||
#else /* POSTK_DEBUG_ARCH_DEP_35 */
|
||||
wp = mmap(NULL, PAGE_SIZE * 3, PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
|
||||
#endif /* POSTK_DEBUG_ARCH_DEP_35 */
|
||||
if (wp == (void *)-1) {
|
||||
rc = -errno;
|
||||
goto out;
|
||||
#if 0
|
||||
{
|
||||
int error;
|
||||
if ((error = pipe(uti_pfd)) == -1) {
|
||||
fprintf(stderr, "%s: pipe returned %d\n", __FUNCTION__, error);
|
||||
rc = error; goto out;
|
||||
}
|
||||
if ((error = create_tracer())) {
|
||||
fprintf(stderr, "%s: create_tracer returned %d\n", __FUNCTION__, error);
|
||||
rc = error; goto out;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#ifdef POSTK_DEBUG_ARCH_DEP_35
|
||||
lctx = (char *)wp + page_size;
|
||||
lctx = (char *)uti_wp + page_size;
|
||||
rctx = (char *)lctx + page_size;
|
||||
#else /* POSTK_DEBUG_ARCH_DEP_35 */
|
||||
lctx = (char *)wp + PAGE_SIZE;
|
||||
#else
|
||||
lctx = (char *)uti_wp + PAGE_SIZE;
|
||||
rctx = (char *)lctx + PAGE_SIZE;
|
||||
#endif /* POSTK_DEBUG_ARCH_DEP_35 */
|
||||
|
||||
param[0] = (void *)uctx_pa;
|
||||
param[1] = rctx;
|
||||
param[2] = lctx;
|
||||
param[4] = wp;
|
||||
param[4] = uti_wp;
|
||||
#ifdef POSTK_DEBUG_ARCH_DEP_35
|
||||
param[5] = (void *)(page_size * 3);
|
||||
#else /* POSTK_DEBUG_ARCH_DEP_35 */
|
||||
@ -3134,11 +3206,37 @@ util_thread(unsigned long uctx_pa, int remote_tid, unsigned long pattr)
|
||||
}
|
||||
|
||||
create_worker_thread(NULL);
|
||||
if ((rc = create_tracer(wp, remote_tid, (unsigned long)param[3]))) {
|
||||
fprintf(stderr, "create tracer %d\n", rc);
|
||||
|
||||
/* Pass info to the tracer so that it can masquerade as the tracee */
|
||||
uti_desc->wp = uti_wp;
|
||||
uti_desc->mck_tid = remote_tid;
|
||||
uti_desc->key = (unsigned long)param[3];
|
||||
uti_desc->pid = getpid();
|
||||
uti_desc->tid = gettid();
|
||||
uti_desc->uti_clv = uti_clv;
|
||||
|
||||
#if 0
|
||||
//usleep(100000);
|
||||
ssize_t nwritten;
|
||||
char *cur;
|
||||
for(cur = (char*)&uti_desc; (nwritten = write(uti_pfd[1], cur, sizeof(struct uti_desc) - (cur - (char*)&uti_desc))) > 0; cur += nwritten) { }
|
||||
if (nwritten < 0) {
|
||||
fprintf(stderr, "write returned %ld errno=%d\n", nwritten, errno);
|
||||
rc = -errno;
|
||||
goto out;
|
||||
}
|
||||
close(uti_pfd[1]);
|
||||
#endif
|
||||
sem_post(&uti_desc->arg);
|
||||
|
||||
/* Wait until tracer attaches me. We can't use
|
||||
futex because it would be captured and redirected by tracer */
|
||||
rc = read(uti_pfd[0], &buf, sizeof(unsigned long));
|
||||
if (rc != sizeof(unsigned long)) {
|
||||
fprintf(stderr, "%s: write returned %d\n", __FUNCTION__, rc);
|
||||
exit(1);
|
||||
}
|
||||
close(uti_pfd[0]);
|
||||
|
||||
if (pattr) {
|
||||
struct uti_attr_desc desc;
|
||||
@ -3155,11 +3253,11 @@ util_thread(unsigned long uctx_pa, int remote_tid, unsigned long pattr)
|
||||
pthread_exit(NULL);
|
||||
|
||||
out:
|
||||
if (wp)
|
||||
if (uti_wp != (void*)-1)
|
||||
#ifdef POSTK_DEBUG_ARCH_DEP_35
|
||||
munmap(wp, page_size * 3);
|
||||
munmap(uti_wp, page_size * 3);
|
||||
#else /* POSTK_DEBUG_ARCH_DEP_35 */
|
||||
munmap(wp, PAGE_SIZE * 3);
|
||||
munmap(uti_wp, PAGE_SIZE * 3);
|
||||
#endif /* POSTK_DEBUG_ARCH_DEP_35 */
|
||||
return rc;
|
||||
}
|
||||
@ -4225,8 +4323,8 @@ return_execve2:
|
||||
|
||||
case __NR_sched_setaffinity:
|
||||
if (w.sr.args[0] == 0) {
|
||||
ret = util_thread(w.sr.args[1], w.sr.rtid,
|
||||
w.sr.args[2]);
|
||||
ret = util_thread(my_thread, w.sr.args[1], w.sr.rtid,
|
||||
w.sr.args[2], w.sr.args[3]);
|
||||
}
|
||||
else {
|
||||
ret = munmap((void *)w.sr.args[1],
|
||||
|
||||
166
kernel/futex.c
166
kernel/futex.c
@ -71,14 +71,21 @@
|
||||
#include <kmsg.h>
|
||||
#include <timer.h>
|
||||
#include <debug.h>
|
||||
#include <syscall.h>
|
||||
|
||||
//#define DEBUG_PRINT_FUTEX
|
||||
|
||||
#ifdef DEBUG_PRINT_FUTEX
|
||||
#undef DDEBUG_DEFAULT
|
||||
#define DDEBUG_DEFAULT DDEBUG_PRINT
|
||||
#define uti_dkprintf(...) do { ((clv_override && linux_printk) ? (*linux_printk) : kprintf)(__VA_ARGS__); } while (0)
|
||||
#else
|
||||
#define uti_dkprintf(...) do { } while (0)
|
||||
#endif
|
||||
#define uti_kprintf(...) do { ((clv_override && linux_printk) ? (*linux_printk) : kprintf)(__VA_ARGS__); } while (0)
|
||||
|
||||
|
||||
unsigned long ihk_mc_get_ns_per_tsc(void);
|
||||
int futex_cmpxchg_enabled;
|
||||
|
||||
/**
|
||||
@ -108,6 +115,9 @@ struct futex_q {
|
||||
union futex_key key;
|
||||
union futex_key *requeue_pi_key;
|
||||
uint32_t bitset;
|
||||
|
||||
/* Used to wake-up a thread running on a Linux CPU */
|
||||
void *uti_futex_resp;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -180,11 +190,12 @@ static void drop_futex_key_refs(union futex_key *key)
|
||||
* lock_page() might sleep, the caller should not hold a spinlock.
|
||||
*/
|
||||
static int
|
||||
get_futex_key(uint32_t *uaddr, int fshared, union futex_key *key)
|
||||
get_futex_key(uint32_t *uaddr, int fshared, union futex_key *key, struct cpu_local_var *clv_override)
|
||||
{
|
||||
unsigned long address = (unsigned long)uaddr;
|
||||
unsigned long phys;
|
||||
struct process_vm *mm = cpu_local_var(current)->vm;
|
||||
struct thread *thread = cpu_local_var_with_override(current, clv_override);
|
||||
struct process_vm *mm = thread->vm;
|
||||
|
||||
/*
|
||||
* The futex address must be "naturally" aligned.
|
||||
@ -250,7 +261,7 @@ static int cmpxchg_futex_value_locked(uint32_t __user *uaddr, uint32_t uval, uin
|
||||
* The hash bucket lock must be held when this is called.
|
||||
* Afterwards, the futex_q must not be accessed.
|
||||
*/
|
||||
static void wake_futex(struct futex_q *q)
|
||||
static void wake_futex(struct futex_q *q, struct cpu_local_var *clv_override)
|
||||
{
|
||||
struct thread *p = q->task;
|
||||
|
||||
@ -272,8 +283,31 @@ static void wake_futex(struct futex_q *q)
|
||||
barrier();
|
||||
q->lock_ptr = NULL;
|
||||
|
||||
dkprintf("wake_futex(): waking up tid %d\n", p->tid);
|
||||
sched_wakeup_thread(p, PS_NORMAL);
|
||||
|
||||
if (q->uti_futex_resp) {
|
||||
int rc;
|
||||
uti_dkprintf("wake_futex(): waking up migrated-to-Linux thread (tid %d),uti_futex_resp=%p\n", p->tid, q->uti_futex_resp);
|
||||
/* TODO: Add the case when a Linux thread waking up another Linux thread */
|
||||
if (clv_override) {
|
||||
uti_dkprintf("%s: ERROR: A Linux thread is waking up migrated-to-Linux thread\n", __FUNCTION__);
|
||||
}
|
||||
if (p->spin_sleep == 0) {
|
||||
uti_dkprintf("%s: INFO: woken up by someone else\n", __FUNCTION__);
|
||||
}
|
||||
|
||||
struct ikc_scd_packet pckt;
|
||||
struct ihk_ikc_channel_desc *resp_channel = cpu_local_var_with_override(ikc2linux, clv_override);
|
||||
pckt.msg = SCD_MSG_FUTEX_WAKE;
|
||||
pckt.futex.resp = q->uti_futex_resp;
|
||||
pckt.futex.spin_sleep = &p->spin_sleep;
|
||||
rc = ihk_ikc_send(resp_channel, &pckt, 0);
|
||||
if (rc) {
|
||||
uti_dkprintf("%s: ERROR: ihk_ikc_send returned %d, resp_channel=%p\n", __FUNCTION__, rc, resp_channel);
|
||||
}
|
||||
} else {
|
||||
uti_dkprintf("wake_futex(): waking up McKernel thread (tid %d)\n", p->tid);
|
||||
sched_wakeup_thread(p, PS_NORMAL);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -303,7 +337,7 @@ double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
|
||||
/*
|
||||
* Wake up waiters matching bitset queued on this futex (uaddr).
|
||||
*/
|
||||
static int futex_wake(uint32_t *uaddr, int fshared, int nr_wake, uint32_t bitset)
|
||||
static int futex_wake(uint32_t *uaddr, int fshared, int nr_wake, uint32_t bitset, struct cpu_local_var *clv_override)
|
||||
{
|
||||
struct futex_hash_bucket *hb;
|
||||
struct futex_q *this, *next;
|
||||
@ -314,7 +348,7 @@ static int futex_wake(uint32_t *uaddr, int fshared, int nr_wake, uint32_t bitset
|
||||
if (!bitset)
|
||||
return -EINVAL;
|
||||
|
||||
ret = get_futex_key(uaddr, fshared, &key);
|
||||
ret = get_futex_key(uaddr, fshared, &key, clv_override);
|
||||
if ((ret != 0))
|
||||
goto out;
|
||||
|
||||
@ -330,7 +364,7 @@ static int futex_wake(uint32_t *uaddr, int fshared, int nr_wake, uint32_t bitset
|
||||
if (!(this->bitset & bitset))
|
||||
continue;
|
||||
|
||||
wake_futex(this);
|
||||
wake_futex(this, clv_override);
|
||||
if (++ret >= nr_wake)
|
||||
break;
|
||||
}
|
||||
@ -348,7 +382,8 @@ out:
|
||||
*/
|
||||
static int
|
||||
futex_wake_op(uint32_t *uaddr1, int fshared, uint32_t *uaddr2,
|
||||
int nr_wake, int nr_wake2, int op)
|
||||
int nr_wake, int nr_wake2, int op,
|
||||
struct cpu_local_var *clv_override)
|
||||
{
|
||||
union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
|
||||
struct futex_hash_bucket *hb1, *hb2;
|
||||
@ -357,10 +392,10 @@ futex_wake_op(uint32_t *uaddr1, int fshared, uint32_t *uaddr2,
|
||||
int ret, op_ret;
|
||||
|
||||
retry:
|
||||
ret = get_futex_key(uaddr1, fshared, &key1);
|
||||
ret = get_futex_key(uaddr1, fshared, &key1, clv_override);
|
||||
if ((ret != 0))
|
||||
goto out;
|
||||
ret = get_futex_key(uaddr2, fshared, &key2);
|
||||
ret = get_futex_key(uaddr2, fshared, &key2, clv_override);
|
||||
if ((ret != 0))
|
||||
goto out_put_key1;
|
||||
|
||||
@ -394,7 +429,7 @@ retry_private:
|
||||
|
||||
plist_for_each_entry_safe(this, next, head, list) {
|
||||
if (match_futex (&this->key, &key1)) {
|
||||
wake_futex(this);
|
||||
wake_futex(this, clv_override);
|
||||
if (++ret >= nr_wake)
|
||||
break;
|
||||
}
|
||||
@ -406,7 +441,7 @@ retry_private:
|
||||
op_ret = 0;
|
||||
plist_for_each_entry_safe(this, next, head, list) {
|
||||
if (match_futex (&this->key, &key2)) {
|
||||
wake_futex(this);
|
||||
wake_futex(this, clv_override);
|
||||
if (++op_ret >= nr_wake2)
|
||||
break;
|
||||
}
|
||||
@ -469,7 +504,7 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
|
||||
*/
|
||||
static int futex_requeue(uint32_t *uaddr1, int fshared, uint32_t *uaddr2,
|
||||
int nr_wake, int nr_requeue, uint32_t *cmpval,
|
||||
int requeue_pi)
|
||||
int requeue_pi, struct cpu_local_var *clv_override)
|
||||
{
|
||||
union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
|
||||
int drop_count = 0, task_count = 0, ret;
|
||||
@ -477,10 +512,10 @@ static int futex_requeue(uint32_t *uaddr1, int fshared, uint32_t *uaddr2,
|
||||
struct plist_head *head1;
|
||||
struct futex_q *this, *next;
|
||||
|
||||
ret = get_futex_key(uaddr1, fshared, &key1);
|
||||
ret = get_futex_key(uaddr1, fshared, &key1, clv_override);
|
||||
if ((ret != 0))
|
||||
goto out;
|
||||
ret = get_futex_key(uaddr2, fshared, &key2);
|
||||
ret = get_futex_key(uaddr2, fshared, &key2, clv_override);
|
||||
if ((ret != 0))
|
||||
goto out_put_key1;
|
||||
|
||||
@ -515,7 +550,7 @@ static int futex_requeue(uint32_t *uaddr1, int fshared, uint32_t *uaddr2,
|
||||
*/
|
||||
/* RIKEN: no requeue_pi at this moment */
|
||||
if (++task_count <= nr_wake) {
|
||||
wake_futex(this);
|
||||
wake_futex(this, clv_override);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -574,7 +609,7 @@ queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
|
||||
* state is implicit in the state of woken task (see futex_wait_requeue_pi() for
|
||||
* an example).
|
||||
*/
|
||||
static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
|
||||
static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb, struct cpu_local_var *clv_override)
|
||||
{
|
||||
int prio;
|
||||
|
||||
@ -595,7 +630,7 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
|
||||
q->list.plist.spinlock = &hb->lock;
|
||||
#endif
|
||||
plist_add(&q->list, &hb->chain);
|
||||
q->task = cpu_local_var(current);
|
||||
q->task = cpu_local_var_with_override(current, clv_override);
|
||||
ihk_mc_spinlock_unlock_noirq(&hb->lock);
|
||||
}
|
||||
|
||||
@ -658,19 +693,19 @@ retry:
|
||||
/* RIKEN: this function has been rewritten so that it returns the remaining
|
||||
* time in case we are woken.
|
||||
*/
|
||||
static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
|
||||
uint64_t timeout)
|
||||
static int64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
|
||||
uint64_t timeout, struct cpu_local_var *clv_override)
|
||||
{
|
||||
uint64_t time_remain = 0;
|
||||
int64_t time_remain = 0;
|
||||
unsigned long irqstate;
|
||||
struct thread *thread = cpu_local_var(current);
|
||||
struct thread *thread = cpu_local_var_with_override(current, clv_override);
|
||||
/*
|
||||
* The task state is guaranteed to be set before another task can
|
||||
* wake it.
|
||||
* queue_me() calls spin_unlock() upon completion, serializing
|
||||
* access to the hash list and forcing a memory barrier.
|
||||
*/
|
||||
xchg4(&(cpu_local_var(current)->status), PS_INTERRUPTIBLE);
|
||||
xchg4(&(thread->status), PS_INTERRUPTIBLE);
|
||||
|
||||
/* Indicate spin sleep. Note that schedule_timeout() with
|
||||
* idle_halt should use spin sleep because sleep with timeout
|
||||
@ -682,25 +717,40 @@ static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q
|
||||
ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
|
||||
}
|
||||
|
||||
queue_me(q, hb);
|
||||
queue_me(q, hb, clv_override);
|
||||
|
||||
if (!plist_node_empty(&q->list)) {
|
||||
if (clv_override) {
|
||||
uti_dkprintf("%s: tid: %d is trying to sleep\n", __FUNCTION__, thread->tid);
|
||||
/* Note that the unit of timeout is nsec */
|
||||
time_remain = (*linux_wait_event)(q->uti_futex_resp, timeout);
|
||||
|
||||
/* Note that time_remain == 0 indicates the condition evaluated to false after the timeout elapsed */
|
||||
if (time_remain < 0) {
|
||||
if (time_remain == -ERESTARTSYS) { /* Interrupted by signal */
|
||||
uti_dkprintf("%s: DEBUG: wait_event returned -ERESTARTSYS\n", __FUNCTION__);
|
||||
} else {
|
||||
uti_kprintf("%s: ERROR: wait_event returned %d\n", __FUNCTION__, time_remain);
|
||||
}
|
||||
}
|
||||
uti_dkprintf("%s: tid: %d woken up\n", __FUNCTION__, thread->tid);
|
||||
} else {
|
||||
|
||||
if (timeout) {
|
||||
dkprintf("futex_wait_queue_me(): tid: %d schedule_timeout()\n", cpu_local_var(current)->tid);
|
||||
dkprintf("futex_wait_queue_me(): tid: %d schedule_timeout()\n", thread->tid);
|
||||
time_remain = schedule_timeout(timeout);
|
||||
}
|
||||
else {
|
||||
dkprintf("futex_wait_queue_me(): tid: %d schedule()\n", cpu_local_var(current)->tid);
|
||||
dkprintf("futex_wait_queue_me(): tid: %d schedule()\n", thread->tid);
|
||||
spin_sleep_or_schedule();
|
||||
time_remain = 0;
|
||||
}
|
||||
|
||||
dkprintf("futex_wait_queue_me(): tid: %d woken up\n", cpu_local_var(current)->tid);
|
||||
dkprintf("futex_wait_queue_me(): tid: %d woken up\n", thread->tid);
|
||||
}
|
||||
}
|
||||
|
||||
/* This does not need to be serialized */
|
||||
cpu_local_var(current)->status = PS_RUNNING;
|
||||
thread->status = PS_RUNNING;
|
||||
thread->spin_sleep = 0;
|
||||
|
||||
return time_remain;
|
||||
@ -724,7 +774,8 @@ static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q
|
||||
* <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
|
||||
*/
|
||||
static int futex_wait_setup(uint32_t __user *uaddr, uint32_t val, int fshared,
|
||||
struct futex_q *q, struct futex_hash_bucket **hb)
|
||||
struct futex_q *q, struct futex_hash_bucket **hb,
|
||||
struct cpu_local_var *clv_override)
|
||||
{
|
||||
uint32_t uval;
|
||||
int ret;
|
||||
@ -747,7 +798,7 @@ static int futex_wait_setup(uint32_t __user *uaddr, uint32_t val, int fshared,
|
||||
* rare, but normal.
|
||||
*/
|
||||
q->key = FUTEX_KEY_INIT;
|
||||
ret = get_futex_key(uaddr, fshared, &q->key);
|
||||
ret = get_futex_key(uaddr, fshared, &q->key, clv_override);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
@ -771,46 +822,54 @@ static int futex_wait_setup(uint32_t __user *uaddr, uint32_t val, int fshared,
|
||||
}
|
||||
|
||||
static int futex_wait(uint32_t __user *uaddr, int fshared,
|
||||
uint32_t val, uint64_t timeout, uint32_t bitset, int clockrt)
|
||||
uint32_t val, uint64_t timeout, uint32_t bitset, int clockrt,
|
||||
struct cpu_local_var *clv_override)
|
||||
{
|
||||
struct futex_hash_bucket *hb;
|
||||
struct futex_q q;
|
||||
uint64_t time_remain;
|
||||
int64_t time_remain;
|
||||
int ret;
|
||||
|
||||
if (!bitset)
|
||||
return -EINVAL;
|
||||
|
||||
#ifdef PROFILE_ENABLE
|
||||
if (cpu_local_var(current)->profile &&
|
||||
cpu_local_var(current)->profile_start_ts) {
|
||||
cpu_local_var(current)->profile_elapsed_ts +=
|
||||
(rdtsc() - cpu_local_var(current)->profile_start_ts);
|
||||
cpu_local_var(current)->profile_start_ts = 0;
|
||||
if (cpu_local_var_with_override(current, clv_override)->profile &&
|
||||
cpu_local_var_with_override(current, clv_override)->profile_start_ts) {
|
||||
cpu_local_var_with_override(current, clv_override)->profile_elapsed_ts +=
|
||||
(rdtsc() - cpu_local_var_with_override(current, clv_override)->profile_start_ts);
|
||||
cpu_local_var_with_override(current, clv_override)->profile_start_ts = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
q.bitset = bitset;
|
||||
q.requeue_pi_key = NULL;
|
||||
q.uti_futex_resp = cpu_local_var_with_override(uti_futex_resp, clv_override);
|
||||
|
||||
retry:
|
||||
/* Prepare to wait on uaddr. */
|
||||
ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
|
||||
if (ret)
|
||||
ret = futex_wait_setup(uaddr, val, fshared, &q, &hb, clv_override);
|
||||
if (ret) {
|
||||
uti_dkprintf("%s: tid=%d futex_wait_setup returns zero, no need to sleep\n", __FUNCTION__, cpu_local_var_with_override(current, clv_override)->tid);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* queue_me and wait for wakeup, timeout, or a signal. */
|
||||
time_remain = futex_wait_queue_me(hb, &q, timeout);
|
||||
time_remain = futex_wait_queue_me(hb, &q, timeout, clv_override);
|
||||
|
||||
/* If we were woken (and unqueued), we succeeded, whatever. */
|
||||
ret = 0;
|
||||
if (!unqueue_me(&q))
|
||||
if (!unqueue_me(&q)) {
|
||||
uti_dkprintf("%s: tid=%d unqueued\n", __FUNCTION__, cpu_local_var_with_override(current, clv_override)->tid);
|
||||
goto out_put_key;
|
||||
}
|
||||
ret = -ETIMEDOUT;
|
||||
|
||||
/* RIKEN: timer expired case (indicated by !time_remain) */
|
||||
if (timeout && !time_remain)
|
||||
if (timeout && !time_remain) {
|
||||
uti_dkprintf("%s: tid=%d timer expired\n", __FUNCTION__, cpu_local_var_with_override(current, clv_override)->tid);
|
||||
goto out_put_key;
|
||||
}
|
||||
|
||||
if (hassigpending(cpu_local_var(current))) {
|
||||
ret = -EINTR;
|
||||
@ -825,19 +884,22 @@ out_put_key:
|
||||
put_futex_key(fshared, &q.key);
|
||||
out:
|
||||
#ifdef PROFILE_ENABLE
|
||||
if (cpu_local_var(current)->profile) {
|
||||
cpu_local_var(current)->profile_start_ts = rdtsc();
|
||||
if (cpu_local_var_with_override(current, clv_override)->profile) {
|
||||
cpu_local_var_with_override(current, clv_override)->profile_start_ts = rdtsc();
|
||||
}
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
||||
int futex(uint32_t *uaddr, int op, uint32_t val, uint64_t timeout,
|
||||
uint32_t *uaddr2, uint32_t val2, uint32_t val3, int fshared)
|
||||
uint32_t *uaddr2, uint32_t val2, uint32_t val3, int fshared,
|
||||
struct cpu_local_var *clv_override)
|
||||
{
|
||||
int clockrt, ret = -ENOSYS;
|
||||
int cmd = op & FUTEX_CMD_MASK;
|
||||
|
||||
uti_dkprintf("%s: uaddr=%p, op=%x, val=%x, timeout=%ld, uaddr2=%p, val2=%x, val3=%x, fshared=%d, clv=%p\n", __FUNCTION__, uaddr, op, val, timeout, uaddr2, val2, val3, fshared, clv_override);
|
||||
|
||||
clockrt = op & FUTEX_CLOCK_REALTIME;
|
||||
if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
|
||||
return -ENOSYS;
|
||||
@ -846,21 +908,21 @@ int futex(uint32_t *uaddr, int op, uint32_t val, uint64_t timeout,
|
||||
case FUTEX_WAIT:
|
||||
val3 = FUTEX_BITSET_MATCH_ANY;
|
||||
case FUTEX_WAIT_BITSET:
|
||||
ret = futex_wait(uaddr, fshared, val, timeout, val3, clockrt);
|
||||
ret = futex_wait(uaddr, fshared, val, timeout, val3, clockrt, clv_override);
|
||||
break;
|
||||
case FUTEX_WAKE:
|
||||
val3 = FUTEX_BITSET_MATCH_ANY;
|
||||
case FUTEX_WAKE_BITSET:
|
||||
ret = futex_wake(uaddr, fshared, val, val3);
|
||||
ret = futex_wake(uaddr, fshared, val, val3, clv_override);
|
||||
break;
|
||||
case FUTEX_REQUEUE:
|
||||
ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0);
|
||||
ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0, clv_override);
|
||||
break;
|
||||
case FUTEX_CMP_REQUEUE:
|
||||
ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, 0);
|
||||
ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, 0, clv_override);
|
||||
break;
|
||||
case FUTEX_WAKE_OP:
|
||||
ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3);
|
||||
ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3, clv_override);
|
||||
break;
|
||||
/* RIKEN: these calls are not supported for now.
|
||||
case FUTEX_LOCK_PI:
|
||||
|
||||
@ -100,6 +100,9 @@ struct cpu_local_var {
|
||||
struct list_head smp_func_req_list;
|
||||
|
||||
struct process_vm *on_fork_vm;
|
||||
|
||||
/* UTI */
|
||||
void *uti_futex_resp;
|
||||
} __attribute__((aligned(64)));
|
||||
|
||||
|
||||
@ -111,4 +114,6 @@ static struct cpu_local_var *get_this_cpu_local_var(void)
|
||||
|
||||
#define cpu_local_var(name) get_this_cpu_local_var()->name
|
||||
|
||||
/*
 * Read the per-CPU variable `name` from `clv_override` when that pointer is
 * non-NULL, otherwise from the cpu_local_var of the CPU we are running on.
 *
 * The clv_override argument is parenthesized so that any pointer expression
 * (e.g. `&copy` or `cond ? a : b`) expands correctly. Note that it is
 * evaluated twice when non-NULL, so do not pass expressions with side effects.
 */
#define cpu_local_var_with_override(name, clv_override) \
	((clv_override) ? (clv_override)->name : get_this_cpu_local_var()->name)
|
||||
|
||||
#endif
|
||||
|
||||
@ -150,6 +150,7 @@ union futex_key {
|
||||
|
||||
extern int futex_init(void);
|
||||
|
||||
struct cpu_local_var;
|
||||
extern int
|
||||
futex(
|
||||
uint32_t __user * uaddr,
|
||||
@ -159,7 +160,8 @@ futex(
|
||||
uint32_t __user * uaddr2,
|
||||
uint32_t val2,
|
||||
uint32_t val3,
|
||||
int fshared
|
||||
int fshared,
|
||||
struct cpu_local_var *clv_override
|
||||
);
|
||||
|
||||
|
||||
|
||||
@ -83,6 +83,8 @@
|
||||
#define SCD_MSG_CPU_RW_REG 0x52
|
||||
#define SCD_MSG_CPU_RW_REG_RESP 0x53
|
||||
|
||||
#define SCD_MSG_FUTEX_WAKE 0x60
|
||||
|
||||
/* Cloning flags. */
|
||||
# define CSIGNAL 0x000000ff /* Signal mask to be sent at exit. */
|
||||
# define CLONE_VM 0x00000100 /* Set if VM shared between processes. */
|
||||
@ -276,6 +278,12 @@ struct ikc_scd_packet {
|
||||
struct {
|
||||
int eventfd_type;
|
||||
};
|
||||
|
||||
/* SCD_MSG_FUTEX_WAKE */
|
||||
struct {
|
||||
void *resp;
|
||||
int *spin_sleep; /* 1: waiting in linux_wait_event() 0: woken up by someone else */
|
||||
} futex;
|
||||
};
|
||||
char padding[8];
|
||||
};
|
||||
@ -475,6 +483,14 @@ int arch_cpu_read_write_register(struct ihk_os_cpu_register *desc,
|
||||
enum mcctrl_os_cpu_operation op);
|
||||
struct vm_range_numa_policy *vm_range_policy_search(struct process_vm *vm, uintptr_t addr);
|
||||
time_t time(void);
|
||||
long do_futex(int n, unsigned long arg0, unsigned long arg1,
|
||||
unsigned long arg2, unsigned long arg3,
|
||||
unsigned long arg4, unsigned long arg5,
|
||||
unsigned long _uti_clv,
|
||||
void *uti_futex_resp,
|
||||
void *_linux_wait_event,
|
||||
void *_linux_printk,
|
||||
void *_linux_clock_gettime);
|
||||
|
||||
#ifndef POSTK_DEBUG_ARCH_DEP_52
|
||||
#define VDSO_MAXPAGES 2
|
||||
@ -592,4 +608,9 @@ struct move_pages_smp_req {
|
||||
#define PROCESS_VM_READ 0
|
||||
#define PROCESS_VM_WRITE 1
|
||||
|
||||
/* uti: function pointers pointing to Linux codes */
|
||||
extern long (*linux_wait_event)(void *_resp, unsigned long nsec_timeout);
|
||||
extern int (*linux_printk)(const char *fmt, ...);
|
||||
extern int (*linux_clock_gettime)(clockid_t clk_id, struct timespec *tp);
|
||||
|
||||
#endif
|
||||
|
||||
@ -25,6 +25,8 @@
|
||||
#define CLOCK_PROCESS_CPUTIME_ID 2
|
||||
#define CLOCK_THREAD_CPUTIME_ID 3
|
||||
|
||||
typedef int clockid_t;
|
||||
|
||||
typedef long int __time_t;
|
||||
|
||||
/* POSIX.1b structure for a time value. This is like a `struct timeval' but
|
||||
|
||||
@ -251,6 +251,11 @@ static void nmi_init()
|
||||
ihk_set_nmi_mode_addr(phys);
|
||||
}
|
||||
|
||||
static void uti_init()
|
||||
{
|
||||
ihk_set_mckernel_do_futex((unsigned long)do_futex);
|
||||
}
|
||||
|
||||
static void rest_init(void)
|
||||
{
|
||||
handler_init();
|
||||
@ -266,6 +271,7 @@ static void rest_init(void)
|
||||
#endif /* !POSTK_DEBUG_TEMP_FIX_73 */
|
||||
cpu_local_var_init();
|
||||
nmi_init();
|
||||
uti_init();
|
||||
time_init();
|
||||
kmalloc_init();
|
||||
|
||||
|
||||
132
kernel/syscall.c
132
kernel/syscall.c
@ -74,6 +74,13 @@
|
||||
#define DDEBUG_DEFAULT DDEBUG_PRINT
|
||||
#endif
|
||||
|
||||
#define DEBUG_UTI

/*
 * uti_dkprintf(fmt, ...): debug print usable on the UTI path.
 *
 * With DEBUG_UTI defined, the message goes through the function pointed to
 * by linux_printk when both `uti_clv` and `linux_printk` are non-NULL, and
 * through kprintf otherwise. The macro reads a variable named `uti_clv`, so
 * one must be in scope wherever it is expanded.
 * Without DEBUG_UTI the macro expands to an empty statement.
 */
#ifdef DEBUG_UTI
#define uti_dkprintf(...) \
	do { \
		if (uti_clv && linux_printk) \
			(*linux_printk)(__VA_ARGS__); \
		else \
			kprintf(__VA_ARGS__); \
	} while (0)
#else
#define uti_dkprintf(...) do { } while (0)
#endif
|
||||
|
||||
//static ihk_atomic_t pid_cnt = IHK_ATOMIC_INIT(1024);
|
||||
|
||||
/* generate system call handler's prototypes */
|
||||
@ -139,6 +146,10 @@ static void do_mod_exit(int status);
|
||||
*/
|
||||
#define NR_TIDS (allow_oversubscribe ? (num_processors * 2) : num_processors)
|
||||
|
||||
long (*linux_wait_event)(void *_resp, unsigned long nsec_timeout);
|
||||
int (*linux_printk)(const char *fmt, ...);
|
||||
int (*linux_clock_gettime)(clockid_t clk_id, struct timespec *tp);
|
||||
|
||||
static void send_syscall(struct syscall_request *req, int cpu, int pid, struct syscall_response *res)
|
||||
{
|
||||
struct ikc_scd_packet packet IHK_DMA_ALIGN;
|
||||
@ -264,10 +275,21 @@ long do_syscall(struct syscall_request *req, int cpu, int pid)
|
||||
++thread->in_syscall_offload;
|
||||
}
|
||||
|
||||
/* The current thread is the requester and any thread from
|
||||
* the pool may serve the request */
|
||||
/* The current thread is the requester */
|
||||
req->rtid = cpu_local_var(current)->tid;
|
||||
req->ttid = 0;
|
||||
|
||||
if (req->number == __NR_sched_setaffinity && req->args[0] == 0) {
|
||||
/* mcexec thread serving migrate-to-Linux request must have
|
||||
the same tid as the requesting McKernel thread because the
|
||||
serving thread jumps to hfi driver and then jumps to
|
||||
rus_vm_fault() without registering it into per thread data
|
||||
by mcctrl_add_per_thread_data()). */
|
||||
req->ttid = cpu_local_var(current)->tid/*0*/;
|
||||
dkprintf("%s: uti, ttid=%d\n", __FUNCTION__, req->ttid);
|
||||
} else {
|
||||
/* Any thread from the pool may serve the request */
|
||||
req->ttid = 0;
|
||||
}
|
||||
res.req_thread_status = IHK_SCD_REQ_THREAD_SPINNING;
|
||||
#ifdef POSTK_DEBUG_TEMP_FIX_26 /* do_syscall arg pid is not targetpid */
|
||||
send_syscall(req, cpu, target_pid, &res);
|
||||
@ -5323,8 +5345,16 @@ SYSCALL_DECLARE(shmdt)
|
||||
return 0;
|
||||
} /* sys_shmdt() */
|
||||
|
||||
SYSCALL_DECLARE(futex)
|
||||
long do_futex(int n, unsigned long arg0, unsigned long arg1,
|
||||
unsigned long arg2, unsigned long arg3,
|
||||
unsigned long arg4, unsigned long arg5,
|
||||
unsigned long _uti_clv,
|
||||
void *uti_futex_resp,
|
||||
void *_linux_wait_event,
|
||||
void *_linux_printk,
|
||||
void *_linux_clock_gettime)
|
||||
{
|
||||
struct cpu_local_var *uti_clv = (struct cpu_local_var *)_uti_clv;
|
||||
uint64_t timeout = 0; // No timeout
|
||||
uint32_t val2 = 0;
|
||||
// Only one clock is used, ignore FUTEX_CLOCK_REALTIME
|
||||
@ -5332,24 +5362,44 @@ SYSCALL_DECLARE(futex)
|
||||
int fshared = 1;
|
||||
int ret = 0;
|
||||
|
||||
uint32_t *uaddr = (uint32_t *)ihk_mc_syscall_arg0(ctx);
|
||||
int op = (int)ihk_mc_syscall_arg1(ctx);
|
||||
uint32_t val = (uint32_t)ihk_mc_syscall_arg2(ctx);
|
||||
struct timespec *utime = (struct timespec*)ihk_mc_syscall_arg3(ctx);
|
||||
uint32_t *uaddr2 = (uint32_t *)ihk_mc_syscall_arg4(ctx);
|
||||
uint32_t val3 = (uint32_t)ihk_mc_syscall_arg5(ctx);
|
||||
uint32_t *uaddr = (uint32_t *)arg0;
|
||||
int op = (int)arg1;
|
||||
uint32_t val = (uint32_t)arg2;
|
||||
struct timespec *utime = (struct timespec*)arg3;
|
||||
uint32_t *uaddr2 = (uint32_t *)arg4;
|
||||
uint32_t val3 = (uint32_t)arg5;
|
||||
int flags = op;
|
||||
struct ihk_os_cpu_monitor *monitor = cpu_local_var(monitor);
|
||||
|
||||
monitor->status = IHK_OS_MONITOR_KERNEL_HEAVY;
|
||||
|
||||
|
||||
/* TODO: replace these with passing via struct smp_boot_param */
|
||||
if (_linux_printk && !linux_printk) {
|
||||
linux_printk = (int (*)(const char *fmt, ...))_linux_printk;
|
||||
}
|
||||
if (_linux_wait_event && !linux_wait_event) {
|
||||
linux_wait_event = (long (*)(void *_resp, unsigned long nsec_timeout))_linux_wait_event;
|
||||
}
|
||||
if (_linux_clock_gettime && !linux_clock_gettime) {
|
||||
linux_clock_gettime = (int (*)(clockid_t clk_id, struct timespec *tp))_linux_clock_gettime;
|
||||
}
|
||||
|
||||
/* Fill in clv */
|
||||
if (uti_clv) {
|
||||
uti_clv->uti_futex_resp = uti_futex_resp;
|
||||
}
|
||||
|
||||
/* monitor is per-cpu object */
|
||||
if (!uti_clv) {
|
||||
struct ihk_os_cpu_monitor *monitor = cpu_local_var(monitor);
|
||||
monitor->status = IHK_OS_MONITOR_KERNEL_HEAVY;
|
||||
}
|
||||
|
||||
/* Cross-address space futex? */
|
||||
if (op & FUTEX_PRIVATE_FLAG) {
|
||||
fshared = 0;
|
||||
}
|
||||
op = (op & FUTEX_CMD_MASK);
|
||||
|
||||
dkprintf("futex op=[%x, %s],uaddr=%lx, val=%x, utime=%lx, uaddr2=%lx, val3=%x, []=%x, shared: %d\n",
|
||||
uti_dkprintf("futex op=[%x, %s],uaddr=%lx, val=%x, utime=%lx, uaddr2=%lx, val3=%x, []=%x, shared: %d\n",
|
||||
flags,
|
||||
(op == FUTEX_WAIT) ? "FUTEX_WAIT" :
|
||||
(op == FUTEX_WAIT_BITSET) ? "FUTEX_WAIT_BITSET" :
|
||||
@ -5360,8 +5410,13 @@ SYSCALL_DECLARE(futex)
|
||||
(op == FUTEX_REQUEUE) ? "FUTEX_REQUEUE (NOT IMPL!)" : "unknown",
|
||||
(unsigned long)uaddr, val, utime, uaddr2, val3, *uaddr, fshared);
|
||||
|
||||
if ((op == FUTEX_WAIT || op == FUTEX_WAIT_BITSET) && utime) {
|
||||
uti_dkprintf("%s: utime=%ld.%09ld\n", __FUNCTION__, utime->tv_sec, utime->tv_nsec);
|
||||
}
|
||||
if (utime && (op == FUTEX_WAIT_BITSET || op == FUTEX_WAIT)) {
|
||||
unsigned long nsec_timeout;
|
||||
if (!uti_clv) {
|
||||
/* Use cycles for non-UTI case */
|
||||
|
||||
/* As per the Linux implementation FUTEX_WAIT specifies the duration of
|
||||
* the timeout, while FUTEX_WAIT_BITSET specifies the absolute timestamp */
|
||||
@ -5407,19 +5462,35 @@ SYSCALL_DECLARE(futex)
|
||||
else {
|
||||
nsec_timeout = (utime->tv_sec * NS_PER_SEC + utime->tv_nsec);
|
||||
}
|
||||
|
||||
timeout = nsec_timeout * 1000 / ihk_mc_get_ns_per_tsc();
|
||||
dkprintf("futex timeout: %lu\n", timeout);
|
||||
|
||||
}
|
||||
else{
|
||||
if (op == FUTEX_WAIT_BITSET) { /* User passed absolute time */
|
||||
struct timespec ats;
|
||||
ret = (*linux_clock_gettime)((flags & FUTEX_CLOCK_REALTIME) ? CLOCK_REALTIME: CLOCK_MONOTONIC, &ats);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
uti_dkprintf("%s: ats=%ld.%09ld\n", __FUNCTION__, ats.tv_sec, ats.tv_nsec);
|
||||
/* Use nsec for UTI case */
|
||||
timeout = (utime->tv_sec * NS_PER_SEC + utime->tv_nsec) -
|
||||
(ats.tv_sec * NS_PER_SEC + ats.tv_nsec);
|
||||
} else { /* User passed relative time */
|
||||
/* Use nsec for UTI case */
|
||||
timeout = (utime->tv_sec * NS_PER_SEC + utime->tv_nsec);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Requeue parameter in 'utime' if op == FUTEX_CMP_REQUEUE.
|
||||
* number of waiters to wake in 'utime' if op == FUTEX_WAKE_OP. */
|
||||
if (op == FUTEX_CMP_REQUEUE || op == FUTEX_WAKE_OP)
|
||||
val2 = (uint32_t) (unsigned long) ihk_mc_syscall_arg3(ctx);
|
||||
val2 = (uint32_t) (unsigned long) arg3;
|
||||
|
||||
ret = futex(uaddr, op, val, timeout, uaddr2, val2, val3, fshared);
|
||||
ret = futex(uaddr, op, val, timeout, uaddr2, val2, val3, fshared, uti_clv);
|
||||
|
||||
dkprintf("futex op=[%x, %s],uaddr=%lx, val=%x, utime=%lx, uaddr2=%lx, val3=%x, []=%x, shared: %d, ret: %d\n",
|
||||
uti_dkprintf("futex op=[%x, %s],uaddr=%lx, val=%x, utime=%lx, uaddr2=%lx, val3=%x, []=%x, shared: %d, ret: %d\n",
|
||||
op,
|
||||
(op == FUTEX_WAIT) ? "FUTEX_WAIT" :
|
||||
(op == FUTEX_WAIT_BITSET) ? "FUTEX_WAIT_BITSET" :
|
||||
@ -5433,6 +5504,14 @@ SYSCALL_DECLARE(futex)
|
||||
return ret;
|
||||
}
|
||||
|
||||
SYSCALL_DECLARE(futex)
|
||||
{
|
||||
return do_futex(n, ihk_mc_syscall_arg0(ctx), ihk_mc_syscall_arg1(ctx),
|
||||
ihk_mc_syscall_arg2(ctx), ihk_mc_syscall_arg3(ctx),
|
||||
ihk_mc_syscall_arg4(ctx), ihk_mc_syscall_arg5(ctx),
|
||||
0UL, NULL, NULL, NULL, NULL);
|
||||
}
|
||||
|
||||
static void
|
||||
do_exit(int code)
|
||||
{
|
||||
@ -5474,7 +5553,7 @@ do_exit(int code)
|
||||
setint_user((int*)thread->clear_child_tid, 0);
|
||||
barrier();
|
||||
futex((uint32_t *)thread->clear_child_tid,
|
||||
FUTEX_WAKE, 1, 0, NULL, 0, 0, 1);
|
||||
FUTEX_WAKE, 1, 0, NULL, 0, 0, 1, NULL);
|
||||
}
|
||||
|
||||
mcs_rwlock_writer_lock(&proc->threads_lock, &lock);
|
||||
@ -8998,6 +9077,7 @@ util_thread(struct uti_attr *arg)
|
||||
{
|
||||
volatile unsigned long *context;
|
||||
unsigned long pcontext;
|
||||
struct cpu_local_var *uti_clv;
|
||||
struct syscall_request request IHK_DMA_ALIGN;
|
||||
long rc;
|
||||
struct thread *thread = cpu_local_var(current);
|
||||
@ -9016,6 +9096,14 @@ util_thread(struct uti_attr *arg)
|
||||
pcontext = virt_to_phys((void *)context);
|
||||
save_uctx((void *)context, NULL);
|
||||
|
||||
/* Create a copy of clv and replace clv with it when the Linux thread calls in a McKernel function */
|
||||
uti_clv = kmalloc(sizeof(struct cpu_local_var), IHK_MC_AP_NOWAIT);
|
||||
if (!uti_clv) {
|
||||
ihk_mc_free_pages((void *)context, 1);
|
||||
return -ENOMEM;
|
||||
}
|
||||
memcpy(uti_clv, get_this_cpu_local_var(), sizeof(struct cpu_local_var));
|
||||
|
||||
request.number = __NR_sched_setaffinity;
|
||||
request.args[0] = 0;
|
||||
request.args[1] = pcontext;
|
||||
@ -9025,19 +9113,23 @@ util_thread(struct uti_attr *arg)
|
||||
kattr.parent_cpuid = thread->parent_cpuid;
|
||||
request.args[2] = virt_to_phys(&kattr);
|
||||
}
|
||||
request.args[3] = (unsigned long)uti_clv;
|
||||
thread->thread_offloaded = 1;
|
||||
rc = do_syscall(&request, ihk_mc_get_processor_id(), 0);
|
||||
thread->thread_offloaded = 0;
|
||||
free_address = context[0];
|
||||
free_size = context[1];
|
||||
ihk_mc_free_pages((void *)context, 1);
|
||||
kfree(uti_clv);
|
||||
|
||||
if (rc >= 0) {
|
||||
if (rc & 0x10000007f) { // exit_group || signal
|
||||
dkprintf("%s: exit_group || signal\n", __FUNCTION__);
|
||||
thread->proc->nohost = 1;
|
||||
terminate((rc >> 8) & 255, rc & 255);
|
||||
}
|
||||
else {
|
||||
dkprintf("%s: !exit_group && !signal\n", __FUNCTION__);
|
||||
request.number = __NR_sched_setaffinity;
|
||||
request.args[0] = 1;
|
||||
request.args[1] = free_address;
|
||||
|
||||
@ -229,6 +229,7 @@ char *ihk_get_kargs(void);
|
||||
int ihk_set_monitor(unsigned long addr, unsigned long size);
|
||||
int ihk_set_rusage(unsigned long addr, unsigned long size);
|
||||
int ihk_set_nmi_mode_addr(unsigned long addr);
|
||||
int ihk_set_mckernel_do_futex(unsigned long addr);
|
||||
|
||||
extern void (*__tlb_flush_handler)(int vector);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user