diff --git a/executer/include/uprotocol.h b/executer/include/uprotocol.h index 6d297bdf..e8dfe0ec 100644 --- a/executer/include/uprotocol.h +++ b/executer/include/uprotocol.h @@ -110,6 +110,13 @@ struct program_load_desc { }; struct syscall_request { + /* TID of requesting thread */ + int rtid; + /* + * TID of target thread. Remote page fault response needs to designate the + * thread that must serve the request, 0 indicates any thread from the pool + */ + int ttid; unsigned long valid; unsigned long number; unsigned long args[6]; @@ -129,6 +136,10 @@ struct syscall_load_desc { }; struct syscall_response { + /* TID of the thread that requested the service */ + int ttid; + /* TID of the mcexec thread that is serving or has served the request */ + int stid; unsigned long status; long ret; unsigned long fault_address; diff --git a/executer/kernel/mcctrl/control.c b/executer/kernel/mcctrl/control.c index e1994b2b..c9893406 100644 --- a/executer/kernel/mcctrl/control.c +++ b/executer/kernel/mcctrl/control.c @@ -83,6 +83,7 @@ static long mcexec_prepare_image(ihk_os_t os, struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); unsigned long flags; struct mcctrl_per_proc_data *ppd = NULL; + int i; if (copy_from_user(&desc, udesc, sizeof(struct program_load_desc))) { @@ -156,6 +157,14 @@ static long mcexec_prepare_image(ihk_os_t os, ppd->pid = pdesc->pid; ppd->rpgtable = pdesc->rpgtable; + INIT_LIST_HEAD(&ppd->wq_list); + INIT_LIST_HEAD(&ppd->wq_list_exact); + spin_lock_init(&ppd->wq_list_lock); + + for (i = 0; i < MCCTRL_PER_THREAD_DATA_HASH_SIZE; ++i) { + INIT_LIST_HEAD(&ppd->per_thread_data_hash[i]); + rwlock_init(&ppd->per_thread_data_hash_lock[i]); + } flags = ihk_ikc_spinlock_lock(&usrdata->per_proc_list_lock); list_add_tail(&ppd->list, &usrdata->per_proc_list); @@ -417,42 +426,115 @@ static long mcexec_get_cpu(ihk_os_t os) return info->n_cpus; } -int mcexec_syscall(struct mcctrl_channel *c, int pid, unsigned long arg) +struct mcctrl_per_proc_data 
*mcctrl_get_per_proc_data( + struct mcctrl_usrdata *ud, + int pid) { - struct wait_queue_head_list_node *wqhln = NULL; - struct wait_queue_head_list_node *wqhln_iter; + struct mcctrl_per_proc_data *ppd = NULL, *ppd_iter; unsigned long flags; - /* Look up per-process wait queue head with pid */ - flags = ihk_ikc_spinlock_lock(&c->wq_list_lock); - list_for_each_entry(wqhln_iter, &c->wq_list, list) { - if (wqhln_iter->pid == pid) { - wqhln = wqhln_iter; + /* Look up per-process structure */ + flags = ihk_ikc_spinlock_lock(&ud->per_proc_list_lock); + list_for_each_entry(ppd_iter, &ud->per_proc_list, list) { + if (ppd_iter->pid == pid) { + ppd = ppd_iter; break; } } + ihk_ikc_spinlock_unlock(&ud->per_proc_list_lock, flags); + + return ppd; +} + +/* + * Called indirectly from the IKC message handler (atomic context). + */ +int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet) +{ + struct wait_queue_head_list_node *wqhln = NULL; + struct wait_queue_head_list_node *wqhln_iter; + struct wait_queue_head_list_node *wqhln_alloc = NULL; + struct mcctrl_channel *c = ud->channels + packet->ref; + int pid = packet->pid; + unsigned long flags; + struct mcctrl_per_proc_data *ppd; - if (!wqhln) { retry_alloc: - wqhln = kmalloc(sizeof(*wqhln), GFP_ATOMIC); - if (!wqhln) { - printk("WARNING: coudln't alloc wait queue head, retrying..\n"); - goto retry_alloc; - } - - wqhln->pid = pid; - wqhln->req = 0; - init_waitqueue_head(&wqhln->wq_syscall); - list_add_tail(&wqhln->list, &c->wq_list); + wqhln_alloc = kmalloc(sizeof(*wqhln), GFP_ATOMIC); + if (!wqhln_alloc) { + printk("WARNING: coudln't alloc wait queue head, retrying..\n"); + goto retry_alloc; } + /* Look up per-process structure; release the pre-allocated node if there is none */ + ppd = mcctrl_get_per_proc_data(ud, pid); + if (!ppd) { + kprintf("%s: ERROR: no per-process structure for PID %d??\n", + __FUNCTION__, pid); + kfree(wqhln_alloc); + return 0; + } + + dprintk("%s: (packet_handler) rtid: %d, ttid: %d, sys nr: %lu\n", + __FUNCTION__, + c->param.request_va->rtid, 
+ c->param.request_va->ttid, + c->param.request_va->number); + /* + * Three scenarios are possible: + * - Find the designated thread if req->ttid is specified. + * - Find any available thread if req->ttid is zero. + * - Add a request element if no threads are available. + */ + flags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock); + + /* Is this a request for a specific thread? See if it's waiting */ + if (c->param.request_va->ttid) { + list_for_each_entry(wqhln_iter, &ppd->wq_list_exact, list) { + if (c->param.request_va->ttid != task_pid_vnr(wqhln_iter->task)) + continue; + + wqhln = wqhln_iter; + break; + } + if (!wqhln) { + printk("%s: WARNING: no target thread found for exact request??\n", + __FUNCTION__); + } + } + /* Is there any thread available? */ + else { + list_for_each_entry(wqhln_iter, &ppd->wq_list, list) { + if (wqhln_iter->task && !wqhln_iter->req) { + wqhln = wqhln_iter; + break; + } + } + } + + /* If no match found, add request */ + if (!wqhln) { + wqhln = wqhln_alloc; + wqhln->req = 0; + wqhln->task = NULL; + init_waitqueue_head(&wqhln->wq_syscall); + list_add_tail(&wqhln->list, &ppd->wq_list); + } + else { + kfree(wqhln_alloc); + } + + memcpy(&wqhln->packet, packet, sizeof(*packet)); wqhln->req = 1; wake_up(&wqhln->wq_syscall); - ihk_ikc_spinlock_unlock(&c->wq_list_lock, flags); + ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, flags); return 0; } +/* + * Called from an mcexec thread via ioctl(). 
+ */ int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req) { struct syscall_wait_desc swd; @@ -462,8 +544,18 @@ int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req) struct wait_queue_head_list_node *wqhln_iter; int ret = 0; unsigned long irqflags; - -//printk("mcexec_wait_syscall swd=%p req=%p size=%d\n", &swd, req, sizeof(swd.cpu)); + struct mcctrl_per_proc_data *ppd; + + /* Look up per-process structure */ + ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current)); + + if (!ppd) { + kprintf("%s: ERROR: no per-process structure for PID %d??\n", + __FUNCTION__, task_tgid_vnr(current)); + return -EINVAL; + } + + //printk("mcexec_wait_syscall swd=%p req=%p size=%d\n", &swd, req, sizeof(swd.cpu)); if (copy_from_user(&swd, req, sizeof(swd))) { return -EFAULT; } @@ -471,16 +563,15 @@ int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req) if (swd.cpu >= usrdata->num_channels) return -EINVAL; - c = get_peer_channel(usrdata, current); + c = (struct mcctrl_channel *)mcctrl_get_per_thread_data(ppd, current); if (c) { printk("mcexec_wait_syscall:already registered. 
task %p ch %p\n", current, c); return -EBUSY; } - c = usrdata->channels + swd.cpu; retry: - /* Prepare per-process wait queue head */ + /* Prepare per-thread wait queue head or find a valid request */ retry_alloc: wqhln = kmalloc(sizeof(*wqhln), GFP_KERNEL); if (!wqhln) { @@ -488,35 +579,48 @@ retry_alloc: goto retry_alloc; } - wqhln->pid = swd.pid; + wqhln->task = current; wqhln->req = 0; init_waitqueue_head(&wqhln->wq_syscall); - irqflags = ihk_ikc_spinlock_lock(&c->wq_list_lock); - /* First see if there is one wait queue already */ - list_for_each_entry(wqhln_iter, &c->wq_list, list) { - if (wqhln_iter->pid == task_tgid_vnr(current)) { + irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock); + /* First see if there is a valid request already that is not yet taken */ + list_for_each_entry(wqhln_iter, &ppd->wq_list, list) { + if (wqhln_iter->task == NULL && wqhln_iter->req) { kfree(wqhln); wqhln = wqhln_iter; + wqhln->task = current; list_del(&wqhln->list); break; } } - list_add_tail(&wqhln->list, &c->wq_list); - ihk_ikc_spinlock_unlock(&c->wq_list_lock, irqflags); - ret = wait_event_interruptible(wqhln->wq_syscall, wqhln->req); + /* No valid request? Wait for one.. 
*/ + if (wqhln->req == 0) { + list_add_tail(&wqhln->list, &ppd->wq_list); + ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, irqflags); + + ret = wait_event_interruptible(wqhln->wq_syscall, wqhln->req); + + /* Remove per-thread wait queue head */ + irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock); + list_del(&wqhln->list); + } + ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, irqflags); - /* Remove per-process wait queue head */ - irqflags = ihk_ikc_spinlock_lock(&c->wq_list_lock); - list_del(&wqhln->list); - ihk_ikc_spinlock_unlock(&c->wq_list_lock, irqflags); if (ret && !wqhln->req) { kfree(wqhln); return -EINTR; } + + /* Channel is determined by request */ + dprintk("%s: tid: %d request from CPU %d\n", + __FUNCTION__, task_pid_vnr(current), wqhln->packet.ref); + + c = usrdata->channels + wqhln->packet.ref; kfree(wqhln); +#if 0 if (c->param.request_va->number == 61 && c->param.request_va->args[0] == swd.pid) { @@ -528,6 +632,7 @@ retry_alloc: return -EINTR; } +#endif mb(); if (!c->param.request_va->valid) { @@ -543,18 +648,27 @@ retry_alloc: dprintk("SC #%lx, %lx\n", c->param.request_va->number, c->param.request_va->args[0]); - register_peer_channel(usrdata, current, c); + if (mcctrl_add_per_thread_data(ppd, current, c) < 0) { + kprintf("%s: error adding per-thread data\n", __FUNCTION__); + return -EINVAL; + } if (__do_in_kernel_syscall(os, c, c->param.request_va)) { if (copy_to_user(&req->sr, c->param.request_va, sizeof(struct syscall_request))) { - deregister_peer_channel(usrdata, current, c); + if (mcctrl_delete_per_thread_data(ppd, current) < 0) { + kprintf("%s: error deleting per-thread data\n", __FUNCTION__); + return -EINVAL; + } return -EFAULT; } return 0; } - deregister_peer_channel(usrdata, current, c); + if (mcctrl_delete_per_thread_data(ppd, current) < 0) { + kprintf("%s: error deleting per-thread data\n", __FUNCTION__); + return -EINVAL; + } goto retry; } @@ -675,6 +789,7 @@ long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg) struct 
syscall_ret_desc ret; struct mcctrl_channel *mc; struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); + struct mcctrl_per_proc_data *ppd; #if 0 ihk_dma_channel_t channel; struct ihk_dma_request request; @@ -688,13 +803,25 @@ long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg) if (copy_from_user(&ret, arg, sizeof(struct syscall_ret_desc))) { return -EFAULT; } - mc = usrdata->channels + ret.cpu; - if (!mc) { + + /* Look up per-process structure */ + ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current)); + if (!ppd) { + kprintf("%s: ERROR: no per-process structure for PID %d??\n", + __FUNCTION__, task_tgid_vnr(current)); return -EINVAL; } - deregister_peer_channel(usrdata, current, mc); + + mc = (struct mcctrl_channel *)mcctrl_get_per_thread_data(ppd, current); + if (!mc) { + kprintf("%s: ERROR: no peer channel registerred??\n", __FUNCTION__); + return -EINVAL; + } + + mcctrl_delete_per_thread_data(ppd, current); mc->param.response_va->ret = ret.ret; + mc->param.response_va->stid = task_pid_vnr(current); if (ret.size > 0) { /* Host => Accel. Write is fast. 
*/ @@ -876,6 +1003,34 @@ int mcexec_close_exec(ihk_os_t os) struct mckernel_exec_file *mcef = NULL; int found = 0; int os_ind = ihk_host_os_get_index(os); + struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); + unsigned long flags; + struct mcctrl_per_proc_data *ppd = NULL, *ppd_iter; + + ppd = NULL; + flags = ihk_ikc_spinlock_lock(&usrdata->per_proc_list_lock); + + list_for_each_entry(ppd_iter, &usrdata->per_proc_list, list) { + if (ppd_iter->pid == task_tgid_vnr(current)) { + ppd = ppd_iter; + break; + } + } + + if (ppd) { + list_del(&ppd->list); + + dprintk("pid: %d, tid: %d: rpgtable for %d (0x%lx) removed\n", + task_tgid_vnr(current), current->pid, ppd->pid, ppd->rpgtable); + + kfree(ppd); + } + else { + printk("WARNING: no per process data for pid %d ?\n", + task_tgid_vnr(current)); + } + + ihk_ikc_spinlock_unlock(&usrdata->per_proc_list_lock, flags); if (os_ind < 0) { return EINVAL; diff --git a/executer/kernel/mcctrl/ikc.c b/executer/kernel/mcctrl/ikc.c index 2d513517..00e201e9 100644 --- a/executer/kernel/mcctrl/ikc.c +++ b/executer/kernel/mcctrl/ikc.c @@ -40,7 +40,7 @@ void mcexec_prepare_ack(ihk_os_t os, unsigned long arg, int err); static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ihk_ikc_channel_desc *c); -int mcexec_syscall(struct mcctrl_channel *c, int pid, unsigned long arg); +int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet); void sig_done(unsigned long arg, int err); /* XXX: this runs in atomic context! 
*/ @@ -64,7 +64,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, break; case SCD_MSG_SYSCALL_ONESIDE: - mcexec_syscall(usrdata->channels + pisp->ref, pisp->pid, pisp->arg); + mcexec_syscall(usrdata, pisp); break; case SCD_MSG_PROCFS_ANSWER: @@ -263,9 +263,6 @@ static int connect_handler(struct ihk_ikc_channel_info *param) } param->packet_handler = syscall_packet_handler; - INIT_LIST_HEAD(&usrdata->channels[cpu].wq_list); - spin_lock_init(&usrdata->channels[cpu].wq_list_lock); - usrdata->channels[cpu].c = c; kprintf("syscall: MC CPU %d connected. c=%p\n", cpu, c); @@ -284,9 +281,6 @@ static int connect_handler2(struct ihk_ikc_channel_info *param) param->packet_handler = syscall_packet_handler; - INIT_LIST_HEAD(&usrdata->channels[cpu].wq_list); - spin_lock_init(&usrdata->channels[cpu].wq_list_lock); - usrdata->channels[cpu].c = c; kprintf("syscall: MC CPU %d connected. c=%p\n", cpu, c); @@ -313,7 +307,6 @@ int prepare_ikc_channels(ihk_os_t os) { struct ihk_cpu_info *info; struct mcctrl_usrdata *usrdata; - int error; usrdata = kzalloc(sizeof(struct mcctrl_usrdata), GFP_KERNEL); usrdata->mcctrl_doorbell_va = (void *)__get_free_page(GFP_KERNEL); @@ -351,11 +344,6 @@ int prepare_ikc_channels(ihk_os_t os) INIT_LIST_HEAD(&usrdata->cpu_topology_list); INIT_LIST_HEAD(&usrdata->node_topology_list); - error = init_peer_channel_registry(usrdata); - if (error) { - return error; - } - return 0; } @@ -394,7 +382,6 @@ void destroy_ikc_channels(ihk_os_t os) } free_page((unsigned long)usrdata->mcctrl_doorbell_va); - destroy_peer_channel_registry(usrdata); kfree(usrdata->channels); kfree(usrdata); } diff --git a/executer/kernel/mcctrl/mcctrl.h b/executer/kernel/mcctrl/mcctrl.h index 9f60326a..4d46e54d 100644 --- a/executer/kernel/mcctrl/mcctrl.h +++ b/executer/kernel/mcctrl/mcctrl.h @@ -41,6 +41,7 @@ #include #include #include +#include #include #include "sysfs.h" @@ -154,8 +155,11 @@ struct syscall_params { struct wait_queue_head_list_node { struct list_head 
list; wait_queue_head_t wq_syscall; - int pid; + struct task_struct *task; + /* Denotes an exclusive wait for requester TID rtid */ + int rtid; int req; + struct ikc_scd_packet packet; }; struct mcctrl_channel { @@ -163,15 +167,29 @@ struct mcctrl_channel { struct syscall_params param; struct ikc_scd_init_param init; void *dma_buf; - - struct list_head wq_list; - ihk_spinlock_t wq_list_lock; }; +struct mcctrl_per_thread_data { + struct list_head hash; + struct task_struct *task; + void *data; +}; + +#define MCCTRL_PER_THREAD_DATA_HASH_SHIFT 8 +#define MCCTRL_PER_THREAD_DATA_HASH_SIZE (1 << MCCTRL_PER_THREAD_DATA_HASH_SHIFT) +#define MCCTRL_PER_THREAD_DATA_HASH_MASK (MCCTRL_PER_THREAD_DATA_HASH_SIZE - 1) + struct mcctrl_per_proc_data { struct list_head list; int pid; unsigned long rpgtable; /* per process, not per OS */ + + struct list_head wq_list; + struct list_head wq_list_exact; + ihk_spinlock_t wq_list_lock; + + struct list_head per_thread_data_hash[MCCTRL_PER_THREAD_DATA_HASH_SIZE]; + rwlock_t per_thread_data_hash_lock[MCCTRL_PER_THREAD_DATA_HASH_SIZE]; }; struct sysfsm_req { @@ -273,12 +291,16 @@ int mcctrl_ikc_is_valid_thread(ihk_os_t os, int cpu); ihk_os_t osnum_to_os(int n); /* syscall.c */ -int init_peer_channel_registry(struct mcctrl_usrdata *ud); -void destroy_peer_channel_registry(struct mcctrl_usrdata *ud); -int register_peer_channel(struct mcctrl_usrdata *ud, void *key, struct mcctrl_channel *ch); -int deregister_peer_channel(struct mcctrl_usrdata *ud, void *key, struct mcctrl_channel *ch); -struct mcctrl_channel *get_peer_channel(struct mcctrl_usrdata *ud, void *key); int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall_request *sc); +struct mcctrl_per_proc_data *mcctrl_get_per_proc_data( + struct mcctrl_usrdata *ud, + int pid); +int mcctrl_add_per_thread_data(struct mcctrl_per_proc_data* ppd, + struct task_struct *task, void *data); +int mcctrl_delete_per_thread_data(struct mcctrl_per_proc_data* ppd, + struct task_struct 
*task); +struct mcctrl_per_thread_data *mcctrl_get_per_thread_data( + struct mcctrl_per_proc_data *ppd, struct task_struct *task); #define PROCFS_NAME_MAX 1000 diff --git a/executer/kernel/mcctrl/syscall.c b/executer/kernel/mcctrl/syscall.c index 1b028ae1..3a9b1e09 100644 --- a/executer/kernel/mcctrl/syscall.c +++ b/executer/kernel/mcctrl/syscall.c @@ -84,88 +84,96 @@ static void print_dma_lastreq(void) } #endif -int init_peer_channel_registry(struct mcctrl_usrdata *ud) +int mcctrl_add_per_thread_data(struct mcctrl_per_proc_data* ppd, + struct task_struct *task, void *data) { - ud->keys = kzalloc(sizeof(void *) * ud->num_channels, GFP_KERNEL); - if (!ud->keys) { - printk("Error: cannot allocate usrdata.keys[].\n"); - return -ENOMEM; + struct mcctrl_per_thread_data *ptd_iter, *ptd = NULL; + struct mcctrl_per_thread_data *ptd_alloc = NULL; + int hash = (((uint64_t)task >> 4) & MCCTRL_PER_THREAD_DATA_HASH_MASK); + int ret = 0; + unsigned long flags; + + ptd_alloc = kmalloc(sizeof(*ptd), GFP_ATOMIC); + if (!ptd_alloc) { + kprintf("%s: error allocate per thread data\n", __FUNCTION__); + ret = -ENOMEM; + goto out_noalloc; } - return 0; -} - -void destroy_peer_channel_registry(struct mcctrl_usrdata *ud) -{ - kfree(ud->keys); - ud->keys = NULL; - return; -} - -int register_peer_channel(struct mcctrl_usrdata *ud, void *key, struct mcctrl_channel *ch) -{ - int cpu; - - cpu = ch - ud->channels; - if ((cpu < 0) || (ud->num_channels <= cpu)) { - printk("register_peer_channel(%p,%p,%p):" - "not a syscall channel. cpu=%d\n", - ud, key, ch, cpu); - return -EINVAL; - } - - if (ud->keys[cpu] != NULL) { - printk("register_peer_channel(%p,%p,%p):" - "already registered. cpu=%d\n", - ud, key, ch, cpu); - /* - * When mcexec receives a signal, - * it may be finished without doing deregister_peer_channel(). - * Therefore a substitute registration is necessary. 
- */ -#if 0 - return -EBUSY; -#endif - } - - ud->keys[cpu] = key; - return 0; -} - -int deregister_peer_channel(struct mcctrl_usrdata *ud, void *key, struct mcctrl_channel *ch) -{ - int cpu; - - cpu = ch - ud->channels; - if ((cpu < 0) || (ud->num_channels <= cpu)) { - printk("deregister_peer_channel(%p,%p,%p):" - "not a syscall channel. cpu=%d\n", - ud, key, ch, cpu); - return -EINVAL; - } - - if (ud->keys[cpu] && (ud->keys[cpu] != key)) { - printk("deregister_peer_channel(%p,%p,%p):" - "not registered. cpu=%d\n", - ud, key, ch, cpu); - return -EBUSY; - } - - ud->keys[cpu] = NULL; - return 0; -} - -struct mcctrl_channel *get_peer_channel(struct mcctrl_usrdata *ud, void *key) -{ - int cpu; - - for (cpu = 0; cpu < ud->num_channels; ++cpu) { - if (ud->keys[cpu] == key) { - return &ud->channels[cpu]; + /* Check if data for this thread exists and add if not */ + write_lock_irqsave(&ppd->per_thread_data_hash_lock[hash], flags); + list_for_each_entry(ptd_iter, &ppd->per_thread_data_hash[hash], hash) { + if (ptd_iter->task == task) { + ptd = ptd_iter; + break; } } - return NULL; + if (ptd) { + ret = -EBUSY; + kfree(ptd_alloc); + goto out; + } + + ptd = ptd_alloc; + ptd->task = task; + ptd->data = data; + list_add_tail(&ptd->hash, &ppd->per_thread_data_hash[hash]); + +out: + write_unlock_irqrestore(&ppd->per_thread_data_hash_lock[hash], flags); +out_noalloc: + return ret; +} + +int mcctrl_delete_per_thread_data(struct mcctrl_per_proc_data* ppd, + struct task_struct *task) +{ + struct mcctrl_per_thread_data *ptd_iter, *ptd = NULL; + int hash = (((uint64_t)task >> 4) & MCCTRL_PER_THREAD_DATA_HASH_MASK); + int ret = 0; + unsigned long flags; + + /* Check if data for this thread exists and delete it */ + write_lock_irqsave(&ppd->per_thread_data_hash_lock[hash], flags); + list_for_each_entry(ptd_iter, &ppd->per_thread_data_hash[hash], hash) { + if (ptd_iter->task == task) { + ptd = ptd_iter; + break; + } + } + + if (!ptd) { + ret = -EINVAL; + goto out; + } + + 
list_del(&ptd->hash); + kfree(ptd); + +out: + write_unlock_irqrestore(&ppd->per_thread_data_hash_lock[hash], flags); + return ret; +} + +struct mcctrl_per_thread_data *mcctrl_get_per_thread_data(struct mcctrl_per_proc_data *ppd, struct task_struct *task) +{ + struct mcctrl_per_thread_data *ptd_iter, *ptd = NULL; + int hash = (((uint64_t)task >> 4) & MCCTRL_PER_THREAD_DATA_HASH_MASK); + unsigned long flags; + + /* Check if data for this thread exists and return it */ + read_lock_irqsave(&ppd->per_thread_data_hash_lock[hash], flags); + + list_for_each_entry(ptd_iter, &ppd->per_thread_data_hash[hash], hash) { + if (ptd_iter->task == task) { + ptd = ptd_iter; + break; + } + } + + read_unlock_irqrestore(&ppd->per_thread_data_hash_lock[hash], flags); + return ptd ? ptd->data : NULL; } #if 1 /* x86 depend, host OS side */ @@ -238,10 +246,23 @@ static int remote_page_fault(struct mcctrl_usrdata *usrdata, void *fault_addr, u struct syscall_request *req; struct syscall_response *resp; int error; + struct wait_queue_head_list_node *wqhln; + unsigned long irqflags; + struct mcctrl_per_proc_data *ppd; - dprintk("remote_page_fault(%p,%p,%llx)\n", usrdata, fault_addr, reason); + dprintk("%s: tid: %d, fault_addr: %p\n", + __FUNCTION__, task_pid_vnr(current), fault_addr); + + /* Look up per-process structure */ + ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current)); - channel = get_peer_channel(usrdata, current); + if (!ppd) { + kprintf("%s: ERROR: no per-process structure for PID %d??\n", + __FUNCTION__, task_tgid_vnr(current)); + return -EINVAL; + } + + channel = (struct mcctrl_channel *)mcctrl_get_per_thread_data(ppd, current); if (!channel) { error = -ENOENT; printk("remote_page_fault(%p,%p,%llx):channel not found. 
%d\n", @@ -252,10 +273,28 @@ static int remote_page_fault(struct mcctrl_usrdata *usrdata, void *fault_addr, u req = channel->param.request_va; resp = channel->param.response_va; - /* request page fault */ +retry_alloc: + wqhln = kmalloc(sizeof(*wqhln), GFP_KERNEL); + if (!wqhln) { + printk("WARNING: coudln't alloc wait queue head, retrying..\n"); + goto retry_alloc; + } + + /* Prepare per-thread wait queue head */ + wqhln->task = current; + wqhln->req = 0; + init_waitqueue_head(&wqhln->wq_syscall); + + irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock); + /* Add to exact list */ + list_add_tail(&wqhln->list, &ppd->wq_list_exact); + ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, irqflags); + + /* Request page fault */ resp->ret = -EFAULT; resp->fault_address = (unsigned long)fault_addr; resp->fault_reason = reason; + resp->stid = task_pid_vnr(current); #define STATUS_PAGER_COMPLETED 1 #define STATUS_PAGE_FAULT 3 @@ -264,43 +303,18 @@ static int remote_page_fault(struct mcctrl_usrdata *usrdata, void *fault_addr, u resp->status = STATUS_PAGE_FAULT; for (;;) { - struct wait_queue_head_list_node *wqhln; - struct wait_queue_head_list_node *wqhln_iter; - unsigned long irqflags; - -retry_alloc: - wqhln = kmalloc(sizeof(*wqhln), GFP_KERNEL); - if (!wqhln) { - printk("WARNING: coudln't alloc wait queue head, retrying..\n"); - goto retry_alloc; - } - - /* Prepare per-process wait queue head */ - wqhln->pid = task_tgid_vnr(current); - wqhln->req = 0; - init_waitqueue_head(&wqhln->wq_syscall); - - irqflags = ihk_ikc_spinlock_lock(&channel->wq_list_lock); - /* First see if there is a wait queue already */ - list_for_each_entry(wqhln_iter, &channel->wq_list, list) { - if (wqhln_iter->pid == task_tgid_vnr(current)) { - kfree(wqhln); - wqhln = wqhln_iter; - list_del(&wqhln->list); - break; - } - } - list_add_tail(&wqhln->list, &channel->wq_list); - ihk_ikc_spinlock_unlock(&channel->wq_list_lock, irqflags); - + dprintk("%s: tid: %d, fault_addr: %p SLEEPING\n", + __FUNCTION__, 
task_pid_vnr(current), fault_addr); /* wait for response */ error = wait_event_interruptible(wqhln->wq_syscall, wqhln->req); - - /* Remove per-process wait queue head */ - irqflags = ihk_ikc_spinlock_lock(&channel->wq_list_lock); + + /* Remove per-thread wait queue head */ + irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock); list_del(&wqhln->list); - ihk_ikc_spinlock_unlock(&channel->wq_list_lock, irqflags); + ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, irqflags); kfree(wqhln); + dprintk("%s: tid: %d, fault_addr: %p WOKEN UP\n", + __FUNCTION__, task_pid_vnr(current), fault_addr); if (error) { printk("remote_page_fault:interrupted. %d\n", error); @@ -472,26 +486,18 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) #if USE_VM_INSERT_PFN size_t pix; #endif - struct mcctrl_per_proc_data *ppd, *ppd_iter; - unsigned long flags; + struct mcctrl_per_proc_data *ppd; dprintk("mcctrl:page fault:flags %#x pgoff %#lx va %p page %p\n", vmf->flags, vmf->pgoff, vmf->virtual_address, vmf->page); - ppd = NULL; - flags = ihk_ikc_spinlock_lock(&usrdata->per_proc_list_lock); - - list_for_each_entry(ppd_iter, &usrdata->per_proc_list, list) { - if (ppd_iter->pid == task_tgid_vnr(current)) { - ppd = ppd_iter; - break; - } - } - ihk_ikc_spinlock_unlock(&usrdata->per_proc_list_lock, flags); + /* Look up per-process structure; failure is reported as a VM_FAULT_* code, not an errno */ + ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current)); if (!ppd) { - printk("ERROR: no per process data for pid %d\n", task_tgid_vnr(current)); - return VM_FAULT_SIGBUS; + kprintf("%s: ERROR: no per-process structure for PID %d??\n", + __FUNCTION__, task_tgid_vnr(current)); + return VM_FAULT_SIGBUS; } for (try = 1; ; ++try) { @@ -1711,33 +1717,6 @@ int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall break; case __NR_exit_group: { - unsigned long flags; - struct mcctrl_per_proc_data *ppd = NULL, *ppd_iter; - - ppd = NULL; - flags = ihk_ikc_spinlock_lock(&usrdata->per_proc_list_lock); - 
list_for_each_entry(ppd_iter, &usrdata->per_proc_list, list) { - if (ppd_iter->pid == task_tgid_vnr(current)) { - ppd = ppd_iter; - break; - } - } - - if (ppd) { - list_del(&ppd->list); - - dprintk("pid: %d, tid: %d: rpgtable for %d (0x%lx) removed\n", - task_tgid_vnr(current), current->pid, ppd->pid, ppd->rpgtable); - - kfree(ppd); - } - else { - printk("WARNING: no per process data for pid %d ?\n", - task_tgid_vnr(current)); - } - - ihk_ikc_spinlock_unlock(&usrdata->per_proc_list_lock, flags); /* Make sure the user space handler will be called as well */ error = -ENOSYS; diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index cec60856..5ae855b9 100644 --- a/executer/user/mcexec.c +++ b/executer/user/mcexec.c @@ -1870,13 +1870,13 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock) sig = 0; term = 0; + do_syscall_return(fd, cpu, 0, 0, 0, 0, 0); + /* Drop executable file */ if ((ret = ioctl(fd, MCEXEC_UP_CLOSE_EXEC)) != 0) { fprintf(stderr, "WARNING: close_exec() couldn't find exec file?\n"); } - do_syscall_return(fd, cpu, 0, 0, 0, 0, 0); - __dprintf("__NR_exit/__NR_exit_group: %ld (cpu_id: %d)\n", w.sr.args[0], cpu); if(w.sr.number == __NR_exit_group){ diff --git a/kernel/include/syscall.h b/kernel/include/syscall.h index 4aaf3244..6d0ccfa0 100644 --- a/kernel/include/syscall.h +++ b/kernel/include/syscall.h @@ -210,12 +210,23 @@ struct ikc_scd_init_param { }; struct syscall_request { + /* TID of requesting thread */ + int rtid; + /* + * TID of target thread. 
Remote page fault response needs to designate the + * thread that must serve the request, 0 indicates any thread from the pool + */ + int ttid; unsigned long valid; unsigned long number; unsigned long args[6]; }; struct syscall_response { + /* TID of the thread that requested the service */ + int ttid; + /* TID of the mcexec thread that is serving the request */ + int stid; unsigned long status; long ret; unsigned long fault_address; diff --git a/kernel/syscall.c b/kernel/syscall.c index 5d31da8b..dfe90328 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -227,6 +227,10 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) scp = &get_cpu_local_var(cpu)->scp; } res = scp->response_va; + /* The current thread is the requester and any thread from + * the pool may serve the request */ + req->rtid = cpu_local_var(current)->tid; + req->ttid = 0; send_syscall(req, cpu, pid); @@ -281,6 +285,10 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) #define PAGER_RESUME_PAGE_FAULT 0x0101 req2.args[0] = PAGER_RESUME_PAGE_FAULT; req2.args[1] = error; + /* The current thread is the requester and only the waiting thread + * may serve the request */ + req2.rtid = cpu_local_var(current)->tid; + req2.ttid = res->stid; send_syscall(&req2, cpu, pid); }