/**
 * \file executer/kernel/control.c
 *  License details are found in the file LICENSE.
 * \brief
 *  kernel module control
 * \author Taku Shimosawa \par
 *  Copyright (C) 2011 - 2012 Taku Shimosawa
 * \author Balazs Gerofi \par
 *  Copyright (C) 2012 RIKEN AICS
 * \author Gou Nakamura \par
 *  Copyright (C) 2012 - 2013 Hitachi, Ltd.
 * \author Tomoki Shirasawa \par
 *  Copyright (C) 2012 - 2013 Hitachi, Ltd.
 * \author Balazs Gerofi \par
 *  Copyright (C) 2013 The University of Tokyo
 */
/*
 * HISTORY:
 *  2013/09/02 shirasawa add terminate thread
 *  2013/08/19 shirasawa mcexec forward signal to MIC process
 *  2013/08/07 nakamura add page fault forwarding
 *  2013/07/05 shirasawa propagate error code for prepare image
 *  2013/07/02 shirasawa add error handling for prepare_process
 *  2013/04/17 nakamura add generic system call forwarding
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "../../config.h"
#include "mcctrl.h"

//#define DEBUG

#ifdef DEBUG
#define dprintk printk
#else
#define dprintk(...)
#endif

#ifdef MCCTRL_KSYM_sys_unshare
#if MCCTRL_KSYM_sys_unshare
typedef int (*int_star_fn_ulong_t)(unsigned long);
int (*mcctrl_sys_unshare)(unsigned long unshare_flags) =
	(int_star_fn_ulong_t)MCCTRL_KSYM_sys_unshare;
#else // exported
int (*mcctrl_sys_unshare)(unsigned long unshare_flags) = NULL;
#endif
#endif

#ifdef MCCTRL_KSYM_sys_mount
#if MCCTRL_KSYM_sys_mount
typedef int (*int_star_fn_char_char_char_ulong_void_t)(char *, char *, char *,
	unsigned long, void *);
int (*mcctrl_sys_mount)(char *dev_name, char *dir_name, char *type,
	unsigned long flags, void *data) =
	(int_star_fn_char_char_char_ulong_void_t)MCCTRL_KSYM_sys_mount;
#else // exported
int (*mcctrl_sys_mount)(char *dev_name, char *dir_name, char *type,
	unsigned long flags, void *data) = sys_mount;
#endif
#endif

#ifdef MCCTRL_KSYM_sys_umount
#if MCCTRL_KSYM_sys_umount
typedef int (*int_fn_char_star_int_t)(char *, int);
int (*mcctrl_sys_umount)(char *dir_name, int flags) =
	(int_fn_char_star_int_t)MCCTRL_KSYM_sys_umount;
#else // exported
int (*mcctrl_sys_umount)(char *dir_name, int flags) = sys_umount;
#endif
#endif

//static DECLARE_WAIT_QUEUE_HEAD(wq_prepare);
//extern struct mcctrl_channel *channels;

int mcctrl_ikc_set_recv_cpu(ihk_os_t os, int cpu);
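/*
 * MCEXEC_UP_PREPARE_IMAGE handler.
 *
 * Copies the program_load_desc (header, sections, argument and environment
 * blocks) from the mcexec user buffer into kernel memory, reserves the
 * user-space range on the host side, and forwards an SCD_MSG_PREPARE_PROCESS
 * IKC message carrying the physical address of the descriptor to the target
 * McKernel CPU.  The caller then sleeps on usrdata->wq_prepare until
 * mcexec_prepare_ack() (invoked from the IKC message handler) sets
 * desc->status, and finally copies the updated descriptor back to user space.
 */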
static long mcexec_prepare_image(ihk_os_t os,
                                 struct program_load_desc * __user udesc)
{
	struct program_load_desc *desc, *pdesc;
	struct ikc_scd_packet isp;
	void *args, *envs;
	long ret = 0;
	struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
	struct mcctrl_per_proc_data *ppd = NULL;
	int num_sections;

	desc = kmalloc(sizeof(*desc), GFP_KERNEL);
	if (!desc) {
		printk("%s: error: allocating program_load_desc\n",
			__FUNCTION__);
		return -ENOMEM;
	}

	if (copy_from_user(desc, udesc, sizeof(struct program_load_desc))) {
		printk("%s: error: copying program_load_desc\n",
			__FUNCTION__);
		kfree(desc);
		return -EFAULT;
	}

	num_sections = desc->num_sections;
	if (num_sections <= 0 || num_sections > 16) {
		printk("# of sections: %d\n", num_sections);
		kfree(desc);
		return -EINVAL;
	}

	pdesc = kmalloc(sizeof(struct program_load_desc) +
			sizeof(struct program_image_section) * num_sections,
			GFP_KERNEL);
	if (!pdesc) {
		kfree(desc);
		return -ENOMEM;
	}

	memcpy(pdesc, desc, sizeof(struct program_load_desc));
	if (copy_from_user(pdesc->sections, udesc->sections,
			sizeof(struct program_image_section) * num_sections)) {
		kfree(desc);
		kfree(pdesc);
		return -EFAULT;
	}

	kfree(desc);

	pdesc->pid = task_tgid_vnr(current);

	if (reserve_user_space(usrdata, &pdesc->user_start, &pdesc->user_end)) {
		kfree(pdesc);
		return -ENOMEM;
	}

	args = kmalloc(pdesc->args_len, GFP_KERNEL);
	if (!args) {
		kfree(pdesc);
		return -ENOMEM;
	}
	if (copy_from_user(args, pdesc->args, pdesc->args_len)) {
		kfree(args);
		kfree(pdesc);
		return -EFAULT;
	}

	envs = kmalloc(pdesc->envs_len, GFP_KERNEL);
	if (!envs) {
		ret = -ENOMEM;
		goto free_out;
	}
	if (copy_from_user(envs, pdesc->envs, pdesc->envs_len)) {
		ret = -EFAULT;
		goto free_out;
	}

	pdesc->args = (void *)virt_to_phys(args);
	dprintk("args: 0x%lX\n", (unsigned long)pdesc->args);
	dprintk("argc: %ld\n", *(long *)args);
	pdesc->envs = (void *)virt_to_phys(envs);
	dprintk("envs: 0x%lX\n", (unsigned long)pdesc->envs);
	dprintk("envc: %ld\n", *(long *)envs);

	isp.msg = SCD_MSG_PREPARE_PROCESS;
	isp.ref = pdesc->cpu;
	isp.arg = virt_to_phys(pdesc);

	dprintk("# of sections: %d\n", pdesc->num_sections);
	dprintk("%p (%lx)\n", pdesc, isp.arg);

	pdesc->status = 0;
	mcctrl_ikc_send(os, pdesc->cpu, &isp);

	while (wait_event_interruptible(usrdata->wq_prepare, pdesc->status) != 0);

	if (pdesc->err < 0) {
		ret = pdesc->err;
		goto free_out;
	}

	ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current));
	if (!ppd) {
		printk("ERROR: no per process data for PID %d\n",
			task_tgid_vnr(current));
		ret = -EINVAL;
		goto free_out;
	}

	/* Update rpgtable */
	ppd->rpgtable = pdesc->rpgtable;

	if (copy_to_user(udesc, pdesc, sizeof(struct program_load_desc) +
			sizeof(struct program_image_section) * num_sections)) {
		ret = -EFAULT;
		goto free_out;
	}

	dprintk("%s: pid %d, rpgtable: 0x%lx added\n",
		__FUNCTION__, ppd->pid, ppd->rpgtable);

	ret = 0;

free_out:
	kfree(args);
	kfree(pdesc);
	kfree(envs);

	return ret;
}

int mcexec_transfer_image(ihk_os_t os, struct remote_transfer *__user upt)
{
	struct remote_transfer pt;
	unsigned long phys, ret = 0;
	void *rpm;
#if 0
	unsigned long dma_status = 0;
	ihk_dma_channel_t channel;
	struct ihk_dma_request request;
	void *p;

	channel = ihk_device_get_dma_channel(ihk_os_to_dev(os), 0);
	if (!channel) {
		return -EINVAL;
	}
#endif

	if (copy_from_user(&pt, upt, sizeof(pt))) {
		return -EFAULT;
	}

	if (pt.size > PAGE_SIZE) {
		printk("mcexec_transfer_image(): ERROR: size exceeds PAGE_SIZE\n");
		return -EFAULT;
	}

	phys = ihk_device_map_memory(ihk_os_to_dev(os), pt.rphys, PAGE_SIZE);
#ifdef CONFIG_MIC
	rpm = ioremap_wc(phys, PAGE_SIZE);
#else
	rpm = ihk_device_map_virtual(ihk_os_to_dev(os), phys, PAGE_SIZE,
	                             NULL, 0);
#endif

	if (pt.direction == MCEXEC_UP_TRANSFER_TO_REMOTE) {
		if (copy_from_user(rpm, pt.userp, pt.size)) {
			ret = -EFAULT;
		}
	}
	else if (pt.direction == MCEXEC_UP_TRANSFER_FROM_REMOTE) {
		if (copy_to_user(pt.userp, rpm, pt.size)) {
			ret = -EFAULT;
		}
	}
	else {
		printk("mcexec_transfer_image(): ERROR: invalid direction\n");
		ret = -EINVAL;
	}

#ifdef CONFIG_MIC
	iounmap(rpm);
#else
	ihk_device_unmap_virtual(ihk_os_to_dev(os), rpm, PAGE_SIZE);
#endif
	ihk_device_unmap_memory(ihk_os_to_dev(os), phys, PAGE_SIZE);

	return ret;

#if 0
	p = (void *)__get_free_page(GFP_KERNEL);

	if (copy_from_user(p, pt.src, PAGE_SIZE)) {
		return -EFAULT;
	}

	memset(&request, 0, sizeof(request));
	request.src_os = NULL;
	request.src_phys = virt_to_phys(p);
	request.dest_os = os;
	request.dest_phys = pt.dest;
	request.size = PAGE_SIZE;
	request.notify = (void *)virt_to_phys(&dma_status);
	request.priv = (void *)1;

	ihk_dma_request(channel, &request);

	while (!dma_status) {
		mb();
		udelay(1);
	}

	free_page((unsigned long)p);

	return 0;
#endif
}

//extern unsigned long last_thread_exec;

struct handlerinfo {
	int pid;
};

static long mcexec_debug_log(ihk_os_t os, unsigned long arg)
{
	struct ikc_scd_packet isp;

	memset(&isp, '\0', sizeof isp);
	isp.msg = SCD_MSG_DEBUG_LOG;
	isp.arg = arg;
	mcctrl_ikc_send(os, 0, &isp);
	return 0;
}
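/*
 * Release handler registered on the mcexec file descriptor.
 *
 * mcexec_newprocess() and mcexec_start_image() register this callback via
 * ihk_os_register_release_handler(); when the descriptor is released (e.g.
 * mcexec exits or is killed), an SCD_MSG_CLEANUP_PROCESS message is sent so
 * that the corresponding McKernel process is cleaned up, and the /proc pid
 * entry created by mcexec_open_exec() is removed.
 */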
static void release_handler(ihk_os_t os, void *param)
{
	struct handlerinfo *info = param;
	struct ikc_scd_packet isp;
	int os_ind = ihk_host_os_get_index(os);

	memset(&isp, '\0', sizeof isp);
	isp.msg = SCD_MSG_CLEANUP_PROCESS;
	isp.pid = info->pid;

	mcctrl_ikc_send(os, 0, &isp);
	if (os_ind >= 0) {
		delete_pid_entry(os_ind, info->pid);
	}
	kfree(param);
}

static long mcexec_newprocess(ihk_os_t os,
                              struct newprocess_desc *__user udesc,
                              struct file *file)
{
	struct newprocess_desc desc;
	struct handlerinfo *info;

	if (copy_from_user(&desc, udesc, sizeof(struct newprocess_desc))) {
		return -EFAULT;
	}
	info = kmalloc(sizeof(struct handlerinfo), GFP_KERNEL);
	if (!info) {
		return -ENOMEM;
	}
	info->pid = desc.pid;
	ihk_os_register_release_handler(file, release_handler, info);
	return 0;
}

static long mcexec_start_image(ihk_os_t os,
                               struct program_load_desc * __user udesc,
                               struct file *file)
{
	struct program_load_desc *desc;
	struct ikc_scd_packet isp;
	struct mcctrl_channel *c;
	struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
	struct handlerinfo *info;

	desc = kmalloc(sizeof(*desc), GFP_KERNEL);
	if (!desc) {
		printk("%s: error: allocating program_load_desc\n",
			__FUNCTION__);
		return -ENOMEM;
	}

	if (copy_from_user(desc, udesc, sizeof(struct program_load_desc))) {
		kfree(desc);
		return -EFAULT;
	}

	info = kmalloc(sizeof(struct handlerinfo), GFP_KERNEL);
	if (!info) {
		kfree(desc);
		return -ENOMEM;
	}
	info->pid = desc->pid;
	ihk_os_register_release_handler(file, release_handler, info);

	c = usrdata->channels + desc->cpu;
	mcctrl_ikc_set_recv_cpu(os, desc->cpu);

	usrdata->last_thread_exec = desc->cpu;

	isp.msg = SCD_MSG_SCHEDULE_PROCESS;
	isp.ref = desc->cpu;
	isp.arg = desc->rprocess;

	mcctrl_ikc_send(os, desc->cpu, &isp);

	kfree(desc);
	return 0;
}

static DECLARE_WAIT_QUEUE_HEAD(signalq);

static long mcexec_send_signal(ihk_os_t os, struct signal_desc *sigparam)
{
	struct ikc_scd_packet isp;
	struct mcctrl_channel *c;
	struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
	struct signal_desc sig;
	struct mcctrl_signal msig[2];
	struct mcctrl_signal *msigp;
	int rc;

	if (copy_from_user(&sig, sigparam, sizeof(struct signal_desc))) {
		return -EFAULT;
	}

	/*
	 * Two copies are kept on the stack; use the one that does not
	 * straddle a page boundary, since its physical address is handed
	 * to McKernel below.
	 */
	msigp = msig;
	if (((unsigned long)msig & 0xfffffffffffff000L) !=
	    ((unsigned long)(msig + 1) & 0xfffffffffffff000L))
		msigp++;
	memset(msigp, '\0', sizeof *msigp);
	msigp->sig = sig.sig;
	msigp->pid = sig.pid;
	msigp->tid = sig.tid;
	memcpy(&msigp->info, &sig.info, 128);

	c = usrdata->channels;
	isp.msg = SCD_MSG_SEND_SIGNAL;
	isp.ref = sig.cpu;
	isp.pid = sig.pid;
	isp.arg = virt_to_phys(msigp);

	if ((rc = mcctrl_ikc_send(os, sig.cpu, &isp)) < 0) {
		printk("mcexec_send_signal: mcctrl_ikc_send ret=%d\n", rc);
		return rc;
	}
	wait_event_interruptible(signalq, msigp->cond != 0);

	return 0;
}

void sig_done(unsigned long arg, int err)
{
	struct mcctrl_signal *msigp;

	msigp = phys_to_virt(arg);
	msigp->cond = 1;
	wake_up_interruptible(&signalq);
}

static long mcexec_get_cpu(ihk_os_t os)
{
	struct ihk_cpu_info *info;

	info = ihk_os_get_cpu_info(os);
	if (!info) {
		printk("Error: cannot retrieve CPU info.\n");
		return -EINVAL;
	}
	if (info->n_cpus < 1) {
		printk("Error: # of cpu is invalid.\n");
		return -EINVAL;
	}

	return info->n_cpus;
}
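/*
 * Per-process bookkeeping.
 *
 * Each host process driving a McKernel process (i.e. each mcexec instance)
 * has an mcctrl_per_proc_data record, kept in a PID-keyed hash table in
 * mcctrl_usrdata and protected by per-bucket reader/writer locks.  The record
 * holds the remote page table pointer (rpgtable), the system call wait queues
 * and the per-thread data hash used for system call forwarding.
 */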
int mcctrl_add_per_proc_data(struct mcctrl_usrdata *ud, int pid,
                             struct mcctrl_per_proc_data *ppd)
{
	struct mcctrl_per_proc_data *ppd_iter;
	int hash = (pid & MCCTRL_PER_PROC_DATA_HASH_MASK);
	int ret = 0;
	unsigned long flags;

	/* Check if data for this thread exists and add if not */
	write_lock_irqsave(&ud->per_proc_data_hash_lock[hash], flags);
	list_for_each_entry(ppd_iter, &ud->per_proc_data_hash[hash], hash) {
		if (ppd_iter->pid == pid) {
			ret = -EBUSY;
			goto out;
		}
	}

	list_add_tail(&ppd->hash, &ud->per_proc_data_hash[hash]);

out:
	write_unlock_irqrestore(&ud->per_proc_data_hash_lock[hash], flags);
	return ret;
}

int mcctrl_delete_per_proc_data(struct mcctrl_usrdata *ud, int pid)
{
	struct mcctrl_per_proc_data *ppd_iter, *ppd = NULL;
	int hash = (pid & MCCTRL_PER_PROC_DATA_HASH_MASK);
	int ret = 0;
	unsigned long flags;

	write_lock_irqsave(&ud->per_proc_data_hash_lock[hash], flags);
	list_for_each_entry(ppd_iter, &ud->per_proc_data_hash[hash], hash) {
		if (ppd_iter->pid == pid) {
			ppd = ppd_iter;
			break;
		}
	}

	if (!ppd) {
		ret = -EINVAL;
		goto out;
	}

	list_del(&ppd->hash);

out:
	write_unlock_irqrestore(&ud->per_proc_data_hash_lock[hash], flags);
	return ret;
}

inline struct mcctrl_per_proc_data *mcctrl_get_per_proc_data(
	struct mcctrl_usrdata *ud, int pid)
{
	struct mcctrl_per_proc_data *ppd_iter, *ppd = NULL;
	int hash = (pid & MCCTRL_PER_PROC_DATA_HASH_MASK);
	unsigned long flags;

	/* Check if data for this process exists and return it */
	read_lock_irqsave(&ud->per_proc_data_hash_lock[hash], flags);
	list_for_each_entry(ppd_iter, &ud->per_proc_data_hash[hash], hash) {
		if (ppd_iter->pid == pid) {
			ppd = ppd_iter;
			break;
		}
	}
	read_unlock_irqrestore(&ud->per_proc_data_hash_lock[hash], flags);

	return ppd;
}

/*
 * Called indirectly from the IKC message handler.
 */
int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet)
{
	struct wait_queue_head_list_node *wqhln = NULL;
	struct wait_queue_head_list_node *wqhln_iter;
	struct wait_queue_head_list_node *wqhln_alloc = NULL;
	int pid = packet->pid;
	unsigned long flags;
	struct mcctrl_per_proc_data *ppd;

	/* Look up per-process structure */
	ppd = mcctrl_get_per_proc_data(ud, pid);
	if (unlikely(!ppd)) {
		kprintf("%s: ERROR: no per-process structure for PID %d, "
			"syscall nr: %lu\n",
			__FUNCTION__, pid, packet->req.number);
		return -1;
	}

	dprintk("%s: (packet_handler) rtid: %d, ttid: %d, sys nr: %d\n",
		__FUNCTION__,
		packet->req.rtid,
		packet->req.ttid,
		packet->req.number);
	/*
	 * Three scenarios are possible:
	 * - Find the designated thread if req->ttid is specified.
	 * - Find any available thread if req->ttid is zero.
	 * - Add a request element if no threads are available.
	 */
	flags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock);

	/* Is this a request for a specific thread? See if it's waiting */
	if (unlikely(packet->req.ttid)) {
		list_for_each_entry(wqhln_iter, &ppd->wq_list_exact, list) {
			if (packet->req.ttid != task_pid_vnr(wqhln_iter->task))
				continue;

			wqhln = wqhln_iter;
			break;
		}
		if (!wqhln) {
			printk("%s: WARNING: no target thread found for exact request??\n",
				__FUNCTION__);
		}
	}
	/* Is there any thread available? */
	else {
		list_for_each_entry(wqhln_iter, &ppd->wq_list, list) {
			if (wqhln_iter->task && !wqhln_iter->req) {
				wqhln = wqhln_iter;
				break;
			}
		}
	}

	/* If no match found, add request to pending request list */
	if (unlikely(!wqhln)) {
retry_alloc:
		wqhln_alloc = kmalloc(sizeof(*wqhln), GFP_ATOMIC);
		if (!wqhln_alloc) {
			printk("WARNING: couldn't alloc wait queue head, retrying..\n");
			goto retry_alloc;
		}

		wqhln = wqhln_alloc;
		wqhln->req = 0;
		wqhln->task = NULL;
		init_waitqueue_head(&wqhln->wq_syscall);
		list_add_tail(&wqhln->list, &ppd->wq_req_list);
	}

	wqhln->packet = packet;
	wqhln->req = 1;
	wake_up(&wqhln->wq_syscall);
	ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, flags);

	return 0;
}
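/*
 * System call forwarding rendezvous.
 *
 * mcexec_syscall() above runs in IKC context when McKernel forwards a system
 * call; mcexec_wait_syscall() below runs in the context of an mcexec worker
 * thread that called ioctl(MCEXEC_UP_WAIT_SYSCALL).  The two sides meet
 * through wait_queue_head_list_node entries hanging off the per-process data:
 * an already waiting thread is woken up with the request attached, otherwise
 * the request is parked on wq_req_list until the next worker arrives.  The
 * worker eventually reports the result with ioctl(MCEXEC_UP_RET_SYSCALL).
 *
 * Rough sketch of the user-space side of this loop (illustrative only; the
 * mcctrl device open/setup and all error handling are omitted, and the
 * descriptor field names follow the structures used below):
 *
 *	struct syscall_wait_desc wd;
 *	struct syscall_ret_desc rd;
 *
 *	for (;;) {
 *		if (ioctl(fd, MCEXEC_UP_WAIT_SYSCALL, &wd))   // blocks here
 *			break;
 *		// execute wd.sr.number with wd.sr.args[] on the host ...
 *		memset(&rd, 0, sizeof(rd));
 *		rd.ret = result;                  // plus size/src/dest when
 *		ioctl(fd, MCEXEC_UP_RET_SYSCALL, &rd); // a data payload is returned
 *	}
 */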
/*
 * Called from an mcexec thread via ioctl().
 */
int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req)
{
	struct ikc_scd_packet *packet;
	struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
	struct wait_queue_head_list_node *wqhln = NULL;
	struct wait_queue_head_list_node *wqhln_iter;
	int ret = 0;
	unsigned long irqflags;
	struct mcctrl_per_proc_data *ppd;

	/* Look up per-process structure */
	ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current));
	if (unlikely(!ppd)) {
		kprintf("%s: ERROR: no per-process structure for PID %d??\n",
			__FUNCTION__, task_tgid_vnr(current));
		return -EINVAL;
	}

	packet = (struct ikc_scd_packet *)mcctrl_get_per_thread_data(ppd, current);
	if (packet) {
		printk("%s: ERROR: packet %p is already registered for thread %d\n",
			__FUNCTION__, packet, task_pid_vnr(current));
		return -EBUSY;
	}

retry:
	/* Prepare per-thread wait queue head or find a valid request */
	irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock);

	/* First see if there is a valid request already that is not yet taken */
	list_for_each_entry(wqhln_iter, &ppd->wq_req_list, list) {
		if (wqhln_iter->task == NULL && wqhln_iter->req) {
			wqhln = wqhln_iter;
			wqhln->task = current;
			list_del(&wqhln->list);
			break;
		}
	}

	if (!wqhln) {
retry_alloc:
		wqhln = kmalloc(sizeof(*wqhln), GFP_ATOMIC);
		if (!wqhln) {
			printk("WARNING: couldn't alloc wait queue head, retrying..\n");
			goto retry_alloc;
		}

		wqhln->task = current;
		wqhln->req = 0;
		init_waitqueue_head(&wqhln->wq_syscall);

		/* Wait for a request.. */
		list_add(&wqhln->list, &ppd->wq_list);
		ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, irqflags);

		ret = wait_event_interruptible(wqhln->wq_syscall, wqhln->req);

		/* Remove per-thread wait queue head */
		irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock);
		list_del(&wqhln->list);
	}
	ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, irqflags);

	if (ret && !wqhln->req) {
		kfree(wqhln);
		wqhln = NULL;
		return -EINTR;
	}

	packet = wqhln->packet;
	kfree(wqhln);
	wqhln = NULL;

	dprintk("%s: tid: %d request from CPU %d\n",
		__FUNCTION__, task_pid_vnr(current), packet->ref);

	mb();
	if (!packet->req.valid) {
		printk("%s: ERROR: stray wakeup pid: %d, tid: %d: SC %lu\n",
			__FUNCTION__,
			task_tgid_vnr(current),
			task_pid_vnr(current),
			packet->req.number);

		ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet,
			(usrdata->channels + packet->ref)->c);
		goto retry;
	}

	packet->req.valid = 0; /* ack */
	dprintk("%s: system call: %d, args[0]: %lu, args[1]: %lu, args[2]: %lu, "
		"args[3]: %lu, args[4]: %lu, args[5]: %lu\n",
		__FUNCTION__,
		packet->req.number,
		packet->req.args[0],
		packet->req.args[1],
		packet->req.args[2],
		packet->req.args[3],
		packet->req.args[4],
		packet->req.args[5]);

	if (mcctrl_add_per_thread_data(ppd, current, packet) < 0) {
		kprintf("%s: error adding per-thread data\n", __FUNCTION__);
		return -EINVAL;
	}

	if (__do_in_kernel_syscall(os, packet)) {
		if (copy_to_user(&req->sr, &packet->req,
		                 sizeof(struct syscall_request))) {

			if (mcctrl_delete_per_thread_data(ppd, current) < 0) {
				kprintf("%s: error deleting per-thread data\n",
					__FUNCTION__);
				return -EINVAL;
			}
			return -EFAULT;
		}

		return 0;
	}

	ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet,
		(usrdata->channels + packet->ref)->c);

	if (mcctrl_delete_per_thread_data(ppd, current) < 0) {
		kprintf("%s: error deleting per-thread data\n", __FUNCTION__);
		return -EINVAL;
	}

	goto retry;
}
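/*
 * MCEXEC_UP_PREPARE_DMA / MCEXEC_UP_FREE_DMA handlers.
 *
 * mcexec_pin_region() allocates physically contiguous pages (by default a
 * 64 KiB block, pin_shift = 16, otherwise an order derived from the requested
 * size) and returns the physical address to user space; mcexec_free_region()
 * releases a region previously obtained this way.
 */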
long mcexec_pin_region(ihk_os_t os, unsigned long *__user arg)
{
	struct prepare_dma_desc desc;
	int pin_shift = 16;
	int order;
	unsigned long a;

	if (copy_from_user(&desc, arg, sizeof(struct prepare_dma_desc))) {
		return -EFAULT;
	}

	order = pin_shift - PAGE_SHIFT;
	if (desc.size > 0) {
		order = get_order(desc.size);
	}

	a = __get_free_pages(GFP_KERNEL, order);
	if (!a) {
		return -ENOMEM;
	}

	a = virt_to_phys((void *)a);

	if (copy_to_user((void *)desc.pa, &a, sizeof(unsigned long))) {
		free_pages((unsigned long)phys_to_virt(a), order);
		return -EFAULT;
	}
	return 0;
}

long mcexec_free_region(ihk_os_t os, unsigned long *__user arg)
{
	struct free_dma_desc desc;
	int pin_shift = 16;
	int order;

	if (copy_from_user(&desc, arg, sizeof(struct free_dma_desc))) {
		return -EFAULT;
	}

	order = pin_shift - PAGE_SHIFT;
	if (desc.size > 0) {
		order = get_order(desc.size);
	}

	if (desc.pa > 0) {
		free_pages((unsigned long)phys_to_virt(desc.pa), order);
	}
	return 0;
}

long mcexec_load_syscall(ihk_os_t os, struct syscall_load_desc *__user arg)
{
	struct syscall_load_desc desc;
	unsigned long phys;
	void *rpm;
	long ret = 0;

	if (copy_from_user(&desc, arg, sizeof(struct syscall_load_desc))) {
		return -EFAULT;
	}

	phys = ihk_device_map_memory(ihk_os_to_dev(os), desc.src, desc.size);
#ifdef CONFIG_MIC
	rpm = ioremap_wc(phys, desc.size);
#else
	rpm = ihk_device_map_virtual(ihk_os_to_dev(os), phys, desc.size,
	                             NULL, 0);
#endif

	dprintk("mcexec_load_syscall: %s (desc.size: %d)\n", rpm, desc.size);

	if (copy_to_user((void *__user)desc.dest, rpm, desc.size)) {
		ret = -EFAULT;
	}

#ifdef CONFIG_MIC
	iounmap(rpm);
#else
	ihk_device_unmap_virtual(ihk_os_to_dev(os), rpm, desc.size);
#endif

	ihk_device_unmap_memory(ihk_os_to_dev(os), phys, desc.size);

	return ret;
}
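/*
 * MCEXEC_UP_RET_SYSCALL handler.
 *
 * Completes a forwarded system call: looks up the ikc_scd_packet that
 * mcexec_wait_syscall() registered for the calling thread, optionally copies
 * a result buffer from user space into McKernel memory (ret.size bytes from
 * ret.src to the remote physical address ret.dest), reports the return value
 * with __return_syscall() and releases the packet.
 */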
long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg)
{
	struct syscall_ret_desc ret;
	struct ikc_scd_packet *packet;
	struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
	struct mcctrl_per_proc_data *ppd;

	if (copy_from_user(&ret, arg, sizeof(struct syscall_ret_desc))) {
		return -EFAULT;
	}

	/* Look up per-process structure */
	ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current));
	if (!ppd) {
		kprintf("%s: ERROR: no per-process structure for PID %d??\n",
			__FUNCTION__, task_tgid_vnr(current));
		return -EINVAL;
	}

	packet = (struct ikc_scd_packet *)mcctrl_get_per_thread_data(ppd, current);
	if (!packet) {
		kprintf("%s: ERROR: no packet registered for TID %d\n",
			__FUNCTION__, task_pid_vnr(current));
		return -EINVAL;
	}

	mcctrl_delete_per_thread_data(ppd, current);

	if (ret.size > 0) {
		/* Host => Accel. Write is fast. */
		unsigned long phys;
		void *rpm;
		long error = 0;

		phys = ihk_device_map_memory(ihk_os_to_dev(os), ret.dest,
		                             ret.size);
#ifdef CONFIG_MIC
		rpm = ioremap_wc(phys, ret.size);
#else
		rpm = ihk_device_map_virtual(ihk_os_to_dev(os), phys, ret.size,
		                             NULL, 0);
#endif
		if (copy_from_user(rpm, (void *__user)ret.src, ret.size)) {
			error = -EFAULT;
		}

#ifdef CONFIG_MIC
		iounmap(rpm);
#else
		ihk_device_unmap_virtual(ihk_os_to_dev(os), rpm, ret.size);
#endif
		ihk_device_unmap_memory(ihk_os_to_dev(os), phys, ret.size);

		if (error) {
			return error;
		}
	}

	__return_syscall(os, packet, ret.ret, task_pid_vnr(current));

	/* Free packet */
	ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet,
		(usrdata->channels + packet->ref)->c);

	return 0;
}

LIST_HEAD(mckernel_exec_files);
DEFINE_SEMAPHORE(mckernel_exec_file_lock);

struct mckernel_exec_file {
	ihk_os_t os;
	pid_t pid;
	struct file *fp;
	struct list_head list;
};

#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
#define GUIDVAL(x) (x)
#else
#define GUIDVAL(x) ((x).val)
#endif

int mcexec_getcred(unsigned long phys)
{
	int *virt = phys_to_virt(phys);

	virt[0] = GUIDVAL(current_uid());
	virt[1] = GUIDVAL(current_euid());
	virt[2] = GUIDVAL(current_suid());
	virt[3] = GUIDVAL(current_fsuid());
	virt[4] = GUIDVAL(current_gid());
	virt[5] = GUIDVAL(current_egid());
	virt[6] = GUIDVAL(current_sgid());
	virt[7] = GUIDVAL(current_fsgid());
	return 0;
}

int mcexec_getcredv(int __user *virt)
{
	int wk[8];

	wk[0] = GUIDVAL(current_uid());
	wk[1] = GUIDVAL(current_euid());
	wk[2] = GUIDVAL(current_suid());
	wk[3] = GUIDVAL(current_fsuid());
	wk[4] = GUIDVAL(current_gid());
	wk[5] = GUIDVAL(current_egid());
	wk[6] = GUIDVAL(current_sgid());
	wk[7] = GUIDVAL(current_fsgid());
	if (copy_to_user(virt, wk, sizeof(int) * 8))
		return -EFAULT;
	return 0;
}
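/*
 * MCEXEC_UP_OPEN_EXEC handler.
 *
 * Resolves and opens the executable that the McKernel process runs, keeps it
 * open (and write-protected) for the lifetime of the process, and publishes
 * its path through the /proc pid entry of the corresponding OS index.  The
 * first call from a given PID also allocates and registers the
 * mcctrl_per_proc_data record used by the system call forwarding code.
 */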
int mcexec_open_exec(ihk_os_t os, char * __user filename)
{
	struct file *file;
	struct mckernel_exec_file *mcef;
	struct mckernel_exec_file *mcef_iter;
	int retval;
	int os_ind = ihk_host_os_get_index(os);
	char *pathbuf, *fullpath;
	struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
	struct mcctrl_per_proc_data *ppd = NULL;
	int i;

	if (os_ind < 0) {
		return EINVAL;
	}

	ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current));
	if (!ppd) {
		ppd = kmalloc(sizeof(*ppd), GFP_KERNEL);
		if (!ppd) {
			printk("ERROR: allocating per process data\n");
			return -ENOMEM;
		}

		ppd->pid = task_tgid_vnr(current);
		/*
		 * XXX: rpgtable will be updated in __do_in_kernel_syscall()
		 * under case __NR_munmap
		 */
		INIT_LIST_HEAD(&ppd->wq_list);
		INIT_LIST_HEAD(&ppd->wq_req_list);
		INIT_LIST_HEAD(&ppd->wq_list_exact);
		spin_lock_init(&ppd->wq_list_lock);

		for (i = 0; i < MCCTRL_PER_THREAD_DATA_HASH_SIZE; ++i) {
			INIT_LIST_HEAD(&ppd->per_thread_data_hash[i]);
			rwlock_init(&ppd->per_thread_data_hash_lock[i]);
		}

		if (mcctrl_add_per_proc_data(usrdata, ppd->pid, ppd) < 0) {
			printk("%s: error adding per process data\n",
				__FUNCTION__);
			retval = EINVAL;
			goto out_free_ppd;
		}
	}
	else {
		/* Only deallocate in case of an error if we added it above */
		ppd = NULL;
	}

	pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
	if (!pathbuf) {
		retval = ENOMEM;
		goto out_error_drop_ppd;
	}

	file = open_exec(filename);
	retval = PTR_ERR(file);
	if (IS_ERR(file)) {
		goto out_error_free;
	}

	fullpath = d_path(&file->f_path, pathbuf, PATH_MAX);
	if (IS_ERR(fullpath)) {
		retval = PTR_ERR(fullpath);
		goto out_put_file;
	}

	mcef = kmalloc(sizeof(*mcef), GFP_KERNEL);
	if (!mcef) {
		retval = ENOMEM;
		goto out_put_file;
	}

	down(&mckernel_exec_file_lock);
	/* Find previous file (if exists) and drop it */
	list_for_each_entry(mcef_iter, &mckernel_exec_files, list) {
		if (mcef_iter->os == os &&
		    mcef_iter->pid == task_tgid_vnr(current)) {
			allow_write_access(mcef_iter->fp);
			fput(mcef_iter->fp);
			list_del(&mcef_iter->list);
			kfree(mcef_iter);
			break;
		}
	}

	/* Add new exec file to the list */
	mcef->os = os;
	mcef->pid = task_tgid_vnr(current);
	mcef->fp = file;
	list_add_tail(&mcef->list, &mckernel_exec_files);

	/* Create /proc/self/exe entry */
	add_pid_entry(os_ind, task_tgid_vnr(current));
	proc_exe_link(os_ind, task_tgid_vnr(current), fullpath);
	up(&mckernel_exec_file_lock);

	dprintk("%d open_exec and holding file: %s\n",
		(int)task_tgid_vnr(current), filename);

	kfree(pathbuf);

	return 0;

out_put_file:
	fput(file);
out_error_free:
	kfree(pathbuf);
out_error_drop_ppd:
	if (ppd)
		mcctrl_delete_per_proc_data(usrdata, ppd->pid);
out_free_ppd:
	if (ppd)
		kfree(ppd);
	return -retval;
}

int mcexec_close_exec(ihk_os_t os)
{
	struct mckernel_exec_file *mcef = NULL;
	int found = 0;
	int os_ind = ihk_host_os_get_index(os);
	struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
	struct mcctrl_per_proc_data *ppd = NULL;

	ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current));
	if (ppd) {
		mcctrl_delete_per_proc_data(usrdata, ppd->pid);

		dprintk("pid: %d, tid: %d: rpgtable for %d (0x%lx) removed\n",
			task_tgid_vnr(current), current->pid,
			ppd->pid, ppd->rpgtable);

		kfree(ppd);
	}
	else {
		printk("WARNING: no per process data for pid %d ?\n",
			task_tgid_vnr(current));
	}

	if (os_ind < 0) {
		return EINVAL;
	}

	down(&mckernel_exec_file_lock);
	list_for_each_entry(mcef, &mckernel_exec_files, list) {
		if (mcef->os == os && mcef->pid == task_tgid_vnr(current)) {
			allow_write_access(mcef->fp);
			fput(mcef->fp);
			list_del(&mcef->list);
			kfree(mcef);
			found = 1;
			dprintk("%d close_exec dropped executable\n",
				(int)task_tgid_vnr(current));
			break;
		}
	}
	up(&mckernel_exec_file_lock);

	return (found ? 0 : EINVAL);
}
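/*
 * MCEXEC_UP_STRNCPY_FROM_USER handler.
 *
 * Copies a NUL-terminated string of at most desc.n bytes between two user
 * space addresses (desc.src to desc.dest) one page at a time through a kernel
 * bounce page.  desc.result receives the number of bytes copied, or a
 * negative error code.
 */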
long mcexec_strncpy_from_user(ihk_os_t os,
                              struct strncpy_from_user_desc * __user arg)
{
	struct strncpy_from_user_desc desc;
	void *buf;
	void *dest;
	void *src;
	unsigned long remain;
	long want;
	long copied;

	if (copy_from_user(&desc, arg, sizeof(desc))) {
		return -EFAULT;
	}

	buf = (void *)__get_free_page(GFP_KERNEL);
	if (!buf) {
		return -ENOMEM;
	}

	dest = desc.dest;
	src = desc.src;
	remain = desc.n;
	want = 0;
	copied = 0;

	while ((remain > 0) && (want == copied)) {
		want = (remain > PAGE_SIZE) ? PAGE_SIZE : remain;
		copied = strncpy_from_user(buf, src, want);
		if (copied == want) {
			if (copy_to_user(dest, buf, copied)) {
				copied = -EFAULT;
			}
		}
		else if (copied >= 0) {
			if (copy_to_user(dest, buf, copied + 1)) {
				copied = -EFAULT;
			}
		}
		dest += copied;
		src += copied;
		remain -= copied;
	}

	desc.result = (copied >= 0) ? (desc.n - remain) : copied;
	free_page((unsigned long)buf);

	if (copy_to_user(arg, &desc, sizeof(*arg))) {
		return -EFAULT;
	}
	return 0;
}

long mcexec_sys_mount(struct sys_mount_desc *__user arg)
{
	struct sys_mount_desc desc;
	struct cred *promoted;
	const struct cred *original;
	int ret;

	if (copy_from_user(&desc, arg, sizeof(desc))) {
		return -EFAULT;
	}

	promoted = prepare_creds();
	if (!promoted) {
		return -ENOMEM;
	}
	cap_raise(promoted->cap_effective, CAP_SYS_ADMIN);
	original = override_creds(promoted);

#ifdef MCCTRL_KSYM_sys_mount
	ret = mcctrl_sys_mount(desc.dev_name, desc.dir_name, desc.type,
	                       desc.flags, desc.data);
#else
	ret = -EFAULT;
#endif

	revert_creds(original);
	put_cred(promoted);

	return ret;
}

long mcexec_sys_umount(struct sys_mount_desc *__user arg)
{
	struct sys_umount_desc desc;
	struct cred *promoted;
	const struct cred *original;
	int ret;

	if (copy_from_user(&desc, arg, sizeof(desc))) {
		return -EFAULT;
	}

	promoted = prepare_creds();
	if (!promoted) {
		return -ENOMEM;
	}
	cap_raise(promoted->cap_effective, CAP_SYS_ADMIN);
	original = override_creds(promoted);

#ifdef MCCTRL_KSYM_sys_umount
	ret = mcctrl_sys_umount(desc.dir_name, MNT_FORCE);
	kprintf("%s: mcctrl_sys_umount: %d\n", __FUNCTION__, ret);
#else
	ret = -EFAULT;
	kprintf("%s: mcctrl_sys_umount not defined?\n", __FUNCTION__);
#endif

	revert_creds(original);
	put_cred(promoted);

	return ret;
}

long mcexec_sys_unshare(struct sys_unshare_desc *__user arg)
{
	struct sys_unshare_desc desc;
	struct cred *promoted;
	const struct cred *original;
	int ret;

	if (copy_from_user(&desc, arg, sizeof(desc))) {
		return -EFAULT;
	}

	promoted = prepare_creds();
	if (!promoted) {
		return -ENOMEM;
	}
	cap_raise(promoted->cap_effective, CAP_SYS_ADMIN);
	original = override_creds(promoted);

#if MCCTRL_KSYM_sys_unshare
	ret = mcctrl_sys_unshare(desc.unshare_flags);
#else
	ret = -EFAULT;
#endif

	revert_creds(original);
	put_cred(promoted);

	return ret;
}
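/*
 * ioctl dispatcher for the mcexec control interface.
 *
 * Called from the mcctrl driver's ioctl entry point; req is one of the
 * MCEXEC_UP_* request codes and arg is a user-space pointer (or plain value)
 * whose interpretation depends on the request.
 */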
long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg,
                      struct file *file)
{
	switch (req) {
	case MCEXEC_UP_PREPARE_IMAGE:
		return mcexec_prepare_image(os,
			(struct program_load_desc *)arg);

	case MCEXEC_UP_TRANSFER:
		return mcexec_transfer_image(os, (struct remote_transfer *)arg);

	case MCEXEC_UP_START_IMAGE:
		return mcexec_start_image(os,
			(struct program_load_desc *)arg, file);

	case MCEXEC_UP_WAIT_SYSCALL:
		return mcexec_wait_syscall(os, (struct syscall_wait_desc *)arg);

	case MCEXEC_UP_RET_SYSCALL:
		return mcexec_ret_syscall(os, (struct syscall_ret_desc *)arg);

	case MCEXEC_UP_LOAD_SYSCALL:
		return mcexec_load_syscall(os, (struct syscall_load_desc *)arg);

	case MCEXEC_UP_SEND_SIGNAL:
		return mcexec_send_signal(os, (struct signal_desc *)arg);

	case MCEXEC_UP_GET_CPU:
		return mcexec_get_cpu(os);

	case MCEXEC_UP_STRNCPY_FROM_USER:
		return mcexec_strncpy_from_user(os,
			(struct strncpy_from_user_desc *)arg);

	case MCEXEC_UP_NEW_PROCESS:
		return mcexec_newprocess(os,
			(struct newprocess_desc *)arg, file);

	case MCEXEC_UP_OPEN_EXEC:
		return mcexec_open_exec(os, (char *)arg);

	case MCEXEC_UP_CLOSE_EXEC:
		return mcexec_close_exec(os);

	case MCEXEC_UP_PREPARE_DMA:
		return mcexec_pin_region(os, (unsigned long *)arg);

	case MCEXEC_UP_FREE_DMA:
		return mcexec_free_region(os, (unsigned long *)arg);

	case MCEXEC_UP_GET_CRED:
		return mcexec_getcred((unsigned long)arg);

	case MCEXEC_UP_GET_CREDV:
		return mcexec_getcredv((int *)arg);

	case MCEXEC_UP_SYS_MOUNT:
		return mcexec_sys_mount((struct sys_mount_desc *)arg);

	case MCEXEC_UP_SYS_UMOUNT:
		return mcexec_sys_umount((struct sys_mount_desc *)arg);

	case MCEXEC_UP_SYS_UNSHARE:
		return mcexec_sys_unshare((struct sys_unshare_desc *)arg);

	case MCEXEC_UP_DEBUG_LOG:
		return mcexec_debug_log(os, arg);
	}

	return -EINVAL;
}

void mcexec_prepare_ack(ihk_os_t os, unsigned long arg, int err)
{
	struct program_load_desc *desc = phys_to_virt(arg);
	struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);

	desc->err = err;
	desc->status = 1;

	wake_up_all(&usrdata->wq_prepare);
}