Merge remote-tracking branch 'remotes/origin/ikc2'

Conflicts:
	executer/kernel/mcctrl/syscall.c
It is resolved.
This commit is contained in:
Ken Sato
2016-09-27 11:46:12 +09:00
29 changed files with 1709 additions and 1272 deletions

View File

@ -1054,9 +1054,8 @@ unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs)
unsigned long error = ((struct x86_user_context *)regs)->gpr.error; unsigned long error = ((struct x86_user_context *)regs)->gpr.error;
irqflags = kprintf_lock(); irqflags = kprintf_lock();
dkprintf("[%d] Page fault for 0x%lX\n", __kprintf("Page fault for 0x%lx\n", address);
ihk_mc_get_processor_id(), address); __kprintf("%s for %s access in %s mode (reserved bit %s set), "
dkprintf("%s for %s access in %s mode (reserved bit %s set), "
"it %s an instruction fetch\n", "it %s an instruction fetch\n",
(error & PF_PROT ? "protection fault" : "no page found"), (error & PF_PROT ? "protection fault" : "no page found"),
(error & PF_WRITE ? "write" : "read"), (error & PF_WRITE ? "write" : "read"),
@ -1068,14 +1067,14 @@ unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs)
list_for_each_entry(range, &vm->vm_range_list, list) { list_for_each_entry(range, &vm->vm_range_list, list) {
if (range->start <= address && range->end > address) { if (range->start <= address && range->end > address) {
found = 1; found = 1;
dkprintf("address is in range, flag: 0x%X! \n", __kprintf("address is in range, flag: 0x%lx\n",
range->flag); range->flag);
ihk_mc_pt_print_pte(vm->address_space->page_table, (void*)address); ihk_mc_pt_print_pte(vm->address_space->page_table, (void*)address);
break; break;
} }
} }
if (!found) { if (!found) {
dkprintf("address is out of range! \n"); __kprintf("address is out of range! \n");
} }
kprintf_unlock(irqflags); kprintf_unlock(irqflags);

View File

@ -318,5 +318,5 @@ extern unsigned long ap_trampoline;
#define AP_TRAMPOLINE_SIZE 0x2000 #define AP_TRAMPOLINE_SIZE 0x2000
/* Local is cachable */ /* Local is cachable */
#define IHK_IKC_QUEUE_PT_ATTR (PTATTR_NO_EXECUTE | PTATTR_WRITABLE | PTATTR_UNCACHABLE) #define IHK_IKC_QUEUE_PT_ATTR (PTATTR_NO_EXECUTE | PTATTR_WRITABLE)
#endif #endif

View File

@ -23,6 +23,7 @@
#include <process.h> #include <process.h>
#include <page.h> #include <page.h>
#include <cls.h> #include <cls.h>
#include <kmalloc.h>
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) #define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__) #define ekprintf(...) kprintf(__VA_ARGS__)
@ -84,20 +85,22 @@ void ihk_mc_free_pages(void *p, int npages)
pa_ops->free_page(p, npages); pa_ops->free_page(p, npages);
} }
void *ihk_mc_allocate(int size, enum ihk_mc_ap_flag flag) void *ihk_mc_allocate(int size, int flag)
{ {
if (pa_ops && pa_ops->alloc) if (!cpu_local_var(kmalloc_initialized)) {
return pa_ops->alloc(size, flag); kprintf("%s: error, kmalloc not yet initialized\n", __FUNCTION__);
else return NULL;
return ihk_mc_alloc_pages(1, flag); }
return kmalloc(size, IHK_MC_AP_NOWAIT);
} }
void ihk_mc_free(void *p) void ihk_mc_free(void *p)
{ {
if (pa_ops && pa_ops->free) if (!cpu_local_var(kmalloc_initialized)) {
return pa_ops->free(p); kprintf("%s: error, kmalloc not yet initialized\n", __FUNCTION__);
else return;
return ihk_mc_free_pages(p, 1); }
kfree(p);
} }
void *get_last_early_heap(void) void *get_last_early_heap(void)
@ -1111,6 +1114,7 @@ static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base,
if (!(old & PFL1_FILEOFF) && args->free_physical) { if (!(old & PFL1_FILEOFF) && args->free_physical) {
if (page && page_unmap(page)) { if (page && page_unmap(page)) {
ihk_mc_free_pages(phys_to_virt(phys), 1); ihk_mc_free_pages(phys_to_virt(phys), 1);
dkprintf("%s: freeing regular page at 0x%lx\n", __FUNCTION__, base);
} }
args->vm->currss -= PTL1_SIZE; args->vm->currss -= PTL1_SIZE;
} }
@ -1159,6 +1163,7 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base,
if (!(old & PFL2_FILEOFF) && args->free_physical) { if (!(old & PFL2_FILEOFF) && args->free_physical) {
if (page && page_unmap(page)) { if (page && page_unmap(page)) {
ihk_mc_free_pages(phys_to_virt(phys), PTL2_SIZE/PTL1_SIZE); ihk_mc_free_pages(phys_to_virt(phys), PTL2_SIZE/PTL1_SIZE);
dkprintf("%s: freeing large page at 0x%lx\n", __FUNCTION__, base);
} }
args->vm->currss -= PTL2_SIZE; args->vm->currss -= PTL2_SIZE;
} }
@ -2273,6 +2278,9 @@ int read_process_vm(struct process_vm *vm, void *kdst, const void *usrc, size_t
reason = PF_USER; /* page not present */ reason = PF_USER; /* page not present */
for (addr = ustart & PAGE_MASK; addr < uend; addr += PAGE_SIZE) { for (addr = ustart & PAGE_MASK; addr < uend; addr += PAGE_SIZE) {
if (!addr)
return -EINVAL;
error = page_fault_process_vm(vm, (void *)addr, reason); error = page_fault_process_vm(vm, (void *)addr, reason);
if (error) { if (error) {
kprintf("%s: error: PF for %p failed\n", __FUNCTION__, addr); kprintf("%s: error: PF for %p failed\n", __FUNCTION__, addr);

View File

@ -38,7 +38,7 @@ int ihk_mc_ikc_init_first_local(struct ihk_ikc_channel_desc *channel,
arch_master_channel_packet_handler = packet_handler; arch_master_channel_packet_handler = packet_handler;
ihk_ikc_init_desc(channel, IKC_OS_HOST, 0, rq, wq, ihk_ikc_init_desc(channel, IKC_OS_HOST, 0, rq, wq,
ihk_ikc_master_channel_packet_handler); ihk_ikc_master_channel_packet_handler, channel);
ihk_ikc_enable_channel(channel); ihk_ikc_enable_channel(channel);
/* Set boot parameter */ /* Set boot parameter */

View File

@ -105,7 +105,7 @@ static int set_perfctr_x86_direct(int counter, int mode, unsigned int value)
wrmsr(MSR_IA32_PERFEVTSEL0 + counter, value); wrmsr(MSR_IA32_PERFEVTSEL0 + counter, value);
//kprintf("wrmsr: %d <= %x\n", MSR_PERF_GLOBAL_CTRL, 0); //kprintf("wrmsr: %d <= %x\n", MSR_PERF_GLOBAL_CTRL, 0);
kprintf("wrmsr: %d <= %x\n", MSR_IA32_PERFEVTSEL0 + counter, value); //kprintf("wrmsr: %d <= %x\n", MSR_IA32_PERFEVTSEL0 + counter, value);
return 0; return 0;
} }

View File

@ -293,7 +293,7 @@ SYSCALL_DECLARE(rt_sigreturn)
extern struct cpu_local_var *clv; extern struct cpu_local_var *clv;
extern unsigned long do_kill(struct thread *thread, int pid, int tid, int sig, struct siginfo *info, int ptracecont); extern unsigned long do_kill(struct thread *thread, int pid, int tid, int sig, struct siginfo *info, int ptracecont);
extern void interrupt_syscall(int all, int pid); extern void interrupt_syscall(int pid, int tid);
extern int num_processors; extern int num_processors;
#define RFLAGS_MASK (RFLAGS_CF | RFLAGS_PF | RFLAGS_AF | RFLAGS_ZF | \ #define RFLAGS_MASK (RFLAGS_CF | RFLAGS_PF | RFLAGS_AF | RFLAGS_ZF | \
@ -1290,7 +1290,7 @@ done:
cpu_restore_interrupt(irqstate); cpu_restore_interrupt(irqstate);
if (doint && !(mask & tthread->sigmask.__val[0])) { if (doint && !(mask & tthread->sigmask.__val[0])) {
int cpuid = tthread->cpu_id; int tid = tthread->tid;
int pid = tproc->pid; int pid = tproc->pid;
int status = tthread->status; int status = tthread->status;
@ -1301,7 +1301,7 @@ done:
} }
if(!tthread->proc->nohost) if(!tthread->proc->nohost)
interrupt_syscall(pid, cpuid); interrupt_syscall(pid, tid);
if (status != PS_RUNNING) { if (status != PS_RUNNING) {
if(sig == SIGKILL){ if(sig == SIGKILL){
@ -1437,9 +1437,8 @@ SYSCALL_DECLARE(mmap)
goto out; goto out;
} }
if ((addr < region->user_start) if ((flags & MAP_FIXED) && ((addr < region->user_start)
|| (region->user_end <= addr) || (region->user_end <= addr))) {
|| ((region->user_end - addr) < len)) {
ekprintf("sys_mmap(%lx,%lx,%x,%x,%x,%lx):ENOMEM\n", ekprintf("sys_mmap(%lx,%lx,%x,%x,%x,%lx):ENOMEM\n",
addr0, len0, prot, flags0, fd, off0); addr0, len0, prot, flags0, fd, off0);
error = -ENOMEM; error = -ENOMEM;

View File

@ -21,10 +21,12 @@ cpus=""
if [ "`lsmod | grep ihk_smp_x86`" == "" ]; then exit 0; fi if [ "`lsmod | grep ihk_smp_x86`" == "" ]; then exit 0; fi
# Destroy all LWK instances # Destroy all LWK instances
if ls /dev/mcos* 1>/dev/null 2>&1; then
for i in /dev/mcos*; do for i in /dev/mcos*; do
ind=`echo $i|cut -c10-`; ind=`echo $i|cut -c10-`;
if ! ${SBINDIR}/ihkconfig 0 destroy $ind; then echo "error: destroying LWK instance $ind failed" >&2; exit 1; fi if ! ${SBINDIR}/ihkconfig 0 destroy $ind; then echo "error: destroying LWK instance $ind failed" >&2; exit 1; fi
done done
fi
# Query IHK-SMP resources and release them # Query IHK-SMP resources and release them
if ! ${SBINDIR}/ihkconfig 0 query cpu > /dev/null; then echo "error: querying cpus" >&2; exit 1; fi if ! ${SBINDIR}/ihkconfig 0 query cpu > /dev/null; then echo "error: querying cpus" >&2; exit 1; fi

View File

@ -110,6 +110,13 @@ struct program_load_desc {
}; };
struct syscall_request { struct syscall_request {
/* TID of requesting thread */
int rtid;
/*
* TID of target thread. Remote page fault response needs to designate the
* thread that must serve the request, 0 indicates any thread from the pool
*/
int ttid;
unsigned long valid; unsigned long valid;
unsigned long number; unsigned long number;
unsigned long args[6]; unsigned long args[6];
@ -128,8 +135,17 @@ struct syscall_load_desc {
unsigned long size; unsigned long size;
}; };
#define IHK_SCD_REQ_THREAD_SPINNING 0
#define IHK_SCD_REQ_THREAD_TO_BE_WOKEN 1
#define IHK_SCD_REQ_THREAD_DESCHEDULED 2
struct syscall_response { struct syscall_response {
/* TID of the thread that requested the service */
int ttid;
/* TID of the mcexec thread that is serving or has served the request */
int stid;
unsigned long status; unsigned long status;
unsigned long req_thread_status;
long ret; long ret;
unsigned long fault_address; unsigned long fault_address;
unsigned long fault_reason; unsigned long fault_reason;

View File

@ -33,6 +33,7 @@
#include <linux/file.h> #include <linux/file.h>
#include <linux/version.h> #include <linux/version.h>
#include <linux/semaphore.h> #include <linux/semaphore.h>
#include <linux/interrupt.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <asm/delay.h> #include <asm/delay.h>
#include <asm/io.h> #include <asm/io.h>
@ -81,7 +82,6 @@ static long mcexec_prepare_image(ihk_os_t os,
void *args, *envs; void *args, *envs;
long ret = 0; long ret = 0;
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
unsigned long flags;
struct mcctrl_per_proc_data *ppd = NULL; struct mcctrl_per_proc_data *ppd = NULL;
if (copy_from_user(&desc, udesc, if (copy_from_user(&desc, udesc,
@ -124,52 +124,48 @@ static long mcexec_prepare_image(ihk_os_t os,
} }
pdesc->args = (void*)virt_to_phys(args); pdesc->args = (void*)virt_to_phys(args);
printk("args: 0x%lX\n", (unsigned long)pdesc->args); dprintk("args: 0x%lX\n", (unsigned long)pdesc->args);
printk("argc: %ld\n", *(long *)args); dprintk("argc: %ld\n", *(long *)args);
pdesc->envs = (void*)virt_to_phys(envs); pdesc->envs = (void*)virt_to_phys(envs);
printk("envs: 0x%lX\n", (unsigned long)pdesc->envs); dprintk("envs: 0x%lX\n", (unsigned long)pdesc->envs);
printk("envc: %ld\n", *(long *)envs); dprintk("envc: %ld\n", *(long *)envs);
isp.msg = SCD_MSG_PREPARE_PROCESS; isp.msg = SCD_MSG_PREPARE_PROCESS;
isp.ref = pdesc->cpu; isp.ref = pdesc->cpu;
isp.arg = virt_to_phys(pdesc); isp.arg = virt_to_phys(pdesc);
printk("# of sections: %d\n", pdesc->num_sections); dprintk("# of sections: %d\n", pdesc->num_sections);
printk("%p (%lx)\n", pdesc, isp.arg); dprintk("%p (%lx)\n", pdesc, isp.arg);
pdesc->status = 0; pdesc->status = 0;
mcctrl_ikc_send(os, pdesc->cpu, &isp); mcctrl_ikc_send(os, pdesc->cpu, &isp);
wait_event_interruptible(usrdata->wq_prepare, pdesc->status); while (wait_event_interruptible(usrdata->wq_prepare, pdesc->status) != 0);
if(pdesc->err < 0){ if(pdesc->err < 0){
ret = pdesc->err; ret = pdesc->err;
goto free_out; goto free_out;
} }
ppd = kmalloc(sizeof(*ppd), GFP_ATOMIC); ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current));
if (!ppd) { if (!ppd) {
printk("ERROR: allocating per process data\n"); printk("ERROR: no per process data for PID %d\n", task_tgid_vnr(current));
ret = -ENOMEM; ret = -EINVAL;
goto free_out; goto free_out;
} }
ppd->pid = pdesc->pid; /* Update rpgtable */
ppd->rpgtable = pdesc->rpgtable; ppd->rpgtable = pdesc->rpgtable;
flags = ihk_ikc_spinlock_lock(&usrdata->per_proc_list_lock);
list_add_tail(&ppd->list, &usrdata->per_proc_list);
ihk_ikc_spinlock_unlock(&usrdata->per_proc_list_lock, flags);
dprintk("pid %d, rpgtable: 0x%lx added\n",
ppd->pid, ppd->rpgtable);
if (copy_to_user(udesc, pdesc, sizeof(struct program_load_desc) + if (copy_to_user(udesc, pdesc, sizeof(struct program_load_desc) +
sizeof(struct program_image_section) * desc.num_sections)) { sizeof(struct program_image_section) * desc.num_sections)) {
ret = -EFAULT; ret = -EFAULT;
goto free_out; goto free_out;
} }
dprintk("%s: pid %d, rpgtable: 0x%lx added\n",
__FUNCTION__, ppd->pid, ppd->rpgtable);
ret = 0; ret = 0;
free_out: free_out:
@ -417,19 +413,200 @@ static long mcexec_get_cpu(ihk_os_t os)
return info->n_cpus; return info->n_cpus;
} }
int mcexec_syscall(struct mcctrl_channel *c, int pid, unsigned long arg) int mcctrl_add_per_proc_data(struct mcctrl_usrdata *ud, int pid,
struct mcctrl_per_proc_data *ppd)
{
/*
 * Register @ppd as the per-process data for @pid in the hash table kept
 * in @ud. The bucket is selected by masking @pid with
 * MCCTRL_PER_PROC_DATA_HASH_MASK.
 *
 * Returns 0 when @ppd was appended to the bucket, -EBUSY when the bucket
 * already holds an entry whose pid equals @pid (nothing is added then).
 */
struct mcctrl_per_proc_data *ppd_iter;
int hash = (pid & MCCTRL_PER_PROC_DATA_HASH_MASK);
int ret = 0;
unsigned long flags;
/* Check if data for this process exists and add if not */
write_lock_irqsave(&ud->per_proc_data_hash_lock[hash], flags);
list_for_each_entry(ppd_iter, &ud->per_proc_data_hash[hash], hash) {
if (ppd_iter->pid == pid) {
/* Duplicate registration: leave the list untouched */
ret = -EBUSY;
goto out;
}
}
list_add_tail(&ppd->hash, &ud->per_proc_data_hash[hash]);
out:
/* Single exit point so the bucket lock is released on every path */
write_unlock_irqrestore(&ud->per_proc_data_hash_lock[hash], flags);
return ret;
}
int mcctrl_delete_per_proc_data(struct mcctrl_usrdata *ud, int pid)
{
struct mcctrl_per_proc_data *ppd_iter, *ppd = NULL;
int hash = (pid & MCCTRL_PER_PROC_DATA_HASH_MASK);
int ret = 0;
unsigned long flags;
write_lock_irqsave(&ud->per_proc_data_hash_lock[hash], flags);
list_for_each_entry(ppd_iter, &ud->per_proc_data_hash[hash], hash) {
if (ppd_iter->pid == pid) {
ppd = ppd_iter;
break;
}
}
if (!ppd) {
ret = -EINVAL;
goto out;
}
list_del(&ppd->hash);
out:
write_unlock_irqrestore(&ud->per_proc_data_hash_lock[hash], flags);
return ret;
}
inline struct mcctrl_per_proc_data *mcctrl_get_per_proc_data(
struct mcctrl_usrdata *ud, int pid)
{
struct mcctrl_per_proc_data *ppd_iter, *ppd = NULL;
int hash = (pid & MCCTRL_PER_PROC_DATA_HASH_MASK);
unsigned long flags;
/* Check if data for this process exists and return it */
read_lock_irqsave(&ud->per_proc_data_hash_lock[hash], flags);
list_for_each_entry(ppd_iter, &ud->per_proc_data_hash[hash], hash) {
if (ppd_iter->pid == pid) {
ppd = ppd_iter;
break;
}
}
read_unlock_irqrestore(&ud->per_proc_data_hash_lock[hash], flags);
return ppd;
}
/*
* Called indirectly from the IKC message handler.
*/
int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet)
{ {
struct wait_queue_head_list_node *wqhln = NULL; struct wait_queue_head_list_node *wqhln = NULL;
struct wait_queue_head_list_node *wqhln_iter; struct wait_queue_head_list_node *wqhln_iter;
struct wait_queue_head_list_node *wqhln_alloc = NULL;
int pid = packet->pid;
unsigned long flags; unsigned long flags;
struct mcctrl_per_proc_data *ppd;
/* Look up per-process structure */
ppd = mcctrl_get_per_proc_data(ud, pid);
if (unlikely(!ppd)) {
kprintf("%s: ERROR: no per-process structure for PID %d??\n",
__FUNCTION__, task_tgid_vnr(current));
return 0;
}
dprintk("%s: (packet_handler) rtid: %d, ttid: %d, sys nr: %d\n",
__FUNCTION__,
packet->req.rtid,
packet->req.ttid,
packet->req.number);
/*
* Three scenarios are possible:
* - Find the designated thread if req->ttid is specified.
* - Find any available thread if req->ttid is zero.
* - Add a request element if no threads are available.
*/
flags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock);
/* Is this a request for a specific thread? See if it's waiting */
if (unlikely(packet->req.ttid)) {
list_for_each_entry(wqhln_iter, &ppd->wq_list_exact, list) {
if (packet->req.ttid != task_pid_vnr(wqhln_iter->task))
continue;
/* Look up per-process wait queue head with pid */
flags = ihk_ikc_spinlock_lock(&c->wq_list_lock);
list_for_each_entry(wqhln_iter, &c->wq_list, list) {
if (wqhln_iter->pid == pid) {
wqhln = wqhln_iter; wqhln = wqhln_iter;
break; break;
} }
if (!wqhln) {
printk("%s: WARNING: no target thread found for exact request??\n",
__FUNCTION__);
}
}
/* Is there any thread available? */
else {
list_for_each_entry(wqhln_iter, &ppd->wq_list, list) {
if (wqhln_iter->task && !wqhln_iter->req) {
wqhln = wqhln_iter;
break;
}
}
}
/* If no match found, add request to pending request list */
if (unlikely(!wqhln)) {
retry_alloc:
wqhln_alloc = kmalloc(sizeof(*wqhln), GFP_ATOMIC);
if (!wqhln_alloc) {
printk("WARNING: coudln't alloc wait queue head, retrying..\n");
goto retry_alloc;
}
wqhln = wqhln_alloc;
wqhln->req = 0;
wqhln->task = NULL;
init_waitqueue_head(&wqhln->wq_syscall);
list_add_tail(&wqhln->list, &ppd->wq_req_list);
}
wqhln->packet = packet;
wqhln->req = 1;
wake_up(&wqhln->wq_syscall);
ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, flags);
return 0;
}
/*
* Called from an mcexec thread via ioctl().
*/
int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req)
{
struct ikc_scd_packet *packet;
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
struct wait_queue_head_list_node *wqhln = NULL;
struct wait_queue_head_list_node *wqhln_iter;
int ret = 0;
unsigned long irqflags;
struct mcctrl_per_proc_data *ppd;
/* Look up per-process structure */
ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current));
if (unlikely(!ppd)) {
kprintf("%s: ERROR: no per-process structure for PID %d??\n",
__FUNCTION__, task_tgid_vnr(current));
return -EINVAL;
}
packet = (struct ikc_scd_packet *)mcctrl_get_per_thread_data(ppd, current);
if (packet) {
printk("%s: ERROR: packet %p is already registered for thread %d\n",
__FUNCTION__, packet, task_pid_vnr(current));
return -EBUSY;
}
retry:
/* Prepare per-thread wait queue head or find a valid request */
irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock);
/* First see if there is a valid request already that is not yet taken */
list_for_each_entry(wqhln_iter, &ppd->wq_req_list, list) {
if (wqhln_iter->task == NULL && wqhln_iter->req) {
wqhln = wqhln_iter;
wqhln->task = current;
list_del(&wqhln->list);
break;
}
} }
if (!wqhln) { if (!wqhln) {
@ -440,180 +617,86 @@ retry_alloc:
goto retry_alloc; goto retry_alloc;
} }
wqhln->pid = pid; wqhln->task = current;
wqhln->req = 0; wqhln->req = 0;
init_waitqueue_head(&wqhln->wq_syscall); init_waitqueue_head(&wqhln->wq_syscall);
list_add_tail(&wqhln->list, &c->wq_list);
/* Wait for a request.. */
list_add(&wqhln->list, &ppd->wq_list);
ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, irqflags);
ret = wait_event_interruptible(wqhln->wq_syscall, wqhln->req);
/* Remove per-thread wait queue head */
irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock);
list_del(&wqhln->list);
} }
ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, irqflags);
wqhln->req = 1;
wake_up(&wqhln->wq_syscall);
ihk_ikc_spinlock_unlock(&c->wq_list_lock, flags);
return 0;
}
#ifndef DO_USER_MODE
// static int remaining_job, base_cpu, job_pos;
#endif
// extern int num_channels;
// extern int mcctrl_dma_abort;
int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req)
{
struct syscall_wait_desc swd;
struct mcctrl_channel *c;
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
struct wait_queue_head_list_node *wqhln;
struct wait_queue_head_list_node *wqhln_iter;
int ret = 0;
unsigned long irqflags;
#ifndef DO_USER_MODE
unsigned long s, w, d;
#endif
//printk("mcexec_wait_syscall swd=%p req=%p size=%d\n", &swd, req, sizeof(swd.cpu));
if (copy_from_user(&swd, req, sizeof(swd))) {
return -EFAULT;
}
if (swd.cpu >= usrdata->num_channels)
return -EINVAL;
c = get_peer_channel(usrdata, current);
if (c) {
printk("mcexec_wait_syscall:already registered. task %p ch %p\n",
current, c);
return -EBUSY;
}
c = usrdata->channels + swd.cpu;
#ifdef DO_USER_MODE
retry:
/* Prepare per-process wait queue head */
retry_alloc:
wqhln = kmalloc(sizeof(*wqhln), GFP_KERNEL);
if (!wqhln) {
printk("WARNING: coudln't alloc wait queue head, retrying..\n");
goto retry_alloc;
}
wqhln->pid = swd.pid;
wqhln->req = 0;
init_waitqueue_head(&wqhln->wq_syscall);
irqflags = ihk_ikc_spinlock_lock(&c->wq_list_lock);
/* First see if there is one wait queue already */
list_for_each_entry(wqhln_iter, &c->wq_list, list) {
if (wqhln_iter->pid == task_tgid_vnr(current)) {
kfree(wqhln);
wqhln = wqhln_iter;
list_del(&wqhln->list);
break;
}
}
list_add_tail(&wqhln->list, &c->wq_list);
ihk_ikc_spinlock_unlock(&c->wq_list_lock, irqflags);
ret = wait_event_interruptible(wqhln->wq_syscall, wqhln->req);
/* Remove per-process wait queue head */
irqflags = ihk_ikc_spinlock_lock(&c->wq_list_lock);
list_del(&wqhln->list);
ihk_ikc_spinlock_unlock(&c->wq_list_lock, irqflags);
if (ret && !wqhln->req) { if (ret && !wqhln->req) {
kfree(wqhln); kfree(wqhln);
wqhln = NULL;
return -EINTR; return -EINTR;
} }
packet = wqhln->packet;
kfree(wqhln); kfree(wqhln);
wqhln = NULL;
if (c->param.request_va->number == 61 && dprintk("%s: tid: %d request from CPU %d\n",
c->param.request_va->args[0] == swd.pid) { __FUNCTION__, task_pid_vnr(current), packet->ref);
dprintk("pid: %d, tid: %d: SC %d, swd.cpu: %d, WARNING: wait4() for self?\n",
task_tgid_vnr(current),
task_pid_vnr(current);
c->param.request_va->number,
swd.cpu);
return -EINTR;
}
#if 1
mb(); mb();
if (!c->param.request_va->valid) { if (!packet->req.valid) {
printk("mcexec_wait_syscall:stray wakeup\n"); printk("%s: ERROR: stray wakeup pid: %d, tid: %d: SC %lu\n",
__FUNCTION__,
task_tgid_vnr(current),
task_pid_vnr(current),
packet->req.number);
ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet,
(usrdata->channels + packet->ref)->c);
goto retry; goto retry;
} }
#endif
#else
while (1) {
c = usrdata->channels + swd.cpu;
ihk_get_tsc(s);
if (!usrdata->remaining_job) {
while (!(*c->param.doorbell_va)) {
mb();
cpu_relax();
ihk_get_tsc(w);
if (w > s + 1024UL * 1024 * 1024 * 10) {
return -EINTR;
}
}
d = (*c->param.doorbell_va) - 1;
*c->param.doorbell_va = 0;
if (d < 0 || d >= usrdata->num_channels) { packet->req.valid = 0; /* ack */
d = 0; dprintk("%s: system call: %d, args[0]: %lu, args[1]: %lu, args[2]: %lu, "
} "args[3]: %lu, args[4]: %lu, args[5]: %lu\n",
usrdata->base_cpu = d; __FUNCTION__,
usrdata->job_pos = 0; packet->req.number,
usrdata->remaining_job = 1; packet->req.args[0],
} else { packet->req.args[1],
usrdata->job_pos++; packet->req.args[2],
} packet->req.args[3],
packet->req.args[4],
for (; usrdata->job_pos < usrdata->num_channels; usrdata->job_pos++) { packet->req.args[5]);
if (base_cpu + job_pos >= num_channels) {
c = usrdata->channels + if (mcctrl_add_per_thread_data(ppd, current, packet) < 0) {
(usrdata->base_cpu + usrdata->job_pos - usrdata->num_channels); kprintf("%s: error adding per-thread data\n", __FUNCTION__);
} else { return -EINVAL;
c = usrdata->channels + usrdata->base_cpu + usrdata->job_pos;
}
if (!c) {
continue;
}
if (c->param.request_va &&
c->param.request_va->valid) {
#endif
c->param.request_va->valid = 0; /* ack */
dprintk("SC #%lx, %lx\n",
c->param.request_va->number,
c->param.request_va->args[0]);
register_peer_channel(usrdata, current, c);
if (__do_in_kernel_syscall(os, c, c->param.request_va)) {
if (copy_to_user(&req->sr, c->param.request_va,
sizeof(struct syscall_request))) {
deregister_peer_channel(usrdata, current, c);
return -EFAULT;
}
return 0;
}
deregister_peer_channel(usrdata, current, c);
#ifdef DO_USER_MODE
goto retry;
#endif
#ifndef DO_USER_MODE
if (usrdata->mcctrl_dma_abort) {
return -2;
}
}
}
usrdata->remaining_job = 0;
} }
#endif
return 0; if (__do_in_kernel_syscall(os, packet)) {
if (copy_to_user(&req->sr, &packet->req,
sizeof(struct syscall_request))) {
if (mcctrl_delete_per_thread_data(ppd, current) < 0) {
kprintf("%s: error deleting per-thread data\n", __FUNCTION__);
return -EINVAL;
}
return -EFAULT;
}
return 0;
}
ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet,
(usrdata->channels + packet->ref)->c);
if (mcctrl_delete_per_thread_data(ppd, current) < 0) {
kprintf("%s: error deleting per-thread data\n", __FUNCTION__);
return -EINVAL;
}
goto retry;
} }
long mcexec_pin_region(ihk_os_t os, unsigned long *__user arg) long mcexec_pin_region(ihk_os_t os, unsigned long *__user arg)
@ -696,33 +779,6 @@ long mcexec_load_syscall(ihk_os_t os, struct syscall_load_desc *__user arg)
#endif #endif
ihk_device_unmap_memory(ihk_os_to_dev(os), phys, desc.size); ihk_device_unmap_memory(ihk_os_to_dev(os), phys, desc.size);
/*
ihk_dma_channel_t channel;
struct ihk_dma_request request;
unsigned long dma_status = 0;
channel = ihk_device_get_dma_channel(ihk_os_to_dev(os), 0);
if (!channel) {
return -EINVAL;
}
memset(&request, 0, sizeof(request));
request.src_os = os;
request.src_phys = desc.src;
request.dest_os = NULL;
request.dest_phys = desc.dest;
request.size = desc.size;
request.notify = (void *)virt_to_phys(&dma_status);
request.priv = (void *)1;
ihk_dma_request(channel, &request);
while (!dma_status) {
mb();
udelay(1);
}
*/
return 0; return 0;
} }
@ -730,74 +786,60 @@ long mcexec_load_syscall(ihk_os_t os, struct syscall_load_desc *__user arg)
long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg) long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg)
{ {
struct syscall_ret_desc ret; struct syscall_ret_desc ret;
struct mcctrl_channel *mc; struct ikc_scd_packet *packet;
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
#if 0 struct mcctrl_per_proc_data *ppd;
ihk_dma_channel_t channel;
struct ihk_dma_request request;
channel = ihk_device_get_dma_channel(ihk_os_to_dev(os), 0);
if (!channel) {
return -EINVAL;
}
#endif
if (copy_from_user(&ret, arg, sizeof(struct syscall_ret_desc))) { if (copy_from_user(&ret, arg, sizeof(struct syscall_ret_desc))) {
return -EFAULT; return -EFAULT;
} }
mc = usrdata->channels + ret.cpu;
if (!mc) { /* Look up per-process structure */
ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current));
if (!ppd) {
kprintf("%s: ERROR: no per-process structure for PID %d??\n",
__FUNCTION__, task_tgid_vnr(current));
return -EINVAL; return -EINVAL;
} }
deregister_peer_channel(usrdata, current, mc);
mc->param.response_va->ret = ret.ret; packet = (struct ikc_scd_packet *)mcctrl_get_per_thread_data(ppd, current);
if (!packet) {
kprintf("%s: ERROR: no packet registered for TID %d\n",
__FUNCTION__, task_pid_vnr(current));
return -EINVAL;
}
mcctrl_delete_per_thread_data(ppd, current);
if (ret.size > 0) { if (ret.size > 0) {
/* Host => Accel. Write is fast. */ /* Host => Accel. Write is fast. */
unsigned long phys; unsigned long phys;
void *rpm; void *rpm;
phys = ihk_device_map_memory(ihk_os_to_dev(os), ret.dest, phys = ihk_device_map_memory(ihk_os_to_dev(os), ret.dest, ret.size);
ret.size);
#ifdef CONFIG_MIC #ifdef CONFIG_MIC
rpm = ioremap_wc(phys, ret.size); rpm = ioremap_wc(phys, ret.size);
#else #else
rpm = ihk_device_map_virtual(ihk_os_to_dev(os), phys, rpm = ihk_device_map_virtual(ihk_os_to_dev(os), phys,
ret.size, NULL, 0); ret.size, NULL, 0);
#endif #endif
if (copy_from_user(rpm, (void *__user)ret.src, ret.size)) { if (copy_from_user(rpm, (void *__user)ret.src, ret.size)) {
return -EFAULT; return -EFAULT;
} }
mb();
mc->param.response_va->status = 1;
#ifdef CONFIG_MIC #ifdef CONFIG_MIC
iounmap(rpm); iounmap(rpm);
#else #else
ihk_device_unmap_virtual(ihk_os_to_dev(os), rpm, ret.size); ihk_device_unmap_virtual(ihk_os_to_dev(os), rpm, ret.size);
#endif #endif
ihk_device_unmap_memory(ihk_os_to_dev(os), phys, ret.size); ihk_device_unmap_memory(ihk_os_to_dev(os), phys, ret.size);
}
/* __return_syscall(os, packet, ret.ret, task_pid_vnr(current));
memset(&request, 0, sizeof(request));
request.src_os = NULL; /* Free packet */
request.src_phys = ret.src; ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet,
request.dest_os = os; (usrdata->channels + packet->ref)->c);
request.dest_phys = ret.dest;
request.size = ret.size;
request.notify_os = os;
request.notify = (void *)mc->param.response_rpa;
request.priv = (void *)1;
ihk_dma_request(channel, &request);
*/
} else {
mb();
mc->param.response_va->status = 1;
}
return 0; return 0;
} }
@ -862,14 +904,53 @@ int mcexec_open_exec(ihk_os_t os, char * __user filename)
int retval; int retval;
int os_ind = ihk_host_os_get_index(os); int os_ind = ihk_host_os_get_index(os);
char *pathbuf, *fullpath; char *pathbuf, *fullpath;
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
struct mcctrl_per_proc_data *ppd = NULL;
int i;
if (os_ind < 0) { if (os_ind < 0) {
return EINVAL; return EINVAL;
} }
ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current));
if (!ppd) {
ppd = kmalloc(sizeof(*ppd), GFP_KERNEL);
if (!ppd) {
printk("ERROR: allocating per process data\n");
return -ENOMEM;
}
ppd->pid = task_tgid_vnr(current);
/*
* XXX: rpgtable will be updated in __do_in_kernel_syscall()
* under case __NR_munmap
*/
INIT_LIST_HEAD(&ppd->wq_list);
INIT_LIST_HEAD(&ppd->wq_req_list);
INIT_LIST_HEAD(&ppd->wq_list_exact);
spin_lock_init(&ppd->wq_list_lock);
for (i = 0; i < MCCTRL_PER_THREAD_DATA_HASH_SIZE; ++i) {
INIT_LIST_HEAD(&ppd->per_thread_data_hash[i]);
rwlock_init(&ppd->per_thread_data_hash_lock[i]);
}
if (mcctrl_add_per_proc_data(usrdata, ppd->pid, ppd) < 0) {
printk("%s: error adding per process data\n", __FUNCTION__);
retval = EINVAL;
goto out_free_ppd;
}
}
else {
/* Only deallocate in case of an error if we added it above */
ppd = NULL;
}
pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
if (!pathbuf) { if (!pathbuf) {
return ENOMEM; retval = ENOMEM;
goto out_error_drop_ppd;
} }
file = open_exec(filename); file = open_exec(filename);
@ -901,7 +982,7 @@ int mcexec_open_exec(ihk_os_t os, char * __user filename)
break; break;
} }
} }
/* Add new exec file to the list */ /* Add new exec file to the list */
mcef->os = os; mcef->os = os;
mcef->pid = task_tgid_vnr(current); mcef->pid = task_tgid_vnr(current);
@ -918,12 +999,15 @@ int mcexec_open_exec(ihk_os_t os, char * __user filename)
kfree(pathbuf); kfree(pathbuf);
return 0; return 0;
out_put_file: out_put_file:
fput(file); fput(file);
out_error_free: out_error_free:
kfree(pathbuf); kfree(pathbuf);
out_error_drop_ppd:
if (ppd) mcctrl_delete_per_proc_data(usrdata, ppd->pid);
out_free_ppd:
if (ppd) kfree(ppd);
return -retval; return -retval;
} }
@ -933,6 +1017,23 @@ int mcexec_close_exec(ihk_os_t os)
struct mckernel_exec_file *mcef = NULL; struct mckernel_exec_file *mcef = NULL;
int found = 0; int found = 0;
int os_ind = ihk_host_os_get_index(os); int os_ind = ihk_host_os_get_index(os);
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
struct mcctrl_per_proc_data *ppd = NULL;
ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current));
if (ppd) {
mcctrl_delete_per_proc_data(usrdata, ppd->pid);
dprintk("pid: %d, tid: %d: rpgtable for %d (0x%lx) removed\n",
task_tgid_vnr(current), current->pid, ppd->pid, ppd->rpgtable);
kfree(ppd);
}
else {
printk("WARNING: no per process data for pid %d ?\n",
task_tgid_vnr(current));
}
if (os_ind < 0) { if (os_ind < 0) {
return EINVAL; return EINVAL;

View File

@ -27,6 +27,7 @@
#include <linux/miscdevice.h> #include <linux/miscdevice.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/string.h> #include <linux/string.h>
#include <linux/interrupt.h>
#include "mcctrl.h" #include "mcctrl.h"
#ifdef ATTACHED_MIC #ifdef ATTACHED_MIC
#include <sysdeps/mic/mic/micconst.h> #include <sysdeps/mic/mic/micconst.h>
@ -40,16 +41,18 @@
void mcexec_prepare_ack(ihk_os_t os, unsigned long arg, int err); void mcexec_prepare_ack(ihk_os_t os, unsigned long arg, int err);
static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ihk_ikc_channel_desc *c); static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ihk_ikc_channel_desc *c);
int mcexec_syscall(struct mcctrl_channel *c, int pid, unsigned long arg); int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet);
void sig_done(unsigned long arg, int err); void sig_done(unsigned long arg, int err);
/* XXX: this runs in atomic context! */
static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
void *__packet, void *__os) void *__packet, void *__os)
{ {
struct ikc_scd_packet *pisp = __packet; struct ikc_scd_packet *pisp = __packet;
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(__os); struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(__os);
int msg = pisp->msg;
switch (pisp->msg) { switch (msg) {
case SCD_MSG_INIT_CHANNEL: case SCD_MSG_INIT_CHANNEL:
mcctrl_ikc_init(__os, pisp->ref, pisp->arg, c); mcctrl_ikc_init(__os, pisp->ref, pisp->arg, c);
break; break;
@ -63,7 +66,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
break; break;
case SCD_MSG_SYSCALL_ONESIDE: case SCD_MSG_SYSCALL_ONESIDE:
mcexec_syscall(usrdata->channels + pisp->ref, pisp->pid, pisp->arg); mcexec_syscall(usrdata, pisp);
break; break;
case SCD_MSG_PROCFS_ANSWER: case SCD_MSG_PROCFS_ANSWER:
@ -88,11 +91,8 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
break; break;
case SCD_MSG_PROCFS_TID_CREATE: case SCD_MSG_PROCFS_TID_CREATE:
add_tid_entry(ihk_host_os_get_index(__os), pisp->pid, pisp->arg);
break;
case SCD_MSG_PROCFS_TID_DELETE: case SCD_MSG_PROCFS_TID_DELETE:
delete_tid_entry(ihk_host_os_get_index(__os), pisp->pid, pisp->arg); procfsm_packet_handler(__os, pisp->msg, pisp->pid, pisp->arg);
break; break;
case SCD_MSG_GET_VDSO_INFO: case SCD_MSG_GET_VDSO_INFO:
@ -110,6 +110,14 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
pisp->err, pisp->arg); pisp->err, pisp->arg);
break; break;
} }
/*
* SCD_MSG_SYSCALL_ONESIDE keeps the packet and frees it in
* mcexec_ret_syscall(); for all other messages, release it here.
*/
if (msg != SCD_MSG_SYSCALL_ONESIDE) {
ihk_ikc_release_packet((struct ihk_ikc_free_packet *)__packet, c);
}
return 0; return 0;
} }
@ -146,8 +154,6 @@ int mcctrl_ikc_set_recv_cpu(ihk_os_t os, int cpu)
ihk_ikc_channel_set_cpu(usrdata->channels[cpu].c, ihk_ikc_channel_set_cpu(usrdata->channels[cpu].c,
ihk_ikc_get_processor_id()); ihk_ikc_get_processor_id());
kprintf("Setting the target to %d\n",
ihk_ikc_get_processor_id());
return 0; return 0;
} }
@ -193,12 +199,13 @@ static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ih
#endif #endif
pmc->param.request_va = pmc->param.request_va =
(void *)__get_free_pages(GFP_KERNEL, (void *)__get_free_pages(in_interrupt() ? GFP_ATOMIC : GFP_KERNEL,
REQUEST_SHIFT - PAGE_SHIFT); REQUEST_SHIFT - PAGE_SHIFT);
pmc->param.request_pa = virt_to_phys(pmc->param.request_va); pmc->param.request_pa = virt_to_phys(pmc->param.request_va);
pmc->param.doorbell_va = usrdata->mcctrl_doorbell_va; pmc->param.doorbell_va = usrdata->mcctrl_doorbell_va;
pmc->param.doorbell_pa = usrdata->mcctrl_doorbell_pa; pmc->param.doorbell_pa = usrdata->mcctrl_doorbell_pa;
pmc->param.post_va = (void *)__get_free_page(GFP_KERNEL); pmc->param.post_va = (void *)__get_free_page(in_interrupt() ?
GFP_ATOMIC : GFP_KERNEL);
pmc->param.post_pa = virt_to_phys(pmc->param.post_va); pmc->param.post_pa = virt_to_phys(pmc->param.post_va);
memset(pmc->param.doorbell_va, 0, PAGE_SIZE); memset(pmc->param.doorbell_va, 0, PAGE_SIZE);
memset(pmc->param.request_va, 0, PAGE_SIZE); memset(pmc->param.request_va, 0, PAGE_SIZE);
@ -218,8 +225,9 @@ static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ih
PAGE_SIZE, NULL, 0); PAGE_SIZE, NULL, 0);
#endif #endif
pmc->dma_buf = (void *)__get_free_pages(GFP_KERNEL, pmc->dma_buf = (void *)__get_free_pages(in_interrupt() ?
DMA_PIN_SHIFT - PAGE_SHIFT); GFP_ATOMIC : GFP_KERNEL,
DMA_PIN_SHIFT - PAGE_SHIFT);
rpm->request_page = pmc->param.request_pa; rpm->request_page = pmc->param.request_pa;
rpm->doorbell_page = pmc->param.doorbell_pa; rpm->doorbell_page = pmc->param.doorbell_pa;
@ -265,9 +273,6 @@ static int connect_handler(struct ihk_ikc_channel_info *param)
} }
param->packet_handler = syscall_packet_handler; param->packet_handler = syscall_packet_handler;
INIT_LIST_HEAD(&usrdata->channels[cpu].wq_list);
spin_lock_init(&usrdata->channels[cpu].wq_list_lock);
usrdata->channels[cpu].c = c; usrdata->channels[cpu].c = c;
kprintf("syscall: MC CPU %d connected. c=%p\n", cpu, c); kprintf("syscall: MC CPU %d connected. c=%p\n", cpu, c);
@ -286,9 +291,6 @@ static int connect_handler2(struct ihk_ikc_channel_info *param)
param->packet_handler = syscall_packet_handler; param->packet_handler = syscall_packet_handler;
INIT_LIST_HEAD(&usrdata->channels[cpu].wq_list);
spin_lock_init(&usrdata->channels[cpu].wq_list_lock);
usrdata->channels[cpu].c = c; usrdata->channels[cpu].c = c;
kprintf("syscall: MC CPU %d connected. c=%p\n", cpu, c); kprintf("syscall: MC CPU %d connected. c=%p\n", cpu, c);
@ -315,7 +317,7 @@ int prepare_ikc_channels(ihk_os_t os)
{ {
struct ihk_cpu_info *info; struct ihk_cpu_info *info;
struct mcctrl_usrdata *usrdata; struct mcctrl_usrdata *usrdata;
int error; int i;
usrdata = kzalloc(sizeof(struct mcctrl_usrdata), GFP_KERNEL); usrdata = kzalloc(sizeof(struct mcctrl_usrdata), GFP_KERNEL);
usrdata->mcctrl_doorbell_va = (void *)__get_free_page(GFP_KERNEL); usrdata->mcctrl_doorbell_va = (void *)__get_free_page(GFP_KERNEL);
@ -347,17 +349,14 @@ int prepare_ikc_channels(ihk_os_t os)
memcpy(&usrdata->listen_param2, &listen_param2, sizeof listen_param2); memcpy(&usrdata->listen_param2, &listen_param2, sizeof listen_param2);
ihk_ikc_listen_port(os, &usrdata->listen_param2); ihk_ikc_listen_port(os, &usrdata->listen_param2);
INIT_LIST_HEAD(&usrdata->per_proc_list); for (i = 0; i < MCCTRL_PER_PROC_DATA_HASH_SIZE; ++i) {
spin_lock_init(&usrdata->per_proc_list_lock); INIT_LIST_HEAD(&usrdata->per_proc_data_hash[i]);
rwlock_init(&usrdata->per_proc_data_hash_lock[i]);
}
INIT_LIST_HEAD(&usrdata->cpu_topology_list); INIT_LIST_HEAD(&usrdata->cpu_topology_list);
INIT_LIST_HEAD(&usrdata->node_topology_list); INIT_LIST_HEAD(&usrdata->node_topology_list);
error = init_peer_channel_registry(usrdata);
if (error) {
return error;
}
return 0; return 0;
} }
@ -396,7 +395,6 @@ void destroy_ikc_channels(ihk_os_t os)
} }
free_page((unsigned long)usrdata->mcctrl_doorbell_va); free_page((unsigned long)usrdata->mcctrl_doorbell_va);
destroy_peer_channel_registry(usrdata);
kfree(usrdata->channels); kfree(usrdata->channels);
kfree(usrdata); kfree(usrdata);
} }

View File

@ -41,6 +41,7 @@
#include <ikc/master.h> #include <ikc/master.h>
#include <ihk/msr.h> #include <ihk/msr.h>
#include <linux/semaphore.h> #include <linux/semaphore.h>
#include <linux/rwlock.h>
#include <linux/threads.h> #include <linux/threads.h>
#include "sysfs.h" #include "sysfs.h"
@ -48,6 +49,7 @@
#define SCD_MSG_PREPARE_PROCESS_ACKED 0x2 #define SCD_MSG_PREPARE_PROCESS_ACKED 0x2
#define SCD_MSG_PREPARE_PROCESS_NACKED 0x7 #define SCD_MSG_PREPARE_PROCESS_NACKED 0x7
#define SCD_MSG_SCHEDULE_PROCESS 0x3 #define SCD_MSG_SCHEDULE_PROCESS 0x3
#define SCD_MSG_WAKE_UP_SYSCALL_THREAD 0x14
#define SCD_MSG_INIT_CHANNEL 0x5 #define SCD_MSG_INIT_CHANNEL 0x5
#define SCD_MSG_INIT_CHANNEL_ACKED 0x6 #define SCD_MSG_INIT_CHANNEL_ACKED 0x6
@ -110,8 +112,9 @@ struct ikc_scd_packet {
int ref; int ref;
int osnum; int osnum;
int pid; int pid;
int padding;
unsigned long arg; unsigned long arg;
struct syscall_request req;
unsigned long resp_pa;
}; };
/* for SCD_MSG_SYSFS_* */ /* for SCD_MSG_SYSFS_* */
@ -120,7 +123,13 @@ struct ikc_scd_packet {
long sysfs_arg2; long sysfs_arg2;
long sysfs_arg3; long sysfs_arg3;
}; };
/* SCD_MSG_SCHEDULE_THREAD */
struct {
int ttid;
};
}; };
char padding[12];
}; };
struct mcctrl_priv { struct mcctrl_priv {
@ -154,8 +163,11 @@ struct syscall_params {
struct wait_queue_head_list_node { struct wait_queue_head_list_node {
struct list_head list; struct list_head list;
wait_queue_head_t wq_syscall; wait_queue_head_t wq_syscall;
int pid; struct task_struct *task;
/* Denotes an exclusive wait for requester TID rtid */
int rtid;
int req; int req;
struct ikc_scd_packet *packet;
}; };
struct mcctrl_channel { struct mcctrl_channel {
@ -163,15 +175,30 @@ struct mcctrl_channel {
struct syscall_params param; struct syscall_params param;
struct ikc_scd_init_param init; struct ikc_scd_init_param init;
void *dma_buf; void *dma_buf;
struct list_head wq_list;
ihk_spinlock_t wq_list_lock;
}; };
/*
 * One per-thread entry stored in the per_thread_data_hash buckets of
 * struct mcctrl_per_proc_data.
 */
struct mcctrl_per_thread_data {
struct list_head hash;	/* linkage within one hash bucket */
struct task_struct *task;	/* lookup key: entries are matched by task pointer */
void *data;	/* opaque payload supplied when the entry is added */
};
#define MCCTRL_PER_THREAD_DATA_HASH_SHIFT 8
#define MCCTRL_PER_THREAD_DATA_HASH_SIZE (1 << MCCTRL_PER_THREAD_DATA_HASH_SHIFT)
#define MCCTRL_PER_THREAD_DATA_HASH_MASK (MCCTRL_PER_THREAD_DATA_HASH_SIZE - 1)
struct mcctrl_per_proc_data { struct mcctrl_per_proc_data {
struct list_head list; struct list_head hash;
int pid; int pid;
unsigned long rpgtable; /* per process, not per OS */ unsigned long rpgtable; /* per process, not per OS */
struct list_head wq_list;
struct list_head wq_req_list;
struct list_head wq_list_exact;
ihk_spinlock_t wq_list_lock;
struct list_head per_thread_data_hash[MCCTRL_PER_THREAD_DATA_HASH_SIZE];
rwlock_t per_thread_data_hash_lock[MCCTRL_PER_THREAD_DATA_HASH_SIZE];
}; };
struct sysfsm_req { struct sysfsm_req {
@ -230,6 +257,10 @@ struct node_topology {
#define CPU_LONGS (((NR_CPUS) + (BITS_PER_LONG) - 1) / (BITS_PER_LONG)) #define CPU_LONGS (((NR_CPUS) + (BITS_PER_LONG) - 1) / (BITS_PER_LONG))
#define MCCTRL_PER_PROC_DATA_HASH_SHIFT 7
#define MCCTRL_PER_PROC_DATA_HASH_SIZE (1 << MCCTRL_PER_PROC_DATA_HASH_SHIFT)
#define MCCTRL_PER_PROC_DATA_HASH_MASK (MCCTRL_PER_PROC_DATA_HASH_SIZE - 1)
struct mcctrl_usrdata { struct mcctrl_usrdata {
struct ihk_ikc_listen_param listen_param; struct ihk_ikc_listen_param listen_param;
struct ihk_ikc_listen_param listen_param2; struct ihk_ikc_listen_param listen_param2;
@ -245,8 +276,9 @@ struct mcctrl_usrdata {
unsigned long last_thread_exec; unsigned long last_thread_exec;
wait_queue_head_t wq_prepare; wait_queue_head_t wq_prepare;
struct list_head per_proc_list; struct list_head per_proc_data_hash[MCCTRL_PER_PROC_DATA_HASH_SIZE];
ihk_spinlock_t per_proc_list_lock; rwlock_t per_proc_data_hash_lock[MCCTRL_PER_PROC_DATA_HASH_SIZE];
void **keys; void **keys;
struct sysfsm_data sysfsm_data; struct sysfsm_data sysfsm_data;
unsigned long cpu_online[CPU_LONGS]; unsigned long cpu_online[CPU_LONGS];
@ -273,12 +305,22 @@ int mcctrl_ikc_is_valid_thread(ihk_os_t os, int cpu);
ihk_os_t osnum_to_os(int n); ihk_os_t osnum_to_os(int n);
/* syscall.c */ /* syscall.c */
int init_peer_channel_registry(struct mcctrl_usrdata *ud); int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet);
void destroy_peer_channel_registry(struct mcctrl_usrdata *ud); int mcctrl_add_per_proc_data(struct mcctrl_usrdata *ud, int pid,
int register_peer_channel(struct mcctrl_usrdata *ud, void *key, struct mcctrl_channel *ch); struct mcctrl_per_proc_data *ppd);
int deregister_peer_channel(struct mcctrl_usrdata *ud, void *key, struct mcctrl_channel *ch); int mcctrl_delete_per_proc_data(struct mcctrl_usrdata *ud, int pid);
struct mcctrl_channel *get_peer_channel(struct mcctrl_usrdata *ud, void *key); inline struct mcctrl_per_proc_data *mcctrl_get_per_proc_data(
int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall_request *sc); struct mcctrl_usrdata *ud, int pid);
int mcctrl_add_per_thread_data(struct mcctrl_per_proc_data* ppd,
struct task_struct *task, void *data);
int mcctrl_delete_per_thread_data(struct mcctrl_per_proc_data* ppd,
struct task_struct *task);
inline struct mcctrl_per_thread_data *mcctrl_get_per_thread_data(
struct mcctrl_per_proc_data *ppd, struct task_struct *task);
void __return_syscall(ihk_os_t os, struct ikc_scd_packet *packet,
long ret, int stid);
#define PROCFS_NAME_MAX 1000 #define PROCFS_NAME_MAX 1000
@ -301,6 +343,7 @@ struct procfs_file {
}; };
void procfs_answer(unsigned int arg, int err); void procfs_answer(unsigned int arg, int err);
int procfsm_packet_handler(void *os, int msg, int pid, unsigned long arg);
void add_tid_entry(int osnum, int pid, int tid); void add_tid_entry(int osnum, int pid, int tid);
void add_pid_entry(int osnum, int pid); void add_pid_entry(int osnum, int pid);
void delete_tid_entry(int osnum, int pid, int tid); void delete_tid_entry(int osnum, int pid, int tid);

View File

@ -17,6 +17,7 @@
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/resource.h> #include <linux/resource.h>
#include <linux/interrupt.h>
#include "mcctrl.h" #include "mcctrl.h"
#include <linux/version.h> #include <linux/version.h>
#include <linux/semaphore.h> #include <linux/semaphore.h>
@ -713,6 +714,57 @@ mckernel_procfs_lseek(struct file *file, loff_t offset, int orig)
return file->f_pos; return file->f_pos;
} }
/*
 * Deferred procfs request: carries the arguments given to
 * procfsm_packet_handler() over to procfsm_work_main() through the
 * embedded work item.
 */
struct procfs_work {
void *os;	/* OS handle; converted with ihk_host_os_get_index() by the worker */
int msg;	/* SCD_MSG_PROCFS_TID_CREATE or SCD_MSG_PROCFS_TID_DELETE */
int pid;	/* forwarded as the pid argument of add/delete_tid_entry() */
unsigned long arg;	/* forwarded as the tid argument of add/delete_tid_entry() */
struct work_struct work;	/* embedded work item; container_of() recovers this request */
};
/*
 * Work function for deferred procfs TID notifications.
 *
 * Recovers the procfs_work request that embeds work0, applies the queued
 * TID create or delete to the procfs entries of the request's OS instance
 * (an unrecognized message is only logged), and then frees the request.
 */
static void procfsm_work_main(struct work_struct *work0)
{
	struct procfs_work *req = container_of(work0, struct procfs_work, work);

	if (req->msg == SCD_MSG_PROCFS_TID_CREATE) {
		add_tid_entry(ihk_host_os_get_index(req->os), req->pid, req->arg);
	}
	else if (req->msg == SCD_MSG_PROCFS_TID_DELETE) {
		delete_tid_entry(ihk_host_os_get_index(req->os), req->pid, req->arg);
	}
	else {
		printk("%s: unknown work: msg: %d, pid: %d, arg: %lu)\n",
				__FUNCTION__, req->msg, req->pid, req->arg);
	}

	/* The request is released here regardless of which branch ran. */
	kfree(req);
}
/*
 * Queue a procfs TID create/delete message for deferred handling.
 *
 * Copies the message parameters into a newly allocated procfs_work item
 * and hands it to schedule_work(); procfsm_work_main() later performs the
 * update and frees the item.  The allocation uses GFP_ATOMIC, so this
 * function itself does not sleep.
 *
 * @os:  OS handle the message arrived on
 * @msg: SCD_MSG_PROCFS_TID_CREATE or SCD_MSG_PROCFS_TID_DELETE
 * @pid: process ID forwarded to the procfs entry helpers
 * @arg: thread ID forwarded to the procfs entry helpers
 *
 * Returns 0 once the work item is queued, or -ENOMEM when the work item
 * cannot be allocated (previously a bare -1, i.e. -EPERM).
 */
int procfsm_packet_handler(void *os, int msg, int pid, unsigned long arg)
{
	struct procfs_work *work;

	work = kzalloc(sizeof(*work), GFP_ATOMIC);
	if (!work) {
		printk("%s: kzalloc failed\n", __FUNCTION__);
		return -ENOMEM;
	}

	work->os = os;
	work->msg = msg;
	work->pid = pid;
	work->arg = arg;

	INIT_WORK(&work->work, procfsm_work_main);
	schedule_work(&work->work);

	return 0;
}
static const struct file_operations mckernel_forward_ro = { static const struct file_operations mckernel_forward_ro = {
.llseek = mckernel_procfs_lseek, .llseek = mckernel_procfs_lseek,
.read = mckernel_procfs_read, .read = mckernel_procfs_read,

View File

@ -40,6 +40,7 @@
#include <linux/cred.h> #include <linux/cred.h>
#include <linux/capability.h> #include <linux/capability.h>
#include <linux/semaphore.h> #include <linux/semaphore.h>
#include <linux/spinlock.h>
#include <linux/mount.h> #include <linux/mount.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <asm/delay.h> #include <asm/delay.h>
@ -84,88 +85,96 @@ static void print_dma_lastreq(void)
} }
#endif #endif
int init_peer_channel_registry(struct mcctrl_usrdata *ud) int mcctrl_add_per_thread_data(struct mcctrl_per_proc_data* ppd,
struct task_struct *task, void *data)
{ {
ud->keys = kzalloc(sizeof(void *) * ud->num_channels, GFP_KERNEL); struct mcctrl_per_thread_data *ptd_iter, *ptd = NULL;
if (!ud->keys) { struct mcctrl_per_thread_data *ptd_alloc = NULL;
printk("Error: cannot allocate usrdata.keys[].\n"); int hash = (((uint64_t)task >> 4) & MCCTRL_PER_THREAD_DATA_HASH_MASK);
return -ENOMEM; int ret = 0;
unsigned long flags;
ptd_alloc = kmalloc(sizeof(*ptd), GFP_ATOMIC);
if (!ptd_alloc) {
kprintf("%s: error allocate per thread data\n", __FUNCTION__);
ret = -ENOMEM;
goto out_noalloc;
} }
return 0; /* Check if data for this thread exists and add if not */
} write_lock_irqsave(&ppd->per_thread_data_hash_lock[hash], flags);
list_for_each_entry(ptd_iter, &ppd->per_thread_data_hash[hash], hash) {
void destroy_peer_channel_registry(struct mcctrl_usrdata *ud) if (ptd_iter->task == task) {
{ ptd = ptd_iter;
kfree(ud->keys); break;
ud->keys = NULL;
return;
}
int register_peer_channel(struct mcctrl_usrdata *ud, void *key, struct mcctrl_channel *ch)
{
int cpu;
cpu = ch - ud->channels;
if ((cpu < 0) || (ud->num_channels <= cpu)) {
printk("register_peer_channel(%p,%p,%p):"
"not a syscall channel. cpu=%d\n",
ud, key, ch, cpu);
return -EINVAL;
}
if (ud->keys[cpu] != NULL) {
printk("register_peer_channel(%p,%p,%p):"
"already registered. cpu=%d\n",
ud, key, ch, cpu);
/*
* When mcexec receives a signal,
* it may be finished without doing deregister_peer_channel().
* Therefore a substitute registration is necessary.
*/
#if 0
return -EBUSY;
#endif
}
ud->keys[cpu] = key;
return 0;
}
int deregister_peer_channel(struct mcctrl_usrdata *ud, void *key, struct mcctrl_channel *ch)
{
int cpu;
cpu = ch - ud->channels;
if ((cpu < 0) || (ud->num_channels <= cpu)) {
printk("deregister_peer_channel(%p,%p,%p):"
"not a syscall channel. cpu=%d\n",
ud, key, ch, cpu);
return -EINVAL;
}
if (ud->keys[cpu] && (ud->keys[cpu] != key)) {
printk("deregister_peer_channel(%p,%p,%p):"
"not registered. cpu=%d\n",
ud, key, ch, cpu);
return -EBUSY;
}
ud->keys[cpu] = NULL;
return 0;
}
struct mcctrl_channel *get_peer_channel(struct mcctrl_usrdata *ud, void *key)
{
int cpu;
for (cpu = 0; cpu < ud->num_channels; ++cpu) {
if (ud->keys[cpu] == key) {
return &ud->channels[cpu];
} }
} }
return NULL; if (unlikely(ptd)) {
ret = -EBUSY;
kfree(ptd_alloc);
goto out;
}
ptd = ptd_alloc;
ptd->task = task;
ptd->data = data;
list_add_tail(&ptd->hash, &ppd->per_thread_data_hash[hash]);
out:
write_unlock_irqrestore(&ppd->per_thread_data_hash_lock[hash], flags);
out_noalloc:
return ret;
}
/*
 * Remove and free the per-thread entry registered for `task` in `ppd`.
 *
 * The hash bucket is derived from the task pointer and is searched while
 * holding that bucket's write lock with interrupt state saved.
 *
 * Returns 0 when an entry was found and released, or -EINVAL when no
 * entry for `task` exists.
 */
int mcctrl_delete_per_thread_data(struct mcctrl_per_proc_data* ppd,
		struct task_struct *task)
{
	int hash = (((uint64_t)task >> 4) & MCCTRL_PER_THREAD_DATA_HASH_MASK);
	struct list_head *bucket = &ppd->per_thread_data_hash[hash];
	struct mcctrl_per_thread_data *entry;
	unsigned long flags;
	int ret = -EINVAL;

	write_lock_irqsave(&ppd->per_thread_data_hash_lock[hash], flags);

	list_for_each_entry(entry, bucket, hash) {
		if (entry->task != task) {
			continue;
		}

		/* Found it: unlink and free, then stop before touching it again */
		list_del(&entry->hash);
		kfree(entry);
		ret = 0;
		break;
	}

	write_unlock_irqrestore(&ppd->per_thread_data_hash_lock[hash], flags);

	return ret;
}
/*
 * Look up the per-thread entry registered for `task` in `ppd`.
 *
 * The bucket is chosen from the task pointer and walked under that
 * bucket's read lock.  On a match the entry's `data` payload is returned,
 * not the entry itself; NULL is returned when no entry matches.
 *
 * NOTE(review): the declared return type is struct mcctrl_per_thread_data *,
 * yet the value handed back is ptd->data (a void *); remote_page_fault()
 * casts the result to struct ikc_scd_packet *.  The return type probably
 * ought to be void * -- confirm against the header prototype and callers.
 *
 * NOTE(review): the payload pointer is returned after the read lock has
 * been dropped, so the lock does not cover the caller's use of it.
 */
struct mcctrl_per_thread_data *mcctrl_get_per_thread_data(struct mcctrl_per_proc_data *ppd, struct task_struct *task)
{
struct mcctrl_per_thread_data *ptd_iter, *ptd = NULL;
/* Same bucket selection as the add/delete helpers */
int hash = (((uint64_t)task >> 4) & MCCTRL_PER_THREAD_DATA_HASH_MASK);
unsigned long flags;
/* Check if data for this thread exists and return it */
read_lock_irqsave(&ppd->per_thread_data_hash_lock[hash], flags);
list_for_each_entry(ptd_iter, &ppd->per_thread_data_hash[hash], hash) {
if (ptd_iter->task == task) {
ptd = ptd_iter;
break;
}
}
read_unlock_irqrestore(&ppd->per_thread_data_hash_lock[hash], flags);
return ptd ? ptd->data : NULL;
} }
#if 1 /* x86 depend, host OS side */ #if 1 /* x86 depend, host OS side */
@ -232,80 +241,156 @@ out:
} }
#endif #endif
/*
 * Notify the thread that issued a syscall request that its response
 * structure has been updated.
 *
 * @os:     OS instance whose channel table is consulted
 * @packet: request packet; packet->ref selects the IKC channel and
 *          packet->req.rtid names the thread to be woken
 * @res:    response structure holding req_thread_status
 *
 * If the requester is still in the SPINNING state, its status is switched
 * atomically to TO_BE_WOKEN and no IKC message is sent.  Otherwise it is
 * expected to be DESCHEDULED: the status is switched to TO_BE_WOKEN and an
 * SCD_MSG_WAKE_UP_SYSCALL_THREAD message is sent on the channel.
 *
 * Returns 0 when no message was needed, the result of ihk_ikc_send() when
 * one was sent, or -EINVAL when the requester was in neither state.
 */
static int __notify_syscall_requester(ihk_os_t os, struct ikc_scd_packet *packet,
		struct syscall_response *res)
{
	struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
	struct ihk_ikc_channel_desc *c = (usrdata->channels + packet->ref)->c;
	struct ikc_scd_packet r_packet;
	int ret = 0;

	/* If spinning, no need for IKC message */
	if (__sync_bool_compare_and_swap(&res->req_thread_status,
				IHK_SCD_REQ_THREAD_SPINNING,
				IHK_SCD_REQ_THREAD_TO_BE_WOKEN)) {
		dprintk("%s: no need to send IKC message for PID %d\n",
				__FUNCTION__, packet->pid);
		return ret;
	}

	/* The thread is not spinning any more, make sure it's descheduled */
	if (!__sync_bool_compare_and_swap(&res->req_thread_status,
				IHK_SCD_REQ_THREAD_DESCHEDULED,
				IHK_SCD_REQ_THREAD_TO_BE_WOKEN)) {
		printk("%s: WARNING: inconsistent requester status, "
				"pid: %d, req status: %lu, syscall nr: %lu\n",
				__FUNCTION__, packet->pid,
				res->req_thread_status, packet->req.number);
		dump_stack();
		return -EINVAL;
	}

	/*
	 * Only msg and ttid are meaningful for a wake-up message; clear the
	 * whole packet first so that no indeterminate stack contents are
	 * transmitted in the remaining fields.
	 */
	memset(&r_packet, 0, sizeof(r_packet));
	r_packet.msg = SCD_MSG_WAKE_UP_SYSCALL_THREAD;
	r_packet.ttid = packet->req.rtid;
	ret = ihk_ikc_send(c, &r_packet, 0);

	return ret;
}
static int remote_page_fault(struct mcctrl_usrdata *usrdata, void *fault_addr, uint64_t reason) static int remote_page_fault(struct mcctrl_usrdata *usrdata, void *fault_addr, uint64_t reason)
{ {
struct mcctrl_channel *channel; struct ikc_scd_packet *packet;
struct syscall_request *req; struct syscall_request *req;
struct syscall_response *resp; struct syscall_response *resp;
int error; int error;
struct wait_queue_head_list_node *wqhln;
unsigned long irqflags;
struct mcctrl_per_proc_data *ppd;
unsigned long phys;
dprintk("remote_page_fault(%p,%p,%llx)\n", usrdata, fault_addr, reason); dprintk("%s: tid: %d, fault_addr: %lu, reason: %lu\n",
__FUNCTION__, task_pid_vnr(current), fault_addr, reason);
/* Look up per-process structure */
ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current));
channel = get_peer_channel(usrdata, current); if (!ppd) {
if (!channel) { kprintf("%s: ERROR: no per-process structure for PID %d??\n",
error = -ENOENT; __FUNCTION__, task_tgid_vnr(current));
printk("remote_page_fault(%p,%p,%llx):channel not found. %d\n", return -EINVAL;
usrdata, fault_addr, reason, error);
goto out;
} }
req = channel->param.request_va; packet = (struct ikc_scd_packet *)mcctrl_get_per_thread_data(ppd, current);
resp = channel->param.response_va; if (!packet) {
error = -ENOENT;
printk("%s: no packet registered for TID %d\n",
__FUNCTION__, task_pid_vnr(current));
goto out_no_unmap;
}
/* request page fault */ req = &packet->req;
/* Map response structure */
phys = ihk_device_map_memory(ihk_os_to_dev(usrdata->os),
packet->resp_pa, sizeof(*resp));
resp = ihk_device_map_virtual(ihk_os_to_dev(usrdata->os),
phys, sizeof(*resp), NULL, 0);
retry_alloc:
wqhln = kmalloc(sizeof(*wqhln), GFP_ATOMIC);
if (!wqhln) {
printk("WARNING: coudln't alloc wait queue head, retrying..\n");
goto retry_alloc;
}
/* Prepare per-thread wait queue head */
wqhln->task = current;
wqhln->req = 0;
init_waitqueue_head(&wqhln->wq_syscall);
irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock);
/* Add to exact list */
list_add_tail(&wqhln->list, &ppd->wq_list_exact);
ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, irqflags);
/* Request page fault */
resp->ret = -EFAULT; resp->ret = -EFAULT;
resp->fault_address = (unsigned long)fault_addr; resp->fault_address = (unsigned long)fault_addr;
resp->fault_reason = reason; resp->fault_reason = reason;
resp->stid = task_pid_vnr(current);
#define STATUS_PAGER_COMPLETED 1 #define STATUS_PAGER_COMPLETED 1
#define STATUS_PAGE_FAULT 3 #define STATUS_PAGE_FAULT 3
req->valid = 0; req->valid = 0;
if (__notify_syscall_requester(usrdata->os, packet, resp) < 0) {
printk("%s: WARNING: failed to notify PID %d\n",
__FUNCTION__, packet->pid);
}
mb(); mb();
resp->status = STATUS_PAGE_FAULT; resp->status = STATUS_PAGE_FAULT;
for (;;) { for (;;) {
struct wait_queue_head_list_node *wqhln; dprintk("%s: tid: %d, fault_addr: %p SLEEPING\n",
struct wait_queue_head_list_node *wqhln_iter; __FUNCTION__, task_pid_vnr(current), fault_addr);
unsigned long irqflags;
retry_alloc:
wqhln = kmalloc(sizeof(*wqhln), GFP_KERNEL);
if (!wqhln) {
printk("WARNING: coudln't alloc wait queue head, retrying..\n");
goto retry_alloc;
}
/* Prepare per-process wait queue head */
wqhln->pid = task_tgid_vnr(current);
wqhln->req = 0;
init_waitqueue_head(&wqhln->wq_syscall);
irqflags = ihk_ikc_spinlock_lock(&channel->wq_list_lock);
/* First see if there is a wait queue already */
list_for_each_entry(wqhln_iter, &channel->wq_list, list) {
if (wqhln_iter->pid == task_tgid_vnr(current)) {
kfree(wqhln);
wqhln = wqhln_iter;
list_del(&wqhln->list);
break;
}
}
list_add_tail(&wqhln->list, &channel->wq_list);
ihk_ikc_spinlock_unlock(&channel->wq_list_lock, irqflags);
/* wait for response */ /* wait for response */
error = wait_event_interruptible(wqhln->wq_syscall, wqhln->req); error = wait_event_interruptible(wqhln->wq_syscall, wqhln->req);
/* Remove per-process wait queue head */ /* Remove per-thread wait queue head */
irqflags = ihk_ikc_spinlock_lock(&channel->wq_list_lock); irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock);
list_del(&wqhln->list); list_del(&wqhln->list);
ihk_ikc_spinlock_unlock(&channel->wq_list_lock, irqflags); ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, irqflags);
kfree(wqhln);
dprintk("%s: tid: %d, fault_addr: %p WOKEN UP\n",
__FUNCTION__, task_pid_vnr(current), fault_addr);
if (error) { if (error) {
kfree(wqhln);
printk("remote_page_fault:interrupted. %d\n", error); printk("remote_page_fault:interrupted. %d\n", error);
goto out; goto out;
} }
else {
/* Update packet reference */
packet = wqhln->packet;
req = &packet->req;
{
unsigned long phys2;
struct syscall_response *resp2;
phys2 = ihk_device_map_memory(ihk_os_to_dev(usrdata->os),
packet->resp_pa, sizeof(*resp));
resp2 = ihk_device_map_virtual(ihk_os_to_dev(usrdata->os),
phys2, sizeof(*resp), NULL, 0);
if (resp != resp2) {
resp = resp2;
phys = phys2;
printk("%s: updated new remote PA for resp\n", __FUNCTION__);
}
}
}
if (!req->valid) { if (!req->valid) {
printk("remote_page_fault:not valid\n"); printk("remote_page_fault:not valid\n");
} }
@ -321,23 +406,37 @@ retry_alloc:
#define PAGER_REQ_RESUME 0x0101 #define PAGER_REQ_RESUME 0x0101
else if (req->args[0] != PAGER_REQ_RESUME) { else if (req->args[0] != PAGER_REQ_RESUME) {
resp->ret = pager_call(usrdata->os, (void *)req); resp->ret = pager_call(usrdata->os, (void *)req);
if (__notify_syscall_requester(usrdata->os, packet, resp) < 0) {
printk("%s: WARNING: failed to notify PID %d\n",
__FUNCTION__, packet->pid);
}
mb(); mb();
resp->status = STATUS_PAGER_COMPLETED; resp->status = STATUS_PAGER_COMPLETED;
continue; break;
//continue;
} }
else { else {
error = req->args[1]; error = req->args[1];
if (error) { if (error) {
printk("remote_page_fault:response %d\n", error); printk("remote_page_fault:response %d\n", error);
kfree(wqhln);
goto out; goto out;
} }
} }
break; break;
} }
kfree(wqhln);
error = 0; error = 0;
out: out:
dprintk("remote_page_fault(%p,%p,%llx): %d\n", usrdata, fault_addr, reason, error); ihk_device_unmap_virtual(ihk_os_to_dev(usrdata->os), resp, sizeof(*resp));
ihk_device_unmap_memory(ihk_os_to_dev(usrdata->os), phys, sizeof(*resp));
out_no_unmap:
dprintk("%s: tid: %d, fault_addr: %lu, reason: %lu, error: %d\n",
__FUNCTION__, task_pid_vnr(current), fault_addr, reason, error);
return error; return error;
} }
@ -389,8 +488,9 @@ static int rus_page_hash_insert(struct page *page)
{ {
int ret = 0; int ret = 0;
struct rus_page *rp; struct rus_page *rp;
unsigned long flags;
spin_lock(&rus_page_hash_lock); spin_lock_irqsave(&rus_page_hash_lock, flags);
rp = _rus_page_hash_lookup(page); rp = _rus_page_hash_lookup(page);
if (!rp) { if (!rp) {
@ -417,7 +517,7 @@ static int rus_page_hash_insert(struct page *page)
out: out:
spin_unlock(&rus_page_hash_lock); spin_unlock_irqrestore(&rus_page_hash_lock, flags);
return ret; return ret;
} }
@ -426,8 +526,9 @@ void rus_page_hash_put_pages(void)
int i; int i;
struct rus_page *rp_iter; struct rus_page *rp_iter;
struct rus_page *rp_iter_next; struct rus_page *rp_iter_next;
unsigned long flags;
spin_lock(&rus_page_hash_lock); spin_lock_irqsave(&rus_page_hash_lock, flags);
for (i = 0; i < RUS_PAGE_HASH_SIZE; ++i) { for (i = 0; i < RUS_PAGE_HASH_SIZE; ++i) {
@ -440,7 +541,7 @@ void rus_page_hash_put_pages(void)
} }
} }
spin_unlock(&rus_page_hash_lock); spin_unlock_irqrestore(&rus_page_hash_lock, flags);
} }
@ -472,27 +573,22 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
#if USE_VM_INSERT_PFN #if USE_VM_INSERT_PFN
size_t pix; size_t pix;
#endif #endif
struct mcctrl_per_proc_data *ppd, *ppd_iter; struct mcctrl_per_proc_data *ppd;
unsigned long flags;
dprintk("mcctrl:page fault:flags %#x pgoff %#lx va %p page %p\n", dprintk("mcctrl:page fault:flags %#x pgoff %#lx va %p page %p\n",
vmf->flags, vmf->pgoff, vmf->virtual_address, vmf->page); vmf->flags, vmf->pgoff, vmf->virtual_address, vmf->page);
ppd = NULL; /* Look up per-process structure */
flags = ihk_ikc_spinlock_lock(&usrdata->per_proc_list_lock); ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current));
if (!ppd) {
list_for_each_entry(ppd_iter, &usrdata->per_proc_list, list) { ppd = mcctrl_get_per_proc_data(usrdata, vma->vm_mm->owner->pid);
if (ppd_iter->pid == task_tgid_vnr(current) ||
ppd_iter->pid == vma->vm_mm->owner->pid) {
ppd = ppd_iter;
break;
}
} }
ihk_ikc_spinlock_unlock(&usrdata->per_proc_list_lock, flags);
if (!ppd) { if (!ppd) {
printk("ERROR: no per process data for pid %d\n", task_tgid_vnr(current)); kprintf("%s: ERROR: no per-process structure for PID %d??\n",
return VM_FAULT_SIGBUS; __FUNCTION__, task_tgid_vnr(current));
return -EINVAL;
} }
for (try = 1; ; ++try) { for (try = 1; ; ++try) {
@ -626,237 +722,6 @@ reserve_user_space_common(struct mcctrl_usrdata *usrdata, unsigned long start, u
return start; return start;
} }
//unsigned long last_thread_exec = 0;
#ifndef DO_USER_MODE
static struct {
long (*do_sys_open)(int, const char __user *, int, int);
long (*sys_lseek)(unsigned int, off_t, unsigned int);
long (*sys_read)(unsigned int, char __user *, size_t);
long (*sys_write)(unsigned int, const char __user *, size_t);
} syscalls;
void
mcctrl_syscall_init(void)
{
printk("mcctrl_syscall_init\n");
syscalls.do_sys_open = (void *)kallsyms_lookup_name("do_sys_open");
syscalls.sys_lseek = (void *)kallsyms_lookup_name("sys_lseek");
syscalls.sys_read = (void *)kallsyms_lookup_name("sys_read");
syscalls.sys_write = (void *)kallsyms_lookup_name("sys_write");
printk("syscalls.do_sys_open=%lx\n", (long)syscalls.do_sys_open);
printk("syscalls.sys_lseek=%lx\n", (long)syscalls.sys_lseek);
printk("syscalls.sys_read=%lx\n", (long)syscalls.sys_read);
printk("syscalls.sys_write=%lx\n", (long)syscalls.sys_write);
}
static int do_async_copy(ihk_os_t os, unsigned long dest, unsigned long src,
unsigned long size, unsigned int inbound)
{
struct ihk_dma_request request;
ihk_dma_channel_t channel;
unsigned long asize = ALIGN_WAIT_BUF(size);
channel = ihk_device_get_dma_channel(ihk_os_to_dev(os), 0);
if (!channel) {
return -EINVAL;
}
memset(&request, 0, sizeof(request));
request.src_os = inbound ? os : NULL;
request.src_phys = src;
request.dest_os = inbound ? NULL : os;
request.dest_phys = dest;
request.size = size;
request.notify = (void *)(inbound ? dest + asize : src + asize);
request.priv = (void *)1;
*(unsigned long *)phys_to_virt((unsigned long)request.notify) = 0;
#ifdef SC_DEBUG
last_request = request;
#endif
ihk_dma_request(channel, &request);
return 0;
}
//int mcctrl_dma_abort;
static void async_wait(ihk_os_t os, unsigned char *p, int size)
{
int asize = ALIGN_WAIT_BUF(size);
unsigned long long s, w;
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
rdtscll(s);
while (!p[asize]) {
mb();
cpu_relax();
rdtscll(w);
if (w > s + 1024UL * 1024 * 1024 * 10) {
printk("DMA Timed out : %p (%p + %d) => %d\n",
p + asize, p, size, p[asize]);
#ifdef SC_DEBUG
print_dma_lastreq();
#endif
usrdata->mcctrl_dma_abort = 1;
return;
}
}
}
static void clear_wait(unsigned char *p, int size)
{
//int asize = ALIGN_WAIT_BUF(size);
p[size] = 0;
}
static unsigned long translate_remote_va(struct mcctrl_channel *c,
unsigned long rva)
{
int i, n;
struct syscall_post *p;
p = c->param.post_va;
n = (int)p->v[0];
if (n < 0 || n >= PAGE_SIZE / sizeof(struct syscall_post)) {
return -EINVAL;
}
for (i = 0; i < n; i++) {
if (p[i + 1].v[0] != 1) {
continue;
}
if (rva >= p[i + 1].v[1] && rva < p[i + 1].v[2]) {
return p[i + 1].v[3] + (rva - p[i + 1].v[1]);
}
}
return -EFAULT;
}
//extern struct mcctrl_channel *channels;
#if 0
/*
 * In-kernel handling of a small set of system calls forwarded over channel
 * `c` (this variant is compiled out by the surrounding #if 0).
 *
 * Handled numbers: 0/1024 (read), 1/1025 (write), 2/1026 (open),
 * 3 (close), 8 (lseek) and 56 (clone).  When bit 1024 is set in sc->number
 * the buffer/path argument is a remote virtual address and is first
 * translated with translate_remote_va(); a failed translation is answered
 * with -EFAULT.
 *
 * Payload is staged in c->dma_buf and moved with do_async_copy() /
 * async_wait().
 *
 * Returns 0 when a reply was posted through __return_syscall(), or -ENOSYS
 * for an unhandled number that does not have bit 1024 set.
 */
int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c,
		struct syscall_request *sc)
{
	int ret;
	mm_segment_t fs;
	struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);

	switch (sc->number) {
	case 0: /* read */
	case 1024:
		if (sc->number & 1024) {
			sc->args[1] = translate_remote_va(c, sc->args[1]);
			if ((long)sc->args[1] < 0) {
				__return_syscall(c, -EFAULT);
				return 0;
			}
		}

		clear_wait(c->dma_buf, sc->args[2]);
		fs = get_fs();
		set_fs(KERNEL_DS);
		ret = syscalls.sys_read(sc->args[0], c->dma_buf, sc->args[2]);
		/*
		 * Restore the saved address limit on every path.  It used to
		 * be restored only when ret > 0, leaving KERNEL_DS in effect
		 * after EOF or a failed read.
		 */
		set_fs(fs);
		if (ret > 0) {
			do_async_copy(os, sc->args[1], virt_to_phys(c->dma_buf),
				      sc->args[2], 0);
			async_wait(os, c->dma_buf, sc->args[2]);
		}
		__return_syscall(c, ret);
		return 0;

	case 1: /* write */
	case 1025:
		if (sc->number & 1024) {
			sc->args[1] = translate_remote_va(c, sc->args[1]);
			if ((long)sc->args[1] < 0) {
				__return_syscall(c, -EFAULT);
				return 0;
			}
		}

		/* Pull the data to be written into the staging buffer. */
		clear_wait(c->dma_buf, sc->args[2]);
		do_async_copy(os, virt_to_phys(c->dma_buf), sc->args[1],
			      sc->args[2], 1);
		fs = get_fs();
		set_fs(KERNEL_DS);
		async_wait(os, c->dma_buf, sc->args[2]);
		ret = syscalls.sys_write(sc->args[0], c->dma_buf, sc->args[2]);
		set_fs(fs);
		__return_syscall(c, ret);
		return 0;

	case 2: /* open */
	case 1026:
		if (sc->number & 1024) {
			sc->args[0] = translate_remote_va(c, sc->args[0]);
			if ((long)sc->args[0] < 0) {
				__return_syscall(c, -EFAULT);
				return 0;
			}
		}

		/* A fixed 256 bytes are fetched for the path name. */
		clear_wait(c->dma_buf, 256);
		do_async_copy(os, virt_to_phys(c->dma_buf), sc->args[0],
			      256, 1);
		fs = get_fs();
		set_fs(KERNEL_DS);
		async_wait(os, c->dma_buf, 256);
		ret = syscalls.do_sys_open(AT_FDCWD, c->dma_buf, sc->args[1],
					   sc->args[2]);
		set_fs(fs);
		__return_syscall(c, ret);
		return 0;

	case 3: /* Close */
		ret = sys_close(sc->args[0]);
		__return_syscall(c, ret);
		return 0;

	case 8: /* lseek */
		ret = syscalls.sys_lseek(sc->args[0], sc->args[1], sc->args[2]);
		__return_syscall(c, ret);
		return 0;

	case 56: /* Clone */
		usrdata->last_thread_exec++;
		if (mcctrl_ikc_is_valid_thread(usrdata->last_thread_exec)) {
			printk("Clone notification: %lx\n", sc->args[0]);
			/*
			 * Test the same per-OS channel array that the copy
			 * below writes to; the bare global `channels` is no
			 * longer declared.
			 */
			if (usrdata->channels[usrdata->last_thread_exec].param.post_va) {
				memcpy(usrdata->channels[usrdata->last_thread_exec].param.post_va,
				       c->param.post_va, PAGE_SIZE);
			}
			mcctrl_ikc_send_msg(usrdata->last_thread_exec,
					    SCD_MSG_SCHEDULE_PROCESS,
					    usrdata->last_thread_exec, sc->args[0]);
		}

		__return_syscall(c, 0);
		return 0;

	default:
		if (sc->number & 1024) {
			__return_syscall(c, -EFAULT);
			return 0;
		} else {
			return -ENOSYS;
		}
	}
}
#endif
#endif /* !DO_USER_MODE */
struct pager { struct pager {
struct list_head list; struct list_head list;
struct inode * inode; struct inode * inode;
@ -967,7 +832,7 @@ static int pager_req_create(ihk_os_t os, int fd, uintptr_t result_pa)
up(&pager_sem); up(&pager_sem);
newpager = kzalloc(sizeof(*newpager), GFP_KERNEL); newpager = kzalloc(sizeof(*newpager), GFP_ATOMIC);
if (!newpager) { if (!newpager) {
error = -ENOMEM; error = -ENOMEM;
printk("pager_req_create(%d,%lx):kzalloc failed. %d\n", fd, (long)result_pa, error); printk("pager_req_create(%d,%lx):kzalloc failed. %d\n", fd, (long)result_pa, error);
@ -1223,7 +1088,7 @@ static int pager_req_map(ihk_os_t os, int fd, size_t len, off_t off,
uintptr_t phys; uintptr_t phys;
dprintk("pager_req_map(%p,%d,%lx,%lx,%lx)\n", os, fd, len, off, result_rpa); dprintk("pager_req_map(%p,%d,%lx,%lx,%lx)\n", os, fd, len, off, result_rpa);
pager = kzalloc(sizeof(*pager), GFP_KERNEL); pager = kzalloc(sizeof(*pager), GFP_ATOMIC);
if (!pager) { if (!pager) {
error = -ENOMEM; error = -ENOMEM;
printk("pager_req_map(%p,%d,%lx,%lx,%lx):kzalloc failed. %d\n", os, fd, len, off, result_rpa, error); printk("pager_req_map(%p,%d,%lx,%lx,%lx):kzalloc failed. %d\n", os, fd, len, off, result_rpa, error);
@ -1475,11 +1340,31 @@ static long pager_call(ihk_os_t os, struct syscall_request *req)
return ret; return ret;
} }
static void __return_syscall(struct mcctrl_channel *c, int ret) void __return_syscall(ihk_os_t os, struct ikc_scd_packet *packet,
long ret, int stid)
{ {
c->param.response_va->ret = ret; unsigned long phys;
struct syscall_response *res;
phys = ihk_device_map_memory(ihk_os_to_dev(os),
packet->resp_pa, sizeof(*res));
res = ihk_device_map_virtual(ihk_os_to_dev(os),
phys, sizeof(*res), NULL, 0);
/* Map response structure and notify offloading thread */
res->ret = ret;
res->stid = stid;
if (__notify_syscall_requester(os, packet, res) < 0) {
printk("%s: WARNING: failed to notify PID %d\n",
__FUNCTION__, packet->pid);
}
mb(); mb();
c->param.response_va->status = 1; res->status = 1;
ihk_device_unmap_virtual(ihk_os_to_dev(os), res, sizeof(*res));
ihk_device_unmap_memory(ihk_os_to_dev(os), phys, sizeof(*res));
} }
static int remap_user_space(uintptr_t rva, size_t len, int prot) static int remap_user_space(uintptr_t rva, size_t len, int prot)
@ -1668,13 +1553,14 @@ fail:
#define SCHED_CHECK_SAME_OWNER 0x01 #define SCHED_CHECK_SAME_OWNER 0x01
#define SCHED_CHECK_ROOT 0x02 #define SCHED_CHECK_ROOT 0x02
int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall_request *sc) int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet)
{ {
struct syscall_request *sc = &packet->req;
int error; int error;
long ret = -1; long ret = -1;
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
dprintk("__do_in_kernel_syscall(%p,%p,%ld %lx)\n", os, c, sc->number, sc->args[0]); dprintk("%s: system call: %d\n", __FUNCTION__, sc->args[0]);
switch (sc->number) { switch (sc->number) {
case __NR_mmap: case __NR_mmap:
ret = pager_call(os, sc); ret = pager_call(os, sc);
@ -1683,25 +1569,19 @@ int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall
case __NR_munmap: case __NR_munmap:
/* Set new remote page table if not zero */ /* Set new remote page table if not zero */
if (sc->args[2]) { if (sc->args[2]) {
unsigned long flags;
struct mcctrl_per_proc_data *ppd = NULL; struct mcctrl_per_proc_data *ppd = NULL;
ppd = kmalloc(sizeof(*ppd), GFP_ATOMIC); ppd = mcctrl_get_per_proc_data(usrdata, sc->args[3]);
if (!ppd) { if (unlikely(!ppd)) {
printk("ERROR: allocating per process data\n"); kprintf("%s: ERROR: no per-process structure for PID %d??\n",
error = -ENOMEM; __FUNCTION__, task_tgid_vnr(current));
goto out; return -1;
} }
ppd->pid = task_tgid_vnr(current);
ppd->rpgtable = sc->args[2]; ppd->rpgtable = sc->args[2];
flags = ihk_ikc_spinlock_lock(&usrdata->per_proc_list_lock); dprintk("%s: pid: %d, rpgtable: 0x%lx updated\n",
list_add_tail(&ppd->list, &usrdata->per_proc_list); __FUNCTION__, ppd->pid, ppd->rpgtable);
ihk_ikc_spinlock_unlock(&usrdata->per_proc_list_lock, flags);
dprintk("pid: %d, rpgtable: 0x%lx added\n",
ppd->pid, ppd->rpgtable);
} }
ret = clear_pte_range(sc->args[0], sc->args[1]); ret = clear_pte_range(sc->args[0], sc->args[1]);
@ -1712,33 +1592,6 @@ int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall
break; break;
case __NR_exit_group: { case __NR_exit_group: {
unsigned long flags;
struct mcctrl_per_proc_data *ppd = NULL, *ppd_iter;
ppd = NULL;
flags = ihk_ikc_spinlock_lock(&usrdata->per_proc_list_lock);
list_for_each_entry(ppd_iter, &usrdata->per_proc_list, list) {
if (ppd_iter->pid == task_tgid_vnr(current)) {
ppd = ppd_iter;
break;
}
}
if (ppd) {
list_del(&ppd->list);
dprintk("pid: %d, tid: %d: rpgtable for %d (0x%lx) removed\n",
task_tgid_vnr(current), current->pid, ppd->pid, ppd->rpgtable);
kfree(ppd);
}
else {
printk("WARNING: no per process data for pid %d ?\n",
task_tgid_vnr(current));
}
ihk_ikc_spinlock_unlock(&usrdata->per_proc_list_lock, flags);
/* Make sure the user space handler will be called as well */ /* Make sure the user space handler will be called as well */
error = -ENOSYS; error = -ENOSYS;
@ -1821,10 +1674,11 @@ sched_setparam_out:
break; break;
} }
__return_syscall(c, ret); __return_syscall(os, packet, ret, 0);
error = 0; error = 0;
out: out:
dprintk("__do_in_kernel_syscall(%p,%p,%ld %lx): %d %ld\n", os, c, sc->number, sc->args[0], error, ret); dprintk("%s: system call: %d, error: %d, ret: %ld\n",
__FUNCTION__, sc->number, sc->args[0], error, ret);
return error; return error;
} }

View File

@ -14,6 +14,7 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/device.h> #include <linux/device.h>
#include <linux/version.h> #include <linux/version.h>
#include <linux/interrupt.h>
#include "mcctrl.h" #include "mcctrl.h"
#include "sysfs_msg.h" #include "sysfs_msg.h"

View File

@ -870,7 +870,10 @@ struct thread_data_s {
pthread_mutex_t *lock; pthread_mutex_t *lock;
pthread_barrier_t *init_ready; pthread_barrier_t *init_ready;
} *thread_data; } *thread_data;
int ncpu; int ncpu;
int n_threads;
pid_t master_tid; pid_t master_tid;
pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
@ -881,7 +884,7 @@ static void *main_loop_thread_func(void *arg)
struct thread_data_s *td = (struct thread_data_s *)arg; struct thread_data_s *td = (struct thread_data_s *)arg;
td->tid = gettid(); td->tid = gettid();
td->remote_tid = (int)td->tid; td->remote_tid = -1;
pthread_barrier_wait(&init_ready); pthread_barrier_wait(&init_ready);
td->ret = main_loop(td->fd, td->cpu, td->lock); td->ret = main_loop(td->fd, td->cpu, td->lock);
@ -1108,9 +1111,9 @@ void init_worker_threads(int fd)
int i; int i;
pthread_mutex_init(&lock, NULL); pthread_mutex_init(&lock, NULL);
pthread_barrier_init(&init_ready, NULL, ncpu + 2); pthread_barrier_init(&init_ready, NULL, n_threads + 2);
for (i = 0; i <= ncpu; ++i) { for (i = 0; i <= n_threads; ++i) {
int ret; int ret;
thread_data[i].fd = fd; thread_data[i].fd = fd;
@ -1520,6 +1523,19 @@ int main(int argc, char **argv)
return 1; return 1;
} }
n_threads = ncpu;
if (ncpu > 16) {
n_threads = 16;
}
/*
* XXX: keep thread_data ncpu sized despite that there are only
* n_threads worker threads in the pool so that signaling code
* keeps working.
*
* TODO: fix signaling code to be independent of TIDs.
* TODO: implement dynamic thread pool resizing.
*/
thread_data = (struct thread_data_s *)malloc(sizeof(struct thread_data_s) * (ncpu + 1)); thread_data = (struct thread_data_s *)malloc(sizeof(struct thread_data_s) * (ncpu + 1));
memset(thread_data, '\0', sizeof(struct thread_data_s) * (ncpu + 1)); memset(thread_data, '\0', sizeof(struct thread_data_s) * (ncpu + 1));
@ -1604,7 +1620,7 @@ int main(int argc, char **argv)
return 1; return 1;
} }
for (i = 0; i <= ncpu; ++i) { for (i = 0; i <= n_threads; ++i) {
pthread_join(thread_data[i].thread_id, NULL); pthread_join(thread_data[i].thread_id, NULL);
} }
@ -1666,16 +1682,14 @@ do_generic_syscall(
} }
static void static void
kill_thread(unsigned long cpu) kill_thread(unsigned long tid)
{ {
if(cpu >= 0 && cpu < ncpu){ int i;
pthread_kill(thread_data[cpu].thread_id, LOCALSIG);
}
else{
int i;
for (i = 0; i < ncpu; ++i) { for (i = 0; i < n_threads; ++i) {
if(thread_data[i].remote_tid == tid){
pthread_kill(thread_data[i].thread_id, LOCALSIG); pthread_kill(thread_data[i].thread_id, LOCALSIG);
break;
} }
} }
} }
@ -1834,6 +1848,8 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
//pthread_mutex_lock(lock); //pthread_mutex_lock(lock);
thread_data[cpu].remote_tid = w.sr.rtid;
switch (w.sr.number) { switch (w.sr.number) {
case __NR_open: case __NR_open:
ret = do_strncpy_from_user(fd, pathbuf, (void *)w.sr.args[0], PATH_MAX); ret = do_strncpy_from_user(fd, pathbuf, (void *)w.sr.args[0], PATH_MAX);
@ -1872,13 +1888,13 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
sig = 0; sig = 0;
term = 0; term = 0;
do_syscall_return(fd, cpu, 0, 0, 0, 0, 0);
/* Drop executable file */ /* Drop executable file */
if ((ret = ioctl(fd, MCEXEC_UP_CLOSE_EXEC)) != 0) { if ((ret = ioctl(fd, MCEXEC_UP_CLOSE_EXEC)) != 0) {
fprintf(stderr, "WARNING: close_exec() couldn't find exec file?\n"); fprintf(stderr, "WARNING: close_exec() couldn't find exec file?\n");
} }
do_syscall_return(fd, cpu, 0, 0, 0, 0, 0);
__dprintf("__NR_exit/__NR_exit_group: %ld (cpu_id: %d)\n", __dprintf("__NR_exit/__NR_exit_group: %ld (cpu_id: %d)\n",
w.sr.args[0], cpu); w.sr.args[0], cpu);
if(w.sr.number == __NR_exit_group){ if(w.sr.number == __NR_exit_group){
@ -1946,6 +1962,39 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
thread_data[oldcpuid].remote_tid = wtid; thread_data[oldcpuid].remote_tid = wtid;
} }
/*
* Number of TIDs and the remote physical address where TIDs are
* expected are passed in arg 4 and 5, respectively.
*/
if (w.sr.args[4] > 0) {
	struct remote_transfer trans;
	int i = 0;
	/*
	 * calloc() zero-fills the whole array, so every slot beyond the
	 * TIDs copied below is already 0.  The former explicit zero-fill
	 * loop ran up to ncpu instead of the allocated count: it wrote past
	 * the end of the buffer when fewer than ncpu TIDs were requested and
	 * left the tail uninitialised when more were requested.
	 */
	int *tids = calloc(w.sr.args[4], sizeof(int));

	if (!tids) {
		fprintf(stderr, "__NR_gettid(): error allocating TIDs\n");
		goto gettid_out;
	}

	/* Copy at most ncpu TIDs and never more than were asked for. */
	for (i = 0; i < ncpu && i < w.sr.args[4]; ++i) {
		tids[i] = thread_data[i].tid;
	}

	/* Send the array to the remote physical address passed in arg 5. */
	trans.userp = (void*)tids;
	trans.rphys = w.sr.args[5];
	trans.size = sizeof(int) * w.sr.args[4];
	trans.direction = MCEXEC_UP_TRANSFER_TO_REMOTE;

	if (ioctl(fd, MCEXEC_UP_TRANSFER, &trans) != 0) {
		fprintf(stderr, "__NR_gettid(): error transfering TIDs\n");
	}

	free(tids);
}
gettid_out:
do_syscall_return(fd, cpu, thread_data[newcpuid].remote_tid, 0, 0, 0, 0); do_syscall_return(fd, cpu, thread_data[newcpuid].remote_tid, 0, 0, 0, 0);
break; break;
} }
@ -2041,7 +2090,6 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
/* Reinit signals and syscall threads */ /* Reinit signals and syscall threads */
init_sigaction(); init_sigaction();
init_worker_threads(fd);
__dprintf("pid(%d): signals and syscall threads OK\n", __dprintf("pid(%d): signals and syscall threads OK\n",
getpid()); getpid());
@ -2055,6 +2103,8 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
goto fork_child_sync_pipe; goto fork_child_sync_pipe;
} }
init_worker_threads(fd);
fork_child_sync_pipe: fork_child_sync_pipe:
sem_post(&fs->sem); sem_post(&fs->sem);
if (fs->status) if (fs->status)
@ -2313,6 +2363,53 @@ return_execve2:
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
break; break;
case __NR_setresuid:
ret = setresuid(w.sr.args[0], w.sr.args[1], w.sr.args[2]);
if(ret == -1)
ret = -errno;
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
break;
case __NR_setreuid:
ret = setreuid(w.sr.args[0], w.sr.args[1]);
if(ret == -1)
ret = -errno;
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
break;
case __NR_setuid:
ret = setuid(w.sr.args[0]);
if(ret == -1)
ret = -errno;
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
break;
case __NR_setresgid:
ret = setresgid(w.sr.args[0], w.sr.args[1], w.sr.args[2]);
if(ret == -1)
ret = -errno;
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
break;
case __NR_setregid:
ret = setregid(w.sr.args[0], w.sr.args[1]);
if(ret == -1)
ret = -errno;
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
break;
case __NR_setgid:
ret = setgid(w.sr.args[0]);
if(ret == -1)
ret = -errno;
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
break;
case __NR_setfsgid:
ret = setfsgid(w.sr.args[0]);
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
break;
case __NR_close: case __NR_close:
if(w.sr.args[0] == fd) if(w.sr.args[0] == fd)
ret = -EBADF; ret = -EBADF;
@ -2346,7 +2443,9 @@ return_execve2:
break; break;
} }
thread_data[cpu].remote_tid = -1;
//pthread_mutex_unlock(lock); //pthread_mutex_unlock(lock);
} }
__dprint("timed out.\n"); __dprint("timed out.\n");

View File

@ -110,6 +110,7 @@ int __kprintf(const char *format, ...)
char buf[KPRINTF_LOCAL_BUF_LEN]; char buf[KPRINTF_LOCAL_BUF_LEN];
/* Copy into the local buf */ /* Copy into the local buf */
len = sprintf(buf, "[%3d]: ", ihk_mc_get_processor_id());
va_start(va, format); va_start(va, format);
len += vsnprintf(buf + len, KPRINTF_LOCAL_BUF_LEN - len - 2, format, va); len += vsnprintf(buf + len, KPRINTF_LOCAL_BUF_LEN - len - 2, format, va);
va_end(va); va_end(va);

View File

@ -99,7 +99,7 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp
} }
memset(obj, 0, sizeof(*obj)); memset(obj, 0, sizeof(*obj));
obj->pfn_table = allocate_pages(pfn_npages, IHK_MC_AP_NOWAIT); obj->pfn_table = ihk_mc_alloc_pages(pfn_npages, IHK_MC_AP_NOWAIT);
if (!obj->pfn_table) { if (!obj->pfn_table) {
error = -ENOMEM; error = -ENOMEM;
kprintf("%s: error: fd: %d, len: %lu, off: %lu allocating PFN failed.\n", kprintf("%s: error: fd: %d, len: %lu, off: %lu allocating PFN failed.\n",
@ -141,7 +141,7 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp
out: out:
if (obj) { if (obj) {
if (obj->pfn_table) { if (obj->pfn_table) {
free_pages(obj->pfn_table, pfn_npages); ihk_mc_free_pages(obj->pfn_table, pfn_npages);
} }
kfree(obj); kfree(obj);
} }
@ -166,6 +166,8 @@ static void devobj_release(struct memobj *memobj)
struct devobj *obj = to_devobj(memobj); struct devobj *obj = to_devobj(memobj);
struct devobj *free_obj = NULL; struct devobj *free_obj = NULL;
uintptr_t handle; uintptr_t handle;
const size_t pfn_npages =
(obj->npages / (PAGE_SIZE / sizeof(uintptr_t))) + 1;
dkprintf("devobj_release(%p %lx)\n", obj, obj->handle); dkprintf("devobj_release(%p %lx)\n", obj, obj->handle);
@ -194,7 +196,7 @@ static void devobj_release(struct memobj *memobj)
} }
if (obj->pfn_table) { if (obj->pfn_table) {
free_pages(obj->pfn_table, 1); ihk_mc_free_pages(obj->pfn_table, pfn_npages);
} }
kfree(free_obj); kfree(free_obj);
} }

View File

@ -376,10 +376,16 @@ static int process_msg_prepare_process(unsigned long rphys)
} }
n = p->num_sections; n = p->num_sections;
if (n > 16) {
kprintf("%s: ERROR: more ELF sections than 16??\n",
__FUNCTION__);
return -ENOMEM;
}
dkprintf("# of sections: %d\n", n); dkprintf("# of sections: %d\n", n);
if((pn = ihk_mc_allocate(sizeof(struct program_load_desc) if((pn = kmalloc(sizeof(struct program_load_desc)
+ sizeof(struct program_image_section) * n, IHK_MC_AP_NOWAIT)) == NULL){ + sizeof(struct program_image_section) * n,
IHK_MC_AP_NOWAIT)) == NULL){
ihk_mc_unmap_virtual(p, npages, 0); ihk_mc_unmap_virtual(p, npages, 0);
ihk_mc_unmap_memory(NULL, phys, sz); ihk_mc_unmap_memory(NULL, phys, sz);
return -ENOMEM; return -ENOMEM;
@ -388,7 +394,7 @@ static int process_msg_prepare_process(unsigned long rphys)
+ sizeof(struct program_image_section) * n); + sizeof(struct program_image_section) * n);
if((thread = create_thread(p->entry)) == NULL){ if((thread = create_thread(p->entry)) == NULL){
ihk_mc_free(pn); kfree(pn);
ihk_mc_unmap_virtual(p, npages, 1); ihk_mc_unmap_virtual(p, npages, 1);
ihk_mc_unmap_memory(NULL, phys, sz); ihk_mc_unmap_memory(NULL, phys, sz);
return -ENOMEM; return -ENOMEM;
@ -438,7 +444,7 @@ static int process_msg_prepare_process(unsigned long rphys)
dkprintf("new process : %p [%d] / table : %p\n", proc, proc->pid, dkprintf("new process : %p [%d] / table : %p\n", proc, proc->pid,
vm->address_space->page_table); vm->address_space->page_table);
ihk_mc_free(pn); kfree(pn);
ihk_mc_unmap_virtual(p, npages, 1); ihk_mc_unmap_virtual(p, npages, 1);
ihk_mc_unmap_memory(NULL, phys, sz); ihk_mc_unmap_memory(NULL, phys, sz);
@ -446,7 +452,7 @@ static int process_msg_prepare_process(unsigned long rphys)
return 0; return 0;
err: err:
ihk_mc_free(pn); kfree(pn);
ihk_mc_unmap_virtual(p, npages, 1); ihk_mc_unmap_virtual(p, npages, 1);
ihk_mc_unmap_memory(NULL, phys, sz); ihk_mc_unmap_memory(NULL, phys, sz);
destroy_thread(thread); destroy_thread(thread);
@ -455,7 +461,7 @@ err:
static void process_msg_init(struct ikc_scd_init_param *pcp, struct syscall_params *lparam) static void process_msg_init(struct ikc_scd_init_param *pcp, struct syscall_params *lparam)
{ {
lparam->response_va = allocate_pages(RESPONSE_PAGE_COUNT, 0); lparam->response_va = ihk_mc_alloc_pages(RESPONSE_PAGE_COUNT, 0);
lparam->response_pa = virt_to_phys(lparam->response_va); lparam->response_pa = virt_to_phys(lparam->response_va);
pcp->request_page = 0; pcp->request_page = 0;
@ -524,12 +530,7 @@ static void syscall_channel_send(struct ihk_ikc_channel_desc *c,
} }
extern unsigned long do_kill(struct thread *, int, int, int, struct siginfo *, int ptracecont); extern unsigned long do_kill(struct thread *, int, int, int, struct siginfo *, int ptracecont);
extern void settid(struct thread *proc, int mode, int newcpuid, int oldcpuid);
extern void process_procfs_request(unsigned long rarg); extern void process_procfs_request(unsigned long rarg);
extern int memcheckall();
extern int freecheck(int runcount);
extern int runcount;
extern void terminate_host(int pid); extern void terminate_host(int pid);
extern void debug_log(long); extern void debug_log(long);
@ -564,6 +565,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
struct ikc_scd_packet *packet = __packet; struct ikc_scd_packet *packet = __packet;
struct ikc_scd_packet pckt; struct ikc_scd_packet pckt;
int rc; int rc;
struct mcs_rwlock_node_irqsave lock;
struct thread *thread; struct thread *thread;
struct process *proc; struct process *proc;
struct mcctrl_signal { struct mcctrl_signal {
@ -575,22 +577,17 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
} *sp, info; } *sp, info;
unsigned long pp; unsigned long pp;
int cpuid; int cpuid;
int ret = 0;
switch (packet->msg) { switch (packet->msg) {
case SCD_MSG_INIT_CHANNEL_ACKED: case SCD_MSG_INIT_CHANNEL_ACKED:
dkprintf("SCD_MSG_INIT_CHANNEL_ACKED\n"); dkprintf("SCD_MSG_INIT_CHANNEL_ACKED\n");
process_msg_init_acked(c, packet->arg); process_msg_init_acked(c, packet->arg);
return 0; ret = 0;
break;
case SCD_MSG_PREPARE_PROCESS: case SCD_MSG_PREPARE_PROCESS:
if (find_command_line("memdebug")) {
memcheckall();
if (runcount)
freecheck(runcount);
runcount++;
}
if((rc = process_msg_prepare_process(packet->arg)) == 0){ if((rc = process_msg_prepare_process(packet->arg)) == 0){
pckt.msg = SCD_MSG_PREPARE_PROCESS_ACKED; pckt.msg = SCD_MSG_PREPARE_PROCESS_ACKED;
pckt.err = 0; pckt.err = 0;
@ -603,19 +600,21 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
pckt.arg = packet->arg; pckt.arg = packet->arg;
syscall_channel_send(c, &pckt); syscall_channel_send(c, &pckt);
return 0; ret = 0;
break;
case SCD_MSG_SCHEDULE_PROCESS: case SCD_MSG_SCHEDULE_PROCESS:
cpuid = obtain_clone_cpuid(); cpuid = obtain_clone_cpuid();
if(cpuid == -1){ if(cpuid == -1){
kprintf("No CPU available\n"); kprintf("No CPU available\n");
return -1; ret = -1;
break;
} }
dkprintf("SCD_MSG_SCHEDULE_PROCESS: %lx\n", packet->arg); dkprintf("SCD_MSG_SCHEDULE_PROCESS: %lx\n", packet->arg);
thread = (struct thread *)packet->arg; thread = (struct thread *)packet->arg;
proc = thread->proc; proc = thread->proc;
settid(thread, 0, cpuid, -1); settid(thread, 0, cpuid, -1, 0, NULL);
proc->status = PS_RUNNING; proc->status = PS_RUNNING;
thread->status = PS_RUNNING; thread->status = PS_RUNNING;
chain_thread(thread); chain_thread(thread);
@ -623,7 +622,29 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
runq_add_thread(thread, cpuid); runq_add_thread(thread, cpuid);
//cpu_local_var(next) = (struct thread *)packet->arg; //cpu_local_var(next) = (struct thread *)packet->arg;
return 0; ret = 0;
break;
/*
* Used for syscall offload reply message to explicitly schedule in
* the waiting thread
*/
case SCD_MSG_WAKE_UP_SYSCALL_THREAD:
thread = find_thread(0, packet->ttid, &lock);
if (!thread) {
kprintf("%s: WARNING: no thread for SCD reply? TID: %d\n",
__FUNCTION__, packet->ttid);
ret = -EINVAL;
break;
}
thread_unlock(thread, &lock);
dkprintf("%s: SCD_MSG_WAKE_UP_SYSCALL_THREAD: waking up tid %d\n",
__FUNCTION__, packet->ttid);
waitq_wakeup(&thread->scd_wq);
ret = 0;
break;
case SCD_MSG_SEND_SIGNAL: case SCD_MSG_SEND_SIGNAL:
pp = ihk_mc_map_memory(NULL, packet->arg, sizeof(struct mcctrl_signal)); pp = ihk_mc_map_memory(NULL, packet->arg, sizeof(struct mcctrl_signal));
sp = (struct mcctrl_signal *)ihk_mc_map_virtual(pp, 1, PTATTR_WRITABLE | PTATTR_ACTIVE); sp = (struct mcctrl_signal *)ihk_mc_map_virtual(pp, 1, PTATTR_WRITABLE | PTATTR_ACTIVE);
@ -638,18 +659,25 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
rc = do_kill(NULL, info.pid, info.tid, info.sig, &info.info, 0); rc = do_kill(NULL, info.pid, info.tid, info.sig, &info.info, 0);
kprintf("SCD_MSG_SEND_SIGNAL: do_kill(pid=%d, tid=%d, sig=%d)=%d\n", info.pid, info.tid, info.sig, rc); kprintf("SCD_MSG_SEND_SIGNAL: do_kill(pid=%d, tid=%d, sig=%d)=%d\n", info.pid, info.tid, info.sig, rc);
return 0; ret = 0;
break;
case SCD_MSG_PROCFS_REQUEST: case SCD_MSG_PROCFS_REQUEST:
process_procfs_request(packet->arg); process_procfs_request(packet->arg);
return 0; ret = 0;
break;
case SCD_MSG_CLEANUP_PROCESS: case SCD_MSG_CLEANUP_PROCESS:
dkprintf("SCD_MSG_CLEANUP_PROCESS pid=%d\n", packet->pid); dkprintf("SCD_MSG_CLEANUP_PROCESS pid=%d\n", packet->pid);
terminate_host(packet->pid); terminate_host(packet->pid);
return 0; ret = 0;
break;
case SCD_MSG_DEBUG_LOG: case SCD_MSG_DEBUG_LOG:
dkprintf("SCD_MSG_DEBUG_LOG code=%lx\n", packet->arg); dkprintf("SCD_MSG_DEBUG_LOG code=%lx\n", packet->arg);
debug_log(packet->arg); debug_log(packet->arg);
return 0; ret = 0;
break;
case SCD_MSG_SYSFS_REQ_SHOW: case SCD_MSG_SYSFS_REQ_SHOW:
case SCD_MSG_SYSFS_REQ_STORE: case SCD_MSG_SYSFS_REQ_STORE:
@ -657,7 +685,8 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
sysfss_packet_handler(c, packet->msg, packet->err, sysfss_packet_handler(c, packet->msg, packet->err,
packet->sysfs_arg1, packet->sysfs_arg2, packet->sysfs_arg1, packet->sysfs_arg2,
packet->sysfs_arg3); packet->sysfs_arg3);
return 0; ret = 0;
break;
case SCD_MSG_GET_CPU_MAPPING: case SCD_MSG_GET_CPU_MAPPING:
req_get_cpu_mapping(packet->arg); req_get_cpu_mapping(packet->arg);
@ -665,17 +694,21 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
pckt.msg = SCD_MSG_REPLY_GET_CPU_MAPPING; pckt.msg = SCD_MSG_REPLY_GET_CPU_MAPPING;
pckt.arg = packet->arg; pckt.arg = packet->arg;
syscall_channel_send(c, &pckt); syscall_channel_send(c, &pckt);
return 0; ret = 0;
break;
default: default:
kprintf("syscall_pakcet_handler:unknown message " kprintf("syscall_pakcet_handler:unknown message "
"(%d.%d.%d.%d.%d.%#lx)\n", "(%d.%d.%d.%d.%d.%#lx)\n",
packet->msg, packet->ref, packet->osnum, packet->msg, packet->ref, packet->osnum,
packet->pid, packet->err, packet->arg); packet->pid, packet->err, packet->arg);
return 0; ret = 0;
break;
} }
return 0;
ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet, c);
return ret;
} }
void init_host_syscall_channel(void) void init_host_syscall_channel(void)

View File

@ -19,11 +19,13 @@
* CPU Local Storage (cls) * CPU Local Storage (cls)
*/ */
struct malloc_header { struct kmalloc_header {
unsigned int check; unsigned int front_magic;
unsigned int cpu_id; unsigned int cpu_id;
struct malloc_header *next; struct list_head list;
unsigned long size; int size; /* The size of this chunk without the header */
unsigned int end_magic;
/* 32 bytes */
}; };
#include <ihk/lock.h> #include <ihk/lock.h>
@ -38,8 +40,9 @@ extern ihk_spinlock_t cpu_status_lock;
struct cpu_local_var { struct cpu_local_var {
/* malloc */ /* malloc */
struct malloc_header free_list; struct list_head free_list;
struct malloc_header *remote_free_list; struct list_head remote_free_list;
ihk_spinlock_t remote_free_list_lock;
struct thread idle; struct thread idle;
struct process idle_proc; struct process idle_proc;
@ -73,6 +76,7 @@ struct cpu_local_var {
int in_interrupt; int in_interrupt;
int no_preempt; int no_preempt;
int timer_enabled; int timer_enabled;
int kmalloc_initialized;
} __attribute__((aligned(64))); } __attribute__((aligned(64)));

View File

@ -32,11 +32,10 @@ void *_kmalloc(int size, enum ihk_mc_ap_flag flag, char *file, int line);
void _kfree(void *ptr, char *file, int line); void _kfree(void *ptr, char *file, int line);
void *__kmalloc(int size, enum ihk_mc_ap_flag flag); void *__kmalloc(int size, enum ihk_mc_ap_flag flag);
void __kfree(void *ptr); void __kfree(void *ptr);
void *___kmalloc(int size, enum ihk_mc_ap_flag flag);
void ___kfree(void *ptr);
int _memcheck(void *ptr, char *msg, char *file, int line, int free); int _memcheck(void *ptr, char *msg, char *file, int line, int free);
int memcheckall(); int memcheckall();
int freecheck(int runcount); int freecheck(int runcount);
void kmalloc_consolidate_free_list(void);
#endif #endif

View File

@ -161,7 +161,7 @@
#endif #endif
#define USER_STACK_NR_PAGES 8192 #define USER_STACK_NR_PAGES 8192
#define KERNEL_STACK_NR_PAGES 25 #define KERNEL_STACK_NR_PAGES 32
#define NOPHYS ((uintptr_t)-1) #define NOPHYS ((uintptr_t)-1)
@ -349,6 +349,11 @@ struct sig_pending {
typedef void pgio_func_t(void *arg); typedef void pgio_func_t(void *arg);
/*
 * Element type of the `tids` array in struct process, which is described
 * there as the "TID set of proxy process".
 */
struct mcexec_tid {
/* Thread ID of this entry. */
int tid;
/* NOTE(review): presumably the thread this TID is assigned to - confirm against settid(). */
struct thread *thread;
};
/* Represents a node in the process fork tree, it may exist even after the /* Represents a node in the process fork tree, it may exist even after the
* corresponding process exited due to references from the parent and/or * corresponding process exited due to references from the parent and/or
* children and is used for implementing wait/waitpid without having a * children and is used for implementing wait/waitpid without having a
@ -363,6 +368,9 @@ struct process {
// threads and children // threads and children
struct list_head threads_list; struct list_head threads_list;
mcs_rwlock_lock_t threads_lock; // lock for threads_list mcs_rwlock_lock_t threads_lock; // lock for threads_list
/* TID set of proxy process */
struct mcexec_tid *tids;
int nr_tids;
/* The ptracing process behave as the parent of the ptraced process /* The ptracing process behave as the parent of the ptraced process
after using PTRACE_ATTACH except getppid. So we save it here. */ after using PTRACE_ATTACH except getppid. So we save it here. */
@ -559,6 +567,9 @@ struct thread {
struct itimerval itimer_prof; struct itimerval itimer_prof;
struct timespec itimer_virtual_value; struct timespec itimer_virtual_value;
struct timespec itimer_prof_value; struct timespec itimer_prof_value;
/* Syscall offload wait queue head */
struct waitq scd_wq;
}; };
struct process_vm { struct process_vm {
@ -679,5 +690,7 @@ void chain_thread(struct thread *);
void proc_init(); void proc_init();
void set_timer(); void set_timer();
struct sig_pending *hassigpending(struct thread *thread); struct sig_pending *hassigpending(struct thread *thread);
void settid(struct thread *thread, int mode, int newcpuid, int oldcpuid,
int nr_tids, int *tids);
#endif #endif

View File

@ -31,6 +31,7 @@
#define SCD_MSG_PREPARE_PROCESS_ACKED 0x2 #define SCD_MSG_PREPARE_PROCESS_ACKED 0x2
#define SCD_MSG_PREPARE_PROCESS_NACKED 0x7 #define SCD_MSG_PREPARE_PROCESS_NACKED 0x7
#define SCD_MSG_SCHEDULE_PROCESS 0x3 #define SCD_MSG_SCHEDULE_PROCESS 0x3
#define SCD_MSG_WAKE_UP_SYSCALL_THREAD 0x14
#define SCD_MSG_INIT_CHANNEL 0x5 #define SCD_MSG_INIT_CHANNEL 0x5
#define SCD_MSG_INIT_CHANNEL_ACKED 0x6 #define SCD_MSG_INIT_CHANNEL_ACKED 0x6
@ -117,28 +118,6 @@ struct user_desc {
unsigned int lm:1; unsigned int lm:1;
}; };
struct ikc_scd_packet {
int msg;
int err;
union {
/* for traditional SCD_MSG_* */
struct {
int ref;
int osnum;
int pid;
int padding;
unsigned long arg;
};
/* for SCD_MSG_SYSFS_* */
struct {
long sysfs_arg1;
long sysfs_arg2;
long sysfs_arg3;
};
};
};
struct program_image_section { struct program_image_section {
unsigned long vaddr; unsigned long vaddr;
unsigned long len; unsigned long len;
@ -210,13 +189,58 @@ struct ikc_scd_init_param {
}; };
struct syscall_request { struct syscall_request {
/* TID of requesting thread */
int rtid;
/*
* TID of target thread. Remote page fault response needs to designate the
* thread that must serve the request, 0 indicates any thread from the pool
*/
int ttid;
unsigned long valid; unsigned long valid;
unsigned long number; unsigned long number;
unsigned long args[6]; unsigned long args[6];
}; };
/*
 * Packet exchanged over the syscall IKC channel.  `msg` holds one of the
 * SCD_MSG_* codes and selects which member of the anonymous union is used.
 */
struct ikc_scd_packet {
int msg;
int err;
union {
/* for traditional SCD_MSG_* */
struct {
int ref;
int osnum;
int pid;
unsigned long arg;
/* System call request carried inside the packet itself. */
struct syscall_request req;
/* Physical address of the struct syscall_response to fill in. */
unsigned long resp_pa;
};
/* for SCD_MSG_SYSFS_* */
struct {
long sysfs_arg1;
long sysfs_arg2;
long sysfs_arg3;
};
/* for SCD_MSG_WAKE_UP_SYSCALL_THREAD: TID of the thread to wake up */
struct {
int ttid;
};
};
char padding[12];
};
#define IHK_SCD_REQ_THREAD_SPINNING 0
#define IHK_SCD_REQ_THREAD_TO_BE_WOKEN 1
#define IHK_SCD_REQ_THREAD_DESCHEDULED 2
struct syscall_response { struct syscall_response {
/* TID of the thread that requested the service */
int ttid;
/* TID of the mcexec thread that is serving the request */
int stid;
unsigned long status; unsigned long status;
unsigned long req_thread_status;
long ret; long ret;
unsigned long fault_address; unsigned long fault_address;
unsigned long fault_reason; unsigned long fault_reason;

View File

@ -371,7 +371,7 @@ int main(void)
} }
kmsg_init(mode); kmsg_init(mode);
kputs("MCK started.\n"); kputs("IHK/McKernel started.\n");
arch_init(); arch_init();
@ -393,7 +393,7 @@ int main(void)
futex_init(); futex_init();
kputs("MCK/IHK booted.\n"); kputs("IHK/McKernel booted.\n");
#ifdef DCFA_KMOD #ifdef DCFA_KMOD
mc_cmd_client_init(); mc_cmd_client_init();

View File

@ -156,13 +156,17 @@ void sbox_write(int offset, unsigned int value);
static void query_free_mem_interrupt_handler(void *priv) static void query_free_mem_interrupt_handler(void *priv)
{ {
#ifdef ATTACHED_MIC
dkprintf("query free mem handler!\n");
int pages = ihk_pagealloc_query_free(pa_allocator); int pages = ihk_pagealloc_query_free(pa_allocator);
dkprintf("free pages: %d\n", pages); kprintf("McKernel free pages: %d\n", pages);
if (find_command_line("memdebug")) {
extern void kmalloc_memcheck(void);
kmalloc_memcheck();
}
#ifdef ATTACHED_MIC
sbox_write(SBOX_SCRATCH0, pages); sbox_write(SBOX_SCRATCH0, pages);
sbox_write(SBOX_SCRATCH1, 1); sbox_write(SBOX_SCRATCH1, 1);
#endif #endif
@ -265,6 +269,13 @@ void remote_flush_tlb_cpumask(struct process_vm *vm,
unsigned long tsc; unsigned long tsc;
tsc = rdtsc() + 12884901888; /* 1.2GHz =>10 sec */ tsc = rdtsc() + 12884901888; /* 1.2GHz =>10 sec */
#endif #endif
if (flush_entry->addr) {
flush_tlb_single(flush_entry->addr & PAGE_MASK);
}
/* Zero address denotes full TLB flush */
else {
flush_tlb();
}
/* Wait for all cores */ /* Wait for all cores */
while (ihk_atomic_read(&flush_entry->pending) != 0) { while (ihk_atomic_read(&flush_entry->pending) != 0) {
@ -335,10 +346,9 @@ static void page_fault_handler(void *fault_addr, uint64_t reason, void *regs)
// no return // no return
} }
kprintf("[%d]page_fault_handler(%p,%lx,%p):" kprintf("%s fault VM failed for TID: %d, addr: 0x%lx, "
"fault vm failed. %d, TID: %d\n", "reason: %d, error: %d\n", __FUNCTION__,
ihk_mc_get_processor_id(), fault_addr, thread->tid, fault_addr, reason, error);
reason, regs, error, thread->tid);
unhandled_page_fault(thread, fault_addr, regs); unhandled_page_fault(thread, fault_addr, regs);
preempt_enable(); preempt_enable();
memset(&info, '\0', sizeof info); memset(&info, '\0', sizeof info);
@ -425,8 +435,9 @@ static void page_allocator_init(void)
ihk_mc_reserve_arch_pages(pa_start, pa_end, reserve_pages); ihk_mc_reserve_arch_pages(pa_start, pa_end, reserve_pages);
kprintf("Available pages: %ld pages\n", kprintf("Available memory: %ld bytes in %ld pages\n",
ihk_pagealloc_count(pa_allocator)); (ihk_pagealloc_count(pa_allocator) * PAGE_SIZE),
ihk_pagealloc_count(pa_allocator));
/* Notify the ihk to use my page allocator */ /* Notify the ihk to use my page allocator */
ihk_mc_set_page_allocator(&allocator); ihk_mc_set_page_allocator(&allocator);
@ -507,6 +518,9 @@ static void page_init(void)
static char *memdebug = NULL; static char *memdebug = NULL;
static void *___kmalloc(int size, enum ihk_mc_ap_flag flag);
static void ___kfree(void *ptr);
void register_kmalloc(void) void register_kmalloc(void)
{ {
if(memdebug){ if(memdebug){
@ -636,60 +650,100 @@ void mem_init(void)
} }
} }
struct location { #define KMALLOC_TRACK_HASH_SHIFT (8)
struct location *next; #define KMALLOC_TRACK_HASH_SIZE (1 << KMALLOC_TRACK_HASH_SHIFT)
int line; #define KMALLOC_TRACK_HASH_MASK (KMALLOC_TRACK_HASH_SIZE - 1)
int cnt;
char file[0];
};
struct alloc { struct list_head kmalloc_track_hash[KMALLOC_TRACK_HASH_SIZE];
struct alloc *next; ihk_spinlock_t kmalloc_track_hash_locks[KMALLOC_TRACK_HASH_SIZE];
struct malloc_header *p;
struct location *loc; struct list_head kmalloc_addr_hash[KMALLOC_TRACK_HASH_SIZE];
int size; ihk_spinlock_t kmalloc_addr_hash_locks[KMALLOC_TRACK_HASH_SIZE];
int kmalloc_track_initialized = 0;
int kmalloc_runcount = 0;
struct kmalloc_track_addr_entry {
void *addr;
int runcount; int runcount;
struct list_head list; /* track_entry's list */
struct kmalloc_track_entry *entry;
struct list_head hash; /* address hash */
}; };
#define HASHNUM 129 struct kmalloc_track_entry {
char *file;
int line;
int size;
ihk_atomic_t alloc_count;
struct list_head hash;
struct list_head addr_list;
ihk_spinlock_t addr_list_lock;
};
static struct alloc *allochash[HASHNUM]; void kmalloc_init(void)
static struct location *lochash[HASHNUM];
static ihk_spinlock_t alloclock;
int runcount;
static unsigned char *page;
static int space;
static void *dalloc(unsigned long size)
{ {
void *r; struct cpu_local_var *v = get_this_cpu_local_var();
static int pos = 0;
unsigned long irqstate;
irqstate = ihk_mc_spinlock_lock(&alloclock); register_kmalloc();
size = (size + 7) & 0xfffffffffffffff8L;
if (pos + size > space) { INIT_LIST_HEAD(&v->free_list);
page = allocate_pages(1, IHK_MC_AP_NOWAIT); INIT_LIST_HEAD(&v->remote_free_list);
space = 4096; ihk_mc_spinlock_init(&v->remote_free_list_lock);
pos = 0;
v->kmalloc_initialized = 1;
if (!kmalloc_track_initialized) {
int i;
memdebug = find_command_line("memdebug");
kmalloc_track_initialized = 1;
for (i = 0; i < KMALLOC_TRACK_HASH_SIZE; ++i) {
ihk_mc_spinlock_init(&kmalloc_track_hash_locks[i]);
INIT_LIST_HEAD(&kmalloc_track_hash[i]);
ihk_mc_spinlock_init(&kmalloc_addr_hash_locks[i]);
INIT_LIST_HEAD(&kmalloc_addr_hash[i]);
}
} }
r = page + pos;
pos += size;
ihk_mc_spinlock_unlock(&alloclock, irqstate);
return r;
} }
/* NOTE: Hash lock must be held */
struct kmalloc_track_entry *__kmalloc_track_find_entry(
int size, char *file, int line)
{
struct kmalloc_track_entry *entry_iter, *entry = NULL;
int hash = (strlen(file) + line + size) & KMALLOC_TRACK_HASH_MASK;
list_for_each_entry(entry_iter, &kmalloc_track_hash[hash], hash) {
if (!strcmp(entry_iter->file, file) &&
entry_iter->size == size &&
entry_iter->line == line) {
entry = entry_iter;
break;
}
}
if (entry) {
dkprintf("%s found entry %s:%d size: %d\n", __FUNCTION__,
file, line, size);
}
else {
dkprintf("%s couldn't find entry %s:%d size: %d\n", __FUNCTION__,
file, line, size);
}
return entry;
}
/* Top level routines called from macro */
void *_kmalloc(int size, enum ihk_mc_ap_flag flag, char *file, int line) void *_kmalloc(int size, enum ihk_mc_ap_flag flag, char *file, int line)
{ {
char *r = ___kmalloc(size, flag); unsigned long irqflags;
struct malloc_header *h; struct kmalloc_track_entry *entry;
unsigned long hash; struct kmalloc_track_addr_entry *addr_entry;
char *t; int hash, addr_hash;
struct location *lp; void *r = ___kmalloc(size, flag);
struct alloc *ap;
unsigned long alcsize;
unsigned long chksize;
if (!memdebug) if (!memdebug)
return r; return r;
@ -697,177 +751,177 @@ void *_kmalloc(int size, enum ihk_mc_ap_flag flag, char *file, int line)
if (!r) if (!r)
return r; return r;
h = ((struct malloc_header *)r) - 1; hash = (strlen(file) + line + size) & KMALLOC_TRACK_HASH_MASK;
alcsize = h->size * sizeof(struct malloc_header); irqflags = ihk_mc_spinlock_lock(&kmalloc_track_hash_locks[hash]);
chksize = alcsize - size;
memset(r + size, '\x5a', chksize);
for (hash = 0, t = file; *t; t++) { entry = __kmalloc_track_find_entry(size, file, line);
hash <<= 1;
hash += *t; if (!entry) {
entry = ___kmalloc(sizeof(*entry), IHK_MC_AP_NOWAIT);
if (!entry) {
kprintf("%s: ERROR: allocating tracking entry\n");
goto out;
}
entry->line = line;
entry->size = size;
ihk_atomic_set(&entry->alloc_count, 0);
ihk_mc_spinlock_init(&entry->addr_list_lock);
INIT_LIST_HEAD(&entry->addr_list);
entry->file = ___kmalloc(strlen(file) + 1, IHK_MC_AP_NOWAIT);
if (!entry->file) {
kprintf("%s: ERROR: allocating file string\n");
___kfree(entry);
ihk_mc_spinlock_unlock(&kmalloc_track_hash_locks[hash], irqflags);
goto out;
}
strcpy(entry->file, file);
entry->file[strlen(file)] = 0;
list_add(&entry->hash, &kmalloc_track_hash[hash]);
dkprintf("%s entry %s:%d size: %d added\n", __FUNCTION__,
file, line, size);
} }
hash += line; ihk_mc_spinlock_unlock(&kmalloc_track_hash_locks[hash], irqflags);
hash %= HASHNUM;
for (lp = lochash[hash]; lp; lp = lp->next) ihk_atomic_inc(&entry->alloc_count);
if (lp->line == line &&
!strcmp(lp->file, file)) /* Add new addr entry for this allocation entry */
break; addr_entry = ___kmalloc(sizeof(*addr_entry), IHK_MC_AP_NOWAIT);
if (!lp) { if (!addr_entry) {
lp = dalloc(sizeof(struct location) + strlen(file) + 1); kprintf("%s: ERROR: allocating addr entry\n");
memset(lp, '\0', sizeof(struct location)); goto out;
lp->line = line;
strcpy(lp->file, file);
do {
lp->next = lochash[hash];
} while (!compare_and_swap(lochash + hash, (unsigned long)lp->next, (unsigned long)lp));
} }
hash = (unsigned long)h % HASHNUM; addr_entry->addr = r;
do { addr_entry->runcount = kmalloc_runcount;
for (ap = allochash[hash]; ap; ap = ap->next) addr_entry->entry = entry;
if (!ap->p)
break;
} while (ap && !compare_and_swap(&ap->p, 0UL, (unsigned long)h));
if (!ap) {
ap = dalloc(sizeof(struct alloc));
memset(ap, '\0', sizeof(struct alloc));
ap->p = h;
do {
ap->next = allochash[hash];
} while (!compare_and_swap(allochash + hash, (unsigned long)ap->next, (unsigned long)ap));
}
ap->loc = lp; irqflags = ihk_mc_spinlock_lock(&entry->addr_list_lock);
ap->size = size; list_add(&addr_entry->list, &entry->addr_list);
ap->runcount = runcount; ihk_mc_spinlock_unlock(&entry->addr_list_lock, irqflags);
return r; /* Add addr entry to address hash */
} addr_hash = ((unsigned long)r >> 5) & KMALLOC_TRACK_HASH_MASK;
irqflags = ihk_mc_spinlock_lock(&kmalloc_addr_hash_locks[addr_hash]);
list_add(&addr_entry->hash, &kmalloc_addr_hash[addr_hash]);
ihk_mc_spinlock_unlock(&kmalloc_addr_hash_locks[addr_hash], irqflags);
int _memcheck(void *ptr, char *msg, char *file, int line, int flags) dkprintf("%s addr_entry %p added\n", __FUNCTION__, r);
{
struct malloc_header *h = ((struct malloc_header *)ptr) - 1;
struct malloc_header *next;
unsigned long hash = (unsigned long)h % HASHNUM;
struct alloc *ap;
static unsigned long check = 0x5a5a5a5a5a5a5a5aUL;
unsigned long alcsize;
unsigned long chksize;
if (h->check != 0x5a5a5a5a) {
int i;
unsigned long max = 0;
unsigned long cur = (unsigned long)h;
struct alloc *maxap = NULL;
for (i = 0; i < HASHNUM; i++)
for (ap = allochash[i]; ap; ap = ap->next)
if ((unsigned long)ap->p < cur &&
(unsigned long)ap->p > max) {
max = (unsigned long)ap->p;
maxap = ap;
}
kprintf("%s: detect buffer overrun, alc=%s:%d size=%ld h=%p, s=%ld\n", msg, maxap->loc->file, maxap->loc->line, maxap->size, maxap->p, maxap->p->size);
kprintf("broken header: h=%p next=%p size=%ld cpu_id=%d\n", h, h->next, h->size, h->cpu_id);
}
for (ap = allochash[hash]; ap; ap = ap->next)
if (ap->p == h)
break;
if (!ap) {
if(file)
kprintf("%s: address not found, %s:%d p=%p\n", msg, file, line, ptr);
else
kprintf("%s: address not found p=%p\n", msg, ptr);
return 1;
}
alcsize = h->size * sizeof(struct malloc_header);
chksize = alcsize - ap->size;
if (chksize > 8)
chksize = 8;
next = (struct malloc_header *)((char *)ptr + alcsize);
if (next->check != 0x5a5a5a5a ||
memcmp((char *)ptr + ap->size, &check, chksize)) {
unsigned long buf = 0x5a5a5a5a5a5a5a5aUL;
unsigned char *p;
unsigned char *q;
memcpy(&buf, (char *)ptr + ap->size, chksize);
p = (unsigned char *)&(next->check);
q = (unsigned char *)&buf;
if (file)
kprintf("%s: broken, %s:%d alc=%s:%d %02x%02x%02x%02x%02x%02x%02x%02x %02x%02x%02x%02x size=%ld\n", msg, file, line, ap->loc->file, ap->loc->line, q[0], q[1], q[2], q[3], q[4], q[5], q[6], q[7], p[0], p[1], p[2], p[3], ap->size);
else
kprintf("%s: broken, alc=%s:%d %02x%02x%02x%02x%02x%02x%02x%02x %02x%02x%02x%02x size=%ld\n", msg, ap->loc->file, ap->loc->line, q[0], q[1], q[2], q[3], q[4], q[5], q[6], q[7], p[0], p[1], p[2], p[3], ap->size);
if (next->check != 0x5a5a5a5a)
kprintf("next->HEADER: next=%p size=%ld cpu_id=%d\n", next->next, next->size, next->cpu_id);
return 1;
}
if(flags & 1){
ap->p = NULL;
ap->loc = NULL;
ap->size = 0;
}
return 0;
}
int memcheckall()
{
int i;
struct alloc *ap;
int r = 0;
for(i = 0; i < HASHNUM; i++)
for(ap = allochash[i]; ap; ap = ap->next)
if(ap->p)
r |= _memcheck(ap->p + 1, "memcheck", NULL, 0, 2);
return r;
}
int freecheck(int runcount)
{
int i;
struct alloc *ap;
struct location *lp;
int r = 0;
for (i = 0; i < HASHNUM; i++)
for (lp = lochash[i]; lp; lp = lp->next)
lp->cnt = 0;
for (i = 0; i < HASHNUM; i++)
for (ap = allochash[i]; ap; ap = ap->next)
if (ap->p && ap->runcount == runcount) {
ap->loc->cnt++;
r++;
}
if (r) {
kprintf("memory leak?\n");
for (i = 0; i < HASHNUM; i++)
for (lp = lochash[i]; lp; lp = lp->next)
if (lp->cnt)
kprintf(" alc=%s:%d cnt=%d\n", lp->file, lp->line, lp->cnt);
}
out:
return r; return r;
} }
void _kfree(void *ptr, char *file, int line) void _kfree(void *ptr, char *file, int line)
{ {
if (memdebug) unsigned long irqflags;
_memcheck(ptr, "KFREE", file, line, 1); struct kmalloc_track_entry *entry;
struct kmalloc_track_addr_entry *addr_entry_iter, *addr_entry = NULL;
int hash;
if (!memdebug) {
goto out;
}
hash = ((unsigned long)ptr >> 5) & KMALLOC_TRACK_HASH_MASK;
irqflags = ihk_mc_spinlock_lock(&kmalloc_addr_hash_locks[hash]);
list_for_each_entry(addr_entry_iter,
&kmalloc_addr_hash[hash], hash) {
if (addr_entry_iter->addr == ptr) {
addr_entry = addr_entry_iter;
break;
}
}
if (addr_entry) {
list_del(&addr_entry->hash);
}
ihk_mc_spinlock_unlock(&kmalloc_addr_hash_locks[hash], irqflags);
if (!addr_entry) {
kprintf("%s: ERROR: kfree()ing invalid pointer\n", __FUNCTION__);
panic("panic");
}
entry = addr_entry->entry;
irqflags = ihk_mc_spinlock_lock(&entry->addr_list_lock);
list_del(&addr_entry->list);
ihk_mc_spinlock_unlock(&entry->addr_list_lock, irqflags);
dkprintf("%s addr_entry %p removed\n", __FUNCTION__, addr_entry->addr);
___kfree(addr_entry);
/* Do we need to remove tracking entry as well? */
if (!ihk_atomic_dec_and_test(&entry->alloc_count)) {
goto out;
}
hash = (strlen(entry->file) + entry->line + entry->size) &
KMALLOC_TRACK_HASH_MASK;
irqflags = ihk_mc_spinlock_lock(&kmalloc_track_hash_locks[hash]);
list_del(&entry->hash);
ihk_mc_spinlock_unlock(&kmalloc_track_hash_locks[hash], irqflags);
dkprintf("%s entry %s:%d size: %d removed\n", __FUNCTION__,
entry->file, entry->line, entry->size);
___kfree(entry->file);
___kfree(entry);
out:
___kfree(ptr); ___kfree(ptr);
} }
/*
 * Walk every tracked allocation site and report, per site, how many
 * still-registered allocations were made during the current run
 * (kmalloc_runcount). Each registered address is also printed through
 * dkprintf(). The run counter is advanced once the walk is finished.
 */
void kmalloc_memcheck(void)
{
	int bucket;

	for (bucket = 0; bucket < KMALLOC_TRACK_HASH_SIZE; ++bucket) {
		struct kmalloc_track_entry *site = NULL;
		unsigned long flags;

		flags = ihk_mc_spinlock_lock(&kmalloc_track_hash_locks[bucket]);

		list_for_each_entry(site, &kmalloc_track_hash[bucket], hash) {
			struct kmalloc_track_addr_entry *alloc = NULL;
			int live = 0;

			/* The bucket lock above already returned the saved IRQ flags */
			ihk_mc_spinlock_lock_noirq(&site->addr_list_lock);
			list_for_each_entry(alloc, &site->addr_list, list) {
				dkprintf("%s memory leak: %p @ %s:%d size: %d runcount: %d\n",
						__FUNCTION__,
						alloc->addr,
						site->file,
						site->line,
						site->size,
						alloc->runcount);

				/* Only allocations from the current run are counted */
				if (alloc->runcount == kmalloc_runcount)
					live++;
			}
			ihk_mc_spinlock_unlock_noirq(&site->addr_list_lock);

			if (live) {
				kprintf("%s memory leak: %s:%d size: %d cnt: %d, runcount: %d\n",
						__FUNCTION__,
						site->file,
						site->line,
						site->size,
						live,
						kmalloc_runcount);
			}
		}

		ihk_mc_spinlock_unlock(&kmalloc_track_hash_locks[bucket], flags);
	}

	++kmalloc_runcount;
}
/* Redirection routines registered in alloc structure */
void *__kmalloc(int size, enum ihk_mc_ap_flag flag) void *__kmalloc(int size, enum ihk_mc_ap_flag flag)
{ {
return kmalloc(size, flag); return kmalloc(size, flag);
@ -878,160 +932,199 @@ void __kfree(void *ptr)
kfree(ptr); kfree(ptr);
} }
void kmalloc_init(void)
static void ___kmalloc_insert_chunk(struct list_head *free_list,
struct kmalloc_header *chunk)
{ {
struct cpu_local_var *v = get_this_cpu_local_var(); struct kmalloc_header *chunk_iter, *next_chunk = NULL;
struct malloc_header *h = &v->free_list;
int i;
h->check = 0x5a5a5a5a; /* Find out where to insert */
h->next = &v->free_list; list_for_each_entry(chunk_iter, free_list, list) {
h->size = 0; if ((void *)chunk < (void *)chunk_iter) {
next_chunk = chunk_iter;
register_kmalloc(); break;
memdebug = find_command_line("memdebug");
for (i = 0; i < HASHNUM; i++) {
allochash[i] = NULL;
lochash[i] = NULL;
}
page = allocate_pages(16, IHK_MC_AP_NOWAIT);
space = 16 * 4096;
ihk_mc_spinlock_init(&alloclock);
}
void ____kfree(struct cpu_local_var *v, struct malloc_header *p)
{
struct malloc_header *h = &v->free_list;
int combined = 0;
h = h->next;
while ((p < h || p > h->next) && h != &v->free_list) {
h = h->next;
}
if (h + h->size + 1 == p && h->size != 0) {
combined = 1;
h->size += p->size + 1;
h->check = 0x5a5a5a5a;
}
if (h->next == p + p->size + 1 && h->next->size != 0) {
if (combined) {
h->check = 0x5a5a5a5a;
h->size += h->next->size + 1;
h->next = h->next->next;
} else {
p->check = 0x5a5a5a5a;
p->size += h->next->size + 1;
p->next = h->next->next;
h->next = p;
} }
} else if (!combined) {
p->next = h->next;
h->next = p;
} }
/* Add in front of next */
if (next_chunk) {
list_add_tail(&chunk->list, &next_chunk->list);
}
/* Add after the head */
else {
list_add(&chunk->list, free_list);
}
return;
} }
void *___kmalloc(int size, enum ihk_mc_ap_flag flag) static void ___kmalloc_init_chunk(struct kmalloc_header *h, int size)
{ {
struct cpu_local_var *v = get_this_cpu_local_var(); h->size = size;
struct malloc_header *h = &v->free_list, *prev, *p; h->front_magic = 0x5c5c5c5c;
int u, req_page; h->end_magic = 0x6d6d6d6d;
h->cpu_id = ihk_mc_get_processor_id();
}
p = (struct malloc_header *)xchg8((unsigned long *)&v->remote_free_list, 0L); static void ___kmalloc_consolidate_list(struct list_head *list)
while(p){ {
struct malloc_header *n = p->next; struct kmalloc_header *chunk_iter, *chunk, *next_chunk;
____kfree(v, p);
p = n; reiterate:
chunk_iter = NULL;
chunk = NULL;
list_for_each_entry(next_chunk, list, list) {
if (chunk_iter && (((void *)chunk_iter + sizeof(struct kmalloc_header)
+ chunk_iter->size) == (void *)next_chunk)) {
chunk = chunk_iter;
break;
}
chunk_iter = next_chunk;
} }
if (size >= PAGE_SIZE * 4) { if (!chunk) {
return;
}
chunk->size += (next_chunk->size + sizeof(struct kmalloc_header));
list_del(&next_chunk->list);
goto reiterate;
}
/*
 * Move every chunk queued on this CPU's remote free list into the local
 * free list (kept in address order by ___kmalloc_insert_chunk()), then
 * merge adjacent chunks of the local free list. The whole operation runs
 * under this CPU's remote_free_list_lock.
 */
void kmalloc_consolidate_free_list(void)
{
	struct kmalloc_header *hdr;
	struct kmalloc_header *next_hdr;
	unsigned long flags;

	flags = ihk_mc_spinlock_lock(&cpu_local_var(remote_free_list_lock));

	/* Clean up remotely deallocated chunks */
	list_for_each_entry_safe(hdr, next_hdr,
			&cpu_local_var(remote_free_list), list) {
		list_del(&hdr->list);
		___kmalloc_insert_chunk(&cpu_local_var(free_list), hdr);
	}

	/*
	 * The local free list has no lock of its own; per the original
	 * author's note, holding the lock above ensures IRQs are disabled.
	 */
	___kmalloc_consolidate_list(&cpu_local_var(free_list));

	ihk_mc_spinlock_unlock(&cpu_local_var(remote_free_list_lock), flags);
}
/*
 * Allocation granularity of ___kmalloc(): requested sizes are rounded up
 * to a multiple of KMALLOC_MIN_SIZE (32 bytes).
 *
 * KMALLOC_MIN_SIZE must be derived from KMALLOC_MIN_SHIFT; deriving it
 * from KMALLOC_TRACK_HASH_SHIFT (8), which only sizes the memdebug
 * tracking hash tables, silently made the granularity 256 bytes.
 */
#define KMALLOC_MIN_SHIFT (5)
#define KMALLOC_MIN_SIZE (1 << KMALLOC_MIN_SHIFT)
#define KMALLOC_MIN_MASK (KMALLOC_MIN_SIZE - 1)
/* Actual low-level allocation routines */
static void *___kmalloc(int size, enum ihk_mc_ap_flag flag)
{
struct kmalloc_header *chunk_iter;
struct kmalloc_header *chunk = NULL;
int npages;
unsigned long kmalloc_irq_flags = cpu_disable_interrupt_save();
/* KMALLOC_MIN_SIZE bytes aligned size. */
if (size & KMALLOC_MIN_MASK) {
size = ((size + KMALLOC_MIN_SIZE - 1) & ~(KMALLOC_MIN_MASK));
}
chunk = NULL;
/* Find a chunk that is big enough */
list_for_each_entry(chunk_iter, &cpu_local_var(free_list), list) {
if (chunk_iter->size >= size) {
chunk = chunk_iter;
break;
}
}
split_and_return:
/* Did we find one? */
if (chunk) {
/* Do we need to split it? Only if there is enough space for
* another header and some actual content */
if (chunk->size > (size + sizeof(struct kmalloc_header))) {
struct kmalloc_header *leftover;
leftover = (struct kmalloc_header *)
((void *)chunk + sizeof(struct kmalloc_header) + size);
___kmalloc_init_chunk(leftover,
(chunk->size - size - sizeof(struct kmalloc_header)));
list_add(&leftover->list, &chunk->list);
chunk->size = size;
}
list_del(&chunk->list);
cpu_restore_interrupt(kmalloc_irq_flags);
return ((void *)chunk + sizeof(struct kmalloc_header));
}
/* Allocate new memory and add it to free list */
npages = (size + sizeof(struct kmalloc_header) + (PAGE_SIZE - 1))
>> PAGE_SHIFT;
chunk = ihk_mc_alloc_pages(npages, flag);
if (!chunk) {
cpu_restore_interrupt(kmalloc_irq_flags);
return NULL; return NULL;
} }
u = (size + sizeof(*h) - 1) / sizeof(*h); ___kmalloc_init_chunk(chunk,
(npages * PAGE_SIZE - sizeof(struct kmalloc_header)));
___kmalloc_insert_chunk(&cpu_local_var(free_list), chunk);
prev = h; goto split_and_return;
h = h->next;
while (1) {
if (h == &v->free_list) {
req_page = ((u + 2) * sizeof(*h) + PAGE_SIZE - 1)
>> PAGE_SHIFT;
h = allocate_pages(req_page, flag);
if(h == NULL) {
kprintf("kmalloc(%#x,%#x): out of memory\n", size, flag);
return NULL;
}
h->check = 0x5a5a5a5a;
prev->next = h;
h->size = (req_page * PAGE_SIZE) / sizeof(*h) - 2;
/* Guard entry */
p = h + h->size + 1;
p->check = 0x5a5a5a5a;
p->next = &v->free_list;
p->size = 0;
h->next = p;
}
if (h->size >= u) {
if (h->size == u || h->size == u + 1) {
prev->next = h->next;
h->cpu_id = ihk_mc_get_processor_id();
return h + 1;
} else { /* Divide */
h->size -= u + 1;
p = h + h->size + 1;
p->check = 0x5a5a5a5a;
p->size = u;
p->cpu_id = ihk_mc_get_processor_id();
return p + 1;
}
}
prev = h;
h = h->next;
}
} }
void ___kfree(void *ptr) static void ___kfree(void *ptr)
{ {
struct malloc_header *p = (struct malloc_header *)ptr; struct kmalloc_header *chunk =
struct cpu_local_var *v = get_cpu_local_var((--p)->cpu_id); (struct kmalloc_header*)(ptr - sizeof(struct kmalloc_header));
unsigned long kmalloc_irq_flags = cpu_disable_interrupt_save();
if(p->cpu_id == ihk_mc_get_processor_id()){ /* Sanity check */
____kfree(v, p); if (chunk->front_magic != 0x5c5c5c5c || chunk->end_magic != 0x6d6d6d6d) {
kprintf("%s: memory corruption at address 0x%p\n", __FUNCTION__, ptr);
panic("panic");
} }
else{
unsigned long oldval; /* Does this chunk belong to this CPU? */
unsigned long newval; if (chunk->cpu_id == ihk_mc_get_processor_id()) {
unsigned long rval;
do{ ___kmalloc_insert_chunk(&cpu_local_var(free_list), chunk);
p->next = v->remote_free_list; ___kmalloc_consolidate_list(&cpu_local_var(free_list));
oldval = (unsigned long)p->next;
newval = (unsigned long)p;
rval = atomic_cmpxchg8(
(unsigned long *)&v->remote_free_list,
oldval, newval);
}while(rval != oldval);
} }
else {
struct cpu_local_var *v = get_cpu_local_var(chunk->cpu_id);
unsigned long irqflags;
irqflags = ihk_mc_spinlock_lock(&v->remote_free_list_lock);
list_add(&chunk->list, &v->remote_free_list);
ihk_mc_spinlock_unlock(&v->remote_free_list_lock, irqflags);
}
cpu_restore_interrupt(kmalloc_irq_flags);
} }
void print_free_list(void)
void ___kmalloc_print_free_list(struct list_head *list)
{ {
struct cpu_local_var *v = get_this_cpu_local_var(); struct kmalloc_header *chunk_iter;
struct malloc_header *h = &v->free_list; unsigned long irqflags = kprintf_lock();
h = h->next; __kprintf("%s: [ \n", __FUNCTION__);
list_for_each_entry(chunk_iter, &cpu_local_var(free_list), list) {
kprintf("free_list : \n"); __kprintf("%s: 0x%lx:%d (VA PFN: %lu, off: %lu)\n", __FUNCTION__,
while (h != &v->free_list) { (unsigned long)chunk_iter,
kprintf(" %p : %p, %d ->\n", h, h->next, h->size); chunk_iter->size,
h = h->next; (unsigned long)chunk_iter >> PAGE_SHIFT,
(unsigned long)chunk_iter % PAGE_SIZE);
} }
kprintf("\n"); __kprintf("%s: ] \n", __FUNCTION__);
kprintf_unlock(irqflags);
} }

View File

@ -53,7 +53,6 @@ static int copy_user_ranges(struct process_vm *vm, struct process_vm *orgvm);
extern void release_fp_regs(struct thread *proc); extern void release_fp_regs(struct thread *proc);
extern void save_fp_regs(struct thread *proc); extern void save_fp_regs(struct thread *proc);
extern void restore_fp_regs(struct thread *proc); extern void restore_fp_regs(struct thread *proc);
void settid(struct thread *proc, int mode, int newcpuid, int oldcpuid);
extern void __runq_add_proc(struct thread *proc, int cpu_id); extern void __runq_add_proc(struct thread *proc, int cpu_id);
extern void terminate_host(int pid); extern void terminate_host(int pid);
extern void lapic_timer_enable(unsigned int clocks); extern void lapic_timer_enable(unsigned int clocks);
@ -745,7 +744,7 @@ int join_process_memory_range(struct process_vm *vm,
memobj_release(merging->memobj); memobj_release(merging->memobj);
} }
list_del(&merging->list); list_del(&merging->list);
ihk_mc_free(merging); kfree(merging);
error = 0; error = 0;
out: out:
@ -841,8 +840,9 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range)
if (range->memobj) { if (range->memobj) {
memobj_release(range->memobj); memobj_release(range->memobj);
} }
list_del(&range->list); list_del(&range->list);
ihk_mc_free(range); kfree(range);
dkprintf("free_process_memory_range(%p,%lx-%lx): 0\n", dkprintf("free_process_memory_range(%p,%lx-%lx): 0\n",
vm, start0, end0); vm, start0, end0);
@ -968,7 +968,6 @@ enum ihk_mc_pt_attribute common_vrflag_to_ptattr(unsigned long flag, uint64_t fa
return attr; return attr;
} }
/* XXX: インデントを揃える必要がある */
int add_process_memory_range(struct process_vm *vm, int add_process_memory_range(struct process_vm *vm,
unsigned long start, unsigned long end, unsigned long start, unsigned long end,
unsigned long phys, unsigned long flag, unsigned long phys, unsigned long flag,
@ -1539,6 +1538,8 @@ retry:
kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):cannot allocate new page. %d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error); kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):cannot allocate new page. %d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error);
goto out; goto out;
} }
dkprintf("%s: clearing 0x%lx:%lu\n",
__FUNCTION__, pgaddr, pgsize);
memset(virt, 0, pgsize); memset(virt, 0, pgsize);
phys = virt_to_phys(virt); phys = virt_to_phys(virt);
page_map(phys_to_page(phys)); page_map(phys_to_page(phys));
@ -1571,6 +1572,8 @@ retry:
kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):cannot allocate copy page. %d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error); kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):cannot allocate copy page. %d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error);
goto out; goto out;
} }
dkprintf("%s: copying 0x%lx:%lu\n",
__FUNCTION__, pgaddr, pgsize);
memcpy(virt, phys_to_virt(phys), pgsize); memcpy(virt, phys_to_virt(phys), pgsize);
phys = virt_to_phys(virt); phys = virt_to_phys(virt);
@ -1651,7 +1654,7 @@ static int do_page_fault_process_vm(struct process_vm *vm, void *fault_addr0, ui
"access denied. %d\n", "access denied. %d\n",
ihk_mc_get_processor_id(), vm, ihk_mc_get_processor_id(), vm,
fault_addr0, reason, error); fault_addr0, reason, error);
kprintf("%s: reason: %s%s%s%s%s%s%s%s\n", __FUNCTION__, kprintf("%s: reason: %s%s%s%s%s%s%s\n", __FUNCTION__,
(reason & PF_PROT) ? "PF_PROT " : "", (reason & PF_PROT) ? "PF_PROT " : "",
(reason & PF_WRITE) ? "PF_WRITE " : "", (reason & PF_WRITE) ? "PF_WRITE " : "",
(reason & PF_USER) ? "PF_USER " : "", (reason & PF_USER) ? "PF_USER " : "",
@ -1890,14 +1893,14 @@ unsigned long extend_process_region(struct process_vm *vm,
aligned_end = (aligned_end + (LARGE_PAGE_SIZE - 1)) & LARGE_PAGE_MASK; aligned_end = (aligned_end + (LARGE_PAGE_SIZE - 1)) & LARGE_PAGE_MASK;
/* Fill in the gap between old_aligned_end and aligned_end /* Fill in the gap between old_aligned_end and aligned_end
* with regular pages */ * with regular pages */
if((p = allocate_pages((aligned_end - old_aligned_end) >> PAGE_SHIFT, if((p = ihk_mc_alloc_pages((aligned_end - old_aligned_end) >> PAGE_SHIFT,
IHK_MC_AP_NOWAIT)) == NULL){ IHK_MC_AP_NOWAIT)) == NULL){
return end; return end;
} }
if((rc = add_process_memory_range(vm, old_aligned_end, if((rc = add_process_memory_range(vm, old_aligned_end,
aligned_end, virt_to_phys(p), flag, aligned_end, virt_to_phys(p), flag,
LARGE_PAGE_SHIFT)) != 0){ LARGE_PAGE_SHIFT)) != 0){
free_pages(p, (aligned_end - old_aligned_end) >> PAGE_SHIFT); ihk_mc_free_pages(p, (aligned_end - old_aligned_end) >> PAGE_SHIFT);
return end; return end;
} }
@ -1910,7 +1913,7 @@ unsigned long extend_process_region(struct process_vm *vm,
(LARGE_PAGE_SIZE - 1)) & LARGE_PAGE_MASK; (LARGE_PAGE_SIZE - 1)) & LARGE_PAGE_MASK;
address = aligned_new_end; address = aligned_new_end;
if((p = allocate_pages((aligned_new_end - aligned_end + LARGE_PAGE_SIZE) >> PAGE_SHIFT, if((p = ihk_mc_alloc_pages((aligned_new_end - aligned_end + LARGE_PAGE_SIZE) >> PAGE_SHIFT,
IHK_MC_AP_NOWAIT)) == NULL){ IHK_MC_AP_NOWAIT)) == NULL){
return end; return end;
} }
@ -1918,16 +1921,16 @@ unsigned long extend_process_region(struct process_vm *vm,
p_aligned = ((unsigned long)p + (LARGE_PAGE_SIZE - 1)) & LARGE_PAGE_MASK; p_aligned = ((unsigned long)p + (LARGE_PAGE_SIZE - 1)) & LARGE_PAGE_MASK;
if (p_aligned > (unsigned long)p) { if (p_aligned > (unsigned long)p) {
free_pages(p, (p_aligned - (unsigned long)p) >> PAGE_SHIFT); ihk_mc_free_pages(p, (p_aligned - (unsigned long)p) >> PAGE_SHIFT);
} }
free_pages( ihk_mc_free_pages(
(void *)(p_aligned + aligned_new_end - aligned_end), (void *)(p_aligned + aligned_new_end - aligned_end),
(LARGE_PAGE_SIZE - (p_aligned - (unsigned long)p)) >> PAGE_SHIFT); (LARGE_PAGE_SIZE - (p_aligned - (unsigned long)p)) >> PAGE_SHIFT);
if((rc = add_process_memory_range(vm, aligned_end, if((rc = add_process_memory_range(vm, aligned_end,
aligned_new_end, virt_to_phys((void *)p_aligned), aligned_new_end, virt_to_phys((void *)p_aligned),
flag, LARGE_PAGE_SHIFT)) != 0){ flag, LARGE_PAGE_SHIFT)) != 0){
free_pages(p, (aligned_new_end - aligned_end + LARGE_PAGE_SIZE) >> PAGE_SHIFT); ihk_mc_free_pages(p, (aligned_new_end - aligned_end + LARGE_PAGE_SIZE) >> PAGE_SHIFT);
return end; return end;
} }
@ -1945,7 +1948,7 @@ unsigned long extend_process_region(struct process_vm *vm,
p=0; p=0;
}else{ }else{
p = allocate_pages((aligned_new_end - aligned_end) >> PAGE_SHIFT, IHK_MC_AP_NOWAIT); p = ihk_mc_alloc_pages((aligned_new_end - aligned_end) >> PAGE_SHIFT, IHK_MC_AP_NOWAIT);
if (!p) { if (!p) {
return end; return end;
@ -1954,7 +1957,7 @@ unsigned long extend_process_region(struct process_vm *vm,
if((rc = add_process_memory_range(vm, aligned_end, aligned_new_end, if((rc = add_process_memory_range(vm, aligned_end, aligned_new_end,
(p==0?0:virt_to_phys(p)), flag, NULL, 0, (p==0?0:virt_to_phys(p)), flag, NULL, 0,
PAGE_SHIFT)) != 0){ PAGE_SHIFT)) != 0){
free_pages(p, (aligned_new_end - aligned_end) >> PAGE_SHIFT); ihk_mc_free_pages(p, (aligned_new_end - aligned_end) >> PAGE_SHIFT);
return end; return end;
} }
@ -2067,6 +2070,7 @@ release_process(struct process *proc)
mcs_rwlock_writer_unlock(&parent->children_lock, &lock); mcs_rwlock_writer_unlock(&parent->children_lock, &lock);
} }
if (proc->tids) kfree(proc->tids);
kfree(proc); kfree(proc);
} }
@ -2172,6 +2176,23 @@ release_sigcommon(struct sig_common *sigcommon)
kfree(sigcommon); kfree(sigcommon);
} }
/*
 * Clear the slot of the process' TID table that is bound to the given
 * thread, making it no longer refer to that thread. Only the first
 * matching slot is cleared; nothing happens if no slot refers to the
 * thread.
 * NOTE: threads_lock must be held.
 */
void __release_tid(struct process *proc, struct thread *thread)
{
	int slot = 0;

	while (slot < proc->nr_tids) {
		if (proc->tids[slot].thread == thread) {
			proc->tids[slot].thread = NULL;
			dkprintf("%s: tid %d has been released by %p\n",
					__FUNCTION__, thread->tid, thread);
			return;
		}
		++slot;
	}
}
void destroy_thread(struct thread *thread) void destroy_thread(struct thread *thread)
{ {
struct sig_pending *pending; struct sig_pending *pending;
@ -2188,6 +2209,7 @@ void destroy_thread(struct thread *thread)
mcs_rwlock_writer_lock(&proc->threads_lock, &lock); mcs_rwlock_writer_lock(&proc->threads_lock, &lock);
list_del(&thread->siblings_list); list_del(&thread->siblings_list);
__release_tid(proc, thread);
mcs_rwlock_writer_unlock(&proc->threads_lock, &lock); mcs_rwlock_writer_unlock(&proc->threads_lock, &lock);
cpu_clear(thread->cpu_id, &thread->vm->address_space->cpu_set, cpu_clear(thread->cpu_id, &thread->vm->address_space->cpu_set,
@ -2325,6 +2347,8 @@ static void idle(void)
} }
if (v->status == CPU_STATUS_IDLE || if (v->status == CPU_STATUS_IDLE ||
v->status == CPU_STATUS_RESERVED) { v->status == CPU_STATUS_RESERVED) {
/* No work to do? Consolidate the kmalloc free list */
kmalloc_consolidate_free_list();
cpu_safe_halt(); cpu_safe_halt();
} }
else { else {
@ -2527,7 +2551,7 @@ static void do_migrate(void)
v->flags |= CPU_FLAG_NEED_RESCHED; v->flags |= CPU_FLAG_NEED_RESCHED;
ihk_mc_interrupt_cpu(get_x86_cpu_local_variable(cpu_id)->apic_id, 0xd1); ihk_mc_interrupt_cpu(get_x86_cpu_local_variable(cpu_id)->apic_id, 0xd1);
double_rq_unlock(cur_v, v, irqstate); double_rq_unlock(cur_v, v, irqstate);
settid(req->thread, 2, cpu_id, old_cpu_id); //settid(req->thread, 2, cpu_id, old_cpu_id, 0, NULL);
ack: ack:
waitq_wakeup(&req->wq); waitq_wakeup(&req->wq);
@ -2563,13 +2587,8 @@ void schedule(void)
struct thread *last; struct thread *last;
if (cpu_local_var(no_preempt)) { if (cpu_local_var(no_preempt)) {
dkprintf("no schedule() while no preemption! \n"); kprintf("%s: WARNING can't schedule() while no preemption, cnt: %d\n",
return; __FUNCTION__, cpu_local_var(no_preempt));
}
if (cpu_local_var(current)
&& cpu_local_var(current)->in_syscall_offload) {
dkprintf("no schedule() while syscall offload!\n");
return; return;
} }

View File

@ -127,11 +127,9 @@ int prepare_process_ranges_args_envs(struct thread *thread,
static void do_mod_exit(int status); static void do_mod_exit(int status);
#endif #endif
static void send_syscall(struct syscall_request *req, int cpu, int pid) static void send_syscall(struct syscall_request *req, int cpu, int pid, struct syscall_response *res)
{ {
struct ikc_scd_packet packet; struct ikc_scd_packet packet IHK_DMA_ALIGN;
struct syscall_response *res;
struct syscall_params *scp;
struct ihk_ikc_channel_desc *syscall_channel; struct ihk_ikc_channel_desc *syscall_channel;
int ret; int ret;
@ -140,7 +138,6 @@ static void send_syscall(struct syscall_request *req, int cpu, int pid)
req->number == __NR_kill){ // interrupt syscall req->number == __NR_kill){ // interrupt syscall
extern int num_processors; extern int num_processors;
scp = &get_cpu_local_var(0)->scp2;
syscall_channel = get_cpu_local_var(0)->syscall_channel2; syscall_channel = get_cpu_local_var(0)->syscall_channel2;
/* XXX: is this really going to work if multiple processes /* XXX: is this really going to work if multiple processes
@ -152,34 +149,22 @@ static void send_syscall(struct syscall_request *req, int cpu, int pid)
pid = req->args[1]; pid = req->args[1];
} }
else{ else{
scp = &get_cpu_local_var(cpu)->scp;
syscall_channel = get_cpu_local_var(cpu)->syscall_channel; syscall_channel = get_cpu_local_var(cpu)->syscall_channel;
} }
res = scp->response_va;
res->status = 0; res->status = 0;
req->valid = 0; req->valid = 0;
#ifdef USE_DMA memcpy(&packet.req, req, sizeof(*req));
memcpy_async(scp->request_pa,
virt_to_phys(req), sizeof(*req), 0, &fin);
memcpy_async_wait(&scp->post_fin);
scp->post_va->v[0] = scp->post_idx;
memcpy_async_wait(&fin);
#else
memcpy(scp->request_va, req, sizeof(*req));
#endif
barrier(); barrier();
scp->request_va->valid = 1; packet.req.valid = 1;
*(unsigned int *)scp->doorbell_va = cpu + 1;
#ifdef SYSCALL_BY_IKC #ifdef SYSCALL_BY_IKC
packet.msg = SCD_MSG_SYSCALL_ONESIDE; packet.msg = SCD_MSG_SYSCALL_ONESIDE;
packet.ref = cpu; packet.ref = cpu;
packet.pid = pid ? pid : cpu_local_var(current)->proc->pid; packet.pid = pid ? pid : cpu_local_var(current)->proc->pid;
packet.arg = scp->request_rpa; packet.resp_pa = virt_to_phys(res);
dkprintf("send syscall, nr: %d, pid: %d\n", req->number, packet.pid); dkprintf("send syscall, nr: %d, pid: %d\n", req->number, packet.pid);
ret = ihk_ikc_send(syscall_channel, &packet, 0); ret = ihk_ikc_send(syscall_channel, &packet, 0);
@ -193,9 +178,8 @@ ihk_spinlock_t syscall_lock;
long do_syscall(struct syscall_request *req, int cpu, int pid) long do_syscall(struct syscall_request *req, int cpu, int pid)
{ {
struct syscall_response *res; struct syscall_response res;
struct syscall_request req2 IHK_DMA_ALIGN; struct syscall_request req2 IHK_DMA_ALIGN;
struct syscall_params *scp;
int error; int error;
long rc; long rc;
int islock = 0; int islock = 0;
@ -206,6 +190,9 @@ long do_syscall(struct syscall_request *req, int cpu, int pid)
dkprintf("SC(%d)[%3d] sending syscall\n", dkprintf("SC(%d)[%3d] sending syscall\n",
ihk_mc_get_processor_id(), ihk_mc_get_processor_id(),
req->number); req->number);
irqstate = 0; /* for avoidance of warning */
barrier();
if(req->number != __NR_exit_group){ if(req->number != __NR_exit_group){
if(proc->nohost && // host is down if(proc->nohost && // host is down
@ -215,20 +202,18 @@ long do_syscall(struct syscall_request *req, int cpu, int pid)
++thread->in_syscall_offload; ++thread->in_syscall_offload;
} }
irqstate = 0; /* for avoidance of warning */
if(req->number == __NR_exit_group || if(req->number == __NR_exit_group ||
req->number == __NR_gettid || req->number == __NR_gettid ||
req->number == __NR_kill){ // interrupt syscall req->number == __NR_kill){ // interrupt syscall
scp = &get_cpu_local_var(0)->scp2;
islock = 1; islock = 1;
irqstate = ihk_mc_spinlock_lock(&syscall_lock); irqstate = ihk_mc_spinlock_lock(&syscall_lock);
} }
else{ /* The current thread is the requester and any thread from
scp = &get_cpu_local_var(cpu)->scp; * the pool may serve the request */
} req->rtid = cpu_local_var(current)->tid;
res = scp->response_va; req->ttid = 0;
res.req_thread_status = IHK_SCD_REQ_THREAD_SPINNING;
send_syscall(req, cpu, pid); send_syscall(req, cpu, pid, &res);
dkprintf("%s: syscall num: %d waiting for Linux.. \n", dkprintf("%s: syscall num: %d waiting for Linux.. \n",
__FUNCTION__, req->number); __FUNCTION__, req->number);
@ -236,60 +221,83 @@ long do_syscall(struct syscall_request *req, int cpu, int pid)
#define STATUS_IN_PROGRESS 0 #define STATUS_IN_PROGRESS 0
#define STATUS_COMPLETED 1 #define STATUS_COMPLETED 1
#define STATUS_PAGE_FAULT 3 #define STATUS_PAGE_FAULT 3
while (res->status != STATUS_COMPLETED) { while (res.status != STATUS_COMPLETED) {
while (res->status == STATUS_IN_PROGRESS) { while (res.status == STATUS_IN_PROGRESS) {
struct cpu_local_var *v; struct cpu_local_var *v;
int call_schedule = 0; int do_schedule = 0;
long runq_irqstate; long runq_irqstate;
unsigned long flags;
DECLARE_WAITQ_ENTRY(scd_wq_entry, cpu_local_var(current));
cpu_pause(); cpu_pause();
/* XXX: Intel MPI + Intel OpenMP situation: /* Spin if not preemptable */
* While the MPI helper thread waits in a poll() call the OpenMP master if (cpu_local_var(no_preempt) || !thread->tid) {
* thread is iterating through the CPU cores using setaffinity(). continue;
* Unless we give a chance to it on this core the two threads seem to }
* hang in deadlock. If the new thread would make a system call on this
* core we would be in trouble. For now, allow it, but in the future /* Spin by default, but if re-schedule is requested let
* we should have syscall channels for each thread instead of per core, * the other thread run */
* or we should multiplex syscall threads in mcexec */
runq_irqstate = runq_irqstate =
ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock)); ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock));
v = get_this_cpu_local_var(); v = get_this_cpu_local_var();
if (v->flags & CPU_FLAG_NEED_RESCHED) { if (v->flags & CPU_FLAG_NEED_RESCHED) {
call_schedule = 1; do_schedule = 1;
--thread->in_syscall_offload;
} }
ihk_mc_spinlock_unlock(&v->runq_lock, runq_irqstate); ihk_mc_spinlock_unlock(&v->runq_lock, runq_irqstate);
if (call_schedule) { if (!do_schedule) {
schedule(); continue;
++thread->in_syscall_offload;
} }
flags = cpu_disable_interrupt_save();
/* Try to sleep until notified */
if (__sync_bool_compare_and_swap(&res.req_thread_status,
IHK_SCD_REQ_THREAD_SPINNING,
IHK_SCD_REQ_THREAD_DESCHEDULED)) {
dkprintf("%s: tid %d waiting for syscall reply...\n",
__FUNCTION__, thread->tid);
waitq_init(&thread->scd_wq);
waitq_prepare_to_wait(&thread->scd_wq, &scd_wq_entry,
PS_INTERRUPTIBLE);
cpu_restore_interrupt(flags);
schedule();
waitq_finish_wait(&thread->scd_wq, &scd_wq_entry);
}
cpu_restore_interrupt(flags);
} }
if (res->status == STATUS_PAGE_FAULT) { if (res.status == STATUS_PAGE_FAULT) {
dkprintf("STATUS_PAGE_FAULT in syscall, pid: %d\n", dkprintf("STATUS_PAGE_FAULT in syscall, pid: %d\n",
cpu_local_var(current)->proc->pid); cpu_local_var(current)->proc->pid);
error = page_fault_process_vm(thread->vm, error = page_fault_process_vm(thread->vm,
(void *)res->fault_address, (void *)res.fault_address,
res->fault_reason|PF_POPULATE); res.fault_reason|PF_POPULATE);
/* send result */ /* send result */
req2.number = __NR_mmap; req2.number = __NR_mmap;
#define PAGER_RESUME_PAGE_FAULT 0x0101 #define PAGER_RESUME_PAGE_FAULT 0x0101
req2.args[0] = PAGER_RESUME_PAGE_FAULT; req2.args[0] = PAGER_RESUME_PAGE_FAULT;
req2.args[1] = error; req2.args[1] = error;
/* The current thread is the requester and only the waiting thread
* may serve the request */
req2.rtid = cpu_local_var(current)->tid;
req2.ttid = res.stid;
send_syscall(&req2, cpu, pid); res.req_thread_status = IHK_SCD_REQ_THREAD_SPINNING;
send_syscall(&req2, cpu, pid, &res);
} }
} }
dkprintf("%s: syscall num: %d got host reply: %d \n", dkprintf("%s: syscall num: %d got host reply: %d \n",
__FUNCTION__, req->number, res->ret); __FUNCTION__, req->number, res.ret);
rc = res->ret; rc = res.ret;
if(islock){ if(islock){
ihk_mc_spinlock_unlock(&syscall_lock, irqstate); ihk_mc_spinlock_unlock(&syscall_lock, irqstate);
} }
@ -820,7 +828,8 @@ terminate(int rc, int sig)
release_thread(mythread); release_thread(mythread);
release_process_vm(vm); release_process_vm(vm);
schedule(); schedule();
// no return kprintf("%s: ERROR: returned from terminate() -> schedule()\n", __FUNCTION__);
panic("panic");
} }
void void
@ -838,14 +847,15 @@ terminate_host(int pid)
} }
void void
interrupt_syscall(int pid, int cpuid) interrupt_syscall(int pid, int tid)
{ {
dkprintf("interrupt_syscall,target pid=%d,target cpuid=%d\n", pid, cpuid); dkprintf("interrupt_syscall,target pid=%d,target tid=%d\n", pid, tid);
ihk_mc_user_context_t ctx; ihk_mc_user_context_t ctx;
long lerror; long lerror;
kprintf("interrupt_syscall pid=%d tid=%d\n", pid, tid);
ihk_mc_syscall_arg0(&ctx) = pid; ihk_mc_syscall_arg0(&ctx) = pid;
ihk_mc_syscall_arg1(&ctx) = cpuid; ihk_mc_syscall_arg1(&ctx) = tid;
lerror = syscall_generic_forwarding(__NR_kill, &ctx); lerror = syscall_generic_forwarding(__NR_kill, &ctx);
if (lerror) { if (lerror) {
@ -908,8 +918,6 @@ static int do_munmap(void *addr, size_t len)
begin_free_pages_pending(); begin_free_pages_pending();
error = remove_process_memory_range(cpu_local_var(current)->vm, error = remove_process_memory_range(cpu_local_var(current)->vm,
(intptr_t)addr, (intptr_t)addr+len, &ro_freed); (intptr_t)addr, (intptr_t)addr+len, &ro_freed);
// XXX: TLB flush
flush_tlb();
if (error || !ro_freed) { if (error || !ro_freed) {
clear_host_pte((uintptr_t)addr, len); clear_host_pte((uintptr_t)addr, len);
} }
@ -921,6 +929,8 @@ static int do_munmap(void *addr, size_t len)
} }
} }
finish_free_pages_pending(); finish_free_pages_pending();
dkprintf("%s: 0x%lx:%lu, error: %ld\n",
__FUNCTION__, addr, len, error);
return error; return error;
} }
@ -1068,25 +1078,18 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
vrflags |= PROT_TO_VR_FLAG(prot); vrflags |= PROT_TO_VR_FLAG(prot);
vrflags |= (flags & MAP_PRIVATE)? VR_PRIVATE: 0; vrflags |= (flags & MAP_PRIVATE)? VR_PRIVATE: 0;
vrflags |= (flags & MAP_LOCKED)? VR_LOCKED: 0; vrflags |= (flags & MAP_LOCKED)? VR_LOCKED: 0;
vrflags |= VR_DEMAND_PAGING;
if (flags & MAP_ANONYMOUS) { if (flags & MAP_ANONYMOUS) {
if (0) { if (!anon_on_demand) {
/* dummy */ populated_mapping = 1;
} }
#ifdef USE_NOCACHE_MMAP #ifdef USE_NOCACHE_MMAP
#define X_MAP_NOCACHE MAP_32BIT #define X_MAP_NOCACHE MAP_32BIT
else if (flags & X_MAP_NOCACHE) { else if (flags & X_MAP_NOCACHE) {
vrflags &= ~VR_DEMAND_PAGING;
vrflags |= VR_IO_NOCACHE; vrflags |= VR_IO_NOCACHE;
} }
#endif #endif
else {
vrflags |= VR_DEMAND_PAGING;
if (!anon_on_demand) {
populated_mapping = 1;
}
}
}
else {
vrflags |= VR_DEMAND_PAGING;
} }
if (flags & (MAP_POPULATE | MAP_LOCKED)) { if (flags & (MAP_POPULATE | MAP_LOCKED)) {
@ -1162,6 +1165,8 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
error = -ENOMEM; error = -ENOMEM;
goto out; goto out;
} }
dkprintf("%s: 0x%x:%lu allocated %d pages, p2align: %lx\n",
__FUNCTION__, addr, len, npages, p2align);
phys = virt_to_phys(p); phys = virt_to_phys(p);
} }
else if (flags & MAP_SHARED) { else if (flags & MAP_SHARED) {
@ -1197,10 +1202,10 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
error = add_process_memory_range(thread->vm, addr, addr+len, phys, error = add_process_memory_range(thread->vm, addr, addr+len, phys,
vrflags, memobj, off, pgshift); vrflags, memobj, off, pgshift);
if (error) { if (error) {
ekprintf("do_mmap:add_process_memory_range" kprintf("%s: add_process_memory_range failed for 0x%lx:%lu"
"(%p,%lx,%lx,%lx,%lx,%d) failed %d\n", " flags: %lx, vrflags: %lx, pgshift: %d, error: %d\n",
thread->vm, addr, addr+len, __FUNCTION__, addr, addr+len,
virt_to_phys(p), vrflags, pgshift, error); flags, vrflags, pgshift, error);
goto out; goto out;
} }
@ -1246,8 +1251,12 @@ out:
if (memobj) { if (memobj) {
memobj_release(memobj); memobj_release(memobj);
} }
dkprintf("do_mmap(%lx,%lx,%x,%x,%d,%lx): %ld %lx\n", dkprintf("%s: 0x%lx:%8lu, (req: 0x%lx:%lu), prot: %x, flags: %x, "
addr0, len0, prot, flags, fd, off0, error, addr); "fd: %d, off: %lu, error: %ld, addr: 0x%lx\n",
__FUNCTION__,
addr, len, addr0, len0, prot, flags,
fd, off0, error, addr);
return (!error)? addr: error; return (!error)? addr: error;
} }
@ -1478,8 +1487,8 @@ SYSCALL_DECLARE(getppid)
return thread->proc->ppid_parent->pid; return thread->proc->ppid_parent->pid;
} }
void void settid(struct thread *thread, int mode, int newcpuid, int oldcpuid,
settid(struct thread *thread, int mode, int newcpuid, int oldcpuid) int nr_tids, int *tids)
{ {
struct syscall_request request IHK_DMA_ALIGN; struct syscall_request request IHK_DMA_ALIGN;
unsigned long rc; unsigned long rc;
@ -1489,6 +1498,12 @@ settid(struct thread *thread, int mode, int newcpuid, int oldcpuid)
request.args[1] = thread->proc->pid; request.args[1] = thread->proc->pid;
request.args[2] = newcpuid; request.args[2] = newcpuid;
request.args[3] = oldcpuid; request.args[3] = oldcpuid;
/*
* If nr_tids is non-zero, tids should point to an array of ints
* where the thread ids of the mcexec process are expected.
*/
request.args[4] = nr_tids;
request.args[5] = virt_to_phys(tids);
rc = do_syscall(&request, ihk_mc_get_processor_id(), thread->proc->pid); rc = do_syscall(&request, ihk_mc_get_processor_id(), thread->proc->pid);
if (mode != 2) { if (mode != 2) {
thread->tid = rc; thread->tid = rc;
@ -1893,7 +1908,61 @@ unsigned long do_fork(int clone_flags, unsigned long newsp,
&new->vm->address_space->cpu_set_lock); &new->vm->address_space->cpu_set_lock);
if (clone_flags & CLONE_VM) { if (clone_flags & CLONE_VM) {
settid(new, 1, cpuid, -1); int *tids = NULL;
int i;
struct mcs_rwlock_node_irqsave lock;
mcs_rwlock_writer_lock(&newproc->threads_lock, &lock);
/* Obtain mcexec TIDs if not known yet */
if (!newproc->nr_tids) {
tids = kmalloc(sizeof(int) * num_processors, IHK_MC_AP_NOWAIT);
if (!tids) {
mcs_rwlock_writer_unlock(&newproc->threads_lock, &lock);
release_cpuid(cpuid);
return -ENOMEM;
}
newproc->tids = kmalloc(sizeof(struct mcexec_tid) * num_processors, IHK_MC_AP_NOWAIT);
if (!newproc->tids) {
mcs_rwlock_writer_unlock(&newproc->threads_lock, &lock);
kfree(tids);
release_cpuid(cpuid);
return -ENOMEM;
}
settid(new, 1, cpuid, -1, num_processors, tids);
for (i = 0; (i < num_processors) && tids[i]; ++i) {
dkprintf("%s: tid[%d]: %d\n", __FUNCTION__, i, tids[i]);
newproc->tids[i].tid = tids[i];
newproc->tids[i].thread = NULL;
++newproc->nr_tids;
}
kfree(tids);
}
/* Find an unused TID */
retry_tid:
for (i = 0; i < newproc->nr_tids; ++i) {
if (!newproc->tids[i].thread) {
if (!__sync_bool_compare_and_swap(
&newproc->tids[i].thread, NULL, new)) {
goto retry_tid;
}
new->tid = newproc->tids[i].tid;
dkprintf("%s: tid %d assigned to %p\n", __FUNCTION__, new->tid, new);
break;
}
}
/* TODO: spawn more mcexec threads */
if (!new->tid) {
	/*
	 * new->tid is still zero after the search above, i.e. no TID was
	 * assigned to the new thread. The "%s" conversion must be given
	 * __FUNCTION__; calling kprintf without it reads a missing
	 * variadic argument.
	 */
	kprintf("%s: no more TIDs available\n", __FUNCTION__);
	panic("");
}
mcs_rwlock_writer_unlock(&newproc->threads_lock, &lock);
} }
/* fork() a new process on the host */ /* fork() a new process on the host */
else { else {
@ -1913,7 +1982,7 @@ unsigned long do_fork(int clone_flags, unsigned long newsp,
} }
/* In a single threaded process TID equals to PID */ /* In a single threaded process TID equals to PID */
settid(new, 0, cpuid, -1); new->tid = newproc->pid;
new->vm->address_space->pids[0] = new->proc->pid; new->vm->address_space->pids[0] = new->proc->pid;
dkprintf("fork(): new pid: %d\n", new->proc->pid); dkprintf("fork(): new pid: %d\n", new->proc->pid);
@ -5712,6 +5781,10 @@ SYSCALL_DECLARE(sched_setaffinity)
int empty_set = 1; int empty_set = 1;
extern int num_processors; extern int num_processors;
if (!u_cpu_set) {
return -EINVAL;
}
if (sizeof(k_cpu_set) > len) { if (sizeof(k_cpu_set) > len) {
memset(&k_cpu_set, 0, sizeof(k_cpu_set)); memset(&k_cpu_set, 0, sizeof(k_cpu_set));
} }
@ -5719,7 +5792,7 @@ SYSCALL_DECLARE(sched_setaffinity)
len = MIN2(len, sizeof(k_cpu_set)); len = MIN2(len, sizeof(k_cpu_set));
if (copy_from_user(&k_cpu_set, u_cpu_set, len)) { if (copy_from_user(&k_cpu_set, u_cpu_set, len)) {
kprintf("%s: error: copy_from_user failed for %p:%d\n", __FUNCTION__, u_cpu_set, len); dkprintf("%s: error: copy_from_user failed for %p:%d\n", __FUNCTION__, u_cpu_set, len);
return -EFAULT; return -EFAULT;
} }

View File

@ -75,7 +75,7 @@ sysfs_createf(struct sysfs_ops *ops, void *instance, int mode,
dkprintf("sysfs_createf(%p,%p,%#o,%s,...)\n", dkprintf("sysfs_createf(%p,%p,%#o,%s,...)\n",
ops, instance, mode, fmt); ops, instance, mode, fmt);
param = allocate_pages(1, IHK_MC_AP_NOWAIT); param = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT);
if (!param) { if (!param) {
error = -ENOMEM; error = -ENOMEM;
ekprintf("sysfs_createf:allocate_pages failed. %d\n", error); ekprintf("sysfs_createf:allocate_pages failed. %d\n", error);
@ -134,7 +134,7 @@ sysfs_createf(struct sysfs_ops *ops, void *instance, int mode,
error = 0; error = 0;
out: out:
if (param) { if (param) {
free_pages(param, 1); ihk_mc_free_pages(param, 1);
} }
if (error) { if (error) {
ekprintf("sysfs_createf(%p,%p,%#o,%s,...): %d\n", ekprintf("sysfs_createf(%p,%p,%#o,%s,...): %d\n",
@ -156,7 +156,7 @@ sysfs_mkdirf(sysfs_handle_t *dirhp, const char *fmt, ...)
dkprintf("sysfs_mkdirf(%p,%s,...)\n", dirhp, fmt); dkprintf("sysfs_mkdirf(%p,%s,...)\n", dirhp, fmt);
param = allocate_pages(1, IHK_MC_AP_NOWAIT); param = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT);
if (!param) { if (!param) {
error = -ENOMEM; error = -ENOMEM;
ekprintf("sysfs_mkdirf:allocate_pages failed. %d\n", error); ekprintf("sysfs_mkdirf:allocate_pages failed. %d\n", error);
@ -208,7 +208,7 @@ sysfs_mkdirf(sysfs_handle_t *dirhp, const char *fmt, ...)
out: out:
if (param) { if (param) {
free_pages(param, 1); ihk_mc_free_pages(param, 1);
} }
if (error) { if (error) {
ekprintf("sysfs_mkdirf(%p,%s,...): %d\n", dirhp, fmt, error); ekprintf("sysfs_mkdirf(%p,%s,...): %d\n", dirhp, fmt, error);
@ -229,7 +229,7 @@ sysfs_symlinkf(sysfs_handle_t targeth, const char *fmt, ...)
dkprintf("sysfs_symlinkf(%#lx,%s,...)\n", targeth.handle, fmt); dkprintf("sysfs_symlinkf(%#lx,%s,...)\n", targeth.handle, fmt);
param = allocate_pages(1, IHK_MC_AP_NOWAIT); param = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT);
if (!param) { if (!param) {
error = -ENOMEM; error = -ENOMEM;
ekprintf("sysfs_symlinkf:allocate_pages failed. %d\n", error); ekprintf("sysfs_symlinkf:allocate_pages failed. %d\n", error);
@ -279,7 +279,7 @@ sysfs_symlinkf(sysfs_handle_t targeth, const char *fmt, ...)
error = 0; error = 0;
out: out:
if (param) { if (param) {
free_pages(param, 1); ihk_mc_free_pages(param, 1);
} }
if (error) { if (error) {
ekprintf("sysfs_symlinkf(%#lx,%s,...): %d\n", ekprintf("sysfs_symlinkf(%#lx,%s,...): %d\n",
@ -301,7 +301,7 @@ sysfs_lookupf(sysfs_handle_t *objhp, const char *fmt, ...)
dkprintf("sysfs_lookupf(%p,%s,...)\n", objhp, fmt); dkprintf("sysfs_lookupf(%p,%s,...)\n", objhp, fmt);
param = allocate_pages(1, IHK_MC_AP_NOWAIT); param = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT);
if (!param) { if (!param) {
error = -ENOMEM; error = -ENOMEM;
ekprintf("sysfs_lookupf:allocate_pages failed. %d\n", error); ekprintf("sysfs_lookupf:allocate_pages failed. %d\n", error);
@ -353,7 +353,7 @@ sysfs_lookupf(sysfs_handle_t *objhp, const char *fmt, ...)
out: out:
if (param) { if (param) {
free_pages(param, 1); ihk_mc_free_pages(param, 1);
} }
if (error) { if (error) {
ekprintf("sysfs_lookupf(%p,%s,...): %d\n", objhp, fmt, error); ekprintf("sysfs_lookupf(%p,%s,...): %d\n", objhp, fmt, error);
@ -374,7 +374,7 @@ sysfs_unlinkf(int flags, const char *fmt, ...)
dkprintf("sysfs_unlinkf(%#x,%s,...)\n", flags, fmt); dkprintf("sysfs_unlinkf(%#x,%s,...)\n", flags, fmt);
param = allocate_pages(1, IHK_MC_AP_NOWAIT); param = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT);
if (!param) { if (!param) {
error = -ENOMEM; error = -ENOMEM;
ekprintf("sysfs_unlinkf:allocate_pages failed. %d\n", error); ekprintf("sysfs_unlinkf:allocate_pages failed. %d\n", error);
@ -423,7 +423,7 @@ sysfs_unlinkf(int flags, const char *fmt, ...)
error = 0; error = 0;
out: out:
if (param) { if (param) {
free_pages(param, 1); ihk_mc_free_pages(param, 1);
} }
if (error) { if (error) {
ekprintf("sysfs_unlinkf(%#x,%s,...): %d\n", flags, fmt, error); ekprintf("sysfs_unlinkf(%#x,%s,...): %d\n", flags, fmt, error);
@ -601,14 +601,14 @@ sysfs_init(void)
} }
sysfs_data_bufsize = PAGE_SIZE; sysfs_data_bufsize = PAGE_SIZE;
sysfs_data_buf = allocate_pages(1, IHK_MC_AP_NOWAIT); sysfs_data_buf = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT);
if (!sysfs_data_buf) { if (!sysfs_data_buf) {
error = -ENOMEM; error = -ENOMEM;
ekprintf("sysfs_init:allocate_pages(buf) failed. %d\n", error); ekprintf("sysfs_init:allocate_pages(buf) failed. %d\n", error);
goto out; goto out;
} }
param = allocate_pages(1, IHK_MC_AP_NOWAIT); param = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT);
if (!param) { if (!param) {
error = -ENOMEM; error = -ENOMEM;
ekprintf("sysfs_init:allocate_pages(param) failed. %d\n", ekprintf("sysfs_init:allocate_pages(param) failed. %d\n",
@ -644,7 +644,7 @@ sysfs_init(void)
error = 0; error = 0;
out: out:
if (param) { if (param) {
free_pages(param, 1); ihk_mc_free_pages(param, 1);
} }
if (error) { if (error) {
ekprintf("sysfs_init(): %d\n", error); ekprintf("sysfs_init(): %d\n", error);

View File

@ -172,6 +172,10 @@ static int zeroobj_get_page(struct memobj *memobj, off_t off, int p2align,
struct zeroobj *obj = to_zeroobj(memobj); struct zeroobj *obj = to_zeroobj(memobj);
struct page *page; struct page *page;
/* Don't bother about zero page, page fault handler will
* allocate and clear pages */
return 0;
dkprintf("zeroobj_get_page(%p,%#lx,%d,%p)\n", dkprintf("zeroobj_get_page(%p,%#lx,%d,%p)\n",
memobj, off, p2align, physp); memobj, off, p2align, physp);
if (off & ~PAGE_MASK) { if (off & ~PAGE_MASK) {

View File

@ -103,7 +103,7 @@ void ihk_mc_clean_micpa(void);
void *ihk_mc_alloc_aligned_pages(int npages, int p2align, enum ihk_mc_ap_flag flag); void *ihk_mc_alloc_aligned_pages(int npages, int p2align, enum ihk_mc_ap_flag flag);
void *ihk_mc_alloc_pages(int npages, enum ihk_mc_ap_flag flag); void *ihk_mc_alloc_pages(int npages, enum ihk_mc_ap_flag flag);
void ihk_mc_free_pages(void *p, int npages); void ihk_mc_free_pages(void *p, int npages);
void *ihk_mc_allocate(int size, enum ihk_mc_ap_flag flag); void *ihk_mc_allocate(int size, int flag);
void ihk_mc_free(void *p); void ihk_mc_free(void *p);
void *arch_alloc_page(enum ihk_mc_ap_flag flag); void *arch_alloc_page(enum ihk_mc_ap_flag flag);