diff --git a/arch/x86/kernel/include/arch-memory.h b/arch/x86/kernel/include/arch-memory.h
index dc75673a..e01a4cac 100644
--- a/arch/x86/kernel/include/arch-memory.h
+++ b/arch/x86/kernel/include/arch-memory.h
@@ -117,6 +117,32 @@
 #define PTE_NULL ((pte_t)0)
 typedef unsigned long pte_t;
 
+/*
+ * pagemap kernel ABI bits
+ *
+ * Layout of one 64-bit pagemap entry, as encoded by the macros below:
+ *   bits  0-54  page frame number (PM_PFRAME)
+ *   bits 55-60  page shift (PM_PSHIFT)
+ *   bits 61-63  status (PM_STATUS); PM_PRESENT is bit 63, PM_SWAP is bit 62
+ *
+ * All masks are built from unsigned 64-bit constants so that no shift
+ * ever reaches into the sign bit of a signed type.
+ */
+#define PM_ENTRY_BYTES (sizeof(uint64_t))
+#define PM_STATUS_BITS 3
+#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS)
+#define PM_STATUS_MASK (((1ULL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
+#define PM_STATUS(nr) (((uint64_t)(nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK)
+#define PM_PSHIFT_BITS 6
+#define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS)
+#define PM_PSHIFT_MASK (((1ULL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
+#define PM_PSHIFT(x) (((uint64_t)(x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
+#define PM_PFRAME_MASK ((1ULL << PM_PSHIFT_OFFSET) - 1)
+#define PM_PFRAME(x) ((x) & PM_PFRAME_MASK)
+
+#define PM_PRESENT PM_STATUS(4ULL)
+#define PM_SWAP PM_STATUS(2ULL)
+
 /* For easy conversion, it is better to be the same as architecture's ones */
 enum ihk_mc_pt_attribute {
 	PTATTR_ACTIVE = 0x01,
diff --git a/arch/x86/kernel/memory.c b/arch/x86/kernel/memory.c
index 3c0c7266..f36e1da1 100644
--- a/arch/x86/kernel/memory.c
+++ b/arch/x86/kernel/memory.c
@@ -494,6 +494,61 @@ static int __clear_pt_page(struct page_table *pt, void *virt, int largepage)
 	return 0;
 }
 
+/*
+ * Build the pagemap entry for one virtual address.
+ *
+ * Walks the page table level by level (L4 down to L1) and encodes the
+ * result with the PM_* macros: page frame number, page shift and
+ * PM_PRESENT. If pt is NULL, init_pt is walked instead.
+ *
+ * Returns 0 as soon as an entry on the path is not present.
+ */
+uint64_t ihk_mc_pt_virt_to_pagemap(struct page_table *pt, unsigned long virt)
+{
+	int l4idx, l3idx, l2idx, l1idx;
+	uint64_t ret = 0;
+
+	if (!pt) {
+		pt = init_pt;
+	}
+
+	GET_VIRT_INDICES(virt, l4idx, l3idx, l2idx, l1idx);
+
+	if (!(pt->entry[l4idx] & PFL4_PRESENT)) {
+		return ret;
+	}
+	pt = phys_to_virt(pt->entry[l4idx] & PAGE_MASK);
+
+	if (!(pt->entry[l3idx] & PFL3_PRESENT)) {
+		return ret;
+	}
+	pt = phys_to_virt(pt->entry[l3idx] & PAGE_MASK);
+
+	if (!(pt->entry[l2idx] & PFL2_PRESENT)) {
+		return ret;
+	}
+	if ((pt->entry[l2idx] & PFL2_SIZE)) {
+		/*
+		 * Large page: report the PAGE_SIZE-granular frame backing virt.
+		 * PT_PHYSMASK is applied as in the L1 case below so that any
+		 * non-address bits left by LARGE_PAGE_MASK stay out of the PFN.
+		 */
+		ret = PM_PFRAME(((pt->entry[l2idx] & PT_PHYSMASK & LARGE_PAGE_MASK)
+				+ (virt & (LARGE_PAGE_SIZE - 1))) >> PAGE_SHIFT);
+		ret |= PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
+		return ret;
+	}
+	pt = phys_to_virt(pt->entry[l2idx] & PAGE_MASK);
+
+	if (!(pt->entry[l1idx] & PFL1_PRESENT)) {
+		return ret;
+	}
+
+	ret = PM_PFRAME((pt->entry[l1idx] & PT_PHYSMASK) >> PAGE_SHIFT);
+	ret |= PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
+	return ret;
+}
+
 int ihk_mc_pt_virt_to_phys(struct page_table *pt,
 			   void *virt, unsigned long *phys)
 {
diff --git a/executer/kernel/procfs.c b/executer/kernel/procfs.c
index cb7d00e9..bdaf3fd0 100644
--- a/executer/kernel/procfs.c
+++ b/executer/kernel/procfs.c
@@ -26,9 +26,8 @@
 #endif
 
 static DECLARE_WAIT_QUEUE_HEAD(procfsq);
-
-int mckernel_procfs_read(char *buffer, char **start, off_t offset,
-	int count, int *peof, void *dat);
+static ssize_t mckernel_procfs_read(struct file *file, char __user *buf,
+	size_t nbytes, loff_t *ppos);
 
 /* A private data for the procfs driver. */
 
@@ -149,6 +148,39 @@ static struct proc_dir_entry *get_procfs_entry(char *p, int osnum, int mode)
 	return ret;
 }
 
+/**
+ * \brief Reposition the file offset of a McKernel procfs file.
+ *
+ * Only SEEK_SET and SEEK_CUR are handled; any other origin, or a
+ * resulting negative position, yields -EINVAL and leaves f_pos untouched.
+ */
+loff_t mckernel_procfs_lseek(struct file *file, loff_t offset, int orig)
+{
+	loff_t pos;
+
+	switch (orig) {
+	case SEEK_SET:
+		pos = offset;
+		break;
+	case SEEK_CUR:
+		pos = file->f_pos + offset;
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (pos < 0) {
+		return -EINVAL;
+	}
+	file->f_pos = pos;
+	return pos;
+}
+
+static const struct file_operations mckernel_procfs_file_operations = {
+	.llseek = mckernel_procfs_lseek,
+	.read = mckernel_procfs_read,
+	.write = NULL,
+};
+
 /**
  * \brief Create a procfs entry.
  *
@@ -194,7 +226,7 @@ void procfs_create(void *__os, int ref, int osnum, int pid, unsigned long arg)
 	e->cpu = ref;
 	e->pid = pid;
 
-	entry->read_proc = mckernel_procfs_read;
+	entry->proc_fops = &mckernel_procfs_file_operations;
 quit:
 	f->status = 1; /* Now the peer can free the data. */
 	ihk_device_unmap_virtual(dev, f, sizeof(struct procfs_file));
@@ -271,32 +303,36 @@ void procfs_answer(unsigned int arg, int err)
  * This function conforms to the 2) way of fs/proc/generic.c
  * from linux-2.6.39.4.
  */
-
-int mckernel_procfs_read(char *buffer, char **start, off_t offset,
-	int count, int *peof, void *dat)
+static ssize_t
+mckernel_procfs_read(struct file *file, char __user *buf, size_t nbytes,
+	loff_t *ppos)
 {
+	struct inode * inode = file->f_path.dentry->d_inode;
 	char *kern_buffer;
 	int order = 0;
-	struct procfs_list_entry *e = dat;
 	volatile struct procfs_read *r;
 	struct ikc_scd_packet isp;
 	int ret, retrycount = 0;
 	unsigned long pbuf;
+	unsigned long count = nbytes;
+	struct proc_dir_entry *dp = PDE(inode);
+	struct procfs_list_entry *e = dp->data;
+	loff_t offset = *ppos;
 
-	dprintk("mckernel_procfs_read: invoked for %s, count: %d\n",
-		e->fname, count);
+	dprintk("mckernel_procfs_read: invoked for %s, offset: %lld, count: %lu\n",
+		e->fname, (long long)offset, count);
 
-	/* Starting from the middle of a proc file is not supported yet */
-	if (offset > 0) {
-		return 0;
-	}
-
-	if (count <= 0 || dat == NULL || offset < 0) {
+	if (count == 0 || offset < 0) {
 		return 0;
 	}
 
 	while ((1 << order) < count) ++order;
-	order -= 12;
+	if (order > 12) {
+		order -= 12;
+	}
+	else {
+		order = 1;
+	}
 
 	/* NOTE: we need physically contigous memory to pass through IKC */
 	kern_buffer = (char *)__get_free_pages(GFP_KERNEL, order);
@@ -324,18 +360,23 @@ retry:
 	isp.msg = SCD_MSG_PROCFS_REQUEST;
 	isp.ref = e->cpu;
 	isp.arg = virt_to_phys(r);
+
 	ret = mcctrl_ikc_send(e->os, e->cpu, &isp);
+
 	if (ret < 0) {
 		goto out; /* error */
 	}
+
 	/* Wait for a reply. */
 	ret = -EIO; /* default exit code */
 	dprintk("now wait for a relpy\n");
+
 	/* Wait for the status field of the procfs_read structure set ready. */
 	if (wait_event_interruptible_timeout(procfsq, r->status != 0, HZ) == 0) {
 		kprintf("ERROR: mckernel_procfs_read: timeout (1 sec).\n");
 		goto out;
 	}
+
 	/* Wake up and check the result. */
 	dprintk("mckernel_procfs_read: woke up. ret: %d, eof: %d\n", r->ret, r->eof);
 	if ((r->ret == 0) && (r->eof != 1)) {
@@ -350,12 +391,14 @@ retry:
 		dprintk("retry\n");
 		goto retry;
 	}
-	if (r->eof == 1) {
-		dprintk("reached end of file.\n");
-		*peof = 1;
-	}
-	memcpy(buffer, kern_buffer, r->ret);
 
+	if (copy_to_user(buf, kern_buffer, r->ret)) {
+		kprintf("ERROR: mckernel_procfs_read: copy_to_user failed.\n");
+		ret = -EFAULT;
+		goto out;
+	}
+
+	*ppos += r->ret;
 	ret = r->ret;
 
 out:
diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c
index 2bc012c3..fa840b3c 100644
--- a/executer/user/mcexec.c
+++ b/executer/user/mcexec.c
@@ -1405,7 +1405,11 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock, int mcosid)
 			__dprintf("open: %s\n", pathbuf);
 
 			fn = pathbuf;
-			if(!strncmp(fn, "/proc/", 6)){
+			if (!strncmp(fn, "/proc/self/", 11)){
+				sprintf(tmpbuf, "/proc/mcos%d/%d/%s", mcosid, getpid(), fn + 11);
+				fn = tmpbuf;
+			}
+			else if(!strncmp(fn, "/proc/", 6)){
 				sprintf(tmpbuf, "/proc/mcos%d/%s", mcosid, fn + 6);
 				fn = tmpbuf;
 			}
diff --git a/kernel/procfs.c b/kernel/procfs.c
index f0ac6ba1..49a0a5b5 100644
--- a/kernel/procfs.c
+++ b/kernel/procfs.c
@@ -69,6 +69,9 @@ void create_proc_procfs_files(int pid, int cpuid)
 	snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/maps", osnum, pid);
 	create_proc_procfs_file(pid, fname, 0400, cpuid);
 
+	snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/pagemap", osnum, pid);
+	create_proc_procfs_file(pid, fname, 0400, cpuid);
+
 	snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/task/%d/mem", osnum, pid, pid);
 	create_proc_procfs_file(pid, fname, 0400, cpuid);
 
@@ -122,6 +125,9 @@ void delete_proc_procfs_files(int pid)
 	snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/maps", osnum, pid);
 	delete_proc_procfs_file(pid, fname);
 
+	snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/pagemap", osnum, pid);
+	delete_proc_procfs_file(pid, fname);
+
 	snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/auxv", osnum, pid);
 	delete_proc_procfs_file(pid, fname);
 
@@ -208,6 +214,8 @@ void process_procfs_request(unsigned long rarg)
 	struct ihk_ikc_channel_desc *syscall_channel;
 	ihk_spinlock_t *savelock;
 	unsigned long irqstate;
+	unsigned long offset;
+	int count;
 
 	dprintf("process_procfs_request: invoked.\n");
 
@@ -235,6 +243,8 @@ void process_procfs_request(unsigned long rarg)
 		goto bufunavail;
 	}
 
+	count = r->count;
+	offset = r->offset;
 	dprintf("fname: %s, offset: %lx, count:%d.\n", r->fname, r->offset, r->count);
 
 	/*
@@ -328,6 +338,13 @@ void process_procfs_request(unsigned long rarg)
 		int left = r->count - 1; /* extra 1 for terminating NULL */
 		int written = 0;
 		char *_buf = buf;
+
+		/* Starting from the middle of a proc file is not supported for maps */
+		if (offset > 0) {
+			ans = 0;
+			eof = 1;
+			goto end;
+		}
 
 		ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
 
@@ -370,6 +387,60 @@ void process_procfs_request(unsigned long rarg)
 		eof = 1;
 		goto end;
 	}
+
+	/*
+	 * mcos%d/PID/pagemap
+	 *
+	 * One 64-bit entry per virtual page: the entry for address A lives
+	 * at byte offset (A >> PAGE_SHIFT) * sizeof(uint64_t).
+	 */
+	if (strcmp(p, "pagemap") == 0) {
+		struct process_vm *vm = proc->vm;
+		uint64_t *_buf = (uint64_t *)buf;
+		uint64_t start, end;
+
+		/*
+		 * NOTE(review): this rejects every offset below PAGE_SIZE
+		 * bytes, not only the entry of virtual page 0 -- confirm
+		 * that this is the intended range.
+		 */
+		if (offset < PAGE_SIZE) {
+			kprintf("WARNING: /proc/pagemap queried for NULL page\n");
+			ans = 0;
+			goto end;
+		}
+
+		/* Check alignment */
+		if ((offset % sizeof(uint64_t) != 0) ||
+				(count % sizeof(uint64_t) != 0)) {
+			ans = 0;
+			eof = 1;
+			goto end;
+		}
+
+		start = (offset / sizeof(uint64_t)) << PAGE_SHIFT;
+		end = start + ((count / sizeof(uint64_t)) << PAGE_SHIFT);
+
+		/* Log the range before the loop below advances start up to end */
+		dprintf("/proc/pagemap: 0x%lx - 0x%lx, count: %d\n",
+				start, end, count);
+
+		ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
+
+		while (start < end) {
+			*_buf = ihk_mc_pt_virt_to_pagemap(vm->page_table, start);
+			dprintf("PID: %d, /proc/pagemap: 0x%lx -> %lx\n", proc->ftn->pid,
+					start, *_buf);
+			start += PAGE_SIZE;
+			++_buf;
+		}
+
+		ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
+
+		ans = count;
+		goto end;
+	}
+
 
 	/*
 	 * mcos%d/PID/auxv
diff --git a/kernel/syscall.c b/kernel/syscall.c
index 53c25091..b7209c7f 100644
--- a/kernel/syscall.c
+++ b/kernel/syscall.c
@@ -1072,7 +1072,7 @@ out:
 	}
 	ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock);
 
-	if (!error && (flags & (MAP_POPULATE | MAP_LOCKED))) {
+	if (!error && ((flags & MAP_POPULATE) || (flags & MAP_LOCKED))) {
 		error = populate_process_memory(proc, (void *)addr, len);
 		if (error) {
 			ekprintf("sys_mmap:populate_process_memory"
diff --git a/lib/include/ihk/mm.h b/lib/include/ihk/mm.h
index f8952ae3..ecd510ad 100644
--- a/lib/include/ihk/mm.h
+++ b/lib/include/ihk/mm.h
@@ -148,6 +148,7 @@ void ihk_mc_pt_destroy(struct page_table *pt);
 void ihk_mc_load_page_table(struct page_table *pt);
 int ihk_mc_pt_virt_to_phys(struct page_table *pt,
 			   void *virt, unsigned long *phys);
+uint64_t ihk_mc_pt_virt_to_pagemap(struct page_table *pt, unsigned long virt);
 
 void remote_flush_tlb_cpumask(struct process_vm *vm,
 		unsigned long addr, int cpu_id);