introduction of mckernel_procfs_file_operations; fix /proc/self path resolution;

implementation of /proc/self/pagemap (LTP mmap12)
This commit is contained in:
bgerofi@riken.jp
2014-12-15 12:43:42 +09:00
committed by Balazs Gerofi bgerofi@riken.jp
parent 815d907ca4
commit d4ba4dc8b3
7 changed files with 185 additions and 24 deletions

View File

@ -117,6 +117,25 @@
#define PTE_NULL ((pte_t)0)
typedef unsigned long pte_t;
/*
 * pagemap kernel ABI bits
 *
 * Layout of one 64-bit pagemap entry as encoded by the macros below:
 *   bits 63..61  status field  (PM_STATUS_BITS wide)
 *   bits 60..55  page shift    (PM_PSHIFT_BITS wide)
 *   bits 54..0   page frame number
 *
 * Every constant is built from unsigned 64-bit values: the status field
 * occupies the topmost bits, and shifting a signed long long into bit 63
 * overflows, which is undefined behavior in C.
 */
#define PM_ENTRY_BYTES sizeof(uint64_t)
#define PM_STATUS_BITS 3
#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS)
#define PM_STATUS_MASK (((1ULL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
#define PM_STATUS(nr) (((uint64_t)(nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK)
#define PM_PSHIFT_BITS 6
#define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS)
#define PM_PSHIFT_MASK (((1ULL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
#define PM_PSHIFT(x) (((uint64_t)(x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
#define PM_PFRAME_MASK ((1ULL << PM_PSHIFT_OFFSET) - 1)
#define PM_PFRAME(x) ((uint64_t)(x) & PM_PFRAME_MASK)
/* Status values: bit 63 marks a present page, bit 62 a swapped-out one. */
#define PM_PRESENT PM_STATUS(4ULL)
#define PM_SWAP PM_STATUS(2ULL)
/* For easy conversion, these values are best kept identical to the architecture's own */
enum ihk_mc_pt_attribute {
PTATTR_ACTIVE = 0x01,

View File

@ -494,6 +494,50 @@ static int __clear_pt_page(struct page_table *pt, void *virt, int largepage)
return 0;
}
/*
 * Translate a virtual address into a /proc/PID/pagemap style entry by
 * walking the four-level page table.
 *
 * pt:   top-level page table to walk; init_pt is used when this is NULL.
 * virt: virtual address to look up.
 *
 * Returns 0 when any level of the walk is not present. Otherwise returns
 * the page frame number of the PAGE_SIZE frame backing virt, combined with
 * PM_PSHIFT(PAGE_SHIFT) and PM_PRESENT. A mapping flagged PFL2_SIZE is
 * reported per PAGE_SIZE frame: the offset of virt inside the large page is
 * added to the large page's base before the frame number is computed.
 */
uint64_t ihk_mc_pt_virt_to_pagemap(struct page_table *pt, unsigned long virt)
{
	int l4idx, l3idx, l2idx, l1idx;
	unsigned long phys;

	if (!pt) {
		pt = init_pt;
	}

	GET_VIRT_INDICES(virt, l4idx, l3idx, l2idx, l1idx);

	if (!(pt->entry[l4idx] & PFL4_PRESENT)) {
		return 0;
	}
	pt = phys_to_virt(pt->entry[l4idx] & PAGE_MASK);

	if (!(pt->entry[l3idx] & PFL3_PRESENT)) {
		return 0;
	}
	pt = phys_to_virt(pt->entry[l3idx] & PAGE_MASK);

	if (!(pt->entry[l2idx] & PFL2_PRESENT)) {
		return 0;
	}

	if (pt->entry[l2idx] & PFL2_SIZE) {
		/*
		 * Mask with PT_PHYSMASK as well as LARGE_PAGE_MASK, matching the
		 * small-page case below, so that attribute bits stored above the
		 * physical address do not end up in the frame number.
		 * NOTE(review): assumes PT_PHYSMASK clears the non-address high
		 * bits of an entry, as its use for small pages suggests - confirm.
		 */
		phys = (pt->entry[l2idx] & PT_PHYSMASK & LARGE_PAGE_MASK) +
			(virt & (LARGE_PAGE_SIZE - 1));
		return PM_PFRAME(phys >> PAGE_SHIFT) |
			PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
	}
	pt = phys_to_virt(pt->entry[l2idx] & PAGE_MASK);

	if (!(pt->entry[l1idx] & PFL1_PRESENT)) {
		return 0;
	}

	phys = pt->entry[l1idx] & PT_PHYSMASK;
	return PM_PFRAME(phys >> PAGE_SHIFT) |
		PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
}
int ihk_mc_pt_virt_to_phys(struct page_table *pt,
void *virt, unsigned long *phys)
{

View File

@ -26,9 +26,8 @@
#endif
static DECLARE_WAIT_QUEUE_HEAD(procfsq);
int mckernel_procfs_read(char *buffer, char **start, off_t offset,
int count, int *peof, void *dat);
static ssize_t mckernel_procfs_read(struct file *file, char __user *buf,
size_t nbytes, loff_t *ppos);
/* A private data for the procfs driver. */
@ -149,6 +148,27 @@ static struct proc_dir_entry *get_procfs_entry(char *p, int osnum, int mode)
return ret;
}
/*
 * llseek handler for McKernel procfs files.
 *
 * file:   open file whose position is updated.
 * offset: requested offset, interpreted according to orig.
 * orig:   SEEK_SET (absolute) or SEEK_CUR (relative to the current
 *         position); any other value is rejected.
 *
 * Returns the new file position on success. Returns -EINVAL for an
 * unsupported orig or when the resulting position would be negative; in
 * both cases file->f_pos is left unchanged. Rejecting negative positions
 * keeps the return value unambiguous, since a negative result is
 * indistinguishable from an error code for the caller.
 */
loff_t mckernel_procfs_lseek(struct file *file, loff_t offset, int orig)
{
	loff_t newpos;

	switch (orig) {
	case SEEK_SET:
		newpos = offset;
		break;
	case SEEK_CUR:
		newpos = file->f_pos + offset;
		break;
	default:
		return -EINVAL;
	}

	if (newpos < 0) {
		return -EINVAL;
	}

	file->f_pos = newpos;
	return newpos;
}
/*
 * File operations for McKernel procfs entries: reading and seeking are
 * provided by the handlers in this file, writing is not supported.
 */
static const struct file_operations mckernel_procfs_file_operations = {
	.read   = mckernel_procfs_read,
	.llseek = mckernel_procfs_lseek,
	.write  = NULL,
};
/**
* \brief Create a procfs entry.
*
@ -194,7 +214,7 @@ void procfs_create(void *__os, int ref, int osnum, int pid, unsigned long arg)
e->cpu = ref;
e->pid = pid;
entry->read_proc = mckernel_procfs_read;
entry->proc_fops = &mckernel_procfs_file_operations;
quit:
f->status = 1; /* Now the peer can free the data. */
ihk_device_unmap_virtual(dev, f, sizeof(struct procfs_file));
@ -271,32 +291,36 @@ void procfs_answer(unsigned int arg, int err)
* This function conforms to the 2) way of fs/proc/generic.c
* from linux-2.6.39.4.
*/
int mckernel_procfs_read(char *buffer, char **start, off_t offset,
int count, int *peof, void *dat)
static ssize_t
mckernel_procfs_read(struct file *file, char __user *buf, size_t nbytes,
loff_t *ppos)
{
struct inode * inode = file->f_path.dentry->d_inode;
char *kern_buffer;
int order = 0;
struct procfs_list_entry *e = dat;
volatile struct procfs_read *r;
struct ikc_scd_packet isp;
int ret, retrycount = 0;
unsigned long pbuf;
unsigned long count = nbytes;
struct proc_dir_entry *dp = PDE(inode);
struct procfs_list_entry *e = dp->data;
loff_t offset = *ppos;
dprintk("mckernel_procfs_read: invoked for %s, count: %d\n",
e->fname, count);
dprintk("mckernel_procfs_read: invoked for %s, offset: %lu, count: %d\n",
e->fname, offset, count);
/* Starting from the middle of a proc file is not supported yet */
if (offset > 0) {
return 0;
}
if (count <= 0 || dat == NULL || offset < 0) {
if (count <= 0 || offset < 0) {
return 0;
}
while ((1 << order) < count) ++order;
order -= 12;
if (order > 12) {
order -= 12;
}
else {
order = 1;
}
/* NOTE: we need physically contiguous memory to pass through IKC */
kern_buffer = (char *)__get_free_pages(GFP_KERNEL, order);
@ -324,18 +348,23 @@ retry:
isp.msg = SCD_MSG_PROCFS_REQUEST;
isp.ref = e->cpu;
isp.arg = virt_to_phys(r);
ret = mcctrl_ikc_send(e->os, e->cpu, &isp);
if (ret < 0) {
goto out; /* error */
}
/* Wait for a reply. */
ret = -EIO; /* default exit code */
dprintk("now wait for a relpy\n");
/* Wait for the status field of the procfs_read structure set ready. */
if (wait_event_interruptible_timeout(procfsq, r->status != 0, HZ) == 0) {
kprintf("ERROR: mckernel_procfs_read: timeout (1 sec).\n");
goto out;
}
/* Wake up and check the result. */
dprintk("mckernel_procfs_read: woke up. ret: %d, eof: %d\n", r->ret, r->eof);
if ((r->ret == 0) && (r->eof != 1)) {
@ -350,12 +379,14 @@ retry:
dprintk("retry\n");
goto retry;
}
if (r->eof == 1) {
dprintk("reached end of file.\n");
*peof = 1;
}
memcpy(buffer, kern_buffer, r->ret);
if (copy_to_user(buf, kern_buffer, r->ret)) {
kprintf("ERROR: mckernel_procfs_read: copy_to_user failed.\n");
ret = -EFAULT;
goto out;
}
*ppos += r->ret;
ret = r->ret;
out:

View File

@ -1405,7 +1405,11 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock, int mcosid)
__dprintf("open: %s\n", pathbuf);
fn = pathbuf;
if(!strncmp(fn, "/proc/", 6)){
if (!strncmp(fn, "/proc/self/", 11)){
sprintf(tmpbuf, "/proc/mcos%d/%d/%s", mcosid, getpid(), fn + 11);
fn = tmpbuf;
}
else if(!strncmp(fn, "/proc/", 6)){
sprintf(tmpbuf, "/proc/mcos%d/%s", mcosid, fn + 6);
fn = tmpbuf;
}

View File

@ -69,6 +69,9 @@ void create_proc_procfs_files(int pid, int cpuid)
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/maps", osnum, pid);
create_proc_procfs_file(pid, fname, 0400, cpuid);
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/pagemap", osnum, pid);
create_proc_procfs_file(pid, fname, 0400, cpuid);
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/task/%d/mem", osnum, pid, pid);
create_proc_procfs_file(pid, fname, 0400, cpuid);
@ -122,6 +125,9 @@ void delete_proc_procfs_files(int pid)
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/maps", osnum, pid);
delete_proc_procfs_file(pid, fname);
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/pagemap", osnum, pid);
delete_proc_procfs_file(pid, fname);
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/auxv", osnum, pid);
delete_proc_procfs_file(pid, fname);
@ -208,6 +214,8 @@ void process_procfs_request(unsigned long rarg)
struct ihk_ikc_channel_desc *syscall_channel;
ihk_spinlock_t *savelock;
unsigned long irqstate;
unsigned long offset;
int count;
dprintf("process_procfs_request: invoked.\n");
@ -235,6 +243,8 @@ void process_procfs_request(unsigned long rarg)
goto bufunavail;
}
count = r->count;
offset = r->offset;
dprintf("fname: %s, offset: %lx, count:%d.\n", r->fname, r->offset, r->count);
/*
@ -328,6 +338,13 @@ void process_procfs_request(unsigned long rarg)
int left = r->count - 1; /* extra 1 for terminating NULL */
int written = 0;
char *_buf = buf;
/* Starting from the middle of a proc file is not supported for maps */
if (offset > 0) {
ans = 0;
eof = 1;
goto end;
}
ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
@ -370,6 +387,51 @@ void process_procfs_request(unsigned long rarg)
eof = 1;
goto end;
}
/*
* mcos%d/PID/pagemap
*/
if (strcmp(p, "pagemap") == 0) {
struct process_vm *vm = proc->vm;
uint64_t *_buf = (uint64_t *)buf;
uint64_t start, end;
if (offset < PAGE_SIZE) {
kprintf("WARNING: /proc/pagemap queried for NULL page\n");
ans = 0;
goto end;
}
/* Check alignment */
if ((offset % sizeof(uint64_t) != 0) ||
(count % sizeof(uint64_t) != 0)) {
ans = 0;
eof = 1;
goto end;
}
start = (offset / sizeof(uint64_t)) << PAGE_SHIFT;
end = start + ((count / sizeof(uint64_t)) << PAGE_SHIFT);
ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
while (start < end) {
*_buf = ihk_mc_pt_virt_to_pagemap(proc->vm->page_table, start);
dprintf("PID: %d, /proc/pagemap: 0x%lx -> %lx\n", proc->ftn->pid,
start, *_buf);
start += PAGE_SIZE;
++_buf;
}
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
dprintf("/proc/pagemap: 0x%lx - 0x%lx, count: %d\n",
start, end, count);
ans = count;
goto end;
}
/*
* mcos%d/PID/auxv

View File

@ -1072,7 +1072,7 @@ out:
}
ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock);
if (!error && (flags & (MAP_POPULATE | MAP_LOCKED))) {
if (!error && (flags & (MAP_POPULATE) || flags & (MAP_LOCKED))) {
error = populate_process_memory(proc, (void *)addr, len);
if (error) {
ekprintf("sys_mmap:populate_process_memory"

View File

@ -148,6 +148,7 @@ void ihk_mc_pt_destroy(struct page_table *pt);
void ihk_mc_load_page_table(struct page_table *pt);
int ihk_mc_pt_virt_to_phys(struct page_table *pt,
void *virt, unsigned long *phys);
uint64_t ihk_mc_pt_virt_to_pagemap(struct page_table *pt, unsigned long virt);
void remote_flush_tlb_cpumask(struct process_vm *vm,
unsigned long addr, int cpu_id);