Revert "trial implementation of private file mapping"

This reverts commit abe57218c4.
This commit is contained in:
NAKAMURA Gou
2013-07-26 16:44:39 +09:00
parent abe57218c4
commit 78d9d3fcd2
23 changed files with 173 additions and 1756 deletions

View File

@ -289,11 +289,6 @@ void init_syscall(void)
void init_cpu(void)
{
asm volatile (
"mov %%cr0,%%rax;"
"or $0x10000,%%rax;"
"mov %%rax,%%cr0"
::: "%rax");
init_fpu();
init_lapic();
init_syscall();
@ -499,7 +494,7 @@ int ihk_mc_unregister_interrupt_handler(int vector,
extern unsigned long __page_fault_handler_address;
void ihk_mc_set_page_fault_handler(void (*h)(unsigned long, unsigned long, void *))
void ihk_mc_set_page_fault_handler(void (*h)(unsigned long, void *))
{
__page_fault_handler_address = (unsigned long)h;
}

View File

@ -128,8 +128,6 @@ struct x86_regs {
unsigned long error, rip, cs, rflags, rsp, ss;
};
#define REGS_GET_STACK_POINTER(regs) (((struct x86_regs *)regs)->rsp)
/*
* Page fault error code bits:
*

View File

@ -66,8 +66,8 @@ page_fault:
cld
PUSH_ALL_REGS
movq %cr2, %rdi
movq 80(%rsp),%rsi
movq %rsp, %rdx
movq %rsp, %rsi
movq %rbp, %rdx
movq __page_fault_handler_address(%rip), %rax
andq %rax, %rax
jz 1f

View File

@ -7,7 +7,6 @@
#include <errno.h>
#include <list.h>
#include <process.h>
#include <page.h>
#define ekprintf(...) kprintf(__VA_ARGS__)
@ -797,20 +796,15 @@ static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base, uint64_t star
{
struct clear_range_args *args = args0;
uint64_t phys;
struct page *page;
if (*ptep == PTE_NULL) {
return -ENOENT;
}
phys = *ptep & PT_PHYSMASK;
*ptep = PTE_NULL;
*ptep = 0;
if (args->free_physical) {
page = phys_to_page(phys);
if (page && (page->mode == PM_MAPPED) && !page_unmap(page)) {
return 0;
}
ihk_mc_free_pages(phys_to_virt(phys), 1);
}
@ -823,7 +817,6 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base, uint64_t star
uint64_t phys;
struct page_table *pt;
int error;
struct page *page;
if (*ptep == PTE_NULL) {
return -ENOENT;
@ -848,15 +841,6 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base, uint64_t star
*ptep = PTE_NULL;
if (args->free_physical) {
page = phys_to_page(phys);
if (page && (page->mode == PM_MAPPED)) {
if (--page->count > 0) {
/* other mapping exists */
return 0;
}
list_del(&page->list);
page->mode = PM_NONE;
}
ihk_mc_free_pages(phys_to_virt(phys),
LARGE_PAGE_SIZE/PAGE_SIZE);
}
@ -1149,369 +1133,6 @@ int ihk_mc_pt_alloc_range(page_table_t pt, void *start, void *end,
&alloc_range_l4, &attr);
}
/*
 * Walk the page table for virt and locate the entry that maps it (or
 * would map it).
 *
 * Returns -ENOENT when the L4 or L3 entry on the path is not present.
 * Otherwise returns 0 and reports, through the output pointers, either
 *  - the L2 entry, when it is empty or a large page (size PTL2_SIZE), or
 *  - the L1 entry below it (size PTL1_SIZE),
 * together with the base virtual address covered by that entry.
 */
static int lookup_pte(struct page_table *pt, uintptr_t virt, pte_t **ptepp,
		uintptr_t *pgbasep, size_t *pgsizep)
{
	int i4, i3, i2, i1;
	struct page_table *l3tab;
	struct page_table *l2tab;
	struct page_table *l1tab;
	pte_t *l2e;

	GET_VIRT_INDICES(virt, i4, i3, i2, i1);

	if ((pt->entry[i4] & PFL4_PRESENT) == 0) {
		return -ENOENT;
	}
	l3tab = phys_to_virt(pt->entry[i4] & PT_PHYSMASK);

	if ((l3tab->entry[i3] & PFL3_PRESENT) == 0) {
		return -ENOENT;
	}
	l2tab = phys_to_virt(l3tab->entry[i3] & PT_PHYSMASK);

	l2e = &l2tab->entry[i2];
	if ((*l2e != PTE_NULL) && !(*l2e & PFL2_SIZE)) {
		/* the L2 entry points at an L1 table: descend one more level */
		l1tab = phys_to_virt(*l2e & PT_PHYSMASK);
		*ptepp = &l1tab->entry[i1];
		*pgbasep = GET_INDICES_VIRT(i4, i3, i2, i1);
		*pgsizep = PTL1_SIZE;
		return 0;
	}

	/* empty slot or large page: the L2 entry itself is the answer */
	*ptepp = l2e;
	*pgbasep = GET_INDICES_VIRT(i4, i3, i2, 0);
	*pgsizep = PTL2_SIZE;
	return 0;
}
/*
 * Public wrapper around lookup_pte() that traces entry, failure and exit.
 *
 * On success returns 0 and stores the entry pointer, the base address of
 * the page it covers and the page size through ptepp, pgbasep and pgsizep.
 * On failure returns the error from lookup_pte() and leaves the outputs
 * untouched.
 */
int ihk_mc_pt_lookup_pte(page_table_t pt, void *virt, pte_t **ptepp, void **pgbasep, size_t *pgsizep)
{
	pte_t *entry = NULL;
	uintptr_t base = 0;
	size_t size = 0;
	int rc;

	kprintf("ihk_mc_pt_lookup_pte(%p,%p)\n", pt, virt);

	rc = lookup_pte(pt, (uintptr_t)virt, &entry, &base, &size);
	if (rc) {
		kprintf("ihk_mc_pt_lookup_pte(%p,%p):lookup failed. %d\n", pt, virt, rc);
	}
	else {
		*ptepp = entry;
		*pgbasep = (void *)base;
		*pgsizep = size;
	}

	kprintf("ihk_mc_pt_lookup_pte(%p,%p): %d %p %lx %lx\n", pt, virt, rc, entry, base, size);
	return rc;
}
/*
 * Candidate page sizes, expressed as a shift applied to PAGE_SIZE, in the
 * order ihk_mc_pt_choose_pagesize() tries them. A negative value
 * terminates the list.
 */
static int page_p2align_list[] = {
LARGE_PAGE_P2ALIGN,
PAGE_P2ALIGN,
-1,
};
/*
 * Pick the page size to use for the page containing fault_addr0 inside the
 * range [start0, end0).
 *
 * The candidates in page_p2align_list are tried in order. A candidate is
 * skipped when
 *  - maxpgsize is non-zero and the candidate is larger than maxpgsize,
 *  - the naturally aligned page around the fault address does not lie
 *    completely inside [start0, end0), or
 *  - it is larger than the page size reported by lookup_pte() for the
 *    fault address (when lookup_pte() returns -ENOENT a large page is
 *    assumed to be possible).
 *
 * On success returns 0 and stores the page base address, the page size and
 * the matching shift through pgaddrp, pgsizep and p2alignp. On a lookup
 * failure the error is returned and the outputs are left untouched.
 * Panics when the fault address lies outside the range or when no
 * candidate fits.
 */
int ihk_mc_pt_choose_pagesize(page_table_t pt, void *start0, void *end0,
		void *fault_addr0, size_t maxpgsize, void **pgaddrp,
		size_t *pgsizep, int *p2alignp)
{
	const uintptr_t start = (uintptr_t)start0;
	const uintptr_t end = (uintptr_t)end0;
	const uintptr_t fault_addr = (uintptr_t)fault_addr0;
	int ix;
	int p2align;
	size_t pgsize;
	uintptr_t pgbase;
	pte_t *ptep;
	int error;
	uintptr_t pga;
	size_t pgs;

	kprintf("ihk_mc_pt_choose_pagesize(%p,%p,%p,%p,%lx,%p,%p,%p)\n",
			pt, start0, end0, fault_addr0, maxpgsize, pgaddrp,
			pgsizep, p2alignp);

	if ((fault_addr < start) || (end <= fault_addr)) {
		kprintf("ihk_mc_pt_choose_pagesize(%p,%p,%p,%p,%lx,%p,%p,%p):"
				"out of range\n",
				pt, start0, end0, fault_addr0, maxpgsize,
				pgaddrp, pgsizep, p2alignp);
		panic("ihk_mc_pt_choose_pagesize:out of range");
	}

	/* pgs == 0 means "page table not consulted yet" */
	pgs = 0;
	for (ix = 0; page_p2align_list[ix] >= 0; ++ix) {
		p2align = page_p2align_list[ix];
		pgsize = PAGE_SIZE << p2align;
		pgbase = fault_addr & ~(pgsize - 1);

		if ((maxpgsize != 0) && (pgsize > maxpgsize)) {
			continue;
		}
		if ((pgbase < start) || (end < (pgbase + pgsize))) {
			continue;
		}

		if (pgs == 0) {
			/* look the fault address up once, lazily */
			error = lookup_pte(pt, fault_addr, &ptep, &pga, &pgs);
			if (error == -ENOENT) {
				/* nothing mapped on the path: a large page is possible */
				error = 0;
				pgs = LARGE_PAGE_SIZE;
				pga = fault_addr & LARGE_PAGE_MASK;
			}
			else if (error) {
				kprintf("ihk_mc_pt_choose_pagesize("
						"%p,%p,%p,%p,%lx,%p,%p,%p):"
						"lookup pte failed. %d\n",
						pt, start0, end0, fault_addr0,
						maxpgsize, pgaddrp, pgsizep,
						p2alignp, error);
				goto out;
			}
		}

		if (pgs < pgsize) {
			continue;
		}

		error = 0;
		*pgaddrp = (void *)pgbase;
		*pgsizep = pgsize;
		*p2alignp = p2align;
		goto out;
	}

	kprintf("ihk_mc_pt_choose_pagesize(%p,%p,%p,%p,%lx,%p,%p,%p):"
			"not reached\n",
			pt, start0, end0, fault_addr0, maxpgsize, pgaddrp,
			pgsizep, p2alignp);
	panic("ihk_mc_pt_choose_pagesize:not reached");

out:
	if (error) {
		/*
		 * The outputs were never written on this path, so they must
		 * not be read back for the trace.
		 */
		kprintf("ihk_mc_pt_choose_pagesize(%p,%p,%p,%p,%lx,%p,%p,%p):"
				" %d\n",
				pt, start0, end0, fault_addr0, maxpgsize, pgaddrp,
				pgsizep, p2alignp, error);
	}
	else {
		kprintf("ihk_mc_pt_choose_pagesize(%p,%p,%p,%p,%lx,%p,%p,%p):"
				" %d %p %lx %d\n",
				pt, start0, end0, fault_addr0, maxpgsize, pgaddrp,
				pgsizep, p2alignp, error, *pgaddrp, *pgsizep, *p2alignp);
	}
	return error;
}
/*
 * Arguments handed through the walk_pte_l*() callbacks by
 * ihk_mc_pt_set_range().
 */
struct set_range_args {
/* physical address that the start of the virtual range maps to */
uintptr_t phys;
/* attributes applied to every entry that is written */
enum ihk_mc_pt_attribute attr;
/* not read or written by the set_range_*() code */
int padding;
/* start ^ phys; its low bits decide whether a large page can be used */
uintptr_t diff;
};
/*
 * walk_pte_l1() callback: install the L1 entry for the page at base.
 *
 * The physical address is args->phys plus the offset of base from the
 * start of the range. Returns -EBUSY if the entry is already in use,
 * 0 otherwise.
 */
int set_range_l1(void *args0, pte_t *ptep, uint64_t base, uint64_t start,
		uint64_t end)
{
	struct set_range_args *args = args0;
	int error = -EBUSY;

	kprintf("set_range_l1(%p,%p,%lx,%lx,%lx)\n",
			args0, ptep, base, start, end);

	if (*ptep == PTE_NULL) {
		const uintptr_t phys = args->phys + (base - start);

		*ptep = phys | attr_to_l1attr(args->attr);
		error = 0;
	}
	else {
		kprintf("set_range_l1(%p,%p,%lx,%lx,%lx):page exists\n",
				args0, ptep, base, start, end);
	}

	kprintf("set_range_l1(%p,%p,%lx,%lx,%lx): %d\n",
			args0, ptep, base, start, end, error);
	return error;
}
/*
 * walk_pte_l2() callback: map the part of [start, end) covered by the L2
 * entry at base.
 *
 * An empty entry is filled with a large page when the whole PTL2_SIZE
 * region lies inside the range and virtual and physical addresses share
 * the same offset within a large page; otherwise a new L1 table is
 * allocated and the walk continues with set_range_l1().
 *
 * Returns 0 on success, -EBUSY if a large page is already mapped here,
 * -ENOMEM if the L1 table cannot be allocated, or the error from the
 * L1 walk.
 */
int set_range_l2(void *args0, pte_t *ptep, uint64_t base, uint64_t start,
uint64_t end)
{
struct set_range_args *args = args0;
uintptr_t phys;
int error;
struct page_table *pt;
kprintf("set_range_l2(%p,%p,%lx,%lx,%lx)\n",
args0, ptep, base, start, end);
if (*ptep == PTE_NULL) {
/* large page only if fully covered and virt/phys are congruent */
if ((start <= base) && ((base + PTL2_SIZE) <= end)
&& ((args->diff & (PTL2_SIZE - 1)) == 0)) {
phys = args->phys + (base - start);
*ptep = phys | attr_to_l2attr(args->attr|PTATTR_LARGEPAGE);
kprintf("set_range_l2(%p,%p,%lx,%lx,%lx):"
"large page\n",
args0, ptep, base, start, end);
error = 0;
goto out;
}
/* otherwise hang a fresh L1 table below this entry */
pt = __alloc_new_pt(IHK_MC_AP_NOWAIT);
if (pt == NULL) {
kprintf("set_range_l2(%p,%p,%lx,%lx,%lx):"
"__alloc_new_pt failed\n",
args0, ptep, base, start, end);
error = -ENOMEM;
goto out;
}
*ptep = virt_to_phys(pt) | PFL2_PDIR_ATTR;
}
else if (*ptep & PFL2_SIZE) {
/* an existing large page cannot be overlaid */
kprintf("set_range_l2(%p,%p,%lx,%lx,%lx):"
"page exists\n",
args0, ptep, base, start, end);
error = -EBUSY;
goto out;
}
else {
/* reuse the L1 table that is already installed */
pt = phys_to_virt(*ptep & PT_PHYSMASK);
}
error = walk_pte_l1(pt, base, start, end, &set_range_l1, args0);
if (error) {
kprintf("set_range_l2(%p,%p,%lx,%lx,%lx):"
"walk_pte_l1 failed. %d\n",
args0, ptep, base, start, end, error);
goto out;
}
error = 0;
out:
kprintf("set_range_l2(%p,%p,%lx,%lx,%lx): %d\n",
args0, ptep, base, start, end, error);
return error;
}
/*
 * walk_pte_l3() callback: make sure an L2 table exists below this L3
 * entry (allocating one if the entry is empty) and continue the walk
 * with set_range_l2().
 *
 * Returns -ENOMEM when the table cannot be allocated (without the exit
 * trace), otherwise the result of the L2 walk.
 */
int set_range_l3(void *args0, pte_t *ptep, uint64_t base, uint64_t start,
		uint64_t end)
{
	struct page_table *l2tab;
	int rc;

	kprintf("set_range_l3(%p,%p,%lx,%lx,%lx)\n",
			args0, ptep, base, start, end);

	if (*ptep != PTE_NULL) {
		l2tab = phys_to_virt(*ptep & PT_PHYSMASK);
	}
	else {
		l2tab = __alloc_new_pt(IHK_MC_AP_NOWAIT);
		if (l2tab == NULL) {
			kprintf("set_range_l3(%p,%p,%lx,%lx,%lx):"
					"__alloc_new_pt failed\n",
					args0, ptep, base, start, end);
			return -ENOMEM;
		}
		*ptep = virt_to_phys(l2tab) | PFL3_PDIR_ATTR;
	}

	rc = walk_pte_l2(l2tab, base, start, end, &set_range_l2, args0);
	if (rc) {
		kprintf("set_range_l3(%p,%p,%lx,%lx,%lx):"
				"walk_pte_l2 failed. %d\n",
				args0, ptep, base, start, end, rc);
	}

	kprintf("set_range_l3(%p,%p,%lx,%lx,%lx): %d\n",
			args0, ptep, base, start, end, rc);
	return rc;
}
/*
 * walk_pte_l4() callback: make sure an L3 table exists below this L4
 * entry (allocating one if the entry is empty) and continue the walk
 * with set_range_l3().
 *
 * Returns -ENOMEM when the table cannot be allocated (without the exit
 * trace), otherwise the result of the L3 walk.
 */
int set_range_l4(void *args0, pte_t *ptep, uint64_t base, uint64_t start,
		uint64_t end)
{
	struct page_table *l3tab;
	int rc;

	kprintf("set_range_l4(%p,%p,%lx,%lx,%lx)\n",
			args0, ptep, base, start, end);

	if (*ptep != PTE_NULL) {
		l3tab = phys_to_virt(*ptep & PT_PHYSMASK);
	}
	else {
		l3tab = __alloc_new_pt(IHK_MC_AP_NOWAIT);
		if (l3tab == NULL) {
			kprintf("set_range_l4(%p,%p,%lx,%lx,%lx):"
					"__alloc_new_pt failed\n",
					args0, ptep, base, start, end);
			return -ENOMEM;
		}
		*ptep = virt_to_phys(l3tab) | PFL4_PDIR_ATTR;
	}

	rc = walk_pte_l3(l3tab, base, start, end, &set_range_l3, args0);
	if (rc) {
		kprintf("set_range_l4(%p,%p,%lx,%lx,%lx):"
				"walk_pte_l3 failed. %d\n",
				args0, ptep, base, start, end, rc);
	}

	kprintf("set_range_l4(%p,%p,%lx,%lx,%lx): %d\n",
			args0, ptep, base, start, end, rc);
	return rc;
}
/*
 * Map the virtual range [start, end) in pt onto physical memory
 * beginning at phys, using the given attributes.
 *
 * The work is done by walking the table with the set_range_l4()
 * callback chain. Returns 0 on success or the error reported by the
 * walk.
 */
int ihk_mc_pt_set_range(page_table_t pt, void *start, void *end,
		uintptr_t phys, enum ihk_mc_pt_attribute attr)
{
	const uintptr_t first = (uintptr_t)start;
	const uintptr_t last = (uintptr_t)end;
	struct set_range_args args;
	int rc;

	kprintf("ihk_mc_pt_set_range(%p,%p,%p,%lx,%x)\n",
			pt, start, end, phys, attr);

	args.phys = phys;
	args.attr = attr;
	/* bits in which virtual and physical addresses differ */
	args.diff = first ^ phys;

	rc = walk_pte_l4(pt, 0, first, last, &set_range_l4, &args);
	if (rc) {
		kprintf("ihk_mc_pt_set_range(%p,%p,%p,%lx,%x):"
				"walk_pte_l4 failed. %d\n",
				pt, start, end, phys, attr, rc);
	}

	kprintf("ihk_mc_pt_set_range(%p,%p,%p,%lx,%x): %d\n",
			pt, start, end, phys, attr, rc);
	return rc;
}
/*
 * Write a single page-table entry mapping phys with the given attributes.
 *
 * pgsize selects the entry format: PTL1_SIZE for a normal page,
 * PTL2_SIZE for a large page. Any other size panics. Always returns 0.
 */
int ihk_mc_pt_set_pte(page_table_t pt, pte_t *ptep, uintptr_t phys, size_t pgsize, enum ihk_mc_pt_attribute attr)
{
	kprintf("ihk_mc_pt_set_pte(%p,%p,%lx,%lx,%x):\n",
			pt, ptep, phys, pgsize, attr);

	if (pgsize == PTL1_SIZE) {
		*ptep = phys | attr_to_l1attr(attr);
	}
	else if (pgsize == PTL2_SIZE) {
		*ptep = phys | attr_to_l2attr(attr | PTATTR_LARGEPAGE);
	}
	else {
		/* unsupported page size */
		kprintf("ihk_mc_pt_set_pte(%p,%p,%lx,%lx,%x):\n",
				pt, ptep, phys, pgsize, attr);
		panic("ihk_mc_pt_set_pte");
	}

	return 0;
}
void load_page_table(struct page_table *pt)
{
unsigned long pt_addr;

View File

@ -71,8 +71,6 @@ struct syscall_load_desc {
struct syscall_response {
unsigned long status;
long ret;
unsigned long fault_address;
unsigned long fault_reason;
};
struct syscall_ret_desc {

View File

@ -217,9 +217,9 @@ int mcexec_syscall(struct mcctrl_channel *c, unsigned long arg)
return 0;
}
#ifndef DO_USER_MODE
int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c,
struct syscall_request *sc);
#ifndef DO_USER_MODE
// static int remaining_job, base_cpu, job_pos;
#endif
@ -243,24 +243,10 @@ int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req)
if(swd.cpu >= usrdata->num_channels)return -EINVAL;
c = usrdata->channels + swd.cpu;
if ((usrdata->channelowners[swd.cpu] != NULL)
&& (usrdata->channelowners[swd.cpu] != current)) {
printk("mcexec_wait_syscall:double wait %p %p\n",
usrdata->channelowners[swd.cpu],
current);
return -EBUSY;
}
#ifdef DO_USER_MODE
retry:
if (wait_event_interruptible(c->wq_syscall, c->req)) {
return -EINTR;
}
wait_event_interruptible(c->wq_syscall, c->req);
c->req = 0;
if (!c->param.request_va->valid) {
printk("mcexec_wait_syscall:stray wakeup\n");
goto retry;
}
#else
while (1) {
c = usrdata->channels + swd.cpu;
@ -299,28 +285,22 @@ printk("mcexec_wait_syscall:stray wakeup\n");
}
if (c->param.request_va &&
c->param.request_va->valid) {
#endif
c->param.request_va->valid = 0; /* ack */
dprintk("SC #%lx, %lx\n",
c->param.request_va->number,
c->param.request_va->args[0]);
usrdata->channelowners[swd.cpu] = current;
if (__do_in_kernel_syscall(os, c, c->param.request_va)) {
if (copy_to_user(&req->sr, c->param.request_va,
sizeof(struct syscall_request))) {
usrdata->channelowners[swd.cpu] = NULL;
return -EFAULT;
}
return 0;
}
usrdata->channelowners[swd.cpu] = NULL;
#ifdef DO_USER_MODE
goto retry;
if (__do_in_kernel_syscall(os, c, c->param.request_va)) {
#endif
if (copy_to_user(&req->sr, c->param.request_va,
sizeof(struct syscall_request))) {
return -EFAULT;
}
#ifndef DO_USER_MODE
if (usrdata->mcctrl_dma_abort) {
return -2;
}
return 0;
}
if (usrdata->mcctrl_dma_abort) {
return -2;
}
}
}
usrdata->remaining_job = 0;
@ -458,13 +438,6 @@ long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg)
if (copy_from_user(&ret, arg, sizeof(struct syscall_ret_desc))) {
return -EFAULT;
}
if (usrdata->channelowners[ret.cpu] != current) {
printk("mcexec_ret_syscall:owner mismatch: %p %p\n",
usrdata->channelowners[ret.cpu],
current);
return -EBUSY;
}
usrdata->channelowners[ret.cpu] = NULL;
mc = usrdata->channels + ret.cpu;
if (!mc) {
return -EINVAL;
@ -515,15 +488,6 @@ long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg)
} else {
mc->param.response_va->status = 1;
}
#if 1
{
extern struct vm_area_struct *rus_vma;
if (zap_vma_ptes(rus_vma, rus_vma->vm_start, rus_vma->vm_end - rus_vma->vm_start)) {
printk("zap_vma_ptes failed\n");
}
}
#endif
return 0;
}

View File

@ -235,11 +235,6 @@ int prepare_ikc_channels(ihk_os_t os)
printk("Error: cannot allocate channels.\n");
return -ENOMEM;
}
usrdata->channelowners = kzalloc(sizeof(void *) * usrdata->num_channels, GFP_KERNEL);
if (usrdata->channelowners == NULL) {
printk("Error: cannot allocate channelowners.\n");
return -ENOMEM;
}
usrdata->os = os;
init_waitqueue_head(&usrdata->wq_prepare);

View File

@ -80,7 +80,6 @@ struct mcctrl_usrdata {
unsigned long last_thread_exec;
wait_queue_head_t wq_prepare;
unsigned long rpgtable; /* per process, not per OS */
void **channelowners;
};
int mcctrl_ikc_send(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp);

View File

@ -8,7 +8,6 @@
#include <linux/anon_inodes.h>
#include <linux/mman.h>
#include <linux/file.h>
#include <linux/semaphore.h>
#include <asm/uaccess.h>
#include <asm/delay.h>
#include <asm/io.h>
@ -38,7 +37,7 @@ static void print_dma_lastreq(void)
#endif
#if 1 /* x86 depend, host OS side */
unsigned long translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long rva, unsigned fflags)
unsigned long translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long rva)
{
unsigned long rpa;
int offsh;
@ -64,13 +63,6 @@ unsigned long translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long
return -EFAULT;
}
#define PTE_RW 0x002
if ((fflags & FAULT_FLAG_WRITE) && !(pt[ix] & PTE_RW)) {
ihk_device_unmap_virtual(ihk_os_to_dev(os), pt, PAGE_SIZE);
ihk_device_unmap_memory(ihk_os_to_dev(os), phys, PAGE_SIZE);
return -EFAULT;
}
#define PTE_PS 0x080
if (pt[ix] & PTE_PS) {
rpa = pt[ix] & ((1UL << 52) - 1) & ~((1UL << offsh) - 1);
@ -92,64 +84,6 @@ out:
}
#endif
static int pager_call(ihk_os_t os, struct syscall_request *req);
/*
 * Forward a host-side page fault to the remote side through the syscall
 * channel owned by the current task and wait for the answer.
 *
 * The fault address and a write flag are placed in the response area,
 * then the request area is polled until the peer posts a request.
 * While waiting, __NR_mmap requests whose args[0] is not 0x0101 are
 * served through pager_call() and the wait continues.
 *
 * Returns 0 on success, -ENOENT if no channel is owned by the current
 * task, -EIO if the peer posts a request other than __NR_mmap, or the
 * value the peer put in args[1].
 *
 * NOTE(review): status 3 presumably marks "page fault forwarded" and
 * 0x0101 the peer's completion reply -- confirm against the remote side.
 */
static int remote_page_fault(struct mcctrl_usrdata *usrdata, struct vm_fault *vmf)
{
int cpu;
struct mcctrl_channel *channel;
volatile struct syscall_request *req;
volatile struct syscall_response *resp;
printk("remote_page_fault(%p,%p %x)\n", usrdata, vmf->virtual_address, vmf->flags);
/* find the channel whose recorded owner is the current task */
for (cpu = 0; cpu < usrdata->num_channels; ++cpu) {
if (usrdata->channelowners[cpu] == current) {
break;
}
}
if (cpu >= usrdata->num_channels) {
printk("cpu not found\n");
return -ENOENT;
}
channel = &usrdata->channels[cpu];
req = channel->param.request_va;
resp = channel->param.response_va;
/* post the page fault; status is written last, after the payload */
resp->ret = -EFAULT;
resp->fault_address = (unsigned long)vmf->virtual_address;
resp->fault_reason = (vmf->flags & FAULT_FLAG_WRITE)? 1: 0;
req->valid = 0;
resp->status = 3;
retry:
/* poll for the peer's next request, yielding the CPU in between */
while (req->valid == 0) {
schedule();
}
req->valid = 0;
/* check result */
if (req->number != __NR_mmap) {
printk("remote_page_fault:invalid response. %lx %lx\n",
req->number, req->args[0]);
return -EIO;
}
else if (req->args[0] != 0x0101) {
/* a pager request arrived instead of the reply: serve it, wait again */
resp->ret = pager_call(usrdata->os, (void *)req);
resp->status = 1;
goto retry;
}
else if (req->args[1] != 0) {
printk("remote_page_fault:response %d\n", (int)req->args[1]);
return (int)req->args[1];
}
printk("remote_page_fault(%p,%p %x): 0\n", usrdata, vmf->virtual_address, vmf->flags);
return 0;
}
static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct mcctrl_usrdata * usrdata = vma->vm_file->private_data;
@ -157,26 +91,12 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
unsigned long rpa;
unsigned long phys;
int error;
int try;
dprintk("mcctrl:page fault:flags %#x pgoff %#lx va %p page %p\n",
vmf->flags, vmf->pgoff, vmf->virtual_address, vmf->page);
for (try = 1; ; ++try) {
rpa = translate_rva_to_rpa(usrdata->os, usrdata->rpgtable,
(unsigned long)vmf->virtual_address,
vmf->flags);
#define NTRIES 2
if (((long)rpa >= 0) || (try >= NTRIES)) {
break;
}
error = remote_page_fault(usrdata, vmf);
if (error) {
printk("forward_page_fault failed. %d\n", error);
break;
}
}
rpa = translate_rva_to_rpa(usrdata->os, usrdata->rpgtable,
(unsigned long)vmf->virtual_address);
if ((long)rpa < 0) {
printk("mcctrl:page fault:flags %#x pgoff %#lx va %p page %p\n",
vmf->flags, vmf->pgoff, vmf->virtual_address, vmf->page);
@ -210,7 +130,6 @@ static struct file_operations rus_fops = {
.mmap = &rus_mmap,
};
struct vm_area_struct *rus_vma = NULL;
int reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp, unsigned long *endp)
{
struct file *file;
@ -233,7 +152,6 @@ int reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp, un
}
start = do_mmap_pgoff(file, 0, end,
PROT_READ|PROT_WRITE, MAP_FIXED|MAP_SHARED, 0);
vma = find_vma(current->mm, 0);
up_write(&current->mm->mmap_sem);
fput(file);
if (IS_ERR_VALUE(start)) {
@ -241,7 +159,6 @@ int reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp, un
return start;
}
rus_vma = vma;
*startp = start;
*endp = end;
return 0;
@ -333,6 +250,12 @@ static void clear_wait(unsigned char *p, int size)
p[size] = 0;
}
/*
 * Store a system call result in the channel's response area.
 * The return value is written before status is set to 1.
 * NOTE(review): status 1 presumably tells the peer the result is ready
 * -- confirm against the remote side.
 */
static void __return_syscall(struct mcctrl_channel *c, int ret)
{
c->param.response_va->ret = ret;
c->param.response_va->status = 1;
}
static unsigned long translate_remote_va(struct mcctrl_channel *c,
unsigned long rva)
{
@ -359,7 +282,6 @@ static unsigned long translate_remote_va(struct mcctrl_channel *c,
//extern struct mcctrl_channel *channels;
#if 0
int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c,
struct syscall_request *sc)
{
@ -475,328 +397,4 @@ int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c,
}
}
}
#endif
#endif /* !DO_USER_MODE */
/*
 * Store a system call result in the channel's response area.
 * The return value is written before status is set to 1.
 * NOTE(review): status 1 presumably tells the peer the result is ready
 * -- confirm against the remote side.
 */
static void __return_syscall(struct mcctrl_channel *c, long ret)
{
c->param.response_va->ret = ret;
c->param.response_va->status = 1;
}
/*
 * One entry of pager_list, created by pager_req_create() for each inode
 * that has been mapped.
 */
struct pager {
/* link in pager_list */
struct list_head list;
/* inode of the mapped file; used as the lookup key */
struct inode * inode;
/* address returned by do_mmap_pgoff(); handed to the peer as the handle */
void * handle;
};
/*
 * Compatibility for Linux v2.6.35 or prior: fall back to DECLARE_MUTEX
 * when DEFINE_SEMAPHORE is not provided.
 */
#ifndef DEFINE_SEMAPHORE
#define DEFINE_SEMAPHORE(...) DECLARE_MUTEX(__VA_ARGS__)
#endif
/* taken around every traversal or modification of pager_list */
static DEFINE_SEMAPHORE(pager_sem);
/* list of struct pager entries */
static struct list_head pager_list = LIST_HEAD_INIT(pager_list);
/*
 * Result record that pager_req_create() writes into the memory the peer
 * named by its physical address.
 */
struct pager_create_result {
/* pager handle (address of the mapping created for the file) */
uintptr_t handle;
/* PROT_* bits permitted by the mode the file was opened with */
int maxprot;
};
/*
 * Handle a PAGER_REQ_CREATE request: create (or find) the pager for the
 * file behind fd and report its handle and maximum protection.
 *
 * Only MAP_PRIVATE is accepted in flags; MAP_FIXED and MAP_DENYWRITE are
 * ignored. If a pager for the same inode already exists its handle is
 * reported and -EALREADY is returned. Otherwise one page of the file is
 * mapped into the current process and the mapping address becomes the
 * handle. The result is written to the memory at result_pa.
 *
 * Returns 0 on success, -EALREADY as described above, or -EINVAL,
 * -EBADF, -EINTR, -ENOMEM or the mmap error on failure (in which case
 * nothing is written to result_pa).
 */
static int pager_req_create(ihk_os_t os, int fd, int flags, int prot, uintptr_t result_pa)
{
const int ignore_flags = MAP_FIXED | MAP_DENYWRITE;
const int ok_flags = MAP_PRIVATE;
ihk_device_t dev = ihk_os_to_dev(os);
int error;
void *handle = NULL;
struct pager_create_result *resp;
int maxprot = -1;
struct file *file = NULL;
struct inode *inode;
struct pager *pager;
uintptr_t phys;
printk("pager_req_create(%d,%x,%x,%lx)\n", fd, flags, prot, (long)result_pa);
/* reject any flag that is neither accepted nor ignored */
if (flags & ~(ignore_flags | ok_flags)) {
printk("pager_req_create(%d,%x,%x,%lx):not supported flags %x\n",
fd, flags, prot, (long)result_pa,
flags & ~(ignore_flags | ok_flags));
error = -EINVAL;
goto out;
}
/* takes a file reference that is dropped at out: */
file = fget(fd);
if (file == NULL) {
error = -EBADF;
printk("pager_req_create(%d,%x,%x,%lx):file not found. %d\n", fd, flags, prot, (long)result_pa, error);
goto out;
}
inode = file->f_path.dentry->d_inode;
if (inode == NULL) {
error = -EBADF;
printk("pager_req_create(%d,%x,%x,%lx):inode not found. %d\n", fd, flags, prot, (long)result_pa, error);
goto out;
}
/* derive the maximum protection from the open mode */
if (!(file->f_mode & (FMODE_READ | FMODE_WRITE))) {
maxprot = PROT_NONE;
}
else {
maxprot = 0;
if (file->f_mode & FMODE_READ) {
maxprot |= PROT_READ;
maxprot |= PROT_EXEC;
}
if (file->f_mode & FMODE_WRITE) {
maxprot |= PROT_WRITE;
}
}
error = down_interruptible(&pager_sem);
if (error) {
error = -EINTR;
printk("pager_req_create(%d,%x,%x,%lx):signaled. %d\n", fd, flags, prot, (long)result_pa, error);
goto out;
}
/* reuse an existing pager for the same inode */
list_for_each_entry(pager, &pager_list, list) {
if (pager->inode == inode) {
handle = pager->handle;
error = -EALREADY;
up(&pager_sem);
goto found;
}
}
pager = kzalloc(sizeof(*pager), GFP_KERNEL);
if (pager == NULL) {
error = -ENOMEM;
printk("pager_req_create(%d,%x,%x,%lx):kzalloc failed. %d\n", fd, flags, prot, (long)result_pa, error);
up(&pager_sem);
goto out;
}
/* map one page of the file; the mapping address serves as the handle */
down_write(&current->mm->mmap_sem);
handle = (void *)do_mmap_pgoff(file, 0, PAGE_SIZE, prot, (flags & ok_flags), 0);
up_write(&current->mm->mmap_sem);
if (IS_ERR(handle)) {
error = PTR_ERR(handle);
printk("pager_req_create(%d,%x,%x,%lx):mmap failed. %d\n",
fd, flags, prot, (long)result_pa, error);
kfree(pager);
up(&pager_sem);
goto out;
}
pager->inode = inode;
pager->handle = handle;
list_add(&pager->list, &pager_list);
up(&pager_sem);
error = 0;
found:
/* write the result record into the peer's memory at result_pa */
phys = ihk_device_map_memory(dev, result_pa, sizeof(*resp));
resp = ihk_device_map_virtual(dev, phys, sizeof(*resp), NULL, 0);
resp->handle = (uintptr_t)handle;
resp->maxprot = maxprot;
ihk_device_unmap_virtual(dev, resp, sizeof(*resp));
ihk_device_unmap_memory(dev, phys, sizeof(*resp));
out:
if (file != NULL) {
fput(file);
}
printk("pager_req_create(%d,%x,%x,%lx): %d %p %x\n",
fd, flags, prot, (long)result_pa, error, handle, maxprot);
return error;
}
/*
 * Handle a PAGER_REQ_RELEASE request: drop the pager identified by handle
 * and unmap the one-page mapping that backs it.
 *
 * The pager entry is removed from pager_list and freed first; then the
 * VMA at handle is checked (it must be exactly one page starting at
 * handle and be file backed) and unmapped.
 *
 * Returns 0 on success, the error from down_interruptible() if the wait
 * for pager_sem was interrupted, -EBADF if no pager or no matching VMA
 * exists, or the error from do_munmap().
 *
 * mmap_sem is taken only on the path that actually inspects the address
 * space; the early failure paths leave it alone.
 */
static int pager_req_release(ihk_os_t os, uintptr_t handle)
{
	struct vm_area_struct *vma;
	struct pager *pager;
	struct pager *next;
	struct pager *victim = NULL;
	int error;

	printk("pager_req_release(%p,%lx)\n", os, handle);

	error = down_interruptible(&pager_sem);
	if (error) {
		printk("pager_req_release(%p,%lx):signaled. %d\n", os, handle, error);
		goto out;
	}

	list_for_each_entry_safe(pager, next, &pager_list, list) {
		if ((uintptr_t)pager->handle == handle) {
			list_del(&pager->list);
			victim = pager;
			break;
		}
	}
	up(&pager_sem);

	if (victim == NULL) {
		error = -EBADF;
		printk("pager_req_release(%p,%lx):pager not found. %d\n", os, handle, error);
		goto out;
	}
	kfree(victim);

	down_write(&current->mm->mmap_sem);

	vma = find_vma(current->mm, handle);
	if (vma == NULL) {
		error = -EBADF;
		printk("pager_req_release(%p,%lx):vma not found. %d\n", os, handle, error);
		goto out_unlock;
	}

	/* the handle must name exactly the one-page mapping of the pager */
	if ((vma->vm_start != handle) || (vma->vm_end != (handle + PAGE_SIZE))) {
		error = -EBADF;
		printk("pager_req_release(%p,%lx):invalid vma. %d\n", os, handle, error);
		goto out_unlock;
	}

	if (vma->vm_file == NULL) {
		error = -EBADF;
		printk("pager_req_release(%p,%lx):file not found. %d\n", os, handle, error);
		goto out_unlock;
	}

	error = do_munmap(current->mm, handle, PAGE_SIZE);
	if (error) {
		printk("pager_req_release(%p,%lx):do_munmap failed. %d\n", os, handle, error);
	}

out_unlock:
	up_write(&current->mm->mmap_sem);
out:
	printk("pager_req_release(%p,%lx): %d\n", os, handle, error);
	return error;
}
/*
 * Handle a PAGER_REQ_READ request: read size bytes at offset off from the
 * file behind the pager handle into the peer memory at physical address
 * rpa.
 *
 * The file is found through the VMA at handle, which must be exactly one
 * page starting at handle and be file backed. A short read is padded
 * with zeroes up to size.
 *
 * Returns 0 on success, -EBADF if the handle does not name a valid
 * mapping, -EIO if the padding fails, or the error from vfs_read().
 */
static int pager_req_read(ihk_os_t os, uintptr_t handle, off_t off, size_t size, uintptr_t rpa)
{
ihk_device_t dev = ihk_os_to_dev(os);
struct vm_area_struct *vma;
int error;
struct file *file;
uintptr_t phys;
void *buf;
mm_segment_t fs;
loff_t pos;
ssize_t ss;
printk("pager_req_read(%lx,%lx,%lx,%lx)\n", handle, off, size, rpa);
down_read(&current->mm->mmap_sem);
vma = find_vma(current->mm, handle);
if (vma == 0) {
error = -EBADF;
printk("pager_req_read(%lx,%lx,%lx,%lx):vma not found. %d\n", handle, off, size, rpa, error);
up_read(&current->mm->mmap_sem);
goto out;
}
if ((vma->vm_start != handle) || (vma->vm_end != (handle + PAGE_SIZE))) {
error = -EBADF;
printk("pager_req_read(%lx,%lx,%lx,%lx):invalid vma. %d\n", handle, off, size, rpa, error);
up_read(&current->mm->mmap_sem);
goto out;
}
file = vma->vm_file;
if (file == NULL) {
error = -EBADF;
printk("pager_req_read(%lx,%lx,%lx,%lx):file not found. %d\n", handle, off, size, rpa, error);
up_read(&current->mm->mmap_sem);
goto out;
}
/* hold our own file reference so mmap_sem can be dropped before the read */
get_file(file);
up_read(&current->mm->mmap_sem);
/* make the peer's buffer addressable from the host kernel */
phys = ihk_device_map_memory(dev, rpa, size);
buf = ihk_device_map_virtual(dev, phys, size, NULL, 0);
/* switch the address limit to KERNEL_DS around the read into buf */
fs = get_fs();
set_fs(KERNEL_DS);
pos = off;
ss = vfs_read(file, buf, size, &pos);
if ((ss >= 0) && (ss != size)) {
/* short read: zero-fill the remainder of the buffer */
if (clear_user(buf+ss, size-ss) == 0) {
ss = size;
}
else {
ss = -EIO;
}
}
set_fs(fs);
ihk_device_unmap_virtual(dev, buf, size);
ihk_device_unmap_memory(dev, phys, size);
fput(file);
if (ss < 0) {
error = ss;
printk("pager_req_read(%lx,%lx,%lx,%lx):pread failed. %d\n", handle, off, size, rpa, error);
goto out;
}
error = 0;
out:
printk("pager_req_read(%lx,%lx,%lx,%lx): %d\n", handle, off, size, rpa, error);
return error;
}
/*
 * Dispatch a pager request received as a system call request.
 *
 * args[0] selects the operation; the remaining arguments are passed on to
 * the matching pager_req_*() handler. Returns the handler's result, or
 * -ENOSYS for an unknown operation.
 */
static int pager_call(ihk_os_t os, struct syscall_request *req)
{
#define PAGER_REQ_CREATE 0x0001
#define PAGER_REQ_RELEASE 0x0002
#define PAGER_REQ_READ 0x0003
	int rc = -ENOSYS;

	printk("pager_call(%p %#lx)\n", req, req->args[0]);

	switch (req->args[0]) {
	case PAGER_REQ_CREATE:
		rc = pager_req_create(os, req->args[1], req->args[2],
				req->args[3], req->args[4]);
		break;

	case PAGER_REQ_RELEASE:
		rc = pager_req_release(os, req->args[1]);
		break;

	case PAGER_REQ_READ:
		rc = pager_req_read(os, req->args[1], req->args[2],
				req->args[3], req->args[4]);
		break;

	default:
		/* unknown operation: rc keeps its -ENOSYS default */
		break;
	}

	printk("pager_call(%p %#lx): %d\n", req, req->args[0], rc);
	return rc;
}
/*
 * Try to serve a forwarded system call inside the host kernel.
 *
 * Only __NR_mmap is handled: it is treated as a pager request and its
 * result is posted to the channel with __return_syscall(). Returns 0
 * when the request was handled here and -ENOSYS when it was not.
 */
int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall_request *sc)
{
	int error;

	printk("__do_in_kernel_syscall(%p,%p,%p %ld)\n", os, c, sc, sc->number);

	if (sc->number == __NR_mmap) {
		long ret = pager_call(os, sc);

		__return_syscall(c, ret);
		error = 0;
	}
	else {
		error = -ENOSYS;
	}

	printk("__do_in_kernel_syscall(%p,%p,%p %ld): %d\n", os, c, sc, sc->number, error);
	return error;
}

View File

@ -635,12 +635,33 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
pthread_mutex_unlock(lock);
return w.sr.args[0];
case __NR_mmap:
case __NR_munmap:
case __NR_mprotect:
/* reserved for internal use */
do_syscall_return(fd, cpu, -ENOSYS, 0, 0, 0, 0);
break;
case __NR_mmap: {
// w.sr.args[0] is converted to MIC physical address
__dprintf("mcexec.c,mmap,MIC-paddr=%lx,len=%lx,prot=%lx,flags=%lx,fd=%lx,offset=%lx\n",
w.sr.args[0], w.sr.args[1], w.sr.args[2], w.sr.args[3], w.sr.args[4], w.sr.args[5]);
off_t old_off = lseek(w.sr.args[4], 0, SEEK_CUR);
if(old_off == -1) { __dprint("mcexec.c,mmap,lseek failed\n"); ret = -errno; goto mmap_out; }
off_t rlseek = lseek(w.sr.args[4], w.sr.args[5], SEEK_SET);
if(rlseek == -1) { __dprint("mcexec.c,mmap,lseek failed\n"); ret = -errno; goto mmap_out; }
ssize_t toread = w.sr.args[1];
ret = 0;
while(toread > 0) {
__dprintf("mcexec.c,mmap,read,addr=%lx,len=%lx\n", (long int)((void *)dma_buf + w.sr.args[1] - toread), toread);
ssize_t rread = read(w.sr.args[4], (void *)dma_buf + w.sr.args[1] - toread, toread);
if(rread == 0) {
__dprint("mcexec.c,mmap,read==0\n");
goto mmap_zero_out;
} else if(rread < 0) {
__dprint("mcexec.c,mmap,read failed\n"); ret = -errno; break;
}
toread -= rread;
}
mmap_zero_out:
rlseek = lseek(w.sr.args[4], old_off, SEEK_SET);
if(rlseek == -1) { __dprint("mcexec.c,mmap,lseek failed\n"); ret = -errno; }
mmap_out:
do_syscall_return(fd, cpu, ret, 1, (unsigned long)dma_buf, w.sr.args[0], w.sr.args[1]);
break; }
#ifdef USE_SYSCALL_MOD_CALL
case 303:{

View File

@ -1,6 +1,6 @@
IHKDIR=$(IHKBASE)/$(TARGETDIR)
OBJS = init.o mem.o debug.o mikc.o listeners.o ap.o syscall.o cls.o host.o
OBJS += process.o copy.o waitq.o futex.o timer.o plist.o memobj.o
OBJS += process.o copy.o waitq.o futex.o timer.o plist.o
DEPSRCS=$(wildcard $(SRC)/*.c)
CFLAGS += -I$(SRC)/include -mcmodel=kernel -D__KERNEL__

View File

@ -1,6 +1,6 @@
IHKDIR=$(IHKBASE)/$(TARGETDIR)
OBJS = init.o mem.o debug.o mikc.o listeners.o ap.o syscall.o cls.o host.o
OBJS += process.o copy.o waitq.o futex.o timer.o plist.o memobj.o
OBJS += process.o copy.o waitq.o futex.o timer.o plist.o
DEPSRCS=$(wildcard $(SRC)/*.c)
CFLAGS += -I$(SRC)/include -mcmodel=kernel -D__KERNEL__

View File

@ -92,13 +92,12 @@ static int process_msg_prepare_process(unsigned long rphys)
range_npages = (e - s) >> PAGE_SHIFT;
flags = VR_NONE;
flags |= PROT_TO_VR_FLAG(pn->sections[i].prot);
flags |= VRFLAG_PROT_TO_MAXPROT(flags);
if((up_v = ihk_mc_alloc_pages(range_npages, IHK_MC_AP_NOWAIT)) == NULL){
goto err;
}
up = virt_to_phys(up_v);
if(add_process_memory_range(proc, s, e, up, flags, NULL, 0) != 0){
if(add_process_memory_range(proc, s, e, up, flags) != 0){
ihk_mc_free_pages(up_v, range_npages);
goto err;
}
@ -169,32 +168,29 @@ static int process_msg_prepare_process(unsigned long rphys)
/* Map system call stuffs */
flags = VR_RESERVED | VR_PROT_READ | VR_PROT_WRITE;
flags |= VRFLAG_PROT_TO_MAXPROT(flags);
addr = proc->vm->region.map_start - PAGE_SIZE * SCD_RESERVED_COUNT;
e = addr + PAGE_SIZE * DOORBELL_PAGE_COUNT;
if(add_process_memory_range(proc, addr, e,
cpu_local_var(scp).doorbell_pa,
VR_REMOTE | flags, NULL, 0) != 0){
VR_REMOTE | flags) != 0){
goto err;
}
addr = e;
e = addr + PAGE_SIZE * REQUEST_PAGE_COUNT;
if(add_process_memory_range(proc, addr, e,
cpu_local_var(scp).request_pa,
VR_REMOTE | flags, NULL, 0) != 0){
VR_REMOTE | flags) != 0){
goto err;
}
addr = e;
e = addr + PAGE_SIZE * RESPONSE_PAGE_COUNT;
if(add_process_memory_range(proc, addr, e,
cpu_local_var(scp).response_pa,
flags, NULL, 0) != 0){
flags) != 0){
goto err;
}
/* Map, copy and update args and envs */
flags = VR_PROT_READ | VR_PROT_WRITE;
flags |= VRFLAG_PROT_TO_MAXPROT(flags);
addr = e;
e = addr + PAGE_SIZE * ARGENV_PAGE_COUNT;
@ -204,7 +200,7 @@ static int process_msg_prepare_process(unsigned long rphys)
args_envs_p = virt_to_phys(args_envs);
if(add_process_memory_range(proc, addr, e, args_envs_p,
flags, NULL, 0) != 0){
VR_PROT_READ|VR_PROT_WRITE) != 0){
ihk_mc_free_pages(args_envs, ARGENV_PAGE_COUNT);
goto err;
}

View File

@ -1,22 +0,0 @@
#ifndef HEADER_MEMOBJ_H
#define HEADER_MEMOBJ_H
#include <ihk/types.h>
#include <ihk/atomic.h>
#include <ihk/lock.h>
#include <list.h>
/*
 * Memory object handled by the memobj_*() functions declared below.
 * NOTE(review): field roles are inferred from names and types only; the
 * implementation is not visible here -- confirm before relying on them.
 */
struct memobj {
/* list linkage; presumably a list of all memory objects -- confirm */
struct list_head list;
/* presumably the reference count changed by memobj_ref()/memobj_release() */
ihk_atomic_t ref;
/* presumably the pager handle obtained from the host -- confirm */
uintptr_t handle;
/* head of a list of pages; presumably the pages backing this object */
struct list_head page_list;
/* presumably protects page_list -- confirm */
ihk_spinlock_t page_list_lock;
};
int memobj_create(int fd, int flags, int prot, struct memobj **objp, int *maxprotp);
void memobj_ref(struct memobj *obj);
void memobj_release(struct memobj *obj);
int memobj_get_page(struct memobj *obj, off_t off, size_t pgsize, uintptr_t *physp);
#endif /* HEADER_MEMOBJ_H */

View File

@ -2,25 +2,16 @@
#define __HEADER_PAGE_H
struct page {
struct list_head list;
uint8_t mode;
uint8_t padding[3];
int32_t count;
off_t offset;
struct list_head list;
uint64_t flags;
int64_t count;
};
/* mode */
enum page_mode {
PM_NONE = 0x00,
PM_PENDING_FREE = 0x01,
PM_PAGEIO = 0x02,
PM_MAPPED = 0x03,
PM_ANON_COW = 0x04,
};
/* flags */
#define PAGE_IN_LIST 0x0001UL
struct page *phys_to_page(uintptr_t phys);
uintptr_t page_to_phys(struct page *page);
int page_unmap(struct page *page);
void *allocate_pages(int npages, enum ihk_mc_ap_flag flag);
void free_pages(void *va, int npages);

View File

@ -1,26 +0,0 @@
/*
 * pager.h - request codes and result layout for pager requests.
 * NOTE(review): the prototypes quoted in the comments below describe the
 * request arguments only; no such functions are declared in this header.
 */
#ifndef HEADER_PAGER_H
#define HEADER_PAGER_H
#include <ihk/types.h>
/* Pager request codes. */
enum pager_op {
PAGER_REQ_CREATE = 0x0001,
PAGER_REQ_RELEASE = 0x0002,
PAGER_REQ_READ = 0x0003,
};
/*
 * PAGER_REQ_CREATE:
 * int pager_req_create(int fd, int flags, int prot, uintptr_t result_rpa);
 */
/*
 * Result record for PAGER_REQ_CREATE.
 *  handle  - handle of the created object
 *  maxprot - protection bits reported for the object
 *            (presumably the maximum the file permits; confirm with the pager)
 */
struct pager_create_result {
uintptr_t handle;
int maxprot;
};
/*
 * PAGER_REQ_RELEASE:
 * int pager_req_release(uintptr_t handle);
 */
/*
 * PAGER_REQ_READ:
 * int pager_req_read(uintptr_t handle, off_t off, size_t size, uintptr_t buf_rpa);
 */
#endif /* HEADER_PAGER_H */

View File

@ -7,7 +7,6 @@
#include <ihk/atomic.h>
#include <list.h>
#include <signal.h>
#include <memobj.h>
#define VR_NONE 0x0
#define VR_STACK 0x1
@ -15,21 +14,13 @@
#define VR_IO_NOCACHE 0x100
#define VR_REMOTE 0x200
#define VR_DEMAND_PAGING 0x1000
#define VR_PRIVATE 0x2000
#define VR_PROT_NONE 0x00000000
#define VR_PROT_READ 0x00010000
#define VR_PROT_WRITE 0x00020000
#define VR_PROT_EXEC 0x00040000
#define VR_PROT_MASK 0x00070000
#define VR_MAXPROT_NONE 0x00000000
#define VR_MAXPROT_READ 0x00100000
#define VR_MAXPROT_WRITE 0x00200000
#define VR_MAXPROT_EXEC 0x00400000
#define VR_MAXPROT_MASK 0x00700000
#define PROT_TO_VR_FLAG(prot) (((unsigned long)(prot) << 16) & VR_PROT_MASK)
#define VRFLAG_PROT_TO_MAXPROT(vrflag) (((vrflag) & VR_PROT_MASK) << 4)
#define VRFLAG_MAXPROT_TO_PROT(vrflag) (((vrflag) & VR_MAXPROT_MASK) >> 4)
#define PS_RUNNING 0x1
#define PS_INTERRUPTIBLE 0x2
@ -50,8 +41,6 @@ struct vm_range {
struct list_head list;
unsigned long start, end;
unsigned long flag;
struct memobj *memobj;
off_t objoff;
};
struct vm_regions {
@ -117,6 +106,7 @@ struct process_vm {
// is protected by its own lock (see ihk/manycore/generic/page_alloc.c)
};
struct process *create_process(unsigned long user_pc);
struct process *clone_process(struct process *org,
unsigned long pc, unsigned long sp);
@ -124,12 +114,10 @@ void destroy_process(struct process *proc);
void hold_process(struct process *proc);
void free_process(struct process *proc);
void free_process_memory(struct process *proc);
void flush_process_memory(struct process *proc);
int add_process_memory_range(struct process *process,
unsigned long start, unsigned long end,
unsigned long phys, unsigned long flag,
struct memobj *memobj, off_t objoff);
unsigned long phys, unsigned long flag);
int remove_process_memory_range(
struct process *process, unsigned long start, unsigned long end);
int split_process_memory_range(struct process *process,
@ -145,7 +133,6 @@ struct vm_range *next_process_memory_range(
struct process_vm *vm, struct vm_range *range);
struct vm_range *previous_process_memory_range(
struct process_vm *vm, struct vm_range *range);
int page_fault_process_memory_range(struct process *proc, struct vm_range *range, uintptr_t fault_addr, uint64_t reason);
int remove_process_region(struct process *proc,
unsigned long start, unsigned long end);
struct program_load_desc;

View File

@ -126,8 +126,6 @@ struct syscall_request {
struct syscall_response {
unsigned long status;
long ret;
unsigned long fault_address;
unsigned long fault_reason;
};
struct syscall_post {
@ -192,7 +190,6 @@ struct syscall_params {
extern int do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx);
extern int obtain_clone_cpuid();
extern long syscall_generic_forwarding(int n, ihk_mc_user_context_t *ctx);
#define DECLARATOR(number,name) __NR_##name = number,
#define SYSCALL_HANDLED(number,name) DECLARATOR(number,name)

View File

@ -1,4 +1,3 @@
#include <ihk/types.h>
#include <kmsg.h>
#include <kmalloc.h>
#include <string.h>
@ -69,12 +68,12 @@ void free_pages(void *va, int npages)
struct list_head *pendings = &cpu_local_var(pending_free_pages);
struct page *page;
page = phys_to_page(virt_to_phys(va));
if (page->mode != PM_NONE) {
panic("free_pages:not PM_NONE");
}
if (pendings->next != NULL) {
page->mode = PM_PENDING_FREE;
page = phys_to_page(virt_to_phys(va));
if (page->flags & PAGE_IN_LIST) {
panic("free_pages");
}
page->flags |= PAGE_IN_LIST;
page->count = npages;
list_add_tail(&page->list, pendings);
return;
@ -104,10 +103,10 @@ void finish_free_pages_pending(void)
}
list_for_each_entry_safe(page, next, pendings, list) {
if (page->mode != PM_PENDING_FREE) {
panic("free_pending_pages:not PM_PENDING_FREE");
if (!(page->flags & PAGE_IN_LIST)) {
panic("free_pending_pages");
}
page->mode = PM_NONE;
page->flags &= ~PAGE_IN_LIST;
list_del(&page->list);
ihk_pagealloc_free(pa_allocator, page_to_phys(page), page->count);
}
@ -144,39 +143,72 @@ static struct ihk_mc_interrupt_handler query_free_mem_handler = {
void sigsegv(void *);
static void unhandled_page_fault(struct process *proc, unsigned long address, void *regs)
static void page_fault_handler(unsigned long address, void *regs,
unsigned long rbp)
{
struct process_vm *vm = proc->vm;
struct vm_range *range;
char found;
struct vm_range *range, *next;
char found = 0;
int irqflags;
unsigned long error = ((struct x86_regs *)regs)->error;
irqflags = kprintf_lock();
__kprintf("[%d] Page fault for 0x%lX\n",
ihk_mc_get_processor_id(), address);
__kprintf("%s for %s access in %s mode (reserved bit %s set), "
"it %s an instruction fetch\n",
(error & PF_PROT ? "protection fault" : "no page found"),
(error & PF_WRITE ? "write" : "read"),
(error & PF_USER ? "user" : "kernel"),
(error & PF_RSVD ? "was" : "wasn't"),
(error & PF_INSTR ? "was" : "wasn't"));
__kprintf("[%d] Page fault for 0x%lX, (rbp: 0x%lX)\n",
ihk_mc_get_processor_id(), address, rbp);
found = 0;
list_for_each_entry(range, &vm->vm_range_list, list) {
__kprintf("%s for %s access in %s mode (reserved bit %s set), it %s an instruction fetch\n",
(error & PF_PROT ? "protection fault" : "no page found"),
(error & PF_WRITE ? "write" : "read"),
(error & PF_USER ? "user" : "kernel"),
(error & PF_RSVD ? "was" : "wasn't"),
(error & PF_INSTR ? "was" : "wasn't"));
list_for_each_entry_safe(range, next,
&cpu_local_var(current)->vm->vm_range_list,
list) {
if (range->start <= address && range->end > address) {
__kprintf("address is in range, flag: 0x%X! \n", range->flag);
if(range->flag & VR_DEMAND_PAGING){
//allocate page for demand paging
__kprintf("demand paging\n");
void* pa = allocate_pages(1, IHK_MC_AP_CRITICAL);
if(!pa){
kprintf_unlock(irqflags);
panic("allocate_pages failed");
}
__kprintf("physical memory area obtained %lx\n", virt_to_phys(pa));
{
enum ihk_mc_pt_attribute flag = 0;
struct process *process = cpu_local_var(current);
unsigned long flags = ihk_mc_spinlock_lock(&process->vm->page_table_lock);
const enum ihk_mc_pt_attribute attr = flag | PTATTR_WRITABLE | PTATTR_USER | PTATTR_FOR_USER;
int rc = ihk_mc_pt_set_page(process->vm->page_table, (void*)(address & PAGE_MASK), virt_to_phys(pa), attr);
if(rc != 0) {
ihk_mc_spinlock_unlock(&process->vm->page_table_lock, flags);
__kprintf("ihk_mc_pt_set_page failed,rc=%d,%p,%lx,%08x\n", rc, (void*)(address & PAGE_MASK), virt_to_phys(pa), attr);
ihk_mc_pt_print_pte(process->vm->page_table, (void*)address);
goto fn_fail;
}
ihk_mc_spinlock_unlock(&process->vm->page_table_lock, flags);
__kprintf("update_process_page_table success\n");
}
kprintf_unlock(irqflags);
memset(pa, 0, PAGE_SIZE);
return;
}
found = 1;
__kprintf("address is in range, flag: 0x%X! \n",
range->flag);
ihk_mc_pt_print_pte(vm->page_table, (void*)address);
ihk_mc_pt_print_pte(cpu_local_var(current)->vm->page_table,
(void*)address);
break;
}
}
if (!found) {
if (!found)
__kprintf("address is out of range! \n");
}
fn_fail:
kprintf_unlock(irqflags);
/* TODO */
@ -184,72 +216,19 @@ static void unhandled_page_fault(struct process *proc, unsigned long address, vo
#ifdef DEBUG_PRINT_MEM
{
uint64_t *sp = (void *)REGS_GET_STACK_POINTER(regs);
kprintf("*rsp:%lx,*rsp+8:%lx,*rsp+16:%lx,*rsp+24:%lx,\n",
sp[0], sp[1], sp[2], sp[3]);
const struct x86_regs *_regs = regs;
dkprintf("*rsp:%lx,*rsp+8:%lx,*rsp+16:%lx,*rsp+24:%lx,\n",
*((unsigned long*)_regs->rsp),
*((unsigned long*)_regs->rsp+8),
*((unsigned long*)_regs->rsp+16),
*((unsigned long*)_regs->rsp+24)
);
}
#endif
#if 0
panic("mem fault");
#endif
sigsegv(regs);
return;
}
/*
 * Page fault entry point.
 *
 * Looks up the vm_range covering `address` while holding the process's
 * memory_range_lock, checks that the attempted access kind (write,
 * instruction fetch or read, derived from `reason`) is permitted by the
 * range flags, and asks page_fault_process_memory_range() to resolve the
 * fault.  Any failure is reported through unhandled_page_fault().
 */
static void page_fault_handler(unsigned long address, unsigned long reason, void *regs)
{
	struct process *proc = cpu_local_var(current);
	struct process_vm *vm = proc->vm;
	struct vm_range *range;
	unsigned long wanted;
	unsigned long denied;
	int error;

	kprintf("[%d]page_fault_handler(%lx,%lx,%p)\n",
			ihk_mc_get_processor_id(), address, reason, regs);

	ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);

	range = lookup_process_memory_range(vm, address, address+1);
	if (range == NULL) {
		kprintf("page_fault_handler(%lx,%lx,%p):out of range\n",
				address, reason, regs);
		unhandled_page_fault(proc, address, regs);
	}
	else {
		/* translate the fault reason into the protection bit it needs */
		wanted = (reason & PF_WRITE) ? VR_PROT_WRITE
			: (reason & PF_INSTR) ? VR_PROT_EXEC
			: VR_PROT_READ;
		denied = wanted & ~range->flag;

		if (denied) {
			kprintf("page_fault_handler(%lx,%lx,%p):access denied. %lx\n",
					address, reason, regs, denied);
			unhandled_page_fault(proc, address, regs);
		}
		else {
			error = page_fault_process_memory_range(proc, range, address, reason);
			if (error) {
				kprintf("page_fault_handler(%lx,%lx,%p):fault range failed. %d\n",
						address, reason, regs, error);
				unhandled_page_fault(proc, address, regs);
			}
		}
	}

	ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
	kprintf("[%d]page_fault_handler(%lx,%lx,%p):\n",
			ihk_mc_get_processor_id(), address, reason, regs);
}
static void page_allocator_init(void)
@ -310,7 +289,6 @@ static void page_allocator_init(void)
&query_free_mem_handler);
}
#if 1
struct page *phys_to_page(uintptr_t phys)
{
int64_t ix;
@ -338,26 +316,6 @@ uintptr_t page_to_phys(struct page *page)
return phys;
}
/*
 * Drop one mapping of a PM_MAPPED page.
 *
 * Returns 0 while other mappings remain.  When the last mapping goes away
 * the page is unlinked from its list, reset to PM_NONE and 1 is returned.
 * Panics if the page is not in PM_MAPPED mode.
 */
int page_unmap(struct page *page)
{
	int was_last;

	kprintf("page_unmap(%p %x %d)\n", page, page->mode, page->count);

	if (page->mode != PM_MAPPED) {
		panic("page_unmap:not PM_MAPPED");
	}

	page->count -= 1;
	was_last = !(page->count > 0);

	if (!was_last) {
		/* still mapped somewhere else */
		kprintf("page_unmap(%p %x %d): 0\n", page, page->mode, page->count);
		return 0;
	}

	/* that was the final mapping */
	list_del(&page->list);
	page->mode = PM_NONE;
	kprintf("page_unmap(%p %x %d): 1\n", page, page->mode, page->count);
	return 1;
}
static void page_init(void)
{
size_t npages;
@ -372,7 +330,6 @@ static void page_init(void)
memset(pa_pages, 0, allocsize);
return;
}
#endif
void register_kmalloc(void)
{

View File

@ -1,221 +0,0 @@
#include <ihk/atomic.h>
#include <ihk/cpu.h>
#include <ihk/debug.h>
#include <ihk/lock.h>
#include <ihk/mm.h>
#include <ihk/types.h>
#include <errno.h>
#include <kmalloc.h>
#include <kmsg.h>
#include <memobj.h>
#include <memory.h>
#include <page.h>
#include <pager.h>
#include <string.h>
#include <syscall.h>
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
static ihk_spinlock_t memobj_list_lock = SPIN_LOCK_UNLOCKED;
static LIST_HEAD(memobj_list);
/*
 * memobj_create - obtain the memory object for the file behind `fd`.
 *
 * Forwards a PAGER_REQ_CREATE request (via the __NR_mmap forwarding path)
 * with the physical address of a local result buffer.
 *  - On success a fresh memobj is initialised with ref == 1 and linked
 *    into memobj_list.
 *  - On -EALREADY the handle in the result buffer is looked up in
 *    memobj_list; a match gets an extra reference and is returned instead.
 *    If no match is found the request is retried.
 *
 * @fd, @flags, @prot: forwarded unchanged to the pager.
 * @objpp:    receives the object on success.
 * @maxprotp: receives result.maxprot on success.
 *
 * Returns 0 on success, -ENOMEM if the object cannot be allocated, or the
 * error returned by the forwarded request.  On failure *objpp and
 * *maxprotp are not written; note the final trace still reads them, so
 * callers should initialise both.
 */
int memobj_create(int fd, int flags, int prot, struct memobj **objpp, int *maxprotp)
{
	ihk_mc_user_context_t ctx;
	struct pager_create_result result;
	int error;
	struct memobj *memobj = NULL;
	struct memobj *obj;

	kprintf("memobj_create(%d,%x,%x)\n", fd, flags, prot);
	memobj = kmalloc(sizeof(*memobj), IHK_MC_AP_NOWAIT);
	if (memobj == NULL) {
		error = -ENOMEM;
		kprintf("memobj_create(%d,%x,%x):kmalloc failed. %d\n", fd, flags, prot, error);
		goto out;
	}

retry:
	ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_CREATE;
	ihk_mc_syscall_arg1(&ctx) = fd;
	ihk_mc_syscall_arg2(&ctx) = flags;
	ihk_mc_syscall_arg3(&ctx) = prot;
	ihk_mc_syscall_arg4(&ctx) = virt_to_phys(&result);

	error = syscall_generic_forwarding(__NR_mmap, &ctx);
	if (error == -EALREADY) {
		kprintf("memobj_create(%d,%x,%x,%p):create failed. %d\n",
				fd, flags, prot, objpp, error);
		/* the object already exists: find it by handle and share it */
		ihk_mc_spinlock_lock_noirq(&memobj_list_lock);
		list_for_each_entry(obj, &memobj_list, list) {
			if (obj->handle == result.handle) {
				memobj_ref(obj);
				ihk_mc_spinlock_unlock_noirq(&memobj_list_lock);
				kfree(memobj);
				memobj = obj;
				goto found;
			}
		}
		ihk_mc_spinlock_unlock_noirq(&memobj_list_lock);
		/* not in the list (yet, or any more): ask again */
		goto retry;
	}
	else if (error) {
		kprintf("memobj_create(%d,%x,%x,%p):create failed. %d\n",
				fd, flags, prot, objpp, error);
		goto out;
	}

	memset(memobj, 0, sizeof(*memobj));
	ihk_atomic_set(&memobj->ref, 1);
	memobj->handle = result.handle;
	INIT_LIST_HEAD(&memobj->page_list);
	ihk_mc_spinlock_init(&memobj->page_list_lock);

	ihk_mc_spinlock_lock_noirq(&memobj_list_lock);
	list_add(&memobj->list, &memobj_list);
	ihk_mc_spinlock_unlock_noirq(&memobj_list_lock);

found:
	error = 0;
	*objpp = memobj;
	*maxprotp = result.maxprot;
	memobj = NULL;	/* ownership handed to the caller */

out:
	/*
	 * Anything still held here was allocated above but never published
	 * (forwarding failed); release it so the error path does not leak.
	 */
	if (memobj != NULL) {
		kfree(memobj);
	}
	kprintf("memobj_create(%d,%x,%x):%d %p %x\n", fd, flags, prot, error, *objpp, *maxprotp);
	return error;
}
/* Take one more reference on `obj` (atomic increment of obj->ref). */
void memobj_ref(struct memobj *obj)
{
	kprintf("memobj_ref(%p):\n", obj);
	ihk_atomic_inc(&obj->ref);
}
/*
 * Drop one reference on `obj`.
 *
 * The decrement and, for the final reference, the removal from
 * memobj_list both happen under memobj_list_lock.  After the final
 * reference is gone a PAGER_REQ_RELEASE request is forwarded for the
 * object's handle and the object is freed; a failing release request is
 * only logged.
 */
void memobj_release(struct memobj *obj)
{
	ihk_mc_user_context_t ctx;
	int error;
	int last_ref;

	kprintf("memobj_release(%p)\n", obj);

	ihk_mc_spinlock_lock_noirq(&memobj_list_lock);
	last_ref = ihk_atomic_dec_and_test(&obj->ref) ? 1 : 0;
	if (last_ref) {
		list_del(&obj->list);
	}
	ihk_mc_spinlock_unlock_noirq(&memobj_list_lock);

	if (!last_ref) {
		kprintf("memobj_release(%p):keep\n", obj);
		return;
	}

	ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_RELEASE;
	ihk_mc_syscall_arg1(&ctx) = obj->handle;
	error = syscall_generic_forwarding(__NR_mmap, &ctx);
	if (error) {
		kprintf("memobj_release(%p):release failed. %d\n", obj, error);
		/* carry on and free the object anyway */
	}

	kfree(obj);
	kprintf("memobj_release(%p):free\n", obj);
}
/*
 * memobj_get_page - get the physical page backing offset `off` of `obj`.
 *
 * If the object already has a PM_MAPPED page for `off`, its count is
 * incremented and its physical address returned.  Otherwise a new page is
 * allocated, inserted into obj->page_list in PM_PAGEIO mode, filled by a
 * forwarded PAGER_REQ_READ request, and then switched to PM_MAPPED with
 * count 1.
 *
 * Only pgsize == PAGE_SIZE is accepted (anything else yields -ENOMEM).
 * Returns 0 and stores the physical address in *physp on success, -ENOMEM
 * on allocation failure, or the error of the forwarded read request.
 */
int memobj_get_page(struct memobj *obj, off_t off, size_t pgsize, uintptr_t *physp)
{
int error;
void *virt = NULL;
uintptr_t phys = -1;
ihk_mc_user_context_t ctx;
struct page *page;
kprintf("memobj_get_page(%p,%lx,%lx,%p)\n", obj, off, pgsize, physp);
if (pgsize != PAGE_SIZE) {
error = -ENOMEM;
goto out;
}
retry:
/*
 * Scan the object's page list under page_list_lock.  The loop runs at
 * most twice per retry: once to find out that a page must be allocated
 * (the lock is dropped for the allocation), and once more to re-check
 * that nobody inserted the page meanwhile.
 */
for (;;) {
ihk_mc_spinlock_lock_noirq(&obj->page_list_lock);
list_for_each_entry(page, &obj->page_list, list) {
if ((page->mode != PM_PAGEIO) && (page->mode != PM_MAPPED)) {
panic("memobj_get_page:invalid obj page");
}
if (page->offset == off) {
if (page->mode == PM_PAGEIO) {
/* a read for this page is still in flight: rescan until it finishes */
ihk_mc_spinlock_unlock_noirq(&obj->page_list_lock);
goto retry;
}
/* already resident: share it */
++page->count;
phys = page_to_phys(page);
ihk_mc_spinlock_unlock_noirq(&obj->page_list_lock);
goto found;
}
}
if (virt != NULL) {
/* not in the list and we hold a fresh page: leave WITH the lock held */
page = phys_to_page(phys);
break;
}
ihk_mc_spinlock_unlock_noirq(&obj->page_list_lock);
virt = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT);
if (virt == NULL) {
error = -ENOMEM;
goto out;
}
phys = virt_to_phys(virt);
}
/* page_list_lock is still held here */
if (page->mode != PM_NONE) {
panic("memobj_get_page:invalid new page");
}
/* publish the page as "I/O in progress" before dropping the lock */
page->mode = PM_PAGEIO;
page->offset = off;
list_add(&page->list, &obj->page_list);
ihk_mc_spinlock_unlock_noirq(&obj->page_list_lock);
/* ask the pager to read the file contents into the new page */
ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_READ;
ihk_mc_syscall_arg1(&ctx) = obj->handle;
ihk_mc_syscall_arg2(&ctx) = off;
ihk_mc_syscall_arg3(&ctx) = pgsize;
ihk_mc_syscall_arg4(&ctx) = phys;
error = syscall_generic_forwarding(__NR_mmap, &ctx);
if (error) {
kprintf("memobj_get_page(%p,%lx,%lx,%p):read failed. %d\n",
obj, off, pgsize, physp, error);
/* undo the publication; virt is still set, so the page is freed at out: */
ihk_mc_spinlock_lock_noirq(&obj->page_list_lock);
if (page->mode != PM_PAGEIO) {
panic("memobj_get_page:invalid io page");
}
list_del(&page->list);
ihk_mc_spinlock_unlock_noirq(&obj->page_list_lock);
page->mode = PM_NONE;
goto out;
}
ihk_mc_spinlock_lock_noirq(&obj->page_list_lock);
if (page->mode != PM_PAGEIO) {
panic("memobj_get_page:invalid io page");
}
page->mode = PM_MAPPED;
page->count = 1;
ihk_mc_spinlock_unlock_noirq(&obj->page_list_lock);
/* the page now belongs to the object: keep it out of the cleanup below */
virt = NULL;
found:
error = 0;
*physp = phys;
out:
/* frees a page that was allocated but not handed to the object */
if (virt != NULL) {
ihk_mc_free_pages(virt, 1);
}
kprintf("memobj_get_page(%p,%lx,%lx,%p): %d %lx\n",
obj, off, pgsize, physp, error, phys);
return error;
}

View File

@ -211,16 +211,6 @@ int split_process_memory_range(struct process *proc, struct vm_range *range,
newrange->end = range->end;
newrange->flag = range->flag;
if (range->memobj != NULL) {
memobj_ref(range->memobj);
newrange->memobj = range->memobj;
newrange->objoff = range->objoff + (addr - range->start);
}
else {
newrange->memobj = NULL;
newrange->objoff = 0;
}
range->end = addr;
list_add(&newrange->list, &range->list);
@ -248,27 +238,13 @@ int join_process_memory_range(struct process *proc,
merging->start, merging->end);
if ((surviving->end != merging->start)
|| (surviving->flag != merging->flag)
|| (surviving->memobj != merging->memobj)) {
|| (surviving->flag != merging->flag)) {
error = -EINVAL;
goto out;
}
if (surviving->memobj != NULL) {
size_t len;
off_t endoff;
len = surviving->end - surviving->start;
endoff = surviving->objoff + len;
if (endoff != merging->objoff) {
return -EINVAL;
}
}
surviving->end = merging->end;
if (merging->memobj != NULL) {
memobj_release(merging->memobj);
}
list_del(&merging->list);
ihk_mc_free(merging);
@ -292,7 +268,7 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range)
intptr_t lpend;
#endif /* USE_LARGE_PAGES */
kprintf("free_process_memory_range(%p,%lx-%lx)\n",
dkprintf("free_process_memory_range(%p,%lx-%lx)\n",
vm, start0, end0);
start = range->start;
@ -318,17 +294,10 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range)
}
#endif /* USE_LARGE_PAGES */
if (range->memobj != NULL) {
ihk_mc_spinlock_lock_noirq(&range->memobj->page_list_lock);
}
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
error = ihk_mc_pt_free_range(vm->page_table,
(void *)start, (void *)end);
ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
if (range->memobj != NULL) {
ihk_mc_spinlock_unlock_noirq(&range->memobj->page_list_lock);
}
if (error && (error != -ENOENT)) {
ekprintf("free_process_memory_range(%p,%lx-%lx):"
"ihk_mc_pt_free_range(%lx-%lx) failed. %d\n",
@ -349,13 +318,10 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range)
}
}
if (range->memobj != NULL) {
memobj_release(range->memobj);
}
list_del(&range->list);
ihk_mc_free(range);
kprintf("free_process_memory_range(%p,%lx-%lx): 0\n",
dkprintf("free_process_memory_range(%p,%lx-%lx): 0\n",
vm, start0, end0);
return 0;
}
@ -467,8 +433,7 @@ enum ihk_mc_pt_attribute vrflag_to_ptattr(unsigned long flag)
int add_process_memory_range(struct process *process,
unsigned long start, unsigned long end,
unsigned long phys, unsigned long flag,
struct memobj *memobj, off_t offset)
unsigned long phys, unsigned long flag)
{
struct vm_range *range;
int rc;
@ -493,8 +458,6 @@ int add_process_memory_range(struct process *process,
range->start = start;
range->end = end;
range->flag = flag;
range->memobj = memobj;
range->objoff = offset;
if(range->flag & VR_DEMAND_PAGING) {
dkprintf("range: 0x%lX - 0x%lX => physicall memory area is allocated on demand (%ld) [%lx]\n",
@ -673,321 +636,6 @@ out:
return error;
}
/*
 * Resolve a not-present fault in a range without a memory object by
 * mapping freshly allocated, zero-filled pages at the faulting address.
 *
 * Runs with proc->vm->page_table_lock held for the whole operation.
 * Returns 0 on success (including the case where the address turns out
 * to be mapped already), -EFAULT if a non-null PTE without the present
 * bit is found, -ENOMEM if no page can be allocated, or the error of the
 * page-table helper that failed.
 */
static int pf_anon_page_not_present(struct process *proc, struct vm_range *range, uintptr_t fault_addr)
{
int error;
int npages;
void *virt = NULL;
void *ptepgaddr;
size_t ptepgsize;
void *pgaddr;
size_t pgsize;
int p2align;
uintptr_t phys;
enum ihk_mc_pt_attribute attr;
size_t maxpgsize;
pte_t *ptep;
kprintf("pf_anon_page_not_present(%p,%lx-%lx %lx,%lx)\n", proc, range->start, range->end, range->flag, fault_addr);
ihk_mc_spinlock_lock_noirq(&proc->vm->page_table_lock);
error = ihk_mc_pt_lookup_pte(proc->vm->page_table, (void *)fault_addr, &ptep, &ptepgaddr, &ptepgsize);
if (error && (error != -ENOENT)) {
kprintf("pf_anon_page_not_present(%p,%lx-%lx %lx,%lx):lookup pte failed. %d\n", proc, range->start, range->end, range->flag, fault_addr, error);
goto out;
}
/* a non-null PTE already exists for the address */
if (!error && (*ptep != PTE_NULL)) {
/* NOTE(review): PF_PRESENT is tested against a PTE value here - confirm it matches the PTE present bit */
if (!(*ptep & PF_PRESENT)) {
error = -EFAULT;
kprintf("pf_anon_page_not_present(%p,%lx-%lx %lx,%lx):disabled page. %d\n", proc, range->start, range->end, range->flag, fault_addr, error);
goto out;
}
/* present entry: nothing to map, just flush the TLB and report success */
error = 0;
kprintf("pf_anon_page_not_present(%p,%lx-%lx %lx,%lx):already mapped. %d\n", proc, range->start, range->end, range->flag, fault_addr, error);
flush_tlb();
goto out;
}
/* lookup returned -ENOENT: no PTE slot was reported, so use placeholders */
if (error) {
error = 0;
ptepgsize = LARGE_PAGE_SIZE;
ptepgaddr = (void *)-1;
}
maxpgsize = ptepgsize;
#ifndef USE_LARGE_PAGES
if (maxpgsize > PAGE_SIZE) {
maxpgsize = PAGE_SIZE;
}
#endif
/*
 * Pick a page size not larger than maxpgsize and try to allocate it.
 * If the allocation fails and the size was larger than PAGE_SIZE, lower
 * the limit and choose again.
 */
for (;;) {
error = ihk_mc_pt_choose_pagesize(proc->vm->page_table, (void *)range->start, (void *)range->end, (void *)fault_addr, maxpgsize, &pgaddr, &pgsize, &p2align);
if (error) {
kprintf("pf_anon_page_not_present(%p,%lx-%lx %lx,%lx):choose pagesize failed. %d\n", proc, range->start, range->end, range->flag, fault_addr, error);
goto out;
}
npages = pgsize / PAGE_SIZE;
virt = ihk_mc_alloc_aligned_pages(npages, p2align, IHK_MC_AP_NOWAIT);
if (virt) {
phys = virt_to_phys(virt);
memset(virt, 0, pgsize);
break;
}
if (pgsize <= PAGE_SIZE) {
kprintf("pf_anon_page_not_present(%p,%lx-%lx %lx,%lx):alloc pages failed\n", proc, range->start, range->end, range->flag, fault_addr);
error = -ENOMEM;
goto out;
}
maxpgsize = pgsize - 1;
}
attr = vrflag_to_ptattr(range->flag);
/* the PTE found by the lookup covers exactly the chosen page: fill it in place */
if ((ptepgaddr == pgaddr) && (ptepgsize == pgsize)) {
kprintf("HIT\n");
error = ihk_mc_pt_set_pte(proc->vm->page_table, ptep, phys, pgsize, attr);
if (error) {
kprintf("pf_anon_page_not_present(%p,%lx-%lx %lx,%lx):set pte failed. %d\n", proc, range->start, range->end, range->flag, fault_addr, error);
goto out;
}
}
else {
/* otherwise map the chosen range through the generic range setter */
error = ihk_mc_pt_set_range(proc->vm->page_table, pgaddr, pgaddr+pgsize, phys, attr);
if (error) {
kprintf("pf_anon_page_not_present(%p,%lx-%lx %lx,%lx):set range failed. %d\n", proc, range->start, range->end, range->flag, fault_addr, error);
goto out;
}
}
/* the pages are mapped now: keep them out of the cleanup below */
virt = NULL;
error = 0;
out:
ihk_mc_spinlock_unlock_noirq(&proc->vm->page_table_lock);
/* virt is only non-NULL here when pages were allocated but not mapped */
if (virt != NULL) {
ihk_mc_free_pages(virt, npages);
}
kprintf("pf_anon_page_not_present(%p,%lx-%lx %lx,%lx): %d\n", proc, range->start, range->end, range->flag, fault_addr, error);
return error;
}
/*
 * Resolve a not-present fault in a range that is backed by a memory
 * object: fetch the object page for the faulting address through
 * memobj_get_page() and map it.
 *
 * For a writable VR_PRIVATE range the page is mapped without
 * PTATTR_WRITABLE so that the first write faults again (copy-on-write).
 *
 * Runs with proc->vm->page_table_lock held.  Returns 0 on success
 * (including the case where the address is already mapped), -EFAULT if a
 * non-null PTE without the present bit is found, or the error of the
 * helper that failed.
 */
static int pf_obj_page_not_present(struct process *proc, struct vm_range *range, uintptr_t fault_addr)
{
	int error;
	int npages;
	struct page *page = NULL;
	void *pgaddr;
	size_t pgsize;
	int p2align;
	uintptr_t phys;
	enum ihk_mc_pt_attribute attr;
	size_t maxpgsize;
	off_t off;
	pte_t *ptep;

	kprintf("pf_obj_page_not_present(%p,%lx-%lx %lx,%lx)\n", proc, range->start, range->end, range->flag, fault_addr);
	ihk_mc_spinlock_lock_noirq(&proc->vm->page_table_lock);

	error = ihk_mc_pt_lookup_pte(proc->vm->page_table, (void *)fault_addr, &ptep, &pgaddr, &pgsize);
	if (error == -ENOENT) {
		maxpgsize = LARGE_PAGE_SIZE;
	}
	else if (error) {
		kprintf("pf_obj_page_not_present(%p,%lx-%lx %lx,%lx):lookup pte failed. %d\n", proc, range->start, range->end, range->flag, fault_addr, error);
		goto out;
	}
	else if (*ptep != PTE_NULL) {
		/*
		 * The parentheses matter: `!*ptep & PF_PRESENT` would negate
		 * the PTE first and therefore never detect a disabled page.
		 */
		if (!(*ptep & PF_PRESENT)) {
			error = -EFAULT;
			kprintf("pf_obj_page_not_present(%p,%lx-%lx %lx,%lx):disabled page. %d\n", proc, range->start, range->end, range->flag, fault_addr, error);
			goto out;
		}
		kprintf("pf_obj_page_not_present(%p,%lx-%lx %lx,%lx):already mapped. %d\n", proc, range->start, range->end, range->flag, fault_addr, error);
		flush_tlb();
		error = 0;
		goto out;
	}
	else {
		maxpgsize = pgsize;
	}

	/* object pages are currently always mapped with the base page size */
#ifndef USE_LARGE_PAGES
	maxpgsize = PAGE_SIZE;
#else
	/* temporary? restriction */
	maxpgsize = PAGE_SIZE;
#endif

	do {
		error = ihk_mc_pt_choose_pagesize(proc->vm->page_table, (void *)range->start, (void *)range->end, (void *)fault_addr, maxpgsize, &pgaddr, &pgsize, &p2align);
		if (error) {
			kprintf("pf_obj_page_not_present(%p,%lx-%lx %lx,%lx):choose pagesize failed. %d\n", proc, range->start, range->end, range->flag, fault_addr, error);
			goto out;
		}

		/* offset of the chosen page inside the memory object */
		off = range->objoff + ((uintptr_t)pgaddr - range->start);
		error = memobj_get_page(range->memobj, off, pgsize, &phys);
		if (error) {
			kprintf("pf_obj_page_not_present(%p,%lx-%lx %lx,%lx):get page failed. %d\n", proc, range->start, range->end, range->flag, fault_addr, error);
			goto out;
		}

		npages = pgsize / PAGE_SIZE;
		page = phys_to_page(phys);
	} while (0);

	attr = vrflag_to_ptattr(range->flag);
	if ((range->flag & VR_PRIVATE) && (range->flag & VR_PROT_WRITE)) {
		/* for copy-on-write */
		attr &= ~PTATTR_WRITABLE;
	}

	error = ihk_mc_pt_set_range(proc->vm->page_table, pgaddr, pgaddr+pgsize, phys, attr);
	if (error) {
		kprintf("pf_obj_page_not_present(%p,%lx-%lx %lx,%lx):set range failed. %d\n", proc, range->start, range->end, range->flag, fault_addr, error);
		goto out;
	}

	error = 0;
	page = NULL;	/* avoid page_unmap() */

out:
	/* page is only non-NULL when a page was obtained but could not be mapped */
	if ((page != NULL) && page_unmap(page)) {
		ihk_mc_free_pages(phys_to_virt(page_to_phys(page)), npages);
	}
	ihk_mc_spinlock_unlock_noirq(&proc->vm->page_table_lock);
	kprintf("pf_obj_page_not_present(%p,%lx-%lx %lx,%lx): %d\n", proc, range->start, range->end, range->flag, fault_addr, error);
	return error;
}
/*
 * Resolve a write-protection fault on an object-backed range by giving
 * the faulting address its own writable page.
 *
 * If the currently mapped page has a struct page and this is its only
 * mapping (count == 1), the page is taken out of the object's page list
 * and reused in place.  Otherwise a new page is allocated and the old
 * contents are copied into it.  The PTE is then rewritten with the
 * attributes derived from the range flags.
 *
 * Runs with proc->vm->page_table_lock held.  Returns 0 on success (also
 * when no PTE is found for the address), -ENOMEM if no page can be
 * allocated, or the error of ihk_mc_pt_set_pte().
 */
static int pf_obj_cow_page(struct process *proc, struct vm_range *range, uintptr_t fault_addr)
{
int error;
pte_t *ptep;
void *pgaddr;
size_t pgsize;
uintptr_t oldpa;
void *oldva;
void *newva;
uintptr_t newpa;
struct page *oldpage;
enum ihk_mc_pt_attribute attr;
kprintf("pf_obj_cow_page(%p,%lx-%lx %lx,%lx)\n", proc, range->start, range->end, range->flag, fault_addr);
ihk_mc_spinlock_lock_noirq(&proc->vm->page_table_lock);
error = ihk_mc_pt_lookup_pte(proc->vm->page_table, (void *)fault_addr, &ptep, &pgaddr, &pgsize);
if (error) {
/* no PTE for the address: flush the TLB and report success */
kprintf("pf_obj_cow_page(%p,%lx-%lx %lx,%lx):pte not found. %d\n", proc, range->start, range->end, range->flag, fault_addr, error);
flush_tlb();
error = 0;
goto out;
}
if (pgsize != PAGE_SIZE) {
panic("pf_obj_cow_page:NYI:cow large page");
}
oldpa = *ptep & PT_PHYSMASK;
oldva = phys_to_virt(oldpa);
oldpage = phys_to_page(oldpa);
if (oldpage) {
newva = NULL;
ihk_mc_spinlock_lock_noirq(&range->memobj->page_list_lock);
/*
 * The page state is examined under page_list_lock.  The lock is dropped
 * to allocate the copy target, so the state is examined again after the
 * allocation.
 */
for (;;) {
if (oldpage->mode != PM_MAPPED) {
kprintf("pf_obj_cow_page(%p,%lx-%lx %lx,%lx):invalid cow page. %p %x\n", proc, range->start, range->end, range->flag, fault_addr, range->memobj, oldpage->mode);
panic("page_fault_process_meory_range:invalid cow page");
}
if (oldpage->count == 1) {
/* sole mapping: detach the page from the object and reuse it in place */
if (newva) {
ihk_mc_free_pages(newva, 1);
}
list_del(&oldpage->list);
oldpage->mode = PM_NONE;
newpa = oldpa;
newva = oldva;
break;
}
if (oldpage->count <= 0) {
panic("pf_obj_cow_page:oldpage count corrupted");
}
if (newva) {
/* still shared: copy into the new page and drop our mapping count */
memcpy(newva, oldva, pgsize);
--oldpage->count;
break;
}
ihk_mc_spinlock_unlock_noirq(&range->memobj->page_list_lock);
newva = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT);
if (!newva) {
error = -ENOMEM;
kprintf("pf_obj_cow_page(%p,%lx-%lx %lx,%lx):alloc page failed. %d\n", proc, range->start, range->end, range->flag, fault_addr, error);
goto out;
}
ihk_mc_spinlock_lock_noirq(&range->memobj->page_list_lock);
}
ihk_mc_spinlock_unlock_noirq(&range->memobj->page_list_lock);
}
else {
/* phys_to_page() returned no struct page for the old page: always copy */
newva = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT);
if (newva == NULL) {
error = -ENOMEM;
kprintf("pf_obj_cow_page(%p,%lx-%lx %lx,%lx):alloc page failed. %d\n", proc, range->start, range->end, range->flag, fault_addr, error);
goto out;
}
memcpy(newva, oldva, pgsize);
}
newpa = virt_to_phys(newva);
attr = vrflag_to_ptattr(range->flag);
error = ihk_mc_pt_set_pte(proc->vm->page_table, ptep, newpa, pgsize, attr);
if (error) {
kprintf("pf_obj_cow_page(%p,%lx-%lx %lx,%lx):set pte failed. %d\n", proc, range->start, range->end, range->flag, fault_addr, error);
/*
 * NOTE(review): when the old page was reused in place (newva == oldva),
 * this appears to free a page the PTE still points at - confirm.
 */
ihk_mc_free_pages(newva, 1);
goto out;
}
error = 0;
out:
ihk_mc_spinlock_unlock_noirq(&proc->vm->page_table_lock);
kprintf("pf_obj_cow_page(%p,%lx-%lx %lx,%lx): %d\n", proc, range->start, range->end, range->flag, fault_addr, error);
return error;
}
/*
 * Dispatch a page fault inside `range` to the matching resolver:
 *  - page not present, range has no memory object -> pf_anon_page_not_present()
 *  - page not present, range has a memory object  -> pf_obj_page_not_present()
 *  - protection fault on a write, to a writable range that has a memory
 *    object                                        -> pf_obj_cow_page()
 * Every other combination is rejected with -EFAULT.
 *
 * Returns the resolver's result.
 */
int page_fault_process_memory_range(struct process *proc,
		struct vm_range *range, uintptr_t fault_addr, uint64_t reason)
{
	const int not_present = !(reason & PF_PROT);
	const int has_object = (range->memobj != NULL);
	int error;

	kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx)\n",
			proc, range->start, range->end, range->flag,
			fault_addr, reason);

	if (not_present) {
		if (has_object) {
			error = pf_obj_page_not_present(proc, range, fault_addr);
		}
		else {
			error = pf_anon_page_not_present(proc, range, fault_addr);
		}
	}
	else if ((reason & PF_WRITE) && (range->flag & VR_PROT_WRITE) && has_object) {
		error = pf_obj_cow_page(proc, range, fault_addr);
	}
	else {
		error = -EFAULT;
		kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):"
				"unknown fault. %d\n",
				proc, range->start, range->end, range->flag,
				fault_addr, reason, error);
	}

	kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx): %d\n",
			proc, range->start, range->end, range->flag,
			fault_addr, reason, error);
	return error;
}
int init_process_stack(struct process *process, struct program_load_desc *pn,
int argc, char **argv,
int envc, char **env)
@ -1000,18 +648,14 @@ int init_process_stack(struct process *process, struct program_load_desc *pn,
unsigned long end = process->vm->region.user_end;
unsigned long start = end - size;
int rc;
unsigned long vrflag;
if(stack == NULL)
return -ENOMEM;
memset(stack, 0, size);
vrflag = VR_STACK;
vrflag |= VR_PROT_READ | VR_PROT_WRITE | VR_PROT_EXEC;
vrflag |= VRFLAG_PROT_TO_MAXPROT(vrflag);
if ((rc = add_process_memory_range(process, start, end, virt_to_phys(stack),
vrflag, NULL, 0)) != 0) {
VR_STACK|VR_PROT_READ|VR_PROT_WRITE)) != 0) {
ihk_mc_free_pages(stack, USER_STACK_NR_PAGES);
return rc;
}
@ -1139,7 +783,7 @@ unsigned long extend_process_region(struct process *proc,
}
}
if((rc = add_process_memory_range(proc, aligned_end, aligned_new_end,
(p==0?0:virt_to_phys(p)), flag, NULL, 0)) != 0){
(p==0?0:virt_to_phys(p)), flag)) != 0){
free_pages(p, (aligned_new_end - aligned_end) >> PAGE_SHIFT);
return end;
}
@ -1164,24 +808,6 @@ int remove_process_region(struct process *proc,
return 0;
}
/*
 * Release the memory object of every range of `proc` and clear the
 * range's memobj pointer.  The range list is walked with the process's
 * memory_range_lock held.
 */
void flush_process_memory(struct process *proc)
{
	struct process_vm *space = proc->vm;
	struct vm_range *cur;

	kprintf("flush_process_memory(%p)\n", proc);

	ihk_mc_spinlock_lock_noirq(&space->memory_range_lock);
	list_for_each_entry(cur, &space->vm_range_list, list) {
		if (cur->memobj == NULL) {
			continue;
		}
		memobj_release(cur->memobj);
		cur->memobj = NULL;
	}
	ihk_mc_spinlock_unlock_noirq(&space->memory_range_lock);

	kprintf("flush_process_memory(%p):\n", proc);
}
void free_process_memory(struct process *proc)
{
struct vm_range *range, *next;

View File

@ -119,36 +119,10 @@ int do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx)
ihk_mc_get_processor_id(),
req->number);
#define STATUS_IN_PROGRESS 0
#define STATUS_COMPLETED 1
#define STATUS_PAGE_FAULT 3
while (res->status != STATUS_COMPLETED) {
while (res->status == STATUS_IN_PROGRESS) {
cpu_pause();
}
if (res->status == STATUS_PAGE_FAULT) {
volatile struct syscall_request *req = cpu_local_var(scp).request_va;
int error;
uint8_t u8;
/* do page fault */
u8 = *(volatile uint8_t *)res->fault_address; // XXX:
if (res->fault_reason) {
*(uint8_t *)res->fault_address = u8; // XXX:
}
error = 0;
/* send result */
req->number = __NR_mmap;
req->args[0] = 0x101;
req->args[1] = error;
res->status = STATUS_IN_PROGRESS;
req->valid = 1;
}
while (!res->status) {
cpu_pause();
}
dkprintf("SC(%d)[%3d] got host reply: %d \n",
ihk_mc_get_processor_id(),
req->number, res->ret);
@ -188,7 +162,6 @@ terminate(int rc, int sig, ihk_mc_user_context_t *ctx)
/* XXX: send SIGKILL to all threads in this process */
flush_process_memory(proc); /* temporary hack */
do_syscall(&request, ctx);
#define IS_DETACHED_PROCESS(proc) (1) /* should be implemented in the future */
@ -336,9 +309,6 @@ SYSCALL_DECLARE(mmap)
void *p;
int vrflags;
intptr_t phys;
struct memobj *memobj;
int maxprot;
int denied;
dkprintf("[%d]sys_mmap(%lx,%lx,%x,%x,%d,%lx)\n",
ihk_mc_get_processor_id(),
@ -418,7 +388,6 @@ SYSCALL_DECLARE(mmap)
/* do the map */
vrflags = VR_NONE;
vrflags |= PROT_TO_VR_FLAG(prot);
vrflags |= (flags & MAP_PRIVATE)? VR_PRIVATE: 0;
if (flags & MAP_ANONYMOUS) {
if (0) {
/* dummy */
@ -432,28 +401,11 @@ SYSCALL_DECLARE(mmap)
else if ((len == 64*1024*1024) || (len == 128*1024*1024)) {
vrflags |= VR_DEMAND_PAGING;
}
#if 1
vrflags |= VR_DEMAND_PAGING;
#endif
}
else {
/* mapped file */
vrflags |= VR_DEMAND_PAGING;
}
p = NULL;
phys = 0;
memobj = NULL;
maxprot = PROT_READ | PROT_WRITE | PROT_EXEC;
if (!(flags & MAP_ANONYMOUS)) {
error = memobj_create(fd, flags, prot, &memobj, &maxprot);
if (error) {
ekprintf("sys_mmap:memobj_create failed. %d\n", error);
ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock);
goto out;
}
}
else if (!(vrflags & VR_DEMAND_PAGING)
if (!(vrflags & VR_DEMAND_PAGING)
&& ((vrflags & VR_PROT_MASK) != VR_PROT_NONE)) {
npages = len >> PAGE_SHIFT;
p2align = PAGE_P2ALIGN;
@ -474,22 +426,7 @@ SYSCALL_DECLARE(mmap)
phys = virt_to_phys(p);
}
if ((flags & MAP_PRIVATE) && (maxprot & PROT_READ)) {
maxprot = PROT_READ | PROT_WRITE | PROT_EXEC;
}
denied = prot & ~maxprot;
if (denied) {
ekprintf("sys_mmap:denied %x. %x %x\n", denied, prot, maxprot);
ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock);
if (p != NULL) {
ihk_mc_free_pages(p, npages);
}
error = -EACCES;
goto out;
}
vrflags |= VRFLAG_PROT_TO_MAXPROT(PROT_TO_VR_FLAG(maxprot));
error = add_process_memory_range(proc, addr, addr+len, phys, vrflags, memobj, off);
error = add_process_memory_range(proc, addr, addr+len, phys, vrflags);
if (error) {
ekprintf("sys_mmap:add_process_memory_range"
"(%p,%lx,%lx,%lx,%lx) failed %d\n",
@ -503,6 +440,32 @@ SYSCALL_DECLARE(mmap)
}
ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock);
/* read page with pread64() */
if (!(flags & MAP_ANONYMOUS)) {
ihk_mc_user_context_t ctx2;
ssize_t ss;
ihk_mc_syscall_arg0(&ctx2) = fd;
ihk_mc_syscall_arg1(&ctx2) = addr;
ihk_mc_syscall_arg2(&ctx2) = len;
ihk_mc_syscall_arg3(&ctx2) = off;
ss = syscall_generic_forwarding(__NR_pread64, &ctx2);
if (ss < 0) {
ekprintf("sys_mmap:pread(%d,%lx,%lx,%lx) failed %ld\n",
fd, addr, len, off, (long)ss);
error = do_munmap((void *)addr, len);
if (error) {
ekprintf("sys_mmap:do_munmap(%lx,%lx) failed. %d\n",
addr, len, error);
/* fall through: the pread64() error is still returned below */
}
error = ss;
goto out;
}
}
error = 0;
out:
dkprintf("[%d]sys_mmap(%lx,%lx,%x,%x,%d,%lx): %ld %lx\n",
@ -544,7 +507,6 @@ SYSCALL_DECLARE(mprotect)
int error;
struct vm_range *changed;
const unsigned long protflags = PROT_TO_VR_FLAG(prot);
unsigned long denied;
dkprintf("[%d]sys_mprotect(%lx,%lx,%x)\n",
ihk_mc_get_processor_id(), start, len0, prot);
@ -596,14 +558,6 @@ SYSCALL_DECLARE(mprotect)
error = -EINVAL;
goto out;
}
denied = protflags & ~VRFLAG_MAXPROT_TO_PROT(range->flag);
if (denied) {
ekprintf("sys_mprotect(%lx,%lx,%x):denied %lx. %lx %lx\n",
start, len0, prot, denied, protflags, range->flag);
error = -EACCES;
goto out;
}
}
/* do the mprotect */
@ -675,7 +629,6 @@ SYSCALL_DECLARE(brk)
unsigned long address = ihk_mc_syscall_arg0(ctx);
struct vm_regions *region = &cpu_local_var(current)->vm->region;
unsigned long r;
unsigned long vrflag;
dkprintf("SC(%d)[sys_brk] brk_start=%lx,end=%lx\n",
ihk_mc_get_processor_id(), region->brk_start, region->brk_end);
@ -693,8 +646,6 @@ SYSCALL_DECLARE(brk)
}
/* try to extend memory region */
vrflag = VR_PROT_READ | VR_PROT_WRITE;
vrflag |= VRFLAG_PROT_TO_MAXPROT(vrflag);
ihk_mc_spinlock_lock_noirq(&cpu_local_var(current)->vm->memory_range_lock);
region->brk_end = extend_process_region(cpu_local_var(current),
region->brk_start, region->brk_end, address,

View File

@ -1,7 +1,6 @@
#ifndef __HEADER_GENERIC_IHK_MM_H
#define __HEADER_GENERIC_IHK_MM_H
#include <ihk/types.h>
#include <memory.h>
enum ihk_mc_gma_type {
@ -59,7 +58,7 @@ struct ihk_mc_pa_ops {
};
void ihk_mc_set_page_allocator(struct ihk_mc_pa_ops *);
void ihk_mc_set_page_fault_handler(void (*h)(unsigned long, unsigned long, void *));
void ihk_mc_set_page_fault_handler(void (*h)(unsigned long, void *, unsigned long));
unsigned long ihk_mc_map_memory(void *os, unsigned long phys,
unsigned long size);
@ -101,13 +100,6 @@ int ihk_mc_pt_change_attr_range(page_table_t pt, void *start, void *end,
enum ihk_mc_pt_attribute setattr);
int ihk_mc_pt_alloc_range(page_table_t pt, void *start, void *end,
enum ihk_mc_pt_attribute attr);
int ihk_mc_pt_lookup_pte(page_table_t pt, void *virt, pte_t **ptepp, void **pgbasep, size_t *pgsizep);
int ihk_mc_pt_choose_pagesize(page_table_t pt, void *start, void *end,
void *fault_addr, size_t maxpgsize, void **pgbasep,
size_t *pgsizep, int *p2alignp);
int ihk_mc_pt_set_range(page_table_t pt, void *start, void *end,
uintptr_t phys, enum ihk_mc_pt_attribute attr);
int ihk_mc_pt_set_pte(page_table_t pt, pte_t *ptep, uintptr_t phys, size_t pgsize, enum ihk_mc_pt_attribute attr);
int ihk_mc_pt_prepare_map(page_table_t pt, void *virt, unsigned long size,
enum ihk_mc_pt_prepare_flag);