/*
 * \file pager.c
 *  License details are found in the file LICENSE.
 * \brief
 *  paging system
 * \author Yutaka Ishikawa <ishikawa@riken.jp>
 */
/*
 * HISTORY:
 */
#include <types.h>
#include <kmsg.h>
#include <ihk/cpu.h>
#include <cpulocal.h>
#include <ihk/mm.h>
#include <ihk/debug.h>
#include <ihk/ikc.h>
#include <errno.h>
#include <cls.h>
#include <syscall.h>
#include <kmalloc.h>
#include <process.h>
#include <swapfmt.h>

#define O_RDONLY 00000000
#define O_WRONLY 00000001
#define O_RDWR   00000002
#define O_CREAT  00000100
#define O_TRUNC  00001000
#define SEEK_SET 0 /* from include/uapi/linux/fs.h in Linux */
#define SEEK_CUR 1 /* from include/uapi/linux/fs.h in Linux */
#define IS_TEXT(start, region)  ((start) == (region)->text_start)
#define IS_DATA(start, region)  ((start) == (region)->data_start)
#define IS_STACK(start, region) ((start) == (region)->stack_start)
#define IS_INVALID_USERADDRESS(addr, region) \
	((((unsigned long) addr) < region->user_start) \
	 || ((unsigned long) addr) >= region->user_end)
#define IS_INVALID_LENGTH(len, region) \
	((len) > (region->user_end - region->user_start))
#define IS_READONLY(flag) (((flag)&VR_PROT_WRITE) == 0)
#define IS_NOTUSER(flag)  (((flag)&VR_AP_USER) == 0)

//#define DEBUG_PRINT_PROCESS

#ifdef DEBUG_PRINT_PROCESS
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif

/*
 * Contiguous pages are represented by the "addrpair" structure.
 * - The swap_area, whose type is "struct arealist", keeps swappable pages
 *   using "areaent" structures, each of which keeps an array of "addrpair"
 *   structures.
 * - The mlock_area is also a "struct arealist", keeping pages locked by
 *   both McKernel and Linux.
 * - The mlock_container, of type "struct mlockcntnr", is a cursor into
 *   mlock_area that marks the entries reported by Linux for the range
 *   currently being examined.
 */
/*
 * The page areas are independently managed by McKernel and Linux.
 * Pages locked by the Linux kernel are not known to McKernel. To get that
 * information, the mlockcntnr structure is used.
 * The mlockcntnr keeps the position (areaent and index) in mlock_area at
 * which the entries returned by Linux begin.
 */
#define MLOCKADDRS_SIZE 128
struct addrpair {
	unsigned long start;
	unsigned long end;
	unsigned long flag;
};
struct areaent {
	struct areaent  *next;
	int             count;
	struct addrpair pair[MLOCKADDRS_SIZE];
};

struct arealist {
	struct areaent *head;
	struct areaent *tail;
	int            count;
};

struct mlockcntnr {
	struct areaent *from;
	int            ccount;
	struct areaent *cur;
};

struct swapinfo {
	struct swap_header   *swphdr;
	struct swap_areainfo *swap_info, *mlock_info;

	struct arealist   swap_area;
	struct arealist   mlock_area;
	struct mlockcntnr mlock_container;
#define UDATA_BUFSIZE (8*1024)
	char *swapfname;
	char *udata_buf; /* To read-store data from Linux to user space */

	void   *user_buf;
	size_t ubuf_size, ubuf_alloced;
};

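/*
 * A minimal illustrative sketch (not part of the build) of how the
 * structures above fit together: an arealist is a chain of areaent blocks,
 * each holding up to MLOCKADDRS_SIZE addrpair entries, and area->count is
 * the total number of pairs over the whole chain.  The helper name below is
 * hypothetical; the same traversal pattern is used by arealist_preparewrite()
 * and arealist_print() later in this file.
 */
#if 0
static size_t
arealist_total_bytes(struct arealist *area)
{
	struct areaent *ent;
	size_t total = 0;
	int i;

	for (ent = area->head; ent != NULL; ent = ent->next) {
		for (i = 0; i < ent->count; i++) {
			/* each addrpair describes one contiguous region */
			total += ent->pair[i].end - ent->pair[i].start;
		}
	}
	return total;
}
#endif
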
static void
area_print(struct vm_regions *region)
{
	dkprintf("text %016lx:%016lx\n", region->text_start, region->text_end);
	dkprintf("data %016lx:%016lx\n", region->data_start, region->data_end);
	dkprintf("brk %016lx:%016lx\n", region->brk_start, region->brk_end);
	dkprintf("map %016lx:%016lx\n", region->map_start, region->map_end);
	dkprintf("stack %016lx:%016lx\n", region->stack_start, region->stack_end);
	dkprintf("user %016lx:%016lx\n", region->user_start, region->user_end);
}

static int
myalloc_init(struct swapinfo *si, void *p, size_t sz)
{
	extern SYSCALL_DECLARE(mlock);
	ihk_mc_user_context_t ctx0;
	int cc;

	/* pin the buffer down on the McKernel side */
	ihk_mc_syscall_arg0(&ctx0) = (uintptr_t) p;
	ihk_mc_syscall_arg1(&ctx0) = sz;
	cc = sys_mlock(__NR_mlock, &ctx0);
	if (cc < 0) return cc;
	/* init */
	si->user_buf = p;
	si->ubuf_size = sz;
	si->ubuf_alloced = 0;
	dkprintf("myalloc_init: buffer(%p) size(0x%lx)\n", si->user_buf, si->ubuf_size);
	return 0;
}

void
myalloc_finalize(struct swapinfo *si)
{
	extern SYSCALL_DECLARE(munlock);
	ihk_mc_user_context_t ctx0;

	/* unpin the buffer on the McKernel side */
	ihk_mc_syscall_arg0(&ctx0) = (uintptr_t) si->user_buf;
	ihk_mc_syscall_arg1(&ctx0) = si->ubuf_size;
	sys_munlock(__NR_munlock, &ctx0);
}

void *
myalloc(struct swapinfo *si, size_t sz)
{
	void *p = NULL;

	if ((si->ubuf_alloced + sz) < si->ubuf_size) {
		p = (void*) &((char*)si->user_buf)[si->ubuf_alloced];
		si->ubuf_alloced += sz;
	}
	return p;
}

void
myfree(void *p)
{
	/* nothing so far: myalloc() carves pieces out of a single pinned
	 * buffer, so individual allocations are not returned */
	(void)p;
}

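/*
 * A minimal usage sketch (not part of the build) of the allocator above:
 * the caller supplies one buffer in its own address space, myalloc_init()
 * pins it with mlock, myalloc() simply carves pieces out of it, and
 * myfree() is a no-op, so everything is released at once when the buffer
 * is unpinned.  The function name below is hypothetical.
 */
#if 0
static int
myalloc_example(struct swapinfo *si, void *user_buf, size_t size)
{
	char *a, *b;

	if (myalloc_init(si, user_buf, size) < 0)	/* mlock the buffer */
		return -1;
	a = myalloc(si, 1024);		/* first 1024 bytes of user_buf */
	b = myalloc(si, 2048);		/* next 2048 bytes */
	if (a != NULL && b != NULL) {
		/* use a and b ... */
	}
	myalloc_finalize(si);		/* munlock the buffer */
	return 0;
}
#endif
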
static int
linux_open(char *fname, int flag, int mode)
{
	ihk_mc_user_context_t ctx0;
	int fd;

	ihk_mc_syscall_arg0(&ctx0) = (uintptr_t) fname;
	ihk_mc_syscall_arg1(&ctx0) = flag;
	ihk_mc_syscall_arg2(&ctx0) = mode;
	fd = syscall_generic_forwarding(__NR_open, &ctx0);
	return fd;
}

static int
linux_unlink(char *fname)
{
	ihk_mc_user_context_t ctx0;

	ihk_mc_syscall_arg0(&ctx0) = (uintptr_t) fname;
	return syscall_generic_forwarding(__NR_unlink, &ctx0);
}

/*
 * Forward a read to Linux, retrying on EINTR and continuing after short
 * reads until "count" bytes have been transferred or an error occurs.
 */
static ssize_t
linux_read(int fd, void *buf, size_t count)
{
	ihk_mc_user_context_t ctx0;
	ssize_t sz;
	size_t count0 = count;

	ihk_mc_syscall_arg0(&ctx0) = fd;
	sz = 0;
	for (;;) {
		ssize_t sz0;

		ihk_mc_syscall_arg1(&ctx0) = (uintptr_t) buf;
		ihk_mc_syscall_arg2(&ctx0) = count;
		sz0 = syscall_generic_forwarding(__NR_read, &ctx0);
		if (sz0 == -EINTR)
			continue;
		if (sz0 <= 0) {
			/* report the error only when nothing has been read */
			if (sz == 0)
				sz = sz0;
			break;
		}
		sz += sz0;
		if (sz == count0)
			break;
		count -= sz0;
		buf = (char *)buf + sz0;
	}
	return sz;
}

/*
 * Forward a write to Linux, retrying on EINTR and continuing after short
 * writes until "count" bytes have been transferred or an error occurs.
 */
static ssize_t
linux_write(int fd, void *buf, size_t count)
{
	ihk_mc_user_context_t ctx0;
	ssize_t sz;
	size_t count0 = count;

	ihk_mc_syscall_arg0(&ctx0) = fd;
	sz = 0;
	for (;;) {
		ssize_t sz0;

		ihk_mc_syscall_arg1(&ctx0) = (uintptr_t) buf;
		ihk_mc_syscall_arg2(&ctx0) = count;
		sz0 = syscall_generic_forwarding(__NR_write, &ctx0);
		if (sz0 == -EINTR)
			continue;
		if (sz0 <= 0) {
			/* report the error only when nothing has been written */
			if (sz == 0)
				sz = sz0;
			break;
		}
		sz += sz0;
		if (sz == count0)
			break;
		count -= sz0;
		buf = (char *)buf + sz0;
	}
	return sz;
}

static off_t
linux_lseek(int fd, off_t off, int whence)
{
	ihk_mc_user_context_t ctx0;
	off_t cc;	/* off_t, not int, so that large offsets are not truncated */

	ihk_mc_syscall_arg0(&ctx0) = fd;
	ihk_mc_syscall_arg1(&ctx0) = off;
	ihk_mc_syscall_arg2(&ctx0) = whence;
	cc = syscall_generic_forwarding(__NR_lseek, &ctx0);
	return cc;
}

static int
linux_close(int fd)
{
	ihk_mc_user_context_t ctx0;
	int cc;

	ihk_mc_syscall_arg0(&ctx0) = fd;
	cc = syscall_generic_forwarding(__NR_close, &ctx0);
	return cc;
}

/*
 * The munmap syscall from McKernel is handled by the mcctrl module.
 * The extra argument "flag" requests setting up a new remote page table
 * when it is nonzero.
 */
static int
linux_munmap(void *addr, size_t len, int flag)
{
	ihk_mc_user_context_t ctx0;
	int cc;

	ihk_mc_syscall_arg0(&ctx0) = (uintptr_t) addr;
	ihk_mc_syscall_arg1(&ctx0) = len;
	ihk_mc_syscall_arg2(&ctx0) = flag;
	cc = syscall_generic_forwarding(__NR_munmap, &ctx0);
	return cc;
}

static int
pager_open(struct swapinfo *si, char *fname, int flag, int mode)
{
	int fd;

	/* stage the file name in the pinned user buffer (udata_buf),
	 * which the Linux side can access, before forwarding the open */
	strcpy(si->udata_buf, fname);
	fd = linux_open(si->udata_buf, flag, mode);
	return fd;
}

static int
pager_unlink(struct swapinfo *si, char *fname)
{
	strcpy(si->udata_buf, fname);
	return linux_unlink(si->udata_buf);
}

static ssize_t
pager_read(struct swapinfo *si, int fd, void *start, size_t size)
{
	ssize_t off, sz, rs;

	kprintf("pager_read: %p (%lx)\n", start, size);
	/* stage the data through udata_buf, UDATA_BUFSIZE bytes at a time,
	 * copying each piece into the user address range */
	for (off = 0; off < size; off += sz) {
		sz = size - off;
		sz = (sz > UDATA_BUFSIZE) ? UDATA_BUFSIZE : sz;
		rs = linux_read(fd, si->udata_buf, sz);
		if (rs != sz) return rs;
		copy_to_user(start + off, si->udata_buf, sz);
	}
	return off;
}

static ssize_t
pager_write(int fd, void *start, size_t size)
{
	ssize_t sz;

	sz = linux_write(fd, start, size);
	return sz;
}

static int
mlocklist_req(unsigned long start, unsigned long end, struct addrpair *addr, int nent)
{
	ihk_mc_user_context_t ctx0;
	int cc;

#define PAGER_REQ_MLOCK_LIST 0x0008
	/* the request is multiplexed over the forwarded mmap syscall;
	 * arg0 carries the pager request code */
	ihk_mc_syscall_arg0(&ctx0) = PAGER_REQ_MLOCK_LIST;
	ihk_mc_syscall_arg1(&ctx0) = start;
	ihk_mc_syscall_arg2(&ctx0) = end;
	ihk_mc_syscall_arg3(&ctx0) = (unsigned long) addr;
	ihk_mc_syscall_arg4(&ctx0) = nent;
	cc = syscall_generic_forwarding(__NR_mmap, &ctx0);
	return cc;
}

/*
 * If the last entry of addrpair is -1, more pages locked by Linux exist.
 */
static int
mlocklist_morereq(struct swapinfo *si, unsigned long *start)
{
	struct areaent *ent = si->mlock_area.tail;

	dkprintf("mlocklist_morereq: start = %ld end = %ld\n",
		 ent->pair[ent->count].start, ent->pair[ent->count].end);
	if (ent->pair[ent->count].start != (unsigned long) -1) {
		return 0;
	}
	*start = ent->pair[ent->count].end;
	return 1;
}

static int
arealist_alloc(struct swapinfo *si, struct arealist *areap)
{
	areap->head = areap->tail = myalloc(si, sizeof(struct areaent));
	if (areap->head == NULL) return -ENOMEM;
	memset(areap->head, 0, sizeof(struct areaent));
	return 0;
}

static int
arealist_init(struct swapinfo *si)
{
	int cc;

	if ((cc = arealist_alloc(si, &si->swap_area)) < 0) return cc;
	cc = arealist_alloc(si, &si->mlock_area);
	return cc;
}

static void
arealist_free(struct arealist *area)
{
	struct areaent *tmp;

	/* myfree() is currently a no-op, so reading tmp->next after the call
	 * is safe; revisit this loop if myfree() ever releases memory */
	for (tmp = area->head; tmp != NULL; tmp = tmp->next) {
		myfree(tmp);
	}
	memset(area, 0, sizeof(struct arealist));
	return;
}

/*
 * Returns the number of free addrpair slots in the tail areaent and, via
 * "pair", a pointer to the first free slot; a new areaent is allocated
 * when the current tail is nearly full.
 */
static int
arealist_get(struct swapinfo *si, struct addrpair **pair, struct arealist *area)
{
	struct areaent *tmp;
	struct areaent *tail = area->tail;

	if (tail->count < MLOCKADDRS_SIZE - 1) { /* at least two entries are needed */
		if (pair) *pair = &tail->pair[tail->count];
		return MLOCKADDRS_SIZE - tail->count;
	}
	tmp = myalloc(si, sizeof(struct areaent));
	if (tmp == NULL) {
		return -1;
	}
	memset(tmp, 0, sizeof(struct areaent));
	area->tail->next = tmp;
	area->tail = tmp;
	if (pair) *pair = area->tail->pair;
	return MLOCKADDRS_SIZE;
}

static void
arealist_update(int cnt, struct arealist *area)
{
	area->tail->count += cnt;
	area->count += cnt;
}

static int
arealist_add(struct swapinfo *si, unsigned long start, unsigned long end,
	     unsigned long flag, struct arealist *area)
{
	int cc;
	struct addrpair *addr;

	cc = arealist_get(si, &addr, area);
	if (cc < 0) return -1;
	addr->start = start; addr->end = end; addr->flag = flag;
	arealist_update(1, area);
	return 0;
}

static int
arealist_preparewrite(struct arealist *areap, struct swap_areainfo *info,
		      ssize_t off, struct process_vm *vm, int flag)
{
	struct areaent *ent;
	int count = 0;
	ssize_t totsz = 0;
	struct page_table *pt = vm->address_space->page_table;

	for (ent = areap->head; ent != NULL; ent = ent->next) {
		int i;
		for (i = 0; i < ent->count; i++, count++) {
			ssize_t sz = ent->pair[i].end - ent->pair[i].start;
			info[count].start = ent->pair[i].start;
			info[count].end = ent->pair[i].end;
			info[count].flag = ent->pair[i].flag;
			if (flag) { /* position in file */
				info[count].pos = off + totsz;
			} else { /* physical memory */
				if (ihk_mc_pt_virt_to_phys(pt,
					(void*) ent->pair[i].start,
					&info[count].pos)) {
					kprintf("Cannot get phys\n");
				}
			}
			totsz += sz;
		}
	}
	return count;
}

static ssize_t
arealist_write(int fd, struct swap_areainfo *info, int count)
{
	ssize_t sz;

	sz = linux_write(fd, info, sizeof(struct swap_areainfo)*count);
	if (sz != sizeof(struct swap_areainfo)*count) return -1;
	return 0;
}

static void
arealist_print(char *msg, struct arealist *areap, int count)
{
	struct areaent *ent;

	kprintf("%s: %d\n", msg, count);
	for (ent = areap->head; ent != NULL; ent = ent->next) {
		int i;
		for (i = 0; i < ent->count; i++) {
			kprintf("\t%p -- %p\n",
				(void*) ent->pair[i].start, (void*) ent->pair[i].end);
		}
	}
}

/*
 * Remember the current tail of mlock_area so that the entries subsequently
 * returned by Linux can be distinguished from the ones recorded so far.
 */
static int
mlockcntnr_sethead(struct swapinfo *si)
{
	int cnt;

	cnt = arealist_get(si, 0, &si->mlock_area); /* adjust the arealist */
	if (cnt < 0) return -1;
	si->mlock_container.from = si->mlock_container.cur = si->mlock_area.tail;
	si->mlock_container.ccount = si->mlock_area.tail->count;
	return 0;
}

static int
mlockcntnr_isempty(struct swapinfo *si)
{
	return si->mlock_container.from == si->mlock_area.tail
		&& si->mlock_container.ccount == si->mlock_area.tail->count;
}

static int
mlockcntnr_addrent(struct swapinfo *si, struct addrpair *laddr)
{
	if (si->mlock_container.ccount == si->mlock_container.cur->count) {
		struct areaent *tmp = si->mlock_container.cur->next;
		if (tmp == 0) return 0;
		si->mlock_container.cur = tmp;
		si->mlock_container.ccount = 1;
	}
	*laddr = si->mlock_container.cur->pair[si->mlock_container.ccount - 1];
	si->mlock_container.ccount++;
	return 1;
}

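/*
 * A minimal sketch (not part of the build) of the container protocol as
 * used by do_pageout() below: mlockcntnr_sethead() marks the current tail
 * of mlock_area, the mlocklist_req()/mlocklist_morereq() loop appends the
 * ranges reported by Linux, and mlockcntnr_addrent() then walks those
 * entries one by one.  The function name below is hypothetical and error
 * checks are omitted.
 */
#if 0
static void
mlockcntnr_example(struct swapinfo *si, unsigned long start, unsigned long end)
{
	struct addrpair *addr, locked;
	int cc;

	mlockcntnr_sethead(si);			/* remember the current tail */
	do {
		cc = arealist_get(si, &addr, &si->mlock_area);
		cc = mlocklist_req(start, end, addr, cc);
		arealist_update(cc, &si->mlock_area);
	} while (mlocklist_morereq(si, &start));

	if (!mlockcntnr_isempty(si)) {
		while (mlockcntnr_addrent(si, &locked)) {
			kprintf("locked by Linux: %lx-%lx\n",
				locked.start, locked.end);
		}
	}
}
#endif
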
static void
print_area(char *label, unsigned long start, unsigned long sz,
	   struct vm_regions *region)
{
	char *type;

	if (start == region->text_start) {
		type = "text";
	} else if (start == region->data_start) {
		type = "data";
	} else if (start == region->brk_start) {
		type = "brk";
	} else if (start == region->stack_start) {
		type = "stack";
	} else if (start == region->user_start) {
		type = "user";
	} else if (start >= region->map_start
		   && start <= region->stack_start) {
		type = "map";
	} else {
		type = "other";
	}
	kprintf("%s: %s write(%lx, %ld)\n", label, type, start, sz);
}

void
print_region(char *msg, struct process_vm *vm)
{
	struct vm_range *range, *next;

	kprintf("%s:\n", msg);
	list_for_each_entry_safe(range, next, &vm->vm_range_list, list) {
		if (range->memobj != NULL) continue;
		kprintf("\t%016lx:%016lx (%lx)\n",
			range->start, range->end, range->flag);
	}
}

static void
debug_dump(char *msg, unsigned char *p)
{
	kprintf("%s-> %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x"
		":%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n",
		msg, p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
		p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
}

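/*
 * Layout of the swap file, as produced by do_pageout() and consumed by
 * do_pagein():
 *
 *	struct swap_header			magic, version, area counts
 *	struct swap_areainfo [count_sarea]	swap_info: swapped-out areas,
 *						pos = offset of the data below
 *	struct swap_areainfo [count_marea]	mlock_info: locked areas,
 *						pos = physical address
 *	page data of each swapped-out area, stored in order
 */
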
int
do_pagein(int flag)
{
	struct thread *thread = cpu_local_var(current);
	struct process_vm *vm = thread->vm;
	int fd, i;
	ssize_t pos, sz, rs;
	struct swapinfo *si = vm->swapinfo;

	dkprintf("do_pagein: flag(%d) currss(%lx)\n", flag, vm->currss);
	fd = pager_open(si, si->swapfname, O_RDONLY, 0);
	pager_unlink(si, si->swapfname);
	if (fd < 0) {
		kprintf("do_pagein: Cannot open file: %s\n", si->swapfname);
		return fd;
	}
	/*
	 * In the current implementation, the following working areas remain
	 * in physical memory:
	 *	swphdr, swap_info and mlock_info
	 */
	pos = sizeof(struct swap_header);
	pos += sizeof(struct swap_areainfo)*si->swphdr->count_sarea;
	pos += sizeof(struct swap_areainfo)*si->swphdr->count_marea;
	rs = linux_lseek(fd, pos, SEEK_SET);
	for (i = 0; i < si->swphdr->count_sarea; i++) {
		extern int ihk_mc_pt_print_pte(struct page_table *pt, void *virt);
		sz = si->swap_info[i].end - si->swap_info[i].start;
		dkprintf("pagein: %016lx:%016lx sz(%lx)\n", si->swap_info[i].start, si->swap_info[i].end, sz);
		rs = pager_read(si, fd, (void*) si->swap_info[i].start, sz);
		if (rs != sz) goto err;
		// ihk_mc_pt_print_pte(vm->address_space->page_table, (void*) si->swap_info[i].start);
	}
	linux_close(fd);
	print_region("after pagein", vm);
	kprintf("do_pagein: done, currss(%lx)\n", vm->currss);
	vm->swapinfo = NULL;
	kfree(si->swapfname);
	kfree(si);
	return 0;
err:
	linux_close(fd);
	ekprintf("pagein: read error: return(%lx) size(%lx)\n", rs, sz);
	vm->swapinfo = NULL;
	kfree(si->swapfname);
	kfree(si);
	return -1;
}

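/*
 * do_pageout() writes the swappable part of the user address space into the
 * file "fname" and releases the corresponding physical pages, roughly in
 * these steps:
 *   1. validate the arguments and pin the caller-supplied buffer, which is
 *	used as working memory (myalloc_init/myalloc),
 *   2. walk the anonymous VM ranges, skipping text, stack, read-only and
 *	non-user mappings and excluding McKernel/Linux-locked pages, to build
 *	swap_area and mlock_area,
 *   3. write the swap header, both swap_areainfo tables, and the page data,
 *   4. free the physical pages of the swapped ranges and munmap them on the
 *	Linux side.
 * do_pagein() restores the pages from the file and deletes it.
 */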
int
do_pageout(char *fname, void *buf, size_t size, int flag)
{
	struct thread *thread = cpu_local_var(current);
	struct process_vm *vm = thread->vm;
	struct vm_regions *region = &vm->region;
	struct vm_range *range, *next;
	struct addrpair *addr;
	int i, fd;
	long cc;
	unsigned long start, end;
	ssize_t pos, sz;
	struct swapinfo *si;

	fd = -1;
	dkprintf("do_pageout: buf(%p) size(%ld) flag(%d) currss(%lx)\n",
		 buf, size, flag, vm->currss);
	if (IS_INVALID_USERADDRESS(fname, region)
	    || IS_INVALID_USERADDRESS(buf, region)
	    || IS_INVALID_LENGTH(size, region)) {
		return -EINVAL;
	}
	if (!(si = kmalloc(sizeof(struct swapinfo), IHK_MC_AP_NOWAIT))) {
		ekprintf("do_pageout: Cannot allocate working memory in kmalloc\n");
		return -ENOMEM;
	}
	memset(si, '\0', sizeof(struct swapinfo));
	cc = myalloc_init(si, buf, size);
	if (cc < 0) {
		kfree(si);
		ekprintf("do_pageout: Cannot pin buf (%p) down\n", buf);
		return cc;
	}
	si->udata_buf = myalloc(si, UDATA_BUFSIZE);
	if (si->udata_buf == NULL) {
		myalloc_finalize(si);
		kfree(si);
		ekprintf("do_pageout: user buffer (%ld bytes) is too small\n", size);
		return -ENOMEM;
	}
	si->swapfname = kmalloc(strlen(fname) + 1, IHK_MC_AP_NOWAIT);
	if (si->swapfname == NULL) {
		myalloc_finalize(si);
		kfree(si);
		ekprintf("do_pageout: Cannot allocate working memory in kmalloc\n");
		return -ENOMEM;
	}
	if (strcpy_from_user(si->swapfname, fname)) {
		cc = -EFAULT;
		goto err;
	}
	cc = arealist_init(si);
	if (cc < 0) {
		ekprintf("do_pageout: the user buffer must be larger than %ld bytes\n",
			 UDATA_BUFSIZE + sizeof(struct areaent)*2);
		goto err;
	}

	fd = linux_open(fname, O_RDWR|O_CREAT|O_TRUNC, 0600);
	if (fd < 0) {
		ekprintf("do_pageout: Cannot open/create file: %s\n", fname);
		cc = fd;
		goto err;
	}
	area_print(region);

	/* Walk the anonymous ranges, skipping text, stack, read-only and
	 * non-user mappings. */
	list_for_each_entry_safe(range, next, &vm->vm_range_list, list) {
		if (range->memobj != NULL) continue;
		if (IS_TEXT(range->start, region)
		    || IS_STACK(range->start, region)
		    || IS_INVALID_USERADDRESS(range->start, region)
		    || IS_READONLY(range->flag)
		    || IS_NOTUSER(range->flag)) continue;
		if (range->flag & VR_LOCKED) {
			/* this range is locked by McKernel */
			cc = arealist_add(si, range->start, range->end,
					  range->flag, &si->mlock_area);
			if (cc < 0) goto nomem;
			continue;
		}
		start = range->start; end = range->end;
		if ((cc = mlockcntnr_sethead(si)) < 0) goto nomem;
		/* Request the mlock list from the Linux kernel. We do not
		 * know how many addrpair entries are needed. The Linux side
		 * stores -1 in the last entry of addrpair to indicate that
		 * more entries exist; mlocklist_morereq() checks this
		 * condition. */
		do {
			if ((cc = arealist_get(si, &addr, &si->mlock_area)) < 0) goto nomem;
			cc = mlocklist_req(start, end, addr, cc);
			arealist_update(cc, &si->mlock_area);
		} while (mlocklist_morereq(si, &start));

		if (mlockcntnr_isempty(si)) { /* the whole range goes to swap */
			cc = arealist_add(si, range->start, range->end,
					  range->flag, &si->swap_area);
		} else { /* only part of the range goes to swap */
			for (start = range->start; start < range->end;) {
				struct addrpair laddr;
				if (mlockcntnr_addrent(si, &laddr) == 0) {
					/* No more entries locked by Linux */
					cc = arealist_add(si, start, range->end,
							  range->flag,
							  &si->swap_area);
					if (cc < 0) goto nomem;
					break;
				}
				if (start < laddr.start) {
					/* swap the range from start to laddr.start */
					cc = arealist_add(si, start, laddr.start,
							  range->flag,
							  &si->swap_area);
					if (cc < 0) goto nomem;
				}
				start = laddr.end;
				kprintf("do_pageout: start(%lx) range->end(%lx)\n",
					start, range->end);
				break;
			}
		}
	}
arealist_print("SWAP", &si->swap_area, si->swap_area.count);
|
|
arealist_print("MLOCK", &si->mlock_area, si->mlock_area.count);
|
|
si->swap_info = myalloc(si, sizeof(struct swap_areainfo)* si->swap_area.count);
|
|
si->mlock_info = myalloc(si, sizeof(struct swap_areainfo)* si->mlock_area.count);
|
|
if (si->swap_info == NULL || si->mlock_info == NULL) goto nomem;
|
|
|
|
/* preparing page store */
|
|
si->swphdr = myalloc(si, sizeof(struct swap_header));
|
|
strncpy(si->swphdr->magic, MCKERNEL_SWAP, SWAP_HLEN);
|
|
strncpy(si->swphdr->version, MCKERNEL_SWAP_VERSION, SWAP_HLEN);
|
|
si->swphdr->count_sarea = si->swap_area.count;
|
|
si->swphdr->count_marea = si->mlock_area.count;
|
|
if ((cc = pager_write(fd, si->swphdr, sizeof(struct swap_header)))
|
|
!= sizeof(struct swap_header)) {
|
|
if (cc >= 0)
|
|
cc = -EIO;
|
|
goto err;
|
|
}
|
|
pos = linux_lseek(fd, 0, SEEK_CUR);
|
|
pos += sizeof(struct swap_areainfo)*(si->swap_area.count+si->mlock_area.count);
|
|
cc = arealist_preparewrite(&si->swap_area, si->swap_info, pos, vm, 1);
|
|
if (cc != si->swap_area.count) {
|
|
ekprintf("do_pageout: ERROR file ent(%d) != list ent(%d) in swap_area\n",
|
|
cc, si->swap_area.count);
|
|
}
|
|
cc = arealist_preparewrite(&si->mlock_area, si->mlock_info, 0, vm, 0);
|
|
if (cc != si->mlock_area.count) {
|
|
ekprintf("do_pageout: ERROR file ent(%d) != list ent(%d) in swap_area\n",
|
|
cc, si->mlock_area.count);
|
|
}
|
|
/* arealists are stored */
|
|
if ((cc = arealist_write(fd, si->swap_info, si->swap_area.count)) < 0) goto err;
|
|
if ((cc = arealist_write(fd, si->mlock_info, si->mlock_area.count)) < 0) goto err;
|
|
/* now pages are stored */
|
|
for (i = 0; i < si->swap_area.count; i++) {
|
|
sz = si->swap_info[i].end - si->swap_info[i].start;
|
|
if ((cc = pager_write(fd, (void*) si->swap_info[i].start, sz)) != sz) {
|
|
if (cc >= 0)
|
|
cc = -EIO;
|
|
goto err;
|
|
}
|
|
}
|
|
if (flag & 0x04) {
|
|
kprintf("skipping physical memory removal\n");
|
|
goto free_exit;
|
|
}
|
|
kprintf("removing physical memory\n");
|
|
for (i = 0; i < si->swap_area.count; i++) {
|
|
cc = ihk_mc_pt_free_range(vm->address_space->page_table,
|
|
vm,
|
|
(void*) si->swap_info[i].start,
|
|
(void*) si->swap_info[i].end, NULL);
|
|
if (cc < 0) {
|
|
kprintf("ihk_mc_pt_clear_range returns: %d\n", cc);
|
|
}
|
|
}
|
|
#if 0
|
|
range->flag |= VR_PAGEOUT;
|
|
#endif
|
|
cc = linux_close(fd);
|
|
fd = -1;
|
|
/*
|
|
* Unmapping McKernel's user virtual spaces in Linux side.
|
|
* From here to the completion of do_pagein, the nonlocking user spaces
|
|
* except TEXT, STACK, readonly pages, are not invalid.
|
|
*/
|
|
for (i = 0; i < si->swap_area.count; i++) {
|
|
sz = si->swap_info[i].end - si->swap_info[i].start;
|
|
cc = linux_munmap((void*) si->swap_info[i].start, sz, 0);
|
|
if (cc < 0) {
|
|
kprintf("do_pageout: Cannot munmap: %lx len(%lx)\n",
|
|
si->swap_info[i].start, sz);
|
|
}
|
|
}
|
|
cc = 0;
|
|
goto free_exit;
|
|
err:
|
|
ekprintf("do_pageout: write error: %d\n", cc);
|
|
goto free_exit;
|
|
nomem:
|
|
ekprintf("do_pageout: cannot allocate working memory\n");
|
|
cc = -ENOMEM;
|
|
free_exit:
|
|
if (fd >= 0)
|
|
linux_close(fd);
|
|
dkprintf("do_pageout: done, currss(%lx)\n", vm->currss);
|
|
arealist_free(&si->mlock_area); arealist_free(&si->swap_area);
|
|
if (cc != 0) {
|
|
pager_unlink(si, si->swapfname);
|
|
kfree(si->swapfname);
|
|
kfree(si);
|
|
}
|
|
else {
|
|
vm->swapinfo = si;
|
|
}
|
|
return cc;
|
|
}
|