/* xpmem.c COPYRIGHT FUJITSU LIMITED 2017 */
/**
 * \file xpmem.c
 *  License details are found in the file LICENSE.
 * \brief
 *  Cross Partition Memory (XPMEM) support.
 * \author Yoichi Umezawa <yoichi.umezawa.qh@hitachi.com> \par
 *  Copyright (C) 2016 Yoichi Umezawa
 *
 * Original Copyright follows:
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (c) 2004-2007 Silicon Graphics, Inc. All Rights Reserved.
 * Copyright 2010, 2014 Cray Inc. All Rights Reserved
 * Copyright 2015-2016 Los Alamos National Security, LLC. All rights reserved.
 */
/*
 * HISTORY
 */

#include <errno.h>
#include <kmalloc.h>
#include <limits.h>
#include <memobj.h>
#include <mman.h>
#include <page.h>
#include <string.h>
#include <types.h>
#include <vsprintf.h>
#include <ihk/lock.h>
#include <ihk/mm.h>
#include <xpmem_private.h>

struct xpmem_partition *xpmem_my_part = NULL;	/* pointer to this partition */
|
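/**
 * xpmem_open() - handle open(2)/openat(2) on the XPMEM device file.
 *
 * Lazily initializes the per-partition state on first use, forwards the
 * open to the host side to obtain a real file descriptor, registers the
 * calling process as an XPMEM thread group via __xpmem_open(), and
 * installs an mckfd entry whose ioctl/close/dup callbacks implement the
 * XPMEM commands.  Returns the forwarded fd or a negative errno.
 */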
#if defined(POSTK_DEBUG_ARCH_DEP_46) || defined(POSTK_DEBUG_ARCH_DEP_62)
|
|
int xpmem_open(int num, const char *pathname,
|
|
int flags, ihk_mc_user_context_t *ctx)
|
|
{
|
|
int ret;
|
|
struct thread *thread = cpu_local_var(current);
|
|
struct process *proc = thread->proc;
|
|
int fd;
|
|
struct mckfd *mckfd;
|
|
long irqstate;
|
|
|
|
XPMEM_DEBUG("call: syscall_num=%d, pathname=%s, flags=%d", num, pathname, flags);
|
|
#else /* POSTK_DEBUG_ARCH_DEP_46 || POSTK_DEBUG_ARCH_DEP_62 */
|
|
int xpmem_open(
|
|
ihk_mc_user_context_t *ctx)
|
|
{
|
|
const char *pathname = (const char *)ihk_mc_syscall_arg0(ctx);
|
|
int flags = (int)ihk_mc_syscall_arg1(ctx);
|
|
int ret;
|
|
struct thread *thread = cpu_local_var(current);
|
|
struct process *proc = thread->proc;
|
|
struct syscall_request request IHK_DMA_ALIGN;
|
|
int fd;
|
|
struct mckfd *mckfd;
|
|
long irqstate;
|
|
|
|
XPMEM_DEBUG("call: pathname=%s, flags=%d", pathname, flags);
|
|
#endif /* POSTK_DEBUG_ARCH_DEP_46 || POSTK_DEBUG_ARCH_DEP_62 */
|
|
|
|
if (!xpmem_my_part) {
|
|
ret = xpmem_init();
|
|
if (ret) {
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
#ifdef POSTK_DEBUG_ARCH_DEP_62 /* Absorb the difference between open and openat args. */
|
|
fd = syscall_generic_forwarding(num, ctx);
|
|
	if (fd < 0) {
|
|
XPMEM_DEBUG("syscall_num=%d error: fd=%d", num, fd);
|
|
return fd;
|
|
}
|
|
#else /* POSTK_DEBUG_ARCH_DEP_62 */
|
|
request.number = __NR_open;
|
|
request.args[0] = (unsigned long)pathname;
|
|
request.args[1] = flags;
|
|
fd = do_syscall(&request, ihk_mc_get_processor_id(), 0);
|
|
if (fd < 0) {
|
|
XPMEM_DEBUG("__NR_open error: fd=%d", fd);
|
|
return fd;
|
|
}
|
|
#endif /* POSTK_DEBUG_ARCH_DEP_62 */
|
|
|
|
ret = __xpmem_open();
|
|
if (ret) {
|
|
XPMEM_DEBUG("return: ret=%d", ret);
|
|
return ret;
|
|
}
|
|
|
|
mckfd = kmalloc(sizeof(struct mckfd), IHK_MC_AP_NOWAIT);
|
|
	if (!mckfd) {
|
|
return -ENOMEM;
|
|
}
|
|
XPMEM_DEBUG("kmalloc(): mckfd=0x%p", mckfd);
|
|
memset(mckfd, 0, sizeof(struct mckfd));
|
|
mckfd->fd = fd;
|
|
mckfd->sig_no = -1;
|
|
mckfd->ioctl_cb = xpmem_ioctl;
|
|
mckfd->close_cb = xpmem_close;
|
|
mckfd->dup_cb = xpmem_dup;
|
|
mckfd->data = (long)proc;
|
|
irqstate = ihk_mc_spinlock_lock(&proc->mckfd_lock);
|
|
|
|
if (proc->mckfd == NULL) {
|
|
proc->mckfd = mckfd;
|
|
mckfd->next = NULL;
|
|
}
|
|
else {
|
|
mckfd->next = proc->mckfd;
|
|
proc->mckfd = mckfd;
|
|
}
|
|
|
|
ihk_mc_spinlock_unlock(&proc->mckfd_lock, irqstate);
|
|
|
|
ihk_atomic_inc_return(&xpmem_my_part->n_opened);
|
|
XPMEM_DEBUG("n_opened=%d", xpmem_my_part->n_opened);
|
|
|
|
XPMEM_DEBUG("return: ret=%d", mckfd->fd);
|
|
|
|
return mckfd->fd;
|
|
}
|
|
|
|
|
|
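/**
 * xpmem_ioctl() - dispatch XPMEM_CMD_* ioctls issued on the XPMEM fd.
 *
 * Copies the per-command argument structure in from user space, calls
 * the matching xpmem_make/remove/get/release/attach/detach helper, and
 * copies the output fields (segid, apid, attach vaddr) back out,
 * undoing the operation if the copy-out fails.  Unknown commands
 * return -EINVAL.
 */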
static int xpmem_ioctl(
|
|
struct mckfd *mckfd,
|
|
ihk_mc_user_context_t *ctx)
|
|
{
|
|
int ret;
|
|
unsigned int cmd = ihk_mc_syscall_arg1(ctx);
|
|
unsigned long arg = ihk_mc_syscall_arg2(ctx);
|
|
|
|
XPMEM_DEBUG("call: cmd=0x%x, arg=0x%lx", cmd, arg);
|
|
|
|
switch (cmd) {
|
|
case XPMEM_CMD_VERSION: {
|
|
ret = XPMEM_CURRENT_VERSION;
|
|
|
|
XPMEM_DEBUG("return: cmd=0x%x, ret=0x%lx", cmd, ret);
|
|
|
|
return ret;
|
|
}
|
|
case XPMEM_CMD_MAKE: {
|
|
struct xpmem_cmd_make make_info;
|
|
xpmem_segid_t segid = 0;
|
|
|
|
if (copy_from_user(&make_info, (void __user *)arg,
|
|
sizeof(struct xpmem_cmd_make)))
|
|
return -EFAULT;
|
|
|
|
ret = xpmem_make(make_info.vaddr, make_info.size,
|
|
make_info.permit_type,
|
|
(void *)make_info.permit_value, &segid);
|
|
if (ret != 0) {
|
|
XPMEM_DEBUG("return: cmd=0x%x, ret=%d", cmd, ret);
|
|
return ret;
|
|
}
|
|
|
|
if (copy_to_user(&((struct xpmem_cmd_make __user *)arg)->segid,
|
|
(void *)&segid, sizeof(xpmem_segid_t))) {
|
|
(void)xpmem_remove(segid);
|
|
return -EFAULT;
|
|
}
|
|
|
|
XPMEM_DEBUG("return: cmd=0x%x, ret=%d", cmd, ret);
|
|
|
|
return ret;
|
|
}
|
|
case XPMEM_CMD_REMOVE: {
|
|
struct xpmem_cmd_remove remove_info;
|
|
|
|
if (copy_from_user(&remove_info, (void __user *)arg,
|
|
sizeof(struct xpmem_cmd_remove)))
|
|
return -EFAULT;
|
|
|
|
ret = xpmem_remove(remove_info.segid);
|
|
|
|
XPMEM_DEBUG("return: cmd=0x%x, ret=%d", cmd, ret);
|
|
|
|
return ret;
|
|
}
|
|
case XPMEM_CMD_GET: {
|
|
struct xpmem_cmd_get get_info;
|
|
xpmem_apid_t apid = 0;
|
|
|
|
if (copy_from_user(&get_info, (void __user *)arg,
|
|
sizeof(struct xpmem_cmd_get)))
|
|
return -EFAULT;
|
|
|
|
ret = xpmem_get(get_info.segid, get_info.flags,
|
|
get_info.permit_type,
|
|
(void *)get_info.permit_value, &apid);
|
|
if (ret != 0) {
|
|
XPMEM_DEBUG("return: cmd=0x%x, ret=%d", cmd, ret);
|
|
return ret;
|
|
}
|
|
|
|
if (copy_to_user(&((struct xpmem_cmd_get __user *)arg)->apid,
|
|
(void *)&apid, sizeof(xpmem_apid_t))) {
|
|
(void)xpmem_release(apid);
|
|
return -EFAULT;
|
|
}
|
|
|
|
XPMEM_DEBUG("return: cmd=0x%x, ret=%d", cmd, ret);
|
|
|
|
return ret;
|
|
}
|
|
case XPMEM_CMD_RELEASE: {
|
|
struct xpmem_cmd_release release_info;
|
|
|
|
if (copy_from_user(&release_info, (void __user *)arg,
|
|
sizeof(struct xpmem_cmd_release)))
|
|
return -EFAULT;
|
|
|
|
ret = xpmem_release(release_info.apid);
|
|
|
|
XPMEM_DEBUG("return: cmd=0x%x, ret=%d", cmd, ret);
|
|
|
|
return ret;
|
|
}
|
|
case XPMEM_CMD_ATTACH: {
|
|
struct xpmem_cmd_attach attach_info;
|
|
unsigned long at_vaddr = 0;
|
|
|
|
if (copy_from_user(&attach_info, (void __user *)arg,
|
|
sizeof(struct xpmem_cmd_attach)))
|
|
return -EFAULT;
|
|
|
|
ret = xpmem_attach(mckfd, attach_info.apid, attach_info.offset,
|
|
attach_info.size, attach_info.vaddr,
|
|
attach_info.fd, attach_info.flags,
|
|
&at_vaddr);
|
|
if (ret != 0) {
|
|
XPMEM_DEBUG("return: cmd=0x%x, ret=%d", cmd, ret);
|
|
return ret;
|
|
}
|
|
|
|
if (copy_to_user(
|
|
&((struct xpmem_cmd_attach __user *)arg)->vaddr,
|
|
(void *)&at_vaddr, sizeof(unsigned long))) {
|
|
(void)xpmem_detach(at_vaddr);
|
|
return -EFAULT;
|
|
}
|
|
|
|
XPMEM_DEBUG("return: cmd=0x%x, ret=%d", cmd, ret);
|
|
|
|
return ret;
|
|
}
|
|
case XPMEM_CMD_DETACH: {
|
|
struct xpmem_cmd_detach detach_info;
|
|
|
|
if (copy_from_user(&detach_info, (void __user *)arg,
|
|
sizeof(struct xpmem_cmd_detach)))
|
|
return -EFAULT;
|
|
|
|
ret = xpmem_detach(detach_info.vaddr);
|
|
|
|
XPMEM_DEBUG("return: cmd=0x%x, ret=%d", cmd, ret);
|
|
|
|
return ret;
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
|
|
XPMEM_DEBUG("return: cmd=0x%x, ret=%d", cmd, -EINVAL);
|
|
|
|
return -EINVAL;
|
|
}
|
|
|
|
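/**
 * xpmem_close() - close callback for the XPMEM fd.
 *
 * Drops the partition open count, releases the caller's XPMEM objects
 * via xpmem_flush() (skipped for duplicated fds, whose mckfd->data was
 * cleared by xpmem_dup()), and tears down the partition state once the
 * last open fd goes away.
 */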
static int xpmem_close(
|
|
struct mckfd *mckfd,
|
|
ihk_mc_user_context_t *ctx)
|
|
{
|
|
int n_opened;
|
|
|
|
XPMEM_DEBUG("call: fd=%d, pid=%d, rgid=%d",
|
|
mckfd->fd, proc->pid, proc->rgid);
|
|
|
|
n_opened = ihk_atomic_dec_return(&xpmem_my_part->n_opened);
|
|
XPMEM_DEBUG("n_opened=%d", n_opened);
|
|
|
|
if (mckfd->data) {
|
|
/* release my xpmem-objects */
|
|
xpmem_flush(mckfd);
|
|
}
|
|
|
|
if (!n_opened) {
|
|
xpmem_exit();
|
|
}
|
|
|
|
XPMEM_DEBUG("return: ret=%d", 0);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int xpmem_dup(
|
|
struct mckfd *mckfd,
|
|
ihk_mc_user_context_t *ctx)
|
|
{
|
|
mckfd->data = 0;
|
|
ihk_atomic_inc_return(&xpmem_my_part->n_opened);
|
|
|
|
return 0;
|
|
}
|
|
|
|
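/**
 * xpmem_init() - allocate and initialize the per-partition state
 * (thread-group hash table and open count).  Called lazily from
 * xpmem_open() on first use.
 */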
static int xpmem_init(void)
|
|
{
|
|
int i;
|
|
|
|
XPMEM_DEBUG("call: ");
|
|
|
|
xpmem_my_part = kmalloc(sizeof(struct xpmem_partition) +
|
|
sizeof(struct xpmem_hashlist) * XPMEM_TG_HASHTABLE_SIZE,
|
|
IHK_MC_AP_NOWAIT);
|
|
if (xpmem_my_part == NULL) {
|
|
return -ENOMEM;
|
|
}
|
|
XPMEM_DEBUG("kmalloc(): xpmem_my_part=0x%p", xpmem_my_part);
|
|
memset(xpmem_my_part, 0, sizeof(struct xpmem_partition) +
|
|
sizeof(struct xpmem_hashlist) * XPMEM_TG_HASHTABLE_SIZE);
|
|
|
|
for (i = 0; i < XPMEM_TG_HASHTABLE_SIZE; i++) {
|
|
mcs_rwlock_init(&xpmem_my_part->tg_hashtable[i].lock);
|
|
INIT_LIST_HEAD(&xpmem_my_part->tg_hashtable[i].list);
|
|
}
|
|
|
|
ihk_atomic_set(&xpmem_my_part->n_opened, 0);
|
|
|
|
XPMEM_DEBUG("return: ret=%d", 0);
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
static void xpmem_exit(void)
|
|
{
|
|
XPMEM_DEBUG("call: ");
|
|
|
|
if (xpmem_my_part) {
|
|
XPMEM_DEBUG("kfree(): xpmem_my_part=0x%p", xpmem_my_part);
|
|
kfree(xpmem_my_part);
|
|
xpmem_my_part = NULL;
|
|
}
|
|
|
|
XPMEM_DEBUG("return: ");
|
|
}
|
|
|
|
|
|
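/**
 * __xpmem_open() - register the calling process as an XPMEM thread group.
 *
 * A no-op if the process is already registered; otherwise a new
 * xpmem_thread_group is allocated, initialized (credentials, segment
 * list, access-permit hash table) and inserted into the partition's
 * tgid hash table.
 */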
static int __xpmem_open(void)
|
|
{
|
|
struct xpmem_thread_group *tg;
|
|
int index;
|
|
struct mcs_rwlock_node_irqsave lock;
|
|
|
|
XPMEM_DEBUG("call: ");
|
|
|
|
tg = xpmem_tg_ref_by_tgid(cpu_local_var(current)->proc->pid);
|
|
if (!IS_ERR(tg)) {
|
|
xpmem_tg_deref(tg);
|
|
XPMEM_DEBUG("return: ret=%d, tg=0x%p", 0, tg);
|
|
return 0;
|
|
}
|
|
|
|
tg = kmalloc(sizeof(struct xpmem_thread_group) +
|
|
sizeof(struct xpmem_hashlist) * XPMEM_AP_HASHTABLE_SIZE,
|
|
IHK_MC_AP_NOWAIT);
|
|
if (tg == NULL) {
|
|
return -ENOMEM;
|
|
}
|
|
XPMEM_DEBUG("kmalloc(): tg=0x%p", tg);
|
|
memset(tg, 0, sizeof(struct xpmem_thread_group) +
|
|
sizeof(struct xpmem_hashlist) * XPMEM_AP_HASHTABLE_SIZE);
|
|
|
|
ihk_mc_spinlock_init(&tg->lock);
|
|
tg->tgid = cpu_local_var(current)->proc->pid;
|
|
tg->uid = cpu_local_var(current)->proc->ruid;
|
|
tg->gid = cpu_local_var(current)->proc->rgid;
|
|
ihk_atomic_set(&tg->uniq_segid, 0);
|
|
ihk_atomic_set(&tg->uniq_apid, 0);
|
|
mcs_rwlock_init(&tg->seg_list_lock);
|
|
INIT_LIST_HEAD(&tg->seg_list);
|
|
ihk_atomic_set(&tg->n_pinned, 0);
|
|
INIT_LIST_HEAD(&tg->tg_hashlist);
|
|
tg->vm = cpu_local_var(current)->vm;
|
|
|
|
for (index = 0; index < XPMEM_AP_HASHTABLE_SIZE; index++) {
|
|
mcs_rwlock_init(&tg->ap_hashtable[index].lock);
|
|
INIT_LIST_HEAD(&tg->ap_hashtable[index].list);
|
|
}
|
|
|
|
xpmem_tg_not_destroyable(tg);
|
|
|
|
index = xpmem_tg_hashtable_index(tg->tgid);
|
|
mcs_rwlock_writer_lock(&xpmem_my_part->tg_hashtable[index].lock, &lock);
|
|
|
|
list_add_tail(&tg->tg_hashlist,
|
|
&xpmem_my_part->tg_hashtable[index].list);
|
|
|
|
mcs_rwlock_writer_unlock(&xpmem_my_part->tg_hashtable[index].lock,
|
|
&lock);
|
|
|
|
tg->group_leader = cpu_local_var(current);
|
|
|
|
XPMEM_DEBUG("return: ret=%d", 0);
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
static void xpmem_destroy_tg(
|
|
struct xpmem_thread_group *tg)
|
|
{
|
|
XPMEM_DEBUG("call: tg=0x%p", tg);
|
|
|
|
xpmem_tg_destroyable(tg);
|
|
xpmem_tg_deref(tg);
|
|
|
|
XPMEM_DEBUG("return: ");
|
|
}
|
|
|
|
|
|
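/**
 * xpmem_make() - implement XPMEM_CMD_MAKE.
 *
 * Validates the requested region (page aligned, non-zero size, octal
 * permission value) and publishes [vaddr, vaddr + size) of the calling
 * process as a new segment with a unique segid, returned via *segid_p.
 */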
static int xpmem_make(
|
|
unsigned long vaddr,
|
|
size_t size,
|
|
int permit_type,
|
|
void *permit_value,
|
|
xpmem_segid_t *segid_p)
|
|
{
|
|
xpmem_segid_t segid;
|
|
struct xpmem_thread_group *seg_tg;
|
|
struct xpmem_segment *seg;
|
|
struct mcs_rwlock_node_irqsave lock;
|
|
|
|
XPMEM_DEBUG("call: vaddr=0x%lx, size=0x%lx, permit_type=%d, "
|
|
"permit_value=0%04lo",
|
|
vaddr, size, permit_type,
|
|
(unsigned long)(uintptr_t)permit_value);
|
|
|
|
if (permit_type != XPMEM_PERMIT_MODE ||
|
|
((unsigned long)(uintptr_t)permit_value & ~00777) ||
|
|
size == 0) {
|
|
XPMEM_DEBUG("return: ret=%d", -EINVAL);
|
|
return -EINVAL;
|
|
}
|
|
|
|
seg_tg = xpmem_tg_ref_by_tgid(cpu_local_var(current)->proc->pid);
|
|
if (IS_ERR(seg_tg)) {
|
|
DBUG_ON(PTR_ERR(seg_tg) != -ENOENT);
|
|
return -XPMEM_ERRNO_NOPROC;
|
|
}
|
|
|
|
/*
|
|
* The start of the segment must be page aligned and it must be a
|
|
* multiple of pages in size.
|
|
*/
|
|
if (offset_in_page(vaddr) != 0 || offset_in_page(size) != 0) {
|
|
xpmem_tg_deref(seg_tg);
|
|
XPMEM_DEBUG("return: ret=%d", -EINVAL);
|
|
return -EINVAL;
|
|
}
|
|
|
|
segid = xpmem_make_segid(seg_tg);
|
|
if (segid < 0) {
|
|
xpmem_tg_deref(seg_tg);
|
|
return segid;
|
|
}
|
|
|
|
/* create a new struct xpmem_segment structure with a unique segid */
|
|
seg = kmalloc(sizeof(struct xpmem_segment), IHK_MC_AP_NOWAIT);
|
|
if (seg == NULL) {
|
|
xpmem_tg_deref(seg_tg);
|
|
return -ENOMEM;
|
|
}
|
|
XPMEM_DEBUG("kmalloc(): seg=0x%p", seg);
|
|
memset(seg, 0, sizeof(struct xpmem_segment));
|
|
|
|
ihk_mc_spinlock_init(&seg->lock);
|
|
seg->segid = segid;
|
|
seg->vaddr = vaddr;
|
|
seg->size = size;
|
|
seg->permit_type = permit_type;
|
|
seg->permit_value = permit_value;
|
|
seg->tg = seg_tg;
|
|
INIT_LIST_HEAD(&seg->ap_list);
|
|
INIT_LIST_HEAD(&seg->seg_list);
|
|
|
|
xpmem_seg_not_destroyable(seg);
|
|
|
|
mcs_rwlock_writer_lock(&seg_tg->seg_list_lock, &lock);
|
|
list_add_tail(&seg->seg_list, &seg_tg->seg_list);
|
|
mcs_rwlock_writer_unlock(&seg_tg->seg_list_lock, &lock);
|
|
|
|
xpmem_tg_deref(seg_tg);
|
|
|
|
*segid_p = segid;
|
|
|
|
XPMEM_DEBUG("return: ret=%d, segid=0x%lx", 0, *segid_p);
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
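/*
 * Build a new segid for seg_tg.  A segid packs the owner's tgid together
 * with a per-thread-group sequence number (struct xpmem_id), so it is
 * unique and also sufficient to locate the owning thread group later.
 * Returns -EBUSY once the per-group sequence space is exhausted.
 */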
static xpmem_segid_t xpmem_make_segid(
|
|
struct xpmem_thread_group *seg_tg)
|
|
{
|
|
struct xpmem_id segid;
|
|
xpmem_segid_t *segid_p = (xpmem_segid_t *)&segid;
|
|
int uniq;
|
|
|
|
XPMEM_DEBUG("call: seg_tg=0x%p, uniq_segid=%d",
|
|
seg_tg, ihk_atomic_read(&seg_tg->uniq_segid));
|
|
|
|
DBUG_ON(sizeof(struct xpmem_id) != sizeof(xpmem_segid_t));
|
|
|
|
uniq = ihk_atomic_inc_return(&seg_tg->uniq_segid);
|
|
if (uniq > XPMEM_MAX_UNIQ_ID) {
|
|
ihk_atomic_dec(&seg_tg->uniq_segid);
|
|
return -EBUSY;
|
|
}
|
|
|
|
*segid_p = 0;
|
|
segid.tgid = seg_tg->tgid;
|
|
segid.uniq = (unsigned long)uniq;
|
|
|
|
DBUG_ON(*segid_p <= 0);
|
|
|
|
XPMEM_DEBUG("return: segid=0x%lx, segid.tgid=%d, segid.uniq=%d",
|
|
segid, segid.tgid, segid.uniq);
|
|
|
|
return *segid_p;
|
|
}
|
|
|
|
|
|
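/**
 * xpmem_remove() - implement XPMEM_CMD_REMOVE.
 *
 * Only the segment's creator may remove it.  The segment is looked up
 * by segid, marked as being destroyed, its PTEs are cleared and it is
 * unlinked from the owner's segment list.
 */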
static int xpmem_remove(
|
|
xpmem_segid_t segid)
|
|
{
|
|
struct xpmem_thread_group *seg_tg;
|
|
struct xpmem_segment *seg;
|
|
|
|
XPMEM_DEBUG("call: segid=0x%lx", segid);
|
|
|
|
if (segid <= 0) {
|
|
XPMEM_DEBUG("return: ret=%d", -EINVAL);
|
|
return -EINVAL;
|
|
}
|
|
|
|
seg_tg = xpmem_tg_ref_by_segid(segid);
|
|
if (IS_ERR(seg_tg))
|
|
return PTR_ERR(seg_tg);
|
|
|
|
if (cpu_local_var(current)->proc->pid != seg_tg->tgid) {
|
|
xpmem_tg_deref(seg_tg);
|
|
XPMEM_DEBUG("return: ret=%d", -EACCES);
|
|
return -EACCES;
|
|
}
|
|
|
|
seg = xpmem_seg_ref_by_segid(seg_tg, segid);
|
|
if (IS_ERR(seg)) {
|
|
xpmem_tg_deref(seg_tg);
|
|
return PTR_ERR(seg);
|
|
}
|
|
DBUG_ON(seg->tg != seg_tg);
|
|
|
|
xpmem_remove_seg(seg_tg, seg);
|
|
xpmem_seg_deref(seg);
|
|
xpmem_tg_deref(seg_tg);
|
|
|
|
XPMEM_DEBUG("return: ret=%d", 0);
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
static void xpmem_remove_seg(
|
|
struct xpmem_thread_group *seg_tg,
|
|
struct xpmem_segment *seg)
|
|
{
|
|
	struct mcs_rwlock_node_irqsave lock;

	DBUG_ON(ihk_atomic_read(&seg->refcnt) <= 0);
|
|
|
|
XPMEM_DEBUG("call: tgid=%d, segid=0x%lx", seg_tg->tgid, seg->segid);
|
|
|
|
ihk_mc_spinlock_lock_noirq(&seg->lock);
|
|
if (seg->flags & XPMEM_FLAG_DESTROYING) {
|
|
ihk_mc_spinlock_unlock_noirq(&seg->lock);
|
|
return;
|
|
}
|
|
seg->flags |= XPMEM_FLAG_DESTROYING;
|
|
ihk_mc_spinlock_unlock_noirq(&seg->lock);
|
|
|
|
xpmem_clear_PTEs(seg);
|
|
|
|
ihk_mc_spinlock_lock_noirq(&seg->lock);
|
|
seg->flags |= XPMEM_FLAG_DESTROYED;
|
|
ihk_mc_spinlock_unlock_noirq(&seg->lock);
|
|
|
|
mcs_rwlock_writer_lock(&seg_tg->seg_list_lock, &lock);
|
|
list_del_init(&seg->seg_list);
|
|
mcs_rwlock_writer_unlock(&seg_tg->seg_list_lock, &lock);
|
|
|
|
xpmem_seg_destroyable(seg);
|
|
|
|
XPMEM_DEBUG("return: ");
|
|
}
|
|
|
|
|
|
static void xpmem_remove_segs_of_tg(
|
|
struct xpmem_thread_group *seg_tg)
|
|
{
|
|
struct xpmem_segment *seg;
|
|
struct mcs_rwlock_node_irqsave lock;
|
|
|
|
XPMEM_DEBUG("call: tgid=%d", seg_tg->tgid);
|
|
|
|
mcs_rwlock_writer_lock(&seg_tg->seg_list_lock, &lock);
|
|
|
|
while (!list_empty(&seg_tg->seg_list)) {
|
|
seg = list_entry((&seg_tg->seg_list)->next,
|
|
struct xpmem_segment, seg_list);
|
|
xpmem_seg_ref(seg);
|
|
mcs_rwlock_writer_unlock(&seg_tg->seg_list_lock, &lock);
|
|
|
|
xpmem_remove_seg(seg_tg, seg);
|
|
|
|
xpmem_seg_deref(seg);
|
|
|
|
mcs_rwlock_writer_lock(&seg_tg->seg_list_lock, &lock);
|
|
}
|
|
|
|
mcs_rwlock_writer_unlock(&seg_tg->seg_list_lock, &lock);
|
|
|
|
XPMEM_DEBUG("return: ");
|
|
}
|
|
|
|
|
|
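/**
 * xpmem_get() - implement XPMEM_CMD_GET.
 *
 * Checks the requested access flags against the segment's permission
 * mode and, on success, creates an xpmem_access_permit for the calling
 * process with a unique apid (returned via *apid_p).  The permit keeps
 * references on the segment and its owning thread group until released.
 */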
static int xpmem_get(
|
|
xpmem_segid_t segid,
|
|
int flags,
|
|
int permit_type,
|
|
void *permit_value,
|
|
xpmem_apid_t *apid_p)
|
|
{
|
|
xpmem_apid_t apid;
|
|
struct xpmem_access_permit *ap;
|
|
struct xpmem_segment *seg;
|
|
struct xpmem_thread_group *ap_tg, *seg_tg;
|
|
int index;
|
|
struct mcs_rwlock_node_irqsave lock;
|
|
|
|
XPMEM_DEBUG("call: segid=0x%lx, flags=%d, permit_type=%d, "
|
|
"permit_value=0%04lo",
|
|
segid, flags, permit_type,
|
|
(unsigned long)(uintptr_t)permit_value);
|
|
|
|
if (segid <= 0) {
|
|
return -EINVAL;
|
|
}
|
|
|
|
if ((flags & ~(XPMEM_RDONLY | XPMEM_RDWR)) ||
|
|
(flags & (XPMEM_RDONLY | XPMEM_RDWR)) ==
|
|
(XPMEM_RDONLY | XPMEM_RDWR)) {
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (permit_type != XPMEM_PERMIT_MODE || permit_value != NULL) {
|
|
return -EINVAL;
|
|
}
|
|
|
|
seg_tg = xpmem_tg_ref_by_segid(segid);
|
|
if (IS_ERR(seg_tg)) {
|
|
return PTR_ERR(seg_tg);
|
|
}
|
|
|
|
seg = xpmem_seg_ref_by_segid(seg_tg, segid);
|
|
if (IS_ERR(seg)) {
|
|
xpmem_tg_deref(seg_tg);
|
|
return PTR_ERR(seg);
|
|
}
|
|
|
|
if (xpmem_check_permit_mode(flags, seg) != 0) {
|
|
xpmem_seg_deref(seg);
|
|
xpmem_tg_deref(seg_tg);
|
|
return -EACCES;
|
|
}
|
|
|
|
ap_tg = xpmem_tg_ref_by_tgid(cpu_local_var(current)->proc->pid);
|
|
if (IS_ERR(ap_tg)) {
|
|
DBUG_ON(PTR_ERR(ap_tg) != -ENOENT);
|
|
xpmem_seg_deref(seg);
|
|
xpmem_tg_deref(seg_tg);
|
|
return -XPMEM_ERRNO_NOPROC;
|
|
}
|
|
|
|
apid = xpmem_make_apid(ap_tg);
|
|
if (apid < 0) {
|
|
xpmem_tg_deref(ap_tg);
|
|
xpmem_seg_deref(seg);
|
|
xpmem_tg_deref(seg_tg);
|
|
return apid;
|
|
}
|
|
|
|
/* create a new xpmem_access_permit structure with a unique apid */
|
|
ap = kmalloc(sizeof(struct xpmem_access_permit), IHK_MC_AP_NOWAIT);
|
|
if (ap == NULL) {
|
|
xpmem_tg_deref(ap_tg);
|
|
xpmem_seg_deref(seg);
|
|
xpmem_tg_deref(seg_tg);
|
|
return -ENOMEM;
|
|
}
|
|
XPMEM_DEBUG("kmalloc(): ap=0x%p", ap);
|
|
memset(ap, 0, sizeof(struct xpmem_access_permit));
|
|
|
|
ihk_mc_spinlock_init(&ap->lock);
|
|
ap->apid = apid;
|
|
ap->mode = flags;
|
|
ap->seg = seg;
|
|
ap->tg = ap_tg;
|
|
INIT_LIST_HEAD(&ap->att_list);
|
|
INIT_LIST_HEAD(&ap->ap_list);
|
|
INIT_LIST_HEAD(&ap->ap_hashlist);
|
|
|
|
xpmem_ap_not_destroyable(ap);
|
|
|
|
/* add ap to its seg's access permit list */
|
|
ihk_mc_spinlock_lock_noirq(&seg->lock);
|
|
list_add_tail(&ap->ap_list, &seg->ap_list);
|
|
ihk_mc_spinlock_unlock_noirq(&seg->lock);
|
|
|
|
/* add ap to its hash list */
|
|
index = xpmem_ap_hashtable_index(ap->apid);
|
|
mcs_rwlock_writer_lock(&ap_tg->ap_hashtable[index].lock, &lock);
|
|
list_add_tail(&ap->ap_hashlist, &ap_tg->ap_hashtable[index].list);
|
|
mcs_rwlock_writer_unlock(&ap_tg->ap_hashtable[index].lock, &lock);
|
|
|
|
xpmem_tg_deref(ap_tg);
|
|
|
|
*apid_p = apid;
|
|
|
|
XPMEM_DEBUG("return: ret=%d, apid=0x%lx", 0, *apid_p);
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
static int xpmem_check_permit_mode(
|
|
int flags,
|
|
struct xpmem_segment *seg)
|
|
{
|
|
int ret;
|
|
struct xpmem_perm perm;
|
|
|
|
XPMEM_DEBUG("call: flags=%d", flags);
|
|
|
|
DBUG_ON(seg->permit_type != XPMEM_PERMIT_MODE);
|
|
|
|
memset(&perm, 0, sizeof(struct xpmem_perm));
|
|
perm.uid = seg->tg->uid;
|
|
perm.gid = seg->tg->gid;
|
|
perm.mode = (unsigned long)seg->permit_value;
|
|
|
|
ret = xpmem_perms(&perm, XPMEM_PERM_IRUSR);
|
|
if (ret == 0 && (flags & XPMEM_RDWR)) {
|
|
ret = xpmem_perms(&perm, XPMEM_PERM_IWUSR);
|
|
}
|
|
|
|
XPMEM_DEBUG("return: ret=%d", ret);
|
|
|
|
return ret;
|
|
}
|
|
|
|
|
|
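/*
 * Owner/group/other permission check, similar to the classic UNIX
 * ipcperms() logic: the requested read/write bits are replicated into
 * all three octal digits, the granted mode is shifted according to
 * whether the caller matches the owner uid or gid, and access is denied
 * if any requested bit is missing.  Returns 0 if access is allowed,
 * -1 otherwise.
 */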
static int xpmem_perms(
|
|
struct xpmem_perm *perm,
|
|
short flag)
|
|
{
|
|
int ret = 0;
|
|
int requested_mode;
|
|
int granted_mode;
|
|
|
|
XPMEM_DEBUG("call: uid=%d, gid=%d, mode=0%lo, flag=0%o",
|
|
perm->uid, perm->gid, perm->mode, flag);
|
|
|
|
requested_mode = (flag >> 6) | (flag >> 3) | flag;
|
|
granted_mode = perm->mode;
|
|
if (perm->uid == cpu_local_var(current)->proc->ruid) {
|
|
granted_mode >>= 6;
|
|
}
|
|
else if (perm->gid == cpu_local_var(current)->proc->rgid) {
|
|
granted_mode >>= 3;
|
|
}
|
|
|
|
if (requested_mode & ~granted_mode & 0007) {
|
|
ret = -1;
|
|
}
|
|
|
|
XPMEM_DEBUG("return: ret=%d", ret);
|
|
|
|
return ret;
|
|
}
|
|
|
|
|
|
static xpmem_apid_t xpmem_make_apid(
|
|
struct xpmem_thread_group *ap_tg)
|
|
{
|
|
struct xpmem_id apid;
|
|
xpmem_apid_t *apid_p = (xpmem_apid_t *)≋
|
|
int uniq;
|
|
|
|
XPMEM_DEBUG("call: ap_tg=0x%p, uniq_apid=%d",
|
|
ap_tg, ihk_atomic_read(&ap_tg->uniq_apid));
|
|
|
|
DBUG_ON(sizeof(struct xpmem_id) != sizeof(xpmem_apid_t));
|
|
|
|
uniq = ihk_atomic_inc_return(&ap_tg->uniq_apid);
|
|
if (uniq > XPMEM_MAX_UNIQ_ID) {
|
|
ihk_atomic_dec(&ap_tg->uniq_apid);
|
|
return -EBUSY;
|
|
}
|
|
|
|
*apid_p = 0;
|
|
apid.tgid = ap_tg->tgid;
|
|
apid.uniq = (unsigned int)uniq;
|
|
|
|
XPMEM_DEBUG("return: apid=0x%lx, apid.tgid=%d, apid.uniq=%d",
|
|
apid, apid.tgid, apid.uniq);
|
|
|
|
return *apid_p;
|
|
}
|
|
|
|
|
|
static int xpmem_release(
|
|
xpmem_apid_t apid)
|
|
{
|
|
struct xpmem_thread_group *ap_tg;
|
|
struct xpmem_access_permit *ap;
|
|
|
|
XPMEM_DEBUG("call: apid=0x%lx", apid);
|
|
|
|
if (apid <= 0) {
|
|
return -EINVAL;
|
|
}
|
|
|
|
ap_tg = xpmem_tg_ref_by_apid(apid);
|
|
if (IS_ERR(ap_tg)) {
|
|
return PTR_ERR(ap_tg);
|
|
}
|
|
|
|
if (cpu_local_var(current)->proc->pid != ap_tg->tgid) {
|
|
xpmem_tg_deref(ap_tg);
|
|
return -EACCES;
|
|
}
|
|
|
|
ap = xpmem_ap_ref_by_apid(ap_tg, apid);
|
|
if (IS_ERR(ap)) {
|
|
xpmem_tg_deref(ap_tg);
|
|
return PTR_ERR(ap);
|
|
}
|
|
DBUG_ON(ap->tg != ap_tg);
|
|
|
|
xpmem_release_ap(ap_tg, ap);
|
|
xpmem_ap_deref(ap);
|
|
xpmem_tg_deref(ap_tg);
|
|
|
|
XPMEM_DEBUG("return: ret=%d", 0);
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
static void xpmem_release_ap(
|
|
struct xpmem_thread_group *ap_tg,
|
|
struct xpmem_access_permit *ap)
|
|
{
|
|
int index;
|
|
struct xpmem_thread_group *seg_tg;
|
|
struct xpmem_attachment *att;
|
|
struct xpmem_segment *seg;
|
|
struct mcs_rwlock_node_irqsave lock;
|
|
|
|
XPMEM_DEBUG("call: tgid=%d, apid=0x%lx", ap_tg->tgid, ap->apid);
|
|
|
|
ihk_mc_spinlock_lock_noirq(&ap->lock);
|
|
if (ap->flags & XPMEM_FLAG_DESTROYING) {
|
|
ihk_mc_spinlock_unlock_noirq(&ap->lock);
|
|
return;
|
|
}
|
|
ap->flags |= XPMEM_FLAG_DESTROYING;
|
|
|
|
while (!list_empty(&ap->att_list)) {
|
|
att = list_entry((&ap->att_list)->next, struct xpmem_attachment,
|
|
att_list);
|
|
xpmem_att_ref(att);
|
|
ihk_mc_spinlock_unlock_noirq(&ap->lock);
|
|
|
|
xpmem_detach_att(ap, att);
|
|
|
|
xpmem_att_deref(att);
|
|
|
|
ihk_mc_spinlock_lock_noirq(&ap->lock);
|
|
}
|
|
|
|
ap->flags |= XPMEM_FLAG_DESTROYED;
|
|
|
|
ihk_mc_spinlock_unlock_noirq(&ap->lock);
|
|
|
|
index = xpmem_ap_hashtable_index(ap->apid);
|
|
mcs_rwlock_writer_lock(&ap_tg->ap_hashtable[index].lock, &lock);
|
|
list_del_init(&ap->ap_hashlist);
|
|
mcs_rwlock_writer_unlock(&ap_tg->ap_hashtable[index].lock, &lock);
|
|
|
|
seg = ap->seg;
|
|
seg_tg = seg->tg;
|
|
|
|
ihk_mc_spinlock_lock_noirq(&seg->lock);
|
|
list_del_init(&ap->ap_list);
|
|
ihk_mc_spinlock_unlock_noirq(&seg->lock);
|
|
|
|
xpmem_seg_deref(seg);
|
|
xpmem_tg_deref(seg_tg);
|
|
|
|
xpmem_ap_destroyable(ap);
|
|
|
|
XPMEM_DEBUG("return: ");
|
|
}
|
|
|
|
|
|
static void xpmem_release_aps_of_tg(
|
|
struct xpmem_thread_group *ap_tg)
|
|
{
|
|
struct xpmem_hashlist *hashlist;
|
|
struct xpmem_access_permit *ap;
|
|
struct mcs_rwlock_node_irqsave lock;
|
|
int index;
|
|
|
|
XPMEM_DEBUG("call: tgid=%d", ap_tg->tgid);
|
|
|
|
for (index = 0; index < XPMEM_AP_HASHTABLE_SIZE; index++) {
|
|
hashlist = &ap_tg->ap_hashtable[index];
|
|
|
|
mcs_rwlock_writer_lock(&hashlist->lock, &lock);
|
|
while (!list_empty(&hashlist->list)) {
|
|
ap = list_entry((&hashlist->list)->next,
|
|
struct xpmem_access_permit, ap_hashlist);
|
|
xpmem_ap_ref(ap);
|
|
mcs_rwlock_writer_unlock(&hashlist->lock, &lock);
|
|
|
|
xpmem_release_ap(ap_tg, ap);
|
|
|
|
xpmem_ap_deref(ap);
|
|
|
|
mcs_rwlock_writer_lock(&hashlist->lock, &lock);
|
|
}
|
|
mcs_rwlock_writer_unlock(&hashlist->lock, &lock);
|
|
}
|
|
|
|
XPMEM_DEBUG("return: ");
|
|
}
|
|
|
|
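/*
 * Release every XPMEM object owned by the closing process: the thread
 * group is unhashed and marked as being destroyed, all of its access
 * permits (and their attachments) are released, all of its segments are
 * removed, and finally the thread group itself becomes destroyable.
 * Called from xpmem_close() via mckfd->close_cb.
 */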
static void xpmem_flush(struct mckfd *mckfd)
|
|
{
|
|
struct process *proc = (struct process *)mckfd->data;
|
|
struct xpmem_thread_group *tg;
|
|
int index;
|
|
struct mcs_rwlock_node_irqsave lock;
|
|
|
|
index = xpmem_tg_hashtable_index(proc->pid);
|
|
|
|
mcs_rwlock_writer_lock(&xpmem_my_part->tg_hashtable[index].lock, &lock);
|
|
|
|
tg = xpmem_tg_ref_by_tgid_all_nolock(proc->pid);
|
|
if (IS_ERR(tg)) {
|
|
mcs_rwlock_writer_unlock(
|
|
&xpmem_my_part->tg_hashtable[index].lock, &lock);
|
|
return;
|
|
}
|
|
|
|
list_del_init(&tg->tg_hashlist);
|
|
|
|
mcs_rwlock_writer_unlock(&xpmem_my_part->tg_hashtable[index].lock,
|
|
&lock);
|
|
|
|
XPMEM_DEBUG("tg->vm=0x%p", tg->vm);
|
|
|
|
ihk_mc_spinlock_lock_noirq(&tg->lock);
|
|
tg->flags |= XPMEM_FLAG_DESTROYING;
|
|
ihk_mc_spinlock_unlock_noirq(&tg->lock);
|
|
|
|
xpmem_release_aps_of_tg(tg);
|
|
xpmem_remove_segs_of_tg(tg);
|
|
|
|
ihk_mc_spinlock_lock_noirq(&tg->lock);
|
|
tg->flags |= XPMEM_FLAG_DESTROYED;
|
|
ihk_mc_spinlock_unlock_noirq(&tg->lock);
|
|
|
|
xpmem_destroy_tg(tg);
|
|
}
|
|
|
|
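/**
 * xpmem_attach() - implement XPMEM_CMD_ATTACH.
 *
 * Validates the requested window against the access permit, creates an
 * xpmem_attachment, and maps an anonymous shared region of the proper
 * size into the caller's address space with do_mmap().  The attachment
 * is stored in the new vm_range's private_data so that page faults and
 * unmap operations can find it; the user-visible attach address is
 * returned via *at_vaddr_p.
 */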
static int xpmem_attach(
|
|
struct mckfd *mckfd,
|
|
xpmem_apid_t apid,
|
|
off_t offset,
|
|
size_t size,
|
|
unsigned long vaddr,
|
|
int fd,
|
|
int att_flags,
|
|
unsigned long *at_vaddr_p)
|
|
{
|
|
int ret;
|
|
unsigned long flags;
|
|
unsigned long prot_flags = PROT_READ | PROT_WRITE;
|
|
unsigned long seg_vaddr;
|
|
unsigned long at_vaddr;
|
|
struct xpmem_thread_group *ap_tg;
|
|
struct xpmem_thread_group *seg_tg;
|
|
struct xpmem_access_permit *ap;
|
|
struct xpmem_segment *seg;
|
|
struct xpmem_attachment *att;
|
|
struct mcs_rwlock_node_irqsave at_lock;
|
|
struct vm_range *vmr;
|
|
struct process_vm *vm = cpu_local_var(current)->vm;
|
|
|
|
XPMEM_DEBUG("call: apid=0x%lx, offset=0x%lx, size=0x%lx, vaddr=0x%lx, "
|
|
"fd=%d, att_flags=%d",
|
|
apid, offset, size, vaddr, fd, att_flags);
|
|
|
|
if (apid <= 0) {
|
|
return -EINVAL;
|
|
}
|
|
|
|
/* The start of the attachment must be page aligned */
|
|
if (offset_in_page(vaddr) != 0 || offset_in_page(offset) != 0) {
|
|
return -EINVAL;
|
|
}
|
|
|
|
/* If the size is not page aligned, fix it */
|
|
if (offset_in_page(size) != 0) {
|
|
size += PAGE_SIZE - offset_in_page(size);
|
|
}
|
|
|
|
ap_tg = xpmem_tg_ref_by_apid(apid);
|
|
if (IS_ERR(ap_tg))
|
|
return PTR_ERR(ap_tg);
|
|
|
|
ap = xpmem_ap_ref_by_apid(ap_tg, apid);
|
|
if (IS_ERR(ap)) {
|
|
xpmem_tg_deref(ap_tg);
|
|
return PTR_ERR(ap);
|
|
}
|
|
|
|
seg = ap->seg;
|
|
xpmem_seg_ref(seg);
|
|
seg_tg = seg->tg;
|
|
xpmem_tg_ref(seg_tg);
|
|
|
|
if ((seg->flags & XPMEM_FLAG_DESTROYING) ||
|
|
(seg_tg->flags & XPMEM_FLAG_DESTROYING)) {
|
|
ret = -ENOENT;
|
|
goto out_1;
|
|
}
|
|
|
|
ret = xpmem_validate_access(ap, offset, size, XPMEM_RDWR, &seg_vaddr);
|
|
if (ret != 0) {
|
|
goto out_1;
|
|
}
|
|
|
|
size += offset_in_page(seg_vaddr);
|
|
|
|
seg = ap->seg;
|
|
if (cpu_local_var(current)->proc->pid == seg_tg->tgid && vaddr) {
|
|
if ((vaddr + size > seg_vaddr) && (vaddr < seg_vaddr + size)) {
|
|
ret = -EINVAL;
|
|
goto out_1;
|
|
}
|
|
}
|
|
|
|
/* create new attach structure */
|
|
att = kmalloc(sizeof(struct xpmem_attachment), IHK_MC_AP_NOWAIT);
|
|
if (att == NULL) {
|
|
ret = -ENOMEM;
|
|
goto out_1;
|
|
}
|
|
XPMEM_DEBUG("kmalloc(): att=0x%p", att);
|
|
memset(att, 0, sizeof(struct xpmem_attachment));
|
|
|
|
mcs_rwlock_init(&att->at_lock);
|
|
att->vaddr = seg_vaddr;
|
|
att->at_size = size;
|
|
att->ap = ap;
|
|
INIT_LIST_HEAD(&att->att_list);
|
|
att->vm = vm;
|
|
|
|
xpmem_att_not_destroyable(att);
|
|
xpmem_att_ref(att);
|
|
|
|
mcs_rwlock_writer_lock(&att->at_lock, &at_lock);
|
|
|
|
ihk_mc_spinlock_lock_noirq(&ap->lock);
|
|
list_add_tail(&att->att_list, &ap->att_list);
|
|
if (ap->flags & XPMEM_FLAG_DESTROYING) {
|
|
ihk_mc_spinlock_unlock_noirq(&ap->lock);
|
|
ret = -ENOENT;
|
|
goto out_2;
|
|
}
|
|
ihk_mc_spinlock_unlock_noirq(&ap->lock);
|
|
|
|
flags = MAP_SHARED;
|
|
if (vaddr != 0)
|
|
flags |= MAP_FIXED;
|
|
|
|
if (flags & MAP_FIXED) {
|
|
struct vm_range *existing_vmr;
|
|
|
|
ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
|
|
|
|
existing_vmr = lookup_process_memory_range(vm, vaddr,
|
|
vaddr + size);
|
|
|
|
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
|
|
|
|
for (; existing_vmr && existing_vmr->start < vaddr + size;
|
|
existing_vmr = next_process_memory_range(vm,
|
|
existing_vmr)) {
|
|
if (xpmem_is_private_data(existing_vmr)) {
|
|
ret = -EINVAL;
|
|
goto out_2;
|
|
}
|
|
}
|
|
}
|
|
|
|
flags |= MAP_ANONYMOUS;
|
|
XPMEM_DEBUG("do_mmap(): vaddr=0x%lx, size=0x%lx, prot_flags=0x%lx, "
|
|
"flags=0x%lx, fd=%d, offset=0x%lx",
|
|
vaddr, size, prot_flags, flags, mckfd->fd, offset);
|
|
/* The new range uses on-demand paging and is associated with shmobj because of
|
|
MAP_ANONYMOUS && !MAP_PRIVATE && MAP_SHARED */
|
|
at_vaddr = do_mmap(vaddr, size, prot_flags, flags, mckfd->fd, offset);
|
|
if (IS_ERR((void *)(uintptr_t)at_vaddr)) {
|
|
		ret = PTR_ERR((void *)(uintptr_t)at_vaddr);
|
|
goto out_2;
|
|
}
|
|
XPMEM_DEBUG("at_vaddr=0x%lx", at_vaddr);
|
|
att->at_vaddr = at_vaddr;
|
|
|
|
	ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);

	vmr = lookup_process_memory_range(vm, at_vaddr, at_vaddr + 1);
	if (!vmr) {
		ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
		ret = -ENOENT;
		goto out_2;
	}

	/* To identify pages of XPMEM attachment for rusage accounting */
	if (vmr->memobj) {
		vmr->memobj->flags |= MF_XPMEM;
	} else {
		ekprintf("%s: vmr->memobj equals to NULL\n", __FUNCTION__);
	}

	ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);

	vmr->private_data = att;
|
|
|
|
|
|
att->at_vmr = vmr;
|
|
|
|
*at_vaddr_p = at_vaddr + offset_in_page(att->vaddr);
|
|
|
|
ret = 0;
|
|
out_2:
|
|
if (ret != 0) {
|
|
att->flags |= XPMEM_FLAG_DESTROYING;
|
|
ihk_mc_spinlock_lock_noirq(&ap->lock);
|
|
list_del_init(&att->att_list);
|
|
ihk_mc_spinlock_unlock_noirq(&ap->lock);
|
|
xpmem_att_destroyable(att);
|
|
}
|
|
mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
|
|
xpmem_att_deref(att);
|
|
out_1:
|
|
xpmem_ap_deref(ap);
|
|
xpmem_tg_deref(ap_tg);
|
|
xpmem_seg_deref(seg);
|
|
xpmem_tg_deref(seg_tg);
|
|
|
|
XPMEM_DEBUG("return: ret=%d, at_vaddr=0x%lx", ret, *at_vaddr_p);
|
|
|
|
return ret;
|
|
}
|
|
|
|
|
|
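/**
 * xpmem_detach() - implement XPMEM_CMD_DETACH.
 *
 * Looks up the vm_range backing at_vaddr, unpins the pages that were
 * pinned on behalf of this attachment, unmaps the range and destroys
 * the xpmem_attachment.  Succeeds silently if no mapping exists at the
 * address; detaching a range that is not an XPMEM attachment of the
 * caller fails with -EINVAL or -EACCES.
 */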
static int xpmem_detach(
|
|
unsigned long at_vaddr)
|
|
{
|
|
int ret;
|
|
struct xpmem_access_permit *ap;
|
|
struct xpmem_attachment *att;
|
|
struct mcs_rwlock_node_irqsave at_lock;
|
|
struct vm_range *range;
|
|
struct process_vm *vm = cpu_local_var(current)->vm;
|
|
|
|
XPMEM_DEBUG("call: at_vaddr=0x%lx", at_vaddr);
|
|
|
|
ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
|
|
|
|
range = lookup_process_memory_range(vm, at_vaddr, at_vaddr + 1);
|
|
|
|
if (!range || range->start > at_vaddr) {
|
|
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
|
|
return 0;
|
|
}
|
|
|
|
att = (struct xpmem_attachment *)range->private_data;
|
|
if (att == NULL) {
|
|
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
|
|
return -EINVAL;
|
|
}
|
|
|
|
xpmem_att_ref(att);
|
|
|
|
mcs_rwlock_writer_lock(&att->at_lock, &at_lock);
|
|
|
|
if (att->flags & XPMEM_FLAG_DESTROYING) {
|
|
mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
|
|
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
|
|
xpmem_att_deref(att);
|
|
return 0;
|
|
}
|
|
att->flags |= XPMEM_FLAG_DESTROYING;
|
|
|
|
ap = att->ap;
|
|
xpmem_ap_ref(ap);
|
|
|
|
if (cpu_local_var(current)->proc->pid != ap->tg->tgid) {
|
|
att->flags &= ~XPMEM_FLAG_DESTROYING;
|
|
xpmem_ap_deref(ap);
|
|
mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
|
|
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
|
|
xpmem_att_deref(att);
|
|
return -EACCES;
|
|
}
|
|
|
|
xpmem_unpin_pages(ap->seg, vm, att->at_vaddr, att->at_size);
|
|
|
|
range->private_data = NULL;
|
|
/* range->memobj is released in xpmem_vm_munmap() --> xpmem_remove_process_range() -->
|
|
xpmem_free_process_memory_range() */
|
|
|
|
mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
|
|
|
|
XPMEM_DEBUG("xpmem_vm_munmap(): start=0x%lx, len=0x%lx",
|
|
range->start, att->at_size);
|
|
ret = xpmem_vm_munmap(vm, (void *)range->start, att->at_size);
|
|
if (ret) {
|
|
ekprintf("%s: ERROR: xpmem_vm_munmap() failed %d\n",
|
|
__FUNCTION__, ret);
|
|
}
|
|
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
|
|
DBUG_ON(ret != 0);
|
|
|
|
att->flags &= ~XPMEM_FLAG_VALIDPTEs;
|
|
|
|
ihk_mc_spinlock_lock_noirq(&ap->lock);
|
|
list_del_init(&att->att_list);
|
|
ihk_mc_spinlock_unlock_noirq(&ap->lock);
|
|
|
|
xpmem_att_destroyable(att);
|
|
|
|
xpmem_ap_deref(ap);
|
|
xpmem_att_deref(att);
|
|
|
|
XPMEM_DEBUG("return: ret=%d", 0);
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
static int xpmem_vm_munmap(
|
|
struct process_vm *vm,
|
|
void *addr,
|
|
size_t len)
|
|
{
|
|
int ret;
|
|
int ro_freed;
|
|
|
|
XPMEM_DEBUG("call: vm=0x%p, addr=0x%p, len=0x%lx", vm, addr, len);
|
|
|
|
begin_free_pages_pending();
|
|
|
|
ret = xpmem_remove_process_range(vm, (intptr_t)addr,
|
|
(intptr_t)(addr + len), &ro_freed);
|
|
|
|
finish_free_pages_pending();
|
|
|
|
XPMEM_DEBUG("return: ret=%d", ret);
|
|
|
|
return ret;
|
|
}
|
|
|
|
|
|
static int xpmem_remove_process_range(
|
|
struct process_vm *vm,
|
|
unsigned long start,
|
|
unsigned long end,
|
|
int *ro_freedp)
|
|
{
|
|
int error = 0;
|
|
struct vm_range *range;
|
|
struct vm_range *next;
|
|
int ro_freed = 0;
|
|
|
|
XPMEM_DEBUG("call: vm=0x%p, start=0x%lx, end=0x%lx", vm, start, end);
|
|
|
|
next = lookup_process_memory_range(vm, start, end);
|
|
while ((range = next) && range->start < end) {
|
|
next = next_process_memory_range(vm, range);
|
|
|
|
if (range->start < start) {
|
|
error = split_process_memory_range(vm,
|
|
range, start, &range);
|
|
if (error) {
|
|
ekprintf("%s(%p,%lx,%lx): ERROR: "
|
|
"split failed %d\n",
|
|
__FUNCTION__, vm, start, end, error);
|
|
goto out;
|
|
}
|
|
}
|
|
|
|
if (end < range->end) {
|
|
error = split_process_memory_range(vm, range, end,
|
|
NULL);
|
|
if (error) {
|
|
ekprintf("%s(%p,%lx,%lx): ERROR: "
|
|
"split failed %d\n",
|
|
__FUNCTION__, vm, start, end, error);
|
|
goto out;
|
|
}
|
|
}
|
|
|
|
if (!(range->flag & VR_PROT_WRITE)) {
|
|
ro_freed = 1;
|
|
}
|
|
|
|
if (range->private_data) {
|
|
xpmem_remove_process_memory_range(vm, range);
|
|
}
|
|
|
|
error = xpmem_free_process_memory_range(vm, range);
|
|
if (error) {
|
|
ekprintf("%s(%p,%lx,%lx): ERROR: free failed %d\n",
|
|
__FUNCTION__, vm, start, end, error);
|
|
goto out;
|
|
}
|
|
}
|
|
|
|
if (ro_freedp) {
|
|
*ro_freedp = ro_freed;
|
|
}
|
|
|
|
out:
|
|
XPMEM_DEBUG("return: ret=%d, ro_freed=%d", error, ro_freed);
|
|
|
|
return error;
|
|
}
|
|
|
|
|
|
static int xpmem_free_process_memory_range(
|
|
struct process_vm *vm,
|
|
struct vm_range *range)
|
|
{
|
|
int error;
|
|
int i;
|
|
|
|
XPMEM_DEBUG("call: vm=0x%p, start=0x%lx, end=0x%lx",
|
|
vm, range->start, range->end);
|
|
|
|
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
|
|
|
|
error = ihk_mc_pt_clear_range(vm->address_space->page_table, vm,
|
|
(void *)range->start, (void *)range->end);
|
|
|
|
ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
|
|
|
|
if (error && (error != -ENOENT)) {
|
|
ekprintf("%s(%p,%lx-%lx): ERROR: "
|
|
"ihk_mc_pt_clear_range(%lx-%lx) failed %d\n",
|
|
__FUNCTION__, vm, range->start, range->end,
|
|
range->start, range->end, error);
|
|
/* through */
|
|
}
|
|
|
|
if (range->memobj) {
|
|
memobj_release(range->memobj);
|
|
}
|
|
|
|
rb_erase(&range->vm_rb_node, &vm->vm_range_tree);
|
|
for (i = 0; i < VM_RANGE_CACHE_SIZE; ++i) {
|
|
if (vm->range_cache[i] == range)
|
|
vm->range_cache[i] = NULL;
|
|
}
|
|
|
|
kfree(range);
|
|
|
|
XPMEM_DEBUG("return: ret=%d", 0);
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
static void xpmem_detach_att(
|
|
struct xpmem_access_permit *ap,
|
|
struct xpmem_attachment *att)
|
|
{
|
|
int ret;
|
|
struct vm_range *range;
|
|
struct process_vm *vm;
|
|
struct mcs_rwlock_node_irqsave at_lock;
|
|
|
|
XPMEM_DEBUG("call: apid=0x%lx, att=0x%p", ap->apid, att);
|
|
|
|
XPMEM_DEBUG("detaching current->vm=0x%p, att->vm=0x%p",
|
|
(void *)cpu_local_var(current)->vm, (void *)att->vm);
|
|
|
|
vm = cpu_local_var(current)->vm ? cpu_local_var(current)->vm : att->vm;
|
|
|
|
ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
|
|
|
|
mcs_rwlock_writer_lock(&att->at_lock, &at_lock);
|
|
|
|
if (att->flags & XPMEM_FLAG_DESTROYING) {
|
|
mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
|
|
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
|
|
XPMEM_DEBUG("return: XPMEM_FLAG_DESTROYING");
|
|
return;
|
|
}
|
|
att->flags |= XPMEM_FLAG_DESTROYING;
|
|
|
|
	range = lookup_process_memory_range(vm,
			att->at_vaddr, att->at_vaddr + 1);
|
|
|
|
if (!range || range->start > att->at_vaddr) {
|
|
ihk_mc_spinlock_lock_noirq(&ap->lock);
|
|
list_del_init(&att->att_list);
|
|
ihk_mc_spinlock_unlock_noirq(&ap->lock);
|
|
mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
|
|
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
|
|
xpmem_att_destroyable(att);
|
|
XPMEM_DEBUG("return: range=%p");
|
|
return;
|
|
}
|
|
XPMEM_DEBUG("lookup_process_memory_range(): at_vaddr=0x%lx, "
|
|
"start=0x%lx, end=0x%lx",
|
|
att->at_vaddr, range->start, range->end);
|
|
|
|
DBUG_ON(!xpmem_is_private_data(range));
|
|
DBUG_ON((range->end - range->start) != att->at_size);
|
|
DBUG_ON(range->private_data != att);
|
|
|
|
xpmem_unpin_pages(ap->seg, vm, att->at_vaddr, att->at_size);
|
|
|
|
range->private_data = NULL;
|
|
/* range->memobj is released in xpmem_vm_munmap() --> xpmem_remove_process_range() -->
|
|
xpmem_free_process_memory_range() */
|
|
|
|
att->flags &= ~XPMEM_FLAG_VALIDPTEs;
|
|
|
|
ihk_mc_spinlock_lock_noirq(&ap->lock);
|
|
list_del_init(&att->att_list);
|
|
ihk_mc_spinlock_unlock_noirq(&ap->lock);
|
|
|
|
mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
|
|
|
|
XPMEM_DEBUG("xpmem_vm_munmap(): start=0x%lx, len=0x%lx",
|
|
range->start, att->at_size);
|
|
	ret = xpmem_vm_munmap(vm, (void *)range->start,
			att->at_size);
|
|
if (ret) {
|
|
ekprintf("%s: ERROR: xpmem_vm_munmap() failed %d\n",
|
|
__FUNCTION__, ret);
|
|
}
|
|
|
|
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
|
|
|
|
xpmem_att_destroyable(att);
|
|
|
|
XPMEM_DEBUG("return: ");
|
|
}
|
|
|
|
|
|
static void xpmem_clear_PTEs(
|
|
struct xpmem_segment *seg)
|
|
{
|
|
XPMEM_DEBUG("call: segid=0x%lx", seg->segid);
|
|
|
|
xpmem_clear_PTEs_range(seg, seg->vaddr, seg->vaddr + seg->size);
|
|
|
|
XPMEM_DEBUG("return: ");
|
|
}
|
|
|
|
|
|
static void xpmem_clear_PTEs_range(
|
|
struct xpmem_segment *seg,
|
|
unsigned long start,
|
|
unsigned long end)
|
|
{
|
|
struct xpmem_access_permit *ap;
|
|
|
|
XPMEM_DEBUG("call: segid=0x%lx, start=0x%lx, end=0x%lx",
|
|
seg->segid, start, end);
|
|
|
|
ihk_mc_spinlock_lock_noirq(&seg->lock);
|
|
|
|
list_for_each_entry(ap, &seg->ap_list, ap_list) {
|
|
xpmem_ap_ref(ap);
|
|
ihk_mc_spinlock_unlock_noirq(&seg->lock);
|
|
|
|
xpmem_clear_PTEs_of_ap(ap, start, end);
|
|
|
|
ihk_mc_spinlock_lock_noirq(&seg->lock);
|
|
if (list_empty(&ap->ap_list)) {
|
|
xpmem_ap_deref(ap);
|
|
ap = list_entry(&seg->ap_list,
|
|
struct xpmem_access_permit, ap_list);
|
|
}
|
|
else {
|
|
xpmem_ap_deref(ap);
|
|
}
|
|
}
|
|
|
|
ihk_mc_spinlock_unlock_noirq(&seg->lock);
|
|
|
|
XPMEM_DEBUG("return: ");
|
|
}
|
|
|
|
|
|
static void xpmem_clear_PTEs_of_ap(
|
|
struct xpmem_access_permit *ap,
|
|
unsigned long start,
|
|
unsigned long end)
|
|
{
|
|
struct xpmem_attachment *att;
|
|
|
|
XPMEM_DEBUG("call: apid=0x%lx, start=0x%lx, end=0x%lx",
|
|
ap->apid, start, end);
|
|
|
|
ihk_mc_spinlock_lock_noirq(&ap->lock);
|
|
|
|
list_for_each_entry(att, &ap->att_list, att_list) {
|
|
if (!(att->flags & XPMEM_FLAG_VALIDPTEs))
|
|
continue;
|
|
|
|
xpmem_att_ref(att);
|
|
ihk_mc_spinlock_unlock_noirq(&ap->lock);
|
|
|
|
xpmem_clear_PTEs_of_att(att, start, end);
|
|
|
|
ihk_mc_spinlock_lock_noirq(&ap->lock);
|
|
if (list_empty(&att->att_list)) {
|
|
xpmem_att_deref(att);
|
|
att = list_entry(&ap->att_list, struct xpmem_attachment,
|
|
att_list);
|
|
}
|
|
else {
|
|
xpmem_att_deref(att);
|
|
}
|
|
}
|
|
|
|
ihk_mc_spinlock_unlock_noirq(&ap->lock);
|
|
|
|
XPMEM_DEBUG("return: ");
|
|
}
|
|
|
|
|
|
static void xpmem_clear_PTEs_of_att(
|
|
struct xpmem_attachment *att,
|
|
unsigned long start,
|
|
unsigned long end)
|
|
{
|
|
int ret;
|
|
struct mcs_rwlock_node_irqsave at_lock;
|
|
|
|
XPMEM_DEBUG("call: att=0x%p, start=0x%lx, end=0x%lx",
|
|
att, start, end);
|
|
|
|
ihk_mc_spinlock_lock_noirq(&att->vm->memory_range_lock);
|
|
mcs_rwlock_writer_lock(&att->at_lock, &at_lock);
|
|
|
|
if (att->flags & XPMEM_FLAG_VALIDPTEs) {
|
|
struct vm_range *range;
|
|
unsigned long invalidate_start;
|
|
unsigned long invalidate_end;
|
|
unsigned long invalidate_len;
|
|
unsigned long offset_start;
|
|
unsigned long offset_end;
|
|
unsigned long unpin_at;
|
|
unsigned long att_vaddr_end = att->vaddr + att->at_size;
|
|
|
|
invalidate_start = max(start, att->vaddr);
|
|
invalidate_end = min(end, att_vaddr_end);
|
|
if (invalidate_start >= att_vaddr_end ||
|
|
invalidate_end <= att->vaddr)
|
|
goto out;
|
|
|
|
offset_start = invalidate_start - att->vaddr;
|
|
offset_end = invalidate_end - att->vaddr;
|
|
|
|
unpin_at = att->at_vaddr + offset_start;
|
|
invalidate_len = offset_end - offset_start;
|
|
DBUG_ON(offset_in_page(unpin_at) ||
|
|
offset_in_page(invalidate_len));
|
|
XPMEM_DEBUG("unpin_at=0x%lx, invalidate_len=0x%lx\n",
|
|
unpin_at, invalidate_len);
|
|
|
|
xpmem_unpin_pages(att->ap->seg, att->vm, unpin_at,
|
|
invalidate_len);
|
|
|
|
range = lookup_process_memory_range(att->vm, att->at_vaddr,
|
|
att->at_vaddr + 1);
|
|
if (!range) {
|
|
ekprintf("%s: ERROR: lookup_process_memory_range() "
|
|
"failed\n",
|
|
__FUNCTION__);
|
|
goto out;
|
|
}
|
|
|
|
mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
|
|
|
|
XPMEM_DEBUG(
|
|
"xpmem_vm_munmap(): start=0x%lx, len=0x%lx",
|
|
unpin_at, invalidate_len);
|
|
ret = xpmem_vm_munmap(att->vm, (void *)unpin_at,
|
|
invalidate_len);
|
|
if (ret) {
|
|
ekprintf("%s: ERROR: xpmem_vm_munmap() failed %d\n",
|
|
__FUNCTION__, ret);
|
|
}
|
|
|
|
mcs_rwlock_writer_lock(&att->at_lock, &at_lock);
|
|
|
|
if (offset_start == 0 && att->at_size == invalidate_len)
|
|
att->flags &= ~XPMEM_FLAG_VALIDPTEs;
|
|
}
|
|
out:
|
|
mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
|
|
ihk_mc_spinlock_unlock_noirq(&att->vm->memory_range_lock);
|
|
|
|
XPMEM_DEBUG("return: ");
|
|
}
|
|
|
|
|
|
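/*
 * Called when a vm_range that carries an XPMEM attachment in its
 * private_data is removed, fully or partially.  If the whole attachment
 * goes away the xpmem_attachment is destroyed; if only part of it is
 * unmapped, at_vaddr/at_size are adjusted to cover the remaining piece
 * and the stale private_data pointers are cleared.
 */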
int xpmem_remove_process_memory_range(
|
|
struct process_vm *vm,
|
|
struct vm_range *vmr)
|
|
{
|
|
struct vm_range *remaining_vmr;
|
|
u64 remaining_vaddr;
|
|
struct xpmem_access_permit *ap;
|
|
struct xpmem_attachment *att;
|
|
struct mcs_rwlock_node_irqsave at_lock;
|
|
|
|
XPMEM_DEBUG("call: vmr=0x%p, att=0x%p", vmr, vmr->private_data);
|
|
|
|
att = (struct xpmem_attachment *)vmr->private_data;
|
|
if (att == NULL) {
|
|
return 0;
|
|
}
|
|
|
|
XPMEM_DEBUG("cleaning up vmr with range: 0x%lx - 0x%lx",
|
|
vmr->start, vmr->end);
|
|
|
|
xpmem_att_ref(att);
|
|
|
|
ihk_mc_spinlock_lock_noirq(
|
|
&cpu_local_var(current)->vm->memory_range_lock);
|
|
|
|
mcs_rwlock_writer_lock(&att->at_lock, &at_lock);
|
|
|
|
if (att->flags & XPMEM_FLAG_DESTROYING) {
|
|
mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
|
|
xpmem_att_deref(att);
|
|
XPMEM_DEBUG("already cleaned up");
|
|
return 0;
|
|
}
|
|
|
|
if (vmr->start == att->at_vaddr &&
|
|
((vmr->end - vmr->start) == att->at_size)) {
|
|
att->flags |= XPMEM_FLAG_DESTROYING;
|
|
|
|
ap = att->ap;
|
|
xpmem_ap_ref(ap);
|
|
|
|
ihk_mc_spinlock_lock_noirq(&ap->lock);
|
|
list_del_init(&att->att_list);
|
|
ihk_mc_spinlock_unlock_noirq(&ap->lock);
|
|
|
|
xpmem_ap_deref(ap);
|
|
|
|
xpmem_att_destroyable(att);
|
|
goto out;
|
|
}
|
|
|
|
if (vmr->start == att->at_vaddr) {
|
|
remaining_vaddr = vmr->end;
|
|
}
|
|
else if (vmr->end == att->at_vaddr + att->at_size) {
|
|
remaining_vaddr = att->at_vaddr;
|
|
}
|
|
else {
|
|
remaining_vaddr = vmr->end;
|
|
remaining_vmr = lookup_process_memory_range(
|
|
cpu_local_var(current)->vm, remaining_vaddr - 1,
|
|
remaining_vaddr);
|
|
if (!remaining_vmr) {
|
|
ekprintf("%s: ERROR: vm_range is NULL\n", __FUNCTION__);
|
|
goto out;
|
|
}
|
|
else if (remaining_vmr->start > remaining_vaddr ||
|
|
remaining_vmr->private_data != vmr->private_data) {
|
|
ekprintf("%s: ERROR: vm_range: 0x%lx - 0x%lx\n",
|
|
__FUNCTION__, vmr->start, vmr->end);
|
|
goto out;
|
|
}
|
|
|
|
remaining_vmr->private_data = NULL;
|
|
/* This function is always followed by xpmem_free_process_memory_range()
|
|
which in turn calls memobj_release() */
|
|
remaining_vaddr = att->at_vaddr;
|
|
}
|
|
|
|
remaining_vmr = lookup_process_memory_range(
|
|
cpu_local_var(current)->vm, remaining_vaddr,
|
|
remaining_vaddr + 1);
|
|
if (!remaining_vmr) {
|
|
ekprintf("%s: ERROR: vm_range is NULL\n", __FUNCTION__);
|
|
goto out;
|
|
}
|
|
else if (remaining_vmr->start > remaining_vaddr ||
|
|
remaining_vmr->private_data != vmr->private_data) {
|
|
ekprintf("%s: ERROR: vm_range: 0x%lx - 0x%lx\n",
|
|
__FUNCTION__, vmr->start, vmr->end);
|
|
goto out;
|
|
}
|
|
|
|
att->at_vaddr = remaining_vmr->start;
|
|
att->at_size = remaining_vmr->end - remaining_vmr->start;
|
|
|
|
vmr->private_data = NULL;
|
|
/* This function is always followed by [xpmem_]free_process_memory_range()
|
|
which in turn calls memobj_release() */
|
|
|
|
out:
|
|
mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
|
|
|
|
ihk_mc_spinlock_unlock_noirq(
|
|
&cpu_local_var(current)->vm->memory_range_lock);
|
|
|
|
xpmem_att_deref(att);
|
|
|
|
XPMEM_DEBUG("return: ret=%d", 0);
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
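/*
 * Page-fault handler for XPMEM attachment ranges.
 *
 * Resolves the faulting address back to the corresponding address in
 * the source segment, makes sure the source page is present and pinned
 * (xpmem_ensure_valid_page()), and then maps the same physical page
 * into the faulting process with xpmem_remap_pte().  If the local PTE
 * is already populated, the duplicate pin is dropped instead.
 */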
int xpmem_fault_process_memory_range(
|
|
struct process_vm *vm,
|
|
struct vm_range *vmr,
|
|
unsigned long vaddr,
|
|
uint64_t reason)
|
|
{
|
|
int ret = 0;
|
|
unsigned long seg_vaddr = 0;
|
|
pte_t *pte = NULL;
|
|
pte_t *old_pte = NULL;
|
|
struct xpmem_thread_group *ap_tg;
|
|
struct xpmem_thread_group *seg_tg;
|
|
struct xpmem_access_permit *ap;
|
|
struct xpmem_attachment *att;
|
|
struct xpmem_segment *seg;
|
|
size_t pgsize;
|
|
struct mcs_rwlock_node_irqsave at_lock;
|
|
int att_locked = 0;
|
|
|
|
XPMEM_DEBUG("call: vmr=0x%p, vaddr=0x%lx, reason=0x%lx",
|
|
vmr, vaddr, reason);
|
|
|
|
att = (struct xpmem_attachment *)vmr->private_data;
|
|
if (att == NULL) {
|
|
return -EFAULT;
|
|
}
|
|
|
|
xpmem_att_ref(att);
|
|
ap = att->ap;
|
|
xpmem_ap_ref(ap);
|
|
ap_tg = ap->tg;
|
|
xpmem_tg_ref(ap_tg);
|
|
if ((ap->flags & XPMEM_FLAG_DESTROYING) ||
|
|
(ap_tg->flags & XPMEM_FLAG_DESTROYING)) {
|
|
xpmem_att_deref(att);
|
|
xpmem_ap_deref(ap);
|
|
xpmem_tg_deref(ap_tg);
|
|
return -EFAULT;
|
|
}
|
|
DBUG_ON(cpu_local_var(current)->proc->pid != ap_tg->tgid);
|
|
DBUG_ON(ap->mode != XPMEM_RDWR);
|
|
|
|
seg = ap->seg;
|
|
xpmem_seg_ref(seg);
|
|
seg_tg = seg->tg;
|
|
xpmem_tg_ref(seg_tg);
|
|
|
|
if ((seg->flags & XPMEM_FLAG_DESTROYING) ||
|
|
(seg_tg->flags & XPMEM_FLAG_DESTROYING)) {
|
|
ret = -ENOENT;
|
|
goto out_2;
|
|
}
|
|
|
|
mcs_rwlock_writer_lock(&att->at_lock, &at_lock);
|
|
att_locked = 1;
|
|
|
|
if ((att->flags & XPMEM_FLAG_DESTROYING) ||
|
|
(ap_tg->flags & XPMEM_FLAG_DESTROYING) ||
|
|
(seg_tg->flags & XPMEM_FLAG_DESTROYING)) {
|
|
goto out_2;
|
|
}
|
|
|
|
if (vaddr < att->at_vaddr || vaddr + 1 > att->at_vaddr + att->at_size) {
|
|
goto out_2;
|
|
}
|
|
|
|
seg_vaddr = (att->vaddr & PAGE_MASK) + (vaddr - att->at_vaddr);
|
|
XPMEM_DEBUG("vaddr=%lx, seg_vaddr=%lx", vaddr, seg_vaddr);
|
|
|
|
ret = xpmem_ensure_valid_page(seg, seg_vaddr);
|
|
if (ret != 0) {
|
|
goto out_2;
|
|
}
|
|
|
|
pte = xpmem_vaddr_to_pte(seg_tg->vm, seg_vaddr, &pgsize);
|
|
|
|
att->flags |= XPMEM_FLAG_VALIDPTEs;
|
|
|
|
out_2:
|
|
xpmem_ap_deref(ap);
|
|
xpmem_tg_deref(ap_tg);
|
|
|
|
if (pte && !pte_is_null(pte)) {
|
|
old_pte = xpmem_vaddr_to_pte(cpu_local_var(current)->vm, vaddr,
|
|
&pgsize);
|
|
if (old_pte && !pte_is_null(old_pte)) {
|
|
if (*old_pte != *pte) {
|
|
ret = -EFAULT;
|
|
ekprintf("%s: ERROR: pte mismatch: "
|
|
"0x%lx != 0x%lx\n",
|
|
__FUNCTION__, *old_pte, *pte);
|
|
}
|
|
|
|
ihk_atomic_dec(&seg->tg->n_pinned);
|
|
goto out_1;
|
|
}
|
|
|
|
ret = xpmem_remap_pte(vm, vmr, vaddr, reason, seg, seg_vaddr);
|
|
if (ret) {
|
|
ekprintf("%s: ERROR: xpmem_remap_pte() failed %d\n",
|
|
__FUNCTION__, ret);
|
|
}
|
|
}
|
|
|
|
flush_tlb_single(vaddr);
|
|
|
|
out_1:
|
|
if (att_locked) {
|
|
mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
|
|
}
|
|
|
|
xpmem_tg_deref(seg_tg);
|
|
xpmem_seg_deref(seg);
|
|
xpmem_att_deref(att);
|
|
|
|
XPMEM_DEBUG("return: ret=%d", ret);
|
|
|
|
return ret;
|
|
}
|
|
|
|
|
|
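/*
 * Map the segment page backing seg_vaddr into the attaching process at
 * vaddr: look up the source PTE in the segment owner's page table, take
 * its physical address, and install it into the attacher's page table
 * with attributes derived from the vm_range flags and the fault reason.
 */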
static int xpmem_remap_pte(
|
|
struct process_vm *vm,
|
|
struct vm_range *vmr,
|
|
unsigned long vaddr,
|
|
uint64_t reason,
|
|
struct xpmem_segment *seg,
|
|
unsigned long seg_vaddr)
|
|
{
|
|
int ret;
|
|
struct xpmem_thread_group *seg_tg = seg->tg;
|
|
struct vm_range *seg_vmr;
|
|
pte_t *seg_pte;
|
|
void *seg_pgaddr;
|
|
size_t seg_pgsize;
|
|
int seg_p2align;
|
|
uintptr_t seg_phys;
|
|
pte_t *att_pte;
|
|
void *att_pgaddr;
|
|
size_t att_pgsize;
|
|
int att_p2align;
|
|
enum ihk_mc_pt_attribute att_attr;
|
|
|
|
XPMEM_DEBUG("call: vmr=0x%p, vaddr=0x%lx, reason=0x%lx, segid=0x%lx, "
|
|
"seg_vaddr=0x%lx",
|
|
vmr, vaddr, reason, seg->segid, seg_vaddr);
|
|
|
|
ihk_mc_spinlock_lock_noirq(&seg_tg->vm->memory_range_lock);
|
|
|
|
seg_vmr = lookup_process_memory_range(seg_tg->vm, seg_vaddr,
|
|
seg_vaddr + 1);
|
|
|
|
ihk_mc_spinlock_unlock_noirq(&seg_tg->vm->memory_range_lock);
|
|
|
|
if (!seg_vmr) {
|
|
ret = -EFAULT;
|
|
ekprintf("%s: ERROR: lookup_process_memory_range() failed\n",
|
|
__FUNCTION__);
|
|
goto out;
|
|
}
|
|
|
|
seg_pte = ihk_mc_pt_lookup_pte(seg_tg->vm->address_space->page_table,
|
|
(void *)seg_vaddr, seg_vmr->pgshift, &seg_pgaddr, &seg_pgsize,
|
|
&seg_p2align);
|
|
if (!seg_pte) {
|
|
ret = -EFAULT;
|
|
ekprintf("%s: ERROR: ihk_mc_pt_lookup_pte() failed\n",
|
|
__FUNCTION__);
|
|
goto out;
|
|
}
|
|
XPMEM_DEBUG("seg_pte=0x%016lx, seg_pgaddr=0x%p, seg_pgsize=%lu, "
|
|
"seg_p2align=%d",
|
|
*seg_pte, seg_pgaddr, seg_pgsize, seg_p2align);
|
|
|
|
seg_phys = pte_get_phys(seg_pte);
|
|
XPMEM_DEBUG("seg_phys=0x%lx", seg_phys);
|
|
|
|
att_pte = ihk_mc_pt_lookup_pte(vm->address_space->page_table,
|
|
(void *)vaddr, vmr->pgshift, &att_pgaddr, &att_pgsize,
|
|
&att_p2align);
|
|
XPMEM_DEBUG("att_pte=%p, att_pgaddr=0x%p, att_pgsize=%lu, "
|
|
"att_p2align=%d",
|
|
att_pte, att_pgaddr, att_pgsize, att_p2align);
|
|
|
|
att_attr = arch_vrflag_to_ptattr(vmr->flag, reason, att_pte);
|
|
XPMEM_DEBUG("att_attr=0x%lx", att_attr);
|
|
|
|
if (att_pte) {
|
|
ret = ihk_mc_pt_set_pte(vm->address_space->page_table, att_pte,
|
|
att_pgsize, seg_phys, att_attr);
|
|
if (ret) {
|
|
ret = -EFAULT;
|
|
ekprintf("%s: ERROR: ihk_mc_pt_set_pte() failed %d\n",
|
|
__FUNCTION__, ret);
|
|
goto out;
|
|
}
|
|
// memory_stat_rss_add() is called by the process hosting the memory area
|
|
}
|
|
else {
|
|
ret = ihk_mc_pt_set_range(vm->address_space->page_table, vm,
|
|
att_pgaddr, att_pgaddr + att_pgsize, seg_phys, att_attr,
|
|
vmr->pgshift, vmr);
|
|
if (ret) {
|
|
ret = -EFAULT;
|
|
ekprintf("%s: ERROR: ihk_mc_pt_set_range() failed %d\n",
|
|
__FUNCTION__, ret);
|
|
goto out;
|
|
}
|
|
// memory_stat_rss_add() is called by the process hosting the memory area
|
|
}
|
|
|
|
out:
|
|
XPMEM_DEBUG("return: ret=%d", ret);
|
|
|
|
return ret;
|
|
}
|
|
|
|
|
|
static int xpmem_ensure_valid_page(
|
|
struct xpmem_segment *seg,
|
|
unsigned long vaddr)
|
|
{
|
|
int ret;
|
|
struct xpmem_thread_group *seg_tg = seg->tg;
|
|
|
|
XPMEM_DEBUG("call: segid=0x%lx, vaddr=0x%lx", seg->segid, vaddr);
|
|
|
|
if (seg->flags & XPMEM_FLAG_DESTROYING)
|
|
return -ENOENT;
|
|
|
|
ret = xpmem_pin_page(seg_tg, seg_tg->group_leader, seg_tg->vm, vaddr);
|
|
|
|
XPMEM_DEBUG("return: ret=%d", ret);
|
|
|
|
return ret;
|
|
}
|
|
|
|
|
|
static pte_t * xpmem_vaddr_to_pte(
|
|
struct process_vm *vm,
|
|
unsigned long vaddr,
|
|
size_t *pgsize)
|
|
{
|
|
pte_t *pte = NULL;
|
|
struct vm_range *range;
|
|
int pgshift;
|
|
void *base;
|
|
size_t size;
|
|
int p2align;
|
|
|
|
range = lookup_process_memory_range(vm, vaddr, vaddr + 1);
|
|
if (range) {
|
|
pgshift = range->pgshift;
|
|
}
|
|
else {
|
|
goto out;
|
|
}
|
|
|
|
pte = ihk_mc_pt_lookup_pte(vm->address_space->page_table,
|
|
(void *)vaddr, pgshift, &base, &size, &p2align);
|
|
if (pte) {
|
|
*pgsize = size;
|
|
}
|
|
else {
|
|
*pgsize = PAGE_SIZE;
|
|
}
|
|
|
|
out:
|
|
|
|
return pte;
|
|
}
|
|
|
|
|
|
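/*
 * Fault in (and thereby pin) the source page at vaddr in the segment
 * owner's address space using page_fault_process_vm(), bumping the
 * owner thread group's n_pinned count.  Addresses that fall inside
 * another XPMEM attachment are rejected to avoid chained attachments.
 */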
static int xpmem_pin_page(
|
|
struct xpmem_thread_group *tg,
|
|
struct thread *src_thread,
|
|
struct process_vm *src_vm,
|
|
unsigned long vaddr)
|
|
{
|
|
int ret;
|
|
struct vm_range *range;
|
|
|
|
XPMEM_DEBUG("call: tgid=%d, vaddr=0x%lx", tg->tgid, vaddr);
|
|
|
|
ihk_mc_spinlock_lock_noirq(&src_vm->memory_range_lock);
|
|
|
|
range = lookup_process_memory_range(src_vm, vaddr, vaddr + 1);
|
|
|
|
ihk_mc_spinlock_unlock_noirq(&src_vm->memory_range_lock);
|
|
|
|
if (!range || range->start > vaddr) {
|
|
return -ENOENT;
|
|
}
|
|
|
|
if (xpmem_is_private_data(range)) {
|
|
return -ENOENT;
|
|
}
|
|
|
|
ret = page_fault_process_vm(src_vm, (void *)vaddr,
|
|
PF_POPULATE | PF_WRITE | PF_USER);
|
|
if (!ret) {
|
|
ihk_atomic_inc(&tg->n_pinned);
|
|
}
|
|
else {
|
|
return -ENOENT;
|
|
}
|
|
|
|
XPMEM_DEBUG("return: ret=%d", ret);
|
|
|
|
return ret;
|
|
}
|
|
|
|
|
|
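/*
 * Walk [vaddr, vaddr + size) page by page, count how many pages are
 * actually mapped (i.e. were pinned earlier), and subtract that count
 * from the owner thread group's n_pinned.  Unmapped stretches are
 * skipped a whole page-table block at a time.
 */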
static void xpmem_unpin_pages(
|
|
struct xpmem_segment *seg,
|
|
struct process_vm *vm,
|
|
unsigned long vaddr,
|
|
size_t size)
|
|
{
|
|
int n_pgs = (((offset_in_page(vaddr) + (size)) + (PAGE_SIZE - 1)) >>
|
|
PAGE_SHIFT);
|
|
int n_pgs_unpinned = 0;
|
|
size_t vsize = 0;
|
|
pte_t *pte = NULL;
|
|
|
|
XPMEM_DEBUG("call: segid=0x%lx, vaddr=0x%lx, size=0x%lx",
|
|
seg->segid, vaddr, size);
|
|
|
|
XPMEM_DEBUG("n_pgs=%d", n_pgs);
|
|
|
|
vaddr &= PAGE_MASK;
|
|
|
|
while (n_pgs > 0) {
|
|
pte = xpmem_vaddr_to_pte(vm, vaddr, &vsize);
|
|
if (pte && !pte_is_null(pte)) {
|
|
n_pgs_unpinned++;
|
|
vaddr += PAGE_SIZE;
|
|
n_pgs--;
|
|
}
|
|
else {
|
|
vsize = ((vaddr + vsize) & (~(vsize - 1)));
|
|
n_pgs -= (vsize - vaddr) / PAGE_SIZE;
|
|
vaddr = vsize;
|
|
}
|
|
}
|
|
|
|
XPMEM_DEBUG("sub: tg->n_pinned=%d, n_pgs_unpinned=%d",
|
|
seg->tg->n_pinned, n_pgs_unpinned);
|
|
ihk_atomic_sub(n_pgs_unpinned, &seg->tg->n_pinned);
|
|
|
|
XPMEM_DEBUG("return: ");
|
|
}
|
|
|
|
|
|
static struct xpmem_thread_group * __xpmem_tg_ref_by_tgid_nolock_internal(
|
|
pid_t tgid,
|
|
int index,
|
|
int return_destroying)
|
|
{
|
|
struct xpmem_thread_group *tg;
|
|
|
|
list_for_each_entry(tg, &xpmem_my_part->tg_hashtable[index].list,
|
|
tg_hashlist) {
|
|
if (tg->tgid == tgid) {
|
|
if ((tg->flags & XPMEM_FLAG_DESTROYING) &&
|
|
!return_destroying) {
|
|
continue;
|
|
}
|
|
|
|
xpmem_tg_ref(tg);
|
|
|
|
return tg;
|
|
}
|
|
}
|
|
|
|
return ERR_PTR(-ENOENT);
|
|
}
|
|
|
|
|
|
static struct xpmem_thread_group * xpmem_tg_ref_by_segid(
|
|
xpmem_segid_t segid)
|
|
{
|
|
struct xpmem_thread_group *tg;
|
|
|
|
tg = xpmem_tg_ref_by_tgid(xpmem_segid_to_tgid(segid));
|
|
|
|
return tg;
|
|
}
|
|
|
|
|
|
static struct xpmem_thread_group * xpmem_tg_ref_by_apid(
|
|
xpmem_apid_t apid)
|
|
{
|
|
struct xpmem_thread_group *tg;
|
|
|
|
tg = xpmem_tg_ref_by_tgid(xpmem_apid_to_tgid(apid));
|
|
|
|
return tg;
|
|
}
|
|
|
|
|
|
static void xpmem_tg_deref(
|
|
struct xpmem_thread_group *tg)
|
|
{
|
|
DBUG_ON(ihk_atomic_read(&tg->refcnt) <= 0);
|
|
if (ihk_atomic_dec_return(&tg->refcnt) != 0) {
|
|
XPMEM_DEBUG("return: tg->refcnt=%d, tg->n_pinned=%d",
|
|
tg->refcnt, tg->n_pinned);
|
|
return;
|
|
}
|
|
|
|
XPMEM_DEBUG("kfree(): tg=0x%p", tg);
|
|
kfree(tg);
|
|
}
|
|
|
|
|
|
static struct xpmem_segment * xpmem_seg_ref_by_segid(
|
|
struct xpmem_thread_group *seg_tg,
|
|
xpmem_segid_t segid)
|
|
{
|
|
struct xpmem_segment *seg;
|
|
struct mcs_rwlock_node_irqsave lock;
|
|
|
|
mcs_rwlock_reader_lock(&seg_tg->seg_list_lock, &lock);
|
|
|
|
list_for_each_entry(seg, &seg_tg->seg_list, seg_list) {
|
|
if (seg->segid == segid) {
|
|
if (seg->flags & XPMEM_FLAG_DESTROYING)
|
|
continue;
|
|
|
|
xpmem_seg_ref(seg);
|
|
mcs_rwlock_reader_unlock(&seg_tg->seg_list_lock, &lock);
|
|
return seg;
|
|
}
|
|
}
|
|
|
|
mcs_rwlock_reader_unlock(&seg_tg->seg_list_lock, &lock);
|
|
|
|
return ERR_PTR(-ENOENT);
|
|
}
|
|
|
|
|
|
static void xpmem_seg_deref(
|
|
struct xpmem_segment *seg)
|
|
{
|
|
DBUG_ON(ihk_atomic_read(&seg->refcnt) <= 0);
|
|
if (ihk_atomic_dec_return(&seg->refcnt) != 0) {
|
|
XPMEM_DEBUG("return: seg->refcnt=%d", seg->refcnt);
|
|
return;
|
|
}
|
|
|
|
DBUG_ON(!(seg->flags & XPMEM_FLAG_DESTROYING));
|
|
|
|
XPMEM_DEBUG("kfree(): seg=0x%p", seg);
|
|
kfree(seg);
|
|
}
|
|
|
|
|
|
static struct xpmem_access_permit * xpmem_ap_ref_by_apid(
|
|
struct xpmem_thread_group *ap_tg,
|
|
xpmem_apid_t apid)
|
|
{
|
|
int index;
|
|
struct xpmem_access_permit *ap;
|
|
struct mcs_rwlock_node_irqsave lock;
|
|
|
|
index = xpmem_ap_hashtable_index(apid);
|
|
mcs_rwlock_reader_lock(&ap_tg->ap_hashtable[index].lock, &lock);
|
|
|
|
list_for_each_entry(ap, &ap_tg->ap_hashtable[index].list,
|
|
ap_hashlist) {
|
|
if (ap->apid == apid) {
|
|
if (ap->flags & XPMEM_FLAG_DESTROYING) {
|
|
break;
|
|
}
|
|
|
|
xpmem_ap_ref(ap);
|
|
mcs_rwlock_reader_unlock(
|
|
&ap_tg->ap_hashtable[index].lock, &lock);
|
|
return ap;
|
|
}
|
|
}
|
|
|
|
mcs_rwlock_reader_unlock(&ap_tg->ap_hashtable[index].lock, &lock);
|
|
|
|
return ERR_PTR(-ENOENT);
|
|
}
|
|
|
|
|
|
static void xpmem_ap_deref(
|
|
struct xpmem_access_permit *ap)
|
|
{
|
|
DBUG_ON(ihk_atomic_read(&ap->refcnt) <= 0);
|
|
if (ihk_atomic_dec_return(&ap->refcnt) != 0) {
|
|
XPMEM_DEBUG("return: ap->refcnt=%d", ap->refcnt);
|
|
return;
|
|
}
|
|
|
|
DBUG_ON(!(ap->flags & XPMEM_FLAG_DESTROYING));
|
|
|
|
XPMEM_DEBUG("kfree(): ap=0x%p", ap);
|
|
kfree(ap);
|
|
}
|
|
|
|
|
|
static void xpmem_att_deref(
|
|
struct xpmem_attachment *att)
|
|
{
|
|
DBUG_ON(ihk_atomic_read(&att->refcnt) <= 0);
|
|
if (ihk_atomic_dec_return(&att->refcnt) != 0) {
|
|
XPMEM_DEBUG("return: att->refcnt=%d", att->refcnt);
|
|
return;
|
|
}
|
|
|
|
DBUG_ON(!(att->flags & XPMEM_FLAG_DESTROYING));
|
|
|
|
XPMEM_DEBUG("kfree(): att=0x%p", att);
|
|
kfree(att);
|
|
}
|
|
|
|
|
|
static int xpmem_validate_access(
|
|
struct xpmem_access_permit *ap,
|
|
off_t offset,
|
|
size_t size,
|
|
int mode,
|
|
unsigned long *vaddr)
|
|
{
|
|
XPMEM_DEBUG("call: apid=0x%lx, offset=0x%lx, size=0x%lx, mode=%d",
|
|
ap->apid, offset, size, mode);
|
|
|
|
if (cpu_local_var(current)->proc->pid != ap->tg->tgid ||
|
|
(mode == XPMEM_RDWR && ap->mode == XPMEM_RDONLY)) {
|
|
return -EACCES;
|
|
}
|
|
|
|
if (offset < 0 || size == 0 || offset + size > ap->seg->size) {
|
|
return -EINVAL;
|
|
}
|
|
|
|
*vaddr = ap->seg->vaddr + offset;
|
|
|
|
XPMEM_DEBUG("return: ret=%d, vaddr=0x%lx", 0, *vaddr);
|
|
|
|
return 0;
|
|
}
|
|
|