From c38d536aaa4fb5f8e5096cfe72f3989e33046605 Mon Sep 17 00:00:00 2001 From: Yoichi Umezawa Date: Wed, 29 Mar 2017 18:20:53 +0900 Subject: [PATCH] xpmem: porting xpmem v2.6.3 implement xpmem_get, xpmem_release, xpmem_attach, xpmem_detach --- arch/x86/kernel/memory.c | 2 +- executer/user/mcexec.c | 18 + kernel/include/process.h | 1 + kernel/include/xpmem.h | 5 + kernel/include/xpmem_private.h | 130 ++- kernel/process.c | 28 +- kernel/xpmem.c | 1682 ++++++++++++++++++++++++++++++-- 7 files changed, 1770 insertions(+), 96 deletions(-) diff --git a/arch/x86/kernel/memory.c b/arch/x86/kernel/memory.c index 74e5d95e..d03addea 100644 --- a/arch/x86/kernel/memory.c +++ b/arch/x86/kernel/memory.c @@ -1101,7 +1101,7 @@ static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base, page = phys_to_page(phys); } - if (page && page_is_in_memobj(page) && (old & PFL1_DIRTY) && + if (page && page_is_in_memobj(page) && (old & PFL1_DIRTY) && (args->memobj) && !(args->memobj->flags & MF_ZEROFILL)) { memobj_flush_page(args->memobj, phys, PTL1_SIZE); } diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index 257cad64..b60de305 100644 --- a/executer/user/mcexec.c +++ b/executer/user/mcexec.c @@ -2636,6 +2636,24 @@ return_execve2: do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); break; + case __NR_stat: + ret = do_strncpy_from_user(fd, pathbuf, (void *)w.sr.args[0], PATH_MAX); + if (ret >= PATH_MAX) { + ret = -ENAMETOOLONG; + } + if (ret < 0) { + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); + break; + } + + fn = chgpath(pathbuf, tmpbuf); + + ret = stat(fn, (struct stat *)w.sr.args[1]); + __dprintf("stat: path=%s, ret=%ld\n", fn, ret); + SET_ERR(ret); + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); + break; + default: ret = do_generic_syscall(&w); do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); diff --git a/kernel/include/process.h b/kernel/include/process.h index b80287e7..a4c4f8d3 100644 --- a/kernel/include/process.h +++ b/kernel/include/process.h @@ -389,6 +389,7 @@ struct vm_range { off_t objoff; int pgshift; /* page size. 
0 means THP */ int padding; + void *private_data; }; struct vm_range_numa_policy { diff --git a/kernel/include/xpmem.h b/kernel/include/xpmem.h index e8f5f8b2..e69d233a 100644 --- a/kernel/include/xpmem.h +++ b/kernel/include/xpmem.h @@ -11,11 +11,16 @@ #ifndef _XPMEM_H #define _XPMEM_H +#include #include #define XPMEM_DEV_PATH "/dev/xpmem" extern int xpmem_open(ihk_mc_user_context_t *ctx); +extern int xpmem_remove_process_memory_range(struct process_vm *vm, + struct vm_range *vmr); +extern int xpmem_fault_process_memory_range(struct process_vm *vm, + struct vm_range *vmr, unsigned long vaddr, uint64_t reason); #endif /* _XPMEM_H */ diff --git a/kernel/include/xpmem_private.h b/kernel/include/xpmem_private.h index 279e5ba3..cfc89e79 100644 --- a/kernel/include/xpmem_private.h +++ b/kernel/include/xpmem_private.h @@ -160,7 +160,7 @@ static inline int xpmem_ap_hashtable_index(xpmem_apid_t apid) index = ((xpmem_id_t *)&apid)->xpmem_id.uniq % XPMEM_AP_HASHTABLE_SIZE; - XPMEM_DEBUG("return: apid=%lu, index=%d", apid, index); + XPMEM_DEBUG("return: apid=0x%lx, index=%d", apid, index); return index; } @@ -174,22 +174,20 @@ struct xpmem_thread_group { uid_t uid; /* tg's uid */ gid_t gid; /* tg's gid */ volatile int flags; /* tg attributes and state */ - ihk_atomic_t uniq_segid; - ihk_atomic_t uniq_apid; - mcs_rwlock_lock_t seg_list_lock; + ihk_atomic_t uniq_segid; /* segid uniq */ + ihk_atomic_t uniq_apid; /* apid uniq */ + mcs_rwlock_lock_t seg_list_lock; /* tg's list of segs lock */ struct list_head seg_list; /* tg's list of segs */ ihk_atomic_t refcnt; /* references to tg */ ihk_atomic_t n_pinned; /* #of pages pinned by this tg */ struct list_head tg_hashlist; /* tg hash list */ struct thread *group_leader; /* thread group leader */ - struct process_vm *vm; /* tg's mm */ - ihk_atomic_t n_recall_PFNs; /* #of recall of PFNs in progress */ + struct process_vm *vm; /* tg's process_vm */ struct xpmem_hashlist ap_hashtable[]; /* locks + ap hash lists */ }; struct xpmem_segment { ihk_spinlock_t lock; /* seg lock */ - mcs_rwlock_lock_t seg_lock; /* seg sema */ xpmem_segid_t segid; /* unique segid */ unsigned long vaddr; /* starting address */ size_t size; /* size of seg */ @@ -216,18 +214,16 @@ struct xpmem_access_permit { }; struct xpmem_attachment { - mcs_rwlock_lock_t at_lock; /* att lock for serialization */ - struct mcs_rwlock_node_irqsave at_irqsave; /* att lock for serialization */ + mcs_rwlock_lock_t at_lock; /* att lock */ unsigned long vaddr; /* starting address of seg attached */ unsigned long at_vaddr; /* address where seg is attached */ size_t at_size; /* size of seg attachment */ - struct vm_range *at_vma; /* vma where seg is attachment */ + struct vm_range *at_vmr; /* vm_range where seg is attachment */ volatile int flags; /* att attributes and state */ ihk_atomic_t refcnt; /* references to att */ struct xpmem_access_permit *ap; /* associated access permit */ struct list_head att_list; /* atts linked to access permit */ - struct process_vm *vm; /* mm struct attached to */ - mcs_rwlock_lock_t invalidate_lock; /* to serialize page table invalidates */ + struct process_vm *vm; /* process_vm attached to */ }; struct xpmem_partition { @@ -249,8 +245,10 @@ struct xpmem_perm { #define XPMEM_PERM_IRUSR 00400 #define XPMEM_PERM_IWUSR 00200 +extern struct xpmem_partition *xpmem_my_part; + static int xpmem_ioctl(struct mckfd *mckfd, ihk_mc_user_context_t *ctx); -static int xpmem_close( struct mckfd *mckfd, ihk_mc_user_context_t *ctx); +static int xpmem_close(struct mckfd *mckfd, 
ihk_mc_user_context_t *ctx); static int xpmem_init(void); static void xpmem_exit(void); @@ -263,10 +261,47 @@ static xpmem_segid_t xpmem_make_segid(struct xpmem_thread_group *); static int xpmem_remove(xpmem_segid_t); static void xpmem_remove_seg(struct xpmem_thread_group *, struct xpmem_segment *); +static void xpmem_remove_segs_of_tg(struct xpmem_thread_group *seg_tg); +static int xpmem_get(xpmem_segid_t, int, int, void *, xpmem_apid_t *); +static int xpmem_check_permit_mode(int, struct xpmem_segment *); +static int xpmem_perms(struct xpmem_perm *, short); +static xpmem_apid_t xpmem_make_apid(struct xpmem_thread_group *); + +static int xpmem_release(xpmem_apid_t); +static void xpmem_release_ap(struct xpmem_thread_group *, + struct xpmem_access_permit *); +static void xpmem_release_aps_of_tg(struct xpmem_thread_group *ap_tg); + +static int xpmem_attach(struct mckfd *, xpmem_apid_t, off_t, size_t, + unsigned long, int, int, unsigned long *); + +static int xpmem_detach(unsigned long); +static int xpmem_vm_munmap(struct process_vm *vm, void *addr, size_t len); +static int xpmem_remove_process_range(struct process_vm *vm, + unsigned long start, unsigned long end, int *ro_freedp); +static int xpmem_free_process_memory_range(struct process_vm *vm, + struct vm_range *range); +static void xpmem_detach_att(struct xpmem_access_permit *, + struct xpmem_attachment *); static void xpmem_clear_PTEs(struct xpmem_segment *); +static void xpmem_clear_PTEs_range(struct xpmem_segment *, unsigned long, + unsigned long); +static void xpmem_clear_PTEs_of_ap(struct xpmem_access_permit *, unsigned long, + unsigned long); +static void xpmem_clear_PTEs_of_att(struct xpmem_attachment *, unsigned long, + unsigned long); -extern struct xpmem_partition *xpmem_my_part; +static int xpmem_remap_pte(struct process_vm *, struct vm_range *, + unsigned long, uint64_t, struct xpmem_segment *, unsigned long); + +static int xpmem_ensure_valid_page(struct xpmem_segment *, unsigned long); +static pte_t * xpmem_vaddr_to_pte(struct process_vm *, unsigned long, + size_t *pgsize); +static int xpmem_pin_page(struct xpmem_thread_group *, struct thread *, + struct process_vm *, unsigned long); +static void xpmem_unpin_pages(struct xpmem_segment *, struct process_vm *, + unsigned long, size_t); static struct xpmem_thread_group * __xpmem_tg_ref_by_tgid_nolock_internal( pid_t, int, int); @@ -317,10 +352,17 @@ static inline struct xpmem_thread_group *__xpmem_tg_ref_by_tgid_nolock( #define xpmem_tg_ref_by_tgid_all_nolock(t) __xpmem_tg_ref_by_tgid_nolock(t, 1) static struct xpmem_thread_group * xpmem_tg_ref_by_segid(xpmem_segid_t); +static struct xpmem_thread_group * xpmem_tg_ref_by_apid(xpmem_apid_t); static void xpmem_tg_deref(struct xpmem_thread_group *); static struct xpmem_segment *xpmem_seg_ref_by_segid(struct xpmem_thread_group *, xpmem_segid_t); static void xpmem_seg_deref(struct xpmem_segment *); +static struct xpmem_access_permit * xpmem_ap_ref_by_apid( + struct xpmem_thread_group *, xpmem_apid_t); +static void xpmem_ap_deref(struct xpmem_access_permit *); +static void xpmem_att_deref(struct xpmem_attachment *); +static int xpmem_validate_access(struct xpmem_access_permit *, off_t, size_t, + int, unsigned long *); /* * Inlines that mark an internal driver structure as being destroyable or not. 
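
The idea behind these inlines: each structure is created holding one "pinning" reference (`*_not_destroyable()` sets refcnt to 1), temporary users bracket accesses with `*_ref()`/`*_deref()`, and teardown calls `*_destroyable()` to drop the pin, so whichever deref reaches zero frees the object. A minimal user-space sketch of the same pattern (toy `struct obj` with a plain int refcnt instead of `ihk_atomic_t`; illustrative only, not code from this patch):

```c
#include <assert.h>
#include <stdlib.h>

struct obj { int refcnt; };

static void obj_deref(struct obj *o)
{
	assert(o->refcnt > 0);
	if (--o->refcnt == 0)
		free(o);		/* last reference gone */
}

/* xpmem_*_not_destroyable(): pin the object at creation time */
static void obj_not_destroyable(struct obj *o) { o->refcnt = 1; }

/* xpmem_*_destroyable(): drop the pin; object dies on the last deref */
static void obj_destroyable(struct obj *o) { obj_deref(o); }

int main(void)
{
	struct obj *o = calloc(1, sizeof(*o));

	obj_not_destroyable(o);	/* refcnt = 1 */
	o->refcnt++;		/* xpmem_*_ref(): a temporary user appears */
	obj_destroyable(o);	/* teardown requested; user still holds a ref */
	obj_deref(o);		/* temporary user done: refcnt hits 0, freed */
	return 0;
}
```
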
@@ -363,6 +405,42 @@ static inline void xpmem_seg_destroyable( XPMEM_DEBUG("return: "); } +static inline void xpmem_ap_not_destroyable( + struct xpmem_access_permit *ap) +{ + ihk_atomic_set(&ap->refcnt, 1); + + XPMEM_DEBUG("return: ap->refcnt=%d", ap->refcnt); +} + +static inline void xpmem_ap_destroyable( + struct xpmem_access_permit *ap) +{ + XPMEM_DEBUG("call: "); + + xpmem_ap_deref(ap); + + XPMEM_DEBUG("return: "); +} + +static inline void xpmem_att_not_destroyable( + struct xpmem_attachment *att) +{ + ihk_atomic_set(&att->refcnt, 1); + + XPMEM_DEBUG("return: att->refcnt=%d", att->refcnt); +} + +static inline void xpmem_att_destroyable( + struct xpmem_attachment *att) +{ + XPMEM_DEBUG("call: "); + + xpmem_att_deref(att); + + XPMEM_DEBUG("return: "); +} + /* * Inlines that increment the refcnt for the specified structure. */ @@ -384,5 +462,29 @@ static inline void xpmem_seg_ref( XPMEM_DEBUG("return: seg->refcnt=%d", seg->refcnt); } +static inline void xpmem_ap_ref( + struct xpmem_access_permit *ap) +{ + DBUG_ON(ihk_atomic_read(&ap->refcnt) <= 0); + ihk_atomic_inc(&ap->refcnt); + + XPMEM_DEBUG("return: ap->refcnt=%d", ap->refcnt); +} + +static inline void xpmem_att_ref( + struct xpmem_attachment *att) +{ + DBUG_ON(ihk_atomic_read(&att->refcnt) <= 0); + ihk_atomic_inc(&att->refcnt); + + XPMEM_DEBUG("return: att->refcnt=%d", att->refcnt); +} + +static inline int xpmem_is_private_data( + struct vm_range *vmr) +{ + return (vmr->private_data != NULL); +} + #endif /* _XPMEM_PRIVATE_H */ diff --git a/kernel/process.c b/kernel/process.c index 7607b8d5..93563b4c 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -31,6 +31,7 @@ #include #include #include +#include //#define DEBUG_PRINT_PROCESS @@ -637,6 +638,7 @@ static int copy_user_ranges(struct process_vm *vm, struct process_vm *orgvm) range->memobj = src_range->memobj; range->objoff = src_range->objoff; range->pgshift = src_range->pgshift; + range->private_data = src_range->private_data; if (range->memobj) { memobj_ref(range->memobj); } @@ -734,6 +736,7 @@ int split_process_memory_range(struct process_vm *vm, struct vm_range *range, newrange->end = range->end; newrange->flag = range->flag; newrange->pgshift = range->pgshift; + newrange->private_data = range->private_data; if (range->memobj) { memobj_ref(range->memobj); @@ -953,6 +956,10 @@ int remove_process_memory_range(struct process_vm *vm, ro_freed = 1; } + if (freerange->private_data) { + xpmem_remove_process_memory_range(vm, freerange); + } + error = free_process_memory_range(vm, freerange); if (error) { ekprintf("remove_process_memory_range(%p,%lx,%lx):" @@ -1058,6 +1065,7 @@ int add_process_memory_range(struct process_vm *vm, range->memobj = memobj; range->objoff = offset; range->pgshift = pgshift; + range->private_data = NULL; rc = 0; if (phys == NOPHYS) { @@ -1793,7 +1801,12 @@ static int do_page_fault_process_vm(struct process_vm *vm, void *fault_addr0, ui } } - error = page_fault_process_memory_range(vm, range, fault_addr, reason); + if (!range->private_data) { + error = page_fault_process_memory_range(vm, range, fault_addr, reason); + } + else { + error = xpmem_fault_process_memory_range(vm, range, fault_addr, reason); + } if (error == -ERESTART) { goto out; } @@ -2209,6 +2222,19 @@ release_process_vm(struct process_vm *vm) return; } + { + long irqstate; + struct mckfd *fdp; + + irqstate = ihk_mc_spinlock_lock(&proc->mckfd_lock); + for (fdp = proc->mckfd; fdp; fdp = fdp->next) { + if (fdp->close_cb) { + fdp->close_cb(fdp, NULL); + } + } + 
ihk_mc_spinlock_unlock(&proc->mckfd_lock, irqstate); + } + if(vm->free_cb) vm->free_cb(vm, vm->opt); diff --git a/kernel/xpmem.c b/kernel/xpmem.c index 129aaae0..1093261a 100644 --- a/kernel/xpmem.c +++ b/kernel/xpmem.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -59,7 +60,7 @@ int xpmem_open( request.args[0] = (unsigned long)pathname; request.args[1] = flags; fd = do_syscall(&request, ihk_mc_get_processor_id(), 0); - if(fd < 0){ + if (fd < 0) { XPMEM_DEBUG("__NR_open error: fd=%d", fd); return fd; } @@ -80,12 +81,14 @@ int xpmem_open( mckfd->sig_no = -1; mckfd->ioctl_cb = xpmem_ioctl; mckfd->close_cb = xpmem_close; + mckfd->data = (long)proc; irqstate = ihk_mc_spinlock_lock(&proc->mckfd_lock); - if(proc->mckfd == NULL) { + if (proc->mckfd == NULL) { proc->mckfd = mckfd; mckfd->next = NULL; - } else { + } + else { mckfd->next = proc->mckfd; proc->mckfd = mckfd; } @@ -93,6 +96,7 @@ int xpmem_open( ihk_mc_spinlock_unlock(&proc->mckfd_lock, irqstate); ihk_atomic_inc_return(&xpmem_my_part->n_opened); + XPMEM_DEBUG("n_opened=%d", xpmem_my_part->n_opened); XPMEM_DEBUG("return: ret=%d", mckfd->fd); @@ -159,26 +163,25 @@ static int xpmem_ioctl( } case XPMEM_CMD_GET: { struct xpmem_cmd_get get_info; -// xpmem_apid_t apid = 0; + xpmem_apid_t apid = 0; if (copy_from_user(&get_info, (void __user *)arg, sizeof(struct xpmem_cmd_get))) return -EFAULT; -// ret = xpmem_get(get_info.segid, get_info.flags, -// get_info.permit_type, -// (void *)get_info.permit_value, &apid); // TODO - ret = -EINVAL; + ret = xpmem_get(get_info.segid, get_info.flags, + get_info.permit_type, + (void *)get_info.permit_value, &apid); if (ret != 0) { XPMEM_DEBUG("return: cmd=0x%x, ret=%d", cmd, ret); return ret; } -// if (copy_to_user(&((struct xpmem_cmd_get __user *)arg)->apid, -// (void *)&apid, sizeof(xpmem_apid_t))) { -// (void)xpmem_release(apid); -// return -EFAULT; -// } + if (copy_to_user(&((struct xpmem_cmd_get __user *)arg)->apid, + (void *)&apid, sizeof(xpmem_apid_t))) { + (void)xpmem_release(apid); + return -EFAULT; + } XPMEM_DEBUG("return: cmd=0x%x, ret=%d", cmd, ret); @@ -191,8 +194,7 @@ static int xpmem_ioctl( sizeof(struct xpmem_cmd_release))) return -EFAULT; -// ret = xpmem_release(release_info.apid); // TODO - ret = -EINVAL; + ret = xpmem_release(release_info.apid); XPMEM_DEBUG("return: cmd=0x%x, ret=%d", cmd, ret); @@ -200,28 +202,27 @@ static int xpmem_ioctl( } case XPMEM_CMD_ATTACH: { struct xpmem_cmd_attach attach_info; -// unsigned long at_vaddr = 0; + unsigned long at_vaddr = 0; if (copy_from_user(&attach_info, (void __user *)arg, sizeof(struct xpmem_cmd_attach))) return -EFAULT; -// ret = xpmem_attach(mckfd, attach_info.apid, attach_info.offset, -// attach_info.size, attach_info.vaddr, -// attach_info.fd, attach_info.flags, -// &at_vaddr); // TODO - ret = -EINVAL; + ret = xpmem_attach(mckfd, attach_info.apid, attach_info.offset, + attach_info.size, attach_info.vaddr, + attach_info.fd, attach_info.flags, + &at_vaddr); if (ret != 0) { XPMEM_DEBUG("return: cmd=0x%x, ret=%d", cmd, ret); return ret; } -// if (copy_to_user( -// &((struct xpmem_cmd_attach __user *)arg)->vaddr, -// (void *)&at_vaddr, sizeof(unsigned long))) { -// (void)xpmem_detach(at_vaddr); -// return -EFAULT; -// } + if (copy_to_user( + &((struct xpmem_cmd_attach __user *)arg)->vaddr, + (void *)&at_vaddr, sizeof(unsigned long))) { + (void)xpmem_detach(at_vaddr); + return -EFAULT; + } XPMEM_DEBUG("return: cmd=0x%x, ret=%d", cmd, ret); @@ -234,8 +235,7 @@ static int xpmem_ioctl( sizeof(struct 
xpmem_cmd_detach))) return -EFAULT; -// ret = xpmem_detach(detach_info.vaddr); // TODO - ret = -EINVAL; + ret = xpmem_detach(detach_info.vaddr); XPMEM_DEBUG("return: cmd=0x%x, ret=%d", cmd, ret); @@ -255,27 +255,24 @@ static int xpmem_close( struct mckfd *mckfd, ihk_mc_user_context_t *ctx) { + int n_opened; + struct process *proc = (struct process *)mckfd->data; struct xpmem_thread_group *tg; int index; struct mcs_rwlock_node_irqsave lock; - int n_opened; - XPMEM_DEBUG("call: fd=%d", mckfd->fd); + XPMEM_DEBUG("call: fd=%d, pid=%d, rgid=%d", + mckfd->fd, proc->pid, proc->rgid); n_opened = ihk_atomic_dec_return(&xpmem_my_part->n_opened); - if (n_opened) { - XPMEM_DEBUG("return: ret=%d, n_opened=%d", 0, n_opened); - return 0; - } XPMEM_DEBUG("n_opened=%d", n_opened); - index = xpmem_tg_hashtable_index(cpu_local_var(current)->proc->pid); + index = xpmem_tg_hashtable_index(proc->pid); mcs_rwlock_writer_lock(&xpmem_my_part->tg_hashtable[index].lock, &lock); - tg = xpmem_tg_ref_by_tgid_all_nolock( - cpu_local_var(current)->proc->pid); - if (!tg) { + tg = xpmem_tg_ref_by_tgid_all_nolock(proc->pid); + if (IS_ERR(tg)) { mcs_rwlock_writer_unlock( &xpmem_my_part->tg_hashtable[index].lock, &lock); return 0; @@ -288,6 +285,17 @@ static int xpmem_close( XPMEM_DEBUG("tg->vm=0x%p", tg->vm); + ihk_mc_spinlock_lock_noirq(&tg->lock); + tg->flags |= XPMEM_FLAG_DESTROYING; + ihk_mc_spinlock_unlock_noirq(&tg->lock); + + xpmem_release_aps_of_tg(tg); + xpmem_remove_segs_of_tg(tg); + + ihk_mc_spinlock_lock_noirq(&tg->lock); + tg->flags |= XPMEM_FLAG_DESTROYED; + ihk_mc_spinlock_unlock_noirq(&tg->lock); + xpmem_destroy_tg(tg); if (!n_opened) { @@ -334,7 +342,7 @@ static void xpmem_exit(void) XPMEM_DEBUG("call: "); if (xpmem_my_part) { - XPMEM_DEBUG("kfree(): 0x%p", xpmem_my_part); + XPMEM_DEBUG("kfree(): xpmem_my_part=0x%p", xpmem_my_part); kfree(xpmem_my_part); xpmem_my_part = NULL; } @@ -379,7 +387,6 @@ static int __xpmem_open(void) ihk_atomic_set(&tg->n_pinned, 0); INIT_LIST_HEAD(&tg->tg_hashlist); tg->vm = cpu_local_var(current)->vm; - ihk_atomic_set(&tg->n_recall_PFNs, 0); for (index = 0; index < XPMEM_AP_HASHTABLE_SIZE; index++) { mcs_rwlock_init(&tg->ap_hashtable[index].lock); @@ -410,8 +417,6 @@ static void xpmem_destroy_tg( { XPMEM_DEBUG("call: tg=0x%p", tg); - XPMEM_DEBUG("tg->vm=0x%p", tg->vm); - xpmem_tg_destroyable(tg); xpmem_tg_deref(tg); @@ -431,7 +436,7 @@ static int xpmem_make( struct xpmem_segment *seg; struct mcs_rwlock_node_irqsave lock; - XPMEM_DEBUG("call: vaddr=0x%lx, size=%lu, permit_type=%d, " + XPMEM_DEBUG("call: vaddr=0x%lx, size=0x%lx, permit_type=%d, " "permit_value=0%04lo", vaddr, size, permit_type, (unsigned long)(uintptr_t)permit_value); @@ -475,7 +480,6 @@ static int xpmem_make( memset(seg, 0, sizeof(struct xpmem_segment)); ihk_mc_spinlock_init(&seg->lock); - mcs_rwlock_init(&seg->seg_lock); seg->segid = segid; seg->vaddr = vaddr; seg->size = size; @@ -487,7 +491,6 @@ static int xpmem_make( xpmem_seg_not_destroyable(seg); - /* add seg to its tg's list of segs */ mcs_rwlock_writer_lock(&seg_tg->seg_list_lock, &lock); list_add_tail(&seg->seg_list, &seg_tg->seg_list); mcs_rwlock_writer_unlock(&seg_tg->seg_list_lock, &lock); @@ -578,49 +581,1486 @@ static void xpmem_remove_seg( struct xpmem_segment *seg) { DBUG_ON(ihk_atomic_read(&seg->refcnt) <= 0); - struct mcs_rwlock_node_irqsave seg_lock; struct mcs_rwlock_node_irqsave lock; XPMEM_DEBUG("call: tgid=%d, segid=0x%lx", seg_tg->tgid, seg->segid); - ihk_mc_spinlock_lock(&seg->lock); + ihk_mc_spinlock_lock_noirq(&seg->lock); if (seg->flags 
& XPMEM_FLAG_DESTROYING) { ihk_mc_spinlock_unlock_noirq(&seg->lock); - schedule(); return; } seg->flags |= XPMEM_FLAG_DESTROYING; ihk_mc_spinlock_unlock_noirq(&seg->lock); - mcs_rwlock_writer_lock(&seg->seg_lock, &seg_lock); - - /* unpin pages and clear PTEs for each attachment to this segment */ xpmem_clear_PTEs(seg); - /* indicate that the segment has been destroyed */ - ihk_mc_spinlock_lock(&seg->lock); + ihk_mc_spinlock_lock_noirq(&seg->lock); seg->flags |= XPMEM_FLAG_DESTROYED; ihk_mc_spinlock_unlock_noirq(&seg->lock); - /* Remove segment structure from its tg's list of segs */ mcs_rwlock_writer_lock(&seg_tg->seg_list_lock, &lock); list_del_init(&seg->seg_list); mcs_rwlock_writer_unlock(&seg_tg->seg_list_lock, &lock); - mcs_rwlock_writer_unlock(&seg->seg_lock, &seg_lock); - xpmem_seg_destroyable(seg); XPMEM_DEBUG("return: "); } +static void xpmem_remove_segs_of_tg( + struct xpmem_thread_group *seg_tg) +{ + struct xpmem_segment *seg; + struct mcs_rwlock_node_irqsave lock; + + XPMEM_DEBUG("call: tgid=%d", seg_tg->tgid); + + mcs_rwlock_writer_lock(&seg_tg->seg_list_lock, &lock); + + while (!list_empty(&seg_tg->seg_list)) { + seg = list_entry((&seg_tg->seg_list)->next, + struct xpmem_segment, seg_list); + xpmem_seg_ref(seg); + mcs_rwlock_writer_unlock(&seg_tg->seg_list_lock, &lock); + + xpmem_remove_seg(seg_tg, seg); + + xpmem_seg_deref(seg); + + mcs_rwlock_writer_lock(&seg_tg->seg_list_lock, &lock); + } + + mcs_rwlock_writer_unlock(&seg_tg->seg_list_lock, &lock); + + XPMEM_DEBUG("return: "); +} + + +static int xpmem_get( + xpmem_segid_t segid, + int flags, + int permit_type, + void *permit_value, + xpmem_apid_t *apid_p) +{ + xpmem_apid_t apid; + struct xpmem_access_permit *ap; + struct xpmem_segment *seg; + struct xpmem_thread_group *ap_tg, *seg_tg; + int index; + struct mcs_rwlock_node_irqsave lock; + + XPMEM_DEBUG("call: segid=0x%lx, flags=%d, permit_type=%d, " + "permit_value=0%04lo", + segid, flags, permit_type, + (unsigned long)(uintptr_t)permit_value); + + if (segid <= 0) { + return -EINVAL; + } + + if ((flags & ~(XPMEM_RDONLY | XPMEM_RDWR)) || + (flags & (XPMEM_RDONLY | XPMEM_RDWR)) == + (XPMEM_RDONLY | XPMEM_RDWR)) { + return -EINVAL; + } + + if (permit_type != XPMEM_PERMIT_MODE || permit_value != NULL) { + return -EINVAL; + } + + seg_tg = xpmem_tg_ref_by_segid(segid); + if (IS_ERR(seg_tg)) { + return PTR_ERR(seg_tg); + } + + seg = xpmem_seg_ref_by_segid(seg_tg, segid); + if (IS_ERR(seg)) { + xpmem_tg_deref(seg_tg); + return PTR_ERR(seg); + } + + if (xpmem_check_permit_mode(flags, seg) != 0) { + xpmem_seg_deref(seg); + xpmem_tg_deref(seg_tg); + return -EACCES; + } + + ap_tg = xpmem_tg_ref_by_tgid(cpu_local_var(current)->proc->pid); + if (IS_ERR(ap_tg)) { + DBUG_ON(PTR_ERR(ap_tg) != -ENOENT); + xpmem_seg_deref(seg); + xpmem_tg_deref(seg_tg); + return -XPMEM_ERRNO_NOPROC; + } + + apid = xpmem_make_apid(ap_tg); + if (apid < 0) { + xpmem_tg_deref(ap_tg); + xpmem_seg_deref(seg); + xpmem_tg_deref(seg_tg); + return apid; + } + + /* create a new xpmem_access_permit structure with a unique apid */ + ap = kmalloc(sizeof(struct xpmem_access_permit), IHK_MC_AP_NOWAIT); + if (ap == NULL) { + xpmem_tg_deref(ap_tg); + xpmem_seg_deref(seg); + xpmem_tg_deref(seg_tg); + return -ENOMEM; + } + XPMEM_DEBUG("kmalloc(): ap=0x%p", ap); + memset(ap, 0, sizeof(struct xpmem_access_permit)); + + ihk_mc_spinlock_init(&ap->lock); + ap->apid = apid; + ap->mode = flags; + ap->seg = seg; + ap->tg = ap_tg; + INIT_LIST_HEAD(&ap->att_list); + INIT_LIST_HEAD(&ap->ap_list); + INIT_LIST_HEAD(&ap->ap_hashlist); + + 
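/* pin the new ap with its initial reference (dropped at release) */
+	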
xpmem_ap_not_destroyable(ap);
+
+	/* add ap to its seg's access permit list */
+	ihk_mc_spinlock_lock_noirq(&seg->lock);
+	list_add_tail(&ap->ap_list, &seg->ap_list);
+	ihk_mc_spinlock_unlock_noirq(&seg->lock);
+
+	/* add ap to its hash list */
+	index = xpmem_ap_hashtable_index(ap->apid);
+	mcs_rwlock_writer_lock(&ap_tg->ap_hashtable[index].lock, &lock);
+	list_add_tail(&ap->ap_hashlist, &ap_tg->ap_hashtable[index].list);
+	mcs_rwlock_writer_unlock(&ap_tg->ap_hashtable[index].lock, &lock);
+
+	xpmem_tg_deref(ap_tg);
+
+	*apid_p = apid;
+
+	XPMEM_DEBUG("return: ret=%d, apid=0x%lx", 0, *apid_p);
+
+	return 0;
+}
+
+
+static int xpmem_check_permit_mode(
+	int flags,
+	struct xpmem_segment *seg)
+{
+	int ret;
+	struct xpmem_perm perm;
+
+	XPMEM_DEBUG("call: flags=%d", flags);
+
+	DBUG_ON(seg->permit_type != XPMEM_PERMIT_MODE);
+
+	memset(&perm, 0, sizeof(struct xpmem_perm));
+	perm.uid = seg->tg->uid;
+	perm.gid = seg->tg->gid;
+	perm.mode = (unsigned long)seg->permit_value;
+
+	ret = xpmem_perms(&perm, XPMEM_PERM_IRUSR);
+	if (ret == 0 && (flags & XPMEM_RDWR)) {
+		ret = xpmem_perms(&perm, XPMEM_PERM_IWUSR);
+	}
+
+	XPMEM_DEBUG("return: ret=%d", ret);
+
+	return ret;
+}
+
+
+static int xpmem_perms(
+	struct xpmem_perm *perm,
+	short flag)
+{
+	int ret = 0;
+	int requested_mode;
+	int granted_mode;
+
+	XPMEM_DEBUG("call: uid=%d, gid=%d, mode=0%lo, flag=0%o",
+		perm->uid, perm->gid, perm->mode, flag);
+
+	requested_mode = (flag >> 6) | (flag >> 3) | flag;
+	granted_mode = perm->mode;
+	if (perm->uid == cpu_local_var(current)->proc->ruid) {
+		granted_mode >>= 6;
+	}
+	else if (perm->gid == cpu_local_var(current)->proc->rgid) {
+		granted_mode >>= 3;
+	}
+
+	if (requested_mode & ~granted_mode & 0007) {
+		ret = -1;
+	}
+
+	XPMEM_DEBUG("return: ret=%d", ret);
+
+	return ret;
+}
+
+
+static xpmem_apid_t xpmem_make_apid(
+	struct xpmem_thread_group *ap_tg)
+{
+	struct xpmem_id apid;
+	xpmem_apid_t *apid_p = (xpmem_apid_t *)&apid;
+	int uniq;
+
+	XPMEM_DEBUG("call: ap_tg=0x%p, uniq_apid=%d",
+		ap_tg, ihk_atomic_read(&ap_tg->uniq_apid));
+
+	DBUG_ON(sizeof(struct xpmem_id) != sizeof(xpmem_apid_t));
+
+	uniq = ihk_atomic_inc_return(&ap_tg->uniq_apid);
+	if (uniq > XPMEM_MAX_UNIQ_ID) {
+		ihk_atomic_dec(&ap_tg->uniq_apid);
+		return -EBUSY;
+	}
+
+	*apid_p = 0;
+	apid.tgid = ap_tg->tgid;
+	apid.uniq = (unsigned int)uniq;
+
+	XPMEM_DEBUG("return: apid=0x%lx, apid.tgid=%d, apid.uniq=%d",
+		*apid_p, apid.tgid, apid.uniq);
+
+	return *apid_p;
+}
+
+
+static int xpmem_release(
+	xpmem_apid_t apid)
+{
+	struct xpmem_thread_group *ap_tg;
+	struct xpmem_access_permit *ap;
+
+	XPMEM_DEBUG("call: apid=0x%lx", apid);
+
+	if (apid <= 0) {
+		return -EINVAL;
+	}
+
+	ap_tg = xpmem_tg_ref_by_apid(apid);
+	if (IS_ERR(ap_tg)) {
+		return PTR_ERR(ap_tg);
+	}
+
+	if (cpu_local_var(current)->proc->pid != ap_tg->tgid) {
+		xpmem_tg_deref(ap_tg);
+		return -EACCES;
+	}
+
+	ap = xpmem_ap_ref_by_apid(ap_tg, apid);
+	if (IS_ERR(ap)) {
+		xpmem_tg_deref(ap_tg);
+		return PTR_ERR(ap);
+	}
+	DBUG_ON(ap->tg != ap_tg);
+
+	xpmem_release_ap(ap_tg, ap);
+	xpmem_ap_deref(ap);
+	xpmem_tg_deref(ap_tg);
+
+	XPMEM_DEBUG("return: ret=%d", 0);
+
+	return 0;
+}
+
+
+static void xpmem_release_ap(
+	struct xpmem_thread_group *ap_tg,
+	struct xpmem_access_permit *ap)
+{
+	int index;
+	struct xpmem_thread_group *seg_tg;
+	struct xpmem_attachment *att;
+	struct xpmem_segment *seg;
+	struct mcs_rwlock_node_irqsave lock;
+
+	XPMEM_DEBUG("call: tgid=%d, apid=0x%lx", ap_tg->tgid, ap->apid);
+
+	ihk_mc_spinlock_lock_noirq(&ap->lock);
+	if (ap->flags & 
XPMEM_FLAG_DESTROYING) { + ihk_mc_spinlock_unlock_noirq(&ap->lock); + return; + } + ap->flags |= XPMEM_FLAG_DESTROYING; + + while (!list_empty(&ap->att_list)) { + att = list_entry((&ap->att_list)->next, struct xpmem_attachment, + att_list); + xpmem_att_ref(att); + ihk_mc_spinlock_unlock_noirq(&ap->lock); + + xpmem_detach_att(ap, att); + + xpmem_att_deref(att); + + ihk_mc_spinlock_lock_noirq(&ap->lock); + } + + ap->flags |= XPMEM_FLAG_DESTROYED; + + ihk_mc_spinlock_unlock_noirq(&ap->lock); + + index = xpmem_ap_hashtable_index(ap->apid); + mcs_rwlock_writer_lock(&ap_tg->ap_hashtable[index].lock, &lock); + list_del_init(&ap->ap_hashlist); + mcs_rwlock_writer_unlock(&ap_tg->ap_hashtable[index].lock, &lock); + + seg = ap->seg; + seg_tg = seg->tg; + + ihk_mc_spinlock_lock_noirq(&seg->lock); + list_del_init(&ap->ap_list); + ihk_mc_spinlock_unlock_noirq(&seg->lock); + + xpmem_seg_deref(seg); + xpmem_tg_deref(seg_tg); + + xpmem_ap_destroyable(ap); + + XPMEM_DEBUG("return: "); +} + + +static void xpmem_release_aps_of_tg( + struct xpmem_thread_group *ap_tg) +{ + struct xpmem_hashlist *hashlist; + struct xpmem_access_permit *ap; + struct mcs_rwlock_node_irqsave lock; + int index; + + XPMEM_DEBUG("call: tgid=%d", ap_tg->tgid); + + for (index = 0; index < XPMEM_AP_HASHTABLE_SIZE; index++) { + hashlist = &ap_tg->ap_hashtable[index]; + + mcs_rwlock_writer_lock(&hashlist->lock, &lock); + while (!list_empty(&hashlist->list)) { + ap = list_entry((&hashlist->list)->next, + struct xpmem_access_permit, ap_hashlist); + xpmem_ap_ref(ap); + mcs_rwlock_writer_unlock(&hashlist->lock, &lock); + + xpmem_release_ap(ap_tg, ap); + + xpmem_ap_deref(ap); + + mcs_rwlock_writer_lock(&hashlist->lock, &lock); + } + mcs_rwlock_writer_unlock(&hashlist->lock, &lock); + } + + XPMEM_DEBUG("return: "); +} + + +static int xpmem_attach( + struct mckfd *mckfd, + xpmem_apid_t apid, + off_t offset, + size_t size, + unsigned long vaddr, + int fd, + int att_flags, + unsigned long *at_vaddr_p) +{ + int ret; + unsigned long flags; + unsigned long prot_flags = PROT_READ | PROT_WRITE; + unsigned long seg_vaddr; + unsigned long at_vaddr; + struct xpmem_thread_group *ap_tg; + struct xpmem_thread_group *seg_tg; + struct xpmem_access_permit *ap; + struct xpmem_segment *seg; + struct xpmem_attachment *att; + struct mcs_rwlock_node_irqsave at_lock; + struct vm_range *vmr; + struct process_vm *vm = cpu_local_var(current)->vm; + + XPMEM_DEBUG("call: apid=0x%lx, offset=0x%lx, size=0x%lx, vaddr=0x%lx, " + "fd=%d, att_flags=%d", + apid, offset, size, vaddr, fd, att_flags); + + if (apid <= 0) { + return -EINVAL; + } + + /* The start of the attachment must be page aligned */ + if (offset_in_page(vaddr) != 0 || offset_in_page(offset) != 0) { + return -EINVAL; + } + + /* If the size is not page aligned, fix it */ + if (offset_in_page(size) != 0) { + size += PAGE_SIZE - offset_in_page(size); + } + + ap_tg = xpmem_tg_ref_by_apid(apid); + if (IS_ERR(ap_tg)) + return PTR_ERR(ap_tg); + + ap = xpmem_ap_ref_by_apid(ap_tg, apid); + if (IS_ERR(ap)) { + xpmem_tg_deref(ap_tg); + return PTR_ERR(ap); + } + + seg = ap->seg; + xpmem_seg_ref(seg); + seg_tg = seg->tg; + xpmem_tg_ref(seg_tg); + + if ((seg->flags & XPMEM_FLAG_DESTROYING) || + (seg_tg->flags & XPMEM_FLAG_DESTROYING)) { + ret = -ENOENT; + goto out_1; + } + + ret = xpmem_validate_access(ap, offset, size, XPMEM_RDWR, &seg_vaddr); + if (ret != 0) { + goto out_1; + } + + size += offset_in_page(seg_vaddr); + + seg = ap->seg; + if (cpu_local_var(current)->proc->pid == seg_tg->tgid && vaddr) { + if ((vaddr + size > 
seg_vaddr) && (vaddr < seg_vaddr + size)) { + ret = -EINVAL; + goto out_1; + } + } + + /* create new attach structure */ + att = kmalloc(sizeof(struct xpmem_attachment), IHK_MC_AP_NOWAIT); + if (att == NULL) { + ret = -ENOMEM; + goto out_1; + } + XPMEM_DEBUG("kmalloc(): att=0x%p", att); + memset(att, 0, sizeof(struct xpmem_attachment)); + + mcs_rwlock_init(&att->at_lock); + att->vaddr = seg_vaddr; + att->at_size = size; + att->ap = ap; + INIT_LIST_HEAD(&att->att_list); + att->vm = vm; + + xpmem_att_not_destroyable(att); + xpmem_att_ref(att); + + mcs_rwlock_writer_lock(&att->at_lock, &at_lock); + + ihk_mc_spinlock_lock_noirq(&ap->lock); + list_add_tail(&att->att_list, &ap->att_list); + if (ap->flags & XPMEM_FLAG_DESTROYING) { + ihk_mc_spinlock_unlock_noirq(&ap->lock); + ret = -ENOENT; + goto out_2; + } + ihk_mc_spinlock_unlock_noirq(&ap->lock); + + flags = MAP_SHARED; + if (vaddr != 0) + flags |= MAP_FIXED; + + if (flags & MAP_FIXED) { + struct vm_range *existing_vmr; + + ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock); + + existing_vmr = lookup_process_memory_range(vm, vaddr, + vaddr + size); + + ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); + + for (; existing_vmr && existing_vmr->start < vaddr + size; + existing_vmr = next_process_memory_range(vm, + existing_vmr)) { + if (xpmem_is_private_data(existing_vmr)) { + ret = -EINVAL; + goto out_2; + } + } + } + + flags |= MAP_ANONYMOUS; + XPMEM_DEBUG("do_mmap(): vaddr=0x%lx, size=0x%lx, prot_flags=0x%lx, " + "flags=0x%lx, fd=%d, offset=0x%lx", + vaddr, size, prot_flags, flags, mckfd->fd, offset); + at_vaddr = do_mmap(vaddr, size, prot_flags, flags, mckfd->fd, offset); + if (IS_ERR((void *)(uintptr_t)at_vaddr)) { + ret = at_vaddr; + goto out_2; + } + XPMEM_DEBUG("at_vaddr=0x%lx", at_vaddr); + att->at_vaddr = at_vaddr; + + ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock); + + vmr = lookup_process_memory_range(vm, at_vaddr, at_vaddr + 1); + + ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); + + if (!vmr) { + ret = -ENOENT; + goto out_2; + } + vmr->private_data = att; + + att->at_vmr = vmr; + + *at_vaddr_p = at_vaddr + offset_in_page(att->vaddr); + + ret = 0; +out_2: + if (ret != 0) { + att->flags |= XPMEM_FLAG_DESTROYING; + ihk_mc_spinlock_lock_noirq(&ap->lock); + list_del_init(&att->att_list); + ihk_mc_spinlock_unlock_noirq(&ap->lock); + xpmem_att_destroyable(att); + } + mcs_rwlock_writer_unlock(&att->at_lock, &at_lock); + xpmem_att_deref(att); +out_1: + xpmem_ap_deref(ap); + xpmem_tg_deref(ap_tg); + xpmem_seg_deref(seg); + xpmem_tg_deref(seg_tg); + + XPMEM_DEBUG("return: ret=%d, at_vaddr=0x%lx", ret, *at_vaddr_p); + + return ret; +} + + +static int xpmem_detach( + unsigned long at_vaddr) +{ + int ret; + struct xpmem_access_permit *ap; + struct xpmem_attachment *att; + struct mcs_rwlock_node_irqsave at_lock; + struct vm_range *range; + struct process_vm *vm = cpu_local_var(current)->vm; + + XPMEM_DEBUG("call: at_vaddr=0x%lx", at_vaddr); + + ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock); + + range = lookup_process_memory_range(vm, at_vaddr, at_vaddr + 1); + + if (!range || range->start > at_vaddr) { + ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); + return 0; + } + + att = (struct xpmem_attachment *)range->private_data; + if (att == NULL) { + ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); + return -EINVAL; + } + + xpmem_att_ref(att); + + mcs_rwlock_writer_lock(&att->at_lock, &at_lock); + + if (att->flags & XPMEM_FLAG_DESTROYING) { + mcs_rwlock_writer_unlock(&att->at_lock, &at_lock); + 
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); + xpmem_att_deref(att); + return 0; + } + att->flags |= XPMEM_FLAG_DESTROYING; + + ap = att->ap; + xpmem_ap_ref(ap); + + if (cpu_local_var(current)->proc->pid != ap->tg->tgid) { + att->flags &= ~XPMEM_FLAG_DESTROYING; + xpmem_ap_deref(ap); + mcs_rwlock_writer_unlock(&att->at_lock, &at_lock); + ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); + xpmem_att_deref(att); + return -EACCES; + } + + xpmem_unpin_pages(ap->seg, vm, att->at_vaddr, att->at_size); + + range->private_data = NULL; + + mcs_rwlock_writer_unlock(&att->at_lock, &at_lock); + + XPMEM_DEBUG("xpmem_vm_munmap(): start=0x%lx, len=0x%lx", + range->start, att->at_size); + ret = xpmem_vm_munmap(vm, (void *)range->start, att->at_size); + if (ret) { + ekprintf("%s: ERROR: xpmem_vm_munmap() failed %d\n", + __FUNCTION__, ret); + } + ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); + DBUG_ON(ret != 0); + + att->flags &= ~XPMEM_FLAG_VALIDPTEs; + + ihk_mc_spinlock_lock_noirq(&ap->lock); + list_del_init(&att->att_list); + ihk_mc_spinlock_unlock_noirq(&ap->lock); + + xpmem_att_destroyable(att); + + xpmem_ap_deref(ap); + xpmem_att_deref(att); + + XPMEM_DEBUG("return: ret=%d", 0); + + return 0; +} + + +static int xpmem_vm_munmap( + struct process_vm *vm, + void *addr, + size_t len) +{ + int ret; + int ro_freed; + + XPMEM_DEBUG("call: vm=0x%p, addr=0x%p, len=0x%lx", vm, addr, len); + + begin_free_pages_pending(); + + ret = xpmem_remove_process_range(vm, (intptr_t)addr, + (intptr_t)(addr + len), &ro_freed); + + finish_free_pages_pending(); + + XPMEM_DEBUG("return: ret=%d", ret); + + return ret; +} + + +static int xpmem_remove_process_range( + struct process_vm *vm, + unsigned long start, + unsigned long end, + int *ro_freedp) +{ + int error = 0; + struct vm_range *range; + struct vm_range *next; + struct vm_range *freerange; + int ro_freed = 0; + + XPMEM_DEBUG("call: vm=0x%p, start=0x%lx, end=0x%lx", vm, start, end); + + list_for_each_entry_safe(range, next, &vm->vm_range_list, list) { + if ((range->end <= start) || (end <= range->start)) { + /* no overlap */ + continue; + } + freerange = range; + + if (freerange->start < start) { + error = split_process_memory_range(vm, + freerange, start, &freerange); + if (error) { + ekprintf("%s(%p,%lx,%lx): ERROR: " + "split failed %d\n", + __FUNCTION__, vm, start, end, error); + goto out; + } + } + + if (end < freerange->end) { + error = split_process_memory_range(vm, freerange, end, + NULL); + if (error) { + ekprintf("%s(%p,%lx,%lx): ERROR: " + "split failed %d\n", + __FUNCTION__, vm, start, end, error); + goto out; + } + } + + if (!(freerange->flag & VR_PROT_WRITE)) { + ro_freed = 1; + } + + if (freerange->private_data) { + xpmem_remove_process_memory_range(vm, freerange); + } + + error = xpmem_free_process_memory_range(vm, freerange); + if (error) { + ekprintf("%s(%p,%lx,%lx): ERROR: free failed %d\n", + __FUNCTION__, vm, start, end, error); + goto out; + } + } + + if (ro_freedp) { + *ro_freedp = ro_freed; + } + +out: + XPMEM_DEBUG("return: ret=%d, ro_freed=%d", error, ro_freed); + + return error; +} + + +static int xpmem_free_process_memory_range( + struct process_vm *vm, + struct vm_range *range) +{ + int error; + int i; + + XPMEM_DEBUG("call: vm=0x%p, start=0x%lx, end=0x%lx", + vm, range->start, range->end); + + ihk_mc_spinlock_lock_noirq(&vm->page_table_lock); + + error = ihk_mc_pt_clear_range(vm->address_space->page_table, vm, + (void *)range->start, (void *)range->end); + + ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock); + + if 
(error && (error != -ENOENT)) {
+		ekprintf("%s(%p,%lx-%lx): ERROR: "
+			"ihk_mc_pt_clear_range(%lx-%lx) failed %d\n",
+			__FUNCTION__, vm, range->start, range->end,
+			range->start, range->end, error);
+		/* through */
+	}
+
+	if (range->memobj) {
+		memobj_release(range->memobj);
+	}
+
+	list_del(&range->list);
+	for (i = 0; i < VM_RANGE_CACHE_SIZE; ++i) {
+		if (vm->range_cache[i] == range)
+			vm->range_cache[i] = NULL;
+	}
+
+	kfree(range);
+
+	XPMEM_DEBUG("return: ret=%d", 0);
+
+	return 0;
+}
+
+
+static void xpmem_detach_att(
+	struct xpmem_access_permit *ap,
+	struct xpmem_attachment *att)
+{
+	int ret;
+	struct vm_range *range;
+	struct process_vm *vm;
+	struct mcs_rwlock_node_irqsave at_lock;
+
+	XPMEM_DEBUG("call: apid=0x%lx, att=0x%p", ap->apid, att);
+
+	XPMEM_DEBUG("detaching current->vm=0x%p, att->vm=0x%p",
+		(void *)cpu_local_var(current)->vm, (void *)att->vm);
+
+	vm = cpu_local_var(current)->vm ? cpu_local_var(current)->vm : att->vm;
+
+	ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
+
+	mcs_rwlock_writer_lock(&att->at_lock, &at_lock);
+
+	if (att->flags & XPMEM_FLAG_DESTROYING) {
+		mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
+		ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
+		XPMEM_DEBUG("return: XPMEM_FLAG_DESTROYING");
+		return;
+	}
+	att->flags |= XPMEM_FLAG_DESTROYING;
+
+	range = lookup_process_memory_range(vm,
+		att->at_vaddr, att->at_vaddr + 1);
+
+	if (!range || range->start > att->at_vaddr) {
+		DBUG_ON(1);
+		mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
+		ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
+		ekprintf("%s: ERROR: lookup_process_memory_range() failed\n",
+			__FUNCTION__);
+		XPMEM_DEBUG("return: range=0x%p", range);
+		return;
+	}
+	XPMEM_DEBUG("lookup_process_memory_range(): at_vaddr=0x%lx, "
+		"start=0x%lx, end=0x%lx",
+		att->at_vaddr, range->start, range->end);
+
+	DBUG_ON(!xpmem_is_private_data(range));
+	DBUG_ON((range->end - range->start) != att->at_size);
+	DBUG_ON(range->private_data != att);
+
+	xpmem_unpin_pages(ap->seg, vm, att->at_vaddr, att->at_size);
+
+	range->private_data = NULL;
+
+	att->flags &= ~XPMEM_FLAG_VALIDPTEs;
+
+	ihk_mc_spinlock_lock_noirq(&ap->lock);
+	list_del_init(&att->att_list);
+	ihk_mc_spinlock_unlock_noirq(&ap->lock);
+
+	mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
+
+	XPMEM_DEBUG("xpmem_vm_munmap(): start=0x%lx, len=0x%lx",
+		range->start, att->at_size);
+	ret = xpmem_vm_munmap(vm, (void *)range->start,
+		att->at_size);
+	if (ret) {
+		ekprintf("%s: ERROR: xpmem_vm_munmap() failed %d\n",
+			__FUNCTION__, ret);
+	}
+
+	ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
+
+	xpmem_att_destroyable(att);
+
+	XPMEM_DEBUG("return: ");
+}
+
+
 static void xpmem_clear_PTEs(
 	struct xpmem_segment *seg)
 {
-	XPMEM_DEBUG("call: seg=0x%p", seg);
+	XPMEM_DEBUG("call: segid=0x%lx", seg->segid);
 
-//	xpmem_clear_PTEs_range(seg, seg->vaddr, seg->vaddr + seg->size, 0); // TODO
+	xpmem_clear_PTEs_range(seg, seg->vaddr, seg->vaddr + seg->size);
+
+	XPMEM_DEBUG("return: ");
+}
+
+
+static void xpmem_clear_PTEs_range(
+	struct xpmem_segment *seg,
+	unsigned long start,
+	unsigned long end)
+{
+	struct xpmem_access_permit *ap;
+
+	XPMEM_DEBUG("call: segid=0x%lx, start=0x%lx, end=0x%lx",
+		seg->segid, start, end);
+
+	ihk_mc_spinlock_lock_noirq(&seg->lock);
+
+	list_for_each_entry(ap, &seg->ap_list, ap_list) {
+		xpmem_ap_ref(ap);
+		ihk_mc_spinlock_unlock_noirq(&seg->lock);
+
+		xpmem_clear_PTEs_of_ap(ap, start, end);
+
+		ihk_mc_spinlock_lock_noirq(&seg->lock);
+		if 
(list_empty(&ap->ap_list)) { + xpmem_ap_deref(ap); + ap = list_entry(&seg->ap_list, + struct xpmem_access_permit, ap_list); + } + else { + xpmem_ap_deref(ap); + } + } + + ihk_mc_spinlock_unlock_noirq(&seg->lock); + + XPMEM_DEBUG("return: "); +} + + +static void xpmem_clear_PTEs_of_ap( + struct xpmem_access_permit *ap, + unsigned long start, + unsigned long end) +{ + struct xpmem_attachment *att; + + XPMEM_DEBUG("call: apid=0x%lx, start=0x%lx, end=0x%lx", + ap->apid, start, end); + + ihk_mc_spinlock_lock_noirq(&ap->lock); + + list_for_each_entry(att, &ap->att_list, att_list) { + if (!(att->flags & XPMEM_FLAG_VALIDPTEs)) + continue; + + xpmem_att_ref(att); + ihk_mc_spinlock_unlock_noirq(&ap->lock); + + xpmem_clear_PTEs_of_att(att, start, end); + + ihk_mc_spinlock_lock_noirq(&ap->lock); + if (list_empty(&att->att_list)) { + xpmem_att_deref(att); + att = list_entry(&ap->att_list, struct xpmem_attachment, + att_list); + } + else { + xpmem_att_deref(att); + } + } + + ihk_mc_spinlock_unlock_noirq(&ap->lock); + + XPMEM_DEBUG("return: "); +} + + +static void xpmem_clear_PTEs_of_att( + struct xpmem_attachment *att, + unsigned long start, + unsigned long end) +{ + int ret; + struct mcs_rwlock_node_irqsave at_lock; + + XPMEM_DEBUG("call: att=0x%p, start=0x%lx, end=0x%lx", + att, start, end); + + ihk_mc_spinlock_lock_noirq(&att->vm->memory_range_lock); + mcs_rwlock_writer_lock(&att->at_lock, &at_lock); + + if (att->flags & XPMEM_FLAG_VALIDPTEs) { + struct vm_range *range; + unsigned long invalidate_start; + unsigned long invalidate_end; + unsigned long invalidate_len; + unsigned long offset_start; + unsigned long offset_end; + unsigned long unpin_at; + unsigned long att_vaddr_end = att->vaddr + att->at_size; + + invalidate_start = max(start, att->vaddr); + invalidate_end = min(end, att_vaddr_end); + if (invalidate_start >= att_vaddr_end || + invalidate_end <= att->vaddr) + goto out; + + offset_start = invalidate_start - att->vaddr; + offset_end = invalidate_end - att->vaddr; + + unpin_at = att->at_vaddr + offset_start; + invalidate_len = offset_end - offset_start; + DBUG_ON(offset_in_page(unpin_at) || + offset_in_page(invalidate_len)); + XPMEM_DEBUG("unpin_at=0x%lx, invalidate_len=0x%lx\n", + unpin_at, invalidate_len); + + xpmem_unpin_pages(att->ap->seg, att->vm, unpin_at, + invalidate_len); + + range = lookup_process_memory_range(att->vm, att->at_vaddr, + att->at_vaddr + 1); + if (!range) { + ekprintf("%s: ERROR: lookup_process_memory_range() " + "failed\n", + __FUNCTION__); + goto out; + } + + mcs_rwlock_writer_unlock(&att->at_lock, &at_lock); + + XPMEM_DEBUG( + "xpmem_vm_munmap(): start=0x%lx, len=0x%lx", + unpin_at, invalidate_len); + ret = xpmem_vm_munmap(att->vm, (void *)unpin_at, + invalidate_len); + if (ret) { + ekprintf("%s: ERROR: xpmem_vm_munmap() failed %d\n", + __FUNCTION__, ret); + } + + mcs_rwlock_writer_lock(&att->at_lock, &at_lock); + + if (offset_start == 0 && att->at_size == invalidate_len) + att->flags &= ~XPMEM_FLAG_VALIDPTEs; + } +out: + mcs_rwlock_writer_unlock(&att->at_lock, &at_lock); + ihk_mc_spinlock_unlock_noirq(&att->vm->memory_range_lock); + + XPMEM_DEBUG("return: "); +} + + +int xpmem_remove_process_memory_range( + struct process_vm *vm, + struct vm_range *vmr) +{ + struct vm_range *remaining_vmr; + u64 remaining_vaddr; + struct xpmem_access_permit *ap; + struct xpmem_attachment *att; + struct mcs_rwlock_node_irqsave at_lock; + + XPMEM_DEBUG("call: vmr=0x%p, att=0x%p", vmr, vmr->private_data); + + att = (struct xpmem_attachment *)vmr->private_data; + if (att == NULL) { + 
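/* not an XPMEM attachment; nothing to clean up */
+		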
return 0;
+	}
+
+	XPMEM_DEBUG("cleaning up vmr with range: 0x%lx - 0x%lx",
+		vmr->start, vmr->end);
+
+	xpmem_att_ref(att);
+
+	ihk_mc_spinlock_lock_noirq(
+		&cpu_local_var(current)->vm->memory_range_lock);
+
+	mcs_rwlock_writer_lock(&att->at_lock, &at_lock);
+
+	if (att->flags & XPMEM_FLAG_DESTROYING) {
+		mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
+		ihk_mc_spinlock_unlock_noirq(
+			&cpu_local_var(current)->vm->memory_range_lock);
+		xpmem_att_deref(att);
+		XPMEM_DEBUG("already cleaned up");
+		return 0;
+	}
+
+	if (vmr->start == att->at_vaddr &&
+		((vmr->end - vmr->start) == att->at_size)) {
+		att->flags |= XPMEM_FLAG_DESTROYING;
+
+		ap = att->ap;
+		xpmem_ap_ref(ap);
+
+		ihk_mc_spinlock_lock_noirq(&ap->lock);
+		list_del_init(&att->att_list);
+		ihk_mc_spinlock_unlock_noirq(&ap->lock);
+
+		xpmem_ap_deref(ap);
+
+		xpmem_att_destroyable(att);
+		goto out;
+	}
+
+	if (vmr->start == att->at_vaddr) {
+		remaining_vaddr = vmr->end;
+	}
+	else if (vmr->end == att->at_vaddr + att->at_size) {
+		remaining_vaddr = att->at_vaddr;
+	}
+	else {
+		remaining_vaddr = vmr->end;
+		remaining_vmr = lookup_process_memory_range(
+			cpu_local_var(current)->vm, remaining_vaddr - 1,
+			remaining_vaddr);
+		if (!remaining_vmr) {
+			ekprintf("%s: ERROR: vm_range is NULL\n", __FUNCTION__);
+			goto out;
+		}
+		else if (remaining_vmr->start > remaining_vaddr ||
+			remaining_vmr->private_data != vmr->private_data) {
+			ekprintf("%s: ERROR: vm_range: 0x%lx - 0x%lx\n",
+				__FUNCTION__, vmr->start, vmr->end);
+			goto out;
+		}
+
+		remaining_vmr->private_data = NULL;
+
+		remaining_vaddr = att->at_vaddr;
+	}
+
+	remaining_vmr = lookup_process_memory_range(
+		cpu_local_var(current)->vm, remaining_vaddr,
+		remaining_vaddr + 1);
+	if (!remaining_vmr) {
+		ekprintf("%s: ERROR: vm_range is NULL\n", __FUNCTION__);
+		goto out;
+	}
+	else if (remaining_vmr->start > remaining_vaddr ||
+		remaining_vmr->private_data != vmr->private_data) {
+		ekprintf("%s: ERROR: vm_range: 0x%lx - 0x%lx\n",
+			__FUNCTION__, vmr->start, vmr->end);
+		goto out;
+	}
+
+	att->at_vaddr = remaining_vmr->start;
+	att->at_size = remaining_vmr->end - remaining_vmr->start;
+
+	vmr->private_data = NULL;
+
+out:
+	mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
+
+	ihk_mc_spinlock_unlock_noirq(
+		&cpu_local_var(current)->vm->memory_range_lock);
+
+	xpmem_att_deref(att);
+
+	XPMEM_DEBUG("return: ret=%d", 0);
+
+	return 0;
+}
+
+
+int xpmem_fault_process_memory_range(
+	struct process_vm *vm,
+	struct vm_range *vmr,
+	unsigned long vaddr,
+	uint64_t reason)
+{
+	int ret = 0;
+	unsigned long seg_vaddr = 0;
+	pte_t *pte = NULL;
+	pte_t *old_pte = NULL;
+	struct xpmem_thread_group *ap_tg;
+	struct xpmem_thread_group *seg_tg;
+	struct xpmem_access_permit *ap;
+	struct xpmem_attachment *att;
+	struct xpmem_segment *seg;
+	size_t pgsize;
+	struct mcs_rwlock_node_irqsave at_lock;
+	int att_locked = 0;
+
+	XPMEM_DEBUG("call: vmr=0x%p, vaddr=0x%lx, reason=0x%lx",
+		vmr, vaddr, reason);
+
+	att = (struct xpmem_attachment *)vmr->private_data;
+	if (att == NULL) {
+		return -EFAULT;
+	}
+
+	xpmem_att_ref(att);
+	ap = att->ap;
+	xpmem_ap_ref(ap);
+	ap_tg = ap->tg;
+	xpmem_tg_ref(ap_tg);
+	if ((ap->flags & XPMEM_FLAG_DESTROYING) ||
+		(ap_tg->flags & XPMEM_FLAG_DESTROYING)) {
+		xpmem_att_deref(att);
+		xpmem_ap_deref(ap);
+		xpmem_tg_deref(ap_tg);
+		return -EFAULT;
+	}
+	DBUG_ON(cpu_local_var(current)->proc->pid != ap_tg->tgid);
+	DBUG_ON(ap->mode != XPMEM_RDWR);
+
+	seg = ap->seg;
+	xpmem_seg_ref(seg);
+	seg_tg = seg->tg;
+	xpmem_tg_ref(seg_tg);
+
+	if ((seg->flags & XPMEM_FLAG_DESTROYING) ||
+		(seg_tg->flags & XPMEM_FLAG_DESTROYING)) {
+		ret = -ENOENT;
+		goto out_2;
+	}
+
+	
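/* serialize with concurrent detach/teardown of this attachment */
+	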
mcs_rwlock_writer_lock(&att->at_lock, &at_lock); + att_locked = 1; + + if ((att->flags & XPMEM_FLAG_DESTROYING) || + (ap_tg->flags & XPMEM_FLAG_DESTROYING) || + (seg_tg->flags & XPMEM_FLAG_DESTROYING)) { + goto out_2; + } + + if (vaddr < att->at_vaddr || vaddr + 1 > att->at_vaddr + att->at_size) { + goto out_2; + } + + seg_vaddr = (att->vaddr & PAGE_MASK) + (vaddr - att->at_vaddr); + XPMEM_DEBUG("vaddr=%lx, seg_vaddr=%lx", vaddr, seg_vaddr); + + ret = xpmem_ensure_valid_page(seg, seg_vaddr); + if (ret != 0) { + goto out_2; + } + + pte = xpmem_vaddr_to_pte(seg_tg->vm, seg_vaddr, &pgsize); + + att->flags |= XPMEM_FLAG_VALIDPTEs; + +out_2: + xpmem_ap_deref(ap); + xpmem_tg_deref(ap_tg); + + if (pte && !pte_is_null(pte)) { + old_pte = xpmem_vaddr_to_pte(cpu_local_var(current)->vm, vaddr, + &pgsize); + if (old_pte && !pte_is_null(old_pte)) { + if (*old_pte != *pte) { + ret = -EFAULT; + ekprintf("%s: ERROR: pte mismatch: " + "0x%lx != 0x%lx\n", + __FUNCTION__, *old_pte, *pte); + } + + ihk_atomic_dec(&seg->tg->n_pinned); + goto out_1; + } + + ret = xpmem_remap_pte(vm, vmr, vaddr, reason, seg, seg_vaddr); + if (ret) { + ekprintf("%s: ERROR: xpmem_remap_pte() failed %d\n", + __FUNCTION__, ret); + } + } + + flush_tlb_single(vaddr); + +out_1: + if (att_locked) { + mcs_rwlock_writer_unlock(&att->at_lock, &at_lock); + } + + xpmem_tg_deref(seg_tg); + xpmem_seg_deref(seg); + xpmem_att_deref(att); + + XPMEM_DEBUG("return: ret=%d", ret); + + return ret; +} + + +static int xpmem_remap_pte( + struct process_vm *vm, + struct vm_range *vmr, + unsigned long vaddr, + uint64_t reason, + struct xpmem_segment *seg, + unsigned long seg_vaddr) +{ + int ret; + struct xpmem_thread_group *seg_tg = seg->tg; + struct vm_range *seg_vmr; + pte_t *seg_pte; + void *seg_pgaddr; + size_t seg_pgsize; + int seg_p2align; + uintptr_t seg_phys; + pte_t *att_pte; + void *att_pgaddr; + size_t att_pgsize; + int att_p2align; + enum ihk_mc_pt_attribute att_attr; + + XPMEM_DEBUG("call: vmr=0x%p, vaddr=0x%lx, reason=0x%lx, segid=0x%lx, " + "seg_vaddr=0x%lx", + vmr, vaddr, reason, seg->segid, seg_vaddr); + + ihk_mc_spinlock_lock_noirq(&seg_tg->vm->memory_range_lock); + + seg_vmr = lookup_process_memory_range(seg_tg->vm, seg_vaddr, + seg_vaddr + 1); + + ihk_mc_spinlock_unlock_noirq(&seg_tg->vm->memory_range_lock); + + if (!seg_vmr) { + ret = -EFAULT; + ekprintf("%s: ERROR: lookup_process_memory_range() failed\n", + __FUNCTION__); + goto out; + } + + seg_pte = ihk_mc_pt_lookup_pte(seg_tg->vm->address_space->page_table, + (void *)seg_vaddr, seg_vmr->pgshift, &seg_pgaddr, &seg_pgsize, + &seg_p2align); + if (!seg_pte) { + ret = -EFAULT; + ekprintf("%s: ERROR: ihk_mc_pt_lookup_pte() failed\n", + __FUNCTION__); + goto out; + } + XPMEM_DEBUG("seg_pte=0x%016lx, seg_pgaddr=0x%p, seg_pgsize=%lu, " + "seg_p2align=%d", + *seg_pte, seg_pgaddr, seg_pgsize, seg_p2align); + + seg_phys = pte_get_phys(seg_pte); + XPMEM_DEBUG("seg_phys=0x%lx", seg_phys); + + att_pte = ihk_mc_pt_lookup_pte(vm->address_space->page_table, + (void *)vaddr, vmr->pgshift, &att_pgaddr, &att_pgsize, + &att_p2align); + XPMEM_DEBUG("att_pte=%p, att_pgaddr=0x%p, att_pgsize=%lu, " + "att_p2align=%d", + att_pte, att_pgaddr, att_pgsize, att_p2align); + + att_attr = arch_vrflag_to_ptattr(vmr->flag, reason, att_pte); + XPMEM_DEBUG("att_attr=0x%lx", att_attr); + + if (att_pte) { + ret = ihk_mc_pt_set_pte(vm->address_space->page_table, att_pte, + att_pgsize, seg_phys, att_attr); + if (ret) { + ret = -EFAULT; + ekprintf("%s: ERROR: ihk_mc_pt_set_pte() failed %d\n", + __FUNCTION__, ret); + goto 
out; + } + } + else { + ret = ihk_mc_pt_set_range(vm->address_space->page_table, vm, + att_pgaddr, att_pgaddr + att_pgsize, seg_phys, att_attr, + vmr->pgshift); + if (ret) { + ret = -EFAULT; + ekprintf("%s: ERROR: ihk_mc_pt_set_range() failed %d\n", + __FUNCTION__, ret); + goto out; + } + } + +out: + XPMEM_DEBUG("return: ret=%d", ret); + + return ret; +} + + +static int xpmem_ensure_valid_page( + struct xpmem_segment *seg, + unsigned long vaddr) +{ + int ret; + struct xpmem_thread_group *seg_tg = seg->tg; + + XPMEM_DEBUG("call: segid=0x%lx, vaddr=0x%lx", seg->segid, vaddr); + + if (seg->flags & XPMEM_FLAG_DESTROYING) + return -ENOENT; + + ret = xpmem_pin_page(seg_tg, seg_tg->group_leader, seg_tg->vm, vaddr); + + XPMEM_DEBUG("return: ret=%d", ret); + + return ret; +} + + +static pte_t * xpmem_vaddr_to_pte( + struct process_vm *vm, + unsigned long vaddr, + size_t *pgsize) +{ + pte_t *pte = NULL; + struct vm_range *range; + int pgshift; + void *base; + size_t size; + int p2align; + + range = lookup_process_memory_range(vm, vaddr, vaddr + 1); + if (range) { + pgshift = range->pgshift; + } + else { + goto out; + } + + pte = ihk_mc_pt_lookup_pte(vm->address_space->page_table, + (void *)vaddr, pgshift, &base, &size, &p2align); + if (pte) { + *pgsize = size; + } + else { + *pgsize = PAGE_SIZE; + } + +out: + + return pte; +} + + +static int xpmem_pin_page( + struct xpmem_thread_group *tg, + struct thread *src_thread, + struct process_vm *src_vm, + unsigned long vaddr) +{ + int ret; + struct vm_range *range; + + XPMEM_DEBUG("call: tgid=%d, vaddr=0x%lx", tg->tgid, vaddr); + + ihk_mc_spinlock_lock_noirq(&src_vm->memory_range_lock); + + range = lookup_process_memory_range(src_vm, vaddr, vaddr + 1); + + ihk_mc_spinlock_unlock_noirq(&src_vm->memory_range_lock); + + if (!range || range->start > vaddr) { + return -ENOENT; + } + + if (xpmem_is_private_data(range)) { + return -ENOENT; + } + + ret = page_fault_process_vm(src_vm, (void *)vaddr, + PF_POPULATE | PF_WRITE | PF_USER); + if (!ret) { + ihk_atomic_inc(&tg->n_pinned); + } + else { + return -ENOENT; + } + + XPMEM_DEBUG("return: ret=%d", ret); + + return ret; +} + + +static void xpmem_unpin_pages( + struct xpmem_segment *seg, + struct process_vm *vm, + unsigned long vaddr, + size_t size) +{ + int n_pgs = (((offset_in_page(vaddr) + (size)) + (PAGE_SIZE - 1)) >> + PAGE_SHIFT); + int n_pgs_unpinned = 0; + size_t vsize = 0; + pte_t *pte = NULL; + + XPMEM_DEBUG("call: segid=0x%lx, vaddr=0x%lx, size=0x%lx", + seg->segid, vaddr, size); + + XPMEM_DEBUG("n_pgs=%d", n_pgs); + + vaddr &= PAGE_MASK; + + while (n_pgs > 0) { + pte = xpmem_vaddr_to_pte(vm, vaddr, &vsize); + if (pte && !pte_is_null(pte)) { + n_pgs_unpinned++; + vaddr += PAGE_SIZE; + n_pgs--; + } + else { + vsize = ((vaddr + vsize) & (~(vsize - 1))); + n_pgs -= (vsize - vaddr) / PAGE_SIZE; + vaddr = vsize; + } + } + + XPMEM_DEBUG("sub: tg->n_pinned=%d, n_pgs_unpinned=%d", + seg->tg->n_pinned, n_pgs_unpinned); + ihk_atomic_sub(n_pgs_unpinned, &seg->tg->n_pinned); XPMEM_DEBUG("return: "); } @@ -633,9 +2073,6 @@ static struct xpmem_thread_group * __xpmem_tg_ref_by_tgid_nolock_internal( { struct xpmem_thread_group *tg; - XPMEM_DEBUG("call: tgid=%d, index=%d, return_destroying=%d", - tgid, index, return_destroying); - list_for_each_entry(tg, &xpmem_my_part->tg_hashtable[index].list, tg_hashlist) { if (tg->tgid == tgid) { @@ -646,13 +2083,10 @@ static struct xpmem_thread_group * __xpmem_tg_ref_by_tgid_nolock_internal( xpmem_tg_ref(tg); - XPMEM_DEBUG("return: tg=0x%p", tg); return tg; } } - 
XPMEM_DEBUG("return: tg=0x%p", ERR_PTR(-ENOENT)); - return ERR_PTR(-ENOENT); } @@ -662,31 +2096,35 @@ static struct xpmem_thread_group * xpmem_tg_ref_by_segid( { struct xpmem_thread_group *tg; - XPMEM_DEBUG("call: segid=0x%lx", segid); - tg = xpmem_tg_ref_by_tgid(xpmem_segid_to_tgid(segid)); - XPMEM_DEBUG("return: tg=0x%p", tg); - return tg; } +static struct xpmem_thread_group * xpmem_tg_ref_by_apid( + xpmem_apid_t apid) +{ + struct xpmem_thread_group *tg; + + tg = xpmem_tg_ref_by_tgid(xpmem_apid_to_tgid(apid)); + + return tg; +} + + static void xpmem_tg_deref( struct xpmem_thread_group *tg) { - XPMEM_DEBUG("call: tg=0x%p", tg); - DBUG_ON(ihk_atomic_read(&tg->refcnt) <= 0); if (ihk_atomic_dec_return(&tg->refcnt) != 0) { - XPMEM_DEBUG("return: tg->refcnt=%d", tg->refcnt); + XPMEM_DEBUG("return: tg->refcnt=%d, tg->n_pinned=%d", + tg->refcnt, tg->n_pinned); return; } XPMEM_DEBUG("kfree(): tg=0x%p", tg); kfree(tg); - - XPMEM_DEBUG("return: "); } @@ -697,8 +2135,6 @@ static struct xpmem_segment * xpmem_seg_ref_by_segid( struct xpmem_segment *seg; struct mcs_rwlock_node_irqsave lock; - XPMEM_DEBUG("call: seg_tg=0x%p, segid=0x%lx", seg_tg, segid); - mcs_rwlock_reader_lock(&seg_tg->seg_list_lock, &lock); list_for_each_entry(seg, &seg_tg->seg_list, seg_list) { @@ -721,8 +2157,6 @@ static struct xpmem_segment * xpmem_seg_ref_by_segid( static void xpmem_seg_deref( struct xpmem_segment *seg) { - XPMEM_DEBUG("call: seg=0x%p", seg); - DBUG_ON(ihk_atomic_read(&seg->refcnt) <= 0); if (ihk_atomic_dec_return(&seg->refcnt) != 0) { XPMEM_DEBUG("return: seg->refcnt=%d", seg->refcnt); @@ -733,7 +2167,95 @@ static void xpmem_seg_deref( XPMEM_DEBUG("kfree(): seg=0x%p", seg); kfree(seg); - - XPMEM_DEBUG("return: "); +} + + +static struct xpmem_access_permit * xpmem_ap_ref_by_apid( + struct xpmem_thread_group *ap_tg, + xpmem_apid_t apid) +{ + int index; + struct xpmem_access_permit *ap; + struct mcs_rwlock_node_irqsave lock; + + index = xpmem_ap_hashtable_index(apid); + mcs_rwlock_reader_lock(&ap_tg->ap_hashtable[index].lock, &lock); + + list_for_each_entry(ap, &ap_tg->ap_hashtable[index].list, + ap_hashlist) { + if (ap->apid == apid) { + if (ap->flags & XPMEM_FLAG_DESTROYING) { + break; + } + + xpmem_ap_ref(ap); + mcs_rwlock_reader_unlock( + &ap_tg->ap_hashtable[index].lock, &lock); + return ap; + } + } + + mcs_rwlock_reader_unlock(&ap_tg->ap_hashtable[index].lock, &lock); + + return ERR_PTR(-ENOENT); +} + + +static void xpmem_ap_deref( + struct xpmem_access_permit *ap) +{ + DBUG_ON(ihk_atomic_read(&ap->refcnt) <= 0); + if (ihk_atomic_dec_return(&ap->refcnt) != 0) { + XPMEM_DEBUG("return: ap->refcnt=%d", ap->refcnt); + return; + } + + DBUG_ON(!(ap->flags & XPMEM_FLAG_DESTROYING)); + + XPMEM_DEBUG("kfree(): ap=0x%p", ap); + kfree(ap); +} + + +static void xpmem_att_deref( + struct xpmem_attachment *att) +{ + DBUG_ON(ihk_atomic_read(&att->refcnt) <= 0); + if (ihk_atomic_dec_return(&att->refcnt) != 0) { + XPMEM_DEBUG("return: att->refcnt=%d", att->refcnt); + return; + } + + DBUG_ON(!(att->flags & XPMEM_FLAG_DESTROYING)); + + XPMEM_DEBUG("kfree(): att=0x%p", att); + kfree(att); +} + + +static int xpmem_validate_access( + struct xpmem_access_permit *ap, + off_t offset, + size_t size, + int mode, + unsigned long *vaddr) +{ + XPMEM_DEBUG("call: apid=0x%lx, offset=0x%lx, size=0x%lx, mode=%d", + ap->apid, offset, size, mode); + + if (cpu_local_var(current)->proc->pid != ap->tg->tgid || + (mode == XPMEM_RDWR && ap->mode == XPMEM_RDONLY)) { + return -EACCES; + } + + if (offset < 0 || size == 0 || offset + size > 
ap->seg->size) { + return -EINVAL; + } + + *vaddr = ap->seg->vaddr + offset; + + XPMEM_DEBUG("return: ret=%d, vaddr=0x%lx", 0, *vaddr); + + return 0; }
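
With get, release, attach, and detach implemented, the usual XPMEM user-space flow works end to end on McKernel. A consumer-side sketch written against the stock libxpmem wrappers around the `/dev/xpmem` ioctls (assumes the owner has already published `segid`, created with `xpmem_make()`, through some out-of-band channel; error handling abbreviated):

```c
#include <stddef.h>
#include <xpmem.h>

int consume(xpmem_segid_t segid, size_t size)
{
	struct xpmem_addr addr;
	xpmem_apid_t apid;
	volatile char *p;

	/* XPMEM_CMD_GET: xpmem_get() above requires XPMEM_PERMIT_MODE
	 * with a NULL permit_value. */
	apid = xpmem_get(segid, XPMEM_RDWR, XPMEM_PERMIT_MODE, NULL);
	if (apid < 0)
		return -1;

	/* XPMEM_CMD_ATTACH: offset must be page aligned; the size is
	 * rounded up to a page boundary by xpmem_attach() in the kernel. */
	addr.apid = apid;
	addr.offset = 0;
	p = xpmem_attach(addr, size, NULL);
	if (p == (void *)-1) {
		xpmem_release(apid);
		return -1;
	}

	p[0] = 42;	/* first touch faults via xpmem_fault_process_memory_range() */

	xpmem_detach((void *)p);	/* XPMEM_CMD_DETACH */
	xpmem_release(apid);		/* XPMEM_CMD_RELEASE */
	return 0;
}
```

The owner side is symmetric: `xpmem_make()` publishes the segment (XPMEM_CMD_MAKE, already handled before this patch) and `xpmem_remove()` tears it down, which walks the segment's access permits and attachments through `xpmem_clear_PTEs()` above.
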