Revert "xpmem: fix mapping of attachment and segment"

This reverts commit a5fcc91656.

Change-Id: If29415369d724391b291939ecce76482138e82f5
Masamichi Takagi
2020-06-16 10:38:31 +09:00
parent 33b7414615
commit e774e1b984
43 changed files with 284 additions and 2875 deletions


@@ -69,6 +69,4 @@ static inline int page_is_multi_mapped(struct page *page)
/* Should we take page faults on ANONYMOUS mappings? */
extern int anon_on_demand;
extern int xpmem_remote_on_demand;
#endif


@@ -830,9 +830,6 @@ struct vm_range *previous_process_memory_range(
int extend_up_process_memory_range(struct process_vm *vm,
struct vm_range *range, uintptr_t newend);
int page_fault_process_memory_range(struct process_vm *vm,
struct vm_range *range,
uintptr_t fault_addr, uint64_t reason);
int page_fault_process_vm(struct process_vm *fault_vm, void *fault_addr,
uint64_t reason);
int remove_process_region(struct process_vm *vm,


@@ -31,7 +31,7 @@ int xpmem_update_process_page_table(struct process_vm *vm,
struct vm_range *vmr);
struct xpmem_attachment {
ihk_rwspinlock_t at_lock; /* att lock */
mcs_rwlock_lock_t at_lock; /* att lock */
unsigned long vaddr; /* starting address of seg attached */
unsigned long at_vaddr; /* address where seg is attached */
size_t at_size; /* size of seg attachment */
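
For orientation, the locking pattern this revert restores across the attachment code is sketched below, using only the mcs_rwlock calls that appear in the later hunks (the wrapper function name is illustrative, not from the source):

/* Sketch: a writer-side critical section over an attachment with the
 * mcs_rwlock API restored by this revert. The caller supplies a queue
 * node (at_lock) instead of receiving saved IRQ flags, which is why the
 * declarations change from "unsigned long at_lock" back to
 * "struct mcs_rwlock_node_irqsave at_lock". */
static void att_flags_update_sketch(struct xpmem_attachment *att)
{
	struct mcs_rwlock_node_irqsave at_lock;

	mcs_rwlock_writer_lock(&att->at_lock, &at_lock);
	att->flags |= XPMEM_FLAG_VALIDPTEs;	/* any protected update */
	mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
}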


@@ -248,12 +248,11 @@ static void xpmem_clear_PTEs_of_att(struct xpmem_attachment *, unsigned long,
static int xpmem_remap_pte(struct process_vm *, struct vm_range *,
unsigned long, uint64_t, struct xpmem_segment *, unsigned long);
static int xpmem_ensure_valid_page(struct xpmem_segment *, unsigned long,
int);
static int xpmem_ensure_valid_page(struct xpmem_segment *, unsigned long);
static pte_t * xpmem_vaddr_to_pte(struct process_vm *, unsigned long,
size_t *pgsize);
static int xpmem_pin_page(struct xpmem_thread_group *, struct thread *,
struct process_vm *, unsigned long, int);
struct process_vm *, unsigned long);
static void xpmem_unpin_pages(struct xpmem_segment *, struct process_vm *,
unsigned long, size_t);
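
The prototype changes above drop the page_in flag from xpmem_ensure_valid_page() and xpmem_pin_page(). The branch that flag guarded is removed in the final hunk of this diff; condensed, with error paths trimmed, it read roughly:

/* Sketch of the removed optional page-in step (see the xpmem_pin_page()
 * hunk at the end of this diff). After the revert, the segment is
 * instead paged in up front by the loop restored in xpmem_make(). */
if (page_in) {
	ret = page_fault_process_memory_range(src_vm, range, vaddr,
			PF_POPULATE | PF_WRITE | PF_USER);
	if (ret)
		goto out;
}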


@@ -62,7 +62,6 @@ extern int interrupt_from_user(void *);
struct tlb_flush_entry tlb_flush_vector[IHK_TLB_FLUSH_IRQ_VECTOR_SIZE];
int anon_on_demand = 0;
int xpmem_remote_on_demand;
static struct ihk_mc_pa_ops *pa_ops;
@@ -1955,11 +1954,6 @@ void mem_init(void)
kprintf("Demand paging on ANONYMOUS mappings enabled.\n");
anon_on_demand = 1;
}
if (find_command_line("xpmem_remote_on_demand")) {
kprintf("Demand paging on XPMEM remote mappings enabled.\n");
xpmem_remote_on_demand = 1;
}
/* Init distance vectors */
numa_distances_init();
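
The hunk above removes the xpmem_remote_on_demand kernel-argument knob; its wiring follows the same shape as the anon_on_demand option that stays. A minimal sketch (the wrapper function is illustrative; the real check sits inline in mem_init()):

int xpmem_remote_on_demand;

static void xpmem_option_sketch(void)
{
	/* find_command_line() and kprintf() as used in the hunk above */
	if (find_command_line("xpmem_remote_on_demand")) {
		kprintf("Demand paging on XPMEM remote mappings enabled.\n");
		xpmem_remote_on_demand = 1;
	}
}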


@@ -1926,9 +1926,7 @@ out:
return error;
}
int page_fault_process_memory_range(struct process_vm *vm,
struct vm_range *range,
uintptr_t fault_addr, uint64_t reason)
static int page_fault_process_memory_range(struct process_vm *vm, struct vm_range *range, uintptr_t fault_addr, uint64_t reason)
{
int error;
pte_t *ptep;
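
The next file restores an eager page-in loop to xpmem_make(). Stripped of its error handling, the restored pattern is:

/* Sketch: fault the whole segment area in and measure it page by page,
 * advancing by whatever page size backs each mapping; seg_size may
 * round the requested size up to a page multiple. Error paths omitted;
 * names are those of the restored hunk. */
for (pf_addr = vaddr, seg_size = 0; pf_addr < vaddr + size; ) {
	if (page_fault_process_vm(vm, (void *)pf_addr,
				  PF_POPULATE | PF_WRITE | PF_USER))
		return -ENOENT;
	seg_pte = xpmem_vaddr_to_pte(vm, pf_addr, &pgsize);
	if (!seg_pte || pte_is_null(seg_pte))
		return -ENOENT;
	pf_addr += pgsize;
	seg_size += pgsize;
}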


@@ -25,7 +25,6 @@
#include <kmalloc.h>
#include <limits.h>
#include <memobj.h>
#include <process.h>
#include <mman.h>
#include <page.h>
#include <string.h>
@@ -424,6 +423,11 @@ static int xpmem_make(
struct xpmem_thread_group *seg_tg;
struct xpmem_segment *seg;
struct mcs_rwlock_node_irqsave lock;
struct process_vm *vm = cpu_local_var(current)->vm;
int ret;
pte_t *seg_pte = NULL;
size_t pgsize = 0, seg_size = 0;
unsigned long pf_addr;
XPMEM_DEBUG("call: vaddr=0x%lx, size=0x%lx, permit_type=%d, "
"permit_value=0%04lo",
@@ -448,13 +452,34 @@ static int xpmem_make(
* multiple of pages in size.
*/
if (offset_in_page(vaddr) != 0 ||
/* Special treatment of -1UL */
(offset_in_page(size) != 0 && size != 0xffffffffffffffff)) {
/* Special treatment of -1UL */
(offset_in_page(size) != 0 && size != 0xffffffffffffffff)) {
xpmem_tg_deref(seg_tg);
XPMEM_DEBUG("return: ret=%d", -EINVAL);
return -EINVAL;
}
/* Page-in segment area */
pf_addr = vaddr;
while (pf_addr < vaddr + size) {
ret = page_fault_process_vm(vm, (void *)pf_addr,
PF_POPULATE | PF_WRITE | PF_USER);
if (ret) {
xpmem_tg_deref(seg_tg);
return -ENOENT;
}
seg_pte = xpmem_vaddr_to_pte(vm, pf_addr, &pgsize);
if (!seg_pte || pte_is_null(seg_pte)) {
xpmem_tg_deref(seg_tg);
return -ENOENT;
}
pf_addr += pgsize;
seg_size += pgsize;
}
if (seg_size > size) {
size = seg_size;
}
segid = xpmem_make_segid(seg_tg);
if (segid < 0) {
xpmem_tg_deref(seg_tg);
@@ -1011,7 +1036,7 @@ static int xpmem_attach(
struct xpmem_access_permit *ap;
struct xpmem_segment *seg;
struct xpmem_attachment *att;
unsigned long at_lock;
struct mcs_rwlock_node_irqsave at_lock;
struct process_vm *vm = cpu_local_var(current)->vm;
XPMEM_DEBUG("call: apid=0x%lx, offset=0x%lx, size=0x%lx, vaddr=0x%lx, "
@@ -1077,7 +1102,7 @@ static int xpmem_attach(
XPMEM_DEBUG("kmalloc(): att=0x%p", att);
memset(att, 0, sizeof(struct xpmem_attachment));
ihk_rwspinlock_init(&att->at_lock);
mcs_rwlock_init(&att->at_lock);
att->vaddr = seg_vaddr;
att->at_size = size;
att->ap = ap;
@@ -1087,7 +1112,7 @@ static int xpmem_attach(
xpmem_att_not_destroyable(att);
xpmem_att_ref(att);
at_lock = ihk_rwspinlock_write_lock(&att->at_lock);
mcs_rwlock_writer_lock(&att->at_lock, &at_lock);
ihk_mc_spinlock_lock_noirq(&ap->lock);
list_add_tail(&att->att_list, &ap->att_list);
@@ -1129,10 +1154,7 @@ static int xpmem_attach(
"flags=0x%lx, fd=%d, offset=0x%lx",
vaddr, size, prot_flags, flags, mckfd->fd, offset);
/* The new range is associated with shmobj because of
* MAP_ANONYMOUS && !MAP_PRIVATE && MAP_SHARED. Note that MAP_FIXED
* support prevents us from reusing segment vm_range when segment vm
and attach vm are the same.
*/
MAP_ANONYMOUS && !MAP_PRIVATE && MAP_SHARED */
at_vaddr = do_mmap(vaddr, size, prot_flags, flags, mckfd->fd,
offset, VR_XPMEM, att);
if (IS_ERR((void *)(uintptr_t)at_vaddr)) {
@@ -1152,7 +1174,7 @@ out_2:
ihk_mc_spinlock_unlock_noirq(&ap->lock);
xpmem_att_destroyable(att);
}
ihk_rwspinlock_write_unlock(&att->at_lock, at_lock);
mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
xpmem_att_deref(att);
out_1:
xpmem_ap_deref(ap);
@@ -1171,7 +1193,7 @@ static int xpmem_detach(
int ret;
struct xpmem_access_permit *ap;
struct xpmem_attachment *att;
unsigned long at_lock;
struct mcs_rwlock_node_irqsave at_lock;
struct vm_range *range;
struct process_vm *vm = cpu_local_var(current)->vm;
@@ -1194,10 +1216,10 @@ static int xpmem_detach(
xpmem_att_ref(att);
at_lock = ihk_rwspinlock_write_lock(&att->at_lock);
mcs_rwlock_writer_lock(&att->at_lock, &at_lock);
if (att->flags & XPMEM_FLAG_DESTROYING) {
ihk_rwspinlock_write_unlock(&att->at_lock, at_lock);
mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
ihk_rwspinlock_write_unlock_noirq(&vm->memory_range_lock);
xpmem_att_deref(att);
return 0;
@@ -1210,7 +1232,7 @@ static int xpmem_detach(
if (cpu_local_var(current)->proc->pid != ap->tg->tgid) {
att->flags &= ~XPMEM_FLAG_DESTROYING;
xpmem_ap_deref(ap);
ihk_rwspinlock_write_unlock(&att->at_lock, at_lock);
mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
ihk_rwspinlock_write_unlock_noirq(&vm->memory_range_lock);
xpmem_att_deref(att);
return -EACCES;
@@ -1222,7 +1244,7 @@ static int xpmem_detach(
/* range->memobj is released in xpmem_vm_munmap() --> xpmem_remove_process_range() -->
xpmem_free_process_memory_range() */
ihk_rwspinlock_write_unlock(&att->at_lock, at_lock);
mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
XPMEM_DEBUG("xpmem_vm_munmap(): start=0x%lx, len=0x%lx",
range->start, att->at_size);
@@ -1390,16 +1412,16 @@ static void xpmem_detach_att(
int ret;
struct vm_range *range;
struct process_vm *vm;
unsigned long at_lock;
struct mcs_rwlock_node_irqsave at_lock;
XPMEM_DEBUG("call: apid=0x%lx, att=0x%p", ap->apid, att);
XPMEM_DEBUG("detaching att->vm=0x%p", (void *)att->vm);
at_lock = ihk_rwspinlock_write_lock(&att->at_lock);
mcs_rwlock_writer_lock(&att->at_lock, &at_lock);
if (att->flags & XPMEM_FLAG_DESTROYING) {
ihk_rwspinlock_write_unlock(&att->at_lock, at_lock);
mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
XPMEM_DEBUG("return: XPMEM_FLAG_DESTROYING");
return;
}
@@ -1415,7 +1437,7 @@ static void xpmem_detach_att(
ihk_mc_spinlock_lock_noirq(&ap->lock);
list_del_init(&att->att_list);
ihk_mc_spinlock_unlock_noirq(&ap->lock);
ihk_rwspinlock_write_unlock(&att->at_lock, at_lock);
mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
ihk_rwspinlock_read_unlock_noirq(&vm->memory_range_lock);
xpmem_att_destroyable(att);
XPMEM_DEBUG("return: range=%p");
@@ -1441,7 +1463,7 @@ static void xpmem_detach_att(
list_del_init(&att->att_list);
ihk_mc_spinlock_unlock_noirq(&ap->lock);
ihk_rwspinlock_write_unlock(&att->at_lock, at_lock);
mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
XPMEM_DEBUG("xpmem_vm_munmap(): start=0x%lx, len=0x%lx",
range->start, att->at_size);
@@ -1549,13 +1571,13 @@ static void xpmem_clear_PTEs_of_att(
unsigned long end)
{
int ret;
unsigned long at_lock;
struct mcs_rwlock_node_irqsave at_lock;
XPMEM_DEBUG("call: att=0x%p, start=0x%lx, end=0x%lx",
att, start, end);
ihk_rwspinlock_read_lock_noirq(&att->vm->memory_range_lock);
at_lock = ihk_rwspinlock_write_lock(&att->at_lock);
mcs_rwlock_writer_lock(&att->at_lock, &at_lock);
if (att->flags & XPMEM_FLAG_VALIDPTEs) {
struct vm_range *range;
@@ -1595,7 +1617,7 @@ static void xpmem_clear_PTEs_of_att(
goto out;
}
ihk_rwspinlock_write_unlock(&att->at_lock, at_lock);
mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
XPMEM_DEBUG(
"xpmem_vm_munmap(): start=0x%lx, len=0x%lx",
@@ -1607,13 +1629,13 @@ static void xpmem_clear_PTEs_of_att(
__FUNCTION__, ret);
}
at_lock = ihk_rwspinlock_write_lock(&att->at_lock);
mcs_rwlock_writer_lock(&att->at_lock, &at_lock);
if (offset_start == 0 && att->at_size == invalidate_len)
att->flags &= ~XPMEM_FLAG_VALIDPTEs;
}
out:
ihk_rwspinlock_write_unlock(&att->at_lock, at_lock);
mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
ihk_rwspinlock_read_unlock_noirq(&att->vm->memory_range_lock);
XPMEM_DEBUG("return: ");
@@ -1628,7 +1650,7 @@ int xpmem_remove_process_memory_range(
u64 remaining_vaddr;
struct xpmem_access_permit *ap;
struct xpmem_attachment *att;
unsigned long at_lock;
struct mcs_rwlock_node_irqsave at_lock;
XPMEM_DEBUG("call: vmr=0x%p, att=0x%p", vmr, vmr->private_data);
@@ -1642,7 +1664,7 @@ int xpmem_remove_process_memory_range(
xpmem_att_ref(att);
at_lock = ihk_rwspinlock_write_lock(&att->at_lock);
mcs_rwlock_writer_lock(&att->at_lock, &at_lock);
if (att->flags & XPMEM_FLAG_DESTROYING) {
XPMEM_DEBUG("already cleaned up");
@@ -1718,7 +1740,7 @@ int xpmem_remove_process_memory_range(
*/
out:
ihk_rwspinlock_write_unlock(&att->at_lock, at_lock);
mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
xpmem_att_deref(att);
@@ -1728,29 +1750,24 @@ out:
}
static int _xpmem_fault_process_memory_range(
int xpmem_fault_process_memory_range(
struct process_vm *vm,
struct vm_range *vmr,
unsigned long vaddr,
uint64_t reason,
int page_in_remote)
uint64_t reason)
{
int ret = 0;
unsigned long seg_vaddr;
unsigned long seg_vaddr = 0;
pte_t *pte = NULL;
pte_t *old_pte = NULL;
struct xpmem_thread_group *ap_tg;
struct xpmem_thread_group *seg_tg;
struct xpmem_access_permit *ap;
struct xpmem_attachment *att;
struct xpmem_segment *seg;
pte_t *att_pte;
void *att_pgaddr;
size_t att_pgsize;
int att_p2align;
pte_t *seg_pte;
size_t seg_pgsize;
uintptr_t seg_phys;
uintptr_t seg_phys_plus_off;
enum ihk_mc_pt_attribute att_attr;
size_t pgsize;
struct mcs_rwlock_node_irqsave at_lock = { 0 };
int att_locked = 0;
XPMEM_DEBUG("call: vmr=0x%p, vaddr=0x%lx, reason=0x%lx",
vmr, vaddr, reason);
@@ -1783,144 +1800,67 @@ static int _xpmem_fault_process_memory_range(
if ((seg->flags & XPMEM_FLAG_DESTROYING) ||
(seg_tg->flags & XPMEM_FLAG_DESTROYING)) {
ret = -ENOENT;
goto out;
goto out_2;
}
mcs_rwlock_writer_lock(&att->at_lock, &at_lock);
att_locked = 1;
if ((att->flags & XPMEM_FLAG_DESTROYING) ||
(ap_tg->flags & XPMEM_FLAG_DESTROYING) ||
(seg_tg->flags & XPMEM_FLAG_DESTROYING)) {
goto out;
goto out_2;
}
if (vaddr < att->at_vaddr || vaddr + 1 > att->at_vaddr + att->at_size) {
goto out;
goto out_2;
}
/* page in remote pages on page fault, or (on attach when
* xpmem_remote_on_demand isn't specified)
*/
seg_vaddr = att->vaddr + (vaddr - att->at_vaddr);
seg_vaddr = (att->vaddr & PAGE_MASK) + (vaddr - att->at_vaddr);
XPMEM_DEBUG("vaddr=%lx, seg_vaddr=%lx", vaddr, seg_vaddr);
ret = xpmem_ensure_valid_page(seg, seg_vaddr, page_in_remote);
ret = xpmem_ensure_valid_page(seg, seg_vaddr);
if (ret != 0) {
goto out;
goto out_2;
}
if (is_remote_vm(seg_tg->vm)) {
ihk_rwspinlock_read_lock_noirq(&seg_tg->vm->memory_range_lock);
}
seg_pte = xpmem_vaddr_to_pte(seg_tg->vm, seg_vaddr, &seg_pgsize);
/* map only resident remote pages on attach when
* xpmem_remote_on_demand is specified
*/
if (!seg_pte || pte_is_null(seg_pte)) {
ret = page_in_remote ? -EFAULT : 0;
if (is_remote_vm(seg_tg->vm)) {
ihk_rwspinlock_read_unlock_noirq(&seg_tg->vm->memory_range_lock);
}
goto out;
}
seg_phys = pte_get_phys(seg_pte);
/* clear lower bits of the contiguous-PTE tail entries */
seg_phys_plus_off = (seg_phys & ~(seg_pgsize - 1)) |
(seg_vaddr & (seg_pgsize - 1));
XPMEM_DEBUG("seg_vaddr: %lx, seg_phys: %lx, seg_phys_plus_off: %lx",
seg_vaddr, seg_phys, seg_phys_plus_off);
if (is_remote_vm(seg_tg->vm)) {
ihk_rwspinlock_read_unlock_noirq(&seg_tg->vm->memory_range_lock);
}
/* find largest page-size fitting vm range and segment page */
att_pte = ihk_mc_pt_lookup_pte(vm->address_space->page_table,
(void *)vaddr, vmr->pgshift, &att_pgaddr, &att_pgsize,
&att_p2align);
while ((unsigned long)att_pgaddr < vmr->start ||
vmr->end < (uintptr_t)att_pgaddr + att_pgsize ||
att_pgsize > seg_pgsize) {
att_pte = NULL;
ret = arch_get_smaller_page_size(NULL, att_pgsize,
&att_pgsize, &att_p2align);
if (ret) {
ekprintf("%s: arch_get_smaller_page_size failed: "
" range: %lx-%lx, pgsize: %lx, ret: %d\n",
__func__, vmr->start, vmr->end, att_pgsize,
ret);
goto out;
}
att_pgaddr = (void *)(vaddr & ~(att_pgsize - 1));
}
arch_adjust_allocate_page_size(vm->address_space->page_table,
vaddr, att_pte, &att_pgaddr,
&att_pgsize);
XPMEM_DEBUG("att_pte=%p, att_pgaddr=0x%p, att_pgsize=%lu, "
"att_p2align=%d",
att_pte, att_pgaddr, att_pgsize, att_p2align);
/* last arg is not used */
att_attr = arch_vrflag_to_ptattr(vmr->flag, reason, NULL);
XPMEM_DEBUG("att_attr=0x%lx", att_attr);
if (att_pte && !pte_is_null(att_pte)) {
unsigned long att_phys = pte_get_phys(att_pte);
unsigned long seg_phys_aligned =
seg_phys_plus_off & ~(att_pgsize - 1);
if (att_phys != seg_phys_aligned) {
ret = -EFAULT;
ekprintf("%s: ERROR: pte mismatch: "
"0x%lx != 0x%lx\n",
__func__, att_phys, seg_phys_aligned);
}
if (page_in_remote) {
ihk_atomic_dec(&seg->tg->n_pinned);
}
goto out;
}
XPMEM_DEBUG("att_pgaddr: %lx, att_pgsize: %lx, "
"seg_vaddr: %lx, seg_pgsize: %lx, seg_phys: %lx\n",
att_pgaddr, att_pgsize, seg_vaddr,
seg_pgsize, seg_phys);
if (att_pte && !pgsize_is_contiguous(att_pgsize)) {
ret = ihk_mc_pt_set_pte(vm->address_space->page_table,
att_pte, att_pgsize,
seg_phys_plus_off,
att_attr);
if (ret) {
ret = -EFAULT;
ekprintf("%s: ERROR: ihk_mc_pt_set_pte() failed %d\n",
__func__, ret);
goto out;
}
}
else {
ret = ihk_mc_pt_set_range(vm->address_space->page_table, vm,
att_pgaddr, att_pgaddr + att_pgsize,
seg_phys_plus_off,
att_attr, vmr->pgshift, vmr, 1);
if (ret) {
ret = -EFAULT;
ekprintf("%s: ERROR: ihk_mc_pt_set_range() failed %d\n",
__func__, ret);
goto out;
}
}
pte = xpmem_vaddr_to_pte(seg_tg->vm, seg_vaddr, &pgsize);
att->flags |= XPMEM_FLAG_VALIDPTEs;
flush_tlb_single(vaddr);
out:
out_2:
xpmem_ap_deref(ap);
xpmem_tg_deref(ap_tg);
if (pte && !pte_is_null(pte)) {
old_pte = xpmem_vaddr_to_pte(cpu_local_var(current)->vm, vaddr,
&pgsize);
if (old_pte && !pte_is_null(old_pte)) {
if (*old_pte != *pte) {
ret = -EFAULT;
ekprintf("%s: ERROR: pte mismatch: "
"0x%lx != 0x%lx\n",
__FUNCTION__, *old_pte, *pte);
}
ihk_atomic_dec(&seg->tg->n_pinned);
goto out_1;
}
ret = xpmem_remap_pte(vm, vmr, vaddr, reason, seg, seg_vaddr);
if (ret) {
ekprintf("%s: ERROR: xpmem_remap_pte() failed %d\n",
__FUNCTION__, ret);
}
}
flush_tlb_single(vaddr);
out_1:
if (att_locked) {
mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
}
xpmem_tg_deref(seg_tg);
xpmem_seg_deref(seg);
xpmem_att_deref(att);
@@ -1930,38 +1870,21 @@ out:
return ret;
}
int xpmem_fault_process_memory_range(
struct process_vm *vm,
struct vm_range *vmr,
unsigned long vaddr,
uint64_t reason)
{
int ret;
unsigned long at_lock;
struct xpmem_attachment *att;
att = (struct xpmem_attachment *)vmr->private_data;
if (att == NULL) {
return -EFAULT;
}
at_lock = ihk_rwspinlock_read_lock(&att->at_lock);
ret = _xpmem_fault_process_memory_range(vm, vmr, vaddr, reason, 1);
ihk_rwspinlock_read_unlock(&att->at_lock, at_lock);
return ret;
}
int xpmem_update_process_page_table(
struct process_vm *vm, struct vm_range *vmr)
{
int ret = 0;
unsigned long vaddr;
pte_t *pte;
size_t pgsize;
unsigned long seg_vaddr = 0;
unsigned long vaddr = vmr->start;
pte_t *pte = NULL;
pte_t *seg_pte = NULL;
struct xpmem_thread_group *ap_tg;
struct xpmem_thread_group *seg_tg;
struct xpmem_access_permit *ap;
struct xpmem_attachment *att;
struct xpmem_segment *seg;
size_t seg_pgsize;
size_t pgsize;
XPMEM_DEBUG("call: vmr=0x%p", vmr);
@@ -2005,29 +1928,44 @@ int xpmem_update_process_page_table
goto out_2;
}
for (vaddr = vmr->start; vaddr < vmr->end; vaddr += pgsize) {
XPMEM_DEBUG("vmr: %lx-%lx, vaddr: %lx",
vmr->start, vmr->end, vaddr);
ret = _xpmem_fault_process_memory_range(vm, vmr, vaddr,
0,
!xpmem_remote_on_demand);
if (ret) {
ekprintf("%s: ERROR: "
"_xpmem_fault_process_memory_range() "
"failed %d\n", __func__, ret);
seg_vaddr = (att->vaddr & PAGE_MASK) + (vaddr - att->at_vaddr);
XPMEM_DEBUG("vaddr=%lx, seg_vaddr=%lx", vaddr, seg_vaddr);
while (vaddr < vmr->end) {
ret = xpmem_ensure_valid_page(seg, seg_vaddr);
if (ret != 0) {
goto out_2;
}
pte = ihk_mc_pt_lookup_pte(vm->address_space->page_table,
(void *)vaddr, vmr->pgshift,
NULL, &pgsize, NULL);
seg_pte = xpmem_vaddr_to_pte(seg_tg->vm, seg_vaddr,
&seg_pgsize);
/* when the segment page is not resident and
* xpmem_remote_on_demand is specified
*/
if (!pte || pte_is_null(pte)) {
pgsize = PAGE_SIZE;
if (seg_pte && !pte_is_null(seg_pte)) {
pte = xpmem_vaddr_to_pte(cpu_local_var(current)->vm,
vaddr, &pgsize);
if (pte && !pte_is_null(pte)) {
if (*seg_pte != *pte) {
ret = -EFAULT;
ekprintf("%s: ERROR: pte mismatch: "
"0x%lx != 0x%lx\n",
__func__, *seg_pte, *pte);
}
ihk_atomic_dec(&seg->tg->n_pinned);
goto out_2;
}
ret = xpmem_remap_pte(vm, vmr, vaddr,
0, seg, seg_vaddr);
if (ret) {
ekprintf("%s: ERROR: xpmem_remap_pte() failed %d\n",
__func__, ret);
}
}
flush_tlb_single(vaddr);
att->flags |= XPMEM_FLAG_VALIDPTEs;
seg_vaddr += seg_pgsize;
vaddr += seg_pgsize;
}
out_2:
@@ -2044,10 +1982,110 @@ out_1:
return ret;
}
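
The restored xpmem_update_process_page_table() above walks the attach range in segment-page-size steps; schematically:

/* Sketch: validate and remap one segment page per iteration. seg_pgsize
 * can exceed PAGE_SIZE, so both cursors advance by the size that
 * actually backs the segment page. Error handling and the PTE-mismatch
 * check of the real code are omitted. */
while (vaddr < vmr->end) {
	if (xpmem_ensure_valid_page(seg, seg_vaddr))
		break;
	seg_pte = xpmem_vaddr_to_pte(seg_tg->vm, seg_vaddr, &seg_pgsize);
	if (seg_pte && !pte_is_null(seg_pte))
		xpmem_remap_pte(vm, vmr, vaddr, 0, seg, seg_vaddr);
	vaddr += seg_pgsize;
	seg_vaddr += seg_pgsize;
}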
static int xpmem_remap_pte(
struct process_vm *vm,
struct vm_range *vmr,
unsigned long vaddr,
uint64_t reason,
struct xpmem_segment *seg,
unsigned long seg_vaddr)
{
int ret;
struct xpmem_thread_group *seg_tg = seg->tg;
struct vm_range *seg_vmr;
pte_t *seg_pte;
void *seg_pgaddr;
size_t seg_pgsize;
int seg_p2align;
uintptr_t seg_phys;
pte_t *att_pte;
void *att_pgaddr;
size_t att_pgsize;
int att_p2align;
enum ihk_mc_pt_attribute att_attr;
XPMEM_DEBUG("call: vmr=0x%p, vaddr=0x%lx, reason=0x%lx, segid=0x%lx, "
"seg_vaddr=0x%lx",
vmr, vaddr, reason, seg->segid, seg_vaddr);
if (is_remote_vm(seg_tg->vm)) {
ihk_rwspinlock_read_lock_noirq(&seg_tg->vm->memory_range_lock);
}
seg_vmr = lookup_process_memory_range(seg_tg->vm, seg_vaddr,
seg_vaddr + 1);
if (!seg_vmr) {
ret = -EFAULT;
ekprintf("%s: ERROR: lookup_process_memory_range() failed\n",
__FUNCTION__);
goto out;
}
seg_pte = ihk_mc_pt_lookup_pte(seg_tg->vm->address_space->page_table,
(void *)seg_vaddr, seg_vmr->pgshift, &seg_pgaddr, &seg_pgsize,
&seg_p2align);
if (!seg_pte) {
ret = -EFAULT;
ekprintf("%s: ERROR: ihk_mc_pt_lookup_pte() failed\n",
__FUNCTION__);
goto out;
}
XPMEM_DEBUG("seg_pte=0x%016lx, seg_pgaddr=0x%p, seg_pgsize=%lu, "
"seg_p2align=%d",
*seg_pte, seg_pgaddr, seg_pgsize, seg_p2align);
seg_phys = pte_get_phys(seg_pte);
XPMEM_DEBUG("seg_phys=0x%lx", seg_phys);
att_pte = ihk_mc_pt_lookup_pte(vm->address_space->page_table,
(void *)vaddr, vmr->pgshift, &att_pgaddr, &att_pgsize,
&att_p2align);
XPMEM_DEBUG("att_pte=%p, att_pgaddr=0x%p, att_pgsize=%lu, "
"att_p2align=%d",
att_pte, att_pgaddr, att_pgsize, att_p2align);
att_attr = arch_vrflag_to_ptattr(vmr->flag, reason, att_pte);
XPMEM_DEBUG("att_attr=0x%lx", att_attr);
if (att_pte && !pgsize_is_contiguous(seg_pgsize)) {
ret = ihk_mc_pt_set_pte(vm->address_space->page_table, att_pte,
seg_pgsize, seg_phys, att_attr);
if (ret) {
ret = -EFAULT;
ekprintf("%s: ERROR: ihk_mc_pt_set_pte() failed %d\n",
__func__, ret);
goto out;
}
}
else {
ret = ihk_mc_pt_set_range(vm->address_space->page_table, vm,
att_pgaddr, att_pgaddr + seg_pgsize,
seg_phys, att_attr,
pgsize_to_pgshift(seg_pgsize), vmr, 1);
if (ret) {
ret = -EFAULT;
ekprintf("%s: ERROR: ihk_mc_pt_set_range() failed %d\n",
__func__, ret);
goto out;
}
}
out:
if (is_remote_vm(seg_tg->vm)) {
ihk_rwspinlock_read_unlock_noirq(&seg_tg->vm->memory_range_lock);
}
XPMEM_DEBUG("return: ret=%d", ret);
return ret;
}
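
xpmem_remap_pte() above is the core of the reverted mapping path: resolve the segment-side PTE, take its physical address, and install it at the attach address. In outline:

/* Sketch: the three steps performed above (locking, the set_pte vs.
 * set_range choice, and debug output omitted; all names are from the
 * function itself). */
seg_pte = ihk_mc_pt_lookup_pte(seg_tg->vm->address_space->page_table,
		(void *)seg_vaddr, seg_vmr->pgshift,
		&seg_pgaddr, &seg_pgsize, &seg_p2align);	/* 1. segment PTE */
seg_phys = pte_get_phys(seg_pte);				/* 2. physical page */
ret = ihk_mc_pt_set_range(vm->address_space->page_table, vm,	/* 3. map at attach vaddr */
		att_pgaddr, att_pgaddr + seg_pgsize, seg_phys,
		att_attr, pgsize_to_pgshift(seg_pgsize), vmr, 1);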
static int xpmem_ensure_valid_page(
struct xpmem_segment *seg,
unsigned long vaddr,
int page_in)
unsigned long vaddr)
{
int ret;
struct xpmem_thread_group *seg_tg = seg->tg;
@@ -2057,8 +2095,7 @@ static int xpmem_ensure_valid_page(
if (seg->flags & XPMEM_FLAG_DESTROYING)
return -ENOENT;
ret = xpmem_pin_page(seg_tg, seg_tg->group_leader, seg_tg->vm, vaddr,
page_in);
ret = xpmem_pin_page(seg_tg, seg_tg->group_leader, seg_tg->vm, vaddr);
XPMEM_DEBUG("return: ret=%d", ret);
@@ -2104,8 +2141,7 @@ static int xpmem_pin_page(
struct xpmem_thread_group *tg,
struct thread *src_thread,
struct process_vm *src_vm,
unsigned long vaddr,
int page_in)
unsigned long vaddr)
{
int ret = 0;
struct vm_range *range;
@@ -2129,21 +2165,6 @@ static int xpmem_pin_page(
}
ihk_atomic_inc(&tg->n_pinned);
/* Page-in remote area */
if (page_in) {
/* skip the read lock when src_vm is local,
* because the write lock is taken in do_mmap.
*/
ret = page_fault_process_memory_range(src_vm, range,
vaddr,
PF_POPULATE | PF_WRITE |
PF_USER);
if (ret) {
goto out;
}
}
out:
if (is_remote_vm(src_vm)) {
ihk_rwspinlock_read_unlock_noirq(&src_vm->memory_range_lock);