Revert "xpmem: Support large page attachment"

This reverts commit a8696d811d.

Conflicts:
	kernel/include/process.h
	kernel/syscall.c
	kernel/xpmem.c

Change-Id: I726e74450f6228d3fc78fc62dda15b2067732a53
This commit is contained in:
Masamichi Takagi
2020-06-16 10:42:53 +09:00
parent 29d27b7c8d
commit 6332903f0d
35 changed files with 104 additions and 2394 deletions

View File

@@ -423,11 +423,6 @@ static int xpmem_make(
struct xpmem_thread_group *seg_tg;
struct xpmem_segment *seg;
struct mcs_rwlock_node_irqsave lock;
struct process_vm *vm = cpu_local_var(current)->vm;
int ret;
pte_t *seg_pte = NULL;
size_t pgsize = 0, seg_size = 0;
unsigned long pf_addr;
XPMEM_DEBUG("call: vaddr=0x%lx, size=0x%lx, permit_type=%d, "
"permit_value=0%04lo",
@@ -459,27 +454,6 @@ static int xpmem_make(
return -EINVAL;
}
/* Page-in segment area */
pf_addr = vaddr;
while (pf_addr < vaddr + size) {
ret = page_fault_process_vm(vm, (void *)pf_addr,
PF_POPULATE | PF_WRITE | PF_USER);
if (ret) {
xpmem_tg_deref(seg_tg);
return -ENOENT;
}
seg_pte = xpmem_vaddr_to_pte(vm, pf_addr, &pgsize);
if (!seg_pte || pte_is_null(seg_pte)) {
xpmem_tg_deref(seg_tg);
return -ENOENT;
}
pf_addr += pgsize;
seg_size += pgsize;
}
if (seg_size > size) {
size = seg_size;
}
segid = xpmem_make_segid(seg_tg);
if (segid < 0) {
xpmem_tg_deref(seg_tg);
@@ -1037,6 +1011,7 @@ static int xpmem_attach(
struct xpmem_segment *seg;
struct xpmem_attachment *att;
struct mcs_rwlock_node_irqsave at_lock;
struct vm_range *vmr;
struct process_vm *vm = cpu_local_var(current)->vm;
XPMEM_DEBUG("call: apid=0x%lx, offset=0x%lx, size=0x%lx, vaddr=0x%lx, "
@@ -1151,15 +1126,37 @@ static int xpmem_attach(
XPMEM_DEBUG("do_mmap(): vaddr=0x%lx, size=0x%lx, prot_flags=0x%lx, "
"flags=0x%lx, fd=%d, offset=0x%lx",
vaddr, size, prot_flags, flags, mckfd->fd, offset);
/* The new range is associated with shmobj because of
/* The new range uses on-demand paging and is associated with shmobj because of
MAP_ANONYMOUS && !MAP_PRIVATE && MAP_SHARED */
at_vaddr = do_mmap(vaddr, size, prot_flags, flags, mckfd->fd,
offset, VR_XPMEM, att);
at_vaddr = do_mmap(vaddr, size, prot_flags, flags, mckfd->fd, offset);
if (IS_ERR((void *)(uintptr_t)at_vaddr)) {
ret = at_vaddr;
goto out_2;
}
XPMEM_DEBUG("at_vaddr=0x%lx", at_vaddr);
att->at_vaddr = at_vaddr;
ihk_rwspinlock_read_lock_noirq(&vm->memory_range_lock);
vmr = lookup_process_memory_range(vm, at_vaddr, at_vaddr + 1);
/* To identify pages of XPMEM attachment for rusage accounting */
if(vmr->memobj) {
vmr->memobj->flags |= MF_XPMEM;
} else {
ekprintf("%s: vmr->memobj equals to NULL\n", __FUNCTION__);
}
ihk_rwspinlock_read_unlock_noirq(&vm->memory_range_lock);
if (!vmr) {
ret = -ENOENT;
goto out_2;
}
vmr->private_data = att;
att->at_vmr = vmr;
*at_vaddr_p = at_vaddr + offset_in_page(att->vaddr);
@@ -1185,6 +1182,7 @@ out_1:
return ret;
}
static int xpmem_detach(
unsigned long at_vaddr)
{
@@ -1880,117 +1878,6 @@ out_1:
return ret;
}
/*
 * Populate the page table entries of an XPMEM attachment range.
 *
 * The attachment is taken from vmr->private_data.  The range
 * [vmr->start, vmr->end) is walked; for every step the matching segment
 * page is validated with xpmem_ensure_valid_page(), its PTE is looked up
 * in the segment owner's vm and, when that PTE is present and the
 * attaching side has no PTE yet, xpmem_remap_pte() is called for the
 * attaching address.
 *
 * @vm:  process_vm passed through to xpmem_remap_pte().
 * @vmr: range of the attachment; its private_data must point to the
 *       struct xpmem_attachment.
 *
 * Returns:
 *   -EFAULT  vmr->private_data is NULL, the access permit or its thread
 *            group is being destroyed, or an already present attach-side
 *            PTE differs from the segment PTE;
 *   -ENOENT  the segment or its thread group is being destroyed;
 *   the non-zero value returned by xpmem_ensure_valid_page();
 *   otherwise whatever ret holds when the loop ends (see the review note
 *   on xpmem_remap_pte() below).
 */
int xpmem_update_process_page_table(
struct process_vm *vm, struct vm_range *vmr)
{
int ret = 0;
unsigned long seg_vaddr = 0;
unsigned long vaddr = vmr->start;
pte_t *pte = NULL;
pte_t *seg_pte = NULL;
struct xpmem_thread_group *ap_tg;
struct xpmem_thread_group *seg_tg;
struct xpmem_access_permit *ap;
struct xpmem_attachment *att;
struct xpmem_segment *seg;
size_t seg_pgsize;
/* Only used as the out-parameter of the attach-side PTE lookup. */
size_t pgsize;
XPMEM_DEBUG("call: vmr=0x%p", vmr);
att = (struct xpmem_attachment *)vmr->private_data;
if (att == NULL) {
return -EFAULT;
}
/*
 * Take references on the attachment, its access permit and the permit's
 * thread group; they are dropped at out_1.
 */
xpmem_att_ref(att);
ap = att->ap;
xpmem_ap_ref(ap);
ap_tg = ap->tg;
xpmem_tg_ref(ap_tg);
if ((ap->flags & XPMEM_FLAG_DESTROYING) ||
(ap_tg->flags & XPMEM_FLAG_DESTROYING)) {
ret = -EFAULT;
/* seg/seg_tg are not referenced yet, so skip out_2. */
goto out_1;
}
DBUG_ON(cpu_local_var(current)->proc->pid != ap_tg->tgid);
DBUG_ON(ap->mode != XPMEM_RDWR);
/*
 * Take references on the segment and its thread group; they are dropped
 * at out_2.
 */
seg = ap->seg;
xpmem_seg_ref(seg);
seg_tg = seg->tg;
xpmem_tg_ref(seg_tg);
if ((seg->flags & XPMEM_FLAG_DESTROYING) ||
(seg_tg->flags & XPMEM_FLAG_DESTROYING)) {
ret = -ENOENT;
goto out_2;
}
/* Record where the attachment now lives. */
att->at_vaddr = vmr->start;
att->at_vmr = vmr;
/*
 * Second destroying check, this time including the attachment itself.
 * ret is not modified here, so this path returns 0.
 */
if ((att->flags & XPMEM_FLAG_DESTROYING) ||
(ap_tg->flags & XPMEM_FLAG_DESTROYING) ||
(seg_tg->flags & XPMEM_FLAG_DESTROYING)) {
goto out_2;
}
/*
 * vaddr and att->at_vaddr both equal vmr->start at this point, so the
 * walk starts at the page-aligned att->vaddr on the segment side.
 */
seg_vaddr = (att->vaddr & PAGE_MASK) + (vaddr - att->at_vaddr);
XPMEM_DEBUG("vaddr=%lx, seg_vaddr=%lx", vaddr, seg_vaddr);
while (vaddr < vmr->end) {
ret = xpmem_ensure_valid_page(seg, seg_vaddr);
if (ret != 0) {
goto out_2;
}
seg_pte = xpmem_vaddr_to_pte(seg_tg->vm, seg_vaddr,
&seg_pgsize);
if (seg_pte && !pte_is_null(seg_pte)) {
pte = xpmem_vaddr_to_pte(cpu_local_var(current)->vm,
vaddr, &pgsize);
if (pte && !pte_is_null(pte)) {
/*
 * The attaching side already has a PTE for this address: compare the
 * raw PTE values, report a mismatch, and stop the whole walk (not just
 * this page).
 */
if (*seg_pte != *pte) {
ret = -EFAULT;
ekprintf("%s: ERROR: pte mismatch: "
"0x%lx != 0x%lx\n",
__func__, *seg_pte, *pte);
}
/*
 * NOTE(review): presumably undoes a pin counted by
 * xpmem_ensure_valid_page() above -- confirm against that function.
 */
ihk_atomic_dec(&seg->tg->n_pinned);
goto out_2;
}
ret = xpmem_remap_pte(vm, vmr, vaddr,
0, seg, seg_vaddr);
/*
 * NOTE(review): a failure is only logged; the loop continues and ret is
 * overwritten by the next xpmem_ensure_valid_page() call, so the error
 * reaches the caller only if it happens on the last iteration.  Confirm
 * that this is intended.
 */
if (ret) {
ekprintf("%s: ERROR: xpmem_remap_pte() failed %d\n",
__func__, ret);
}
}
flush_tlb_single(vaddr);
att->flags |= XPMEM_FLAG_VALIDPTEs;
/*
 * Both cursors advance by the segment-side page size, also when no
 * segment PTE was found.
 * NOTE(review): assumes xpmem_vaddr_to_pte() always stores a usable size
 * in seg_pgsize -- confirm.
 */
seg_vaddr += seg_pgsize;
vaddr += seg_pgsize;
}
out_2:
xpmem_tg_deref(seg_tg);
xpmem_seg_deref(seg);
out_1:
xpmem_att_deref(att);
xpmem_ap_deref(ap);
xpmem_tg_deref(ap_tg);
XPMEM_DEBUG("return: ret=%d", ret);
return ret;
}
static int xpmem_remap_pte(
struct process_vm *vm,
@@ -2018,16 +1905,12 @@ static int xpmem_remap_pte(
"seg_vaddr=0x%lx",
vmr, vaddr, reason, seg->segid, seg_vaddr);
if (is_remote_vm(seg_tg->vm)) {
ihk_rwspinlock_read_lock_noirq(&seg_tg->vm->memory_range_lock);
}
ihk_rwspinlock_read_lock_noirq(&seg_tg->vm->memory_range_lock);
seg_vmr = lookup_process_memory_range(seg_tg->vm, seg_vaddr,
seg_vaddr + 1);
if (is_remote_vm(seg_tg->vm)) {
ihk_rwspinlock_read_unlock_noirq(&seg_tg->vm->memory_range_lock);
}
ihk_rwspinlock_read_unlock_noirq(&seg_tg->vm->memory_range_lock);
if (!seg_vmr) {
ret = -EFAULT;
@@ -2062,27 +1945,28 @@ static int xpmem_remap_pte(
att_attr = arch_vrflag_to_ptattr(vmr->flag, reason, att_pte);
XPMEM_DEBUG("att_attr=0x%lx", att_attr);
if (att_pte && !pgsize_is_contiguous(seg_pgsize)) {
ret = ihk_mc_pt_set_pte(vm->address_space->page_table, att_pte,
seg_pgsize, seg_phys, att_attr);
if (att_pte) {
ret = ihk_mc_pt_set_pte(vm->address_space->page_table, att_pte,
att_pgsize, seg_phys, att_attr);
if (ret) {
ret = -EFAULT;
ekprintf("%s: ERROR: ihk_mc_pt_set_pte() failed %d\n",
__func__, ret);
ekprintf("%s: ERROR: ihk_mc_pt_set_pte() failed %d\n",
__FUNCTION__, ret);
goto out;
}
// memory_stat_rss_add() is called by the process hosting the memory area
}
else {
ret = ihk_mc_pt_set_range(vm->address_space->page_table, vm,
att_pgaddr, att_pgaddr + seg_pgsize,
seg_phys, att_attr,
pgsize_to_pgshift(seg_pgsize), vmr, 1);
ret = ihk_mc_pt_set_range(vm->address_space->page_table, vm,
att_pgaddr, att_pgaddr + att_pgsize, seg_phys, att_attr,
vmr->pgshift, vmr, 0);
if (ret) {
ret = -EFAULT;
ekprintf("%s: ERROR: ihk_mc_pt_set_range() failed %d\n",
__func__, ret);
__FUNCTION__, ret);
goto out;
}
// memory_stat_rss_add() is called by the process hosting the memory area
}
out:
@@ -2142,7 +2026,8 @@ static pte_t * xpmem_vaddr_to_pte(
}
out:
return pte;
return pte;
}
@@ -2152,35 +2037,37 @@ static int xpmem_pin_page(
struct process_vm *src_vm,
unsigned long vaddr)
{
int ret = 0;
int ret;
struct vm_range *range;
XPMEM_DEBUG("call: tgid=%d, vaddr=0x%lx", tg->tgid, vaddr);
if (is_remote_vm(src_vm)) {
ihk_rwspinlock_read_lock_noirq(&src_vm->memory_range_lock);
}
ihk_rwspinlock_read_lock_noirq(&src_vm->memory_range_lock);
range = lookup_process_memory_range(src_vm, vaddr, vaddr + 1);
ihk_rwspinlock_read_unlock_noirq(&src_vm->memory_range_lock);
if (!range || range->start > vaddr) {
ret = -ENOENT;
goto out;
return -ENOENT;
}
if (xpmem_is_private_data(range)) {
ret = -ENOENT;
goto out;
return -ENOENT;
}
ihk_atomic_inc(&tg->n_pinned);
out:
if (is_remote_vm(src_vm)) {
ihk_rwspinlock_read_unlock_noirq(&src_vm->memory_range_lock);
ret = page_fault_process_vm(src_vm, (void *)vaddr,
PF_POPULATE | PF_WRITE | PF_USER);
if (!ret) {
ihk_atomic_inc(&tg->n_pinned);
}
else {
return -ENOENT;
}
XPMEM_DEBUG("return: ret=%d", ret);
return ret;
return ret;
}
@@ -2190,24 +2077,30 @@ static void xpmem_unpin_pages(
unsigned long vaddr,
size_t size)
{
int n_pgs = (((offset_in_page(vaddr) + (size)) + (PAGE_SIZE - 1)) >>
PAGE_SHIFT);
int n_pgs_unpinned = 0;
size_t vsize = 0;
unsigned long end = vaddr + size;
pte_t *pte = NULL;
XPMEM_DEBUG("call: segid=0x%lx, vaddr=0x%lx, size=0x%lx",
seg->segid, vaddr, size);
XPMEM_DEBUG("n_pgs=%d", n_pgs);
vaddr &= PAGE_MASK;
while (vaddr < end) {
while (n_pgs > 0) {
pte = xpmem_vaddr_to_pte(vm, vaddr, &vsize);
if (pte && !pte_is_null(pte)) {
n_pgs_unpinned++;
vaddr += vsize;
vaddr += PAGE_SIZE;
n_pgs--;
}
else {
vaddr = ((vaddr + vsize) & (~(vsize - 1)));
vsize = ((vaddr + vsize) & (~(vsize - 1)));
n_pgs -= (vsize - vaddr) / PAGE_SIZE;
vaddr = vsize;
}
}
@@ -2409,15 +2302,3 @@ static int xpmem_validate_access(
return 0;
}
/*
 * Tell whether vm is a different address space than the one referenced
 * by the current thread's process (cpu_local_var(current)->proc->vm).
 *
 * Returns 1 when vm is not the current process's vm, 0 when it is.
 */
static int is_remote_vm(struct process_vm *vm)
{
	struct process_vm *own_vm = cpu_local_var(current)->proc->vm;

	return (own_vm != vm) ? 1 : 0;
}