Include mbind support (this is a rebase commit to merge into development)

Author: Katsuya Horigome
Date:   2017-11-06 12:47:56 +09:00
Parent: bad487cc07
Commit: d649d6fc2d

124 changed files with 1458 additions and 103 deletions


@@ -1473,7 +1473,7 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
 	maxprot = PROT_READ | PROT_WRITE | PROT_EXEC;
 	if (!(flags & MAP_ANONYMOUS)) {
 		off = off0;
-		error = fileobj_create(fd, &memobj, &maxprot);
+		error = fileobj_create(fd, &memobj, &maxprot, addr0);
 #ifdef ATTACHED_MIC
 		/*
 		 * XXX: refuse device mapping in attached-mic now:
@@ -1536,7 +1536,7 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
 	}
 	p = ihk_mc_alloc_aligned_pages_user(npages, p2align,
-			IHK_MC_AP_NOWAIT | ap_flag);
+			IHK_MC_AP_NOWAIT | ap_flag, addr0);
 	if (p == NULL) {
 		dkprintf("%s: warning: failed to allocate %d contiguous pages "
 			" (bytes: %lu, pgshift: %d), enabling demand paging\n",
@@ -3771,6 +3771,57 @@ perf_mmap(struct mckfd *sfd, ihk_mc_user_context_t *ctx)
 	return rc;
 }
 
+struct vm_range_numa_policy *vm_range_policy_search(struct process_vm *vm,
+		uintptr_t addr)
+{
+	struct rb_root *root = &vm->vm_range_numa_policy_tree;
+	struct rb_node *node = root->rb_node;
+	struct vm_range_numa_policy *numa_policy = NULL;
+
+	while (node) {
+		numa_policy = rb_entry(node, struct vm_range_numa_policy,
+				policy_rb_node);
+
+		if (addr < numa_policy->start) {
+			node = node->rb_left;
+		} else if (addr >= numa_policy->end) {
+			node = node->rb_right;
+		} else {
+			return numa_policy;
+		}
+	}
+
+	return NULL;
+}
+
+static int vm_policy_insert(struct process_vm *vm,
+		struct vm_range_numa_policy *newrange)
+{
+	struct rb_root *root = &vm->vm_range_numa_policy_tree;
+	struct rb_node **new = &(root->rb_node), *parent = NULL;
+	struct vm_range_numa_policy *range;
+
+	while (*new) {
+		range = rb_entry(*new, struct vm_range_numa_policy,
+				policy_rb_node);
+		parent = *new;
+
+		if (newrange->end <= range->start) {
+			new = &((*new)->rb_left);
+		} else if (newrange->start >= range->end) {
+			new = &((*new)->rb_right);
+		} else {
+			ekprintf("vm_policy_insert(%p,%lx-%lx (nodemask)%lx (policy)%d): overlap %lx-%lx (nodemask)%lx (policy)%d\n",
+					vm, newrange->start, newrange->end,
+					newrange->numa_mask, newrange->numa_mem_policy,
+					range->start, range->end,
+					range->numa_mask, range->numa_mem_policy);
+			return -EFAULT;
+		}
+	}
+
+	dkprintf("vm_policy_insert: %p,%p: %lx-%lx (nodemask)%lx (policy)%d\n",
+			vm, newrange, newrange->start, newrange->end,
+			newrange->numa_mask, newrange->numa_mem_policy);
+
+	rb_link_node(&newrange->policy_rb_node, parent, new);
+	rb_insert_color(&newrange->policy_rb_node, root);
+
+	return 0;
+}
+
 struct mc_perf_event*
 mc_perf_event_alloc(struct perf_event_attr *attr)
 {
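
The two helpers above replace the former linear policy list with an rb-tree keyed by non-overlapping [start, end) ranges. Below is a minimal user-space sketch of the same search/insert discipline; plain pointers stand in for the kernel's rb_node machinery, and every name in it is illustrative rather than McKernel's:

	/* Non-overlapping [start, end) ranges kept in a binary search
	 * tree, looked up by address. Mirrors the descent logic of
	 * vm_range_policy_search()/vm_policy_insert() above, minus
	 * the red-black rebalancing. */
	#include <stdio.h>
	#include <stdlib.h>

	struct range_node {
		unsigned long start, end;   /* [start, end) */
		struct range_node *left, *right;
	};

	/* Go left if addr is below the range, right if at or past its
	 * end, otherwise addr falls inside this range. */
	static struct range_node *range_search(struct range_node *node,
					       unsigned long addr)
	{
		while (node) {
			if (addr < node->start)
				node = node->left;
			else if (addr >= node->end)
				node = node->right;
			else
				return node;
		}
		return NULL;
	}

	/* Descend to a leaf slot, refusing any overlap. */
	static int range_insert(struct range_node **root, struct range_node *new)
	{
		struct range_node **link = root;

		while (*link) {
			if (new->end <= (*link)->start)
				link = &(*link)->left;
			else if (new->start >= (*link)->end)
				link = &(*link)->right;
			else
				return -1;   /* overlap */
		}
		new->left = new->right = NULL;
		*link = new;
		return 0;
	}

	int main(void)
	{
		struct range_node *root = NULL;
		struct range_node a = { 0x1000, 0x2000 };
		struct range_node b = { 0x3000, 0x4000 };

		range_insert(&root, &a);
		range_insert(&root, &b);
		printf("0x1800 -> %p, 0x2800 -> %p\n",
		       (void *)range_search(root, 0x1800),
		       (void *)range_search(root, 0x2800));
		return 0;
	}

The real vm_policy_insert() additionally calls rb_insert_color() to rebalance; the overlap test and the descent are the same.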
@@ -8063,8 +8114,7 @@ SYSCALL_DECLARE(mbind)
 	int error = 0;
 	int bit;
 	struct vm_range *range;
-	struct vm_range_numa_policy *range_policy, *range_policy_iter;
-	struct vm_range_numa_policy *range_policy_next = NULL;
+	struct vm_range_numa_policy *range_policy, *range_policy_iter = NULL;
 	DECLARE_BITMAP(numa_mask, PROCESS_NUMA_MASK_BITS);
 
 	dkprintf("%s: addr: 0x%lx, len: %lu, mode: 0x%x, "
@@ -8275,17 +8325,12 @@ SYSCALL_DECLARE(mbind)
 	case MPOL_INTERLEAVE:
 	case MPOL_PREFERRED:
 		/* Adjust any overlapping range settings and add new one */
-		range_policy_next = NULL;
-		list_for_each_entry(range_policy_iter,
-				&vm->vm_range_numa_policy_list, list) {
+		range_policy_iter = vm_range_policy_search(vm, addr);
+		if (range_policy_iter) {
 			int adjusted = 0;
 			unsigned long orig_end = range_policy_iter->end;
 
-			if (range_policy_iter->end < addr)
-				continue;
-
 			/* Special case of entirely overlapping */
 			if (range_policy_iter->start == addr &&
 					range_policy_iter->end == addr + len) {
 				range_policy = range_policy_iter;
 				goto mbind_update_only;
@@ -8302,7 +8347,7 @@ SYSCALL_DECLARE(mbind)
 			if (orig_end > addr + len) {
 				if (adjusted) {
 					/* Add a new entry after */
-					range_policy = kmalloc(sizeof(*range_policy),
+					range_policy = kmalloc(sizeof(struct vm_range_numa_policy),
 							IHK_MC_AP_NOWAIT);
 					if (!range_policy) {
 						dkprintf("%s: error allocating range_policy\n",
@@ -8311,31 +8356,24 @@ SYSCALL_DECLARE(mbind)
 						goto unlock_out;
 					}
 					memcpy(range_policy, range_policy_iter,
 							sizeof(*range_policy));
+					RB_CLEAR_NODE(&range_policy->policy_rb_node);
 					range_policy->start = addr + len;
 					range_policy->end = orig_end;
-					list_add(&range_policy->list,
-							&range_policy_iter->list);
-					range_policy_next = range_policy;
-					break;
+					error = vm_policy_insert(vm, range_policy);
+					if (error) {
+						kprintf("%s: ERROR: could not insert range: %d\n",
+								__FUNCTION__, error);
+						goto unlock_out;
+					}
 				}
 				else {
 					range_policy_iter->start = addr + len;
-					range_policy_next = range_policy_iter;
-					break;
 				}
 			}
-
-			/* Next one in ascending address order? */
-			if (range_policy_iter->start >= addr + len) {
-				range_policy_next = range_policy_iter;
-				break;
-			}
 		}
 
 		/* Add a new entry */
-		range_policy = kmalloc(sizeof(*range_policy),
+		range_policy = kmalloc(sizeof(struct vm_range_numa_policy),
 				IHK_MC_AP_NOWAIT);
 		if (!range_policy) {
 			dkprintf("%s: error allocating range_policy\n",
@@ -8344,17 +8382,14 @@ SYSCALL_DECLARE(mbind)
 			goto unlock_out;
 		}
 		memset(range_policy, 0, sizeof(*range_policy));
+		RB_CLEAR_NODE(&range_policy->policy_rb_node);
 		range_policy->start = addr;
 		range_policy->end = addr + len;
-		if (range_policy_next) {
-			list_add_tail(&range_policy->list,
-					&range_policy_next->list);
-		}
-		else {
-			list_add_tail(&range_policy->list,
-					&vm->vm_range_numa_policy_list);
-		}
+		error = vm_policy_insert(vm, range_policy);
+		if (error) {
+			kprintf("%s: ERROR: could not insert range: %d\n",
+					__FUNCTION__, error);
+			goto unlock_out;
+		}
 
 mbind_update_only:
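
The mbind path above trims an existing policy range around the new [addr, addr + len) window: the head keeps its old policy, any tail beyond the window is re-inserted as a copy, and the new range lands in between. A worked user-space example of that arithmetic, with illustrative types only:

	/* An mbind() on [0x2000, 0x3000) carves up an existing policy
	 * range [0x1000, 0x5000): head keeps the old policy, the tail
	 * survives as a copy, the new range sits between them. */
	#include <stdio.h>

	struct policy_range {
		unsigned long start, end;   /* [start, end) */
		int policy;
	};

	int main(void)
	{
		struct policy_range old = { 0x1000, 0x5000, 1 };
		unsigned long addr = 0x2000, len = 0x1000;
		unsigned long orig_end = old.end;
		int adjusted = 0;

		if (old.start < addr) {
			old.end = addr;   /* trim: head keeps the old policy */
			adjusted = 1;
		}
		printf("head: [%#lx, %#lx) policy %d\n",
		       old.start, old.end, old.policy);
		printf("new:  [%#lx, %#lx)\n", addr, addr + len);
		if (orig_end > addr + len && adjusted) {
			/* tail re-added as a copy of the old entry */
			struct policy_range tail = { addr + len, orig_end, old.policy };
			printf("tail: [%#lx, %#lx) policy %d\n",
			       tail.start, tail.end, tail.policy);
		}
		return 0;
	}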
@@ -8395,8 +8430,6 @@ SYSCALL_DECLARE(set_mempolicy)
 	struct process_vm *vm = cpu_local_var(current)->vm;
 	int error = 0;
 	int bit, valid_mask;
-	struct vm_range_numa_policy *range_policy_iter;
-	struct vm_range_numa_policy *range_policy_next = NULL;
 	DECLARE_BITMAP(numa_mask, PROCESS_NUMA_MASK_BITS);
 
 	memset(numa_mask, 0, sizeof(numa_mask));
@@ -8417,6 +8450,8 @@ SYSCALL_DECLARE(set_mempolicy)
 		}
 	}
 
+	mode &= ~MPOL_MODE_FLAGS;
+
 	switch (mode) {
 	case MPOL_DEFAULT:
 		if (nodemask && nodemask_bits) {
@@ -8440,6 +8475,13 @@ SYSCALL_DECLARE(set_mempolicy)
 			set_bit(bit, vm->numa_mask);
 		}
 
+#if 0
+		/* The man page says MPOL_DEFAULT causes a nondefault
+		 * process memory policy to be removed, so that the memory
+		 * policy "falls back" to the system default policy, but it
+		 * does not say to delete per-range NUMA policies; Linux's
+		 * syscall of the same name does no such processing either. */
 		/* Delete all range settings */
 		ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
 		list_for_each_entry_safe(range_policy_iter, range_policy_next,
@@ -8448,6 +8490,7 @@ SYSCALL_DECLARE(set_mempolicy)
 			kfree(range_policy_iter);
 		}
 		ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
+#endif
 
 		vm->numa_mem_policy = mode;
 		error = 0;
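
The #if 0 block above deliberately keeps range policies alive across set_mempolicy(MPOL_DEFAULT), matching Linux. A quick user-space probe of that behavior, a sketch that assumes libnuma's <numaif.h> (link with -lnuma) and an available node 0:

	/* Bind a mapping with mbind(), reset the process policy with
	 * set_mempolicy(MPOL_DEFAULT), then read the policy back for an
	 * address in the mapping: on Linux, the per-range policy that
	 * mbind() installed is still reported. */
	#include <stdio.h>
	#include <sys/mman.h>
	#include <numaif.h>

	int main(void)
	{
		size_t len = 1 << 20;
		void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		unsigned long nodemask = 1UL;   /* node 0 */
		int mode = -1;

		if (p == MAP_FAILED)
			return 1;
		if (mbind(p, len, MPOL_BIND, &nodemask,
			  sizeof(nodemask) * 8, 0)) {
			perror("mbind");
			return 1;
		}
		set_mempolicy(MPOL_DEFAULT, NULL, 0);

		/* MPOL_F_ADDR asks for the policy covering this address --
		 * the vm_range_policy_search() path in this commit. */
		get_mempolicy(&mode, NULL, 0, p, MPOL_F_ADDR);
		printf("range mode after MPOL_DEFAULT: %d (MPOL_BIND=%d)\n",
		       mode, MPOL_BIND);
		munmap(p, len);
		return 0;
	}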
@@ -8595,7 +8638,6 @@ SYSCALL_DECLARE(get_mempolicy)
 	/* Address range specific? */
 	if (flags & MPOL_F_ADDR) {
-		struct vm_range_numa_policy *range_policy_iter;
 		struct vm_range *range;
 
 		ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
@@ -8607,16 +8649,8 @@ SYSCALL_DECLARE(get_mempolicy)
 			goto out;
 		}
 
-		list_for_each_entry(range_policy_iter,
-				&vm->vm_range_numa_policy_list, list) {
-			if (range_policy_iter->start > addr ||
-					range_policy_iter->end <= addr) {
-				continue;
-			}
-
-			range_policy = range_policy_iter;
-			break;
-		}
+		range_policy = vm_range_policy_search(vm, addr);
 
 		ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
 	}