Pre-map file mappings from /dev/shm (--mpol-shm-premap mcexec argument)

This commit is contained in:
Balazs Gerofi
2017-03-12 14:34:34 +09:00
parent c5079898c2
commit 9b5ccb5a33
8 changed files with 157 additions and 7 deletions

View File

@@ -1075,7 +1075,7 @@ int visit_pte_range(page_table_t pt, void *start0, void *end0, int pgshift,
/*
 * Argument bundle passed (as args0) to the clear_range_l* page-table walk
 * callbacks.
 *
 * NOTE(review): this listing is a rendered diff without +/- markers, so both
 * `padding` and `dont_walk_l1` show up here; presumably one replaces the
 * other in the real struct -- confirm against the tree.
 */
struct clear_range_args {
/* clear_range() sets this to 0 for MF_DEV_FILE memory objects */
int free_physical;
uint8_t padding[4];
/*
 * Non-zero makes clear_range_l2() skip its walk_pte_l1() descent;
 * clear_range() sets it to 1 for MF_PREMAP memory objects, 0 otherwise.
 */
int dont_walk_l1;
/* Memory object handed to clear_range(); may be NULL (callers test it) */
struct memobj *memobj;
/* Address space handed to clear_range() */
struct process_vm *vm;
};
@@ -1167,9 +1167,11 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base,
}
pt = phys_to_virt(*ptep & PT_PHYSMASK);
error = walk_pte_l1(pt, base, start, end, &clear_range_l1, args0);
if (error && (error != -ENOENT)) {
return error;
if (!args->dont_walk_l1) {
error = walk_pte_l1(pt, base, start, end, &clear_range_l1, args0);
if (error && (error != -ENOENT)) {
return error;
}
}
if ((start <= base) && ((base + PTL2_SIZE) <= end)) {
@@ -1279,6 +1281,10 @@ static int clear_range(struct page_table *pt, struct process_vm *vm,
if (memobj && (memobj->flags & MF_DEV_FILE)) {
args.free_physical = 0;
}
args.dont_walk_l1 = 0;
if (memobj && ((memobj->flags & MF_PREMAP))) {
args.dont_walk_l1 = 1;
}
args.memobj = memobj;
args.vm = vm;

View File

@@ -98,6 +98,7 @@ typedef unsigned long __cpu_set_unit;
/* Bit flags OR-ed into program_load_desc mpol_flags by mcexec's main() */
#define MPOL_NO_HEAP 0x01
#define MPOL_NO_STACK 0x02
#define MPOL_NO_BSS 0x04
/*
 * Requested with the --mpol-shm-premap mcexec argument; tested by
 * fileobj_create() and do_mmap() before pre-allocating / pre-mapping
 * MF_PREMAP file objects.
 */
#define MPOL_SHM_PREMAP 0x08
struct program_load_desc {
int num_sections;

View File

@@ -770,6 +770,7 @@ enum {
MF_ZEROFILL = 0x0010,
MF_REG_FILE = 0x1000,
MF_DEV_FILE = 0x2000,
MF_PREMAP = 0x8000,
MF_END
};
@@ -863,9 +864,8 @@ static int pager_req_create(ihk_os_t os, int fd, uintptr_t result_pa)
fullpath = d_path(&file->f_path, pathbuf, PATH_MAX);
if (!IS_ERR(fullpath)) {
if (!strncmp("/dev/shm/Intel_MPI", fullpath, 18)) {
//mf_flags = (MF_PREFETCH | MF_ZEROFILL);
mf_flags = (MF_ZEROFILL);
dprintk("%s: filename: %s, zerofill\n",
mf_flags = (MF_PREMAP | MF_ZEROFILL);
dprintk("%s: filename: %s, premap & zerofill\n",
__FUNCTION__, fullpath);
}
else if (strstr(fullpath, "libmpi") != NULL) {

View File

@@ -158,6 +158,7 @@ static int enable_vdso = 1;
static int mpol_no_heap = 0;
static int mpol_no_stack = 0;
static int mpol_no_bss = 0;
static int mpol_shm_premap = 0;
static int no_bind_ikc_map = 0;
static unsigned long mpol_threshold = 0;
static unsigned long heap_extension = (2*1024*1024);
@@ -1312,6 +1313,12 @@ static struct option mcexec_options[] = {
.flag = &mpol_no_bss,
.val = 1,
},
{
.name = "mpol-shm-premap",
.has_arg = no_argument,
.flag = &mpol_shm_premap,
.val = 1,
},
{
.name = "no-bind-ikc-map",
.has_arg = no_argument,
@@ -1798,6 +1805,10 @@ int main(int argc, char **argv)
desc->mpol_flags |= MPOL_NO_BSS;
}
if (mpol_shm_premap) {
desc->mpol_flags |= MPOL_SHM_PREMAP;
}
desc->mpol_threshold = mpol_threshold;
desc->heap_extension = heap_extension;

View File

@@ -231,6 +231,52 @@ int fileobj_create(int fd, struct memobj **objp, int *maxprotp)
if (to_memobj(obj)->flags & MF_PREFETCH) {
to_memobj(obj)->status = MEMOBJ_TO_BE_PREFETCHED;
}
/*
 * XXX: KNL specific optimization for OFP runs
 *
 * For objects flagged with both MF_PREMAP and MF_ZEROFILL, allocate a
 * per-object array holding one slot per PAGE_SIZE page of result.size.
 * If the creating process has MPOL_SHM_PREMAP set, every slot is filled
 * immediately with a zeroed page, cycling over NUMA nodes starting at
 * node 4; otherwise all slots are left NULL (fileobj_get_page() fills
 * NULL slots on demand).
 *
 * NOTE(review): no failure is propagated out of this block -- both error
 * paths fall through error_cleanup, which does nothing.
 */
if ((to_memobj(obj)->flags & MF_PREMAP) &&
(to_memobj(obj)->flags & MF_ZEROFILL)) {
struct memobj *mo = to_memobj(obj);
/* Round the object size up to whole pages.
 * NOTE(review): stored in an int; the type/range of result.size is not
 * visible here -- confirm it cannot overflow for large files. */
int nr_pages = (result.size + (PAGE_SIZE - 1))
>> PAGE_SHIFT;
int j = 0;
/* NOTE(review): hard-coded first NUMA node for the interleave;
 * presumably the KNL layout mentioned above -- confirm. */
int node = 4;
mo->pages = kmalloc(nr_pages * sizeof(void *), IHK_MC_AP_NOWAIT);
if (!mo->pages) {
/* NOTE(review): MF_PREMAP stays set while mo->pages is NULL and
 * mo->nr_pages is not assigned; fileobj_get_page() indexes
 * memobj->pages whenever MF_PREMAP is set. */
kprintf("%s: WARNING: failed to allocate pages\n",
__FUNCTION__);
goto error_cleanup;
}
mo->nr_pages = nr_pages;
/* Start with every slot empty (NULL) */
memset(mo->pages, 0, nr_pages * sizeof(*mo->pages));
if (cpu_local_var(current)->proc->mpol_flags & MPOL_SHM_PREMAP) {
/* Get the actual pages NUMA interleaved */
for (j = 0; j < nr_pages; ++j) {
mo->pages[j] = ihk_mc_alloc_aligned_pages_node(1,
PAGE_P2ALIGN, IHK_MC_AP_NOWAIT, node);
if (!mo->pages[j]) {
/* NOTE(review): slots [j, nr_pages) remain NULL, yet do_mmap()
 * passes every slot to virt_to_phys() without a NULL check. */
kprintf("%s: ERROR: allocating pages[%d]\n",
__FUNCTION__, j);
goto error_cleanup;
}
/* Honour MF_ZEROFILL for the freshly allocated page */
memset(mo->pages[j], 0, PAGE_SIZE);
/* Advance to the next node, wrapping back to node 4.
 * NOTE(review): the wrap test uses ==, so if
 * ihk_mc_get_nr_numa_nodes() is <= 4 `node` never resets and keeps
 * growing. */
++node;
if (node == ihk_mc_get_nr_numa_nodes()) {
node = 4;
}
}
dkprintf("%s: allocated %d pages interleaved\n",
__FUNCTION__, nr_pages);
}
error_cleanup:
/* TODO: cleanup allocated portion */
/* NOTE(review): until the TODO is done, a partially filled array is kept
 * as-is and only released later by fileobj_release(). */
;
}
newobj = NULL;
dkprintf("%s: new obj 0x%lx cref: %d, %s\n",
__FUNCTION__,
@@ -345,6 +391,19 @@ static void fileobj_release(struct memobj *memobj)
page->mode = PM_NONE;
#endif
}
/*
 * Pre-mapped? Release every populated slot of the per-object page array
 * (one page each), then the array itself.
 *
 * NOTE(review): trusts pages/nr_pages whenever MF_PREMAP is set, but
 * fileobj_create() leaves pages NULL and nr_pages unassigned when the
 * array allocation fails -- confirm the memobj is zero-initialised,
 * otherwise this loop dereferences a NULL array.
 */
if (to_memobj(free_obj)->flags & MF_PREMAP) {
int i;
for (i = 0; i < to_memobj(free_obj)->nr_pages; ++i) {
/* Slots may legitimately be NULL (never faulted in / not pre-allocated) */
if (to_memobj(free_obj)->pages[i])
ihk_mc_free_pages(to_memobj(free_obj)->pages[i], 1);
}
kfree(to_memobj(free_obj)->pages);
}
obj_list_remove(free_obj);
mcs_rwlock_writer_unlock_noirq(&fileobj_list_lock, &node);
kfree(free_obj);
@@ -491,6 +550,42 @@ static int fileobj_get_page(struct memobj *memobj, off_t off,
profile_event_add(PROFILE_page_fault_file, PAGE_SIZE);
#endif // PROFILE_ENABLE
/*
 * MF_PREMAP objects are resolved straight from the per-object page array
 * and leave through out_nolock, i.e. before the page-hash lock below is
 * ever taken.
 */
if (memobj->flags & MF_PREMAP) {
/* NOTE(review): page_ind is not checked against memobj->nr_pages and
 * memobj->pages is not checked for NULL; the off_t shift result is also
 * narrowed to int. */
int page_ind = off >> PAGE_SHIFT;
/* Slot still empty: allocate a page on demand */
if (!memobj->pages[page_ind]) {
/* NOTE(review): unlike the pages pre-allocated in fileobj_create(), this
 * page is not memset here although the object is MF_ZEROFILL; whether
 * ihk_mc_alloc_pages() returns zeroed memory is not visible -- confirm. */
virt = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT | IHK_MC_AP_USER);
if (!virt) {
/* virt is NULL, so out_nolock has nothing to free */
error = -ENOMEM;
kprintf("fileobj_get_page(%p,%lx,%x,%p):"
"alloc failed. %d\n",
obj, off, p2align, physp,
error);
goto out_nolock;
}
/* Update the array but see if someone did it already and use
 * that if so */
if (!__sync_bool_compare_and_swap(&memobj->pages[page_ind],
NULL, virt)) {
/* Lost the race: drop our page, the winner's is read back below */
ihk_mc_free_pages(virt, 1);
}
else {
dkprintf("%s: MF_ZEROFILL: off: %lu -> 0x%lx allocated\n",
__FUNCTION__, off, virt_to_phys(virt));
}
}
/* Whatever is published in the slot is the page for this offset */
virt = memobj->pages[page_ind];
error = 0;
*physp = virt_to_phys(virt);
dkprintf("%s: MF_ZEROFILL: off: %lu -> 0x%lx resolved\n",
__FUNCTION__, off, virt_to_phys(virt));
/* Clear virt so the `if (virt)` free at out_nolock leaves the page alone */
virt = NULL;
goto out_nolock;
}
mcs_rwlock_writer_lock_noirq(&obj->page_hash_locks[hash],
&mcs_node);
page = __fileobj_page_hash_lookup(obj, hash, off);
@@ -566,6 +661,7 @@ static int fileobj_get_page(struct memobj *memobj, off_t off,
out:
mcs_rwlock_writer_unlock_noirq(&obj->page_hash_locks[hash],
&mcs_node);
out_nolock:
if (virt) {
ihk_mc_free_pages(virt, npages);
}

View File

@@ -36,6 +36,7 @@ enum {
MF_ZEROFILL = 0x0010,
MF_REG_FILE = 0x1000,
MF_DEV_FILE = 0x2000,
MF_PREMAP = 0x8000,
MF_HOST_RELEASED = 0x80000000,
MF_END
};
@@ -49,6 +50,10 @@ struct memobj {
uint32_t status;
size_t size;
ihk_spinlock_t lock;
/* For pre-mapped memobjects */
void **pages;
int nr_pages;
};
typedef void memobj_release_func_t(struct memobj *obj);

View File

@@ -163,6 +163,7 @@ typedef unsigned long __cpu_set_unit;
/* Bit flags carried in mpol_flags (tested as proc->mpol_flags in the kernel) */
#define MPOL_NO_HEAP 0x01
#define MPOL_NO_STACK 0x02
#define MPOL_NO_BSS 0x04
/*
 * Requested with the --mpol-shm-premap mcexec argument; tested by
 * fileobj_create() and do_mmap() before pre-allocating / pre-mapping
 * MF_PREMAP file objects.
 */
#define MPOL_SHM_PREMAP 0x08
struct program_load_desc {
int num_sections;

View File

@@ -1321,6 +1321,36 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
populate_len = memobj->size;
}
memobj_unlock(memobj);
/*
 * Update PTEs for pre-mapped memory object
 *
 * When the object is MF_PREMAP and the mapping process has
 * MPOL_SHM_PREMAP, install one PAGE_SIZE mapping per entry of
 * memobj->pages, starting at range->start.
 *
 * NOTE(review): this runs after the preceding memobj_unlock(), so
 * pages/nr_pages are read without the memobj lock.
 * NOTE(review): nr_pages is not compared with the length of `range`;
 * a mapping shorter than the object would be overrun -- confirm.
 */
if ((memobj->flags & MF_PREMAP) &&
(proc->mpol_flags & MPOL_SHM_PREMAP)) {
int i;
enum ihk_mc_pt_attribute ptattr;
ptattr = arch_vrflag_to_ptattr(range->flag, PF_POPULATE, NULL);
for (i = 0; i < memobj->nr_pages; ++i) {
/* NOTE(review): pages[i] is NULL if the object was created by a process
 * without MPOL_SHM_PREMAP or if allocation failed part-way in
 * fileobj_create(); virt_to_phys(NULL) would then be mapped. */
error = ihk_mc_pt_set_range(proc->vm->address_space->page_table,
proc->vm,
(void *)range->start + (i * PAGE_SIZE),
(void *)range->start + (i * PAGE_SIZE) +
PAGE_SIZE,
virt_to_phys(memobj->pages[i]),
ptattr,
PAGE_SHIFT);
if (error) {
/* NOTE(review): only logged -- the loop continues and `error` is
 * overwritten with 0 right after the enclosing block. */
kprintf("%s: ERROR: mapping %d page of pre-mapped file\n",
__FUNCTION__, i);
}
}
dkprintf("%s: memobj 0x%lx pre-mapped\n", __FUNCTION__, memobj);
}
/*
else if (memobj->flags & MF_REG_FILE) {
populated_mapping = 1;
populate_len = memobj->size;
}
*/
}
error = 0;