ihk_os_getrusage(): Add per-page-size memory usage accounting

This commit is contained in:
Masamichi Takagi
2017-08-04 12:00:31 +09:00
parent 184c2d311c
commit 74f15783d2
22 changed files with 572 additions and 130 deletions

View File

@@ -0,0 +1,58 @@
#ifndef ARCH_RUSAGE_H_INCLUDED
#define ARCH_RUSAGE_H_INCLUDED
#define DEBUG_RUSAGE
#define IHK_OS_PGSIZE_4KB 0
#define IHK_OS_PGSIZE_2MB 1
#define IHK_OS_PGSIZE_1GB 2
extern struct ihk_os_monitor *monitor;
extern int sprintf(char * buf, const char *fmt, ...);
#define DEBUG_ARCH_RUSAGE
#ifdef DEBUG_ARCH_RUSAGE
#define dprintf(...) \
do { \
char msg[1024]; \
sprintf(msg, __VA_ARGS__); \
kprintf("%s,%s", __FUNCTION__, msg); \
} while (0);
#define eprintf(...) \
do { \
char msg[1024]; \
sprintf(msg, __VA_ARGS__); \
kprintf("%s,%s", __FUNCTION__, msg); \
} while (0);
#else
#define dprintf(...) do { } while (0)
#define eprintf(...) \
do { \
char msg[1024]; \
sprintf(msg, __VA_ARGS__); \
kprintf("%s,%s", __FUNCTION__, msg); \
} while (0);
#endif
static inline int rusage_pgsize_to_pgtype(size_t pgsize)
{
int ret = IHK_OS_PGSIZE_4KB;
switch (pgsize) {
case PTL1_SIZE:
ret = IHK_OS_PGSIZE_4KB;
break;
case PTL2_SIZE:
ret = IHK_OS_PGSIZE_2MB;
break;
case PTL3_SIZE:
ret = IHK_OS_PGSIZE_1GB;
break;
default:
eprintf("unknown pgsize=%ld\n", pgsize);
break;
}
return ret;
}
#endif /* !defined(ARCH_RUSAGE_H_INCLUDED) */

View File

@@ -114,6 +114,11 @@ static inline long ihk_atomic64_read(const ihk_atomic64_t *v)
return *(volatile long *)&(v)->counter64;
}
static inline void ihk_atomic64_set(ihk_atomic64_t *v, int i)
{
v->counter64 = i;
}
static inline void ihk_atomic64_inc(ihk_atomic64_t *v)
{
asm volatile ("lock incq %0" : "+m"(v->counter64));

View File

@@ -24,9 +24,17 @@
#include <page.h>
#include <cls.h>
#include <kmalloc.h>
#include <rusage.h>
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
//#define DEBUG
#ifdef DEBUG
#define dkprintf(...) do { kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) do { kprintf(__VA_ARGS__); } while (0)
#else
#define dkprintf(...) do { } while (0)
#define ekprintf(...) do { kprintf(__VA_ARGS__); } while (0)
#endif
static char *last_page;
extern char _head[], _end[];
@@ -902,11 +910,24 @@ static int split_large_page(pte_t *ptep, size_t pgsize)
}
}
pt->entry[i] = pte;
switch(pgsize) {
case PTL3_SIZE:
dkprintf("%lx+,%s: calling memory_stat_rss_add(),size=%ld,pgsize=%ld\n", pte_is_fileoff(ptep, pgsize) ? pte_get_off(&pte, pgsize) : pte_get_phys(&pte), __FUNCTION__, PTL2_SIZE, PTL2_SIZE);
memory_stat_rss_add(PTL2_SIZE, PTL2_SIZE);
break;
case PTL2_SIZE:
dkprintf("%lx+,%s: calling memory_stat_rss_add(),size=%ld,pgsize=%ld\n", pte_is_fileoff(ptep, pgsize) ? pte_get_off(&pte, pgsize) : pte_get_phys(&pte), __FUNCTION__, PTL1_SIZE, PTL1_SIZE);
memory_stat_rss_add(PTL1_SIZE, PTL1_SIZE);
break;
}
pte += pgsize / PT_ENTRIES;
}
*ptep = (virt_to_phys(pt) & PT_PHYSMASK) | PFL2_PDIR_ATTR;
dkprintf("%lx-,%s: calling memory_stat_rss_sub(),size=%ld,pgsize=%ld\n", phys_base, __FUNCTION__, pgsize, pgsize);
memory_stat_rss_sub(pgsize, pgsize);
/* Do not do this check for large pages as they don't come from the zeroobj
* and are not actually mapped.
* TODO: clean up zeroobj as we don't really need it, anonymous mappings
@@ -1106,6 +1127,8 @@ static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base,
struct page *page;
pte_t old;
//dkprintf("%s: %lx,%lx,%lx\n", __FUNCTION__, base, start, end);
if (*ptep == PTE_NULL) {
return -ENOENT;
}
@@ -1119,17 +1142,37 @@ static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base,
page = phys_to_page(phys);
}
if (page) {
dkprintf("%s: page=%p,is_in_memobj=%d,(old & PFL1_DIRTY)=%lx,memobj=%p,args->memobj->flags=%x\n", __FUNCTION__, page, page_is_in_memobj(page), (old & PFL1_DIRTY), args->memobj, args->memobj ? args->memobj->flags : -1);
}
if (page && page_is_in_memobj(page) && (old & PFL1_DIRTY) && (args->memobj) &&
!(args->memobj->flags & MF_ZEROFILL)) {
memobj_flush_page(args->memobj, phys, PTL1_SIZE);
}
if (!(old & PFL1_FILEOFF) && args->free_physical) {
if (!page || (page && page_unmap(page))) {
ihk_mc_free_pages_user(phys_to_virt(phys), 1);
dkprintf("%s: freeing regular page at 0x%lx\n", __FUNCTION__, base);
if (!(old & PFL1_FILEOFF)) {
if(args->free_physical) {
if (!page) {
/* Anonymous || !XPMEM attach */
if (!args->memobj || !(args->memobj->flags & MF_XPMEM)) {
ihk_mc_free_pages_user(phys_to_virt(phys), 1);
dkprintf("%s: freeing regular page at 0x%lx\n", __FUNCTION__, base);
dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", pte_get_phys(&old), __FUNCTION__, pte_get_phys(&old), PTL1_SIZE, PTL1_SIZE);
memory_stat_rss_sub(PTL1_SIZE, PTL1_SIZE);
} else {
dkprintf("%s: XPMEM attach,phys=%lx\n", __FUNCTION__, phys);
}
} else if (page_unmap(page)) {
ihk_mc_free_pages_user(phys_to_virt(phys), 1);
dkprintf("%s: freeing file-backed page at 0x%lx\n", __FUNCTION__, base);
/* Track page->count for !MF_PREMAP pages */
dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", pte_get_phys(&old), __FUNCTION__, pte_get_phys(&old), PTL1_SIZE, PTL1_SIZE);
rusage_memory_stat_sub(args->memobj, PTL1_SIZE, PTL1_SIZE);
}
args->vm->currss -= PTL1_SIZE;
} else {
dkprintf("%s: !calling memory_stat_rss_sub(),virt=%lx,phys=%lx\n", __FUNCTION__, base, pte_get_phys(&old));
}
args->vm->currss -= PTL1_SIZE;
}
return 0;
@@ -1145,6 +1188,8 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base,
struct page *page;
pte_t old;
//dkprintf("%s: %lx,%lx,%lx\n", __FUNCTION__, base, start, end);
if (*ptep == PTE_NULL) {
return -ENOENT;
}
@@ -1172,13 +1217,29 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base,
memobj_flush_page(args->memobj, phys, PTL2_SIZE);
}
if (!(old & PFL2_FILEOFF) && args->free_physical) {
if (!page || (page && page_unmap(page))) {
ihk_mc_free_pages_user(phys_to_virt(phys),
if (!(old & PFL2_FILEOFF)) {
if(args->free_physical) {
if (!page) {
/* Anonymous || !XPMEM attach */
if (!args->memobj || !(args->memobj->flags & MF_XPMEM)) {
ihk_mc_free_pages_user(phys_to_virt(phys),
PTL2_SIZE/PTL1_SIZE);
dkprintf("%s: freeing large page at 0x%lx\n", __FUNCTION__, base);
dkprintf("%lx-,%s: memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", pte_get_phys(&old),__FUNCTION__, pte_get_phys(&old), PTL2_SIZE, PTL2_SIZE);
memory_stat_rss_sub(PTL2_SIZE, PTL2_SIZE);
} else {
dkprintf("%s: XPMEM attach,phys=%lx\n", __FUNCTION__, phys);
}
} else if (page_unmap(page)) {
ihk_mc_free_pages_user(phys_to_virt(phys),
PTL2_SIZE/PTL1_SIZE);
dkprintf("%s: freeing large page at 0x%lx\n", __FUNCTION__, base);
dkprintf("%s: having unmapped page-struct, freeing large page at 0x%lx\n", __FUNCTION__, base);
/* Track page->count for !MF_PREMAP pages */
dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", pte_get_phys(&old), __FUNCTION__, pte_get_phys(&old), PTL2_SIZE, PTL2_SIZE);
rusage_memory_stat_sub(args->memobj, PTL2_SIZE, PTL2_SIZE);
}
args->vm->currss -= PTL2_SIZE;
}
args->vm->currss -= PTL2_SIZE;
}
return 0;
@@ -1209,6 +1270,8 @@ static int clear_range_l3(void *args0, pte_t *ptep, uint64_t base,
struct page *page;
struct page_table *pt;
//dkprintf("%s: %lx,%lx,%lx\n", __FUNCTION__, base, start, end);
if (*ptep == PTE_NULL) {
return -ENOENT;
}
@@ -1236,12 +1299,29 @@ static int clear_range_l3(void *args0, pte_t *ptep, uint64_t base,
memobj_flush_page(args->memobj, phys, PTL3_SIZE);
}
if (!(old & PFL3_FILEOFF) && args->free_physical) {
if (!page || (page && page_unmap(page))) {
ihk_mc_free_pages_user(phys_to_virt(phys),
kprintf("%s: phys=%ld, pte_get_phys(&old),PTL3_SIZE\n", __FUNCTION__, pte_get_phys(&old));
if (!(old & PFL3_FILEOFF)) {
if(args->free_physical) {
if (!page) {
/* Anonymous || !XPMEM attach */
if (!args->memobj || !(args->memobj->flags & MF_XPMEM)) {
ihk_mc_free_pages_user(phys_to_virt(phys),
PTL3_SIZE/PTL1_SIZE);
dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%ld,size=%ld,pgsize=%ld\n", pte_get_phys(&old), __FUNCTION__, pte_get_phys(&old), PTL3_SIZE, PTL3_SIZE);
memory_stat_rss_sub(PTL3_SIZE, PTL3_SIZE);
} else {
dkprintf("%s: XPMEM attach,phys=%lx\n", __FUNCTION__, phys);
}
} else if (page_unmap(page)) {
ihk_mc_free_pages_user(phys_to_virt(phys),
PTL3_SIZE/PTL1_SIZE);
/* Track page->count for !MF_PREMAP pages */
dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", pte_get_phys(&old), __FUNCTION__, pte_get_phys(&old), PTL3_SIZE, PTL3_SIZE);
rusage_memory_stat_sub(args->memobj, PTL3_SIZE, PTL3_SIZE);
}
args->vm->currss -= PTL3_SIZE;
}
args->vm->currss -= PTL3_SIZE;
}
return 0;
@@ -1267,6 +1347,8 @@ static int clear_range_l4(void *args0, pte_t *ptep, uint64_t base,
{
struct page_table *pt;
//dkprintf("%s: %lx,%lx,%lx\n", __FUNCTION__, base, start, end);
if (*ptep == PTE_NULL) {
return -ENOENT;
}
@@ -1284,6 +1366,9 @@ static int clear_range(struct page_table *pt, struct process_vm *vm,
int error;
struct clear_range_args args;
dkprintf("%s: %p,%lx,%lx,%d,%p\n",
__FUNCTION__, pt, start, end, free_physical, memobj);
if ((start < vm->region.user_start)
|| (vm->region.user_end < end)
|| (end <= start)) {
@@ -1530,6 +1615,7 @@ struct set_range_args {
int pgshift;
uintptr_t diff;
struct process_vm *vm;
struct vm_range *range; /* To find pages we don't need to call memory_stat_rss_add() */
};
int set_range_l1(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start,
@@ -1553,6 +1639,13 @@ int set_range_l1(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start,
*ptep = phys | attr_to_l1attr(args->attr);
error = 0;
// call memory_stat_rss_add() here because pgshift is resolved here
if (rusage_memory_stat_add(args->range, phys, PTL1_SIZE, PTL1_SIZE)) {
dkprintf("%lx+,%s: calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n", phys, __FUNCTION__, base, phys, PTL1_SIZE, PTL1_SIZE);
} else {
dkprintf("%s: !calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n", __FUNCTION__, base, phys, PTL1_SIZE, PTL1_SIZE);
}
out:
dkprintf("set_range_l1(%lx,%lx,%lx): %d %lx\n",
base, start, end, error, *ptep);
@@ -1584,6 +1677,12 @@ retry:
dkprintf("set_range_l2(%lx,%lx,%lx):"
"2MiB page. %d %lx\n",
base, start, end, error, *ptep);
// call memory_stat_rss_add() here because pgshift is resolved here
if (rusage_memory_stat_add(args->range, phys, PTL2_SIZE, PTL2_SIZE)) {
dkprintf("%lx+,%s: calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n", phys, __FUNCTION__, base, phys, PTL2_SIZE, PTL2_SIZE);
} else {
dkprintf("%s: !calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n", __FUNCTION__, base, phys, PTL2_SIZE, PTL2_SIZE);
}
goto out;
}
@@ -1666,6 +1765,13 @@ retry:
dkprintf("set_range_l3(%lx,%lx,%lx):"
"1GiB page. %d %lx\n",
base, start, end, error, *ptep);
// Call memory_stat_rss_add() here because pgshift is resolved here
if (rusage_memory_stat_add(args->range, phys, PTL3_SIZE, PTL3_SIZE)) {
dkprintf("%lx+,%s: calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n", phys, __FUNCTION__, base, phys, PTL3_SIZE, PTL3_SIZE);
} else {
dkprintf("%s: !calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n", __FUNCTION__, base, phys, PTL3_SIZE, PTL3_SIZE);
}
goto out;
}
@@ -1783,13 +1889,13 @@ out:
int ihk_mc_pt_set_range(page_table_t pt, struct process_vm *vm, void *start,
void *end, uintptr_t phys, enum ihk_mc_pt_attribute attr,
int pgshift)
int pgshift, struct vm_range *range)
{
int error;
struct set_range_args args;
dkprintf("ihk_mc_pt_set_range(%p,%p,%p,%lx,%x)\n",
pt, start, end, phys, attr);
dkprintf("ihk_mc_pt_set_range(%p,%p,%p,%lx,%x,%d,%lx-%lx)\n",
pt, start, end, phys, attr, pgshift, range->start, range->end);
args.pt = pt;
args.phys = phys;
@@ -1797,6 +1903,7 @@ int ihk_mc_pt_set_range(page_table_t pt, struct process_vm *vm, void *start,
args.diff = (uintptr_t)start ^ phys;
args.vm = vm;
args.pgshift = pgshift;
args.range = range;
error = walk_pte_l4(pt, 0, (uintptr_t)start, (uintptr_t)end,
&set_range_l4, &args);
@@ -1935,8 +2042,8 @@ int arch_get_smaller_page_size(void *args, size_t cursize, size_t *newsizep,
if (p2alignp) *p2alignp = p2align;
out:
dkprintf("arch_get_smaller_page_size(%p,%lx): %d %lx %d\n",
args, cursize, error, newsize, p2align);
/*dkprintf("arch_get_smaller_page_size(%p,%lx): %d %lx %d\n",
args, cursize, error, newsize, p2align);*/
return error;
}
@@ -1959,6 +2066,7 @@ struct move_args {
uintptr_t src;
uintptr_t dest;
struct process_vm *vm;
struct vm_range *range;
};
static int move_one_page(void *arg0, page_table_t pt, pte_t *ptep,
@@ -1990,7 +2098,7 @@ static int move_one_page(void *arg0, page_table_t pt, pte_t *ptep,
attr = apte & ~PT_PHYSMASK;
error = ihk_mc_pt_set_range(pt, args->vm, (void *)dest,
(void *)(dest + pgsize), phys, attr, pgshift);
(void *)(dest + pgsize), phys, attr, pgshift, args->range);
if (error) {
kprintf("move_one_page(%p,%p,%p %#lx,%p,%d):"
"set failed. %d\n",
@@ -2006,7 +2114,7 @@ out:
}
int move_pte_range(page_table_t pt, struct process_vm *vm,
void *src, void *dest, size_t size)
void *src, void *dest, size_t size, struct vm_range *range)
{
int error;
struct move_args args;
@@ -2015,6 +2123,7 @@ int move_pte_range(page_table_t pt, struct process_vm *vm,
args.src = (uintptr_t)src;
args.dest = (uintptr_t)dest;
args.vm = vm;
args.range = range;
error = visit_pte_range(pt, src, src+size, 0, VPTEF_SKIP_NULL,
&move_one_page, &args);

View File

@@ -1366,7 +1366,7 @@ SYSCALL_DECLARE(mmap)
struct thread *thread = cpu_local_var(current);
struct vm_regions *region = &thread->vm->region;
int error;
intptr_t addr;
intptr_t addr = 0;
size_t len;
int flags = flags0;
size_t pgsize;
@@ -1469,7 +1469,7 @@ SYSCALL_DECLARE(shmget)
const key_t key = ihk_mc_syscall_arg0(ctx);
const size_t size = ihk_mc_syscall_arg1(ctx);
const int shmflg0 = ihk_mc_syscall_arg2(ctx);
int shmid;
int shmid = -EINVAL;
int error;
int shmflg = shmflg0;
@@ -1732,6 +1732,7 @@ int arch_map_vdso(struct process_vm *vm)
enum ihk_mc_pt_attribute attr;
int error;
int i;
struct vm_range *range;
dkprintf("arch_map_vdso()\n");
if (container_size <= 0) {
@@ -1750,7 +1751,7 @@ int arch_map_vdso(struct process_vm *vm)
vrflags |= VR_PROT_READ | VR_PROT_EXEC;
vrflags |= VRFLAG_PROT_TO_MAXPROT(vrflags);
error = add_process_memory_range(vm, (intptr_t)s, (intptr_t)e,
NOPHYS, vrflags, NULL, 0, PAGE_SHIFT, NULL);
NOPHYS, vrflags, NULL, 0, PAGE_SHIFT, &range);
if (error) {
ekprintf("ERROR: adding memory range for vdso. %d\n", error);
goto out;
@@ -1762,7 +1763,7 @@ int arch_map_vdso(struct process_vm *vm)
s = vm->vdso_addr + (i * PAGE_SIZE);
e = s + PAGE_SIZE;
error = ihk_mc_pt_set_range(pt, vm, s, e,
vdso.vdso_physlist[i], attr, 0);
vdso.vdso_physlist[i], attr, 0, range);
if (error) {
ekprintf("ihk_mc_pt_set_range failed. %d\n", error);
goto out;
@@ -1782,7 +1783,7 @@ int arch_map_vdso(struct process_vm *vm)
vrflags |= VR_PROT_READ;
vrflags |= VRFLAG_PROT_TO_MAXPROT(vrflags);
error = add_process_memory_range(vm, (intptr_t)s, (intptr_t)e,
NOPHYS, vrflags, NULL, 0, PAGE_SHIFT, NULL);
NOPHYS, vrflags, NULL, 0, PAGE_SHIFT, &range);
if (error) {
ekprintf("ERROR: adding memory range for vvar. %d\n", error);
goto out;
@@ -1794,7 +1795,7 @@ int arch_map_vdso(struct process_vm *vm)
e = s + PAGE_SIZE;
attr = PTATTR_ACTIVE | PTATTR_USER | PTATTR_NO_EXECUTE;
error = ihk_mc_pt_set_range(pt, vm, s, e,
vdso.vvar_phys, attr, 0);
vdso.vvar_phys, attr, 0, range);
if (error) {
ekprintf("ihk_mc_pt_set_range failed. %d\n", error);
goto out;
@@ -1805,7 +1806,7 @@ int arch_map_vdso(struct process_vm *vm)
e = s + PAGE_SIZE;
attr = PTATTR_ACTIVE | PTATTR_USER | PTATTR_NO_EXECUTE | PTATTR_UNCACHABLE;
error = ihk_mc_pt_set_range(pt, vm, s, e,
vdso.hpet_phys, attr, 0);
vdso.hpet_phys, attr, 0, range);
if (error) {
ekprintf("ihk_mc_pt_set_range failed. %d\n", error);
goto out;
@@ -1816,7 +1817,7 @@ int arch_map_vdso(struct process_vm *vm)
e = s + PAGE_SIZE;
attr = PTATTR_ACTIVE | PTATTR_USER | PTATTR_NO_EXECUTE;
error = ihk_mc_pt_set_range(pt, vm, s, e,
vdso.pvti_phys, attr, 0);
vdso.pvti_phys, attr, 0, range);
if (error) {
ekprintf("ihk_mc_pt_set_range failed. %d\n", error);
goto out;