ihk_os_getrusage(): Add per-page-size memory usage accounting
This commit is contained in:
58
arch/x86/kernel/include/arch/rusage.h
Normal file
58
arch/x86/kernel/include/arch/rusage.h
Normal file
@@ -0,0 +1,58 @@
|
||||
#ifndef ARCH_RUSAGE_H_INCLUDED
|
||||
#define ARCH_RUSAGE_H_INCLUDED
|
||||
|
||||
#define DEBUG_RUSAGE
|
||||
|
||||
#define IHK_OS_PGSIZE_4KB 0
|
||||
#define IHK_OS_PGSIZE_2MB 1
|
||||
#define IHK_OS_PGSIZE_1GB 2
|
||||
|
||||
extern struct ihk_os_monitor *monitor;
|
||||
|
||||
extern int sprintf(char * buf, const char *fmt, ...);
|
||||
|
||||
#define DEBUG_ARCH_RUSAGE
|
||||
/*
 * Logging helpers for the rusage accounting code.
 *
 * eprintf() formats its arguments into a 1024-byte stack buffer with
 * sprintf() and passes the result to kprintf(), prefixed with the name of
 * the calling function.  dprintf() does the same when DEBUG_ARCH_RUSAGE
 * is defined and expands to an empty statement otherwise.
 *
 * The macros end in "while (0)" WITHOUT a trailing semicolon so that
 * "if (x) eprintf(...); else ..." parses as intended; the caller supplies
 * the semicolon.
 *
 * The scratch buffer has a long, suffixed name so that an argument named
 * "msg" at the call site is not shadowed inside the macro body.
 *
 * NOTE(review): sprintf() is unbounded; the formatted message must fit in
 * 1024 bytes including the terminating NUL.
 */
#define eprintf(...)						\
	do {							\
		char rusage_msgbuf_[1024];			\
		sprintf(rusage_msgbuf_, __VA_ARGS__);		\
		kprintf("%s,%s", __FUNCTION__, rusage_msgbuf_);	\
	} while (0)

#ifdef DEBUG_ARCH_RUSAGE
#define dprintf(...) eprintf(__VA_ARGS__)
#else
#define dprintf(...) do { } while (0)
#endif
|
||||
|
||||
static inline int rusage_pgsize_to_pgtype(size_t pgsize)
|
||||
{
|
||||
int ret = IHK_OS_PGSIZE_4KB;
|
||||
switch (pgsize) {
|
||||
case PTL1_SIZE:
|
||||
ret = IHK_OS_PGSIZE_4KB;
|
||||
break;
|
||||
case PTL2_SIZE:
|
||||
ret = IHK_OS_PGSIZE_2MB;
|
||||
break;
|
||||
case PTL3_SIZE:
|
||||
ret = IHK_OS_PGSIZE_1GB;
|
||||
break;
|
||||
default:
|
||||
eprintf("unknown pgsize=%ld\n", pgsize);
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif /* !defined(ARCH_RUSAGE_H_INCLUDED) */
|
||||
@@ -114,6 +114,11 @@ static inline long ihk_atomic64_read(const ihk_atomic64_t *v)
|
||||
return *(volatile long *)&(v)->counter64;
|
||||
}
|
||||
|
||||
/*
 * Store i into the 64-bit counter of *v using a plain assignment
 * (no lock-prefixed instruction is used here).
 *
 * The value is taken as long rather than int so that the full 64-bit
 * range can be stored, matching the long returned by ihk_atomic64_read().
 * Callers that pass an int are unaffected (implicit widening).
 */
static inline void ihk_atomic64_set(ihk_atomic64_t *v, long i)
{
	v->counter64 = i;
}
|
||||
|
||||
static inline void ihk_atomic64_inc(ihk_atomic64_t *v)
|
||||
{
|
||||
asm volatile ("lock incq %0" : "+m"(v->counter64));
|
||||
|
||||
@@ -24,9 +24,17 @@
|
||||
#include <page.h>
|
||||
#include <cls.h>
|
||||
#include <kmalloc.h>
|
||||
#include <rusage.h>
|
||||
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
//#define DEBUG
|
||||
|
||||
#ifdef DEBUG
|
||||
#define dkprintf(...) do { kprintf(__VA_ARGS__); } while (0)
|
||||
#define ekprintf(...) do { kprintf(__VA_ARGS__); } while (0)
|
||||
#else
|
||||
#define dkprintf(...) do { } while (0)
|
||||
#define ekprintf(...) do { kprintf(__VA_ARGS__); } while (0)
|
||||
#endif
|
||||
|
||||
static char *last_page;
|
||||
extern char _head[], _end[];
|
||||
@@ -902,11 +910,24 @@ static int split_large_page(pte_t *ptep, size_t pgsize)
|
||||
}
|
||||
}
|
||||
pt->entry[i] = pte;
|
||||
switch(pgsize) {
|
||||
case PTL3_SIZE:
|
||||
dkprintf("%lx+,%s: calling memory_stat_rss_add(),size=%ld,pgsize=%ld\n", pte_is_fileoff(ptep, pgsize) ? pte_get_off(&pte, pgsize) : pte_get_phys(&pte), __FUNCTION__, PTL2_SIZE, PTL2_SIZE);
|
||||
memory_stat_rss_add(PTL2_SIZE, PTL2_SIZE);
|
||||
break;
|
||||
case PTL2_SIZE:
|
||||
dkprintf("%lx+,%s: calling memory_stat_rss_add(),size=%ld,pgsize=%ld\n", pte_is_fileoff(ptep, pgsize) ? pte_get_off(&pte, pgsize) : pte_get_phys(&pte), __FUNCTION__, PTL1_SIZE, PTL1_SIZE);
|
||||
memory_stat_rss_add(PTL1_SIZE, PTL1_SIZE);
|
||||
break;
|
||||
}
|
||||
pte += pgsize / PT_ENTRIES;
|
||||
}
|
||||
|
||||
*ptep = (virt_to_phys(pt) & PT_PHYSMASK) | PFL2_PDIR_ATTR;
|
||||
|
||||
dkprintf("%lx-,%s: calling memory_stat_rss_sub(),size=%ld,pgsize=%ld\n", phys_base, __FUNCTION__, pgsize, pgsize);
|
||||
memory_stat_rss_sub(pgsize, pgsize);
|
||||
|
||||
/* Do not do this check for large pages as they don't come from the zeroobj
|
||||
* and are not actually mapped.
|
||||
* TODO: clean up zeroobj as we don't really need it, anonymous mappings
|
||||
@@ -1106,6 +1127,8 @@ static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base,
|
||||
struct page *page;
|
||||
pte_t old;
|
||||
|
||||
//dkprintf("%s: %lx,%lx,%lx\n", __FUNCTION__, base, start, end);
|
||||
|
||||
if (*ptep == PTE_NULL) {
|
||||
return -ENOENT;
|
||||
}
|
||||
@@ -1119,17 +1142,37 @@ static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base,
|
||||
page = phys_to_page(phys);
|
||||
}
|
||||
|
||||
if (page) {
|
||||
dkprintf("%s: page=%p,is_in_memobj=%d,(old & PFL1_DIRTY)=%lx,memobj=%p,args->memobj->flags=%x\n", __FUNCTION__, page, page_is_in_memobj(page), (old & PFL1_DIRTY), args->memobj, args->memobj ? args->memobj->flags : -1);
|
||||
}
|
||||
if (page && page_is_in_memobj(page) && (old & PFL1_DIRTY) && (args->memobj) &&
|
||||
!(args->memobj->flags & MF_ZEROFILL)) {
|
||||
memobj_flush_page(args->memobj, phys, PTL1_SIZE);
|
||||
}
|
||||
|
||||
if (!(old & PFL1_FILEOFF) && args->free_physical) {
|
||||
if (!page || (page && page_unmap(page))) {
|
||||
ihk_mc_free_pages_user(phys_to_virt(phys), 1);
|
||||
dkprintf("%s: freeing regular page at 0x%lx\n", __FUNCTION__, base);
|
||||
if (!(old & PFL1_FILEOFF)) {
|
||||
if(args->free_physical) {
|
||||
if (!page) {
|
||||
/* Anonymous || !XPMEM attach */
|
||||
if (!args->memobj || !(args->memobj->flags & MF_XPMEM)) {
|
||||
ihk_mc_free_pages_user(phys_to_virt(phys), 1);
|
||||
dkprintf("%s: freeing regular page at 0x%lx\n", __FUNCTION__, base);
|
||||
dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", pte_get_phys(&old), __FUNCTION__, pte_get_phys(&old), PTL1_SIZE, PTL1_SIZE);
|
||||
memory_stat_rss_sub(PTL1_SIZE, PTL1_SIZE);
|
||||
} else {
|
||||
dkprintf("%s: XPMEM attach,phys=%lx\n", __FUNCTION__, phys);
|
||||
}
|
||||
} else if (page_unmap(page)) {
|
||||
ihk_mc_free_pages_user(phys_to_virt(phys), 1);
|
||||
dkprintf("%s: freeing file-backed page at 0x%lx\n", __FUNCTION__, base);
|
||||
/* Track page->count for !MF_PREMAP pages */
|
||||
dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", pte_get_phys(&old), __FUNCTION__, pte_get_phys(&old), PTL1_SIZE, PTL1_SIZE);
|
||||
rusage_memory_stat_sub(args->memobj, PTL1_SIZE, PTL1_SIZE);
|
||||
}
|
||||
args->vm->currss -= PTL1_SIZE;
|
||||
} else {
|
||||
dkprintf("%s: !calling memory_stat_rss_sub(),virt=%lx,phys=%lx\n", __FUNCTION__, base, pte_get_phys(&old));
|
||||
}
|
||||
args->vm->currss -= PTL1_SIZE;
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -1145,6 +1188,8 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base,
|
||||
struct page *page;
|
||||
pte_t old;
|
||||
|
||||
//dkprintf("%s: %lx,%lx,%lx\n", __FUNCTION__, base, start, end);
|
||||
|
||||
if (*ptep == PTE_NULL) {
|
||||
return -ENOENT;
|
||||
}
|
||||
@@ -1172,13 +1217,29 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base,
|
||||
memobj_flush_page(args->memobj, phys, PTL2_SIZE);
|
||||
}
|
||||
|
||||
if (!(old & PFL2_FILEOFF) && args->free_physical) {
|
||||
if (!page || (page && page_unmap(page))) {
|
||||
ihk_mc_free_pages_user(phys_to_virt(phys),
|
||||
if (!(old & PFL2_FILEOFF)) {
|
||||
if(args->free_physical) {
|
||||
if (!page) {
|
||||
/* Anonymous || !XPMEM attach */
|
||||
if (!args->memobj || !(args->memobj->flags & MF_XPMEM)) {
|
||||
ihk_mc_free_pages_user(phys_to_virt(phys),
|
||||
PTL2_SIZE/PTL1_SIZE);
|
||||
dkprintf("%s: freeing large page at 0x%lx\n", __FUNCTION__, base);
|
||||
dkprintf("%lx-,%s: memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", pte_get_phys(&old),__FUNCTION__, pte_get_phys(&old), PTL2_SIZE, PTL2_SIZE);
|
||||
memory_stat_rss_sub(PTL2_SIZE, PTL2_SIZE);
|
||||
} else {
|
||||
dkprintf("%s: XPMEM attach,phys=%lx\n", __FUNCTION__, phys);
|
||||
}
|
||||
} else if (page_unmap(page)) {
|
||||
ihk_mc_free_pages_user(phys_to_virt(phys),
|
||||
PTL2_SIZE/PTL1_SIZE);
|
||||
dkprintf("%s: freeing large page at 0x%lx\n", __FUNCTION__, base);
|
||||
dkprintf("%s: having unmapped page-struct, freeing large page at 0x%lx\n", __FUNCTION__, base);
|
||||
/* Track page->count for !MF_PREMAP pages */
|
||||
dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", pte_get_phys(&old), __FUNCTION__, pte_get_phys(&old), PTL2_SIZE, PTL2_SIZE);
|
||||
rusage_memory_stat_sub(args->memobj, PTL2_SIZE, PTL2_SIZE);
|
||||
}
|
||||
args->vm->currss -= PTL2_SIZE;
|
||||
}
|
||||
args->vm->currss -= PTL2_SIZE;
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -1209,6 +1270,8 @@ static int clear_range_l3(void *args0, pte_t *ptep, uint64_t base,
|
||||
struct page *page;
|
||||
struct page_table *pt;
|
||||
|
||||
//dkprintf("%s: %lx,%lx,%lx\n", __FUNCTION__, base, start, end);
|
||||
|
||||
if (*ptep == PTE_NULL) {
|
||||
return -ENOENT;
|
||||
}
|
||||
@@ -1236,12 +1299,29 @@ static int clear_range_l3(void *args0, pte_t *ptep, uint64_t base,
|
||||
memobj_flush_page(args->memobj, phys, PTL3_SIZE);
|
||||
}
|
||||
|
||||
if (!(old & PFL3_FILEOFF) && args->free_physical) {
|
||||
if (!page || (page && page_unmap(page))) {
|
||||
ihk_mc_free_pages_user(phys_to_virt(phys),
|
||||
kprintf("%s: phys=%ld, pte_get_phys(&old),PTL3_SIZE\n", __FUNCTION__, pte_get_phys(&old));
|
||||
|
||||
if (!(old & PFL3_FILEOFF)) {
|
||||
if(args->free_physical) {
|
||||
if (!page) {
|
||||
/* Anonymous || !XPMEM attach */
|
||||
if (!args->memobj || !(args->memobj->flags & MF_XPMEM)) {
|
||||
ihk_mc_free_pages_user(phys_to_virt(phys),
|
||||
PTL3_SIZE/PTL1_SIZE);
|
||||
dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%ld,size=%ld,pgsize=%ld\n", pte_get_phys(&old), __FUNCTION__, pte_get_phys(&old), PTL3_SIZE, PTL3_SIZE);
|
||||
memory_stat_rss_sub(PTL3_SIZE, PTL3_SIZE);
|
||||
} else {
|
||||
dkprintf("%s: XPMEM attach,phys=%lx\n", __FUNCTION__, phys);
|
||||
}
|
||||
} else if (page_unmap(page)) {
|
||||
ihk_mc_free_pages_user(phys_to_virt(phys),
|
||||
PTL3_SIZE/PTL1_SIZE);
|
||||
/* Track page->count for !MF_PREMAP pages */
|
||||
dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", pte_get_phys(&old), __FUNCTION__, pte_get_phys(&old), PTL3_SIZE, PTL3_SIZE);
|
||||
rusage_memory_stat_sub(args->memobj, PTL3_SIZE, PTL3_SIZE);
|
||||
}
|
||||
args->vm->currss -= PTL3_SIZE;
|
||||
}
|
||||
args->vm->currss -= PTL3_SIZE;
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -1267,6 +1347,8 @@ static int clear_range_l4(void *args0, pte_t *ptep, uint64_t base,
|
||||
{
|
||||
struct page_table *pt;
|
||||
|
||||
//dkprintf("%s: %lx,%lx,%lx\n", __FUNCTION__, base, start, end);
|
||||
|
||||
if (*ptep == PTE_NULL) {
|
||||
return -ENOENT;
|
||||
}
|
||||
@@ -1284,6 +1366,9 @@ static int clear_range(struct page_table *pt, struct process_vm *vm,
|
||||
int error;
|
||||
struct clear_range_args args;
|
||||
|
||||
dkprintf("%s: %p,%lx,%lx,%d,%p\n",
|
||||
__FUNCTION__, pt, start, end, free_physical, memobj);
|
||||
|
||||
if ((start < vm->region.user_start)
|
||||
|| (vm->region.user_end < end)
|
||||
|| (end <= start)) {
|
||||
@@ -1530,6 +1615,7 @@ struct set_range_args {
|
||||
int pgshift;
|
||||
uintptr_t diff;
|
||||
struct process_vm *vm;
|
||||
struct vm_range *range; /* To find pages we don't need to call memory_stat_rss_add() */
|
||||
};
|
||||
|
||||
int set_range_l1(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start,
|
||||
@@ -1553,6 +1639,13 @@ int set_range_l1(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start,
|
||||
*ptep = phys | attr_to_l1attr(args->attr);
|
||||
|
||||
error = 0;
|
||||
// call memory_stat_rss_add() here because pgshift is resolved here
|
||||
if (rusage_memory_stat_add(args->range, phys, PTL1_SIZE, PTL1_SIZE)) {
|
||||
dkprintf("%lx+,%s: calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n", phys, __FUNCTION__, base, phys, PTL1_SIZE, PTL1_SIZE);
|
||||
} else {
|
||||
dkprintf("%s: !calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n", __FUNCTION__, base, phys, PTL1_SIZE, PTL1_SIZE);
|
||||
}
|
||||
|
||||
out:
|
||||
dkprintf("set_range_l1(%lx,%lx,%lx): %d %lx\n",
|
||||
base, start, end, error, *ptep);
|
||||
@@ -1584,6 +1677,12 @@ retry:
|
||||
dkprintf("set_range_l2(%lx,%lx,%lx):"
|
||||
"2MiB page. %d %lx\n",
|
||||
base, start, end, error, *ptep);
|
||||
// call memory_stat_rss_add() here because pgshift is resolved here
|
||||
if (rusage_memory_stat_add(args->range, phys, PTL2_SIZE, PTL2_SIZE)) {
|
||||
dkprintf("%lx+,%s: calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n", phys, __FUNCTION__, base, phys, PTL2_SIZE, PTL2_SIZE);
|
||||
} else {
|
||||
dkprintf("%s: !calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n", __FUNCTION__, base, phys, PTL2_SIZE, PTL2_SIZE);
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -1666,6 +1765,13 @@ retry:
|
||||
dkprintf("set_range_l3(%lx,%lx,%lx):"
|
||||
"1GiB page. %d %lx\n",
|
||||
base, start, end, error, *ptep);
|
||||
|
||||
// Call memory_stat_rss_add() here because pgshift is resolved here
|
||||
if (rusage_memory_stat_add(args->range, phys, PTL3_SIZE, PTL3_SIZE)) {
|
||||
dkprintf("%lx+,%s: calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n", phys, __FUNCTION__, base, phys, PTL3_SIZE, PTL3_SIZE);
|
||||
} else {
|
||||
dkprintf("%s: !calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n", __FUNCTION__, base, phys, PTL3_SIZE, PTL3_SIZE);
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -1783,13 +1889,13 @@ out:
|
||||
|
||||
int ihk_mc_pt_set_range(page_table_t pt, struct process_vm *vm, void *start,
|
||||
void *end, uintptr_t phys, enum ihk_mc_pt_attribute attr,
|
||||
int pgshift)
|
||||
int pgshift, struct vm_range *range)
|
||||
{
|
||||
int error;
|
||||
struct set_range_args args;
|
||||
|
||||
dkprintf("ihk_mc_pt_set_range(%p,%p,%p,%lx,%x)\n",
|
||||
pt, start, end, phys, attr);
|
||||
dkprintf("ihk_mc_pt_set_range(%p,%p,%p,%lx,%x,%d,%lx-%lx)\n",
|
||||
pt, start, end, phys, attr, pgshift, range->start, range->end);
|
||||
|
||||
args.pt = pt;
|
||||
args.phys = phys;
|
||||
@@ -1797,6 +1903,7 @@ int ihk_mc_pt_set_range(page_table_t pt, struct process_vm *vm, void *start,
|
||||
args.diff = (uintptr_t)start ^ phys;
|
||||
args.vm = vm;
|
||||
args.pgshift = pgshift;
|
||||
args.range = range;
|
||||
|
||||
error = walk_pte_l4(pt, 0, (uintptr_t)start, (uintptr_t)end,
|
||||
&set_range_l4, &args);
|
||||
@@ -1935,8 +2042,8 @@ int arch_get_smaller_page_size(void *args, size_t cursize, size_t *newsizep,
|
||||
if (p2alignp) *p2alignp = p2align;
|
||||
|
||||
out:
|
||||
dkprintf("arch_get_smaller_page_size(%p,%lx): %d %lx %d\n",
|
||||
args, cursize, error, newsize, p2align);
|
||||
/*dkprintf("arch_get_smaller_page_size(%p,%lx): %d %lx %d\n",
|
||||
args, cursize, error, newsize, p2align);*/
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -1959,6 +2066,7 @@ struct move_args {
|
||||
uintptr_t src;
|
||||
uintptr_t dest;
|
||||
struct process_vm *vm;
|
||||
struct vm_range *range;
|
||||
};
|
||||
|
||||
static int move_one_page(void *arg0, page_table_t pt, pte_t *ptep,
|
||||
@@ -1990,7 +2098,7 @@ static int move_one_page(void *arg0, page_table_t pt, pte_t *ptep,
|
||||
attr = apte & ~PT_PHYSMASK;
|
||||
|
||||
error = ihk_mc_pt_set_range(pt, args->vm, (void *)dest,
|
||||
(void *)(dest + pgsize), phys, attr, pgshift);
|
||||
(void *)(dest + pgsize), phys, attr, pgshift, args->range);
|
||||
if (error) {
|
||||
kprintf("move_one_page(%p,%p,%p %#lx,%p,%d):"
|
||||
"set failed. %d\n",
|
||||
@@ -2006,7 +2114,7 @@ out:
|
||||
}
|
||||
|
||||
int move_pte_range(page_table_t pt, struct process_vm *vm,
|
||||
void *src, void *dest, size_t size)
|
||||
void *src, void *dest, size_t size, struct vm_range *range)
|
||||
{
|
||||
int error;
|
||||
struct move_args args;
|
||||
@@ -2015,6 +2123,7 @@ int move_pte_range(page_table_t pt, struct process_vm *vm,
|
||||
args.src = (uintptr_t)src;
|
||||
args.dest = (uintptr_t)dest;
|
||||
args.vm = vm;
|
||||
args.range = range;
|
||||
|
||||
error = visit_pte_range(pt, src, src+size, 0, VPTEF_SKIP_NULL,
|
||||
&move_one_page, &args);
|
||||
|
||||
@@ -1366,7 +1366,7 @@ SYSCALL_DECLARE(mmap)
|
||||
struct thread *thread = cpu_local_var(current);
|
||||
struct vm_regions *region = &thread->vm->region;
|
||||
int error;
|
||||
intptr_t addr;
|
||||
intptr_t addr = 0;
|
||||
size_t len;
|
||||
int flags = flags0;
|
||||
size_t pgsize;
|
||||
@@ -1469,7 +1469,7 @@ SYSCALL_DECLARE(shmget)
|
||||
const key_t key = ihk_mc_syscall_arg0(ctx);
|
||||
const size_t size = ihk_mc_syscall_arg1(ctx);
|
||||
const int shmflg0 = ihk_mc_syscall_arg2(ctx);
|
||||
int shmid;
|
||||
int shmid = -EINVAL;
|
||||
int error;
|
||||
int shmflg = shmflg0;
|
||||
|
||||
@@ -1732,6 +1732,7 @@ int arch_map_vdso(struct process_vm *vm)
|
||||
enum ihk_mc_pt_attribute attr;
|
||||
int error;
|
||||
int i;
|
||||
struct vm_range *range;
|
||||
|
||||
dkprintf("arch_map_vdso()\n");
|
||||
if (container_size <= 0) {
|
||||
@@ -1750,7 +1751,7 @@ int arch_map_vdso(struct process_vm *vm)
|
||||
vrflags |= VR_PROT_READ | VR_PROT_EXEC;
|
||||
vrflags |= VRFLAG_PROT_TO_MAXPROT(vrflags);
|
||||
error = add_process_memory_range(vm, (intptr_t)s, (intptr_t)e,
|
||||
NOPHYS, vrflags, NULL, 0, PAGE_SHIFT, NULL);
|
||||
NOPHYS, vrflags, NULL, 0, PAGE_SHIFT, &range);
|
||||
if (error) {
|
||||
ekprintf("ERROR: adding memory range for vdso. %d\n", error);
|
||||
goto out;
|
||||
@@ -1762,7 +1763,7 @@ int arch_map_vdso(struct process_vm *vm)
|
||||
s = vm->vdso_addr + (i * PAGE_SIZE);
|
||||
e = s + PAGE_SIZE;
|
||||
error = ihk_mc_pt_set_range(pt, vm, s, e,
|
||||
vdso.vdso_physlist[i], attr, 0);
|
||||
vdso.vdso_physlist[i], attr, 0, range);
|
||||
if (error) {
|
||||
ekprintf("ihk_mc_pt_set_range failed. %d\n", error);
|
||||
goto out;
|
||||
@@ -1782,7 +1783,7 @@ int arch_map_vdso(struct process_vm *vm)
|
||||
vrflags |= VR_PROT_READ;
|
||||
vrflags |= VRFLAG_PROT_TO_MAXPROT(vrflags);
|
||||
error = add_process_memory_range(vm, (intptr_t)s, (intptr_t)e,
|
||||
NOPHYS, vrflags, NULL, 0, PAGE_SHIFT, NULL);
|
||||
NOPHYS, vrflags, NULL, 0, PAGE_SHIFT, &range);
|
||||
if (error) {
|
||||
ekprintf("ERROR: adding memory range for vvar. %d\n", error);
|
||||
goto out;
|
||||
@@ -1794,7 +1795,7 @@ int arch_map_vdso(struct process_vm *vm)
|
||||
e = s + PAGE_SIZE;
|
||||
attr = PTATTR_ACTIVE | PTATTR_USER | PTATTR_NO_EXECUTE;
|
||||
error = ihk_mc_pt_set_range(pt, vm, s, e,
|
||||
vdso.vvar_phys, attr, 0);
|
||||
vdso.vvar_phys, attr, 0, range);
|
||||
if (error) {
|
||||
ekprintf("ihk_mc_pt_set_range failed. %d\n", error);
|
||||
goto out;
|
||||
@@ -1805,7 +1806,7 @@ int arch_map_vdso(struct process_vm *vm)
|
||||
e = s + PAGE_SIZE;
|
||||
attr = PTATTR_ACTIVE | PTATTR_USER | PTATTR_NO_EXECUTE | PTATTR_UNCACHABLE;
|
||||
error = ihk_mc_pt_set_range(pt, vm, s, e,
|
||||
vdso.hpet_phys, attr, 0);
|
||||
vdso.hpet_phys, attr, 0, range);
|
||||
if (error) {
|
||||
ekprintf("ihk_mc_pt_set_range failed. %d\n", error);
|
||||
goto out;
|
||||
@@ -1816,7 +1817,7 @@ int arch_map_vdso(struct process_vm *vm)
|
||||
e = s + PAGE_SIZE;
|
||||
attr = PTATTR_ACTIVE | PTATTR_USER | PTATTR_NO_EXECUTE;
|
||||
error = ihk_mc_pt_set_range(pt, vm, s, e,
|
||||
vdso.pvti_phys, attr, 0);
|
||||
vdso.pvti_phys, attr, 0, range);
|
||||
if (error) {
|
||||
ekprintf("ihk_mc_pt_set_range failed. %d\n", error);
|
||||
goto out;
|
||||
|
||||
@@ -1057,6 +1057,9 @@ enum {
|
||||
MF_REG_FILE = 0x1000,
|
||||
MF_DEV_FILE = 0x2000,
|
||||
MF_PREMAP = 0x8000,
|
||||
MF_XPMEM = 0x10000, /* To identify XPMEM attachment pages for rusage accounting */
|
||||
MF_ZEROOBJ = 0x20000, /* To identify pages of anonymous, on-demand paging ranges for rusage accounting */
|
||||
MF_SHM = 0x40000,
|
||||
MF_END
|
||||
};
|
||||
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
#include <string.h>
|
||||
#include <syscall.h>
|
||||
#include <process.h>
|
||||
#include <rusage.h>
|
||||
|
||||
//#define DEBUG_PRINT_DEVOBJ
|
||||
|
||||
@@ -199,6 +200,7 @@ static void devobj_release(struct memobj *memobj)
|
||||
}
|
||||
|
||||
if (obj->pfn_table) {
|
||||
// Don't call memory_stat_rss_sub() because devobj related pages don't reside in main memory
|
||||
ihk_mc_free_pages(obj->pfn_table, pfn_npages);
|
||||
}
|
||||
kfree(free_obj);
|
||||
@@ -268,6 +270,7 @@ static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintpt
|
||||
|
||||
memobj_lock(&obj->memobj);
|
||||
obj->pfn_table[ix] = pfn;
|
||||
// Don't call memory_stat_rss_add() because devobj related pages don't reside in main memory
|
||||
}
|
||||
memobj_unlock(&obj->memobj);
|
||||
|
||||
|
||||
@@ -25,9 +25,17 @@
|
||||
#include <pager.h>
|
||||
#include <string.h>
|
||||
#include <syscall.h>
|
||||
#include <rusage.h>
|
||||
|
||||
//#define DEBUG_PRINT_FILEOBJ
|
||||
|
||||
#ifdef DEBUG_PRINT_FILEOBJ
|
||||
#define dkprintf(...) do { if (1) kprintf(__VA_ARGS__); } while (0)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#else
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
mcs_rwlock_lock_t fileobj_list_lock;
|
||||
static LIST_HEAD(fileobj_list);
|
||||
@@ -262,6 +270,9 @@ int fileobj_create(int fd, struct memobj **objp, int *maxprotp)
|
||||
__FUNCTION__, j);
|
||||
goto error_cleanup;
|
||||
}
|
||||
// Track change in memobj->pages[] for MF_PREMAP pages (MPOL_SHM_PREMAP case)
|
||||
dkprintf("%lx+,%s: MF_PREMAP&&MPOL_SHM_PREMAP,memory_stat_rss_add,phys=%lx,size=%ld,pgsize=%ld\n", virt_to_phys(mo->pages[j]), __FUNCTION__, virt_to_phys(mo->pages[j]), PAGE_SIZE, PAGE_SIZE);
|
||||
rusage_memory_stat_mapped_file_add(PAGE_SIZE, PAGE_SIZE);
|
||||
|
||||
memset(mo->pages[j], 0, PAGE_SIZE);
|
||||
|
||||
@@ -357,23 +368,31 @@ static void fileobj_release(struct memobj *memobj)
|
||||
for (;;) {
|
||||
struct page *page;
|
||||
void *page_va;
|
||||
uintptr_t phys;
|
||||
|
||||
page = fileobj_page_hash_first(obj);
|
||||
if (!page) {
|
||||
break;
|
||||
}
|
||||
__fileobj_page_hash_remove(page);
|
||||
page_va = phys_to_virt(page_to_phys(page));
|
||||
phys = page_to_phys(page);
|
||||
page_va = phys_to_virt(phys);
|
||||
|
||||
/* Count must be one because set to one on the first get_page() invoking fileobj_do_pageio and
|
||||
incremented by the second get_page() reaping the pageio and decremented by clear_range().
|
||||
*/
|
||||
if (ihk_atomic_read(&page->count) != 1) {
|
||||
kprintf("%s: WARNING: page count %d for phys 0x%lx is invalid, flags: 0x%lx\n",
|
||||
__FUNCTION__,
|
||||
kprintf("%s: WARNING: page count is %d for phys 0x%lx is invalid, flags: 0x%lx\n",
|
||||
__FUNCTION__,
|
||||
ihk_atomic_read(&page->count),
|
||||
page->phys,
|
||||
to_memobj(free_obj)->flags);
|
||||
}
|
||||
else if (page_unmap(page)) {
|
||||
ihk_mc_free_pages_user(page_va, 1);
|
||||
/* Track change in page->count for !MF_PREMAP pages. It is decremented here or in clear_range() */
|
||||
dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", phys, __FUNCTION__, phys, PAGE_SIZE, PAGE_SIZE);
|
||||
rusage_memory_stat_mapped_file_sub(PAGE_SIZE, PAGE_SIZE);
|
||||
}
|
||||
#if 0
|
||||
count = ihk_atomic_sub_return(1, &page->count);
|
||||
@@ -398,10 +417,17 @@ static void fileobj_release(struct memobj *memobj)
|
||||
/* Pre-mapped? */
|
||||
if (to_memobj(free_obj)->flags & MF_PREMAP) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < to_memobj(free_obj)->nr_pages; ++i) {
|
||||
if (to_memobj(free_obj)->pages[i])
|
||||
if (to_memobj(free_obj)->pages[i]) {
|
||||
dkprintf("%s: pages[i]=%p\n", __FUNCTION__, i, to_memobj(free_obj)->pages[i]);
|
||||
// Track change in fileobj->pages[] for MF_PREMAP pages
|
||||
// Note that page_unmap() isn't called for MF_PREMAP in
|
||||
// free_process_memory_range() --> ihk_mc_pt_free_range()
|
||||
dkprintf("%lx-,%s: memory_stat_rss_sub,phys=%lx,size=%ld,pgsize=%ld\n",
|
||||
virt_to_phys(to_memobj(free_obj)->pages[i]), __FUNCTION__, virt_to_phys(to_memobj(free_obj)->pages[i]), PAGE_SIZE, PAGE_SIZE);
|
||||
rusage_memory_stat_mapped_file_sub(PAGE_SIZE, PAGE_SIZE);
|
||||
ihk_mc_free_pages_user(to_memobj(free_obj)->pages[i], 1);
|
||||
}
|
||||
}
|
||||
|
||||
kfree(to_memobj(free_obj)->pages);
|
||||
@@ -531,7 +557,7 @@ out:
|
||||
}
|
||||
|
||||
static int fileobj_get_page(struct memobj *memobj, off_t off,
|
||||
int p2align, uintptr_t *physp, unsigned long *pflag)
|
||||
int p2align, uintptr_t *physp, unsigned long *pflag)
|
||||
{
|
||||
struct thread *proc = cpu_local_var(current);
|
||||
struct fileobj *obj = to_fileobj(memobj);
|
||||
@@ -577,6 +603,9 @@ static int fileobj_get_page(struct memobj *memobj, off_t off,
|
||||
else {
|
||||
dkprintf("%s: MF_ZEROFILL: off: %lu -> 0x%lx allocated\n",
|
||||
__FUNCTION__, off, virt_to_phys(virt));
|
||||
// Track change in memobj->pages[] for MF_PREMAP pages (!MPOL_SHM_PREMAP case)
|
||||
dkprintf("%lx+,%s: MF_PREMAP&&!MPOL_SHM_PREMAP,memory_stat_rss_add,phys=%lx,size=%ld,pgsize=%ld\n", virt_to_phys(virt), __FUNCTION__, virt_to_phys(virt), PAGE_SIZE, PAGE_SIZE);
|
||||
rusage_memory_stat_mapped_file_add(PAGE_SIZE, PAGE_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -608,7 +637,6 @@ static int fileobj_get_page(struct memobj *memobj, off_t off,
|
||||
|
||||
virt = ihk_mc_alloc_pages_user(npages, IHK_MC_AP_NOWAIT |
|
||||
(to_memobj(obj)->flags & MF_ZEROFILL) ? IHK_MC_AP_USER : 0);
|
||||
|
||||
if (!virt) {
|
||||
error = -ENOMEM;
|
||||
kprintf("fileobj_get_page(%p,%lx,%x,%p):"
|
||||
@@ -619,11 +647,16 @@ static int fileobj_get_page(struct memobj *memobj, off_t off,
|
||||
}
|
||||
phys = virt_to_phys(virt);
|
||||
page = phys_to_page_insert_hash(phys);
|
||||
// Track change in page->count for !MF_PREMAP pages.
|
||||
// Add when setting the PTE for a page with count of one in ihk_mc_pt_set_range().
|
||||
dkprintf("%s: phys_to_page_insert_hash(),phys=%lx,virt=%lx,size=%lx,pgsize=%lx\n", __FUNCTION__, phys, virt, npages * PAGE_SIZE, PAGE_SIZE);
|
||||
|
||||
if (page->mode != PM_NONE) {
|
||||
panic("fileobj_get_page:invalid new page");
|
||||
}
|
||||
page->offset = off;
|
||||
ihk_atomic_set(&page->count, 1);
|
||||
ihk_atomic64_set(&page->mapped, 0);
|
||||
__fileobj_page_hash_insert(obj, page, hash);
|
||||
page->mode = PM_WILL_PAGEIO;
|
||||
}
|
||||
@@ -646,6 +679,7 @@ static int fileobj_get_page(struct memobj *memobj, off_t off,
|
||||
}
|
||||
else if (page->mode == PM_DONE_PAGEIO) {
|
||||
page->mode = PM_MAPPED;
|
||||
dkprintf("%s: PM_DONE_PAGEIO-->PM_MAPPED,obj=%lx,off=%lx,phys=%lx\n", __FUNCTION__, obj, off, page_to_phys(page));
|
||||
}
|
||||
else if (page->mode == PM_PAGEIO_EOF) {
|
||||
error = -ERANGE;
|
||||
@@ -657,6 +691,7 @@ static int fileobj_get_page(struct memobj *memobj, off_t off,
|
||||
}
|
||||
|
||||
ihk_atomic_inc(&page->count);
|
||||
dkprintf("%s: mode=%d,count=%d,obj=%lx,off=%lx,phys=%lx\n", __FUNCTION__, page->mode, page->count, obj, off, page_to_phys(page));
|
||||
|
||||
error = 0;
|
||||
*physp = page_to_phys(page);
|
||||
@@ -684,6 +719,7 @@ static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys,
|
||||
ihk_mc_user_context_t ctx;
|
||||
ssize_t ss;
|
||||
|
||||
dkprintf("%s: phys=%lx,to_memobj(obj)->flags=%x,memobj->flags=%x,page=%p\n", __FUNCTION__, phys, to_memobj(obj)->flags, memobj->flags, phys_to_page(phys));
|
||||
if (to_memobj(obj)->flags & MF_ZEROFILL) {
|
||||
return 0;
|
||||
}
|
||||
@@ -698,6 +734,7 @@ static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys,
|
||||
__FUNCTION__, phys);
|
||||
return 0;
|
||||
}
|
||||
|
||||
memobj_unlock(&obj->memobj);
|
||||
|
||||
ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_WRITE;
|
||||
@@ -706,6 +743,7 @@ static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys,
|
||||
ihk_mc_syscall_arg3(&ctx) = pgsize;
|
||||
ihk_mc_syscall_arg4(&ctx) = phys;
|
||||
|
||||
dkprintf("%s: syscall_generic_forwarding\n", __FUNCTION__);
|
||||
ss = syscall_generic_forwarding(__NR_mmap, &ctx);
|
||||
if (ss != pgsize) {
|
||||
dkprintf("fileobj_flush_page(%p,%lx,%lx): %ld (%lx)\n",
|
||||
|
||||
@@ -32,6 +32,7 @@
|
||||
#include <kmalloc.h>
|
||||
#include <sysfs.h>
|
||||
#include <ihk/perfctr.h>
|
||||
#include <rusage.h>
|
||||
|
||||
//#define DEBUG_PRINT_HOST
|
||||
|
||||
@@ -155,10 +156,10 @@ int prepare_process_ranges_args_envs(struct thread *thread,
|
||||
|
||||
ptattr = arch_vrflag_to_ptattr(range->flag, PF_POPULATE, NULL);
|
||||
error = ihk_mc_pt_set_range(vm->address_space->page_table, vm,
|
||||
(void *)range->start,
|
||||
(void *)range->start + (range_npages * PAGE_SIZE),
|
||||
up, ptattr,
|
||||
range->pgshift);
|
||||
(void *)range->start,
|
||||
(void *)range->start + (range_npages * PAGE_SIZE),
|
||||
up, ptattr,
|
||||
range->pgshift, range);
|
||||
|
||||
if (error) {
|
||||
kprintf("%s: ihk_mc_pt_set_range failed. %d\n",
|
||||
@@ -167,6 +168,8 @@ int prepare_process_ranges_args_envs(struct thread *thread,
|
||||
goto err;
|
||||
}
|
||||
|
||||
// memory_stat_rss_add() is called in ihk_mc_pt_set_range()
|
||||
|
||||
p->sections[i].remote_pa = up;
|
||||
|
||||
/* TODO: Maybe we need flag */
|
||||
@@ -237,6 +240,7 @@ int prepare_process_ranges_args_envs(struct thread *thread,
|
||||
kprintf("%s: error: adding memory range for heap\n", __FUNCTION__);
|
||||
goto err;
|
||||
}
|
||||
// heap: Add when memory_stat_rss_add() is called in downstream, i.e. add_process_memory_range()
|
||||
|
||||
vm->region.brk_end_allocated = vm->region.brk_end +
|
||||
proc->heap_extension;
|
||||
@@ -260,12 +264,15 @@ int prepare_process_ranges_args_envs(struct thread *thread,
|
||||
}
|
||||
args_envs_p = virt_to_phys(args_envs);
|
||||
|
||||
dkprintf("%s: args_envs: %d pages\n",
|
||||
__FUNCTION__, ARGENV_PAGE_COUNT);
|
||||
if(add_process_memory_range(vm, addr, e, args_envs_p,
|
||||
flags, NULL, 0, PAGE_SHIFT, NULL) != 0){
|
||||
ihk_mc_free_pages_user(args_envs, ARGENV_PAGE_COUNT);
|
||||
kprintf("ERROR: adding memory range for args/envs\n");
|
||||
goto err;
|
||||
}
|
||||
// memory_stat_rss_add() is called in downstream, i.e. add_process_memory_range()
|
||||
|
||||
dkprintf("args_envs mapping\n");
|
||||
|
||||
|
||||
@@ -37,6 +37,9 @@ enum {
|
||||
MF_REG_FILE = 0x1000,
|
||||
MF_DEV_FILE = 0x2000,
|
||||
MF_PREMAP = 0x8000,
|
||||
MF_XPMEM = 0x10000, /* To identify XPMEM attachment pages for rusage accounting */
|
||||
MF_ZEROOBJ = 0x20000, /* To identify pages of anonymous, on-demand paging ranges for rusage accounting */
|
||||
MF_SHM = 0x40000,
|
||||
MF_HOST_RELEASED = 0x80000000,
|
||||
MF_END
|
||||
};
|
||||
|
||||
@@ -21,6 +21,7 @@ struct page {
|
||||
uint8_t mode;
|
||||
uint64_t phys;
|
||||
ihk_atomic_t count;
|
||||
ihk_atomic64_t mapped;
|
||||
off_t offset;
|
||||
};
|
||||
|
||||
|
||||
@@ -2,7 +2,10 @@
|
||||
#define __RUSAGE_H
|
||||
|
||||
#include <config.h>
|
||||
#include <page.h>
|
||||
#include <ihk/rusage.h>
|
||||
#include <ihk/atomic.h>
|
||||
#include <memobj.h>
|
||||
|
||||
#ifdef ENABLE_RUSAGE
|
||||
#define RUSAGE_MEM_LIMIT (2 * 1024 * 1024) // 2MB
|
||||
@@ -23,9 +26,9 @@ rusage_rss_add(unsigned long size)
|
||||
unsigned long retval;
|
||||
|
||||
newval = __sync_add_and_fetch(&monitor->rusage_rss_current, size);
|
||||
oldval = monitor->rusage_rss_max;
|
||||
oldval = monitor->rusage_memory_max_usage;
|
||||
while (newval > oldval) {
|
||||
retval = __sync_val_compare_and_swap(&monitor->rusage_rss_max,
|
||||
retval = __sync_val_compare_and_swap(&monitor->rusage_memory_max_usage,
|
||||
oldval, newval);
|
||||
if (retval == oldval) {
|
||||
break;
|
||||
@@ -40,6 +43,88 @@ rusage_rss_sub(unsigned long size)
|
||||
__sync_sub_and_fetch(&monitor->rusage_rss_current, size);
|
||||
}
|
||||
|
||||
static inline void memory_stat_rss_add(unsigned long size, int pgsize)
|
||||
{
|
||||
ihk_atomic_add_long(size, &monitor->rusage_memory_stat_rss[rusage_pgsize_to_pgtype(pgsize)]);
|
||||
}
|
||||
|
||||
static inline void memory_stat_rss_sub(unsigned long size, int pgsize)
|
||||
{
|
||||
ihk_atomic_add_long(-size, &monitor->rusage_memory_stat_rss[rusage_pgsize_to_pgtype(pgsize)]);
|
||||
}
|
||||
|
||||
static inline void rusage_memory_stat_mapped_file_add(unsigned long size, int pgsize)
|
||||
{
|
||||
ihk_atomic_add_long(size, &monitor->rusage_memory_stat_mapped_file[rusage_pgsize_to_pgtype(pgsize)]);
|
||||
}
|
||||
|
||||
static inline void rusage_memory_stat_mapped_file_sub(unsigned long size, int pgsize)
|
||||
{
|
||||
ihk_atomic_add_long(-size, &monitor->rusage_memory_stat_mapped_file[rusage_pgsize_to_pgtype(pgsize)]);
|
||||
}
|
||||
|
||||
static inline int rusage_memory_stat_add(struct vm_range *range, uintptr_t phys, unsigned long size, int pgsize)
|
||||
{
|
||||
/* Is it resident in main memory? */
|
||||
if (range->flag & (VR_REMOTE | VR_IO_NOCACHE | VR_RESERVED)) {
|
||||
return 0;
|
||||
}
|
||||
/* Is it anonymous and pre-paging? */
|
||||
if (!range->memobj) {
|
||||
memory_stat_rss_add(size, pgsize);
|
||||
return 1;
|
||||
}
|
||||
/* Is it devobj or (fileobj and pre-map) or xpmem attachment? */
|
||||
if ((range->memobj->flags & MF_DEV_FILE) ||
|
||||
(range->memobj->flags & MF_PREMAP) ||
|
||||
(range->memobj->flags & MF_XPMEM)
|
||||
) {
|
||||
return 0;
|
||||
}
|
||||
/* Is it anonymous and demand-paging? */
|
||||
if (range->memobj->flags & MF_ZEROOBJ) {
|
||||
memory_stat_rss_add(size, pgsize);
|
||||
return 1;
|
||||
}
|
||||
|
||||
struct page *page = phys_to_page(phys);
|
||||
|
||||
/* Is It file map and cow page? */
|
||||
if ((range->memobj->flags & (MF_DEV_FILE | MF_REG_FILE)) &&
|
||||
!page) {
|
||||
//kprintf("%s: cow,phys=%lx\n", __FUNCTION__, phys);
|
||||
memory_stat_rss_add(size, pgsize);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Is it a sharable page? */
|
||||
if (!page) {
|
||||
kprintf("%s: WARNING !page,phys=%lx\n", __FUNCTION__, phys);
|
||||
return 0;
|
||||
}
|
||||
/* Is this the first attempt to map the sharable page? */
|
||||
if(__sync_bool_compare_and_swap(&page->mapped.counter64, 0, 1)) {
|
||||
if(range->memobj->flags & MF_SHM) {
|
||||
memory_stat_rss_add(size, pgsize);
|
||||
} else {
|
||||
rusage_memory_stat_mapped_file_add(size, pgsize);
|
||||
}
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void rusage_memory_stat_sub(struct memobj *memobj, unsigned long size, int pgsize)
|
||||
{
|
||||
if(memobj->flags & MF_SHM) {
|
||||
memory_stat_rss_sub(size, pgsize);
|
||||
} else {
|
||||
rusage_memory_stat_mapped_file_sub(size, pgsize);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
rusage_kmem_add(unsigned long size)
|
||||
{
|
||||
@@ -47,11 +132,11 @@ rusage_kmem_add(unsigned long size)
|
||||
unsigned long oldval;
|
||||
unsigned long retval;
|
||||
|
||||
newval = __sync_add_and_fetch(&monitor->rusage_kmem_usage, size);
|
||||
oldval = monitor->rusage_kmem_max_usage;
|
||||
newval = __sync_add_and_fetch(&monitor->rusage_memory_kmem_usage, size);
|
||||
oldval = monitor->rusage_memory_kmem_max_usage;
|
||||
while (newval > oldval) {
|
||||
retval = __sync_val_compare_and_swap(
|
||||
&monitor->rusage_kmem_max_usage,
|
||||
&monitor->rusage_memory_kmem_max_usage,
|
||||
oldval, newval);
|
||||
if (retval == oldval) {
|
||||
break;
|
||||
@@ -63,13 +148,13 @@ rusage_kmem_add(unsigned long size)
|
||||
static inline void
|
||||
rusage_kmem_sub(unsigned long size)
|
||||
{
|
||||
__sync_sub_and_fetch(&monitor->rusage_kmem_usage, size);
|
||||
__sync_sub_and_fetch(&monitor->rusage_memory_kmem_usage, size);
|
||||
}
|
||||
|
||||
static inline void
|
||||
rusage_numa_add(int numa_id, unsigned long size)
|
||||
{
|
||||
__sync_add_and_fetch(monitor->rusage_numa_stat + numa_id, size);
|
||||
__sync_add_and_fetch(monitor->rusage_memory_numa_stat + numa_id, size);
|
||||
rusage_rss_add(size);
|
||||
}
|
||||
|
||||
@@ -77,7 +162,7 @@ static inline void
|
||||
rusage_numa_sub(int numa_id, unsigned long size)
|
||||
{
|
||||
rusage_rss_sub(size);
|
||||
__sync_sub_and_fetch(monitor->rusage_numa_stat + numa_id, size);
|
||||
__sync_sub_and_fetch(monitor->rusage_memory_numa_stat + numa_id, size);
|
||||
}
|
||||
|
||||
static inline void
|
||||
@@ -163,6 +248,31 @@ rusage_rss_sub(unsigned long size)
|
||||
{
|
||||
}
|
||||
|
||||
/*
 * No-op stub of memory_stat_rss_add(): accepts the same arguments and
 * does nothing.
 * NOTE(review): presumably the variant compiled when ENABLE_RUSAGE is
 * not defined -- confirm against the surrounding #ifdef.
 */
static inline void memory_stat_rss_add(unsigned long size, size_t pgsize)
{
	(void)size;
	(void)pgsize;
}
|
||||
|
||||
/*
 * No-op stub of memory_stat_rss_sub(): accepts the same arguments and
 * does nothing.
 * NOTE(review): presumably the variant compiled when ENABLE_RUSAGE is
 * not defined -- confirm against the surrounding #ifdef.
 */
static inline void memory_stat_rss_sub(unsigned long size, size_t pgsize)
{
	(void)size;
	(void)pgsize;
}
|
||||
|
||||
/*
 * No-op stub of rusage_memory_stat_mapped_file_add(): accepts the same
 * arguments and does nothing.
 * NOTE(review): presumably the variant compiled when ENABLE_RUSAGE is
 * not defined -- confirm against the surrounding #ifdef.
 */
static inline void rusage_memory_stat_mapped_file_add(unsigned long size, int pgsize)
{
	(void)size;
	(void)pgsize;
}
|
||||
|
||||
/*
 * No-op stub of rusage_memory_stat_mapped_file_sub(): accepts the same
 * arguments and does nothing.
 * NOTE(review): presumably the variant compiled when ENABLE_RUSAGE is
 * not defined -- confirm against the surrounding #ifdef.
 */
static inline void rusage_memory_stat_mapped_file_sub(unsigned long size, int pgsize)
{
	(void)size;
	(void)pgsize;
}
|
||||
|
||||
/*
 * Stub of rusage_memory_stat_add(): ignores all arguments and always
 * returns 0, i.e. reports that nothing was accounted.
 * NOTE(review): presumably the variant compiled when ENABLE_RUSAGE is
 * not defined -- confirm against the surrounding #ifdef.
 */
static inline int rusage_memory_stat_add(struct vm_range *range, uintptr_t phys, unsigned long size, int pgsize)
{
	(void)range;
	(void)phys;
	(void)size;
	(void)pgsize;

	return 0;
}
|
||||
|
||||
/*
 * No-op stub of rusage_memory_stat_sub(): accepts the same arguments
 * and does nothing.
 * NOTE(review): presumably the variant compiled when ENABLE_RUSAGE is
 * not defined -- confirm against the surrounding #ifdef.
 */
static inline void rusage_memory_stat_sub(struct memobj *memobj, unsigned long size, int pgsize)
{
	(void)memobj;
	(void)size;
	(void)pgsize;
}
|
||||
|
||||
static inline void
|
||||
rusage_numa_add(int numa_id, unsigned long size)
|
||||
{
|
||||
|
||||
@@ -32,7 +32,7 @@ enum {
|
||||
SHM_EXEC = 0100000,
|
||||
|
||||
/* for shm_mode */
|
||||
SHM_DEST = 01000,
|
||||
SHM_DEST = 01000, /* Marked for destruction */
|
||||
SHM_LOCKED = 02000,
|
||||
|
||||
/* for cmd of shmctl() */
|
||||
|
||||
@@ -318,6 +318,8 @@ static inline struct xpmem_thread_group *__xpmem_tg_ref_by_tgid(
|
||||
tgid, return_destroying);
|
||||
|
||||
index = xpmem_tg_hashtable_index(tgid);
|
||||
XPMEM_DEBUG("xpmem_my_part=%p\n", xpmem_my_part);
|
||||
XPMEM_DEBUG("xpmem_my_part->tg_hashtable=%p\n", xpmem_my_part->tg_hashtable);
|
||||
mcs_rwlock_reader_lock(&xpmem_my_part->tg_hashtable[index].lock, &lock);
|
||||
tg = __xpmem_tg_ref_by_tgid_nolock_internal(tgid, index,
|
||||
return_destroying);
|
||||
|
||||
@@ -255,7 +255,6 @@ static void monitor_init()
|
||||
monitor = ihk_mc_alloc_pages(z, IHK_MC_AP_CRITICAL);
|
||||
memset(monitor, 0, z * PAGE_SIZE);
|
||||
monitor->num_processors = num_processors;
|
||||
monitor->num_numa_nodes = ihk_mc_get_nr_numa_nodes();
|
||||
monitor->ns_per_tsc = ihk_mc_get_ns_per_tsc();
|
||||
phys = virt_to_phys(monitor);
|
||||
ihk_set_monitor(phys, sizeof(struct ihk_os_monitor) +
|
||||
|
||||
128
kernel/process.c
128
kernel/process.c
@@ -440,6 +440,31 @@ clone_thread(struct thread *org, unsigned long pc, unsigned long sp,
|
||||
goto err_free_proc;
|
||||
}
|
||||
|
||||
/* Copy mckfd list
|
||||
FIXME: Replace list manipulation with list_add() etc. */
|
||||
long irqstate = ihk_mc_spinlock_lock(&proc->mckfd_lock);
|
||||
struct mckfd *cur;
|
||||
for (cur = org->proc->mckfd; cur; cur = cur->next) {
|
||||
struct mckfd *mckfd = kmalloc(sizeof(struct mckfd), IHK_MC_AP_NOWAIT);
|
||||
if(!mckfd) {
|
||||
release_address_space(asp);
|
||||
kfree(proc->vm);
|
||||
kfree(proc);
|
||||
goto err_free_proc;
|
||||
}
|
||||
memcpy(mckfd, cur, sizeof(struct mckfd));
|
||||
|
||||
if (proc->mckfd == NULL) {
|
||||
proc->mckfd = mckfd;
|
||||
mckfd->next = NULL;
|
||||
}
|
||||
else {
|
||||
mckfd->next = proc->mckfd;
|
||||
proc->mckfd = mckfd;
|
||||
}
|
||||
}
|
||||
ihk_mc_spinlock_unlock(&proc->mckfd_lock, irqstate);
|
||||
|
||||
thread->vm->vdso_addr = org->vm->vdso_addr;
|
||||
thread->vm->vvar_addr = org->vm->vvar_addr;
|
||||
thread->proc->maxrss = org->proc->maxrss;
|
||||
@@ -532,6 +557,7 @@ ptrace_traceme(void)
|
||||
struct copy_args {
|
||||
struct process_vm *new_vm;
|
||||
unsigned long new_vrflag;
|
||||
struct vm_range *range;
|
||||
|
||||
/* out */
|
||||
intptr_t fault_addr;
|
||||
@@ -591,12 +617,13 @@ static int copy_user_pte(void *arg0, page_table_t src_pt, pte_t *src_ptep, void
|
||||
}
|
||||
|
||||
error = ihk_mc_pt_set_range(args->new_vm->address_space->page_table,
|
||||
args->new_vm, pgaddr, pgaddr+pgsize, phys, attr,
|
||||
pgshift);
|
||||
args->new_vm, pgaddr, pgaddr + pgsize, phys, attr,
|
||||
pgshift, args->range);
|
||||
if (error) {
|
||||
args->fault_addr = (intptr_t)pgaddr;
|
||||
goto out;
|
||||
}
|
||||
// fork/clone case: memory_stat_rss_add() is called in ihk_mc_pt_set_range()
|
||||
|
||||
dkprintf("copy_user_pte(): new PTE set\n");
|
||||
error = 0;
|
||||
@@ -655,7 +682,8 @@ static int copy_user_ranges(struct process_vm *vm, struct process_vm *orgvm)
|
||||
args.new_vrflag = range->flag;
|
||||
args.new_vm = vm;
|
||||
args.fault_addr = -1;
|
||||
|
||||
args.range = range;
|
||||
|
||||
error = visit_pte_range(orgvm->address_space->page_table,
|
||||
(void *)range->start, (void *)range->end,
|
||||
range->pgshift, VPTEF_SKIP_NULL,
|
||||
@@ -670,6 +698,7 @@ static int copy_user_ranges(struct process_vm *vm, struct process_vm *orgvm)
|
||||
}
|
||||
goto err_free_range_rollback;
|
||||
}
|
||||
// memory_stat_rss_add() is called in child-node, i.e. copy_user_pte()
|
||||
|
||||
insert_vm_range_list(vm, range);
|
||||
}
|
||||
@@ -702,13 +731,13 @@ int update_process_page_table(struct process_vm *vm,
|
||||
attr = arch_vrflag_to_ptattr(range->flag, PF_POPULATE, NULL);
|
||||
flags = ihk_mc_spinlock_lock(&vm->page_table_lock);
|
||||
error = ihk_mc_pt_set_range(vm->address_space->page_table, vm,
|
||||
(void *)range->start, (void *)range->end, phys, attr,
|
||||
range->pgshift);
|
||||
(void *)range->start, (void *)range->end, phys, attr,
|
||||
range->pgshift, range);
|
||||
if (error) {
|
||||
kprintf("update_process_page_table:ihk_mc_pt_set_range failed. %d\n", error);
|
||||
goto out;
|
||||
}
|
||||
|
||||
// memory_stat_rss_add() is called in ihk_mc_pt_set_range()
|
||||
error = 0;
|
||||
out:
|
||||
ihk_mc_spinlock_unlock(&vm->page_table_lock, flags);
|
||||
@@ -730,6 +759,7 @@ int split_process_memory_range(struct process_vm *vm, struct vm_range *range,
|
||||
"ihk_mc_pt_split failed. %d\n", error);
|
||||
goto out;
|
||||
}
|
||||
// memory_stat_rss_add() is called in child-node, i.e. ihk_mc_pt_split() to deal with L3->L2 case
|
||||
|
||||
newrange = kmalloc(sizeof(struct vm_range), IHK_MC_AP_NOWAIT);
|
||||
if (!newrange) {
|
||||
@@ -872,6 +902,8 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range)
|
||||
}
|
||||
}
|
||||
|
||||
dkprintf("%s: vm=%p,range=%p,%lx-%lx\n", __FUNCTION__, vm, range, range->start, range->end);
|
||||
|
||||
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
|
||||
if (range->memobj) {
|
||||
memobj_lock(range->memobj);
|
||||
@@ -889,8 +921,11 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range)
|
||||
vm, start0, end0, start, end, range->memobj, error);
|
||||
/* through */
|
||||
}
|
||||
// memory_stat_rss_sub() is called downstream, i.e. ihk_mc_pt_free_range() to deal with empty PTE
|
||||
}
|
||||
else {
|
||||
// memory_stat_rss_sub() isn't called because free_physical is set to zero in clear_range()
|
||||
dkprintf("%s,memory_stat_rss_sub() isn't called, VR_REMOTE | VR_IO_NOCACHE | VR_RESERVED case, %lx-%lx\n", __FUNCTION__, start, end);
|
||||
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
|
||||
error = ihk_mc_pt_clear_range(vm->address_space->page_table, vm,
|
||||
(void *)start, (void *)end);
|
||||
@@ -1100,6 +1135,7 @@ int add_process_memory_range(struct process_vm *vm,
|
||||
struct memobj *memobj, off_t offset,
|
||||
int pgshift, struct vm_range **rp)
|
||||
{
|
||||
dkprintf("%s: start=%lx,end=%lx,phys=%lx,flag=%lx\n", __FUNCTION__, start, end, phys, flag);
|
||||
struct vm_range *range;
|
||||
int rc;
|
||||
|
||||
@@ -1147,6 +1183,7 @@ int add_process_memory_range(struct process_vm *vm,
|
||||
}
|
||||
else {
|
||||
rc = update_process_page_table(vm, range, phys, 0);
|
||||
// memory_stat_rss_add() is called in ihk_mc_pt_set_range()
|
||||
}
|
||||
|
||||
if (rc != 0) {
|
||||
@@ -1394,6 +1431,8 @@ static int remap_one_page(void *arg0, page_table_t pt, pte_t *ptep,
|
||||
page = phys_to_page(phys);
|
||||
if (page && page_unmap(page)) {
|
||||
ihk_mc_free_pages_user(phys_to_virt(phys), pgsize/PAGE_SIZE);
|
||||
dkprintf("%lx-,%s: calling memory_stat_rss_sub(),size=%ld,pgsize=%ld\n", phys, __FUNCTION__, pgsize, pgsize);
|
||||
rusage_memory_stat_sub(args->memobj, pgsize, pgsize);
|
||||
}
|
||||
|
||||
error = 0;
|
||||
@@ -1565,6 +1604,7 @@ static int invalidate_one_page(void *arg0, page_table_t pt, pte_t *ptep,
|
||||
arg0, pt, ptep, *ptep, pgaddr, pgshift, error);
|
||||
goto out;
|
||||
}
|
||||
// memory_stat_rss_sub() is called in downstream, i.e. shmobj_invalidate_page()
|
||||
|
||||
error = 0;
|
||||
out:
|
||||
@@ -1596,6 +1636,8 @@ int invalidate_process_memory_range(struct process_vm *vm,
|
||||
vm, range, start, end, error);
|
||||
goto out;
|
||||
}
|
||||
// memory_stat_rss_sub() is called downstream, i.e. invalidate_one_page() to deal with empty PTEs
|
||||
|
||||
out:
|
||||
dkprintf("invalidate_process_memory_range(%p,%p,%#lx,%#lx):%d\n",
|
||||
vm, range, start, end, error);
|
||||
@@ -1636,6 +1678,7 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang
|
||||
goto out;
|
||||
}
|
||||
/*****/
|
||||
dkprintf("%s: pgaddr=%lx,range->start=%lx,range->end=%lx,pgaddr+pgsize=%lx\n", __FUNCTION__, pgaddr, range->start, range->end, pgaddr + pgsize);
|
||||
while (((uintptr_t)pgaddr < range->start)
|
||||
|| (range->end < ((uintptr_t)pgaddr + pgsize))) {
|
||||
ptep = NULL;
|
||||
@@ -1647,6 +1690,7 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang
|
||||
pgaddr = (void *)(fault_addr & ~(pgsize - 1));
|
||||
}
|
||||
/*****/
|
||||
dkprintf("%s: ptep=%lx,pte_is_null=%d,pte_is_fileoff=%d\n", __FUNCTION__, ptep, ptep ? pte_is_null(ptep) : -1, ptep ? pte_is_fileoff(ptep, pgsize) : -1);
|
||||
if (!ptep || pte_is_null(ptep) || pte_is_fileoff(ptep, pgsize)) {
|
||||
phys = NOPHYS;
|
||||
if (range->memobj) {
|
||||
@@ -1659,7 +1703,7 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang
|
||||
off = pte_get_off(ptep, pgsize);
|
||||
}
|
||||
error = memobj_get_page(range->memobj, off, p2align,
|
||||
&phys, &memobj_flag);
|
||||
&phys, &memobj_flag);
|
||||
if (error) {
|
||||
struct memobj *obj;
|
||||
|
||||
@@ -1671,6 +1715,7 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
// memory_stat_rss_add() is called downstream, i.e. memobj_get_page() to check page->count
|
||||
}
|
||||
if (phys == NOPHYS) {
|
||||
void *virt = NULL;
|
||||
@@ -1704,6 +1749,10 @@ retry:
|
||||
memset(virt, 0, pgsize);
|
||||
phys = virt_to_phys(virt);
|
||||
if (phys_to_page(phys)) {
|
||||
dkprintf("%s: NOPHYS,phys=%lx,vmr(%lx-%lx),flag=%x,fa=%lx,reason=%x\n",
|
||||
__FUNCTION__, page_to_phys(page),
|
||||
range->start, range->end, range->flag, fault_addr, reason);
|
||||
|
||||
page_map(phys_to_page(phys));
|
||||
}
|
||||
}
|
||||
@@ -1730,6 +1779,10 @@ retry:
|
||||
void *virt;
|
||||
size_t npages;
|
||||
|
||||
if (!page) {
|
||||
kprintf("%s: WARNING: cow on non-struct-page-managed page\n", __FUNCTION__);
|
||||
}
|
||||
|
||||
npages = pgsize / PAGE_SIZE;
|
||||
virt = ihk_mc_alloc_aligned_pages_user(npages, p2align,
|
||||
IHK_MC_AP_NOWAIT);
|
||||
@@ -1738,34 +1791,54 @@ retry:
|
||||
kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):cannot allocate copy page. %d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error);
|
||||
goto out;
|
||||
}
|
||||
dkprintf("%s: copying 0x%lx:%lu\n",
|
||||
__FUNCTION__, pgaddr, pgsize);
|
||||
dkprintf("%s: cow,copying virt:%lx<-%lx,phys:%lx<-%lx,pgsize=%lu\n",
|
||||
__FUNCTION__, virt, phys_to_virt(phys), virt_to_phys(virt), phys, pgsize);
|
||||
memcpy(virt, phys_to_virt(phys), pgsize);
|
||||
|
||||
phys = virt_to_phys(virt);
|
||||
if (page) {
|
||||
page_unmap(page);
|
||||
/* Call rusage_memory_stat_add() because remote page fault may create a page not pointed-to by PTE */
|
||||
if(rusage_memory_stat_add(range, phys, pgsize, pgsize)) {
|
||||
dkprintf("%lx+,%s: remote page fault + cow, calling memory_stat_rss_add(),pgsize=%ld\n",
|
||||
phys, __FUNCTION__, pgsize);
|
||||
}
|
||||
if (page) {
|
||||
if (page_unmap(page)) {
|
||||
dkprintf("%lx-,%s: cow,calling memory_stat_rss_sub(),size=%ld,pgsize=%ld\n", phys, __FUNCTION__, pgsize, pgsize);
|
||||
rusage_memory_stat_sub(range->memobj, pgsize, pgsize);
|
||||
}
|
||||
}
|
||||
phys = virt_to_phys(virt);
|
||||
page = phys_to_page(phys);
|
||||
}
|
||||
}
|
||||
/*****/
|
||||
if (ptep) {
|
||||
if(rusage_memory_stat_add(range, phys, pgsize, pgsize)) {
|
||||
/* on-demand paging, phys pages are obtained by ihk_mc_alloc_aligned_pages_user() or get_page() */
|
||||
dkprintf("%lx+,%s: (on-demand paging && first map) || cow,calling memory_stat_rss_add(),phys=%lx,pgsize=%ld\n",
|
||||
phys, __FUNCTION__, phys, pgsize);
|
||||
} else {
|
||||
dkprintf("%s: !calling memory_stat_rss_add(),phys=%lx,pgsize=%ld\n",
|
||||
__FUNCTION__, phys, pgsize);
|
||||
}
|
||||
|
||||
dkprintf("%s: attr=%x\n", __FUNCTION__, attr);
|
||||
error = ihk_mc_pt_set_pte(vm->address_space->page_table, ptep,
|
||||
pgsize, phys, attr);
|
||||
if (error) {
|
||||
kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):set_pte failed. %d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error);
|
||||
goto out;
|
||||
}
|
||||
dkprintf("%s: non-NULL pte,page=%lx,page_is_in_memobj=%d,page->count=%d\n", __FUNCTION__, page, page ? page_is_in_memobj(page) : 0, page ? ihk_atomic_read(&page->count) : 0);
|
||||
}
|
||||
else {
|
||||
error = ihk_mc_pt_set_range(vm->address_space->page_table, vm,
|
||||
pgaddr, pgaddr + pgsize, phys,
|
||||
attr, range->pgshift);
|
||||
attr, range->pgshift, range);
|
||||
if (error) {
|
||||
kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):set_range failed. %d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error);
|
||||
goto out;
|
||||
}
|
||||
// memory_stat_rss_add() is called in downstream with !memobj check
|
||||
}
|
||||
flush_tlb_single(fault_addr);
|
||||
vm->currss += pgsize;
|
||||
@@ -1778,7 +1851,14 @@ retry:
|
||||
out:
|
||||
ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
|
||||
if (page) {
|
||||
page_unmap(page);
|
||||
/* Unmap stray struct page */
|
||||
dkprintf("%s: out,phys=%lx,vmr(%lx-%lx),flag=%x,fa=%lx,reason=%x\n",
|
||||
__FUNCTION__, page_to_phys(page),
|
||||
range->start, range->end, range->flag, fault_addr, reason);
|
||||
if (page_unmap(page)) {
|
||||
dkprintf("%lx-,%s: out,calling memory_stat_rss_sub(),size=%ld,pgsize=%ld\n", page_to_phys(page), __FUNCTION__, pgsize, pgsize);
|
||||
rusage_memory_stat_sub(range->memobj, pgsize, pgsize);
|
||||
}
|
||||
}
|
||||
dkprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx): %d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error);
|
||||
return error;
|
||||
@@ -1947,6 +2027,7 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
|
||||
unsigned long at_rand;
|
||||
struct process *proc = thread->proc;
|
||||
unsigned long ap_flag;
|
||||
struct vm_range *range;
|
||||
|
||||
/* Create stack range */
|
||||
end = STACK_TOP(&thread->vm->region) & LARGE_PAGE_MASK;
|
||||
@@ -1991,17 +2072,18 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
|
||||
vrflag |= VR_MAXPROT_READ | VR_MAXPROT_WRITE | VR_MAXPROT_EXEC;
|
||||
#define NOPHYS ((uintptr_t)-1)
|
||||
if ((rc = add_process_memory_range(thread->vm, start, end, NOPHYS,
|
||||
vrflag, NULL, 0, LARGE_PAGE_SHIFT, NULL)) != 0) {
|
||||
vrflag, NULL, 0, LARGE_PAGE_SHIFT, &range)) != 0) {
|
||||
ihk_mc_free_pages_user(stack, minsz >> PAGE_SHIFT);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Map physical pages for initial stack frame */
|
||||
error = ihk_mc_pt_set_range(thread->vm->address_space->page_table,
|
||||
thread->vm, (void *)(end - minsz),
|
||||
(void *)end, virt_to_phys(stack),
|
||||
arch_vrflag_to_ptattr(vrflag, PF_POPULATE, NULL),
|
||||
LARGE_PAGE_SHIFT);
|
||||
thread->vm, (void *)(end - minsz),
|
||||
(void *)end, virt_to_phys(stack),
|
||||
arch_vrflag_to_ptattr(vrflag, PF_POPULATE, NULL),
|
||||
LARGE_PAGE_SHIFT, range
|
||||
);
|
||||
|
||||
if (error) {
|
||||
kprintf("init_process_stack:"
|
||||
@@ -2011,6 +2093,8 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
|
||||
return error;
|
||||
}
|
||||
|
||||
// memory_stat_rss_add() is called in ihk_mc_pt_set_range();
|
||||
|
||||
/* set up initial stack frame */
|
||||
p = (unsigned long *)(stack + minsz);
|
||||
s_ind = -1;
|
||||
@@ -2112,8 +2196,9 @@ unsigned long extend_process_region(struct process_vm *vm,
|
||||
ihk_mc_free_pages_user(p, (new_end_allocated - end_allocated) >> PAGE_SHIFT);
|
||||
return end_allocated;
|
||||
}
|
||||
// memory_stat_rss_add() is called in add_process_memory_range()
|
||||
|
||||
dkprintf("%s: new_end_allocated: 0x%lu, align_size: %lu, align_mask: %lx\n",
|
||||
dkprintf("%s: new_end_allocated: 0x%lx, align_size: %lu, align_mask: %lx\n",
|
||||
__FUNCTION__, new_end_allocated, align_size, align_mask);
|
||||
|
||||
return new_end_allocated;
|
||||
@@ -2134,6 +2219,9 @@ int remove_process_region(struct process_vm *vm,
|
||||
(void *)start, (void *)end);
|
||||
ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
|
||||
|
||||
// memory_stat_rss_sub() isn't called because this execution path is no longer reached
|
||||
dkprintf("%s: memory_stat_rss_sub() isn't called,start=%lx,end=%lx\n", __FUNCTION__, start, end);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
#include <page.h>
|
||||
#include <shm.h>
|
||||
#include <string.h>
|
||||
#include <rusage.h>
|
||||
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
@@ -179,6 +180,7 @@ int shmobj_create(struct shmid_ds *ds, struct memobj **objp)
|
||||
|
||||
memset(obj, 0, sizeof(*obj));
|
||||
obj->memobj.ops = &shmobj_ops;
|
||||
obj->memobj.flags = MF_SHM;
|
||||
obj->memobj.size = ds->shm_segsz;
|
||||
obj->ds = *ds;
|
||||
obj->ds.shm_perm.seq = the_seq++;
|
||||
@@ -242,13 +244,15 @@ void shmobj_destroy(struct shmobj *obj)
|
||||
for (;;) {
|
||||
struct page *page;
|
||||
void *page_va;
|
||||
uintptr_t phys;
|
||||
|
||||
page = page_list_first(obj);
|
||||
if (!page) {
|
||||
break;
|
||||
}
|
||||
page_list_remove(obj, page);
|
||||
page_va = phys_to_virt(page_to_phys(page));
|
||||
phys = page_to_phys(page);
|
||||
page_va = phys_to_virt(phys);
|
||||
|
||||
if (ihk_atomic_read(&page->count) != 1) {
|
||||
kprintf("%s: WARNING: page count for phys 0x%lx is invalid\n",
|
||||
@@ -257,6 +261,10 @@ void shmobj_destroy(struct shmobj *obj)
|
||||
|
||||
if (page_unmap(page)) {
|
||||
ihk_mc_free_pages_user(page_va, npages);
|
||||
/* Track change in page->count for shmobj.
|
||||
It is decremented in here or shmobj_invalidate() or clear_range(). */
|
||||
dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", phys, __FUNCTION__, phys, npages * PAGE_SIZE, PAGE_SIZE);
|
||||
memory_stat_rss_sub(npages * PAGE_SIZE, PAGE_SIZE);
|
||||
}
|
||||
#if 0
|
||||
dkprintf("shmobj_destroy(%p):"
|
||||
@@ -366,7 +374,7 @@ static void shmobj_ref(struct memobj *memobj)
|
||||
}
|
||||
|
||||
static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align,
|
||||
uintptr_t *physp, unsigned long *pflag)
|
||||
uintptr_t *physp, unsigned long *pflag)
|
||||
{
|
||||
struct shmobj *obj = to_shmobj(memobj);
|
||||
int error;
|
||||
@@ -417,6 +425,9 @@ static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align,
|
||||
}
|
||||
phys = virt_to_phys(virt);
|
||||
page = phys_to_page_insert_hash(phys);
|
||||
/* Track change in page->count for shmobj.
|
||||
Add when setting the PTE for a page with count of one in ihk_mc_pt_set_range(). */
|
||||
|
||||
if (page->mode != PM_NONE) {
|
||||
fkprintf("shmobj_get_page(%p,%#lx,%d,%p):"
|
||||
"page %p %#lx %d %d %#lx\n",
|
||||
@@ -429,6 +440,7 @@ static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align,
|
||||
page->mode = PM_MAPPED;
|
||||
page->offset = off;
|
||||
ihk_atomic_set(&page->count, 1);
|
||||
ihk_atomic64_set(&page->mapped, 0);
|
||||
page_list_insert(obj, page);
|
||||
virt = NULL;
|
||||
dkprintf("shmobj_get_page(%p,%#lx,%d,%p):alloc page. %p %#lx\n",
|
||||
@@ -469,6 +481,10 @@ static int shmobj_invalidate_page(struct memobj *memobj, uintptr_t phys,
|
||||
if (page_unmap(page)) {
|
||||
ihk_mc_free_pages_user(phys_to_virt(phys),
|
||||
pgsize/PAGE_SIZE);
|
||||
/* Track change in page->count for shmobj.
|
||||
It is decremented in here or shmobj_destroy() or clear_range(). */
|
||||
dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", phys, __FUNCTION__, phys, pgsize, PAGE_SIZE);
|
||||
memory_stat_rss_sub(pgsize, PAGE_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -289,6 +289,7 @@ long do_syscall(struct syscall_request *req, int cpu, int pid)
|
||||
|
||||
dkprintf("STATUS_PAGE_FAULT in syscall, pid: %d\n",
|
||||
cpu_local_var(current)->proc->pid);
|
||||
dkprintf("remote page fault,va=%lx,reason=%x\n", res.fault_address, res.fault_reason|PF_POPULATE);
|
||||
error = page_fault_process_vm(thread->vm,
|
||||
(void *)res.fault_address,
|
||||
res.fault_reason|PF_POPULATE);
|
||||
@@ -966,6 +967,14 @@ void terminate(int rc, int sig)
|
||||
|
||||
dkprintf("terminate,pid=%d\n", proc->pid);
|
||||
|
||||
/* rusage debug */
|
||||
for(i = 0; i < IHK_MAX_NUM_PGSIZES; i++) {
|
||||
dkprintf("memory_stat_rss[%d]=%ld\n", i, monitor->rusage_memory_stat_rss[i]);
|
||||
}
|
||||
for(i = 0; i < IHK_MAX_NUM_PGSIZES; i++) {
|
||||
dkprintf("memory_stat_mapped_file[%d]=%ld\n", i, monitor->rusage_memory_stat_mapped_file[i]);
|
||||
}
|
||||
|
||||
#ifdef DCFA_KMOD
|
||||
do_mod_exit(rc);
|
||||
#endif
|
||||
@@ -1413,6 +1422,7 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
|
||||
}
|
||||
}
|
||||
else if (flags & MAP_SHARED) {
|
||||
dkprintf("%s: MAP_SHARED,flags=%x,len=%ld\n", __FUNCTION__, flags, len);
|
||||
memset(&ads, 0, sizeof(ads));
|
||||
ads.shm_segsz = len;
|
||||
ads.shm_perm.mode = SHM_DEST;
|
||||
@@ -1424,6 +1434,7 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
|
||||
}
|
||||
}
|
||||
else {
|
||||
dkprintf("%s: anon&demand-paging\n", __FUNCTION__);
|
||||
error = zeroobj_create(&memobj);
|
||||
if (error) {
|
||||
ekprintf("do_mmap:zeroobj_create failed. %d\n", error);
|
||||
@@ -1473,19 +1484,21 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
|
||||
|
||||
for (i = 0; i < memobj->nr_pages; ++i) {
|
||||
error = ihk_mc_pt_set_range(proc->vm->address_space->page_table,
|
||||
proc->vm,
|
||||
(void *)range->start + (i * PAGE_SIZE),
|
||||
(void *)range->start + (i * PAGE_SIZE) +
|
||||
PAGE_SIZE,
|
||||
virt_to_phys(memobj->pages[i]),
|
||||
ptattr,
|
||||
PAGE_SHIFT);
|
||||
proc->vm,
|
||||
(void *)range->start + (i * PAGE_SIZE),
|
||||
(void *)range->start + (i * PAGE_SIZE) +
|
||||
PAGE_SIZE,
|
||||
virt_to_phys(memobj->pages[i]),
|
||||
ptattr,
|
||||
PAGE_SHIFT,
|
||||
range);
|
||||
if (error) {
|
||||
kprintf("%s: ERROR: mapping %d page of pre-mapped file\n",
|
||||
__FUNCTION__, i);
|
||||
}
|
||||
}
|
||||
dkprintf("%s: memobj 0x%lx pre-mapped\n", __FUNCTION__, memobj);
|
||||
// fileobj && MF_PREMAP && MPOL_SHM_PREMAP case: memory_stat_rss_add() is called in fileobj_create()
|
||||
}
|
||||
/*
|
||||
else if (memobj->flags & MF_REG_FILE) {
|
||||
@@ -1765,8 +1778,6 @@ SYSCALL_DECLARE(brk)
|
||||
extend_process_region(cpu_local_var(current)->vm,
|
||||
region->brk_end_allocated, address, vrflag);
|
||||
ihk_mc_spinlock_unlock_noirq(&cpu_local_var(current)->vm->memory_range_lock);
|
||||
dkprintf("SC(%d)[sys_brk] brk_end set to %lx\n",
|
||||
ihk_mc_get_processor_id(), region->brk_end);
|
||||
|
||||
if (old_brk_end_allocated == region->brk_end_allocated) {
|
||||
r = old_brk_end_allocated;
|
||||
@@ -1775,6 +1786,8 @@ SYSCALL_DECLARE(brk)
|
||||
|
||||
region->brk_end = address;
|
||||
r = region->brk_end;
|
||||
dkprintf("SC(%d)[sys_brk] brk_end set to %lx\n",
|
||||
ihk_mc_get_processor_id(), region->brk_end);
|
||||
|
||||
out:
|
||||
return r;
|
||||
@@ -2856,7 +2869,7 @@ SYSCALL_DECLARE(ioctl)
|
||||
ihk_mc_spinlock_unlock(&proc->mckfd_lock, irqstate);
|
||||
|
||||
if(fdp && fdp->ioctl_cb){
|
||||
//kprintf("ioctl: found system fd %d\n", fd);
|
||||
//kprintf("ioctl: found system fd %d\n", fd);
|
||||
rc = fdp->ioctl_cb(fdp, ctx);
|
||||
}
|
||||
else{
|
||||
@@ -7334,8 +7347,8 @@ SYSCALL_DECLARE(mremap)
|
||||
size = (oldsize < newsize)? oldsize: newsize;
|
||||
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
|
||||
error = move_pte_range(vm->address_space->page_table, vm,
|
||||
(void *)oldstart, (void *)newstart,
|
||||
size);
|
||||
(void *)oldstart, (void *)newstart,
|
||||
size, range);
|
||||
ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
|
||||
if (error) {
|
||||
ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
|
||||
|
||||
@@ -1098,6 +1098,8 @@ static int xpmem_attach(
|
||||
XPMEM_DEBUG("do_mmap(): vaddr=0x%lx, size=0x%lx, prot_flags=0x%lx, "
|
||||
"flags=0x%lx, fd=%d, offset=0x%lx",
|
||||
vaddr, size, prot_flags, flags, mckfd->fd, offset);
|
||||
/* The new range uses on-demand paging and is associated with shmobj because of
|
||||
MAP_ANONYMOUS && !MAP_PRIVATE && MAP_SHARED */
|
||||
at_vaddr = do_mmap(vaddr, size, prot_flags, flags, mckfd->fd, offset);
|
||||
if (IS_ERR((void *)(uintptr_t)at_vaddr)) {
|
||||
ret = at_vaddr;
|
||||
@@ -1110,13 +1112,21 @@ static int xpmem_attach(
|
||||
|
||||
vmr = lookup_process_memory_range(vm, at_vaddr, at_vaddr + 1);
|
||||
|
||||
/* To identify pages of XPMEM attachment for rusage accounting */
|
||||
if(vmr->memobj) {
|
||||
vmr->memobj->flags |= MF_XPMEM;
|
||||
} else {
|
||||
ekprintf("%s: vmr->memobj equals to NULL\n", __FUNCTION__);
|
||||
}
|
||||
|
||||
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
|
||||
|
||||
if (!vmr) {
|
||||
ret = -ENOENT;
|
||||
goto out_2;
|
||||
}
|
||||
vmr->private_data = att;
|
||||
vmr->private_data = att;
|
||||
|
||||
|
||||
att->at_vmr = vmr;
|
||||
|
||||
@@ -1199,6 +1209,8 @@ static int xpmem_detach(
|
||||
xpmem_unpin_pages(ap->seg, vm, att->at_vaddr, att->at_size);
|
||||
|
||||
range->private_data = NULL;
|
||||
/* range->memobj is released in xpmem_vm_munmap() --> xpmem_remove_process_range() -->
|
||||
xpmem_free_process_memory_range() */
|
||||
|
||||
mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
|
||||
|
||||
@@ -1416,6 +1428,8 @@ static void xpmem_detach_att(
|
||||
xpmem_unpin_pages(ap->seg, vm, att->at_vaddr, att->at_size);
|
||||
|
||||
range->private_data = NULL;
|
||||
/* range->memobj is released in xpmem_vm_munmap() --> xpmem_remove_process_range() -->
|
||||
xpmem_free_process_memory_range() */
|
||||
|
||||
att->flags &= ~XPMEM_FLAG_VALIDPTEs;
|
||||
|
||||
@@ -1677,7 +1691,8 @@ int xpmem_remove_process_memory_range(
|
||||
}
|
||||
|
||||
remaining_vmr->private_data = NULL;
|
||||
|
||||
/* This function is always followed by xpmem_free_process_memory_range()
|
||||
which in turn calls memobj_release() */
|
||||
remaining_vaddr = att->at_vaddr;
|
||||
}
|
||||
|
||||
@@ -1699,6 +1714,8 @@ int xpmem_remove_process_memory_range(
|
||||
att->at_size = remaining_vmr->end - remaining_vmr->start;
|
||||
|
||||
vmr->private_data = NULL;
|
||||
/* This function is always followed by [xpmem_]free_process_memory_range()
|
||||
which in turn calls memobj_release() */
|
||||
|
||||
out:
|
||||
mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
|
||||
@@ -1910,17 +1927,19 @@ static int xpmem_remap_pte(
|
||||
__FUNCTION__, ret);
|
||||
goto out;
|
||||
}
|
||||
// memory_stat_rss_add() is called by the process hosting the memory area
|
||||
}
|
||||
else {
|
||||
ret = ihk_mc_pt_set_range(vm->address_space->page_table, vm,
|
||||
att_pgaddr, att_pgaddr + att_pgsize, seg_phys, att_attr,
|
||||
vmr->pgshift);
|
||||
vmr->pgshift, vmr);
|
||||
if (ret) {
|
||||
ret = -EFAULT;
|
||||
ekprintf("%s: ERROR: ihk_mc_pt_set_range() failed %d\n",
|
||||
__FUNCTION__, ret);
|
||||
goto out;
|
||||
}
|
||||
// memory_stat_rss_add() is called by the process hosting the memory area
|
||||
}
|
||||
|
||||
out:
|
||||
|
||||
@@ -102,6 +102,7 @@ static int alloc_zeroobj(void)
|
||||
|
||||
memset(obj, 0, sizeof(*obj));
|
||||
obj->memobj.ops = &zeroobj_ops;
|
||||
obj->memobj.flags = MF_ZEROOBJ;
|
||||
obj->memobj.size = 0;
|
||||
page_list_init(obj);
|
||||
ihk_mc_spinlock_init(&obj->memobj.lock);
|
||||
@@ -127,6 +128,7 @@ static int alloc_zeroobj(void)
|
||||
page->mode = PM_MAPPED;
|
||||
page->offset = 0;
|
||||
ihk_atomic_set(&page->count, 1);
|
||||
ihk_atomic64_set(&page->mapped, 0);
|
||||
page_list_insert(obj, page);
|
||||
virt = NULL;
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
|
||||
#include <arch-lock.h>
|
||||
#include <ihk/memconst.h>
|
||||
#include <ihk/kmsg.h>
|
||||
|
||||
struct ihk_kmsg_buf {
|
||||
int tail;
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
|
||||
struct memobj;
|
||||
struct process_vm;
|
||||
struct vm_range;
|
||||
|
||||
enum ihk_mc_gma_type {
|
||||
IHK_MC_GMA_MAP_START,
|
||||
@@ -178,7 +179,7 @@ int ihk_mc_pt_change_attr_range(page_table_t pt, void *start, void *end,
|
||||
pte_t *ihk_mc_pt_lookup_pte(page_table_t pt, void *virt, int pgshift, void **pgbasep, size_t *pgsizep, int *p2alignp);
|
||||
int ihk_mc_pt_set_range(page_table_t pt, struct process_vm *vm, void *start,
|
||||
void *end, uintptr_t phys, enum ihk_mc_pt_attribute attr,
|
||||
int pgshift);
|
||||
int pgshift, struct vm_range *range);
|
||||
int ihk_mc_pt_set_pte(page_table_t pt, pte_t *ptep, size_t pgsize, uintptr_t phys, enum ihk_mc_pt_attribute attr);
|
||||
int ihk_mc_pt_prepare_map(page_table_t pt, void *virt, unsigned long size,
|
||||
enum ihk_mc_pt_prepare_flag);
|
||||
@@ -189,7 +190,7 @@ typedef int pte_visitor_t(void *arg, page_table_t pt, pte_t *ptep,
|
||||
int visit_pte_range(page_table_t pt, void *start, void *end, int pgshift,
|
||||
enum visit_pte_flag flags, pte_visitor_t *funcp, void *arg);
|
||||
int move_pte_range(page_table_t pt, struct process_vm *vm,
|
||||
void *src, void *dest, size_t size);
|
||||
void *src, void *dest, size_t size, struct vm_range *range);
|
||||
|
||||
struct page_table *ihk_mc_pt_create(ihk_mc_ap_flag ap_flag);
|
||||
/* XXX: proper use of struct page_table and page_table_t is unknown */
|
||||
|
||||
@@ -1,45 +1,8 @@
|
||||
#ifndef __IHK_RUSAGE_H
|
||||
#define __IHK_RUSAGE_H
|
||||
|
||||
/*
 * Per-CPU monitoring record. One instance per CPU is stored in the
 * trailing cpu[] array of struct ihk_os_monitor.
 *
 * The IHK_OS_MONITOR_* constants are defined inline right after the
 * `status` member; presumably they are the values `status` (and
 * `status_bak`) can take -- confirm against the code that writes them.
 * Note the numbering is not contiguous (0-5, 8-10, 99).
 */
struct ihk_os_cpu_monitor {
|
||||
int status;
|
||||
#define IHK_OS_MONITOR_NOT_BOOT 0
|
||||
#define IHK_OS_MONITOR_IDLE 1
|
||||
#define IHK_OS_MONITOR_USER 2
|
||||
#define IHK_OS_MONITOR_KERNEL 3
|
||||
#define IHK_OS_MONITOR_KERNEL_HEAVY 4
|
||||
#define IHK_OS_MONITOR_KERNEL_OFFLOAD 5
|
||||
#define IHK_OS_MONITOR_KERNEL_FREEZING 8
|
||||
#define IHK_OS_MONITOR_KERNEL_FROZEN 9
|
||||
#define IHK_OS_MONITOR_KERNEL_THAW 10
|
||||
#define IHK_OS_MONITOR_PANIC 99
|
||||
/* NOTE(review): name suggests a saved copy of `status` -- confirm usage */
int status_bak;
|
||||
unsigned long counter;
|
||||
unsigned long ocounter;
|
||||
/* NOTE(review): presumably TSC ticks accumulated in user / system mode
 * (struct ihk_os_monitor carries ns_per_tsc) -- confirm */
unsigned long user_tsc;
|
||||
unsigned long system_tsc;
|
||||
};
|
||||
|
||||
/*
 * OS-wide monitoring / resource-usage record.
 *
 * Holds thread-count, RSS, kernel-memory, hugetlb and total-memory usage
 * counters (each with a matching "max" field), the NUMA-node and processor
 * counts, a fixed-size per-NUMA statistics array, and a trailing
 * zero-length array of per-CPU records.
 *
 * Member order and sizes are left untouched here: the `reserve` padding
 * suggests the layout is relied upon elsewhere -- TODO confirm before
 * changing it.
 */
struct ihk_os_monitor {
|
||||
unsigned long rusage_max_num_threads;
|
||||
unsigned long rusage_num_threads;
|
||||
unsigned long rusage_rss_max;
|
||||
/* Signed, unlike the other usage counters in this struct. */
long rusage_rss_current;
|
||||
unsigned long rusage_kmem_usage;
|
||||
unsigned long rusage_kmem_max_usage;
|
||||
unsigned long rusage_hugetlb_usage;
|
||||
unsigned long rusage_hugetlb_max_usage;
|
||||
unsigned long rusage_total_memory;
|
||||
unsigned long rusage_total_memory_usage;
|
||||
unsigned long rusage_total_memory_max_usage;
|
||||
unsigned long num_numa_nodes;
|
||||
unsigned long num_processors;
|
||||
/* NOTE(review): presumably nanoseconds per TSC tick -- confirm units */
unsigned long ns_per_tsc;
|
||||
unsigned long reserve[128];
|
||||
/* NOTE(review): presumably indexed by NUMA node id (capacity 1024) -- confirm */
unsigned long rusage_numa_stat[1024];
|
||||
|
||||
/*
 * Zero-length trailing array (GNU C extension): contributes no storage to
 * sizeof(struct ihk_os_monitor). Presumably the creator allocates room for
 * the per-CPU entries past the end of the struct -- confirm.
 */
struct ihk_os_cpu_monitor cpu[0];
|
||||
};
|
||||
#include <arch/rusage.h>
|
||||
#include <ihk/monitor.h>
|
||||
|
||||
enum RUSAGE_MEMBER {
|
||||
RUSAGE_RSS,
|
||||
|
||||
Reference in New Issue
Block a user