ihk_os_getrusage(): Add per-page-size memory usage accounting

This commit is contained in:
Masamichi Takagi
2017-08-04 12:00:31 +09:00
parent 184c2d311c
commit 74f15783d2
22 changed files with 572 additions and 130 deletions

View File

@@ -440,6 +440,31 @@ clone_thread(struct thread *org, unsigned long pc, unsigned long sp,
goto err_free_proc;
}
/* Copy mckfd list
FIXME: Replace list manipulation with list_add() etc. */
long irqstate = ihk_mc_spinlock_lock(&proc->mckfd_lock);
struct mckfd *cur;
for (cur = org->proc->mckfd; cur; cur = cur->next) {
struct mckfd *mckfd = kmalloc(sizeof(struct mckfd), IHK_MC_AP_NOWAIT);
if(!mckfd) {
release_address_space(asp);
kfree(proc->vm);
kfree(proc);
goto err_free_proc;
}
memcpy(mckfd, cur, sizeof(struct mckfd));
if (proc->mckfd == NULL) {
proc->mckfd = mckfd;
mckfd->next = NULL;
}
else {
mckfd->next = proc->mckfd;
proc->mckfd = mckfd;
}
}
ihk_mc_spinlock_unlock(&proc->mckfd_lock, irqstate);
thread->vm->vdso_addr = org->vm->vdso_addr;
thread->vm->vvar_addr = org->vm->vvar_addr;
thread->proc->maxrss = org->proc->maxrss;
@@ -532,6 +557,7 @@ ptrace_traceme(void)
struct copy_args {
struct process_vm *new_vm;
unsigned long new_vrflag;
struct vm_range *range;
/* out */
intptr_t fault_addr;
@@ -591,12 +617,13 @@ static int copy_user_pte(void *arg0, page_table_t src_pt, pte_t *src_ptep, void
}
error = ihk_mc_pt_set_range(args->new_vm->address_space->page_table,
args->new_vm, pgaddr, pgaddr+pgsize, phys, attr,
pgshift);
args->new_vm, pgaddr, pgaddr + pgsize, phys, attr,
pgshift, args->range);
if (error) {
args->fault_addr = (intptr_t)pgaddr;
goto out;
}
// fork/clone case: memory_stat_rss_add() is called in ihk_mc_pt_set_range()
dkprintf("copy_user_pte(): new PTE set\n");
error = 0;
@@ -655,7 +682,8 @@ static int copy_user_ranges(struct process_vm *vm, struct process_vm *orgvm)
args.new_vrflag = range->flag;
args.new_vm = vm;
args.fault_addr = -1;
args.range = range;
error = visit_pte_range(orgvm->address_space->page_table,
(void *)range->start, (void *)range->end,
range->pgshift, VPTEF_SKIP_NULL,
@@ -670,6 +698,7 @@ static int copy_user_ranges(struct process_vm *vm, struct process_vm *orgvm)
}
goto err_free_range_rollback;
}
// memory_stat_rss_add() is called in child-node, i.e. copy_user_pte()
insert_vm_range_list(vm, range);
}
@@ -702,13 +731,13 @@ int update_process_page_table(struct process_vm *vm,
attr = arch_vrflag_to_ptattr(range->flag, PF_POPULATE, NULL);
flags = ihk_mc_spinlock_lock(&vm->page_table_lock);
error = ihk_mc_pt_set_range(vm->address_space->page_table, vm,
(void *)range->start, (void *)range->end, phys, attr,
range->pgshift);
(void *)range->start, (void *)range->end, phys, attr,
range->pgshift, range);
if (error) {
kprintf("update_process_page_table:ihk_mc_pt_set_range failed. %d\n", error);
goto out;
}
// memory_stat_rss_add() is called in ihk_mc_pt_set_range()
error = 0;
out:
ihk_mc_spinlock_unlock(&vm->page_table_lock, flags);
@@ -730,6 +759,7 @@ int split_process_memory_range(struct process_vm *vm, struct vm_range *range,
"ihk_mc_pt_split failed. %d\n", error);
goto out;
}
// memory_stat_rss_add() is called in child-node, i.e. ihk_mc_pt_split() to deal with L3->L2 case
newrange = kmalloc(sizeof(struct vm_range), IHK_MC_AP_NOWAIT);
if (!newrange) {
@@ -872,6 +902,8 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range)
}
}
dkprintf("%s: vm=%p,range=%p,%lx-%lx\n", __FUNCTION__, vm, range, range->start, range->end);
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
if (range->memobj) {
memobj_lock(range->memobj);
@@ -889,8 +921,11 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range)
vm, start0, end0, start, end, range->memobj, error);
/* through */
}
// memory_stat_rss_sub() is called downstream, i.e. ihk_mc_pt_free_range() to deal with empty PTE
}
else {
// memory_stat_rss_sub() isn't called because free_physical is set to zero in clear_range()
dkprintf("%s,memory_stat_rss_sub() isn't called, VR_REMOTE | VR_IO_NOCACHE | VR_RESERVED case, %lx-%lx\n", __FUNCTION__, start, end);
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
error = ihk_mc_pt_clear_range(vm->address_space->page_table, vm,
(void *)start, (void *)end);
@@ -1100,6 +1135,7 @@ int add_process_memory_range(struct process_vm *vm,
struct memobj *memobj, off_t offset,
int pgshift, struct vm_range **rp)
{
dkprintf("%s: start=%lx,end=%lx,phys=%lx,flag=%lx\n", __FUNCTION__, start, end, phys, flag);
struct vm_range *range;
int rc;
@@ -1147,6 +1183,7 @@ int add_process_memory_range(struct process_vm *vm,
}
else {
rc = update_process_page_table(vm, range, phys, 0);
// memory_stat_rss_add() is called in ihk_mc_pt_set_range()
}
if (rc != 0) {
@@ -1394,6 +1431,8 @@ static int remap_one_page(void *arg0, page_table_t pt, pte_t *ptep,
page = phys_to_page(phys);
if (page && page_unmap(page)) {
ihk_mc_free_pages_user(phys_to_virt(phys), pgsize/PAGE_SIZE);
dkprintf("%lx-,%s: calling memory_stat_rss_sub(),size=%ld,pgsize=%ld\n", phys, __FUNCTION__, pgsize, pgsize);
rusage_memory_stat_sub(args->memobj, pgsize, pgsize);
}
error = 0;
@@ -1565,6 +1604,7 @@ static int invalidate_one_page(void *arg0, page_table_t pt, pte_t *ptep,
arg0, pt, ptep, *ptep, pgaddr, pgshift, error);
goto out;
}
// memory_stat_rss_sub() is called in downstream, i.e. shmobj_invalidate_page()
error = 0;
out:
@@ -1596,6 +1636,8 @@ int invalidate_process_memory_range(struct process_vm *vm,
vm, range, start, end, error);
goto out;
}
// memory_stat_rss_sub() is called downstream, i.e. invalidate_one_page() to deal with empty PTEs
out:
dkprintf("invalidate_process_memory_range(%p,%p,%#lx,%#lx):%d\n",
vm, range, start, end, error);
@@ -1636,6 +1678,7 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang
goto out;
}
/*****/
dkprintf("%s: pgaddr=%lx,range->start=%lx,range->end=%lx,pgaddr+pgsize=%lx\n", __FUNCTION__, pgaddr, range->start, range->end, pgaddr + pgsize);
while (((uintptr_t)pgaddr < range->start)
|| (range->end < ((uintptr_t)pgaddr + pgsize))) {
ptep = NULL;
@@ -1647,6 +1690,7 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang
pgaddr = (void *)(fault_addr & ~(pgsize - 1));
}
/*****/
dkprintf("%s: ptep=%lx,pte_is_null=%d,pte_is_fileoff=%d\n", __FUNCTION__, ptep, ptep ? pte_is_null(ptep) : -1, ptep ? pte_is_fileoff(ptep, pgsize) : -1);
if (!ptep || pte_is_null(ptep) || pte_is_fileoff(ptep, pgsize)) {
phys = NOPHYS;
if (range->memobj) {
@@ -1659,7 +1703,7 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang
off = pte_get_off(ptep, pgsize);
}
error = memobj_get_page(range->memobj, off, p2align,
&phys, &memobj_flag);
&phys, &memobj_flag);
if (error) {
struct memobj *obj;
@@ -1671,6 +1715,7 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang
goto out;
}
}
// memory_stat_rss_add() is called downstream, i.e. memobj_get_page() to check page->count
}
if (phys == NOPHYS) {
void *virt = NULL;
@@ -1704,6 +1749,10 @@ retry:
memset(virt, 0, pgsize);
phys = virt_to_phys(virt);
if (phys_to_page(phys)) {
dkprintf("%s: NOPHYS,phys=%lx,vmr(%lx-%lx),flag=%x,fa=%lx,reason=%x\n",
__FUNCTION__, page_to_phys(page),
range->start, range->end, range->flag, fault_addr, reason);
page_map(phys_to_page(phys));
}
}
@@ -1730,6 +1779,10 @@ retry:
void *virt;
size_t npages;
if (!page) {
kprintf("%s: WARNING: cow on non-struct-page-managed page\n", __FUNCTION__);
}
npages = pgsize / PAGE_SIZE;
virt = ihk_mc_alloc_aligned_pages_user(npages, p2align,
IHK_MC_AP_NOWAIT);
@@ -1738,34 +1791,54 @@ retry:
kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):cannot allocate copy page. %d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error);
goto out;
}
dkprintf("%s: copying 0x%lx:%lu\n",
__FUNCTION__, pgaddr, pgsize);
dkprintf("%s: cow,copying virt:%lx<-%lx,phys:%lx<-%lx,pgsize=%lu\n",
__FUNCTION__, virt, phys_to_virt(phys), virt_to_phys(virt), phys, pgsize);
memcpy(virt, phys_to_virt(phys), pgsize);
phys = virt_to_phys(virt);
if (page) {
page_unmap(page);
/* Call rusage_memory_stat_add() because remote page fault may create a page not pointed-to by PTE */
if(rusage_memory_stat_add(range, phys, pgsize, pgsize)) {
dkprintf("%lx+,%s: remote page fault + cow, calling memory_stat_rss_add(),pgsize=%ld\n",
phys, __FUNCTION__, pgsize);
}
if (page) {
if (page_unmap(page)) {
dkprintf("%lx-,%s: cow,calling memory_stat_rss_sub(),size=%ld,pgsize=%ld\n", phys, __FUNCTION__, pgsize, pgsize);
rusage_memory_stat_sub(range->memobj, pgsize, pgsize);
}
}
phys = virt_to_phys(virt);
page = phys_to_page(phys);
}
}
/*****/
if (ptep) {
if(rusage_memory_stat_add(range, phys, pgsize, pgsize)) {
/* on-demand paging, phys pages are obtained by ihk_mc_alloc_aligned_pages_user() or get_page() */
dkprintf("%lx+,%s: (on-demand paging && first map) || cow,calling memory_stat_rss_add(),phys=%lx,pgsize=%ld\n",
phys, __FUNCTION__, phys, pgsize);
} else {
dkprintf("%s: !calling memory_stat_rss_add(),phys=%lx,pgsize=%ld\n",
__FUNCTION__, phys, pgsize);
}
dkprintf("%s: attr=%x\n", __FUNCTION__, attr);
error = ihk_mc_pt_set_pte(vm->address_space->page_table, ptep,
pgsize, phys, attr);
if (error) {
kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):set_pte failed. %d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error);
goto out;
}
dkprintf("%s: non-NULL pte,page=%lx,page_is_in_memobj=%d,page->count=%d\n", __FUNCTION__, page, page ? page_is_in_memobj(page) : 0, page ? ihk_atomic_read(&page->count) : 0);
}
else {
error = ihk_mc_pt_set_range(vm->address_space->page_table, vm,
pgaddr, pgaddr + pgsize, phys,
attr, range->pgshift);
attr, range->pgshift, range);
if (error) {
kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):set_range failed. %d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error);
goto out;
}
// memory_stat_rss_add() is called in downstream with !memobj check
}
flush_tlb_single(fault_addr);
vm->currss += pgsize;
@@ -1778,7 +1851,14 @@ retry:
out:
ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
if (page) {
page_unmap(page);
/* Unmap stray struct page */
dkprintf("%s: out,phys=%lx,vmr(%lx-%lx),flag=%x,fa=%lx,reason=%x\n",
__FUNCTION__, page_to_phys(page),
range->start, range->end, range->flag, fault_addr, reason);
if (page_unmap(page)) {
dkprintf("%lx-,%s: out,calling memory_stat_rss_sub(),size=%ld,pgsize=%ld\n", page_to_phys(page), __FUNCTION__, pgsize, pgsize);
rusage_memory_stat_sub(range->memobj, pgsize, pgsize);
}
}
dkprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx): %d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error);
return error;
@@ -1947,6 +2027,7 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
unsigned long at_rand;
struct process *proc = thread->proc;
unsigned long ap_flag;
struct vm_range *range;
/* Create stack range */
end = STACK_TOP(&thread->vm->region) & LARGE_PAGE_MASK;
@@ -1991,17 +2072,18 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
vrflag |= VR_MAXPROT_READ | VR_MAXPROT_WRITE | VR_MAXPROT_EXEC;
#define NOPHYS ((uintptr_t)-1)
if ((rc = add_process_memory_range(thread->vm, start, end, NOPHYS,
vrflag, NULL, 0, LARGE_PAGE_SHIFT, NULL)) != 0) {
vrflag, NULL, 0, LARGE_PAGE_SHIFT, &range)) != 0) {
ihk_mc_free_pages_user(stack, minsz >> PAGE_SHIFT);
return rc;
}
/* Map physical pages for initial stack frame */
error = ihk_mc_pt_set_range(thread->vm->address_space->page_table,
thread->vm, (void *)(end - minsz),
(void *)end, virt_to_phys(stack),
arch_vrflag_to_ptattr(vrflag, PF_POPULATE, NULL),
LARGE_PAGE_SHIFT);
thread->vm, (void *)(end - minsz),
(void *)end, virt_to_phys(stack),
arch_vrflag_to_ptattr(vrflag, PF_POPULATE, NULL),
LARGE_PAGE_SHIFT, range
);
if (error) {
kprintf("init_process_stack:"
@@ -2011,6 +2093,8 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
return error;
}
// memory_stat_rss_add() is called in ihk_mc_pt_set_range();
/* set up initial stack frame */
p = (unsigned long *)(stack + minsz);
s_ind = -1;
@@ -2112,8 +2196,9 @@ unsigned long extend_process_region(struct process_vm *vm,
ihk_mc_free_pages_user(p, (new_end_allocated - end_allocated) >> PAGE_SHIFT);
return end_allocated;
}
// memory_stat_rss_add() is called in add_process_memory_range()
dkprintf("%s: new_end_allocated: 0x%lu, align_size: %lu, align_mask: %lx\n",
dkprintf("%s: new_end_allocated: 0x%lx, align_size: %lu, align_mask: %lx\n",
__FUNCTION__, new_end_allocated, align_size, align_mask);
return new_end_allocated;
@@ -2134,6 +2219,9 @@ int remove_process_region(struct process_vm *vm,
(void *)start, (void *)end);
ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
// memory_stat_rss_sub() isn't called because this execution path is no loger reached
dkprintf("%s: memory_stat_rss_sub() isn't called,start=%lx,end=%lx\n", __FUNCTION__, start, end);
return 0;
}