From d7b8e7f4f46bb89c6a661e599fe81e813110898e Mon Sep 17 00:00:00 2001 From: Tomoki Shirasawa Date: Fri, 14 Jul 2017 09:51:39 +0900 Subject: [PATCH] fix to count user pages refs #864 --- arch/x86/kernel/memory.c | 8 +- executer/kernel/mcctrl/ikc.c | 10 +- executer/kernel/mcctrl/mcctrl.h | 2 +- kernel/Makefile.build.in | 2 +- kernel/ap.c | 2 - kernel/cls.c | 4 +- kernel/fileobj.c | 14 +-- kernel/freeze.c | 5 +- kernel/host.c | 9 +- kernel/include/cls.h | 22 +--- kernel/include/rusage.h | 195 +++++++++++++++++--------------- kernel/include/syscall.h | 2 +- kernel/init.c | 37 ++---- kernel/mem.c | 68 ++++++----- kernel/process.c | 23 ++-- kernel/shmobj.c | 9 +- kernel/syscall.c | 30 +++-- lib/abort.c | 2 +- lib/include/ihk/mm.h | 37 ++++-- lib/include/ihk/rusage.h | 140 +++++++++++++---------- 20 files changed, 322 insertions(+), 299 deletions(-) diff --git a/arch/x86/kernel/memory.c b/arch/x86/kernel/memory.c index 7eb6b70d..3213a454 100644 --- a/arch/x86/kernel/memory.c +++ b/arch/x86/kernel/memory.c @@ -1107,7 +1107,7 @@ static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base, if (!(old & PFL1_FILEOFF) && args->free_physical) { if (!page || (page && page_unmap(page))) { - ihk_mc_free_pages(phys_to_virt(phys), 1); + ihk_mc_free_pages_user(phys_to_virt(phys), 1); dkprintf("%s: freeing regular page at 0x%lx\n", __FUNCTION__, base); } args->vm->currss -= PTL1_SIZE; @@ -1156,7 +1156,8 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base, if (!(old & PFL2_FILEOFF) && args->free_physical) { if (!page || (page && page_unmap(page))) { - ihk_mc_free_pages(phys_to_virt(phys), PTL2_SIZE/PTL1_SIZE); + ihk_mc_free_pages_user(phys_to_virt(phys), + PTL2_SIZE/PTL1_SIZE); dkprintf("%s: freeing large page at 0x%lx\n", __FUNCTION__, base); } args->vm->currss -= PTL2_SIZE; @@ -1221,7 +1222,8 @@ static int clear_range_l3(void *args0, pte_t *ptep, uint64_t base, if (!(old & PFL3_FILEOFF) && args->free_physical) { if (!page || (page && page_unmap(page))) { - ihk_mc_free_pages(phys_to_virt(phys), PTL3_SIZE/PTL1_SIZE); + ihk_mc_free_pages_user(phys_to_virt(phys), + PTL3_SIZE/PTL1_SIZE); } args->vm->currss -= PTL3_SIZE; } diff --git a/executer/kernel/mcctrl/ikc.c b/executer/kernel/mcctrl/ikc.c index fa0612a9..689d51da 100644 --- a/executer/kernel/mcctrl/ikc.c +++ b/executer/kernel/mcctrl/ikc.c @@ -56,7 +56,7 @@ void sig_done(unsigned long arg, int err); void mcctrl_perf_ack(ihk_os_t os, struct ikc_scd_packet *packet); void mcctrl_os_read_write_cpu_response(ihk_os_t os, struct ikc_scd_packet *pisp); -void mcctrl_event_signal(ihk_os_t os, struct ikc_scd_packet *pisp); +void mcctrl_eventfd(ihk_os_t os, struct ikc_scd_packet *pisp); /* XXX: this runs in atomic context! */ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, @@ -121,8 +121,8 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, mcctrl_os_read_write_cpu_response(__os, pisp); break; - case SCD_MSG_EVENT_SIGNAL: - mcctrl_event_signal(__os, pisp); + case SCD_MSG_EVENTFD: + mcctrl_eventfd(__os, pisp); break; default: @@ -401,7 +401,7 @@ void destroy_ikc_channels(ihk_os_t os) } void -mcctrl_event_signal(ihk_os_t os, struct ikc_scd_packet *pisp) +mcctrl_eventfd(ihk_os_t os, struct ikc_scd_packet *pisp) { - ihk_os_event_signal(os, 0); + ihk_os_eventfd(os, 0); } diff --git a/executer/kernel/mcctrl/mcctrl.h b/executer/kernel/mcctrl/mcctrl.h index 76626e6d..5166f447 100644 --- a/executer/kernel/mcctrl/mcctrl.h +++ b/executer/kernel/mcctrl/mcctrl.h @@ -92,7 +92,7 @@ #define SCD_MSG_PROCFS_TID_CREATE 0x44 #define SCD_MSG_PROCFS_TID_DELETE 0x45 -#define SCD_MSG_EVENT_SIGNAL 0x46 +#define SCD_MSG_EVENTFD 0x46 #define SCD_MSG_PERF_CTRL 0x50 #define SCD_MSG_PERF_ACK 0x51 diff --git a/kernel/Makefile.build.in b/kernel/Makefile.build.in index c5b18b3f..14e9ede4 100644 --- a/kernel/Makefile.build.in +++ b/kernel/Makefile.build.in @@ -3,7 +3,7 @@ SRC=$(VPATH) IHKDIR=$(IHKBASE)/$(TARGETDIR) OBJS = init.o mem.o debug.o mikc.o listeners.o ap.o syscall.o cls.o host.o OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o shmobj.o -OBJS += zeroobj.o procfs.o devobj.o sysfs.o xpmem.o rusage.o profile.o freeze.o +OBJS += zeroobj.o procfs.o devobj.o sysfs.o xpmem.o profile.o freeze.o OBJS += rbtree.o DEPSRCS=$(wildcard $(SRC)/*.c) diff --git a/kernel/ap.c b/kernel/ap.c index 278827ff..3601e53d 100644 --- a/kernel/ap.c +++ b/kernel/ap.c @@ -44,8 +44,6 @@ static volatile int ap_stop = 1; mcs_lock_node_t ap_syscall_semaphore; -extern struct ihk_os_monitor *monitor; - static void ap_wait(void) { init_tick(); diff --git a/kernel/cls.c b/kernel/cls.c index a1088950..37128752 100644 --- a/kernel/cls.c +++ b/kernel/cls.c @@ -19,12 +19,12 @@ #include #include #include +#include extern int num_processors; struct cpu_local_var *clv; int cpu_local_var_initialized = 0; -extern struct ihk_os_monitor *monitor; void cpu_local_var_init(void) { @@ -38,7 +38,7 @@ void cpu_local_var_init(void) memset(clv, 0, z * PAGE_SIZE); for (i = 0; i < num_processors; i++) { - clv[i].monitor = monitor + i; + clv[i].monitor = monitor->cpu + i; INIT_LIST_HEAD(&clv[i].smp_func_req_list); } diff --git a/kernel/fileobj.c b/kernel/fileobj.c index d62d1bdf..490c0e17 100644 --- a/kernel/fileobj.c +++ b/kernel/fileobj.c @@ -255,7 +255,7 @@ int fileobj_create(int fd, struct memobj **objp, int *maxprotp) if (cpu_local_var(current)->proc->mpol_flags & MPOL_SHM_PREMAP) { /* Get the actual pages NUMA interleaved */ for (j = 0; j < nr_pages; ++j) { - mo->pages[j] = ihk_mc_alloc_aligned_pages_node(1, + mo->pages[j] = ihk_mc_alloc_aligned_pages_node_user(1, PAGE_P2ALIGN, IHK_MC_AP_NOWAIT, node); if (!mo->pages[j]) { kprintf("%s: ERROR: allocating pages[%d]\n", @@ -373,7 +373,7 @@ static void fileobj_release(struct memobj *memobj) to_memobj(free_obj)->flags); } else if (page_unmap(page)) { - ihk_mc_free_pages(page_va, 1); + ihk_mc_free_pages_user(page_va, 1); } #if 0 count = ihk_atomic_sub_return(1, &page->count); @@ -401,7 +401,7 @@ static void fileobj_release(struct memobj *memobj) for (i = 0; i < to_memobj(free_obj)->nr_pages; ++i) { if (to_memobj(free_obj)->pages[i]) - ihk_mc_free_pages(to_memobj(free_obj)->pages[i], 1); + ihk_mc_free_pages_user(to_memobj(free_obj)->pages[i], 1); } kfree(to_memobj(free_obj)->pages); @@ -557,7 +557,7 @@ static int fileobj_get_page(struct memobj *memobj, off_t off, int page_ind = off >> PAGE_SHIFT; if (!memobj->pages[page_ind]) { - virt = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT | IHK_MC_AP_USER); + virt = ihk_mc_alloc_pages_user(1, IHK_MC_AP_NOWAIT | IHK_MC_AP_USER); if (!virt) { error = -ENOMEM; @@ -572,7 +572,7 @@ static int fileobj_get_page(struct memobj *memobj, off_t off, * that if so */ if (!__sync_bool_compare_and_swap(&memobj->pages[page_ind], NULL, virt)) { - ihk_mc_free_pages(virt, 1); + ihk_mc_free_pages_user(virt, 1); } else { dkprintf("%s: MF_ZEROFILL: off: %lu -> 0x%lx allocated\n", @@ -606,7 +606,7 @@ static int fileobj_get_page(struct memobj *memobj, off_t off, if (!page) { npages = 1 << p2align; - virt = ihk_mc_alloc_pages(npages, IHK_MC_AP_NOWAIT | + virt = ihk_mc_alloc_pages_user(npages, IHK_MC_AP_NOWAIT | (to_memobj(obj)->flags & MF_ZEROFILL) ? IHK_MC_AP_USER : 0); if (!virt) { @@ -666,7 +666,7 @@ out: &mcs_node); out_nolock: if (virt) { - ihk_mc_free_pages(virt, npages); + ihk_mc_free_pages_user(virt, npages); } if (args) { kfree(args); diff --git a/kernel/freeze.c b/kernel/freeze.c index ccd1d4cb..40774215 100644 --- a/kernel/freeze.c +++ b/kernel/freeze.c @@ -3,6 +3,7 @@ #include #include #include +#include extern int nmi_mode; extern void mod_nmi_ctx(void *, void(*)()); @@ -12,7 +13,7 @@ extern void __freeze(); void freeze() { - struct ihk_os_monitor *monitor = cpu_local_var(monitor); + struct ihk_os_cpu_monitor *monitor = cpu_local_var(monitor); monitor->status_bak = monitor->status; monitor->status = IHK_OS_MONITOR_KERNEL_FROZEN; @@ -24,7 +25,7 @@ freeze() long freeze_thaw(void *nmi_ctx) { - struct ihk_os_monitor *monitor = cpu_local_var(monitor); + struct ihk_os_cpu_monitor *monitor = cpu_local_var(monitor); if (nmi_mode == 1) { if (monitor->status != IHK_OS_MONITOR_KERNEL_FROZEN) { diff --git a/kernel/host.c b/kernel/host.c index e1bfd88c..2994356e 100644 --- a/kernel/host.c +++ b/kernel/host.c @@ -145,7 +145,7 @@ int prepare_process_ranges_args_envs(struct thread *thread, goto err; } - if ((up_v = ihk_mc_alloc_pages(range_npages, + if ((up_v = ihk_mc_alloc_pages_user(range_npages, IHK_MC_AP_NOWAIT | ap_flags)) == NULL) { kprintf("ERROR: alloc pages for ELF section %i\n", i); goto err; @@ -163,7 +163,7 @@ int prepare_process_ranges_args_envs(struct thread *thread, if (error) { kprintf("%s: ihk_mc_pt_set_range failed. %d\n", __FUNCTION__, error); - ihk_mc_free_pages(up_v, range_npages); + ihk_mc_free_pages_user(up_v, range_npages); goto err; } @@ -253,7 +253,8 @@ int prepare_process_ranges_args_envs(struct thread *thread, addr = vm->region.map_start - PAGE_SIZE * SCD_RESERVED_COUNT; e = addr + PAGE_SIZE * ARGENV_PAGE_COUNT; - if((args_envs = ihk_mc_alloc_pages(ARGENV_PAGE_COUNT, IHK_MC_AP_NOWAIT)) == NULL){ + if((args_envs = ihk_mc_alloc_pages_user(ARGENV_PAGE_COUNT, + IHK_MC_AP_NOWAIT)) == NULL){ kprintf("ERROR: allocating pages for args/envs\n"); goto err; } @@ -261,7 +262,7 @@ int prepare_process_ranges_args_envs(struct thread *thread, if(add_process_memory_range(vm, addr, e, args_envs_p, flags, NULL, 0, PAGE_SHIFT, NULL) != 0){ - ihk_mc_free_pages(args_envs, ARGENV_PAGE_COUNT); + ihk_mc_free_pages_user(args_envs, ARGENV_PAGE_COUNT); kprintf("ERROR: adding memory range for args/envs\n"); goto err; } diff --git a/kernel/include/cls.h b/kernel/include/cls.h index f754c170..3bf6546d 100644 --- a/kernel/include/cls.h +++ b/kernel/include/cls.h @@ -28,26 +28,6 @@ struct kmalloc_header { /* 32 bytes */ }; -#define IHK_OS_MONITOR_NOT_BOOT 0 -#define IHK_OS_MONITOR_IDLE 1 -#define IHK_OS_MONITOR_USER 2 -#define IHK_OS_MONITOR_KERNEL 3 -#define IHK_OS_MONITOR_KERNEL_HEAVY 4 -#define IHK_OS_MONITOR_KERNEL_OFFLOAD 5 -#define IHK_OS_MONITOR_KERNEL_FREEZING 8 -#define IHK_OS_MONITOR_KERNEL_FROZEN 9 -#define IHK_OS_MONITOR_KERNEL_THAW 10 -#define IHK_OS_MONITOR_PANIC 99 - -struct ihk_os_monitor { - int status; - int status_bak; - unsigned long counter; - unsigned long ocounter; - unsigned long user_tsc; - unsigned long system_tsc; -}; - #include #define CPU_STATUS_DISABLE (0) #define CPU_STATUS_IDLE (1) @@ -112,7 +92,7 @@ struct cpu_local_var { int no_preempt; int timer_enabled; int kmalloc_initialized; - struct ihk_os_monitor *monitor; + struct ihk_os_cpu_monitor *monitor; ihk_spinlock_t smp_func_req_lock; struct list_head smp_func_req_list; diff --git a/kernel/include/rusage.h b/kernel/include/rusage.h index 2a8e2147..e31ad1f6 100644 --- a/kernel/include/rusage.h +++ b/kernel/include/rusage.h @@ -2,106 +2,32 @@ #define __RUSAGE_H #include - -#define RUSAGE_DEFAULT_SIZE 10 - -enum RUSAGE_MEMBER { - RUSAGE_RSS, - RUSAGE_CACHE, - RUSAGE_RSS_HUGE, - RUSAGE_MAPPED_FILE, - RUSAGE_MAX_USAGE, - RUSAGE_KMEM_USAGE, - RUSAGE_KMAX_USAGE, - RUSAGE_NUM_NUMA_NODES, - RUSAGE_NUMA_STAT, - RUSAGE_HUGETLB , - RUSAGE_HUGETLB_MAX , - RUSAGE_STAT_SYSTEM , - RUSAGE_STAT_USER , - RUSAGE_USAGE , - RUSAGE_USAGE_PER_CPU , - RUSAGE_NUM_THREADS , - RUSAGE_MAX_NUM_THREADS -}; - -struct r_data{ - unsigned long pid; - unsigned long rss; - unsigned long cache; - unsigned long rss_huge; - unsigned long mapped_file; - unsigned long max_usage; - unsigned long kmem_usage; - unsigned long kmax_usage; - unsigned long hugetlb; - unsigned long hugetlb_max; - unsigned long stat_system; - unsigned long stat_user; - unsigned long usage; - struct r_data *next; -} ; -typedef struct r_data rusage_data; - -rusage_data *rdata[RUSAGE_DEFAULT_SIZE]; -unsigned long rusage_max_num_threads; -unsigned long rusage_num_threads; - -enum ihk_os_status { - IHK_STATUS_INACTIVE, - IHK_STATUS_BOOTING, - IHK_STATUS_RUNNING, - IHK_STATUS_SHUTDOWN, - IHK_STATUS_PANIC, - IHK_STATUS_HUNGUP, - IHK_STATUS_FREEZING, - IHK_STATUS_FROZEN, -}; - -enum ihk_os_status os_status; -unsigned long sys_delegate_count; -enum sys_delegate_state_enum { - ENTER_KERNEL, - EXIT_KERNEL, -}; -enum sys_delegate_state_enum sys_delegate_state; - -unsigned long rusage_rss_max; -long rusage_rss_current; -unsigned long rusage_kmem_usage; -unsigned long rusage_kmem_max_usage; -unsigned long rusage_hugetlb_usage; -unsigned long rusage_hugetlb_max_usage; -unsigned long rusage_numa_stat[1024]; -unsigned long rusage_max_memory; - -#define RUSAGE_MEM_LIMIT (2 * 1024 * 1024) // 2MB - -void rusage_init(); +#include #ifdef ENABLE_RUSAGE -extern void event_signal(); +#define RUSAGE_MEM_LIMIT (2 * 1024 * 1024) // 2MB + +extern void eventfd(); static inline void -rusage_max_memory_add(unsigned long size) +rusage_total_memory_add(unsigned long size) { - rusage_max_memory += size; + monitor->rusage_total_memory += size; } static inline void rusage_rss_add(unsigned long size) { - unsigned long newval = __sync_add_and_fetch(&rusage_rss_current, size); - unsigned long oldval = rusage_rss_max; + unsigned long newval; + unsigned long oldval; unsigned long retval; + newval = __sync_add_and_fetch(&monitor->rusage_rss_current, size); + oldval = monitor->rusage_rss_max; while (newval > oldval) { - retval = __sync_val_compare_and_swap(&rusage_rss_max, oldval, - newval); + retval = __sync_val_compare_and_swap(&monitor->rusage_rss_max, + oldval, newval); if (retval == oldval) { - if (rusage_max_memory - newval < RUSAGE_MEM_LIMIT) { - event_signal(); - } break; } oldval = retval; @@ -111,13 +37,39 @@ rusage_rss_add(unsigned long size) static inline void rusage_rss_sub(unsigned long size) { - __sync_sub_and_fetch(&rusage_rss_current, size); + __sync_sub_and_fetch(&monitor->rusage_rss_current, size); +} + +static inline void +rusage_kmem_add(unsigned long size) +{ + unsigned long newval; + unsigned long oldval; + unsigned long retval; + + newval = __sync_add_and_fetch(&monitor->rusage_kmem_usage, size); + oldval = monitor->rusage_kmem_max_usage; + while (newval > oldval) { + retval = __sync_val_compare_and_swap( + &monitor->rusage_kmem_max_usage, + oldval, newval); + if (retval == oldval) { + break; + } + oldval = retval; + } +} + +static inline void +rusage_kmem_sub(unsigned long size) +{ + __sync_sub_and_fetch(&monitor->rusage_kmem_usage, size); } static inline void rusage_numa_add(int numa_id, unsigned long size) { - __sync_add_and_fetch(rusage_numa_stat + numa_id, size); + __sync_add_and_fetch(monitor->rusage_numa_stat + numa_id, size); rusage_rss_add(size); } @@ -125,18 +77,63 @@ static inline void rusage_numa_sub(int numa_id, unsigned long size) { rusage_rss_sub(size); - __sync_sub_and_fetch(rusage_numa_stat + numa_id, size); + __sync_sub_and_fetch(monitor->rusage_numa_stat + numa_id, size); +} + +static inline void +rusage_page_add(int numa_id, unsigned long pages, int is_user) +{ + unsigned long size = pages * PAGE_SIZE; + unsigned long newval; + unsigned long oldval; + unsigned long retval; + + if (is_user) + rusage_numa_add(numa_id, size); + else + rusage_kmem_add(size); + + newval = __sync_add_and_fetch(&monitor->rusage_total_memory_usage, size); + oldval = monitor->rusage_total_memory_max_usage; + while (newval > oldval) { + retval = __sync_val_compare_and_swap(&monitor->rusage_total_memory_max_usage, + oldval, newval); + if (retval == oldval) { + if (monitor->rusage_total_memory - newval < + RUSAGE_MEM_LIMIT) { + eventfd(); + } + break; + } + oldval = retval; + } +} + +static inline void +rusage_page_sub(int numa_id, unsigned long pages, int is_user) +{ + unsigned long size = pages * PAGE_SIZE; + + __sync_sub_and_fetch(&monitor->rusage_total_memory_usage, size); + + if (is_user) + rusage_numa_sub(numa_id, size); + else + rusage_kmem_sub(size); } static inline void rusage_num_threads_inc() { - unsigned long newval = __sync_add_and_fetch(&rusage_num_threads, 1); - unsigned long oldval = rusage_max_num_threads; + unsigned long newval; + unsigned long oldval; unsigned long retval; + newval = __sync_add_and_fetch(&monitor->rusage_num_threads, 1); + oldval = monitor->rusage_max_num_threads; while (newval > oldval) { - retval = __sync_val_compare_and_swap(&rusage_max_num_threads, + retval = __sync_val_compare_and_swap(&monitor-> + rusage_max_num_threads, oldval, newval); if (retval == oldval) { break; @@ -148,11 +145,11 @@ rusage_num_threads_inc() static inline void rusage_num_threads_dec() { - __sync_sub_and_fetch(&rusage_num_threads, 1); + __sync_sub_and_fetch(&monitor->rusage_num_threads, 1); } #else static inline void -rusage_max_memory_add(unsigned long size) +rusage_total_memory_add(unsigned long size) { } @@ -176,6 +173,16 @@ rusage_numa_sub(int numa_id, unsigned long size) { } +static inline void +rusage_page_add(int numa_id, unsigned long size, int is_user) +{ +} + +static inline void +rusage_page_sub(int numa_id, unsigned long size, int is_user) +{ +} + static inline void rusage_num_threads_inc() { diff --git a/kernel/include/syscall.h b/kernel/include/syscall.h index 8e2e8172..5fb6d9cf 100644 --- a/kernel/include/syscall.h +++ b/kernel/include/syscall.h @@ -73,7 +73,7 @@ /* #define SCD_MSG_SYSFS_RESP_CLEANUP 0x43 */ #define SCD_MSG_PROCFS_TID_CREATE 0x44 #define SCD_MSG_PROCFS_TID_DELETE 0x45 -#define SCD_MSG_EVENT_SIGNAL 0x46 +#define SCD_MSG_EVENTFD 0x46 #define SCD_MSG_PERF_CTRL 0x50 #define SCD_MSG_PERF_ACK 0x51 diff --git a/kernel/init.c b/kernel/init.c index 23777770..cb97f956 100644 --- a/kernel/init.c +++ b/kernel/init.c @@ -51,9 +51,11 @@ int osnum = 0; extern struct ihk_kmsg_buf kmsg_buf; - +extern unsigned long ihk_mc_get_ns_per_tsc(void); extern long syscall(int, ihk_mc_user_context_t *); +struct ihk_os_monitor *monitor; + static void handler_init(void) { ihk_mc_set_syscall_handler(syscall); @@ -242,19 +244,22 @@ static void time_init(void) return; } -struct ihk_os_monitor *monitor; - static void monitor_init() { int z; unsigned long phys; - z = sizeof(struct ihk_os_monitor) * num_processors; + z = sizeof(struct ihk_os_monitor) + + sizeof(struct ihk_os_cpu_monitor) * num_processors; z = (z + PAGE_SIZE -1) >> PAGE_SHIFT; monitor = ihk_mc_alloc_pages(z, IHK_MC_AP_CRITICAL); memset(monitor, 0, z * PAGE_SIZE); + monitor->num_processors = num_processors; + monitor->num_numa_nodes = ihk_mc_get_nr_numa_nodes(); + monitor->ns_per_tsc = ihk_mc_get_ns_per_tsc(); phys = virt_to_phys(monitor); - ihk_set_monitor(phys, sizeof(struct ihk_os_monitor) * num_processors); + ihk_set_monitor(phys, sizeof(struct ihk_os_monitor) + + sizeof(struct ihk_os_cpu_monitor) * num_processors); } int nmi_mode; @@ -351,10 +356,6 @@ static void populate_sysfs(void) { cpu_sysfs_setup(); numa_sysfs_setup(); -#ifdef ENABLE_RUSAGE - rusage_sysfs_setup(); - status_sysfs_setup(); -#endif //setup_remote_snooping_samples(); } /* populate_sysfs() */ @@ -400,18 +401,6 @@ int main(void) char *ptr; int mode = 0; -#ifdef ENABLE_RUSAGE - int i; - os_status = IHK_STATUS_INACTIVE; - rusage_hugetlb_usage = 0; - rusage_hugetlb_max_usage = 0; - for (i = 0; i < 1024; i++) { - rusage_numa_stat[i] = 0; - } - rusage_rss_current = 0; - rusage_rss_max = 0; -#endif - ptr = find_command_line("ksyslogd="); if (ptr) { mode = ptr[9] - 0x30; @@ -420,9 +409,6 @@ int main(void) kmsg_init(mode); kputs("IHK/McKernel started.\n"); -#ifdef ENABLE_RUSAGE - os_status = IHK_STATUS_BOOTING; -#endif ihk_set_kmsg(virt_to_phys(&kmsg_buf), IHK_KMSG_SIZE); arch_init(); @@ -445,9 +431,6 @@ int main(void) futex_init(); kputs("IHK/McKernel booted.\n"); -#ifdef ENABLE_RUSAGE - os_status = IHK_STATUS_RUNNING; -#endif #ifdef DCFA_KMOD mc_cmd_client_init(); diff --git a/kernel/mem.c b/kernel/mem.c index ca447665..150de39f 100644 --- a/kernel/mem.c +++ b/kernel/mem.c @@ -74,9 +74,9 @@ static void *___kmalloc(int size, ihk_mc_ap_flag flag); static void ___kfree(void *ptr); static void *___ihk_mc_alloc_aligned_pages_node(int npages, - int p2align, ihk_mc_ap_flag flag, int node); -static void *___ihk_mc_alloc_pages(int npages, ihk_mc_ap_flag flag); -static void ___ihk_mc_free_pages(void *p, int npages); + int p2align, ihk_mc_ap_flag flag, int node, int is_user); +static void *___ihk_mc_alloc_pages(int npages, ihk_mc_ap_flag flag, int is_user); +static void ___ihk_mc_free_pages(void *p, int npages, int is_user); /* * Page allocator tracking routines @@ -157,14 +157,15 @@ struct pagealloc_track_entry *__pagealloc_track_find_entry( /* Top level routines called from macros */ void *_ihk_mc_alloc_aligned_pages_node(int npages, int p2align, - ihk_mc_ap_flag flag, int node, char *file, int line) + ihk_mc_ap_flag flag, int node, int is_user, + char *file, int line) { unsigned long irqflags; struct pagealloc_track_entry *entry; struct pagealloc_track_addr_entry *addr_entry; int hash, addr_hash; void *r = ___ihk_mc_alloc_aligned_pages_node(npages, - p2align, flag, node); + p2align, flag, node, is_user); if (!memdebug || !pagealloc_track_initialized) return r; @@ -236,7 +237,8 @@ out: return r; } -void _ihk_mc_free_pages(void *ptr, int npages, char *file, int line) +void _ihk_mc_free_pages(void *ptr, int npages, int is_user, + char *file, int line) { unsigned long irqflags; struct pagealloc_track_entry *entry; @@ -407,7 +409,7 @@ void _ihk_mc_free_pages(void *ptr, int npages, char *file, int line) ___kfree(entry); out: - ___ihk_mc_free_pages(ptr, npages); + ___ihk_mc_free_pages(ptr, npages, is_user); } void pagealloc_memcheck(void) @@ -459,23 +461,24 @@ void pagealloc_memcheck(void) /* Actual allocation routines */ static void *___ihk_mc_alloc_aligned_pages_node(int npages, int p2align, - ihk_mc_ap_flag flag, int node) + ihk_mc_ap_flag flag, int node, int is_user) { if (pa_ops) - return pa_ops->alloc_page(npages, p2align, flag, node); + return pa_ops->alloc_page(npages, p2align, flag, node, is_user); else return early_alloc_pages(npages); } -static void *___ihk_mc_alloc_pages(int npages, ihk_mc_ap_flag flag) +static void *___ihk_mc_alloc_pages(int npages, ihk_mc_ap_flag flag, + int is_user) { - return ___ihk_mc_alloc_aligned_pages_node(npages, PAGE_P2ALIGN, flag, -1); + return ___ihk_mc_alloc_aligned_pages_node(npages, PAGE_P2ALIGN, flag, -1, is_user); } -static void ___ihk_mc_free_pages(void *p, int npages) +static void ___ihk_mc_free_pages(void *p, int npages, int is_user) { if (pa_ops) - pa_ops->free_page(p, npages); + pa_ops->free_page(p, npages, is_user); } void ihk_mc_set_page_allocator(struct ihk_mc_pa_ops *ops) @@ -505,7 +508,7 @@ static void reserve_pages(struct ihk_page_allocator_desc *pa_allocator, extern int cpu_local_var_initialized; static void *mckernel_allocate_aligned_pages_node(int npages, int p2align, - ihk_mc_ap_flag flag, int pref_node) + ihk_mc_ap_flag flag, int pref_node, int is_user) { unsigned long pa = 0; int i, node; @@ -549,7 +552,7 @@ static void *mckernel_allocate_aligned_pages_node(int npages, int p2align, ihk_mc_get_numa_id(), npages, node); - rusage_numa_add(pref_node, npages * PAGE_SIZE); + rusage_page_add(pref_node, npages, is_user); return phys_to_virt(pa); } @@ -595,8 +598,8 @@ static void *mckernel_allocate_aligned_pages_node(int npages, int p2align, ihk_mc_get_numa_id(), npages, node); - rusage_numa_add(numa_id, - npages * PAGE_SIZE); + rusage_page_add(numa_id, npages, + is_user); break; } @@ -652,7 +655,7 @@ distance_based: ihk_mc_get_numa_id(), npages, memory_nodes[node].nodes_by_distance[i].id); - rusage_numa_add(numa_id, npages * PAGE_SIZE); + rusage_page_add(numa_id, npages, is_user); break; } } @@ -679,9 +682,7 @@ order_based: pa = ihk_pagealloc_alloc(pa_allocator, npages, p2align); #endif if (pa) { -#ifdef ENABLE_RUSAGE - rusage_numa_add(numa_id, npages * PAGE_SIZE); -#endif + rusage_page_add(numa_id, npages, is_user); break; } } @@ -698,7 +699,8 @@ order_based: return NULL; } -static void __mckernel_free_pages_in_allocator(void *va, int npages) +static void __mckernel_free_pages_in_allocator(void *va, int npages, + int is_user) { int i; unsigned long pa_start = virt_to_phys(va); @@ -715,9 +717,7 @@ static void __mckernel_free_pages_in_allocator(void *va, int npages) } ihk_numa_free_pages(&memory_nodes[numa_id], pa_start, npages); -#ifdef ENABLE_RUSAGE - rusage_numa_sub(numa_id, npages * PAGE_SIZE); -#endif + rusage_page_sub(numa_id, npages, is_user); break; } #else @@ -732,9 +732,7 @@ static void __mckernel_free_pages_in_allocator(void *va, int npages) if (pa_start >= pa_allocator->start && pa_end <= pa_allocator->end) { ihk_pagealloc_free(pa_allocator, pa_start, npages); -#ifdef ENABLE_RUSAGE - rusage_numa_sub(i, npages * PAGE_SIZE); -#endif + rusage_page_sub(i, npages, is_user); return; } } @@ -743,7 +741,7 @@ static void __mckernel_free_pages_in_allocator(void *va, int npages) } -static void mckernel_free_pages(void *va, int npages) +static void mckernel_free_pages(void *va, int npages, int is_user) { struct list_head *pendings = &cpu_local_var(pending_free_pages); struct page *page; @@ -762,7 +760,7 @@ static void mckernel_free_pages(void *va, int npages) } } - __mckernel_free_pages_in_allocator(va, npages); + __mckernel_free_pages_in_allocator(va, npages, is_user); } void begin_free_pages_pending(void) { @@ -792,7 +790,7 @@ void finish_free_pages_pending(void) page->mode = PM_NONE; list_del(&page->list); __mckernel_free_pages_in_allocator(phys_to_virt(page_to_phys(page)), - page->offset); + page->offset, IHK_MC_PG_USER); } pendings->next = pendings->prev = NULL; @@ -1193,14 +1191,12 @@ static void numa_init(void) ihk_pagealloc_count(allocator), numa_id); #endif -#ifdef ENABLE_RUSAGE #ifdef IHK_RBTREE_ALLOCATOR - rusage_max_memory_add(memory_nodes[numa_id].nr_free_pages * + rusage_total_memory_add(memory_nodes[numa_id].nr_free_pages * PAGE_SIZE); #else - rusage_max_memory_add(ihk_pagealloc_count(allocator) * + rusage_total_memory_add(ihk_pagealloc_count(allocator) * PAGE_SIZE); -#endif #endif } } @@ -2024,7 +2020,7 @@ split_and_return: npages = (size + sizeof(struct kmalloc_header) + (PAGE_SIZE - 1)) >> PAGE_SHIFT; /* Use low-level page allocator to avoid tracking */ - chunk = ___ihk_mc_alloc_pages(npages, flag); + chunk = ___ihk_mc_alloc_pages(npages, flag, IHK_MC_PG_KERNEL); if (!chunk) { cpu_restore_interrupt(kmalloc_irq_flags); diff --git a/kernel/process.c b/kernel/process.c index 03d5bc00..1d540315 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -68,7 +68,6 @@ extern void procfs_create_thread(struct thread *); extern void procfs_delete_thread(struct thread *); extern void perf_start(struct mc_perf_event *event); extern void perf_reset(struct mc_perf_event *event); -extern void event_signal(); struct list_head resource_set_list; mcs_rwlock_lock_t resource_set_lock; @@ -575,7 +574,8 @@ static int copy_user_pte(void *arg0, page_table_t src_pt, pte_t *src_ptep, void dkprintf("copy_user_pte(): page size: %d\n", pgsize); npages = pgsize / PAGE_SIZE; - virt = ihk_mc_alloc_aligned_pages(npages, pgalign, IHK_MC_AP_NOWAIT); + virt = ihk_mc_alloc_aligned_pages_user(npages, pgalign, + IHK_MC_AP_NOWAIT); if (!virt) { kprintf("ERROR: copy_user_pte() allocating new page\n"); error = -ENOMEM; @@ -1393,7 +1393,7 @@ static int remap_one_page(void *arg0, page_table_t pt, pte_t *ptep, page = phys_to_page(phys); if (page && page_unmap(page)) { - ihk_mc_free_pages(phys_to_virt(phys), pgsize/PAGE_SIZE); + ihk_mc_free_pages_user(phys_to_virt(phys), pgsize/PAGE_SIZE); } error = 0; @@ -1678,7 +1678,7 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang retry: npages = pgsize / PAGE_SIZE; - virt = ihk_mc_alloc_aligned_pages(npages, p2align, + virt = ihk_mc_alloc_aligned_pages_user(npages, p2align, IHK_MC_AP_NOWAIT | (range->flag & VR_AP_USER) ? IHK_MC_AP_USER : 0); if (!virt && !range->pgshift && (pgsize != PAGE_SIZE)) { @@ -1731,7 +1731,8 @@ retry: size_t npages; npages = pgsize / PAGE_SIZE; - virt = ihk_mc_alloc_aligned_pages(npages, p2align, IHK_MC_AP_NOWAIT); + virt = ihk_mc_alloc_aligned_pages_user(npages, p2align, + IHK_MC_AP_NOWAIT); if (!virt) { error = -ENOMEM; kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):cannot allocate copy page. %d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error); @@ -1973,7 +1974,7 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn, __FUNCTION__, size, minsz, ap_flag ? "(IHK_MC_AP_USER)" : ""); - stack = ihk_mc_alloc_aligned_pages(minsz >> PAGE_SHIFT, + stack = ihk_mc_alloc_aligned_pages_user(minsz >> PAGE_SHIFT, LARGE_PAGE_P2ALIGN, IHK_MC_AP_NOWAIT | ap_flag); if (!stack) { @@ -1991,7 +1992,7 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn, #define NOPHYS ((uintptr_t)-1) if ((rc = add_process_memory_range(thread->vm, start, end, NOPHYS, vrflag, NULL, 0, LARGE_PAGE_SHIFT, NULL)) != 0) { - ihk_mc_free_pages(stack, minsz >> PAGE_SHIFT); + ihk_mc_free_pages_user(stack, minsz >> PAGE_SHIFT); return rc; } @@ -2006,7 +2007,7 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn, kprintf("init_process_stack:" "set range %lx-%lx %lx failed. %d\n", (end-minsz), end, stack, error); - ihk_mc_free_pages(stack, minsz >> PAGE_SHIFT); + ihk_mc_free_pages_user(stack, minsz >> PAGE_SHIFT); return error; } @@ -2095,7 +2096,7 @@ unsigned long extend_process_region(struct process_vm *vm, p = 0; } else { - p = ihk_mc_alloc_aligned_pages( + p = ihk_mc_alloc_aligned_pages_user( (new_end_allocated - end_allocated) >> PAGE_SHIFT, align_p2align, IHK_MC_AP_NOWAIT | (!(vm->proc->mpol_flags & MPOL_NO_HEAP) ? IHK_MC_AP_USER : 0)); @@ -2108,7 +2109,7 @@ unsigned long extend_process_region(struct process_vm *vm, if ((rc = add_process_memory_range(vm, end_allocated, new_end_allocated, (p == 0 ? 0 : virt_to_phys(p)), flag, NULL, 0, align_p2align, NULL)) != 0) { - ihk_mc_free_pages(p, (new_end_allocated - end_allocated) >> PAGE_SHIFT); + ihk_mc_free_pages_user(p, (new_end_allocated - end_allocated) >> PAGE_SHIFT); return end_allocated; } @@ -2482,7 +2483,7 @@ static void do_migrate(void); static void idle(void) { struct cpu_local_var *v = get_this_cpu_local_var(); - struct ihk_os_monitor *monitor = v->monitor; + struct ihk_os_cpu_monitor *monitor = v->monitor; /* Release runq_lock before starting the idle loop. * See comments at release_runq_lock(). diff --git a/kernel/shmobj.c b/kernel/shmobj.c index f453f436..99fe1e2a 100644 --- a/kernel/shmobj.c +++ b/kernel/shmobj.c @@ -256,7 +256,7 @@ void shmobj_destroy(struct shmobj *obj) } if (page_unmap(page)) { - ihk_mc_free_pages(page_va, npages); + ihk_mc_free_pages_user(page_va, npages); } #if 0 dkprintf("shmobj_destroy(%p):" @@ -406,7 +406,7 @@ static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align, page = page_list_lookup(obj, off); if (!page) { npages = 1 << p2align; - virt = ihk_mc_alloc_aligned_pages(npages, p2align, + virt = ihk_mc_alloc_aligned_pages_user(npages, p2align, IHK_MC_AP_NOWAIT); if (!virt) { error = -ENOMEM; @@ -443,7 +443,7 @@ static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align, out: memobj_unlock(&obj->memobj); if (virt) { - ihk_mc_free_pages(virt, npages); + ihk_mc_free_pages_user(virt, npages); } dkprintf("shmobj_get_page(%p,%#lx,%d,%p):%d\n", memobj, off, p2align, physp, error); @@ -467,7 +467,8 @@ static int shmobj_invalidate_page(struct memobj *memobj, uintptr_t phys, if (ihk_atomic_read(&page->count) == 1) { if (page_unmap(page)) { - ihk_mc_free_pages(phys_to_virt(phys), pgsize/PAGE_SIZE); + ihk_mc_free_pages_user(phys_to_virt(phys), + pgsize/PAGE_SIZE); } } diff --git a/kernel/syscall.c b/kernel/syscall.c index f5163e19..dbb679c1 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -178,7 +178,7 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) long rc; struct thread *thread = cpu_local_var(current); struct process *proc = thread->proc; - struct ihk_os_monitor *monitor = cpu_local_var(monitor); + struct ihk_os_cpu_monitor *monitor = cpu_local_var(monitor); int mstatus = 0; #ifdef PROFILE_ENABLE @@ -385,8 +385,6 @@ if(req->number == __NR_sched_setaffinity)kprintf("do_syscall 2 offload=%d\n", th terminate(0, SIGKILL); } -out: - #ifdef PROFILE_ENABLE if (req->number < PROFILE_SYSCALL_MAX) { profile_event_add(profile_syscall2offload(req->number), @@ -790,6 +788,8 @@ terminate(int rc, int sig) struct syscall_request request IHK_DMA_ALIGN; int exit_status; +kprintf("before terminate usage=%ld\n", monitor->rusage_rss_current); +kprintf("before terminate systm=%ld\n", monitor->rusage_kmem_usage); // sync perf info if(proc->monitoring_event) sync_child_event(proc->monitoring_event); @@ -990,6 +990,8 @@ terminate(int rc, int sig) release_thread(mythread); release_process_vm(vm); preempt_enable(); +kprintf("after terminate usage =%ld\n", monitor->rusage_rss_current); +kprintf("after terminate system=%ld\n", monitor->rusage_kmem_usage); schedule(); kprintf("%s: ERROR: returned from terminate() -> schedule()\n", __FUNCTION__); panic("panic"); @@ -1010,14 +1012,14 @@ terminate_host(int pid) } void -event_signal() +eventfd() { struct ihk_ikc_channel_desc *syscall_channel; struct ikc_scd_packet pckt; syscall_channel = get_cpu_local_var(0)->ikc2linux; memset(&pckt, '\0', sizeof pckt); - pckt.msg = SCD_MSG_EVENT_SIGNAL; + pckt.msg = SCD_MSG_EVENTFD; ihk_ikc_send(syscall_channel, &pckt, 0); } @@ -1353,7 +1355,7 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot, vrflags |= VR_AP_USER; } - p = ihk_mc_alloc_aligned_pages(npages, p2align, + p = ihk_mc_alloc_aligned_pages_user(npages, p2align, IHK_MC_AP_NOWAIT | ap_flag); if (p == NULL) { dkprintf("%s: warning: failed to allocate %d contiguous pages " @@ -1508,7 +1510,7 @@ out: } if (p) { - ihk_mc_free_pages(p, npages); + ihk_mc_free_pages_user(p, npages); } if (memobj) { memobj_release(memobj); @@ -2135,6 +2137,8 @@ unsigned long do_fork(int clone_flags, unsigned long newsp, int ptrace_event = 0; int termsig = clone_flags & 0x000000ff; +kprintf("before fork usage=%ld\n", monitor->rusage_rss_current); +kprintf("before fork systm=%ld\n", monitor->rusage_kmem_usage); dkprintf("do_fork,flags=%08x,newsp=%lx,ptidptr=%lx,ctidptr=%lx,tls=%lx,curpc=%lx,cursp=%lx", clone_flags, newsp, parent_tidptr, child_tidptr, tlsblock_base, curpc, cursp); @@ -2385,6 +2389,8 @@ retry_tid: if (ptrace_event) { schedule(); } +kprintf("after fork usage =%ld\n", monitor->rusage_rss_current); +kprintf("after fork system=%ld\n", monitor->rusage_kmem_usage); return new->tid; } @@ -3526,7 +3532,7 @@ SYSCALL_DECLARE(rt_sigtimedwait) int sig; struct timespec ats; struct timespec ets; - struct ihk_os_monitor *monitor = cpu_local_var(monitor); + struct ihk_os_cpu_monitor *monitor = cpu_local_var(monitor); monitor->status = IHK_OS_MONITOR_KERNEL_HEAVY; @@ -3684,7 +3690,7 @@ do_sigsuspend(struct thread *thread, const sigset_t *set) struct list_head *head; mcs_rwlock_lock_t *lock; struct mcs_rwlock_node_irqsave mcs_rw_node; - struct ihk_os_monitor *monitor = cpu_local_var(monitor); + struct ihk_os_cpu_monitor *monitor = cpu_local_var(monitor); monitor->status = IHK_OS_MONITOR_KERNEL_HEAVY; @@ -4795,7 +4801,7 @@ SYSCALL_DECLARE(futex) uint32_t *uaddr2 = (uint32_t *)ihk_mc_syscall_arg4(ctx); uint32_t val3 = (uint32_t)ihk_mc_syscall_arg5(ctx); int flags = op; - struct ihk_os_monitor *monitor = cpu_local_var(monitor); + struct ihk_os_cpu_monitor *monitor = cpu_local_var(monitor); monitor->status = IHK_OS_MONITOR_KERNEL_HEAVY; @@ -6594,7 +6600,7 @@ SYSCALL_DECLARE(nanosleep) struct timespec *tv = (struct timespec *)ihk_mc_syscall_arg0(ctx); struct timespec *rem = (struct timespec *)ihk_mc_syscall_arg1(ctx); struct syscall_request request IHK_DMA_ALIGN; - struct ihk_os_monitor *monitor = cpu_local_var(monitor); + struct ihk_os_cpu_monitor *monitor = cpu_local_var(monitor); monitor->status = IHK_OS_MONITOR_KERNEL_HEAVY; @@ -9185,7 +9191,7 @@ set_cputime(int mode) struct thread *thread; unsigned long tsc; struct cpu_local_var *v; - struct ihk_os_monitor *monitor; + struct ihk_os_cpu_monitor *monitor; if(clv == NULL) return; diff --git a/lib/abort.c b/lib/abort.c index 3868e90b..7fada65a 100644 --- a/lib/abort.c +++ b/lib/abort.c @@ -8,7 +8,7 @@ extern struct cpu_local_var *clv; void panic(const char *msg) { if (clv) { - struct ihk_os_monitor *monitor = cpu_local_var(monitor); + struct ihk_os_cpu_monitor *monitor = cpu_local_var(monitor); monitor->status = IHK_OS_MONITOR_PANIC; } diff --git a/lib/include/ihk/mm.h b/lib/include/ihk/mm.h index 8c58a420..dee805a0 100644 --- a/lib/include/ihk/mm.h +++ b/lib/include/ihk/mm.h @@ -54,6 +54,9 @@ typedef unsigned long ihk_mc_ap_flag; #define IHK_MC_AP_BANDWIDTH 0x010000 #define IHK_MC_AP_LATENCY 0x020000 +#define IHK_MC_PG_KERNEL 0 +#define IHK_MC_PG_USER 1 + enum ihk_mc_pt_prepare_flag { IHK_MC_PT_FIRST_LEVEL, IHK_MC_PT_LAST_LEVEL, @@ -86,8 +89,8 @@ void ihk_mc_reserve_arch_pages(struct ihk_page_allocator_desc *pa_allocator, unsigned long, unsigned long, int)); struct ihk_mc_pa_ops { - void *(*alloc_page)(int, int, ihk_mc_ap_flag, int node); - void (*free_page)(void *, int); + void *(*alloc_page)(int, int, ihk_mc_ap_flag, int node, int is_user); + void (*free_page)(void *, int, int is_user); void *(*alloc)(int, ihk_mc_ap_flag); void (*free)(void *); @@ -111,25 +114,43 @@ int ihk_mc_free_micpa(unsigned long mic_pa); void ihk_mc_clean_micpa(void); void *_ihk_mc_alloc_aligned_pages_node(int npages, int p2align, - ihk_mc_ap_flag flag, int node, char *file, int line); + ihk_mc_ap_flag flag, int node, int is_user, char *file, int line); #define ihk_mc_alloc_aligned_pages_node(npages, p2align, flag, node) ({\ -void *r = _ihk_mc_alloc_aligned_pages_node(npages, p2align, flag, node, __FILE__, __LINE__);\ +void *r = _ihk_mc_alloc_aligned_pages_node(npages, p2align, flag, node, IHK_MC_PG_KERNEL, __FILE__, __LINE__);\ +r;\ +}) +#define ihk_mc_alloc_aligned_pages_node_user(npages, p2align, flag, node) ({\ +void *r = _ihk_mc_alloc_aligned_pages_node(npages, p2align, flag, node, IHK_MC_PG_USER, __FILE__, __LINE__);\ r;\ }) #define ihk_mc_alloc_aligned_pages(npages, p2align, flag) ({\ -void *r = _ihk_mc_alloc_aligned_pages_node(npages, p2align, flag, -1, __FILE__, __LINE__);\ +void *r = _ihk_mc_alloc_aligned_pages_node(npages, p2align, flag, -1, IHK_MC_PG_KERNEL, __FILE__, __LINE__);\ +r;\ +}) + +#define ihk_mc_alloc_aligned_pages_user(npages, p2align, flag) ({\ +void *r = _ihk_mc_alloc_aligned_pages_node(npages, p2align, flag, -1, IHK_MC_PG_USER, __FILE__, __LINE__);\ r;\ }) #define ihk_mc_alloc_pages(npages, flag) ({\ -void *r = _ihk_mc_alloc_aligned_pages_node(npages, PAGE_P2ALIGN, flag, -1, __FILE__, __LINE__);\ +void *r = _ihk_mc_alloc_aligned_pages_node(npages, PAGE_P2ALIGN, flag, -1, IHK_MC_PG_KERNEL, __FILE__, __LINE__);\ r;\ }) -void _ihk_mc_free_pages(void *ptr, int npages, char *file, int line); +#define ihk_mc_alloc_pages_user(npages, flag) ({\ +void *r = _ihk_mc_alloc_aligned_pages_node(npages, PAGE_P2ALIGN, flag, -1, IHK_MC_PG_USER, __FILE__, __LINE__);\ +r;\ +}) + +void _ihk_mc_free_pages(void *ptr, int npages, int is_user, char *file, int line); #define ihk_mc_free_pages(p, npages) ({\ -_ihk_mc_free_pages(p, npages, __FILE__, __LINE__);\ +_ihk_mc_free_pages(p, npages, IHK_MC_PG_KERNEL, __FILE__, __LINE__);\ +}) + +#define ihk_mc_free_pages_user(p, npages) ({\ +_ihk_mc_free_pages(p, npages, IHK_MC_PG_USER, __FILE__, __LINE__);\ }) void *ihk_mc_allocate(int size, int flag); diff --git a/lib/include/ihk/rusage.h b/lib/include/ihk/rusage.h index 331a30a3..c69331a0 100644 --- a/lib/include/ihk/rusage.h +++ b/lib/include/ihk/rusage.h @@ -1,76 +1,102 @@ -#ifndef __RUSAGE_H -#define __RUSAGE_H +#ifndef __IHK_RUSAGE_H +#define __IHK_RUSAGE_H -#define ENABLE_RUSAGE +struct ihk_os_cpu_monitor { + int status; +#define IHK_OS_MONITOR_NOT_BOOT 0 +#define IHK_OS_MONITOR_IDLE 1 +#define IHK_OS_MONITOR_USER 2 +#define IHK_OS_MONITOR_KERNEL 3 +#define IHK_OS_MONITOR_KERNEL_HEAVY 4 +#define IHK_OS_MONITOR_KERNEL_OFFLOAD 5 +#define IHK_OS_MONITOR_KERNEL_FREEZING 8 +#define IHK_OS_MONITOR_KERNEL_FROZEN 9 +#define IHK_OS_MONITOR_KERNEL_THAW 10 +#define IHK_OS_MONITOR_PANIC 99 + int status_bak; + unsigned long counter; + unsigned long ocounter; + unsigned long user_tsc; + unsigned long system_tsc; +}; -#define RUSAGE_DEFAULT_SIZE 10 +struct ihk_os_monitor { + unsigned long rusage_max_num_threads; + unsigned long rusage_num_threads; + unsigned long rusage_rss_max; + long rusage_rss_current; + unsigned long rusage_kmem_usage; + unsigned long rusage_kmem_max_usage; + unsigned long rusage_hugetlb_usage; + unsigned long rusage_hugetlb_max_usage; + unsigned long rusage_total_memory; + unsigned long rusage_total_memory_usage; + unsigned long rusage_total_memory_max_usage; + unsigned long num_numa_nodes; + unsigned long num_processors; + unsigned long ns_per_tsc; + unsigned long reserve[128]; + unsigned long rusage_numa_stat[1024]; + + struct ihk_os_cpu_monitor cpu[0]; +}; enum RUSAGE_MEMBER { - RUSAGE_RSS, - RUSAGE_CACHE, - RUSAGE_RSS_HUGE, - RUSAGE_MAPPED_FILE, - RUSAGE_MAX_USAGE, - RUSAGE_KMEM_USAGE, - RUSAGE_KMAX_USAGE, - RUSAGE_NUM_NUMA_NODES, - RUSAGE_NUMA_STAT, - RUSAGE_HUGETLB , - RUSAGE_HUGETLB_MAX , - RUSAGE_STAT_SYSTEM , - RUSAGE_STAT_USER , - RUSAGE_USAGE , - RUSAGE_USAGE_PER_CPU , - RUSAGE_NUM_THREADS , - RUSAGE_MAX_NUM_THREADS + RUSAGE_RSS, + RUSAGE_CACHE, + RUSAGE_RSS_HUGE, + RUSAGE_MAPPED_FILE, + RUSAGE_MAX_USAGE, + RUSAGE_KMEM_USAGE, + RUSAGE_KMAX_USAGE, + RUSAGE_NUM_NUMA_NODES, + RUSAGE_NUMA_STAT, + RUSAGE_HUGETLB , + RUSAGE_HUGETLB_MAX , + RUSAGE_STAT_SYSTEM , + RUSAGE_STAT_USER , + RUSAGE_USAGE , + RUSAGE_USAGE_PER_CPU , + RUSAGE_NUM_THREADS , + RUSAGE_MAX_NUM_THREADS }; struct r_data{ - unsigned long pid; - unsigned long rss; - unsigned long cache; - unsigned long rss_huge; - unsigned long mapped_file; - unsigned long max_usage; - unsigned long kmem_usage; - unsigned long kmax_usage; - unsigned long hugetlb; - unsigned long hugetlb_max; - unsigned long stat_system; - unsigned long stat_user; - unsigned long usage; - struct r_data *next; + unsigned long pid; + unsigned long rss; + unsigned long cache; + unsigned long rss_huge; + unsigned long mapped_file; + unsigned long max_usage; + unsigned long kmem_usage; + unsigned long kmax_usage; + unsigned long hugetlb; + unsigned long hugetlb_max; + unsigned long stat_system; + unsigned long stat_user; + unsigned long usage; + struct r_data *next; } ; -typedef struct r_data rusage_data; - -rusage_data *rdata[RUSAGE_DEFAULT_SIZE]; -unsigned long rusage_max_num_threads; -unsigned long rusage_num_threads; enum ihk_os_status { - IHK_STATUS_INACTIVE, - IHK_STATUS_BOOTING, - IHK_STATUS_RUNNING, - IHK_STATUS_SHUTDOWN, - IHK_STATUS_PANIC, - IHK_STATUS_HUNGUP, - IHK_STATUS_FREEZING, - IHK_STATUS_FROZEN, + IHK_STATUS_INACTIVE, + IHK_STATUS_BOOTING, + IHK_STATUS_RUNNING, + IHK_STATUS_SHUTDOWN, + IHK_STATUS_PANIC, + IHK_STATUS_HUNGUP, + IHK_STATUS_FREEZING, + IHK_STATUS_FROZEN, }; -enum ihk_os_status os_status; -unsigned long sys_delegate_count; enum sys_delegate_state_enum { ENTER_KERNEL, EXIT_KERNEL, }; -enum sys_delegate_state_enum sys_delegate_state; -unsigned long rusage_kmem_usage; -unsigned long rusage_kmem_max_usage; -unsigned long rusage_hugetlb_usage; -unsigned long rusage_hugetlb_max_usage; -unsigned long rusage_usage_per_cpu[sizeof(cpu_set_t)/8]; -unsigned long rusage_numa_stat[1024]; +extern struct ihk_os_monitor *monitor; + +extern void ihk_mc_set_os_status(unsigned long st); +extern unsigned long ihk_mc_get_os_status(); #endif