From aa7cb970c4c5b0adb2e4b83146d890ed6a81a728 Mon Sep 17 00:00:00 2001 From: Masamichi Takagi Date: Thu, 17 Aug 2017 22:06:49 +0900 Subject: [PATCH] ihk_os_getrusage(): Compile LWK-specific results in mcctrl 1. User asks mcctrl for the result via ihk_os_getrusage() with passing void * 2. mcctrl compiles the results and passes them to the user 3. User interprets it by using the type defined in the LWK-specific header --- arch/x86/kernel/include/arch/rusage.h | 2 +- arch/x86/kernel/memory.c | 4 +- configure | 5 + configure.ac | 4 + executer/include/ihklib_rusage.h | 23 ++ executer/kernel/mcctrl/Makefile.in | 5 +- executer/kernel/mcctrl/control.c | 76 ++++++ executer/kernel/mcctrl/driver.c | 1 + executer/kernel/mcctrl/mcctrl.h | 5 + kernel/ap.c | 2 +- kernel/cls.c | 4 +- kernel/devobj.c | 2 +- kernel/fileobj.c | 2 +- kernel/freeze.c | 2 +- kernel/host.c | 2 +- kernel/include/cls.h | 1 + kernel/include/rusage.h | 325 ++--------------------- kernel/include/rusage_private.h | 369 ++++++++++++++++++++++++++ kernel/init.c | 21 +- kernel/mem.c | 2 +- kernel/process.c | 5 +- kernel/shmobj.c | 2 +- kernel/syscall.c | 11 +- lib/abort.c | 2 +- lib/include/ihk/debug.h | 2 +- lib/include/ihk/mm.h | 1 + lib/include/ihk/monitor.h | 8 + lib/include/ihk/rusage.h | 65 ----- 28 files changed, 567 insertions(+), 386 deletions(-) create mode 100644 executer/include/ihklib_rusage.h create mode 100644 kernel/include/rusage_private.h create mode 100644 lib/include/ihk/monitor.h delete mode 100644 lib/include/ihk/rusage.h diff --git a/arch/x86/kernel/include/arch/rusage.h b/arch/x86/kernel/include/arch/rusage.h index 6ee49f5f..dfb6e9af 100644 --- a/arch/x86/kernel/include/arch/rusage.h +++ b/arch/x86/kernel/include/arch/rusage.h @@ -7,7 +7,7 @@ #define IHK_OS_PGSIZE_2MB 1 #define IHK_OS_PGSIZE_1GB 2 -extern struct ihk_os_monitor *monitor; +extern struct rusage_global *rusage; extern int sprintf(char * buf, const char *fmt, ...); diff --git a/arch/x86/kernel/memory.c b/arch/x86/kernel/memory.c index 293daff9..b5c5c046 100644 --- a/arch/x86/kernel/memory.c +++ b/arch/x86/kernel/memory.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include //#define DEBUG @@ -2050,7 +2050,7 @@ retry: dkprintf("set_range_l2(%lx,%lx,%lx):" "2MiB page. %d %lx\n", base, start, end, error, *ptep); - // call memory_stat_rss_add() here because pgshift is resolved here + // Call memory_stat_rss_add() here because pgshift is resolved here if (rusage_memory_stat_add(args->range, phys, PTL2_SIZE, PTL2_SIZE)) { dkprintf("%lx+,%s: calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n", phys, __FUNCTION__, base, phys, PTL2_SIZE, PTL2_SIZE); } else { diff --git a/configure b/configure index 501f19b1..58d7e074 100755 --- a/configure +++ b/configure @@ -650,6 +650,7 @@ ENABLE_MCOVERLAYFS MANDIR KERNDIR KMODDIR +INCLUDEDIR ETCDIR INCDIR MCKERNEL_LIBDIR @@ -4777,6 +4778,9 @@ case $WITH_TARGET in if test "X$ETCDIR" = X; then ETCDIR="$prefix/etc" fi + if test "X$INCLUDEDIR" = X; then + INCLUDEDIR="$prefix/include" + fi if test "X$KMODDIR" = X; then KMODDIR="$prefix/kmod" fi @@ -5660,6 +5664,7 @@ _ACEOF + ac_config_headers="$ac_config_headers config.h" diff --git a/configure.ac b/configure.ac index d5e4621c..8214ddbd 100644 --- a/configure.ac +++ b/configure.ac @@ -284,6 +284,9 @@ case $WITH_TARGET in if test "X$ETCDIR" = X; then ETCDIR="$prefix/etc" fi + if test "X$INCLUDEDIR" = X; then + INCLUDEDIR="$prefix/include" + fi if test "X$KMODDIR" = X; then KMODDIR="$prefix/kmod" fi @@ -495,6 +498,7 @@ AC_SUBST(MCKERNEL_INCDIR) AC_SUBST(MCKERNEL_LIBDIR) AC_SUBST(INCDIR) AC_SUBST(ETCDIR) +AC_SUBST(INCLUDEDIR) AC_SUBST(KMODDIR) AC_SUBST(KERNDIR) AC_SUBST(MANDIR) diff --git a/executer/include/ihklib_rusage.h b/executer/include/ihklib_rusage.h new file mode 100644 index 00000000..1c48ace4 --- /dev/null +++ b/executer/include/ihklib_rusage.h @@ -0,0 +1,23 @@ +#ifndef IHKLIB_RUSAGE_H_INCLUDED +#define IHKLIB_RUSAGE_H_INCLUDED + +#define IHK_MAX_NUM_PGSIZES 4 +#define IHK_MAX_NUM_NUMA_NODES 1024 +#define IHK_MAX_NUM_CPUS 1024 + +struct mckernel_rusage { + unsigned long memory_stat_rss[IHK_MAX_NUM_PGSIZES]; + unsigned long memory_stat_mapped_file[IHK_MAX_NUM_PGSIZES]; + unsigned long memory_max_usage; + unsigned long memory_kmem_usage; + unsigned long memory_kmem_max_usage; + unsigned long memory_numa_stat[IHK_MAX_NUM_NUMA_NODES]; + unsigned long cpuacct_stat_system; + unsigned long cpuacct_stat_user; + unsigned long cpuacct_usage; + unsigned long cpuacct_usage_percpu[IHK_MAX_NUM_CPUS]; + int num_threads; + int max_num_threads; +}; + +#endif /* !defined(IHKLIB_RUSAGE_H_INCLUDED) */ diff --git a/executer/kernel/mcctrl/Makefile.in b/executer/kernel/mcctrl/Makefile.in index 291b84cc..b6187692 100644 --- a/executer/kernel/mcctrl/Makefile.in +++ b/executer/kernel/mcctrl/Makefile.in @@ -4,6 +4,7 @@ ARCH ?= @ARCH@ src = @abs_srcdir@ KMODDIR=@KMODDIR@ BINDIR=@BINDIR@ +INCLUDEDIR=@INCLUDEDIR@ IHK_BASE=$(src)/../../../../ihk obj-m += mcctrl.o @@ -13,7 +14,7 @@ obj-m += mcctrl.o ifeq ($(ARCH), arm64) ccflags-y := -I$(IHK_BASE)/linux/include -I$(IHK_BASE)/linux/include/ihk/arch/$(ARCH) -I$(IHK_BASE)/ikc/include -I$(IHK_BASE)/ikc/include/ikc/arch/$(ARCH) -I$(IHK_BASE)/include -I$(IHK_BASE)/include/arch/$(ARCH) -I$(src)/../../include -I$(src)/arch/$(ARCH)/include -DMCEXEC_PATH=\"$(BINDIR)/mcexec\" -I@abs_builddir@ else -ccflags-y := -I$(IHK_BASE)/linux/include -I$(IHK_BASE)/linux/include/ihk/arch/$(ARCH) -I$(IHK_BASE)/ikc/include -I$(IHK_BASE)/ikc/include/ikc/arch/$(ARCH) -I$(IHK_BASE)/include -I$(IHK_BASE)/include/arch/$(ARCH) -I$(src)/../../include -mcmodel=kernel -mno-red-zone -DMCEXEC_PATH=\"$(BINDIR)/mcexec\" -I@abs_builddir@ +ccflags-y := -I$(IHK_BASE)/linux/include -I$(IHK_BASE)/linux/include/ihk/arch/$(ARCH) -I$(IHK_BASE)/ikc/include -I$(IHK_BASE)/ikc/include/ikc/arch/$(ARCH) -I$(IHK_BASE)/include -I$(IHK_BASE)/include/arch/$(ARCH) -I$(src)/../../../kernel/include -I$(src)/../../include -mcmodel=kernel -mno-red-zone -DMCEXEC_PATH=\"$(BINDIR)/mcexec\" -I@abs_builddir@ -I@abs_builddir@/../../../ endif mcctrl-y := driver.o control.o ikc.o syscall.o procfs.o binfmt_mcexec.o @@ -37,3 +38,5 @@ clean: install: mkdir -p -m 755 $(KMODDIR) install -m 644 mcctrl.ko $(KMODDIR) + mkdir -p -m 755 $(INCLUDEDIR)/mckernel + install -m 644 $(src)/../../include/ihklib_rusage.h $(INCLUDEDIR)/mckernel/ihklib_rusage.h diff --git a/executer/kernel/mcctrl/control.c b/executer/kernel/mcctrl/control.c index 62e39ef5..44803be7 100644 --- a/executer/kernel/mcctrl/control.c +++ b/executer/kernel/mcctrl/control.c @@ -45,6 +45,8 @@ #include "../../../config.h" #include "mcctrl.h" #include +#include +#include //#define DEBUG @@ -2194,6 +2196,77 @@ void mcctrl_perf_ack(ihk_os_t os, struct ikc_scd_packet *packet) } +/* Compose LWK-specific rusage structure */ +long mcctrl_getrusage(ihk_os_t ihk_os, struct getrusage_desc *__user _desc) +{ + struct getrusage_desc desc; + struct rusage_global *rusage_global = ihk_os_get_rusage(ihk_os); + struct mckernel_rusage *rusage = NULL; + int ret = 0; + int i; + unsigned long ut; + unsigned long st; + + ret = copy_from_user(&desc, _desc, sizeof(struct getrusage_desc)); + if (ret != 0) { + printk("%s: copy_from_user failed\n", __FUNCTION__); + goto out; + } + + rusage = kmalloc(sizeof(struct mckernel_rusage), GFP_KERNEL); + if (!rusage) { + printk("%s: kmalloc failed\n", __FUNCTION__); + ret = -ENOMEM; + goto out; + } + memset(rusage, 0, sizeof(struct mckernel_rusage)); + + /* Compile statistics */ + for (i = 0; i < IHK_MAX_NUM_PGSIZES; i++) { + rusage->memory_stat_rss[i] = rusage_global->memory_stat_rss[i]; + rusage->memory_stat_mapped_file[i] = rusage_global->memory_stat_mapped_file[i]; + } + rusage->memory_max_usage = rusage_global->memory_max_usage; + rusage->memory_kmem_usage = rusage_global->memory_kmem_usage; + rusage->memory_kmem_max_usage = rusage_global->memory_kmem_max_usage; + for (i = 0; i < rusage_global->num_numa_nodes; i++) { + rusage->memory_numa_stat[i] = rusage_global->memory_numa_stat[i]; + } + for (ut = 0, st = 0, i = 0; i < rusage_global->num_processors; i++) { + unsigned long wt; + + wt = rusage_global->cpu[i].user_tsc * rusage_global->ns_per_tsc / 1000; + ut += wt; + st += rusage_global->cpu[i].system_tsc * rusage_global->ns_per_tsc / 1000; + rusage->cpuacct_usage_percpu[i] = wt; + } + rusage->cpuacct_stat_system = st / 10000000; + rusage->cpuacct_stat_user = ut / 10000000; + rusage->cpuacct_usage = ut; + + rusage->num_threads = rusage_global->num_threads; + rusage->max_num_threads = rusage_global->max_num_threads; + + if (desc.size_rusage > sizeof(struct mckernel_rusage)) { + printk("%s: desc.size_rusage=%ld > sizeof(struct mckernel_rusage)=%ld\n", __FUNCTION__, desc.size_rusage, sizeof(struct mckernel_rusage)); + ret = -EINVAL; + goto out; + } + + ret = copy_to_user(desc.rusage, rusage, desc.size_rusage); + if (ret != 0) { + printk("%s: copy_to_user failed\n", __FUNCTION__); + goto out; + } + + out: + if (rusage) { + kfree(rusage); + } + + return ret; +} + extern void *get_user_sp(void); extern void set_user_sp(unsigned long); extern void restore_fs(unsigned long fs); @@ -2797,6 +2870,9 @@ long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg, case IHK_OS_AUX_PERF_DESTROY: return mcctrl_perf_destroy(os); + + case IHK_OS_GETRUSAGE: + return mcctrl_getrusage(os, (struct getrusage_desc *)arg); } return -EINVAL; } diff --git a/executer/kernel/mcctrl/driver.c b/executer/kernel/mcctrl/driver.c index 67a330cc..87065b44 100644 --- a/executer/kernel/mcctrl/driver.c +++ b/executer/kernel/mcctrl/driver.c @@ -97,6 +97,7 @@ static struct ihk_os_user_call_handler mcctrl_uchs[] = { { .request = IHK_OS_AUX_PERF_ENABLE, .func = mcctrl_ioctl }, { .request = IHK_OS_AUX_PERF_DISABLE, .func = mcctrl_ioctl }, { .request = IHK_OS_AUX_PERF_DESTROY, .func = mcctrl_ioctl }, + { .request = IHK_OS_GETRUSAGE, .func = mcctrl_ioctl }, }; static struct ihk_os_kernel_call_handler mcctrl_kernel_handlers = { diff --git a/executer/kernel/mcctrl/mcctrl.h b/executer/kernel/mcctrl/mcctrl.h index 065afbfb..d38ebda3 100644 --- a/executer/kernel/mcctrl/mcctrl.h +++ b/executer/kernel/mcctrl/mcctrl.h @@ -490,4 +490,9 @@ struct ihk_perf_event_attr{ unsigned exclude_idle:1; }; +struct getrusage_desc { + void* rusage; + size_t size_rusage; +}; + #endif diff --git a/kernel/ap.c b/kernel/ap.c index 02c48cee..62c77680 100644 --- a/kernel/ap.c +++ b/kernel/ap.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include //#define DEBUG_PRINT_AP diff --git a/kernel/cls.c b/kernel/cls.c index 37128752..b7c40846 100644 --- a/kernel/cls.c +++ b/kernel/cls.c @@ -19,7 +19,8 @@ #include #include #include -#include +#include +#include extern int num_processors; @@ -39,6 +40,7 @@ void cpu_local_var_init(void) for (i = 0; i < num_processors; i++) { clv[i].monitor = monitor->cpu + i; + clv[i].rusage = rusage->cpu + i; INIT_LIST_HEAD(&clv[i].smp_func_req_list); } diff --git a/kernel/devobj.c b/kernel/devobj.c index 7b1de867..ad687f4e 100644 --- a/kernel/devobj.c +++ b/kernel/devobj.c @@ -35,7 +35,7 @@ #include #include #include -#include +#include //#define DEBUG_PRINT_DEVOBJ diff --git a/kernel/fileobj.c b/kernel/fileobj.c index 50bb1dfc..b6470fa1 100644 --- a/kernel/fileobj.c +++ b/kernel/fileobj.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include //#define DEBUG_PRINT_FILEOBJ diff --git a/kernel/freeze.c b/kernel/freeze.c index 40774215..ebac3f47 100644 --- a/kernel/freeze.c +++ b/kernel/freeze.c @@ -3,7 +3,7 @@ #include #include #include -#include +#include extern int nmi_mode; extern void mod_nmi_ctx(void *, void(*)()); diff --git a/kernel/host.c b/kernel/host.c index 9762f3d0..9e77d226 100644 --- a/kernel/host.c +++ b/kernel/host.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include //#define DEBUG_PRINT_HOST diff --git a/kernel/include/cls.h b/kernel/include/cls.h index 3bf6546d..02de2bc7 100644 --- a/kernel/include/cls.h +++ b/kernel/include/cls.h @@ -93,6 +93,7 @@ struct cpu_local_var { int timer_enabled; int kmalloc_initialized; struct ihk_os_cpu_monitor *monitor; + struct rusage_percpu *rusage; ihk_spinlock_t smp_func_req_lock; struct list_head smp_func_req_list; diff --git a/kernel/include/rusage.h b/kernel/include/rusage.h index c96ef9ae..a20a13ea 100644 --- a/kernel/include/rusage.h +++ b/kernel/include/rusage.h @@ -1,307 +1,36 @@ +/* Interface toward kernel and mcctrl */ + #ifndef __RUSAGE_H #define __RUSAGE_H -#include -#include -#include -#include -#include +#define IHK_MAX_NUM_PGSIZES 4 +#define IHK_MAX_NUM_NUMA_NODES 1024 +#define IHK_MAX_NUM_CPUS 1024 -#ifdef ENABLE_RUSAGE -#define RUSAGE_MEM_LIMIT (2 * 1024 * 1024) // 2MB +struct rusage_percpu { + unsigned long user_tsc; + unsigned long system_tsc; +}; -extern void eventfd(); +struct rusage_global { + long memory_stat_rss[IHK_MAX_NUM_PGSIZES]; + long memory_stat_mapped_file[IHK_MAX_NUM_PGSIZES]; + unsigned long memory_max_usage; + unsigned long max_num_threads; + unsigned long num_threads; + long rss_current; + unsigned long memory_kmem_usage; + unsigned long memory_kmem_max_usage; + unsigned long memory_numa_stat[IHK_MAX_NUM_NUMA_NODES]; + struct rusage_percpu cpu[IHK_MAX_NUM_CPUS]; /* clv[i].monitor = &cpu[i] */ -static inline void -rusage_total_memory_add(unsigned long size) -{ - monitor->rusage_total_memory += size; -} + unsigned long total_memory; + unsigned long total_memory_usage; + unsigned long total_memory_max_usage; -static inline void -rusage_rss_add(unsigned long size) -{ - unsigned long newval; - unsigned long oldval; - unsigned long retval; - - newval = __sync_add_and_fetch(&monitor->rusage_rss_current, size); - oldval = monitor->rusage_memory_max_usage; - while (newval > oldval) { - retval = __sync_val_compare_and_swap(&monitor->rusage_memory_max_usage, - oldval, newval); - if (retval == oldval) { - break; - } - oldval = retval; - } -} - -static inline void -rusage_rss_sub(unsigned long size) -{ - __sync_sub_and_fetch(&monitor->rusage_rss_current, size); -} - -static inline void memory_stat_rss_add(unsigned long size, int pgsize) -{ - ihk_atomic_add_long(size, &monitor->rusage_memory_stat_rss[rusage_pgsize_to_pgtype(pgsize)]); -} - -static inline void memory_stat_rss_sub(unsigned long size, int pgsize) -{ - ihk_atomic_add_long(-size, &monitor->rusage_memory_stat_rss[rusage_pgsize_to_pgtype(pgsize)]); -} - -static inline void rusage_memory_stat_mapped_file_add(unsigned long size, int pgsize) -{ - ihk_atomic_add_long(size, &monitor->rusage_memory_stat_mapped_file[rusage_pgsize_to_pgtype(pgsize)]); -} - -static inline void rusage_memory_stat_mapped_file_sub(unsigned long size, int pgsize) -{ - ihk_atomic_add_long(-size, &monitor->rusage_memory_stat_mapped_file[rusage_pgsize_to_pgtype(pgsize)]); -} - -static inline int rusage_memory_stat_add(struct vm_range *range, uintptr_t phys, unsigned long size, int pgsize) -{ - /* Is it resident in main memory? */ - if (range->flag & (VR_REMOTE | VR_IO_NOCACHE | VR_RESERVED)) { - return 0; - } - /* Is it anonymous and pre-paging? */ - if (!range->memobj) { - memory_stat_rss_add(size, pgsize); - return 1; - } - /* Is it devobj or (fileobj and pre-map) or xpmem attachment? */ - if ((range->memobj->flags & MF_DEV_FILE) || - (range->memobj->flags & MF_PREMAP) || - (range->memobj->flags & MF_XPMEM) - ) { - return 0; - } - /* Is it anonymous and demand-paging? */ - if (range->memobj->flags & MF_ZEROOBJ) { - memory_stat_rss_add(size, pgsize); - return 1; - } - - struct page *page = phys_to_page(phys); - - /* Is It file map and cow page? */ - if ((range->memobj->flags & (MF_DEV_FILE | MF_REG_FILE)) && - !page) { - //kprintf("%s: cow,phys=%lx\n", __FUNCTION__, phys); - memory_stat_rss_add(size, pgsize); - return 1; - } - - /* Is it a sharable page? */ - if (!page) { - kprintf("%s: WARNING !page,phys=%lx\n", __FUNCTION__, phys); - return 0; - } - /* Is this the first attempt to map the sharable page? */ - if(__sync_bool_compare_and_swap(&page->mapped.counter64, 0, 1)) { - if(range->memobj->flags & MF_SHM) { - memory_stat_rss_add(size, pgsize); - } else { - rusage_memory_stat_mapped_file_add(size, pgsize); - } - return 1; - } else { - return 0; - } - return 0; -} - -static inline void rusage_memory_stat_sub(struct memobj *memobj, unsigned long size, int pgsize) -{ - if(memobj->flags & MF_SHM) { - memory_stat_rss_sub(size, pgsize); - } else { - rusage_memory_stat_mapped_file_sub(size, pgsize); - } -} - -static inline void -rusage_kmem_add(unsigned long size) -{ - unsigned long newval; - unsigned long oldval; - unsigned long retval; - - newval = __sync_add_and_fetch(&monitor->rusage_memory_kmem_usage, size); - oldval = monitor->rusage_memory_kmem_max_usage; - while (newval > oldval) { - retval = __sync_val_compare_and_swap( - &monitor->rusage_memory_kmem_max_usage, - oldval, newval); - if (retval == oldval) { - break; - } - oldval = retval; - } -} - -static inline void -rusage_kmem_sub(unsigned long size) -{ - __sync_sub_and_fetch(&monitor->rusage_memory_kmem_usage, size); -} - -static inline void -rusage_numa_add(int numa_id, unsigned long size) -{ - __sync_add_and_fetch(monitor->rusage_memory_numa_stat + numa_id, size); - rusage_rss_add(size); -} - -static inline void -rusage_numa_sub(int numa_id, unsigned long size) -{ - rusage_rss_sub(size); - __sync_sub_and_fetch(monitor->rusage_memory_numa_stat + numa_id, size); -} - -static inline void -rusage_page_add(int numa_id, unsigned long pages, int is_user) -{ - unsigned long size = pages * PAGE_SIZE; - unsigned long newval; - unsigned long oldval; - unsigned long retval; - - if (is_user) - rusage_numa_add(numa_id, size); - else - rusage_kmem_add(size); - - newval = __sync_add_and_fetch(&monitor->rusage_total_memory_usage, size); - oldval = monitor->rusage_total_memory_max_usage; - while (newval > oldval) { - retval = __sync_val_compare_and_swap(&monitor->rusage_total_memory_max_usage, - oldval, newval); - if (retval == oldval) { - if (monitor->rusage_total_memory - newval < - RUSAGE_MEM_LIMIT) { - eventfd(); - } - break; - } - oldval = retval; - } -} - -static inline void -rusage_page_sub(int numa_id, unsigned long pages, int is_user) -{ - unsigned long size = pages * PAGE_SIZE; - - __sync_sub_and_fetch(&monitor->rusage_total_memory_usage, size); - - if (is_user) - rusage_numa_sub(numa_id, size); - else - rusage_kmem_sub(size); -} - -static inline void -rusage_num_threads_inc() -{ - unsigned long newval; - unsigned long oldval; - unsigned long retval; - - newval = __sync_add_and_fetch(&monitor->rusage_num_threads, 1); - oldval = monitor->rusage_max_num_threads; - while (newval > oldval) { - retval = __sync_val_compare_and_swap(&monitor-> - rusage_max_num_threads, - oldval, newval); - if (retval == oldval) { - break; - } - oldval = retval; - } -} - -static inline void -rusage_num_threads_dec() -{ - __sync_sub_and_fetch(&monitor->rusage_num_threads, 1); -} -#else -static inline void -rusage_total_memory_add(unsigned long size) -{ -} - -static inline void -rusage_rss_add(unsigned long size) -{ -} - -static inline void -rusage_rss_sub(unsigned long size) -{ -} - -static inline void memory_stat_rss_add(unsigned long size, size_t pgsize) -{ -} - -static inline void memory_stat_rss_sub(unsigned long size, size_t pgsize) -{ -} - -static inline void rusage_memory_stat_mapped_file_add(unsigned long size, int pgsize) -{ -} - -static inline void rusage_memory_stat_mapped_file_sub(unsigned long size, int pgsize) -{ -} - -static inline int rusage_memory_stat_add(struct vm_range *range, uintptr_t phys, unsigned long size, int pgsize) -{ - return 0; -} - -static inline void rusage_memory_stat_sub(struct memobj *memobj, unsigned long size, int pgsize) -{ -} - -static inline void -rusage_numa_add(int numa_id, unsigned long size) -{ -} - -static inline void -rusage_numa_sub(int numa_id, unsigned long size) -{ -} - -static inline void -rusage_page_add(int numa_id, unsigned long size, int is_user) -{ -} - -static inline void -rusage_page_sub(int numa_id, unsigned long size, int is_user) -{ -} - -static inline void -rusage_num_threads_inc() -{ -} - -static inline void -rusage_num_threads_dec() -{ -} -#endif // ENABLE_RUSAGE + unsigned long num_numa_nodes; + unsigned long num_processors; + unsigned long ns_per_tsc; +}; #endif diff --git a/kernel/include/rusage_private.h b/kernel/include/rusage_private.h new file mode 100644 index 00000000..12607779 --- /dev/null +++ b/kernel/include/rusage_private.h @@ -0,0 +1,369 @@ +/* Interface toward kernel */ + +#ifndef RUSAGE_PRIVATE_H_INCLUDED +#define RUSAGE_PRIVATE_H_INCLUDED + +#include +#include +#include +#include +#include +#include + +#ifdef ENABLE_RUSAGE + +#define RUSAGE_MEM_LIMIT (2 * 1024 * 1024) // 2MB + +extern void eventfd(); + +static inline void +rusage_total_memory_add(unsigned long size) +{ + rusage->total_memory += size; +} + +static inline void +rusage_rss_add(unsigned long size) +{ + unsigned long newval; + unsigned long oldval; + unsigned long retval; + + newval = __sync_add_and_fetch(&rusage->rss_current, size); + oldval = rusage->memory_max_usage; + while (newval > oldval) { + retval = __sync_val_compare_and_swap(&rusage->memory_max_usage, + oldval, newval); + if (retval == oldval) { + break; + } + oldval = retval; + } +} + +static inline void +rusage_rss_sub(unsigned long size) +{ + __sync_sub_and_fetch(&rusage->rss_current, size); +} + +static inline void memory_stat_rss_add(unsigned long size, int pgsize) +{ + ihk_atomic_add_long(size, &rusage->memory_stat_rss[rusage_pgsize_to_pgtype(pgsize)]); +} + +static inline void memory_stat_rss_sub(unsigned long size, int pgsize) +{ + ihk_atomic_add_long(-size, &rusage->memory_stat_rss[rusage_pgsize_to_pgtype(pgsize)]); +} + +static inline void rusage_memory_stat_mapped_file_add(unsigned long size, int pgsize) +{ + ihk_atomic_add_long(size, &rusage->memory_stat_mapped_file[rusage_pgsize_to_pgtype(pgsize)]); +} + +static inline void rusage_memory_stat_mapped_file_sub(unsigned long size, int pgsize) +{ + ihk_atomic_add_long(-size, &rusage->memory_stat_mapped_file[rusage_pgsize_to_pgtype(pgsize)]); +} + +static inline int rusage_memory_stat_add(struct vm_range *range, uintptr_t phys, unsigned long size, int pgsize) +{ + /* Is it resident in main memory? */ + if (range->flag & (VR_REMOTE | VR_IO_NOCACHE | VR_RESERVED)) { + return 0; + } + /* Is it anonymous and pre-paging? */ + if (!range->memobj) { + memory_stat_rss_add(size, pgsize); + return 1; + } + /* Is it devobj or (fileobj and pre-map) or xpmem attachment? */ + if ((range->memobj->flags & MF_DEV_FILE) || + (range->memobj->flags & MF_PREMAP) || + (range->memobj->flags & MF_XPMEM) + ) { + return 0; + } + /* Is it anonymous and demand-paging? */ + if (range->memobj->flags & MF_ZEROOBJ) { + memory_stat_rss_add(size, pgsize); + return 1; + } + + struct page *page = phys_to_page(phys); + + /* Is It file map and cow page? */ + if ((range->memobj->flags & (MF_DEV_FILE | MF_REG_FILE)) && + !page) { + //kprintf("%s: cow,phys=%lx\n", __FUNCTION__, phys); + memory_stat_rss_add(size, pgsize); + return 1; + } + + /* Is it a sharable page? */ + if (!page) { + kprintf("%s: WARNING !page,phys=%lx\n", __FUNCTION__, phys); + return 0; + } + /* Is this the first attempt to map the sharable page? */ + if(__sync_bool_compare_and_swap(&page->mapped.counter64, 0, 1)) { + if(range->memobj->flags & MF_SHM) { + memory_stat_rss_add(size, pgsize); + } else { + rusage_memory_stat_mapped_file_add(size, pgsize); + } + return 1; + } else { + return 0; + } + return 0; +} + +static inline int rusage_memory_stat_add_with_page(struct vm_range *range, uintptr_t phys, unsigned long size, int pgsize, struct page *page) +{ + /* Is it resident in main memory? */ + if (range->flag & (VR_REMOTE | VR_IO_NOCACHE | VR_RESERVED)) { + return 0; + } + /* Is it anonymous and pre-paging? */ + if (!range->memobj) { + memory_stat_rss_add(size, pgsize); + return 1; + } + /* Is it devobj or (fileobj and pre-map) or xpmem attachment? */ + if ((range->memobj->flags & MF_DEV_FILE) || + (range->memobj->flags & MF_PREMAP) || + (range->memobj->flags & MF_XPMEM) + ) { + return 0; + } + /* Is it anonymous and demand-paging? */ + if (range->memobj->flags & MF_ZEROOBJ) { + memory_stat_rss_add(size, pgsize); + return 1; + } + + /* Is It file map and cow page? */ + if ((range->memobj->flags & (MF_DEV_FILE | MF_REG_FILE)) && + !page) { + //kprintf("%s: cow,phys=%lx\n", __FUNCTION__, phys); + memory_stat_rss_add(size, pgsize); + return 1; + } + + /* Is it a sharable page? */ + if (!page) { + kprintf("%s: WARNING !page,phys=%lx\n", __FUNCTION__, phys); + return 0; + } + /* Is this the first attempt to map the sharable page? */ + if(__sync_bool_compare_and_swap(&page->mapped.counter64, 0, 1)) { + if(range->memobj->flags & MF_SHM) { + memory_stat_rss_add(size, pgsize); + } else { + rusage_memory_stat_mapped_file_add(size, pgsize); + } + return 1; + } else { + return 0; + } + return 0; +} + +static inline void rusage_memory_stat_sub(struct memobj *memobj, unsigned long size, int pgsize) +{ + if(memobj->flags & MF_SHM) { + memory_stat_rss_sub(size, pgsize); + } else { + rusage_memory_stat_mapped_file_sub(size, pgsize); + } +} + +static inline void +rusage_kmem_add(unsigned long size) +{ + unsigned long newval; + unsigned long oldval; + unsigned long retval; + + newval = __sync_add_and_fetch(&rusage->memory_kmem_usage, size); + oldval = rusage->memory_kmem_max_usage; + while (newval > oldval) { + retval = __sync_val_compare_and_swap( + &rusage->memory_kmem_max_usage, + oldval, newval); + if (retval == oldval) { + break; + } + oldval = retval; + } +} + +static inline void +rusage_kmem_sub(unsigned long size) +{ + __sync_sub_and_fetch(&rusage->memory_kmem_usage, size); +} + +static inline void +rusage_numa_add(int numa_id, unsigned long size) +{ + __sync_add_and_fetch(rusage->memory_numa_stat + numa_id, size); + rusage_rss_add(size); +} + +static inline void +rusage_numa_sub(int numa_id, unsigned long size) +{ + rusage_rss_sub(size); + __sync_sub_and_fetch(rusage->memory_numa_stat + numa_id, size); +} + +static inline void +rusage_page_add(int numa_id, unsigned long pages, int is_user) +{ + unsigned long size = pages * PAGE_SIZE; + unsigned long newval; + unsigned long oldval; + unsigned long retval; + + if (is_user) + rusage_numa_add(numa_id, size); + else + rusage_kmem_add(size); + + newval = __sync_add_and_fetch(&rusage->total_memory_usage, size); + oldval = rusage->total_memory_max_usage; + while (newval > oldval) { + retval = __sync_val_compare_and_swap(&rusage->total_memory_max_usage, + oldval, newval); + if (retval == oldval) { + if (rusage->total_memory - newval < + RUSAGE_MEM_LIMIT) { + eventfd(); + } + break; + } + oldval = retval; + } +} + +static inline void +rusage_page_sub(int numa_id, unsigned long pages, int is_user) +{ + unsigned long size = pages * PAGE_SIZE; + + __sync_sub_and_fetch(&rusage->total_memory_usage, size); + + if (is_user) + rusage_numa_sub(numa_id, size); + else + rusage_kmem_sub(size); +} + +static inline void +rusage_num_threads_inc() +{ + unsigned long newval; + unsigned long oldval; + unsigned long retval; + + newval = __sync_add_and_fetch(&rusage->num_threads, 1); + oldval = rusage->max_num_threads; + while (newval > oldval) { + retval = __sync_val_compare_and_swap(&rusage-> + max_num_threads, + oldval, newval); + if (retval == oldval) { + break; + } + oldval = retval; + } +} + +static inline void +rusage_num_threads_dec() +{ + __sync_sub_and_fetch(&rusage->num_threads, 1); +} +#else +static inline void +rusage_total_memory_add(unsigned long size) +{ +} + +static inline void +rusage_rss_add(unsigned long size) +{ +} + +static inline void +rusage_rss_sub(unsigned long size) +{ +} + +static inline void memory_stat_rss_add(unsigned long size, size_t pgsize) +{ +} + +static inline void memory_stat_rss_sub(unsigned long size, size_t pgsize) +{ +} + +static inline void rusage_memory_stat_mapped_file_add(unsigned long size, int pgsize) +{ +} + +static inline void rusage_memory_stat_mapped_file_sub(unsigned long size, int pgsize) +{ +} + +static inline int rusage_memory_stat_add_with_page(struct vm_range *range, struct page *page, unsigned long size, int pgsize) +{ + return 0; +} + +static inline int rusage_memory_stat_add(struct vm_range *range, uintptr_t phys, unsigned long size, int pgsize) +{ + return 0; +} + +static inline void rusage_memory_stat_sub(struct memobj *memobj, unsigned long size, int pgsize) +{ +} + +static inline void +rusage_numa_add(int numa_id, unsigned long size) +{ +} + +static inline void +rusage_numa_sub(int numa_id, unsigned long size) +{ +} + +static inline void +rusage_page_add(int numa_id, unsigned long size, int is_user) +{ +} + +static inline void +rusage_page_sub(int numa_id, unsigned long size, int is_user) +{ +} + +static inline void +rusage_num_threads_inc() +{ +} + +static inline void +rusage_num_threads_dec() +{ +} +#endif // ENABLE_RUSAGE + +extern struct rusage_global *rusage; + +#endif /* !defined(RUSAGE_PRIVATE_H_INCLUDED) */ diff --git a/kernel/init.c b/kernel/init.c index d2a96672..e43fc201 100644 --- a/kernel/init.c +++ b/kernel/init.c @@ -31,7 +31,8 @@ #include #include #include -#include +#include +#include //#define IOCTL_FUNC_EXTENSION #ifdef IOCTL_FUNC_EXTENSION @@ -55,6 +56,7 @@ extern unsigned long ihk_mc_get_ns_per_tsc(void); extern long syscall(int, ihk_mc_user_context_t *); struct ihk_os_monitor *monitor; +struct rusage_global *rusage; static void handler_init(void) { @@ -279,13 +281,27 @@ static void monitor_init() monitor = ihk_mc_alloc_pages(z, IHK_MC_AP_CRITICAL); memset(monitor, 0, z * PAGE_SIZE); monitor->num_processors = num_processors; - monitor->ns_per_tsc = ihk_mc_get_ns_per_tsc(); phys = virt_to_phys(monitor); ihk_set_monitor(phys, sizeof(struct ihk_os_monitor) + sizeof(struct ihk_os_cpu_monitor) * num_processors); #endif /* POSTK_DEBUG_TEMP_FIX_73 */ } +static void rusage_init() +{ + int npages; + unsigned long phys; + + npages = (sizeof(struct rusage_global) + PAGE_SIZE -1) >> PAGE_SHIFT; + rusage = ihk_mc_alloc_pages(npages, IHK_MC_AP_CRITICAL); + memset(rusage, 0, npages * PAGE_SIZE); + rusage->num_processors = num_processors; + rusage->num_numa_nodes = ihk_mc_get_nr_numa_nodes(); + rusage->ns_per_tsc = ihk_mc_get_ns_per_tsc(); + phys = virt_to_phys(rusage); + ihk_set_rusage(phys, sizeof(struct rusage_global)); +} + int nmi_mode; static void nmi_init() @@ -310,6 +326,7 @@ static void rest_init(void) #ifndef POSTK_DEBUG_TEMP_FIX_73 /* NULL access for *monitor fix */ monitor_init(); #endif /* !POSTK_DEBUG_TEMP_FIX_73 */ + rusage_init(); cpu_local_var_init(); nmi_init(); time_init(); diff --git a/kernel/mem.c b/kernel/mem.c index 8fe450da..e9e86406 100644 --- a/kernel/mem.c +++ b/kernel/mem.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/process.c b/kernel/process.c index 40e9ef27..013bc289 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -36,8 +36,8 @@ #include #include #include -#include -#include +#include +#include //#define DEBUG_PRINT_PROCESS @@ -1866,6 +1866,7 @@ retry: /*****/ if (ptep) { + //if(rusage_memory_stat_add_with_page(range, phys, pgsize, pgsize, page)) { if(rusage_memory_stat_add(range, phys, pgsize, pgsize)) { /* on-demand paging, phys pages are obtained by ihk_mc_alloc_aligned_pages_user() or get_page() */ dkprintf("%lx+,%s: (on-demand paging && first map) || cow,calling memory_stat_rss_add(),phys=%lx,pgsize=%ld\n", diff --git a/kernel/shmobj.c b/kernel/shmobj.c index c6fabf5b..ff1595b3 100644 --- a/kernel/shmobj.c +++ b/kernel/shmobj.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) #define ekprintf(...) kprintf(__VA_ARGS__) diff --git a/kernel/syscall.c b/kernel/syscall.c index cdb66260..f85dd74a 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -56,7 +56,8 @@ #include #include #include -#include +#include +#include #include #ifdef POSTK_DEBUG_ARCH_DEP_27 #include @@ -1064,10 +1065,10 @@ void terminate(int rc, int sig) /* rusage debug */ for(i = 0; i < IHK_MAX_NUM_PGSIZES; i++) { - dkprintf("memory_stat_rss[%d]=%ld\n", i, monitor->rusage_memory_stat_rss[i]); + dkprintf("memory_stat_rss[%d]=%ld\n", i, rusage->memory_stat_rss[i]); } for(i = 0; i < IHK_MAX_NUM_PGSIZES; i++) { - dkprintf("memory_stat_mapped_file[%d]=%ld\n", i, monitor->rusage_memory_stat_mapped_file[i]); + dkprintf("memory_stat_mapped_file[%d]=%ld\n", i, rusage->memory_stat_mapped_file[i]); } #ifdef DCFA_KMOD @@ -9811,13 +9812,13 @@ set_cputime(int mode) tsc_to_ts(dtsc, &dts); if(mode == 1){ thread->user_tsc += dtsc; - monitor->user_tsc += dtsc; + v->rusage->user_tsc += dtsc; ts_add(&thread->itimer_virtual_value, &dts); ts_add(&thread->itimer_prof_value, &dts); } else{ thread->system_tsc += dtsc; - monitor->system_tsc += dtsc; + v->rusage->system_tsc += dtsc; ts_add(&thread->itimer_prof_value, &dts); } } diff --git a/lib/abort.c b/lib/abort.c index 7fada65a..a3389bd6 100644 --- a/lib/abort.c +++ b/lib/abort.c @@ -1,7 +1,7 @@ #include #include #include -#include +#include extern struct cpu_local_var *clv; diff --git a/lib/include/ihk/debug.h b/lib/include/ihk/debug.h index bf117b59..33168930 100644 --- a/lib/include/ihk/debug.h +++ b/lib/include/ihk/debug.h @@ -16,7 +16,7 @@ #include #include -#include +#include #ifdef POSTK_DEBUG_ARCH_DEP_9 /* want to add a static assertion */ diff --git a/lib/include/ihk/mm.h b/lib/include/ihk/mm.h index 5b2aa70d..34b269a9 100644 --- a/lib/include/ihk/mm.h +++ b/lib/include/ihk/mm.h @@ -227,6 +227,7 @@ int ihk_set_kmsg(unsigned long addr, unsigned long size); char *ihk_get_kargs(); int ihk_set_monitor(unsigned long addr, unsigned long size); +int ihk_set_rusage(unsigned long addr, unsigned long size); int ihk_set_nmi_mode_addr(unsigned long addr); extern void (*__tlb_flush_handler)(int vector); diff --git a/lib/include/ihk/monitor.h b/lib/include/ihk/monitor.h new file mode 100644 index 00000000..18129c78 --- /dev/null +++ b/lib/include/ihk/monitor.h @@ -0,0 +1,8 @@ +#ifndef MONITOR_H_INCLUDED +#define MONITOR_H_INCLUDED + +#include + +extern struct ihk_os_monitor *monitor; + +#endif /* !defined(MONITOR_H_INCLUDED) */ diff --git a/lib/include/ihk/rusage.h b/lib/include/ihk/rusage.h deleted file mode 100644 index 83eecb48..00000000 --- a/lib/include/ihk/rusage.h +++ /dev/null @@ -1,65 +0,0 @@ -#ifndef __IHK_RUSAGE_H -#define __IHK_RUSAGE_H - -#include -#include - -enum RUSAGE_MEMBER { - RUSAGE_RSS, - RUSAGE_CACHE, - RUSAGE_RSS_HUGE, - RUSAGE_MAPPED_FILE, - RUSAGE_MAX_USAGE, - RUSAGE_KMEM_USAGE, - RUSAGE_KMAX_USAGE, - RUSAGE_NUM_NUMA_NODES, - RUSAGE_NUMA_STAT, - RUSAGE_HUGETLB , - RUSAGE_HUGETLB_MAX , - RUSAGE_STAT_SYSTEM , - RUSAGE_STAT_USER , - RUSAGE_USAGE , - RUSAGE_USAGE_PER_CPU , - RUSAGE_NUM_THREADS , - RUSAGE_MAX_NUM_THREADS -}; - -struct r_data{ - unsigned long pid; - unsigned long rss; - unsigned long cache; - unsigned long rss_huge; - unsigned long mapped_file; - unsigned long max_usage; - unsigned long kmem_usage; - unsigned long kmax_usage; - unsigned long hugetlb; - unsigned long hugetlb_max; - unsigned long stat_system; - unsigned long stat_user; - unsigned long usage; - struct r_data *next; -} ; - -enum ihk_os_status { - IHK_STATUS_INACTIVE, - IHK_STATUS_BOOTING, - IHK_STATUS_RUNNING, - IHK_STATUS_SHUTDOWN, - IHK_STATUS_PANIC, - IHK_STATUS_HUNGUP, - IHK_STATUS_FREEZING, - IHK_STATUS_FROZEN, -}; - -enum sys_delegate_state_enum { - ENTER_KERNEL, - EXIT_KERNEL, -}; - -extern struct ihk_os_monitor *monitor; - -extern void ihk_mc_set_os_status(unsigned long st); -extern unsigned long ihk_mc_get_os_status(); - -#endif