rusage and ihklib: Fix out-of-memory reporting and cleanup
1. Fix OOM: Count memory usage only when allocation succeeded 2. Fix OOM: Make user allocation fail when memory is running out 3. Fix OOM: Move rusage_init() before numa_init() 4. Cleanup: Rename ihkconfig/ihkosctl functions 5. Cleanup: Pass event type to eventfd() 6. Cleanup: arch/.../rusage.h --> arch/.../arch_rusage.h
This commit is contained in:
@@ -3,6 +3,8 @@
|
||||
#ifndef __RUSAGE_H
|
||||
#define __RUSAGE_H
|
||||
|
||||
//#define RUSAGE_DEBUG
|
||||
|
||||
#define IHK_MAX_NUM_PGSIZES 4
|
||||
#define IHK_MAX_NUM_NUMA_NODES 1024
|
||||
#define IHK_MAX_NUM_CPUS 1024
|
||||
@@ -13,21 +15,28 @@ struct rusage_percpu {
|
||||
};
|
||||
|
||||
struct rusage_global {
|
||||
/* Memory usage accounting */
|
||||
long memory_stat_rss[IHK_MAX_NUM_PGSIZES];
|
||||
long memory_stat_mapped_file[IHK_MAX_NUM_PGSIZES];
|
||||
long rss_current; /* anon && user, used only for memory_max_usage */
|
||||
unsigned long memory_max_usage;
|
||||
unsigned long max_num_threads;
|
||||
unsigned long num_threads;
|
||||
long rss_current;
|
||||
unsigned long memory_kmem_usage;
|
||||
unsigned long memory_kmem_max_usage;
|
||||
unsigned long memory_numa_stat[IHK_MAX_NUM_NUMA_NODES];
|
||||
|
||||
/* CPU usage accounting */
|
||||
struct rusage_percpu cpu[IHK_MAX_NUM_CPUS]; /* clv[i].monitor = &cpu[i] */
|
||||
|
||||
/* OOM monitoring */
|
||||
unsigned long total_memory;
|
||||
unsigned long total_memory_usage;
|
||||
unsigned long total_memory_max_usage;
|
||||
|
||||
#ifdef RUSAGE_DEBUG
|
||||
unsigned long total_memory_max_usage_old; /* debug */
|
||||
#endif
|
||||
/* Used for translating results into struct mckernel_rusage */
|
||||
unsigned long num_numa_nodes;
|
||||
unsigned long num_processors;
|
||||
unsigned long ns_per_tsc;
|
||||
|
||||
@@ -8,18 +8,25 @@
|
||||
#include <ihk/atomic.h>
|
||||
#include <memobj.h>
|
||||
#include <rusage.h>
|
||||
#include <arch/rusage.h>
|
||||
#include <ihk/ihk_monitor.h>
|
||||
#include <arch_rusage.h>
|
||||
|
||||
#ifdef ENABLE_RUSAGE
|
||||
|
||||
#define RUSAGE_MEM_LIMIT (2 * 1024 * 1024) // 2MB
|
||||
#define RUSAGE_OOM_MARGIN (2 * 1024 * 1024) // 2MB
|
||||
|
||||
extern void eventfd();
|
||||
extern void eventfd(int type);
|
||||
|
||||
static inline void
|
||||
rusage_total_memory_add(unsigned long size)
|
||||
{
|
||||
#ifdef RUSAGE_DEBUG
|
||||
kprintf("%s: total_memory=%ld,size=%ld\n", __FUNCTION__, rusage->total_memory, size);
|
||||
#endif
|
||||
rusage->total_memory += size;
|
||||
#ifdef RUSAGE_DEBUG
|
||||
kprintf("%s: total_memory=%ld\n", __FUNCTION__, rusage->total_memory);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void
|
||||
@@ -220,6 +227,22 @@ rusage_numa_sub(int numa_id, unsigned long size)
|
||||
__sync_sub_and_fetch(rusage->memory_numa_stat + numa_id, size);
|
||||
}
|
||||
|
||||
static inline int
|
||||
rusage_check_oom(int numa_id, unsigned long pages, int is_user)
|
||||
{
|
||||
unsigned long size = pages * PAGE_SIZE;
|
||||
|
||||
if (rusage->total_memory_usage + size > rusage->total_memory - RUSAGE_OOM_MARGIN) {
|
||||
kprintf("%s: memory used:%ld available:%ld\n", __FUNCTION__, rusage->total_memory_usage, rusage->total_memory);
|
||||
eventfd(IHK_OS_EVENTFD_TYPE_OOM);
|
||||
if (is_user) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void
|
||||
rusage_page_add(int numa_id, unsigned long pages, int is_user)
|
||||
{
|
||||
@@ -228,6 +251,12 @@ rusage_page_add(int numa_id, unsigned long pages, int is_user)
|
||||
unsigned long oldval;
|
||||
unsigned long retval;
|
||||
|
||||
#ifdef RUSAGE_DEBUG
|
||||
if (numa_id < 0 || numa_id >= rusage->num_numa_nodes) {
|
||||
kprintf("%s: Error: invalid numa_id=%d\n", __FUNCTION__, numa_id);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
if (is_user)
|
||||
rusage_numa_add(numa_id, size);
|
||||
else
|
||||
@@ -239,10 +268,12 @@ rusage_page_add(int numa_id, unsigned long pages, int is_user)
|
||||
retval = __sync_val_compare_and_swap(&rusage->total_memory_max_usage,
|
||||
oldval, newval);
|
||||
if (retval == oldval) {
|
||||
if (rusage->total_memory - newval <
|
||||
RUSAGE_MEM_LIMIT) {
|
||||
eventfd();
|
||||
#ifdef RUSAGE_DEBUG
|
||||
if (rusage->total_memory_max_usage > rusage->total_memory_max_usage_old + (1 * (1ULL << 30))) {
|
||||
kprintf("%s: max(%ld) > old + 1GB,numa_id=%d\n", __FUNCTION__, rusage->total_memory_max_usage, numa_id);
|
||||
rusage->total_memory_max_usage_old = rusage->total_memory_max_usage;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
oldval = retval;
|
||||
@@ -253,7 +284,15 @@ static inline void
|
||||
rusage_page_sub(int numa_id, unsigned long pages, int is_user)
|
||||
{
|
||||
unsigned long size = pages * PAGE_SIZE;
|
||||
|
||||
#ifdef RUSAGE_DEBUG
|
||||
if (numa_id < 0 || numa_id >= rusage->num_numa_nodes) {
|
||||
kprintf("%s: Error: invalid numa_id=%d\n", __FUNCTION__, numa_id);
|
||||
return;
|
||||
}
|
||||
if (rusage->total_memory_usage < size) {
|
||||
kprintf("%s: Error, total_memory_usage=%ld,size=%ld\n", __FUNCTION__, rusage->total_memory_max_usage, size);
|
||||
}
|
||||
#endif
|
||||
__sync_sub_and_fetch(&rusage->total_memory_usage, size);
|
||||
|
||||
if (is_user)
|
||||
@@ -343,9 +382,15 @@ rusage_numa_sub(int numa_id, unsigned long size)
|
||||
{
|
||||
}
|
||||
|
||||
/*
 * Stub variant of rusage_check_oom(): performs no accounting and never
 * reports an out-of-memory condition.
 * NOTE(review): presumably the build without ENABLE_RUSAGE -- the
 * enclosing preprocessor branch is not visible here.
 *
 * Always returns 0, i.e. the caller may proceed with the allocation.
 */
static inline int
rusage_check_oom(int numa_id, unsigned long pages, int is_user)
{
	/* Parameters are intentionally unused in this variant. */
	(void)numa_id;
	(void)pages;
	(void)is_user;

	return 0;
}
|
||||
/*
 * Stub variant of rusage_page_add(): does nothing.
 * NOTE(review): presumably the build without ENABLE_RUSAGE -- the
 * enclosing preprocessor branch is not visible here.
 *
 * The second parameter is named `pages` to match the accounting
 * implementation of rusage_page_add(), which takes a page count and
 * converts it to bytes itself.
 *
 * @numa_id: NUMA node the pages were allocated from (ignored).
 * @pages:   number of pages allocated (ignored).
 * @is_user: non-zero for user allocations (ignored).
 */
static inline void
rusage_page_add(int numa_id, unsigned long pages, int is_user)
{
	/* Parameters are intentionally unused in this variant. */
	(void)numa_id;
	(void)pages;
	(void)is_user;
}
|
||||
|
||||
static inline void
|
||||
|
||||
@@ -268,6 +268,11 @@ struct ikc_scd_packet {
|
||||
enum mcctrl_os_cpu_operation op;
|
||||
void *resp;
|
||||
};
|
||||
|
||||
/* SCD_MSG_EVENTFD */
|
||||
struct {
|
||||
int eventfd_type;
|
||||
};
|
||||
};
|
||||
char padding[12];
|
||||
};
|
||||
|
||||
@@ -31,7 +31,6 @@
|
||||
#include <cls.h>
|
||||
#include <syscall.h>
|
||||
#include <sysfs.h>
|
||||
#include <rusage_private.h>
|
||||
#include <ihk/monitor.h>
|
||||
|
||||
//#define IOCTL_FUNC_EXTENSION
|
||||
@@ -287,21 +286,6 @@ static void monitor_init()
|
||||
#endif /* POSTK_DEBUG_TEMP_FIX_73 */
|
||||
}
|
||||
|
||||
static void rusage_init()
|
||||
{
|
||||
int npages;
|
||||
unsigned long phys;
|
||||
|
||||
npages = (sizeof(struct rusage_global) + PAGE_SIZE -1) >> PAGE_SHIFT;
|
||||
rusage = ihk_mc_alloc_pages(npages, IHK_MC_AP_CRITICAL);
|
||||
memset(rusage, 0, npages * PAGE_SIZE);
|
||||
rusage->num_processors = num_processors;
|
||||
rusage->num_numa_nodes = ihk_mc_get_nr_numa_nodes();
|
||||
rusage->ns_per_tsc = ihk_mc_get_ns_per_tsc();
|
||||
phys = virt_to_phys(rusage);
|
||||
ihk_set_rusage(phys, sizeof(struct rusage_global));
|
||||
}
|
||||
|
||||
int nmi_mode;
|
||||
|
||||
static void nmi_init()
|
||||
@@ -326,7 +310,6 @@ static void rest_init(void)
|
||||
#ifndef POSTK_DEBUG_TEMP_FIX_73 /* NULL access for *monitor fix */
|
||||
monitor_init();
|
||||
#endif /* !POSTK_DEBUG_TEMP_FIX_73 */
|
||||
rusage_init();
|
||||
cpu_local_var_init();
|
||||
nmi_init();
|
||||
time_init();
|
||||
|
||||
92
kernel/mem.c
92
kernel/mem.c
@@ -80,6 +80,8 @@ static void *___ihk_mc_alloc_aligned_pages_node(int npages,
|
||||
static void *___ihk_mc_alloc_pages(int npages, ihk_mc_ap_flag flag, int is_user);
|
||||
static void ___ihk_mc_free_pages(void *p, int npages, int is_user);
|
||||
|
||||
extern unsigned long ihk_mc_get_ns_per_tsc(void);
|
||||
|
||||
/*
|
||||
* Page allocator tracking routines
|
||||
*/
|
||||
@@ -571,14 +573,22 @@ static void *mckernel_allocate_aligned_pages_node(int npages, int p2align,
|
||||
if (pref_node > -1 && pref_node < ihk_mc_get_nr_numa_nodes()) {
|
||||
#ifdef IHK_RBTREE_ALLOCATOR
|
||||
{
|
||||
pa = ihk_numa_alloc_pages(&memory_nodes[pref_node], npages, p2align);
|
||||
if (rusage_check_oom(pref_node, npages, is_user) == -ENOMEM) {
|
||||
pa = 0;
|
||||
} else {
|
||||
pa = ihk_numa_alloc_pages(&memory_nodes[pref_node], npages, p2align);
|
||||
}
|
||||
#else
|
||||
list_for_each_entry(pa_allocator,
|
||||
&memory_nodes[pref_node].allocators, list) {
|
||||
pa = ihk_pagealloc_alloc(pa_allocator, npages, p2align);
|
||||
if (rusage_check_oom(pref_node, npages, is_user) == -ENOMEM) {
|
||||
pa = 0;
|
||||
} else {
|
||||
pa = ihk_pagealloc_alloc(pa_allocator, npages, p2align);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (pa) {
|
||||
rusage_page_add(pref_node, npages, is_user);
|
||||
dkprintf("%s: explicit (node: %d) CPU @ node %d allocated "
|
||||
"%d pages from node %d\n",
|
||||
__FUNCTION__,
|
||||
@@ -586,8 +596,6 @@ static void *mckernel_allocate_aligned_pages_node(int npages, int p2align,
|
||||
ihk_mc_get_numa_id(),
|
||||
npages, node);
|
||||
|
||||
rusage_page_add(pref_node, npages, is_user);
|
||||
|
||||
return phys_to_virt(pa);
|
||||
}
|
||||
else {
|
||||
@@ -617,23 +625,30 @@ static void *mckernel_allocate_aligned_pages_node(int npages, int p2align,
|
||||
numa_id = memory_nodes[node].nodes_by_distance[i].id;
|
||||
#ifdef IHK_RBTREE_ALLOCATOR
|
||||
{
|
||||
pa = ihk_numa_alloc_pages(&memory_nodes[memory_nodes[node].
|
||||
nodes_by_distance[i].id], npages, p2align);
|
||||
if (rusage_check_oom(numa_id, npages, is_user) == -ENOMEM) {
|
||||
pa = 0;
|
||||
} else {
|
||||
pa = ihk_numa_alloc_pages(&memory_nodes[memory_nodes[node].
|
||||
nodes_by_distance[i].id], npages, p2align);
|
||||
}
|
||||
#else
|
||||
list_for_each_entry(pa_allocator,
|
||||
&memory_nodes[numa_id].allocators, list) {
|
||||
pa = ihk_pagealloc_alloc(pa_allocator, npages, p2align);
|
||||
if (rusage_check_oom(numa_id, npages, is_user) == -ENOMEM) {
|
||||
pa = 0;
|
||||
} else {
|
||||
pa = ihk_pagealloc_alloc(pa_allocator, npages, p2align);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (pa) {
|
||||
rusage_page_add(numa_id, npages, is_user);
|
||||
dkprintf("%s: policy: CPU @ node %d allocated "
|
||||
"%d pages from node %d\n",
|
||||
__FUNCTION__,
|
||||
ihk_mc_get_numa_id(),
|
||||
npages, node);
|
||||
|
||||
rusage_page_add(numa_id, npages,
|
||||
is_user);
|
||||
|
||||
break;
|
||||
}
|
||||
@@ -674,22 +689,31 @@ distance_based:
|
||||
|
||||
#ifdef IHK_RBTREE_ALLOCATOR
|
||||
{
|
||||
pa = ihk_numa_alloc_pages(&memory_nodes[memory_nodes[node].
|
||||
nodes_by_distance[i].id], npages, p2align);
|
||||
if (rusage_check_oom(numa_id, npages, is_user) == -ENOMEM) {
|
||||
pa = 0;
|
||||
} else {
|
||||
pa = ihk_numa_alloc_pages(&memory_nodes[memory_nodes[node].
|
||||
nodes_by_distance[i].id], npages, p2align);
|
||||
}
|
||||
#else
|
||||
list_for_each_entry(pa_allocator,
|
||||
&memory_nodes[numa_id].allocators, list) {
|
||||
pa = ihk_pagealloc_alloc(pa_allocator, npages, p2align);
|
||||
if (rusage_check_oom(numa_id, npages, is_user) == -ENOMEM) {
|
||||
pa = 0;
|
||||
} else {
|
||||
pa = ihk_pagealloc_alloc(pa_allocator, npages, p2align);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
if (pa) {
|
||||
rusage_page_add(numa_id, npages, is_user);
|
||||
dkprintf("%s: distance: CPU @ node %d allocated "
|
||||
"%d pages from node %d\n",
|
||||
__FUNCTION__,
|
||||
ihk_mc_get_numa_id(),
|
||||
npages,
|
||||
memory_nodes[node].nodes_by_distance[i].id);
|
||||
rusage_page_add(numa_id, npages, is_user);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -708,13 +732,22 @@ order_based:
|
||||
numa_id = (node + i) % ihk_mc_get_nr_numa_nodes();
|
||||
#ifdef IHK_RBTREE_ALLOCATOR
|
||||
{
|
||||
pa = ihk_numa_alloc_pages(&memory_nodes[(node + i) %
|
||||
ihk_mc_get_nr_numa_nodes()], npages, p2align);
|
||||
if (rusage_check_oom(numa_id, npages, is_user) == -ENOMEM) {
|
||||
pa = 0;
|
||||
} else {
|
||||
pa = ihk_numa_alloc_pages(&memory_nodes[(node + i) %
|
||||
ihk_mc_get_nr_numa_nodes()], npages, p2align);
|
||||
}
|
||||
#else
|
||||
list_for_each_entry(pa_allocator,
|
||||
&memory_nodes[numa_id].allocators, list) {
|
||||
pa = ihk_pagealloc_alloc(pa_allocator, npages, p2align);
|
||||
if (rusage_check_oom(numa_id, npages, is_user) == -ENOMEM) {
|
||||
pa = 0;
|
||||
} else {
|
||||
pa = ihk_pagealloc_alloc(pa_allocator, npages, p2align);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (pa) {
|
||||
rusage_page_add(numa_id, npages, is_user);
|
||||
break;
|
||||
@@ -730,6 +763,7 @@ order_based:
|
||||
if(flag != IHK_MC_AP_NOWAIT)
|
||||
panic("Not enough space\n");
|
||||
*/
|
||||
dkprintf("OOM\n", __FUNCTION__);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -1256,13 +1290,13 @@ static void numa_init(void)
|
||||
#endif
|
||||
|
||||
#ifdef IHK_RBTREE_ALLOCATOR
|
||||
dkprintf("Physical memory: 0x%lx - 0x%lx, %lu bytes, %d pages available @ NUMA: %d\n",
|
||||
kprintf("Physical memory: 0x%lx - 0x%lx, %lu bytes, %d pages available @ NUMA: %d\n",
|
||||
start, end,
|
||||
end - start,
|
||||
(end - start) >> PAGE_SHIFT,
|
||||
numa_id);
|
||||
#else
|
||||
dkprintf("Physical memory: 0x%lx - 0x%lx, %lu bytes, %d pages available @ NUMA: %d\n",
|
||||
kprintf("Physical memory: 0x%lx - 0x%lx, %lu bytes, %d pages available @ NUMA: %d\n",
|
||||
start, end,
|
||||
ihk_pagealloc_count(allocator) * PAGE_SIZE,
|
||||
ihk_pagealloc_count(allocator),
|
||||
@@ -1659,6 +1693,22 @@ void ihk_mc_clean_micpa(void){
|
||||
}
|
||||
#endif
|
||||
|
||||
static void rusage_init()
|
||||
{
|
||||
int npages;
|
||||
unsigned long phys;
|
||||
|
||||
npages = (sizeof(struct rusage_global) + PAGE_SIZE -1) >> PAGE_SHIFT;
|
||||
rusage = ihk_mc_alloc_pages(npages, IHK_MC_AP_CRITICAL);
|
||||
memset(rusage, 0, npages * PAGE_SIZE);
|
||||
rusage->num_processors = num_processors;
|
||||
rusage->num_numa_nodes = ihk_mc_get_nr_numa_nodes();
|
||||
rusage->ns_per_tsc = ihk_mc_get_ns_per_tsc();
|
||||
phys = virt_to_phys(rusage);
|
||||
ihk_set_rusage(phys, sizeof(struct rusage_global));
|
||||
dkprintf("%s: rusage->total_memory=%ld\n", __FUNCTION__, rusage->total_memory);
|
||||
}
|
||||
|
||||
#ifdef POSTK_DEBUG_TEMP_FIX_73 /* NULL access for *monitor fix */
|
||||
extern void monitor_init(void);
|
||||
#endif /* POSTK_DEBUG_TEMP_FIX_73 */
|
||||
@@ -1667,6 +1717,10 @@ void mem_init(void)
|
||||
#ifdef POSTK_DEBUG_TEMP_FIX_73 /* NULL access for *monitor fix */
|
||||
monitor_init();
|
||||
#endif /* !POSTK_DEBUG_TEMP_FIX_73 */
|
||||
|
||||
/* This must precede numa_init() because rusage->total_memory is initialized in numa_init() */
|
||||
rusage_init();
|
||||
|
||||
/* Initialize NUMA information and memory allocator bitmaps */
|
||||
numa_init();
|
||||
|
||||
|
||||
@@ -2638,6 +2638,19 @@ void release_thread(struct thread *thread)
|
||||
|
||||
release_process_vm(vm);
|
||||
rusage_num_threads_dec();
|
||||
|
||||
#ifdef RUSAGE_DEBUG
|
||||
if (rusage->num_threads == 0) {
|
||||
int i;
|
||||
kprintf("total_memory_usage=%ld\n", rusage->total_memory_usage);
|
||||
for(i = 0; i < IHK_MAX_NUM_PGSIZES; i++) {
|
||||
kprintf("memory_stat_rss[%d]=%ld\n", i, rusage->memory_stat_rss[i]);
|
||||
}
|
||||
for(i = 0; i < IHK_MAX_NUM_PGSIZES; i++) {
|
||||
kprintf("memory_stat_mapped_file[%d]=%ld\n", i, rusage->memory_stat_mapped_file[i]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void cpu_set(int cpu, cpu_set_t *cpu_set, ihk_spinlock_t *lock)
|
||||
@@ -3397,6 +3410,18 @@ void runq_add_thread(struct thread *thread, int cpu_id)
|
||||
procfs_create_thread(thread);
|
||||
|
||||
rusage_num_threads_inc();
|
||||
#ifdef RUSAGE_DEBUG
|
||||
if (rusage->num_threads == 1) {
|
||||
int i;
|
||||
kprintf("total_memory_usage=%ld\n", rusage->total_memory_usage);
|
||||
for(i = 0; i < IHK_MAX_NUM_PGSIZES; i++) {
|
||||
kprintf("memory_stat_rss[%d]=%ld\n", i, rusage->memory_stat_rss[i]);
|
||||
}
|
||||
for(i = 0; i < IHK_MAX_NUM_PGSIZES; i++) {
|
||||
kprintf("memory_stat_mapped_file[%d]=%ld\n", i, rusage->memory_stat_mapped_file[i]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Kick scheduler */
|
||||
#ifdef POSTK_DEBUG_ARCH_DEP_8 /* arch depend hide */
|
||||
|
||||
@@ -1063,14 +1063,6 @@ void terminate(int rc, int sig)
|
||||
|
||||
dkprintf("terminate,pid=%d\n", proc->pid);
|
||||
|
||||
/* rusage debug */
|
||||
for(i = 0; i < IHK_MAX_NUM_PGSIZES; i++) {
|
||||
dkprintf("memory_stat_rss[%d]=%ld\n", i, rusage->memory_stat_rss[i]);
|
||||
}
|
||||
for(i = 0; i < IHK_MAX_NUM_PGSIZES; i++) {
|
||||
dkprintf("memory_stat_mapped_file[%d]=%ld\n", i, rusage->memory_stat_mapped_file[i]);
|
||||
}
|
||||
|
||||
#ifdef DCFA_KMOD
|
||||
do_mod_exit(rc);
|
||||
#endif
|
||||
@@ -1147,8 +1139,7 @@ terminate_host(int pid)
|
||||
do_kill(cpu_local_var(current), pid, -1, SIGKILL, NULL, 0);
|
||||
}
|
||||
|
||||
void
|
||||
eventfd()
|
||||
void eventfd(int type)
|
||||
{
|
||||
struct ihk_ikc_channel_desc *syscall_channel;
|
||||
struct ikc_scd_packet pckt;
|
||||
@@ -1156,6 +1147,7 @@ eventfd()
|
||||
syscall_channel = get_cpu_local_var(0)->ikc2linux;
|
||||
memset(&pckt, '\0', sizeof pckt);
|
||||
pckt.msg = SCD_MSG_EVENTFD;
|
||||
pckt.eventfd_type = type;
|
||||
ihk_ikc_send(syscall_channel, &pckt, 0);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user