Merge tag '1.6.0-rc1' into master-1.6.0

Release target Nov 11, 2018

Conflicts:
	configure
This commit is contained in:
Masamichi Takagi
2018-11-08 10:49:38 +09:00
538 changed files with 45347 additions and 8629 deletions

1
.gitignore vendored
View File

@@ -1,3 +1,4 @@
*~
*.o
*.elf
*.bin

3
.gitmodules vendored Normal file
View File

@@ -0,0 +1,3 @@
[submodule "ihk"]
path = ihk
url = https://github.com/RIKEN-SysSoft/ihk.git

View File

@@ -30,6 +30,7 @@
#include <debug-monitors.h>
#include <sysreg.h>
#include <cpufeature.h>
#include <debug.h>
#ifdef POSTK_DEBUG_ARCH_DEP_65
#include <hwcap.h>
#endif /* POSTK_DEBUG_ARCH_DEP_65 */
@@ -39,16 +40,10 @@
#include "postk_print_sysreg.c"
#ifdef DEBUG_PRINT_CPU
#define dkprintf kprintf
#define ekprintf kprintf
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf kprintf
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
#define BUG_ON(condition) do { if (condition) { kprintf("PANIC: %s: %s(line:%d)\n",\
__FILE__, __FUNCTION__, __LINE__); panic(""); } } while(0)
struct cpuinfo_arm64 cpuinfo_data[NR_CPUS]; /* index is logical cpuid */
static unsigned int per_cpu_timer_val[NR_CPUS] = { 0 };
@@ -1283,7 +1278,6 @@ int ihk_mc_interrupt_cpu(int cpu, int vector)
return 0;
}
#ifdef POSTK_DEBUG_ARCH_DEP_22
/*
* @ref.impl linux-linaro/arch/arm64/kernel/process.c::tls_thread_switch()
*/
@@ -1309,14 +1303,13 @@ struct thread *arch_switch_context(struct thread *prev, struct thread *next)
extern void perf_start(struct mc_perf_event *event);
extern void perf_reset(struct mc_perf_event *event);
struct thread *last;
#ifdef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */
struct mcs_rwlock_node_irqsave lock;
#endif /* POSTK_DEBUG_TEMP_FIX_41 */
/* Set up new TLS.. */
dkprintf("[%d] arch_switch_context: tlsblock_base: 0x%lX\n",
ihk_mc_get_processor_id(), next->tlsblock_base);
#ifdef ENABLE_PERF
/* Performance monitoring inherit */
if(next->proc->monitoring_event) {
if(next->proc->perf_status == PP_RESET)
@@ -1326,10 +1319,10 @@ struct thread *arch_switch_context(struct thread *prev, struct thread *next)
perf_start(next->proc->monitoring_event);
}
}
#endif /*ENABLE_PERF*/
if (likely(prev)) {
tls_thread_switch(prev, next);
#ifdef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */
mcs_rwlock_writer_lock(&prev->proc->update_lock, &lock);
if (prev->proc->status & (PS_DELAY_STOPPED | PS_DELAY_TRACED)) {
switch (prev->proc->status) {
@@ -1343,11 +1336,12 @@ struct thread *arch_switch_context(struct thread *prev, struct thread *next)
break;
}
mcs_rwlock_writer_unlock(&prev->proc->update_lock, &lock);
/* Wake up the parent who tried wait4 and sleeping */
waitq_wakeup(&prev->proc->parent->waitpid_q);
} else {
mcs_rwlock_writer_unlock(&prev->proc->update_lock, &lock);
}
#endif /* POSTK_DEBUG_TEMP_FIX_41 */
last = ihk_mc_switch_context(&prev->ctx, &next->ctx, prev);
}
@@ -1357,7 +1351,6 @@ struct thread *arch_switch_context(struct thread *prev, struct thread *next)
return last;
}
#endif /* POSTK_DEBUG_ARCH_DEP_22 */
/*@
@ requires \valid(thread);
@@ -1439,8 +1432,7 @@ void copy_fp_regs(struct thread *from, struct thread *to)
}
}
void
clear_fp_regs(struct thread *thread)
void clear_fp_regs(void)
{
if (likely(elf_hwcap & (HWCAP_FP | HWCAP_ASIMD))) {
#ifdef CONFIG_ARM64_SVE
@@ -1477,7 +1469,7 @@ restore_fp_regs(struct thread *thread)
if (likely(elf_hwcap & (HWCAP_FP | HWCAP_ASIMD))) {
if (!thread->fp_regs) {
// only clear fpregs.
clear_fp_regs(thread);
clear_fp_regs();
return;
}
thread_fpsimd_load(thread);

View File

@@ -9,20 +9,16 @@
#include <prctl.h>
#include <cpufeature.h>
#include <kmalloc.h>
#include <debug.h>
#include <process.h>
//#define DEBUG_PRINT_FPSIMD
#ifdef DEBUG_PRINT_FPSIMD
#define dkprintf kprintf
#define ekprintf kprintf
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf kprintf
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
#define BUG_ON(condition) do { if (condition) { kprintf("PANIC: %s: %s(line:%d)\n",\
__FILE__, __FUNCTION__, __LINE__); panic(""); } } while(0)
#ifdef CONFIG_ARM64_SVE
/* Maximum supported vector length across all CPUs (initially poisoned) */
@@ -73,9 +69,6 @@ static int get_nr_threads(struct process *proc)
return nr_threads;
}
extern void save_fp_regs(struct thread *thread);
extern void clear_fp_regs(struct thread *thread);
extern void restore_fp_regs(struct thread *thread);
/* @ref.impl arch/arm64/kernel/fpsimd.c::sve_set_vector_length */
int sve_set_vector_length(struct thread *thread,
unsigned long vl, unsigned long flags)
@@ -129,7 +122,7 @@ int sve_set_vector_length(struct thread *thread,
/* for self at prctl syscall */
if (thread == cpu_local_var(current)) {
save_fp_regs(thread);
clear_fp_regs(thread);
clear_fp_regs();
thread_sve_to_fpsimd(thread, &fp_regs);
sve_free(thread);

View File

@@ -7,6 +7,7 @@
#include <process.h>
#include <string.h>
#include <elfcore.h>
#include <debug.h>
#define align32(x) ((((x) + 3) / 4) * 4)
#define alignpage(x) ((((x) + (PAGE_SIZE) - 1) / (PAGE_SIZE)) * (PAGE_SIZE))
@@ -14,11 +15,8 @@
//#define DEBUG_PRINT_GENCORE
#ifdef DEBUG_PRINT_GENCORE
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
/*

View File

@@ -6,6 +6,8 @@
#include <ihk/cpu.h>
#include <ihk/atomic.h>
#include "affinity.h"
#include <lwk/compiler.h>
//#define DEBUG_SPINLOCK
//#define DEBUG_MCS_RWLOCK
@@ -152,6 +154,8 @@ typedef struct mcs_lock_node {
unsigned long irqsave;
} __attribute__((aligned(64))) mcs_lock_node_t;
typedef mcs_lock_node_t mcs_lock_t;
static void mcs_lock_init(struct mcs_lock_node *node)
{
node->locked = 0;
@@ -602,4 +606,16 @@ __mcs_rwlock_reader_unlock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_
#endif
}
/*
 * Tell whether interrupts can be delivered under the saved IRQ state
 * `flags`.  Returns non-zero if delivery is possible, 0 if it is masked.
 */
static inline int irqflags_can_interrupt(unsigned long flags)
{
#ifdef CONFIG_HAS_NMI
#warning irqflags_can_interrupt needs testing/fixing on such a target
	return flags > ICC_PMR_EL1_MASKED;
#else
	/* PSTATE.DAIF: interrupts are possible while the I bit (bit 7) is clear */
	const unsigned long daif_i_bit = 1UL << 7;

	return (flags & daif_i_bit) == 0;
#endif
}
#endif /* !__HEADER_ARM64_COMMON_ARCH_LOCK_H */

View File

@@ -35,38 +35,4 @@ void arm64_disable_pmu(void);
int armv8pmu_init(struct arm_pmu* cpu_pmu);
/* TODO[PMU]: 共通部に定義があっても良い。今後の動向を見てここの定義を削除する */
/*
* Generalized hardware cache events:
*
* { L1-D, L1-I, LLC, ITLB, DTLB, BPU, NODE } x
* { read, write, prefetch } x
* { accesses, misses }
*/
enum perf_hw_cache_id {
PERF_COUNT_HW_CACHE_L1D = 0,
PERF_COUNT_HW_CACHE_L1I = 1,
PERF_COUNT_HW_CACHE_LL = 2,
PERF_COUNT_HW_CACHE_DTLB = 3,
PERF_COUNT_HW_CACHE_ITLB = 4,
PERF_COUNT_HW_CACHE_BPU = 5,
PERF_COUNT_HW_CACHE_NODE = 6,
PERF_COUNT_HW_CACHE_MAX, /* non-ABI */
};
enum perf_hw_cache_op_id {
PERF_COUNT_HW_CACHE_OP_READ = 0,
PERF_COUNT_HW_CACHE_OP_WRITE = 1,
PERF_COUNT_HW_CACHE_OP_PREFETCH = 2,
PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */
};
enum perf_hw_cache_op_result_id {
PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0,
PERF_COUNT_HW_CACHE_RESULT_MISS = 1,
PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */
};
#endif

View File

@@ -9,6 +9,11 @@
#define _NSIG_BPW 64
#define _NSIG_WORDS (_NSIG / _NSIG_BPW)
/* Return 1 when sig does not exceed _NSIG (0 is accepted), otherwise 0. */
static inline int valid_signal(unsigned long sig)
{
	if (sig > _NSIG)
		return 0;

	return 1;
}
typedef unsigned long int __sigset_t;
#define __sigmask(sig) (((__sigset_t) 1) << ((sig) - 1))

View File

@@ -114,14 +114,18 @@ SYSCALL_HANDLED(236, get_mempolicy)
SYSCALL_HANDLED(237, set_mempolicy)
SYSCALL_HANDLED(238, migrate_pages)
SYSCALL_HANDLED(239, move_pages)
#ifdef PERF_ENABLE
SYSCALL_HANDLED(241, perf_event_open)
#endif // PERF_ENABLE
SYSCALL_HANDLED(260, wait4)
SYSCALL_HANDLED(270, process_vm_readv)
SYSCALL_HANDLED(271, process_vm_writev)
#ifdef PERF_ENABLE
SYSCALL_HANDLED(601, pmc_init)
SYSCALL_HANDLED(602, pmc_start)
SYSCALL_HANDLED(603, pmc_stop)
SYSCALL_HANDLED(604, pmc_reset)
#endif // PERF_ENABLE
SYSCALL_HANDLED(700, get_cpu_id)
#ifdef PROFILE_ENABLE
SYSCALL_HANDLED(__NR_profile, profile)

View File

@@ -7,15 +7,13 @@
#include <arch/cpu.h>
#include <memory.h>
#include <syscall.h>
#include <debug.h>
// #define DEBUG_GICV2
#ifdef DEBUG_GICV2
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...)
#define ekprintf(...) kprintf(__VA_ARGS__)
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
void *dist_base;

View File

@@ -7,17 +7,15 @@
#include <cputype.h>
#include <process.h>
#include <syscall.h>
#include <debug.h>
//#define DEBUG_GICV3
#define USE_CAVIUM_THUNDER_X
#ifdef DEBUG_GICV3
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...)
#define ekprintf(...) kprintf(__VA_ARGS__)
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
#ifdef USE_CAVIUM_THUNDER_X

View File

@@ -14,9 +14,7 @@
#include <context.h>
#include <kmalloc.h>
#include <vdso.h>
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#include <debug.h>
#define NOT_IMPLEMENTED() do { kprintf("%s is not implemented\n", __func__); while(1);} while(0)
@@ -2924,17 +2922,12 @@ int read_process_vm(struct process_vm *vm, void *kdst, const void *usrc, size_t
return error;
}
#ifdef POSTK_DEBUG_TEMP_FIX_52 /* NUMA support(memory area determination) */
if (!is_mckernel_memory(pa)) {
#else
if (pa < ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0) ||
pa >= ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0)) {
#endif /* POSTK_DEBUG_TEMP_FIX_52 */
if (!is_mckernel_memory(pa, pa + cpsize)) {
dkprintf("%s: pa is outside of LWK memory, to: %p, pa: %p,"
"cpsize: %d\n", __FUNCTION__, to, pa, cpsize);
va = ihk_mc_map_virtual(pa, 1, PTATTR_ACTIVE);
memcpy(to, va, cpsize);
ihk_mc_unmap_virtual(va, 1, 1);
ihk_mc_unmap_virtual(va, 1);
}
else {
va = phys_to_virt(pa);
@@ -3007,17 +3000,12 @@ int write_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t
return error;
}
#ifdef POSTK_DEBUG_TEMP_FIX_52 /* NUMA support(memory area determination) */
if (!is_mckernel_memory(pa)) {
#else
if (pa < ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0) ||
pa >= ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0)) {
#endif /* POSTK_DEBUG_TEMP_FIX_52 */
if (!is_mckernel_memory(pa, pa + cpsize)) {
dkprintf("%s: pa is outside of LWK memory, from: %p,"
"pa: %p, cpsize: %d\n", __FUNCTION__, from, pa, cpsize);
va = ihk_mc_map_virtual(pa, 1, PTATTR_WRITABLE|PTATTR_ACTIVE);
memcpy(va, from, cpsize);
ihk_mc_unmap_virtual(va, 1, 1);
ihk_mc_unmap_virtual(va, 1);
}
else {
va = phys_to_virt(pa);
@@ -3078,17 +3066,12 @@ int patch_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t
return error;
}
#ifdef POSTK_DEBUG_TEMP_FIX_52 /* NUMA support(memory area determination) */
if (!is_mckernel_memory(pa)) {
#else
if (pa < ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0) ||
pa >= ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0)) {
#endif /* POSTK_DEBUG_TEMP_FIX_52 */
if (!is_mckernel_memory(pa, pa + cpsize)) {
dkprintf("%s: pa is outside of LWK memory, from: %p,"
"pa: %p, cpsize: %d\n", __FUNCTION__, from, pa, cpsize);
va = ihk_mc_map_virtual(pa, 1, PTATTR_WRITABLE|PTATTR_ACTIVE);
memcpy(va, from, cpsize);
ihk_mc_unmap_virtual(va, 1, 1);
ihk_mc_unmap_virtual(va, 1);
}
else {
va = phys_to_virt(pa);

View File

@@ -93,21 +93,50 @@ int ihk_mc_perfctr_init(int counter, uint64_t config, int mode)
return ret;
}
int ihk_mc_perfctr_start(int counter)
int ihk_mc_perfctr_start(unsigned long counter_mask)
{
int ret;
ret = cpu_pmu.enable_counter(counter);
return ret;
int ret = 0;
int counter;
unsigned long counter_bit;
for (counter = 0, counter_bit = 1;
counter_bit < counter_mask;
counter++, counter_bit <<= 1) {
if (!(counter_mask & counter_bit))
continue;
ret = cpu_pmu.enable_counter(counter_mask);
if (ret < 0)
break;
}
return ret < 0 ? ret : 0;
}
int ihk_mc_perfctr_stop(int counter)
int ihk_mc_perfctr_stop(unsigned long counter_mask)
{
cpu_pmu.disable_counter(counter);
int ret = 0;
int counter;
unsigned long counter_bit;
// ihk_mc_perfctr_startが呼ばれるときには、
// init系関数が呼ばれるのでdisableにする。
cpu_pmu.disable_intens(counter);
return 0;
for (counter = 0, counter_bit = 1;
counter_bit < counter_mask;
counter++, counter_bit <<= 1) {
if (!(counter_mask & counter_bit))
continue;
ret = cpu_pmu.disable_counter(counter);
if (ret < 0)
break;
// ihk_mc_perfctr_startが呼ばれるときには、
// init系関数が呼ばれるのでdisableにする。
ret = cpu_pmu.disable_intens(counter);
if (ret < 0)
break;
}
return ret < 0 ? ret : 0;
}
int ihk_mc_perfctr_reset(int counter)

View File

@@ -4,16 +4,14 @@
#include <ihk/perfctr.h>
#include <errno.h>
#include <ihk/debug.h>
#include <debug.h>
#define BIT(nr) (1UL << (nr))
//#define DEBUG_PRINT_PMU
#ifdef DEBUG_PRINT_PMU
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif

View File

@@ -21,15 +21,13 @@
#include <ihk/debug.h>
#include <compiler.h>
#include <lwk/compiler.h>
#include <debug.h>
//#define DEBUG_PRINT_PSCI
#ifdef DEBUG_PRINT_PSCI
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1

View File

@@ -11,22 +11,17 @@
#include <hwcap.h>
#include <string.h>
#include <thread_info.h>
#include <debug.h>
//#define DEBUG_PRINT_SC
#ifdef DEBUG_PRINT_SC
#define dkprintf kprintf
#define ekprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
#define NOT_IMPLEMENTED() do { kprintf("%s is not implemented\n", __func__); while(1);} while(0)
#define BUG_ON(condition) do { if (condition) { kprintf("PANIC: %s: %s(line:%d)\n",\
__FILE__, __FUNCTION__, __LINE__); panic(""); } } while(0)
extern void save_debugreg(unsigned long *debugreg);
extern unsigned long do_kill(struct thread *thread, int pid, int tid, int sig, struct siginfo *info, int ptracecont);
extern int interrupt_from_user(void *);
@@ -959,11 +954,7 @@ void ptrace_report_signal(struct thread *thread, int sig)
}
thread->exit_status = sig;
/* Transition thread state */
#ifdef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */
proc->status = PS_DELAY_TRACED;
#else /* POSTK_DEBUG_TEMP_FIX_41 */
proc->status = PS_TRACED;
#endif /* POSTK_DEBUG_TEMP_FIX_41 */
thread->status = PS_TRACED;
proc->ptrace &= ~PT_TRACE_SYSCALL;
if (sig == SIGSTOP || sig == SIGTSTP ||
@@ -982,10 +973,6 @@ void ptrace_report_signal(struct thread *thread, int sig)
info._sifields._sigchld.si_pid = thread->tid;
info._sifields._sigchld.si_status = thread->exit_status;
do_kill(cpu_local_var(current), parent_pid, -1, SIGCHLD, &info, 0);
#ifndef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */
/* Wake parent (if sleeping in wait4()) */
waitq_wakeup(&proc->parent->waitpid_q);
#endif /* !POSTK_DEBUG_TEMP_FIX_41 */
dkprintf("ptrace_report_signal,sleeping\n");
/* Sleep */

View File

@@ -14,6 +14,8 @@
#include <prctl.h>
#include <limits.h>
#include <syscall.h>
#include <uio.h>
#include <debug.h>
extern void ptrace_report_signal(struct thread *thread, int sig);
extern void clear_single_step(struct thread *thread);
@@ -27,18 +29,12 @@ static void __check_signal(unsigned long rc, void *regs, int num, int irq_disabl
//#define DEBUG_PRINT_SC
#ifdef DEBUG_PRINT_SC
#define dkprintf kprintf
#define ekprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
#define NOT_IMPLEMENTED() do { kprintf("%s is not implemented\n", __func__); while(1);} while(0)
#define BUG_ON(condition) do { if (condition) { kprintf("PANIC: %s: %s(line:%d)\n",\
__FILE__, __FUNCTION__, __LINE__); panic(""); } } while(0)
uintptr_t debug_constants[] = {
sizeof(struct cpu_local_var),
offsetof(struct cpu_local_var, current),
@@ -59,7 +55,7 @@ static int cpuid_head = 1;
extern int num_processors;
int obtain_clone_cpuid(cpu_set_t *cpu_set) {
int obtain_clone_cpuid(cpu_set_t *cpu_set, int use_last) {
int min_queue_len = -1;
int i, min_cpu = -1;
@@ -1177,19 +1173,10 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
/* Reap and set new signal_flags */
proc->signal_flags = SIGNAL_STOP_STOPPED;
#ifdef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */
proc->status = PS_DELAY_STOPPED;
#else /* POSTK_DEBUG_TEMP_FIX_41 */
proc->status = PS_STOPPED;
#endif /* POSTK_DEBUG_TEMP_FIX_41 */
thread->status = PS_STOPPED;
mcs_rwlock_writer_unlock(&proc->update_lock, &lock);
#ifndef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */
/* Wake up the parent who tried wait4 and sleeping */
waitq_wakeup(&proc->parent->waitpid_q);
#endif /* !POSTK_DEBUG_TEMP_FIX_41 */
dkprintf("do_signal(): pid: %d, tid: %d SIGSTOP, sleeping\n",
proc->pid, thread->tid);
/* Sleep */
@@ -1206,19 +1193,10 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
/* Update thread state in fork tree */
mcs_rwlock_writer_lock(&proc->update_lock, &lock);
thread->exit_status = SIGTRAP;
#ifdef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */
proc->status = PS_DELAY_TRACED;
#else /* POSTK_DEBUG_TEMP_FIX_41 */
proc->status = PS_TRACED;
#endif /* POSTK_DEBUG_TEMP_FIX_41 */
thread->status = PS_TRACED;
mcs_rwlock_writer_unlock(&proc->update_lock, &lock);
#ifndef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */
/* Wake up the parent who tried wait4 and sleeping */
waitq_wakeup(&thread->proc->parent->waitpid_q);
#endif /* !POSTK_DEBUG_TEMP_FIX_41 */
/* Sleep */
dkprintf("do_signal,SIGTRAP,sleeping\n");
@@ -1594,7 +1572,7 @@ done:
return 0;
}
if (tthread->thread_offloaded) {
if (tthread->uti_state == UTI_STATE_RUNNING_IN_LINUX) {
interrupt_syscall(tthread, sig);
release_thread(tthread);
return 0;
@@ -1729,7 +1707,7 @@ SYSCALL_DECLARE(mmap)
| MAP_NONBLOCK // 0x10000
;
const intptr_t addr0 = ihk_mc_syscall_arg0(ctx);
const uintptr_t addr0 = ihk_mc_syscall_arg0(ctx);
const size_t len0 = ihk_mc_syscall_arg1(ctx);
const int prot = ihk_mc_syscall_arg2(ctx);
const int flags0 = ihk_mc_syscall_arg3(ctx);
@@ -1738,7 +1716,7 @@ SYSCALL_DECLARE(mmap)
struct thread *thread = cpu_local_var(current);
struct vm_regions *region = &thread->vm->region;
int error;
intptr_t addr = 0;
uintptr_t addr = 0;
size_t len;
int flags = flags0;
size_t pgsize;

View File

@@ -14,15 +14,13 @@
#include <ihk/debug.h>
#include <ikc/queue.h>
#include <vdso.h>
#include <debug.h>
//#define DEBUG_PRINT_VDSO
#ifdef DEBUG_PRINT_VDSO
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
#ifdef POSTK_DEBUG_ARCH_DEP_52

View File

@@ -1,5 +1,7 @@
/* gettimeofday.c COPYRIGHT FUJITSU LIMITED 2016 */
#include <affinity.h>
#include <arch-memory.h>
#include <time.h>
#include <syscall.h>
#include <registers.h>

View File

@@ -9,29 +9,29 @@ PHDRS
SECTIONS
{
. = SIZEOF_HEADERS;
. = ALIGN(4096);
. = ALIGN(4096);
.text : {
*(.text)
*(.text)
} :text
.data : {
*(.data)
*(.data.*)
*(.data)
*(.data.*)
} :data
.rodata : {
*(.rodata .rodata.*)
*(.rodata .rodata.*)
} :data
. = ALIGN(8);
.bss : {
_bss_start = .;
*(.bss .bss.*)
_bss_end = .;
. = ALIGN(4096);
_stack_end = .;
} :data
_bss_start = .;
*(.bss .bss.*)
_bss_end = .;
. = ALIGN(4096);
_stack_end = .;
} :data
/DISCARD/ : {
*(.eh_frame)
*(.note.gnu.build-id)
*(.eh_frame)
*(.note.gnu.build-id)
}
}

View File

@@ -31,6 +31,7 @@
#include <prctl.h>
#include <page.h>
#include <kmalloc.h>
#include <debug.h>
#define LAPIC_ID 0x020
#define LAPIC_TIMER 0x320
@@ -69,11 +70,8 @@
//#define DEBUG_PRINT_CPU
#ifdef DEBUG_PRINT_CPU
#define dkprintf kprintf
#define ekprintf kprintf
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf kprintf
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
static void *lapic_vp;
@@ -96,6 +94,8 @@ int gettime_local_support = 0;
extern int ihk_mc_pt_print_pte(struct page_table *pt, void *virt);
extern int kprintf(const char *format, ...);
extern int interrupt_from_user(void *);
extern void perf_start(struct mc_perf_event *event);
extern void perf_reset(struct mc_perf_event *event);
static struct idt_entry{
uint32_t desc[4];
@@ -847,9 +847,6 @@ void setup_x86_ap(void (*next_func)(void))
}
void arch_show_interrupt_context(const void *reg);
void set_signal(int sig, void *regs, struct siginfo *info);
void check_signal(unsigned long, void *, int);
void check_sig_pending();
extern void tlb_flush_handler(int vector);
void __show_stack(uintptr_t *sp) {
@@ -877,7 +874,7 @@ void interrupt_exit(struct x86_user_context *regs)
cpu_enable_interrupt();
check_sig_pending();
check_need_resched();
check_signal(0, regs, 0);
check_signal(0, regs, -1);
}
else {
check_sig_pending();
@@ -1010,6 +1007,12 @@ void handle_interrupt(int vector, struct x86_user_context *regs)
set_cputime(interrupt_from_user(regs)? 0: 1);
--v->in_interrupt;
/* for migration by IPI */
if (v->flags & CPU_FLAG_NEED_MIGRATE) {
schedule();
check_signal(0, regs, 0);
}
}
void gpe_handler(struct x86_user_context *regs)
@@ -1644,12 +1647,10 @@ int ihk_mc_interrupt_cpu(int cpu, int vector)
return 0;
}
#ifdef POSTK_DEBUG_ARCH_DEP_22
extern void perf_start(struct mc_perf_event *event);
extern void perf_reset(struct mc_perf_event *event);
struct thread *arch_switch_context(struct thread *prev, struct thread *next)
{
struct thread *last;
struct mcs_rwlock_node_irqsave lock;
dkprintf("[%d] schedule: tlsblock_base: 0x%lX\n",
ihk_mc_get_processor_id(), next->tlsblock_base);
@@ -1668,7 +1669,7 @@ struct thread *arch_switch_context(struct thread *prev, struct thread *next)
}
#ifdef PROFILE_ENABLE
if (prev->profile && prev->profile_start_ts != 0) {
if (prev && prev->profile && prev->profile_start_ts != 0) {
prev->profile_elapsed_ts +=
(rdtsc() - prev->profile_start_ts);
prev->profile_start_ts = 0;
@@ -1680,6 +1681,28 @@ struct thread *arch_switch_context(struct thread *prev, struct thread *next)
#endif
if (prev) {
mcs_rwlock_writer_lock(&prev->proc->update_lock, &lock);
if (prev->proc->status & (PS_DELAY_STOPPED | PS_DELAY_TRACED)) {
switch (prev->proc->status) {
case PS_DELAY_STOPPED:
prev->proc->status = PS_STOPPED;
break;
case PS_DELAY_TRACED:
prev->proc->status = PS_TRACED;
break;
default:
break;
}
mcs_rwlock_writer_unlock(&prev->proc->update_lock,
&lock);
/* Wake up the parent who tried wait4 and sleeping */
waitq_wakeup(&prev->proc->parent->waitpid_q);
} else {
mcs_rwlock_writer_unlock(&prev->proc->update_lock,
&lock);
}
last = ihk_mc_switch_context(&prev->ctx, &next->ctx, prev);
}
else {
@@ -1687,7 +1710,6 @@ struct thread *arch_switch_context(struct thread *prev, struct thread *next)
}
return last;
}
#endif
/*@
@ requires \valid(thread);
@@ -1762,14 +1784,6 @@ void copy_fp_regs(struct thread *from, struct thread *to)
}
}
#ifdef POSTK_DEBUG_TEMP_FIX_19
void
clear_fp_regs(struct thread *thread)
{
return;
}
#endif /* POSTK_DEBUG_TEMP_FIX_19 */
/*@
@ requires \valid(thread);
@ assigns thread->fp_regs;
@@ -1777,8 +1791,11 @@ clear_fp_regs(struct thread *thread)
void
restore_fp_regs(struct thread *thread)
{
if (!thread->fp_regs)
if (!thread->fp_regs) {
// only clear fpregs.
clear_fp_regs();
return;
}
if (xsave_available) {
unsigned int low, high;
@@ -1797,6 +1814,13 @@ restore_fp_regs(struct thread *thread)
//release_fp_regs(thread);
}
void clear_fp_regs(void)
{
struct cpu_local_var *v = get_this_cpu_local_var();
restore_fp_regs(&v->idle);
}
ihk_mc_user_context_t *lookup_user_context(struct thread *thread)
{
ihk_mc_user_context_t *uctx = thread->uctx;

View File

@@ -6,6 +6,7 @@
#include <process.h>
#include <string.h>
#include <elfcore.h>
#include <debug.h>
#define align32(x) ((((x) + 3) / 4) * 4)
#define alignpage(x) ((((x) + (PAGE_SIZE) - 1) / (PAGE_SIZE)) * (PAGE_SIZE))
@@ -13,13 +14,16 @@
//#define DEBUG_PRINT_GENCORE
#ifdef DEBUG_PRINT_GENCORE
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
/* Exclude reserved (mckernel's internal use), device file,
 * hole created by mprotect.
 *
 * Evaluates to non-zero when `range` (a pointer expression to an object
 * with a `flag` member) has any of VR_RESERVED, VR_MEMTYPE_UC or
 * VR_DONTDUMP set.  The argument is parenthesized so that expressions such
 * as `base + i` expand correctly.
 */
#define GENCORE_RANGE_IS_INACCESSIBLE(range) \
	(((range)->flag & (VR_RESERVED | VR_MEMTYPE_UC | VR_DONTDUMP)))
/*
* Generate a core file image, which consists of many chunks.
* Returns an allocated table, an entry of which is a pair of the address
@@ -309,12 +313,10 @@ int gencore(struct thread *thread, void *regs,
dkprintf("start:%lx end:%lx flag:%lx objoff:%lx\n",
range->start, range->end, range->flag, range->objoff);
/* We omit reserved areas because they are only for
mckernel's internal use. */
if (range->flag & VR_RESERVED)
continue;
if (range->flag & VR_DONTDUMP)
if (GENCORE_RANGE_IS_INACCESSIBLE(range)) {
continue;
}
/* We need a chunk for each page for a demand paging area.
This can be optimized for spacial complexity but we would
lose simplicity instead. */
@@ -403,8 +405,9 @@ int gencore(struct thread *thread, void *regs,
unsigned long flag = range->flag;
unsigned long size = range->end - range->start;
if (range->flag & VR_RESERVED)
if (GENCORE_RANGE_IS_INACCESSIBLE(range)) {
continue;
}
ph[i].p_type = PT_LOAD;
ph[i].p_flags = ((flag & VR_PROT_READ) ? PF_R : 0)
@@ -446,8 +449,9 @@ int gencore(struct thread *thread, void *regs,
unsigned long phys;
if (range->flag & VR_RESERVED)
if (GENCORE_RANGE_IS_INACCESSIBLE(range)) {
continue;
}
if (range->flag & VR_DEMAND_PAGING) {
/* Just an ad hoc kluge. */
unsigned long p, start, phys;

View File

@@ -64,12 +64,13 @@ static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval,
return oldval;
}
static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
static inline int futex_atomic_op_inuser(int encoded_op,
int __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
int oparg = (encoded_op << 8) >> 20;
int cmparg = (encoded_op << 20) >> 20;
int oparg = (encoded_op & 0x00fff000) >> 12;
int cmparg = encoded_op & 0xfff;
int oldval = 0, ret, tem;
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))

View File

@@ -6,6 +6,7 @@
#include <ihk/cpu.h>
#include <ihk/atomic.h>
#include <lwk/compiler.h>
//#define DEBUG_SPINLOCK
//#define DEBUG_MCS_RWLOCK
@@ -14,7 +15,17 @@
int __kprintf(const char *format, ...);
#endif
typedef int ihk_spinlock_t;
/* A single ticket counter. */
typedef unsigned short __ticket_t;
/* Both ticket counters of a lock viewed as one word. */
typedef unsigned int __ticketpair_t;
/*
 * Ticket spinlock.  The anonymous union allows the two counters to be
 * accessed individually (tickets.head / tickets.tail) or together as a
 * single word (head_tail), e.g. for zero-initialization or compare-and-swap.
 */
typedef struct ihk_spinlock {
union {
__ticketpair_t head_tail;
struct __raw_tickets {
__ticket_t head, tail;
} tickets;
};
} ihk_spinlock_t;
extern void preempt_enable(void);
extern void preempt_disable(void);
@@ -23,9 +34,61 @@ extern void preempt_disable(void);
static void ihk_mc_spinlock_init(ihk_spinlock_t *lock)
{
*lock = 0;
lock->head_tail = 0;
}
#define SPIN_LOCK_UNLOCKED { .head_tail = 0 }
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_trylock_noirq(l) { int rc; \
__kprintf("[%d] call ihk_mc_spinlock_trylock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
rc = __ihk_mc_spinlock_trylock_noirq(l); \
__kprintf("[%d] ret ihk_mc_spinlock_trylock_noirq\n", ihk_mc_get_processor_id()); rc; \
}
#else
#define ihk_mc_spinlock_trylock_noirq __ihk_mc_spinlock_trylock_noirq
#endif
/*
 * Try to take `lock` once without spinning and without touching the
 * interrupt state.
 *
 * Returns 1 when the lock was acquired (preemption stays disabled),
 * 0 when it was not (preemption state is left as it was on entry).
 */
static int __ihk_mc_spinlock_trylock_noirq(ihk_spinlock_t *lock)
{
	ihk_spinlock_t observed;
	ihk_spinlock_t wanted;

	/* Snapshot the lock word; head != tail means the lock is not free. */
	observed.head_tail = lock->head_tail;
	if (observed.tickets.head != observed.tickets.tail)
		return 0;

	/* Use the same increment amount as other functions! */
	wanted.tickets.head = observed.tickets.head;
	wanted.tickets.tail = observed.tickets.tail + 2;

	preempt_disable();
	/* Succeeds only if the lock word still equals the snapshot. */
	if (__sync_bool_compare_and_swap((__ticketpair_t *)lock,
					 observed.head_tail, wanted.head_tail))
		return 1;

	preempt_enable();
	return 0;
}
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_trylock(l, result) ({ unsigned long rc; \
__kprintf("[%d] call ihk_mc_spinlock_trylock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
rc = __ihk_mc_spinlock_trylock(l, result); \
__kprintf("[%d] ret ihk_mc_spinlock_trylock\n", ihk_mc_get_processor_id()); rc;\
})
#else
#define ihk_mc_spinlock_trylock __ihk_mc_spinlock_trylock
#endif
/*
 * Disable interrupts, then try once to take `lock`.
 *
 * *result receives the outcome of __ihk_mc_spinlock_trylock_noirq().
 * The return value is whatever cpu_disable_interrupt_save() returned; this
 * function does not restore the interrupt state itself, whether or not the
 * lock was acquired.
 */
static unsigned long __ihk_mc_spinlock_trylock(ihk_spinlock_t *lock, int *result)
{
	const unsigned long saved_flags = cpu_disable_interrupt_save();

	*result = __ihk_mc_spinlock_trylock_noirq(lock);

	return saved_flags;
}
#define SPIN_LOCK_UNLOCKED 0
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_lock_noirq(l) { \
@@ -39,40 +102,24 @@ __kprintf("[%d] ret ihk_mc_spinlock_lock_noirq\n", ihk_mc_get_processor_id()); \
static void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
{
int inc = 0x00010000;
int tmp;
#if 0
asm volatile("lock ; xaddl %0, %1\n"
"movzwl %w0, %2\n\t"
"shrl $16, %0\n\t"
"1:\t"
"cmpl %0, %2\n\t"
"je 2f\n\t"
"rep ; nop\n\t"
"movzwl %1, %2\n\t"
"jmp 1b\n"
"2:"
: "+Q" (inc), "+m" (*lock), "=r" (tmp) : : "memory", "cc");
#endif
register struct __raw_tickets inc = { .tail = 0x0002 };
preempt_disable();
asm volatile("lock; xaddl %0, %1\n"
"movzwl %w0, %2\n\t"
"shrl $16, %0\n\t"
"1:\t"
"cmpl %0, %2\n\t"
"je 2f\n\t"
"rep ; nop\n\t"
"movzwl %1, %2\n\t"
/* don't need lfence here, because loads are in-order */
"jmp 1b\n"
"2:"
: "+r" (inc), "+m" (*lock), "=&r" (tmp)
:
: "memory", "cc");
asm volatile ("lock xaddl %0, %1\n"
: "+r" (inc), "+m" (*(lock)) : : "memory", "cc");
if (inc.head == inc.tail)
goto out;
for (;;) {
if (*((volatile __ticket_t *)&lock->tickets.head) == inc.tail)
goto out;
cpu_pause();
}
out:
barrier(); /* make sure nothing creeps before the lock is taken */
}
#ifdef DEBUG_SPINLOCK
@@ -106,7 +153,10 @@ __kprintf("[%d] ret ihk_mc_spinlock_unlock_noirq\n", ihk_mc_get_processor_id());
#endif
static void __ihk_mc_spinlock_unlock_noirq(ihk_spinlock_t *lock)
{
asm volatile ("lock incw %0" : "+m"(*lock) : : "memory", "cc");
__ticket_t inc = 0x0002;
asm volatile ("lock addw %1, %0\n"
: "+m" (lock->tickets.head) : "ri" (inc) : "memory", "cc");
preempt_enable();
}
@@ -602,4 +652,9 @@ __mcs_rwlock_reader_unlock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_
#endif
}
/*
 * Tell whether interrupts can be delivered under the saved IRQ state
 * `flags`: returns 1 when bit 9 (0x200) is set, 0 otherwise.
 * NOTE(review): 0x200 is presumably the x86 RFLAGS.IF bit -- confirm.
 */
static inline int irqflags_can_interrupt(unsigned long flags)
{
	return (flags & 0x200) ? 1 : 0;
}
#endif

View File

@@ -42,16 +42,34 @@
#define USER_END 0x0000800000000000UL
#define TASK_UNMAPPED_BASE 0x00002AAAAAA00000UL
/*
* Canonical negative addresses (i.e., the smallest kernel virtual address)
* on x86 64 bit mode (in its most restricted 48 bit format) starts from
* 0xffff800000000000, but Linux starts mapping physical memory at 0xffff880000000000.
* The 0x80000000000 long gap (8TBs, i.e., 16 PGD level entries in the page tables)
* is used for Xen hypervisor (see arch/x86/include/asm/page.h) and that is
* what we utilize for McKernel.
* This gives us the benefit of being able to use Linux kernel virtual
* addresses identically as in Linux.
*
* NOTE: update these also in eclair.c when modified!
*/
#define MAP_ST_START 0xffff800000000000UL
#define MAP_VMAP_START 0xfffff00000000000UL
#define MAP_FIXED_START 0xffffffff70000000UL
#define MAP_KERNEL_START 0xffffffff80000000UL
#define MAP_VMAP_START 0xffff850000000000UL
#define MAP_FIXED_START 0xffff860000000000UL
#define LINUX_PAGE_OFFSET 0xffff880000000000UL
/*
* MAP_KERNEL_START is 8MB below MODULES_END in Linux.
* Placing the LWK image in the virtual address space at the end of
* the Linux modules section enables us to map the LWK TEXT in Linux
* as well, so that Linux can also call into LWK text.
*/
#define MAP_KERNEL_START 0xFFFFFFFFFE800000UL
#define STACK_TOP(region) ((region)->user_end)
#define MAP_VMAP_SIZE 0x0000000100000000UL
#define KERNEL_PHYS_OFFSET MAP_ST_START
#define PTL4_SHIFT 39
#define PTL4_SIZE (1UL << PTL4_SHIFT)
#define PTL3_SHIFT 30

View File

@@ -114,7 +114,7 @@ SYSCALL_HANDLED(160, setrlimit)
SYSCALL_HANDLED(164, settimeofday)
SYSCALL_HANDLED(186, gettid)
SYSCALL_HANDLED(200, tkill)
SYSCALL_DELEGATED(201, time)
SYSCALL_HANDLED(201, time)
SYSCALL_HANDLED(202, futex)
SYSCALL_HANDLED(203, sched_setaffinity)
SYSCALL_HANDLED(204, sched_getaffinity)
@@ -161,6 +161,7 @@ SYSCALL_HANDLED(__NR_profile, profile)
SYSCALL_HANDLED(730, util_migrate_inter_kernel)
SYSCALL_HANDLED(731, util_indicate_clone)
SYSCALL_HANDLED(732, get_system)
SYSCALL_HANDLED(733, util_register_desc)
/* McKernel Specific */
SYSCALL_HANDLED(801, swapout)

View File

@@ -107,9 +107,17 @@ void init_boot_processor_local(void)
@ ensures \result == %gs;
@ assigns \nothing;
*/
extern int num_processors;
int ihk_mc_get_processor_id(void)
{
int id;
void *gs;
gs = (void *)rdmsr(MSR_GS_BASE);
if (gs < (void *)locals ||
gs > ((void *)locals + LOCALS_SPAN * num_processors)) {
return -1;
}
asm volatile("movl %%gs:0, %0" : "=r"(id));

View File

@@ -25,15 +25,13 @@
#include <cls.h>
#include <kmalloc.h>
#include <rusage_private.h>
#include <debug.h>
//#define DEBUG
#ifdef DEBUG
#define dkprintf(...) do { kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) do { kprintf(__VA_ARGS__); } while (0)
#else
#define dkprintf(...) do { } while (0)
#define ekprintf(...) do { kprintf(__VA_ARGS__); } while (0)
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
static char *last_page;
@@ -41,6 +39,8 @@ extern char _head[], _end[];
extern unsigned long x86_kernel_phys_base;
int safe_kernel_map = 0;
/* Arch specific early allocation routine */
void *early_alloc_pages(int nr_pages)
{
@@ -109,6 +109,7 @@ struct page_table {
};
static struct page_table *init_pt;
static int init_pt_loaded = 0;
static ihk_spinlock_t init_pt_lock;
static int use_1gb_page = 0;
@@ -167,30 +168,6 @@ static unsigned long setup_l3(struct page_table *pt,
return virt_to_phys(pt);
}
/*
 * Build the top-level (PTL4) entries covering the memory range reported by
 * ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START / IHK_MC_GMA_MAP_END).
 *
 * The table `pt` is zeroed first.  Then, for every PTL4_SIZE-sized slot that
 * overlaps [map_start, map_end), one page is allocated, filled by setup_l3(),
 * and its physical address is installed twice: at the index derived from the
 * physical address itself and at the index derived from MAP_ST_START.
 */
static void init_normal_area(struct page_table *pt)
{
unsigned long map_start, map_end, phys, pt_phys;
int ident_index, virt_index;
map_start = ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0);
map_end = ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0);
kprintf("map_start = %lx, map_end = %lx\n", map_start, map_end);
/* PTL4 slot for the identity (virt == phys) view of the range */
ident_index = map_start >> PTL4_SHIFT;
/* PTL4 slot for MAP_ST_START; masked to stay within one table */
virt_index = (MAP_ST_START >> PTL4_SHIFT) & (PT_ENTRIES - 1);
memset(pt, 0, sizeof(struct page_table));
/* Start from map_start rounded down to a PTL4_SIZE boundary */
for (phys = (map_start & ~(PTL4_SIZE - 1)); phys < map_end;
phys += PTL4_SIZE) {
/* NOTE(review): the allocation result is passed on unchecked -- confirm
 * that IHK_MC_AP_CRITICAL allocations cannot return NULL here. */
pt_phys = setup_l3(ihk_mc_alloc_pages(1, IHK_MC_AP_CRITICAL), phys,
map_start, map_end);
/* The same lower-level table backs both PTL4 entries */
pt->entry[ident_index++] = pt_phys | PFL4_PDIR_ATTR;
pt->entry[virt_index++] = pt_phys | PFL4_PDIR_ATTR;
}
}
static struct page_table *__alloc_new_pt(ihk_mc_ap_flag ap_flag)
{
struct page_table *newpt = ihk_mc_alloc_pages(1, ap_flag);
@@ -258,6 +235,11 @@ static unsigned long attr_to_l1attr(enum ihk_mc_pt_attribute attr)
}
}
#define PTLX_SHIFT(index) PTL ## index ## _SHIFT
#define GET_VIRT_INDEX(virt, index, dest) \
dest = ((virt) >> PTLX_SHIFT(index)) & (PT_ENTRIES - 1)
#define GET_VIRT_INDICES(virt, l4i, l3i, l2i, l1i) \
l4i = ((virt) >> PTL4_SHIFT) & (PT_ENTRIES - 1); \
l3i = ((virt) >> PTL3_SHIFT) & (PT_ENTRIES - 1); \
@@ -1518,12 +1500,12 @@ static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base,
if (page) {
dkprintf("%s: page=%p,is_in_memobj=%d,(old & PFL1_DIRTY)=%lx,memobj=%p,args->memobj->flags=%x\n", __FUNCTION__, page, page_is_in_memobj(page), (old & PFL1_DIRTY), args->memobj, args->memobj ? args->memobj->flags : -1);
}
if (page && page_is_in_memobj(page) && (old & PFL1_DIRTY) && (args->memobj) &&
!(args->memobj->flags & MF_ZEROFILL)) {
if (page && page_is_in_memobj(page) && pte_is_dirty(&old, PTL1_SIZE) &&
args->memobj && !(args->memobj->flags & MF_ZEROFILL)) {
memobj_flush_page(args->memobj, phys, PTL1_SIZE);
}
if (!(old & PFL1_FILEOFF)) {
if (!pte_is_fileoff(&old, PTL1_SIZE)) {
if(args->free_physical) {
if (!page) {
/* Anonymous || !XPMEM attach */
@@ -1585,11 +1567,11 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base,
page = phys_to_page(phys);
}
if (page && page_is_in_memobj(page) && (old & PFL2_DIRTY)) {
if (page && page_is_in_memobj(page) && pte_is_dirty(&old, PTL2_SIZE)) {
memobj_flush_page(args->memobj, phys, PTL2_SIZE);
}
if (!(old & PFL2_FILEOFF)) {
if (!pte_is_fileoff(&old, PTL2_SIZE)) {
if(args->free_physical) {
if (!page) {
/* Anonymous || !XPMEM attach */
@@ -1666,13 +1648,13 @@ static int clear_range_l3(void *args0, pte_t *ptep, uint64_t base,
page = phys_to_page(phys);
}
if (page && page_is_in_memobj(page) && (old & PFL3_DIRTY)) {
if (page && page_is_in_memobj(page) && pte_is_dirty(&old, PTL3_SIZE)) {
memobj_flush_page(args->memobj, phys, PTL3_SIZE);
}
dkprintf("%s: phys=%ld, pte_get_phys(&old),PTL3_SIZE\n", __FUNCTION__, pte_get_phys(&old));
if (!(old & PFL3_FILEOFF)) {
if (!pte_is_fileoff(&old, PTL3_SIZE)) {
if(args->free_physical) {
if (!page) {
/* Anonymous || !XPMEM attach */
@@ -2540,6 +2522,82 @@ static void init_fixed_area(struct page_table *pt)
return;
}
/*
 * Map the memory range reported by
 * ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START / IHK_MC_GMA_MAP_END)
 * into `pt` at virtual address MAP_ST_START + physical address.
 *
 * The range is walked in LARGE_PAGE_SIZE steps and each step is installed
 * with set_pt_large_page() using PTATTR_WRITABLE.  A failed mapping is
 * logged and the walk continues with the next page.
 *
 * The virtual address is kept as an unsigned long so that it matches the
 * %lx conversions in the log messages; it is converted to a pointer only at
 * the set_pt_large_page() call.
 *
 * NOTE(review): map_start is used as-is, so this assumes it is already
 * LARGE_PAGE_SIZE-aligned -- confirm.
 */
static void init_normal_area(struct page_table *pt)
{
	unsigned long map_start, map_end, phys;
	unsigned long virt;

	map_start = ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0);
	map_end = ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0);
	virt = MAP_ST_START + map_start;

	kprintf("map_start = %lx, map_end = %lx, virt %lx\n",
		map_start, map_end, virt);

	for (phys = map_start; phys < map_end; phys += LARGE_PAGE_SIZE) {
		if (set_pt_large_page(pt, (void *)virt, phys,
				      PTATTR_WRITABLE) != 0) {
			kprintf("%s: error setting mapping for 0x%lx\n",
				__func__, virt);
		}
		virt += LARGE_PAGE_SIZE;
	}
}
/*
 * Install mappings in `pt` at LINUX_PAGE_OFFSET + physical address, using
 * LARGE_PAGE_SIZE pages with PTATTR_WRITABLE.
 *
 * Which physical memory is covered depends on the global safe_kernel_map:
 *   - safe_kernel_map == 0: the first 2 TB of physical memory, starting at
 *     physical address 0, are mapped.
 *   - otherwise: only the memory chunks reported by
 *     ihk_mc_get_nr_memory_chunks() / ihk_mc_get_memory_chunk() are mapped,
 *     so that memory belonging to the Linux side is not reachable (and thus
 *     cannot be corrupted) through this window.
 *
 * A failed set_pt_large_page() call is logged and the walk continues.
 * Virtual addresses are kept as unsigned long so that they match the %lx
 * conversions in the log messages; they are converted to pointers only at
 * the set_pt_large_page() call.
 */
static void init_linux_kernel_mapping(struct page_table *pt)
{
	unsigned long map_start, map_end, phys;
	unsigned long virt;
	int nr_memory_chunks, chunk_id, numa_id;

	if (safe_kernel_map == 0) {
		kprintf("Straight-map entire physical memory\n");

		/* Map 2 TB for now */
		map_start = 0;
		map_end = 0x20000000000;
		virt = LINUX_PAGE_OFFSET;

		/* map_start (not a bare int 0) so every %lx gets a long */
		kprintf("Linux kernel virtual: 0x%lx - 0x%lx -> 0x%lx - 0x%lx\n",
			LINUX_PAGE_OFFSET, LINUX_PAGE_OFFSET + map_end,
			map_start, map_end);

		for (phys = map_start; phys < map_end;
		     phys += LARGE_PAGE_SIZE) {
			if (set_pt_large_page(pt, (void *)virt, phys,
					      PTATTR_WRITABLE) != 0) {
				kprintf("%s: error setting mapping for 0x%lx\n", __FUNCTION__, virt);
			}
			virt += LARGE_PAGE_SIZE;
		}
	} else {
		kprintf("Straight-map physical memory areas allocated to McKernel\n");

		nr_memory_chunks = ihk_mc_get_nr_memory_chunks();
		if (nr_memory_chunks == 0) {
			kprintf("%s: ERROR: No memory chunk available.\n", __FUNCTION__);
			return;
		}

		for (chunk_id = 0; chunk_id < nr_memory_chunks; chunk_id++) {
			/* A chunk that cannot be queried is reported and skipped */
			if (ihk_mc_get_memory_chunk(chunk_id, &map_start, &map_end, &numa_id)) {
				kprintf("%s: ERROR: Memory chunk id (%d) out of range.\n", __FUNCTION__, chunk_id);
				continue;
			}

			dkprintf("Linux kernel virtual: 0x%lx - 0x%lx -> 0x%lx - 0x%lx\n",
				LINUX_PAGE_OFFSET + map_start, LINUX_PAGE_OFFSET + map_end, map_start, map_end);

			virt = LINUX_PAGE_OFFSET + map_start;
			for (phys = map_start; phys < map_end;
			     phys += LARGE_PAGE_SIZE, virt += LARGE_PAGE_SIZE) {
				if (set_pt_large_page(pt, (void *)virt, phys,
						      PTATTR_WRITABLE) != 0) {
					kprintf("%s: set_pt_large_page() failed for 0x%lx\n", __FUNCTION__, virt);
				}
			}
		}
	}
}
void init_text_area(struct page_table *pt)
{
unsigned long __end, phys, virt;
@@ -2624,17 +2682,19 @@ void init_page_table(void)
init_pt = ihk_mc_alloc_pages(1, IHK_MC_AP_CRITICAL);
ihk_mc_spinlock_init(&init_pt_lock);
memset(init_pt, 0, sizeof(PAGE_SIZE));
memset(init_pt, 0, sizeof(*init_pt));
/* Normal memory area */
init_normal_area(init_pt);
init_linux_kernel_mapping(init_pt);
init_fixed_area(init_pt);
init_low_area(init_pt);
init_text_area(init_pt);
init_vsyscall_area(init_pt);
load_page_table(init_pt);
kprintf("Page table is now at %p\n", init_pt);
init_pt_loaded = 1;
kprintf("Page table is now at 0x%lx\n", init_pt);
}
extern void __reserve_arch_pages(unsigned long, unsigned long,
@@ -2664,15 +2724,31 @@ unsigned long virt_to_phys(void *v)
unsigned long va = (unsigned long)v;
if (va >= MAP_KERNEL_START) {
dkprintf("%s: MAP_KERNEL_START <= 0x%lx <= LINUX_PAGE_OFFSET\n",
__FUNCTION__, va);
return va - MAP_KERNEL_START + x86_kernel_phys_base;
} else {
}
else if (va >= LINUX_PAGE_OFFSET) {
return va - LINUX_PAGE_OFFSET;
}
else if (va >= MAP_FIXED_START) {
return va - MAP_FIXED_START;
}
else {
dkprintf("%s: MAP_ST_START <= 0x%lx <= MAP_FIXED_START\n",
__FUNCTION__, va);
return va - MAP_ST_START;
}
}
void *phys_to_virt(unsigned long p)
{
return (void *)(p + MAP_ST_START);
/* Before loading our own PT use straight mapping */
if (!init_pt_loaded) {
return (void *)(p + MAP_ST_START);
}
return (void *)(p + LINUX_PAGE_OFFSET);
}
int copy_from_user(void *dst, const void *src, size_t siz)
@@ -2840,17 +2916,12 @@ int read_process_vm(struct process_vm *vm, void *kdst, const void *usrc, size_t
return error;
}
#ifdef POSTK_DEBUG_TEMP_FIX_52 /* NUMA support(memory area determination) */
if (!is_mckernel_memory(pa)) {
#else
if (pa < ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0) ||
pa >= ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0)) {
#endif /* POSTK_DEBUG_TEMP_FIX_52 */
if (!is_mckernel_memory(pa, pa + cpsize)) {
dkprintf("%s: pa is outside of LWK memory, to: %p, pa: %p,"
"cpsize: %d\n", __FUNCTION__, to, pa, cpsize);
va = ihk_mc_map_virtual(pa, 1, PTATTR_ACTIVE);
memcpy(to, va, cpsize);
ihk_mc_unmap_virtual(va, 1, 1);
ihk_mc_unmap_virtual(va, 1);
}
else {
va = phys_to_virt(pa);
@@ -2924,17 +2995,12 @@ int write_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t
return error;
}
#ifdef POSTK_DEBUG_TEMP_FIX_52 /* NUMA support(memory area determination) */
if (!is_mckernel_memory(pa)) {
#else
if (pa < ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0) ||
pa >= ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0)) {
#endif /* POSTK_DEBUG_TEMP_FIX_52 */
if (!is_mckernel_memory(pa, pa + cpsize)) {
dkprintf("%s: pa is outside of LWK memory, from: %p,"
"pa: %p, cpsize: %d\n", __FUNCTION__, from, pa, cpsize);
va = ihk_mc_map_virtual(pa, 1, PTATTR_ACTIVE);
memcpy(va, from, cpsize);
ihk_mc_unmap_virtual(va, 1, 1);
ihk_mc_unmap_virtual(va, 1);
}
else {
va = phys_to_virt(pa);
@@ -2995,17 +3061,12 @@ int patch_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t
return error;
}
#ifdef POSTK_DEBUG_TEMP_FIX_52 /* NUMA support(memory area determination) */
if (!is_mckernel_memory(pa)) {
#else
if (pa < ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0) ||
pa >= ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0)) {
#endif /* POSTK_DEBUG_TEMP_FIX_52 */
if (!is_mckernel_memory(pa, pa + cpsize)) {
dkprintf("%s: pa is outside of LWK memory, from: %p,"
"pa: %p, cpsize: %d\n", __FUNCTION__, from, pa, cpsize);
va = ihk_mc_map_virtual(pa, 1, PTATTR_ACTIVE);
memcpy(va, from, cpsize);
ihk_mc_unmap_virtual(va, 1, 1);
ihk_mc_unmap_virtual(va, 1);
}
else {
va = phys_to_virt(pa);

View File

@@ -30,7 +30,7 @@ int ihk_mc_ikc_init_first_local(struct ihk_ikc_channel_desc *channel,
memset(channel, 0, sizeof(struct ihk_ikc_channel_desc));
mikc_queue_pages = ((2 * num_processors * MASTER_IKCQ_PKTSIZE)
mikc_queue_pages = ((4 * num_processors * MASTER_IKCQ_PKTSIZE)
+ (PAGE_SIZE - 1)) / PAGE_SIZE;
/* Place both sides in this side */

View File

@@ -16,20 +16,16 @@
#include <registers.h>
#include <mc_perf_event.h>
#include <config.h>
#include <debug.h>
extern unsigned int *x86_march_perfmap;
extern int running_on_kvm(void);
#ifdef POSTK_DEBUG_TEMP_FIX_31
int ihk_mc_perfctr_fixed_init(int counter, int mode);
#endif/*POSTK_DEBUG_TEMP_FIX_31*/
//#define PERFCTR_DEBUG
#ifdef PERFCTR_DEBUG
#define dkprintf(...) do { kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) do { kprintf(__VA_ARGS__); } while (0)
#else
#define dkprintf(...) do { } while (0)
#define ekprintf(...) do { kprintf(__VA_ARGS__); } while (0)
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
#define X86_CR4_PCE 0x00000100
@@ -43,11 +39,11 @@ int ihk_mc_perfctr_fixed_init(int counter, int mode);
} \
} while(0)
int perf_counters_discovered = 0;
int X86_IA32_NUM_PERF_COUNTERS = 0;
unsigned long X86_IA32_PERF_COUNTERS_MASK = 0;
int X86_IA32_NUM_FIXED_PERF_COUNTERS = 0;
unsigned long X86_IA32_FIXED_PERF_COUNTERS_MASK = 0;
int perf_counters_discovered;
int NUM_PERF_COUNTERS;
unsigned long PERF_COUNTERS_MASK;
int NUM_FIXED_PERF_COUNTERS;
unsigned long FIXED_PERF_COUNTERS_MASK;
void x86_init_perfctr(void)
{
@@ -78,17 +74,17 @@ void x86_init_perfctr(void)
op = 0x0a;
asm volatile("cpuid" : "=a"(eax),"=b"(ebx),"=c"(ecx),"=d"(edx):"a"(op));
X86_IA32_NUM_PERF_COUNTERS = ((eax & 0xFF00) >> 8);
X86_IA32_PERF_COUNTERS_MASK = (1 << X86_IA32_NUM_PERF_COUNTERS) - 1;
NUM_PERF_COUNTERS = ((eax & 0xFF00) >> 8);
PERF_COUNTERS_MASK = (1 << NUM_PERF_COUNTERS) - 1;
X86_IA32_NUM_FIXED_PERF_COUNTERS = (edx & 0x0F);
X86_IA32_FIXED_PERF_COUNTERS_MASK =
((1UL << X86_IA32_NUM_FIXED_PERF_COUNTERS) - 1) <<
X86_IA32_BASE_FIXED_PERF_COUNTERS;
NUM_FIXED_PERF_COUNTERS = (edx & 0x0F);
FIXED_PERF_COUNTERS_MASK =
((1UL << NUM_FIXED_PERF_COUNTERS) - 1) <<
BASE_FIXED_PERF_COUNTERS;
perf_counters_discovered = 1;
kprintf("X86_IA32_NUM_PERF_COUNTERS: %d, X86_IA32_NUM_FIXED_PERF_COUNTERS: %d\n",
X86_IA32_NUM_PERF_COUNTERS, X86_IA32_NUM_FIXED_PERF_COUNTERS);
kprintf("NUM_PERF_COUNTERS: %d, NUM_FIXED_PERF_COUNTERS: %d\n",
NUM_PERF_COUNTERS, NUM_FIXED_PERF_COUNTERS);
}
/* Clear Fixed Counter Control */
@@ -97,20 +93,20 @@ void x86_init_perfctr(void)
wrmsr(MSR_PERF_FIXED_CTRL, value);
/* Clear Generic Counter Control */
for(i = 0; i < X86_IA32_NUM_PERF_COUNTERS; i++) {
for (i = 0; i < NUM_PERF_COUNTERS; i++) {
wrmsr(MSR_IA32_PERFEVTSEL0 + i, 0);
}
/* Enable PMC Control */
value = rdmsr(MSR_PERF_GLOBAL_CTRL);
value |= X86_IA32_PERF_COUNTERS_MASK;
value |= X86_IA32_FIXED_PERF_COUNTERS_MASK;
value |= PERF_COUNTERS_MASK;
value |= FIXED_PERF_COUNTERS_MASK;
wrmsr(MSR_PERF_GLOBAL_CTRL, value);
}
static int set_perfctr_x86_direct(int counter, int mode, unsigned int value)
{
if (counter < 0 || counter >= X86_IA32_NUM_PERF_COUNTERS) {
if (counter < 0 || counter >= NUM_PERF_COUNTERS) {
return -EINVAL;
}
@@ -149,13 +145,14 @@ static int set_pmc_x86_direct(int counter, long val)
val &= 0x000000ffffffffff; // 40bit Mask
cnt_bit = 1UL << counter;
if ( cnt_bit & X86_IA32_PERF_COUNTERS_MASK ) {
if (cnt_bit & PERF_COUNTERS_MASK) {
// set generic pmc
wrmsr(MSR_IA32_PMC0 + counter, val);
}
else if ( cnt_bit & X86_IA32_FIXED_PERF_COUNTERS_MASK ) {
else if (cnt_bit & FIXED_PERF_COUNTERS_MASK) {
// set fixed pmc
wrmsr(MSR_IA32_FIXED_CTR0 + counter - X86_IA32_BASE_FIXED_PERF_COUNTERS, val);
wrmsr(MSR_IA32_FIXED_CTR0 +
counter - BASE_FIXED_PERF_COUNTERS, val);
}
else {
return -EINVAL;
@@ -175,10 +172,10 @@ static int set_fixed_counter(int counter, int mode)
{
unsigned long value = 0;
unsigned int ctr_mask = 0xf;
int counter_idx = counter - X86_IA32_BASE_FIXED_PERF_COUNTERS ;
int counter_idx = counter - BASE_FIXED_PERF_COUNTERS;
unsigned int set_val = 0;
if (counter_idx < 0 || counter_idx >= X86_IA32_NUM_FIXED_PERF_COUNTERS) {
if (counter_idx < 0 || counter_idx >= NUM_FIXED_PERF_COUNTERS) {
return -EINVAL;
}
@@ -208,14 +205,13 @@ int ihk_mc_perfctr_init_raw(int counter, uint64_t config, int mode)
int ihk_mc_perfctr_init_raw(int counter, unsigned int code, int mode)
#endif /*POSTK_DEBUG_TEMP_FIX_29*/
{
#ifdef POSTK_DEBUG_TEMP_FIX_31
// PAPI_REF_CYC counted by fixed counter
if (counter >= X86_IA32_BASE_FIXED_PERF_COUNTERS) {
if (counter >= BASE_FIXED_PERF_COUNTERS &&
counter < BASE_FIXED_PERF_COUNTERS + NUM_FIXED_PERF_COUNTERS) {
return ihk_mc_perfctr_fixed_init(counter, mode);
}
#endif /*POSTK_DEBUG_TEMP_FIX_31*/
if (counter < 0 || counter >= X86_IA32_NUM_PERF_COUNTERS) {
if (counter < 0 || counter >= NUM_PERF_COUNTERS) {
return -EINVAL;
}
@@ -248,7 +244,7 @@ int ihk_mc_perfctr_init(int counter, enum ihk_perfctr_type type, int mode)
}
#endif /*POSTK_DEBUG_TEMP_FIX_29*/
if (counter < 0 || counter >= X86_IA32_NUM_PERF_COUNTERS) {
if (counter < 0 || counter >= NUM_PERF_COUNTERS) {
return -EINVAL;
}
if (type < 0 || type >= PERFCTR_MAX_TYPE) {
@@ -300,18 +296,11 @@ int ihk_mc_perfctr_set_extra(struct mc_perf_event *event)
extern void x86_march_perfctr_start(unsigned long counter_mask);
#endif
#ifdef POSTK_DEBUG_TEMP_FIX_30
int ihk_mc_perfctr_start(int counter)
#else
int ihk_mc_perfctr_start(unsigned long counter_mask)
#endif /*POSTK_DEBUG_TEMP_FIX_30*/
{
int ret = 0;
unsigned long value = 0;
unsigned long mask = X86_IA32_PERF_COUNTERS_MASK | X86_IA32_FIXED_PERF_COUNTERS_MASK;
#ifdef POSTK_DEBUG_TEMP_FIX_30
unsigned long counter_mask = 1UL << counter;
#endif /*POSTK_DEBUG_TEMP_FIX_30*/
unsigned long mask = PERF_COUNTERS_MASK | FIXED_PERF_COUNTERS_MASK;
PERFCTR_CHKANDJUMP(counter_mask & ~mask, "counter_mask out of range", -EINVAL);
@@ -328,18 +317,11 @@ int ihk_mc_perfctr_start(unsigned long counter_mask)
goto fn_exit;
}
#ifdef POSTK_DEBUG_TEMP_FIX_30
int ihk_mc_perfctr_stop(int counter)
#else
int ihk_mc_perfctr_stop(unsigned long counter_mask)
#endif/*POSTK_DEBUG_TEMP_FIX_30*/
{
int ret = 0;
unsigned long value;
unsigned long mask = X86_IA32_PERF_COUNTERS_MASK | X86_IA32_FIXED_PERF_COUNTERS_MASK;
#ifdef POSTK_DEBUG_TEMP_FIX_30
unsigned long counter_mask = 1UL << counter;
#endif/*POSTK_DEBUG_TEMP_FIX_30*/
unsigned long mask = PERF_COUNTERS_MASK | FIXED_PERF_COUNTERS_MASK;
PERFCTR_CHKANDJUMP(counter_mask & ~mask, "counter_mask out of range", -EINVAL);
@@ -376,10 +358,10 @@ int ihk_mc_perfctr_fixed_init(int counter, int mode)
{
unsigned long value = 0;
unsigned int ctr_mask = 0xf;
int counter_idx = counter - X86_IA32_BASE_FIXED_PERF_COUNTERS ;
int counter_idx = counter - BASE_FIXED_PERF_COUNTERS;
unsigned int set_val = 0;
if (counter_idx < 0 || counter_idx >= X86_IA32_NUM_FIXED_PERF_COUNTERS) {
if (counter_idx < 0 || counter_idx >= NUM_FIXED_PERF_COUNTERS) {
return -EINVAL;
}
@@ -420,7 +402,7 @@ int ihk_mc_perfctr_read_mask(unsigned long counter_mask, unsigned long *value)
{
int i, j;
for (i = 0, j = 0; i < X86_IA32_NUM_PERF_COUNTERS && counter_mask;
for (i = 0, j = 0; i < NUM_PERF_COUNTERS && counter_mask;
i++, counter_mask >>= 1) {
if (counter_mask & 1) {
value[j++] = rdpmc(i);
@@ -440,13 +422,14 @@ unsigned long ihk_mc_perfctr_read(int counter)
cnt_bit = 1UL << counter;
if ( cnt_bit & X86_IA32_PERF_COUNTERS_MASK ) {
if (cnt_bit & PERF_COUNTERS_MASK) {
// read generic pmc
retval = rdpmc(counter);
}
else if ( cnt_bit & X86_IA32_FIXED_PERF_COUNTERS_MASK ) {
else if (cnt_bit & FIXED_PERF_COUNTERS_MASK) {
// read fixed pmc
retval = rdpmc((1 << 30) + (counter - X86_IA32_BASE_FIXED_PERF_COUNTERS));
retval = rdpmc((1 << 30) +
(counter - BASE_FIXED_PERF_COUNTERS));
}
else {
retval = -EINVAL;
@@ -468,12 +451,12 @@ unsigned long ihk_mc_perfctr_read_msr(int counter)
cnt_bit = 1UL << counter;
if ( cnt_bit & X86_IA32_PERF_COUNTERS_MASK ) {
if (cnt_bit & PERF_COUNTERS_MASK) {
// read generic pmc
idx = MSR_IA32_PMC0 + counter;
retval = (unsigned long) rdmsr(idx);
}
else if ( cnt_bit & X86_IA32_FIXED_PERF_COUNTERS_MASK ) {
else if (cnt_bit & FIXED_PERF_COUNTERS_MASK) {
// read fixed pmc
idx = MSR_IA32_FIXED_CTR0 + counter;
retval = (unsigned long) rdmsr(idx);
@@ -506,8 +489,8 @@ int ihk_mc_perfctr_alloc_counter(unsigned int *type, unsigned long *config, unsi
}
// find avail generic counter
for(i = 0; i < X86_IA32_NUM_PERF_COUNTERS; i++) {
if(!(pmc_status & (1 << i))) {
for (i = 0; i < NUM_PERF_COUNTERS; i++) {
if (!(pmc_status & (1 << i))) {
ret = i;
break;
}

View File

@@ -31,12 +31,11 @@
#include <page.h>
#include <limits.h>
#include <syscall.h>
#include <debug.h>
void terminate_mcexec(int, int);
extern long do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact);
long syscall(int num, ihk_mc_user_context_t *ctx);
void set_signal(int sig, void *regs0, siginfo_t *info);
void check_signal(unsigned long rc, void *regs0, int num);
extern unsigned long do_fork(int, unsigned long, unsigned long, unsigned long,
unsigned long, unsigned long, unsigned long);
extern int get_xsave_size();
@@ -45,11 +44,8 @@ extern uint64_t get_xsave_mask();
//#define DEBUG_PRINT_SC
#ifdef DEBUG_PRINT_SC
#define dkprintf kprintf
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
uintptr_t debug_constants[] = {
@@ -92,33 +88,45 @@ static ptrdiff_t vdso_offset;
extern int num_processors;
int obtain_clone_cpuid(cpu_set_t *cpu_set) {
int obtain_clone_cpuid(cpu_set_t *cpu_set, int use_last) {
int min_queue_len = -1;
int cpu, min_cpu = -1;
int cpu, min_cpu = -1, uti_cpu = -1;
unsigned long irqstate;
irqstate = ihk_mc_spinlock_lock(&runq_reservation_lock);
/* Find the first allowed core with the shortest run queue */
for (cpu = 0; cpu < num_processors; ++cpu) {
struct cpu_local_var *v;
unsigned long irqstate;
if (!CPU_ISSET(cpu, cpu_set)) continue;
v = get_cpu_local_var(cpu);
irqstate = ihk_mc_spinlock_lock(&v->runq_lock);
if (min_queue_len == -1 || v->runq_len < min_queue_len) {
min_queue_len = v->runq_len;
ihk_mc_spinlock_lock_noirq(&v->runq_lock);
dkprintf("%s: cpu=%d,runq_len=%d,runq_reserved=%d\n", __FUNCTION__, cpu, v->runq_len, v->runq_reserved);
if (min_queue_len == -1 || v->runq_len + v->runq_reserved < min_queue_len) {
min_queue_len = v->runq_len + v->runq_reserved;
min_cpu = cpu;
}
ihk_mc_spinlock_unlock(&v->runq_lock, irqstate);
/* Record the last tie CPU */
if (min_cpu != cpu && v->runq_len + v->runq_reserved == min_queue_len) {
uti_cpu = cpu;
}
dkprintf("%s: cpu=%d,runq_len=%d,runq_reserved=%d,min_cpu=%d,uti_cpu=%d\n", __FUNCTION__, cpu, v->runq_len, v->runq_reserved, min_cpu, uti_cpu);
ihk_mc_spinlock_unlock_noirq(&v->runq_lock);
#if 0
if (min_queue_len == 0)
break;
#endif
}
min_cpu = use_last ? uti_cpu : min_cpu;
if (min_cpu != -1) {
if (get_cpu_local_var(min_cpu)->status != CPU_STATUS_RESERVED)
get_cpu_local_var(min_cpu)->status = CPU_STATUS_RESERVED;
__sync_fetch_and_add(&get_cpu_local_var(min_cpu)->runq_reserved, 1);
}
ihk_mc_spinlock_unlock(&runq_reservation_lock, irqstate);
return min_cpu;
}
@@ -251,7 +259,7 @@ SYSCALL_DECLARE(rt_sigreturn)
info.si_code = TRAP_TRACE;
set_signal(SIGTRAP, regs, &info);
check_need_resched();
check_signal(0, regs, 0);
check_signal(0, regs, -1);
}
if(ksigsp.fpregs && xsavesize){
@@ -276,7 +284,6 @@ SYSCALL_DECLARE(rt_sigreturn)
}
extern struct cpu_local_var *clv;
extern unsigned long do_kill(struct thread *thread, int pid, int tid, int sig, struct siginfo *info, int ptracecont);
extern void interrupt_syscall(struct thread *, int sig);
extern void terminate(int, int);
extern int num_processors;
@@ -530,23 +537,32 @@ void ptrace_report_signal(struct thread *thread, int sig)
dkprintf("ptrace_report_signal, tid=%d, pid=%d\n", thread->tid, thread->proc->pid);
mcs_rwlock_writer_lock(&proc->update_lock, &lock);
if(!(proc->ptrace & PT_TRACED)){
if (!(thread->ptrace & PT_TRACED)) {
mcs_rwlock_writer_unlock(&proc->update_lock, &lock);
return;
}
thread->exit_status = sig;
/* Transition thread state */
proc->status = PS_TRACED;
thread->exit_status = sig;
thread->status = PS_TRACED;
proc->ptrace &= ~PT_TRACE_SYSCALL;
if (sig == SIGSTOP || sig == SIGTSTP ||
sig == SIGTTIN || sig == SIGTTOU) {
proc->signal_flags |= SIGNAL_STOP_STOPPED;
} else {
proc->signal_flags &= ~SIGNAL_STOP_STOPPED;
}
parent_pid = proc->parent->pid;
thread->ptrace &= ~PT_TRACE_SYSCALL;
save_debugreg(thread->ptrace_debugreg);
if (sig == SIGSTOP || sig == SIGTSTP ||
sig == SIGTTIN || sig == SIGTTOU) {
thread->signal_flags |= SIGNAL_STOP_STOPPED;
}
else {
thread->signal_flags &= ~SIGNAL_STOP_STOPPED;
}
if (thread == proc->main_thread) {
proc->status = PS_DELAY_TRACED;
parent_pid = proc->parent->pid;
}
else {
parent_pid = thread->report_proc->pid;
waitq_wakeup(&thread->report_proc->waitpid_q);
}
mcs_rwlock_writer_unlock(&proc->update_lock, &lock);
memset(&info, '\0', sizeof info);
@@ -555,8 +571,6 @@ void ptrace_report_signal(struct thread *thread, int sig)
info._sifields._sigchld.si_pid = thread->tid;
info._sifields._sigchld.si_status = thread->exit_status;
do_kill(cpu_local_var(current), parent_pid, -1, SIGCHLD, &info, 0);
/* Wake parent (if sleeping in wait4()) */
waitq_wakeup(&proc->parent->waitpid_q);
dkprintf("ptrace_report_signal,sleeping\n");
/* Sleep */
@@ -569,9 +583,8 @@ ptrace_arch_prctl(int pid, long code, long addr)
{
long rc = -EIO;
struct thread *child;
struct mcs_rwlock_node_irqsave lock;
child = find_thread(pid, pid, &lock);
child = find_thread(pid, pid);
if (!child)
return -ESRCH;
if (child->proc->status & (PS_TRACED | PS_STOPPED)) {
@@ -613,7 +626,7 @@ ptrace_arch_prctl(int pid, long code, long addr)
break;
}
}
thread_unlock(child, &lock);
thread_unlock(child);
return rc;
}
@@ -635,11 +648,13 @@ arch_ptrace(long request, int pid, long addr, long data)
static int
isrestart(int num, unsigned long rc, int sig, int restart)
{
if(sig == SIGKILL || sig == SIGSTOP)
if (sig == SIGKILL || sig == SIGSTOP)
return 0;
if(num == 0 || rc != -EINTR)
if (num < 0 || rc != -EINTR)
return 0;
switch(num){
if (sig == SIGCHLD)
return 1;
switch (num) {
case __NR_pause:
case __NR_rt_sigsuspend:
case __NR_rt_sigtimedwait:
@@ -660,14 +675,12 @@ isrestart(int num, unsigned long rc, int sig, int restart)
case __NR_io_getevents:
return 0;
}
if(sig == SIGCHLD)
return 1;
if(restart)
if (restart)
return 1;
return 0;
}
void
int
do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pending *pending, int num)
{
struct x86_user_context *regs = regs0;
@@ -679,14 +692,15 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
int ptraceflag = 0;
struct mcs_rwlock_node_irqsave lock;
struct mcs_rwlock_node_irqsave mcs_rw_node;
int restart = 0;
for(w = pending->sigmask.__val[0], sig = 0; w; sig++, w >>= 1);
dkprintf("do_signal(): tid=%d, pid=%d, sig=%d\n", thread->tid, proc->pid, sig);
orgsig = sig;
if((proc->ptrace & PT_TRACED) &&
pending->ptracecont == 0 &&
sig != SIGKILL) {
if ((thread->ptrace & PT_TRACED) &&
pending->ptracecont == 0 &&
sig != SIGKILL) {
ptraceflag = 1;
sig = SIGSTOP;
}
@@ -707,7 +721,7 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
if(k->sa.sa_handler == SIG_IGN){
kfree(pending);
mcs_rwlock_writer_unlock(&thread->sigcommon->lock, &mcs_rw_node);
return;
goto out;
}
else if(k->sa.sa_handler){
unsigned long *usp; /* user stack */
@@ -757,9 +771,8 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
memcpy(&ksigsp.sigstack, &thread->sigstack, sizeof(stack_t));
ksigsp.sigrc = rc;
ksigsp.num = num;
ksigsp.restart = isrestart(num, rc, sig, k->sa.sa_flags & SA_RESTART);
if(num != 0 && rc == -EINTR && sig == SIGCHLD)
ksigsp.restart = 1;
restart = isrestart(num, rc, sig, k->sa.sa_flags & SA_RESTART);
ksigsp.restart = restart;
if(xsavesize){
uint64_t xsave_mask = get_xsave_mask();
unsigned int low = (unsigned int)xsave_mask;
@@ -772,7 +785,7 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
kfree(_kfpregs);
kprintf("do_signal,no space available\n");
terminate(0, sig);
return;
goto out;
}
kfpregs = (void *)((((unsigned long)_kfpregs) + 63) & ~63);
memset(kfpregs, '\0', xsavesize);
@@ -782,7 +795,7 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
kfree(_kfpregs);
kprintf("do_signal,write_process_vm failed\n");
terminate(0, sig);
return;
goto out;
}
ksigsp.fpregs = (void *)fpregs;
kfree(_kfpregs);
@@ -794,7 +807,7 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
mcs_rwlock_writer_unlock(&thread->sigcommon->lock, &mcs_rw_node);
kprintf("do_signal,write_process_vm failed\n");
terminate(0, sig);
return;
goto out;
}
usp = (unsigned long *)sigsp;
@@ -824,12 +837,13 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
info.si_code = TRAP_TRACE;
set_signal(SIGTRAP, regs, &info);
check_need_resched();
check_signal(0, regs, 0);
check_signal(0, regs, -1);
}
}
else {
int coredumped = 0;
siginfo_t info;
int ptc = pending->ptracecont;
if(ptraceflag){
if(thread->ptrace_recvsig)
@@ -856,25 +870,37 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
info.si_code = CLD_STOPPED;
info._sifields._sigchld.si_pid = thread->proc->pid;
info._sifields._sigchld.si_status = (sig << 8) | 0x7f;
do_kill(cpu_local_var(current), thread->proc->parent->pid, -1, SIGCHLD, &info, 0);
dkprintf("do_signal,SIGSTOP,changing state\n");
if (ptc == 2 &&
thread != thread->proc->main_thread) {
thread->signal_flags =
SIGNAL_STOP_STOPPED;
thread->status = PS_STOPPED;
thread->exit_status = SIGSTOP;
do_kill(thread,
thread->report_proc->pid, -1,
SIGCHLD, &info, 0);
waitq_wakeup(
&thread->report_proc->waitpid_q);
}
else {
/* Update thread state in fork tree */
mcs_rwlock_writer_lock(
&proc->update_lock, &lock);
proc->group_exit_status = SIGSTOP;
/* Update thread state in fork tree */
mcs_rwlock_writer_lock(&proc->update_lock, &lock);
proc->group_exit_status = SIGSTOP;
/* Reap and set new signal_flags */
proc->main_thread->signal_flags =
SIGNAL_STOP_STOPPED;
/* Reap and set new signal_flags */
proc->signal_flags = SIGNAL_STOP_STOPPED;
proc->status = PS_DELAY_STOPPED;
thread->status = PS_STOPPED;
mcs_rwlock_writer_unlock(
&proc->update_lock, &lock);
proc->status = PS_STOPPED;
thread->status = PS_STOPPED;
mcs_rwlock_writer_unlock(&proc->update_lock, &lock);
/* Wake up the parent who tried wait4 and sleeping */
waitq_wakeup(&proc->parent->waitpid_q);
dkprintf("do_signal(): pid: %d, tid: %d SIGSTOP, sleeping\n",
proc->pid, thread->tid);
do_kill(thread,
thread->proc->parent->pid, -1,
SIGCHLD, &info, 0);
}
/* Sleep */
schedule();
dkprintf("SIGSTOP(): woken up\n");
@@ -882,19 +908,28 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
break;
case SIGTRAP:
dkprintf("do_signal,SIGTRAP\n");
if(!(proc->ptrace & PT_TRACED)) {
if (!(thread->ptrace & PT_TRACED)) {
goto core;
}
/* Update thread state in fork tree */
mcs_rwlock_writer_lock(&proc->update_lock, &lock);
thread->exit_status = SIGTRAP;
proc->status = PS_TRACED;
thread->status = PS_TRACED;
mcs_rwlock_writer_unlock(&proc->update_lock, &lock);
/* Wake up the parent who tried wait4 and sleeping */
waitq_wakeup(&thread->proc->parent->waitpid_q);
if (thread == proc->main_thread) {
mcs_rwlock_writer_lock(&proc->update_lock,
&lock);
proc->group_exit_status = SIGTRAP;
proc->status = PS_DELAY_TRACED;
mcs_rwlock_writer_unlock(&proc->update_lock,
&lock);
do_kill(thread, thread->proc->parent->pid, -1,
SIGCHLD, &info, 0);
}
else {
do_kill(thread, thread->report_proc->pid, -1,
SIGCHLD, &info, 0);
waitq_wakeup(&thread->report_proc->waitpid_q);
}
/* Sleep */
dkprintf("do_signal,SIGTRAP,sleeping\n");
@@ -909,7 +944,7 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
info._sifields._sigchld.si_pid = proc->pid;
info._sifields._sigchld.si_status = 0x0000ffff;
do_kill(cpu_local_var(current), proc->parent->pid, -1, SIGCHLD, &info, 0);
proc->signal_flags = SIGNAL_STOP_CONTINUED;
proc->main_thread->signal_flags = SIGNAL_STOP_CONTINUED;
proc->status = PS_RUNNING;
dkprintf("do_signal,SIGCONT,do nothing\n");
break;
@@ -938,6 +973,8 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
break;
}
}
out:
return restart;
}
static struct sig_pending *
@@ -957,10 +994,12 @@ getsigpending(struct thread *thread, int delflag){
lock = &thread->sigcommon->lock;
head = &thread->sigcommon->sigpending;
for(;;) {
if (delflag)
if (delflag) {
mcs_rwlock_writer_lock(lock, &mcs_rw_node);
else
}
else {
mcs_rwlock_reader_lock(lock, &mcs_rw_node);
}
list_for_each_entry_safe(pending, next, head, list){
for(x = pending->sigmask.__val[0], sig = 0; x; sig++, x >>= 1);
@@ -973,19 +1012,23 @@ getsigpending(struct thread *thread, int delflag){
if(delflag)
list_del(&pending->list);
if (delflag)
if (delflag) {
mcs_rwlock_writer_unlock(lock, &mcs_rw_node);
else
}
else {
mcs_rwlock_reader_unlock(lock, &mcs_rw_node);
}
return pending;
}
}
}
if (delflag)
if (delflag) {
mcs_rwlock_writer_unlock(lock, &mcs_rw_node);
else
}
else {
mcs_rwlock_reader_unlock(lock, &mcs_rw_node);
}
if(lock == &thread->sigpendinglock)
return NULL;
@@ -1000,6 +1043,11 @@ getsigpending(struct thread *thread, int delflag){
/*
 * Look up a pending signal for the thread without removing it from its
 * list (getsigpending() is called with delflag == 0).
 *
 * Returns NULL straight away when both the per-thread list and the
 * sigcommon list are empty, so getsigpending() is only called when at
 * least one of them has an entry.
 */
struct sig_pending *
hassigpending(struct thread *thread)
{
	if (!list_empty(&thread->sigpending) ||
	    !list_empty(&thread->sigcommon->sigpending)) {
		return getsigpending(thread, 0);
	}

	return NULL;
}
@@ -1017,6 +1065,12 @@ void save_syscall_return_value(int num, unsigned long rc)
return;
}
/** \brief Check for arrived signals and process them
*
* @param rc return value of syscall
* @param regs0 context
* @param num syscall number (-1: Not called on exiting system call)
*/
void
check_signal(unsigned long rc, void *regs0, int num)
{
@@ -1050,6 +1104,11 @@ check_signal(unsigned long rc, void *regs0, int num)
goto out;
}
if (list_empty(&thread->sigpending) &&
list_empty(&thread->sigcommon->sigpending)) {
goto out;
}
for(;;){
pending = getsigpending(thread, 1);
if(!pending) {
@@ -1057,7 +1116,9 @@ check_signal(unsigned long rc, void *regs0, int num)
goto out;
}
do_signal(rc, regs, thread, pending, num);
if (do_signal(rc, regs, thread, pending, num)) {
num = -1;
}
}
out:
@@ -1137,7 +1198,7 @@ check_sig_pending_thread(struct thread *thread)
}
void
check_sig_pending()
check_sig_pending(void)
{
struct thread *thread;
struct cpu_local_var *v;
@@ -1158,7 +1219,7 @@ repeat:
continue;
}
if (thread->proc->exit_status & 0x0000000100000000L) {
if (thread->proc->group_exit_status & 0x0000000100000000L) {
continue;
}
@@ -1367,7 +1428,8 @@ done:
return 0;
}
if (tthread->thread_offloaded) {
/* Forward signal to Linux by interrupt_syscall mechanism */
if (tthread->uti_state == UTI_STATE_RUNNING_IN_LINUX) {
if (!tthread->proc->nohost) {
interrupt_syscall(tthread, sig);
}
@@ -1384,10 +1446,10 @@ done:
in check_signal */
rc = 0;
k = tthread->sigcommon->action + sig - 1;
if((sig != SIGKILL && (tproc->ptrace & PT_TRACED)) ||
(k->sa.sa_handler != (void *)1 &&
(k->sa.sa_handler != NULL ||
(sig != SIGCHLD && sig != SIGURG)))){
if ((sig != SIGKILL && (tthread->ptrace & PT_TRACED)) ||
(k->sa.sa_handler != (void *)1 &&
(k->sa.sa_handler != NULL ||
(sig != SIGCHLD && sig != SIGURG)))) {
struct sig_pending *pending = NULL;
if (sig < 33) { // SIGRTMIN - SIGRTMAX
list_for_each_entry(pending, head, list){
@@ -1471,7 +1533,7 @@ set_signal(int sig, void *regs0, siginfo_t *info)
SYSCALL_DECLARE(mmap)
{
const int supported_flags = 0
const unsigned int supported_flags = 0
| MAP_SHARED // 01
| MAP_PRIVATE // 02
| MAP_FIXED // 10
@@ -1479,7 +1541,7 @@ SYSCALL_DECLARE(mmap)
| MAP_LOCKED // 2000
| MAP_POPULATE // 8000
| MAP_HUGETLB // 00040000
| (0x3F << MAP_HUGE_SHIFT) // FC000000
| (0x3FU << MAP_HUGE_SHIFT) // FC000000
;
const int ignored_flags = 0
#ifdef USE_NOCACHE_MMAP
@@ -1498,7 +1560,7 @@ SYSCALL_DECLARE(mmap)
| MAP_NONBLOCK // 00010000
;
const intptr_t addr0 = ihk_mc_syscall_arg0(ctx);
const uintptr_t addr0 = ihk_mc_syscall_arg0(ctx);
const size_t len0 = ihk_mc_syscall_arg1(ctx);
const int prot = ihk_mc_syscall_arg2(ctx);
const int flags0 = ihk_mc_syscall_arg3(ctx);
@@ -1507,7 +1569,7 @@ SYSCALL_DECLARE(mmap)
struct thread *thread = cpu_local_var(current);
struct vm_regions *region = &thread->vm->region;
int error;
intptr_t addr = 0;
uintptr_t addr = 0;
size_t len;
int flags = flags0;
size_t pgsize;
@@ -1699,6 +1761,11 @@ SYSCALL_DECLARE(arch_prctl)
ihk_mc_syscall_arg1(ctx));
}
/* System call handler for time: returns whatever time() returns. */
SYSCALL_DECLARE(time)
{
return time();
}
static int vdso_get_vdso_info(void)
{
int error;
@@ -2081,7 +2148,7 @@ int do_process_vm_read_writev(int pid,
range = lookup_process_memory_range(lthread->vm,
(uintptr_t)local_iov,
(uintptr_t)(local_iov + liovcnt * sizeof(struct iovec)));
(uintptr_t)(local_iov + liovcnt));
if (!range) {
ret = -EFAULT;
@@ -2090,7 +2157,7 @@ int do_process_vm_read_writev(int pid,
range = lookup_process_memory_range(lthread->vm,
(uintptr_t)remote_iov,
(uintptr_t)(remote_iov + riovcnt * sizeof(struct iovec)));
(uintptr_t)(remote_iov + riovcnt));
if (!range) {
ret = -EFAULT;
@@ -2366,8 +2433,6 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg)
case 0:
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
sizeof(void *) * count);
memcpy(mpsr->status, mpsr->user_status,
sizeof(int) * count);
memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count);
memset(mpsr->ptep, 0, sizeof(pte_t) * count);
@@ -2387,41 +2452,38 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg)
case 0:
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
sizeof(void *) * count);
memcpy(mpsr->status, mpsr->user_status,
sizeof(int) * count);
case 1:
memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count);
mpsr->nodes_ready = 1;
break;
case 1:
memset(mpsr->ptep, 0, sizeof(pte_t) * count);
memset(mpsr->status, 0, sizeof(int) * count);
memset(mpsr->nr_pages, 0, sizeof(int) * count);
memset(mpsr->dst_phys, 0,
sizeof(unsigned long) * count);
mpsr->nodes_ready = 1;
break;
default:
break;
}
}
else if (nr_cpus >= 4 && nr_cpus < 8) {
else if (nr_cpus >= 4 && nr_cpus < 7) {
switch (cpu_index) {
case 0:
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
sizeof(void *) * count);
break;
case 1:
memcpy(mpsr->status, mpsr->user_status,
sizeof(int) * count);
break;
case 2:
memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count);
mpsr->nodes_ready = 1;
break;
case 3:
case 2:
memset(mpsr->ptep, 0, sizeof(pte_t) * count);
memset(mpsr->status, 0, sizeof(int) * count);
break;
case 3:
memset(mpsr->nr_pages, 0, sizeof(int) * count);
memset(mpsr->dst_phys, 0,
sizeof(unsigned long) * count);
@@ -2431,7 +2493,7 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg)
break;
}
}
else if (nr_cpus >= 8) {
else {
switch (cpu_index) {
case 0:
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
@@ -2443,28 +2505,23 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg)
sizeof(void *) * (count / 2));
break;
case 2:
memcpy(mpsr->status, mpsr->user_status,
sizeof(int) * count);
break;
case 3:
memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count);
mpsr->nodes_ready = 1;
break;
case 4:
case 3:
memset(mpsr->ptep, 0, sizeof(pte_t) * count);
break;
case 5:
case 4:
memset(mpsr->status, 0, sizeof(int) * count);
break;
case 6:
case 5:
memset(mpsr->nr_pages, 0, sizeof(int) * count);
break;
case 7:
case 6:
memset(mpsr->dst_phys, 0,
sizeof(unsigned long) * count);
break;
default:
break;
}
@@ -2672,11 +2729,19 @@ out:
/*
 * Return the current time in seconds.
 *
 * When gettime_local_support is set, the value is computed locally with
 * calculate_time_from_tsc() and its tv_sec field is returned.  Otherwise
 * a __NR_time request (NULL argument) is issued through do_syscall() and
 * its result is returned.
 */
time_t time(void) {
	struct syscall_request sreq IHK_DMA_ALIGN;
	struct timespec ats;
	time_t ret = 0;

	if (gettime_local_support) {
		calculate_time_from_tsc(&ats);
		ret = ats.tv_sec;
	}
	else {
		sreq.number = __NR_time;
		sreq.args[0] = (uintptr_t)NULL;
		ret = (time_t)do_syscall(&sreq, ihk_mc_get_processor_id());
	}
	return ret;
}

View File

@@ -31,51 +31,6 @@ struct tod_data_s tod_data
.version = IHK_ATOMIC64_INIT(0),
};
/*
 * Issue the "pause" instruction.  The "memory" clobber also makes this a
 * compiler-level memory barrier.  Called between polls of
 * tod_data.version.
 */
static inline void cpu_pause_for_vsyscall(void)
{
	asm volatile ("pause" ::: "memory");
}
/*
 * Store into *ts the value of tod_data.origin advanced by the time that
 * the current TSC reading represents at tod_data.clocks_per_sec.
 *
 * tod_data.origin is copied under a version check: the copy is retried
 * until tod_data.version is even and reads the same before and after the
 * copy.
 */
static inline void calculate_time_from_tsc(struct timespec *ts)
{
	long seq;
	unsigned long tsc_now;
	__time_t whole_secs;
	long frac_ns;

	for (;;) {
		seq = ihk_atomic64_read(&tod_data.version);
		if (seq & 1) {
			/* settimeofday() is in progress */
			cpu_pause_for_vsyscall();
			continue;
		}

		rmb();
		*ts = tod_data.origin;
		rmb();

		if (seq == ihk_atomic64_read(&tod_data.version)) {
			break;
		}

		/* settimeofday() has intervened */
		cpu_pause_for_vsyscall();
	}

	tsc_now = rdtsc();
	whole_secs = tsc_now / tod_data.clocks_per_sec;
	/* calc. of frac_ns overflows if clocks_per_sec exceeds 18.44 GHz */
	frac_ns = NS_PER_SEC * (tsc_now % tod_data.clocks_per_sec)
		/ tod_data.clocks_per_sec;

	ts->tv_sec += whole_secs;
	ts->tv_nsec += frac_ns;

	/* Carry an overflowing nanosecond field into the seconds field. */
	if (ts->tv_nsec >= NS_PER_SEC) {
		++ts->tv_sec;
		ts->tv_nsec -= NS_PER_SEC;
	}
}
int vsyscall_gettimeofday(struct timeval *tv, void *tz)
{
int error;

View File

@@ -45,11 +45,12 @@ fi
turbo=""
ihk_irq=""
safe_kernel_map=""
umask_old=`umask`
idle_halt=""
allow_oversubscribe=""
while getopts :tk:c:m:o:f:r:q:i:d:e:hO OPT
while getopts stk:c:m:o:f:r:q:i:d:e:hO OPT
do
case ${OPT} in
f) facility=${OPTARG}
@@ -62,6 +63,8 @@ do
;;
m) mem=${OPTARG}
;;
s) safe_kernel_map="safe_kernel_map"
;;
r) ikc_map=${OPTARG}
;;
q) ihk_irq=${OPTARG}
@@ -78,8 +81,8 @@ do
;;
O) allow_oversubscribe="allow_oversubscribe"
;;
*) echo "invalid option -${OPT}" >&2
exit 1
\?) exit 1
;;
esac
done
@@ -232,7 +235,7 @@ if [ "${ENABLE_MCOVERLAYFS}" == "yes" ]; then
enable_mcoverlay="yes"
fi
else
if [ ${linux_version_code} -eq 199168 -a ${rhel_release} -ge 327 -a ${rhel_release} -le 693 ]; then
if [ ${linux_version_code} -eq 199168 -a ${rhel_release} -ge 327 -a ${rhel_release} -le 862 ]; then
enable_mcoverlay="yes"
fi
if [ ${linux_version_code} -ge 262144 -a ${linux_version_code} -lt 262400 ]; then
@@ -446,7 +449,7 @@ if ! ${SBINDIR}/ihkosctl 0 load ${KERNDIR}/mckernel.img; then
fi
# Set kernel arguments
if ! ${SBINDIR}/ihkosctl 0 kargs "hidos $turbo $idle_halt dump_level=${DUMP_LEVEL} $extra_kopts $allow_oversubscribe"; then
if ! ${SBINDIR}/ihkosctl 0 kargs "hidos $turbo $safe_kernel_map $idle_halt dump_level=${DUMP_LEVEL} $extra_kopts $allow_oversubscribe"; then
echo "error: setting kernel arguments" >&2
error_exit "os_created"
fi

View File

@@ -54,48 +54,6 @@
/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
/* Define to address of kernel symbol __vvar_page, or 0 if exported */
#undef MCCTRL_KSYM___vvar_page
/* Define to address of kernel symbol hpet_address, or 0 if exported */
#undef MCCTRL_KSYM_hpet_address
/* Define to address of kernel symbol hv_clock, or 0 if exported */
#undef MCCTRL_KSYM_hv_clock
/* Define to address of kernel symbol sys_mount, or 0 if exported */
#undef MCCTRL_KSYM_sys_mount
/* Define to address of kernel symbol sys_readlink, or 0 if exported */
#undef MCCTRL_KSYM_sys_readlink
/* Define to address of kernel symbol sys_umount, or 0 if exported */
#undef MCCTRL_KSYM_sys_umount
/* Define to address of kernel symbol sys_unshare, or 0 if exported */
#undef MCCTRL_KSYM_sys_unshare
/* Define to address of kernel symbol vdso_end, or 0 if exported */
#undef MCCTRL_KSYM_vdso_end
/* Define to address of kernel symbol vdso_image_64, or 0 if exported */
#undef MCCTRL_KSYM_vdso_image_64
/* Define to address of kernel symbol vdso_pages, or 0 if exported */
#undef MCCTRL_KSYM_vdso_pages
/* Define to address of kernel symbol vdso_spec, or 0 if exported */
#undef MCCTRL_KSYM_vdso_spec
/* Define to address of kernel symbol vdso_start, or 0 if exported */
#undef MCCTRL_KSYM_vdso_start
/* Define to address of kernel symbol walk_page_range, or 0 if exported */
#undef MCCTRL_KSYM_walk_page_range
/* Define to address of kernel symbol zap_page_range, or 0 if exported */
#undef MCCTRL_KSYM_zap_page_range
/* McKernel specific headers */
#undef MCKERNEL_INCDIR
@@ -128,3 +86,6 @@
/* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS
/* whether or not syscall_intercept library is linked */
#undef WITH_SYSCALL_INTERCEPT

622
configure vendored
View File

@@ -628,9 +628,12 @@ IHK_RELEASE_DATE
DCFA_VERSION
MCKERNEL_VERSION
IHK_VERSION
WITH_SYSCALL_INTERCEPT
ENABLE_QLMPI
ENABLE_RUSAGE
ENABLE_MCOVERLAYFS
LDFLAGS_SYSCALL_INTERCEPT
CPPFLAGS_SYSCALL_INTERCEPT
MANDIR
KERNDIR
KMODDIR
@@ -702,6 +705,9 @@ enable_option_checking
with_mpi
with_mpi_include
with_mpi_lib
with_syscall_intercept
with_syscall_intercept_include
with_syscall_intercept_lib
with_kernelsrc
with_target
with_system_map
@@ -1346,6 +1352,15 @@ Optional Packages:
--with-mpi-include=PATH specify path where mpi include directory can be
found
--with-mpi-lib=PATH specify path where mpi lib directory can be found
--with-syscall_intercept=PATH
specify path where syscall_intercept include
directory and lib directory can be found
--with-syscall_intercept-include=PATH
specify path where syscall_intercept include
directory can be found
--with-syscall_intercept-lib=PATH
specify path where syscall_intercept lib directory
can be found
--with-kernelsrc=path Path to 'kernel src', default is
/lib/modules/uname_r/build
--with-target={attached-mic | builtin-mic | builtin-x86 | smp-x86}
@@ -2082,6 +2097,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
IHK_VERSION=1.5.1
MCKERNEL_VERSION=1.5.1
DCFA_VERSION=DCFA_VERSION_m4
@@ -3513,6 +3530,195 @@ fi
# Check whether --with-syscall_intercept was given.
if test "${with_syscall_intercept+set}" = set; then :
withval=$with_syscall_intercept; case "$withval" in #(
yes|no|'') :
{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: --without-syscall_intercept=PATH expects a valid PATH" >&5
$as_echo "$as_me: WARNING: --without-syscall_intercept=PATH expects a valid PATH" >&2;}
with_syscall_intercept="" ;; #(
*) :
;;
esac
else
with_syscall_intercept=
fi
# Check whether --with-syscall_intercept-include was given.
if test "${with_syscall_intercept_include+set}" = set; then :
withval=$with_syscall_intercept_include; case "$withval" in #(
yes|no|'') :
{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: --without-syscall_intercept-include=PATH expects a valid PATH" >&5
$as_echo "$as_me: WARNING: --without-syscall_intercept-include=PATH expects a valid PATH" >&2;}
with_syscall_intercept_include="" ;; #(
*) :
;;
esac
fi
# Check whether --with-syscall_intercept-lib was given.
if test "${with_syscall_intercept_lib+set}" = set; then :
withval=$with_syscall_intercept_lib; case "$withval" in #(
yes|no|'') :
{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: --without-syscall_intercept-lib=PATH expects a valid PATH" >&5
$as_echo "$as_me: WARNING: --without-syscall_intercept-lib=PATH expects a valid PATH" >&2;}
with_syscall_intercept_lib="" ;; #(
*) :
;;
esac
fi
# The args have been sanitized into empty/non-empty values above.
# Now append -I/-L args to CPPFLAGS/LDFLAGS, with more specific options
# taking priority
if test -n "${with_syscall_intercept_include}"; then :
if echo "$CPPFLAGS_SYSCALL_INTERCEPT" | $FGREP -e "\<-I${with_syscall_intercept_include}\>" >/dev/null 2>&1; then :
echo "CPPFLAGS_SYSCALL_INTERCEPT(='$CPPFLAGS_SYSCALL_INTERCEPT') contains '-I${with_syscall_intercept_include}', not appending" >&5
else
echo "CPPFLAGS_SYSCALL_INTERCEPT(='$CPPFLAGS_SYSCALL_INTERCEPT') does not contain '-I${with_syscall_intercept_include}', appending" >&5
CPPFLAGS_SYSCALL_INTERCEPT="$CPPFLAGS_SYSCALL_INTERCEPT -I${with_syscall_intercept_include}"
fi
else
if test -n "${with_syscall_intercept}"; then :
if echo "$CPPFLAGS_SYSCALL_INTERCEPT" | $FGREP -e "\<-I${with_syscall_intercept}/include\>" >/dev/null 2>&1; then :
echo "CPPFLAGS_SYSCALL_INTERCEPT(='$CPPFLAGS_SYSCALL_INTERCEPT') contains '-I${with_syscall_intercept}/include', not appending" >&5
else
echo "CPPFLAGS_SYSCALL_INTERCEPT(='$CPPFLAGS_SYSCALL_INTERCEPT') does not contain '-I${with_syscall_intercept}/include', appending" >&5
CPPFLAGS_SYSCALL_INTERCEPT="$CPPFLAGS_SYSCALL_INTERCEPT -I${with_syscall_intercept}/include"
fi
fi
fi
if test -n "${with_syscall_intercept_lib}"; then :
if echo "$LDFLAGS_SYSCALL_INTERCEPT" | $FGREP -e "\<-L${with_syscall_intercept_lib} -Wl,-rpath,${with_syscall_intercept_lib}\>" >/dev/null 2>&1; then :
echo "LDFLAGS_SYSCALL_INTERCEPT(='$LDFLAGS_SYSCALL_INTERCEPT') contains '-L${with_syscall_intercept_lib} -Wl,-rpath,${with_syscall_intercept_lib}', not appending" >&5
else
echo "LDFLAGS_SYSCALL_INTERCEPT(='$LDFLAGS_SYSCALL_INTERCEPT') does not contain '-L${with_syscall_intercept_lib} -Wl,-rpath,${with_syscall_intercept_lib}', appending" >&5
LDFLAGS_SYSCALL_INTERCEPT="$LDFLAGS_SYSCALL_INTERCEPT -L${with_syscall_intercept_lib} -Wl,-rpath,${with_syscall_intercept_lib}"
fi
else
if test -n "${with_syscall_intercept}"; then :
if echo "$LDFLAGS_SYSCALL_INTERCEPT" | $FGREP -e "\<-L${with_syscall_intercept}/lib -Wl,-rpath,${with_syscall_intercept}/lib\>" >/dev/null 2>&1; then :
echo "LDFLAGS_SYSCALL_INTERCEPT(='$LDFLAGS_SYSCALL_INTERCEPT') contains '-L${with_syscall_intercept}/lib -Wl,-rpath,${with_syscall_intercept}/lib', not appending" >&5
else
echo "LDFLAGS_SYSCALL_INTERCEPT(='$LDFLAGS_SYSCALL_INTERCEPT') does not contain '-L${with_syscall_intercept}/lib -Wl,-rpath,${with_syscall_intercept}/lib', appending" >&5
LDFLAGS_SYSCALL_INTERCEPT="$LDFLAGS_SYSCALL_INTERCEPT -L${with_syscall_intercept}/lib -Wl,-rpath,${with_syscall_intercept}/lib"
fi
if test -d "${with_syscall_intercept}/lib64"; then :
if echo "$LDFLAGS_SYSCALL_INTERCEPT" | $FGREP -e "\<-L${with_syscall_intercept}/lib64 -Wl,-rpath,${with_syscall_intercept}/lib64\>" >/dev/null 2>&1; then :
echo "LDFLAGS_SYSCALL_INTERCEPT(='$LDFLAGS_SYSCALL_INTERCEPT') contains '-L${with_syscall_intercept}/lib64 -Wl,-rpath,${with_syscall_intercept}/lib64', not appending" >&5
else
echo "LDFLAGS_SYSCALL_INTERCEPT(='$LDFLAGS_SYSCALL_INTERCEPT') does not contain '-L${with_syscall_intercept}/lib64 -Wl,-rpath,${with_syscall_intercept}/lib64', appending" >&5
LDFLAGS_SYSCALL_INTERCEPT="$LDFLAGS_SYSCALL_INTERCEPT -L${with_syscall_intercept}/lib64 -Wl,-rpath,${with_syscall_intercept}/lib64"
fi
fi
fi
fi
if test -n "${with_syscall_intercept}" || test -n "${with_syscall_intercept_include}" || test -n "${with_syscall_intercept_lib}"; then :
WITH_SYSCALL_INTERCEPT=yes
else
WITH_SYSCALL_INTERCEPT=no
fi
if test "x$WITH_SYSCALL_INTERCEPT" == "xno" ; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for syscall_no_intercept in -lsyscall_intercept" >&5
$as_echo_n "checking for syscall_no_intercept in -lsyscall_intercept... " >&6; }
if ${ac_cv_lib_syscall_intercept_syscall_no_intercept+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_check_lib_save_LIBS=$LIBS
LIBS="-lsyscall_intercept -lcapstone -ldl $LIBS"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
/* Override any GCC internal prototype to avoid an error.
Use char because int might match the return type of a GCC
builtin and then its argument prototype would still apply. */
#ifdef __cplusplus
extern "C"
#endif
char syscall_no_intercept ();
int
main ()
{
return syscall_no_intercept ();
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
ac_cv_lib_syscall_intercept_syscall_no_intercept=yes
else
ac_cv_lib_syscall_intercept_syscall_no_intercept=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_syscall_intercept_syscall_no_intercept" >&5
$as_echo "$ac_cv_lib_syscall_intercept_syscall_no_intercept" >&6; }
if test "x$ac_cv_lib_syscall_intercept_syscall_no_intercept" = xyes; then :
syscall_intercept_lib_found=yes
else
syscall_intercept_lib_found=no
fi
if test "x$syscall_intercept_lib_found" != "xyes"; then :
{ $as_echo "$as_me:${as_lineno-$LINENO}: libsyscall_intercept.so not found" >&5
$as_echo "$as_me: libsyscall_intercept.so not found" >&6;}
fi
ac_fn_c_check_header_mongrel "$LINENO" "libsyscall_intercept_hook_point.h" "ac_cv_header_libsyscall_intercept_hook_point_h" "$ac_includes_default"
if test "x$ac_cv_header_libsyscall_intercept_hook_point_h" = xyes; then :
syscall_intercept_header_found=yes
else
syscall_intercept_header_found=no
fi
if test "x$syscall_intercept_header_found" != "xyes"; then :
{ $as_echo "$as_me:${as_lineno-$LINENO}: libsyscall_intercept_hook_point.h not found" >&5
$as_echo "$as_me: libsyscall_intercept_hook_point.h not found" >&6;}
fi
if test "x$syscall_intercept_lib_found" == "xyes" && test "x$syscall_intercept_header_found" == "xyes"; then :
WITH_SYSCALL_INTERCEPT=yes
else
WITH_SYSCALL_INTERCEPT=no
fi
fi
# Check whether --with-kernelsrc was given.
if test "${with_kernelsrc+set}" = set; then :
withval=$with_kernelsrc; WITH_KERNELSRC=$withval
@@ -4396,399 +4602,6 @@ KDIR="$WITH_KERNELSRC"
UNAME_R="$WITH_UNAME_R"
TARGET="$WITH_TARGET"
MCCTRL_LINUX_SYMTAB=""
case "X$WITH_SYSTEM_MAP" in
Xyes | Xno | X)
MCCTRL_LINUX_SYMTAB=""
;;
*)
MCCTRL_LINUX_SYMTAB="$WITH_SYSTEM_MAP"
;;
esac
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for System.map" >&5
$as_echo_n "checking for System.map... " >&6; }
if test -r "$MCCTRL_LINUX_SYMTAB"; then
MCCTRL_LINUX_SYMTAB="$MCCTRL_LINUX_SYMTAB"
elif test -r "/boot/System.map-`uname -r`"; then
MCCTRL_LINUX_SYMTAB="/boot/System.map-`uname -r`"
elif test -r "$KDIR/System.map"; then
MCCTRL_LINUX_SYMTAB="$KDIR/System.map"
fi
if test "$MCCTRL_LINUX_SYMTAB" == ""; then
as_fn_error $? "could not find" "$LINENO" 5
fi
if test -z "`eval cat $MCCTRL_LINUX_SYMTAB`"; then
as_fn_error $? "could not read System.map file, no read permission?" "$LINENO" 5
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $MCCTRL_LINUX_SYMTAB" >&5
$as_echo "$MCCTRL_LINUX_SYMTAB" >&6; }
MCCTRL_LINUX_SYMTAB_CMD="cat $MCCTRL_LINUX_SYMTAB"
# MCCTRL_FIND_KSYM(SYMBOL)
# ------------------------------------------------------
# Search System.map for address of the given symbol and
# do one of three things in config.h:
# If not found, leave MCCTRL_KSYM_foo undefined
# If found to be exported, "#define MCCTRL_KSYM_foo 0"
# If found not to be exported, "#define MCCTRL_KSYM_foo 0x<value>"
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol sys_mount" >&5
$as_echo_n "checking System.map for symbol sys_mount... " >&6; }
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " sys_mount\$" | cut -d\ -f1`
if test -z $mcctrl_addr; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
else
mcctrl_result=$mcctrl_addr
mcctrl_addr="0x$mcctrl_addr"
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_sys_mount\$" >/dev/null`; then
mcctrl_result="exported"
mcctrl_addr="0"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
$as_echo "$mcctrl_result" >&6; }
cat >>confdefs.h <<_ACEOF
#define MCCTRL_KSYM_sys_mount $mcctrl_addr
_ACEOF
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol sys_umount" >&5
$as_echo_n "checking System.map for symbol sys_umount... " >&6; }
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " sys_umount\$" | cut -d\ -f1`
if test -z $mcctrl_addr; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
else
mcctrl_result=$mcctrl_addr
mcctrl_addr="0x$mcctrl_addr"
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_sys_umount\$" >/dev/null`; then
mcctrl_result="exported"
mcctrl_addr="0"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
$as_echo "$mcctrl_result" >&6; }
cat >>confdefs.h <<_ACEOF
#define MCCTRL_KSYM_sys_umount $mcctrl_addr
_ACEOF
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol sys_unshare" >&5
$as_echo_n "checking System.map for symbol sys_unshare... " >&6; }
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " sys_unshare\$" | cut -d\ -f1`
if test -z $mcctrl_addr; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
else
mcctrl_result=$mcctrl_addr
mcctrl_addr="0x$mcctrl_addr"
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_sys_unshare\$" >/dev/null`; then
mcctrl_result="exported"
mcctrl_addr="0"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
$as_echo "$mcctrl_result" >&6; }
cat >>confdefs.h <<_ACEOF
#define MCCTRL_KSYM_sys_unshare $mcctrl_addr
_ACEOF
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol zap_page_range" >&5
$as_echo_n "checking System.map for symbol zap_page_range... " >&6; }
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " zap_page_range\$" | cut -d\ -f1`
if test -z $mcctrl_addr; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
else
mcctrl_result=$mcctrl_addr
mcctrl_addr="0x$mcctrl_addr"
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_zap_page_range\$" >/dev/null`; then
mcctrl_result="exported"
mcctrl_addr="0"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
$as_echo "$mcctrl_result" >&6; }
cat >>confdefs.h <<_ACEOF
#define MCCTRL_KSYM_zap_page_range $mcctrl_addr
_ACEOF
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol vdso_image_64" >&5
$as_echo_n "checking System.map for symbol vdso_image_64... " >&6; }
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " vdso_image_64\$" | cut -d\ -f1`
if test -z $mcctrl_addr; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
else
mcctrl_result=$mcctrl_addr
mcctrl_addr="0x$mcctrl_addr"
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_vdso_image_64\$" >/dev/null`; then
mcctrl_result="exported"
mcctrl_addr="0"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
$as_echo "$mcctrl_result" >&6; }
cat >>confdefs.h <<_ACEOF
#define MCCTRL_KSYM_vdso_image_64 $mcctrl_addr
_ACEOF
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol vdso_start" >&5
$as_echo_n "checking System.map for symbol vdso_start... " >&6; }
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " vdso_start\$" | cut -d\ -f1`
if test -z $mcctrl_addr; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
else
mcctrl_result=$mcctrl_addr
mcctrl_addr="0x$mcctrl_addr"
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_vdso_start\$" >/dev/null`; then
mcctrl_result="exported"
mcctrl_addr="0"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
$as_echo "$mcctrl_result" >&6; }
cat >>confdefs.h <<_ACEOF
#define MCCTRL_KSYM_vdso_start $mcctrl_addr
_ACEOF
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol vdso_end" >&5
$as_echo_n "checking System.map for symbol vdso_end... " >&6; }
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " vdso_end\$" | cut -d\ -f1`
if test -z $mcctrl_addr; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
else
mcctrl_result=$mcctrl_addr
mcctrl_addr="0x$mcctrl_addr"
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_vdso_end\$" >/dev/null`; then
mcctrl_result="exported"
mcctrl_addr="0"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
$as_echo "$mcctrl_result" >&6; }
cat >>confdefs.h <<_ACEOF
#define MCCTRL_KSYM_vdso_end $mcctrl_addr
_ACEOF
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol vdso_pages" >&5
$as_echo_n "checking System.map for symbol vdso_pages... " >&6; }
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " vdso_pages\$" | cut -d\ -f1`
if test -z $mcctrl_addr; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
else
mcctrl_result=$mcctrl_addr
mcctrl_addr="0x$mcctrl_addr"
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_vdso_pages\$" >/dev/null`; then
mcctrl_result="exported"
mcctrl_addr="0"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
$as_echo "$mcctrl_result" >&6; }
cat >>confdefs.h <<_ACEOF
#define MCCTRL_KSYM_vdso_pages $mcctrl_addr
_ACEOF
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol __vvar_page" >&5
$as_echo_n "checking System.map for symbol __vvar_page... " >&6; }
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __vvar_page\$" | cut -d\ -f1`
if test -z $mcctrl_addr; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
else
mcctrl_result=$mcctrl_addr
mcctrl_addr="0x$mcctrl_addr"
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab___vvar_page\$" >/dev/null`; then
mcctrl_result="exported"
mcctrl_addr="0"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
$as_echo "$mcctrl_result" >&6; }
cat >>confdefs.h <<_ACEOF
#define MCCTRL_KSYM___vvar_page $mcctrl_addr
_ACEOF
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol hpet_address" >&5
$as_echo_n "checking System.map for symbol hpet_address... " >&6; }
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " hpet_address\$" | cut -d\ -f1`
if test -z $mcctrl_addr; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
else
mcctrl_result=$mcctrl_addr
mcctrl_addr="0x$mcctrl_addr"
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_hpet_address\$" >/dev/null`; then
mcctrl_result="exported"
mcctrl_addr="0"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
$as_echo "$mcctrl_result" >&6; }
cat >>confdefs.h <<_ACEOF
#define MCCTRL_KSYM_hpet_address $mcctrl_addr
_ACEOF
fi
# POSTK_DEBUG_ARCH_DEP_50, add:find kernel symbol.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol vdso_spec" >&5
$as_echo_n "checking System.map for symbol vdso_spec... " >&6; }
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " vdso_spec\$" | cut -d\ -f1`
if test -z $mcctrl_addr; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
else
mcctrl_result=$mcctrl_addr
mcctrl_addr="0x$mcctrl_addr"
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_vdso_spec\$" >/dev/null`; then
mcctrl_result="exported"
mcctrl_addr="0"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
$as_echo "$mcctrl_result" >&6; }
cat >>confdefs.h <<_ACEOF
#define MCCTRL_KSYM_vdso_spec $mcctrl_addr
_ACEOF
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol hv_clock" >&5
$as_echo_n "checking System.map for symbol hv_clock... " >&6; }
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " hv_clock\$" | cut -d\ -f1`
if test -z $mcctrl_addr; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
else
mcctrl_result=$mcctrl_addr
mcctrl_addr="0x$mcctrl_addr"
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_hv_clock\$" >/dev/null`; then
mcctrl_result="exported"
mcctrl_addr="0"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
$as_echo "$mcctrl_result" >&6; }
cat >>confdefs.h <<_ACEOF
#define MCCTRL_KSYM_hv_clock $mcctrl_addr
_ACEOF
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol sys_readlink" >&5
$as_echo_n "checking System.map for symbol sys_readlink... " >&6; }
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " sys_readlink\$" | cut -d\ -f1`
if test -z $mcctrl_addr; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
else
mcctrl_result=$mcctrl_addr
mcctrl_addr="0x$mcctrl_addr"
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_sys_readlink\$" >/dev/null`; then
mcctrl_result="exported"
mcctrl_addr="0"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
$as_echo "$mcctrl_result" >&6; }
cat >>confdefs.h <<_ACEOF
#define MCCTRL_KSYM_sys_readlink $mcctrl_addr
_ACEOF
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol walk_page_range" >&5
$as_echo_n "checking System.map for symbol walk_page_range... " >&6; }
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " walk_page_range\$" | cut -d\ -f1`
if test -z $mcctrl_addr; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
else
mcctrl_result=$mcctrl_addr
mcctrl_addr="0x$mcctrl_addr"
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_walk_page_range\$" >/dev/null`; then
mcctrl_result="exported"
mcctrl_addr="0"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
$as_echo "$mcctrl_result" >&6; }
cat >>confdefs.h <<_ACEOF
#define MCCTRL_KSYM_walk_page_range $mcctrl_addr
_ACEOF
fi
case $ENABLE_MEMDUMP in
yes|no|auto)
;;
@@ -4986,6 +4799,17 @@ else
$as_echo "$as_me: perf is disabled" >&6;}
fi
if test "x$WITH_SYSCALL_INTERCEPT" = "xyes" ; then
$as_echo "#define WITH_SYSCALL_INTERCEPT 1" >>confdefs.h
{ $as_echo "$as_me:${as_lineno-$LINENO}: syscall_intercept library is linked" >&5
$as_echo "$as_me: syscall_intercept library is linked" >&6;}
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: syscall_intercept library isn't linked" >&5
$as_echo "$as_me: syscall_intercept library isn't linked" >&6;}
fi
if test "x$MCKERNEL_INCDIR" != "x" ; then
cat >>confdefs.h <<_ACEOF
@@ -5052,6 +4876,9 @@ fi
@@ -5060,9 +4887,14 @@ ac_config_headers="$ac_config_headers config.h"
# POSTK_DEBUG_ARCH_DEP_37
# AC_CONFIG_FILES arch dependfiles separate
ac_config_files="$ac_config_files Makefile executer/user/Makefile executer/user/mcexec.1:executer/user/mcexec.1in executer/user/vmcore2mckdump executer/user/arch/$ARCH/Makefile executer/user/arch/x86_64/Makefile executer/kernel/mcctrl/Makefile executer/kernel/mcctrl/arch/$ARCH/Makefile executer/kernel/mcoverlayfs/Makefile executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/Makefile executer/kernel/mcoverlayfs/linux-4.0.9/Makefile executer/kernel/mcoverlayfs/linux-4.6.7/Makefile executer/include/qlmpilib.h kernel/Makefile kernel/Makefile.build kernel/include/swapfmt.h arch/x86_64/tools/mcreboot-attached-mic.sh arch/x86_64/tools/mcshutdown-attached-mic.sh arch/x86_64/tools/mcreboot-builtin-x86.sh arch/x86_64/tools/mcreboot-smp-x86.sh arch/x86_64/tools/mcstop+release-smp-x86.sh arch/x86_64/tools/mcoverlay-destroy-smp-x86.sh arch/x86_64/tools/mcoverlay-create-smp-x86.sh arch/x86_64/tools/eclair-dump-backtrace.exp arch/x86_64/tools/mcshutdown-builtin-x86.sh arch/x86_64/tools/mcreboot.1:arch/x86_64/tools/mcreboot.1in arch/x86_64/tools/irqbalance_mck.service arch/x86_64/tools/irqbalance_mck.in tools/mcstat/Makefile"
ac_config_files="$ac_config_files Makefile executer/user/Makefile executer/user/mcexec.1:executer/user/mcexec.1in executer/user/vmcore2mckdump executer/user/arch/$ARCH/Makefile executer/user/arch/x86_64/Makefile executer/kernel/mcctrl/Makefile executer/kernel/mcctrl/arch/$ARCH/Makefile executer/kernel/mcoverlayfs/Makefile executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/Makefile executer/kernel/mcoverlayfs/linux-4.0.9/Makefile executer/kernel/mcoverlayfs/linux-4.6.7/Makefile executer/include/qlmpilib.h kernel/Makefile kernel/Makefile.build kernel/include/swapfmt.h arch/x86_64/tools/mcreboot-attached-mic.sh arch/x86_64/tools/mcshutdown-attached-mic.sh arch/x86_64/tools/mcreboot-builtin-x86.sh arch/x86_64/tools/mcreboot-smp-x86.sh arch/x86_64/tools/mcstop+release-smp-x86.sh arch/x86_64/tools/mcoverlay-destroy-smp-x86.sh arch/x86_64/tools/mcoverlay-create-smp-x86.sh arch/x86_64/tools/eclair-dump-backtrace.exp arch/x86_64/tools/mcshutdown-builtin-x86.sh arch/x86_64/tools/mcreboot.1:arch/x86_64/tools/mcreboot.1in arch/x86_64/tools/irqbalance_mck.service arch/x86_64/tools/irqbalance_mck.in tools/mcstat/mcstat.1:tools/mcstat/mcstat.1in tools/mcstat/Makefile"
if test -e "${ABS_SRCDIR}/test"; then
ac_config_files="$ac_config_files mck_test_config.sample:test/mck_test_config.sample.in"
fi
if test "$TARGET" = "smp-x86"; then
ac_config_files="$ac_config_files arch/x86_64/kernel/Makefile.arch"
@@ -5797,7 +5629,9 @@ do
"arch/x86_64/tools/mcreboot.1") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcreboot.1:arch/x86_64/tools/mcreboot.1in" ;;
"arch/x86_64/tools/irqbalance_mck.service") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/irqbalance_mck.service" ;;
"arch/x86_64/tools/irqbalance_mck.in") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/irqbalance_mck.in" ;;
"tools/mcstat/mcstat.1") CONFIG_FILES="$CONFIG_FILES tools/mcstat/mcstat.1:tools/mcstat/mcstat.1in" ;;
"tools/mcstat/Makefile") CONFIG_FILES="$CONFIG_FILES tools/mcstat/Makefile" ;;
"mck_test_config.sample") CONFIG_FILES="$CONFIG_FILES mck_test_config.sample:test/mck_test_config.sample.in" ;;
"arch/x86_64/kernel/Makefile.arch") CONFIG_FILES="$CONFIG_FILES arch/x86_64/kernel/Makefile.arch" ;;
"kernel/config/config.smp-arm64") CONFIG_FILES="$CONFIG_FILES kernel/config/config.smp-arm64" ;;
"arch/arm64/kernel/vdso/Makefile") CONFIG_FILES="$CONFIG_FILES arch/arm64/kernel/vdso/Makefile" ;;

View File

@@ -77,6 +77,58 @@ AC_DEFUN([PAC_SET_HEADER_LIB_PATH],[
])
])
dnl PAC_SET_HEADER_LIB_PATH_SYSCALL_INTERCEPT(NAME, DEFAULT-PATH)
dnl
dnl Declares the configure options --with-NAME, --with-NAME-include and
dnl --with-NAME-lib.  A value of yes, no or the empty string is rejected with
dnl a warning and treated as empty; when --with-NAME is not given at all the
dnl prefix falls back to DEFAULT-PATH.
dnl
dnl Results:
dnl   CPPFLAGS_SYSCALL_INTERCEPT  gets -I for the include directory
dnl   LDFLAGS_SYSCALL_INTERCEPT   gets -L and -Wl,-rpath for the lib directory,
dnl                               plus lib64 under the prefix when it exists
dnl   WITH_SYSCALL_INTERCEPT      yes if any of the three paths is non-empty,
dnl                               no otherwise
AC_DEFUN([PAC_SET_HEADER_LIB_PATH_SYSCALL_INTERCEPT],[
AC_ARG_WITH([$1],
[AC_HELP_STRING([--with-$1=PATH],
[specify path where $1 include directory and lib directory can be found])],
[AS_CASE(["$withval"],
[yes|no|''],
[AC_MSG_WARN([--with[out]-$1=PATH expects a valid PATH])
with_$1=""])],
[with_$1=$2])
AC_ARG_WITH([$1-include],
[AC_HELP_STRING([--with-$1-include=PATH],
[specify path where $1 include directory can be found])],
[AS_CASE(["$withval"],
[yes|no|''],
[AC_MSG_WARN([--with[out]-$1-include=PATH expects a valid PATH])
with_$1_include=""])],
[])
AC_ARG_WITH([$1-lib],
[AC_HELP_STRING([--with-$1-lib=PATH],
[specify path where $1 lib directory can be found])],
[AS_CASE(["$withval"],
[yes|no|''],
[AC_MSG_WARN([--with[out]-$1-lib=PATH expects a valid PATH])
with_$1_lib=""])],
[])
# The args have been sanitized into empty/non-empty values above.
# Now append -I/-L args to CPPFLAGS/LDFLAGS, with more specific options
# taking priority
AS_IF([test -n "${with_$1_include}"],
[PAC_APPEND_FLAG([-I${with_$1_include}],[CPPFLAGS_SYSCALL_INTERCEPT])],
[AS_IF([test -n "${with_$1}"],
[PAC_APPEND_FLAG([-I${with_$1}/include],[CPPFLAGS_SYSCALL_INTERCEPT])])])
AS_IF([test -n "${with_$1_lib}"],
[PAC_APPEND_FLAG([-L${with_$1_lib} -Wl,-rpath,${with_$1_lib}],[LDFLAGS_SYSCALL_INTERCEPT])],
[AS_IF([test -n "${with_$1}"],
dnl is adding lib64 by default really the right thing to do? What if
dnl we are on a 32-bit host that happens to have both lib dirs available?
[PAC_APPEND_FLAG([-L${with_$1}/lib -Wl,-rpath,${with_$1}/lib],[LDFLAGS_SYSCALL_INTERCEPT])
AS_IF([test -d "${with_$1}/lib64"],
[PAC_APPEND_FLAG([-L${with_$1}/lib64 -Wl,-rpath,${with_$1}/lib64],[LDFLAGS_SYSCALL_INTERCEPT])])
])
])
AS_IF([test -n "${with_$1}" || test -n "${with_$1_include}" || test -n "${with_$1_lib}"],
[WITH_SYSCALL_INTERCEPT=yes],
[WITH_SYSCALL_INTERCEPT=no])
])
IHK_VERSION=IHK_VERSION_m4
MCKERNEL_VERSION=MCKERNEL_VERSION_m4
DCFA_VERSION=DCFA_VERSION_m4
@@ -95,6 +147,23 @@ AS_IF([test "x$numa_lib_found" != "xyes"],
PAC_SET_HEADER_LIB_PATH([mpi])
PAC_SET_HEADER_LIB_PATH_SYSCALL_INTERCEPT([syscall_intercept])
# No --with-syscall_intercept* path was supplied, so probe the default
# compiler and linker search paths for the library and its hook-point header.
# The string comparisons use "=", the only equality operator POSIX test
# guarantees; "==" is a bash extension that fails under shells such as dash.
if test "x$WITH_SYSCALL_INTERCEPT" = "xno" ; then
	AC_CHECK_LIB([syscall_intercept],[syscall_no_intercept],[syscall_intercept_lib_found=yes],[syscall_intercept_lib_found=no],[-lcapstone -ldl])
	AS_IF([test "x$syscall_intercept_lib_found" != "xyes"],
	      [AC_MSG_NOTICE([libsyscall_intercept.so not found])])
	AC_CHECK_HEADER([libsyscall_intercept_hook_point.h],[syscall_intercept_header_found=yes],[syscall_intercept_header_found=no])
	AS_IF([test "x$syscall_intercept_header_found" != "xyes"],
	      [AC_MSG_NOTICE([libsyscall_intercept_hook_point.h not found])])
	# Enable the feature only when both the library and the header were found.
	AS_IF([test "x$syscall_intercept_lib_found" = "xyes" && test "x$syscall_intercept_header_found" = "xyes"],
	      [WITH_SYSCALL_INTERCEPT=yes],
	      [WITH_SYSCALL_INTERCEPT=no])
fi
AC_ARG_WITH([kernelsrc],
AC_HELP_STRING(
[--with-kernelsrc=path],[Path to 'kernel src', default is /lib/modules/uname_r/build]),
@@ -339,78 +408,6 @@ KDIR="$WITH_KERNELSRC"
UNAME_R="$WITH_UNAME_R"
TARGET="$WITH_TARGET"
MCCTRL_LINUX_SYMTAB=""
case "X$WITH_SYSTEM_MAP" in
Xyes | Xno | X)
MCCTRL_LINUX_SYMTAB=""
;;
*)
MCCTRL_LINUX_SYMTAB="$WITH_SYSTEM_MAP"
;;
esac
AC_MSG_CHECKING([[for System.map]])
if test -r "$MCCTRL_LINUX_SYMTAB"; then
MCCTRL_LINUX_SYMTAB="$MCCTRL_LINUX_SYMTAB"
elif test -r "/boot/System.map-`uname -r`"; then
MCCTRL_LINUX_SYMTAB="/boot/System.map-`uname -r`"
elif test -r "$KDIR/System.map"; then
MCCTRL_LINUX_SYMTAB="$KDIR/System.map"
fi
if test "$MCCTRL_LINUX_SYMTAB" == ""; then
AC_MSG_ERROR([could not find])
fi
if test -z "`eval cat $MCCTRL_LINUX_SYMTAB`"; then
AC_MSG_ERROR([could not read System.map file, no read permission?])
fi
AC_MSG_RESULT([$MCCTRL_LINUX_SYMTAB])
MCCTRL_LINUX_SYMTAB_CMD="cat $MCCTRL_LINUX_SYMTAB"
# MCCTRL_FIND_KSYM(SYMBOL)
# ------------------------------------------------------
# Search System.map for address of the given symbol and
# do one of three things in config.h:
# If not found, leave MCCTRL_KSYM_foo undefined
# If found to be exported, "#define MCCTRL_KSYM_foo 0"
# If found not to be exported, "#define MCCTRL_KSYM_foo 0x<value>"
AC_DEFUN([MCCTRL_FIND_KSYM],[
AC_MSG_CHECKING([[System.map for symbol $1]])
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " $1\$" | cut -d\ -f1`
if test -z $mcctrl_addr; then
AC_MSG_RESULT([not found])
else
mcctrl_result=$mcctrl_addr
mcctrl_addr="0x$mcctrl_addr"
m4_ifval([$2],[],[
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_$1\$" >/dev/null`; then
mcctrl_result="exported"
mcctrl_addr="0"
fi
])
AC_MSG_RESULT([$mcctrl_result])
AC_DEFINE_UNQUOTED(MCCTRL_KSYM_[]$1,$mcctrl_addr,[Define to address of kernel symbol $1, or 0 if exported])
fi
])
MCCTRL_FIND_KSYM([sys_mount])
MCCTRL_FIND_KSYM([sys_umount])
MCCTRL_FIND_KSYM([sys_unshare])
MCCTRL_FIND_KSYM([zap_page_range])
MCCTRL_FIND_KSYM([vdso_image_64])
MCCTRL_FIND_KSYM([vdso_start])
MCCTRL_FIND_KSYM([vdso_end])
MCCTRL_FIND_KSYM([vdso_pages])
MCCTRL_FIND_KSYM([__vvar_page])
MCCTRL_FIND_KSYM([hpet_address])
# POSTK_DEBUG_ARCH_DEP_50, add:find kernel symbol.
MCCTRL_FIND_KSYM([vdso_spec])
MCCTRL_FIND_KSYM([hv_clock])
MCCTRL_FIND_KSYM([sys_readlink])
MCCTRL_FIND_KSYM([walk_page_range])
case $ENABLE_MEMDUMP in
yes|no|auto)
;;
@@ -489,6 +486,13 @@ else
AC_MSG_NOTICE([perf is disabled])
fi
if test "x$WITH_SYSCALL_INTERCEPT" = "xyes" ; then
AC_DEFINE([WITH_SYSCALL_INTERCEPT],[1],[whether or not syscall_intercept library is linked])
AC_MSG_NOTICE([syscall_intercept library is linked])
else
AC_MSG_NOTICE([syscall_intercept library isn't linked])
fi
if test "x$MCKERNEL_INCDIR" != "x" ; then
AC_DEFINE_UNQUOTED(MCKERNEL_INCDIR,"$MCKERNEL_INCDIR",[McKernel specific headers])
fi
@@ -526,9 +530,12 @@ AC_SUBST(KMODDIR)
AC_SUBST(KERNDIR)
AC_SUBST(MANDIR)
AC_SUBST(CFLAGS)
AC_SUBST(CPPFLAGS_SYSCALL_INTERCEPT)
AC_SUBST(LDFLAGS_SYSCALL_INTERCEPT)
AC_SUBST(ENABLE_MCOVERLAYFS)
AC_SUBST(ENABLE_RUSAGE)
AC_SUBST(ENABLE_QLMPI)
AC_SUBST(WITH_SYSCALL_INTERCEPT)
AC_SUBST(IHK_VERSION)
AC_SUBST(MCKERNEL_VERSION)
@@ -570,9 +577,16 @@ AC_CONFIG_FILES([
arch/x86_64/tools/mcreboot.1:arch/x86_64/tools/mcreboot.1in
arch/x86_64/tools/irqbalance_mck.service
arch/x86_64/tools/irqbalance_mck.in
tools/mcstat/mcstat.1:tools/mcstat/mcstat.1in
tools/mcstat/Makefile
])
if test -e "${ABS_SRCDIR}/test"; then
AC_CONFIG_FILES([
mck_test_config.sample:test/mck_test_config.sample.in
])
fi
if test "$TARGET" = "smp-x86"; then
AC_CONFIG_FILES([
arch/x86_64/kernel/Makefile.arch

View File

@@ -55,13 +55,14 @@
#define MCEXEC_UP_SYS_UMOUNT 0x30a02915
#define MCEXEC_UP_SYS_UNSHARE 0x30a02916
#define MCEXEC_UP_UTIL_THREAD1 0x30a02920
#define MCEXEC_UP_UTIL_THREAD2 0x30a02921
#define MCEXEC_UP_UTI_GET_CTX 0x30a02920
#define MCEXEC_UP_UTI_SAVE_FS 0x30a02921
#define MCEXEC_UP_SIG_THREAD 0x30a02922
#define MCEXEC_UP_SYSCALL_THREAD 0x30a02924
#define MCEXEC_UP_TERMINATE_THREAD 0x30a02925
#define MCEXEC_UP_GET_NUM_POOL_THREADS 0x30a02926
#define MCEXEC_UP_UTI_ATTR 0x30a02927
#define MCEXEC_UP_RELEASE_USER_SPACE 0x30a02928
#define MCEXEC_UP_DEBUG_LOG 0x40000000
@@ -91,6 +92,7 @@ struct program_image_section {
struct get_cpu_set_arg {
int nr_processes;
int *process_rank;
void *cpu_set;
size_t cpu_set_size; // Size in bytes
int *target_core;
@@ -140,8 +142,10 @@ struct program_load_desc {
unsigned long heap_extension;
long stack_premap;
unsigned long mpol_bind_mask;
int uti_thread_rank; /* N-th clone() spawns a thread on Linux CPU */
int uti_use_last_cpu; /* Work-around not to share CPU with OpenMP thread */
int nr_processes;
char shell_path[SHELL_PATH_MAX_LEN];
int process_rank;
__cpu_set_unit cpu_set[PLD_CPU_SET_SIZE];
int profile;
struct program_image_section sections[0];
@@ -242,6 +246,28 @@ struct sys_unshare_desc {
unsigned long unshare_flags;
};
/*
 * Describes a range by its start and end values.
 * NOTE(review): presumably the argument of MCEXEC_UP_RELEASE_USER_SPACE and
 * a user-space virtual address range -- confirm against the ioctl handler,
 * including whether user_end is inclusive or exclusive.
 */
struct release_user_space_desc {
unsigned long user_start;
unsigned long user_end;
};
/*
 * Identifies a thread by pid/tid and carries its termination code.
 * NOTE(review): presumably the argument of MCEXEC_UP_TERMINATE_THREAD --
 * confirm against the ioctl handler.
 */
struct terminate_thread_desc {
int pid;
int tid;
long code;
/*
 * Bit layout of code (columns are bit ranges):
 *   32------32  31--16  15--------8  7----0
 *   exit_group          exit-status  signal
 * NOTE(review): column alignment reconstructed from a whitespace-collapsed
 * comment; confirm against the code that builds and decodes this value.
 */
unsigned long tsk; /* struct task_struct * */
};
/*
 * Triple of pointer-sized integers: a page-table reference plus a
 * start/length pair.
 * NOTE(review): rpgtable presumably identifies a remote (McKernel) page
 * table and start/len the virtual range it covers -- confirm against users.
 */
struct rpgtable_desc {
uintptr_t rpgtable;
uintptr_t start;
uintptr_t len;
};
enum perf_ctrl_type {
PERF_CTRL_SET,
PERF_CTRL_GET,
@@ -251,6 +277,7 @@ enum perf_ctrl_type {
struct perf_ctrl_desc {
enum perf_ctrl_type ctrl_type;
int err;
union {
/* for SET, GET */
struct {
@@ -290,6 +317,10 @@ struct perf_ctrl_desc {
#define UTI_FLAG_HIGH_PRIORITY (1ULL<<12)
#define UTI_FLAG_NON_COOPERATIVE (1ULL<<13)
#define UTI_FLAG_PREFER_LWK (1ULL << 14)
#define UTI_FLAG_PREFER_FWK (1ULL << 15)
#define UTI_FLAG_FABRIC_INTR_AFFINITY (1ULL << 16)
/* Linux default value is used */
#define UTI_MAX_NUMA_DOMAINS (1024)
@@ -308,6 +339,30 @@ struct kuti_attr {
/*
 * Carries a UTI attribute reference together with the UTI_CPU_SET string.
 * NOTE(review): presumably the argument of MCEXEC_UP_UTI_ATTR; phys_attr
 * looks like a physical address of the attribute block -- confirm.
 */
struct uti_attr_desc {
unsigned long phys_attr;
char *uti_cpu_set_str; /* UTI_CPU_SET environmental variable */
/* NOTE(review): presumably the length in bytes of uti_cpu_set_str -- confirm */
size_t uti_cpu_set_len;
};
/*
 * Fixed-size (4096-byte) context buffer.  The anonymous union overlays
 * uti_refill_tid on the first bytes of ctx, so the same storage can be read
 * either as raw context bytes or as that integer.
 */
struct uti_ctx {
union {
char ctx[4096]; /* TODO: Get the size from config.h */
struct {
int uti_refill_tid;
};
};
};
/*
 * Request/response block for fetching a thread context.
 * NOTE(review): presumably the argument of MCEXEC_UP_UTI_GET_CTX -- confirm
 * against the ioctl handler.  Only key is marked as an output below.
 */
struct uti_get_ctx_desc {
unsigned long rp_rctx; /* Remote physical address of remote context */
void *rctx; /* Remote context */
void *lctx; /* Local context */
int uti_refill_tid;
unsigned long key; /* OUT: struct task_struct* of mcexec thread, used to search struct host_thread */
};
/*
 * Pair of context pointers (remote and local).
 * NOTE(review): presumably the argument of MCEXEC_UP_UTI_SAVE_FS -- confirm
 * against the ioctl handler.
 */
struct uti_save_fs_desc {
void *rctx; /* Remote context */
void *lctx; /* Local context */
};
#endif

31
executer/include/uti.h Normal file
View File

@@ -0,0 +1,31 @@
#ifndef UTI_H_INCLUDED
#define UTI_H_INCLUDED
/*
 * One recorded system call: its number, up to six arguments, its return
 * value, and the clv value copied from McKernel.
 */
struct syscall_struct {
int number;
unsigned long args[6];
unsigned long ret;
unsigned long uti_clv; /* copy of a clv in McKernel */
};
/* Number of entries in uti_desc.syscall_stack */
#define UTI_SZ_SYSCALL_STACK 16
/* Variables accessed by mcexec.c and syscall_intercept.c */
struct uti_desc {
/* Two 4096-byte context buffers; NOTE(review): presumably local and remote contexts -- confirm */
char lctx[4096]; /* TODO: Get the size from config.h */
char rctx[4096]; /* TODO: Get the size from config.h */
int mck_tid; /* TODO: Move this out for multiple migrated-to-Linux threads */
unsigned long key; /* struct task_struct* of mcexec thread, used to search struct host_thread */
int pid, tid; /* Used as the id of tracee when issuing MCEXEC_UP_TERMINATE_THREAD */
unsigned long uti_clv; /* copy of McKernel clv */
int fd; /* /dev/mcosX */
struct syscall_struct syscall_stack[UTI_SZ_SYSCALL_STACK]; /* stack of system call arguments and return values */
int syscall_stack_top; /* stack-pointer of syscall arguments list */
long syscalls[512], syscalls2[512]; /* Syscall profile counters */
int start_syscall_intercept; /* Used to sync between mcexec.c and syscall_intercept.c */
};
#endif

View File

@@ -1,6 +1,7 @@
/* archdeps.c COPYRIGHT FUJITSU LIMITED 2016 */
#include <linux/version.h>
#include <linux/mm_types.h>
#include <linux/kallsyms.h>
#include <asm/vdso.h>
#include "../../../config.h"
#include "../../mcctrl.h"
@@ -17,29 +18,31 @@
#define D(fmt, ...) printk("%s(%d) " fmt, __func__, __LINE__, ##__VA_ARGS__)
#ifdef MCCTRL_KSYM_vdso_start
# if MCCTRL_KSYM_vdso_start
void *vdso_start = (void *)MCCTRL_KSYM_vdso_start;
# endif
#else
# error missing address of vdso_start.
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0)
void *vdso_start;
void *vdso_end;
static struct vm_special_mapping (*vdso_spec)[2];
#endif
#ifdef MCCTRL_KSYM_vdso_end
# if MCCTRL_KSYM_vdso_end
void *vdso_end = (void *)MCCTRL_KSYM_vdso_end;
# endif
#else
# error missing address of vdso_end.
int arch_symbols_init(void)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0)
vdso_start = (void *) kallsyms_lookup_name("vdso_start");
if (WARN_ON(!vdso_start))
return -EFAULT;
vdso_end = (void *) kallsyms_lookup_name("vdso_end");
if (WARN_ON(!vdso_end))
return -EFAULT;
vdso_spec = (void *) kallsyms_lookup_name("vdso_spec");
if (WARN_ON(!vdso_spec))
return -EFAULT;
#endif
#ifdef MCCTRL_KSYM_vdso_spec
# if MCCTRL_KSYM_vdso_spec
static struct vm_special_mapping (*vdso_spec)[2] = (void*)MCCTRL_KSYM_vdso_spec;
# endif
#else
# error missing address of vdso_spec.
#endif
return 0;
}
#ifdef POSTK_DEBUG_ARCH_DEP_52
#define VDSO_MAXPAGES 1

View File

@@ -1,5 +1,6 @@
/* archdeps.c COPYRIGHT FUJITSU LIMITED 2016 */
#include <linux/version.h>
#include <linux/kallsyms.h>
#include "../../../config.h"
#include "../../mcctrl.h"
@@ -13,57 +14,46 @@
#endif
#endif /* POSTK_DEBUG_ARCH_DEP_83 */
#ifdef MCCTRL_KSYM_vdso_image_64
#if MCCTRL_KSYM_vdso_image_64
struct vdso_image *vdso_image = (void *)MCCTRL_KSYM_vdso_image_64;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
static struct vdso_image *vdso_image_64;
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
static void *vdso_start;
static void *vdso_end;
static struct page **vdso_pages;
#endif
static void *__vvar_page;
static long *hpet_address;
static void **hv_clock;
int arch_symbols_init(void)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
vdso_image_64 = (void *) kallsyms_lookup_name("vdso_image_64");
if (WARN_ON(!vdso_image_64))
return -EFAULT;
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
vdso_start = (void *) kallsyms_lookup_name("vdso_start");
if (WARN_ON(!vdso_start))
return -EFAULT;
vdso_end = (void *) kallsyms_lookup_name("vdso_end");
if (WARN_ON(!vdso_end))
return -EFAULT;
vdso_pages = (void *) kallsyms_lookup_name("vdso_pages");
if (WARN_ON(!vdso_pages))
return -EFAULT;
#endif
#ifdef MCCTRL_KSYM_vdso_start
#if MCCTRL_KSYM_vdso_start
void *vdso_start = (void *)MCCTRL_KSYM_vdso_start;
#endif
#endif
__vvar_page = (void *) kallsyms_lookup_name("__vvar_page");
if (WARN_ON(!__vvar_page))
return -EFAULT;
#ifdef MCCTRL_KSYM_vdso_end
#if MCCTRL_KSYM_vdso_end
void *vdso_end = (void *)MCCTRL_KSYM_vdso_end;
#endif
#endif
hpet_address = (void *) kallsyms_lookup_name("hpet_address");
hv_clock = (void *) kallsyms_lookup_name("hv_clock");
return 0;
}
#ifdef MCCTRL_KSYM_vdso_pages
#if MCCTRL_KSYM_vdso_pages
struct page **vdso_pages = (void *)MCCTRL_KSYM_vdso_pages;
#endif
#endif
#ifdef MCCTRL_KSYM___vvar_page
#if MCCTRL_KSYM___vvar_page
void *__vvar_page = (void *)MCCTRL_KSYM___vvar_page;
#endif
#endif
long *hpet_addressp
#ifdef MCCTRL_KSYM_hpet_address
#if MCCTRL_KSYM_hpet_address
= (void *)MCCTRL_KSYM_hpet_address;
#else
= &hpet_address;
#endif
#else
= NULL;
#endif
void **hv_clockp
#ifdef MCCTRL_KSYM_hv_clock
#if MCCTRL_KSYM_hv_clock
= (void *)MCCTRL_KSYM_hv_clock;
#else
= &hv_clock;
#endif
#else
= NULL;
#endif
#ifdef POSTK_DEBUG_ARCH_DEP_52
#define VDSO_MAXPAGES 2
@@ -138,7 +128,7 @@ void get_vdso_info(ihk_os_t os, long vdso_rpa)
/* VDSO pages */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
size = vdso_image->size;
size = vdso_image_64->size;
vdso->vdso_npages = size >> PAGE_SHIFT;
if (vdso->vdso_npages > VDSO_MAXPAGES) {
@@ -148,7 +138,7 @@ void get_vdso_info(ihk_os_t os, long vdso_rpa)
for (i = 0; i < vdso->vdso_npages; ++i) {
vdso->vdso_physlist[i] = virt_to_phys(
vdso_image->data + (i * PAGE_SIZE));
vdso_image_64->data + (i * PAGE_SIZE));
}
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
size = vdso_end - vdso_start;
@@ -185,36 +175,36 @@ void get_vdso_info(ihk_os_t os, long vdso_rpa)
#endif
/* HPET page */
if (hpet_addressp && *hpet_addressp) {
if (hpet_address && *hpet_address) {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0)
vdso->hpet_is_global = 0;
vdso->hpet_virt = (void *)(-2 * PAGE_SIZE);
vdso->hpet_phys = *hpet_addressp;
vdso->hpet_phys = *hpet_address;
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0)
vdso->hpet_is_global = 0;
vdso->hpet_virt = (void *)(-1 * PAGE_SIZE);
vdso->hpet_phys = *hpet_addressp;
vdso->hpet_phys = *hpet_address;
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
vdso->hpet_is_global = 0;
vdso->hpet_virt = (void *)((vdso->vdso_npages + 1) * PAGE_SIZE);
vdso->hpet_phys = *hpet_addressp;
vdso->hpet_phys = *hpet_address;
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
vdso->hpet_is_global = 1;
vdso->hpet_virt = (void *)fix_to_virt(VSYSCALL_HPET);
vdso->hpet_phys = *hpet_addressp;
vdso->hpet_phys = *hpet_address;
#endif
}
/* struct pvlock_vcpu_time_info table */
if (hv_clockp && *hv_clockp) {
if (hv_clock && *hv_clock) {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0)
vdso->pvti_is_global = 0;
vdso->pvti_virt = (void *)(-1 * PAGE_SIZE);
vdso->pvti_phys = virt_to_phys(*hv_clockp);
vdso->pvti_phys = virt_to_phys(*hv_clock);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0)
vdso->pvti_is_global = 1;
vdso->pvti_virt = (void *)fix_to_virt(PVCLOCK_FIXMAP_BEGIN);
vdso->pvti_phys = virt_to_phys(*hv_clockp);
vdso->pvti_phys = virt_to_phys(*hv_clock);
#endif
}
@@ -289,6 +279,14 @@ get_fs_ctx(void *ctx)
return tctx->fs;
}
/*
 * get_rsp_ctx() - read the rsp member out of a saved context.
 * @ctx: pointer that is interpreted as a struct trans_uctx; it is
 *       dereferenced unconditionally, so it must not be NULL.
 *
 * Returns the value of the rsp field.
 */
unsigned long
get_rsp_ctx(void *ctx)
{
	return ((struct trans_uctx *)ctx)->rsp;
}
#ifdef POSTK_DEBUG_ARCH_DEP_83 /* arch depend translate_rva_to_rpa() move */
int translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long rva,
unsigned long *rpap, unsigned long *pgsizep)

View File

@@ -125,7 +125,6 @@ static int load_elf(struct linux_binprm *bprm
for(i = 0, st = 0; mode != 2;){
if(st == 0){
off = p & ~PAGE_MASK;
#ifdef POSTK_DEBUG_ARCH_DEP_41 /* HOST-Linux version switch add */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)
rc = get_user_pages_remote(current, bprm->mm,
bprm->p, 1, FOLL_FORCE, &page, NULL, NULL);
@@ -141,17 +140,6 @@ static int load_elf(struct linux_binprm *bprm
bprm->p, 1, 0, 1,
&page, NULL);
#endif
#else /* POSTK_DEBUG_ARCH_DEP_41 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,6,0)
rc = get_user_pages_remote(current, bprm->mm,
bprm->p, 1, 0, 1,
&page, NULL);
#else
rc = get_user_pages(current, bprm->mm,
bprm->p, 1, 0, 1,
&page, NULL);
#endif
#endif /* POSTK_DEBUG_ARCH_DEP_41 */
if(rc <= 0) {
kfree(pbuf);
return -EFAULT;

File diff suppressed because it is too large Load Diff

View File

@@ -28,6 +28,7 @@
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/delay.h>
#include <linux/kallsyms.h>
#include "mcctrl.h"
#include <ihk/ihk_host_user.h>
@@ -43,8 +44,6 @@ extern void mcctrl_syscall_init(void);
extern void procfs_init(int);
extern void procfs_exit(int);
extern void rus_page_hash_init(void);
extern void rus_page_hash_put_pages(void);
extern void uti_attr_finalize(void);
extern void binfmt_mcexec_init(void);
extern void binfmt_mcexec_exit(void);
@@ -84,13 +83,14 @@ static struct ihk_os_user_call_handler mcctrl_uchs[] = {
{ .request = MCEXEC_UP_SYS_MOUNT, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_SYS_UMOUNT, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_SYS_UNSHARE, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_UTIL_THREAD1, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_UTIL_THREAD2, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_UTI_GET_CTX, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_UTI_SAVE_FS, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_SIG_THREAD, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_SYSCALL_THREAD, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_TERMINATE_THREAD, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_GET_NUM_POOL_THREADS, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_UTI_ATTR, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_RELEASE_USER_SPACE, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_DEBUG_LOG, .func = mcctrl_ioctl },
{ .request = IHK_OS_AUX_PERF_NUM, .func = mcctrl_ioctl },
{ .request = IHK_OS_AUX_PERF_SET, .func = mcctrl_ioctl },
@@ -178,6 +178,7 @@ int mcctrl_os_shutdown_notifier(int os_index)
mdelay(200);
}
pager_cleanup();
sysfsm_cleanup(os[os_index]);
free_topology_info(os[os_index]);
ihk_os_unregister_user_call_handlers(os[os_index], mcctrl_uc + os_index);
@@ -185,9 +186,6 @@ int mcctrl_os_shutdown_notifier(int os_index)
destroy_ikc_channels(os[os_index]);
procfs_exit(os_index);
}
#ifdef POSTK_DEBUG_TEMP_FIX_35 /* in shutdown phase, rus_page_hash_put_pages() call added. */
rus_page_hash_put_pages();
#endif /* POSTK_DEBUG_TEMP_FIX_35 */
os[os_index] = NULL;
@@ -214,6 +212,68 @@ static struct ihk_os_notifier mcctrl_os_notifier = {
.ops = &mcctrl_os_notifier_ops,
};
/*
 * Pointers to kernel functions and objects that mcctrl reaches indirectly.
 * They start out NULL and are filled in by symbols_init(), which resolves
 * each one by name through kallsyms_lookup_name().
 */
int (*mcctrl_sys_mount)(char *dev_name, char *dir_name, char *type,
unsigned long flags, void *data);
int (*mcctrl_sys_umount)(char *dir_name, int flags);
int (*mcctrl_sys_unshare)(unsigned long unshare_flags);
long (*mcctrl_sched_setaffinity)(pid_t pid, const struct cpumask *in_mask);
int (*mcctrl_sched_setscheduler_nocheck)(struct task_struct *p, int policy,
const struct sched_param *param);
ssize_t (*mcctrl_sys_readlink)(const char *path, char *buf,
size_t bufsiz);
void (*mcctrl_zap_page_range)(struct vm_area_struct *vma,
unsigned long start,
unsigned long size,
struct zap_details *details);
/* Resolved from the kernel symbol "hugetlbfs_inode_operations" */
struct inode_operations *mcctrl_hugetlbfs_inode_operations;
/*
 * symbols_init() - resolve the kernel symbols mcctrl calls through pointers.
 *
 * Every mcctrl_* pointer is assigned the address kallsyms_lookup_name()
 * reports for the corresponding kernel symbol.  Lookups run in a fixed
 * order and stop at the first symbol that cannot be found: a kernel warning
 * is raised and -EFAULT is returned.  When all generic symbols resolve, the
 * result of arch_symbols_init() is returned.
 */
static int symbols_init(void)
{
/*
 * Store the address of @sym in @ptr.  Evaluates to non-zero, after raising
 * a WARN, when the lookup came back empty.
 */
#define MCCTRL_SYM_MISSING(ptr, sym) ({				\
	(ptr) = (void *)kallsyms_lookup_name(sym);		\
	WARN_ON(!(ptr));					\
})

	/* || short-circuits, so later lookups are skipped after a failure */
	if (MCCTRL_SYM_MISSING(mcctrl_sys_mount, "sys_mount") ||
	    MCCTRL_SYM_MISSING(mcctrl_sys_umount, "sys_umount") ||
	    MCCTRL_SYM_MISSING(mcctrl_sys_unshare, "sys_unshare") ||
	    MCCTRL_SYM_MISSING(mcctrl_sched_setaffinity,
			       "sched_setaffinity") ||
	    MCCTRL_SYM_MISSING(mcctrl_sched_setscheduler_nocheck,
			       "sched_setscheduler_nocheck") ||
	    MCCTRL_SYM_MISSING(mcctrl_sys_readlink, "sys_readlink") ||
	    MCCTRL_SYM_MISSING(mcctrl_zap_page_range, "zap_page_range") ||
	    MCCTRL_SYM_MISSING(mcctrl_hugetlbfs_inode_operations,
			       "hugetlbfs_inode_operations"))
		return -EFAULT;

#undef MCCTRL_SYM_MISSING

	return arch_symbols_init();
}
static int __init mcctrl_init(void)
{
int ret = 0;
@@ -227,10 +287,11 @@ static int __init mcctrl_init(void)
os[i] = NULL;
}
rus_page_hash_init();
binfmt_mcexec_init();
if ((ret = symbols_init()))
goto error;
if ((ret = ihk_host_register_os_notifier(&mcctrl_os_notifier)) != 0) {
printk("mcctrl: error: registering OS notifier\n");
goto error;
@@ -241,7 +302,6 @@ static int __init mcctrl_init(void)
error:
binfmt_mcexec_exit();
rus_page_hash_put_pages();
return ret;
}
@@ -253,7 +313,6 @@ static void __exit mcctrl_exit(void)
}
binfmt_mcexec_exit();
rus_page_hash_put_pages();
uti_attr_finalize();
printk("mcctrl: unregistered.\n");

View File

@@ -52,6 +52,8 @@
static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ihk_ikc_channel_desc *c);
int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet);
void sig_done(unsigned long arg, int err);
void mcctrl_perf_ack(ihk_os_t os, struct ikc_scd_packet *packet);
void mcctrl_futex_wake(struct ikc_scd_packet *pisp);
void mcctrl_os_read_write_cpu_response(ihk_os_t os,
struct ikc_scd_packet *pisp);
void mcctrl_eventfd(ihk_os_t os, struct ikc_scd_packet *pisp);
@@ -154,7 +156,7 @@ int mcctrl_ikc_send_wait(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp,
spin_lock_irqsave(&usrdata->wakeup_descs_lock, flags);
list_add(&desc->chain, &usrdata->wakeup_descs_list);
spin_unlock_irqrestore(&usrdata->wakeup_descs_lock, flags);
if (free_addrs_count)
if (do_frees)
*do_frees = 0;
return ret < 0 ? ret : -ETIME;
}
@@ -182,6 +184,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
case SCD_MSG_PREPARE_PROCESS_ACKED:
case SCD_MSG_PERF_ACK:
case SCD_MSG_SEND_SIGNAL_ACK:
case SCD_MSG_PROCFS_ANSWER:
mcctrl_wakeup_cb(__os, pisp);
break;
@@ -189,11 +192,6 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
mcexec_syscall(usrdata, pisp);
break;
case SCD_MSG_PROCFS_ANSWER:
procfs_answer(usrdata, pisp->pid);
break;
case SCD_MSG_SYSFS_REQ_CREATE:
case SCD_MSG_SYSFS_REQ_MKDIR:
case SCD_MSG_SYSFS_REQ_SYMLINK:
@@ -209,7 +207,8 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
case SCD_MSG_PROCFS_TID_CREATE:
case SCD_MSG_PROCFS_TID_DELETE:
procfsm_packet_handler(__os, pisp->msg, pisp->pid, pisp->arg);
procfsm_packet_handler(__os, pisp->msg, pisp->pid, pisp->arg,
pisp->resp_pa);
break;
case SCD_MSG_GET_VDSO_INFO:
@@ -225,6 +224,10 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
mcctrl_eventfd(__os, pisp);
break;
case SCD_MSG_FUTEX_WAKE:
mcctrl_futex_wake(pisp);
break;
default:
printk(KERN_ERR "mcctrl:syscall_packet_handler:"
"unknown message (%d.%d.%d.%d.%d.%#lx)\n",

View File

@@ -67,6 +67,7 @@
#define SCD_MSG_PROCFS_DELETE 0x11
#define SCD_MSG_PROCFS_REQUEST 0x12
#define SCD_MSG_PROCFS_ANSWER 0x13
#define SCD_MSG_PROCFS_RELEASE 0x15
#define SCD_MSG_DEBUG_LOG 0x20
@@ -101,23 +102,18 @@
#define SCD_MSG_CPU_RW_REG 0x52
#define SCD_MSG_CPU_RW_REG_RESP 0x53
#define SCD_MSG_FUTEX_WAKE 0x60
#define DMA_PIN_SHIFT 21
#define DO_USER_MODE
#define __NR_coredump 999
#ifdef POSTK_DEBUG_TEMP_FIX_61 /* Core table size and lseek return value to loff_t */
struct coretable {
loff_t len;
unsigned long addr;
};
#else /* POSTK_DEBUG_TEMP_FIX_61 */
struct coretable {
int len;
unsigned long addr;
};
#endif /* POSTK_DEBUG_TEMP_FIX_61 */
enum mcctrl_os_cpu_operation {
MCCTRL_OS_CPU_READ_REGISTER,
@@ -125,6 +121,12 @@ enum mcctrl_os_cpu_operation {
MCCTRL_OS_CPU_MAX_OP
};
/* Used to wake-up a Linux thread futex_wait()-ing */
struct uti_futex_resp {
/* NOTE(review): presumably set non-zero by the waker before waking wq -- confirm */
int done;
/* Wait queue head; NOTE(review): presumably where the waiting thread sleeps -- confirm */
wait_queue_head_t wq;
};
struct ikc_scd_packet {
int msg;
int err;
@@ -147,7 +149,7 @@ struct ikc_scd_packet {
long sysfs_arg3;
};
/* SCD_MSG_SCHEDULE_THREAD */
/* SCD_MSG_WAKE_UP_SYSCALL_THREAD */
struct {
int ttid;
};
@@ -163,6 +165,12 @@ struct ikc_scd_packet {
struct {
int eventfd_type;
};
/* SCD_MSG_FUTEX_WAKE */
struct {
void *resp;
int *spin_sleep; /* 1: waiting in linux_wait_event() 0: woken up by someone else */
} futex;
};
char padding[8];
};
@@ -213,9 +221,12 @@ struct mcctrl_channel {
};
/*
 * Per-thread record kept in the per-process hash table
 * (ppd->per_thread_data_hash) and looked up by task pointer.
 */
struct mcctrl_per_thread_data {
/* NOTE(review): presumably the owning per-process data -- confirm */
struct mcctrl_per_proc_data *ppd;
/* Link in one bucket list of ppd->per_thread_data_hash */
struct list_head hash;
/* Lookup key: compared against the task passed to mcctrl_get_per_thread_data() */
struct task_struct *task;
/* Opaque payload associated with the thread */
void *data;
int tid; /* debug */
/* Reference count; incremented by mcctrl_get_per_thread_data(), which treats a value <= 0 as use-after-free */
atomic_t refcount;
};
#define MCCTRL_PER_THREAD_DATA_HASH_SHIFT 8
@@ -315,6 +326,7 @@ struct mcctrl_part_exec {
struct mutex lock;
int nr_processes;
int nr_processes_left;
int process_rank;
cpumask_t cpus_used;
struct list_head pli_list;
};
@@ -400,10 +412,30 @@ int mcctrl_ikc_send_wait(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp,
ihk_os_t osnum_to_os(int n);
/* look up symbols, plus arch-specific ones */
extern int (*mcctrl_sys_mount)(char *dev_name, char *dir_name, char *type,
unsigned long flags, void *data);
extern int (*mcctrl_sys_umount)(char *dir_name, int flags);
extern int (*mcctrl_sys_unshare)(unsigned long unshare_flags);
extern long (*mcctrl_sched_setaffinity)(pid_t pid,
const struct cpumask *in_mask);
extern int (*mcctrl_sched_setscheduler_nocheck)(struct task_struct *p,
int policy,
const struct sched_param *param);
extern ssize_t (*mcctrl_sys_readlink)(const char *path, char *buf,
size_t bufsiz);
extern void (*mcctrl_zap_page_range)(struct vm_area_struct *vma,
unsigned long start,
unsigned long size,
struct zap_details *details);
extern struct inode_operations *mcctrl_hugetlbfs_inode_operations;
/* syscall.c */
void pager_add_process(void);
void pager_remove_process(struct mcctrl_per_proc_data *ppd);
void pager_cleanup(void);
int __do_in_kernel_irq_syscall(ihk_os_t os, struct ikc_scd_packet *packet);
int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet);
int mcctrl_add_per_proc_data(struct mcctrl_usrdata *ud, int pid,
struct mcctrl_per_proc_data *ppd);
@@ -412,20 +444,18 @@ struct mcctrl_per_proc_data *mcctrl_get_per_proc_data(
struct mcctrl_usrdata *ud, int pid);
void mcctrl_put_per_proc_data(struct mcctrl_per_proc_data *ppd);
int mcctrl_add_per_thread_data(struct mcctrl_per_proc_data* ppd,
struct task_struct *task, void *data);
int mcctrl_delete_per_thread_data(struct mcctrl_per_proc_data* ppd,
struct task_struct *task);
int mcctrl_add_per_thread_data(struct mcctrl_per_proc_data *ppd, void *data);
void mcctrl_put_per_thread_data_unsafe(struct mcctrl_per_thread_data *ptd);
void mcctrl_put_per_thread_data(struct mcctrl_per_thread_data* ptd);
#ifdef POSTK_DEBUG_ARCH_DEP_56 /* Strange how to use inline declaration fix. */
static inline struct mcctrl_per_thread_data *mcctrl_get_per_thread_data(
struct mcctrl_per_proc_data *ppd, struct task_struct *task)
inline struct mcctrl_per_thread_data *mcctrl_get_per_thread_data(struct mcctrl_per_proc_data *ppd, struct task_struct *task)
{
struct mcctrl_per_thread_data *ptd_iter, *ptd = NULL;
int hash = (((uint64_t)task >> 4) & MCCTRL_PER_THREAD_DATA_HASH_MASK);
unsigned long flags;
/* Check if data for this thread exists and return it */
read_lock_irqsave(&ppd->per_thread_data_hash_lock[hash], flags);
/* Check if data for this thread exists */
write_lock_irqsave(&ppd->per_thread_data_hash_lock[hash], flags);
list_for_each_entry(ptd_iter, &ppd->per_thread_data_hash[hash], hash) {
if (ptd_iter->task == task) {
@@ -434,16 +464,27 @@ static inline struct mcctrl_per_thread_data *mcctrl_get_per_thread_data(
}
}
read_unlock_irqrestore(&ppd->per_thread_data_hash_lock[hash], flags);
return ptd ? ptd->data : NULL;
if (ptd) {
if (atomic_read(&ptd->refcount) <= 0) {
printk("%s: ERROR: use-after-free detected (%d)", __FUNCTION__, atomic_read(&ptd->refcount));
ptd = NULL;
goto out;
}
atomic_inc(&ptd->refcount);
}
out:
write_unlock_irqrestore(&ppd->per_thread_data_hash_lock[hash], flags);
return ptd;
}
#else /* POSTK_DEBUG_ARCH_DEP_56 */
inline struct mcctrl_per_thread_data *mcctrl_get_per_thread_data(
struct mcctrl_per_proc_data *ppd, struct task_struct *task);
inline struct mcctrl_per_thread_data *mcctrl_get_per_thread_data(struct mcctrl_per_proc_data *ppd, struct task_struct *task);
#endif /* POSTK_DEBUG_ARCH_DEP_56 */
int mcctrl_clear_pte_range(uintptr_t start, uintptr_t len);
void __return_syscall(ihk_os_t os, struct ikc_scd_packet *packet,
long ret, int stid);
int clear_pte_range(uintptr_t start, uintptr_t len);
int mcctrl_os_alive(void);
@@ -455,7 +496,6 @@ struct procfs_read {
int count; /* bytes to read (request) */
int eof; /* if eof is detected, 1 otherwise 0. (answer)*/
int ret; /* read bytes (answer) */
int status; /* non-zero if done (answer) */
int newcpu; /* migrated new cpu (answer) */
int readwrite; /* 0:read, 1:write */
char fname[PROCFS_NAME_MAX]; /* procfs filename (request) */
@@ -468,7 +508,8 @@ struct procfs_file {
};
void procfs_answer(struct mcctrl_usrdata *ud, int pid);
int procfsm_packet_handler(void *os, int msg, int pid, unsigned long arg);
int procfsm_packet_handler(void *os, int msg, int pid, unsigned long arg,
unsigned long resp_pa);
void add_tid_entry(int osnum, int pid, int tid);
void add_pid_entry(int osnum, int pid);
void delete_tid_entry(int osnum, int pid, int tid);
@@ -504,7 +545,9 @@ struct vdso {
int reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp,
unsigned long *endp);
int release_user_space(uintptr_t start, uintptr_t len);
void get_vdso_info(ihk_os_t os, long vdso_pa);
int arch_symbols_init(void);
struct get_cpu_mapping_req {
int busy; /* INOUT: */

View File

@@ -103,33 +103,6 @@ getpath(struct procfs_list_entry *e, char *buf, int bufsize)
}
}
/**
* \brief Process SCD_MSG_PROCFS_ANSWER message.
*
* \param ud mcctrl_usrdata pointer
* \param pid PID of the requesting process
*/
void procfs_answer(struct mcctrl_usrdata *ud, int pid)
{
struct mcctrl_per_proc_data *ppd = NULL;
if (pid > 0) {
ppd = mcctrl_get_per_proc_data(ud, pid);
if (unlikely(!ppd)) {
kprintf("%s: ERROR: no per-process structure for PID %d\n",
__FUNCTION__, pid);
return;
}
}
wake_up_all(pid > 0 ? &ppd->wq_procfs : &ud->wq_procfs);
if (pid > 0) {
mcctrl_put_per_proc_data(ppd);
}
}
static struct procfs_list_entry *
find_procfs_entry(struct procfs_list_entry *parent, const char *name)
{
@@ -321,6 +294,8 @@ get_base_entry(int osnum)
if(!e){
e = add_procfs_entry(NULL, name, S_IFDIR | 0555,
uid, gid, NULL);
if (!e)
return NULL;
e->osnum = osnum;
}
return e;
@@ -456,6 +431,8 @@ proc_exe_link(int osnum, int pid, const char *path)
e = add_procfs_entry(parent, "exe", S_IFLNK | 0777, uid, gid,
path);
if (!e)
goto out;
e->data = kmalloc(strlen(path) + 1, GFP_KERNEL);
strcpy(e->data, path);
task = find_procfs_entry(parent, "task");
@@ -464,6 +441,7 @@ proc_exe_link(int osnum, int pid, const char *path)
uid, gid, path);
}
}
out:
up(&procfs_file_list_lock);
}
@@ -509,7 +487,6 @@ procfs_exit(int osnum)
* This function conforms to the 2) way of fs/proc/generic.c
* from linux-2.6.39.4.
*/
#ifdef POSTK_DEBUG_TEMP_FIX_43 /* Fixed an issue that failed pread / pwrite of size larger than 4MB */
static ssize_t __mckernel_procfs_read_write(
struct file *file,
char __user *buf, size_t nbytes,
@@ -520,7 +497,7 @@ static ssize_t __mckernel_procfs_read_write(
int order = 0;
volatile struct procfs_read *r = NULL;
struct ikc_scd_packet isp;
int ret, osnum, pid, retw;
int ret, osnum, pid;
unsigned long pbuf;
size_t count = nbytes;
size_t copy_size = 0;
@@ -615,11 +592,11 @@ static ssize_t __mckernel_procfs_read_write(
while (count > 0) {
int this_len = min_t(ssize_t, count, copy_size);
int do_free;
r->pbuf = pbuf;
r->eof = 0;
r->ret = -EIO; /* default */
r->status = 0;
r->offset = offset;
r->count = this_len;
r->readwrite = read_write;
@@ -629,50 +606,26 @@ static ssize_t __mckernel_procfs_read_write(
isp.arg = virt_to_phys(r);
isp.pid = pid;
ret = mcctrl_ikc_send(osnum_to_os(e->osnum),
(pid > 0) ? ppd->ikc_target_cpu : 0, &isp);
ret = mcctrl_ikc_send_wait(osnum_to_os(e->osnum),
(pid > 0) ? ppd->ikc_target_cpu : 0,
&isp, HZ, NULL, &do_free, 1, r);
if (!do_free && ret >= 0) {
ret = -EIO;
}
if (ret < 0) {
goto out; /* error */
}
/* Wait for a reply. */
ret = -EIO; /* default exit code */
dprintk("%s: waiting for reply\n", __FUNCTION__);
retry_wait:
/* Wait for the status field of the procfs_read structure,
* wait on per-process or OS specific data depending on
* who the request is for.
*/
if (pid > 0) {
retw = wait_event_interruptible_timeout(ppd->wq_procfs,
r->status != 0, HZ);
}
else {
retw = wait_event_interruptible_timeout(udp->wq_procfs,
r->status != 0, HZ);
}
/* Timeout? */
if (retw == 0 && r->status == 0) {
printk("%s: error: timeout (1 sec)\n", __FUNCTION__);
if (ret == -ETIME) {
pr_info("%s: error: timeout (1 sec)\n",
__func__);
}
else if (ret == -ERESTARTSYS) {
ret = -ERESTART;
}
if (!do_free)
r = NULL;
goto out;
}
/* Interrupted? */
else if (retw == -ERESTARTSYS) {
ret = -ERESTART;
goto out;
}
/* Were we woken up by a reply to another procfs request? */
else if (r->status == 0) {
/* TODO: r->status is not set atomically, we could be woken
* up with status == 0 and it could change to 1 while in this
* code, we could potentially miss the wake_up()...
*/
printk("%s: stale wake-up, retrying\n", __FUNCTION__);
goto retry_wait;
}
/* Wake up and check the result. */
dprintk("%s: woke up. ret: %d, eof: %d\n",
@@ -717,193 +670,6 @@ out:
return ret;
}
#else /* POSTK_DEBUG_TEMP_FIX_43 */
static ssize_t __mckernel_procfs_read_write(
struct file *file,
char __user *buf, size_t nbytes,
loff_t *ppos, int read_write)
{
struct inode * inode = file->f_inode;
char *kern_buffer = NULL;
int order = 0;
volatile struct procfs_read *r = NULL;
struct ikc_scd_packet isp;
int ret, osnum, pid, retw;
unsigned long pbuf;
unsigned long count = nbytes;
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
struct proc_dir_entry *dp = PDE(inode);
struct procfs_list_entry *e = dp->data;
#else
struct procfs_list_entry *e = PDE_DATA(inode);
#endif
loff_t offset = *ppos;
char pathbuf[PROCFS_NAME_MAX];
char *path, *p;
ihk_os_t os = NULL;
struct mcctrl_usrdata *udp = NULL;
struct mcctrl_per_proc_data *ppd = NULL;
if (count <= 0 || offset < 0) {
return 0;
}
path = getpath(e, pathbuf, PROCFS_NAME_MAX);
dprintk("%s: invoked for %s, offset: %lu, count: %lu\n",
__FUNCTION__, path,
(unsigned long)offset, count);
/* Verify OS number */
ret = sscanf(path, "mcos%d/", &osnum);
if (ret != 1) {
printk("%s: error: couldn't determine OS number\n", __FUNCTION__);
return -EINVAL;
}
if (osnum != e->osnum) {
printk("%s: error: OS numbers don't match\n", __FUNCTION__);
return -EINVAL;
}
/* Is this request for a specific process? */
p = strchr(path, '/') + 1;
ret = sscanf(p, "%d/", &pid);
if (ret != 1) {
pid = -1;
}
os = osnum_to_os(osnum);
if (!os) {
printk("%s: error: no IHK OS data found for OS %d\n",
__FUNCTION__, osnum);
return -EINVAL;
}
udp = ihk_host_os_get_usrdata(os);
if (!udp) {
printk("%s: error: no MCCTRL data found for OS %d\n",
__FUNCTION__, osnum);
return -EINVAL;
}
if (pid > 0) {
ppd = mcctrl_get_per_proc_data(udp, pid);
if (unlikely(!ppd)) {
printk("%s: error: no per-process structure for PID %d",
__FUNCTION__, pid);
return -EINVAL;
}
}
while ((1 << order) < count) ++order;
if (order > 12) {
order -= 12;
}
else {
order = 1;
}
/* NOTE: we need physically contigous memory to pass through IKC */
kern_buffer = (char *)__get_free_pages(GFP_KERNEL, order);
if (!kern_buffer) {
printk("%s: ERROR: allocating kernel buffer\n", __FUNCTION__);
ret = -ENOMEM;
goto out;
}
pbuf = virt_to_phys(kern_buffer);
r = kmalloc(sizeof(struct procfs_read), GFP_KERNEL);
if (r == NULL) {
ret = -ENOMEM;
goto out;
}
r->pbuf = pbuf;
r->eof = 0;
r->ret = -EIO; /* default */
r->status = 0;
r->offset = offset;
r->count = count;
r->readwrite = read_write;
strncpy((char *)r->fname, path, PROCFS_NAME_MAX);
isp.msg = SCD_MSG_PROCFS_REQUEST;
isp.ref = 0;
isp.arg = virt_to_phys(r);
isp.pid = pid;
ret = mcctrl_ikc_send(osnum_to_os(e->osnum),
(pid > 0) ? ppd->ikc_target_cpu : 0, &isp);
if (ret < 0) {
goto out; /* error */
}
/* Wait for a reply. */
ret = -EIO; /* default exit code */
dprintk("%s: waiting for reply\n", __FUNCTION__);
retry_wait:
/* Wait for the status field of the procfs_read structure,
* wait on per-process or OS specific data depending on
* who the request is for.
*/
if (pid > 0) {
retw = wait_event_interruptible_timeout(ppd->wq_procfs,
r->status != 0, 5 * HZ);
}
else {
retw = wait_event_interruptible_timeout(udp->wq_procfs,
r->status != 0, 5 * HZ);
}
/* Timeout? */
if (retw == 0 && r->status == 0) {
printk("%s: error: timeout (1 sec)\n", __FUNCTION__);
goto out;
}
/* Interrupted? */
else if (retw == -ERESTARTSYS) {
ret = -ERESTART;
goto out;
}
/* Were we woken up by a reply to another procfs request? */
else if (r->status == 0) {
/* TODO: r->status is not set atomically, we could be woken
* up with status == 0 and it could change to 1 while in this
* code, we could potentially miss the wake_up()...
*/
printk("%s: stale wake-up, retrying\n", __FUNCTION__);
goto retry_wait;
}
/* Wake up and check the result. */
dprintk("%s: woke up. ret: %d, eof: %d\n",
__FUNCTION__, r->ret, r->eof);
if (r->ret > 0) {
if (read_write == 0) {
if (copy_to_user(buf, kern_buffer, r->ret)) {
printk("%s: ERROR: copy_to_user failed.\n", __FUNCTION__);
ret = -EFAULT;
goto out;
}
}
*ppos += r->ret;
}
ret = r->ret;
out:
if (ppd)
mcctrl_put_per_proc_data(ppd);
if (kern_buffer)
free_pages((uintptr_t)kern_buffer, order);
if (r)
kfree((void *)r);
return ret;
}
#endif /* POSTK_DEBUG_TEMP_FIX_43 */
static ssize_t mckernel_procfs_read(struct file *file,
char __user *buf, size_t nbytes, loff_t *ppos)
@@ -939,33 +705,48 @@ struct procfs_work {
int msg;
int pid;
unsigned long arg;
unsigned long resp_pa;
struct work_struct work;
};
static void procfsm_work_main(struct work_struct *work0)
{
struct procfs_work *work = container_of(work0, struct procfs_work, work);
unsigned long phys;
int *done;
switch (work->msg) {
case SCD_MSG_PROCFS_TID_CREATE:
add_tid_entry(ihk_host_os_get_index(work->os), work->pid, work->arg);
break;
case SCD_MSG_PROCFS_TID_CREATE:
add_tid_entry(ihk_host_os_get_index(work->os),
work->pid, work->arg);
phys = ihk_device_map_memory(ihk_os_to_dev(work->os),
work->resp_pa, sizeof(int));
done = ihk_device_map_virtual(ihk_os_to_dev(work->os),
phys, sizeof(int), NULL, 0);
*done = 1;
ihk_device_unmap_virtual(ihk_os_to_dev(work->os),
done, sizeof(int));
ihk_device_unmap_memory(ihk_os_to_dev(work->os),
phys, sizeof(int));
break;
case SCD_MSG_PROCFS_TID_DELETE:
delete_tid_entry(ihk_host_os_get_index(work->os), work->pid, work->arg);
break;
case SCD_MSG_PROCFS_TID_DELETE:
delete_tid_entry(ihk_host_os_get_index(work->os),
work->pid, work->arg);
break;
default:
printk("%s: unknown work: msg: %d, pid: %d, arg: %lu)\n",
__FUNCTION__, work->msg, work->pid, work->arg);
break;
default:
pr_warn("%s: unknown work: msg: %d, pid: %d, arg: %lu)\n",
__func__, work->msg, work->pid, work->arg);
break;
}
kfree(work);
return;
}
int procfsm_packet_handler(void *os, int msg, int pid, unsigned long arg)
int procfsm_packet_handler(void *os, int msg, int pid, unsigned long arg,
unsigned long resp_pa)
{
struct procfs_work *work = NULL;
@@ -979,6 +760,7 @@ int procfsm_packet_handler(void *os, int msg, int pid, unsigned long arg)
work->msg = msg;
work->pid = pid;
work->arg = arg;
work->resp_pa = resp_pa;
INIT_WORK(&work->work, &procfsm_work_main);
schedule_work(&work->work);
@@ -997,6 +779,303 @@ static const struct file_operations mckernel_forward = {
.write = mckernel_procfs_write,
};
#define PA_NULL (-1L)
/*
 * Per-open-file state for procfs entries served through mckernel_buff_io.
 * Allocated in mckernel_procfs_buff_open() with room for the NUL-terminated
 * path after the fixed fields, and freed in mckernel_procfs_buff_release().
 */
struct mckernel_procfs_buffer_info {
	/* Physical address of the first buffer in the chain; PA_NULL until
	 * the first read has obtained it from the SCD_MSG_PROCFS_REQUEST reply.
	 */
	unsigned long top_pa;
	/* Physical address of the buffer the reader is currently positioned
	 * on; PA_NULL means "restart from top_pa".
	 */
	unsigned long cur_pa;
	/* OS instance resolved from the procfs entry at open time. */
	ihk_os_t os;
	/* PID parsed from the entry path, or -1 when the path has none. */
	int pid;
	/* Entry path as returned by getpath() (flexible array member). */
	char path[];
};
/*
 * Header of one buffer in the chain read by mckernel_procfs_buff_read().
 * The reader maps PAGE_SIZE bytes at each buffer's physical address.
 */
struct mckernel_procfs_buffer {
	/* Physical address of the next buffer; PA_NULL ends the chain. */
	unsigned long next_pa;
	/* File offset corresponding to buf[0]. */
	unsigned long pos;
	/* Number of valid bytes in buf. */
	unsigned long size;
	/* Payload (flexible array member). */
	char buf[];
};
/*
 * open() handler for buffer-backed procfs entries.
 *
 * Resolves the OS instance and the entry path, extracts the PID from the
 * path component following the first '/', and stores everything in a newly
 * allocated mckernel_procfs_buffer_info hung off file->private_data.
 *
 * Returns 0 on success, -EINVAL if the OS number of the entry cannot be
 * resolved, or -ENOMEM on allocation failure.
 */
static int mckernel_procfs_buff_open(struct inode *inode, struct file *file)
{
	struct mckernel_procfs_buffer_info *info;
	int pid;
	char *path;
	char *path_buf;
	char *p;
	ihk_os_t os;
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
	struct proc_dir_entry *dp = PDE(inode);
	struct procfs_list_entry *e = dp->data;
#else
	struct procfs_list_entry *e = PDE_DATA(inode);
#endif

	os = osnum_to_os(e->osnum);
	if (!os) {
		return -EINVAL;
	}

	path_buf = kmalloc(PROCFS_NAME_MAX, GFP_KERNEL);
	if (!path_buf) {
		return -ENOMEM;
	}
	path = getpath(e, path_buf, PROCFS_NAME_MAX);

	/*
	 * The PID, if any, is the component after the first '/'.  Do not
	 * compute "strchr() + 1" blindly: without a '/' that would hand an
	 * invalid pointer to sscanf().
	 */
	p = strchr(path, '/');
	if (!p || sscanf(p + 1, "%d/", &pid) != 1) {
		pid = -1;
	}

	info = kmalloc(sizeof(struct mckernel_procfs_buffer_info) +
		       strlen(path) + 1, GFP_KERNEL);
	if (!info) {
		kfree(path_buf);
		return -ENOMEM;
	}

	/* No buffer chain yet; the first read requests it. */
	info->top_pa = PA_NULL;
	info->cur_pa = PA_NULL;
	info->os = os;
	info->pid = pid;
	strcpy(info->path, path);	/* sized above with strlen(path) + 1 */
	file->private_data = info;

	kfree(path_buf);
	return 0;
}
/*
 * release() handler for buffer-backed procfs entries.
 *
 * If a buffer chain was obtained by a previous read (top_pa != PA_NULL),
 * sends SCD_MSG_PROCFS_RELEASE carrying the chain head and waits up to
 * five seconds for the reply.  The per-file info is always freed.
 *
 * Returns 0 on success, -EIO if there is no per-file info or the reply is
 * unusable, -ENOMEM on allocation failure, -ERESTART if the wait was
 * interrupted, or the negative code reported by the send or the reply.
 */
static int mckernel_procfs_buff_release(struct inode *inode, struct file *file)
{
	struct mckernel_procfs_buffer_info *info = file->private_data;
	int rc = 0;

	if (!info) {
		return -EIO;
	}
	file->private_data = NULL;

	if (info->top_pa != PA_NULL) {
		int ret;
		struct procfs_read *r;
		struct ikc_scd_packet isp;
		/*
		 * Initialised so it is never read indeterminate if the send
		 * helper bails out before storing to it.
		 * NOTE(review): presumably the helper sets it on every path;
		 * confirm against mcctrl_ikc_send_wait().
		 */
		int do_free = 1;

		r = kzalloc(sizeof(*r), GFP_KERNEL);
		if (r == NULL) {
			rc = -ENOMEM;
			goto out;
		}
		r->pbuf = info->top_pa;
		r->ret = -EIO; /* default */

		isp.msg = SCD_MSG_PROCFS_RELEASE;
		isp.ref = 0;
		isp.arg = virt_to_phys(r);
		isp.pid = 0;

		ret = mcctrl_ikc_send_wait(info->os, 0,
					   &isp, 5 * HZ, NULL, &do_free, 1, r);

		/* A "success" where we may not free r is treated as failure. */
		if (!do_free && ret >= 0) {
			ret = -EIO;
		}

		if (ret < 0) {
			rc = ret;
			if (ret == -ETIME) {
				pr_info("%s: error: timeout (5 sec)\n",
					__func__);
			}
			else if (ret == -ERESTARTSYS) {
				rc = -ERESTART;
			}
			/* r must not be freed here when do_free is clear. */
			if (!do_free)
				r = NULL;
			goto out;
		}

		if (r->ret < 0) {
			rc = r->ret;
			goto out;
		}
		rc = 0;

out:
		kfree(r);	/* kfree(NULL) is a no-op */
	}

	kfree(info);
	return rc;
}
/*
 * read() handler for buffer-backed procfs entries.
 *
 * On the first read of an open file the content is requested with
 * SCD_MSG_PROCFS_REQUEST; the reply's pbuf is the physical address of the
 * first mckernel_procfs_buffer of a chain.  Every read then walks the chain,
 * mapping one PAGE_SIZE buffer at a time, and copies the bytes covering
 * [*ppos, *ppos + nbytes) to user space.
 *
 * Returns the number of bytes copied (0 at end of data), or a negative
 * errno: -EIO, -EINVAL, -ENOMEM, -ERESTART, -EFAULT, or the code reported
 * by the send or the reply.
 */
static ssize_t mckernel_procfs_buff_read(struct file *file, char __user *ubuf,
					 size_t nbytes, loff_t *ppos)
{
	struct mckernel_procfs_buffer_info *info = file->private_data;
	unsigned long phys;
	struct mckernel_procfs_buffer *buf;
	loff_t pos = *ppos;	/* keep full width; int would truncate */
	ssize_t l = 0;
	int done = 0;
	ihk_os_t os;

	if (nbytes <= 0 || *ppos < 0) {
		return 0;
	}

	if (!info) {
		return -EIO;
	}

	os = info->os;

	/* First read on this file: obtain the buffer chain. */
	if (info->top_pa == PA_NULL) {
		int ret;
		int pid = info->pid;
		struct procfs_read *r = NULL;
		struct ikc_scd_packet isp;
		struct mcctrl_usrdata *udp = NULL;
		struct mcctrl_per_proc_data *ppd = NULL;
		/*
		 * Initialised so it is never read indeterminate if the send
		 * helper bails out before storing to it.
		 * NOTE(review): presumably the helper sets it on every path;
		 * confirm against mcctrl_ikc_send_wait().
		 */
		int do_free = 1;

		udp = ihk_host_os_get_usrdata(os);
		if (!udp) {
			pr_err("%s: no MCCTRL data found for OS\n",
			       __func__);
			return -EINVAL;
		}

		if (pid > 0) {
			ppd = mcctrl_get_per_proc_data(udp, pid);
			if (unlikely(!ppd)) {
				pr_err("%s: no per-process structure for PID %d\n",
				       __func__, pid);
				return -EINVAL;
			}
		}

		r = kzalloc(sizeof(*r), GFP_KERNEL);
		if (r == NULL) {
			l = -ENOMEM;
			done = 1;
			goto out;
		}
		r->pbuf = PA_NULL;
		r->ret = -EIO; /* default */
		/* r is zeroed, so stopping one short keeps fname terminated. */
		strncpy((char *)r->fname, info->path, sizeof(r->fname) - 1);

		isp.msg = SCD_MSG_PROCFS_REQUEST;
		isp.ref = 0;
		isp.arg = virt_to_phys(r);
		isp.pid = pid;

		/* Assume failure until the reply has been validated. */
		l = -EIO;
		done = 1;
		ret = mcctrl_ikc_send_wait(os,
					   (pid > 0) ? ppd->ikc_target_cpu : 0,
					   &isp, 5 * HZ, NULL, &do_free, 1, r);

		/* A "success" where we may not free r is treated as failure. */
		if (!do_free && ret >= 0) {
			ret = -EIO;
		}

		if (ret < 0) {
			l = ret;
			if (ret == -ETIME) {
				pr_info("%s: error: timeout (5 sec)\n",
					__func__);
			}
			else if (ret == -ERESTARTSYS) {
				l = -ERESTART;
			}
			/* r must not be freed here when do_free is clear. */
			if (!do_free)
				r = NULL;
			goto out;
		}

		if (r->ret < 0) {
			l = r->ret;
			goto out;
		}

		done = 0;
		l = 0;
		info->top_pa = info->cur_pa = r->pbuf;

out:
		if (ppd)
			mcctrl_put_per_proc_data(ppd);
		kfree(r);	/* kfree(NULL) is a no-op */
	}

	if (info->cur_pa == PA_NULL) {
		info->cur_pa = info->top_pa;
	}

	while (!done && info->cur_pa != PA_NULL) {
		unsigned long bpos;
		size_t bsize;

		phys = ihk_device_map_memory(ihk_os_to_dev(os), info->cur_pa,
					     PAGE_SIZE);
#ifdef CONFIG_MIC
		buf = ioremap_wc(phys, PAGE_SIZE);
#else
		buf = ihk_device_map_virtual(ihk_os_to_dev(os), phys,
					     PAGE_SIZE, NULL, 0);
#endif
		/*
		 * NOTE(review): assumes the mapping helper reports failure
		 * with NULL (ioremap_wc() does) -- confirm for
		 * ihk_device_map_virtual().
		 */
		if (!buf) {
			ihk_device_unmap_memory(ihk_os_to_dev(os), phys,
						PAGE_SIZE);
			if (l == 0)
				l = -ENOMEM;
			break;
		}

		/* Requested offset precedes this buffer: rewind to the head. */
		if (pos < buf->pos) {
			info->cur_pa = info->top_pa;
			goto rep;
		}

		/* Requested offset is past this buffer: advance. */
		if (pos >= buf->pos + buf->size) {
			info->cur_pa = buf->next_pa;
			goto rep;
		}

		bpos = pos - buf->pos;
		bsize = (buf->pos + buf->size) - pos;
		if (bsize > (nbytes - l)) {
			bsize = nbytes - l;
		}

		if (copy_to_user(ubuf, buf->buf + bpos, bsize)) {
			/* Report the fault and leave the file offset alone. */
			done = 1;
			pos = *ppos;
			l = -EFAULT;
		}
		else {
			ubuf += bsize;
			pos += bsize;
			l += bsize;
			if (l == nbytes) {
				done = 1;
			}
		}

rep:
#ifdef CONFIG_MIC
		iounmap(buf);
#else
		ihk_device_unmap_virtual(ihk_os_to_dev(os), buf, PAGE_SIZE);
#endif
		ihk_device_unmap_memory(ihk_os_to_dev(os), phys, PAGE_SIZE);
	}

	*ppos = pos;
	return l;
}
/*
 * File operations for read-only procfs entries whose content is fetched
 * as a buffer chain.  No write handler is installed: members that are not
 * named here are zero-initialised.
 */
static const struct file_operations mckernel_buff_io = {
	.open		= mckernel_procfs_buff_open,
	.release	= mckernel_procfs_buff_release,
	.read		= mckernel_procfs_buff_read,
	.llseek		= mckernel_procfs_lseek,
};
static const struct procfs_entry tid_entry_stuff[] = {
// PROC_REG("auxv", S_IRUSR, NULL),
// PROC_REG("clear_refs", S_IWUSR, NULL),
@@ -1006,10 +1085,10 @@ static const struct procfs_entry tid_entry_stuff[] = {
// PROC_LNK("exe", mckernel_readlink),
// PROC_REG("limits", S_IRUSR|S_IWUSR, NULL),
// PROC_REG("maps", S_IRUGO, NULL),
PROC_REG("mem", S_IRUSR|S_IWUSR, NULL),
PROC_REG("mem", 0600, NULL),
// PROC_REG("pagemap", S_IRUGO, NULL),
// PROC_REG("smaps", S_IRUGO, NULL),
PROC_REG("stat", S_IRUGO, NULL),
PROC_REG("stat", 0444, &mckernel_buff_io),
// PROC_REG("statm", S_IRUGO, NULL),
// PROC_REG("status", S_IRUGO, NULL),
// PROC_REG("syscall", S_IRUGO, NULL),
@@ -1018,26 +1097,26 @@ static const struct procfs_entry tid_entry_stuff[] = {
};
static const struct procfs_entry pid_entry_stuff[] = {
PROC_REG("auxv", S_IRUSR, NULL),
PROC_REG("auxv", 0400, &mckernel_buff_io),
/* Support the case where McKernel process retrieves its job-id under the Fujitsu TCS suite. */
// PROC_REG("cgroup", S_IXUSR, NULL),
// PROC_REG("clear_refs", S_IWUSR, NULL),
PROC_REG("cmdline", S_IRUGO, NULL),
// PROC_REG("comm", S_IRUGO|S_IWUSR, NULL),
PROC_REG("cmdline", 0444, &mckernel_buff_io),
PROC_REG("comm", 0644, &mckernel_buff_io),
// PROC_REG("coredump_filter", S_IRUGO|S_IWUSR, NULL),
PROC_REG("cpuset", S_IXUSR, NULL),
// PROC_REG("cpuset", S_IRUGO, NULL),
// PROC_REG("environ", S_IRUSR, NULL),
// PROC_LNK("exe", mckernel_readlink),
// PROC_REG("limits", S_IRUSR|S_IWUSR, NULL),
PROC_REG("maps", S_IRUGO, NULL),
PROC_REG("mem", S_IRUSR|S_IWUSR, NULL),
PROC_REG("pagemap", S_IRUGO, NULL),
PROC_REG("smaps", S_IRUGO, NULL),
// PROC_REG("stat", S_IRUGO, NULL),
PROC_REG("maps", 0444, &mckernel_buff_io),
PROC_REG("mem", 0400, NULL),
PROC_REG("pagemap", 0444, NULL),
// PROC_REG("smaps", S_IRUGO, NULL),
// PROC_REG("stat", 0444, &mckernel_buff_io),
// PROC_REG("statm", S_IRUGO, NULL),
PROC_REG("status", S_IRUGO, NULL),
PROC_REG("status", 0444, &mckernel_buff_io),
// PROC_REG("syscall", S_IRUGO, NULL),
PROC_DIR("task", S_IRUGO|S_IXUGO),
PROC_DIR("task", 0555),
// PROC_REG("wchan", S_IRUGO, NULL),
PROC_TERM
};
@@ -1045,14 +1124,14 @@ static const struct procfs_entry pid_entry_stuff[] = {
static const struct procfs_entry base_entry_stuff[] = {
// PROC_REG("cmdline", S_IRUGO, NULL),
#ifdef POSTK_DEBUG_ARCH_DEP_42 /* /proc/cpuinfo support added. */
PROC_REG("cpuinfo", S_IRUGO, NULL),
PROC_REG("cpuinfo", 0444, &mckernel_buff_io),
#else /* POSTK_DEBUG_ARCH_DEP_42 */
// PROC_REG("cpuinfo", S_IRUGO, NULL),
#endif /* POSTK_DEBUG_ARCH_DEP_42 */
// PROC_REG("meminfo", S_IRUGO, NULL),
// PROC_REG("pagetypeinfo",S_IRUGO, NULL),
// PROC_REG("softirq", S_IRUGO, NULL),
PROC_REG("stat", S_IRUGO, NULL),
PROC_REG("stat", 0444, &mckernel_buff_io),
// PROC_REG("uptime", S_IRUGO, NULL),
// PROC_REG("version", S_IRUGO, NULL),
// PROC_REG("vmallocinfo",S_IRUSR, NULL),

File diff suppressed because it is too large Load Diff

View File

@@ -790,6 +790,7 @@ out:
return error;
} /* setup_node_files() */
#ifdef SETUP_PCI_FILES
static int read_file(void *buf, size_t size, char *fmt, va_list ap)
{
int error;
@@ -798,7 +799,6 @@ static int read_file(void *buf, size_t size, char *fmt, va_list ap)
int n;
struct file *fp = NULL;
loff_t off;
mm_segment_t ofs;
ssize_t ss;
dprintk("read_file(%p,%ld,%s,%p)\n", buf, size, fmt, ap);
@@ -824,13 +824,14 @@ static int read_file(void *buf, size_t size, char *fmt, va_list ap)
}
off = 0;
ofs = get_fs();
set_fs(KERNEL_DS);
ss = vfs_read(fp, buf, size, &off);
set_fs(ofs);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)
ss = kernel_read(fp, buf, size, &off);
#else
ss = kernel_read(fp, off, buf, size);
#endif
if (ss < 0) {
error = ss;
eprintk("mcctrl:read_file:vfs_read failed. %d\n", error);
eprintk("mcctrl:read_file:kernel_read failed. %d\n", error);
goto out;
}
if (ss >= size) {
@@ -892,16 +893,6 @@ out:
return error;
} /* read_long() */
#ifdef MCCTRL_KSYM_sys_readlink
static ssize_t (*mcctrl_sys_readlink)(const char *path, char *buf,
size_t bufsiz)
#if MCCTRL_KSYM_sys_readlink
= (void *)MCCTRL_KSYM_sys_readlink;
#else
= &sys_readlink;
#endif
#endif
static int read_link(char *buf, size_t bufsize, char *fmt, ...)
{
int error;
@@ -951,30 +942,14 @@ out:
return error;
} /* read_link() */
#ifdef POSTK_DEBUG_TEMP_FIX_22 /* iterate_dir() deadlock */
static int setup_one_pci(struct mcctrl_usrdata *udp, const char *name)
{
#else /* POSTK_DEBUG_TEMP_FIX_22 */
static int setup_one_pci(void *arg0, const char *name, int namlen,
loff_t offset, u64 ino, unsigned d_type)
{
struct mcctrl_usrdata *udp = arg0;
#endif /* POSTK_DEBUG_TEMP_FIX_22 */
int error;
char *buf = NULL;
long node;
struct sysfsm_bitmap_param param;
#ifdef POSTK_DEBUG_TEMP_FIX_22 /* iterate_dir() deadlock */
dprintk("setup_one_pci(%p,%s)\n", udp, name);
#else /* POSTK_DEBUG_TEMP_FIX_22 */
dprintk("setup_one_pci(%p,%s,%d,%#lx,%#lx,%d)\n",
arg0, name, namlen, (long)offset, (long)ino, d_type);
if (namlen != 12) {
error = 0;
goto out;
}
#endif /* POSTK_DEBUG_TEMP_FIX_22 */
buf = (void *)__get_free_pages(GFP_KERNEL, 0);
if (!buf) {
@@ -1026,26 +1001,39 @@ static int setup_one_pci(void *arg0, const char *name, int namlen,
error = 0;
out:
free_pages((long)buf, 0);
#ifdef POSTK_DEBUG_TEMP_FIX_22 /* iterate_dir() deadlock */
dprintk("setup_one_pci(%p,%s): %d\n", udp, name, error);
#else /* POSTK_DEBUG_TEMP_FIX_22 */
dprintk("setup_one_pci(%p,%s,%d,%#lx,%#lx,%d): %d\n",
arg0, name, namlen, (long)offset, (long)ino, d_type,
error);
#endif /* POSTK_DEBUG_TEMP_FIX_22 */
return error;
} /* setup_one_pci() */
#ifdef POSTK_DEBUG_TEMP_FIX_22 /* iterate_dir() deadlock */
LIST_HEAD(pci_file_name_list);
struct pci_file_name {
char *name;
struct list_head chain;
};
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0) || \
(defined(RHEL_RELEASE_CODE) && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 5))
struct mcctrl_filler_args {
struct dir_context ctx;
void *buf;
};
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
static int pci_file_name_gen(struct dir_context *ctx, const char *name,
int namlen, loff_t offset, u64 ino, unsigned int d_type)
#else
static int pci_file_name_gen(void *ctx, const char *name,
int namlen, loff_t offset, u64 ino, unsigned int d_type)
#endif
{
struct mcctrl_filler_args *args
= container_of(ctx, struct mcctrl_filler_args, ctx);
void *buf = args->buf;
#else
static int pci_file_name_gen(void *buf, const char *name, int namlen,
loff_t offset, u64 ino, unsigned d_type)
{
#endif
struct pci_file_name *p;
int error = -1;
@@ -1083,56 +1071,31 @@ out:
buf, name, namlen, (long)offset, (long)ino, d_type, error);
return error;
}
#endif /* POSTK_DEBUG_TEMP_FIX_22 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,11,0)
typedef int (*mcctrl_filldir_t)(void *buf, const char *name, int namlen,
loff_t offset, u64 ino, unsigned d_type);
struct mcctrl_filler_args {
struct dir_context ctx;
mcctrl_filldir_t filler;
void *buf;
};
static int mcctrl_filler(struct dir_context *ctx, const char *name,
int namlen, loff_t offset, u64 ino, unsigned d_type)
{
struct mcctrl_filler_args *args
= container_of(ctx, struct mcctrl_filler_args, ctx);
return (*args->filler)(args->buf, name, namlen, offset, ino, d_type);
} /* mcctrl_filler() */
static inline int mcctrl_vfs_readdir(struct file *file,
mcctrl_filldir_t filler, void *buf)
static inline int mcctrl_vfs_readdir(struct file *file, filldir_t filler,
void *buf)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0) || \
(defined(RHEL_RELEASE_CODE) && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 5))
struct mcctrl_filler_args args = {
.ctx.actor = &mcctrl_filler,
.filler = (void *)filler,
.ctx.actor = filler,
.buf = buf,
};
return iterate_dir(file, &args.ctx);
} /* mcctrl_vfs_readdir() */
#else
static inline int mcctrl_vfs_readdir(struct file *file, filldir_t filler,
void *buf)
{
return vfs_readdir(file, filler, buf);
} /* mcctrl_vfs_readdir() */
#endif
} /* mcctrl_vfs_readdir() */
static int setup_pci_files(struct mcctrl_usrdata *udp)
{
int error;
int er;
struct file *fp = NULL;
#ifdef POSTK_DEBUG_TEMP_FIX_22 /* iterate_dir() deadlock */
int ret = 0;
struct pci_file_name *cur;
struct pci_file_name *next;
#endif /* POSTK_DEBUG_TEMP_FIX_22 */
dprintk("setup_pci_files(%p)\n", udp);
fp = filp_open("/sys/bus/pci/devices", O_DIRECTORY, 0);
@@ -1142,18 +1105,13 @@ static int setup_pci_files(struct mcctrl_usrdata *udp)
goto out;
}
#ifdef POSTK_DEBUG_TEMP_FIX_22 /* iterate_dir() deadlock */
error = mcctrl_vfs_readdir(fp, &pci_file_name_gen, udp);
#else /* POSTK_DEBUG_TEMP_FIX_22 */
error = mcctrl_vfs_readdir(fp, &setup_one_pci, udp);
#endif /* POSTK_DEBUG_TEMP_FIX_22 */
if (error) {
eprintk("mcctrl:setup_pci_files:"
"mcctrl_vfs_readdir failed. %d\n", error);
goto out;
}
#ifdef POSTK_DEBUG_TEMP_FIX_22 /* iterate_dir() deadlock */
list_for_each_entry_safe(cur, next, &pci_file_name_list, chain) {
if (!ret) {
ret = setup_one_pci(udp, cur->name);
@@ -1162,7 +1120,6 @@ static int setup_pci_files(struct mcctrl_usrdata *udp)
kfree(cur->name);
kfree(cur);
}
#endif /* POSTK_DEBUG_TEMP_FIX_22 */
error = 0;
out:
@@ -1176,6 +1133,7 @@ out:
dprintk("setup_pci_files(%p): %d\n", udp, error);
return error;
} /* setup_pci_files() */
#endif // SETUP_PCI_FILES
void setup_sysfs_files(ihk_os_t os)
{
@@ -1215,7 +1173,9 @@ void setup_sysfs_files(ihk_os_t os)
setup_cpus_sysfs_files(udp);
setup_node_files(udp);
setup_cpus_sysfs_files_node_link(udp);
//setup_pci_files(udp);
#ifdef SETUP_PCI_FILES
setup_pci_files(udp);
#endif
/* Indicate sysfs files setup completion for boot script */
error = sysfsm_mkdirf(os, NULL, "/sys/setup_complete");

View File

@@ -21,7 +21,7 @@ endif
endif
ifeq ($(BUILD_MODULE_TMP),rhel)
ifeq ($(BUILD_MODULE),none)
BUILD_MODULE=$(shell if [ ${LINUX_VERSION_CODE} -eq 199168 -a ${RHEL_RELEASE} -ge 327 -a ${RHEL_RELEASE} -le 693 ]; then echo "linux-3.10.0-327.36.1.el7"; else echo "none"; fi)
BUILD_MODULE=$(shell if [ ${LINUX_VERSION_CODE} -eq 199168 -a ${RHEL_RELEASE} -ge 327 -a ${RHEL_RELEASE} -le 862 ]; then echo "linux-3.10.0-327.36.1.el7"; else echo "none"; fi)
endif
ifeq ($(BUILD_MODULE),none)
BUILD_MODULE=$(shell if [ ${LINUX_VERSION_CODE} -ge 262144 -a ${LINUX_VERSION_CODE} -lt 262400 ]; then echo "linux-4.0.9"; else echo "none"; fi)

View File

@@ -15,6 +15,7 @@
#include <linux/rbtree.h>
#include <linux/security.h>
#include <linux/cred.h>
#include <linux/version.h>
#include "overlayfs.h"
struct ovl_cache_entry {
@@ -34,10 +35,18 @@ struct ovl_dir_cache {
struct list_head entries;
};
/* vfs_readdir vs. iterate_dir compat */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0) || \
(defined(RHEL_RELEASE_CODE) && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 5))
#define USE_ITERATE_DIR 1
#endif
#ifndef USE_ITERATE_DIR
struct dir_context {
const filldir_t actor;
//loff_t pos;
};
#endif
struct ovl_readdir_data {
struct dir_context ctx;
@@ -256,7 +265,11 @@ static inline int ovl_dir_read(struct path *realpath,
do {
rdd->count = 0;
rdd->err = 0;
#ifdef USE_ITERATE_DIR
err = iterate_dir(realfile, &rdd->ctx);
#else
err = vfs_readdir(realfile, rdd->ctx.actor, rdd);
#endif
if (err >= 0)
err = rdd->err;
} while (!err && rdd->count);
@@ -365,6 +378,22 @@ static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
return cache;
}
#ifdef USE_ITERATE_DIR
/*
 * Adapter used by ovl_readdir() to drive iterate_dir() with a
 * filldir_t-style callback: ctx is what iterate_dir() receives, while
 * actor/buf carry the caller's original callback and its argument.
 */
struct iterate_wrapper {
struct dir_context ctx; /* handed to iterate_dir(); its actor is ovl_wrap_readdir */
filldir_t actor; /* caller-supplied fill callback */
void *buf; /* opaque argument forwarded to actor */
};
/*
 * dir_context actor that forwards each directory entry to the filldir_t
 * callback stored in the enclosing iterate_wrapper.
 *
 * ctx points at the dir_context embedded in an iterate_wrapper; the
 * wrapper is recovered with container_of() so this does not silently
 * depend on ctx being the first member of the structure.
 *
 * Returns whatever the wrapped callback returns.
 */
static int ovl_wrap_readdir(void *ctx, const char *name, int namelen,
			    loff_t offset, u64 ino, unsigned int d_type)
{
	struct iterate_wrapper *w =
		container_of(ctx, struct iterate_wrapper, ctx);

	return w->actor(w->buf, name, namelen, offset, ino, d_type);
}
#endif
static int ovl_readdir(struct file *file, void *buf, filldir_t filler)
{
struct ovl_dir_file *od = file->private_data;
@@ -376,7 +405,16 @@ static int ovl_readdir(struct file *file, void *buf, filldir_t filler)
ovl_dir_reset(file);
if (od->is_real) {
#ifdef USE_ITERATE_DIR
struct iterate_wrapper w = {
.ctx.actor = ovl_wrap_readdir,
.actor = filler,
.buf = buf,
};
res = iterate_dir(od->realfile, &w.ctx);
#else
res = vfs_readdir(od->realfile, filler, buf);
#endif
file->f_pos = od->realfile->f_pos;
return res;

View File

@@ -13,6 +13,8 @@ KDIR ?= @KDIR@
ARCH=@ARCH@
CFLAGS=-Wall -O -I. -I$(VPATH)/arch/${ARCH} -I${IHKDIR} -I@abs_builddir@/../../../ihk/linux/include
LDFLAGS=@LDFLAGS@
CPPFLAGS_SYSCALL_INTERCEPT=@CPPFLAGS_SYSCALL_INTERCEPT@
LDFLAGS_SYSCALL_INTERCEPT=@LDFLAGS_SYSCALL_INTERCEPT@
RPATH=$(shell echo $(LDFLAGS)|awk '{for(i=1;i<=NF;i++){if($$i~/^-L/){w=$$i;sub(/^-L/,"-Wl,-rpath,",w);print w}}}')
VPATH=@abs_srcdir@
TARGET=mcexec libsched_yield ldump2mcdump.so
@@ -21,12 +23,17 @@ LIBS=@LIBS@
IHKDIR ?= $(VPATH)/../../../ihk/linux/include/
MCEXEC_LIBS=-lmcexec -lrt -lnuma -pthread -L@abs_builddir@/../../../ihk/linux/user -lihk -Wl,-rpath,$(MCKERNEL_LIBDIR)
ENABLE_QLMPI=@ENABLE_QLMPI@
WITH_SYSCALL_INTERCEPT=@WITH_SYSCALL_INTERCEPT@
ifeq ($(ENABLE_QLMPI),yes)
MCEXEC_LIBS += -lmpi
TARGET+= libqlmpi.so ql_server ql_mpiexec_start ql_mpiexec_finalize ql_talker libqlfort.so
endif
ifeq ($(WITH_SYSCALL_INTERCEPT),yes)
TARGET += syscall_intercept.so
endif
ifeq ($(ARCH), arm64)
CFLAGS += $(foreach i, $(shell seq 1 100), $(addprefix -DPOSTK_DEBUG_ARCH_DEP_, $(i)))
CFLAGS += $(foreach i, $(shell seq 1 100), $(addprefix -DPOSTK_DEBUG_TEMP_FIX_, $(i)))
@@ -40,10 +47,10 @@ mcexec: mcexec.c libmcexec.a
# POSTK_DEBUG_ARCH_DEP_34, eclair arch depend separate.
ifeq ($(ARCH), arm64)
eclair: eclair.c arch/$(ARCH)/arch-eclair.c
$(CC) -I.. -I. -I./arch/$(ARCH)/include -I$(VPATH)/.. -I$(VPATH) -I$(VPATH)/arch/$(ARCH)/include $(CFLAGS) -o $@ $^ $(LIBS)
$(CC) -I.. -I. -I./arch/$(ARCH)/include -I$(VPATH)/.. -I$(VPATH) -I$(VPATH)/arch/$(ARCH)/include $(CFLAGS) -o $@ $^ $(LIBS) -ldl -lz
else
eclair: eclair.c
$(CC) $(CFLAGS) -I${IHKDIR} -o $@ $^ $(LIBS)
eclair: eclair.c arch/$(ARCH)/arch-eclair.c
$(CC) -I.. -I$(VPATH) -I$(VPATH)/arch/$(ARCH)/include $(CFLAGS) -o $@ $^ $(LIBS)
endif
ldump2mcdump.so: ldump2mcdump.c
@@ -52,6 +59,12 @@ ldump2mcdump.so: ldump2mcdump.c
libsched_yield: libsched_yield.c
$(CC) -shared -fPIC -Wl,-soname,sched_yield.so.1 -o libsched_yield.so.1.0.0 $^ -lc -ldl
# Preload library that hooks system calls issued by threads migrated to Linux.
# Libraries are listed AFTER the objects that reference them: the linker
# resolves symbols left to right, and toolchains defaulting to --as-needed
# silently drop a shared library named before any object that uses it.
syscall_intercept.so: syscall_intercept.c libsyscall_intercept_arch.a
	$(CC) $(CPPFLAGS_SYSCALL_INTERCEPT) -g -O2 -fpic -shared $^ $(LDFLAGS_SYSCALL_INTERCEPT) -L. -lsyscall_intercept_arch -lsyscall_intercept -o $@
libsyscall_intercept_arch.a::
+(cd arch/${ARCH}; $(MAKE))
libmcexec.a::
+(cd arch/${ARCH}; $(MAKE))
@@ -99,6 +112,9 @@ ifeq ($(ENABLE_QLMPI),yes)
install -m 755 ql_mpiexec_start $(BINDIR)
install -m 755 ql_mpiexec_finalize $(BINDIR)
install -m 755 ql_talker $(SBINDIR)
endif
ifeq ($(WITH_SYSCALL_INTERCEPT),yes)
install -m 755 syscall_intercept.so $(MCKERNEL_LIBDIR)
endif
@uncomment_if_ENABLE_MEMDUMP@install -m 755 eclair $(BINDIR)
@uncomment_if_ENABLE_MEMDUMP@install -m 755 vmcore2mckdump $(BINDIR)

View File

@@ -4,7 +4,7 @@ BINDIR=@BINDIR@
KDIR ?= @KDIR@
CFLAGS=-Wall -O -I.
VPATH=@abs_srcdir@
TARGET=../../libmcexec.a
TARGET=../../libmcexec.a ../../libsyscall_intercept_arch.a
LIBS=@LIBS@
all: $(TARGET)
@@ -18,6 +18,12 @@ archdep.o: archdep.S
arch_syscall.o: arch_syscall.c
$(CC) -c -I${KDIR} $(CFLAGS) $(EXTRA_CFLAGS) -fPIE -pie -pthread $<
../../libsyscall_intercept_arch.a: archdep_c.o
$(AR) cr ../../libsyscall_intercept_arch.a archdep_c.o
archdep_c.o: archdep_c.c
$(CC) -c -I${KDIR} $(CFLAGS) $(EXTRA_CFLAGS) -fPIE -pie -pthread $<
clean:
$(RM) $(TARGET) *.o

View File

@@ -42,7 +42,7 @@ int print_kregs(char *rbp, size_t rbp_size, const struct arch_kregs *kregs)
}
for (i = 0; i < sizeof(regs_1)/sizeof(regs_1[0]); i++) { /* rsi, rdi, rbp, rsp */
ret = print_bin(rbp, rbp_size, (void *)regs_1[i], sizeof(regs_1[0]));
ret = print_bin(rbp, rbp_size, regs_1 + i, sizeof(regs_1[0]));
if (ret < 0) {
return ret;
}
@@ -62,7 +62,7 @@ int print_kregs(char *rbp, size_t rbp_size, const struct arch_kregs *kregs)
}
for (i = 0; i < sizeof(regs_2)/sizeof(regs_2[0]); i++) { /* r12-r15 */
ret = print_bin(rbp, rbp_size, (void *)regs_2[i], sizeof(regs_2[0]));
ret = print_bin(rbp, rbp_size, regs_2 + i, sizeof(regs_2[0]));
if (ret < 0) {
return ret;
}

View File

@@ -67,6 +67,12 @@ get_syscall_arg6(syscall_args *args)
return args->r9;
}
/* Accessor: return the rip member stored in the given syscall_args. */
static inline unsigned long
get_syscall_rip(syscall_args *args)
{
return args->rip;
}
static inline void
set_syscall_number(syscall_args *args, unsigned long value)
{

View File

@@ -48,7 +48,7 @@ archdep_syscall(struct syscall_wait_desc *w, long *ret)
if (*ret >= PATH_MAX) {
*ret = -ENAMETOOLONG;
}
if (ret < 0) {
if (*ret < 0) {
return 0;
}
__dprintf("open: %s\n", pathbuf);

View File

@@ -1,15 +1,22 @@
/*
arg: rdi, rsi, rdx, rcx, r8, r9
ret: rax
Calling convention:
arg: rdi, rsi, rdx, rcx, r8, r9
ret: rax
rax syscall number
syscall: (rax:num) rdi rsi rdx r10 r8 r9 (rcx:ret addr)
fd, cmd, param
rdi: fd
rsi: cmd
rdx: param
rcx: save area
r8: new thread context
rdi: fd
rsi: cmd
rdx: param
rcx: save area
r8: new thread context
Syscall calling convention:
syscall number: rax
arg: rdi, rsi, rdx, r10, r8, r9
return addr: rcx
rdi: fd
rsi: cmd
rdx: param
*/
.global switch_ctx
@@ -91,6 +98,7 @@ switch_ctx:
1:
mov $0xffffffffffffffff,%eax
retq
2:
pushq %rax
movq $158,%rax /* arch_prctl */
@@ -146,4 +154,3 @@ compare_and_swap_int:
lock
cmpxchgl %edx,0(%rdi)
retq

View File

@@ -0,0 +1,52 @@
/*
function call convention
rdi, rsi, rdx, rcx, r8, r9: IN arguments
rax: OUT return value
syscall convention:
rax: IN syscall number
rdi, rsi, rdx, r10, r8, r9: IN arguments
rax: OUT return value
rcx, r11: CLOBBER
*/
/*
 * Issue a system call with six arguments using the x86_64 "syscall"
 * instruction.
 *
 * syscall_number: placed in rax
 * arg0..arg5:     placed in rdi, rsi, rdx, r10, r8, r9 respectively
 *
 * Returns whatever the kernel leaves in rax; no errno translation is
 * performed.
 */
long uti_syscall6(long syscall_number, long arg0, long arg1, long arg2, long arg3, long arg4, long arg5)
{
	long ret;
	/*
	 * r10, r8 and r9 have no single-letter constraint, so pin the
	 * values with explicit register variables instead of emitting
	 * extra moves inside the asm template.
	 */
	register long r10 __asm__("r10") = arg3;
	register long r8 __asm__("r8") = arg4;
	register long r9 __asm__("r9") = arg5;

	/* __asm__ (not asm) so the file also builds under strict -std= modes */
	__asm__ volatile ("syscall"
			  : "=a" (ret)
			  : "a" (syscall_number),
			    "D" (arg0), "S" (arg1), "d" (arg2),
			    "r" (r10), "r" (r8), "r" (r9)
			  : "rcx", "r11", "memory");
	return ret;
}
/*
 * Issue a system call with three arguments using the x86_64 "syscall"
 * instruction.
 *
 * syscall_number: placed in rax
 * arg0..arg2:     placed in rdi, rsi, rdx respectively
 *
 * Returns whatever the kernel leaves in rax; no errno translation is
 * performed.
 */
long uti_syscall3(long syscall_number, long arg0, long arg1, long arg2)
{
	long ret;

	/* __asm__ (not asm) so the file also builds under strict -std= modes */
	__asm__ volatile ("syscall"
			  : "=a" (ret)
			  : "a" (syscall_number), "D" (arg0), "S" (arg1), "d" (arg2)
			  : "rcx", "r11", "memory");
	return ret;
}
/*
 * Issue a system call with one argument using the x86_64 "syscall"
 * instruction.
 *
 * syscall_number: placed in rax
 * arg0:           placed in rdi
 *
 * Returns whatever the kernel leaves in rax; no errno translation is
 * performed.
 */
long uti_syscall1(long syscall_number, long arg0)
{
	long ret;

	/* __asm__ (not asm) so the file also builds under strict -std= modes */
	__asm__ volatile ("syscall"
			  : "=a" (ret)
			  : "a" (syscall_number), "D" (arg0)
			  : "rcx", "r11", "memory");
	return ret;
}
/*
 * Issue a system call that takes no arguments using the x86_64 "syscall"
 * instruction.
 *
 * syscall_number: placed in rax
 *
 * Returns whatever the kernel leaves in rax; no errno translation is
 * performed.
 */
long uti_syscall0(long syscall_number)
{
	long ret;

	/* __asm__ (not asm) so the file also builds under strict -std= modes */
	__asm__ volatile ("syscall"
			  : "=a" (ret)
			  : "a" (syscall_number)
			  : "rcx", "r11", "memory");
	return ret;
}

View File

@@ -2,8 +2,18 @@
#ifndef HEADER_USER_X86_ECLAIR_H
#define HEADER_USER_X86_ECLAIR_H
#define MAP_KERNEL 0xFFFFFFFF80000000
#define MAP_ST 0xFFFF800000000000
#ifndef POSTK_DEBUG_ARCH_DEP_34
#define MAP_ST_START 0xffff800000000000UL
#define MAP_VMAP_START 0xffff850000000000UL
#define MAP_FIXED_START 0xffff860000000000UL
#define LINUX_PAGE_OFFSET 0xffff880000000000UL
#define MAP_KERNEL_START 0xFFFFFFFFFE800000UL
#endif /* POSTK_DEBUG_ARCH_DEP_34 */
/* TODO: these should be updated when McKernel changes */
#define MCKERNEL_ELF_START "0xFFFFFFFFFE801000"
#define MCKERNEL_ELF_LEN "0x0000000000100000"
#define ARCH_CLV_SPAN "x86_cpu_local_variables_span"

View File

@@ -1,4 +1,6 @@
extern int switch_ctx(int fd, unsigned long cmd, void **param, void *lctx, void *rctx);
#include "../include/uprotocol.h"
extern int switch_ctx(int fd, unsigned long cmd, struct uti_save_fs_desc *desc, void *lctx, void *rctx);
extern unsigned long compare_and_swap(unsigned long *addr, unsigned long old, unsigned long new);
extern unsigned int compare_and_swap_int(unsigned int *addr, unsigned int old, unsigned int new);
extern int archdep_syscall(struct syscall_wait_desc *w, long *ret);

View File

@@ -0,0 +1,5 @@
/*
 * Raw system call helpers taking 6, 3, 1 and 0 arguments.
 * Each takes the system call number first, followed by its arguments,
 * and returns a long result.
 */
extern long uti_syscall6(long syscall_number, long arg0, long arg1, long arg2, long arg3, long arg4, long arg5);
extern long uti_syscall3(long syscall_number, long arg0, long arg1, long arg2);
extern long uti_syscall1(long syscall_number, long arg0);
extern long uti_syscall0(long syscall_number);

View File

@@ -8,9 +8,7 @@
* Copyright (C) 2015 RIKEN AICS
*/
#ifdef POSTK_DEBUG_ARCH_DEP_33
#include "../config.h"
#endif /* POSTK_DEBUG_ARCH_DEP_33 */
#include <bfd.h>
#include <fcntl.h>
#include <inttypes.h>
@@ -22,10 +20,8 @@
#include <arpa/inet.h>
#include <sys/ioctl.h>
#include <ihk/ihk_host_user.h>
#ifdef POSTK_DEBUG_ARCH_DEP_34
#include <eclair.h>
#include <arch-eclair.h>
#endif /* POSTK_DEBUG_ARCH_DEP_34 */
#define CPU_TID_BASE 1000000
@@ -85,11 +81,7 @@ static struct thread_info *curr_thread = NULL;
static uintptr_t ihk_mc_switch_context = -1;
#endif /* POSTK_DEBUG_ARCH_DEP_34 */
#ifdef POSTK_DEBUG_ARCH_DEP_34
uintptr_t lookup_symbol(char *name) {
#else /* POSTK_DEBUG_ARCH_DEP_34 */
static uintptr_t lookup_symbol(char *name) {
#endif /* POSTK_DEBUG_ARCH_DEP_34 */
int i;
for (i = 0; i < nsyms; ++i) {
@@ -101,22 +93,22 @@ static uintptr_t lookup_symbol(char *name) {
return NOSYMBOL;
} /* lookup_symbol() */
#define NOPHYS ((uintptr_t)-1)
static uintptr_t virt_to_phys(uintptr_t va) {
#ifndef POSTK_DEBUG_ARCH_DEP_34
#define MAP_KERNEL 0xFFFFFFFF80000000
#endif /* POSTK_DEBUG_ARCH_DEP_34 */
if (va >= MAP_KERNEL) {
return (va - MAP_KERNEL + kernel_base);
if (va >= MAP_KERNEL_START) {
return va - MAP_KERNEL_START + kernel_base;
}
#ifndef POSTK_DEBUG_ARCH_DEP_34
#define MAP_ST 0xFFFF800000000000
#endif /* POSTK_DEBUG_ARCH_DEP_34 */
if (va >= MAP_ST) {
return (va - MAP_ST);
else if (va >= LINUX_PAGE_OFFSET) {
return va - LINUX_PAGE_OFFSET;
}
if (0) printf("virt_to_phys(%lx): -1\n", va);
#define NOPHYS ((uintptr_t)-1)
else if (va >= MAP_FIXED_START) {
return va - MAP_FIXED_START;
}
else if (va >= MAP_ST_START) {
return va - MAP_ST_START;
}
return NOPHYS;
} /* virt_to_phys() */
@@ -673,11 +665,7 @@ static int setup_dump(char *fname) {
return 0;
} /* setup_dump() */
#ifdef POSTK_DEBUG_ARCH_DEP_38
static ssize_t print_hex(char *buf, size_t buf_size, char *str) {
#else /* POSTK_DEBUG_ARCH_DEP_38 */
static ssize_t print_hex(char *buf, char *str) {
#endif /* POSTK_DEBUG_ARCH_DEP_38 */
char *p;
char *q;
@@ -702,11 +690,7 @@ static ssize_t print_hex(char *buf, char *str) {
return (q - buf);
} /* print_hex() */
#if defined(POSTK_DEBUG_ARCH_DEP_34) && defined(POSTK_DEBUG_ARCH_DEP_38)
ssize_t print_bin(char *buf, size_t buf_size, void *data, size_t size) {
#else /* POSTK_DEBUG_ARCH_DEP_34 && POSTK_DEBUG_ARCH_DEP_38*/
static ssize_t print_bin(char *buf, void *data, size_t size) {
#endif /* POSTK_DEBUG_ARCH_DEP_34 && POSTK_DEBUG_ARCH_DEP_38*/
uint8_t *p;
char *q;
int i;
@@ -733,13 +717,8 @@ static ssize_t print_bin(char *buf, void *data, size_t size) {
return (q - buf);
} /* print_bin() */
#ifdef POSTK_DEBUG_ARCH_DEP_38
static void command(const char *cmd, char *res, size_t res_size) {
const char *p;
#else /* POSTK_DEBUG_ARCH_DEP_38 */
static void command(char *cmd, char *res) {
char *p;
#endif /* POSTK_DEBUG_ARCH_DEP_38 */
char *rbp;
p = cmd;
@@ -801,11 +780,7 @@ static void command(char *cmd, char *res) {
#endif /* POSTK_DEBUG_ARCH_DEP_34 */
rbp += sprintf(rbp, "l");
if (0)
#ifdef POSTK_DEBUG_ARCH_DEP_38
rbp += print_hex(rbp, res_size, str);
#else /* POSTK_DEBUG_ARCH_DEP_38 */
rbp += print_hex(rbp, str);
#endif /* POSTK_DEBUG_ARCH_DEP_38 */
rbp += sprintf(rbp, "%s", str);
}
else if (!strcmp(p, "D")) {
@@ -814,20 +789,9 @@ static void command(char *cmd, char *res) {
}
else if (!strcmp(p, "g")) {
if (curr_thread->cpu < 0) {
#ifndef POSTK_DEBUG_ARCH_DEP_34
struct x86_kregs {
uintptr_t rsp, rbp, rbx, rsi;
uintptr_t rdi, r12, r13, r14;
uintptr_t r15, rflags, rsp0;
};
#endif /* POSTK_DEBUG_ARCH_DEP_34 */
int error;
#ifdef POSTK_DEBUG_ARCH_DEP_34
struct arch_kregs kregs;
#else /* POSTK_DEBUG_ARCH_DEP_34 */
struct x86_kregs kregs;
#endif /* POSTK_DEBUG_ARCH_DEP_34 */
error = read_mem(curr_thread->process+K(CTX_OFFSET),
&kregs, sizeof(kregs));
@@ -836,36 +800,7 @@ static void command(char *cmd, char *res) {
break;
}
#ifdef POSTK_DEBUG_ARCH_DEP_34
print_kregs(rbp, res_size, &kregs);
#else /* POSTK_DEBUG_ARCH_DEP_34 */
rbp += sprintf(rbp, "xxxxxxxxxxxxxxxx"); /* rax */
rbp += print_bin(rbp, &kregs.rbx, sizeof(uint64_t));
rbp += sprintf(rbp, "xxxxxxxxxxxxxxxx"); /* rcx */
rbp += sprintf(rbp, "xxxxxxxxxxxxxxxx"); /* rdx */
rbp += print_bin(rbp, &kregs.rsi, sizeof(uint64_t));
rbp += print_bin(rbp, &kregs.rdi, sizeof(uint64_t));
rbp += print_bin(rbp, &kregs.rbp, sizeof(uint64_t));
rbp += print_bin(rbp, &kregs.rsp, sizeof(uint64_t));
rbp += sprintf(rbp, "xxxxxxxxxxxxxxxx"); /* r8 */
rbp += sprintf(rbp, "xxxxxxxxxxxxxxxx"); /* r9 */
rbp += sprintf(rbp, "xxxxxxxxxxxxxxxx"); /* r10 */
rbp += sprintf(rbp, "xxxxxxxxxxxxxxxx"); /* r11 */
rbp += print_bin(rbp, &kregs.r12, sizeof(uint64_t));
rbp += print_bin(rbp, &kregs.r13, sizeof(uint64_t));
rbp += print_bin(rbp, &kregs.r14, sizeof(uint64_t));
rbp += print_bin(rbp, &kregs.r15, sizeof(uint64_t));
rbp += print_bin(rbp, &ihk_mc_switch_context,
sizeof(uint64_t)); /* rip */
rbp += print_bin(rbp, &kregs.rflags, sizeof(uint32_t));
rbp += sprintf(rbp, "xxxxxxxx"); /* cs */
rbp += sprintf(rbp, "xxxxxxxx"); /* ss */
rbp += sprintf(rbp, "xxxxxxxx"); /* ds */
rbp += sprintf(rbp, "xxxxxxxx"); /* es */
rbp += sprintf(rbp, "xxxxxxxx"); /* fs */
rbp += sprintf(rbp, "xxxxxxxx"); /* gs */
#endif /* POSTK_DEBUG_ARCH_DEP_34 */
}
else {
int error;
@@ -943,11 +878,7 @@ static void command(char *cmd, char *res) {
#endif /* POSTK_DEBUG_ARCH_DEP_34 */
rbp += sprintf(rbp, "l");
if (0)
#ifdef POSTK_DEBUG_ARCH_DEP_38
rbp += print_hex(rbp, res_size, str);
#else /* POSTK_DEBUG_ARCH_DEP_38 */
rbp += print_hex(rbp, str);
#endif /* POSTK_DEBUG_ARCH_DEP_38 */
rbp += sprintf(rbp, "%s", str);
}
else if (!strncmp(p, "T", 1)) {
@@ -1039,11 +970,7 @@ static void command(char *cmd, char *res) {
else {
q += sprintf(q, "status=%#x", ti->status);
}
#ifdef POSTK_DEBUG_ARCH_DEP_38
rbp += print_hex(rbp, res_size, buf);
#else /* POSTK_DEBUG_ARCH_DEP_38 */
rbp += print_hex(rbp, buf);
#endif /* POSTK_DEBUG_ARCH_DEP_38 */
}
} while (0);
@@ -1272,11 +1199,7 @@ int main(int argc, char *argv[]) {
}
mode = 0;
fputc('+', ofp);
#ifdef POSTK_DEBUG_ARCH_DEP_38
command(lbuf, rbuf, sizeof(rbuf));
#else /* POSTK_DEBUG_ARCH_DEP_38 */
command(lbuf, rbuf);
#endif /* POSTK_DEBUG_ARCH_DEP_38 */
sum = 0;
for (p = rbuf; *p != '\0'; ++p) {
sum += *p;

View File

@@ -3,11 +3,7 @@
#ifndef HEADER_USER_COMMON_ECLAIR_H
#define HEADER_USER_COMMON_ECLAIR_H
#ifdef POSTK_DEBUG_ARCH_DEP_76 /* header path fix */
#include "../config.h"
#else /* POSTK_DEBUG_ARCH_DEP_76 */
#include <config.h>
#endif /* POSTK_DEBUG_ARCH_DEP_76 */
#include <stdio.h>
#include <inttypes.h>
#include <arch-eclair.h>

View File

@@ -11,7 +11,9 @@
typedef int (*int_void_fn)(void);
#if 0
static int_void_fn orig_sched_yield = 0;
#endif
int sched_yield(void)
{

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,139 @@
#include <libsyscall_intercept_hook_point.h>
#include <errno.h>
#include <stdio.h>
#include <stdint.h>
#include <syscall.h>
#include <sys/time.h>
#include <sys/resource.h>
#include "../include/uprotocol.h"
#include "../include/uti.h"
#include "./archdep_uti.h"
/*
 * State consulted by hook() on every intercepted system call. Its address
 * is handed out by init() and by the pseudo system call 888 in hook().
 */
static struct uti_desc uti_desc;
/* When defined, hook() counts taken-over candidates in uti_desc.syscalls[] */
#define DEBUG_UTI
/*
 * Callback installed into intercept_hook_point by init().
 *
 * syscall_number, arg0..arg5: the intercepted system call and its arguments.
 * result: written with the value to report when the call is taken over.
 *
 * Returns 1 when the system call isn't taken over and 0 when it is taken
 * over (in which case *result has been set).
 */
static int
hook(long syscall_number,
long arg0, long arg1,
long arg2, long arg3,
long arg4, long arg5,
long *result)
{
//return 1; /* debug */
/* The tid is fetched with a raw syscall on every invocation, before any check */
int tid = uti_syscall0(__NR_gettid);
struct terminate_thread_desc term_desc;
unsigned long code;
int stack_top;
/* Interception is off until start_syscall_intercept becomes non-zero */
if (!uti_desc.start_syscall_intercept) {
return 1; /* System call isn't taken over */
}
/* Calls from threads other than uti_desc.mck_tid are only counted
   (numbers 0..511, if the syscalls2 table is set) and never taken over */
if (tid != uti_desc.mck_tid) {
if (uti_desc.syscalls2 && syscall_number >= 0 && syscall_number < 512) {
uti_desc.syscalls2[syscall_number]++;
}
return 1;
}
#ifdef DEBUG_UTI
/* Per-number counter for calls made by the mck_tid thread */
if (uti_desc.syscalls && syscall_number >= 0 && syscall_number < 512) {
uti_desc.syscalls[syscall_number]++;
}
#endif
switch (syscall_number) {
case __NR_gettid:
/* Answer with the recorded mck_tid instead of the tid obtained above */
*result = uti_desc.mck_tid;
return 0;
/* These calls are forwarded through ioctl(MCEXEC_UP_SYSCALL_THREAD) */
case __NR_futex:
case __NR_brk:
case __NR_mmap:
case __NR_munmap:
case __NR_mprotect:
case __NR_mremap:
/* Overflow check */
if (uti_desc.syscall_stack_top == -1) {
*result = -ENOMEM;
return 0;
}
/* Sanity check */
if (uti_desc.syscall_stack_top < 0 || uti_desc.syscall_stack_top >= UTI_SZ_SYSCALL_STACK) {
*result = -EINVAL;
return 0;
}
/* Store the return value in the stack to prevent it from getting corrupted
   when an interrupt happens just after ioctl() and before copying the return
   value to *result */
/* Reserve a slot: atomically take the current top index and decrement it */
stack_top = __sync_fetch_and_sub(&uti_desc.syscall_stack_top, 1);
uti_desc.syscall_stack[stack_top].number = syscall_number;
uti_desc.syscall_stack[stack_top].args[0] = arg0;
uti_desc.syscall_stack[stack_top].args[1] = arg1;
uti_desc.syscall_stack[stack_top].args[2] = arg2;
uti_desc.syscall_stack[stack_top].args[3] = arg3;
uti_desc.syscall_stack[stack_top].args[4] = arg4;
uti_desc.syscall_stack[stack_top].args[5] = arg5;
uti_desc.syscall_stack[stack_top].uti_clv = uti_desc.uti_clv;
/* Pre-set the result to -EINVAL; presumably overwritten by the ioctl
   handler on success -- TODO confirm against the driver */
uti_desc.syscall_stack[stack_top].ret = -EINVAL;
uti_syscall3(__NR_ioctl, uti_desc.fd, MCEXEC_UP_SYSCALL_THREAD, (long)(uti_desc.syscall_stack + stack_top));
*result = uti_desc.syscall_stack[stack_top].ret;
/* Release the slot: undo the decrement done when reserving it */
__sync_fetch_and_add(&uti_desc.syscall_stack_top, 1);
return 0; /* System call is taken over */
case __NR_exit_group:
/* Bit 32 of code distinguishes exit_group from exit */
code = 0x100000000;
goto make_remote_thread_exit;
case __NR_exit:
code = 0;
make_remote_thread_exit:
/* Make migrated-to-Linux thread on the McKernel side call do_exit() or terminate() */
term_desc.pid = uti_desc.pid;
term_desc.tid = uti_desc.tid; /* tid of mcexec */
/* Low 8 bits of the exit status are placed in bits 8..15 of code */
term_desc.code = code | ((arg0 & 255) << 8);
term_desc.tsk = uti_desc.key;
uti_syscall3(__NR_ioctl, uti_desc.fd, MCEXEC_UP_TERMINATE_THREAD, (long)&term_desc);
/* Not taken over: the original exit/exit_group is left to proceed */
return 1;
/* Process/thread creation and exec are refused with -ENOSYS */
case __NR_clone:
case __NR_fork:
case __NR_vfork:
case __NR_execve:
*result = -ENOSYS;
return 0;
#if 0 /* debug */
case __NR_set_robust_list:
*result = -ENOSYS;
return 0;
#endif
/* Pseudo system call number 888: report the address of uti_desc */
case 888:
*result = (long)&uti_desc;
return 0;
default:
return 1;
}
/* Not reached: every switch arm above returns */
return 0;
}
/*
 * Constructor: installs hook() as the interception callback, marks the
 * syscall stack as empty and registers uti_desc via system call number 733.
 */
static __attribute__((constructor)) void
init(void)
{
/* Set up the callback function */
intercept_hook_point = hook;
/* Initialize uti_desc */
/* The top index starts at the last slot; hook() decrements it per reserved slot */
uti_desc.syscall_stack_top = UTI_SZ_SYSCALL_STACK - 1;
/* Pass address of uti_desc to McKernel */
/* NOTE(review): 733 is an unnamed system call number; the return value is ignored */
uti_syscall1(733, (unsigned long)&uti_desc);
}
/* Destructor counterpart of init(); intentionally empty, nothing is torn down. */
static __attribute__((destructor)) void
dtor(void)
{
}

1
ihk Submodule

Submodule ihk added at d9c74adf3f

View File

@@ -6,7 +6,7 @@ IHKDIR=$(IHKBASE)/$(TARGETDIR)
OBJS = init.o mem.o debug.o mikc.o listeners.o ap.o syscall.o cls.o host.o
OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o shmobj.o
OBJS += zeroobj.o procfs.o devobj.o sysfs.o xpmem.o profile.o freeze.o
OBJS += rbtree.o
OBJS += rbtree.o hugefileobj.o
OBJS += pager.o
# POSTK_DEBUG_ARCH_DEP_18 coredump arch separation.
DEPSRCS=$(wildcard $(SRC)/*.c)
@@ -19,7 +19,7 @@ endif
CFLAGS += -I$(SRC)/include -I@abs_builddir@/../ -I@abs_builddir@/include -D__KERNEL__ -g -fno-omit-frame-pointer -fno-inline -fno-inline-small-functions
ifneq ($(ARCH), arm64)
CFLAGS += -mcmodel=large -mno-red-zone
CFLAGS += -mcmodel=large -mno-red-zone -mno-sse
endif
LDFLAGS += -e arch_start
IHKOBJ = ihk/ihk.o

View File

@@ -29,15 +29,13 @@
#include <time.h>
#include <syscall.h>
#include <rusage_private.h>
#include <debug.h>
//#define DEBUG_PRINT_AP
#ifdef DEBUG_PRINT_AP
#define dkprintf(...) do { kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) do { kprintf(__VA_ARGS__); } while (0)
#else
#define dkprintf(...) do { } while (0)
#define ekprintf(...) do { kprintf(__VA_ARGS__); } while (0)
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
int num_processors = 1;
@@ -209,8 +207,10 @@ store_fake_cpu_info(struct sysfs_ops *ops0, void *instance, void *buf,
static struct fake_cpu_info_ops show_fci_online = {
.member = ONLINE,
.ops.show = &show_fake_cpu_info,
.ops.store = &store_fake_cpu_info,
.ops = {
.show = &show_fake_cpu_info,
.store = &store_fake_cpu_info,
},
};
void

View File

@@ -1,24 +1,28 @@
PHDRS
{
text PT_LOAD FLAGS(5);
data PT_LOAD FLAGS(7);
data PT_LOAD FLAGS(7);
}
SECTIONS
{
. = 0xffffffff80001000;
_head = .;
_head = .;
.text : {
*(.text);
} : text
.text : {
*(.text);
} : text
. = ALIGN(4096);
. = ALIGN(4096);
.data : {
*(.data)
*(.data.*)
*(.data)
*(.data.*)
. = ALIGN(8);
__start___verbose = .;
*(__verbose);
__stop___verbose = .;
} :data
.rodata : {
*(.rodata .rodata.*)
*(.rodata .rodata.*)
} :data
.vsyscall : ALIGN(0x1000) {
@@ -37,14 +41,14 @@ SECTIONS
. = ALIGN(4096);
} : data = 0xf4
.bss : {
*(.bss .bss.*)
}
. = ALIGN(4096);
_end = .;
.bss : {
*(.bss .bss.*)
}
. = ALIGN(4096);
_end = .;
/DISCARD/ : {
*(.eh_frame)
*(.note.gnu.build-id)
*(.eh_frame)
*(.note.gnu.build-id)
}
}

View File

@@ -1,24 +1,28 @@
PHDRS
{
text PT_LOAD FLAGS(5);
data PT_LOAD FLAGS(7);
data PT_LOAD FLAGS(7);
}
SECTIONS
{
. = 0xffffffff80001000;
_head = .;
_head = .;
.text : {
*(.text);
} : text
.text : {
*(.text);
} : text
. = ALIGN(4096);
. = ALIGN(4096);
.data : {
*(.data)
*(.data.*)
*(.data)
*(.data.*)
. = ALIGN(8);
__start___verbose = .;
*(__verbose);
__stop___verbose = .;
} :data
.rodata : {
*(.rodata .rodata.*)
*(.rodata .rodata.*)
} :data
.vsyscall : ALIGN(0x1000) {
@@ -37,14 +41,14 @@ SECTIONS
. = ALIGN(4096);
} : data = 0xf4
.bss : {
*(.bss .bss.*)
}
. = ALIGN(4096);
_end = .;
.bss : {
*(.bss .bss.*)
}
. = ALIGN(4096);
_end = .;
/DISCARD/ : {
*(.eh_frame)
*(.note.gnu.build-id)
*(.eh_frame)
*(.note.gnu.build-id)
}
}

View File

@@ -1,24 +1,28 @@
PHDRS
{
text PT_LOAD FLAGS(5);
data PT_LOAD FLAGS(7);
data PT_LOAD FLAGS(7);
}
SECTIONS
{
. = 0xffffffff80001000;
_head = .;
_head = .;
.text : {
*(.text);
} : text
.text : {
*(.text);
} : text
. = ALIGN(4096);
. = ALIGN(4096);
.data : {
*(.data)
*(.data.*)
*(.data)
*(.data.*)
. = ALIGN(8);
__start___verbose = .;
*(__verbose);
__stop___verbose = .;
} :data
.rodata : {
*(.rodata .rodata.*)
*(.rodata .rodata.*)
} :data
.vsyscall : ALIGN(0x1000) {
@@ -37,10 +41,10 @@ SECTIONS
. = ALIGN(4096);
} : data = 0xf4
.bss : {
*(.bss .bss.*)
}
. = ALIGN(4096);
_end = .;
.bss : {
*(.bss .bss.*)
}
. = ALIGN(4096);
_end = .;
}

View File

@@ -16,6 +16,10 @@ SECTIONS
.data : {
*(.data)
*(.data.*)
. = ALIGN(8);
__start___verbose = .;
*(__verbose);
__stop___verbose = .;
} :data
.rodata : {
*(.rodata .rodata.*)

View File

@@ -16,6 +16,10 @@ SECTIONS
.data : {
*(.data)
*(.data.*)
. = ALIGN(8);
__start___verbose = .;
*(__verbose);
__stop___verbose = .;
} :data
.rodata : {
*(.rodata .rodata.*)

View File

@@ -16,6 +16,10 @@ SECTIONS
.data : {
*(.data)
*(.data.*)
. = ALIGN(8);
__start___verbose = .;
*(__verbose);
__stop___verbose = .;
} :data
.rodata : {
*(.rodata .rodata.*)

View File

@@ -16,6 +16,10 @@ SECTIONS
.data : {
*(.data)
*(.data.*)
. = ALIGN(8);
__start___verbose = .;
*(__verbose);
__stop___verbose = .;
} :data
.rodata : {
*(.rodata .rodata.*)

View File

@@ -1,24 +1,28 @@
PHDRS
{
text PT_LOAD FLAGS(5);
data PT_LOAD FLAGS(7);
data PT_LOAD FLAGS(7);
}
SECTIONS
{
. = 0xffffffff80001000;
_head = .;
. = 0xFFFFFFFFFE801000;
_head = .;
.text : {
*(.text);
} : text
.text : {
*(.text);
} : text
. = ALIGN(4096);
. = ALIGN(4096);
.data : {
*(.data)
*(.data.*)
*(.data)
*(.data.*)
. = ALIGN(8);
__start___verbose = .;
*(__verbose);
__stop___verbose = .;
} :data
.rodata : {
*(.rodata .rodata.*)
*(.rodata .rodata.*)
} :data
.vsyscall : ALIGN(0x1000) {
@@ -37,9 +41,9 @@ SECTIONS
. = ALIGN(4096);
} : data = 0xf4
.bss : {
*(.bss .bss.*)
}
. = ALIGN(4096);
_end = .;
.bss : {
*(.bss .bss.*)
}
. = ALIGN(4096);
_end = .;
}

View File

@@ -18,6 +18,9 @@
#include <ihk/lock.h>
#include <ihk/monitor.h>
#include <errno.h>
#include <sysfs.h>
#include <debug.h>
#include <limits.h>
struct ihk_kmsg_buf *kmsg_buf;
@@ -84,7 +87,8 @@ void kputs(char *buf)
debug_spin_unlock_irqrestore(&kmsg_buf->lock, flags_inner);
kprintf_unlock(flags_outer);
if (DEBUG_KMSG_USED > IHK_KMSG_HIGH_WATER_MARK) {
if (irqflags_can_interrupt(flags_outer) &&
DEBUG_KMSG_USED > IHK_KMSG_HIGH_WATER_MARK) {
eventfd(IHK_OS_EVENTFD_TYPE_KMSG);
ihk_mc_delay_us(IHK_KMSG_NOTIFY_DELAY);
}
@@ -123,8 +127,8 @@ int __kprintf(const char *format, ...)
}
debug_spin_unlock_irqrestore(&kmsg_buf->lock, flags_inner);
if (DEBUG_KMSG_USED > IHK_KMSG_HIGH_WATER_MARK) {
if (irqflags_can_interrupt(flags_inner) &&
DEBUG_KMSG_USED > IHK_KMSG_HIGH_WATER_MARK) {
eventfd(IHK_OS_EVENTFD_TYPE_KMSG);
ihk_mc_delay_us(IHK_KMSG_NOTIFY_DELAY);
}
@@ -165,7 +169,8 @@ int kprintf(const char *format, ...)
debug_spin_unlock_irqrestore(&kmsg_buf->lock, flags_inner);
kprintf_unlock(flags_outer);
if (DEBUG_KMSG_USED > IHK_KMSG_HIGH_WATER_MARK) {
if (irqflags_can_interrupt(flags_outer) &&
DEBUG_KMSG_USED > IHK_KMSG_HIGH_WATER_MARK) {
eventfd(IHK_OS_EVENTFD_TYPE_KMSG);
ihk_mc_delay_us(IHK_KMSG_NOTIFY_DELAY);
}
@@ -178,3 +183,147 @@ void kmsg_init()
{
ihk_mc_spinlock_init(&kmsg_lock);
}
extern struct ddebug __start___verbose[];
extern struct ddebug __stop___verbose[];
/*
 * sysfs "show" handler for the dynamic debug control file.
 *
 * Writes a header line followed by one "<file>:<line> <func> =<p|_>" line
 * for every struct ddebug entry between __start___verbose and
 * __stop___verbose into buf.
 *
 * ops, instance: unused.
 * buf, size:     output buffer and its capacity in bytes.
 *
 * Returns the number of characters stored in buf (excluding the
 * terminating NUL). When the listing does not fit, the output is truncated
 * and size - 1 is returned, so the result never exceeds the buffer.
 */
static ssize_t dynamic_debug_sysfs_show(struct sysfs_ops *ops,
		void *instance, void *buf, size_t size)
{
	char *out = buf;
	struct ddebug *dbg;
	size_t used;
	int len;

	if (size == 0)
		return 0;

	len = snprintf(out, size, "# filename:lineno function flags format\n");
	if (len < 0)
		return 0;
	/* Header alone was truncated: stop here so size - used cannot wrap */
	if ((size_t)len >= size)
		return size - 1;
	used = len;

	for (dbg = __start___verbose; dbg < __stop___verbose; dbg++) {
		len = snprintf(out + used, size - used, "%s:%d %s =%s\n",
			       dbg->file, dbg->line, dbg->func,
			       dbg->flags ? "p" : "_");
		if (len < 0)
			break;
		if ((size_t)len >= size - used) {
			/* Entry was cut short: buffer is full up to the NUL */
			used = size - 1;
			break;
		}
		used += len;
	}
	return used;
}
/*
 * sysfs "store" handler for the dynamic debug control file.
 *
 * Parses one or more commands from buf (modified in place) and sets or
 * clears the print flag of every matching struct ddebug entry between
 * __start___verbose and __stop___verbose.
 *
 * ops, instance: unused.
 * buf, size:     command text and its length; the last byte is overwritten
 *                with NUL (input is assumed to end with a newline).
 *
 * Returns size on success, -EINVAL on empty input, an unknown keyword,
 * an invalid or missing flag, or when neither file nor func is given.
 */
static ssize_t dynamic_debug_sysfs_store(struct sysfs_ops *ops,
		void *instance, void *buf, size_t size)
{
	char *cur = buf;
	char *end;
	char *file, *func;
	long int line_start, line_end;
	int set_flag;
	struct ddebug *dbg;

	/* Nothing to parse; also keeps cur[size - 1] below inside the buffer */
	if (size == 0)
		return -EINVAL;
	end = cur + size;

	// assume line was new-line terminated and squash last newline
	cur[size - 1] = '\0';

	/* basic line parsing, combinations of:
	 *   file <file>
	 *   func <func>
	 *   line <line|line-line|line-|-line>
	 * and must end with [+-=][p_] (set/clear print flag)
	 */
again:
	/* Every command starts from a clean slate so that the selectors and
	 * flag of a previous command cannot leak into the next one.
	 */
	file = NULL;
	func = NULL;
	line_start = 0;
	line_end = INT_MAX;
	set_flag = -1;

	while (cur && cur < end && *cur) {
		dkprintf("looking at %.*s, size left %d\n",
			 (int)(end - cur), cur, (int)(end - cur));
		if (strncmp(cur, "func ", 5) == 0) {
			cur += 5;
			func = cur;
		} else if (strncmp(cur, "file ", 5) == 0) {
			cur += 5;
			file = cur;
		} else if (strncmp(cur, "line ", 5) == 0) {
			cur += 5;
			/* "-N" form: no lower bound given */
			if (*cur != '-') {
				line_start = strtol(cur, &cur, 0);
			}
			if (*cur != '-') {
				/* single line number */
				line_end = line_start;
			} else {
				cur++;
				if (*cur == ' ' || *cur == '\0') {
					/* "N-" form: open-ended range */
					line_end = INT_MAX;
				} else {
					line_end = strtol(cur, &cur, 0);
				}
			}
		} else if (strchr("+-=", *cur)) {
			/* Combine operator and flag character into one value */
			switch ((*cur) + 256 * (*(cur+1))) {
			case '+' + 256*'p':
			case '=' + 256*'p':
				set_flag = DDEBUG_PRINT;
				break;
			case '-' + 256*'p':
			case '=' + 256*'_':
				set_flag = DDEBUG_NONE;
				break;
			default:
				kprintf("invalid flag: %.*s\n",
					(int)(end - cur), cur);
				return -EINVAL;
			}
			/* XXX check 3rd char is end of input or \n or ; */
			cur += 3;
			break;
		} else {
			kprintf("dynamic debug control: unrecognized keyword: %.*s\n",
				(int)(end - cur), cur);
			return -EINVAL;
		}
		/* Terminate the current word and move on to the next one */
		cur = strpbrk(cur, " \n");
		if (cur) {
			*cur = '\0';
			cur++;
		}
	}

	dkprintf("func %s, file %s, lines %ld-%ld, flag %x\n",
		 func ? func : "(null)", file ? file : "(null)",
		 line_start, line_end, set_flag);

	if (set_flag < 0) {
		kprintf("dynamic debug control: no flag set?\n");
		return -EINVAL;
	}
	if (!func && !file) {
		kprintf("at least file or func should be set\n");
		return -EINVAL;
	}

	for (dbg = __start___verbose; dbg < __stop___verbose; dbg++) {
		/* TODO: handle wildcards */
		if ((!func || strcmp(func, dbg->func) == 0) &&
		    (!file || strcmp(file, dbg->file) == 0) &&
		    dbg->line >= line_start &&
		    dbg->line <= line_end) {
			dbg->flags = set_flag;
		}
	}

	/* More input left after the flag: parse the next command */
	if (cur && cur < end && *cur)
		goto again;

	return size;
}
/* Handlers for the dynamic debug control file registered by dynamic_debug_sysfs_setup() */
static struct sysfs_ops dynamic_debug_sysfs_ops = {
/* read: list the debug entries and their print flag */
.show = &dynamic_debug_sysfs_show,
/* write: parse commands that set or clear print flags */
.store = &dynamic_debug_sysfs_store,
};
void dynamic_debug_sysfs_setup(void)
{
int error;
error = sysfs_createf(&dynamic_debug_sysfs_ops, NULL, 0644,
"/sys/kernel/debug/dynamic_debug/control");
if (error) {
kprintf("%s: ERROR: creating dynamic_debug/control sysfs file",
__func__);
}
}

View File

@@ -36,15 +36,13 @@
#include <syscall.h>
#include <process.h>
#include <rusage_private.h>
#include <debug.h>
//#define DEBUG_PRINT_DEVOBJ
#ifdef DEBUG_PRINT_DEVOBJ
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
@@ -54,16 +52,15 @@ struct devobj {
uintptr_t handle;
off_t pfn_pgoff;
uintptr_t * pfn_table;
ihk_spinlock_t pfn_table_lock;
size_t npages;
};
static memobj_release_func_t devobj_release;
static memobj_ref_func_t devobj_ref;
static memobj_free_func_t devobj_free;
static memobj_get_page_func_t devobj_get_page;
static struct memobj_ops devobj_ops = {
.release = &devobj_release,
.ref = &devobj_ref,
.free = &devobj_free,
.get_page = &devobj_get_page,
};
@@ -88,12 +85,9 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp
int error;
struct devobj *obj = NULL;
const size_t npages = (len + PAGE_SIZE - 1) / PAGE_SIZE;
#ifdef POSTK_DEBUG_TEMP_FIX_36
const size_t uintptr_per_page = (PAGE_SIZE / sizeof(uintptr_t));
const size_t pfn_npages = (npages + uintptr_per_page - 1) / uintptr_per_page;
#else
const size_t pfn_npages = (npages / (PAGE_SIZE / sizeof(uintptr_t))) + 1;
#endif /*POSTK_DEBUG_TEMP_FIX_36*/
const size_t pfn_npages =
(npages + uintptr_per_page - 1) / uintptr_per_page;
dkprintf("%s: fd: %d, len: %lu, off: %lu \n", __FUNCTION__, fd, len, off);
@@ -122,6 +116,8 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp
ihk_mc_syscall_arg4(&ctx) = virt_to_phys(&result);
ihk_mc_syscall_arg5(&ctx) = prot | populate_flags;
memset(&result, 0, sizeof(result));
error = syscall_generic_forwarding(__NR_mmap, &ctx);
if (error) {
kprintf("%s: error: fd: %d, len: %lu, off: %lu map failed.\n",
@@ -135,6 +131,7 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp
obj->memobj.ops = &devobj_ops;
obj->memobj.flags = MF_HAS_PAGER | MF_DEV_FILE;
obj->memobj.size = len;
ihk_atomic_set(&obj->memobj.refcnt, 1);
obj->handle = result.handle;
dkprintf("%s: path=%s\n", __FUNCTION__, result.path);
@@ -148,10 +145,9 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp
strncpy(obj->memobj.path, result.path, PATH_MAX);
}
obj->ref = 1;
obj->pfn_pgoff = off / PAGE_SIZE;
obj->pfn_pgoff = off >> PAGE_SHIFT;
obj->npages = npages;
ihk_mc_spinlock_init(&obj->memobj.lock);
ihk_mc_spinlock_init(&obj->pfn_table_lock);
error = 0;
*objp = to_memobj(obj);
@@ -170,81 +166,50 @@ out:
return error;
}
static void devobj_ref(struct memobj *memobj)
static void devobj_free(struct memobj *memobj)
{
struct devobj *obj = to_devobj(memobj);
dkprintf("devobj_ref(%p %lx):\n", obj, obj->handle);
memobj_lock(&obj->memobj);
++obj->ref;
memobj_unlock(&obj->memobj);
return;
}
static void devobj_release(struct memobj *memobj)
{
struct devobj *obj = to_devobj(memobj);
struct devobj *free_obj = NULL;
uintptr_t handle;
#ifndef POSTK_DEBUG_TEMP_FIX_36
const size_t uintptr_per_page = (PAGE_SIZE / sizeof(uintptr_t));
const size_t pfn_npages =
(obj->npages / (PAGE_SIZE / sizeof(uintptr_t))) + 1;
#endif /*!POSTK_DEBUG_TEMP_FIX_36*/
(obj->npages + uintptr_per_page - 1) / uintptr_per_page;
int error;
ihk_mc_user_context_t ctx;
dkprintf("devobj_release(%p %lx)\n", obj, obj->handle);
dkprintf("%s(%p %lx)\n", __func__, obj, obj->handle);
memobj_lock(&obj->memobj);
--obj->ref;
if (obj->ref <= 0) {
free_obj = obj;
}
handle = obj->handle;
memobj_unlock(&obj->memobj);
if (free_obj) {
if (!(free_obj->memobj.flags & MF_HOST_RELEASED)) {
int error;
ihk_mc_user_context_t ctx;
ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_UNMAP;
ihk_mc_syscall_arg1(&ctx) = handle;
ihk_mc_syscall_arg2(&ctx) = 1;
ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_UNMAP;
ihk_mc_syscall_arg1(&ctx) = handle;
ihk_mc_syscall_arg2(&ctx) = 1;
error = syscall_generic_forwarding(__NR_mmap, &ctx);
if (error) {
kprintf("devobj_release(%p %lx):"
"release failed. %d\n",
free_obj, handle, error);
/* through */
}
}
if (obj->pfn_table) {
// Don't call memory_stat_rss_sub() because devobj related pages don't reside in main memory
#ifdef POSTK_DEBUG_TEMP_FIX_36
const size_t uintptr_per_page = (PAGE_SIZE / sizeof(uintptr_t));
const size_t pfn_npages = (obj->npages + uintptr_per_page - 1) / uintptr_per_page;
ihk_mc_free_pages(obj->pfn_table, pfn_npages);
#else
ihk_mc_free_pages(obj->pfn_table, pfn_npages);
#endif /*POSTK_DEBUG_TEMP_FIX_36*/
}
if (to_memobj(free_obj)->path) {
kfree(to_memobj(free_obj)->path);
}
kfree(free_obj);
error = syscall_generic_forwarding(__NR_mmap, &ctx);
if (error) {
kprintf("%s(%p %lx): release failed. %d\n",
__func__, obj, handle, error);
/* through */
}
dkprintf("devobj_release(%p %lx):free %p\n",
obj, handle, free_obj);
if (obj->pfn_table) {
// Don't call memory_stat_rss_sub() because devobj related
// pages don't reside in main memory
ihk_mc_free_pages(obj->pfn_table, pfn_npages);
}
if (to_memobj(obj)->path) {
kfree(to_memobj(obj)->path);
}
kfree(obj);
dkprintf("%s(%p %lx):free\n", __func__, obj, handle);
return;
}
static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintptr_t *physp, unsigned long *flag, uintptr_t virt_addr)
{
const off_t pgoff = off / PAGE_SIZE;
const off_t pgoff = off >> PAGE_SHIFT;
struct devobj *obj = to_devobj(memobj);
int error;
uintptr_t pfn;
@@ -262,17 +227,14 @@ static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintpt
ix = pgoff - obj->pfn_pgoff;
dkprintf("ix: %ld\n", ix);
memobj_lock(&obj->memobj);
pfn = obj->pfn_table[ix];
#ifdef PROFILE_ENABLE
profile_event_add(PROFILE_page_fault_dev_file, PAGE_SIZE);
#endif // PROFILE_ENABLE
pfn = obj->pfn_table[ix];
if (!(pfn & PFN_VALID)) {
memobj_unlock(&obj->memobj);
ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_PFN;
ihk_mc_syscall_arg1(&ctx) = obj->handle;
ihk_mc_syscall_arg2(&ctx) = pgoff << PAGE_SHIFT;
ihk_mc_syscall_arg2(&ctx) = off & ~(PAGE_SIZE - 1);
ihk_mc_syscall_arg3(&ctx) = virt_to_phys(&pfn);
error = syscall_generic_forwarding(__NR_mmap, &ctx);
@@ -303,11 +265,9 @@ static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintpt
dkprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT after %#lx\n", memobj, obj->handle, off, p2align, pfn);
}
memobj_lock(&obj->memobj);
obj->pfn_table[ix] = pfn;
// Don't call memory_stat_rss_add() because devobj related pages don't reside in main memory
}
memobj_unlock(&obj->memobj);
if (!(pfn & PFN_PRESENT)) {
kprintf("devobj_get_page(%p %lx,%lx,%d):not present. %lx\n", memobj, obj->handle, off, p2align, pfn);

View File

@@ -27,15 +27,13 @@
#include <string.h>
#include <syscall.h>
#include <rusage_private.h>
#include <debug.h>
//#define DEBUG_PRINT_FILEOBJ
#ifdef DEBUG_PRINT_FILEOBJ
#define dkprintf(...) do { if (1) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
mcs_lock_t fileobj_list_lock;
@@ -47,24 +45,21 @@ static LIST_HEAD(fileobj_list);
struct fileobj {
struct memobj memobj; /* must be first */
long sref;
long cref;
uint64_t sref;
uintptr_t handle;
struct list_head list;
struct list_head page_hash[FILEOBJ_PAGE_HASH_SIZE];
mcs_lock_t page_hash_locks[FILEOBJ_PAGE_HASH_SIZE];
};
static memobj_release_func_t fileobj_release;
static memobj_ref_func_t fileobj_ref;
static memobj_free_func_t fileobj_free;
static memobj_get_page_func_t fileobj_get_page;
static memobj_flush_page_func_t fileobj_flush_page;
static memobj_invalidate_page_func_t fileobj_invalidate_page;
static memobj_lookup_page_func_t fileobj_lookup_page;
static struct memobj_ops fileobj_ops = {
.release = &fileobj_release,
.ref = &fileobj_ref,
.free = &fileobj_free,
.get_page = &fileobj_get_page,
.copy_page = NULL,
.flush_page = &fileobj_flush_page,
@@ -170,22 +165,22 @@ static void obj_list_remove(struct fileobj *obj)
/* return NULL or locked fileobj */
static struct fileobj *obj_list_lookup(uintptr_t handle)
{
struct fileobj *obj;
struct fileobj *p;
obj = NULL;
list_for_each_entry(p, &fileobj_list, list) {
if (p->handle == handle) {
memobj_lock(&p->memobj);
if (p->cref > 0) {
obj = p;
break;
/* for the interval between last put and fileobj_free
* taking list_lock
*/
if (memobj_ref(&p->memobj) <= 1) {
ihk_atomic_dec(&p->memobj.refcnt);
continue;
}
memobj_unlock(&p->memobj);
return p;
}
}
return obj;
return NULL;
}
/***********************************************************************
@@ -200,13 +195,7 @@ int fileobj_create(int fd, struct memobj **objp, int *maxprotp, uintptr_t virt_a
struct fileobj *obj;
struct mcs_lock_node node;
dkprintf("fileobj_create(%d)\n", fd);
newobj = kmalloc(sizeof(*newobj), IHK_MC_AP_NOWAIT);
if (!newobj) {
error = -ENOMEM;
kprintf("fileobj_create(%d):kmalloc failed. %d\n", fd, error);
goto out;
}
dkprintf("%s(%d)\n", __func__, fd);
ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_CREATE;
ihk_mc_syscall_arg1(&ctx) = fd;
@@ -214,20 +203,41 @@ int fileobj_create(int fd, struct memobj **objp, int *maxprotp, uintptr_t virt_a
memset(&result, 0, sizeof(result));
error = syscall_generic_forwarding(__NR_mmap, &ctx);
if (error) {
dkprintf("fileobj_create(%d):create failed. %d\n", fd, error);
/* -ESRCH is not an error here: it requests a fallback
* to treating the file as a device file
*/
if (error != -ESRCH) {
kprintf("%s(%d):create failed. %d\n",
__func__, fd, error);
}
goto out;
}
if (result.flags & MF_HUGETLBFS) {
return hugefileobj_pre_create(&result, objp, maxprotp);
}
mcs_lock_lock(&fileobj_list_lock, &node);
obj = obj_list_lookup(result.handle);
if (obj)
goto found;
mcs_lock_unlock(&fileobj_list_lock, &node);
// not found: alloc new object and lookup again
newobj = kmalloc(sizeof(*newobj), IHK_MC_AP_NOWAIT);
if (!newobj) {
error = -ENOMEM;
kprintf("%s(%d):kmalloc failed. %d\n", __func__, fd, error);
goto out;
}
memset(newobj, 0, sizeof(*newobj));
newobj->memobj.ops = &fileobj_ops;
newobj->memobj.flags = MF_HAS_PAGER | MF_REG_FILE;
newobj->handle = result.handle;
newobj->sref = 1;
newobj->cref = 1;
fileobj_page_hash_init(newobj);
ihk_mc_spinlock_init(&newobj->memobj.lock);
mcs_lock_lock_noirq(&fileobj_list_lock, &node);
obj = obj_list_lookup(result.handle);
@@ -237,6 +247,8 @@ int fileobj_create(int fd, struct memobj **objp, int *maxprotp, uintptr_t virt_a
to_memobj(obj)->size = result.size;
to_memobj(obj)->flags |= result.flags;
to_memobj(obj)->status = MEMOBJ_READY;
ihk_atomic_set(&to_memobj(obj)->refcnt, 1);
obj->sref = 1;
if (to_memobj(obj)->flags & MF_PREFETCH) {
to_memobj(obj)->status = MEMOBJ_TO_BE_PREFETCHED;
}
@@ -305,20 +317,17 @@ error_cleanup:
}
newobj = NULL;
dkprintf("%s: new obj 0x%lx cref: %d, %s\n",
dkprintf("%s: new obj 0x%lx %s\n",
__FUNCTION__,
obj,
obj->cref,
to_memobj(obj)->flags & MF_ZEROFILL ? "zerofill" : "");
}
else {
++obj->sref;
++obj->cref;
memobj_unlock(&obj->memobj); /* locked by obj_list_lookup() */
dkprintf("%s: existing obj 0x%lx cref: %d, %s\n",
found:
obj->sref++;
dkprintf("%s: existing obj 0x%lx, %s\n",
__FUNCTION__,
obj,
obj->cref,
to_memobj(obj)->flags & MF_ZEROFILL ? "zerofill" : "");
}
@@ -332,152 +341,111 @@ out:
if (newobj) {
kfree(newobj);
}
dkprintf("fileobj_create(%d):%d %p %x\n", fd, error, *objp, *maxprotp);
dkprintf("%s(%d):%d %p %x\n", __func__, fd, error, *objp, *maxprotp);
return error;
}
static void fileobj_ref(struct memobj *memobj)
static void fileobj_free(struct memobj *memobj)
{
struct fileobj *obj = to_fileobj(memobj);
dkprintf("fileobj_ref(%p %lx):\n", obj, obj->handle);
memobj_lock(&obj->memobj);
++obj->cref;
memobj_unlock(&obj->memobj);
return;
}
static void fileobj_release(struct memobj *memobj)
{
struct fileobj *obj = to_fileobj(memobj);
long free_sref = 0;
uintptr_t free_handle;
struct fileobj *free_obj = NULL;
struct mcs_lock_node node;
int error;
ihk_mc_user_context_t ctx;
dkprintf("fileobj_release(%p %lx)\n", obj, obj->handle);
memobj_lock(&obj->memobj);
--obj->cref;
free_sref = obj->sref - 1; /* surplus sref */
if (obj->cref <= 0) {
free_sref = obj->sref;
free_obj = obj;
}
obj->sref -= free_sref;
free_handle = obj->handle;
memobj_unlock(&obj->memobj);
if (obj->memobj.flags & MF_HOST_RELEASED) {
free_sref = 0; // don't call syscall_generic_forwarding
}
dkprintf("%s: free obj 0x%lx, %s\n", __func__,
obj, to_memobj(obj)->flags & MF_ZEROFILL ? "zerofill" : "");
if (free_obj) {
dkprintf("%s: release obj 0x%lx cref: %d, free_obj: 0x%lx, %s\n",
__FUNCTION__,
obj,
obj->cref,
free_obj,
to_memobj(obj)->flags & MF_ZEROFILL ? "zerofill" : "");
mcs_lock_lock_noirq(&fileobj_list_lock, &node);
/* zap page_list */
for (;;) {
struct page *page;
void *page_va;
uintptr_t phys;
mcs_lock_lock_noirq(&fileobj_list_lock, &node);
obj_list_remove(obj);
mcs_lock_unlock_noirq(&fileobj_list_lock, &node);
page = fileobj_page_hash_first(obj);
if (!page) {
break;
}
__fileobj_page_hash_remove(page);
phys = page_to_phys(page);
page_va = phys_to_virt(phys);
/* zap page_list */
for (;;) {
struct page *page;
void *page_va;
uintptr_t phys;
/* Count must be one because set to one on the first get_page() invoking fileobj_do_pageio and
incremented by the second get_page() reaping the pageio and decremented by clear_range().
page = fileobj_page_hash_first(obj);
if (!page) {
break;
}
__fileobj_page_hash_remove(page);
phys = page_to_phys(page);
page_va = phys_to_virt(phys);
/* Count must be one because set to one on the first
* get_page() invoking fileobj_do_pageio and incremented by
* the second get_page() reaping the pageio and decremented
* by clear_range().
*/
if (ihk_atomic_read(&page->count) != 1) {
kprintf("%s: WARNING: page count is %d for phys 0x%lx is invalid, flags: 0x%lx\n",
__func__, ihk_atomic_read(&page->count),
page->phys, to_memobj(obj)->flags);
}
else if (page_unmap(page)) {
ihk_mc_free_pages_user(page_va, 1);
/* Track change in page->count for !MF_PREMAP pages.
* It is decremented here or in clear_range()
*/
if (ihk_atomic_read(&page->count) != 1) {
kprintf("%s: WARNING: page count is %d for phys 0x%lx is invalid, flags: 0x%lx\n",
__FUNCTION__,
ihk_atomic_read(&page->count),
page->phys,
to_memobj(free_obj)->flags);
}
else if (page_unmap(page)) {
ihk_mc_free_pages_user(page_va, 1);
/* Track change in page->count for !MF_PREMAP pages. It is decremented here or in clear_range() */
dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", phys, __FUNCTION__, phys, PAGE_SIZE, PAGE_SIZE);
rusage_memory_stat_mapped_file_sub(PAGE_SIZE, PAGE_SIZE);
}
#if 0
count = ihk_atomic_sub_return(1, &page->count);
if (!((page->mode == PM_WILL_PAGEIO)
|| (page->mode == PM_DONE_PAGEIO)
|| (page->mode == PM_PAGEIO_EOF)
|| (page->mode == PM_PAGEIO_ERROR)
|| ((page->mode == PM_MAPPED)
&& (count <= 0)))) {
kprintf("fileobj_release(%p %lx): "
"mode %x, count %d, off %lx\n",
obj, obj->handle, page->mode,
count, page->offset);
panic("fileobj_release");
}
page->mode = PM_NONE;
#endif
}
/* Pre-mapped? */
if (to_memobj(free_obj)->flags & MF_PREMAP) {
int i;
for (i = 0; i < to_memobj(free_obj)->nr_pages; ++i) {
if (to_memobj(free_obj)->pages[i]) {
dkprintf("%s: pages[i]=%p\n", __FUNCTION__, i, to_memobj(free_obj)->pages[i]);
// Track change in fileobj->pages[] for MF_PREMAP pages
// Note that page_unmap() isn't called for MF_PREMAP in
// free_process_memory_range() --> ihk_mc_pt_free_range()
dkprintf("%lx-,%s: memory_stat_rss_sub,phys=%lx,size=%ld,pgsize=%ld\n",
virt_to_phys(to_memobj(free_obj)->pages[i]), __FUNCTION__, virt_to_phys(to_memobj(free_obj)->pages[i]), PAGE_SIZE, PAGE_SIZE);
rusage_memory_stat_mapped_file_sub(PAGE_SIZE, PAGE_SIZE);
ihk_mc_free_pages_user(to_memobj(free_obj)->pages[i], 1);
}
}
kfree(to_memobj(free_obj)->pages);
}
if (to_memobj(free_obj)->path) {
dkprintf("%s: %s\n", __FUNCTION__, to_memobj(free_obj)->path);
kfree(to_memobj(free_obj)->path);
}
obj_list_remove(free_obj);
mcs_lock_unlock_noirq(&fileobj_list_lock, &node);
kfree(free_obj);
}
if (free_sref) {
int error;
ihk_mc_user_context_t ctx;
ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_RELEASE;
ihk_mc_syscall_arg1(&ctx) = free_handle;
ihk_mc_syscall_arg2(&ctx) = free_sref;
error = syscall_generic_forwarding(__NR_mmap, &ctx);
if (error) {
kprintf("fileobj_release(%p %lx):"
"release %ld failed. %d\n",
obj, free_handle, free_sref, error);
/* through */
dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n",
phys, __func__, phys, PAGE_SIZE, PAGE_SIZE);
rusage_memory_stat_mapped_file_sub(PAGE_SIZE,
PAGE_SIZE);
}
}
dkprintf("fileobj_release(%p %lx):free %ld %p\n",
obj, free_handle, free_sref, free_obj);
/* Pre-mapped? */
if (to_memobj(obj)->flags & MF_PREMAP) {
int i;
for (i = 0; i < to_memobj(obj)->nr_pages; ++i) {
if (to_memobj(obj)->pages[i]) {
dkprintf("%s: pages[i]=%p\n", __func__, i,
to_memobj(obj)->pages[i]);
// Track change in fileobj->pages[] for MF_PREMAP pages
// Note that page_unmap() isn't called for MF_PREMAP in
// free_process_memory_range() --> ihk_mc_pt_free_range()
dkprintf("%lx-,%s: memory_stat_rss_sub,phys=%lx,size=%ld,pgsize=%ld\n",
virt_to_phys(to_memobj(obj)->pages[i]),
__func__,
virt_to_phys(to_memobj(obj)->pages[i]),
PAGE_SIZE, PAGE_SIZE);
rusage_memory_stat_mapped_file_sub(PAGE_SIZE,
PAGE_SIZE);
ihk_mc_free_pages_user(to_memobj(obj)->pages[i],
1);
}
}
kfree(to_memobj(obj)->pages);
}
if (to_memobj(obj)->path) {
dkprintf("%s: %s\n", __func__, to_memobj(obj)->path);
kfree(to_memobj(obj)->path);
}
/* linux side
* sref is necessary because handle is used as key, so there could
* be a new mckernel pager with the same handle being created as
* this one is being destroyed
*/
ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_RELEASE;
ihk_mc_syscall_arg1(&ctx) = obj->handle;
ihk_mc_syscall_arg2(&ctx) = obj->sref;
error = syscall_generic_forwarding(__NR_mmap, &ctx);
if (error) {
kprintf("%s(%p %lx): free failed. %d\n", __func__,
obj, obj->handle, error);
/* through */
}
dkprintf("%s(%p %lx):free\n", __func__, obj, obj->handle);
kfree(obj);
return;
}
struct pageio_args {
@@ -570,7 +538,7 @@ static void fileobj_do_pageio(void *args0)
out:
mcs_lock_unlock_noirq(&obj->page_hash_locks[hash],
&mcs_node);
fileobj_release(&obj->memobj); /* got fileobj_get_page() */
memobj_unref(&obj->memobj); /* got fileobj_get_page() */
kfree(args0);
dkprintf("fileobj_do_pageio(%p,%lx,%lx):\n", obj, off, pgsize);
return;
@@ -656,7 +624,9 @@ static int fileobj_get_page(struct memobj *memobj, off_t off,
npages = 1 << p2align;
virt = ihk_mc_alloc_pages_user(npages, (IHK_MC_AP_NOWAIT |
(to_memobj(obj)->flags & MF_ZEROFILL) ? IHK_MC_AP_USER : 0), virt_addr);
((to_memobj(obj)->flags & MF_ZEROFILL) ?
IHK_MC_AP_USER : 0)),
virt_addr);
if (!virt) {
error = -ENOMEM;
kprintf("fileobj_get_page(%p,%lx,%x,%x,%p):"
@@ -681,9 +651,7 @@ static int fileobj_get_page(struct memobj *memobj, off_t off,
page->mode = PM_WILL_PAGEIO;
}
memobj_lock(&obj->memobj);
++obj->cref; /* for fileobj_do_pageio() */
memobj_unlock(&obj->memobj);
memobj_ref(&obj->memobj);
args->fileobj = obj;
args->objoff = off;
@@ -744,10 +712,6 @@ static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys,
return 0;
}
if (memobj->flags & MF_HOST_RELEASED) {
return 0;
}
page = phys_to_page(phys);
if (!page) {
kprintf("%s: warning: tried to flush non-existing page for phys addr: 0x%lx\n",
@@ -755,8 +719,6 @@ static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys,
return 0;
}
memobj_unlock(&obj->memobj);
ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_WRITE;
ihk_mc_syscall_arg1(&ctx) = obj->handle;
ihk_mc_syscall_arg2(&ctx) = page->offset;
@@ -771,7 +733,6 @@ static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys,
/* through */
}
memobj_lock(&obj->memobj);
return 0;
}

View File

@@ -70,15 +70,22 @@
#include <cls.h>
#include <kmsg.h>
#include <timer.h>
#include <debug.h>
#include <syscall.h>
//#define DEBUG_PRINT_FUTEX
#ifdef DEBUG_PRINT_FUTEX
#define dkprintf kprintf
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#define uti_dkprintf(...) do { ((clv_override && linux_printk) ? (*linux_printk) : kprintf)(__VA_ARGS__); } while (0)
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define uti_dkprintf(...) do { } while (0)
#endif
#define uti_kprintf(...) do { ((clv_override && linux_printk) ? (*linux_printk) : kprintf)(__VA_ARGS__); } while (0)
unsigned long ihk_mc_get_ns_per_tsc(void);
int futex_cmpxchg_enabled;
/**
@@ -108,6 +115,9 @@ struct futex_q {
union futex_key key;
union futex_key *requeue_pi_key;
uint32_t bitset;
/* Used to wake-up a thread running on a Linux CPU */
void *uti_futex_resp;
};
/*
@@ -180,11 +190,12 @@ static void drop_futex_key_refs(union futex_key *key)
* lock_page() might sleep, the caller should not hold a spinlock.
*/
static int
get_futex_key(uint32_t *uaddr, int fshared, union futex_key *key)
get_futex_key(uint32_t *uaddr, int fshared, union futex_key *key, struct cpu_local_var *clv_override)
{
unsigned long address = (unsigned long)uaddr;
unsigned long phys;
struct process_vm *mm = cpu_local_var(current)->vm;
struct thread *thread = cpu_local_var_with_override(current, clv_override);
struct process_vm *mm = thread->vm;
/*
* The futex address must be "naturally" aligned.
@@ -250,7 +261,7 @@ static int cmpxchg_futex_value_locked(uint32_t __user *uaddr, uint32_t uval, uin
* The hash bucket lock must be held when this is called.
* Afterwards, the futex_q must not be accessed.
*/
static void wake_futex(struct futex_q *q)
static void wake_futex(struct futex_q *q, struct cpu_local_var *clv_override)
{
struct thread *p = q->task;
@@ -272,8 +283,31 @@ static void wake_futex(struct futex_q *q)
barrier();
q->lock_ptr = NULL;
dkprintf("wake_futex(): waking up tid %d\n", p->tid);
sched_wakeup_thread(p, PS_NORMAL);
if (q->uti_futex_resp) {
int rc;
uti_dkprintf("wake_futex(): waking up migrated-to-Linux thread (tid %d),uti_futex_resp=%p\n", p->tid, q->uti_futex_resp);
/* TODO: Add the case where a Linux thread wakes up another Linux thread */
if (clv_override) {
uti_dkprintf("%s: ERROR: A Linux thread is waking up migrated-to-Linux thread\n", __FUNCTION__);
}
if (p->spin_sleep == 0) {
uti_dkprintf("%s: INFO: woken up by someone else\n", __FUNCTION__);
}
struct ikc_scd_packet pckt;
struct ihk_ikc_channel_desc *resp_channel = cpu_local_var_with_override(ikc2linux, clv_override);
pckt.msg = SCD_MSG_FUTEX_WAKE;
pckt.futex.resp = q->uti_futex_resp;
pckt.futex.spin_sleep = &p->spin_sleep;
rc = ihk_ikc_send(resp_channel, &pckt, 0);
if (rc) {
uti_dkprintf("%s: ERROR: ihk_ikc_send returned %d, resp_channel=%p\n", __FUNCTION__, rc, resp_channel);
}
} else {
uti_dkprintf("wake_futex(): waking up McKernel thread (tid %d)\n", p->tid);
sched_wakeup_thread(p, PS_NORMAL);
}
}
/*
@@ -303,7 +337,7 @@ double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
/*
* Wake up waiters matching bitset queued on this futex (uaddr).
*/
static int futex_wake(uint32_t *uaddr, int fshared, int nr_wake, uint32_t bitset)
static int futex_wake(uint32_t *uaddr, int fshared, int nr_wake, uint32_t bitset, struct cpu_local_var *clv_override)
{
struct futex_hash_bucket *hb;
struct futex_q *this, *next;
@@ -314,7 +348,7 @@ static int futex_wake(uint32_t *uaddr, int fshared, int nr_wake, uint32_t bitset
if (!bitset)
return -EINVAL;
ret = get_futex_key(uaddr, fshared, &key);
ret = get_futex_key(uaddr, fshared, &key, clv_override);
if ((ret != 0))
goto out;
@@ -330,7 +364,7 @@ static int futex_wake(uint32_t *uaddr, int fshared, int nr_wake, uint32_t bitset
if (!(this->bitset & bitset))
continue;
wake_futex(this);
wake_futex(this, clv_override);
if (++ret >= nr_wake)
break;
}
@@ -348,7 +382,8 @@ out:
*/
static int
futex_wake_op(uint32_t *uaddr1, int fshared, uint32_t *uaddr2,
int nr_wake, int nr_wake2, int op)
int nr_wake, int nr_wake2, int op,
struct cpu_local_var *clv_override)
{
union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
struct futex_hash_bucket *hb1, *hb2;
@@ -357,10 +392,10 @@ futex_wake_op(uint32_t *uaddr1, int fshared, uint32_t *uaddr2,
int ret, op_ret;
retry:
ret = get_futex_key(uaddr1, fshared, &key1);
ret = get_futex_key(uaddr1, fshared, &key1, clv_override);
if ((ret != 0))
goto out;
ret = get_futex_key(uaddr2, fshared, &key2);
ret = get_futex_key(uaddr2, fshared, &key2, clv_override);
if ((ret != 0))
goto out_put_key1;
@@ -394,7 +429,7 @@ retry_private:
plist_for_each_entry_safe(this, next, head, list) {
if (match_futex (&this->key, &key1)) {
wake_futex(this);
wake_futex(this, clv_override);
if (++ret >= nr_wake)
break;
}
@@ -406,7 +441,7 @@ retry_private:
op_ret = 0;
plist_for_each_entry_safe(this, next, head, list) {
if (match_futex (&this->key, &key2)) {
wake_futex(this);
wake_futex(this, clv_override);
if (++op_ret >= nr_wake2)
break;
}
@@ -469,7 +504,7 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
*/
static int futex_requeue(uint32_t *uaddr1, int fshared, uint32_t *uaddr2,
int nr_wake, int nr_requeue, uint32_t *cmpval,
int requeue_pi)
int requeue_pi, struct cpu_local_var *clv_override)
{
union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
int drop_count = 0, task_count = 0, ret;
@@ -477,10 +512,10 @@ static int futex_requeue(uint32_t *uaddr1, int fshared, uint32_t *uaddr2,
struct plist_head *head1;
struct futex_q *this, *next;
ret = get_futex_key(uaddr1, fshared, &key1);
ret = get_futex_key(uaddr1, fshared, &key1, clv_override);
if ((ret != 0))
goto out;
ret = get_futex_key(uaddr2, fshared, &key2);
ret = get_futex_key(uaddr2, fshared, &key2, clv_override);
if ((ret != 0))
goto out_put_key1;
@@ -515,7 +550,7 @@ static int futex_requeue(uint32_t *uaddr1, int fshared, uint32_t *uaddr2,
*/
/* RIKEN: no requeue_pi at this moment */
if (++task_count <= nr_wake) {
wake_futex(this);
wake_futex(this, clv_override);
continue;
}
@@ -574,7 +609,7 @@ queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
* state is implicit in the state of woken task (see futex_wait_requeue_pi() for
* an example).
*/
static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb, struct cpu_local_var *clv_override)
{
int prio;
@@ -595,7 +630,7 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
q->list.plist.spinlock = &hb->lock;
#endif
plist_add(&q->list, &hb->chain);
q->task = cpu_local_var(current);
q->task = cpu_local_var_with_override(current, clv_override);
ihk_mc_spinlock_unlock_noirq(&hb->lock);
}
@@ -658,46 +693,64 @@ retry:
/* RIKEN: this function has been rewritten so that it returns the remaining
* time in case we are woken up.
*/
static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
uint64_t timeout)
static int64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
uint64_t timeout, struct cpu_local_var *clv_override)
{
uint64_t time_remain = 0;
int64_t time_remain = 0;
unsigned long irqstate;
struct thread *thread = cpu_local_var(current);
struct thread *thread = cpu_local_var_with_override(current, clv_override);
/*
* The task state is guaranteed to be set before another task can
* wake it.
* queue_me() calls spin_unlock() upon completion, serializing
* access to the hash list and forcing a memory barrier.
*/
xchg4(&(cpu_local_var(current)->status), PS_INTERRUPTIBLE);
xchg4(&(thread->status), PS_INTERRUPTIBLE);
/* Indicate spin sleep */
if (!idle_halt) {
/* Indicate spin sleep. Note that schedule_timeout() with
* idle_halt should use spin sleep because sleep with timeout
* is not implemented.
*/
if (!idle_halt || timeout) {
irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
thread->spin_sleep = 1;
ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
}
queue_me(q, hb);
queue_me(q, hb, clv_override);
if (!plist_node_empty(&q->list)) {
if (clv_override) {
uti_dkprintf("%s: tid: %d is trying to sleep\n", __FUNCTION__, thread->tid);
/* Note that the unit of timeout is nsec */
time_remain = (*linux_wait_event)(q->uti_futex_resp, timeout);
/* Note that time_remain == 0 indicates condition evaluated to false after the timeout elapsed */
if (time_remain < 0) {
if (time_remain == -ERESTARTSYS) { /* Interrupted by signal */
uti_dkprintf("%s: DEBUG: wait_event returned -ERESTARTSYS\n", __FUNCTION__);
} else {
uti_kprintf("%s: ERROR: wait_event returned %d\n", __FUNCTION__, time_remain);
}
}
uti_dkprintf("%s: tid: %d woken up\n", __FUNCTION__, thread->tid);
} else {
if (timeout) {
dkprintf("futex_wait_queue_me(): tid: %d schedule_timeout()\n", cpu_local_var(current)->tid);
dkprintf("futex_wait_queue_me(): tid: %d schedule_timeout()\n", thread->tid);
time_remain = schedule_timeout(timeout);
}
else {
dkprintf("futex_wait_queue_me(): tid: %d schedule()\n", cpu_local_var(current)->tid);
dkprintf("futex_wait_queue_me(): tid: %d schedule()\n", thread->tid);
spin_sleep_or_schedule();
time_remain = 0;
}
dkprintf("futex_wait_queue_me(): tid: %d woken up\n", cpu_local_var(current)->tid);
dkprintf("futex_wait_queue_me(): tid: %d woken up\n", thread->tid);
}
}
/* This does not need to be serialized */
cpu_local_var(current)->status = PS_RUNNING;
thread->status = PS_RUNNING;
thread->spin_sleep = 0;
return time_remain;
@@ -721,7 +774,8 @@ static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q
* <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
*/
static int futex_wait_setup(uint32_t __user *uaddr, uint32_t val, int fshared,
struct futex_q *q, struct futex_hash_bucket **hb)
struct futex_q *q, struct futex_hash_bucket **hb,
struct cpu_local_var *clv_override)
{
uint32_t uval;
int ret;
@@ -744,7 +798,7 @@ static int futex_wait_setup(uint32_t __user *uaddr, uint32_t val, int fshared,
* rare, but normal.
*/
q->key = FUTEX_KEY_INIT;
ret = get_futex_key(uaddr, fshared, &q->key);
ret = get_futex_key(uaddr, fshared, &q->key, clv_override);
if (ret != 0)
return ret;
@@ -768,49 +822,59 @@ static int futex_wait_setup(uint32_t __user *uaddr, uint32_t val, int fshared,
}
static int futex_wait(uint32_t __user *uaddr, int fshared,
uint32_t val, uint64_t timeout, uint32_t bitset, int clockrt)
uint32_t val, uint64_t timeout, uint32_t bitset, int clockrt,
struct cpu_local_var *clv_override)
{
struct futex_hash_bucket *hb;
struct futex_q q;
uint64_t time_remain;
int64_t time_remain;
int ret;
if (!bitset)
return -EINVAL;
#ifdef PROFILE_ENABLE
if (cpu_local_var(current)->profile &&
cpu_local_var(current)->profile_start_ts) {
cpu_local_var(current)->profile_elapsed_ts +=
(rdtsc() - cpu_local_var(current)->profile_start_ts);
cpu_local_var(current)->profile_start_ts = 0;
if (cpu_local_var_with_override(current, clv_override)->profile &&
cpu_local_var_with_override(current, clv_override)->profile_start_ts) {
cpu_local_var_with_override(current, clv_override)->profile_elapsed_ts +=
(rdtsc() - cpu_local_var_with_override(current, clv_override)->profile_start_ts);
cpu_local_var_with_override(current, clv_override)->profile_start_ts = 0;
}
#endif
q.bitset = bitset;
q.requeue_pi_key = NULL;
q.uti_futex_resp = cpu_local_var_with_override(uti_futex_resp, clv_override);
retry:
/* Prepare to wait on uaddr. */
ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
if (ret)
ret = futex_wait_setup(uaddr, val, fshared, &q, &hb, clv_override);
if (ret) {
uti_dkprintf("%s: tid=%d futex_wait_setup returns zero, no need to sleep\n", __FUNCTION__, cpu_local_var_with_override(current, clv_override)->tid);
goto out;
}
/* queue_me and wait for wakeup, timeout, or a signal. */
time_remain = futex_wait_queue_me(hb, &q, timeout);
time_remain = futex_wait_queue_me(hb, &q, timeout, clv_override);
/* If we were woken (and unqueued), we succeeded, whatever. */
ret = 0;
if (!unqueue_me(&q))
if (!unqueue_me(&q)) {
uti_dkprintf("%s: tid=%d unqueued\n", __FUNCTION__, cpu_local_var_with_override(current, clv_override)->tid);
goto out_put_key;
}
ret = -ETIMEDOUT;
/* RIKEN: timer expired case (indicated by !time_remain) */
if (timeout && !time_remain)
if (timeout && !time_remain) {
uti_dkprintf("%s: tid=%d timer expired\n", __FUNCTION__, cpu_local_var_with_override(current, clv_override)->tid);
goto out_put_key;
}
if (hassigpending(cpu_local_var(current))) {
/* RIKEN: futex_wait_queue_me() returns -ERESTARTSYS when waiting on Linux CPU and woken up by signal */
if (hassigpending(cpu_local_var_with_override(current, clv_override)) || time_remain == -ERESTARTSYS) {
ret = -EINTR;
uti_dkprintf("%s: tid=%d woken up by signal\n", __FUNCTION__, cpu_local_var_with_override(current, clv_override)->tid);
goto out_put_key;
}
@@ -822,19 +886,22 @@ out_put_key:
put_futex_key(fshared, &q.key);
out:
#ifdef PROFILE_ENABLE
if (cpu_local_var(current)->profile) {
cpu_local_var(current)->profile_start_ts = rdtsc();
if (cpu_local_var_with_override(current, clv_override)->profile) {
cpu_local_var_with_override(current, clv_override)->profile_start_ts = rdtsc();
}
#endif
return ret;
}
int futex(uint32_t *uaddr, int op, uint32_t val, uint64_t timeout,
uint32_t *uaddr2, uint32_t val2, uint32_t val3, int fshared)
uint32_t *uaddr2, uint32_t val2, uint32_t val3, int fshared,
struct cpu_local_var *clv_override)
{
int clockrt, ret = -ENOSYS;
int cmd = op & FUTEX_CMD_MASK;
uti_dkprintf("%s: uaddr=%p, op=%x, val=%x, timeout=%ld, uaddr2=%p, val2=%x, val3=%x, fshared=%d, clv=%p\n", __FUNCTION__, uaddr, op, val, timeout, uaddr2, val2, val3, fshared, clv_override);
clockrt = op & FUTEX_CLOCK_REALTIME;
if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
return -ENOSYS;
@@ -843,21 +910,21 @@ int futex(uint32_t *uaddr, int op, uint32_t val, uint64_t timeout,
case FUTEX_WAIT:
val3 = FUTEX_BITSET_MATCH_ANY;
case FUTEX_WAIT_BITSET:
ret = futex_wait(uaddr, fshared, val, timeout, val3, clockrt);
ret = futex_wait(uaddr, fshared, val, timeout, val3, clockrt, clv_override);
break;
case FUTEX_WAKE:
val3 = FUTEX_BITSET_MATCH_ANY;
case FUTEX_WAKE_BITSET:
ret = futex_wake(uaddr, fshared, val, val3);
ret = futex_wake(uaddr, fshared, val, val3, clv_override);
break;
case FUTEX_REQUEUE:
ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0);
ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0, clv_override);
break;
case FUTEX_CMP_REQUEUE:
ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, 0);
ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, 0, clv_override);
break;
case FUTEX_WAKE_OP:
ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3);
ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3, clv_override);
break;
/* RIKEN: these calls are not supported for now.
case FUTEX_LOCK_PI:

View File

@@ -34,13 +34,13 @@
#include <sysfs.h>
#include <ihk/perfctr.h>
#include <rusage_private.h>
#include <debug.h>
//#define DEBUG_PRINT_HOST
#ifdef DEBUG_PRINT_HOST
#define dkprintf kprintf
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
/* Linux channel table, indexed by Linux CPU id */
@@ -78,7 +78,6 @@ int prepare_process_ranges_args_envs(struct thread *thread,
unsigned long args_envs_p, args_envs_rp;
unsigned long s, e, up;
char **argv;
char **a;
int i, n, argc, envc, args_envs_npages;
char **env;
int range_npages;
@@ -306,7 +305,7 @@ int prepare_process_ranges_args_envs(struct thread *thread,
/* Only unmap remote address if it wasn't specified as an argument */
if (!args) {
ihk_mc_unmap_virtual(args_envs_r, args_envs_npages, 0);
ihk_mc_unmap_virtual(args_envs_r, args_envs_npages);
ihk_mc_unmap_memory(NULL, args_envs_rp, p->args_len);
}
flush_tlb();
@@ -341,7 +340,7 @@ int prepare_process_ranges_args_envs(struct thread *thread,
/* Only map remote address if it wasn't specified as an argument */
if (!envs) {
ihk_mc_unmap_virtual(args_envs_r, args_envs_npages, 0);
ihk_mc_unmap_virtual(args_envs_r, args_envs_npages);
ihk_mc_unmap_memory(NULL, args_envs_rp, p->envs_len);
}
flush_tlb();
@@ -357,12 +356,13 @@ int prepare_process_ranges_args_envs(struct thread *thread,
proc->saved_cmdline_len = 0;
}
proc->saved_cmdline = kmalloc(p->args_len, IHK_MC_AP_NOWAIT);
proc->saved_cmdline_len = p->args_len - ((argc + 2) * sizeof(char **));
proc->saved_cmdline = kmalloc(proc->saved_cmdline_len,
IHK_MC_AP_NOWAIT);
if (!proc->saved_cmdline) {
goto err;
}
proc->saved_cmdline_len = p->args_len - ((argc + 2) * sizeof(char **));
memcpy(proc->saved_cmdline,
(char *)args_envs + ((argc + 2) * sizeof(char **)),
proc->saved_cmdline_len);
@@ -370,21 +370,18 @@ int prepare_process_ranges_args_envs(struct thread *thread,
__FUNCTION__,
proc->saved_cmdline);
for (a = argv; *a; a++) {
*a = (char *)addr + (unsigned long)*a; // Process' address space!
for (i = 0; i < argc; i++) {
// Process' address space!
argv[i] = (char *)addr + (unsigned long)argv[i];
}
envc = *((long *)(args_envs + p->args_len));
dkprintf("envc: %d\n", envc);
env = (char **)(args_envs + p->args_len + sizeof(long));
while (*env) {
char **_env = env;
//dkprintf("%s\n", args_envs + p->args_len + (unsigned long)*env);
*env = (char *)addr + p->args_len + (unsigned long)*env;
env = ++_env;
for (i = 0; i < envc; i++) {
env[i] = addr + p->args_len + env[i];
}
env = (char **)(args_envs + p->args_len + sizeof(long));
dkprintf("env OK\n");
@@ -449,7 +446,7 @@ static int process_msg_prepare_process(unsigned long rphys)
if((pn = kmalloc(sizeof(struct program_load_desc)
+ sizeof(struct program_image_section) * n,
IHK_MC_AP_NOWAIT)) == NULL){
ihk_mc_unmap_virtual(p, npages, 0);
ihk_mc_unmap_virtual(p, npages);
ihk_mc_unmap_memory(NULL, phys, sz);
return -ENOMEM;
}
@@ -460,7 +457,7 @@ static int process_msg_prepare_process(unsigned long rphys)
(unsigned long *)&p->cpu_set,
sizeof(p->cpu_set))) == NULL) {
kfree(pn);
ihk_mc_unmap_virtual(p, npages, 1);
ihk_mc_unmap_virtual(p, npages);
ihk_mc_unmap_memory(NULL, phys, sz);
return -ENOMEM;
}
@@ -482,6 +479,7 @@ static int process_msg_prepare_process(unsigned long rphys)
proc->mpol_flags = pn->mpol_flags;
proc->mpol_threshold = pn->mpol_threshold;
proc->nr_processes = pn->nr_processes;
proc->process_rank = pn->process_rank;
proc->heap_extension = pn->heap_extension;
/* Update NUMA binding policy if requested */
@@ -504,6 +502,9 @@ static int process_msg_prepare_process(unsigned long rphys)
vm->numa_mem_policy = MPOL_BIND;
}
proc->uti_thread_rank = pn->uti_thread_rank;
proc->uti_use_last_cpu = pn->uti_use_last_cpu;
#ifdef PROFILE_ENABLE
proc->profile = pn->profile;
thread->profile = pn->profile;
@@ -542,14 +543,14 @@ static int process_msg_prepare_process(unsigned long rphys)
kfree(pn);
ihk_mc_unmap_virtual(p, npages, 1);
ihk_mc_unmap_virtual(p, npages);
ihk_mc_unmap_memory(NULL, phys, sz);
flush_tlb();
return 0;
err:
kfree(pn);
ihk_mc_unmap_virtual(p, npages, 1);
ihk_mc_unmap_virtual(p, npages);
ihk_mc_unmap_memory(NULL, phys, sz);
destroy_thread(thread);
return -ENOMEM;
@@ -562,7 +563,6 @@ static void syscall_channel_send(struct ihk_ikc_channel_desc *c,
}
extern unsigned long do_kill(struct thread *, int, int, int, struct siginfo *, int ptracecont);
extern void process_procfs_request(struct ikc_scd_packet *rpacket);
extern void terminate_host(int pid);
extern void debug_log(long);
@@ -573,7 +573,6 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
struct ikc_scd_packet pckt;
struct ihk_ikc_channel_desc *resp_channel = cpu_local_var(ikc2linux);
int rc;
struct mcs_rwlock_node_irqsave lock;
struct thread *thread;
struct process *proc;
struct mcctrl_signal {
@@ -610,7 +609,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
case SCD_MSG_SCHEDULE_PROCESS:
thread = (struct thread *)packet->arg;
cpuid = obtain_clone_cpuid(&thread->cpu_set);
cpuid = obtain_clone_cpuid(&thread->cpu_set, 0);
if (cpuid == -1) {
kprintf("No CPU available\n");
ret = -1;
@@ -634,14 +633,14 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
* the waiting thread
*/
case SCD_MSG_WAKE_UP_SYSCALL_THREAD:
thread = find_thread(0, packet->ttid, &lock);
thread = find_thread(0, packet->ttid);
if (!thread) {
kprintf("%s: WARNING: no thread for SCD reply? TID: %d\n",
__FUNCTION__, packet->ttid);
ret = -EINVAL;
break;
}
thread_unlock(thread, &lock);
thread_unlock(thread);
dkprintf("%s: SCD_MSG_WAKE_UP_SYSCALL_THREAD: waking up tid %d\n",
__FUNCTION__, packet->ttid);
@@ -653,7 +652,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
pp = ihk_mc_map_memory(NULL, packet->arg, sizeof(struct mcctrl_signal));
sp = (struct mcctrl_signal *)ihk_mc_map_virtual(pp, 1, PTATTR_WRITABLE | PTATTR_ACTIVE);
memcpy(&info, sp, sizeof(struct mcctrl_signal));
ihk_mc_unmap_virtual(sp, 1, 0);
ihk_mc_unmap_virtual(sp, 1);
ihk_mc_unmap_memory(NULL, pp, sizeof(struct mcctrl_signal));
pckt.msg = SCD_MSG_SEND_SIGNAL_ACK;
pckt.err = 0;
@@ -668,7 +667,14 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
break;
case SCD_MSG_PROCFS_REQUEST:
process_procfs_request(packet);
case SCD_MSG_PROCFS_RELEASE:
pckt.msg = SCD_MSG_PROCFS_ANSWER;
pckt.ref = packet->ref;
pckt.arg = packet->arg;
pckt.err = process_procfs_request(packet);
pckt.reply = packet->reply;
pckt.pid = packet->pid;
syscall_channel_send(resp_channel, &pckt);
ret = 0;
break;
@@ -705,17 +711,26 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
if (!pcd->exclude_user) {
mode |= PERFCTR_USER_MODE;
}
ihk_mc_perfctr_init_raw(pcd->target_cntr, pcd->config, mode);
ihk_mc_perfctr_stop(1 << pcd->target_cntr);
ihk_mc_perfctr_reset(pcd->target_cntr);
ret = ihk_mc_perfctr_init_raw(pcd->target_cntr, pcd->config, mode);
if (ret != 0) {
break;
}
ret = ihk_mc_perfctr_stop(1 << pcd->target_cntr);
if (ret != 0) {
break;
}
ret = ihk_mc_perfctr_reset(pcd->target_cntr);
break;
case PERF_CTRL_ENABLE:
ihk_mc_perfctr_start(pcd->target_cntr_mask);
ret = ihk_mc_perfctr_start(pcd->target_cntr_mask);
break;
case PERF_CTRL_DISABLE:
ihk_mc_perfctr_stop(pcd->target_cntr_mask);
ret = ihk_mc_perfctr_stop(pcd->target_cntr_mask);
break;
case PERF_CTRL_GET:
@@ -726,16 +741,15 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
kprintf("%s: SCD_MSG_PERF_CTRL unexpected ctrl_type\n", __FUNCTION__);
}
ihk_mc_unmap_virtual(pcd, 1, 0);
ihk_mc_unmap_virtual(pcd, 1);
ihk_mc_unmap_memory(NULL, pp, sizeof(struct perf_ctrl_desc));
pckt.msg = SCD_MSG_PERF_ACK;
pckt.err = 0;
pckt.err = ret;
pckt.arg = packet->arg;
pckt.reply = packet->reply;
ihk_ikc_send(resp_channel, &pckt, 0);
ret = 0;
break;
case SCD_MSG_CPU_RW_REG:

303
kernel/hugefileobj.c Normal file
View File

@@ -0,0 +1,303 @@
#include <memobj.h>
#include <ihk/mm.h>
#include <kmsg.h>
#include <kmalloc.h>
#include <string.h>
#include <debug.h>
#if DEBUG_HUGEFILEOBJ
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
/*
 * One contiguous run of backing memory for part of a hugetlbfs file.
 * Chunks are linked into hugefileobj.chunk_list.
 */
struct hugefilechunk {
struct list_head list; /* link in hugefileobj.chunk_list */
off_t pgoff; /* file offset of the first page, in PAGE_SIZE units */
int npages; /* length of the run, in PAGE_SIZE units */
void *mem; /* kernel virtual address of the allocated pages */
};
/*
 * Memory object backing one hugetlbfs file, identified by 'handle'.
 * 'memobj' must remain the first member: to_hugefileobj() converts a
 * struct memobj pointer with a plain cast.
 */
struct hugefileobj {
struct memobj memobj; /* generic part (ops, flags, refcnt, path, ...) */
size_t pgsize; /* huge page size in bytes, taken from the pager result */
uintptr_t handle; /* lookup key, taken from the pager result */
unsigned int pgshift; /* log2 of the page size; 0 until hugefileobj_create() */
struct list_head chunk_list; /* hugefilechunk entries, ordered by pgoff */
ihk_spinlock_t chunk_lock; /* taken around chunk_list/pgshift accesses */
struct list_head obj_list; /* link in the global hugefileobj_list */
};
/*
 * Global list of all live hugefileobj instances (linked through obj_list)
 * and the spinlock taken around insertions, removals and lookups.
 */
static ihk_spinlock_t hugefileobj_list_lock;
static LIST_HEAD(hugefileobj_list);
/*
 * Convert a generic memobj pointer to its enclosing hugefileobj.
 * Relies on 'memobj' being the first member of struct hugefileobj.
 */
static struct hugefileobj *to_hugefileobj(struct memobj *memobj)
{
	struct hugefileobj *hobj = (struct hugefileobj *)memobj;

	return hobj;
}
/* Return the generic memobj embedded in a hugefileobj. */
static struct memobj *to_memobj(struct hugefileobj *obj)
{
	struct memobj *base = &obj->memobj;

	return base;
}
/*
 * Find the live hugefileobj registered for 'handle' and take a reference
 * on it.  hugefileobj_pre_create() calls this with hugefileobj_list_lock
 * held.
 *
 * Returns the object with its refcount incremented, or NULL if no usable
 * object exists.
 */
static struct hugefileobj *hugefileobj_lookup(uintptr_t handle)
{
	struct hugefileobj *cand;

	list_for_each_entry(cand, &hugefileobj_list, obj_list) {
		if (cand->handle != handle)
			continue;

		/* Reference obtained on an object that is still alive. */
		if (memobj_ref(&cand->memobj) > 1)
			return cand;

		/*
		 * The count was already zero: we are in the interval
		 * between the last put and hugefileobj_free taking
		 * list_lock.  Undo our increment and keep searching.
		 */
		ihk_atomic_dec(&cand->memobj.refcnt);
	}

	return NULL;
}
/*
 * Translate a byte offset within the file into the physical address of
 * the memory backing it.
 *
 * @memobj:    memory object (must be a hugefileobj)
 * @off:       byte offset into the file
 * @p2align:   requested alignment; must equal obj->pgshift - PTL1_SHIFT
 * @physp:     out: physical address for @off
 * @pflag:     unused here
 * @virt_addr: unused here
 *
 * Returns 0 on success, -ENOMEM if @p2align does not match the object's
 * page size, -EIO if no chunk covers @off.
 */
static int hugefileobj_get_page(struct memobj *memobj, off_t off,
				int p2align, uintptr_t *physp,
				unsigned long *pflag, uintptr_t virt_addr)
{
	struct hugefileobj *obj = to_hugefileobj(memobj);
	struct hugefilechunk *chunk;
	struct hugefilechunk *found = NULL;
	off_t pgoff;

	if (p2align != obj->pgshift - PTL1_SHIFT) {
		kprintf("%s: p2align %d but expected %d\n",
			__func__, p2align, obj->pgshift - PTL1_SHIFT);
		return -ENOMEM;
	}

	/*
	 * chunk->pgoff and chunk->npages are expressed in PAGE_SIZE units
	 * (see hugefileobj_create() and the address computation below), so
	 * the lookup key must use the same unit.
	 */
	pgoff = off >> PAGE_SHIFT;

	ihk_mc_spinlock_lock_noirq(&obj->chunk_lock);
	list_for_each_entry(chunk, &obj->chunk_list, list) {
		/* Chunk ends before the wanted page: keep going. */
		if (pgoff >= chunk->pgoff + chunk->npages)
			continue;
		/*
		 * The list is ordered: either this chunk covers the page
		 * or the page falls into a hole before it.
		 */
		if (pgoff >= chunk->pgoff)
			found = chunk;
		break;
	}
	ihk_mc_spinlock_unlock_noirq(&obj->chunk_lock);

	/*
	 * 'found' stays NULL both for a hole and when the loop ran off the
	 * end of the list; the iterator itself is not a valid chunk in the
	 * latter case and must not be dereferenced.
	 */
	if (!found) {
		kprintf("%s: no segment found for pgoff %lx (obj %p)\n",
			__func__, pgoff, obj);
		return -EIO;
	}

	*physp = virt_to_phys((char *)found->mem +
			      (off - found->pgoff * PAGE_SIZE));

	return 0;
}
/*
 * Destroy a hugefileobj: unlink it from the global list, release every
 * backing chunk, then free the path buffer and the object itself.
 * Installed as the .free operation in hugefileobj_ops.
 */
static void hugefileobj_free(struct memobj *memobj)
{
	struct hugefileobj *hobj = to_hugefileobj(memobj);
	struct hugefilechunk *cur, *tmp;

	dkprintf("Destroying hugefileobj %p\n", memobj);

	ihk_mc_spinlock_lock_noirq(&hugefileobj_list_lock);
	list_del(&hobj->obj_list);
	ihk_mc_spinlock_unlock_noirq(&hugefileobj_list_lock);

	/* don't bother with chunk_lock, memobj refcounting makes this safe */
	list_for_each_entry_safe(cur, tmp, &hobj->chunk_list, list) {
		ihk_mc_free_pages_user(cur->mem, cur->npages);
		kfree(cur);
	}

	kfree(memobj->path);
	kfree(memobj);
}
/* memobj operations provided by hugetlbfs-backed objects. */
struct memobj_ops hugefileobj_ops = {
	.get_page = hugefileobj_get_page,
	.free = hugefileobj_free,
};
/*
 * Drop one reference on every registered hugefileobj until the global
 * list is empty.  An object whose count does not reach zero is reported
 * and destroyed anyway.
 */
void hugefileobj_cleanup(void)
{
	struct hugefileobj *obj;
	int refcnt;

	for (;;) {
		ihk_mc_spinlock_lock_noirq(&hugefileobj_list_lock);
		if (list_empty(&hugefileobj_list)) {
			ihk_mc_spinlock_unlock_noirq(&hugefileobj_list_lock);
			break;
		}
		obj = list_first_entry(&hugefileobj_list, struct hugefileobj,
				       obj_list);
		ihk_mc_spinlock_unlock_noirq(&hugefileobj_list_lock);

		/* memobj_unref() frees the object itself when it hits 0. */
		refcnt = memobj_unref(to_memobj(obj));
		if (refcnt != 0) {
			/* refcnt is an int: print it with %d, not %ld */
			kprintf("%s: obj %p had refcnt %d > 1, destroying anyway\n",
				__func__, obj, refcnt + 1);
			hugefileobj_free(to_memobj(obj));
		}
	}
}
/*
 * Look up, or allocate and register, the hugefileobj for the file
 * described by @result.
 *
 * @result:   pager reply (handle, size, flags, path, maxprot)
 * @objp:     out: the memory object
 * @maxprotp: out: maximum protection reported by the pager
 *
 * An existing object is returned with an extra reference taken by
 * hugefileobj_lookup().  A new object starts with a reference count of
 * two so the mapping is kept around when the process is gone.
 *
 * Returns 0 on success or -ENOMEM; the outputs are only written on
 * success.
 */
int hugefileobj_pre_create(struct pager_create_result *result,
			   struct memobj **objp, int *maxprotp)
{
	struct hugefileobj *obj;
	int error = 0;

	ihk_mc_spinlock_lock_noirq(&hugefileobj_list_lock);

	obj = hugefileobj_lookup(result->handle);
	if (obj)
		goto out_unlock;

	obj = kmalloc(sizeof(*obj), IHK_MC_AP_NOWAIT);
	if (!obj) {
		/* must not return with the list lock held */
		error = -ENOMEM;
		goto out_unlock;
	}

	/*
	 * Start from a clean object: in particular memobj.path must be
	 * NULL when no path is supplied, because hugefileobj_free()
	 * passes it to kfree() unconditionally.
	 */
	memset(obj, 0, sizeof(*obj));

	obj->handle = result->handle;
	obj->pgsize = result->size;
	obj->pgshift = 0;
	INIT_LIST_HEAD(&obj->chunk_list);
	ihk_mc_spinlock_init(&obj->chunk_lock);
	obj->memobj.flags = result->flags;
	obj->memobj.status = MEMOBJ_TO_BE_PREFETCHED;
	obj->memobj.ops = &hugefileobj_ops;
	/* keep mapping around when process is gone */
	ihk_atomic_set(&obj->memobj.refcnt, 2);

	if (result->path[0]) {
		obj->memobj.path = kmalloc(PATH_MAX, IHK_MC_AP_NOWAIT);
		if (!obj->memobj.path) {
			kfree(obj);
			error = -ENOMEM;
			goto out_unlock;
		}
		strncpy(obj->memobj.path, result->path, PATH_MAX);
		/* strncpy() does not terminate on truncation */
		obj->memobj.path[PATH_MAX - 1] = '\0';
	}

	list_add(&obj->obj_list, &hugefileobj_list);

out_unlock:
	ihk_mc_spinlock_unlock_noirq(&hugefileobj_list_lock);

	if (error)
		return error;

	*maxprotp = result->maxprot;
	*objp = to_memobj(obj);

	return 0;
}
/*
 * Make sure the file range [off, off + len) is backed by memory,
 * allocating chunks for the parts that are not covered yet.
 *
 * @memobj:    memory object (must be a hugefileobj)
 * @len:       length of the mapping in bytes; multiple of the page size
 * @off:       file offset in bytes; multiple of the page size
 * @pgshiftp:  out: log2 of the object's page size (written on success)
 * @virt_addr: passed through to the page allocator
 *
 * Returns 0 on success, -EINVAL for an unsupported page size, a
 * misaligned length/offset or a page size that differs from an earlier
 * call, -ENOMEM when memory runs out.  Chunks inserted before a failure
 * stay on the list ("caller will cleanup the rest").
 */
int hugefileobj_create(struct memobj *memobj, size_t len, off_t off,
		       int *pgshiftp, uintptr_t virt_addr)
{
	struct hugefileobj *obj = to_hugefileobj(memobj);
	struct hugefilechunk *chunk = NULL, *old_chunk = NULL;
	int p2align;
	unsigned int pgshift;
	unsigned long pgsize;
	int npages, npages_left, nalloc;
	void *v;
	off_t pgoff, next_pgoff;
	int error;

	error = arch_get_smaller_page_size(NULL, obj->pgsize + 1, NULL,
					   &p2align);
	if (error)
		return error;

	pgshift = p2align + PTL1_SHIFT;
	pgsize = 1UL << pgshift;
	if (pgsize != obj->pgsize) {
		dkprintf("invalid hugefileobj pagesize: %lu\n",
			 (unsigned long)obj->pgsize);
		return -EINVAL;
	}

	if (len & (pgsize - 1)) {
		dkprintf("invalid hugetlbfs mmap size %lu (pagesize %lu)\n",
			 (unsigned long)len, pgsize);
		obj->pgshift = 0;
		return -EINVAL;
	}
	if (off & (pgsize - 1)) {
		dkprintf("invalid hugetlbfs mmap offset %ld (pagesize %lu)\n",
			 (long)off, pgsize);
		obj->pgshift = 0;
		return -EINVAL;
	}

	ihk_mc_spinlock_lock_noirq(&obj->chunk_lock);

	if (obj->pgshift && obj->pgshift != pgshift) {
		kprintf("pgshift changed between two calls on same inode?! had %d now %d\n",
			obj->pgshift, pgshift);
		error = -EINVAL;
		goto out_unlock;
	}
	obj->pgshift = pgshift;

	/* Prealloc upfront, we need to fail here if not enough memory. */
	if (!list_empty(&obj->chunk_list))
		old_chunk = list_first_entry(&obj->chunk_list,
					     struct hugefilechunk, list);

	/* From here on everything is counted in PAGE_SIZE units. */
	pgoff = off >> PAGE_SHIFT;
	npages_left = len >> PAGE_SHIFT;
	npages = npages_left;

	/*
	 * '> 0' rather than '!= 0': skipping an existing chunk that
	 * extends past the requested range drives the counter negative.
	 */
	while (npages_left > 0) {
		/* Advance to the first existing chunk ending after pgoff. */
		while (old_chunk &&
		       pgoff >= old_chunk->pgoff + old_chunk->npages) {
			if (list_is_last(&old_chunk->list, &obj->chunk_list)) {
				old_chunk = NULL;
				break;
			}
			old_chunk = list_entry(old_chunk->list.next,
					       struct hugefilechunk, list);
		}

		/* Already backed by an existing chunk: skip over it. */
		if (old_chunk) {
			next_pgoff = old_chunk->pgoff + old_chunk->npages;
			if (pgoff >= old_chunk->pgoff && pgoff < next_pgoff) {
				npages_left -= next_pgoff - pgoff;
				pgoff = next_pgoff;
				continue;
			}
		}

		if (!chunk) {
			chunk = kmalloc(sizeof(*chunk), IHK_MC_AP_NOWAIT);
			if (!chunk) {
				kprintf("could not allocate hugefileobj chunk\n");
				error = -ENOMEM;
				goto out_unlock;
			}
		}

		/*
		 * Size of this attempt: bounded by the back-off cap, by
		 * what is left of the request and by the hole in front of
		 * the next existing chunk (old_chunk, if any, starts after
		 * pgoff here) so the new chunk never overlaps it.
		 */
		nalloc = npages;
		if (nalloc > npages_left)
			nalloc = npages_left;
		if (old_chunk && old_chunk->pgoff - pgoff < nalloc)
			nalloc = old_chunk->pgoff - pgoff;

		v = ihk_mc_alloc_aligned_pages_user(nalloc, p2align,
				IHK_MC_AP_NOWAIT | IHK_MC_AP_USER, virt_addr);
		if (!v) {
			if (nalloc == 1) {
				dkprintf("could not allocate more pages wth pgshift %d\n",
					 pgshift);
				kfree(chunk);
				/* caller will cleanup the rest */
				error = -ENOMEM;
				goto out_unlock;
			}
			/* exponential backoff, try less aggressive? */
			npages = nalloc / 2;
			continue;
		}
		memset(v, 0, nalloc * PAGE_SIZE);

		chunk->npages = nalloc;
		chunk->mem = v;
		chunk->pgoff = pgoff;
		/* ordered list: insert before next (bigger) element */
		if (old_chunk)
			list_add(&chunk->list, old_chunk->list.prev);
		else
			list_add(&chunk->list, obj->chunk_list.prev);
		/*
		 * The node now belongs to the list; a further iteration
		 * must allocate a fresh one instead of re-linking this one.
		 */
		chunk = NULL;

		pgoff += nalloc;
		npages_left -= nalloc;
	}

	obj->memobj.size = len;
	error = 0;

out_unlock:
	ihk_mc_spinlock_unlock_noirq(&obj->chunk_lock);

	if (!error)
		*pgshiftp = pgshift;

	return error;
}

View File

@@ -21,7 +21,7 @@
struct kmalloc_header {
unsigned int front_magic;
unsigned int cpu_id;
int cpu_id;
struct list_head list;
int size; /* The size of this chunk without the header */
unsigned int end_magic;
@@ -74,6 +74,7 @@ struct cpu_local_var {
struct thread *current;
struct list_head runq;
size_t runq_len;
size_t runq_reserved; /* Number of threads which are about to be added to runq */
struct ihk_ikc_channel_desc *ikc2linux;
@@ -99,6 +100,9 @@ struct cpu_local_var {
struct list_head smp_func_req_list;
struct process_vm *on_fork_vm;
/* UTI */
void *uti_futex_resp;
} __attribute__((aligned(64)));
@@ -110,4 +114,6 @@ static struct cpu_local_var *get_this_cpu_local_var(void)
#define cpu_local_var(name) get_this_cpu_local_var()->name
#define cpu_local_var_with_override(name, clv_override) (clv_override ? clv_override->name : get_this_cpu_local_var()->name)
#endif

54
kernel/include/debug.h Normal file
View File

@@ -0,0 +1,54 @@
#ifndef DEBUG_H
#define DEBUG_H
#include "lwk/compiler.h"
void panic(const char *);
/* when someone has a lot of time, add attribute __printf(1, 2) to kprintf */
int kprintf(const char *format, ...);
/*
 * Descriptor of one dkprintf() call site.  DDEBUG_SYMBOL() emits one
 * static instance per call site into the "__verbose" section.
 */
struct ddebug {
const char *file; /* __FILE__ of the call site */
const char *func; /* __func__ of the call site */
const char *fmt; /* not set by DDEBUG_SYMBOL() in this header */
unsigned int line:24; /* __LINE__ of the call site */
unsigned int flags:8; /* DDEBUG_* bits; non-zero makes dkprintf() print */
} __aligned(8);
/* Values for ddebug.flags. */
#define DDEBUG_NONE 0x0
#define DDEBUG_PRINT 0x1
/*
 * Initial flags given to every call site.  A source file that wants its
 * dkprintf() output enabled from the start redefines this to
 * DDEBUG_PRINT after including this header.
 */
#define DDEBUG_DEFAULT DDEBUG_NONE
/*
 * Define the static per-call-site descriptor named 'ddebug', placed in
 * the "__verbose" section.
 */
#define DDEBUG_SYMBOL() \
static struct ddebug __aligned(8) \
__attribute__((section("__verbose"))) ddebug = { \
.file = __FILE__, \
.func = __func__, \
.line = __LINE__, \
.flags = DDEBUG_DEFAULT, \
}
/* True when the enclosing call site's descriptor has any flag set. */
#define DDEBUG_TEST ddebug.flags
/*
 * Debug print: forwards to kprintf() only when the call site's flags are
 * non-zero.  NOTE(review): flags are presumably toggled at run time via
 * the dynamic debug sysfs interface - confirm against its implementation.
 */
#define dkprintf(fmt, args...) \
do { \
DDEBUG_SYMBOL(); \
if (DDEBUG_TEST) \
kprintf(fmt, ##args); \
} while (0)
/* Error print: always forwards to kprintf(). */
#define ekprintf(fmt, args...) kprintf(fmt, ##args)
/*
 * If 'condition' is true, print the file, function and line of the
 * call site and then call panic("").
 */
#define BUG_ON(condition) do { \
if (condition) { \
kprintf("PANIC: %s: %s(line:%d)\n", \
__FILE__, __func__, __LINE__); \
panic(""); \
} \
} while (0)
/*
 * Compile-time assertion: a true 'condition' yields a char array of
 * size -1, which the compiler rejects; a false one yields size 1.
 */
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
#endif

View File

@@ -63,7 +63,7 @@
#define FUTEX_OP_ANDN 3 /* *(int *)UADDR2 &= ~OPARG; */
#define FUTEX_OP_XOR 4 /* *(int *)UADDR2 ^= OPARG; */
#define FUTEX_OP_OPARG_SHIFT 8 /* Use (1 << OPARG) instead of OPARG. */
#define FUTEX_OP_OPARG_SHIFT 8U /* Use (1 << OPARG) instead of OPARG. */
#define FUTEX_OP_CMP_EQ 0 /* if (oldval == CMPARG) wake */
#define FUTEX_OP_CMP_NE 1 /* if (oldval != CMPARG) wake */
@@ -150,6 +150,7 @@ union futex_key {
extern int futex_init(void);
struct cpu_local_var;
extern int
futex(
uint32_t __user * uaddr,
@@ -159,7 +160,8 @@ futex(
uint32_t __user * uaddr2,
uint32_t val2,
uint32_t val3,
int fshared
int fshared,
struct cpu_local_var *clv_override
);

View File

@@ -33,6 +33,7 @@ extern void cpu_sysfs_setup(void);
extern void numa_sysfs_setup(void);
extern void rusage_sysfs_setup(void);
extern void status_sysfs_setup(void);
extern void dynamic_debug_sysfs_setup(void);
extern char *find_command_line(char *name);

View File

@@ -13,11 +13,9 @@
#ifndef __HEADER_KMALLOC_H
#define __HEADER_KMALLOC_H
#include <ihk/mm.h>
#include <cls.h>
void panic(const char *);
int kprintf(const char *format, ...);
#include "ihk/mm.h"
#include "cls.h"
#include "debug.h"
#define kmalloc(size, flag) ({\
void *r = _kmalloc(size, flag, __FILE__, __LINE__);\

View File

@@ -12,11 +12,8 @@
/* Optimization barrier */
/* The "volatile" is due to gcc bugs */
/* XXX: barrier is also defined in lib/include/ihk/cpu.h,
* it would be cleaner to restore this here at some point, but we have
* quite a few C files not including either this or kernel's compiler.h
* #define barrier() __asm__ __volatile__("": : :"memory")
*/
#define barrier() __asm__ __volatile__("": : :"memory")
/*
* This version is i.e. to prevent dead stores elimination on @ptr
* where gcc and llvm may behave differently when otherwise using

View File

@@ -3,6 +3,8 @@
#ifndef __ASSEMBLY__
#include <types.h>
#ifdef __CHECKER__
# define __user __attribute__((noderef, address_space(1)))
# define __kernel __attribute__((address_space(0)))
@@ -175,11 +177,6 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
# define unlikely(x) __builtin_expect(!!(x), 0)
#endif
/* Optimization barrier */
#ifndef barrier
# define barrier() __memory_barrier()
#endif
#ifndef barrier_data
# define barrier_data(ptr) barrier()
#endif
@@ -490,4 +487,62 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
(_________p1); \
})
extern void *memcpy(void *dest, const void *src, size_t n);
/*
 * Copy 'size' bytes from *p to *res.  Sizes 1, 2, 4 and 8 are read with
 * a single volatile load of the matching unsigned integer type; any
 * other size is copied with memcpy() bracketed by barrier().
 */
static __always_inline void __read_once_size(const volatile void *p, void *res, int size)
{
switch (size) {
case 1: *(unsigned char *)res = *(volatile unsigned char *)p; break;
case 2: *(unsigned short *)res = *(volatile unsigned short *)p; break;
case 4: *(unsigned int *)res = *(volatile unsigned int *)p; break;
case 8: *(unsigned long long *)res = *(volatile unsigned long long *)p; break;
default:
barrier();
memcpy((void *)res, (const void *)p, size);
barrier();
}
}
/*
 * Copy 'size' bytes from *res to *p.  Sizes 1, 2, 4 and 8 are written
 * with a single volatile store of the matching unsigned integer type;
 * any other size is copied with memcpy() bracketed by barrier().
 */
static __always_inline void __write_once_size(volatile void *p, void *res, int size)
{
switch (size) {
case 1: *(volatile unsigned char *)p = *(unsigned char *)res; break;
case 2: *(volatile unsigned short *)p = *(unsigned short *)res; break;
case 4: *(volatile unsigned int *)p = *(unsigned int *)res; break;
case 8: *(volatile unsigned long long *)p = *(unsigned long long *)res; break;
default:
barrier();
memcpy((void *)p, (const void *)res, size);
barrier();
}
}
/*
* Prevent the compiler from merging or refetching reads or writes. The
* compiler is also forbidden from reordering successive instances of
* READ_ONCE, WRITE_ONCE and ACCESS_ONCE (see below), but only when the
* compiler is aware of some particular ordering. One way to make the
* compiler aware of ordering is to put the two invocations of READ_ONCE,
* WRITE_ONCE or ACCESS_ONCE() in different C statements.
*
* In contrast to ACCESS_ONCE these two macros will also work on aggregate
* data types like structs or unions. If the size of the accessed data
* type exceeds the word size of the machine (e.g., 32 bits or 64 bits)
* READ_ONCE() and WRITE_ONCE() will fall back to memcpy() bracketed by
* compiler barriers (this copy emits no compile-time warning in that case).
*
* Their two major use cases are: (1) Mediating communication between
* process-level code and irq/NMI handlers, all running on the same CPU,
* and (2) Ensuring that the compiler does not fold, spindle, or otherwise
* mutilate accesses that either do not require ordering or that interact
* with an explicit memory barrier or atomic instruction that provides the
* required ordering.
*/
#define READ_ONCE(x) \
({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
#define WRITE_ONCE(x, val) \
({ typeof(x) __val = (val); __write_once_size(&(x), &__val, sizeof(__val)); __val; })
#endif /* __LWK_COMPILER_H */

View File

@@ -25,7 +25,7 @@
#define FUTEX_OP_ANDN 3 /* *(int *)UADDR2 &= ~OPARG; */
#define FUTEX_OP_XOR 4 /* *(int *)UADDR2 ^= OPARG; */
#define FUTEX_OP_OPARG_SHIFT 8 /* Use (1 << OPARG) instead of OPARG. */
#define FUTEX_OP_OPARG_SHIFT 8U /* Use (1 << OPARG) instead of OPARG. */
#define FUTEX_OP_CMP_EQ 0 /* if (oldval == CMPARG) wake */
#define FUTEX_OP_CMP_NE 1 /* if (oldval != CMPARG) wake */

View File

@@ -19,6 +19,7 @@
#include <ihk/lock.h>
#include <errno.h>
#include <list.h>
#include <pager.h>
#ifdef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
#else /* POSTK_DEBUG_ARCH_DEP_18 */
@@ -44,8 +45,7 @@ enum {
MF_XPMEM = 0x10000, /* To identify XPMEM attachment pages for rusage accounting */
MF_ZEROOBJ = 0x20000, /* To identify pages of anonymous, on-demand paging ranges for rusage accounting */
MF_SHM = 0x40000,
MF_HOST_RELEASED = 0x80000000,
MF_END
MF_HUGETLBFS = 0x100000,
};
#define MEMOBJ_READY 0
@@ -56,7 +56,7 @@ struct memobj {
uint32_t flags;
uint32_t status;
size_t size;
ihk_spinlock_t lock;
ihk_atomic_t refcnt;
/* For pre-mapped memobjects */
void **pages;
@@ -64,8 +64,7 @@ struct memobj {
char *path;
};
typedef void memobj_release_func_t(struct memobj *obj);
typedef void memobj_ref_func_t(struct memobj *obj);
typedef void memobj_free_func_t(struct memobj *obj);
typedef int memobj_get_page_func_t(struct memobj *obj, off_t off, int p2align, uintptr_t *physp, unsigned long *flag, uintptr_t virt_addr);
typedef uintptr_t memobj_copy_page_func_t(struct memobj *obj, uintptr_t orgphys, int p2align);
typedef int memobj_flush_page_func_t(struct memobj *obj, uintptr_t phys, size_t pgsize);
@@ -73,27 +72,28 @@ typedef int memobj_invalidate_page_func_t(struct memobj *obj, uintptr_t phys, si
typedef int memobj_lookup_page_func_t(struct memobj *obj, off_t off, int p2align, uintptr_t *physp, unsigned long *flag);
struct memobj_ops {
memobj_release_func_t * release;
memobj_ref_func_t * ref;
memobj_get_page_func_t * get_page;
memobj_copy_page_func_t * copy_page;
memobj_flush_page_func_t * flush_page;
memobj_invalidate_page_func_t * invalidate_page;
memobj_lookup_page_func_t * lookup_page;
memobj_free_func_t *free;
memobj_get_page_func_t *get_page;
memobj_copy_page_func_t *copy_page;
memobj_flush_page_func_t *flush_page;
memobj_invalidate_page_func_t *invalidate_page;
memobj_lookup_page_func_t *lookup_page;
};
static inline void memobj_release(struct memobj *obj)
static inline int memobj_ref(struct memobj *obj)
{
if (obj->ops->release) {
(*obj->ops->release)(obj);
}
return ihk_atomic_inc_return(&obj->refcnt);
}
static inline void memobj_ref(struct memobj *obj)
static inline int memobj_unref(struct memobj *obj)
{
if (obj->ops->ref) {
(*obj->ops->ref)(obj);
int cnt;
if ((cnt = ihk_atomic_dec_return(&obj->refcnt)) == 0) {
(*obj->ops->free)(obj);
}
return cnt;
}
static inline int memobj_get_page(struct memobj *obj, off_t off,
@@ -140,16 +140,6 @@ static inline int memobj_lookup_page(struct memobj *obj, off_t off,
return -ENXIO;
}
static inline void memobj_lock(struct memobj *obj)
{
ihk_mc_spinlock_lock_noirq(&obj->lock);
}
static inline void memobj_unlock(struct memobj *obj)
{
ihk_mc_spinlock_unlock_noirq(&obj->lock);
}
static inline int memobj_has_pager(struct memobj *obj)
{
return !!(obj->flags & MF_HAS_PAGER);
@@ -166,5 +156,10 @@ int shmobj_create(struct shmid_ds *ds, struct memobj **objp);
int zeroobj_create(struct memobj **objp);
int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxprotp,
int prot, int populate_flags);
int hugefileobj_pre_create(struct pager_create_result *result,
struct memobj **objp, int *maxprotp);
int hugefileobj_create(struct memobj *obj, size_t len, off_t off,
int *pgshiftp, uintptr_t virt_addr);
void hugefileobj_cleanup(void);
#endif /* HEADER_MEMOBJ_H */

View File

@@ -70,10 +70,8 @@
#define PS_TRACED 0x40 /* Set to "not running" by a ptrace related event */
#define PS_STOPPING 0x80
#define PS_TRACING 0x100
#ifdef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */
#define PS_DELAY_STOPPED 0x200
#define PS_DELAY_TRACED 0x400
#endif /* POSTK_DEBUG_TEMP_FIX_41 */
#define PS_NORMAL (PS_INTERRUPTIBLE | PS_UNINTERRUPTIBLE)
@@ -244,6 +242,11 @@ enum mpol_rebind_step {
#define SPAWN_TO_REMOTE 1
#define SPAWNING_TO_REMOTE 1001
#define UTI_STATE_DEAD 0
#define UTI_STATE_PROLOGUE 1
#define UTI_STATE_RUNNING_IN_LINUX 2
#define UTI_STATE_EPILOGUE 3
#include <waitq.h>
#include <futex.h>
@@ -277,6 +280,7 @@ extern struct list_head resource_set_list;
extern mcs_rwlock_lock_t resource_set_lock;
extern int idle_halt;
extern int allow_oversubscribe;
extern ihk_spinlock_t runq_reservation_lock; /* mutex for cpuid reservation (clv->runq_reserved) */
struct process_hash {
struct list_head list[HASH_SIZE];
@@ -460,6 +464,14 @@ struct process {
// threads and children
struct list_head threads_list;
struct list_head report_threads_list;
/*
* main_thread is used to refer to thread information using process ID.
* 1) signal related state in signal_flags
* 2) status of trace
*/
struct thread *main_thread;
mcs_rwlock_lock_t threads_lock; // lock for threads_list
/* TID set of proxy process */
struct mcexec_tid *tids;
@@ -488,7 +500,6 @@ struct process {
// V +---- |
// PS_STOPPED -----+
// (PS_TRACED)
unsigned long exit_status; // only for zombie
/* Store exit_status for a group of threads when stopped by SIGSTOP.
exit_status can't be used because values of exit_status of threads
@@ -520,22 +531,6 @@ struct process {
long saved_cmdline_len;
cpu_set_t cpu_set;
/* Store ptrace flags.
* The lower 8 bits are PTRACE_O_xxx of the PTRACE_SETOPTIONS request.
* Other bits are for inner use of the McKernel.
*/
int ptrace;
/* Store ptrace event message.
* PTRACE_O_xxx will store event message here.
* PTRACE_GETEVENTMSG will get from here.
*/
unsigned long ptrace_eventmsg;
/* Store event related to signal. For example,
it represents that the proceess has been resumed by SIGCONT. */
int signal_flags;
/* Store signal sent to parent when the process terminates. */
int termsig;
@@ -557,6 +552,9 @@ struct process {
size_t mpol_threshold;
unsigned long heap_extension;
unsigned long mpol_bind_mask;
int uti_thread_rank; /* Spawn on Linux CPU when clone_count reaches this */
int uti_use_last_cpu; /* Work-around not to share CPU with OpenMP thread */
int clone_count;
// perf_event
int perf_status;
@@ -572,6 +570,7 @@ struct process {
unsigned long profile_elapsed_ts;
#endif // PROFILE_ENABLE
int nr_processes; /* For partitioned execution */
int process_rank; /* Rank in partition */
};
/*
@@ -602,7 +601,7 @@ struct thread {
// thread info
int cpu_id;
int tid;
int status; // PS_RUNNING -> PS_EXITED
int status; // PS_RUNNING -> PS_EXITED (-> ZOMBIE / ptrace)
// | ^ ^
// | | |
// V | |
@@ -612,6 +611,14 @@ struct thread {
// PS_UNINTERRUPTIBLE
int exit_status;
/*
* Store event related to signal. For example,
* it represents that the process has been resumed by SIGCONT.
*/
int signal_flags;
int termsig;
// process vm
struct process_vm *vm;
@@ -631,6 +638,22 @@ struct thread {
ihk_spinlock_t spin_sleep_lock;
int spin_sleep;
// for ptrace
struct process *report_proc;
struct list_head report_siblings_list; // lock process
/* Store ptrace flags.
* The lower 8 bits are PTRACE_O_xxx of the PTRACE_SETOPTIONS request.
* Other bits are for inner use of the McKernel.
*/
int ptrace;
/* Store ptrace event message.
* PTRACE_O_xxx will store event message here.
* PTRACE_GETEVENTMSG will get from here.
*/
unsigned long ptrace_eventmsg;
ihk_atomic_t refcount;
int *clear_child_tid;
@@ -687,10 +710,11 @@ struct thread {
/* Syscall offload wait queue head */
struct waitq scd_wq;
int thread_offloaded;
int uti_state;
int mod_clone;
struct uti_attr *mod_clone_arg;
int parent_cpuid;
int uti_refill_tid;
// for performance counter
unsigned long pmc_alloc_map;
@@ -716,6 +740,8 @@ struct process_vm {
// 2. addition of process page table (allocate_pages, update_process_page_table)
// note that physical memory allocator (ihk_mc_alloc_pages, ihk_pagealloc_alloc)
// is protected by its own lock (see ihk/manycore/generic/page_alloc.c)
unsigned long is_memory_range_lock_taken;
/* #986: Fix deadlock between do_page_fault_process_vm() and set_host_vma() */
ihk_atomic_t refcount;
int exiting;
@@ -819,14 +845,32 @@ void cpu_clear_and_set(int c_cpu, int s_cpu,
void release_cpuid(int cpuid);
struct thread *find_thread(int pid, int tid, struct mcs_rwlock_node_irqsave *lock);
void thread_unlock(struct thread *thread, struct mcs_rwlock_node_irqsave *lock);
struct thread *find_thread(int pid, int tid);
void thread_unlock(struct thread *thread);
struct process *find_process(int pid, struct mcs_rwlock_node_irqsave *lock);
void process_unlock(struct process *proc, struct mcs_rwlock_node_irqsave *lock);
void chain_process(struct process *);
void chain_thread(struct thread *);
void proc_init(void);
void set_timer(void);
void set_timer(int runq_locked);
struct sig_pending *hassigpending(struct thread *thread);
extern int do_signal(unsigned long rc, void *regs0, struct thread *thread,
struct sig_pending *pending, int num);
extern void check_signal(unsigned long rc, void *regs0, int num);
extern unsigned long do_kill(struct thread *thread, int pid, int tid, int sig,
struct siginfo *info, int ptracecont);
extern void set_signal(int sig, void *regs, struct siginfo *info);
extern void check_sig_pending(void);
void clear_single_step(struct thread *thread);
void release_fp_regs(struct thread *proc);
void save_fp_regs(struct thread *proc);
void copy_fp_regs(struct thread *from, struct thread *to);
void restore_fp_regs(struct thread *proc);
void clear_fp_regs(void);
#define VERIFY_READ 0
#define VERIFY_WRITE 1
int access_ok(struct process_vm *vm, int type, uintptr_t addr, size_t len);
#endif

View File

@@ -10,6 +10,7 @@
#include <rusage.h>
#include <ihk/ihk_monitor.h>
#include <arch_rusage.h>
#include <debug.h>
#ifdef ENABLE_RUSAGE
@@ -55,7 +56,7 @@ rusage_rss_add(unsigned long size)
}
vm->currss += size;
if (vm->currss > vm->proc->maxrss) {
if (vm->proc && vm->currss > vm->proc->maxrss) {
vm->proc->maxrss = vm->currss;
}
}
@@ -118,8 +119,9 @@ static inline int rusage_memory_stat_add(struct vm_range *range, uintptr_t phys,
struct page *page = phys_to_page(phys);
/* Is It file map and cow page? */
if ((range->memobj->flags & (MF_DEV_FILE | MF_REG_FILE)) &&
!page) {
if ((range->memobj->flags & (MF_DEV_FILE | MF_REG_FILE |
MF_HUGETLBFS)) &&
!page) {
//kprintf("%s: cow,phys=%lx\n", __FUNCTION__, phys);
memory_stat_rss_add(size, pgsize);
return 1;

View File

@@ -57,6 +57,7 @@ struct shmobj {
struct shmlock_user * user;
struct shmid_ds ds;
struct list_head page_list;
ihk_spinlock_t page_list_lock;
struct list_head chain; /* shmobj_list */
};
@@ -104,7 +105,6 @@ static inline void shmlock_users_unlock(void)
void shmobj_list_lock(void);
void shmobj_list_unlock(void);
int shmobj_create_indexed(struct shmid_ds *ds, struct shmobj **objp);
void shmobj_destroy(struct shmobj *obj);
void shmlock_user_free(struct shmlock_user *user);
int shmlock_user_get(uid_t ruid, struct shmlock_user **userp);

View File

@@ -49,6 +49,7 @@
#define SCD_MSG_PROCFS_DELETE 0x11
#define SCD_MSG_PROCFS_REQUEST 0x12
#define SCD_MSG_PROCFS_ANSWER 0x13
#define SCD_MSG_PROCFS_RELEASE 0x15
#define SCD_MSG_DEBUG_LOG 0x20
@@ -82,6 +83,8 @@
#define SCD_MSG_CPU_RW_REG 0x52
#define SCD_MSG_CPU_RW_REG_RESP 0x53
#define SCD_MSG_FUTEX_WAKE 0x60
/* Cloning flags. */
# define CSIGNAL 0x000000ff /* Signal mask to be sent at exit. */
# define CLONE_VM 0x00000100 /* Set if VM shared between processes. */
@@ -197,8 +200,10 @@ struct program_load_desc {
unsigned long heap_extension;
long stack_premap;
unsigned long mpol_bind_mask;
int uti_thread_rank; /* N-th clone() spawns a thread on Linux CPU */
int uti_use_last_cpu; /* Work-around not to share CPU with OpenMP thread */
int nr_processes;
char shell_path[SHELL_PATH_MAX_LEN];
int process_rank;
__cpu_set_unit cpu_set[PLD_CPU_SET_SIZE];
int profile;
struct program_image_section sections[0];
@@ -258,7 +263,7 @@ struct ikc_scd_packet {
long sysfs_arg3;
};
/* SCD_MSG_SCHEDULE_THREAD */
/* SCD_MSG_WAKE_UP_SYSCALL_THREAD */
struct {
int ttid;
};
@@ -274,6 +279,12 @@ struct ikc_scd_packet {
struct {
int eventfd_type;
};
/* SCD_MSG_FUTEX_WAKE */
struct {
void *resp;
int *spin_sleep; /* 1: waiting in linux_wait_event() 0: woken up by someone else */
} futex;
};
char padding[8];
};
@@ -336,10 +347,10 @@ struct syscall_post {
SYSCALL_ARG_##a2(2); SYSCALL_ARG_##a3(3); \
SYSCALL_ARG_##a4(4); SYSCALL_ARG_##a5(5);
#define SYSCALL_FOOTER return do_syscall(&request, ihk_mc_get_processor_id(), 0)
#define SYSCALL_FOOTER return do_syscall(&request, ihk_mc_get_processor_id())
extern long do_syscall(struct syscall_request *req, int cpu, int pid);
int obtain_clone_cpuid(cpu_set_t *cpu_set);
extern long do_syscall(struct syscall_request *req, int cpu);
int obtain_clone_cpuid(cpu_set_t *cpu_set, int use_last);
extern long syscall_generic_forwarding(int n, ihk_mc_user_context_t *ctx);
#define DECLARATOR(number,name) __NR_##name = number,
@@ -353,17 +364,10 @@ enum {
#undef SYSCALL_DELEGATED
#define __NR_coredump 999 /* pseudo syscall for coredump */
#ifdef POSTK_DEBUG_TEMP_FIX_61 /* Core table size and lseek return value to loff_t */
struct coretable { /* table entry for a core chunk */
off_t len; /* length of the chunk */
unsigned long addr; /* physical addr of the chunk */
};
#else /* POSTK_DEBUG_TEMP_FIX_61 */
struct coretable { /* table entry for a core chunk */
int len; /* length of the chunk */
unsigned long addr; /* physical addr of the chunk */
};
#endif /* POSTK_DEBUG_TEMP_FIX_61 */
#ifdef POSTK_DEBUG_TEMP_FIX_1
void create_proc_procfs_files(int pid, int tid, int cpuid);
@@ -383,7 +387,6 @@ struct procfs_read {
int count; /* bytes to read (request) */
int eof; /* if eof is detected, 1 otherwise 0. (answer)*/
int ret; /* read bytes (answer) */
int status; /* non-zero if done (answer) */
int newcpu; /* migrated new cpu (answer) */
int readwrite; /* 0:read, 1:write */
char fname[PROCFS_NAME_MAX]; /* procfs filename (request) */
@@ -395,6 +398,8 @@ struct procfs_file {
char fname[PROCFS_NAME_MAX]; /* procfs filename (request) */
};
int process_procfs_request(struct ikc_scd_packet *rpacket);
#define RUSAGE_SELF 0
#define RUSAGE_CHILDREN -1
#define RUSAGE_THREAD 1
@@ -459,8 +464,8 @@ static inline unsigned long timespec_to_jiffy(const struct timespec *ats)
void reset_cputime(void);
void set_cputime(int mode);
int do_munmap(void *addr, size_t len);
intptr_t do_mmap(intptr_t addr0, size_t len0, int prot, int flags, int fd,
int do_munmap(void *addr, size_t len, int holding_memory_range_lock);
intptr_t do_mmap(uintptr_t addr0, size_t len0, int prot, int flags, int fd,
off_t off0);
void clear_host_pte(uintptr_t addr, size_t len);
typedef int32_t key_t;
@@ -471,7 +476,16 @@ int arch_setup_vdso(void);
int arch_cpu_read_write_register(struct ihk_os_cpu_register *desc,
enum mcctrl_os_cpu_operation op);
struct vm_range_numa_policy *vm_range_policy_search(struct process_vm *vm, uintptr_t addr);
void calculate_time_from_tsc(struct timespec *ts);
time_t time(void);
/*
 * Futex entry point.
 *
 * NOTE(review): the last three parameters are untyped pointers whose names
 * mirror the linux_wait_event / linux_printk / linux_clock_gettime function
 * pointers declared near the end of this header -- presumably the matching
 * callbacks are passed here; confirm against the definition.
 * NOTE(review): n and arg0..arg5 look like a syscall number and its six
 * arguments -- confirm against the definition.
 */
long do_futex(int n, unsigned long arg0, unsigned long arg1,
unsigned long arg2, unsigned long arg3,
unsigned long arg4, unsigned long arg5,
unsigned long _uti_clv,
void *uti_futex_resp,
void *_linux_wait_event,
void *_linux_printk,
void *_linux_clock_gettime);
#ifndef POSTK_DEBUG_ARCH_DEP_52
#define VDSO_MAXPAGES 2
@@ -519,6 +533,7 @@ enum perf_ctrl_type {
struct perf_ctrl_desc {
enum perf_ctrl_type ctrl_type;
int err;
union {
/* for SET, GET */
struct {
@@ -569,6 +584,15 @@ typedef struct uti_attr {
uint64_t flags; /* Representing location and behavior hints by bitmap */
} uti_attr_t;
/*
 * uti context block. The union overlays a 4096-byte raw buffer (`ctx`) with
 * named fields, so `uti_refill_tid` shares storage with the first bytes of
 * `ctx` and the structure is at least 4096 bytes in size.
 */
struct uti_ctx {
union {
char ctx[4096];
struct {
int uti_refill_tid;
};
};
};
struct move_pages_smp_req {
unsigned long count;
const void **user_virt_addr;
@@ -589,4 +613,9 @@ struct move_pages_smp_req {
#define PROCESS_VM_READ 0
#define PROCESS_VM_WRITE 1
/* uti: function pointers that point to Linux code */
extern long (*linux_wait_event)(void *_resp, unsigned long nsec_timeout);
extern int (*linux_printk)(const char *fmt, ...);
extern int (*linux_clock_gettime)(clockid_t clk_id, struct timespec *tp);
#endif

View File

@@ -25,6 +25,8 @@
#define CLOCK_PROCESS_CPUTIME_ID 2
#define CLOCK_THREAD_CPUTIME_ID 3
typedef int clockid_t;
typedef long int __time_t;
/* POSIX.1b structure for a time value. This is like a `struct timeval' but

View File

@@ -26,20 +26,17 @@
#include <mc_xpmem.h>
#include <xpmem.h>
#include <debug.h>
#define XPMEM_CURRENT_VERSION 0x00026003
//#define DEBUG_PRINT_XPMEM
#ifdef DEBUG_PRINT_XPMEM
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#define XPMEM_DEBUG(format, a...) kprintf("[%d] %s: "format"\n", cpu_local_var(current)->proc->rgid, __func__, ##a)
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#define XPMEM_DEBUG(format, a...) do { if (0) kprintf("\n"); } while (0)
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
#define XPMEM_DEBUG(format, a...) dkprintf("[%d] %s: "format"\n", cpu_local_var(current)->proc->rgid, __func__, ##a)
//#define USE_DBUG_ON

View File

@@ -32,6 +32,7 @@
#include <syscall.h>
#include <sysfs.h>
#include <ihk/monitor.h>
#include <debug.h>
//#define IOCTL_FUNC_EXTENSION
#ifdef IOCTL_FUNC_EXTENSION
@@ -41,11 +42,8 @@
//#define DEBUG_PRINT_INIT
#ifdef DEBUG_PRINT_INIT
#define dkprintf(...) do { kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) do { kprintf(__VA_ARGS__); } while (0)
#else
#define dkprintf(...) do { } while (0)
#define ekprintf(...) do { kprintf(__VA_ARGS__); } while (0)
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
#define DUMP_LEVEL_USER_UNUSED_EXCLUDE 24
@@ -61,6 +59,13 @@ static void handler_init(void)
ihk_mc_set_syscall_handler(syscall);
}
/* Symbols whose names conflict with those of the Linux kernel.
 * Providing them under an alternative name makes it possible to load all
 * symbols at the same time.
 */
int *mck_num_processors = &num_processors;
unsigned long data[1024] __attribute__((aligned(64)));
#ifdef USE_DMA
@@ -125,6 +130,8 @@ char *find_command_line(char *name)
return strstr(cmdline, name);
}
extern int safe_kernel_map;
static void parse_kargs(void)
{
char *ptr;
@@ -145,6 +152,11 @@ static void parse_kargs(void)
}
ihk_mc_set_dump_level(dump_level);
ptr = find_command_line("safe_kernel_map");
if (ptr) {
safe_kernel_map = 1;
}
/* idle_halt option */
ptr = find_command_line("idle_halt");
if (ptr) {
@@ -246,6 +258,11 @@ static void nmi_init()
ihk_set_nmi_mode_addr(phys);
}
/*
 * Register the futex entry point for uti: passes the address of do_futex,
 * cast to unsigned long, to ihk_set_mckernel_do_futex().
 *
 * Declared with an explicit (void) parameter list so that the definition
 * provides a prototype and a call with arguments is diagnosed.
 */
static void uti_init(void)
{
	ihk_set_mckernel_do_futex((unsigned long)do_futex);
}
static void rest_init(void)
{
handler_init();
@@ -261,6 +278,7 @@ static void rest_init(void)
#endif /* !POSTK_DEBUG_TEMP_FIX_73 */
cpu_local_var_init();
nmi_init();
uti_init();
time_init();
kmalloc_init();
@@ -331,6 +349,7 @@ static void populate_sysfs(void)
{
cpu_sysfs_setup();
numa_sysfs_setup();
dynamic_debug_sysfs_setup();
//setup_remote_snooping_samples();
} /* populate_sysfs() */

View File

@@ -19,15 +19,13 @@
#include <ihk/ikc.h>
#include <ikc/master.h>
#include <arch/cpu.h>
#include <debug.h>
//#define DEBUG_LISTENERS
#ifdef DEBUG_LISTENERS
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
void testmem(void *v, unsigned long size)
@@ -71,7 +69,7 @@ static int test_packet_handler(struct ihk_ikc_channel_desc *c,
testmem(v, 4 * 1024 * 1024);
ihk_mc_unmap_virtual(v, 4 * 1024, 1);
ihk_mc_unmap_virtual(v, 4 * 1024);
ihk_mc_unmap_memory(NULL, pp, 4 * 1024 * 1024);
} else if (packet->msg == 0x11110012) {
p.msg = 0x11110013;

View File

@@ -44,15 +44,13 @@
#include <process.h>
#include <limits.h>
#include <sysfs.h>
#include <debug.h>
//#define DEBUG_PRINT_MEM
#ifdef DEBUG_PRINT_MEM
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
static unsigned long pa_start, pa_end;
@@ -547,7 +545,7 @@ static void *mckernel_allocate_aligned_pages_node(int npages, int p2align,
ihk_mc_ap_flag flag, int pref_node, int is_user, uintptr_t virt_addr)
{
unsigned long pa = 0;
int i, node;
int i = 0, node;
#ifndef IHK_RBTREE_ALLOCATOR
struct ihk_page_allocator_desc *pa_allocator;
#endif
@@ -962,8 +960,6 @@ static struct ihk_mc_interrupt_handler query_free_mem_handler = {
.priv = NULL,
};
void set_signal(int sig, void *regs, struct siginfo *info);
void check_signal(unsigned long, void *, int);
int gencore(struct thread *, void *, struct coretable **, int *);
void freecore(struct coretable **);
@@ -981,11 +977,9 @@ void coredump(struct thread *thread, void *regs)
struct coretable *coretable;
int chunks;
#ifdef POSTK_DEBUG_ARCH_DEP_67 /* use limit corefile size. (temporarily fix.) */
if (thread->proc->rlimit[MCK_RLIMIT_CORE].rlim_cur == 0) {
return;
}
#endif /* POSTK_DEBUG_ARCH_DEP_67 */
#ifndef POSTK_DEBUG_ARCH_DEP_18
ret = gencore(thread, regs, &coretable, &chunks);
@@ -997,7 +991,7 @@ void coredump(struct thread *thread, void *regs)
request.args[0] = chunks;
request.args[1] = virt_to_phys(coretable);
/* no data for now */
ret = do_syscall(&request, thread->cpu_id, thread->proc->pid);
ret = do_syscall(&request, thread->cpu_id);
if (ret == 0) {
kprintf("dumped core.\n");
} else {
@@ -1223,7 +1217,7 @@ out:
if(interrupt_from_user(regs)){
cpu_enable_interrupt();
check_need_resched();
check_signal(0, regs, 0);
check_signal(0, regs, -1);
}
set_cputime(interrupt_from_user(regs)? 0: 1);
#ifdef PROFILE_ENABLE
@@ -1671,7 +1665,7 @@ void *ihk_mc_map_virtual(unsigned long phys, int npages,
return (char *)p + offset;
}
void ihk_mc_unmap_virtual(void *va, int npages, int free_physical)
void ihk_mc_unmap_virtual(void *va, int npages)
{
unsigned long i;
@@ -1681,13 +1675,7 @@ void ihk_mc_unmap_virtual(void *va, int npages, int free_physical)
flush_tlb_single((unsigned long)(va + (i << PAGE_SHIFT)));
}
#ifdef POSTK_DEBUG_TEMP_FIX_51 /* ihk_mc_unmap_virtual() free_physical disabled */
ihk_pagealloc_free(vmap_allocator, (unsigned long)va, npages);
#else /* POSTK_DEBUG_TEMP_FIX_51 */
if (free_physical) {
ihk_pagealloc_free(vmap_allocator, (unsigned long)va, npages);
}
#endif /* POSTK_DEBUG_TEMP_FIX_51 */
}
#ifdef ATTACHED_MIC
@@ -2304,76 +2292,37 @@ void ___kmalloc_print_free_list(struct list_head *list)
kprintf_unlock(irqflags);
}
#ifdef POSTK_DEBUG_ARCH_DEP_27
/*
 * Look for an address range of `len` bytes, aligned to (1 << pgshift), that
 * does not overlap any range returned by lookup_process_memory_range() for
 * the thread's vm, starting the search at `hint`.
 *
 * On success stores the address in *addrp and returns 0.
 * Returns -ENOMEM (leaving *addrp untouched) when the candidate address
 * reaches region->user_end or the range would not fit below it.
 */
int search_free_space(struct thread *thread, size_t len, intptr_t hint,
		int pgshift, intptr_t *addrp)
{
	struct vm_regions *region = &thread->vm->region;
	const size_t pgsize = (size_t)1 << pgshift;
	struct vm_range *range;
	intptr_t addr = hint;
	int error = 0;

	dkprintf("search_free_space(%lx,%lx,%d,%p)\n", len, hint, pgshift, addrp);

	while (1) {
		/* Round the candidate up to the requested page size. */
		addr = (addr + pgsize - 1) & ~(pgsize - 1);

		if ((region->user_end <= addr)
				|| ((region->user_end - len) < addr)) {
			ekprintf("search_free_space(%lx,%lx,%p):"
					"no space. %lx %lx\n",
					len, hint, addrp, addr,
					region->user_end);
			error = -ENOMEM;
			break;
		}

		range = lookup_process_memory_range(thread->vm, addr, addr+len);
		if (range == NULL) {
			/* Nothing overlaps [addr, addr+len): this is the answer. */
			*addrp = addr;
			error = 0;
			break;
		}

		/* Skip past the overlapping range and try again. */
		addr = range->end;
	}

	dkprintf("search_free_space(%lx,%lx,%d,%p): %d %lx\n",
			len, hint, pgshift, addrp, error, addr);
	return error;
}
#endif /* POSTK_DEBUG_ARCH_DEP_27 */
#ifdef POSTK_DEBUG_TEMP_FIX_52 /* supports NUMA for memory area determination */
#ifdef IHK_RBTREE_ALLOCATOR
/*
 * NOTE(review): the text below mixes two forms of is_mckernel_memory():
 * a single-address form taking `phys` and a range form taking `start`/`end`.
 *
 * Range form: iterates over the memory chunks reported by
 * ihk_mc_get_nr_memory_chunks()/ihk_mc_get_memory_chunk() and returns 1 when
 * both `start` and `end` satisfy chunk_start <= x < chunk_end for the same
 * chunk; returns 0 when no chunk matches.
 *
 * NOTE(review): `end < chunk_end` rejects a range whose end equals
 * chunk_end -- confirm whether callers pass an inclusive or exclusive end.
 */
int is_mckernel_memory(unsigned long phys)
int is_mckernel_memory(unsigned long start, unsigned long end)
{
int i;
for (i = 0; i < ihk_mc_get_nr_memory_chunks(); ++i) {
unsigned long start, end;
unsigned long chunk_start, chunk_end;
int numa_id;
ihk_mc_get_memory_chunk(i, &start, &end, &numa_id);
if (start <= phys && phys < end) {
ihk_mc_get_memory_chunk(i, &chunk_start, &chunk_end, &numa_id);
if ((chunk_start <= start && start < chunk_end) &&
(chunk_start <= end && end < chunk_end)) {
return 1;
}
}
return 0;
}
#else /* IHK_RBTREE_ALLOCATOR */
/*
 * NOTE(review): the text below mixes two forms of is_mckernel_memory():
 * a single-address form taking `phys` and a range form taking `start`/`end`.
 *
 * Range form: for every NUMA node, walks memory_nodes[i].allocators and
 * returns 1 when both `start` and `end` satisfy area_start <= x < area_end;
 * returns 0 otherwise.
 */
int is_mckernel_memory(unsigned long phys)
int is_mckernel_memory(unsigned long start, unsigned long end)
{
int i;
for (i = 0; i < ihk_mc_get_nr_numa_nodes(); ++i) {
struct ihk_page_allocator_desc *pa_allocator;
/*
 * NOTE(review): pa_allocator is declared just above without an
 * initializer and is presumably first assigned by list_for_each_entry()
 * below, so the two reads that follow dereference an indeterminate
 * pointer. area_start/area_end are also not refreshed inside the list
 * walk, so every allocator of the node is compared against the same
 * values. Likely both should be read from pa_allocator inside the loop
 * body -- confirm and fix.
 */
unsigned long area_start = pa_allocator->start;
unsigned long area_end = pa_allocator->end;
list_for_each_entry(pa_allocator,
&memory_nodes[i].allocators, list) {
if (pa_allocator->start <= phys && phys < pa_allocator->end) {
/*
 * NOTE(review): `end < area_end` rejects a range whose end equals
 * area_end -- confirm whether `end` is inclusive or exclusive.
 */
if ((area_start <= start && start < area_end) &&
(area_start <= end && end < area_end)) {
return 1;
}
}
@@ -2381,7 +2330,6 @@ int is_mckernel_memory(unsigned long phys)
return 0;
}
#endif /* IHK_RBTREE_ALLOCATOR */
#endif /* POSTK_DEBUG_TEMP_FIX_52 */
void ihk_mc_query_mem_areas(void){

Some files were not shown because too many files have changed in this diff Show More