Merge tag '1.6.0-rc1' into master-1.6.0

Release target Nov 11, 2018

Conflicts:
	configure
This commit is contained in:
Masamichi Takagi
2018-11-08 10:49:38 +09:00
538 changed files with 45347 additions and 8629 deletions

1
.gitignore vendored
View File

@@ -1,3 +1,4 @@
*~
*.o *.o
*.elf *.elf
*.bin *.bin

3
.gitmodules vendored Normal file
View File

@@ -0,0 +1,3 @@
[submodule "ihk"]
path = ihk
url = https://github.com/RIKEN-SysSoft/ihk.git

View File

@@ -30,6 +30,7 @@
#include <debug-monitors.h> #include <debug-monitors.h>
#include <sysreg.h> #include <sysreg.h>
#include <cpufeature.h> #include <cpufeature.h>
#include <debug.h>
#ifdef POSTK_DEBUG_ARCH_DEP_65 #ifdef POSTK_DEBUG_ARCH_DEP_65
#include <hwcap.h> #include <hwcap.h>
#endif /* POSTK_DEBUG_ARCH_DEP_65 */ #endif /* POSTK_DEBUG_ARCH_DEP_65 */
@@ -39,16 +40,10 @@
#include "postk_print_sysreg.c" #include "postk_print_sysreg.c"
#ifdef DEBUG_PRINT_CPU #ifdef DEBUG_PRINT_CPU
#define dkprintf kprintf #undef DDEBUG_DEFAULT
#define ekprintf kprintf #define DDEBUG_DEFAULT DDEBUG_PRINT
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf kprintf
#endif #endif
#define BUG_ON(condition) do { if (condition) { kprintf("PANIC: %s: %s(line:%d)\n",\
__FILE__, __FUNCTION__, __LINE__); panic(""); } } while(0)
struct cpuinfo_arm64 cpuinfo_data[NR_CPUS]; /* index is logical cpuid */ struct cpuinfo_arm64 cpuinfo_data[NR_CPUS]; /* index is logical cpuid */
static unsigned int per_cpu_timer_val[NR_CPUS] = { 0 }; static unsigned int per_cpu_timer_val[NR_CPUS] = { 0 };
@@ -1283,7 +1278,6 @@ int ihk_mc_interrupt_cpu(int cpu, int vector)
return 0; return 0;
} }
#ifdef POSTK_DEBUG_ARCH_DEP_22
/* /*
* @ref.impl linux-linaro/arch/arm64/kernel/process.c::tls_thread_switch() * @ref.impl linux-linaro/arch/arm64/kernel/process.c::tls_thread_switch()
*/ */
@@ -1309,14 +1303,13 @@ struct thread *arch_switch_context(struct thread *prev, struct thread *next)
extern void perf_start(struct mc_perf_event *event); extern void perf_start(struct mc_perf_event *event);
extern void perf_reset(struct mc_perf_event *event); extern void perf_reset(struct mc_perf_event *event);
struct thread *last; struct thread *last;
#ifdef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */
struct mcs_rwlock_node_irqsave lock; struct mcs_rwlock_node_irqsave lock;
#endif /* POSTK_DEBUG_TEMP_FIX_41 */
/* Set up new TLS.. */ /* Set up new TLS.. */
dkprintf("[%d] arch_switch_context: tlsblock_base: 0x%lX\n", dkprintf("[%d] arch_switch_context: tlsblock_base: 0x%lX\n",
ihk_mc_get_processor_id(), next->tlsblock_base); ihk_mc_get_processor_id(), next->tlsblock_base);
#ifdef ENABLE_PERF
/* Performance monitoring inherit */ /* Performance monitoring inherit */
if(next->proc->monitoring_event) { if(next->proc->monitoring_event) {
if(next->proc->perf_status == PP_RESET) if(next->proc->perf_status == PP_RESET)
@@ -1326,10 +1319,10 @@ struct thread *arch_switch_context(struct thread *prev, struct thread *next)
perf_start(next->proc->monitoring_event); perf_start(next->proc->monitoring_event);
} }
} }
#endif /*ENABLE_PERF*/
if (likely(prev)) { if (likely(prev)) {
tls_thread_switch(prev, next); tls_thread_switch(prev, next);
#ifdef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */
mcs_rwlock_writer_lock(&prev->proc->update_lock, &lock); mcs_rwlock_writer_lock(&prev->proc->update_lock, &lock);
if (prev->proc->status & (PS_DELAY_STOPPED | PS_DELAY_TRACED)) { if (prev->proc->status & (PS_DELAY_STOPPED | PS_DELAY_TRACED)) {
switch (prev->proc->status) { switch (prev->proc->status) {
@@ -1343,11 +1336,12 @@ struct thread *arch_switch_context(struct thread *prev, struct thread *next)
break; break;
} }
mcs_rwlock_writer_unlock(&prev->proc->update_lock, &lock); mcs_rwlock_writer_unlock(&prev->proc->update_lock, &lock);
/* Wake up the parent if it called wait4() and is sleeping */
waitq_wakeup(&prev->proc->parent->waitpid_q); waitq_wakeup(&prev->proc->parent->waitpid_q);
} else { } else {
mcs_rwlock_writer_unlock(&prev->proc->update_lock, &lock); mcs_rwlock_writer_unlock(&prev->proc->update_lock, &lock);
} }
#endif /* POSTK_DEBUG_TEMP_FIX_41 */
last = ihk_mc_switch_context(&prev->ctx, &next->ctx, prev); last = ihk_mc_switch_context(&prev->ctx, &next->ctx, prev);
} }
@@ -1357,7 +1351,6 @@ struct thread *arch_switch_context(struct thread *prev, struct thread *next)
return last; return last;
} }
#endif /* POSTK_DEBUG_ARCH_DEP_22 */
/*@ /*@
@ requires \valid(thread); @ requires \valid(thread);
@@ -1439,8 +1432,7 @@ void copy_fp_regs(struct thread *from, struct thread *to)
} }
} }
void void clear_fp_regs(void)
clear_fp_regs(struct thread *thread)
{ {
if (likely(elf_hwcap & (HWCAP_FP | HWCAP_ASIMD))) { if (likely(elf_hwcap & (HWCAP_FP | HWCAP_ASIMD))) {
#ifdef CONFIG_ARM64_SVE #ifdef CONFIG_ARM64_SVE
@@ -1477,7 +1469,7 @@ restore_fp_regs(struct thread *thread)
if (likely(elf_hwcap & (HWCAP_FP | HWCAP_ASIMD))) { if (likely(elf_hwcap & (HWCAP_FP | HWCAP_ASIMD))) {
if (!thread->fp_regs) { if (!thread->fp_regs) {
// only clear fpregs. // only clear fpregs.
clear_fp_regs(thread); clear_fp_regs();
return; return;
} }
thread_fpsimd_load(thread); thread_fpsimd_load(thread);

View File

@@ -9,20 +9,16 @@
#include <prctl.h> #include <prctl.h>
#include <cpufeature.h> #include <cpufeature.h>
#include <kmalloc.h> #include <kmalloc.h>
#include <debug.h>
#include <process.h>
//#define DEBUG_PRINT_FPSIMD //#define DEBUG_PRINT_FPSIMD
#ifdef DEBUG_PRINT_FPSIMD #ifdef DEBUG_PRINT_FPSIMD
#define dkprintf kprintf #undef DDEBUG_DEFAULT
#define ekprintf kprintf #define DDEBUG_DEFAULT DDEBUG_PRINT
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf kprintf
#endif #endif
#define BUG_ON(condition) do { if (condition) { kprintf("PANIC: %s: %s(line:%d)\n",\
__FILE__, __FUNCTION__, __LINE__); panic(""); } } while(0)
#ifdef CONFIG_ARM64_SVE #ifdef CONFIG_ARM64_SVE
/* Maximum supported vector length across all CPUs (initially poisoned) */ /* Maximum supported vector length across all CPUs (initially poisoned) */
@@ -73,9 +69,6 @@ static int get_nr_threads(struct process *proc)
return nr_threads; return nr_threads;
} }
extern void save_fp_regs(struct thread *thread);
extern void clear_fp_regs(struct thread *thread);
extern void restore_fp_regs(struct thread *thread);
/* @ref.impl arch/arm64/kernel/fpsimd.c::sve_set_vector_length */ /* @ref.impl arch/arm64/kernel/fpsimd.c::sve_set_vector_length */
int sve_set_vector_length(struct thread *thread, int sve_set_vector_length(struct thread *thread,
unsigned long vl, unsigned long flags) unsigned long vl, unsigned long flags)
@@ -129,7 +122,7 @@ int sve_set_vector_length(struct thread *thread,
/* for self at prctl syscall */ /* for self at prctl syscall */
if (thread == cpu_local_var(current)) { if (thread == cpu_local_var(current)) {
save_fp_regs(thread); save_fp_regs(thread);
clear_fp_regs(thread); clear_fp_regs();
thread_sve_to_fpsimd(thread, &fp_regs); thread_sve_to_fpsimd(thread, &fp_regs);
sve_free(thread); sve_free(thread);

View File

@@ -7,6 +7,7 @@
#include <process.h> #include <process.h>
#include <string.h> #include <string.h>
#include <elfcore.h> #include <elfcore.h>
#include <debug.h>
#define align32(x) ((((x) + 3) / 4) * 4) #define align32(x) ((((x) + 3) / 4) * 4)
#define alignpage(x) ((((x) + (PAGE_SIZE) - 1) / (PAGE_SIZE)) * (PAGE_SIZE)) #define alignpage(x) ((((x) + (PAGE_SIZE) - 1) / (PAGE_SIZE)) * (PAGE_SIZE))
@@ -14,11 +15,8 @@
//#define DEBUG_PRINT_GENCORE //#define DEBUG_PRINT_GENCORE
#ifdef DEBUG_PRINT_GENCORE #ifdef DEBUG_PRINT_GENCORE
#define dkprintf(...) kprintf(__VA_ARGS__) #undef DDEBUG_DEFAULT
#define ekprintf(...) kprintf(__VA_ARGS__) #define DDEBUG_DEFAULT DDEBUG_PRINT
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif #endif
/* /*

View File

@@ -6,6 +6,8 @@
#include <ihk/cpu.h> #include <ihk/cpu.h>
#include <ihk/atomic.h> #include <ihk/atomic.h>
#include "affinity.h"
#include <lwk/compiler.h>
//#define DEBUG_SPINLOCK //#define DEBUG_SPINLOCK
//#define DEBUG_MCS_RWLOCK //#define DEBUG_MCS_RWLOCK
@@ -152,6 +154,8 @@ typedef struct mcs_lock_node {
unsigned long irqsave; unsigned long irqsave;
} __attribute__((aligned(64))) mcs_lock_node_t; } __attribute__((aligned(64))) mcs_lock_node_t;
typedef mcs_lock_node_t mcs_lock_t;
static void mcs_lock_init(struct mcs_lock_node *node) static void mcs_lock_init(struct mcs_lock_node *node)
{ {
node->locked = 0; node->locked = 0;
@@ -602,4 +606,16 @@ __mcs_rwlock_reader_unlock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_
#endif #endif
} }
/*
 * irqflags_can_interrupt - tell whether an interrupt-flags value permits
 * interrupts.
 *
 * @flags: interrupt-flags value to inspect.
 *
 * With CONFIG_HAS_NMI the value is compared against ICC_PMR_EL1_MASKED;
 * otherwise bit 7 (the PSTATE.DAIF I bit) is tested.
 *
 * Returns 1 when an interrupt is possible under @flags, 0 otherwise.
 */
static inline int irqflags_can_interrupt(unsigned long flags)
{
#ifdef CONFIG_HAS_NMI
#warning irqflags_can_interrupt needs testing/fixing on such a target
	return flags > ICC_PMR_EL1_MASKED;
#else
	/* An interrupt is possible only while the PSTATE.DAIF I bit is clear. */
	const unsigned long daif_i_bit = 1UL << 7;

	return (flags & daif_i_bit) == 0;
#endif
}
#endif /* !__HEADER_ARM64_COMMON_ARCH_LOCK_H */ #endif /* !__HEADER_ARM64_COMMON_ARCH_LOCK_H */

View File

@@ -35,38 +35,4 @@ void arm64_disable_pmu(void);
int armv8pmu_init(struct arm_pmu* cpu_pmu); int armv8pmu_init(struct arm_pmu* cpu_pmu);
/* TODO[PMU]: 共通部に定義があっても良い。今後の動向を見てここの定義を削除する */ /* TODO[PMU]: 共通部に定義があっても良い。今後の動向を見てここの定義を削除する */
/*
* Generalized hardware cache events:
*
* { L1-D, L1-I, LLC, ITLB, DTLB, BPU, NODE } x
* { read, write, prefetch } x
* { accesses, misses }
*/
enum perf_hw_cache_id {
PERF_COUNT_HW_CACHE_L1D = 0,
PERF_COUNT_HW_CACHE_L1I = 1,
PERF_COUNT_HW_CACHE_LL = 2,
PERF_COUNT_HW_CACHE_DTLB = 3,
PERF_COUNT_HW_CACHE_ITLB = 4,
PERF_COUNT_HW_CACHE_BPU = 5,
PERF_COUNT_HW_CACHE_NODE = 6,
PERF_COUNT_HW_CACHE_MAX, /* non-ABI */
};
enum perf_hw_cache_op_id {
PERF_COUNT_HW_CACHE_OP_READ = 0,
PERF_COUNT_HW_CACHE_OP_WRITE = 1,
PERF_COUNT_HW_CACHE_OP_PREFETCH = 2,
PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */
};
enum perf_hw_cache_op_result_id {
PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0,
PERF_COUNT_HW_CACHE_RESULT_MISS = 1,
PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */
};
#endif #endif

View File

@@ -9,6 +9,11 @@
#define _NSIG_BPW 64 #define _NSIG_BPW 64
#define _NSIG_WORDS (_NSIG / _NSIG_BPW) #define _NSIG_WORDS (_NSIG / _NSIG_BPW)
/*
 * valid_signal - range-check a signal number against _NSIG.
 *
 * @sig: signal number to check.
 *
 * Returns 1 when @sig is not greater than _NSIG (0 is therefore accepted),
 * 0 otherwise.
 */
static inline int valid_signal(unsigned long sig)
{
	if (sig > _NSIG)
		return 0;

	return 1;
}
typedef unsigned long int __sigset_t; typedef unsigned long int __sigset_t;
#define __sigmask(sig) (((__sigset_t) 1) << ((sig) - 1)) #define __sigmask(sig) (((__sigset_t) 1) << ((sig) - 1))

View File

@@ -114,14 +114,18 @@ SYSCALL_HANDLED(236, get_mempolicy)
SYSCALL_HANDLED(237, set_mempolicy) SYSCALL_HANDLED(237, set_mempolicy)
SYSCALL_HANDLED(238, migrate_pages) SYSCALL_HANDLED(238, migrate_pages)
SYSCALL_HANDLED(239, move_pages) SYSCALL_HANDLED(239, move_pages)
#ifdef PERF_ENABLE
SYSCALL_HANDLED(241, perf_event_open) SYSCALL_HANDLED(241, perf_event_open)
#endif // PERF_ENABLE
SYSCALL_HANDLED(260, wait4) SYSCALL_HANDLED(260, wait4)
SYSCALL_HANDLED(270, process_vm_readv) SYSCALL_HANDLED(270, process_vm_readv)
SYSCALL_HANDLED(271, process_vm_writev) SYSCALL_HANDLED(271, process_vm_writev)
#ifdef PERF_ENABLE
SYSCALL_HANDLED(601, pmc_init) SYSCALL_HANDLED(601, pmc_init)
SYSCALL_HANDLED(602, pmc_start) SYSCALL_HANDLED(602, pmc_start)
SYSCALL_HANDLED(603, pmc_stop) SYSCALL_HANDLED(603, pmc_stop)
SYSCALL_HANDLED(604, pmc_reset) SYSCALL_HANDLED(604, pmc_reset)
#endif // PERF_ENABLE
SYSCALL_HANDLED(700, get_cpu_id) SYSCALL_HANDLED(700, get_cpu_id)
#ifdef PROFILE_ENABLE #ifdef PROFILE_ENABLE
SYSCALL_HANDLED(__NR_profile, profile) SYSCALL_HANDLED(__NR_profile, profile)

View File

@@ -7,15 +7,13 @@
#include <arch/cpu.h> #include <arch/cpu.h>
#include <memory.h> #include <memory.h>
#include <syscall.h> #include <syscall.h>
#include <debug.h>
// #define DEBUG_GICV2 // #define DEBUG_GICV2
#ifdef DEBUG_GICV2 #ifdef DEBUG_GICV2
#define dkprintf(...) kprintf(__VA_ARGS__) #undef DDEBUG_DEFAULT
#define ekprintf(...) kprintf(__VA_ARGS__) #define DDEBUG_DEFAULT DDEBUG_PRINT
#else
#define dkprintf(...)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif #endif
void *dist_base; void *dist_base;

View File

@@ -7,17 +7,15 @@
#include <cputype.h> #include <cputype.h>
#include <process.h> #include <process.h>
#include <syscall.h> #include <syscall.h>
#include <debug.h>
//#define DEBUG_GICV3 //#define DEBUG_GICV3
#define USE_CAVIUM_THUNDER_X #define USE_CAVIUM_THUNDER_X
#ifdef DEBUG_GICV3 #ifdef DEBUG_GICV3
#define dkprintf(...) kprintf(__VA_ARGS__) #undef DDEBUG_DEFAULT
#define ekprintf(...) kprintf(__VA_ARGS__) #define DDEBUG_DEFAULT DDEBUG_PRINT
#else
#define dkprintf(...)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif #endif
#ifdef USE_CAVIUM_THUNDER_X #ifdef USE_CAVIUM_THUNDER_X

View File

@@ -14,9 +14,7 @@
#include <context.h> #include <context.h>
#include <kmalloc.h> #include <kmalloc.h>
#include <vdso.h> #include <vdso.h>
#include <debug.h>
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#define NOT_IMPLEMENTED() do { kprintf("%s is not implemented\n", __func__); while(1);} while(0) #define NOT_IMPLEMENTED() do { kprintf("%s is not implemented\n", __func__); while(1);} while(0)
@@ -2924,17 +2922,12 @@ int read_process_vm(struct process_vm *vm, void *kdst, const void *usrc, size_t
return error; return error;
} }
#ifdef POSTK_DEBUG_TEMP_FIX_52 /* NUMA support(memory area determination) */ if (!is_mckernel_memory(pa, pa + cpsize)) {
if (!is_mckernel_memory(pa)) {
#else
if (pa < ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0) ||
pa >= ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0)) {
#endif /* POSTK_DEBUG_TEMP_FIX_52 */
dkprintf("%s: pa is outside of LWK memory, to: %p, pa: %p," dkprintf("%s: pa is outside of LWK memory, to: %p, pa: %p,"
"cpsize: %d\n", __FUNCTION__, to, pa, cpsize); "cpsize: %d\n", __FUNCTION__, to, pa, cpsize);
va = ihk_mc_map_virtual(pa, 1, PTATTR_ACTIVE); va = ihk_mc_map_virtual(pa, 1, PTATTR_ACTIVE);
memcpy(to, va, cpsize); memcpy(to, va, cpsize);
ihk_mc_unmap_virtual(va, 1, 1); ihk_mc_unmap_virtual(va, 1);
} }
else { else {
va = phys_to_virt(pa); va = phys_to_virt(pa);
@@ -3007,17 +3000,12 @@ int write_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t
return error; return error;
} }
#ifdef POSTK_DEBUG_TEMP_FIX_52 /* NUMA support(memory area determination) */ if (!is_mckernel_memory(pa, pa + cpsize)) {
if (!is_mckernel_memory(pa)) {
#else
if (pa < ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0) ||
pa >= ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0)) {
#endif /* POSTK_DEBUG_TEMP_FIX_52 */
dkprintf("%s: pa is outside of LWK memory, from: %p," dkprintf("%s: pa is outside of LWK memory, from: %p,"
"pa: %p, cpsize: %d\n", __FUNCTION__, from, pa, cpsize); "pa: %p, cpsize: %d\n", __FUNCTION__, from, pa, cpsize);
va = ihk_mc_map_virtual(pa, 1, PTATTR_WRITABLE|PTATTR_ACTIVE); va = ihk_mc_map_virtual(pa, 1, PTATTR_WRITABLE|PTATTR_ACTIVE);
memcpy(va, from, cpsize); memcpy(va, from, cpsize);
ihk_mc_unmap_virtual(va, 1, 1); ihk_mc_unmap_virtual(va, 1);
} }
else { else {
va = phys_to_virt(pa); va = phys_to_virt(pa);
@@ -3078,17 +3066,12 @@ int patch_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t
return error; return error;
} }
#ifdef POSTK_DEBUG_TEMP_FIX_52 /* NUMA support(memory area determination) */ if (!is_mckernel_memory(pa, pa + cpsize)) {
if (!is_mckernel_memory(pa)) {
#else
if (pa < ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0) ||
pa >= ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0)) {
#endif /* POSTK_DEBUG_TEMP_FIX_52 */
dkprintf("%s: pa is outside of LWK memory, from: %p," dkprintf("%s: pa is outside of LWK memory, from: %p,"
"pa: %p, cpsize: %d\n", __FUNCTION__, from, pa, cpsize); "pa: %p, cpsize: %d\n", __FUNCTION__, from, pa, cpsize);
va = ihk_mc_map_virtual(pa, 1, PTATTR_WRITABLE|PTATTR_ACTIVE); va = ihk_mc_map_virtual(pa, 1, PTATTR_WRITABLE|PTATTR_ACTIVE);
memcpy(va, from, cpsize); memcpy(va, from, cpsize);
ihk_mc_unmap_virtual(va, 1, 1); ihk_mc_unmap_virtual(va, 1);
} }
else { else {
va = phys_to_virt(pa); va = phys_to_virt(pa);

View File

@@ -93,21 +93,50 @@ int ihk_mc_perfctr_init(int counter, uint64_t config, int mode)
return ret; return ret;
} }
int ihk_mc_perfctr_start(int counter) int ihk_mc_perfctr_start(unsigned long counter_mask)
{ {
int ret; int ret = 0;
ret = cpu_pmu.enable_counter(counter); int counter;
return ret; unsigned long counter_bit;
for (counter = 0, counter_bit = 1;
counter_bit < counter_mask;
counter++, counter_bit <<= 1) {
if (!(counter_mask & counter_bit))
continue;
ret = cpu_pmu.enable_counter(counter_mask);
if (ret < 0)
break;
}
return ret < 0 ? ret : 0;
} }
int ihk_mc_perfctr_stop(int counter) int ihk_mc_perfctr_stop(unsigned long counter_mask)
{ {
cpu_pmu.disable_counter(counter); int ret = 0;
int counter;
unsigned long counter_bit;
// ihk_mc_perfctr_startが呼ばれるときには、 for (counter = 0, counter_bit = 1;
// init系関数が呼ばれるのでdisableにする。 counter_bit < counter_mask;
cpu_pmu.disable_intens(counter); counter++, counter_bit <<= 1) {
return 0; if (!(counter_mask & counter_bit))
continue;
ret = cpu_pmu.disable_counter(counter);
if (ret < 0)
break;
// ihk_mc_perfctr_startが呼ばれるときには、
// init系関数が呼ばれるのでdisableにする。
ret = cpu_pmu.disable_intens(counter);
if (ret < 0)
break;
}
return ret < 0 ? ret : 0;
} }
int ihk_mc_perfctr_reset(int counter) int ihk_mc_perfctr_reset(int counter)

View File

@@ -4,16 +4,14 @@
#include <ihk/perfctr.h> #include <ihk/perfctr.h>
#include <errno.h> #include <errno.h>
#include <ihk/debug.h> #include <ihk/debug.h>
#include <debug.h>
#define BIT(nr) (1UL << (nr)) #define BIT(nr) (1UL << (nr))
//#define DEBUG_PRINT_PMU //#define DEBUG_PRINT_PMU
#ifdef DEBUG_PRINT_PMU #ifdef DEBUG_PRINT_PMU
#define dkprintf(...) kprintf(__VA_ARGS__) #undef DDEBUG_DEFAULT
#define ekprintf(...) kprintf(__VA_ARGS__) #define DDEBUG_DEFAULT DDEBUG_PRINT
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif #endif

View File

@@ -21,15 +21,13 @@
#include <ihk/debug.h> #include <ihk/debug.h>
#include <compiler.h> #include <compiler.h>
#include <lwk/compiler.h> #include <lwk/compiler.h>
#include <debug.h>
//#define DEBUG_PRINT_PSCI //#define DEBUG_PRINT_PSCI
#ifdef DEBUG_PRINT_PSCI #ifdef DEBUG_PRINT_PSCI
#define dkprintf(...) kprintf(__VA_ARGS__) #undef DDEBUG_DEFAULT
#define ekprintf(...) kprintf(__VA_ARGS__) #define DDEBUG_DEFAULT DDEBUG_PRINT
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif #endif
#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 #define PSCI_POWER_STATE_TYPE_POWER_DOWN 1

View File

@@ -11,22 +11,17 @@
#include <hwcap.h> #include <hwcap.h>
#include <string.h> #include <string.h>
#include <thread_info.h> #include <thread_info.h>
#include <debug.h>
//#define DEBUG_PRINT_SC //#define DEBUG_PRINT_SC
#ifdef DEBUG_PRINT_SC #ifdef DEBUG_PRINT_SC
#define dkprintf kprintf #undef DDEBUG_DEFAULT
#define ekprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) #define DDEBUG_DEFAULT DDEBUG_PRINT
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#endif #endif
#define NOT_IMPLEMENTED() do { kprintf("%s is not implemented\n", __func__); while(1);} while(0) #define NOT_IMPLEMENTED() do { kprintf("%s is not implemented\n", __func__); while(1);} while(0)
#define BUG_ON(condition) do { if (condition) { kprintf("PANIC: %s: %s(line:%d)\n",\
__FILE__, __FUNCTION__, __LINE__); panic(""); } } while(0)
extern void save_debugreg(unsigned long *debugreg); extern void save_debugreg(unsigned long *debugreg);
extern unsigned long do_kill(struct thread *thread, int pid, int tid, int sig, struct siginfo *info, int ptracecont); extern unsigned long do_kill(struct thread *thread, int pid, int tid, int sig, struct siginfo *info, int ptracecont);
extern int interrupt_from_user(void *); extern int interrupt_from_user(void *);
@@ -959,11 +954,7 @@ void ptrace_report_signal(struct thread *thread, int sig)
} }
thread->exit_status = sig; thread->exit_status = sig;
/* Transition thread state */ /* Transition thread state */
#ifdef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */
proc->status = PS_DELAY_TRACED; proc->status = PS_DELAY_TRACED;
#else /* POSTK_DEBUG_TEMP_FIX_41 */
proc->status = PS_TRACED;
#endif /* POSTK_DEBUG_TEMP_FIX_41 */
thread->status = PS_TRACED; thread->status = PS_TRACED;
proc->ptrace &= ~PT_TRACE_SYSCALL; proc->ptrace &= ~PT_TRACE_SYSCALL;
if (sig == SIGSTOP || sig == SIGTSTP || if (sig == SIGSTOP || sig == SIGTSTP ||
@@ -982,10 +973,6 @@ void ptrace_report_signal(struct thread *thread, int sig)
info._sifields._sigchld.si_pid = thread->tid; info._sifields._sigchld.si_pid = thread->tid;
info._sifields._sigchld.si_status = thread->exit_status; info._sifields._sigchld.si_status = thread->exit_status;
do_kill(cpu_local_var(current), parent_pid, -1, SIGCHLD, &info, 0); do_kill(cpu_local_var(current), parent_pid, -1, SIGCHLD, &info, 0);
#ifndef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */
/* Wake parent (if sleeping in wait4()) */
waitq_wakeup(&proc->parent->waitpid_q);
#endif /* !POSTK_DEBUG_TEMP_FIX_41 */
dkprintf("ptrace_report_signal,sleeping\n"); dkprintf("ptrace_report_signal,sleeping\n");
/* Sleep */ /* Sleep */

View File

@@ -14,6 +14,8 @@
#include <prctl.h> #include <prctl.h>
#include <limits.h> #include <limits.h>
#include <syscall.h> #include <syscall.h>
#include <uio.h>
#include <debug.h>
extern void ptrace_report_signal(struct thread *thread, int sig); extern void ptrace_report_signal(struct thread *thread, int sig);
extern void clear_single_step(struct thread *thread); extern void clear_single_step(struct thread *thread);
@@ -27,18 +29,12 @@ static void __check_signal(unsigned long rc, void *regs, int num, int irq_disabl
//#define DEBUG_PRINT_SC //#define DEBUG_PRINT_SC
#ifdef DEBUG_PRINT_SC #ifdef DEBUG_PRINT_SC
#define dkprintf kprintf #undef DDEBUG_DEFAULT
#define ekprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) #define DDEBUG_DEFAULT DDEBUG_PRINT
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#endif #endif
#define NOT_IMPLEMENTED() do { kprintf("%s is not implemented\n", __func__); while(1);} while(0) #define NOT_IMPLEMENTED() do { kprintf("%s is not implemented\n", __func__); while(1);} while(0)
#define BUG_ON(condition) do { if (condition) { kprintf("PANIC: %s: %s(line:%d)\n",\
__FILE__, __FUNCTION__, __LINE__); panic(""); } } while(0)
uintptr_t debug_constants[] = { uintptr_t debug_constants[] = {
sizeof(struct cpu_local_var), sizeof(struct cpu_local_var),
offsetof(struct cpu_local_var, current), offsetof(struct cpu_local_var, current),
@@ -59,7 +55,7 @@ static int cpuid_head = 1;
extern int num_processors; extern int num_processors;
int obtain_clone_cpuid(cpu_set_t *cpu_set) { int obtain_clone_cpuid(cpu_set_t *cpu_set, int use_last) {
int min_queue_len = -1; int min_queue_len = -1;
int i, min_cpu = -1; int i, min_cpu = -1;
@@ -1177,19 +1173,10 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
/* Reap and set new signal_flags */ /* Reap and set new signal_flags */
proc->signal_flags = SIGNAL_STOP_STOPPED; proc->signal_flags = SIGNAL_STOP_STOPPED;
#ifdef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */
proc->status = PS_DELAY_STOPPED; proc->status = PS_DELAY_STOPPED;
#else /* POSTK_DEBUG_TEMP_FIX_41 */
proc->status = PS_STOPPED;
#endif /* POSTK_DEBUG_TEMP_FIX_41 */
thread->status = PS_STOPPED; thread->status = PS_STOPPED;
mcs_rwlock_writer_unlock(&proc->update_lock, &lock); mcs_rwlock_writer_unlock(&proc->update_lock, &lock);
#ifndef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */
/* Wake up the parent who tried wait4 and sleeping */
waitq_wakeup(&proc->parent->waitpid_q);
#endif /* !POSTK_DEBUG_TEMP_FIX_41 */
dkprintf("do_signal(): pid: %d, tid: %d SIGSTOP, sleeping\n", dkprintf("do_signal(): pid: %d, tid: %d SIGSTOP, sleeping\n",
proc->pid, thread->tid); proc->pid, thread->tid);
/* Sleep */ /* Sleep */
@@ -1206,19 +1193,10 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
/* Update thread state in fork tree */ /* Update thread state in fork tree */
mcs_rwlock_writer_lock(&proc->update_lock, &lock); mcs_rwlock_writer_lock(&proc->update_lock, &lock);
thread->exit_status = SIGTRAP; thread->exit_status = SIGTRAP;
#ifdef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */
proc->status = PS_DELAY_TRACED; proc->status = PS_DELAY_TRACED;
#else /* POSTK_DEBUG_TEMP_FIX_41 */
proc->status = PS_TRACED;
#endif /* POSTK_DEBUG_TEMP_FIX_41 */
thread->status = PS_TRACED; thread->status = PS_TRACED;
mcs_rwlock_writer_unlock(&proc->update_lock, &lock); mcs_rwlock_writer_unlock(&proc->update_lock, &lock);
#ifndef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */
/* Wake up the parent who tried wait4 and sleeping */
waitq_wakeup(&thread->proc->parent->waitpid_q);
#endif /* !POSTK_DEBUG_TEMP_FIX_41 */
/* Sleep */ /* Sleep */
dkprintf("do_signal,SIGTRAP,sleeping\n"); dkprintf("do_signal,SIGTRAP,sleeping\n");
@@ -1594,7 +1572,7 @@ done:
return 0; return 0;
} }
if (tthread->thread_offloaded) { if (tthread->uti_state == UTI_STATE_RUNNING_IN_LINUX) {
interrupt_syscall(tthread, sig); interrupt_syscall(tthread, sig);
release_thread(tthread); release_thread(tthread);
return 0; return 0;
@@ -1729,7 +1707,7 @@ SYSCALL_DECLARE(mmap)
| MAP_NONBLOCK // 0x10000 | MAP_NONBLOCK // 0x10000
; ;
const intptr_t addr0 = ihk_mc_syscall_arg0(ctx); const uintptr_t addr0 = ihk_mc_syscall_arg0(ctx);
const size_t len0 = ihk_mc_syscall_arg1(ctx); const size_t len0 = ihk_mc_syscall_arg1(ctx);
const int prot = ihk_mc_syscall_arg2(ctx); const int prot = ihk_mc_syscall_arg2(ctx);
const int flags0 = ihk_mc_syscall_arg3(ctx); const int flags0 = ihk_mc_syscall_arg3(ctx);
@@ -1738,7 +1716,7 @@ SYSCALL_DECLARE(mmap)
struct thread *thread = cpu_local_var(current); struct thread *thread = cpu_local_var(current);
struct vm_regions *region = &thread->vm->region; struct vm_regions *region = &thread->vm->region;
int error; int error;
intptr_t addr = 0; uintptr_t addr = 0;
size_t len; size_t len;
int flags = flags0; int flags = flags0;
size_t pgsize; size_t pgsize;

View File

@@ -14,15 +14,13 @@
#include <ihk/debug.h> #include <ihk/debug.h>
#include <ikc/queue.h> #include <ikc/queue.h>
#include <vdso.h> #include <vdso.h>
#include <debug.h>
//#define DEBUG_PRINT_VDSO //#define DEBUG_PRINT_VDSO
#ifdef DEBUG_PRINT_VDSO #ifdef DEBUG_PRINT_VDSO
#define dkprintf(...) kprintf(__VA_ARGS__) #undef DDEBUG_DEFAULT
#define ekprintf(...) kprintf(__VA_ARGS__) #define DDEBUG_DEFAULT DDEBUG_PRINT
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif #endif
#ifdef POSTK_DEBUG_ARCH_DEP_52 #ifdef POSTK_DEBUG_ARCH_DEP_52

View File

@@ -1,5 +1,7 @@
/* gettimeofday.c COPYRIGHT FUJITSU LIMITED 2016 */ /* gettimeofday.c COPYRIGHT FUJITSU LIMITED 2016 */
#include <affinity.h>
#include <arch-memory.h>
#include <time.h> #include <time.h>
#include <syscall.h> #include <syscall.h>
#include <registers.h> #include <registers.h>

View File

@@ -9,29 +9,29 @@ PHDRS
SECTIONS SECTIONS
{ {
. = SIZEOF_HEADERS; . = SIZEOF_HEADERS;
. = ALIGN(4096); . = ALIGN(4096);
.text : { .text : {
*(.text) *(.text)
} :text } :text
.data : { .data : {
*(.data) *(.data)
*(.data.*) *(.data.*)
} :data } :data
.rodata : { .rodata : {
*(.rodata .rodata.*) *(.rodata .rodata.*)
} :data } :data
. = ALIGN(8); . = ALIGN(8);
.bss : { .bss : {
_bss_start = .; _bss_start = .;
*(.bss .bss.*) *(.bss .bss.*)
_bss_end = .; _bss_end = .;
. = ALIGN(4096); . = ALIGN(4096);
_stack_end = .; _stack_end = .;
} :data } :data
/DISCARD/ : { /DISCARD/ : {
*(.eh_frame) *(.eh_frame)
*(.note.gnu.build-id) *(.note.gnu.build-id)
} }
} }

View File

@@ -31,6 +31,7 @@
#include <prctl.h> #include <prctl.h>
#include <page.h> #include <page.h>
#include <kmalloc.h> #include <kmalloc.h>
#include <debug.h>
#define LAPIC_ID 0x020 #define LAPIC_ID 0x020
#define LAPIC_TIMER 0x320 #define LAPIC_TIMER 0x320
@@ -69,11 +70,8 @@
//#define DEBUG_PRINT_CPU //#define DEBUG_PRINT_CPU
#ifdef DEBUG_PRINT_CPU #ifdef DEBUG_PRINT_CPU
#define dkprintf kprintf #undef DDEBUG_DEFAULT
#define ekprintf kprintf #define DDEBUG_DEFAULT DDEBUG_PRINT
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf kprintf
#endif #endif
static void *lapic_vp; static void *lapic_vp;
@@ -96,6 +94,8 @@ int gettime_local_support = 0;
extern int ihk_mc_pt_print_pte(struct page_table *pt, void *virt); extern int ihk_mc_pt_print_pte(struct page_table *pt, void *virt);
extern int kprintf(const char *format, ...); extern int kprintf(const char *format, ...);
extern int interrupt_from_user(void *); extern int interrupt_from_user(void *);
extern void perf_start(struct mc_perf_event *event);
extern void perf_reset(struct mc_perf_event *event);
static struct idt_entry{ static struct idt_entry{
uint32_t desc[4]; uint32_t desc[4];
@@ -847,9 +847,6 @@ void setup_x86_ap(void (*next_func)(void))
} }
void arch_show_interrupt_context(const void *reg); void arch_show_interrupt_context(const void *reg);
void set_signal(int sig, void *regs, struct siginfo *info);
void check_signal(unsigned long, void *, int);
void check_sig_pending();
extern void tlb_flush_handler(int vector); extern void tlb_flush_handler(int vector);
void __show_stack(uintptr_t *sp) { void __show_stack(uintptr_t *sp) {
@@ -877,7 +874,7 @@ void interrupt_exit(struct x86_user_context *regs)
cpu_enable_interrupt(); cpu_enable_interrupt();
check_sig_pending(); check_sig_pending();
check_need_resched(); check_need_resched();
check_signal(0, regs, 0); check_signal(0, regs, -1);
} }
else { else {
check_sig_pending(); check_sig_pending();
@@ -1010,6 +1007,12 @@ void handle_interrupt(int vector, struct x86_user_context *regs)
set_cputime(interrupt_from_user(regs)? 0: 1); set_cputime(interrupt_from_user(regs)? 0: 1);
--v->in_interrupt; --v->in_interrupt;
/* for migration by IPI */
if (v->flags & CPU_FLAG_NEED_MIGRATE) {
schedule();
check_signal(0, regs, 0);
}
} }
void gpe_handler(struct x86_user_context *regs) void gpe_handler(struct x86_user_context *regs)
@@ -1644,12 +1647,10 @@ int ihk_mc_interrupt_cpu(int cpu, int vector)
return 0; return 0;
} }
#ifdef POSTK_DEBUG_ARCH_DEP_22
extern void perf_start(struct mc_perf_event *event);
extern void perf_reset(struct mc_perf_event *event);
struct thread *arch_switch_context(struct thread *prev, struct thread *next) struct thread *arch_switch_context(struct thread *prev, struct thread *next)
{ {
struct thread *last; struct thread *last;
struct mcs_rwlock_node_irqsave lock;
dkprintf("[%d] schedule: tlsblock_base: 0x%lX\n", dkprintf("[%d] schedule: tlsblock_base: 0x%lX\n",
ihk_mc_get_processor_id(), next->tlsblock_base); ihk_mc_get_processor_id(), next->tlsblock_base);
@@ -1668,7 +1669,7 @@ struct thread *arch_switch_context(struct thread *prev, struct thread *next)
} }
#ifdef PROFILE_ENABLE #ifdef PROFILE_ENABLE
if (prev->profile && prev->profile_start_ts != 0) { if (prev && prev->profile && prev->profile_start_ts != 0) {
prev->profile_elapsed_ts += prev->profile_elapsed_ts +=
(rdtsc() - prev->profile_start_ts); (rdtsc() - prev->profile_start_ts);
prev->profile_start_ts = 0; prev->profile_start_ts = 0;
@@ -1680,6 +1681,28 @@ struct thread *arch_switch_context(struct thread *prev, struct thread *next)
#endif #endif
if (prev) { if (prev) {
mcs_rwlock_writer_lock(&prev->proc->update_lock, &lock);
if (prev->proc->status & (PS_DELAY_STOPPED | PS_DELAY_TRACED)) {
switch (prev->proc->status) {
case PS_DELAY_STOPPED:
prev->proc->status = PS_STOPPED;
break;
case PS_DELAY_TRACED:
prev->proc->status = PS_TRACED;
break;
default:
break;
}
mcs_rwlock_writer_unlock(&prev->proc->update_lock,
&lock);
/* Wake up the parent if it called wait4() and is sleeping */
waitq_wakeup(&prev->proc->parent->waitpid_q);
} else {
mcs_rwlock_writer_unlock(&prev->proc->update_lock,
&lock);
}
last = ihk_mc_switch_context(&prev->ctx, &next->ctx, prev); last = ihk_mc_switch_context(&prev->ctx, &next->ctx, prev);
} }
else { else {
@@ -1687,7 +1710,6 @@ struct thread *arch_switch_context(struct thread *prev, struct thread *next)
} }
return last; return last;
} }
#endif
/*@ /*@
@ requires \valid(thread); @ requires \valid(thread);
@@ -1762,14 +1784,6 @@ void copy_fp_regs(struct thread *from, struct thread *to)
} }
} }
#ifdef POSTK_DEBUG_TEMP_FIX_19
void
clear_fp_regs(struct thread *thread)
{
return;
}
#endif /* POSTK_DEBUG_TEMP_FIX_19 */
/*@ /*@
@ requires \valid(thread); @ requires \valid(thread);
@ assigns thread->fp_regs; @ assigns thread->fp_regs;
@@ -1777,8 +1791,11 @@ clear_fp_regs(struct thread *thread)
void void
restore_fp_regs(struct thread *thread) restore_fp_regs(struct thread *thread)
{ {
if (!thread->fp_regs) if (!thread->fp_regs) {
// only clear fpregs.
clear_fp_regs();
return; return;
}
if (xsave_available) { if (xsave_available) {
unsigned int low, high; unsigned int low, high;
@@ -1797,6 +1814,13 @@ restore_fp_regs(struct thread *thread)
//release_fp_regs(thread); //release_fp_regs(thread);
} }
/*
 * Reset this CPU's FP register state by loading the FP state that belongs
 * to the per-CPU idle thread.
 *
 * restore_fp_regs() calls back into this function when the thread it is
 * handed has no fp_regs buffer.  If the idle thread itself has no buffer,
 * the two functions would call each other without end, so return early in
 * that case instead of recursing.
 */
void clear_fp_regs(void)
{
	struct cpu_local_var *v = get_this_cpu_local_var();

	if (!v->idle.fp_regs) {
		/* Nothing to load from; avoid unbounded mutual recursion. */
		return;
	}

	restore_fp_regs(&v->idle);
}
ihk_mc_user_context_t *lookup_user_context(struct thread *thread) ihk_mc_user_context_t *lookup_user_context(struct thread *thread)
{ {
ihk_mc_user_context_t *uctx = thread->uctx; ihk_mc_user_context_t *uctx = thread->uctx;

View File

@@ -6,6 +6,7 @@
#include <process.h> #include <process.h>
#include <string.h> #include <string.h>
#include <elfcore.h> #include <elfcore.h>
#include <debug.h>
#define align32(x) ((((x) + 3) / 4) * 4) #define align32(x) ((((x) + 3) / 4) * 4)
#define alignpage(x) ((((x) + (PAGE_SIZE) - 1) / (PAGE_SIZE)) * (PAGE_SIZE)) #define alignpage(x) ((((x) + (PAGE_SIZE) - 1) / (PAGE_SIZE)) * (PAGE_SIZE))
@@ -13,13 +14,16 @@
//#define DEBUG_PRINT_GENCORE //#define DEBUG_PRINT_GENCORE
#ifdef DEBUG_PRINT_GENCORE #ifdef DEBUG_PRINT_GENCORE
#define dkprintf(...) kprintf(__VA_ARGS__) #undef DDEBUG_DEFAULT
#define ekprintf(...) kprintf(__VA_ARGS__) #define DDEBUG_DEFAULT DDEBUG_PRINT
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif #endif
/*
 * Non-zero when a VM range must be left out of the core image: any range
 * flagged VR_RESERVED, VR_MEMTYPE_UC or VR_DONTDUMP (reserved areas for
 * McKernel's internal use, device files, holes created by mprotect).
 *
 * `range` is a pointer expression.  It is parenthesized in the expansion
 * so that arguments such as `p + 1` or `&array[i]` bind correctly.
 */
#define GENCORE_RANGE_IS_INACCESSIBLE(range) \
	((range)->flag & (VR_RESERVED | VR_MEMTYPE_UC | VR_DONTDUMP))
/* /*
* Generate a core file image, which consists of many chunks. * Generate a core file image, which consists of many chunks.
* Returns an allocated table, an etnry of which is a pair of the address * Returns an allocated table, an etnry of which is a pair of the address
@@ -309,12 +313,10 @@ int gencore(struct thread *thread, void *regs,
dkprintf("start:%lx end:%lx flag:%lx objoff:%lx\n", dkprintf("start:%lx end:%lx flag:%lx objoff:%lx\n",
range->start, range->end, range->flag, range->objoff); range->start, range->end, range->flag, range->objoff);
/* We omit reserved areas because they are only for
mckernel's internal use. */ if (GENCORE_RANGE_IS_INACCESSIBLE(range)) {
if (range->flag & VR_RESERVED)
continue;
if (range->flag & VR_DONTDUMP)
continue; continue;
}
/* We need a chunk for each page for a demand paging area. /* We need a chunk for each page for a demand paging area.
This can be optimized for spacial complexity but we would This can be optimized for spacial complexity but we would
lose simplicity instead. */ lose simplicity instead. */
@@ -403,8 +405,9 @@ int gencore(struct thread *thread, void *regs,
unsigned long flag = range->flag; unsigned long flag = range->flag;
unsigned long size = range->end - range->start; unsigned long size = range->end - range->start;
if (range->flag & VR_RESERVED) if (GENCORE_RANGE_IS_INACCESSIBLE(range)) {
continue; continue;
}
ph[i].p_type = PT_LOAD; ph[i].p_type = PT_LOAD;
ph[i].p_flags = ((flag & VR_PROT_READ) ? PF_R : 0) ph[i].p_flags = ((flag & VR_PROT_READ) ? PF_R : 0)
@@ -446,8 +449,9 @@ int gencore(struct thread *thread, void *regs,
unsigned long phys; unsigned long phys;
if (range->flag & VR_RESERVED) if (GENCORE_RANGE_IS_INACCESSIBLE(range)) {
continue; continue;
}
if (range->flag & VR_DEMAND_PAGING) { if (range->flag & VR_DEMAND_PAGING) {
/* Just an ad hoc kluge. */ /* Just an ad hoc kluge. */
unsigned long p, start, phys; unsigned long p, start, phys;

View File

@@ -64,12 +64,13 @@ static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval,
return oldval; return oldval;
} }
static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) static inline int futex_atomic_op_inuser(int encoded_op,
int __user *uaddr)
{ {
int op = (encoded_op >> 28) & 7; int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15; int cmp = (encoded_op >> 24) & 15;
int oparg = (encoded_op << 8) >> 20; int oparg = (encoded_op & 0x00fff000) >> 12;
int cmparg = (encoded_op << 20) >> 20; int cmparg = encoded_op & 0xfff;
int oldval = 0, ret, tem; int oldval = 0, ret, tem;
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))

View File

@@ -6,6 +6,7 @@
#include <ihk/cpu.h> #include <ihk/cpu.h>
#include <ihk/atomic.h> #include <ihk/atomic.h>
#include <lwk/compiler.h>
//#define DEBUG_SPINLOCK //#define DEBUG_SPINLOCK
//#define DEBUG_MCS_RWLOCK //#define DEBUG_MCS_RWLOCK
@@ -14,7 +15,17 @@
int __kprintf(const char *format, ...); int __kprintf(const char *format, ...);
#endif #endif
typedef int ihk_spinlock_t; typedef unsigned short __ticket_t;
typedef unsigned int __ticketpair_t;
typedef struct ihk_spinlock {
union {
__ticketpair_t head_tail;
struct __raw_tickets {
__ticket_t head, tail;
} tickets;
};
} ihk_spinlock_t;
extern void preempt_enable(void); extern void preempt_enable(void);
extern void preempt_disable(void); extern void preempt_disable(void);
@@ -23,9 +34,61 @@ extern void preempt_disable(void);
static void ihk_mc_spinlock_init(ihk_spinlock_t *lock) static void ihk_mc_spinlock_init(ihk_spinlock_t *lock)
{ {
*lock = 0; lock->head_tail = 0;
}
#define SPIN_LOCK_UNLOCKED { .head_tail = 0 }
/*
 * ihk_mc_spinlock_trylock_noirq(l): try to take lock `l` without spinning.
 *
 * With DEBUG_SPINLOCK the call is traced before and after; otherwise the
 * name maps straight to the implementation.  The traced variant is a GNU
 * statement expression so that it still yields the int result of
 * __ihk_mc_spinlock_trylock_noirq() when used inside a condition.
 */
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_trylock_noirq(l) ({ int rc; \
__kprintf("[%d] call ihk_mc_spinlock_trylock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
rc = __ihk_mc_spinlock_trylock_noirq(l); \
__kprintf("[%d] ret ihk_mc_spinlock_trylock_noirq\n", ihk_mc_get_processor_id()); rc; \
})
#else
#define ihk_mc_spinlock_trylock_noirq __ihk_mc_spinlock_trylock_noirq
#endif
/*
 * Try to take the ticket lock once, without spinning.
 *
 * Returns non-zero when the lock was taken and 0 otherwise (the lock was
 * already held, or the compare-and-swap lost against another CPU).
 * On success preemption stays disabled: preempt_disable() is called before
 * the swap and preempt_enable() is only called on the failure path.
 */
static int __ihk_mc_spinlock_trylock_noirq(ihk_spinlock_t *lock)
{
/* Snapshot of the lock word; the swap below only succeeds if it is unchanged. */
ihk_spinlock_t cur = { .head_tail = lock->head_tail };
/* Desired new lock word: same head, tail advanced by one ticket (2). */
ihk_spinlock_t next = { .tickets.head = cur.tickets.head, .tickets.tail = cur.tickets.tail + 2 };
int success;
/* head != tail in the snapshot is treated as "lock busy": give up. */
if (cur.tickets.head != cur.tickets.tail) {
return 0;
}
preempt_disable();
/* Use the same increment amount as other functions! */
success = __sync_bool_compare_and_swap((__ticketpair_t*)lock, cur.head_tail, next.head_tail);
if (!success) {
/* Lost the race: undo the preempt_disable() above. */
preempt_enable();
}
return success;
}
/*
 * ihk_mc_spinlock_trylock(l, result): with DEBUG_SPINLOCK the call to
 * __ihk_mc_spinlock_trylock() is wrapped in trace messages and its
 * unsigned long return value is passed through as the value of the
 * statement expression; otherwise the name maps directly to
 * __ihk_mc_spinlock_trylock.
 */
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_trylock(l, result) ({ unsigned long rc; \
__kprintf("[%d] call ihk_mc_spinlock_trylock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
rc = __ihk_mc_spinlock_trylock(l, result); \
__kprintf("[%d] ret ihk_mc_spinlock_trylock\n", ihk_mc_get_processor_id()); rc;\
})
#else
#define ihk_mc_spinlock_trylock __ihk_mc_spinlock_trylock
#endif
static unsigned long __ihk_mc_spinlock_trylock(ihk_spinlock_t *lock, int *result)
{
unsigned long flags;
flags = cpu_disable_interrupt_save();
*result = __ihk_mc_spinlock_trylock_noirq(lock);
return flags;
} }
#define SPIN_LOCK_UNLOCKED 0
#ifdef DEBUG_SPINLOCK #ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_lock_noirq(l) { \ #define ihk_mc_spinlock_lock_noirq(l) { \
@@ -39,40 +102,24 @@ __kprintf("[%d] ret ihk_mc_spinlock_lock_noirq\n", ihk_mc_get_processor_id()); \
static void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock) static void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
{ {
int inc = 0x00010000; register struct __raw_tickets inc = { .tail = 0x0002 };
int tmp;
#if 0
asm volatile("lock ; xaddl %0, %1\n"
"movzwl %w0, %2\n\t"
"shrl $16, %0\n\t"
"1:\t"
"cmpl %0, %2\n\t"
"je 2f\n\t"
"rep ; nop\n\t"
"movzwl %1, %2\n\t"
"jmp 1b\n"
"2:"
: "+Q" (inc), "+m" (*lock), "=r" (tmp) : : "memory", "cc");
#endif
preempt_disable(); preempt_disable();
asm volatile("lock; xaddl %0, %1\n" asm volatile ("lock xaddl %0, %1\n"
"movzwl %w0, %2\n\t" : "+r" (inc), "+m" (*(lock)) : : "memory", "cc");
"shrl $16, %0\n\t"
"1:\t"
"cmpl %0, %2\n\t"
"je 2f\n\t"
"rep ; nop\n\t"
"movzwl %1, %2\n\t"
/* don't need lfence here, because loads are in-order */
"jmp 1b\n"
"2:"
: "+r" (inc), "+m" (*lock), "=&r" (tmp)
:
: "memory", "cc");
if (inc.head == inc.tail)
goto out;
for (;;) {
if (*((volatile __ticket_t *)&lock->tickets.head) == inc.tail)
goto out;
cpu_pause();
}
out:
barrier(); /* make sure nothing creeps before the lock is taken */
} }
#ifdef DEBUG_SPINLOCK #ifdef DEBUG_SPINLOCK
@@ -106,7 +153,10 @@ __kprintf("[%d] ret ihk_mc_spinlock_unlock_noirq\n", ihk_mc_get_processor_id());
#endif #endif
static void __ihk_mc_spinlock_unlock_noirq(ihk_spinlock_t *lock) static void __ihk_mc_spinlock_unlock_noirq(ihk_spinlock_t *lock)
{ {
asm volatile ("lock incw %0" : "+m"(*lock) : : "memory", "cc"); __ticket_t inc = 0x0002;
asm volatile ("lock addw %1, %0\n"
: "+m" (lock->tickets.head) : "ri" (inc) : "memory", "cc");
preempt_enable(); preempt_enable();
} }
@@ -602,4 +652,9 @@ __mcs_rwlock_reader_unlock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_
#endif #endif
} }
/*
 * Report whether bit 0x200 is set in a saved flags word.
 * Returns 1 when the bit is set and 0 otherwise.
 * NOTE(review): 0x200 is presumably the x86 RFLAGS.IF (interrupt enable)
 * bit -- confirm against what cpu_disable_interrupt_save() returns.
 */
static inline int irqflags_can_interrupt(unsigned long flags)
{
	return (flags & 0x200) != 0;
}
#endif #endif

View File

@@ -42,16 +42,34 @@
#define USER_END 0x0000800000000000UL #define USER_END 0x0000800000000000UL
#define TASK_UNMAPPED_BASE 0x00002AAAAAA00000UL #define TASK_UNMAPPED_BASE 0x00002AAAAAA00000UL
/*
* Canonical negative addresses (i.e., the smallest kernel virtual address)
* on x86 64 bit mode (in its most restricted 48 bit format) starts from
* 0xffff800000000000, but Linux starts mapping physical memory at 0xffff880000000000.
* The 0x80000000000 long gap (8TBs, i.e., 16 PGD level entries in the page tables)
* is used for Xen hypervisor (see arch/x86/include/asm/page.h) and that is
* what we utilize for McKernel.
* This gives us the benefit of being able to use Linux kernel virtual
* addresses identically as in Linux.
*
* NOTE: update these also in eclair.c when modified!
*/
#define MAP_ST_START 0xffff800000000000UL #define MAP_ST_START 0xffff800000000000UL
#define MAP_VMAP_START 0xfffff00000000000UL #define MAP_VMAP_START 0xffff850000000000UL
#define MAP_FIXED_START 0xffffffff70000000UL #define MAP_FIXED_START 0xffff860000000000UL
#define MAP_KERNEL_START 0xffffffff80000000UL #define LINUX_PAGE_OFFSET 0xffff880000000000UL
/*
* MAP_KERNEL_START is 8MB below MODULES_END in Linux.
* Placing the LWK image in the virtual address space at the end of
* the Linux modules section enables us to map the LWK TEXT in Linux
* as well, so that Linux can also call into LWK text.
*/
#define MAP_KERNEL_START 0xFFFFFFFFFE800000UL
#define STACK_TOP(region) ((region)->user_end) #define STACK_TOP(region) ((region)->user_end)
#define MAP_VMAP_SIZE 0x0000000100000000UL #define MAP_VMAP_SIZE 0x0000000100000000UL
#define KERNEL_PHYS_OFFSET MAP_ST_START
#define PTL4_SHIFT 39 #define PTL4_SHIFT 39
#define PTL4_SIZE (1UL << PTL4_SHIFT) #define PTL4_SIZE (1UL << PTL4_SHIFT)
#define PTL3_SHIFT 30 #define PTL3_SHIFT 30

View File

@@ -114,7 +114,7 @@ SYSCALL_HANDLED(160, setrlimit)
SYSCALL_HANDLED(164, settimeofday) SYSCALL_HANDLED(164, settimeofday)
SYSCALL_HANDLED(186, gettid) SYSCALL_HANDLED(186, gettid)
SYSCALL_HANDLED(200, tkill) SYSCALL_HANDLED(200, tkill)
SYSCALL_DELEGATED(201, time) SYSCALL_HANDLED(201, time)
SYSCALL_HANDLED(202, futex) SYSCALL_HANDLED(202, futex)
SYSCALL_HANDLED(203, sched_setaffinity) SYSCALL_HANDLED(203, sched_setaffinity)
SYSCALL_HANDLED(204, sched_getaffinity) SYSCALL_HANDLED(204, sched_getaffinity)
@@ -161,6 +161,7 @@ SYSCALL_HANDLED(__NR_profile, profile)
SYSCALL_HANDLED(730, util_migrate_inter_kernel) SYSCALL_HANDLED(730, util_migrate_inter_kernel)
SYSCALL_HANDLED(731, util_indicate_clone) SYSCALL_HANDLED(731, util_indicate_clone)
SYSCALL_HANDLED(732, get_system) SYSCALL_HANDLED(732, get_system)
SYSCALL_HANDLED(733, util_register_desc)
/* McKernel Specific */ /* McKernel Specific */
SYSCALL_HANDLED(801, swapout) SYSCALL_HANDLED(801, swapout)

View File

@@ -107,9 +107,17 @@ void init_boot_processor_local(void)
@ ensures \result == %gs; @ ensures \result == %gs;
@ assigns \nothing; @ assigns \nothing;
*/ */
extern int num_processors;
int ihk_mc_get_processor_id(void) int ihk_mc_get_processor_id(void)
{ {
int id; int id;
void *gs;
gs = (void *)rdmsr(MSR_GS_BASE);
if (gs < (void *)locals ||
gs > ((void *)locals + LOCALS_SPAN * num_processors)) {
return -1;
}
asm volatile("movl %%gs:0, %0" : "=r"(id)); asm volatile("movl %%gs:0, %0" : "=r"(id));

View File

@@ -25,15 +25,13 @@
#include <cls.h> #include <cls.h>
#include <kmalloc.h> #include <kmalloc.h>
#include <rusage_private.h> #include <rusage_private.h>
#include <debug.h>
//#define DEBUG //#define DEBUG
#ifdef DEBUG #ifdef DEBUG
#define dkprintf(...) do { kprintf(__VA_ARGS__); } while (0) #undef DDEBUG_DEFAULT
#define ekprintf(...) do { kprintf(__VA_ARGS__); } while (0) #define DDEBUG_DEFAULT DDEBUG_PRINT
#else
#define dkprintf(...) do { } while (0)
#define ekprintf(...) do { kprintf(__VA_ARGS__); } while (0)
#endif #endif
static char *last_page; static char *last_page;
@@ -41,6 +39,8 @@ extern char _head[], _end[];
extern unsigned long x86_kernel_phys_base; extern unsigned long x86_kernel_phys_base;
int safe_kernel_map = 0;
/* Arch specific early allocation routine */ /* Arch specific early allocation routine */
void *early_alloc_pages(int nr_pages) void *early_alloc_pages(int nr_pages)
{ {
@@ -109,6 +109,7 @@ struct page_table {
}; };
static struct page_table *init_pt; static struct page_table *init_pt;
static int init_pt_loaded = 0;
static ihk_spinlock_t init_pt_lock; static ihk_spinlock_t init_pt_lock;
static int use_1gb_page = 0; static int use_1gb_page = 0;
@@ -167,30 +168,6 @@ static unsigned long setup_l3(struct page_table *pt,
return virt_to_phys(pt); return virt_to_phys(pt);
} }
static void init_normal_area(struct page_table *pt)
{
unsigned long map_start, map_end, phys, pt_phys;
int ident_index, virt_index;
map_start = ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0);
map_end = ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0);
kprintf("map_start = %lx, map_end = %lx\n", map_start, map_end);
ident_index = map_start >> PTL4_SHIFT;
virt_index = (MAP_ST_START >> PTL4_SHIFT) & (PT_ENTRIES - 1);
memset(pt, 0, sizeof(struct page_table));
for (phys = (map_start & ~(PTL4_SIZE - 1)); phys < map_end;
phys += PTL4_SIZE) {
pt_phys = setup_l3(ihk_mc_alloc_pages(1, IHK_MC_AP_CRITICAL), phys,
map_start, map_end);
pt->entry[ident_index++] = pt_phys | PFL4_PDIR_ATTR;
pt->entry[virt_index++] = pt_phys | PFL4_PDIR_ATTR;
}
}
static struct page_table *__alloc_new_pt(ihk_mc_ap_flag ap_flag) static struct page_table *__alloc_new_pt(ihk_mc_ap_flag ap_flag)
{ {
struct page_table *newpt = ihk_mc_alloc_pages(1, ap_flag); struct page_table *newpt = ihk_mc_alloc_pages(1, ap_flag);
@@ -258,6 +235,11 @@ static unsigned long attr_to_l1attr(enum ihk_mc_pt_attribute attr)
} }
} }
/*
 * PTLX_SHIFT(index) token-pastes `index` into PTL<index>_SHIFT, so `index`
 * must be a literal level number (e.g. 4), not a variable.
 */
#define PTLX_SHIFT(index) PTL ## index ## _SHIFT
/*
 * GET_VIRT_INDEX(virt, index, dest) stores in `dest` the page-table entry
 * index of address `virt` at level `index`.
 * Note: this expands to a bare assignment expression without enclosing
 * parentheses or a trailing semicolon.
 */
#define GET_VIRT_INDEX(virt, index, dest) \
dest = ((virt) >> PTLX_SHIFT(index)) & (PT_ENTRIES - 1)
#define GET_VIRT_INDICES(virt, l4i, l3i, l2i, l1i) \ #define GET_VIRT_INDICES(virt, l4i, l3i, l2i, l1i) \
l4i = ((virt) >> PTL4_SHIFT) & (PT_ENTRIES - 1); \ l4i = ((virt) >> PTL4_SHIFT) & (PT_ENTRIES - 1); \
l3i = ((virt) >> PTL3_SHIFT) & (PT_ENTRIES - 1); \ l3i = ((virt) >> PTL3_SHIFT) & (PT_ENTRIES - 1); \
@@ -1518,12 +1500,12 @@ static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base,
if (page) { if (page) {
dkprintf("%s: page=%p,is_in_memobj=%d,(old & PFL1_DIRTY)=%lx,memobj=%p,args->memobj->flags=%x\n", __FUNCTION__, page, page_is_in_memobj(page), (old & PFL1_DIRTY), args->memobj, args->memobj ? args->memobj->flags : -1); dkprintf("%s: page=%p,is_in_memobj=%d,(old & PFL1_DIRTY)=%lx,memobj=%p,args->memobj->flags=%x\n", __FUNCTION__, page, page_is_in_memobj(page), (old & PFL1_DIRTY), args->memobj, args->memobj ? args->memobj->flags : -1);
} }
if (page && page_is_in_memobj(page) && (old & PFL1_DIRTY) && (args->memobj) && if (page && page_is_in_memobj(page) && pte_is_dirty(&old, PTL1_SIZE) &&
!(args->memobj->flags & MF_ZEROFILL)) { args->memobj && !(args->memobj->flags & MF_ZEROFILL)) {
memobj_flush_page(args->memobj, phys, PTL1_SIZE); memobj_flush_page(args->memobj, phys, PTL1_SIZE);
} }
if (!(old & PFL1_FILEOFF)) { if (!pte_is_fileoff(&old, PTL1_SIZE)) {
if(args->free_physical) { if(args->free_physical) {
if (!page) { if (!page) {
/* Anonymous || !XPMEM attach */ /* Anonymous || !XPMEM attach */
@@ -1585,11 +1567,11 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base,
page = phys_to_page(phys); page = phys_to_page(phys);
} }
if (page && page_is_in_memobj(page) && (old & PFL2_DIRTY)) { if (page && page_is_in_memobj(page) && pte_is_dirty(&old, PTL2_SIZE)) {
memobj_flush_page(args->memobj, phys, PTL2_SIZE); memobj_flush_page(args->memobj, phys, PTL2_SIZE);
} }
if (!(old & PFL2_FILEOFF)) { if (!pte_is_fileoff(&old, PTL2_SIZE)) {
if(args->free_physical) { if(args->free_physical) {
if (!page) { if (!page) {
/* Anonymous || !XPMEM attach */ /* Anonymous || !XPMEM attach */
@@ -1666,13 +1648,13 @@ static int clear_range_l3(void *args0, pte_t *ptep, uint64_t base,
page = phys_to_page(phys); page = phys_to_page(phys);
} }
if (page && page_is_in_memobj(page) && (old & PFL3_DIRTY)) { if (page && page_is_in_memobj(page) && pte_is_dirty(&old, PTL3_SIZE)) {
memobj_flush_page(args->memobj, phys, PTL3_SIZE); memobj_flush_page(args->memobj, phys, PTL3_SIZE);
} }
dkprintf("%s: phys=%ld, pte_get_phys(&old),PTL3_SIZE\n", __FUNCTION__, pte_get_phys(&old)); dkprintf("%s: phys=%ld, pte_get_phys(&old),PTL3_SIZE\n", __FUNCTION__, pte_get_phys(&old));
if (!(old & PFL3_FILEOFF)) { if (!pte_is_fileoff(&old, PTL3_SIZE)) {
if(args->free_physical) { if(args->free_physical) {
if (!page) { if (!page) {
/* Anonymous || !XPMEM attach */ /* Anonymous || !XPMEM attach */
@@ -2540,6 +2522,82 @@ static void init_fixed_area(struct page_table *pt)
return; return;
} }
/*
 * Map the physical range [IHK_MC_GMA_MAP_START, IHK_MC_GMA_MAP_END) into
 * `pt` at MAP_ST_START + physical address, one large page at a time.
 *
 * A failed mapping is reported with kprintf() and the loop carries on with
 * the next page; no error is returned to the caller.
 */
static void init_normal_area(struct page_table *pt)
{
	unsigned long map_start, map_end, phys;
	unsigned long virt;

	map_start = ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0);
	map_end = ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0);
	virt = MAP_ST_START + map_start;

	/*
	 * The address is kept as unsigned long so that it matches the %lx
	 * conversions below; it is converted to a pointer only at the call.
	 */
	kprintf("map_start = %lx, map_end = %lx, virt %lx\n",
		map_start, map_end, virt);

	for (phys = map_start; phys < map_end; phys += LARGE_PAGE_SIZE) {
		if (set_pt_large_page(pt, (void *)virt, phys,
				      PTATTR_WRITABLE) != 0) {
			kprintf("%s: error setting mapping for 0x%lx\n",
				__func__, virt);
		}
		virt += LARGE_PAGE_SIZE;
	}
}
/*
 * Map physical memory into `pt` at LINUX_PAGE_OFFSET + physical address,
 * using large pages.
 *
 * safe_kernel_map == 0: straight-map the first 2 TB of physical address
 *                       space.
 * safe_kernel_map != 0: map only the memory chunks reported by
 *                       ihk_mc_get_memory_chunk(); per the original note
 *                       this is meant to prevent destruction of memory on
 *                       the Linux side.
 *
 * Mapping failures are reported with kprintf() and skipped; nothing is
 * returned to the caller.
 */
static void init_linux_kernel_mapping(struct page_table *pt)
{
	unsigned long map_start, map_end, phys;
	/* Kept as unsigned long so it matches the %lx conversions below. */
	unsigned long virt;
	int nr_memory_chunks, chunk_id, numa_id;

	if (safe_kernel_map == 0) {
		kprintf("Straight-map entire physical memory\n");

		/* Map 2 TB for now */
		map_start = 0;
		map_end = 0x20000000000UL;

		virt = LINUX_PAGE_OFFSET + map_start;

		/* Every argument is an unsigned long, as %lx requires. */
		kprintf("Linux kernel virtual: 0x%lx - 0x%lx -> 0x%lx - 0x%lx\n",
			LINUX_PAGE_OFFSET + map_start,
			LINUX_PAGE_OFFSET + map_end,
			map_start, map_end);

		for (phys = map_start; phys < map_end;
		     phys += LARGE_PAGE_SIZE) {
			if (set_pt_large_page(pt, (void *)virt, phys,
					      PTATTR_WRITABLE) != 0) {
				kprintf("%s: error setting mapping for 0x%lx\n", __FUNCTION__, virt);
			}
			virt += LARGE_PAGE_SIZE;
		}
		return;
	}

	kprintf("Straight-map physical memory areas allocated to McKernel\n");

	nr_memory_chunks = ihk_mc_get_nr_memory_chunks();
	if (nr_memory_chunks == 0) {
		kprintf("%s: ERROR: No memory chunk available.\n", __FUNCTION__);
		return;
	}

	for (chunk_id = 0; chunk_id < nr_memory_chunks; chunk_id++) {
		if (ihk_mc_get_memory_chunk(chunk_id, &map_start, &map_end,
					    &numa_id)) {
			kprintf("%s: ERROR: Memory chunk id (%d) out of range.\n", __FUNCTION__, chunk_id);
			continue;
		}

		dkprintf("Linux kernel virtual: 0x%lx - 0x%lx -> 0x%lx - 0x%lx\n",
			 LINUX_PAGE_OFFSET + map_start,
			 LINUX_PAGE_OFFSET + map_end,
			 map_start, map_end);

		virt = LINUX_PAGE_OFFSET + map_start;
		for (phys = map_start; phys < map_end;
		     phys += LARGE_PAGE_SIZE, virt += LARGE_PAGE_SIZE) {
			if (set_pt_large_page(pt, (void *)virt, phys,
					      PTATTR_WRITABLE) != 0) {
				kprintf("%s: set_pt_large_page() failed for 0x%lx\n", __FUNCTION__, virt);
			}
		}
	}
}
void init_text_area(struct page_table *pt) void init_text_area(struct page_table *pt)
{ {
unsigned long __end, phys, virt; unsigned long __end, phys, virt;
@@ -2624,17 +2682,19 @@ void init_page_table(void)
init_pt = ihk_mc_alloc_pages(1, IHK_MC_AP_CRITICAL); init_pt = ihk_mc_alloc_pages(1, IHK_MC_AP_CRITICAL);
ihk_mc_spinlock_init(&init_pt_lock); ihk_mc_spinlock_init(&init_pt_lock);
memset(init_pt, 0, sizeof(PAGE_SIZE)); memset(init_pt, 0, sizeof(*init_pt));
/* Normal memory area */ /* Normal memory area */
init_normal_area(init_pt); init_normal_area(init_pt);
init_linux_kernel_mapping(init_pt);
init_fixed_area(init_pt); init_fixed_area(init_pt);
init_low_area(init_pt); init_low_area(init_pt);
init_text_area(init_pt); init_text_area(init_pt);
init_vsyscall_area(init_pt); init_vsyscall_area(init_pt);
load_page_table(init_pt); load_page_table(init_pt);
kprintf("Page table is now at %p\n", init_pt); init_pt_loaded = 1;
kprintf("Page table is now at 0x%lx\n", init_pt);
} }
extern void __reserve_arch_pages(unsigned long, unsigned long, extern void __reserve_arch_pages(unsigned long, unsigned long,
@@ -2664,15 +2724,31 @@ unsigned long virt_to_phys(void *v)
unsigned long va = (unsigned long)v; unsigned long va = (unsigned long)v;
if (va >= MAP_KERNEL_START) { if (va >= MAP_KERNEL_START) {
dkprintf("%s: MAP_KERNEL_START <= 0x%lx <= LINUX_PAGE_OFFSET\n",
__FUNCTION__, va);
return va - MAP_KERNEL_START + x86_kernel_phys_base; return va - MAP_KERNEL_START + x86_kernel_phys_base;
} else { }
else if (va >= LINUX_PAGE_OFFSET) {
return va - LINUX_PAGE_OFFSET;
}
else if (va >= MAP_FIXED_START) {
return va - MAP_FIXED_START;
}
else {
dkprintf("%s: MAP_ST_START <= 0x%lx <= MAP_FIXED_START\n",
__FUNCTION__, va);
return va - MAP_ST_START; return va - MAP_ST_START;
} }
} }
void *phys_to_virt(unsigned long p) void *phys_to_virt(unsigned long p)
{ {
return (void *)(p + MAP_ST_START); /* Before loading our own PT use straight mapping */
if (!init_pt_loaded) {
return (void *)(p + MAP_ST_START);
}
return (void *)(p + LINUX_PAGE_OFFSET);
} }
int copy_from_user(void *dst, const void *src, size_t siz) int copy_from_user(void *dst, const void *src, size_t siz)
@@ -2840,17 +2916,12 @@ int read_process_vm(struct process_vm *vm, void *kdst, const void *usrc, size_t
return error; return error;
} }
#ifdef POSTK_DEBUG_TEMP_FIX_52 /* NUMA support(memory area determination) */ if (!is_mckernel_memory(pa, pa + cpsize)) {
if (!is_mckernel_memory(pa)) {
#else
if (pa < ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0) ||
pa >= ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0)) {
#endif /* POSTK_DEBUG_TEMP_FIX_52 */
dkprintf("%s: pa is outside of LWK memory, to: %p, pa: %p," dkprintf("%s: pa is outside of LWK memory, to: %p, pa: %p,"
"cpsize: %d\n", __FUNCTION__, to, pa, cpsize); "cpsize: %d\n", __FUNCTION__, to, pa, cpsize);
va = ihk_mc_map_virtual(pa, 1, PTATTR_ACTIVE); va = ihk_mc_map_virtual(pa, 1, PTATTR_ACTIVE);
memcpy(to, va, cpsize); memcpy(to, va, cpsize);
ihk_mc_unmap_virtual(va, 1, 1); ihk_mc_unmap_virtual(va, 1);
} }
else { else {
va = phys_to_virt(pa); va = phys_to_virt(pa);
@@ -2924,17 +2995,12 @@ int write_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t
return error; return error;
} }
#ifdef POSTK_DEBUG_TEMP_FIX_52 /* NUMA support(memory area determination) */ if (!is_mckernel_memory(pa, pa + cpsize)) {
if (!is_mckernel_memory(pa)) {
#else
if (pa < ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0) ||
pa >= ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0)) {
#endif /* POSTK_DEBUG_TEMP_FIX_52 */
dkprintf("%s: pa is outside of LWK memory, from: %p," dkprintf("%s: pa is outside of LWK memory, from: %p,"
"pa: %p, cpsize: %d\n", __FUNCTION__, from, pa, cpsize); "pa: %p, cpsize: %d\n", __FUNCTION__, from, pa, cpsize);
va = ihk_mc_map_virtual(pa, 1, PTATTR_ACTIVE); va = ihk_mc_map_virtual(pa, 1, PTATTR_ACTIVE);
memcpy(va, from, cpsize); memcpy(va, from, cpsize);
ihk_mc_unmap_virtual(va, 1, 1); ihk_mc_unmap_virtual(va, 1);
} }
else { else {
va = phys_to_virt(pa); va = phys_to_virt(pa);
@@ -2995,17 +3061,12 @@ int patch_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t
return error; return error;
} }
#ifdef POSTK_DEBUG_TEMP_FIX_52 /* NUMA support(memory area determination) */ if (!is_mckernel_memory(pa, pa + cpsize)) {
if (!is_mckernel_memory(pa)) {
#else
if (pa < ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0) ||
pa >= ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0)) {
#endif /* POSTK_DEBUG_TEMP_FIX_52 */
dkprintf("%s: pa is outside of LWK memory, from: %p," dkprintf("%s: pa is outside of LWK memory, from: %p,"
"pa: %p, cpsize: %d\n", __FUNCTION__, from, pa, cpsize); "pa: %p, cpsize: %d\n", __FUNCTION__, from, pa, cpsize);
va = ihk_mc_map_virtual(pa, 1, PTATTR_ACTIVE); va = ihk_mc_map_virtual(pa, 1, PTATTR_ACTIVE);
memcpy(va, from, cpsize); memcpy(va, from, cpsize);
ihk_mc_unmap_virtual(va, 1, 1); ihk_mc_unmap_virtual(va, 1);
} }
else { else {
va = phys_to_virt(pa); va = phys_to_virt(pa);

View File

@@ -30,7 +30,7 @@ int ihk_mc_ikc_init_first_local(struct ihk_ikc_channel_desc *channel,
memset(channel, 0, sizeof(struct ihk_ikc_channel_desc)); memset(channel, 0, sizeof(struct ihk_ikc_channel_desc));
mikc_queue_pages = ((2 * num_processors * MASTER_IKCQ_PKTSIZE) mikc_queue_pages = ((4 * num_processors * MASTER_IKCQ_PKTSIZE)
+ (PAGE_SIZE - 1)) / PAGE_SIZE; + (PAGE_SIZE - 1)) / PAGE_SIZE;
/* Place both sides in this side */ /* Place both sides in this side */

View File

@@ -16,20 +16,16 @@
#include <registers.h> #include <registers.h>
#include <mc_perf_event.h> #include <mc_perf_event.h>
#include <config.h> #include <config.h>
#include <debug.h>
extern unsigned int *x86_march_perfmap; extern unsigned int *x86_march_perfmap;
extern int running_on_kvm(void); extern int running_on_kvm(void);
#ifdef POSTK_DEBUG_TEMP_FIX_31
int ihk_mc_perfctr_fixed_init(int counter, int mode); int ihk_mc_perfctr_fixed_init(int counter, int mode);
#endif/*POSTK_DEBUG_TEMP_FIX_31*/
//#define PERFCTR_DEBUG //#define PERFCTR_DEBUG
#ifdef PERFCTR_DEBUG #ifdef PERFCTR_DEBUG
#define dkprintf(...) do { kprintf(__VA_ARGS__); } while (0) #undef DDEBUG_DEFAULT
#define ekprintf(...) do { kprintf(__VA_ARGS__); } while (0) #define DDEBUG_DEFAULT DDEBUG_PRINT
#else
#define dkprintf(...) do { } while (0)
#define ekprintf(...) do { kprintf(__VA_ARGS__); } while (0)
#endif #endif
#define X86_CR4_PCE 0x00000100 #define X86_CR4_PCE 0x00000100
@@ -43,11 +39,11 @@ int ihk_mc_perfctr_fixed_init(int counter, int mode);
} \ } \
} while(0) } while(0)
int perf_counters_discovered = 0; int perf_counters_discovered;
int X86_IA32_NUM_PERF_COUNTERS = 0; int NUM_PERF_COUNTERS;
unsigned long X86_IA32_PERF_COUNTERS_MASK = 0; unsigned long PERF_COUNTERS_MASK;
int X86_IA32_NUM_FIXED_PERF_COUNTERS = 0; int NUM_FIXED_PERF_COUNTERS;
unsigned long X86_IA32_FIXED_PERF_COUNTERS_MASK = 0; unsigned long FIXED_PERF_COUNTERS_MASK;
void x86_init_perfctr(void) void x86_init_perfctr(void)
{ {
@@ -78,17 +74,17 @@ void x86_init_perfctr(void)
op = 0x0a; op = 0x0a;
asm volatile("cpuid" : "=a"(eax),"=b"(ebx),"=c"(ecx),"=d"(edx):"a"(op)); asm volatile("cpuid" : "=a"(eax),"=b"(ebx),"=c"(ecx),"=d"(edx):"a"(op));
X86_IA32_NUM_PERF_COUNTERS = ((eax & 0xFF00) >> 8); NUM_PERF_COUNTERS = ((eax & 0xFF00) >> 8);
X86_IA32_PERF_COUNTERS_MASK = (1 << X86_IA32_NUM_PERF_COUNTERS) - 1; PERF_COUNTERS_MASK = (1 << NUM_PERF_COUNTERS) - 1;
X86_IA32_NUM_FIXED_PERF_COUNTERS = (edx & 0x0F); NUM_FIXED_PERF_COUNTERS = (edx & 0x0F);
X86_IA32_FIXED_PERF_COUNTERS_MASK = FIXED_PERF_COUNTERS_MASK =
((1UL << X86_IA32_NUM_FIXED_PERF_COUNTERS) - 1) << ((1UL << NUM_FIXED_PERF_COUNTERS) - 1) <<
X86_IA32_BASE_FIXED_PERF_COUNTERS; BASE_FIXED_PERF_COUNTERS;
perf_counters_discovered = 1; perf_counters_discovered = 1;
kprintf("X86_IA32_NUM_PERF_COUNTERS: %d, X86_IA32_NUM_FIXED_PERF_COUNTERS: %d\n", kprintf("NUM_PERF_COUNTERS: %d, NUM_FIXED_PERF_COUNTERS: %d\n",
X86_IA32_NUM_PERF_COUNTERS, X86_IA32_NUM_FIXED_PERF_COUNTERS); NUM_PERF_COUNTERS, NUM_FIXED_PERF_COUNTERS);
} }
/* Clear Fixed Counter Control */ /* Clear Fixed Counter Control */
@@ -97,20 +93,20 @@ void x86_init_perfctr(void)
wrmsr(MSR_PERF_FIXED_CTRL, value); wrmsr(MSR_PERF_FIXED_CTRL, value);
/* Clear Generic Counter Control */ /* Clear Generic Counter Control */
for(i = 0; i < X86_IA32_NUM_PERF_COUNTERS; i++) { for (i = 0; i < NUM_PERF_COUNTERS; i++) {
wrmsr(MSR_IA32_PERFEVTSEL0 + i, 0); wrmsr(MSR_IA32_PERFEVTSEL0 + i, 0);
} }
/* Enable PMC Control */ /* Enable PMC Control */
value = rdmsr(MSR_PERF_GLOBAL_CTRL); value = rdmsr(MSR_PERF_GLOBAL_CTRL);
value |= X86_IA32_PERF_COUNTERS_MASK; value |= PERF_COUNTERS_MASK;
value |= X86_IA32_FIXED_PERF_COUNTERS_MASK; value |= FIXED_PERF_COUNTERS_MASK;
wrmsr(MSR_PERF_GLOBAL_CTRL, value); wrmsr(MSR_PERF_GLOBAL_CTRL, value);
} }
static int set_perfctr_x86_direct(int counter, int mode, unsigned int value) static int set_perfctr_x86_direct(int counter, int mode, unsigned int value)
{ {
if (counter < 0 || counter >= X86_IA32_NUM_PERF_COUNTERS) { if (counter < 0 || counter >= NUM_PERF_COUNTERS) {
return -EINVAL; return -EINVAL;
} }
@@ -149,13 +145,14 @@ static int set_pmc_x86_direct(int counter, long val)
val &= 0x000000ffffffffff; // 40bit Mask val &= 0x000000ffffffffff; // 40bit Mask
cnt_bit = 1UL << counter; cnt_bit = 1UL << counter;
if ( cnt_bit & X86_IA32_PERF_COUNTERS_MASK ) { if (cnt_bit & PERF_COUNTERS_MASK) {
// set generic pmc // set generic pmc
wrmsr(MSR_IA32_PMC0 + counter, val); wrmsr(MSR_IA32_PMC0 + counter, val);
} }
else if ( cnt_bit & X86_IA32_FIXED_PERF_COUNTERS_MASK ) { else if (cnt_bit & FIXED_PERF_COUNTERS_MASK) {
// set fixed pmc // set fixed pmc
wrmsr(MSR_IA32_FIXED_CTR0 + counter - X86_IA32_BASE_FIXED_PERF_COUNTERS, val); wrmsr(MSR_IA32_FIXED_CTR0 +
counter - BASE_FIXED_PERF_COUNTERS, val);
} }
else { else {
return -EINVAL; return -EINVAL;
@@ -175,10 +172,10 @@ static int set_fixed_counter(int counter, int mode)
{ {
unsigned long value = 0; unsigned long value = 0;
unsigned int ctr_mask = 0xf; unsigned int ctr_mask = 0xf;
int counter_idx = counter - X86_IA32_BASE_FIXED_PERF_COUNTERS ; int counter_idx = counter - BASE_FIXED_PERF_COUNTERS;
unsigned int set_val = 0; unsigned int set_val = 0;
if (counter_idx < 0 || counter_idx >= X86_IA32_NUM_FIXED_PERF_COUNTERS) { if (counter_idx < 0 || counter_idx >= NUM_FIXED_PERF_COUNTERS) {
return -EINVAL; return -EINVAL;
} }
@@ -208,14 +205,13 @@ int ihk_mc_perfctr_init_raw(int counter, uint64_t config, int mode)
int ihk_mc_perfctr_init_raw(int counter, unsigned int code, int mode) int ihk_mc_perfctr_init_raw(int counter, unsigned int code, int mode)
#endif /*POSTK_DEBUG_TEMP_FIX_29*/ #endif /*POSTK_DEBUG_TEMP_FIX_29*/
{ {
#ifdef POSTK_DEBUG_TEMP_FIX_31
// PAPI_REF_CYC counted by fixed counter // PAPI_REF_CYC counted by fixed counter
if (counter >= X86_IA32_BASE_FIXED_PERF_COUNTERS) { if (counter >= BASE_FIXED_PERF_COUNTERS &&
counter < BASE_FIXED_PERF_COUNTERS + NUM_FIXED_PERF_COUNTERS) {
return ihk_mc_perfctr_fixed_init(counter, mode); return ihk_mc_perfctr_fixed_init(counter, mode);
} }
#endif /*POSTK_DEBUG_TEMP_FIX_31*/
if (counter < 0 || counter >= X86_IA32_NUM_PERF_COUNTERS) { if (counter < 0 || counter >= NUM_PERF_COUNTERS) {
return -EINVAL; return -EINVAL;
} }
@@ -248,7 +244,7 @@ int ihk_mc_perfctr_init(int counter, enum ihk_perfctr_type type, int mode)
} }
#endif /*POSTK_DEBUG_TEMP_FIX_29*/ #endif /*POSTK_DEBUG_TEMP_FIX_29*/
if (counter < 0 || counter >= X86_IA32_NUM_PERF_COUNTERS) { if (counter < 0 || counter >= NUM_PERF_COUNTERS) {
return -EINVAL; return -EINVAL;
} }
if (type < 0 || type >= PERFCTR_MAX_TYPE) { if (type < 0 || type >= PERFCTR_MAX_TYPE) {
@@ -300,18 +296,11 @@ int ihk_mc_perfctr_set_extra(struct mc_perf_event *event)
extern void x86_march_perfctr_start(unsigned long counter_mask); extern void x86_march_perfctr_start(unsigned long counter_mask);
#endif #endif
#ifdef POSTK_DEBUG_TEMP_FIX_30
int ihk_mc_perfctr_start(int counter)
#else
int ihk_mc_perfctr_start(unsigned long counter_mask) int ihk_mc_perfctr_start(unsigned long counter_mask)
#endif /*POSTK_DEBUG_TEMP_FIX_30*/
{ {
int ret = 0; int ret = 0;
unsigned long value = 0; unsigned long value = 0;
unsigned long mask = X86_IA32_PERF_COUNTERS_MASK | X86_IA32_FIXED_PERF_COUNTERS_MASK; unsigned long mask = PERF_COUNTERS_MASK | FIXED_PERF_COUNTERS_MASK;
#ifdef POSTK_DEBUG_TEMP_FIX_30
unsigned long counter_mask = 1UL << counter;
#endif /*POSTK_DEBUG_TEMP_FIX_30*/
PERFCTR_CHKANDJUMP(counter_mask & ~mask, "counter_mask out of range", -EINVAL); PERFCTR_CHKANDJUMP(counter_mask & ~mask, "counter_mask out of range", -EINVAL);
@@ -328,18 +317,11 @@ int ihk_mc_perfctr_start(unsigned long counter_mask)
goto fn_exit; goto fn_exit;
} }
#ifdef POSTK_DEBUG_TEMP_FIX_30
int ihk_mc_perfctr_stop(int counter)
#else
int ihk_mc_perfctr_stop(unsigned long counter_mask) int ihk_mc_perfctr_stop(unsigned long counter_mask)
#endif/*POSTK_DEBUG_TEMP_FIX_30*/
{ {
int ret = 0; int ret = 0;
unsigned long value; unsigned long value;
unsigned long mask = X86_IA32_PERF_COUNTERS_MASK | X86_IA32_FIXED_PERF_COUNTERS_MASK; unsigned long mask = PERF_COUNTERS_MASK | FIXED_PERF_COUNTERS_MASK;
#ifdef POSTK_DEBUG_TEMP_FIX_30
unsigned long counter_mask = 1UL << counter;
#endif/*POSTK_DEBUG_TEMP_FIX_30*/
PERFCTR_CHKANDJUMP(counter_mask & ~mask, "counter_mask out of range", -EINVAL); PERFCTR_CHKANDJUMP(counter_mask & ~mask, "counter_mask out of range", -EINVAL);
@@ -376,10 +358,10 @@ int ihk_mc_perfctr_fixed_init(int counter, int mode)
{ {
unsigned long value = 0; unsigned long value = 0;
unsigned int ctr_mask = 0xf; unsigned int ctr_mask = 0xf;
int counter_idx = counter - X86_IA32_BASE_FIXED_PERF_COUNTERS ; int counter_idx = counter - BASE_FIXED_PERF_COUNTERS;
unsigned int set_val = 0; unsigned int set_val = 0;
if (counter_idx < 0 || counter_idx >= X86_IA32_NUM_FIXED_PERF_COUNTERS) { if (counter_idx < 0 || counter_idx >= NUM_FIXED_PERF_COUNTERS) {
return -EINVAL; return -EINVAL;
} }
@@ -420,7 +402,7 @@ int ihk_mc_perfctr_read_mask(unsigned long counter_mask, unsigned long *value)
{ {
int i, j; int i, j;
for (i = 0, j = 0; i < X86_IA32_NUM_PERF_COUNTERS && counter_mask; for (i = 0, j = 0; i < NUM_PERF_COUNTERS && counter_mask;
i++, counter_mask >>= 1) { i++, counter_mask >>= 1) {
if (counter_mask & 1) { if (counter_mask & 1) {
value[j++] = rdpmc(i); value[j++] = rdpmc(i);
@@ -440,13 +422,14 @@ unsigned long ihk_mc_perfctr_read(int counter)
cnt_bit = 1UL << counter; cnt_bit = 1UL << counter;
if ( cnt_bit & X86_IA32_PERF_COUNTERS_MASK ) { if (cnt_bit & PERF_COUNTERS_MASK) {
// read generic pmc // read generic pmc
retval = rdpmc(counter); retval = rdpmc(counter);
} }
else if ( cnt_bit & X86_IA32_FIXED_PERF_COUNTERS_MASK ) { else if (cnt_bit & FIXED_PERF_COUNTERS_MASK) {
// read fixed pmc // read fixed pmc
retval = rdpmc((1 << 30) + (counter - X86_IA32_BASE_FIXED_PERF_COUNTERS)); retval = rdpmc((1 << 30) +
(counter - BASE_FIXED_PERF_COUNTERS));
} }
else { else {
retval = -EINVAL; retval = -EINVAL;
@@ -468,12 +451,12 @@ unsigned long ihk_mc_perfctr_read_msr(int counter)
cnt_bit = 1UL << counter; cnt_bit = 1UL << counter;
if ( cnt_bit & X86_IA32_PERF_COUNTERS_MASK ) { if (cnt_bit & PERF_COUNTERS_MASK) {
// read generic pmc // read generic pmc
idx = MSR_IA32_PMC0 + counter; idx = MSR_IA32_PMC0 + counter;
retval = (unsigned long) rdmsr(idx); retval = (unsigned long) rdmsr(idx);
} }
else if ( cnt_bit & X86_IA32_FIXED_PERF_COUNTERS_MASK ) { else if (cnt_bit & FIXED_PERF_COUNTERS_MASK) {
// read fixed pmc // read fixed pmc
idx = MSR_IA32_FIXED_CTR0 + counter; idx = MSR_IA32_FIXED_CTR0 + counter;
retval = (unsigned long) rdmsr(idx); retval = (unsigned long) rdmsr(idx);
@@ -506,8 +489,8 @@ int ihk_mc_perfctr_alloc_counter(unsigned int *type, unsigned long *config, unsi
} }
// find avail generic counter // find avail generic counter
for(i = 0; i < X86_IA32_NUM_PERF_COUNTERS; i++) { for (i = 0; i < NUM_PERF_COUNTERS; i++) {
if(!(pmc_status & (1 << i))) { if (!(pmc_status & (1 << i))) {
ret = i; ret = i;
break; break;
} }

View File

@@ -31,12 +31,11 @@
#include <page.h> #include <page.h>
#include <limits.h> #include <limits.h>
#include <syscall.h> #include <syscall.h>
#include <debug.h>
void terminate_mcexec(int, int); void terminate_mcexec(int, int);
extern long do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact); extern long do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact);
long syscall(int num, ihk_mc_user_context_t *ctx); long syscall(int num, ihk_mc_user_context_t *ctx);
void set_signal(int sig, void *regs0, siginfo_t *info);
void check_signal(unsigned long rc, void *regs0, int num);
extern unsigned long do_fork(int, unsigned long, unsigned long, unsigned long, extern unsigned long do_fork(int, unsigned long, unsigned long, unsigned long,
unsigned long, unsigned long, unsigned long); unsigned long, unsigned long, unsigned long);
extern int get_xsave_size(); extern int get_xsave_size();
@@ -45,11 +44,8 @@ extern uint64_t get_xsave_mask();
//#define DEBUG_PRINT_SC //#define DEBUG_PRINT_SC
#ifdef DEBUG_PRINT_SC #ifdef DEBUG_PRINT_SC
#define dkprintf kprintf #undef DDEBUG_DEFAULT
#define ekprintf(...) kprintf(__VA_ARGS__) #define DDEBUG_DEFAULT DDEBUG_PRINT
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif #endif
uintptr_t debug_constants[] = { uintptr_t debug_constants[] = {
@@ -92,33 +88,45 @@ static ptrdiff_t vdso_offset;
extern int num_processors; extern int num_processors;
int obtain_clone_cpuid(cpu_set_t *cpu_set) { int obtain_clone_cpuid(cpu_set_t *cpu_set, int use_last) {
int min_queue_len = -1; int min_queue_len = -1;
int cpu, min_cpu = -1; int cpu, min_cpu = -1, uti_cpu = -1;
unsigned long irqstate;
irqstate = ihk_mc_spinlock_lock(&runq_reservation_lock);
/* Find the first allowed core with the shortest run queue */ /* Find the first allowed core with the shortest run queue */
for (cpu = 0; cpu < num_processors; ++cpu) { for (cpu = 0; cpu < num_processors; ++cpu) {
struct cpu_local_var *v; struct cpu_local_var *v;
unsigned long irqstate;
if (!CPU_ISSET(cpu, cpu_set)) continue; if (!CPU_ISSET(cpu, cpu_set)) continue;
v = get_cpu_local_var(cpu); v = get_cpu_local_var(cpu);
irqstate = ihk_mc_spinlock_lock(&v->runq_lock); ihk_mc_spinlock_lock_noirq(&v->runq_lock);
if (min_queue_len == -1 || v->runq_len < min_queue_len) { dkprintf("%s: cpu=%d,runq_len=%d,runq_reserved=%d\n", __FUNCTION__, cpu, v->runq_len, v->runq_reserved);
min_queue_len = v->runq_len; if (min_queue_len == -1 || v->runq_len + v->runq_reserved < min_queue_len) {
min_queue_len = v->runq_len + v->runq_reserved;
min_cpu = cpu; min_cpu = cpu;
} }
ihk_mc_spinlock_unlock(&v->runq_lock, irqstate);
/* Record the last tie CPU */
if (min_cpu != cpu && v->runq_len + v->runq_reserved == min_queue_len) {
uti_cpu = cpu;
}
dkprintf("%s: cpu=%d,runq_len=%d,runq_reserved=%d,min_cpu=%d,uti_cpu=%d\n", __FUNCTION__, cpu, v->runq_len, v->runq_reserved, min_cpu, uti_cpu);
ihk_mc_spinlock_unlock_noirq(&v->runq_lock);
#if 0
if (min_queue_len == 0) if (min_queue_len == 0)
break; break;
#endif
} }
min_cpu = use_last ? uti_cpu : min_cpu;
if (min_cpu != -1) { if (min_cpu != -1) {
if (get_cpu_local_var(min_cpu)->status != CPU_STATUS_RESERVED) if (get_cpu_local_var(min_cpu)->status != CPU_STATUS_RESERVED)
get_cpu_local_var(min_cpu)->status = CPU_STATUS_RESERVED; get_cpu_local_var(min_cpu)->status = CPU_STATUS_RESERVED;
__sync_fetch_and_add(&get_cpu_local_var(min_cpu)->runq_reserved, 1);
} }
ihk_mc_spinlock_unlock(&runq_reservation_lock, irqstate);
return min_cpu; return min_cpu;
} }
@@ -251,7 +259,7 @@ SYSCALL_DECLARE(rt_sigreturn)
info.si_code = TRAP_TRACE; info.si_code = TRAP_TRACE;
set_signal(SIGTRAP, regs, &info); set_signal(SIGTRAP, regs, &info);
check_need_resched(); check_need_resched();
check_signal(0, regs, 0); check_signal(0, regs, -1);
} }
if(ksigsp.fpregs && xsavesize){ if(ksigsp.fpregs && xsavesize){
@@ -276,7 +284,6 @@ SYSCALL_DECLARE(rt_sigreturn)
} }
extern struct cpu_local_var *clv; extern struct cpu_local_var *clv;
extern unsigned long do_kill(struct thread *thread, int pid, int tid, int sig, struct siginfo *info, int ptracecont);
extern void interrupt_syscall(struct thread *, int sig); extern void interrupt_syscall(struct thread *, int sig);
extern void terminate(int, int); extern void terminate(int, int);
extern int num_processors; extern int num_processors;
@@ -530,23 +537,32 @@ void ptrace_report_signal(struct thread *thread, int sig)
dkprintf("ptrace_report_signal, tid=%d, pid=%d\n", thread->tid, thread->proc->pid); dkprintf("ptrace_report_signal, tid=%d, pid=%d\n", thread->tid, thread->proc->pid);
mcs_rwlock_writer_lock(&proc->update_lock, &lock); mcs_rwlock_writer_lock(&proc->update_lock, &lock);
if(!(proc->ptrace & PT_TRACED)){ if (!(thread->ptrace & PT_TRACED)) {
mcs_rwlock_writer_unlock(&proc->update_lock, &lock); mcs_rwlock_writer_unlock(&proc->update_lock, &lock);
return; return;
} }
thread->exit_status = sig;
/* Transition thread state */ /* Transition thread state */
proc->status = PS_TRACED; thread->exit_status = sig;
thread->status = PS_TRACED; thread->status = PS_TRACED;
proc->ptrace &= ~PT_TRACE_SYSCALL; thread->ptrace &= ~PT_TRACE_SYSCALL;
if (sig == SIGSTOP || sig == SIGTSTP ||
sig == SIGTTIN || sig == SIGTTOU) {
proc->signal_flags |= SIGNAL_STOP_STOPPED;
} else {
proc->signal_flags &= ~SIGNAL_STOP_STOPPED;
}
parent_pid = proc->parent->pid;
save_debugreg(thread->ptrace_debugreg); save_debugreg(thread->ptrace_debugreg);
if (sig == SIGSTOP || sig == SIGTSTP ||
sig == SIGTTIN || sig == SIGTTOU) {
thread->signal_flags |= SIGNAL_STOP_STOPPED;
}
else {
thread->signal_flags &= ~SIGNAL_STOP_STOPPED;
}
if (thread == proc->main_thread) {
proc->status = PS_DELAY_TRACED;
parent_pid = proc->parent->pid;
}
else {
parent_pid = thread->report_proc->pid;
waitq_wakeup(&thread->report_proc->waitpid_q);
}
mcs_rwlock_writer_unlock(&proc->update_lock, &lock); mcs_rwlock_writer_unlock(&proc->update_lock, &lock);
memset(&info, '\0', sizeof info); memset(&info, '\0', sizeof info);
@@ -555,8 +571,6 @@ void ptrace_report_signal(struct thread *thread, int sig)
info._sifields._sigchld.si_pid = thread->tid; info._sifields._sigchld.si_pid = thread->tid;
info._sifields._sigchld.si_status = thread->exit_status; info._sifields._sigchld.si_status = thread->exit_status;
do_kill(cpu_local_var(current), parent_pid, -1, SIGCHLD, &info, 0); do_kill(cpu_local_var(current), parent_pid, -1, SIGCHLD, &info, 0);
/* Wake parent (if sleeping in wait4()) */
waitq_wakeup(&proc->parent->waitpid_q);
dkprintf("ptrace_report_signal,sleeping\n"); dkprintf("ptrace_report_signal,sleeping\n");
/* Sleep */ /* Sleep */
@@ -569,9 +583,8 @@ ptrace_arch_prctl(int pid, long code, long addr)
{ {
long rc = -EIO; long rc = -EIO;
struct thread *child; struct thread *child;
struct mcs_rwlock_node_irqsave lock;
child = find_thread(pid, pid, &lock); child = find_thread(pid, pid);
if (!child) if (!child)
return -ESRCH; return -ESRCH;
if (child->proc->status & (PS_TRACED | PS_STOPPED)) { if (child->proc->status & (PS_TRACED | PS_STOPPED)) {
@@ -613,7 +626,7 @@ ptrace_arch_prctl(int pid, long code, long addr)
break; break;
} }
} }
thread_unlock(child, &lock); thread_unlock(child);
return rc; return rc;
} }
@@ -635,11 +648,13 @@ arch_ptrace(long request, int pid, long addr, long data)
static int static int
isrestart(int num, unsigned long rc, int sig, int restart) isrestart(int num, unsigned long rc, int sig, int restart)
{ {
if(sig == SIGKILL || sig == SIGSTOP) if (sig == SIGKILL || sig == SIGSTOP)
return 0; return 0;
if(num == 0 || rc != -EINTR) if (num < 0 || rc != -EINTR)
return 0; return 0;
switch(num){ if (sig == SIGCHLD)
return 1;
switch (num) {
case __NR_pause: case __NR_pause:
case __NR_rt_sigsuspend: case __NR_rt_sigsuspend:
case __NR_rt_sigtimedwait: case __NR_rt_sigtimedwait:
@@ -660,14 +675,12 @@ isrestart(int num, unsigned long rc, int sig, int restart)
case __NR_io_getevents: case __NR_io_getevents:
return 0; return 0;
} }
if(sig == SIGCHLD) if (restart)
return 1;
if(restart)
return 1; return 1;
return 0; return 0;
} }
void int
do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pending *pending, int num) do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pending *pending, int num)
{ {
struct x86_user_context *regs = regs0; struct x86_user_context *regs = regs0;
@@ -679,14 +692,15 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
int ptraceflag = 0; int ptraceflag = 0;
struct mcs_rwlock_node_irqsave lock; struct mcs_rwlock_node_irqsave lock;
struct mcs_rwlock_node_irqsave mcs_rw_node; struct mcs_rwlock_node_irqsave mcs_rw_node;
int restart = 0;
for(w = pending->sigmask.__val[0], sig = 0; w; sig++, w >>= 1); for(w = pending->sigmask.__val[0], sig = 0; w; sig++, w >>= 1);
dkprintf("do_signal(): tid=%d, pid=%d, sig=%d\n", thread->tid, proc->pid, sig); dkprintf("do_signal(): tid=%d, pid=%d, sig=%d\n", thread->tid, proc->pid, sig);
orgsig = sig; orgsig = sig;
if((proc->ptrace & PT_TRACED) && if ((thread->ptrace & PT_TRACED) &&
pending->ptracecont == 0 && pending->ptracecont == 0 &&
sig != SIGKILL) { sig != SIGKILL) {
ptraceflag = 1; ptraceflag = 1;
sig = SIGSTOP; sig = SIGSTOP;
} }
@@ -707,7 +721,7 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
if(k->sa.sa_handler == SIG_IGN){ if(k->sa.sa_handler == SIG_IGN){
kfree(pending); kfree(pending);
mcs_rwlock_writer_unlock(&thread->sigcommon->lock, &mcs_rw_node); mcs_rwlock_writer_unlock(&thread->sigcommon->lock, &mcs_rw_node);
return; goto out;
} }
else if(k->sa.sa_handler){ else if(k->sa.sa_handler){
unsigned long *usp; /* user stack */ unsigned long *usp; /* user stack */
@@ -757,9 +771,8 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
memcpy(&ksigsp.sigstack, &thread->sigstack, sizeof(stack_t)); memcpy(&ksigsp.sigstack, &thread->sigstack, sizeof(stack_t));
ksigsp.sigrc = rc; ksigsp.sigrc = rc;
ksigsp.num = num; ksigsp.num = num;
ksigsp.restart = isrestart(num, rc, sig, k->sa.sa_flags & SA_RESTART); restart = isrestart(num, rc, sig, k->sa.sa_flags & SA_RESTART);
if(num != 0 && rc == -EINTR && sig == SIGCHLD) ksigsp.restart = restart;
ksigsp.restart = 1;
if(xsavesize){ if(xsavesize){
uint64_t xsave_mask = get_xsave_mask(); uint64_t xsave_mask = get_xsave_mask();
unsigned int low = (unsigned int)xsave_mask; unsigned int low = (unsigned int)xsave_mask;
@@ -772,7 +785,7 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
kfree(_kfpregs); kfree(_kfpregs);
kprintf("do_signal,no space available\n"); kprintf("do_signal,no space available\n");
terminate(0, sig); terminate(0, sig);
return; goto out;
} }
kfpregs = (void *)((((unsigned long)_kfpregs) + 63) & ~63); kfpregs = (void *)((((unsigned long)_kfpregs) + 63) & ~63);
memset(kfpregs, '\0', xsavesize); memset(kfpregs, '\0', xsavesize);
@@ -782,7 +795,7 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
kfree(_kfpregs); kfree(_kfpregs);
kprintf("do_signal,write_process_vm failed\n"); kprintf("do_signal,write_process_vm failed\n");
terminate(0, sig); terminate(0, sig);
return; goto out;
} }
ksigsp.fpregs = (void *)fpregs; ksigsp.fpregs = (void *)fpregs;
kfree(_kfpregs); kfree(_kfpregs);
@@ -794,7 +807,7 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
mcs_rwlock_writer_unlock(&thread->sigcommon->lock, &mcs_rw_node); mcs_rwlock_writer_unlock(&thread->sigcommon->lock, &mcs_rw_node);
kprintf("do_signal,write_process_vm failed\n"); kprintf("do_signal,write_process_vm failed\n");
terminate(0, sig); terminate(0, sig);
return; goto out;
} }
usp = (unsigned long *)sigsp; usp = (unsigned long *)sigsp;
@@ -824,12 +837,13 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
info.si_code = TRAP_TRACE; info.si_code = TRAP_TRACE;
set_signal(SIGTRAP, regs, &info); set_signal(SIGTRAP, regs, &info);
check_need_resched(); check_need_resched();
check_signal(0, regs, 0); check_signal(0, regs, -1);
} }
} }
else { else {
int coredumped = 0; int coredumped = 0;
siginfo_t info; siginfo_t info;
int ptc = pending->ptracecont;
if(ptraceflag){ if(ptraceflag){
if(thread->ptrace_recvsig) if(thread->ptrace_recvsig)
@@ -856,25 +870,37 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
info.si_code = CLD_STOPPED; info.si_code = CLD_STOPPED;
info._sifields._sigchld.si_pid = thread->proc->pid; info._sifields._sigchld.si_pid = thread->proc->pid;
info._sifields._sigchld.si_status = (sig << 8) | 0x7f; info._sifields._sigchld.si_status = (sig << 8) | 0x7f;
do_kill(cpu_local_var(current), thread->proc->parent->pid, -1, SIGCHLD, &info, 0); if (ptc == 2 &&
dkprintf("do_signal,SIGSTOP,changing state\n"); thread != thread->proc->main_thread) {
thread->signal_flags =
SIGNAL_STOP_STOPPED;
thread->status = PS_STOPPED;
thread->exit_status = SIGSTOP;
do_kill(thread,
thread->report_proc->pid, -1,
SIGCHLD, &info, 0);
waitq_wakeup(
&thread->report_proc->waitpid_q);
}
else {
/* Update thread state in fork tree */
mcs_rwlock_writer_lock(
&proc->update_lock, &lock);
proc->group_exit_status = SIGSTOP;
/* Update thread state in fork tree */ /* Reap and set new signal_flags */
mcs_rwlock_writer_lock(&proc->update_lock, &lock); proc->main_thread->signal_flags =
proc->group_exit_status = SIGSTOP; SIGNAL_STOP_STOPPED;
/* Reap and set new signal_flags */ proc->status = PS_DELAY_STOPPED;
proc->signal_flags = SIGNAL_STOP_STOPPED; thread->status = PS_STOPPED;
mcs_rwlock_writer_unlock(
&proc->update_lock, &lock);
proc->status = PS_STOPPED; do_kill(thread,
thread->status = PS_STOPPED; thread->proc->parent->pid, -1,
mcs_rwlock_writer_unlock(&proc->update_lock, &lock); SIGCHLD, &info, 0);
}
/* Wake up the parent who tried wait4 and sleeping */
waitq_wakeup(&proc->parent->waitpid_q);
dkprintf("do_signal(): pid: %d, tid: %d SIGSTOP, sleeping\n",
proc->pid, thread->tid);
/* Sleep */ /* Sleep */
schedule(); schedule();
dkprintf("SIGSTOP(): woken up\n"); dkprintf("SIGSTOP(): woken up\n");
@@ -882,19 +908,28 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
break; break;
case SIGTRAP: case SIGTRAP:
dkprintf("do_signal,SIGTRAP\n"); dkprintf("do_signal,SIGTRAP\n");
if(!(proc->ptrace & PT_TRACED)) { if (!(thread->ptrace & PT_TRACED)) {
goto core; goto core;
} }
/* Update thread state in fork tree */ /* Update thread state in fork tree */
mcs_rwlock_writer_lock(&proc->update_lock, &lock);
thread->exit_status = SIGTRAP; thread->exit_status = SIGTRAP;
proc->status = PS_TRACED;
thread->status = PS_TRACED; thread->status = PS_TRACED;
mcs_rwlock_writer_unlock(&proc->update_lock, &lock); if (thread == proc->main_thread) {
mcs_rwlock_writer_lock(&proc->update_lock,
/* Wake up the parent who tried wait4 and sleeping */ &lock);
waitq_wakeup(&thread->proc->parent->waitpid_q); proc->group_exit_status = SIGTRAP;
proc->status = PS_DELAY_TRACED;
mcs_rwlock_writer_unlock(&proc->update_lock,
&lock);
do_kill(thread, thread->proc->parent->pid, -1,
SIGCHLD, &info, 0);
}
else {
do_kill(thread, thread->report_proc->pid, -1,
SIGCHLD, &info, 0);
waitq_wakeup(&thread->report_proc->waitpid_q);
}
/* Sleep */ /* Sleep */
dkprintf("do_signal,SIGTRAP,sleeping\n"); dkprintf("do_signal,SIGTRAP,sleeping\n");
@@ -909,7 +944,7 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
info._sifields._sigchld.si_pid = proc->pid; info._sifields._sigchld.si_pid = proc->pid;
info._sifields._sigchld.si_status = 0x0000ffff; info._sifields._sigchld.si_status = 0x0000ffff;
do_kill(cpu_local_var(current), proc->parent->pid, -1, SIGCHLD, &info, 0); do_kill(cpu_local_var(current), proc->parent->pid, -1, SIGCHLD, &info, 0);
proc->signal_flags = SIGNAL_STOP_CONTINUED; proc->main_thread->signal_flags = SIGNAL_STOP_CONTINUED;
proc->status = PS_RUNNING; proc->status = PS_RUNNING;
dkprintf("do_signal,SIGCONT,do nothing\n"); dkprintf("do_signal,SIGCONT,do nothing\n");
break; break;
@@ -938,6 +973,8 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
break; break;
} }
} }
out:
return restart;
} }
static struct sig_pending * static struct sig_pending *
@@ -957,10 +994,12 @@ getsigpending(struct thread *thread, int delflag){
lock = &thread->sigcommon->lock; lock = &thread->sigcommon->lock;
head = &thread->sigcommon->sigpending; head = &thread->sigcommon->sigpending;
for(;;) { for(;;) {
if (delflag) if (delflag) {
mcs_rwlock_writer_lock(lock, &mcs_rw_node); mcs_rwlock_writer_lock(lock, &mcs_rw_node);
else }
else {
mcs_rwlock_reader_lock(lock, &mcs_rw_node); mcs_rwlock_reader_lock(lock, &mcs_rw_node);
}
list_for_each_entry_safe(pending, next, head, list){ list_for_each_entry_safe(pending, next, head, list){
for(x = pending->sigmask.__val[0], sig = 0; x; sig++, x >>= 1); for(x = pending->sigmask.__val[0], sig = 0; x; sig++, x >>= 1);
@@ -973,19 +1012,23 @@ getsigpending(struct thread *thread, int delflag){
if(delflag) if(delflag)
list_del(&pending->list); list_del(&pending->list);
if (delflag) if (delflag) {
mcs_rwlock_writer_unlock(lock, &mcs_rw_node); mcs_rwlock_writer_unlock(lock, &mcs_rw_node);
else }
else {
mcs_rwlock_reader_unlock(lock, &mcs_rw_node); mcs_rwlock_reader_unlock(lock, &mcs_rw_node);
}
return pending; return pending;
} }
} }
} }
if (delflag) if (delflag) {
mcs_rwlock_writer_unlock(lock, &mcs_rw_node); mcs_rwlock_writer_unlock(lock, &mcs_rw_node);
else }
else {
mcs_rwlock_reader_unlock(lock, &mcs_rw_node); mcs_rwlock_reader_unlock(lock, &mcs_rw_node);
}
if(lock == &thread->sigpendinglock) if(lock == &thread->sigpendinglock)
return NULL; return NULL;
@@ -1000,6 +1043,11 @@ getsigpending(struct thread *thread, int delflag){
struct sig_pending * struct sig_pending *
hassigpending(struct thread *thread) hassigpending(struct thread *thread)
{ {
if (list_empty(&thread->sigpending) &&
list_empty(&thread->sigcommon->sigpending)) {
return NULL;
}
return getsigpending(thread, 0); return getsigpending(thread, 0);
} }
@@ -1017,6 +1065,12 @@ void save_syscall_return_value(int num, unsigned long rc)
return; return;
} }
/** \brief check arrived signals and processing
*
* @param rc return value of syscall
* @param regs0 context
* @param num syscall number (-1: Not called on exiting system call)
*/
void void
check_signal(unsigned long rc, void *regs0, int num) check_signal(unsigned long rc, void *regs0, int num)
{ {
@@ -1050,6 +1104,11 @@ check_signal(unsigned long rc, void *regs0, int num)
goto out; goto out;
} }
if (list_empty(&thread->sigpending) &&
list_empty(&thread->sigcommon->sigpending)) {
goto out;
}
for(;;){ for(;;){
pending = getsigpending(thread, 1); pending = getsigpending(thread, 1);
if(!pending) { if(!pending) {
@@ -1057,7 +1116,9 @@ check_signal(unsigned long rc, void *regs0, int num)
goto out; goto out;
} }
do_signal(rc, regs, thread, pending, num); if (do_signal(rc, regs, thread, pending, num)) {
num = -1;
}
} }
out: out:
@@ -1137,7 +1198,7 @@ check_sig_pending_thread(struct thread *thread)
} }
void void
check_sig_pending() check_sig_pending(void)
{ {
struct thread *thread; struct thread *thread;
struct cpu_local_var *v; struct cpu_local_var *v;
@@ -1158,7 +1219,7 @@ repeat:
continue; continue;
} }
if (thread->proc->exit_status & 0x0000000100000000L) { if (thread->proc->group_exit_status & 0x0000000100000000L) {
continue; continue;
} }
@@ -1367,7 +1428,8 @@ done:
return 0; return 0;
} }
if (tthread->thread_offloaded) { /* Forward signal to Linux by interrupt_syscall mechanism */
if (tthread->uti_state == UTI_STATE_RUNNING_IN_LINUX) {
if (!tthread->proc->nohost) { if (!tthread->proc->nohost) {
interrupt_syscall(tthread, sig); interrupt_syscall(tthread, sig);
} }
@@ -1384,10 +1446,10 @@ done:
in check_signal */ in check_signal */
rc = 0; rc = 0;
k = tthread->sigcommon->action + sig - 1; k = tthread->sigcommon->action + sig - 1;
if((sig != SIGKILL && (tproc->ptrace & PT_TRACED)) || if ((sig != SIGKILL && (tthread->ptrace & PT_TRACED)) ||
(k->sa.sa_handler != (void *)1 && (k->sa.sa_handler != (void *)1 &&
(k->sa.sa_handler != NULL || (k->sa.sa_handler != NULL ||
(sig != SIGCHLD && sig != SIGURG)))){ (sig != SIGCHLD && sig != SIGURG)))) {
struct sig_pending *pending = NULL; struct sig_pending *pending = NULL;
if (sig < 33) { // SIGRTMIN - SIGRTMAX if (sig < 33) { // SIGRTMIN - SIGRTMAX
list_for_each_entry(pending, head, list){ list_for_each_entry(pending, head, list){
@@ -1471,7 +1533,7 @@ set_signal(int sig, void *regs0, siginfo_t *info)
SYSCALL_DECLARE(mmap) SYSCALL_DECLARE(mmap)
{ {
const int supported_flags = 0 const unsigned int supported_flags = 0
| MAP_SHARED // 01 | MAP_SHARED // 01
| MAP_PRIVATE // 02 | MAP_PRIVATE // 02
| MAP_FIXED // 10 | MAP_FIXED // 10
@@ -1479,7 +1541,7 @@ SYSCALL_DECLARE(mmap)
| MAP_LOCKED // 2000 | MAP_LOCKED // 2000
| MAP_POPULATE // 8000 | MAP_POPULATE // 8000
| MAP_HUGETLB // 00040000 | MAP_HUGETLB // 00040000
| (0x3F << MAP_HUGE_SHIFT) // FC000000 | (0x3FU << MAP_HUGE_SHIFT) // FC000000
; ;
const int ignored_flags = 0 const int ignored_flags = 0
#ifdef USE_NOCACHE_MMAP #ifdef USE_NOCACHE_MMAP
@@ -1498,7 +1560,7 @@ SYSCALL_DECLARE(mmap)
| MAP_NONBLOCK // 00010000 | MAP_NONBLOCK // 00010000
; ;
const intptr_t addr0 = ihk_mc_syscall_arg0(ctx); const uintptr_t addr0 = ihk_mc_syscall_arg0(ctx);
const size_t len0 = ihk_mc_syscall_arg1(ctx); const size_t len0 = ihk_mc_syscall_arg1(ctx);
const int prot = ihk_mc_syscall_arg2(ctx); const int prot = ihk_mc_syscall_arg2(ctx);
const int flags0 = ihk_mc_syscall_arg3(ctx); const int flags0 = ihk_mc_syscall_arg3(ctx);
@@ -1507,7 +1569,7 @@ SYSCALL_DECLARE(mmap)
struct thread *thread = cpu_local_var(current); struct thread *thread = cpu_local_var(current);
struct vm_regions *region = &thread->vm->region; struct vm_regions *region = &thread->vm->region;
int error; int error;
intptr_t addr = 0; uintptr_t addr = 0;
size_t len; size_t len;
int flags = flags0; int flags = flags0;
size_t pgsize; size_t pgsize;
@@ -1699,6 +1761,11 @@ SYSCALL_DECLARE(arch_prctl)
ihk_mc_syscall_arg1(ctx)); ihk_mc_syscall_arg1(ctx));
} }
SYSCALL_DECLARE(time)
{
return time();
}
static int vdso_get_vdso_info(void) static int vdso_get_vdso_info(void)
{ {
int error; int error;
@@ -2081,7 +2148,7 @@ int do_process_vm_read_writev(int pid,
range = lookup_process_memory_range(lthread->vm, range = lookup_process_memory_range(lthread->vm,
(uintptr_t)local_iov, (uintptr_t)local_iov,
(uintptr_t)(local_iov + liovcnt * sizeof(struct iovec))); (uintptr_t)(local_iov + liovcnt));
if (!range) { if (!range) {
ret = -EFAULT; ret = -EFAULT;
@@ -2090,7 +2157,7 @@ int do_process_vm_read_writev(int pid,
range = lookup_process_memory_range(lthread->vm, range = lookup_process_memory_range(lthread->vm,
(uintptr_t)remote_iov, (uintptr_t)remote_iov,
(uintptr_t)(remote_iov + riovcnt * sizeof(struct iovec))); (uintptr_t)(remote_iov + riovcnt));
if (!range) { if (!range) {
ret = -EFAULT; ret = -EFAULT;
@@ -2366,8 +2433,6 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg)
case 0: case 0:
memcpy(mpsr->virt_addr, mpsr->user_virt_addr, memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
sizeof(void *) * count); sizeof(void *) * count);
memcpy(mpsr->status, mpsr->user_status,
sizeof(int) * count);
memcpy(mpsr->nodes, mpsr->user_nodes, memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count); sizeof(int) * count);
memset(mpsr->ptep, 0, sizeof(pte_t) * count); memset(mpsr->ptep, 0, sizeof(pte_t) * count);
@@ -2387,41 +2452,38 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg)
case 0: case 0:
memcpy(mpsr->virt_addr, mpsr->user_virt_addr, memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
sizeof(void *) * count); sizeof(void *) * count);
memcpy(mpsr->status, mpsr->user_status,
sizeof(int) * count);
case 1:
memcpy(mpsr->nodes, mpsr->user_nodes, memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count); sizeof(int) * count);
mpsr->nodes_ready = 1;
break;
case 1:
memset(mpsr->ptep, 0, sizeof(pte_t) * count); memset(mpsr->ptep, 0, sizeof(pte_t) * count);
memset(mpsr->status, 0, sizeof(int) * count); memset(mpsr->status, 0, sizeof(int) * count);
memset(mpsr->nr_pages, 0, sizeof(int) * count); memset(mpsr->nr_pages, 0, sizeof(int) * count);
memset(mpsr->dst_phys, 0, memset(mpsr->dst_phys, 0,
sizeof(unsigned long) * count); sizeof(unsigned long) * count);
mpsr->nodes_ready = 1;
break; break;
default: default:
break; break;
} }
} }
else if (nr_cpus >= 4 && nr_cpus < 8) { else if (nr_cpus >= 4 && nr_cpus < 7) {
switch (cpu_index) { switch (cpu_index) {
case 0: case 0:
memcpy(mpsr->virt_addr, mpsr->user_virt_addr, memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
sizeof(void *) * count); sizeof(void *) * count);
break; break;
case 1: case 1:
memcpy(mpsr->status, mpsr->user_status,
sizeof(int) * count);
break;
case 2:
memcpy(mpsr->nodes, mpsr->user_nodes, memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count); sizeof(int) * count);
mpsr->nodes_ready = 1; mpsr->nodes_ready = 1;
break; break;
case 3: case 2:
memset(mpsr->ptep, 0, sizeof(pte_t) * count); memset(mpsr->ptep, 0, sizeof(pte_t) * count);
memset(mpsr->status, 0, sizeof(int) * count); memset(mpsr->status, 0, sizeof(int) * count);
break;
case 3:
memset(mpsr->nr_pages, 0, sizeof(int) * count); memset(mpsr->nr_pages, 0, sizeof(int) * count);
memset(mpsr->dst_phys, 0, memset(mpsr->dst_phys, 0,
sizeof(unsigned long) * count); sizeof(unsigned long) * count);
@@ -2431,7 +2493,7 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg)
break; break;
} }
} }
else if (nr_cpus >= 8) { else {
switch (cpu_index) { switch (cpu_index) {
case 0: case 0:
memcpy(mpsr->virt_addr, mpsr->user_virt_addr, memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
@@ -2443,28 +2505,23 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg)
sizeof(void *) * (count / 2)); sizeof(void *) * (count / 2));
break; break;
case 2: case 2:
memcpy(mpsr->status, mpsr->user_status,
sizeof(int) * count);
break;
case 3:
memcpy(mpsr->nodes, mpsr->user_nodes, memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count); sizeof(int) * count);
mpsr->nodes_ready = 1; mpsr->nodes_ready = 1;
break; break;
case 4: case 3:
memset(mpsr->ptep, 0, sizeof(pte_t) * count); memset(mpsr->ptep, 0, sizeof(pte_t) * count);
break; break;
case 5: case 4:
memset(mpsr->status, 0, sizeof(int) * count); memset(mpsr->status, 0, sizeof(int) * count);
break; break;
case 6: case 5:
memset(mpsr->nr_pages, 0, sizeof(int) * count); memset(mpsr->nr_pages, 0, sizeof(int) * count);
break; break;
case 7: case 6:
memset(mpsr->dst_phys, 0, memset(mpsr->dst_phys, 0,
sizeof(unsigned long) * count); sizeof(unsigned long) * count);
break; break;
default: default:
break; break;
} }
@@ -2672,11 +2729,19 @@ out:
time_t time(void) { time_t time(void) {
struct syscall_request sreq IHK_DMA_ALIGN; struct syscall_request sreq IHK_DMA_ALIGN;
struct thread *thread = cpu_local_var(current); struct timespec ats;
time_t ret; time_t ret = 0;
sreq.number = __NR_time;
sreq.args[0] = (uintptr_t)NULL; if (gettime_local_support) {
ret = (time_t)do_syscall(&sreq, ihk_mc_get_processor_id(), thread->proc->pid); calculate_time_from_tsc(&ats);
ret = ats.tv_sec;
}
else {
sreq.number = __NR_time;
sreq.args[0] = (uintptr_t)NULL;
ret = (time_t)do_syscall(&sreq, ihk_mc_get_processor_id());
}
return ret; return ret;
} }

View File

@@ -31,51 +31,6 @@ struct tod_data_s tod_data
.version = IHK_ATOMIC64_INIT(0), .version = IHK_ATOMIC64_INIT(0),
}; };
/*
 * Busy-wait hint for the polling loops of the vsyscall time code.
 *
 * Emits the "pause" instruction; the "memory" clobber tells the compiler
 * that memory may have changed, so values polled around this call are
 * re-read instead of being cached in registers.
 */
static inline void cpu_pause_for_vsyscall(void)
{
	asm volatile ("pause" : : : "memory");
}
/*
 * Compute the current time of day into *ts from the timestamp counter.
 *
 * A consistent copy of tod_data.origin is taken first; the current rdtsc()
 * count, converted to seconds and nanoseconds with tod_data.clocks_per_sec,
 * is then added to it. The single carry at the end keeps tv_nsec below
 * NS_PER_SEC as long as origin.tv_nsec is itself below NS_PER_SEC.
 *
 * NOTE(review): tod_data.clocks_per_sec is read outside the version-checked
 * section and is used as a divisor; presumably it is non-zero and does not
 * change once initialized -- confirm against the code that fills tod_data.
 */
static inline void calculate_time_from_tsc(struct timespec *ts)
{
long ver;
unsigned long current_tsc;
__time_t sec_delta;
long ns_delta;
/*
 * Retry loop: accept the copy of origin only if tod_data.version was even
 * before the copy and still has the same value after it.
 */
for (;;) {
while ((ver = ihk_atomic64_read(&tod_data.version)) & 1) {
/* settimeofday() is in progress */
cpu_pause_for_vsyscall();
}
/*
 * The rmb() calls on both sides keep the copy of origin between the two
 * version reads (assuming rmb() is a read memory barrier -- TODO confirm).
 */
rmb();
*ts = tod_data.origin;
rmb();
if (ver == ihk_atomic64_read(&tod_data.version)) {
break;
}
/* settimeofday() has intervened */
cpu_pause_for_vsyscall();
}
current_tsc = rdtsc();
/* whole seconds represented by the counter value */
sec_delta = current_tsc / tod_data.clocks_per_sec;
/* leftover clocks (less than one second's worth) scaled to nanoseconds */
ns_delta = NS_PER_SEC * (current_tsc % tod_data.clocks_per_sec)
/ tod_data.clocks_per_sec;
/* calc. of ns_delta overflows if clocks_per_sec exceeds 18.44 GHz */
ts->tv_sec += sec_delta;
ts->tv_nsec += ns_delta;
/* carry one second if the nanosecond sum reached NS_PER_SEC */
if (ts->tv_nsec >= NS_PER_SEC) {
ts->tv_nsec -= NS_PER_SEC;
++ts->tv_sec;
}
return;
} /* calculate_time_from_tsc() */
int vsyscall_gettimeofday(struct timeval *tv, void *tz) int vsyscall_gettimeofday(struct timeval *tv, void *tz)
{ {
int error; int error;

View File

@@ -45,11 +45,12 @@ fi
turbo="" turbo=""
ihk_irq="" ihk_irq=""
safe_kernel_map=""
umask_old=`umask` umask_old=`umask`
idle_halt="" idle_halt=""
allow_oversubscribe="" allow_oversubscribe=""
while getopts :tk:c:m:o:f:r:q:i:d:e:hO OPT while getopts stk:c:m:o:f:r:q:i:d:e:hO OPT
do do
case ${OPT} in case ${OPT} in
f) facility=${OPTARG} f) facility=${OPTARG}
@@ -62,6 +63,8 @@ do
;; ;;
m) mem=${OPTARG} m) mem=${OPTARG}
;; ;;
s) safe_kernel_map="safe_kernel_map"
;;
r) ikc_map=${OPTARG} r) ikc_map=${OPTARG}
;; ;;
q) ihk_irq=${OPTARG} q) ihk_irq=${OPTARG}
@@ -78,8 +81,8 @@ do
;; ;;
O) allow_oversubscribe="allow_oversubscribe" O) allow_oversubscribe="allow_oversubscribe"
;; ;;
*) echo "invalid option -${OPT}" >&2 \?) exit 1
exit 1 ;;
esac esac
done done
@@ -232,7 +235,7 @@ if [ "${ENABLE_MCOVERLAYFS}" == "yes" ]; then
enable_mcoverlay="yes" enable_mcoverlay="yes"
fi fi
else else
if [ ${linux_version_code} -eq 199168 -a ${rhel_release} -ge 327 -a ${rhel_release} -le 693 ]; then if [ ${linux_version_code} -eq 199168 -a ${rhel_release} -ge 327 -a ${rhel_release} -le 862 ]; then
enable_mcoverlay="yes" enable_mcoverlay="yes"
fi fi
if [ ${linux_version_code} -ge 262144 -a ${linux_version_code} -lt 262400 ]; then if [ ${linux_version_code} -ge 262144 -a ${linux_version_code} -lt 262400 ]; then
@@ -446,7 +449,7 @@ if ! ${SBINDIR}/ihkosctl 0 load ${KERNDIR}/mckernel.img; then
fi fi
# Set kernel arguments # Set kernel arguments
if ! ${SBINDIR}/ihkosctl 0 kargs "hidos $turbo $idle_halt dump_level=${DUMP_LEVEL} $extra_kopts $allow_oversubscribe"; then if ! ${SBINDIR}/ihkosctl 0 kargs "hidos $turbo $safe_kernel_map $idle_halt dump_level=${DUMP_LEVEL} $extra_kopts $allow_oversubscribe"; then
echo "error: setting kernel arguments" >&2 echo "error: setting kernel arguments" >&2
error_exit "os_created" error_exit "os_created"
fi fi

View File

@@ -54,48 +54,6 @@
/* Define to 1 if you have the <unistd.h> header file. */ /* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H #undef HAVE_UNISTD_H
/* Define to address of kernel symbol __vvar_page, or 0 if exported */
#undef MCCTRL_KSYM___vvar_page
/* Define to address of kernel symbol hpet_address, or 0 if exported */
#undef MCCTRL_KSYM_hpet_address
/* Define to address of kernel symbol hv_clock, or 0 if exported */
#undef MCCTRL_KSYM_hv_clock
/* Define to address of kernel symbol sys_mount, or 0 if exported */
#undef MCCTRL_KSYM_sys_mount
/* Define to address of kernel symbol sys_readlink, or 0 if exported */
#undef MCCTRL_KSYM_sys_readlink
/* Define to address of kernel symbol sys_umount, or 0 if exported */
#undef MCCTRL_KSYM_sys_umount
/* Define to address of kernel symbol sys_unshare, or 0 if exported */
#undef MCCTRL_KSYM_sys_unshare
/* Define to address of kernel symbol vdso_end, or 0 if exported */
#undef MCCTRL_KSYM_vdso_end
/* Define to address of kernel symbol vdso_image_64, or 0 if exported */
#undef MCCTRL_KSYM_vdso_image_64
/* Define to address of kernel symbol vdso_pages, or 0 if exported */
#undef MCCTRL_KSYM_vdso_pages
/* Define to address of kernel symbol vdso_spec, or 0 if exported */
#undef MCCTRL_KSYM_vdso_spec
/* Define to address of kernel symbol vdso_start, or 0 if exported */
#undef MCCTRL_KSYM_vdso_start
/* Define to address of kernel symbol walk_page_range, or 0 if exported */
#undef MCCTRL_KSYM_walk_page_range
/* Define to address of kernel symbol zap_page_range, or 0 if exported */
#undef MCCTRL_KSYM_zap_page_range
/* McKernel specific headers */ /* McKernel specific headers */
#undef MCKERNEL_INCDIR #undef MCKERNEL_INCDIR
@@ -128,3 +86,6 @@
/* Define to 1 if you have the ANSI C header files. */ /* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS #undef STDC_HEADERS
/* whether or not syscall_intercept library is linked */
#undef WITH_SYSCALL_INTERCEPT

622
configure vendored
View File

@@ -628,9 +628,12 @@ IHK_RELEASE_DATE
DCFA_VERSION DCFA_VERSION
MCKERNEL_VERSION MCKERNEL_VERSION
IHK_VERSION IHK_VERSION
WITH_SYSCALL_INTERCEPT
ENABLE_QLMPI ENABLE_QLMPI
ENABLE_RUSAGE ENABLE_RUSAGE
ENABLE_MCOVERLAYFS ENABLE_MCOVERLAYFS
LDFLAGS_SYSCALL_INTERCEPT
CPPFLAGS_SYSCALL_INTERCEPT
MANDIR MANDIR
KERNDIR KERNDIR
KMODDIR KMODDIR
@@ -702,6 +705,9 @@ enable_option_checking
with_mpi with_mpi
with_mpi_include with_mpi_include
with_mpi_lib with_mpi_lib
with_syscall_intercept
with_syscall_intercept_include
with_syscall_intercept_lib
with_kernelsrc with_kernelsrc
with_target with_target
with_system_map with_system_map
@@ -1346,6 +1352,15 @@ Optional Packages:
--with-mpi-include=PATH specify path where mpi include directory can be --with-mpi-include=PATH specify path where mpi include directory can be
found found
--with-mpi-lib=PATH specify path where mpi lib directory can be found --with-mpi-lib=PATH specify path where mpi lib directory can be found
--with-syscall_intercept=PATH
specify path where syscall_intercept include
directory and lib directory can be found
--with-syscall_intercept-include=PATH
specify path where syscall_intercept include
directory can be found
--with-syscall_intercept-lib=PATH
specify path where syscall_intercept lib directory
can be found
--with-kernelsrc=path Path to 'kernel src', default is --with-kernelsrc=path Path to 'kernel src', default is
/lib/modules/uname_r/build /lib/modules/uname_r/build
--with-target={attached-mic | builtin-mic | builtin-x86 | smp-x86} --with-target={attached-mic | builtin-mic | builtin-x86 | smp-x86}
@@ -2082,6 +2097,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
IHK_VERSION=1.5.1 IHK_VERSION=1.5.1
MCKERNEL_VERSION=1.5.1 MCKERNEL_VERSION=1.5.1
DCFA_VERSION=DCFA_VERSION_m4 DCFA_VERSION=DCFA_VERSION_m4
@@ -3513,6 +3530,195 @@ fi
# Check whether --with-syscall_intercept was given.
if test "${with_syscall_intercept+set}" = set; then :
withval=$with_syscall_intercept; case "$withval" in #(
yes|no|'') :
{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: --without-syscall_intercept=PATH expects a valid PATH" >&5
$as_echo "$as_me: WARNING: --without-syscall_intercept=PATH expects a valid PATH" >&2;}
with_syscall_intercept="" ;; #(
*) :
;;
esac
else
with_syscall_intercept=
fi
# Check whether --with-syscall_intercept-include was given.
if test "${with_syscall_intercept_include+set}" = set; then :
withval=$with_syscall_intercept_include; case "$withval" in #(
yes|no|'') :
{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: --without-syscall_intercept-include=PATH expects a valid PATH" >&5
$as_echo "$as_me: WARNING: --without-syscall_intercept-include=PATH expects a valid PATH" >&2;}
with_syscall_intercept_include="" ;; #(
*) :
;;
esac
fi
# Check whether --with-syscall_intercept-lib was given.
if test "${with_syscall_intercept_lib+set}" = set; then :
withval=$with_syscall_intercept_lib; case "$withval" in #(
yes|no|'') :
{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: --without-syscall_intercept-lib=PATH expects a valid PATH" >&5
$as_echo "$as_me: WARNING: --without-syscall_intercept-lib=PATH expects a valid PATH" >&2;}
with_syscall_intercept_lib="" ;; #(
*) :
;;
esac
fi
# The args have been sanitized into empty/non-empty values above.
# Now append -I/-L args to CPPFLAGS/LDFLAGS, with more specific options
# taking priority
if test -n "${with_syscall_intercept_include}"; then :
if echo "$CPPFLAGS_SYSCALL_INTERCEPT" | $FGREP -e "\<-I${with_syscall_intercept_include}\>" >/dev/null 2>&1; then :
echo "CPPFLAGS_SYSCALL_INTERCEPT(='$CPPFLAGS_SYSCALL_INTERCEPT') contains '-I${with_syscall_intercept_include}', not appending" >&5
else
echo "CPPFLAGS_SYSCALL_INTERCEPT(='$CPPFLAGS_SYSCALL_INTERCEPT') does not contain '-I${with_syscall_intercept_include}', appending" >&5
CPPFLAGS_SYSCALL_INTERCEPT="$CPPFLAGS_SYSCALL_INTERCEPT -I${with_syscall_intercept_include}"
fi
else
if test -n "${with_syscall_intercept}"; then :
if echo "$CPPFLAGS_SYSCALL_INTERCEPT" | $FGREP -e "\<-I${with_syscall_intercept}/include\>" >/dev/null 2>&1; then :
echo "CPPFLAGS_SYSCALL_INTERCEPT(='$CPPFLAGS_SYSCALL_INTERCEPT') contains '-I${with_syscall_intercept}/include', not appending" >&5
else
echo "CPPFLAGS_SYSCALL_INTERCEPT(='$CPPFLAGS_SYSCALL_INTERCEPT') does not contain '-I${with_syscall_intercept}/include', appending" >&5
CPPFLAGS_SYSCALL_INTERCEPT="$CPPFLAGS_SYSCALL_INTERCEPT -I${with_syscall_intercept}/include"
fi
fi
fi
if test -n "${with_syscall_intercept_lib}"; then :
if echo "$LDFLAGS_SYSCALL_INTERCEPT" | $FGREP -e "\<-L${with_syscall_intercept_lib} -Wl,-rpath,${with_syscall_intercept_lib}\>" >/dev/null 2>&1; then :
echo "LDFLAGS_SYSCALL_INTERCEPT(='$LDFLAGS_SYSCALL_INTERCEPT') contains '-L${with_syscall_intercept_lib} -Wl,-rpath,${with_syscall_intercept_lib}', not appending" >&5
else
echo "LDFLAGS_SYSCALL_INTERCEPT(='$LDFLAGS_SYSCALL_INTERCEPT') does not contain '-L${with_syscall_intercept_lib} -Wl,-rpath,${with_syscall_intercept_lib}', appending" >&5
LDFLAGS_SYSCALL_INTERCEPT="$LDFLAGS_SYSCALL_INTERCEPT -L${with_syscall_intercept_lib} -Wl,-rpath,${with_syscall_intercept_lib}"
fi
else
if test -n "${with_syscall_intercept}"; then :
if echo "$LDFLAGS_SYSCALL_INTERCEPT" | $FGREP -e "\<-L${with_syscall_intercept}/lib -Wl,-rpath,${with_syscall_intercept}/lib\>" >/dev/null 2>&1; then :
echo "LDFLAGS_SYSCALL_INTERCEPT(='$LDFLAGS_SYSCALL_INTERCEPT') contains '-L${with_syscall_intercept}/lib -Wl,-rpath,${with_syscall_intercept}/lib', not appending" >&5
else
echo "LDFLAGS_SYSCALL_INTERCEPT(='$LDFLAGS_SYSCALL_INTERCEPT') does not contain '-L${with_syscall_intercept}/lib -Wl,-rpath,${with_syscall_intercept}/lib', appending" >&5
LDFLAGS_SYSCALL_INTERCEPT="$LDFLAGS_SYSCALL_INTERCEPT -L${with_syscall_intercept}/lib -Wl,-rpath,${with_syscall_intercept}/lib"
fi
if test -d "${with_syscall_intercept}/lib64"; then :
if echo "$LDFLAGS_SYSCALL_INTERCEPT" | $FGREP -e "\<-L${with_syscall_intercept}/lib64 -Wl,-rpath,${with_syscall_intercept}/lib64\>" >/dev/null 2>&1; then :
echo "LDFLAGS_SYSCALL_INTERCEPT(='$LDFLAGS_SYSCALL_INTERCEPT') contains '-L${with_syscall_intercept}/lib64 -Wl,-rpath,${with_syscall_intercept}/lib64', not appending" >&5
else
echo "LDFLAGS_SYSCALL_INTERCEPT(='$LDFLAGS_SYSCALL_INTERCEPT') does not contain '-L${with_syscall_intercept}/lib64 -Wl,-rpath,${with_syscall_intercept}/lib64', appending" >&5
LDFLAGS_SYSCALL_INTERCEPT="$LDFLAGS_SYSCALL_INTERCEPT -L${with_syscall_intercept}/lib64 -Wl,-rpath,${with_syscall_intercept}/lib64"
fi
fi
fi
fi
if test -n "${with_syscall_intercept}" || test -n "${with_syscall_intercept_include}" || test -n "${with_syscall_intercept_lib}"; then :
WITH_SYSCALL_INTERCEPT=yes
else
WITH_SYSCALL_INTERCEPT=no
fi
# Fallback auto-detection of the syscall_intercept library.
#
# Runs only when WITH_SYSCALL_INTERCEPT is "no" at this point.  It then
# probes the default compiler/linker search paths:
#   1. link test for syscall_no_intercept() in -lsyscall_intercept
#      (linked together with -lcapstone -ldl),
#   2. header test for libsyscall_intercept_hook_point.h.
# WITH_SYSCALL_INTERCEPT becomes "yes" only if both probes succeed.
#
# The comparisons use "=" rather than "==": "==" is a bash extension to
# test(1), and a POSIX /bin/sh such as dash rejects it ("unexpected
# operator"), which would silently skip this whole detection.
# NOTE(review): configure is generated; mirror this change in configure.ac.
if test "x$WITH_SYSCALL_INTERCEPT" = "xno" ; then
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for syscall_no_intercept in -lsyscall_intercept" >&5
$as_echo_n "checking for syscall_no_intercept in -lsyscall_intercept... " >&6; }
  if ${ac_cv_lib_syscall_intercept_syscall_no_intercept+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    ac_check_lib_save_LIBS=$LIBS
    LIBS="-lsyscall_intercept -lcapstone -ldl $LIBS"
    # The here-document below is the link-test program; it must stay
    # unindented because it is written verbatim to conftest.$ac_ext.
    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
/* Override any GCC internal prototype to avoid an error.
Use char because int might match the return type of a GCC
builtin and then its argument prototype would still apply. */
#ifdef __cplusplus
extern "C"
#endif
char syscall_no_intercept ();
int
main ()
{
return syscall_no_intercept ();
;
return 0;
}
_ACEOF
    if ac_fn_c_try_link "$LINENO"; then :
      ac_cv_lib_syscall_intercept_syscall_no_intercept=yes
    else
      ac_cv_lib_syscall_intercept_syscall_no_intercept=no
    fi
    rm -f core conftest.err conftest.$ac_objext \
      conftest$ac_exeext conftest.$ac_ext
    LIBS=$ac_check_lib_save_LIBS
  fi
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_syscall_intercept_syscall_no_intercept" >&5
$as_echo "$ac_cv_lib_syscall_intercept_syscall_no_intercept" >&6; }
  if test "x$ac_cv_lib_syscall_intercept_syscall_no_intercept" = xyes; then :
    syscall_intercept_lib_found=yes
  else
    syscall_intercept_lib_found=no
  fi
  if test "x$syscall_intercept_lib_found" != "xyes"; then :
    { $as_echo "$as_me:${as_lineno-$LINENO}: libsyscall_intercept.so not found" >&5
$as_echo "$as_me: libsyscall_intercept.so not found" >&6;}
  fi
  ac_fn_c_check_header_mongrel "$LINENO" "libsyscall_intercept_hook_point.h" "ac_cv_header_libsyscall_intercept_hook_point_h" "$ac_includes_default"
  if test "x$ac_cv_header_libsyscall_intercept_hook_point_h" = xyes; then :
    syscall_intercept_header_found=yes
  else
    syscall_intercept_header_found=no
  fi
  if test "x$syscall_intercept_header_found" != "xyes"; then :
    { $as_echo "$as_me:${as_lineno-$LINENO}: libsyscall_intercept_hook_point.h not found" >&5
$as_echo "$as_me: libsyscall_intercept_hook_point.h not found" >&6;}
  fi
  # Enable the feature only when both the library and the header were found.
  if test "x$syscall_intercept_lib_found" = "xyes" && test "x$syscall_intercept_header_found" = "xyes"; then :
    WITH_SYSCALL_INTERCEPT=yes
  else
    WITH_SYSCALL_INTERCEPT=no
  fi
fi
# Check whether --with-kernelsrc was given. # Check whether --with-kernelsrc was given.
if test "${with_kernelsrc+set}" = set; then : if test "${with_kernelsrc+set}" = set; then :
withval=$with_kernelsrc; WITH_KERNELSRC=$withval withval=$with_kernelsrc; WITH_KERNELSRC=$withval
@@ -4396,399 +4602,6 @@ KDIR="$WITH_KERNELSRC"
UNAME_R="$WITH_UNAME_R" UNAME_R="$WITH_UNAME_R"
TARGET="$WITH_TARGET" TARGET="$WITH_TARGET"
MCCTRL_LINUX_SYMTAB=""
case "X$WITH_SYSTEM_MAP" in
Xyes | Xno | X)
MCCTRL_LINUX_SYMTAB=""
;;
*)
MCCTRL_LINUX_SYMTAB="$WITH_SYSTEM_MAP"
;;
esac
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for System.map" >&5
$as_echo_n "checking for System.map... " >&6; }
if test -r "$MCCTRL_LINUX_SYMTAB"; then
MCCTRL_LINUX_SYMTAB="$MCCTRL_LINUX_SYMTAB"
elif test -r "/boot/System.map-`uname -r`"; then
MCCTRL_LINUX_SYMTAB="/boot/System.map-`uname -r`"
elif test -r "$KDIR/System.map"; then
MCCTRL_LINUX_SYMTAB="$KDIR/System.map"
fi
if test "$MCCTRL_LINUX_SYMTAB" == ""; then
as_fn_error $? "could not find" "$LINENO" 5
fi
if test -z "`eval cat $MCCTRL_LINUX_SYMTAB`"; then
as_fn_error $? "could not read System.map file, no read permission?" "$LINENO" 5
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $MCCTRL_LINUX_SYMTAB" >&5
$as_echo "$MCCTRL_LINUX_SYMTAB" >&6; }
MCCTRL_LINUX_SYMTAB_CMD="cat $MCCTRL_LINUX_SYMTAB"
# MCCTRL_FIND_KSYM(SYMBOL)
# ------------------------------------------------------
# Search System.map for address of the given symbol and
# do one of three things in config.h:
# If not found, leave MCCTRL_KSYM_foo undefined
# If found to be exported, "#define MCCTRL_KSYM_foo 0"
# If found not to be exported, "#define MCCTRL_KSYM_foo 0x<value>"
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol sys_mount" >&5
$as_echo_n "checking System.map for symbol sys_mount... " >&6; }
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " sys_mount\$" | cut -d\ -f1`
if test -z $mcctrl_addr; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
else
mcctrl_result=$mcctrl_addr
mcctrl_addr="0x$mcctrl_addr"
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_sys_mount\$" >/dev/null`; then
mcctrl_result="exported"
mcctrl_addr="0"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
$as_echo "$mcctrl_result" >&6; }
cat >>confdefs.h <<_ACEOF
#define MCCTRL_KSYM_sys_mount $mcctrl_addr
_ACEOF
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol sys_umount" >&5
$as_echo_n "checking System.map for symbol sys_umount... " >&6; }
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " sys_umount\$" | cut -d\ -f1`
if test -z $mcctrl_addr; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
else
mcctrl_result=$mcctrl_addr
mcctrl_addr="0x$mcctrl_addr"
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_sys_umount\$" >/dev/null`; then
mcctrl_result="exported"
mcctrl_addr="0"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
$as_echo "$mcctrl_result" >&6; }
cat >>confdefs.h <<_ACEOF
#define MCCTRL_KSYM_sys_umount $mcctrl_addr
_ACEOF
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol sys_unshare" >&5
$as_echo_n "checking System.map for symbol sys_unshare... " >&6; }
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " sys_unshare\$" | cut -d\ -f1`
if test -z $mcctrl_addr; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
else
mcctrl_result=$mcctrl_addr
mcctrl_addr="0x$mcctrl_addr"
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_sys_unshare\$" >/dev/null`; then
mcctrl_result="exported"
mcctrl_addr="0"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
$as_echo "$mcctrl_result" >&6; }
cat >>confdefs.h <<_ACEOF
#define MCCTRL_KSYM_sys_unshare $mcctrl_addr
_ACEOF
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol zap_page_range" >&5
$as_echo_n "checking System.map for symbol zap_page_range... " >&6; }
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " zap_page_range\$" | cut -d\ -f1`
if test -z $mcctrl_addr; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
else
mcctrl_result=$mcctrl_addr
mcctrl_addr="0x$mcctrl_addr"
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_zap_page_range\$" >/dev/null`; then
mcctrl_result="exported"
mcctrl_addr="0"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
$as_echo "$mcctrl_result" >&6; }
cat >>confdefs.h <<_ACEOF
#define MCCTRL_KSYM_zap_page_range $mcctrl_addr
_ACEOF
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol vdso_image_64" >&5
$as_echo_n "checking System.map for symbol vdso_image_64... " >&6; }
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " vdso_image_64\$" | cut -d\ -f1`
if test -z $mcctrl_addr; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
else
mcctrl_result=$mcctrl_addr
mcctrl_addr="0x$mcctrl_addr"
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_vdso_image_64\$" >/dev/null`; then
mcctrl_result="exported"
mcctrl_addr="0"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
$as_echo "$mcctrl_result" >&6; }
cat >>confdefs.h <<_ACEOF
#define MCCTRL_KSYM_vdso_image_64 $mcctrl_addr
_ACEOF
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol vdso_start" >&5
$as_echo_n "checking System.map for symbol vdso_start... " >&6; }
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " vdso_start\$" | cut -d\ -f1`
if test -z $mcctrl_addr; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
else
mcctrl_result=$mcctrl_addr
mcctrl_addr="0x$mcctrl_addr"
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_vdso_start\$" >/dev/null`; then
mcctrl_result="exported"
mcctrl_addr="0"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
$as_echo "$mcctrl_result" >&6; }
cat >>confdefs.h <<_ACEOF
#define MCCTRL_KSYM_vdso_start $mcctrl_addr
_ACEOF
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol vdso_end" >&5
$as_echo_n "checking System.map for symbol vdso_end... " >&6; }
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " vdso_end\$" | cut -d\ -f1`
if test -z $mcctrl_addr; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
else
mcctrl_result=$mcctrl_addr
mcctrl_addr="0x$mcctrl_addr"
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_vdso_end\$" >/dev/null`; then
mcctrl_result="exported"
mcctrl_addr="0"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
$as_echo "$mcctrl_result" >&6; }
cat >>confdefs.h <<_ACEOF
#define MCCTRL_KSYM_vdso_end $mcctrl_addr
_ACEOF
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol vdso_pages" >&5
$as_echo_n "checking System.map for symbol vdso_pages... " >&6; }
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " vdso_pages\$" | cut -d\ -f1`
if test -z $mcctrl_addr; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
else
mcctrl_result=$mcctrl_addr
mcctrl_addr="0x$mcctrl_addr"
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_vdso_pages\$" >/dev/null`; then
mcctrl_result="exported"
mcctrl_addr="0"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
$as_echo "$mcctrl_result" >&6; }
cat >>confdefs.h <<_ACEOF
#define MCCTRL_KSYM_vdso_pages $mcctrl_addr
_ACEOF
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol __vvar_page" >&5
$as_echo_n "checking System.map for symbol __vvar_page... " >&6; }
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __vvar_page\$" | cut -d\ -f1`
if test -z $mcctrl_addr; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
else
mcctrl_result=$mcctrl_addr
mcctrl_addr="0x$mcctrl_addr"
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab___vvar_page\$" >/dev/null`; then
mcctrl_result="exported"
mcctrl_addr="0"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
$as_echo "$mcctrl_result" >&6; }
cat >>confdefs.h <<_ACEOF
#define MCCTRL_KSYM___vvar_page $mcctrl_addr
_ACEOF
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol hpet_address" >&5
$as_echo_n "checking System.map for symbol hpet_address... " >&6; }
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " hpet_address\$" | cut -d\ -f1`
if test -z $mcctrl_addr; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
else
mcctrl_result=$mcctrl_addr
mcctrl_addr="0x$mcctrl_addr"
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_hpet_address\$" >/dev/null`; then
mcctrl_result="exported"
mcctrl_addr="0"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
$as_echo "$mcctrl_result" >&6; }
cat >>confdefs.h <<_ACEOF
#define MCCTRL_KSYM_hpet_address $mcctrl_addr
_ACEOF
fi
# POSTK_DEBUG_ARCH_DEP_50, add:find kernel symbol.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol vdso_spec" >&5
$as_echo_n "checking System.map for symbol vdso_spec... " >&6; }
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " vdso_spec\$" | cut -d\ -f1`
if test -z $mcctrl_addr; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
else
mcctrl_result=$mcctrl_addr
mcctrl_addr="0x$mcctrl_addr"
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_vdso_spec\$" >/dev/null`; then
mcctrl_result="exported"
mcctrl_addr="0"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
$as_echo "$mcctrl_result" >&6; }
cat >>confdefs.h <<_ACEOF
#define MCCTRL_KSYM_vdso_spec $mcctrl_addr
_ACEOF
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol hv_clock" >&5
$as_echo_n "checking System.map for symbol hv_clock... " >&6; }
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " hv_clock\$" | cut -d\ -f1`
if test -z $mcctrl_addr; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
else
mcctrl_result=$mcctrl_addr
mcctrl_addr="0x$mcctrl_addr"
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_hv_clock\$" >/dev/null`; then
mcctrl_result="exported"
mcctrl_addr="0"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
$as_echo "$mcctrl_result" >&6; }
cat >>confdefs.h <<_ACEOF
#define MCCTRL_KSYM_hv_clock $mcctrl_addr
_ACEOF
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol sys_readlink" >&5
$as_echo_n "checking System.map for symbol sys_readlink... " >&6; }
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " sys_readlink\$" | cut -d\ -f1`
if test -z $mcctrl_addr; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
else
mcctrl_result=$mcctrl_addr
mcctrl_addr="0x$mcctrl_addr"
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_sys_readlink\$" >/dev/null`; then
mcctrl_result="exported"
mcctrl_addr="0"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
$as_echo "$mcctrl_result" >&6; }
cat >>confdefs.h <<_ACEOF
#define MCCTRL_KSYM_sys_readlink $mcctrl_addr
_ACEOF
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol walk_page_range" >&5
$as_echo_n "checking System.map for symbol walk_page_range... " >&6; }
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " walk_page_range\$" | cut -d\ -f1`
if test -z $mcctrl_addr; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
else
mcctrl_result=$mcctrl_addr
mcctrl_addr="0x$mcctrl_addr"
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_walk_page_range\$" >/dev/null`; then
mcctrl_result="exported"
mcctrl_addr="0"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
$as_echo "$mcctrl_result" >&6; }
cat >>confdefs.h <<_ACEOF
#define MCCTRL_KSYM_walk_page_range $mcctrl_addr
_ACEOF
fi
case $ENABLE_MEMDUMP in case $ENABLE_MEMDUMP in
yes|no|auto) yes|no|auto)
;; ;;
@@ -4986,6 +4799,17 @@ else
$as_echo "$as_me: perf is disabled" >&6;} $as_echo "$as_me: perf is disabled" >&6;}
fi fi
if test "x$WITH_SYSCALL_INTERCEPT" = "xyes" ; then
$as_echo "#define WITH_SYSCALL_INTERCEPT 1" >>confdefs.h
{ $as_echo "$as_me:${as_lineno-$LINENO}: syscall_intercept library is linked" >&5
$as_echo "$as_me: syscall_intercept library is linked" >&6;}
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: syscall_intercept library isn't linked" >&5
$as_echo "$as_me: syscall_intercept library isn't linked" >&6;}
fi
if test "x$MCKERNEL_INCDIR" != "x" ; then if test "x$MCKERNEL_INCDIR" != "x" ; then
cat >>confdefs.h <<_ACEOF cat >>confdefs.h <<_ACEOF
@@ -5052,6 +4876,9 @@ fi
@@ -5060,9 +4887,14 @@ ac_config_headers="$ac_config_headers config.h"
# POSTK_DEBUG_ARCH_DEP_37 # POSTK_DEBUG_ARCH_DEP_37
# AC_CONFIG_FILES arch dependfiles separate # AC_CONFIG_FILES arch dependfiles separate
ac_config_files="$ac_config_files Makefile executer/user/Makefile executer/user/mcexec.1:executer/user/mcexec.1in executer/user/vmcore2mckdump executer/user/arch/$ARCH/Makefile executer/user/arch/x86_64/Makefile executer/kernel/mcctrl/Makefile executer/kernel/mcctrl/arch/$ARCH/Makefile executer/kernel/mcoverlayfs/Makefile executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/Makefile executer/kernel/mcoverlayfs/linux-4.0.9/Makefile executer/kernel/mcoverlayfs/linux-4.6.7/Makefile executer/include/qlmpilib.h kernel/Makefile kernel/Makefile.build kernel/include/swapfmt.h arch/x86_64/tools/mcreboot-attached-mic.sh arch/x86_64/tools/mcshutdown-attached-mic.sh arch/x86_64/tools/mcreboot-builtin-x86.sh arch/x86_64/tools/mcreboot-smp-x86.sh arch/x86_64/tools/mcstop+release-smp-x86.sh arch/x86_64/tools/mcoverlay-destroy-smp-x86.sh arch/x86_64/tools/mcoverlay-create-smp-x86.sh arch/x86_64/tools/eclair-dump-backtrace.exp arch/x86_64/tools/mcshutdown-builtin-x86.sh arch/x86_64/tools/mcreboot.1:arch/x86_64/tools/mcreboot.1in arch/x86_64/tools/irqbalance_mck.service arch/x86_64/tools/irqbalance_mck.in tools/mcstat/Makefile" ac_config_files="$ac_config_files Makefile executer/user/Makefile executer/user/mcexec.1:executer/user/mcexec.1in executer/user/vmcore2mckdump executer/user/arch/$ARCH/Makefile executer/user/arch/x86_64/Makefile executer/kernel/mcctrl/Makefile executer/kernel/mcctrl/arch/$ARCH/Makefile executer/kernel/mcoverlayfs/Makefile executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/Makefile executer/kernel/mcoverlayfs/linux-4.0.9/Makefile executer/kernel/mcoverlayfs/linux-4.6.7/Makefile executer/include/qlmpilib.h kernel/Makefile kernel/Makefile.build kernel/include/swapfmt.h arch/x86_64/tools/mcreboot-attached-mic.sh arch/x86_64/tools/mcshutdown-attached-mic.sh arch/x86_64/tools/mcreboot-builtin-x86.sh arch/x86_64/tools/mcreboot-smp-x86.sh arch/x86_64/tools/mcstop+release-smp-x86.sh arch/x86_64/tools/mcoverlay-destroy-smp-x86.sh 
arch/x86_64/tools/mcoverlay-create-smp-x86.sh arch/x86_64/tools/eclair-dump-backtrace.exp arch/x86_64/tools/mcshutdown-builtin-x86.sh arch/x86_64/tools/mcreboot.1:arch/x86_64/tools/mcreboot.1in arch/x86_64/tools/irqbalance_mck.service arch/x86_64/tools/irqbalance_mck.in tools/mcstat/mcstat.1:tools/mcstat/mcstat.1in tools/mcstat/Makefile"
if test -e "${ABS_SRCDIR}/test"; then
ac_config_files="$ac_config_files mck_test_config.sample:test/mck_test_config.sample.in"
fi
if test "$TARGET" = "smp-x86"; then if test "$TARGET" = "smp-x86"; then
ac_config_files="$ac_config_files arch/x86_64/kernel/Makefile.arch" ac_config_files="$ac_config_files arch/x86_64/kernel/Makefile.arch"
@@ -5797,7 +5629,9 @@ do
"arch/x86_64/tools/mcreboot.1") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcreboot.1:arch/x86_64/tools/mcreboot.1in" ;; "arch/x86_64/tools/mcreboot.1") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcreboot.1:arch/x86_64/tools/mcreboot.1in" ;;
"arch/x86_64/tools/irqbalance_mck.service") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/irqbalance_mck.service" ;; "arch/x86_64/tools/irqbalance_mck.service") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/irqbalance_mck.service" ;;
"arch/x86_64/tools/irqbalance_mck.in") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/irqbalance_mck.in" ;; "arch/x86_64/tools/irqbalance_mck.in") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/irqbalance_mck.in" ;;
"tools/mcstat/mcstat.1") CONFIG_FILES="$CONFIG_FILES tools/mcstat/mcstat.1:tools/mcstat/mcstat.1in" ;;
"tools/mcstat/Makefile") CONFIG_FILES="$CONFIG_FILES tools/mcstat/Makefile" ;; "tools/mcstat/Makefile") CONFIG_FILES="$CONFIG_FILES tools/mcstat/Makefile" ;;
"mck_test_config.sample") CONFIG_FILES="$CONFIG_FILES mck_test_config.sample:test/mck_test_config.sample.in" ;;
"arch/x86_64/kernel/Makefile.arch") CONFIG_FILES="$CONFIG_FILES arch/x86_64/kernel/Makefile.arch" ;; "arch/x86_64/kernel/Makefile.arch") CONFIG_FILES="$CONFIG_FILES arch/x86_64/kernel/Makefile.arch" ;;
"kernel/config/config.smp-arm64") CONFIG_FILES="$CONFIG_FILES kernel/config/config.smp-arm64" ;; "kernel/config/config.smp-arm64") CONFIG_FILES="$CONFIG_FILES kernel/config/config.smp-arm64" ;;
"arch/arm64/kernel/vdso/Makefile") CONFIG_FILES="$CONFIG_FILES arch/arm64/kernel/vdso/Makefile" ;; "arch/arm64/kernel/vdso/Makefile") CONFIG_FILES="$CONFIG_FILES arch/arm64/kernel/vdso/Makefile" ;;

View File

@@ -77,6 +77,58 @@ AC_DEFUN([PAC_SET_HEADER_LIB_PATH],[
]) ])
]) ])
dnl PAC_SET_HEADER_LIB_PATH_SYSCALL_INTERCEPT(NAME, DEFAULT-PATH)
dnl
dnl Declares three configure options for the package NAME:
dnl   --with-NAME=PATH          prefix holding both include/ and lib/
dnl   --with-NAME-include=PATH  include directory only
dnl   --with-NAME-lib=PATH      lib directory only
dnl A value of yes, no or the empty string is rejected with a warning and
dnl the corresponding with_* shell variable is reset to empty.  When
dnl --with-NAME is not given at all, with_NAME is set to DEFAULT-PATH.
dnl
dnl The resulting paths are appended to CPPFLAGS_SYSCALL_INTERCEPT (as -I)
dnl and LDFLAGS_SYSCALL_INTERCEPT (as -L plus a matching -Wl,-rpath), not to
dnl the global CPPFLAGS/LDFLAGS.  WITH_SYSCALL_INTERCEPT is set to yes when
dnl any of the three paths is non-empty and to no otherwise.
AC_DEFUN([PAC_SET_HEADER_LIB_PATH_SYSCALL_INTERCEPT],[
AC_ARG_WITH([$1],
[AC_HELP_STRING([--with-$1=PATH],
[specify path where $1 include directory and lib directory can be found])],
[AS_CASE(["$withval"],
[yes|no|''],
[AC_MSG_WARN([--with[out]-$1=PATH expects a valid PATH])
with_$1=""])],
[with_$1=$2])
AC_ARG_WITH([$1-include],
[AC_HELP_STRING([--with-$1-include=PATH],
[specify path where $1 include directory can be found])],
[AS_CASE(["$withval"],
[yes|no|''],
[AC_MSG_WARN([--with[out]-$1-include=PATH expects a valid PATH])
with_$1_include=""])],
[])
AC_ARG_WITH([$1-lib],
[AC_HELP_STRING([--with-$1-lib=PATH],
[specify path where $1 lib directory can be found])],
[AS_CASE(["$withval"],
[yes|no|''],
[AC_MSG_WARN([--with[out]-$1-lib=PATH expects a valid PATH])
with_$1_lib=""])],
[])
# The args have been sanitized into empty/non-empty values above.
# Now append -I/-L args to CPPFLAGS_SYSCALL_INTERCEPT and
# LDFLAGS_SYSCALL_INTERCEPT, with the more specific -include/-lib options
# taking priority over the common prefix.
AS_IF([test -n "${with_$1_include}"],
[PAC_APPEND_FLAG([-I${with_$1_include}],[CPPFLAGS_SYSCALL_INTERCEPT])],
[AS_IF([test -n "${with_$1}"],
[PAC_APPEND_FLAG([-I${with_$1}/include],[CPPFLAGS_SYSCALL_INTERCEPT])])])
AS_IF([test -n "${with_$1_lib}"],
[PAC_APPEND_FLAG([-L${with_$1_lib} -Wl,-rpath,${with_$1_lib}],[LDFLAGS_SYSCALL_INTERCEPT])],
[AS_IF([test -n "${with_$1}"],
dnl is adding lib64 by default really the right thing to do? What if
dnl we are on a 32-bit host that happens to have both lib dirs available?
[PAC_APPEND_FLAG([-L${with_$1}/lib -Wl,-rpath,${with_$1}/lib],[LDFLAGS_SYSCALL_INTERCEPT])
AS_IF([test -d "${with_$1}/lib64"],
[PAC_APPEND_FLAG([-L${with_$1}/lib64 -Wl,-rpath,${with_$1}/lib64],[LDFLAGS_SYSCALL_INTERCEPT])])
])
])
AS_IF([test -n "${with_$1}" || test -n "${with_$1_include}" || test -n "${with_$1_lib}"],
[WITH_SYSCALL_INTERCEPT=yes],
[WITH_SYSCALL_INTERCEPT=no])
])
IHK_VERSION=IHK_VERSION_m4 IHK_VERSION=IHK_VERSION_m4
MCKERNEL_VERSION=MCKERNEL_VERSION_m4 MCKERNEL_VERSION=MCKERNEL_VERSION_m4
DCFA_VERSION=DCFA_VERSION_m4 DCFA_VERSION=DCFA_VERSION_m4
@@ -95,6 +147,23 @@ AS_IF([test "x$numa_lib_found" != "xyes"],
PAC_SET_HEADER_LIB_PATH([mpi]) PAC_SET_HEADER_LIB_PATH([mpi])
PAC_SET_HEADER_LIB_PATH_SYSCALL_INTERCEPT([syscall_intercept])
dnl No --with-syscall_intercept* path was supplied: probe the default search
dnl paths for the library and its header, and enable the feature only when
dnl both are found.
dnl The string comparisons use a single equals sign, which is the only
dnl string-equality operator POSIX specifies for test; the double form is a
dnl bash extension and makes configure fail under shells such as dash.
if test "x$WITH_SYSCALL_INTERCEPT" = "xno" ; then
AC_CHECK_LIB([syscall_intercept],[syscall_no_intercept],[syscall_intercept_lib_found=yes],[syscall_intercept_lib_found=no],[-lcapstone -ldl])
AS_IF([test "x$syscall_intercept_lib_found" != "xyes"],
[AC_MSG_NOTICE([libsyscall_intercept.so not found])])
AC_CHECK_HEADER([libsyscall_intercept_hook_point.h],[syscall_intercept_header_found=yes],[syscall_intercept_header_found=no])
AS_IF([test "x$syscall_intercept_header_found" != "xyes"],
[AC_MSG_NOTICE([libsyscall_intercept_hook_point.h not found])])
AS_IF([test "x$syscall_intercept_lib_found" = "xyes" && test "x$syscall_intercept_header_found" = "xyes"],
[WITH_SYSCALL_INTERCEPT=yes],
[WITH_SYSCALL_INTERCEPT=no])
fi
AC_ARG_WITH([kernelsrc], AC_ARG_WITH([kernelsrc],
AC_HELP_STRING( AC_HELP_STRING(
[--with-kernelsrc=path],[Path to 'kernel src', default is /lib/modules/uname_r/build]), [--with-kernelsrc=path],[Path to 'kernel src', default is /lib/modules/uname_r/build]),
@@ -339,78 +408,6 @@ KDIR="$WITH_KERNELSRC"
UNAME_R="$WITH_UNAME_R" UNAME_R="$WITH_UNAME_R"
TARGET="$WITH_TARGET" TARGET="$WITH_TARGET"
MCCTRL_LINUX_SYMTAB=""
case "X$WITH_SYSTEM_MAP" in
Xyes | Xno | X)
MCCTRL_LINUX_SYMTAB=""
;;
*)
MCCTRL_LINUX_SYMTAB="$WITH_SYSTEM_MAP"
;;
esac
AC_MSG_CHECKING([[for System.map]])
if test -r "$MCCTRL_LINUX_SYMTAB"; then
MCCTRL_LINUX_SYMTAB="$MCCTRL_LINUX_SYMTAB"
elif test -r "/boot/System.map-`uname -r`"; then
MCCTRL_LINUX_SYMTAB="/boot/System.map-`uname -r`"
elif test -r "$KDIR/System.map"; then
MCCTRL_LINUX_SYMTAB="$KDIR/System.map"
fi
if test "$MCCTRL_LINUX_SYMTAB" == ""; then
AC_MSG_ERROR([could not find])
fi
if test -z "`eval cat $MCCTRL_LINUX_SYMTAB`"; then
AC_MSG_ERROR([could not read System.map file, no read permission?])
fi
AC_MSG_RESULT([$MCCTRL_LINUX_SYMTAB])
MCCTRL_LINUX_SYMTAB_CMD="cat $MCCTRL_LINUX_SYMTAB"
# MCCTRL_FIND_KSYM(SYMBOL)
# ------------------------------------------------------
# Search System.map for address of the given symbol and
# do one of three things in config.h:
# If not found, leave MCCTRL_KSYM_foo undefined
# If found to be exported, "#define MCCTRL_KSYM_foo 0"
# If found not to be exported, "#define MCCTRL_KSYM_foo 0x<value>"
AC_DEFUN([MCCTRL_FIND_KSYM],[
AC_MSG_CHECKING([[System.map for symbol $1]])
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " $1\$" | cut -d\ -f1`
if test -z $mcctrl_addr; then
AC_MSG_RESULT([not found])
else
mcctrl_result=$mcctrl_addr
mcctrl_addr="0x$mcctrl_addr"
m4_ifval([$2],[],[
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_$1\$" >/dev/null`; then
mcctrl_result="exported"
mcctrl_addr="0"
fi
])
AC_MSG_RESULT([$mcctrl_result])
AC_DEFINE_UNQUOTED(MCCTRL_KSYM_[]$1,$mcctrl_addr,[Define to address of kernel symbol $1, or 0 if exported])
fi
])
MCCTRL_FIND_KSYM([sys_mount])
MCCTRL_FIND_KSYM([sys_umount])
MCCTRL_FIND_KSYM([sys_unshare])
MCCTRL_FIND_KSYM([zap_page_range])
MCCTRL_FIND_KSYM([vdso_image_64])
MCCTRL_FIND_KSYM([vdso_start])
MCCTRL_FIND_KSYM([vdso_end])
MCCTRL_FIND_KSYM([vdso_pages])
MCCTRL_FIND_KSYM([__vvar_page])
MCCTRL_FIND_KSYM([hpet_address])
# POSTK_DEBUG_ARCH_DEP_50, add:find kernel symbol.
MCCTRL_FIND_KSYM([vdso_spec])
MCCTRL_FIND_KSYM([hv_clock])
MCCTRL_FIND_KSYM([sys_readlink])
MCCTRL_FIND_KSYM([walk_page_range])
case $ENABLE_MEMDUMP in case $ENABLE_MEMDUMP in
yes|no|auto) yes|no|auto)
;; ;;
@@ -489,6 +486,13 @@ else
AC_MSG_NOTICE([perf is disabled]) AC_MSG_NOTICE([perf is disabled])
fi fi
dnl Define the WITH_SYSCALL_INTERCEPT preprocessor macro when the library
dnl was selected above, and report the decision either way.
AS_IF([test "x$WITH_SYSCALL_INTERCEPT" = "xyes"],
      [AC_DEFINE([WITH_SYSCALL_INTERCEPT],[1],[whether or not syscall_intercept library is linked])
       AC_MSG_NOTICE([syscall_intercept library is linked])],
      [AC_MSG_NOTICE([syscall_intercept library isn't linked])])
if test "x$MCKERNEL_INCDIR" != "x" ; then if test "x$MCKERNEL_INCDIR" != "x" ; then
AC_DEFINE_UNQUOTED(MCKERNEL_INCDIR,"$MCKERNEL_INCDIR",[McKernel specific headers]) AC_DEFINE_UNQUOTED(MCKERNEL_INCDIR,"$MCKERNEL_INCDIR",[McKernel specific headers])
fi fi
@@ -526,9 +530,12 @@ AC_SUBST(KMODDIR)
AC_SUBST(KERNDIR) AC_SUBST(KERNDIR)
AC_SUBST(MANDIR) AC_SUBST(MANDIR)
AC_SUBST(CFLAGS) AC_SUBST(CFLAGS)
AC_SUBST(CPPFLAGS_SYSCALL_INTERCEPT)
AC_SUBST(LDFLAGS_SYSCALL_INTERCEPT)
AC_SUBST(ENABLE_MCOVERLAYFS) AC_SUBST(ENABLE_MCOVERLAYFS)
AC_SUBST(ENABLE_RUSAGE) AC_SUBST(ENABLE_RUSAGE)
AC_SUBST(ENABLE_QLMPI) AC_SUBST(ENABLE_QLMPI)
AC_SUBST(WITH_SYSCALL_INTERCEPT)
AC_SUBST(IHK_VERSION) AC_SUBST(IHK_VERSION)
AC_SUBST(MCKERNEL_VERSION) AC_SUBST(MCKERNEL_VERSION)
@@ -570,9 +577,16 @@ AC_CONFIG_FILES([
arch/x86_64/tools/mcreboot.1:arch/x86_64/tools/mcreboot.1in arch/x86_64/tools/mcreboot.1:arch/x86_64/tools/mcreboot.1in
arch/x86_64/tools/irqbalance_mck.service arch/x86_64/tools/irqbalance_mck.service
arch/x86_64/tools/irqbalance_mck.in arch/x86_64/tools/irqbalance_mck.in
tools/mcstat/mcstat.1:tools/mcstat/mcstat.1in
tools/mcstat/Makefile tools/mcstat/Makefile
]) ])
dnl Generate mck_test_config.sample from test/mck_test_config.sample.in only
dnl when ${ABS_SRCDIR}/test exists at configure time.
if test -e "${ABS_SRCDIR}/test"; then
AC_CONFIG_FILES([
mck_test_config.sample:test/mck_test_config.sample.in
])
fi
if test "$TARGET" = "smp-x86"; then if test "$TARGET" = "smp-x86"; then
AC_CONFIG_FILES([ AC_CONFIG_FILES([
arch/x86_64/kernel/Makefile.arch arch/x86_64/kernel/Makefile.arch

View File

@@ -55,13 +55,14 @@
#define MCEXEC_UP_SYS_UMOUNT 0x30a02915 #define MCEXEC_UP_SYS_UMOUNT 0x30a02915
#define MCEXEC_UP_SYS_UNSHARE 0x30a02916 #define MCEXEC_UP_SYS_UNSHARE 0x30a02916
#define MCEXEC_UP_UTIL_THREAD1 0x30a02920 #define MCEXEC_UP_UTI_GET_CTX 0x30a02920
#define MCEXEC_UP_UTIL_THREAD2 0x30a02921 #define MCEXEC_UP_UTI_SAVE_FS 0x30a02921
#define MCEXEC_UP_SIG_THREAD 0x30a02922 #define MCEXEC_UP_SIG_THREAD 0x30a02922
#define MCEXEC_UP_SYSCALL_THREAD 0x30a02924 #define MCEXEC_UP_SYSCALL_THREAD 0x30a02924
#define MCEXEC_UP_TERMINATE_THREAD 0x30a02925 #define MCEXEC_UP_TERMINATE_THREAD 0x30a02925
#define MCEXEC_UP_GET_NUM_POOL_THREADS 0x30a02926 #define MCEXEC_UP_GET_NUM_POOL_THREADS 0x30a02926
#define MCEXEC_UP_UTI_ATTR 0x30a02927 #define MCEXEC_UP_UTI_ATTR 0x30a02927
#define MCEXEC_UP_RELEASE_USER_SPACE 0x30a02928
#define MCEXEC_UP_DEBUG_LOG 0x40000000 #define MCEXEC_UP_DEBUG_LOG 0x40000000
@@ -91,6 +92,7 @@ struct program_image_section {
struct get_cpu_set_arg { struct get_cpu_set_arg {
int nr_processes; int nr_processes;
int *process_rank;
void *cpu_set; void *cpu_set;
size_t cpu_set_size; // Size in bytes size_t cpu_set_size; // Size in bytes
int *target_core; int *target_core;
@@ -140,8 +142,10 @@ struct program_load_desc {
unsigned long heap_extension; unsigned long heap_extension;
long stack_premap; long stack_premap;
unsigned long mpol_bind_mask; unsigned long mpol_bind_mask;
int uti_thread_rank; /* N-th clone() spawns a thread on Linux CPU */
int uti_use_last_cpu; /* Work-around not to share CPU with OpenMP thread */
int nr_processes; int nr_processes;
char shell_path[SHELL_PATH_MAX_LEN]; int process_rank;
__cpu_set_unit cpu_set[PLD_CPU_SET_SIZE]; __cpu_set_unit cpu_set[PLD_CPU_SET_SIZE];
int profile; int profile;
struct program_image_section sections[0]; struct program_image_section sections[0];
@@ -242,6 +246,28 @@ struct sys_unshare_desc {
unsigned long unshare_flags; unsigned long unshare_flags;
}; };
/*
 * Pair of user-space addresses delimiting a range.
 * NOTE(review): presumably the argument of MCEXEC_UP_RELEASE_USER_SPACE;
 * whether user_end is inclusive or exclusive is not visible here - confirm
 * against the ioctl handler.
 */
struct release_user_space_desc {
unsigned long user_start;
unsigned long user_end;
};
/*
 * Identifies a thread (pid/tid plus its task_struct address) together with
 * a packed termination code.
 * NOTE(review): presumably the argument of MCEXEC_UP_TERMINATE_THREAD -
 * confirm against the ioctl handler.
 */
struct terminate_thread_desc {
int pid;
int tid;
long code;
/*
 * Bit layout of code.  The column alignment of the original diagram was
 * lost, so the label-to-field mapping below should be confirmed:
 *   bit  32      exit_group
 *   bits 31..16  (no label)
 *   bits 15..8   exit-status
 *   bits 7..0    signal
 */
unsigned long tsk; /* struct task_struct * */
};
/*
 * Three pointer-sized values: a page-table reference plus a start/length
 * pair.
 * NOTE(review): rpgtable is presumably the remote (McKernel-side) page
 * table and start/len the virtual range it applies to - confirm with the
 * code that fills this descriptor.
 */
struct rpgtable_desc {
uintptr_t rpgtable;
uintptr_t start;
uintptr_t len;
};
enum perf_ctrl_type { enum perf_ctrl_type {
PERF_CTRL_SET, PERF_CTRL_SET,
PERF_CTRL_GET, PERF_CTRL_GET,
@@ -251,6 +277,7 @@ enum perf_ctrl_type {
struct perf_ctrl_desc { struct perf_ctrl_desc {
enum perf_ctrl_type ctrl_type; enum perf_ctrl_type ctrl_type;
int err;
union { union {
/* for SET, GET */ /* for SET, GET */
struct { struct {
@@ -290,6 +317,10 @@ struct perf_ctrl_desc {
#define UTI_FLAG_HIGH_PRIORITY (1ULL<<12) #define UTI_FLAG_HIGH_PRIORITY (1ULL<<12)
#define UTI_FLAG_NON_COOPERATIVE (1ULL<<13) #define UTI_FLAG_NON_COOPERATIVE (1ULL<<13)
#define UTI_FLAG_PREFER_LWK (1ULL << 14)
#define UTI_FLAG_PREFER_FWK (1ULL << 15)
#define UTI_FLAG_FABRIC_INTR_AFFINITY (1ULL << 16)
/* Linux default value is used */ /* Linux default value is used */
#define UTI_MAX_NUMA_DOMAINS (1024) #define UTI_MAX_NUMA_DOMAINS (1024)
@@ -308,6 +339,30 @@ struct kuti_attr {
struct uti_attr_desc { struct uti_attr_desc {
unsigned long phys_attr; unsigned long phys_attr;
char *uti_cpu_set_str; /* UTI_CPU_SET environmental variable */
size_t uti_cpu_set_len;
};
/*
 * Fixed-size (4096-byte) context blob.
 * The anonymous union overlays the raw byte buffer with an anonymous
 * struct, so uti_refill_tid aliases the first sizeof(int) bytes of ctx[].
 */
struct uti_ctx {
union {
char ctx[4096]; /* TODO: Get the size from config.h */
struct {
int uti_refill_tid;
};
};
};
/*
 * Descriptor naming a remote context (by physical address and by pointer)
 * and a local context buffer; key is filled in for the caller (OUT).
 * NOTE(review): presumably the argument of MCEXEC_UP_UTI_GET_CTX - confirm
 * against the ioctl handler.
 */
struct uti_get_ctx_desc {
unsigned long rp_rctx; /* Remote physical address of remote context */
void *rctx; /* Remote context */
void *lctx; /* Local context */
int uti_refill_tid;
unsigned long key; /* OUT: struct task_struct* of mcexec thread, used to search struct host_thread */
};
struct uti_save_fs_desc {
void *rctx; /* Remote context */
void *lctx; /* Local context */
}; };
#endif #endif

31
executer/include/uti.h Normal file
View File

@@ -0,0 +1,31 @@
#ifndef UTI_H_INCLUDED
#define UTI_H_INCLUDED
/*
 * One recorded system call: its number, six argument slots, its return
 * value, and a copy of the McKernel clv.  Used as the element type of
 * uti_desc.syscall_stack.
 */
struct syscall_struct {
int number;
unsigned long args[6];
unsigned long ret;
unsigned long uti_clv; /* copy of a clv in McKernel */
};
/* Number of entries in uti_desc.syscall_stack */
#define UTI_SZ_SYSCALL_STACK 16
/*
 * Variables accessed by mcexec.c and syscall_intercept.c
 *
 * Holds the local and remote 4096-byte context buffers, the identifiers of
 * the threads involved, the /dev/mcosX file descriptor, a bounded stack of
 * recorded system calls, per-syscall profile counters, and a flag used to
 * synchronize the two source files.
 */
struct uti_desc {
char lctx[4096]; /* TODO: Get the size from config.h */
char rctx[4096]; /* TODO: Get the size from config.h */
int mck_tid; /* TODO: Move this out for multiple migrated-to-Linux threads */
unsigned long key; /* struct task_struct* of mcexec thread, used to search struct host_thread */
int pid, tid; /* Used as the id of tracee when issuing MCEXEC_UP_TERMINATE_THREAD */
unsigned long uti_clv; /* copy of McKernel clv */
int fd; /* /dev/mcosX */
struct syscall_struct syscall_stack[UTI_SZ_SYSCALL_STACK]; /* stack of system call arguments and return values */
int syscall_stack_top; /* stack-pointer of syscall arguments list */
long syscalls[512], syscalls2[512]; /* Syscall profile counters */
int start_syscall_intercept; /* Used to sync between mcexec.c and syscall_intercept.c */
};
#endif

View File

@@ -1,6 +1,7 @@
/* archdeps.c COPYRIGHT FUJITSU LIMITED 2016 */ /* archdeps.c COPYRIGHT FUJITSU LIMITED 2016 */
#include <linux/version.h> #include <linux/version.h>
#include <linux/mm_types.h> #include <linux/mm_types.h>
#include <linux/kallsyms.h>
#include <asm/vdso.h> #include <asm/vdso.h>
#include "../../../config.h" #include "../../../config.h"
#include "../../mcctrl.h" #include "../../mcctrl.h"
@@ -17,29 +18,31 @@
#define D(fmt, ...) printk("%s(%d) " fmt, __func__, __LINE__, ##__VA_ARGS__) #define D(fmt, ...) printk("%s(%d) " fmt, __func__, __LINE__, ##__VA_ARGS__)
#ifdef MCCTRL_KSYM_vdso_start #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0)
# if MCCTRL_KSYM_vdso_start void *vdso_start;
void *vdso_start = (void *)MCCTRL_KSYM_vdso_start; void *vdso_end;
# endif static struct vm_special_mapping (*vdso_spec)[2];
#else
# error missing address of vdso_start.
#endif #endif
#ifdef MCCTRL_KSYM_vdso_end int arch_symbols_init(void)
# if MCCTRL_KSYM_vdso_end {
void *vdso_end = (void *)MCCTRL_KSYM_vdso_end; #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0)
# endif vdso_start = (void *) kallsyms_lookup_name("vdso_start");
#else if (WARN_ON(!vdso_start))
# error missing address of vdso_end. return -EFAULT;
vdso_end = (void *) kallsyms_lookup_name("vdso_end");
if (WARN_ON(!vdso_end))
return -EFAULT;
vdso_spec = (void *) kallsyms_lookup_name("vdso_spec");
if (WARN_ON(!vdso_spec))
return -EFAULT;
#endif #endif
#ifdef MCCTRL_KSYM_vdso_spec return 0;
# if MCCTRL_KSYM_vdso_spec }
static struct vm_special_mapping (*vdso_spec)[2] = (void*)MCCTRL_KSYM_vdso_spec;
# endif
#else
# error missing address of vdso_spec.
#endif
#ifdef POSTK_DEBUG_ARCH_DEP_52 #ifdef POSTK_DEBUG_ARCH_DEP_52
#define VDSO_MAXPAGES 1 #define VDSO_MAXPAGES 1

View File

@@ -1,5 +1,6 @@
/* archdeps.c COPYRIGHT FUJITSU LIMITED 2016 */ /* archdeps.c COPYRIGHT FUJITSU LIMITED 2016 */
#include <linux/version.h> #include <linux/version.h>
#include <linux/kallsyms.h>
#include "../../../config.h" #include "../../../config.h"
#include "../../mcctrl.h" #include "../../mcctrl.h"
@@ -13,57 +14,46 @@
#endif #endif
#endif /* POSTK_DEBUG_ARCH_DEP_83 */ #endif /* POSTK_DEBUG_ARCH_DEP_83 */
#ifdef MCCTRL_KSYM_vdso_image_64 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
#if MCCTRL_KSYM_vdso_image_64 static struct vdso_image *vdso_image_64;
struct vdso_image *vdso_image = (void *)MCCTRL_KSYM_vdso_image_64; #elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
static void *vdso_start;
static void *vdso_end;
static struct page **vdso_pages;
#endif #endif
static void *__vvar_page;
static long *hpet_address;
static void **hv_clock;
int arch_symbols_init(void)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
vdso_image_64 = (void *) kallsyms_lookup_name("vdso_image_64");
if (WARN_ON(!vdso_image_64))
return -EFAULT;
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
vdso_start = (void *) kallsyms_lookup_name("vdso_start");
if (WARN_ON(!vdso_start))
return -EFAULT;
vdso_end = (void *) kallsyms_lookup_name("vdso_end");
if (WARN_ON(!vdso_end))
return -EFAULT;
vdso_pages = (void *) kallsyms_lookup_name("vdso_pages");
if (WARN_ON(!vdso_pages))
return -EFAULT;
#endif #endif
#ifdef MCCTRL_KSYM_vdso_start __vvar_page = (void *) kallsyms_lookup_name("__vvar_page");
#if MCCTRL_KSYM_vdso_start if (WARN_ON(!__vvar_page))
void *vdso_start = (void *)MCCTRL_KSYM_vdso_start; return -EFAULT;
#endif
#endif
#ifdef MCCTRL_KSYM_vdso_end hpet_address = (void *) kallsyms_lookup_name("hpet_address");
#if MCCTRL_KSYM_vdso_end hv_clock = (void *) kallsyms_lookup_name("hv_clock");
void *vdso_end = (void *)MCCTRL_KSYM_vdso_end; return 0;
#endif }
#endif
#ifdef MCCTRL_KSYM_vdso_pages
#if MCCTRL_KSYM_vdso_pages
struct page **vdso_pages = (void *)MCCTRL_KSYM_vdso_pages;
#endif
#endif
#ifdef MCCTRL_KSYM___vvar_page
#if MCCTRL_KSYM___vvar_page
void *__vvar_page = (void *)MCCTRL_KSYM___vvar_page;
#endif
#endif
long *hpet_addressp
#ifdef MCCTRL_KSYM_hpet_address
#if MCCTRL_KSYM_hpet_address
= (void *)MCCTRL_KSYM_hpet_address;
#else
= &hpet_address;
#endif
#else
= NULL;
#endif
void **hv_clockp
#ifdef MCCTRL_KSYM_hv_clock
#if MCCTRL_KSYM_hv_clock
= (void *)MCCTRL_KSYM_hv_clock;
#else
= &hv_clock;
#endif
#else
= NULL;
#endif
#ifdef POSTK_DEBUG_ARCH_DEP_52 #ifdef POSTK_DEBUG_ARCH_DEP_52
#define VDSO_MAXPAGES 2 #define VDSO_MAXPAGES 2
@@ -138,7 +128,7 @@ void get_vdso_info(ihk_os_t os, long vdso_rpa)
/* VDSO pages */ /* VDSO pages */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0) #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
size = vdso_image->size; size = vdso_image_64->size;
vdso->vdso_npages = size >> PAGE_SHIFT; vdso->vdso_npages = size >> PAGE_SHIFT;
if (vdso->vdso_npages > VDSO_MAXPAGES) { if (vdso->vdso_npages > VDSO_MAXPAGES) {
@@ -148,7 +138,7 @@ void get_vdso_info(ihk_os_t os, long vdso_rpa)
for (i = 0; i < vdso->vdso_npages; ++i) { for (i = 0; i < vdso->vdso_npages; ++i) {
vdso->vdso_physlist[i] = virt_to_phys( vdso->vdso_physlist[i] = virt_to_phys(
vdso_image->data + (i * PAGE_SIZE)); vdso_image_64->data + (i * PAGE_SIZE));
} }
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) #elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
size = vdso_end - vdso_start; size = vdso_end - vdso_start;
@@ -185,36 +175,36 @@ void get_vdso_info(ihk_os_t os, long vdso_rpa)
#endif #endif
/* HPET page */ /* HPET page */
if (hpet_addressp && *hpet_addressp) { if (hpet_address && *hpet_address) {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0) #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0)
vdso->hpet_is_global = 0; vdso->hpet_is_global = 0;
vdso->hpet_virt = (void *)(-2 * PAGE_SIZE); vdso->hpet_virt = (void *)(-2 * PAGE_SIZE);
vdso->hpet_phys = *hpet_addressp; vdso->hpet_phys = *hpet_address;
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0) #elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0)
vdso->hpet_is_global = 0; vdso->hpet_is_global = 0;
vdso->hpet_virt = (void *)(-1 * PAGE_SIZE); vdso->hpet_virt = (void *)(-1 * PAGE_SIZE);
vdso->hpet_phys = *hpet_addressp; vdso->hpet_phys = *hpet_address;
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0) #elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
vdso->hpet_is_global = 0; vdso->hpet_is_global = 0;
vdso->hpet_virt = (void *)((vdso->vdso_npages + 1) * PAGE_SIZE); vdso->hpet_virt = (void *)((vdso->vdso_npages + 1) * PAGE_SIZE);
vdso->hpet_phys = *hpet_addressp; vdso->hpet_phys = *hpet_address;
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) #elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
vdso->hpet_is_global = 1; vdso->hpet_is_global = 1;
vdso->hpet_virt = (void *)fix_to_virt(VSYSCALL_HPET); vdso->hpet_virt = (void *)fix_to_virt(VSYSCALL_HPET);
vdso->hpet_phys = *hpet_addressp; vdso->hpet_phys = *hpet_address;
#endif #endif
} }
/* struct pvlock_vcpu_time_info table */ /* struct pvlock_vcpu_time_info table */
if (hv_clockp && *hv_clockp) { if (hv_clock && *hv_clock) {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0) #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0)
vdso->pvti_is_global = 0; vdso->pvti_is_global = 0;
vdso->pvti_virt = (void *)(-1 * PAGE_SIZE); vdso->pvti_virt = (void *)(-1 * PAGE_SIZE);
vdso->pvti_phys = virt_to_phys(*hv_clockp); vdso->pvti_phys = virt_to_phys(*hv_clock);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0) #elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0)
vdso->pvti_is_global = 1; vdso->pvti_is_global = 1;
vdso->pvti_virt = (void *)fix_to_virt(PVCLOCK_FIXMAP_BEGIN); vdso->pvti_virt = (void *)fix_to_virt(PVCLOCK_FIXMAP_BEGIN);
vdso->pvti_phys = virt_to_phys(*hv_clockp); vdso->pvti_phys = virt_to_phys(*hv_clock);
#endif #endif
} }
@@ -289,6 +279,14 @@ get_fs_ctx(void *ctx)
return tctx->fs; return tctx->fs;
} }
/*
 * Return the rsp member of the context that ctx points to.
 * ctx is interpreted as a struct trans_uctx and is dereferenced without
 * a NULL check.
 */
unsigned long
get_rsp_ctx(void *ctx)
{
	return ((struct trans_uctx *)ctx)->rsp;
}
#ifdef POSTK_DEBUG_ARCH_DEP_83 /* arch depend translate_rva_to_rpa() move */ #ifdef POSTK_DEBUG_ARCH_DEP_83 /* arch depend translate_rva_to_rpa() move */
int translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long rva, int translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long rva,
unsigned long *rpap, unsigned long *pgsizep) unsigned long *rpap, unsigned long *pgsizep)

View File

@@ -125,7 +125,6 @@ static int load_elf(struct linux_binprm *bprm
for(i = 0, st = 0; mode != 2;){ for(i = 0, st = 0; mode != 2;){
if(st == 0){ if(st == 0){
off = p & ~PAGE_MASK; off = p & ~PAGE_MASK;
#ifdef POSTK_DEBUG_ARCH_DEP_41 /* HOST-Linux version switch add */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)
rc = get_user_pages_remote(current, bprm->mm, rc = get_user_pages_remote(current, bprm->mm,
bprm->p, 1, FOLL_FORCE, &page, NULL, NULL); bprm->p, 1, FOLL_FORCE, &page, NULL, NULL);
@@ -141,17 +140,6 @@ static int load_elf(struct linux_binprm *bprm
bprm->p, 1, 0, 1, bprm->p, 1, 0, 1,
&page, NULL); &page, NULL);
#endif #endif
#else /* POSTK_DEBUG_ARCH_DEP_41 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,6,0)
rc = get_user_pages_remote(current, bprm->mm,
bprm->p, 1, 0, 1,
&page, NULL);
#else
rc = get_user_pages(current, bprm->mm,
bprm->p, 1, 0, 1,
&page, NULL);
#endif
#endif /* POSTK_DEBUG_ARCH_DEP_41 */
if(rc <= 0) { if(rc <= 0) {
kfree(pbuf); kfree(pbuf);
return -EFAULT; return -EFAULT;

File diff suppressed because it is too large Load Diff

View File

@@ -28,6 +28,7 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/device.h> #include <linux/device.h>
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/kallsyms.h>
#include "mcctrl.h" #include "mcctrl.h"
#include <ihk/ihk_host_user.h> #include <ihk/ihk_host_user.h>
@@ -43,8 +44,6 @@ extern void mcctrl_syscall_init(void);
extern void procfs_init(int); extern void procfs_init(int);
extern void procfs_exit(int); extern void procfs_exit(int);
extern void rus_page_hash_init(void);
extern void rus_page_hash_put_pages(void);
extern void uti_attr_finalize(void); extern void uti_attr_finalize(void);
extern void binfmt_mcexec_init(void); extern void binfmt_mcexec_init(void);
extern void binfmt_mcexec_exit(void); extern void binfmt_mcexec_exit(void);
@@ -84,13 +83,14 @@ static struct ihk_os_user_call_handler mcctrl_uchs[] = {
{ .request = MCEXEC_UP_SYS_MOUNT, .func = mcctrl_ioctl }, { .request = MCEXEC_UP_SYS_MOUNT, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_SYS_UMOUNT, .func = mcctrl_ioctl }, { .request = MCEXEC_UP_SYS_UMOUNT, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_SYS_UNSHARE, .func = mcctrl_ioctl }, { .request = MCEXEC_UP_SYS_UNSHARE, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_UTIL_THREAD1, .func = mcctrl_ioctl }, { .request = MCEXEC_UP_UTI_GET_CTX, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_UTIL_THREAD2, .func = mcctrl_ioctl }, { .request = MCEXEC_UP_UTI_SAVE_FS, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_SIG_THREAD, .func = mcctrl_ioctl }, { .request = MCEXEC_UP_SIG_THREAD, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_SYSCALL_THREAD, .func = mcctrl_ioctl }, { .request = MCEXEC_UP_SYSCALL_THREAD, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_TERMINATE_THREAD, .func = mcctrl_ioctl }, { .request = MCEXEC_UP_TERMINATE_THREAD, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_GET_NUM_POOL_THREADS, .func = mcctrl_ioctl }, { .request = MCEXEC_UP_GET_NUM_POOL_THREADS, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_UTI_ATTR, .func = mcctrl_ioctl }, { .request = MCEXEC_UP_UTI_ATTR, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_RELEASE_USER_SPACE, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_DEBUG_LOG, .func = mcctrl_ioctl }, { .request = MCEXEC_UP_DEBUG_LOG, .func = mcctrl_ioctl },
{ .request = IHK_OS_AUX_PERF_NUM, .func = mcctrl_ioctl }, { .request = IHK_OS_AUX_PERF_NUM, .func = mcctrl_ioctl },
{ .request = IHK_OS_AUX_PERF_SET, .func = mcctrl_ioctl }, { .request = IHK_OS_AUX_PERF_SET, .func = mcctrl_ioctl },
@@ -178,6 +178,7 @@ int mcctrl_os_shutdown_notifier(int os_index)
mdelay(200); mdelay(200);
} }
pager_cleanup();
sysfsm_cleanup(os[os_index]); sysfsm_cleanup(os[os_index]);
free_topology_info(os[os_index]); free_topology_info(os[os_index]);
ihk_os_unregister_user_call_handlers(os[os_index], mcctrl_uc + os_index); ihk_os_unregister_user_call_handlers(os[os_index], mcctrl_uc + os_index);
@@ -185,9 +186,6 @@ int mcctrl_os_shutdown_notifier(int os_index)
destroy_ikc_channels(os[os_index]); destroy_ikc_channels(os[os_index]);
procfs_exit(os_index); procfs_exit(os_index);
} }
#ifdef POSTK_DEBUG_TEMP_FIX_35 /* in shutdown phase, rus_page_hash_put_pages() call added. */
rus_page_hash_put_pages();
#endif /* POSTK_DEBUG_TEMP_FIX_35 */
os[os_index] = NULL; os[os_index] = NULL;
@@ -214,6 +212,68 @@ static struct ihk_os_notifier mcctrl_os_notifier = {
.ops = &mcctrl_os_notifier_ops, .ops = &mcctrl_os_notifier_ops,
}; };
/*
 * Pointers to kernel functions and objects that mcctrl reaches indirectly.
 * They are filled in by symbols_init(), which looks up the kernel symbol
 * named like each pointer without its mcctrl_ prefix through
 * kallsyms_lookup_name().
 */
int (*mcctrl_sys_mount)(char *dev_name, char *dir_name, char *type,
unsigned long flags, void *data);
int (*mcctrl_sys_umount)(char *dir_name, int flags);
int (*mcctrl_sys_unshare)(unsigned long unshare_flags);
long (*mcctrl_sched_setaffinity)(pid_t pid, const struct cpumask *in_mask);
int (*mcctrl_sched_setscheduler_nocheck)(struct task_struct *p, int policy,
const struct sched_param *param);
ssize_t (*mcctrl_sys_readlink)(const char *path, char *buf,
size_t bufsiz);
void (*mcctrl_zap_page_range)(struct vm_area_struct *vma,
unsigned long start,
unsigned long size,
struct zap_details *details);
/* Address of the kernel's hugetlbfs_inode_operations object */
struct inode_operations *mcctrl_hugetlbfs_inode_operations;
/* Look up a kernel symbol by name; yields NULL when it is not found. */
static void *mcctrl_lookup_ksym(const char *name)
{
	return (void *)kallsyms_lookup_name(name);
}

/*
 * Resolve every kernel symbol mcctrl reaches through its mcctrl_* pointers,
 * then the architecture-specific ones.
 *
 * Lookups run in a fixed order and stop at the first miss: the miss
 * triggers WARN_ON() and -EFAULT is returned.  When every lookup succeeds
 * the result of arch_symbols_init() is returned.
 */
static int symbols_init(void)
{
	mcctrl_sys_mount = mcctrl_lookup_ksym("sys_mount");
	if (WARN_ON(!mcctrl_sys_mount))
		goto not_found;

	mcctrl_sys_umount = mcctrl_lookup_ksym("sys_umount");
	if (WARN_ON(!mcctrl_sys_umount))
		goto not_found;

	mcctrl_sys_unshare = mcctrl_lookup_ksym("sys_unshare");
	if (WARN_ON(!mcctrl_sys_unshare))
		goto not_found;

	mcctrl_sched_setaffinity = mcctrl_lookup_ksym("sched_setaffinity");
	if (WARN_ON(!mcctrl_sched_setaffinity))
		goto not_found;

	mcctrl_sched_setscheduler_nocheck =
		mcctrl_lookup_ksym("sched_setscheduler_nocheck");
	if (WARN_ON(!mcctrl_sched_setscheduler_nocheck))
		goto not_found;

	mcctrl_sys_readlink = mcctrl_lookup_ksym("sys_readlink");
	if (WARN_ON(!mcctrl_sys_readlink))
		goto not_found;

	mcctrl_zap_page_range = mcctrl_lookup_ksym("zap_page_range");
	if (WARN_ON(!mcctrl_zap_page_range))
		goto not_found;

	mcctrl_hugetlbfs_inode_operations =
		mcctrl_lookup_ksym("hugetlbfs_inode_operations");
	if (WARN_ON(!mcctrl_hugetlbfs_inode_operations))
		goto not_found;

	return arch_symbols_init();

not_found:
	return -EFAULT;
}
static int __init mcctrl_init(void) static int __init mcctrl_init(void)
{ {
int ret = 0; int ret = 0;
@@ -227,10 +287,11 @@ static int __init mcctrl_init(void)
os[i] = NULL; os[i] = NULL;
} }
rus_page_hash_init();
binfmt_mcexec_init(); binfmt_mcexec_init();
if ((ret = symbols_init()))
goto error;
if ((ret = ihk_host_register_os_notifier(&mcctrl_os_notifier)) != 0) { if ((ret = ihk_host_register_os_notifier(&mcctrl_os_notifier)) != 0) {
printk("mcctrl: error: registering OS notifier\n"); printk("mcctrl: error: registering OS notifier\n");
goto error; goto error;
@@ -241,7 +302,6 @@ static int __init mcctrl_init(void)
error: error:
binfmt_mcexec_exit(); binfmt_mcexec_exit();
rus_page_hash_put_pages();
return ret; return ret;
} }
@@ -253,7 +313,6 @@ static void __exit mcctrl_exit(void)
} }
binfmt_mcexec_exit(); binfmt_mcexec_exit();
rus_page_hash_put_pages();
uti_attr_finalize(); uti_attr_finalize();
printk("mcctrl: unregistered.\n"); printk("mcctrl: unregistered.\n");

View File

@@ -52,6 +52,8 @@
static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ihk_ikc_channel_desc *c); static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ihk_ikc_channel_desc *c);
int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet); int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet);
void sig_done(unsigned long arg, int err); void sig_done(unsigned long arg, int err);
void mcctrl_perf_ack(ihk_os_t os, struct ikc_scd_packet *packet);
void mcctrl_futex_wake(struct ikc_scd_packet *pisp);
void mcctrl_os_read_write_cpu_response(ihk_os_t os, void mcctrl_os_read_write_cpu_response(ihk_os_t os,
struct ikc_scd_packet *pisp); struct ikc_scd_packet *pisp);
void mcctrl_eventfd(ihk_os_t os, struct ikc_scd_packet *pisp); void mcctrl_eventfd(ihk_os_t os, struct ikc_scd_packet *pisp);
@@ -154,7 +156,7 @@ int mcctrl_ikc_send_wait(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp,
spin_lock_irqsave(&usrdata->wakeup_descs_lock, flags); spin_lock_irqsave(&usrdata->wakeup_descs_lock, flags);
list_add(&desc->chain, &usrdata->wakeup_descs_list); list_add(&desc->chain, &usrdata->wakeup_descs_list);
spin_unlock_irqrestore(&usrdata->wakeup_descs_lock, flags); spin_unlock_irqrestore(&usrdata->wakeup_descs_lock, flags);
if (free_addrs_count) if (do_frees)
*do_frees = 0; *do_frees = 0;
return ret < 0 ? ret : -ETIME; return ret < 0 ? ret : -ETIME;
} }
@@ -182,6 +184,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
case SCD_MSG_PREPARE_PROCESS_ACKED: case SCD_MSG_PREPARE_PROCESS_ACKED:
case SCD_MSG_PERF_ACK: case SCD_MSG_PERF_ACK:
case SCD_MSG_SEND_SIGNAL_ACK: case SCD_MSG_SEND_SIGNAL_ACK:
case SCD_MSG_PROCFS_ANSWER:
mcctrl_wakeup_cb(__os, pisp); mcctrl_wakeup_cb(__os, pisp);
break; break;
@@ -189,11 +192,6 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
mcexec_syscall(usrdata, pisp); mcexec_syscall(usrdata, pisp);
break; break;
case SCD_MSG_PROCFS_ANSWER:
procfs_answer(usrdata, pisp->pid);
break;
case SCD_MSG_SYSFS_REQ_CREATE: case SCD_MSG_SYSFS_REQ_CREATE:
case SCD_MSG_SYSFS_REQ_MKDIR: case SCD_MSG_SYSFS_REQ_MKDIR:
case SCD_MSG_SYSFS_REQ_SYMLINK: case SCD_MSG_SYSFS_REQ_SYMLINK:
@@ -209,7 +207,8 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
case SCD_MSG_PROCFS_TID_CREATE: case SCD_MSG_PROCFS_TID_CREATE:
case SCD_MSG_PROCFS_TID_DELETE: case SCD_MSG_PROCFS_TID_DELETE:
procfsm_packet_handler(__os, pisp->msg, pisp->pid, pisp->arg); procfsm_packet_handler(__os, pisp->msg, pisp->pid, pisp->arg,
pisp->resp_pa);
break; break;
case SCD_MSG_GET_VDSO_INFO: case SCD_MSG_GET_VDSO_INFO:
@@ -225,6 +224,10 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
mcctrl_eventfd(__os, pisp); mcctrl_eventfd(__os, pisp);
break; break;
case SCD_MSG_FUTEX_WAKE:
mcctrl_futex_wake(pisp);
break;
default: default:
printk(KERN_ERR "mcctrl:syscall_packet_handler:" printk(KERN_ERR "mcctrl:syscall_packet_handler:"
"unknown message (%d.%d.%d.%d.%d.%#lx)\n", "unknown message (%d.%d.%d.%d.%d.%#lx)\n",

View File

@@ -67,6 +67,7 @@
#define SCD_MSG_PROCFS_DELETE 0x11 #define SCD_MSG_PROCFS_DELETE 0x11
#define SCD_MSG_PROCFS_REQUEST 0x12 #define SCD_MSG_PROCFS_REQUEST 0x12
#define SCD_MSG_PROCFS_ANSWER 0x13 #define SCD_MSG_PROCFS_ANSWER 0x13
#define SCD_MSG_PROCFS_RELEASE 0x15
#define SCD_MSG_DEBUG_LOG 0x20 #define SCD_MSG_DEBUG_LOG 0x20
@@ -101,23 +102,18 @@
#define SCD_MSG_CPU_RW_REG 0x52 #define SCD_MSG_CPU_RW_REG 0x52
#define SCD_MSG_CPU_RW_REG_RESP 0x53 #define SCD_MSG_CPU_RW_REG_RESP 0x53
#define SCD_MSG_FUTEX_WAKE 0x60
#define DMA_PIN_SHIFT 21 #define DMA_PIN_SHIFT 21
#define DO_USER_MODE #define DO_USER_MODE
#define __NR_coredump 999 #define __NR_coredump 999
#ifdef POSTK_DEBUG_TEMP_FIX_61 /* Core table size and lseek return value to loff_t */
struct coretable { struct coretable {
loff_t len; loff_t len;
unsigned long addr; unsigned long addr;
}; };
#else /* POSTK_DEBUG_TEMP_FIX_61 */
struct coretable {
int len;
unsigned long addr;
};
#endif /* POSTK_DEBUG_TEMP_FIX_61 */
enum mcctrl_os_cpu_operation { enum mcctrl_os_cpu_operation {
MCCTRL_OS_CPU_READ_REGISTER, MCCTRL_OS_CPU_READ_REGISTER,
@@ -125,6 +121,12 @@ enum mcctrl_os_cpu_operation {
MCCTRL_OS_CPU_MAX_OP MCCTRL_OS_CPU_MAX_OP
}; };
/*
 * Used to wake up a Linux thread that is blocked in futex_wait().
 * NOTE(review): a pointer to this structure is presumably what travels in
 * the `futex.resp` field of an SCD_MSG_FUTEX_WAKE packet - confirm against
 * mcctrl_futex_wake().
 */
struct uti_futex_resp {
/* presumably set non-zero once the wake-up has been delivered - TODO confirm */
int done;
/* wait queue associated with the wake-up; the waiter is not visible here */
wait_queue_head_t wq;
};
struct ikc_scd_packet { struct ikc_scd_packet {
int msg; int msg;
int err; int err;
@@ -147,7 +149,7 @@ struct ikc_scd_packet {
long sysfs_arg3; long sysfs_arg3;
}; };
/* SCD_MSG_SCHEDULE_THREAD */ /* SCD_MSG_WAKE_UP_SYSCALL_THREAD */
struct { struct {
int ttid; int ttid;
}; };
@@ -163,6 +165,12 @@ struct ikc_scd_packet {
struct { struct {
int eventfd_type; int eventfd_type;
}; };
/* SCD_MSG_FUTEX_WAKE */
struct {
void *resp;
int *spin_sleep; /* 1: waiting in linux_wait_event() 0: woken up by someone else */
} futex;
}; };
char padding[8]; char padding[8];
}; };
@@ -213,9 +221,12 @@ struct mcctrl_channel {
}; };
struct mcctrl_per_thread_data { struct mcctrl_per_thread_data {
struct mcctrl_per_proc_data *ppd;
struct list_head hash; struct list_head hash;
struct task_struct *task; struct task_struct *task;
void *data; void *data;
int tid; /* debug */
atomic_t refcount;
}; };
#define MCCTRL_PER_THREAD_DATA_HASH_SHIFT 8 #define MCCTRL_PER_THREAD_DATA_HASH_SHIFT 8
@@ -315,6 +326,7 @@ struct mcctrl_part_exec {
struct mutex lock; struct mutex lock;
int nr_processes; int nr_processes;
int nr_processes_left; int nr_processes_left;
int process_rank;
cpumask_t cpus_used; cpumask_t cpus_used;
struct list_head pli_list; struct list_head pli_list;
}; };
@@ -400,10 +412,30 @@ int mcctrl_ikc_send_wait(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp,
ihk_os_t osnum_to_os(int n); ihk_os_t osnum_to_os(int n);
/* look up symbols, plus arch-specific ones */
extern int (*mcctrl_sys_mount)(char *dev_name, char *dir_name, char *type,
unsigned long flags, void *data);
extern int (*mcctrl_sys_umount)(char *dir_name, int flags);
extern int (*mcctrl_sys_unshare)(unsigned long unshare_flags);
extern long (*mcctrl_sched_setaffinity)(pid_t pid,
const struct cpumask *in_mask);
extern int (*mcctrl_sched_setscheduler_nocheck)(struct task_struct *p,
int policy,
const struct sched_param *param);
extern ssize_t (*mcctrl_sys_readlink)(const char *path, char *buf,
size_t bufsiz);
extern void (*mcctrl_zap_page_range)(struct vm_area_struct *vma,
unsigned long start,
unsigned long size,
struct zap_details *details);
extern struct inode_operations *mcctrl_hugetlbfs_inode_operations;
/* syscall.c */ /* syscall.c */
void pager_add_process(void); void pager_add_process(void);
void pager_remove_process(struct mcctrl_per_proc_data *ppd); void pager_remove_process(struct mcctrl_per_proc_data *ppd);
void pager_cleanup(void);
int __do_in_kernel_irq_syscall(ihk_os_t os, struct ikc_scd_packet *packet);
int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet); int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet);
int mcctrl_add_per_proc_data(struct mcctrl_usrdata *ud, int pid, int mcctrl_add_per_proc_data(struct mcctrl_usrdata *ud, int pid,
struct mcctrl_per_proc_data *ppd); struct mcctrl_per_proc_data *ppd);
@@ -412,20 +444,18 @@ struct mcctrl_per_proc_data *mcctrl_get_per_proc_data(
struct mcctrl_usrdata *ud, int pid); struct mcctrl_usrdata *ud, int pid);
void mcctrl_put_per_proc_data(struct mcctrl_per_proc_data *ppd); void mcctrl_put_per_proc_data(struct mcctrl_per_proc_data *ppd);
int mcctrl_add_per_thread_data(struct mcctrl_per_proc_data* ppd, int mcctrl_add_per_thread_data(struct mcctrl_per_proc_data *ppd, void *data);
struct task_struct *task, void *data); void mcctrl_put_per_thread_data_unsafe(struct mcctrl_per_thread_data *ptd);
int mcctrl_delete_per_thread_data(struct mcctrl_per_proc_data* ppd, void mcctrl_put_per_thread_data(struct mcctrl_per_thread_data* ptd);
struct task_struct *task);
#ifdef POSTK_DEBUG_ARCH_DEP_56 /* Strange how to use inline declaration fix. */ #ifdef POSTK_DEBUG_ARCH_DEP_56 /* Strange how to use inline declaration fix. */
static inline struct mcctrl_per_thread_data *mcctrl_get_per_thread_data( inline struct mcctrl_per_thread_data *mcctrl_get_per_thread_data(struct mcctrl_per_proc_data *ppd, struct task_struct *task)
struct mcctrl_per_proc_data *ppd, struct task_struct *task)
{ {
struct mcctrl_per_thread_data *ptd_iter, *ptd = NULL; struct mcctrl_per_thread_data *ptd_iter, *ptd = NULL;
int hash = (((uint64_t)task >> 4) & MCCTRL_PER_THREAD_DATA_HASH_MASK); int hash = (((uint64_t)task >> 4) & MCCTRL_PER_THREAD_DATA_HASH_MASK);
unsigned long flags; unsigned long flags;
/* Check if data for this thread exists and return it */ /* Check if data for this thread exists */
read_lock_irqsave(&ppd->per_thread_data_hash_lock[hash], flags); write_lock_irqsave(&ppd->per_thread_data_hash_lock[hash], flags);
list_for_each_entry(ptd_iter, &ppd->per_thread_data_hash[hash], hash) { list_for_each_entry(ptd_iter, &ppd->per_thread_data_hash[hash], hash) {
if (ptd_iter->task == task) { if (ptd_iter->task == task) {
@@ -434,16 +464,27 @@ static inline struct mcctrl_per_thread_data *mcctrl_get_per_thread_data(
} }
} }
read_unlock_irqrestore(&ppd->per_thread_data_hash_lock[hash], flags); if (ptd) {
return ptd ? ptd->data : NULL; if (atomic_read(&ptd->refcount) <= 0) {
printk("%s: ERROR: use-after-free detected (%d)", __FUNCTION__, atomic_read(&ptd->refcount));
ptd = NULL;
goto out;
}
atomic_inc(&ptd->refcount);
}
out:
write_unlock_irqrestore(&ppd->per_thread_data_hash_lock[hash], flags);
return ptd;
} }
#else /* POSTK_DEBUG_ARCH_DEP_56 */ #else /* POSTK_DEBUG_ARCH_DEP_56 */
inline struct mcctrl_per_thread_data *mcctrl_get_per_thread_data( inline struct mcctrl_per_thread_data *mcctrl_get_per_thread_data(struct mcctrl_per_proc_data *ppd, struct task_struct *task);
struct mcctrl_per_proc_data *ppd, struct task_struct *task);
#endif /* POSTK_DEBUG_ARCH_DEP_56 */ #endif /* POSTK_DEBUG_ARCH_DEP_56 */
int mcctrl_clear_pte_range(uintptr_t start, uintptr_t len);
void __return_syscall(ihk_os_t os, struct ikc_scd_packet *packet, void __return_syscall(ihk_os_t os, struct ikc_scd_packet *packet,
long ret, int stid); long ret, int stid);
int clear_pte_range(uintptr_t start, uintptr_t len);
int mcctrl_os_alive(void); int mcctrl_os_alive(void);
@@ -455,7 +496,6 @@ struct procfs_read {
int count; /* bytes to read (request) */ int count; /* bytes to read (request) */
int eof; /* if eof is detected, 1 otherwise 0. (answer)*/ int eof; /* if eof is detected, 1 otherwise 0. (answer)*/
int ret; /* read bytes (answer) */ int ret; /* read bytes (answer) */
int status; /* non-zero if done (answer) */
int newcpu; /* migrated new cpu (answer) */ int newcpu; /* migrated new cpu (answer) */
int readwrite; /* 0:read, 1:write */ int readwrite; /* 0:read, 1:write */
char fname[PROCFS_NAME_MAX]; /* procfs filename (request) */ char fname[PROCFS_NAME_MAX]; /* procfs filename (request) */
@@ -468,7 +508,8 @@ struct procfs_file {
}; };
void procfs_answer(struct mcctrl_usrdata *ud, int pid); void procfs_answer(struct mcctrl_usrdata *ud, int pid);
int procfsm_packet_handler(void *os, int msg, int pid, unsigned long arg); int procfsm_packet_handler(void *os, int msg, int pid, unsigned long arg,
unsigned long resp_pa);
void add_tid_entry(int osnum, int pid, int tid); void add_tid_entry(int osnum, int pid, int tid);
void add_pid_entry(int osnum, int pid); void add_pid_entry(int osnum, int pid);
void delete_tid_entry(int osnum, int pid, int tid); void delete_tid_entry(int osnum, int pid, int tid);
@@ -504,7 +545,9 @@ struct vdso {
int reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp, int reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp,
unsigned long *endp); unsigned long *endp);
int release_user_space(uintptr_t start, uintptr_t len);
void get_vdso_info(ihk_os_t os, long vdso_pa); void get_vdso_info(ihk_os_t os, long vdso_pa);
int arch_symbols_init(void);
struct get_cpu_mapping_req { struct get_cpu_mapping_req {
int busy; /* INOUT: */ int busy; /* INOUT: */

View File

@@ -103,33 +103,6 @@ getpath(struct procfs_list_entry *e, char *buf, int bufsize)
} }
} }
/**
 * \brief Handle an SCD_MSG_PROCFS_ANSWER message.
 *
 * Wakes every waiter on the procfs wait queue that matches the request:
 * the per-process queue when \p pid is positive, the OS-wide queue of
 * \p ud otherwise.
 *
 * \param ud mcctrl_usrdata pointer
 * \param pid PID of the requesting process (<= 0: not process specific)
 */
void procfs_answer(struct mcctrl_usrdata *ud, int pid)
{
	struct mcctrl_per_proc_data *proc_data;

	/* Not tied to a process: the waiter sleeps on the OS-wide queue. */
	if (pid <= 0) {
		wake_up_all(&ud->wq_procfs);
		return;
	}

	proc_data = mcctrl_get_per_proc_data(ud, pid);
	if (unlikely(!proc_data)) {
		kprintf("%s: ERROR: no per-process structure for PID %d\n",
			__FUNCTION__, pid);
		return;
	}

	wake_up_all(&proc_data->wq_procfs);

	/* Drop the reference taken by mcctrl_get_per_proc_data(). */
	mcctrl_put_per_proc_data(proc_data);
}
static struct procfs_list_entry * static struct procfs_list_entry *
find_procfs_entry(struct procfs_list_entry *parent, const char *name) find_procfs_entry(struct procfs_list_entry *parent, const char *name)
{ {
@@ -321,6 +294,8 @@ get_base_entry(int osnum)
if(!e){ if(!e){
e = add_procfs_entry(NULL, name, S_IFDIR | 0555, e = add_procfs_entry(NULL, name, S_IFDIR | 0555,
uid, gid, NULL); uid, gid, NULL);
if (!e)
return NULL;
e->osnum = osnum; e->osnum = osnum;
} }
return e; return e;
@@ -456,6 +431,8 @@ proc_exe_link(int osnum, int pid, const char *path)
e = add_procfs_entry(parent, "exe", S_IFLNK | 0777, uid, gid, e = add_procfs_entry(parent, "exe", S_IFLNK | 0777, uid, gid,
path); path);
if (!e)
goto out;
e->data = kmalloc(strlen(path) + 1, GFP_KERNEL); e->data = kmalloc(strlen(path) + 1, GFP_KERNEL);
strcpy(e->data, path); strcpy(e->data, path);
task = find_procfs_entry(parent, "task"); task = find_procfs_entry(parent, "task");
@@ -464,6 +441,7 @@ proc_exe_link(int osnum, int pid, const char *path)
uid, gid, path); uid, gid, path);
} }
} }
out:
up(&procfs_file_list_lock); up(&procfs_file_list_lock);
} }
@@ -509,7 +487,6 @@ procfs_exit(int osnum)
* This function conforms to the 2) way of fs/proc/generic.c * This function conforms to the 2) way of fs/proc/generic.c
* from linux-2.6.39.4. * from linux-2.6.39.4.
*/ */
#ifdef POSTK_DEBUG_TEMP_FIX_43 /* Fixed an issue that failed pread / pwrite of size larger than 4MB */
static ssize_t __mckernel_procfs_read_write( static ssize_t __mckernel_procfs_read_write(
struct file *file, struct file *file,
char __user *buf, size_t nbytes, char __user *buf, size_t nbytes,
@@ -520,7 +497,7 @@ static ssize_t __mckernel_procfs_read_write(
int order = 0; int order = 0;
volatile struct procfs_read *r = NULL; volatile struct procfs_read *r = NULL;
struct ikc_scd_packet isp; struct ikc_scd_packet isp;
int ret, osnum, pid, retw; int ret, osnum, pid;
unsigned long pbuf; unsigned long pbuf;
size_t count = nbytes; size_t count = nbytes;
size_t copy_size = 0; size_t copy_size = 0;
@@ -615,11 +592,11 @@ static ssize_t __mckernel_procfs_read_write(
while (count > 0) { while (count > 0) {
int this_len = min_t(ssize_t, count, copy_size); int this_len = min_t(ssize_t, count, copy_size);
int do_free;
r->pbuf = pbuf; r->pbuf = pbuf;
r->eof = 0; r->eof = 0;
r->ret = -EIO; /* default */ r->ret = -EIO; /* default */
r->status = 0;
r->offset = offset; r->offset = offset;
r->count = this_len; r->count = this_len;
r->readwrite = read_write; r->readwrite = read_write;
@@ -629,50 +606,26 @@ static ssize_t __mckernel_procfs_read_write(
isp.arg = virt_to_phys(r); isp.arg = virt_to_phys(r);
isp.pid = pid; isp.pid = pid;
ret = mcctrl_ikc_send(osnum_to_os(e->osnum), ret = mcctrl_ikc_send_wait(osnum_to_os(e->osnum),
(pid > 0) ? ppd->ikc_target_cpu : 0, &isp); (pid > 0) ? ppd->ikc_target_cpu : 0,
&isp, HZ, NULL, &do_free, 1, r);
if (!do_free && ret >= 0) {
ret = -EIO;
}
if (ret < 0) { if (ret < 0) {
goto out; /* error */ if (ret == -ETIME) {
} pr_info("%s: error: timeout (1 sec)\n",
__func__);
/* Wait for a reply. */ }
ret = -EIO; /* default exit code */ else if (ret == -ERESTARTSYS) {
dprintk("%s: waiting for reply\n", __FUNCTION__); ret = -ERESTART;
}
retry_wait: if (!do_free)
/* Wait for the status field of the procfs_read structure, r = NULL;
* wait on per-process or OS specific data depending on
* who the request is for.
*/
if (pid > 0) {
retw = wait_event_interruptible_timeout(ppd->wq_procfs,
r->status != 0, HZ);
}
else {
retw = wait_event_interruptible_timeout(udp->wq_procfs,
r->status != 0, HZ);
}
/* Timeout? */
if (retw == 0 && r->status == 0) {
printk("%s: error: timeout (1 sec)\n", __FUNCTION__);
goto out; goto out;
} }
/* Interrupted? */
else if (retw == -ERESTARTSYS) {
ret = -ERESTART;
goto out;
}
/* Were we woken up by a reply to another procfs request? */
else if (r->status == 0) {
/* TODO: r->status is not set atomically, we could be woken
* up with status == 0 and it could change to 1 while in this
* code, we could potentially miss the wake_up()...
*/
printk("%s: stale wake-up, retrying\n", __FUNCTION__);
goto retry_wait;
}
/* Wake up and check the result. */ /* Wake up and check the result. */
dprintk("%s: woke up. ret: %d, eof: %d\n", dprintk("%s: woke up. ret: %d, eof: %d\n",
@@ -717,193 +670,6 @@ out:
return ret; return ret;
} }
#else /* POSTK_DEBUG_TEMP_FIX_43 */
/*
 * Forward a read or write on a McKernel procfs file to McKernel via IKC.
 *
 * The request is described by a struct procfs_read whose physical address
 * is sent in an SCD_MSG_PROCFS_REQUEST packet; the data travels through a
 * physically contiguous kernel bounce buffer. The caller sleeps until
 * r->status becomes non-zero, the wait times out (5 seconds) or a signal
 * arrives.
 *
 * file:       procfs file being accessed
 * buf:        user buffer (destination for reads)
 * nbytes:     number of bytes requested
 * ppos:       file position, advanced by the number of bytes transferred
 * read_write: 0 for read, 1 for write
 *
 * Returns the byte count reported by McKernel (r->ret), 0 when nbytes is 0
 * or *ppos is negative, or a negative errno (-EINVAL, -ENOMEM, -EIO,
 * -ERESTART, -EFAULT, or the error from mcctrl_ikc_send()).
 */
static ssize_t __mckernel_procfs_read_write(
		struct file *file,
		char __user *buf, size_t nbytes,
		loff_t *ppos, int read_write)
{
	struct inode * inode = file->f_inode;
	char *kern_buffer = NULL;
	int order = 0;
	volatile struct procfs_read *r = NULL;
	struct ikc_scd_packet isp;
	int ret, osnum, pid, retw;
	unsigned long pbuf;
	unsigned long count = nbytes;
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
	struct proc_dir_entry *dp = PDE(inode);
	struct procfs_list_entry *e = dp->data;
#else
	struct procfs_list_entry *e = PDE_DATA(inode);
#endif
	loff_t offset = *ppos;
	char pathbuf[PROCFS_NAME_MAX];
	char *path, *p;
	ihk_os_t os = NULL;
	struct mcctrl_usrdata *udp = NULL;
	struct mcctrl_per_proc_data *ppd = NULL;

	if (count <= 0 || offset < 0) {
		return 0;
	}

	path = getpath(e, pathbuf, PROCFS_NAME_MAX);
	dprintk("%s: invoked for %s, offset: %lu, count: %lu\n",
			__FUNCTION__, path,
			(unsigned long)offset, count);

	/* Verify OS number */
	ret = sscanf(path, "mcos%d/", &osnum);
	if (ret != 1) {
		printk("%s: error: couldn't determine OS number\n", __FUNCTION__);
		return -EINVAL;
	}

	if (osnum != e->osnum) {
		printk("%s: error: OS numbers don't match\n", __FUNCTION__);
		return -EINVAL;
	}

	/*
	 * Is this request for a specific process?
	 * sscanf() above succeeds even when no '/' follows the OS number, so
	 * the strchr() result must be checked before stepping past it.
	 */
	p = strchr(path, '/');
	if (!p || sscanf(p + 1, "%d/", &pid) != 1) {
		pid = -1;
	}

	os = osnum_to_os(osnum);
	if (!os) {
		printk("%s: error: no IHK OS data found for OS %d\n",
				__FUNCTION__, osnum);
		return -EINVAL;
	}

	udp = ihk_host_os_get_usrdata(os);
	if (!udp) {
		printk("%s: error: no MCCTRL data found for OS %d\n",
				__FUNCTION__, osnum);
		return -EINVAL;
	}

	if (pid > 0) {
		ppd = mcctrl_get_per_proc_data(udp, pid);

		if (unlikely(!ppd)) {
			printk("%s: error: no per-process structure for PID %d",
					__FUNCTION__, pid);
			return -EINVAL;
		}
	}

	/*
	 * Smallest power of two covering count, converted to a page order
	 * (the subtraction of 12 assumes 4 KiB pages). The shift is done on
	 * an unsigned long so that large counts do not overflow an int.
	 */
	while ((1UL << order) < count) ++order;
	if (order > 12) {
		order -= 12;
	}
	else {
		order = 1;
	}

	/* NOTE: we need physically contiguous memory to pass through IKC */
	kern_buffer = (char *)__get_free_pages(GFP_KERNEL, order);
	if (!kern_buffer) {
		printk("%s: ERROR: allocating kernel buffer\n", __FUNCTION__);
		ret = -ENOMEM;
		goto out;
	}

	pbuf = virt_to_phys(kern_buffer);

	r = kmalloc(sizeof(struct procfs_read), GFP_KERNEL);
	if (r == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	r->pbuf = pbuf;
	r->eof = 0;
	r->ret = -EIO; /* default */
	r->status = 0;
	r->offset = offset;
	r->count = count;
	r->readwrite = read_write;
	strncpy((char *)r->fname, path, PROCFS_NAME_MAX);
	/* strncpy() does not terminate when the source fills the buffer */
	r->fname[PROCFS_NAME_MAX - 1] = '\0';
	isp.msg = SCD_MSG_PROCFS_REQUEST;
	isp.ref = 0;
	isp.arg = virt_to_phys(r);
	isp.pid = pid;

	ret = mcctrl_ikc_send(osnum_to_os(e->osnum),
			(pid > 0) ? ppd->ikc_target_cpu : 0, &isp);

	if (ret < 0) {
		goto out; /* error */
	}

	/* Wait for a reply. */
	ret = -EIO; /* default exit code */
	dprintk("%s: waiting for reply\n", __FUNCTION__);

retry_wait:
	/* Wait for the status field of the procfs_read structure,
	 * wait on per-process or OS specific data depending on
	 * who the request is for.
	 */
	if (pid > 0) {
		retw = wait_event_interruptible_timeout(ppd->wq_procfs,
				r->status != 0, 5 * HZ);
	}
	else {
		retw = wait_event_interruptible_timeout(udp->wq_procfs,
				r->status != 0, 5 * HZ);
	}

	/*
	 * Timeout?
	 * NOTE(review): r and kern_buffer are freed at out: although the
	 * request may still be outstanding on the McKernel side - confirm
	 * that a late answer cannot write into freed memory.
	 */
	if (retw == 0 && r->status == 0) {
		/* The message now matches the 5 * HZ wait above. */
		printk("%s: error: timeout (5 sec)\n", __FUNCTION__);
		goto out;
	}
	/* Interrupted? */
	else if (retw == -ERESTARTSYS) {
		ret = -ERESTART;
		goto out;
	}
	/* Were we woken up by a reply to another procfs request? */
	else if (r->status == 0) {
		/* TODO: r->status is not set atomically, we could be woken
		 * up with status == 0 and it could change to 1 while in this
		 * code, we could potentially miss the wake_up()...
		 */
		printk("%s: stale wake-up, retrying\n", __FUNCTION__);
		goto retry_wait;
	}

	/* Wake up and check the result. */
	dprintk("%s: woke up. ret: %d, eof: %d\n",
			__FUNCTION__, r->ret, r->eof);

	if (r->ret > 0) {
		if (read_write == 0) {
			if (copy_to_user(buf, kern_buffer, r->ret)) {
				printk("%s: ERROR: copy_to_user failed.\n", __FUNCTION__);
				ret = -EFAULT;
				goto out;
			}
		}
		*ppos += r->ret;
	}
	ret = r->ret;

out:
	if (ppd)
		mcctrl_put_per_proc_data(ppd);
	if (kern_buffer)
		free_pages((uintptr_t)kern_buffer, order);
	if (r)
		kfree((void *)r);

	return ret;
}
#endif /* POSTK_DEBUG_TEMP_FIX_43 */
static ssize_t mckernel_procfs_read(struct file *file, static ssize_t mckernel_procfs_read(struct file *file,
char __user *buf, size_t nbytes, loff_t *ppos) char __user *buf, size_t nbytes, loff_t *ppos)
@@ -939,33 +705,48 @@ struct procfs_work {
int msg; int msg;
int pid; int pid;
unsigned long arg; unsigned long arg;
unsigned long resp_pa;
struct work_struct work; struct work_struct work;
}; };
static void procfsm_work_main(struct work_struct *work0) static void procfsm_work_main(struct work_struct *work0)
{ {
struct procfs_work *work = container_of(work0, struct procfs_work, work); struct procfs_work *work = container_of(work0, struct procfs_work, work);
unsigned long phys;
int *done;
switch (work->msg) { switch (work->msg) {
case SCD_MSG_PROCFS_TID_CREATE: case SCD_MSG_PROCFS_TID_CREATE:
add_tid_entry(ihk_host_os_get_index(work->os), work->pid, work->arg); add_tid_entry(ihk_host_os_get_index(work->os),
break; work->pid, work->arg);
phys = ihk_device_map_memory(ihk_os_to_dev(work->os),
work->resp_pa, sizeof(int));
done = ihk_device_map_virtual(ihk_os_to_dev(work->os),
phys, sizeof(int), NULL, 0);
*done = 1;
ihk_device_unmap_virtual(ihk_os_to_dev(work->os),
done, sizeof(int));
ihk_device_unmap_memory(ihk_os_to_dev(work->os),
phys, sizeof(int));
break;
case SCD_MSG_PROCFS_TID_DELETE: case SCD_MSG_PROCFS_TID_DELETE:
delete_tid_entry(ihk_host_os_get_index(work->os), work->pid, work->arg); delete_tid_entry(ihk_host_os_get_index(work->os),
break; work->pid, work->arg);
break;
default: default:
printk("%s: unknown work: msg: %d, pid: %d, arg: %lu)\n", pr_warn("%s: unknown work: msg: %d, pid: %d, arg: %lu)\n",
__FUNCTION__, work->msg, work->pid, work->arg); __func__, work->msg, work->pid, work->arg);
break; break;
} }
kfree(work); kfree(work);
return; return;
} }
int procfsm_packet_handler(void *os, int msg, int pid, unsigned long arg) int procfsm_packet_handler(void *os, int msg, int pid, unsigned long arg,
unsigned long resp_pa)
{ {
struct procfs_work *work = NULL; struct procfs_work *work = NULL;
@@ -979,6 +760,7 @@ int procfsm_packet_handler(void *os, int msg, int pid, unsigned long arg)
work->msg = msg; work->msg = msg;
work->pid = pid; work->pid = pid;
work->arg = arg; work->arg = arg;
work->resp_pa = resp_pa;
INIT_WORK(&work->work, &procfsm_work_main); INIT_WORK(&work->work, &procfsm_work_main);
schedule_work(&work->work); schedule_work(&work->work);
@@ -997,6 +779,303 @@ static const struct file_operations mckernel_forward = {
.write = mckernel_procfs_write, .write = mckernel_procfs_write,
}; };
#define PA_NULL (-1L)
/*
 * Per-open-file state of a buffer-backed McKernel procfs file.
 * Allocated in mckernel_procfs_buff_open(), stored in file->private_data
 * and freed in mckernel_procfs_buff_release().
 */
struct mckernel_procfs_buffer_info {
/* address of the first buffer; PA_NULL until the first read has fetched it */
unsigned long top_pa;
/* address of the buffer the read loop is currently positioned on */
unsigned long cur_pa;
/* OS instance the file belongs to */
ihk_os_t os;
/* PID parsed from the procfs path, or -1 when the path has none */
int pid;
/* NUL-terminated procfs path, stored inline right after the structure */
char path[0];
};
/*
 * Header of one buffer in the chain that holds a procfs file's contents.
 * mckernel_procfs_buff_read() maps PAGE_SIZE bytes at the buffer's address
 * and follows next_pa until it reaches PA_NULL.
 */
struct mckernel_procfs_buffer {
/* address of the next buffer in the chain; PA_NULL ends the chain */
unsigned long next_pa;
/* file offset of the first byte held in buf[] */
unsigned long pos;
/* number of valid bytes in buf[] */
unsigned long size;
/* file contents covering offsets [pos, pos + size) */
char buf[0];
};
/*
 * open() handler for buffer-backed McKernel procfs files.
 *
 * Allocates a struct mckernel_procfs_buffer_info holding the OS handle,
 * the PID parsed from the entry's path (-1 when the component after the
 * first '/' is not a number) and a copy of the path, and stores it in
 * file->private_data. No buffer is fetched here: top_pa and cur_pa start
 * out as PA_NULL.
 *
 * Returns 0 on success, -EINVAL when the OS of the entry cannot be found
 * and -ENOMEM on allocation failure.
 */
static int mckernel_procfs_buff_open(struct inode *inode, struct file *file)
{
	struct mckernel_procfs_buffer_info *info;
	int pid;
	int ret = 0;
	char *path;
	char *path_buf;
	char *p;
	ihk_os_t os;
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
	struct proc_dir_entry *dp = PDE(inode);
	struct procfs_list_entry *e = dp->data;
#else
	struct procfs_list_entry *e = PDE_DATA(inode);
#endif

	os = osnum_to_os(e->osnum);
	if (!os) {
		return -EINVAL;
	}

	path_buf = kmalloc(PROCFS_NAME_MAX, GFP_KERNEL);
	if (!path_buf) {
		return -ENOMEM;
	}
	path = getpath(e, path_buf, PROCFS_NAME_MAX);

	/*
	 * The PID, if any, is the component after the first '/'. strchr()
	 * returns NULL when the path has no '/', so check it before
	 * stepping past the separator.
	 */
	p = strchr(path, '/');
	if (!p || sscanf(p + 1, "%d/", &pid) != 1) {
		pid = -1;
	}

	/* The path is stored inline, right after the fixed-size fields. */
	info = kmalloc(sizeof(struct mckernel_procfs_buffer_info) +
		       strlen(path) + 1, GFP_KERNEL);
	if (!info) {
		ret = -ENOMEM;
		goto out;
	}

	info->top_pa = PA_NULL;
	info->cur_pa = PA_NULL;
	info->os = os;
	info->pid = pid;
	strcpy(info->path, path);
	file->private_data = info;

out:
	kfree(path_buf);
	return ret;
}
/*
 * release() handler for buffer-backed McKernel procfs files.
 *
 * If a buffer chain was fetched for this file (top_pa != PA_NULL), sends
 * SCD_MSG_PROCFS_RELEASE carrying the chain's first address and waits up
 * to 5 seconds for the answer. The per-file info is always freed.
 *
 * Returns 0 on success, -EIO when file->private_data is unset or the
 * request is not answered properly, -ENOMEM on allocation failure,
 * -ERESTART when interrupted, or the negative value reported by
 * mcctrl_ikc_send_wait() / McKernel.
 */
static int mckernel_procfs_buff_release(struct inode *inode, struct file *file)
{
	struct mckernel_procfs_buffer_info *info = file->private_data;
	int rc = 0;

	if (!info) {
		return -EIO;
	}

	file->private_data = NULL;

	if (info->top_pa != PA_NULL) {
		int ret;
		struct procfs_read *r = NULL;
		struct ikc_scd_packet isp;
		int do_free;

		r = kmalloc(sizeof(struct procfs_read), GFP_KERNEL);
		if (r == NULL) {
			rc = -ENOMEM;
			goto out;
		}
		memset(r, '\0', sizeof(struct procfs_read));
		r->pbuf = info->top_pa;
		r->ret = -EIO; /* default */
		r->fname[0] = '\0';
		isp.msg = SCD_MSG_PROCFS_RELEASE;
		isp.ref = 0;
		isp.arg = virt_to_phys(r);
		isp.pid = 0;

		rc = -EIO;
		ret = mcctrl_ikc_send_wait(info->os, 0,
					   &isp, 5 * HZ, NULL, &do_free, 1, r);

		if (!do_free && ret >= 0) {
			ret = -EIO;
		}

		if (ret < 0) {
			rc = ret;
			if (ret == -ETIME) {
				/* The message now matches the 5 * HZ wait. */
				pr_info("%s: error: timeout (5 sec)\n",
					__func__);
			}
			else if (ret == -ERESTARTSYS) {
				rc = -ERESTART;
			}
			/*
			 * NOTE(review): do_free == 0 presumably means r is
			 * now owned by the IKC layer and must not be freed
			 * here - confirm against mcctrl_ikc_send_wait().
			 */
			if (!do_free)
				r = NULL;
			goto out;
		}

		if (r->ret < 0) {
			rc = r->ret;
			goto out;
		}
		rc = 0;

out:
		/* kfree(NULL) is a no-op, so no guard is needed. */
		kfree(r);
	}

	kfree(info);
	return rc;
}
/*
 * read() handler for buffer-backed McKernel procfs files.
 *
 * On the first read of an open file the contents are requested from
 * McKernel with SCD_MSG_PROCFS_REQUEST (waiting up to 5 seconds); the
 * answer supplies the address of the first struct mckernel_procfs_buffer
 * of a chain. Each read then walks that chain, mapping one PAGE_SIZE
 * buffer at a time, and copies the bytes covering [*ppos, *ppos + nbytes)
 * to user space.
 *
 * file:   open procfs file; private_data holds the buffer info
 * ubuf:   destination user buffer
 * nbytes: maximum number of bytes to copy
 * ppos:   file position, updated to the position after the copied bytes
 *
 * Returns the number of bytes copied (0 at end of data, or when nbytes is
 * 0 or *ppos is negative) or a negative errno (-EIO, -EINVAL, -ENOMEM,
 * -ERESTART, -EFAULT, or the value reported by mcctrl_ikc_send_wait() /
 * McKernel).
 */
static ssize_t mckernel_procfs_buff_read(struct file *file, char __user *ubuf,
					 size_t nbytes, loff_t *ppos)
{
	struct mckernel_procfs_buffer_info *info = file->private_data;
	unsigned long phys;
	struct mckernel_procfs_buffer *buf;
	/* Keep the full loff_t width; an int would truncate large offsets. */
	loff_t pos = *ppos;
	ssize_t l = 0;
	int done = 0;
	ihk_os_t os;

	if (nbytes <= 0 || *ppos < 0) {
		return 0;
	}

	if (!info) {
		return -EIO;
	}

	os = info->os;
	if (info->top_pa == PA_NULL) {
		int ret;
		int pid = info->pid;
		struct procfs_read *r = NULL;
		struct ikc_scd_packet isp;
		struct mcctrl_usrdata *udp = NULL;
		struct mcctrl_per_proc_data *ppd = NULL;
		int do_free;

		udp = ihk_host_os_get_usrdata(os);
		if (!udp) {
			pr_err("%s: no MCCTRL data found for OS\n",
			       __func__);
			return -EINVAL;
		}

		if (pid > 0) {
			ppd = mcctrl_get_per_proc_data(udp, pid);

			if (unlikely(!ppd)) {
				pr_err("%s: no per-process structure for PID %d",
				       __func__, pid);
				return -EINVAL;
			}
		}

		r = kmalloc(sizeof(struct procfs_read), GFP_KERNEL);
		if (r == NULL) {
			l = -ENOMEM;
			done = 1;
			goto out;
		}
		memset(r, '\0', sizeof(struct procfs_read));
		r->pbuf = PA_NULL;
		r->ret = -EIO; /* default */
		/*
		 * Copy at most PROCFS_NAME_MAX - 1 bytes: together with the
		 * memset() above this keeps fname NUL-terminated.
		 */
		strncpy((char *)r->fname, info->path, PROCFS_NAME_MAX - 1);
		isp.msg = SCD_MSG_PROCFS_REQUEST;
		isp.ref = 0;
		isp.arg = virt_to_phys(r);
		isp.pid = pid;

		/* Assume failure until the answer has been validated. */
		l = -EIO;
		done = 1;
		ret = mcctrl_ikc_send_wait(os,
					   (pid > 0) ? ppd->ikc_target_cpu : 0,
					   &isp, 5 * HZ, NULL, &do_free, 1, r);

		if (!do_free && ret >= 0) {
			ret = -EIO;
		}

		if (ret < 0) {
			l = ret;
			if (ret == -ETIME) {
				/* The message now matches the 5 * HZ wait. */
				pr_info("%s: error: timeout (5 sec)\n",
					__func__);
			}
			else if (ret == -ERESTARTSYS) {
				l = -ERESTART;
			}
			/*
			 * NOTE(review): do_free == 0 presumably means r is
			 * now owned by the IKC layer and must not be freed
			 * here - confirm against mcctrl_ikc_send_wait().
			 */
			if (!do_free)
				r = NULL;
			goto out;
		}

		if (r->ret < 0) {
			l = r->ret;
			goto out;
		}

		done = 0;
		l = 0;
		info->top_pa = info->cur_pa = r->pbuf;

out:
		if (ppd)
			mcctrl_put_per_proc_data(ppd);
		/* kfree(NULL) is a no-op, so no guard is needed. */
		kfree(r);
	}

	/* A previous read ran off the end of the chain: restart at the top. */
	if (info->cur_pa == PA_NULL) {
		info->cur_pa = info->top_pa;
	}

	while (!done && info->cur_pa != PA_NULL) {
		long bpos;
		long bsize;

		phys = ihk_device_map_memory(ihk_os_to_dev(os), info->cur_pa,
					     PAGE_SIZE);
#ifdef CONFIG_MIC
		buf = ioremap_wc(phys, PAGE_SIZE);
#else
		buf = ihk_device_map_virtual(ihk_os_to_dev(os), phys,
					     PAGE_SIZE, NULL, 0);
#endif

		/* Wanted offset lies before this buffer: rewind to the top. */
		if (pos < buf->pos) {
			info->cur_pa = info->top_pa;
			goto rep;
		}

		/* Wanted offset lies after this buffer: move to the next. */
		if (pos >= buf->pos + buf->size) {
			info->cur_pa = buf->next_pa;
			goto rep;
		}

		/* Copy what this buffer holds, capped at the remaining request. */
		bpos = pos - buf->pos;
		bsize = (buf->pos + buf->size) - pos;
		if (bsize > (nbytes - l)) {
			bsize = nbytes - l;
		}

		if (copy_to_user(ubuf, buf->buf + bpos, bsize)) {
			/* Report the fault and leave the file position as it was. */
			done = 1;
			pos = *ppos;
			l = -EFAULT;
		}
		else {
			ubuf += bsize;
			pos += bsize;
			l += bsize;
			if (l == nbytes) {
				done = 1;
			}
		}
rep:
#ifdef CONFIG_MIC
		iounmap(buf);
#else
		ihk_device_unmap_virtual(ihk_os_to_dev(os), buf, PAGE_SIZE);
#endif
		ihk_device_unmap_memory(ihk_os_to_dev(os), phys, PAGE_SIZE);
	}

	*ppos = pos;
	return l;
}
/*
 * File operations for read-only procfs files whose contents are served
 * from a buffer chain (see mckernel_procfs_buff_read()). The per-file
 * state is created on open and torn down on release; writing is not
 * supported.
 */
static const struct file_operations mckernel_buff_io = {
.llseek = mckernel_procfs_lseek,
.read = mckernel_procfs_buff_read,
.write = NULL,
.open = mckernel_procfs_buff_open,
.release = mckernel_procfs_buff_release,
};
static const struct procfs_entry tid_entry_stuff[] = { static const struct procfs_entry tid_entry_stuff[] = {
// PROC_REG("auxv", S_IRUSR, NULL), // PROC_REG("auxv", S_IRUSR, NULL),
// PROC_REG("clear_refs", S_IWUSR, NULL), // PROC_REG("clear_refs", S_IWUSR, NULL),
@@ -1006,10 +1085,10 @@ static const struct procfs_entry tid_entry_stuff[] = {
// PROC_LNK("exe", mckernel_readlink), // PROC_LNK("exe", mckernel_readlink),
// PROC_REG("limits", S_IRUSR|S_IWUSR, NULL), // PROC_REG("limits", S_IRUSR|S_IWUSR, NULL),
// PROC_REG("maps", S_IRUGO, NULL), // PROC_REG("maps", S_IRUGO, NULL),
PROC_REG("mem", S_IRUSR|S_IWUSR, NULL), PROC_REG("mem", 0600, NULL),
// PROC_REG("pagemap", S_IRUGO, NULL), // PROC_REG("pagemap", S_IRUGO, NULL),
// PROC_REG("smaps", S_IRUGO, NULL), // PROC_REG("smaps", S_IRUGO, NULL),
PROC_REG("stat", S_IRUGO, NULL), PROC_REG("stat", 0444, &mckernel_buff_io),
// PROC_REG("statm", S_IRUGO, NULL), // PROC_REG("statm", S_IRUGO, NULL),
// PROC_REG("status", S_IRUGO, NULL), // PROC_REG("status", S_IRUGO, NULL),
// PROC_REG("syscall", S_IRUGO, NULL), // PROC_REG("syscall", S_IRUGO, NULL),
@@ -1018,26 +1097,26 @@ static const struct procfs_entry tid_entry_stuff[] = {
}; };
static const struct procfs_entry pid_entry_stuff[] = { static const struct procfs_entry pid_entry_stuff[] = {
PROC_REG("auxv", S_IRUSR, NULL), PROC_REG("auxv", 0400, &mckernel_buff_io),
/* Support the case where McKernel process retrieves its job-id under the Fujitsu TCS suite. */ /* Support the case where McKernel process retrieves its job-id under the Fujitsu TCS suite. */
// PROC_REG("cgroup", S_IXUSR, NULL), // PROC_REG("cgroup", S_IXUSR, NULL),
// PROC_REG("clear_refs", S_IWUSR, NULL), // PROC_REG("clear_refs", S_IWUSR, NULL),
PROC_REG("cmdline", S_IRUGO, NULL), PROC_REG("cmdline", 0444, &mckernel_buff_io),
// PROC_REG("comm", S_IRUGO|S_IWUSR, NULL), PROC_REG("comm", 0644, &mckernel_buff_io),
// PROC_REG("coredump_filter", S_IRUGO|S_IWUSR, NULL), // PROC_REG("coredump_filter", S_IRUGO|S_IWUSR, NULL),
PROC_REG("cpuset", S_IXUSR, NULL), // PROC_REG("cpuset", S_IRUGO, NULL),
// PROC_REG("environ", S_IRUSR, NULL), // PROC_REG("environ", S_IRUSR, NULL),
// PROC_LNK("exe", mckernel_readlink), // PROC_LNK("exe", mckernel_readlink),
// PROC_REG("limits", S_IRUSR|S_IWUSR, NULL), // PROC_REG("limits", S_IRUSR|S_IWUSR, NULL),
PROC_REG("maps", S_IRUGO, NULL), PROC_REG("maps", 0444, &mckernel_buff_io),
PROC_REG("mem", S_IRUSR|S_IWUSR, NULL), PROC_REG("mem", 0400, NULL),
PROC_REG("pagemap", S_IRUGO, NULL), PROC_REG("pagemap", 0444, NULL),
PROC_REG("smaps", S_IRUGO, NULL), // PROC_REG("smaps", S_IRUGO, NULL),
// PROC_REG("stat", S_IRUGO, NULL), // PROC_REG("stat", 0444, &mckernel_buff_io),
// PROC_REG("statm", S_IRUGO, NULL), // PROC_REG("statm", S_IRUGO, NULL),
PROC_REG("status", S_IRUGO, NULL), PROC_REG("status", 0444, &mckernel_buff_io),
// PROC_REG("syscall", S_IRUGO, NULL), // PROC_REG("syscall", S_IRUGO, NULL),
PROC_DIR("task", S_IRUGO|S_IXUGO), PROC_DIR("task", 0555),
// PROC_REG("wchan", S_IRUGO, NULL), // PROC_REG("wchan", S_IRUGO, NULL),
PROC_TERM PROC_TERM
}; };
@@ -1045,14 +1124,14 @@ static const struct procfs_entry pid_entry_stuff[] = {
static const struct procfs_entry base_entry_stuff[] = { static const struct procfs_entry base_entry_stuff[] = {
// PROC_REG("cmdline", S_IRUGO, NULL), // PROC_REG("cmdline", S_IRUGO, NULL),
#ifdef POSTK_DEBUG_ARCH_DEP_42 /* /proc/cpuinfo support added. */ #ifdef POSTK_DEBUG_ARCH_DEP_42 /* /proc/cpuinfo support added. */
PROC_REG("cpuinfo", S_IRUGO, NULL), PROC_REG("cpuinfo", 0444, &mckernel_buff_io),
#else /* POSTK_DEBUG_ARCH_DEP_42 */ #else /* POSTK_DEBUG_ARCH_DEP_42 */
// PROC_REG("cpuinfo", S_IRUGO, NULL), // PROC_REG("cpuinfo", S_IRUGO, NULL),
#endif /* POSTK_DEBUG_ARCH_DEP_42 */ #endif /* POSTK_DEBUG_ARCH_DEP_42 */
// PROC_REG("meminfo", S_IRUGO, NULL), // PROC_REG("meminfo", S_IRUGO, NULL),
// PROC_REG("pagetypeinfo",S_IRUGO, NULL), // PROC_REG("pagetypeinfo",S_IRUGO, NULL),
// PROC_REG("softirq", S_IRUGO, NULL), // PROC_REG("softirq", S_IRUGO, NULL),
PROC_REG("stat", S_IRUGO, NULL), PROC_REG("stat", 0444, &mckernel_buff_io),
// PROC_REG("uptime", S_IRUGO, NULL), // PROC_REG("uptime", S_IRUGO, NULL),
// PROC_REG("version", S_IRUGO, NULL), // PROC_REG("version", S_IRUGO, NULL),
// PROC_REG("vmallocinfo",S_IRUSR, NULL), // PROC_REG("vmallocinfo",S_IRUSR, NULL),

File diff suppressed because it is too large Load Diff

View File

@@ -790,6 +790,7 @@ out:
return error; return error;
} /* setup_node_files() */ } /* setup_node_files() */
#ifdef SETUP_PCI_FILES
static int read_file(void *buf, size_t size, char *fmt, va_list ap) static int read_file(void *buf, size_t size, char *fmt, va_list ap)
{ {
int error; int error;
@@ -798,7 +799,6 @@ static int read_file(void *buf, size_t size, char *fmt, va_list ap)
int n; int n;
struct file *fp = NULL; struct file *fp = NULL;
loff_t off; loff_t off;
mm_segment_t ofs;
ssize_t ss; ssize_t ss;
dprintk("read_file(%p,%ld,%s,%p)\n", buf, size, fmt, ap); dprintk("read_file(%p,%ld,%s,%p)\n", buf, size, fmt, ap);
@@ -824,13 +824,14 @@ static int read_file(void *buf, size_t size, char *fmt, va_list ap)
} }
off = 0; off = 0;
ofs = get_fs(); #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)
set_fs(KERNEL_DS); ss = kernel_read(fp, buf, size, &off);
ss = vfs_read(fp, buf, size, &off); #else
set_fs(ofs); ss = kernel_read(fp, off, buf, size);
#endif
if (ss < 0) { if (ss < 0) {
error = ss; error = ss;
eprintk("mcctrl:read_file:vfs_read failed. %d\n", error); eprintk("mcctrl:read_file:kernel_read failed. %d\n", error);
goto out; goto out;
} }
if (ss >= size) { if (ss >= size) {
@@ -892,16 +893,6 @@ out:
return error; return error;
} /* read_long() */ } /* read_long() */
#ifdef MCCTRL_KSYM_sys_readlink
static ssize_t (*mcctrl_sys_readlink)(const char *path, char *buf,
size_t bufsiz)
#if MCCTRL_KSYM_sys_readlink
= (void *)MCCTRL_KSYM_sys_readlink;
#else
= &sys_readlink;
#endif
#endif
static int read_link(char *buf, size_t bufsize, char *fmt, ...) static int read_link(char *buf, size_t bufsize, char *fmt, ...)
{ {
int error; int error;
@@ -951,30 +942,14 @@ out:
return error; return error;
} /* read_link() */ } /* read_link() */
#ifdef POSTK_DEBUG_TEMP_FIX_22 /* iterate_dir() deadlock */
static int setup_one_pci(struct mcctrl_usrdata *udp, const char *name) static int setup_one_pci(struct mcctrl_usrdata *udp, const char *name)
{ {
#else /* POSTK_DEBUG_TEMP_FIX_22 */
static int setup_one_pci(void *arg0, const char *name, int namlen,
loff_t offset, u64 ino, unsigned d_type)
{
struct mcctrl_usrdata *udp = arg0;
#endif /* POSTK_DEBUG_TEMP_FIX_22 */
int error; int error;
char *buf = NULL; char *buf = NULL;
long node; long node;
struct sysfsm_bitmap_param param; struct sysfsm_bitmap_param param;
#ifdef POSTK_DEBUG_TEMP_FIX_22 /* iterate_dir() deadlock */
dprintk("setup_one_pci(%p,%s)\n", udp, name); dprintk("setup_one_pci(%p,%s)\n", udp, name);
#else /* POSTK_DEBUG_TEMP_FIX_22 */
dprintk("setup_one_pci(%p,%s,%d,%#lx,%#lx,%d)\n",
arg0, name, namlen, (long)offset, (long)ino, d_type);
if (namlen != 12) {
error = 0;
goto out;
}
#endif /* POSTK_DEBUG_TEMP_FIX_22 */
buf = (void *)__get_free_pages(GFP_KERNEL, 0); buf = (void *)__get_free_pages(GFP_KERNEL, 0);
if (!buf) { if (!buf) {
@@ -1026,26 +1001,39 @@ static int setup_one_pci(void *arg0, const char *name, int namlen,
error = 0; error = 0;
out: out:
free_pages((long)buf, 0); free_pages((long)buf, 0);
#ifdef POSTK_DEBUG_TEMP_FIX_22 /* iterate_dir() deadlock */
dprintk("setup_one_pci(%p,%s): %d\n", udp, name, error); dprintk("setup_one_pci(%p,%s): %d\n", udp, name, error);
#else /* POSTK_DEBUG_TEMP_FIX_22 */
dprintk("setup_one_pci(%p,%s,%d,%#lx,%#lx,%d): %d\n",
arg0, name, namlen, (long)offset, (long)ino, d_type,
error);
#endif /* POSTK_DEBUG_TEMP_FIX_22 */
return error; return error;
} /* setup_one_pci() */ } /* setup_one_pci() */
#ifdef POSTK_DEBUG_TEMP_FIX_22 /* iterate_dir() deadlock */
LIST_HEAD(pci_file_name_list); LIST_HEAD(pci_file_name_list);
struct pci_file_name { struct pci_file_name {
char *name; char *name;
struct list_head chain; struct list_head chain;
}; };
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0) || \
(defined(RHEL_RELEASE_CODE) && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 5))
struct mcctrl_filler_args {
struct dir_context ctx;
void *buf;
};
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
static int pci_file_name_gen(struct dir_context *ctx, const char *name,
int namlen, loff_t offset, u64 ino, unsigned int d_type)
#else
static int pci_file_name_gen(void *ctx, const char *name,
int namlen, loff_t offset, u64 ino, unsigned int d_type)
#endif
{
struct mcctrl_filler_args *args
= container_of(ctx, struct mcctrl_filler_args, ctx);
void *buf = args->buf;
#else
static int pci_file_name_gen(void *buf, const char *name, int namlen, static int pci_file_name_gen(void *buf, const char *name, int namlen,
loff_t offset, u64 ino, unsigned d_type) loff_t offset, u64 ino, unsigned d_type)
{ {
#endif
struct pci_file_name *p; struct pci_file_name *p;
int error = -1; int error = -1;
@@ -1083,56 +1071,31 @@ out:
buf, name, namlen, (long)offset, (long)ino, d_type, error); buf, name, namlen, (long)offset, (long)ino, d_type, error);
return error; return error;
} }
#endif /* POSTK_DEBUG_TEMP_FIX_22 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,11,0) static inline int mcctrl_vfs_readdir(struct file *file, filldir_t filler,
typedef int (*mcctrl_filldir_t)(void *buf, const char *name, int namlen, void *buf)
loff_t offset, u64 ino, unsigned d_type);
struct mcctrl_filler_args {
struct dir_context ctx;
mcctrl_filldir_t filler;
void *buf;
};
static int mcctrl_filler(struct dir_context *ctx, const char *name,
int namlen, loff_t offset, u64 ino, unsigned d_type)
{
struct mcctrl_filler_args *args
= container_of(ctx, struct mcctrl_filler_args, ctx);
return (*args->filler)(args->buf, name, namlen, offset, ino, d_type);
} /* mcctrl_filler() */
static inline int mcctrl_vfs_readdir(struct file *file,
mcctrl_filldir_t filler, void *buf)
{ {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0) || \
(defined(RHEL_RELEASE_CODE) && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 5))
struct mcctrl_filler_args args = { struct mcctrl_filler_args args = {
.ctx.actor = &mcctrl_filler, .ctx.actor = filler,
.filler = (void *)filler,
.buf = buf, .buf = buf,
}; };
return iterate_dir(file, &args.ctx); return iterate_dir(file, &args.ctx);
} /* mcctrl_vfs_readdir() */
#else #else
static inline int mcctrl_vfs_readdir(struct file *file, filldir_t filler,
void *buf)
{
return vfs_readdir(file, filler, buf); return vfs_readdir(file, filler, buf);
} /* mcctrl_vfs_readdir() */
#endif #endif
} /* mcctrl_vfs_readdir() */
static int setup_pci_files(struct mcctrl_usrdata *udp) static int setup_pci_files(struct mcctrl_usrdata *udp)
{ {
int error; int error;
int er; int er;
struct file *fp = NULL; struct file *fp = NULL;
#ifdef POSTK_DEBUG_TEMP_FIX_22 /* iterate_dir() deadlock */
int ret = 0; int ret = 0;
struct pci_file_name *cur; struct pci_file_name *cur;
struct pci_file_name *next; struct pci_file_name *next;
#endif /* POSTK_DEBUG_TEMP_FIX_22 */
dprintk("setup_pci_files(%p)\n", udp); dprintk("setup_pci_files(%p)\n", udp);
fp = filp_open("/sys/bus/pci/devices", O_DIRECTORY, 0); fp = filp_open("/sys/bus/pci/devices", O_DIRECTORY, 0);
@@ -1142,18 +1105,13 @@ static int setup_pci_files(struct mcctrl_usrdata *udp)
goto out; goto out;
} }
#ifdef POSTK_DEBUG_TEMP_FIX_22 /* iterate_dir() deadlock */
error = mcctrl_vfs_readdir(fp, &pci_file_name_gen, udp); error = mcctrl_vfs_readdir(fp, &pci_file_name_gen, udp);
#else /* POSTK_DEBUG_TEMP_FIX_22 */
error = mcctrl_vfs_readdir(fp, &setup_one_pci, udp);
#endif /* POSTK_DEBUG_TEMP_FIX_22 */
if (error) { if (error) {
eprintk("mcctrl:setup_pci_files:" eprintk("mcctrl:setup_pci_files:"
"mcctrl_vfs_readdir failed. %d\n", error); "mcctrl_vfs_readdir failed. %d\n", error);
goto out; goto out;
} }
#ifdef POSTK_DEBUG_TEMP_FIX_22 /* iterate_dir() deadlock */
list_for_each_entry_safe(cur, next, &pci_file_name_list, chain) { list_for_each_entry_safe(cur, next, &pci_file_name_list, chain) {
if (!ret) { if (!ret) {
ret = setup_one_pci(udp, cur->name); ret = setup_one_pci(udp, cur->name);
@@ -1162,7 +1120,6 @@ static int setup_pci_files(struct mcctrl_usrdata *udp)
kfree(cur->name); kfree(cur->name);
kfree(cur); kfree(cur);
} }
#endif /* POSTK_DEBUG_TEMP_FIX_22 */
error = 0; error = 0;
out: out:
@@ -1176,6 +1133,7 @@ out:
dprintk("setup_pci_files(%p): %d\n", udp, error); dprintk("setup_pci_files(%p): %d\n", udp, error);
return error; return error;
} /* setup_pci_files() */ } /* setup_pci_files() */
#endif // SETUP_PCI_FILES
void setup_sysfs_files(ihk_os_t os) void setup_sysfs_files(ihk_os_t os)
{ {
@@ -1215,7 +1173,9 @@ void setup_sysfs_files(ihk_os_t os)
setup_cpus_sysfs_files(udp); setup_cpus_sysfs_files(udp);
setup_node_files(udp); setup_node_files(udp);
setup_cpus_sysfs_files_node_link(udp); setup_cpus_sysfs_files_node_link(udp);
//setup_pci_files(udp); #ifdef SETUP_PCI_FILES
setup_pci_files(udp);
#endif
/* Indicate sysfs files setup completion for boot script */ /* Indicate sysfs files setup completion for boot script */
error = sysfsm_mkdirf(os, NULL, "/sys/setup_complete"); error = sysfsm_mkdirf(os, NULL, "/sys/setup_complete");

View File

@@ -21,7 +21,7 @@ endif
endif endif
ifeq ($(BUILD_MODULE_TMP),rhel) ifeq ($(BUILD_MODULE_TMP),rhel)
ifeq ($(BUILD_MODULE),none) ifeq ($(BUILD_MODULE),none)
BUILD_MODULE=$(shell if [ ${LINUX_VERSION_CODE} -eq 199168 -a ${RHEL_RELEASE} -ge 327 -a ${RHEL_RELEASE} -le 693 ]; then echo "linux-3.10.0-327.36.1.el7"; else echo "none"; fi) BUILD_MODULE=$(shell if [ ${LINUX_VERSION_CODE} -eq 199168 -a ${RHEL_RELEASE} -ge 327 -a ${RHEL_RELEASE} -le 862 ]; then echo "linux-3.10.0-327.36.1.el7"; else echo "none"; fi)
endif endif
ifeq ($(BUILD_MODULE),none) ifeq ($(BUILD_MODULE),none)
BUILD_MODULE=$(shell if [ ${LINUX_VERSION_CODE} -ge 262144 -a ${LINUX_VERSION_CODE} -lt 262400 ]; then echo "linux-4.0.9"; else echo "none"; fi) BUILD_MODULE=$(shell if [ ${LINUX_VERSION_CODE} -ge 262144 -a ${LINUX_VERSION_CODE} -lt 262400 ]; then echo "linux-4.0.9"; else echo "none"; fi)

View File

@@ -15,6 +15,7 @@
#include <linux/rbtree.h> #include <linux/rbtree.h>
#include <linux/security.h> #include <linux/security.h>
#include <linux/cred.h> #include <linux/cred.h>
#include <linux/version.h>
#include "overlayfs.h" #include "overlayfs.h"
struct ovl_cache_entry { struct ovl_cache_entry {
@@ -34,10 +35,18 @@ struct ovl_dir_cache {
struct list_head entries; struct list_head entries;
}; };
/* vfs_readdir vs. iterate_dir compat */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0) || \
(defined(RHEL_RELEASE_CODE) && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 5))
#define USE_ITERATE_DIR 1
#endif
#ifndef USE_ITERATE_DIR
struct dir_context { struct dir_context {
const filldir_t actor; const filldir_t actor;
//loff_t pos; //loff_t pos;
}; };
#endif
struct ovl_readdir_data { struct ovl_readdir_data {
struct dir_context ctx; struct dir_context ctx;
@@ -256,7 +265,11 @@ static inline int ovl_dir_read(struct path *realpath,
do { do {
rdd->count = 0; rdd->count = 0;
rdd->err = 0; rdd->err = 0;
#ifdef USE_ITERATE_DIR
err = iterate_dir(realfile, &rdd->ctx);
#else
err = vfs_readdir(realfile, rdd->ctx.actor, rdd); err = vfs_readdir(realfile, rdd->ctx.actor, rdd);
#endif
if (err >= 0) if (err >= 0)
err = rdd->err; err = rdd->err;
} while (!err && rdd->count); } while (!err && rdd->count);
@@ -365,6 +378,22 @@ static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
return cache; return cache;
} }
#ifdef USE_ITERATE_DIR
/*
 * Adapter state for running a filldir_t callback through iterate_dir().
 * ctx is deliberately the first member: ovl_wrap_readdir() receives a
 * pointer to it and treats that pointer as a pointer to the whole wrapper.
 */
struct iterate_wrapper {
	struct dir_context ctx;	/* handed to iterate_dir(); must stay first */
	filldir_t actor;	/* the caller's original fill callback */
	void *buf;		/* opaque cookie forwarded to actor */
};

/*
 * Actor installed in iterate_wrapper.ctx: recovers the wrapper from the
 * context pointer and forwards the directory entry, unchanged, to the
 * wrapped callback together with its original cookie.
 * Returns whatever the wrapped callback returns.
 */
static int ovl_wrap_readdir(void *ctx, const char *name, int namelen,
			    loff_t offset, u64 ino, unsigned int d_type)
{
	struct iterate_wrapper *wrap = ctx;
	filldir_t fill = wrap->actor;
	void *cookie = wrap->buf;

	return fill(cookie, name, namelen, offset, ino, d_type);
}
#endif
static int ovl_readdir(struct file *file, void *buf, filldir_t filler) static int ovl_readdir(struct file *file, void *buf, filldir_t filler)
{ {
struct ovl_dir_file *od = file->private_data; struct ovl_dir_file *od = file->private_data;
@@ -376,7 +405,16 @@ static int ovl_readdir(struct file *file, void *buf, filldir_t filler)
ovl_dir_reset(file); ovl_dir_reset(file);
if (od->is_real) { if (od->is_real) {
#ifdef USE_ITERATE_DIR
struct iterate_wrapper w = {
.ctx.actor = ovl_wrap_readdir,
.actor = filler,
.buf = buf,
};
res = iterate_dir(od->realfile, &w.ctx);
#else
res = vfs_readdir(od->realfile, filler, buf); res = vfs_readdir(od->realfile, filler, buf);
#endif
file->f_pos = od->realfile->f_pos; file->f_pos = od->realfile->f_pos;
return res; return res;

View File

@@ -13,6 +13,8 @@ KDIR ?= @KDIR@
ARCH=@ARCH@ ARCH=@ARCH@
CFLAGS=-Wall -O -I. -I$(VPATH)/arch/${ARCH} -I${IHKDIR} -I@abs_builddir@/../../../ihk/linux/include CFLAGS=-Wall -O -I. -I$(VPATH)/arch/${ARCH} -I${IHKDIR} -I@abs_builddir@/../../../ihk/linux/include
LDFLAGS=@LDFLAGS@ LDFLAGS=@LDFLAGS@
CPPFLAGS_SYSCALL_INTERCEPT=@CPPFLAGS_SYSCALL_INTERCEPT@
LDFLAGS_SYSCALL_INTERCEPT=@LDFLAGS_SYSCALL_INTERCEPT@
RPATH=$(shell echo $(LDFLAGS)|awk '{for(i=1;i<=NF;i++){if($$i~/^-L/){w=$$i;sub(/^-L/,"-Wl,-rpath,",w);print w}}}') RPATH=$(shell echo $(LDFLAGS)|awk '{for(i=1;i<=NF;i++){if($$i~/^-L/){w=$$i;sub(/^-L/,"-Wl,-rpath,",w);print w}}}')
VPATH=@abs_srcdir@ VPATH=@abs_srcdir@
TARGET=mcexec libsched_yield ldump2mcdump.so TARGET=mcexec libsched_yield ldump2mcdump.so
@@ -21,12 +23,17 @@ LIBS=@LIBS@
IHKDIR ?= $(VPATH)/../../../ihk/linux/include/ IHKDIR ?= $(VPATH)/../../../ihk/linux/include/
MCEXEC_LIBS=-lmcexec -lrt -lnuma -pthread -L@abs_builddir@/../../../ihk/linux/user -lihk -Wl,-rpath,$(MCKERNEL_LIBDIR) MCEXEC_LIBS=-lmcexec -lrt -lnuma -pthread -L@abs_builddir@/../../../ihk/linux/user -lihk -Wl,-rpath,$(MCKERNEL_LIBDIR)
ENABLE_QLMPI=@ENABLE_QLMPI@ ENABLE_QLMPI=@ENABLE_QLMPI@
WITH_SYSCALL_INTERCEPT=@WITH_SYSCALL_INTERCEPT@
ifeq ($(ENABLE_QLMPI),yes) ifeq ($(ENABLE_QLMPI),yes)
MCEXEC_LIBS += -lmpi MCEXEC_LIBS += -lmpi
TARGET+= libqlmpi.so ql_server ql_mpiexec_start ql_mpiexec_finalize ql_talker libqlfort.so TARGET+= libqlmpi.so ql_server ql_mpiexec_start ql_mpiexec_finalize ql_talker libqlfort.so
endif endif
ifeq ($(WITH_SYSCALL_INTERCEPT),yes)
TARGET += syscall_intercept.so
endif
ifeq ($(ARCH), arm64) ifeq ($(ARCH), arm64)
CFLAGS += $(foreach i, $(shell seq 1 100), $(addprefix -DPOSTK_DEBUG_ARCH_DEP_, $(i))) CFLAGS += $(foreach i, $(shell seq 1 100), $(addprefix -DPOSTK_DEBUG_ARCH_DEP_, $(i)))
CFLAGS += $(foreach i, $(shell seq 1 100), $(addprefix -DPOSTK_DEBUG_TEMP_FIX_, $(i))) CFLAGS += $(foreach i, $(shell seq 1 100), $(addprefix -DPOSTK_DEBUG_TEMP_FIX_, $(i)))
@@ -40,10 +47,10 @@ mcexec: mcexec.c libmcexec.a
# POSTK_DEBUG_ARCH_DEP_34, eclair arch depend separate. # POSTK_DEBUG_ARCH_DEP_34, eclair arch depend separate.
ifeq ($(ARCH), arm64) ifeq ($(ARCH), arm64)
eclair: eclair.c arch/$(ARCH)/arch-eclair.c eclair: eclair.c arch/$(ARCH)/arch-eclair.c
$(CC) -I.. -I. -I./arch/$(ARCH)/include -I$(VPATH)/.. -I$(VPATH) -I$(VPATH)/arch/$(ARCH)/include $(CFLAGS) -o $@ $^ $(LIBS) $(CC) -I.. -I. -I./arch/$(ARCH)/include -I$(VPATH)/.. -I$(VPATH) -I$(VPATH)/arch/$(ARCH)/include $(CFLAGS) -o $@ $^ $(LIBS) -ldl -lz
else else
eclair: eclair.c eclair: eclair.c arch/$(ARCH)/arch-eclair.c
$(CC) $(CFLAGS) -I${IHKDIR} -o $@ $^ $(LIBS) $(CC) -I.. -I$(VPATH) -I$(VPATH)/arch/$(ARCH)/include $(CFLAGS) -o $@ $^ $(LIBS)
endif endif
ldump2mcdump.so: ldump2mcdump.c ldump2mcdump.so: ldump2mcdump.c
@@ -52,6 +59,12 @@ ldump2mcdump.so: ldump2mcdump.c
libsched_yield: libsched_yield.c libsched_yield: libsched_yield.c
$(CC) -shared -fPIC -Wl,-soname,sched_yield.so.1 -o libsched_yield.so.1.0.0 $^ -lc -ldl $(CC) -shared -fPIC -Wl,-soname,sched_yield.so.1 -o libsched_yield.so.1.0.0 $^ -lc -ldl
syscall_intercept.so: syscall_intercept.c libsyscall_intercept_arch.a
$(CC) $(CPPFLAGS_SYSCALL_INTERCEPT) -g -O2 $(LDFLAGS_SYSCALL_INTERCEPT) -lsyscall_intercept -fpic -shared -L. -lsyscall_intercept_arch $^ -o $@
libsyscall_intercept_arch.a::
+(cd arch/${ARCH}; $(MAKE))
libmcexec.a:: libmcexec.a::
+(cd arch/${ARCH}; $(MAKE)) +(cd arch/${ARCH}; $(MAKE))
@@ -99,6 +112,9 @@ ifeq ($(ENABLE_QLMPI),yes)
install -m 755 ql_mpiexec_start $(BINDIR) install -m 755 ql_mpiexec_start $(BINDIR)
install -m 755 ql_mpiexec_finalize $(BINDIR) install -m 755 ql_mpiexec_finalize $(BINDIR)
install -m 755 ql_talker $(SBINDIR) install -m 755 ql_talker $(SBINDIR)
endif
ifeq ($(WITH_SYSCALL_INTERCEPT),yes)
install -m 755 syscall_intercept.so $(MCKERNEL_LIBDIR)
endif endif
@uncomment_if_ENABLE_MEMDUMP@install -m 755 eclair $(BINDIR) @uncomment_if_ENABLE_MEMDUMP@install -m 755 eclair $(BINDIR)
@uncomment_if_ENABLE_MEMDUMP@install -m 755 vmcore2mckdump $(BINDIR) @uncomment_if_ENABLE_MEMDUMP@install -m 755 vmcore2mckdump $(BINDIR)

View File

@@ -4,7 +4,7 @@ BINDIR=@BINDIR@
KDIR ?= @KDIR@ KDIR ?= @KDIR@
CFLAGS=-Wall -O -I. CFLAGS=-Wall -O -I.
VPATH=@abs_srcdir@ VPATH=@abs_srcdir@
TARGET=../../libmcexec.a TARGET=../../libmcexec.a ../../libsyscall_intercept_arch.a
LIBS=@LIBS@ LIBS=@LIBS@
all: $(TARGET) all: $(TARGET)
@@ -18,6 +18,12 @@ archdep.o: archdep.S
arch_syscall.o: arch_syscall.c arch_syscall.o: arch_syscall.c
$(CC) -c -I${KDIR} $(CFLAGS) $(EXTRA_CFLAGS) -fPIE -pie -pthread $< $(CC) -c -I${KDIR} $(CFLAGS) $(EXTRA_CFLAGS) -fPIE -pie -pthread $<
../../libsyscall_intercept_arch.a: archdep_c.o
$(AR) cr ../../libsyscall_intercept_arch.a archdep_c.o
archdep_c.o: archdep_c.c
$(CC) -c -I${KDIR} $(CFLAGS) $(EXTRA_CFLAGS) -fPIE -pie -pthread $<
clean: clean:
$(RM) $(TARGET) *.o $(RM) $(TARGET) *.o

View File

@@ -42,7 +42,7 @@ int print_kregs(char *rbp, size_t rbp_size, const struct arch_kregs *kregs)
} }
for (i = 0; i < sizeof(regs_1)/sizeof(regs_1[0]); i++) { /* rsi, rdi, rbp, rsp */ for (i = 0; i < sizeof(regs_1)/sizeof(regs_1[0]); i++) { /* rsi, rdi, rbp, rsp */
ret = print_bin(rbp, rbp_size, (void *)regs_1[i], sizeof(regs_1[0])); ret = print_bin(rbp, rbp_size, regs_1 + i, sizeof(regs_1[0]));
if (ret < 0) { if (ret < 0) {
return ret; return ret;
} }
@@ -62,7 +62,7 @@ int print_kregs(char *rbp, size_t rbp_size, const struct arch_kregs *kregs)
} }
for (i = 0; i < sizeof(regs_2)/sizeof(regs_2[0]); i++) { /* r12-r15 */ for (i = 0; i < sizeof(regs_2)/sizeof(regs_2[0]); i++) { /* r12-r15 */
ret = print_bin(rbp, rbp_size, (void *)regs_2[i], sizeof(regs_2[0])); ret = print_bin(rbp, rbp_size, regs_2 + i, sizeof(regs_2[0]));
if (ret < 0) { if (ret < 0) {
return ret; return ret;
} }

View File

@@ -67,6 +67,12 @@ get_syscall_arg6(syscall_args *args)
return args->r9; return args->r9;
} }
/*
 * Return the rip field stored in the given syscall_args.
 * NOTE(review): presumably the user instruction pointer saved at syscall
 * entry -- confirm against the syscall_args definition.
 */
static inline unsigned long
get_syscall_rip(syscall_args *args)
{
return args->rip;
}
static inline void static inline void
set_syscall_number(syscall_args *args, unsigned long value) set_syscall_number(syscall_args *args, unsigned long value)
{ {

View File

@@ -48,7 +48,7 @@ archdep_syscall(struct syscall_wait_desc *w, long *ret)
if (*ret >= PATH_MAX) { if (*ret >= PATH_MAX) {
*ret = -ENAMETOOLONG; *ret = -ENAMETOOLONG;
} }
if (ret < 0) { if (*ret < 0) {
return 0; return 0;
} }
__dprintf("open: %s\n", pathbuf); __dprintf("open: %s\n", pathbuf);

View File

@@ -1,15 +1,22 @@
/* /*
arg: rdi, rsi, rdx, rcx, r8, r9 Calling convention:
ret: rax arg: rdi, rsi, rdx, rcx, r8, r9
ret: rax
rax syscall number rdi: fd
syscall: (rax:num) rdi rsi rdx r10 r8 r9 (rcx:ret addr) rsi: cmd
fd, cmd, param rdx: param
rdi: fd rcx: save area
rsi: cmd r8: new thread context
rdx: param
rcx: save area System call convention:
r8: new thread context syscall number: rax
arg: rdi, rsi, rdx, r10, r8, r9
return addr: rcx
rdi: fd
rsi: cmd
rdx: param
*/ */
.global switch_ctx .global switch_ctx
@@ -91,6 +98,7 @@ switch_ctx:
1: 1:
mov $0xffffffffffffffff,%eax mov $0xffffffffffffffff,%eax
retq
2: 2:
pushq %rax pushq %rax
movq $158,%rax /* arch_prctl */ movq $158,%rax /* arch_prctl */
@@ -146,4 +154,3 @@ compare_and_swap_int:
lock lock
cmpxchgl %edx,0(%rdi) cmpxchgl %edx,0(%rdi)
retq retq

View File

@@ -0,0 +1,52 @@
/*
function call convention
rdi, rsi, rdx, rcx, r8, r9: IN arguments
rax: OUT return value
syscall convention:
rax: IN syscall number
rdi, rsi, rdx, r10, r8, r9: IN arguments
rax: OUT return value
rcx, r11: CLOBBER
*/
long uti_syscall6(long syscall_number, long arg0, long arg1, long arg2, long arg3, long arg4, long arg5)
{
long ret;
asm volatile ("movq %[arg3],%%r10; movq %[arg4],%%r8; movq %[arg5],%%r9; syscall"
: "=a" (ret)
: "a" (syscall_number),
"D" (arg0), "S" (arg1), "d" (arg2),
[arg3] "g" (arg3), [arg4] "g" (arg4), [arg5] "g" (arg5)
: "rcx", "r11", "r10", "r8", "r9", "memory");
return ret;
}
/*
 * Issue a raw x86-64 system call with three arguments
 * (rdi = arg0, rsi = arg1, rdx = arg2) and return the value left in rax.
 */
long uti_syscall3(long syscall_number, long arg0, long arg1, long arg2)
{
	/* rax carries the syscall number in and the result out */
	long rax = syscall_number;

	asm volatile ("syscall"
		      : "+a" (rax)
		      : "D" (arg0), "S" (arg1), "d" (arg2)
		      : "rcx", "r11", "memory");
	return rax;
}
/*
 * Issue a raw x86-64 system call with one argument (rdi = arg0)
 * and return the value left in rax.
 */
long uti_syscall1(long syscall_number, long arg0)
{
	/* rax carries the syscall number in and the result out */
	long rax = syscall_number;

	asm volatile ("syscall"
		      : "+a" (rax)
		      : "D" (arg0)
		      : "rcx", "r11", "memory");
	return rax;
}
/*
 * Issue a raw x86-64 system call that takes no arguments
 * and return the value left in rax.
 */
long uti_syscall0(long syscall_number)
{
	/* rax carries the syscall number in and the result out */
	long rax = syscall_number;

	asm volatile ("syscall"
		      : "+a" (rax)
		      :
		      : "rcx", "r11", "memory");
	return rax;
}

View File

@@ -2,8 +2,18 @@
#ifndef HEADER_USER_X86_ECLAIR_H #ifndef HEADER_USER_X86_ECLAIR_H
#define HEADER_USER_X86_ECLAIR_H #define HEADER_USER_X86_ECLAIR_H
#define MAP_KERNEL 0xFFFFFFFF80000000 #ifndef POSTK_DEBUG_ARCH_DEP_34
#define MAP_ST 0xFFFF800000000000 #define MAP_ST_START 0xffff800000000000UL
#define MAP_VMAP_START 0xffff850000000000UL
#define MAP_FIXED_START 0xffff860000000000UL
#define LINUX_PAGE_OFFSET 0xffff880000000000UL
#define MAP_KERNEL_START 0xFFFFFFFFFE800000UL
#endif /* POSTK_DEBUG_ARCH_DEP_34 */
/* TODO: these should be updated when McKernel changes */
#define MCKERNEL_ELF_START "0xFFFFFFFFFE801000"
#define MCKERNEL_ELF_LEN "0x0000000000100000"
#define ARCH_CLV_SPAN "x86_cpu_local_variables_span" #define ARCH_CLV_SPAN "x86_cpu_local_variables_span"

View File

@@ -1,4 +1,6 @@
extern int switch_ctx(int fd, unsigned long cmd, void **param, void *lctx, void *rctx); #include "../include/uprotocol.h"
extern int switch_ctx(int fd, unsigned long cmd, struct uti_save_fs_desc *desc, void *lctx, void *rctx);
extern unsigned long compare_and_swap(unsigned long *addr, unsigned long old, unsigned long new); extern unsigned long compare_and_swap(unsigned long *addr, unsigned long old, unsigned long new);
extern unsigned int compare_and_swap_int(unsigned int *addr, unsigned int old, unsigned int new); extern unsigned int compare_and_swap_int(unsigned int *addr, unsigned int old, unsigned int new);
extern int archdep_syscall(struct syscall_wait_desc *w, long *ret); extern int archdep_syscall(struct syscall_wait_desc *w, long *ret);

View File

@@ -0,0 +1,5 @@
/*
 * Raw system call wrappers, one per argument count (6, 3, 1, 0).
 * The first parameter is the system call number, the remaining ones are
 * the system call arguments in order; each returns a long result.
 */
extern long uti_syscall6(long syscall_number, long arg0, long arg1, long arg2, long arg3, long arg4, long arg5);
extern long uti_syscall3(long syscall_number, long arg0, long arg1, long arg2);
extern long uti_syscall1(long syscall_number, long arg0);
extern long uti_syscall0(long syscall_number);

View File

@@ -8,9 +8,7 @@
* Copyright (C) 2015 RIKEN AICS * Copyright (C) 2015 RIKEN AICS
*/ */
#ifdef POSTK_DEBUG_ARCH_DEP_33
#include "../config.h" #include "../config.h"
#endif /* POSTK_DEBUG_ARCH_DEP_33 */
#include <bfd.h> #include <bfd.h>
#include <fcntl.h> #include <fcntl.h>
#include <inttypes.h> #include <inttypes.h>
@@ -22,10 +20,8 @@
#include <arpa/inet.h> #include <arpa/inet.h>
#include <sys/ioctl.h> #include <sys/ioctl.h>
#include <ihk/ihk_host_user.h> #include <ihk/ihk_host_user.h>
#ifdef POSTK_DEBUG_ARCH_DEP_34
#include <eclair.h> #include <eclair.h>
#include <arch-eclair.h> #include <arch-eclair.h>
#endif /* POSTK_DEBUG_ARCH_DEP_34 */
#define CPU_TID_BASE 1000000 #define CPU_TID_BASE 1000000
@@ -85,11 +81,7 @@ static struct thread_info *curr_thread = NULL;
static uintptr_t ihk_mc_switch_context = -1; static uintptr_t ihk_mc_switch_context = -1;
#endif /* POSTK_DEBUG_ARCH_DEP_34 */ #endif /* POSTK_DEBUG_ARCH_DEP_34 */
#ifdef POSTK_DEBUG_ARCH_DEP_34
uintptr_t lookup_symbol(char *name) { uintptr_t lookup_symbol(char *name) {
#else /* POSTK_DEBUG_ARCH_DEP_34 */
static uintptr_t lookup_symbol(char *name) {
#endif /* POSTK_DEBUG_ARCH_DEP_34 */
int i; int i;
for (i = 0; i < nsyms; ++i) { for (i = 0; i < nsyms; ++i) {
@@ -101,22 +93,22 @@ static uintptr_t lookup_symbol(char *name) {
return NOSYMBOL; return NOSYMBOL;
} /* lookup_symbol() */ } /* lookup_symbol() */
#define NOPHYS ((uintptr_t)-1)
static uintptr_t virt_to_phys(uintptr_t va) { static uintptr_t virt_to_phys(uintptr_t va) {
#ifndef POSTK_DEBUG_ARCH_DEP_34 if (va >= MAP_KERNEL_START) {
#define MAP_KERNEL 0xFFFFFFFF80000000 return va - MAP_KERNEL_START + kernel_base;
#endif /* POSTK_DEBUG_ARCH_DEP_34 */
if (va >= MAP_KERNEL) {
return (va - MAP_KERNEL + kernel_base);
} }
#ifndef POSTK_DEBUG_ARCH_DEP_34 else if (va >= LINUX_PAGE_OFFSET) {
#define MAP_ST 0xFFFF800000000000 return va - LINUX_PAGE_OFFSET;
#endif /* POSTK_DEBUG_ARCH_DEP_34 */
if (va >= MAP_ST) {
return (va - MAP_ST);
} }
if (0) printf("virt_to_phys(%lx): -1\n", va); else if (va >= MAP_FIXED_START) {
#define NOPHYS ((uintptr_t)-1) return va - MAP_FIXED_START;
}
else if (va >= MAP_ST_START) {
return va - MAP_ST_START;
}
return NOPHYS; return NOPHYS;
} /* virt_to_phys() */ } /* virt_to_phys() */
@@ -673,11 +665,7 @@ static int setup_dump(char *fname) {
return 0; return 0;
} /* setup_dump() */ } /* setup_dump() */
#ifdef POSTK_DEBUG_ARCH_DEP_38
static ssize_t print_hex(char *buf, size_t buf_size, char *str) { static ssize_t print_hex(char *buf, size_t buf_size, char *str) {
#else /* POSTK_DEBUG_ARCH_DEP_38 */
static ssize_t print_hex(char *buf, char *str) {
#endif /* POSTK_DEBUG_ARCH_DEP_38 */
char *p; char *p;
char *q; char *q;
@@ -702,11 +690,7 @@ static ssize_t print_hex(char *buf, char *str) {
return (q - buf); return (q - buf);
} /* print_hex() */ } /* print_hex() */
#if defined(POSTK_DEBUG_ARCH_DEP_34) && defined(POSTK_DEBUG_ARCH_DEP_38)
ssize_t print_bin(char *buf, size_t buf_size, void *data, size_t size) { ssize_t print_bin(char *buf, size_t buf_size, void *data, size_t size) {
#else /* POSTK_DEBUG_ARCH_DEP_34 && POSTK_DEBUG_ARCH_DEP_38*/
static ssize_t print_bin(char *buf, void *data, size_t size) {
#endif /* POSTK_DEBUG_ARCH_DEP_34 && POSTK_DEBUG_ARCH_DEP_38*/
uint8_t *p; uint8_t *p;
char *q; char *q;
int i; int i;
@@ -733,13 +717,8 @@ static ssize_t print_bin(char *buf, void *data, size_t size) {
return (q - buf); return (q - buf);
} /* print_bin() */ } /* print_bin() */
#ifdef POSTK_DEBUG_ARCH_DEP_38
static void command(const char *cmd, char *res, size_t res_size) { static void command(const char *cmd, char *res, size_t res_size) {
const char *p; const char *p;
#else /* POSTK_DEBUG_ARCH_DEP_38 */
static void command(char *cmd, char *res) {
char *p;
#endif /* POSTK_DEBUG_ARCH_DEP_38 */
char *rbp; char *rbp;
p = cmd; p = cmd;
@@ -801,11 +780,7 @@ static void command(char *cmd, char *res) {
#endif /* POSTK_DEBUG_ARCH_DEP_34 */ #endif /* POSTK_DEBUG_ARCH_DEP_34 */
rbp += sprintf(rbp, "l"); rbp += sprintf(rbp, "l");
if (0) if (0)
#ifdef POSTK_DEBUG_ARCH_DEP_38
rbp += print_hex(rbp, res_size, str); rbp += print_hex(rbp, res_size, str);
#else /* POSTK_DEBUG_ARCH_DEP_38 */
rbp += print_hex(rbp, str);
#endif /* POSTK_DEBUG_ARCH_DEP_38 */
rbp += sprintf(rbp, "%s", str); rbp += sprintf(rbp, "%s", str);
} }
else if (!strcmp(p, "D")) { else if (!strcmp(p, "D")) {
@@ -814,20 +789,9 @@ static void command(char *cmd, char *res) {
} }
else if (!strcmp(p, "g")) { else if (!strcmp(p, "g")) {
if (curr_thread->cpu < 0) { if (curr_thread->cpu < 0) {
#ifndef POSTK_DEBUG_ARCH_DEP_34
struct x86_kregs {
uintptr_t rsp, rbp, rbx, rsi;
uintptr_t rdi, r12, r13, r14;
uintptr_t r15, rflags, rsp0;
};
#endif /* POSTK_DEBUG_ARCH_DEP_34 */
int error; int error;
#ifdef POSTK_DEBUG_ARCH_DEP_34
struct arch_kregs kregs; struct arch_kregs kregs;
#else /* POSTK_DEBUG_ARCH_DEP_34 */
struct x86_kregs kregs;
#endif /* POSTK_DEBUG_ARCH_DEP_34 */
error = read_mem(curr_thread->process+K(CTX_OFFSET), error = read_mem(curr_thread->process+K(CTX_OFFSET),
&kregs, sizeof(kregs)); &kregs, sizeof(kregs));
@@ -836,36 +800,7 @@ static void command(char *cmd, char *res) {
break; break;
} }
#ifdef POSTK_DEBUG_ARCH_DEP_34
print_kregs(rbp, res_size, &kregs); print_kregs(rbp, res_size, &kregs);
#else /* POSTK_DEBUG_ARCH_DEP_34 */
rbp += sprintf(rbp, "xxxxxxxxxxxxxxxx"); /* rax */
rbp += print_bin(rbp, &kregs.rbx, sizeof(uint64_t));
rbp += sprintf(rbp, "xxxxxxxxxxxxxxxx"); /* rcx */
rbp += sprintf(rbp, "xxxxxxxxxxxxxxxx"); /* rdx */
rbp += print_bin(rbp, &kregs.rsi, sizeof(uint64_t));
rbp += print_bin(rbp, &kregs.rdi, sizeof(uint64_t));
rbp += print_bin(rbp, &kregs.rbp, sizeof(uint64_t));
rbp += print_bin(rbp, &kregs.rsp, sizeof(uint64_t));
rbp += sprintf(rbp, "xxxxxxxxxxxxxxxx"); /* r8 */
rbp += sprintf(rbp, "xxxxxxxxxxxxxxxx"); /* r9 */
rbp += sprintf(rbp, "xxxxxxxxxxxxxxxx"); /* r10 */
rbp += sprintf(rbp, "xxxxxxxxxxxxxxxx"); /* r11 */
rbp += print_bin(rbp, &kregs.r12, sizeof(uint64_t));
rbp += print_bin(rbp, &kregs.r13, sizeof(uint64_t));
rbp += print_bin(rbp, &kregs.r14, sizeof(uint64_t));
rbp += print_bin(rbp, &kregs.r15, sizeof(uint64_t));
rbp += print_bin(rbp, &ihk_mc_switch_context,
sizeof(uint64_t)); /* rip */
rbp += print_bin(rbp, &kregs.rflags, sizeof(uint32_t));
rbp += sprintf(rbp, "xxxxxxxx"); /* cs */
rbp += sprintf(rbp, "xxxxxxxx"); /* ss */
rbp += sprintf(rbp, "xxxxxxxx"); /* ds */
rbp += sprintf(rbp, "xxxxxxxx"); /* es */
rbp += sprintf(rbp, "xxxxxxxx"); /* fs */
rbp += sprintf(rbp, "xxxxxxxx"); /* gs */
#endif /* POSTK_DEBUG_ARCH_DEP_34 */
} }
else { else {
int error; int error;
@@ -943,11 +878,7 @@ static void command(char *cmd, char *res) {
#endif /* POSTK_DEBUG_ARCH_DEP_34 */ #endif /* POSTK_DEBUG_ARCH_DEP_34 */
rbp += sprintf(rbp, "l"); rbp += sprintf(rbp, "l");
if (0) if (0)
#ifdef POSTK_DEBUG_ARCH_DEP_38
rbp += print_hex(rbp, res_size, str); rbp += print_hex(rbp, res_size, str);
#else /* POSTK_DEBUG_ARCH_DEP_38 */
rbp += print_hex(rbp, str);
#endif /* POSTK_DEBUG_ARCH_DEP_38 */
rbp += sprintf(rbp, "%s", str); rbp += sprintf(rbp, "%s", str);
} }
else if (!strncmp(p, "T", 1)) { else if (!strncmp(p, "T", 1)) {
@@ -1039,11 +970,7 @@ static void command(char *cmd, char *res) {
else { else {
q += sprintf(q, "status=%#x", ti->status); q += sprintf(q, "status=%#x", ti->status);
} }
#ifdef POSTK_DEBUG_ARCH_DEP_38
rbp += print_hex(rbp, res_size, buf); rbp += print_hex(rbp, res_size, buf);
#else /* POSTK_DEBUG_ARCH_DEP_38 */
rbp += print_hex(rbp, buf);
#endif /* POSTK_DEBUG_ARCH_DEP_38 */
} }
} while (0); } while (0);
@@ -1272,11 +1199,7 @@ int main(int argc, char *argv[]) {
} }
mode = 0; mode = 0;
fputc('+', ofp); fputc('+', ofp);
#ifdef POSTK_DEBUG_ARCH_DEP_38
command(lbuf, rbuf, sizeof(rbuf)); command(lbuf, rbuf, sizeof(rbuf));
#else /* POSTK_DEBUG_ARCH_DEP_38 */
command(lbuf, rbuf);
#endif /* POSTK_DEBUG_ARCH_DEP_38 */
sum = 0; sum = 0;
for (p = rbuf; *p != '\0'; ++p) { for (p = rbuf; *p != '\0'; ++p) {
sum += *p; sum += *p;

View File

@@ -3,11 +3,7 @@
#ifndef HEADER_USER_COMMON_ECLAIR_H #ifndef HEADER_USER_COMMON_ECLAIR_H
#define HEADER_USER_COMMON_ECLAIR_H #define HEADER_USER_COMMON_ECLAIR_H
#ifdef POSTK_DEBUG_ARCH_DEP_76 /* header path fix */
#include "../config.h" #include "../config.h"
#else /* POSTK_DEBUG_ARCH_DEP_76 */
#include <config.h>
#endif /* POSTK_DEBUG_ARCH_DEP_76 */
#include <stdio.h> #include <stdio.h>
#include <inttypes.h> #include <inttypes.h>
#include <arch-eclair.h> #include <arch-eclair.h>

View File

@@ -11,7 +11,9 @@
typedef int (*int_void_fn)(void); typedef int (*int_void_fn)(void);
#if 0
static int_void_fn orig_sched_yield = 0; static int_void_fn orig_sched_yield = 0;
#endif
int sched_yield(void) int sched_yield(void)
{ {

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,139 @@
#include <libsyscall_intercept_hook_point.h>
#include <errno.h>
#include <stdio.h>
#include <stdint.h>
#include <syscall.h>
#include <sys/time.h>
#include <sys/resource.h>
#include "../include/uprotocol.h"
#include "../include/uti.h"
#include "./archdep_uti.h"
/*
 * State consulted by hook() on every intercepted system call. init() sets
 * syscall_stack_top and passes this object's address to McKernel.
 */
static struct uti_desc uti_desc;
/* When defined, hook() counts taken-over-thread syscalls in uti_desc.syscalls[] */
#define DEBUG_UTI
/*
 * hook() - system call interception callback (installed into
 * intercept_hook_point by init()).
 *
 * syscall_number, arg0..arg5: the intercepted system call and its arguments.
 * result: where the emulated return value is stored when the call is
 *         taken over.
 *
 * Returns 0 when the system call is taken over (*result has been set),
 * or 1 when it is not taken over.
 *
 * Only calls made while uti_desc.start_syscall_intercept is set, and by the
 * thread whose tid equals uti_desc.mck_tid, are considered for take-over.
 */
static int
hook(long syscall_number,
long arg0, long arg1,
long arg2, long arg3,
long arg4, long arg5,
long *result)
{
//return 1; /* debug */
/* Identify the calling thread with a raw gettid system call */
int tid = uti_syscall0(__NR_gettid);
struct terminate_thread_desc term_desc;
unsigned long code;
int stack_top;
if (!uti_desc.start_syscall_intercept) {
return 1; /* System call isn't taken over */
}
/* Calls from any other thread are only counted (when the syscalls2 table
   is present) and then left alone */
if (tid != uti_desc.mck_tid) {
if (uti_desc.syscalls2 && syscall_number >= 0 && syscall_number < 512) {
uti_desc.syscalls2[syscall_number]++;
}
return 1;
}
#ifdef DEBUG_UTI
/* Per-syscall counter for the intercepted thread; numbers outside
   0..511 are not counted */
if (uti_desc.syscalls && syscall_number >= 0 && syscall_number < 512) {
uti_desc.syscalls[syscall_number]++;
}
#endif
switch (syscall_number) {
case __NR_gettid:
/* Report the tid recorded in uti_desc instead of asking the kernel */
*result = uti_desc.mck_tid;
return 0;
/* These calls are forwarded via ioctl(MCEXEC_UP_SYSCALL_THREAD) */
case __NR_futex:
case __NR_brk:
case __NR_mmap:
case __NR_munmap:
case __NR_mprotect:
case __NR_mremap:
/* Overflow check */
if (uti_desc.syscall_stack_top == -1) {
*result = -ENOMEM;
return 0;
}
/* Sanity check */
if (uti_desc.syscall_stack_top < 0 || uti_desc.syscall_stack_top >= UTI_SZ_SYSCALL_STACK) {
*result = -EINVAL;
return 0;
}
/* Store the return value in the stack to prevent it from getting corrupted
when an interrupt happens just after ioctl() and before copying the return
value to *result */
/* Reserve the current top slot; the top index moves down by one */
stack_top = __sync_fetch_and_sub(&uti_desc.syscall_stack_top, 1);
uti_desc.syscall_stack[stack_top].number = syscall_number;
uti_desc.syscall_stack[stack_top].args[0] = arg0;
uti_desc.syscall_stack[stack_top].args[1] = arg1;
uti_desc.syscall_stack[stack_top].args[2] = arg2;
uti_desc.syscall_stack[stack_top].args[3] = arg3;
uti_desc.syscall_stack[stack_top].args[4] = arg4;
uti_desc.syscall_stack[stack_top].args[5] = arg5;
uti_desc.syscall_stack[stack_top].uti_clv = uti_desc.uti_clv;
/* Pre-set ret; presumably the ioctl handler overwrites it -- TODO confirm */
uti_desc.syscall_stack[stack_top].ret = -EINVAL;
/* The ioctl's own return value is not checked; the result is read from
   the slot's ret field */
uti_syscall3(__NR_ioctl, uti_desc.fd, MCEXEC_UP_SYSCALL_THREAD, (long)(uti_desc.syscall_stack + stack_top));
*result = uti_desc.syscall_stack[stack_top].ret;
/* push syscall_struct list */
/* Release the slot: the top index moves back up by one */
__sync_fetch_and_add(&uti_desc.syscall_stack_top, 1);
return 0; /* System call is taken over */
case __NR_exit_group:
/* exit_group is distinguished from exit by bit 32 of code */
code = 0x100000000;
goto make_remote_thread_exit;
case __NR_exit:
code = 0;
make_remote_thread_exit:
/* Make migrated-to-Linux thread on the McKernel side call do_exit() or terminate() */
term_desc.pid = uti_desc.pid;
term_desc.tid = uti_desc.tid; /* tid of mcexec */
/* Low 8 bits of the exit status are placed in bits 8..15 of code */
term_desc.code = code | ((arg0 & 255) << 8);
term_desc.tsk = uti_desc.key;
uti_syscall3(__NR_ioctl, uti_desc.fd, MCEXEC_UP_TERMINATE_THREAD, (long)&term_desc);
/* Returns 1: the exit/exit_group call itself is not taken over */
return 1;
/* Process/thread creation and exec are refused with -ENOSYS */
case __NR_clone:
case __NR_fork:
case __NR_vfork:
case __NR_execve:
*result = -ENOSYS;
return 0;
#if 0 /* debug */
case __NR_set_robust_list:
*result = -ENOSYS;
return 0;
#endif
/* Syscall number 888 returns the address of uti_desc to the caller */
case 888:
*result = (long)&uti_desc;
return 0;
default:
return 1;
}
/* Not reached: every switch arm above returns */
return 0;
}
static __attribute__((constructor)) void
init(void)
{
/* Set up the callback function */
intercept_hook_point = hook;
/* Initialize uti_desc */
uti_desc.syscall_stack_top = UTI_SZ_SYSCALL_STACK - 1;
/* Pass address of uti_desc to McKernel */
uti_syscall1(733, (unsigned long)&uti_desc);
}
/* Library destructor: intentionally empty, no teardown is performed. */
static __attribute__((destructor)) void
dtor(void)
{
}

1
ihk Submodule

Submodule ihk added at d9c74adf3f

View File

@@ -6,7 +6,7 @@ IHKDIR=$(IHKBASE)/$(TARGETDIR)
OBJS = init.o mem.o debug.o mikc.o listeners.o ap.o syscall.o cls.o host.o OBJS = init.o mem.o debug.o mikc.o listeners.o ap.o syscall.o cls.o host.o
OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o shmobj.o OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o shmobj.o
OBJS += zeroobj.o procfs.o devobj.o sysfs.o xpmem.o profile.o freeze.o OBJS += zeroobj.o procfs.o devobj.o sysfs.o xpmem.o profile.o freeze.o
OBJS += rbtree.o OBJS += rbtree.o hugefileobj.o
OBJS += pager.o OBJS += pager.o
# POSTK_DEBUG_ARCH_DEP_18 coredump arch separation. # POSTK_DEBUG_ARCH_DEP_18 coredump arch separation.
DEPSRCS=$(wildcard $(SRC)/*.c) DEPSRCS=$(wildcard $(SRC)/*.c)
@@ -19,7 +19,7 @@ endif
CFLAGS += -I$(SRC)/include -I@abs_builddir@/../ -I@abs_builddir@/include -D__KERNEL__ -g -fno-omit-frame-pointer -fno-inline -fno-inline-small-functions CFLAGS += -I$(SRC)/include -I@abs_builddir@/../ -I@abs_builddir@/include -D__KERNEL__ -g -fno-omit-frame-pointer -fno-inline -fno-inline-small-functions
ifneq ($(ARCH), arm64) ifneq ($(ARCH), arm64)
CFLAGS += -mcmodel=large -mno-red-zone CFLAGS += -mcmodel=large -mno-red-zone -mno-sse
endif endif
LDFLAGS += -e arch_start LDFLAGS += -e arch_start
IHKOBJ = ihk/ihk.o IHKOBJ = ihk/ihk.o

View File

@@ -29,15 +29,13 @@
#include <time.h> #include <time.h>
#include <syscall.h> #include <syscall.h>
#include <rusage_private.h> #include <rusage_private.h>
#include <debug.h>
//#define DEBUG_PRINT_AP //#define DEBUG_PRINT_AP
#ifdef DEBUG_PRINT_AP #ifdef DEBUG_PRINT_AP
#define dkprintf(...) do { kprintf(__VA_ARGS__); } while (0) #undef DDEBUG_DEFAULT
#define ekprintf(...) do { kprintf(__VA_ARGS__); } while (0) #define DDEBUG_DEFAULT DDEBUG_PRINT
#else
#define dkprintf(...) do { } while (0)
#define ekprintf(...) do { kprintf(__VA_ARGS__); } while (0)
#endif #endif
int num_processors = 1; int num_processors = 1;
@@ -209,8 +207,10 @@ store_fake_cpu_info(struct sysfs_ops *ops0, void *instance, void *buf,
static struct fake_cpu_info_ops show_fci_online = { static struct fake_cpu_info_ops show_fci_online = {
.member = ONLINE, .member = ONLINE,
.ops.show = &show_fake_cpu_info, .ops = {
.ops.store = &store_fake_cpu_info, .show = &show_fake_cpu_info,
.store = &store_fake_cpu_info,
},
}; };
void void

View File

@@ -1,24 +1,28 @@
PHDRS PHDRS
{ {
text PT_LOAD FLAGS(5); text PT_LOAD FLAGS(5);
data PT_LOAD FLAGS(7); data PT_LOAD FLAGS(7);
} }
SECTIONS SECTIONS
{ {
. = 0xffffffff80001000; . = 0xffffffff80001000;
_head = .; _head = .;
.text : { .text : {
*(.text); *(.text);
} : text } : text
. = ALIGN(4096); . = ALIGN(4096);
.data : { .data : {
*(.data) *(.data)
*(.data.*) *(.data.*)
. = ALIGN(8);
__start___verbose = .;
*(__verbose);
__stop___verbose = .;
} :data } :data
.rodata : { .rodata : {
*(.rodata .rodata.*) *(.rodata .rodata.*)
} :data } :data
.vsyscall : ALIGN(0x1000) { .vsyscall : ALIGN(0x1000) {
@@ -37,14 +41,14 @@ SECTIONS
. = ALIGN(4096); . = ALIGN(4096);
} : data = 0xf4 } : data = 0xf4
.bss : { .bss : {
*(.bss .bss.*) *(.bss .bss.*)
} }
. = ALIGN(4096); . = ALIGN(4096);
_end = .; _end = .;
/DISCARD/ : { /DISCARD/ : {
*(.eh_frame) *(.eh_frame)
*(.note.gnu.build-id) *(.note.gnu.build-id)
} }
} }

View File

@@ -1,24 +1,28 @@
PHDRS PHDRS
{ {
text PT_LOAD FLAGS(5); text PT_LOAD FLAGS(5);
data PT_LOAD FLAGS(7); data PT_LOAD FLAGS(7);
} }
SECTIONS SECTIONS
{ {
. = 0xffffffff80001000; . = 0xffffffff80001000;
_head = .; _head = .;
.text : { .text : {
*(.text); *(.text);
} : text } : text
. = ALIGN(4096); . = ALIGN(4096);
.data : { .data : {
*(.data) *(.data)
*(.data.*) *(.data.*)
. = ALIGN(8);
__start___verbose = .;
*(__verbose);
__stop___verbose = .;
} :data } :data
.rodata : { .rodata : {
*(.rodata .rodata.*) *(.rodata .rodata.*)
} :data } :data
.vsyscall : ALIGN(0x1000) { .vsyscall : ALIGN(0x1000) {
@@ -37,14 +41,14 @@ SECTIONS
. = ALIGN(4096); . = ALIGN(4096);
} : data = 0xf4 } : data = 0xf4
.bss : { .bss : {
*(.bss .bss.*) *(.bss .bss.*)
} }
. = ALIGN(4096); . = ALIGN(4096);
_end = .; _end = .;
/DISCARD/ : { /DISCARD/ : {
*(.eh_frame) *(.eh_frame)
*(.note.gnu.build-id) *(.note.gnu.build-id)
} }
} }

View File

@@ -1,24 +1,28 @@
PHDRS PHDRS
{ {
text PT_LOAD FLAGS(5); text PT_LOAD FLAGS(5);
data PT_LOAD FLAGS(7); data PT_LOAD FLAGS(7);
} }
SECTIONS SECTIONS
{ {
. = 0xffffffff80001000; . = 0xffffffff80001000;
_head = .; _head = .;
.text : { .text : {
*(.text); *(.text);
} : text } : text
. = ALIGN(4096); . = ALIGN(4096);
.data : { .data : {
*(.data) *(.data)
*(.data.*) *(.data.*)
. = ALIGN(8);
__start___verbose = .;
*(__verbose);
__stop___verbose = .;
} :data } :data
.rodata : { .rodata : {
*(.rodata .rodata.*) *(.rodata .rodata.*)
} :data } :data
.vsyscall : ALIGN(0x1000) { .vsyscall : ALIGN(0x1000) {
@@ -37,10 +41,10 @@ SECTIONS
. = ALIGN(4096); . = ALIGN(4096);
} : data = 0xf4 } : data = 0xf4
.bss : { .bss : {
*(.bss .bss.*) *(.bss .bss.*)
} }
. = ALIGN(4096); . = ALIGN(4096);
_end = .; _end = .;
} }

View File

@@ -16,6 +16,10 @@ SECTIONS
.data : { .data : {
*(.data) *(.data)
*(.data.*) *(.data.*)
. = ALIGN(8);
__start___verbose = .;
*(__verbose);
__stop___verbose = .;
} :data } :data
.rodata : { .rodata : {
*(.rodata .rodata.*) *(.rodata .rodata.*)

View File

@@ -16,6 +16,10 @@ SECTIONS
.data : { .data : {
*(.data) *(.data)
*(.data.*) *(.data.*)
. = ALIGN(8);
__start___verbose = .;
*(__verbose);
__stop___verbose = .;
} :data } :data
.rodata : { .rodata : {
*(.rodata .rodata.*) *(.rodata .rodata.*)

View File

@@ -16,6 +16,10 @@ SECTIONS
.data : { .data : {
*(.data) *(.data)
*(.data.*) *(.data.*)
. = ALIGN(8);
__start___verbose = .;
*(__verbose);
__stop___verbose = .;
} :data } :data
.rodata : { .rodata : {
*(.rodata .rodata.*) *(.rodata .rodata.*)

View File

@@ -16,6 +16,10 @@ SECTIONS
.data : { .data : {
*(.data) *(.data)
*(.data.*) *(.data.*)
. = ALIGN(8);
__start___verbose = .;
*(__verbose);
__stop___verbose = .;
} :data } :data
.rodata : { .rodata : {
*(.rodata .rodata.*) *(.rodata .rodata.*)

View File

@@ -1,24 +1,28 @@
PHDRS PHDRS
{ {
text PT_LOAD FLAGS(5); text PT_LOAD FLAGS(5);
data PT_LOAD FLAGS(7); data PT_LOAD FLAGS(7);
} }
SECTIONS SECTIONS
{ {
. = 0xffffffff80001000; . = 0xFFFFFFFFFE801000;
_head = .; _head = .;
.text : { .text : {
*(.text); *(.text);
} : text } : text
. = ALIGN(4096); . = ALIGN(4096);
.data : { .data : {
*(.data) *(.data)
*(.data.*) *(.data.*)
. = ALIGN(8);
__start___verbose = .;
*(__verbose);
__stop___verbose = .;
} :data } :data
.rodata : { .rodata : {
*(.rodata .rodata.*) *(.rodata .rodata.*)
} :data } :data
.vsyscall : ALIGN(0x1000) { .vsyscall : ALIGN(0x1000) {
@@ -37,9 +41,9 @@ SECTIONS
. = ALIGN(4096); . = ALIGN(4096);
} : data = 0xf4 } : data = 0xf4
.bss : { .bss : {
*(.bss .bss.*) *(.bss .bss.*)
} }
. = ALIGN(4096); . = ALIGN(4096);
_end = .; _end = .;
} }

View File

@@ -18,6 +18,9 @@
#include <ihk/lock.h> #include <ihk/lock.h>
#include <ihk/monitor.h> #include <ihk/monitor.h>
#include <errno.h> #include <errno.h>
#include <sysfs.h>
#include <debug.h>
#include <limits.h>
struct ihk_kmsg_buf *kmsg_buf; struct ihk_kmsg_buf *kmsg_buf;
@@ -84,7 +87,8 @@ void kputs(char *buf)
debug_spin_unlock_irqrestore(&kmsg_buf->lock, flags_inner); debug_spin_unlock_irqrestore(&kmsg_buf->lock, flags_inner);
kprintf_unlock(flags_outer); kprintf_unlock(flags_outer);
if (DEBUG_KMSG_USED > IHK_KMSG_HIGH_WATER_MARK) { if (irqflags_can_interrupt(flags_outer) &&
DEBUG_KMSG_USED > IHK_KMSG_HIGH_WATER_MARK) {
eventfd(IHK_OS_EVENTFD_TYPE_KMSG); eventfd(IHK_OS_EVENTFD_TYPE_KMSG);
ihk_mc_delay_us(IHK_KMSG_NOTIFY_DELAY); ihk_mc_delay_us(IHK_KMSG_NOTIFY_DELAY);
} }
@@ -123,8 +127,8 @@ int __kprintf(const char *format, ...)
} }
debug_spin_unlock_irqrestore(&kmsg_buf->lock, flags_inner); debug_spin_unlock_irqrestore(&kmsg_buf->lock, flags_inner);
if (irqflags_can_interrupt(flags_inner) &&
if (DEBUG_KMSG_USED > IHK_KMSG_HIGH_WATER_MARK) { DEBUG_KMSG_USED > IHK_KMSG_HIGH_WATER_MARK) {
eventfd(IHK_OS_EVENTFD_TYPE_KMSG); eventfd(IHK_OS_EVENTFD_TYPE_KMSG);
ihk_mc_delay_us(IHK_KMSG_NOTIFY_DELAY); ihk_mc_delay_us(IHK_KMSG_NOTIFY_DELAY);
} }
@@ -165,7 +169,8 @@ int kprintf(const char *format, ...)
debug_spin_unlock_irqrestore(&kmsg_buf->lock, flags_inner); debug_spin_unlock_irqrestore(&kmsg_buf->lock, flags_inner);
kprintf_unlock(flags_outer); kprintf_unlock(flags_outer);
if (DEBUG_KMSG_USED > IHK_KMSG_HIGH_WATER_MARK) { if (irqflags_can_interrupt(flags_outer) &&
DEBUG_KMSG_USED > IHK_KMSG_HIGH_WATER_MARK) {
eventfd(IHK_OS_EVENTFD_TYPE_KMSG); eventfd(IHK_OS_EVENTFD_TYPE_KMSG);
ihk_mc_delay_us(IHK_KMSG_NOTIFY_DELAY); ihk_mc_delay_us(IHK_KMSG_NOTIFY_DELAY);
} }
@@ -178,3 +183,147 @@ void kmsg_init()
{ {
ihk_mc_spinlock_init(&kmsg_lock); ihk_mc_spinlock_init(&kmsg_lock);
} }
extern struct ddebug __start___verbose[];
extern struct ddebug __stop___verbose[];
/*
 * sysfs "show" handler for the dynamic debug control file.
 *
 * Writes a header line followed by one line per struct ddebug entry found
 * between __start___verbose and __stop___verbose, in the form
 * "<file>:<line> <func> =<p|_>".
 *
 * @ops:      sysfs operations this handler belongs to (unused)
 * @instance: per-file instance pointer (unused)
 * @buf:      output buffer
 * @size:     capacity of @buf in bytes
 *
 * Returns the number of bytes stored in @buf, not counting the terminating
 * NUL.  The value never exceeds @size - 1: when the listing does not fit,
 * output stops at the truncated line instead of letting the remaining-space
 * computation wrap around.
 */
static ssize_t dynamic_debug_sysfs_show(struct sysfs_ops *ops,
		void *instance, void *buf, size_t size)
{
	struct ddebug *dbg;
	char *out = buf;
	size_t used = 0;
	int len;

	/* No room for even a terminator: nothing can be reported */
	if (size == 0)
		return 0;

	len = snprintf(out, size, "# filename:lineno function flags format\n");
	if (len < 0)
		return 0;
	/* Header alone was truncated; size - used below must never wrap */
	if ((size_t)len >= size)
		return size - 1;
	used = len;

	for (dbg = __start___verbose; dbg < __stop___verbose; dbg++) {
		len = snprintf(out + used, size - used, "%s:%d %s =%s\n",
			       dbg->file, dbg->line, dbg->func,
			       dbg->flags ? "p" : "_");
		if (len < 0)
			break;
		if ((size_t)len >= size - used) {
			/* Truncated: the buffer is full up to its terminator */
			used = size - 1;
			break;
		}
		used += len;
	}

	return used;
}
/*
 * sysfs "store" handler for the dynamic debug control file.
 *
 * Parses one or more queries from @buf and sets or clears the print flag of
 * every struct ddebug entry (between __start___verbose and
 * __stop___verbose) that matches.  The buffer is modified in place: the
 * last byte is overwritten with NUL and token separators are replaced by
 * NUL while parsing.
 *
 * Each query is a combination of:
 *	file <file>
 *	func <func>
 *	line <line|line-line|line-|-line>
 * and must end with [+-=][p_] (set/clear print flag).
 *
 * @ops:      sysfs operations this handler belongs to (unused)
 * @instance: per-file instance pointer (unused)
 * @buf:      input buffer, writable
 * @size:     number of bytes in @buf
 *
 * Returns @size on success, -EINVAL on an empty write, an unknown keyword,
 * an invalid or missing flag, or a query naming neither file nor func.
 */
static ssize_t dynamic_debug_sysfs_store(struct sysfs_ops *ops,
		void *instance, void *buf, size_t size)
{
	char *cur = buf;
	char *end;
	char *file, *func;
	long int line_start, line_end;
	int set_flag;
	struct ddebug *dbg;

	/* An empty write carries no query; also keeps cur[size-1] in bounds */
	if (size == 0)
		return -EINVAL;

	end = (char *)buf + size;

	// assume line was new-line terminated and squash last newline
	cur[size-1] = '\0';

again:
	/* Every query starts from a clean state so that filters given in a
	 * previous query do not leak into this one.
	 */
	file = NULL;
	func = NULL;
	line_start = 0;
	line_end = INT_MAX;
	set_flag = -1;

	while (cur && cur < end && *cur) {
		dkprintf("looking at %.*s, size left %d\n",
			 (int)(end - cur), cur, (int)(end - cur));

		if (strncmp(cur, "func ", 5) == 0) {
			cur += 5;
			func = cur;
		} else if (strncmp(cur, "file ", 5) == 0) {
			cur += 5;
			file = cur;
		} else if (strncmp(cur, "line ", 5) == 0) {
			cur += 5;
			if (*cur != '-') {
				line_start = strtol(cur, &cur, 0);
			}
			if (*cur != '-') {
				/* single line number */
				line_end = line_start;
			} else {
				cur++;
				if (*cur == ' ' || *cur == '\0') {
					/* open-ended range "N-" */
					line_end = INT_MAX;
				} else {
					line_end = strtol(cur, &cur, 0);
				}
			}
		} else if (strchr("+-=", *cur)) {
			/* Fold operator and flag characters into one value */
			switch ((*cur) + 256 * (*(cur+1))) {
			case '+' + 256*'p':
			case '=' + 256*'p':
				set_flag = DDEBUG_PRINT;
				break;
			case '-' + 256*'p':
			case '=' + 256*'_':
				set_flag = DDEBUG_NONE;
				break;
			default:
				kprintf("invalid flag: %.*s\n",
					(int)(end - cur), cur);
				return -EINVAL;
			}
			/* XXX check 3rd char is end of input or \n or ; */
			/* Skip operator, flag and the separator after them */
			cur += 3;
			break;
		} else {
			kprintf("dynamic debug control: unrecognized keyword: %.*s\n",
				(int)(end - cur), cur);
			return -EINVAL;
		}

		/* Terminate the current token and move on to the next one */
		cur = strpbrk(cur, " \n");
		if (cur) {
			*cur = '\0';
			cur++;
		}
	}

	dkprintf("func %s, file %s, lines %ld-%ld, flag %x\n",
		 func ? func : "(null)", file ? file : "(null)",
		 line_start, line_end, set_flag);

	if (set_flag < 0) {
		kprintf("dynamic debug control: no flag set?\n");
		return -EINVAL;
	}
	if (!func && !file) {
		kprintf("at least file or func should be set\n");
		return -EINVAL;
	}

	for (dbg = __start___verbose; dbg < __stop___verbose; dbg++) {
		/* TODO: handle wildcards */
		if ((!func || strcmp(func, dbg->func) == 0) &&
		    (!file || strcmp(file, dbg->file) == 0) &&
		    dbg->line >= line_start &&
		    dbg->line <= line_end) {
			dbg->flags = set_flag;
		}
	}

	/* More input left after the flag: parse the next query */
	if (cur && cur < end && *cur)
		goto again;

	return size;
}
/*
 * sysfs callbacks for the dynamic debug control file: reads go to
 * dynamic_debug_sysfs_show(), writes to dynamic_debug_sysfs_store().
 */
static struct sysfs_ops dynamic_debug_sysfs_ops = {
.show = &dynamic_debug_sysfs_show,
.store = &dynamic_debug_sysfs_store,
};
void dynamic_debug_sysfs_setup(void)
{
int error;
error = sysfs_createf(&dynamic_debug_sysfs_ops, NULL, 0644,
"/sys/kernel/debug/dynamic_debug/control");
if (error) {
kprintf("%s: ERROR: creating dynamic_debug/control sysfs file",
__func__);
}
}

View File

@@ -36,15 +36,13 @@
#include <syscall.h> #include <syscall.h>
#include <process.h> #include <process.h>
#include <rusage_private.h> #include <rusage_private.h>
#include <debug.h>
//#define DEBUG_PRINT_DEVOBJ //#define DEBUG_PRINT_DEVOBJ
#ifdef DEBUG_PRINT_DEVOBJ #ifdef DEBUG_PRINT_DEVOBJ
#define dkprintf(...) kprintf(__VA_ARGS__) #undef DDEBUG_DEFAULT
#define ekprintf(...) kprintf(__VA_ARGS__) #define DDEBUG_DEFAULT DDEBUG_PRINT
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif #endif
@@ -54,16 +52,15 @@ struct devobj {
uintptr_t handle; uintptr_t handle;
off_t pfn_pgoff; off_t pfn_pgoff;
uintptr_t * pfn_table; uintptr_t * pfn_table;
ihk_spinlock_t pfn_table_lock;
size_t npages; size_t npages;
}; };
static memobj_release_func_t devobj_release; static memobj_free_func_t devobj_free;
static memobj_ref_func_t devobj_ref;
static memobj_get_page_func_t devobj_get_page; static memobj_get_page_func_t devobj_get_page;
static struct memobj_ops devobj_ops = { static struct memobj_ops devobj_ops = {
.release = &devobj_release, .free = &devobj_free,
.ref = &devobj_ref,
.get_page = &devobj_get_page, .get_page = &devobj_get_page,
}; };
@@ -88,12 +85,9 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp
int error; int error;
struct devobj *obj = NULL; struct devobj *obj = NULL;
const size_t npages = (len + PAGE_SIZE - 1) / PAGE_SIZE; const size_t npages = (len + PAGE_SIZE - 1) / PAGE_SIZE;
#ifdef POSTK_DEBUG_TEMP_FIX_36
const size_t uintptr_per_page = (PAGE_SIZE / sizeof(uintptr_t)); const size_t uintptr_per_page = (PAGE_SIZE / sizeof(uintptr_t));
const size_t pfn_npages = (npages + uintptr_per_page - 1) / uintptr_per_page; const size_t pfn_npages =
#else (npages + uintptr_per_page - 1) / uintptr_per_page;
const size_t pfn_npages = (npages / (PAGE_SIZE / sizeof(uintptr_t))) + 1;
#endif /*POSTK_DEBUG_TEMP_FIX_36*/
dkprintf("%s: fd: %d, len: %lu, off: %lu \n", __FUNCTION__, fd, len, off); dkprintf("%s: fd: %d, len: %lu, off: %lu \n", __FUNCTION__, fd, len, off);
@@ -122,6 +116,8 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp
ihk_mc_syscall_arg4(&ctx) = virt_to_phys(&result); ihk_mc_syscall_arg4(&ctx) = virt_to_phys(&result);
ihk_mc_syscall_arg5(&ctx) = prot | populate_flags; ihk_mc_syscall_arg5(&ctx) = prot | populate_flags;
memset(&result, 0, sizeof(result));
error = syscall_generic_forwarding(__NR_mmap, &ctx); error = syscall_generic_forwarding(__NR_mmap, &ctx);
if (error) { if (error) {
kprintf("%s: error: fd: %d, len: %lu, off: %lu map failed.\n", kprintf("%s: error: fd: %d, len: %lu, off: %lu map failed.\n",
@@ -135,6 +131,7 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp
obj->memobj.ops = &devobj_ops; obj->memobj.ops = &devobj_ops;
obj->memobj.flags = MF_HAS_PAGER | MF_DEV_FILE; obj->memobj.flags = MF_HAS_PAGER | MF_DEV_FILE;
obj->memobj.size = len; obj->memobj.size = len;
ihk_atomic_set(&obj->memobj.refcnt, 1);
obj->handle = result.handle; obj->handle = result.handle;
dkprintf("%s: path=%s\n", __FUNCTION__, result.path); dkprintf("%s: path=%s\n", __FUNCTION__, result.path);
@@ -148,10 +145,9 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp
strncpy(obj->memobj.path, result.path, PATH_MAX); strncpy(obj->memobj.path, result.path, PATH_MAX);
} }
obj->ref = 1; obj->pfn_pgoff = off >> PAGE_SHIFT;
obj->pfn_pgoff = off / PAGE_SIZE;
obj->npages = npages; obj->npages = npages;
ihk_mc_spinlock_init(&obj->memobj.lock); ihk_mc_spinlock_init(&obj->pfn_table_lock);
error = 0; error = 0;
*objp = to_memobj(obj); *objp = to_memobj(obj);
@@ -170,81 +166,50 @@ out:
return error; return error;
} }
static void devobj_ref(struct memobj *memobj) static void devobj_free(struct memobj *memobj)
{ {
struct devobj *obj = to_devobj(memobj); struct devobj *obj = to_devobj(memobj);
dkprintf("devobj_ref(%p %lx):\n", obj, obj->handle);
memobj_lock(&obj->memobj);
++obj->ref;
memobj_unlock(&obj->memobj);
return;
}
static void devobj_release(struct memobj *memobj)
{
struct devobj *obj = to_devobj(memobj);
struct devobj *free_obj = NULL;
uintptr_t handle; uintptr_t handle;
#ifndef POSTK_DEBUG_TEMP_FIX_36 const size_t uintptr_per_page = (PAGE_SIZE / sizeof(uintptr_t));
const size_t pfn_npages = const size_t pfn_npages =
(obj->npages / (PAGE_SIZE / sizeof(uintptr_t))) + 1; (obj->npages + uintptr_per_page - 1) / uintptr_per_page;
#endif /*!POSTK_DEBUG_TEMP_FIX_36*/ int error;
ihk_mc_user_context_t ctx;
dkprintf("devobj_release(%p %lx)\n", obj, obj->handle); dkprintf("%s(%p %lx)\n", __func__, obj, obj->handle);
memobj_lock(&obj->memobj);
--obj->ref;
if (obj->ref <= 0) {
free_obj = obj;
}
handle = obj->handle; handle = obj->handle;
memobj_unlock(&obj->memobj);
if (free_obj) { ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_UNMAP;
if (!(free_obj->memobj.flags & MF_HOST_RELEASED)) { ihk_mc_syscall_arg1(&ctx) = handle;
int error; ihk_mc_syscall_arg2(&ctx) = 1;
ihk_mc_user_context_t ctx;
ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_UNMAP; error = syscall_generic_forwarding(__NR_mmap, &ctx);
ihk_mc_syscall_arg1(&ctx) = handle; if (error) {
ihk_mc_syscall_arg2(&ctx) = 1; kprintf("%s(%p %lx): release failed. %d\n",
__func__, obj, handle, error);
error = syscall_generic_forwarding(__NR_mmap, &ctx); /* through */
if (error) {
kprintf("devobj_release(%p %lx):"
"release failed. %d\n",
free_obj, handle, error);
/* through */
}
}
if (obj->pfn_table) {
// Don't call memory_stat_rss_sub() because devobj related pages don't reside in main memory
#ifdef POSTK_DEBUG_TEMP_FIX_36
const size_t uintptr_per_page = (PAGE_SIZE / sizeof(uintptr_t));
const size_t pfn_npages = (obj->npages + uintptr_per_page - 1) / uintptr_per_page;
ihk_mc_free_pages(obj->pfn_table, pfn_npages);
#else
ihk_mc_free_pages(obj->pfn_table, pfn_npages);
#endif /*POSTK_DEBUG_TEMP_FIX_36*/
}
if (to_memobj(free_obj)->path) {
kfree(to_memobj(free_obj)->path);
}
kfree(free_obj);
} }
dkprintf("devobj_release(%p %lx):free %p\n", if (obj->pfn_table) {
obj, handle, free_obj); // Don't call memory_stat_rss_sub() because devobj related
// pages don't reside in main memory
ihk_mc_free_pages(obj->pfn_table, pfn_npages);
}
if (to_memobj(obj)->path) {
kfree(to_memobj(obj)->path);
}
kfree(obj);
dkprintf("%s(%p %lx):free\n", __func__, obj, handle);
return; return;
} }
static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintptr_t *physp, unsigned long *flag, uintptr_t virt_addr) static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintptr_t *physp, unsigned long *flag, uintptr_t virt_addr)
{ {
const off_t pgoff = off / PAGE_SIZE; const off_t pgoff = off >> PAGE_SHIFT;
struct devobj *obj = to_devobj(memobj); struct devobj *obj = to_devobj(memobj);
int error; int error;
uintptr_t pfn; uintptr_t pfn;
@@ -262,17 +227,14 @@ static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintpt
ix = pgoff - obj->pfn_pgoff; ix = pgoff - obj->pfn_pgoff;
dkprintf("ix: %ld\n", ix); dkprintf("ix: %ld\n", ix);
memobj_lock(&obj->memobj);
pfn = obj->pfn_table[ix];
#ifdef PROFILE_ENABLE #ifdef PROFILE_ENABLE
profile_event_add(PROFILE_page_fault_dev_file, PAGE_SIZE); profile_event_add(PROFILE_page_fault_dev_file, PAGE_SIZE);
#endif // PROFILE_ENABLE #endif // PROFILE_ENABLE
pfn = obj->pfn_table[ix];
if (!(pfn & PFN_VALID)) { if (!(pfn & PFN_VALID)) {
memobj_unlock(&obj->memobj);
ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_PFN; ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_PFN;
ihk_mc_syscall_arg1(&ctx) = obj->handle; ihk_mc_syscall_arg1(&ctx) = obj->handle;
ihk_mc_syscall_arg2(&ctx) = pgoff << PAGE_SHIFT; ihk_mc_syscall_arg2(&ctx) = off & ~(PAGE_SIZE - 1);
ihk_mc_syscall_arg3(&ctx) = virt_to_phys(&pfn); ihk_mc_syscall_arg3(&ctx) = virt_to_phys(&pfn);
error = syscall_generic_forwarding(__NR_mmap, &ctx); error = syscall_generic_forwarding(__NR_mmap, &ctx);
@@ -303,11 +265,9 @@ static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintpt
dkprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT after %#lx\n", memobj, obj->handle, off, p2align, pfn); dkprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT after %#lx\n", memobj, obj->handle, off, p2align, pfn);
} }
memobj_lock(&obj->memobj);
obj->pfn_table[ix] = pfn; obj->pfn_table[ix] = pfn;
// Don't call memory_stat_rss_add() because devobj related pages don't reside in main memory // Don't call memory_stat_rss_add() because devobj related pages don't reside in main memory
} }
memobj_unlock(&obj->memobj);
if (!(pfn & PFN_PRESENT)) { if (!(pfn & PFN_PRESENT)) {
kprintf("devobj_get_page(%p %lx,%lx,%d):not present. %lx\n", memobj, obj->handle, off, p2align, pfn); kprintf("devobj_get_page(%p %lx,%lx,%d):not present. %lx\n", memobj, obj->handle, off, p2align, pfn);

View File

@@ -27,15 +27,13 @@
#include <string.h> #include <string.h>
#include <syscall.h> #include <syscall.h>
#include <rusage_private.h> #include <rusage_private.h>
#include <debug.h>
//#define DEBUG_PRINT_FILEOBJ //#define DEBUG_PRINT_FILEOBJ
#ifdef DEBUG_PRINT_FILEOBJ #ifdef DEBUG_PRINT_FILEOBJ
#define dkprintf(...) do { if (1) kprintf(__VA_ARGS__); } while (0) #undef DDEBUG_DEFAULT
#define ekprintf(...) kprintf(__VA_ARGS__) #define DDEBUG_DEFAULT DDEBUG_PRINT
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif #endif
mcs_lock_t fileobj_list_lock; mcs_lock_t fileobj_list_lock;
@@ -47,24 +45,21 @@ static LIST_HEAD(fileobj_list);
struct fileobj { struct fileobj {
struct memobj memobj; /* must be first */ struct memobj memobj; /* must be first */
long sref; uint64_t sref;
long cref;
uintptr_t handle; uintptr_t handle;
struct list_head list; struct list_head list;
struct list_head page_hash[FILEOBJ_PAGE_HASH_SIZE]; struct list_head page_hash[FILEOBJ_PAGE_HASH_SIZE];
mcs_lock_t page_hash_locks[FILEOBJ_PAGE_HASH_SIZE]; mcs_lock_t page_hash_locks[FILEOBJ_PAGE_HASH_SIZE];
}; };
static memobj_release_func_t fileobj_release; static memobj_free_func_t fileobj_free;
static memobj_ref_func_t fileobj_ref;
static memobj_get_page_func_t fileobj_get_page; static memobj_get_page_func_t fileobj_get_page;
static memobj_flush_page_func_t fileobj_flush_page; static memobj_flush_page_func_t fileobj_flush_page;
static memobj_invalidate_page_func_t fileobj_invalidate_page; static memobj_invalidate_page_func_t fileobj_invalidate_page;
static memobj_lookup_page_func_t fileobj_lookup_page; static memobj_lookup_page_func_t fileobj_lookup_page;
static struct memobj_ops fileobj_ops = { static struct memobj_ops fileobj_ops = {
.release = &fileobj_release, .free = &fileobj_free,
.ref = &fileobj_ref,
.get_page = &fileobj_get_page, .get_page = &fileobj_get_page,
.copy_page = NULL, .copy_page = NULL,
.flush_page = &fileobj_flush_page, .flush_page = &fileobj_flush_page,
@@ -170,22 +165,22 @@ static void obj_list_remove(struct fileobj *obj)
/* return NULL or locked fileobj */ /* return NULL or locked fileobj */
static struct fileobj *obj_list_lookup(uintptr_t handle) static struct fileobj *obj_list_lookup(uintptr_t handle)
{ {
struct fileobj *obj;
struct fileobj *p; struct fileobj *p;
obj = NULL;
list_for_each_entry(p, &fileobj_list, list) { list_for_each_entry(p, &fileobj_list, list) {
if (p->handle == handle) { if (p->handle == handle) {
memobj_lock(&p->memobj); /* for the interval between last put and fileobj_free
if (p->cref > 0) { * taking list_lock
obj = p; */
break; if (memobj_ref(&p->memobj) <= 1) {
ihk_atomic_dec(&p->memobj.refcnt);
continue;
} }
memobj_unlock(&p->memobj); return p;
} }
} }
return obj; return NULL;
} }
/*********************************************************************** /***********************************************************************
@@ -200,13 +195,7 @@ int fileobj_create(int fd, struct memobj **objp, int *maxprotp, uintptr_t virt_a
struct fileobj *obj; struct fileobj *obj;
struct mcs_lock_node node; struct mcs_lock_node node;
dkprintf("fileobj_create(%d)\n", fd); dkprintf("%s(%d)\n", __func__, fd);
newobj = kmalloc(sizeof(*newobj), IHK_MC_AP_NOWAIT);
if (!newobj) {
error = -ENOMEM;
kprintf("fileobj_create(%d):kmalloc failed. %d\n", fd, error);
goto out;
}
ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_CREATE; ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_CREATE;
ihk_mc_syscall_arg1(&ctx) = fd; ihk_mc_syscall_arg1(&ctx) = fd;
@@ -214,20 +203,41 @@ int fileobj_create(int fd, struct memobj **objp, int *maxprotp, uintptr_t virt_a
memset(&result, 0, sizeof(result)); memset(&result, 0, sizeof(result));
error = syscall_generic_forwarding(__NR_mmap, &ctx); error = syscall_generic_forwarding(__NR_mmap, &ctx);
if (error) { if (error) {
dkprintf("fileobj_create(%d):create failed. %d\n", fd, error); /* -ESRCH doesn't mean an error but requesting a fall
* back to treat the file as a device file
*/
if (error != -ESRCH) {
kprintf("%s(%d):create failed. %d\n",
__func__, fd, error);
}
goto out; goto out;
} }
if (result.flags & MF_HUGETLBFS) {
return hugefileobj_pre_create(&result, objp, maxprotp);
}
mcs_lock_lock(&fileobj_list_lock, &node);
obj = obj_list_lookup(result.handle);
if (obj)
goto found;
mcs_lock_unlock(&fileobj_list_lock, &node);
// not found: alloc new object and lookup again
newobj = kmalloc(sizeof(*newobj), IHK_MC_AP_NOWAIT);
if (!newobj) {
error = -ENOMEM;
kprintf("%s(%d):kmalloc failed. %d\n", __func__, fd, error);
goto out;
}
memset(newobj, 0, sizeof(*newobj)); memset(newobj, 0, sizeof(*newobj));
newobj->memobj.ops = &fileobj_ops; newobj->memobj.ops = &fileobj_ops;
newobj->memobj.flags = MF_HAS_PAGER | MF_REG_FILE; newobj->memobj.flags = MF_HAS_PAGER | MF_REG_FILE;
newobj->handle = result.handle; newobj->handle = result.handle;
newobj->sref = 1;
newobj->cref = 1;
fileobj_page_hash_init(newobj); fileobj_page_hash_init(newobj);
ihk_mc_spinlock_init(&newobj->memobj.lock);
mcs_lock_lock_noirq(&fileobj_list_lock, &node); mcs_lock_lock_noirq(&fileobj_list_lock, &node);
obj = obj_list_lookup(result.handle); obj = obj_list_lookup(result.handle);
@@ -237,6 +247,8 @@ int fileobj_create(int fd, struct memobj **objp, int *maxprotp, uintptr_t virt_a
to_memobj(obj)->size = result.size; to_memobj(obj)->size = result.size;
to_memobj(obj)->flags |= result.flags; to_memobj(obj)->flags |= result.flags;
to_memobj(obj)->status = MEMOBJ_READY; to_memobj(obj)->status = MEMOBJ_READY;
ihk_atomic_set(&to_memobj(obj)->refcnt, 1);
obj->sref = 1;
if (to_memobj(obj)->flags & MF_PREFETCH) { if (to_memobj(obj)->flags & MF_PREFETCH) {
to_memobj(obj)->status = MEMOBJ_TO_BE_PREFETCHED; to_memobj(obj)->status = MEMOBJ_TO_BE_PREFETCHED;
} }
@@ -305,20 +317,17 @@ error_cleanup:
} }
newobj = NULL; newobj = NULL;
dkprintf("%s: new obj 0x%lx cref: %d, %s\n", dkprintf("%s: new obj 0x%lx %s\n",
__FUNCTION__, __FUNCTION__,
obj, obj,
obj->cref,
to_memobj(obj)->flags & MF_ZEROFILL ? "zerofill" : ""); to_memobj(obj)->flags & MF_ZEROFILL ? "zerofill" : "");
} }
else { else {
++obj->sref; found:
++obj->cref; obj->sref++;
memobj_unlock(&obj->memobj); /* locked by obj_list_lookup() */ dkprintf("%s: existing obj 0x%lx, %s\n",
dkprintf("%s: existing obj 0x%lx cref: %d, %s\n",
__FUNCTION__, __FUNCTION__,
obj, obj,
obj->cref,
to_memobj(obj)->flags & MF_ZEROFILL ? "zerofill" : ""); to_memobj(obj)->flags & MF_ZEROFILL ? "zerofill" : "");
} }
@@ -332,152 +341,111 @@ out:
if (newobj) { if (newobj) {
kfree(newobj); kfree(newobj);
} }
dkprintf("fileobj_create(%d):%d %p %x\n", fd, error, *objp, *maxprotp); dkprintf("%s(%d):%d %p %x\n", __func__, fd, error, *objp, *maxprotp);
return error; return error;
} }
static void fileobj_ref(struct memobj *memobj) static void fileobj_free(struct memobj *memobj)
{ {
struct fileobj *obj = to_fileobj(memobj); struct fileobj *obj = to_fileobj(memobj);
dkprintf("fileobj_ref(%p %lx):\n", obj, obj->handle);
memobj_lock(&obj->memobj);
++obj->cref;
memobj_unlock(&obj->memobj);
return;
}
static void fileobj_release(struct memobj *memobj)
{
struct fileobj *obj = to_fileobj(memobj);
long free_sref = 0;
uintptr_t free_handle;
struct fileobj *free_obj = NULL;
struct mcs_lock_node node; struct mcs_lock_node node;
int error;
ihk_mc_user_context_t ctx;
dkprintf("fileobj_release(%p %lx)\n", obj, obj->handle);
memobj_lock(&obj->memobj); dkprintf("%s: free obj 0x%lx, %s\n", __func__,
--obj->cref; obj, to_memobj(obj)->flags & MF_ZEROFILL ? "zerofill" : "");
free_sref = obj->sref - 1; /* surplus sref */
if (obj->cref <= 0) {
free_sref = obj->sref;
free_obj = obj;
}
obj->sref -= free_sref;
free_handle = obj->handle;
memobj_unlock(&obj->memobj);
if (obj->memobj.flags & MF_HOST_RELEASED) {
free_sref = 0; // don't call syscall_generic_forwarding
}
if (free_obj) { mcs_lock_lock_noirq(&fileobj_list_lock, &node);
dkprintf("%s: release obj 0x%lx cref: %d, free_obj: 0x%lx, %s\n", obj_list_remove(obj);
__FUNCTION__, mcs_lock_unlock_noirq(&fileobj_list_lock, &node);
obj,
obj->cref,
free_obj,
to_memobj(obj)->flags & MF_ZEROFILL ? "zerofill" : "");
mcs_lock_lock_noirq(&fileobj_list_lock, &node);
/* zap page_list */
for (;;) {
struct page *page;
void *page_va;
uintptr_t phys;
page = fileobj_page_hash_first(obj); /* zap page_list */
if (!page) { for (;;) {
break; struct page *page;
} void *page_va;
__fileobj_page_hash_remove(page); uintptr_t phys;
phys = page_to_phys(page);
page_va = phys_to_virt(phys);
/* Count must be one because set to one on the first get_page() invoking fileobj_do_pageio and page = fileobj_page_hash_first(obj);
incremented by the second get_page() reaping the pageio and decremented by clear_range(). if (!page) {
break;
}
__fileobj_page_hash_remove(page);
phys = page_to_phys(page);
page_va = phys_to_virt(phys);
/* Count must be one because set to one on the first
* get_page() invoking fileobj_do_pageio and incremented by
* the second get_page() reaping the pageio and decremented
* by clear_range().
*/
if (ihk_atomic_read(&page->count) != 1) {
kprintf("%s: WARNING: page count is %d for phys 0x%lx is invalid, flags: 0x%lx\n",
__func__, ihk_atomic_read(&page->count),
page->phys, to_memobj(obj)->flags);
}
else if (page_unmap(page)) {
ihk_mc_free_pages_user(page_va, 1);
/* Track change in page->count for !MF_PREMAP pages.
* It is decremented here or in clear_range()
*/ */
if (ihk_atomic_read(&page->count) != 1) { dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n",
kprintf("%s: WARNING: page count is %d for phys 0x%lx is invalid, flags: 0x%lx\n", phys, __func__, phys, PAGE_SIZE, PAGE_SIZE);
__FUNCTION__, rusage_memory_stat_mapped_file_sub(PAGE_SIZE,
ihk_atomic_read(&page->count), PAGE_SIZE);
page->phys,
to_memobj(free_obj)->flags);
}
else if (page_unmap(page)) {
ihk_mc_free_pages_user(page_va, 1);
/* Track change in page->count for !MF_PREMAP pages. It is decremented here or in clear_range() */
dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", phys, __FUNCTION__, phys, PAGE_SIZE, PAGE_SIZE);
rusage_memory_stat_mapped_file_sub(PAGE_SIZE, PAGE_SIZE);
}
#if 0
count = ihk_atomic_sub_return(1, &page->count);
if (!((page->mode == PM_WILL_PAGEIO)
|| (page->mode == PM_DONE_PAGEIO)
|| (page->mode == PM_PAGEIO_EOF)
|| (page->mode == PM_PAGEIO_ERROR)
|| ((page->mode == PM_MAPPED)
&& (count <= 0)))) {
kprintf("fileobj_release(%p %lx): "
"mode %x, count %d, off %lx\n",
obj, obj->handle, page->mode,
count, page->offset);
panic("fileobj_release");
}
page->mode = PM_NONE;
#endif
}
/* Pre-mapped? */
if (to_memobj(free_obj)->flags & MF_PREMAP) {
int i;
for (i = 0; i < to_memobj(free_obj)->nr_pages; ++i) {
if (to_memobj(free_obj)->pages[i]) {
dkprintf("%s: pages[i]=%p\n", __FUNCTION__, i, to_memobj(free_obj)->pages[i]);
// Track change in fileobj->pages[] for MF_PREMAP pages
// Note that page_unmap() isn't called for MF_PREMAP in
// free_process_memory_range() --> ihk_mc_pt_free_range()
dkprintf("%lx-,%s: memory_stat_rss_sub,phys=%lx,size=%ld,pgsize=%ld\n",
virt_to_phys(to_memobj(free_obj)->pages[i]), __FUNCTION__, virt_to_phys(to_memobj(free_obj)->pages[i]), PAGE_SIZE, PAGE_SIZE);
rusage_memory_stat_mapped_file_sub(PAGE_SIZE, PAGE_SIZE);
ihk_mc_free_pages_user(to_memobj(free_obj)->pages[i], 1);
}
}
kfree(to_memobj(free_obj)->pages);
}
if (to_memobj(free_obj)->path) {
dkprintf("%s: %s\n", __FUNCTION__, to_memobj(free_obj)->path);
kfree(to_memobj(free_obj)->path);
}
obj_list_remove(free_obj);
mcs_lock_unlock_noirq(&fileobj_list_lock, &node);
kfree(free_obj);
}
if (free_sref) {
int error;
ihk_mc_user_context_t ctx;
ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_RELEASE;
ihk_mc_syscall_arg1(&ctx) = free_handle;
ihk_mc_syscall_arg2(&ctx) = free_sref;
error = syscall_generic_forwarding(__NR_mmap, &ctx);
if (error) {
kprintf("fileobj_release(%p %lx):"
"release %ld failed. %d\n",
obj, free_handle, free_sref, error);
/* through */
} }
} }
dkprintf("fileobj_release(%p %lx):free %ld %p\n", /* Pre-mapped? */
obj, free_handle, free_sref, free_obj); if (to_memobj(obj)->flags & MF_PREMAP) {
int i;
for (i = 0; i < to_memobj(obj)->nr_pages; ++i) {
if (to_memobj(obj)->pages[i]) {
dkprintf("%s: pages[i]=%p\n", __func__, i,
to_memobj(obj)->pages[i]);
// Track change in fileobj->pages[] for MF_PREMAP pages
// Note that page_unmap() isn't called for MF_PREMAP in
// free_process_memory_range() --> ihk_mc_pt_free_range()
dkprintf("%lx-,%s: memory_stat_rss_sub,phys=%lx,size=%ld,pgsize=%ld\n",
virt_to_phys(to_memobj(obj)->pages[i]),
__func__,
virt_to_phys(to_memobj(obj)->pages[i]),
PAGE_SIZE, PAGE_SIZE);
rusage_memory_stat_mapped_file_sub(PAGE_SIZE,
PAGE_SIZE);
ihk_mc_free_pages_user(to_memobj(obj)->pages[i],
1);
}
}
kfree(to_memobj(obj)->pages);
}
if (to_memobj(obj)->path) {
dkprintf("%s: %s\n", __func__, to_memobj(obj)->path);
kfree(to_memobj(obj)->path);
}
/* linux side
* sref is necessary because handle is used as key, so there could
* be a new mckernel pager with the same handle being created as
* this one is being destroyed
*/
ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_RELEASE;
ihk_mc_syscall_arg1(&ctx) = obj->handle;
ihk_mc_syscall_arg2(&ctx) = obj->sref;
error = syscall_generic_forwarding(__NR_mmap, &ctx);
if (error) {
kprintf("%s(%p %lx): free failed. %d\n", __func__,
obj, obj->handle, error);
/* through */
}
dkprintf("%s(%p %lx):free\n", __func__, obj, obj->handle);
kfree(obj);
return; return;
} }
struct pageio_args { struct pageio_args {
@@ -570,7 +538,7 @@ static void fileobj_do_pageio(void *args0)
out: out:
mcs_lock_unlock_noirq(&obj->page_hash_locks[hash], mcs_lock_unlock_noirq(&obj->page_hash_locks[hash],
&mcs_node); &mcs_node);
fileobj_release(&obj->memobj); /* got fileobj_get_page() */ memobj_unref(&obj->memobj); /* got fileobj_get_page() */
kfree(args0); kfree(args0);
dkprintf("fileobj_do_pageio(%p,%lx,%lx):\n", obj, off, pgsize); dkprintf("fileobj_do_pageio(%p,%lx,%lx):\n", obj, off, pgsize);
return; return;
@@ -656,7 +624,9 @@ static int fileobj_get_page(struct memobj *memobj, off_t off,
npages = 1 << p2align; npages = 1 << p2align;
virt = ihk_mc_alloc_pages_user(npages, (IHK_MC_AP_NOWAIT | virt = ihk_mc_alloc_pages_user(npages, (IHK_MC_AP_NOWAIT |
(to_memobj(obj)->flags & MF_ZEROFILL) ? IHK_MC_AP_USER : 0), virt_addr); ((to_memobj(obj)->flags & MF_ZEROFILL) ?
IHK_MC_AP_USER : 0)),
virt_addr);
if (!virt) { if (!virt) {
error = -ENOMEM; error = -ENOMEM;
kprintf("fileobj_get_page(%p,%lx,%x,%x,%p):" kprintf("fileobj_get_page(%p,%lx,%x,%x,%p):"
@@ -681,9 +651,7 @@ static int fileobj_get_page(struct memobj *memobj, off_t off,
page->mode = PM_WILL_PAGEIO; page->mode = PM_WILL_PAGEIO;
} }
memobj_lock(&obj->memobj); memobj_ref(&obj->memobj);
++obj->cref; /* for fileobj_do_pageio() */
memobj_unlock(&obj->memobj);
args->fileobj = obj; args->fileobj = obj;
args->objoff = off; args->objoff = off;
@@ -744,10 +712,6 @@ static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys,
return 0; return 0;
} }
if (memobj->flags & MF_HOST_RELEASED) {
return 0;
}
page = phys_to_page(phys); page = phys_to_page(phys);
if (!page) { if (!page) {
kprintf("%s: warning: tried to flush non-existing page for phys addr: 0x%lx\n", kprintf("%s: warning: tried to flush non-existing page for phys addr: 0x%lx\n",
@@ -755,8 +719,6 @@ static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys,
return 0; return 0;
} }
memobj_unlock(&obj->memobj);
ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_WRITE; ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_WRITE;
ihk_mc_syscall_arg1(&ctx) = obj->handle; ihk_mc_syscall_arg1(&ctx) = obj->handle;
ihk_mc_syscall_arg2(&ctx) = page->offset; ihk_mc_syscall_arg2(&ctx) = page->offset;
@@ -771,7 +733,6 @@ static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys,
/* through */ /* through */
} }
memobj_lock(&obj->memobj);
return 0; return 0;
} }

View File

@@ -70,15 +70,22 @@
#include <cls.h> #include <cls.h>
#include <kmsg.h> #include <kmsg.h>
#include <timer.h> #include <timer.h>
#include <debug.h>
#include <syscall.h>
//#define DEBUG_PRINT_FUTEX //#define DEBUG_PRINT_FUTEX
#ifdef DEBUG_PRINT_FUTEX #ifdef DEBUG_PRINT_FUTEX
#define dkprintf kprintf #undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#define uti_dkprintf(...) do { ((clv_override && linux_printk) ? (*linux_printk) : kprintf)(__VA_ARGS__); } while (0)
#else #else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) #define uti_dkprintf(...) do { } while (0)
#endif #endif
#define uti_kprintf(...) do { ((clv_override && linux_printk) ? (*linux_printk) : kprintf)(__VA_ARGS__); } while (0)
unsigned long ihk_mc_get_ns_per_tsc(void);
int futex_cmpxchg_enabled; int futex_cmpxchg_enabled;
/** /**
@@ -108,6 +115,9 @@ struct futex_q {
union futex_key key; union futex_key key;
union futex_key *requeue_pi_key; union futex_key *requeue_pi_key;
uint32_t bitset; uint32_t bitset;
/* Used to wake-up a thread running on a Linux CPU */
void *uti_futex_resp;
}; };
/* /*
@@ -180,11 +190,12 @@ static void drop_futex_key_refs(union futex_key *key)
* lock_page() might sleep, the caller should not hold a spinlock. * lock_page() might sleep, the caller should not hold a spinlock.
*/ */
static int static int
get_futex_key(uint32_t *uaddr, int fshared, union futex_key *key) get_futex_key(uint32_t *uaddr, int fshared, union futex_key *key, struct cpu_local_var *clv_override)
{ {
unsigned long address = (unsigned long)uaddr; unsigned long address = (unsigned long)uaddr;
unsigned long phys; unsigned long phys;
struct process_vm *mm = cpu_local_var(current)->vm; struct thread *thread = cpu_local_var_with_override(current, clv_override);
struct process_vm *mm = thread->vm;
/* /*
* The futex address must be "naturally" aligned. * The futex address must be "naturally" aligned.
@@ -250,7 +261,7 @@ static int cmpxchg_futex_value_locked(uint32_t __user *uaddr, uint32_t uval, uin
* The hash bucket lock must be held when this is called. * The hash bucket lock must be held when this is called.
* Afterwards, the futex_q must not be accessed. * Afterwards, the futex_q must not be accessed.
*/ */
static void wake_futex(struct futex_q *q) static void wake_futex(struct futex_q *q, struct cpu_local_var *clv_override)
{ {
struct thread *p = q->task; struct thread *p = q->task;
@@ -272,8 +283,31 @@ static void wake_futex(struct futex_q *q)
barrier(); barrier();
q->lock_ptr = NULL; q->lock_ptr = NULL;
dkprintf("wake_futex(): waking up tid %d\n", p->tid);
sched_wakeup_thread(p, PS_NORMAL); if (q->uti_futex_resp) {
int rc;
uti_dkprintf("wake_futex(): waking up migrated-to-Linux thread (tid %d),uti_futex_resp=%p\n", p->tid, q->uti_futex_resp);
/* TODO: Handle the case where a Linux thread wakes up another Linux thread */
if (clv_override) {
uti_dkprintf("%s: ERROR: A Linux thread is waking up migrated-to-Linux thread\n", __FUNCTION__);
}
if (p->spin_sleep == 0) {
uti_dkprintf("%s: INFO: woken up by someone else\n", __FUNCTION__);
}
struct ikc_scd_packet pckt;
struct ihk_ikc_channel_desc *resp_channel = cpu_local_var_with_override(ikc2linux, clv_override);
pckt.msg = SCD_MSG_FUTEX_WAKE;
pckt.futex.resp = q->uti_futex_resp;
pckt.futex.spin_sleep = &p->spin_sleep;
rc = ihk_ikc_send(resp_channel, &pckt, 0);
if (rc) {
uti_dkprintf("%s: ERROR: ihk_ikc_send returned %d, resp_channel=%p\n", __FUNCTION__, rc, resp_channel);
}
} else {
uti_dkprintf("wake_futex(): waking up McKernel thread (tid %d)\n", p->tid);
sched_wakeup_thread(p, PS_NORMAL);
}
} }
/* /*
@@ -303,7 +337,7 @@ double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
/* /*
* Wake up waiters matching bitset queued on this futex (uaddr). * Wake up waiters matching bitset queued on this futex (uaddr).
*/ */
static int futex_wake(uint32_t *uaddr, int fshared, int nr_wake, uint32_t bitset) static int futex_wake(uint32_t *uaddr, int fshared, int nr_wake, uint32_t bitset, struct cpu_local_var *clv_override)
{ {
struct futex_hash_bucket *hb; struct futex_hash_bucket *hb;
struct futex_q *this, *next; struct futex_q *this, *next;
@@ -314,7 +348,7 @@ static int futex_wake(uint32_t *uaddr, int fshared, int nr_wake, uint32_t bitset
if (!bitset) if (!bitset)
return -EINVAL; return -EINVAL;
ret = get_futex_key(uaddr, fshared, &key); ret = get_futex_key(uaddr, fshared, &key, clv_override);
if ((ret != 0)) if ((ret != 0))
goto out; goto out;
@@ -330,7 +364,7 @@ static int futex_wake(uint32_t *uaddr, int fshared, int nr_wake, uint32_t bitset
if (!(this->bitset & bitset)) if (!(this->bitset & bitset))
continue; continue;
wake_futex(this); wake_futex(this, clv_override);
if (++ret >= nr_wake) if (++ret >= nr_wake)
break; break;
} }
@@ -348,7 +382,8 @@ out:
*/ */
static int static int
futex_wake_op(uint32_t *uaddr1, int fshared, uint32_t *uaddr2, futex_wake_op(uint32_t *uaddr1, int fshared, uint32_t *uaddr2,
int nr_wake, int nr_wake2, int op) int nr_wake, int nr_wake2, int op,
struct cpu_local_var *clv_override)
{ {
union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
struct futex_hash_bucket *hb1, *hb2; struct futex_hash_bucket *hb1, *hb2;
@@ -357,10 +392,10 @@ futex_wake_op(uint32_t *uaddr1, int fshared, uint32_t *uaddr2,
int ret, op_ret; int ret, op_ret;
retry: retry:
ret = get_futex_key(uaddr1, fshared, &key1); ret = get_futex_key(uaddr1, fshared, &key1, clv_override);
if ((ret != 0)) if ((ret != 0))
goto out; goto out;
ret = get_futex_key(uaddr2, fshared, &key2); ret = get_futex_key(uaddr2, fshared, &key2, clv_override);
if ((ret != 0)) if ((ret != 0))
goto out_put_key1; goto out_put_key1;
@@ -394,7 +429,7 @@ retry_private:
plist_for_each_entry_safe(this, next, head, list) { plist_for_each_entry_safe(this, next, head, list) {
if (match_futex (&this->key, &key1)) { if (match_futex (&this->key, &key1)) {
wake_futex(this); wake_futex(this, clv_override);
if (++ret >= nr_wake) if (++ret >= nr_wake)
break; break;
} }
@@ -406,7 +441,7 @@ retry_private:
op_ret = 0; op_ret = 0;
plist_for_each_entry_safe(this, next, head, list) { plist_for_each_entry_safe(this, next, head, list) {
if (match_futex (&this->key, &key2)) { if (match_futex (&this->key, &key2)) {
wake_futex(this); wake_futex(this, clv_override);
if (++op_ret >= nr_wake2) if (++op_ret >= nr_wake2)
break; break;
} }
@@ -469,7 +504,7 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
*/ */
static int futex_requeue(uint32_t *uaddr1, int fshared, uint32_t *uaddr2, static int futex_requeue(uint32_t *uaddr1, int fshared, uint32_t *uaddr2,
int nr_wake, int nr_requeue, uint32_t *cmpval, int nr_wake, int nr_requeue, uint32_t *cmpval,
int requeue_pi) int requeue_pi, struct cpu_local_var *clv_override)
{ {
union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
int drop_count = 0, task_count = 0, ret; int drop_count = 0, task_count = 0, ret;
@@ -477,10 +512,10 @@ static int futex_requeue(uint32_t *uaddr1, int fshared, uint32_t *uaddr2,
struct plist_head *head1; struct plist_head *head1;
struct futex_q *this, *next; struct futex_q *this, *next;
ret = get_futex_key(uaddr1, fshared, &key1); ret = get_futex_key(uaddr1, fshared, &key1, clv_override);
if ((ret != 0)) if ((ret != 0))
goto out; goto out;
ret = get_futex_key(uaddr2, fshared, &key2); ret = get_futex_key(uaddr2, fshared, &key2, clv_override);
if ((ret != 0)) if ((ret != 0))
goto out_put_key1; goto out_put_key1;
@@ -515,7 +550,7 @@ static int futex_requeue(uint32_t *uaddr1, int fshared, uint32_t *uaddr2,
*/ */
/* RIKEN: no requeue_pi at this moment */ /* RIKEN: no requeue_pi at this moment */
if (++task_count <= nr_wake) { if (++task_count <= nr_wake) {
wake_futex(this); wake_futex(this, clv_override);
continue; continue;
} }
@@ -574,7 +609,7 @@ queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
* state is implicit in the state of woken task (see futex_wait_requeue_pi() for * state is implicit in the state of woken task (see futex_wait_requeue_pi() for
* an example). * an example).
*/ */
static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb, struct cpu_local_var *clv_override)
{ {
int prio; int prio;
@@ -595,7 +630,7 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
q->list.plist.spinlock = &hb->lock; q->list.plist.spinlock = &hb->lock;
#endif #endif
plist_add(&q->list, &hb->chain); plist_add(&q->list, &hb->chain);
q->task = cpu_local_var(current); q->task = cpu_local_var_with_override(current, clv_override);
ihk_mc_spinlock_unlock_noirq(&hb->lock); ihk_mc_spinlock_unlock_noirq(&hb->lock);
} }
@@ -658,46 +693,64 @@ retry:
/* RIKEN: this function has been rewritten so that it returns the remaining /* RIKEN: this function has been rewritten so that it returns the remaining
* time in case we are woken. * time in case we are woken.
*/ */
static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, static int64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
uint64_t timeout) uint64_t timeout, struct cpu_local_var *clv_override)
{ {
uint64_t time_remain = 0; int64_t time_remain = 0;
unsigned long irqstate; unsigned long irqstate;
struct thread *thread = cpu_local_var(current); struct thread *thread = cpu_local_var_with_override(current, clv_override);
/* /*
* The task state is guaranteed to be set before another task can * The task state is guaranteed to be set before another task can
* wake it. * wake it.
* queue_me() calls spin_unlock() upon completion, serializing * queue_me() calls spin_unlock() upon completion, serializing
* access to the hash list and forcing a memory barrier. * access to the hash list and forcing a memory barrier.
*/ */
xchg4(&(cpu_local_var(current)->status), PS_INTERRUPTIBLE); xchg4(&(thread->status), PS_INTERRUPTIBLE);
/* Indicate spin sleep */ /* Indicate spin sleep. Note that schedule_timeout() with
if (!idle_halt) { * idle_halt should use spin sleep because sleep with timeout
* is not implemented.
*/
if (!idle_halt || timeout) {
irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock); irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
thread->spin_sleep = 1; thread->spin_sleep = 1;
ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate); ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
} }
queue_me(q, hb); queue_me(q, hb, clv_override);
if (!plist_node_empty(&q->list)) { if (!plist_node_empty(&q->list)) {
if (clv_override) {
uti_dkprintf("%s: tid: %d is trying to sleep\n", __FUNCTION__, thread->tid);
/* Note that the unit of timeout is nsec */
time_remain = (*linux_wait_event)(q->uti_futex_resp, timeout);
/* Note that time_remain == 0 indicates condition evaluated to false after the timeout elapsed */
if (time_remain < 0) {
if (time_remain == -ERESTARTSYS) { /* Interrupted by signal */
uti_dkprintf("%s: DEBUG: wait_event returned -ERESTARTSYS\n", __FUNCTION__);
} else {
uti_kprintf("%s: ERROR: wait_event returned %d\n", __FUNCTION__, time_remain);
}
}
uti_dkprintf("%s: tid: %d woken up\n", __FUNCTION__, thread->tid);
} else {
if (timeout) { if (timeout) {
dkprintf("futex_wait_queue_me(): tid: %d schedule_timeout()\n", cpu_local_var(current)->tid); dkprintf("futex_wait_queue_me(): tid: %d schedule_timeout()\n", thread->tid);
time_remain = schedule_timeout(timeout); time_remain = schedule_timeout(timeout);
} }
else { else {
dkprintf("futex_wait_queue_me(): tid: %d schedule()\n", cpu_local_var(current)->tid); dkprintf("futex_wait_queue_me(): tid: %d schedule()\n", thread->tid);
spin_sleep_or_schedule(); spin_sleep_or_schedule();
time_remain = 0; time_remain = 0;
} }
dkprintf("futex_wait_queue_me(): tid: %d woken up\n", thread->tid);
dkprintf("futex_wait_queue_me(): tid: %d woken up\n", cpu_local_var(current)->tid); }
} }
/* This does not need to be serialized */ /* This does not need to be serialized */
cpu_local_var(current)->status = PS_RUNNING; thread->status = PS_RUNNING;
thread->spin_sleep = 0; thread->spin_sleep = 0;
return time_remain; return time_remain;
@@ -721,7 +774,8 @@ static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q
* <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
*/ */
static int futex_wait_setup(uint32_t __user *uaddr, uint32_t val, int fshared, static int futex_wait_setup(uint32_t __user *uaddr, uint32_t val, int fshared,
struct futex_q *q, struct futex_hash_bucket **hb) struct futex_q *q, struct futex_hash_bucket **hb,
struct cpu_local_var *clv_override)
{ {
uint32_t uval; uint32_t uval;
int ret; int ret;
@@ -744,7 +798,7 @@ static int futex_wait_setup(uint32_t __user *uaddr, uint32_t val, int fshared,
* rare, but normal. * rare, but normal.
*/ */
q->key = FUTEX_KEY_INIT; q->key = FUTEX_KEY_INIT;
ret = get_futex_key(uaddr, fshared, &q->key); ret = get_futex_key(uaddr, fshared, &q->key, clv_override);
if (ret != 0) if (ret != 0)
return ret; return ret;
@@ -768,49 +822,59 @@ static int futex_wait_setup(uint32_t __user *uaddr, uint32_t val, int fshared,
} }
static int futex_wait(uint32_t __user *uaddr, int fshared, static int futex_wait(uint32_t __user *uaddr, int fshared,
uint32_t val, uint64_t timeout, uint32_t bitset, int clockrt) uint32_t val, uint64_t timeout, uint32_t bitset, int clockrt,
struct cpu_local_var *clv_override)
{ {
struct futex_hash_bucket *hb; struct futex_hash_bucket *hb;
struct futex_q q; struct futex_q q;
uint64_t time_remain; int64_t time_remain;
int ret; int ret;
if (!bitset) if (!bitset)
return -EINVAL; return -EINVAL;
#ifdef PROFILE_ENABLE #ifdef PROFILE_ENABLE
if (cpu_local_var(current)->profile && if (cpu_local_var_with_override(current, clv_override)->profile &&
cpu_local_var(current)->profile_start_ts) { cpu_local_var_with_override(current, clv_override)->profile_start_ts) {
cpu_local_var(current)->profile_elapsed_ts += cpu_local_var_with_override(current, clv_override)->profile_elapsed_ts +=
(rdtsc() - cpu_local_var(current)->profile_start_ts); (rdtsc() - cpu_local_var_with_override(current, clv_override)->profile_start_ts);
cpu_local_var(current)->profile_start_ts = 0; cpu_local_var_with_override(current, clv_override)->profile_start_ts = 0;
} }
#endif #endif
q.bitset = bitset; q.bitset = bitset;
q.requeue_pi_key = NULL; q.requeue_pi_key = NULL;
q.uti_futex_resp = cpu_local_var_with_override(uti_futex_resp, clv_override);
retry: retry:
/* Prepare to wait on uaddr. */ /* Prepare to wait on uaddr. */
ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); ret = futex_wait_setup(uaddr, val, fshared, &q, &hb, clv_override);
if (ret) if (ret) {
uti_dkprintf("%s: tid=%d futex_wait_setup returns zero, no need to sleep\n", __FUNCTION__, cpu_local_var_with_override(current, clv_override)->tid);
goto out; goto out;
}
/* queue_me and wait for wakeup, timeout, or a signal. */ /* queue_me and wait for wakeup, timeout, or a signal. */
time_remain = futex_wait_queue_me(hb, &q, timeout); time_remain = futex_wait_queue_me(hb, &q, timeout, clv_override);
/* If we were woken (and unqueued), we succeeded, whatever. */ /* If we were woken (and unqueued), we succeeded, whatever. */
ret = 0; ret = 0;
if (!unqueue_me(&q)) if (!unqueue_me(&q)) {
uti_dkprintf("%s: tid=%d unqueued\n", __FUNCTION__, cpu_local_var_with_override(current, clv_override)->tid);
goto out_put_key; goto out_put_key;
}
ret = -ETIMEDOUT; ret = -ETIMEDOUT;
/* RIKEN: timer expired case (indicated by !time_remain) */ /* RIKEN: timer expired case (indicated by !time_remain) */
if (timeout && !time_remain) if (timeout && !time_remain) {
uti_dkprintf("%s: tid=%d timer expired\n", __FUNCTION__, cpu_local_var_with_override(current, clv_override)->tid);
goto out_put_key; goto out_put_key;
}
if (hassigpending(cpu_local_var(current))) { /* RIKEN: futex_wait_queue_me() returns -ERESTARTSYS when waiting on Linux CPU and woken up by signal */
if (hassigpending(cpu_local_var_with_override(current, clv_override)) || time_remain == -ERESTARTSYS) {
ret = -EINTR; ret = -EINTR;
uti_dkprintf("%s: tid=%d woken up by signal\n", __FUNCTION__, cpu_local_var_with_override(current, clv_override)->tid);
goto out_put_key; goto out_put_key;
} }
@@ -822,19 +886,22 @@ out_put_key:
put_futex_key(fshared, &q.key); put_futex_key(fshared, &q.key);
out: out:
#ifdef PROFILE_ENABLE #ifdef PROFILE_ENABLE
if (cpu_local_var(current)->profile) { if (cpu_local_var_with_override(current, clv_override)->profile) {
cpu_local_var(current)->profile_start_ts = rdtsc(); cpu_local_var_with_override(current, clv_override)->profile_start_ts = rdtsc();
} }
#endif #endif
return ret; return ret;
} }
int futex(uint32_t *uaddr, int op, uint32_t val, uint64_t timeout, int futex(uint32_t *uaddr, int op, uint32_t val, uint64_t timeout,
uint32_t *uaddr2, uint32_t val2, uint32_t val3, int fshared) uint32_t *uaddr2, uint32_t val2, uint32_t val3, int fshared,
struct cpu_local_var *clv_override)
{ {
int clockrt, ret = -ENOSYS; int clockrt, ret = -ENOSYS;
int cmd = op & FUTEX_CMD_MASK; int cmd = op & FUTEX_CMD_MASK;
uti_dkprintf("%s: uaddr=%p, op=%x, val=%x, timeout=%ld, uaddr2=%p, val2=%x, val3=%x, fshared=%d, clv=%p\n", __FUNCTION__, uaddr, op, val, timeout, uaddr2, val2, val3, fshared, clv_override);
clockrt = op & FUTEX_CLOCK_REALTIME; clockrt = op & FUTEX_CLOCK_REALTIME;
if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI) if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
return -ENOSYS; return -ENOSYS;
@@ -843,21 +910,21 @@ int futex(uint32_t *uaddr, int op, uint32_t val, uint64_t timeout,
case FUTEX_WAIT: case FUTEX_WAIT:
val3 = FUTEX_BITSET_MATCH_ANY; val3 = FUTEX_BITSET_MATCH_ANY;
case FUTEX_WAIT_BITSET: case FUTEX_WAIT_BITSET:
ret = futex_wait(uaddr, fshared, val, timeout, val3, clockrt); ret = futex_wait(uaddr, fshared, val, timeout, val3, clockrt, clv_override);
break; break;
case FUTEX_WAKE: case FUTEX_WAKE:
val3 = FUTEX_BITSET_MATCH_ANY; val3 = FUTEX_BITSET_MATCH_ANY;
case FUTEX_WAKE_BITSET: case FUTEX_WAKE_BITSET:
ret = futex_wake(uaddr, fshared, val, val3); ret = futex_wake(uaddr, fshared, val, val3, clv_override);
break; break;
case FUTEX_REQUEUE: case FUTEX_REQUEUE:
ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0); ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0, clv_override);
break; break;
case FUTEX_CMP_REQUEUE: case FUTEX_CMP_REQUEUE:
ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, 0); ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, 0, clv_override);
break; break;
case FUTEX_WAKE_OP: case FUTEX_WAKE_OP:
ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3); ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3, clv_override);
break; break;
/* RIKEN: these calls are not supported for now. /* RIKEN: these calls are not supported for now.
case FUTEX_LOCK_PI: case FUTEX_LOCK_PI:

View File

@@ -34,13 +34,13 @@
#include <sysfs.h> #include <sysfs.h>
#include <ihk/perfctr.h> #include <ihk/perfctr.h>
#include <rusage_private.h> #include <rusage_private.h>
#include <debug.h>
//#define DEBUG_PRINT_HOST //#define DEBUG_PRINT_HOST
#ifdef DEBUG_PRINT_HOST #ifdef DEBUG_PRINT_HOST
#define dkprintf kprintf #undef DDEBUG_DEFAULT
#else #define DDEBUG_DEFAULT DDEBUG_PRINT
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#endif #endif
/* Linux channel table, indexed by Linux CPU id */ /* Linux channel table, indexed by Linux CPU id */
@@ -78,7 +78,6 @@ int prepare_process_ranges_args_envs(struct thread *thread,
unsigned long args_envs_p, args_envs_rp; unsigned long args_envs_p, args_envs_rp;
unsigned long s, e, up; unsigned long s, e, up;
char **argv; char **argv;
char **a;
int i, n, argc, envc, args_envs_npages; int i, n, argc, envc, args_envs_npages;
char **env; char **env;
int range_npages; int range_npages;
@@ -306,7 +305,7 @@ int prepare_process_ranges_args_envs(struct thread *thread,
/* Only unmap remote address if it wasn't specified as an argument */ /* Only unmap remote address if it wasn't specified as an argument */
if (!args) { if (!args) {
ihk_mc_unmap_virtual(args_envs_r, args_envs_npages, 0); ihk_mc_unmap_virtual(args_envs_r, args_envs_npages);
ihk_mc_unmap_memory(NULL, args_envs_rp, p->args_len); ihk_mc_unmap_memory(NULL, args_envs_rp, p->args_len);
} }
flush_tlb(); flush_tlb();
@@ -341,7 +340,7 @@ int prepare_process_ranges_args_envs(struct thread *thread,
/* Only unmap remote address if it wasn't specified as an argument */ /* Only unmap remote address if it wasn't specified as an argument */
if (!envs) { if (!envs) {
ihk_mc_unmap_virtual(args_envs_r, args_envs_npages, 0); ihk_mc_unmap_virtual(args_envs_r, args_envs_npages);
ihk_mc_unmap_memory(NULL, args_envs_rp, p->envs_len); ihk_mc_unmap_memory(NULL, args_envs_rp, p->envs_len);
} }
flush_tlb(); flush_tlb();
@@ -357,12 +356,13 @@ int prepare_process_ranges_args_envs(struct thread *thread,
proc->saved_cmdline_len = 0; proc->saved_cmdline_len = 0;
} }
proc->saved_cmdline = kmalloc(p->args_len, IHK_MC_AP_NOWAIT); proc->saved_cmdline_len = p->args_len - ((argc + 2) * sizeof(char **));
proc->saved_cmdline = kmalloc(proc->saved_cmdline_len,
IHK_MC_AP_NOWAIT);
if (!proc->saved_cmdline) { if (!proc->saved_cmdline) {
goto err; goto err;
} }
proc->saved_cmdline_len = p->args_len - ((argc + 2) * sizeof(char **));
memcpy(proc->saved_cmdline, memcpy(proc->saved_cmdline,
(char *)args_envs + ((argc + 2) * sizeof(char **)), (char *)args_envs + ((argc + 2) * sizeof(char **)),
proc->saved_cmdline_len); proc->saved_cmdline_len);
@@ -370,21 +370,18 @@ int prepare_process_ranges_args_envs(struct thread *thread,
__FUNCTION__, __FUNCTION__,
proc->saved_cmdline); proc->saved_cmdline);
for (a = argv; *a; a++) { for (i = 0; i < argc; i++) {
*a = (char *)addr + (unsigned long)*a; // Process' address space! // Process' address space!
argv[i] = (char *)addr + (unsigned long)argv[i];
} }
envc = *((long *)(args_envs + p->args_len)); envc = *((long *)(args_envs + p->args_len));
dkprintf("envc: %d\n", envc); dkprintf("envc: %d\n", envc);
env = (char **)(args_envs + p->args_len + sizeof(long)); env = (char **)(args_envs + p->args_len + sizeof(long));
while (*env) { for (i = 0; i < envc; i++) {
char **_env = env; env[i] = addr + p->args_len + env[i];
//dkprintf("%s\n", args_envs + p->args_len + (unsigned long)*env);
*env = (char *)addr + p->args_len + (unsigned long)*env;
env = ++_env;
} }
env = (char **)(args_envs + p->args_len + sizeof(long));
dkprintf("env OK\n"); dkprintf("env OK\n");
@@ -449,7 +446,7 @@ static int process_msg_prepare_process(unsigned long rphys)
if((pn = kmalloc(sizeof(struct program_load_desc) if((pn = kmalloc(sizeof(struct program_load_desc)
+ sizeof(struct program_image_section) * n, + sizeof(struct program_image_section) * n,
IHK_MC_AP_NOWAIT)) == NULL){ IHK_MC_AP_NOWAIT)) == NULL){
ihk_mc_unmap_virtual(p, npages, 0); ihk_mc_unmap_virtual(p, npages);
ihk_mc_unmap_memory(NULL, phys, sz); ihk_mc_unmap_memory(NULL, phys, sz);
return -ENOMEM; return -ENOMEM;
} }
@@ -460,7 +457,7 @@ static int process_msg_prepare_process(unsigned long rphys)
(unsigned long *)&p->cpu_set, (unsigned long *)&p->cpu_set,
sizeof(p->cpu_set))) == NULL) { sizeof(p->cpu_set))) == NULL) {
kfree(pn); kfree(pn);
ihk_mc_unmap_virtual(p, npages, 1); ihk_mc_unmap_virtual(p, npages);
ihk_mc_unmap_memory(NULL, phys, sz); ihk_mc_unmap_memory(NULL, phys, sz);
return -ENOMEM; return -ENOMEM;
} }
@@ -482,6 +479,7 @@ static int process_msg_prepare_process(unsigned long rphys)
proc->mpol_flags = pn->mpol_flags; proc->mpol_flags = pn->mpol_flags;
proc->mpol_threshold = pn->mpol_threshold; proc->mpol_threshold = pn->mpol_threshold;
proc->nr_processes = pn->nr_processes; proc->nr_processes = pn->nr_processes;
proc->process_rank = pn->process_rank;
proc->heap_extension = pn->heap_extension; proc->heap_extension = pn->heap_extension;
/* Update NUMA binding policy if requested */ /* Update NUMA binding policy if requested */
@@ -504,6 +502,9 @@ static int process_msg_prepare_process(unsigned long rphys)
vm->numa_mem_policy = MPOL_BIND; vm->numa_mem_policy = MPOL_BIND;
} }
proc->uti_thread_rank = pn->uti_thread_rank;
proc->uti_use_last_cpu = pn->uti_use_last_cpu;
#ifdef PROFILE_ENABLE #ifdef PROFILE_ENABLE
proc->profile = pn->profile; proc->profile = pn->profile;
thread->profile = pn->profile; thread->profile = pn->profile;
@@ -542,14 +543,14 @@ static int process_msg_prepare_process(unsigned long rphys)
kfree(pn); kfree(pn);
ihk_mc_unmap_virtual(p, npages, 1); ihk_mc_unmap_virtual(p, npages);
ihk_mc_unmap_memory(NULL, phys, sz); ihk_mc_unmap_memory(NULL, phys, sz);
flush_tlb(); flush_tlb();
return 0; return 0;
err: err:
kfree(pn); kfree(pn);
ihk_mc_unmap_virtual(p, npages, 1); ihk_mc_unmap_virtual(p, npages);
ihk_mc_unmap_memory(NULL, phys, sz); ihk_mc_unmap_memory(NULL, phys, sz);
destroy_thread(thread); destroy_thread(thread);
return -ENOMEM; return -ENOMEM;
@@ -562,7 +563,6 @@ static void syscall_channel_send(struct ihk_ikc_channel_desc *c,
} }
extern unsigned long do_kill(struct thread *, int, int, int, struct siginfo *, int ptracecont); extern unsigned long do_kill(struct thread *, int, int, int, struct siginfo *, int ptracecont);
extern void process_procfs_request(struct ikc_scd_packet *rpacket);
extern void terminate_host(int pid); extern void terminate_host(int pid);
extern void debug_log(long); extern void debug_log(long);
@@ -573,7 +573,6 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
struct ikc_scd_packet pckt; struct ikc_scd_packet pckt;
struct ihk_ikc_channel_desc *resp_channel = cpu_local_var(ikc2linux); struct ihk_ikc_channel_desc *resp_channel = cpu_local_var(ikc2linux);
int rc; int rc;
struct mcs_rwlock_node_irqsave lock;
struct thread *thread; struct thread *thread;
struct process *proc; struct process *proc;
struct mcctrl_signal { struct mcctrl_signal {
@@ -610,7 +609,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
case SCD_MSG_SCHEDULE_PROCESS: case SCD_MSG_SCHEDULE_PROCESS:
thread = (struct thread *)packet->arg; thread = (struct thread *)packet->arg;
cpuid = obtain_clone_cpuid(&thread->cpu_set); cpuid = obtain_clone_cpuid(&thread->cpu_set, 0);
if (cpuid == -1) { if (cpuid == -1) {
kprintf("No CPU available\n"); kprintf("No CPU available\n");
ret = -1; ret = -1;
@@ -634,14 +633,14 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
* the waiting thread * the waiting thread
*/ */
case SCD_MSG_WAKE_UP_SYSCALL_THREAD: case SCD_MSG_WAKE_UP_SYSCALL_THREAD:
thread = find_thread(0, packet->ttid, &lock); thread = find_thread(0, packet->ttid);
if (!thread) { if (!thread) {
kprintf("%s: WARNING: no thread for SCD reply? TID: %d\n", kprintf("%s: WARNING: no thread for SCD reply? TID: %d\n",
__FUNCTION__, packet->ttid); __FUNCTION__, packet->ttid);
ret = -EINVAL; ret = -EINVAL;
break; break;
} }
thread_unlock(thread, &lock); thread_unlock(thread);
dkprintf("%s: SCD_MSG_WAKE_UP_SYSCALL_THREAD: waking up tid %d\n", dkprintf("%s: SCD_MSG_WAKE_UP_SYSCALL_THREAD: waking up tid %d\n",
__FUNCTION__, packet->ttid); __FUNCTION__, packet->ttid);
@@ -653,7 +652,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
pp = ihk_mc_map_memory(NULL, packet->arg, sizeof(struct mcctrl_signal)); pp = ihk_mc_map_memory(NULL, packet->arg, sizeof(struct mcctrl_signal));
sp = (struct mcctrl_signal *)ihk_mc_map_virtual(pp, 1, PTATTR_WRITABLE | PTATTR_ACTIVE); sp = (struct mcctrl_signal *)ihk_mc_map_virtual(pp, 1, PTATTR_WRITABLE | PTATTR_ACTIVE);
memcpy(&info, sp, sizeof(struct mcctrl_signal)); memcpy(&info, sp, sizeof(struct mcctrl_signal));
ihk_mc_unmap_virtual(sp, 1, 0); ihk_mc_unmap_virtual(sp, 1);
ihk_mc_unmap_memory(NULL, pp, sizeof(struct mcctrl_signal)); ihk_mc_unmap_memory(NULL, pp, sizeof(struct mcctrl_signal));
pckt.msg = SCD_MSG_SEND_SIGNAL_ACK; pckt.msg = SCD_MSG_SEND_SIGNAL_ACK;
pckt.err = 0; pckt.err = 0;
@@ -668,7 +667,14 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
break; break;
case SCD_MSG_PROCFS_REQUEST: case SCD_MSG_PROCFS_REQUEST:
process_procfs_request(packet); case SCD_MSG_PROCFS_RELEASE:
pckt.msg = SCD_MSG_PROCFS_ANSWER;
pckt.ref = packet->ref;
pckt.arg = packet->arg;
pckt.err = process_procfs_request(packet);
pckt.reply = packet->reply;
pckt.pid = packet->pid;
syscall_channel_send(resp_channel, &pckt);
ret = 0; ret = 0;
break; break;
@@ -705,17 +711,26 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
if (!pcd->exclude_user) { if (!pcd->exclude_user) {
mode |= PERFCTR_USER_MODE; mode |= PERFCTR_USER_MODE;
} }
ihk_mc_perfctr_init_raw(pcd->target_cntr, pcd->config, mode);
ihk_mc_perfctr_stop(1 << pcd->target_cntr); ret = ihk_mc_perfctr_init_raw(pcd->target_cntr, pcd->config, mode);
ihk_mc_perfctr_reset(pcd->target_cntr); if (ret != 0) {
break;
}
ret = ihk_mc_perfctr_stop(1 << pcd->target_cntr);
if (ret != 0) {
break;
}
ret = ihk_mc_perfctr_reset(pcd->target_cntr);
break; break;
case PERF_CTRL_ENABLE: case PERF_CTRL_ENABLE:
ihk_mc_perfctr_start(pcd->target_cntr_mask); ret = ihk_mc_perfctr_start(pcd->target_cntr_mask);
break; break;
case PERF_CTRL_DISABLE: case PERF_CTRL_DISABLE:
ihk_mc_perfctr_stop(pcd->target_cntr_mask); ret = ihk_mc_perfctr_stop(pcd->target_cntr_mask);
break; break;
case PERF_CTRL_GET: case PERF_CTRL_GET:
@@ -726,16 +741,15 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
kprintf("%s: SCD_MSG_PERF_CTRL unexpected ctrl_type\n", __FUNCTION__); kprintf("%s: SCD_MSG_PERF_CTRL unexpected ctrl_type\n", __FUNCTION__);
} }
ihk_mc_unmap_virtual(pcd, 1, 0); ihk_mc_unmap_virtual(pcd, 1);
ihk_mc_unmap_memory(NULL, pp, sizeof(struct perf_ctrl_desc)); ihk_mc_unmap_memory(NULL, pp, sizeof(struct perf_ctrl_desc));
pckt.msg = SCD_MSG_PERF_ACK; pckt.msg = SCD_MSG_PERF_ACK;
pckt.err = 0; pckt.err = ret;
pckt.arg = packet->arg; pckt.arg = packet->arg;
pckt.reply = packet->reply; pckt.reply = packet->reply;
ihk_ikc_send(resp_channel, &pckt, 0); ihk_ikc_send(resp_channel, &pckt, 0);
ret = 0;
break; break;
case SCD_MSG_CPU_RW_REG: case SCD_MSG_CPU_RW_REG:

303
kernel/hugefileobj.c Normal file
View File

@@ -0,0 +1,303 @@
#include <memobj.h>
#include <ihk/mm.h>
#include <kmsg.h>
#include <kmalloc.h>
#include <string.h>
#include <debug.h>
#if DEBUG_HUGEFILEOBJ
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
/*
 * One contiguous allocation backing part of a hugefileobj.
 * Chunks are linked on hugefileobj.chunk_list; hugefileobj_create() inserts
 * them so that the list stays ordered by pgoff.
 */
struct hugefilechunk {
/* link in hugefileobj.chunk_list */
struct list_head list;
/* file offset of the first page of this chunk, in PAGE_SIZE units */
off_t pgoff;
/* number of PAGE_SIZE pages in this chunk */
int npages;
/* kernel virtual address returned by ihk_mc_alloc_aligned_pages_user() */
void *mem;
};
/*
 * Memory object for a huge-page backed file, identified by the pager handle.
 */
struct hugefileobj {
/* generic memobj; must stay the first member, to_hugefileobj() casts */
struct memobj memobj;
/* huge page size in bytes, taken from pager_create_result.size */
size_t pgsize;
/* pager handle used as the lookup key in hugefileobj_list */
uintptr_t handle;
/* log2 of the page size; 0 until hugefileobj_create() has validated it */
unsigned int pgshift;
/* hugefilechunk entries backing this object */
struct list_head chunk_list;
/* protects chunk_list and pgshift updates in hugefileobj_create() */
ihk_spinlock_t chunk_lock;
/* link in the file-scope hugefileobj_list */
struct list_head obj_list;
};
/*
 * All live hugefileobj instances, linked through hugefileobj.obj_list.
 * hugefileobj_list_lock is taken around every access to the list in this file.
 */
static ihk_spinlock_t hugefileobj_list_lock;
static LIST_HEAD(hugefileobj_list);
/*
 * Convert a generic memobj pointer into the hugefileobj that embeds it.
 * This is a plain cast, valid because memobj is the first member of
 * struct hugefileobj.
 */
static struct hugefileobj *to_hugefileobj(struct memobj *memobj)
{
	struct hugefileobj *hobj = (struct hugefileobj *)memobj;

	return hobj;
}
/* Return the generic memobj embedded in a hugefileobj. */
static struct memobj *to_memobj(struct hugefileobj *obj)
{
	struct memobj *base = &obj->memobj;

	return base;
}
/*
 * Find the hugefileobj registered for @handle and take a reference on it.
 *
 * Walks hugefileobj_list; the only caller in this file holds
 * hugefileobj_list_lock around the call.
 *
 * Returns the object with its refcount incremented, or NULL when no usable
 * object exists for @handle.
 */
static struct hugefileobj *hugefileobj_lookup(uintptr_t handle)
{
	struct hugefileobj *cand;

	list_for_each_entry(cand, &hugefileobj_list, obj_list) {
		if (cand->handle != handle)
			continue;

		if (memobj_ref(&cand->memobj) > 1)
			return cand;

		/*
		 * The count was already zero: the last reference has been
		 * dropped and hugefileobj_free() has not yet taken the list
		 * lock to unlink the object.  Undo our increment and keep
		 * searching.
		 */
		ihk_atomic_dec(&cand->memobj.refcnt);
	}

	return NULL;
}
/*
 * Translate a file offset inside a hugefileobj into a physical address.
 *
 * @memobj:    object to look into (must be a hugefileobj)
 * @off:       byte offset into the file
 * @p2align:   page-size exponent requested by the caller; must match the
 *             object's page size (obj->pgshift - PTL1_SHIFT)
 * @physp:     out: physical address backing @off
 * @pflag:     unused here
 * @virt_addr: unused here
 *
 * Returns 0 on success, -ENOMEM when @p2align does not match the object's
 * page size, and -EIO when no chunk backs @off.
 */
static int hugefileobj_get_page(struct memobj *memobj, off_t off,
				int p2align, uintptr_t *physp,
				unsigned long *pflag, uintptr_t virt_addr)
{
	struct hugefileobj *obj = to_hugefileobj(memobj);
	struct hugefilechunk *chunk;
	struct hugefilechunk *found = NULL;
	off_t pgoff;

	if (p2align != obj->pgshift - PTL1_SHIFT) {
		kprintf("%s: p2align %d but expected %d\n",
			__func__, p2align, obj->pgshift - PTL1_SHIFT);
		return -ENOMEM;
	}

	/*
	 * chunk->pgoff and chunk->npages are kept in PAGE_SIZE units by
	 * hugefileobj_create() (and the address computation below multiplies
	 * chunk->pgoff by PAGE_SIZE), so the lookup key must use the same
	 * unit rather than huge-page units.
	 */
	pgoff = off >> PAGE_SHIFT;

	ihk_mc_spinlock_lock_noirq(&obj->chunk_lock);
	list_for_each_entry(chunk, &obj->chunk_list, list) {
		/* chunk ends before pgoff: keep walking the ordered list */
		if (pgoff >= chunk->pgoff + chunk->npages)
			continue;
		/*
		 * First chunk ending after pgoff: it either contains pgoff
		 * or pgoff falls into a hole before it.
		 */
		if (pgoff >= chunk->pgoff)
			found = chunk;
		break;
	}
	ihk_mc_spinlock_unlock_noirq(&obj->chunk_lock);

	/*
	 * A separate result pointer is required: when the loop runs off the
	 * end of the list the cursor is not NULL, it points at the container
	 * of the list head and must not be dereferenced.
	 */
	if (!found) {
		kprintf("%s: no segment found for pgoff %lx (obj %p)\n",
			__func__, pgoff, obj);
		return -EIO;
	}

	*physp = virt_to_phys(found->mem + (off - found->pgoff * PAGE_SIZE));

	return 0;
}
/*
 * memobj_ops.free callback: tear down a hugefileobj.
 *
 * Unlinks the object from hugefileobj_list, then releases its path buffer,
 * every backing chunk (pages and descriptor) and finally the object itself.
 */
static void hugefileobj_free(struct memobj *memobj)
{
	struct hugefileobj *hobj = to_hugefileobj(memobj);
	struct hugefilechunk *cur, *tmp;

	dkprintf("Destroying hugefileobj %p\n", memobj);

	/* Take the object off the global list so lookups stop finding it. */
	ihk_mc_spinlock_lock_noirq(&hugefileobj_list_lock);
	list_del(&hobj->obj_list);
	ihk_mc_spinlock_unlock_noirq(&hugefileobj_list_lock);

	kfree(memobj->path);

	/* don't bother with chunk_lock, memobj refcounting makes this safe */
	list_for_each_entry_safe(cur, tmp, &hobj->chunk_list, list) {
		ihk_mc_free_pages_user(cur->mem, cur->npages);
		kfree(cur);
	}

	kfree(memobj);
}
/*
 * memobj operations for huge-page file objects.  Only free and get_page are
 * provided; the remaining memobj_ops members are left NULL.
 */
struct memobj_ops hugefileobj_ops = {
.free = hugefileobj_free,
.get_page = hugefileobj_get_page,
};
/*
 * Destroy every hugefileobj still registered on hugefileobj_list.
 *
 * Each iteration drops one reference from the first object on the list.
 * If that was the last reference, memobj_unref() has already freed the
 * object (and hugefileobj_free() unlinked it).  Otherwise the object is
 * reported and freed regardless of the remaining references.
 */
void hugefileobj_cleanup(void)
{
	struct hugefileobj *obj;
	int refcnt;

	for (;;) {
		ihk_mc_spinlock_lock_noirq(&hugefileobj_list_lock);
		if (list_empty(&hugefileobj_list)) {
			ihk_mc_spinlock_unlock_noirq(&hugefileobj_list_lock);
			break;
		}
		obj = list_first_entry(&hugefileobj_list, struct hugefileobj,
				       obj_list);
		/* hugefileobj_free() takes the list lock itself */
		ihk_mc_spinlock_unlock_noirq(&hugefileobj_list_lock);

		refcnt = memobj_unref(to_memobj(obj));
		if (refcnt != 0) {
			/* refcnt is an int, so it is printed with %d */
			kprintf("%s: obj %p had refcnt %d > 1, destroying anyway\n",
				__func__, obj, refcnt + 1);
			hugefileobj_free(to_memobj(obj));
		}
	}
}
int hugefileobj_pre_create(struct pager_create_result *result,
struct memobj **objp, int *maxprotp)
{
struct hugefileobj *obj;
ihk_mc_spinlock_lock_noirq(&hugefileobj_list_lock);
obj = hugefileobj_lookup(result->handle);
if (obj)
goto out_unlock;
obj = kmalloc(sizeof(*obj), IHK_MC_AP_NOWAIT);
if (!obj)
return -ENOMEM;
obj->handle = result->handle;
obj->pgsize = result->size;
obj->pgshift = 0;
INIT_LIST_HEAD(&obj->chunk_list);
ihk_mc_spinlock_init(&obj->chunk_lock);
obj->memobj.flags = result->flags;
obj->memobj.status = MEMOBJ_TO_BE_PREFETCHED;
obj->memobj.ops = &hugefileobj_ops;
/* keep mapping around when process is gone */
ihk_atomic_set(&obj->memobj.refcnt, 2);
if (result->path[0]) {
obj->memobj.path = kmalloc(PATH_MAX, IHK_MC_AP_NOWAIT);
if (!obj->memobj.path) {
kfree(obj);
return -ENOMEM;
}
strncpy(obj->memobj.path, result->path, PATH_MAX);
}
list_add(&obj->obj_list, &hugefileobj_list);
out_unlock:
ihk_mc_spinlock_unlock_noirq(&hugefileobj_list_lock);
*maxprotp = result->maxprot;
*objp = to_memobj(obj);
return 0;
}
/*
 * Allocate backing memory for the range [off, off + len) of a hugefileobj.
 *
 * @memobj:    object obtained from hugefileobj_pre_create()
 * @len:       length of the mapping in bytes; must be a multiple of the
 *             object's huge page size
 * @off:       file offset in bytes; must be huge-page aligned
 * @pgshiftp:  out: log2 of the huge page size (only written on success)
 * @virt_addr: passed through to the page allocator
 *
 * Parts of the range already covered by an existing chunk are skipped.
 * The rest is allocated in zeroed chunks that are inserted into
 * obj->chunk_list in pgoff order.  When an allocation fails the chunk size
 * is halved and the allocation retried, down to a single page.
 *
 * Returns 0 on success, -EINVAL for a bad page size / length / offset or a
 * page-size change on an existing object, -ENOMEM when memory runs out, or
 * the error from arch_get_smaller_page_size().  On -ENOMEM chunks that were
 * already inserted stay on the list; the caller is expected to clean up.
 */
int hugefileobj_create(struct memobj *memobj, size_t len, off_t off,
		       int *pgshiftp, uintptr_t virt_addr)
{
	struct hugefileobj *obj = to_hugefileobj(memobj);
	struct hugefilechunk *chunk = NULL, *old_chunk = NULL;
	int p2align;
	unsigned int pgshift;
	int npages, npages_left;
	void *v;
	off_t pgoff, next_pgoff;
	int error;

	error = arch_get_smaller_page_size(NULL, obj->pgsize + 1, NULL,
					   &p2align);
	if (error)
		return error;

	pgshift = p2align + PTL1_SHIFT;
	/* 1UL: do the shift in the width of size_t, not in int */
	if ((1UL << pgshift) != obj->pgsize) {
		dkprintf("invalid hugefileobj pagesize: %d\n",
			 obj->pgsize);
		return -EINVAL;
	}

	/*
	 * NOTE(review): the two checks below reset obj->pgshift without
	 * holding chunk_lock, even for an object that already has chunks.
	 * Kept as in the original; confirm that this is intended.
	 */
	if (len & ((1UL << pgshift) - 1)) {
		dkprintf("invalid hugetlbfs mmap size %d (pagesize %d)\n",
			 len, 1 << pgshift);
		obj->pgshift = 0;
		return -EINVAL;
	}
	if (off & ((1UL << pgshift) - 1)) {
		dkprintf("invalid hugetlbfs mmap offset %d (pagesize %d)\n",
			 off, 1 << pgshift);
		obj->pgshift = 0;
		return -EINVAL;
	}

	ihk_mc_spinlock_lock_noirq(&obj->chunk_lock);
	if (obj->pgshift && obj->pgshift != pgshift) {
		kprintf("pgshift changed between two calls on same inode?! had %d now %d\n",
			obj->pgshift, pgshift);
		error = -EINVAL;
		goto out_unlock;
	}
	obj->pgshift = pgshift;

	/* Prealloc upfront, we need to fail here if not enough memory. */
	if (!list_empty(&obj->chunk_list))
		old_chunk = list_first_entry(&obj->chunk_list,
					     struct hugefilechunk, list);

	/* chunk bookkeeping is done in PAGE_SIZE units */
	pgoff = off >> PAGE_SHIFT;
	npages_left = len >> PAGE_SHIFT;
	npages = npages_left;

	/*
	 * "> 0" rather than "!= 0": skipping an existing chunk that extends
	 * past the requested range drives npages_left negative.
	 */
	while (npages_left > 0) {
		/* advance to the first existing chunk ending after pgoff */
		while (old_chunk &&
		       pgoff >= old_chunk->pgoff + old_chunk->npages) {
			if (list_is_last(&old_chunk->list, &obj->chunk_list)) {
				old_chunk = NULL;
				break;
			}
			old_chunk = list_entry(old_chunk->list.next,
					       struct hugefilechunk, list);
		}

		/* pgoff already backed by an existing chunk: skip over it */
		if (old_chunk) {
			next_pgoff = old_chunk->pgoff + old_chunk->npages;
			if (pgoff >= old_chunk->pgoff && pgoff < next_pgoff) {
				npages_left -= next_pgoff - pgoff;
				pgoff = next_pgoff;
				continue;
			}
		}

		/* a descriptor left over from a failed attempt is reused */
		if (!chunk) {
			chunk = kmalloc(sizeof(*chunk), IHK_MC_AP_NOWAIT);
			if (!chunk) {
				kprintf("could not allocate hugefileobj chunk\n");
				error = -ENOMEM;
				goto out_unlock;
			}
		}

		if (npages > npages_left)
			npages = npages_left;
		/*
		 * Here old_chunk, if set, starts after pgoff.  Do not let the
		 * new chunk run into it, otherwise two chunks would cover the
		 * same file pages.
		 */
		if (old_chunk && npages > old_chunk->pgoff - pgoff)
			npages = old_chunk->pgoff - pgoff;

		v = ihk_mc_alloc_aligned_pages_user(npages, p2align,
				IHK_MC_AP_NOWAIT | IHK_MC_AP_USER, virt_addr);
		if (!v) {
			if (npages == 1) {
				dkprintf("could not allocate more pages wth pgshift %d\n",
					 pgshift);
				kfree(chunk);
				/* caller will cleanup the rest */
				error = -ENOMEM;
				goto out_unlock;
			}
			/* exponential backoff, try less aggressive? */
			npages /= 2;
			continue;
		}

		memset(v, 0, npages * PAGE_SIZE);
		chunk->npages = npages;
		chunk->mem = v;
		chunk->pgoff = pgoff;
		/* ordered list: insert before next (bigger) element */
		if (old_chunk)
			list_add(&chunk->list, old_chunk->list.prev);
		else
			list_add(&chunk->list, obj->chunk_list.prev);
		pgoff += npages;
		npages_left -= npages;

		/*
		 * The descriptor now belongs to the list.  Forget it so the
		 * next iteration allocates a fresh one instead of overwriting
		 * and re-linking this node.
		 */
		chunk = NULL;
	}

	obj->memobj.size = len;
	*pgshiftp = pgshift;
	/* error is still 0 from arch_get_smaller_page_size() */

out_unlock:
	/* single exit so chunk_lock is released on every path */
	ihk_mc_spinlock_unlock_noirq(&obj->chunk_lock);

	return error;
}

View File

@@ -21,7 +21,7 @@
struct kmalloc_header { struct kmalloc_header {
unsigned int front_magic; unsigned int front_magic;
unsigned int cpu_id; int cpu_id;
struct list_head list; struct list_head list;
int size; /* The size of this chunk without the header */ int size; /* The size of this chunk without the header */
unsigned int end_magic; unsigned int end_magic;
@@ -74,6 +74,7 @@ struct cpu_local_var {
struct thread *current; struct thread *current;
struct list_head runq; struct list_head runq;
size_t runq_len; size_t runq_len;
size_t runq_reserved; /* Number of threads which are about to be added to runq */
struct ihk_ikc_channel_desc *ikc2linux; struct ihk_ikc_channel_desc *ikc2linux;
@@ -99,6 +100,9 @@ struct cpu_local_var {
struct list_head smp_func_req_list; struct list_head smp_func_req_list;
struct process_vm *on_fork_vm; struct process_vm *on_fork_vm;
/* UTI */
void *uti_futex_resp;
} __attribute__((aligned(64))); } __attribute__((aligned(64)));
@@ -110,4 +114,6 @@ static struct cpu_local_var *get_this_cpu_local_var(void)
#define cpu_local_var(name) get_this_cpu_local_var()->name #define cpu_local_var(name) get_this_cpu_local_var()->name
#define cpu_local_var_with_override(name, clv_override) (clv_override ? clv_override->name : get_this_cpu_local_var()->name)
#endif #endif

54
kernel/include/debug.h Normal file
View File

@@ -0,0 +1,54 @@
/*
 * Kernel debug-print helpers: dkprintf() (switchable per call site),
 * ekprintf() (always prints), BUG_ON() and BUILD_BUG_ON().
 */
#ifndef DEBUG_H
#define DEBUG_H
#include "lwk/compiler.h"
void panic(const char *);
/* when someone has a lot of time, add attribute __printf(1, 2) to kprintf */
int kprintf(const char *format, ...);
/*
 * Per-call-site descriptor emitted by dkprintf().  One instance is defined
 * for each dkprintf() invocation and placed in the "__verbose" section.
 */
struct ddebug {
/* source file of the call site (__FILE__) */
const char *file;
/* enclosing function of the call site (__func__) */
const char *func;
/* not set by DDEBUG_SYMBOL() in this header */
const char *fmt;
/* source line of the call site (__LINE__) */
unsigned int line:24;
/* DDEBUG_* flags; dkprintf() prints when this is non-zero */
unsigned int flags:8;
} __aligned(8);
/* Values for ddebug.flags */
#define DDEBUG_NONE 0x0
#define DDEBUG_PRINT 0x1
/*
 * Initial flags for every call site in a translation unit.  A source file
 * can #undef this and redefine it as DDEBUG_PRINT after including this
 * header to have its dkprintf() calls enabled by default.
 */
#define DDEBUG_DEFAULT DDEBUG_NONE
/* Define the static descriptor for the current call site. */
#define DDEBUG_SYMBOL() \
static struct ddebug __aligned(8) \
__attribute__((section("__verbose"))) ddebug = { \
.file = __FILE__, \
.func = __func__, \
.line = __LINE__, \
.flags = DDEBUG_DEFAULT, \
}
/* Condition checked by dkprintf(): the call site's current flags. */
#define DDEBUG_TEST ddebug.flags
/*
 * Debug print: forwards to kprintf() only when the call site's descriptor
 * has a non-zero flags field.  The flags are read at each call.
 */
#define dkprintf(fmt, args...) \
do { \
DDEBUG_SYMBOL(); \
if (DDEBUG_TEST) \
kprintf(fmt, ##args); \
} while (0)
/* Error print: always forwards to kprintf(). */
#define ekprintf(fmt, args...) kprintf(fmt, ##args)
/* Print file, function and line, then panic, when condition is true. */
#define BUG_ON(condition) do { \
if (condition) { \
kprintf("PANIC: %s: %s(line:%d)\n", \
__FILE__, __func__, __LINE__); \
panic(""); \
} \
} while (0)
/*
 * Compile-time assertion: a true condition yields the array size -1, which
 * does not compile.
 */
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
#endif

View File

@@ -63,7 +63,7 @@
#define FUTEX_OP_ANDN 3 /* *(int *)UADDR2 &= ~OPARG; */ #define FUTEX_OP_ANDN 3 /* *(int *)UADDR2 &= ~OPARG; */
#define FUTEX_OP_XOR 4 /* *(int *)UADDR2 ^= OPARG; */ #define FUTEX_OP_XOR 4 /* *(int *)UADDR2 ^= OPARG; */
#define FUTEX_OP_OPARG_SHIFT 8 /* Use (1 << OPARG) instead of OPARG. */ #define FUTEX_OP_OPARG_SHIFT 8U /* Use (1 << OPARG) instead of OPARG. */
#define FUTEX_OP_CMP_EQ 0 /* if (oldval == CMPARG) wake */ #define FUTEX_OP_CMP_EQ 0 /* if (oldval == CMPARG) wake */
#define FUTEX_OP_CMP_NE 1 /* if (oldval != CMPARG) wake */ #define FUTEX_OP_CMP_NE 1 /* if (oldval != CMPARG) wake */
@@ -150,6 +150,7 @@ union futex_key {
extern int futex_init(void); extern int futex_init(void);
struct cpu_local_var;
extern int extern int
futex( futex(
uint32_t __user * uaddr, uint32_t __user * uaddr,
@@ -159,7 +160,8 @@ futex(
uint32_t __user * uaddr2, uint32_t __user * uaddr2,
uint32_t val2, uint32_t val2,
uint32_t val3, uint32_t val3,
int fshared int fshared,
struct cpu_local_var *clv_override
); );

View File

@@ -33,6 +33,7 @@ extern void cpu_sysfs_setup(void);
extern void numa_sysfs_setup(void); extern void numa_sysfs_setup(void);
extern void rusage_sysfs_setup(void); extern void rusage_sysfs_setup(void);
extern void status_sysfs_setup(void); extern void status_sysfs_setup(void);
extern void dynamic_debug_sysfs_setup(void);
extern char *find_command_line(char *name); extern char *find_command_line(char *name);

View File

@@ -13,11 +13,9 @@
#ifndef __HEADER_KMALLOC_H #ifndef __HEADER_KMALLOC_H
#define __HEADER_KMALLOC_H #define __HEADER_KMALLOC_H
#include <ihk/mm.h> #include "ihk/mm.h"
#include <cls.h> #include "cls.h"
#include "debug.h"
void panic(const char *);
int kprintf(const char *format, ...);
#define kmalloc(size, flag) ({\ #define kmalloc(size, flag) ({\
void *r = _kmalloc(size, flag, __FILE__, __LINE__);\ void *r = _kmalloc(size, flag, __FILE__, __LINE__);\

View File

@@ -12,11 +12,8 @@
/* Optimization barrier */ /* Optimization barrier */
/* The "volatile" is due to gcc bugs */ /* The "volatile" is due to gcc bugs */
/* XXX: barrier is also defined in lib/include/ihk/cpu.h, #define barrier() __asm__ __volatile__("": : :"memory")
* it would be cleaner to restore this here at some point, but we have
* quite a few C files not including either this or kernel's compiler.h
* #define barrier() __asm__ __volatile__("": : :"memory")
*/
/* /*
* This version is i.e. to prevent dead stores elimination on @ptr * This version is i.e. to prevent dead stores elimination on @ptr
* where gcc and llvm may behave differently when otherwise using * where gcc and llvm may behave differently when otherwise using

View File

@@ -3,6 +3,8 @@
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
#include <types.h>
#ifdef __CHECKER__ #ifdef __CHECKER__
# define __user __attribute__((noderef, address_space(1))) # define __user __attribute__((noderef, address_space(1)))
# define __kernel __attribute__((address_space(0))) # define __kernel __attribute__((address_space(0)))
@@ -175,11 +177,6 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
# define unlikely(x) __builtin_expect(!!(x), 0) # define unlikely(x) __builtin_expect(!!(x), 0)
#endif #endif
/* Optimization barrier */
#ifndef barrier
# define barrier() __memory_barrier()
#endif
#ifndef barrier_data #ifndef barrier_data
# define barrier_data(ptr) barrier() # define barrier_data(ptr) barrier()
#endif #endif
@@ -490,4 +487,62 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
(_________p1); \ (_________p1); \
}) })
extern void *memcpy(void *dest, const void *src, size_t n);
/*
 * Copy @size bytes from *@p into *@res.
 *
 * For sizes 1, 2, 4 and 8 the source is read with a single access through a
 * volatile-qualified pointer of the matching integer width.  Any other size
 * falls back to memcpy() bracketed by barrier().
 */
static __always_inline void __read_once_size(const volatile void *p, void *res, int size)
{
switch (size) {
case 1: *(unsigned char *)res = *(volatile unsigned char *)p; break;
case 2: *(unsigned short *)res = *(volatile unsigned short *)p; break;
case 4: *(unsigned int *)res = *(volatile unsigned int *)p; break;
case 8: *(unsigned long long *)res = *(volatile unsigned long long *)p; break;
default:
/* no matching integer width: plain copy fenced by compiler barriers */
barrier();
memcpy((void *)res, (const void *)p, size);
barrier();
}
}
/*
 * Copy @size bytes from *@res into *@p.
 *
 * For sizes 1, 2, 4 and 8 the destination is written with a single access
 * through a volatile-qualified pointer of the matching integer width.  Any
 * other size falls back to memcpy() bracketed by barrier().
 */
static __always_inline void __write_once_size(volatile void *p, void *res, int size)
{
switch (size) {
case 1: *(volatile unsigned char *)p = *(unsigned char *)res; break;
case 2: *(volatile unsigned short *)p = *(unsigned short *)res; break;
case 4: *(volatile unsigned int *)p = *(unsigned int *)res; break;
case 8: *(volatile unsigned long long *)p = *(unsigned long long *)res; break;
default:
/* no matching integer width: plain copy fenced by compiler barriers */
barrier();
memcpy((void *)p, (const void *)res, size);
barrier();
}
}
/*
* Prevent the compiler from merging or refetching reads or writes. The
* compiler is also forbidden from reordering successive instances of
* READ_ONCE, WRITE_ONCE and ACCESS_ONCE (see below), but only when the
* compiler is aware of some particular ordering. One way to make the
* compiler aware of ordering is to put the two invocations of READ_ONCE,
* WRITE_ONCE or ACCESS_ONCE() in different C statements.
*
* In contrast to ACCESS_ONCE these two macros will also work on aggregate
* data types like structs or unions. If the size of the accessed data
* type exceeds the word size of the machine (e.g., 32 bits or 64 bits)
* READ_ONCE() and WRITE_ONCE() will fall back to memcpy and print a
* compile-time warning.
*
* Their two major use cases are: (1) Mediating communication between
* process-level code and irq/NMI handlers, all running on the same CPU,
* and (2) Ensuring that the compiler does not fold, spindle, or otherwise
* mutilate accesses that either do not require ordering or that interact
* with an explicit memory barrier or atomic instruction that provides the
* required ordering.
*/
#define READ_ONCE(x) \
({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
#define WRITE_ONCE(x, val) \
({ typeof(x) __val = (val); __write_once_size(&(x), &__val, sizeof(__val)); __val; })
#endif /* __LWK_COMPILER_H */ #endif /* __LWK_COMPILER_H */

View File

@@ -25,7 +25,7 @@
#define FUTEX_OP_ANDN 3 /* *(int *)UADDR2 &= ~OPARG; */ #define FUTEX_OP_ANDN 3 /* *(int *)UADDR2 &= ~OPARG; */
#define FUTEX_OP_XOR 4 /* *(int *)UADDR2 ^= OPARG; */ #define FUTEX_OP_XOR 4 /* *(int *)UADDR2 ^= OPARG; */
#define FUTEX_OP_OPARG_SHIFT 8 /* Use (1 << OPARG) instead of OPARG. */ #define FUTEX_OP_OPARG_SHIFT 8U /* Use (1 << OPARG) instead of OPARG. */
#define FUTEX_OP_CMP_EQ 0 /* if (oldval == CMPARG) wake */ #define FUTEX_OP_CMP_EQ 0 /* if (oldval == CMPARG) wake */
#define FUTEX_OP_CMP_NE 1 /* if (oldval != CMPARG) wake */ #define FUTEX_OP_CMP_NE 1 /* if (oldval != CMPARG) wake */

View File

@@ -19,6 +19,7 @@
#include <ihk/lock.h> #include <ihk/lock.h>
#include <errno.h> #include <errno.h>
#include <list.h> #include <list.h>
#include <pager.h>
#ifdef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */ #ifdef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
#else /* POSTK_DEBUG_ARCH_DEP_18 */ #else /* POSTK_DEBUG_ARCH_DEP_18 */
@@ -44,8 +45,7 @@ enum {
MF_XPMEM = 0x10000, /* To identify XPMEM attachment pages for rusage accounting */ MF_XPMEM = 0x10000, /* To identify XPMEM attachment pages for rusage accounting */
MF_ZEROOBJ = 0x20000, /* To identify pages of anonymous, on-demand paging ranges for rusage accounting */ MF_ZEROOBJ = 0x20000, /* To identify pages of anonymous, on-demand paging ranges for rusage accounting */
MF_SHM = 0x40000, MF_SHM = 0x40000,
MF_HOST_RELEASED = 0x80000000, MF_HUGETLBFS = 0x100000,
MF_END
}; };
#define MEMOBJ_READY 0 #define MEMOBJ_READY 0
@@ -56,7 +56,7 @@ struct memobj {
uint32_t flags; uint32_t flags;
uint32_t status; uint32_t status;
size_t size; size_t size;
ihk_spinlock_t lock; ihk_atomic_t refcnt;
/* For pre-mapped memobjects */ /* For pre-mapped memobjects */
void **pages; void **pages;
@@ -64,8 +64,7 @@ struct memobj {
char *path; char *path;
}; };
typedef void memobj_release_func_t(struct memobj *obj); typedef void memobj_free_func_t(struct memobj *obj);
typedef void memobj_ref_func_t(struct memobj *obj);
typedef int memobj_get_page_func_t(struct memobj *obj, off_t off, int p2align, uintptr_t *physp, unsigned long *flag, uintptr_t virt_addr); typedef int memobj_get_page_func_t(struct memobj *obj, off_t off, int p2align, uintptr_t *physp, unsigned long *flag, uintptr_t virt_addr);
typedef uintptr_t memobj_copy_page_func_t(struct memobj *obj, uintptr_t orgphys, int p2align); typedef uintptr_t memobj_copy_page_func_t(struct memobj *obj, uintptr_t orgphys, int p2align);
typedef int memobj_flush_page_func_t(struct memobj *obj, uintptr_t phys, size_t pgsize); typedef int memobj_flush_page_func_t(struct memobj *obj, uintptr_t phys, size_t pgsize);
@@ -73,27 +72,28 @@ typedef int memobj_invalidate_page_func_t(struct memobj *obj, uintptr_t phys, si
typedef int memobj_lookup_page_func_t(struct memobj *obj, off_t off, int p2align, uintptr_t *physp, unsigned long *flag); typedef int memobj_lookup_page_func_t(struct memobj *obj, off_t off, int p2align, uintptr_t *physp, unsigned long *flag);
struct memobj_ops { struct memobj_ops {
memobj_release_func_t * release; memobj_free_func_t *free;
memobj_ref_func_t * ref; memobj_get_page_func_t *get_page;
memobj_get_page_func_t * get_page; memobj_copy_page_func_t *copy_page;
memobj_copy_page_func_t * copy_page; memobj_flush_page_func_t *flush_page;
memobj_flush_page_func_t * flush_page; memobj_invalidate_page_func_t *invalidate_page;
memobj_invalidate_page_func_t * invalidate_page; memobj_lookup_page_func_t *lookup_page;
memobj_lookup_page_func_t * lookup_page;
}; };
static inline void memobj_release(struct memobj *obj) static inline int memobj_ref(struct memobj *obj)
{ {
if (obj->ops->release) { return ihk_atomic_inc_return(&obj->refcnt);
(*obj->ops->release)(obj);
}
} }
static inline void memobj_ref(struct memobj *obj) static inline int memobj_unref(struct memobj *obj)
{ {
if (obj->ops->ref) { int cnt;
(*obj->ops->ref)(obj);
if ((cnt = ihk_atomic_dec_return(&obj->refcnt)) == 0) {
(*obj->ops->free)(obj);
} }
return cnt;
} }
static inline int memobj_get_page(struct memobj *obj, off_t off, static inline int memobj_get_page(struct memobj *obj, off_t off,
@@ -140,16 +140,6 @@ static inline int memobj_lookup_page(struct memobj *obj, off_t off,
return -ENXIO; return -ENXIO;
} }
static inline void memobj_lock(struct memobj *obj)
{
ihk_mc_spinlock_lock_noirq(&obj->lock);
}
static inline void memobj_unlock(struct memobj *obj)
{
ihk_mc_spinlock_unlock_noirq(&obj->lock);
}
static inline int memobj_has_pager(struct memobj *obj) static inline int memobj_has_pager(struct memobj *obj)
{ {
return !!(obj->flags & MF_HAS_PAGER); return !!(obj->flags & MF_HAS_PAGER);
@@ -166,5 +156,10 @@ int shmobj_create(struct shmid_ds *ds, struct memobj **objp);
int zeroobj_create(struct memobj **objp); int zeroobj_create(struct memobj **objp);
int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxprotp, int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxprotp,
int prot, int populate_flags); int prot, int populate_flags);
int hugefileobj_pre_create(struct pager_create_result *result,
struct memobj **objp, int *maxprotp);
int hugefileobj_create(struct memobj *obj, size_t len, off_t off,
int *pgshiftp, uintptr_t virt_addr);
void hugefileobj_cleanup(void);
#endif /* HEADER_MEMOBJ_H */ #endif /* HEADER_MEMOBJ_H */

View File

@@ -70,10 +70,8 @@
#define PS_TRACED 0x40 /* Set to "not running" by a ptrace related event */ #define PS_TRACED 0x40 /* Set to "not running" by a ptrace related event */
#define PS_STOPPING 0x80 #define PS_STOPPING 0x80
#define PS_TRACING 0x100 #define PS_TRACING 0x100
#ifdef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */
#define PS_DELAY_STOPPED 0x200 #define PS_DELAY_STOPPED 0x200
#define PS_DELAY_TRACED 0x400 #define PS_DELAY_TRACED 0x400
#endif /* POSTK_DEBUG_TEMP_FIX_41 */
#define PS_NORMAL (PS_INTERRUPTIBLE | PS_UNINTERRUPTIBLE) #define PS_NORMAL (PS_INTERRUPTIBLE | PS_UNINTERRUPTIBLE)
@@ -244,6 +242,11 @@ enum mpol_rebind_step {
#define SPAWN_TO_REMOTE 1 #define SPAWN_TO_REMOTE 1
#define SPAWNING_TO_REMOTE 1001 #define SPAWNING_TO_REMOTE 1001
#define UTI_STATE_DEAD 0
#define UTI_STATE_PROLOGUE 1
#define UTI_STATE_RUNNING_IN_LINUX 2
#define UTI_STATE_EPILOGUE 3
#include <waitq.h> #include <waitq.h>
#include <futex.h> #include <futex.h>
@@ -277,6 +280,7 @@ extern struct list_head resource_set_list;
extern mcs_rwlock_lock_t resource_set_lock; extern mcs_rwlock_lock_t resource_set_lock;
extern int idle_halt; extern int idle_halt;
extern int allow_oversubscribe; extern int allow_oversubscribe;
extern ihk_spinlock_t runq_reservation_lock; /* mutex for cpuid reservation (clv->runq_reserved) */
struct process_hash { struct process_hash {
struct list_head list[HASH_SIZE]; struct list_head list[HASH_SIZE];
@@ -460,6 +464,14 @@ struct process {
// threads and children // threads and children
struct list_head threads_list; struct list_head threads_list;
struct list_head report_threads_list;
/*
* main_thread is used to refer to thread information using process ID.
* 1) signal related state in signal_flags
* 2) status of trace
*/
struct thread *main_thread;
mcs_rwlock_lock_t threads_lock; // lock for threads_list mcs_rwlock_lock_t threads_lock; // lock for threads_list
/* TID set of proxy process */ /* TID set of proxy process */
struct mcexec_tid *tids; struct mcexec_tid *tids;
@@ -488,7 +500,6 @@ struct process {
// V +---- | // V +---- |
// PS_STOPPED -----+ // PS_STOPPED -----+
// (PS_TRACED) // (PS_TRACED)
unsigned long exit_status; // only for zombie
/* Store exit_status for a group of threads when stopped by SIGSTOP. /* Store exit_status for a group of threads when stopped by SIGSTOP.
exit_status can't be used because values of exit_status of threads exit_status can't be used because values of exit_status of threads
@@ -520,22 +531,6 @@ struct process {
long saved_cmdline_len; long saved_cmdline_len;
cpu_set_t cpu_set; cpu_set_t cpu_set;
/* Store ptrace flags.
* The lower 8 bits are PTRACE_O_xxx of the PTRACE_SETOPTIONS request.
* Other bits are for inner use of the McKernel.
*/
int ptrace;
/* Store ptrace event message.
* PTRACE_O_xxx will store event message here.
* PTRACE_GETEVENTMSG will get from here.
*/
unsigned long ptrace_eventmsg;
/* Store event related to signal. For example,
it represents that the proceess has been resumed by SIGCONT. */
int signal_flags;
/* Store signal sent to parent when the process terminates. */ /* Store signal sent to parent when the process terminates. */
int termsig; int termsig;
@@ -557,6 +552,9 @@ struct process {
size_t mpol_threshold; size_t mpol_threshold;
unsigned long heap_extension; unsigned long heap_extension;
unsigned long mpol_bind_mask; unsigned long mpol_bind_mask;
int uti_thread_rank; /* Spawn on Linux CPU when clone_count reaches this */
int uti_use_last_cpu; /* Work-around not to share CPU with OpenMP thread */
int clone_count;
// perf_event // perf_event
int perf_status; int perf_status;
@@ -572,6 +570,7 @@ struct process {
unsigned long profile_elapsed_ts; unsigned long profile_elapsed_ts;
#endif // PROFILE_ENABLE #endif // PROFILE_ENABLE
int nr_processes; /* For partitioned execution */ int nr_processes; /* For partitioned execution */
int process_rank; /* Rank in partition */
}; };
/* /*
@@ -602,7 +601,7 @@ struct thread {
// thread info // thread info
int cpu_id; int cpu_id;
int tid; int tid;
int status; // PS_RUNNING -> PS_EXITED int status; // PS_RUNNING -> PS_EXITED (-> ZOMBIE / ptrace)
// | ^ ^ // | ^ ^
// | | | // | | |
// V | | // V | |
@@ -612,6 +611,14 @@ struct thread {
// PS_UNINTERRUPTIBLE // PS_UNINTERRUPTIBLE
int exit_status; int exit_status;
/*
* Store event related to signal. For example,
* it represents that the proceess has been resumed by SIGCONT.
*/
int signal_flags;
int termsig;
// process vm // process vm
struct process_vm *vm; struct process_vm *vm;
@@ -631,6 +638,22 @@ struct thread {
ihk_spinlock_t spin_sleep_lock; ihk_spinlock_t spin_sleep_lock;
int spin_sleep; int spin_sleep;
// for ptrace
struct process *report_proc;
struct list_head report_siblings_list; // lock process
/* Store ptrace flags.
* The lower 8 bits are PTRACE_O_xxx of the PTRACE_SETOPTIONS request.
* Other bits are for inner use of the McKernel.
*/
int ptrace;
/* Store ptrace event message.
* PTRACE_O_xxx will store event message here.
* PTRACE_GETEVENTMSG will get from here.
*/
unsigned long ptrace_eventmsg;
ihk_atomic_t refcount; ihk_atomic_t refcount;
int *clear_child_tid; int *clear_child_tid;
@@ -687,10 +710,11 @@ struct thread {
/* Syscall offload wait queue head */ /* Syscall offload wait queue head */
struct waitq scd_wq; struct waitq scd_wq;
int thread_offloaded; int uti_state;
int mod_clone; int mod_clone;
struct uti_attr *mod_clone_arg; struct uti_attr *mod_clone_arg;
int parent_cpuid; int parent_cpuid;
int uti_refill_tid;
// for performance counter // for performance counter
unsigned long pmc_alloc_map; unsigned long pmc_alloc_map;
@@ -716,6 +740,8 @@ struct process_vm {
// 2. addition of process page table (allocate_pages, update_process_page_table) // 2. addition of process page table (allocate_pages, update_process_page_table)
// note that physical memory allocator (ihk_mc_alloc_pages, ihk_pagealloc_alloc) // note that physical memory allocator (ihk_mc_alloc_pages, ihk_pagealloc_alloc)
// is protected by its own lock (see ihk/manycore/generic/page_alloc.c) // is protected by its own lock (see ihk/manycore/generic/page_alloc.c)
unsigned long is_memory_range_lock_taken;
/* #986: Fix deadlock between do_page_fault_process_vm() and set_host_vma() */
ihk_atomic_t refcount; ihk_atomic_t refcount;
int exiting; int exiting;
@@ -819,14 +845,32 @@ void cpu_clear_and_set(int c_cpu, int s_cpu,
void release_cpuid(int cpuid); void release_cpuid(int cpuid);
struct thread *find_thread(int pid, int tid, struct mcs_rwlock_node_irqsave *lock); struct thread *find_thread(int pid, int tid);
void thread_unlock(struct thread *thread, struct mcs_rwlock_node_irqsave *lock); void thread_unlock(struct thread *thread);
struct process *find_process(int pid, struct mcs_rwlock_node_irqsave *lock); struct process *find_process(int pid, struct mcs_rwlock_node_irqsave *lock);
void process_unlock(struct process *proc, struct mcs_rwlock_node_irqsave *lock); void process_unlock(struct process *proc, struct mcs_rwlock_node_irqsave *lock);
void chain_process(struct process *); void chain_process(struct process *);
void chain_thread(struct thread *); void chain_thread(struct thread *);
void proc_init(void); void proc_init(void);
void set_timer(void); void set_timer(int runq_locked);
struct sig_pending *hassigpending(struct thread *thread); struct sig_pending *hassigpending(struct thread *thread);
extern int do_signal(unsigned long rc, void *regs0, struct thread *thread,
struct sig_pending *pending, int num);
extern void check_signal(unsigned long rc, void *regs0, int num);
extern unsigned long do_kill(struct thread *thread, int pid, int tid, int sig,
struct siginfo *info, int ptracecont);
extern void set_signal(int sig, void *regs, struct siginfo *info);
extern void check_sig_pending(void);
void clear_single_step(struct thread *thread);
void release_fp_regs(struct thread *proc);
void save_fp_regs(struct thread *proc);
void copy_fp_regs(struct thread *from, struct thread *to);
void restore_fp_regs(struct thread *proc);
void clear_fp_regs(void);
#define VERIFY_READ 0
#define VERIFY_WRITE 1
int access_ok(struct process_vm *vm, int type, uintptr_t addr, size_t len);
#endif #endif

View File

@@ -10,6 +10,7 @@
#include <rusage.h> #include <rusage.h>
#include <ihk/ihk_monitor.h> #include <ihk/ihk_monitor.h>
#include <arch_rusage.h> #include <arch_rusage.h>
#include <debug.h>
#ifdef ENABLE_RUSAGE #ifdef ENABLE_RUSAGE
@@ -55,7 +56,7 @@ rusage_rss_add(unsigned long size)
} }
vm->currss += size; vm->currss += size;
if (vm->currss > vm->proc->maxrss) { if (vm->proc && vm->currss > vm->proc->maxrss) {
vm->proc->maxrss = vm->currss; vm->proc->maxrss = vm->currss;
} }
} }
@@ -118,8 +119,9 @@ static inline int rusage_memory_stat_add(struct vm_range *range, uintptr_t phys,
struct page *page = phys_to_page(phys); struct page *page = phys_to_page(phys);
/* Is It file map and cow page? */ /* Is It file map and cow page? */
if ((range->memobj->flags & (MF_DEV_FILE | MF_REG_FILE)) && if ((range->memobj->flags & (MF_DEV_FILE | MF_REG_FILE |
!page) { MF_HUGETLBFS)) &&
!page) {
//kprintf("%s: cow,phys=%lx\n", __FUNCTION__, phys); //kprintf("%s: cow,phys=%lx\n", __FUNCTION__, phys);
memory_stat_rss_add(size, pgsize); memory_stat_rss_add(size, pgsize);
return 1; return 1;

View File

@@ -57,6 +57,7 @@ struct shmobj {
struct shmlock_user * user; struct shmlock_user * user;
struct shmid_ds ds; struct shmid_ds ds;
struct list_head page_list; struct list_head page_list;
ihk_spinlock_t page_list_lock;
struct list_head chain; /* shmobj_list */ struct list_head chain; /* shmobj_list */
}; };
@@ -104,7 +105,6 @@ static inline void shmlock_users_unlock(void)
void shmobj_list_lock(void); void shmobj_list_lock(void);
void shmobj_list_unlock(void); void shmobj_list_unlock(void);
int shmobj_create_indexed(struct shmid_ds *ds, struct shmobj **objp); int shmobj_create_indexed(struct shmid_ds *ds, struct shmobj **objp);
void shmobj_destroy(struct shmobj *obj);
void shmlock_user_free(struct shmlock_user *user); void shmlock_user_free(struct shmlock_user *user);
int shmlock_user_get(uid_t ruid, struct shmlock_user **userp); int shmlock_user_get(uid_t ruid, struct shmlock_user **userp);

View File

@@ -49,6 +49,7 @@
#define SCD_MSG_PROCFS_DELETE 0x11 #define SCD_MSG_PROCFS_DELETE 0x11
#define SCD_MSG_PROCFS_REQUEST 0x12 #define SCD_MSG_PROCFS_REQUEST 0x12
#define SCD_MSG_PROCFS_ANSWER 0x13 #define SCD_MSG_PROCFS_ANSWER 0x13
#define SCD_MSG_PROCFS_RELEASE 0x15
#define SCD_MSG_DEBUG_LOG 0x20 #define SCD_MSG_DEBUG_LOG 0x20
@@ -82,6 +83,8 @@
#define SCD_MSG_CPU_RW_REG 0x52 #define SCD_MSG_CPU_RW_REG 0x52
#define SCD_MSG_CPU_RW_REG_RESP 0x53 #define SCD_MSG_CPU_RW_REG_RESP 0x53
#define SCD_MSG_FUTEX_WAKE 0x60
/* Cloning flags. */ /* Cloning flags. */
# define CSIGNAL 0x000000ff /* Signal mask to be sent at exit. */ # define CSIGNAL 0x000000ff /* Signal mask to be sent at exit. */
# define CLONE_VM 0x00000100 /* Set if VM shared between processes. */ # define CLONE_VM 0x00000100 /* Set if VM shared between processes. */
@@ -197,8 +200,10 @@ struct program_load_desc {
unsigned long heap_extension; unsigned long heap_extension;
long stack_premap; long stack_premap;
unsigned long mpol_bind_mask; unsigned long mpol_bind_mask;
int uti_thread_rank; /* N-th clone() spawns a thread on Linux CPU */
int uti_use_last_cpu; /* Work-around not to share CPU with OpenMP thread */
int nr_processes; int nr_processes;
char shell_path[SHELL_PATH_MAX_LEN]; int process_rank;
__cpu_set_unit cpu_set[PLD_CPU_SET_SIZE]; __cpu_set_unit cpu_set[PLD_CPU_SET_SIZE];
int profile; int profile;
struct program_image_section sections[0]; struct program_image_section sections[0];
@@ -258,7 +263,7 @@ struct ikc_scd_packet {
long sysfs_arg3; long sysfs_arg3;
}; };
/* SCD_MSG_SCHEDULE_THREAD */ /* SCD_MSG_WAKE_UP_SYSCALL_THREAD */
struct { struct {
int ttid; int ttid;
}; };
@@ -274,6 +279,12 @@ struct ikc_scd_packet {
struct { struct {
int eventfd_type; int eventfd_type;
}; };
/* SCD_MSG_FUTEX_WAKE */
struct {
void *resp;
int *spin_sleep; /* 1: waiting in linux_wait_event() 0: woken up by someone else */
} futex;
}; };
char padding[8]; char padding[8];
}; };
@@ -336,10 +347,10 @@ struct syscall_post {
SYSCALL_ARG_##a2(2); SYSCALL_ARG_##a3(3); \ SYSCALL_ARG_##a2(2); SYSCALL_ARG_##a3(3); \
SYSCALL_ARG_##a4(4); SYSCALL_ARG_##a5(5); SYSCALL_ARG_##a4(4); SYSCALL_ARG_##a5(5);
#define SYSCALL_FOOTER return do_syscall(&request, ihk_mc_get_processor_id(), 0) #define SYSCALL_FOOTER return do_syscall(&request, ihk_mc_get_processor_id())
extern long do_syscall(struct syscall_request *req, int cpu, int pid); extern long do_syscall(struct syscall_request *req, int cpu);
int obtain_clone_cpuid(cpu_set_t *cpu_set); int obtain_clone_cpuid(cpu_set_t *cpu_set, int use_last);
extern long syscall_generic_forwarding(int n, ihk_mc_user_context_t *ctx); extern long syscall_generic_forwarding(int n, ihk_mc_user_context_t *ctx);
#define DECLARATOR(number,name) __NR_##name = number, #define DECLARATOR(number,name) __NR_##name = number,
@@ -353,17 +364,10 @@ enum {
#undef SYSCALL_DELEGATED #undef SYSCALL_DELEGATED
#define __NR_coredump 999 /* pseudo syscall for coredump */ #define __NR_coredump 999 /* pseudo syscall for coredump */
#ifdef POSTK_DEBUG_TEMP_FIX_61 /* Core table size and lseek return value to loff_t */
struct coretable { /* table entry for a core chunk */ struct coretable { /* table entry for a core chunk */
off_t len; /* length of the chunk */ off_t len; /* length of the chunk */
unsigned long addr; /* physical addr of the chunk */ unsigned long addr; /* physical addr of the chunk */
}; };
#else /* POSTK_DEBUG_TEMP_FIX_61 */
struct coretable { /* table entry for a core chunk */
int len; /* length of the chunk */
unsigned long addr; /* physical addr of the chunk */
};
#endif /* POSTK_DEBUG_TEMP_FIX_61 */
#ifdef POSTK_DEBUG_TEMP_FIX_1 #ifdef POSTK_DEBUG_TEMP_FIX_1
void create_proc_procfs_files(int pid, int tid, int cpuid); void create_proc_procfs_files(int pid, int tid, int cpuid);
@@ -383,7 +387,6 @@ struct procfs_read {
int count; /* bytes to read (request) */ int count; /* bytes to read (request) */
int eof; /* if eof is detected, 1 otherwise 0. (answer)*/ int eof; /* if eof is detected, 1 otherwise 0. (answer)*/
int ret; /* read bytes (answer) */ int ret; /* read bytes (answer) */
int status; /* non-zero if done (answer) */
int newcpu; /* migrated new cpu (answer) */ int newcpu; /* migrated new cpu (answer) */
int readwrite; /* 0:read, 1:write */ int readwrite; /* 0:read, 1:write */
char fname[PROCFS_NAME_MAX]; /* procfs filename (request) */ char fname[PROCFS_NAME_MAX]; /* procfs filename (request) */
@@ -395,6 +398,8 @@ struct procfs_file {
char fname[PROCFS_NAME_MAX]; /* procfs filename (request) */ char fname[PROCFS_NAME_MAX]; /* procfs filename (request) */
}; };
int process_procfs_request(struct ikc_scd_packet *rpacket);
#define RUSAGE_SELF 0 #define RUSAGE_SELF 0
#define RUSAGE_CHILDREN -1 #define RUSAGE_CHILDREN -1
#define RUSAGE_THREAD 1 #define RUSAGE_THREAD 1
@@ -459,8 +464,8 @@ static inline unsigned long timespec_to_jiffy(const struct timespec *ats)
void reset_cputime(void); void reset_cputime(void);
void set_cputime(int mode); void set_cputime(int mode);
int do_munmap(void *addr, size_t len); int do_munmap(void *addr, size_t len, int holding_memory_range_lock);
intptr_t do_mmap(intptr_t addr0, size_t len0, int prot, int flags, int fd, intptr_t do_mmap(uintptr_t addr0, size_t len0, int prot, int flags, int fd,
off_t off0); off_t off0);
void clear_host_pte(uintptr_t addr, size_t len); void clear_host_pte(uintptr_t addr, size_t len);
typedef int32_t key_t; typedef int32_t key_t;
@@ -471,7 +476,16 @@ int arch_setup_vdso(void);
int arch_cpu_read_write_register(struct ihk_os_cpu_register *desc, int arch_cpu_read_write_register(struct ihk_os_cpu_register *desc,
enum mcctrl_os_cpu_operation op); enum mcctrl_os_cpu_operation op);
struct vm_range_numa_policy *vm_range_policy_search(struct process_vm *vm, uintptr_t addr); struct vm_range_numa_policy *vm_range_policy_search(struct process_vm *vm, uintptr_t addr);
void calculate_time_from_tsc(struct timespec *ts);
time_t time(void); time_t time(void);
long do_futex(int n, unsigned long arg0, unsigned long arg1,
unsigned long arg2, unsigned long arg3,
unsigned long arg4, unsigned long arg5,
unsigned long _uti_clv,
void *uti_futex_resp,
void *_linux_wait_event,
void *_linux_printk,
void *_linux_clock_gettime);
#ifndef POSTK_DEBUG_ARCH_DEP_52 #ifndef POSTK_DEBUG_ARCH_DEP_52
#define VDSO_MAXPAGES 2 #define VDSO_MAXPAGES 2
@@ -519,6 +533,7 @@ enum perf_ctrl_type {
struct perf_ctrl_desc { struct perf_ctrl_desc {
enum perf_ctrl_type ctrl_type; enum perf_ctrl_type ctrl_type;
int err;
union { union {
/* for SET, GET */ /* for SET, GET */
struct { struct {
@@ -569,6 +584,15 @@ typedef struct uti_attr {
uint64_t flags; /* Representing location and behavior hints by bitmap */ uint64_t flags; /* Representing location and behavior hints by bitmap */
} uti_attr_t; } uti_attr_t;
struct uti_ctx {
union {
char ctx[4096];
struct {
int uti_refill_tid;
};
};
};
struct move_pages_smp_req { struct move_pages_smp_req {
unsigned long count; unsigned long count;
const void **user_virt_addr; const void **user_virt_addr;
@@ -589,4 +613,9 @@ struct move_pages_smp_req {
#define PROCESS_VM_READ 0 #define PROCESS_VM_READ 0
#define PROCESS_VM_WRITE 1 #define PROCESS_VM_WRITE 1
/* uti: function pointers pointing to Linux codes */
extern long (*linux_wait_event)(void *_resp, unsigned long nsec_timeout);
extern int (*linux_printk)(const char *fmt, ...);
extern int (*linux_clock_gettime)(clockid_t clk_id, struct timespec *tp);
#endif #endif

View File

@@ -25,6 +25,8 @@
#define CLOCK_PROCESS_CPUTIME_ID 2 #define CLOCK_PROCESS_CPUTIME_ID 2
#define CLOCK_THREAD_CPUTIME_ID 3 #define CLOCK_THREAD_CPUTIME_ID 3
typedef int clockid_t;
typedef long int __time_t; typedef long int __time_t;
/* POSIX.1b structure for a time value. This is like a `struct timeval' but /* POSIX.1b structure for a time value. This is like a `struct timeval' but

View File

@@ -26,20 +26,17 @@
#include <mc_xpmem.h> #include <mc_xpmem.h>
#include <xpmem.h> #include <xpmem.h>
#include <debug.h>
#define XPMEM_CURRENT_VERSION 0x00026003 #define XPMEM_CURRENT_VERSION 0x00026003
//#define DEBUG_PRINT_XPMEM //#define DEBUG_PRINT_XPMEM
#ifdef DEBUG_PRINT_XPMEM #ifdef DEBUG_PRINT_XPMEM
#define dkprintf(...) kprintf(__VA_ARGS__) #undef DDEBUG_DEFAULT
#define ekprintf(...) kprintf(__VA_ARGS__) #define DDEBUG_DEFAULT DDEBUG_PRINT
#define XPMEM_DEBUG(format, a...) kprintf("[%d] %s: "format"\n", cpu_local_var(current)->proc->rgid, __func__, ##a)
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#define XPMEM_DEBUG(format, a...) do { if (0) kprintf("\n"); } while (0)
#endif #endif
#define XPMEM_DEBUG(format, a...) dkprintf("[%d] %s: "format"\n", cpu_local_var(current)->proc->rgid, __func__, ##a)
//#define USE_DBUG_ON //#define USE_DBUG_ON

View File

@@ -32,6 +32,7 @@
#include <syscall.h> #include <syscall.h>
#include <sysfs.h> #include <sysfs.h>
#include <ihk/monitor.h> #include <ihk/monitor.h>
#include <debug.h>
//#define IOCTL_FUNC_EXTENSION //#define IOCTL_FUNC_EXTENSION
#ifdef IOCTL_FUNC_EXTENSION #ifdef IOCTL_FUNC_EXTENSION
@@ -41,11 +42,8 @@
//#define DEBUG_PRINT_INIT //#define DEBUG_PRINT_INIT
#ifdef DEBUG_PRINT_INIT #ifdef DEBUG_PRINT_INIT
#define dkprintf(...) do { kprintf(__VA_ARGS__); } while (0) #undef DDEBUG_DEFAULT
#define ekprintf(...) do { kprintf(__VA_ARGS__); } while (0) #define DDEBUG_DEFAULT DDEBUG_PRINT
#else
#define dkprintf(...) do { } while (0)
#define ekprintf(...) do { kprintf(__VA_ARGS__); } while (0)
#endif #endif
#define DUMP_LEVEL_USER_UNUSED_EXCLUDE 24 #define DUMP_LEVEL_USER_UNUSED_EXCLUDE 24
@@ -61,6 +59,13 @@ static void handler_init(void)
ihk_mc_set_syscall_handler(syscall); ihk_mc_set_syscall_handler(syscall);
} }
/* Symbols with name conflict with the linux kernel
* Give the possibility to load all symbols at the same time
*/
int *mck_num_processors = &num_processors;
unsigned long data[1024] __attribute__((aligned(64))); unsigned long data[1024] __attribute__((aligned(64)));
#ifdef USE_DMA #ifdef USE_DMA
@@ -125,6 +130,8 @@ char *find_command_line(char *name)
return strstr(cmdline, name); return strstr(cmdline, name);
} }
extern int safe_kernel_map;
static void parse_kargs(void) static void parse_kargs(void)
{ {
char *ptr; char *ptr;
@@ -145,6 +152,11 @@ static void parse_kargs(void)
} }
ihk_mc_set_dump_level(dump_level); ihk_mc_set_dump_level(dump_level);
ptr = find_command_line("safe_kernel_map");
if (ptr) {
safe_kernel_map = 1;
}
/* idle_halt option */ /* idle_halt option */
ptr = find_command_line("idle_halt"); ptr = find_command_line("idle_halt");
if (ptr) { if (ptr) {
@@ -246,6 +258,11 @@ static void nmi_init()
ihk_set_nmi_mode_addr(phys); ihk_set_nmi_mode_addr(phys);
} }
static void uti_init()
{
ihk_set_mckernel_do_futex((unsigned long)do_futex);
}
static void rest_init(void) static void rest_init(void)
{ {
handler_init(); handler_init();
@@ -261,6 +278,7 @@ static void rest_init(void)
#endif /* !POSTK_DEBUG_TEMP_FIX_73 */ #endif /* !POSTK_DEBUG_TEMP_FIX_73 */
cpu_local_var_init(); cpu_local_var_init();
nmi_init(); nmi_init();
uti_init();
time_init(); time_init();
kmalloc_init(); kmalloc_init();
@@ -331,6 +349,7 @@ static void populate_sysfs(void)
{ {
cpu_sysfs_setup(); cpu_sysfs_setup();
numa_sysfs_setup(); numa_sysfs_setup();
dynamic_debug_sysfs_setup();
//setup_remote_snooping_samples(); //setup_remote_snooping_samples();
} /* populate_sysfs() */ } /* populate_sysfs() */

View File

@@ -19,15 +19,13 @@
#include <ihk/ikc.h> #include <ihk/ikc.h>
#include <ikc/master.h> #include <ikc/master.h>
#include <arch/cpu.h> #include <arch/cpu.h>
#include <debug.h>
//#define DEBUG_LISTENERS //#define DEBUG_LISTENERS
#ifdef DEBUG_LISTENERS #ifdef DEBUG_LISTENERS
#define dkprintf(...) kprintf(__VA_ARGS__) #undef DDEBUG_DEFAULT
#define ekprintf(...) kprintf(__VA_ARGS__) #define DDEBUG_DEFAULT DDEBUG_PRINT
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif #endif
void testmem(void *v, unsigned long size) void testmem(void *v, unsigned long size)
@@ -71,7 +69,7 @@ static int test_packet_handler(struct ihk_ikc_channel_desc *c,
testmem(v, 4 * 1024 * 1024); testmem(v, 4 * 1024 * 1024);
ihk_mc_unmap_virtual(v, 4 * 1024, 1); ihk_mc_unmap_virtual(v, 4 * 1024);
ihk_mc_unmap_memory(NULL, pp, 4 * 1024 * 1024); ihk_mc_unmap_memory(NULL, pp, 4 * 1024 * 1024);
} else if (packet->msg == 0x11110012) { } else if (packet->msg == 0x11110012) {
p.msg = 0x11110013; p.msg = 0x11110013;

View File

@@ -44,15 +44,13 @@
#include <process.h> #include <process.h>
#include <limits.h> #include <limits.h>
#include <sysfs.h> #include <sysfs.h>
#include <debug.h>
//#define DEBUG_PRINT_MEM //#define DEBUG_PRINT_MEM
#ifdef DEBUG_PRINT_MEM #ifdef DEBUG_PRINT_MEM
#define dkprintf(...) kprintf(__VA_ARGS__) #undef DDEBUG_DEFAULT
#define ekprintf(...) kprintf(__VA_ARGS__) #define DDEBUG_DEFAULT DDEBUG_PRINT
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif #endif
static unsigned long pa_start, pa_end; static unsigned long pa_start, pa_end;
@@ -547,7 +545,7 @@ static void *mckernel_allocate_aligned_pages_node(int npages, int p2align,
ihk_mc_ap_flag flag, int pref_node, int is_user, uintptr_t virt_addr) ihk_mc_ap_flag flag, int pref_node, int is_user, uintptr_t virt_addr)
{ {
unsigned long pa = 0; unsigned long pa = 0;
int i, node; int i = 0, node;
#ifndef IHK_RBTREE_ALLOCATOR #ifndef IHK_RBTREE_ALLOCATOR
struct ihk_page_allocator_desc *pa_allocator; struct ihk_page_allocator_desc *pa_allocator;
#endif #endif
@@ -962,8 +960,6 @@ static struct ihk_mc_interrupt_handler query_free_mem_handler = {
.priv = NULL, .priv = NULL,
}; };
void set_signal(int sig, void *regs, struct siginfo *info);
void check_signal(unsigned long, void *, int);
int gencore(struct thread *, void *, struct coretable **, int *); int gencore(struct thread *, void *, struct coretable **, int *);
void freecore(struct coretable **); void freecore(struct coretable **);
@@ -981,11 +977,9 @@ void coredump(struct thread *thread, void *regs)
struct coretable *coretable; struct coretable *coretable;
int chunks; int chunks;
#ifdef POSTK_DEBUG_ARCH_DEP_67 /* use limit corefile size. (temporarily fix.) */
if (thread->proc->rlimit[MCK_RLIMIT_CORE].rlim_cur == 0) { if (thread->proc->rlimit[MCK_RLIMIT_CORE].rlim_cur == 0) {
return; return;
} }
#endif /* POSTK_DEBUG_ARCH_DEP_67 */
#ifndef POSTK_DEBUG_ARCH_DEP_18 #ifndef POSTK_DEBUG_ARCH_DEP_18
ret = gencore(thread, regs, &coretable, &chunks); ret = gencore(thread, regs, &coretable, &chunks);
@@ -997,7 +991,7 @@ void coredump(struct thread *thread, void *regs)
request.args[0] = chunks; request.args[0] = chunks;
request.args[1] = virt_to_phys(coretable); request.args[1] = virt_to_phys(coretable);
/* no data for now */ /* no data for now */
ret = do_syscall(&request, thread->cpu_id, thread->proc->pid); ret = do_syscall(&request, thread->cpu_id);
if (ret == 0) { if (ret == 0) {
kprintf("dumped core.\n"); kprintf("dumped core.\n");
} else { } else {
@@ -1223,7 +1217,7 @@ out:
if(interrupt_from_user(regs)){ if(interrupt_from_user(regs)){
cpu_enable_interrupt(); cpu_enable_interrupt();
check_need_resched(); check_need_resched();
check_signal(0, regs, 0); check_signal(0, regs, -1);
} }
set_cputime(interrupt_from_user(regs)? 0: 1); set_cputime(interrupt_from_user(regs)? 0: 1);
#ifdef PROFILE_ENABLE #ifdef PROFILE_ENABLE
@@ -1671,7 +1665,7 @@ void *ihk_mc_map_virtual(unsigned long phys, int npages,
return (char *)p + offset; return (char *)p + offset;
} }
void ihk_mc_unmap_virtual(void *va, int npages, int free_physical) void ihk_mc_unmap_virtual(void *va, int npages)
{ {
unsigned long i; unsigned long i;
@@ -1681,13 +1675,7 @@ void ihk_mc_unmap_virtual(void *va, int npages, int free_physical)
flush_tlb_single((unsigned long)(va + (i << PAGE_SHIFT))); flush_tlb_single((unsigned long)(va + (i << PAGE_SHIFT)));
} }
#ifdef POSTK_DEBUG_TEMP_FIX_51 /* ihk_mc_unmap_virtual() free_physical disabled */
ihk_pagealloc_free(vmap_allocator, (unsigned long)va, npages); ihk_pagealloc_free(vmap_allocator, (unsigned long)va, npages);
#else /* POSTK_DEBUG_TEMP_FIX_51 */
if (free_physical) {
ihk_pagealloc_free(vmap_allocator, (unsigned long)va, npages);
}
#endif /* POSTK_DEBUG_TEMP_FIX_51 */
} }
#ifdef ATTACHED_MIC #ifdef ATTACHED_MIC
@@ -2304,76 +2292,37 @@ void ___kmalloc_print_free_list(struct list_head *list)
kprintf_unlock(irqflags); kprintf_unlock(irqflags);
} }
#ifdef POSTK_DEBUG_ARCH_DEP_27
int search_free_space(struct thread *thread, size_t len, intptr_t hint,
int pgshift, intptr_t *addrp)
{
struct vm_regions *region = &thread->vm->region;
intptr_t addr;
int error;
struct vm_range *range;
size_t pgsize = (size_t)1 << pgshift;
dkprintf("search_free_space(%lx,%lx,%d,%p)\n", len, hint, pgshift, addrp);
addr = hint;
for (;;) {
addr = (addr + pgsize - 1) & ~(pgsize - 1);
if ((region->user_end <= addr)
|| ((region->user_end - len) < addr)) {
ekprintf("search_free_space(%lx,%lx,%p):"
"no space. %lx %lx\n",
len, hint, addrp, addr,
region->user_end);
error = -ENOMEM;
goto out;
}
range = lookup_process_memory_range(thread->vm, addr, addr+len);
if (range == NULL) {
break;
}
addr = range->end;
}
error = 0;
*addrp = addr;
out:
dkprintf("search_free_space(%lx,%lx,%d,%p): %d %lx\n",
len, hint, pgshift, addrp, error, addr);
return error;
}
#endif /* POSTK_DEBUG_ARCH_DEP_27 */
#ifdef POSTK_DEBUG_TEMP_FIX_52 /* supports NUMA for memory area determination */
#ifdef IHK_RBTREE_ALLOCATOR #ifdef IHK_RBTREE_ALLOCATOR
int is_mckernel_memory(unsigned long phys) int is_mckernel_memory(unsigned long start, unsigned long end)
{ {
int i; int i;
for (i = 0; i < ihk_mc_get_nr_memory_chunks(); ++i) { for (i = 0; i < ihk_mc_get_nr_memory_chunks(); ++i) {
unsigned long start, end; unsigned long chunk_start, chunk_end;
int numa_id; int numa_id;
ihk_mc_get_memory_chunk(i, &start, &end, &numa_id); ihk_mc_get_memory_chunk(i, &chunk_start, &chunk_end, &numa_id);
if (start <= phys && phys < end) { if ((chunk_start <= start && start < chunk_end) &&
(chunk_start <= end && end < chunk_end)) {
return 1; return 1;
} }
} }
return 0; return 0;
} }
#else /* IHK_RBTREE_ALLOCATOR */ #else /* IHK_RBTREE_ALLOCATOR */
int is_mckernel_memory(unsigned long phys) int is_mckernel_memory(unsigned long start, unsigned long end)
{ {
int i; int i;
for (i = 0; i < ihk_mc_get_nr_numa_nodes(); ++i) { for (i = 0; i < ihk_mc_get_nr_numa_nodes(); ++i) {
struct ihk_page_allocator_desc *pa_allocator; struct ihk_page_allocator_desc *pa_allocator;
unsigned long area_start = pa_allocator->start;
unsigned long area_end = pa_allocator->end;
list_for_each_entry(pa_allocator, list_for_each_entry(pa_allocator,
&memory_nodes[i].allocators, list) { &memory_nodes[i].allocators, list) {
if (pa_allocator->start <= phys && phys < pa_allocator->end) { if ((area_start <= start && start < area_end) &&
(area_start <= end && end < area_end)) {
return 1; return 1;
} }
} }
@@ -2381,7 +2330,6 @@ int is_mckernel_memory(unsigned long phys)
return 0; return 0;
} }
#endif /* IHK_RBTREE_ALLOCATOR */ #endif /* IHK_RBTREE_ALLOCATOR */
#endif /* POSTK_DEBUG_TEMP_FIX_52 */
void ihk_mc_query_mem_areas(void){ void ihk_mc_query_mem_areas(void){

Some files were not shown because too many files have changed in this diff Show More