diff --git a/.gitignore b/.gitignore index c304f0c6..ab70c188 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +*~ *.o *.elf *.bin diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..d7f8e591 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "ihk"] + path = ihk + url = https://github.com/RIKEN-SysSoft/ihk.git diff --git a/arch/arm64/kernel/cpu.c b/arch/arm64/kernel/cpu.c index ba1600b2..27fd920e 100644 --- a/arch/arm64/kernel/cpu.c +++ b/arch/arm64/kernel/cpu.c @@ -30,6 +30,7 @@ #include #include #include +#include #ifdef POSTK_DEBUG_ARCH_DEP_65 #include #endif /* POSTK_DEBUG_ARCH_DEP_65 */ @@ -39,16 +40,10 @@ #include "postk_print_sysreg.c" #ifdef DEBUG_PRINT_CPU -#define dkprintf kprintf -#define ekprintf kprintf -#else -#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) -#define ekprintf kprintf +#undef DDEBUG_DEFAULT +#define DDEBUG_DEFAULT DDEBUG_PRINT #endif -#define BUG_ON(condition) do { if (condition) { kprintf("PANIC: %s: %s(line:%d)\n",\ - __FILE__, __FUNCTION__, __LINE__); panic(""); } } while(0) - struct cpuinfo_arm64 cpuinfo_data[NR_CPUS]; /* index is logical cpuid */ static unsigned int per_cpu_timer_val[NR_CPUS] = { 0 }; @@ -1283,7 +1278,6 @@ int ihk_mc_interrupt_cpu(int cpu, int vector) return 0; } -#ifdef POSTK_DEBUG_ARCH_DEP_22 /* * @ref.impl linux-linaro/arch/arm64/kernel/process.c::tls_thread_switch() */ @@ -1309,14 +1303,13 @@ struct thread *arch_switch_context(struct thread *prev, struct thread *next) extern void perf_start(struct mc_perf_event *event); extern void perf_reset(struct mc_perf_event *event); struct thread *last; -#ifdef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */ struct mcs_rwlock_node_irqsave lock; -#endif /* POSTK_DEBUG_TEMP_FIX_41 */ /* Set up new TLS.. 
*/ dkprintf("[%d] arch_switch_context: tlsblock_base: 0x%lX\n", ihk_mc_get_processor_id(), next->tlsblock_base); +#ifdef ENABLE_PERF /* Performance monitoring inherit */ if(next->proc->monitoring_event) { if(next->proc->perf_status == PP_RESET) @@ -1326,10 +1319,10 @@ struct thread *arch_switch_context(struct thread *prev, struct thread *next) perf_start(next->proc->monitoring_event); } } +#endif /*ENABLE_PERF*/ if (likely(prev)) { tls_thread_switch(prev, next); -#ifdef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */ mcs_rwlock_writer_lock(&prev->proc->update_lock, &lock); if (prev->proc->status & (PS_DELAY_STOPPED | PS_DELAY_TRACED)) { switch (prev->proc->status) { @@ -1343,11 +1336,12 @@ struct thread *arch_switch_context(struct thread *prev, struct thread *next) break; } mcs_rwlock_writer_unlock(&prev->proc->update_lock, &lock); + + /* Wake up the parent who tried wait4 and sleeping */ waitq_wakeup(&prev->proc->parent->waitpid_q); } else { mcs_rwlock_writer_unlock(&prev->proc->update_lock, &lock); } -#endif /* POSTK_DEBUG_TEMP_FIX_41 */ last = ihk_mc_switch_context(&prev->ctx, &next->ctx, prev); } @@ -1357,7 +1351,6 @@ struct thread *arch_switch_context(struct thread *prev, struct thread *next) return last; } -#endif /* POSTK_DEBUG_ARCH_DEP_22 */ /*@ @ requires \valid(thread); @@ -1439,8 +1432,7 @@ void copy_fp_regs(struct thread *from, struct thread *to) } } -void -clear_fp_regs(struct thread *thread) +void clear_fp_regs(void) { if (likely(elf_hwcap & (HWCAP_FP | HWCAP_ASIMD))) { #ifdef CONFIG_ARM64_SVE @@ -1477,7 +1469,7 @@ restore_fp_regs(struct thread *thread) if (likely(elf_hwcap & (HWCAP_FP | HWCAP_ASIMD))) { if (!thread->fp_regs) { // only clear fpregs. 
- clear_fp_regs(thread); + clear_fp_regs(); return; } thread_fpsimd_load(thread); diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index a32e7d78..2e7bfe0f 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -9,20 +9,16 @@ #include #include #include +#include +#include //#define DEBUG_PRINT_FPSIMD #ifdef DEBUG_PRINT_FPSIMD -#define dkprintf kprintf -#define ekprintf kprintf -#else -#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) -#define ekprintf kprintf +#undef DDEBUG_DEFAULT +#define DDEBUG_DEFAULT DDEBUG_PRINT #endif -#define BUG_ON(condition) do { if (condition) { kprintf("PANIC: %s: %s(line:%d)\n",\ - __FILE__, __FUNCTION__, __LINE__); panic(""); } } while(0) - #ifdef CONFIG_ARM64_SVE /* Maximum supported vector length across all CPUs (initially poisoned) */ @@ -73,9 +69,6 @@ static int get_nr_threads(struct process *proc) return nr_threads; } -extern void save_fp_regs(struct thread *thread); -extern void clear_fp_regs(struct thread *thread); -extern void restore_fp_regs(struct thread *thread); /* @ref.impl arch/arm64/kernel/fpsimd.c::sve_set_vector_length */ int sve_set_vector_length(struct thread *thread, unsigned long vl, unsigned long flags) @@ -129,7 +122,7 @@ int sve_set_vector_length(struct thread *thread, /* for self at prctl syscall */ if (thread == cpu_local_var(current)) { save_fp_regs(thread); - clear_fp_regs(thread); + clear_fp_regs(); thread_sve_to_fpsimd(thread, &fp_regs); sve_free(thread); diff --git a/arch/arm64/kernel/gencore.c b/arch/arm64/kernel/gencore.c index 77e33a53..ec621e49 100644 --- a/arch/arm64/kernel/gencore.c +++ b/arch/arm64/kernel/gencore.c @@ -7,6 +7,7 @@ #include #include #include +#include #define align32(x) ((((x) + 3) / 4) * 4) #define alignpage(x) ((((x) + (PAGE_SIZE) - 1) / (PAGE_SIZE)) * (PAGE_SIZE)) @@ -14,11 +15,8 @@ //#define DEBUG_PRINT_GENCORE #ifdef DEBUG_PRINT_GENCORE -#define dkprintf(...) kprintf(__VA_ARGS__) -#define ekprintf(...) 
kprintf(__VA_ARGS__) -#else -#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) -#define ekprintf(...) kprintf(__VA_ARGS__) +#undef DDEBUG_DEFAULT +#define DDEBUG_DEFAULT DDEBUG_PRINT #endif /* diff --git a/arch/arm64/kernel/include/arch-lock.h b/arch/arm64/kernel/include/arch-lock.h index 9c924a9c..cd1a7691 100644 --- a/arch/arm64/kernel/include/arch-lock.h +++ b/arch/arm64/kernel/include/arch-lock.h @@ -6,6 +6,8 @@ #include #include +#include "affinity.h" +#include //#define DEBUG_SPINLOCK //#define DEBUG_MCS_RWLOCK @@ -152,6 +154,8 @@ typedef struct mcs_lock_node { unsigned long irqsave; } __attribute__((aligned(64))) mcs_lock_node_t; +typedef mcs_lock_node_t mcs_lock_t; + static void mcs_lock_init(struct mcs_lock_node *node) { node->locked = 0; @@ -602,4 +606,16 @@ __mcs_rwlock_reader_unlock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_ #endif } +static inline int irqflags_can_interrupt(unsigned long flags) +{ +#ifdef CONFIG_HAS_NMI +#warning irqflags_can_interrupt needs testing/fixing on such a target + return flags > ICC_PMR_EL1_MASKED; +#else + // PSTATE.DAIF I bit clear means interrupt is possible + return !(flags & (1 << 7)); +#endif +} + + #endif /* !__HEADER_ARM64_COMMON_ARCH_LOCK_H */ diff --git a/arch/arm64/kernel/include/arch-perfctr.h b/arch/arm64/kernel/include/arch-perfctr.h index fad87bcb..f07131f3 100644 --- a/arch/arm64/kernel/include/arch-perfctr.h +++ b/arch/arm64/kernel/include/arch-perfctr.h @@ -35,38 +35,4 @@ void arm64_disable_pmu(void); int armv8pmu_init(struct arm_pmu* cpu_pmu); /* TODO[PMU]: 共通部に定義があっても良い。今後の動向を見てここの定義を削除する */ -/* - * Generalized hardware cache events: - * - * { L1-D, L1-I, LLC, ITLB, DTLB, BPU, NODE } x - * { read, write, prefetch } x - * { accesses, misses } - */ -enum perf_hw_cache_id { - PERF_COUNT_HW_CACHE_L1D = 0, - PERF_COUNT_HW_CACHE_L1I = 1, - PERF_COUNT_HW_CACHE_LL = 2, - PERF_COUNT_HW_CACHE_DTLB = 3, - PERF_COUNT_HW_CACHE_ITLB = 4, - PERF_COUNT_HW_CACHE_BPU = 5, - 
PERF_COUNT_HW_CACHE_NODE = 6, - - PERF_COUNT_HW_CACHE_MAX, /* non-ABI */ -}; - -enum perf_hw_cache_op_id { - PERF_COUNT_HW_CACHE_OP_READ = 0, - PERF_COUNT_HW_CACHE_OP_WRITE = 1, - PERF_COUNT_HW_CACHE_OP_PREFETCH = 2, - - PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */ -}; - -enum perf_hw_cache_op_result_id { - PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0, - PERF_COUNT_HW_CACHE_RESULT_MISS = 1, - - PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */ -}; - #endif diff --git a/arch/arm64/kernel/include/signal.h b/arch/arm64/kernel/include/signal.h index cd9bff0a..623bb6b4 100644 --- a/arch/arm64/kernel/include/signal.h +++ b/arch/arm64/kernel/include/signal.h @@ -9,6 +9,11 @@ #define _NSIG_BPW 64 #define _NSIG_WORDS (_NSIG / _NSIG_BPW) +static inline int valid_signal(unsigned long sig) +{ + return sig <= _NSIG ? 1 : 0; +} + typedef unsigned long int __sigset_t; #define __sigmask(sig) (((__sigset_t) 1) << ((sig) - 1)) diff --git a/arch/arm64/kernel/include/syscall_list.h b/arch/arm64/kernel/include/syscall_list.h index f370ad7a..4747f3a4 100644 --- a/arch/arm64/kernel/include/syscall_list.h +++ b/arch/arm64/kernel/include/syscall_list.h @@ -114,14 +114,18 @@ SYSCALL_HANDLED(236, get_mempolicy) SYSCALL_HANDLED(237, set_mempolicy) SYSCALL_HANDLED(238, migrate_pages) SYSCALL_HANDLED(239, move_pages) +#ifdef PERF_ENABLE SYSCALL_HANDLED(241, perf_event_open) +#endif // PERF_ENABLE SYSCALL_HANDLED(260, wait4) SYSCALL_HANDLED(270, process_vm_readv) SYSCALL_HANDLED(271, process_vm_writev) +#ifdef PERF_ENABLE SYSCALL_HANDLED(601, pmc_init) SYSCALL_HANDLED(602, pmc_start) SYSCALL_HANDLED(603, pmc_stop) SYSCALL_HANDLED(604, pmc_reset) +#endif // PERF_ENABLE SYSCALL_HANDLED(700, get_cpu_id) #ifdef PROFILE_ENABLE SYSCALL_HANDLED(__NR_profile, profile) diff --git a/arch/arm64/kernel/irq-gic-v2.c b/arch/arm64/kernel/irq-gic-v2.c index d29b45ca..995b3bb4 100644 --- a/arch/arm64/kernel/irq-gic-v2.c +++ b/arch/arm64/kernel/irq-gic-v2.c @@ -7,15 +7,13 @@ #include #include #include +#include // #define 
DEBUG_GICV2 #ifdef DEBUG_GICV2 -#define dkprintf(...) kprintf(__VA_ARGS__) -#define ekprintf(...) kprintf(__VA_ARGS__) -#else -#define dkprintf(...) -#define ekprintf(...) kprintf(__VA_ARGS__) +#undef DDEBUG_DEFAULT +#define DDEBUG_DEFAULT DDEBUG_PRINT #endif void *dist_base; diff --git a/arch/arm64/kernel/irq-gic-v3.c b/arch/arm64/kernel/irq-gic-v3.c index 6550eee8..99a352bc 100644 --- a/arch/arm64/kernel/irq-gic-v3.c +++ b/arch/arm64/kernel/irq-gic-v3.c @@ -7,17 +7,15 @@ #include #include #include +#include //#define DEBUG_GICV3 #define USE_CAVIUM_THUNDER_X #ifdef DEBUG_GICV3 -#define dkprintf(...) kprintf(__VA_ARGS__) -#define ekprintf(...) kprintf(__VA_ARGS__) -#else -#define dkprintf(...) -#define ekprintf(...) kprintf(__VA_ARGS__) +#undef DDEBUG_DEFAULT +#define DDEBUG_DEFAULT DDEBUG_PRINT #endif #ifdef USE_CAVIUM_THUNDER_X diff --git a/arch/arm64/kernel/memory.c b/arch/arm64/kernel/memory.c index bb0fbf0b..de74c7c6 100644 --- a/arch/arm64/kernel/memory.c +++ b/arch/arm64/kernel/memory.c @@ -14,9 +14,7 @@ #include #include #include - -#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) -#define ekprintf(...) 
kprintf(__VA_ARGS__) +#include #define NOT_IMPLEMENTED() do { kprintf("%s is not implemented\n", __func__); while(1);} while(0) @@ -2924,17 +2922,12 @@ int read_process_vm(struct process_vm *vm, void *kdst, const void *usrc, size_t return error; } -#ifdef POSTK_DEBUG_TEMP_FIX_52 /* NUMA support(memory area determination) */ - if (!is_mckernel_memory(pa)) { -#else - if (pa < ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0) || - pa >= ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0)) { -#endif /* POSTK_DEBUG_TEMP_FIX_52 */ + if (!is_mckernel_memory(pa, pa + cpsize)) { dkprintf("%s: pa is outside of LWK memory, to: %p, pa: %p," "cpsize: %d\n", __FUNCTION__, to, pa, cpsize); va = ihk_mc_map_virtual(pa, 1, PTATTR_ACTIVE); memcpy(to, va, cpsize); - ihk_mc_unmap_virtual(va, 1, 1); + ihk_mc_unmap_virtual(va, 1); } else { va = phys_to_virt(pa); @@ -3007,17 +3000,12 @@ int write_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t return error; } -#ifdef POSTK_DEBUG_TEMP_FIX_52 /* NUMA support(memory area determination) */ - if (!is_mckernel_memory(pa)) { -#else - if (pa < ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0) || - pa >= ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0)) { -#endif /* POSTK_DEBUG_TEMP_FIX_52 */ + if (!is_mckernel_memory(pa, pa + cpsize)) { dkprintf("%s: pa is outside of LWK memory, from: %p," "pa: %p, cpsize: %d\n", __FUNCTION__, from, pa, cpsize); va = ihk_mc_map_virtual(pa, 1, PTATTR_WRITABLE|PTATTR_ACTIVE); memcpy(va, from, cpsize); - ihk_mc_unmap_virtual(va, 1, 1); + ihk_mc_unmap_virtual(va, 1); } else { va = phys_to_virt(pa); @@ -3078,17 +3066,12 @@ int patch_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t return error; } -#ifdef POSTK_DEBUG_TEMP_FIX_52 /* NUMA support(memory area determination) */ - if (!is_mckernel_memory(pa)) { -#else - if (pa < ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0) || - pa >= ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0)) { -#endif /* POSTK_DEBUG_TEMP_FIX_52 */ + 
if (!is_mckernel_memory(pa, pa + cpsize)) { dkprintf("%s: pa is outside of LWK memory, from: %p," "pa: %p, cpsize: %d\n", __FUNCTION__, from, pa, cpsize); va = ihk_mc_map_virtual(pa, 1, PTATTR_WRITABLE|PTATTR_ACTIVE); memcpy(va, from, cpsize); - ihk_mc_unmap_virtual(va, 1, 1); + ihk_mc_unmap_virtual(va, 1); } else { va = phys_to_virt(pa); diff --git a/arch/arm64/kernel/perfctr.c b/arch/arm64/kernel/perfctr.c index 5904e416..518173ab 100644 --- a/arch/arm64/kernel/perfctr.c +++ b/arch/arm64/kernel/perfctr.c @@ -93,21 +93,52 @@ int ihk_mc_perfctr_init(int counter, uint64_t config, int mode) return ret; } -int ihk_mc_perfctr_start(int counter) +/* Enable every counter whose bit is set in counter_mask; returns 0 or the first negative error. */ +int ihk_mc_perfctr_start(unsigned long counter_mask) { - int ret; - ret = cpu_pmu.enable_counter(counter); - return ret; + int ret = 0; + int counter; + unsigned long counter_bit; + + for (counter = 0, counter_bit = 1; + counter_bit && counter_bit <= counter_mask; /* "<=" visits the top set bit; 0 means the bit shifted out */ + counter++, counter_bit <<= 1) { + if (!(counter_mask & counter_bit)) + continue; + + ret = cpu_pmu.enable_counter(counter); /* counter index, not the whole mask */ + if (ret < 0) + break; + } + + return ret < 0 ? ret : 0; } -int ihk_mc_perfctr_stop(int counter) +/* Disable every counter (and its interrupt enable) whose bit is set in counter_mask; returns 0 or the first negative error. */ +int ihk_mc_perfctr_stop(unsigned long counter_mask) { - cpu_pmu.disable_counter(counter); + int ret = 0; + int counter; + unsigned long counter_bit; - // ihk_mc_perfctr_startが呼ばれるときには、 - // init系関数が呼ばれるのでdisableにする。 - cpu_pmu.disable_intens(counter); - return 0; + for (counter = 0, counter_bit = 1; + counter_bit && counter_bit <= counter_mask; /* "<=" visits the top set bit; 0 means the bit shifted out */ + counter++, counter_bit <<= 1) { + if (!(counter_mask & counter_bit)) + continue; + + ret = cpu_pmu.disable_counter(counter); + if (ret < 0) + break; + + // The init functions are called again whenever + // ihk_mc_perfctr_start is called, so set this to disabled here. + ret = cpu_pmu.disable_intens(counter); + if (ret < 0) + break; + } + + return ret < 0 ?
ret : 0; } int ihk_mc_perfctr_reset(int counter) diff --git a/arch/arm64/kernel/perfctr_armv8pmu.c b/arch/arm64/kernel/perfctr_armv8pmu.c index 2aea7343..87f919e7 100644 --- a/arch/arm64/kernel/perfctr_armv8pmu.c +++ b/arch/arm64/kernel/perfctr_armv8pmu.c @@ -4,16 +4,14 @@ #include #include #include +#include #define BIT(nr) (1UL << (nr)) //#define DEBUG_PRINT_PMU #ifdef DEBUG_PRINT_PMU -#define dkprintf(...) kprintf(__VA_ARGS__) -#define ekprintf(...) kprintf(__VA_ARGS__) -#else -#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) -#define ekprintf(...) kprintf(__VA_ARGS__) +#undef DDEBUG_DEFAULT +#define DDEBUG_DEFAULT DDEBUG_PRINT #endif diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index db092998..b264fdb2 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -21,15 +21,13 @@ #include #include #include +#include //#define DEBUG_PRINT_PSCI #ifdef DEBUG_PRINT_PSCI -#define dkprintf(...) kprintf(__VA_ARGS__) -#define ekprintf(...) kprintf(__VA_ARGS__) -#else -#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) -#define ekprintf(...) kprintf(__VA_ARGS__) +#undef DDEBUG_DEFAULT +#define DDEBUG_DEFAULT DDEBUG_PRINT #endif #define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 1795eb5f..da3652af 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -11,22 +11,17 @@ #include #include #include +#include //#define DEBUG_PRINT_SC #ifdef DEBUG_PRINT_SC -#define dkprintf kprintf -#define ekprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) -#else -#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) -#define ekprintf(...) 
do { if (0) kprintf(__VA_ARGS__); } while (0) +#undef DDEBUG_DEFAULT +#define DDEBUG_DEFAULT DDEBUG_PRINT #endif #define NOT_IMPLEMENTED() do { kprintf("%s is not implemented\n", __func__); while(1);} while(0) -#define BUG_ON(condition) do { if (condition) { kprintf("PANIC: %s: %s(line:%d)\n",\ - __FILE__, __FUNCTION__, __LINE__); panic(""); } } while(0) - extern void save_debugreg(unsigned long *debugreg); extern unsigned long do_kill(struct thread *thread, int pid, int tid, int sig, struct siginfo *info, int ptracecont); extern int interrupt_from_user(void *); @@ -959,11 +954,7 @@ void ptrace_report_signal(struct thread *thread, int sig) } thread->exit_status = sig; /* Transition thread state */ -#ifdef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */ proc->status = PS_DELAY_TRACED; -#else /* POSTK_DEBUG_TEMP_FIX_41 */ - proc->status = PS_TRACED; -#endif /* POSTK_DEBUG_TEMP_FIX_41 */ thread->status = PS_TRACED; proc->ptrace &= ~PT_TRACE_SYSCALL; if (sig == SIGSTOP || sig == SIGTSTP || @@ -982,10 +973,6 @@ void ptrace_report_signal(struct thread *thread, int sig) info._sifields._sigchld.si_pid = thread->tid; info._sifields._sigchld.si_status = thread->exit_status; do_kill(cpu_local_var(current), parent_pid, -1, SIGCHLD, &info, 0); -#ifndef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. 
*/ - /* Wake parent (if sleeping in wait4()) */ - waitq_wakeup(&proc->parent->waitpid_q); -#endif /* !POSTK_DEBUG_TEMP_FIX_41 */ dkprintf("ptrace_report_signal,sleeping\n"); /* Sleep */ diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index b325e15a..e15d6adb 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -14,6 +14,8 @@ #include #include #include +#include +#include extern void ptrace_report_signal(struct thread *thread, int sig); extern void clear_single_step(struct thread *thread); @@ -27,18 +29,12 @@ static void __check_signal(unsigned long rc, void *regs, int num, int irq_disabl //#define DEBUG_PRINT_SC #ifdef DEBUG_PRINT_SC -#define dkprintf kprintf -#define ekprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) -#else -#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) -#define ekprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) +#undef DDEBUG_DEFAULT +#define DDEBUG_DEFAULT DDEBUG_PRINT #endif #define NOT_IMPLEMENTED() do { kprintf("%s is not implemented\n", __func__); while(1);} while(0) -#define BUG_ON(condition) do { if (condition) { kprintf("PANIC: %s: %s(line:%d)\n",\ - __FILE__, __FUNCTION__, __LINE__); panic(""); } } while(0) - uintptr_t debug_constants[] = { sizeof(struct cpu_local_var), offsetof(struct cpu_local_var, current), @@ -59,7 +55,7 @@ static int cpuid_head = 1; extern int num_processors; -int obtain_clone_cpuid(cpu_set_t *cpu_set) { +int obtain_clone_cpuid(cpu_set_t *cpu_set, int use_last) { int min_queue_len = -1; int i, min_cpu = -1; @@ -1177,19 +1173,10 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi /* Reap and set new signal_flags */ proc->signal_flags = SIGNAL_STOP_STOPPED; -#ifdef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. 
*/ proc->status = PS_DELAY_STOPPED; -#else /* POSTK_DEBUG_TEMP_FIX_41 */ - proc->status = PS_STOPPED; -#endif /* POSTK_DEBUG_TEMP_FIX_41 */ thread->status = PS_STOPPED; mcs_rwlock_writer_unlock(&proc->update_lock, &lock); -#ifndef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */ - /* Wake up the parent who tried wait4 and sleeping */ - waitq_wakeup(&proc->parent->waitpid_q); -#endif /* !POSTK_DEBUG_TEMP_FIX_41 */ - dkprintf("do_signal(): pid: %d, tid: %d SIGSTOP, sleeping\n", proc->pid, thread->tid); /* Sleep */ @@ -1206,19 +1193,10 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi /* Update thread state in fork tree */ mcs_rwlock_writer_lock(&proc->update_lock, &lock); thread->exit_status = SIGTRAP; -#ifdef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */ proc->status = PS_DELAY_TRACED; -#else /* POSTK_DEBUG_TEMP_FIX_41 */ - proc->status = PS_TRACED; -#endif /* POSTK_DEBUG_TEMP_FIX_41 */ thread->status = PS_TRACED; mcs_rwlock_writer_unlock(&proc->update_lock, &lock); -#ifndef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. 
*/ - /* Wake up the parent who tried wait4 and sleeping */ - waitq_wakeup(&thread->proc->parent->waitpid_q); -#endif /* !POSTK_DEBUG_TEMP_FIX_41 */ - /* Sleep */ dkprintf("do_signal,SIGTRAP,sleeping\n"); @@ -1594,7 +1572,7 @@ done: return 0; } - if (tthread->thread_offloaded) { + if (tthread->uti_state == UTI_STATE_RUNNING_IN_LINUX) { interrupt_syscall(tthread, sig); release_thread(tthread); return 0; @@ -1729,7 +1707,7 @@ SYSCALL_DECLARE(mmap) | MAP_NONBLOCK // 0x10000 ; - const intptr_t addr0 = ihk_mc_syscall_arg0(ctx); + const uintptr_t addr0 = ihk_mc_syscall_arg0(ctx); const size_t len0 = ihk_mc_syscall_arg1(ctx); const int prot = ihk_mc_syscall_arg2(ctx); const int flags0 = ihk_mc_syscall_arg3(ctx); @@ -1738,7 +1716,7 @@ SYSCALL_DECLARE(mmap) struct thread *thread = cpu_local_var(current); struct vm_regions *region = &thread->vm->region; int error; - intptr_t addr = 0; + uintptr_t addr = 0; size_t len; int flags = flags0; size_t pgsize; diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 92b12949..bc2329e8 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -14,15 +14,13 @@ #include #include #include +#include //#define DEBUG_PRINT_VDSO #ifdef DEBUG_PRINT_VDSO -#define dkprintf(...) kprintf(__VA_ARGS__) -#define ekprintf(...) kprintf(__VA_ARGS__) -#else -#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) -#define ekprintf(...) 
kprintf(__VA_ARGS__) +#undef DDEBUG_DEFAULT +#define DDEBUG_DEFAULT DDEBUG_PRINT #endif #ifdef POSTK_DEBUG_ARCH_DEP_52 diff --git a/arch/arm64/kernel/vdso/gettimeofday.c b/arch/arm64/kernel/vdso/gettimeofday.c index d1418b27..d6616024 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.c +++ b/arch/arm64/kernel/vdso/gettimeofday.c @@ -1,5 +1,7 @@ /* gettimeofday.c COPYRIGHT FUJITSU LIMITED 2016 */ +#include +#include #include #include #include diff --git a/arch/x86_64/elfboot/raw.lds b/arch/x86_64/elfboot/raw.lds index 798395a0..fa026190 100644 --- a/arch/x86_64/elfboot/raw.lds +++ b/arch/x86_64/elfboot/raw.lds @@ -9,29 +9,29 @@ PHDRS SECTIONS { . = SIZEOF_HEADERS; - . = ALIGN(4096); + . = ALIGN(4096); .text : { - *(.text) + *(.text) } :text .data : { - *(.data) - *(.data.*) + *(.data) + *(.data.*) } :data .rodata : { - *(.rodata .rodata.*) + *(.rodata .rodata.*) } :data . = ALIGN(8); .bss : { - _bss_start = .; - *(.bss .bss.*) - _bss_end = .; - . = ALIGN(4096); - _stack_end = .; - } :data + _bss_start = .; + *(.bss .bss.*) + _bss_end = .; + . = ALIGN(4096); + _stack_end = .; + } :data /DISCARD/ : { - *(.eh_frame) - *(.note.gnu.build-id) + *(.eh_frame) + *(.note.gnu.build-id) } -} \ No newline at end of file +} diff --git a/arch/x86_64/kernel/cpu.c b/arch/x86_64/kernel/cpu.c index f7464d6e..3467f58c 100644 --- a/arch/x86_64/kernel/cpu.c +++ b/arch/x86_64/kernel/cpu.c @@ -31,6 +31,7 @@ #include #include #include +#include #define LAPIC_ID 0x020 #define LAPIC_TIMER 0x320 @@ -69,11 +70,8 @@ //#define DEBUG_PRINT_CPU #ifdef DEBUG_PRINT_CPU -#define dkprintf kprintf -#define ekprintf kprintf -#else -#define dkprintf(...) 
do { if (0) kprintf(__VA_ARGS__); } while (0) -#define ekprintf kprintf +#undef DDEBUG_DEFAULT +#define DDEBUG_DEFAULT DDEBUG_PRINT #endif static void *lapic_vp; @@ -96,6 +94,8 @@ int gettime_local_support = 0; extern int ihk_mc_pt_print_pte(struct page_table *pt, void *virt); extern int kprintf(const char *format, ...); extern int interrupt_from_user(void *); +extern void perf_start(struct mc_perf_event *event); +extern void perf_reset(struct mc_perf_event *event); static struct idt_entry{ uint32_t desc[4]; @@ -847,9 +847,6 @@ void setup_x86_ap(void (*next_func)(void)) } void arch_show_interrupt_context(const void *reg); -void set_signal(int sig, void *regs, struct siginfo *info); -void check_signal(unsigned long, void *, int); -void check_sig_pending(); extern void tlb_flush_handler(int vector); void __show_stack(uintptr_t *sp) { @@ -877,7 +874,7 @@ void interrupt_exit(struct x86_user_context *regs) cpu_enable_interrupt(); check_sig_pending(); check_need_resched(); - check_signal(0, regs, 0); + check_signal(0, regs, -1); } else { check_sig_pending(); @@ -1010,6 +1007,12 @@ void handle_interrupt(int vector, struct x86_user_context *regs) set_cputime(interrupt_from_user(regs)? 
0: 1); --v->in_interrupt; + + /* for migration by IPI */ + if (v->flags & CPU_FLAG_NEED_MIGRATE) { + schedule(); + check_signal(0, regs, 0); + } } void gpe_handler(struct x86_user_context *regs) @@ -1644,12 +1647,10 @@ int ihk_mc_interrupt_cpu(int cpu, int vector) return 0; } -#ifdef POSTK_DEBUG_ARCH_DEP_22 -extern void perf_start(struct mc_perf_event *event); -extern void perf_reset(struct mc_perf_event *event); struct thread *arch_switch_context(struct thread *prev, struct thread *next) { struct thread *last; + struct mcs_rwlock_node_irqsave lock; dkprintf("[%d] schedule: tlsblock_base: 0x%lX\n", ihk_mc_get_processor_id(), next->tlsblock_base); @@ -1668,7 +1669,7 @@ struct thread *arch_switch_context(struct thread *prev, struct thread *next) } #ifdef PROFILE_ENABLE - if (prev->profile && prev->profile_start_ts != 0) { + if (prev && prev->profile && prev->profile_start_ts != 0) { prev->profile_elapsed_ts += (rdtsc() - prev->profile_start_ts); prev->profile_start_ts = 0; @@ -1680,6 +1681,28 @@ struct thread *arch_switch_context(struct thread *prev, struct thread *next) #endif if (prev) { + mcs_rwlock_writer_lock(&prev->proc->update_lock, &lock); + if (prev->proc->status & (PS_DELAY_STOPPED | PS_DELAY_TRACED)) { + switch (prev->proc->status) { + case PS_DELAY_STOPPED: + prev->proc->status = PS_STOPPED; + break; + case PS_DELAY_TRACED: + prev->proc->status = PS_TRACED; + break; + default: + break; + } + mcs_rwlock_writer_unlock(&prev->proc->update_lock, + &lock); + + /* Wake up the parent who tried wait4 and sleeping */ + waitq_wakeup(&prev->proc->parent->waitpid_q); + } else { + mcs_rwlock_writer_unlock(&prev->proc->update_lock, + &lock); + } + last = ihk_mc_switch_context(&prev->ctx, &next->ctx, prev); } else { @@ -1687,7 +1710,6 @@ struct thread *arch_switch_context(struct thread *prev, struct thread *next) } return last; } -#endif /*@ @ requires \valid(thread); @@ -1762,14 +1784,6 @@ void copy_fp_regs(struct thread *from, struct thread *to) } } -#ifdef 
POSTK_DEBUG_TEMP_FIX_19 -void -clear_fp_regs(struct thread *thread) -{ - return; -} -#endif /* POSTK_DEBUG_TEMP_FIX_19 */ - /*@ @ requires \valid(thread); @ assigns thread->fp_regs; @@ -1777,8 +1791,11 @@ clear_fp_regs(struct thread *thread) void restore_fp_regs(struct thread *thread) { - if (!thread->fp_regs) + if (!thread->fp_regs) { + // only clear fpregs. + clear_fp_regs(); return; + } if (xsave_available) { unsigned int low, high; @@ -1797,6 +1814,13 @@ restore_fp_regs(struct thread *thread) //release_fp_regs(thread); } +void clear_fp_regs(void) +{ + struct cpu_local_var *v = get_this_cpu_local_var(); + + restore_fp_regs(&v->idle); +} + ihk_mc_user_context_t *lookup_user_context(struct thread *thread) { ihk_mc_user_context_t *uctx = thread->uctx; diff --git a/arch/x86_64/kernel/gencore.c b/arch/x86_64/kernel/gencore.c index 7ad03f06..d27e2fd9 100644 --- a/arch/x86_64/kernel/gencore.c +++ b/arch/x86_64/kernel/gencore.c @@ -6,6 +6,7 @@ #include #include #include +#include #define align32(x) ((((x) + 3) / 4) * 4) #define alignpage(x) ((((x) + (PAGE_SIZE) - 1) / (PAGE_SIZE)) * (PAGE_SIZE)) @@ -13,13 +14,16 @@ //#define DEBUG_PRINT_GENCORE #ifdef DEBUG_PRINT_GENCORE -#define dkprintf(...) kprintf(__VA_ARGS__) -#define ekprintf(...) kprintf(__VA_ARGS__) -#else -#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) -#define ekprintf(...) kprintf(__VA_ARGS__) +#undef DDEBUG_DEFAULT +#define DDEBUG_DEFAULT DDEBUG_PRINT #endif +/* Exclude reserved (mckernel's internal use), device file, + * hole created by mprotect + */ +#define GENCORE_RANGE_IS_INACCESSIBLE(range) \ + ((range->flag & (VR_RESERVED | VR_MEMTYPE_UC | VR_DONTDUMP))) + /* * Generate a core file image, which consists of many chunks. 
* Returns an allocated table, an etnry of which is a pair of the address @@ -309,12 +313,10 @@ int gencore(struct thread *thread, void *regs, dkprintf("start:%lx end:%lx flag:%lx objoff:%lx\n", range->start, range->end, range->flag, range->objoff); - /* We omit reserved areas because they are only for - mckernel's internal use. */ - if (range->flag & VR_RESERVED) - continue; - if (range->flag & VR_DONTDUMP) + + if (GENCORE_RANGE_IS_INACCESSIBLE(range)) { continue; + } /* We need a chunk for each page for a demand paging area. This can be optimized for spacial complexity but we would lose simplicity instead. */ @@ -403,8 +405,9 @@ int gencore(struct thread *thread, void *regs, unsigned long flag = range->flag; unsigned long size = range->end - range->start; - if (range->flag & VR_RESERVED) + if (GENCORE_RANGE_IS_INACCESSIBLE(range)) { continue; + } ph[i].p_type = PT_LOAD; ph[i].p_flags = ((flag & VR_PROT_READ) ? PF_R : 0) @@ -446,8 +449,9 @@ int gencore(struct thread *thread, void *regs, unsigned long phys; - if (range->flag & VR_RESERVED) + if (GENCORE_RANGE_IS_INACCESSIBLE(range)) { continue; + } if (range->flag & VR_DEMAND_PAGING) { /* Just an ad hoc kluge. 
*/ unsigned long p, start, phys; diff --git a/arch/x86_64/kernel/include/arch-futex.h b/arch/x86_64/kernel/include/arch-futex.h index d9974e03..11b4f699 100644 --- a/arch/x86_64/kernel/include/arch-futex.h +++ b/arch/x86_64/kernel/include/arch-futex.h @@ -64,12 +64,13 @@ static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, return oldval; } -static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) +static inline int futex_atomic_op_inuser(int encoded_op, + int __user *uaddr) { int op = (encoded_op >> 28) & 7; int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; + int oparg = (((encoded_op & 0x00fff000) >> 12) ^ 0x800) - 0x800; /* sign-extend the 12-bit field, as the replaced shift pair did */ + int cmparg = ((encoded_op & 0xfff) ^ 0x800) - 0x800; /* sign-extend the 12-bit field without a signed left shift */ int oldval = 0, ret, tem; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) diff --git a/arch/x86_64/kernel/include/arch-lock.h b/arch/x86_64/kernel/include/arch-lock.h index 94c19f4f..da4bdbfe 100644 --- a/arch/x86_64/kernel/include/arch-lock.h +++ b/arch/x86_64/kernel/include/arch-lock.h @@ -6,6 +6,7 @@ #include #include +#include //#define DEBUG_SPINLOCK //#define DEBUG_MCS_RWLOCK @@ -14,7 +15,17 @@ int __kprintf(const char *format, ...); #endif -typedef int ihk_spinlock_t; +typedef unsigned short __ticket_t; +typedef unsigned int __ticketpair_t; + +typedef struct ihk_spinlock { + union { + __ticketpair_t head_tail; + struct __raw_tickets { + __ticket_t head, tail; + } tickets; + }; +} ihk_spinlock_t; extern void preempt_enable(void); extern void preempt_disable(void); @@ -23,9 +34,61 @@ extern void preempt_disable(void); static void ihk_mc_spinlock_init(ihk_spinlock_t *lock) { - *lock = 0; + lock->head_tail = 0; +} +#define SPIN_LOCK_UNLOCKED { .head_tail = 0 } + + +#ifdef DEBUG_SPINLOCK +#define ihk_mc_spinlock_trylock_noirq(l) { int rc; \ +__kprintf("[%d] call ihk_mc_spinlock_trylock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \ +rc = __ihk_mc_spinlock_trylock_noirq(l); \ + __kprintf("[%d] ret
ihk_mc_spinlock_trylock_noirq\n", ihk_mc_get_processor_id()); rc; \ +} +#else +#define ihk_mc_spinlock_trylock_noirq __ihk_mc_spinlock_trylock_noirq +#endif + +static int __ihk_mc_spinlock_trylock_noirq(ihk_spinlock_t *lock) +{ + ihk_spinlock_t cur = { .head_tail = lock->head_tail }; + ihk_spinlock_t next = { .tickets.head = cur.tickets.head, .tickets.tail = cur.tickets.tail + 2 }; + int success; + + if (cur.tickets.head != cur.tickets.tail) { + return 0; + } + + preempt_disable(); + + /* Use the same increment amount as other functions! */ + success = __sync_bool_compare_and_swap((__ticketpair_t*)lock, cur.head_tail, next.head_tail); + + if (!success) { + preempt_enable(); + } + return success; +} + +#ifdef DEBUG_SPINLOCK +#define ihk_mc_spinlock_trylock(l, result) ({ unsigned long rc; \ +__kprintf("[%d] call ihk_mc_spinlock_trylock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \ + rc = __ihk_mc_spinlock_trylock(l, result); \ +__kprintf("[%d] ret ihk_mc_spinlock_trylock\n", ihk_mc_get_processor_id()); rc;\ +}) +#else +#define ihk_mc_spinlock_trylock __ihk_mc_spinlock_trylock +#endif +static unsigned long __ihk_mc_spinlock_trylock(ihk_spinlock_t *lock, int *result) +{ + unsigned long flags; + + flags = cpu_disable_interrupt_save(); + + *result = __ihk_mc_spinlock_trylock_noirq(lock); + + return flags; } -#define SPIN_LOCK_UNLOCKED 0 #ifdef DEBUG_SPINLOCK #define ihk_mc_spinlock_lock_noirq(l) { \ @@ -39,40 +102,24 @@ __kprintf("[%d] ret ihk_mc_spinlock_lock_noirq\n", ihk_mc_get_processor_id()); \ static void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock) { - int inc = 0x00010000; - int tmp; - -#if 0 - asm volatile("lock ; xaddl %0, %1\n" - "movzwl %w0, %2\n\t" - "shrl $16, %0\n\t" - "1:\t" - "cmpl %0, %2\n\t" - "je 2f\n\t" - "rep ; nop\n\t" - "movzwl %1, %2\n\t" - "jmp 1b\n" - "2:" - : "+Q" (inc), "+m" (*lock), "=r" (tmp) : : "memory", "cc"); -#endif + register struct __raw_tickets inc = { .tail = 0x0002 }; preempt_disable(); - asm 
volatile("lock; xaddl %0, %1\n" - "movzwl %w0, %2\n\t" - "shrl $16, %0\n\t" - "1:\t" - "cmpl %0, %2\n\t" - "je 2f\n\t" - "rep ; nop\n\t" - "movzwl %1, %2\n\t" - /* don't need lfence here, because loads are in-order */ - "jmp 1b\n" - "2:" - : "+r" (inc), "+m" (*lock), "=&r" (tmp) - : - : "memory", "cc"); + asm volatile ("lock xaddl %0, %1\n" + : "+r" (inc), "+m" (*(lock)) : : "memory", "cc"); + if (inc.head == inc.tail) + goto out; + + for (;;) { + if (*((volatile __ticket_t *)&lock->tickets.head) == inc.tail) + goto out; + cpu_pause(); + } + +out: + barrier(); /* make sure nothing creeps before the lock is taken */ } #ifdef DEBUG_SPINLOCK @@ -106,8 +153,11 @@ __kprintf("[%d] ret ihk_mc_spinlock_unlock_noirq\n", ihk_mc_get_processor_id()); #endif static void __ihk_mc_spinlock_unlock_noirq(ihk_spinlock_t *lock) { - asm volatile ("lock incw %0" : "+m"(*lock) : : "memory", "cc"); - + __ticket_t inc = 0x0002; + + asm volatile ("lock addw %1, %0\n" + : "+m" (lock->tickets.head) : "ri" (inc) : "memory", "cc"); + preempt_enable(); } @@ -602,4 +652,9 @@ __mcs_rwlock_reader_unlock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_ #endif } +static inline int irqflags_can_interrupt(unsigned long flags) +{ + return !!(flags & 0x200); +} + #endif diff --git a/arch/x86_64/kernel/include/arch-memory.h b/arch/x86_64/kernel/include/arch-memory.h index e38d5e28..b3b931cb 100644 --- a/arch/x86_64/kernel/include/arch-memory.h +++ b/arch/x86_64/kernel/include/arch-memory.h @@ -42,16 +42,34 @@ #define USER_END 0x0000800000000000UL #define TASK_UNMAPPED_BASE 0x00002AAAAAA00000UL + +/* + * Canonical negative addresses (i.e., the smallest kernel virtual address) + * on x86 64 bit mode (in its most restricted 48 bit format) starts from + * 0xffff800000000000, but Linux starts mapping physical memory at 0xffff880000000000. 
+ * The 0x80000000000 long gap (8TBs, i.e., 16 PGD level entries in the page tables) + * is used for Xen hypervisor (see arch/x86/include/asm/page.h) and that is + * what we utilize for McKernel. + * This gives us the benefit of being able to use Linux kernel virtual + * addresses identically as in Linux. + * + * NOTE: update these also in eclair.c when modified! + */ #define MAP_ST_START 0xffff800000000000UL -#define MAP_VMAP_START 0xfffff00000000000UL -#define MAP_FIXED_START 0xffffffff70000000UL -#define MAP_KERNEL_START 0xffffffff80000000UL +#define MAP_VMAP_START 0xffff850000000000UL +#define MAP_FIXED_START 0xffff860000000000UL +#define LINUX_PAGE_OFFSET 0xffff880000000000UL +/* + * MAP_KERNEL_START is 8MB below MODULES_END in Linux. + * Placing the LWK image in the virtual address space at the end of + * the Linux modules section enables us to map the LWK TEXT in Linux + * as well, so that Linux can also call into LWK text. + */ +#define MAP_KERNEL_START 0xFFFFFFFFFE800000UL #define STACK_TOP(region) ((region)->user_end) #define MAP_VMAP_SIZE 0x0000000100000000UL -#define KERNEL_PHYS_OFFSET MAP_ST_START - #define PTL4_SHIFT 39 #define PTL4_SIZE (1UL << PTL4_SHIFT) #define PTL3_SHIFT 30 diff --git a/arch/x86_64/kernel/include/syscall_list.h b/arch/x86_64/kernel/include/syscall_list.h index 48b1ea0a..6de0cccc 100644 --- a/arch/x86_64/kernel/include/syscall_list.h +++ b/arch/x86_64/kernel/include/syscall_list.h @@ -114,7 +114,7 @@ SYSCALL_HANDLED(160, setrlimit) SYSCALL_HANDLED(164, settimeofday) SYSCALL_HANDLED(186, gettid) SYSCALL_HANDLED(200, tkill) -SYSCALL_DELEGATED(201, time) +SYSCALL_HANDLED(201, time) SYSCALL_HANDLED(202, futex) SYSCALL_HANDLED(203, sched_setaffinity) SYSCALL_HANDLED(204, sched_getaffinity) @@ -161,6 +161,7 @@ SYSCALL_HANDLED(__NR_profile, profile) SYSCALL_HANDLED(730, util_migrate_inter_kernel) SYSCALL_HANDLED(731, util_indicate_clone) SYSCALL_HANDLED(732, get_system) +SYSCALL_HANDLED(733, util_register_desc) /* McKernel Specific */ 
SYSCALL_HANDLED(801, swapout) diff --git a/arch/x86_64/kernel/local.c b/arch/x86_64/kernel/local.c index 84262fc9..3ca416d9 100644 --- a/arch/x86_64/kernel/local.c +++ b/arch/x86_64/kernel/local.c @@ -107,9 +107,17 @@ void init_boot_processor_local(void) @ ensures \result == %gs; @ assigns \nothing; */ +extern int num_processors; int ihk_mc_get_processor_id(void) { int id; + void *gs; + + gs = (void *)rdmsr(MSR_GS_BASE); + if (gs < (void *)locals || + gs > ((void *)locals + LOCALS_SPAN * num_processors)) { + return -1; + } asm volatile("movl %%gs:0, %0" : "=r"(id)); diff --git a/arch/x86_64/kernel/memory.c b/arch/x86_64/kernel/memory.c index c7ccee7f..17ed6e45 100644 --- a/arch/x86_64/kernel/memory.c +++ b/arch/x86_64/kernel/memory.c @@ -25,15 +25,13 @@ #include #include #include +#include //#define DEBUG #ifdef DEBUG -#define dkprintf(...) do { kprintf(__VA_ARGS__); } while (0) -#define ekprintf(...) do { kprintf(__VA_ARGS__); } while (0) -#else -#define dkprintf(...) do { } while (0) -#define ekprintf(...) 
do { kprintf(__VA_ARGS__); } while (0) +#undef DDEBUG_DEFAULT +#define DDEBUG_DEFAULT DDEBUG_PRINT #endif static char *last_page; @@ -41,6 +39,8 @@ extern char _head[], _end[]; extern unsigned long x86_kernel_phys_base; +int safe_kernel_map = 0; + /* Arch specific early allocation routine */ void *early_alloc_pages(int nr_pages) { @@ -109,6 +109,7 @@ struct page_table { }; static struct page_table *init_pt; +static int init_pt_loaded = 0; static ihk_spinlock_t init_pt_lock; static int use_1gb_page = 0; @@ -167,30 +168,6 @@ static unsigned long setup_l3(struct page_table *pt, return virt_to_phys(pt); } -static void init_normal_area(struct page_table *pt) -{ - unsigned long map_start, map_end, phys, pt_phys; - int ident_index, virt_index; - - map_start = ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0); - map_end = ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0); - - kprintf("map_start = %lx, map_end = %lx\n", map_start, map_end); - ident_index = map_start >> PTL4_SHIFT; - virt_index = (MAP_ST_START >> PTL4_SHIFT) & (PT_ENTRIES - 1); - - memset(pt, 0, sizeof(struct page_table)); - - for (phys = (map_start & ~(PTL4_SIZE - 1)); phys < map_end; - phys += PTL4_SIZE) { - pt_phys = setup_l3(ihk_mc_alloc_pages(1, IHK_MC_AP_CRITICAL), phys, - map_start, map_end); - - pt->entry[ident_index++] = pt_phys | PFL4_PDIR_ATTR; - pt->entry[virt_index++] = pt_phys | PFL4_PDIR_ATTR; - } -} - static struct page_table *__alloc_new_pt(ihk_mc_ap_flag ap_flag) { struct page_table *newpt = ihk_mc_alloc_pages(1, ap_flag); @@ -258,6 +235,11 @@ static unsigned long attr_to_l1attr(enum ihk_mc_pt_attribute attr) } } +#define PTLX_SHIFT(index) PTL ## index ## _SHIFT + +#define GET_VIRT_INDEX(virt, index, dest) \ + dest = ((virt) >> PTLX_SHIFT(index)) & (PT_ENTRIES - 1) + #define GET_VIRT_INDICES(virt, l4i, l3i, l2i, l1i) \ l4i = ((virt) >> PTL4_SHIFT) & (PT_ENTRIES - 1); \ l3i = ((virt) >> PTL3_SHIFT) & (PT_ENTRIES - 1); \ @@ -1518,12 +1500,12 @@ static int clear_range_l1(void *args0, pte_t 
*ptep, uint64_t base, if (page) { dkprintf("%s: page=%p,is_in_memobj=%d,(old & PFL1_DIRTY)=%lx,memobj=%p,args->memobj->flags=%x\n", __FUNCTION__, page, page_is_in_memobj(page), (old & PFL1_DIRTY), args->memobj, args->memobj ? args->memobj->flags : -1); } - if (page && page_is_in_memobj(page) && (old & PFL1_DIRTY) && (args->memobj) && - !(args->memobj->flags & MF_ZEROFILL)) { + if (page && page_is_in_memobj(page) && pte_is_dirty(&old, PTL1_SIZE) && + args->memobj && !(args->memobj->flags & MF_ZEROFILL)) { memobj_flush_page(args->memobj, phys, PTL1_SIZE); } - if (!(old & PFL1_FILEOFF)) { + if (!pte_is_fileoff(&old, PTL1_SIZE)) { if(args->free_physical) { if (!page) { /* Anonymous || !XPMEM attach */ @@ -1585,11 +1567,11 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base, page = phys_to_page(phys); } - if (page && page_is_in_memobj(page) && (old & PFL2_DIRTY)) { + if (page && page_is_in_memobj(page) && pte_is_dirty(&old, PTL2_SIZE)) { memobj_flush_page(args->memobj, phys, PTL2_SIZE); } - if (!(old & PFL2_FILEOFF)) { + if (!pte_is_fileoff(&old, PTL2_SIZE)) { if(args->free_physical) { if (!page) { /* Anonymous || !XPMEM attach */ @@ -1666,13 +1648,13 @@ static int clear_range_l3(void *args0, pte_t *ptep, uint64_t base, page = phys_to_page(phys); } - if (page && page_is_in_memobj(page) && (old & PFL3_DIRTY)) { + if (page && page_is_in_memobj(page) && pte_is_dirty(&old, PTL3_SIZE)) { memobj_flush_page(args->memobj, phys, PTL3_SIZE); } dkprintf("%s: phys=%ld, pte_get_phys(&old),PTL3_SIZE\n", __FUNCTION__, pte_get_phys(&old)); - if (!(old & PFL3_FILEOFF)) { + if (!pte_is_fileoff(&old, PTL3_SIZE)) { if(args->free_physical) { if (!page) { /* Anonymous || !XPMEM attach */ @@ -2540,6 +2522,82 @@ static void init_fixed_area(struct page_table *pt) return; } +static void init_normal_area(struct page_table *pt) +{ + unsigned long map_start, map_end, phys; + void *virt; + + map_start = ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0); + map_end = 
ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0); + virt = (void *)MAP_ST_START + map_start; + + kprintf("map_start = %lx, map_end = %lx, virt %lx\n", + map_start, map_end, virt); + + for (phys = map_start; phys < map_end; phys += LARGE_PAGE_SIZE) { + if (set_pt_large_page(pt, virt, phys, PTATTR_WRITABLE) != 0) { + kprintf("%s: error setting mapping for 0x%lx\n", + __func__, virt); + } + virt += LARGE_PAGE_SIZE; + } +} + +static void init_linux_kernel_mapping(struct page_table *pt) +{ + unsigned long map_start, map_end, phys; + void *virt; + int nr_memory_chunks, chunk_id, numa_id; + + /* In case of safe_kernel_map option (safe_kernel_map == 1), + * processing to prevent destruction of the memory area on Linux side + * is executed */ + if (safe_kernel_map == 0) { + kprintf("Straight-map entire physical memory\n"); + + /* Map 2 TB for now */ + map_start = 0; + map_end = 0x20000000000; + + virt = (void *)LINUX_PAGE_OFFSET; + + kprintf("Linux kernel virtual: 0x%lx - 0x%lx -> 0x%lx - 0x%lx\n", + LINUX_PAGE_OFFSET, LINUX_PAGE_OFFSET + map_end, 0, map_end); + + for (phys = map_start; phys < map_end; phys += LARGE_PAGE_SIZE) { + if (set_pt_large_page(pt, virt, phys, PTATTR_WRITABLE) != 0) { + kprintf("%s: error setting mapping for 0x%lx\n", __FUNCTION__, virt); + } + virt += LARGE_PAGE_SIZE; + } + } else { + kprintf("Straight-map physical memory areas allocated to McKernel\n"); + + nr_memory_chunks = ihk_mc_get_nr_memory_chunks(); + if (nr_memory_chunks == 0) { + kprintf("%s: ERROR: No memory chunk available.\n", __FUNCTION__); + return; + } + + for (chunk_id = 0; chunk_id < nr_memory_chunks; chunk_id++) { + if (ihk_mc_get_memory_chunk(chunk_id, &map_start, &map_end, &numa_id)) { + kprintf("%s: ERROR: Memory chunk id (%d) out of range.\n", __FUNCTION__, chunk_id); + continue; + } + + dkprintf("Linux kernel virtual: 0x%lx - 0x%lx -> 0x%lx - 0x%lx\n", + LINUX_PAGE_OFFSET + map_start, LINUX_PAGE_OFFSET + map_end, map_start, map_end); + + virt = (void *)(LINUX_PAGE_OFFSET + 
map_start); + for (phys = map_start; phys < map_end; phys += LARGE_PAGE_SIZE, virt += LARGE_PAGE_SIZE) { + if (set_pt_large_page(pt, virt, phys, PTATTR_WRITABLE) != 0) { + kprintf("%s: set_pt_large_page() failed for 0x%lx\n", __FUNCTION__, virt); + } + } + } + } +} + void init_text_area(struct page_table *pt) { unsigned long __end, phys, virt; @@ -2624,17 +2682,19 @@ void init_page_table(void) init_pt = ihk_mc_alloc_pages(1, IHK_MC_AP_CRITICAL); ihk_mc_spinlock_init(&init_pt_lock); - memset(init_pt, 0, sizeof(PAGE_SIZE)); + memset(init_pt, 0, sizeof(*init_pt)); /* Normal memory area */ init_normal_area(init_pt); + init_linux_kernel_mapping(init_pt); init_fixed_area(init_pt); init_low_area(init_pt); init_text_area(init_pt); init_vsyscall_area(init_pt); load_page_table(init_pt); - kprintf("Page table is now at %p\n", init_pt); + init_pt_loaded = 1; + kprintf("Page table is now at 0x%lx\n", init_pt); } extern void __reserve_arch_pages(unsigned long, unsigned long, @@ -2662,17 +2722,33 @@ void ihk_mc_reserve_arch_pages(struct ihk_page_allocator_desc *pa_allocator, unsigned long virt_to_phys(void *v) { unsigned long va = (unsigned long)v; - + if (va >= MAP_KERNEL_START) { + dkprintf("%s: MAP_KERNEL_START <= 0x%lx <= LINUX_PAGE_OFFSET\n", + __FUNCTION__, va); return va - MAP_KERNEL_START + x86_kernel_phys_base; - } else { + } + else if (va >= LINUX_PAGE_OFFSET) { + return va - LINUX_PAGE_OFFSET; + } + else if (va >= MAP_FIXED_START) { + return va - MAP_FIXED_START; + } + else { + dkprintf("%s: MAP_ST_START <= 0x%lx <= MAP_FIXED_START\n", + __FUNCTION__, va); return va - MAP_ST_START; } } void *phys_to_virt(unsigned long p) { - return (void *)(p + MAP_ST_START); + /* Before loading our own PT use straight mapping */ + if (!init_pt_loaded) { + return (void *)(p + MAP_ST_START); + } + + return (void *)(p + LINUX_PAGE_OFFSET); } int copy_from_user(void *dst, const void *src, size_t siz) @@ -2840,17 +2916,12 @@ int read_process_vm(struct process_vm *vm, void *kdst, const void 
*usrc, size_t return error; } -#ifdef POSTK_DEBUG_TEMP_FIX_52 /* NUMA support(memory area determination) */ - if (!is_mckernel_memory(pa)) { -#else - if (pa < ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0) || - pa >= ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0)) { -#endif /* POSTK_DEBUG_TEMP_FIX_52 */ + if (!is_mckernel_memory(pa, pa + cpsize)) { dkprintf("%s: pa is outside of LWK memory, to: %p, pa: %p," "cpsize: %d\n", __FUNCTION__, to, pa, cpsize); va = ihk_mc_map_virtual(pa, 1, PTATTR_ACTIVE); memcpy(to, va, cpsize); - ihk_mc_unmap_virtual(va, 1, 1); + ihk_mc_unmap_virtual(va, 1); } else { va = phys_to_virt(pa); @@ -2924,17 +2995,12 @@ int write_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t return error; } -#ifdef POSTK_DEBUG_TEMP_FIX_52 /* NUMA support(memory area determination) */ - if (!is_mckernel_memory(pa)) { -#else - if (pa < ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0) || - pa >= ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0)) { -#endif /* POSTK_DEBUG_TEMP_FIX_52 */ + if (!is_mckernel_memory(pa, pa + cpsize)) { dkprintf("%s: pa is outside of LWK memory, from: %p," "pa: %p, cpsize: %d\n", __FUNCTION__, from, pa, cpsize); va = ihk_mc_map_virtual(pa, 1, PTATTR_ACTIVE); memcpy(va, from, cpsize); - ihk_mc_unmap_virtual(va, 1, 1); + ihk_mc_unmap_virtual(va, 1); } else { va = phys_to_virt(pa); @@ -2995,17 +3061,12 @@ int patch_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t return error; } -#ifdef POSTK_DEBUG_TEMP_FIX_52 /* NUMA support(memory area determination) */ - if (!is_mckernel_memory(pa)) { -#else - if (pa < ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0) || - pa >= ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0)) { -#endif /* POSTK_DEBUG_TEMP_FIX_52 */ + if (!is_mckernel_memory(pa, pa + cpsize)) { dkprintf("%s: pa is outside of LWK memory, from: %p," "pa: %p, cpsize: %d\n", __FUNCTION__, from, pa, cpsize); va = ihk_mc_map_virtual(pa, 1, PTATTR_ACTIVE); memcpy(va, from, cpsize); - 
ihk_mc_unmap_virtual(va, 1, 1); + ihk_mc_unmap_virtual(va, 1); } else { va = phys_to_virt(pa); diff --git a/arch/x86_64/kernel/mikc.c b/arch/x86_64/kernel/mikc.c index 79ced05a..7a57e84f 100644 --- a/arch/x86_64/kernel/mikc.c +++ b/arch/x86_64/kernel/mikc.c @@ -30,7 +30,7 @@ int ihk_mc_ikc_init_first_local(struct ihk_ikc_channel_desc *channel, memset(channel, 0, sizeof(struct ihk_ikc_channel_desc)); - mikc_queue_pages = ((2 * num_processors * MASTER_IKCQ_PKTSIZE) + mikc_queue_pages = ((4 * num_processors * MASTER_IKCQ_PKTSIZE) + (PAGE_SIZE - 1)) / PAGE_SIZE; /* Place both sides in this side */ diff --git a/arch/x86_64/kernel/perfctr.c b/arch/x86_64/kernel/perfctr.c index 4f33939d..cc10f3c1 100644 --- a/arch/x86_64/kernel/perfctr.c +++ b/arch/x86_64/kernel/perfctr.c @@ -16,20 +16,16 @@ #include #include #include +#include extern unsigned int *x86_march_perfmap; extern int running_on_kvm(void); -#ifdef POSTK_DEBUG_TEMP_FIX_31 int ihk_mc_perfctr_fixed_init(int counter, int mode); -#endif/*POSTK_DEBUG_TEMP_FIX_31*/ //#define PERFCTR_DEBUG #ifdef PERFCTR_DEBUG -#define dkprintf(...) do { kprintf(__VA_ARGS__); } while (0) -#define ekprintf(...) do { kprintf(__VA_ARGS__); } while (0) -#else -#define dkprintf(...) do { } while (0) -#define ekprintf(...) 
do { kprintf(__VA_ARGS__); } while (0) +#undef DDEBUG_DEFAULT +#define DDEBUG_DEFAULT DDEBUG_PRINT #endif #define X86_CR4_PCE 0x00000100 @@ -43,11 +39,11 @@ int ihk_mc_perfctr_fixed_init(int counter, int mode); } \ } while(0) -int perf_counters_discovered = 0; -int X86_IA32_NUM_PERF_COUNTERS = 0; -unsigned long X86_IA32_PERF_COUNTERS_MASK = 0; -int X86_IA32_NUM_FIXED_PERF_COUNTERS = 0; -unsigned long X86_IA32_FIXED_PERF_COUNTERS_MASK = 0; +int perf_counters_discovered; +int NUM_PERF_COUNTERS; +unsigned long PERF_COUNTERS_MASK; +int NUM_FIXED_PERF_COUNTERS; +unsigned long FIXED_PERF_COUNTERS_MASK; void x86_init_perfctr(void) { @@ -78,17 +74,17 @@ void x86_init_perfctr(void) op = 0x0a; asm volatile("cpuid" : "=a"(eax),"=b"(ebx),"=c"(ecx),"=d"(edx):"a"(op)); - X86_IA32_NUM_PERF_COUNTERS = ((eax & 0xFF00) >> 8); - X86_IA32_PERF_COUNTERS_MASK = (1 << X86_IA32_NUM_PERF_COUNTERS) - 1; + NUM_PERF_COUNTERS = ((eax & 0xFF00) >> 8); + PERF_COUNTERS_MASK = (1 << NUM_PERF_COUNTERS) - 1; - X86_IA32_NUM_FIXED_PERF_COUNTERS = (edx & 0x0F); - X86_IA32_FIXED_PERF_COUNTERS_MASK = - ((1UL << X86_IA32_NUM_FIXED_PERF_COUNTERS) - 1) << - X86_IA32_BASE_FIXED_PERF_COUNTERS; + NUM_FIXED_PERF_COUNTERS = (edx & 0x0F); + FIXED_PERF_COUNTERS_MASK = + ((1UL << NUM_FIXED_PERF_COUNTERS) - 1) << + BASE_FIXED_PERF_COUNTERS; perf_counters_discovered = 1; - kprintf("X86_IA32_NUM_PERF_COUNTERS: %d, X86_IA32_NUM_FIXED_PERF_COUNTERS: %d\n", - X86_IA32_NUM_PERF_COUNTERS, X86_IA32_NUM_FIXED_PERF_COUNTERS); + kprintf("NUM_PERF_COUNTERS: %d, NUM_FIXED_PERF_COUNTERS: %d\n", + NUM_PERF_COUNTERS, NUM_FIXED_PERF_COUNTERS); } /* Clear Fixed Counter Control */ @@ -97,20 +93,20 @@ void x86_init_perfctr(void) wrmsr(MSR_PERF_FIXED_CTRL, value); /* Clear Generic Counter Control */ - for(i = 0; i < X86_IA32_NUM_PERF_COUNTERS; i++) { + for (i = 0; i < NUM_PERF_COUNTERS; i++) { wrmsr(MSR_IA32_PERFEVTSEL0 + i, 0); } /* Enable PMC Control */ value = rdmsr(MSR_PERF_GLOBAL_CTRL); - value |= X86_IA32_PERF_COUNTERS_MASK; - 
value |= X86_IA32_FIXED_PERF_COUNTERS_MASK; + value |= PERF_COUNTERS_MASK; + value |= FIXED_PERF_COUNTERS_MASK; wrmsr(MSR_PERF_GLOBAL_CTRL, value); } static int set_perfctr_x86_direct(int counter, int mode, unsigned int value) { - if (counter < 0 || counter >= X86_IA32_NUM_PERF_COUNTERS) { + if (counter < 0 || counter >= NUM_PERF_COUNTERS) { return -EINVAL; } @@ -149,13 +145,14 @@ static int set_pmc_x86_direct(int counter, long val) val &= 0x000000ffffffffff; // 40bit Mask cnt_bit = 1UL << counter; - if ( cnt_bit & X86_IA32_PERF_COUNTERS_MASK ) { + if (cnt_bit & PERF_COUNTERS_MASK) { // set generic pmc wrmsr(MSR_IA32_PMC0 + counter, val); } - else if ( cnt_bit & X86_IA32_FIXED_PERF_COUNTERS_MASK ) { + else if (cnt_bit & FIXED_PERF_COUNTERS_MASK) { // set fixed pmc - wrmsr(MSR_IA32_FIXED_CTR0 + counter - X86_IA32_BASE_FIXED_PERF_COUNTERS, val); + wrmsr(MSR_IA32_FIXED_CTR0 + + counter - BASE_FIXED_PERF_COUNTERS, val); } else { return -EINVAL; @@ -175,10 +172,10 @@ static int set_fixed_counter(int counter, int mode) { unsigned long value = 0; unsigned int ctr_mask = 0xf; - int counter_idx = counter - X86_IA32_BASE_FIXED_PERF_COUNTERS ; + int counter_idx = counter - BASE_FIXED_PERF_COUNTERS; unsigned int set_val = 0; - if (counter_idx < 0 || counter_idx >= X86_IA32_NUM_FIXED_PERF_COUNTERS) { + if (counter_idx < 0 || counter_idx >= NUM_FIXED_PERF_COUNTERS) { return -EINVAL; } @@ -208,14 +205,13 @@ int ihk_mc_perfctr_init_raw(int counter, uint64_t config, int mode) int ihk_mc_perfctr_init_raw(int counter, unsigned int code, int mode) #endif /*POSTK_DEBUG_TEMP_FIX_29*/ { -#ifdef POSTK_DEBUG_TEMP_FIX_31 // PAPI_REF_CYC counted by fixed counter - if (counter >= X86_IA32_BASE_FIXED_PERF_COUNTERS) { + if (counter >= BASE_FIXED_PERF_COUNTERS && + counter < BASE_FIXED_PERF_COUNTERS + NUM_FIXED_PERF_COUNTERS) { return ihk_mc_perfctr_fixed_init(counter, mode); } -#endif /*POSTK_DEBUG_TEMP_FIX_31*/ - if (counter < 0 || counter >= X86_IA32_NUM_PERF_COUNTERS) { + if (counter < 0 || 
counter >= NUM_PERF_COUNTERS) { return -EINVAL; } @@ -248,7 +244,7 @@ int ihk_mc_perfctr_init(int counter, enum ihk_perfctr_type type, int mode) } #endif /*POSTK_DEBUG_TEMP_FIX_29*/ - if (counter < 0 || counter >= X86_IA32_NUM_PERF_COUNTERS) { + if (counter < 0 || counter >= NUM_PERF_COUNTERS) { return -EINVAL; } if (type < 0 || type >= PERFCTR_MAX_TYPE) { @@ -300,18 +296,11 @@ int ihk_mc_perfctr_set_extra(struct mc_perf_event *event) extern void x86_march_perfctr_start(unsigned long counter_mask); #endif -#ifdef POSTK_DEBUG_TEMP_FIX_30 -int ihk_mc_perfctr_start(int counter) -#else int ihk_mc_perfctr_start(unsigned long counter_mask) -#endif /*POSTK_DEBUG_TEMP_FIX_30*/ { int ret = 0; unsigned long value = 0; - unsigned long mask = X86_IA32_PERF_COUNTERS_MASK | X86_IA32_FIXED_PERF_COUNTERS_MASK; -#ifdef POSTK_DEBUG_TEMP_FIX_30 - unsigned long counter_mask = 1UL << counter; -#endif /*POSTK_DEBUG_TEMP_FIX_30*/ + unsigned long mask = PERF_COUNTERS_MASK | FIXED_PERF_COUNTERS_MASK; PERFCTR_CHKANDJUMP(counter_mask & ~mask, "counter_mask out of range", -EINVAL); @@ -328,18 +317,11 @@ int ihk_mc_perfctr_start(unsigned long counter_mask) goto fn_exit; } -#ifdef POSTK_DEBUG_TEMP_FIX_30 -int ihk_mc_perfctr_stop(int counter) -#else int ihk_mc_perfctr_stop(unsigned long counter_mask) -#endif/*POSTK_DEBUG_TEMP_FIX_30*/ { int ret = 0; unsigned long value; - unsigned long mask = X86_IA32_PERF_COUNTERS_MASK | X86_IA32_FIXED_PERF_COUNTERS_MASK; -#ifdef POSTK_DEBUG_TEMP_FIX_30 - unsigned long counter_mask = 1UL << counter; -#endif/*POSTK_DEBUG_TEMP_FIX_30*/ + unsigned long mask = PERF_COUNTERS_MASK | FIXED_PERF_COUNTERS_MASK; PERFCTR_CHKANDJUMP(counter_mask & ~mask, "counter_mask out of range", -EINVAL); @@ -376,10 +358,10 @@ int ihk_mc_perfctr_fixed_init(int counter, int mode) { unsigned long value = 0; unsigned int ctr_mask = 0xf; - int counter_idx = counter - X86_IA32_BASE_FIXED_PERF_COUNTERS ; + int counter_idx = counter - BASE_FIXED_PERF_COUNTERS; unsigned int set_val = 0; - if 
(counter_idx < 0 || counter_idx >= X86_IA32_NUM_FIXED_PERF_COUNTERS) { + if (counter_idx < 0 || counter_idx >= NUM_FIXED_PERF_COUNTERS) { return -EINVAL; } @@ -420,7 +402,7 @@ int ihk_mc_perfctr_read_mask(unsigned long counter_mask, unsigned long *value) { int i, j; - for (i = 0, j = 0; i < X86_IA32_NUM_PERF_COUNTERS && counter_mask; + for (i = 0, j = 0; i < NUM_PERF_COUNTERS && counter_mask; i++, counter_mask >>= 1) { if (counter_mask & 1) { value[j++] = rdpmc(i); @@ -440,13 +422,14 @@ unsigned long ihk_mc_perfctr_read(int counter) cnt_bit = 1UL << counter; - if ( cnt_bit & X86_IA32_PERF_COUNTERS_MASK ) { + if (cnt_bit & PERF_COUNTERS_MASK) { // read generic pmc retval = rdpmc(counter); } - else if ( cnt_bit & X86_IA32_FIXED_PERF_COUNTERS_MASK ) { + else if (cnt_bit & FIXED_PERF_COUNTERS_MASK) { // read fixed pmc - retval = rdpmc((1 << 30) + (counter - X86_IA32_BASE_FIXED_PERF_COUNTERS)); + retval = rdpmc((1 << 30) + + (counter - BASE_FIXED_PERF_COUNTERS)); } else { retval = -EINVAL; @@ -468,12 +451,12 @@ unsigned long ihk_mc_perfctr_read_msr(int counter) cnt_bit = 1UL << counter; - if ( cnt_bit & X86_IA32_PERF_COUNTERS_MASK ) { + if (cnt_bit & PERF_COUNTERS_MASK) { // read generic pmc idx = MSR_IA32_PMC0 + counter; retval = (unsigned long) rdmsr(idx); } - else if ( cnt_bit & X86_IA32_FIXED_PERF_COUNTERS_MASK ) { + else if (cnt_bit & FIXED_PERF_COUNTERS_MASK) { // read fixed pmc idx = MSR_IA32_FIXED_CTR0 + counter; retval = (unsigned long) rdmsr(idx); @@ -506,8 +489,8 @@ int ihk_mc_perfctr_alloc_counter(unsigned int *type, unsigned long *config, unsi } // find avail generic counter - for(i = 0; i < X86_IA32_NUM_PERF_COUNTERS; i++) { - if(!(pmc_status & (1 << i))) { + for (i = 0; i < NUM_PERF_COUNTERS; i++) { + if (!(pmc_status & (1 << i))) { ret = i; break; } diff --git a/arch/x86_64/kernel/syscall.c b/arch/x86_64/kernel/syscall.c index f41fa288..230fa98c 100644 --- a/arch/x86_64/kernel/syscall.c +++ b/arch/x86_64/kernel/syscall.c @@ -31,12 +31,11 @@ #include 
#include #include +#include void terminate_mcexec(int, int); extern long do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact); long syscall(int num, ihk_mc_user_context_t *ctx); -void set_signal(int sig, void *regs0, siginfo_t *info); -void check_signal(unsigned long rc, void *regs0, int num); extern unsigned long do_fork(int, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); extern int get_xsave_size(); @@ -45,11 +44,8 @@ extern uint64_t get_xsave_mask(); //#define DEBUG_PRINT_SC #ifdef DEBUG_PRINT_SC -#define dkprintf kprintf -#define ekprintf(...) kprintf(__VA_ARGS__) -#else -#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) -#define ekprintf(...) kprintf(__VA_ARGS__) +#undef DDEBUG_DEFAULT +#define DDEBUG_DEFAULT DDEBUG_PRINT #endif uintptr_t debug_constants[] = { @@ -92,33 +88,45 @@ static ptrdiff_t vdso_offset; extern int num_processors; -int obtain_clone_cpuid(cpu_set_t *cpu_set) { +int obtain_clone_cpuid(cpu_set_t *cpu_set, int use_last) { int min_queue_len = -1; - int cpu, min_cpu = -1; - + int cpu, min_cpu = -1, uti_cpu = -1; + unsigned long irqstate; + + irqstate = ihk_mc_spinlock_lock(&runq_reservation_lock); /* Find the first allowed core with the shortest run queue */ for (cpu = 0; cpu < num_processors; ++cpu) { struct cpu_local_var *v; - unsigned long irqstate; if (!CPU_ISSET(cpu, cpu_set)) continue; v = get_cpu_local_var(cpu); - irqstate = ihk_mc_spinlock_lock(&v->runq_lock); - if (min_queue_len == -1 || v->runq_len < min_queue_len) { - min_queue_len = v->runq_len; + ihk_mc_spinlock_lock_noirq(&v->runq_lock); + dkprintf("%s: cpu=%d,runq_len=%d,runq_reserved=%d\n", __FUNCTION__, cpu, v->runq_len, v->runq_reserved); + if (min_queue_len == -1 || v->runq_len + v->runq_reserved < min_queue_len) { + min_queue_len = v->runq_len + v->runq_reserved; min_cpu = cpu; } - ihk_mc_spinlock_unlock(&v->runq_lock, irqstate); + /* Record the last tie CPU */ + if (min_cpu != cpu && 
v->runq_len + v->runq_reserved == min_queue_len) { + uti_cpu = cpu; + } + dkprintf("%s: cpu=%d,runq_len=%d,runq_reserved=%d,min_cpu=%d,uti_cpu=%d\n", __FUNCTION__, cpu, v->runq_len, v->runq_reserved, min_cpu, uti_cpu); + ihk_mc_spinlock_unlock_noirq(&v->runq_lock); +#if 0 if (min_queue_len == 0) break; +#endif } + min_cpu = use_last ? uti_cpu : min_cpu; if (min_cpu != -1) { if (get_cpu_local_var(min_cpu)->status != CPU_STATUS_RESERVED) get_cpu_local_var(min_cpu)->status = CPU_STATUS_RESERVED; + __sync_fetch_and_add(&get_cpu_local_var(min_cpu)->runq_reserved, 1); } + ihk_mc_spinlock_unlock(&runq_reservation_lock, irqstate); return min_cpu; } @@ -251,7 +259,7 @@ SYSCALL_DECLARE(rt_sigreturn) info.si_code = TRAP_TRACE; set_signal(SIGTRAP, regs, &info); check_need_resched(); - check_signal(0, regs, 0); + check_signal(0, regs, -1); } if(ksigsp.fpregs && xsavesize){ @@ -276,7 +284,6 @@ SYSCALL_DECLARE(rt_sigreturn) } extern struct cpu_local_var *clv; -extern unsigned long do_kill(struct thread *thread, int pid, int tid, int sig, struct siginfo *info, int ptracecont); extern void interrupt_syscall(struct thread *, int sig); extern void terminate(int, int); extern int num_processors; @@ -530,23 +537,32 @@ void ptrace_report_signal(struct thread *thread, int sig) dkprintf("ptrace_report_signal, tid=%d, pid=%d\n", thread->tid, thread->proc->pid); mcs_rwlock_writer_lock(&proc->update_lock, &lock); - if(!(proc->ptrace & PT_TRACED)){ + if (!(thread->ptrace & PT_TRACED)) { mcs_rwlock_writer_unlock(&proc->update_lock, &lock); return; } - thread->exit_status = sig; + /* Transition thread state */ - proc->status = PS_TRACED; + thread->exit_status = sig; thread->status = PS_TRACED; - proc->ptrace &= ~PT_TRACE_SYSCALL; - if (sig == SIGSTOP || sig == SIGTSTP || - sig == SIGTTIN || sig == SIGTTOU) { - proc->signal_flags |= SIGNAL_STOP_STOPPED; - } else { - proc->signal_flags &= ~SIGNAL_STOP_STOPPED; - } - parent_pid = proc->parent->pid; + thread->ptrace &= ~PT_TRACE_SYSCALL; 
save_debugreg(thread->ptrace_debugreg); + if (sig == SIGSTOP || sig == SIGTSTP || + sig == SIGTTIN || sig == SIGTTOU) { + thread->signal_flags |= SIGNAL_STOP_STOPPED; + } + else { + thread->signal_flags &= ~SIGNAL_STOP_STOPPED; + } + + if (thread == proc->main_thread) { + proc->status = PS_DELAY_TRACED; + parent_pid = proc->parent->pid; + } + else { + parent_pid = thread->report_proc->pid; + waitq_wakeup(&thread->report_proc->waitpid_q); + } mcs_rwlock_writer_unlock(&proc->update_lock, &lock); memset(&info, '\0', sizeof info); @@ -555,8 +571,6 @@ void ptrace_report_signal(struct thread *thread, int sig) info._sifields._sigchld.si_pid = thread->tid; info._sifields._sigchld.si_status = thread->exit_status; do_kill(cpu_local_var(current), parent_pid, -1, SIGCHLD, &info, 0); - /* Wake parent (if sleeping in wait4()) */ - waitq_wakeup(&proc->parent->waitpid_q); dkprintf("ptrace_report_signal,sleeping\n"); /* Sleep */ @@ -569,9 +583,8 @@ ptrace_arch_prctl(int pid, long code, long addr) { long rc = -EIO; struct thread *child; - struct mcs_rwlock_node_irqsave lock; - child = find_thread(pid, pid, &lock); + child = find_thread(pid, pid); if (!child) return -ESRCH; if (child->proc->status & (PS_TRACED | PS_STOPPED)) { @@ -613,7 +626,7 @@ ptrace_arch_prctl(int pid, long code, long addr) break; } } - thread_unlock(child, &lock); + thread_unlock(child); return rc; } @@ -635,11 +648,13 @@ arch_ptrace(long request, int pid, long addr, long data) static int isrestart(int num, unsigned long rc, int sig, int restart) { - if(sig == SIGKILL || sig == SIGSTOP) + if (sig == SIGKILL || sig == SIGSTOP) return 0; - if(num == 0 || rc != -EINTR) + if (num < 0 || rc != -EINTR) return 0; - switch(num){ + if (sig == SIGCHLD) + return 1; + switch (num) { case __NR_pause: case __NR_rt_sigsuspend: case __NR_rt_sigtimedwait: @@ -660,14 +675,12 @@ isrestart(int num, unsigned long rc, int sig, int restart) case __NR_io_getevents: return 0; } - if(sig == SIGCHLD) - return 1; - if(restart) + if 
(restart) return 1; return 0; } -void +int do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pending *pending, int num) { struct x86_user_context *regs = regs0; @@ -679,14 +692,15 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi int ptraceflag = 0; struct mcs_rwlock_node_irqsave lock; struct mcs_rwlock_node_irqsave mcs_rw_node; + int restart = 0; for(w = pending->sigmask.__val[0], sig = 0; w; sig++, w >>= 1); dkprintf("do_signal(): tid=%d, pid=%d, sig=%d\n", thread->tid, proc->pid, sig); orgsig = sig; - if((proc->ptrace & PT_TRACED) && - pending->ptracecont == 0 && - sig != SIGKILL) { + if ((thread->ptrace & PT_TRACED) && + pending->ptracecont == 0 && + sig != SIGKILL) { ptraceflag = 1; sig = SIGSTOP; } @@ -707,7 +721,7 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi if(k->sa.sa_handler == SIG_IGN){ kfree(pending); mcs_rwlock_writer_unlock(&thread->sigcommon->lock, &mcs_rw_node); - return; + goto out; } else if(k->sa.sa_handler){ unsigned long *usp; /* user stack */ @@ -757,9 +771,8 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi memcpy(&ksigsp.sigstack, &thread->sigstack, sizeof(stack_t)); ksigsp.sigrc = rc; ksigsp.num = num; - ksigsp.restart = isrestart(num, rc, sig, k->sa.sa_flags & SA_RESTART); - if(num != 0 && rc == -EINTR && sig == SIGCHLD) - ksigsp.restart = 1; + restart = isrestart(num, rc, sig, k->sa.sa_flags & SA_RESTART); + ksigsp.restart = restart; if(xsavesize){ uint64_t xsave_mask = get_xsave_mask(); unsigned int low = (unsigned int)xsave_mask; @@ -772,7 +785,7 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi kfree(_kfpregs); kprintf("do_signal,no space available\n"); terminate(0, sig); - return; + goto out; } kfpregs = (void *)((((unsigned long)_kfpregs) + 63) & ~63); memset(kfpregs, '\0', xsavesize); @@ -782,7 +795,7 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, 
struct sig_pendi kfree(_kfpregs); kprintf("do_signal,write_process_vm failed\n"); terminate(0, sig); - return; + goto out; } ksigsp.fpregs = (void *)fpregs; kfree(_kfpregs); @@ -794,7 +807,7 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi mcs_rwlock_writer_unlock(&thread->sigcommon->lock, &mcs_rw_node); kprintf("do_signal,write_process_vm failed\n"); terminate(0, sig); - return; + goto out; } usp = (unsigned long *)sigsp; @@ -824,12 +837,13 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi info.si_code = TRAP_TRACE; set_signal(SIGTRAP, regs, &info); check_need_resched(); - check_signal(0, regs, 0); + check_signal(0, regs, -1); } } else { int coredumped = 0; siginfo_t info; + int ptc = pending->ptracecont; if(ptraceflag){ if(thread->ptrace_recvsig) @@ -856,25 +870,37 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi info.si_code = CLD_STOPPED; info._sifields._sigchld.si_pid = thread->proc->pid; info._sifields._sigchld.si_status = (sig << 8) | 0x7f; - do_kill(cpu_local_var(current), thread->proc->parent->pid, -1, SIGCHLD, &info, 0); - dkprintf("do_signal,SIGSTOP,changing state\n"); + if (ptc == 2 && + thread != thread->proc->main_thread) { + thread->signal_flags = + SIGNAL_STOP_STOPPED; + thread->status = PS_STOPPED; + thread->exit_status = SIGSTOP; + do_kill(thread, + thread->report_proc->pid, -1, + SIGCHLD, &info, 0); + waitq_wakeup( + &thread->report_proc->waitpid_q); + } + else { + /* Update thread state in fork tree */ + mcs_rwlock_writer_lock( + &proc->update_lock, &lock); + proc->group_exit_status = SIGSTOP; - /* Update thread state in fork tree */ - mcs_rwlock_writer_lock(&proc->update_lock, &lock); - proc->group_exit_status = SIGSTOP; + /* Reap and set new signal_flags */ + proc->main_thread->signal_flags = + SIGNAL_STOP_STOPPED; - /* Reap and set new signal_flags */ - proc->signal_flags = SIGNAL_STOP_STOPPED; + proc->status = PS_DELAY_STOPPED; + 
thread->status = PS_STOPPED; + mcs_rwlock_writer_unlock( + &proc->update_lock, &lock); - proc->status = PS_STOPPED; - thread->status = PS_STOPPED; - mcs_rwlock_writer_unlock(&proc->update_lock, &lock); - - /* Wake up the parent who tried wait4 and sleeping */ - waitq_wakeup(&proc->parent->waitpid_q); - - dkprintf("do_signal(): pid: %d, tid: %d SIGSTOP, sleeping\n", - proc->pid, thread->tid); + do_kill(thread, + thread->proc->parent->pid, -1, + SIGCHLD, &info, 0); + } /* Sleep */ schedule(); dkprintf("SIGSTOP(): woken up\n"); @@ -882,19 +908,28 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi break; case SIGTRAP: dkprintf("do_signal,SIGTRAP\n"); - if(!(proc->ptrace & PT_TRACED)) { + if (!(thread->ptrace & PT_TRACED)) { goto core; } /* Update thread state in fork tree */ - mcs_rwlock_writer_lock(&proc->update_lock, &lock); thread->exit_status = SIGTRAP; - proc->status = PS_TRACED; thread->status = PS_TRACED; - mcs_rwlock_writer_unlock(&proc->update_lock, &lock); - - /* Wake up the parent who tried wait4 and sleeping */ - waitq_wakeup(&thread->proc->parent->waitpid_q); + if (thread == proc->main_thread) { + mcs_rwlock_writer_lock(&proc->update_lock, + &lock); + proc->group_exit_status = SIGTRAP; + proc->status = PS_DELAY_TRACED; + mcs_rwlock_writer_unlock(&proc->update_lock, + &lock); + do_kill(thread, thread->proc->parent->pid, -1, + SIGCHLD, &info, 0); + } + else { + do_kill(thread, thread->report_proc->pid, -1, + SIGCHLD, &info, 0); + waitq_wakeup(&thread->report_proc->waitpid_q); + } /* Sleep */ dkprintf("do_signal,SIGTRAP,sleeping\n"); @@ -909,7 +944,7 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi info._sifields._sigchld.si_pid = proc->pid; info._sifields._sigchld.si_status = 0x0000ffff; do_kill(cpu_local_var(current), proc->parent->pid, -1, SIGCHLD, &info, 0); - proc->signal_flags = SIGNAL_STOP_CONTINUED; + proc->main_thread->signal_flags = SIGNAL_STOP_CONTINUED; proc->status = PS_RUNNING; 
dkprintf("do_signal,SIGCONT,do nothing\n"); break; @@ -938,6 +973,8 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi break; } } +out: + return restart; } static struct sig_pending * @@ -957,10 +994,12 @@ getsigpending(struct thread *thread, int delflag){ lock = &thread->sigcommon->lock; head = &thread->sigcommon->sigpending; for(;;) { - if (delflag) + if (delflag) { mcs_rwlock_writer_lock(lock, &mcs_rw_node); - else + } + else { mcs_rwlock_reader_lock(lock, &mcs_rw_node); + } list_for_each_entry_safe(pending, next, head, list){ for(x = pending->sigmask.__val[0], sig = 0; x; sig++, x >>= 1); @@ -973,19 +1012,23 @@ getsigpending(struct thread *thread, int delflag){ if(delflag) list_del(&pending->list); - if (delflag) + if (delflag) { mcs_rwlock_writer_unlock(lock, &mcs_rw_node); - else + } + else { mcs_rwlock_reader_unlock(lock, &mcs_rw_node); + } return pending; } } } - if (delflag) + if (delflag) { mcs_rwlock_writer_unlock(lock, &mcs_rw_node); - else + } + else { mcs_rwlock_reader_unlock(lock, &mcs_rw_node); + } if(lock == &thread->sigpendinglock) return NULL; @@ -1000,6 +1043,11 @@ getsigpending(struct thread *thread, int delflag){ struct sig_pending * hassigpending(struct thread *thread) { + if (list_empty(&thread->sigpending) && + list_empty(&thread->sigcommon->sigpending)) { + return NULL; + } + return getsigpending(thread, 0); } @@ -1017,6 +1065,12 @@ void save_syscall_return_value(int num, unsigned long rc) return; } +/** \brief check arrived signals and processing + * + * @param rc return value of syscall + * @param regs0 context + * @param num syscall number (-1: Not called on exiting system call) + */ void check_signal(unsigned long rc, void *regs0, int num) { @@ -1050,6 +1104,11 @@ check_signal(unsigned long rc, void *regs0, int num) goto out; } + if (list_empty(&thread->sigpending) && + list_empty(&thread->sigcommon->sigpending)) { + goto out; + } + for(;;){ pending = getsigpending(thread, 1); if(!pending) { @@ -1057,7 
+1116,9 @@ check_signal(unsigned long rc, void *regs0, int num) goto out; } - do_signal(rc, regs, thread, pending, num); + if (do_signal(rc, regs, thread, pending, num)) { + num = -1; + } } out: @@ -1137,7 +1198,7 @@ check_sig_pending_thread(struct thread *thread) } void -check_sig_pending() +check_sig_pending(void) { struct thread *thread; struct cpu_local_var *v; @@ -1158,7 +1219,7 @@ repeat: continue; } - if (thread->proc->exit_status & 0x0000000100000000L) { + if (thread->proc->group_exit_status & 0x0000000100000000L) { continue; } @@ -1367,7 +1428,8 @@ done: return 0; } - if (tthread->thread_offloaded) { + /* Forward signal to Linux by interrupt_syscall mechanism */ + if (tthread->uti_state == UTI_STATE_RUNNING_IN_LINUX) { if (!tthread->proc->nohost) { interrupt_syscall(tthread, sig); } @@ -1384,10 +1446,10 @@ done: in check_signal */ rc = 0; k = tthread->sigcommon->action + sig - 1; - if((sig != SIGKILL && (tproc->ptrace & PT_TRACED)) || - (k->sa.sa_handler != (void *)1 && - (k->sa.sa_handler != NULL || - (sig != SIGCHLD && sig != SIGURG)))){ + if ((sig != SIGKILL && (tthread->ptrace & PT_TRACED)) || + (k->sa.sa_handler != (void *)1 && + (k->sa.sa_handler != NULL || + (sig != SIGCHLD && sig != SIGURG)))) { struct sig_pending *pending = NULL; if (sig < 33) { // SIGRTMIN - SIGRTMAX list_for_each_entry(pending, head, list){ @@ -1471,7 +1533,7 @@ set_signal(int sig, void *regs0, siginfo_t *info) SYSCALL_DECLARE(mmap) { - const int supported_flags = 0 + const unsigned int supported_flags = 0 | MAP_SHARED // 01 | MAP_PRIVATE // 02 | MAP_FIXED // 10 @@ -1479,7 +1541,7 @@ SYSCALL_DECLARE(mmap) | MAP_LOCKED // 2000 | MAP_POPULATE // 8000 | MAP_HUGETLB // 00040000 - | (0x3F << MAP_HUGE_SHIFT) // FC000000 + | (0x3FU << MAP_HUGE_SHIFT) // FC000000 ; const int ignored_flags = 0 #ifdef USE_NOCACHE_MMAP @@ -1498,7 +1560,7 @@ SYSCALL_DECLARE(mmap) | MAP_NONBLOCK // 00010000 ; - const intptr_t addr0 = ihk_mc_syscall_arg0(ctx); + const uintptr_t addr0 = 
ihk_mc_syscall_arg0(ctx); const size_t len0 = ihk_mc_syscall_arg1(ctx); const int prot = ihk_mc_syscall_arg2(ctx); const int flags0 = ihk_mc_syscall_arg3(ctx); @@ -1507,7 +1569,7 @@ SYSCALL_DECLARE(mmap) struct thread *thread = cpu_local_var(current); struct vm_regions *region = &thread->vm->region; int error; - intptr_t addr = 0; + uintptr_t addr = 0; size_t len; int flags = flags0; size_t pgsize; @@ -1699,6 +1761,11 @@ SYSCALL_DECLARE(arch_prctl) ihk_mc_syscall_arg1(ctx)); } +SYSCALL_DECLARE(time) +{ + return time(); +} + static int vdso_get_vdso_info(void) { int error; @@ -2081,7 +2148,7 @@ int do_process_vm_read_writev(int pid, range = lookup_process_memory_range(lthread->vm, (uintptr_t)local_iov, - (uintptr_t)(local_iov + liovcnt * sizeof(struct iovec))); + (uintptr_t)(local_iov + liovcnt)); if (!range) { ret = -EFAULT; @@ -2090,7 +2157,7 @@ int do_process_vm_read_writev(int pid, range = lookup_process_memory_range(lthread->vm, (uintptr_t)remote_iov, - (uintptr_t)(remote_iov + riovcnt * sizeof(struct iovec))); + (uintptr_t)(remote_iov + riovcnt)); if (!range) { ret = -EFAULT; @@ -2366,8 +2433,6 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg) case 0: memcpy(mpsr->virt_addr, mpsr->user_virt_addr, sizeof(void *) * count); - memcpy(mpsr->status, mpsr->user_status, - sizeof(int) * count); memcpy(mpsr->nodes, mpsr->user_nodes, sizeof(int) * count); memset(mpsr->ptep, 0, sizeof(pte_t) * count); @@ -2387,41 +2452,38 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg) case 0: memcpy(mpsr->virt_addr, mpsr->user_virt_addr, sizeof(void *) * count); - memcpy(mpsr->status, mpsr->user_status, - sizeof(int) * count); - case 1: memcpy(mpsr->nodes, mpsr->user_nodes, sizeof(int) * count); + mpsr->nodes_ready = 1; + break; + case 1: memset(mpsr->ptep, 0, sizeof(pte_t) * count); memset(mpsr->status, 0, sizeof(int) * count); memset(mpsr->nr_pages, 0, sizeof(int) * count); memset(mpsr->dst_phys, 0, sizeof(unsigned long) * count); - 
mpsr->nodes_ready = 1; break; default: break; } } - else if (nr_cpus >= 4 && nr_cpus < 8) { + else if (nr_cpus >= 4 && nr_cpus < 7) { switch (cpu_index) { case 0: memcpy(mpsr->virt_addr, mpsr->user_virt_addr, sizeof(void *) * count); break; case 1: - memcpy(mpsr->status, mpsr->user_status, - sizeof(int) * count); - break; - case 2: memcpy(mpsr->nodes, mpsr->user_nodes, sizeof(int) * count); mpsr->nodes_ready = 1; break; - case 3: + case 2: memset(mpsr->ptep, 0, sizeof(pte_t) * count); memset(mpsr->status, 0, sizeof(int) * count); + break; + case 3: memset(mpsr->nr_pages, 0, sizeof(int) * count); memset(mpsr->dst_phys, 0, sizeof(unsigned long) * count); @@ -2431,7 +2493,7 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg) break; } } - else if (nr_cpus >= 8) { + else { switch (cpu_index) { case 0: memcpy(mpsr->virt_addr, mpsr->user_virt_addr, @@ -2443,28 +2505,23 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg) sizeof(void *) * (count / 2)); break; case 2: - memcpy(mpsr->status, mpsr->user_status, - sizeof(int) * count); - break; - case 3: memcpy(mpsr->nodes, mpsr->user_nodes, sizeof(int) * count); mpsr->nodes_ready = 1; break; - case 4: + case 3: memset(mpsr->ptep, 0, sizeof(pte_t) * count); break; - case 5: + case 4: memset(mpsr->status, 0, sizeof(int) * count); break; - case 6: + case 5: memset(mpsr->nr_pages, 0, sizeof(int) * count); break; - case 7: + case 6: memset(mpsr->dst_phys, 0, sizeof(unsigned long) * count); break; - default: break; } @@ -2672,11 +2729,19 @@ out: time_t time(void) { struct syscall_request sreq IHK_DMA_ALIGN; - struct thread *thread = cpu_local_var(current); - time_t ret; - sreq.number = __NR_time; - sreq.args[0] = (uintptr_t)NULL; - ret = (time_t)do_syscall(&sreq, ihk_mc_get_processor_id(), thread->proc->pid); + struct timespec ats; + time_t ret = 0; + + if (gettime_local_support) { + calculate_time_from_tsc(&ats); + ret = ats.tv_sec; + } + else { + sreq.number = __NR_time; + sreq.args[0] = 
(uintptr_t)NULL; + ret = (time_t)do_syscall(&sreq, ihk_mc_get_processor_id()); + } + return ret; } diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index 1de7c452..e6e93e5e 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c @@ -31,51 +31,6 @@ struct tod_data_s tod_data .version = IHK_ATOMIC64_INIT(0), }; -static inline void cpu_pause_for_vsyscall(void) -{ - asm volatile ("pause" ::: "memory"); - return; -} /* cpu_pause_for_vsyscall() */ - -static inline void calculate_time_from_tsc(struct timespec *ts) -{ - long ver; - unsigned long current_tsc; - __time_t sec_delta; - long ns_delta; - - for (;;) { - while ((ver = ihk_atomic64_read(&tod_data.version)) & 1) { - /* settimeofday() is in progress */ - cpu_pause_for_vsyscall(); - } - rmb(); - *ts = tod_data.origin; - rmb(); - if (ver == ihk_atomic64_read(&tod_data.version)) { - break; - } - - /* settimeofday() has intervened */ - cpu_pause_for_vsyscall(); - } - - current_tsc = rdtsc(); - sec_delta = current_tsc / tod_data.clocks_per_sec; - ns_delta = NS_PER_SEC * (current_tsc % tod_data.clocks_per_sec) - / tod_data.clocks_per_sec; - /* calc. 
of ns_delta overflows if clocks_per_sec exceeds 18.44 GHz */ - - ts->tv_sec += sec_delta; - ts->tv_nsec += ns_delta; - if (ts->tv_nsec >= NS_PER_SEC) { - ts->tv_nsec -= NS_PER_SEC; - ++ts->tv_sec; - } - - return; -} /* calculate_time_from_tsc() */ - int vsyscall_gettimeofday(struct timeval *tv, void *tz) { int error; diff --git a/arch/x86_64/tools/mcreboot-smp-x86.sh.in b/arch/x86_64/tools/mcreboot-smp-x86.sh.in index 318c9999..f84ce034 100644 --- a/arch/x86_64/tools/mcreboot-smp-x86.sh.in +++ b/arch/x86_64/tools/mcreboot-smp-x86.sh.in @@ -45,11 +45,12 @@ fi turbo="" ihk_irq="" +safe_kernel_map="" umask_old=`umask` idle_halt="" allow_oversubscribe="" -while getopts :tk:c:m:o:f:r:q:i:d:e:hO OPT +while getopts stk:c:m:o:f:r:q:i:d:e:hO OPT do case ${OPT} in f) facility=${OPTARG} @@ -62,6 +63,8 @@ do ;; m) mem=${OPTARG} ;; + s) safe_kernel_map="safe_kernel_map" + ;; r) ikc_map=${OPTARG} ;; q) ihk_irq=${OPTARG} @@ -78,8 +81,8 @@ do ;; O) allow_oversubscribe="allow_oversubscribe" ;; - *) echo "invalid option -${OPT}" >&2 - exit 1 + \?) exit 1 + ;; esac done @@ -232,7 +235,7 @@ if [ "${ENABLE_MCOVERLAYFS}" == "yes" ]; then enable_mcoverlay="yes" fi else - if [ ${linux_version_code} -eq 199168 -a ${rhel_release} -ge 327 -a ${rhel_release} -le 693 ]; then + if [ ${linux_version_code} -eq 199168 -a ${rhel_release} -ge 327 -a ${rhel_release} -le 862 ]; then enable_mcoverlay="yes" fi if [ ${linux_version_code} -ge 262144 -a ${linux_version_code} -lt 262400 ]; then @@ -446,7 +449,7 @@ if ! ${SBINDIR}/ihkosctl 0 load ${KERNDIR}/mckernel.img; then fi # Set kernel arguments -if ! ${SBINDIR}/ihkosctl 0 kargs "hidos $turbo $idle_halt dump_level=${DUMP_LEVEL} $extra_kopts $allow_oversubscribe"; then +if ! 
${SBINDIR}/ihkosctl 0 kargs "hidos $turbo $safe_kernel_map $idle_halt dump_level=${DUMP_LEVEL} $extra_kopts $allow_oversubscribe"; then echo "error: setting kernel arguments" >&2 error_exit "os_created" fi diff --git a/config.h.in b/config.h.in index dd84e07f..0ff560da 100644 --- a/config.h.in +++ b/config.h.in @@ -54,48 +54,6 @@ /* Define to 1 if you have the header file. */ #undef HAVE_UNISTD_H -/* Define to address of kernel symbol __vvar_page, or 0 if exported */ -#undef MCCTRL_KSYM___vvar_page - -/* Define to address of kernel symbol hpet_address, or 0 if exported */ -#undef MCCTRL_KSYM_hpet_address - -/* Define to address of kernel symbol hv_clock, or 0 if exported */ -#undef MCCTRL_KSYM_hv_clock - -/* Define to address of kernel symbol sys_mount, or 0 if exported */ -#undef MCCTRL_KSYM_sys_mount - -/* Define to address of kernel symbol sys_readlink, or 0 if exported */ -#undef MCCTRL_KSYM_sys_readlink - -/* Define to address of kernel symbol sys_umount, or 0 if exported */ -#undef MCCTRL_KSYM_sys_umount - -/* Define to address of kernel symbol sys_unshare, or 0 if exported */ -#undef MCCTRL_KSYM_sys_unshare - -/* Define to address of kernel symbol vdso_end, or 0 if exported */ -#undef MCCTRL_KSYM_vdso_end - -/* Define to address of kernel symbol vdso_image_64, or 0 if exported */ -#undef MCCTRL_KSYM_vdso_image_64 - -/* Define to address of kernel symbol vdso_pages, or 0 if exported */ -#undef MCCTRL_KSYM_vdso_pages - -/* Define to address of kernel symbol vdso_spec, or 0 if exported */ -#undef MCCTRL_KSYM_vdso_spec - -/* Define to address of kernel symbol vdso_start, or 0 if exported */ -#undef MCCTRL_KSYM_vdso_start - -/* Define to address of kernel symbol walk_page_range, or 0 if exported */ -#undef MCCTRL_KSYM_walk_page_range - -/* Define to address of kernel symbol zap_page_range, or 0 if exported */ -#undef MCCTRL_KSYM_zap_page_range - /* McKernel specific headers */ #undef MCKERNEL_INCDIR @@ -128,3 +86,6 @@ /* Define to 1 if you have the ANSI C header 
files. */ #undef STDC_HEADERS + +/* whether or not syscall_intercept library is linked */ +#undef WITH_SYSCALL_INTERCEPT diff --git a/configure b/configure index 58e1baec..0ad270e6 100755 --- a/configure +++ b/configure @@ -628,9 +628,12 @@ IHK_RELEASE_DATE DCFA_VERSION MCKERNEL_VERSION IHK_VERSION +WITH_SYSCALL_INTERCEPT ENABLE_QLMPI ENABLE_RUSAGE ENABLE_MCOVERLAYFS +LDFLAGS_SYSCALL_INTERCEPT +CPPFLAGS_SYSCALL_INTERCEPT MANDIR KERNDIR KMODDIR @@ -702,6 +705,9 @@ enable_option_checking with_mpi with_mpi_include with_mpi_lib +with_syscall_intercept +with_syscall_intercept_include +with_syscall_intercept_lib with_kernelsrc with_target with_system_map @@ -1346,6 +1352,15 @@ Optional Packages: --with-mpi-include=PATH specify path where mpi include directory can be found --with-mpi-lib=PATH specify path where mpi lib directory can be found + --with-syscall_intercept=PATH + specify path where syscall_intercept include + directory and lib directory can be found + --with-syscall_intercept-include=PATH + specify path where syscall_intercept include + directory can be found + --with-syscall_intercept-lib=PATH + specify path where syscall_intercept lib directory + can be found --with-kernelsrc=path Path to 'kernel src', default is /lib/modules/uname_r/build --with-target={attached-mic | builtin-mic | builtin-x86 | smp-x86} @@ -2082,6 +2097,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu + + IHK_VERSION=1.5.1 MCKERNEL_VERSION=1.5.1 DCFA_VERSION=DCFA_VERSION_m4 @@ -3513,6 +3530,195 @@ fi + +# Check whether --with-syscall_intercept was given. 
+if test "${with_syscall_intercept+set}" = set; then : + withval=$with_syscall_intercept; case "$withval" in #( + yes|no|'') : + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: --without-syscall_intercept=PATH expects a valid PATH" >&5 +$as_echo "$as_me: WARNING: --without-syscall_intercept=PATH expects a valid PATH" >&2;} + with_syscall_intercept="" ;; #( + *) : + ;; +esac +else + with_syscall_intercept= +fi + + +# Check whether --with-syscall_intercept-include was given. +if test "${with_syscall_intercept_include+set}" = set; then : + withval=$with_syscall_intercept_include; case "$withval" in #( + yes|no|'') : + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: --without-syscall_intercept-include=PATH expects a valid PATH" >&5 +$as_echo "$as_me: WARNING: --without-syscall_intercept-include=PATH expects a valid PATH" >&2;} + with_syscall_intercept_include="" ;; #( + *) : + ;; +esac +fi + + +# Check whether --with-syscall_intercept-lib was given. +if test "${with_syscall_intercept_lib+set}" = set; then : + withval=$with_syscall_intercept_lib; case "$withval" in #( + yes|no|'') : + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: --without-syscall_intercept-lib=PATH expects a valid PATH" >&5 +$as_echo "$as_me: WARNING: --without-syscall_intercept-lib=PATH expects a valid PATH" >&2;} + with_syscall_intercept_lib="" ;; #( + *) : + ;; +esac +fi + + + # The args have been sanitized into empty/non-empty values above. 
+ # Now append -I/-L args to CPPFLAGS/LDFLAGS, with more specific options + # taking priority + + if test -n "${with_syscall_intercept_include}"; then : + + + if echo "$CPPFLAGS_SYSCALL_INTERCEPT" | $FGREP -e "\<-I${with_syscall_intercept_include}\>" >/dev/null 2>&1; then : + echo "CPPFLAGS_SYSCALL_INTERCEPT(='$CPPFLAGS_SYSCALL_INTERCEPT') contains '-I${with_syscall_intercept_include}', not appending" >&5 +else + echo "CPPFLAGS_SYSCALL_INTERCEPT(='$CPPFLAGS_SYSCALL_INTERCEPT') does not contain '-I${with_syscall_intercept_include}', appending" >&5 + CPPFLAGS_SYSCALL_INTERCEPT="$CPPFLAGS_SYSCALL_INTERCEPT -I${with_syscall_intercept_include}" + +fi + +else + if test -n "${with_syscall_intercept}"; then : + + + if echo "$CPPFLAGS_SYSCALL_INTERCEPT" | $FGREP -e "\<-I${with_syscall_intercept}/include\>" >/dev/null 2>&1; then : + echo "CPPFLAGS_SYSCALL_INTERCEPT(='$CPPFLAGS_SYSCALL_INTERCEPT') contains '-I${with_syscall_intercept}/include', not appending" >&5 +else + echo "CPPFLAGS_SYSCALL_INTERCEPT(='$CPPFLAGS_SYSCALL_INTERCEPT') does not contain '-I${with_syscall_intercept}/include', appending" >&5 + CPPFLAGS_SYSCALL_INTERCEPT="$CPPFLAGS_SYSCALL_INTERCEPT -I${with_syscall_intercept}/include" + +fi + +fi +fi + + if test -n "${with_syscall_intercept_lib}"; then : + + + if echo "$LDFLAGS_SYSCALL_INTERCEPT" | $FGREP -e "\<-L${with_syscall_intercept_lib} -Wl,-rpath,${with_syscall_intercept_lib}\>" >/dev/null 2>&1; then : + echo "LDFLAGS_SYSCALL_INTERCEPT(='$LDFLAGS_SYSCALL_INTERCEPT') contains '-L${with_syscall_intercept_lib} -Wl,-rpath,${with_syscall_intercept_lib}', not appending" >&5 +else + echo "LDFLAGS_SYSCALL_INTERCEPT(='$LDFLAGS_SYSCALL_INTERCEPT') does not contain '-L${with_syscall_intercept_lib} -Wl,-rpath,${with_syscall_intercept_lib}', appending" >&5 + LDFLAGS_SYSCALL_INTERCEPT="$LDFLAGS_SYSCALL_INTERCEPT -L${with_syscall_intercept_lib} -Wl,-rpath,${with_syscall_intercept_lib}" + +fi + +else + if test -n "${with_syscall_intercept}"; then : + + + if echo 
"$LDFLAGS_SYSCALL_INTERCEPT" | $FGREP -e "\<-L${with_syscall_intercept}/lib -Wl,-rpath,${with_syscall_intercept}/lib\>" >/dev/null 2>&1; then : + echo "LDFLAGS_SYSCALL_INTERCEPT(='$LDFLAGS_SYSCALL_INTERCEPT') contains '-L${with_syscall_intercept}/lib -Wl,-rpath,${with_syscall_intercept}/lib', not appending" >&5 +else + echo "LDFLAGS_SYSCALL_INTERCEPT(='$LDFLAGS_SYSCALL_INTERCEPT') does not contain '-L${with_syscall_intercept}/lib -Wl,-rpath,${with_syscall_intercept}/lib', appending" >&5 + LDFLAGS_SYSCALL_INTERCEPT="$LDFLAGS_SYSCALL_INTERCEPT -L${with_syscall_intercept}/lib -Wl,-rpath,${with_syscall_intercept}/lib" + +fi + + if test -d "${with_syscall_intercept}/lib64"; then : + + + if echo "$LDFLAGS_SYSCALL_INTERCEPT" | $FGREP -e "\<-L${with_syscall_intercept}/lib64 -Wl,-rpath,${with_syscall_intercept}/lib64\>" >/dev/null 2>&1; then : + echo "LDFLAGS_SYSCALL_INTERCEPT(='$LDFLAGS_SYSCALL_INTERCEPT') contains '-L${with_syscall_intercept}/lib64 -Wl,-rpath,${with_syscall_intercept}/lib64', not appending" >&5 +else + echo "LDFLAGS_SYSCALL_INTERCEPT(='$LDFLAGS_SYSCALL_INTERCEPT') does not contain '-L${with_syscall_intercept}/lib64 -Wl,-rpath,${with_syscall_intercept}/lib64', appending" >&5 + LDFLAGS_SYSCALL_INTERCEPT="$LDFLAGS_SYSCALL_INTERCEPT -L${with_syscall_intercept}/lib64 -Wl,-rpath,${with_syscall_intercept}/lib64" + +fi + +fi + +fi + +fi + + if test -n "${with_syscall_intercept}" || test -n "${with_syscall_intercept_include}" || test -n "${with_syscall_intercept_lib}"; then : + WITH_SYSCALL_INTERCEPT=yes +else + WITH_SYSCALL_INTERCEPT=no +fi + + +if test "x$WITH_SYSCALL_INTERCEPT" == "xno" ; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for syscall_no_intercept in -lsyscall_intercept" >&5 +$as_echo_n "checking for syscall_no_intercept in -lsyscall_intercept... 
" >&6; } +if ${ac_cv_lib_syscall_intercept_syscall_no_intercept+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lsyscall_intercept -lcapstone -ldl $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char syscall_no_intercept (); +int +main () +{ +return syscall_no_intercept (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_syscall_intercept_syscall_no_intercept=yes +else + ac_cv_lib_syscall_intercept_syscall_no_intercept=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_syscall_intercept_syscall_no_intercept" >&5 +$as_echo "$ac_cv_lib_syscall_intercept_syscall_no_intercept" >&6; } +if test "x$ac_cv_lib_syscall_intercept_syscall_no_intercept" = xyes; then : + syscall_intercept_lib_found=yes +else + syscall_intercept_lib_found=no +fi + + if test "x$syscall_intercept_lib_found" != "xyes"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: libsyscall_intercept.so not found" >&5 +$as_echo "$as_me: libsyscall_intercept.so not found" >&6;} +fi + + ac_fn_c_check_header_mongrel "$LINENO" "libsyscall_intercept_hook_point.h" "ac_cv_header_libsyscall_intercept_hook_point_h" "$ac_includes_default" +if test "x$ac_cv_header_libsyscall_intercept_hook_point_h" = xyes; then : + syscall_intercept_header_found=yes +else + syscall_intercept_header_found=no +fi + + + if test "x$syscall_intercept_header_found" != "xyes"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: libsyscall_intercept_hook_point.h not found" >&5 +$as_echo "$as_me: libsyscall_intercept_hook_point.h not found" >&6;} +fi + + if test "x$syscall_intercept_lib_found" == 
"xyes" && test "x$syscall_intercept_header_found" == "xyes"; then : + WITH_SYSCALL_INTERCEPT=yes +else + WITH_SYSCALL_INTERCEPT=no +fi +fi + + + # Check whether --with-kernelsrc was given. if test "${with_kernelsrc+set}" = set; then : withval=$with_kernelsrc; WITH_KERNELSRC=$withval @@ -4396,399 +4602,6 @@ KDIR="$WITH_KERNELSRC" UNAME_R="$WITH_UNAME_R" TARGET="$WITH_TARGET" -MCCTRL_LINUX_SYMTAB="" -case "X$WITH_SYSTEM_MAP" in - Xyes | Xno | X) - MCCTRL_LINUX_SYMTAB="" - ;; - *) - MCCTRL_LINUX_SYMTAB="$WITH_SYSTEM_MAP" - ;; -esac - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for System.map" >&5 -$as_echo_n "checking for System.map... " >&6; } -if test -r "$MCCTRL_LINUX_SYMTAB"; then - MCCTRL_LINUX_SYMTAB="$MCCTRL_LINUX_SYMTAB" -elif test -r "/boot/System.map-`uname -r`"; then - MCCTRL_LINUX_SYMTAB="/boot/System.map-`uname -r`" -elif test -r "$KDIR/System.map"; then - MCCTRL_LINUX_SYMTAB="$KDIR/System.map" -fi - -if test "$MCCTRL_LINUX_SYMTAB" == ""; then - as_fn_error $? "could not find" "$LINENO" 5 -fi - -if test -z "`eval cat $MCCTRL_LINUX_SYMTAB`"; then - as_fn_error $? "could not read System.map file, no read permission?" "$LINENO" 5 -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $MCCTRL_LINUX_SYMTAB" >&5 -$as_echo "$MCCTRL_LINUX_SYMTAB" >&6; } - -MCCTRL_LINUX_SYMTAB_CMD="cat $MCCTRL_LINUX_SYMTAB" - -# MCCTRL_FIND_KSYM(SYMBOL) -# ------------------------------------------------------ -# Search System.map for address of the given symbol and -# do one of three things in config.h: -# If not found, leave MCCTRL_KSYM_foo undefined -# If found to be exported, "#define MCCTRL_KSYM_foo 0" -# If found not to be exported, "#define MCCTRL_KSYM_foo 0x" - - - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol sys_mount" >&5 -$as_echo_n "checking System.map for symbol sys_mount... 
" >&6; } - mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " sys_mount\$" | cut -d\ -f1` - if test -z $mcctrl_addr; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5 -$as_echo "not found" >&6; } - else - mcctrl_result=$mcctrl_addr - mcctrl_addr="0x$mcctrl_addr" - - if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_sys_mount\$" >/dev/null`; then - mcctrl_result="exported" - mcctrl_addr="0" - fi - - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5 -$as_echo "$mcctrl_result" >&6; } - -cat >>confdefs.h <<_ACEOF -#define MCCTRL_KSYM_sys_mount $mcctrl_addr -_ACEOF - - fi - - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol sys_umount" >&5 -$as_echo_n "checking System.map for symbol sys_umount... " >&6; } - mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " sys_umount\$" | cut -d\ -f1` - if test -z $mcctrl_addr; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5 -$as_echo "not found" >&6; } - else - mcctrl_result=$mcctrl_addr - mcctrl_addr="0x$mcctrl_addr" - - if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_sys_umount\$" >/dev/null`; then - mcctrl_result="exported" - mcctrl_addr="0" - fi - - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5 -$as_echo "$mcctrl_result" >&6; } - -cat >>confdefs.h <<_ACEOF -#define MCCTRL_KSYM_sys_umount $mcctrl_addr -_ACEOF - - fi - - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol sys_unshare" >&5 -$as_echo_n "checking System.map for symbol sys_unshare... 
" >&6; } - mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " sys_unshare\$" | cut -d\ -f1` - if test -z $mcctrl_addr; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5 -$as_echo "not found" >&6; } - else - mcctrl_result=$mcctrl_addr - mcctrl_addr="0x$mcctrl_addr" - - if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_sys_unshare\$" >/dev/null`; then - mcctrl_result="exported" - mcctrl_addr="0" - fi - - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5 -$as_echo "$mcctrl_result" >&6; } - -cat >>confdefs.h <<_ACEOF -#define MCCTRL_KSYM_sys_unshare $mcctrl_addr -_ACEOF - - fi - - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol zap_page_range" >&5 -$as_echo_n "checking System.map for symbol zap_page_range... " >&6; } - mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " zap_page_range\$" | cut -d\ -f1` - if test -z $mcctrl_addr; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5 -$as_echo "not found" >&6; } - else - mcctrl_result=$mcctrl_addr - mcctrl_addr="0x$mcctrl_addr" - - if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_zap_page_range\$" >/dev/null`; then - mcctrl_result="exported" - mcctrl_addr="0" - fi - - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5 -$as_echo "$mcctrl_result" >&6; } - -cat >>confdefs.h <<_ACEOF -#define MCCTRL_KSYM_zap_page_range $mcctrl_addr -_ACEOF - - fi - - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol vdso_image_64" >&5 -$as_echo_n "checking System.map for symbol vdso_image_64... 
" >&6; } - mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " vdso_image_64\$" | cut -d\ -f1` - if test -z $mcctrl_addr; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5 -$as_echo "not found" >&6; } - else - mcctrl_result=$mcctrl_addr - mcctrl_addr="0x$mcctrl_addr" - - if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_vdso_image_64\$" >/dev/null`; then - mcctrl_result="exported" - mcctrl_addr="0" - fi - - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5 -$as_echo "$mcctrl_result" >&6; } - -cat >>confdefs.h <<_ACEOF -#define MCCTRL_KSYM_vdso_image_64 $mcctrl_addr -_ACEOF - - fi - - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol vdso_start" >&5 -$as_echo_n "checking System.map for symbol vdso_start... " >&6; } - mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " vdso_start\$" | cut -d\ -f1` - if test -z $mcctrl_addr; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5 -$as_echo "not found" >&6; } - else - mcctrl_result=$mcctrl_addr - mcctrl_addr="0x$mcctrl_addr" - - if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_vdso_start\$" >/dev/null`; then - mcctrl_result="exported" - mcctrl_addr="0" - fi - - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5 -$as_echo "$mcctrl_result" >&6; } - -cat >>confdefs.h <<_ACEOF -#define MCCTRL_KSYM_vdso_start $mcctrl_addr -_ACEOF - - fi - - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol vdso_end" >&5 -$as_echo_n "checking System.map for symbol vdso_end... 
" >&6; } - mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " vdso_end\$" | cut -d\ -f1` - if test -z $mcctrl_addr; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5 -$as_echo "not found" >&6; } - else - mcctrl_result=$mcctrl_addr - mcctrl_addr="0x$mcctrl_addr" - - if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_vdso_end\$" >/dev/null`; then - mcctrl_result="exported" - mcctrl_addr="0" - fi - - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5 -$as_echo "$mcctrl_result" >&6; } - -cat >>confdefs.h <<_ACEOF -#define MCCTRL_KSYM_vdso_end $mcctrl_addr -_ACEOF - - fi - - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol vdso_pages" >&5 -$as_echo_n "checking System.map for symbol vdso_pages... " >&6; } - mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " vdso_pages\$" | cut -d\ -f1` - if test -z $mcctrl_addr; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5 -$as_echo "not found" >&6; } - else - mcctrl_result=$mcctrl_addr - mcctrl_addr="0x$mcctrl_addr" - - if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_vdso_pages\$" >/dev/null`; then - mcctrl_result="exported" - mcctrl_addr="0" - fi - - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5 -$as_echo "$mcctrl_result" >&6; } - -cat >>confdefs.h <<_ACEOF -#define MCCTRL_KSYM_vdso_pages $mcctrl_addr -_ACEOF - - fi - - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol __vvar_page" >&5 -$as_echo_n "checking System.map for symbol __vvar_page... 
" >&6; } - mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __vvar_page\$" | cut -d\ -f1` - if test -z $mcctrl_addr; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5 -$as_echo "not found" >&6; } - else - mcctrl_result=$mcctrl_addr - mcctrl_addr="0x$mcctrl_addr" - - if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab___vvar_page\$" >/dev/null`; then - mcctrl_result="exported" - mcctrl_addr="0" - fi - - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5 -$as_echo "$mcctrl_result" >&6; } - -cat >>confdefs.h <<_ACEOF -#define MCCTRL_KSYM___vvar_page $mcctrl_addr -_ACEOF - - fi - - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol hpet_address" >&5 -$as_echo_n "checking System.map for symbol hpet_address... " >&6; } - mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " hpet_address\$" | cut -d\ -f1` - if test -z $mcctrl_addr; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5 -$as_echo "not found" >&6; } - else - mcctrl_result=$mcctrl_addr - mcctrl_addr="0x$mcctrl_addr" - - if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_hpet_address\$" >/dev/null`; then - mcctrl_result="exported" - mcctrl_addr="0" - fi - - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5 -$as_echo "$mcctrl_result" >&6; } - -cat >>confdefs.h <<_ACEOF -#define MCCTRL_KSYM_hpet_address $mcctrl_addr -_ACEOF - - fi - -# POSTK_DEBUG_ARCH_DEP_50, add:find kernel symbol. - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol vdso_spec" >&5 -$as_echo_n "checking System.map for symbol vdso_spec... 
" >&6; } - mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " vdso_spec\$" | cut -d\ -f1` - if test -z $mcctrl_addr; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5 -$as_echo "not found" >&6; } - else - mcctrl_result=$mcctrl_addr - mcctrl_addr="0x$mcctrl_addr" - - if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_vdso_spec\$" >/dev/null`; then - mcctrl_result="exported" - mcctrl_addr="0" - fi - - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5 -$as_echo "$mcctrl_result" >&6; } - -cat >>confdefs.h <<_ACEOF -#define MCCTRL_KSYM_vdso_spec $mcctrl_addr -_ACEOF - - fi - - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol hv_clock" >&5 -$as_echo_n "checking System.map for symbol hv_clock... " >&6; } - mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " hv_clock\$" | cut -d\ -f1` - if test -z $mcctrl_addr; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5 -$as_echo "not found" >&6; } - else - mcctrl_result=$mcctrl_addr - mcctrl_addr="0x$mcctrl_addr" - - if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_hv_clock\$" >/dev/null`; then - mcctrl_result="exported" - mcctrl_addr="0" - fi - - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5 -$as_echo "$mcctrl_result" >&6; } - -cat >>confdefs.h <<_ACEOF -#define MCCTRL_KSYM_hv_clock $mcctrl_addr -_ACEOF - - fi - - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol sys_readlink" >&5 -$as_echo_n "checking System.map for symbol sys_readlink... 
" >&6; } - mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " sys_readlink\$" | cut -d\ -f1` - if test -z $mcctrl_addr; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5 -$as_echo "not found" >&6; } - else - mcctrl_result=$mcctrl_addr - mcctrl_addr="0x$mcctrl_addr" - - if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_sys_readlink\$" >/dev/null`; then - mcctrl_result="exported" - mcctrl_addr="0" - fi - - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5 -$as_echo "$mcctrl_result" >&6; } - -cat >>confdefs.h <<_ACEOF -#define MCCTRL_KSYM_sys_readlink $mcctrl_addr -_ACEOF - - fi - - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol walk_page_range" >&5 -$as_echo_n "checking System.map for symbol walk_page_range... " >&6; } - mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " walk_page_range\$" | cut -d\ -f1` - if test -z $mcctrl_addr; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5 -$as_echo "not found" >&6; } - else - mcctrl_result=$mcctrl_addr - mcctrl_addr="0x$mcctrl_addr" - - if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_walk_page_range\$" >/dev/null`; then - mcctrl_result="exported" - mcctrl_addr="0" - fi - - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5 -$as_echo "$mcctrl_result" >&6; } - -cat >>confdefs.h <<_ACEOF -#define MCCTRL_KSYM_walk_page_range $mcctrl_addr -_ACEOF - - fi - - case $ENABLE_MEMDUMP in yes|no|auto) ;; @@ -4986,6 +4799,17 @@ else $as_echo "$as_me: perf is disabled" >&6;} fi +if test "x$WITH_SYSCALL_INTERCEPT" = "xyes" ; then + +$as_echo "#define WITH_SYSCALL_INTERCEPT 1" >>confdefs.h + + { $as_echo "$as_me:${as_lineno-$LINENO}: syscall_intercept library is linked" >&5 +$as_echo "$as_me: syscall_intercept library is linked" >&6;} +else + { $as_echo "$as_me:${as_lineno-$LINENO}: syscall_intercept library isn't linked" >&5 +$as_echo "$as_me: syscall_intercept library isn't linked" >&6;} +fi + if test "x$MCKERNEL_INCDIR" != 
"x" ; then cat >>confdefs.h <<_ACEOF @@ -5052,6 +4876,9 @@ fi + + + @@ -5060,9 +4887,14 @@ ac_config_headers="$ac_config_headers config.h" # POSTK_DEBUG_ARCH_DEP_37 # AC_CONFIG_FILES arch dependfiles separate -ac_config_files="$ac_config_files Makefile executer/user/Makefile executer/user/mcexec.1:executer/user/mcexec.1in executer/user/vmcore2mckdump executer/user/arch/$ARCH/Makefile executer/user/arch/x86_64/Makefile executer/kernel/mcctrl/Makefile executer/kernel/mcctrl/arch/$ARCH/Makefile executer/kernel/mcoverlayfs/Makefile executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/Makefile executer/kernel/mcoverlayfs/linux-4.0.9/Makefile executer/kernel/mcoverlayfs/linux-4.6.7/Makefile executer/include/qlmpilib.h kernel/Makefile kernel/Makefile.build kernel/include/swapfmt.h arch/x86_64/tools/mcreboot-attached-mic.sh arch/x86_64/tools/mcshutdown-attached-mic.sh arch/x86_64/tools/mcreboot-builtin-x86.sh arch/x86_64/tools/mcreboot-smp-x86.sh arch/x86_64/tools/mcstop+release-smp-x86.sh arch/x86_64/tools/mcoverlay-destroy-smp-x86.sh arch/x86_64/tools/mcoverlay-create-smp-x86.sh arch/x86_64/tools/eclair-dump-backtrace.exp arch/x86_64/tools/mcshutdown-builtin-x86.sh arch/x86_64/tools/mcreboot.1:arch/x86_64/tools/mcreboot.1in arch/x86_64/tools/irqbalance_mck.service arch/x86_64/tools/irqbalance_mck.in tools/mcstat/Makefile" +ac_config_files="$ac_config_files Makefile executer/user/Makefile executer/user/mcexec.1:executer/user/mcexec.1in executer/user/vmcore2mckdump executer/user/arch/$ARCH/Makefile executer/user/arch/x86_64/Makefile executer/kernel/mcctrl/Makefile executer/kernel/mcctrl/arch/$ARCH/Makefile executer/kernel/mcoverlayfs/Makefile executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/Makefile executer/kernel/mcoverlayfs/linux-4.0.9/Makefile executer/kernel/mcoverlayfs/linux-4.6.7/Makefile executer/include/qlmpilib.h kernel/Makefile kernel/Makefile.build kernel/include/swapfmt.h arch/x86_64/tools/mcreboot-attached-mic.sh 
arch/x86_64/tools/mcshutdown-attached-mic.sh arch/x86_64/tools/mcreboot-builtin-x86.sh arch/x86_64/tools/mcreboot-smp-x86.sh arch/x86_64/tools/mcstop+release-smp-x86.sh arch/x86_64/tools/mcoverlay-destroy-smp-x86.sh arch/x86_64/tools/mcoverlay-create-smp-x86.sh arch/x86_64/tools/eclair-dump-backtrace.exp arch/x86_64/tools/mcshutdown-builtin-x86.sh arch/x86_64/tools/mcreboot.1:arch/x86_64/tools/mcreboot.1in arch/x86_64/tools/irqbalance_mck.service arch/x86_64/tools/irqbalance_mck.in tools/mcstat/mcstat.1:tools/mcstat/mcstat.1in tools/mcstat/Makefile" +if test -e "${ABS_SRCDIR}/test"; then +ac_config_files="$ac_config_files mck_test_config.sample:test/mck_test_config.sample.in" + +fi + if test "$TARGET" = "smp-x86"; then ac_config_files="$ac_config_files arch/x86_64/kernel/Makefile.arch" @@ -5797,7 +5629,9 @@ do "arch/x86_64/tools/mcreboot.1") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcreboot.1:arch/x86_64/tools/mcreboot.1in" ;; "arch/x86_64/tools/irqbalance_mck.service") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/irqbalance_mck.service" ;; "arch/x86_64/tools/irqbalance_mck.in") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/irqbalance_mck.in" ;; + "tools/mcstat/mcstat.1") CONFIG_FILES="$CONFIG_FILES tools/mcstat/mcstat.1:tools/mcstat/mcstat.1in" ;; "tools/mcstat/Makefile") CONFIG_FILES="$CONFIG_FILES tools/mcstat/Makefile" ;; + "mck_test_config.sample") CONFIG_FILES="$CONFIG_FILES mck_test_config.sample:test/mck_test_config.sample.in" ;; "arch/x86_64/kernel/Makefile.arch") CONFIG_FILES="$CONFIG_FILES arch/x86_64/kernel/Makefile.arch" ;; "kernel/config/config.smp-arm64") CONFIG_FILES="$CONFIG_FILES kernel/config/config.smp-arm64" ;; "arch/arm64/kernel/vdso/Makefile") CONFIG_FILES="$CONFIG_FILES arch/arm64/kernel/vdso/Makefile" ;; diff --git a/configure.ac b/configure.ac index da806bbd..732d2d40 100644 --- a/configure.ac +++ b/configure.ac @@ -77,6 +77,58 @@ AC_DEFUN([PAC_SET_HEADER_LIB_PATH],[ ]) ]) +AC_DEFUN([PAC_SET_HEADER_LIB_PATH_SYSCALL_INTERCEPT],[ + 
AC_ARG_WITH([$1], + [AC_HELP_STRING([--with-$1=PATH], + [specify path where $1 include directory and lib directory can be found])], + + [AS_CASE(["$withval"], + [yes|no|''], + [AC_MSG_WARN([--with[out]-$1=PATH expects a valid PATH]) + with_$1=""])], + [with_$1=$2]) + AC_ARG_WITH([$1-include], + [AC_HELP_STRING([--with-$1-include=PATH], + [specify path where $1 include directory can be found])], + [AS_CASE(["$withval"], + [yes|no|''], + [AC_MSG_WARN([--with[out]-$1-include=PATH expects a valid PATH]) + with_$1_include=""])], + []) + AC_ARG_WITH([$1-lib], + [AC_HELP_STRING([--with-$1-lib=PATH], + [specify path where $1 lib directory can be found])], + [AS_CASE(["$withval"], + [yes|no|''], + [AC_MSG_WARN([--with[out]-$1-lib=PATH expects a valid PATH]) + with_$1_lib=""])], + []) + + # The args have been sanitized into empty/non-empty values above. + # Now append -I/-L args to CPPFLAGS/LDFLAGS, with more specific options + # taking priority + + AS_IF([test -n "${with_$1_include}"], + [PAC_APPEND_FLAG([-I${with_$1_include}],[CPPFLAGS_SYSCALL_INTERCEPT])], + [AS_IF([test -n "${with_$1}"], + [PAC_APPEND_FLAG([-I${with_$1}/include],[CPPFLAGS_SYSCALL_INTERCEPT])])]) + + AS_IF([test -n "${with_$1_lib}"], + [PAC_APPEND_FLAG([-L${with_$1_lib} -Wl,-rpath,${with_$1_lib}],[LDFLAGS_SYSCALL_INTERCEPT])], + [AS_IF([test -n "${with_$1}"], + dnl is adding lib64 by default really the right thing to do? What if + dnl we are on a 32-bit host that happens to have both lib dirs available? 
+ [PAC_APPEND_FLAG([-L${with_$1}/lib -Wl,-rpath,${with_$1}/lib],[LDFLAGS_SYSCALL_INTERCEPT]) + AS_IF([test -d "${with_$1}/lib64"], + [PAC_APPEND_FLAG([-L${with_$1}/lib64 -Wl,-rpath,${with_$1}/lib64],[LDFLAGS_SYSCALL_INTERCEPT])]) + ]) + ]) + + AS_IF([test -n "${with_$1}" || test -n "${with_$1_include}" || test -n "${with_$1_lib}"], + [WITH_SYSCALL_INTERCEPT=yes], + [WITH_SYSCALL_INTERCEPT=no]) +]) + IHK_VERSION=IHK_VERSION_m4 MCKERNEL_VERSION=MCKERNEL_VERSION_m4 DCFA_VERSION=DCFA_VERSION_m4 @@ -95,6 +147,23 @@ AS_IF([test "x$numa_lib_found" != "xyes"], PAC_SET_HEADER_LIB_PATH([mpi]) +PAC_SET_HEADER_LIB_PATH_SYSCALL_INTERCEPT([syscall_intercept]) + +if test "x$WITH_SYSCALL_INTERCEPT" == "xno" ; then + AC_CHECK_LIB([syscall_intercept],[syscall_no_intercept],[syscall_intercept_lib_found=yes],[syscall_intercept_lib_found=no],[-lcapstone -ldl]) + AS_IF([test "x$syscall_intercept_lib_found" != "xyes"], + [AC_MSG_NOTICE([libsyscall_intercept.so not found])]) + + AC_CHECK_HEADER([libsyscall_intercept_hook_point.h],[syscall_intercept_header_found=yes],[syscall_intercept_header_found=no]) + AS_IF([test "x$syscall_intercept_header_found" != "xyes"], + [AC_MSG_NOTICE([libsyscall_intercept_hook_point.h not found])]) + + AS_IF([test "x$syscall_intercept_lib_found" == "xyes" && test "x$syscall_intercept_header_found" == "xyes"], + [WITH_SYSCALL_INTERCEPT=yes], + [WITH_SYSCALL_INTERCEPT=no]) +fi + + AC_ARG_WITH([kernelsrc], AC_HELP_STRING( [--with-kernelsrc=path],[Path to 'kernel src', default is /lib/modules/uname_r/build]), @@ -339,78 +408,6 @@ KDIR="$WITH_KERNELSRC" UNAME_R="$WITH_UNAME_R" TARGET="$WITH_TARGET" -MCCTRL_LINUX_SYMTAB="" -case "X$WITH_SYSTEM_MAP" in - Xyes | Xno | X) - MCCTRL_LINUX_SYMTAB="" - ;; - *) - MCCTRL_LINUX_SYMTAB="$WITH_SYSTEM_MAP" - ;; -esac - -AC_MSG_CHECKING([[for System.map]]) -if test -r "$MCCTRL_LINUX_SYMTAB"; then - MCCTRL_LINUX_SYMTAB="$MCCTRL_LINUX_SYMTAB" -elif test -r "/boot/System.map-`uname -r`"; then - 
MCCTRL_LINUX_SYMTAB="/boot/System.map-`uname -r`" -elif test -r "$KDIR/System.map"; then - MCCTRL_LINUX_SYMTAB="$KDIR/System.map" -fi - -if test "$MCCTRL_LINUX_SYMTAB" == ""; then - AC_MSG_ERROR([could not find]) -fi - -if test -z "`eval cat $MCCTRL_LINUX_SYMTAB`"; then - AC_MSG_ERROR([could not read System.map file, no read permission?]) -fi -AC_MSG_RESULT([$MCCTRL_LINUX_SYMTAB]) - -MCCTRL_LINUX_SYMTAB_CMD="cat $MCCTRL_LINUX_SYMTAB" - -# MCCTRL_FIND_KSYM(SYMBOL) -# ------------------------------------------------------ -# Search System.map for address of the given symbol and -# do one of three things in config.h: -# If not found, leave MCCTRL_KSYM_foo undefined -# If found to be exported, "#define MCCTRL_KSYM_foo 0" -# If found not to be exported, "#define MCCTRL_KSYM_foo 0x" -AC_DEFUN([MCCTRL_FIND_KSYM],[ - AC_MSG_CHECKING([[System.map for symbol $1]]) - mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " $1\$" | cut -d\ -f1` - if test -z $mcctrl_addr; then - AC_MSG_RESULT([not found]) - else - mcctrl_result=$mcctrl_addr - mcctrl_addr="0x$mcctrl_addr" - m4_ifval([$2],[],[ - if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_$1\$" >/dev/null`; then - mcctrl_result="exported" - mcctrl_addr="0" - fi - ]) - AC_MSG_RESULT([$mcctrl_result]) - AC_DEFINE_UNQUOTED(MCCTRL_KSYM_[]$1,$mcctrl_addr,[Define to address of kernel symbol $1, or 0 if exported]) - fi -]) - -MCCTRL_FIND_KSYM([sys_mount]) -MCCTRL_FIND_KSYM([sys_umount]) -MCCTRL_FIND_KSYM([sys_unshare]) -MCCTRL_FIND_KSYM([zap_page_range]) -MCCTRL_FIND_KSYM([vdso_image_64]) -MCCTRL_FIND_KSYM([vdso_start]) -MCCTRL_FIND_KSYM([vdso_end]) -MCCTRL_FIND_KSYM([vdso_pages]) -MCCTRL_FIND_KSYM([__vvar_page]) -MCCTRL_FIND_KSYM([hpet_address]) -# POSTK_DEBUG_ARCH_DEP_50, add:find kernel symbol. 
-MCCTRL_FIND_KSYM([vdso_spec]) -MCCTRL_FIND_KSYM([hv_clock]) -MCCTRL_FIND_KSYM([sys_readlink]) -MCCTRL_FIND_KSYM([walk_page_range]) - case $ENABLE_MEMDUMP in yes|no|auto) ;; @@ -489,6 +486,13 @@ else AC_MSG_NOTICE([perf is disabled]) fi +if test "x$WITH_SYSCALL_INTERCEPT" = "xyes" ; then + AC_DEFINE([WITH_SYSCALL_INTERCEPT],[1],[whether or not syscall_intercept library is linked]) + AC_MSG_NOTICE([syscall_intercept library is linked]) +else + AC_MSG_NOTICE([syscall_intercept library isn't linked]) +fi + if test "x$MCKERNEL_INCDIR" != "x" ; then AC_DEFINE_UNQUOTED(MCKERNEL_INCDIR,"$MCKERNEL_INCDIR",[McKernel specific headers]) fi @@ -526,9 +530,12 @@ AC_SUBST(KMODDIR) AC_SUBST(KERNDIR) AC_SUBST(MANDIR) AC_SUBST(CFLAGS) +AC_SUBST(CPPFLAGS_SYSCALL_INTERCEPT) +AC_SUBST(LDFLAGS_SYSCALL_INTERCEPT) AC_SUBST(ENABLE_MCOVERLAYFS) AC_SUBST(ENABLE_RUSAGE) AC_SUBST(ENABLE_QLMPI) +AC_SUBST(WITH_SYSCALL_INTERCEPT) AC_SUBST(IHK_VERSION) AC_SUBST(MCKERNEL_VERSION) @@ -570,9 +577,16 @@ AC_CONFIG_FILES([ arch/x86_64/tools/mcreboot.1:arch/x86_64/tools/mcreboot.1in arch/x86_64/tools/irqbalance_mck.service arch/x86_64/tools/irqbalance_mck.in + tools/mcstat/mcstat.1:tools/mcstat/mcstat.1in tools/mcstat/Makefile ]) +if test -e "${ABS_SRCDIR}/test"; then +AC_CONFIG_FILES([ +mck_test_config.sample:test/mck_test_config.sample.in +]) +fi + if test "$TARGET" = "smp-x86"; then AC_CONFIG_FILES([ arch/x86_64/kernel/Makefile.arch diff --git a/executer/include/uprotocol.h b/executer/include/uprotocol.h index e590fd22..d2b44310 100644 --- a/executer/include/uprotocol.h +++ b/executer/include/uprotocol.h @@ -55,13 +55,14 @@ #define MCEXEC_UP_SYS_UMOUNT 0x30a02915 #define MCEXEC_UP_SYS_UNSHARE 0x30a02916 -#define MCEXEC_UP_UTIL_THREAD1 0x30a02920 -#define MCEXEC_UP_UTIL_THREAD2 0x30a02921 +#define MCEXEC_UP_UTI_GET_CTX 0x30a02920 +#define MCEXEC_UP_UTI_SAVE_FS 0x30a02921 #define MCEXEC_UP_SIG_THREAD 0x30a02922 #define MCEXEC_UP_SYSCALL_THREAD 0x30a02924 #define MCEXEC_UP_TERMINATE_THREAD 0x30a02925 
#define MCEXEC_UP_GET_NUM_POOL_THREADS 0x30a02926 #define MCEXEC_UP_UTI_ATTR 0x30a02927 +#define MCEXEC_UP_RELEASE_USER_SPACE 0x30a02928 #define MCEXEC_UP_DEBUG_LOG 0x40000000 @@ -91,6 +92,7 @@ struct program_image_section { struct get_cpu_set_arg { int nr_processes; + int *process_rank; void *cpu_set; size_t cpu_set_size; // Size in bytes int *target_core; @@ -140,8 +142,10 @@ struct program_load_desc { unsigned long heap_extension; long stack_premap; unsigned long mpol_bind_mask; + int uti_thread_rank; /* N-th clone() spawns a thread on Linux CPU */ + int uti_use_last_cpu; /* Work-around not to share CPU with OpenMP thread */ int nr_processes; - char shell_path[SHELL_PATH_MAX_LEN]; + int process_rank; __cpu_set_unit cpu_set[PLD_CPU_SET_SIZE]; int profile; struct program_image_section sections[0]; @@ -242,6 +246,28 @@ struct sys_unshare_desc { unsigned long unshare_flags; }; +struct release_user_space_desc { + unsigned long user_start; + unsigned long user_end; +}; + +struct terminate_thread_desc { + int pid; + int tid; + + long code; + /* 32------32 31--16 15--------8 7----0 + exit_group exit-status signal */ + + unsigned long tsk; /* struct task_struct * */ +}; + +struct rpgtable_desc { + uintptr_t rpgtable; + uintptr_t start; + uintptr_t len; +}; + enum perf_ctrl_type { PERF_CTRL_SET, PERF_CTRL_GET, @@ -251,6 +277,7 @@ enum perf_ctrl_type { struct perf_ctrl_desc { enum perf_ctrl_type ctrl_type; + int err; union { /* for SET, GET */ struct { @@ -290,6 +317,10 @@ struct perf_ctrl_desc { #define UTI_FLAG_HIGH_PRIORITY (1ULL<<12) #define UTI_FLAG_NON_COOPERATIVE (1ULL<<13) +#define UTI_FLAG_PREFER_LWK (1ULL << 14) +#define UTI_FLAG_PREFER_FWK (1ULL << 15) +#define UTI_FLAG_FABRIC_INTR_AFFINITY (1ULL << 16) + /* Linux default value is used */ #define UTI_MAX_NUMA_DOMAINS (1024) @@ -308,6 +339,30 @@ struct kuti_attr { struct uti_attr_desc { unsigned long phys_attr; + char *uti_cpu_set_str; /* UTI_CPU_SET environmental variable */ + size_t uti_cpu_set_len; +}; + 
+struct uti_ctx { + union { + char ctx[4096]; /* TODO: Get the size from config.h */ + struct { + int uti_refill_tid; + }; + }; +}; + +struct uti_get_ctx_desc { + unsigned long rp_rctx; /* Remote physical address of remote context */ + void *rctx; /* Remote context */ + void *lctx; /* Local context */ + int uti_refill_tid; + unsigned long key; /* OUT: struct task_struct* of mcexec thread, used to search struct host_thread */ +}; + +struct uti_save_fs_desc { + void *rctx; /* Remote context */ + void *lctx; /* Local context */ }; #endif diff --git a/executer/include/uti.h b/executer/include/uti.h new file mode 100644 index 00000000..05f7c7a1 --- /dev/null +++ b/executer/include/uti.h @@ -0,0 +1,31 @@ +#ifndef UTI_H_INCLUDED +#define UTI_H_INCLUDED + +struct syscall_struct { + int number; + unsigned long args[6]; + unsigned long ret; + unsigned long uti_clv; /* copy of a clv in McKernel */ +}; + +#define UTI_SZ_SYSCALL_STACK 16 + +/* Variables accessed by mcexec.c and syscall_intercept.c */ +struct uti_desc { + char lctx[4096]; /* TODO: Get the size from config.h */ + char rctx[4096]; /* TODO: Get the size from config.h */ + int mck_tid; /* TODO: Move this out for multiple migrated-to-Linux threads */ + unsigned long key; /* struct task_struct* of mcexec thread, used to search struct host_thread */ + int pid, tid; /* Used as the id of tracee when issuing MCEXEC_UP_TERMINATE_THREAD */ + unsigned long uti_clv; /* copy of McKernel clv */ + + int fd; /* /dev/mcosX */ + struct syscall_struct syscall_stack[UTI_SZ_SYSCALL_STACK]; /* stack of system call arguments and return values */ + int syscall_stack_top; /* stack-pointer of syscall arguments list */ + long syscalls[512], syscalls2[512]; /* Syscall profile counters */ + int start_syscall_intercept; /* Used to sync between mcexec.c and syscall_intercept.c */ +}; + + +#endif + diff --git a/executer/kernel/mcctrl/arch/arm64/archdeps.c b/executer/kernel/mcctrl/arch/arm64/archdeps.c index 6c297e84..af661ec3 100644 --- 
a/executer/kernel/mcctrl/arch/arm64/archdeps.c +++ b/executer/kernel/mcctrl/arch/arm64/archdeps.c @@ -1,6 +1,7 @@ /* archdeps.c COPYRIGHT FUJITSU LIMITED 2016 */ #include #include +#include #include #include "../../../config.h" #include "../../mcctrl.h" @@ -17,29 +18,31 @@ #define D(fmt, ...) printk("%s(%d) " fmt, __func__, __LINE__, ##__VA_ARGS__) -#ifdef MCCTRL_KSYM_vdso_start -# if MCCTRL_KSYM_vdso_start -void *vdso_start = (void *)MCCTRL_KSYM_vdso_start; -# endif -#else -# error missing address of vdso_start. +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0) +void *vdso_start; +void *vdso_end; +static struct vm_special_mapping (*vdso_spec)[2]; #endif -#ifdef MCCTRL_KSYM_vdso_end -# if MCCTRL_KSYM_vdso_end -void *vdso_end = (void *)MCCTRL_KSYM_vdso_end; -# endif -#else -# error missing address of vdso_end. +int arch_symbols_init(void) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0) + vdso_start = (void *) kallsyms_lookup_name("vdso_start"); + if (WARN_ON(!vdso_start)) + return -EFAULT; + + vdso_end = (void *) kallsyms_lookup_name("vdso_end"); + if (WARN_ON(!vdso_end)) + return -EFAULT; + + vdso_spec = (void *) kallsyms_lookup_name("vdso_spec"); + if (WARN_ON(!vdso_spec)) + return -EFAULT; #endif -#ifdef MCCTRL_KSYM_vdso_spec -# if MCCTRL_KSYM_vdso_spec -static struct vm_special_mapping (*vdso_spec)[2] = (void*)MCCTRL_KSYM_vdso_spec; -# endif -#else -# error missing address of vdso_spec. 
-#endif + return 0; +} + #ifdef POSTK_DEBUG_ARCH_DEP_52 #define VDSO_MAXPAGES 1 diff --git a/executer/kernel/mcctrl/arch/x86_64/archdeps.c b/executer/kernel/mcctrl/arch/x86_64/archdeps.c index 554be39b..ff7a2b76 100644 --- a/executer/kernel/mcctrl/arch/x86_64/archdeps.c +++ b/executer/kernel/mcctrl/arch/x86_64/archdeps.c @@ -1,5 +1,6 @@ /* archdeps.c COPYRIGHT FUJITSU LIMITED 2016 */ #include +#include #include "../../../config.h" #include "../../mcctrl.h" @@ -13,57 +14,46 @@ #endif #endif /* POSTK_DEBUG_ARCH_DEP_83 */ -#ifdef MCCTRL_KSYM_vdso_image_64 -#if MCCTRL_KSYM_vdso_image_64 -struct vdso_image *vdso_image = (void *)MCCTRL_KSYM_vdso_image_64; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0) +static struct vdso_image *vdso_image_64; +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23) +static void *vdso_start; +static void *vdso_end; +static struct page **vdso_pages; #endif +static void *__vvar_page; +static long *hpet_address; +static void **hv_clock; + +int arch_symbols_init(void) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0) + vdso_image_64 = (void *) kallsyms_lookup_name("vdso_image_64"); + if (WARN_ON(!vdso_image_64)) + return -EFAULT; +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23) + vdso_start = (void *) kallsyms_lookup_name("vdso_start"); + if (WARN_ON(!vdso_start)) + return -EFAULT; + + vdso_end = (void *) kallsyms_lookup_name("vdso_end"); + if (WARN_ON(!vdso_end)) + return -EFAULT; + + vdso_pages = (void *) kallsyms_lookup_name("vdso_pages"); + if (WARN_ON(!vdso_pages)) + return -EFAULT; #endif -#ifdef MCCTRL_KSYM_vdso_start -#if MCCTRL_KSYM_vdso_start -void *vdso_start = (void *)MCCTRL_KSYM_vdso_start; -#endif -#endif + __vvar_page = (void *) kallsyms_lookup_name("__vvar_page"); + if (WARN_ON(!__vvar_page)) + return -EFAULT; -#ifdef MCCTRL_KSYM_vdso_end -#if MCCTRL_KSYM_vdso_end -void *vdso_end = (void *)MCCTRL_KSYM_vdso_end; -#endif -#endif + hpet_address = (void *) kallsyms_lookup_name("hpet_address"); + hv_clock = (void *) 
kallsyms_lookup_name("hv_clock"); + return 0; +} -#ifdef MCCTRL_KSYM_vdso_pages -#if MCCTRL_KSYM_vdso_pages -struct page **vdso_pages = (void *)MCCTRL_KSYM_vdso_pages; -#endif -#endif - -#ifdef MCCTRL_KSYM___vvar_page -#if MCCTRL_KSYM___vvar_page -void *__vvar_page = (void *)MCCTRL_KSYM___vvar_page; -#endif -#endif - -long *hpet_addressp -#ifdef MCCTRL_KSYM_hpet_address -#if MCCTRL_KSYM_hpet_address - = (void *)MCCTRL_KSYM_hpet_address; -#else - = &hpet_address; -#endif -#else - = NULL; -#endif - -void **hv_clockp -#ifdef MCCTRL_KSYM_hv_clock -#if MCCTRL_KSYM_hv_clock - = (void *)MCCTRL_KSYM_hv_clock; -#else - = &hv_clock; -#endif -#else - = NULL; -#endif #ifdef POSTK_DEBUG_ARCH_DEP_52 #define VDSO_MAXPAGES 2 @@ -138,7 +128,7 @@ void get_vdso_info(ihk_os_t os, long vdso_rpa) /* VDSO pages */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0) - size = vdso_image->size; + size = vdso_image_64->size; vdso->vdso_npages = size >> PAGE_SHIFT; if (vdso->vdso_npages > VDSO_MAXPAGES) { @@ -148,7 +138,7 @@ void get_vdso_info(ihk_os_t os, long vdso_rpa) for (i = 0; i < vdso->vdso_npages; ++i) { vdso->vdso_physlist[i] = virt_to_phys( - vdso_image->data + (i * PAGE_SIZE)); + vdso_image_64->data + (i * PAGE_SIZE)); } #elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) size = vdso_end - vdso_start; @@ -185,36 +175,36 @@ void get_vdso_info(ihk_os_t os, long vdso_rpa) #endif /* HPET page */ - if (hpet_addressp && *hpet_addressp) { + if (hpet_address && *hpet_address) { #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0) vdso->hpet_is_global = 0; vdso->hpet_virt = (void *)(-2 * PAGE_SIZE); - vdso->hpet_phys = *hpet_addressp; + vdso->hpet_phys = *hpet_address; #elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0) vdso->hpet_is_global = 0; vdso->hpet_virt = (void *)(-1 * PAGE_SIZE); - vdso->hpet_phys = *hpet_addressp; + vdso->hpet_phys = *hpet_address; #elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0) vdso->hpet_is_global = 0; vdso->hpet_virt = (void *)((vdso->vdso_npages + 1) * PAGE_SIZE); - 
vdso->hpet_phys = *hpet_addressp; + vdso->hpet_phys = *hpet_address; #elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) vdso->hpet_is_global = 1; vdso->hpet_virt = (void *)fix_to_virt(VSYSCALL_HPET); - vdso->hpet_phys = *hpet_addressp; + vdso->hpet_phys = *hpet_address; #endif } /* struct pvlock_vcpu_time_info table */ - if (hv_clockp && *hv_clockp) { + if (hv_clock && *hv_clock) { #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0) vdso->pvti_is_global = 0; vdso->pvti_virt = (void *)(-1 * PAGE_SIZE); - vdso->pvti_phys = virt_to_phys(*hv_clockp); + vdso->pvti_phys = virt_to_phys(*hv_clock); #elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0) vdso->pvti_is_global = 1; vdso->pvti_virt = (void *)fix_to_virt(PVCLOCK_FIXMAP_BEGIN); - vdso->pvti_phys = virt_to_phys(*hv_clockp); + vdso->pvti_phys = virt_to_phys(*hv_clock); #endif } @@ -289,6 +279,14 @@ get_fs_ctx(void *ctx) return tctx->fs; } +unsigned long +get_rsp_ctx(void *ctx) +{ + struct trans_uctx *tctx = ctx; + + return tctx->rsp; +} + #ifdef POSTK_DEBUG_ARCH_DEP_83 /* arch depend translate_rva_to_rpa() move */ int translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long rva, unsigned long *rpap, unsigned long *pgsizep) diff --git a/executer/kernel/mcctrl/binfmt_mcexec.c b/executer/kernel/mcctrl/binfmt_mcexec.c index 2c48a5fc..ecb1725b 100644 --- a/executer/kernel/mcctrl/binfmt_mcexec.c +++ b/executer/kernel/mcctrl/binfmt_mcexec.c @@ -125,7 +125,6 @@ static int load_elf(struct linux_binprm *bprm for(i = 0, st = 0; mode != 2;){ if(st == 0){ off = p & ~PAGE_MASK; -#ifdef POSTK_DEBUG_ARCH_DEP_41 /* HOST-Linux version switch add */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) rc = get_user_pages_remote(current, bprm->mm, bprm->p, 1, FOLL_FORCE, &page, NULL, NULL); @@ -141,17 +140,6 @@ static int load_elf(struct linux_binprm *bprm bprm->p, 1, 0, 1, &page, NULL); #endif -#else /* POSTK_DEBUG_ARCH_DEP_41 */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,6,0) - rc = get_user_pages_remote(current, bprm->mm, - bprm->p, 
1, 0, 1, - &page, NULL); -#else - rc = get_user_pages(current, bprm->mm, - bprm->p, 1, 0, 1, - &page, NULL); -#endif -#endif /* POSTK_DEBUG_ARCH_DEP_41 */ if(rc <= 0) { kfree(pbuf); return -EFAULT; diff --git a/executer/kernel/mcctrl/control.c b/executer/kernel/mcctrl/control.c index f3022733..07a53035 100644 --- a/executer/kernel/mcctrl/control.c +++ b/executer/kernel/mcctrl/control.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include @@ -47,6 +46,9 @@ #include #include #include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) +#include +#endif //#define DEBUG @@ -56,37 +58,26 @@ #define dprintk(...) #endif -#ifdef MCCTRL_KSYM_sys_unshare -#if MCCTRL_KSYM_sys_unshare -typedef int (*int_star_fn_ulong_t)(unsigned long); -int (*mcctrl_sys_unshare)(unsigned long unshare_flags) = - (int_star_fn_ulong_t) - MCCTRL_KSYM_sys_unshare; -#else // exported -int (*mcctrl_sys_unshare)(unsigned long unshare_flags) = NULL; -#endif +//#define DEBUG_PTD +#ifdef DEBUG_PTD +#define pr_ptd(msg, tid, ptd) do { printk("%s: " msg ",tid=%d,refc=%d\n", __FUNCTION__, tid, atomic_read(&ptd->refcount)); } while(0) +#else +#define pr_ptd(msg, tid, ptd) do { } while(0) #endif -#ifdef MCCTRL_KSYM_sys_mount -#if MCCTRL_KSYM_sys_mount -typedef int (*int_star_fn_char_char_char_ulong_void_t)(char *, char *, char *, unsigned long, void *); -int (*mcctrl_sys_mount)(char *dev_name,char *dir_name, char *type, unsigned long flags, void *data) = - (int_star_fn_char_char_char_ulong_void_t) - MCCTRL_KSYM_sys_mount; -#else // exported -int (*mcctrl_sys_mount)(char *dev_name,char *dir_name, char *type, unsigned long flags, void *data) = sys_mount; -#endif +//#define DEBUG_PPD +#ifdef DEBUG_PPD +#define pr_ppd(msg, tid, ppd) do { printk("%s: " msg ",tid=%d,refc=%d\n", __FUNCTION__, tid, atomic_read(&ppd->refcount)); } while(0) +#else +#define pr_ppd(msg, tid, ppd) do { } while(0) #endif -#ifdef MCCTRL_KSYM_sys_umount -#if MCCTRL_KSYM_sys_umount -typedef int 
(*int_fn_char_star_int_t)(char *, int); -int (*mcctrl_sys_umount)(char *dir_name, int flags) = - (int_fn_char_star_int_t) - MCCTRL_KSYM_sys_umount; -#else // exported -int (*mcctrl_sys_umount)(char *dir_name, int flags) = sys_umount; -#endif +#if defined(RHEL_RELEASE_CODE) || (LINUX_VERSION_CODE < KERNEL_VERSION(4,0,0)) +#define BITMAP_SCNLISTPRINTF(buf, buflen, maskp, nmaskbits) \ + bitmap_scnlistprintf(buf, buflen, maskp, nmaskbits) +#else +#define BITMAP_SCNLISTPRINTF(buf, buflen, maskp, nmaskbits) \ + scnprintf(buf, buflen, "%*pbl", nmaskbits, maskp) #endif //extern struct mcctrl_channel *channels; @@ -158,8 +149,8 @@ static long mcexec_prepare_image(ihk_os_t os, pdesc->pid = task_tgid_vnr(current); - if (reserve_user_space(usrdata, &pdesc->user_start, &pdesc->user_end)) { - ret = -ENOMEM; + if ((ret = reserve_user_space(usrdata, &pdesc->user_start, + &pdesc->user_end))) { goto put_and_free_out; } @@ -314,14 +305,17 @@ struct mcos_handler_info { int cpu; struct mcctrl_usrdata *ud; struct file *file; + unsigned long user_start; + unsigned long user_end; }; struct mcos_handler_info; -static struct host_thread *host_threads; +static LIST_HEAD(host_threads); /* Used for FS switch */ DEFINE_RWLOCK(host_thread_lock); +/* Info of Linux counterpart of migrated-to-Linux thread */ struct host_thread { - struct host_thread *next; + struct list_head list; struct mcos_handler_info *handler; int pid; int tid; @@ -357,7 +351,7 @@ static long mcexec_debug_log(ihk_os_t os, unsigned long arg) return 0; } -int mcexec_close_exec(ihk_os_t os); +int mcexec_close_exec(ihk_os_t os, int pid); int mcexec_destroy_per_process_data(ihk_os_t os, int pid); static void release_handler(ihk_os_t os, void *param) @@ -368,15 +362,16 @@ static void release_handler(ihk_os_t os, void *param) unsigned long flags; struct host_thread *thread; + /* Finalize FS switch for uti threads */ write_lock_irqsave(&host_thread_lock, flags); - for (thread = host_threads; thread; thread = thread->next) { + 
list_for_each_entry(thread, &host_threads, list) { if (thread->handler == info) { thread->handler = NULL; } } write_unlock_irqrestore(&host_thread_lock, flags); - mcexec_close_exec(os); + mcexec_close_exec(os, info->pid); mcexec_destroy_per_process_data(os, info->pid); @@ -451,6 +446,8 @@ static long mcexec_start_image(ihk_os_t os, info->pid = desc->pid; info->cpu = desc->cpu; + info->user_start = desc->user_start; + info->user_end = desc->user_end; ihk_os_register_release_handler(file, release_handler, info); ihk_os_set_mcos_private_data(file, info); @@ -685,6 +682,7 @@ static long mcexec_get_cpuset(ihk_os_t os, unsigned long arg) wake_up_interruptible(&pli_next->pli_wq); /* Reset process counter */ pe->nr_processes_left = pe->nr_processes; + pe->process_rank = 0; } /* Wait for the rest if not the last or if the last but @@ -916,6 +914,15 @@ next_cpu: goto put_and_unlock_out; } + /* Copy rank */ + if (copy_to_user(req.process_rank, &pe->process_rank, + sizeof(int))) { + printk("%s: error copying process rank to user\n", + __FUNCTION__); + ret = -EINVAL; + goto put_and_unlock_out; + } + /* mcexec NUMA to bind to */ mcexec_linux_numa = cpu_to_node(mckernel_cpu_2_linux_cpu(udp, cpu)); if (copy_to_user(req.mcexec_linux_numa, &mcexec_linux_numa, @@ -963,6 +970,7 @@ next_cpu: } /* Otherwise wake up next process in list */ else { + ++pe->process_rank; pli_next = list_first_entry(&pe->pli_list, struct process_list_item, list); list_del(&pli_next->list); @@ -1092,6 +1100,8 @@ void mcctrl_put_per_proc_data(struct mcctrl_per_proc_data *ppd) struct wait_queue_head_list_node *wqhln; struct wait_queue_head_list_node *wqhln_next; struct ikc_scd_packet *packet; + struct mcctrl_per_thread_data *ptd; + struct mcctrl_per_thread_data *next; if (!ppd) return; @@ -1110,25 +1120,34 @@ void mcctrl_put_per_proc_data(struct mcctrl_per_proc_data *ppd) write_unlock_irqrestore(&ppd->ud->per_proc_data_hash_lock[hash], flags); dprintk("%s: deallocating PPD for pid %d\n", __FUNCTION__, 
ppd->pid); - for (i = 0; i < MCCTRL_PER_THREAD_DATA_HASH_SIZE; i++) { - struct mcctrl_per_thread_data *ptd; - struct mcctrl_per_thread_data *next; + for (i = 0; i < MCCTRL_PER_THREAD_DATA_HASH_SIZE; i++) { + write_lock_irqsave(&ppd->per_thread_data_hash_lock[i], flags); list_for_each_entry_safe(ptd, next, ppd->per_thread_data_hash + i, hash) { - packet = ptd->data; - list_del(&ptd->hash); - kfree(ptd); + /* We use ERESTARTSYS to tell the LWK that the proxy - * process is gone and the application should be terminated */ + process is gone and the application should be terminated. */ + packet = (struct ikc_scd_packet *)ptd->data; + dprintk("%s: calling __return_syscall (hash),target pid=%d,tid=%d\n", __FUNCTION__, ppd->pid, packet->req.rtid); __return_syscall(ppd->ud->os, packet, -ERESTARTSYS, - packet->req.rtid); + packet->req.rtid); ihk_ikc_release_packet( (struct ihk_ikc_free_packet *)packet, (ppd->ud->ikc2linux[smp_processor_id()] ? ppd->ud->ikc2linux[smp_processor_id()] : ppd->ud->ikc2linux[0])); + + /* Note that uti ptd needs another put by mcexec_terminate_thread() + (see mcexec_syscall_wait()). + TODO: Detect tracer has died before calling mcexec_terminate_thread() and put uti ptd */ + if (atomic_read(&ptd->refcount) != 1) { + printk("%s: WARNING: ptd->refcount != 1 but %d\n", __FUNCTION__, atomic_read(&ptd->refcount)); + } + mcctrl_put_per_thread_data_unsafe(ptd); + pr_ptd("put", ptd->tid, ptd); } + write_unlock_irqrestore(&ppd->per_thread_data_hash_lock[i], flags); } flags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock); @@ -1164,6 +1183,17 @@ int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet) int pid = packet->pid; unsigned long flags; struct mcctrl_per_proc_data *ppd; + int ret; + + /* Handle requests that do not need the proxy process right now */ + ret = __do_in_kernel_irq_syscall(ud->os, packet); + if (ret != -ENOSYS) { + ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet, + (ud->ikc2linux[smp_processor_id()] ? 
+ ud->ikc2linux[smp_processor_id()] : + ud->ikc2linux[0])); + return ret; + } /* Get a reference to per-process structure */ ppd = mcctrl_get_per_proc_data(ud, pid); @@ -1207,9 +1237,31 @@ int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet) wqhln = wqhln_iter; break; } - if (!wqhln) { - printk("%s: WARNING: no target thread found for exact request??\n", - __FUNCTION__); + /* Find the mcexec thread with the same tid as the requesting McKernel thread + and let it handle the migrate-to-Linux request */ + if (packet->req.number == __NR_sched_setaffinity && packet->req.args[0] == 0) { + list_for_each_entry(wqhln_iter, &ppd->wq_list, list) { + if (packet->req.ttid == wqhln_iter->rtid) { + if (!wqhln_iter->task) { + printk("%s: ERROR: wqhln_iter->task=%p,rtid=%d,&ppd->wq_list_lock=%p\n", __FUNCTION__, wqhln_iter->task, wqhln_iter->rtid, &ppd->wq_list_lock); + } else if(wqhln_iter->req) { + /* list_del() is called after woken-up */ + dprintk("%s: INFO: target thread is busy, wqhln_iter->req=%d,rtid=%d,&ppd->wq_list_lock=%p\n", __FUNCTION__, wqhln_iter->req, wqhln_iter->rtid, &ppd->wq_list_lock); + } else { + wqhln = wqhln_iter; + dprintk("%s: uti, worker with tid of %d found in wq_list\n", __FUNCTION__, packet->req.ttid); + } + break; + } + } + if (!wqhln) { + dprintk("%s: uti: INFO: target worker (tid=%d) not found in wq_list\n", __FUNCTION__, packet->req.ttid); + } + } else { + if (!wqhln) { + printk("%s: WARNING: no target thread (tid=%d) found for exact request??\n", + __FUNCTION__, packet->req.ttid); + } } } /* Is there any thread available? 
*/ @@ -1234,6 +1286,12 @@ retry_alloc: wqhln = wqhln_alloc; wqhln->req = 0; wqhln->task = NULL; + /* Let the mcexec thread to handle migrate-to-Linux request in mcexec_wait_syscall() after finishing the current task */ + if (packet->req.number == __NR_sched_setaffinity && packet->req.args[0] == 0) { + wqhln->rtid = packet->req.ttid; + } else { + wqhln->rtid = 0; + } init_waitqueue_head(&wqhln->wq_syscall); list_add_tail(&wqhln->list, &ppd->wq_req_list); } @@ -1260,6 +1318,7 @@ int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req) int ret = 0; unsigned long irqflags; struct mcctrl_per_proc_data *ppd; + struct mcctrl_per_thread_data *ptd = NULL; /* Get a reference to per-process structure */ ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current)); @@ -1270,27 +1329,39 @@ int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req) return -EINVAL; } - packet = (struct ikc_scd_packet *)mcctrl_get_per_thread_data(ppd, current); - if (packet) { + ptd = mcctrl_get_per_thread_data(ppd, current); + if (ptd) { printk("%s: ERROR: packet %p is already registered for thread %d\n", - __FUNCTION__, packet, task_pid_vnr(current)); + __FUNCTION__, ptd->data, task_pid_vnr(current)); + mcctrl_put_per_thread_data(ptd); ret = -EBUSY; - goto put_ppd_out; + goto no_ptd; } retry: /* Prepare per-thread wait queue head or find a valid request */ irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock); + + /* Handle migrate-to-Linux request if any */ + list_for_each_entry(wqhln_iter, &ppd->wq_req_list, list) { + if (wqhln_iter->rtid == task_pid_vnr(current)) { + wqhln = wqhln_iter; + wqhln->task = current; + list_del(&wqhln->list); + goto found; + } + } + /* First see if there is a valid request already that is not yet taken */ list_for_each_entry(wqhln_iter, &ppd->wq_req_list, list) { - if (wqhln_iter->task == NULL && wqhln_iter->req) { + if (!wqhln_iter->rtid && wqhln_iter->task == NULL && wqhln_iter->req) { wqhln = wqhln_iter; wqhln->task = current; 
list_del(&wqhln->list); break; } } - + found: if (!wqhln) { retry_alloc: wqhln = kmalloc(sizeof(*wqhln), GFP_ATOMIC); @@ -1302,6 +1373,8 @@ retry_alloc: wqhln->task = current; wqhln->req = 0; wqhln->packet = NULL; + /* Let mcexec_syscall() find the mcexec thread to handle migrate-to-Linux request */ + wqhln->rtid = task_pid_vnr(current); init_waitqueue_head(&wqhln->wq_syscall); list_add(&wqhln->list, &ppd->wq_list); @@ -1317,20 +1390,18 @@ retry_alloc: ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, irqflags); if (ret == -ERESTARTSYS) { - /* Is the request valid? */ + /* Requeue valid requests */ if (wqhln->req) { - packet = wqhln->packet; - kfree(wqhln); - wqhln = NULL; - ret = -EINTR; - goto put_ppd_out; + irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock); + list_add_tail(&wqhln->list, &ppd->wq_req_list); + ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, irqflags); } else { kfree(wqhln); - wqhln = NULL; - ret = -EINTR; - goto put_ppd_out; } + wqhln = NULL; + ret = -EINTR; + goto no_ptd; } packet = wqhln->packet; @@ -1366,27 +1437,43 @@ retry_alloc: packet->req.args[4], packet->req.args[5]); - if (mcctrl_add_per_thread_data(ppd, current, packet) < 0) { - kprintf("%s: error adding per-thread data\n", __FUNCTION__); - ret = -EINVAL;; - goto put_ppd_out; + /* Create ptd */ + if ((ret = mcctrl_add_per_thread_data(ppd, packet))) { + kprintf("%s: error adding per-thread data (%d)\n", __FUNCTION__, ret); + ret = -EINVAL; + goto no_ptd; + } + + /* Get a reference valid until offload is done */ + ptd = mcctrl_get_per_thread_data(ppd, current); + if (!ptd) { + kprintf("%s: ERROR: ptd not found\n", __FUNCTION__); + ret = -EINVAL; + goto no_ptd; + } + pr_ptd("get", task_pid_vnr(current), ptd); + + if (packet->req.number == __NR_sched_setaffinity && packet->req.args[0] == 0) { + dprintk("%s: uti,packet=%p,tid=%d\n", __FUNCTION__, packet, task_pid_vnr(current)); + + /* Get a reference valid until thread-offload is done */ + ptd = mcctrl_get_per_thread_data(ppd, current); + if (!ptd) 
{ + kprintf("%s: ptd not found\n", __FUNCTION__); + ret = -EINVAL; + goto no_ptd; + } + pr_ptd("get", task_pid_vnr(current), ptd); } if (__do_in_kernel_syscall(os, packet)) { if (copy_to_user(&req->sr, &packet->req, sizeof(struct syscall_request))) { - - if (mcctrl_delete_per_thread_data(ppd, current) < 0) { - kprintf("%s: error deleting per-thread data\n", __FUNCTION__); - } - ret = -EINVAL;; + ret = -EINVAL; goto put_ppd_out; } if (copy_to_user(&req->cpu, &packet->ref, sizeof(req->cpu))) { - if (mcctrl_delete_per_thread_data(ppd, current) < 0) { - kprintf("%s: error deleting per-thread data\n", __FUNCTION__); - } ret = -EINVAL; goto put_ppd_out; } @@ -1395,20 +1482,18 @@ retry_alloc: goto put_ppd_out; } - ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet, - (usrdata->ikc2linux[smp_processor_id()] ? - usrdata->ikc2linux[smp_processor_id()] : - usrdata->ikc2linux[0])); - - if (mcctrl_delete_per_thread_data(ppd, current) < 0) { - kprintf("%s: error deleting per-thread data\n", __FUNCTION__); - ret = -EINVAL;; - goto put_ppd_out; - } + /* Drop reference to zero and restart from add */ + mcctrl_put_per_thread_data(ptd); + pr_ptd("put,in_kernel", task_pid_vnr(current), ptd); + mcctrl_put_per_thread_data(ptd); + pr_ptd("put,in_kernel", task_pid_vnr(current), ptd); goto retry; put_ppd_out: + mcctrl_put_per_thread_data(ptd); + pr_ptd("put,in_mcexec", task_pid_vnr(current), ptd); + no_ptd: mcctrl_put_per_proc_data(ppd); return ret; } @@ -1503,6 +1588,7 @@ long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg) struct ikc_scd_packet *packet; struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); struct mcctrl_per_proc_data *ppd; + struct mcctrl_per_thread_data *ptd; int error = 0; if (copy_from_user(&ret, arg, sizeof(struct syscall_ret_desc))) { @@ -1517,16 +1603,22 @@ long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg) return -EINVAL; } - packet = (struct ikc_scd_packet *)mcctrl_get_per_thread_data(ppd, current); + 
/* Get a reference for this function */ + ptd = mcctrl_get_per_thread_data(ppd, current); + if (!ptd) { + printk("%s: ERROR: mcctrl_get_per_thread_data failed\n", __FUNCTION__); + error = -EINVAL; + goto no_ptd; + } + pr_ptd("get", task_pid_vnr(current), ptd); + packet = (struct ikc_scd_packet *)ptd->data; if (!packet) { kprintf("%s: ERROR: no packet registered for TID %d\n", __FUNCTION__, task_pid_vnr(current)); error = -EINVAL; - goto out; + goto put_ppd_out; } - mcctrl_delete_per_thread_data(ppd, current); - if (ret.size > 0) { /* Host => Accel. Write is fast. */ unsigned long phys; @@ -1561,7 +1653,15 @@ out: (usrdata->ikc2linux[smp_processor_id()] ? usrdata->ikc2linux[smp_processor_id()] : usrdata->ikc2linux[0])); - + put_ppd_out: + /* Drop a reference for this function */ + mcctrl_put_per_thread_data(ptd); + pr_ptd("put", task_pid_vnr(current), ptd); + + /* Final drop of the reference for non-uti syscall offloading */ + mcctrl_put_per_thread_data(ptd); + pr_ptd("put", task_pid_vnr(current), ptd); + no_ptd: mcctrl_put_per_proc_data(ppd); return error; } @@ -1655,11 +1755,18 @@ mcexec_getcredv(int __user *virt) return 0; } -int mcexec_create_per_process_data(ihk_os_t os) +int mcexec_create_per_process_data(ihk_os_t os, + struct rpgtable_desc * __user rpt) { struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); struct mcctrl_per_proc_data *ppd = NULL; int i; + struct rpgtable_desc krpt; + + if (rpt && + copy_from_user(&krpt, rpt, sizeof(krpt))) { + return -EFAULT; + } ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current)); if (ppd) { @@ -1674,6 +1781,7 @@ int mcexec_create_per_process_data(ihk_os_t os) printk("%s: ERROR: allocating per-process data\n", __FUNCTION__); return -ENOMEM; } + memset(ppd, 0, sizeof(struct mcctrl_per_proc_data)); /* debug */ ppd->ud = usrdata; ppd->pid = task_tgid_vnr(current); @@ -1711,6 +1819,11 @@ int mcexec_create_per_process_data(ihk_os_t os) dprintk("%s: PID: %d, counter: %d\n", __FUNCTION__, ppd->pid, 
atomic_read(&ppd->refcount)); + if (rpt) { + ppd->rpgtable = krpt.rpgtable; + return mcctrl_clear_pte_range(krpt.start, krpt.len); + } + return 0; } @@ -1725,7 +1838,11 @@ int mcexec_destroy_per_process_data(ihk_os_t os, int pid) /* One for the reference and one for deallocation. * XXX: actual deallocation may not happen here */ mcctrl_put_per_proc_data(ppd); + pr_ppd("put", task_pid_vnr(current), ppd); + + /* Note that it will call return_syscall() */ mcctrl_put_per_proc_data(ppd); + pr_ppd("put", task_pid_vnr(current), ppd); } else { printk("WARNING: no per process data for PID %d ?\n", @@ -1752,13 +1869,13 @@ int mcexec_open_exec(ihk_os_t os, char * __user filename) return -EINVAL; } - pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); + pathbuf = kmalloc(PATH_MAX, GFP_KERNEL); if (!pathbuf) { retval = -ENOMEM; goto out; } - kfilename = kmalloc(PATH_MAX, GFP_TEMPORARY); + kfilename = kmalloc(PATH_MAX, GFP_KERNEL); if (!kfilename) { retval = -ENOMEM; kfree(pathbuf); @@ -1788,6 +1905,7 @@ int mcexec_open_exec(ihk_os_t os, char * __user filename) retval = -ENOMEM; goto out_put_file; } + memset(mcef, 0, sizeof(struct mckernel_exec_file)); /* debug */ down(&mckernel_exec_file_lock); /* Find previous file (if exists) and drop it */ @@ -1829,7 +1947,7 @@ out: return retval; } -int mcexec_close_exec(ihk_os_t os) +int mcexec_close_exec(ihk_os_t os, int pid) { struct mckernel_exec_file *mcef = NULL; int found = 0; @@ -1841,7 +1959,7 @@ int mcexec_close_exec(ihk_os_t os) down(&mckernel_exec_file_lock); list_for_each_entry(mcef, &mckernel_exec_files, list) { - if (mcef->os == os && mcef->pid == task_tgid_vnr(current)) { + if (mcef->os == os && mcef->pid == pid) { allow_write_access(mcef->fp); fput(mcef->fp); list_del(&mcef->list); @@ -1927,12 +2045,8 @@ long mcexec_sys_mount(struct sys_mount_desc *__user arg) cap_raise(promoted->cap_effective, CAP_SYS_ADMIN); original = override_creds(promoted); -#ifdef MCCTRL_KSYM_sys_mount ret = mcctrl_sys_mount(desc.dev_name, desc.dir_name, 
desc.type, desc.flags, desc.data); -#else - ret = -EFAULT; -#endif revert_creds(original); put_cred(promoted); @@ -1958,11 +2072,7 @@ long mcexec_sys_umount(struct sys_mount_desc *__user arg) cap_raise(promoted->cap_effective, CAP_SYS_ADMIN); original = override_creds(promoted); -#ifdef MCCTRL_KSYM_sys_umount ret = mcctrl_sys_umount(desc.dir_name, MNT_FORCE); -#else - ret = -EFAULT; -#endif revert_creds(original); put_cred(promoted); @@ -1988,11 +2098,7 @@ long mcexec_sys_unshare(struct sys_unshare_desc *__user arg) cap_raise(promoted->cap_effective, CAP_SYS_ADMIN); original = override_creds(promoted); -#if MCCTRL_KSYM_sys_unshare ret = mcctrl_sys_unshare(desc.unshare_flags); -#else - ret = -EFAULT; -#endif revert_creds(original); put_cred(promoted); @@ -2019,6 +2125,9 @@ struct mcctrl_perf_ctrl_desc { #define wakeup_desc_of_perf_desc(_desc) \ (&container_of((_desc), struct mcctrl_perf_ctrl_desc, desc)->wakeup) +/* Note that usrdata->perf_event_num is updated with # of registered + * events + */ long mcctrl_perf_set(ihk_os_t os, struct ihk_perf_event_attr *__user arg) { struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); @@ -2029,6 +2138,8 @@ long mcctrl_perf_set(ihk_os_t os, struct ihk_perf_event_attr *__user arg) int ret = 0; int i = 0, j = 0; int need_free; + int num_registered = 0; + int err = 0; for (i = 0; i < usrdata->perf_event_num; i++) { ret = copy_from_user(&attr, &arg[i], @@ -2047,6 +2158,7 @@ long mcctrl_perf_set(ihk_os_t os, struct ihk_perf_event_attr *__user arg) memset(perf_desc, '\0', sizeof(struct perf_ctrl_desc)); perf_desc->ctrl_type = PERF_CTRL_SET; + perf_desc->err = 0; perf_desc->target_cntr = i; perf_desc->config = attr.config; perf_desc->exclude_kernel = attr.exclude_kernel; @@ -2057,7 +2169,8 @@ long mcctrl_perf_set(ihk_os_t os, struct ihk_perf_event_attr *__user arg) isp.arg = virt_to_phys(perf_desc); for (j = 0; j < info->n_cpus; j++) { - ret = mcctrl_ikc_send_wait(os, j, &isp, 0, + ret = mcctrl_ikc_send_wait(os, j, &isp, + 
msecs_to_jiffies(10000), wakeup_desc_of_perf_desc(perf_desc), &need_free, 1, perf_desc); if (ret < 0) { @@ -2065,14 +2178,24 @@ long mcctrl_perf_set(ihk_os_t os, struct ihk_perf_event_attr *__user arg) __func__, ret); if (need_free) kfree(perf_desc); - return -EINVAL; + return ret; + } + + err = perf_desc->err; + if (err != 0) { + break; } } + if (err == 0) { + num_registered++; + } kfree(perf_desc); } - return usrdata->perf_event_num; + usrdata->perf_event_num = num_registered; + + return num_registered; } long mcctrl_perf_get(ihk_os_t os, unsigned long *__user arg) @@ -2095,6 +2218,7 @@ long mcctrl_perf_get(ihk_os_t os, unsigned long *__user arg) memset(perf_desc, '\0', sizeof(struct perf_ctrl_desc)); perf_desc->ctrl_type = PERF_CTRL_GET; + perf_desc->err = 0; perf_desc->target_cntr = i; memset(&isp, '\0', sizeof(struct ikc_scd_packet)); @@ -2102,7 +2226,8 @@ long mcctrl_perf_get(ihk_os_t os, unsigned long *__user arg) isp.arg = virt_to_phys(perf_desc); for (j = 0; j < info->n_cpus; j++) { - ret = mcctrl_ikc_send_wait(os, j, &isp, 0, + ret = mcctrl_ikc_send_wait(os, j, &isp, + msecs_to_jiffies(10000), wakeup_desc_of_perf_desc(perf_desc), &need_free, 1, perf_desc); if (ret < 0) { @@ -2110,10 +2235,12 @@ long mcctrl_perf_get(ihk_os_t os, unsigned long *__user arg) __func__, ret); if (need_free) kfree(perf_desc); - return -EINVAL; + return ret; } - value_sum += perf_desc->read_value; + if (perf_desc->err == 0) { + value_sum += perf_desc->read_value; + } } kfree(perf_desc); if (copy_to_user(&arg[i], &value_sum, sizeof(unsigned long))) { @@ -2133,13 +2260,13 @@ long mcctrl_perf_enable(ihk_os_t os) struct ikc_scd_packet isp; struct perf_ctrl_desc *perf_desc; struct ihk_cpu_info *info = ihk_os_get_cpu_info(os); - unsigned int cntr_mask = 0; + unsigned long cntr_mask = 0; int ret = 0; int i = 0, j = 0; int need_free; for (i = 0; i < usrdata->perf_event_num; i++) { - cntr_mask |= 1 << i; + cntr_mask |= 1UL << i; } perf_desc = kmalloc(sizeof(struct mcctrl_perf_ctrl_desc), 
GFP_KERNEL); if (!perf_desc) { @@ -2148,6 +2275,7 @@ long mcctrl_perf_enable(ihk_os_t os) memset(perf_desc, '\0', sizeof(struct perf_ctrl_desc)); perf_desc->ctrl_type = PERF_CTRL_ENABLE; + perf_desc->err = 0; perf_desc->target_cntr_mask = cntr_mask; memset(&isp, '\0', sizeof(struct ikc_scd_packet)); @@ -2167,6 +2295,12 @@ long mcctrl_perf_enable(ihk_os_t os) return -EINVAL; } + if (perf_desc->err < 0) { + ret = perf_desc->err; + kfree(perf_desc); + return ret; + } + } kfree(perf_desc); @@ -2179,13 +2313,13 @@ long mcctrl_perf_disable(ihk_os_t os) struct ikc_scd_packet isp; struct perf_ctrl_desc *perf_desc; struct ihk_cpu_info *info = ihk_os_get_cpu_info(os); - unsigned int cntr_mask = 0; + unsigned long cntr_mask = 0; int ret = 0; int i = 0, j = 0; int need_free; for (i = 0; i < usrdata->perf_event_num; i++) { - cntr_mask |= 1 << i; + cntr_mask |= 1UL << i; } perf_desc = kmalloc(sizeof(struct mcctrl_perf_ctrl_desc), GFP_KERNEL); if (!perf_desc) { @@ -2194,6 +2328,7 @@ long mcctrl_perf_disable(ihk_os_t os) memset(perf_desc, '\0', sizeof(struct perf_ctrl_desc)); perf_desc->ctrl_type = PERF_CTRL_DISABLE; + perf_desc->err = 0; perf_desc->target_cntr_mask = cntr_mask; memset(&isp, '\0', sizeof(struct ikc_scd_packet)); @@ -2212,6 +2347,11 @@ long mcctrl_perf_disable(ihk_os_t os) return -EINVAL; } + if (perf_desc->err < 0) { + ret = perf_desc->err; + kfree(perf_desc); + return ret; + } } kfree(perf_desc); @@ -2301,204 +2441,396 @@ extern void set_user_sp(unsigned long); extern void restore_fs(unsigned long fs); extern void save_fs_ctx(void *); extern unsigned long get_fs_ctx(void *); +extern unsigned long get_rsp_ctx(void *); -long -mcexec_util_thread1(ihk_os_t os, unsigned long arg, struct file *file) +long mcexec_uti_get_ctx(ihk_os_t os, struct uti_get_ctx_desc __user *udesc) { - void **__user uparam = (void ** __user)arg; - void *param[6]; - unsigned long p_rctx; + struct uti_get_ctx_desc desc; unsigned long phys; - void *__user u_rctx; - void *rctx; + struct uti_ctx 
*rctx; int rc = 0; - unsigned long free_address; - unsigned long free_size; unsigned long icurrent = (unsigned long)current; - if(copy_from_user(param, uparam, sizeof(void *) * 6)) { - return -EFAULT; - } - p_rctx = (unsigned long)param[0]; - u_rctx = (void *__user)param[1]; - free_address = (unsigned long)param[4]; - free_size = (unsigned long)param[5]; - - phys = ihk_device_map_memory(ihk_os_to_dev(os), p_rctx, PAGE_SIZE); -#ifdef CONFIG_MIC - rctx = ioremap_wc(phys, PAGE_SIZE); -#else - rctx = ihk_device_map_virtual(ihk_os_to_dev(os), phys, PAGE_SIZE, NULL, 0); -#endif - if(copy_to_user(u_rctx, rctx, PAGE_SIZE) || - copy_to_user((unsigned long *)(uparam + 3), &icurrent, - sizeof(unsigned long))) + if(copy_from_user(&desc, udesc, sizeof(struct uti_get_ctx_desc))) { rc = -EFAULT; + goto out; + } - ((unsigned long *)rctx)[0] = free_address; - ((unsigned long *)rctx)[1] = free_size; + phys = ihk_device_map_memory(ihk_os_to_dev(os), desc.rp_rctx, sizeof(struct uti_ctx)); +#ifdef CONFIG_MIC + rctx = ioremap_wc(phys, sizeof(struct uti_ctx)); +#else + rctx = ihk_device_map_virtual(ihk_os_to_dev(os), phys, sizeof(struct uti_ctx), NULL, 0); +#endif + if (copy_to_user(desc.rctx, rctx->ctx, sizeof(struct uti_ctx))) { + rc = -EFAULT; + goto unmap_and_out; + } + if (copy_to_user(&udesc->key, &icurrent, sizeof(unsigned long))) { + rc = -EFAULT; + goto unmap_and_out; + } + + rctx->uti_refill_tid = desc.uti_refill_tid; + + unmap_and_out: #ifdef CONFIG_MIC iounmap(rctx); #else - ihk_device_unmap_virtual(ihk_os_to_dev(os), rctx, PAGE_SIZE); + ihk_device_unmap_virtual(ihk_os_to_dev(os), rctx, sizeof(struct uti_ctx)); #endif - ihk_device_unmap_memory(ihk_os_to_dev(os), phys, PAGE_SIZE); - + ihk_device_unmap_memory(ihk_os_to_dev(os), phys, sizeof(struct uti_ctx)); + out: return rc; } -static inline struct host_thread *get_host_thread(void) -{ - int pid = task_tgid_vnr(current); - int tid = task_pid_vnr(current); - unsigned long flags; - struct host_thread *thread; - - 
read_lock_irqsave(&host_thread_lock, flags); - for (thread = host_threads; thread; thread = thread->next) - if(thread->pid == pid && thread->tid == tid) - break; - read_unlock_irqrestore(&host_thread_lock, flags); - - return thread; -} - -long -mcexec_util_thread2(ihk_os_t os, unsigned long arg, struct file *file) +long mcexec_uti_save_fs(ihk_os_t os, struct uti_save_fs_desc __user *udesc, struct file *file) { + int rc = 0; void *usp = get_user_sp(); struct mcos_handler_info *info; struct host_thread *thread; unsigned long flags; - void **__user param = (void **__user )arg; - void *__user rctx = (void *__user)param[1]; - void *__user lctx = (void *__user)param[2]; + struct uti_save_fs_desc desc; + struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); + struct mcctrl_per_proc_data *ppd; - save_fs_ctx(lctx); + if(copy_from_user(&desc, udesc, sizeof(struct uti_save_fs_desc))) { + printk("%s: Error: copy_from_user failed\n", __FUNCTION__); + rc = -EFAULT; + goto out; + } + + save_fs_ctx(desc.lctx); info = ihk_os_get_mcos_private_data(file); thread = kmalloc(sizeof(struct host_thread), GFP_KERNEL); memset(thread, '\0', sizeof(struct host_thread)); thread->pid = task_tgid_vnr(current); thread->tid = task_pid_vnr(current); thread->usp = (unsigned long)usp; - thread->lfs = get_fs_ctx(lctx); - thread->rfs = get_fs_ctx(rctx); + thread->lfs = get_fs_ctx(desc.lctx); + thread->rfs = get_fs_ctx(desc.rctx); thread->handler = info; write_lock_irqsave(&host_thread_lock, flags); - thread->next = host_threads; - host_threads = thread; + list_add_tail(&thread->list, &host_threads); write_unlock_irqrestore(&host_thread_lock, flags); - return 0; + /* How ppd refcount reaches zero depends on how utility-thread exits: + (1) MCEXEC_UP_CREATE_PPD sets to 1 + (2) mcexec_util_thread2() increments to 2 + (3) Tracer detects exit/exit_group/killed by signal of tracee + and decrements to 1 via mcexec_terminate_thread() + (4) Tracer calls exit_fd(), it calls release_handler(), + it 
decrements to 0 + + KNOWN ISSUE: + mcexec_terminate_thread() isn't called when tracer is + unexpectedly killed so the refcount remains 1 when + exiting release_handler() + */ + ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current)); + pr_ppd("get", task_pid_vnr(current), ppd); + out: + return rc; } +/* Return value: 0 if target is uti thread, -EINVAL if not */ long mcexec_sig_thread(ihk_os_t os, unsigned long arg, struct file *file) { int tid = task_pid_vnr(current); int pid = task_tgid_vnr(current); unsigned long flags; - struct host_thread *thread; + struct host_thread *thread_iter, *thread = NULL; + long ret = 0; read_lock_irqsave(&host_thread_lock, flags); - for (thread = host_threads; thread; thread = thread->next) - if(thread->pid == pid && thread->tid == tid) + list_for_each_entry(thread_iter, &host_threads, list) { + if(thread_iter->pid == pid && thread_iter->tid == tid) { + thread = thread_iter; break; + } + } read_unlock_irqrestore(&host_thread_lock, flags); if (thread) { if (arg) restore_fs(thread->lfs); else restore_fs(thread->rfs); - return 0; + goto out; } - return -EINVAL; + ret = -EINVAL; + out: + return ret; } -long -mcexec_terminate_thread(ihk_os_t os, unsigned long *param, struct file *file) +static long mcexec_terminate_thread_unsafe(ihk_os_t os, int pid, int tid, long code, struct task_struct *tsk) { - int pid = param[0]; - int tid = param[1]; - struct task_struct *tsk = (struct task_struct *)param[3]; - unsigned long flags; - struct host_thread *thread; - struct host_thread *prev; struct ikc_scd_packet *packet; struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); struct mcctrl_per_proc_data *ppd; + struct mcctrl_per_thread_data *ptd; - write_lock_irqsave(&host_thread_lock, flags); - for (prev = NULL, thread = host_threads; thread; - prev = thread, thread = thread->next) { - if(thread->tid == tid) - break; - } - if (!thread) { - write_unlock_irqrestore(&host_thread_lock, flags); - return -EINVAL; - } + dprintk("%s: target 
pid=%d,tid=%d,code=%lx,task=%p\n", __FUNCTION__, pid, tid, code, tsk); ppd = mcctrl_get_per_proc_data(usrdata, pid); if (!ppd) { kprintf("%s: ERROR: no per-process structure for PID %d??\n", - __FUNCTION__, pid); - goto err; + __FUNCTION__, pid); + goto no_ppd; } - packet = (struct ikc_scd_packet *)mcctrl_get_per_thread_data(ppd, tsk); + + ptd = mcctrl_get_per_thread_data(ppd, tsk); + if (!ptd) { + printk("%s: ERROR: mcctrl_get_per_thread_data failed\n", __FUNCTION__); + goto no_ptd; + } + if (ptd->tid != tid) { + printk("%s: ERROR: ptd->tid(%d) != tid(%d)\n", __FUNCTION__, ptd->tid, tid); + goto no_ptd; + } + pr_ptd("get", tid, ptd); + + packet = (struct ikc_scd_packet *)ptd->data; if (!packet) { kprintf("%s: ERROR: no packet registered for TID %d\n", - __FUNCTION__, tid); - goto err; + __FUNCTION__, tid); + goto no_ptd; } - mcctrl_delete_per_thread_data(ppd, tsk); - __return_syscall(usrdata->os, packet, param[2], tid); + __return_syscall(usrdata->os, packet, code, tid); ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet, (usrdata->ikc2linux[smp_processor_id()] ? 
usrdata->ikc2linux[smp_processor_id()] : usrdata->ikc2linux[0])); -err: - if(ppd) - mcctrl_put_per_proc_data(ppd); - if (prev) - prev->next = thread->next; - else - host_threads = thread->next; - write_unlock_irqrestore(&host_thread_lock, flags); - kfree(thread); + /* Drop reference for this function */ + mcctrl_put_per_thread_data(ptd); + pr_ptd("put", tid, ptd); + + /* Final drop of reference for uti ptd */ + mcctrl_put_per_thread_data(ptd); + pr_ptd("put", tid, ptd); + + if (atomic_read(&ptd->refcount) != 1) { + printk("%s: WARNING: ptd->refcount != 1 but %d\n", __FUNCTION__, atomic_read(&ptd->refcount)); + } + mcctrl_put_per_thread_data(ptd); + pr_ptd("put", tid, ptd); + no_ptd: + mcctrl_put_per_proc_data(ppd); + pr_ppd("put", task_pid_vnr(current), ppd); + + /* This is the final drop of uti-ppd */ + mcctrl_put_per_proc_data(ppd); + pr_ppd("put", task_pid_vnr(current), ppd); + no_ppd: return 0; } -long -mcexec_syscall_thread(ihk_os_t os, unsigned long arg, struct file *file) +static long +mcexec_terminate_thread(ihk_os_t os, struct terminate_thread_desc * __user arg) +{ + long rc; + unsigned long flags; + struct terminate_thread_desc desc; + struct host_thread *thread_iter, *thread = NULL; + + if (copy_from_user(&desc, arg, sizeof(struct terminate_thread_desc))) { + rc = -EFAULT; + goto out; + } + + dprintk("%s: target pid=%d,tid=%d\n", __FUNCTION__, desc.pid, desc.tid); + + /* Stop switching FS registers for uti thread */ + write_lock_irqsave(&host_thread_lock, flags); + list_for_each_entry(thread_iter, &host_threads, list) { + if(thread_iter->tid == desc.tid) { + thread = thread_iter; + break; + } + } + if (!thread) { + printk("%s: ERROR: thread (pid=%d,tid=%d) not found in host_threads\n", __FUNCTION__, desc.pid, desc.tid); + rc = -ESRCH; + goto unlock_out; + } + + list_del(&thread->list); + kfree(thread); + + write_unlock_irqrestore(&host_thread_lock, flags); + + rc = mcexec_terminate_thread_unsafe(os, desc.pid, desc.tid, desc.code, (struct task_struct 
*)desc.tsk); + + out: + return rc; + + unlock_out: + write_unlock_irqrestore(&host_thread_lock, flags); + goto out; +} + +static long mcexec_release_user_space(struct release_user_space_desc *__user arg) +{ + struct release_user_space_desc desc; + + if (copy_from_user(&desc, arg, sizeof(desc))) { + return -EFAULT; + } + +#if 1 + return mcctrl_clear_pte_range(desc.user_start, + desc.user_end - desc.user_start); +#else + return release_user_space(desc.user_start, desc.user_end - desc.user_start); +#endif +} + + static long (*mckernel_do_futex)(int n, unsigned long arg0, unsigned long arg1, + unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5, + unsigned long _uti_clv, + void *uti_futex_resp, + void *_linux_wait_event, + void *_linux_printk, + void *_linux_clock_gettime); + + long uti_wait_event(void *_resp, unsigned long nsec_timeout) { + struct uti_futex_resp *resp = _resp; + if (nsec_timeout) { + return wait_event_interruptible_timeout(resp->wq, resp->done, nsecs_to_jiffies(nsec_timeout)); + } else { + return wait_event_interruptible(resp->wq, resp->done); + } + } + + int uti_printk(const char *fmt, ...) 
{ + int sum = 0, nwritten; + va_list args; + va_start(args, fmt); + nwritten = vprintk(fmt, args); + sum += nwritten; + va_end(args); + return sum; + } + +int uti_clock_gettime(clockid_t clk_id, struct timespec *tp) { + int ret = 0; + struct timespec64 ts64; + dprintk("%s: clk_id=%x,REALTIME=%x,MONOTONIC=%x\n", __FUNCTION__, clk_id, CLOCK_REALTIME, CLOCK_MONOTONIC); + switch(clk_id) { + case CLOCK_REALTIME: + getnstimeofday64(&ts64); + tp->tv_sec = ts64.tv_sec; + tp->tv_nsec = ts64.tv_nsec; + dprintk("%s: CLOCK_REALTIME,%ld.%09ld\n", __FUNCTION__, tp->tv_sec, tp->tv_nsec); + break; + case CLOCK_MONOTONIC: { + /* Do not use getrawmonotonic() because it returns different value than clock_gettime() */ + ktime_get_ts64(&ts64); + tp->tv_sec = ts64.tv_sec; + tp->tv_nsec = ts64.tv_nsec; + dprintk("%s: CLOCK_MONOTONIC,%ld.%09ld\n", __FUNCTION__, tp->tv_sec, tp->tv_nsec); + break; } + default: + ret = -EINVAL; + break; + } + return ret; +} + +long mcexec_syscall_thread(ihk_os_t os, unsigned long arg, struct file *file) { struct syscall_struct { int number; unsigned long args[6]; unsigned long ret; + unsigned long uti_clv; /* copy of a clv in McKernel */ }; struct syscall_struct param; struct syscall_struct __user *uparam = (struct syscall_struct __user *)arg; - int rc; + long rc; + if (copy_from_user(¶m, uparam, sizeof param)) { return -EFAULT; } - rc = syscall_backward(ihk_host_os_get_usrdata(os), param.number, - param.args[0], param.args[1], param.args[2], - param.args[3], param.args[4], param.args[5], - ¶m.ret); +#if 0 /* debug */ + if (param.number == __NR_futex) { +#else + if (0) { +#endif + struct uti_futex_resp resp = { + .done = 0 + }; + init_waitqueue_head(&resp.wq); + + if (!mckernel_do_futex) { + if (ihk_os_get_special_address(os, IHK_SPADDR_MCKERNEL_DO_FUTEX, + (unsigned long *)&mckernel_do_futex, + NULL)) { + kprintf("%s: ihk_os_get_special_address failed\n", __FUNCTION__); + return -EINVAL; + } + dprintk("%s: mckernel_do_futex=%p\n", __FUNCTION__, 
mckernel_do_futex); + } + rc = (*mckernel_do_futex)(param.number, param.args[0], param.args[1], param.args[2], + param.args[3], param.args[4], param.args[5], param.uti_clv, (void *)&resp, (void *)uti_wait_event, (void *)uti_printk, (void *)uti_clock_gettime); + param.ret = rc; + } else { + dprintk("%s: syscall_backward, SC %d, tid %d\n", __FUNCTION__, param.number, task_tgid_vnr(current)); + rc = syscall_backward(ihk_host_os_get_usrdata(os), param.number, + param.args[0], param.args[1], param.args[2], + param.args[3], param.args[4], param.args[5], + ¶m.ret); + switch (param.number) { + case __NR_munmap: + //printk("%s: syscall_backward, munmap,addr=%lx,len=%lx,tid=%d\n", __FUNCTION__, param.args[0], param.args[1], task_tgid_vnr(current)); + break; + case __NR_mmap: + //printk("%s: syscall_backward, mmap,ret=%lx,tid=%d\n", __FUNCTION__, param.ret, task_tgid_vnr(current)); + break; + default: + break; + } + } if (copy_to_user(&uparam->ret, ¶m.ret, sizeof(unsigned long))) { return -EFAULT; } return rc; } +void mcctrl_futex_wake(struct ikc_scd_packet *pisp) +{ + struct uti_futex_resp *resp; + + /* Guard the access to pisp->futex.resp, which is dead out of mcexec_syscall_thread() */ + if (*pisp->futex.spin_sleep == 0) { + dprintk("%s: DEBUG: woken up by someone else\n", __FUNCTION__); + return; + } + + resp = pisp->futex.resp; + if (!resp) { + kprintf("%s: ERROR: pisp->futex.resp is NULL\n", __FUNCTION__); + return; + } + + if (*pisp->futex.spin_sleep == 0) { + kprintf("%s: ERROR: resp is dead\n", __FUNCTION__); + return; + } + + resp->done = 1; + wake_up_interruptible(&resp->wq); +} + + static struct ihk_cache_topology * cache_topo_search(struct ihk_cpu_topology *cpu_topo, int level) { @@ -2512,9 +2844,6 @@ cache_topo_search(struct ihk_cpu_topology *cpu_topo, int level) return NULL; } -static long (*setaffinity)(pid_t pid, const struct cpumask *in_mask); -static int (*setscheduler_nocheck)(struct task_struct *p, int policy, - const struct sched_param *param); static 
unsigned int *uti_rr; static int max_cpu; @@ -2528,20 +2857,6 @@ uti_attr_init(void) if (uti_rr) return 0; - if (!setaffinity) { - setaffinity = (long (*)(pid_t, const struct cpumask *)) - kallsyms_lookup_name("sched_setaffinity"); - if (!setaffinity) - return -ENOSYS; - } - if (!setscheduler_nocheck) { - setscheduler_nocheck = (int (*)(struct task_struct *, int, - const struct sched_param *)) - kallsyms_lookup_name("sched_setscheduler_nocheck"); - if (!setscheduler_nocheck) - return -ENOSYS; - } - for_each_possible_cpu(i) { max_cpu = i; } @@ -2600,12 +2915,35 @@ retry: return cpumask; } +int pr_cpumask(const char *msg, cpumask_t* cpumask) { + int ret; + char *buf; + + if (!(buf = kmalloc(PAGE_SIZE * 2, GFP_KERNEL))) { + kprintf("%s: error: allocating buf\n", + __func__); + ret = -ENOMEM; + goto out; + } + + BITMAP_SCNLISTPRINTF(buf, PAGE_SIZE * 2, + cpumask_bits(cpumask), + nr_cpumask_bits); + buf[PAGE_SIZE * 2 - 1] = 0; + + pr_info("%s: info: cpuset: %s\n", msg, buf); + ret = 0; + out: + return ret; +} + static long -mcexec_uti_attr(ihk_os_t os, struct uti_attr_desc __user *arg) +mcexec_uti_attr(ihk_os_t os, struct uti_attr_desc __user *_desc) { struct uti_attr_desc desc; + char *uti_cpu_set_str; struct kuti_attr *kattr; - cpumask_t *cpuset; + cpumask_t *cpuset = NULL, *env_cpuset = NULL; struct mcctrl_usrdata *ud = ihk_host_os_get_usrdata(os); ihk_device_t dev = ihk_os_to_dev(os); #ifdef POSTK_DEBUG_ARCH_DEP_40 /* cpu_topology name change */ @@ -2624,30 +2962,44 @@ mcexec_uti_attr(ihk_os_t os, struct uti_attr_desc __user *arg) int mask_size = cpumask_size(); if ((rc = uti_attr_init())) { - return rc; + pr_err("%s: error: uti_attr_init (%d)\n", + __func__, rc); + goto out; + } + + if ((rc = copy_from_user(&desc, _desc, sizeof(desc)))) { + pr_err("%s: error: copy_from_user\n", + __func__); + rc = -EFAULT; + goto out; + } + + if (!(uti_cpu_set_str = kmalloc(desc.uti_cpu_set_len, GFP_KERNEL))) { + pr_err("%s: error: allocating uti_cpu_set_str\n", + __func__); + rc = 
-ENOMEM; + goto out; + } + + if ((rc = copy_from_user(uti_cpu_set_str, desc.uti_cpu_set_str, desc.uti_cpu_set_len))) { + pr_err("%s: error: copy_from_user\n", + __func__); + rc = -EFAULT; + goto out; } - if (copy_from_user(&desc, arg, sizeof desc)) - return -EFAULT; kattr = phys_to_virt(desc.phys_attr); - if (((kattr->attr.flags & UTI_FLAG_SAME_L1) && - (kattr->attr.flags & UTI_FLAG_DIFFERENT_L1)) || - ((kattr->attr.flags & UTI_FLAG_SAME_L2) && - (kattr->attr.flags & UTI_FLAG_DIFFERENT_L2)) || - ((kattr->attr.flags & UTI_FLAG_SAME_L3) && - (kattr->attr.flags & UTI_FLAG_DIFFERENT_L3)) || - ((kattr->attr.flags & UTI_FLAG_SAME_NUMA_DOMAIN) && - (kattr->attr.flags & UTI_FLAG_DIFFERENT_NUMA_DOMAIN))) { - return -EINVAL; - } - + /* Find caller cpu for later resolution of subgroups */ list_for_each_entry(cpu_topo, &ud->cpu_topology_list, chain) { if (cpu_topo->mckernel_cpu_id == kattr->parent_cpuid) { target_cpu = cpu_topo; } } + if (!target_cpu) { + printk("%s: errror: caller cpu not found\n", + __func__); return -EINVAL; } @@ -2656,6 +3008,7 @@ mcexec_uti_attr(ihk_os_t os, struct uti_attr_desc __user *arg) } wkmask = (cpumask_t *)(((char *)cpuset) + mask_size); + /* Initial cpuset */ memcpy(cpuset, cpu_active_mask, mask_size); if (kattr->attr.flags & UTI_FLAG_NUMA_SET) { @@ -2753,38 +3106,70 @@ mcexec_uti_attr(ihk_os_t os, struct uti_attr_desc __user *arg) cpumask_and(cpuset, cpuset, wkmask); } } + + /* UTI_CPU_SET, PREFER_FWK, PREFER_LWK */ + if (uti_cpu_set_str) { + if (!(env_cpuset = kmalloc(mask_size, GFP_KERNEL))) { + pr_err("%s: error: allocating env_cpuset\n", + __func__); + rc = -ENOMEM; + goto out; + } + + if (cpulist_parse(uti_cpu_set_str, env_cpuset) < 0) { + pr_err("%s: error: cpulist_parse: %s\n", + __func__, uti_cpu_set_str); + rc = -EINVAL; + goto out; + } + + //pr_cpumask("cpuset", cpuset); + //pr_cpumask("env_cpuset", env_cpuset); + + if ((kattr->attr.flags & UTI_FLAG_PREFER_LWK)) { + cpumask_andnot(cpuset, cpuset, env_cpuset); + } else { /* Including 
PREFER_FWK and !PREFER_FWK */ + cpumask_and(cpuset, cpuset, env_cpuset); + } + } + + if (kattr->attr.flags & + (UTI_FLAG_EXCLUSIVE_CPU | UTI_FLAG_CPU_INTENSIVE)) { + uti_cpu_select(cpuset); + } + + //pr_cpumask("final cpuset", cpuset); + + /* Setaffinity cpuset */ rc = cpumask_weight(cpuset); - if (!rc); /* do nothing */ - else if (kattr->attr.flags & UTI_FLAG_EXCLUSIVE_CPU) { + if (rc > 0) { + if ((rc = mcctrl_sched_setaffinity(0, cpuset))) { + pr_err("%s: error: setaffinity (%d)\n", + __func__, rc); + goto out; + } + } else { + pr_warn("%s: warning: cpuset is empty\n", __func__); + } + + + /* Assign real-time scheduler */ + if (kattr->attr.flags & UTI_FLAG_HIGH_PRIORITY) { struct sched_param sp; - setaffinity(0, uti_cpu_select(cpuset)); sp.sched_priority = 1; - setscheduler_nocheck(current, SCHED_FIFO, &sp); - rc = 1; - } - else if (kattr->attr.flags & UTI_FLAG_CPU_INTENSIVE) { - setaffinity(0, uti_cpu_select(cpuset)); - rc = 1; - } - else if (kattr->attr.flags & UTI_FLAG_HIGH_PRIORITY) { - struct sched_param sp; - - setaffinity(0, uti_cpu_select(cpuset)); - sp.sched_priority = 1; - setscheduler_nocheck(current, SCHED_FIFO, &sp); - rc = 1; - } - else if (kattr->attr.flags & UTI_FLAG_NON_COOPERATIVE) { - setaffinity(0, uti_cpu_select(cpuset)); - rc = 1; - } - else { - setaffinity(0, cpuset); + if ((rc = mcctrl_sched_setscheduler_nocheck(current, SCHED_FIFO, &sp))) { + pr_err("%s: error: setscheduler_nocheck (%d)\n", + __func__, rc); + goto out; + } } + rc = 0; +out: kfree(cpuset); + kfree(env_cpuset); return rc; } @@ -2817,7 +3202,8 @@ long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg, return mcexec_get_cpu(os); case MCEXEC_UP_CREATE_PPD: - return mcexec_create_per_process_data(os); + return mcexec_create_per_process_data(os, + (struct rpgtable_desc * __user)arg); case MCEXEC_UP_GET_NODES: return mcexec_get_nodes(os); @@ -2837,7 +3223,7 @@ long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg, return mcexec_open_exec(os, (char 
*)arg); case MCEXEC_UP_CLOSE_EXEC: - return mcexec_close_exec(os); + return mcexec_close_exec(os, task_tgid_vnr(current)); case MCEXEC_UP_PREPARE_DMA: return mcexec_pin_region(os, (unsigned long *)arg); @@ -2860,11 +3246,11 @@ long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg, case MCEXEC_UP_SYS_UNSHARE: return mcexec_sys_unshare((struct sys_unshare_desc *)arg); - case MCEXEC_UP_UTIL_THREAD1: - return mcexec_util_thread1(os, arg, file); + case MCEXEC_UP_UTI_GET_CTX: + return mcexec_uti_get_ctx(os, (struct uti_get_ctx_desc *)arg); - case MCEXEC_UP_UTIL_THREAD2: - return mcexec_util_thread2(os, arg, file); + case MCEXEC_UP_UTI_SAVE_FS: + return mcexec_uti_save_fs(os, (struct uti_save_fs_desc *)arg, file); case MCEXEC_UP_SIG_THREAD: return mcexec_sig_thread(os, arg, file); @@ -2873,7 +3259,10 @@ long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg, return mcexec_syscall_thread(os, arg, file); case MCEXEC_UP_TERMINATE_THREAD: - return mcexec_terminate_thread(os, (unsigned long *)arg, file); + return mcexec_terminate_thread(os, (struct terminate_thread_desc *)arg); + + case MCEXEC_UP_RELEASE_USER_SPACE: + return mcexec_release_user_space((struct release_user_space_desc *)arg); case MCEXEC_UP_GET_NUM_POOL_THREADS: return mcctrl_get_num_pool_threads(os); @@ -2919,6 +3308,7 @@ int mcctrl_get_request_os_cpu(ihk_os_t os, int *ret_cpu) { struct mcctrl_usrdata *usrdata; struct mcctrl_per_proc_data *ppd; + struct mcctrl_per_thread_data *ptd; struct ikc_scd_packet *packet; struct ihk_ikc_channel_desc *ch; int ret = 0; @@ -2943,11 +3333,18 @@ int mcctrl_get_request_os_cpu(ihk_os_t os, int *ret_cpu) } /* Look up per-thread structure */ - packet = (struct ikc_scd_packet *)mcctrl_get_per_thread_data(ppd, current); - if (!packet) { + ptd = mcctrl_get_per_thread_data(ppd, current); + if (!ptd) { + printk("%s: ERROR: mcctrl_get_per_thread_data failed\n", __FUNCTION__); ret = -EINVAL; + goto no_ptd; + } + pr_ptd("get", task_pid_vnr(current), ptd); 
+ packet = (struct ikc_scd_packet *)ptd->data; + if (!packet) { printk("%s: ERROR: no packet registered for TID %d\n", __FUNCTION__, task_pid_vnr(current)); + ret = -EINVAL; goto out_put_ppd; } @@ -2960,6 +3357,9 @@ int mcctrl_get_request_os_cpu(ihk_os_t os, int *ret_cpu) printk("%s: OS: %p, CPU: %d\n", __FUNCTION__, os, *ret_cpu); out_put_ppd: + mcctrl_put_per_thread_data(ptd); + pr_ptd("put", task_pid_vnr(current), ptd); + no_ptd: mcctrl_put_per_proc_data(ppd); return ret; diff --git a/executer/kernel/mcctrl/driver.c b/executer/kernel/mcctrl/driver.c index 390dac6c..1588f63d 100644 --- a/executer/kernel/mcctrl/driver.c +++ b/executer/kernel/mcctrl/driver.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "mcctrl.h" #include @@ -43,8 +44,6 @@ extern void mcctrl_syscall_init(void); extern void procfs_init(int); extern void procfs_exit(int); -extern void rus_page_hash_init(void); -extern void rus_page_hash_put_pages(void); extern void uti_attr_finalize(void); extern void binfmt_mcexec_init(void); extern void binfmt_mcexec_exit(void); @@ -84,13 +83,14 @@ static struct ihk_os_user_call_handler mcctrl_uchs[] = { { .request = MCEXEC_UP_SYS_MOUNT, .func = mcctrl_ioctl }, { .request = MCEXEC_UP_SYS_UMOUNT, .func = mcctrl_ioctl }, { .request = MCEXEC_UP_SYS_UNSHARE, .func = mcctrl_ioctl }, - { .request = MCEXEC_UP_UTIL_THREAD1, .func = mcctrl_ioctl }, - { .request = MCEXEC_UP_UTIL_THREAD2, .func = mcctrl_ioctl }, + { .request = MCEXEC_UP_UTI_GET_CTX, .func = mcctrl_ioctl }, + { .request = MCEXEC_UP_UTI_SAVE_FS, .func = mcctrl_ioctl }, { .request = MCEXEC_UP_SIG_THREAD, .func = mcctrl_ioctl }, { .request = MCEXEC_UP_SYSCALL_THREAD, .func = mcctrl_ioctl }, { .request = MCEXEC_UP_TERMINATE_THREAD, .func = mcctrl_ioctl }, { .request = MCEXEC_UP_GET_NUM_POOL_THREADS, .func = mcctrl_ioctl }, { .request = MCEXEC_UP_UTI_ATTR, .func = mcctrl_ioctl }, + { .request = MCEXEC_UP_RELEASE_USER_SPACE, .func = mcctrl_ioctl }, { .request = MCEXEC_UP_DEBUG_LOG, .func = 
mcctrl_ioctl }, { .request = IHK_OS_AUX_PERF_NUM, .func = mcctrl_ioctl }, { .request = IHK_OS_AUX_PERF_SET, .func = mcctrl_ioctl }, @@ -178,6 +178,7 @@ int mcctrl_os_shutdown_notifier(int os_index) mdelay(200); } + pager_cleanup(); sysfsm_cleanup(os[os_index]); free_topology_info(os[os_index]); ihk_os_unregister_user_call_handlers(os[os_index], mcctrl_uc + os_index); @@ -185,9 +186,6 @@ int mcctrl_os_shutdown_notifier(int os_index) destroy_ikc_channels(os[os_index]); procfs_exit(os_index); } -#ifdef POSTK_DEBUG_TEMP_FIX_35 /* in shutdown phase, rus_page_hash_put_pages() call added. */ - rus_page_hash_put_pages(); -#endif /* POSTK_DEBUG_TEMP_FIX_35 */ os[os_index] = NULL; @@ -214,6 +212,68 @@ static struct ihk_os_notifier mcctrl_os_notifier = { .ops = &mcctrl_os_notifier_ops, }; + + +int (*mcctrl_sys_mount)(char *dev_name, char *dir_name, char *type, + unsigned long flags, void *data); +int (*mcctrl_sys_umount)(char *dir_name, int flags); +int (*mcctrl_sys_unshare)(unsigned long unshare_flags); +long (*mcctrl_sched_setaffinity)(pid_t pid, const struct cpumask *in_mask); +int (*mcctrl_sched_setscheduler_nocheck)(struct task_struct *p, int policy, + const struct sched_param *param); + +ssize_t (*mcctrl_sys_readlink)(const char *path, char *buf, + size_t bufsiz); +void (*mcctrl_zap_page_range)(struct vm_area_struct *vma, + unsigned long start, + unsigned long size, + struct zap_details *details); + +struct inode_operations *mcctrl_hugetlbfs_inode_operations; + + +static int symbols_init(void) +{ + mcctrl_sys_mount = (void *) kallsyms_lookup_name("sys_mount"); + if (WARN_ON(!mcctrl_sys_mount)) + return -EFAULT; + + mcctrl_sys_umount = (void *) kallsyms_lookup_name("sys_umount"); + if (WARN_ON(!mcctrl_sys_umount)) + return -EFAULT; + + mcctrl_sys_unshare = (void *) kallsyms_lookup_name("sys_unshare"); + if (WARN_ON(!mcctrl_sys_unshare)) + return -EFAULT; + + mcctrl_sched_setaffinity = + (void *) kallsyms_lookup_name("sched_setaffinity"); + if 
(WARN_ON(!mcctrl_sched_setaffinity)) + return -EFAULT; + + mcctrl_sched_setscheduler_nocheck = + (void *) kallsyms_lookup_name("sched_setscheduler_nocheck"); + if (WARN_ON(!mcctrl_sched_setscheduler_nocheck)) + return -EFAULT; + + mcctrl_sys_readlink = + (void *) kallsyms_lookup_name("sys_readlink"); + if (WARN_ON(!mcctrl_sys_readlink)) + return -EFAULT; + + mcctrl_zap_page_range = + (void *) kallsyms_lookup_name("zap_page_range"); + if (WARN_ON(!mcctrl_zap_page_range)) + return -EFAULT; + + mcctrl_hugetlbfs_inode_operations = + (void *) kallsyms_lookup_name("hugetlbfs_inode_operations"); + if (WARN_ON(!mcctrl_hugetlbfs_inode_operations)) + return -EFAULT; + + return arch_symbols_init(); +} + static int __init mcctrl_init(void) { int ret = 0; @@ -227,10 +287,11 @@ static int __init mcctrl_init(void) os[i] = NULL; } - rus_page_hash_init(); - binfmt_mcexec_init(); + if ((ret = symbols_init())) + goto error; + if ((ret = ihk_host_register_os_notifier(&mcctrl_os_notifier)) != 0) { printk("mcctrl: error: registering OS notifier\n"); goto error; @@ -241,7 +302,6 @@ static int __init mcctrl_init(void) error: binfmt_mcexec_exit(); - rus_page_hash_put_pages(); return ret; } @@ -253,7 +313,6 @@ static void __exit mcctrl_exit(void) } binfmt_mcexec_exit(); - rus_page_hash_put_pages(); uti_attr_finalize(); printk("mcctrl: unregistered.\n"); diff --git a/executer/kernel/mcctrl/ikc.c b/executer/kernel/mcctrl/ikc.c index 1faf7f5d..e07d67a2 100644 --- a/executer/kernel/mcctrl/ikc.c +++ b/executer/kernel/mcctrl/ikc.c @@ -52,6 +52,8 @@ static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ihk_ikc_channel_desc *c); int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet); void sig_done(unsigned long arg, int err); +void mcctrl_perf_ack(ihk_os_t os, struct ikc_scd_packet *packet); +void mcctrl_futex_wake(struct ikc_scd_packet *pisp); void mcctrl_os_read_write_cpu_response(ihk_os_t os, struct ikc_scd_packet *pisp); void mcctrl_eventfd(ihk_os_t 
os, struct ikc_scd_packet *pisp); @@ -154,7 +156,7 @@ int mcctrl_ikc_send_wait(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp, spin_lock_irqsave(&usrdata->wakeup_descs_lock, flags); list_add(&desc->chain, &usrdata->wakeup_descs_list); spin_unlock_irqrestore(&usrdata->wakeup_descs_lock, flags); - if (free_addrs_count) + if (do_frees) *do_frees = 0; return ret < 0 ? ret : -ETIME; } @@ -182,6 +184,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, case SCD_MSG_PREPARE_PROCESS_ACKED: case SCD_MSG_PERF_ACK: case SCD_MSG_SEND_SIGNAL_ACK: + case SCD_MSG_PROCFS_ANSWER: mcctrl_wakeup_cb(__os, pisp); break; @@ -189,11 +192,6 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, mcexec_syscall(usrdata, pisp); break; - case SCD_MSG_PROCFS_ANSWER: - procfs_answer(usrdata, pisp->pid); - break; - - case SCD_MSG_SYSFS_REQ_CREATE: case SCD_MSG_SYSFS_REQ_MKDIR: case SCD_MSG_SYSFS_REQ_SYMLINK: @@ -209,7 +207,8 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, case SCD_MSG_PROCFS_TID_CREATE: case SCD_MSG_PROCFS_TID_DELETE: - procfsm_packet_handler(__os, pisp->msg, pisp->pid, pisp->arg); + procfsm_packet_handler(__os, pisp->msg, pisp->pid, pisp->arg, + pisp->resp_pa); break; case SCD_MSG_GET_VDSO_INFO: @@ -225,6 +224,10 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, mcctrl_eventfd(__os, pisp); break; + case SCD_MSG_FUTEX_WAKE: + mcctrl_futex_wake(pisp); + break; + default: printk(KERN_ERR "mcctrl:syscall_packet_handler:" "unknown message (%d.%d.%d.%d.%d.%#lx)\n", diff --git a/executer/kernel/mcctrl/mcctrl.h b/executer/kernel/mcctrl/mcctrl.h index 1761861b..811a58b8 100644 --- a/executer/kernel/mcctrl/mcctrl.h +++ b/executer/kernel/mcctrl/mcctrl.h @@ -67,6 +67,7 @@ #define SCD_MSG_PROCFS_DELETE 0x11 #define SCD_MSG_PROCFS_REQUEST 0x12 #define SCD_MSG_PROCFS_ANSWER 0x13 +#define SCD_MSG_PROCFS_RELEASE 0x15 #define SCD_MSG_DEBUG_LOG 0x20 @@ -101,23 +102,18 @@ #define SCD_MSG_CPU_RW_REG 0x52 #define 
SCD_MSG_CPU_RW_REG_RESP 0x53 +#define SCD_MSG_FUTEX_WAKE 0x60 + #define DMA_PIN_SHIFT 21 #define DO_USER_MODE #define __NR_coredump 999 -#ifdef POSTK_DEBUG_TEMP_FIX_61 /* Core table size and lseek return value to loff_t */ struct coretable { loff_t len; unsigned long addr; }; -#else /* POSTK_DEBUG_TEMP_FIX_61 */ -struct coretable { - int len; - unsigned long addr; -}; -#endif /* POSTK_DEBUG_TEMP_FIX_61 */ enum mcctrl_os_cpu_operation { MCCTRL_OS_CPU_READ_REGISTER, @@ -125,6 +121,12 @@ enum mcctrl_os_cpu_operation { MCCTRL_OS_CPU_MAX_OP }; +/* Used to wake-up a Linux thread futex_wait()-ing */ +struct uti_futex_resp { + int done; + wait_queue_head_t wq; +}; + struct ikc_scd_packet { int msg; int err; @@ -147,7 +149,7 @@ struct ikc_scd_packet { long sysfs_arg3; }; - /* SCD_MSG_SCHEDULE_THREAD */ + /* SCD_MSG_WAKE_UP_SYSCALL_THREAD */ struct { int ttid; }; @@ -163,6 +165,12 @@ struct ikc_scd_packet { struct { int eventfd_type; }; + + /* SCD_MSG_FUTEX_WAKE */ + struct { + void *resp; + int *spin_sleep; /* 1: waiting in linux_wait_event() 0: woken up by someone else */ + } futex; }; char padding[8]; }; @@ -213,9 +221,12 @@ struct mcctrl_channel { }; struct mcctrl_per_thread_data { + struct mcctrl_per_proc_data *ppd; struct list_head hash; struct task_struct *task; void *data; + int tid; /* debug */ + atomic_t refcount; }; #define MCCTRL_PER_THREAD_DATA_HASH_SHIFT 8 @@ -315,6 +326,7 @@ struct mcctrl_part_exec { struct mutex lock; int nr_processes; int nr_processes_left; + int process_rank; cpumask_t cpus_used; struct list_head pli_list; }; @@ -400,10 +412,30 @@ int mcctrl_ikc_send_wait(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp, ihk_os_t osnum_to_os(int n); +/* look up symbols, plus arch-specific ones */ +extern int (*mcctrl_sys_mount)(char *dev_name, char *dir_name, char *type, + unsigned long flags, void *data); +extern int (*mcctrl_sys_umount)(char *dir_name, int flags); +extern int (*mcctrl_sys_unshare)(unsigned long unshare_flags); +extern long 
(*mcctrl_sched_setaffinity)(pid_t pid, + const struct cpumask *in_mask); +extern int (*mcctrl_sched_setscheduler_nocheck)(struct task_struct *p, + int policy, + const struct sched_param *param); +extern ssize_t (*mcctrl_sys_readlink)(const char *path, char *buf, + size_t bufsiz); +extern void (*mcctrl_zap_page_range)(struct vm_area_struct *vma, + unsigned long start, + unsigned long size, + struct zap_details *details); +extern struct inode_operations *mcctrl_hugetlbfs_inode_operations; + /* syscall.c */ void pager_add_process(void); void pager_remove_process(struct mcctrl_per_proc_data *ppd); +void pager_cleanup(void); +int __do_in_kernel_irq_syscall(ihk_os_t os, struct ikc_scd_packet *packet); int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet); int mcctrl_add_per_proc_data(struct mcctrl_usrdata *ud, int pid, struct mcctrl_per_proc_data *ppd); @@ -412,20 +444,18 @@ struct mcctrl_per_proc_data *mcctrl_get_per_proc_data( struct mcctrl_usrdata *ud, int pid); void mcctrl_put_per_proc_data(struct mcctrl_per_proc_data *ppd); -int mcctrl_add_per_thread_data(struct mcctrl_per_proc_data* ppd, - struct task_struct *task, void *data); -int mcctrl_delete_per_thread_data(struct mcctrl_per_proc_data* ppd, - struct task_struct *task); +int mcctrl_add_per_thread_data(struct mcctrl_per_proc_data *ppd, void *data); +void mcctrl_put_per_thread_data_unsafe(struct mcctrl_per_thread_data *ptd); +void mcctrl_put_per_thread_data(struct mcctrl_per_thread_data* ptd); #ifdef POSTK_DEBUG_ARCH_DEP_56 /* Strange how to use inline declaration fix. 
*/ -static inline struct mcctrl_per_thread_data *mcctrl_get_per_thread_data( - struct mcctrl_per_proc_data *ppd, struct task_struct *task) +inline struct mcctrl_per_thread_data *mcctrl_get_per_thread_data(struct mcctrl_per_proc_data *ppd, struct task_struct *task) { struct mcctrl_per_thread_data *ptd_iter, *ptd = NULL; int hash = (((uint64_t)task >> 4) & MCCTRL_PER_THREAD_DATA_HASH_MASK); unsigned long flags; - /* Check if data for this thread exists and return it */ - read_lock_irqsave(&ppd->per_thread_data_hash_lock[hash], flags); + /* Check if data for this thread exists */ + write_lock_irqsave(&ppd->per_thread_data_hash_lock[hash], flags); list_for_each_entry(ptd_iter, &ppd->per_thread_data_hash[hash], hash) { if (ptd_iter->task == task) { @@ -434,16 +464,27 @@ static inline struct mcctrl_per_thread_data *mcctrl_get_per_thread_data( } } - read_unlock_irqrestore(&ppd->per_thread_data_hash_lock[hash], flags); - return ptd ? ptd->data : NULL; + if (ptd) { + if (atomic_read(&ptd->refcount) <= 0) { + printk("%s: ERROR: use-after-free detected (%d)", __FUNCTION__, atomic_read(&ptd->refcount)); + ptd = NULL; + goto out; + } + atomic_inc(&ptd->refcount); + } + + out: + write_unlock_irqrestore(&ppd->per_thread_data_hash_lock[hash], flags); + return ptd; } #else /* POSTK_DEBUG_ARCH_DEP_56 */ -inline struct mcctrl_per_thread_data *mcctrl_get_per_thread_data( - struct mcctrl_per_proc_data *ppd, struct task_struct *task); +inline struct mcctrl_per_thread_data *mcctrl_get_per_thread_data(struct mcctrl_per_proc_data *ppd, struct task_struct *task); #endif /* POSTK_DEBUG_ARCH_DEP_56 */ +int mcctrl_clear_pte_range(uintptr_t start, uintptr_t len); void __return_syscall(ihk_os_t os, struct ikc_scd_packet *packet, long ret, int stid); +int clear_pte_range(uintptr_t start, uintptr_t len); int mcctrl_os_alive(void); @@ -455,7 +496,6 @@ struct procfs_read { int count; /* bytes to read (request) */ int eof; /* if eof is detected, 1 otherwise 0. 
(answer)*/ int ret; /* read bytes (answer) */ - int status; /* non-zero if done (answer) */ int newcpu; /* migrated new cpu (answer) */ int readwrite; /* 0:read, 1:write */ char fname[PROCFS_NAME_MAX]; /* procfs filename (request) */ @@ -468,7 +508,8 @@ struct procfs_file { }; void procfs_answer(struct mcctrl_usrdata *ud, int pid); -int procfsm_packet_handler(void *os, int msg, int pid, unsigned long arg); +int procfsm_packet_handler(void *os, int msg, int pid, unsigned long arg, + unsigned long resp_pa); void add_tid_entry(int osnum, int pid, int tid); void add_pid_entry(int osnum, int pid); void delete_tid_entry(int osnum, int pid, int tid); @@ -504,7 +545,9 @@ struct vdso { int reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp, unsigned long *endp); +int release_user_space(uintptr_t start, uintptr_t len); void get_vdso_info(ihk_os_t os, long vdso_pa); +int arch_symbols_init(void); struct get_cpu_mapping_req { int busy; /* INOUT: */ diff --git a/executer/kernel/mcctrl/procfs.c b/executer/kernel/mcctrl/procfs.c index 0f27d9e4..48b19814 100644 --- a/executer/kernel/mcctrl/procfs.c +++ b/executer/kernel/mcctrl/procfs.c @@ -103,33 +103,6 @@ getpath(struct procfs_list_entry *e, char *buf, int bufsize) } } -/** - * \brief Process SCD_MSG_PROCFS_ANSWER message. - * - * \param ud mcctrl_usrdata pointer - * \param pid PID of the requesting process - */ -void procfs_answer(struct mcctrl_usrdata *ud, int pid) -{ - struct mcctrl_per_proc_data *ppd = NULL; - - if (pid > 0) { - ppd = mcctrl_get_per_proc_data(ud, pid); - - if (unlikely(!ppd)) { - kprintf("%s: ERROR: no per-process structure for PID %d\n", - __FUNCTION__, pid); - return; - } - } - - wake_up_all(pid > 0 ? 
&ppd->wq_procfs : &ud->wq_procfs); - - if (pid > 0) { - mcctrl_put_per_proc_data(ppd); - } -} - static struct procfs_list_entry * find_procfs_entry(struct procfs_list_entry *parent, const char *name) { @@ -321,6 +294,8 @@ get_base_entry(int osnum) if(!e){ e = add_procfs_entry(NULL, name, S_IFDIR | 0555, uid, gid, NULL); + if (!e) + return NULL; e->osnum = osnum; } return e; @@ -456,6 +431,8 @@ proc_exe_link(int osnum, int pid, const char *path) e = add_procfs_entry(parent, "exe", S_IFLNK | 0777, uid, gid, path); + if (!e) + goto out; e->data = kmalloc(strlen(path) + 1, GFP_KERNEL); strcpy(e->data, path); task = find_procfs_entry(parent, "task"); @@ -464,6 +441,7 @@ proc_exe_link(int osnum, int pid, const char *path) uid, gid, path); } } +out: up(&procfs_file_list_lock); } @@ -509,7 +487,6 @@ procfs_exit(int osnum) * This function conforms to the 2) way of fs/proc/generic.c * from linux-2.6.39.4. */ -#ifdef POSTK_DEBUG_TEMP_FIX_43 /* Fixed an issue that failed pread / pwrite of size larger than 4MB */ static ssize_t __mckernel_procfs_read_write( struct file *file, char __user *buf, size_t nbytes, @@ -520,7 +497,7 @@ static ssize_t __mckernel_procfs_read_write( int order = 0; volatile struct procfs_read *r = NULL; struct ikc_scd_packet isp; - int ret, osnum, pid, retw; + int ret, osnum, pid; unsigned long pbuf; size_t count = nbytes; size_t copy_size = 0; @@ -615,11 +592,11 @@ static ssize_t __mckernel_procfs_read_write( while (count > 0) { int this_len = min_t(ssize_t, count, copy_size); + int do_free; r->pbuf = pbuf; r->eof = 0; r->ret = -EIO; /* default */ - r->status = 0; r->offset = offset; r->count = this_len; r->readwrite = read_write; @@ -629,50 +606,26 @@ static ssize_t __mckernel_procfs_read_write( isp.arg = virt_to_phys(r); isp.pid = pid; - ret = mcctrl_ikc_send(osnum_to_os(e->osnum), - (pid > 0) ? ppd->ikc_target_cpu : 0, &isp); + ret = mcctrl_ikc_send_wait(osnum_to_os(e->osnum), + (pid > 0) ? 
ppd->ikc_target_cpu : 0, + &isp, HZ, NULL, &do_free, 1, r); + + if (!do_free && ret >= 0) { + ret = -EIO; + } if (ret < 0) { - goto out; /* error */ - } - - /* Wait for a reply. */ - ret = -EIO; /* default exit code */ - dprintk("%s: waiting for reply\n", __FUNCTION__); - -retry_wait: - /* Wait for the status field of the procfs_read structure, - * wait on per-process or OS specific data depending on - * who the request is for. - */ - if (pid > 0) { - retw = wait_event_interruptible_timeout(ppd->wq_procfs, - r->status != 0, HZ); - } - else { - retw = wait_event_interruptible_timeout(udp->wq_procfs, - r->status != 0, HZ); - } - - /* Timeout? */ - if (retw == 0 && r->status == 0) { - printk("%s: error: timeout (1 sec)\n", __FUNCTION__); + if (ret == -ETIME) { + pr_info("%s: error: timeout (1 sec)\n", + __func__); + } + else if (ret == -ERESTARTSYS) { + ret = -ERESTART; + } + if (!do_free) + r = NULL; goto out; } - /* Interrupted? */ - else if (retw == -ERESTARTSYS) { - ret = -ERESTART; - goto out; - } - /* Were we woken up by a reply to another procfs request? */ - else if (r->status == 0) { - /* TODO: r->status is not set atomically, we could be woken - * up with status == 0 and it could change to 1 while in this - * code, we could potentially miss the wake_up()... - */ - printk("%s: stale wake-up, retrying\n", __FUNCTION__); - goto retry_wait; - } /* Wake up and check the result. */ dprintk("%s: woke up. 
ret: %d, eof: %d\n", @@ -717,193 +670,6 @@ out: return ret; } -#else /* POSTK_DEBUG_TEMP_FIX_43 */ -static ssize_t __mckernel_procfs_read_write( - struct file *file, - char __user *buf, size_t nbytes, - loff_t *ppos, int read_write) -{ - struct inode * inode = file->f_inode; - char *kern_buffer = NULL; - int order = 0; - volatile struct procfs_read *r = NULL; - struct ikc_scd_packet isp; - int ret, osnum, pid, retw; - unsigned long pbuf; - unsigned long count = nbytes; -#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) - struct proc_dir_entry *dp = PDE(inode); - struct procfs_list_entry *e = dp->data; -#else - struct procfs_list_entry *e = PDE_DATA(inode); -#endif - loff_t offset = *ppos; - char pathbuf[PROCFS_NAME_MAX]; - char *path, *p; - ihk_os_t os = NULL; - struct mcctrl_usrdata *udp = NULL; - struct mcctrl_per_proc_data *ppd = NULL; - - if (count <= 0 || offset < 0) { - return 0; - } - - path = getpath(e, pathbuf, PROCFS_NAME_MAX); - dprintk("%s: invoked for %s, offset: %lu, count: %lu\n", - __FUNCTION__, path, - (unsigned long)offset, count); - - /* Verify OS number */ - ret = sscanf(path, "mcos%d/", &osnum); - if (ret != 1) { - printk("%s: error: couldn't determine OS number\n", __FUNCTION__); - return -EINVAL; - } - - if (osnum != e->osnum) { - printk("%s: error: OS numbers don't match\n", __FUNCTION__); - return -EINVAL; - } - - /* Is this request for a specific process? 
*/ - p = strchr(path, '/') + 1; - ret = sscanf(p, "%d/", &pid); - if (ret != 1) { - pid = -1; - } - - os = osnum_to_os(osnum); - if (!os) { - printk("%s: error: no IHK OS data found for OS %d\n", - __FUNCTION__, osnum); - return -EINVAL; - } - - udp = ihk_host_os_get_usrdata(os); - if (!udp) { - printk("%s: error: no MCCTRL data found for OS %d\n", - __FUNCTION__, osnum); - return -EINVAL; - } - - if (pid > 0) { - ppd = mcctrl_get_per_proc_data(udp, pid); - - if (unlikely(!ppd)) { - printk("%s: error: no per-process structure for PID %d", - __FUNCTION__, pid); - return -EINVAL; - } - } - - while ((1 << order) < count) ++order; - if (order > 12) { - order -= 12; - } - else { - order = 1; - } - - /* NOTE: we need physically contigous memory to pass through IKC */ - kern_buffer = (char *)__get_free_pages(GFP_KERNEL, order); - if (!kern_buffer) { - printk("%s: ERROR: allocating kernel buffer\n", __FUNCTION__); - ret = -ENOMEM; - goto out; - } - - pbuf = virt_to_phys(kern_buffer); - - r = kmalloc(sizeof(struct procfs_read), GFP_KERNEL); - if (r == NULL) { - ret = -ENOMEM; - goto out; - } - r->pbuf = pbuf; - r->eof = 0; - r->ret = -EIO; /* default */ - r->status = 0; - r->offset = offset; - r->count = count; - r->readwrite = read_write; - strncpy((char *)r->fname, path, PROCFS_NAME_MAX); - isp.msg = SCD_MSG_PROCFS_REQUEST; - isp.ref = 0; - isp.arg = virt_to_phys(r); - isp.pid = pid; - - ret = mcctrl_ikc_send(osnum_to_os(e->osnum), - (pid > 0) ? ppd->ikc_target_cpu : 0, &isp); - - if (ret < 0) { - goto out; /* error */ - } - - /* Wait for a reply. */ - ret = -EIO; /* default exit code */ - dprintk("%s: waiting for reply\n", __FUNCTION__); - -retry_wait: - /* Wait for the status field of the procfs_read structure, - * wait on per-process or OS specific data depending on - * who the request is for. 
- */ - if (pid > 0) { - retw = wait_event_interruptible_timeout(ppd->wq_procfs, - r->status != 0, 5 * HZ); - } - else { - retw = wait_event_interruptible_timeout(udp->wq_procfs, - r->status != 0, 5 * HZ); - } - - /* Timeout? */ - if (retw == 0 && r->status == 0) { - printk("%s: error: timeout (1 sec)\n", __FUNCTION__); - goto out; - } - /* Interrupted? */ - else if (retw == -ERESTARTSYS) { - ret = -ERESTART; - goto out; - } - /* Were we woken up by a reply to another procfs request? */ - else if (r->status == 0) { - /* TODO: r->status is not set atomically, we could be woken - * up with status == 0 and it could change to 1 while in this - * code, we could potentially miss the wake_up()... - */ - printk("%s: stale wake-up, retrying\n", __FUNCTION__); - goto retry_wait; - } - - /* Wake up and check the result. */ - dprintk("%s: woke up. ret: %d, eof: %d\n", - __FUNCTION__, r->ret, r->eof); - - if (r->ret > 0) { - if (read_write == 0) { - if (copy_to_user(buf, kern_buffer, r->ret)) { - printk("%s: ERROR: copy_to_user failed.\n", __FUNCTION__); - ret = -EFAULT; - goto out; - } - } - *ppos += r->ret; - } - ret = r->ret; - -out: - if (ppd) - mcctrl_put_per_proc_data(ppd); - if (kern_buffer) - free_pages((uintptr_t)kern_buffer, order); - if (r) - kfree((void *)r); - - return ret; -} -#endif /* POSTK_DEBUG_TEMP_FIX_43 */ static ssize_t mckernel_procfs_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) @@ -939,33 +705,48 @@ struct procfs_work { int msg; int pid; unsigned long arg; + unsigned long resp_pa; struct work_struct work; }; static void procfsm_work_main(struct work_struct *work0) { struct procfs_work *work = container_of(work0, struct procfs_work, work); + unsigned long phys; + int *done; switch (work->msg) { - case SCD_MSG_PROCFS_TID_CREATE: - add_tid_entry(ihk_host_os_get_index(work->os), work->pid, work->arg); - break; + case SCD_MSG_PROCFS_TID_CREATE: + add_tid_entry(ihk_host_os_get_index(work->os), + work->pid, work->arg); + phys = 
ihk_device_map_memory(ihk_os_to_dev(work->os), + work->resp_pa, sizeof(int)); + done = ihk_device_map_virtual(ihk_os_to_dev(work->os), + phys, sizeof(int), NULL, 0); + *done = 1; + ihk_device_unmap_virtual(ihk_os_to_dev(work->os), + done, sizeof(int)); + ihk_device_unmap_memory(ihk_os_to_dev(work->os), + phys, sizeof(int)); + break; - case SCD_MSG_PROCFS_TID_DELETE: - delete_tid_entry(ihk_host_os_get_index(work->os), work->pid, work->arg); - break; + case SCD_MSG_PROCFS_TID_DELETE: + delete_tid_entry(ihk_host_os_get_index(work->os), + work->pid, work->arg); + break; - default: - printk("%s: unknown work: msg: %d, pid: %d, arg: %lu)\n", - __FUNCTION__, work->msg, work->pid, work->arg); - break; + default: + pr_warn("%s: unknown work: msg: %d, pid: %d, arg: %lu)\n", + __func__, work->msg, work->pid, work->arg); + break; } kfree(work); return; } -int procfsm_packet_handler(void *os, int msg, int pid, unsigned long arg) +int procfsm_packet_handler(void *os, int msg, int pid, unsigned long arg, + unsigned long resp_pa) { struct procfs_work *work = NULL; @@ -979,6 +760,7 @@ int procfsm_packet_handler(void *os, int msg, int pid, unsigned long arg) work->msg = msg; work->pid = pid; work->arg = arg; + work->resp_pa = resp_pa; INIT_WORK(&work->work, &procfsm_work_main); schedule_work(&work->work); @@ -997,6 +779,303 @@ static const struct file_operations mckernel_forward = { .write = mckernel_procfs_write, }; +#define PA_NULL (-1L) + +struct mckernel_procfs_buffer_info { + unsigned long top_pa; + unsigned long cur_pa; + ihk_os_t os; + int pid; + char path[0]; +}; + +struct mckernel_procfs_buffer { + unsigned long next_pa; + unsigned long pos; + unsigned long size; + char buf[0]; +}; + +static int mckernel_procfs_buff_open(struct inode *inode, struct file *file) +{ + struct mckernel_procfs_buffer_info *info; + int pid; + int ret; + char *path; + char *path_buf; + char *p; + ihk_os_t os; +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0) + struct proc_dir_entry *dp = 
PDE(inode); + struct procfs_list_entry *e = dp->data; +#else + struct procfs_list_entry *e = PDE_DATA(inode); +#endif + + os = osnum_to_os(e->osnum); + if (!os) { + return -EINVAL; + } + path_buf = kmalloc(PROCFS_NAME_MAX, GFP_KERNEL); + if (!path_buf) { + return -ENOMEM; + } + path = getpath(e, path_buf, PROCFS_NAME_MAX); + p = strchr(path, '/') + 1; + ret = sscanf(p, "%d/", &pid); + if (ret != 1) { + pid = -1; + } + + info = kmalloc(sizeof(struct mckernel_procfs_buffer_info) + + strlen(path) + 1, GFP_KERNEL); + if (!info) { + kfree(path_buf); + return -ENOMEM; + } + info->top_pa = PA_NULL; + info->cur_pa = PA_NULL; + info->os = os; + info->pid = pid; + strcpy(info->path, path); + file->private_data = info; + + kfree(path_buf); + return 0; +} + +static int mckernel_procfs_buff_release(struct inode *inode, struct file *file) +{ + struct mckernel_procfs_buffer_info *info = file->private_data; + int rc = 0; + + if (!info) { + return -EIO; + } + + file->private_data = NULL; + if (info->top_pa != PA_NULL) { + int ret; + struct procfs_read *r = NULL; + struct ikc_scd_packet isp; + int do_free; + + r = kmalloc(sizeof(struct procfs_read), GFP_KERNEL); + if (r == NULL) { + rc = -ENOMEM; + goto out; + } + memset(r, '\0', sizeof(struct procfs_read)); + r->pbuf = info->top_pa; + r->ret = -EIO; /* default */ + r->fname[0] = '\0'; + isp.msg = SCD_MSG_PROCFS_RELEASE; + isp.ref = 0; + isp.arg = virt_to_phys(r); + isp.pid = 0; + + rc = -EIO; + ret = mcctrl_ikc_send_wait(info->os, 0, + &isp, 5 * HZ, NULL, &do_free, 1, r); + + if (!do_free && ret >= 0) { + ret = -EIO; + } + + if (ret < 0) { + rc = ret; + if (ret == -ETIME) { + pr_info("%s: error: timeout (1 sec)\n", + __func__); + } + else if (ret == -ERESTARTSYS) { + rc = -ERESTART; + } + if (!do_free) + r = NULL; + goto out; + } + + if (r->ret < 0) { + rc = r->ret; + goto out; + } + rc = 0; +out: + if (r) + kfree((void *)r); + } + kfree(info); + return rc; +} + +static ssize_t mckernel_procfs_buff_read(struct file *file, char 
__user *ubuf, + size_t nbytes, loff_t *ppos) +{ + struct mckernel_procfs_buffer_info *info = file->private_data; + unsigned long phys; + struct mckernel_procfs_buffer *buf; + int pos = *ppos; + ssize_t l = 0; + int done = 0; + ihk_os_t os; + + if (nbytes <= 0 || *ppos < 0) { + return 0; + } + + if (!info) { + return -EIO; + } + + os = info->os; + if (info->top_pa == PA_NULL) { + int ret; + int pid = info->pid; + struct procfs_read *r = NULL; + struct ikc_scd_packet isp; + struct mcctrl_usrdata *udp = NULL; + struct mcctrl_per_proc_data *ppd = NULL; + int do_free; + + udp = ihk_host_os_get_usrdata(os); + if (!udp) { + pr_err("%s: no MCCTRL data found for OS\n", + __func__); + return -EINVAL; + } + + if (pid > 0) { + ppd = mcctrl_get_per_proc_data(udp, pid); + + if (unlikely(!ppd)) { + pr_err("%s: no per-process structure for PID %d", + __func__, pid); + return -EINVAL; + } + } + + r = kmalloc(sizeof(struct procfs_read), GFP_KERNEL); + if (r == NULL) { + l = -ENOMEM; + done = 1; + goto out; + } + memset(r, '\0', sizeof(struct procfs_read)); + r->pbuf = PA_NULL; + r->ret = -EIO; /* default */ + strncpy((char *)r->fname, info->path, PROCFS_NAME_MAX); + isp.msg = SCD_MSG_PROCFS_REQUEST; + isp.ref = 0; + isp.arg = virt_to_phys(r); + isp.pid = pid; + + l = -EIO; + done = 1; + ret = mcctrl_ikc_send_wait(os, + (pid > 0) ? 
ppd->ikc_target_cpu : 0, + &isp, 5 * HZ, NULL, &do_free, 1, r); + + if (!do_free && ret >= 0) { + ret = -EIO; + } + + if (ret < 0) { + l = ret; + if (ret == -ETIME) { + pr_info("%s: error: timeout (1 sec)\n", + __func__); + } + else if (ret == -ERESTARTSYS) { + l = -ERESTART; + } + if (!do_free) + r = NULL; + goto out; + } + + if (r->ret < 0) { + l = r->ret; + goto out; + } + + done = 0; + l = 0; + info->top_pa = info->cur_pa = r->pbuf; + +out: + if (ppd) + mcctrl_put_per_proc_data(ppd); + if (r) + kfree((void *)r); + } + + if (info->cur_pa == PA_NULL) { + info->cur_pa = info->top_pa; + } + + while (!done && info->cur_pa != PA_NULL) { + long bpos; + long bsize; + + phys = ihk_device_map_memory(ihk_os_to_dev(os), info->cur_pa, + PAGE_SIZE); +#ifdef CONFIG_MIC + buf = ioremap_wc(phys, PAGE_SIZE); +#else + buf = ihk_device_map_virtual(ihk_os_to_dev(os), phys, + PAGE_SIZE, NULL, 0); +#endif + + if (pos < buf->pos) { + info->cur_pa = info->top_pa; + goto rep; + } + + if (pos >= buf->pos + buf->size) { + info->cur_pa = buf->next_pa; + goto rep; + } + + bpos = pos - buf->pos; + bsize = (buf->pos + buf->size) - pos; + if (bsize > (nbytes - l)) { + bsize = nbytes - l; + } + if (copy_to_user(ubuf, buf->buf + bpos, bsize)) { + done = 1; + pos = *ppos; + l = -EFAULT; + } + else { + ubuf += bsize; + pos += bsize; + l += bsize; + if (l == nbytes) { + done = 1; + } + } +rep: +#ifdef CONFIG_MIC + iounmap(buf); +#else + ihk_device_unmap_virtual(ihk_os_to_dev(os), buf, PAGE_SIZE); +#endif + ihk_device_unmap_memory(ihk_os_to_dev(os), phys, PAGE_SIZE); + }; + + *ppos = pos; + return l; +} + +static const struct file_operations mckernel_buff_io = { + .llseek = mckernel_procfs_lseek, + .read = mckernel_procfs_buff_read, + .write = NULL, + .open = mckernel_procfs_buff_open, + .release = mckernel_procfs_buff_release, +}; + static const struct procfs_entry tid_entry_stuff[] = { // PROC_REG("auxv", S_IRUSR, NULL), // PROC_REG("clear_refs", S_IWUSR, NULL), @@ -1006,10 +1085,10 @@ static 
const struct procfs_entry tid_entry_stuff[] = { // PROC_LNK("exe", mckernel_readlink), // PROC_REG("limits", S_IRUSR|S_IWUSR, NULL), // PROC_REG("maps", S_IRUGO, NULL), - PROC_REG("mem", S_IRUSR|S_IWUSR, NULL), + PROC_REG("mem", 0600, NULL), // PROC_REG("pagemap", S_IRUGO, NULL), // PROC_REG("smaps", S_IRUGO, NULL), - PROC_REG("stat", S_IRUGO, NULL), + PROC_REG("stat", 0444, &mckernel_buff_io), // PROC_REG("statm", S_IRUGO, NULL), // PROC_REG("status", S_IRUGO, NULL), // PROC_REG("syscall", S_IRUGO, NULL), @@ -1018,26 +1097,26 @@ static const struct procfs_entry tid_entry_stuff[] = { }; static const struct procfs_entry pid_entry_stuff[] = { - PROC_REG("auxv", S_IRUSR, NULL), + PROC_REG("auxv", 0400, &mckernel_buff_io), /* Support the case where McKernel process retrieves its job-id under the Fujitsu TCS suite. */ // PROC_REG("cgroup", S_IXUSR, NULL), // PROC_REG("clear_refs", S_IWUSR, NULL), - PROC_REG("cmdline", S_IRUGO, NULL), -// PROC_REG("comm", S_IRUGO|S_IWUSR, NULL), + PROC_REG("cmdline", 0444, &mckernel_buff_io), + PROC_REG("comm", 0644, &mckernel_buff_io), // PROC_REG("coredump_filter", S_IRUGO|S_IWUSR, NULL), - PROC_REG("cpuset", S_IXUSR, NULL), +// PROC_REG("cpuset", S_IRUGO, NULL), // PROC_REG("environ", S_IRUSR, NULL), // PROC_LNK("exe", mckernel_readlink), // PROC_REG("limits", S_IRUSR|S_IWUSR, NULL), - PROC_REG("maps", S_IRUGO, NULL), - PROC_REG("mem", S_IRUSR|S_IWUSR, NULL), - PROC_REG("pagemap", S_IRUGO, NULL), - PROC_REG("smaps", S_IRUGO, NULL), -// PROC_REG("stat", S_IRUGO, NULL), + PROC_REG("maps", 0444, &mckernel_buff_io), + PROC_REG("mem", 0400, NULL), + PROC_REG("pagemap", 0444, NULL), +// PROC_REG("smaps", S_IRUGO, NULL), +// PROC_REG("stat", 0444, &mckernel_buff_io), // PROC_REG("statm", S_IRUGO, NULL), - PROC_REG("status", S_IRUGO, NULL), + PROC_REG("status", 0444, &mckernel_buff_io), // PROC_REG("syscall", S_IRUGO, NULL), - PROC_DIR("task", S_IRUGO|S_IXUGO), + PROC_DIR("task", 0555), // PROC_REG("wchan", S_IRUGO, NULL), PROC_TERM }; @@ 
-1045,14 +1124,14 @@ static const struct procfs_entry pid_entry_stuff[] = { static const struct procfs_entry base_entry_stuff[] = { // PROC_REG("cmdline", S_IRUGO, NULL), #ifdef POSTK_DEBUG_ARCH_DEP_42 /* /proc/cpuinfo support added. */ - PROC_REG("cpuinfo", S_IRUGO, NULL), + PROC_REG("cpuinfo", 0444, &mckernel_buff_io), #else /* POSTK_DEBUG_ARCH_DEP_42 */ // PROC_REG("cpuinfo", S_IRUGO, NULL), #endif /* POSTK_DEBUG_ARCH_DEP_42 */ // PROC_REG("meminfo", S_IRUGO, NULL), // PROC_REG("pagetypeinfo",S_IRUGO, NULL), // PROC_REG("softirq", S_IRUGO, NULL), - PROC_REG("stat", S_IRUGO, NULL), + PROC_REG("stat", 0444, &mckernel_buff_io), // PROC_REG("uptime", S_IRUGO, NULL), // PROC_REG("version", S_IRUGO, NULL), // PROC_REG("vmallocinfo",S_IRUSR, NULL), diff --git a/executer/kernel/mcctrl/syscall.c b/executer/kernel/mcctrl/syscall.c index f06ec042..1db62fda 100644 --- a/executer/kernel/mcctrl/syscall.c +++ b/executer/kernel/mcctrl/syscall.c @@ -63,17 +63,21 @@ #define dprintk(...) #endif -#ifdef MCCTRL_KSYM_zap_page_range -static void -(*mcctrl_zap_page_range)(struct vm_area_struct *vma, unsigned long start, - unsigned long size, struct zap_details *details) -#if MCCTRL_KSYM_zap_page_range - = (void *)MCCTRL_KSYM_zap_page_range; +//#define DEBUG_PTD +#ifdef DEBUG_PTD +#define pr_ptd(msg, tid, ptd) do { printk("%s: " msg ",tid=%d,refc=%d\n", __FUNCTION__, tid, atomic_read(&ptd->refcount)); } while(0) #else - = &zap_page_range; -#endif +#define pr_ptd(msg, tid, ptd) do { } while(0) #endif +//#define DEBUG_PPD +#ifdef DEBUG_PPD +#define pr_ppd(msg, tid, ppd) do { printk("%s: " msg ",tid=%d,refc=%d\n", __FUNCTION__, tid, atomic_read(&ppd->refcount)); } while(0) +#else +#define pr_ppd(msg, tid, ppd) do { } while(0) +#endif + +static long pager_call_irq(ihk_os_t os, struct syscall_request *req); static long pager_call(ihk_os_t os, struct syscall_request *req); #ifdef SC_DEBUG @@ -89,75 +93,90 @@ static void print_dma_lastreq(void) } #endif -int mcctrl_add_per_thread_data(struct 
mcctrl_per_proc_data* ppd, - struct task_struct *task, void *data) +void mcctrl_put_per_thread_data_unsafe(struct mcctrl_per_thread_data *ptd) { - struct mcctrl_per_thread_data *ptd_iter, *ptd = NULL; - struct mcctrl_per_thread_data *ptd_alloc = NULL; - int hash = (((uint64_t)task >> 4) & MCCTRL_PER_THREAD_DATA_HASH_MASK); - int ret = 0; - unsigned long flags; - - ptd_alloc = kmalloc(sizeof(*ptd), GFP_ATOMIC); - if (!ptd_alloc) { - kprintf("%s: error allocate per thread data\n", __FUNCTION__); - ret = -ENOMEM; - goto out_noalloc; - } - - /* Check if data for this thread exists and add if not */ - write_lock_irqsave(&ppd->per_thread_data_hash_lock[hash], flags); - list_for_each_entry(ptd_iter, &ppd->per_thread_data_hash[hash], hash) { - if (ptd_iter->task == task) { - ptd = ptd_iter; - break; + if (!atomic_dec_and_test(&ptd->refcount)) { + int ret = atomic_read(&ptd->refcount); + if (ret < 0) { + printk("%s: ERROR: invalid refcount=%d\n", __FUNCTION__, ret); } + return; } - if (unlikely(ptd)) { - ret = -EBUSY; - kfree(ptd_alloc); - goto out; - } - - ptd = ptd_alloc; - ptd->task = task; - ptd->data = data; - list_add_tail(&ptd->hash, &ppd->per_thread_data_hash[hash]); - -out: - write_unlock_irqrestore(&ppd->per_thread_data_hash_lock[hash], flags); -out_noalloc: - return ret; + list_del(&ptd->hash); + kfree(ptd); } -int mcctrl_delete_per_thread_data(struct mcctrl_per_proc_data* ppd, - struct task_struct *task) +void mcctrl_put_per_thread_data(struct mcctrl_per_thread_data* _ptd) { + struct mcctrl_per_proc_data *ppd = _ptd->ppd; struct mcctrl_per_thread_data *ptd_iter, *ptd = NULL; - int hash = (((uint64_t)task >> 4) & MCCTRL_PER_THREAD_DATA_HASH_MASK); - int ret = 0; + int hash = (((uint64_t)_ptd->task >> 4) & MCCTRL_PER_THREAD_DATA_HASH_MASK); unsigned long flags; - + /* Check if data for this thread exists and delete it */ write_lock_irqsave(&ppd->per_thread_data_hash_lock[hash], flags); list_for_each_entry(ptd_iter, &ppd->per_thread_data_hash[hash], hash) { - if 
(ptd_iter->task == task) { + if (ptd_iter->task == _ptd->task) { ptd = ptd_iter; break; } } if (!ptd) { - ret = -EINVAL; + printk("%s: ERROR: ptd not found\n", __FUNCTION__); goto out; } - list_del(&ptd->hash); - kfree(ptd); - + mcctrl_put_per_thread_data_unsafe(ptd); + out: write_unlock_irqrestore(&ppd->per_thread_data_hash_lock[hash], flags); +} + +int mcctrl_add_per_thread_data(struct mcctrl_per_proc_data *ppd, void *data) +{ + struct mcctrl_per_thread_data *ptd_iter, *ptd = NULL; + struct mcctrl_per_thread_data *ptd_alloc = NULL; + int hash = (((uint64_t)current >> 4) & MCCTRL_PER_THREAD_DATA_HASH_MASK); + int ret = 0; + unsigned long flags; + + ptd_alloc = kmalloc(sizeof(struct mcctrl_per_thread_data), GFP_ATOMIC); + if (!ptd_alloc) { + kprintf("%s: error allocate per thread data\n", __FUNCTION__); + ret = -ENOMEM; + goto out_noalloc; + } + memset(ptd_alloc, 0, sizeof(struct mcctrl_per_thread_data)); + + /* Check if data for this thread exists and add if not */ + write_lock_irqsave(&ppd->per_thread_data_hash_lock[hash], flags); + list_for_each_entry(ptd_iter, &ppd->per_thread_data_hash[hash], hash) { + if (ptd_iter->task == current) { + ptd = ptd_iter; + break; + } + } + + if (unlikely(ptd)) { + kprintf("%s: WARNING: ptd of tid: %d exists\n", __FUNCTION__, task_pid_vnr(current)); + ret = -EBUSY; + kfree(ptd_alloc); + goto out; + } + + ptd = ptd_alloc; + ptd->ppd = ppd; + ptd->task = current; + ptd->tid = task_pid_vnr(current); + ptd->data = data; + atomic_set(&ptd->refcount, 1); + list_add_tail(&ptd->hash, &ppd->per_thread_data_hash[hash]); + + out: + write_unlock_irqrestore(&ppd->per_thread_data_hash_lock[hash], flags); + out_noalloc: return ret; } @@ -168,7 +187,7 @@ struct mcctrl_per_thread_data *mcctrl_get_per_thread_data(struct mcctrl_per_proc int hash = (((uint64_t)task >> 4) & MCCTRL_PER_THREAD_DATA_HASH_MASK); unsigned long flags; - /* Check if data for this thread exists and return it */ + /* Check if data for this thread exists */ 
read_lock_irqsave(&ppd->per_thread_data_hash_lock[hash], flags); list_for_each_entry(ptd_iter, &ppd->per_thread_data_hash[hash], hash) { @@ -178,8 +197,18 @@ struct mcctrl_per_thread_data *mcctrl_get_per_thread_data(struct mcctrl_per_proc } } + if (ptd) { + if (atomic_read(&ptd->refcount) <= 0) { + printk("%s: ERROR: use-after-free detected (%d)", __FUNCTION__, atomic_read(&ptd->refcount)); + ptd = NULL; + goto out; + } + atomic_inc(&ptd->refcount); + } + + out: read_unlock_irqrestore(&ppd->per_thread_data_hash_lock[hash], flags); - return ptd ? ptd->data : NULL; + return ptd; } #endif /* !POSTK_DEBUG_ARCH_DEP_56 */ @@ -270,10 +299,21 @@ static int __notify_syscall_requester(ihk_os_t os, struct ikc_scd_packet *packet IHK_SCD_REQ_THREAD_SPINNING, IHK_SCD_REQ_THREAD_TO_BE_WOKEN)) { dprintk("%s: no need to send IKC message for PID %d\n", - __FUNCTION__, packet->pid); + __FUNCTION__, packet->pid); return ret; } + /* Wait until the status goes back to IHK_SCD_REQ_THREAD_SPINNING or + IHK_SCD_REQ_THREAD_DESCHEDULED because two wake-up attempts are competing. + Note that mcexec_terminate_thread() and remote page fault and + returning EINTR would compete. 
*/ + if (res->req_thread_status == IHK_SCD_REQ_THREAD_TO_BE_WOKEN) { + printk("%s: INFO: someone else is waking up the McKernel thread, " + "pid: %d, req status: %lu, syscall nr: %lu\n", + __FUNCTION__, packet->pid, + res->req_thread_status, packet->req.number); + } + /* The thread is not spinning any more, make sure it's descheduled */ if (!__sync_bool_compare_and_swap(&res->req_thread_status, IHK_SCD_REQ_THREAD_DESCHEDULED, @@ -301,15 +341,18 @@ long syscall_backward(struct mcctrl_usrdata *usrdata, int num, unsigned long *ret) { struct ikc_scd_packet *packet; + struct ikc_scd_packet *free_packet = NULL; struct syscall_request *req; struct syscall_response *resp; unsigned long syscall_ret; struct wait_queue_head_list_node *wqhln; unsigned long irqflags; struct mcctrl_per_proc_data *ppd; + struct mcctrl_per_thread_data *ptd; unsigned long phys; struct syscall_request _request[2]; struct syscall_request *request; + int retry; if (((unsigned long)_request ^ (unsigned long)(_request + 1)) & ~(PAGE_SIZE -1)) @@ -334,7 +377,14 @@ long syscall_backward(struct mcctrl_usrdata *usrdata, int num, return -EINVAL; } - packet = (struct ikc_scd_packet *)mcctrl_get_per_thread_data(ppd, current); + ptd = mcctrl_get_per_thread_data(ppd, current); + if (!ptd) { + printk("%s: ERROR: mcctrl_get_per_thread_data failed\n", __FUNCTION__); + syscall_ret = -ENOENT; + goto no_ptd; + } + pr_ptd("get", task_pid_vnr(current), ptd); + packet = (struct ikc_scd_packet *)ptd->data; if (!packet) { syscall_ret = -ENOENT; printk("%s: no packet registered for TID %d\n", @@ -356,6 +406,7 @@ retry_alloc: printk("WARNING: coudln't alloc wait queue head, retrying..\n"); goto retry_alloc; } + memset(wqhln, 0, sizeof(struct wait_queue_head_list_node)); /* Prepare per-thread wait queue head */ wqhln->task = current; @@ -385,30 +436,52 @@ retry_alloc: mb(); resp->status = STATUS_SYSCALL; + retry = 0; + retry_offload: dprintk("%s: tid: %d, syscall: %d SLEEPING\n", __FUNCTION__, task_pid_vnr(current), num); /* 
wait for response */ syscall_ret = wait_event_interruptible(wqhln->wq_syscall, wqhln->req); + /* debug */ + if (syscall_ret == -ERESTARTSYS) { + printk("%s: INFO: interrupted by signal\n", __FUNCTION__); + retry++; + if (retry < 5) { + printk("%s: INFO: retry=%d\n", __FUNCTION__, retry); + goto retry_offload; + } + } + /* Remove per-thread wait queue head */ irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock); list_del(&wqhln->list); ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, irqflags); - dprintk("%s: tid: %d, syscall: %d WOKEN UP\n", - __FUNCTION__, task_pid_vnr(current), num); + dprintk("%s: tid: %d, syscall: %d WOKEN UP\n", + __FUNCTION__, task_pid_vnr(current), num); + + if (retry >= 5) { + kfree(wqhln); + kprintf("%s: INFO: mcexec is gone or retry count exceeded,pid=%d,ppd=%p,retry=%d\n", __FUNCTION__, task_tgid_vnr(current), ppd, retry); + syscall_ret = -EINVAL; + goto out; + } if (syscall_ret) { kfree(wqhln); + printk("%s: ERROR: wait_event_interruptible returned %ld\n", __FUNCTION__, syscall_ret); goto out; } else { unsigned long phys2; struct syscall_response *resp2; - /* Update packet reference */ + /* Note that wqhln->packet is a new packet */ packet = wqhln->packet; + free_packet = packet; req = &packet->req; + phys2 = ihk_device_map_memory(ihk_os_to_dev(usrdata->os), packet->resp_pa, sizeof(*resp)); resp2 = ihk_device_map_virtual(ihk_os_to_dev(usrdata->os), @@ -433,28 +506,26 @@ retry_alloc: syscall_ret = -EIO; goto out; } -#define PAGER_REQ_RESUME 0x0101 - else if (req->args[0] != PAGER_REQ_RESUME) { - resp->ret = pager_call(usrdata->os, (void *)req); - if (__notify_syscall_requester(usrdata->os, packet, resp) < 0) { - printk("%s: WARNING: failed to notify PID %d\n", - __FUNCTION__, packet->pid); - } - - mb(); - } - else { - *ret = req->args[1]; - } + *ret = req->args[1]; kfree(wqhln); syscall_ret = 0; out: + /* Release packet sent from McKernel */ + if (free_packet) { + ihk_ikc_release_packet((struct ihk_ikc_free_packet *)free_packet, + 
(usrdata->ikc2linux[smp_processor_id()] ? + usrdata->ikc2linux[smp_processor_id()] : + usrdata->ikc2linux[0])); + } ihk_device_unmap_virtual(ihk_os_to_dev(usrdata->os), resp, sizeof(*resp)); ihk_device_unmap_memory(ihk_os_to_dev(usrdata->os), phys, sizeof(*resp)); out_put_ppd: + mcctrl_put_per_thread_data(ptd); + pr_ptd("put", task_pid_vnr(current), ptd); + no_ptd: dprintk("%s: tid: %d, syscall: %d, syscall_ret: %lx\n", __FUNCTION__, task_pid_vnr(current), num, syscall_ret); @@ -462,7 +533,24 @@ out_put_ppd: return syscall_ret; } -static int remote_page_fault(struct mcctrl_usrdata *usrdata, void *fault_addr, uint64_t reason) +#if 0 /* debug */ +/* Info of Linux counterpart of migrated-to-Linux thread */ +struct host_thread { + struct host_thread *next; + struct mcos_handler_info *handler; + int pid; + int tid; + unsigned long usp; + unsigned long lfs; + unsigned long rfs; + struct task_struct *task; +}; + +extern struct host_thread *host_threads; +extern rwlock_t host_thread_lock; +#endif + +int remote_page_fault(struct mcctrl_usrdata *usrdata, void *fault_addr, uint64_t reason) { struct ikc_scd_packet *packet; struct ikc_scd_packet *free_packet = NULL; @@ -472,7 +560,9 @@ static int remote_page_fault(struct mcctrl_usrdata *usrdata, void *fault_addr, u struct wait_queue_head_list_node *wqhln; unsigned long irqflags; struct mcctrl_per_proc_data *ppd; + struct mcctrl_per_thread_data *ptd; unsigned long phys; + int retry; dprintk("%s: tid: %d, fault_addr: %p, reason: %lu\n", __FUNCTION__, task_pid_vnr(current), fault_addr, (unsigned long)reason); @@ -486,11 +576,18 @@ static int remote_page_fault(struct mcctrl_usrdata *usrdata, void *fault_addr, u return -EINVAL; } - packet = (struct ikc_scd_packet *)mcctrl_get_per_thread_data(ppd, current); - if (!packet) { + ptd = mcctrl_get_per_thread_data(ppd, current); + if (!ptd) { + printk("%s: ERROR: mcctrl_get_per_thread_data failed\n", __FUNCTION__); error = -ENOENT; + goto no_ptd; + } + pr_ptd("get", task_pid_vnr(current), 
ptd); + packet = (struct ikc_scd_packet *)ptd->data; + if (!packet) { printk("%s: no packet registered for TID %d\n", __FUNCTION__, task_pid_vnr(current)); + error = -ENOENT; goto out_put_ppd; } @@ -514,6 +611,7 @@ retry_alloc: printk("WARNING: coudln't alloc wait queue head, retrying..\n"); goto retry_alloc; } + memset(wqhln, 0, sizeof(struct wait_queue_head_list_node)); /* Prepare per-thread wait queue head */ wqhln->task = current; @@ -546,12 +644,23 @@ retry_alloc: mb(); resp->status = STATUS_PAGE_FAULT; + retry = 0; for (;;) { dprintk("%s: tid: %d, fault_addr: %p SLEEPING\n", __FUNCTION__, task_pid_vnr(current), fault_addr); /* wait for response */ error = wait_event_interruptible(wqhln->wq_syscall, wqhln->req); - + + /* Delay signal handling */ + if (error == -ERESTARTSYS) { + printk("%s: INFO: interrupted by signal\n", __FUNCTION__); + retry++; + if (retry < 5) { /* mcexec is alive */ + printk("%s: INFO: retry=%d\n", __FUNCTION__, retry); + continue; + } + } + /* Remove per-thread wait queue head */ irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock); list_del(&wqhln->list); @@ -560,6 +669,13 @@ retry_alloc: dprintk("%s: tid: %d, fault_addr: %p WOKEN UP\n", __FUNCTION__, task_pid_vnr(current), fault_addr); + if (retry >= 5) { + kfree(wqhln); + kprintf("%s: INFO: mcexec is gone or retry count exceeded,pid=%d,retry=%d\n", __FUNCTION__, task_tgid_vnr(current), retry); + error = -EINVAL; + goto out; + } + if (error) { kfree(wqhln); printk("remote_page_fault:interrupted. %d\n", error); @@ -627,15 +743,20 @@ retry_alloc: error = 0; out: /* Release remote page-fault response packet */ - ihk_ikc_release_packet((struct ihk_ikc_free_packet *)free_packet, - (usrdata->ikc2linux[smp_processor_id()] ? - usrdata->ikc2linux[smp_processor_id()] : - usrdata->ikc2linux[0])); + if (free_packet) { + ihk_ikc_release_packet((struct ihk_ikc_free_packet *)free_packet, + (usrdata->ikc2linux[smp_processor_id()] ? 
+ usrdata->ikc2linux[smp_processor_id()] : + usrdata->ikc2linux[0])); + } ihk_device_unmap_virtual(ihk_os_to_dev(usrdata->os), resp, sizeof(*resp)); ihk_device_unmap_memory(ihk_os_to_dev(usrdata->os), phys, sizeof(*resp)); out_put_ppd: + mcctrl_put_per_thread_data(ptd); + pr_ptd("put", task_pid_vnr(current), ptd); + no_ptd: dprintk("%s: tid: %d, fault_addr: %p, reason: %lu, error: %d\n", __FUNCTION__, task_pid_vnr(current), fault_addr, (unsigned long)reason, error); @@ -643,111 +764,6 @@ out_put_ppd: return error; } -#define RUS_PAGE_HASH_SHIFT 8 -#define RUS_PAGE_HASH_SIZE (1UL << RUS_PAGE_HASH_SHIFT) -#define RUS_PAGE_HASH_MASK (RUS_PAGE_HASH_SIZE - 1) - -struct list_head rus_page_hash[RUS_PAGE_HASH_SIZE]; -spinlock_t rus_page_hash_lock; - -struct rus_page { - struct list_head hash; - struct page *page; - int refcount; - int put_page; -}; - -void rus_page_hash_init(void) -{ - int i; - - spin_lock_init(&rus_page_hash_lock); - for (i = 0; i < RUS_PAGE_HASH_SIZE; ++i) { - INIT_LIST_HEAD(&rus_page_hash[i]); - } -} - -/* rus_page_hash_lock must be held */ -struct rus_page *_rus_page_hash_lookup(struct page *page) -{ - struct rus_page *rp = NULL; - struct rus_page *rp_iter; - - list_for_each_entry(rp_iter, - &rus_page_hash[page_to_pfn(page) & RUS_PAGE_HASH_MASK], hash) { - - if (rp_iter->page != page) - continue; - - rp = rp_iter; - break; - } - - return rp; -} - - -static int rus_page_hash_insert(struct page *page) -{ - int ret = 0; - struct rus_page *rp; - unsigned long flags; - - spin_lock_irqsave(&rus_page_hash_lock, flags); - - rp = _rus_page_hash_lookup(page); - if (!rp) { - rp = kmalloc(sizeof(*rp), GFP_ATOMIC); - - if (!rp) { - printk("rus_page_add_hash(): error allocating rp\n"); - ret = -ENOMEM; - goto out; - } - - rp->page = page; - rp->put_page = 0; - - get_page(page); - - rp->refcount = 0; /* Will be increased below */ - - list_add_tail(&rp->hash, - &rus_page_hash[page_to_pfn(page) & RUS_PAGE_HASH_MASK]); - } - - ++rp->refcount; - - -out: - 
spin_unlock_irqrestore(&rus_page_hash_lock, flags); - return ret; -} - -void rus_page_hash_put_pages(void) -{ - int i; - struct rus_page *rp_iter; - struct rus_page *rp_iter_next; - unsigned long flags; - - spin_lock_irqsave(&rus_page_hash_lock, flags); - - for (i = 0; i < RUS_PAGE_HASH_SIZE; ++i) { - - list_for_each_entry_safe(rp_iter, rp_iter_next, - &rus_page_hash[i], hash) { - list_del(&rp_iter->hash); - - put_page(rp_iter->page); - kfree(rp_iter); - } - } - - spin_unlock_irqrestore(&rus_page_hash_lock, flags); -} - - /* * By remap_pfn_range(), VM_PFN_AT_MMAP may be raised. * VM_PFN_AT_MMAP cause the following problems. @@ -761,9 +777,15 @@ void rus_page_hash_put_pages(void) #define USE_VM_INSERT_PFN 1 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) +static int rus_vm_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; +#else static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { - struct mcctrl_usrdata * usrdata = vma->vm_file->private_data; +#endif + struct mcctrl_usrdata *usrdata = vma->vm_file->private_data; ihk_device_t dev = ihk_os_to_dev(usrdata->os); unsigned long rpa; unsigned long phys; @@ -777,10 +799,10 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) size_t pix; #endif struct mcctrl_per_proc_data *ppd; + struct mcctrl_per_thread_data *ptd; struct ikc_scd_packet *packet; int ret = 0; -#ifdef POSTK_DEBUG_ARCH_DEP_41 /* HOST-Linux version switch add */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) dprintk("mcctrl:page fault:flags %#x pgoff %#lx va %#lx page %p\n", vmf->flags, vmf->pgoff, vmf->address, vmf->page); @@ -788,24 +810,30 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) dprintk("mcctrl:page fault:flags %#x pgoff %#lx va %p page %p\n", vmf->flags, vmf->pgoff, vmf->virtual_address, vmf->page); #endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) */ -#else /* POSTK_DEBUG_ARCH_DEP_41 */ - dprintk("mcctrl:page fault:flags %#x pgoff %#lx va %p 
page %p\n", - vmf->flags, vmf->pgoff, vmf->virtual_address, vmf->page); -#endif /* POSTK_DEBUG_ARCH_DEP_41 */ /* Look up per-process structure */ ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current)); if (!ppd) { + kprintf("%s: INFO: no per-process structure for pid %d (tid %d), try to use pid %d\n", + __FUNCTION__, task_tgid_vnr(current), task_pid_vnr(current), vma->vm_mm->owner->pid); ppd = mcctrl_get_per_proc_data(usrdata, vma->vm_mm->owner->pid); } if (!ppd) { kprintf("%s: ERROR: no per-process structure for PID %d??\n", __FUNCTION__, task_tgid_vnr(current)); - return -EINVAL; + ret = VM_FAULT_SIGBUS; + goto no_ppd; } - packet = (struct ikc_scd_packet *)mcctrl_get_per_thread_data(ppd, current); + ptd = mcctrl_get_per_thread_data(ppd, current); + if (!ptd) { + printk("%s: ERROR: mcctrl_get_per_thread_data failed\n", __FUNCTION__); + ret = VM_FAULT_SIGBUS; + goto no_ptd; + } + pr_ptd("get", task_pid_vnr(current), ptd); + packet = (struct ikc_scd_packet *)ptd->data; if (!packet) { ret = VM_FAULT_SIGBUS; printk("%s: no packet registered for TID %d\n", @@ -814,7 +842,6 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) } for (try = 1; ; ++try) { -#ifdef POSTK_DEBUG_ARCH_DEP_41 /* HOST-Linux version switch add */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) error = translate_rva_to_rpa(usrdata->os, ppd->rpgtable, vmf->address, &rpa, &pgsize); @@ -823,15 +850,9 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) (unsigned long)vmf->virtual_address, &rpa, &pgsize); #endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) */ -#else /* POSTK_DEBUG_ARCH_DEP_41 */ - error = translate_rva_to_rpa(usrdata->os, ppd->rpgtable, - (unsigned long)vmf->virtual_address, - &rpa, &pgsize); -#endif /* POSTK_DEBUG_ARCH_DEP_41 */ #define NTRIES 2 if (!error || (try >= NTRIES)) { if (error) { -#ifdef POSTK_DEBUG_ARCH_DEP_41 /* HOST-Linux version switch add */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) printk("%s: error 
translating 0x%#lx " "(req: TID: %u, syscall: %lu)\n", @@ -843,12 +864,6 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) __FUNCTION__, vmf->virtual_address, packet->req.rtid, packet->req.number); #endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) */ -#else /* POSTK_DEBUG_ARCH_DEP_41 */ - printk("%s: error translating 0x%p " - "(req: TID: %u, syscall: %lu)\n", - __FUNCTION__, vmf->virtual_address, - packet->req.rtid, packet->req.number); -#endif /* POSTK_DEBUG_ARCH_DEP_41 */ } break; @@ -859,17 +874,12 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) #define PF_WRITE 0x02 reason |= PF_WRITE; } -#ifdef POSTK_DEBUG_ARCH_DEP_41 /* HOST-Linux version switch add */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) error = remote_page_fault(usrdata, (void *)vmf->address, reason); #else /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) */ error = remote_page_fault(usrdata, vmf->virtual_address, reason); #endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) */ -#else /* POSTK_DEBUG_ARCH_DEP_41 */ - error = remote_page_fault(usrdata, vmf->virtual_address, reason); -#endif /* POSTK_DEBUG_ARCH_DEP_41 */ if (error) { -#ifdef POSTK_DEBUG_ARCH_DEP_41 /* HOST-Linux version switch add */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) printk("%s: error forwarding PF for 0x%#lx " "(req: TID: %d, syscall: %lu)\n", @@ -881,12 +891,6 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) __FUNCTION__, vmf->virtual_address, packet->req.rtid, packet->req.number); #endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) */ -#else /* POSTK_DEBUG_ARCH_DEP_41 */ - printk("%s: error forwarding PF for 0x%p " - "(req: TID: %d, syscall: %lu)\n", - __FUNCTION__, vmf->virtual_address, - packet->req.rtid, packet->req.number); -#endif /* POSTK_DEBUG_ARCH_DEP_41 */ break; } } @@ -895,15 +899,11 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) goto put_and_out; } -#ifdef POSTK_DEBUG_ARCH_DEP_41 
/* HOST-Linux version switch add */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) rva = vmf->address & ~(pgsize - 1); #else /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) */ rva = (unsigned long)vmf->virtual_address & ~(pgsize - 1); #endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) */ -#else /* POSTK_DEBUG_ARCH_DEP_41 */ - rva = (unsigned long)vmf->virtual_address & ~(pgsize - 1); -#endif /* POSTK_DEBUG_ARCH_DEP_41 */ rpa = rpa & ~(pgsize - 1); phys = ihk_device_map_memory(dev, rpa, pgsize); @@ -921,30 +921,8 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (pfn_valid(pfn+pix)) { page = pfn_to_page(pfn+pix); - if ((error = rus_page_hash_insert(page)) < 0) { -#ifdef POSTK_DEBUG_ARCH_DEP_41 /* HOST-Linux version switch add */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) - printk("%s: error adding page to RUS hash for 0x%#lx " - "(req: TID: %d, syscall: %lu)\n", - __FUNCTION__, vmf->address, - packet->req.rtid, packet->req.number); -#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) */ - printk("%s: error adding page to RUS hash for 0x%p " - "(req: TID: %d, syscall: %lu)\n", - __FUNCTION__, vmf->virtual_address, - packet->req.rtid, packet->req.number); -#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) */ -#else /* POSTK_DEBUG_ARCH_DEP_41 */ - printk("%s: error adding page to RUS hash for 0x%p " - "(req: TID: %d, syscall: %lu)\n", - __FUNCTION__, vmf->virtual_address, - packet->req.rtid, packet->req.number); -#endif /* POSTK_DEBUG_ARCH_DEP_41 */ - } - error = vm_insert_page(vma, rva+(pix*PAGE_SIZE), page); if (error) { -#ifdef POSTK_DEBUG_ARCH_DEP_41 /* HOST-Linux version switch add */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) printk("%s: error inserting mapping for 0x%#lx " "(req: TID: %d, syscall: %lu) error: %d, " @@ -960,20 +938,13 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) packet->req.rtid, packet->req.number, error, vma->vm_start, vma->vm_end); #endif /* 
LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) */ -#else /* POSTK_DEBUG_ARCH_DEP_41 */ - printk("%s: error inserting mapping for 0x%p " - "(req: TID: %d, syscall: %lu) error: %d, " - "vm_start: 0x%lx, vm_end: 0x%lx\n", - __FUNCTION__, vmf->virtual_address, - packet->req.rtid, packet->req.number, error, - vma->vm_start, vma->vm_end); -#endif /* POSTK_DEBUG_ARCH_DEP_41 */ } } else error = vm_insert_pfn(vma, rva+(pix*PAGE_SIZE), pfn+pix); if (error) { -#ifdef POSTK_DEBUG_TEMP_FIX_11 /* rus_vm_fault() multi-thread fix */ +#if 1 /* POSTK_DEBUG_TEMP_FIX_11 */ /* rus_vm_fault() multi-thread fix */ + printk("%s: vm_insert_pfn returned %d\n", __FUNCTION__, error); if (error == -EBUSY) { error = 0; } else { @@ -989,7 +960,6 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) #endif ihk_device_unmap_memory(dev, phys, pgsize); if (error) { -#ifdef POSTK_DEBUG_ARCH_DEP_41 /* HOST-Linux version switch add */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) printk("%s: remote PF failed for 0x%#lx, pgoff: %lu " "(req: TID: %d, syscall: %lu)\n", @@ -1001,12 +971,6 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) __FUNCTION__, vmf->virtual_address, vmf->pgoff, packet->req.rtid, packet->req.number); #endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) */ -#else /* POSTK_DEBUG_ARCH_DEP_41 */ - printk("%s: remote PF failed for 0x%p, pgoff: %lu " - "(req: TID: %d, syscall: %lu)\n", - __FUNCTION__, vmf->virtual_address, vmf->pgoff, - packet->req.rtid, packet->req.number); -#endif /* POSTK_DEBUG_ARCH_DEP_41 */ ret = VM_FAULT_SIGBUS; goto put_and_out; } @@ -1014,7 +978,11 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ret = VM_FAULT_NOPAGE; put_and_out: + mcctrl_put_per_thread_data(ptd); + pr_ptd("put", task_pid_vnr(current), ptd); + no_ptd: mcctrl_put_per_proc_data(ppd); + no_ppd: return ret; } @@ -1070,7 +1038,15 @@ reserve_user_space_common(struct mcctrl_usrdata *usrdata, unsigned long start, u start = 
vm_mmap(file, start, end, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_FIXED|MAP_SHARED, 0); #endif - +#if 0 + { /* debug */ + struct vm_area_struct *vma; + down_write(&current->mm->mmap_sem); + vma = find_vma(current->mm, start); + vma->vm_flags |= VM_DONTCOPY; + up_write(&current->mm->mmap_sem); + } +#endif revert_creds(original); put_cred(promoted); fput(file); @@ -1084,7 +1060,7 @@ reserve_user_space_common(struct mcctrl_usrdata *usrdata, unsigned long start, u struct pager { struct list_head list; struct inode * inode; - int ref; + uint64_t ref; /* needs same type as fileobj->sref */ struct file * rofile; struct file * rwfile; uintptr_t map_uaddr; @@ -1092,35 +1068,28 @@ struct pager { off_t map_off; }; -/* - * for linux v2.6.35 or prior - */ -#ifndef DEFINE_SEMAPHORE -#define DEFINE_SEMAPHORE(...) DECLARE_MUTEX(__VA_ARGS__) -#endif -static DEFINE_SEMAPHORE(pager_sem); +static DEFINE_SPINLOCK(pager_lock); static struct list_head pager_list = LIST_HEAD_INIT(pager_list); int pager_nr_processes = 0; void pager_add_process(void) { - int error; - error = down_interruptible(&pager_sem); - if (error) { - return; - } + unsigned long flags; + + spin_lock_irqsave(&pager_lock, flags); ++pager_nr_processes; - up(&pager_sem); + spin_unlock_irqrestore(&pager_lock, flags); } void pager_remove_process(struct mcctrl_per_proc_data *ppd) { int error; struct pager *pager_next, *pager; + unsigned long flags; if (in_atomic() || in_interrupt()) { printk("%s: WARNING: shouldn't be called in IRQ context..\n", @@ -1145,15 +1114,17 @@ void pager_remove_process(struct mcctrl_per_proc_data *ppd) /* Clean up global pagers for regular file mappings if this * was the last process */ - error = down_interruptible(&pager_sem); - if (error) { - return; - } - + spin_lock_irqsave(&pager_lock, flags); --pager_nr_processes; - if (pager_nr_processes > 0) { - goto out; - } + spin_unlock_irqrestore(&pager_lock, flags); +} + +void pager_cleanup(void) +{ + unsigned long flags; + struct pager *pager_next, *pager; + +
spin_lock_irqsave(&pager_lock, flags); list_for_each_entry_safe(pager, pager_next, &pager_list, list) { list_del(&pager->list); @@ -1170,11 +1141,7 @@ void pager_remove_process(struct mcctrl_per_proc_data *ppd) kfree(pager); } - /* Flush page hash as well */ - rus_page_hash_put_pages(); - -out: - up(&pager_sem); + spin_unlock_irqrestore(&pager_lock, flags); } struct pager_create_result { @@ -1198,14 +1165,14 @@ enum { MF_XPMEM = 0x10000, /* To identify XPMEM attachment pages for rusage accounting */ MF_ZEROOBJ = 0x20000, /* To identify pages of anonymous, on-demand paging ranges for rusage accounting */ MF_SHM = 0x40000, - MF_END + MF_HUGETLBFS = 0x100000, }; static int pager_get_path(struct file *file, char *path) { int error = 0; char *pathbuf, *fullpath; - pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); + pathbuf = kmalloc(PATH_MAX, GFP_KERNEL); if (!pathbuf) { printk("%s: ERROR: allocating path\n", __FUNCTION__); error = -ENOMEM; @@ -1214,7 +1181,7 @@ static int pager_get_path(struct file *file, char *path) { fullpath = d_path(&file->f_path, pathbuf, PATH_MAX); if (!IS_ERR(fullpath)) { - memcpy(path, fullpath, strlen(fullpath)); + memcpy(path, fullpath, strlen(fullpath) + 1); } else { path[0] = 0; @@ -1233,7 +1200,7 @@ static int pager_req_create(ihk_os_t os, int fd, uintptr_t result_pa) ihk_device_t dev = ihk_os_to_dev(os); int error; struct pager_create_result *resp; - int maxprot = -1; + int maxprot = 0; struct file *file = NULL; struct inode *inode; struct pager *pager = NULL; @@ -1241,6 +1208,7 @@ static int pager_req_create(ihk_os_t os, int fd, uintptr_t result_pa) uintptr_t phys; struct kstat st; int mf_flags = 0; + unsigned long irqflags; dprintk("pager_req_create(%d,%lx)\n", fd, (long)result_pa); @@ -1272,7 +1240,6 @@ static int pager_req_create(ihk_os_t os, int fd, uintptr_t result_pa) goto out; } - maxprot = 0; if ((file->f_mode & FMODE_READ) && (file->f_mode & FMODE_PREAD)) { maxprot |= PROT_READ; } @@ -1288,13 +1255,19 @@ static int 
pager_req_create(ihk_os_t os, int fd, uintptr_t result_pa) goto out; } + if (inode->i_op == mcctrl_hugetlbfs_inode_operations) { + mf_flags = MF_HUGETLBFS; + /* pager is used as handle id on mckernel side, use inode */ + pager = (void *)st.ino; + /* retrofit blksize in resp as well through st.size field; + * the actual file size is not used + */ + st.size = st.blksize; + goto out_reply; + } + for (;;) { - error = down_interruptible(&pager_sem); - if (error) { - error = -EINTR; - printk("pager_req_create(%d,%lx):signaled. %d\n", fd, (long)result_pa, error); - goto out; - } + spin_lock_irqsave(&pager_lock, irqflags); list_for_each_entry(pager, &pager_list, list) { if (pager->inode == inode) { @@ -1313,7 +1286,7 @@ static int pager_req_create(ihk_os_t os, int fd, uintptr_t result_pa) { char *pathbuf, *fullpath; - pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); + pathbuf = kmalloc(PATH_MAX, GFP_ATOMIC); if (pathbuf) { fullpath = d_path(&file->f_path, pathbuf, PATH_MAX); if (!IS_ERR(fullpath)) { @@ -1339,7 +1312,7 @@ static int pager_req_create(ihk_os_t os, int fd, uintptr_t result_pa) break; } - up(&pager_sem); + spin_unlock_irqrestore(&pager_lock, irqflags); newpager = kzalloc(sizeof(*newpager), GFP_ATOMIC); if (!newpager) { @@ -1359,8 +1332,9 @@ found: get_file(file); pager->rofile = file; } - up(&pager_sem); + spin_unlock_irqrestore(&pager_lock, irqflags); +out_reply: phys = ihk_device_map_memory(dev, result_pa, sizeof(*resp)); resp = ihk_device_map_virtual(dev, phys, sizeof(*resp), NULL, 0); if (!resp) { @@ -1377,13 +1351,7 @@ found: resp->size = st.size; error = pager_get_path(file, resp->path); - if (error) { - goto out_unmap; - } - error = 0; - -out_unmap: ihk_device_unmap_virtual(dev, resp, sizeof(*resp)); ihk_device_unmap_memory(dev, phys, sizeof(*resp)); @@ -1399,37 +1367,35 @@ out: return error; } -static int pager_req_release(ihk_os_t os, uintptr_t handle, int unref) +static int pager_req_release(ihk_os_t os, uintptr_t handle, uint64_t sref) { int error; struct 
pager *p; struct pager *free_pager = NULL; + unsigned long flags; - dprintk("pager_req_relase(%p,%lx,%d)\n", os, handle, unref); + dprintk("%s(%p,%lx)\n", __func__, os, handle); - error = down_interruptible(&pager_sem); - if (error) { - printk("pager_req_relase(%p,%lx,%d):signaled. %d\n", os, handle, unref, error); - goto out; - } + spin_lock_irqsave(&pager_lock, flags); error = -EBADF; list_for_each_entry(p, &pager_list, list) { if ((uintptr_t)p == handle) { error = 0; - p->ref -= unref; - if (p->ref <= 0) { - list_del(&p->list); - free_pager = p; - } + p->ref -= sref; + if (p->ref > 0) + break; + list_del(&p->list); + free_pager = p; break; } } - up(&pager_sem); + spin_unlock_irqrestore(&pager_lock, flags); if (error) { - printk("pager_req_relase(%p,%lx,%d):pager not found. %d\n", os, handle, unref, error); + pr_err("%s(%p,%lx):pager not found. %d\n", + __func__, os, handle, error); goto out; } @@ -1445,28 +1411,24 @@ static int pager_req_release(ihk_os_t os, uintptr_t handle, int unref) error = 0; out: - dprintk("pager_req_relase(%p,%lx,%d): %d\n", os, handle, unref, error); + dprintk("%s(%p,%lx): %d\n", __func__, os, handle, error); return error; } static int pager_req_read(ihk_os_t os, uintptr_t handle, off_t off, size_t size, uintptr_t rpa) { - ssize_t ss; + ssize_t ss, n; struct pager *pager; struct file *file = NULL; uintptr_t phys = -1; ihk_device_t dev = ihk_os_to_dev(os); void *buf = NULL; - mm_segment_t fs; loff_t pos; + unsigned long flags; dprintk("pager_req_read(%lx,%lx,%lx,%lx)\n", handle, off, size, rpa); - ss = down_interruptible(&pager_sem); - if (ss) { - printk("pager_req_read(%lx,%lx,%lx,%lx): signaled. 
%ld\n", handle, off, size, rpa, ss); - goto out; - } + spin_lock_irqsave(&pager_lock, flags); list_for_each_entry(pager, &pager_list, list) { if ((uintptr_t)pager == handle) { @@ -1475,45 +1437,54 @@ static int pager_req_read(ihk_os_t os, uintptr_t handle, off_t off, size_t size, break; } } - up(&pager_sem); + spin_unlock_irqrestore(&pager_lock, flags); if (!file) { ss = -EBADF; - printk("pager_req_read(%lx,%lx,%lx,%lx):pager not found. %ld\n", handle, off, size, rpa, ss); + pr_warn("%s(%lx,%lx,%lx,%lx):pager not found. %ld\n", + __func__, handle, off, size, rpa, ss); goto out; } phys = ihk_device_map_memory(dev, rpa, size); buf = ihk_device_map_virtual(dev, phys, size, NULL, 0); if (!buf) { - printk("%s: ERROR: invalid buffer address\n", - __FUNCTION__); + pr_warn("%s: ERROR: invalid buffer address\n", + __func__); ss = -EINVAL; goto out; } - fs = get_fs(); - set_fs(KERNEL_DS); pos = off; - ss = vfs_read(file, buf, size, &pos); - if ((ss != size) && (ss > 0)) { -#ifdef POSTK_DEBUG_TEMP_FIX_12 /* clear_user() used by kernel area, fix */ - memset(buf + ss, 0, size - ss); - ss = size; -#else /* POSTK_DEBUG_TEMP_FIX_12 */ - if (clear_user(buf+ss, size-ss) == 0) { - ss = size; + n = 0; + while (n < size) { + if (pos != off + n) { + pr_warn("%s: pos wrong? got %lld, expected %ld\n", + __func__, pos, off+n); + pos = off + n; } - else { - ss = -EFAULT; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0) + ss = kernel_read(file, buf + n, size - n, &pos); +#else + ss = kernel_read(file, pos, buf + n, size - n); + pos += ss; +#endif + if (ss < 0) { + break; } -#endif /* POSTK_DEBUG_TEMP_FIX_12 */ + if (ss == 0) { + memset(buf + n, 0, size - n); + n = size; + break; + } + n += ss; } - set_fs(fs); if (ss < 0) { - printk("pager_req_read(%lx,%lx,%lx,%lx):pread failed. %ld\n", handle, off, size, rpa, ss); + pr_warn("%s(%lx,%lx,%lx,%lx):pread failed. 
%ld\n", + __func__, handle, off, size, rpa, ss); goto out; } + ss = n; out: if (buf) { @@ -1537,18 +1508,14 @@ static int pager_req_write(ihk_os_t os, uintptr_t handle, off_t off, size_t size uintptr_t phys = -1; ihk_device_t dev = ihk_os_to_dev(os); void *buf = NULL; - mm_segment_t fs; loff_t pos; loff_t fsize; size_t len; + unsigned long flags; dprintk("pager_req_write(%lx,%lx,%lx,%lx)\n", handle, off, size, rpa); - ss = down_interruptible(&pager_sem); - if (ss) { - printk("pager_req_write(%lx,%lx,%lx,%lx): signaled. %ld\n", handle, off, size, rpa, ss); - goto out; - } + spin_lock_irqsave(&pager_lock, flags); list_for_each_entry(pager, &pager_list, list) { if ((uintptr_t)pager == handle) { @@ -1559,7 +1526,7 @@ static int pager_req_write(ihk_os_t os, uintptr_t handle, off_t off, size_t size if (file) { get_file(file); } - up(&pager_sem); + spin_unlock_irqrestore(&pager_lock, flags); if (!file) { ss = -EBADF; @@ -1569,7 +1536,7 @@ static int pager_req_write(ihk_os_t os, uintptr_t handle, off_t off, size_t size /* * XXX: Find a way to avoid changing the file size - * by using a function in the same abstraction level as vfs_write(). + * by using a function in the same abstraction level as kernel_write(). */ fsize = i_size_read(file->f_mapping->host); if (off >= fsize) { @@ -1586,15 +1553,16 @@ static int pager_req_write(ihk_os_t os, uintptr_t handle, off_t off, size_t size goto out; } - fs = get_fs(); - set_fs(KERNEL_DS); pos = off; len = size; if ((off + size) > fsize) { len = fsize - off; } - ss = vfs_write(file, buf, len, &pos); - set_fs(fs); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0) + ss = kernel_write(file, buf, len, &pos); +#else + ss = kernel_write(file, buf, len, pos); +#endif if (ss < 0) { printk("pager_req_write(%lx,%lx,%lx,%lx):pwrite failed. 
%ld\n", handle, off, size, rpa, ss); goto out; @@ -1828,11 +1796,12 @@ retry: goto out_release; } -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,8,0) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) || \ + (defined(RHEL_RELEASE_CODE) && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 5)) fault = handle_mm_fault(vma, va, flags); -#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,8,0) */ +#else fault = handle_mm_fault(current->mm, vma, va, flags); -#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,8,0) */ +#endif if (fault != 0) { printk("%s: error: faulting %lx at off: %lu\n", __FUNCTION__, va, off); @@ -1917,42 +1886,6 @@ out: return error; } -#ifdef SC_DEBUG -#ifdef MCCTRL_KSYM_walk_page_range -static void -(*mcctrl_walk_page_range)(unsigned long addr, unsigned long end, struct mm_walk *walk) -#if MCCTRL_KSYM_walk_page_range - = (void *)MCCTRL_KSYM_walk_page_range; -#else - = &walk_page_range; -#endif -#endif - -static int mywalk(pte_t *pte, unsigned long addr, unsigned long next, struct mm_walk *walk) -{ - unsigned long pfn; - struct page *page; - - if (pte == NULL) { - kprintf("mywalk: ptr(%p)\n", pte); - return 0; - } - pfn = pte_pfn(*pte); - page = pfn_to_page(pfn); - if (page == NULL) { - kprintf("mywalk: pte(%p) page is null\n", pte); - return 0; - } - if (PageLocked(page)) { - kprintf("mywalk: MLOCK (%p)\n", (void*) addr); - } - if (addr > 0x700000 && addr < 0x705000) { - kprintf("mywalk: %p(%lx)\n", (void*) addr, page->flags); - } - return 0; -} -#endif - static long pager_req_mlock_list(ihk_os_t os, unsigned long start, unsigned long end, void *addr, int nent) { @@ -1988,12 +1921,6 @@ full: return cnt; } -static long pager_call(ihk_os_t os, struct syscall_request *req) -{ - long ret; - - dprintk("pager_call(%#lx)\n", req->args[0]); - switch (req->args[0]) { #define PAGER_REQ_CREATE 0x0001 #define PAGER_REQ_RELEASE 0x0002 #define PAGER_REQ_READ 0x0003 @@ -2002,13 +1929,28 @@ static long pager_call(ihk_os_t os, struct syscall_request *req) #define PAGER_REQ_PFN 
0x0006 #define PAGER_REQ_UNMAP 0x0007 #define PAGER_REQ_MLOCK_LIST 0x0008 - case PAGER_REQ_CREATE: - ret = pager_req_create(os, req->args[1], req->args[2]); - break; +static long pager_call_irq(ihk_os_t os, struct syscall_request *req) +{ + long ret = -ENOSYS; + switch (req->args[0]) { case PAGER_REQ_RELEASE: ret = pager_req_release(os, req->args[1], req->args[2]); break; + } + + return ret; +} + +static long pager_call(ihk_os_t os, struct syscall_request *req) +{ + long ret; + + dprintk("pager_call(%#lx)\n", req->args[0]); + switch (req->args[0]) { + case PAGER_REQ_CREATE: + ret = pager_req_create(os, req->args[1], req->args[2]); + break; case PAGER_REQ_READ: ret = pager_req_read(os, req->args[1], req->args[2], req->args[3], req->args[4]); @@ -2030,6 +1972,7 @@ static long pager_call(ihk_os_t os, struct syscall_request *req) case PAGER_REQ_UNMAP: ret = pager_req_unmap(os, req->args[1]); break; + case PAGER_REQ_MLOCK_LIST: ret = pager_req_mlock_list(os, (unsigned long) req->args[1], (unsigned long) req->args[2], @@ -2122,7 +2065,7 @@ out: return (IS_ERR_VALUE(map))? 
(int)map: 0; } -static int clear_pte_range(uintptr_t start, uintptr_t len) +int mcctrl_clear_pte_range(uintptr_t start, uintptr_t len) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; @@ -2163,6 +2106,43 @@ static int clear_pte_range(uintptr_t start, uintptr_t len) return ret; } +int release_user_space(uintptr_t start, uintptr_t len) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + uintptr_t addr; + uintptr_t end; + int error; + int ret; + + ret = 0; + //down_read(&mm->mmap_sem); + addr = start; + while (addr < (start + len)) { + vma = find_vma(mm, addr); + if (!vma) { + break; + } + + if (addr < vma->vm_start) { + addr = vma->vm_start; + } + + end = vma->vm_end; + if (addr < end) { + if ((error = vm_munmap(addr, end - addr))) { + printk("%s: ERROR: vm_munmap failed (%d)\n", __func__, error); + } + if (ret == 0) { + ret = error; + } + } + addr = end; /* vma may be gone after vm_munmap(); advance using the end saved above */ + } + //up_read(&mm->mmap_sem); + return ret; +} + /** * \brief Write out the core file image to a core file. * @@ -2174,14 +2154,9 @@ static int clear_pte_range(uintptr_t start, uintptr_t len) static int writecore(ihk_os_t os, unsigned long rcoretable, int chunks) { struct file *file; struct coretable *coretable; -#ifdef POSTK_DEBUG_TEMP_FIX_61 /* Core table size and lseek return value to loff_t */ int i, tablesize, error = 0; loff_t size; ssize_t ret; -#else /* POSTK_DEBUG_TEMP_FIX_61 */ - int ret, i, tablesize, size, error = 0; -#endif /* POSTK_DEBUG_TEMP_FIX_61 */ - mm_segment_t oldfs = get_fs(); unsigned long phys, tablephys, rphys; ihk_device_t dev = ihk_os_to_dev(os); char *pt; @@ -2194,28 +2169,14 @@ static int writecore(ihk_os_t os, unsigned long rcoretable, int chunks) { goto fail; } - set_fs(KERNEL_DS); - /* Every Linux documentation insists we should not * open a file in the kernel module, but our karma * leads us here. Precisely, Here we emulate the core * dump routine of the Linux kernel in linux/fs/exec.c. * So we have a legitimate reason to do this.
*/ -#ifdef POSTK_DEBUG_TEMP_FIX_59 /* corefile open flag add O_TRUNC */ file = filp_open("core", O_CREAT | O_RDWR | O_LARGEFILE | O_TRUNC, 0600); -#else /* POSTK_DEBUG_TEMP_FIX_59 */ - file = filp_open("core", O_CREAT | O_RDWR | O_LARGEFILE, 0600); -#endif /* POSTK_DEBUG_TEMP_FIX_59 */ -#ifdef POSTK_DEBUG_ARCH_DEP_41 /* use writehandler version switch add */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,18,0) if (IS_ERR(file) || !file->f_op) { -#else - if (IS_ERR(file) || !file->f_op || !file->f_op->write) { -#endif -#else /* POSTK_DEBUG_ARCH_DEP_41 */ - if (IS_ERR(file) || !file->f_op || !file->f_op->write) { -#endif /* POSTK_DEBUG_ARCH_DEP_41 */ dprintk("cannot open core file\n"); error = PTR_ERR(file); goto fail; @@ -2234,22 +2195,18 @@ static int writecore(ihk_os_t os, unsigned long rcoretable, int chunks) { phys = ihk_device_map_memory(dev, rphys, size); dprintk("physical %lx, ", phys); pt = ihk_device_map_virtual(dev, phys, size, NULL, 0); -#ifdef POSTK_DEBUG_TEMP_FIX_38 if (pt == NULL) { pt = phys_to_virt(phys); } -#endif /*POSTK_DEBUG_TEMP_FIX_38*/ dprintk("virtual %p\n", pt); if (pt != NULL) { -#ifdef POSTK_DEBUG_ARCH_DEP_41 /* use writehandler version switch add */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,18,0) - ret = __kernel_write(file, pt, size, &file->f_pos); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0) + ret = kernel_write(file, pt, size, + &file->f_pos); #else - ret = file->f_op->write(file, pt, size, &file->f_pos); + ret = kernel_write(file, pt, size, file->f_pos); + file->f_pos += ret; #endif -#else /* POSTK_DEBUG_ARCH_DEP_41 */ - ret = file->f_op->write(file, pt, size, &file->f_pos); -#endif /* POSTK_DEBUG_ARCH_DEP_41 */ } else { dprintk("cannot map physical memory(%lx) to virtual memory.\n", phys); @@ -2260,11 +2217,7 @@ static int writecore(ihk_os_t os, unsigned long rcoretable, int chunks) { ihk_device_unmap_virtual(dev, pt, size); ihk_device_unmap_memory(dev, phys, size); if (ret != size) { -#ifdef POSTK_DEBUG_TEMP_FIX_61 /* Core 
table size and lseek return value to loff_t */ dprintk("core file write failed(%ld).\n", ret); -#else /* POSTK_DEBUG_TEMP_FIX_61 */ - dprintk("core file write failed(%d).\n", ret); -#endif /* POSTK_DEBUG_TEMP_FIX_61 */ error = PTR_ERR(file); break; } @@ -2277,11 +2230,7 @@ static int writecore(ihk_os_t os, unsigned long rcoretable, int chunks) { } ret = file->f_op->llseek(file, size, SEEK_CUR); if (ret < 0) { -#ifdef POSTK_DEBUG_TEMP_FIX_61 /* Core table size and lseek return value to loff_t */ dprintk("core file seek failed(%ld).\n", ret); -#else /* POSTK_DEBUG_TEMP_FIX_61 */ - dprintk("core file seek failed(%d).\n", ret); -#endif /* POSTK_DEBUG_TEMP_FIX_61 */ error = PTR_ERR(file); break; } @@ -2292,7 +2241,6 @@ static int writecore(ihk_os_t os, unsigned long rcoretable, int chunks) { ihk_device_unmap_memory(dev, tablephys, tablesize); filp_close(file, NULL); fail: - set_fs(oldfs); if (error == -ENOSYS) { /* make sure we do not travel to user land */ error = -EINVAL; @@ -2303,6 +2251,27 @@ fail: #define SCHED_CHECK_SAME_OWNER 0x01 #define SCHED_CHECK_ROOT 0x02 +int __do_in_kernel_irq_syscall(ihk_os_t os, struct ikc_scd_packet *packet) +{ + struct syscall_request *sc = &packet->req; + int ret; + + switch (sc->number) { + case __NR_mmap: + ret = pager_call_irq(os, sc); + break; + default: + ret = -ENOSYS; + } + + if (ret == -ENOSYS) + return -ENOSYS; + + __return_syscall(os, packet, ret, 0); + + return 0; +} + int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet) { struct syscall_request *sc = &packet->req; @@ -2317,25 +2286,7 @@ int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet) break; case __NR_munmap: - /* Set new remote page table if not zero */ - if (sc->args[2]) { - struct mcctrl_per_proc_data *ppd = NULL; - - ppd = mcctrl_get_per_proc_data(usrdata, sc->args[3]); - if (unlikely(!ppd)) { - kprintf("%s: ERROR: no per-process structure for PID %d??\n", - __FUNCTION__, task_tgid_vnr(current)); - return -1; - } - - 
ppd->rpgtable = sc->args[2]; - - dprintk("%s: pid: %d, rpgtable: 0x%lx updated\n", - __FUNCTION__, ppd->pid, ppd->rpgtable); - mcctrl_put_per_proc_data(ppd); - } - - ret = clear_pte_range(sc->args[0], sc->args[1]); + ret = mcctrl_clear_pte_range(sc->args[0], sc->args[1]); break; case __NR_mprotect: @@ -2350,12 +2301,7 @@ int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet) } case __NR_coredump: - error = writecore(os, sc->args[1], sc->args[0]); -#ifdef POSTK_DEBUG_TEMP_FIX_62 /* Fix to notify McKernel that core file generation failed */ - ret = error; -#else /* POSTK_DEBUG_TEMP_FIX_62 */ - ret = 0; -#endif /* POSTK_DEBUG_TEMP_FIX_62 */ + ret = writecore(os, sc->args[1], sc->args[0]); break; case __NR_sched_setparam: { @@ -2430,6 +2376,10 @@ sched_setparam_out: } __return_syscall(os, packet, ret, 0); + ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet, + (usrdata->ikc2linux[smp_processor_id()] ? + usrdata->ikc2linux[smp_processor_id()] : + usrdata->ikc2linux[0])); error = 0; out: diff --git a/executer/kernel/mcctrl/sysfs_files.c b/executer/kernel/mcctrl/sysfs_files.c index 27ffbb06..aab42d4b 100644 --- a/executer/kernel/mcctrl/sysfs_files.c +++ b/executer/kernel/mcctrl/sysfs_files.c @@ -790,6 +790,7 @@ out: return error; } /* setup_node_files() */ +#ifdef SETUP_PCI_FILES static int read_file(void *buf, size_t size, char *fmt, va_list ap) { int error; @@ -798,7 +799,6 @@ static int read_file(void *buf, size_t size, char *fmt, va_list ap) int n; struct file *fp = NULL; loff_t off; - mm_segment_t ofs; ssize_t ss; dprintk("read_file(%p,%ld,%s,%p)\n", buf, size, fmt, ap); @@ -824,13 +824,14 @@ static int read_file(void *buf, size_t size, char *fmt, va_list ap) } off = 0; - ofs = get_fs(); - set_fs(KERNEL_DS); - ss = vfs_read(fp, buf, size, &off); - set_fs(ofs); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0) + ss = kernel_read(fp, buf, size, &off); +#else + ss = kernel_read(fp, off, buf, size); +#endif if (ss < 0) { error = ss; - 
eprintk("mcctrl:read_file:vfs_read failed. %d\n", error); + eprintk("mcctrl:read_file:kernel_read failed. %d\n", error); goto out; } if (ss >= size) { @@ -892,16 +893,6 @@ out: return error; } /* read_long() */ -#ifdef MCCTRL_KSYM_sys_readlink -static ssize_t (*mcctrl_sys_readlink)(const char *path, char *buf, - size_t bufsiz) -#if MCCTRL_KSYM_sys_readlink - = (void *)MCCTRL_KSYM_sys_readlink; -#else - = &sys_readlink; -#endif -#endif - static int read_link(char *buf, size_t bufsize, char *fmt, ...) { int error; @@ -951,30 +942,14 @@ out: return error; } /* read_link() */ -#ifdef POSTK_DEBUG_TEMP_FIX_22 /* iterate_dir() deadlock */ static int setup_one_pci(struct mcctrl_usrdata *udp, const char *name) { -#else /* POSTK_DEBUG_TEMP_FIX_22 */ -static int setup_one_pci(void *arg0, const char *name, int namlen, - loff_t offset, u64 ino, unsigned d_type) -{ - struct mcctrl_usrdata *udp = arg0; -#endif /* POSTK_DEBUG_TEMP_FIX_22 */ int error; char *buf = NULL; long node; struct sysfsm_bitmap_param param; -#ifdef POSTK_DEBUG_TEMP_FIX_22 /* iterate_dir() deadlock */ dprintk("setup_one_pci(%p,%s)\n", udp, name); -#else /* POSTK_DEBUG_TEMP_FIX_22 */ - dprintk("setup_one_pci(%p,%s,%d,%#lx,%#lx,%d)\n", - arg0, name, namlen, (long)offset, (long)ino, d_type); - if (namlen != 12) { - error = 0; - goto out; - } -#endif /* POSTK_DEBUG_TEMP_FIX_22 */ buf = (void *)__get_free_pages(GFP_KERNEL, 0); if (!buf) { @@ -1026,26 +1001,39 @@ static int setup_one_pci(void *arg0, const char *name, int namlen, error = 0; out: free_pages((long)buf, 0); -#ifdef POSTK_DEBUG_TEMP_FIX_22 /* iterate_dir() deadlock */ dprintk("setup_one_pci(%p,%s): %d\n", udp, name, error); -#else /* POSTK_DEBUG_TEMP_FIX_22 */ - dprintk("setup_one_pci(%p,%s,%d,%#lx,%#lx,%d): %d\n", - arg0, name, namlen, (long)offset, (long)ino, d_type, - error); -#endif /* POSTK_DEBUG_TEMP_FIX_22 */ return error; } /* setup_one_pci() */ -#ifdef POSTK_DEBUG_TEMP_FIX_22 /* iterate_dir() deadlock */ LIST_HEAD(pci_file_name_list); struct 
pci_file_name { char *name; struct list_head chain; }; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0) || \ + (defined(RHEL_RELEASE_CODE) && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 5)) +struct mcctrl_filler_args { + struct dir_context ctx; + void *buf; +}; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0) +static int pci_file_name_gen(struct dir_context *ctx, const char *name, + int namlen, loff_t offset, u64 ino, unsigned int d_type) +#else +static int pci_file_name_gen(void *ctx, const char *name, + int namlen, loff_t offset, u64 ino, unsigned int d_type) +#endif +{ + struct mcctrl_filler_args *args + = container_of(ctx, struct mcctrl_filler_args, ctx); + void *buf = args->buf; +#else static int pci_file_name_gen(void *buf, const char *name, int namlen, loff_t offset, u64 ino, unsigned d_type) { +#endif struct pci_file_name *p; int error = -1; @@ -1083,56 +1071,31 @@ out: buf, name, namlen, (long)offset, (long)ino, d_type, error); return error; } -#endif /* POSTK_DEBUG_TEMP_FIX_22 */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,11,0) -typedef int (*mcctrl_filldir_t)(void *buf, const char *name, int namlen, - loff_t offset, u64 ino, unsigned d_type); - -struct mcctrl_filler_args { - struct dir_context ctx; - mcctrl_filldir_t filler; - void *buf; -}; - -static int mcctrl_filler(struct dir_context *ctx, const char *name, - int namlen, loff_t offset, u64 ino, unsigned d_type) -{ - struct mcctrl_filler_args *args - = container_of(ctx, struct mcctrl_filler_args, ctx); - - return (*args->filler)(args->buf, name, namlen, offset, ino, d_type); -} /* mcctrl_filler() */ - -static inline int mcctrl_vfs_readdir(struct file *file, - mcctrl_filldir_t filler, void *buf) +static inline int mcctrl_vfs_readdir(struct file *file, filldir_t filler, + void *buf) { +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0) || \ + (defined(RHEL_RELEASE_CODE) && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 5)) struct mcctrl_filler_args args = { - .ctx.actor = &mcctrl_filler, - 
.filler = (void *)filler, + .ctx.actor = filler, .buf = buf, }; return iterate_dir(file, &args.ctx); -} /* mcctrl_vfs_readdir() */ #else -static inline int mcctrl_vfs_readdir(struct file *file, filldir_t filler, - void *buf) -{ return vfs_readdir(file, filler, buf); -} /* mcctrl_vfs_readdir() */ #endif +} /* mcctrl_vfs_readdir() */ static int setup_pci_files(struct mcctrl_usrdata *udp) { int error; int er; struct file *fp = NULL; -#ifdef POSTK_DEBUG_TEMP_FIX_22 /* iterate_dir() deadlock */ int ret = 0; struct pci_file_name *cur; struct pci_file_name *next; -#endif /* POSTK_DEBUG_TEMP_FIX_22 */ dprintk("setup_pci_files(%p)\n", udp); fp = filp_open("/sys/bus/pci/devices", O_DIRECTORY, 0); @@ -1142,18 +1105,13 @@ static int setup_pci_files(struct mcctrl_usrdata *udp) goto out; } -#ifdef POSTK_DEBUG_TEMP_FIX_22 /* iterate_dir() deadlock */ error = mcctrl_vfs_readdir(fp, &pci_file_name_gen, udp); -#else /* POSTK_DEBUG_TEMP_FIX_22 */ - error = mcctrl_vfs_readdir(fp, &setup_one_pci, udp); -#endif /* POSTK_DEBUG_TEMP_FIX_22 */ if (error) { eprintk("mcctrl:setup_pci_files:" "mcctrl_vfs_readdir failed. 
%d\n", error); goto out; } -#ifdef POSTK_DEBUG_TEMP_FIX_22 /* iterate_dir() deadlock */ list_for_each_entry_safe(cur, next, &pci_file_name_list, chain) { if (!ret) { ret = setup_one_pci(udp, cur->name); @@ -1162,7 +1120,6 @@ static int setup_pci_files(struct mcctrl_usrdata *udp) kfree(cur->name); kfree(cur); } -#endif /* POSTK_DEBUG_TEMP_FIX_22 */ error = 0; out: @@ -1176,6 +1133,7 @@ out: dprintk("setup_pci_files(%p): %d\n", udp, error); return error; } /* setup_pci_files() */ +#endif // SETUP_PCI_FILES void setup_sysfs_files(ihk_os_t os) { @@ -1215,7 +1173,9 @@ void setup_sysfs_files(ihk_os_t os) setup_cpus_sysfs_files(udp); setup_node_files(udp); setup_cpus_sysfs_files_node_link(udp); - //setup_pci_files(udp); +#ifdef SETUP_PCI_FILES + setup_pci_files(udp); +#endif /* Indicate sysfs files setup completion for boot script */ error = sysfsm_mkdirf(os, NULL, "/sys/setup_complete"); diff --git a/executer/kernel/mcoverlayfs/Makefile.in b/executer/kernel/mcoverlayfs/Makefile.in index d8bfbb2f..c34094de 100644 --- a/executer/kernel/mcoverlayfs/Makefile.in +++ b/executer/kernel/mcoverlayfs/Makefile.in @@ -21,7 +21,7 @@ endif endif ifeq ($(BUILD_MODULE_TMP),rhel) ifeq ($(BUILD_MODULE),none) -BUILD_MODULE=$(shell if [ ${LINUX_VERSION_CODE} -eq 199168 -a ${RHEL_RELEASE} -ge 327 -a ${RHEL_RELEASE} -le 693 ]; then echo "linux-3.10.0-327.36.1.el7"; else echo "none"; fi) +BUILD_MODULE=$(shell if [ ${LINUX_VERSION_CODE} -eq 199168 -a ${RHEL_RELEASE} -ge 327 -a ${RHEL_RELEASE} -le 862 ]; then echo "linux-3.10.0-327.36.1.el7"; else echo "none"; fi) endif ifeq ($(BUILD_MODULE),none) BUILD_MODULE=$(shell if [ ${LINUX_VERSION_CODE} -ge 262144 -a ${LINUX_VERSION_CODE} -lt 262400 ]; then echo "linux-4.0.9"; else echo "none"; fi) diff --git a/executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/readdir.c b/executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/readdir.c index 9b47c7dd..f7df9841 100644 --- a/executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/readdir.c +++ 
b/executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/readdir.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "overlayfs.h" struct ovl_cache_entry { @@ -34,10 +35,18 @@ struct ovl_dir_cache { struct list_head entries; }; +/* vfs_readdir vs. iterate_dir compat */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0) || \ + (defined(RHEL_RELEASE_CODE) && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 5)) +#define USE_ITERATE_DIR 1 +#endif + +#ifndef USE_ITERATE_DIR struct dir_context { const filldir_t actor; //loff_t pos; }; +#endif struct ovl_readdir_data { struct dir_context ctx; @@ -256,7 +265,11 @@ static inline int ovl_dir_read(struct path *realpath, do { rdd->count = 0; rdd->err = 0; +#ifdef USE_ITERATE_DIR + err = iterate_dir(realfile, &rdd->ctx); +#else err = vfs_readdir(realfile, rdd->ctx.actor, rdd); +#endif if (err >= 0) err = rdd->err; } while (!err && rdd->count); @@ -365,6 +378,22 @@ static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) return cache; } +#ifdef USE_ITERATE_DIR +struct iterate_wrapper { + struct dir_context ctx; + filldir_t actor; + void *buf; +}; + +static int ovl_wrap_readdir(void *ctx, const char *name, int namelen, + loff_t offset, u64 ino, unsigned int d_type) +{ + struct iterate_wrapper *w = ctx; + + return w->actor(w->buf, name, namelen, offset, ino, d_type); +} +#endif + static int ovl_readdir(struct file *file, void *buf, filldir_t filler) { struct ovl_dir_file *od = file->private_data; @@ -376,7 +405,16 @@ static int ovl_readdir(struct file *file, void *buf, filldir_t filler) ovl_dir_reset(file); if (od->is_real) { +#ifdef USE_ITERATE_DIR + struct iterate_wrapper w = { + .ctx.actor = ovl_wrap_readdir, + .actor = filler, + .buf = buf, + }; + res = iterate_dir(od->realfile, &w.ctx); +#else res = vfs_readdir(od->realfile, filler, buf); +#endif file->f_pos = od->realfile->f_pos; return res; diff --git a/executer/user/Makefile.in b/executer/user/Makefile.in index 2664353e..494c51db 100644 --- 
a/executer/user/Makefile.in +++ b/executer/user/Makefile.in @@ -13,6 +13,8 @@ KDIR ?= @KDIR@ ARCH=@ARCH@ CFLAGS=-Wall -O -I. -I$(VPATH)/arch/${ARCH} -I${IHKDIR} -I@abs_builddir@/../../../ihk/linux/include LDFLAGS=@LDFLAGS@ +CPPFLAGS_SYSCALL_INTERCEPT=@CPPFLAGS_SYSCALL_INTERCEPT@ +LDFLAGS_SYSCALL_INTERCEPT=@LDFLAGS_SYSCALL_INTERCEPT@ RPATH=$(shell echo $(LDFLAGS)|awk '{for(i=1;i<=NF;i++){if($$i~/^-L/){w=$$i;sub(/^-L/,"-Wl,-rpath,",w);print w}}}') VPATH=@abs_srcdir@ TARGET=mcexec libsched_yield ldump2mcdump.so @@ -21,12 +23,17 @@ LIBS=@LIBS@ IHKDIR ?= $(VPATH)/../../../ihk/linux/include/ MCEXEC_LIBS=-lmcexec -lrt -lnuma -pthread -L@abs_builddir@/../../../ihk/linux/user -lihk -Wl,-rpath,$(MCKERNEL_LIBDIR) ENABLE_QLMPI=@ENABLE_QLMPI@ +WITH_SYSCALL_INTERCEPT=@WITH_SYSCALL_INTERCEPT@ ifeq ($(ENABLE_QLMPI),yes) MCEXEC_LIBS += -lmpi TARGET+= libqlmpi.so ql_server ql_mpiexec_start ql_mpiexec_finalize ql_talker libqlfort.so endif +ifeq ($(WITH_SYSCALL_INTERCEPT),yes) + TARGET += syscall_intercept.so +endif + ifeq ($(ARCH), arm64) CFLAGS += $(foreach i, $(shell seq 1 100), $(addprefix -DPOSTK_DEBUG_ARCH_DEP_, $(i))) CFLAGS += $(foreach i, $(shell seq 1 100), $(addprefix -DPOSTK_DEBUG_TEMP_FIX_, $(i))) @@ -40,10 +47,10 @@ mcexec: mcexec.c libmcexec.a # POSTK_DEBUG_ARCH_DEP_34, eclair arch depend separate. ifeq ($(ARCH), arm64) eclair: eclair.c arch/$(ARCH)/arch-eclair.c - $(CC) -I.. -I. -I./arch/$(ARCH)/include -I$(VPATH)/.. -I$(VPATH) -I$(VPATH)/arch/$(ARCH)/include $(CFLAGS) -o $@ $^ $(LIBS) + $(CC) -I.. -I. -I./arch/$(ARCH)/include -I$(VPATH)/.. -I$(VPATH) -I$(VPATH)/arch/$(ARCH)/include $(CFLAGS) -o $@ $^ $(LIBS) -ldl -lz else -eclair: eclair.c - $(CC) $(CFLAGS) -I${IHKDIR} -o $@ $^ $(LIBS) +eclair: eclair.c arch/$(ARCH)/arch-eclair.c + $(CC) -I.. 
-I$(VPATH) -I$(VPATH)/arch/$(ARCH)/include $(CFLAGS) -o $@ $^ $(LIBS) endif ldump2mcdump.so: ldump2mcdump.c @@ -52,6 +59,12 @@ ldump2mcdump.so: ldump2mcdump.c libsched_yield: libsched_yield.c $(CC) -shared -fPIC -Wl,-soname,sched_yield.so.1 -o libsched_yield.so.1.0.0 $^ -lc -ldl +syscall_intercept.so: syscall_intercept.c libsyscall_intercept_arch.a + $(CC) $(CPPFLAGS_SYSCALL_INTERCEPT) -g -O2 $(LDFLAGS_SYSCALL_INTERCEPT) -lsyscall_intercept -fpic -shared -L. -lsyscall_intercept_arch $^ -o $@ + +libsyscall_intercept_arch.a:: + +(cd arch/${ARCH}; $(MAKE)) + libmcexec.a:: +(cd arch/${ARCH}; $(MAKE)) @@ -99,6 +112,9 @@ ifeq ($(ENABLE_QLMPI),yes) install -m 755 ql_mpiexec_start $(BINDIR) install -m 755 ql_mpiexec_finalize $(BINDIR) install -m 755 ql_talker $(SBINDIR) +endif +ifeq ($(WITH_SYSCALL_INTERCEPT),yes) + install -m 755 syscall_intercept.so $(MCKERNEL_LIBDIR) endif @uncomment_if_ENABLE_MEMDUMP@install -m 755 eclair $(BINDIR) @uncomment_if_ENABLE_MEMDUMP@install -m 755 vmcore2mckdump $(BINDIR) diff --git a/executer/user/arch/x86_64/Makefile.in b/executer/user/arch/x86_64/Makefile.in index b913d94e..00da3154 100644 --- a/executer/user/arch/x86_64/Makefile.in +++ b/executer/user/arch/x86_64/Makefile.in @@ -4,7 +4,7 @@ BINDIR=@BINDIR@ KDIR ?= @KDIR@ CFLAGS=-Wall -O -I. 
VPATH=@abs_srcdir@ -TARGET=../../libmcexec.a +TARGET=../../libmcexec.a ../../libsyscall_intercept_arch.a LIBS=@LIBS@ all: $(TARGET) @@ -18,6 +18,12 @@ archdep.o: archdep.S arch_syscall.o: arch_syscall.c $(CC) -c -I${KDIR} $(CFLAGS) $(EXTRA_CFLAGS) -fPIE -pie -pthread $< +../../libsyscall_intercept_arch.a: archdep_c.o + $(AR) cr ../../libsyscall_intercept_arch.a archdep_c.o + +archdep_c.o: archdep_c.c + $(CC) -c -I${KDIR} $(CFLAGS) $(EXTRA_CFLAGS) -fPIE -pie -pthread $< + clean: $(RM) $(TARGET) *.o diff --git a/executer/user/arch/x86_64/arch-eclair.c b/executer/user/arch/x86_64/arch-eclair.c index 841a92a9..6b1b0d2a 100644 --- a/executer/user/arch/x86_64/arch-eclair.c +++ b/executer/user/arch/x86_64/arch-eclair.c @@ -42,7 +42,7 @@ int print_kregs(char *rbp, size_t rbp_size, const struct arch_kregs *kregs) } for (i = 0; i < sizeof(regs_1)/sizeof(regs_1[0]); i++) { /* rsi, rdi, rbp, rsp */ - ret = print_bin(rbp, rbp_size, (void *)regs_1[i], sizeof(regs_1[0])); + ret = print_bin(rbp, rbp_size, regs_1 + i, sizeof(regs_1[0])); if (ret < 0) { return ret; } @@ -62,7 +62,7 @@ int print_kregs(char *rbp, size_t rbp_size, const struct arch_kregs *kregs) } for (i = 0; i < sizeof(regs_2)/sizeof(regs_2[0]); i++) { /* r12-r15 */ - ret = print_bin(rbp, rbp_size, (void *)regs_2[i], sizeof(regs_2[0])); + ret = print_bin(rbp, rbp_size, regs_2 + i, sizeof(regs_2[0])); if (ret < 0) { return ret; } diff --git a/executer/user/arch/x86_64/arch_args.h b/executer/user/arch/x86_64/arch_args.h index 8c7fa740..d9bb0a16 100644 --- a/executer/user/arch/x86_64/arch_args.h +++ b/executer/user/arch/x86_64/arch_args.h @@ -67,6 +67,12 @@ get_syscall_arg6(syscall_args *args) return args->r9; } +static inline unsigned long +get_syscall_rip(syscall_args *args) +{ + return args->rip; +} + static inline void set_syscall_number(syscall_args *args, unsigned long value) { diff --git a/executer/user/arch/x86_64/arch_syscall.c b/executer/user/arch/x86_64/arch_syscall.c index 4aa63874..bae50d17 100644 --- 
a/executer/user/arch/x86_64/arch_syscall.c +++ b/executer/user/arch/x86_64/arch_syscall.c @@ -48,7 +48,7 @@ archdep_syscall(struct syscall_wait_desc *w, long *ret) if (*ret >= PATH_MAX) { *ret = -ENAMETOOLONG; } - if (ret < 0) { + if (*ret < 0) { return 0; } __dprintf("open: %s\n", pathbuf); diff --git a/executer/user/arch/x86_64/archdep.S b/executer/user/arch/x86_64/archdep.S index c4da1ef6..6cc5351d 100644 --- a/executer/user/arch/x86_64/archdep.S +++ b/executer/user/arch/x86_64/archdep.S @@ -1,15 +1,22 @@ /* -arg: rdi, rsi, rdx, rcx, r8, r9 -ret: rax +Calling convention: + arg: rdi, rsi, rdx, rcx, r8, r9 + ret: rax -rax syscall number -syscall: (rax:num) rdi rsi rdx r10 r8 r9 (rcx:ret addr) -fd, cmd, param -rdi: fd -rsi: cmd -rdx: param -rcx: save area -r8: new thread context + rdi: fd + rsi: cmd + rdx: param + rcx: save area + r8: new thread context + +Syscam call convention: + syscall number: rax + arg: rdi, rsi, rdx, r10, r8, r9 + return addr: rcx + + rdi: fd + rsi: cmd + rdx: param */ .global switch_ctx @@ -91,6 +98,7 @@ switch_ctx: 1: mov $0xffffffffffffffff,%eax + retq 2: pushq %rax movq $158,%rax /* arch_prctl */ @@ -146,4 +154,3 @@ compare_and_swap_int: lock cmpxchgl %edx,0(%rdi) retq - diff --git a/executer/user/arch/x86_64/archdep_c.c b/executer/user/arch/x86_64/archdep_c.c new file mode 100644 index 00000000..2d650f77 --- /dev/null +++ b/executer/user/arch/x86_64/archdep_c.c @@ -0,0 +1,52 @@ +/* +function call convention +rdi, rsi, rdx, rcx, r8, r9: IN arguments +rax: OUT return value + +syscall convention: +rax: IN syscall number +rdi, rsi, rdx, r10, r8, r9: IN arguments +rax: OUT return value +rcx, r11: CLOBBER +*/ +long uti_syscall6(long syscall_number, long arg0, long arg1, long arg2, long arg3, long arg4, long arg5) +{ + long ret; + asm volatile ("movq %[arg3],%%r10; movq %[arg4],%%r8; movq %[arg5],%%r9; syscall" + : "=a" (ret) + : "a" (syscall_number), + "D" (arg0), "S" (arg1), "d" (arg2), + [arg3] "g" (arg3), [arg4] "g" (arg4), [arg5] "g" 
(arg5) + : "rcx", "r11", "r10", "r8", "r9", "memory"); + return ret; +} + +long uti_syscall3(long syscall_number, long arg0, long arg1, long arg2) +{ + long ret; + asm volatile ("syscall" + : "=a" (ret) + : "a" (syscall_number), "D" (arg0), "S" (arg1), "d" (arg2) + : "rcx", "r11", "memory"); + return ret; +} + +long uti_syscall1(long syscall_number, long arg0) +{ + long ret; + asm volatile ("syscall" + : "=a" (ret) + : "a" (syscall_number), "D" (arg0) + : "rcx", "r11", "memory"); + return ret; +} + +long uti_syscall0(long syscall_number) +{ + long ret; + asm volatile ("syscall" + : "=a" (ret) + : "a" (syscall_number) + : "rcx", "r11", "memory"); + return ret; +} diff --git a/executer/user/arch/x86_64/include/arch-eclair.h b/executer/user/arch/x86_64/include/arch-eclair.h index a76053f3..99350247 100644 --- a/executer/user/arch/x86_64/include/arch-eclair.h +++ b/executer/user/arch/x86_64/include/arch-eclair.h @@ -2,8 +2,18 @@ #ifndef HEADER_USER_X86_ECLAIR_H #define HEADER_USER_X86_ECLAIR_H -#define MAP_KERNEL 0xFFFFFFFF80000000 -#define MAP_ST 0xFFFF800000000000 +#ifndef POSTK_DEBUG_ARCH_DEP_34 +#define MAP_ST_START 0xffff800000000000UL +#define MAP_VMAP_START 0xffff850000000000UL +#define MAP_FIXED_START 0xffff860000000000UL +#define LINUX_PAGE_OFFSET 0xffff880000000000UL +#define MAP_KERNEL_START 0xFFFFFFFFFE800000UL +#endif /* POSTK_DEBUG_ARCH_DEP_34 */ + +/* TODO: these should be updated when McKernel changes */ +#define MCKERNEL_ELF_START "0xFFFFFFFFFE801000" +#define MCKERNEL_ELF_LEN "0x0000000000100000" + #define ARCH_CLV_SPAN "x86_cpu_local_variables_span" diff --git a/executer/user/archdep.h b/executer/user/archdep.h index abe81042..4a655203 100644 --- a/executer/user/archdep.h +++ b/executer/user/archdep.h @@ -1,4 +1,6 @@ -extern int switch_ctx(int fd, unsigned long cmd, void **param, void *lctx, void *rctx); +#include "../include/uprotocol.h" + +extern int switch_ctx(int fd, unsigned long cmd, struct uti_save_fs_desc *desc, void *lctx, void *rctx); 
extern unsigned long compare_and_swap(unsigned long *addr, unsigned long old, unsigned long new); extern unsigned int compare_and_swap_int(unsigned int *addr, unsigned int old, unsigned int new); extern int archdep_syscall(struct syscall_wait_desc *w, long *ret); diff --git a/executer/user/archdep_uti.h b/executer/user/archdep_uti.h new file mode 100644 index 00000000..c3c33f31 --- /dev/null +++ b/executer/user/archdep_uti.h @@ -0,0 +1,5 @@ +extern long uti_syscall6(long syscall_number, long arg0, long arg1, long arg2, long arg3, long arg4, long arg5); +extern long uti_syscall3(long syscall_number, long arg0, long arg1, long arg2); +extern long uti_syscall1(long syscall_number, long arg0); +extern long uti_syscall0(long syscall_number); + diff --git a/executer/user/eclair.c b/executer/user/eclair.c index dc758789..603f4838 100644 --- a/executer/user/eclair.c +++ b/executer/user/eclair.c @@ -8,9 +8,7 @@ * Copyright (C) 2015 RIKEN AICS */ -#ifdef POSTK_DEBUG_ARCH_DEP_33 #include "../config.h" -#endif /* POSTK_DEBUG_ARCH_DEP_33 */ #include #include #include @@ -22,10 +20,8 @@ #include #include #include -#ifdef POSTK_DEBUG_ARCH_DEP_34 #include #include -#endif /* POSTK_DEBUG_ARCH_DEP_34 */ #define CPU_TID_BASE 1000000 @@ -85,11 +81,7 @@ static struct thread_info *curr_thread = NULL; static uintptr_t ihk_mc_switch_context = -1; #endif /* POSTK_DEBUG_ARCH_DEP_34 */ -#ifdef POSTK_DEBUG_ARCH_DEP_34 uintptr_t lookup_symbol(char *name) { -#else /* POSTK_DEBUG_ARCH_DEP_34 */ -static uintptr_t lookup_symbol(char *name) { -#endif /* POSTK_DEBUG_ARCH_DEP_34 */ int i; for (i = 0; i < nsyms; ++i) { @@ -101,22 +93,22 @@ static uintptr_t lookup_symbol(char *name) { return NOSYMBOL; } /* lookup_symbol() */ +#define NOPHYS ((uintptr_t)-1) static uintptr_t virt_to_phys(uintptr_t va) { -#ifndef POSTK_DEBUG_ARCH_DEP_34 -#define MAP_KERNEL 0xFFFFFFFF80000000 -#endif /* POSTK_DEBUG_ARCH_DEP_34 */ - if (va >= MAP_KERNEL) { - return (va - MAP_KERNEL + kernel_base); + if (va >= 
MAP_KERNEL_START) { + return va - MAP_KERNEL_START + kernel_base; } -#ifndef POSTK_DEBUG_ARCH_DEP_34 -#define MAP_ST 0xFFFF800000000000 -#endif /* POSTK_DEBUG_ARCH_DEP_34 */ - if (va >= MAP_ST) { - return (va - MAP_ST); + else if (va >= LINUX_PAGE_OFFSET) { + return va - LINUX_PAGE_OFFSET; } - if (0) printf("virt_to_phys(%lx): -1\n", va); -#define NOPHYS ((uintptr_t)-1) + else if (va >= MAP_FIXED_START) { + return va - MAP_FIXED_START; + } + else if (va >= MAP_ST_START) { + return va - MAP_ST_START; + } + return NOPHYS; } /* virt_to_phys() */ @@ -673,11 +665,7 @@ static int setup_dump(char *fname) { return 0; } /* setup_dump() */ -#ifdef POSTK_DEBUG_ARCH_DEP_38 static ssize_t print_hex(char *buf, size_t buf_size, char *str) { -#else /* POSTK_DEBUG_ARCH_DEP_38 */ -static ssize_t print_hex(char *buf, char *str) { -#endif /* POSTK_DEBUG_ARCH_DEP_38 */ char *p; char *q; @@ -702,11 +690,7 @@ static ssize_t print_hex(char *buf, char *str) { return (q - buf); } /* print_hex() */ -#if defined(POSTK_DEBUG_ARCH_DEP_34) && defined(POSTK_DEBUG_ARCH_DEP_38) ssize_t print_bin(char *buf, size_t buf_size, void *data, size_t size) { -#else /* POSTK_DEBUG_ARCH_DEP_34 && POSTK_DEBUG_ARCH_DEP_38*/ -static ssize_t print_bin(char *buf, void *data, size_t size) { -#endif /* POSTK_DEBUG_ARCH_DEP_34 && POSTK_DEBUG_ARCH_DEP_38*/ uint8_t *p; char *q; int i; @@ -733,13 +717,8 @@ static ssize_t print_bin(char *buf, void *data, size_t size) { return (q - buf); } /* print_bin() */ -#ifdef POSTK_DEBUG_ARCH_DEP_38 static void command(const char *cmd, char *res, size_t res_size) { const char *p; -#else /* POSTK_DEBUG_ARCH_DEP_38 */ -static void command(char *cmd, char *res) { - char *p; -#endif /* POSTK_DEBUG_ARCH_DEP_38 */ char *rbp; p = cmd; @@ -801,11 +780,7 @@ static void command(char *cmd, char *res) { #endif /* POSTK_DEBUG_ARCH_DEP_34 */ rbp += sprintf(rbp, "l"); if (0) -#ifdef POSTK_DEBUG_ARCH_DEP_38 rbp += print_hex(rbp, res_size, str); -#else /* POSTK_DEBUG_ARCH_DEP_38 */ - rbp += 
print_hex(rbp, str); -#endif /* POSTK_DEBUG_ARCH_DEP_38 */ rbp += sprintf(rbp, "%s", str); } else if (!strcmp(p, "D")) { @@ -814,20 +789,9 @@ static void command(char *cmd, char *res) { } else if (!strcmp(p, "g")) { if (curr_thread->cpu < 0) { -#ifndef POSTK_DEBUG_ARCH_DEP_34 - struct x86_kregs { - uintptr_t rsp, rbp, rbx, rsi; - uintptr_t rdi, r12, r13, r14; - uintptr_t r15, rflags, rsp0; - }; -#endif /* POSTK_DEBUG_ARCH_DEP_34 */ int error; -#ifdef POSTK_DEBUG_ARCH_DEP_34 struct arch_kregs kregs; -#else /* POSTK_DEBUG_ARCH_DEP_34 */ - struct x86_kregs kregs; -#endif /* POSTK_DEBUG_ARCH_DEP_34 */ error = read_mem(curr_thread->process+K(CTX_OFFSET), &kregs, sizeof(kregs)); @@ -836,36 +800,7 @@ static void command(char *cmd, char *res) { break; } -#ifdef POSTK_DEBUG_ARCH_DEP_34 print_kregs(rbp, res_size, &kregs); -#else /* POSTK_DEBUG_ARCH_DEP_34 */ - rbp += sprintf(rbp, "xxxxxxxxxxxxxxxx"); /* rax */ - rbp += print_bin(rbp, &kregs.rbx, sizeof(uint64_t)); - rbp += sprintf(rbp, "xxxxxxxxxxxxxxxx"); /* rcx */ - rbp += sprintf(rbp, "xxxxxxxxxxxxxxxx"); /* rdx */ - rbp += print_bin(rbp, &kregs.rsi, sizeof(uint64_t)); - rbp += print_bin(rbp, &kregs.rdi, sizeof(uint64_t)); - rbp += print_bin(rbp, &kregs.rbp, sizeof(uint64_t)); - rbp += print_bin(rbp, &kregs.rsp, sizeof(uint64_t)); - rbp += sprintf(rbp, "xxxxxxxxxxxxxxxx"); /* r8 */ - rbp += sprintf(rbp, "xxxxxxxxxxxxxxxx"); /* r9 */ - - rbp += sprintf(rbp, "xxxxxxxxxxxxxxxx"); /* r10 */ - rbp += sprintf(rbp, "xxxxxxxxxxxxxxxx"); /* r11 */ - rbp += print_bin(rbp, &kregs.r12, sizeof(uint64_t)); - rbp += print_bin(rbp, &kregs.r13, sizeof(uint64_t)); - rbp += print_bin(rbp, &kregs.r14, sizeof(uint64_t)); - rbp += print_bin(rbp, &kregs.r15, sizeof(uint64_t)); - rbp += print_bin(rbp, &ihk_mc_switch_context, - sizeof(uint64_t)); /* rip */ - rbp += print_bin(rbp, &kregs.rflags, sizeof(uint32_t)); - rbp += sprintf(rbp, "xxxxxxxx"); /* cs */ - rbp += sprintf(rbp, "xxxxxxxx"); /* ss */ - rbp += sprintf(rbp, "xxxxxxxx"); /* ds */ - 
rbp += sprintf(rbp, "xxxxxxxx"); /* es */ - rbp += sprintf(rbp, "xxxxxxxx"); /* fs */ - rbp += sprintf(rbp, "xxxxxxxx"); /* gs */ -#endif /* POSTK_DEBUG_ARCH_DEP_34 */ } else { int error; @@ -943,11 +878,7 @@ static void command(char *cmd, char *res) { #endif /* POSTK_DEBUG_ARCH_DEP_34 */ rbp += sprintf(rbp, "l"); if (0) -#ifdef POSTK_DEBUG_ARCH_DEP_38 rbp += print_hex(rbp, res_size, str); -#else /* POSTK_DEBUG_ARCH_DEP_38 */ - rbp += print_hex(rbp, str); -#endif /* POSTK_DEBUG_ARCH_DEP_38 */ rbp += sprintf(rbp, "%s", str); } else if (!strncmp(p, "T", 1)) { @@ -1039,11 +970,7 @@ static void command(char *cmd, char *res) { else { q += sprintf(q, "status=%#x", ti->status); } -#ifdef POSTK_DEBUG_ARCH_DEP_38 rbp += print_hex(rbp, res_size, buf); -#else /* POSTK_DEBUG_ARCH_DEP_38 */ - rbp += print_hex(rbp, buf); -#endif /* POSTK_DEBUG_ARCH_DEP_38 */ } } while (0); @@ -1272,11 +1199,7 @@ int main(int argc, char *argv[]) { } mode = 0; fputc('+', ofp); -#ifdef POSTK_DEBUG_ARCH_DEP_38 command(lbuf, rbuf, sizeof(rbuf)); -#else /* POSTK_DEBUG_ARCH_DEP_38 */ - command(lbuf, rbuf); -#endif /* POSTK_DEBUG_ARCH_DEP_38 */ sum = 0; for (p = rbuf; *p != '\0'; ++p) { sum += *p; diff --git a/executer/user/eclair.h b/executer/user/eclair.h index a80c6c0f..e0e81669 100644 --- a/executer/user/eclair.h +++ b/executer/user/eclair.h @@ -3,11 +3,7 @@ #ifndef HEADER_USER_COMMON_ECLAIR_H #define HEADER_USER_COMMON_ECLAIR_H -#ifdef POSTK_DEBUG_ARCH_DEP_76 /* header path fix */ #include "../config.h" -#else /* POSTK_DEBUG_ARCH_DEP_76 */ -#include -#endif /* POSTK_DEBUG_ARCH_DEP_76 */ #include #include #include diff --git a/executer/user/libsched_yield.c b/executer/user/libsched_yield.c index 99be0f78..b69af142 100644 --- a/executer/user/libsched_yield.c +++ b/executer/user/libsched_yield.c @@ -11,7 +11,9 @@ typedef int (*int_void_fn)(void); +#if 0 static int_void_fn orig_sched_yield = 0; +#endif int sched_yield(void) { diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index 
0a325822..49765ff3 100644 --- a/executer/user/mcexec.c +++ b/executer/user/mcexec.c @@ -77,6 +77,7 @@ #endif /* !POSTK_DEBUG_ARCH_DEP_77 */ #include "../include/uprotocol.h" #include +#include "../include/uti.h" #include #include "archdep.h" #include "arch_args.h" @@ -95,36 +96,39 @@ //#define DEBUG #define ADD_ENVS_OPTION -#ifndef DEBUG -#define __dprint(msg) -#define __dprintf(arg, ...) -#define __eprint(msg) -#define __eprintf(format, ...) +#ifdef DEBUG +static int debug = 1; #else -#define __dprint(msg) {printf("%s: " msg, __FUNCTION__);fflush(stdout);} -#define __dprintf(format, args...) {printf("%s: " format, __FUNCTION__, \ - ##args);fflush(stdout);} -#define __eprint(msg) {fprintf(stderr, "%s: " msg, __FUNCTION__);fflush(stderr);} -#define __eprintf(format, args...) {fprintf(stderr, "%s: " format, __FUNCTION__, \ - ##args);fflush(stderr);} +static int debug; #endif - -#define CHKANDJUMPF(cond, err, format, ...) \ - do { \ - if(cond) { \ - __eprintf(format, __VA_ARGS__); \ - ret = err; \ - goto fn_fail; \ - } \ + +#define __dprintf(format, args...) do { \ + if (debug) { \ + printf("%s: " format, __func__, ##args); \ + fflush(stdout); \ + } \ +} while (0) +#define __eprintf(format, args...) do { \ + fprintf(stderr, "%s: " format, __func__, ##args); \ + fflush(stderr); \ +} while (0) + +#define CHKANDJUMPF(cond, err, format, ...) 
\ + do { \ + if (cond) { \ + __eprintf(format, __VA_ARGS__); \ + ret = err; \ + goto fn_fail; \ + } \ } while(0) -#define CHKANDJUMP(cond, err, msg) \ - do { \ - if(cond) { \ - __eprint(msg); \ - ret = err; \ - goto fn_fail; \ - } \ +#define CHKANDJUMP(cond, err, msg) \ + do { \ + if (cond) { \ + __eprintf(msg); \ + ret = err; \ + goto fn_fail; \ + } \ } while(0) @@ -184,13 +188,6 @@ struct sigfd { struct sigfd *sigfdtop; - -struct syscall_struct { - int number; - unsigned long args[6]; - unsigned long ret; -}; - #ifdef NCCS #undef NCCS #endif @@ -229,6 +226,9 @@ static long stack_premap = (2ULL << 20); static long stack_max = -1; static struct rlimit rlim_stack; static char *mpol_bind_nodes = NULL; +static int uti_thread_rank = 0; +static int uti_use_last_cpu = 0; +static int enable_uti = 0; /* Partitioned execution (e.g., for MPI) */ static int nr_processes = 0; @@ -278,11 +278,11 @@ struct program_load_desc *load_elf(FILE *fp, char **interp_pathp) *interp_pathp = NULL; if (fread(&hdr, sizeof(hdr), 1, fp) < 1) { - __eprint("Cannot read Ehdr.\n"); + __eprintf("Cannot read Ehdr.\n"); return NULL; } if (memcmp(hdr.e_ident, ELFMAG, SELFMAG)) { - __eprint("ELFMAG mismatched.\n"); + __eprintf("ELFMAG mismatched.\n"); return NULL; } fseek(fp, hdr.e_phoff, SEEK_SET); @@ -300,7 +300,6 @@ struct program_load_desc *load_elf(FILE *fp, char **interp_pathp) + sizeof(struct program_image_section) * nhdrs); memset(desc, '\0', sizeof(struct program_load_desc) + sizeof(struct program_image_section) * nhdrs); - desc->shell_path[0] = '\0'; fseek(fp, hdr.e_phoff, SEEK_SET); j = 0; desc->num_sections = nhdrs; @@ -312,13 +311,13 @@ struct program_load_desc *load_elf(FILE *fp, char **interp_pathp) } if (phdr.p_type == PT_INTERP) { if (phdr.p_filesz > sizeof(interp_path)) { - __eprint("too large PT_INTERP segment\n"); + __eprintf("too large PT_INTERP segment\n"); return NULL; } ss = pread(fileno(fp), interp_path, phdr.p_filesz, phdr.p_offset); if (ss <= 0) { - __eprint("cannot read 
PT_INTERP segment\n"); + __eprintf("cannot read PT_INTERP segment\n"); return NULL; } interp_path[ss] = '\0'; @@ -408,11 +407,11 @@ struct program_load_desc *load_interp(struct program_load_desc *desc0, FILE *fp) unsigned long align; if (fread(&hdr, sizeof(hdr), 1, fp) < 1) { - __eprint("Cannot read Ehdr.\n"); + __eprintf("Cannot read Ehdr.\n"); return NULL; } if (memcmp(hdr.e_ident, ELFMAG, SELFMAG)) { - __eprint("ELFMAG mismatched.\n"); + __eprintf("ELFMAG mismatched.\n"); return NULL; } fseek(fp, hdr.e_phoff, SEEK_SET); @@ -441,10 +440,12 @@ struct program_load_desc *load_interp(struct program_load_desc *desc0, FILE *fp) for (i = 0; i < hdr.e_phnum; i++) { if (fread(&phdr, sizeof(phdr), 1, fp) < 1) { __eprintf("Loading phdr failed (%d)\n", i); + free(desc); return NULL; } if (phdr.p_type == PT_INTERP) { - __eprint("PT_INTERP on interp\n"); + __eprintf("PT_INTERP on interp\n"); + free(desc); return NULL; } if (phdr.p_type == PT_LOAD) { @@ -491,7 +492,6 @@ int lookup_exec_path(char *filename, char *path, int max_len, int execvp) struct stat sb; char *link_path = NULL; -retry: found = 0; /* Is file not absolute path? 
*/ @@ -505,11 +505,13 @@ retry: if (!execvp) { if (strlen(filename) + 1 > max_len) { + free(link_path); return ENAMETOOLONG; } strcpy(path, filename); error = access(path, X_OK); if (error) { + free(link_path); return errno; } found = 1; @@ -521,6 +523,7 @@ retry: } if (strlen(filename) >= 255) { + free(link_path); return ENAMETOOLONG; } @@ -530,6 +533,7 @@ retry: tofree = string = strdup(PATH); if (string == NULL) { printf("lookup_exec_path(): copying PATH, not enough memory?\n"); + free(link_path); return ENOMEM; } @@ -550,7 +554,8 @@ retry: } free(tofree); - if(!found){ + if (!found) { + free(link_path); return ENOENT; } break; @@ -562,6 +567,7 @@ retry: if (error < 0 || error >= max_len) { fprintf(stderr, "lookup_exec_path(): array too small?\n"); + free(link_path); return ENOMEM; } @@ -581,6 +587,7 @@ retry: if (error < 0 || error >= max_len) { fprintf(stderr, "lookup_exec_path(): array too small?\n"); + free(link_path); return ENOMEM; } @@ -594,41 +601,12 @@ retry: /* Check whether the resolved path is a symlink */ if (lstat(path, &sb) == -1) { - __eprint("lookup_exec_path(): error stat\n"); - return errno; + error = errno; + __dprintf("lookup_exec_path(): error stat for %s: %d\n", + path, error); + return error; } - if ((sb.st_mode & S_IFMT) == S_IFLNK) { - link_path = malloc(max_len); - if (!link_path) { - fprintf(stderr, "lookup_exec_path(): error allocating\n"); - return ENOMEM; - } -#ifdef POSTK_DEBUG_TEMP_FIX_6 /* dynamic allocate area initialize clear */ - memset(link_path, '\0', max_len); -#endif /* POSTK_DEBUG_TEMP_FIX_6 */ - - error = readlink(path, link_path, max_len); - if (error == -1 || error == max_len) { - fprintf(stderr, "lookup_exec_path(): error readlink\n"); - return EINVAL; - } - link_path[error] = '\0'; - - __dprintf("lookup_exec_path(): %s is link -> %s\n", path, link_path); - - if(link_path[0] != '/'){ - char *t = strrchr(path, '/'); - if(t){ - t++; - strcpy(t, link_path); - strcpy(link_path, path); - } - } - filename = link_path; - 
goto retry; - } - if (!found) { fprintf(stderr, "lookup_exec_path(): error finding file %s\n", filename); @@ -641,13 +619,13 @@ retry: } int load_elf_desc(char *filename, struct program_load_desc **desc_p, - char **shell_p) + char **shebang_p) { FILE *fp; FILE *interp = NULL; char *interp_path; - char *shell = NULL; - size_t shell_len = 0; + char *shebang = NULL; + size_t shebang_len = 0; struct program_load_desc *desc; int ret = 0; struct stat sb; @@ -677,20 +655,26 @@ int load_elf_desc(char *filename, struct program_load_desc **desc_p, if (fread(&header, 1, 2, fp) != 2) { fprintf(stderr, "Error: Failed to read header from %s\n", filename); + fclose(fp); return errno; } if (!strncmp(header, "#!", 2)) { - - if (getline(&shell, &shell_len, fp) == -1) { - fprintf(stderr, "Error: reading shell path %s\n", filename); + if (getline(&shebang, &shebang_len, fp) == -1) { + fprintf(stderr, "Error: reading shebang path %s\n", + filename); } fclose(fp); - /* Delete new line character */ - shell[strlen(shell) - 1] = 0; - *shell_p = shell; + /* Delete new line character and any trailing spaces */ + shebang_len = strlen(shebang) - 1; + shebang[shebang_len] = '\0'; + while (strpbrk(shebang + shebang_len - 1, " \t")) { + shebang_len--; + shebang[shebang_len] = '\0'; + } + *shebang_p = shebang; return 0; } @@ -699,6 +683,7 @@ int load_elf_desc(char *filename, struct program_load_desc **desc_p, if ((ret = ioctl(fd, MCEXEC_UP_OPEN_EXEC, filename)) != 0) { fprintf(stderr, "Error: open_exec() fails for %s: %d (fd: %d)\n", filename, ret, fd); + fclose(fp); return ret; } @@ -713,6 +698,7 @@ int load_elf_desc(char *filename, struct program_load_desc **desc_p, if (!exec_path) { fprintf(stderr, "WARNING: strdup(filename) failed\n"); + fclose(fp); return ENOMEM; } } @@ -720,12 +706,14 @@ int load_elf_desc(char *filename, struct program_load_desc **desc_p, char *cwd = getcwd(NULL, 0); if (!cwd) { fprintf(stderr, "Error: getting current working dir pathname\n"); + fclose(fp); return ENOMEM; } 
exec_path = malloc(strlen(cwd) + strlen(filename) + 2); if (!exec_path) { fprintf(stderr, "Error: allocating exec_path\n"); + fclose(fp); return ENOMEM; } @@ -735,8 +723,8 @@ int load_elf_desc(char *filename, struct program_load_desc **desc_p, desc = load_elf(fp, &interp_path); if (!desc) { - fclose(fp); fprintf(stderr, "Error: Failed to parse ELF!\n"); + fclose(fp); return 1; } @@ -746,18 +734,22 @@ int load_elf_desc(char *filename, struct program_load_desc **desc_p, path = search_file(interp_path, X_OK); if (!path) { fprintf(stderr, "Error: interp not found: %s\n", interp_path); + fclose(fp); return 1; } interp = fopen(path, "rb"); if (!interp) { fprintf(stderr, "Error: Failed to open %s\n", path); + fclose(fp); return 1; } desc = load_interp(desc, interp); if (!desc) { fprintf(stderr, "Error: Failed to parse interp!\n"); + fclose(fp); + fclose(interp); return 1; } } @@ -768,6 +760,77 @@ int load_elf_desc(char *filename, struct program_load_desc **desc_p, return 0; } +/* recursively resolve shebangs + * + * Note: shebang_argv_p must point to reallocable memory or be NULL + */ +int load_elf_desc_shebang(char *shebang_argv0, + struct program_load_desc **desc_p, + char ***shebang_argv_p) +{ + char path[PATH_MAX]; + char *shebang = NULL; + int ret; + + if ((ret = lookup_exec_path(shebang_argv0, path, sizeof(path), 1)) + != 0) { + __dprintf("error: finding file: %s\n", shebang_argv0); + return ret; + } + + if ((ret = load_elf_desc(path, desc_p, &shebang)) != 0) { + __eprintf("error: loading file: %s\n", shebang_argv0); + return ret; + } + + if (shebang) { + char *shebang_params; + size_t shebang_param_count = 1; + size_t shebang_argv_count = 0; + char **shebang_argv; + + if (!shebang_argv_p) + return load_elf_desc_shebang(shebang, desc_p, NULL); + + shebang_argv = *shebang_argv_p; + + /* if there is a space, add whatever follows as extra arg */ + shebang_params = strchr(shebang, ' '); + if (shebang_params) { + shebang_params[0] = '\0'; + shebang_params++; + 
shebang_param_count++; + } + + if (shebang_argv == NULL) { + shebang_argv_count = shebang_param_count + 1; + shebang_argv = malloc(shebang_argv_count * + sizeof(void *)); + shebang_argv[shebang_param_count] = 0; + } else { + while (shebang_argv[shebang_argv_count++]) + ; + + shebang_argv_count += shebang_param_count + 1; + shebang_argv = realloc(shebang_argv, + shebang_argv_count * sizeof(void *)); + memmove(shebang_argv + shebang_param_count, + shebang_argv, + (shebang_argv_count - shebang_param_count) + * sizeof(void *)); + } + shebang_argv[0] = shebang; + if (shebang_params) + shebang_argv[1] = shebang_params; + + *shebang_argv_p = shebang_argv; + + return load_elf_desc_shebang(shebang, desc_p, shebang_argv_p); + } + + return 0; +} + int transfer_image(int fd, struct program_load_desc *desc) { struct remote_transfer pt; @@ -897,9 +960,8 @@ void print_desc(struct program_load_desc *desc) int i; __dprintf("Desc (%p)\n", desc); - __dprintf("Status = %d, CPU = %d, pid = %d, entry = %lx, rp = %lx\n", - desc->status, desc->cpu, desc->pid, desc->entry, - desc->rprocess); + __dprintf("CPU = %d, pid = %d, entry = %lx, rp = %lx\n", + desc->cpu, desc->pid, desc->entry, desc->rprocess); for (i = 0; i < desc->num_sections; i++) { __dprintf("vaddr: %lx, mem_len: %lx, remote_pa: %lx, files: %lx\n", desc->sections[i].vaddr, desc->sections[i].len, @@ -918,55 +980,66 @@ unsigned long dma_buf_pa; void print_flat(char *flat) { - char **string; - - __dprintf("counter: %d\n", *((int *)flat)); + long i, count; + long *_flat = (long *)flat; - string = (char **)(flat + sizeof(int)); - while (*string) { - - __dprintf("%s\n", (flat + (unsigned long)(*string))); + count = _flat[0]; + __dprintf("counter: %ld\n", count); - ++string; + for (i = 0; i < count; i++) { + __dprintf("%s\n", (flat + _flat[i + 1])); } } /* * Flatten out a (char **) string array into the following format: - * [nr_strings][char *offset of string_0]...[char *offset of string_n-1][NULL][string0]...[stringn_1] + * 
[nr_strings][char *offset of string_0]...[char *offset of string_n-1][char *offset of end of string][string0]...[stringn_1] * if nr_strings == -1, we assume the last item is NULL * + * sizes all are longs. + * * NOTE: copy this string somewhere, add the address of the string to each offset * and we get back a valid argv or envp array. * + * pre_strings is already flattened, so we just need to manage counts and copy + * the string part appropriately. + * * returns the total length of the flat string and updates flat to * point to the beginning. */ -int flatten_strings(int nr_strings, char *first, char **strings, char **flat) +int flatten_strings(char *pre_strings, char **strings, char **flat) { - int full_len, string_i; - unsigned long flat_offset; - char *_flat; + int full_len, i; + int nr_strings; + int pre_strings_count = 0; + int pre_strings_len = 0; + long *_flat; + long *pre_strings_flat; + char *p; - /* How many strings do we have? */ - if (nr_strings == -1) { - for (nr_strings = 0; strings[nr_strings]; ++nr_strings); - } + for (nr_strings = 0; strings[nr_strings]; ++nr_strings) + ; /* Count full length */ full_len = sizeof(long) + sizeof(char *); // Counter and terminating NULL - if (first) { - full_len += sizeof(char *) + strlen(first) + 1; + if (pre_strings) { + pre_strings_flat = (long *)pre_strings; + pre_strings_count = pre_strings_flat[0]; + + pre_strings_len = pre_strings_flat[pre_strings_count + 1]; + pre_strings_len -= sizeof(long) * (pre_strings_count + 2); + + full_len += pre_strings_count * sizeof(long) + pre_strings_len; } - for (string_i = 0; string_i < nr_strings; ++string_i) { + for (i = 0; strings[i]; ++i) { // Pointer + actual value - full_len += sizeof(char *) + strlen(strings[string_i]) + 1; + full_len += sizeof(char *) + strlen(strings[i]) + 1; } full_len = (full_len + sizeof(long) - 1) & ~(sizeof(long) - 1); - _flat = (char *)malloc(full_len); + _flat = malloc(full_len); if (!_flat) { return 0; } @@ -974,29 +1047,33 @@ int 
flatten_strings(int nr_strings, char *first, char **strings, char **flat) memset(_flat, 0, full_len); /* Number of strings */ - *((long *)_flat) = nr_strings + (first ? 1 : 0); + _flat[0] = nr_strings + pre_strings_count; // Actual offset - flat_offset = sizeof(long) + sizeof(char *) * (nr_strings + 1 + - (first ? 1 : 0)); + p = (char *)(_flat + nr_strings + pre_strings_count + 2); - if (first) { - *((char **)(_flat + sizeof(long))) = (void *)flat_offset; - memcpy(_flat + flat_offset, first, strlen(first) + 1); - flat_offset += strlen(first) + 1; + if (pre_strings) { + for (i = 0; i < pre_strings_count; i++) { + _flat[i + 1] = pre_strings_flat[i + 1] + + nr_strings * sizeof(long); + } + memcpy(p, pre_strings + pre_strings_flat[1], + pre_strings_len); + p += pre_strings_len; } - for (string_i = 0; string_i < nr_strings; ++string_i) { - - /* Fabricate the string */ - *((char **)(_flat + sizeof(long) + (string_i + (first ? 1 : 0)) - * sizeof(char *))) = (void *)flat_offset; - memcpy(_flat + flat_offset, strings[string_i], strlen(strings[string_i]) + 1); - flat_offset += strlen(strings[string_i]) + 1; - } + for (i = 0; i < nr_strings; ++i) { + int len = strlen(strings[i]) + 1; - *flat = _flat; - return full_len; + _flat[i + pre_strings_count + 1] = p - (char *)_flat; + + memcpy(p, strings[i], len); + p += len; + } + _flat[nr_strings + pre_strings_count + 1] = p - (char *)_flat; + + *flat = (char *)_flat; + return p - (char *)_flat; } //#define NUM_HANDLER_THREADS 248 @@ -1010,7 +1087,7 @@ struct thread_data_s { int terminate; int remote_tid; int remote_cpu; - int joined; + int joined, detached; pthread_mutex_t *lock; pthread_barrier_t *init_ready; } *thread_data; @@ -1022,6 +1099,7 @@ pid_t master_tid; pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; pthread_barrier_t init_ready; +pthread_barrier_t uti_init_ready; pthread_attr_t watchdog_thread_attr; pthread_t watchdog_thread; @@ -1130,9 +1208,9 @@ sendsig(int sig, siginfo_t *siginfo, void *context) int cpu; struct 
signal_desc sigdesc; struct thread_data_s *tp; - int localthread; + int not_uti; - localthread = ioctl(fd, MCEXEC_UP_SIG_THREAD, 1); + not_uti = ioctl(fd, MCEXEC_UP_SIG_THREAD, 1); pid = getpid(); tid = gettid(); if (siginfo->si_pid == pid && @@ -1165,7 +1243,7 @@ sendsig(int sig, siginfo_t *siginfo, void *context) remote_tid = -1; } - if (localthread) { + if (not_uti) { /* target isn't uti thread, ask McKernel to call the handler */ memset(&sigdesc, '\0', sizeof sigdesc); sigdesc.cpu = cpu; sigdesc.pid = (int)pid; @@ -1177,7 +1255,7 @@ sendsig(int sig, siginfo_t *siginfo, void *context) exit(1); } } - else { + else { /* target is uti thread, mcexec calls the handler */ struct syscall_struct param; int rc; @@ -1202,7 +1280,7 @@ sendsig(int sig, siginfo_t *siginfo, void *context) } } out: - if (!localthread) + if (!not_uti) ioctl(fd, MCEXEC_UP_SIG_THREAD, 0); } @@ -1306,6 +1384,7 @@ static int reduce_stack(struct rlimit *orig_rlim, char *argv[]) { int n; char newval[40]; + char path[PATH_MAX]; int error; struct rlimit new_rlim; @@ -1333,22 +1412,32 @@ static int reduce_stack(struct rlimit *orig_rlim, char *argv[]) error = setrlimit(RLIMIT_STACK, &new_rlim); if (error) { - __eprint("failed to setrlimit(RLIMIT_STACK)\n"); + __eprintf("failed to setrlimit(RLIMIT_STACK)\n"); return 1; } - execv("/proc/self/exe", argv); + error = readlink("/proc/self/exe", path, sizeof(path)); + if (error < 0) { + __eprintf("Could not readlink /proc/self/exe? %m\n"); + return 1; + } else if (error >= sizeof(path)) { + strcpy(path, "/proc/self/exe"); + } else { + path[error] = '\0'; + } - __eprint("failed to execv(myself)\n"); + execv(path, argv); + + __eprintf("failed to execv(myself)\n"); return 1; } void print_usage(char **argv) { #ifdef ADD_ENVS_OPTION - fprintf(stderr, "usage: %s [-c target_core] [-n nr_partitions] [<-e ENV_NAME=value>...] 
[--mpol-threshold=N] [--enable-straight-map] [--extend-heap-by=N] [-s (--stack-premap=)[premap_size][,max]] [--mpol-no-heap] [--mpol-no-bss] [--mpol-no-stack] [--mpol-shm-premap] [--disable-sched-yield] [] (program) [args...]\n", argv[0]); + fprintf(stderr, "usage: %s [-c target_core] [-n nr_partitions] [<-e ENV_NAME=value>...] [--mpol-threshold=N] [--enable-straight-map] [--extend-heap-by=N] [-s (--stack-premap=)[premap_size][,max]] [--mpol-no-heap] [--mpol-no-bss] [--mpol-no-stack] [--mpol-shm-premap] [--disable-sched-yield] [--enable-uti] [--uti-thread-rank=N] [--uti-use-last-cpu] [] (program) [args...]\n", argv[0]); #else /* ADD_ENVS_OPTION */ - fprintf(stderr, "usage: %s [-c target_core] [-n nr_partitions] [--mpol-threshold=N] [--enable-straight-map] [--extend-heap-by=N] [-s (--stack-premap=)[premap_size][,max]] [--mpol-no-heap] [--mpol-no-bss] [--mpol-no-stack] [--mpol-shm-premap] [--disable-sched-yield] [] (program) [args...]\n", argv[0]); + fprintf(stderr, "usage: %s [-c target_core] [-n nr_partitions] [--mpol-threshold=N] [--enable-straight-map] [--extend-heap-by=N] [-s (--stack-premap=)[premap_size][,max]] [--mpol-no-heap] [--mpol-no-bss] [--mpol-no-stack] [--mpol-shm-premap] [--disable-sched-yield] [--enable-uti] [--uti-thread-rank=N] [--uti-use-last-cpu] [] (program) [args...]\n", argv[0]); #endif /* ADD_ENVS_OPTION */ } @@ -1372,8 +1461,7 @@ void init_sigaction(void) static int max_cpuid; -static int -create_worker_thread(pthread_barrier_t *init_ready) +static int create_worker_thread(struct thread_data_s **tp_out, pthread_barrier_t *init_ready) { struct thread_data_s *tp; @@ -1391,6 +1479,10 @@ create_worker_thread(pthread_barrier_t *init_ready) tp->next = thread_data; thread_data = tp; + if (tp_out) { + *tp_out = tp; + } + return pthread_create(&tp->thread_id, NULL, &main_loop_thread_func, tp); } @@ -1404,7 +1496,7 @@ int init_worker_threads(int fd) max_cpuid = 0; for (i = 0; i <= n_threads; ++i) { - int ret = create_worker_thread(&init_ready); + int 
ret = create_worker_thread(NULL, &init_ready); if (ret) { printf("ERROR: creating syscall threads (%d), check ulimit?\n", ret); @@ -1444,8 +1536,7 @@ static int find_mount_prefix(char *prefix) } } - if (line) - free(line); + free(line); return ret; } @@ -1653,6 +1744,8 @@ static void destroy_local_environ(char **local_env) unsigned long atobytes(char *string) { unsigned long mult = 1; + unsigned long ret; + char orig_postfix = 0; char *postfix; errno = ERANGE; @@ -1664,19 +1757,26 @@ unsigned long atobytes(char *string) if (*postfix == 'k' || *postfix == 'K') { mult = 1024; + orig_postfix = *postfix; *postfix = 0; } else if (*postfix == 'm' || *postfix == 'M') { mult = 1024 * 1024; + orig_postfix = *postfix; *postfix = 0; } else if (*postfix == 'g' || *postfix == 'G') { mult = 1024 * 1024 * 1024; + orig_postfix = *postfix; *postfix = 0; } + ret = atol(string) * mult; + if (orig_postfix) + *postfix = orig_postfix; + errno = 0; - return atol(string) * mult; + return ret; } static struct option mcexec_options[] = { @@ -1756,64 +1856,101 @@ static struct option mcexec_options[] = { .flag = NULL, .val = 's', }, + { + .name = "uti-thread-rank", + .has_arg = required_argument, + .flag = NULL, + .val = 'u', + }, + { + .name = "uti-use-last-cpu", + .has_arg = no_argument, + .flag = &uti_use_last_cpu, + .val = 1, + }, + { + .name = "enable-uti", + .has_arg = no_argument, + .flag = &enable_uti, + .val = 1, + }, /* end */ { NULL, 0, NULL, 0, }, }; #ifdef ENABLE_MCOVERLAYFS +/* bind-mount files under / over recursively */ void bind_mount_recursive(const char *root, char *prefix) { DIR *dir; struct dirent *entry; char path[PATH_MAX]; - int len; - len = snprintf(path, sizeof(path) - 1, "%s/%s", root, prefix); - path[len] = 0; + snprintf(path, sizeof(path), "%s/%s", root, prefix); + path[sizeof(path) - 1] = 0; if (!(dir = opendir(path))) { return; } - if (!(entry = readdir(dir))) { - return; - } + while ((entry = readdir(dir))) { + char fullpath[PATH_MAX]; + char 
shortpath[PATH_MAX]; + struct stat st; - do { - len = snprintf(path, sizeof(path) - 1, - "%s/%s", prefix, entry->d_name); - path[len] = 0; + /* Use lstat instead of checking dt_type of readdir + result because the latter reports DT_UNKNOWN for + files on some file systems */ + snprintf(fullpath, sizeof(fullpath), + "%s/%s/%s", root, prefix, entry->d_name); + fullpath[sizeof(fullpath) - 1] = 0; + + if (lstat(fullpath, &st)) { + fprintf(stderr, "%s: error: lstat %s: %s\n", + __func__, fullpath, strerror(errno)); + continue; + } + + /* Traverse target or mount point */ + snprintf(shortpath, sizeof(shortpath), + "%s/%s", prefix, entry->d_name); + shortpath[sizeof(shortpath) - 1] = 0; + + if (S_ISDIR(st.st_mode)) { + __dprintf("dir found: %s\n", fullpath); - if (entry->d_type == DT_DIR) { if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) continue; - bind_mount_recursive(root, path); + bind_mount_recursive(root, shortpath); } - else if (entry->d_type == DT_REG) { + else if (S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)) { int ret; struct sys_mount_desc mount_desc; - memset(&mount_desc, '\0', sizeof mount_desc); - char bind_path[PATH_MAX]; - len = snprintf(bind_path, sizeof(bind_path) - 1, - "%s/%s/%s", root, prefix, entry->d_name); - bind_path[len] = 0; + __dprintf("reg/symlink found: %s\n", fullpath); - mount_desc.dev_name = bind_path; - mount_desc.dir_name = path; + if (lstat(shortpath, &st)) { + fprintf(stderr, "%s: warning: lstat of mount point (%s) failed: %s\n", + __func__, shortpath, strerror(errno)); + continue; + } + + memset(&mount_desc, '\0', sizeof(mount_desc)); + mount_desc.dev_name = fullpath; + mount_desc.dir_name = shortpath; mount_desc.type = NULL; mount_desc.flags = MS_BIND | MS_PRIVATE; mount_desc.data = NULL; + if ((ret = ioctl(fd, MCEXEC_UP_SYS_MOUNT, (unsigned long)&mount_desc)) != 0) { - fprintf(stderr, "WARNING: failed to bind mount %s over %s: %d\n", - bind_path, path, ret); + fprintf(stderr, "%s: warning: failed to bind mount 
%s over %s: %d\n", + __func__, fullpath, shortpath, ret); } } } - while ((entry = readdir(dir)) != NULL); closedir(dir); } @@ -1828,7 +1965,7 @@ join_all_threads() do { live_thread = 0; for (tp = thread_data; tp; tp = tp->next) { - if (tp->joined) + if (tp->joined || tp->detached) continue; live_thread = 1; pthread_join(tp->thread_id, NULL); @@ -1869,37 +2006,63 @@ opendev() return fd; } +#define LD_PRELOAD_PREPARE(name) do { \ + sprintf(elembuf, "%s%s/" name, nelem > 0 ? ":" : "", MCKERNEL_LIBDIR); \ + } while (0) + +#define LD_PRELOAD_APPEND do { \ + if (strlen(elembuf) + 1 > remainder) { \ + fprintf(stderr, "%s: warning: LD_PRELOAD line is too long\n", __FUNCTION__); \ + return; \ + } \ + strncat(envbuf, elembuf, remainder); \ + remainder = PATH_MAX - (strlen(envbuf) + 1); \ + nelem++; \ + } while (0) + static void ld_preload_init() { char envbuf[PATH_MAX]; -#ifdef ENABLE_QLMPI - char *old_ld_preload; -#endif + char *ld_preload_str; + size_t remainder = PATH_MAX; + int nelem = 0; + char elembuf[PATH_MAX]; + + memset(envbuf, 0, PATH_MAX); + + if (enable_uti) { + LD_PRELOAD_PREPARE("syscall_intercept.so"); + LD_PRELOAD_APPEND; + } if (disable_sched_yield) { - sprintf(envbuf, "%s/libsched_yield.so.1.0.0", MCKERNEL_LIBDIR); - __dprintf("%s: preload library: %s\n", __FUNCTION__, envbuf); - if (setenv("LD_PRELOAD", envbuf, 1) < 0) { - printf("%s: warning: failed to set LD_PRELOAD for sched_yield\n", - __FUNCTION__); - } + LD_PRELOAD_PREPARE("libsched_yield.so.1.0.0"); + LD_PRELOAD_APPEND; } + +#ifdef ENABLE_QLMPI + LD_PRELOAD_PREPARE("libqlfort.so"); + LD_PRELOAD_APPEND; +#endif + /* Set LD_PRELOAD to McKernel specific value */ - else if (getenv(ld_preload_envname)) { - if (setenv("LD_PRELOAD", getenv(ld_preload_envname), 1) < 0) { + ld_preload_str = getenv(ld_preload_envname); + if (ld_preload_str) { + sprintf(elembuf, "%s%s", nelem > 0 ? 
":" : "", ld_preload_str); + LD_PRELOAD_APPEND; + } + + if (strlen(envbuf)) { + if (setenv("LD_PRELOAD", envbuf, 1) < 0) { printf("%s: warning: failed to set LD_PRELOAD environment variable\n", __FUNCTION__); } - unsetenv(ld_preload_envname); + __dprintf("%s: preload library: %s\n", __FUNCTION__, envbuf); } -#ifdef ENABLE_QLMPI - sprintf(envbuf, "%s/libqlfort.so", MCKERNEL_LIBDIR); - if ((old_ld_preload = getenv("LD_PRELOAD"))) { - sprintf(strchr(envbuf, '\0'), " %s", old_ld_preload); + if (getenv("ld_preload_envname")) { + unsetenv(ld_preload_envname); } - setenv("LD_PRELOAD", envbuf, 1); -#endif } int main(int argc, char **argv) @@ -1908,7 +2071,6 @@ int main(int argc, char **argv) struct program_load_desc *desc; int envs_len; char *envs; - char *args; char *p; int i; int error; @@ -1916,9 +2078,8 @@ int main(int argc, char **argv) unsigned long lmax; int target_core = 0; int opt; - char path[1024]; - char *shell = NULL; - char shell_path[1024]; + char **shebang_argv = NULL; + char *shebang_argv_flat = NULL; int num = 0; int persona; #ifdef ADD_ENVS_OPTION @@ -1951,6 +2112,8 @@ int main(int argc, char **argv) /* Disable address space layout randomization */ __dprintf("persona=%08x\n", persona); if ((persona & (PER_LINUX | ADDR_NO_RANDOMIZE)) == 0) { + char path[PATH_MAX]; + CHKANDJUMP(getenv("MCEXEC_ADDR_NO_RANDOMIZE"), 1, "personality() and then execv() failed\n"); persona = personality(persona | PER_LINUX | ADDR_NO_RANDOMIZE); @@ -1959,7 +2122,15 @@ int main(int argc, char **argv) error = setenv("MCEXEC_ADDR_NO_RANDOMIZE", "1", 1); CHKANDJUMP(error == -1, 1, "setenv failed\n"); - error = execv("/proc/self/exe", argv); + error = readlink("/proc/self/exe", path, sizeof(path)); + CHKANDJUMP(error == -1, 1, "readlink failed: %m\n"); + if (error >= sizeof(path)) { + strcpy(path, "/proc/self/exe"); + } else { + path[error] = '\0'; + } + + error = execv(path, argv); CHKANDJUMPF(error == -1, 1, "execv failed, error=%d,strerror=%s\n", error, strerror(errno)); } if 
(getenv("MCEXEC_ADDR_NO_RANDOMIZE")) { @@ -1985,9 +2156,9 @@ int main(int argc, char **argv) /* Parse options ("+" denotes stop at the first non-option) */ #ifdef ADD_ENVS_OPTION - while ((opt = getopt_long(argc, argv, "+c:n:t:M:h:e:s:m:", mcexec_options, NULL)) != -1) { + while ((opt = getopt_long(argc, argv, "+c:n:t:M:h:e:s:m:u:", mcexec_options, NULL)) != -1) { #else /* ADD_ENVS_OPTION */ - while ((opt = getopt_long(argc, argv, "+c:n:t:M:h:s:m:", mcexec_options, NULL)) != -1) { + while ((opt = getopt_long(argc, argv, "+c:n:t:M:h:s:m:u:", mcexec_options, NULL)) != -1) { #endif /* ADD_ENVS_OPTION */ switch (opt) { char *tmp; @@ -2050,6 +2221,10 @@ int main(int argc, char **argv) __dprintf("stack_premap=%ld,stack_max=%ld\n", stack_premap, stack_max); break; } + case 'u': + uti_thread_rank = atoi(optarg); + break; + case 0: /* long opt */ break; @@ -2080,16 +2255,23 @@ int main(int argc, char **argv) if (opendev() == -1) exit(EXIT_FAILURE); +#ifndef WITH_SYSCALL_INTERCEPT + if (enable_uti) { + __eprintf("ERROR: uti is not available when not configured with --with-syscall_intercept=\n"); + exit(EXIT_FAILURE); + } +#endif + ld_preload_init(); #ifdef ADD_ENVS_OPTION #else /* ADD_ENVS_OPTION */ /* Collect environment variables */ - envs_len = flatten_strings(-1, NULL, environ, &envs); + envs_len = flatten_strings(NULL, environ, &envs); #endif /* ADD_ENVS_OPTION */ #ifdef ENABLE_MCOVERLAYFS - __dprint("mcoverlay enable\n"); + __dprintf("mcoverlay enable\n"); char mcos_procdir[PATH_MAX]; char mcos_sysdir[PATH_MAX]; @@ -2205,32 +2387,8 @@ int main(int argc, char **argv) __dprintf("mcoverlay disable\n"); #endif // ENABLE_MCOVERLAYFS - if (lookup_exec_path(argv[optind], path, sizeof(path), 1) != 0) { - fprintf(stderr, "error: finding file: %s\n", argv[optind]); + if (load_elf_desc_shebang(argv[optind], &desc, &shebang_argv)) return 1; - } - - if (load_elf_desc(path, &desc, &shell) != 0) { - fprintf(stderr, "error: loading file: %s\n", argv[optind]); - return 1; - } - - /* 
Check whether shell script */ - if (shell) { - if (lookup_exec_path(shell, shell_path, sizeof(shell_path), 0) != 0) { - fprintf(stderr, "error: finding file: %s\n", shell); - return 1; - } - - if (load_elf_desc(shell_path, &desc, &shell) != 0) { - fprintf(stderr, "error: loading file: %s\n", shell); - return 1; - } - } - - if (shell) { - argv[optind] = path; - } #ifdef ADD_ENVS_OPTION /* Collect environment variables */ @@ -2238,7 +2396,7 @@ int main(int argc, char **argv) add_env_list(&extra_env, environ[i]); } local_env = create_local_environ(extra_env); - envs_len = flatten_strings(-1, NULL, local_env, &envs); + envs_len = flatten_strings(NULL, local_env, &envs); destroy_local_environ(local_env); local_env = NULL; destroy_env_list(extra_env); @@ -2251,9 +2409,14 @@ int main(int argc, char **argv) desc->envs = envs; //print_flat(envs); - desc->args_len = flatten_strings(-1, shell, argv + optind, &args); - desc->args = args; - //print_flat(args); + if (shebang_argv) + flatten_strings(NULL, shebang_argv, &shebang_argv_flat); + + desc->args_len = flatten_strings(shebang_argv_flat, argv + optind, + &desc->args); + //print_flat(desc->args); + free(shebang_argv); + free(shebang_argv_flat); desc->cpu = target_core; desc->enable_vdso = enable_vdso; @@ -2394,18 +2557,18 @@ int main(int argc, char **argv) dma_buf = mmap(0, PIN_SIZE, PROT_READ | PROT_WRITE, (MAP_ANONYMOUS | MAP_PRIVATE), -1, 0); if (dma_buf == (void *)-1) { - __dprint("error: allocating DMA area\n"); + __dprintf("error: allocating DMA area\n"); exit(1); } /* PIN buffer */ if (mlock(dma_buf, (size_t)PIN_SIZE)) { - __dprint("ERROR: locking dma_buf\n"); + __dprintf("ERROR: locking dma_buf\n"); exit(1); } /* Register per-process structure in mcctrl */ - if (ioctl(fd, MCEXEC_UP_CREATE_PPD) != 0) { + if (ioctl(fd, MCEXEC_UP_CREATE_PPD, NULL)) { perror("creating mcctrl per-process structure"); close(fd); exit(1); @@ -2416,6 +2579,7 @@ int main(int argc, char **argv) struct get_cpu_set_arg cpu_set_arg; int 
mcexec_linux_numa = 0; int ikc_mapped = 0; + int process_rank = -1; cpu_set_t mcexec_cpu_set; CPU_ZERO(&mcexec_cpu_set); @@ -2424,6 +2588,7 @@ int main(int argc, char **argv) cpu_set_arg.cpu_set_size = sizeof(desc->cpu_set); cpu_set_arg.nr_processes = nr_processes; cpu_set_arg.target_core = &target_core; + cpu_set_arg.process_rank = &process_rank; cpu_set_arg.mcexec_linux_numa = &mcexec_linux_numa; cpu_set_arg.mcexec_cpu_set = &mcexec_cpu_set; cpu_set_arg.mcexec_cpu_set_size = sizeof(mcexec_cpu_set); @@ -2436,13 +2601,14 @@ int main(int argc, char **argv) } desc->cpu = target_core; + desc->process_rank = process_rank; /* Bind to CPU cores where the LWK process' IKC target maps to */ if (ikc_mapped && !no_bind_ikc_map) { /* This call may not succeed, but that is fine */ if (sched_setaffinity(0, sizeof(mcexec_cpu_set), &mcexec_cpu_set) < 0) { - __dprint("WARNING: couldn't bind to mcexec_cpu_set\n"); + __dprintf("WARNING: couldn't bind to mcexec_cpu_set\n"); } #ifdef DEBUG else { @@ -2523,6 +2689,10 @@ int main(int argc, char **argv) } } + desc->uti_thread_rank = uti_thread_rank; + desc->uti_use_last_cpu = uti_use_last_cpu; + + /* user_start and user_end are set by this call */ if (ioctl(fd, MCEXEC_UP_PREPARE_IMAGE, (unsigned long)desc) != 0) { perror("prepare"); close(fd); @@ -2559,7 +2729,7 @@ int main(int argc, char **argv) return -1; } #endif - __dprint("mccmd server initialized\n"); + __dprintf("mccmd server initialized\n"); #endif init_sigaction(); @@ -2596,8 +2766,9 @@ int main(int argc, char **argv) return 1; } +#if 1 /* debug : thread killed by exit_group() are still joinable? 
*/ join_all_threads(); - +#endif fn_fail: return ret; } @@ -2712,7 +2883,8 @@ do_generic_syscall( sprintf(proc_path, "/proc/self/fd/%d", (int)w->sr.args[0]); /* Get filename */ - if ((len = readlink(proc_path, path, sizeof(path))) < 0) { + len = readlink(proc_path, path, sizeof(path)); + if (len < 0 || len >= sizeof(path)) { fprintf(stderr, "%s: error: readlink() failed for %s\n", __FUNCTION__, proc_path); goto out; @@ -2756,8 +2928,10 @@ out: return ret; } -static void -kill_thread(unsigned long tid, int sig) +static struct uti_desc *uti_desc; + +static void kill_thread(unsigned long tid, int sig, + struct thread_data_s *my_thread) { struct thread_data_s *tp; @@ -2765,325 +2939,74 @@ kill_thread(unsigned long tid, int sig) sig = LOCALSIG; for (tp = thread_data; tp; tp = tp->next) { - if (tp->remote_tid == tid) { - pthread_kill(tp->thread_id, sig); - break; - } - } -} - -static int -samepage(void *a, void *b) -{ - unsigned long aa = (unsigned long)a; - unsigned long bb = (unsigned long)b; - -#ifdef POSTK_DEBUG_ARCH_DEP_35 - return (aa & page_mask) == (bb & page_mask); -#else /* POSTK_DEBUG_ARCH_DEP_35 */ - return (aa & PAGE_MASK) == (bb & PAGE_MASK); -#endif /* POSTK_DEBUG_ARCH_DEP_35 */ -} - -#ifdef DEBUG_UTI -long syscalls[512]; - -static void -debug_sig(int s) -{ - int i; - for (i = 0; i < 512; i++) - if (syscalls[i]) - fprintf(stderr, "syscall %d called %ld\n", i, - syscalls[i]); -} -#endif - -static int -create_tracer(void *wp, int mck_tid, unsigned long key) -{ - int pid = getpid(); - int tid = gettid(); - int pfd[2]; - int tpid; - int rc; - int st; - int sig = 0; - int i; - struct syscall_struct *param_top = NULL; - struct syscall_struct *param; - unsigned long code = 0; - int exited = 0; - int mode = 0; - - if (pipe(pfd) == -1) - return -1; - tpid = fork(); - if (tpid) { - struct timeval tv; - fd_set rfd; - - if (tpid == -1) - return -1; - close(pfd[1]); - while ((rc = waitpid(tpid, &st, 0)) == -1 && errno == EINTR); - if (rc == -1 || !WIFEXITED(st) || 
WEXITSTATUS(st)) { - fprintf(stderr, "waitpid rc=%d st=%08x\n", rc, st); - return -ENOMEM; - } - FD_ZERO(&rfd); - FD_SET(pfd[0], &rfd); - tv.tv_sec = 1; - tv.tv_usec = 0; - while ((rc = select(pfd[0] + 1, &rfd, NULL, NULL, &tv)) == -1 && - errno == EINTR); - if (rc == 0) { - close(pfd[0]); - return -ETIMEDOUT; - } - if (rc == -1) { - close(pfd[0]); - return -errno; - } - rc = read(pfd[0], &st, 1); - close(pfd[0]); - if (rc != 1) { - return -EAGAIN; - } - return 0; - } - close(pfd[0]); - tpid = fork(); - if (tpid) { - if (tpid == -1) { - fprintf(stderr, "fork errno=%d\n", errno); - exit(1); - } - exit(0); - } - if (ptrace(PTRACE_ATTACH, tid, 0, 0) == -1) { - fprintf(stderr, "PTRACE_ATTACH errno=%d\n", errno); - exit(1); - } - waitpid(-1, &st, __WALL); - if (ptrace(PTRACE_SETOPTIONS, tid, 0, PTRACE_O_TRACESYSGOOD) == -1) { - fprintf(stderr, "PTRACE_SETOPTIONS errno=%d\n", errno); - exit(1); - } - write(pfd[1], " ", 1); - close(pfd[1]); - - for (i = 0; i < 4096; i++) - if (i != fd -#ifdef DEBUG_UTI - && i != 2 -#endif - ) - close(i); - open("/dev/null", O_RDONLY); - open("/dev/null", O_WRONLY); -#ifndef DEBUG_UTI - open("/dev/null", O_WRONLY); -#endif - - for (i = 1; i <= 10; i++) { - param = (struct syscall_struct *)wp + i; - *(void **)param = param_top; - param_top = param; - } - memset(wp, '\0', sizeof(long)); - -#ifdef DEBUG_UTI - fprintf(stderr, "tracer PID=%d\n", getpid()); - signal(SIGINT, debug_sig); -#endif - for (;;) { - ptrace(PTRACE_SYSCALL, tid, 0, sig); - sig = 0; - waitpid(-1, &st, __WALL); - if (WIFEXITED(st) || WIFSIGNALED(st)) { - unsigned long term_param[4]; - - term_param[0] = pid; - term_param[1] = tid; - term_param[3] = key; - code = st; - if (exited == 2 || // exit_group - WIFSIGNALED(st)) { - code |= 0x0000000100000000; - } - term_param[2] = code; - ioctl(fd, MCEXEC_UP_TERMINATE_THREAD, term_param); - break; - } - if (!WIFSTOPPED(st)) { + if (tp == my_thread) continue; - } - if (WSTOPSIG(st) & 0x80) { // syscall - syscall_args args; - - 
get_syscall_args(tid, &args); - -#ifdef DEBUG_UTI - if (get_syscall_return(&args) == -ENOSYS) { - if (get_syscall_number(&args) >= 0 && - get_syscall_number(&args) < 512) { - syscalls[get_syscall_number(&args)]++; - } + if (tp->remote_tid == tid) { + if (pthread_kill(tp->thread_id, sig) == ESRCH) { + printf("%s: ERROR: Thread not found (tid=%ld,sig=%d)\n", __FUNCTION__, tid, sig); } -#endif - - if (get_syscall_number(&args) == __NR_ioctl && - get_syscall_return(&args) == -ENOSYS && - get_syscall_arg1(&args) == fd && - get_syscall_arg2(&args) == MCEXEC_UP_SIG_THREAD) { - mode = get_syscall_arg3(&args); - } - - if (mode) { - continue; - } - - switch (get_syscall_number(&args)) { - case __NR_gettid: - set_syscall_number(&args, -1); - set_syscall_return(&args, mck_tid); - set_syscall_args(tid, &args); - continue; - case __NR_futex: - case __NR_brk: - case __NR_mmap: - case __NR_munmap: - case __NR_mprotect: - case __NR_mremap: - break; - case __NR_exit_group: - exited++; - case __NR_exit: - exited++; - continue; - case __NR_clone: -#ifdef POSTK_DEBUG_ARCH_DEP_78 /* arch dep syscallno hide */ -#ifdef __NR_fork - case __NR_fork: -#endif -#ifdef __NR_vfork - case __NR_vfork: -#endif -#else /* POSTK_DEBUG_ARCH_DEP_78 */ - case __NR_fork: - case __NR_vfork: -#endif /* POSTK_DEBUG_ARCH_DEP_78 */ - case __NR_execve: - set_syscall_number(&args, -1); - set_syscall_args(tid, &args); - continue; - case __NR_ioctl: - param = (struct syscall_struct *) - get_syscall_arg3(&args); - if (get_syscall_return(&args) != -ENOSYS && - get_syscall_arg1(&args) == fd && - get_syscall_arg2(&args) == - MCEXEC_UP_SYSCALL_THREAD && - samepage(wp, param)) { - set_syscall_arg1(&args, param->args[0]); - set_syscall_arg2(&args, param->args[1]); - set_syscall_arg3(&args, param->args[2]); - set_syscall_arg4(&args, param->args[3]); - set_syscall_arg5(&args, param->args[4]); - set_syscall_arg6(&args, param->args[5]); - set_syscall_return(&args, param->ret); - *(void **)param = param_top; - param_top = 
param; - set_syscall_args(tid, &args); - } - continue; - default: - continue; - } - param = param_top; - if (!param) { - set_syscall_number(&args, -1); - set_syscall_return(&args, -ENOMEM); - } - else { - param_top = *(void **)param; - param->number = get_syscall_number(&args); - param->args[0] = get_syscall_arg1(&args); - param->args[1] = get_syscall_arg2(&args); - param->args[2] = get_syscall_arg3(&args); - param->args[3] = get_syscall_arg4(&args); - param->args[4] = get_syscall_arg5(&args); - param->args[5] = get_syscall_arg6(&args); - param->ret = -EINVAL; - set_syscall_number(&args, __NR_ioctl); - set_syscall_arg1(&args, fd); - set_syscall_arg2(&args, - MCEXEC_UP_SYSCALL_THREAD); - set_syscall_arg3(&args, (unsigned long)param); - } - set_syscall_args(tid, &args); - } - else { // signal - sig = WSTOPSIG(st) & 0x7f; } } - -#ifdef DEBUG_UTI - fprintf(stderr, "offloaded thread called these syscalls\n"); - debug_sig(0); -#endif - - exit(0); } -static long -util_thread(unsigned long uctx_pa, int remote_tid, unsigned long pattr) +static long util_thread(struct thread_data_s *my_thread, unsigned long rp_rctx, int remote_tid, unsigned long pattr, unsigned long uti_clv, unsigned long _uti_desc) { - void *lctx; - void *rctx; - void *wp; - void *param[6]; + struct uti_get_ctx_desc get_ctx_desc; + struct uti_save_fs_desc save_fs_desc; int rc = 0; -#ifdef POSTK_DEBUG_ARCH_DEP_35 - wp = mmap(NULL, page_size * 3, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_ANONYMOUS, -1, 0); -#else /* POSTK_DEBUG_ARCH_DEP_35 */ - wp = mmap(NULL, PAGE_SIZE * 3, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_ANONYMOUS, -1, 0); -#endif /* POSTK_DEBUG_ARCH_DEP_35 */ - if (wp == (void *)-1) { - rc = -errno; + struct thread_data_s *tp; + + uti_desc = (struct uti_desc *)_uti_desc; + if (!uti_desc) { + printf("%s: ERROR: uti_desc not found. 
Add --enable-uti option to mcexec.\n", + __func__); + rc = -EINVAL; goto out; } -#ifdef POSTK_DEBUG_ARCH_DEP_35 - lctx = (char *)wp + page_size; - rctx = (char *)lctx + page_size; -#else /* POSTK_DEBUG_ARCH_DEP_35 */ - lctx = (char *)wp + PAGE_SIZE; - rctx = (char *)lctx + PAGE_SIZE; -#endif /* POSTK_DEBUG_ARCH_DEP_35 */ + __dprintf("%s: uti_desc=%p\n", __FUNCTION__, uti_desc); - param[0] = (void *)uctx_pa; - param[1] = rctx; - param[2] = lctx; - param[4] = wp; -#ifdef POSTK_DEBUG_ARCH_DEP_35 - param[5] = (void *)(page_size * 3); -#else /* POSTK_DEBUG_ARCH_DEP_35 */ - param[5] = (void *)(PAGE_SIZE * 3); -#endif /* POSTK_DEBUG_ARCH_DEP_35 */ - if ((rc = ioctl(fd, MCEXEC_UP_UTIL_THREAD1, param)) == -1) { - fprintf(stderr, "util_thread1: %d errno=%d\n", rc, errno); + pthread_barrier_init(&uti_init_ready, NULL, 2); + if ((rc = create_worker_thread(&tp, &uti_init_ready))) { + printf("%s: Error: create_worker_thread failed (%d)\n", __FUNCTION__, rc); + rc = -EINVAL; + goto out; + } + pthread_barrier_wait(&uti_init_ready); + __dprintf("%s: worker tid: %d\n", __FUNCTION__, tp->tid); + + + /* Initialize uti related variables for syscall_intercept */ + uti_desc->fd = fd; + + rc = syscall(888); + if (rc != -1) { + fprintf(stderr, "%s: WARNING: syscall_intercept returned %x\n", __FUNCTION__, rc); + } + + /* Get the remote context, record refill tid */ + get_ctx_desc.rp_rctx = rp_rctx; + get_ctx_desc.rctx = uti_desc->rctx; + get_ctx_desc.lctx = uti_desc->lctx; + get_ctx_desc.uti_refill_tid = tp->tid; + + if ((rc = ioctl(fd, MCEXEC_UP_UTI_GET_CTX, &get_ctx_desc))) { + fprintf(stderr, "%s: Error: MCEXEC_UP_UTI_GET_CTX failed (%d)\n", __FUNCTION__, errno); rc = -errno; goto out; } - create_worker_thread(NULL); - if ((rc = create_tracer(wp, remote_tid, (unsigned long)param[3]))) { - fprintf(stderr, "create tracer %d\n", rc); - rc = -errno; + /* Initialize uti thread info */ + uti_desc->mck_tid = remote_tid; + uti_desc->key = get_ctx_desc.key; + uti_desc->pid = getpid(); + 
uti_desc->tid = gettid(); + uti_desc->uti_clv = uti_clv; + + /* Initialize list of syscall arguments for syscall_intercept */ + if (sizeof(struct syscall_struct) * 11 > PAGE_SIZE) { + fprintf(stderr, "%s: ERROR: param is too large\n", __FUNCTION__); + rc = -ENOMEM; goto out; } @@ -3091,23 +3014,33 @@ util_thread(unsigned long uctx_pa, int remote_tid, unsigned long pattr) struct uti_attr_desc desc; desc.phys_attr = pattr; - ioctl(fd, MCEXEC_UP_UTI_ATTR, &desc); + desc.uti_cpu_set_str = getenv("UTI_CPU_SET"); + desc.uti_cpu_set_len = strlen(desc.uti_cpu_set_str) + 1; + + if ((rc = ioctl(fd, MCEXEC_UP_UTI_ATTR, &desc))) { + fprintf(stderr, "%s: error: MCEXEC_UP_UTI_ATTR: %s\n", + __func__, strerror(errno)); + rc = -errno; + goto out; + } } - if ((rc = switch_ctx(fd, MCEXEC_UP_UTIL_THREAD2, param, lctx, rctx)) + /* Start intercepting syscalls. Note that it dereferences pointers in uti_desc. */ + uti_desc->start_syscall_intercept = 1; + + /* Save remote and local FS and then contex-switch */ + save_fs_desc.rctx = uti_desc->rctx; + save_fs_desc.lctx = uti_desc->lctx; + + if ((rc = switch_ctx(fd, MCEXEC_UP_UTI_SAVE_FS, &save_fs_desc, uti_desc->lctx, uti_desc->rctx)) < 0) { - fprintf(stderr, "util_thread2: %d\n", rc); + fprintf(stderr, "%s: ERROR switch_ctx failed (%d)\n", __FUNCTION__, rc); + goto out; } - fprintf(stderr, "return from util_thread2 rc=%d\n", rc); - pthread_exit(NULL); + fprintf(stderr, "%s: ERROR: Returned from switch_ctx (%d)\n", __FUNCTION__, rc); + rc = -EINVAL; out: - if (wp) -#ifdef POSTK_DEBUG_ARCH_DEP_35 - munmap(wp, page_size * 3); -#else /* POSTK_DEBUG_ARCH_DEP_35 */ - munmap(wp, PAGE_SIZE * 3); -#endif /* POSTK_DEBUG_ARCH_DEP_35 */ return rc; } @@ -3316,9 +3249,9 @@ int main_loop(struct thread_data_s *my_thread) } /* Don't print when got a msg to stdout */ - if (!(w.sr.number == __NR_write && w.sr.args[0] == 1)) + if (!(w.sr.number == __NR_write && w.sr.args[0] == 1)) { __dprintf("[%d] got syscall: %ld\n", cpu, w.sr.number); - + } 
//pthread_mutex_lock(lock); my_thread->remote_tid = w.sr.rtid; @@ -3380,7 +3313,7 @@ int main_loop(struct thread_data_s *my_thread) } else { } - __dprintf("openat: %s\n", pathbuf); + __dprintf("openat: %s,tid=%d\n", pathbuf, my_thread->remote_tid); fn = chgpath(pathbuf, tmpbuf); @@ -3400,7 +3333,7 @@ int main_loop(struct thread_data_s *my_thread) break; case __NR_kill: // interrupt syscall - kill_thread(w.sr.args[1], w.sr.args[2]); + kill_thread(w.sr.args[1], w.sr.args[2], my_thread); do_syscall_return(fd, cpu, 0, 0, 0, 0, 0); break; case __NR_exit: @@ -3408,7 +3341,11 @@ int main_loop(struct thread_data_s *my_thread) sig = 0; term = 0; - do_syscall_return(fd, cpu, 0, 0, 0, 0, 0); + /* Enforce the order in which mcexec is destroyed and then + McKernel process is destroyed to prevent + migrated-to-Linux thread from accessing stale memory values. + It is done by not calling do_syscall_return(fd, cpu, 0, 0, 0, 0, 0); + here and making McKernel side wait until release_handler() is called. */ /* Drop executable file */ if ((ret = ioctl(fd, MCEXEC_UP_CLOSE_EXEC)) != 0) { @@ -3422,12 +3359,15 @@ int main_loop(struct thread_data_s *my_thread) term = (w.sr.args[0] & 0xff00) >> 8; if(isatty(2)){ if(sig){ - if(!ischild) + if(!ischild) { fprintf(stderr, "Terminate by signal %d\n", sig); + } } - else if(term) + else if(term) { __dprintf("Exit status: %d\n", term); + } } + } #ifdef USE_SYSCALL_MOD_CALL @@ -3439,14 +3379,15 @@ int main_loop(struct thread_data_s *my_thread) dcfampi_cmd_server_exit(); #endif mc_cmd_server_exit(); - __dprint("mccmd server exited\n"); + __dprintf("mccmd server exited\n"); #endif if(sig){ signal(sig, SIG_DFL); kill(getpid(), sig); pause(); } - exit(term); + + exit(term); /* Call release_handler() and proceed terminate() */ //pthread_mutex_unlock(lock); return w.sr.args[0]; @@ -3488,7 +3429,7 @@ int main_loop(struct thread_data_s *my_thread) tids[i++] = tp->tid; } - for (; i < ncpu; ++i) { + for (; i < w.sr.args[4]; ++i) { tids[i] = 0; } @@ -3572,6 
+3513,7 @@ gettid_out: case 0: { int ret = 1; struct newprocess_desc npdesc; + struct rpgtable_desc rpt; ischild = 1; /* Reopen device fd */ @@ -3584,7 +3526,10 @@ gettid_out: goto fork_child_sync_pipe; } - if (ioctl(fd, MCEXEC_UP_CREATE_PPD) != 0) { + rpt.start = w.sr.args[1]; + rpt.len = w.sr.args[2]; + rpt.rpgtable = w.sr.args[3]; + if (ioctl(fd, MCEXEC_UP_CREATE_PPD, &rpt)) { fs->status = -errno; fprintf(stderr, "ERROR: creating PPD %s\n", dev); @@ -3653,8 +3598,9 @@ fork_child_sync_pipe: } munmap(fs, sizeof(struct fork_sync)); +#if 1 /* debug : thread killed by exit_group() are still joinable? */ join_all_threads(); - +#endif return ret; } @@ -3734,93 +3680,80 @@ fork_err: switch (w.sr.args[0]) { struct program_load_desc *desc; struct remote_transfer trans; - char path[1024]; char *filename; + char **shebang_argv; + char *shebang_argv_flat; + char *buffer; + size_t size; int ret; - char *shell; - char shell_path[1024]; /* Load descriptor phase */ case 1: - - shell = NULL; + shebang_argv = NULL; + buffer = NULL; filename = (char *)w.sr.args[1]; - if ((ret = lookup_exec_path(filename, path, sizeof(path), 0)) - != 0) { + if ((ret = load_elf_desc_shebang(filename, &desc, + &shebang_argv)) != 0) { goto return_execve1; } - if ((ret = load_elf_desc(path, &desc, &shell)) != 0) { - fprintf(stderr, - "execve(): error loading ELF for file %s\n", path); - goto return_execve1; - } - - /* Check whether shell script */ - if (shell) { - if ((ret = lookup_exec_path(shell, shell_path, - sizeof(shell_path), 0)) != 0) { - fprintf(stderr, "execve(): error: finding file: %s\n", shell); - goto return_execve1; - } - - if ((ret = load_elf_desc(shell_path, &desc, &shell)) - != 0) { - fprintf(stderr, "execve(): error: loading file: %s\n", shell); - goto return_execve1; - } - -#ifdef POSTK_DEBUG_TEMP_FIX_9 /* shell-script run via execve arg[0] fix */ - if (strlen(shell) >= SHELL_PATH_MAX_LEN) { -#else /* POSTK_DEBUG_TEMP_FIX_9 */ - if (strlen(shell_path) >= SHELL_PATH_MAX_LEN) { -#endif 
/* POSTK_DEBUG_TEMP_FIX_9 */ - fprintf(stderr, "execve(): error: shell path too long: %s\n", shell_path); - ret = ENAMETOOLONG; - goto return_execve1; - } - - /* Let the LWK know the shell interpreter */ -#ifdef POSTK_DEBUG_TEMP_FIX_9 /* shell-script run via execve arg[0] fix */ - strcpy(desc->shell_path, shell); -#else /* POSTK_DEBUG_TEMP_FIX_9 */ - strcpy(desc->shell_path, shell_path); -#endif /* POSTK_DEBUG_TEMP_FIX_9 */ - } - desc->enable_vdso = enable_vdso; __dprintf("execve(): load_elf_desc() for %s OK, num sections: %d\n", - path, desc->num_sections); + filename, desc->num_sections); desc->rlimit[MCK_RLIMIT_STACK].rlim_cur = rlim_stack.rlim_cur; desc->rlimit[MCK_RLIMIT_STACK].rlim_max = rlim_stack.rlim_max; desc->stack_premap = stack_premap; + buffer = (char *)desc; + size = sizeof(struct program_load_desc) + + sizeof(struct program_image_section) * + desc->num_sections; + if (shebang_argv) { + desc->args_len = flatten_strings(NULL, shebang_argv, + &shebang_argv_flat); + buffer = malloc(size + desc->args_len); + if (!buffer) { + fprintf(stderr, + "execve(): could not alloc transfer buffer for file %s\n", + filename); + free(shebang_argv_flat); + ret = ENOMEM; + goto return_execve1; + } + memcpy(buffer, desc, size); + memcpy(buffer + size, shebang_argv_flat, + desc->args_len); + free(shebang_argv_flat); + size += desc->args_len; + } + /* Copy descriptor to co-kernel side */ - trans.userp = (void*)desc; + trans.userp = buffer; trans.rphys = w.sr.args[2]; - trans.size = sizeof(struct program_load_desc) + - sizeof(struct program_image_section) * - desc->num_sections; + trans.size = size; trans.direction = MCEXEC_UP_TRANSFER_TO_REMOTE; if (ioctl(fd, MCEXEC_UP_TRANSFER, &trans) != 0) { fprintf(stderr, "execve(): error transfering ELF for file %s\n", - (char *)w.sr.args[1]); + filename); + ret = -errno; goto return_execve1; } __dprintf("execve(): load_elf_desc() for %s OK\n", - path); + filename); - /* We can't be sure next phase will succeed */ - /* TODO: what 
shall we do with fp in desc?? */ - free(desc); - ret = 0; return_execve1: + /* We can't be sure next phase will succeed */ + /* TODO: what shall we do with fp in desc?? */ + if (buffer != (char *)desc) + free(buffer); + free(desc); + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); break; @@ -4178,14 +4111,12 @@ return_execve2: case __NR_sched_setaffinity: if (w.sr.args[0] == 0) { - ret = util_thread(w.sr.args[1], w.sr.rtid, - w.sr.args[2]); + ret = util_thread(my_thread, w.sr.args[1], w.sr.rtid, + w.sr.args[2], w.sr.args[3], w.sr.args[4]); } else { - ret = munmap((void *)w.sr.args[1], - w.sr.args[2]); - if (ret == -1) - ret = -errno; + __eprintf("__NR_sched_setaffinity: invalid argument (%lx)\n", w.sr.args[0]); + ret = -EINVAL; } do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); break; @@ -4391,6 +4322,6 @@ return_linux_spawn: //pthread_mutex_unlock(lock); } - __dprint("timed out.\n"); + __dprintf("timed out.\n"); return 1; } diff --git a/executer/user/syscall_intercept.c b/executer/user/syscall_intercept.c new file mode 100644 index 00000000..132cb9ba --- /dev/null +++ b/executer/user/syscall_intercept.c @@ -0,0 +1,139 @@ +#include +#include +#include +#include +#include +#include +#include +#include "../include/uprotocol.h" +#include "../include/uti.h" +#include "./archdep_uti.h" + +static struct uti_desc uti_desc; + +#define DEBUG_UTI + +static int +hook(long syscall_number, + long arg0, long arg1, + long arg2, long arg3, + long arg4, long arg5, + long *result) +{ + //return 1; /* debug */ + int tid = uti_syscall0(__NR_gettid); + struct terminate_thread_desc term_desc; + unsigned long code; + int stack_top; + + if (!uti_desc.start_syscall_intercept) { + return 1; /* System call isn't taken over */ + } + if (tid != uti_desc.mck_tid) { + if (uti_desc.syscalls2 && syscall_number >= 0 && syscall_number < 512) { + uti_desc.syscalls2[syscall_number]++; + } + return 1; + } +#ifdef DEBUG_UTI + if (uti_desc.syscalls && syscall_number >= 0 && syscall_number < 512) { + 
uti_desc.syscalls[syscall_number]++; + } +#endif + + switch (syscall_number) { + case __NR_gettid: + *result = uti_desc.mck_tid; + return 0; + case __NR_futex: + case __NR_brk: + case __NR_mmap: + case __NR_munmap: + case __NR_mprotect: + case __NR_mremap: + /* Overflow check */ + if (uti_desc.syscall_stack_top == -1) { + *result = -ENOMEM; + return 0; + } + + /* Sanity check */ + if (uti_desc.syscall_stack_top < 0 || uti_desc.syscall_stack_top >= UTI_SZ_SYSCALL_STACK) { + *result = -EINVAL; + return 0; + } + + /* Store the return value in the stack to prevent it from getting corrupted + when an interrupt happens just after ioctl() and before copying the return + value to *result */ + stack_top = __sync_fetch_and_sub(&uti_desc.syscall_stack_top, 1); + + uti_desc.syscall_stack[stack_top].number = syscall_number; + uti_desc.syscall_stack[stack_top].args[0] = arg0; + uti_desc.syscall_stack[stack_top].args[1] = arg1; + uti_desc.syscall_stack[stack_top].args[2] = arg2; + uti_desc.syscall_stack[stack_top].args[3] = arg3; + uti_desc.syscall_stack[stack_top].args[4] = arg4; + uti_desc.syscall_stack[stack_top].args[5] = arg5; + uti_desc.syscall_stack[stack_top].uti_clv = uti_desc.uti_clv; + uti_desc.syscall_stack[stack_top].ret = -EINVAL; + + uti_syscall3(__NR_ioctl, uti_desc.fd, MCEXEC_UP_SYSCALL_THREAD, (long)(uti_desc.syscall_stack + stack_top)); + *result = uti_desc.syscall_stack[stack_top].ret; + + /* push syscall_struct list */ + __sync_fetch_and_add(&uti_desc.syscall_stack_top, 1); + + return 0; /* System call is taken over */ + case __NR_exit_group: + code = 0x100000000; + goto make_remote_thread_exit; + case __NR_exit: + code = 0; + make_remote_thread_exit: + /* Make migrated-to-Linux thread on the McKernel side call do_exit() or terminate() */ + term_desc.pid = uti_desc.pid; + term_desc.tid = uti_desc.tid; /* tid of mcexec */ + term_desc.code = code | ((arg0 & 255) << 8); + term_desc.tsk = uti_desc.key; + + uti_syscall3(__NR_ioctl, uti_desc.fd, 
MCEXEC_UP_TERMINATE_THREAD, (long)&term_desc); + return 1; + case __NR_clone: + case __NR_fork: + case __NR_vfork: + case __NR_execve: + *result = -ENOSYS; + return 0; +#if 0 /* debug */ + case __NR_set_robust_list: + *result = -ENOSYS; + return 0; +#endif + case 888: + *result = (long)&uti_desc; + return 0; + default: + return 1; + } + + return 0; +} + +static __attribute__((constructor)) void +init(void) +{ + /* Set up the callback function */ + intercept_hook_point = hook; + + /* Initialize uti_desc */ + uti_desc.syscall_stack_top = UTI_SZ_SYSCALL_STACK - 1; + + /* Pass address of uti_desc to McKernel */ + uti_syscall1(733, (unsigned long)&uti_desc); +} + +static __attribute__((destructor)) void +dtor(void) +{ +} diff --git a/ihk b/ihk new file mode 160000 index 00000000..d9c74adf --- /dev/null +++ b/ihk @@ -0,0 +1 @@ +Subproject commit d9c74adf3f3037b5e1c0d9f40dd2e18e4fa70165 diff --git a/kernel/Makefile.build.in b/kernel/Makefile.build.in index 77bf0238..ca463ff9 100644 --- a/kernel/Makefile.build.in +++ b/kernel/Makefile.build.in @@ -6,7 +6,7 @@ IHKDIR=$(IHKBASE)/$(TARGETDIR) OBJS = init.o mem.o debug.o mikc.o listeners.o ap.o syscall.o cls.o host.o OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o shmobj.o OBJS += zeroobj.o procfs.o devobj.o sysfs.o xpmem.o profile.o freeze.o -OBJS += rbtree.o +OBJS += rbtree.o hugefileobj.o OBJS += pager.o # POSTK_DEBUG_ARCH_DEP_18 coredump arch separation. 
DEPSRCS=$(wildcard $(SRC)/*.c) @@ -19,7 +19,7 @@ endif CFLAGS += -I$(SRC)/include -I@abs_builddir@/../ -I@abs_builddir@/include -D__KERNEL__ -g -fno-omit-frame-pointer -fno-inline -fno-inline-small-functions ifneq ($(ARCH), arm64) -CFLAGS += -mcmodel=large -mno-red-zone +CFLAGS += -mcmodel=large -mno-red-zone -mno-sse endif LDFLAGS += -e arch_start IHKOBJ = ihk/ihk.o diff --git a/kernel/ap.c b/kernel/ap.c index 1105672a..131efe4a 100644 --- a/kernel/ap.c +++ b/kernel/ap.c @@ -29,15 +29,13 @@ #include #include #include +#include //#define DEBUG_PRINT_AP #ifdef DEBUG_PRINT_AP -#define dkprintf(...) do { kprintf(__VA_ARGS__); } while (0) -#define ekprintf(...) do { kprintf(__VA_ARGS__); } while (0) -#else -#define dkprintf(...) do { } while (0) -#define ekprintf(...) do { kprintf(__VA_ARGS__); } while (0) +#undef DDEBUG_DEFAULT +#define DDEBUG_DEFAULT DDEBUG_PRINT #endif int num_processors = 1; @@ -209,8 +207,10 @@ store_fake_cpu_info(struct sysfs_ops *ops0, void *instance, void *buf, static struct fake_cpu_info_ops show_fci_online = { .member = ONLINE, - .ops.show = &show_fake_cpu_info, - .ops.store = &store_fake_cpu_info, + .ops = { + .show = &show_fake_cpu_info, + .store = &store_fake_cpu_info, + }, }; void diff --git a/kernel/config/attached-mic.lds b/kernel/config/attached-mic.lds index 4da8397a..c826cc17 100644 --- a/kernel/config/attached-mic.lds +++ b/kernel/config/attached-mic.lds @@ -1,24 +1,28 @@ PHDRS { text PT_LOAD FLAGS(5); - data PT_LOAD FLAGS(7); + data PT_LOAD FLAGS(7); } SECTIONS { . = 0xffffffff80001000; - _head = .; + _head = .; - .text : { - *(.text); - } : text + .text : { + *(.text); + } : text - . = ALIGN(4096); + . = ALIGN(4096); .data : { - *(.data) - *(.data.*) + *(.data) + *(.data.*) + . = ALIGN(8); + __start___verbose = .; + *(__verbose); + __stop___verbose = .; } :data .rodata : { - *(.rodata .rodata.*) + *(.rodata .rodata.*) } :data .vsyscall : ALIGN(0x1000) { @@ -37,14 +41,14 @@ SECTIONS . 
= ALIGN(4096); } : data = 0xf4 - .bss : { - *(.bss .bss.*) - } - . = ALIGN(4096); - _end = .; + .bss : { + *(.bss .bss.*) + } + . = ALIGN(4096); + _end = .; /DISCARD/ : { - *(.eh_frame) - *(.note.gnu.build-id) + *(.eh_frame) + *(.note.gnu.build-id) } } diff --git a/kernel/config/builtin-mic.lds b/kernel/config/builtin-mic.lds index 4da8397a..c826cc17 100644 --- a/kernel/config/builtin-mic.lds +++ b/kernel/config/builtin-mic.lds @@ -1,24 +1,28 @@ PHDRS { text PT_LOAD FLAGS(5); - data PT_LOAD FLAGS(7); + data PT_LOAD FLAGS(7); } SECTIONS { . = 0xffffffff80001000; - _head = .; + _head = .; - .text : { - *(.text); - } : text + .text : { + *(.text); + } : text - . = ALIGN(4096); + . = ALIGN(4096); .data : { - *(.data) - *(.data.*) + *(.data) + *(.data.*) + . = ALIGN(8); + __start___verbose = .; + *(__verbose); + __stop___verbose = .; } :data .rodata : { - *(.rodata .rodata.*) + *(.rodata .rodata.*) } :data .vsyscall : ALIGN(0x1000) { @@ -37,14 +41,14 @@ SECTIONS . = ALIGN(4096); } : data = 0xf4 - .bss : { - *(.bss .bss.*) - } - . = ALIGN(4096); - _end = .; + .bss : { + *(.bss .bss.*) + } + . = ALIGN(4096); + _end = .; /DISCARD/ : { - *(.eh_frame) - *(.note.gnu.build-id) + *(.eh_frame) + *(.note.gnu.build-id) } } diff --git a/kernel/config/builtin-x86.lds b/kernel/config/builtin-x86.lds index bae71899..8f33262d 100644 --- a/kernel/config/builtin-x86.lds +++ b/kernel/config/builtin-x86.lds @@ -1,24 +1,28 @@ PHDRS { text PT_LOAD FLAGS(5); - data PT_LOAD FLAGS(7); + data PT_LOAD FLAGS(7); } SECTIONS { . = 0xffffffff80001000; - _head = .; + _head = .; - .text : { - *(.text); - } : text + .text : { + *(.text); + } : text - . = ALIGN(4096); + . = ALIGN(4096); .data : { - *(.data) - *(.data.*) + *(.data) + *(.data.*) + . = ALIGN(8); + __start___verbose = .; + *(__verbose); + __stop___verbose = .; } :data .rodata : { - *(.rodata .rodata.*) + *(.rodata .rodata.*) } :data .vsyscall : ALIGN(0x1000) { @@ -37,10 +41,10 @@ SECTIONS . 
= ALIGN(4096); } : data = 0xf4 - .bss : { - *(.bss .bss.*) - } - . = ALIGN(4096); - _end = .; + .bss : { + *(.bss .bss.*) + } + . = ALIGN(4096); + _end = .; } diff --git a/kernel/config/smp-arm64_type1.lds b/kernel/config/smp-arm64_type1.lds index 1a5969e6..86c7f658 100644 --- a/kernel/config/smp-arm64_type1.lds +++ b/kernel/config/smp-arm64_type1.lds @@ -16,6 +16,10 @@ SECTIONS .data : { *(.data) *(.data.*) + . = ALIGN(8); + __start___verbose = .; + *(__verbose); + __stop___verbose = .; } :data .rodata : { *(.rodata .rodata.*) diff --git a/kernel/config/smp-arm64_type2.lds b/kernel/config/smp-arm64_type2.lds index 32be18ba..481e139e 100644 --- a/kernel/config/smp-arm64_type2.lds +++ b/kernel/config/smp-arm64_type2.lds @@ -16,6 +16,10 @@ SECTIONS .data : { *(.data) *(.data.*) + . = ALIGN(8); + __start___verbose = .; + *(__verbose); + __stop___verbose = .; } :data .rodata : { *(.rodata .rodata.*) diff --git a/kernel/config/smp-arm64_type3.lds b/kernel/config/smp-arm64_type3.lds index 8a39b1ee..6186d926 100644 --- a/kernel/config/smp-arm64_type3.lds +++ b/kernel/config/smp-arm64_type3.lds @@ -16,6 +16,10 @@ SECTIONS .data : { *(.data) *(.data.*) + . = ALIGN(8); + __start___verbose = .; + *(__verbose); + __stop___verbose = .; } :data .rodata : { *(.rodata .rodata.*) diff --git a/kernel/config/smp-arm64_type4.lds b/kernel/config/smp-arm64_type4.lds index 11843c44..0ef7d5ac 100644 --- a/kernel/config/smp-arm64_type4.lds +++ b/kernel/config/smp-arm64_type4.lds @@ -16,6 +16,10 @@ SECTIONS .data : { *(.data) *(.data.*) + . = ALIGN(8); + __start___verbose = .; + *(__verbose); + __stop___verbose = .; } :data .rodata : { *(.rodata .rodata.*) diff --git a/kernel/config/smp-x86.lds b/kernel/config/smp-x86.lds index 9f4b0832..e03c5da4 100644 --- a/kernel/config/smp-x86.lds +++ b/kernel/config/smp-x86.lds @@ -1,24 +1,28 @@ PHDRS { text PT_LOAD FLAGS(5); - data PT_LOAD FLAGS(7); + data PT_LOAD FLAGS(7); } SECTIONS { - . = 0xffffffff80001000; - _head = .; + . 
= 0xFFFFFFFFFE801000; + _head = .; - .text : { - *(.text); - } : text + .text : { + *(.text); + } : text - . = ALIGN(4096); + . = ALIGN(4096); .data : { - *(.data) - *(.data.*) + *(.data) + *(.data.*) + . = ALIGN(8); + __start___verbose = .; + *(__verbose); + __stop___verbose = .; } :data .rodata : { - *(.rodata .rodata.*) + *(.rodata .rodata.*) } :data .vsyscall : ALIGN(0x1000) { @@ -37,9 +41,9 @@ SECTIONS . = ALIGN(4096); } : data = 0xf4 - .bss : { - *(.bss .bss.*) - } - . = ALIGN(4096); - _end = .; + .bss : { + *(.bss .bss.*) + } + . = ALIGN(4096); + _end = .; } diff --git a/kernel/debug.c b/kernel/debug.c index a88bc2c3..0328eb93 100644 --- a/kernel/debug.c +++ b/kernel/debug.c @@ -18,6 +18,9 @@ #include #include #include +#include +#include +#include struct ihk_kmsg_buf *kmsg_buf; @@ -84,7 +87,8 @@ void kputs(char *buf) debug_spin_unlock_irqrestore(&kmsg_buf->lock, flags_inner); kprintf_unlock(flags_outer); - if (DEBUG_KMSG_USED > IHK_KMSG_HIGH_WATER_MARK) { + if (irqflags_can_interrupt(flags_outer) && + DEBUG_KMSG_USED > IHK_KMSG_HIGH_WATER_MARK) { eventfd(IHK_OS_EVENTFD_TYPE_KMSG); ihk_mc_delay_us(IHK_KMSG_NOTIFY_DELAY); } @@ -123,8 +127,8 @@ int __kprintf(const char *format, ...) } debug_spin_unlock_irqrestore(&kmsg_buf->lock, flags_inner); - - if (DEBUG_KMSG_USED > IHK_KMSG_HIGH_WATER_MARK) { + if (irqflags_can_interrupt(flags_inner) && + DEBUG_KMSG_USED > IHK_KMSG_HIGH_WATER_MARK) { eventfd(IHK_OS_EVENTFD_TYPE_KMSG); ihk_mc_delay_us(IHK_KMSG_NOTIFY_DELAY); } @@ -165,7 +169,8 @@ int kprintf(const char *format, ...) 
debug_spin_unlock_irqrestore(&kmsg_buf->lock, flags_inner); kprintf_unlock(flags_outer); - if (DEBUG_KMSG_USED > IHK_KMSG_HIGH_WATER_MARK) { + if (irqflags_can_interrupt(flags_outer) && + DEBUG_KMSG_USED > IHK_KMSG_HIGH_WATER_MARK) { eventfd(IHK_OS_EVENTFD_TYPE_KMSG); ihk_mc_delay_us(IHK_KMSG_NOTIFY_DELAY); } @@ -178,3 +183,147 @@ void kmsg_init() { ihk_mc_spinlock_init(&kmsg_lock); } + +extern struct ddebug __start___verbose[]; +extern struct ddebug __stop___verbose[]; + +static ssize_t dynamic_debug_sysfs_show(struct sysfs_ops *ops, + void *instance, void *buf, size_t size) +{ + struct ddebug *dbg; + ssize_t n = 0; + + n = snprintf(buf, size, "# filename:lineno function flags format\n"); + + for (dbg = __start___verbose; dbg < __stop___verbose; dbg++) { + n += snprintf(buf + n, size - n, "%s:%d %s =%s\n", + dbg->file, dbg->line, dbg->func, + dbg->flags ? "p" : "_"); + + if (n >= size) + break; + } + + return n; +} + +static ssize_t dynamic_debug_sysfs_store(struct sysfs_ops *ops, + void *instance, void *buf, size_t size) +{ + char *cur = buf; + char *file = NULL, *func = NULL; + long int line_start = 0, line_end = INT_MAX; + int set_flag = -1; + struct ddebug *dbg; + + + // assume line was new-line terminated and squash last newline + cur[size-1] = '\0'; + + /* basic line parsing, combinaisons of: + * file + * func + * line + * and must end with [+-=][p_] (set/clear print flag) + */ +again: + while (cur && cur < ((char *)buf) + size && *cur) { + dkprintf("looking at %.*s, size left %d\n", + size - (cur - (char *)buf), cur, + (char *)buf - cur + size); + + if (strncmp(cur, "func ", 5) == 0) { + cur += 5; + func = cur; + } else if (strncmp(cur, "file ", 5) == 0) { + cur += 5; + file = cur; + } else if (strncmp(cur, "line ", 5) == 0) { + cur += 5; + if (*cur != '-') { + line_start = strtol(cur, &cur, 0); + } + if (*cur != '-') { + line_end = line_start; + } else { + cur++; + if (*cur == ' ' || *cur == '\0') { + line_end = INT_MAX; + } else { + line_end = strtol(cur, 
&cur, 0); + } + } + } else if (strchr("+-=", *cur)) { + switch ((*cur) + 256 * (*(cur+1))) { + case '+' + 256*'p': + case '=' + 256*'p': + set_flag = DDEBUG_PRINT; + break; + case '-' + 256*'p': + case '=' + 256*'_': + set_flag = DDEBUG_NONE; + break; + default: + kprintf("invalid flag: %.*s\n", + size - (cur - (char *)buf), cur); + return -EINVAL; + } + /* XXX check 3rd char is end of input or \n or ; */ + cur += 3; + break; + + } else { + kprintf("dynamic debug control: unrecognized keyword: %.*s\n", + size - (cur - (char *)buf), cur); + return -EINVAL; + } + cur = strpbrk(cur, " \n"); + if (cur) { + *cur = '\0'; + cur++; + } + } + dkprintf("func %s, file %s, lines %d-%d, flag %x\n", + func, file, line_start, line_end, set_flag); + + if (set_flag < 0) { + kprintf("dynamic debug control: no flag set?\n"); + return -EINVAL; + } + if (!func && !file) { + kprintf("at least file or func should be set\n"); + return -EINVAL; + } + + for (dbg = __start___verbose; dbg < __stop___verbose; dbg++) { + /* TODO: handle wildcards */ + if ((!func || strcmp(func, dbg->func) == 0) && + (!file || strcmp(file, dbg->file) == 0) && + dbg->line >= line_start && + dbg->line <= line_end) { + dbg->flags = set_flag; + } + } + + if (cur && cur < ((char *)buf) + size && *cur) + goto again; + + return size; +} + +static struct sysfs_ops dynamic_debug_sysfs_ops = { + .show = &dynamic_debug_sysfs_show, + .store = &dynamic_debug_sysfs_store, +}; + +void dynamic_debug_sysfs_setup(void) +{ + int error; + + error = sysfs_createf(&dynamic_debug_sysfs_ops, NULL, 0644, + "/sys/kernel/debug/dynamic_debug/control"); + if (error) { + kprintf("%s: ERROR: creating dynamic_debug/control sysfs file", + __func__); + } +} diff --git a/kernel/devobj.c b/kernel/devobj.c index 1d05275c..7898a725 100644 --- a/kernel/devobj.c +++ b/kernel/devobj.c @@ -36,15 +36,13 @@ #include #include #include +#include //#define DEBUG_PRINT_DEVOBJ #ifdef DEBUG_PRINT_DEVOBJ -#define dkprintf(...) 
kprintf(__VA_ARGS__) -#define ekprintf(...) kprintf(__VA_ARGS__) -#else -#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) -#define ekprintf(...) kprintf(__VA_ARGS__) +#undef DDEBUG_DEFAULT +#define DDEBUG_DEFAULT DDEBUG_PRINT #endif @@ -54,16 +52,15 @@ struct devobj { uintptr_t handle; off_t pfn_pgoff; uintptr_t * pfn_table; + ihk_spinlock_t pfn_table_lock; size_t npages; }; -static memobj_release_func_t devobj_release; -static memobj_ref_func_t devobj_ref; +static memobj_free_func_t devobj_free; static memobj_get_page_func_t devobj_get_page; static struct memobj_ops devobj_ops = { - .release = &devobj_release, - .ref = &devobj_ref, + .free = &devobj_free, .get_page = &devobj_get_page, }; @@ -88,12 +85,9 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp int error; struct devobj *obj = NULL; const size_t npages = (len + PAGE_SIZE - 1) / PAGE_SIZE; -#ifdef POSTK_DEBUG_TEMP_FIX_36 const size_t uintptr_per_page = (PAGE_SIZE / sizeof(uintptr_t)); - const size_t pfn_npages = (npages + uintptr_per_page - 1) / uintptr_per_page; -#else - const size_t pfn_npages = (npages / (PAGE_SIZE / sizeof(uintptr_t))) + 1; -#endif /*POSTK_DEBUG_TEMP_FIX_36*/ + const size_t pfn_npages = + (npages + uintptr_per_page - 1) / uintptr_per_page; dkprintf("%s: fd: %d, len: %lu, off: %lu \n", __FUNCTION__, fd, len, off); @@ -122,6 +116,8 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp ihk_mc_syscall_arg4(&ctx) = virt_to_phys(&result); ihk_mc_syscall_arg5(&ctx) = prot | populate_flags; + memset(&result, 0, sizeof(result)); + error = syscall_generic_forwarding(__NR_mmap, &ctx); if (error) { kprintf("%s: error: fd: %d, len: %lu, off: %lu map failed.\n", @@ -135,6 +131,7 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp obj->memobj.ops = &devobj_ops; obj->memobj.flags = MF_HAS_PAGER | MF_DEV_FILE; obj->memobj.size = len; + ihk_atomic_set(&obj->memobj.refcnt, 1); obj->handle = 
result.handle; dkprintf("%s: path=%s\n", __FUNCTION__, result.path); @@ -148,10 +145,9 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp strncpy(obj->memobj.path, result.path, PATH_MAX); } - obj->ref = 1; - obj->pfn_pgoff = off / PAGE_SIZE; + obj->pfn_pgoff = off >> PAGE_SHIFT; obj->npages = npages; - ihk_mc_spinlock_init(&obj->memobj.lock); + ihk_mc_spinlock_init(&obj->pfn_table_lock); error = 0; *objp = to_memobj(obj); @@ -170,81 +166,50 @@ out: return error; } -static void devobj_ref(struct memobj *memobj) +static void devobj_free(struct memobj *memobj) { struct devobj *obj = to_devobj(memobj); - - dkprintf("devobj_ref(%p %lx):\n", obj, obj->handle); - memobj_lock(&obj->memobj); - ++obj->ref; - memobj_unlock(&obj->memobj); - return; -} - -static void devobj_release(struct memobj *memobj) -{ - struct devobj *obj = to_devobj(memobj); - struct devobj *free_obj = NULL; uintptr_t handle; -#ifndef POSTK_DEBUG_TEMP_FIX_36 + const size_t uintptr_per_page = (PAGE_SIZE / sizeof(uintptr_t)); const size_t pfn_npages = - (obj->npages / (PAGE_SIZE / sizeof(uintptr_t))) + 1; -#endif /*!POSTK_DEBUG_TEMP_FIX_36*/ + (obj->npages + uintptr_per_page - 1) / uintptr_per_page; + int error; + ihk_mc_user_context_t ctx; - dkprintf("devobj_release(%p %lx)\n", obj, obj->handle); + dkprintf("%s(%p %lx)\n", __func__, obj, obj->handle); - memobj_lock(&obj->memobj); - --obj->ref; - if (obj->ref <= 0) { - free_obj = obj; - } handle = obj->handle; - memobj_unlock(&obj->memobj); - if (free_obj) { - if (!(free_obj->memobj.flags & MF_HOST_RELEASED)) { - int error; - ihk_mc_user_context_t ctx; + ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_UNMAP; + ihk_mc_syscall_arg1(&ctx) = handle; + ihk_mc_syscall_arg2(&ctx) = 1; - ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_UNMAP; - ihk_mc_syscall_arg1(&ctx) = handle; - ihk_mc_syscall_arg2(&ctx) = 1; - - error = syscall_generic_forwarding(__NR_mmap, &ctx); - if (error) { - kprintf("devobj_release(%p %lx):" - "release failed. 
%d\n", - free_obj, handle, error); - /* through */ - } - } - - if (obj->pfn_table) { - // Don't call memory_stat_rss_sub() because devobj related pages don't reside in main memory -#ifdef POSTK_DEBUG_TEMP_FIX_36 - const size_t uintptr_per_page = (PAGE_SIZE / sizeof(uintptr_t)); - const size_t pfn_npages = (obj->npages + uintptr_per_page - 1) / uintptr_per_page; - ihk_mc_free_pages(obj->pfn_table, pfn_npages); -#else - ihk_mc_free_pages(obj->pfn_table, pfn_npages); -#endif /*POSTK_DEBUG_TEMP_FIX_36*/ - } - - if (to_memobj(free_obj)->path) { - kfree(to_memobj(free_obj)->path); - } - - kfree(free_obj); + error = syscall_generic_forwarding(__NR_mmap, &ctx); + if (error) { + kprintf("%s(%p %lx): release failed. %d\n", + __func__, obj, handle, error); + /* through */ } - dkprintf("devobj_release(%p %lx):free %p\n", - obj, handle, free_obj); + if (obj->pfn_table) { + // Don't call memory_stat_rss_sub() because devobj related + // pages don't reside in main memory + ihk_mc_free_pages(obj->pfn_table, pfn_npages); + } + + if (to_memobj(obj)->path) { + kfree(to_memobj(obj)->path); + } + + kfree(obj); + + dkprintf("%s(%p %lx):free\n", __func__, obj, handle); return; } static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintptr_t *physp, unsigned long *flag, uintptr_t virt_addr) { - const off_t pgoff = off / PAGE_SIZE; + const off_t pgoff = off >> PAGE_SHIFT; struct devobj *obj = to_devobj(memobj); int error; uintptr_t pfn; @@ -262,17 +227,14 @@ static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintpt ix = pgoff - obj->pfn_pgoff; dkprintf("ix: %ld\n", ix); - memobj_lock(&obj->memobj); - pfn = obj->pfn_table[ix]; #ifdef PROFILE_ENABLE profile_event_add(PROFILE_page_fault_dev_file, PAGE_SIZE); #endif // PROFILE_ENABLE + pfn = obj->pfn_table[ix]; if (!(pfn & PFN_VALID)) { - memobj_unlock(&obj->memobj); - ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_PFN; ihk_mc_syscall_arg1(&ctx) = obj->handle; - ihk_mc_syscall_arg2(&ctx) = pgoff << 
PAGE_SHIFT; + ihk_mc_syscall_arg2(&ctx) = off & ~(PAGE_SIZE - 1); ihk_mc_syscall_arg3(&ctx) = virt_to_phys(&pfn); error = syscall_generic_forwarding(__NR_mmap, &ctx); @@ -303,11 +265,9 @@ static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintpt dkprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT after %#lx\n", memobj, obj->handle, off, p2align, pfn); } - memobj_lock(&obj->memobj); obj->pfn_table[ix] = pfn; // Don't call memory_stat_rss_add() because devobj related pages don't reside in main memory } - memobj_unlock(&obj->memobj); if (!(pfn & PFN_PRESENT)) { kprintf("devobj_get_page(%p %lx,%lx,%d):not present. %lx\n", memobj, obj->handle, off, p2align, pfn); diff --git a/kernel/fileobj.c b/kernel/fileobj.c index 36c96330..74cf7601 100644 --- a/kernel/fileobj.c +++ b/kernel/fileobj.c @@ -27,15 +27,13 @@ #include #include #include +#include //#define DEBUG_PRINT_FILEOBJ #ifdef DEBUG_PRINT_FILEOBJ -#define dkprintf(...) do { if (1) kprintf(__VA_ARGS__); } while (0) -#define ekprintf(...) kprintf(__VA_ARGS__) -#else -#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) -#define ekprintf(...) 
kprintf(__VA_ARGS__) +#undef DDEBUG_DEFAULT +#define DDEBUG_DEFAULT DDEBUG_PRINT #endif mcs_lock_t fileobj_list_lock; @@ -47,24 +45,21 @@ static LIST_HEAD(fileobj_list); struct fileobj { struct memobj memobj; /* must be first */ - long sref; - long cref; + uint64_t sref; uintptr_t handle; struct list_head list; struct list_head page_hash[FILEOBJ_PAGE_HASH_SIZE]; mcs_lock_t page_hash_locks[FILEOBJ_PAGE_HASH_SIZE]; }; -static memobj_release_func_t fileobj_release; -static memobj_ref_func_t fileobj_ref; +static memobj_free_func_t fileobj_free; static memobj_get_page_func_t fileobj_get_page; static memobj_flush_page_func_t fileobj_flush_page; static memobj_invalidate_page_func_t fileobj_invalidate_page; static memobj_lookup_page_func_t fileobj_lookup_page; static struct memobj_ops fileobj_ops = { - .release = &fileobj_release, - .ref = &fileobj_ref, + .free = &fileobj_free, .get_page = &fileobj_get_page, .copy_page = NULL, .flush_page = &fileobj_flush_page, @@ -170,22 +165,22 @@ static void obj_list_remove(struct fileobj *obj) /* return NULL or locked fileobj */ static struct fileobj *obj_list_lookup(uintptr_t handle) { - struct fileobj *obj; struct fileobj *p; - obj = NULL; list_for_each_entry(p, &fileobj_list, list) { if (p->handle == handle) { - memobj_lock(&p->memobj); - if (p->cref > 0) { - obj = p; - break; + /* for the interval between last put and fileobj_free + * taking list_lock + */ + if (memobj_ref(&p->memobj) <= 1) { + ihk_atomic_dec(&p->memobj.refcnt); + continue; } - memobj_unlock(&p->memobj); + return p; } } - return obj; + return NULL; } /*********************************************************************** @@ -200,13 +195,7 @@ int fileobj_create(int fd, struct memobj **objp, int *maxprotp, uintptr_t virt_a struct fileobj *obj; struct mcs_lock_node node; - dkprintf("fileobj_create(%d)\n", fd); - newobj = kmalloc(sizeof(*newobj), IHK_MC_AP_NOWAIT); - if (!newobj) { - error = -ENOMEM; - kprintf("fileobj_create(%d):kmalloc failed. 
%d\n", fd, error); - goto out; - } + dkprintf("%s(%d)\n", __func__, fd); ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_CREATE; ihk_mc_syscall_arg1(&ctx) = fd; @@ -214,20 +203,41 @@ int fileobj_create(int fd, struct memobj **objp, int *maxprotp, uintptr_t virt_a memset(&result, 0, sizeof(result)); error = syscall_generic_forwarding(__NR_mmap, &ctx); + if (error) { - dkprintf("fileobj_create(%d):create failed. %d\n", fd, error); + /* -ESRCH doesn't mean an error but requesting a fall + * back to treat the file as a device file + */ + if (error != -ESRCH) { + kprintf("%s(%d):create failed. %d\n", + __func__, fd, error); + } goto out; } + if (result.flags & MF_HUGETLBFS) { + return hugefileobj_pre_create(&result, objp, maxprotp); + } + + mcs_lock_lock(&fileobj_list_lock, &node); + obj = obj_list_lookup(result.handle); + if (obj) + goto found; + mcs_lock_unlock(&fileobj_list_lock, &node); + + // not found: alloc new object and lookup again + newobj = kmalloc(sizeof(*newobj), IHK_MC_AP_NOWAIT); + if (!newobj) { + error = -ENOMEM; + kprintf("%s(%d):kmalloc failed. 
%d\n", __func__, fd, error); + goto out; + } memset(newobj, 0, sizeof(*newobj)); newobj->memobj.ops = &fileobj_ops; newobj->memobj.flags = MF_HAS_PAGER | MF_REG_FILE; newobj->handle = result.handle; - newobj->sref = 1; - newobj->cref = 1; fileobj_page_hash_init(newobj); - ihk_mc_spinlock_init(&newobj->memobj.lock); mcs_lock_lock_noirq(&fileobj_list_lock, &node); obj = obj_list_lookup(result.handle); @@ -237,6 +247,8 @@ int fileobj_create(int fd, struct memobj **objp, int *maxprotp, uintptr_t virt_a to_memobj(obj)->size = result.size; to_memobj(obj)->flags |= result.flags; to_memobj(obj)->status = MEMOBJ_READY; + ihk_atomic_set(&to_memobj(obj)->refcnt, 1); + obj->sref = 1; if (to_memobj(obj)->flags & MF_PREFETCH) { to_memobj(obj)->status = MEMOBJ_TO_BE_PREFETCHED; } @@ -305,20 +317,17 @@ error_cleanup: } newobj = NULL; - dkprintf("%s: new obj 0x%lx cref: %d, %s\n", + dkprintf("%s: new obj 0x%lx %s\n", __FUNCTION__, obj, - obj->cref, to_memobj(obj)->flags & MF_ZEROFILL ? "zerofill" : ""); } else { - ++obj->sref; - ++obj->cref; - memobj_unlock(&obj->memobj); /* locked by obj_list_lookup() */ - dkprintf("%s: existing obj 0x%lx cref: %d, %s\n", +found: + obj->sref++; + dkprintf("%s: existing obj 0x%lx, %s\n", __FUNCTION__, obj, - obj->cref, to_memobj(obj)->flags & MF_ZEROFILL ? 
"zerofill" : ""); } @@ -332,152 +341,111 @@ out: if (newobj) { kfree(newobj); } - dkprintf("fileobj_create(%d):%d %p %x\n", fd, error, *objp, *maxprotp); + dkprintf("%s(%d):%d %p %x\n", __func__, fd, error, *objp, *maxprotp); return error; } -static void fileobj_ref(struct memobj *memobj) +static void fileobj_free(struct memobj *memobj) { struct fileobj *obj = to_fileobj(memobj); - - dkprintf("fileobj_ref(%p %lx):\n", obj, obj->handle); - memobj_lock(&obj->memobj); - ++obj->cref; - memobj_unlock(&obj->memobj); - return; -} - -static void fileobj_release(struct memobj *memobj) -{ - struct fileobj *obj = to_fileobj(memobj); - long free_sref = 0; - uintptr_t free_handle; - struct fileobj *free_obj = NULL; struct mcs_lock_node node; + int error; + ihk_mc_user_context_t ctx; - dkprintf("fileobj_release(%p %lx)\n", obj, obj->handle); - memobj_lock(&obj->memobj); - --obj->cref; - free_sref = obj->sref - 1; /* surplus sref */ - if (obj->cref <= 0) { - free_sref = obj->sref; - free_obj = obj; - } - obj->sref -= free_sref; - free_handle = obj->handle; - memobj_unlock(&obj->memobj); - if (obj->memobj.flags & MF_HOST_RELEASED) { - free_sref = 0; // don't call syscall_generic_forwarding - } + dkprintf("%s: free obj 0x%lx, %s\n", __func__, + obj, to_memobj(obj)->flags & MF_ZEROFILL ? "zerofill" : ""); - if (free_obj) { - dkprintf("%s: release obj 0x%lx cref: %d, free_obj: 0x%lx, %s\n", - __FUNCTION__, - obj, - obj->cref, - free_obj, - to_memobj(obj)->flags & MF_ZEROFILL ? 
"zerofill" : ""); - mcs_lock_lock_noirq(&fileobj_list_lock, &node); - /* zap page_list */ - for (;;) { - struct page *page; - void *page_va; - uintptr_t phys; + mcs_lock_lock_noirq(&fileobj_list_lock, &node); + obj_list_remove(obj); + mcs_lock_unlock_noirq(&fileobj_list_lock, &node); - page = fileobj_page_hash_first(obj); - if (!page) { - break; - } - __fileobj_page_hash_remove(page); - phys = page_to_phys(page); - page_va = phys_to_virt(phys); + /* zap page_list */ + for (;;) { + struct page *page; + void *page_va; + uintptr_t phys; - /* Count must be one because set to one on the first get_page() invoking fileobj_do_pageio and - incremented by the second get_page() reaping the pageio and decremented by clear_range(). + page = fileobj_page_hash_first(obj); + if (!page) { + break; + } + __fileobj_page_hash_remove(page); + phys = page_to_phys(page); + page_va = phys_to_virt(phys); + /* Count must be one because set to one on the first + * get_page() invoking fileobj_do_pageio and incremented by + * the second get_page() reaping the pageio and decremented + * by clear_range(). + */ + if (ihk_atomic_read(&page->count) != 1) { + kprintf("%s: WARNING: page count is %d for phys 0x%lx is invalid, flags: 0x%lx\n", + __func__, ihk_atomic_read(&page->count), + page->phys, to_memobj(obj)->flags); + } + else if (page_unmap(page)) { + ihk_mc_free_pages_user(page_va, 1); + /* Track change in page->count for !MF_PREMAP pages. + * It is decremented here or in clear_range() */ - if (ihk_atomic_read(&page->count) != 1) { - kprintf("%s: WARNING: page count is %d for phys 0x%lx is invalid, flags: 0x%lx\n", - __FUNCTION__, - ihk_atomic_read(&page->count), - page->phys, - to_memobj(free_obj)->flags); - } - else if (page_unmap(page)) { - ihk_mc_free_pages_user(page_va, 1); - /* Track change in page->count for !MF_PREMAP pages. 
It is decremented here or in clear_range() */ - dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", phys, __FUNCTION__, phys, PAGE_SIZE, PAGE_SIZE); - rusage_memory_stat_mapped_file_sub(PAGE_SIZE, PAGE_SIZE); - } -#if 0 - count = ihk_atomic_sub_return(1, &page->count); - - if (!((page->mode == PM_WILL_PAGEIO) - || (page->mode == PM_DONE_PAGEIO) - || (page->mode == PM_PAGEIO_EOF) - || (page->mode == PM_PAGEIO_ERROR) - || ((page->mode == PM_MAPPED) - && (count <= 0)))) { - kprintf("fileobj_release(%p %lx): " - "mode %x, count %d, off %lx\n", - obj, obj->handle, page->mode, - count, page->offset); - panic("fileobj_release"); - } - - page->mode = PM_NONE; -#endif - } - - /* Pre-mapped? */ - if (to_memobj(free_obj)->flags & MF_PREMAP) { - int i; - for (i = 0; i < to_memobj(free_obj)->nr_pages; ++i) { - if (to_memobj(free_obj)->pages[i]) { - dkprintf("%s: pages[i]=%p\n", __FUNCTION__, i, to_memobj(free_obj)->pages[i]); - // Track change in fileobj->pages[] for MF_PREMAP pages - // Note that page_unmap() isn't called for MF_PREMAP in - // free_process_memory_range() --> ihk_mc_pt_free_range() - dkprintf("%lx-,%s: memory_stat_rss_sub,phys=%lx,size=%ld,pgsize=%ld\n", - virt_to_phys(to_memobj(free_obj)->pages[i]), __FUNCTION__, virt_to_phys(to_memobj(free_obj)->pages[i]), PAGE_SIZE, PAGE_SIZE); - rusage_memory_stat_mapped_file_sub(PAGE_SIZE, PAGE_SIZE); - ihk_mc_free_pages_user(to_memobj(free_obj)->pages[i], 1); - } - } - - kfree(to_memobj(free_obj)->pages); - } - - if (to_memobj(free_obj)->path) { - dkprintf("%s: %s\n", __FUNCTION__, to_memobj(free_obj)->path); - kfree(to_memobj(free_obj)->path); - } - - obj_list_remove(free_obj); - mcs_lock_unlock_noirq(&fileobj_list_lock, &node); - kfree(free_obj); - } - - if (free_sref) { - int error; - ihk_mc_user_context_t ctx; - - ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_RELEASE; - ihk_mc_syscall_arg1(&ctx) = free_handle; - ihk_mc_syscall_arg2(&ctx) = free_sref; - - error = 
syscall_generic_forwarding(__NR_mmap, &ctx); - if (error) { - kprintf("fileobj_release(%p %lx):" - "release %ld failed. %d\n", - obj, free_handle, free_sref, error); - /* through */ + dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", + phys, __func__, phys, PAGE_SIZE, PAGE_SIZE); + rusage_memory_stat_mapped_file_sub(PAGE_SIZE, + PAGE_SIZE); } } - dkprintf("fileobj_release(%p %lx):free %ld %p\n", - obj, free_handle, free_sref, free_obj); + /* Pre-mapped? */ + if (to_memobj(obj)->flags & MF_PREMAP) { + int i; + + for (i = 0; i < to_memobj(obj)->nr_pages; ++i) { + if (to_memobj(obj)->pages[i]) { + dkprintf("%s: pages[i]=%p\n", __func__, i, + to_memobj(obj)->pages[i]); + // Track change in fileobj->pages[] for MF_PREMAP pages + // Note that page_unmap() isn't called for MF_PREMAP in + // free_process_memory_range() --> ihk_mc_pt_free_range() + dkprintf("%lx-,%s: memory_stat_rss_sub,phys=%lx,size=%ld,pgsize=%ld\n", + virt_to_phys(to_memobj(obj)->pages[i]), + __func__, + virt_to_phys(to_memobj(obj)->pages[i]), + PAGE_SIZE, PAGE_SIZE); + rusage_memory_stat_mapped_file_sub(PAGE_SIZE, + PAGE_SIZE); + ihk_mc_free_pages_user(to_memobj(obj)->pages[i], + 1); + } + } + + kfree(to_memobj(obj)->pages); + } + + if (to_memobj(obj)->path) { + dkprintf("%s: %s\n", __func__, to_memobj(obj)->path); + kfree(to_memobj(obj)->path); + } + + /* linux side + * sref is necessary because handle is used as key, so there could + * be a new mckernel pager with the same handle being created as + * this one is being destroyed + */ + ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_RELEASE; + ihk_mc_syscall_arg1(&ctx) = obj->handle; + ihk_mc_syscall_arg2(&ctx) = obj->sref; + + error = syscall_generic_forwarding(__NR_mmap, &ctx); + if (error) { + kprintf("%s(%p %lx): free failed. 
%d\n", __func__, + obj, obj->handle, error); + /* through */ + } + + dkprintf("%s(%p %lx):free\n", __func__, obj, obj->handle); + kfree(obj); return; + } struct pageio_args { @@ -570,7 +538,7 @@ static void fileobj_do_pageio(void *args0) out: mcs_lock_unlock_noirq(&obj->page_hash_locks[hash], &mcs_node); - fileobj_release(&obj->memobj); /* got fileobj_get_page() */ + memobj_unref(&obj->memobj); /* got fileobj_get_page() */ kfree(args0); dkprintf("fileobj_do_pageio(%p,%lx,%lx):\n", obj, off, pgsize); return; @@ -656,7 +624,9 @@ static int fileobj_get_page(struct memobj *memobj, off_t off, npages = 1 << p2align; virt = ihk_mc_alloc_pages_user(npages, (IHK_MC_AP_NOWAIT | - (to_memobj(obj)->flags & MF_ZEROFILL) ? IHK_MC_AP_USER : 0), virt_addr); + ((to_memobj(obj)->flags & MF_ZEROFILL) ? + IHK_MC_AP_USER : 0)), + virt_addr); if (!virt) { error = -ENOMEM; kprintf("fileobj_get_page(%p,%lx,%x,%x,%p):" @@ -681,9 +651,7 @@ static int fileobj_get_page(struct memobj *memobj, off_t off, page->mode = PM_WILL_PAGEIO; } - memobj_lock(&obj->memobj); - ++obj->cref; /* for fileobj_do_pageio() */ - memobj_unlock(&obj->memobj); + memobj_ref(&obj->memobj); args->fileobj = obj; args->objoff = off; @@ -744,10 +712,6 @@ static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys, return 0; } - if (memobj->flags & MF_HOST_RELEASED) { - return 0; - } - page = phys_to_page(phys); if (!page) { kprintf("%s: warning: tried to flush non-existing page for phys addr: 0x%lx\n", @@ -755,8 +719,6 @@ static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys, return 0; } - memobj_unlock(&obj->memobj); - ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_WRITE; ihk_mc_syscall_arg1(&ctx) = obj->handle; ihk_mc_syscall_arg2(&ctx) = page->offset; @@ -771,7 +733,6 @@ static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys, /* through */ } - memobj_lock(&obj->memobj); return 0; } diff --git a/kernel/futex.c b/kernel/futex.c index ee728d1c..66682cc2 100644 --- a/kernel/futex.c +++ 
b/kernel/futex.c @@ -70,15 +70,22 @@ #include #include #include +#include +#include //#define DEBUG_PRINT_FUTEX #ifdef DEBUG_PRINT_FUTEX -#define dkprintf kprintf +#undef DDEBUG_DEFAULT +#define DDEBUG_DEFAULT DDEBUG_PRINT +#define uti_dkprintf(...) do { ((clv_override && linux_printk) ? (*linux_printk) : kprintf)(__VA_ARGS__); } while (0) #else -#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) +#define uti_dkprintf(...) do { } while (0) #endif +#define uti_kprintf(...) do { ((clv_override && linux_printk) ? (*linux_printk) : kprintf)(__VA_ARGS__); } while (0) + +unsigned long ihk_mc_get_ns_per_tsc(void); int futex_cmpxchg_enabled; /** @@ -108,6 +115,9 @@ struct futex_q { union futex_key key; union futex_key *requeue_pi_key; uint32_t bitset; + + /* Used to wake-up a thread running on a Linux CPU */ + void *uti_futex_resp; }; /* @@ -180,11 +190,12 @@ static void drop_futex_key_refs(union futex_key *key) * lock_page() might sleep, the caller should not hold a spinlock. */ static int -get_futex_key(uint32_t *uaddr, int fshared, union futex_key *key) +get_futex_key(uint32_t *uaddr, int fshared, union futex_key *key, struct cpu_local_var *clv_override) { unsigned long address = (unsigned long)uaddr; unsigned long phys; - struct process_vm *mm = cpu_local_var(current)->vm; + struct thread *thread = cpu_local_var_with_override(current, clv_override); + struct process_vm *mm = thread->vm; /* * The futex address must be "naturally" aligned. @@ -250,7 +261,7 @@ static int cmpxchg_futex_value_locked(uint32_t __user *uaddr, uint32_t uval, uin * The hash bucket lock must be held when this is called. * Afterwards, the futex_q must not be accessed. 
*/ -static void wake_futex(struct futex_q *q) +static void wake_futex(struct futex_q *q, struct cpu_local_var *clv_override) { struct thread *p = q->task; @@ -272,8 +283,31 @@ static void wake_futex(struct futex_q *q) barrier(); q->lock_ptr = NULL; - dkprintf("wake_futex(): waking up tid %d\n", p->tid); - sched_wakeup_thread(p, PS_NORMAL); + + if (q->uti_futex_resp) { + int rc; + uti_dkprintf("wake_futex(): waking up migrated-to-Linux thread (tid %d),uti_futex_resp=%p\n", p->tid, q->uti_futex_resp); + /* TODO: Add the case when a Linux thread waking up another Linux thread */ + if (clv_override) { + uti_dkprintf("%s: ERROR: A Linux thread is waking up migrated-to-Linux thread\n", __FUNCTION__); + } + if (p->spin_sleep == 0) { + uti_dkprintf("%s: INFO: woken up by someone else\n", __FUNCTION__); + } + + struct ikc_scd_packet pckt; + struct ihk_ikc_channel_desc *resp_channel = cpu_local_var_with_override(ikc2linux, clv_override); + pckt.msg = SCD_MSG_FUTEX_WAKE; + pckt.futex.resp = q->uti_futex_resp; + pckt.futex.spin_sleep = &p->spin_sleep; + rc = ihk_ikc_send(resp_channel, &pckt, 0); + if (rc) { + uti_dkprintf("%s: ERROR: ihk_ikc_send returned %d, resp_channel=%p\n", __FUNCTION__, rc, resp_channel); + } + } else { + uti_dkprintf("wake_futex(): waking up McKernel thread (tid %d)\n", p->tid); + sched_wakeup_thread(p, PS_NORMAL); + } } /* @@ -303,7 +337,7 @@ double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2) /* * Wake up waiters matching bitset queued on this futex (uaddr). 
*/ -static int futex_wake(uint32_t *uaddr, int fshared, int nr_wake, uint32_t bitset) +static int futex_wake(uint32_t *uaddr, int fshared, int nr_wake, uint32_t bitset, struct cpu_local_var *clv_override) { struct futex_hash_bucket *hb; struct futex_q *this, *next; @@ -314,7 +348,7 @@ static int futex_wake(uint32_t *uaddr, int fshared, int nr_wake, uint32_t bitset if (!bitset) return -EINVAL; - ret = get_futex_key(uaddr, fshared, &key); + ret = get_futex_key(uaddr, fshared, &key, clv_override); if ((ret != 0)) goto out; @@ -330,7 +364,7 @@ static int futex_wake(uint32_t *uaddr, int fshared, int nr_wake, uint32_t bitset if (!(this->bitset & bitset)) continue; - wake_futex(this); + wake_futex(this, clv_override); if (++ret >= nr_wake) break; } @@ -348,7 +382,8 @@ out: */ static int futex_wake_op(uint32_t *uaddr1, int fshared, uint32_t *uaddr2, - int nr_wake, int nr_wake2, int op) + int nr_wake, int nr_wake2, int op, + struct cpu_local_var *clv_override) { union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; struct futex_hash_bucket *hb1, *hb2; @@ -357,10 +392,10 @@ futex_wake_op(uint32_t *uaddr1, int fshared, uint32_t *uaddr2, int ret, op_ret; retry: - ret = get_futex_key(uaddr1, fshared, &key1); + ret = get_futex_key(uaddr1, fshared, &key1, clv_override); if ((ret != 0)) goto out; - ret = get_futex_key(uaddr2, fshared, &key2); + ret = get_futex_key(uaddr2, fshared, &key2, clv_override); if ((ret != 0)) goto out_put_key1; @@ -394,7 +429,7 @@ retry_private: plist_for_each_entry_safe(this, next, head, list) { if (match_futex (&this->key, &key1)) { - wake_futex(this); + wake_futex(this, clv_override); if (++ret >= nr_wake) break; } @@ -406,7 +441,7 @@ retry_private: op_ret = 0; plist_for_each_entry_safe(this, next, head, list) { if (match_futex (&this->key, &key2)) { - wake_futex(this); + wake_futex(this, clv_override); if (++op_ret >= nr_wake2) break; } @@ -469,7 +504,7 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1, */ static int 
futex_requeue(uint32_t *uaddr1, int fshared, uint32_t *uaddr2, int nr_wake, int nr_requeue, uint32_t *cmpval, - int requeue_pi) + int requeue_pi, struct cpu_local_var *clv_override) { union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; int drop_count = 0, task_count = 0, ret; @@ -477,10 +512,10 @@ static int futex_requeue(uint32_t *uaddr1, int fshared, uint32_t *uaddr2, struct plist_head *head1; struct futex_q *this, *next; - ret = get_futex_key(uaddr1, fshared, &key1); + ret = get_futex_key(uaddr1, fshared, &key1, clv_override); if ((ret != 0)) goto out; - ret = get_futex_key(uaddr2, fshared, &key2); + ret = get_futex_key(uaddr2, fshared, &key2, clv_override); if ((ret != 0)) goto out_put_key1; @@ -515,7 +550,7 @@ static int futex_requeue(uint32_t *uaddr1, int fshared, uint32_t *uaddr2, */ /* RIKEN: no requeue_pi at this moment */ if (++task_count <= nr_wake) { - wake_futex(this); + wake_futex(this, clv_override); continue; } @@ -574,7 +609,7 @@ queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb) * state is implicit in the state of woken task (see futex_wait_requeue_pi() for * an example). */ -static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) +static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb, struct cpu_local_var *clv_override) { int prio; @@ -595,7 +630,7 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) q->list.plist.spinlock = &hb->lock; #endif plist_add(&q->list, &hb->chain); - q->task = cpu_local_var(current); + q->task = cpu_local_var_with_override(current, clv_override); ihk_mc_spinlock_unlock_noirq(&hb->lock); } @@ -658,46 +693,64 @@ retry: /* RIKEN: this function has been rewritten so that it returns the remaining * time in case we are waken. 
*/ -static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, - uint64_t timeout) +static int64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, + uint64_t timeout, struct cpu_local_var *clv_override) { - uint64_t time_remain = 0; + int64_t time_remain = 0; unsigned long irqstate; - struct thread *thread = cpu_local_var(current); + struct thread *thread = cpu_local_var_with_override(current, clv_override); /* * The task state is guaranteed to be set before another task can * wake it. * queue_me() calls spin_unlock() upon completion, serializing * access to the hash list and forcing a memory barrier. */ - xchg4(&(cpu_local_var(current)->status), PS_INTERRUPTIBLE); + xchg4(&(thread->status), PS_INTERRUPTIBLE); - /* Indicate spin sleep */ - if (!idle_halt) { + /* Indicate spin sleep. Note that schedule_timeout() with + * idle_halt should use spin sleep because sleep with timeout + * is not implemented. + */ + if (!idle_halt || timeout) { irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock); thread->spin_sleep = 1; ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate); } - queue_me(q, hb); + queue_me(q, hb, clv_override); if (!plist_node_empty(&q->list)) { + if (clv_override) { + uti_dkprintf("%s: tid: %d is trying to sleep\n", __FUNCTION__, thread->tid); + /* Note that the unit of timeout is nsec */ + time_remain = (*linux_wait_event)(q->uti_futex_resp, timeout); + + /* Note that time_remain == 0 indicates contidion evaluated to false after the timeout elapsed */ + if (time_remain < 0) { + if (time_remain == -ERESTARTSYS) { /* Interrupted by signal */ + uti_dkprintf("%s: DEBUG: wait_event returned -ERESTARTSYS\n", __FUNCTION__); + } else { + uti_kprintf("%s: ERROR: wait_event returned %d\n", __FUNCTION__, time_remain); + } + } + uti_dkprintf("%s: tid: %d woken up\n", __FUNCTION__, thread->tid); + } else { if (timeout) { - dkprintf("futex_wait_queue_me(): tid: %d schedule_timeout()\n", 
cpu_local_var(current)->tid); + dkprintf("futex_wait_queue_me(): tid: %d schedule_timeout()\n", thread->tid); time_remain = schedule_timeout(timeout); } else { - dkprintf("futex_wait_queue_me(): tid: %d schedule()\n", cpu_local_var(current)->tid); + dkprintf("futex_wait_queue_me(): tid: %d schedule()\n", thread->tid); spin_sleep_or_schedule(); time_remain = 0; } - - dkprintf("futex_wait_queue_me(): tid: %d woken up\n", cpu_local_var(current)->tid); + dkprintf("futex_wait_queue_me(): tid: %d woken up\n", thread->tid); + } } /* This does not need to be serialized */ - cpu_local_var(current)->status = PS_RUNNING; + thread->status = PS_RUNNING; thread->spin_sleep = 0; return time_remain; @@ -721,7 +774,8 @@ static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlcoked */ static int futex_wait_setup(uint32_t __user *uaddr, uint32_t val, int fshared, - struct futex_q *q, struct futex_hash_bucket **hb) + struct futex_q *q, struct futex_hash_bucket **hb, + struct cpu_local_var *clv_override) { uint32_t uval; int ret; @@ -744,7 +798,7 @@ static int futex_wait_setup(uint32_t __user *uaddr, uint32_t val, int fshared, * rare, but normal. 
*/ q->key = FUTEX_KEY_INIT; - ret = get_futex_key(uaddr, fshared, &q->key); + ret = get_futex_key(uaddr, fshared, &q->key, clv_override); if (ret != 0) return ret; @@ -768,49 +822,59 @@ static int futex_wait_setup(uint32_t __user *uaddr, uint32_t val, int fshared, } static int futex_wait(uint32_t __user *uaddr, int fshared, - uint32_t val, uint64_t timeout, uint32_t bitset, int clockrt) + uint32_t val, uint64_t timeout, uint32_t bitset, int clockrt, + struct cpu_local_var *clv_override) { struct futex_hash_bucket *hb; struct futex_q q; - uint64_t time_remain; + int64_t time_remain; int ret; if (!bitset) return -EINVAL; #ifdef PROFILE_ENABLE - if (cpu_local_var(current)->profile && - cpu_local_var(current)->profile_start_ts) { - cpu_local_var(current)->profile_elapsed_ts += - (rdtsc() - cpu_local_var(current)->profile_start_ts); - cpu_local_var(current)->profile_start_ts = 0; + if (cpu_local_var_with_override(current, clv_override)->profile && + cpu_local_var_with_override(current, clv_override)->profile_start_ts) { + cpu_local_var_with_override(current, clv_override)->profile_elapsed_ts += + (rdtsc() - cpu_local_var_with_override(current, clv_override)->profile_start_ts); + cpu_local_var_with_override(current, clv_override)->profile_start_ts = 0; } #endif q.bitset = bitset; q.requeue_pi_key = NULL; + q.uti_futex_resp = cpu_local_var_with_override(uti_futex_resp, clv_override); retry: /* Prepare to wait on uaddr. */ - ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); - if (ret) + ret = futex_wait_setup(uaddr, val, fshared, &q, &hb, clv_override); + if (ret) { + uti_dkprintf("%s: tid=%d futex_wait_setup returns zero, no need to sleep\n", __FUNCTION__, cpu_local_var_with_override(current, clv_override)->tid); goto out; + } /* queue_me and wait for wakeup, timeout, or a signal. */ - time_remain = futex_wait_queue_me(hb, &q, timeout); + time_remain = futex_wait_queue_me(hb, &q, timeout, clv_override); /* If we were woken (and unqueued), we succeeded, whatever. 
*/ ret = 0; - if (!unqueue_me(&q)) + if (!unqueue_me(&q)) { + uti_dkprintf("%s: tid=%d unqueued\n", __FUNCTION__, cpu_local_var_with_override(current, clv_override)->tid); goto out_put_key; + } ret = -ETIMEDOUT; /* RIKEN: timer expired case (indicated by !time_remain) */ - if (timeout && !time_remain) + if (timeout && !time_remain) { + uti_dkprintf("%s: tid=%d timer expired\n", __FUNCTION__, cpu_local_var_with_override(current, clv_override)->tid); goto out_put_key; + } - if (hassigpending(cpu_local_var(current))) { + /* RIKEN: futex_wait_queue_me() returns -ERESTARTSYS when waiting on Linux CPU and woken up by signal */ + if (hassigpending(cpu_local_var_with_override(current, clv_override)) || time_remain == -ERESTARTSYS) { ret = -EINTR; + uti_dkprintf("%s: tid=%d woken up by signal\n", __FUNCTION__, cpu_local_var_with_override(current, clv_override)->tid); goto out_put_key; } @@ -822,19 +886,22 @@ out_put_key: put_futex_key(fshared, &q.key); out: #ifdef PROFILE_ENABLE - if (cpu_local_var(current)->profile) { - cpu_local_var(current)->profile_start_ts = rdtsc(); + if (cpu_local_var_with_override(current, clv_override)->profile) { + cpu_local_var_with_override(current, clv_override)->profile_start_ts = rdtsc(); } #endif return ret; } int futex(uint32_t *uaddr, int op, uint32_t val, uint64_t timeout, - uint32_t *uaddr2, uint32_t val2, uint32_t val3, int fshared) + uint32_t *uaddr2, uint32_t val2, uint32_t val3, int fshared, + struct cpu_local_var *clv_override) { int clockrt, ret = -ENOSYS; int cmd = op & FUTEX_CMD_MASK; + uti_dkprintf("%s: uaddr=%p, op=%x, val=%x, timeout=%ld, uaddr2=%p, val2=%x, val3=%x, fshared=%d, clv=%p\n", __FUNCTION__, uaddr, op, val, timeout, uaddr2, val2, val3, fshared, clv_override); + clockrt = op & FUTEX_CLOCK_REALTIME; if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI) return -ENOSYS; @@ -843,21 +910,21 @@ int futex(uint32_t *uaddr, int op, uint32_t val, uint64_t timeout, case FUTEX_WAIT: val3 = 
FUTEX_BITSET_MATCH_ANY; case FUTEX_WAIT_BITSET: - ret = futex_wait(uaddr, fshared, val, timeout, val3, clockrt); + ret = futex_wait(uaddr, fshared, val, timeout, val3, clockrt, clv_override); break; case FUTEX_WAKE: val3 = FUTEX_BITSET_MATCH_ANY; case FUTEX_WAKE_BITSET: - ret = futex_wake(uaddr, fshared, val, val3); + ret = futex_wake(uaddr, fshared, val, val3, clv_override); break; case FUTEX_REQUEUE: - ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0); + ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0, clv_override); break; case FUTEX_CMP_REQUEUE: - ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, 0); + ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, 0, clv_override); break; case FUTEX_WAKE_OP: - ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3); + ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3, clv_override); break; /* RIKEN: these calls are not supported for now. case FUTEX_LOCK_PI: diff --git a/kernel/host.c b/kernel/host.c index 5d62544e..adad70bc 100644 --- a/kernel/host.c +++ b/kernel/host.c @@ -34,13 +34,13 @@ #include #include #include +#include //#define DEBUG_PRINT_HOST #ifdef DEBUG_PRINT_HOST -#define dkprintf kprintf -#else -#define dkprintf(...) 
do { if (0) kprintf(__VA_ARGS__); } while (0) +#undef DDEBUG_DEFAULT +#define DDEBUG_DEFAULT DDEBUG_PRINT #endif /* Linux channel table, indexec by Linux CPU id */ @@ -78,7 +78,6 @@ int prepare_process_ranges_args_envs(struct thread *thread, unsigned long args_envs_p, args_envs_rp; unsigned long s, e, up; char **argv; - char **a; int i, n, argc, envc, args_envs_npages; char **env; int range_npages; @@ -306,7 +305,7 @@ int prepare_process_ranges_args_envs(struct thread *thread, /* Only unmap remote address if it wasn't specified as an argument */ if (!args) { - ihk_mc_unmap_virtual(args_envs_r, args_envs_npages, 0); + ihk_mc_unmap_virtual(args_envs_r, args_envs_npages); ihk_mc_unmap_memory(NULL, args_envs_rp, p->args_len); } flush_tlb(); @@ -341,7 +340,7 @@ int prepare_process_ranges_args_envs(struct thread *thread, /* Only map remote address if it wasn't specified as an argument */ if (!envs) { - ihk_mc_unmap_virtual(args_envs_r, args_envs_npages, 0); + ihk_mc_unmap_virtual(args_envs_r, args_envs_npages); ihk_mc_unmap_memory(NULL, args_envs_rp, p->envs_len); } flush_tlb(); @@ -357,12 +356,13 @@ int prepare_process_ranges_args_envs(struct thread *thread, proc->saved_cmdline_len = 0; } - proc->saved_cmdline = kmalloc(p->args_len, IHK_MC_AP_NOWAIT); + proc->saved_cmdline_len = p->args_len - ((argc + 2) * sizeof(char **)); + proc->saved_cmdline = kmalloc(proc->saved_cmdline_len, + IHK_MC_AP_NOWAIT); if (!proc->saved_cmdline) { goto err; } - proc->saved_cmdline_len = p->args_len - ((argc + 2) * sizeof(char **)); memcpy(proc->saved_cmdline, (char *)args_envs + ((argc + 2) * sizeof(char **)), proc->saved_cmdline_len); @@ -370,21 +370,18 @@ int prepare_process_ranges_args_envs(struct thread *thread, __FUNCTION__, proc->saved_cmdline); - for (a = argv; *a; a++) { - *a = (char *)addr + (unsigned long)*a; // Process' address space! + for (i = 0; i < argc; i++) { + // Process' address space! 
+ argv[i] = (char *)addr + (unsigned long)argv[i]; } envc = *((long *)(args_envs + p->args_len)); dkprintf("envc: %d\n", envc); env = (char **)(args_envs + p->args_len + sizeof(long)); - while (*env) { - char **_env = env; - //dkprintf("%s\n", args_envs + p->args_len + (unsigned long)*env); - *env = (char *)addr + p->args_len + (unsigned long)*env; - env = ++_env; + for (i = 0; i < envc; i++) { + env[i] = addr + p->args_len + env[i]; } - env = (char **)(args_envs + p->args_len + sizeof(long)); dkprintf("env OK\n"); @@ -449,7 +446,7 @@ static int process_msg_prepare_process(unsigned long rphys) if((pn = kmalloc(sizeof(struct program_load_desc) + sizeof(struct program_image_section) * n, IHK_MC_AP_NOWAIT)) == NULL){ - ihk_mc_unmap_virtual(p, npages, 0); + ihk_mc_unmap_virtual(p, npages); ihk_mc_unmap_memory(NULL, phys, sz); return -ENOMEM; } @@ -460,7 +457,7 @@ static int process_msg_prepare_process(unsigned long rphys) (unsigned long *)&p->cpu_set, sizeof(p->cpu_set))) == NULL) { kfree(pn); - ihk_mc_unmap_virtual(p, npages, 1); + ihk_mc_unmap_virtual(p, npages); ihk_mc_unmap_memory(NULL, phys, sz); return -ENOMEM; } @@ -482,6 +479,7 @@ static int process_msg_prepare_process(unsigned long rphys) proc->mpol_flags = pn->mpol_flags; proc->mpol_threshold = pn->mpol_threshold; proc->nr_processes = pn->nr_processes; + proc->process_rank = pn->process_rank; proc->heap_extension = pn->heap_extension; /* Update NUMA binding policy if requested */ @@ -504,6 +502,9 @@ static int process_msg_prepare_process(unsigned long rphys) vm->numa_mem_policy = MPOL_BIND; } + proc->uti_thread_rank = pn->uti_thread_rank; + proc->uti_use_last_cpu = pn->uti_use_last_cpu; + #ifdef PROFILE_ENABLE proc->profile = pn->profile; thread->profile = pn->profile; @@ -542,14 +543,14 @@ static int process_msg_prepare_process(unsigned long rphys) kfree(pn); - ihk_mc_unmap_virtual(p, npages, 1); + ihk_mc_unmap_virtual(p, npages); ihk_mc_unmap_memory(NULL, phys, sz); flush_tlb(); return 0; err: kfree(pn); - 
ihk_mc_unmap_virtual(p, npages, 1); + ihk_mc_unmap_virtual(p, npages); ihk_mc_unmap_memory(NULL, phys, sz); destroy_thread(thread); return -ENOMEM; @@ -562,7 +563,6 @@ static void syscall_channel_send(struct ihk_ikc_channel_desc *c, } extern unsigned long do_kill(struct thread *, int, int, int, struct siginfo *, int ptracecont); -extern void process_procfs_request(struct ikc_scd_packet *rpacket); extern void terminate_host(int pid); extern void debug_log(long); @@ -573,7 +573,6 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, struct ikc_scd_packet pckt; struct ihk_ikc_channel_desc *resp_channel = cpu_local_var(ikc2linux); int rc; - struct mcs_rwlock_node_irqsave lock; struct thread *thread; struct process *proc; struct mcctrl_signal { @@ -610,7 +609,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, case SCD_MSG_SCHEDULE_PROCESS: thread = (struct thread *)packet->arg; - cpuid = obtain_clone_cpuid(&thread->cpu_set); + cpuid = obtain_clone_cpuid(&thread->cpu_set, 0); if (cpuid == -1) { kprintf("No CPU available\n"); ret = -1; @@ -634,14 +633,14 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, * the waiting thread */ case SCD_MSG_WAKE_UP_SYSCALL_THREAD: - thread = find_thread(0, packet->ttid, &lock); + thread = find_thread(0, packet->ttid); if (!thread) { kprintf("%s: WARNING: no thread for SCD reply? 
TID: %d\n", __FUNCTION__, packet->ttid); ret = -EINVAL; break; } - thread_unlock(thread, &lock); + thread_unlock(thread); dkprintf("%s: SCD_MSG_WAKE_UP_SYSCALL_THREAD: waking up tid %d\n", __FUNCTION__, packet->ttid); @@ -653,7 +652,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, pp = ihk_mc_map_memory(NULL, packet->arg, sizeof(struct mcctrl_signal)); sp = (struct mcctrl_signal *)ihk_mc_map_virtual(pp, 1, PTATTR_WRITABLE | PTATTR_ACTIVE); memcpy(&info, sp, sizeof(struct mcctrl_signal)); - ihk_mc_unmap_virtual(sp, 1, 0); + ihk_mc_unmap_virtual(sp, 1); ihk_mc_unmap_memory(NULL, pp, sizeof(struct mcctrl_signal)); pckt.msg = SCD_MSG_SEND_SIGNAL_ACK; pckt.err = 0; @@ -668,7 +667,14 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, break; case SCD_MSG_PROCFS_REQUEST: - process_procfs_request(packet); + case SCD_MSG_PROCFS_RELEASE: + pckt.msg = SCD_MSG_PROCFS_ANSWER; + pckt.ref = packet->ref; + pckt.arg = packet->arg; + pckt.err = process_procfs_request(packet); + pckt.reply = packet->reply; + pckt.pid = packet->pid; + syscall_channel_send(resp_channel, &pckt); ret = 0; break; @@ -705,17 +711,26 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, if (!pcd->exclude_user) { mode |= PERFCTR_USER_MODE; } - ihk_mc_perfctr_init_raw(pcd->target_cntr, pcd->config, mode); - ihk_mc_perfctr_stop(1 << pcd->target_cntr); - ihk_mc_perfctr_reset(pcd->target_cntr); + + ret = ihk_mc_perfctr_init_raw(pcd->target_cntr, pcd->config, mode); + if (ret != 0) { + break; + } + + ret = ihk_mc_perfctr_stop(1 << pcd->target_cntr); + if (ret != 0) { + break; + } + + ret = ihk_mc_perfctr_reset(pcd->target_cntr); break; case PERF_CTRL_ENABLE: - ihk_mc_perfctr_start(pcd->target_cntr_mask); + ret = ihk_mc_perfctr_start(pcd->target_cntr_mask); break; case PERF_CTRL_DISABLE: - ihk_mc_perfctr_stop(pcd->target_cntr_mask); + ret = ihk_mc_perfctr_stop(pcd->target_cntr_mask); break; case PERF_CTRL_GET: @@ -726,16 +741,15 @@ static int 
syscall_packet_handler(struct ihk_ikc_channel_desc *c, kprintf("%s: SCD_MSG_PERF_CTRL unexpected ctrl_type\n", __FUNCTION__); } - ihk_mc_unmap_virtual(pcd, 1, 0); + ihk_mc_unmap_virtual(pcd, 1); ihk_mc_unmap_memory(NULL, pp, sizeof(struct perf_ctrl_desc)); pckt.msg = SCD_MSG_PERF_ACK; - pckt.err = 0; + pckt.err = ret; pckt.arg = packet->arg; pckt.reply = packet->reply; ihk_ikc_send(resp_channel, &pckt, 0); - ret = 0; break; case SCD_MSG_CPU_RW_REG:
diff --git a/kernel/hugefileobj.c b/kernel/hugefileobj.c
new file mode 100644
index 00000000..2684888e
--- /dev/null
+++ b/kernel/hugefileobj.c
@@ -0,0 +1,354 @@
+#include
+#include
+#include
+#include
+#include
+#include
+
+#if DEBUG_HUGEFILEOBJ
+#undef DDEBUG_DEFAULT
+#define DDEBUG_DEFAULT DDEBUG_PRINT
+#endif
+
+/* One contiguous run of pages backing part of a hugetlbfs file. */
+struct hugefilechunk {
+	struct list_head list;	/* link in hugefileobj.chunk_list */
+	off_t pgoff;		/* start of the chunk, in PAGE_SIZE units */
+	int npages;		/* length of the chunk, in PAGE_SIZE units */
+	void *mem;		/* virtual address of the backing pages */
+};
+
+/*
+ * memobj flavour for hugetlbfs files. memobj must remain the first member
+ * because to_hugefileobj() is a plain pointer cast.
+ */
+struct hugefileobj {
+	struct memobj memobj;
+	size_t pgsize;
+	uintptr_t handle;
+	unsigned int pgshift;
+	struct list_head chunk_list;
+	ihk_spinlock_t chunk_lock;
+	struct list_head obj_list;
+};
+
+static ihk_spinlock_t hugefileobj_list_lock;
+static LIST_HEAD(hugefileobj_list);
+
+static struct hugefileobj *to_hugefileobj(struct memobj *memobj)
+{
+	return (struct hugefileobj *)memobj;
+}
+
+static struct memobj *to_memobj(struct hugefileobj *obj)
+{
+	return &obj->memobj;
+}
+
+/*
+ * Find the object registered for @handle and take a reference on it.
+ * The caller holds hugefileobj_list_lock (see hugefileobj_pre_create()).
+ */
+static struct hugefileobj *hugefileobj_lookup(uintptr_t handle)
+{
+	struct hugefileobj *p;
+
+	list_for_each_entry(p, &hugefileobj_list, obj_list) {
+		if (p->handle == handle) {
+			/* for the interval between last put and fileobj_free
+			 * taking list_lock
+			 */
+			if (memobj_ref(&p->memobj) <= 1) {
+				ihk_atomic_dec(&p->memobj.refcnt);
+				continue;
+			}
+			return p;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Translate file offset @off into the physical address backing it.
+ * Returns 0 and sets *physp on success, -ENOMEM if @p2align does not match
+ * the object's page shift, -EIO if no chunk covers @off.
+ */
+static int hugefileobj_get_page(struct memobj *memobj, off_t off,
+				int p2align, uintptr_t *physp,
+				unsigned long *pflag, uintptr_t virt_addr)
+{
+	struct hugefileobj *obj = to_hugefileobj(memobj);
+	struct hugefilechunk *chunk, *found = NULL;
+	off_t pgoff;
+
+	if (p2align != obj->pgshift - PTL1_SHIFT) {
+		kprintf("%s: p2align %d but expected %d\n",
+			__func__, p2align, obj->pgshift - PTL1_SHIFT);
+		return -ENOMEM;
+	}
+
+	/* chunk->pgoff and chunk->npages count PAGE_SIZE pages, not huge pages */
+	pgoff = off >> PAGE_SHIFT;
+	ihk_mc_spinlock_lock_noirq(&obj->chunk_lock);
+	list_for_each_entry(chunk, &obj->chunk_list, list) {
+		if (pgoff >= chunk->pgoff + chunk->npages)
+			continue;
+		if (pgoff >= chunk->pgoff)
+			found = chunk;
+		/* ordered list: no later chunk can cover pgoff either */
+		break;
+	}
+	ihk_mc_spinlock_unlock_noirq(&obj->chunk_lock);
+	/*
+	 * Do not test the loop cursor: after a full traversal it points at
+	 * the list head container, not at NULL.
+	 */
+	if (!found) {
+		kprintf("%s: no segment found for pgoff %lx (obj %p)\n",
+			__func__, pgoff, obj);
+		return -EIO;
+	}
+
+	*physp = virt_to_phys(found->mem + (off - found->pgoff * PAGE_SIZE));
+
+	return 0;
+}
+
+/* Unlink the object from the global list, then free chunks, path and object. */
+static void hugefileobj_free(struct memobj *memobj)
+{
+	struct hugefileobj *obj = to_hugefileobj(memobj);
+	struct hugefilechunk *chunk, *next;
+
+	dkprintf("Destroying hugefileobj %p\n", memobj);
+
+	ihk_mc_spinlock_lock_noirq(&hugefileobj_list_lock);
+	list_del(&obj->obj_list);
+	ihk_mc_spinlock_unlock_noirq(&hugefileobj_list_lock);
+
+	kfree(memobj->path);
+	/* don't bother with chunk_lock, memobj refcounting makes this safe */
+	list_for_each_entry_safe(chunk, next, &obj->chunk_list, list) {
+		ihk_mc_free_pages_user(chunk->mem, chunk->npages);
+		kfree(chunk);
+	}
+	kfree(memobj);
+}
+
+struct memobj_ops hugefileobj_ops = {
+	.free = hugefileobj_free,
+	.get_page = hugefileobj_get_page,
+};
+
+/* Tear down every object still linked on the global list. */
+void hugefileobj_cleanup(void)
+{
+	struct hugefileobj *obj;
+	int refcnt;
+
+	while (true) {
+		ihk_mc_spinlock_lock_noirq(&hugefileobj_list_lock);
+		if (list_empty(&hugefileobj_list)) {
+			ihk_mc_spinlock_unlock_noirq(&hugefileobj_list_lock);
+			break;
+		}
+		obj = list_first_entry(&hugefileobj_list, struct hugefileobj,
+				       obj_list);
+		ihk_mc_spinlock_unlock_noirq(&hugefileobj_list_lock);
+
+		/* memobj_unref() already frees the object when it reaches zero */
+		refcnt = memobj_unref(to_memobj(obj));
+		if (refcnt != 0) {
+			kprintf("%s: obj %p had refcnt %d > 1, destroying anyway\n",
+				__func__, obj, refcnt + 1);
+			hugefileobj_free(to_memobj(obj));
+		}
+	}
+}
+
+/*
+ * Look up, or allocate and register, the hugefileobj for result->handle.
+ * On success stores the memobj in *objp and result->maxprot in *maxprotp
+ * and returns 0; returns -ENOMEM if an allocation fails.
+ */
+int hugefileobj_pre_create(struct pager_create_result *result,
+			   struct memobj **objp, int *maxprotp)
+{
+	struct hugefileobj *obj;
+	int error = 0;
+
+	ihk_mc_spinlock_lock_noirq(&hugefileobj_list_lock);
+	obj = hugefileobj_lookup(result->handle);
+	if (obj)
+		goto out_unlock;
+
+	obj = kmalloc(sizeof(*obj), IHK_MC_AP_NOWAIT);
+	if (!obj) {
+		error = -ENOMEM;
+		goto out_unlock;
+	}
+
+	/*
+	 * Do not rely on kmalloc() handing out zeroed memory:
+	 * hugefileobj_free() kfree()s path even when none was set.
+	 */
+	memset(obj, 0, sizeof(*obj));
+	obj->handle = result->handle;
+	obj->pgsize = result->size;
+	obj->pgshift = 0;
+	INIT_LIST_HEAD(&obj->chunk_list);
+	ihk_mc_spinlock_init(&obj->chunk_lock);
+	obj->memobj.flags = result->flags;
+	obj->memobj.status = MEMOBJ_TO_BE_PREFETCHED;
+	obj->memobj.ops = &hugefileobj_ops;
+	/* keep mapping around when process is gone */
+	ihk_atomic_set(&obj->memobj.refcnt, 2);
+	if (result->path[0]) {
+		obj->memobj.path = kmalloc(PATH_MAX, IHK_MC_AP_NOWAIT);
+		if (!obj->memobj.path) {
+			kfree(obj);
+			error = -ENOMEM;
+			goto out_unlock;
+		}
+		strncpy(obj->memobj.path, result->path, PATH_MAX);
+		/* strncpy() leaves a PATH_MAX-long source unterminated */
+		obj->memobj.path[PATH_MAX - 1] = '\0';
+	}
+
+	list_add(&obj->obj_list, &hugefileobj_list);
+out_unlock:
+	ihk_mc_spinlock_unlock_noirq(&hugefileobj_list_lock);
+	if (error)
+		return error;
+
+	*maxprotp = result->maxprot;
+	*objp = to_memobj(obj);
+
+	return 0;
+}
+
+/*
+ * Back the file range [off, off + len) with zeroed pages, skipping the
+ * parts already covered by existing chunks, and store the page shift in
+ * *pgshiftp. Returns 0 on success, -EINVAL on a page size or alignment
+ * problem, -ENOMEM if memory runs out (chunks added so far stay listed).
+ */
+int hugefileobj_create(struct memobj *memobj, size_t len, off_t off,
+		       int *pgshiftp, uintptr_t virt_addr)
+{
+	struct hugefileobj *obj = to_hugefileobj(memobj);
+	struct hugefilechunk *chunk = NULL, *old_chunk = NULL;
+	int p2align;
+	unsigned int pgshift;
+	int npages, npages_left;
+	void *v;
+	off_t pgoff, next_pgoff;
+	int error;
+
+	error = arch_get_smaller_page_size(NULL, obj->pgsize + 1, NULL,
+					   &p2align);
+	if (error)
+		return error;
+	pgshift = p2align + PTL1_SHIFT;
+	if (1 << pgshift != obj->pgsize) {
+		dkprintf("invalid hugefileobj pagesize: %ld\n",
+			 (long)obj->pgsize);
+		return -EINVAL;
+	}
+
+	if (len & ((1 << pgshift) - 1)) {
+		dkprintf("invalid hugetlbfs mmap size %ld (pagesize %d)\n",
+			 (long)len, 1 << pgshift);
+		obj->pgshift = 0;
+		return -EINVAL;
+	}
+	if (off & ((1 << pgshift) - 1)) {
+		dkprintf("invalid hugetlbfs mmap offset %ld (pagesize %d)\n",
+			 (long)off, 1 << pgshift);
+		obj->pgshift = 0;
+		return -EINVAL;
+	}
+
+	ihk_mc_spinlock_lock_noirq(&obj->chunk_lock);
+	if (obj->pgshift && obj->pgshift != pgshift) {
+		kprintf("pgshift changed between two calls on same inode?! had %d now %d\n",
+			obj->pgshift, pgshift);
+		error = -EINVAL;
+		goto out_unlock;
+	}
+	obj->pgshift = pgshift;
+
+	/* Prealloc upfront, we need to fail here if not enough memory. */
+	if (!list_empty(&obj->chunk_list))
+		old_chunk = list_first_entry(&obj->chunk_list,
+					     struct hugefilechunk, list);
+	pgoff = off >> PAGE_SHIFT;
+	npages_left = len >> PAGE_SHIFT;
+	npages = npages_left;
+	/* may go negative when an old chunk extends past the range */
+	while (npages_left > 0) {
+		while (old_chunk &&
+		       pgoff >= old_chunk->pgoff + old_chunk->npages) {
+			if (list_is_last(&old_chunk->list, &obj->chunk_list)) {
+				old_chunk = NULL;
+				break;
+			}
+			old_chunk = list_entry(old_chunk->list.next,
+					       struct hugefilechunk, list);
+		}
+		if (old_chunk) {
+			next_pgoff = old_chunk->pgoff + old_chunk->npages;
+			if (pgoff >= old_chunk->pgoff && pgoff < next_pgoff) {
+				npages_left -= next_pgoff - pgoff;
+				pgoff = next_pgoff;
+				continue;
+			}
+		}
+		if (!chunk) {
+			chunk = kmalloc(sizeof(*chunk), IHK_MC_AP_NOWAIT);
+		}
+		if (!chunk) {
+			kprintf("could not allocate hugefileobj chunk\n");
+			error = -ENOMEM;
+			goto out_unlock;
+		}
+		if (npages > npages_left)
+			npages = npages_left;
+		v = ihk_mc_alloc_aligned_pages_user(npages, p2align,
+				IHK_MC_AP_NOWAIT | IHK_MC_AP_USER, virt_addr);
+		if (!v) {
+			if (npages == 1) {
+				dkprintf("could not allocate more pages wth pgshift %d\n",
+					 pgshift);
+				kfree(chunk);
+				/* caller will cleanup the rest */
+				error = -ENOMEM;
+				goto out_unlock;
+			}
+			/* exponential backoff, try less aggressive? */
+			npages /= 2;
+			continue;
+		}
+		memset(v, 0, npages * PAGE_SIZE);
+		chunk->npages = npages;
+		chunk->mem = v;
+		chunk->pgoff = pgoff;
+		/* ordered list: insert before next (bigger) element */
+		if (old_chunk)
+			list_add(&chunk->list, old_chunk->list.prev);
+		else
+			list_add(&chunk->list, obj->chunk_list.prev);
+		/* now owned by the list: never reuse it for the next chunk */
+		chunk = NULL;
+		pgoff += npages;
+		npages_left -= npages;
+	}
+	obj->memobj.size = len;
+	*pgshiftp = pgshift;
+
+out_unlock:
+	ihk_mc_spinlock_unlock_noirq(&obj->chunk_lock);
+
+	return error;
+}
diff --git a/kernel/include/cls.h b/kernel/include/cls.h index d778407c..f1f8b4c0 100644 --- a/kernel/include/cls.h +++ b/kernel/include/cls.h @@ -21,7 +21,7 @@ struct kmalloc_header { unsigned int front_magic; - unsigned int cpu_id; + int cpu_id; struct list_head list; int size; /* The size of this chunk without the header */ unsigned int end_magic; @@ -74,6 +74,7 @@ struct cpu_local_var { struct thread *current; struct list_head runq; size_t runq_len; + size_t runq_reserved; /* Number of threads which are about to be added to runq */ struct ihk_ikc_channel_desc *ikc2linux; @@ -99,6 +100,9 @@ struct cpu_local_var { struct list_head smp_func_req_list; struct process_vm *on_fork_vm; + + /* UTI */ + void *uti_futex_resp; } __attribute__((aligned(64))); @@ -110,4 +114,6 @@ static struct cpu_local_var *get_this_cpu_local_var(void) #define cpu_local_var(name) get_this_cpu_local_var()->name +#define cpu_local_var_with_override(name, clv_override) (clv_override ? 
clv_override->name : get_this_cpu_local_var()->name) + #endif diff --git a/kernel/include/debug.h b/kernel/include/debug.h new file mode 100644 index 00000000..6c471ab9 --- /dev/null +++ b/kernel/include/debug.h @@ -0,0 +1,54 @@ +#ifndef DEBUG_H +#define DEBUG_H + +#include "lwk/compiler.h" + +void panic(const char *); + +/* when someone has a lot of time, add attribute __printf(1, 2) to kprintf */ +int kprintf(const char *format, ...); + +struct ddebug { + const char *file; + const char *func; + const char *fmt; + unsigned int line:24; + unsigned int flags:8; +} __aligned(8); + +#define DDEBUG_NONE 0x0 +#define DDEBUG_PRINT 0x1 + +#define DDEBUG_DEFAULT DDEBUG_NONE + +#define DDEBUG_SYMBOL() \ + static struct ddebug __aligned(8) \ + __attribute__((section("__verbose"))) ddebug = { \ + .file = __FILE__, \ + .func = __func__, \ + .line = __LINE__, \ + .flags = DDEBUG_DEFAULT, \ + } + +#define DDEBUG_TEST ddebug.flags + + + +#define dkprintf(fmt, args...) \ +do { \ + DDEBUG_SYMBOL(); \ + if (DDEBUG_TEST) \ + kprintf(fmt, ##args); \ +} while (0) +#define ekprintf(fmt, args...) kprintf(fmt, ##args) + +#define BUG_ON(condition) do { \ + if (condition) { \ + kprintf("PANIC: %s: %s(line:%d)\n", \ + __FILE__, __func__, __LINE__); \ + panic(""); \ + } \ +} while (0) +#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) + +#endif diff --git a/kernel/include/futex.h b/kernel/include/futex.h index cf0b9f3f..f09afcf9 100644 --- a/kernel/include/futex.h +++ b/kernel/include/futex.h @@ -63,7 +63,7 @@ #define FUTEX_OP_ANDN 3 /* *(int *)UADDR2 &= ~OPARG; */ #define FUTEX_OP_XOR 4 /* *(int *)UADDR2 ^= OPARG; */ -#define FUTEX_OP_OPARG_SHIFT 8 /* Use (1 << OPARG) instead of OPARG. */ +#define FUTEX_OP_OPARG_SHIFT 8U /* Use (1 << OPARG) instead of OPARG. 
*/ #define FUTEX_OP_CMP_EQ 0 /* if (oldval == CMPARG) wake */ #define FUTEX_OP_CMP_NE 1 /* if (oldval != CMPARG) wake */ @@ -150,6 +150,7 @@ union futex_key { extern int futex_init(void); +struct cpu_local_var; extern int futex( uint32_t __user * uaddr, @@ -159,7 +160,8 @@ futex( uint32_t __user * uaddr2, uint32_t val2, uint32_t val3, - int fshared + int fshared, + struct cpu_local_var *clv_override ); diff --git a/kernel/include/init.h b/kernel/include/init.h index 874a7cbc..400438c2 100644 --- a/kernel/include/init.h +++ b/kernel/include/init.h @@ -33,6 +33,7 @@ extern void cpu_sysfs_setup(void); extern void numa_sysfs_setup(void); extern void rusage_sysfs_setup(void); extern void status_sysfs_setup(void); +extern void dynamic_debug_sysfs_setup(void); extern char *find_command_line(char *name); diff --git a/kernel/include/kmalloc.h b/kernel/include/kmalloc.h index d48704e1..384d87fe 100644 --- a/kernel/include/kmalloc.h +++ b/kernel/include/kmalloc.h @@ -13,11 +13,9 @@ #ifndef __HEADER_KMALLOC_H #define __HEADER_KMALLOC_H -#include -#include - -void panic(const char *); -int kprintf(const char *format, ...); +#include "ihk/mm.h" +#include "cls.h" +#include "debug.h" #define kmalloc(size, flag) ({\ void *r = _kmalloc(size, flag, __FILE__, __LINE__);\ diff --git a/kernel/include/lwk/compiler-gcc.h b/kernel/include/lwk/compiler-gcc.h index 7dddaa75..c0695a9f 100644 --- a/kernel/include/lwk/compiler-gcc.h +++ b/kernel/include/lwk/compiler-gcc.h @@ -12,11 +12,8 @@ /* Optimization barrier */ /* The "volatile" is due to gcc bugs */ -/* XXX: barrier is also defined in lib/include/ihk/cpu.h, - * it would be cleaner to restore this here at some point, but we have - * quite a few C files not including either this or kernel's compiler.h - * #define barrier() __asm__ __volatile__("": : :"memory") - */ +#define barrier() __asm__ __volatile__("": : :"memory") + /* * This version is i.e. 
to prevent dead stores elimination on @ptr * where gcc and llvm may behave differently when otherwise using diff --git a/kernel/include/lwk/compiler.h b/kernel/include/lwk/compiler.h index df62035b..ee987f2c 100644 --- a/kernel/include/lwk/compiler.h +++ b/kernel/include/lwk/compiler.h @@ -3,6 +3,8 @@ #ifndef __ASSEMBLY__ +#include + #ifdef __CHECKER__ # define __user __attribute__((noderef, address_space(1))) # define __kernel __attribute__((address_space(0))) @@ -175,11 +177,6 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, # define unlikely(x) __builtin_expect(!!(x), 0) #endif -/* Optimization barrier */ -#ifndef barrier -# define barrier() __memory_barrier() -#endif - #ifndef barrier_data # define barrier_data(ptr) barrier() #endif @@ -490,4 +487,62 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, (_________p1); \ }) +extern void *memcpy(void *dest, const void *src, size_t n); + +static __always_inline void __read_once_size(const volatile void *p, void *res, int size) +{ + switch (size) { + case 1: *(unsigned char *)res = *(volatile unsigned char *)p; break; + case 2: *(unsigned short *)res = *(volatile unsigned short *)p; break; + case 4: *(unsigned int *)res = *(volatile unsigned int *)p; break; + case 8: *(unsigned long long *)res = *(volatile unsigned long long *)p; break; + default: + barrier(); + memcpy((void *)res, (const void *)p, size); + barrier(); + } +} + +static __always_inline void __write_once_size(volatile void *p, void *res, int size) +{ + switch (size) { + case 1: *(volatile unsigned char *)p = *(unsigned char *)res; break; + case 2: *(volatile unsigned short *)p = *(unsigned short *)res; break; + case 4: *(volatile unsigned int *)p = *(unsigned int *)res; break; + case 8: *(volatile unsigned long long *)p = *(unsigned long long *)res; break; + default: + barrier(); + memcpy((void *)p, (const void *)res, size); + barrier(); + } +} + +/* + * Prevent the compiler from merging or refetching reads or writes. 
The + * compiler is also forbidden from reordering successive instances of + * READ_ONCE, WRITE_ONCE and ACCESS_ONCE (see below), but only when the + * compiler is aware of some particular ordering. One way to make the + * compiler aware of ordering is to put the two invocations of READ_ONCE, + * WRITE_ONCE or ACCESS_ONCE() in different C statements. + * + * In contrast to ACCESS_ONCE these two macros will also work on aggregate + * data types like structs or unions. If the size of the accessed data + * type exceeds the word size of the machine (e.g., 32 bits or 64 bits) + * READ_ONCE() and WRITE_ONCE() will fall back to memcpy and print a + * compile-time warning. + * + * Their two major use cases are: (1) Mediating communication between + * process-level code and irq/NMI handlers, all running on the same CPU, + * and (2) Ensuring that the compiler does not fold, spindle, or otherwise + * mutilate accesses that either do not require ordering or that interact + * with an explicit memory barrier or atomic instruction that provides the + * required ordering. + */ + +#define READ_ONCE(x) \ + ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) + +#define WRITE_ONCE(x, val) \ + ({ typeof(x) __val = (val); __write_once_size(&(x), &__val, sizeof(__val)); __val; }) + #endif /* __LWK_COMPILER_H */ diff --git a/kernel/include/lwk/futex.h b/kernel/include/lwk/futex.h index 9c28208a..71bd088e 100644 --- a/kernel/include/lwk/futex.h +++ b/kernel/include/lwk/futex.h @@ -25,7 +25,7 @@ #define FUTEX_OP_ANDN 3 /* *(int *)UADDR2 &= ~OPARG; */ #define FUTEX_OP_XOR 4 /* *(int *)UADDR2 ^= OPARG; */ -#define FUTEX_OP_OPARG_SHIFT 8 /* Use (1 << OPARG) instead of OPARG. */ +#define FUTEX_OP_OPARG_SHIFT 8U /* Use (1 << OPARG) instead of OPARG. 
*/ #define FUTEX_OP_CMP_EQ 0 /* if (oldval == CMPARG) wake */ #define FUTEX_OP_CMP_NE 1 /* if (oldval != CMPARG) wake */ diff --git a/kernel/include/memobj.h b/kernel/include/memobj.h index b164e670..593fe7d1 100644 --- a/kernel/include/memobj.h +++ b/kernel/include/memobj.h @@ -19,6 +19,7 @@ #include #include #include +#include #ifdef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */ #else /* POSTK_DEBUG_ARCH_DEP_18 */ @@ -44,8 +45,7 @@ enum { MF_XPMEM = 0x10000, /* To identify XPMEM attachment pages for rusage accounting */ MF_ZEROOBJ = 0x20000, /* To identify pages of anonymous, on-demand paging ranges for rusage accounting */ MF_SHM = 0x40000, - MF_HOST_RELEASED = 0x80000000, - MF_END + MF_HUGETLBFS = 0x100000, }; #define MEMOBJ_READY 0 @@ -56,7 +56,7 @@ struct memobj { uint32_t flags; uint32_t status; size_t size; - ihk_spinlock_t lock; + ihk_atomic_t refcnt; /* For pre-mapped memobjects */ void **pages; @@ -64,8 +64,7 @@ struct memobj { char *path; }; -typedef void memobj_release_func_t(struct memobj *obj); -typedef void memobj_ref_func_t(struct memobj *obj); +typedef void memobj_free_func_t(struct memobj *obj); typedef int memobj_get_page_func_t(struct memobj *obj, off_t off, int p2align, uintptr_t *physp, unsigned long *flag, uintptr_t virt_addr); typedef uintptr_t memobj_copy_page_func_t(struct memobj *obj, uintptr_t orgphys, int p2align); typedef int memobj_flush_page_func_t(struct memobj *obj, uintptr_t phys, size_t pgsize); @@ -73,27 +72,28 @@ typedef int memobj_invalidate_page_func_t(struct memobj *obj, uintptr_t phys, si typedef int memobj_lookup_page_func_t(struct memobj *obj, off_t off, int p2align, uintptr_t *physp, unsigned long *flag); struct memobj_ops { - memobj_release_func_t * release; - memobj_ref_func_t * ref; - memobj_get_page_func_t * get_page; - memobj_copy_page_func_t * copy_page; - memobj_flush_page_func_t * flush_page; - memobj_invalidate_page_func_t * invalidate_page; - memobj_lookup_page_func_t * lookup_page; + 
memobj_free_func_t *free; + memobj_get_page_func_t *get_page; + memobj_copy_page_func_t *copy_page; + memobj_flush_page_func_t *flush_page; + memobj_invalidate_page_func_t *invalidate_page; + memobj_lookup_page_func_t *lookup_page; }; -static inline void memobj_release(struct memobj *obj) +static inline int memobj_ref(struct memobj *obj) { - if (obj->ops->release) { - (*obj->ops->release)(obj); - } + return ihk_atomic_inc_return(&obj->refcnt); } -static inline void memobj_ref(struct memobj *obj) +static inline int memobj_unref(struct memobj *obj) { - if (obj->ops->ref) { - (*obj->ops->ref)(obj); + int cnt; + + if ((cnt = ihk_atomic_dec_return(&obj->refcnt)) == 0) { + (*obj->ops->free)(obj); } + + return cnt; } static inline int memobj_get_page(struct memobj *obj, off_t off, @@ -140,16 +140,6 @@ static inline int memobj_lookup_page(struct memobj *obj, off_t off, return -ENXIO; } -static inline void memobj_lock(struct memobj *obj) -{ - ihk_mc_spinlock_lock_noirq(&obj->lock); -} - -static inline void memobj_unlock(struct memobj *obj) -{ - ihk_mc_spinlock_unlock_noirq(&obj->lock); -} - static inline int memobj_has_pager(struct memobj *obj) { return !!(obj->flags & MF_HAS_PAGER); @@ -166,5 +156,10 @@ int shmobj_create(struct shmid_ds *ds, struct memobj **objp); int zeroobj_create(struct memobj **objp); int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxprotp, int prot, int populate_flags); +int hugefileobj_pre_create(struct pager_create_result *result, + struct memobj **objp, int *maxprotp); +int hugefileobj_create(struct memobj *obj, size_t len, off_t off, + int *pgshiftp, uintptr_t virt_addr); +void hugefileobj_cleanup(void); #endif /* HEADER_MEMOBJ_H */ diff --git a/kernel/include/process.h b/kernel/include/process.h index e76e2220..4b0f6472 100644 --- a/kernel/include/process.h +++ b/kernel/include/process.h @@ -70,10 +70,8 @@ #define PS_TRACED 0x40 /* Set to "not running" by a ptrace related event */ #define PS_STOPPING 0x80 #define 
PS_TRACING 0x100 -#ifdef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */ #define PS_DELAY_STOPPED 0x200 #define PS_DELAY_TRACED 0x400 -#endif /* POSTK_DEBUG_TEMP_FIX_41 */ #define PS_NORMAL (PS_INTERRUPTIBLE | PS_UNINTERRUPTIBLE) @@ -244,6 +242,11 @@ enum mpol_rebind_step { #define SPAWN_TO_REMOTE 1 #define SPAWNING_TO_REMOTE 1001 +#define UTI_STATE_DEAD 0 +#define UTI_STATE_PROLOGUE 1 +#define UTI_STATE_RUNNING_IN_LINUX 2 +#define UTI_STATE_EPILOGUE 3 + #include #include @@ -277,6 +280,7 @@ extern struct list_head resource_set_list; extern mcs_rwlock_lock_t resource_set_lock; extern int idle_halt; extern int allow_oversubscribe; +extern ihk_spinlock_t runq_reservation_lock; /* mutex for cpuid reservation (clv->runq_reserved) */ struct process_hash { struct list_head list[HASH_SIZE]; @@ -460,6 +464,14 @@ struct process { // threads and children struct list_head threads_list; + struct list_head report_threads_list; + + /* + * main_thread is used to refer to thread information using process ID. + * 1) signal related state in signal_flags + * 2) status of trace + */ + struct thread *main_thread; mcs_rwlock_lock_t threads_lock; // lock for threads_list /* TID set of proxy process */ struct mcexec_tid *tids; @@ -488,7 +500,6 @@ struct process { // V +---- | // PS_STOPPED -----+ // (PS_TRACED) - unsigned long exit_status; // only for zombie /* Store exit_status for a group of threads when stopped by SIGSTOP. exit_status can't be used because values of exit_status of threads @@ -520,22 +531,6 @@ struct process { long saved_cmdline_len; cpu_set_t cpu_set; - /* Store ptrace flags. - * The lower 8 bits are PTRACE_O_xxx of the PTRACE_SETOPTIONS request. - * Other bits are for inner use of the McKernel. - */ - int ptrace; - - /* Store ptrace event message. - * PTRACE_O_xxx will store event message here. - * PTRACE_GETEVENTMSG will get from here. - */ - unsigned long ptrace_eventmsg; - - /* Store event related to signal. 
For example, - it represents that the proceess has been resumed by SIGCONT. */ - int signal_flags; - /* Store signal sent to parent when the process terminates. */ int termsig; @@ -557,6 +552,9 @@ struct process { size_t mpol_threshold; unsigned long heap_extension; unsigned long mpol_bind_mask; + int uti_thread_rank; /* Spawn on Linux CPU when clone_count reaches this */ + int uti_use_last_cpu; /* Work-around not to share CPU with OpenMP thread */ + int clone_count; // perf_event int perf_status; @@ -572,6 +570,7 @@ struct process { unsigned long profile_elapsed_ts; #endif // PROFILE_ENABLE int nr_processes; /* For partitioned execution */ + int process_rank; /* Rank in partition */ }; /* @@ -602,7 +601,7 @@ struct thread { // thread info int cpu_id; int tid; - int status; // PS_RUNNING -> PS_EXITED + int status; // PS_RUNNING -> PS_EXITED (-> ZOMBIE / ptrace) // | ^ ^ // | | | // V | | @@ -612,6 +611,14 @@ struct thread { // PS_UNINTERRUPTIBLE int exit_status; + /* + * Store event related to signal. For example, + * it represents that the proceess has been resumed by SIGCONT. + */ + int signal_flags; + + int termsig; + // process vm struct process_vm *vm; @@ -631,6 +638,22 @@ struct thread { ihk_spinlock_t spin_sleep_lock; int spin_sleep; + // for ptrace + struct process *report_proc; + struct list_head report_siblings_list; // lock process + + /* Store ptrace flags. + * The lower 8 bits are PTRACE_O_xxx of the PTRACE_SETOPTIONS request. + * Other bits are for inner use of the McKernel. + */ + int ptrace; + + /* Store ptrace event message. + * PTRACE_O_xxx will store event message here. + * PTRACE_GETEVENTMSG will get from here. 
+ */ + unsigned long ptrace_eventmsg; + ihk_atomic_t refcount; int *clear_child_tid; @@ -687,10 +710,11 @@ struct thread { /* Syscall offload wait queue head */ struct waitq scd_wq; - int thread_offloaded; + int uti_state; int mod_clone; struct uti_attr *mod_clone_arg; int parent_cpuid; + int uti_refill_tid; // for performance counter unsigned long pmc_alloc_map; @@ -716,6 +740,8 @@ struct process_vm { // 2. addition of process page table (allocate_pages, update_process_page_table) // note that physical memory allocator (ihk_mc_alloc_pages, ihk_pagealloc_alloc) // is protected by its own lock (see ihk/manycore/generic/page_alloc.c) + unsigned long is_memory_range_lock_taken; + /* #986: Fix deadlock between do_page_fault_process_vm() and set_host_vma() */ ihk_atomic_t refcount; int exiting; @@ -819,14 +845,32 @@ void cpu_clear_and_set(int c_cpu, int s_cpu, void release_cpuid(int cpuid); -struct thread *find_thread(int pid, int tid, struct mcs_rwlock_node_irqsave *lock); -void thread_unlock(struct thread *thread, struct mcs_rwlock_node_irqsave *lock); +struct thread *find_thread(int pid, int tid); +void thread_unlock(struct thread *thread); struct process *find_process(int pid, struct mcs_rwlock_node_irqsave *lock); void process_unlock(struct process *proc, struct mcs_rwlock_node_irqsave *lock); void chain_process(struct process *); void chain_thread(struct thread *); void proc_init(void); -void set_timer(void); +void set_timer(int runq_locked); struct sig_pending *hassigpending(struct thread *thread); +extern int do_signal(unsigned long rc, void *regs0, struct thread *thread, + struct sig_pending *pending, int num); +extern void check_signal(unsigned long rc, void *regs0, int num); +extern unsigned long do_kill(struct thread *thread, int pid, int tid, int sig, + struct siginfo *info, int ptracecont); +extern void set_signal(int sig, void *regs, struct siginfo *info); +extern void check_sig_pending(void); +void clear_single_step(struct thread *thread); + +void 
release_fp_regs(struct thread *proc); +void save_fp_regs(struct thread *proc); +void copy_fp_regs(struct thread *from, struct thread *to); +void restore_fp_regs(struct thread *proc); +void clear_fp_regs(void); + +#define VERIFY_READ 0 +#define VERIFY_WRITE 1 +int access_ok(struct process_vm *vm, int type, uintptr_t addr, size_t len); #endif diff --git a/kernel/include/rusage_private.h b/kernel/include/rusage_private.h index 72401691..7da77280 100644 --- a/kernel/include/rusage_private.h +++ b/kernel/include/rusage_private.h @@ -10,6 +10,7 @@ #include #include #include +#include #ifdef ENABLE_RUSAGE @@ -55,7 +56,7 @@ rusage_rss_add(unsigned long size) } vm->currss += size; - if (vm->currss > vm->proc->maxrss) { + if (vm->proc && vm->currss > vm->proc->maxrss) { vm->proc->maxrss = vm->currss; } } @@ -118,8 +119,9 @@ static inline int rusage_memory_stat_add(struct vm_range *range, uintptr_t phys, struct page *page = phys_to_page(phys); /* Is It file map and cow page? */ - if ((range->memobj->flags & (MF_DEV_FILE | MF_REG_FILE)) && - !page) { + if ((range->memobj->flags & (MF_DEV_FILE | MF_REG_FILE | + MF_HUGETLBFS)) && + !page) { //kprintf("%s: cow,phys=%lx\n", __FUNCTION__, phys); memory_stat_rss_add(size, pgsize); return 1; diff --git a/kernel/include/shm.h b/kernel/include/shm.h index 4071e578..d8b136b6 100644 --- a/kernel/include/shm.h +++ b/kernel/include/shm.h @@ -57,6 +57,7 @@ struct shmobj { struct shmlock_user * user; struct shmid_ds ds; struct list_head page_list; + ihk_spinlock_t page_list_lock; struct list_head chain; /* shmobj_list */ }; @@ -104,7 +105,6 @@ static inline void shmlock_users_unlock(void) void shmobj_list_lock(void); void shmobj_list_unlock(void); int shmobj_create_indexed(struct shmid_ds *ds, struct shmobj **objp); -void shmobj_destroy(struct shmobj *obj); void shmlock_user_free(struct shmlock_user *user); int shmlock_user_get(uid_t ruid, struct shmlock_user **userp); diff --git a/kernel/include/syscall.h b/kernel/include/syscall.h index 
b0764558..77a1e639 100644 --- a/kernel/include/syscall.h +++ b/kernel/include/syscall.h @@ -49,6 +49,7 @@ #define SCD_MSG_PROCFS_DELETE 0x11 #define SCD_MSG_PROCFS_REQUEST 0x12 #define SCD_MSG_PROCFS_ANSWER 0x13 +#define SCD_MSG_PROCFS_RELEASE 0x15 #define SCD_MSG_DEBUG_LOG 0x20 @@ -82,6 +83,8 @@ #define SCD_MSG_CPU_RW_REG 0x52 #define SCD_MSG_CPU_RW_REG_RESP 0x53 +#define SCD_MSG_FUTEX_WAKE 0x60 + /* Cloning flags. */ # define CSIGNAL 0x000000ff /* Signal mask to be sent at exit. */ # define CLONE_VM 0x00000100 /* Set if VM shared between processes. */ @@ -197,8 +200,10 @@ struct program_load_desc { unsigned long heap_extension; long stack_premap; unsigned long mpol_bind_mask; + int uti_thread_rank; /* N-th clone() spawns a thread on Linux CPU */ + int uti_use_last_cpu; /* Work-around not to share CPU with OpenMP thread */ int nr_processes; - char shell_path[SHELL_PATH_MAX_LEN]; + int process_rank; __cpu_set_unit cpu_set[PLD_CPU_SET_SIZE]; int profile; struct program_image_section sections[0]; @@ -258,7 +263,7 @@ struct ikc_scd_packet { long sysfs_arg3; }; - /* SCD_MSG_SCHEDULE_THREAD */ + /* SCD_MSG_WAKE_UP_SYSCALL_THREAD */ struct { int ttid; }; @@ -274,6 +279,12 @@ struct ikc_scd_packet { struct { int eventfd_type; }; + + /* SCD_MSG_FUTEX_WAKE */ + struct { + void *resp; + int *spin_sleep; /* 1: waiting in linux_wait_event() 0: woken up by someone else */ + } futex; }; char padding[8]; }; @@ -336,10 +347,10 @@ struct syscall_post { SYSCALL_ARG_##a2(2); SYSCALL_ARG_##a3(3); \ SYSCALL_ARG_##a4(4); SYSCALL_ARG_##a5(5); -#define SYSCALL_FOOTER return do_syscall(&request, ihk_mc_get_processor_id(), 0) +#define SYSCALL_FOOTER return do_syscall(&request, ihk_mc_get_processor_id()) -extern long do_syscall(struct syscall_request *req, int cpu, int pid); -int obtain_clone_cpuid(cpu_set_t *cpu_set); +extern long do_syscall(struct syscall_request *req, int cpu); +int obtain_clone_cpuid(cpu_set_t *cpu_set, int use_last); extern long syscall_generic_forwarding(int n, 
ihk_mc_user_context_t *ctx); #define DECLARATOR(number,name) __NR_##name = number, @@ -353,17 +364,10 @@ enum { #undef SYSCALL_DELEGATED #define __NR_coredump 999 /* pseudo syscall for coredump */ -#ifdef POSTK_DEBUG_TEMP_FIX_61 /* Core table size and lseek return value to loff_t */ struct coretable { /* table entry for a core chunk */ off_t len; /* length of the chunk */ unsigned long addr; /* physical addr of the chunk */ }; -#else /* POSTK_DEBUG_TEMP_FIX_61 */ -struct coretable { /* table entry for a core chunk */ - int len; /* length of the chunk */ - unsigned long addr; /* physical addr of the chunk */ -}; -#endif /* POSTK_DEBUG_TEMP_FIX_61 */ #ifdef POSTK_DEBUG_TEMP_FIX_1 void create_proc_procfs_files(int pid, int tid, int cpuid); @@ -383,7 +387,6 @@ struct procfs_read { int count; /* bytes to read (request) */ int eof; /* if eof is detected, 1 otherwise 0. (answer)*/ int ret; /* read bytes (answer) */ - int status; /* non-zero if done (answer) */ int newcpu; /* migrated new cpu (answer) */ int readwrite; /* 0:read, 1:write */ char fname[PROCFS_NAME_MAX]; /* procfs filename (request) */ @@ -395,6 +398,8 @@ struct procfs_file { char fname[PROCFS_NAME_MAX]; /* procfs filename (request) */ }; +int process_procfs_request(struct ikc_scd_packet *rpacket); + #define RUSAGE_SELF 0 #define RUSAGE_CHILDREN -1 #define RUSAGE_THREAD 1 @@ -459,8 +464,8 @@ static inline unsigned long timespec_to_jiffy(const struct timespec *ats) void reset_cputime(void); void set_cputime(int mode); -int do_munmap(void *addr, size_t len); -intptr_t do_mmap(intptr_t addr0, size_t len0, int prot, int flags, int fd, +int do_munmap(void *addr, size_t len, int holding_memory_range_lock); +intptr_t do_mmap(uintptr_t addr0, size_t len0, int prot, int flags, int fd, off_t off0); void clear_host_pte(uintptr_t addr, size_t len); typedef int32_t key_t; @@ -471,7 +476,16 @@ int arch_setup_vdso(void); int arch_cpu_read_write_register(struct ihk_os_cpu_register *desc, enum mcctrl_os_cpu_operation op); 
struct vm_range_numa_policy *vm_range_policy_search(struct process_vm *vm, uintptr_t addr); +void calculate_time_from_tsc(struct timespec *ts); time_t time(void); +long do_futex(int n, unsigned long arg0, unsigned long arg1, + unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5, + unsigned long _uti_clv, + void *uti_futex_resp, + void *_linux_wait_event, + void *_linux_printk, + void *_linux_clock_gettime); #ifndef POSTK_DEBUG_ARCH_DEP_52 #define VDSO_MAXPAGES 2 @@ -519,6 +533,7 @@ enum perf_ctrl_type { struct perf_ctrl_desc { enum perf_ctrl_type ctrl_type; + int err; union { /* for SET, GET */ struct { @@ -569,6 +584,15 @@ typedef struct uti_attr { uint64_t flags; /* Representing location and behavior hints by bitmap */ } uti_attr_t; +struct uti_ctx { + union { + char ctx[4096]; + struct { + int uti_refill_tid; + }; + }; +}; + struct move_pages_smp_req { unsigned long count; const void **user_virt_addr; @@ -589,4 +613,9 @@ struct move_pages_smp_req { #define PROCESS_VM_READ 0 #define PROCESS_VM_WRITE 1 +/* uti: function pointers pointing to Linux codes */ +extern long (*linux_wait_event)(void *_resp, unsigned long nsec_timeout); +extern int (*linux_printk)(const char *fmt, ...); +extern int (*linux_clock_gettime)(clockid_t clk_id, struct timespec *tp); + #endif diff --git a/kernel/include/time.h b/kernel/include/time.h index b4c1bffd..1d4a30c2 100644 --- a/kernel/include/time.h +++ b/kernel/include/time.h @@ -25,6 +25,8 @@ #define CLOCK_PROCESS_CPUTIME_ID 2 #define CLOCK_THREAD_CPUTIME_ID 3 +typedef int clockid_t; + typedef long int __time_t; /* POSIX.1b structure for a time value. 
This is like a `struct timeval' but diff --git a/kernel/include/xpmem_private.h b/kernel/include/xpmem_private.h index 23d76f10..a12c406a 100644 --- a/kernel/include/xpmem_private.h +++ b/kernel/include/xpmem_private.h @@ -26,20 +26,17 @@ #include #include +#include #define XPMEM_CURRENT_VERSION 0x00026003 //#define DEBUG_PRINT_XPMEM #ifdef DEBUG_PRINT_XPMEM -#define dkprintf(...) kprintf(__VA_ARGS__) -#define ekprintf(...) kprintf(__VA_ARGS__) -#define XPMEM_DEBUG(format, a...) kprintf("[%d] %s: "format"\n", cpu_local_var(current)->proc->rgid, __func__, ##a) -#else -#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) -#define ekprintf(...) kprintf(__VA_ARGS__) -#define XPMEM_DEBUG(format, a...) do { if (0) kprintf("\n"); } while (0) +#undef DDEBUG_DEFAULT +#define DDEBUG_DEFAULT DDEBUG_PRINT #endif +#define XPMEM_DEBUG(format, a...) dkprintf("[%d] %s: "format"\n", cpu_local_var(current)->proc->rgid, __func__, ##a) //#define USE_DBUG_ON diff --git a/kernel/init.c b/kernel/init.c index 88d11682..2618b983 100644 --- a/kernel/init.c +++ b/kernel/init.c @@ -32,6 +32,7 @@ #include #include #include +#include //#define IOCTL_FUNC_EXTENSION #ifdef IOCTL_FUNC_EXTENSION @@ -41,11 +42,8 @@ //#define DEBUG_PRINT_INIT #ifdef DEBUG_PRINT_INIT -#define dkprintf(...) do { kprintf(__VA_ARGS__); } while (0) -#define ekprintf(...) do { kprintf(__VA_ARGS__); } while (0) -#else -#define dkprintf(...) do { } while (0) -#define ekprintf(...) 
do { kprintf(__VA_ARGS__); } while (0) +#undef DDEBUG_DEFAULT +#define DDEBUG_DEFAULT DDEBUG_PRINT #endif #define DUMP_LEVEL_USER_UNUSED_EXCLUDE 24 @@ -61,6 +59,13 @@ static void handler_init(void) ihk_mc_set_syscall_handler(syscall); } + +/* Symbols with name conflict with the linux kernel + * Give the possibility to load all symbols at the same time + */ +int *mck_num_processors = &num_processors; + + unsigned long data[1024] __attribute__((aligned(64))); #ifdef USE_DMA @@ -125,6 +130,8 @@ char *find_command_line(char *name) return strstr(cmdline, name); } +extern int safe_kernel_map; + static void parse_kargs(void) { char *ptr; @@ -145,6 +152,11 @@ static void parse_kargs(void) } ihk_mc_set_dump_level(dump_level); + ptr = find_command_line("safe_kernel_map"); + if (ptr) { + safe_kernel_map = 1; + } + /* idle_halt option */ ptr = find_command_line("idle_halt"); if (ptr) { @@ -246,6 +258,11 @@ static void nmi_init() ihk_set_nmi_mode_addr(phys); } +static void uti_init() +{ + ihk_set_mckernel_do_futex((unsigned long)do_futex); +} + static void rest_init(void) { handler_init(); @@ -261,6 +278,7 @@ static void rest_init(void) #endif /* !POSTK_DEBUG_TEMP_FIX_73 */ cpu_local_var_init(); nmi_init(); + uti_init(); time_init(); kmalloc_init(); @@ -331,6 +349,7 @@ static void populate_sysfs(void) { cpu_sysfs_setup(); numa_sysfs_setup(); + dynamic_debug_sysfs_setup(); //setup_remote_snooping_samples(); } /* populate_sysfs() */ diff --git a/kernel/listeners.c b/kernel/listeners.c index 7dad945a..345f0655 100644 --- a/kernel/listeners.c +++ b/kernel/listeners.c @@ -19,15 +19,13 @@ #include #include #include +#include //#define DEBUG_LISTENERS #ifdef DEBUG_LISTENERS -#define dkprintf(...) kprintf(__VA_ARGS__) -#define ekprintf(...) kprintf(__VA_ARGS__) -#else -#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) -#define ekprintf(...) 
kprintf(__VA_ARGS__) +#undef DDEBUG_DEFAULT +#define DDEBUG_DEFAULT DDEBUG_PRINT #endif void testmem(void *v, unsigned long size) @@ -71,7 +69,7 @@ static int test_packet_handler(struct ihk_ikc_channel_desc *c, testmem(v, 4 * 1024 * 1024); - ihk_mc_unmap_virtual(v, 4 * 1024, 1); + ihk_mc_unmap_virtual(v, 4 * 1024); ihk_mc_unmap_memory(NULL, pp, 4 * 1024 * 1024); } else if (packet->msg == 0x11110012) { p.msg = 0x11110013; diff --git a/kernel/mem.c b/kernel/mem.c index 27595637..fa2e347f 100644 --- a/kernel/mem.c +++ b/kernel/mem.c @@ -44,15 +44,13 @@ #include #include #include +#include //#define DEBUG_PRINT_MEM #ifdef DEBUG_PRINT_MEM -#define dkprintf(...) kprintf(__VA_ARGS__) -#define ekprintf(...) kprintf(__VA_ARGS__) -#else -#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) -#define ekprintf(...) kprintf(__VA_ARGS__) +#undef DDEBUG_DEFAULT +#define DDEBUG_DEFAULT DDEBUG_PRINT #endif static unsigned long pa_start, pa_end; @@ -547,7 +545,7 @@ static void *mckernel_allocate_aligned_pages_node(int npages, int p2align, ihk_mc_ap_flag flag, int pref_node, int is_user, uintptr_t virt_addr) { unsigned long pa = 0; - int i, node; + int i = 0, node; #ifndef IHK_RBTREE_ALLOCATOR struct ihk_page_allocator_desc *pa_allocator; #endif @@ -962,8 +960,6 @@ static struct ihk_mc_interrupt_handler query_free_mem_handler = { .priv = NULL, }; -void set_signal(int sig, void *regs, struct siginfo *info); -void check_signal(unsigned long, void *, int); int gencore(struct thread *, void *, struct coretable **, int *); void freecore(struct coretable **); @@ -981,11 +977,9 @@ void coredump(struct thread *thread, void *regs) struct coretable *coretable; int chunks; -#ifdef POSTK_DEBUG_ARCH_DEP_67 /* use limit corefile size. (temporarily fix.) 
*/ if (thread->proc->rlimit[MCK_RLIMIT_CORE].rlim_cur == 0) { return; } -#endif /* POSTK_DEBUG_ARCH_DEP_67 */ #ifndef POSTK_DEBUG_ARCH_DEP_18 ret = gencore(thread, regs, &coretable, &chunks); @@ -997,7 +991,7 @@ void coredump(struct thread *thread, void *regs) request.args[0] = chunks; request.args[1] = virt_to_phys(coretable); /* no data for now */ - ret = do_syscall(&request, thread->cpu_id, thread->proc->pid); + ret = do_syscall(&request, thread->cpu_id); if (ret == 0) { kprintf("dumped core.\n"); } else { @@ -1223,7 +1217,7 @@ out: if(interrupt_from_user(regs)){ cpu_enable_interrupt(); check_need_resched(); - check_signal(0, regs, 0); + check_signal(0, regs, -1); } set_cputime(interrupt_from_user(regs)? 0: 1); #ifdef PROFILE_ENABLE @@ -1671,7 +1665,7 @@ void *ihk_mc_map_virtual(unsigned long phys, int npages, return (char *)p + offset; } -void ihk_mc_unmap_virtual(void *va, int npages, int free_physical) +void ihk_mc_unmap_virtual(void *va, int npages) { unsigned long i; @@ -1681,13 +1675,7 @@ void ihk_mc_unmap_virtual(void *va, int npages, int free_physical) flush_tlb_single((unsigned long)(va + (i << PAGE_SHIFT))); } -#ifdef POSTK_DEBUG_TEMP_FIX_51 /* ihk_mc_unmap_virtual() free_physical disabled */ ihk_pagealloc_free(vmap_allocator, (unsigned long)va, npages); -#else /* POSTK_DEBUG_TEMP_FIX_51 */ - if (free_physical) { - ihk_pagealloc_free(vmap_allocator, (unsigned long)va, npages); - } -#endif /* POSTK_DEBUG_TEMP_FIX_51 */ } #ifdef ATTACHED_MIC @@ -2304,76 +2292,37 @@ void ___kmalloc_print_free_list(struct list_head *list) kprintf_unlock(irqflags); } -#ifdef POSTK_DEBUG_ARCH_DEP_27 -int search_free_space(struct thread *thread, size_t len, intptr_t hint, - int pgshift, intptr_t *addrp) -{ - struct vm_regions *region = &thread->vm->region; - intptr_t addr; - int error; - struct vm_range *range; - size_t pgsize = (size_t)1 << pgshift; - - dkprintf("search_free_space(%lx,%lx,%d,%p)\n", len, hint, pgshift, addrp); - - addr = hint; - for (;;) { - addr = (addr + 
pgsize - 1) & ~(pgsize - 1); - if ((region->user_end <= addr) - || ((region->user_end - len) < addr)) { - ekprintf("search_free_space(%lx,%lx,%p):" - "no space. %lx %lx\n", - len, hint, addrp, addr, - region->user_end); - error = -ENOMEM; - goto out; - } - - range = lookup_process_memory_range(thread->vm, addr, addr+len); - if (range == NULL) { - break; - } - addr = range->end; - } - - error = 0; - *addrp = addr; - -out: - dkprintf("search_free_space(%lx,%lx,%d,%p): %d %lx\n", - len, hint, pgshift, addrp, error, addr); - return error; -} -#endif /* POSTK_DEBUG_ARCH_DEP_27 */ - -#ifdef POSTK_DEBUG_TEMP_FIX_52 /* supports NUMA for memory area determination */ #ifdef IHK_RBTREE_ALLOCATOR -int is_mckernel_memory(unsigned long phys) +int is_mckernel_memory(unsigned long start, unsigned long end) { int i; for (i = 0; i < ihk_mc_get_nr_memory_chunks(); ++i) { - unsigned long start, end; + unsigned long chunk_start, chunk_end; int numa_id; - ihk_mc_get_memory_chunk(i, &start, &end, &numa_id); - if (start <= phys && phys < end) { + ihk_mc_get_memory_chunk(i, &chunk_start, &chunk_end, &numa_id); + if ((chunk_start <= start && start < chunk_end) && + (chunk_start <= end && end < chunk_end)) { return 1; } } return 0; } #else /* IHK_RBTREE_ALLOCATOR */ -int is_mckernel_memory(unsigned long phys) +int is_mckernel_memory(unsigned long start, unsigned long end) { int i; for (i = 0; i < ihk_mc_get_nr_numa_nodes(); ++i) { struct ihk_page_allocator_desc *pa_allocator; list_for_each_entry(pa_allocator, &memory_nodes[i].allocators, list) { - if (pa_allocator->start <= phys && phys < pa_allocator->end) { + unsigned long area_start = pa_allocator->start; /* bounds of the allocator visited in this iteration; read only after list_for_each_entry() has set pa_allocator */ + unsigned long area_end = pa_allocator->end; + if ((area_start <= start && start < area_end) && + (area_start <= end && end < area_end)) { return 1; } } @@ -2381,7 +2330,6 @@ int is_mckernel_memory(unsigned long phys) return 0; } #endif /* IHK_RBTREE_ALLOCATOR */ -#endif /* POSTK_DEBUG_TEMP_FIX_52 */ void ihk_mc_query_mem_areas(void){
diff --git a/kernel/pager.c b/kernel/pager.c index 2fc1a725..f6b2cb0b 100644 --- a/kernel/pager.c +++ b/kernel/pager.c @@ -21,6 +21,7 @@ #include #include #include +#include #define O_RDONLY 00000000 #define O_WRONLY 00000001 @@ -44,11 +45,8 @@ //#define DEBUG_PRINT_PROCESS #ifdef DEBUG_PRINT_PROCESS -#define dkprintf(...) kprintf(__VA_ARGS__) -#define ekprintf(...) kprintf(__VA_ARGS__) -#else -#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) -#define ekprintf(...) kprintf(__VA_ARGS__) +#undef DDEBUG_DEFAULT +#define DDEBUG_DEFAULT DDEBUG_PRINT #endif /* diff --git a/kernel/process.c b/kernel/process.c index 54ae10cd..8b0f40c9 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -38,12 +38,13 @@ #include #include #include +#include //#define DEBUG_PRINT_PROCESS #ifdef DEBUG_PRINT_PROCESS -#define dkprintf(...) kprintf(__VA_ARGS__) -#define ekprintf(...) kprintf(__VA_ARGS__) +#undef DDEBUG_DEFAULT +#define DDEBUG_DEFAULT DDEBUG_PRINT static void dtree(struct rb_node *node, int l) { struct vm_range *range; if (!node) @@ -60,14 +61,10 @@ static void dump_tree(struct process_vm *vm) { dtree(vm->vm_range_tree.rb_node, 1); } #else -#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) -#define ekprintf(...) 
kprintf(__VA_ARGS__) static void dump_tree(struct process_vm *vm) {} #endif -#ifdef POSTK_DEBUG_ARCH_DEP_22 extern struct thread *arch_switch_context(struct thread *prev, struct thread *next); -#endif /* POSTK_DEBUG_ARCH_DEP_22 */ extern long alloc_debugreg(struct thread *proc); extern void save_debugreg(unsigned long *debugreg); extern void restore_debugreg(unsigned long *debugreg); @@ -78,10 +75,6 @@ static int vm_range_insert(struct process_vm *vm, static struct vm_range *vm_range_find(struct process_vm *vm, unsigned long addr); static int copy_user_ranges(struct process_vm *vm, struct process_vm *orgvm); -extern void release_fp_regs(struct thread *proc); -extern void save_fp_regs(struct thread *proc); -extern void copy_fp_regs(struct thread *from, struct thread *to); -extern void restore_fp_regs(struct thread *proc); extern void __runq_add_proc(struct thread *proc, int cpu_id); extern void terminate_host(int pid); extern void lapic_timer_enable(unsigned int clocks); @@ -89,16 +82,12 @@ extern void lapic_timer_disable(); extern int num_processors; extern ihk_spinlock_t cpuid_head_lock; int ptrace_detach(int pid, int data); -extern unsigned long do_kill(struct thread *, int pid, int tid, int sig, struct siginfo *info, int ptracecont); extern void procfs_create_thread(struct thread *); extern void procfs_delete_thread(struct thread *); -#ifndef POSTK_DEBUG_ARCH_DEP_22 -extern void perf_start(struct mc_perf_event *event); -extern void perf_reset(struct mc_perf_event *event); -#endif /* !POSTK_DEBUG_ARCH_DEP_22 */ struct list_head resource_set_list; mcs_rwlock_lock_t resource_set_lock; +ihk_spinlock_t runq_reservation_lock; int idle_halt = 0; int allow_oversubscribe = 0; @@ -126,10 +115,8 @@ init_process(struct process *proc, struct process *parent) proc->mpol_threshold = parent->mpol_threshold; memcpy(proc->rlimit, parent->rlimit, sizeof(struct rlimit) * MCK_RLIM_MAX); -#ifdef POSTK_DEBUG_TEMP_FIX_69 /* Fix problem not to inherit parent cpu_set. 
*/ memcpy(&proc->cpu_set, &parent->cpu_set, sizeof(proc->cpu_set)); -#endif /* POSTK_DEBUG_TEMP_FIX_69 */ } #ifdef POSTK_DEBUG_ARCH_DEP_63 /* struct process member initialize add */ @@ -138,6 +125,7 @@ init_process(struct process *proc, struct process *parent) INIT_LIST_HEAD(&proc->ptraced_siblings_list); mcs_rwlock_init(&proc->update_lock); #endif /* POSTK_DEBUG_ARCH_DEP_63 */ + INIT_LIST_HEAD(&proc->report_threads_list); INIT_LIST_HEAD(&proc->threads_list); INIT_LIST_HEAD(&proc->children_list); INIT_LIST_HEAD(&proc->ptraced_children_list); @@ -362,6 +350,7 @@ struct thread *create_thread(unsigned long user_pc, thread->vm = vm; thread->proc = proc; proc->vm = vm; + proc->main_thread = thread; if(init_process_vm(proc, asp, vm) != 0){ goto err; @@ -453,18 +442,30 @@ clone_thread(struct thread *org, unsigned long pc, unsigned long sp, proc->termsig = termsig; asp = create_address_space(cpu_local_var(resource_set), 1); - if(!asp){ + if (!asp) { kfree(proc); goto err_free_proc; } proc->vm = kmalloc(sizeof(struct process_vm), IHK_MC_AP_NOWAIT); - if(!proc->vm){ + if (!proc->vm) { release_address_space(asp); kfree(proc); goto err_free_proc; } memset(proc->vm, '\0', sizeof(struct process_vm)); + proc->saved_cmdline_len = org->proc->saved_cmdline_len; + proc->saved_cmdline = kmalloc(proc->saved_cmdline_len, + IHK_MC_AP_NOWAIT); + if (!proc->saved_cmdline) { + release_address_space(asp); + kfree(proc->vm); + kfree(proc); + goto err_free_proc; + } + memcpy(proc->saved_cmdline, org->proc->saved_cmdline, + proc->saved_cmdline_len); + dkprintf("fork(): init_process_vm()\n"); if (init_process_vm(proc, asp, proc->vm) != 0) { release_address_space(asp); @@ -479,6 +480,7 @@ clone_thread(struct thread *org, unsigned long pc, unsigned long sp, thread->proc = proc; thread->vm = proc->vm; + proc->main_thread = thread; memcpy(&proc->vm->region, &org->vm->region, sizeof(struct vm_regions)); @@ -583,29 +585,47 @@ ptrace_traceme(void) struct thread *thread = cpu_local_var(current); struct 
process *proc = thread->proc; struct process *parent = proc->parent; - struct mcs_rwlock_node_irqsave lock; struct mcs_rwlock_node child_lock; + struct resource_set *resource_set = cpu_local_var(resource_set); + struct process *pid1 = resource_set->pid1; dkprintf("ptrace_traceme,pid=%d,proc->parent=%p\n", proc->pid, proc->parent); - if (proc->ptrace & PT_TRACED) { + if (thread->ptrace & PT_TRACED) { + return -EPERM; + } + if (parent == pid1) { return -EPERM; } dkprintf("ptrace_traceme,parent->pid=%d\n", proc->parent->pid); - mcs_rwlock_writer_lock(&proc->update_lock, &lock); - mcs_rwlock_writer_lock_noirq(&parent->children_lock, &child_lock); - list_add_tail(&proc->ptraced_siblings_list, &parent->ptraced_children_list); - mcs_rwlock_writer_unlock_noirq(&parent->children_lock, &child_lock); - proc->ptrace = PT_TRACED | PT_TRACE_EXEC; - mcs_rwlock_writer_unlock(&proc->update_lock, &lock); + if (thread == proc->main_thread) { + mcs_rwlock_writer_lock_noirq(&parent->children_lock, + &child_lock); + list_add_tail(&proc->ptraced_siblings_list, + &parent->ptraced_children_list); + mcs_rwlock_writer_unlock_noirq(&parent->children_lock, + &child_lock); + } + if (!thread->report_proc) { + mcs_rwlock_writer_lock_noirq(&parent->threads_lock, + &child_lock); + list_add_tail(&thread->report_siblings_list, + &parent->report_threads_list); + mcs_rwlock_writer_unlock_noirq(&parent->threads_lock, + &child_lock); + thread->report_proc = parent; + } + + thread->ptrace = PT_TRACED | PT_TRACE_EXEC; if (thread->ptrace_debugreg == NULL) { error = alloc_debugreg(thread); } clear_single_step(thread); + hold_thread(thread); dkprintf("ptrace_traceme,returning,error=%d\n", error); return error; @@ -894,7 +914,7 @@ int join_process_memory_range(struct process_vm *vm, surviving->end = merging->end; if (merging->memobj) { - memobj_release(merging->memobj); + memobj_unref(merging->memobj); } rb_erase(&merging->vm_rb_node, &vm->vm_range_tree); for (i = 0; i < VM_RANGE_CACHE_SIZE; ++i) { @@ -967,13 
+987,19 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range) ihk_mc_spinlock_lock_noirq(&vm->page_table_lock); if (range->memobj) { - memobj_lock(range->memobj); + memobj_ref(range->memobj); + } + if (range->memobj && range->memobj->flags & MF_HUGETLBFS) { + error = ihk_mc_pt_clear_range(vm->address_space->page_table, + vm, (void *)start, (void *)end); + } else { + error = ihk_mc_pt_free_range(vm->address_space->page_table, + vm, (void *)start, (void *)end, + (range->flag & VR_PRIVATE) ? NULL : + range->memobj); } - error = ihk_mc_pt_free_range(vm->address_space->page_table, vm, - (void *)start, (void *)end, - (range->flag & VR_PRIVATE)? NULL: range->memobj); if (range->memobj) { - memobj_unlock(range->memobj); + memobj_unref(range->memobj); } ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock); if (error && (error != -ENOENT)) { @@ -1000,7 +1026,7 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range) } if (range->memobj) { - memobj_release(range->memobj); + memobj_unref(range->memobj); } rb_erase(&range->vm_rb_node, &vm->vm_range_tree); @@ -1263,7 +1289,7 @@ int add_process_memory_range(struct process_vm *vm, if (phys != NOPHYS && !(flag & (VR_REMOTE | VR_DEMAND_PAGING)) && ((flag & VR_PROT_MASK) != VR_PROT_NONE)) { #if 1 - memset((void*)phys_to_virt(phys), 0, end - start); + memset((void *)phys_to_virt(phys), 0, end - start); #else if (end - start < (1024*1024)) { memset((void*)phys_to_virt(phys), 0, end - start); @@ -1443,7 +1469,8 @@ int change_prot_process_memory_range(struct process_vm *vm, * We need to keep the page table read-only to trigger a page * fault for copy-on-write later on */ - if (range->memobj && (range->flag & VR_PRIVATE)) { + if (range->memobj && (range->flag & VR_PRIVATE) && + !(range->memobj->flags & MF_HUGETLBFS)) { setattr &= ~PTATTR_WRITABLE; if (!clrattr && !setattr) { range->flag = newflag; @@ -1532,7 +1559,7 @@ int remap_process_memory_range(struct process_vm *vm, struct vm_range 
*range, dkprintf("remap_process_memory_range(%p,%p,%#lx,%#lx,%#lx)\n", vm, range, start, end, off); ihk_mc_spinlock_lock_noirq(&vm->page_table_lock); - memobj_lock(range->memobj); + memobj_ref(range->memobj); args.start = start; args.off = off; @@ -1557,7 +1584,7 @@ int remap_process_memory_range(struct process_vm *vm, struct vm_range *range, error = 0; out: - memobj_unlock(range->memobj); + memobj_unref(range->memobj); ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock); dkprintf("remap_process_memory_range(%p,%p,%#lx,%#lx,%#lx):%d\n", vm, range, start, end, off, error); @@ -1622,7 +1649,7 @@ int sync_process_memory_range(struct process_vm *vm, struct vm_range *range, ihk_mc_spinlock_lock_noirq(&vm->page_table_lock); if (!(range->memobj->flags & MF_ZEROFILL)) { - memobj_lock(range->memobj); + memobj_ref(range->memobj); } error = visit_pte_range(vm->address_space->page_table, (void *)start, @@ -1630,7 +1657,7 @@ int sync_process_memory_range(struct process_vm *vm, struct vm_range *range, &sync_one_page, &args); if (!(range->memobj->flags & MF_ZEROFILL)) { - memobj_unlock(range->memobj); + memobj_unref(range->memobj); } ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock); @@ -1712,11 +1739,11 @@ int invalidate_process_memory_range(struct process_vm *vm, args.range = range; ihk_mc_spinlock_lock_noirq(&vm->page_table_lock); - memobj_lock(range->memobj); + memobj_ref(range->memobj); error = visit_pte_range(vm->address_space->page_table, (void *)start, (void *)end, range->pgshift, VPTEF_SKIP_NULL, &invalidate_one_page, &args); - memobj_unlock(range->memobj); + memobj_unref(range->memobj); ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock); if (error) { ekprintf("invalidate_process_memory_range(%p,%p,%#lx,%#lx):" @@ -1881,30 +1908,8 @@ retry: } dkprintf("%s: cow,copying virt:%lx<-%lx,phys:%lx<-%lx,pgsize=%lu\n", __FUNCTION__, virt, phys_to_virt(phys), virt_to_phys(virt), phys, pgsize); -#ifdef POSTK_DEBUG_TEMP_FIX_14 - if (page) { - // McKernel memory space - memcpy(virt, 
phys_to_virt(phys), pgsize); - } else { - // Host Kernel memory space - const enum ihk_mc_pt_attribute attr = 0; - const int remove_vmap_allocator_entry = 1; - void* vmap; - - vmap = ihk_mc_map_virtual(phys, npages, attr); - if (!vmap) { - error = -ENOMEM; - kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):cannot virtual mapping. %d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error); - ihk_mc_free_pages(virt, npages); - goto out; - } - memcpy(virt, vmap, pgsize); - ihk_mc_unmap_virtual(vmap, npages, remove_vmap_allocator_entry); - } -#else /*POSTK_DEBUG_TEMP_FIX_14*/ memcpy(virt, phys_to_virt(phys), pgsize); -#endif /*POSTK_DEBUG_TEMP_FIX_14*/ /* Call rusage_memory_stat_add() because remote page fault may create a page not pointed-to by PTE */ if(rusage_memory_stat_add(range, phys, pgsize, pgsize)) { dkprintf("%lx+,%s: remote page fault + cow, calling memory_stat_rss_add(),pgsize=%ld\n", @@ -1987,11 +1992,28 @@ static int do_page_fault_process_vm(struct process_vm *vm, void *fault_addr0, ui int error; const uintptr_t fault_addr = (uintptr_t)fault_addr0; struct vm_range *range; + struct thread *thread = cpu_local_var(current); + int locked = 0; dkprintf("[%d]do_page_fault_process_vm(%p,%lx,%lx)\n", ihk_mc_get_processor_id(), vm, fault_addr0, reason); - - ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock); + + if (!thread->vm->is_memory_range_lock_taken) { + /* For the case where is_memory_range_lock_taken is incremented after memory_range_lock is taken. 
*/ + while (1) { + if (thread->vm->is_memory_range_lock_taken) { + goto skip; + } + if (ihk_mc_spinlock_trylock_noirq(&vm->memory_range_lock)) { + locked = 1; + break; + } + } + } else { +skip:; + dkprintf("%s: INFO: skip locking of memory_range_lock,pid=%d,tid=%d\n", + __func__, thread->proc->pid, thread->tid); + } if (vm->exiting) { error = -ECANCELED; @@ -2100,7 +2122,9 @@ static int do_page_fault_process_vm(struct process_vm *vm, void *fault_addr0, ui error = 0; out: - ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); + if (locked) { + ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); + } dkprintf("[%d]do_page_fault_process_vm(%p,%lx,%lx): %d\n", ihk_mc_get_processor_id(), vm, fault_addr0, reason, error); @@ -2141,6 +2165,7 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn, char *stack; int error; unsigned long *p; + unsigned long maxsz; unsigned long minsz; unsigned long at_rand; struct process *proc = thread->proc; @@ -2157,19 +2182,20 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn, minsz = (pn->stack_premap + LARGE_PAGE_SIZE - 1) & LARGE_PAGE_MASK; #endif /* POSTK_DEBUG_ARCH_DEP_80 */ - size = (proc->rlimit[MCK_RLIMIT_STACK].rlim_cur - + LARGE_PAGE_SIZE - 1) & LARGE_PAGE_MASK; + maxsz = (end - thread->vm->region.map_start) / 2; + size = proc->rlimit[MCK_RLIMIT_STACK].rlim_cur; + if (size > maxsz) { + size = maxsz; + } + else if (size < minsz) { + size = minsz; + } + size = (size + LARGE_PAGE_SIZE - 1) & LARGE_PAGE_MASK; dkprintf("%s: stack_premap: %lu, rlim_cur: %lu, minsz: %lu, size: %lu\n", __FUNCTION__, pn->stack_premap, proc->rlimit[MCK_RLIMIT_STACK].rlim_cur, minsz, size); - if (size > (USER_END / 2)) { - size = USER_END / 2; - } - else if (size < minsz) { - size = minsz; - } start = (end - size) & LARGE_PAGE_MASK; /* Apply user allocation policy to stacks */ @@ -2444,6 +2470,11 @@ void free_process_memory_ranges(struct process_vm *vm) 
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); } +static void free_thread_pages(struct thread *thread) +{ + ihk_mc_free_pages(thread, KERNEL_STACK_NR_PAGES); +} + void hold_process(struct process *proc) { @@ -2476,13 +2507,6 @@ release_process(struct process *proc) list_del(&proc->siblings_list); mcs_rwlock_writer_unlock(&parent->children_lock, &lock); - if(proc->ptrace & PT_TRACED){ - parent = proc->ppid_parent; - mcs_rwlock_writer_lock(&parent->children_lock, &lock); - list_del(&proc->ptraced_siblings_list); - mcs_rwlock_writer_unlock(&parent->children_lock, &lock); - } - if (proc->tids) kfree(proc->tids); #ifdef PROFILE_ENABLE if (proc->profile) { @@ -2495,7 +2519,15 @@ release_process(struct process *proc) } profile_dealloc_proc_events(proc); #endif // PROFILE_ENABLE + free_thread_pages(proc->main_thread); kfree(proc); + + /* no process left */ + mcs_rwlock_reader_lock(&rset->pid1->children_lock, &lock); + if (list_empty(&rset->pid1->children_list)) { + hugefileobj_cleanup(); + } + mcs_rwlock_reader_unlock(&rset->pid1->children_lock, &lock); } void @@ -2516,9 +2548,6 @@ free_all_process_memory_range(struct process_vm *vm) range = rb_entry(node, struct vm_range, vm_rb_node); next = rb_next(node); - if (range->memobj) { - range->memobj->flags |= MF_HOST_RELEASED; - } error = free_process_memory_range(vm, range); if (error) { ekprintf("free_process_memory(%p):" @@ -2591,8 +2620,8 @@ out: int hold_thread(struct thread *thread) { if (thread->status == PS_EXITED) { - kprintf("hold_thread: ERROR: already exited process,tid=%d\n", thread->tid); - return -ESRCH; + kprintf("hold_thread: WARNING: already exited process,tid=%d\n", + thread->tid); } ihk_atomic_inc(&thread->refcount); @@ -2639,6 +2668,21 @@ void __release_tid(struct process *proc, struct thread *thread) { } } +/* Replace tid specified by thread with tid specified by new_tid */ +void __find_and_replace_tid(struct process *proc, struct thread *thread, int new_tid) { + int i; + + for (i = 0; i < 
proc->nr_tids; ++i) { + if (proc->tids[i].thread != thread) continue; + + proc->tids[i].thread = NULL; + proc->tids[i].tid = new_tid; + dkprintf("%s: tid %d (thread %p) has been relaced with tid %d\n", + __FUNCTION__, thread->tid, thread, new_tid); + break; + } +} + void destroy_thread(struct thread *thread) { struct sig_pending *pending; @@ -2659,13 +2703,16 @@ void destroy_thread(struct thread *thread) ts_add(&thread->proc->stime, &ats); tsc_to_ts(thread->user_tsc, &ats); ts_add(&thread->proc->utime, &ats); + mcs_rwlock_writer_unlock(&proc->update_lock, &updatelock); mcs_rwlock_writer_lock(&proc->threads_lock, &lock); list_del(&thread->siblings_list); - __release_tid(proc, thread); - mcs_rwlock_writer_unlock(&proc->threads_lock, &lock); - - mcs_rwlock_writer_unlock(&proc->update_lock, &updatelock); + if (thread->uti_state == UTI_STATE_EPILOGUE) { + __find_and_replace_tid(proc, thread, thread->uti_refill_tid); + } + else if (thread != proc->main_thread) { + __release_tid(proc, thread); + } cpu_clear(thread->cpu_id, &thread->vm->address_space->cpu_set, &thread->vm->address_space->cpu_set_lock); @@ -2689,7 +2736,9 @@ void destroy_thread(struct thread *thread) release_sigcommon(thread->sigcommon); - ihk_mc_free_pages(thread, KERNEL_STACK_NR_PAGES); + if (thread != proc->main_thread) + free_thread_pages(thread); + mcs_rwlock_writer_unlock(&proc->threads_lock, &lock); } void release_thread(struct thread *thread) @@ -2711,20 +2760,6 @@ void release_thread(struct thread *thread) destroy_thread(thread); release_process_vm(vm); - rusage_num_threads_dec(); - -#ifdef RUSAGE_DEBUG - if (rusage->num_threads == 0) { - int i; - kprintf("total_memory_usage=%ld\n", rusage->total_memory_usage); - for(i = 0; i < IHK_MAX_NUM_PGSIZES; i++) { - kprintf("memory_stat_rss[%d]=%ld\n", i, rusage->memory_stat_rss[i]); - } - for(i = 0; i < IHK_MAX_NUM_PGSIZES; i++) { - kprintf("memory_stat_mapped_file[%d]=%ld\n", i, rusage->memory_stat_mapped_file[i]); - } - } -#endif } void cpu_set(int cpu, 
cpu_set_t *cpu_set, ihk_spinlock_t *lock) @@ -2949,6 +2984,9 @@ void sched_init(void) INIT_LIST_HEAD(&cpu_local_var(migq)); ihk_mc_spinlock_init(&cpu_local_var(migq_lock)); + // to save default fpregs + save_fp_regs(idle_thread); + #ifdef TIMER_CPU_ID if (ihk_mc_get_processor_id() == TIMER_CPU_ID) { init_timers(); @@ -3057,15 +3095,28 @@ ack: ihk_mc_spinlock_unlock(&cur_v->migq_lock, irqstate); } -void -set_timer() +void set_timer(int runq_locked) { struct cpu_local_var *v = get_this_cpu_local_var(); + struct thread *thread; + int num_running = 0; + unsigned long irqstate; + + if (!runq_locked) { + irqstate = ihk_mc_spinlock_lock(&(v->runq_lock)); + } + + list_for_each_entry(thread, &v->runq, sched_list) { + if (thread->status != PS_RUNNING) { + continue; + } + num_running++; + } /* Toggle timesharing if CPU core is oversubscribed */ - if (v->runq_len > 1 || v->current->itimer_enabled) { + if (num_running > 1 || v->current->itimer_enabled) { if (!cpu_local_var(timer_enabled)) { - lapic_timer_enable(10000000); + lapic_timer_enable(/*10000000*/1000000); cpu_local_var(timer_enabled) = 1; } } @@ -3075,6 +3126,10 @@ set_timer() cpu_local_var(timer_enabled) = 0; } } + + if (!runq_locked) { + ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate); + } } /* @@ -3128,11 +3183,11 @@ void spin_sleep_or_schedule(void) } ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate); -#ifdef POSTK_DEBUG_TEMP_FIX_56 /* in futex_wait() signal handring fix. 
*/ - if (hassigpending(cpu_local_var(current))) { + if ((!list_empty(&thread->sigpending) || + !list_empty(&thread->sigcommon->sigpending)) && + hassigpending(thread)) { woken = 1; } -#endif /* POSTK_DEBUG_TEMP_FIX_56 */ if (woken) { return; @@ -3162,10 +3217,6 @@ void schedule(void) return; } -redo: - /* Reset for redo */ - switch_ctx = 0; - cpu_local_var(runq_irqstate) = ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock)); v = get_this_cpu_local_var(); @@ -3190,7 +3241,7 @@ redo: /* Switch to idle() when prev is PS_EXITED since it always reaches release_thread() because it always resumes from just after ihk_mc_switch_context() call. See #1029 */ if (v->flags & CPU_FLAG_NEED_MIGRATE || - prev->status == PS_EXITED) { + (prev && prev->status == PS_EXITED)) { next = &cpu_local_var(idle); } else { /* Pick a new running process or one that has a pending signal */ @@ -3220,7 +3271,7 @@ redo: reset_cputime(); } - set_timer(); + set_timer(1); if (switch_ctx) { dkprintf("schedule: %d => %d \n", @@ -3255,44 +3306,7 @@ redo: next->vm->address_space->page_table) ihk_mc_load_page_table(next->vm->address_space->page_table); -#ifdef POSTK_DEBUG_ARCH_DEP_22 last = arch_switch_context(prev, next); -#else - dkprintf("[%d] schedule: tlsblock_base: 0x%lX\n", - ihk_mc_get_processor_id(), next->tlsblock_base); - - /* Set up new TLS.. 
*/ - ihk_mc_init_user_tlsbase(next->uctx, next->tlsblock_base); - - /* Performance monitoring inherit */ - if(next->proc->monitoring_event) { - if(next->proc->perf_status == PP_RESET) - perf_reset(next->proc->monitoring_event); - if(next->proc->perf_status != PP_COUNT) { - perf_reset(next->proc->monitoring_event); - perf_start(next->proc->monitoring_event); - } - } - -#ifdef PROFILE_ENABLE - if (prev->profile && prev->profile_start_ts != 0) { - prev->profile_elapsed_ts += - (rdtsc() - prev->profile_start_ts); - prev->profile_start_ts = 0; - } - - if (next->profile && next->profile_start_ts == 0) { - next->profile_start_ts = rdtsc(); - } -#endif - - if (prev) { - last = ihk_mc_switch_context(&prev->ctx, &next->ctx, prev); - } - else { - last = ihk_mc_switch_context(NULL, &next->ctx, prev); - } -#endif /* POSTK_DEBUG_ARCH_DEP_22 */ /* * We must hold the lock throughout the context switch, otherwise @@ -3306,11 +3320,31 @@ redo: if ((last != NULL) && (last->status == PS_EXITED)) { release_thread(last); + rusage_num_threads_dec(); +#ifdef RUSAGE_DEBUG + if (rusage->num_threads == 0) { + int i; + + kprintf("total_memory_usage=%ld\n", + rusage->total_memory_usage); + for (i = 0; i < IHK_MAX_NUM_PGSIZES; i++) { + kprintf("memory_stat_rss[%d]=%ld\n", i, + rusage->memory_stat_rss[i]); + } + for (i = 0; i < IHK_MAX_NUM_PGSIZES; i++) { + kprintf( + "memory_stat_mapped_file[%d]=%ld\n", + i, + rusage->memory_stat_mapped_file[i]); + } + } +#endif } /* Have we migrated to another core meanwhile? 
*/ if (v != get_this_cpu_local_var()) { - goto redo; + v = get_this_cpu_local_var(); + v->flags &= ~CPU_FLAG_NEED_RESCHED; } } else { @@ -3322,8 +3356,15 @@ redo: void release_cpuid(int cpuid) { - if (!get_cpu_local_var(cpuid)->runq_len) - get_cpu_local_var(cpuid)->status = CPU_STATUS_IDLE; + unsigned long irqstate; + struct cpu_local_var *v = get_cpu_local_var(cpuid); + irqstate = ihk_mc_spinlock_lock(&runq_reservation_lock); + ihk_mc_spinlock_lock_noirq(&(v->runq_lock)); + if (!v->runq_len) + v->status = CPU_STATUS_IDLE; + __sync_fetch_and_sub(&v->runq_reserved, 1); + ihk_mc_spinlock_unlock_noirq(&(v->runq_lock)); + ihk_mc_spinlock_unlock(&runq_reservation_lock, irqstate); } void check_need_resched(void) @@ -3381,6 +3422,9 @@ int __sched_wakeup_thread(struct thread *thread, mcs_rwlock_writer_unlock_noirq(&proc->update_lock, &updatelock); xchg4((int *)(&thread->status), PS_RUNNING); status = 0; + + /* Make interrupt_exit() call schedule() */ + v->flags |= CPU_FLAG_NEED_RESCHED; } else { status = -EINVAL; @@ -3489,13 +3533,17 @@ void runq_add_thread(struct thread *thread, int cpu_id) { struct cpu_local_var *v = get_cpu_local_var(cpu_id); unsigned long irqstate; - - irqstate = ihk_mc_spinlock_lock(&(v->runq_lock)); + irqstate = ihk_mc_spinlock_lock(&runq_reservation_lock); + ihk_mc_spinlock_lock_noirq(&(v->runq_lock)); __runq_add_thread(thread, cpu_id); - ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate); + __sync_fetch_and_sub(&v->runq_reserved, 1); + ihk_mc_spinlock_unlock_noirq(&(v->runq_lock)); + ihk_mc_spinlock_unlock(&runq_reservation_lock, irqstate); procfs_create_thread(thread); + __sync_add_and_fetch(&thread->proc->clone_count, 1); + dkprintf("%s: clone_count is %d\n", __FUNCTION__, thread->proc->clone_count); rusage_num_threads_inc(); #ifdef RUSAGE_DEBUG if (rusage->num_threads == 1) { @@ -3539,22 +3587,26 @@ void runq_del_thread(struct thread *thread, int cpu_id) } struct thread * -find_thread(int pid, int tid, struct mcs_rwlock_node_irqsave *lock) 
+find_thread(int pid, int tid) { struct thread *thread; struct thread_hash *thash = cpu_local_var(resource_set)->thread_hash; int hash = thread_hash(tid); + struct mcs_rwlock_node_irqsave lock; if(tid <= 0) return NULL; - mcs_rwlock_reader_lock(&thash->lock[hash], lock); + mcs_rwlock_reader_lock(&thash->lock[hash], &lock); retry: list_for_each_entry(thread, &thash->list[hash], hash_list){ if(thread->tid == tid){ - if(pid <= 0) - return thread; - if(thread->proc->pid == pid) + if (pid <= 0 || + thread->proc->pid == pid) { + hold_thread(thread); + mcs_rwlock_reader_unlock(&thash->lock[hash], + &lock); return thread; + } } } /* If no thread with pid == tid was found, then we may be looking for a @@ -3564,20 +3616,16 @@ retry: pid = 0; goto retry; } - mcs_rwlock_reader_unlock(&thash->lock[hash], lock); + mcs_rwlock_reader_unlock(&thash->lock[hash], &lock); return NULL; } void -thread_unlock(struct thread *thread, struct mcs_rwlock_node_irqsave *lock) +thread_unlock(struct thread *thread) { - struct thread_hash *thash = cpu_local_var(resource_set)->thread_hash; - int hash; - if(!thread) return; - hash = thread_hash(thread->tid); - mcs_rwlock_reader_unlock(&thash->lock[hash], lock); + release_thread(thread); } struct process * @@ -3632,8 +3680,9 @@ debug_log(unsigned long arg) if (p == pid1) continue; found++; - kprintf("pid=%d ppid=%d status=%d\n", - p->pid, p->ppid_parent->pid, p->status); + kprintf("pid=%d ppid=%d status=%d ref=%d\n", + p->pid, p->ppid_parent->pid, p->status, + p->refcount.counter); } __mcs_rwlock_reader_unlock(&phash->lock[i], &lock); } @@ -3644,9 +3693,11 @@ debug_log(unsigned long arg) __mcs_rwlock_reader_lock(&thash->lock[i], &lock); list_for_each_entry(t, &thash->list[i], hash_list){ found++; - kprintf("cpu=%d pid=%d tid=%d status=%d offload=%d\n", - t->cpu_id, t->proc->pid, t->tid, - t->status, t->in_syscall_offload); + kprintf("cpu=%d pid=%d tid=%d status=%d " + "offload=%d ref=%d ptrace=%08x\n", + t->cpu_id, t->proc->pid, t->tid, + t->status, 
t->in_syscall_offload, + t->refcount.counter, t->ptrace); } __mcs_rwlock_reader_unlock(&thash->lock[i], &lock); } @@ -3677,3 +3728,47 @@ debug_log(unsigned long arg) break; } } + +int access_ok(struct process_vm *vm, int type, uintptr_t addr, size_t len) { + struct vm_range *range, *next; + int first = true; + + range = lookup_process_memory_range(vm, addr, addr + len); + + if (!range || range->start > addr) { + kprintf("%s: No VM range at 0x%llx, refusing access\n", + __FUNCTION__, addr); + return -EFAULT; + } + do { + if (first) { + first = false; + } else { + next = next_process_memory_range(vm, range); + if (!next) { + kprintf("%s: No VM range after 0x%llx, but checking until 0x%llx. Refusing access\n", + __FUNCTION__, range->end, addr + len); + return -EFAULT; + } + if (range->end != next->start) { + kprintf("%s: 0x%llx - 0x%llx and 0x%llx - 0x%llx are not adjacent (request was %0x%llx-0x%llx %zu)\n", + __FUNCTION__, range->start, range->end, + next->start, next->end, + addr, addr+len, len); + return -EFAULT; + } + range = next; + } + + if ((type == VERIFY_WRITE && !(range->flag & VR_PROT_WRITE)) || + (type == VERIFY_READ && !(range->flag & VR_PROT_READ))) { + kprintf("%s: 0x%llx - 0x%llx does not have prot %s (request was %0x%llx-0x%llx %zu)\n", + __FUNCTION__, range->start, range->end, + type == VERIFY_WRITE ? 
"write" : "ready", + addr, addr+len, len); + return -EACCES; + } + } while (addr + len > range->end); + + return 0; +} diff --git a/kernel/procfs.c b/kernel/procfs.c index 071f13f3..f8f53916 100644 --- a/kernel/procfs.c +++ b/kernel/procfs.c @@ -40,11 +40,85 @@ extern int sprintf(char * buf, const char *fmt, ...); extern int sscanf(const char * buf, const char * fmt, ...); extern int scnprintf(char * buf, size_t size, const char *fmt, ...); +struct mckernel_procfs_buffer { + unsigned long next_pa; + unsigned long pos; + unsigned long size; + char buf[0]; +}; + +#define PA_NULL (-1L) + +static struct mckernel_procfs_buffer *buf_alloc(unsigned long *phys, long pos) +{ + struct mckernel_procfs_buffer *buf; + + buf = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT); + if (!buf) + return NULL; + buf->next_pa = PA_NULL; + buf->pos = pos; + buf->size = 0; + if (phys) + *phys = virt_to_phys(buf); + return buf; +} + +static void buf_free(unsigned long phys) +{ + struct mckernel_procfs_buffer *pbuf; + unsigned long next; + + while (phys != PA_NULL) { + pbuf = phys_to_virt(phys); + next = pbuf->next_pa; + ihk_mc_free_pages(pbuf, 1); + phys = next; + } +} + +static int buf_add(struct mckernel_procfs_buffer **top, + struct mckernel_procfs_buffer **cur, + const void *buf, int l) +{ + int pos = 0; + int r; + int bufmax = PAGE_SIZE - sizeof(struct mckernel_procfs_buffer); + const char *chr = buf; + + if (!*top) { + *top = *cur = buf_alloc(NULL, 0); + if (!*top) + return -ENOMEM; + } + while (l) { + r = bufmax - (*cur)->size; + if (!r) { + *cur = buf_alloc(&(*cur)->next_pa, (*cur)->pos + + bufmax); + if (!*cur) { + buf_free(virt_to_phys(*top)); + return -ENOMEM; + } + r = bufmax; + } + if (r > l) { + r = l; + } + memcpy((*cur)->buf + (*cur)->size, chr + pos, r); + l -= r; + pos += r; + (*cur)->size += r; + } + return 0; +} + static void procfs_thread_ctl(struct thread *thread, int msg) { struct ihk_ikc_channel_desc *syscall_channel; struct ikc_scd_packet packet; + int done = 0; 
syscall_channel = cpu_local_var(ikc2linux); memset(&packet, '\0', sizeof packet); @@ -53,9 +127,14 @@ procfs_thread_ctl(struct thread *thread, int msg) packet.osnum = ihk_mc_get_osnum(); packet.ref = thread->cpu_id; packet.pid = thread->proc->pid; + packet.resp_pa = virt_to_phys(&done); packet.err = 0; ihk_ikc_send(syscall_channel, &packet, 0); + if (msg == SCD_MSG_PROCFS_TID_CREATE) { + while (!done) + cpu_pause(); + } } void @@ -75,7 +154,7 @@ procfs_delete_thread(struct thread *thread) * * \param rarg returned argument */ -void process_procfs_request(struct ikc_scd_packet *rpacket) +int process_procfs_request(struct ikc_scd_packet *rpacket) { unsigned long rarg = rpacket->arg; unsigned long parg, pbuf; @@ -83,49 +162,76 @@ void process_procfs_request(struct ikc_scd_packet *rpacket) struct process *proc = NULL; struct process_vm *vm = NULL; struct procfs_read *r; - struct ikc_scd_packet packet; int osnum = ihk_mc_get_osnum(); int rosnum, ret, pid, tid, ans = -EIO, eof = 0; - char *buf, *p; - struct ihk_ikc_channel_desc *syscall_channel; + char *buf, *p = NULL; + char *vbuf = NULL; + char *tmp = NULL; struct mcs_rwlock_node_irqsave lock; unsigned long offset; int count; int npages; int readwrite = 0; + int err = -EIO; + struct mckernel_procfs_buffer *buf_top = NULL; + struct mckernel_procfs_buffer *buf_cur = NULL; dprintf("process_procfs_request: invoked.\n"); - syscall_channel = get_cpu_local_var(0)->ikc2linux; - dprintf("rarg: %x\n", rarg); parg = ihk_mc_map_memory(NULL, rarg, sizeof(struct procfs_read)); dprintf("parg: %x\n", parg); r = ihk_mc_map_virtual(parg, 1, PTATTR_WRITABLE | PTATTR_ACTIVE); if (r == NULL) { + ihk_mc_unmap_memory(NULL, parg, sizeof(struct procfs_read)); kprintf("ERROR: process_procfs_request: got a null procfs_read structure.\n"); - packet.err = -EIO; - goto dataunavail; + goto err; } dprintf("r: %p\n", r); - dprintf("remote pbuf: %x\n", r->pbuf); - pbuf = ihk_mc_map_memory(NULL, r->pbuf, r->count); - dprintf("pbuf: %x\n", pbuf); - count 
= r->count + ((uintptr_t)pbuf & (PAGE_SIZE - 1)); - npages = (count + (PAGE_SIZE - 1)) / PAGE_SIZE; - buf = ihk_mc_map_virtual(pbuf, npages, PTATTR_WRITABLE | PTATTR_ACTIVE); - dprintf("buf: %p\n", buf); - if (buf == NULL) { - kprintf("ERROR: process_procfs_request: got a null buffer.\n"); - packet.err = -EIO; - goto bufunavail; + if (rpacket->msg == SCD_MSG_PROCFS_RELEASE) { + struct mckernel_procfs_buffer *pbuf; + unsigned long phys; + unsigned long next; + + for (phys = r->pbuf; phys != PA_NULL; phys = next) { + pbuf = phys_to_virt(phys); + next = pbuf->next_pa; + ihk_mc_free_pages(pbuf, 1); + } + r->ret = 0; + err = 0; + goto err; } - readwrite = r->readwrite; - count = r->count; + if (r->pbuf == PA_NULL) { + tmp = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT); + if (!tmp) + goto err; + buf = tmp; + count = PAGE_SIZE; + } + else { + dprintf("remote pbuf: %x\n", r->pbuf); + pbuf = ihk_mc_map_memory(NULL, r->pbuf, r->count); + dprintf("pbuf: %x\n", pbuf); + count = r->count + ((uintptr_t)pbuf & (PAGE_SIZE - 1)); + npages = (count + (PAGE_SIZE - 1)) / PAGE_SIZE; + vbuf = ihk_mc_map_virtual(pbuf, npages, + PTATTR_WRITABLE|PTATTR_ACTIVE); + dprintf("buf: %p\n", vbuf); + if (vbuf == NULL) { + ihk_mc_unmap_memory(NULL, pbuf, r->count); + kprintf("ERROR: %s: got a null buffer.\n", __func__); + goto err; + } + buf = vbuf; + readwrite = r->readwrite; + count = r->count; + dprintf("fname: %s, offset: %lx, count:%d.\n", r->fname, + r->offset, r->count); + } offset = r->offset; - dprintf("fname: %s, offset: %lx, count:%d.\n", r->fname, r->offset, r->count); /* * check for "mcos%d/" @@ -205,35 +311,24 @@ void process_procfs_request(struct ikc_scd_packet *rpacket) } else if (!strcmp(p, "stat")) { /* "/proc/stat" */ extern int num_processors; /* kernel/ap.c */ - char *p; - size_t remain; int cpu; - if (offset > 0) { - ans = 0; - eof = 1; - goto end; - } - p = buf; - remain = count; for (cpu = 0; cpu < num_processors; ++cpu) { - size_t n; - - n = snprintf(p, remain, "cpu%d\n", cpu); 
- if (n >= remain) { - ans = -ENOSPC; - eof = 1; - goto end; - } - p += n; + ans = snprintf(buf, count, "cpu%d\n", cpu); + if (buf_add(&buf_top, &buf_cur, buf, ans) < 0) + goto err; } - ans = p - buf; - eof = 1; + ans = 0; goto end; } #ifdef POSTK_DEBUG_ARCH_DEP_42 /* /proc/cpuinfo support added. */ else if (!strcmp(p, "cpuinfo")) { /* "/proc/cpuinfo" */ - ans = ihk_mc_show_cpuinfo(buf, count, offset, &eof); + ans = ihk_mc_show_cpuinfo(buf, count, 0, &eof); + if (ans < 0) + goto err; + if (buf_add(&buf_top, &buf_cur, buf, ans) < 0) + goto err; + ans = 0; goto end; } #endif /* POSTK_DEBUG_ARCH_DEP_42 */ @@ -291,12 +386,7 @@ void process_procfs_request(struct ikc_scd_packet *rpacket) goto end; } -#ifdef POSTK_DEBUG_TEMP_FIX_52 /* NUMA support(memory area determination) */ - if (!is_mckernel_memory(pa)) { -#else - if (pa < ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0) || - pa >= ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0)) { -#endif /* POSTK_DEBUG_TEMP_FIX_52 */ + if (!is_mckernel_memory(pa, pa + size)) { ans = -EIO; goto end; } @@ -318,110 +408,51 @@ void process_procfs_request(struct ikc_scd_packet *rpacket) */ if (strcmp(p, "maps") == 0) { struct vm_range *range; -#ifdef POSTK_DEBUG_TEMP_FIX_47 /* /proc//maps 1024 byte over read fix. */ - int left = PAGE_SIZE * 2; -#else /* POSTK_DEBUG_TEMP_FIX_47 */ - int left = r->count - 1; /* extra 1 for terminating NULL */ -#endif /* POSTK_DEBUG_TEMP_FIX_47 */ - int written = 0; - char *_buf = buf; -#ifdef POSTK_DEBUG_TEMP_FIX_47 /* /proc//maps 1024 byte over read fix. */ - int len = 0; - char *tmp = NULL; - - _buf = tmp = kmalloc(left, IHK_MC_AP_CRITICAL); - if (!tmp) { - kprintf("%s: error allocating /proc/self/maps buffer\n", - __FUNCTION__); - ans = 0; - goto end; - } -#endif /* POSTK_DEBUG_TEMP_FIX_47 */ - -#ifndef POSTK_DEBUG_TEMP_FIX_47 /* /proc//maps 1024 byte over read fix. 
*/ - /* Starting from the middle of a proc file is not supported for maps */ - if (offset > 0) { - ans = 0; - eof = 1; - goto end; - } -#endif /* POSTK_DEBUG_TEMP_FIX_47 */ ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock); range = lookup_process_memory_range(vm, 0, -1); while (range) { - int written_now; - /* format is (from man proc): * address perms offset dev inode pathname * 08048000-08056000 r-xp 00000000 03:0c 64593 /usr/sbin/gpm */ - written_now = snprintf(_buf, left, - "%012lx-%012lx %s%s%s%s %lx %lx:%lx %d\t\t\t%s\n", - range->start, range->end, - range->flag & VR_PROT_READ ? "r" : "-", - range->flag & VR_PROT_WRITE ? "w" : "-", - range->flag & VR_PROT_EXEC ? "x" : "-", - range->flag & VR_PRIVATE ? "p" : "s", - /* TODO: fill in file details! */ - 0UL, - 0UL, - 0UL, - 0, - range->memobj && range->memobj->path ? range->memobj->path : - range->start == - (unsigned long)vm->vdso_addr ? "[vdso]" : - range->start == - (unsigned long)vm->vvar_addr ? "[vsyscall]" : - range->flag & VR_STACK ? "[stack]" : - range->start >= vm->region.brk_start && - range->end <= vm->region.brk_end_allocated ? - "[heap]" : + ans = snprintf(buf, count, + "%012lx-%012lx %s%s%s%s %lx %lx:%lx %d\t\t\t%s\n", + range->start, range->end, + range->flag & VR_PROT_READ ? "r" : "-", + range->flag & VR_PROT_WRITE ? "w" : "-", + range->flag & VR_PROT_EXEC ? "x" : "-", + range->flag & VR_PRIVATE ? "p" : "s", + /* TODO: fill in file details! */ + 0UL, + 0UL, + 0UL, + 0, + range->memobj && range->memobj->path ? + range->memobj->path : + range->start == (unsigned long)vm->vdso_addr ? + "[vdso]" : + range->start == (unsigned long)vm->vvar_addr ? + "[vsyscall]" : + range->flag & VR_STACK ? + "[stack]" : + range->start >= vm->region.brk_start && + range->end <= vm->region.brk_end_allocated ? + "[heap]" : "" - ); - - left -= written_now; - _buf += written_now; - written += written_now; + ); -#ifdef POSTK_DEBUG_TEMP_FIX_47 /* /proc//maps 1024 byte over read fix. 
*/ - if (left == 0) { - kprintf("%s(): WARNING: buffer too small to fill proc/maps\n", - __FUNCTION__); - break; + if (buf_add(&buf_top, &buf_cur, buf, ans) < 0) { + ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); + goto err; } -#else /* POSTK_DEBUG_TEMP_FIX_47 */ - if (left == 1) { - kprintf("%s(): WARNING: buffer too small to fill proc/maps\n", - __FUNCTION__); - break; - } -#endif /* POSTK_DEBUG_TEMP_FIX_47 */ range = next_process_memory_range(vm, range); } ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); -#ifdef POSTK_DEBUG_TEMP_FIX_47 /* /proc//maps 1024 byte over read fix. */ - len = strlen(tmp); - if (r->offset < len) { - if (r->offset + r->count < len) { - ans = r->count; - } else { - eof = 1; - ans = len; - } - strncpy(buf, tmp + r->offset, ans); - } else if (r->offset == len) { - ans = 0; - eof = 1; - } - kfree(tmp); -#else /* POSTK_DEBUG_TEMP_FIX_47 */ - ans = written; - eof = 1; -#endif /* POSTK_DEBUG_TEMP_FIX_47 */ + ans = 0; goto end; } @@ -470,28 +501,16 @@ void process_procfs_request(struct ikc_scd_packet *rpacket) extern int num_processors; /* kernel/ap.c */ struct vm_range *range; unsigned long lockedsize = 0; - char *tmp; char *bitmasks; int bitmasks_offset = 0; char *cpu_bitmask, *cpu_list, *numa_bitmask, *numa_list; - int len; char *state; - tmp = kmalloc(8192, IHK_MC_AP_CRITICAL); - if (!tmp) { - kprintf("%s: error allocating /proc/self/status buffer\n", - __FUNCTION__); - ans = 0; - goto end; - } - bitmasks = kmalloc(BITMASKS_BUF_SIZE, IHK_MC_AP_CRITICAL); - if (!tmp) { + if (!bitmasks) { kprintf("%s: error allocating /proc/self/status bitmaks buffer\n", __FUNCTION__); - kfree(tmp); - ans = 0; - goto end; + goto err; } ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock); @@ -534,35 +553,46 @@ void process_procfs_request(struct ikc_scd_packet *rpacket) state = "T (tracing stop)"; else if (proc->status == PS_EXITED) state = "Z (zombie)"; - sprintf(tmp, - "Uid:\t%d\t%d\t%d\t%d\n" - "Gid:\t%d\t%d\t%d\t%d\n" + ans = snprintf(buf, 
count, + "Pid:\t%d\n" + "Uid:\t%d\t%d\t%d\t%d\n" + "Gid:\t%d\t%d\t%d\t%d\n" "State:\t%s\n" - "VmLck:\t%9lu kB\n" - "Cpus_allowed:\t%s\n" - "Cpus_allowed_list:\t%s\n" - "Mems_allowed:\t%s\n" - "Mems_allowed_list:\t%s\n", - proc->ruid, proc->euid, proc->suid, proc->fsuid, - proc->rgid, proc->egid, proc->sgid, proc->fsgid, + "VmLck:\t%9lu kB\n", + proc->pid, + proc->ruid, proc->euid, proc->suid, proc->fsuid, + proc->rgid, proc->egid, proc->sgid, proc->fsgid, state, - (lockedsize + 1023) >> 10, - cpu_bitmask, cpu_list, numa_bitmask, numa_list); - len = strlen(tmp); - if (r->offset < len) { - if (r->offset + r->count < len) { - ans = r->count; - } else { - eof = 1; - ans = len; - } - strncpy(buf, tmp + r->offset, ans); - } else if (r->offset == len) { - ans = 0; - eof = 1; + (lockedsize + 1023) >> 10); + if (buf_add(&buf_top, &buf_cur, buf, ans) < 0) { + goto err; + } + + ans = snprintf(buf, count, "Cpus_allowed:\t%s\n", cpu_bitmask); + if (buf_add(&buf_top, &buf_cur, buf, ans) < 0) { + kfree(bitmasks); + goto err; + } + ans = snprintf(buf, count, "Cpus_allowed_list:\t%s\n", + cpu_list); + if (buf_add(&buf_top, &buf_cur, buf, ans) < 0) { + kfree(bitmasks); + goto err; + } + ans = snprintf(buf, count, "Mems_allowed:\t%s\n", + numa_bitmask); + if (buf_add(&buf_top, &buf_cur, buf, ans) < 0) { + kfree(bitmasks); + goto err; + } + ans = snprintf(buf, count, "Mems_allowed_list:\t%s\n", + numa_list); + if (buf_add(&buf_top, &buf_cur, buf, ans) < 0) { + kfree(bitmasks); + goto err; } - kfree(tmp); kfree(bitmasks); + ans = 0; goto end; } @@ -571,20 +601,10 @@ void process_procfs_request(struct ikc_scd_packet *rpacket) */ if (strcmp(p, "auxv") == 0) { unsigned int limit = AUXV_LEN * sizeof(unsigned long); - unsigned int len = r->count; - if (r->offset < limit) { - if (limit < r->offset + r->count) { - len = limit - r->offset; - } - memcpy((void *)buf, ((char *) proc->saved_auxv) + r->offset, len); - ans = len; - if (r->offset + len == limit) { - eof = 1; - } - } else if 
(r->offset == limit) { - ans = 0; - eof = 1; - } + + if (buf_add(&buf_top, &buf_cur, proc->saved_auxv, limit) < 0) + goto err; + ans = 0; goto end; } @@ -593,27 +613,17 @@ void process_procfs_request(struct ikc_scd_packet *rpacket) */ if (strcmp(p, "cmdline") == 0) { unsigned int limit = proc->saved_cmdline_len; - unsigned int len = r->count; if(!proc->saved_cmdline){ + if (buf_add(&buf_top, &buf_cur, "", 0) < 0) + goto err; ans = 0; - eof = 1; goto end; } - if (r->offset < limit) { - if (limit < r->offset + r->count) { - len = limit - r->offset; - } - memcpy((void *)buf, ((char *) proc->saved_cmdline) + r->offset, len); - ans = len; - if (r->offset + len == limit) { - eof = 1; - } - } else if (r->offset == limit) { - ans = 0; - eof = 1; - } + if (buf_add(&buf_top, &buf_cur, proc->saved_cmdline, limit) < 0) + goto err; + ans = 0; goto end; } @@ -624,9 +634,25 @@ void process_procfs_request(struct ikc_scd_packet *rpacket) * of the process. The count is the length of the area. */ + if (!strcmp(p, "comm")) { + const char *comm = "exe"; + + if (proc->saved_cmdline) { + comm = strrchr(proc->saved_cmdline, '/'); + if (comm) + comm++; + else + comm = proc->saved_cmdline; + } + + ans = snprintf(buf, count, "%s\n", comm); + if (buf_add(&buf_top, &buf_cur, buf, ans) < 0) + goto err; + ans = 0; + goto end; + } + if (!strcmp(p, "stat")) { - char tmp[1024]; - int len; /* * pid (comm) state ppid @@ -641,7 +667,7 @@ void process_procfs_request(struct ikc_scd_packet *rpacket) * cnswap exit_signal processor rt_priority * policy delayacct_blkio_ticks guest_time cguest_time */ - ans = sprintf(tmp, + ans = snprintf(buf, count, "%d (%s) %c %d " // pid... "%d %d %d %d " // pgrp... "%u %lu %lu %lu " // flags... @@ -665,21 +691,10 @@ void process_procfs_request(struct ikc_scd_packet *rpacket) 0L, 0, thread->cpu_id, 0, // cnswap... 0, 0LL, 0L, 0L // policy... 
); - dprintf("tmp=%s\n", tmp); - len = strlen(tmp); - if (r->offset < len) { - if (r->offset + r->count < len) { - ans = r->count; - } else { - eof = 1; - ans = len; - } - strncpy(buf, tmp + r->offset, ans); - } else if (r->offset == len) { - ans = 0; - eof = 1; - } + if (buf_add(&buf_top, &buf_cur, buf, ans) < 0) + goto err; + ans = 0; goto end; } @@ -689,32 +704,31 @@ void process_procfs_request(struct ikc_scd_packet *rpacket) kprintf("unsupported procfs entry: %d/%s\n", pid, p); end: - ihk_mc_unmap_virtual(buf, npages, 0); dprintf("ret: %d, eof: %d\n", ans, eof); r->ret = ans; r->eof = eof; - r->status = 1; /* done */ - packet.err = 0; -bufunavail: - ihk_mc_unmap_memory(NULL, pbuf, r->count); - ihk_mc_unmap_virtual(r, 1, 0); -dataunavail: - ihk_mc_unmap_memory(NULL, parg, sizeof(struct procfs_read)); - - packet.msg = SCD_MSG_PROCFS_ANSWER; - packet.arg = rarg; - packet.pid = rpacket->pid; - - ret = ihk_ikc_send(syscall_channel, &packet, 0); - if (ret < 0) { - kprintf("ERROR: sending IKC msg, ret: %d\n", ret); + err = 0; + if (r->pbuf == PA_NULL && buf_top) + r->pbuf = virt_to_phys(buf_top); +err: + if (vbuf) { + ihk_mc_unmap_virtual(vbuf, npages); + ihk_mc_unmap_memory(NULL, pbuf, r->count); } + if (r) { + ihk_mc_unmap_virtual(r, 1); + ihk_mc_unmap_memory(NULL, parg, sizeof(struct procfs_read)); + } + if (tmp) { + ihk_mc_free_pages(tmp, 1); + } + if(proc) release_process(proc); if(thread) release_thread(thread); if(vm) release_process_vm(vm); - return; + return err; } diff --git a/kernel/profile.c b/kernel/profile.c index d434ad7d..4f3e66fb 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -39,6 +39,7 @@ #include #include #include +#include extern char *syscall_name[]; @@ -47,11 +48,8 @@ extern char *syscall_name[]; //#define DEBUG_PRINT_PROFILE #ifdef DEBUG_PRINT_PROFILE -#define dkprintf(...) kprintf(__VA_ARGS__) -#define ekprintf(...) kprintf(__VA_ARGS__) -#else -#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) -#define ekprintf(...) 
kprintf(__VA_ARGS__) +#undef DDEBUG_DEFAULT +#define DDEBUG_DEFAULT DDEBUG_PRINT #endif @@ -72,7 +70,7 @@ char *profile_event_names[] = "" }; -mcs_lock_node_t job_profile_lock = {0, NULL}; +mcs_lock_node_t job_profile_lock = { 0 }; struct profile_event *job_profile_events = NULL; int job_nr_processes = -1; int job_nr_processes_left = -1; @@ -445,7 +443,7 @@ void profile_dealloc_proc_events(struct process *proc) kfree(proc->profile_events); } -void static profile_clear_process(struct process *proc) +static void profile_clear_process(struct process *proc) { proc->profile_elapsed_ts = 0; if (!proc->profile_events) return; @@ -454,7 +452,7 @@ void static profile_clear_process(struct process *proc) sizeof(*proc->profile_events) * PROFILE_EVENT_MAX); } -void static profile_clear_thread(struct thread *thread) +static void profile_clear_thread(struct thread *thread) { thread->profile_start_ts = 0; thread->profile_elapsed_ts = 0; diff --git a/kernel/shmobj.c b/kernel/shmobj.c index 473aaadb..1b3a7d77 100644 --- a/kernel/shmobj.c +++ b/kernel/shmobj.c @@ -23,23 +23,19 @@ #include #include #include +#include -#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) -#define ekprintf(...) kprintf(__VA_ARGS__) -#define fkprintf(...) 
kprintf(__VA_ARGS__) static LIST_HEAD(shmobj_list_head); static ihk_spinlock_t shmobj_list_lock_body = SPIN_LOCK_UNLOCKED; -static memobj_release_func_t shmobj_release; -static memobj_ref_func_t shmobj_ref; +static memobj_free_func_t shmobj_free; static memobj_get_page_func_t shmobj_get_page; static memobj_invalidate_page_func_t shmobj_invalidate_page; static memobj_lookup_page_func_t shmobj_lookup_page; static struct memobj_ops shmobj_ops = { - .release = &shmobj_release, - .ref = &shmobj_ref, + .free = &shmobj_free, .get_page = &shmobj_get_page, .invalidate_page = &shmobj_invalidate_page, .lookup_page = &shmobj_lookup_page, @@ -61,9 +57,20 @@ static struct memobj *to_memobj(struct shmobj *shmobj) static void page_list_init(struct shmobj *obj) { INIT_LIST_HEAD(&obj->page_list); + ihk_mc_spinlock_init(&obj->page_list_lock); return; } +static void page_list_lock(struct shmobj *obj) +{ + ihk_mc_spinlock_lock_noirq(&obj->page_list_lock); +} + +static void page_list_unlock(struct shmobj *obj) +{ + ihk_mc_spinlock_unlock_noirq(&obj->page_list_lock); +} + static void page_list_insert(struct shmobj *obj, struct page *page) { list_add(&page->list, &obj->page_list); @@ -182,15 +189,14 @@ int shmobj_create(struct shmid_ds *ds, struct memobj **objp) obj->memobj.ops = &shmobj_ops; obj->memobj.flags = MF_SHM; obj->memobj.size = ds->shm_segsz; + ihk_atomic_set(&obj->memobj.refcnt, 1); obj->ds = *ds; obj->ds.shm_perm.seq = the_seq++; - obj->ds.shm_nattch = 1; obj->ds.init_pgshift = 0; obj->index = -1; obj->pgshift = pgshift; obj->real_segsz = (obj->ds.shm_segsz + pgsize - 1) & ~(pgsize - 1); page_list_init(obj); - ihk_mc_spinlock_init(&obj->memobj.lock); error = 0; *objp = to_memobj(obj); @@ -218,7 +224,7 @@ int shmobj_create_indexed(struct shmid_ds *ds, struct shmobj **objp) return error; } -void shmobj_destroy(struct shmobj *obj) +static void shmobj_destroy(struct shmobj *obj) { extern struct shm_info the_shm_info; extern struct list_head kds_free_list; @@ -246,6 +252,7 @@ void 
shmobj_destroy(struct shmobj *obj) void *page_va; uintptr_t phys; + /* no lock required as obj is inaccessible */ page = page_list_first(obj); if (!page) { break; @@ -273,7 +280,7 @@ void shmobj_destroy(struct shmobj *obj) page->mode, page->count); count = ihk_atomic_sub_return(1, &page->count); if (!((page->mode == PM_MAPPED) && (count == 0))) { - fkprintf("shmobj_destroy(%p): " + ekprintf("shmobj_destroy(%p): " "page %p phys %#lx mode %#x" " count %d off %#lx\n", obj, page, @@ -295,6 +302,7 @@ void shmobj_destroy(struct shmobj *obj) --the_shm_info.used_ids; list_add(&obj->chain, &kds_free_list); + /* For index reuse, release in descending order of index. */ for (;;) { struct shmobj *p; @@ -315,61 +323,22 @@ void shmobj_destroy(struct shmobj *obj) return; } -static void shmobj_release(struct memobj *memobj) +static void shmobj_free(struct memobj *memobj) { struct shmobj *obj = to_shmobj(memobj); - struct thread *thread = cpu_local_var(current); - struct process *proc = thread->proc; - struct shmobj *freeobj = NULL; - long newref; extern time_t time(void); - dkprintf("shmobj_release(%p)\n", memobj); - memobj_lock(&obj->memobj); - if (obj->index >= 0) { - obj->ds.shm_dtime = time(); - obj->ds.shm_lpid = proc->pid; - dkprintf("shmobj_release:drop shm_nattach %p %d\n", obj, obj->ds.shm_nattch); - } - newref = --obj->ds.shm_nattch; - if (newref <= 0) { - if (newref < 0) { - fkprintf("shmobj_release(%p):ref %ld\n", - memobj, newref); - panic("shmobj_release:freeing free shmobj"); - } - if (obj->ds.shm_perm.mode & SHM_DEST) { - freeobj = obj; - } - } - memobj_unlock(&obj->memobj); + dkprintf("%s(%p)\n", __func__, memobj); - if (freeobj) { - shmobj_list_lock(); - shmobj_destroy(freeobj); - shmobj_list_unlock(); + shmobj_list_lock(); + if (!(obj->ds.shm_perm.mode & SHM_DEST)) { + ekprintf("%s called without going through rmid?", __func__); } - dkprintf("shmobj_release(%p): %ld\n", memobj, newref); - return; -} -static void shmobj_ref(struct memobj *memobj) -{ - struct 
shmobj *obj = to_shmobj(memobj); - struct thread *thread = cpu_local_var(current); - struct process *proc = thread->proc; - long newref; - extern time_t time(void); + shmobj_destroy(obj); + shmobj_list_unlock(); - dkprintf("shmobj_ref(%p)\n", memobj); - memobj_lock(&obj->memobj); - newref = ++obj->ds.shm_nattch; - if (obj->index >= 0) { - obj->ds.shm_atime = time(); - obj->ds.shm_lpid = proc->pid; - } - memobj_unlock(&obj->memobj); - dkprintf("shmobj_ref(%p): newref %ld\n", memobj, newref); + dkprintf("%s(%p)\n", __func__, memobj); return; } @@ -385,7 +354,7 @@ static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align, dkprintf("shmobj_get_page(%p,%#lx,%d,%p)\n", memobj, off, p2align, physp); - memobj_lock(&obj->memobj); + memobj_ref(memobj); if (off & ~PAGE_MASK) { error = -EINVAL; ekprintf("shmobj_get_page(%p,%#lx,%d,%p):invalid argument. %d\n", @@ -411,12 +380,14 @@ static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align, goto out; } + page_list_lock(obj); page = page_list_lookup(obj, off); if (!page) { npages = 1 << p2align; virt = ihk_mc_alloc_aligned_pages_user(npages, p2align, IHK_MC_AP_NOWAIT, virt_addr); if (!virt) { + page_list_unlock(obj); error = -ENOMEM; ekprintf("shmobj_get_page(%p,%#lx,%d,%p):" "alloc failed. %d\n", @@ -429,7 +400,7 @@ static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align, Add when setting the PTE for a page with count of one in ihk_mc_pt_set_range(). */ if (page->mode != PM_NONE) { - fkprintf("shmobj_get_page(%p,%#lx,%d,%p):" + ekprintf("shmobj_get_page(%p,%#lx,%d,%p):" "page %p %#lx %d %d %#lx\n", memobj, off, p2align, physp, page, page_to_phys(page), page->mode, @@ -446,6 +417,7 @@ static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align, dkprintf("shmobj_get_page(%p,%#lx,%d,%p):alloc page. 
%p %#lx\n", memobj, off, p2align, physp, page, phys); } + page_list_unlock(obj); ihk_atomic_inc(&page->count); @@ -453,7 +425,7 @@ static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align, *physp = page_to_phys(page); out: - memobj_unlock(&obj->memobj); + memobj_unref(memobj); if (virt) { ihk_mc_free_pages_user(virt, npages); } @@ -471,11 +443,14 @@ static int shmobj_invalidate_page(struct memobj *memobj, uintptr_t phys, dkprintf("shmobj_invalidate_page(%p,%#lx,%#lx)\n", memobj, phys, pgsize); + page_list_lock(obj); if (!(page = phys_to_page(phys)) || !(page = page_list_lookup(obj, page->offset))) { + page_list_unlock(obj); error = 0; goto out; } + page_list_unlock(obj); if (ihk_atomic_read(&page->count) == 1) { if (page_unmap(page)) { @@ -504,7 +479,7 @@ static int shmobj_lookup_page(struct memobj *memobj, off_t off, int p2align, dkprintf("shmobj_lookup_page(%p,%#lx,%d,%p)\n", memobj, off, p2align, physp); - memobj_lock(&obj->memobj); + memobj_ref(&obj->memobj); if (off & ~PAGE_MASK) { error = -EINVAL; ekprintf("shmobj_lookup_page(%p,%#lx,%d,%p):invalid argument. %d\n", @@ -530,7 +505,9 @@ static int shmobj_lookup_page(struct memobj *memobj, off_t off, int p2align, goto out; } + page_list_lock(obj); page = page_list_lookup(obj, off); + page_list_unlock(obj); if (!page) { error = -ENOENT; dkprintf("shmobj_lookup_page(%p,%#lx,%d,%p):page not found. 
%d\n", @@ -545,7 +522,7 @@ static int shmobj_lookup_page(struct memobj *memobj, off_t off, int p2align, } out: - memobj_unlock(&obj->memobj); + memobj_unref(&obj->memobj); dkprintf("shmobj_lookup_page(%p,%#lx,%d,%p):%d %#lx\n", memobj, off, p2align, physp, error, phys); return error; diff --git a/kernel/syscall.c b/kernel/syscall.c index c243fd25..36452d4d 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -59,9 +59,8 @@ #include #include #include -#ifdef POSTK_DEBUG_ARCH_DEP_27 -#include -#endif /* POSTK_DEBUG_ARCH_DEP_27 */ +#include +#include "../executer/include/uti.h" /* Headers taken from kitten LWK */ #include @@ -72,11 +71,15 @@ //#define DEBUG_PRINT_SC #ifdef DEBUG_PRINT_SC -#define dkprintf(...) kprintf(__VA_ARGS__) -#define ekprintf(...) kprintf(__VA_ARGS__) +#undef DDEBUG_DEFAULT +#define DDEBUG_DEFAULT DDEBUG_PRINT +#endif + +//#define DEBUG_UTI +#ifdef DEBUG_UTI +#define uti_dkprintf(...) do { ((uti_clv && linux_printk) ? (*linux_printk) : kprintf)(__VA_ARGS__); } while (0) #else -#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) -#define ekprintf(...) kprintf(__VA_ARGS__) +#define uti_dkprintf(...) 
do { } while (0) #endif //static ihk_atomic_t pid_cnt = IHK_ATOMIC_INIT(1024); @@ -110,12 +113,9 @@ char *syscall_name[] MCKERNEL_UNUSED = { }; static ihk_spinlock_t tod_data_lock = SPIN_LOCK_UNLOCKED; -static void calculate_time_from_tsc(struct timespec *ts); +static unsigned long uti_desc; /* Address of struct uti_desc object in syscall_intercept.c */ -void check_signal(unsigned long, void *, int); void save_syscall_return_value(int num, unsigned long rc); -void do_signal(long rc, void *regs, struct thread *thread, struct sig_pending *pending, int num); -extern unsigned long do_kill(struct thread *thread, int pid, int tid, int sig, struct siginfo *info, int ptracecont); extern long alloc_debugreg(struct thread *thread); extern int num_processors; extern unsigned long ihk_mc_get_ns_per_tsc(void); @@ -125,7 +125,6 @@ extern void free_all_process_memory_range(struct process_vm *vm); extern int arch_clear_host_user_space(); extern long arch_ptrace(long request, int pid, long addr, long data); extern struct cpu_local_var *clv; -extern void sync_child_event(struct mc_perf_event *event); int prepare_process_ranges_args_envs(struct thread *thread, struct program_load_desc *pn, @@ -138,24 +137,24 @@ int prepare_process_ranges_args_envs(struct thread *thread, static void do_mod_exit(int status); #endif -static void send_syscall(struct syscall_request *req, int cpu, int pid, struct syscall_response *res) +/* Size of tid table. It needs to be more than #CPUs when CPU + * oversubscription is needed. The examples of CPU oversubscription are: + * (1) pmi_proxy + gdb + #CPU OMP threads + * (2) pmi_proxy + #CPU OMP threads + POSIX AIO IO + POSIX AIO notification + */ +#define NR_TIDS (allow_oversubscribe ? 
(num_processors * 2) : num_processors) + +long (*linux_wait_event)(void *_resp, unsigned long nsec_timeout); +int (*linux_printk)(const char *fmt, ...); +int (*linux_clock_gettime)(clockid_t clk_id, struct timespec *tp); + +static void send_syscall(struct syscall_request *req, int cpu, + struct syscall_response *res) { struct ikc_scd_packet packet IHK_DMA_ALIGN; struct ihk_ikc_channel_desc *syscall_channel = get_cpu_local_var(cpu)->ikc2linux; int ret; - if(req->number == __NR_exit_group || - req->number == __NR_kill){ // interrupt syscall -#ifndef POSTK_DEBUG_TEMP_FIX_26 /* do_syscall arg pid is not targetpid */ - if (req->number == __NR_kill) { - req->rtid = -1; // no response - pid = req->args[0]; - } - if (req->number == __NR_gettid) - pid = req->args[1]; -#endif /* !POSTK_DEBUG_TEMP_FIX_26 */ - } - res->status = 0; req->valid = 0; @@ -167,11 +166,7 @@ static void send_syscall(struct syscall_request *req, int cpu, int pid, struct s #ifdef SYSCALL_BY_IKC packet.msg = SCD_MSG_SYSCALL_ONESIDE; packet.ref = cpu; -#ifdef POSTK_DEBUG_TEMP_FIX_26 /* do_syscall arg pid is not targetpid */ - packet.pid = pid; -#else /* POSTK_DEBUG_TEMP_FIX_26 */ - packet.pid = pid ? 
pid : cpu_local_var(current)->proc->pid; -#endif /* POSTK_DEBUG_TEMP_FIX_26 */ + packet.pid = cpu_local_var(current)->proc->pid; packet.resp_pa = virt_to_phys(res); dkprintf("send syscall, nr: %d, pid: %d\n", req->number, packet.pid); @@ -182,7 +177,7 @@ static void send_syscall(struct syscall_request *req, int cpu, int pid, struct s #endif } -long do_syscall(struct syscall_request *req, int cpu, int pid) +long do_syscall(struct syscall_request *req, int cpu) { struct syscall_response res; struct syscall_request req2 IHK_DMA_ALIGN; @@ -201,9 +196,6 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) t_s = rdtsc(); } #endif // PROFILE_ENABLE -#ifdef POSTK_DEBUG_TEMP_FIX_26 /* do_syscall arg pid is not targetpid */ - int target_pid = pid; -#endif /* POSTK_DEBUG_TEMP_FIX_26 */ dkprintf("SC(%d)[%3d] sending syscall\n", ihk_mc_get_processor_id(), @@ -214,65 +206,30 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) barrier(); -#ifdef POSTK_DEBUG_TEMP_FIX_26 /* do_syscall arg pid is not targetpid */ - switch (req->number) { - case __NR_kill: - req->rtid = -1; // no response - target_pid = req->args[0]; - break; - case __NR_gettid: - target_pid = req->args[1]; - break; - default: - break; - } - target_pid = target_pid ? 
target_pid : proc->pid; -#endif /* POSTK_DEBUG_TEMP_FIX_26 */ - if(req->number != __NR_exit_group){ -#ifdef POSTK_DEBUG_TEMP_FIX_26 /* do_syscall arg pid is not targetpid */ -#ifdef POSTK_DEBUG_TEMP_FIX_48 /* nohost flag missed fix */ - struct process *target_proc = NULL; - struct mcs_rwlock_node_irqsave lock; - - if (target_pid != proc->pid) { - target_proc = find_process(target_pid, &lock); - if (!target_proc) { - return -EPIPE; - } - process_unlock(target_proc, &lock); - } else { - target_proc = proc; - } - - if (target_proc->nohost) { // host is down + if (proc->nohost) {// host is down return -EPIPE; } -#else /* POSTK_DEBUG_TEMP_FIX_48 */ - if (proc->nohost && // host is down - target_pid == proc->pid) { - return -EPIPE; - } -#endif /* POSTK_DEBUG_TEMP_FIX_48 */ -#else /* POSTK_DEBUG_TEMP_FIX_26 */ - if(proc->nohost && // host is down - pid == proc->pid) { - return -EPIPE; - } -#endif /* POSTK_DEBUG_TEMP_FIX_26 */ ++thread->in_syscall_offload; } - /* The current thread is the requester and any thread from - * the pool may serve the request */ + /* The current thread is the requester */ req->rtid = cpu_local_var(current)->tid; - req->ttid = 0; + + if (req->number == __NR_sched_setaffinity && req->args[0] == 0) { + /* mcexec thread serving migrate-to-Linux request must have + the same tid as the requesting McKernel thread because the + serving thread jumps to hfi driver and then jumps to + rus_vm_fault() without registering it into per thread data + by mcctrl_add_per_thread_data()). 
*/ + req->ttid = cpu_local_var(current)->tid/*0*/; + dkprintf("%s: uti, ttid=%d\n", __FUNCTION__, req->ttid); + } else { + /* Any thread from the pool may serve the request */ + req->ttid = 0; + } res.req_thread_status = IHK_SCD_REQ_THREAD_SPINNING; -#ifdef POSTK_DEBUG_TEMP_FIX_26 /* do_syscall arg pid is not targetpid */ - send_syscall(req, cpu, target_pid, &res); -#else /* POSTK_DEBUG_TEMP_FIX_26 */ - send_syscall(req, cpu, pid, &res); -#endif /* POSTK_DEBUG_TEMP_FIX_26 */ + send_syscall(req, cpu, &res); if (req->rtid == -1) { preempt_disable(); @@ -307,6 +264,7 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) v = get_this_cpu_local_var(); if (v->flags & CPU_FLAG_NEED_RESCHED || + v->runq_len > 1 || req->number == __NR_sched_setaffinity) { do_schedule = 1; } @@ -365,11 +323,7 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) req2.ttid = res.stid; res.req_thread_status = IHK_SCD_REQ_THREAD_SPINNING; -#ifdef POSTK_DEBUG_TEMP_FIX_26 /* do_syscall arg pid is not targetpid */ - send_syscall(&req2, cpu, target_pid, &res); -#else /* POSTK_DEBUG_TEMP_FIX_26 */ - send_syscall(&req2, cpu, pid, &res); -#endif /* POSTK_DEBUG_TEMP_FIX_26 */ + send_syscall(&req2, cpu, &res); #ifdef PROFILE_ENABLE profile_event_add(PROFILE_remote_page_fault, (rdtsc() - t_s)); @@ -384,13 +338,14 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) int ns; unsigned long syscall_ret; unsigned long phys; + struct syscall_request req2 IHK_DMA_ALIGN; /* debug */ phys = ihk_mc_map_memory(NULL, res.fault_address, sizeof(struct syscall_request)); requestp = ihk_mc_map_virtual(phys, 1, PTATTR_WRITABLE | PTATTR_ACTIVE); memcpy(&request, requestp, sizeof request); - ihk_mc_unmap_virtual(requestp, 1, 1); + ihk_mc_unmap_virtual(requestp, 1); ihk_mc_unmap_memory(NULL, phys, sizeof(struct syscall_request)); num = request.number; @@ -433,8 +388,6 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) /* send result */ req2.number = __NR_mmap; -#define 
PAGER_RESUME_PAGE_FAULT 0x0101 - req2.args[0] = PAGER_RESUME_PAGE_FAULT; req2.args[1] = syscall_ret; /* The current thread is the requester and only the waiting thread * may serve the request */ @@ -442,7 +395,7 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) req2.ttid = res.stid; res.req_thread_status = IHK_SCD_REQ_THREAD_SPINNING; - send_syscall(&req2, cpu, pid, &res); + send_syscall(&req2, cpu, &res); } } if (req->rtid == -1) { @@ -461,7 +414,7 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) /* -ERESTARTSYS indicates that the proxy process is gone * and the application should be terminated */ if (rc == -ERESTARTSYS) { - kprintf("%s: proxy PID %d is dead, terminate()\n", + dkprintf("%s: proxy PID %d is dead, terminate()\n", __FUNCTION__, thread->proc->pid); thread->proc->nohost = 1; } @@ -493,15 +446,13 @@ long syscall_generic_forwarding(int n, ihk_mc_user_context_t *ctx) static int wait_zombie(struct thread *thread, struct process *child, int *status, int options) { int ret; struct syscall_request request IHK_DMA_ALIGN; - int ppid = 0; dkprintf("wait_zombie,found PS_ZOMBIE process: %d\n", child->pid); - if (status) { - *status = child->exit_status; - } + if (status) { + *status = child->group_exit_status; + } - ppid = child->ppid_parent->pid; if(child->ppid_parent->pid != thread->proc->pid || child->nowait) return child->pid; request.number = __NR_wait4; @@ -509,7 +460,7 @@ static int wait_zombie(struct thread *thread, struct process *child, int *status request.args[1] = 0; request.args[2] = options; /* Ask host to clean up exited child */ - ret = do_syscall(&request, ihk_mc_get_processor_id(), ppid); + ret = do_syscall(&request, ihk_mc_get_processor_id()); if (ret != child->pid) kprintf("WARNING: host waitpid failed?\n"); @@ -526,9 +477,17 @@ static int wait_stopped(struct thread *thread, struct process *child, struct thr int ret; /* Copy exit_status created in do_signal */ - int *exit_status = (child->status == PS_STOPPED || 
!c_thread) ? - &child->group_exit_status : - &c_thread->exit_status; + int *exit_status; + + if (c_thread) { + exit_status = &c_thread->exit_status; + } + else if (child->status & (PS_STOPPED | PS_DELAY_STOPPED)) { + exit_status = &child->group_exit_status; + } + else { + exit_status = &child->main_thread->exit_status; + } /* Skip this process because exit_status has been reaped. */ if (!*exit_status) { @@ -549,12 +508,14 @@ static int wait_stopped(struct thread *thread, struct process *child, struct thr dkprintf("wait_stopped,child->pid=%d,status=%08x\n", child->pid, status ? *status : -1); - ret = child->pid; + ret = c_thread ? c_thread->tid : child->pid; out: return ret; } -static int wait_continued(struct thread *thread, struct process *child, int *status, int options) { +static int wait_continued(struct thread *thread, struct process *child, + struct thread *c_thread, int *status, int options) +{ int ret; if (status) { @@ -563,40 +524,50 @@ static int wait_continued(struct thread *thread, struct process *child, int *sta /* Reap signal_flags */ if(!(options & WNOWAIT)) { - child->signal_flags &= ~SIGNAL_STOP_CONTINUED; + if (c_thread) + c_thread->signal_flags &= ~SIGNAL_STOP_CONTINUED; + else + child->main_thread->signal_flags &= + ~SIGNAL_STOP_CONTINUED; } dkprintf("wait4,SIGNAL_STOP_CONTINUED,pid=%d,status=%08x\n", child->pid, status ? *status : -1); - ret = child->pid; + ret = c_thread ? 
c_thread->tid : child->pid; return ret; } -struct thread *find_thread_of_process(struct process *child, int pid) -{ - int c_found = 0; - struct mcs_rwlock_node c_lock; - struct thread *c_thread = NULL; - - mcs_rwlock_reader_lock_noirq(&child->threads_lock, &c_lock); - list_for_each_entry(c_thread, &child->threads_list, siblings_list) { - if (c_thread->tid == pid) { - c_found = 1; - break; - } - } - mcs_rwlock_reader_unlock_noirq(&child->threads_lock, &c_lock); - if (!c_found) c_thread = NULL; - - return c_thread; -} - static void -set_process_rusage(struct process *proc, struct rusage *usage) +thread_exit_signal(struct thread *thread) { - ts_to_tv(&usage->ru_utime, &proc->utime); - ts_to_tv(&usage->ru_stime, &proc->stime); - usage->ru_maxrss = proc->maxrss / 1024; + int sig; + struct siginfo info; + int error; + struct timespec ats; + + if (thread->report_proc == NULL) { + return; + } + + if (thread->ptrace) + sig = SIGCHLD; + else + sig = thread->termsig; + memset(&info, '\0', sizeof(info)); + info.si_signo = sig; + info.si_code = (thread->exit_status & 0x7f) ? + ((thread->exit_status & 0x80) ? 
+ CLD_DUMPED : CLD_KILLED) : CLD_EXITED; + info._sifields._sigchld.si_pid = thread->tid; + info._sifields._sigchld.si_status = thread->exit_status; + tsc_to_ts(thread->user_tsc, &ats); + info._sifields._sigchld.si_utime = timespec_to_jiffy(&ats); + tsc_to_ts(thread->system_tsc, &ats); + info._sifields._sigchld.si_stime = timespec_to_jiffy(&ats); + error = do_kill(NULL, thread->report_proc->pid, -1, sig, &info, 0); + dkprintf("terminate,klll %d,error=%d\n", sig, error); + /* Wake parent (if sleeping in wait4()) */ + waitq_wakeup(&thread->report_proc->waitpid_q); } static void @@ -604,7 +575,7 @@ finalize_process(struct process *proc) { struct resource_set *resource_set = cpu_local_var(resource_set); struct process *pid1 = resource_set->pid1; - int exit_status = proc->exit_status; + int exit_status = proc->group_exit_status; // Send signal to parent if (proc->parent == pid1) { @@ -641,185 +612,421 @@ finalize_process(struct process *proc) } } -/* +static void +ptrace_detach_thread(struct thread *thread, int data) +{ + struct resource_set *resource_set = cpu_local_var(resource_set); + struct process *pid1 = resource_set->pid1; + struct thread *mythread = cpu_local_var(current); + struct process *proc = mythread->proc; + struct process *report_proc = NULL; + struct mcs_rwlock_node_irqsave lock; + struct process *term_proc = NULL; + + if (thread == thread->proc->main_thread) { + struct process *tracee_proc = thread->proc; + struct process *parent = tracee_proc->ppid_parent; + + if (thread->proc->status == PS_ZOMBIE && + thread->proc->parent != parent) { + term_proc = thread->proc; + } + mcs_rwlock_reader_lock(&proc->children_lock, &lock); + + list_del(&tracee_proc->siblings_list); + mcs_rwlock_reader_unlock(&proc->children_lock, &lock); + + mcs_rwlock_reader_lock(&tracee_proc->children_lock, &lock); + list_del(&tracee_proc->ptraced_siblings_list); + list_add_tail(&tracee_proc->siblings_list, + &parent->children_list); + tracee_proc->parent = parent; + + 
mcs_rwlock_reader_unlock(&tracee_proc->children_lock, &lock); + } + if (thread->termsig && + thread->termsig != SIGCHLD && + thread->proc != pid1) { + report_proc = thread->proc; + } + thread->report_proc = report_proc; + mcs_rwlock_reader_lock(&proc->threads_lock, &lock); + list_del(&thread->report_siblings_list); + mcs_rwlock_reader_unlock(&proc->threads_lock, &lock); + thread->ptrace = 0; + kfree(thread->ptrace_debugreg); + thread->ptrace_debugreg = NULL; + + clear_single_step(thread); + if (report_proc) { + mcs_rwlock_reader_lock(&report_proc->threads_lock, &lock); + list_add_tail(&thread->report_siblings_list, + &report_proc->report_threads_list); + mcs_rwlock_reader_unlock(&report_proc->threads_lock, &lock); + if (thread->status == PS_EXITED || + thread->status == PS_ZOMBIE) { + /* + * Traced thread reports to the original parent with + * the termination signal in addition to the report + * to the tracer. + */ + thread_exit_signal(thread); + } + } + + if (data) { + struct siginfo info; + + memset(&info, '\0', sizeof(info)); + info.si_signo = data; + info.si_code = SI_USER; + info._sifields._kill.si_pid = proc->pid; + do_kill(mythread, thread->proc->pid, thread->tid, + data, &info, 1); + } + sched_wakeup_thread(thread, PS_TRACED | PS_STOPPED); + release_thread(thread); + if (term_proc) { + finalize_process(term_proc); + } +} + +static void +set_process_rusage(struct process *proc, struct rusage *usage) +{ + ts_to_tv(&usage->ru_utime, &proc->utime); + ts_to_tv(&usage->ru_stime, &proc->stime); + usage->ru_maxrss = proc->maxrss / 1024; +} + +static int +wait_proc(int pid, int *status, int options, void *rusage, int *empty) +{ + struct thread *thread = cpu_local_var(current); + struct process *proc = thread->proc; + struct process *child, *next; + int pgid = proc->pgid; + int ret = 0; + struct mcs_rwlock_node lock; + struct mcs_rwlock_node child_lock; + struct thread *c_thread = NULL; + + mcs_rwlock_writer_lock_noirq(&proc->children_lock, &lock); + 
list_for_each_entry_safe(child, next, &proc->children_list, + siblings_list) { + /* + * Find thread with pid == tid, this will be either the main + * thread or the one we are looking for specifically when + * __WCLONE is passed + */ + if ((pid >= 0 || -pid != child->pgid) && + pid != -1 && + (pid != 0 || pgid != child->pgid) && + (pid <= 0 || pid != child->pid)) + continue; + + *empty = 0; + + if ((options & WEXITED) && + child->status == PS_ZOMBIE) { + ret = wait_zombie(thread, child, status, options); + if (!(options & WNOWAIT) && + child->parent == child->ppid_parent) { + struct mcs_rwlock_node updatelock; + struct mcs_rwlock_node childlock; + struct process *pid1; + + pid1 = cpu_local_var(resource_set)->pid1; + + mcs_rwlock_writer_lock_noirq(&proc->update_lock, + &updatelock); + ts_add(&proc->stime_children, &child->stime); + ts_add(&proc->utime_children, &child->utime); + ts_add(&proc->stime_children, + &child->stime_children); + ts_add(&proc->utime_children, + &child->utime_children); + if (child->maxrss > proc->maxrss_children) + proc->maxrss_children = child->maxrss; + if (child->maxrss_children > + proc->maxrss_children) + proc->maxrss_children = + child->maxrss_children; + set_process_rusage(child, rusage); + mcs_rwlock_writer_unlock_noirq( + &proc->update_lock, &updatelock); + list_del(&child->siblings_list); + mcs_rwlock_writer_unlock_noirq( + &proc->children_lock, &lock); + + mcs_rwlock_writer_lock_noirq( + &child->update_lock, &updatelock); + child->parent = pid1; + child->ppid_parent = pid1; + mcs_rwlock_writer_lock_noirq( + &pid1->children_lock, &childlock); + list_add_tail(&child->siblings_list, + &pid1->children_list); + mcs_rwlock_writer_unlock_noirq( + &pid1->children_lock, &childlock); + mcs_rwlock_writer_unlock_noirq( + &child->update_lock, &updatelock); + mcs_rwlock_writer_lock_noirq( + &child->threads_lock, &child_lock); + c_thread = child->main_thread; + if (c_thread && + (c_thread->ptrace & PT_TRACED)) { + mcs_rwlock_writer_unlock_noirq( + 
&child->threads_lock, &child_lock); + ptrace_detach_thread(c_thread, 0); + } + else { + mcs_rwlock_writer_unlock_noirq( + &child->threads_lock, &child_lock); + } + release_process(child); + } + else{ + mcs_rwlock_writer_lock_noirq( + &child->threads_lock, &child_lock); + c_thread = child->main_thread; + if (c_thread && !(options & WNOWAIT) && + (c_thread->ptrace & PT_TRACED)) { + mcs_rwlock_writer_unlock_noirq( + &child->threads_lock, &child_lock); + mcs_rwlock_writer_unlock_noirq( + &proc->children_lock, &lock); + ptrace_detach_thread(c_thread, 0); + } + else { + mcs_rwlock_writer_unlock_noirq( + &child->threads_lock, &child_lock); + mcs_rwlock_writer_unlock_noirq( + &proc->children_lock, &lock); + } + } + + goto out_found; + } + + mcs_rwlock_writer_lock_noirq(&child->threads_lock, &child_lock); + c_thread = child->main_thread; + + if (!(c_thread->ptrace & PT_TRACED) && + (c_thread->signal_flags & SIGNAL_STOP_STOPPED) && + (options & WUNTRACED)) { + /* + * Not ptraced and in stopped state and WUNTRACED is + * specified + */ + ret = wait_stopped(thread, child, NULL, status, + options); + if (!(options & WNOWAIT)) { + c_thread->signal_flags &= ~SIGNAL_STOP_STOPPED; + } + mcs_rwlock_writer_unlock_noirq(&proc->children_lock, + &lock); + mcs_rwlock_writer_unlock_noirq(&child->threads_lock, + &child_lock); + goto out_found; + } + + if ((c_thread->ptrace & PT_TRACED) && + (child->status & (PS_STOPPED | PS_TRACED))) { + ret = wait_stopped(thread, child, NULL, status, + options); + if (ret == child->pid) { + /* Are we looking for a specific thread? 
*/ + if (pid == c_thread->tid) { + ret = c_thread->tid; + } + if (!(options & WNOWAIT)) { + c_thread->signal_flags &= + ~SIGNAL_STOP_STOPPED; + } + mcs_rwlock_writer_unlock_noirq( + &proc->children_lock, &lock); + mcs_rwlock_writer_unlock_noirq( + &child->threads_lock, &child_lock); + goto out_found; + } + } + + if ((c_thread->signal_flags & SIGNAL_STOP_CONTINUED) && + (options & WCONTINUED)) { + ret = wait_continued(thread, child, NULL, status, + options); + if (!(options & WNOWAIT)) { + c_thread->signal_flags &= + ~SIGNAL_STOP_CONTINUED; + } + mcs_rwlock_writer_unlock_noirq(&proc->children_lock, + &lock); + mcs_rwlock_writer_unlock_noirq(&child->threads_lock, + &child_lock); + goto out_found; + } + mcs_rwlock_writer_unlock_noirq(&child->threads_lock, + &child_lock); + } + + if (*empty) { + list_for_each_entry(child, &proc->ptraced_children_list, + ptraced_siblings_list) { + if ((pid < 0 && -pid == child->pgid) || + pid == -1 || + (pid == 0 && pgid == child->pgid) || + (pid > 0 && pid == child->pid)) { + *empty = 0; + break; + } + } + } + mcs_rwlock_writer_unlock_noirq(&proc->children_lock, &lock); +out_found: + + return ret; +} + +static int +wait_thread(int tid, int *status, int options, void *rusage, int *empty) +{ + struct thread *thread = cpu_local_var(current); + struct process *proc = thread->proc; + struct thread *child, *next; + int ret = 0; + struct mcs_rwlock_node lock; + + mcs_rwlock_writer_lock_noirq(&thread->proc->threads_lock, &lock); + list_for_each_entry_safe(child, next, &proc->report_threads_list, + report_siblings_list) { + if (tid != -1 && child->tid != tid) + continue; + if (child == child->proc->main_thread) + continue; + *empty = 0; + if ((options & WEXITED) && + (child->status == PS_EXITED || + child->status == PS_ZOMBIE)) { + ret = child->tid; + if (!(options & WNOWAIT)) { + if (child->ptrace & PT_TRACED) { + mcs_rwlock_writer_unlock_noirq( + &thread->proc->threads_lock, &lock); + ptrace_detach_thread(child, 0); + } + else { + 
list_del(&child->report_siblings_list); + child->report_proc = NULL; + mcs_rwlock_writer_unlock_noirq( + &thread->proc->threads_lock, &lock); + release_thread(child); + } + } + else + mcs_rwlock_writer_unlock_noirq( + &thread->proc->threads_lock, &lock); + goto out_found; + } + + if (!(child->ptrace & PT_TRACED) && + (child->signal_flags & SIGNAL_STOP_STOPPED) && + (options & WUNTRACED)) { + /* + * Not ptraced and in stopped state and WUNTRACED is + * specified + */ + ret = wait_stopped(thread, child->proc, child, status, + options); + if (!(options & WNOWAIT)) { + child->signal_flags &= ~SIGNAL_STOP_STOPPED; + } + mcs_rwlock_writer_unlock_noirq( + &thread->proc->threads_lock, &lock); + goto out_found; + } + + if ((child->ptrace & PT_TRACED) && + (child->status & (PS_STOPPED | PS_TRACED))) { + ret = wait_stopped(thread, child->proc, child, status, + options); + if (ret == child->tid) { + /* Are we looking for a specific thread? */ + if (!(options & WNOWAIT)) { + child->signal_flags &= + ~SIGNAL_STOP_STOPPED; + } + mcs_rwlock_writer_unlock_noirq( + &thread->proc->threads_lock, &lock); + goto out_found; + } + } + + if ((child->signal_flags & SIGNAL_STOP_CONTINUED) && + (options & WCONTINUED)) { + ret = wait_continued(thread, child->proc, child, status, + options); + if (!(options & WNOWAIT)) { + child->signal_flags &= ~SIGNAL_STOP_CONTINUED; + } + mcs_rwlock_writer_unlock_noirq( + &thread->proc->threads_lock, &lock); + goto out_found; + } + } + + if (*empty) { + list_for_each_entry(child, &proc->threads_list, + siblings_list) { + if (child == child->proc->main_thread) + continue; + if (child->termsig && child->termsig != SIGCHLD) { + *empty = 0; + break; + } + } + } + mcs_rwlock_writer_unlock_noirq(&thread->proc->threads_lock, &lock); +out_found: + return ret; +} + +/* * From glibc: INLINE_SYSCALL (wait4, 4, pid, stat_loc, options, NULL); */ static int do_wait(int pid, int *status, int options, void *rusage) { struct thread *thread = cpu_local_var(current); - struct 
process *proc = thread->proc; - struct process *child, *next; - int pgid = proc->pgid; int ret; struct waitq_entry waitpid_wqe; int empty = 1; int orgpid = pid; - struct mcs_rwlock_node lock; - struct thread *c_thread = NULL; - dkprintf("wait4(): current->proc->pid: %d, pid: %d\n", thread->proc->pid, pid); + dkprintf("wait4(): current->proc->pid: %d, pid: %d\n", + thread->proc->pid, pid); rescan: -#ifdef POSTK_DEBUG_TEMP_FIX_65 /* wait4() lose infomation fix. */ waitq_init_entry(&waitpid_wqe, thread); - waitq_prepare_to_wait(&thread->proc->waitpid_q, &waitpid_wqe, PS_INTERRUPTIBLE); -#endif /* POSTK_DEBUG_TEMP_FIX_65 */ + waitq_prepare_to_wait(&thread->proc->waitpid_q, &waitpid_wqe, + PS_INTERRUPTIBLE); pid = orgpid; - mcs_rwlock_writer_lock_noirq(&thread->proc->children_lock, &lock); - list_for_each_entry_safe(child, next, &proc->children_list, siblings_list) { - /* - if (!(options & __WALL) && - !(!!(options & __WCLONE) ^ (child->termsig == SIGCHLD))) { - continue; + if (!(options & __WCLONE)) { + if ((ret = wait_proc(pid, status, options, rusage, &empty))) { + goto out_found; } - */ - - /* Find thread with pid == tid, this will be either the main thread - * or the one we are looking for specifically when __WCLONE is passed */ - //if (options & __WCLONE) - c_thread = find_thread_of_process(child, pid); - - if ((pid < 0 && -pid == child->pgid) || - pid == -1 || - (pid == 0 && pgid == child->pgid) || - (pid > 0 && pid == child->pid) || c_thread != NULL) { - - empty = 0; - - if((options & WEXITED) && - child->status == PS_ZOMBIE) { - int org_options = options; - - if ((child->ptrace & PT_TRACED) && - child->parent != child->ppid_parent) { - options |= WNOWAIT; - } - - ret = wait_zombie(thread, child, status, options); - if(!(options & WNOWAIT)){ - struct mcs_rwlock_node updatelock; - struct mcs_rwlock_node childlock; - struct process *pid1 = cpu_local_var(resource_set)->pid1; - mcs_rwlock_writer_lock_noirq(&proc->update_lock, &updatelock); - 
ts_add(&proc->stime_children, &child->stime); - ts_add(&proc->utime_children, &child->utime); - ts_add(&proc->stime_children, &child->stime_children); - ts_add(&proc->utime_children, &child->utime_children); - if(child->maxrss > proc->maxrss_children) - proc->maxrss_children = child->maxrss; - if(child->maxrss_children > proc->maxrss_children) - proc->maxrss_children = child->maxrss_children; - set_process_rusage(child, rusage); - mcs_rwlock_writer_unlock_noirq(&proc->update_lock, &updatelock); - list_del(&child->siblings_list); - mcs_rwlock_writer_unlock_noirq(&proc->children_lock, &lock); - - if(child->ptrace & PT_TRACED){ - struct process *parent = child->ppid_parent; - mcs_rwlock_writer_lock_noirq(&parent->children_lock, &childlock); - list_del(&child->ptraced_siblings_list); - mcs_rwlock_writer_unlock_noirq(&parent->children_lock, &childlock); - } - mcs_rwlock_writer_lock_noirq(&child->update_lock, &updatelock); - child->ptrace = 0; - child->parent = pid1; - child->ppid_parent = pid1; - mcs_rwlock_writer_lock_noirq(&pid1->children_lock, &childlock); - list_add_tail(&child->siblings_list, &pid1->children_list); - mcs_rwlock_writer_unlock_noirq(&pid1->children_lock, &childlock); - mcs_rwlock_writer_unlock_noirq(&child->update_lock, &updatelock); - release_process(child); - } - else - mcs_rwlock_writer_unlock_noirq(&proc->children_lock, &lock); - - if (!(org_options & WNOWAIT) && - (options & WNOWAIT)) { - struct process *parent; - - child->ptrace = 0; - parent = child->ppid_parent; - mcs_rwlock_writer_lock_noirq(&proc->children_lock, &lock); - list_del(&child->siblings_list); - mcs_rwlock_writer_unlock_noirq(&proc->children_lock, &lock); - mcs_rwlock_writer_lock_noirq(&parent->children_lock, &lock); - list_del(&child->ptraced_siblings_list); - list_add_tail(&child->siblings_list, &parent->children_list); - child->parent = parent; - mcs_rwlock_writer_unlock_noirq(&parent->children_lock, &lock); - - finalize_process(child); - } - - goto out_found; - } - - 
if(!(child->ptrace & PT_TRACED) && - (child->signal_flags & SIGNAL_STOP_STOPPED) && - (options & WUNTRACED)) { - /* Find main thread of process if pid == -1 */ - if (pid == -1) - c_thread = find_thread_of_process(child, child->pid); - /* Not ptraced and in stopped state and WUNTRACED is specified */ - ret = wait_stopped(thread, child, c_thread, status, options); - if(!(options & WNOWAIT)){ - child->signal_flags &= ~SIGNAL_STOP_STOPPED; - } - mcs_rwlock_writer_unlock_noirq(&thread->proc->children_lock, &lock); - goto out_found; - } - - if((child->ptrace & PT_TRACED) && - (child->status & (PS_STOPPED | PS_TRACED))) { - /* Find main thread of process if pid == -1 */ - if (pid == -1) - c_thread = find_thread_of_process(child, child->pid); - ret = wait_stopped(thread, child, c_thread, status, options); - if(c_thread && ret == child->pid){ - /* Are we looking for a specific thread? */ - if (pid == c_thread->tid) { - ret = c_thread->tid; - } - if(!(options & WNOWAIT)){ - child->signal_flags &= ~SIGNAL_STOP_STOPPED; - } - mcs_rwlock_writer_unlock_noirq(&thread->proc->children_lock, &lock); - goto out_found; - } - } - - if((child->signal_flags & SIGNAL_STOP_CONTINUED) && - (options & WCONTINUED)) { - ret = wait_continued(thread, child, status, options); - if(!(options & WNOWAIT)){ - child->signal_flags &= ~SIGNAL_STOP_CONTINUED; - } - mcs_rwlock_writer_unlock_noirq(&thread->proc->children_lock, &lock); - goto out_found; - } + } + if ((pid == -1 || pid > 0) && + (options & (__WCLONE | __WALL))) { + if ((ret = wait_thread(pid, status, options, rusage, &empty))) { + goto out_found; } - } if (empty) { - list_for_each_entry_safe(child, next, - &proc->ptraced_children_list, - ptraced_siblings_list) { - if ((pid < 0 && -pid == child->pgid) || - pid == -1 || - (pid == 0 && pgid == child->pgid) || - (pid > 0 && pid == child->pid) || - c_thread != NULL) { - empty = 0; - break; - } - } - if (empty) { - ret = -ECHILD; - goto out_notfound; - } + ret = -ECHILD; + goto out_notfound; } /* 
Don't sleep if WNOHANG requested */ @@ -831,12 +1038,7 @@ do_wait(int pid, int *status, int options, void *rusage) /* Sleep */ dkprintf("wait4,sleeping\n"); -#ifndef POSTK_DEBUG_TEMP_FIX_65 /* wait4() lose infomation fix. */ - waitq_init_entry(&waitpid_wqe, thread); - waitq_prepare_to_wait(&thread->proc->waitpid_q, &waitpid_wqe, PS_INTERRUPTIBLE); -#endif /* !POSTK_DEBUG_TEMP_FIX_65 */ - mcs_rwlock_writer_unlock_noirq(&thread->proc->children_lock, &lock); if(hassigpending(thread)){ waitq_finish_wait(&thread->proc->waitpid_q, &waitpid_wqe); return -EINTR; @@ -850,16 +1052,13 @@ do_wait(int pid, int *status, int options, void *rusage) goto rescan; exit: -#ifdef POSTK_DEBUG_TEMP_FIX_65 /* wait4() lose infomation fix. */ waitq_finish_wait(&thread->proc->waitpid_q, &waitpid_wqe); -#endif /* POSTK_DEBUG_TEMP_FIX_65 */ return ret; out_found: dkprintf("wait4,out_found\n"); goto exit; out_notfound: dkprintf("wait4,out_notfound\n"); - mcs_rwlock_writer_unlock_noirq(&thread->proc->children_lock, &lock); goto exit; } @@ -949,17 +1148,49 @@ void terminate_mcexec(int rc, int sig) struct process *proc = mythread->proc; struct syscall_request request IHK_DMA_ALIGN; - if ((old_exit_status = proc->exit_status) & 0x0000000100000000L) + if ((old_exit_status = proc->group_exit_status) & 0x0000000100000000L) return; exit_status = 0x0000000100000000L | ((rc & 0x00ff) << 8) | (sig & 0xff); - if (!__sync_bool_compare_and_swap(&proc->exit_status, + if (!__sync_bool_compare_and_swap(&proc->group_exit_status, old_exit_status, exit_status)) return; if (!proc->nohost) { request.number = __NR_exit_group; - request.args[0] = proc->exit_status; + request.args[0] = proc->group_exit_status; proc->nohost = 1; - do_syscall(&request, ihk_mc_get_processor_id(), proc->pid); + do_syscall(&request, ihk_mc_get_processor_id()); + } +} + +void sync_child_event(struct mc_perf_event *event) +{ + struct mc_perf_event *leader; + struct mc_perf_event *sub; + + if (!event) + return; + if (!(event->attr.inherit) && 
(event->pid == 0)) + return; + + leader = event->group_leader; + if (leader->pid == 0) { + leader->child_count_total += + ihk_mc_perfctr_read(leader->counter_id); + } + else if (leader->pid > 0) { + leader->count = ihk_mc_perfctr_read(leader->counter_id); + } + else + return; // Error + + list_for_each_entry(sub, &leader->sibling_list, group_entry) { + if (event->pid == 0) { + sub->child_count_total += + ihk_mc_perfctr_read(sub->counter_id); + } + else if (event->pid > 0) { + sub->count = ihk_mc_perfctr_read(sub->counter_id); + } } } @@ -981,6 +1212,8 @@ void terminate(int rc, int sig) int n; int *ids = NULL; int exit_status; + struct timespec ats; + int found; // sync perf info if (proc->monitoring_event) @@ -993,7 +1226,15 @@ void terminate(int rc, int sig) dkprintf("%s: PID: %d, TID: %d PS_EXITED already\n", __FUNCTION__, proc->pid, mythread->tid); preempt_disable(); + tsc_to_ts(mythread->user_tsc, &ats); + ts_add(&proc->utime, &ats); + tsc_to_ts(mythread->system_tsc, &ats); + ts_add(&proc->stime, &ats); + mythread->user_tsc = 0; + mythread->system_tsc = 0; mythread->status = PS_EXITED; + mythread->exit_status = proc->group_exit_status; + thread_exit_signal(mythread); mcs_rwlock_writer_unlock(&proc->threads_lock, &lock); mcs_rwlock_writer_unlock_noirq(&proc->update_lock, &updatelock); release_thread(mythread); @@ -1005,7 +1246,14 @@ void terminate(int rc, int sig) dkprintf("%s: PID: %d, TID: %d setting PS_EXITED\n", __FUNCTION__, proc->pid, mythread->tid); + tsc_to_ts(mythread->user_tsc, &ats); + ts_add(&proc->utime, &ats); + tsc_to_ts(mythread->system_tsc, &ats); + ts_add(&proc->stime, &ats); + mythread->user_tsc = 0; + mythread->system_tsc = 0; exit_status = ((rc & 0x00ff) << 8) | (sig & 0xff); + proc->group_exit_status = exit_status; mythread->exit_status = exit_status; proc->status = PS_EXITED; mcs_rwlock_writer_unlock(&proc->threads_lock, &lock); @@ -1020,7 +1268,9 @@ void terminate(int rc, int sig) mcs_rwlock_reader_lock(&proc->threads_lock, &lock); n = 0; 
list_for_each_entry(thread, &proc->threads_list, siblings_list) { - n++; + if (thread != mythread) { + n++; + } } if (n) { @@ -1047,12 +1297,22 @@ void terminate(int rc, int sig) for (;;) { __mcs_rwlock_reader_lock(&proc->threads_lock, &lock); - if (list_empty(&proc->threads_list)) { - mcs_rwlock_reader_unlock(&proc->threads_lock, &lock); + found = 0; + list_for_each_entry(thread, &proc->threads_list, + siblings_list) { + if (thread->status != PS_EXITED && + thread->status != PS_ZOMBIE) { + found = 1; + break; + } + } + mcs_rwlock_reader_unlock(&proc->threads_lock, &lock); + if (!found) { break; } - __mcs_rwlock_reader_unlock(&proc->threads_lock, &lock); - cpu_pause(); + + /* We might be waiting for another thread on same CPU */ + schedule(); } mcs_rwlock_writer_lock(&proc->threads_lock, &lock); @@ -1066,35 +1326,31 @@ void terminate(int rc, int sig) kfree(proc->saved_cmdline); } - // check tracee and ptrace_detach - n = 0; - mcs_rwlock_reader_lock(&proc->children_lock, &lock); - list_for_each_entry(child, &proc->children_list, siblings_list) { - if (child->ptrace & PT_TRACED) - n++; - } + while (!list_empty(&proc->report_threads_list)) { + struct thread *thr; - if (n) { - ids = kmalloc(sizeof(int) * n, IHK_MC_AP_NOWAIT); - i = 0; + thr = list_first_entry(&proc->report_threads_list, + struct thread, report_siblings_list); + if (thr->ptrace) { + int release_flag = thr->proc == proc && + thr->termsig && + thr->termsig != SIGCHLD; - if (ids) { - list_for_each_entry(child, &proc->children_list, siblings_list) { - if (child->ptrace & PT_TRACED) { - ids[i] = child->pid; - i++; - } + if (release_flag) { + thr->termsig = 0; + } + ptrace_detach_thread(thr, 0); + if (release_flag) { + release_thread(thr); } } - } - mcs_rwlock_reader_unlock(&proc->children_lock, &lock); - - if (ids) { - for (i = 0; i < n; i++) { - ptrace_detach(ids[i], 0); + else { + mcs_rwlock_writer_lock(&proc->threads_lock, &lock); + list_del(&thr->report_siblings_list); + thr->report_proc = NULL; + 
mcs_rwlock_writer_unlock(&proc->threads_lock, &lock); + release_thread(thr); } - kfree(ids); - ids = NULL; } if (!list_empty(&proc->children_list) || @@ -1242,15 +1498,24 @@ void clear_host_pte(uintptr_t addr, size_t len) return; } -static int set_host_vma(uintptr_t addr, size_t len, int prot) +static int set_host_vma(uintptr_t addr, size_t len, int prot, int holding_memory_range_lock) { ihk_mc_user_context_t ctx; long lerror; + struct thread *thread = cpu_local_var(current); ihk_mc_syscall_arg0(&ctx) = addr; ihk_mc_syscall_arg1(&ctx) = len; ihk_mc_syscall_arg2(&ctx) = prot; + dkprintf("%s: offloading __NR_mprotect\n", __FUNCTION__); + /* #986: Let remote page fault code skip + read-locking memory_range_lock. It's safe because other writers are warded off + until the remote PF handling code calls up_write(&current->mm->mmap_sem) and + vm_range is consistent when calling this function. */ + if (holding_memory_range_lock) { + thread->vm->is_memory_range_lock_taken = 1; + } lerror = syscall_generic_forwarding(__NR_mprotect, &ctx); if (lerror) { kprintf("set_host_vma(%lx,%lx,%x) failed. %ld\n", @@ -1260,10 +1525,13 @@ static int set_host_vma(uintptr_t addr, size_t len, int prot) lerror = 0; out: + if (holding_memory_range_lock) { + thread->vm->is_memory_range_lock_taken = 0; + } return (int)lerror; } -int do_munmap(void *addr, size_t len) +int do_munmap(void *addr, size_t len, int holding_memory_range_lock) { int error; int ro_freed; @@ -1275,21 +1543,20 @@ int do_munmap(void *addr, size_t len) clear_host_pte((uintptr_t)addr, len); } else { - error = set_host_vma((uintptr_t)addr, len, PROT_READ|PROT_WRITE); + error = set_host_vma((uintptr_t)addr, len, PROT_READ | PROT_WRITE | PROT_EXEC, holding_memory_range_lock); if (error) { kprintf("sys_munmap:set_host_vma failed. 
%d\n", error); /* through */ } } finish_free_pages_pending(); + dkprintf("%s: 0x%lx:%lu, error: %ld\n", __FUNCTION__, addr, len, error); return error; } -#ifdef POSTK_DEBUG_ARCH_DEP_27 -#else -static int search_free_space(size_t len, intptr_t hint, int pgshift, intptr_t *addrp) +static int search_free_space(size_t len, int pgshift, uintptr_t *addrp) { struct thread *thread = cpu_local_var(current); struct vm_regions *region = &thread->vm->region; @@ -1298,17 +1565,17 @@ static int search_free_space(size_t len, intptr_t hint, int pgshift, intptr_t *a struct vm_range *range; size_t pgsize = (size_t)1 << pgshift; - dkprintf("search_free_space(%lx,%lx,%d,%p)\n", len, hint, pgshift, addrp); + dkprintf("%s: len: %lu, pgshift: %d\n", + __FUNCTION__, len, pgshift); - addr = hint; + addr = region->map_end; for (;;) { addr = (addr + pgsize - 1) & ~(pgsize - 1); if ((region->user_end <= addr) || ((region->user_end - len) < addr)) { - ekprintf("search_free_space(%lx,%lx,%p):" - "no space. %lx %lx\n", - len, hint, addrp, addr, - region->user_end); + ekprintf("%s: error: addr 0x%lx is outside the user region\n", + __FUNCTION__, addr); + error = -ENOMEM; goto out; } @@ -1320,23 +1587,23 @@ static int search_free_space(size_t len, intptr_t hint, int pgshift, intptr_t *a addr = range->end; } + region->map_end = addr + len; error = 0; *addrp = addr; out: - dkprintf("search_free_space(%lx,%lx,%d,%p): %d %lx\n", - len, hint, pgshift, addrp, error, addr); + dkprintf("%s: len: %lu, pgshift: %d, addr: 0x%lx\n", + __FUNCTION__, len, pgshift, addr); return error; } -#endif intptr_t -do_mmap(const intptr_t addr0, const size_t len0, const int prot, +do_mmap(const uintptr_t addr0, const size_t len0, const int prot, const int flags, const int fd, const off_t off0) { struct thread *thread = cpu_local_var(current); struct vm_regions *region = &thread->vm->region; - intptr_t addr = addr0; + uintptr_t addr = addr0; size_t len = len0; size_t populate_len = 0; off_t off; @@ -1345,7 +1612,7 @@ 
do_mmap(const intptr_t addr0, const size_t len0, const int prot, int p2align; void *p = NULL; int vrflags; - intptr_t phys; + uintptr_t phys; struct memobj *memobj = NULL; int maxprot; int denied; @@ -1412,28 +1679,21 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot, if (flags & MAP_FIXED) { /* clear specified address range */ - error = do_munmap((void *)addr, len); + error = do_munmap((void *)addr, len, 1/* holding memory_range_lock */); if (error) { ekprintf("do_mmap:do_munmap(%lx,%lx) failed. %d\n", addr, len, error); goto out; } } - else { - /* choose mapping address */ -#ifdef POSTK_DEBUG_ARCH_DEP_27 - error = search_free_space(cpu_local_var(current), len, - region->map_end, PAGE_SHIFT + p2align, &addr); -#else - error = search_free_space(len, region->map_end, - PAGE_SHIFT + p2align, &addr); -#endif /* POSTK_DEBUG_ARCH_DEP_27 */ + else if (flags & MAP_ANONYMOUS) { + /* Obtain mapping address */ + error = search_free_space(len, PAGE_SHIFT + p2align, &addr); if (error) { ekprintf("do_mmap:search_free_space(%lx,%lx,%d) failed. %d\n", len, region->map_end, p2align, error); goto out; } - region->map_end = addr + len; } /* do the map */ @@ -1460,8 +1720,8 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot, populated_mapping = 0; } - if (!(prot & PROT_WRITE)) { - error = set_host_vma(addr, len, PROT_READ); + if ((flags & MAP_ANONYMOUS) && !(prot & PROT_WRITE)) { + error = set_host_vma(addr, len, PROT_READ | PROT_EXEC, 1/* holding memory_range_lock */); if (error) { kprintf("do_mmap:set_host_vma failed. 
%d\n", error); goto out; @@ -1512,15 +1772,56 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot, #ifdef PROFILE_ENABLE profile_event_add(PROFILE_mmap_device_file, len); #endif // PROFILE_ENABLE - dkprintf("%s: device fd: %d off: %lu mapping at %p - %p\n", - __FUNCTION__, fd, off, addr, addr + len); } } if (error) { kprintf("%s: error: file mapping failed, fd: %d, error: %d\n", - __FUNCTION__, error); + __func__, fd, error); goto out; } + + /* hugetlbfs files are pre-created in fileobj_create, but + * need extra processing + */ + if (memobj && (memobj->flags & MF_HUGETLBFS)) { + error = hugefileobj_create(memobj, len, off, &pgshift, + addr0); + if (error) { + memobj->ops->free(memobj); + kprintf("%s: error creating hugetlbfs memobj, fd: %d, error: %d\n", + __func__, fd, error); + goto out; + } + p2align = pgshift - PAGE_SHIFT; + } + + /* Obtain mapping address - delayed to use proper p2align */ + if (!(flags & MAP_FIXED)) + error = search_free_space(len, PAGE_SHIFT + p2align, + &addr); + if (error) { + ekprintf("do_mmap:search_free_space(%lx,%lx,%d) failed. %d\n", + len, region->map_end, p2align, error); + goto out; + } + if (!(prot & PROT_WRITE)) { + error = set_host_vma(addr, len, PROT_READ | PROT_EXEC, + 1/* holding memory_range_lock */); + if (error) { + kprintf("do_mmap:set_host_vma failed. 
%d\n", + error); + goto out; + } + + ro_vma_mapped = 1; + } + if (memobj->flags & MF_HUGETLBFS) { + dkprintf("Created hugefileobj %p (%d:%x %llx-%llx, fd %d, pgshift %d)\n", + memobj, len, off, addr, addr+len, fd, pgshift); + } else if (memobj->flags & MF_DEV_FILE) { + dkprintf("%s: device fd: %d off: %lu mapping at %p - %p\n", + __func__, fd, off, addr, addr + len); + } } /* Prepopulated ANONYMOUS mapping */ else if (!(vrflags & VR_DEMAND_PAGING) @@ -1615,13 +1916,11 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot, populate_len = len; if (!(flags & MAP_ANONYMOUS)) { - memobj_lock(memobj); - if (memobj->status == MEMOBJ_TO_BE_PREFETCHED) { - memobj->status = MEMOBJ_READY; + if (atomic_cmpxchg4(&memobj->status, MEMOBJ_TO_BE_PREFETCHED, + MEMOBJ_READY)) { populated_mapping = 1; populate_len = memobj->size; } - memobj_unlock(memobj); /* Update PTEs for pre-mapped memory object */ if ((memobj->flags & MF_PREMAP) && @@ -1663,7 +1962,7 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot, out: if (ro_vma_mapped) { - (void)set_host_vma(addr, len, PROT_READ|PROT_WRITE); + (void)set_host_vma(addr, len, PROT_READ | PROT_WRITE | PROT_EXEC, 1/* holding memory_range_lock */); } ihk_mc_spinlock_unlock_noirq(&thread->vm->memory_range_lock); @@ -1699,7 +1998,7 @@ out: ihk_mc_free_pages_user(p, npages); } if (memobj) { - memobj_release(memobj); + memobj_unref(memobj); } dkprintf("%s: 0x%lx:%8lu, (req: 0x%lx:%lu), prot: %x, flags: %x, " "fd: %d, off: %lu, error: %ld, addr: 0x%lx\n", @@ -1734,7 +2033,7 @@ SYSCALL_DECLARE(munmap) } ihk_mc_spinlock_lock_noirq(&thread->vm->memory_range_lock); - error = do_munmap((void *)addr, len); + error = do_munmap((void *)addr, len, 1/* holding memory_range_lock */); ihk_mc_spinlock_unlock_noirq(&thread->vm->memory_range_lock); out: @@ -1873,7 +2172,7 @@ out: // XXX: TLB flush flush_tlb(); if (ro_changed && !error) { - error = set_host_vma(start, len, prot & (PROT_READ|PROT_WRITE)); + error = set_host_vma(start, len, 
prot & (PROT_READ | PROT_WRITE | PROT_EXEC), 1/* holding memory_range_lock */); if (error) { kprintf("sys_mprotect:set_host_vma failed. %d\n", error); /* through */ @@ -1971,8 +2270,7 @@ static void settid(struct thread *thread, int nr_tids, int *tids) */ request.args[4] = nr_tids; request.args[5] = virt_to_phys(tids); - if ((ret = do_syscall(&request, ihk_mc_get_processor_id(), - thread->proc->pid)) < 0) { + if ((ret = do_syscall(&request, ihk_mc_get_processor_id())) < 0) { kprintf("%s: WARNING: do_syscall returns %d\n", __FUNCTION__, ret); } @@ -1986,14 +2284,16 @@ SYSCALL_DECLARE(gettid) extern void ptrace_report_signal(struct thread *thread, int sig); static int ptrace_report_exec(struct thread *thread) { - int ptrace = thread->proc->ptrace; + int ptrace = thread->ptrace; if (ptrace & (PT_TRACE_EXEC|PTRACE_O_TRACEEXEC)) { ihk_mc_kernel_context_t ctx; int sig = (SIGTRAP | (PTRACE_EVENT_EXEC << 8)); memcpy(&ctx, &thread->ctx, sizeof ctx); + preempt_enable(); ptrace_report_signal(thread, sig); + preempt_disable(); memcpy(&thread->ctx, &ctx, sizeof ctx); } return 0; @@ -2001,7 +2301,7 @@ static int ptrace_report_exec(struct thread *thread) static void ptrace_syscall_event(struct thread *thread) { - int ptrace = thread->proc->ptrace; + int ptrace = thread->ptrace; if (ptrace & PT_TRACE_SYSCALL) { int sig = (SIGTRAP | ((ptrace & PTRACE_O_TRACESYSGOOD) ? 
0x80 : 0)); @@ -2015,20 +2315,20 @@ static int ptrace_check_clone_event(struct thread *thread, int clone_flags) if (clone_flags & CLONE_VFORK) { /* vfork */ - if (thread->proc->ptrace & PTRACE_O_TRACEVFORK) { + if (thread->ptrace & PTRACE_O_TRACEVFORK) { event = PTRACE_EVENT_VFORK; } - if (thread->proc->ptrace & PTRACE_O_TRACEVFORKDONE) { + if (thread->ptrace & PTRACE_O_TRACEVFORKDONE) { event = PTRACE_EVENT_VFORK_DONE; } } else if ((clone_flags & CSIGNAL) == SIGCHLD) { /* fork */ - if (thread->proc->ptrace & PTRACE_O_TRACEFORK) { + if (thread->ptrace & PTRACE_O_TRACEFORK) { event = PTRACE_EVENT_FORK; } } else { /* clone */ - if (thread->proc->ptrace & PTRACE_O_TRACECLONE) { + if (thread->ptrace & PTRACE_O_TRACECLONE) { event = PTRACE_EVENT_CLONE; } } @@ -2036,6 +2336,56 @@ static int ptrace_check_clone_event(struct thread *thread, int clone_flags) return event; } +static int ptrace_attach_thread(struct thread *thread, struct process *proc) +{ + struct process *child; + struct process *parent; + struct mcs_rwlock_node_irqsave lock; + int error = 0; + + if (thread->report_proc) { + mcs_rwlock_writer_lock(&thread->report_proc->threads_lock, + &lock); + list_del(&thread->report_siblings_list); + mcs_rwlock_writer_unlock(&thread->report_proc->threads_lock, + &lock); + } + + mcs_rwlock_writer_lock(&proc->threads_lock, &lock); + list_add_tail(&thread->report_siblings_list, + &proc->report_threads_list); + thread->report_proc = proc; + mcs_rwlock_writer_unlock(&proc->threads_lock, &lock); + + child = thread->proc; + if (thread == child->main_thread) { + parent = child->parent; + dkprintf("ptrace_attach() parent->pid=%d\n", parent->pid); + mcs_rwlock_writer_lock(&parent->children_lock, &lock); + list_del(&child->siblings_list); + list_add_tail(&child->ptraced_siblings_list, + &parent->ptraced_children_list); + mcs_rwlock_writer_unlock(&parent->children_lock, &lock); + + mcs_rwlock_writer_lock(&proc->children_lock, &lock); + list_add_tail(&child->siblings_list, 
&proc->children_list); + child->parent = proc; + mcs_rwlock_writer_unlock(&proc->children_lock, &lock); + } + + if (thread->ptrace_debugreg == NULL) { + error = alloc_debugreg(thread); + if (error < 0) { + goto out; + } + } + hold_thread(thread); + + clear_single_step(thread); +out: + return error; +} + static int ptrace_report_clone(struct thread *thread, struct thread *new, int event) { dkprintf("ptrace_report_clone,enter\n"); @@ -2052,8 +2402,8 @@ static int ptrace_report_clone(struct thread *thread, struct thread *new, int ev /* Transition process state */ thread->proc->status = PS_TRACED; thread->status = PS_TRACED; - thread->proc->ptrace_eventmsg = new->tid; - thread->proc->ptrace &= ~PT_TRACE_SYSCALL; /** ??? **/ + thread->ptrace_eventmsg = new->tid; + thread->ptrace &= ~PT_TRACE_SYSCALL; parent_pid = thread->proc->parent->pid; mcs_rwlock_writer_unlock_noirq(&thread->proc->update_lock, &lock); @@ -2062,25 +2412,10 @@ static int ptrace_report_clone(struct thread *thread, struct thread *new, int ev mcs_rwlock_writer_lock_noirq(&new->proc->update_lock, &updatelock); /* set ptrace features to new process */ - new->proc->ptrace = thread->proc->ptrace; - if (event != PTRACE_EVENT_CLONE) { - new->proc->ppid_parent = new->proc->parent; /* maybe proc */ - } + new->ptrace = thread->ptrace; - if ((new->proc->ptrace & PT_TRACED) && new->ptrace_debugreg == NULL) { - alloc_debugreg(new); - } + ptrace_attach_thread(new, thread->proc->parent); - if (event != PTRACE_EVENT_CLONE) { - mcs_rwlock_writer_lock_noirq(&new->proc->parent->children_lock, &lock); - list_del(&new->proc->siblings_list); - list_add_tail(&new->proc->ptraced_siblings_list, &new->proc->parent->ptraced_children_list); - mcs_rwlock_writer_unlock_noirq(&new->proc->parent->children_lock, &lock); - new->proc->parent = thread->proc->parent; /* new ptracing parent */ - mcs_rwlock_writer_lock_noirq(&new->proc->parent->children_lock, &lock); - list_add_tail(&new->proc->siblings_list, 
&new->proc->parent->children_list); - mcs_rwlock_writer_unlock_noirq(&new->proc->parent->children_lock, &lock); - } /* trace and SIGSTOP */ new->exit_status = SIGSTOP; new->proc->status = PS_TRACED; @@ -2123,7 +2458,7 @@ static void munmap_all(void) addr = (void *)range->start; size = range->end - range->start; - error = do_munmap(addr, size); + error = do_munmap(addr, size, 1/* holding memory_range_lock */); if (error) { kprintf("munmap_all():do_munmap(%p,%lx) failed. %d\n", addr, size, error); @@ -2141,10 +2476,6 @@ static void munmap_all(void) return; } /* munmap_all() */ -#ifdef POSTK_DEBUG_TEMP_FIX_19 -extern void clear_fp_regs(struct thread *thread); -#endif /* POSTK_DEBUG_TEMP_FIX_19 */ - SYSCALL_DECLARE(execve) { int error; @@ -2192,7 +2523,7 @@ SYSCALL_DECLARE(execve) request.args[0] = 1; /* 1st phase - get ELF desc */ request.args[1] = (unsigned long)filename; request.args[2] = virt_to_phys(desc); - ret = do_syscall(&request, ihk_mc_get_processor_id(), 0); + ret = do_syscall(&request, ihk_mc_get_processor_id()); if (ret != 0) { dkprintf("execve(): ERROR: host failed to load elf header, errno: %d\n", @@ -2204,13 +2535,16 @@ SYSCALL_DECLARE(execve) dkprintf("execve(): ELF desc received, num sections: %d\n", desc->num_sections); - if (desc->shell_path[0]) { - dkprintf("execve(): shell interpreter: %s\n", desc->shell_path); + /* for shebang script we get extra argvs from mcexec */ + if (desc->args_len) { + desc->args = ((char *)desc) + sizeof(struct program_load_desc) + + sizeof(struct program_image_section) * + desc->num_sections; } /* Flatten argv and envp into kernel-space buffers */ - argv_flat_len = flatten_strings_from_user(-1, (desc->shell_path[0] ? 
- desc->shell_path : NULL), argv, &argv_flat); + argv_flat_len = flatten_strings_from_user(desc->args, argv, + &argv_flat); if (argv_flat_len < 0) { char *kfilename; int len = strlen_user(filename); @@ -2224,8 +2558,10 @@ SYSCALL_DECLARE(execve) ret = argv_flat_len; goto end; } + desc->args = NULL; + desc->args_len = 0; - envp_flat_len = flatten_strings_from_user(-1, NULL, envp, &envp_flat); + envp_flat_len = flatten_strings_from_user(NULL, envp, &envp_flat); if (envp_flat_len < 0) { char *kfilename; int len = strlen_user(filename); @@ -2240,7 +2576,7 @@ SYSCALL_DECLARE(execve) goto end; } - if (thread->proc->ptrace) { + if (thread->ptrace) { ihk_mc_syscall_ret(ctx) = 0; ptrace_syscall_event(thread); } @@ -2275,7 +2611,7 @@ SYSCALL_DECLARE(execve) request.args[2] = sizeof(struct program_load_desc) + sizeof(struct program_image_section) * desc->num_sections; - if ((ret = do_syscall(&request, ihk_mc_get_processor_id(), 0)) != 0) { + if ((ret = do_syscall(&request, ihk_mc_get_processor_id())) != 0) { goto end; } @@ -2285,10 +2621,13 @@ SYSCALL_DECLARE(execve) thread->sigcommon->action[i].sa.sa_handler = SIG_DFL; } -#ifdef POSTK_DEBUG_TEMP_FIX_19 - /* The floating-point environment is reset to the default. */ - clear_fp_regs(thread); -#endif /* POSTK_DEBUG_TEMP_FIX_19 */ + /* Reset floating-point environment to default. */ + clear_fp_regs(); + + /* Reset sigaltstack to default */ + thread->sigstack.ss_sp = NULL; + thread->sigstack.ss_flags = SS_DISABLE; + thread->sigstack.ss_size = 0; error = ptrace_report_exec(thread); if(error) { @@ -2385,7 +2724,19 @@ unsigned long do_fork(int clone_flags, unsigned long newsp, return -EINVAL; } - cpuid = obtain_clone_cpuid(&old->cpu_set); + /* N-th creation put the new on Linux CPU. It's turned off when zero is + set to uti_thread_rank. 
*/ + if (oldproc->uti_thread_rank) { + if (oldproc->clone_count + 1 == oldproc->uti_thread_rank) { + old->mod_clone = SPAWN_TO_REMOTE; + kprintf("%s: mod_clone is set to %d\n", __FUNCTION__, old->mod_clone); + } else { + old->mod_clone = SPAWN_TO_LOCAL; + kprintf("%s: mod_clone is set to %d\n", __FUNCTION__, old->mod_clone); + } + } + + cpuid = obtain_clone_cpuid(&old->cpu_set, old->mod_clone == SPAWN_TO_REMOTE && oldproc->uti_use_last_cpu); if (cpuid == -1) { kprintf("do_fork,core not available\n"); return -EAGAIN; @@ -2412,14 +2763,15 @@ unsigned long do_fork(int clone_flags, unsigned long newsp, mcs_rwlock_writer_lock(&newproc->threads_lock, &lock); /* Obtain mcexec TIDs if not known yet */ if (!newproc->nr_tids) { - tids = kmalloc(sizeof(int) * num_processors, IHK_MC_AP_NOWAIT); + tids = kmalloc(sizeof(int) * NR_TIDS, IHK_MC_AP_NOWAIT); if (!tids) { mcs_rwlock_writer_unlock(&newproc->threads_lock, &lock); release_cpuid(cpuid); return -ENOMEM; } - newproc->tids = kmalloc(sizeof(struct mcexec_tid) * num_processors, IHK_MC_AP_NOWAIT); + newproc->tids = kmalloc(sizeof(struct mcexec_tid) * + NR_TIDS, IHK_MC_AP_NOWAIT); if (!newproc->tids) { mcs_rwlock_writer_unlock(&newproc->threads_lock, &lock); kfree(tids); @@ -2427,10 +2779,11 @@ unsigned long do_fork(int clone_flags, unsigned long newsp, return -ENOMEM; } - settid(new, num_processors, tids); + settid(new, NR_TIDS, tids); - for (i = 0; (i < num_processors) && tids[i]; ++i) { - dkprintf("%s: tid[%d]: %d\n", __FUNCTION__, i, tids[i]); + for (i = 0; (i < NR_TIDS) && tids[i]; ++i) { + dkprintf("%s: tids[%d]: %d\n", + __func__, i, tids[i]); newproc->tids[i].tid = tids[i]; newproc->tids[i].thread = NULL; ++newproc->nr_tids; @@ -2459,7 +2812,12 @@ retry_tid: /* TODO: spawn more mcexec threads */ if (!new->tid) { release_cpuid(cpuid); - kprintf("%s: no more TIDs available\n"); + kprintf("%s: no more TIDs available\n", __func__); + for (i = 0; i < newproc->nr_tids; ++i) { + kprintf("%s: i=%d,tid=%d,thread=%p\n", + 
__func__, i, newproc->tids[i].tid, + newproc->tids[i].thread); + } return -ENOMEM; } } @@ -2467,11 +2825,16 @@ retry_tid: else { request1.number = __NR_clone; request1.args[0] = 0; + request1.args[1] = new->vm->region.user_start; + request1.args[2] = new->vm->region.user_end - + new->vm->region.user_start; + request1.args[3] = + virt_to_phys(new->vm->address_space->page_table); if(clone_flags & CLONE_PARENT){ if(oldproc->ppid_parent->pid != 1) request1.args[0] = clone_flags; } - newproc->pid = do_syscall(&request1, ihk_mc_get_processor_id(), 0); + newproc->pid = do_syscall(&request1, ihk_mc_get_processor_id()); if (newproc->pid < 0) { kprintf("ERROR: forking host process\n"); @@ -2485,24 +2848,6 @@ retry_tid: new->vm->address_space->pids[0] = new->proc->pid; dkprintf("fork(): new pid: %d\n", new->proc->pid); -#ifndef POSTK_DEBUG_TEMP_FIX_48 /* nohost flag missed fix */ - /* clear user space PTEs and set new rpgtable so that consequent - * page faults will look up the right mappings */ - request1.number = __NR_munmap; - request1.args[0] = new->vm->region.user_start; - request1.args[1] = new->vm->region.user_end - - new->vm->region.user_start; - /* 3rd parameter denotes new rpgtable of host process */ - request1.args[2] = virt_to_phys(new->vm->address_space->page_table); - request1.args[3] = newproc->pid; - - dkprintf("fork(): requesting PTE clear and rpgtable (0x%lx) update\n", - request1.args[2]); - - if (do_syscall(&request1, ihk_mc_get_processor_id(), new->proc->pid)) { - kprintf("ERROR: clearing PTEs in host process\n"); - } -#endif /* !POSTK_DEBUG_TEMP_FIX_48 */ if(oldproc->monitoring_event && oldproc->monitoring_event->attr.inherit){ newproc->monitoring_event = oldproc->monitoring_event; @@ -2553,14 +2898,20 @@ retry_tid: ihk_mc_syscall_ret(new->uctx) = 0; new->status = PS_RUNNING; - if (old->mod_clone == SPAWN_TO_REMOTE) { + + /* Only the first do_fork() call creates a thread on a Linux CPU */ + if (__sync_bool_compare_and_swap(&old->mod_clone, 
SPAWN_TO_REMOTE, SPAWN_TO_LOCAL)) { new->mod_clone = SPAWNING_TO_REMOTE; if (old->mod_clone_arg) { new->mod_clone_arg = kmalloc(sizeof(struct uti_attr), IHK_MC_AP_NOWAIT); - if (new->mod_clone_arg) - memcpy(new->mod_clone_arg, old->mod_clone_arg, - sizeof(struct uti_attr)); + if (!new->mod_clone_arg) { + kprintf("%s: error: allocating mod_clone_arg\n", + __func__); + return -ENOMEM; + } + memcpy(new->mod_clone_arg, old->mod_clone_arg, + sizeof(struct uti_attr)); } } chain_thread(new); @@ -2590,25 +2941,7 @@ retry_tid: chain_process(newproc); } -#ifdef POSTK_DEBUG_TEMP_FIX_48 /* nohost flag missed fix */ - /* clear user space PTEs and set new rpgtable so that consequent - * page faults will look up the right mappings */ - request1.number = __NR_munmap; - request1.args[0] = new->vm->region.user_start; - request1.args[1] = new->vm->region.user_end - - new->vm->region.user_start; - /* 3rd parameter denotes new rpgtable of host process */ - request1.args[2] = virt_to_phys(new->vm->address_space->page_table); - request1.args[3] = newproc->pid; - - dkprintf("fork(): requesting PTE clear and rpgtable (0x%lx) update\n", - request1.args[2]); - - if (do_syscall(&request1, ihk_mc_get_processor_id(), new->proc->pid)) { - kprintf("ERROR: clearing PTEs in host process\n"); - } -#endif /* !POSTK_DEBUG_TEMP_FIX_48 */ - if (oldproc->ptrace) { + if (old->ptrace) { ptrace_event = ptrace_check_clone_event(old, clone_flags); if (ptrace_event) { ptrace_report_clone(old, new, ptrace_event); @@ -2624,7 +2957,18 @@ retry_tid: request1.number = __NR_clone; request1.args[0] = 1; request1.args[1] = new->tid; - do_syscall(&request1, ihk_mc_get_processor_id(), 0); + do_syscall(&request1, ihk_mc_get_processor_id()); + } + else if (termsig && termsig != SIGCHLD) { + struct mcs_rwlock_node_irqsave lock; + + mcs_rwlock_writer_lock(&oldproc->threads_lock, &lock); + new->termsig = termsig; + new->report_proc = oldproc; + list_add_tail(&new->report_siblings_list, + &oldproc->report_threads_list); + 
mcs_rwlock_writer_unlock(&oldproc->threads_lock, &lock); + hold_thread(new); } runq_add_thread(new, cpuid); @@ -2764,7 +3108,7 @@ getcred(int *_buf) request.number = __NR_setfsuid; request.args[0] = phys; request.args[1] = 1; - do_syscall(&request, ihk_mc_get_processor_id(), 0); + do_syscall(&request, ihk_mc_get_processor_id()); return buf; } @@ -2871,7 +3215,7 @@ SYSCALL_DECLARE(setfsuid) request.number = __NR_setfsuid; request.args[0] = fsuid; request.args[1] = 0; - newfsuid = do_syscall(&request, ihk_mc_get_processor_id(), 0); + newfsuid = do_syscall(&request, ihk_mc_get_processor_id()); #ifdef POSTK_DEBUG_TEMP_FIX_45 /* setfsgid()/setfsuid() mismatch fix. */ do_setresuid((int)(newfsuid >> 32)); newfsuid &= (1UL << 32) - 1; @@ -2934,7 +3278,7 @@ SYSCALL_DECLARE(setfsgid) request.number = __NR_setfsgid; request.args[0] = fsgid; - newfsgid = do_syscall(&request, ihk_mc_get_processor_id(), 0); + newfsgid = do_syscall(&request, ihk_mc_get_processor_id()); #ifdef POSTK_DEBUG_TEMP_FIX_45 /* setfsgid()/setfsuid() mismatch fix. */ do_setresgid((int)(newfsgid >> 32)); newfsgid &= (1UL << 32) - 1; @@ -3050,6 +3394,8 @@ SYSCALL_DECLARE(setpgid) return rc; } +/* Ignore the registration by start_thread() (in pthread_create.c) + because McKernel doesn't unlock mutex-es held by the thread which has been killed. */ SYSCALL_DECLARE(set_robust_list) { // Palliative fix. wait for impl. 
@@ -3385,7 +3731,7 @@ SYSCALL_DECLARE(signalfd4) request.number = __NR_signalfd4; request.args[0] = 0; request.args[1] = flags; - fd = do_syscall(&request, ihk_mc_get_processor_id(), 0); + fd = do_syscall(&request, ihk_mc_get_processor_id()); if(fd < 0){ return fd; } @@ -3413,14 +3759,15 @@ SYSCALL_DECLARE(signalfd4) return sfd->fd; } -int +#ifdef ENABLE_PERF +int perf_counter_alloc(struct thread *thread) { int ret = -1; int i = 0; // find avail generic counter - for(i = 0; i < X86_IA32_NUM_PERF_COUNTERS; i++) { + for (i = 0; i < NUM_PERF_COUNTERS; i++) { if(!(thread->pmc_alloc_map & (1 << i))) { ret = i; break; @@ -3430,8 +3777,7 @@ perf_counter_alloc(struct thread *thread) return ret; } -int -perf_counter_start(struct mc_perf_event *event) +int perf_counter_set(struct mc_perf_event *event) { int ret = 0; struct perf_event_attr *attr = &event->attr; @@ -3444,24 +3790,14 @@ perf_counter_start(struct mc_perf_event *event) mode |= PERFCTR_USER_MODE; } - if(event->counter_id >= 0 && event->counter_id < X86_IA32_NUM_PERF_COUNTERS) { - if (event->extra_reg.reg) { - if (ihk_mc_perfctr_set_extra(event)) { - ret = -1; - goto out; - } + if (event->extra_reg.reg) { + if (ihk_mc_perfctr_set_extra(event)) { + ret = -1; + goto out; } - ret = ihk_mc_perfctr_init_raw(event->counter_id, event->hw_config, mode); - ihk_mc_perfctr_start(1UL << event->counter_id); - } - else if(event->counter_id >= X86_IA32_BASE_FIXED_PERF_COUNTERS && - event->counter_id < X86_IA32_BASE_FIXED_PERF_COUNTERS + X86_IA32_NUM_FIXED_PERF_COUNTERS) { - ret = ihk_mc_perfctr_fixed_init(event->counter_id, mode); - ihk_mc_perfctr_start(1UL << event->counter_id); - } - else { - ret = -1; } + ret = ihk_mc_perfctr_init_raw(event->counter_id, + event->hw_config, mode); out: return ret; @@ -3486,36 +3822,6 @@ unsigned long perf_event_read_value(struct mc_perf_event *event) return rtn_count; } -void sync_child_event(struct mc_perf_event *event) -{ - struct mc_perf_event *leader; - struct mc_perf_event *sub; - - 
if(!event) - return; - if(!(event->attr.inherit) && (event->pid == 0)) - return; - - leader = event->group_leader; - if(leader->pid == 0){ - leader->child_count_total += ihk_mc_perfctr_read(leader->counter_id); - } - else if(leader->pid > 0) { - leader->count = ihk_mc_perfctr_read(leader->counter_id); - } - else - return; // Error - - list_for_each_entry(sub, &leader->sibling_list, group_entry) { - if(event->pid == 0){ - sub->child_count_total += ihk_mc_perfctr_read(sub->counter_id); - } - else if(event->pid > 0) { - sub->count = ihk_mc_perfctr_read(sub->counter_id); - } - } -} - static int perf_event_read_group(struct mc_perf_event *event, unsigned long read_format, char *buf) { @@ -3584,26 +3890,32 @@ perf_read(struct mckfd *sfd, ihk_mc_user_context_t *ctx) return ret; } -void -perf_start(struct mc_perf_event *event) +void perf_start(struct mc_perf_event *event) { int counter_id; + unsigned long counter_mask = 0; struct mc_perf_event *leader = event->group_leader, *sub; counter_id = leader->counter_id; - if((1UL << counter_id & X86_IA32_PERF_COUNTERS_MASK) | - (1UL << counter_id & X86_IA32_FIXED_PERF_COUNTERS_MASK)) { - perf_counter_start(leader); + if ((1UL << counter_id & PERF_COUNTERS_MASK) | + (1UL << counter_id & FIXED_PERF_COUNTERS_MASK)) { + perf_counter_set(leader); + counter_mask |= 1UL << counter_id; } list_for_each_entry(sub, &leader->sibling_list, group_entry) { counter_id = sub->counter_id; - if((1UL << counter_id & X86_IA32_PERF_COUNTERS_MASK) | - (1UL << counter_id & X86_IA32_FIXED_PERF_COUNTERS_MASK)) { - perf_counter_start(sub); + if ((1UL << counter_id & PERF_COUNTERS_MASK) | + (1UL << counter_id & FIXED_PERF_COUNTERS_MASK)) { + perf_counter_set(sub); + counter_mask |= 1UL << counter_id; } } - cpu_local_var(current)->proc->perf_status = PP_COUNT; + + if (counter_mask) { + ihk_mc_perfctr_start(counter_mask); + cpu_local_var(current)->proc->perf_status = PP_COUNT; + } } void @@ -3613,15 +3925,15 @@ perf_reset(struct mc_perf_event *event) struct 
mc_perf_event *leader = event->group_leader, *sub; counter_id = leader->counter_id; - if((1UL << counter_id & X86_IA32_PERF_COUNTERS_MASK) | - (1UL << counter_id & X86_IA32_FIXED_PERF_COUNTERS_MASK)) { + if ((1UL << counter_id & PERF_COUNTERS_MASK) | + (1UL << counter_id & FIXED_PERF_COUNTERS_MASK)) { ihk_mc_perfctr_reset(counter_id); } list_for_each_entry(sub, &leader->sibling_list, group_entry) { counter_id = sub->counter_id; - if((1UL << counter_id & X86_IA32_PERF_COUNTERS_MASK) | - (1UL << counter_id & X86_IA32_FIXED_PERF_COUNTERS_MASK)) { + if ((1UL << counter_id & PERF_COUNTERS_MASK) | + (1UL << counter_id & FIXED_PERF_COUNTERS_MASK)) { ihk_mc_perfctr_reset(counter_id); } } @@ -3631,37 +3943,28 @@ static void perf_stop(struct mc_perf_event *event) { int counter_id; + unsigned long counter_mask = 0; struct mc_perf_event *leader = event->group_leader, *sub; -#ifdef POSTK_DEBUG_TEMP_FIX_30 counter_id = leader->counter_id; - if((1UL << counter_id & X86_IA32_PERF_COUNTERS_MASK) | - (1UL << counter_id & X86_IA32_FIXED_PERF_COUNTERS_MASK)) { - ihk_mc_perfctr_stop(counter_id); + if ((1UL << counter_id & PERF_COUNTERS_MASK) | + (1UL << counter_id & FIXED_PERF_COUNTERS_MASK)) { + counter_mask |= 1UL << counter_id; } list_for_each_entry(sub, &leader->sibling_list, group_entry) { counter_id = sub->counter_id; - if((1UL << counter_id & X86_IA32_PERF_COUNTERS_MASK) | - (1UL << counter_id & X86_IA32_FIXED_PERF_COUNTERS_MASK)) { - ihk_mc_perfctr_stop(counter_id); + if ((1UL << counter_id & PERF_COUNTERS_MASK) | + (1UL << counter_id & FIXED_PERF_COUNTERS_MASK)) { + counter_mask |= 1UL << counter_id; } } -#else - counter_id = leader->counter_id; - if((1UL << counter_id & X86_IA32_PERF_COUNTERS_MASK) | - (1UL << counter_id & X86_IA32_FIXED_PERF_COUNTERS_MASK)) { - ihk_mc_perfctr_stop(1UL << counter_id); - } - list_for_each_entry(sub, &leader->sibling_list, group_entry) { - counter_id = sub->counter_id; - if((1UL << counter_id & X86_IA32_PERF_COUNTERS_MASK) | - (1UL << 
counter_id & X86_IA32_FIXED_PERF_COUNTERS_MASK)) { - ihk_mc_perfctr_stop(1UL << counter_id); - } + if (counter_mask) { + ihk_mc_perfctr_stop(counter_mask); + cpu_local_var(current)->proc->monitoring_event = NULL; + cpu_local_var(current)->proc->perf_status = PP_NONE; } -#endif /*POSTK_DEBUG_TEMP_FIX_30*/ } static int @@ -3694,8 +3997,6 @@ perf_ioctl(struct mckfd *sfd, ihk_mc_user_context_t *ctx) if(event->pid == 0){ perf_stop(event); } - cpu_local_var(current)->proc->monitoring_event = NULL; - cpu_local_var(current)->proc->perf_status = PP_NONE; // TODO: stop other process /* else if(event->pid > 0){ @@ -3794,6 +4095,7 @@ perf_mmap(struct mckfd *sfd, ihk_mc_user_context_t *ctx) return rc; } +#endif /*ENABLE_PERF*/ struct vm_range_numa_policy *vm_range_policy_search(struct process_vm *vm, uintptr_t addr) { @@ -3845,7 +4147,7 @@ static int vm_policy_insert(struct process_vm *vm, struct vm_range_numa_policy * return 0; } - +#ifdef ENABLE_PERF struct mc_perf_event* mc_perf_event_alloc(struct perf_event_attr *attr) { @@ -3990,7 +4292,7 @@ SYSCALL_DECLARE(perf_event_open) request.number = __NR_perf_event_open; request.args[0] = 0; - fd = do_syscall(&request, ihk_mc_get_processor_id(), 0); + fd = do_syscall(&request, ihk_mc_get_processor_id()); if(fd < 0){ return fd; } @@ -4021,6 +4323,7 @@ SYSCALL_DECLARE(perf_event_open) ihk_mc_spinlock_unlock(&proc->mckfd_lock, irqstate); return sfd->fd; } +#endif /* ENABLE_PERF */ SYSCALL_DECLARE(rt_sigtimedwait) { @@ -4159,7 +4462,7 @@ SYSCALL_DECLARE(rt_sigtimedwait) list_del(&pending->list); thread->sigmask.__val[0] = bset; mcs_rwlock_writer_unlock(lock, &mcs_rw_node); - do_signal(-EINTR, NULL, thread, pending, 0); + do_signal(-EINTR, NULL, thread, pending, -1); return -EINTR; } mcs_rwlock_writer_unlock(lock, &mcs_rw_node); @@ -4272,7 +4575,7 @@ do_sigsuspend(struct thread *thread, const sigset_t *set) list_del(&pending->list); mcs_rwlock_writer_unlock(lock, &mcs_rw_node); thread->sigmask.__val[0] = bset; - do_signal(-EINTR, NULL, 
thread, pending, 0); + do_signal(-EINTR, NULL, thread, pending, -1); break; } return -EINTR; @@ -4749,6 +5052,7 @@ int shmobj_list_lookup(int shmid, struct shmobj **objp) return -EIDRM; } + memobj_ref(&obj->memobj); *objp = obj; return 0; } /* shmobj_list_lookup() */ @@ -4758,7 +5062,8 @@ int shmobj_list_lookup_by_key(key_t key, struct shmobj **objp) struct shmobj *obj; list_for_each_entry(obj, &kds_list, chain) { - if (obj->ds.shm_perm.key == key) { + if (obj->ds.shm_perm.key == key && + !(obj->ds.shm_perm.mode & SHM_DEST)) { break; } } @@ -4766,6 +5071,7 @@ int shmobj_list_lookup_by_key(key_t key, struct shmobj **objp) return -EINVAL; } + memobj_ref(&obj->memobj); *objp = obj; return 0; } /* shmobj_list_lookup_by_key() */ @@ -4783,6 +5089,7 @@ int shmobj_list_lookup_by_index(int index, struct shmobj **objp) return -EINVAL; } + memobj_ref(&obj->memobj); *objp = obj; return 0; } /* shmobj_list_lookup_by_index() */ @@ -4824,6 +5131,7 @@ int do_shmget(const key_t key, const size_t size, const int shmflg) } if (obj && (shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) { shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("do_shmget(%#lx,%#lx,%#x): -EEXIST\n", key, size, shmflg); return -EEXIST; } @@ -4850,12 +5158,14 @@ int do_shmget(const key_t key, const size_t size, const int shmflg) } if (req & ~obj->ds.shm_perm.mode) { shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("do_shmget(%#lx,%#lx,%#x): -EINVAL\n", key, size, shmflg); return -EACCES; } } if (obj->ds.shm_segsz < size) { shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("do_shmget(%#lx,%#lx,%#x): -EINVAL\n", key, size, shmflg); return -EINVAL; } @@ -4902,7 +5212,6 @@ int do_shmget(const key_t key, const size_t size, const int shmflg) shmid = make_shmid(obj); shmobj_list_unlock(); - memobj_release(&obj->memobj); dkprintf("do_shmget(%#lx,%#lx,%#x): %d\n", key, size, shmflg, shmid); return shmid; @@ -4918,8 +5227,7 @@ SYSCALL_DECLARE(shmat) struct process_vm *vm = thread->vm; size_t 
len; int error; - struct vm_regions *region = &vm->region; - intptr_t addr; + uintptr_t addr; int prot; int vrflags; int req; @@ -4939,6 +5247,7 @@ SYSCALL_DECLARE(shmat) pgsize = (size_t)1 << obj->pgshift; if (shmaddr && ((uintptr_t)shmaddr & (pgsize - 1)) && !(shmflg & SHM_RND)) { shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmat(%#x,%p,%#x): -EINVAL\n", shmid, shmaddr, shmflg); return -EINVAL; } @@ -4968,6 +5277,7 @@ SYSCALL_DECLARE(shmat) } if (~obj->ds.shm_perm.mode & req) { shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmat(%#x,%p,%#x): -EINVAL\n", shmid, shmaddr, shmflg); return -EACCES; } @@ -4978,24 +5288,20 @@ SYSCALL_DECLARE(shmat) if (lookup_process_memory_range(vm, addr, addr+len)) { ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmat(%#x,%p,%#x):lookup_process_memory_range succeeded. -ENOMEM\n", shmid, shmaddr, shmflg); return -ENOMEM; } } else { -#ifdef POSTK_DEBUG_ARCH_DEP_27 - error = search_free_space(cpu_local_var(current), len, - region->map_end, obj->pgshift, &addr); -#else - error = search_free_space(len, region->map_end, obj->pgshift, &addr); -#endif /* POSTK_DEBUG_ARCH_DEP_27 */ + error = search_free_space(len, obj->pgshift, &addr); if (error) { ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmat(%#x,%p,%#x):search_free_space failed. %d\n", shmid, shmaddr, shmflg, error); return error; } - region->map_end = addr + len; } vrflags = VR_NONE; @@ -5004,24 +5310,23 @@ SYSCALL_DECLARE(shmat) vrflags |= VRFLAG_PROT_TO_MAXPROT(vrflags); if (!(prot & PROT_WRITE)) { - error = set_host_vma(addr, len, PROT_READ); + error = set_host_vma(addr, len, PROT_READ | PROT_EXEC, 1/* holding memory_range_lock */); if (error) { ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmat(%#x,%p,%#x):set_host_vma failed. 
%d\n", shmid, shmaddr, shmflg, error); return error; } } - memobj_ref(&obj->memobj); - error = add_process_memory_range(vm, addr, addr+len, -1, vrflags, &obj->memobj, 0, obj->pgshift, NULL); if (error) { if (!(prot & PROT_WRITE)) { - (void)set_host_vma(addr, len, PROT_READ|PROT_WRITE); + (void)set_host_vma(addr, len, PROT_READ | PROT_WRITE | PROT_EXEC, 1/* holding memory_range_lock */); } - memobj_release(&obj->memobj); + memobj_unref(&obj->memobj); ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); shmobj_list_unlock(); dkprintf("shmat(%#x,%p,%#x):add_process_memory_range failed. %d\n", shmid, shmaddr, shmflg, error); @@ -5031,7 +5336,6 @@ SYSCALL_DECLARE(shmat) ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); shmobj_list_unlock(); - dkprintf("shmat:bump shm_nattach %p %d\n", obj, obj->ds.shm_nattch); dkprintf("shmat(%#x,%p,%#x): 0x%lx. %d\n", shmid, shmaddr, shmflg, addr); return addr; } /* sys_shmat() */ @@ -5053,10 +5357,11 @@ SYSCALL_DECLARE(shmctl) size_t size; struct shmlock_user *user; uid_t ruid = proc->ruid; + uint16_t oldmode; dkprintf("shmctl(%#x,%d,%p)\n", shmid, cmd, buf); - if (0) ; - else if (cmd == IPC_RMID) { + switch (cmd) { + case IPC_RMID: shmobj_list_lock(); error = shmobj_list_lookup(shmid, &obj); if (error) { @@ -5068,19 +5373,21 @@ SYSCALL_DECLARE(shmctl) && (obj->ds.shm_perm.uid != proc->euid) && (obj->ds.shm_perm.cuid != proc->euid)) { shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmctl(%#x,%d,%p): -EPERM\n", shmid, cmd, buf); return -EPERM; } + oldmode = obj->ds.shm_perm.mode; obj->ds.shm_perm.mode |= SHM_DEST; - if (obj->ds.shm_nattch <= 0) { - shmobj_destroy(obj); - } shmobj_list_unlock(); + // unref twice if this is the first time rmid is called + if (!(oldmode & SHM_DEST)) + memobj_unref(&obj->memobj); + memobj_unref(&obj->memobj); dkprintf("shmctl(%#x,%d,%p): 0\n", shmid, cmd, buf); return 0; - } - else if (cmd == IPC_SET) { + case IPC_SET: shmobj_list_lock(); error = shmobj_list_lookup(shmid, &obj); if 
(error) { @@ -5091,12 +5398,14 @@ SYSCALL_DECLARE(shmctl) if ((obj->ds.shm_perm.uid != proc->euid) && (obj->ds.shm_perm.cuid != proc->euid)) { shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmctl(%#x,%d,%p): -EPERM\n", shmid, cmd, buf); return -EPERM; } error = copy_from_user(&ads, buf, sizeof(ads)); if (error) { shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmctl(%#x,%d,%p): %d\n", shmid, cmd, buf, error); return error; } @@ -5107,48 +5416,66 @@ SYSCALL_DECLARE(shmctl) obj->ds.shm_ctime = now; shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmctl(%#x,%d,%p): 0\n", shmid, cmd, buf); return 0; - } - else if (cmd == IPC_STAT) { + case IPC_STAT: + case SHM_STAT: shmobj_list_lock(); - error = shmobj_list_lookup(shmid, &obj); + if (cmd == IPC_STAT) { + error = shmobj_list_lookup(shmid, &obj); + } else { // SHM_STAT + error = shmobj_list_lookup_by_index(shmid, &obj); + } if (error) { shmobj_list_unlock(); dkprintf("shmctl(%#x,%d,%p): lookup: %d\n", shmid, cmd, buf, error); return error; } - if (!proc->euid) { - req = 0; + + if (cmd == IPC_STAT) { + if (!proc->euid) { + req = 0; + } else if ((proc->euid == obj->ds.shm_perm.uid) || + (proc->euid == obj->ds.shm_perm.cuid)) { + req = 0400; + } else if ((proc->egid == obj->ds.shm_perm.gid) || + (proc->egid == obj->ds.shm_perm.cgid)) { + req = 0040; + } else { + req = 0004; + } + if (req & ~obj->ds.shm_perm.mode) { + shmobj_list_unlock(); + memobj_unref(&obj->memobj); + dkprintf("shmctl(%#x,%d,%p): -EACCES\n", shmid, + cmd, buf); + return -EACCES; + } } - else if ((proc->euid == obj->ds.shm_perm.uid) - || (proc->euid == obj->ds.shm_perm.cuid)) { - req = 0400; - } - else if ((proc->egid == obj->ds.shm_perm.gid) - || (proc->egid == obj->ds.shm_perm.cgid)) { - req = 0040; - } - else { - req = 0004; - } - if (req & ~obj->ds.shm_perm.mode) { - shmobj_list_unlock(); - dkprintf("shmctl(%#x,%d,%p): -EACCES\n", shmid, cmd, buf); - return -EACCES; + + /* This could potentially be higher 
than required if some other + * thread holds a ref at this point. + * Minus one here is because we hold a ref... + */ + obj->ds.shm_nattch = ihk_atomic_read(&obj->memobj.refcnt) - 1; + /* ... And one for sentinel unless RMID has been called */ + if (!(obj->ds.shm_perm.mode & SHM_DEST)) { + obj->ds.shm_nattch--; } + error = copy_to_user(buf, &obj->ds, sizeof(*buf)); if (error) { shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmctl(%#x,%d,%p): %d\n", shmid, cmd, buf, error); return error; } - shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmctl(%#x,%d,%p): 0\n", shmid, cmd, buf); return 0; - } - else if (cmd == IPC_INFO) { + case IPC_INFO: shmobj_list_lock(); error = shmobj_list_lookup(shmid, &obj); if (error) { @@ -5159,6 +5486,7 @@ SYSCALL_DECLARE(shmctl) error = copy_to_user(buf, &the_shminfo, sizeof(the_shminfo)); if (error) { shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmctl(%#x,%d,%p): %d\n", shmid, cmd, buf, error); return error; } @@ -5168,10 +5496,10 @@ SYSCALL_DECLARE(shmctl) maxi = 0; } shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmctl(%#x,%d,%p): %d\n", shmid, cmd, buf, maxi); return maxi; - } - else if (cmd == SHM_LOCK) { + case SHM_LOCK: shmobj_list_lock(); error = shmobj_list_lookup(shmid, &obj); if (error) { @@ -5183,12 +5511,14 @@ SYSCALL_DECLARE(shmctl) && (obj->ds.shm_perm.cuid != proc->euid) && (obj->ds.shm_perm.uid != proc->euid)) { shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmctl(%#x,%d,%p): perm shm: %d\n", shmid, cmd, buf, error); return -EPERM; } rlim = &proc->rlimit[MCK_RLIMIT_MEMLOCK]; if (!rlim->rlim_cur && !has_cap_ipc_lock(thread)) { shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmctl(%#x,%d,%p): perm proc: %d\n", shmid, cmd, buf, error); return -EPERM; } @@ -5199,6 +5529,7 @@ SYSCALL_DECLARE(shmctl) error = shmlock_user_get(ruid, &user); if (error) { shmlock_users_unlock(); + memobj_unref(&obj->memobj); shmobj_list_unlock(); 
ekprintf("shmctl(%#x,%d,%p): user lookup: %d\n", shmid, cmd, buf, error); return -ENOMEM; @@ -5209,6 +5540,7 @@ SYSCALL_DECLARE(shmctl) && ((rlim->rlim_cur < user->locked) || ((rlim->rlim_cur - user->locked) < size))) { shmlock_users_unlock(); + memobj_unref(&obj->memobj); shmobj_list_unlock(); dkprintf("shmctl(%#x,%d,%p): too large: %d\n", shmid, cmd, buf, error); return -ENOMEM; @@ -5219,11 +5551,11 @@ SYSCALL_DECLARE(shmctl) shmlock_users_unlock(); } shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmctl(%#x,%d,%p): 0\n", shmid, cmd, buf); return 0; - } - else if (cmd == SHM_UNLOCK) { + case SHM_UNLOCK: shmobj_list_lock(); error = shmobj_list_lookup(shmid, &obj); if (error) { @@ -5235,6 +5567,7 @@ SYSCALL_DECLARE(shmctl) && (obj->ds.shm_perm.cuid != proc->euid) && (obj->ds.shm_perm.uid != proc->euid)) { shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmctl(%#x,%d,%p): perm shm: %d\n", shmid, cmd, buf, error); return -EPERM; } @@ -5253,28 +5586,10 @@ SYSCALL_DECLARE(shmctl) obj->ds.shm_perm.mode &= ~SHM_LOCKED; } shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmctl(%#x,%d,%p): 0\n", shmid, cmd, buf); return 0; - } - else if (cmd == SHM_STAT) { - shmobj_list_lock(); - error = shmobj_list_lookup_by_index(shmid, &obj); - if (error) { - shmobj_list_unlock(); - dkprintf("shmctl(%#x,%d,%p): lookup: %d\n", shmid, cmd, buf, error); - return error; - } - error = copy_to_user(buf, &obj->ds, sizeof(*buf)); - if (error) { - shmobj_list_unlock(); - dkprintf("shmctl(%#x,%d,%p): %d\n", shmid, cmd, buf, error); - return error; - } - shmobj_list_unlock(); - dkprintf("shmctl(%#x,%d,%p): 0\n", shmid, cmd, buf); - return 0; - } - else if (cmd == SHM_INFO) { + case SHM_INFO: shmobj_list_lock(); error = copy_to_user(buf, &the_shm_info, sizeof(the_shm_info)); if (error) { @@ -5290,10 +5605,10 @@ SYSCALL_DECLARE(shmctl) shmobj_list_unlock(); dkprintf("shmctl(%#x,%d,%p): %d\n", shmid, cmd, buf, maxi); return maxi; + default: + 
dkprintf("shmctl(%#x,%d,%p): EINVAL\n", shmid, cmd, buf); + return -EINVAL; } - - dkprintf("shmctl(%#x,%d,%p): EINVAL\n", shmid, cmd, buf); - return -EINVAL; } /* sys_shmctl() */ SYSCALL_DECLARE(shmdt) @@ -5314,7 +5629,7 @@ SYSCALL_DECLARE(shmdt) return -EINVAL; } - error = do_munmap((void *)range->start, (range->end - range->start)); + error = do_munmap((void *)range->start, (range->end - range->start), 1/* holding memory_range_lock */); if (error) { ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); dkprintf("shmdt(%p): %d\n", shmaddr, error); @@ -5326,8 +5641,16 @@ SYSCALL_DECLARE(shmdt) return 0; } /* sys_shmdt() */ -SYSCALL_DECLARE(futex) +long do_futex(int n, unsigned long arg0, unsigned long arg1, + unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5, + unsigned long _uti_clv, + void *uti_futex_resp, + void *_linux_wait_event, + void *_linux_printk, + void *_linux_clock_gettime) { + struct cpu_local_var *uti_clv = (struct cpu_local_var *)_uti_clv; uint64_t timeout = 0; // No timeout uint32_t val2 = 0; // Only one clock is used, ignore FUTEX_CLOCK_REALTIME @@ -5335,24 +5658,44 @@ SYSCALL_DECLARE(futex) int fshared = 1; int ret = 0; - uint32_t *uaddr = (uint32_t *)ihk_mc_syscall_arg0(ctx); - int op = (int)ihk_mc_syscall_arg1(ctx); - uint32_t val = (uint32_t)ihk_mc_syscall_arg2(ctx); - struct timespec *utime = (struct timespec*)ihk_mc_syscall_arg3(ctx); - uint32_t *uaddr2 = (uint32_t *)ihk_mc_syscall_arg4(ctx); - uint32_t val3 = (uint32_t)ihk_mc_syscall_arg5(ctx); + uint32_t *uaddr = (uint32_t *)arg0; + int op = (int)arg1; + uint32_t val = (uint32_t)arg2; + struct timespec *utime = (struct timespec*)arg3; + uint32_t *uaddr2 = (uint32_t *)arg4; + uint32_t val3 = (uint32_t)arg5; int flags = op; - struct ihk_os_cpu_monitor *monitor = cpu_local_var(monitor); - monitor->status = IHK_OS_MONITOR_KERNEL_HEAVY; - + + /* TODO: replace these with passing via struct smp_boot_param */ + if (_linux_printk && !linux_printk) { + linux_printk = 
(int (*)(const char *fmt, ...))_linux_printk; + } + if (_linux_wait_event && !linux_wait_event) { + linux_wait_event = (long (*)(void *_resp, unsigned long nsec_timeout))_linux_wait_event; + } + if (_linux_clock_gettime && !linux_clock_gettime) { + linux_clock_gettime = (int (*)(clockid_t clk_id, struct timespec *tp))_linux_clock_gettime; + } + + /* Fill in clv */ + if (uti_clv) { + uti_clv->uti_futex_resp = uti_futex_resp; + } + + /* monitor is per-cpu object */ + if (!uti_clv) { + struct ihk_os_cpu_monitor *monitor = cpu_local_var(monitor); + monitor->status = IHK_OS_MONITOR_KERNEL_HEAVY; + } + /* Cross-address space futex? */ if (op & FUTEX_PRIVATE_FLAG) { fshared = 0; } op = (op & FUTEX_CMD_MASK); - dkprintf("futex op=[%x, %s],uaddr=%lx, val=%x, utime=%lx, uaddr2=%lx, val3=%x, []=%x, shared: %d\n", + uti_dkprintf("futex op=[%x, %s],uaddr=%lx, val=%x, utime=%lx, uaddr2=%lx, val3=%x, []=%x, shared: %d\n", flags, (op == FUTEX_WAIT) ? "FUTEX_WAIT" : (op == FUTEX_WAIT_BITSET) ? "FUTEX_WAIT_BITSET" : @@ -5363,8 +5706,13 @@ SYSCALL_DECLARE(futex) (op == FUTEX_REQUEUE) ? "FUTEX_REQUEUE (NOT IMPL!)" : "unknown", (unsigned long)uaddr, val, utime, uaddr2, val3, *uaddr, fshared); + if ((op == FUTEX_WAIT || op == FUTEX_WAIT_BITSET) && utime) { + uti_dkprintf("%s: utime=%ld.%09ld\n", __FUNCTION__, utime->tv_sec, utime->tv_nsec); + } if (utime && (op == FUTEX_WAIT_BITSET || op == FUTEX_WAIT)) { unsigned long nsec_timeout; + if (!uti_clv) { + /* Use cycles for non-UTI case */ /* As per the Linux implementation FUTEX_WAIT specifies the duration of * the timeout, while FUTEX_WAIT_BITSET specifies the absolute timestamp */ @@ -5390,7 +5738,8 @@ SYSCALL_DECLARE(futex) request.args[1] = (flags & FUTEX_CLOCK_REALTIME)? 
CLOCK_REALTIME: CLOCK_MONOTONIC; - int r = do_syscall(&request, ihk_mc_get_processor_id(), 0); + int r = do_syscall(&request, + ihk_mc_get_processor_id()); if (r < 0) { return -EFAULT; @@ -5410,19 +5759,35 @@ SYSCALL_DECLARE(futex) else { nsec_timeout = (utime->tv_sec * NS_PER_SEC + utime->tv_nsec); } - timeout = nsec_timeout * 1000 / ihk_mc_get_ns_per_tsc(); - dkprintf("futex timeout: %lu\n", timeout); + + } + else{ + if (op == FUTEX_WAIT_BITSET) { /* User passed absolute time */ + struct timespec ats; + ret = (*linux_clock_gettime)((flags & FUTEX_CLOCK_REALTIME) ? CLOCK_REALTIME: CLOCK_MONOTONIC, &ats); + if (ret) { + return ret; + } + uti_dkprintf("%s: ats=%ld.%09ld\n", __FUNCTION__, ats.tv_sec, ats.tv_nsec); + /* Use nsec for UTI case */ + timeout = (utime->tv_sec * NS_PER_SEC + utime->tv_nsec) - + (ats.tv_sec * NS_PER_SEC + ats.tv_nsec); + } else { /* User passed relative time */ + /* Use nsec for UTI case */ + timeout = (utime->tv_sec * NS_PER_SEC + utime->tv_nsec); + } + } } /* Requeue parameter in 'utime' if op == FUTEX_CMP_REQUEUE. * number of waiters to wake in 'utime' if op == FUTEX_WAKE_OP. */ if (op == FUTEX_CMP_REQUEUE || op == FUTEX_WAKE_OP) - val2 = (uint32_t) (unsigned long) ihk_mc_syscall_arg3(ctx); + val2 = (uint32_t) (unsigned long) arg3; - ret = futex(uaddr, op, val, timeout, uaddr2, val2, val3, fshared); + ret = futex(uaddr, op, val, timeout, uaddr2, val2, val3, fshared, uti_clv); - dkprintf("futex op=[%x, %s],uaddr=%lx, val=%x, utime=%lx, uaddr2=%lx, val3=%x, []=%x, shared: %d, ret: %d\n", + uti_dkprintf("futex op=[%x, %s],uaddr=%lx, val=%x, utime=%lx, uaddr2=%lx, val3=%x, []=%x, shared: %d, ret: %d\n", op, (op == FUTEX_WAIT) ? "FUTEX_WAIT" : (op == FUTEX_WAIT_BITSET) ? 
"FUTEX_WAIT_BITSET" : @@ -5436,6 +5801,14 @@ SYSCALL_DECLARE(futex) return ret; } +SYSCALL_DECLARE(futex) +{ + return do_futex(n, ihk_mc_syscall_arg0(ctx), ihk_mc_syscall_arg1(ctx), + ihk_mc_syscall_arg2(ctx), ihk_mc_syscall_arg3(ctx), + ihk_mc_syscall_arg4(ctx), ihk_mc_syscall_arg5(ctx), + 0UL, NULL, NULL, NULL, NULL); +} + static void do_exit(int code) { @@ -5446,25 +5819,10 @@ do_exit(int code) int nproc; int exit_status = (code >> 8) & 255; int sig = code & 255; + struct timespec ats; dkprintf("sys_exit,pid=%d\n", proc->pid); - mcs_rwlock_reader_lock(&proc->threads_lock, &lock); - nproc = 0; - list_for_each_entry(child, &proc->threads_list, siblings_list){ - nproc++; - } - mcs_rwlock_reader_unlock(&proc->threads_lock, &lock); - - if(nproc == 1){ // process has only one thread - terminate(exit_status, sig); - return; - } - -#ifdef DCFA_KMOD - do_mod_exit((int)ihk_mc_syscall_arg0(ctx)); -#endif - /* XXX: for if all threads issued the exit(2) rather than exit_group(2), * exit(2) also should delegate. 
*/ @@ -5477,17 +5835,43 @@ do_exit(int code) setint_user((int*)thread->clear_child_tid, 0); barrier(); futex((uint32_t *)thread->clear_child_tid, - FUTEX_WAKE, 1, 0, NULL, 0, 0, 1); + FUTEX_WAKE, 1, 0, NULL, 0, 0, 1, NULL); + thread->clear_child_tid = NULL; } - mcs_rwlock_writer_lock(&proc->threads_lock, &lock); + mcs_rwlock_reader_lock(&proc->threads_lock, &lock); + nproc = 0; + list_for_each_entry(child, &proc->threads_list, siblings_list) { + if (child->status != PS_EXITED && + child->status != PS_ZOMBIE) + nproc++; + } + + if (nproc == 1) { // process has only one thread + mcs_rwlock_reader_unlock(&proc->threads_lock, &lock); + terminate(exit_status, sig); + return; + } + +#ifdef DCFA_KMOD + do_mod_exit((int)ihk_mc_syscall_arg0(ctx)); +#endif + if(proc->status == PS_EXITED){ mcs_rwlock_writer_unlock(&proc->threads_lock, &lock); terminate(exit_status, 0); return; } preempt_disable(); + thread->exit_status = code; thread->status = PS_EXITED; + tsc_to_ts(thread->user_tsc, &ats); + ts_add(&proc->utime, &ats); + tsc_to_ts(thread->system_tsc, &ats); + ts_add(&proc->stime, &ats); + thread->user_tsc = 0; + thread->system_tsc = 0; + thread_exit_signal(thread); sync_child_event(thread->proc->monitoring_event); mcs_rwlock_writer_unlock(&proc->threads_lock, &lock); release_thread(thread); @@ -5500,7 +5884,7 @@ do_exit(int code) SYSCALL_DECLARE(exit) { - int exit_status = (int)ihk_mc_syscall_arg0(ctx); + int exit_status = ((int)ihk_mc_syscall_arg0(ctx)) & 255; do_exit(exit_status << 8); return 0; @@ -5564,16 +5948,16 @@ SYSCALL_DECLARE(setrlimit) struct rlimit *rlm = (struct rlimit *)ihk_mc_syscall_arg1(ctx); struct thread *thread = cpu_local_var(current); int i; -#ifdef POSTK_DEBUG_TEMP_FIX_3 /* If rlim_cur is greater than rlim_max, return -EINVAL (S64FX_19) */ struct rlimit new_rlim; - - if (copy_from_user(&new_rlim, rlm, sizeof(*rlm))) - return -EFAULT; - if (new_rlim.rlim_cur > new_rlim.rlim_max) - return -EINVAL; -#endif /* POSTK_DEBUG_TEMP_FIX_3 */ int mcresource; + 
if (copy_from_user(&new_rlim, rlm, sizeof(*rlm))) { + return -EFAULT; + } + if (new_rlim.rlim_cur > new_rlim.rlim_max) { + return -EINVAL; + } + switch(resource){ case RLIMIT_FSIZE: case RLIMIT_NOFILE: @@ -5594,12 +5978,8 @@ SYSCALL_DECLARE(setrlimit) return syscall_generic_forwarding(__NR_setrlimit, ctx); } -#ifdef POSTK_DEBUG_TEMP_FIX_3 /* If rlim_cur is greater than rlim_max, return -EINVAL (S64FX_19) */ - memcpy(thread->proc->rlimit + mcresource, &new_rlim, sizeof(new_rlim)); -#else /* POSTK_DEBUG_TEMP_FIX_3 */ - if(copy_from_user(thread->proc->rlimit + mcresource, rlm, sizeof(struct rlimit))) - return -EFAULT; -#endif /* POSTK_DEBUG_TEMP_FIX_3 */ + memcpy(thread->proc->rlimit + mcresource, &new_rlim, + sizeof(new_rlim)); return 0; } @@ -5707,7 +6087,6 @@ SYSCALL_DECLARE(getrusage) } extern int ptrace_traceme(void); -extern void clear_single_step(struct thread *thread); extern void set_single_step(struct thread *thread); static int ptrace_wakeup_sig(int pid, long request, long data) { @@ -5718,13 +6097,11 @@ static int ptrace_wakeup_sig(int pid, long request, long data) { struct mcs_rwlock_node_irqsave lock; struct thread *thread = cpu_local_var(current); - child = find_thread(pid, pid, &lock); + child = find_thread(pid, pid); if (!child) { error = -ESRCH; goto out; } - hold_thread(child); - thread_unlock(child, &lock); if (data > 64 || data < 0) { error = -EINVAL; @@ -5747,9 +6124,9 @@ static int ptrace_wakeup_sig(int pid, long request, long data) { set_single_step(child); } mcs_rwlock_writer_lock(&child->proc->update_lock, &lock); - child->proc->ptrace &= ~PT_TRACE_SYSCALL; + child->ptrace &= ~PT_TRACE_SYSCALL; if (request == PTRACE_SYSCALL) { - child->proc->ptrace |= PT_TRACE_SYSCALL; + child->ptrace |= PT_TRACE_SYSCALL; } mcs_rwlock_writer_unlock(&child->proc->update_lock, &lock); if(data != 0 && data != SIGSTOP) { @@ -5785,7 +6162,7 @@ static int ptrace_wakeup_sig(int pid, long request, long data) { sched_wakeup_thread(child, PS_TRACED | PS_STOPPED); out: 
if(child) - release_thread(child); + thread_unlock(child); return error; } @@ -5796,17 +6173,16 @@ static long ptrace_pokeuser(int pid, long addr, long data) { long rc = -EIO; struct thread *child; - struct mcs_rwlock_node_irqsave lock; if(addr > sizeof(struct user) - 8 || addr < 0) return -EFAULT; - child = find_thread(pid, pid, &lock); + child = find_thread(0, pid); if (!child) return -ESRCH; if(child->status & (PS_STOPPED | PS_TRACED)){ rc = ptrace_write_user(child, addr, (unsigned long)data); } - thread_unlock(child, &lock); + thread_unlock(child); return rc; } @@ -5815,12 +6191,11 @@ static long ptrace_peekuser(int pid, long addr, long data) { long rc = -EIO; struct thread *child; - struct mcs_rwlock_node_irqsave lock; unsigned long *p = (unsigned long *)data; if(addr > sizeof(struct user) - 8|| addr < 0) return -EFAULT; - child = find_thread(pid, pid, &lock); + child = find_thread(0, pid); if (!child) return -ESRCH; if(child->status & (PS_STOPPED | PS_TRACED)){ @@ -5830,7 +6205,7 @@ static long ptrace_peekuser(int pid, long addr, long data) rc = copy_to_user(p, (char *)&value, sizeof(value)); } } - thread_unlock(child, &lock); + thread_unlock(child); return rc; } @@ -5840,9 +6215,8 @@ static long ptrace_getregs(int pid, long data) struct user_regs_struct *regs = (struct user_regs_struct *)data; long rc = -EIO; struct thread *child; - struct mcs_rwlock_node_irqsave lock; - child = find_thread(pid, pid, &lock); + child = find_thread(0, pid); if (!child) return -ESRCH; if(child->status & (PS_STOPPED | PS_TRACED)){ @@ -5860,7 +6234,7 @@ static long ptrace_getregs(int pid, long data) rc = copy_to_user(regs, &user_regs, sizeof(struct user_regs_struct)); } } - thread_unlock(child, &lock); + thread_unlock(child); return rc; } @@ -5870,9 +6244,8 @@ static long ptrace_setregs(int pid, long data) struct user_regs_struct *regs = (struct user_regs_struct *)data; long rc = -EIO; struct thread *child; - struct mcs_rwlock_node_irqsave lock; - child = find_thread(pid, pid, 
&lock); + child = find_thread(0, pid); if (!child) return -ESRCH; if(child->status & (PS_STOPPED | PS_TRACED)){ @@ -5891,7 +6264,7 @@ static long ptrace_setregs(int pid, long data) } } } - thread_unlock(child, &lock); + thread_unlock(child); return rc; } @@ -5903,15 +6276,14 @@ static long ptrace_getfpregs(int pid, long data) { long rc = -EIO; struct thread *child; - struct mcs_rwlock_node_irqsave lock; - child = find_thread(pid, pid, &lock); + child = find_thread(0, pid); if (!child) return -ESRCH; if(child->status & (PS_STOPPED | PS_TRACED)){ rc = ptrace_read_fpregs(child, (void *)data); } - thread_unlock(child, &lock); + thread_unlock(child); return rc; } @@ -5920,15 +6292,14 @@ static long ptrace_setfpregs(int pid, long data) { long rc = -EIO; struct thread *child; - struct mcs_rwlock_node_irqsave lock; - child = find_thread(pid, pid, &lock); + child = find_thread(0, pid); if (!child) return -ESRCH; if(child->status & (PS_STOPPED | PS_TRACED)){ rc = ptrace_write_fpregs(child, (void *)data); } - thread_unlock(child, &lock); + thread_unlock(child); return rc; } @@ -5940,9 +6311,8 @@ static long ptrace_getregset(int pid, long type, long data) { long rc = -EIO; struct thread *child; - struct mcs_rwlock_node_irqsave lock; - child = find_thread(pid, pid, &lock); + child = find_thread(0, pid); if (!child) return -ESRCH; if(child->status & (PS_STOPPED | PS_TRACED)){ @@ -5957,7 +6327,7 @@ static long ptrace_getregset(int pid, long type, long data) &iov.iov_len, sizeof(iov.iov_len)); } } - thread_unlock(child, &lock); + thread_unlock(child); return rc; } @@ -5966,9 +6336,8 @@ static long ptrace_setregset(int pid, long type, long data) { long rc = -EIO; struct thread *child; - struct mcs_rwlock_node_irqsave lock; - child = find_thread(pid, pid, &lock); + child = find_thread(0, pid); if (!child) return -ESRCH; if(child->status & (PS_STOPPED | PS_TRACED)){ @@ -5983,7 +6352,7 @@ static long ptrace_setregset(int pid, long type, long data) &iov.iov_len, sizeof(iov.iov_len)); } 
} - thread_unlock(child, &lock); + thread_unlock(child); return rc; } @@ -5992,10 +6361,9 @@ static long ptrace_peektext(int pid, long addr, long data) { long rc = -EIO; struct thread *child; - struct mcs_rwlock_node_irqsave lock; unsigned long *p = (unsigned long *)data; - child = find_thread(pid, pid, &lock); + child = find_thread(0, pid); if (!child) return -ESRCH; if(child->status & (PS_STOPPED | PS_TRACED)){ @@ -6007,7 +6375,7 @@ static long ptrace_peektext(int pid, long addr, long data) rc = copy_to_user(p, &value, sizeof(value)); } } - thread_unlock(child, &lock); + thread_unlock(child); return rc; } @@ -6016,9 +6384,8 @@ static long ptrace_poketext(int pid, long addr, long data) { long rc = -EIO; struct thread *child; - struct mcs_rwlock_node_irqsave lock; - child = find_thread(pid, pid, &lock); + child = find_thread(0, pid); if (!child) return -ESRCH; if(child->status & (PS_STOPPED | PS_TRACED)){ @@ -6027,7 +6394,7 @@ static long ptrace_poketext(int pid, long addr, long data) dkprintf("ptrace_poketext: bad address 0x%llx\n", addr); } } - thread_unlock(child, &lock); + thread_unlock(child); return rc; } @@ -6036,7 +6403,6 @@ static int ptrace_setoptions(int pid, int flags) { int ret; struct thread *child; - struct mcs_rwlock_node_irqsave lock; /* Only supported options are enabled. 
* Following options are pretended to be supported for the time being: @@ -6058,19 +6424,19 @@ static int ptrace_setoptions(int pid, int flags) goto out; } - child = find_thread(pid, pid, &lock); - if (!child || !child->proc || !(child->proc->ptrace & PT_TRACED)) { + child = find_thread(0, pid); + if (!child || !child->proc || !(child->ptrace & PT_TRACED)) { ret = -ESRCH; goto unlockout; } - child->proc->ptrace &= ~PTRACE_O_MASK; /* PT_TRACE_EXEC remains */ - child->proc->ptrace |= flags; + child->ptrace &= ~PTRACE_O_MASK; /* PT_TRACE_EXEC remains */ + child->ptrace |= flags; ret = 0; unlockout: if(child) - thread_unlock(child, &lock); + thread_unlock(child); out: return ret; } @@ -6081,78 +6447,37 @@ static int ptrace_attach(int pid) struct thread *thread; struct thread *mythread = cpu_local_var(current); struct process *proc = mythread->proc; - struct process *child; - struct process *parent; - struct mcs_rwlock_node_irqsave lock; - struct mcs_rwlock_node childlock; - struct mcs_rwlock_node updatelock; struct siginfo info; - thread = find_thread(pid, pid, &lock); + thread = find_thread(0, pid); if (!thread) { error = -ESRCH; goto out; } if (proc->pid == pid) { - thread_unlock(thread, &lock); + thread_unlock(thread); error = -EPERM; goto out; } - child = thread->proc; - dkprintf("ptrace_attach(): pid requested:%d, thread->tid:%d, thread->proc->pid=%d, thread->proc->parent=%p\n", pid, thread->tid, thread->proc->pid, thread->proc->parent); - - mcs_rwlock_writer_lock_noirq(&child->update_lock, &updatelock); - - /* Only for the first thread of a process XXX: fix this */ - if (thread->tid == child->pid) { - if (thread->proc->ptrace & PT_TRACED) { - mcs_rwlock_writer_unlock_noirq(&child->update_lock, &updatelock); - thread_unlock(thread, &lock); - dkprintf("ptrace_attach: -EPERM\n"); - error = -EPERM; - goto out; - } + if ((thread->ptrace & PT_TRACED) || + thread->proc == proc) { + thread_unlock(thread); + error = -EPERM; + goto out; } - parent = child->parent; - 
dkprintf("ptrace_attach() parent->pid=%d\n", parent->pid); + thread->ptrace = PT_TRACED | PT_TRACE_EXEC; + error = ptrace_attach_thread(thread, proc); - mcs_rwlock_writer_lock_noirq(&parent->children_lock, &childlock); - list_del(&child->siblings_list); - list_add_tail(&child->ptraced_siblings_list, &parent->ptraced_children_list); - mcs_rwlock_writer_unlock_noirq(&parent->children_lock, &childlock); - - mcs_rwlock_writer_lock_noirq(&proc->children_lock, &childlock); - list_add_tail(&child->siblings_list, &proc->children_list); - child->parent = proc; - mcs_rwlock_writer_unlock_noirq(&proc->children_lock, &childlock); - - child->ptrace = PT_TRACED | PT_TRACE_EXEC; - - mcs_rwlock_writer_unlock_noirq(&thread->proc->update_lock, &updatelock); - - if (thread->ptrace_debugreg == NULL) { - error = alloc_debugreg(thread); - if (error < 0) { - thread_unlock(thread, &lock); - goto out; - } - } - - clear_single_step(thread); - - thread_unlock(thread, &lock); + thread_unlock(thread); memset(&info, '\0', sizeof info); info.si_signo = SIGSTOP; info.si_code = SI_USER; info._sifields._kill.si_pid = proc->pid; error = do_kill(mythread, -1, pid, SIGSTOP, &info, 2); - if (error < 0) { - goto out; - } out: dkprintf("ptrace_attach,returning,error=%d\n", error); @@ -6166,67 +6491,26 @@ int ptrace_detach(int pid, int data) struct thread *thread; struct thread *mythread = cpu_local_var(current); struct process *proc = mythread->proc;; - struct process *child; - struct process *parent; - struct mcs_rwlock_node_irqsave lock; - struct mcs_rwlock_node childlock; - struct mcs_rwlock_node updatelock; - struct siginfo info; if (data > 64 || data < 0) { return -EIO; } - thread = find_thread(pid, pid, &lock); + thread = find_thread(0, pid); if (!thread) { error = -ESRCH; goto out; } - child = thread->proc; - mcs_rwlock_writer_lock_noirq(&child->update_lock, &updatelock); - parent = child->ppid_parent; - if (!(child->ptrace & PT_TRACED) || child->parent != proc) { - 
mcs_rwlock_writer_unlock_noirq(&child->update_lock, &updatelock); - thread_unlock(thread, &lock); + if (!(thread->ptrace & PT_TRACED) || thread->report_proc != proc) { + thread_unlock(thread); error = -ESRCH; goto out; } - mcs_rwlock_writer_unlock_noirq(&child->update_lock, &updatelock); - mcs_rwlock_writer_lock_noirq(&proc->children_lock, &childlock); - list_del(&child->siblings_list); - mcs_rwlock_writer_unlock_noirq(&proc->children_lock, &childlock); + ptrace_detach_thread(thread, data); - mcs_rwlock_writer_lock_noirq(&parent->children_lock, &childlock); - list_del(&child->ptraced_siblings_list); - list_add_tail(&child->siblings_list, &parent->children_list); - child->parent = parent; - mcs_rwlock_writer_unlock_noirq(&parent->children_lock, &childlock); - - child->ptrace = 0; - - if (thread->ptrace_debugreg) { - kfree(thread->ptrace_debugreg); - thread->ptrace_debugreg = NULL; - } - - clear_single_step(thread); - - thread_unlock(thread, &lock); - - if (data != 0) { - memset(&info, '\0', sizeof info); - info.si_signo = data; - info.si_code = SI_USER; - info._sifields._kill.si_pid = proc->pid; - error = do_kill(mythread, pid, -1, data, &info, 1); - if (error < 0) { - goto out; - } - } - - sched_wakeup_thread(thread, PS_TRACED | PS_STOPPED); + thread_unlock(thread); out: return error; } @@ -6236,20 +6520,21 @@ static long ptrace_geteventmsg(int pid, long data) unsigned long *msg_p = (unsigned long *)data; long rc = -ESRCH; struct thread *child; - struct mcs_rwlock_node_irqsave lock; - child = find_thread(pid, pid, &lock); + child = find_thread(0, pid); if (!child) { return -ESRCH; } if(child->status & (PS_STOPPED | PS_TRACED)){ - if (copy_to_user(msg_p, &child->proc->ptrace_eventmsg, sizeof(*msg_p))) { + if (copy_to_user(msg_p, &child->ptrace_eventmsg, + sizeof(*msg_p))) { rc = -EFAULT; - } else { + } + else { rc = 0; } } - thread_unlock(child, &lock); + thread_unlock(child); return rc; } @@ -6258,10 +6543,9 @@ static long ptrace_getsiginfo(int pid, siginfo_t 
*data) { struct thread *child; - struct mcs_rwlock_node_irqsave lock; int rc = 0; - child = find_thread(pid, pid, &lock); + child = find_thread(0, pid); if (!child) { return -ESRCH; } @@ -6277,7 +6561,7 @@ ptrace_getsiginfo(int pid, siginfo_t *data) else { rc = -ESRCH; } - thread_unlock(child, &lock); + thread_unlock(child); return rc; } @@ -6285,10 +6569,9 @@ static long ptrace_setsiginfo(int pid, siginfo_t *data) { struct thread *child; - struct mcs_rwlock_node_irqsave lock; int rc = 0; - child = find_thread(pid, pid, &lock); + child = find_thread(0, pid); if (!child) { return -ESRCH; } @@ -6315,7 +6598,7 @@ ptrace_setsiginfo(int pid, siginfo_t *data) } } } - thread_unlock(child, &lock); + thread_unlock(child); return rc; } @@ -6460,7 +6743,6 @@ SYSCALL_DECLARE(sched_setparam) struct sched_param *uparam = (struct sched_param *)ihk_mc_syscall_arg1(ctx); struct sched_param param; struct thread *thread = cpu_local_var(current); - struct mcs_rwlock_node_irqsave lock; struct syscall_request request1 IHK_DMA_ALIGN; int other_thread = 0; @@ -6475,18 +6757,18 @@ SYSCALL_DECLARE(sched_setparam) if (thread->proc->pid != pid) { other_thread = 1; - thread = find_thread(pid, pid, &lock); + thread = find_thread(0, pid); if (!thread) { return -ESRCH; } - thread_unlock(thread, &lock); + thread_unlock(thread); /* Ask Linux about ownership.. 
*/ request1.number = __NR_sched_setparam; request1.args[0] = SCHED_CHECK_SAME_OWNER; request1.args[1] = pid; - retval = do_syscall(&request1, ihk_mc_get_processor_id(), 0); + retval = do_syscall(&request1, ihk_mc_get_processor_id()); if (retval != 0) { return retval; } @@ -6498,14 +6780,14 @@ SYSCALL_DECLARE(sched_setparam) } if (other_thread) { - thread = find_thread(pid, pid, &lock); + thread = find_thread(0, pid); if (!thread) { return -ESRCH; } } retval = setscheduler(thread, thread->sched_policy, &param); if (other_thread) { - thread_unlock(thread, &lock); + thread_unlock(thread); } return retval; } @@ -6516,7 +6798,6 @@ SYSCALL_DECLARE(sched_getparam) int pid = (int)ihk_mc_syscall_arg0(ctx); struct sched_param *param = (struct sched_param *)ihk_mc_syscall_arg1(ctx); struct thread *thread = cpu_local_var(current); - struct mcs_rwlock_node_irqsave lock; if (!param || pid < 0) { return -EINVAL; @@ -6526,11 +6807,11 @@ SYSCALL_DECLARE(sched_getparam) pid = thread->proc->pid; if (thread->proc->pid != pid) { - thread = find_thread(pid, pid, &lock); + thread = find_thread(0, pid); if (!thread) { return -ESRCH; } - thread_unlock(thread, &lock); + thread_unlock(thread); } retval = copy_to_user(param, &thread->sched_param, sizeof(*param)) ? 
-EFAULT : 0; @@ -6546,7 +6827,6 @@ SYSCALL_DECLARE(sched_setscheduler) struct sched_param *uparam = (struct sched_param *)ihk_mc_syscall_arg2(ctx); struct sched_param param; struct thread *thread = cpu_local_var(current); - struct mcs_rwlock_node_irqsave lock; struct syscall_request request1 IHK_DMA_ALIGN; @@ -6567,7 +6847,7 @@ SYSCALL_DECLARE(sched_setscheduler) request1.number = __NR_sched_setparam; request1.args[0] = SCHED_CHECK_ROOT; - retval = do_syscall(&request1, ihk_mc_get_processor_id(), 0); + retval = do_syscall(&request1, ihk_mc_get_processor_id()); if (retval != 0) { return retval; } @@ -6582,18 +6862,18 @@ SYSCALL_DECLARE(sched_setscheduler) pid = thread->proc->pid; if (thread->proc->pid != pid) { - thread = find_thread(pid, pid, &lock); + thread = find_thread(0, pid); if (!thread) { return -ESRCH; } - thread_unlock(thread, &lock); + thread_unlock(thread); /* Ask Linux about ownership.. */ request1.number = __NR_sched_setparam; request1.args[0] = SCHED_CHECK_SAME_OWNER; request1.args[1] = pid; - retval = do_syscall(&request1, ihk_mc_get_processor_id(), 0); + retval = do_syscall(&request1, ihk_mc_get_processor_id()); if (retval != 0) { return retval; } @@ -6606,7 +6886,6 @@ SYSCALL_DECLARE(sched_getscheduler) { int pid = (int)ihk_mc_syscall_arg0(ctx); struct thread *thread = cpu_local_var(current); - struct mcs_rwlock_node_irqsave lock; if (pid < 0) { return -EINVAL; @@ -6616,11 +6895,11 @@ SYSCALL_DECLARE(sched_getscheduler) pid = thread->proc->pid; if (thread->proc->pid != pid) { - thread = find_thread(pid, pid, &lock); + thread = find_thread(0, pid); if (!thread) { return -ESRCH; } - thread_unlock(thread, &lock); + thread_unlock(thread); } return thread->sched_policy; @@ -6671,7 +6950,6 @@ SYSCALL_DECLARE(sched_rr_get_interval) struct timespec *utime = (struct timespec *)ihk_mc_syscall_arg1(ctx); struct timespec t; struct thread *thread = cpu_local_var(current); - struct mcs_rwlock_node_irqsave lock; int retval = 0; if (pid < 0) @@ -6681,11 +6959,11 
@@ SYSCALL_DECLARE(sched_rr_get_interval) pid = thread->proc->pid; if (thread->proc->pid != pid) { - thread = find_thread(pid, pid, &lock); + thread = find_thread(0, pid); if (!thread) { return -ESRCH; } - thread_unlock(thread, &lock); + thread_unlock(thread); } t.tv_sec = 0; @@ -6734,10 +7012,9 @@ SYSCALL_DECLARE(sched_setaffinity) hold_thread(thread); } else { - struct mcs_rwlock_node_irqsave lock; struct thread *mythread = cpu_local_var(current); - thread = find_thread(0, tid, &lock); + thread = find_thread(0, tid); if (!thread) return -ESRCH; @@ -6745,12 +7022,12 @@ SYSCALL_DECLARE(sched_setaffinity) if (mythread->proc->euid != 0 && mythread->proc->euid != thread->proc->ruid && mythread->proc->euid != thread->proc->euid) { - thread_unlock(thread, &lock); + thread_unlock(thread); return -EPERM; } hold_thread(thread); - thread_unlock(thread, &lock); + thread_unlock(thread); cpu_id = thread->cpu_id; } @@ -6812,20 +7089,19 @@ SYSCALL_DECLARE(sched_getaffinity) hold_thread(thread); } else{ - struct mcs_rwlock_node_irqsave lock; struct thread *mythread = cpu_local_var(current); - thread = find_thread(0, tid, &lock); + thread = find_thread(0, tid); if(!thread) return -ESRCH; if(mythread->proc->euid != 0 && mythread->proc->euid != thread->proc->ruid && mythread->proc->euid != thread->proc->euid){ - thread_unlock(thread, &lock); + thread_unlock(thread); return -EPERM; } hold_thread(thread); - thread_unlock(thread, &lock); + thread_unlock(thread); } ret = copy_to_user(u_cpu_set, &thread->cpu_set, len); @@ -6847,7 +7123,7 @@ SYSCALL_DECLARE(get_cpu_id) return ihk_mc_get_processor_id(); } -static void calculate_time_from_tsc(struct timespec *ts) +void calculate_time_from_tsc(struct timespec *ts) { long ver; unsigned long current_tsc; @@ -6908,7 +7184,7 @@ SYSCALL_DECLARE(setitimer) request.args[1] = ihk_mc_syscall_arg1(ctx); request.args[2] = ihk_mc_syscall_arg2(ctx); - return do_syscall(&request, ihk_mc_get_processor_id(), 0); + return do_syscall(&request, 
ihk_mc_get_processor_id()); } else if(which == ITIMER_VIRTUAL){ if(old){ @@ -6957,7 +7233,7 @@ SYSCALL_DECLARE(setitimer) timer_start = 0; } thread->itimer_enabled = timer_start; - set_timer(); + set_timer(0); return 0; } @@ -6980,7 +7256,7 @@ SYSCALL_DECLARE(getitimer) request.args[0] = ihk_mc_syscall_arg0(ctx); request.args[1] = ihk_mc_syscall_arg1(ctx); - return do_syscall(&request, ihk_mc_get_processor_id(), 0); + return do_syscall(&request, ihk_mc_get_processor_id()); } else if(which == ITIMER_VIRTUAL){ if(old){ @@ -7076,7 +7352,7 @@ SYSCALL_DECLARE(clock_gettime) request.args[0] = ihk_mc_syscall_arg0(ctx); request.args[1] = ihk_mc_syscall_arg1(ctx); - return do_syscall(&request, ihk_mc_get_processor_id(), 0); + return do_syscall(&request, ihk_mc_get_processor_id()); } SYSCALL_DECLARE(gettimeofday) @@ -7111,7 +7387,7 @@ SYSCALL_DECLARE(gettimeofday) request.args[0] = (unsigned long)tv; request.args[1] = (unsigned long)tz; - return do_syscall(&request, ihk_mc_get_processor_id(), 0); + return do_syscall(&request, ihk_mc_get_processor_id()); } SYSCALL_DECLARE(settimeofday) @@ -7231,7 +7507,7 @@ SYSCALL_DECLARE(nanosleep) request.args[0] = (unsigned long)tv; request.args[1] = (unsigned long)rem; - return do_syscall(&request, ihk_mc_get_processor_id(), 0); + return do_syscall(&request, ihk_mc_get_processor_id()); } //#define DISABLE_SCHED_YIELD @@ -7739,6 +8015,14 @@ SYSCALL_DECLARE(mremap) goto out; } + /* check necessity of remap */ + if (!(flags & MREMAP_FIXED) && oldsize == newsize) { + /* Nothing to do */ + error = 0; + newstart = oldaddr; + goto out; + } + if (oldend < oldstart) { error = -EINVAL; ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):" @@ -7819,14 +8103,7 @@ SYSCALL_DECLARE(mremap) goto out; } need_relocate = 1; -#ifdef POSTK_DEBUG_ARCH_DEP_27 - error = search_free_space(cpu_local_var(current), newsize, - vm->region.map_end, - range->pgshift, (intptr_t *)&newstart); -#else - error = search_free_space(newsize, vm->region.map_end, - range->pgshift, 
(intptr_t *)&newstart); -#endif /* POSTK_DEBUG_ARCH_DEP_27 */ + error = search_free_space(newsize, range->pgshift, &newstart); if (error) { ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):" "search failed. %d\n", @@ -7844,7 +8121,7 @@ SYSCALL_DECLARE(mremap) /* do the remap */ if (need_relocate) { if (flags & MREMAP_FIXED) { - error = do_munmap((void *)newstart, newsize); + error = do_munmap((void *)newstart, newsize, 1/* holding memory_range_lock */); if (error) { ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):" "fixed:munmap failed. %d\n", @@ -7866,7 +8143,7 @@ SYSCALL_DECLARE(mremap) oldaddr, oldsize0, newsize0, flags, newaddr, error); if (range->memobj) { - memobj_release(range->memobj); + memobj_unref(range->memobj); } goto out; } @@ -7891,7 +8168,7 @@ SYSCALL_DECLARE(mremap) goto out; } - error = do_munmap((void *)oldstart, oldsize); + error = do_munmap((void *)oldstart, oldsize, 1/* holding memory_range_lock */); if (error) { ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):" "relocate:munmap failed. %d\n", @@ -7902,7 +8179,7 @@ SYSCALL_DECLARE(mremap) } } else if (newsize < oldsize) { - error = do_munmap((void *)newend, (oldend - newend)); + error = do_munmap((void *)newend, (oldend - newend), 1/* holding memory_range_lock */); if (error) { ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):" "shrink:munmap failed. 
%d\n", @@ -8013,9 +8290,8 @@ SYSCALL_DECLARE(msync) if ((range->flag & VR_PRIVATE) || !range->memobj || !memobj_has_pager(range->memobj)) { - dkprintf("sys_msync(%#lx,%#lx,%#x):" - "unsyncable VMR %d %#lx-%#lx %#lx\n", - start0, len0, flags, error, + dkprintf("sys_msync(%#lx,%#lx,%#x):unsyncable VMR %#lx-%#lx %#lx\n", + start0, len0, flags, range->start, range->end, range->flag); /* nothing to do */ continue; @@ -8948,6 +9224,7 @@ static void do_mod_exit(int status){ } #endif +#ifdef ENABLE_PERF /* select counter type */ SYSCALL_DECLARE(pmc_init) { @@ -8994,62 +9271,120 @@ SYSCALL_DECLARE(pmc_reset) int counter = ihk_mc_syscall_arg0(ctx); return ihk_mc_perfctr_reset(counter); } +#endif /*ENABLE_PERF*/ extern void save_uctx(void *, void *); -int -util_thread(struct uti_attr *arg) +/* TODO: use copy_from_user() */ +int util_show_syscall_profile() { - volatile unsigned long *context; - unsigned long pcontext; + int i; + struct uti_desc *desc = (struct uti_desc *)uti_desc; + + kprintf("Syscall stats for offloaded thread:\n"); + for (i = 0; i < 512; i++) { + if (desc->syscalls[i]) { + kprintf("nr=%d #called=%ld\n", i, desc->syscalls[i]); + } + } + + kprintf("Syscall stats for other threads:\n"); + for (i = 0; i < 512; i++) { + if (desc->syscalls2[i]) { + kprintf("nr=%d #called=%ld\n", i, desc->syscalls2[i]); + } + } + + return 0; +} + +int util_thread(struct uti_attr *arg) +{ + struct uti_ctx *rctx = NULL; + unsigned long rp_rctx; + struct cpu_local_var *uti_clv = NULL; struct syscall_request request IHK_DMA_ALIGN; long rc; struct thread *thread = cpu_local_var(current); - unsigned long free_address; - unsigned long free_size; struct kuti_attr { long parent_cpuid; struct uti_attr attr; } kattr; - context = (volatile unsigned long *)ihk_mc_alloc_pages(1, - IHK_MC_AP_NOWAIT); - if (!context) { - return -ENOMEM; + thread->uti_state = UTI_STATE_PROLOGUE; + + rctx = kmalloc(sizeof(struct uti_ctx), IHK_MC_AP_NOWAIT); + if (!rctx) { + rc = -ENOMEM; + goto out; } - pcontext = 
virt_to_phys((void *)context); - save_uctx((void *)context, NULL); + rp_rctx = virt_to_phys((void *)rctx); + save_uctx((void *)rctx->ctx, NULL); + + /* Create a copy of clv and replace clv with it when the Linux thread calls in a McKernel function */ + uti_clv = kmalloc(sizeof(struct cpu_local_var), IHK_MC_AP_NOWAIT); + if (!uti_clv) { + rc = -ENOMEM; + goto out; + } + memcpy(uti_clv, get_this_cpu_local_var(), sizeof(struct cpu_local_var)); request.number = __NR_sched_setaffinity; request.args[0] = 0; - request.args[1] = pcontext; + request.args[1] = rp_rctx; request.args[2] = 0; if (arg) { memcpy(&kattr.attr, arg, sizeof(struct uti_attr)); kattr.parent_cpuid = thread->parent_cpuid; request.args[2] = virt_to_phys(&kattr); } - thread->thread_offloaded = 1; - rc = do_syscall(&request, ihk_mc_get_processor_id(), 0); - thread->thread_offloaded = 0; - free_address = context[0]; - free_size = context[1]; - ihk_mc_free_pages((void *)context, 1); + request.args[3] = (unsigned long)uti_clv; + request.args[4] = uti_desc; + thread->uti_state = UTI_STATE_RUNNING_IN_LINUX; + rc = do_syscall(&request, ihk_mc_get_processor_id()); + dkprintf("%s: returned from do_syscall,tid=%d,rc=%lx\n", __FUNCTION__, thread->tid, rc); + + thread->uti_state = UTI_STATE_EPILOGUE; + + util_show_syscall_profile(); + + /* Save it before freed */ + thread->uti_refill_tid = rctx->uti_refill_tid; + dkprintf("%s: mcexec worker tid=%d\n", __FUNCTION__, thread->uti_refill_tid); + + kfree(rctx); + rctx = NULL; + + kfree(uti_clv); + uti_clv = NULL; if (rc >= 0) { - if (rc & 0x10000007f) { // exit_group || signal + if (rc & 0x100000000) { /* exit_group */ + dkprintf("%s: exit_group, tid=%d,rc=%lx\n", __FUNCTION__, thread->tid, rc); thread->proc->nohost = 1; terminate((rc >> 8) & 255, rc & 255); - } - else { - request.number = __NR_sched_setaffinity; - request.args[0] = 1; - request.args[1] = free_address; - request.args[2] = free_size; - do_syscall(&request, ihk_mc_get_processor_id(), 0); + } else { + /* exit 
or killed-by-signal detected */ + dkprintf("%s: exit or killed by signal, pid=%d,tid=%d,rc=%lx\n", __FUNCTION__, thread->proc->pid, thread->tid, rc); do_exit(rc); } + } else if (rc == -ERESTARTSYS) { + /* tracer is not working and /dev/mcosX has detected exit of mcexec process */ + kprintf("%s: release_handler,pid=%d,tid=%d,rc=%lx\n", __FUNCTION__, thread->proc->pid, thread->tid, rc); + thread->proc->nohost = 1; + do_exit(rc); + } else { + kprintf("%s: ERROR: do_syscall() failed (%ld)\n", __FUNCTION__, rc); } + + out: + if (rctx) { + kfree(rctx); + } + if (uti_clv) { + kfree(uti_clv); + } + return rc; } @@ -9058,6 +9393,7 @@ utilthr_migrate() { struct thread *thread = cpu_local_var(current); + /* Don't inherit mod_clone */ if (thread->mod_clone == SPAWNING_TO_REMOTE) { thread->mod_clone = SPAWN_TO_LOCAL; util_thread(thread->mod_clone_arg); @@ -9089,7 +9425,11 @@ SYSCALL_DECLARE(util_indicate_clone) mod != SPAWN_TO_REMOTE) return -EINVAL; if (arg) { - kattr = kmalloc(sizeof(struct uti_attr), IHK_MC_AP_NOWAIT); + if (!(kattr = kmalloc(sizeof(struct uti_attr), IHK_MC_AP_NOWAIT))) { + kprintf("%s: error: allocating kattr\n", __func__); + return -ENOMEM; + } + if (copy_from_user(kattr, arg, sizeof(struct uti_attr))) { kfree(kattr); return -EFAULT; @@ -9207,6 +9547,14 @@ SYSCALL_DECLARE(resume_threads) return 0; } +SYSCALL_DECLARE(util_register_desc) +{ + struct thread *thread = cpu_local_var(current); + uti_desc = ihk_mc_syscall_arg0(ctx); + dkprintf("%s: tid=%d,uti_desc=%lx\n", __FUNCTION__, thread->tid, uti_desc); + return 0; +} + void reset_cputime() { @@ -9333,7 +9681,7 @@ set_cputime(int mode) thread->itimer_prof.it_value.tv_sec == 0 && thread->itimer_prof.it_value.tv_usec == 0){ thread->itimer_enabled = 0; - set_timer(); + set_timer(0); } } } @@ -9343,13 +9691,8 @@ set_cputime(int mode) long syscall(int num, ihk_mc_user_context_t *ctx) { long l; -#if !defined(POSTK_DEBUG_TEMP_FIX_60) && !defined(POSTK_DEBUG_TEMP_FIX_56) -#ifdef PROFILE_ENABLE - struct thread 
*thread = cpu_local_var(current); -#endif // PROFILE_ENABLE -#else /* !defined(POSTK_DEBUG_TEMP_FIX_60) && !defined(POSTK_DEBUG_TEMP_FIX_56) */ - struct thread *thread = cpu_local_var(current); -#endif /* !defined(POSTK_DEBUG_TEMP_FIX_60) && !defined(POSTK_DEBUG_TEMP_FIX_56) */ + struct cpu_local_var *v = get_this_cpu_local_var(); + struct thread *thread = v->current; #ifdef DISABLE_SCHED_YIELD if (num != __NR_sched_yield) @@ -9368,14 +9711,14 @@ long syscall(int num, ihk_mc_user_context_t *ctx) if(cpu_local_var(current)->proc->status == PS_EXITED && (num != __NR_exit && num != __NR_exit_group)){ save_syscall_return_value(num, -EINVAL); - check_signal(-EINVAL, NULL, 0); + check_signal(-EINVAL, NULL, -1); set_cputime(0); return -EINVAL; } cpu_enable_interrupt(); - if (cpu_local_var(current)->proc->ptrace) { + if (cpu_local_var(current)->ptrace) { ihk_mc_syscall_ret(ctx) = -ENOSYS; ptrace_syscall_event(cpu_local_var(current)); num = ihk_mc_syscall_number(ctx); @@ -9411,7 +9754,7 @@ long syscall(int num, ihk_mc_user_context_t *ctx) && (syscall_table[num] != NULL)) { l = syscall_table[num](num, ctx); - dkprintf("SC(%d)[%3d] ret: %d\n", + dkprintf("SC(%d)[%3d] ret: %lx\n", ihk_mc_get_processor_id(), num, l); } else { dkprintf("USC[%3d](%lx, %lx, %lx, %lx, %lx) @ %lx | %lx\n", num, @@ -9422,7 +9765,7 @@ long syscall(int num, ihk_mc_user_context_t *ctx) l = syscall_generic_forwarding(num, ctx); } - if (cpu_local_var(current)->proc->ptrace) { + if (cpu_local_var(current)->ptrace) { ihk_mc_syscall_ret(ctx) = l; ptrace_syscall_event(cpu_local_var(current)); l = ihk_mc_syscall_ret(ctx); @@ -9451,22 +9794,9 @@ long syscall(int num, ihk_mc_user_context_t *ctx) } #endif // PROFILE_ENABLE -#if defined(POSTK_DEBUG_TEMP_FIX_60) && defined(POSTK_DEBUG_TEMP_FIX_56) - check_need_resched(); -#elif defined(POSTK_DEBUG_TEMP_FIX_60) /* sched_yield called check_signal fix. 
*/ - if (num != __NR_futex) { + if (v->flags & CPU_FLAG_NEED_RESCHED) { check_need_resched(); } -#elif defined(POSTK_DEBUG_TEMP_FIX_56) /* in futex_wait() signal handring fix. */ - if (num != __NR_sched_yield) { - check_need_resched(); - } -#else /* POSTK_DEBUG_TEMP_FIX_60 && POSTK_DEBUG_TEMP_FIX_56 */ - if (num != __NR_sched_yield && - num != __NR_futex) { - check_need_resched(); - } -#endif /* POSTK_DEBUG_TEMP_FIX_60 && POSTK_DEBUG_TEMP_FIX_56 */ if (!list_empty(&thread->sigpending) || !list_empty(&thread->sigcommon->sigpending)) { diff --git a/kernel/sysfs.c b/kernel/sysfs.c index fc07cd32..670d4294 100644 --- a/kernel/sysfs.c +++ b/kernel/sysfs.c @@ -23,9 +23,7 @@ #include #include #include - -#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) -#define ekprintf(...) do { if (1) kprintf(__VA_ARGS__); } while (0) +#include static size_t sysfs_data_bufsize; static void *sysfs_data_buf; diff --git a/kernel/timer.c b/kernel/timer.c index 031d15b7..1871ad43 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -32,13 +32,13 @@ #include #include #include +#include //#define DEBUG_PRINT_TIMER #ifdef DEBUG_PRINT_TIMER -#define dkprintf kprintf -#else -#define dkprintf(...) 
do { if (0) kprintf(__VA_ARGS__); } while (0) +#undef DDEBUG_DEFAULT +#define DDEBUG_DEFAULT DDEBUG_PRINT #endif #define LOOP_TIMEOUT 500 diff --git a/kernel/xpmem.c b/kernel/xpmem.c index e2668b75..8419409d 100644 --- a/kernel/xpmem.c +++ b/kernel/xpmem.c @@ -83,7 +83,7 @@ int xpmem_open( request.number = __NR_open; request.args[0] = (unsigned long)pathname; request.args[1] = flags; - fd = do_syscall(&request, ihk_mc_get_processor_id(), 0); + fd = do_syscall(&request, ihk_mc_get_processor_id()); if (fd < 0) { XPMEM_DEBUG("__NR_open error: fd=%d", fd); return fd; @@ -283,7 +283,8 @@ static int xpmem_close( int n_opened; XPMEM_DEBUG("call: fd=%d, pid=%d, rgid=%d", - mckfd->fd, proc->pid, proc->rgid); + mckfd->fd, cpu_local_var(current)->proc->pid, + cpu_local_var(current)->proc->rgid); n_opened = ihk_atomic_dec_return(&xpmem_my_part->n_opened); XPMEM_DEBUG("n_opened=%d", n_opened); @@ -1398,7 +1399,7 @@ static int xpmem_free_process_memory_range( } if (range->memobj) { - memobj_release(range->memobj); + memobj_unref(range->memobj); } rb_erase(&range->vm_rb_node, &vm->vm_range_tree); @@ -1731,7 +1732,8 @@ int xpmem_remove_process_memory_range( remaining_vmr->private_data = NULL; /* This function is always followed by xpmem_free_process_memory_range() - which in turn calls memobj_release() */ + * which in turn calls memobj_unref() + */ remaining_vaddr = att->at_vaddr; } @@ -1754,7 +1756,8 @@ int xpmem_remove_process_memory_range( vmr->private_data = NULL; /* This function is always followed by [xpmem_]free_process_memory_range() - which in turn calls memobj_release() */ + * which in turn calls memobj_unref() + */ out: mcs_rwlock_writer_unlock(&att->at_lock, &at_lock); diff --git a/kernel/zeroobj.c b/kernel/zeroobj.c index 1b0f78bb..c937ba5b 100644 --- a/kernel/zeroobj.c +++ b/kernel/zeroobj.c @@ -21,10 +21,7 @@ #include #include #include - -#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) -#define ekprintf(...) 
kprintf(__VA_ARGS__) -#define fkprintf(...) kprintf(__VA_ARGS__) +#include struct zeroobj { struct memobj memobj; /* must be first */ @@ -35,9 +32,11 @@ static ihk_spinlock_t the_zeroobj_lock = SPIN_LOCK_UNLOCKED; static struct zeroobj *the_zeroobj = NULL; /* singleton */ static memobj_get_page_func_t zeroobj_get_page; +static memobj_free_func_t zeroobj_free; static struct memobj_ops zeroobj_ops = { .get_page = &zeroobj_get_page, + .free = &zeroobj_free, }; static struct zeroobj *to_zeroobj(struct memobj *memobj) @@ -77,6 +76,12 @@ static struct page *page_list_first(struct zeroobj *obj) /*********************************************************************** * zeroobj */ + +static void zeroobj_free(struct memobj *obj) +{ + kprintf("trying to free zeroobj, this should never happen\n"); +} + static int alloc_zeroobj(void) { int error; @@ -104,8 +109,8 @@ static int alloc_zeroobj(void) obj->memobj.ops = &zeroobj_ops; obj->memobj.flags = MF_ZEROOBJ; obj->memobj.size = 0; + ihk_atomic_set(&obj->memobj.refcnt, 2); // never reaches 0 page_list_init(obj); - ihk_mc_spinlock_init(&obj->memobj.lock); virt = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT); /* XXX:NYI:large page */ if (!virt) { @@ -117,7 +122,7 @@ static int alloc_zeroobj(void) page = phys_to_page_insert_hash(phys); if (page->mode != PM_NONE) { - fkprintf("alloc_zeroobj():" + ekprintf("alloc_zeroobj():" "page %p %#lx %d %d %#lx\n", page, page_to_phys(page), page->mode, page->count, page->offset); @@ -162,6 +167,7 @@ int zeroobj_create(struct memobj **objp) error = 0; *objp = to_memobj(the_zeroobj); + memobj_ref(*objp); out: dkprintf("zeroobj_create(%p):%d %p\n", objp, error, *objp); diff --git a/lib/bitmap.c b/lib/bitmap.c index 2eb47142..f059e771 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -11,9 +11,9 @@ #include #include #include +#include #define EXPORT_SYMBOL(x) -#define BUG_ON(x) /** * hex_to_bin - convert a hex digit to its real value diff --git a/lib/include/ihk/cpu.h b/lib/include/ihk/cpu.h index 
5839f783..c76e866f 100644 --- a/lib/include/ihk/cpu.h +++ b/lib/include/ihk/cpu.h @@ -25,8 +25,6 @@ void cpu_safe_halt(void); void cpu_restore_interrupt(unsigned long); void cpu_pause(void); -#define barrier() arch_barrier() - unsigned long cpu_disable_interrupt_save(void); struct ihk_mc_interrupt_handler { @@ -76,6 +74,7 @@ void ihk_mc_init_ap(void); void ihk_mc_init_context(ihk_mc_kernel_context_t *new_ctx, void *stack_pointer, void (*next_function)(void)); +void *ihk_mc_get_linux_kernel_pgt(void); int ihk_mc_get_extra_reg_id(unsigned long hw_config, unsigned long hw_config_ext); unsigned int ihk_mc_get_nr_extra_regs(void); diff --git a/lib/include/ihk/mm.h b/lib/include/ihk/mm.h index f9ff3715..88cb5c20 100644 --- a/lib/include/ihk/mm.h +++ b/lib/include/ihk/mm.h @@ -106,7 +106,7 @@ void ihk_mc_unmap_memory(void *os, unsigned long phys, unsigned long size); void *ihk_mc_map_virtual(unsigned long phys, int npages, enum ihk_mc_pt_attribute attr); -void ihk_mc_unmap_virtual(void *va, int npages, int free_physical); +void ihk_mc_unmap_virtual(void *va, int npages); extern void *sbox_base; extern unsigned int free_bitmap_micpa; @@ -229,6 +229,7 @@ char *ihk_get_kargs(void); int ihk_set_monitor(unsigned long addr, unsigned long size); int ihk_set_rusage(unsigned long addr, unsigned long size); int ihk_set_nmi_mode_addr(unsigned long addr); +int ihk_set_mckernel_do_futex(unsigned long addr); extern void (*__tlb_flush_handler)(int vector); diff --git a/lib/include/ihk/perfctr.h b/lib/include/ihk/perfctr.h index be000355..89fdbb95 100644 --- a/lib/include/ihk/perfctr.h +++ b/lib/include/ihk/perfctr.h @@ -63,13 +63,8 @@ int ihk_mc_perfctr_init(int counter, enum ihk_perfctr_type type, int mode); int ihk_mc_perfctr_init_raw(int counter, unsigned int code, int mode); #endif/*POSTK_DEBUG_TEMP_FIX_29*/ int ihk_mc_perfctr_set_extra(struct mc_perf_event *event); -#ifdef POSTK_DEBUG_TEMP_FIX_30 -int ihk_mc_perfctr_start(int counter); -int ihk_mc_perfctr_stop(int counter); -#else 
int ihk_mc_perfctr_start(unsigned long counter_mask); int ihk_mc_perfctr_stop(unsigned long counter_mask); -#endif/*POSTK_DEBUG_TEMP_FIX_30*/ int ihk_mc_perfctr_fixed_init(int counter, int mode); int ihk_mc_perfctr_reset(int counter); int ihk_mc_perfctr_set(int counter, long value); diff --git a/lib/include/mc_perf_event.h b/lib/include/mc_perf_event.h index da62519c..05885d4e 100644 --- a/lib/include/mc_perf_event.h +++ b/lib/include/mc_perf_event.h @@ -2,9 +2,7 @@ #ifndef MC_PERF_EVENT_H #define MC_PERF_EVENT_H -//#ifdef POSTK_DEBUG_TEMP_FIX_32 #include -//#endif /*POSTK_DEBUG_TEMP_FIX_32*/ #include struct perf_event_attr; diff --git a/lib/include/memory.h b/lib/include/memory.h index 063103d9..141111cb 100644 --- a/lib/include/memory.h +++ b/lib/include/memory.h @@ -16,19 +16,6 @@ #include -#ifndef KERNEL_PHYS_OFFSET -#define KERNEL_PHYS_OFFSET 0 - -static unsigned long virt_to_phys(void *v) -{ - return (unsigned long)v - KERNEL_PHYS_OFFSET; -} -static void *phys_to_virt(unsigned long p) -{ - return (void *)(p + KERNEL_PHYS_OFFSET); -} -#endif - struct process_vm; unsigned long virt_to_phys(void *v); @@ -46,15 +33,8 @@ int setlong_user(long *dst, long data); int setint_user(int *dst, int data); int write_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t siz); int patch_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t siz); -#ifdef POSTK_DEBUG_ARCH_DEP_27 -struct thread; -int search_free_space(struct thread *thread, size_t len, intptr_t hint, - int pgshift, intptr_t *addrp); -#endif /* POSTK_DEBUG_ARCH_DEP_27 */ -#ifdef POSTK_DEBUG_TEMP_FIX_52 /* supports NUMA for memory area determination */ -int is_mckernel_memory(unsigned long phys); -#endif /* POSTK_DEBUG_TEMP_FIX_52 */ +int is_mckernel_memory(unsigned long start, unsigned long end); #endif diff --git a/lib/include/string.h b/lib/include/string.h index e26a13ab..455561d8 100644 --- a/lib/include/string.h +++ b/lib/include/string.h @@ -23,6 +23,7 @@ char *strncpy(char 
*dest, const char *src, size_t maxlen); int strcmp(const char *s1, const char *s2); int strncmp(const char *s1, const char *s2, size_t n); char *strstr(const char *haystack, const char *needle); +char *strpbrk(const char *haystack, const char *accept); char *strchr(const char *s, int n); char *strrchr(const char *s, int n); void *memcpy(void *dest, const void *src, size_t n); @@ -46,7 +47,6 @@ extern int sscanf(const char * buf, const char * fmt, ...); extern int scnprintf(char * buf, size_t size, const char *fmt, ...); unsigned long strtol(const char *cp, char **endp, unsigned int base); -int flatten_strings(int nr_strings, char *first, char **strings, char **flat); -int flatten_strings_from_user(int nr_strings, char *first, char **strings, char **flat); +int flatten_strings_from_user(char *pre_strings, char **strings, char **flat); #endif diff --git a/lib/string.c b/lib/string.c index ab194e27..eb1559f5 100644 --- a/lib/string.c +++ b/lib/string.c @@ -112,6 +112,19 @@ strrchr(const char *s, int c) return (char *)last; } /* strrchr() */ +char *strpbrk(const char *s, const char *accept) +{ + const char *a; + + do { + for (a = accept; *a; a++) + if (*s == *a) + return (char *)s; + } while (*(s++)); + + return NULL; +} + char *strstr(const char *haystack, const char *needle) { int len = strlen(needle); @@ -206,79 +219,30 @@ int memcmp(const void *s1, const void *s2, size_t n) /* * Flatten out a (char **) string array into the following format: - * [nr_strings][char *offset of string_0]...[char *offset of string_n-1][NULL][string0]...[stringn_1] - * if nr_strings == -1, we assume the last item is NULL + * [nr_strings][char *offset of string_0]...[char *offset of string_n-1][char *offset of end of string][string0]...[stringn_1] + * + * sizes all are longs. * * NOTE: copy this string somewhere, add the address of the string to each offset * and we get back a valid argv or envp array. 
* + * pre_strings is already flattened, so we just need to manage counts and copy + * the string parts appropriately. + * * returns the total length of the flat string and updates flat to * point to the beginning. */ -int flatten_strings(int nr_strings, char *first, char **strings, char **flat) +int flatten_strings_from_user(char *pre_strings, char **strings, char **flat) { - int full_len, string_i; - unsigned long flat_offset; - char *_flat; - - /* How many strings do we have? */ - if (nr_strings == -1) { - for (nr_strings = 0; strings[nr_strings]; ++nr_strings); - } - - /* Count full length */ - full_len = sizeof(long) + sizeof(char *); // Counter and terminating NULL - if (first) { - full_len += sizeof(char *) + strlen(first) + 1; - } - - for (string_i = 0; string_i < nr_strings; ++string_i) { - // Pointer + actual value - full_len += sizeof(char *) + strlen(strings[string_i]) + 1; - } - - full_len = (full_len + sizeof(long) - 1) & ~(sizeof(long) - 1); - - _flat = (char *)kmalloc(full_len, IHK_MC_AP_NOWAIT); - if (!_flat) { - return 0; - } - - memset(_flat, 0, full_len); - - /* Number of strings */ - *((long *)_flat) = nr_strings + (first ? 1 : 0); - - // Actual offset - flat_offset = sizeof(long) + sizeof(char *) * (nr_strings + 1 + - (first ? 1 : 0)); - - if (first) { - *((char **)(_flat + sizeof(long))) = (void *)flat_offset; - memcpy(_flat + flat_offset, first, strlen(first) + 1); - flat_offset += strlen(first) + 1; - } - - for (string_i = 0; string_i < nr_strings; ++string_i) { - - /* Fabricate the string */ - *((char **)(_flat + sizeof(long) + (string_i + (first ? 
1 : 0)) - * sizeof(char *))) = (void *)flat_offset; - memcpy(_flat + flat_offset, strings[string_i], strlen(strings[string_i]) + 1); - flat_offset += strlen(strings[string_i]) + 1; - } - - *flat = _flat; - return full_len; -} - -int flatten_strings_from_user(int nr_strings, char *first, char **strings, char **flat) -{ - int full_len, string_i; + int full_len, i; + int nr_strings = 0; + int pre_strings_count = 0; + int pre_strings_len = 0; long *_flat; + long *pre_strings_flat; char *p; long r; - int n, ret; + int ret; /* When strings is NULL, make array one NULL */ if (!strings) { @@ -293,35 +257,34 @@ int flatten_strings_from_user(int nr_strings, char *first, char **strings, char } /* How many strings do we have? */ - if (nr_strings == -1) { - nr_strings = 0; - for (;;) { - ret = getlong_user(&r, (void *)(strings + nr_strings)); - if (ret < 0) - return ret; + for (;;) { + ret = getlong_user(&r, (void *)(strings + nr_strings)); + if (ret < 0) + return ret; - if (r == 0) - break; + if (r == 0) + break; - ++nr_strings; - } + ++nr_strings; } /* Count full length */ full_len = sizeof(long) + sizeof(char *); // Counter and terminating NULL - if (first) { - int len = strlen(first); + if (pre_strings) { + pre_strings_flat = (long *)pre_strings; + pre_strings_count = pre_strings_flat[0]; - if(len < 0) - return len; - full_len += sizeof(char *) + len + 1; + pre_strings_len = pre_strings_flat[pre_strings_count + 1]; + pre_strings_len -= sizeof(long) * (pre_strings_count + 2); + + full_len += pre_strings_count * sizeof(long) + pre_strings_len; } - for (string_i = 0; string_i < nr_strings; ++string_i) { + for (i = 0; i < nr_strings; ++i) { char *userp; int len; - ret = getlong_user((long *)&userp, (void *)(strings + string_i)); + ret = getlong_user((long *)&userp, (void *)(strings + i)); if (ret < 0) return ret; @@ -341,32 +304,34 @@ int flatten_strings_from_user(int nr_strings, char *first, char **strings, char } /* Number of strings */ - n = first? 
1: 0; - _flat[0] = nr_strings + n; - - // Actual offset - p = (char *)(_flat + nr_strings + 2 + n); + _flat[0] = nr_strings + pre_strings_count; - n = 1; - if (first) { - _flat[n++] = p - (char *)_flat; - strcpy(p, first); - p = strchr(p, '\0') + 1; + // Actual offset + p = (char *)(_flat + nr_strings + pre_strings_count + 2); + + if (pre_strings) { + for (i = 0; i < pre_strings_count; i++) { + _flat[i + 1] = pre_strings_flat[i + 1] + + nr_strings * sizeof(long); + } + memcpy(p, pre_strings + pre_strings_flat[1], + pre_strings_len); + p += pre_strings_len; } - for (string_i = 0; string_i < nr_strings; ++string_i) { + for (i = 0; i < nr_strings; ++i) { char *userp; - _flat[n++] = p - (char *)_flat; + _flat[i + pre_strings_count + 1] = p - (char *)_flat; - ret = getlong_user((long *)&userp, (void *)(strings + string_i)); + ret = getlong_user((long *)&userp, (void *)(strings + i)); if (ret < 0) return ret; strcpy_from_user(p, userp); p = strchr(p, '\0') + 1; } - _flat[n] = 0; + _flat[nr_strings + pre_strings_count + 1] = p - (char *)_flat; *flat = (char *)_flat; - return full_len; + return p - (char *)_flat; } diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl new file mode 100755 index 00000000..6a0b8b08 --- /dev/null +++ b/scripts/checkpatch.pl @@ -0,0 +1,6410 @@ +#!/usr/bin/env perl +# SPDX-License-Identifier: GPL-2.0 +# +# (c) 2001, Dave Jones. 
(the file handling bit) +# (c) 2005, Joel Schopp (the ugly bit) +# (c) 2007,2008, Andy Whitcroft (new conditions, test suite) +# (c) 2008-2010 Andy Whitcroft +# (c) 2010-2018 Joe Perches + +use strict; +use warnings; +use POSIX; +use File::Basename; +use Cwd 'abs_path'; +use Term::ANSIColor qw(:constants); + +my $P = $0; +my $D = dirname(abs_path($P)); + +my $V = '0.32'; + +use Getopt::Long qw(:config no_auto_abbrev); + +my $quiet = 0; +my $tree = 1; +my $chk_signoff = 1; +my $chk_patch = 1; +my $tst_only; +my $emacs = 0; +my $terse = 0; +my $showfile = 0; +my $file = 0; +my $git = 0; +my %git_commits = (); +my $check = 0; +my $check_orig = 0; +my $summary = 1; +my $mailback = 0; +my $summary_file = 0; +my $show_types = 0; +my $list_types = 0; +my $fix = 0; +my $fix_inplace = 0; +my $root; +my %debug; +my %camelcase = (); +my %use_type = (); +my @use = (); +my %ignore_type = (); +my @ignore = (); +my $help = 0; +my $configuration_file = ".checkpatch.conf"; +my $max_line_length = 80; +my $ignore_perl_version = 0; +my $minimum_perl_version = 5.10.0; +my $min_conf_desc_length = 4; +my $spelling_file = "$D/spelling.txt"; +my $codespell = 0; +my $codespellfile = "/usr/share/codespell/dictionary.txt"; +my $conststructsfile = "$D/const_structs.checkpatch"; +my $typedefsfile = ""; +my $color = "auto"; +my $allow_c99_comments = 1; + +sub help { + my ($exitcode) = @_; + + print << "EOM"; +Usage: $P [OPTION]... [FILE]... +Version: $V + +Options: + -q, --quiet quiet + --no-tree run without a kernel tree + --no-signoff do not check for 'Signed-off-by' line + --patch treat FILE as patchfile (default) + --emacs emacs compile window format + --terse one line per report + --showfile emit diffed file position, not input file position + -g, --git treat FILE as a single commit or git revision range + single git commit with: + + ^ + ~n + multiple git commits with: + .. + ... 
+ - + git merges are ignored + -f, --file treat FILE as regular source file + --subjective, --strict enable more subjective tests + --list-types list the possible message types + --types TYPE(,TYPE2...) show only these comma separated message types + --ignore TYPE(,TYPE2...) ignore various comma separated message types + --show-types show the specific message type in the output + --max-line-length=n set the maximum line length, if exceeded, warn + --min-conf-desc-length=n set the min description length, if shorter, warn + --root=PATH PATH to the kernel tree root + --no-summary suppress the per-file summary + --mailback only produce a report in case of warnings/errors + --summary-file include the filename in summary + --debug KEY=[0|1] turn on/off debugging of KEY, where KEY is one of + 'values', 'possible', 'type', and 'attr' (default + is all off) + --test-only=WORD report only warnings/errors containing WORD + literally + --fix EXPERIMENTAL - may create horrible results + If correctable single-line errors exist, create + ".EXPERIMENTAL-checkpatch-fixes" + with potential errors corrected to the preferred + checkpatch style + --fix-inplace EXPERIMENTAL - may create horrible results + Is the same as --fix, but overwrites the input + file. It's your fault if there's no backup or git + --ignore-perl-version override checking of perl version. expect + runtime errors. + --codespell Use the codespell dictionary for spelling/typos + (default:/usr/share/codespell/dictionary.txt) + --codespellfile Use this codespell dictionary + --typedefsfile Read additional types from this file + --color[=WHEN] Use colors 'always', 'never', or only when output + is a terminal ('auto'). Default is 'auto'. + -h, --help, --version display this help and exit + +When FILE is - read standard input. 
+EOM + + exit($exitcode); +} + +sub uniq { + my %seen; + return grep { !$seen{$_}++ } @_; +} + +sub list_types { + my ($exitcode) = @_; + + my $count = 0; + + local $/ = undef; + + open(my $script, '<', abs_path($P)) or + die "$P: Can't read '$P' $!\n"; + + my $text = <$script>; + close($script); + + my @types = (); + # Also catch when type or level is passed through a variable + for ($text =~ /(?:(?:\bCHK|\bWARN|\bERROR|&\{\$msg_level})\s*\(|\$msg_type\s*=)\s*"([^"]+)"/g) { + push (@types, $_); + } + @types = sort(uniq(@types)); + print("#\tMessage type\n\n"); + foreach my $type (@types) { + print(++$count . "\t" . $type . "\n"); + } + + exit($exitcode); +} + +my $conf = which_conf($configuration_file); +if (-f $conf) { + my @conf_args; + open(my $conffile, '<', "$conf") + or warn "$P: Can't find a readable $configuration_file file $!\n"; + + while (<$conffile>) { + my $line = $_; + + $line =~ s/\s*\n?$//g; + $line =~ s/^\s*//g; + $line =~ s/\s+/ /g; + + next if ($line =~ m/^\s*#/); + next if ($line =~ m/^\s*$/); + + my @words = split(" ", $line); + foreach my $word (@words) { + last if ($word =~ m/^#/); + push (@conf_args, $word); + } + } + close($conffile); + unshift(@ARGV, @conf_args) if @conf_args; +} + +# Perl's Getopt::Long allows options to take optional arguments after a space. +# Prevent --color by itself from consuming other arguments +foreach (@ARGV) { + if ($_ eq "--color" || $_ eq "-color") { + $_ = "--color=$color"; + } +} + +GetOptions( + 'q|quiet+' => \$quiet, + 'tree!' => \$tree, + 'signoff!' => \$chk_signoff, + 'patch!' => \$chk_patch, + 'emacs!' => \$emacs, + 'terse!' => \$terse, + 'showfile!' => \$showfile, + 'f|file!' => \$file, + 'g|git!' => \$git, + 'subjective!' => \$check, + 'strict!' => \$check, + 'ignore=s' => \@ignore, + 'types=s' => \@use, + 'show-types!' => \$show_types, + 'list-types!' => \$list_types, + 'max-line-length=i' => \$max_line_length, + 'min-conf-desc-length=i' => \$min_conf_desc_length, + 'root=s' => \$root, + 'summary!' 
=> \$summary, + 'mailback!' => \$mailback, + 'summary-file!' => \$summary_file, + 'fix!' => \$fix, + 'fix-inplace!' => \$fix_inplace, + 'ignore-perl-version!' => \$ignore_perl_version, + 'debug=s' => \%debug, + 'test-only=s' => \$tst_only, + 'codespell!' => \$codespell, + 'codespellfile=s' => \$codespellfile, + 'typedefsfile=s' => \$typedefsfile, + 'color=s' => \$color, + 'no-color' => \$color, #keep old behaviors of -nocolor + 'nocolor' => \$color, #keep old behaviors of -nocolor + 'h|help' => \$help, + 'version' => \$help +) or help(1); + +help(0) if ($help); + +list_types(0) if ($list_types); + +$fix = 1 if ($fix_inplace); +$check_orig = $check; + +my $exit = 0; + +if ($^V && $^V lt $minimum_perl_version) { + printf "$P: requires at least perl version %vd\n", $minimum_perl_version; + if (!$ignore_perl_version) { + exit(1); + } +} + +#if no filenames are given, push '-' to read patch from stdin +if ($#ARGV < 0) { + push(@ARGV, '-'); +} + +if ($color =~ /^[01]$/) { + $color = !$color; +} elsif ($color =~ /^always$/i) { + $color = 1; +} elsif ($color =~ /^never$/i) { + $color = 0; +} elsif ($color =~ /^auto$/i) { + $color = (-t STDOUT); +} else { + die "Invalid color mode: $color\n"; +} + +sub hash_save_array_words { + my ($hashRef, $arrayRef) = @_; + + my @array = split(/,/, join(',', @$arrayRef)); + foreach my $word (@array) { + $word =~ s/\s*\n?$//g; + $word =~ s/^\s*//g; + $word =~ s/\s+/ /g; + $word =~ tr/[a-z]/[A-Z]/; + + next if ($word =~ m/^\s*#/); + next if ($word =~ m/^\s*$/); + + $hashRef->{$word}++; + } +} + +sub hash_show_words { + my ($hashRef, $prefix) = @_; + + if (keys %$hashRef) { + print "\nNOTE: $prefix message types:"; + foreach my $word (sort keys %$hashRef) { + print " $word"; + } + print "\n"; + } +} + +hash_save_array_words(\%ignore_type, \@ignore); +hash_save_array_words(\%use_type, \@use); + +my $dbg_values = 0; +my $dbg_possible = 0; +my $dbg_type = 0; +my $dbg_attr = 0; +for my $key (keys %debug) { + ## no critic + eval "\${dbg_$key} = 
'$debug{$key}';"; + die "$@" if ($@); +} + +my $rpt_cleaners = 0; + +if ($terse) { + $emacs = 1; + $quiet++; +} + +if ($tree) { + if (defined $root) { + if (!top_of_kernel_tree($root)) { + die "$P: $root: --root does not point at a valid tree\n"; + } + } else { + if (top_of_kernel_tree('.')) { + $root = '.'; + } elsif ($0 =~ m@(.*)/scripts/[^/]*$@ && + top_of_kernel_tree($1)) { + $root = $1; + } + } + + if (!defined $root) { + print "Must be run from the top-level dir. of a kernel tree\n"; + exit(2); + } +} + +my $emitted_corrupt = 0; + +our $Ident = qr{ + [A-Za-z_][A-Za-z\d_]* + (?:\s*\#\#\s*[A-Za-z_][A-Za-z\d_]*)* + }x; +our $Storage = qr{extern|static|asmlinkage}; +our $Sparse = qr{ + __user| + __kernel| + __force| + __iomem| + __must_check| + __init_refok| + __kprobes| + __ref| + __rcu| + __private + }x; +our $InitAttributePrefix = qr{__(?:mem|cpu|dev|net_|)}; +our $InitAttributeData = qr{$InitAttributePrefix(?:initdata\b)}; +our $InitAttributeConst = qr{$InitAttributePrefix(?:initconst\b)}; +our $InitAttributeInit = qr{$InitAttributePrefix(?:init\b)}; +our $InitAttribute = qr{$InitAttributeData|$InitAttributeConst|$InitAttributeInit}; + +# Notes to $Attribute: +# We need \b after 'init' otherwise 'initconst' will cause a false positive in a check +our $Attribute = qr{ + const| + __percpu| + __nocast| + __safe| + __bitwise| + __packed__| + __packed2__| + __naked| + __maybe_unused| + __always_unused| + __noreturn| + __used| + __cold| + __pure| + __noclone| + __deprecated| + __read_mostly| + __kprobes| + $InitAttribute| + ____cacheline_aligned| + ____cacheline_aligned_in_smp| + ____cacheline_internodealigned_in_smp| + __weak + }x; +our $Modifier; +our $Inline = qr{inline|__always_inline|noinline|__inline|__inline__}; +our $Member = qr{->$Ident|\.$Ident|\[[^]]*\]}; +our $Lval = qr{$Ident(?:$Member)*}; + +our $Int_type = qr{(?i)llu|ull|ll|lu|ul|l|u}; +our $Binary = qr{(?i)0b[01]+$Int_type?}; +our $Hex = qr{(?i)0x[0-9a-f]+$Int_type?}; +our $Int = 
qr{[0-9]+$Int_type?}; +our $Octal = qr{0[0-7]+$Int_type?}; +our $String = qr{"[X\t]*"}; +our $Float_hex = qr{(?i)0x[0-9a-f]+p-?[0-9]+[fl]?}; +our $Float_dec = qr{(?i)(?:[0-9]+\.[0-9]*|[0-9]*\.[0-9]+)(?:e-?[0-9]+)?[fl]?}; +our $Float_int = qr{(?i)[0-9]+e-?[0-9]+[fl]?}; +our $Float = qr{$Float_hex|$Float_dec|$Float_int}; +our $Constant = qr{$Float|$Binary|$Octal|$Hex|$Int}; +our $Assignment = qr{\*\=|/=|%=|\+=|-=|<<=|>>=|&=|\^=|\|=|=}; +our $Compare = qr{<=|>=|==|!=|<|(?}; +our $Arithmetic = qr{\+|-|\*|\/|%}; +our $Operators = qr{ + <=|>=|==|!=| + =>|->|<<|>>|<|>|!|~| + &&|\|\||,|\^|\+\+|--|&|\||$Arithmetic + }x; + +our $c90_Keywords = qr{do|for|while|if|else|return|goto|continue|switch|default|case|break}x; + +our $BasicType; +our $NonptrType; +our $NonptrTypeMisordered; +our $NonptrTypeWithAttr; +our $Type; +our $TypeMisordered; +our $Declare; +our $DeclareMisordered; + +our $NON_ASCII_UTF8 = qr{ + [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte + | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs + | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte + | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates + | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3 + | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15 + | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16 +}x; + +our $UTF8 = qr{ + [\x09\x0A\x0D\x20-\x7E] # ASCII + | $NON_ASCII_UTF8 +}x; + +our $typeC99Typedefs = qr{(?:__)?(?:[us]_?)?int_?(?:8|16|32|64)_t}; +our $typeOtherOSTypedefs = qr{(?x: + u_(?:char|short|int|long) | # bsd + u(?:nchar|short|int|long) # sysv +)}; +our $typeKernelTypedefs = qr{(?x: + (?:__)?(?:u|s|be|le)(?:8|16|32|64)| + atomic_t +)}; +our $typeTypedefs = qr{(?x: + $typeC99Typedefs\b| + $typeOtherOSTypedefs\b| + $typeKernelTypedefs\b +)}; + +our $zero_initializer = qr{(?:(?:0[xX])?0+$Int_type?|NULL|false)\b}; + +our $logFunctions = qr{(?x: + printk(?:_ratelimited|_once|_deferred_once|_deferred|)| + 
(?:[a-z0-9]+_){1,2}(?:printk|emerg|alert|crit|err|warning|warn|notice|info|debug|dbg|vdbg|devel|cont|WARN)(?:_ratelimited|_once|)| + TP_printk| + WARN(?:_RATELIMIT|_ONCE|)| + panic| + MODULE_[A-Z_]+| + seq_vprintf|seq_printf|seq_puts +)}; + +our $signature_tags = qr{(?xi: + Signed-off-by:| + Acked-by:| + Tested-by:| + Reviewed-by:| + Reported-by:| + Suggested-by:| + To:| + Cc: +)}; + +our @typeListMisordered = ( + qr{char\s+(?:un)?signed}, + qr{int\s+(?:(?:un)?signed\s+)?short\s}, + qr{int\s+short(?:\s+(?:un)?signed)}, + qr{short\s+int(?:\s+(?:un)?signed)}, + qr{(?:un)?signed\s+int\s+short}, + qr{short\s+(?:un)?signed}, + qr{long\s+int\s+(?:un)?signed}, + qr{int\s+long\s+(?:un)?signed}, + qr{long\s+(?:un)?signed\s+int}, + qr{int\s+(?:un)?signed\s+long}, + qr{int\s+(?:un)?signed}, + qr{int\s+long\s+long\s+(?:un)?signed}, + qr{long\s+long\s+int\s+(?:un)?signed}, + qr{long\s+long\s+(?:un)?signed\s+int}, + qr{long\s+long\s+(?:un)?signed}, + qr{long\s+(?:un)?signed}, +); + +our @typeList = ( + qr{void}, + qr{(?:(?:un)?signed\s+)?char}, + qr{(?:(?:un)?signed\s+)?short\s+int}, + qr{(?:(?:un)?signed\s+)?short}, + qr{(?:(?:un)?signed\s+)?int}, + qr{(?:(?:un)?signed\s+)?long\s+int}, + qr{(?:(?:un)?signed\s+)?long\s+long\s+int}, + qr{(?:(?:un)?signed\s+)?long\s+long}, + qr{(?:(?:un)?signed\s+)?long}, + qr{(?:un)?signed}, + qr{float}, + qr{double}, + qr{bool}, + qr{struct\s+$Ident}, + qr{union\s+$Ident}, + qr{enum\s+$Ident}, + qr{${Ident}_t}, + qr{${Ident}_handler}, + qr{${Ident}_handler_fn}, + @typeListMisordered, +); + +our $C90_int_types = qr{(?x: + long\s+long\s+int\s+(?:un)?signed| + long\s+long\s+(?:un)?signed\s+int| + long\s+long\s+(?:un)?signed| + (?:(?:un)?signed\s+)?long\s+long\s+int| + (?:(?:un)?signed\s+)?long\s+long| + int\s+long\s+long\s+(?:un)?signed| + int\s+(?:(?:un)?signed\s+)?long\s+long| + + long\s+int\s+(?:un)?signed| + long\s+(?:un)?signed\s+int| + long\s+(?:un)?signed| + (?:(?:un)?signed\s+)?long\s+int| + (?:(?:un)?signed\s+)?long| + 
int\s+long\s+(?:un)?signed| + int\s+(?:(?:un)?signed\s+)?long| + + int\s+(?:un)?signed| + (?:(?:un)?signed\s+)?int +)}; + +our @typeListFile = (); +our @typeListWithAttr = ( + @typeList, + qr{struct\s+$InitAttribute\s+$Ident}, + qr{union\s+$InitAttribute\s+$Ident}, +); + +our @modifierList = ( + qr{fastcall}, +); +our @modifierListFile = (); + +our @mode_permission_funcs = ( + ["module_param", 3], + ["module_param_(?:array|named|string)", 4], + ["module_param_array_named", 5], + ["debugfs_create_(?:file|u8|u16|u32|u64|x8|x16|x32|x64|size_t|atomic_t|bool|blob|regset32|u32_array)", 2], + ["proc_create(?:_data|)", 2], + ["(?:CLASS|DEVICE|SENSOR|SENSOR_DEVICE|IIO_DEVICE)_ATTR", 2], + ["IIO_DEV_ATTR_[A-Z_]+", 1], + ["SENSOR_(?:DEVICE_|)ATTR_2", 2], + ["SENSOR_TEMPLATE(?:_2|)", 3], + ["__ATTR", 2], +); + +#Create a search pattern for all these functions to speed up a loop below +our $mode_perms_search = ""; +foreach my $entry (@mode_permission_funcs) { + $mode_perms_search .= '|' if ($mode_perms_search ne ""); + $mode_perms_search .= $entry->[0]; +} +$mode_perms_search = "(?:${mode_perms_search})"; + +our $mode_perms_world_writable = qr{ + S_IWUGO | + S_IWOTH | + S_IRWXUGO | + S_IALLUGO | + 0[0-7][0-7][2367] +}x; + +our %mode_permission_string_types = ( + "S_IRWXU" => 0700, + "S_IRUSR" => 0400, + "S_IWUSR" => 0200, + "S_IXUSR" => 0100, + "S_IRWXG" => 0070, + "S_IRGRP" => 0040, + "S_IWGRP" => 0020, + "S_IXGRP" => 0010, + "S_IRWXO" => 0007, + "S_IROTH" => 0004, + "S_IWOTH" => 0002, + "S_IXOTH" => 0001, + "S_IRWXUGO" => 0777, + "S_IRUGO" => 0444, + "S_IWUGO" => 0222, + "S_IXUGO" => 0111, +); + +#Create a search pattern for all these strings to speed up a loop below +our $mode_perms_string_search = ""; +foreach my $entry (keys %mode_permission_string_types) { + $mode_perms_string_search .= '|' if ($mode_perms_string_search ne ""); + $mode_perms_string_search .= $entry; +} +our $single_mode_perms_string_search = "(?:${mode_perms_string_search})"; +our 
$multi_mode_perms_string_search = qr{ + ${single_mode_perms_string_search} + (?:\s*\|\s*${single_mode_perms_string_search})* +}x; + +sub perms_to_octal { + my ($string) = @_; + + return trim($string) if ($string =~ /^\s*0[0-7]{3,3}\s*$/); + + my $val = ""; + my $oval = ""; + my $to = 0; + my $curpos = 0; + my $lastpos = 0; + while ($string =~ /\b(($single_mode_perms_string_search)\b(?:\s*\|\s*)?\s*)/g) { + $curpos = pos($string); + my $match = $2; + my $omatch = $1; + last if ($lastpos > 0 && ($curpos - length($omatch) != $lastpos)); + $lastpos = $curpos; + $to |= $mode_permission_string_types{$match}; + $val .= '\s*\|\s*' if ($val ne ""); + $val .= $match; + $oval .= $omatch; + } + $oval =~ s/^\s*\|\s*//; + $oval =~ s/\s*\|\s*$//; + return sprintf("%04o", $to); +} + +our $allowed_asm_includes = qr{(?x: + irq| + memory| + time| + reboot +)}; +# memory.h: ARM has a custom one + +# Load common spelling mistakes and build regular expression list. +my $misspellings; +my %spelling_fix; + +if (open(my $spelling, '<', $spelling_file)) { + while (<$spelling>) { + my $line = $_; + + $line =~ s/\s*\n?$//g; + $line =~ s/^\s*//g; + + next if ($line =~ m/^\s*#/); + next if ($line =~ m/^\s*$/); + + my ($suspect, $fix) = split(/\|\|/, $line); + + $spelling_fix{$suspect} = $fix; + } + close($spelling); +} else { + warn "No typos will be found - file '$spelling_file': $!\n"; +} + +if ($codespell) { + if (open(my $spelling, '<', $codespellfile)) { + while (<$spelling>) { + my $line = $_; + + $line =~ s/\s*\n?$//g; + $line =~ s/^\s*//g; + + next if ($line =~ m/^\s*#/); + next if ($line =~ m/^\s*$/); + next if ($line =~ m/, disabled/i); + + $line =~ s/,.*$//; + + my ($suspect, $fix) = split(/->/, $line); + + $spelling_fix{$suspect} = $fix; + } + close($spelling); + } else { + warn "No codespell typos will be found - file '$codespellfile': $!\n"; + } +} + +$misspellings = join("|", sort keys %spelling_fix) if keys %spelling_fix; + +sub read_words { + my ($wordsRef, $file) = @_; + + if 
(open(my $words, '<', $file)) { + while (<$words>) { + my $line = $_; + + $line =~ s/\s*\n?$//g; + $line =~ s/^\s*//g; + + next if ($line =~ m/^\s*#/); + next if ($line =~ m/^\s*$/); + if ($line =~ /\s/) { + print("$file: '$line' invalid - ignored\n"); + next; + } + + $$wordsRef .= '|' if ($$wordsRef ne ""); + $$wordsRef .= $line; + } + close($file); + return 1; + } + + return 0; +} + +my $const_structs = ""; + +my $typeOtherTypedefs = ""; +if (length($typedefsfile)) { + read_words(\$typeOtherTypedefs, $typedefsfile) + or warn "No additional types will be considered - file '$typedefsfile': $!\n"; +} +$typeTypedefs .= '|' . $typeOtherTypedefs if ($typeOtherTypedefs ne ""); + +sub build_types { + my $mods = "(?x: \n" . join("|\n ", (@modifierList, @modifierListFile)) . "\n)"; + my $all = "(?x: \n" . join("|\n ", (@typeList, @typeListFile)) . "\n)"; + my $Misordered = "(?x: \n" . join("|\n ", @typeListMisordered) . "\n)"; + my $allWithAttr = "(?x: \n" . join("|\n ", @typeListWithAttr) . "\n)"; + $Modifier = qr{(?:$Attribute|$Sparse|$mods)}; + $BasicType = qr{ + (?:$typeTypedefs\b)| + (?:${all}\b) + }x; + $NonptrType = qr{ + (?:$Modifier\s+|const\s+)* + (?: + (?:typeof|__typeof__)\s*\([^\)]*\)| + (?:$typeTypedefs\b)| + (?:${all}\b) + ) + (?:\s+$Modifier|\s+const)* + }x; + $NonptrTypeMisordered = qr{ + (?:$Modifier\s+|const\s+)* + (?: + (?:${Misordered}\b) + ) + (?:\s+$Modifier|\s+const)* + }x; + $NonptrTypeWithAttr = qr{ + (?:$Modifier\s+|const\s+)* + (?: + (?:typeof|__typeof__)\s*\([^\)]*\)| + (?:$typeTypedefs\b)| + (?:${allWithAttr}\b) + ) + (?:\s+$Modifier|\s+const)* + }x; + $Type = qr{ + $NonptrType + (?:(?:\s|\*|\[\])+\s*const|(?:\s|\*\s*(?:const\s*)?|\[\])+|(?:\s*\[\s*\])+)? + (?:\s+$Inline|\s+$Modifier)* + }x; + $TypeMisordered = qr{ + $NonptrTypeMisordered + (?:(?:\s|\*|\[\])+\s*const|(?:\s|\*\s*(?:const\s*)?|\[\])+|(?:\s*\[\s*\])+)? 
+ (?:\s+$Inline|\s+$Modifier)* + }x; + $Declare = qr{(?:$Storage\s+(?:$Inline\s+)?)?$Type}; + $DeclareMisordered = qr{(?:$Storage\s+(?:$Inline\s+)?)?$TypeMisordered}; +} +build_types(); + +our $Typecast = qr{\s*(\(\s*$NonptrType\s*\)){0,1}\s*}; + +# Using $balanced_parens, $LvalOrFunc, or $FuncArg +# requires at least perl version v5.10.0 +# Any use must be runtime checked with $^V + +our $balanced_parens = qr/(\((?:[^\(\)]++|(?-1))*\))/; +our $LvalOrFunc = qr{((?:[\&\*]\s*)?$Lval)\s*($balanced_parens{0,1})\s*}; +our $FuncArg = qr{$Typecast{0,1}($LvalOrFunc|$Constant|$String)}; + +our $declaration_macros = qr{(?x: + (?:$Storage\s+)?(?:[A-Z_][A-Z0-9]*_){0,2}(?:DEFINE|DECLARE)(?:_[A-Z0-9]+){1,6}\s*\(| + (?:$Storage\s+)?[HLP]?LIST_HEAD\s*\(| + (?:$Storage\s+)?${Type}\s+uninitialized_var\s*\(| + (?:SKCIPHER_REQUEST|SHASH_DESC|AHASH_REQUEST)_ON_STACK\s*\( +)}; + +sub deparenthesize { + my ($string) = @_; + return "" if (!defined($string)); + + while ($string =~ /^\s*\(.*\)\s*$/) { + $string =~ s@^\s*\(\s*@@; + $string =~ s@\s*\)\s*$@@; + } + + $string =~ s@\s+@ @g; + + return $string; +} + +sub seed_camelcase_file { + my ($file) = @_; + + return if (!(-f $file)); + + local $/; + + open(my $include_file, '<', "$file") + or warn "$P: Can't read '$file' $!\n"; + my $text = <$include_file>; + close($include_file); + + my @lines = split('\n', $text); + + foreach my $line (@lines) { + next if ($line !~ /(?:[A-Z][a-z]|[a-z][A-Z])/); + if ($line =~ /^[ \t]*(?:#[ \t]*define|typedef\s+$Type)\s+(\w*(?:[A-Z][a-z]|[a-z][A-Z])\w*)/) { + $camelcase{$1} = 1; + } elsif ($line =~ /^\s*$Declare\s+(\w*(?:[A-Z][a-z]|[a-z][A-Z])\w*)\s*[\(\[,;]/) { + $camelcase{$1} = 1; + } elsif ($line =~ /^\s*(?:union|struct|enum)\s+(\w*(?:[A-Z][a-z]|[a-z][A-Z])\w*)\s*[;\{]/) { + $camelcase{$1} = 1; + } + } +} + +sub is_maintained_obsolete { + my ($filename) = @_; + + return 0 if (!$tree || !(-e "$root/scripts/get_maintainer.pl")); + + my $status = `perl $root/scripts/get_maintainer.pl --status --nom --nol 
--nogit --nogit-fallback -f $filename 2>&1`; + + return $status =~ /obsolete/i; +} + +my $camelcase_seeded = 0; +sub seed_camelcase_includes { + return if ($camelcase_seeded); + + my $files; + my $camelcase_cache = ""; + my @include_files = (); + + $camelcase_seeded = 1; + + if (-e ".git") { + my $git_last_include_commit = `git log --no-merges --pretty=format:"%h%n" -1 -- include`; + chomp $git_last_include_commit; + $camelcase_cache = ".checkpatch-camelcase.git.$git_last_include_commit"; + } else { + my $last_mod_date = 0; + $files = `find $root/include -name "*.h"`; + @include_files = split('\n', $files); + foreach my $file (@include_files) { + my $date = POSIX::strftime("%Y%m%d%H%M", + localtime((stat $file)[9])); + $last_mod_date = $date if ($last_mod_date < $date); + } + $camelcase_cache = ".checkpatch-camelcase.date.$last_mod_date"; + } + + if ($camelcase_cache ne "" && -f $camelcase_cache) { + open(my $camelcase_file, '<', "$camelcase_cache") + or warn "$P: Can't read '$camelcase_cache' $!\n"; + while (<$camelcase_file>) { + chomp; + $camelcase{$_} = 1; + } + close($camelcase_file); + + return; + } + + if (-e ".git") { + $files = `git ls-files "include/*.h"`; + @include_files = split('\n', $files); + } + + foreach my $file (@include_files) { + seed_camelcase_file($file); + } + + if ($camelcase_cache ne "") { + unlink glob ".checkpatch-camelcase.*"; + open(my $camelcase_file, '>', "$camelcase_cache") + or warn "$P: Can't write '$camelcase_cache' $!\n"; + foreach (sort { lc($a) cmp lc($b) } keys(%camelcase)) { + print $camelcase_file ("$_\n"); + } + close($camelcase_file); + } +} + +sub git_commit_info { + my ($commit, $id, $desc) = @_; + + return ($id, $desc) if ((which("git") eq "") || !(-e ".git")); + + my $output = `git log --no-color --format='%H %s' -1 $commit 2>&1`; + $output =~ s/^\s*//gm; + my @lines = split("\n", $output); + + return ($id, $desc) if ($#lines < 0); + + if ($lines[0] =~ /^error: short SHA1 $commit is ambiguous\./) { +# Maybe one day 
convert this block of bash into something that returns +# all matching commit ids, but it's very slow... +# +# echo "checking commits $1..." +# git rev-list --remotes | grep -i "^$1" | +# while read line ; do +# git log --format='%H %s' -1 $line | +# echo "commit $(cut -c 1-12,41-)" +# done + } elsif ($lines[0] =~ /^fatal: ambiguous argument '$commit': unknown revision or path not in the working tree\./) { + $id = undef; + } else { + $id = substr($lines[0], 0, 12); + $desc = substr($lines[0], 41); + } + + return ($id, $desc); +} + +$chk_signoff = 0 if ($file); + +my @rawlines = (); +my @lines = (); +my @fixed = (); +my @fixed_inserted = (); +my @fixed_deleted = (); +my $fixlinenr = -1; + +# If input is git commits, extract all commits from the commit expressions. +# For example, HEAD-3 means we need check 'HEAD, HEAD~1, HEAD~2'. +die "$P: No git repository found\n" if ($git && !-e ".git"); + +if ($git) { + my @commits = (); + foreach my $commit_expr (@ARGV) { + my $git_range; + if ($commit_expr =~ m/^(.*)-(\d+)$/) { + $git_range = "-$2 $1"; + } elsif ($commit_expr =~ m/\.\./) { + $git_range = "$commit_expr"; + } else { + $git_range = "-1 $commit_expr"; + } + my $lines = `git log --no-color --no-merges --pretty=format:'%H %s' $git_range`; + foreach my $line (split(/\n/, $lines)) { + $line =~ /^([0-9a-fA-F]{40,40}) (.*)$/; + next if (!defined($1) || !defined($2)); + my $sha1 = $1; + my $subject = $2; + unshift(@commits, $sha1); + $git_commits{$sha1} = $subject; + } + } + die "$P: no git commits after extraction!\n" if (@commits == 0); + @ARGV = @commits; +} + +my $vname; +for my $filename (@ARGV) { + my $FILE; + if ($git) { + open($FILE, '-|', "git format-patch -M --stdout -1 $filename") || + die "$P: $filename: git format-patch failed - $!\n"; + } elsif ($file) { + open($FILE, '-|', "diff -u /dev/null $filename") || + die "$P: $filename: diff failed - $!\n"; + } elsif ($filename eq '-') { + open($FILE, '<&STDIN'); + } else { + open($FILE, '<', "$filename") || + die 
"$P: $filename: open failed - $!\n"; + } + if ($filename eq '-') { + $vname = 'Your patch'; + } elsif ($git) { + $vname = "Commit " . substr($filename, 0, 12) . ' ("' . $git_commits{$filename} . '")'; + } else { + $vname = $filename; + } + while (<$FILE>) { + chomp; + push(@rawlines, $_); + } + close($FILE); + + if ($#ARGV > 0 && $quiet == 0) { + print '-' x length($vname) . "\n"; + print "$vname\n"; + print '-' x length($vname) . "\n"; + } + + if (!process($filename)) { + $exit = 1; + } + @rawlines = (); + @lines = (); + @fixed = (); + @fixed_inserted = (); + @fixed_deleted = (); + $fixlinenr = -1; + @modifierListFile = (); + @typeListFile = (); + build_types(); +} + +if (!$quiet) { + hash_show_words(\%use_type, "Used"); + hash_show_words(\%ignore_type, "Ignored"); + + if ($^V lt 5.10.0) { + print << "EOM" + +NOTE: perl $^V is not modern enough to detect all possible issues. + An upgrade to at least perl v5.10.0 is suggested. +EOM + } + if ($exit) { + print << "EOM" + +NOTE: If any of the errors are false positives, please report + them to the maintainer, see CHECKPATCH in MAINTAINERS. +EOM + } +} + +exit($exit); + +sub top_of_kernel_tree { + my ($root) = @_; + + my @tree_check = ( + "COPYING", "CREDITS", "Kbuild", "MAINTAINERS", "Makefile", + "README", "Documentation", "arch", "include", "drivers", + "fs", "init", "ipc", "kernel", "lib", "scripts", + ); + + foreach my $check (@tree_check) { + if (! -e $root . '/' . 
$check) { + return 0; + } + } + return 1; +} + +sub parse_email { + my ($formatted_email) = @_; + + my $name = ""; + my $address = ""; + my $comment = ""; + + if ($formatted_email =~ /^(.*)<(\S+\@\S+)>(.*)$/) { + $name = $1; + $address = $2; + $comment = $3 if defined $3; + } elsif ($formatted_email =~ /^\s*<(\S+\@\S+)>(.*)$/) { + $address = $1; + $comment = $2 if defined $2; + } elsif ($formatted_email =~ /(\S+\@\S+)(.*)$/) { + $address = $1; + $comment = $2 if defined $2; + $formatted_email =~ s/\Q$address\E.*$//; + $name = $formatted_email; + $name = trim($name); + $name =~ s/^\"|\"$//g; + # If there's a name left after stripping spaces and + # leading quotes, and the address doesn't have both + # leading and trailing angle brackets, the address + # is invalid. ie: + # "joe smith joe@smith.com" bad + # "joe smith ]+>$/) { + $name = ""; + $address = ""; + $comment = ""; + } + } + + $name = trim($name); + $name =~ s/^\"|\"$//g; + $address = trim($address); + $address =~ s/^\<|\>$//g; + + if ($name =~ /[^\w \-]/i) { ##has "must quote" chars + $name =~ s/(?"; + } + + return $formatted_email; +} + +sub which { + my ($bin) = @_; + + foreach my $path (split(/:/, $ENV{PATH})) { + if (-e "$path/$bin") { + return "$path/$bin"; + } + } + + return ""; +} + +sub which_conf { + my ($conf) = @_; + + foreach my $path (split(/:/, ".:$ENV{HOME}:.scripts")) { + if (-e "$path/$conf") { + return "$path/$conf"; + } + } + + return ""; +} + +sub expand_tabs { + my ($str) = @_; + + my $res = ''; + my $n = 0; + for my $c (split(//, $str)) { + if ($c eq "\t") { + $res .= ' '; + $n++; + for (; ($n % 8) != 0; $n++) { + $res .= ' '; + } + next; + } + $res .= $c; + $n++; + } + + return $res; +} +sub copy_spacing { + (my $res = shift) =~ tr/\t/ /c; + return $res; +} + +sub line_stats { + my ($line) = @_; + + # Drop the diff line leader and expand tabs + $line =~ s/^.//; + $line = expand_tabs($line); + + # Pick the indent from the front of the line. 
+ my ($white) = ($line =~ /^(\s*)/); + + return (length($line), length($white)); +} + +my $sanitise_quote = ''; + +sub sanitise_line_reset { + my ($in_comment) = @_; + + if ($in_comment) { + $sanitise_quote = '*/'; + } else { + $sanitise_quote = ''; + } +} +sub sanitise_line { + my ($line) = @_; + + my $res = ''; + my $l = ''; + + my $qlen = 0; + my $off = 0; + my $c; + + # Always copy over the diff marker. + $res = substr($line, 0, 1); + + for ($off = 1; $off < length($line); $off++) { + $c = substr($line, $off, 1); + + # Comments we are whacking completely including the begin + # and end, all to $;. + if ($sanitise_quote eq '' && substr($line, $off, 2) eq '/*') { + $sanitise_quote = '*/'; + + substr($res, $off, 2, "$;$;"); + $off++; + next; + } + if ($sanitise_quote eq '*/' && substr($line, $off, 2) eq '*/') { + $sanitise_quote = ''; + substr($res, $off, 2, "$;$;"); + $off++; + next; + } + if ($sanitise_quote eq '' && substr($line, $off, 2) eq '//') { + $sanitise_quote = '//'; + + substr($res, $off, 2, $sanitise_quote); + $off++; + next; + } + + # A \ in a string means ignore the next character. + if (($sanitise_quote eq "'" || $sanitise_quote eq '"') && + $c eq "\\") { + substr($res, $off, 2, 'XX'); + $off++; + next; + } + # Regular quotes. + if ($c eq "'" || $c eq '"') { + if ($sanitise_quote eq '') { + $sanitise_quote = $c; + + substr($res, $off, 1, $c); + next; + } elsif ($sanitise_quote eq $c) { + $sanitise_quote = ''; + } + } + + #print "c<$c> SQ<$sanitise_quote>\n"; + if ($off != 0 && $sanitise_quote eq '*/' && $c ne "\t") { + substr($res, $off, 1, $;); + } elsif ($off != 0 && $sanitise_quote eq '//' && $c ne "\t") { + substr($res, $off, 1, $;); + } elsif ($off != 0 && $sanitise_quote && $c ne "\t") { + substr($res, $off, 1, 'X'); + } else { + substr($res, $off, 1, $c); + } + } + + if ($sanitise_quote eq '//') { + $sanitise_quote = ''; + } + + # The pathname on a #include may be surrounded by '<' and '>'. 
+ if ($res =~ /^.\s*\#\s*include\s+\<(.*)\>/) { + my $clean = 'X' x length($1); + $res =~ s@\<.*\>@<$clean>@; + + # The whole of a #error is a string. + } elsif ($res =~ /^.\s*\#\s*(?:error|warning)\s+(.*)\b/) { + my $clean = 'X' x length($1); + $res =~ s@(\#\s*(?:error|warning)\s+).*@$1$clean@; + } + + if ($allow_c99_comments && $res =~ m@(//.*$)@) { + my $match = $1; + $res =~ s/\Q$match\E/"$;" x length($match)/e; + } + + return $res; +} + +sub get_quoted_string { + my ($line, $rawline) = @_; + + return "" if (!defined($line) || !defined($rawline)); + return "" if ($line !~ m/($String)/g); + return substr($rawline, $-[0], $+[0] - $-[0]); +} + +sub ctx_statement_block { + my ($linenr, $remain, $off) = @_; + my $line = $linenr - 1; + my $blk = ''; + my $soff = $off; + my $coff = $off - 1; + my $coff_set = 0; + + my $loff = 0; + + my $type = ''; + my $level = 0; + my @stack = (); + my $p; + my $c; + my $len = 0; + + my $remainder; + while (1) { + @stack = (['', 0]) if ($#stack == -1); + + #warn "CSB: blk<$blk> remain<$remain>\n"; + # If we are about to drop off the end, pull in more + # context. + if ($off >= $len) { + for (; $remain > 0; $line++) { + last if (!defined $lines[$line]); + next if ($lines[$line] =~ /^-/); + $remain--; + $loff = $len; + $blk .= $lines[$line] . "\n"; + $len = length($blk); + $line++; + last; + } + # Bail if there is no further context. + #warn "CSB: blk<$blk> off<$off> len<$len>\n"; + if ($off >= $len) { + last; + } + if ($level == 0 && substr($blk, $off) =~ /^.\s*#\s*define/) { + $level++; + $type = '#'; + } + } + $p = $c; + $c = substr($blk, $off, 1); + $remainder = substr($blk, $off); + + #warn "CSB: c<$c> type<$type> level<$level> remainder<$remainder> coff_set<$coff_set>\n"; + + # Handle nested #if/#else. 
+ if ($remainder =~ /^#\s*(?:ifndef|ifdef|if)\s/) { + push(@stack, [ $type, $level ]); + } elsif ($remainder =~ /^#\s*(?:else|elif)\b/) { + ($type, $level) = @{$stack[$#stack - 1]}; + } elsif ($remainder =~ /^#\s*endif\b/) { + ($type, $level) = @{pop(@stack)}; + } + + # Statement ends at the ';' or a close '}' at the + # outermost level. + if ($level == 0 && $c eq ';') { + last; + } + + # An else is really a conditional as long as its not else if + if ($level == 0 && $coff_set == 0 && + (!defined($p) || $p =~ /(?:\s|\}|\+)/) && + $remainder =~ /^(else)(?:\s|{)/ && + $remainder !~ /^else\s+if\b/) { + $coff = $off + length($1) - 1; + $coff_set = 1; + #warn "CSB: mark coff<$coff> soff<$soff> 1<$1>\n"; + #warn "[" . substr($blk, $soff, $coff - $soff + 1) . "]\n"; + } + + if (($type eq '' || $type eq '(') && $c eq '(') { + $level++; + $type = '('; + } + if ($type eq '(' && $c eq ')') { + $level--; + $type = ($level != 0)? '(' : ''; + + if ($level == 0 && $coff < $soff) { + $coff = $off; + $coff_set = 1; + #warn "CSB: mark coff<$coff>\n"; + } + } + if (($type eq '' || $type eq '{') && $c eq '{') { + $level++; + $type = '{'; + } + if ($type eq '{' && $c eq '}') { + $level--; + $type = ($level != 0)? '{' : ''; + + if ($level == 0) { + if (substr($blk, $off + 1, 1) eq ';') { + $off++; + } + last; + } + } + # Preprocessor commands end at the newline unless escaped. + if ($type eq '#' && $c eq "\n" && $p ne "\\") { + $level--; + $type = ''; + $off++; + last; + } + $off++; + } + # We are truly at the end, so shuffle to the next line. 
+	if ($off == $len) {
+		$loff = $len + 1;
+		$line++;
+		$remain--;
+	}
+
+	my $statement = substr($blk, $soff, $off - $soff + 1);
+	my $condition = substr($blk, $soff, $coff - $soff + 1);
+
+	#warn "STATEMENT<$statement>\n";
+	#warn "CONDITION<$condition>\n";
+
+	#print "coff<$coff> soff<$off> loff<$loff>\n";
+
+	return ($statement, $condition,
+			$line, $remain + 1, $off - $loff + 1, $level);
+}
+
+# Number of lines in $stmt after dropping the first character of every line
+# (the diff prefix) and trimming leading/trailing whitespace: newlines + 1.
+sub statement_lines {
+	my ($stmt) = @_;
+
+	# Strip the diff line prefixes and rip blank lines at start and end.
+	$stmt =~ s/(^|\n)./$1/g;
+	$stmt =~ s/^\s*//;
+	$stmt =~ s/\s*$//;
+
+	my @stmt_lines = ($stmt =~ /\n/g);
+
+	return $#stmt_lines + 2;
+}
+
+# Number of lines in $stmt exactly as given (newlines + 1); nothing is
+# stripped or trimmed first.
+sub statement_rawlines {
+	my ($stmt) = @_;
+
+	my @stmt_lines = ($stmt =~ /\n/g);
+
+	return $#stmt_lines + 2;
+}
+
+# Size estimate for a block: after removing diff prefixes, one leading '{',
+# one trailing '}' and surrounding whitespace, return whichever is larger,
+# the line count or the number of ';' characters.
+sub statement_block_size {
+	my ($stmt) = @_;
+
+	$stmt =~ s/(^|\n)./$1/g;
+	$stmt =~ s/^\s*{//;
+	$stmt =~ s/}\s*$//;
+	$stmt =~ s/^\s*//;
+	$stmt =~ s/\s*$//;
+
+	my @stmt_lines = ($stmt =~ /\n/g);
+	my @stmt_statements = ($stmt =~ /;/g);
+
+	my $stmt_lines = $#stmt_lines + 2;
+	my $stmt_statements = $#stmt_statements + 1;
+
+	if ($stmt_lines > $stmt_statements) {
+		return $stmt_lines;
+	} else {
+		return $stmt_statements;
+	}
+}
+
+# Collect a statement as a list of [ condition, statement ] pairs by calling
+# ctx_statement_block() repeatedly.  After the first pair, further pairs are
+# appended only while context remains and the next condition starts with
+# "else" or "do".  Returns ($level, $linenr, @chunks).
+sub ctx_statement_full {
+	my ($linenr, $remain, $off) = @_;
+	my ($statement, $condition, $level);
+
+	my (@chunks);
+
+	# Grab the first conditional/block pair.
+	($statement, $condition, $linenr, $remain, $off, $level) =
+				ctx_statement_block($linenr, $remain, $off);
+	#print "F: c<$condition> s<$statement> remain<$remain>\n";
+	push(@chunks, [ $condition, $statement ]);
+	if (!($remain > 0 && $condition =~ /^\s*(?:\n[+-])?\s*(?:if|else|do)\b/s)) {
+		return ($level, $linenr, @chunks);
+	}
+
+	# Pull in the following conditional/block pairs and see if they
+	# could continue the statement.
+	for (;;) {
+		($statement, $condition, $linenr, $remain, $off, $level) =
+				ctx_statement_block($linenr, $remain, $off);
+		#print "C: c<$condition> s<$statement> remain<$remain>\n";
+		last if (!($remain > 0 && $condition =~ /^(?:\s*\n[+-])*\s*(?:else|do)\b/s));
+		#print "C: push\n";
+		push(@chunks, [ $condition, $statement ]);
+	}
+
+	return ($level, $linenr, @chunks);
+}
+
+# Walk up to $remain non-deleted lines starting at $linenr, tracking the
+# nesting depth of the $open/$close character pair in the sanitised @lines
+# and collecting the matching @rawlines.
+#   $outer - when true, keep only lines that end at depth <= 1
+#   $off   - number of leading characters to skip before counting begins
+# The depth is saved on #if/#ifdef/#ifndef, reset to the saved value on
+# #else/#elif and restored on #endif.  Scanning stops once the depth returns
+# to 0.  Returns ($level, @collected_raw_lines).
+# NOTE(review): $blk is appended to, and @o/@c are declared, but none of
+# them is read anywhere in this sub.
+sub ctx_block_get {
+	my ($linenr, $remain, $outer, $open, $close, $off) = @_;
+	my $line;
+	my $start = $linenr - 1;
+	my $blk = '';
+	my @o;
+	my @c;
+	my @res = ();
+
+	my $level = 0;
+	my @stack = ($level);
+	for ($line = $start; $remain > 0; $line++) {
+		# Lines removed by the patch do not consume context.
+		next if ($rawlines[$line] =~ /^-/);
+		$remain--;
+
+		$blk .= $rawlines[$line];
+
+		# Handle nested #if/#else.
+		if ($lines[$line] =~ /^.\s*#\s*(?:ifndef|ifdef|if)\s/) {
+			push(@stack, $level);
+		} elsif ($lines[$line] =~ /^.\s*#\s*(?:else|elif)\b/) {
+			$level = $stack[$#stack - 1];
+		} elsif ($lines[$line] =~ /^.\s*#\s*endif\b/) {
+			$level = pop(@stack);
+		}
+
+		foreach my $c (split(//, $lines[$line])) {
+			##print "C<$c>L<$level><$open$close>O<$off>\n";
+			if ($off > 0) {
+				$off--;
+				next;
+			}
+
+			if ($c eq $close && $level > 0) {
+				$level--;
+				last if ($level == 0);
+			} elsif ($c eq $open) {
+				$level++;
+			}
+		}
+
+		if (!$outer || $level <= 1) {
+			push(@res, $rawlines[$line]);
+		}
+
+		last if ($level == 0);
+	}
+
+	return ($level, @res);
+}
+# Raw lines of the '{' ... '}' block at $linenr, outer level only.
+sub ctx_block_outer {
+	my ($linenr, $remain) = @_;
+
+	my ($level, @r) = ctx_block_get($linenr, $remain, 1, '{', '}', 0);
+	return @r;
+}
+# Raw lines of the '{' ... '}' block at $linenr.
+sub ctx_block {
+	my ($linenr, $remain) = @_;
+
+	my ($level, @r) = ctx_block_get($linenr, $remain, 0, '{', '}', 0);
+	return @r;
+}
+# Raw lines of the '(' ... ')' group at $linenr, skipping $off characters.
+sub ctx_statement {
+	my ($linenr, $remain, $off) = @_;
+
+	my ($level, @r) = ctx_block_get($linenr, $remain, 0, '(', ')', $off);
+	return @r;
+}
+# Same as ctx_block() but also returns the final depth as the first element.
+sub ctx_block_level {
+	my ($linenr, $remain) = @_;
+
+	return ctx_block_get($linenr, $remain, 0, '{', '}', 0);
+}
+# Same as ctx_statement() but also returns the final depth as the first
+# element.
+sub ctx_statement_level {
+	my ($linenr, $remain, $off) = @_;
+
+	return ctx_block_get($linenr, $remain, 0, '(', ')', $off);
+}
+
+# Return the comment associated with raw line $end_line: a "/* ... */" that
+# closes that line if there is one, otherwise the most recent comment text
+# seen while scanning raw lines $first_line .. $end_line - 1 ('' if none).
+sub ctx_locate_comment {
+	my ($first_line, $end_line) = @_;
+
+	# Catch a comment on the end of the line itself.
+	my ($current_comment) = ($rawlines[$end_line - 1] =~ m@.*(/\*.*\*/)\s*(?:\\\s*)?$@);
+	return $current_comment if (defined $current_comment);
+
+	# Look through the context and try and figure out if there is a
+	# comment.
+	my $in_comment = 0;
+	$current_comment = '';
+	for (my $linenr = $first_line; $linenr < $end_line; $linenr++) {
+		my $line = $rawlines[$linenr - 1];
+		#warn "           $line\n";
+		# A first line that starts with '*' is taken as comment continuation.
+		if ($linenr == $first_line and $line =~ m@^.\s*\*@) {
+			$in_comment = 1;
+		}
+		if ($line =~ m@/\*@) {
+			$in_comment = 1;
+		}
+		# Code after a finished comment discards what was collected.
+		if (!$in_comment && $current_comment ne '') {
+			$current_comment = '';
+		}
+		$current_comment .= $line . "\n" if ($in_comment);
+		if ($line =~ m@\*/@) {
+			$in_comment = 0;
+		}
+	}
+
+	chomp($current_comment);
+	return($current_comment);
+}
+# True when ctx_locate_comment() finds a non-empty comment for the range.
+sub ctx_has_comment {
+	my ($first_line, $end_line) = @_;
+	my $cmt = ctx_locate_comment($first_line, $end_line);
+
+	##print "LINE: $rawlines[$end_line - 1 ]\n";
+	##print "CMMT: $cmt\n";
+
+	return ($cmt ne '');
+}
+
+# Return the raw line reached by stepping $cnt + 1 entries forward from
+# $linenr, where raw lines beginning with '-' are skipped without being
+# counted.  Entries past the end of @rawlines count as steps and yield undef.
+sub raw_line {
+	my ($linenr, $cnt) = @_;
+
+	my $offset = $linenr - 1;
+	$cnt++;
+
+	my $line;
+	while ($cnt) {
+		$line = $rawlines[$offset++];
+		next if (defined($line) && $line =~ /^-/);
+		$cnt--;
+	}
+
+	return $line;
+}
+
+# Join raw_line($n, 0) for every $n in $linenr .. $lc with newlines.
+sub get_stat_real {
+	my ($linenr, $lc) = @_;
+
+	my $stat_real = raw_line($linenr, 0);
+	for (my $count = $linenr + 1; $count <= $lc; $count++) {
+		$stat_real = $stat_real . "\n" . raw_line($count, 0);
+	}
+
+	return $stat_real;
+}
+
+sub get_stat_here {
+	my ($linenr, $cnt, $here) = @_;
+
+	my $herectx = $here . "\n";
+	for (my $n = 0; $n < $cnt; $n++) {
+		$herectx .= raw_line($linenr, $n) .
"\n"; + } + + return $herectx; +} + +sub cat_vet { + my ($vet) = @_; + my ($res, $coded); + + $res = ''; + while ($vet =~ /([^[:cntrl:]]*)([[:cntrl:]]|$)/g) { + $res .= $1; + if ($2 ne '') { + $coded = sprintf("^%c", unpack('C', $2) + 64); + $res .= $coded; + } + } + $res =~ s/$/\$/; + + return $res; +} + +my $av_preprocessor = 0; +my $av_pending; +my @av_paren_type; +my $av_pend_colon; + +sub annotate_reset { + $av_preprocessor = 0; + $av_pending = '_'; + @av_paren_type = ('E'); + $av_pend_colon = 'O'; +} + +sub annotate_values { + my ($stream, $type) = @_; + + my $res; + my $var = '_' x length($stream); + my $cur = $stream; + + print "$stream\n" if ($dbg_values > 1); + + while (length($cur)) { + @av_paren_type = ('E') if ($#av_paren_type < 0); + print " <" . join('', @av_paren_type) . + "> <$type> <$av_pending>" if ($dbg_values > 1); + if ($cur =~ /^(\s+)/o) { + print "WS($1)\n" if ($dbg_values > 1); + if ($1 =~ /\n/ && $av_preprocessor) { + $type = pop(@av_paren_type); + $av_preprocessor = 0; + } + + } elsif ($cur =~ /^(\(\s*$Type\s*)\)/ && $av_pending eq '_') { + print "CAST($1)\n" if ($dbg_values > 1); + push(@av_paren_type, $type); + $type = 'c'; + + } elsif ($cur =~ /^($Type)\s*(?:$Ident|,|\)|\(|\s*$)/) { + print "DECLARE($1)\n" if ($dbg_values > 1); + $type = 'T'; + + } elsif ($cur =~ /^($Modifier)\s*/) { + print "MODIFIER($1)\n" if ($dbg_values > 1); + $type = 'T'; + + } elsif ($cur =~ /^(\#\s*define\s*$Ident)(\(?)/o) { + print "DEFINE($1,$2)\n" if ($dbg_values > 1); + $av_preprocessor = 1; + push(@av_paren_type, $type); + if ($2 ne '') { + $av_pending = 'N'; + } + $type = 'E'; + + } elsif ($cur =~ /^(\#\s*(?:undef\s*$Ident|include\b))/o) { + print "UNDEF($1)\n" if ($dbg_values > 1); + $av_preprocessor = 1; + push(@av_paren_type, $type); + + } elsif ($cur =~ /^(\#\s*(?:ifdef|ifndef|if))/o) { + print "PRE_START($1)\n" if ($dbg_values > 1); + $av_preprocessor = 1; + + push(@av_paren_type, $type); + push(@av_paren_type, $type); + $type = 'E'; + + } elsif 
($cur =~ /^(\#\s*(?:else|elif))/o) { + print "PRE_RESTART($1)\n" if ($dbg_values > 1); + $av_preprocessor = 1; + + push(@av_paren_type, $av_paren_type[$#av_paren_type]); + + $type = 'E'; + + } elsif ($cur =~ /^(\#\s*(?:endif))/o) { + print "PRE_END($1)\n" if ($dbg_values > 1); + + $av_preprocessor = 1; + + # Assume all arms of the conditional end as this + # one does, and continue as if the #endif was not here. + pop(@av_paren_type); + push(@av_paren_type, $type); + $type = 'E'; + + } elsif ($cur =~ /^(\\\n)/o) { + print "PRECONT($1)\n" if ($dbg_values > 1); + + } elsif ($cur =~ /^(__attribute__)\s*\(?/o) { + print "ATTR($1)\n" if ($dbg_values > 1); + $av_pending = $type; + $type = 'N'; + + } elsif ($cur =~ /^(sizeof)\s*(\()?/o) { + print "SIZEOF($1)\n" if ($dbg_values > 1); + if (defined $2) { + $av_pending = 'V'; + } + $type = 'N'; + + } elsif ($cur =~ /^(if|while|for)\b/o) { + print "COND($1)\n" if ($dbg_values > 1); + $av_pending = 'E'; + $type = 'N'; + + } elsif ($cur =~/^(case)/o) { + print "CASE($1)\n" if ($dbg_values > 1); + $av_pend_colon = 'C'; + $type = 'N'; + + } elsif ($cur =~/^(return|else|goto|typeof|__typeof__)\b/o) { + print "KEYWORD($1)\n" if ($dbg_values > 1); + $type = 'N'; + + } elsif ($cur =~ /^(\()/o) { + print "PAREN('$1')\n" if ($dbg_values > 1); + push(@av_paren_type, $av_pending); + $av_pending = '_'; + $type = 'N'; + + } elsif ($cur =~ /^(\))/o) { + my $new_type = pop(@av_paren_type); + if ($new_type ne '_') { + $type = $new_type; + print "PAREN('$1') -> $type\n" + if ($dbg_values > 1); + } else { + print "PAREN('$1')\n" if ($dbg_values > 1); + } + + } elsif ($cur =~ /^($Ident)\s*\(/o) { + print "FUNC($1)\n" if ($dbg_values > 1); + $type = 'V'; + $av_pending = 'V'; + + } elsif ($cur =~ /^($Ident\s*):(?:\s*\d+\s*(,|=|;))?/) { + if (defined $2 && $type eq 'C' || $type eq 'T') { + $av_pend_colon = 'B'; + } elsif ($type eq 'E') { + $av_pend_colon = 'L'; + } + print "IDENT_COLON($1,$type>$av_pend_colon)\n" if ($dbg_values > 1); + $type = 'V'; 
+ + } elsif ($cur =~ /^($Ident|$Constant)/o) { + print "IDENT($1)\n" if ($dbg_values > 1); + $type = 'V'; + + } elsif ($cur =~ /^($Assignment)/o) { + print "ASSIGN($1)\n" if ($dbg_values > 1); + $type = 'N'; + + } elsif ($cur =~/^(;|{|})/) { + print "END($1)\n" if ($dbg_values > 1); + $type = 'E'; + $av_pend_colon = 'O'; + + } elsif ($cur =~/^(,)/) { + print "COMMA($1)\n" if ($dbg_values > 1); + $type = 'C'; + + } elsif ($cur =~ /^(\?)/o) { + print "QUESTION($1)\n" if ($dbg_values > 1); + $type = 'N'; + + } elsif ($cur =~ /^(:)/o) { + print "COLON($1,$av_pend_colon)\n" if ($dbg_values > 1); + + substr($var, length($res), 1, $av_pend_colon); + if ($av_pend_colon eq 'C' || $av_pend_colon eq 'L') { + $type = 'E'; + } else { + $type = 'N'; + } + $av_pend_colon = 'O'; + + } elsif ($cur =~ /^(\[)/o) { + print "CLOSE($1)\n" if ($dbg_values > 1); + $type = 'N'; + + } elsif ($cur =~ /^(-(?![->])|\+(?!\+)|\*|\&\&|\&)/o) { + my $variant; + + print "OPV($1)\n" if ($dbg_values > 1); + if ($type eq 'V') { + $variant = 'B'; + } else { + $variant = 'U'; + } + + substr($var, length($res), 1, $variant); + $type = 'N'; + + } elsif ($cur =~ /^($Operators)/o) { + print "OP($1)\n" if ($dbg_values > 1); + if ($1 ne '++' && $1 ne '--') { + $type = 'N'; + } + + } elsif ($cur =~ /(^.)/o) { + print "C($1)\n" if ($dbg_values > 1); + } + if (defined $1) { + $cur = substr($cur, length($1)); + $res .= $type x length($1); + } + } + + return ($res, $var); +} + +sub possible { + my ($possible, $line) = @_; + my $notPermitted = qr{(?: + ^(?: + $Modifier| + $Storage| + $Type| + DEFINE_\S+ + )$| + ^(?: + goto| + return| + case| + else| + asm|__asm__| + do| + \#| + \#\#| + )(?:\s|$)| + ^(?:typedef|struct|enum)\b + )}x; + warn "CHECK<$possible> ($line)\n" if ($dbg_possible > 2); + if ($possible !~ $notPermitted) { + # Check for modifiers. 
+		$possible =~ s/\s*$Storage\s*//g;
+		$possible =~ s/\s*$Sparse\s*//g;
+		if ($possible =~ /^\s*$/) {
+
+		} elsif ($possible =~ /\s/) {
+			# Several words left: drop known types and record each
+			# remaining permitted word as a modifier.
+			$possible =~ s/\s*$Type\s*//g;
+			for my $modifier (split(' ', $possible)) {
+				if ($modifier !~ $notPermitted) {
+					warn "MODIFIER: $modifier ($possible) ($line)\n" if ($dbg_possible);
+					push(@modifierListFile, $modifier);
+				}
+			}
+
+		} else {
+			# A single word left: record it as a type.
+			warn "POSSIBLE: $possible ($line)\n" if ($dbg_possible);
+			push(@typeListFile, $possible);
+		}
+		# Rebuild the type regexes so the new entries take effect.
+		build_types();
+	} else {
+		warn "NOTPOSS: $possible ($line)\n" if ($dbg_possible > 1);
+	}
+}
+
+# Text placed in front of the level in every message built by report().
+my $prefix = '';
+
+# Decide whether messages of $type are shown.  The type is upper-cased
+# first.  When %use_type is non-empty only the types listed there are shown;
+# otherwise every type not present in %ignore_type is shown.
+sub show_type {
+	my ($type) = @_;
+
+	$type =~ tr/[a-z]/[A-Z]/;
+
+	return defined $use_type{$type} if (scalar keys %use_type > 0);
+
+	return !defined $ignore_type{$type};
+}
+
+# Format one message and append it to @report.
+#   $level - 'ERROR', 'WARNING' or anything else (coloured green)
+#   $type  - message type, filtered through show_type()
+#   $msg   - message text, usually followed by context lines
+# Returns 0 when the message is filtered out (hidden type, or $tst_only is
+# defined and does not occur in $msg), 1 once it has been queued.
+sub report {
+	my ($level, $type, $msg) = @_;
+
+	if (!show_type($type) ||
+	    (defined $tst_only && $msg !~ /\Q$tst_only\E/)) {
+		return 0;
+	}
+	my $output = '';
+	if ($color) {
+		if ($level eq 'ERROR') {
+			$output .= RED;
+		} elsif ($level eq 'WARNING') {
+			$output .= YELLOW;
+		} else {
+			$output .= GREEN;
+		}
+	}
+	$output .= $prefix . $level . ':';
+	if ($show_types) {
+		$output .= BLUE if ($color);
+		$output .= "$type:";
+	}
+	$output .= RESET if ($color);
+	$output .= ' ' . $msg . "\n";
+
+	if ($showfile) {
+		# Drop the second line of the message.
+		my @lines = split("\n", $output, -1);
+		splice(@lines, 1, 1);
+		$output = join("\n", @lines);
+	}
+	# Terse mode keeps only the first line.
+	$output = (split('\n', $output))[0] . "\n" if ($terse);
+
+	push(our @report, $output);
+
+	return 1;
+}
+
+# Return the messages queued by report().
+sub report_dump {
+	our @report;
+}
+
+# If $$lineRef is a "@@ -a,b +c,d @@" hunk header, rewrite its "+c,d" part
+# in place to "+(c + $offset),(d + $length)".  Other lines are left alone.
+sub fixup_current_range {
+	my ($lineRef, $offset, $length) = @_;
+
+	if ($$lineRef =~ /^\@\@ -\d+,\d+ \+(\d+),(\d+) \@\@/) {
+		my $o = $1;
+		my $l = $2;
+		my $no = $o + $offset;
+		my $nl = $l + $length;
+		$$lineRef =~ s/\+$o,$l \@\@/\+$no,$nl \@\@/;
+	}
+}
+
+# Build and return a new list of patch lines from @$linesRef with the
+# recorded insertions (@$insertedRef) and deletions (@$deletedRef) applied.
+# Each record is a hash with LINENR (0-based index into the old lines) and
+# LINE.  Hunk headers are adjusted via fixup_current_range() so that the
+# "+start,length" ranges stay consistent; the running offset is reset at
+# every "+++"/"---" file header.  The input array is not modified.
+sub fix_inserted_deleted_lines {
+	my ($linesRef, $insertedRef, $deletedRef) = @_;
+
+	my $range_last_linenr = 0;
+	my $delta_offset = 0;
+
+	my $old_linenr = 0;
+	my $new_linenr = 0;
+
+	my $next_insert = 0;
+	my $next_delete = 0;
+
+	my @lines = ();
+
+	my $inserted = @{$insertedRef}[$next_insert++];
+	my $deleted = @{$deletedRef}[$next_delete++];
+
+	foreach my $old_line (@{$linesRef}) {
+		my $save_line = 1;
+		my $line = $old_line;	#don't modify the array
+		if ($line =~ /^(?:\+\+\+|\-\-\-)\s+\S+/) {	#new filename
+			$delta_offset = 0;
+		} elsif ($line =~ /^\@\@ -\d+,\d+ \+\d+,\d+ \@\@/) {	#new hunk
+			# Remember where this header lands in the output so later
+			# insertions/deletions can patch its length.
+			$range_last_linenr = $new_linenr;
+			fixup_current_range(\$line, $delta_offset, 0);
+		}
+
+		while (defined($deleted) && ${$deleted}{'LINENR'} == $old_linenr) {
+			$deleted = @{$deletedRef}[$next_delete++];
+			$save_line = 0;
+			fixup_current_range(\$lines[$range_last_linenr], $delta_offset--, -1);
+		}
+
+		while (defined($inserted) && ${$inserted}{'LINENR'} == $old_linenr) {
+			push(@lines, ${$inserted}{'LINE'});
+			$inserted = @{$insertedRef}[$next_insert++];
+			$new_linenr++;
+			fixup_current_range(\$lines[$range_last_linenr], $delta_offset++, 1);
+		}
+
+		if ($save_line) {
+			push(@lines, $line);
+			$new_linenr++;
+		}
+
+		$old_linenr++;
+	}
+
+	return @lines;
+}
+
+# Queue an insertion record { LINENR, LINE } on @fixed_inserted.
+sub fix_insert_line {
+	my ($linenr, $line) = @_;
+
+	my $inserted = {
+		LINENR => $linenr,
+		LINE => $line,
+	};
+	push(@fixed_inserted, $inserted);
+}
+
+# Queue a deletion record { LINENR, LINE } on @fixed_deleted.
+sub fix_delete_line {
+	my ($linenr, $line) = @_;
+
+	my $deleted = {
+		LINENR => $linenr,
+		LINE => $line,
+	};
+
+	push(@fixed_deleted, $deleted);
+}
+
+sub ERROR {
+	my ($type, $msg) = @_;
+
+	if
(report("ERROR", $type, $msg)) {
+		our $clean = 0;
+		our $cnt_error++;
+		return 1;
+	}
+	return 0;
+}
+# Report a WARNING of $type.  When report() queues it, clear $clean, bump
+# $cnt_warn and return 1; otherwise return 0.
+sub WARN {
+	my ($type, $msg) = @_;
+
+	if (report("WARNING", $type, $msg)) {
+		our $clean = 0;
+		our $cnt_warn++;
+		return 1;
+	}
+	return 0;
+}
+# Report a CHECK of $type, but only when $check is enabled.  When report()
+# queues it, clear $clean, bump $cnt_chk and return 1; otherwise return 0.
+sub CHK {
+	my ($type, $msg) = @_;
+
+	if ($check && report("CHECK", $type, $msg)) {
+		our $clean = 0;
+		our $cnt_chk++;
+		return 1;
+	}
+	return 0;
+}
+
+# Warn (USE_RELATIVE_PATH) when $absolute names a file inside the tree.
+# Leading path components are stripped one at a time until the remainder
+# exists as a regular file under $root.  Returns 0 when no suffix matches;
+# otherwise the result of WARN(), or a false value when the stripped prefix
+# is the accepted ".../".
+sub check_absolute_file {
+	my ($absolute, $herecurr) = @_;
+	my $file = $absolute;
+
+	##print "absolute<$absolute>\n";
+
+	# See if any suffix of this path is a path within the tree.
+	# Track the result explicitly: consulting the implicit "_" stat buffer
+	# afterwards would read stale data when the loop body never runs
+	# (a path that contains no '/').
+	my $found = 0;
+	while ($file =~ s@^[^/]*/@@) {
+		if (-f "$root/$file") {
+			##print "file<$file>\n";
+			$found = 1;
+			last;
+		}
+	}
+	if (!$found) {
+		return 0;
+	}
+
+	# It is, so see if the prefix is acceptable.
+	my $prefix = $absolute;
+	substr($prefix, -length($file)) = '';
+
+	##print "prefix<$prefix>\n";
+	if ($prefix ne ".../") {
+		WARN("USE_RELATIVE_PATH",
+		     "use relative pathname instead of absolute in changelog text\n" . $herecurr);
+	}
+}
+
+# Return $string without leading and trailing whitespace.
+sub trim {
+	my ($string) = @_;
+
+	$string =~ s/^\s+|\s+$//g;
+
+	return $string;
+}
+
+# Return $string without leading whitespace.
+sub ltrim {
+	my ($string) = @_;
+
+	$string =~ s/^\s+//;
+
+	return $string;
+}
+
+# Return $string without trailing whitespace.
+sub rtrim {
+	my ($string) = @_;
+
+	$string =~ s/\s+$//;
+
+	return $string;
+}
+
+# Return $string with every match of the pattern $find replaced by $replace.
+# $find is interpolated into the regex as-is, so metacharacters are active.
+sub string_find_replace {
+	my ($string, $find, $replace) = @_;
+
+	$string =~ s/$find/$replace/g;
+
+	return $string;
+}
+
+# Normalise leading whitespace to tabs: each run of 8 spaces following the
+# leading tabs becomes a tab, and 1-7 spaces directly before a tab are
+# dropped.
+sub tabify {
+	my ($leading) = @_;
+
+	my $source_indent = 8;
+	my $max_spaces_before_tab = $source_indent - 1;
+	my $spaces_to_tab = " " x $source_indent;
+
+	#convert leading spaces to tabs
+	1 while $leading =~ s@^([\t]*)$spaces_to_tab@$1\t@g;
+	#Remove spaces before a tab
+	1 while $leading =~ s@^([\t]*)( {1,$max_spaces_before_tab})\t@$1\t@g;
+
+	return "$leading";
+}
+
+sub pos_last_openparen {
+	my ($line) = @_;
+
+	my $pos = 0;
+
+	my $opens = $line =~ tr/\(/\(/;
+	my $closes = $line =~ tr/\)/\)/;
+
+	my $last_openparen = 0;
+
+	if (($opens == 0) || ($closes >= $opens)) {
+		return -1;
+
} + + my $len = length($line); + + for ($pos = 0; $pos < $len; $pos++) { + my $string = substr($line, $pos); + if ($string =~ /^($FuncArg|$balanced_parens)/) { + $pos += length($1) - 1; + } elsif (substr($line, $pos, 1) eq '(') { + $last_openparen = $pos; + } elsif (index($string, '(') == -1) { + last; + } + } + + return length(expand_tabs(substr($line, 0, $last_openparen))) + 1; +} + +sub process { + my $filename = shift; + + my $linenr=0; + my $prevline=""; + my $prevrawline=""; + my $stashline=""; + my $stashrawline=""; + + my $length; + my $indent; + my $previndent=0; + my $stashindent=0; + + our $clean = 1; + my $signoff = 0; + my $is_patch = 0; + my $in_header_lines = $file ? 0 : 1; + my $in_commit_log = 0; #Scanning lines before patch + my $has_commit_log = 0; #Encountered lines before patch + my $commit_log_possible_stack_dump = 0; + my $commit_log_long_line = 0; + my $commit_log_has_diff = 0; + my $reported_maintainer_file = 0; + my $non_utf8_charset = 0; + + my $last_blank_line = 0; + my $last_coalesced_string_linenr = -1; + + our @report = (); + our $cnt_lines = 0; + our $cnt_error = 0; + our $cnt_warn = 0; + our $cnt_chk = 0; + + # Trace the real file/line as we go. + my $realfile = ''; + my $realline = 0; + my $realcnt = 0; + my $here = ''; + my $context_function; #undef'd unless there's a known function + my $in_comment = 0; + my $comment_edge = 0; + my $first_line = 0; + my $p1_prefix = ''; + + my $prev_values = 'E'; + + # suppression flags + my %suppress_ifbraces; + my %suppress_whiletrailers; + my %suppress_export; + my $suppress_statement = 0; + + my %signatures = (); + + # Pre-scan the patch sanitizing the lines. + # Pre-scan the patch looking for any __setup documentation. 
+ # + my @setup_docs = (); + my $setup_docs = 0; + + my $camelcase_file_seeded = 0; + + my $checklicenseline = 1; + + sanitise_line_reset(); + my $line; + foreach my $rawline (@rawlines) { + $linenr++; + $line = $rawline; + + push(@fixed, $rawline) if ($fix); + + if ($rawline=~/^\+\+\+\s+(\S+)/) { + $setup_docs = 0; + if ($1 =~ m@Documentation/admin-guide/kernel-parameters.rst$@) { + $setup_docs = 1; + } + #next; + } + if ($rawline =~ /^\@\@ -\d+(?:,\d+)? \+(\d+)(,(\d+))? \@\@/) { + $realline=$1-1; + if (defined $2) { + $realcnt=$3+1; + } else { + $realcnt=1+1; + } + $in_comment = 0; + + # Guestimate if this is a continuing comment. Run + # the context looking for a comment "edge". If this + # edge is a close comment then we must be in a comment + # at context start. + my $edge; + my $cnt = $realcnt; + for (my $ln = $linenr + 1; $cnt > 0; $ln++) { + next if (defined $rawlines[$ln - 1] && + $rawlines[$ln - 1] =~ /^-/); + $cnt--; + #print "RAW<$rawlines[$ln - 1]>\n"; + last if (!defined $rawlines[$ln - 1]); + if ($rawlines[$ln - 1] =~ m@(/\*|\*/)@ && + $rawlines[$ln - 1] !~ m@"[^"]*(?:/\*|\*/)[^"]*"@) { + ($edge) = $1; + last; + } + } + if (defined $edge && $edge eq '*/') { + $in_comment = 1; + } + + # Guestimate if this is a continuing comment. If this + # is the start of a diff block and this line starts + # ' *' then it is very likely a comment. + if (!defined $edge && + $rawlines[$linenr] =~ m@^.\s*(?:\*\*+| \*)(?:\s|$)@) + { + $in_comment = 1; + } + + ##print "COMMENT:$in_comment edge<$edge> $rawline\n"; + sanitise_line_reset($in_comment); + + } elsif ($realcnt && $rawline =~ /^(?:\+| |$)/) { + # Standardise the strings and chars within the input to + # simplify matching -- only bother with positive lines. 
+ $line = sanitise_line($rawline); + } + push(@lines, $line); + + if ($realcnt > 1) { + $realcnt-- if ($line =~ /^(?:\+| |$)/); + } else { + $realcnt = 0; + } + + #print "==>$rawline\n"; + #print "-->$line\n"; + + if ($setup_docs && $line =~ /^\+/) { + push(@setup_docs, $line); + } + } + + $prefix = ''; + + $realcnt = 0; + $linenr = 0; + $fixlinenr = -1; + foreach my $line (@lines) { + $linenr++; + $fixlinenr++; + my $sline = $line; #copy of $line + $sline =~ s/$;/ /g; #with comments as spaces + + my $rawline = $rawlines[$linenr - 1]; + +# check if it's a mode change, rename or start of a patch + if (!$in_commit_log && + ($line =~ /^ mode change [0-7]+ => [0-7]+ \S+\s*$/ || + ($line =~ /^rename (?:from|to) \S+\s*$/ || + $line =~ /^diff --git a\/[\w\/\.\_\-]+ b\/\S+\s*$/))) { + $is_patch = 1; + } + +#extract the line range in the file after the patch is applied + if (!$in_commit_log && + $line =~ /^\@\@ -\d+(?:,\d+)? \+(\d+)(,(\d+))? \@\@(.*)/) { + my $context = $4; + $is_patch = 1; + $first_line = $linenr + 1; + $realline=$1-1; + if (defined $2) { + $realcnt=$3+1; + } else { + $realcnt=1+1; + } + annotate_reset(); + $prev_values = 'E'; + + %suppress_ifbraces = (); + %suppress_whiletrailers = (); + %suppress_export = (); + $suppress_statement = 0; + if ($context =~ /\b(\w+)\s*\(/) { + $context_function = $1; + } else { + undef $context_function; + } + next; + +# track the line number as we move through the hunk, note that +# new versions of GNU diff omit the leading space on completely +# blank context lines so we need to count that too. + } elsif ($line =~ /^( |\+|$)/) { + $realline++; + $realcnt-- if ($realcnt != 0); + + # Measure the line length and indent. + ($length, $indent) = line_stats($rawline); + + # Track the previous line. 
+ ($prevline, $stashline) = ($stashline, $line); + ($previndent, $stashindent) = ($stashindent, $indent); + ($prevrawline, $stashrawline) = ($stashrawline, $rawline); + + #warn "line<$line>\n"; + + } elsif ($realcnt == 1) { + $realcnt--; + } + + my $hunk_line = ($realcnt != 0); + + $here = "#$linenr: " if (!$file); + $here = "#$realline: " if ($file); + + my $found_file = 0; + # extract the filename as it passes + if ($line =~ /^diff --git.*?(\S+)$/) { + $realfile = $1; + $realfile =~ s@^([^/]*)/@@ if (!$file); + $in_commit_log = 0; + $found_file = 1; + } elsif ($line =~ /^\+\+\+\s+(\S+)/) { + $realfile = $1; + $realfile =~ s@^([^/]*)/@@ if (!$file); + $in_commit_log = 0; + + $p1_prefix = $1; + if (!$file && $tree && $p1_prefix ne '' && + -e "$root/$p1_prefix") { + WARN("PATCH_PREFIX", + "patch prefix '$p1_prefix' exists, appears to be a -p0 patch\n"); + } + + if ($realfile =~ m@^include/asm/@) { + ERROR("MODIFIED_INCLUDE_ASM", + "do not modify files in include/asm, change architecture specific files in include/asm-\n" . "$here$rawline\n"); + } + $found_file = 1; + } + +#make up the handle for any error we report on this line + if ($showfile) { + $prefix = "$realfile:$realline: " + } elsif ($emacs) { + if ($file) { + $prefix = "$filename:$realline: "; + } else { + $prefix = "$filename:$linenr: "; + } + } + + if ($found_file) { + if (is_maintained_obsolete($realfile)) { + WARN("OBSOLETE", + "$realfile is marked as 'obsolete' in the MAINTAINERS hierarchy. 
No unnecessary modifications please.\n"); + } + if ($realfile =~ m@^(?:drivers/net/|net/|drivers/staging/)@) { + $check = 1; + } else { + $check = $check_orig; + } + $checklicenseline = 1; + next; + } + + $here .= "FILE: $realfile:$realline:" if ($realcnt != 0); + + my $hereline = "$here\n$rawline\n"; + my $herecurr = "$here\n$rawline\n"; + my $hereprev = "$here\n$prevrawline\n$rawline\n"; + + $cnt_lines++ if ($realcnt != 0); + +# Check if the commit log has what seems like a diff which can confuse patch + if ($in_commit_log && !$commit_log_has_diff && + (($line =~ m@^\s+diff\b.*a/[\w/]+@ && + $line =~ m@^\s+diff\b.*a/([\w/]+)\s+b/$1\b@) || + $line =~ m@^\s*(?:\-\-\-\s+a/|\+\+\+\s+b/)@ || + $line =~ m/^\s*\@\@ \-\d+,\d+ \+\d+,\d+ \@\@/)) { + ERROR("DIFF_IN_COMMIT_MSG", + "Avoid using diff content in the commit message - patch(1) might not work\n" . $herecurr); + $commit_log_has_diff = 1; + } + +# Check for incorrect file permissions + if ($line =~ /^new (file )?mode.*[7531]\d{0,2}$/) { + my $permhere = $here . "FILE: $realfile\n"; + if ($realfile !~ m@scripts/@ && + $realfile !~ /\.(py|pl|awk|sh)$/) { + ERROR("EXECUTE_PERMISSIONS", + "do not set execute permissions for source files\n" . $permhere); + } + } + +# Check the patch for a signoff: + if ($line =~ /^\s*signed-off-by:/i) { + $signoff++; + $in_commit_log = 0; + } + +# Check if MAINTAINERS is being updated. If so, there's probably no need to +# emit the "does MAINTAINERS need updating?" message on file add/move/delete + if ($line =~ /^\s*MAINTAINERS\s*\|/) { + $reported_maintainer_file = 1; + } + +# Check signature styles + if (!$in_header_lines && + $line =~ /^(\s*)([a-z0-9_-]+by:|$signature_tags)(\s*)(.*)/i) { + my $space_before = $1; + my $sign_off = $2; + my $space_after = $3; + my $email = $4; + my $ucfirst_sign_off = ucfirst(lc($sign_off)); + + if ($sign_off !~ /$signature_tags/) { + WARN("BAD_SIGN_OFF", + "Non-standard signature: $sign_off\n" . 
$herecurr); + } + if (defined $space_before && $space_before ne "") { + if (WARN("BAD_SIGN_OFF", + "Do not use whitespace before $ucfirst_sign_off\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] = + "$ucfirst_sign_off $email"; + } + } + if ($sign_off =~ /-by:$/i && $sign_off ne $ucfirst_sign_off) { + if (WARN("BAD_SIGN_OFF", + "'$ucfirst_sign_off' is the preferred signature form\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] = + "$ucfirst_sign_off $email"; + } + + } + if (!defined $space_after || $space_after ne " ") { + if (WARN("BAD_SIGN_OFF", + "Use a single space after $ucfirst_sign_off\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] = + "$ucfirst_sign_off $email"; + } + } + + my ($email_name, $email_address, $comment) = parse_email($email); + my $suggested_email = format_email(($email_name, $email_address)); + if ($suggested_email eq "") { + ERROR("BAD_SIGN_OFF", + "Unrecognized email address: '$email'\n" . $herecurr); + } else { + my $dequoted = $suggested_email; + $dequoted =~ s/^"//; + $dequoted =~ s/" $comment" ne $email && + "$suggested_email$comment" ne $email) { + WARN("BAD_SIGN_OFF", + "email address '$email' might be better as '$suggested_email$comment'\n" . $herecurr); + } + } + +# Check for duplicate signatures + my $sig_nospace = $line; + $sig_nospace =~ s/\s//g; + $sig_nospace = lc($sig_nospace); + if (defined $signatures{$sig_nospace}) { + WARN("BAD_SIGN_OFF", + "Duplicate signature\n" . $herecurr); + } else { + $signatures{$sig_nospace} = 1; + } + } + +# Check email subject for common tools that don't need to be mentioned + if ($in_header_lines && + $line =~ /^Subject:.*\b(?:checkpatch|sparse|smatch)\b[^:]/i) { + WARN("EMAIL_SUBJECT", + "A patch subject line should describe the change not the tool that found it\n" . $herecurr); + } + +# Check for old stable address + if ($line =~ /^\s*cc:\s*.*?.*$/i) { + ERROR("STABLE_ADDRESS", + "The 'stable' address should be 'stable\@vger.kernel.org'\n" . 
$herecurr); + } + +# Check if the commit log is in a possible stack dump + if ($in_commit_log && !$commit_log_possible_stack_dump && + ($line =~ /^\s*(?:WARNING:|BUG:)/ || + $line =~ /^\s*\[\s*\d+\.\d{6,6}\s*\]/ || + # timestamp + $line =~ /^\s*\[\<[0-9a-fA-F]{8,}\>\]/)) { + # stack dump address + $commit_log_possible_stack_dump = 1; + } + +# Check for line lengths > 75 in commit log, warn once + if ($in_commit_log && !$commit_log_long_line && + length($line) > 75 && + !($line =~ /^\s*[a-zA-Z0-9_\/\.]+\s+\|\s+\d+/ || + # file delta changes + $line =~ /^\s*(?:[\w\.\-]+\/)++[\w\.\-]+:/ || + # filename then : + $line =~ /^\s*(?:Fixes:|Link:)/i || + # A Fixes: or Link: line + $commit_log_possible_stack_dump)) { + WARN("COMMIT_LOG_LONG_LINE", + "Possible unwrapped commit description (prefer a maximum 75 chars per line)\n" . $herecurr); + $commit_log_long_line = 1; + } + +# Reset possible stack dump if a blank line is found + if ($in_commit_log && $commit_log_possible_stack_dump && + $line =~ /^\s*$/) { + $commit_log_possible_stack_dump = 0; + } + +# Check for git id commit length and improperly formed commit descriptions + if ($in_commit_log && !$commit_log_possible_stack_dump && + $line !~ /^\s*(?:Link|Patchwork|http|https|BugLink):/i && + $line !~ /^This reverts commit [0-9a-f]{7,40}/ && + ($line =~ /\bcommit\s+[0-9a-f]{5,}\b/i || + ($line =~ /(?:\s|^)[0-9a-f]{12,40}(?:[\s"'\(\[]|$)/i && + $line !~ /[\<\[][0-9a-f]{12,40}[\>\]]/i && + $line !~ /\bfixes:\s*[0-9a-f]{12,40}/i))) { + my $init_char = "c"; + my $orig_commit = ""; + my $short = 1; + my $long = 0; + my $case = 1; + my $space = 1; + my $hasdesc = 0; + my $hasparens = 0; + my $id = '0123456789ab'; + my $orig_desc = "commit description"; + my $description = ""; + + if ($line =~ /\b(c)ommit\s+([0-9a-f]{5,})\b/i) { + $init_char = $1; + $orig_commit = lc($2); + } elsif ($line =~ /\b([0-9a-f]{12,40})\b/i) { + $orig_commit = lc($1); + } + + $short = 0 if ($line =~ /\bcommit\s+[0-9a-f]{12,40}/i); + $long = 1 if ($line 
=~ /\bcommit\s+[0-9a-f]{41,}/i); + $space = 0 if ($line =~ /\bcommit [0-9a-f]/i); + $case = 0 if ($line =~ /\b[Cc]ommit\s+[0-9a-f]{5,40}[^A-F]/); + if ($line =~ /\bcommit\s+[0-9a-f]{5,}\s+\("([^"]+)"\)/i) { + $orig_desc = $1; + $hasparens = 1; + } elsif ($line =~ /\bcommit\s+[0-9a-f]{5,}\s*$/i && + defined $rawlines[$linenr] && + $rawlines[$linenr] =~ /^\s*\("([^"]+)"\)/) { + $orig_desc = $1; + $hasparens = 1; + } elsif ($line =~ /\bcommit\s+[0-9a-f]{5,}\s+\("[^"]+$/i && + defined $rawlines[$linenr] && + $rawlines[$linenr] =~ /^\s*[^"]+"\)/) { + $line =~ /\bcommit\s+[0-9a-f]{5,}\s+\("([^"]+)$/i; + $orig_desc = $1; + $rawlines[$linenr] =~ /^\s*([^"]+)"\)/; + $orig_desc .= " " . $1; + $hasparens = 1; + } + + ($id, $description) = git_commit_info($orig_commit, + $id, $orig_desc); + + if (defined($id) && + ($short || $long || $space || $case || ($orig_desc ne $description) || !$hasparens)) { + ERROR("GIT_COMMIT_ID", + "Please use git commit description style 'commit <12+ chars of sha1> (\"\")' - ie: '${init_char}ommit $id (\"$description\")'\n" . $herecurr); + } + } + + +# Check for wrappage within a valid hunk of the file + if ($realcnt != 0 && $line !~ m{^(?:\+|-| |\\ No newline|$)}) { + ERROR("CORRUPTED_PATCH", + "patch seems to be corrupt (line wrapped?)\n" . + $herecurr) if (!$emitted_corrupt++); + } + +# UTF-8 regex found at http://www.w3.org/International/questions/qa-forms-utf-8.en.php + if (($realfile =~ /^$/ || $line =~ /^\+/) && + $rawline !~ m/^$UTF8*$/) { + my ($utf8_prefix) = ($rawline =~ /^($UTF8*)/); + + my $blank = copy_spacing($rawline); + my $ptr = substr($blank, 0, length($utf8_prefix)) . "^"; + my $hereptr = "$hereline$ptr\n"; + + CHK("INVALID_UTF8", + "Invalid UTF-8, patch and commit message should be encoded in UTF-8\n" . 
$hereptr); + } + +# Check if it's the start of a commit log +# (not a header line and we haven't seen the patch filename) + if ($in_header_lines && $realfile =~ /^$/ && + !($rawline =~ /^\s+(?:\S|$)/ || + $rawline =~ /^(?:commit\b|from\b|[\w-]+:)/i)) { + $in_header_lines = 0; + $in_commit_log = 1; + $has_commit_log = 1; + } + +# Check if there is UTF-8 in a commit log when a mail header has explicitly +# declined it, i.e defined some charset where it is missing. + if ($in_header_lines && + $rawline =~ /^Content-Type:.+charset="(.+)".*$/ && + $1 !~ /utf-8/i) { + $non_utf8_charset = 1; + } + + if ($in_commit_log && $non_utf8_charset && $realfile =~ /^$/ && + $rawline =~ /$NON_ASCII_UTF8/) { + WARN("UTF8_BEFORE_PATCH", + "8-bit UTF-8 used in possible commit log\n" . $herecurr); + } + +# Check for absolute kernel paths in commit message + if ($tree && $in_commit_log) { + while ($line =~ m{(?:^|\s)(/\S*)}g) { + my $file = $1; + + if ($file =~ m{^(.*?)(?::\d+)+:?$} && + check_absolute_file($1, $herecurr)) { + # + } else { + check_absolute_file($file, $herecurr); + } + } + } + +# Check for various typo / spelling mistakes + if (defined($misspellings) && + ($in_commit_log || $line =~ /^(?:\+|Subject:)/i)) { + while ($rawline =~ /(?:^|[^a-z@])($misspellings)(?:\b|$|[^a-z@])/gi) { + my $typo = $1; + my $typo_fix = $spelling_fix{lc($typo)}; + $typo_fix = ucfirst($typo_fix) if ($typo =~ /^[A-Z]/); + $typo_fix = uc($typo_fix) if ($typo =~ /^[A-Z]+$/); + my $msg_level = \&WARN; + $msg_level = \&CHK if ($file); + if (&{$msg_level}("TYPO_SPELLING", + "'$typo' may be misspelled - perhaps '$typo_fix'?\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/(^|[^A-Za-z@])($typo)($|[^A-Za-z@])/$1$typo_fix$3/; + } + } + } + +# ignore non-hunk lines and lines being removed + next if (!$hunk_line || $line =~ /^-/); + +#trailing whitespace + if ($line =~ /^\+.*\015/) { + my $herevet = "$here\n" . cat_vet($rawline) . "\n"; + if (ERROR("DOS_LINE_ENDINGS", + "DOS line endings\n" . 
$herevet) && + $fix) { + $fixed[$fixlinenr] =~ s/[\s\015]+$//; + } + } elsif ($rawline =~ /^\+.*\S\s+$/ || $rawline =~ /^\+\s+$/) { + my $herevet = "$here\n" . cat_vet($rawline) . "\n"; + if (ERROR("TRAILING_WHITESPACE", + "trailing whitespace\n" . $herevet) && + $fix) { + $fixed[$fixlinenr] =~ s/\s+$//; + } + + $rpt_cleaners = 1; + } + +# Check for FSF mailing addresses. + if ($rawline =~ /\bwrite to the Free/i || + $rawline =~ /\b675\s+Mass\s+Ave/i || + $rawline =~ /\b59\s+Temple\s+Pl/i || + $rawline =~ /\b51\s+Franklin\s+St/i) { + my $herevet = "$here\n" . cat_vet($rawline) . "\n"; + my $msg_level = \&ERROR; + $msg_level = \&CHK if ($file); + &{$msg_level}("FSF_MAILING_ADDRESS", + "Do not include the paragraph about writing to the Free Software Foundation's mailing address from the sample GPL notice. The FSF has changed addresses in the past, and may do so again. Linux already includes a copy of the GPL.\n" . $herevet) + } + +# check for Kconfig help text having a real description +# Only applies when adding the entry originally, after that we do not have +# sufficient context to determine whether it is indeed long enough. 
+ if ($realfile =~ /Kconfig/ && + # 'choice' is usually the last thing on the line (though + # Kconfig supports named choices), so use a word boundary + # (\b) rather than a whitespace character (\s) + $line =~ /^\+\s*(?:config|menuconfig|choice)\b/) { + my $length = 0; + my $cnt = $realcnt; + my $ln = $linenr + 1; + my $f; + my $is_start = 0; + my $is_end = 0; + for (; $cnt > 0 && defined $lines[$ln - 1]; $ln++) { + $f = $lines[$ln - 1]; + $cnt-- if ($lines[$ln - 1] !~ /^-/); + $is_end = $lines[$ln - 1] =~ /^\+/; + + next if ($f =~ /^-/); + last if (!$file && $f =~ /^\@\@/); + + if ($lines[$ln - 1] =~ /^\+\s*(?:bool|tristate|prompt)\s*["']/) { + $is_start = 1; + } elsif ($lines[$ln - 1] =~ /^\+\s*(?:help|---help---)\s*$/) { + if ($lines[$ln - 1] =~ "---help---") { + WARN("CONFIG_DESCRIPTION", + "prefer 'help' over '---help---' for new help texts\n" . $herecurr); + } + $length = -1; + } + + $f =~ s/^.//; + $f =~ s/#.*//; + $f =~ s/^\s+//; + next if ($f =~ /^$/); + + # This only checks context lines in the patch + # and so hopefully shouldn't trigger false + # positives, even though some of these are + # common words in help texts + if ($f =~ /^\s*(?:config|menuconfig|choice|endchoice| + if|endif|menu|endmenu|source)\b/x) { + $is_end = 1; + last; + } + $length++; + } + if ($is_start && $is_end && $length < $min_conf_desc_length) { + WARN("CONFIG_DESCRIPTION", + "please write a paragraph that describes the config symbol fully\n" . $herecurr); + } + #print "is_start<$is_start> is_end<$is_end> length<$length>\n"; + } + +# check for MAINTAINERS entries that don't have the right form + if ($realfile =~ /^MAINTAINERS$/ && + $rawline =~ /^\+[A-Z]:/ && + $rawline !~ /^\+[A-Z]:\t\S/) { + if (WARN("MAINTAINERS_STYLE", + "MAINTAINERS entries use one tab after TYPE:\n" . 
$herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/^(\+[A-Z]):\s*/$1:\t/; + } + } + +# discourage the use of boolean for type definition attributes of Kconfig options + if ($realfile =~ /Kconfig/ && + $line =~ /^\+\s*\bboolean\b/) { + WARN("CONFIG_TYPE_BOOLEAN", + "Use of boolean is deprecated, please use bool instead.\n" . $herecurr); + } + + if (($realfile =~ /Makefile.*/ || $realfile =~ /Kbuild.*/) && + ($line =~ /\+(EXTRA_[A-Z]+FLAGS).*/)) { + my $flag = $1; + my $replacement = { + 'EXTRA_AFLAGS' => 'asflags-y', + 'EXTRA_CFLAGS' => 'ccflags-y', + 'EXTRA_CPPFLAGS' => 'cppflags-y', + 'EXTRA_LDFLAGS' => 'ldflags-y', + }; + + WARN("DEPRECATED_VARIABLE", + "Use of $flag is deprecated, please use \`$replacement->{$flag} instead.\n" . $herecurr) if ($replacement->{$flag}); + } + +# check for DT compatible documentation + if (defined $root && + (($realfile =~ /\.dtsi?$/ && $line =~ /^\+\s*compatible\s*=\s*\"/) || + ($realfile =~ /\.[ch]$/ && $line =~ /^\+.*\.compatible\s*=\s*\"/))) { + + my @compats = $rawline =~ /\"([a-zA-Z0-9\-\,\.\+_]+)\"/g; + + my $dt_path = $root . "/Documentation/devicetree/bindings/"; + my $vp_file = $dt_path . "vendor-prefixes.txt"; + + foreach my $compat (@compats) { + my $compat2 = $compat; + $compat2 =~ s/\,[a-zA-Z0-9]*\-/\,<\.\*>\-/; + my $compat3 = $compat; + $compat3 =~ s/\,([a-z]*)[0-9]*\-/\,$1<\.\*>\-/; + `grep -Erq "$compat|$compat2|$compat3" $dt_path`; + if ( $? >> 8 ) { + WARN("UNDOCUMENTED_DT_STRING", + "DT compatible string \"$compat\" appears un-documented -- check $dt_path\n" . $herecurr); + } + + next if $compat !~ /^([a-zA-Z0-9\-]+)\,/; + my $vendor = $1; + `grep -Eq "^$vendor\\b" $vp_file`; + if ( $? >> 8 ) { + WARN("UNDOCUMENTED_DT_STRING", + "DT compatible string vendor \"$vendor\" appears un-documented -- check $vp_file\n" . 
$herecurr); + } + } + } + +# check we are in a valid source file if not then ignore this hunk + next if ($realfile !~ /\.(h|c|s|S|sh|dtsi|dts)$/); + +# line length limit (with some exclusions) +# +# There are a few types of lines that may extend beyond $max_line_length: +# logging functions like pr_info that end in a string +# lines with a single string +# #defines that are a single string +# lines with an RFC3986 like URL +# +# There are 3 different line length message types: +# LONG_LINE_COMMENT a comment starts before but extends beyond $max_line_length +# LONG_LINE_STRING a string starts before but extends beyond $max_line_length +# LONG_LINE all other lines longer than $max_line_length +# +# if LONG_LINE is ignored, the other 2 types are also ignored +# + + if ($line =~ /^\+/ && $length > $max_line_length) { + my $msg_type = "LONG_LINE"; + + # Check the allowed long line types first + + # logging functions that end in a string that starts + # before $max_line_length + if ($line =~ /^\+\s*$logFunctions\s*\(\s*(?:(?:KERN_\S+\s*|[^"]*))?($String\s*(?:|,|\)\s*;)\s*)$/ && + length(expand_tabs(substr($line, 1, length($line) - length($1) - 1))) <= $max_line_length) { + $msg_type = ""; + + # lines with only strings (w/ possible termination) + # #defines with only strings + } elsif ($line =~ /^\+\s*$String\s*(?:\s*|,|\)\s*;)\s*$/ || + $line =~ /^\+\s*#\s*define\s+\w+\s+$String$/) { + $msg_type = ""; + + # More special cases + } elsif ($line =~ /^\+.*\bEFI_GUID\s*\(/ || + $line =~ /^\+\s*(?:\w+)?\s*DEFINE_PER_CPU/) { + $msg_type = ""; + + # URL ($rawline is used in case the URL is in a comment) + } elsif ($rawline =~ /^\+.*\b[a-z][\w\.\+\-]*:\/\/\S+/i) { + $msg_type = ""; + + # Otherwise set the alternate message types + + # a comment starts before $max_line_length + } elsif ($line =~ /($;[\s$;]*)$/ && + length(expand_tabs(substr($line, 1, length($line) - length($1) - 1))) <= $max_line_length) { + $msg_type = "LONG_LINE_COMMENT" + + # a quoted string starts before 
$max_line_length + } elsif ($sline =~ /\s*($String(?:\s*(?:\\|,\s*|\)\s*;\s*))?)$/ && + length(expand_tabs(substr($line, 1, length($line) - length($1) - 1))) <= $max_line_length) { + $msg_type = "LONG_LINE_STRING" + } + + if ($msg_type ne "" && + (show_type("LONG_LINE") || show_type($msg_type))) { + WARN($msg_type, + "line over $max_line_length characters\n" . $herecurr); + } + } + +# check for adding lines without a newline. + if ($line =~ /^\+/ && defined $lines[$linenr] && $lines[$linenr] =~ /^\\ No newline at end of file/) { + WARN("MISSING_EOF_NEWLINE", + "adding a line without newline at end of file\n" . $herecurr); + } + +# check we are in a valid source file C or perl if not then ignore this hunk + next if ($realfile !~ /\.(h|c|pl|dtsi|dts)$/); + +# at the beginning of a line any tabs must come first and anything +# more than 8 must use tabs. + if ($rawline =~ /^\+\s* \t\s*\S/ || + $rawline =~ /^\+\s* \s*/) { + my $herevet = "$here\n" . cat_vet($rawline) . "\n"; + $rpt_cleaners = 1; + if (ERROR("CODE_INDENT", + "code indent should use tabs where possible\n" . $herevet) && + $fix) { + $fixed[$fixlinenr] =~ s/^\+([ \t]+)/"\+" . tabify($1)/e; + } + } + +# check for space before tabs. + if ($rawline =~ /^\+/ && $rawline =~ / \t/) { + my $herevet = "$here\n" . cat_vet($rawline) . "\n"; + if (WARN("SPACE_BEFORE_TAB", + "please, no space before tabs\n" . $herevet) && + $fix) { + while ($fixed[$fixlinenr] =~ + s/(^\+.*) {8,8}\t/$1\t\t/) {} + while ($fixed[$fixlinenr] =~ + s/(^\+.*) +\t/$1\t/) {} + } + } + +# check for assignments on the start of a line + if ($sline =~ /^\+\s+($Assignment)[^=]/) { + CHK("ASSIGNMENT_CONTINUATIONS", + "Assignment operator '$1' should be on the previous line\n" . $hereprev); + } + +# check for && or || at the start of a line + if ($rawline =~ /^\+\s*(&&|\|\|)/) { + CHK("LOGICAL_CONTINUATIONS", + "Logical continuations should be on the previous line\n" . 
$hereprev); + } + +# check indentation starts on a tab stop + if ($^V && $^V ge 5.10.0 && + $sline =~ /^\+\t+( +)(?:$c90_Keywords\b|\{\s*$|\}\s*(?:else\b|while\b|\s*$)|$Declare\s*$Ident\s*[;=])/) { + my $indent = length($1); + if ($indent % 8) { + if (WARN("TABSTOP", + "Statements should start on a tabstop\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s@(^\+\t+) +@$1 . "\t" x ($indent/8)@e; + } + } + } + +# check multi-line statement indentation matches previous line + if ($^V && $^V ge 5.10.0 && + $prevline =~ /^\+([ \t]*)((?:$c90_Keywords(?:\s+if)\s*)|(?:$Declare\s*)?(?:$Ident|\(\s*\*\s*$Ident\s*\))\s*|(?:\*\s*)*$Lval\s*=\s*$Ident\s*)\(.*(\&\&|\|\||,)\s*$/) { + $prevline =~ /^\+(\t*)(.*)$/; + my $oldindent = $1; + my $rest = $2; + + my $pos = pos_last_openparen($rest); + if ($pos >= 0) { + $line =~ /^(\+| )([ \t]*)/; + my $newindent = $2; + + my $goodtabindent = $oldindent . + "\t" x ($pos / 8) . + " " x ($pos % 8); + my $goodspaceindent = $oldindent . " " x $pos; + + if ($newindent ne $goodtabindent && + $newindent ne $goodspaceindent) { + + if (CHK("PARENTHESIS_ALIGNMENT", + "Alignment should match open parenthesis\n" . $hereprev) && + $fix && $line =~ /^\+/) { + $fixed[$fixlinenr] =~ + s/^\+[ \t]*/\+$goodtabindent/; + } + } + } + } + +# check for space after cast like "(int) foo" or "(struct foo) bar" +# avoid checking a few false positives: +# "sizeof(<type>)" or "__alignof__(<type>)" +# function pointer declarations like "(*foo)(int) = bar;" +# structure definitions like "(struct foo) { 0 };" +# multiline macros that define functions +# known attributes or the __attribute__ keyword + if ($line =~ /^\+(.*)\(\s*$Type\s*\)([ \t]++)((?![={]|\\$|$Attribute|__attribute__))/ && + (!defined($1) || $1 !~ /\b(?:sizeof|__alignof__)\s*$/)) { + if (CHK("SPACING", + "No space is necessary after a cast\n" . 
$herecurr) && + $fix) { + $fixed[$fixlinenr] =~ + s/(\(\s*$Type\s*\))[ \t]+/$1/; + } + } + +# Block comment styles +# Networking with an initial /* + if ($realfile =~ m@^(drivers/net/|net/)@ && + $prevrawline =~ /^\+[ \t]*\/\*[ \t]*$/ && + $rawline =~ /^\+[ \t]*\*/ && + $realline > 2) { + WARN("NETWORKING_BLOCK_COMMENT_STYLE", + "networking block comments don't use an empty /* line, use /* Comment...\n" . $hereprev); + } + +# Block comments use * on subsequent lines + if ($prevline =~ /$;[ \t]*$/ && #ends in comment + $prevrawline =~ /^\+.*?\/\*/ && #starting /* + $prevrawline !~ /\*\/[ \t]*$/ && #no trailing */ + $rawline =~ /^\+/ && #line is new + $rawline !~ /^\+[ \t]*\*/) { #no leading * + WARN("BLOCK_COMMENT_STYLE", + "Block comments use * on subsequent lines\n" . $hereprev); + } + +# Block comments use */ on trailing lines + if ($rawline !~ m@^\+[ \t]*\*/[ \t]*$@ && #trailing */ + $rawline !~ m@^\+.*/\*.*\*/[ \t]*$@ && #inline /*...*/ + $rawline !~ m@^\+.*\*{2,}/[ \t]*$@ && #trailing **/ + $rawline =~ m@^\+[ \t]*.+\*\/[ \t]*$@) { #non blank */ + WARN("BLOCK_COMMENT_STYLE", + "Block comments use a trailing */ on a separate line\n" . $herecurr); + } + +# Block comment * alignment + if ($prevline =~ /$;[ \t]*$/ && #ends in comment + $line =~ /^\+[ \t]*$;/ && #leading comment + $rawline =~ /^\+[ \t]*\*/ && #leading * + (($prevrawline =~ /^\+.*?\/\*/ && #leading /* + $prevrawline !~ /\*\/[ \t]*$/) || #no trailing */ + $prevrawline =~ /^\+[ \t]*\*/)) { #leading * + my $oldindent; + $prevrawline =~ m@^\+([ \t]*/?)\*@; + if (defined($1)) { + $oldindent = expand_tabs($1); + } else { + $prevrawline =~ m@^\+(.*/?)\*@; + $oldindent = expand_tabs($1); + } + $rawline =~ m@^\+([ \t]*)\*@; + my $newindent = $1; + $newindent = expand_tabs($newindent); + if (length($oldindent) ne length($newindent)) { + WARN("BLOCK_COMMENT_STYLE", + "Block comments should align the * on each line\n" . 
$hereprev); + } + } + +# check for missing blank lines after struct/union declarations +# with exceptions for various attributes and macros + if ($prevline =~ /^[\+ ]};?\s*$/ && + $line =~ /^\+/ && + !($line =~ /^\+\s*$/ || + $line =~ /^\+\s*EXPORT_SYMBOL/ || + $line =~ /^\+\s*MODULE_/i || + $line =~ /^\+\s*\#\s*(?:end|elif|else)/ || + $line =~ /^\+[a-z_]*init/ || + $line =~ /^\+\s*(?:static\s+)?[A-Z_]*ATTR/ || + $line =~ /^\+\s*DECLARE/ || + $line =~ /^\+\s*builtin_[\w_]*driver/ || + $line =~ /^\+\s*__setup/)) { + if (CHK("LINE_SPACING", + "Please use a blank line after function/struct/union/enum declarations\n" . $hereprev) && + $fix) { + fix_insert_line($fixlinenr, "\+"); + } + } + +# check for multiple consecutive blank lines + if ($prevline =~ /^[\+ ]\s*$/ && + $line =~ /^\+\s*$/ && + $last_blank_line != ($linenr - 1)) { + if (CHK("LINE_SPACING", + "Please don't use multiple blank lines\n" . $hereprev) && + $fix) { + fix_delete_line($fixlinenr, $rawline); + } + + $last_blank_line = $linenr; + } + +# check for missing blank lines after declarations + if ($sline =~ /^\+\s+\S/ && #Not at char 1 + # actual declarations + ($prevline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ || + # function pointer declarations + $prevline =~ /^\+\s+$Declare\s*\(\s*\*\s*$Ident\s*\)\s*[=,;:\[\(]/ || + # foo bar; where foo is some local typedef or #define + $prevline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ || + # known declaration macros + $prevline =~ /^\+\s+$declaration_macros/) && + # for "else if" which can look like "$Ident $Ident" + !($prevline =~ /^\+\s+$c90_Keywords\b/ || + # other possible extensions of declaration lines + $prevline =~ /(?:$Compare|$Assignment|$Operators)\s*$/ || + # not starting a section or a macro "\" extended line + $prevline =~ /(?:\{\s*|\\)$/) && + # looks like a declaration + !($sline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ || + # function pointer declarations + $sline =~ /^\+\s+$Declare\s*\(\s*\*\s*$Ident\s*\)\s*[=,;:\[\(]/ || + # foo bar; 
where foo is some local typedef or #define + $sline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ || + # known declaration macros + $sline =~ /^\+\s+$declaration_macros/ || + # start of struct or union or enum + $sline =~ /^\+\s+(?:union|struct|enum|typedef)\b/ || + # start or end of block or continuation of declaration + $sline =~ /^\+\s+(?:$|[\{\}\.\#\"\?\:\(\[])/ || + # bitfield continuation + $sline =~ /^\+\s+$Ident\s*:\s*\d+\s*[,;]/ || + # other possible extensions of declaration lines + $sline =~ /^\+\s+\(?\s*(?:$Compare|$Assignment|$Operators)/) && + # indentation of previous and current line are the same + (($prevline =~ /\+(\s+)\S/) && $sline =~ /^\+$1\S/)) { + if (WARN("LINE_SPACING", + "Missing a blank line after declarations\n" . $hereprev) && + $fix) { + fix_insert_line($fixlinenr, "\+"); + } + } + +# check for spaces at the beginning of a line. +# Exceptions: +# 1) within comments +# 2) indented preprocessor commands +# 3) hanging labels + if ($rawline =~ /^\+ / && $line !~ /^\+ *(?:$;|#|$Ident:)/) { + my $herevet = "$here\n" . cat_vet($rawline) . "\n"; + if (WARN("LEADING_SPACE", + "please, no spaces at the start of a line\n" . $herevet) && + $fix) { + $fixed[$fixlinenr] =~ s/^\+([ \t]+)/"\+" . tabify($1)/e; + } + } + +# check we are in a valid C source file if not then ignore this hunk + next if ($realfile !~ /\.(h|c)$/); + +# check for unusual line ending [ or ( + if ($line =~ /^\+.*([\[\(])\s*$/) { + CHK("OPEN_ENDED_LINE", + "Lines should not end with a '$1'\n" . 
$herecurr); + } + +# check if this appears to be the start function declaration, save the name + if ($sline =~ /^\+\{\s*$/ && + $prevline =~ /^\+(?:(?:(?:$Storage|$Inline)\s*)*\s*$Type\s*)?($Ident)\(/) { + $context_function = $1; + } + +# check if this appears to be the end of function declaration + if ($sline =~ /^\+\}\s*$/) { + undef $context_function; + } + +# check indentation of any line with a bare else +# (but not if it is a multiple line "if (foo) return bar; else return baz;") +# if the previous line is a break or return and is indented 1 tab more... + if ($sline =~ /^\+([\t]+)(?:}[ \t]*)?else(?:[ \t]*{)?\s*$/) { + my $tabs = length($1) + 1; + if ($prevline =~ /^\+\t{$tabs,$tabs}break\b/ || + ($prevline =~ /^\+\t{$tabs,$tabs}return\b/ && + defined $lines[$linenr] && + $lines[$linenr] !~ /^[ \+]\t{$tabs,$tabs}return/)) { + WARN("UNNECESSARY_ELSE", + "else is not generally useful after a break or return\n" . $hereprev); + } + } + +# check indentation of a line with a break; +# if the previous line is a goto or return and is indented the same # of tabs + if ($sline =~ /^\+([\t]+)break\s*;\s*$/) { + my $tabs = $1; + if ($prevline =~ /^\+$tabs(?:goto|return)\b/) { + WARN("UNNECESSARY_BREAK", + "break is not useful after a goto or return\n" . $hereprev); + } + } + +# check for RCS/CVS revision markers + if ($rawline =~ /^\+.*\$(Revision|Log|Id)(?:\$|)/) { + WARN("CVS_KEYWORD", + "CVS style keyword markers, these will _not_ be updated\n". $herecurr); + } + +# check for old HOTPLUG __dev<foo> section markings + if ($line =~ /\b(__dev(init|exit)(data|const|))\b/) { + WARN("HOTPLUG_SECTION", + "Using $1 is unnecessary\n" . 
$herecurr); + } + +# Check for potential 'bare' types + my ($stat, $cond, $line_nr_next, $remain_next, $off_next, + $realline_next); +#print "LINE<$line>\n"; + if ($linenr > $suppress_statement && + $realcnt && $sline =~ /.\s*\S/) { + ($stat, $cond, $line_nr_next, $remain_next, $off_next) = + ctx_statement_block($linenr, $realcnt, 0); + $stat =~ s/\n./\n /g; + $cond =~ s/\n./\n /g; + +#print "linenr<$linenr> <$stat>\n"; + # If this statement has no statement boundaries within + # it there is no point in retrying a statement scan + # until we hit end of it. + my $frag = $stat; $frag =~ s/;+\s*$//; + if ($frag !~ /(?:{|;)/) { +#print "skip<$line_nr_next>\n"; + $suppress_statement = $line_nr_next; + } + + # Find the real next line. + $realline_next = $line_nr_next; + if (defined $realline_next && + (!defined $lines[$realline_next - 1] || + substr($lines[$realline_next - 1], $off_next) =~ /^\s*$/)) { + $realline_next++; + } + + my $s = $stat; + $s =~ s/{.*$//s; + + # Ignore goto labels. + if ($s =~ /$Ident:\*$/s) { + + # Ignore functions being called + } elsif ($s =~ /^.\s*$Ident\s*\(/s) { + + } elsif ($s =~ /^.\s*else\b/s) { + + # declarations always start with types + } elsif ($prev_values eq 'E' && $s =~ /^.\s*(?:$Storage\s+)?(?:$Inline\s+)?(?:const\s+)?((?:\s*$Ident)+?)\b(?:\s+$Sparse)?\s*\**\s*(?:$Ident|\(\*[^\)]*\))(?:\s*$Modifier)?\s*(?:;|=|,|\()/s) { + my $type = $1; + $type =~ s/\s+/ /g; + possible($type, "A:" . $s); + + # definitions in global scope can only start with types + } elsif ($s =~ /^.(?:$Storage\s+)?(?:$Inline\s+)?(?:const\s+)?($Ident)\b\s*(?!:)/s) { + possible($1, "B:" . $s); + } + + # any (foo ... *) is a pointer cast, and foo is a type + while ($s =~ /\(($Ident)(?:\s+$Sparse)*[\s\*]+\s*\)/sg) { + possible($1, "C:" . $s); + } + + # Check for any sort of function declaration. 
+ # int foo(something bar, other baz); + # void (*store_gdt)(x86_descr_ptr *); + if ($prev_values eq 'E' && $s =~ /^(.(?:typedef\s*)?(?:(?:$Storage|$Inline)\s*)*\s*$Type\s*(?:\b$Ident|\(\*\s*$Ident\))\s*)\(/s) { + my ($name_len) = length($1); + + my $ctx = $s; + substr($ctx, 0, $name_len + 1, ''); + $ctx =~ s/\)[^\)]*$//; + + for my $arg (split(/\s*,\s*/, $ctx)) { + if ($arg =~ /^(?:const\s+)?($Ident)(?:\s+$Sparse)*\s*\**\s*(:?\b$Ident)?$/s || $arg =~ /^($Ident)$/s) { + + possible($1, "D:" . $s); + } + } + } + + } + +# +# Checks which may be anchored in the context. +# + +# Check for switch () and associated case and default +# statements should be at the same indent. + if ($line=~/\bswitch\s*\(.*\)/) { + my $err = ''; + my $sep = ''; + my @ctx = ctx_block_outer($linenr, $realcnt); + shift(@ctx); + for my $ctx (@ctx) { + my ($clen, $cindent) = line_stats($ctx); + if ($ctx =~ /^\+\s*(case\s+|default:)/ && + $indent != $cindent) { + $err .= "$sep$ctx\n"; + $sep = ''; + } else { + $sep = "[...]\n"; + } + } + if ($err ne '') { + ERROR("SWITCH_CASE_INDENT_LEVEL", + "switch and case should be at the same indent\n$hereline$err"); + } + } + +# if/while/etc brace do not go on next line, unless defining a do while loop, +# or if that brace on the next line is for something else + if ($line =~ /(.*)\b((?:if|while|for|switch|(?:[a-z_]+|)for_each[a-z_]+)\s*\(|do\b|else\b)/ && $line !~ /^.\s*\#/) { + my $pre_ctx = "$1$2"; + + my ($level, @ctx) = ctx_statement_level($linenr, $realcnt, 0); + + if ($line =~ /^\+\t{6,}/) { + WARN("DEEP_INDENTATION", + "Too many leading tabs - consider code refactoring\n" . 
$herecurr); + } + + my $ctx_cnt = $realcnt - $#ctx - 1; + my $ctx = join("\n", @ctx); + + my $ctx_ln = $linenr; + my $ctx_skip = $realcnt; + + while ($ctx_skip > $ctx_cnt || ($ctx_skip == $ctx_cnt && + defined $lines[$ctx_ln - 1] && + $lines[$ctx_ln - 1] =~ /^-/)) { + ##print "SKIP<$ctx_skip> CNT<$ctx_cnt>\n"; + $ctx_skip-- if (!defined $lines[$ctx_ln - 1] || $lines[$ctx_ln - 1] !~ /^-/); + $ctx_ln++; + } + + #print "realcnt<$realcnt> ctx_cnt<$ctx_cnt>\n"; + #print "pre<$pre_ctx>\nline<$line>\nctx<$ctx>\nnext<$lines[$ctx_ln - 1]>\n"; + + if ($ctx !~ /{\s*/ && defined($lines[$ctx_ln - 1]) && $lines[$ctx_ln - 1] =~ /^\+\s*{/) { + ERROR("OPEN_BRACE", + "that open brace { should be on the previous line\n" . + "$here\n$ctx\n$rawlines[$ctx_ln - 1]\n"); + } + if ($level == 0 && $pre_ctx !~ /}\s*while\s*\($/ && + $ctx =~ /\)\s*\;\s*$/ && + defined $lines[$ctx_ln - 1]) + { + my ($nlength, $nindent) = line_stats($lines[$ctx_ln - 1]); + if ($nindent > $indent) { + WARN("TRAILING_SEMICOLON", + "trailing semicolon indicates no statements, indent implies otherwise\n" . + "$here\n$ctx\n$rawlines[$ctx_ln - 1]\n"); + } + } + } + +# Check relative indent for conditionals and blocks. + if ($line =~ /\b(?:(?:if|while|for|(?:[a-z_]+|)for_each[a-z_]+)\s*\(|(?:do|else)\b)/ && $line !~ /^.\s*#/ && $line !~ /\}\s*while\s*/) { + ($stat, $cond, $line_nr_next, $remain_next, $off_next) = + ctx_statement_block($linenr, $realcnt, 0) + if (!defined $stat); + my ($s, $c) = ($stat, $cond); + + substr($s, 0, length($c), ''); + + # remove inline comments + $s =~ s/$;/ /g; + $c =~ s/$;/ /g; + + # Find out how long the conditional actually is. + my @newlines = ($c =~ /\n/gs); + my $cond_lines = 1 + $#newlines; + + # Make sure we remove the line prefixes as we have + # none on the first line, and are going to readd them + # where necessary. 
+ $s =~ s/\n./\n/gs; + while ($s =~ /\n\s+\\\n/) { + $cond_lines += $s =~ s/\n\s+\\\n/\n/g; + } + + # We want to check the first line inside the block + # starting at the end of the conditional, so remove: + # 1) any blank line termination + # 2) any opening brace { on end of the line + # 3) any do (...) { + my $continuation = 0; + my $check = 0; + $s =~ s/^.*\bdo\b//; + $s =~ s/^\s*{//; + if ($s =~ s/^\s*\\//) { + $continuation = 1; + } + if ($s =~ s/^\s*?\n//) { + $check = 1; + $cond_lines++; + } + + # Also ignore a loop construct at the end of a + # preprocessor statement. + if (($prevline =~ /^.\s*#\s*define\s/ || + $prevline =~ /\\\s*$/) && $continuation == 0) { + $check = 0; + } + + my $cond_ptr = -1; + $continuation = 0; + while ($cond_ptr != $cond_lines) { + $cond_ptr = $cond_lines; + + # If we see an #else/#elif then the code + # is not linear. + if ($s =~ /^\s*\#\s*(?:else|elif)/) { + $check = 0; + } + + # Ignore: + # 1) blank lines, they should be at 0, + # 2) preprocessor lines, and + # 3) labels. + if ($continuation || + $s =~ /^\s*?\n/ || + $s =~ /^\s*#\s*?/ || + $s =~ /^\s*$Ident\s*:/) { + $continuation = ($s =~ /^.*?\\\n/) ? 1 : 0; + if ($s =~ s/^.*?\n//) { + $cond_lines++; + } + } + } + + my (undef, $sindent) = line_stats("+" . $s); + my $stat_real = raw_line($linenr, $cond_lines); + + # Check if either of these lines are modified, else + # this is not this patch's fault. 
+ if (!defined($stat_real) || + $stat !~ /^\+/ && $stat_real !~ /^\+/) { + $check = 0; + } + if (defined($stat_real) && $cond_lines > 1) { + $stat_real = "[...]\n$stat_real"; + } + + #print "line<$line> prevline<$prevline> indent<$indent> sindent<$sindent> check<$check> continuation<$continuation> s<$s> cond_lines<$cond_lines> stat_real<$stat_real> stat<$stat>\n"; + + if ($check && $s ne '' && + (($sindent % 8) != 0 || + ($sindent < $indent) || + ($sindent == $indent && + ($s !~ /^\s*(?:\}|\{|else\b)/)) || + ($sindent > $indent + 8))) { + WARN("SUSPECT_CODE_INDENT", + "suspect code indent for conditional statements ($indent, $sindent)\n" . $herecurr . "$stat_real\n"); + } + } + + # Track the 'values' across context and added lines. + my $opline = $line; $opline =~ s/^./ /; + my ($curr_values, $curr_vars) = + annotate_values($opline . "\n", $prev_values); + $curr_values = $prev_values . $curr_values; + if ($dbg_values) { + my $outline = $opline; $outline =~ s/\t/ /g; + print "$linenr > .$outline\n"; + print "$linenr > $curr_values\n"; + print "$linenr > $curr_vars\n"; + } + $prev_values = substr($curr_values, -1); + +#ignore lines not being added + next if ($line =~ /^[^\+]/); + +# check for dereferences that span multiple lines + if ($prevline =~ /^\+.*$Lval\s*(?:\.|->)\s*$/ && + $line =~ /^\+\s*(?!\#\s*(?!define\s+|if))\s*$Lval/) { + $prevline =~ /($Lval\s*(?:\.|->))\s*$/; + my $ref = $1; + $line =~ /^.\s*($Lval)/; + $ref .= $1; + $ref =~ s/\s//g; + WARN("MULTILINE_DEREFERENCE", + "Avoid multiple line dereference - prefer '$ref'\n" . 
$hereprev); + } + +# check for declarations of signed or unsigned without int + while ($line =~ m{\b($Declare)\s*(?!char\b|short\b|int\b|long\b)\s*($Ident)?\s*[=,;\[\)\(]}g) { + my $type = $1; + my $var = $2; + $var = "" if (!defined $var); + if ($type =~ /^(?:(?:$Storage|$Inline|$Attribute)\s+)*((?:un)?signed)((?:\s*\*)*)\s*$/) { + my $sign = $1; + my $pointer = $2; + + $pointer = "" if (!defined $pointer); + + if (WARN("UNSPECIFIED_INT", + "Prefer '" . trim($sign) . " int" . rtrim($pointer) . "' to bare use of '$sign" . rtrim($pointer) . "'\n" . $herecurr) && + $fix) { + my $decl = trim($sign) . " int "; + my $comp_pointer = $pointer; + $comp_pointer =~ s/\s//g; + $decl .= $comp_pointer; + $decl = rtrim($decl) if ($var eq ""); + $fixed[$fixlinenr] =~ s@\b$sign\s*\Q$pointer\E\s*$var\b@$decl$var@; + } + } + } + +# TEST: allow direct testing of the type matcher. + if ($dbg_type) { + if ($line =~ /^.\s*$Declare\s*$/) { + ERROR("TEST_TYPE", + "TEST: is type\n" . $herecurr); + } elsif ($dbg_type > 1 && $line =~ /^.+($Declare)/) { + ERROR("TEST_NOT_TYPE", + "TEST: is not type ($1 is)\n". $herecurr); + } + next; + } +# TEST: allow direct testing of the attribute matcher. + if ($dbg_attr) { + if ($line =~ /^.\s*$Modifier\s*$/) { + ERROR("TEST_ATTR", + "TEST: is attr\n" . $herecurr); + } elsif ($dbg_attr > 1 && $line =~ /^.+($Modifier)/) { + ERROR("TEST_NOT_ATTR", + "TEST: is not attr ($1 is)\n". $herecurr); + } + next; + } + +# check for initialisation to aggregates open brace on the next line + if ($line =~ /^.\s*{/ && + $prevline =~ /(?:^|[^=])=\s*$/) { + if (ERROR("OPEN_BRACE", + "that open brace { should be on the previous line\n" . 
$hereprev) && + $fix && $prevline =~ /^\+/ && $line =~ /^\+/) { + fix_delete_line($fixlinenr - 1, $prevrawline); + fix_delete_line($fixlinenr, $rawline); + my $fixedline = $prevrawline; + $fixedline =~ s/\s*=\s*$/ = {/; + fix_insert_line($fixlinenr, $fixedline); + $fixedline = $line; + $fixedline =~ s/^(.\s*)\{\s*/$1/; + fix_insert_line($fixlinenr, $fixedline); + } + } + +# +# Checks which are anchored on the added line. +# + +# check for malformed paths in #include statements (uses RAW line) + if ($rawline =~ m{^.\s*\#\s*include\s+[<"](.*)[">]}) { + my $path = $1; + if ($path =~ m{//}) { + ERROR("MALFORMED_INCLUDE", + "malformed #include filename\n" . $herecurr); + } + if ($path =~ "^uapi/" && $realfile =~ m@\binclude/uapi/@) { + ERROR("UAPI_INCLUDE", + "No #include in ...include/uapi/... should use a uapi/ path prefix\n" . $herecurr); + } + } + +# no C99 // comments + if ($line =~ m{//}) { + if (ERROR("C99_COMMENTS", + "do not use C99 // comments\n" . $herecurr) && + $fix) { + my $line = $fixed[$fixlinenr]; + if ($line =~ /\/\/(.*)$/) { + my $comment = trim($1); + $fixed[$fixlinenr] =~ s@\/\/(.*)$@/\* $comment \*/@; + } + } + } + # Remove C99 comments. + $line =~ s@//.*@@; + $opline =~ s@//.*@@; + +# EXPORT_SYMBOL should immediately follow the thing it is exporting, consider +# the whole statement. 
+#print "APW <$lines[$realline_next - 1]>\n"; + if (defined $realline_next && + exists $lines[$realline_next - 1] && + !defined $suppress_export{$realline_next} && + ($lines[$realline_next - 1] =~ /EXPORT_SYMBOL.*\((.*)\)/ || + $lines[$realline_next - 1] =~ /EXPORT_UNUSED_SYMBOL.*\((.*)\)/)) { + # Handle definitions which produce identifiers with + # a prefix: + # XXX(foo); + # EXPORT_SYMBOL(something_foo); + my $name = $1; + if ($stat =~ /^(?:.\s*}\s*\n)?.([A-Z_]+)\s*\(\s*($Ident)/ && + $name =~ /^${Ident}_$2/) { +#print "FOO C name<$name>\n"; + $suppress_export{$realline_next} = 1; + + } elsif ($stat !~ /(?: + \n.}\s*$| + ^.DEFINE_$Ident\(\Q$name\E\)| + ^.DECLARE_$Ident\(\Q$name\E\)| + ^.LIST_HEAD\(\Q$name\E\)| + ^.(?:$Storage\s+)?$Type\s*\(\s*\*\s*\Q$name\E\s*\)\s*\(| + \b\Q$name\E(?:\s+$Attribute)*\s*(?:;|=|\[|\() + )/x) { +#print "FOO A<$lines[$realline_next - 1]> stat<$stat> name<$name>\n"; + $suppress_export{$realline_next} = 2; + } else { + $suppress_export{$realline_next} = 1; + } + } + if (!defined $suppress_export{$linenr} && + $prevline =~ /^.\s*$/ && + ($line =~ /EXPORT_SYMBOL.*\((.*)\)/ || + $line =~ /EXPORT_UNUSED_SYMBOL.*\((.*)\)/)) { +#print "FOO B <$lines[$linenr - 1]>\n"; + $suppress_export{$linenr} = 2; + } + if (defined $suppress_export{$linenr} && + $suppress_export{$linenr} == 2) { + WARN("EXPORT_SYMBOL", + "EXPORT_SYMBOL(foo); should immediately follow its function/variable\n" . $herecurr); + } + +# check for global initialisers. + if ($line =~ /^\+$Type\s*$Ident(?:\s+$Modifier)*\s*=\s*($zero_initializer)\s*;/) { + if (ERROR("GLOBAL_INITIALISERS", + "do not initialise globals to $1\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/(^.$Type\s*$Ident(?:\s+$Modifier)*)\s*=\s*$zero_initializer\s*;/$1;/; + } + } +# check for static initialisers. + if ($line =~ /^\+.*\bstatic\s.*=\s*($zero_initializer)\s*;/) { + if (ERROR("INITIALISED_STATIC", + "do not initialise statics to $1\n" . 
+ $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/(\bstatic\s.*?)\s*=\s*$zero_initializer\s*;/$1;/; + } + } + +# check for misordered declarations of char/short/int/long with signed/unsigned + while ($sline =~ m{(\b$TypeMisordered\b)}g) { + my $tmp = trim($1); + WARN("MISORDERED_TYPE", + "type '$tmp' should be specified in [[un]signed] [short|int|long|long long] order\n" . $herecurr); + } + +# check for static const char * arrays. + if ($line =~ /\bstatic\s+const\s+char\s*\*\s*(\w+)\s*\[\s*\]\s*=\s*/) { + WARN("STATIC_CONST_CHAR_ARRAY", + "static const char * array should probably be static const char * const\n" . + $herecurr); + } + +# check for static char foo[] = "bar" declarations. + if ($line =~ /\bstatic\s+char\s+(\w+)\s*\[\s*\]\s*=\s*"/) { + WARN("STATIC_CONST_CHAR_ARRAY", + "static char array declaration should probably be static const char\n" . + $herecurr); + } + +# check for const <foo> const where <foo> is not a pointer or array type + if ($sline =~ /\bconst\s+($BasicType)\s+const\b/) { + my $found = $1; + if ($sline =~ /\bconst\s+\Q$found\E\s+const\b\s*\*/) { + WARN("CONST_CONST", + "'const $found const *' should probably be 'const $found * const'\n" . $herecurr); + } elsif ($sline !~ /\bconst\s+\Q$found\E\s+const\s+\w+\s*\[/) { + WARN("CONST_CONST", + "'const $found const' should probably be 'const $found'\n" . $herecurr); + } + } + +# check for non-global char *foo[] = {"bar", ...} declarations. + if ($line =~ /^.\s+(?:static\s+|const\s+)?char\s+\*\s*\w+\s*\[\s*\]\s*=\s*\{/) { + WARN("STATIC_CONST_CHAR_ARRAY", + "char * array declaration might be better as static const\n" . + $herecurr); + } + +# check for sizeof(foo)/sizeof(foo[0]) that could be ARRAY_SIZE(foo) + if ($line =~ m@\bsizeof\s*\(\s*($Lval)\s*\)@) { + my $array = $1; + if ($line =~ m@\b(sizeof\s*\(\s*\Q$array\E\s*\)\s*/\s*sizeof\s*\(\s*\Q$array\E\s*\[\s*0\s*\]\s*\))@) { + my $array_div = $1; + if (WARN("ARRAY_SIZE", + "Prefer ARRAY_SIZE($array)\n" . 
$herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/\Q$array_div\E/ARRAY_SIZE($array)/; + } + } + } + +# check for function declarations without arguments like "int foo()" + if ($line =~ /(\b$Type\s+$Ident)\s*\(\s*\)/) { + if (ERROR("FUNCTION_WITHOUT_ARGS", + "Bad function definition - $1() should probably be $1(void)\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/(\b($Type)\s+($Ident))\s*\(\s*\)/$2 $3(void)/; + } + } + +# check for new typedefs, only function parameters and sparse annotations +# make sense. + if ($line =~ /\btypedef\s/ && + $line !~ /\btypedef\s+$Type\s*\(\s*\*?$Ident\s*\)\s*\(/ && + $line !~ /\btypedef\s+$Type\s+$Ident\s*\(/ && + $line !~ /\b$typeTypedefs\b/ && + $line !~ /\b__bitwise\b/) { + WARN("NEW_TYPEDEFS", + "do not add new typedefs\n" . $herecurr); + } + +# * goes on variable not on type + # (char*[ const]) + while ($line =~ m{(\($NonptrType(\s*(?:$Modifier\b\s*|\*\s*)+)\))}g) { + #print "AA<$1>\n"; + my ($ident, $from, $to) = ($1, $2, $2); + + # Should start with a space. + $to =~ s/^(\S)/ $1/; + # Should not end with a space. + $to =~ s/\s+$//; + # '*'s should not have spaces between. + while ($to =~ s/\*\s+\*/\*\*/) { + } + +## print "1: from<$from> to<$to> ident<$ident>\n"; + if ($from ne $to) { + if (ERROR("POINTER_LOCATION", + "\"(foo$from)\" should be \"(foo$to)\"\n" . $herecurr) && + $fix) { + my $sub_from = $ident; + my $sub_to = $ident; + $sub_to =~ s/\Q$from\E/$to/; + $fixed[$fixlinenr] =~ + s@\Q$sub_from\E@$sub_to@; + } + } + } + while ($line =~ m{(\b$NonptrType(\s*(?:$Modifier\b\s*|\*\s*)+)($Ident))}g) { + #print "BB<$1>\n"; + my ($match, $from, $to, $ident) = ($1, $2, $2, $3); + + # Should start with a space. + $to =~ s/^(\S)/ $1/; + # Should not end with a space. + $to =~ s/\s+$//; + # '*'s should not have spaces between. + while ($to =~ s/\*\s+\*/\*\*/) { + } + # Modifiers should have spaces. 
+ $to =~ s/(\b$Modifier$)/$1 /; + +## print "2: from<$from> to<$to> ident<$ident>\n"; + if ($from ne $to && $ident !~ /^$Modifier$/) { + if (ERROR("POINTER_LOCATION", + "\"foo${from}bar\" should be \"foo${to}bar\"\n" . $herecurr) && + $fix) { + + my $sub_from = $match; + my $sub_to = $match; + $sub_to =~ s/\Q$from\E/$to/; + $fixed[$fixlinenr] =~ + s@\Q$sub_from\E@$sub_to@; + } + } + } + +# avoid BUG() or BUG_ON() + if ($line =~ /\b(?:BUG|BUG_ON)\b/) { + my $msg_level = \&WARN; + $msg_level = \&CHK if ($file); + &{$msg_level}("AVOID_BUG", + "Avoid crashing the kernel - try using WARN_ON & recovery code rather than BUG() or BUG_ON()\n" . $herecurr); + } + +# check for uses of printk_ratelimit + if ($line =~ /\bprintk_ratelimit\s*\(/) { + WARN("PRINTK_RATELIMITED", + "Prefer printk_ratelimited or pr_<level>_ratelimited to printk_ratelimit\n" . $herecurr); + } + +# printk should use KERN_* levels + if ($line =~ /\bprintk\s*\(\s*(?!KERN_[A-Z]+\b)/) { + WARN("PRINTK_WITHOUT_KERN_LEVEL", + "printk() should include KERN_<LEVEL> facility level\n" . $herecurr); + } + + if ($line =~ /\bprintk\s*\(\s*KERN_([A-Z]+)/) { + my $orig = $1; + my $level = lc($orig); + $level = "warn" if ($level eq "warning"); + my $level2 = $level; + $level2 = "dbg" if ($level eq "debug"); + WARN("PREFER_PR_LEVEL", + "Prefer [subsystem eg: netdev]_$level2([subsystem]dev, ... then dev_$level2(dev, ... then pr_$level(... to printk(KERN_$orig ...\n" . $herecurr); + } + + if ($line =~ /\bpr_warning\s*\(/) { + if (WARN("PREFER_PR_LEVEL", + "Prefer pr_warn(... to pr_warning(...\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ + s/\bpr_warning\b/pr_warn/; + } + } + + if ($line =~ /\bdev_printk\s*\(\s*KERN_([A-Z]+)/) { + my $orig = $1; + my $level = lc($orig); + $level = "warn" if ($level eq "warning"); + $level = "dbg" if ($level eq "debug"); + WARN("PREFER_DEV_LEVEL", + "Prefer dev_$level(... to dev_printk(KERN_$orig, ...\n" . $herecurr); + } + +# ENOSYS means "bad syscall nr" and nothing else. 
This will have a small +# number of false positives, but assembly files are not checked, so at +# least the arch entry code will not trigger this warning. + if ($line =~ /\bENOSYS\b/) { + WARN("ENOSYS", + "ENOSYS means 'invalid syscall nr' and nothing else\n" . $herecurr); + } + +# function brace can't be on same line, except for #defines of do while, +# or if closed on same line + if ($^V && $^V ge 5.10.0 && + $sline =~ /$Type\s*$Ident\s*$balanced_parens\s*\{/ && + $sline !~ /\#\s*define\b.*do\s*\{/ && + $sline !~ /}/) { + if (ERROR("OPEN_BRACE", + "open brace '{' following function definitions go on the next line\n" . $herecurr) && + $fix) { + fix_delete_line($fixlinenr, $rawline); + my $fixed_line = $rawline; + $fixed_line =~ /(^..*$Type\s*$Ident\(.*\)\s*){(.*)$/; + my $line1 = $1; + my $line2 = $2; + fix_insert_line($fixlinenr, ltrim($line1)); + fix_insert_line($fixlinenr, "\+{"); + if ($line2 !~ /^\s*$/) { + fix_insert_line($fixlinenr, "\+\t" . trim($line2)); + } + } + } + +# open braces for enum, union and struct go on the same line. + if ($line =~ /^.\s*{/ && + $prevline =~ /^.\s*(?:typedef\s+)?(enum|union|struct)(?:\s+$Ident)?\s*$/) { + if (ERROR("OPEN_BRACE", + "open brace '{' following $1 go on the same line\n" . $hereprev) && + $fix && $prevline =~ /^\+/ && $line =~ /^\+/) { + fix_delete_line($fixlinenr - 1, $prevrawline); + fix_delete_line($fixlinenr, $rawline); + my $fixedline = rtrim($prevrawline) . " {"; + fix_insert_line($fixlinenr, $fixedline); + $fixedline = $rawline; + $fixedline =~ s/^(.\s*)\{\s*/$1\t/; + if ($fixedline !~ /^\+\s*$/) { + fix_insert_line($fixlinenr, $fixedline); + } + } + } + +# missing space after union, struct or enum definition + if ($line =~ /^.\s*(?:typedef\s+)?(enum|union|struct)(?:\s+$Ident){1,2}[=\{]/) { + if (WARN("SPACING", + "missing space after $1 definition\n" . 
$herecurr) && + $fix) { + $fixed[$fixlinenr] =~ + s/^(.\s*(?:typedef\s+)?(?:enum|union|struct)(?:\s+$Ident){1,2})([=\{])/$1 $2/; + } + } + +# Function pointer declarations +# check spacing between type, funcptr, and args +# canonical declaration is "type (*funcptr)(args...)" + if ($line =~ /^.\s*($Declare)\((\s*)\*(\s*)($Ident)(\s*)\)(\s*)\(/) { + my $declare = $1; + my $pre_pointer_space = $2; + my $post_pointer_space = $3; + my $funcname = $4; + my $post_funcname_space = $5; + my $pre_args_space = $6; + +# the $Declare variable will capture all spaces after the type +# so check it for a missing trailing missing space but pointer return types +# don't need a space so don't warn for those. + my $post_declare_space = ""; + if ($declare =~ /(\s+)$/) { + $post_declare_space = $1; + $declare = rtrim($declare); + } + if ($declare !~ /\*$/ && $post_declare_space =~ /^$/) { + WARN("SPACING", + "missing space after return type\n" . $herecurr); + $post_declare_space = " "; + } + +# unnecessary space "type (*funcptr)(args...)" +# This test is not currently implemented because these declarations are +# equivalent to +# int foo(int bar, ...) +# and this is form shouldn't/doesn't generate a checkpatch warning. +# +# elsif ($declare =~ /\s{2,}$/) { +# WARN("SPACING", +# "Multiple spaces after return type\n" . $herecurr); +# } + +# unnecessary space "type ( *funcptr)(args...)" + if (defined $pre_pointer_space && + $pre_pointer_space =~ /^\s/) { + WARN("SPACING", + "Unnecessary space after function pointer open parenthesis\n" . $herecurr); + } + +# unnecessary space "type (* funcptr)(args...)" + if (defined $post_pointer_space && + $post_pointer_space =~ /^\s/) { + WARN("SPACING", + "Unnecessary space before function pointer name\n" . $herecurr); + } + +# unnecessary space "type (*funcptr )(args...)" + if (defined $post_funcname_space && + $post_funcname_space =~ /^\s/) { + WARN("SPACING", + "Unnecessary space after function pointer name\n" . 
$herecurr); + } + +# unnecessary space "type (*funcptr) (args...)" + if (defined $pre_args_space && + $pre_args_space =~ /^\s/) { + WARN("SPACING", + "Unnecessary space before function pointer arguments\n" . $herecurr); + } + + if (show_type("SPACING") && $fix) { + $fixed[$fixlinenr] =~ + s/^(.\s*)$Declare\s*\(\s*\*\s*$Ident\s*\)\s*\(/$1 . $declare . $post_declare_space . '(*' . $funcname . ')('/ex; + } + } + +# check for spacing round square brackets; allowed: +# 1. with a type on the left -- int [] a; +# 2. at the beginning of a line for slice initialisers -- [0...10] = 5, +# 3. inside a curly brace -- = { [0...10] = 5 } + while ($line =~ /(.*?\s)\[/g) { + my ($where, $prefix) = ($-[1], $1); + if ($prefix !~ /$Type\s+$/ && + ($where != 0 || $prefix !~ /^.\s+$/) && + $prefix !~ /[{,:]\s+$/) { + if (ERROR("BRACKET_SPACE", + "space prohibited before open square bracket '['\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ + s/^(\+.*?)\s+\[/$1\[/; + } + } + } + +# check for spaces between functions and their parentheses. + while ($line =~ /($Ident)\s+\(/g) { + my $name = $1; + my $ctx_before = substr($line, 0, $-[1]); + my $ctx = "$ctx_before$name"; + + # Ignore those directives where spaces _are_ permitted. + if ($name =~ /^(?: + if|for|while|switch|return|case| + volatile|__volatile__| + __attribute__|format|__extension__| + asm|__asm__)$/x) + { + # cpp #define statements have non-optional spaces, ie + # if there is a space between the name and the open + # parenthesis it is simply not a parameter group. + } elsif ($ctx_before =~ /^.\s*\#\s*define\s*$/) { + + # cpp #elif statement condition may start with a ( + } elsif ($ctx =~ /^.\s*\#\s*elif\s*$/) { + + # If this whole things ends with a type its most + # likely a typedef for a function. + } elsif ($ctx =~ /$Type$/) { + + } else { + if (WARN("SPACING", + "space prohibited between function name and open parenthesis '('\n" . 
$herecurr) && + $fix) { + $fixed[$fixlinenr] =~ + s/\b$name\s+\(/$name\(/; + } + } + } + +# Check operator spacing. + if (!($line=~/\#\s*include/)) { + my $fixed_line = ""; + my $line_fixed = 0; + + my $ops = qr{ + <<=|>>=|<=|>=|==|!=| + \+=|-=|\*=|\/=|%=|\^=|\|=|&=| + =>|->|<<|>>|<|>|=|!|~| + &&|\|\||,|\^|\+\+|--|&|\||\+|-|\*|\/|%| + \?:|\?|: + }x; + my @elements = split(/($ops|;)/, $opline); + +## print("element count: <" . $#elements . ">\n"); +## foreach my $el (@elements) { +## print("el: <$el>\n"); +## } + + my @fix_elements = (); + my $off = 0; + + foreach my $el (@elements) { + push(@fix_elements, substr($rawline, $off, length($el))); + $off += length($el); + } + + $off = 0; + + my $blank = copy_spacing($opline); + my $last_after = -1; + + for (my $n = 0; $n < $#elements; $n += 2) { + + my $good = $fix_elements[$n] . $fix_elements[$n + 1]; + +## print("n: <$n> good: <$good>\n"); + + $off += length($elements[$n]); + + # Pick up the preceding and succeeding characters. + my $ca = substr($opline, 0, $off); + my $cc = ''; + if (length($opline) >= ($off + length($elements[$n + 1]))) { + $cc = substr($opline, $off + length($elements[$n + 1])); + } + my $cb = "$ca$;$cc"; + + my $a = ''; + $a = 'V' if ($elements[$n] ne ''); + $a = 'W' if ($elements[$n] =~ /\s$/); + $a = 'C' if ($elements[$n] =~ /$;$/); + $a = 'B' if ($elements[$n] =~ /(\[|\()$/); + $a = 'O' if ($elements[$n] eq ''); + $a = 'E' if ($ca =~ /^\s*$/); + + my $op = $elements[$n + 1]; + + my $c = ''; + if (defined $elements[$n + 2]) { + $c = 'V' if ($elements[$n + 2] ne ''); + $c = 'W' if ($elements[$n + 2] =~ /^\s/); + $c = 'C' if ($elements[$n + 2] =~ /^$;/); + $c = 'B' if ($elements[$n + 2] =~ /^(\)|\]|;)/); + $c = 'O' if ($elements[$n + 2] eq ''); + $c = 'E' if ($elements[$n + 2] =~ /^\s*\\$/); + } else { + $c = 'E'; + } + + my $ctx = "${a}x${c}"; + + my $at = "(ctx:$ctx)"; + + my $ptr = substr($blank, 0, $off) . "^"; + my $hereptr = "$hereline$ptr\n"; + + # Pull out the value of this operator. 
+ my $op_type = substr($curr_values, $off + 1, 1); + + # Get the full operator variant. + my $opv = $op . substr($curr_vars, $off, 1); + + # Ignore operators passed as parameters. + if ($op_type ne 'V' && + $ca =~ /\s$/ && $cc =~ /^\s*[,\)]/) { + +# # Ignore comments +# } elsif ($op =~ /^$;+$/) { + + # ; should have either the end of line or a space or \ after it + } elsif ($op eq ';') { + if ($ctx !~ /.x[WEBC]/ && + $cc !~ /^\\/ && $cc !~ /^;/) { + if (ERROR("SPACING", + "space required after that '$op' $at\n" . $hereptr)) { + $good = $fix_elements[$n] . trim($fix_elements[$n + 1]) . " "; + $line_fixed = 1; + } + } + + # // is a comment + } elsif ($op eq '//') { + + # : when part of a bitfield + } elsif ($opv eq ':B') { + # skip the bitfield test for now + + # No spaces for: + # -> + } elsif ($op eq '->') { + if ($ctx =~ /Wx.|.xW/) { + if (ERROR("SPACING", + "spaces prohibited around that '$op' $at\n" . $hereptr)) { + $good = rtrim($fix_elements[$n]) . trim($fix_elements[$n + 1]); + if (defined $fix_elements[$n + 2]) { + $fix_elements[$n + 2] =~ s/^\s+//; + } + $line_fixed = 1; + } + } + + # , must not have a space before and must have a space on the right. + } elsif ($op eq ',') { + my $rtrim_before = 0; + my $space_after = 0; + if ($ctx =~ /Wx./) { + if (ERROR("SPACING", + "space prohibited before that '$op' $at\n" . $hereptr)) { + $line_fixed = 1; + $rtrim_before = 1; + } + } + if ($ctx !~ /.x[WEC]/ && $cc !~ /^}/) { + if (ERROR("SPACING", + "space required after that '$op' $at\n" . $hereptr)) { + $line_fixed = 1; + $last_after = $n; + $space_after = 1; + } + } + if ($rtrim_before || $space_after) { + if ($rtrim_before) { + $good = rtrim($fix_elements[$n]) . trim($fix_elements[$n + 1]); + } else { + $good = $fix_elements[$n] . trim($fix_elements[$n + 1]); + } + if ($space_after) { + $good .= " "; + } + } + + # '*' as part of a type definition -- reported already. 
+ } elsif ($opv eq '*_') { + #warn "'*' is part of type\n"; + + # unary operators should have a space before and + # none after. May be left adjacent to another + # unary operator, or a cast + } elsif ($op eq '!' || $op eq '~' || + $opv eq '*U' || $opv eq '-U' || + $opv eq '&U' || $opv eq '&&U') { + if ($ctx !~ /[WEBC]x./ && $ca !~ /(?:\)|!|~|\*|-|\&|\||\+\+|\-\-|\{)$/) { + if (ERROR("SPACING", + "space required before that '$op' $at\n" . $hereptr)) { + if ($n != $last_after + 2) { + $good = $fix_elements[$n] . " " . ltrim($fix_elements[$n + 1]); + $line_fixed = 1; + } + } + } + if ($op eq '*' && $cc =~/\s*$Modifier\b/) { + # A unary '*' may be const + + } elsif ($ctx =~ /.xW/) { + if (ERROR("SPACING", + "space prohibited after that '$op' $at\n" . $hereptr)) { + $good = $fix_elements[$n] . rtrim($fix_elements[$n + 1]); + if (defined $fix_elements[$n + 2]) { + $fix_elements[$n + 2] =~ s/^\s+//; + } + $line_fixed = 1; + } + } + + # unary ++ and unary -- are allowed no space on one side. + } elsif ($op eq '++' or $op eq '--') { + if ($ctx !~ /[WEOBC]x[^W]/ && $ctx !~ /[^W]x[WOBEC]/) { + if (ERROR("SPACING", + "space required one side of that '$op' $at\n" . $hereptr)) { + $good = $fix_elements[$n] . trim($fix_elements[$n + 1]) . " "; + $line_fixed = 1; + } + } + if ($ctx =~ /Wx[BE]/ || + ($ctx =~ /Wx./ && $cc =~ /^;/)) { + if (ERROR("SPACING", + "space prohibited before that '$op' $at\n" . $hereptr)) { + $good = rtrim($fix_elements[$n]) . trim($fix_elements[$n + 1]); + $line_fixed = 1; + } + } + if ($ctx =~ /ExW/) { + if (ERROR("SPACING", + "space prohibited after that '$op' $at\n" . $hereptr)) { + $good = $fix_elements[$n] . 
trim($fix_elements[$n + 1]); + if (defined $fix_elements[$n + 2]) { + $fix_elements[$n + 2] =~ s/^\s+//; + } + $line_fixed = 1; + } + } + + # << and >> may either have or not have spaces both sides + } elsif ($op eq '<<' or $op eq '>>' or + $op eq '&' or $op eq '^' or $op eq '|' or + $op eq '+' or $op eq '-' or + $op eq '*' or $op eq '/' or + $op eq '%') + { + if ($check) { + if (defined $fix_elements[$n + 2] && $ctx !~ /[EW]x[EW]/) { + if (CHK("SPACING", + "spaces preferred around that '$op' $at\n" . $hereptr)) { + $good = rtrim($fix_elements[$n]) . " " . trim($fix_elements[$n + 1]) . " "; + $fix_elements[$n + 2] =~ s/^\s+//; + $line_fixed = 1; + } + } elsif (!defined $fix_elements[$n + 2] && $ctx !~ /Wx[OE]/) { + if (CHK("SPACING", + "space preferred before that '$op' $at\n" . $hereptr)) { + $good = rtrim($fix_elements[$n]) . " " . trim($fix_elements[$n + 1]); + $line_fixed = 1; + } + } + } elsif ($ctx =~ /Wx[^WCE]|[^WCE]xW/) { + if (ERROR("SPACING", + "need consistent spacing around '$op' $at\n" . $hereptr)) { + $good = rtrim($fix_elements[$n]) . " " . trim($fix_elements[$n + 1]) . " "; + if (defined $fix_elements[$n + 2]) { + $fix_elements[$n + 2] =~ s/^\s+//; + } + $line_fixed = 1; + } + } + + # A colon needs no spaces before when it is + # terminating a case value or a label. + } elsif ($opv eq ':C' || $opv eq ':L') { + if ($ctx =~ /Wx./) { + if (ERROR("SPACING", + "space prohibited before that '$op' $at\n" . $hereptr)) { + $good = rtrim($fix_elements[$n]) . trim($fix_elements[$n + 1]); + $line_fixed = 1; + } + } + + # All the others need spaces both sides. 
+ } elsif ($ctx !~ /[EWC]x[CWE]/) { + my $ok = 0; + + # Ignore email addresses <foo@bar> + if (($op eq '<' && + $cc =~ /^\S+\@\S+>/) || + ($op eq '>' && + $ca =~ /<\S+\@\S+$/)) + { + $ok = 1; + } + + # for asm volatile statements + # ignore a colon with another + # colon immediately before or after + if (($op eq ':') && + ($ca =~ /:$/ || $cc =~ /^:/)) { + $ok = 1; + } + + # messages are ERROR, but ?: are CHK + if ($ok == 0) { + my $msg_level = \&ERROR; + $msg_level = \&CHK if (($op eq '?:' || $op eq '?' || $op eq ':') && $ctx =~ /VxV/); + + if (&{$msg_level}("SPACING", + "spaces required around that '$op' $at\n" . $hereptr)) { + $good = rtrim($fix_elements[$n]) . " " . trim($fix_elements[$n + 1]) . " "; + if (defined $fix_elements[$n + 2]) { + $fix_elements[$n + 2] =~ s/^\s+//; + } + $line_fixed = 1; + } + } + } + $off += length($elements[$n + 1]); + +## print("n: <$n> GOOD: <$good>\n"); + + $fixed_line = $fixed_line . $good; + } + + if (($#elements % 2) == 0) { + $fixed_line = $fixed_line . $fix_elements[$#elements]; + } + + if ($fix && $line_fixed && $fixed_line ne $fixed[$fixlinenr]) { + $fixed[$fixlinenr] = $fixed_line; + } + + + } + +# check for whitespace before a non-naked semicolon + if ($line =~ /^\+.*\S\s+;\s*$/) { + if (WARN("SPACING", + "space prohibited before semicolon\n" . $herecurr) && + $fix) { + 1 while $fixed[$fixlinenr] =~ + s/^(\+.*\S)\s+;/$1;/; + } + } + +# check for multiple assignments + if ($line =~ /^.\s*$Lval\s*=\s*$Lval\s*=(?!=)/) { + CHK("MULTIPLE_ASSIGNMENTS", + "multiple assignments should be avoided\n" . $herecurr); + } + +## # check for multiple declarations, allowing for a function declaration +## # continuation. +## if ($line =~ /^.\s*$Type\s+$Ident(?:\s*=[^,{]*)?\s*,\s*$Ident.*/ && +## $line !~ /^.\s*$Type\s+$Ident(?:\s*=[^,{]*)?\s*,\s*$Type\s*$Ident.*/) { +## +## # Remove any bracketed sections to ensure we do not +## # falsly report the parameters of functions. 
+## my $ln = $line; +## while ($ln =~ s/\([^\(\)]*\)//g) { +## } +## if ($ln =~ /,/) { +## WARN("MULTIPLE_DECLARATION", +## "declaring multiple variables together should be avoided\n" . $herecurr); +## } +## } + +#need space before brace following if, while, etc + if (($line =~ /\(.*\)\{/ && $line !~ /\($Type\)\{/) || + $line =~ /do\{/) { + if (ERROR("SPACING", + "space required before the open brace '{'\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/^(\+.*(?:do|\)))\{/$1 {/; + } + } + +## # check for blank lines before declarations +## if ($line =~ /^.\t+$Type\s+$Ident(?:\s*=.*)?;/ && +## $prevrawline =~ /^.\s*$/) { +## WARN("SPACING", +## "No blank lines before declarations\n" . $hereprev); +## } +## + +# closing brace should have a space following it when it has anything +# on the line + if ($line =~ /}(?!(?:,|;|\)))\S/) { + if (ERROR("SPACING", + "space required after that close brace '}'\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ + s/}((?!(?:,|;|\)))\S)/} $1/; + } + } + +# check spacing on square brackets + if ($line =~ /\[\s/ && $line !~ /\[\s*$/) { + if (ERROR("SPACING", + "space prohibited after that open square bracket '['\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ + s/\[\s+/\[/; + } + } + if ($line =~ /\s\]/) { + if (ERROR("SPACING", + "space prohibited before that close square bracket ']'\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ + s/\s+\]/\]/; + } + } + +# check spacing on parentheses + if ($line =~ /\(\s/ && $line !~ /\(\s*(?:\\)?$/ && + $line !~ /for\s*\(\s+;/) { + if (ERROR("SPACING", + "space prohibited after that open parenthesis '('\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ + s/\(\s+/\(/; + } + } + if ($line =~ /(\s+)\)/ && $line !~ /^.\s*\)/ && + $line !~ /for\s*\(.*;\s+\)/ && + $line !~ /:\s+\)/) { + if (ERROR("SPACING", + "space prohibited before that close parenthesis ')'\n" . 
$herecurr) && + $fix) { + $fixed[$fixlinenr] =~ + s/\s+\)/\)/; + } + } + +# check unnecessary parentheses around addressof/dereference single $Lvals +# ie: &(foo->bar) should be &foo->bar and *(foo->bar) should be *foo->bar + + while ($line =~ /(?:[^&]&\s*|\*)\(\s*($Ident\s*(?:$Member\s*)+)\s*\)/g) { + my $var = $1; + if (CHK("UNNECESSARY_PARENTHESES", + "Unnecessary parentheses around $var\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/\(\s*\Q$var\E\s*\)/$var/; + } + } + +# check for unnecessary parentheses around function pointer uses +# ie: (foo->bar)(); should be foo->bar(); +# but not "if (foo->bar) (" to avoid some false positives + if ($line =~ /(\bif\s*|)(\(\s*$Ident\s*(?:$Member\s*)+\))[ \t]*\(/ && $1 !~ /^if/) { + my $var = $2; + if (CHK("UNNECESSARY_PARENTHESES", + "Unnecessary parentheses around function pointer $var\n" . $herecurr) && + $fix) { + my $var2 = deparenthesize($var); + $var2 =~ s/\s//g; + $fixed[$fixlinenr] =~ s/\Q$var\E/$var2/; + } + } + +# check for unnecessary parentheses around comparisons in if uses +# when !drivers/staging or command-line uses --strict + if (($realfile !~ m@^(?:drivers/staging/)@ || $check_orig) && + $^V && $^V ge 5.10.0 && defined($stat) && + $stat =~ /(^.\s*if\s*($balanced_parens))/) { + my $if_stat = $1; + my $test = substr($2, 1, -1); + my $herectx; + while ($test =~ /(?:^|[^\w\&\!\~])+\s*\(\s*([\&\!\~]?\s*$Lval\s*(?:$Compare\s*$FuncArg)?)\s*\)/g) { + my $match = $1; + # avoid parentheses around potential macro args + next if ($match =~ /^\s*\w+\s*$/); + if (!defined($herectx)) { + $herectx = $here . "\n"; + my $cnt = statement_rawlines($if_stat); + for (my $n = 0; $n < $cnt; $n++) { + my $rl = raw_line($linenr, $n); + $herectx .= $rl . "\n"; + last if $rl =~ /^[ \+].*\{/; + } + } + CHK("UNNECESSARY_PARENTHESES", + "Unnecessary parentheses around '$match'\n" . $herectx); + } + } + +#goto labels aren't indented, allow a single space however + if ($line=~/^.\s+[A-Za-z\d_]+:(?![0-9]+)/ and + !($line=~/^. 
[A-Za-z\d_]+:/) and !($line=~/^.\s+default:/)) { + if (WARN("INDENTED_LABEL", + "labels should not be indented\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ + s/^(.)\s+/$1/; + } + } + +# return is not a function + if (defined($stat) && $stat =~ /^.\s*return(\s*)\(/s) { + my $spacing = $1; + if ($^V && $^V ge 5.10.0 && + $stat =~ /^.\s*return\s*($balanced_parens)\s*;\s*$/) { + my $value = $1; + $value = deparenthesize($value); + if ($value =~ m/^\s*$FuncArg\s*(?:\?|$)/) { + ERROR("RETURN_PARENTHESES", + "return is not a function, parentheses are not required\n" . $herecurr); + } + } elsif ($spacing !~ /\s+/) { + ERROR("SPACING", + "space required before the open parenthesis '('\n" . $herecurr); + } + } + +# unnecessary return in a void function +# at end-of-function, with the previous line a single leading tab, then return; +# and the line before that not a goto label target like "out:" + if ($sline =~ /^[ \+]}\s*$/ && + $prevline =~ /^\+\treturn\s*;\s*$/ && + $linenr >= 3 && + $lines[$linenr - 3] =~ /^[ +]/ && + $lines[$linenr - 3] !~ /^[ +]\s*$Ident\s*:/) { + WARN("RETURN_VOID", + "void function return statements are not generally useful\n" . $hereprev); + } + +# if statements using unnecessary parentheses - ie: if ((foo == bar)) + if ($^V && $^V ge 5.10.0 && + $line =~ /\bif\s*((?:\(\s*){2,})/) { + my $openparens = $1; + my $count = $openparens =~ tr@\(@\(@; + my $msg = ""; + if ($line =~ /\bif\s*(?:\(\s*){$count,$count}$LvalOrFunc\s*($Compare)\s*$LvalOrFunc(?:\s*\)){$count,$count}/) { + my $comp = $4; #Not $1 because of $LvalOrFunc + $msg = " - maybe == should be = ?" if ($comp eq "=="); + WARN("UNNECESSARY_PARENTHESES", + "Unnecessary parentheses$msg\n" . 
$herecurr); + } + } + +# comparisons with a constant or upper case identifier on the left +# avoid cases like "foo + BAR < baz" +# only fix matches surrounded by parentheses to avoid incorrect +# conversions like "FOO < baz() + 5" being "misfixed" to "baz() > FOO + 5" + if ($^V && $^V ge 5.10.0 && + $line =~ /^\+(.*)\b($Constant|[A-Z_][A-Z0-9_]*)\s*($Compare)\s*($LvalOrFunc)/) { + my $lead = $1; + my $const = $2; + my $comp = $3; + my $to = $4; + my $newcomp = $comp; + if ($lead !~ /(?:$Operators|\.)\s*$/ && + $to !~ /^(?:Constant|[A-Z_][A-Z0-9_]*)$/ && + WARN("CONSTANT_COMPARISON", + "Comparisons should place the constant on the right side of the test\n" . $herecurr) && + $fix) { + if ($comp eq "<") { + $newcomp = ">"; + } elsif ($comp eq "<=") { + $newcomp = ">="; + } elsif ($comp eq ">") { + $newcomp = "<"; + } elsif ($comp eq ">=") { + $newcomp = "<="; + } + $fixed[$fixlinenr] =~ s/\(\s*\Q$const\E\s*$Compare\s*\Q$to\E\s*\)/($to $newcomp $const)/; + } + } + +# Return of what appears to be an errno should normally be negative + if ($sline =~ /\breturn(?:\s*\(+\s*|\s+)(E[A-Z]+)(?:\s*\)+\s*|\s*)[;:,]/) { + my $name = $1; + if ($name ne 'EOF' && $name ne 'ERROR') { + WARN("USE_NEGATIVE_ERRNO", + "return of an errno should typically be negative (ie: return -$1)\n" . $herecurr); + } + } + +# Need a space before open parenthesis after if, while etc + if ($line =~ /\b(if|while|for|switch)\(/) { + if (ERROR("SPACING", + "space required before the open parenthesis '('\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ + s/\b(if|while|for|switch)\(/$1 \(/; + } + } + +# Check for illegal assignment in if conditional -- and check for trailing +# statements after the conditional. 
+ if ($line =~ /do\s*(?!{)/) { + ($stat, $cond, $line_nr_next, $remain_next, $off_next) = + ctx_statement_block($linenr, $realcnt, 0) + if (!defined $stat); + my ($stat_next) = ctx_statement_block($line_nr_next, + $remain_next, $off_next); + $stat_next =~ s/\n./\n /g; + ##print "stat<$stat> stat_next<$stat_next>\n"; + + if ($stat_next =~ /^\s*while\b/) { + # If the statement carries leading newlines, + # then count those as offsets. + my ($whitespace) = + ($stat_next =~ /^((?:\s*\n[+-])*\s*)/s); + my $offset = + statement_rawlines($whitespace) - 1; + + $suppress_whiletrailers{$line_nr_next + + $offset} = 1; + } + } + if (!defined $suppress_whiletrailers{$linenr} && + defined($stat) && defined($cond) && + $line =~ /\b(?:if|while|for)\s*\(/ && $line !~ /^.\s*#/) { + my ($s, $c) = ($stat, $cond); + + # Find out what is on the end of the line after the + # conditional. + substr($s, 0, length($c), ''); + $s =~ s/\n.*//g; + $s =~ s/$;//g; # Remove any comments + if (length($c) && $s !~ /^\s*{?\s*\\*\s*$/ && + $c !~ /}\s*while\s*/) + { + # Find out how long the conditional actually is. + my @newlines = ($c =~ /\n/gs); + my $cond_lines = 1 + $#newlines; + my $stat_real = ''; + + $stat_real = raw_line($linenr, $cond_lines) + . "\n" if ($cond_lines); + if (defined($stat_real) && $cond_lines > 1) { + $stat_real = "[...]\n$stat_real"; + } + + ERROR("TRAILING_STATEMENTS", + "trailing statements should be on next line\n" . $herecurr . $stat_real); + } + } + +# Check for bitwise tests written as boolean + if ($line =~ / + (?: + (?:\[|\(|\&\&|\|\|) + \s*0[xX][0-9]+\s* + (?:\&\&|\|\|) + | + (?:\&\&|\|\|) + \s*0[xX][0-9]+\s* + (?:\&\&|\|\||\)|\]) + )/x) + { + WARN("HEXADECIMAL_BOOLEAN_TEST", + "boolean test with hexadecimal, perhaps just 1 \& or \|?\n" . 
$herecurr); + } + +# if and else should not have general statements after it + if ($line =~ /^.\s*(?:}\s*)?else\b(.*)/) { + my $s = $1; + $s =~ s/$;//g; # Remove any comments + if ($s !~ /^\s*(?:\sif|(?:{|)\s*\\?\s*$)/) { + ERROR("TRAILING_STATEMENTS", + "trailing statements should be on next line\n" . $herecurr); + } + } +# if should not continue a brace + if ($line =~ /}\s*if\b/) { + ERROR("TRAILING_STATEMENTS", + "trailing statements should be on next line (or did you mean 'else if'?)\n" . + $herecurr); + } +# case and default should not have general statements after them + if ($line =~ /^.\s*(?:case\s*.*|default\s*):/g && + $line !~ /\G(?: + (?:\s*$;*)(?:\s*{)?(?:\s*$;*)(?:\s*\\)?\s*$| + \s*return\s+ + )/xg) + { + ERROR("TRAILING_STATEMENTS", + "trailing statements should be on next line\n" . $herecurr); + } + + if ($prevline=~/}\s*$/ and $line=~/^.\s*while\s*/ && + $previndent == $indent) { + my ($s, $c) = ctx_statement_block($linenr, $realcnt, 0); + + # Find out what is on the end of the line after the + # conditional. + substr($s, 0, length($c), ''); + $s =~ s/\n.*//g; + + if ($s =~ /^\s*;/) { + if (ERROR("WHILE_AFTER_BRACE", + "while should follow close brace '}'\n" . $hereprev) && + $fix && $prevline =~ /^\+/ && $line =~ /^\+/) { + fix_delete_line($fixlinenr - 1, $prevrawline); + fix_delete_line($fixlinenr, $rawline); + my $fixedline = $prevrawline; + my $trailing = $rawline; + $trailing =~ s/^\+//; + $trailing = trim($trailing); + $fixedline =~ s/}\s*$/} $trailing/; + fix_insert_line($fixlinenr, $fixedline); + } + } + } + +#Specific variable tests + while ($line =~ m{($Constant|$Lval)}g) { + my $var = $1; + +#gcc binary extension + if ($var =~ /^$Binary$/) { + if (WARN("GCC_BINARY_CONSTANT", + "Avoid gcc v4.3+ binary constant extension: <$var>\n" . 
$herecurr) && + $fix) { + my $hexval = sprintf("0x%x", oct($var)); + $fixed[$fixlinenr] =~ + s/\b$var\b/$hexval/; + } + } + +#CamelCase + if ($var !~ /^$Constant$/ && + $var =~ /[A-Z][a-z]|[a-z][A-Z]/ && +#Ignore Page<foo> variants + $var !~ /^(?:Clear|Set|TestClear|TestSet|)Page[A-Z]/ && +#Ignore SI style variants like nS, mV and dB (ie: max_uV, regulator_min_uA_show) + $var !~ /^(?:[a-z_]*?)_?[a-z][A-Z](?:_[a-z_]+)?$/ && +#Ignore some three character SI units explicitly, like MiB and KHz + $var !~ /^(?:[a-z_]*?)_?(?:[KMGT]iB|[KMGT]?Hz)(?:_[a-z_]+)?$/) { + while ($var =~ m{($Ident)}g) { + my $word = $1; + next if ($word !~ /[A-Z][a-z]|[a-z][A-Z]/); + if ($check) { + seed_camelcase_includes(); + if (!$file && !$camelcase_file_seeded) { + seed_camelcase_file($realfile); + $camelcase_file_seeded = 1; + } + } + if (!defined $camelcase{$word}) { + $camelcase{$word} = 1; + CHK("CAMELCASE", + "Avoid CamelCase: <$word>\n" . $herecurr); + } + } + } + } + +#no spaces allowed after \ in define + if ($line =~ /\#\s*define.*\\\s+$/) { + if (WARN("WHITESPACE_AFTER_LINE_CONTINUATION", + "Whitespace after \\ makes next lines useless\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/\s+$//; + } + } + +# warn if <asm/foo.h> is #included and <linux/foo.h> is available and includes +# itself <asm/foo.h> (uses RAW line) + if ($tree && $rawline =~ m{^.\s*\#\s*include\s*\<asm\/(.*)\.h\>}) { + my $file = "$1.h"; + my $checkfile = "include/linux/$file"; + if (-f "$root/$checkfile" && + $realfile ne $checkfile && + $1 !~ /$allowed_asm_includes/) + { + my $asminclude = `grep -Ec "#include\\s+<asm/$file>" $root/$checkfile`; + if ($asminclude > 0) { + if ($realfile =~ m{^arch/}) { + CHK("ARCH_INCLUDE_LINUX", + "Consider using #include <linux/$file> instead of <asm/$file>\n" . $herecurr); + } else { + WARN("INCLUDE_LINUX", + "Use #include <linux/$file> instead of <asm/$file>\n" . 
$herecurr); + } + } + } + } + +# multi-statement macros should be enclosed in a do while loop, grab the +# first statement and ensure its the whole macro if its not enclosed +# in a known good container + if ($realfile !~ m@/vmlinux.lds.h$@ && + $line =~ /^.\s*\#\s*define\s*$Ident(\()?/) { + my $ln = $linenr; + my $cnt = $realcnt; + my ($off, $dstat, $dcond, $rest); + my $ctx = ''; + my $has_flow_statement = 0; + my $has_arg_concat = 0; + ($dstat, $dcond, $ln, $cnt, $off) = + ctx_statement_block($linenr, $realcnt, 0); + $ctx = $dstat; + #print "dstat<$dstat> dcond<$dcond> cnt<$cnt> off<$off>\n"; + #print "LINE<$lines[$ln-1]> len<" . length($lines[$ln-1]) . "\n"; + + $has_flow_statement = 1 if ($ctx =~ /\b(goto|return)\b/); + $has_arg_concat = 1 if ($ctx =~ /\#\#/ && $ctx !~ /\#\#\s*(?:__VA_ARGS__|args)\b/); + + $dstat =~ s/^.\s*\#\s*define\s+$Ident(\([^\)]*\))?\s*//; + my $define_args = $1; + my $define_stmt = $dstat; + my @def_args = (); + + if (defined $define_args && $define_args ne "") { + $define_args = substr($define_args, 1, length($define_args) - 2); + $define_args =~ s/\s*//g; + @def_args = split(",", $define_args); + } + + $dstat =~ s/$;//g; + $dstat =~ s/\\\n.//g; + $dstat =~ s/^\s*//s; + $dstat =~ s/\s*$//s; + + # Flatten any parentheses and braces + while ($dstat =~ s/\([^\(\)]*\)/1/ || + $dstat =~ s/\{[^\{\}]*\}/1/ || + $dstat =~ s/.\[[^\[\]]*\]/1/) + { + } + + # Flatten any obvious string concatentation. 
+ while ($dstat =~ s/($String)\s*$Ident/$1/ || + $dstat =~ s/$Ident\s*($String)/$1/) + { + } + + # Make asm volatile uses seem like a generic function + $dstat =~ s/\b_*asm_*\s+_*volatile_*\b/asm_volatile/g; + + my $exceptions = qr{ + $Declare| + module_param_named| + MODULE_PARM_DESC| + DECLARE_PER_CPU| + DEFINE_PER_CPU| + __typeof__\(| + union| + struct| + \.$Ident\s*=\s*| + ^\"|\"$| + ^\[ + }x; + #print "REST<$rest> dstat<$dstat> ctx<$ctx>\n"; + + $ctx =~ s/\n*$//; + my $stmt_cnt = statement_rawlines($ctx); + my $herectx = get_stat_here($linenr, $stmt_cnt, $here); + + if ($dstat ne '' && + $dstat !~ /^(?:$Ident|-?$Constant),$/ && # 10, // foo(), + $dstat !~ /^(?:$Ident|-?$Constant);$/ && # foo(); + $dstat !~ /^[!~-]?(?:$Lval|$Constant)$/ && # 10 // foo() // !foo // ~foo // -foo // foo->bar // foo.bar->baz + $dstat !~ /^'X'$/ && $dstat !~ /^'XX'$/ && # character constants + $dstat !~ /$exceptions/ && + $dstat !~ /^\.$Ident\s*=/ && # .foo = + $dstat !~ /^(?:\#\s*$Ident|\#\s*$Constant)\s*$/ && # stringification #foo + $dstat !~ /^do\s*$Constant\s*while\s*$Constant;?$/ && # do {...} while (...); // do {...} while (...) + $dstat !~ /^for\s*$Constant$/ && # for (...) + $dstat !~ /^for\s*$Constant\s+(?:$Ident|-?$Constant)$/ && # for (...) bar() + $dstat !~ /^do\s*{/ && # do {... + $dstat !~ /^\(\{/ && # ({... + $ctx !~ /^.\s*#\s*define\s+TRACE_(?:SYSTEM|INCLUDE_FILE|INCLUDE_PATH)\b/) + { + if ($dstat =~ /^\s*if\b/) { + ERROR("MULTISTATEMENT_MACRO_USE_DO_WHILE", + "Macros starting with if should be enclosed by a do - while loop to avoid possible if/else logic defects\n" . "$herectx"); + } elsif ($dstat =~ /;/) { + ERROR("MULTISTATEMENT_MACRO_USE_DO_WHILE", + "Macros with multiple statements should be enclosed in a do - while loop\n" . "$herectx"); + } else { + ERROR("COMPLEX_MACRO", + "Macros with complex values should be enclosed in parentheses\n" . 
"$herectx"); + } + + } + + # Make $define_stmt single line, comment-free, etc + my @stmt_array = split('\n', $define_stmt); + my $first = 1; + $define_stmt = ""; + foreach my $l (@stmt_array) { + $l =~ s/\\$//; + if ($first) { + $define_stmt = $l; + $first = 0; + } elsif ($l =~ /^[\+ ]/) { + $define_stmt .= substr($l, 1); + } + } + $define_stmt =~ s/$;//g; + $define_stmt =~ s/\s+/ /g; + $define_stmt = trim($define_stmt); + +# check if any macro arguments are reused (ignore '...' and 'type') + foreach my $arg (@def_args) { + next if ($arg =~ /\.\.\./); + next if ($arg =~ /^type$/i); + my $tmp_stmt = $define_stmt; + $tmp_stmt =~ s/\b(typeof|__typeof__|__builtin\w+|typecheck\s*\(\s*$Type\s*,|\#+)\s*\(*\s*$arg\s*\)*\b//g; + $tmp_stmt =~ s/\#+\s*$arg\b//g; + $tmp_stmt =~ s/\b$arg\s*\#\#//g; + my $use_cnt = () = $tmp_stmt =~ /\b$arg\b/g; + if ($use_cnt > 1) { + CHK("MACRO_ARG_REUSE", + "Macro argument reuse '$arg' - possible side-effects?\n" . "$herectx"); + } +# check if any macro arguments may have other precedence issues + if ($tmp_stmt =~ m/($Operators)?\s*\b$arg\b\s*($Operators)?/m && + ((defined($1) && $1 ne ',') || + (defined($2) && $2 ne ','))) { + CHK("MACRO_ARG_PRECEDENCE", + "Macro argument '$arg' may be better as '($arg)' to avoid precedence issues\n" . "$herectx"); + } + } + +# check for macros with flow control, but without ## concatenation +# ## concatenation is commonly a macro that defines a function so ignore those + if ($has_flow_statement && !$has_arg_concat) { + my $cnt = statement_rawlines($ctx); + my $herectx = get_stat_here($linenr, $cnt, $here); + + WARN("MACRO_WITH_FLOW_CONTROL", + "Macros with flow control statements should be avoided\n" . 
"$herectx"); + } + +# check for line continuations outside of #defines, preprocessor #, and asm + + } else { + if ($prevline !~ /^..*\\$/ && + $line !~ /^\+\s*\#.*\\$/ && # preprocessor + $line !~ /^\+.*\b(__asm__|asm)\b.*\\$/ && # asm + $line =~ /^\+.*\\$/) { + WARN("LINE_CONTINUATIONS", + "Avoid unnecessary line continuations\n" . $herecurr); + } + } + +# do {} while (0) macro tests: +# single-statement macros do not need to be enclosed in do while (0) loop, +# macro should not end with a semicolon + if ($^V && $^V ge 5.10.0 && + $realfile !~ m@/vmlinux.lds.h$@ && + $line =~ /^.\s*\#\s*define\s+$Ident(\()?/) { + my $ln = $linenr; + my $cnt = $realcnt; + my ($off, $dstat, $dcond, $rest); + my $ctx = ''; + ($dstat, $dcond, $ln, $cnt, $off) = + ctx_statement_block($linenr, $realcnt, 0); + $ctx = $dstat; + + $dstat =~ s/\\\n.//g; + $dstat =~ s/$;/ /g; + + if ($dstat =~ /^\+\s*#\s*define\s+$Ident\s*${balanced_parens}\s*do\s*{(.*)\s*}\s*while\s*\(\s*0\s*\)\s*([;\s]*)\s*$/) { + my $stmts = $2; + my $semis = $3; + + $ctx =~ s/\n*$//; + my $cnt = statement_rawlines($ctx); + my $herectx = get_stat_here($linenr, $cnt, $here); + + if (($stmts =~ tr/;/;/) == 1 && + $stmts !~ /^\s*(if|while|for|switch)\b/) { + WARN("SINGLE_STATEMENT_DO_WHILE_MACRO", + "Single statement macros should not use a do {} while (0) loop\n" . "$herectx"); + } + if (defined $semis && $semis ne "") { + WARN("DO_WHILE_MACRO_WITH_TRAILING_SEMICOLON", + "do {} while (0) macros should not be semicolon terminated\n" . "$herectx"); + } + } elsif ($dstat =~ /^\+\s*#\s*define\s+$Ident.*;\s*$/) { + $ctx =~ s/\n*$//; + my $cnt = statement_rawlines($ctx); + my $herectx = get_stat_here($linenr, $cnt, $here); + + WARN("TRAILING_SEMICOLON", + "macros should not use a trailing semicolon\n" . 
"$herectx"); + } + } + +# check for redundant bracing round if etc + if ($line =~ /(^.*)\bif\b/ && $1 !~ /else\s*$/) { + my ($level, $endln, @chunks) = + ctx_statement_full($linenr, $realcnt, 1); + #print "chunks<$#chunks> linenr<$linenr> endln<$endln> level<$level>\n"; + #print "APW: <<$chunks[1][0]>><<$chunks[1][1]>>\n"; + if ($#chunks > 0 && $level == 0) { + my @allowed = (); + my $allow = 0; + my $seen = 0; + my $herectx = $here . "\n"; + my $ln = $linenr - 1; + for my $chunk (@chunks) { + my ($cond, $block) = @{$chunk}; + + # If the condition carries leading newlines, then count those as offsets. + my ($whitespace) = ($cond =~ /^((?:\s*\n[+-])*\s*)/s); + my $offset = statement_rawlines($whitespace) - 1; + + $allowed[$allow] = 0; + #print "COND<$cond> whitespace<$whitespace> offset<$offset>\n"; + + # We have looked at and allowed this specific line. + $suppress_ifbraces{$ln + $offset} = 1; + + $herectx .= "$rawlines[$ln + $offset]\n[...]\n"; + $ln += statement_rawlines($block) - 1; + + substr($block, 0, length($cond), ''); + + $seen++ if ($block =~ /^\s*{/); + + #print "cond<$cond> block<$block> allowed<$allowed[$allow]>\n"; + if (statement_lines($cond) > 1) { + #print "APW: ALLOWED: cond<$cond>\n"; + $allowed[$allow] = 1; + } + if ($block =~/\b(?:if|for|while)\b/) { + #print "APW: ALLOWED: block<$block>\n"; + $allowed[$allow] = 1; + } + if (statement_block_size($block) > 1) { + #print "APW: ALLOWED: lines block<$block>\n"; + $allowed[$allow] = 1; + } + $allow++; + } + if ($seen) { + my $sum_allowed = 0; + foreach (@allowed) { + $sum_allowed += $_; + } + if ($sum_allowed != 0 && + $sum_allowed != $allow && + $seen != $allow) { + CHK("BRACES", + "braces {} should be used on all arms of this statement\n" . $herectx); + } + } + } + } + +# check for single line unbalanced braces + if ($sline =~ /^.\s*\}\s*else\s*$/ || + $sline =~ /^.\s*else\s*\{\s*$/) { + CHK("BRACES", "Unbalanced braces around else statement\n" . 
$herecurr); + } + +# check for unnecessary blank lines around braces + if (($line =~ /^.\s*}\s*$/ && $prevrawline =~ /^.\s*$/)) { + if (CHK("BRACES", + "Blank lines aren't necessary before a close brace '}'\n" . $hereprev) && + $fix && $prevrawline =~ /^\+/) { + fix_delete_line($fixlinenr - 1, $prevrawline); + } + } + if (($rawline =~ /^.\s*$/ && $prevline =~ /^..*{\s*$/)) { + if (CHK("BRACES", + "Blank lines aren't necessary after an open brace '{'\n" . $hereprev) && + $fix) { + fix_delete_line($fixlinenr, $rawline); + } + } + +# no volatiles please + my $asm_volatile = qr{\b(__asm__|asm)\s+(__volatile__|volatile)\b}; + if ($line =~ /\bvolatile\b/ && $line !~ /$asm_volatile/) { + WARN("VOLATILE", + "Use of volatile is usually wrong: see Documentation/process/volatile-considered-harmful.rst\n" . $herecurr); + } + +# check for missing a space in a string concatenation + if ($prevrawline =~ /[^\\]\w"$/ && $rawline =~ /^\+[\t ]+"\w/) { + WARN('MISSING_SPACE', + "break quoted strings at a space character\n" . $hereprev); + } + +# check for an embedded function name in a string when the function is known +# This does not work very well for -f --file checking as it depends on patch +# context providing the function name or a single line form for in-file +# function declarations + if ($line =~ /^\+.*$String/ && + defined($context_function) && + get_quoted_string($line, $rawline) =~ /\b$context_function\b/ && + length(get_quoted_string($line, $rawline)) != (length($context_function) + 2)) { + WARN("EMBEDDED_FUNCTION_NAME", + "Prefer using '\"%s...\", __func__' to using '$context_function', this function's name, in a string\n" . $herecurr); + } + +# check for spaces before a quoted newline + if ($rawline =~ /^.*\".*\s\\n/) { + if (WARN("QUOTED_WHITESPACE_BEFORE_NEWLINE", + "unnecessary whitespace before a quoted newline\n" . 
$herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/^(\+.*\".*)\s+\\n/$1\\n/; + } + + } + +# concatenated string without spaces between elements + if ($line =~ /$String[A-Z_]/ || $line =~ /[A-Za-z0-9_]$String/) { + CHK("CONCATENATED_STRING", + "Concatenated strings should use spaces between elements\n" . $herecurr); + } + +# uncoalesced string fragments + if ($line =~ /$String\s*"/) { + WARN("STRING_FRAGMENTS", + "Consecutive strings are generally better as a single string\n" . $herecurr); + } + +# check for non-standard and hex prefixed decimal printf formats + my $show_L = 1; #don't show the same defect twice + my $show_Z = 1; + while ($line =~ /(?:^|")([X\t]*)(?:"|$)/g) { + my $string = substr($rawline, $-[1], $+[1] - $-[1]); + $string =~ s/%%/__/g; + # check for %L + if ($show_L && $string =~ /%[\*\d\.\$]*L([diouxX])/) { + WARN("PRINTF_L", + "\%L$1 is non-standard C, use %ll$1\n" . $herecurr); + $show_L = 0; + } + # check for %Z + if ($show_Z && $string =~ /%[\*\d\.\$]*Z([diouxX])/) { + WARN("PRINTF_Z", + "%Z$1 is non-standard C, use %z$1\n" . $herecurr); + $show_Z = 0; + } + # check for 0x<decimal> + if ($string =~ /0x%[\*\d\.\$\Llzth]*[diou]/) { + ERROR("PRINTF_0XDECIMAL", + "Prefixing 0x with decimal output is defective\n" . $herecurr); + } + } + +# check for line continuations in quoted strings with odd counts of " + if ($rawline =~ /\\$/ && $sline =~ tr/"/"/ % 2) { + WARN("LINE_CONTINUATIONS", + "Avoid line continuations in quoted strings\n" . $herecurr); + } + +# warn about #if 0 + if ($line =~ /^.\s*\#\s*if\s+0\b/) { + CHK("REDUNDANT_CODE", + "if this code is redundant consider removing it\n" . + $herecurr); + } + +# check for needless "if (<foo>) fn(<foo>)" uses + if ($prevline =~ /\bif\s*\(\s*($Lval)\s*\)/) { + my $tested = quotemeta($1); + my $expr = '\s*\(\s*' . $tested . 
'\s*\)\s*;'; + if ($line =~ /\b(kfree|usb_free_urb|debugfs_remove(?:_recursive)?|(?:kmem_cache|mempool|dma_pool)_destroy)$expr/) { + my $func = $1; + if (WARN('NEEDLESS_IF', + "$func(NULL) is safe and this check is probably not required\n" . $hereprev) && + $fix) { + my $do_fix = 1; + my $leading_tabs = ""; + my $new_leading_tabs = ""; + if ($lines[$linenr - 2] =~ /^\+(\t*)if\s*\(\s*$tested\s*\)\s*$/) { + $leading_tabs = $1; + } else { + $do_fix = 0; + } + if ($lines[$linenr - 1] =~ /^\+(\t+)$func\s*\(\s*$tested\s*\)\s*;\s*$/) { + $new_leading_tabs = $1; + if (length($leading_tabs) + 1 ne length($new_leading_tabs)) { + $do_fix = 0; + } + } else { + $do_fix = 0; + } + if ($do_fix) { + fix_delete_line($fixlinenr - 1, $prevrawline); + $fixed[$fixlinenr] =~ s/^\+$new_leading_tabs/\+$leading_tabs/; + } + } + } + } + +# check for unnecessary "Out of Memory" messages + if ($line =~ /^\+.*\b$logFunctions\s*\(/ && + $prevline =~ /^[ \+]\s*if\s*\(\s*(\!\s*|NULL\s*==\s*)?($Lval)(\s*==\s*NULL\s*)?\s*\)/ && + (defined $1 || defined $3) && + $linenr > 3) { + my $testval = $2; + my $testline = $lines[$linenr - 3]; + + my ($s, $c) = ctx_statement_block($linenr - 3, $realcnt, 0); +# print("line: <$line>\nprevline: <$prevline>\ns: <$s>\nc: <$c>\n\n\n"); + + if ($s =~ /(?:^|\n)[ \+]\s*(?:$Type\s*)?\Q$testval\E\s*=\s*(?:\([^\)]*\)\s*)?\s*(?:devm_)?(?:[kv][czm]alloc(?:_node|_array)?\b|kstrdup|kmemdup|(?:dev_)?alloc_skb)/) { + WARN("OOM_MESSAGE", + "Possible unnecessary 'out of memory' message\n" . $hereprev); + } + } + +# check for logging functions with KERN_<LEVEL> + if ($line !~ /printk(?:_ratelimited|_once)?\s*\(/ && + $line =~ /\b$logFunctions\s*\(.*\b(KERN_[A-Z]+)\b/) { + my $level = $1; + if (WARN("UNNECESSARY_KERN_LEVEL", + "Possible unnecessary $level\n" . 
$herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/\s*$level\s*//; + } + } + +# check for logging continuations + if ($line =~ /\bprintk\s*\(\s*KERN_CONT\b|\bpr_cont\s*\(/) { + WARN("LOGGING_CONTINUATION", + "Avoid logging continuation uses where feasible\n" . $herecurr); + } + +# check for mask then right shift without a parentheses + if ($^V && $^V ge 5.10.0 && + $line =~ /$LvalOrFunc\s*\&\s*($LvalOrFunc)\s*>>/ && + $4 !~ /^\&/) { # $LvalOrFunc may be &foo, ignore if so + WARN("MASK_THEN_SHIFT", + "Possible precedence defect with mask then right shift - may need parentheses\n" . $herecurr); + } + +# check for pointer comparisons to NULL + if ($^V && $^V ge 5.10.0) { + while ($line =~ /\b$LvalOrFunc\s*(==|\!=)\s*NULL\b/g) { + my $val = $1; + my $equal = "!"; + $equal = "" if ($4 eq "!="); + if (CHK("COMPARISON_TO_NULL", + "Comparison to NULL could be written \"${equal}${val}\"\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/\b\Q$val\E\s*(?:==|\!=)\s*NULL\b/$equal$val/; + } + } + } + +# check for bad placement of section $InitAttribute (e.g.: __initdata) + if ($line =~ /(\b$InitAttribute\b)/) { + my $attr = $1; + if ($line =~ /^\+\s*static\s+(?:const\s+)?(?:$attr\s+)?($NonptrTypeWithAttr)\s+(?:$attr\s+)?($Ident(?:\[[^]]*\])?)\s*[=;]/) { + my $ptr = $1; + my $var = $2; + if ((($ptr =~ /\b(union|struct)\s+$attr\b/ && + ERROR("MISPLACED_INIT", + "$attr should be placed after $var\n" . $herecurr)) || + ($ptr !~ /\b(union|struct)\s+$attr\b/ && + WARN("MISPLACED_INIT", + "$attr should be placed after $var\n" . $herecurr))) && + $fix) { + $fixed[$fixlinenr] =~ s/(\bstatic\s+(?:const\s+)?)(?:$attr\s+)?($NonptrTypeWithAttr)\s+(?:$attr\s+)?($Ident(?:\[[^]]*\])?)\s*([=;])\s*/"$1" . trim(string_find_replace($2, "\\s*$attr\\s*", " ")) . " " . trim(string_find_replace($3, "\\s*$attr\\s*", "")) . " $attr" . ("$4" eq ";" ? 
";" : " = ")/e; + } + } + } + +# check for $InitAttributeData (ie: __initdata) with const + if ($line =~ /\bconst\b/ && $line =~ /($InitAttributeData)/) { + my $attr = $1; + $attr =~ /($InitAttributePrefix)(.*)/; + my $attr_prefix = $1; + my $attr_type = $2; + if (ERROR("INIT_ATTRIBUTE", + "Use of const init definition must use ${attr_prefix}initconst\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ + s/$InitAttributeData/${attr_prefix}initconst/; + } + } + +# check for $InitAttributeConst (ie: __initconst) without const + if ($line !~ /\bconst\b/ && $line =~ /($InitAttributeConst)/) { + my $attr = $1; + if (ERROR("INIT_ATTRIBUTE", + "Use of $attr requires a separate use of const\n" . $herecurr) && + $fix) { + my $lead = $fixed[$fixlinenr] =~ + /(^\+\s*(?:static\s+))/; + $lead = rtrim($1); + $lead = "$lead " if ($lead !~ /^\+$/); + $lead = "${lead}const "; + $fixed[$fixlinenr] =~ s/(^\+\s*(?:static\s+))/$lead/; + } + } + +# check for __read_mostly with const non-pointer (should just be const) + if ($line =~ /\b__read_mostly\b/ && + $line =~ /($Type)\s*$Ident/ && $1 !~ /\*\s*$/ && $1 =~ /\bconst\b/) { + if (ERROR("CONST_READ_MOSTLY", + "Invalid use of __read_mostly with const type\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/\s+__read_mostly\b//; + } + } + +# don't use __constant_<foo> functions outside of include/uapi/ + if ($realfile !~ m@^include/uapi/@ && + $line =~ /(__constant_(?:htons|ntohs|[bl]e(?:16|32|64)_to_cpu|cpu_to_[bl]e(?:16|32|64)))\s*\(/) { + my $constant_func = $1; + my $func = $constant_func; + $func =~ s/^__constant_//; + if (WARN("CONSTANT_CONVERSION", + "$constant_func should be $func\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/\b$constant_func\b/$func/g; + } + } + +# prefer usleep_range over udelay + if ($line =~ /\budelay\s*\(\s*(\d+)\s*\)/) { + my $delay = $1; + # ignore udelay's < 10, however + if (! 
($delay < 10) ) { + CHK("USLEEP_RANGE", + "usleep_range is preferred over udelay; see Documentation/timers/timers-howto.txt\n" . $herecurr); + } + if ($delay > 2000) { + WARN("LONG_UDELAY", + "long udelay - prefer mdelay; see arch/arm/include/asm/delay.h\n" . $herecurr); + } + } + +# warn about unexpectedly long msleep's + if ($line =~ /\bmsleep\s*\((\d+)\);/) { + if ($1 < 20) { + WARN("MSLEEP", + "msleep < 20ms can sleep for up to 20ms; see Documentation/timers/timers-howto.txt\n" . $herecurr); + } + } + +# check for comparisons of jiffies + if ($line =~ /\bjiffies\s*$Compare|$Compare\s*jiffies\b/) { + WARN("JIFFIES_COMPARISON", + "Comparing jiffies is almost always wrong; prefer time_after, time_before and friends\n" . $herecurr); + } + +# check for comparisons of get_jiffies_64() + if ($line =~ /\bget_jiffies_64\s*\(\s*\)\s*$Compare|$Compare\s*get_jiffies_64\s*\(\s*\)/) { + WARN("JIFFIES_COMPARISON", + "Comparing get_jiffies_64() is almost always wrong; prefer time_after64, time_before64 and friends\n" . $herecurr); + } + +# warn about #ifdefs in C files +# if ($line =~ /^.\s*\#\s*if(|n)def/ && ($realfile =~ /\.c$/)) { +# print "#ifdef in C files should be avoided\n"; +# print "$herecurr"; +# $clean = 0; +# } + +# warn about spacing in #ifdefs + if ($line =~ /^.\s*\#\s*(ifdef|ifndef|elif)\s\s+/) { + if (ERROR("SPACING", + "exactly one space required after that #$1\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ + s/^(.\s*\#\s*(ifdef|ifndef|elif))\s{2,}/$1 /; + } + + } + +# check for spinlock_t definitions without a comment. + if ($line =~ /^.\s*(struct\s+mutex|spinlock_t)\s+\S+;/ || + $line =~ /^.\s*(DEFINE_MUTEX)\s*\(/) { + my $which = $1; + if (!ctx_has_comment($first_line, $linenr)) { + CHK("UNCOMMENTED_DEFINITION", + "$1 definition without comment\n" . $herecurr); + } + } +# check for memory barriers without a comment. 
+ + my $barriers = qr{ + mb| + rmb| + wmb| + read_barrier_depends + }x; + my $barrier_stems = qr{ + mb__before_atomic| + mb__after_atomic| + store_release| + load_acquire| + store_mb| + (?:$barriers) + }x; + my $all_barriers = qr{ + (?:$barriers)| + smp_(?:$barrier_stems)| + virt_(?:$barrier_stems) + }x; + + if ($line =~ /\b(?:$all_barriers)\s*\(/) { + if (!ctx_has_comment($first_line, $linenr)) { + WARN("MEMORY_BARRIER", + "memory barrier without comment\n" . $herecurr); + } + } + + my $underscore_smp_barriers = qr{__smp_(?:$barrier_stems)}x; + + if ($realfile !~ m@^include/asm-generic/@ && + $realfile !~ m@/barrier\.h$@ && + $line =~ m/\b(?:$underscore_smp_barriers)\s*\(/ && + $line !~ m/^.\s*\#\s*define\s+(?:$underscore_smp_barriers)\s*\(/) { + WARN("MEMORY_BARRIER", + "__smp memory barriers shouldn't be used outside barrier.h and asm-generic\n" . $herecurr); + } + +# check for waitqueue_active without a comment. + if ($line =~ /\bwaitqueue_active\s*\(/) { + if (!ctx_has_comment($first_line, $linenr)) { + WARN("WAITQUEUE_ACTIVE", + "waitqueue_active without comment\n" . $herecurr); + } + } + +# check for smp_read_barrier_depends and read_barrier_depends + if (!$file && $line =~ /\b(smp_|)read_barrier_depends\s*\(/) { + WARN("READ_BARRIER_DEPENDS", + "$1read_barrier_depends should only be used in READ_ONCE or DEC Alpha code\n" . $herecurr); + } + +# check of hardware specific defines + if ($line =~ m@^.\s*\#\s*if.*\b(__i386__|__powerpc64__|__sun__|__s390x__)\b@ && $realfile !~ m@include/asm-@) { + CHK("ARCH_DEFINES", + "architecture specific defines should be avoided\n" . $herecurr); + } + +# check that the storage class is not after a type + if ($line =~ /\b($Type)\s+($Storage)\b/) { + WARN("STORAGE_CLASS", + "storage class '$2' should be located before type '$1'\n" . 
$herecurr); + } +# Check that the storage class is at the beginning of a declaration + if ($line =~ /\b$Storage\b/ && + $line !~ /^.\s*$Storage/ && + $line =~ /^.\s*(.+?)\$Storage\s/ && + $1 !~ /[\,\)]\s*$/) { + WARN("STORAGE_CLASS", + "storage class should be at the beginning of the declaration\n" . $herecurr); + } + +# check the location of the inline attribute, that it is between +# storage class and type. + if ($line =~ /\b$Type\s+$Inline\b/ || + $line =~ /\b$Inline\s+$Storage\b/) { + ERROR("INLINE_LOCATION", + "inline keyword should sit between storage class and type\n" . $herecurr); + } + +# Check for __inline__ and __inline, prefer inline + if ($realfile !~ m@\binclude/uapi/@ && + $line =~ /\b(__inline__|__inline)\b/) { + if (WARN("INLINE", + "plain inline is preferred over $1\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/\b(__inline__|__inline)\b/inline/; + + } + } + +# Check for __attribute__ packed, prefer __packed + if ($realfile !~ m@\binclude/uapi/@ && + $line =~ /\b__attribute__\s*\(\s*\(.*\bpacked\b/) { + WARN("PREFER_PACKED", + "__packed is preferred over __attribute__((packed))\n" . $herecurr); + } + +# Check for __attribute__ aligned, prefer __aligned + if ($realfile !~ m@\binclude/uapi/@ && + $line =~ /\b__attribute__\s*\(\s*\(.*aligned/) { + WARN("PREFER_ALIGNED", + "__aligned(size) is preferred over __attribute__((aligned(size)))\n" . $herecurr); + } + +# Check for __attribute__ format(printf, prefer __printf + if ($realfile !~ m@\binclude/uapi/@ && + $line =~ /\b__attribute__\s*\(\s*\(\s*format\s*\(\s*printf/) { + if (WARN("PREFER_PRINTF", + "__printf(string-index, first-to-check) is preferred over __attribute__((format(printf, string-index, first-to-check)))\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*printf\s*,\s*(.*)\)\s*\)\s*\)/"__printf(" . trim($1) . 
")"/ex; + + } + } + +# Check for __attribute__ format(scanf, prefer __scanf + if ($realfile !~ m@\binclude/uapi/@ && + $line =~ /\b__attribute__\s*\(\s*\(\s*format\s*\(\s*scanf\b/) { + if (WARN("PREFER_SCANF", + "__scanf(string-index, first-to-check) is preferred over __attribute__((format(scanf, string-index, first-to-check)))\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*scanf\s*,\s*(.*)\)\s*\)\s*\)/"__scanf(" . trim($1) . ")"/ex; + } + } + +# Check for __attribute__ weak, or __weak declarations (may have link issues) + if ($^V && $^V ge 5.10.0 && + $line =~ /(?:$Declare|$DeclareMisordered)\s*$Ident\s*$balanced_parens\s*(?:$Attribute)?\s*;/ && + ($line =~ /\b__attribute__\s*\(\s*\(.*\bweak\b/ || + $line =~ /\b__weak\b/)) { + ERROR("WEAK_DECLARATION", + "Using weak declarations can have unintended link defects\n" . $herecurr); + } + +# check for c99 types like uint8_t used outside of uapi/ and tools/ + if ($realfile !~ m@\binclude/uapi/@ && + $realfile !~ m@\btools/@ && + $line =~ /\b($Declare)\s*$Ident\s*[=;,\[]/) { + my $type = $1; + if ($type =~ /\b($typeC99Typedefs)\b/) { + $type = $1; + my $kernel_type = 'u'; + $kernel_type = 's' if ($type =~ /^_*[si]/); + $type =~ /(\d+)/; + $kernel_type .= $1; + if (CHK("PREFER_KERNEL_TYPES", + "Prefer kernel type '$kernel_type' over '$type'\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/\b$type\b/$kernel_type/; + } + } + } + +# check for cast of C90 native int or longer types constants + if ($line =~ /(\(\s*$C90_int_types\s*\)\s*)($Constant)\b/) { + my $cast = $1; + my $const = $2; + if (WARN("TYPECAST_INT_CONSTANT", + "Unnecessary typecast of c90 int constant\n" . 
$herecurr) && + $fix) { + my $suffix = ""; + my $newconst = $const; + $newconst =~ s/${Int_type}$//; + $suffix .= 'U' if ($cast =~ /\bunsigned\b/); + if ($cast =~ /\blong\s+long\b/) { + $suffix .= 'LL'; + } elsif ($cast =~ /\blong\b/) { + $suffix .= 'L'; + } + $fixed[$fixlinenr] =~ s/\Q$cast\E$const\b/$newconst$suffix/; + } + } + +# check for sizeof(&) + if ($line =~ /\bsizeof\s*\(\s*\&/) { + WARN("SIZEOF_ADDRESS", + "sizeof(& should be avoided\n" . $herecurr); + } + +# check for sizeof without parenthesis + if ($line =~ /\bsizeof\s+((?:\*\s*|)$Lval|$Type(?:\s+$Lval|))/) { + if (WARN("SIZEOF_PARENTHESIS", + "sizeof $1 should be sizeof($1)\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/\bsizeof\s+((?:\*\s*|)$Lval|$Type(?:\s+$Lval|))/"sizeof(" . trim($1) . ")"/ex; + } + } + +# check for struct spinlock declarations + if ($line =~ /^.\s*\bstruct\s+spinlock\s+\w+\s*;/) { + WARN("USE_SPINLOCK_T", + "struct spinlock should be spinlock_t\n" . $herecurr); + } + +# check for seq_printf uses that could be seq_puts + if ($sline =~ /\bseq_printf\s*\(.*"\s*\)\s*;\s*$/) { + my $fmt = get_quoted_string($line, $rawline); + $fmt =~ s/%%//g; + if ($fmt !~ /%/) { + if (WARN("PREFER_SEQ_PUTS", + "Prefer seq_puts to seq_printf\n" . 
$herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/\bseq_printf\b/seq_puts/; + } + } + } + +# check for vsprintf extension %p<foo> misuses + if ($^V && $^V ge 5.10.0 && + defined $stat && + $stat =~ /^\+(?![^\{]*\{\s*).*\b(\w+)\s*\(.*$String\s*,/s && + $1 !~ /^_*volatile_*$/) { + my $specifier; + my $extension; + my $bad_specifier = ""; + my $stat_real; + + my $lc = $stat =~ tr@\n@@; + $lc = $lc + $linenr; + for (my $count = $linenr; $count <= $lc; $count++) { + my $fmt = get_quoted_string($lines[$count - 1], raw_line($count, 0)); + $fmt =~ s/%%//g; + + while ($fmt =~ /(\%[\*\d\.]*p(\w))/g) { + $specifier = $1; + $extension = $2; + if ($extension !~ /[SsBKRraEhMmIiUDdgVCbGNOx]/) { + $bad_specifier = $specifier; + last; + } + if ($extension eq "x" && !defined($stat_real)) { + if (!defined($stat_real)) { + $stat_real = get_stat_real($linenr, $lc); + } + WARN("VSPRINTF_SPECIFIER_PX", + "Using vsprintf specifier '\%px' potentially exposes the kernel memory layout, if you don't really need the address please consider using '\%p'.\n" . "$here\n$stat_real\n"); + } + } + if ($bad_specifier ne "") { + my $stat_real = get_stat_real($linenr, $lc); + my $ext_type = "Invalid"; + my $use = ""; + if ($bad_specifier =~ /p[Ff]/) { + $ext_type = "Deprecated"; + $use = " - use %pS instead"; + $use =~ s/pS/ps/ if ($bad_specifier =~ /pf/); + } + + WARN("VSPRINTF_POINTER_EXTENSION", + "$ext_type vsprintf pointer extension '$bad_specifier'$use\n" . "$here\n$stat_real\n"); + } + } + } + +# Check for misused memsets + if ($^V && $^V ge 5.10.0 && + defined $stat && + $stat =~ /^\+(?:.*?)\bmemset\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*$FuncArg\s*\)/) { + + my $ms_addr = $2; + my $ms_val = $7; + my $ms_size = $12; + + if ($ms_size =~ /^(0x|)0$/i) { + ERROR("MEMSET", + "memset to 0's uses 0 as the 2nd argument, not the 3rd\n" . "$here\n$stat\n"); + } elsif ($ms_size =~ /^(0x|)1$/i) { + WARN("MEMSET", + "single byte memset is suspicious. Swapped 2nd/3rd argument?\n" . 
"$here\n$stat\n"); + } + } + +# Check for memcpy(foo, bar, ETH_ALEN) that could be ether_addr_copy(foo, bar) +# if ($^V && $^V ge 5.10.0 && +# defined $stat && +# $stat =~ /^\+(?:.*?)\bmemcpy\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/) { +# if (WARN("PREFER_ETHER_ADDR_COPY", +# "Prefer ether_addr_copy() over memcpy() if the Ethernet addresses are __aligned(2)\n" . "$here\n$stat\n") && +# $fix) { +# $fixed[$fixlinenr] =~ s/\bmemcpy\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/ether_addr_copy($2, $7)/; +# } +# } + +# Check for memcmp(foo, bar, ETH_ALEN) that could be ether_addr_equal*(foo, bar) +# if ($^V && $^V ge 5.10.0 && +# defined $stat && +# $stat =~ /^\+(?:.*?)\bmemcmp\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/) { +# WARN("PREFER_ETHER_ADDR_EQUAL", +# "Prefer ether_addr_equal() or ether_addr_equal_unaligned() over memcmp()\n" . "$here\n$stat\n") +# } + +# check for memset(foo, 0x0, ETH_ALEN) that could be eth_zero_addr +# check for memset(foo, 0xFF, ETH_ALEN) that could be eth_broadcast_addr +# if ($^V && $^V ge 5.10.0 && +# defined $stat && +# $stat =~ /^\+(?:.*?)\bmemset\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/) { +# +# my $ms_val = $7; +# +# if ($ms_val =~ /^(?:0x|)0+$/i) { +# if (WARN("PREFER_ETH_ZERO_ADDR", +# "Prefer eth_zero_addr over memset()\n" . "$here\n$stat\n") && +# $fix) { +# $fixed[$fixlinenr] =~ s/\bmemset\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*,\s*ETH_ALEN\s*\)/eth_zero_addr($2)/; +# } +# } elsif ($ms_val =~ /^(?:0xff|255)$/i) { +# if (WARN("PREFER_ETH_BROADCAST_ADDR", +# "Prefer eth_broadcast_addr() over memset()\n" . 
"$here\n$stat\n") && +# $fix) { +# $fixed[$fixlinenr] =~ s/\bmemset\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*,\s*ETH_ALEN\s*\)/eth_broadcast_addr($2)/; +# } +# } +# } + +# typecasts on min/max could be min_t/max_t + if ($^V && $^V ge 5.10.0 && + defined $stat && + $stat =~ /^\+(?:.*?)\b(min|max)\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\)/) { + if (defined $2 || defined $7) { + my $call = $1; + my $cast1 = deparenthesize($2); + my $arg1 = $3; + my $cast2 = deparenthesize($7); + my $arg2 = $8; + my $cast; + + if ($cast1 ne "" && $cast2 ne "" && $cast1 ne $cast2) { + $cast = "$cast1 or $cast2"; + } elsif ($cast1 ne "") { + $cast = $cast1; + } else { + $cast = $cast2; + } + WARN("MINMAX", + "$call() should probably be ${call}_t($cast, $arg1, $arg2)\n" . "$here\n$stat\n"); + } + } + +# check usleep_range arguments + if ($^V && $^V ge 5.10.0 && + defined $stat && + $stat =~ /^\+(?:.*?)\busleep_range\s*\(\s*($FuncArg)\s*,\s*($FuncArg)\s*\)/) { + my $min = $1; + my $max = $7; + if ($min eq $max) { + WARN("USLEEP_RANGE", + "usleep_range should not use min == max args; see Documentation/timers/timers-howto.txt\n" . "$here\n$stat\n"); + } elsif ($min =~ /^\d+$/ && $max =~ /^\d+$/ && + $min > $max) { + WARN("USLEEP_RANGE", + "usleep_range args reversed, use min then max; see Documentation/timers/timers-howto.txt\n" . "$here\n$stat\n"); + } + } + +# check for naked sscanf + if ($^V && $^V ge 5.10.0 && + defined $stat && + $line =~ /\bsscanf\b/ && + ($stat !~ /$Ident\s*=\s*sscanf\s*$balanced_parens/ && + $stat !~ /\bsscanf\s*$balanced_parens\s*(?:$Compare)/ && + $stat !~ /(?:$Compare)\s*\bsscanf\s*$balanced_parens/)) { + my $lc = $stat =~ tr@\n@@; + $lc = $lc + $linenr; + my $stat_real = get_stat_real($linenr, $lc); + WARN("NAKED_SSCANF", + "unchecked sscanf return value\n" . 
"$here\n$stat_real\n"); + } + +# check for simple sscanf that should be kstrto<foo> + if ($^V && $^V ge 5.10.0 && + defined $stat && + $line =~ /\bsscanf\b/) { + my $lc = $stat =~ tr@\n@@; + $lc = $lc + $linenr; + my $stat_real = get_stat_real($linenr, $lc); + if ($stat_real =~ /\bsscanf\b\s*\(\s*$FuncArg\s*,\s*("[^"]+")/) { + my $format = $6; + my $count = $format =~ tr@%@%@; + if ($count == 1 && + $format =~ /^"\%(?i:ll[udxi]|[udxi]ll|ll|[hl]h?[udxi]|[udxi][hl]h?|[hl]h?|[udxi])"$/) { + WARN("SSCANF_TO_KSTRTO", + "Prefer kstrto<type> to single variable sscanf\n" . "$here\n$stat_real\n"); + } + } + } + +# check for new externs in .h files. + if ($realfile =~ /\.h$/ && + $line =~ /^\+\s*(extern\s+)$Type\s*$Ident\s*\(/s) { + if (CHK("AVOID_EXTERNS", + "extern prototypes should be avoided in .h files\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/(.*)\bextern\b\s*(.*)/$1$2/; + } + } + +# check for new externs in .c files. + if ($realfile =~ /\.c$/ && defined $stat && + $stat =~ /^.\s*(?:extern\s+)?$Type\s+($Ident)(\s*)\(/s) + { + my $function_name = $1; + my $paren_space = $2; + + my $s = $stat; + if (defined $cond) { + substr($s, 0, length($cond), ''); + } + if ($s =~ /^\s*;/ && + $function_name ne 'uninitialized_var') + { + WARN("AVOID_EXTERNS", + "externs should be avoided in .c files\n" . $herecurr); + } + + if ($paren_space =~ /\n/) { + WARN("FUNCTION_ARGUMENTS", + "arguments for function declarations should follow identifier\n" . $herecurr); + } + + } elsif ($realfile =~ /\.c$/ && defined $stat && + $stat =~ /^.\s*extern\s+/) + { + WARN("AVOID_EXTERNS", + "externs should be avoided in .c files\n" . 
$herecurr); + } + +# check for function declarations that have arguments without identifier names + if (defined $stat && + $stat =~ /^.\s*(?:extern\s+)?$Type\s*(?:$Ident|\(\s*\*\s*$Ident\s*\))\s*\(\s*([^{]+)\s*\)\s*;/s && + $1 ne "void") { + my $args = trim($1); + while ($args =~ m/\s*($Type\s*(?:$Ident|\(\s*\*\s*$Ident?\s*\)\s*$balanced_parens)?)/g) { + my $arg = trim($1); + if ($arg =~ /^$Type$/ && $arg !~ /enum\s+$Ident$/) { + WARN("FUNCTION_ARGUMENTS", + "function definition argument '$arg' should also have an identifier name\n" . $herecurr); + } + } + } + +# check for function definitions + if ($^V && $^V ge 5.10.0 && + defined $stat && + $stat =~ /^.\s*(?:$Storage\s+)?$Type\s*($Ident)\s*$balanced_parens\s*{/s) { + $context_function = $1; + +# check for multiline function definition with misplaced open brace + my $ok = 0; + my $cnt = statement_rawlines($stat); + my $herectx = $here . "\n"; + for (my $n = 0; $n < $cnt; $n++) { + my $rl = raw_line($linenr, $n); + $herectx .= $rl . "\n"; + $ok = 1 if ($rl =~ /^[ \+]\{/); + $ok = 1 if ($rl =~ /\{/ && $n == 0); + last if $rl =~ /^[ \+].*\{/; + } + if (!$ok) { + ERROR("OPEN_BRACE", + "open brace '{' following function definitions go on the next line\n" . $herectx); + } + } + +# checks for new __setup's + if ($rawline =~ /\b__setup\("([^"]*)"/) { + my $name = $1; + + if (!grep(/$name/, @setup_docs)) { + CHK("UNDOCUMENTED_SETUP", + "__setup appears un-documented -- check Documentation/admin-guide/kernel-parameters.rst\n" . $herecurr); + } + } + +# check for pointless casting of kmalloc return + if ($line =~ /\*\s*\)\s*[kv][czm]alloc(_node){0,1}\b/) { + WARN("UNNECESSARY_CASTS", + "unnecessary cast may hide bugs, see http://c-faq.com/malloc/mallocnocast.html\n" . $herecurr); + } + +# alloc style +# p = alloc(sizeof(struct foo), ...) should be p = alloc(sizeof(*p), ...) 
+ if ($^V && $^V ge 5.10.0 && + $line =~ /\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*([kv][mz]alloc(?:_node)?)\s*\(\s*(sizeof\s*\(\s*struct\s+$Lval\s*\))/) { + CHK("ALLOC_SIZEOF_STRUCT", + "Prefer $3(sizeof(*$1)...) over $3($4...)\n" . $herecurr); + } + +# check for k[mz]alloc with multiplies that could be kmalloc_array/kcalloc + if ($^V && $^V ge 5.10.0 && + defined $stat && + $stat =~ /^\+\s*($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)\s*,/) { + my $oldfunc = $3; + my $a1 = $4; + my $a2 = $10; + my $newfunc = "kmalloc_array"; + $newfunc = "kcalloc" if ($oldfunc eq "kzalloc"); + my $r1 = $a1; + my $r2 = $a2; + if ($a1 =~ /^sizeof\s*\S/) { + $r1 = $a2; + $r2 = $a1; + } + if ($r1 !~ /^sizeof\b/ && $r2 =~ /^sizeof\s*\S/ && + !($r1 =~ /^$Constant$/ || $r1 =~ /^[A-Z_][A-Z0-9_]*$/)) { + my $cnt = statement_rawlines($stat); + my $herectx = get_stat_here($linenr, $cnt, $here); + + if (WARN("ALLOC_WITH_MULTIPLY", + "Prefer $newfunc over $oldfunc with multiply\n" . $herectx) && + $cnt == 1 && + $fix) { + $fixed[$fixlinenr] =~ s/\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)/$1 . ' = ' . "$newfunc(" . trim($r1) . ', ' . trim($r2)/e; + } + } + } + +# check for krealloc arg reuse + if ($^V && $^V ge 5.10.0 && + $line =~ /\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*krealloc\s*\(\s*\1\s*,/) { + WARN("KREALLOC_ARG_REUSE", + "Reusing the krealloc arg is almost always a bug\n" . $herecurr); + } + +# check for alloc argument mismatch + if ($line =~ /\b(kcalloc|kmalloc_array)\s*\(\s*sizeof\b/) { + WARN("ALLOC_ARRAY_ARGS", + "$1 uses number as first arg, sizeof is generally wrong\n" . $herecurr); + } + +# check for multiple semicolons + if ($line =~ /;\s*;\s*$/) { + if (WARN("ONE_SEMICOLON", + "Statements terminations use 1 semicolon\n" . 
$herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/(\s*;\s*){2,}$/;/g; + } + } + +# check for #defines like: 1 << <digit> that could be BIT(digit), it is not exported to uapi + if ($realfile !~ m@^include/uapi/@ && + $line =~ /#\s*define\s+\w+\s+\(?\s*1\s*([ulUL]*)\s*\<\<\s*(?:\d+|$Ident)\s*\)?/) { + my $ull = ""; + $ull = "_ULL" if (defined($1) && $1 =~ /ll/i); + if (CHK("BIT_MACRO", + "Prefer using the BIT$ull macro\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/\(?\s*1\s*[ulUL]*\s*<<\s*(\d+|$Ident)\s*\)?/BIT${ull}($1)/; + } + } + +# check for #if defined CONFIG_<FOO> || defined CONFIG_<FOO>_MODULE + if ($line =~ /^\+\s*#\s*if\s+defined(?:\s*\(?\s*|\s+)(CONFIG_[A-Z_]+)\s*\)?\s*\|\|\s*defined(?:\s*\(?\s*|\s+)\1_MODULE\s*\)?\s*$/) { + my $config = $1; + if (WARN("PREFER_IS_ENABLED", + "Prefer IS_ENABLED(<FOO>) to CONFIG_<FOO> || CONFIG_<FOO>_MODULE\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] = "\+#if IS_ENABLED($config)"; + } + } + +# check for case / default statements not preceded by break/fallthrough/switch + if ($line =~ /^.\s*(?:case\s+(?:$Ident|$Constant)\s*|default):/) { + my $has_break = 0; + my $has_statement = 0; + my $count = 0; + my $prevline = $linenr; + while ($prevline > 1 && ($file || $count < 3) && !$has_break) { + $prevline--; + my $rline = $rawlines[$prevline - 1]; + my $fline = $lines[$prevline - 1]; + last if ($fline =~ /^\@\@/); + next if ($fline =~ /^\-/); + next if ($fline =~ /^.(?:\s*(?:case\s+(?:$Ident|$Constant)[\s$;]*|default):[\s$;]*)*$/); + $has_break = 1 if ($rline =~ /fall[\s_-]*(through|thru)/i); + next if ($fline =~ /^.[\s$;]*$/); + $has_statement = 1; + $count++; + $has_break = 1 if ($fline =~ /\bswitch\b|\b(?:break\s*;[\s$;]*$|exit\s*\(\b|return\b|goto\b|continue\b)/); + } + if (!$has_break && $has_statement) { + WARN("MISSING_BREAK", + "Possible switch case/default not preceded by break or fallthrough comment\n" . 
$herecurr); + } + } + +# check for switch/default statements without a break; + if ($^V && $^V ge 5.10.0 && + defined $stat && + $stat =~ /^\+[$;\s]*(?:case[$;\s]+\w+[$;\s]*:[$;\s]*|)*[$;\s]*\bdefault[$;\s]*:[$;\s]*;/g) { + my $cnt = statement_rawlines($stat); + my $herectx = get_stat_here($linenr, $cnt, $here); + + WARN("DEFAULT_NO_BREAK", + "switch default: should use break\n" . $herectx); + } + +# check for gcc specific __FUNCTION__ + if ($line =~ /\b__FUNCTION__\b/) { + if (WARN("USE_FUNC", + "__func__ should be used instead of gcc specific __FUNCTION__\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/\b__FUNCTION__\b/__func__/g; + } + } + +# check for uses of __DATE__, __TIME__, __TIMESTAMP__ + while ($line =~ /\b(__(?:DATE|TIME|TIMESTAMP)__)\b/g) { + ERROR("DATE_TIME", + "Use of the '$1' macro makes the build non-deterministic\n" . $herecurr); + } + +# check for use of yield() + if ($line =~ /\byield\s*\(\s*\)/) { + WARN("YIELD", + "Using yield() is generally wrong. See yield() kernel-doc (sched/core.c)\n" . $herecurr); + } + +# check for comparisons against true and false + if ($line =~ /\+\s*(.*?)\b(true|false|$Lval)\s*(==|\!=)\s*(true|false|$Lval)\b(.*)$/i) { + my $lead = $1; + my $arg = $2; + my $test = $3; + my $otype = $4; + my $trail = $5; + my $op = "!"; + + ($arg, $otype) = ($otype, $arg) if ($arg =~ /^(?:true|false)$/i); + + my $type = lc($otype); + if ($type =~ /^(?:true|false)$/) { + if (("$test" eq "==" && "$type" eq "true") || + ("$test" eq "!=" && "$type" eq "false")) { + $op = ""; + } + + CHK("BOOL_COMPARISON", + "Using comparison to $otype is error prone\n" . $herecurr); + +## maybe suggesting a correct construct would better +## "Using comparison to $otype is error prone. Perhaps use '${lead}${op}${arg}${trail}'\n" . $herecurr); + + } + } + +# check for bool bitfields + if ($sline =~ /^.\s+bool\s*$Ident\s*:\s*\d+\s*;/) { + WARN("BOOL_BITFIELD", + "Avoid using bool as bitfield. Prefer bool bitfields as unsigned int or u<8|16|32>\n" . 
$herecurr); + } + +# check for semaphores initialized locked + if ($line =~ /^.\s*sema_init.+,\W?0\W?\)/) { + WARN("CONSIDER_COMPLETION", + "consider using a completion\n" . $herecurr); + } + +# recommend kstrto* over simple_strto* and strict_strto* + if ($line =~ /\b((simple|strict)_(strto(l|ll|ul|ull)))\s*\(/) { + WARN("CONSIDER_KSTRTO", + "$1 is obsolete, use k$3 instead\n" . $herecurr); + } + +# check for __initcall(), use device_initcall() explicitly or more appropriate function please + if ($line =~ /^.\s*__initcall\s*\(/) { + WARN("USE_DEVICE_INITCALL", + "please use device_initcall() or more appropriate function instead of __initcall() (see include/linux/init.h)\n" . $herecurr); + } + +# use of NR_CPUS is usually wrong +# ignore definitions of NR_CPUS and usage to define arrays as likely right + if ($line =~ /\bNR_CPUS\b/ && + $line !~ /^.\s*\s*#\s*if\b.*\bNR_CPUS\b/ && + $line !~ /^.\s*\s*#\s*define\b.*\bNR_CPUS\b/ && + $line !~ /^.\s*$Declare\s.*\[[^\]]*NR_CPUS[^\]]*\]/ && + $line !~ /\[[^\]]*\.\.\.[^\]]*NR_CPUS[^\]]*\]/ && + $line !~ /\[[^\]]*NR_CPUS[^\]]*\.\.\.[^\]]*\]/) + { + WARN("NR_CPUS", + "usage of NR_CPUS is often wrong - consider using cpu_possible(), num_possible_cpus(), for_each_possible_cpu(), etc\n" . $herecurr); + } + +# Use of __ARCH_HAS_<FOO> or ARCH_HAVE_<BAR> is wrong. + if ($line =~ /\+\s*#\s*define\s+((?:__)?ARCH_(?:HAS|HAVE)\w*)\b/) { + ERROR("DEFINE_ARCH_HAS", + "#define of '$1' is wrong - use Kconfig variables or standard guards instead\n" . $herecurr); + } + +# likely/unlikely comparisons similar to "(likely(foo) > 0)" + if ($^V && $^V ge 5.10.0 && + $line =~ /\b((?:un)?likely)\s*\(\s*$FuncArg\s*\)\s*$Compare/) { + WARN("LIKELY_MISUSE", + "Using $1 should generally have parentheses around the comparison\n" . $herecurr); + } + +# whine mightly about in_atomic + if ($line =~ /\bin_atomic\s*\(/) { + if ($realfile =~ m@^drivers/@) { + ERROR("IN_ATOMIC", + "do not use in_atomic in drivers\n" . 
$herecurr); + } elsif ($realfile !~ m@^kernel/@) { + WARN("IN_ATOMIC", + "use of in_atomic() is incorrect outside core kernel code\n" . $herecurr); + } + } + +# check for mutex_trylock_recursive usage + if ($line =~ /mutex_trylock_recursive/) { + ERROR("LOCKING", + "recursive locking is bad, do not use this ever.\n" . $herecurr); + } + +# check for lockdep_set_novalidate_class + if ($line =~ /^.\s*lockdep_set_novalidate_class\s*\(/ || + $line =~ /__lockdep_no_validate__\s*\)/ ) { + if ($realfile !~ m@^kernel/lockdep@ && + $realfile !~ m@^include/linux/lockdep@ && + $realfile !~ m@^drivers/base/core@) { + ERROR("LOCKDEP", + "lockdep_no_validate class is reserved for device->mutex.\n" . $herecurr); + } + } + + if ($line =~ /debugfs_create_\w+.*\b$mode_perms_world_writable\b/ || + $line =~ /DEVICE_ATTR.*\b$mode_perms_world_writable\b/) { + WARN("EXPORTED_WORLD_WRITABLE", + "Exporting world writable files is usually an error. Consider more restrictive permissions.\n" . $herecurr); + } + +# check for DEVICE_ATTR uses that could be DEVICE_ATTR_<FOO> +# and whether or not function naming is typical and if +# DEVICE_ATTR permissions uses are unusual too + if ($^V && $^V ge 5.10.0 && + defined $stat && + $stat =~ /\bDEVICE_ATTR\s*\(\s*(\w+)\s*,\s*\(?\s*(\s*(?:${multi_mode_perms_string_search}|0[0-7]{3,3})\s*)\s*\)?\s*,\s*(\w+)\s*,\s*(\w+)\s*\)/) { + my $var = $1; + my $perms = $2; + my $show = $3; + my $store = $4; + my $octal_perms = perms_to_octal($perms); + if ($show =~ /^${var}_show$/ && + $store =~ /^${var}_store$/ && + $octal_perms eq "0644") { + if (WARN("DEVICE_ATTR_RW", + "Use DEVICE_ATTR_RW\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/\bDEVICE_ATTR\s*\(\s*$var\s*,\s*\Q$perms\E\s*,\s*$show\s*,\s*$store\s*\)/DEVICE_ATTR_RW(${var})/; + } + } elsif ($show =~ /^${var}_show$/ && + $store =~ /^NULL$/ && + $octal_perms eq "0444") { + if (WARN("DEVICE_ATTR_RO", + "Use DEVICE_ATTR_RO\n" . 
$herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/\bDEVICE_ATTR\s*\(\s*$var\s*,\s*\Q$perms\E\s*,\s*$show\s*,\s*NULL\s*\)/DEVICE_ATTR_RO(${var})/; + } + } elsif ($show =~ /^NULL$/ && + $store =~ /^${var}_store$/ && + $octal_perms eq "0200") { + if (WARN("DEVICE_ATTR_WO", + "Use DEVICE_ATTR_WO\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/\bDEVICE_ATTR\s*\(\s*$var\s*,\s*\Q$perms\E\s*,\s*NULL\s*,\s*$store\s*\)/DEVICE_ATTR_WO(${var})/; + } + } elsif ($octal_perms eq "0644" || + $octal_perms eq "0444" || + $octal_perms eq "0200") { + my $newshow = "$show"; + $newshow = "${var}_show" if ($show ne "NULL" && $show ne "${var}_show"); + my $newstore = $store; + $newstore = "${var}_store" if ($store ne "NULL" && $store ne "${var}_store"); + my $rename = ""; + if ($show ne $newshow) { + $rename .= " '$show' to '$newshow'"; + } + if ($store ne $newstore) { + $rename .= " '$store' to '$newstore'"; + } + WARN("DEVICE_ATTR_FUNCTIONS", + "Consider renaming function(s)$rename\n" . $herecurr); + } else { + WARN("DEVICE_ATTR_PERMS", + "DEVICE_ATTR unusual permissions '$perms' used\n" . $herecurr); + } + } + +# Mode permission misuses where it seems decimal should be octal +# This uses a shortcut match to avoid unnecessary uses of a slow foreach loop +# o Ignore module_param*(...) uses with a decimal 0 permission as that has a +# specific definition of not visible in sysfs. +# o Ignore proc_create*(...) 
uses with a decimal 0 permission as that means +# use the default permissions + if ($^V && $^V ge 5.10.0 && + defined $stat && + $line =~ /$mode_perms_search/) { + foreach my $entry (@mode_permission_funcs) { + my $func = $entry->[0]; + my $arg_pos = $entry->[1]; + + my $lc = $stat =~ tr@\n@@; + $lc = $lc + $linenr; + my $stat_real = get_stat_real($linenr, $lc); + + my $skip_args = ""; + if ($arg_pos > 1) { + $arg_pos--; + $skip_args = "(?:\\s*$FuncArg\\s*,\\s*){$arg_pos,$arg_pos}"; + } + my $test = "\\b$func\\s*\\(${skip_args}($FuncArg(?:\\|\\s*$FuncArg)*)\\s*[,\\)]"; + if ($stat =~ /$test/) { + my $val = $1; + $val = $6 if ($skip_args ne ""); + if (!($func =~ /^(?:module_param|proc_create)/ && $val eq "0") && + (($val =~ /^$Int$/ && $val !~ /^$Octal$/) || + ($val =~ /^$Octal$/ && length($val) ne 4))) { + ERROR("NON_OCTAL_PERMISSIONS", + "Use 4 digit octal (0777) not decimal permissions\n" . "$here\n" . $stat_real); + } + if ($val =~ /^$Octal$/ && (oct($val) & 02)) { + ERROR("EXPORTED_WORLD_WRITABLE", + "Exporting writable files is usually an error. Consider more restrictive permissions.\n" . "$here\n" . $stat_real); + } + } + } + } + +# check for uses of S_<PERMS> that could be octal for readability + while ($line =~ m{\b($multi_mode_perms_string_search)\b}g) { + my $oval = $1; + my $octal = perms_to_octal($oval); + if (WARN("SYMBOLIC_PERMS", + "Symbolic permissions '$oval' are not preferred. Consider using octal permissions '$octal'.\n" . 
$herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/\Q$oval\E/$octal/; + } + } + +# validate content of MODULE_LICENSE against list from include/linux/module.h + if ($line =~ /\bMODULE_LICENSE\s*\(\s*($String)\s*\)/) { + my $extracted_string = get_quoted_string($line, $rawline); + my $valid_licenses = qr{ + GPL| + GPL\ v2| + GPL\ and\ additional\ rights| + Dual\ BSD/GPL| + Dual\ MIT/GPL| + Dual\ MPL/GPL| + Proprietary + }x; + if ($extracted_string !~ /^"(?:$valid_licenses)"$/x) { + WARN("MODULE_LICENSE", + "unknown module license " . $extracted_string . "\n" . $herecurr); + } + } + } + + # If we have no input at all, then there is nothing to report on + # so just keep quiet. + if ($#rawlines == -1) { + exit(0); + } + + # In mailback mode only produce a report in the negative, for + # things that appear to be patches. + if ($mailback && ($clean == 1 || !$is_patch)) { + exit(0); + } + + # This is not a patch, and we are are in 'no-patch' mode so + # just keep quiet. + if (!$chk_patch && !$is_patch) { + exit(0); + } + + if (!$is_patch && $filename !~ /cover-letter\.patch$/) { + ERROR("NOT_UNIFIED_DIFF", + "Does not appear to be a unified-diff format patch\n"); + } + if ($is_patch && $has_commit_log && $chk_signoff && $signoff == 0) { + ERROR("MISSING_SIGN_OFF", + "Missing Signed-off-by: line(s)\n"); + } + + print report_dump(); + if ($summary && !($clean == 1 && $quiet == 1)) { + print "$filename " if ($summary_file); + print "total: $cnt_error errors, $cnt_warn warnings, " . + (($check)? "$cnt_chk checks, " : "") . + "$cnt_lines lines checked\n"; + } + + if ($quiet == 0) { + # If there were any defects found and not already fixing them + if (!$clean and !$fix) { + print << "EOM" + +NOTE: For some of the reported defects, checkpatch may be able to + mechanically convert to the typical style using --fix or --fix-inplace. +EOM + } + # If there were whitespace errors which cleanpatch can fix + # then suggest that. 
+ if ($rpt_cleaners) { + $rpt_cleaners = 0; + print << "EOM" + +NOTE: Whitespace errors detected. + You may wish to use scripts/cleanpatch or scripts/cleanfile +EOM + } + } + + if ($clean == 0 && $fix && + ("@rawlines" ne "@fixed" || + $#fixed_inserted >= 0 || $#fixed_deleted >= 0)) { + my $newfile = $filename; + $newfile .= ".EXPERIMENTAL-checkpatch-fixes" if (!$fix_inplace); + my $linecount = 0; + my $f; + + @fixed = fix_inserted_deleted_lines(\@fixed, \@fixed_inserted, \@fixed_deleted); + + open($f, '>', $newfile) + or die "$P: Can't open $newfile for write\n"; + foreach my $fixed_line (@fixed) { + $linecount++; + if ($file) { + if ($linecount > 3) { + $fixed_line =~ s/^\+//; + print $f $fixed_line . "\n"; + } + } else { + print $f $fixed_line . "\n"; + } + } + close($f); + + if (!$quiet) { + print << "EOM"; + +Wrote EXPERIMENTAL --fix correction(s) to '$newfile' + +Do _NOT_ trust the results written to this file. +Do _NOT_ submit these changes without inspecting them for correctness. + +This EXPERIMENTAL file is simply a convenience to help rewrite patches. +No warranties, expressed or implied... +EOM + } + } + + if ($quiet == 0) { + print "\n"; + if ($clean == 1) { + print "$vname has no obvious style problems and is ready for submission.\n"; + } else { + print "$vname has style problems, please review.\n"; + } + } + return $clean; +} diff --git a/scripts/git_hooks/commit-msg b/scripts/git_hooks/commit-msg new file mode 100755 index 00000000..233b6702 --- /dev/null +++ b/scripts/git_hooks/commit-msg @@ -0,0 +1,194 @@ +#!/bin/sh +# From Gerrit Code Review 2.15.1 +# +# Part of Gerrit Code Review (https://www.gerritcodereview.com/) +# +# Copyright (C) 2009 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +unset GREP_OPTIONS + +CHANGE_ID_AFTER="Bug|Depends-On|Issue|Test|Feature|Fixes|Fixed" +MSG="$1" + +# Check for, and add if missing, a unique Change-Id +# Rewrites the message file "$MSG" in place (through "$MSG.tmp.$$"); leaves it untouched when the cleaned message is empty, its first line starts with fixup! or squash!, gerrit.createChangeId is false, or a Change-Id: line is already present. +add_ChangeId() { + clean_message=`sed -e ' + /^diff --git .*/{ + s/// + q + } + /^Signed-off-by:/d + /^#/d + ' "$MSG" | git stripspace` + if test -z "$clean_message" + then + return + fi + + # Do not add Change-Id to temp commits + if echo "$clean_message" | head -1 | grep -q '^\(fixup\|squash\)!' + then + return + fi + + if test "false" = "`git config --bool --get gerrit.createChangeId`" + then + return + fi + + # Does Change-Id: already exist? if so, exit (no change).
+ if grep -i '^Change-Id:' "$MSG" >/dev/null + then + return + fi + + id=`_gen_ChangeId` + T="$MSG.tmp.$$" + AWK=awk + if [ -x /usr/xpg4/bin/awk ]; then + # Solaris AWK is just too broken + AWK=/usr/xpg4/bin/awk + fi + + # Get core.commentChar from git config or use default symbol + commentChar=`git config --get core.commentChar` + commentChar=${commentChar:-#} + + # How this works: + # - parse the commit message as (textLine+ blankLine*)* + # - assume textLine+ to be a footer until proven otherwise + # - exception: the first block is not footer (as it is the title) + # - read textLine+ into a variable + # - then count blankLines + # - once the next textLine appears, print textLine+ blankLine* as these + # aren't footer + # - in END, the last textLine+ block is available for footer parsing + $AWK ' + BEGIN { + if (match(ENVIRON["OS"], "Windows")) { + RS="\r?\n" # Required on recent Cygwin + } + # while we start with the assumption that textLine+ + # is a footer, the first block is not. + isFooter = 0 + footerComment = 0 + blankLines = 0 + } + + # Skip lines starting with commentChar without any spaces before it. + /^'"$commentChar"'/ { next } + + # Skip the line starting with the diff command and everything after it, + # up to the end of the file, assuming it is only patch data. + # If more than one line before the diff was empty, strip all but one. + /^diff --git / { + blankLines = 0 + while (getline) { } + next + } + + # Count blank lines outside footer comments + /^$/ && (footerComment == 0) { + blankLines++ + next + } + + # Catch footer comment + /^\[[a-zA-Z0-9-]+:/ && (isFooter == 1) { + footerComment = 1 + } + + /]$/ && (footerComment == 1) { + footerComment = 2 + } + + # We have a non-blank line after blank lines. Handle this.
+ (blankLines > 0) { + print lines + for (i = 0; i < blankLines; i++) { + print "" + } + + lines = "" + blankLines = 0 + isFooter = 1 + footerComment = 0 + } + + # Detect that the current block is not the footer + (footerComment == 0) && (!/^\[?[a-zA-Z0-9-]+:/ || /^[a-zA-Z0-9-]+:\/\//) { + isFooter = 0 + } + + { + # We need this information about the current last comment line + if (footerComment == 2) { + footerComment = 0 + } + if (lines != "") { + lines = lines "\n"; + } + lines = lines $0 + } + + # Footer handling: + # If the last block is considered a footer, splice in the Change-Id at the + # right place. + # Look for the right place to inject Change-Id by considering + # CHANGE_ID_AFTER. Keys listed in it (case insensitive) come first, + # then Change-Id, then everything else (eg. Signed-off-by:). + # + # Otherwise just print the last block, a new line and the Change-Id as a + # block of its own. + END { + unprinted = 1 + if (isFooter == 0) { + print lines "\n" + lines = "" + } + changeIdAfter = "^(" tolower("'"$CHANGE_ID_AFTER"'") "):" + numlines = split(lines, footer, "\n") + for (line = 1; line <= numlines; line++) { + if (unprinted && match(tolower(footer[line]), changeIdAfter) != 1) { + unprinted = 0 + print "Change-Id: I'"$id"'" + } + print footer[line] + } + if (unprinted) { + print "Change-Id: I'"$id"'" + } + }' "$MSG" > "$T" && mv "$T" "$MSG" || rm -f "$T" +} +_gen_ChangeIdInput() { # Print the text that gets hashed: tree, parent (if HEAD resolves), author, committer, a blank line, then $clean_message. + echo "tree `git write-tree`" + if parent=`git rev-parse "HEAD^0" 2>/dev/null` + then + echo "parent $parent" + fi + echo "author `git var GIT_AUTHOR_IDENT`" + echo "committer `git var GIT_COMMITTER_IDENT`" + echo + printf '%s' "$clean_message" +} +_gen_ChangeId() { # Hash that text as a commit object; add_ChangeId prefixes the result with "I". + _gen_ChangeIdInput | + git hash-object -t commit --stdin +} + + +add_ChangeId diff --git a/scripts/git_hooks/install_git_hooks.sh b/scripts/git_hooks/install_git_hooks.sh new file mode 100755 index 00000000..ca7c3fcd --- /dev/null +++ b/scripts/git_hooks/install_git_hooks.sh @@ -0,0 +1,19 @@ +#!/bin/sh +
+set -eu + +CURDIR=$(dirname "$(readlink -m "$0")") +GITDIR=$(git rev-parse --git-dir) +HOOKDIR=$GITDIR/hooks + +if [ ! -e "$HOOKDIR" ]; then + echo "$HOOKDIR does not exist, install hook on main worktree?" + exit 1 +fi + +cp -vf "$CURDIR/pre-commit" "$HOOKDIR" +chmod +x "$HOOKDIR/pre-commit" + +cp -vf "$CURDIR/commit-msg" "$HOOKDIR" +chmod +x "$HOOKDIR/commit-msg" + diff --git a/scripts/git_hooks/pre-commit b/scripts/git_hooks/pre-commit new file mode 100755 index 00000000..9497372d --- /dev/null +++ b/scripts/git_hooks/pre-commit @@ -0,0 +1,71 @@ +#!/bin/bash +# +# Mostly taken from nfs-ganesha +# +# 1. Run checkpatch on the commit +# 2. Check to see if a submodule is not being updated + +# define colors for use in output +green='\033[0;32m' +no_color='\033[0m' +grey='\033[0;90m' + + +if git rev-parse --verify HEAD 2>/dev/null >/dev/null +then + against=HEAD +else + # Initial commit: diff against an empty tree object + against=4b825dc642cb6eb9a060e54bf8d69288fbee4904 +fi + +ROOT_DIR=$(git rev-parse --show-toplevel) + +# skip if no checkpatch (compat for branch switches) +[[ -x "$ROOT_DIR/scripts/checkpatch.pl" ]] || exit 0 + +git diff --cached $against | \ + "$ROOT_DIR/scripts/checkpatch.pl" --no-signoff --no-tree -q - +if [ $? != 0 ]; then + echo -n -e "Continue with commit? ${grey}[N|y]${no_color} " + read -n 1 reply </dev/tty + echo + if [[ "$reply" == "y" || "$reply" == "Y" ]]; then + echo "Permitting to be committed..." + else + echo "Aborting commit due to checkpatch errors." + exit 1 + fi +fi + +# Check whether any submodule is about to be updated with the +# commit. Ask the user for confirmation. 
+[[ -e "${ROOT_DIR}/.gitmodules" ]] || exit 0 +SUBMODULES=$(sed -ne 's/^.*path = //p' "${ROOT_DIR}/.gitmodules") + +# Find staged paths that are exactly a submodule path (-x: whole-line match, so files merely containing the name do not count) +MOD_SUBMODULES=$(git diff --cached --name-only | grep -Fx -- "$SUBMODULES") + +# If no modified submodules, exit with status code 0, else prompt the +# user and exit accordingly +if [[ -n "$MOD_SUBMODULES" ]]; then + echo "Submodules to be committed:" + echo " (use \"git reset HEAD <file>...\" to unstage)" + echo + + for SUB in $MOD_SUBMODULES + do + echo -e "\t${green}modified:\t$SUB${no_color}" + done + echo + echo -n -e "Continue with commit? ${grey}[N|y]${no_color} " + read -n 1 reply </dev/tty + echo + if [[ "$reply" == "y" || "$reply" == "Y" ]]; then + echo "Permitting submodules to be committed..." + else + echo "Aborting commit due to submodule update." + exit 1 + fi +fi +exit 0 diff --git a/scripts/spelling.txt b/scripts/spelling.txt new file mode 100644 index 00000000..9a058cff --- /dev/null +++ b/scripts/spelling.txt @@ -0,0 +1,1254 @@ +# Originally from Debian's Lintian tool. Various false positives have been +# removed, and various additions have been made as they've been discovered +# in the kernel source. 
+# +# License: GPLv2 +# +# The format of each line is: +# mistake||correction +# +abandonning||abandoning +abigious||ambiguous +abitrate||arbitrate +abov||above +abreviated||abbreviated +absense||absence +absolut||absolute +absoulte||absolute +acccess||access +acceess||access +acceleratoin||acceleration +accelleration||acceleration +accesing||accessing +accesnt||accent +accessable||accessible +accesss||access +accidentaly||accidentally +accidentually||accidentally +accoding||according +accomodate||accommodate +accomodates||accommodates +accordign||according +accoring||according +accout||account +accquire||acquire +accquired||acquired +accross||across +acessable||accessible +acess||access +achitecture||architecture +acient||ancient +acitions||actions +acitve||active +acknowldegement||acknowledgment +acknowledgement||acknowledgment +ackowledge||acknowledge +ackowledged||acknowledged +acording||according +activete||activate +actived||activated +actualy||actually +acumulating||accumulating +acumulator||accumulator +adapater||adapter +addional||additional +additionaly||additionally +additonal||additional +addres||address +adddress||address +addreses||addresses +addresss||address +aditional||additional +aditionally||additionally +aditionaly||additionally +adminstrative||administrative +adress||address +adresses||addresses +adviced||advised +afecting||affecting +againt||against +agaist||against +aggreataon||aggregation +aggreation||aggregation +albumns||albums +alegorical||allegorical +algined||aligned +algorith||algorithm +algorithmical||algorithmically +algoritm||algorithm +algoritms||algorithms +algorrithm||algorithm +algorritm||algorithm +aligment||alignment +alignement||alignment +allign||align +alligned||aligned +alllocate||allocate +alloated||allocated +allocatote||allocate +allocatrd||allocated +allocte||allocate +allpication||application +alocate||allocate +alogirhtms||algorithms +alogrithm||algorithm +alot||a lot +alow||allow +alows||allows +altough||although 
+alue||value +ambigious||ambiguous +amoung||among +amout||amount +an union||a union +an user||a user +an userspace||a userspace +an one||a one +analysator||analyzer +ang||and +anniversery||anniversary +annoucement||announcement +anomolies||anomalies +anomoly||anomaly +anway||anyway +aplication||application +appearence||appearance +applicaion||application +appliction||application +applictions||applications +applys||applies +appplications||applications +appropiate||appropriate +appropriatly||appropriately +approriate||appropriate +approriately||appropriately +apropriate||appropriate +aquainted||acquainted +aquired||acquired +aquisition||acquisition +arbitary||arbitrary +architechture||architecture +arguement||argument +arguements||arguments +aritmetic||arithmetic +arne't||aren't +arraival||arrival +artifical||artificial +artillary||artillery +asign||assign +asser||assert +assertation||assertion +assiged||assigned +assigment||assignment +assigments||assignments +assistent||assistant +assocation||association +associcated||associated +assotiated||associated +assum||assume +assumtpion||assumption +asuming||assuming +asycronous||asynchronous +asynchnous||asynchronous +atomatically||automatically +atomicly||atomically +atempt||attempt +attachement||attachment +attched||attached +attemps||attempts +attemping||attempting +attruibutes||attributes +authentification||authentication +automaticaly||automatically +automaticly||automatically +automatize||automate +automatized||automated +automatizes||automates +autonymous||autonomous +auxillary||auxiliary +auxilliary||auxiliary +avaiable||available +avaible||available +availabe||available +availabled||available +availablity||availability +availale||available +availavility||availability +availble||available +availiable||available +availible||available +avalable||available +avaliable||available +aysnc||async +backgroud||background +backword||backward +backwords||backwards +bahavior||behavior +bakup||backup +baloon||balloon 
+baloons||balloons +bandwith||bandwidth +banlance||balance +batery||battery +beacuse||because +becasue||because +becomming||becoming +becuase||because +beeing||being +befor||before +begining||beginning +beter||better +betweeen||between +bianries||binaries +bitmast||bitmask +boardcast||broadcast +borad||board +boundry||boundary +brievely||briefly +broadcat||broadcast +cacluated||calculated +caculation||calculation +calender||calendar +calescing||coalescing +calle||called +callibration||calibration +calucate||calculate +calulate||calculate +cancelation||cancellation +cancle||cancel +capabilites||capabilities +capabilty||capability +capabitilies||capabilities +capatibilities||capabilities +capapbilities||capabilities +carefuly||carefully +cariage||carriage +catagory||category +cehck||check +challange||challenge +challanges||challenges +chanell||channel +changable||changeable +chanined||chained +channle||channel +channnel||channel +charachter||character +charachters||characters +charactor||character +charater||character +charaters||characters +charcter||character +chcek||check +chck||check +checksuming||checksumming +childern||children +childs||children +chiled||child +chked||checked +chnage||change +chnages||changes +chnnel||channel +choosen||chosen +chouse||chose +circumvernt||circumvent +claread||cleared +clared||cleared +closeing||closing +clustred||clustered +coexistance||coexistence +collapsable||collapsible +colorfull||colorful +comand||command +comit||commit +commerical||commercial +comming||coming +comminucation||communication +commited||committed +commiting||committing +committ||commit +commoditiy||commodity +comsume||consume +comsumer||consumer +comsuming||consuming +compability||compatibility +compaibility||compatibility +compatability||compatibility +compatable||compatible +compatibiliy||compatibility +compatibilty||compatibility +compatiblity||compatibility +competion||completion +compilant||compliant +compleatly||completely +completition||completion 
+completly||completely +complient||compliant +componnents||components +compoment||component +compres||compress +compresion||compression +comression||compression +comunication||communication +conbination||combination +conditionaly||conditionally +conected||connected +connecetd||connected +configuartion||configuration +configuratoin||configuration +configuraton||configuration +configuretion||configuration +configutation||configuration +conider||consider +conjuction||conjunction +connectinos||connections +connnection||connection +connnections||connections +consistancy||consistency +consistant||consistent +containes||contains +containts||contains +contaisn||contains +contant||contact +contence||contents +continious||continuous +continous||continuous +continously||continuously +continueing||continuing +contraints||constraints +contol||control +contoller||controller +controled||controlled +controler||controller +controll||control +contruction||construction +contry||country +conuntry||country +convertion||conversion +convertor||converter +convienient||convenient +convinient||convenient +corected||corrected +correponding||corresponding +correponds||corresponds +correspoding||corresponding +cotrol||control +cound||could +couter||counter +coutner||counter +cryptocraphic||cryptographic +cunter||counter +curently||currently +cylic||cyclic +dafault||default +deafult||default +deamon||daemon +decompres||decompress +decription||description +dectected||detected +defailt||default +defferred||deferred +definate||definite +definately||definitely +defintion||definition +defintions||definitions +defualt||default +defult||default +deintializing||deinitializing +deintialize||deinitialize +deintialized||deinitialized +deivce||device +delared||declared +delare||declare +delares||declares +delaring||declaring +delemiter||delimiter +demodualtor||demodulator +demension||dimension +dependancies||dependencies +dependancy||dependency +dependant||dependent +depreacted||deprecated 
+depreacte||deprecate +desactivate||deactivate +desciptor||descriptor +desciptors||descriptors +descripton||description +descrition||description +descritptor||descriptor +desctiptor||descriptor +desriptor||descriptor +desriptors||descriptors +destionation||destination +destory||destroy +destoryed||destroyed +destorys||destroys +destroied||destroyed +detabase||database +deteced||detected +develope||develop +developement||development +developped||developed +developpement||development +developper||developer +developpment||development +deveolpment||development +devided||divided +deviece||device +diable||disable +dictionnary||dictionary +didnt||didn't +diferent||different +differrence||difference +diffrent||different +diffrentiate||differentiate +difinition||definition +dimesions||dimensions +diplay||display +direectly||directly +disassocation||disassociation +disapear||disappear +disapeared||disappeared +disappared||disappeared +disble||disable +disbled||disabled +disconnet||disconnect +discontinous||discontinuous +dispertion||dispersion +dissapears||disappears +distiction||distinction +docuentation||documentation +documantation||documentation +documentaion||documentation +documment||document +doesnt||doesn't +dorp||drop +dosen||doesn +downlad||download +downlads||downloads +druing||during +dynmaic||dynamic +easilly||easily +ecspecially||especially +edditable||editable +editting||editing +efective||effective +efficently||efficiently +ehther||ether +eigth||eight +elementry||elementary +eletronic||electronic +embeded||embedded +enabledi||enabled +enchanced||enhanced +encorporating||incorporating +encrupted||encrypted +encrypiton||encryption +encryptio||encryption +endianess||endianness +enhaced||enhanced +enlightnment||enlightenment +entrys||entries +enocded||encoded +enterily||entirely +enviroiment||environment +enviroment||environment +environement||environment +environent||environment +eqivalent||equivalent +equiped||equipped +equivelant||equivalent 
+equivilant||equivalent +eror||error +errorr||error +estbalishment||establishment +etsablishment||establishment +etsbalishment||establishment +excecutable||executable +exceded||exceeded +excellant||excellent +exeed||exceed +existance||existence +existant||existent +exixt||exist +exlcude||exclude +exlcusive||exclusive +exmaple||example +expecially||especially +explicite||explicit +explicitely||explicitly +explict||explicit +explictely||explicitly +explictly||explicitly +expresion||expression +exprimental||experimental +extened||extended +extensability||extensibility +extention||extension +extracter||extractor +falied||failed +faild||failed +faill||fail +failied||failed +faillure||failure +failue||failure +failuer||failure +failng||failing +faireness||fairness +falied||failed +faliure||failure +fallbck||fallback +familar||familiar +fatser||faster +feauture||feature +feautures||features +fetaure||feature +fetaures||features +fileystem||filesystem +fimware||firmware +firware||firmware +finanize||finalize +findn||find +finilizes||finalizes +finsih||finish +flusing||flushing +folloing||following +followign||following +followings||following +follwing||following +fonud||found +forseeable||foreseeable +forse||force +fortan||fortran +forwardig||forwarding +framming||framing +framwork||framework +frequncy||frequency +frome||from +fucntion||function +fuction||function +fuctions||functions +funcion||function +functionallity||functionality +functionaly||functionally +functionnality||functionality +functonality||functionality +funtion||function +funtions||functions +furthur||further +futhermore||furthermore +futrue||future +gaurenteed||guaranteed +generiously||generously +genereate||generate +genric||generic +globel||global +grabing||grabbing +grahical||graphical +grahpical||graphical +grapic||graphic +grranted||granted +guage||gauge +guarenteed||guaranteed +guarentee||guarantee +halfs||halves +hander||handler +handfull||handful +hanled||handled +happend||happened 
+harware||hardware +heirarchically||hierarchically +helpfull||helpful +hybernate||hibernate +hierachy||hierarchy +hierarchie||hierarchy +howver||however +hsould||should +hypervior||hypervisor +hypter||hyper +identidier||identifier +iligal||illegal +illigal||illegal +imblance||imbalance +immeadiately||immediately +immedaite||immediate +immediatelly||immediately +immediatly||immediately +immidiate||immediate +impelentation||implementation +impementated||implemented +implemantation||implementation +implemenation||implementation +implementaiton||implementation +implementated||implemented +implemention||implementation +implementd||implemented +implemetation||implementation +implemntation||implementation +implentation||implementation +implmentation||implementation +implmenting||implementing +incative||inactive +incomming||incoming +incompatabilities||incompatibilities +incompatable||incompatible +inconsistant||inconsistent +increas||increase +incremeted||incremented +incrment||increment +indendation||indentation +indended||intended +independant||independent +independantly||independently +independed||independent +indiate||indicate +indicat||indicate +inexpect||inexpected +infomation||information +informatiom||information +informations||information +informtion||information +infromation||information +ingore||ignore +inital||initial +initalized||initialized +initalised||initialized +initalise||initialize +initalize||initialize +initation||initiation +initators||initiators +initialiazation||initialization +initializiation||initialization +initialzed||initialized +initilization||initialization +initilize||initialize +inofficial||unofficial +insititute||institute +instal||install +instanciated||instantiated +inteface||interface +integreated||integrated +integrety||integrity +integrey||integrity +intendet||intended +intented||intended +interanl||internal +interchangable||interchangeable +interferring||interfering +interger||integer +intermittant||intermittent +internel||internal 
+interoprability||interoperability +interuupt||interrupt +interrface||interface +interrrupt||interrupt +interrup||interrupt +interrups||interrupts +interruptted||interrupted +interupted||interrupted +interupt||interrupt +intial||initial +intialisation||initialisation +intialised||initialised +intialise||initialise +intialization||initialization +intialized||initialized +intialize||initialize +intregral||integral +intrrupt||interrupt +intterrupt||interrupt +intuative||intuitive +invaid||invalid +invald||invalid +invalde||invalid +invalide||invalid +invalidiate||invalidate +invalud||invalid +invididual||individual +invokation||invocation +invokations||invocations +irrelevent||irrelevant +isnt||isn't +isssue||issue +iternations||iterations +itertation||iteration +itslef||itself +jave||java +jeffies||jiffies +juse||just +jus||just +kown||known +langage||language +langauage||language +langauge||language +langugage||language +lauch||launch +layed||laid +leightweight||lightweight +lengh||length +lenght||length +lenth||length +lesstiff||lesstif +libaries||libraries +libary||library +librairies||libraries +libraris||libraries +licenceing||licencing +loggging||logging +loggin||login +logile||logfile +loosing||losing +losted||lost +machinary||machinery +maintainance||maintenance +maintainence||maintenance +maintan||maintain +makeing||making +malplaced||misplaced +malplace||misplace +managable||manageable +managment||management +mangement||management +manoeuvering||maneuvering +mappping||mapping +mathimatical||mathematical +mathimatic||mathematic +mathimatics||mathematics +maxium||maximum +mechamism||mechanism +meetign||meeting +ment||meant +mergable||mergeable +mesage||message +messags||messages +messgaes||messages +messsage||message +messsages||messages +micropone||microphone +microprocesspr||microprocessor +milliseonds||milliseconds +minium||minimum +minimam||minimum +minumum||minimum +misalinged||misaligned +miscelleneous||miscellaneous +misformed||malformed 
+mispelled||misspelled +mispelt||misspelt +mising||missing +mismactch||mismatch +missmanaged||mismanaged +missmatch||mismatch +miximum||maximum +mmnemonic||mnemonic +mnay||many +modulues||modules +momery||memory +memomry||memory +monochorome||monochrome +monochromo||monochrome +monocrome||monochrome +mopdule||module +mroe||more +mulitplied||multiplied +multidimensionnal||multidimensional +multple||multiple +mumber||number +muticast||multicast +mutilcast||multicast +mutiple||multiple +mutli||multi +nams||names +navagating||navigating +nead||need +neccecary||necessary +neccesary||necessary +neccessary||necessary +necesary||necessary +neded||needed +negaive||negative +negoitation||negotiation +negotation||negotiation +nerver||never +nescessary||necessary +nessessary||necessary +noticable||noticeable +notications||notifications +notifed||notified +numebr||number +numner||number +obtaion||obtain +occassionally||occasionally +occationally||occasionally +occurance||occurrence +occurances||occurrences +occured||occurred +occurence||occurrence +occure||occurred +occured||occurred +occuring||occurring +offet||offset +omited||omitted +omiting||omitting +omitt||omit +ommiting||omitting +ommitted||omitted +onself||oneself +ony||only +operatione||operation +opertaions||operations +optionnal||optional +optmizations||optimizations +orientatied||orientated +orientied||oriented +orignal||original +otherise||otherwise +ouput||output +oustanding||outstanding +overaall||overall +overhread||overhead +overlaping||overlapping +overide||override +overrided||overridden +overriden||overridden +overun||overrun +overwritting||overwriting +overwriten||overwritten +pacakge||package +pachage||package +packacge||package +packege||package +packge||package +packtes||packets +pakage||package +pallette||palette +paln||plan +paramameters||parameters +paramaters||parameters +paramater||parameter +parametes||parameters +parametised||parametrised +paramter||parameter +paramters||parameters 
+particuarly||particularly +particularily||particularly +partiton||partition +pased||passed +passin||passing +pathes||paths +pecularities||peculiarities +peformance||performance +peice||piece +pendantic||pedantic +peprocessor||preprocessor +perfoming||performing +permissons||permissions +peroid||period +persistance||persistence +persistant||persistent +plalform||platform +platfrom||platform +plattform||platform +pleaes||please +ploting||plotting +plugable||pluggable +poinnter||pointer +pointeur||pointer +poiter||pointer +posible||possible +positon||position +possibilites||possibilities +powerfull||powerful +preample||preamble +preapre||prepare +preceeded||preceded +preceeding||preceding +preceed||precede +precendence||precedence +precission||precision +preemptable||preemptible +prefered||preferred +prefferably||preferably +premption||preemption +prepaired||prepared +pressre||pressure +primative||primitive +princliple||principle +priorty||priority +privilaged||privileged +privilage||privilege +priviledge||privilege +priviledges||privileges +probaly||probably +procceed||proceed +proccesors||processors +procesed||processed +proces||process +procesing||processing +processessing||processing +processess||processes +processpr||processor +processsed||processed +processsing||processing +procteted||protected +prodecure||procedure +progams||programs +progess||progress +programers||programmers +programm||program +programms||programs +progresss||progress +promiscous||promiscuous +promps||prompts +pronnounced||pronounced +prononciation||pronunciation +pronouce||pronounce +pronunce||pronounce +propery||property +propigate||propagate +propigation||propagation +propogate||propagate +prosess||process +protable||portable +protcol||protocol +protecion||protection +protocoll||protocol +promixity||proximity +psudo||pseudo +psuedo||pseudo +psychadelic||psychedelic +pwoer||power +quering||querying +randomally||randomly +raoming||roaming +reasearcher||researcher +reasearchers||researchers 
+reasearch||research +recepient||recipient +receving||receiving +recieved||received +recieve||receive +reciever||receiver +recieves||receives +recogniced||recognised +recognizeable||recognizable +recommanded||recommended +recyle||recycle +redircet||redirect +redirectrion||redirection +reename||rename +refcounf||refcount +refence||reference +refered||referred +referenace||reference +refering||referring +refernces||references +refernnce||reference +refrence||reference +registerd||registered +registeresd||registered +registerred||registered +registes||registers +registraration||registration +regsiter||register +regster||register +regualar||regular +reguator||regulator +regulamentations||regulations +reigstration||registration +releated||related +relevent||relevant +remoote||remote +remore||remote +removeable||removable +repectively||respectively +replacable||replaceable +replacments||replacements +replys||replies +reponse||response +representaion||representation +reqeust||request +requestied||requested +requiere||require +requirment||requirement +requred||required +requried||required +requst||request +reseting||resetting +resizeable||resizable +resouce||resource +resouces||resources +resoures||resources +responce||response +ressizes||resizes +ressource||resource +ressources||resources +retransmited||retransmitted +retreived||retrieved +retreive||retrieve +retrive||retrieve +retuned||returned +reudce||reduce +reuest||request +reuqest||request +reutnred||returned +revsion||revision +rmeoved||removed +rmeove||remove +rmeoves||removes +rountine||routine +routins||routines +rquest||request +runing||running +runned||ran +runnning||running +runtine||runtime +sacrifying||sacrificing +safly||safely +safty||safety +savable||saveable +scaned||scanned +scaning||scanning +scarch||search +seach||search +searchs||searches +secquence||sequence +secund||second +segement||segment +senarios||scenarios +sentivite||sensitive +separatly||separately +sepcify||specify +sepc||spec 
+seperated||separated +seperately||separately +seperate||separate +seperatly||separately +seperator||separator +sepperate||separate +sequece||sequence +sequencial||sequential +serveral||several +setts||sets +settting||setting +shotdown||shutdown +shoud||should +shouldnt||shouldn't +shoule||should +shrinked||shrunk +siginificantly||significantly +signabl||signal +similary||similarly +similiar||similar +simlar||similar +simliar||similar +simpified||simplified +singaled||signaled +singal||signal +singed||signed +sleeped||slept +softwares||software +speach||speech +specfic||specific +speciefied||specified +specifc||specific +specifed||specified +specificatin||specification +specificaton||specification +specifing||specifying +specifiying||specifying +speficied||specified +speicify||specify +speling||spelling +spinlcok||spinlock +spinock||spinlock +splitted||split +spreaded||spread +spurrious||spurious +sructure||structure +stablilization||stabilization +staically||statically +staion||station +standardss||standards +standartization||standardization +standart||standard +staticly||statically +stoped||stopped +stoppped||stopped +straming||streaming +struc||struct +structres||structures +stuct||struct +strucuture||structure +stucture||structure +sturcture||structure +subdirectoires||subdirectories +suble||subtle +substract||subtract +submition||submission +succesfully||successfully +succesful||successful +successed||succeeded +successfull||successful +successfuly||successfully +sucessfully||successfully +sucess||success +superflous||superfluous +superseeded||superseded +suplied||supplied +suported||supported +suport||support +supportet||supported +suppored||supported +supportin||supporting +suppoted||supported +suppported||supported +suppport||support +supress||suppress +surpressed||suppressed +surpresses||suppresses +susbsystem||subsystem +suspeneded||suspended +suspicously||suspiciously +swaping||swapping +switchs||switches +swith||switch +swithable||switchable 
+swithc||switch +swithced||switched +swithcing||switching +swithed||switched +swithing||switching +swtich||switch +symetric||symmetric +synax||syntax +synchonized||synchronized +syncronize||synchronize +syncronized||synchronized +syncronizing||synchronizing +syncronus||synchronous +syste||system +sytem||system +sythesis||synthesis +taht||that +targetted||targeted +targetting||targeting +teh||the +temorary||temporary +temproarily||temporarily +therfore||therefore +thier||their +threds||threads +threshhold||threshold +thresold||threshold +throught||through +troughput||throughput +thses||these +tiggered||triggered +tipically||typically +timout||timeout +tmis||this +torerable||tolerable +tramsmitted||transmitted +tramsmit||transmit +tranasction||transaction +tranfer||transfer +transciever||transceiver +transferd||transferred +transfered||transferred +transfering||transferring +transision||transition +transmittd||transmitted +transormed||transformed +trasfer||transfer +trasmission||transmission +treshold||threshold +trigerring||triggering +trun||turn +tunning||tuning +ture||true +tyep||type +udpate||update +uesd||used +uncommited||uncommitted +unconditionaly||unconditionally +underun||underrun +unecessary||unnecessary +unexecpted||unexpected +unexepected||unexpected +unexpcted||unexpected +unexpectd||unexpected +unexpeted||unexpected +unexpexted||unexpected +unfortunatelly||unfortunately +unifiy||unify +unintialized||uninitialized +unkmown||unknown +unknonw||unknown +unknow||unknown +unkown||unknown +unneded||unneeded +unneccecary||unnecessary +unneccesary||unnecessary +unneccessary||unnecessary +unnecesary||unnecessary +unneedingly||unnecessarily +unnsupported||unsupported +unmached||unmatched +unregester||unregister +unresgister||unregister +unrgesiter||unregister +unsinged||unsigned +unstabel||unstable +unsolicitied||unsolicited +unsuccessfull||unsuccessful +unsuported||unsupported +untill||until +unuseful||useless +upate||update +usefule||useful +usefull||useful 
+usege||usage +usera||users +usualy||usually +utilites||utilities +utillities||utilities +utilties||utilities +utiltity||utility +utitity||utility +utitlty||utility +vaid||valid +vaild||valid +valide||valid +variantions||variations +varible||variable +varient||variant +vaule||value +verbse||verbose +verisons||versions +verison||version +verson||version +vicefersa||vice-versa +virtal||virtual +virtaul||virtual +virtiual||virtual +visiters||visitors +vitual||virtual +wakeus||wakeups +wating||waiting +wiat||wait +wether||whether +whataver||whatever +whcih||which +whenver||whenever +wheter||whether +whe||when +wierd||weird +wiil||will +wirte||write +withing||within +wnat||want +workarould||workaround +writeing||writing +writting||writing +zombe||zombie +zomebie||zombie diff --git a/test/common.sh b/test/common.sh new file mode 100644 index 00000000..31a4c5d4 --- /dev/null +++ b/test/common.sh @@ -0,0 +1,117 @@ +# Common test framework, source this file to get config and basic variables: +# BIN/SBIN are the mckernel install paths +# if USELTP is set, +# LTP is the root of the ltp install directory +# LTPBIN is the testcases bin dir +# if USEOSTEST is set, +# OSTEST is the root of the ostest repo after install +# TESTMCK is the test_mck binary +# MCEXEC, IHKCONFIG, IHKOSCTL are the corresponding binaries +# mcreboot and mcstop are functions that perform the action with prints/checks +# +# Additionally, the following parameters can be provided through environment: +# BOOTPARAM to override mcreboot options +# MCREBOOT and MCSTOP can be set to 0/empty to not run the action at start + + +# Unfortunately, there is no standard way to get sourced file's path +# use bash-specific feature. +TEST_BASE=$(dirname "${BASH_SOURCE[0]}") + +if [ -f "$HOME/.mck_test_config" ]; then + . "$HOME/.mck_test_config" +elif [ -f "$TEST_BASE/../mck_test_config.sample" ]; then + . 
"$TEST_BASE/../mck_test_config.sample" +fi + + +if [[ -z "$BIN" ]]; then + if [ -f "$TEST_BASE/../config.h" ]; then + BIN=$(awk -F\" '/^#define BINDIR/ { print $2; exit }' \ + "$TEST_BASE/../config.h") + fi +fi + +if [[ -z "$SBIN" ]]; then + if [ -f "$TEST_BASE/../config.h" ]; then + SBIN=$(awk -F\" '/^#define SBINDIR/ { print $2; exit }' \ + "$TEST_BASE/../config.h") + fi +fi + +if [[ ! -x "$BIN/mcexec" ]]; then + echo no mckernel found $BIN >&2 + exit 1 +fi +MCEXEC="$BIN/mcexec" +IHKOSCTL="$SBIN/ihkosctl" +IHKCONFIG="$SBIN/ihkconfig" + +if ((USELTP)); then + if [[ -z "$LTP" ]]; then + if [[ -f "$HOME/ltp/testcases/bin/fork01" ]]; then + LTP="$HOME/ltp" + fi + fi + + if [[ ! -x "$LTP/testcases/bin/fork01" ]]; then + echo no LTP found $LTP >&2 + exit 1 + fi + LTPBIN="$LTP/testcases/bin" +fi + +if ((USEOSTEST)); then + if [[ -z "$OSTEST" ]]; then + if [[ -f "$HOME/ostest/bin/test_mck" ]]; then + OSTEST="$HOME/ostest" + fi + fi + + if [[ ! -x "$OSTEST"/bin/test_mck ]]; then + echo no ostest found $OSTEST >&2 + exit 1 + fi + TESTMCK="$OSTEST/bin/test_mck" +fi + +# compat variables +BINDIR="$BIN" +SBINDIR="$SBIN" +LTPDIR="$LTP" +OSTESTDIR="$OSTEST" + +if [[ ! -x "$SBIN/mcstop+release.sh" ]]; then + echo mcstop+release: not found >&2 + exit 1 +fi + +if [[ ! -x "$SBIN/mcreboot.sh" ]]; then + echo mcreboot: not found >&2 + exit 1 +fi + +mcstop() { + echo -n "mcstop+release.sh ... " + sudo "$SBIN/mcstop+release.sh" + echo "done" + + if lsmod | grep mcctrl > /dev/null 2>&1; then + echo mckernel shutdown failed >&2 + exit 1 + fi +} + +mcreboot() { + echo -n "mcreboot.sh $BOOTPARAM ... " + sudo "$SBIN/mcreboot.sh" $BOOTPARAM + echo "done" + + if ! 
lsmod | grep mcctrl > /dev/null 2>&1; then + echo mckernel boot failed >&2 + exit 1 + fi +} + +((${MCSTOP-1})) && mcstop +((${MCREBOOT-1})) && mcreboot diff --git a/test/issues/1001/C1001.sh b/test/issues/1001/C1001.sh new file mode 100644 index 00000000..bad0b44a --- /dev/null +++ b/test/issues/1001/C1001.sh @@ -0,0 +1,23 @@ +#!/bin/sh + +USELTP=1 +USEOSTEST=0 + +. ../../common.sh + +${MCEXEC} ./CT_001 +${MCEXEC} ./CT_002 +./CT_003 +./CT_004 + +tid=001 +echo "*** LT_$tid start *******************************" +${MCEXEC} ${LTPBIN}/perf_event_open01 2>&1 | tee ./LT_${tid}.txt +ok=`grep TPASS LT_${tid}.txt | wc -l` +ng=`grep TFAIL LT_${tid}.txt | wc -l` +if [ $ng = 0 ]; then + echo "*** LT_$tid: PASSED (ok:$ok)" +else + echo "*** LT_$tid: FAILED (ok:$ok, ng:$ng)" +fi +echo "" diff --git a/test/issues/1001/CT_001.c b/test/issues/1001/CT_001.c new file mode 100644 index 00000000..b028b1d9 --- /dev/null +++ b/test/issues/1001/CT_001.c @@ -0,0 +1,124 @@ +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <linux/perf_event.h> +#include "./test_chk.h" + +#include "./perf_tool.h" + +#define TEST_NAME "CT_001" + +#define NUM_CNTR 1 + +int main(int argc, char **argv) +{ + int fds[NUM_CNTR]; + long long tmp_count; + long long counts[NUM_CNTR]; + __u32 configs[NUM_CNTR] = { + PERF_COUNT_HW_INSTRUCTIONS}; + char *config_names[NUM_CNTR] = { + "INSTRUCTIONS"}; + struct perf_event_attr pe_attr; + int group_fd = -1; + + int rc = 0; + int i = 0; + int chk_fail = 0; + + printf("*** %s start *******************************\n", TEST_NAME); + + // common config + memset(&pe_attr, 0, sizeof(struct perf_event_attr)); + pe_attr.size = sizeof(struct perf_event_attr); + pe_attr.disabled = 1; + pe_attr.exclude_kernel = 1; + pe_attr.exclude_user = 0; + pe_attr.type = PERF_TYPE_HARDWARE; + + chk_fail = 0; + // perf_event_open + for (i = 0; i < NUM_CNTR; i++) { + pe_attr.config = configs[i]; + + fds[i] = perf_event_open(&pe_attr, 0, -1, group_fd, 0); + if (fds[i] == -1) { + 
chk_fail = 1; + break; + } + if (group_fd == -1) { + group_fd = fds[i]; + } + } + OKNG(chk_fail != 0, "perf_event_open for %d counter", NUM_CNTR); + + // reset counters + for (i = 0; i < NUM_CNTR; i++) { + rc = ioctl(fds[i], PERF_EVENT_IOC_RESET, 0); + CHKANDJUMP(rc != 0, "ioctl RESET"); + } + + chk_fail = 0; + // read counters at first + for (i = 0; i < NUM_CNTR; i++) { + rc = read(fds[i], &tmp_count, sizeof(long long)); + CHKANDJUMP(rc == -1, "read counter[%d]", i); + printf("%-16s: %ld\n", config_names[i], tmp_count); + if (tmp_count != 0) { + chk_fail = 1; + break; + } + } + OKNG(chk_fail != 0, "Reset counter to 0"); + + // start counters at once + rc = ioctl(group_fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP); + OKNG(rc != 0, "Start counter at once"); + + // monitoring target + printf(" do some processing...\n"); + calc_task(); + memory_task(); + + // stop counters at once + rc = ioctl(group_fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP); + OKNG(rc != 0, "Stop counter at once"); + + printf(" counted value is as belows...\n"); + // read counters after processing + for (i = 0; i < NUM_CNTR; i++) { + rc = read(fds[i], &tmp_count, sizeof(long long)); + CHKANDJUMP(rc == -1, "read counter[%d]", i); + printf("%-16s: %ld\n", config_names[i], tmp_count); + counts[i] = tmp_count; + } + + printf(" processing again... 
(to check if counter is stopped)\n"); + // processing again (counters are stopped) + calc_task(); + memory_task(); + + printf(" current value is bellow\n" + " (expected to be same value as last time)\n"); + + chk_fail = 0; + // read counters again to check if counters were stopped + for (i = 0; i < NUM_CNTR; i++) { + rc = read(fds[i], &tmp_count, sizeof(long long)); + printf("%-16s: %ld\n", config_names[i], tmp_count); + if (counts[i] != tmp_count) { + chk_fail = 1; + break; + } + } + OKNG(chk_fail != 0, "Counter is stopped"); + + printf("*** %s PASSED\n\n", TEST_NAME); + + return 0; + +fn_fail: + printf("*** %s FAILED\n\n", TEST_NAME); + + return -1; +} diff --git a/test/issues/1001/CT_002.c b/test/issues/1001/CT_002.c new file mode 100644 index 00000000..c98bd3b7 --- /dev/null +++ b/test/issues/1001/CT_002.c @@ -0,0 +1,130 @@ +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <linux/perf_event.h> +#include "./test_chk.h" + +#include "./perf_tool.h" + +#define TEST_NAME "CT_002" + +#define NUM_CNTR 4 + +int main(int argc, char **argv) +{ + int fds[NUM_CNTR]; + long long tmp_count; + long long counts[NUM_CNTR]; + __u32 configs[NUM_CNTR] = { + PERF_COUNT_HW_INSTRUCTIONS, + PERF_COUNT_HW_CACHE_REFERENCES, + PERF_COUNT_HW_CACHE_MISSES, + PERF_COUNT_HW_BRANCH_MISSES}; + char *config_names[NUM_CNTR] = { + "INSTRUCTIONS", + "CACHE_REFERENCES", + "CACHE_MISSES", + "BRANCH_MISSES"}; + struct perf_event_attr pe_attr; + int group_fd = -1; + + int rc = 0; + int i = 0; + int chk_fail = 0; + + printf("*** %s start *******************************\n", TEST_NAME); + + // common config + memset(&pe_attr, 0, sizeof(struct perf_event_attr)); + pe_attr.size = sizeof(struct perf_event_attr); + pe_attr.disabled = 1; + pe_attr.exclude_kernel = 1; + pe_attr.exclude_user = 0; + pe_attr.type = PERF_TYPE_HARDWARE; + + chk_fail = 0; + // perf_event_open + for (i = 0; i < NUM_CNTR; i++) { + pe_attr.config = configs[i]; + + fds[i] = perf_event_open(&pe_attr, 0, -1, 
group_fd, 0); + if (fds[i] == -1) { + chk_fail = 1; + break; + } + if (group_fd == -1) { + group_fd = fds[i]; + } + } + OKNG(chk_fail != 0, "perf_event_open for %d counters", NUM_CNTR); + + // reset counters + for (i = 0; i < NUM_CNTR; i++) { + rc = ioctl(fds[i], PERF_EVENT_IOC_RESET, 0); + CHKANDJUMP(rc != 0, "ioctl RESET"); + } + + chk_fail = 0; + // read counters at first + for (i = 0; i < NUM_CNTR; i++) { + rc = read(fds[i], &tmp_count, sizeof(long long)); + CHKANDJUMP(rc == -1, "read counter[%d]", i); + printf("%-16s: %ld\n", config_names[i], tmp_count); + if (tmp_count != 0) { + chk_fail = 1; + break; + } + } + OKNG(chk_fail != 0, "Reset counters to 0"); + + // start counters at once + rc = ioctl(group_fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP); + OKNG(rc != 0, "Start counters at once"); + + // monitoring target + printf(" do some processing...\n"); + calc_task(); + memory_task(); + + // stop counters at once + rc = ioctl(group_fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP); + OKNG(rc != 0, "Stop counters at once"); + + printf(" counted values are as belows...\n"); + // read counters after processing + for (i = 0; i < NUM_CNTR; i++) { + rc = read(fds[i], &tmp_count, sizeof(long long)); + CHKANDJUMP(rc == -1, "read counter[%d]", i); + printf("%-16s: %ld\n", config_names[i], tmp_count); + counts[i] = tmp_count; + } + + printf(" processing again... 
(to check if counters are stopped)\n"); + // processing again (counters are stopped) + calc_task(); + memory_task(); + + printf(" current values are as bellow\n" + " (expected to be same value as last time)\n"); + + chk_fail = 0; + // read counters again to check if counters were stopped + for (i = 0; i < NUM_CNTR; i++) { + rc = read(fds[i], &tmp_count, sizeof(long long)); + printf("%-16s: %ld\n", config_names[i], tmp_count); + if (counts[i] != tmp_count) { + chk_fail = 1; + break; + } + } + OKNG(chk_fail != 0, "Counters are stopped"); + + printf("*** %s PASSED\n\n", TEST_NAME); + + return 0; + +fn_fail: + printf("*** %s FAILED\n\n", TEST_NAME); + + return -1; +} diff --git a/test/issues/1001/CT_003.c b/test/issues/1001/CT_003.c new file mode 100644 index 00000000..f1bea679 --- /dev/null +++ b/test/issues/1001/CT_003.c @@ -0,0 +1,95 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <fcntl.h> + +#include "ihklib.h" +#include "./test_chk.h" + +#define TEST_NAME "CT_003" + +#define NUM_CNTR 1 + +int main(int argc, char **argv) +{ + struct ihk_perf_event_attr attr[NUM_CNTR]; + // for x86_64 arch + unsigned long configs[NUM_CNTR] = { + 0x00c0}; + char *config_names[NUM_CNTR] = { + "INSTRUCTIONS"}; + unsigned long counts[NUM_CNTR]; + unsigned long tmp_counts[NUM_CNTR]; + + int rc = 0; + int i = 0; + int chk_fail = 0; + int event_num; + + printf("*** %s start *******************************\n", TEST_NAME); + + // setup attrs + for (i = 0; i < NUM_CNTR; i++) { + attr[i].config = configs[i]; + attr[i].exclude_kernel = 1; + attr[i].exclude_user = 0; + attr[i].disabled = 1; + } + + // set perf_event + rc = ihk_os_setperfevent(0, attr, NUM_CNTR); + OKNG(rc < 0, "setperfevent for %d counter", NUM_CNTR); + event_num = rc; + + // start counters at once + rc = ihk_os_perfctl(0, PERF_EVENT_ENABLE); + OKNG(rc != 0, "Start counter"); + + // monitoring target + printf(" do some processing...\n"); + system("bash 
./processing.sh > /dev/null"); + + // stop counters at once + rc = ihk_os_perfctl(0, PERF_EVENT_DISABLE); + OKNG(rc != 0, "Stop counter"); + + rc = ihk_os_getperfevent(0, tmp_counts, event_num); + OKNG(rc != 0, "getperfevent %d counter", event_num); + printf(" counted value is as belows...\n"); + // read counters after processing + for (i = 0; i < NUM_CNTR; i++) { + printf("%-16s: %ld\n", config_names[i], tmp_counts[i]); + counts[i] = tmp_counts[i]; + } + + printf(" processing again... (to check if counter is stopped)\n"); + // processing again (counters are stopped) + system("bash ./processing.sh > /dev/null"); + + rc = ihk_os_getperfevent(0, tmp_counts, event_num); + OKNG(rc != 0, "getperfevent %d counter", event_num); + printf(" current value is as bellow\n" + " (expected to be same value as last time)\n"); + + // read counters again to check if counters were stopped + chk_fail = 0; + for (i = 0; i < NUM_CNTR; i++) { + printf("%-16s: %ld\n", config_names[i], tmp_counts[i]); + if (counts[i] != tmp_counts[i]) { + chk_fail = 1; + break; + } + } + OKNG(chk_fail != 0, "Counter is stopped"); + + printf("*** %s PASSED\n\n", TEST_NAME); + + return 0; + +fn_fail: + printf("*** %s FAILED\n\n", TEST_NAME); + + return -1; +} diff --git a/test/issues/1001/CT_004.c b/test/issues/1001/CT_004.c new file mode 100644 index 00000000..247082e6 --- /dev/null +++ b/test/issues/1001/CT_004.c @@ -0,0 +1,101 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <fcntl.h> + +#include "ihklib.h" +#include "./test_chk.h" + +#define TEST_NAME "CT_004" + +#define NUM_CNTR 4 + +int main(int argc, char **argv) +{ + struct ihk_perf_event_attr attr[NUM_CNTR]; + // for x86_64 arch + unsigned long configs[NUM_CNTR] = { + 0x00c0, + 0x4f2e, + 0x412e, + 0x00c5}; + char *config_names[NUM_CNTR] = { + "INSTRUCTIONS", + "CACHE_REFERENCES", + "CACHE_MISSES", + "BRANCH_MISSES"}; + unsigned long counts[NUM_CNTR]; + unsigned long 
tmp_counts[NUM_CNTR]; + + int rc = 0; + int i = 0; + int chk_fail = 0; + int event_num; + + printf("*** %s start *******************************\n", TEST_NAME); + + // setup attrs + for (i = 0; i < NUM_CNTR; i++) { + attr[i].config = configs[i]; + attr[i].exclude_kernel = 1; + attr[i].exclude_user = 0; + attr[i].disabled = 1; + } + + // set perf_event + rc = ihk_os_setperfevent(0, attr, NUM_CNTR); + OKNG(rc < 0, "setperfevent for %d counters", NUM_CNTR); + event_num = rc; + + // start counters at once + rc = ihk_os_perfctl(0, PERF_EVENT_ENABLE); + OKNG(rc != 0, "Start counters at once"); + + // monitoring target + printf(" do some processing...\n"); + system("bash ./processing.sh > /dev/null"); + + // stop counters at once + rc = ihk_os_perfctl(0, PERF_EVENT_DISABLE); + OKNG(rc != 0, "Stop counters at once"); + + rc = ihk_os_getperfevent(0, tmp_counts, event_num); + OKNG(rc != 0, "getperfevent %d counters", event_num); + printf(" counted values are as belows...\n"); + // read counters after processing + for (i = 0; i < NUM_CNTR; i++) { + printf("%-16s: %ld\n", config_names[i], tmp_counts[i]); + counts[i] = tmp_counts[i]; + } + + printf(" processing again... 
(to check if counters are stopped)\n"); + // processing again (counters are stopped) + system("bash ./processing.sh > /dev/null"); + + rc = ihk_os_getperfevent(0, tmp_counts, event_num); + OKNG(rc != 0, "getperfevent %d counters", event_num); + printf(" current values are as bellow\n" + " (expected to be same value as last time)\n"); + + // read counters again to check if counters were stopped + chk_fail = 0; + for (i = 0; i < NUM_CNTR; i++) { + printf("%-16s: %ld\n", config_names[i], tmp_counts[i]); + if (counts[i] != tmp_counts[i]) { + chk_fail = 1; + break; + } + } + OKNG(chk_fail != 0, "Counters are stopped"); + + printf("*** %s PASSED\n\n", TEST_NAME); + + return 0; + +fn_fail: + printf("*** %s FAILED\n\n", TEST_NAME); + + return -1; +} diff --git a/test/issues/1001/Makefile b/test/issues/1001/Makefile new file mode 100644 index 00000000..33fd869d --- /dev/null +++ b/test/issues/1001/Makefile @@ -0,0 +1,35 @@ +include $(HOME)/.mck_test_config.mk + +CC = gcc +TARGET=perf_tool.o processing CT_001 CT_002 CT_003 CT_004 + +CPPFLAGS = +LDFLAGS = + +all: $(TARGET) + +CT_001: CT_001.c perf_tool.o + $(CC) -o $@ $^ $(LDFLAGS) + +CT_002: CT_002.c perf_tool.o + $(CC) -o $@ $^ $(LDFLAGS) + +CT_003: CT_003.c perf_tool.o + $(CC) -o $@ $^ $(LDFLAGS) -I$(MCK_DIR)/include -L$(MCK_DIR)/lib -Wl,--rpath=$(MCK_DIR)/lib -l ihk + +CT_004: CT_004.c perf_tool.o + $(CC) -o $@ $^ $(LDFLAGS) -I$(MCK_DIR)/include -L$(MCK_DIR)/lib -Wl,--rpath=$(MCK_DIR)/lib -l ihk + +perf_tool.o: perf_tool.c perf_tool.h + +processing: processing.c perf_tool.o + + +test: all + @echo "#!/bin/sh" > ./processing.sh + @echo "$(BIN)/mcexec ./processing" >> ./processing.sh + @sh ./C1001.sh + +clean: + rm -f $(TARGET) *.o processing.sh + diff --git a/test/issues/1001/README b/test/issues/1001/README new file mode 100644 index 00000000..3ccb4d57 --- /dev/null +++ b/test/issues/1001/README @@ -0,0 +1,62 @@ +【Issue#1001 動作確認】 +□ テスト内容 +1. 既存のperf_event_open機能に影響がないことを確認 +CT_001: 単一イベント種別での計測 + 1. 
perf_event_open を1回呼び出し、1つのカウンタの設定とfdの取得を行う + 2. PERF_EVENT_IOC_RESET を取得したfdにioctlで送信し、 + カウンタの値が0になっていることを確認する + 3. PERF_EVENT_IOC_ENABLE を取得したfdにioctlで送信し、計測を開始する + 4. 計測対象プログラム(calc_task(), memory_task())を実行する + 5. PERF_EVENT_IOC_DISABLE を取得したfdにioctlで送信し、計測を終了する + 6. 計測終了時のカウンタの値を取得し、表示する + 7. カウンタが停止していることを確認するため、計測対象プログラムを再び実行する + 8. カウンタの値が前回取得時から変化していないことを確認する + +CT_002: 複数のイベント種別での計測 + 1. perf_event_open を4回呼び出し、4つのカウンタの設定とfdの取得を行う + 1つ目のカウンタをリーダーとし、4つのカウンタを1つのグループとして設定する + 2. PERF_EVENT_IOC_RESET を各fdにioctlで送信し、 + 各カウンタの値が0になっていることを確認する + 3. PERF_EVENT_IOC_ENABLE をグループリーダーのfdにioctlで送信し、計測を開始する + 4. 計測対象プログラム(calc_task(), memory_task())を実行する + 5. PERF_EVENT_IOC_DISABLE をグループリーダーのfdにioctlで送信し、計測を終了する + 6. 計測終了時の各カウンタの値を取得し、表示する + 7. 各カウンタが停止していることを確認するため、計測対象プログラムを再び実行する + 8. カウンタの値が前回取得時から変化していないことを確認する + +2. 既存のpa_info機能に影響がないことを確認 +CT_003: 単一のイベント種別での計測 + 1. ihk_os_setperfevent を呼び出し、1種類のイベントの設定を行う + 2. ihk_os_perfctlをPERF_EVENT_ENABLE指定で呼び出し、計測を開始する + 3. 計測対象プログラム(calc_task(), memory_task())をmckernel上で実行する + 4. ihk_os_perfctlをPERF_EVENT_DISABLE指定で呼び出し、計測を終了する + 5. 計測終了時の各カウンタの値を取得し、表示する + 6. カウンタが停止していることを確認するため、計測対象プログラムを再び実行する + 7. カウンタの値が前回取得時から変化していないことを確認する + +CT_004: 複数のイベント種別での計測 + 1. ihk_os_setperfevent を呼び出し、4種類のイベントの設定を行う + 2. ihk_os_perfctlをPERF_EVENT_ENABLE指定で呼び出し、計測を開始する + 3. 計測対象プログラム(calc_task(), memory_task())をmckernel上で実行する + 4. ihk_os_perfctlをPERF_EVENT_DISABLE指定で呼び出し、計測を終了する + 5. 計測終了時の各カウンタの値を取得し、表示する + 6. カウンタが停止していることを確認するため、計測対象プログラムを再び実行する + 7. カウンタの値が前回取得時から変化していないことを確認する + +3. LTPによる動作の確認 +LT_001: perf_event_open01 で、PERF_TYPE_HARDWARE 種別の計測が行えることを確認 + 1. 上記テスト中の 1-5件目のテストがTPASSであることを確認 + 2. 
上記テスト中の 6,7件目のテストがTCONFであることを確認 + (PERF_TYPE_SOFTWARE指定はMcKernelでは未サポートのため) + +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +□ 実行結果 +result.log 参照。 +すべての項目をPASSしていることを確認。 diff --git a/test/issues/1001/perf_tool.c b/test/issues/1001/perf_tool.c new file mode 100644 index 00000000..e8e958dc --- /dev/null +++ b/test/issues/1001/perf_tool.c @@ -0,0 +1,54 @@ +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <sys/ioctl.h> +#include <linux/perf_event.h> +#include <asm/unistd.h> + +#define WORKSIZE (1024 * 1024 * 32) +#define LOOPSIZE 1000000 +#define REP 1000 + +int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, + int cpu, int group_fd, unsigned long flags) +{ + int ret; + + ret = syscall(__NR_perf_event_open, hw_event, pid, cpu, + group_fd, flags); + + return ret; +} + +void memory_task(void) +{ + char *work = malloc(WORKSIZE); + char *fromaddr; + char *toaddr; + double r; + int offset; + int i; + + for (i = 0; i < LOOPSIZE; i++) { + r = drand48(); + offset = (int)(r * (double)WORKSIZE); + fromaddr = work + offset; + r = drand48(); + offset = (int)(r * (double)WORKSIZE); + toaddr = work + offset; + *toaddr = *fromaddr; + } +} + +void calc_task(void) +{ + int i, j; + double tmp; + + for (i = 0; i < REP; i++) { + for (j = 0; j < REP; j++) { + tmp = drand48() * drand48(); + } + } +} diff --git a/test/issues/1001/perf_tool.h b/test/issues/1001/perf_tool.h new file mode 100644 index 00000000..988e7378 --- /dev/null +++ b/test/issues/1001/perf_tool.h @@ -0,0 +1,13 @@ +#ifndef __PERFTOOL_H__ +#define __PERFTOOL_H__ + +#include <linux/perf_event.h> + +int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, + int cpu, int group_fd, unsigned long flags); + +long long hw_cache_build(long long id, long long op_id, long long op_result_id); +void memory_task(void); +void 
calc_task(void); + +#endif diff --git a/test/issues/1001/processing.c b/test/issues/1001/processing.c new file mode 100644 index 00000000..6d065e51 --- /dev/null +++ b/test/issues/1001/processing.c @@ -0,0 +1,12 @@ +#include <stdio.h> +#include <stdlib.h> + +#include "./perf_tool.h" + +int main(int argc, char **argv) +{ + calc_task(); + memory_task(); + + return 0; +} diff --git a/test/issues/1001/result.log b/test/issues/1001/result.log new file mode 100644 index 00000000..d357992d --- /dev/null +++ b/test/issues/1001/result.log @@ -0,0 +1,93 @@ +*** CT_001 start ******************************* + [OK] perf_event_open for 1 counter +INSTRUCTIONS : 0 + [OK] Reset counter to 0 + [OK] Start counter at once + do some processing... + [OK] Stop counter at once + counted value is as belows... +INSTRUCTIONS : 291067667 + processing again... (to check if counter is stopped) + current value is bellow + (expected to be same value as last time) +INSTRUCTIONS : 291067667 + [OK] Counter is stopped +*** CT_001 PASSED + +*** CT_002 start ******************************* + [OK] perf_event_open for 4 counters +INSTRUCTIONS : 0 +CACHE_REFERENCES: 0 +CACHE_MISSES : 0 +BRANCH_MISSES : 0 + [OK] Reset counters to 0 + [OK] Start counters at once + do some processing... + [OK] Stop counters at once + counted values are as belows... +INSTRUCTIONS : 291067668 +CACHE_REFERENCES: 1984930 +CACHE_MISSES : 781531 +BRANCH_MISSES : 2784 + processing again... (to check if counters are stopped) + current values are as bellow + (expected to be same value as last time) +INSTRUCTIONS : 291067668 +CACHE_REFERENCES: 1984930 +CACHE_MISSES : 781531 +BRANCH_MISSES : 2784 + [OK] Counters are stopped +*** CT_002 PASSED + +*** CT_003 start ******************************* + [OK] setperfevent for 1 counter + [OK] Start counter + do some processing... + [OK] Stop counter + [OK] getperfevent 1 counter + counted value is as belows... +INSTRUCTIONS : 291184821 + processing again... 
(to check if counter is stopped) + [OK] getperfevent 1 counter + current value is as bellow + (expected to be same value as last time) +INSTRUCTIONS : 291184821 + [OK] Counter is stopped +*** CT_003 PASSED + +*** CT_004 start ******************************* + [OK] setperfevent for 4 counters + [OK] Start counters at once + do some processing... + [OK] Stop counters at once + [OK] getperfevent 4 counters + counted values are as belows... +INSTRUCTIONS : 291184822 +CACHE_REFERENCES: 1986528 +CACHE_MISSES : 780284 +BRANCH_MISSES : 3657 + processing again... (to check if counters are stopped) + [OK] getperfevent 4 counters + current values are as bellow + (expected to be same value as last time) +INSTRUCTIONS : 291184822 +CACHE_REFERENCES: 1986528 +CACHE_MISSES : 780284 +BRANCH_MISSES : 3657 + [OK] Counters are stopped +*** CT_004 PASSED + +*** LT_001 start ******************************* +perf_event_open01 0 TINFO : read event counter succeeded, value: 300000015 +perf_event_open01 1 TPASS : test PERF_TYPE_HARDWARE: PERF_COUNT_HW_INSTRUCTIONS succeeded +perf_event_open01 0 TINFO : read event counter succeeded, value: 0 +perf_event_open01 2 TPASS : test PERF_TYPE_HARDWARE: PERF_COUNT_HW_CACHE_REFERENCES succeeded +perf_event_open01 0 TINFO : read event counter succeeded, value: 0 +perf_event_open01 3 TPASS : test PERF_TYPE_HARDWARE: PERF_COUNT_HW_CACHE_MISSES succeeded +perf_event_open01 0 TINFO : read event counter succeeded, value: 100000006 +perf_event_open01 4 TPASS : test PERF_TYPE_HARDWARE: PERF_COUNT_HW_BRANCH_INSTRUCTIONS succeeded +perf_event_open01 0 TINFO : read event counter succeeded, value: 1 +perf_event_open01 5 TPASS : test PERF_TYPE_HARDWARE: PERF_COUNT_HW_BRANCH_MISSES succeeded +perf_event_open01 6 TCONF : perf_event_open01.c:155: perf_event_open for PERF_COUNT_SW_CPU_CLOCK not supported: TEST_ERRNO=ENOENT(2): No such file or directory +perf_event_open01 7 TCONF : perf_event_open01.c:155: perf_event_open for PERF_COUNT_SW_TASK_CLOCK not supported: 
TEST_ERRNO=ENOENT(2): No such file or directory +*** LT_001: PASSED (ok:5) diff --git a/test/issues/1001/test_chk.h b/test/issues/1001/test_chk.h new file mode 100644 index 00000000..4cef42e8 --- /dev/null +++ b/test/issues/1001/test_chk.h @@ -0,0 +1,23 @@ +#ifndef HEADER_TEST_CHK_H +#define HEADER_TEST_CHK_H + +#define CHKANDJUMP(cond, ...) do {\ + if (cond) {\ + fprintf(stderr, " [NG] ");\ + fprintf(stderr, __VA_ARGS__);\ + fprintf(stderr, " failed\n");\ + goto fn_fail;\ + } \ + } while (0) + +#define OKNG(cond, ...) do {\ + if (cond) {\ + CHKANDJUMP(cond, __VA_ARGS__);\ + } else {\ + fprintf(stdout, " [OK] ");\ + fprintf(stdout, __VA_ARGS__);\ + fprintf(stdout, "\n");\ + } \ + } while (0) + +#endif diff --git a/test/issues/1005/C1005.sh b/test/issues/1005/C1005.sh new file mode 100644 index 00000000..58a70399 --- /dev/null +++ b/test/issues/1005/C1005.sh @@ -0,0 +1,112 @@ +#!/bin/sh + +USELTP=0 +USEOSTEST=0 + +. ../../common.sh + +sudo /bin/sh ${OSTESTDIR}/util/insmod_test_drv.sh + +ulimit_c_bk=`ulimit -Sc` +# set ulimit -c unlimited to dump core +ulimit -Sc unlimited + +$BINDIR/mcexec ./devmap_and_segv | tee ./maps.txt + +# restore ulimit -c +ulimit -c ${ulimit_c_bk} + +sudo /bin/sh ${OSTESTDIR}/util/rmmod_test_drv.sh + +tid=001 +echo "*** CT_$tid start *******************************" +echo "** check file type by readelf" +readelf -h ./core | grep -e "Type:.*CORE" +if [ $? 
== 0 ]; then + echo "*** CT_$tid PASSED ******************************" +else + echo "*** CT_$tid FAILED ******************************" +fi +echo "" + +# check by gdb +VDSO_ADDR=`grep "\[vdso\]" ./maps.txt | cut -f 1 -d "-"` +DEVMAP_ADDR=`grep "mmap_dev2$" ./maps.txt | cut -f 1 -d "-"` +GDB_OUT="./gdb_out.txt" + +expect -c " + set timeout 3 + log_file -noappend ${GDB_OUT} + + spawn gdb --quiet -c ./core ./devmap_and_segv + # check vdso addr + expect \"(gdb)\" + send \"x 0x${VDSO_ADDR}\n\" + + #check devmap addr + expect \"(gdb)\" + send \"x 0x${DEVMAP_ADDR}\n\" + + #check backtrace + expect \"(gdb)\" + send \"bt\n\" + + #check info registers + expect \"(gdb)\" + send \"info registers\n\" + + # quit gdb_test + expect \"(gdb)\" + send \"quit\n\" + + log_file + interact +" > /dev/null + +tid=002 +echo "*** CT_$tid start *******************************" +echo "** check that core contains vdso data" +grep -A 1 "(gdb) x 0x${VDSO_ADDR}" ${GDB_OUT} +grep -A 1 "(gdb) x 0x${VDSO_ADDR}" ${GDB_OUT} | tail -1 | grep -q "0x${VDSO_ADDR}:\s*0x[0-9a-f]\+" +if [ $? == 0 ]; then + echo "*** CT_$tid PASSED ******************************" +else + echo "*** CT_$tid FAILED ******************************" +fi +echo "" + +tid=003 +echo "*** CT_$tid start *******************************" +echo "** check that core dose NOT contain devmap data" +grep -A 1 "(gdb) x 0x${DEVMAP_ADDR}" ${GDB_OUT} +grep -A 1 "(gdb) x 0x${DEVMAP_ADDR}" ${GDB_OUT} | tail -1 | grep -q "0x${DEVMAP_ADDR}:\s*0x[0-9a-f]\+" # must look for data at the devmap address; matching the vdso address here could never fail +if [ $? == 1 ]; then + echo "*** CT_$tid PASSED ******************************" +else + echo "*** CT_$tid FAILED ******************************" +fi +echo "" + +tid=004 +echo "*** CT_$tid start *******************************" +echo "** check that core can be backtraced" +grep -A 1 "(gdb) bt" ${GDB_OUT} +grep -A 1 "(gdb) bt" ${GDB_OUT} | tail -1 | grep -q "^#0.*in main" +if [ $? 
== 0 ]; then + echo "*** CT_$tid PASSED ******************************" +else + echo "*** CT_$tid FAILED ******************************" +fi +echo "" + +tid=005 +echo "*** CT_$tid start *******************************" +echo "** check that core can be got info registers" +grep -A 30 "(gdb) info registers" ${GDB_OUT} +grep -A 30 "(gdb) info registers" ${GDB_OUT} | grep -q "^rip\s*0x.*main" +if [ $? == 0 ]; then + echo "*** CT_$tid PASSED ******************************" +else + echo "*** CT_$tid FAILED ******************************" +fi +echo "" diff --git a/test/issues/1005/Makefile b/test/issues/1005/Makefile new file mode 100644 index 00000000..d8e55287 --- /dev/null +++ b/test/issues/1005/Makefile @@ -0,0 +1,17 @@ +CC = gcc +TARGET=devmap_and_segv + +CPPFLAGS = +LDFLAGS = + +all: $(TARGET) + +devmap_and_segv: devmap_and_segv.c + $(CC) -g -o $@ $^ $(LDFLAGS) + +test: all + @sh ./C1005.sh + +clean: + rm -f $(TARGET) *.o ./core ./core.* ./maps.txt ./gdb_out.txt + diff --git a/test/issues/1005/README b/test/issues/1005/README new file mode 100644 index 00000000..ff2c1086 --- /dev/null +++ b/test/issues/1005/README @@ -0,0 +1,31 @@ +【Issue#1005 動作確認】 +□ テスト内容 +1. 
Issueで報告された症状が修正されたことの確認 +CT_001: McKernelのcoreファイルの形式を確認 + readelf -h で、TypeがCOREであることを確認する + +CT_002: vdso領域の内容がcoreファイルに含まれていることの確認 + gdbのxコマンドでvdso領域のアドレスを指定し、 + 内容が表示されることを確認する + +CT_003: device-fileの内容がcoreファイルに含まれていないことの確認 + gdbのxコマンドでdevice-fileをマップした領域のアドレスを指定し、 + 内容を取得できないことを確認する + +CT_004: backtraceの情報がcoreファイルに含まれていることを確認 + gdbのbtコマンドでbacktraceの情報が表示されることを確認する + +CT_005: registersの情報がcoreファイルに含まれていることを確認 + gdbのinfo registersコマンドでregistersの情報が表示されることを確認する + +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +□ 実行結果 +result.log 参照。 +すべての項目をPASSしていることを確認。 diff --git a/test/issues/1005/devmap_and_segv.c b/test/issues/1005/devmap_and_segv.c new file mode 100644 index 00000000..13606661 --- /dev/null +++ b/test/issues/1005/devmap_and_segv.c @@ -0,0 +1,54 @@ +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <unistd.h> +#include <errno.h> + +#include "./test_chk.h" + +#define TEST_NAME "devmap_and_segv" + +#define PROCFILE_LEN 128 +#define MAP_LEN 4096 +#define DEV_NAME "/dev/test_mck/mmap_dev2" + +int main(int argc, char *argv[]) +{ + int dev_fd = 0; + pid_t pid = getpid(); + void *dev_map = NULL; + char *segv_addr = NULL; + char cmd[128]; + + printf("*** %s start *******************************\n", TEST_NAME); + + /* open device file */ + dev_fd = open(DEV_NAME, O_RDONLY); + OKNG(dev_fd < 0, "open test_device_file:%s", DEV_NAME); + + /* mmap device file */ + dev_map = mmap(NULL, MAP_LEN, PROT_READ, MAP_SHARED, dev_fd, 0); + OKNG(dev_map == MAP_FAILED, "mmap device file"); + printf(" map dev_file to %p\n", dev_map); + + /* print maps */ + sprintf(cmd, "cat /proc/%d/maps", pid); + system(cmd); + + /* occur segv */ + *segv_addr = '0'; + + printf("*** Why reached here? 
***\n"); + return 0; + +fn_fail: + + if (dev_fd > 0) { + close(dev_fd); + } + + return -1; +} diff --git a/test/issues/1005/result.log b/test/issues/1005/result.log new file mode 100644 index 00000000..12e9f053 --- /dev/null +++ b/test/issues/1005/result.log @@ -0,0 +1,77 @@ +insmod /home/satoken/ostest/util/../bin/test_mck.ko +create charcter device /dev/test_mck/mmap_dev(244:0) +create charcter device /dev/test_mck/mmap_dev2(244:1) +000000400000-000000401000 r-xs 0 0:0 0 +000000600000-000000601000 r--s 0 0:0 0 +000000601000-000000602000 rw-s 0 0:0 0 +2aaaaa9f8000-2aaaaaa00000 rw-s 0 0:0 0 +2aaaaaa00000-2aaaaaa21000 r-xs 0 0:0 0 +2aaaaac21000-2aaaaac22000 r--s 0 0:0 0 +2aaaaac22000-2aaaaac24000 rw-s 0 0:0 0 +2aaaaac24000-2aaaaac26000 r-xs 0 0:0 0 [vdso] +2aaaaac26000-2aaaaac27000 rw-p 0 0:0 0 +2aaaaac4d000-2aaaaac4e000 rw-p 0 0:0 0 +2aaaaac4e000-2aaaaae06000 r-xp 0 0:0 0 /usr/lib64/libc-2.17.so +2aaaaae06000-2aaaab006000 ---p 0 0:0 0 /usr/lib64/libc-2.17.so +2aaaab006000-2aaaab00a000 r--p 0 0:0 0 /usr/lib64/libc-2.17.so +2aaaab00a000-2aaaab00c000 rw-p 0 0:0 0 /usr/lib64/libc-2.17.so +2aaaab00c000-2aaaab011000 rw-p 0 0:0 0 +2aaaab011000-2aaaab013000 rw-p 0 0:0 0 +2aaaab013000-2aaaab014000 rw-p 0 0:0 0 +2aaaab014000-2aaaab015000 r--s 0 0:0 0 /dev/test_mck/mmap_dev2 +547fff800000-548000000000 rw-s 0 0:0 0 [stack] +Terminate by signal 11 +remove /dev/test_mck +rmmod /home/satoken/ostest/util/../bin/test_mck.ko +*** CT_001 start ******************************* +** check file type by readelf + Type: CORE (Core file) +*** CT_001 PASSED ****************************** + +*** CT_002 start ******************************* +** check that core contains vdso data +(gdb) x 0x2aaaaac24000 +0x2aaaaac24000: 0x464c457f +*** CT_002 PASSED ****************************** + +*** CT_003 start ******************************* +** check that core dose NOT contain devmap data +(gdb) x 0x2aaaab014000 +0x2aaaab014000: Cannot access memory at address 0x2aaaab014000 +*** CT_003 PASSED 
****************************** + +*** CT_004 start ******************************* +** check that core can be backtraced +(gdb) bt +#0 0x0000000000400ad0 in main (argc=1, argv=0x547ffffffd08) at devmap_and_segv.c:42 +*** CT_004 PASSED ****************************** + +*** CT_005 start ******************************* +** check that core can be got info registers +(gdb) info registers +rax 0x0 0 +rbx 0x0 0 +rcx 0x2aaaaac834a0 46912498054304 +rdx 0x0 0 +rsi 0x547ffffffa30 92908732545584 +rdi 0x2 2 +rbp 0x547ffffffc20 0x547ffffffc20 +rsp 0x547ffffffb70 0x547ffffffb70 +r8 0x0 0 +r9 0x547ffffff930 92908732545328 +r10 0x8 8 +r11 0x246 582 +r12 0xffff8001007d0dc0 -140733185192512 +r13 0xffff800100770000 -140733185589248 +r14 0xb98 2968 +r15 0xffff80010078fa9c -140733185459556 +rip 0x400ad0 0x400ad0 <main+592> +eflags 0x10206 [ PF IF RF ] +cs 0x33 51 +ss 0x3b 59 +ds 0x0 0 +es 0x0 0 +fs 0x0 0 +gs 0x0 0 +(gdb) +*** CT_005 PASSED ****************************** diff --git a/test/issues/1005/test_chk.h b/test/issues/1005/test_chk.h new file mode 100644 index 00000000..4cef42e8 --- /dev/null +++ b/test/issues/1005/test_chk.h @@ -0,0 +1,23 @@ +#ifndef HEADER_TEST_CHK_H +#define HEADER_TEST_CHK_H + +#define CHKANDJUMP(cond, ...) do {\ + if (cond) {\ + fprintf(stderr, " [NG] ");\ + fprintf(stderr, __VA_ARGS__);\ + fprintf(stderr, " failed\n");\ + goto fn_fail;\ + } \ + } while (0) + +#define OKNG(cond, ...) do {\ + if (cond) {\ + CHKANDJUMP(cond, __VA_ARGS__);\ + } else {\ + fprintf(stdout, " [OK] ");\ + fprintf(stdout, __VA_ARGS__);\ + fprintf(stdout, "\n");\ + } \ + } while (0) + +#endif diff --git a/test/issues/1006/C1006.sh b/test/issues/1006/C1006.sh new file mode 100644 index 00000000..d90c8778 --- /dev/null +++ b/test/issues/1006/C1006.sh @@ -0,0 +1,65 @@ +#!/bin/sh + +USELTP=1 +USEOSTEST=1 + +. 
../../common.sh + +tid=001 +echo "*** RT_$tid start *******************************" +sudo ${MCEXEC} ${TESTMCK} -s ptrace -n 8 2>&1 | tee ./RT_${tid}.txt +if grep "RESULT: ok" ./RT_${tid}.txt > /dev/null 2>&1 ; then + echo "*** RT_$tid: PASSED" +else + echo "*** RT_$tid: FAILED" +fi +echo "" + +tid=001 +echo "*** LT_$tid start *******************************" +sudo PATH=${LTPBIN}:${PATH} ${MCEXEC} ${LTPBIN}/ptrace01 2>&1 | tee ./LT_${tid}.txt +ok=`grep TPASS LT_${tid}.txt | wc -l` +ng=`grep TFAIL LT_${tid}.txt | wc -l` +if [ $ng = 0 ]; then + echo "*** LT_$tid: PASSED (ok:$ok)" +else + echo "*** LT_$tid: FAILED (ok:$ok, ng:$ng)" +fi +echo "" + +tid=002 +echo "*** LT_$tid start *******************************" +sudo PATH=${LTPBIN}:${PATH} ${MCEXEC} ${LTPBIN}/ptrace02 2>&1 | tee ./LT_${tid}.txt +ok=`grep TPASS LT_${tid}.txt | wc -l` +ng=`grep TFAIL LT_${tid}.txt | wc -l` +if [ $ng = 0 ]; then + echo "*** LT_$tid: PASSED (ok:$ok)" +else + echo "*** LT_$tid: FAILED (ok:$ok, ng:$ng)" +fi +echo "" + +tid=003 +echo "*** LT_$tid start *******************************" +sudo PATH=${LTPBIN}:${PATH} ${MCEXEC} ${LTPBIN}/ptrace03 2>&1 | tee ./LT_${tid}.txt +ok=`grep TPASS LT_${tid}.txt | wc -l` +ng=`grep TFAIL LT_${tid}.txt | wc -l` +if [ $ng = 0 ]; then + echo "*** LT_$tid: PASSED (ok:$ok)" +else + echo "*** LT_$tid: FAILED (ok:$ok, ng:$ng)" +fi +echo "" + +tid=004 +echo "*** LT_$tid start *******************************" +sudo PATH=${LTPBIN}:${PATH} ${MCEXEC} ${LTPBIN}/ptrace05 2>&1 | tee ./LT_${tid}.txt +ok=`grep TPASS LT_${tid}.txt | wc -l` +ng=`grep TFAIL LT_${tid}.txt | wc -l` +if [ $ng = 0 ]; then + echo "*** LT_$tid: PASSED (ok:$ok)" +else + echo "*** LT_$tid: FAILED (ok:$ok, ng:$ng)" +fi +echo "" + diff --git a/test/issues/1006/Makefile b/test/issues/1006/Makefile new file mode 100644 index 00000000..609d82b1 --- /dev/null +++ b/test/issues/1006/Makefile @@ -0,0 +1,14 @@ +CC = gcc +TARGET= + +CPPFLAGS = +LDFLAGS = + +all: $(TARGET) + +test: all + @sh ./C1006.sh + 
+clean: + rm -f $(TARGET) *.o + diff --git a/test/issues/1006/README b/test/issues/1006/README new file mode 100644 index 00000000..3c0bb58b --- /dev/null +++ b/test/issues/1006/README @@ -0,0 +1,31 @@ +【Issue#1006 動作確認】 +□ テスト内容 +1. Issueで報告された再現プログラムでの確認 +RT_001: ostest-ptrace.008 での確認 + wait()で待っている親プロセスを起動した直後の子プロセスから、 + ptrace(GETFPREGS)で正しい値が取得できることを確認 + +2. 既存のptrace機能に影響がないことをLTPを用いて確認 +LT_001: ltp-ptrace01 + PTRACE_TRACEME と PTRACE_KILL の動作を確認 (TPASS 2件) + +LT_002: ltp-ptrace02 + PTRACE_TRACEME と PTRACE_CONT の動作を確認 (TPASS 2件) + +LT_003: ltp-ptrace03 + ptrace()に不正なpidを指定した場合の動作を確認 (TCONF 1件、TPASS 2件) + +LT_004: ltp-ptrace05 + TRACE状態での各シグナル受信時の動作を確認 (TPASS 65件) + +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +□ 実行結果 +result.log 参照。 +すべての項目をPASSしていることを確認。 diff --git a/test/issues/1006/result.log b/test/issues/1006/result.log new file mode 100644 index 00000000..a2c92f99 --- /dev/null +++ b/test/issues/1006/result.log @@ -0,0 +1,95 @@ +*** RT_001 start ******************************* +TEST_SUITE: ptrace +TEST_NUMBER: 8 +ARGS: +TEST_SUITE: ptrace +TEST_NUMBER: 8 +ARGS: +child's fpregs.swd is 0x38 (expected NOT ZERO) +RESULT: ok +*** RT_001: PASSED + +*** LT_001 start ******************************* +ptrace01 1 TPASS : Test Passed +ptrace01 2 TPASS : Test Passed +*** LT_001: PASSED (ok:2) + +*** LT_002 start ******************************* +ptrace02 1 TPASS : Test Passed +ptrace02 2 TPASS : Test Passed +*** LT_002: PASSED (ok:2) + +*** LT_003 start ******************************* +ptrace03 1 TCONF : ptrace03.c:137: this kernel allows to trace init +ptrace03 2 TPASS : Test Passed +ptrace03 3 TPASS : Test Passed +*** LT_003: PASSED (ok:2) + +*** LT_004 start ******************************* +ptrace05 0 TINFO : [child] Sending kill(.., 0) +ptrace05 1 TPASS : kill(.., 0) exited with 0, as expected. 
+ptrace05 2 TPASS : Stopped as expected +ptrace05 3 TPASS : Stopped as expected +ptrace05 4 TPASS : Stopped as expected +ptrace05 5 TPASS : Stopped as expected +ptrace05 6 TPASS : Stopped as expected +ptrace05 7 TPASS : Stopped as expected +ptrace05 8 TPASS : Stopped as expected +ptrace05 9 TPASS : Stopped as expected +ptrace05 10 TPASS : Killed with SIGKILL, as expected. +ptrace05 11 TPASS : Stopped as expected +ptrace05 12 TPASS : Stopped as expected +ptrace05 13 TPASS : Stopped as expected +ptrace05 14 TPASS : Stopped as expected +ptrace05 15 TPASS : Stopped as expected +ptrace05 16 TPASS : Stopped as expected +ptrace05 17 TPASS : Stopped as expected +ptrace05 18 TPASS : Stopped as expected +ptrace05 19 TPASS : Stopped as expected +ptrace05 20 TPASS : Stopped as expected +ptrace05 21 TPASS : Stopped as expected +ptrace05 22 TPASS : Stopped as expected +ptrace05 23 TPASS : Stopped as expected +ptrace05 24 TPASS : Stopped as expected +ptrace05 25 TPASS : Stopped as expected +ptrace05 26 TPASS : Stopped as expected +ptrace05 27 TPASS : Stopped as expected +ptrace05 28 TPASS : Stopped as expected +ptrace05 29 TPASS : Stopped as expected +ptrace05 30 TPASS : Stopped as expected +ptrace05 31 TPASS : Stopped as expected +ptrace05 32 TPASS : Stopped as expected +ptrace05 33 TPASS : Stopped as expected +ptrace05 34 TPASS : Stopped as expected +ptrace05 35 TPASS : Stopped as expected +ptrace05 36 TPASS : Stopped as expected +ptrace05 37 TPASS : Stopped as expected +ptrace05 38 TPASS : Stopped as expected +ptrace05 39 TPASS : Stopped as expected +ptrace05 40 TPASS : Stopped as expected +ptrace05 41 TPASS : Stopped as expected +ptrace05 42 TPASS : Stopped as expected +ptrace05 43 TPASS : Stopped as expected +ptrace05 44 TPASS : Stopped as expected +ptrace05 45 TPASS : Stopped as expected +ptrace05 46 TPASS : Stopped as expected +ptrace05 47 TPASS : Stopped as expected +ptrace05 48 TPASS : Stopped as expected +ptrace05 49 TPASS : Stopped as expected +ptrace05 50 TPASS : 
Stopped as expected +ptrace05 51 TPASS : Stopped as expected +ptrace05 52 TPASS : Stopped as expected +ptrace05 53 TPASS : Stopped as expected +ptrace05 54 TPASS : Stopped as expected +ptrace05 55 TPASS : Stopped as expected +ptrace05 56 TPASS : Stopped as expected +ptrace05 57 TPASS : Stopped as expected +ptrace05 58 TPASS : Stopped as expected +ptrace05 59 TPASS : Stopped as expected +ptrace05 60 TPASS : Stopped as expected +ptrace05 61 TPASS : Stopped as expected +ptrace05 62 TPASS : Stopped as expected +ptrace05 63 TPASS : Stopped as expected +ptrace05 64 TPASS : Stopped as expected +ptrace05 65 TPASS : Stopped as expected +*** LT_004: PASSED (ok:65) diff --git a/test/issues/1009/C1009.patch b/test/issues/1009/C1009.patch new file mode 100644 index 00000000..05cf8c6b --- /dev/null +++ b/test/issues/1009/C1009.patch @@ -0,0 +1,49 @@ +diff --git a/arch/x86_64/kernel/syscall.c b/arch/x86_64/kernel/syscall.c +index 4b2742b..a3173c9 100644 +--- a/arch/x86_64/kernel/syscall.c ++++ b/arch/x86_64/kernel/syscall.c +@@ -1670,6 +1670,11 @@ long do_arch_prctl(unsigned long code, unsigned long address) + break; + case ARCH_SET_GS: + return -ENOTSUPP; ++ case 999: { ++ struct thread *thread = cpu_local_var(current); ++ thread->proc->dblsig = (int)address; ++ return 0; ++ } + default: + return -EINVAL; + } +diff --git a/kernel/include/process.h b/kernel/include/process.h +index 24acf1f..dd94469 100644 +--- a/kernel/include/process.h ++++ b/kernel/include/process.h +@@ -580,6 +580,8 @@ struct process { + #endif // PROFILE_ENABLE + int nr_processes; /* For partitioned execution */ + int process_rank; /* Rank in partition */ ++ ++ int dblsig; + }; + + /* +diff --git a/kernel/syscall.c b/kernel/syscall.c +index 15d4593..3d03fad 100644 +--- a/kernel/syscall.c ++++ b/kernel/syscall.c +@@ -9632,6 +9632,15 @@ long syscall(int num, ihk_mc_user_context_t *ctx) + + if (!list_empty(&thread->sigpending) || + !list_empty(&thread->sigcommon->sigpending)) { ++ if 
(!list_empty(&thread->sigcommon->sigpending) && ++ thread->proc->dblsig) { ++ kprintf("have a signal, waiting arrive more signal\n"); ++ while (list_is_singular( ++ &thread->sigcommon->sigpending)) { ++ schedule(); ++ } ++ kprintf("have some signals\n"); ++ } + check_signal(l, NULL, num); + } + diff --git a/test/issues/1009/C1009.sh b/test/issues/1009/C1009.sh new file mode 100644 index 00000000..3775a80d --- /dev/null +++ b/test/issues/1009/C1009.sh @@ -0,0 +1,22 @@ +#!/bin/sh +USELTP=1 +USEOSTEST=0 + +BOOTPARAM="-c 1-7,17-23,9-15,25-31 -m 10G@0,10G@1" +. ../../common.sh + +################################################################################ +$MCEXEC ./C1009T01 + +for i in kill01:02 kill12:03 pause02:04 sigaction01:05 ; do + tp=`echo $i|sed 's/:.*//'` + id=`echo $i|sed 's/.*://'` + $MCEXEC $LTPBIN/$tp 2>&1 | tee $tp.txt + ok=`grep TPASS $tp.txt | wc -l` + ng=`grep TFAIL $tp.txt | wc -l` + if [ $ng = 0 ]; then + echo "*** C1009T$id: $tp OK ($ok)" + else + echo "*** C1009T$id: $tp NG (ok=$ok ng=$ng)" + fi +done diff --git a/test/issues/1009/C1009.txt b/test/issues/1009/C1009.txt new file mode 100644 index 00000000..c77bc7ee --- /dev/null +++ b/test/issues/1009/C1009.txt @@ -0,0 +1,24 @@ +Script started on Mon Sep 10 15:12:28 2018 +bash-4.2$ make test +gcc -g -Wall -o C1009T01 C1009T01.c +sh ./C1009.sh +SIGUSR2 +SIGUSR1 +read A OK +read B OK +*** C1009T01: OK +kill01 1 TPASS : received expected signal 9 +*** C1009T02: kill01 OK (1) +kill12 1 TPASS : Test passed +*** C1009T03: kill12 OK (1) +pause02 1 TPASS : pause was interrupted correctly +*** C1009T04: pause02 OK (1) +sigaction01 1 TPASS : SA_RESETHAND did not cause SA_SIGINFO to be cleared +sigaction01 2 TPASS : SA_RESETHAND was masked when handler executed +sigaction01 3 TPASS : sig has been masked because sa_mask originally contained sig +sigaction01 4 TPASS : siginfo pointer non NULL +*** C1009T05: sigaction01 OK (4) +bash-4.2$ exit +exit + +Script done on Mon Sep 10 15:12:54 2018 diff --git 
a/test/issues/1009/C1009T01.c b/test/issues/1009/C1009T01.c new file mode 100644 index 00000000..74716ff7 --- /dev/null +++ b/test/issues/1009/C1009T01.c @@ -0,0 +1,107 @@ +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <sys/types.h> +#include <errno.h> +#include <signal.h> +#include <asm/prctl.h> +#include <sys/prctl.h> + +int arch_prctl(int code, unsigned long *addr); + +void +sigusr(int sig) +{ + if (sig == SIGUSR1) { + printf("SIGUSR1\n"); + } + else if (sig == SIGUSR2) { + printf("SIGUSR2\n"); + } + else { + printf("other sig\n"); + } + fflush(stdout); +} + +int +main(int argc, char **argv) +{ + struct sigaction act; + pid_t pid1 = 0; + pid_t pid2 = 0; + pid_t parent; + int pfd[2]; + char ch; + int rc; + unsigned long val; + + memset(&act, '\0', sizeof(act)); + act.sa_handler = sigusr; + act.sa_flags = SA_RESTART; + sigaction(SIGUSR1, &act, NULL); + sigaction(SIGUSR2, &act, NULL); + + pipe(pfd); + + parent = getpid(); + val = 1; + if (arch_prctl(999, (unsigned long *)val) == -1) { + fprintf(stderr, "C1009T01 WARN: no mckernel patch detected.\n"); + exit(1); + } + + if ((pid1 = fork())) { + pid2 = fork(); + } + + if (!pid1 || !pid2) { + int sig; + + close(pfd[0]); + if (pid1) + sig = SIGUSR2; + else + sig = SIGUSR1; + + sleep(1); + kill(parent, sig); + if (pid1) { + sleep(2); + ch = 'B'; + } + else { + sleep(1); + ch = 'A'; + } + write(pfd[1], &ch, 1); + close(pfd[1]); + exit(0); + } + rc = read(pfd[0], &ch, 1); + if (rc != 1) { + printf("C1009T01 NG: read error rc=%d errno=%d\n", rc, errno); + exit(1); + } + if (ch != 'A') { + printf("C1009T01 NG: read BAD DATA ch=%c\n", ch); + exit(1); + } + val = 0; + arch_prctl(999, (unsigned long *)val); + printf("read %c OK\n", ch); + rc = read(pfd[0], &ch, 1); + if (rc != 1) { + printf("C1009T01 NG: read error rc=%d errno=%d\n", rc, errno); + exit(1); + } + if (ch != 'B') { + printf("C1009T01 NG: read BAD DATA ch=%c\n", ch); + exit(1); + } + + printf("read %c OK\n", ch); + 
printf("*** C1009T01: OK\n"); + exit(0); +} diff --git a/test/issues/1009/Makefile b/test/issues/1009/Makefile new file mode 100644 index 00000000..4453ab75 --- /dev/null +++ b/test/issues/1009/Makefile @@ -0,0 +1,13 @@ +CC = gcc +TARGET = C1009T01 + +all:: $(TARGET) + +C1009T01: C1009T01.c + $(CC) -g -Wall -o $@ $^ + +test:: all + sh ./C1009.sh + +clean:: + rm -f $(TARGET) *.o diff --git a/test/issues/1009/README b/test/issues/1009/README new file mode 100644 index 00000000..07531067 --- /dev/null +++ b/test/issues/1009/README @@ -0,0 +1,27 @@ +【Issue#1009 動作確認】 +□ テスト内容 +1. システムコール処理中にシグナルハンドラを呼び出す複数のシグナルを + 受信し、当該システムコールを再処理するとき、当該システムコールが + 1度しか処理されないことを確認する(指摘現象)。 + 尚、シグナルを同時に発行する状態を再現させるのが困難なため、本 + テストは複数シグナルを待ち合わせるパッチを適用したカーネルで行う。 + McKernel へのパッチファイルは C1009.patch である。 +C1009T01 シグナルを複数受信したとき、システムコールの再処理を1度だけ行う確認 + +2. 変更が他のシグナル処理に影響しないことをLTPを用いて確認する。 +C1009T02 kill01: kill の基本機能の確認 +C1009T03 kill12: kill, wait, signal の組み合わせ確認 +C1009T04 pause02: pause の基本機能の確認 +C1009T05 sigaction01: sigaction の基本機能の確認 + +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +□ 実行結果 +C1009.txt 参照。 +全ての項目が OK となっていることを確認。 diff --git a/test/issues/1018/C1018.sh b/test/issues/1018/C1018.sh new file mode 100644 index 00000000..254dabeb --- /dev/null +++ b/test/issues/1018/C1018.sh @@ -0,0 +1,39 @@ +#!/bin/sh + +USELTP=0 +USEOSTEST=1 + +. 
../../common.sh + +${MCEXEC} ./CT_001 +${MCEXEC} ./CT_002 + +tid=001 +echo "*** RT_$tid start *******************************" +sudo $BINDIR/mcexec $OSTESTDIR/bin/test_mck -s procfs -n 0 2>&1 | tee ./RT_${tid}.txt +if [ -s ./RT_${tid}.txt ] && ! grep "RESULT: TP failed" ./RT_${tid}.txt > /dev/null 2>&1 ; then + echo "*** RT_$tid: PASSED" +else + echo "*** RT_$tid: FAILED" +fi +echo "" + +tid=002 +echo "*** RT_$tid start *******************************" +sudo $BINDIR/mcexec $OSTESTDIR/bin/test_mck -s procfs -n 1 2>&1 | tee ./RT_${tid}.txt +if [ -s ./RT_${tid}.txt ] && ! grep "RESULT: TP failed" ./RT_${tid}.txt > /dev/null 2>&1 ; then + echo "*** RT_$tid: PASSED" +else + echo "*** RT_$tid: FAILED" +fi +echo "" + +tid=003 +echo "*** RT_$tid start *******************************" +sudo $BINDIR/mcexec $OSTESTDIR/bin/test_mck -s procfs -n 3 2>&1 | tee ./RT_${tid}.txt +if [ -s ./RT_${tid}.txt ] && ! grep "RESULT: TP failed" ./RT_${tid}.txt > /dev/null 2>&1 ; then + echo "*** RT_$tid: PASSED" +else + echo "*** RT_$tid: FAILED" +fi +echo "" diff --git a/test/issues/1018/CT_001.c b/test/issues/1018/CT_001.c new file mode 100644 index 00000000..f8aaad17 --- /dev/null +++ b/test/issues/1018/CT_001.c @@ -0,0 +1,99 @@ +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <unistd.h> +#include <errno.h> + +#include "./test_chk.h" + +#define TEST_NAME "CT_001" + +#define MEGA (1024 * 1024) + +#define PROCFILE_LEN 128 +#define MAP_LEN (8 * MEGA) + +int main(int argc, char *argv[]) +{ + int fd = 0, i = 0; + pid_t pid = getpid(); + char pfname[PROCFILE_LEN]; + unsigned long *anon_map = NULL; + unsigned long *tmp_buf = NULL; + int data_pos[3] = {0 * MEGA / sizeof(unsigned long), + 4 * MEGA / sizeof(unsigned long) - 1, + 8 * MEGA / sizeof(unsigned long) - 1}; + off_t ret = 0; + + printf("*** %s start *******************************\n", TEST_NAME); + + /* anonymous mmap */ + anon_map = (unsigned long *)mmap(NULL, MAP_LEN, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, 
-1, 0); + OKNG(anon_map == MAP_FAILED, "mmap device file"); + printf(" anonymous map to %p, size:%.2f MB\n", + anon_map, (double)MAP_LEN / MEGA); + + /* allocate tmp_buf */ + tmp_buf = (unsigned long *)mmap(NULL, MAP_LEN, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + CHKANDJUMP(tmp_buf == MAP_FAILED, "alloc tmp_buf"); + + /* set magic_number */ + anon_map[data_pos[0]] = 0x1111; + anon_map[data_pos[1]] = 0x2222; + anon_map[data_pos[2]] = 0x3333; + + /* generate proc_mem path */ + sprintf(pfname, "/proc/%d/mem", pid); + + /* open proc_mem */ + fd = open(pfname, O_RDONLY); + CHKANDJUMP(fd < 0, "open proc_mem"); + + /* pread 2MB */ + errno = 0; + ret = pread(fd, tmp_buf, 2 * MEGA, (off_t)anon_map); + OKNG(ret != 2 * MEGA || errno != 0, "2MB pread"); + + /* check read data */ + OKNG(tmp_buf[data_pos[0]] != anon_map[data_pos[0]], + "check read data :0x%lx", tmp_buf[data_pos[0]]); + + /* pread 4MB */ + errno = 0; + ret = pread(fd, tmp_buf, 4 * MEGA, (off_t)anon_map); + OKNG(ret != 4 * MEGA || errno != 0, "4MB pread"); + + /* check read data */ + OKNG(tmp_buf[data_pos[1]] != anon_map[data_pos[1]], + "check read data :0x%lx", tmp_buf[data_pos[1]]); + + /* pread 8MB */ + errno = 0; + ret = pread(fd, tmp_buf, 8 * MEGA, (off_t)anon_map); + OKNG(ret != 8 * MEGA || errno != 0, "8MB pread"); + + /* check read data */ + OKNG(tmp_buf[data_pos[2]] != anon_map[data_pos[2]], + "check read data :0x%lx", tmp_buf[data_pos[2]]); + + close(fd); + + printf("*** %s PASSED\n\n", TEST_NAME); + + return 0; + +fn_fail: + if (fd > 0) { + close(fd); + } + + printf("*** %s FAILED\n\n", TEST_NAME); + + return -1; + +} diff --git a/test/issues/1018/CT_002.c b/test/issues/1018/CT_002.c new file mode 100644 index 00000000..1caa5993 --- /dev/null +++ b/test/issues/1018/CT_002.c @@ -0,0 +1,52 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <unistd.h> +#include <errno.h> + 
+#include "./test_chk.h" + +#define TEST_NAME "CT_002" + +#define MEGA (1024 * 1024) + +#define PROCFILE_LEN 128 +#define MAP_LEN (8 * MEGA) + +int main(int argc, char *argv[]) +{ + int fd = 0; + pid_t pid = getpid(); + char pfname[PROCFILE_LEN]; + off_t ret = 0; + + printf("*** %s start *******************************\n", TEST_NAME); + + /* generate proc_mem path */ + sprintf(pfname, "/proc/%d/mem", pid); + + /* open proc_mem */ + fd = open(pfname, O_WRONLY); + OKNG(fd != -1, "open /proc/<PID>/mem is failed"); + + OKNG(errno != EACCES, "errno is EACCES"); + + printf("*** %s PASSED\n\n", TEST_NAME); + + return 0; + +fn_fail: + + if (fd > 0) { + close(fd); + } + + printf("*** %s FAILED\n\n", TEST_NAME); + + return -1; + +} diff --git a/test/issues/1018/Makefile b/test/issues/1018/Makefile new file mode 100644 index 00000000..ae5691b0 --- /dev/null +++ b/test/issues/1018/Makefile @@ -0,0 +1,20 @@ +CC = gcc +TARGET=CT_001 CT_002 + +CPPFLAGS = +LDFLAGS = + +all: $(TARGET) + +CT_001: CT_001.c + $(CC) -o $@ $^ $(LDFLAGS) + +CT_002: CT_002.c + $(CC) -o $@ $^ $(LDFLAGS) + +test: all + @sh ./C1018.sh + +clean: + rm -f $(TARGET) *.o + diff --git a/test/issues/1018/README b/test/issues/1018/README new file mode 100644 index 00000000..0604c5a5 --- /dev/null +++ b/test/issues/1018/README @@ -0,0 +1,32 @@ +【Issue#1018 動作確認】 +□ テスト内容 +1. Issueで報告された再現プログラムでの確認 +CT_001: /proc/<PID>/mem に対するpread + /proc/<PID>/mem に対して、2MB, 4MB, 8MB でそれぞれpreadを実行し、 + preadが成功することと、読み取ったデータが正しいことを確認 + +CT_002: /proc/<PID>/mem に対するpwrite + /proc/<PID>/mem に対して、書き込み権限のあるopenを実行し、 + openが失敗することと、errnoにEACCESが設定されることを確認 + +2. 
既存のprocfs機能に影響がないことをOSTESTを用いて確認 +RT_001: ostest_procfs.000 + /proc/<PID>/auxv の内容を取得できることを確認 + +RT_002: ostest_procfs.001 + /proc/<PID>/mem の内容を取得できることを確認 + +RT_003: ostest_procfs.003 + /proc/<PID>/stat の内容を取得できることを確認 + +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +□ 実行結果 +result.log 参照。 +すべての項目をPASSしていることを確認。 diff --git a/test/issues/1018/result.log b/test/issues/1018/result.log new file mode 100644 index 00000000..5a91af12 --- /dev/null +++ b/test/issues/1018/result.log @@ -0,0 +1,15 @@ +*** CT_001 start ******************************* + [OK] mmap device file + anonymous map to 0x2aaaab200000, size:8.00 MB + [OK] 2MB pread + [OK] check read data :0x1111 + [OK] 4MB pread + [OK] check read data :0x2222 + [OK] 8MB pread + [OK] check read data :0x3333 +*** CT_001 PASSED + +*** CT_002 start ******************************* + [OK] open /proc/<PID>/mem is failed + [OK] errno is EACCES +*** CT_002 PASSED diff --git a/test/issues/1018/test_chk.h b/test/issues/1018/test_chk.h new file mode 100644 index 00000000..4cef42e8 --- /dev/null +++ b/test/issues/1018/test_chk.h @@ -0,0 +1,23 @@ +#ifndef HEADER_TEST_CHK_H +#define HEADER_TEST_CHK_H + +#define CHKANDJUMP(cond, ...) do {\ + if (cond) {\ + fprintf(stderr, " [NG] ");\ + fprintf(stderr, __VA_ARGS__);\ + fprintf(stderr, " failed\n");\ + goto fn_fail;\ + } \ + } while (0) + +#define OKNG(cond, ...) 
do {\ + if (cond) {\ + CHKANDJUMP(cond, __VA_ARGS__);\ + } else {\ + fprintf(stdout, " [OK] ");\ + fprintf(stdout, __VA_ARGS__);\ + fprintf(stdout, "\n");\ + } \ + } while (0) + +#endif diff --git a/test/issues/1021/C1021.c b/test/issues/1021/C1021.c new file mode 100644 index 00000000..df55bd09 --- /dev/null +++ b/test/issues/1021/C1021.c @@ -0,0 +1,286 @@ +#include <stdlib.h> +#include <stdio.h> +#include <stdarg.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <errno.h> +#include <ctype.h> + +int id; +int okcnt; +int ngcnt; +void *area; + +void +ok(char *file, char *fmt, ...) +{ + va_list ap; + + printf("*** C1021T%02d %s ", id, file); + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); + printf("\n"); + okcnt++; +} + +void +ng(char *file, char *fmt, ...) +{ + va_list ap; + + printf("*** C1021T%02d %s ", id, file); + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); + printf("\n"); + ngcnt++; +} + +void +hex(char *bp, int len) +{ + unsigned char *buf; + long l; + long p; + long zl = 0; + long zf = 1; + long zp = 0; + + for (p = 0; p < len; p += 16) { + int i; + + buf = (unsigned char *)bp + p; + l = 16; + if (p + 16 > len) { + l = len - p; + } + if (!zf) { + int zz = 0; + + for (i = 0; i < l; i++) { + if (buf[i]) + zz = 1; + } + if (l < 16 || !zz) { + zl += 16; + continue; + } + if (zl == 16) { + printf("%016lx 00000000 00000000 00000000 " + "00000000 *................*\n", zp); + } + else if (zl) { + printf(" %08lx - %08lx ZERO\n", zp, p); + } + } + zf = 0; + printf("%08lx ", p); + for (i = 0; i < 16; i++) { + if (i % 4 == 0) + printf(" "); + printf(i < l ? "%02x" : " ", buf[i]); + if (i < l && buf[i]) + zf = 1; + } + printf(" *"); + for (i = 0; i < 16; i++) + printf(i < l ? "%c" : " ", + isprint(buf[i]) ? 
buf[i] : '.'); + printf("*\n"); + zl = 0; + zp = p + 16; + } +} + +void +sub(char *file, int mapsflag) +{ + int fd; + int fd2; + char buf1[65536]; + char buf2[65536]; + char buf3[65536]; + int n; + int rc; + int pos; + + id++; + fd = open(file, O_RDONLY); + if (fd == -1) { + ng(file, "open %s", strerror(errno)); + } + else { + ok(file, "open OK"); + } + + id++; + fd2 = dup(fd); + if (fd2 == -1) { + ng(file, "dup %s", strerror(errno)); + } + else { + ok(file, "dup OK"); + } + + id++; + for (n = 0; (rc = read(fd, buf1 + n, 1)) == 1; n++); + if (rc == -1) { + ng(file, "read(1) %s", strerror(errno)); + } + else if (mapsflag && n < 4096) { + ng(file, "read(1) short n=%d", n); + } + else { + ok(file, "read(1) OK n=%d", n); + } + + id++; + rc = lseek(fd, 0L, SEEK_SET); + if (rc == -1) { + ng(file, "lseek %s", strerror(errno)); + } + else { + ok(file, "lseek OK"); + } + + if (mapsflag) + munmap(area, 4096); + + id++; + pos = 0; + while ((rc = read(fd, buf2 + pos, 1024)) > 0) { + pos += rc; + } + if (rc == -1) { + ng(file, "read(1024) %s", strerror(errno)); + } + else { + if (pos != n) { + ng(file, "read(1024) invalid size %d != %d", pos, n); + } + else if (memcmp(buf1, buf2, n) != 0) { + ng(file, "read(1024) invalid data"); + hex(buf1, n); + hex(buf2, n); + } + else { + ok(file, "read(1024) OK"); + } + } + + id++; + rc = close(fd); + if (rc == -1) { + ng(file, "close %s", strerror(errno)); + } + else { + ok(file, "close OK"); + } + + id++; + rc = read(fd2, buf3, n); + if (rc == -1) { + ng(file, "read(dup) EOF %s", strerror(errno)); + } + else if (rc == 0) { + ok(file, "read(dup) EOF OK"); + } + else { + ng(file, "read(dup) invalid position"); + } + + id++; + rc = lseek(fd2, 0L, SEEK_SET); + if (rc == -1) { + ng(file, "lseek(dup) %s", strerror(errno)); + } + else { + ok(file, "lseek(dup) OK"); + } + + + id++; + rc = read(fd2, buf3, n); + if (rc == -1) { + ng(file, "read(dup) %s", strerror(errno)); + } + else if (rc != n) { + ng(file, "read(dup) too short"); + } + else { + rc 
= read(fd2, buf3 + rc, n); + if (rc == -1) { + ng(file, "read(dup) %s", strerror(errno)); + } + else if (rc != 0) { + ng(file, "read(dup) too long"); + } + else if (memcmp(buf1, buf3, n) != 0) { + ng(file, "read(dup) invalid data"); + hex(buf1, n); + hex(buf3, n); + } + else { + ok(file, "read(dup) OK"); + } + } + + id++; + rc = close(fd2); + if (rc == -1) { + ng(file, "close(dup) %s", strerror(errno)); + } + else { + ok(file, "close(dup) OK"); + } +} + +int +main(int argc, char **argv) +{ + int i; + int pid = getpid(); + char file[1024]; + + for (i = 0; i < 512; i++) { + char *c; + + if (i % 2) { + c = mmap(NULL, 4096, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + area = c; + } + else { + c = mmap(NULL, 4096, PROT_READ|PROT_WRITE, + MAP_SHARED|MAP_ANONYMOUS, -1, 0); + } + + if (c == (void *)-1) { + printf("mmap error %d\n", errno); + exit(1); + } + *c = 1; + } + sub("/proc/stat", 0); + sprintf(file, "/proc/%d/auxv", pid); + sub(file, 0); + sprintf(file, "/proc/%d/cmdline", pid); + sub(file, 0); + sprintf(file, "/proc/%d/maps", pid); + sub(file, 1); + sprintf(file, "/proc/%d/status", pid); + sub(file, 0); + sprintf(file, "/proc/%d/task/%d/stat", pid, pid); + sub(file, 0); + + if (ngcnt) { + printf("TEST FAILED OK=%d NG=%d\n", okcnt, ngcnt); + exit(1); + } + printf("TEST SUCCESS OK=%d\n", okcnt); + exit(0); +} diff --git a/test/issues/1021/C1021.sh b/test/issues/1021/C1021.sh new file mode 100755 index 00000000..bdbee9b2 --- /dev/null +++ b/test/issues/1021/C1021.sh @@ -0,0 +1,22 @@ +#!/bin/sh +USELTP=0 +USEOSTEST=0 + +BOOTPARAM="-c 1-7,17-23,9-15,25-31 -m 10G@0,10G@1" +. ../../common.sh + +if ! 
sudo ls /sys/kernel/debug | grep kmemleak > /dev/null 2>&1; then + echo kmemleak: not found >&2 + exit 1 +fi + +################################################################################ +sudo sh -c 'echo clear > /sys/kernel/debug/kmemleak' +$MCEXEC ./C1021 +sudo $SBINDIR/mcstop+release.sh +sudo sh -c 'echo scan > /sys/kernel/debug/kmemleak' +if sudo cat /sys/kernel/debug/kmemleak | tee C1021T71.kmemleak | grep 'mcctrl'; then + echo '*** C1021T61 NG (kmemleak)' +else + echo '*** C1021T61 OK (kmemleak)' +fi diff --git a/test/issues/1021/C1021.txt b/test/issues/1021/C1021.txt new file mode 100644 index 00000000..33d314b3 --- /dev/null +++ b/test/issues/1021/C1021.txt @@ -0,0 +1,71 @@ +Script started on Wed Aug 29 15:21:45 2018 +bash-4.2$ make test +sh ./C1021.sh +mcstop+release.sh ... done +mcreboot.sh -c 1-7,17-23,9-15,25-31 -m 10G@0,10G@1 ... done +*** C1021T01 /proc/stat open OK +*** C1021T02 /proc/stat dup OK +*** C1021T03 /proc/stat read(1) OK n=158 +*** C1021T04 /proc/stat lseek OK +*** C1021T05 /proc/stat read(1024) OK +*** C1021T06 /proc/stat close OK +*** C1021T07 /proc/stat read(dup) EOF OK +*** C1021T08 /proc/stat lseek(dup) OK +*** C1021T09 /proc/stat read(dup) OK +*** C1021T10 /proc/stat close(dup) OK +*** C1021T11 /proc/12455/auxv open OK +*** C1021T12 /proc/12455/auxv dup OK +*** C1021T13 /proc/12455/auxv read(1) OK n=144 +*** C1021T14 /proc/12455/auxv lseek OK +*** C1021T15 /proc/12455/auxv read(1024) OK +*** C1021T16 /proc/12455/auxv close OK +*** C1021T17 /proc/12455/auxv read(dup) EOF OK +*** C1021T18 /proc/12455/auxv lseek(dup) OK +*** C1021T19 /proc/12455/auxv read(dup) OK +*** C1021T20 /proc/12455/auxv close(dup) OK +*** C1021T21 /proc/12455/cmdline open OK +*** C1021T22 /proc/12455/cmdline dup OK +*** C1021T23 /proc/12455/cmdline read(1) OK n=8 +*** C1021T24 /proc/12455/cmdline lseek OK +*** C1021T25 /proc/12455/cmdline read(1024) OK +*** C1021T26 /proc/12455/cmdline close OK +*** C1021T27 /proc/12455/cmdline read(dup) EOF OK +*** 
C1021T28 /proc/12455/cmdline lseek(dup) OK +*** C1021T29 /proc/12455/cmdline read(dup) OK +*** C1021T30 /proc/12455/cmdline close(dup) OK +*** C1021T31 /proc/12455/maps open OK +*** C1021T32 /proc/12455/maps dup OK +*** C1021T33 /proc/12455/maps read(1) OK n=25401 +*** C1021T34 /proc/12455/maps lseek OK +*** C1021T35 /proc/12455/maps read(1024) OK +*** C1021T36 /proc/12455/maps close OK +*** C1021T37 /proc/12455/maps read(dup) EOF OK +*** C1021T38 /proc/12455/maps lseek(dup) OK +*** C1021T39 /proc/12455/maps read(dup) OK +*** C1021T40 /proc/12455/maps close(dup) OK +*** C1021T41 /proc/12455/status open OK +*** C1021T42 /proc/12455/status dup OK +*** C1021T43 /proc/12455/status read(1) OK n=255 +*** C1021T44 /proc/12455/status lseek OK +*** C1021T45 /proc/12455/status read(1024) OK +*** C1021T46 /proc/12455/status close OK +*** C1021T47 /proc/12455/status read(dup) EOF OK +*** C1021T48 /proc/12455/status lseek(dup) OK +*** C1021T49 /proc/12455/status read(dup) OK +*** C1021T50 /proc/12455/status close(dup) OK +*** C1021T51 /proc/12455/task/12455/stat open OK +*** C1021T52 /proc/12455/task/12455/stat dup OK +*** C1021T53 /proc/12455/task/12455/stat read(1) OK n=92 +*** C1021T54 /proc/12455/task/12455/stat lseek OK +*** C1021T55 /proc/12455/task/12455/stat read(1024) OK +*** C1021T56 /proc/12455/task/12455/stat close OK +*** C1021T57 /proc/12455/task/12455/stat read(dup) EOF OK +*** C1021T58 /proc/12455/task/12455/stat lseek(dup) OK +*** C1021T59 /proc/12455/task/12455/stat read(dup) OK +*** C1021T60 /proc/12455/task/12455/stat close(dup) OK +TEST SUCCESS OK=60 +*** C1021T61 OK (kmemleak) +bash-4.2$ exit +exit + +Script done on Wed Aug 29 15:22:55 2018 diff --git a/test/issues/1021/Makefile b/test/issues/1021/Makefile new file mode 100644 index 00000000..cfdd0e6f --- /dev/null +++ b/test/issues/1021/Makefile @@ -0,0 +1,13 @@ +CC=gcc +TARGET=C1021 + +all:: $(TARGET) + +C1021: C1021.c + $(CC) -o C1021 C1021.c -Wall -g + +test:: $(TARGET) + sh ./C1021.sh + +clean:: + rm 
-f *.o $(TARGET) diff --git a/test/issues/1021/README b/test/issues/1021/README new file mode 100644 index 00000000..b453a744 --- /dev/null +++ b/test/issues/1021/README @@ -0,0 +1,58 @@ +【Issue#1021 動作確認】 +□ テスト内容 +1. procfs ファイルに対するファイルオペレーションのテスト +Issue#1021 の対応において、procfs の以下のファイル処理を変更している。 + /proc/stat + /proc/pid/auxv + /proc/pid/cmdline + /proc/pid/maps + /proc/pid/status + /proc/pid/task/tid/stat + +これらのファイルに対するファイルオペレーションとして、以下をテストする。 +1) ファイルをopen(2)できること。 +2) ファイルディスクリプタをdup(2)できること。 +3) 1バイト単位にファイル終端までread(2)できること。 +4) lseek(2) できること。(※2) +5) lseek(2) 後に、1024バイト単位に read(2) できること。最初の read(2)と内容が + 一致していること。 +6) close(2) できること。 +7) dup(2) したファイルディスクリプタが EOF になっていること (5 の read(2) の + 影響)。 +8) dup(2) したファイルディスクリプタを lseek(2) できること。 +9) dup(2) したファイルディスクリプタを read(2) し、ファイル全体を 1 回の read(2) + で読み込むことができること。また、最初の read(2) と内容が一致していること。 +10) dup(2) したファイルを close(2) できること。 + +テストケースは以下の通りである。 +C1021T01-C1021T10 /proc/stat に対する上記 1) - 10) のテスト +C1021T11-C1021T20 /proc/pid/auxv に対する上記 1) - 10) のテスト +C1021T21-C1021T30 /proc/pid/cmdline に対する上記 1) - 10) のテスト +C1021T31-C1021T40 /proc/pid/maps に対する上記 1) - 10) のテスト (※1, ※2) +C1021T41-C1021T50 /proc/pid/status に対する上記 1) - 10) のテスト +C1021T51-C1021T60 /proc/pid/task/tid/stat に対する上記 1) - 10) のテスト + +※1 /proc/pid/maps はユーザプログラムのメモリの使い方により非常に大きなファイル + になることがあるので、予め mmap を複数回行って4kB以上の read が発生するよう + にしておく。(McKernel 内のバッファが複数ページになる場合のテストを兼ねる)。 + +※2 /proc/pid/maps の読み込み中に情報が変化しても後続の read(2) に影響しない + ことを確認するため、/proc/pid/maps の lseek(2) 後に munmap(2) を行い、 + McKernel の内部情報を変化させる。(後続の read(2) では、munmap 前の情報を + 読み込む仕様)。 + +2. 
メモリリークが発生していないことの確認 +C1021T61 kmemleak を用いて mcctrl の procfs 処理がメモリリークを起こして + いないことを確認する。 + +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +□ 実行結果 +C1021.txt 参照。 +全ての項目が OK となっていることを確認。 diff --git a/test/issues/1024/C1024.sh b/test/issues/1024/C1024.sh new file mode 100755 index 00000000..6c1a2725 --- /dev/null +++ b/test/issues/1024/C1024.sh @@ -0,0 +1,25 @@ +#!/bin/sh +USELTP=1 +USEOSTEST=0 + +BOOTPARAM="-c 1-7,17-23,9-15,25-31 -m 10G@0,10G@1" +. ../../common.sh + +################################################################################ +rm -f mcexec +ln -s $MCEXEC +./C1024T01 +./mcexec ./C1024T02 + +for i in process_vm_readv02:03 process_vm_readv03:04 process_vm_writev02:05; do + tp=`echo $i|sed 's/:.*//'` + id=`echo $i|sed 's/.*://'` + sudo $MCEXEC $LTPBIN/$tp 2>&1 | tee $tp.txt + ok=`grep TPASS $tp.txt | wc -l` + ng=`grep TFAIL $tp.txt | wc -l` + if [ $ng = 0 ]; then + echo "*** C1024T$id: $tp OK ($ok)" + else + echo "*** C1024T$id: $tp NG (ok=$ok ng=$ng)" + fi +done diff --git a/test/issues/1024/C1024.txt b/test/issues/1024/C1024.txt new file mode 100644 index 00000000..0233a241 --- /dev/null +++ b/test/issues/1024/C1024.txt @@ -0,0 +1,147 @@ +Script started on Wed Jul 11 19:17:51 2018 +bash-4.2$ make test +gcc -o C1024T01 C1024T01.c -Wall -g +gcc -o C1024T02 C1024T02.c -Wall -g +sh ./C1024.sh +*** C1024T01 START +19:17:54 c=0 +19:22:54 c=9817 +19:27:54 c=18527 +19:32:55 c=27856 +19:37:55 c=37483 +19:42:55 c=45813 +19:47:55 c=55300 +19:52:55 c=64887 +19:57:55 c=73167 +20:02:55 c=82642 +20:07:55 c=92207 +20:12:55 c=100535 +20:17:55 c=109965 +20:22:56 c=119554 +20:27:56 c=127708 +20:32:56 c=137094 +20:37:56 c=146592 +20:42:56 c=154906 +20:47:56 c=164316 +20:52:56 c=173823 +20:57:56 c=181913 +21:02:56 c=191485 +21:07:56 c=201029 +21:12:57 c=209331 +21:17:57 c=218691 +21:22:57 c=228263 +21:27:57 c=236463 +21:32:57 
c=245879 +21:37:57 c=255435 +21:42:57 c=263831 +21:47:57 c=273045 +21:52:57 c=282637 +21:57:57 c=290994 +22:02:58 c=300088 +22:07:58 c=309593 +22:12:58 c=318283 +22:17:58 c=327215 +22:22:58 c=336790 +22:27:58 c=345148 +22:32:58 c=354190 +22:37:58 c=363723 +22:42:58 c=372165 +22:47:58 c=381241 +22:52:59 c=390780 +22:57:59 c=399701 +23:02:59 c=408179 +23:07:59 c=417728 +23:12:59 c=426767 +23:17:59 c=435111 +23:22:59 c=444696 +23:27:59 c=454153 +23:32:59 c=462305 +23:37:59 c=471544 +23:43:00 c=481128 +23:48:00 c=489413 +23:53:00 c=498511 +23:58:00 c=508002 +00:03:00 c=516321 +00:08:00 c=525240 +00:13:00 c=534764 +00:18:00 c=543425 +00:23:00 c=552119 +00:28:00 c=561668 +00:33:01 c=570540 +00:38:01 c=578954 +00:43:01 c=588458 +00:48:01 c=597499 +00:53:01 c=605647 +00:58:01 c=615083 +01:03:01 c=624466 +01:08:01 c=632584 +01:13:01 c=641645 +01:18:02 c=651150 +01:23:02 c=659685 +01:28:02 c=668129 +01:33:02 c=677555 +01:38:02 c=686246 +01:43:02 c=694575 +01:48:02 c=704131 +01:53:02 c=713330 +01:58:02 c=721354 +02:03:02 c=730718 +02:08:03 c=740192 +02:13:03 c=748106 +02:18:03 c=757081 +02:23:03 c=766548 +02:28:03 c=775053 +02:33:03 c=783508 +02:38:03 c=793041 +02:43:03 c=802198 +02:48:03 c=810191 +02:53:03 c=819405 +02:58:04 c=828856 +03:03:04 c=836987 +03:08:04 c=845733 +03:13:04 c=855234 +03:18:04 c=863744 +03:23:04 c=872016 +03:28:04 c=881451 +03:33:04 c=890508 +03:38:04 c=898634 +03:43:04 c=907499 +03:48:05 c=916825 +03:53:05 c=924778 +03:58:05 c=933039 +04:03:05 c=942260 +04:08:05 c=950914 +04:13:05 c=958693 +04:18:05 c=967690 +04:23:05 c=976997 +04:28:05 c=984635 +04:33:05 c=993041 +04:38:06 c=1002233 +04:43:06 c=1010732 +04:48:06 c=1018751 +04:53:06 c=1027635 +04:58:06 c=1036889 +05:03:06 c=1044866 +05:05:39 c=1048600 +*** C1024T01 OK +*** C1024T02 START +05:05:42 c=0 +05:06:21 c=1048600 +*** C1024T02 OK +process_vm_readv02 0 TINFO : child 0: memory allocated and initialized. +process_vm_readv02 0 TINFO : child 1: reading string from same memory location. 
+process_vm_readv02 1 TPASS : expected string received. +*** C1024T03: process_vm_readv02 OK (1) +process_vm_readv03 0 TINFO : child 0: 10 iovecs allocated and initialized. +process_vm_readv03 0 TINFO : child 1: 10 remote iovecs received. +process_vm_readv03 0 TINFO : child 1: 4 local iovecs initialized. +process_vm_readv03 1 TPASS : child 1: all bytes are correctly received. +*** C1024T04: process_vm_readv03 OK (1) +process_vm_writev02 0 TINFO : child 0: memory allocated. +process_vm_writev02 0 TINFO : child 2: write to the same memory location. +process_vm_writev02 1 TPASS : child 0: all bytes are expected. +*** C1024T05: process_vm_writev02 OK (1) +bash-4.2$ exit +exit + +Script done on Thu Jul 12 05:43:43 2018 diff --git a/test/issues/1024/C1024T01.c b/test/issues/1024/C1024T01.c new file mode 100644 index 00000000..a9b6151e --- /dev/null +++ b/test/issues/1024/C1024T01.c @@ -0,0 +1,171 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <sys/types.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/wait.h> +#include <sys/select.h> +#include <sys/syscall.h> +#include <errno.h> +#include <fcntl.h> +#include <time.h> +#include <signal.h> + +#define EXEC_CNT (1024 * 1024) +#define FORK_CNT 24 + +void +killall() +{ + fprintf(stderr, "*** C1024T01 NG\n"); + fflush(stderr); + kill(-getpid(), SIGKILL); + exit(1); +} + +void +print(int c) +{ + time_t t; + char tbuf[16]; + + time(&t); + strftime(tbuf, 16, "%H:%M:%S", localtime(&t)); + fprintf(stderr, "%s c=%d\n", tbuf, c); + fflush(stderr); +} + +int +main(int argc, char **argv) +{ + int *c; + pid_t pids[FORK_CNT]; + int pfd[FORK_CNT]; + int i; + int st; + int maxfd = -1; + int rc; + char buf[1024]; + + fprintf(stderr, "*** C1024T01 START\n"); + c = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, + -1, 0); + if (c == (void *)-1) { + perror("mmap"); + exit(1); + } + + *c = 0; + print(*c); + setpgid(0, 0); + for (i = 0; i < FORK_CNT; i++) { + int fds[2]; + + if (pipe(fds) 
== -1) { + perror("pipe"); + exit(1); + } + fflush(stderr); + if ((pids[i] = fork()) == 0) { + close(fds[0]); + close(0); + close(1); + close(2); + open("/dev/null", O_RDONLY); + dup(fds[1]); + dup(fds[1]); + while (*c <= EXEC_CNT) { + pid_t pid; + + if ((pid = fork()) == 0) { + execl("./mcexec", "./mcexec", + "/bin/sleep", "0", NULL); + perror("execve"); + exit(1); + } + if (pid == -1) { + perror("fork"); + exit(1); + } + while ((rc = waitpid(pid, &st, 0)) == -1 && + errno == EINTR); + if (rc == -1) { + perror("wait"); + exit(1); + } + if (!WIFEXITED(st) || WEXITSTATUS(st) != 0) { + sprintf(buf, "exit: %08x\n", st); + write(2, buf, strlen(buf)); + exit(1); + } + __sync_fetch_and_add(c, 1); + } + exit(0); + } + close(fds[1]); + pfd[i] = fds[0]; + if (pfd[i] > maxfd) + maxfd = pfd[i]; + if (pids[i] == -1) { + perror("fork"); + killall(); + } + } + for (;;) { + fd_set readfds; + int e = 0; + struct timeval to; + + FD_ZERO(&readfds); + for (i = 0; i < FORK_CNT; i++) { + if (pfd[i] != -1) { + FD_SET(pfd[i], &readfds); + e++; + } + } + if (!e) + break; + to.tv_sec = 300; + to.tv_usec = 0; + rc = select(maxfd + 1, &readfds, NULL, NULL, &to); + if (rc == 0) { + print(*c); + continue; + } + for (i = 0; i < FORK_CNT; i++) { + if (pfd[i] != -1 && FD_ISSET(pfd[i], &readfds)) { + if ((rc = read(pfd[i], buf, 1024)) == -1) { + perror("read"); + killall(); + } + if (rc == 0) { + close(pfd[i]); + pfd[i] = -1; + } + else { + write(2, buf, rc); + killall(); + } + } + } + } + for (i = 0; i < FORK_CNT; i++) { + while ((rc = waitpid(pids[i], &st, 0)) == -1 && errno == EINTR); + if (rc == -1) { + perror("wait"); + killall(); + } + if (!WIFEXITED(st) || WEXITSTATUS(st) != 0) { + fprintf(stderr, "%d: exit: %08x\n", pids[i], st); /* report the failing child before killall() */ + killall(); + } + } + print(*c); + if (*c <= EXEC_CNT) { + fprintf(stderr, "*** C1024T01 NG\n"); + exit(1); /* NG must not fall through to the OK message */ + } + fprintf(stderr, "*** C1024T01 OK\n"); + exit(0); +} diff --git a/test/issues/1024/C1024T02.c b/test/issues/1024/C1024T02.c new file mode 100644 index 
00000000..c06cb626 --- /dev/null +++ b/test/issues/1024/C1024T02.c @@ -0,0 +1,177 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <sys/types.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/wait.h> +#include <sys/select.h> +#include <sys/syscall.h> +#include <errno.h> +#include <fcntl.h> +#include <time.h> +#include <signal.h> +#include <sys/ipc.h> +#include <sys/shm.h> + +#define READ_CNT (1024 *1024) +#define FORK_CNT 24 + +void +killall() +{ + fprintf(stderr, "*** C1024T02 NG\n"); + fflush(stderr); + kill(-getpid(), SIGKILL); + exit(1); +} + +void +print(int c) +{ + time_t t; + char tbuf[16]; + + time(&t); + strftime(tbuf, 16, "%H:%M:%S", localtime(&t)); + fprintf(stderr, "%s c=%d\n", tbuf, c); + fflush(stderr); +} + +int +main(int argc, char **argv) +{ + key_t key; + int shmid; + int *c; + pid_t pids[FORK_CNT]; + int pfd[FORK_CNT]; + int i; + int st; + int maxfd = -1; + int rc; + char buf[1024]; + struct shmid_ds shmbuf; + + fprintf(stderr, "*** C1024T02 START\n"); + key = ftok("C1024T02", 1); + if ((shmid = shmget(key, 4096, IPC_CREAT | 0660)) == -1) { + perror("shmget"); + exit(1); + } + if ((c = shmat(shmid, NULL, 0)) == (void *)-1) { + perror("shmat"); + exit(1); + } + if (shmctl(shmid, IPC_RMID, &shmbuf) == -1) { + perror("RMID"); + exit(1); + } + + *c = 0; + print(*c); + setpgid(0, 0); + for (i = 0; i < FORK_CNT; i++) { + int fds[2]; + + if (pipe(fds) == -1) { + perror("pipe"); + exit(1); + } + fflush(stderr); + if ((pids[i] = fork()) == 0) { + int fd; + + close(fds[0]); + close(0); + close(1); + close(2); + open("/dev/null", O_RDONLY); + dup(fds[1]); + dup(fds[1]); + if ((fd = open("/proc/self/maps", O_RDONLY)) == -1) { + perror("open"); + exit(1); + } + while (*c <= READ_CNT) { + lseek(fd, 0L, SEEK_SET); + if ((rc = read(fd, buf, 1024)) <= 0) { + if (rc == 0) { + fprintf(stderr, "EOF\n"); + } + else { + perror("read"); + } + exit(1); + } + __sync_fetch_and_add(c, 1); + } + exit(0); + } + close(fds[1]); + pfd[i] = 
fds[0]; + if (pfd[i] > maxfd) + maxfd = pfd[i]; + if (pids[i] == -1) { + perror("fork"); + killall(); + } + } + for (;;) { + fd_set readfds; + int e = 0; + struct timeval to; + + FD_ZERO(&readfds); + for (i = 0; i < FORK_CNT; i++) { + if (pfd[i] != -1) { + FD_SET(pfd[i], &readfds); + e++; + } + } + if (!e) + break; + to.tv_sec = 300; + to.tv_usec = 0; + rc = select(maxfd + 1, &readfds, NULL, NULL, &to); + if (rc == 0) { + print(*c); + continue; + } + for (i = 0; i < FORK_CNT; i++) { + if (pfd[i] != -1 && FD_ISSET(pfd[i], &readfds)) { + if ((rc = read(pfd[i], buf, 1024)) == -1) { + perror("read"); + killall(); + } + if (rc == 0) { + close(pfd[i]); + pfd[i] = -1; + } + else { + write(2, buf, rc); + print(*c); + killall(); + } + } + } + } + for (i = 0; i < FORK_CNT; i++) { + while ((rc = waitpid(pids[i], &st, 0)) == -1 && errno == EINTR); + if (rc == -1) { + perror("wait"); + killall(); + } + if (!WIFEXITED(st) || WEXITSTATUS(st) != 0) { + fprintf(stderr, "%d: exit: %08x\n", pids[i], st); /* report the failing child before killall() */ + killall(); + } + } + print(*c); + if (*c <= READ_CNT) { + fprintf(stderr, "*** C1024T02 NG\n"); + exit(1); /* NG must not fall through to the OK message */ + } + fprintf(stderr, "*** C1024T02 OK\n"); + exit(0); +} diff --git a/test/issues/1024/Makefile b/test/issues/1024/Makefile new file mode 100644 index 00000000..9018b08d --- /dev/null +++ b/test/issues/1024/Makefile @@ -0,0 +1,16 @@ +CC=gcc +TARGET=C1024T01 C1024T02 + +all:: $(TARGET) + +C1024T01: C1024T01.c + $(CC) -o C1024T01 C1024T01.c -Wall -g + +C1024T02: C1024T02.c + $(CC) -o C1024T02 C1024T02.c -Wall -g + +test:: $(TARGET) + sh ./C1024.sh + +clean:: + rm -f *.o $(TARGET) mcexec diff --git a/test/issues/1024/README b/test/issues/1024/README new file mode 100644 index 00000000..63a9b1fe --- /dev/null +++ b/test/issues/1024/README @@ -0,0 +1,24 @@ +【Issue#1024 動作確認】 +□ テスト内容 +1. VMAP領域の仮想アドレス回収漏れはプログラム起動時、および、procfsアクセス時 + に存在する。VMAP領域は1M(1024×1024)ページ分の仮想アドレスが割り当て可能 + なので、以下を1M回以上繰り返しても問題が発生しないことを確認する。 +C1024T01 mcexec sleep 0を1M回以上行う +C1024T02 /proc/self/mapsのreadを1M回以上行う + +2. 
変更がVMAP領域を使用する他の処理に影響ないことをLTPを用いて確認する。 +C1024T03 process_vm_readv02 (process_vm_readvの処理でVMAPを使用) +C1024T04 process_vm_readv03 (process_vm_readvの処理でVMAPを使用) +C1024T05 process_vm_writev02 (process_vm_writevの処理でVMAPを使用) + +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +□ 実行結果 +C1024.txt 参照。 +全ての項目が OK となっていることを確認。 diff --git a/test/issues/1027/C1027.sh b/test/issues/1027/C1027.sh new file mode 100644 index 00000000..493ee55b --- /dev/null +++ b/test/issues/1027/C1027.sh @@ -0,0 +1,85 @@ +#!/bin/sh + +USELTP=1 +USEOSTEST=1 + +. ../../common.sh + +tid=001 +echo "*** RT_$tid start *******************************" +sudo ${MCEXEC} ${TESTMCK} -s sched_yield -n 0 +echo "*** RT_$tid: PASSED" +echo "" + +tid=001 +echo "*** LT_$tid start *******************************" +sudo ${MCEXEC} ${LTPBIN}/fork01 2>&1 | tee ./LT_${tid}.txt +ok=`grep TPASS LT_${tid}.txt | wc -l` +ng=`grep TFAIL LT_${tid}.txt | wc -l` +if [ $ng = 0 ]; then + echo "*** LT_$tid: PASSED (ok:$ok)" +else + echo "*** LT_$tid: FAILED (ok:$ok, ng:$ng)" +fi +echo "" + +tid=002 +echo "*** LT_$tid start *******************************" +sudo ${MCEXEC} ${LTPBIN}/fork02 2>&1 | tee ./LT_${tid}.txt +ok=`grep TPASS LT_${tid}.txt | wc -l` +ng=`grep TFAIL LT_${tid}.txt | wc -l` +if [ $ng = 0 ]; then + echo "*** LT_$tid: PASSED (ok:$ok)" +else + echo "*** LT_$tid: FAILED (ok:$ok, ng:$ng)" +fi +echo "" + +tid=003 +echo "*** LT_$tid start *******************************" +sudo ${MCEXEC} ${LTPBIN}/fork03 2>&1 | tee ./LT_${tid}.txt +ok=`grep TPASS LT_${tid}.txt | wc -l` +ng=`grep TFAIL LT_${tid}.txt | wc -l` +if [ $ng = 0 ]; then + echo "*** LT_$tid: PASSED (ok:$ok)" +else + echo "*** LT_$tid: FAILED (ok:$ok, ng:$ng)" +fi +echo "" + +tid=004 +echo "*** LT_$tid start *******************************" +sudo ${MCEXEC} ${LTPBIN}/fork04 2>&1 | tee ./LT_${tid}.txt +ok=`grep TPASS LT_${tid}.txt | wc 
-l` +ng=`grep TFAIL LT_${tid}.txt | wc -l` +if [ $ng = 0 ]; then + echo "*** LT_$tid: PASSED (ok:$ok)" +else + echo "*** LT_$tid: FAILED (ok:$ok, ng:$ng)" +fi +echo "" + +tid=005 +echo "*** LT_$tid start *******************************" +sudo ${MCEXEC} ${LTPBIN}/fork07 2>&1 | tee ./LT_${tid}.txt +ok=`grep TPASS LT_${tid}.txt | wc -l` +ng=`grep TFAIL LT_${tid}.txt | wc -l` +if [ $ng = 0 ]; then + echo "*** LT_$tid: PASSED (ok:$ok)" +else + echo "*** LT_$tid: FAILED (ok:$ok, ng:$ng)" +fi +echo "" + +tid=006 +echo "*** LT_$tid start *******************************" +sudo ${MCEXEC} ${LTPBIN}/fork08 2>&1 | tee ./LT_${tid}.txt +ok=`grep TPASS LT_${tid}.txt | wc -l` +ng=`grep TFAIL LT_${tid}.txt | wc -l` +if [ $ng = 0 ]; then + echo "*** LT_$tid: PASSED (ok:$ok)" +else + echo "*** LT_$tid: FAILED (ok:$ok, ng:$ng)" +fi +echo "" + diff --git a/test/issues/1027/Makefile b/test/issues/1027/Makefile new file mode 100644 index 00000000..0a8e41d7 --- /dev/null +++ b/test/issues/1027/Makefile @@ -0,0 +1,10 @@ +CC = gcc + +all: $(TARGET) + +test: all + @sh ./C1027.sh + +clean: + rm -f $(TARGET) *.o + diff --git a/test/issues/1027/README b/test/issues/1027/README new file mode 100644 index 00000000..19c59877 --- /dev/null +++ b/test/issues/1027/README @@ -0,0 +1,44 @@ +【Issue#1027 動作確認】 +□ テスト内容 +1. Issueで報告された再現プログラムでの確認 +RT_001: ostest-sched_yield.000 + sched_yield()によるスケジューリングが行われ、 + - [child] End process + - [parent] End process + の順に出力されることを確認 + +2. 
既存のfork機能に影響がないことをLTPを用いて確認 +LT_001: ltp-fork01 + fork後に子プロセスのプロセスIDが正しいことを確認 (TPASS 2件) + +LT_002: ltp-fork02 + fork後にwaitを行い、waitが子プロセスのPIDを返却することを確認(TPASS 1件) + +LT_003: ltp-fork03 + 子プロセスがfork後に計算を行えること、また子プロセスでのfork返却値が0で + あることを確認 (TPASS 1件) + +LT_004: ltp-fork04 + forkで生成した子プロセスが環境変数を変更しても、親プロセス側の + 環境変数に変化が無いことを確認 (TPASS 3件) + +LT_005: ltp-fork07 + forkした子プロセスに、親プロセスからファイルディスクリプタを + 引き継いでいることを確認 (TPASS 1件) + +LT_006: ltp-fork08 + forkした複数の子プロセスが、それぞれ親プロセスから引き継いだファイル + ディスクリプタを別個に操作できることを確認 + (ある子プロセスがcloseしても別な子プロセスがI/O可能) (TPASS 4件) + +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +□ 実行結果 +result.log 参照。 +すべての項目をPASSしていることを確認。 diff --git a/test/issues/1027/result.log b/test/issues/1027/result.log new file mode 100644 index 00000000..1c9fef94 --- /dev/null +++ b/test/issues/1027/result.log @@ -0,0 +1,58 @@ +*** RT_001 start ******************************* +TEST_SUITE: sched_yield +TEST_NUMBER: 0 +ARGS: +[child] running core 1 +[parent] running core 0 +[parent] child process migrate/bind to core 0 +[parent] parent process bind to core 0 +[parent] send sched_yield. +[child] before migrate prevcore 1, nowcore 0 +[child] End process. +[parent] End process. 
+RESULT: check end order, [end child] -> [end parent] +*** RT_001: PASSED + +*** LT_001 start ******************************* +fork01 1 TPASS : fork() returned 18705 +fork01 2 TPASS : child pid and fork() return agree: 18705 +*** LT_001: PASSED (ok:2) + +*** LT_002 start ******************************* +fork02 0 TINFO : Inside parent +fork02 0 TINFO : exit status of wait 0 +fork02 1 TPASS : test 1 PASSED +*** LT_002: PASSED (ok:1) + +*** LT_003 start ******************************* +fork03 0 TINFO : process id in parent of child from fork : 18843 +fork03 1 TPASS : test 1 PASSED +*** LT_003: PASSED (ok:1) + +*** LT_004 start ******************************* +fork04 1 TPASS : Env var TERM unchanged after fork(): xterm-256color +fork04 2 TPASS : Env var NoTSetzWq unchanged after fork(): getenv() does not find variable set +fork04 3 TPASS : Env var TESTPROG unchanged after fork(): FRKTCS04 +*** LT_004: PASSED (ok:3) + +*** LT_005 start ******************************* +fork07 0 TINFO : Forking 100 children +fork07 0 TINFO : Forked all 100 children, now collecting +fork07 0 TINFO : Collected all 100 children +fork07 1 TPASS : 100/100 children read correctly from an inheritted fd +*** LT_005: PASSED (ok:1) + +*** LT_006 start ******************************* +fork08 0 TINFO : parent forksval: 1 +fork08 0 TINFO : second child got char: b +fork08 1 TPASS : Test passed in childnumber 2 +fork08 0 TINFO : parent forksval: 1 +fork08 0 TINFO : parent forksval: 2 +fork08 0 TINFO : exit status of wait expected 0 got 0 +fork08 1 TPASS : parent test PASSED +fork08 0 TINFO : exit status of wait expected 0 got 0 +fork08 2 TPASS : parent test PASSED +fork08 0 TINFO : exit status of wait expected 0 got 0 +fork08 3 TPASS : parent test PASSED +fork08 0 TINFO : Number of processes forked is 2 +*** LT_006: PASSED (ok:4) diff --git a/test/issues/1031/C1031.sh b/test/issues/1031/C1031.sh new file mode 100644 index 00000000..0b0b387d --- /dev/null +++ b/test/issues/1031/C1031.sh @@ -0,0 +1,19 @@ 
+#!/bin/sh + +USELTP=0 +USEOSTEST=1 + +. ../../common.sh + +tid=001 +echo "*** RT_${tid} start *******************************" +sudo ${MCEXEC} ${TESTMCK} -s rt_sigaction -n 4 +echo "*** RT_${tid}: CHECK \"Terminate by signal 10\"" +echo "" + +sudo ${MCEXEC} ./CT_001 +sudo ${MCEXEC} ./CT_002 +sudo ${MCEXEC} ./CT_003 +sudo ${MCEXEC} ./CT_004 +sudo ${MCEXEC} ./CT_005 + diff --git a/test/issues/1031/CT_001.c b/test/issues/1031/CT_001.c new file mode 100644 index 00000000..80fcd001 --- /dev/null +++ b/test/issues/1031/CT_001.c @@ -0,0 +1,64 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <signal.h> +#include "./test_chk.h" + +#define TEST_NAME "CT_001" + +int handled_cnt; + +void test_handler(int sig) +{ + handled_cnt++; +} + +int main(int argc, char **argv) +{ + int rc = 0; + int pid = 0; + int status; + int tmp_flag = 0; + struct sigaction sa; + + printf("*** %s start *******************************\n", TEST_NAME); + handled_cnt = 0; + + pid = fork(); + CHKANDJUMP(pid == -1, "fork"); + + if (pid == 0) { /* child */ + sa.sa_handler = test_handler; + sa.sa_flags = SA_RESETHAND; + + rc = sigaction(SIGUSR1, &sa, NULL); + OKNG(rc != 0, "sigaction with SA_RESETHAND"); + + printf(" send 1st SIGUSR1\n"); + kill(getpid(), SIGUSR1); + OKNG(handled_cnt != 1, "invoked test_handler"); + printf(" send 2nd SIGUSR1\n"); + kill(getpid(), SIGUSR1); + OKNG(1, "can't reach here"); + } else { /* parent */ + rc = waitpid(pid, &status, 0); + CHKANDJUMP(rc == -1, "waitpid"); + + if (WIFSIGNALED(status)) { + if (WTERMSIG(status) == SIGUSR1) { + tmp_flag = 1; + } + } + OKNG(tmp_flag != 1, "child is killed by SIGUSR1"); + } + + printf("*** %s PASSED\n\n", TEST_NAME); + + return 0; + +fn_fail: + printf("*** %s FAILED\n\n", TEST_NAME); + + return -1; +} diff --git a/test/issues/1031/CT_002.c b/test/issues/1031/CT_002.c new file mode 100644 index 00000000..4bd3db71 --- /dev/null +++ b/test/issues/1031/CT_002.c @@ -0,0 +1,66 @@ +#include <stdio.h> 
+#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <signal.h> +#include "./test_chk.h" + +#define TEST_NAME "CT_002" + +int handled_cnt; + +void test_handler(int sig) +{ + handled_cnt++; +} + +int main(int argc, char **argv) +{ + int rc = 0; + int pid = 0; + int status; + int tmp_flag = 0; + struct sigaction sa; + + printf("*** %s start *******************************\n", TEST_NAME); + handled_cnt = 0; + + pid = fork(); + CHKANDJUMP(pid == -1, "fork"); + + if (pid == 0) { /* child */ + sa.sa_handler = test_handler; + sa.sa_flags = 0; + + rc = sigaction(SIGUSR1, &sa, NULL); + OKNG(rc != 0, "sigaction (no SA_RESETHAND)"); + + printf(" send 1st SIGUSR1\n"); + kill(getpid(), SIGUSR1); + OKNG(handled_cnt != 1, "invoked test_handler"); + printf(" send 2nd SIGUSR1\n"); + kill(getpid(), SIGUSR1); + OKNG(handled_cnt != 2, "invoked test_handler again"); + _exit(123); + } else { /* parent */ + rc = waitpid(pid, &status, 0); + CHKANDJUMP(rc == -1, "waitpid"); + + if (!WIFSIGNALED(status) && + WIFEXITED(status)) { + if (WEXITSTATUS(status) == 123) { + tmp_flag = 1; + } + } + OKNG(tmp_flag != 1, "child exited normaly"); + } + + printf("*** %s PASSED\n\n", TEST_NAME); + + return 0; + +fn_fail: + printf("*** %s FAILED\n\n", TEST_NAME); + + return -1; +} diff --git a/test/issues/1031/CT_003.c b/test/issues/1031/CT_003.c new file mode 100644 index 00000000..84897ef0 --- /dev/null +++ b/test/issues/1031/CT_003.c @@ -0,0 +1,70 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <signal.h> +#include "./test_chk.h" + +#define TEST_NAME "CT_003" + +int handled_cnt; + +void test_handler(int sig) +{ + handled_cnt++; +} + +int main(int argc, char **argv) +{ + int rc = 0; + int pid = 0; + int status; + int tmp_flag = 0; + struct sigaction sa; + + printf("*** %s start *******************************\n", TEST_NAME); + handled_cnt = 0; + + pid = fork(); + CHKANDJUMP(pid == -1, "fork"); + + if (pid == 0) { /* child */ + 
sa.sa_handler = test_handler; + sa.sa_flags = SA_RESETHAND; /* sa is uninitialized: assign, do not OR */ + + rc = sigaction(SIGUSR1, &sa, NULL); + OKNG(rc != 0, "sigaction with SA_RESETHAND"); + + sa.sa_flags = 0; + rc = sigaction(SIGUSR1, &sa, NULL); + OKNG(rc != 0, "sigaction (rewrite no SA_RESETHAND)"); + + printf(" send 1st SIGUSR1\n"); + kill(getpid(), SIGUSR1); + OKNG(handled_cnt != 1, "invoked test_handler"); + printf(" send 2nd SIGUSR1\n"); + kill(getpid(), SIGUSR1); + OKNG(handled_cnt != 2, "invoked test_handler again"); + _exit(123); + } else { /* parent */ + rc = waitpid(pid, &status, 0); + CHKANDJUMP(rc == -1, "waitpid"); + + if (!WIFSIGNALED(status) && + WIFEXITED(status)) { + if (WEXITSTATUS(status) == 123) { + tmp_flag = 1; + } + } + OKNG(tmp_flag != 1, "child exited normaly"); + } + + printf("*** %s PASSED\n\n", TEST_NAME); + + return 0; + +fn_fail: + printf("*** %s FAILED\n\n", TEST_NAME); + + return -1; +} diff --git a/test/issues/1031/CT_004.c b/test/issues/1031/CT_004.c new file mode 100644 index 00000000..9c3b9b49 --- /dev/null +++ b/test/issues/1031/CT_004.c @@ -0,0 +1,83 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <signal.h> +#include "./test_chk.h" + +#define TEST_NAME "CT_004" + +int handled_cnt; +int handled_cnt2; + +void test_handler(int sig) +{ + handled_cnt++; +} + +void test_handler2(int sig) +{ + handled_cnt2++; +} + +int main(int argc, char **argv) +{ + int rc = 0; + int pid = 0; + int status; + int tmp_flag = 0; + struct sigaction sa; + struct sigaction sa2; + + printf("*** %s start *******************************\n", TEST_NAME); + handled_cnt = 0; + handled_cnt2 = 0; + + pid = fork(); + CHKANDJUMP(pid == -1, "fork"); + + if (pid == 0) { /* child */ + sa.sa_handler = test_handler; + sa.sa_flags = SA_RESETHAND; /* sa is uninitialized: assign, do not OR */ + + sa2.sa_handler = test_handler2; + sa2.sa_flags = SA_RESETHAND; /* sa2 is uninitialized: assign, do not OR */ + + rc = sigaction(SIGUSR1, &sa, NULL); + OKNG(rc != 0, "sigaction with SA_RESETHAND to SIGUSR1"); + + rc = sigaction(SIGUSR2, &sa2, NULL); + OKNG(rc 
!= 0, "sigaction with SA_RESETHAND to SIGUSR2"); + + printf(" send 1st SIGUSR1\n"); + kill(getpid(), SIGUSR1); + OKNG(handled_cnt != 1, "invoked test_handler"); + + printf(" send 1st SIGUSR2\n"); + kill(getpid(), SIGUSR2); + OKNG(handled_cnt2 != 1, "invoked test_handler2"); + + printf(" send 2nd SIGUSR1\n"); + kill(getpid(), SIGUSR1); + OKNG(1, "can't reach here"); + } else { /* parent */ + rc = waitpid(pid, &status, 0); + CHKANDJUMP(rc == -1, "waitpid"); + + if (WIFSIGNALED(status)) { + if (WTERMSIG(status) == SIGUSR1) { + tmp_flag = 1; + } + } + OKNG(tmp_flag != 1, "child is killed by SIGUSR1"); + } + + printf("*** %s PASSED\n\n", TEST_NAME); + + return 0; + +fn_fail: + printf("*** %s FAILED\n\n", TEST_NAME); + + return -1; +} diff --git a/test/issues/1031/CT_005.c b/test/issues/1031/CT_005.c new file mode 100644 index 00000000..248a3fc7 --- /dev/null +++ b/test/issues/1031/CT_005.c @@ -0,0 +1,72 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <signal.h> +#include "./test_chk.h" + +#define TEST_NAME "CT_005" + +int handled_cnt; + +void test_handler(int sig) +{ + handled_cnt++; +} + +int main(int argc, char **argv) +{ + int rc = 0; + int pid = 0; + int status; + int tmp_flag = 0; + struct sigaction sa; + + printf("*** %s start *******************************\n", TEST_NAME); + handled_cnt = 0; + + pid = fork(); + CHKANDJUMP(pid == -1, "fork"); + + if (pid == 0) { /* child */ + sa.sa_handler = test_handler; + sa.sa_flags = SA_RESETHAND; + + rc = sigaction(SIGUSR1, &sa, NULL); + OKNG(rc != 0, "sigaction with SA_RESETHAND"); + + printf(" send 1st SIGUSR1\n"); + kill(getpid(), SIGUSR1); + OKNG(handled_cnt != 1, "invoked test_handler"); + + rc = sigaction(SIGUSR1, &sa, NULL); + OKNG(rc != 0, "sigaction with SA_RESETHAND again"); + + printf(" send 2nd SIGUSR1\n"); + kill(getpid(), SIGUSR1); + OKNG(handled_cnt != 2, "invoked test_handler again"); + + printf(" send 3rd SIGUSR1\n"); + kill(getpid(), SIGUSR1); + OKNG(1, "can't 
reach here"); + } else { /* parent */ + rc = waitpid(pid, &status, 0); + CHKANDJUMP(rc == -1, "waitpid"); + + if (WIFSIGNALED(status)) { + if (WTERMSIG(status) == SIGUSR1) { + tmp_flag = 1; + } + } + OKNG(tmp_flag != 1, "child is killed by SIGUSR1"); + } + + printf("*** %s PASSED\n\n", TEST_NAME); + + return 0; + +fn_fail: + printf("*** %s FAILED\n\n", TEST_NAME); + + return -1; +} diff --git a/test/issues/1031/Makefile b/test/issues/1031/Makefile new file mode 100644 index 00000000..cf54860a --- /dev/null +++ b/test/issues/1031/Makefile @@ -0,0 +1,29 @@ +CC = gcc +TARGET=CT_001 CT_002 CT_003 CT_004 CT_005 + +CPPFLAGS = +LDFLAGS = + +all: $(TARGET) + +CT_001: CT_001.c + $(CC) -o $@ $^ $(LDFLAGS) + +CT_002: CT_002.c + $(CC) -o $@ $^ $(LDFLAGS) + +CT_003: CT_003.c + $(CC) -o $@ $^ $(LDFLAGS) + +CT_004: CT_004.c + $(CC) -o $@ $^ $(LDFLAGS) + +CT_005: CT_005.c + $(CC) -o $@ $^ $(LDFLAGS) + +test: all + @sh ./C1031.sh + +clean: + rm -f $(TARGET) *.o + diff --git a/test/issues/1031/README b/test/issues/1031/README new file mode 100644 index 00000000..6f5871c3 --- /dev/null +++ b/test/issues/1031/README @@ -0,0 +1,60 @@ +【Issue#1031 動作確認】 +□ テスト内容 +1. Issueで報告された再現プログラムでの確認 +RT_001: ostest-rt_sigaction.004 による確認 + SIGUSR1 でプロセスが終了し、「Terminate by signal 10」が出力されることを確認する + +2. 既存のsigaction機能に影響がないことを確認 +CT_001: SIG_RESETHAND 指定時の動作 + 1. SIG_RESETHANDを指定したsigaction()でSIG_USR1にハンドラを設定 + 2. 自身にSIGUSR1を送る + 3. 1.で登録したハンドラが呼び出される + 4. 自身にSIGUSR1を送る + 5. 1.で登録したハンドラが呼び出されず、プロセスが終了する + +CT_002: SIG_RESETHAND 未指定時の動作 + 1. SIG_RESETHANDを指定しないsigaction()でSIGUSR1にハンドラを設定 + 2. 自身にSIGUSR1を送る + 3. 1.で登録したハンドラが呼び出される + 4. 自身にSIGUSR1を送る + 5. 1.で登録したハンドラが呼び出される + +CT_003: SIG_RESETHANDO 指定ハンドラへの上書き登録時の動作 + 1. SIG_RESETHANDを指定したsigaction()でSIG_USR1にハンドラを設定 + 2. SIG_RESETHANDを指定しないsigaction()でSIG_USR1にハンドラを設定 + 3. 自身にSIGUSR1を送る + 4. 2.で登録したハンドラが呼び出される + 5. 自身にSIGUSR1を送る + 6. 2.で登録したハンドラが呼び出される + +CT_004: 複数のsig_numへのハンドラ登録時の動作 + 1. SIG_RESETHANDを指定したsigaction()でSIG_USR1にハンドラを設定 + 2. 
SIG_RESETHANDを指定したsigaction()でSIG_USR2にハンドラを設定 + 3. 自身にSIGUSR1を送る + 4. 1.で登録したハンドラが呼び出される + 5. 自身にSIGUSR2を送る + 6. 2.で登録したハンドラが呼び出される + 7. 自身にSIGUSR1を送る + 8. 1.で登録したハンドラが呼び出されず、プロセスが終了する + +CT_005: 複数回(非上書き)のSIG_RESETHAND 指定時の動作 + 1. SIG_RESETHANDを指定したsigaction()でSIG_USR1にハンドラを設定 + 2. 自身にSIGUSR1を送る + 3. 1.で登録したハンドラが呼び出される + 4. SIG_RESETHANDを指定したsigaction()でSIG_USR1にハンドラを設定 + 5. 自身にSIGUSR1を送る + 6. 4.で登録したハンドラが呼び出される + 7. 自身にSIGUSR1を送る + 8. 4.で登録したハンドラが呼び出されず、プロセスが終了する + +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +□ 実行結果 +result.log 参照。 +すべての項目をPASSしていることを確認。 diff --git a/test/issues/1031/result.log b/test/issues/1031/result.log new file mode 100644 index 00000000..c966359e --- /dev/null +++ b/test/issues/1031/result.log @@ -0,0 +1,58 @@ +*** RT_001 start ******************************* +TEST_SUITE: rt_sigaction +TEST_NUMBER: 4 +ARGS: +/-------- Signal handler will activate -------/ +sig#10 is handled. 
+/------ Process will terminate by signal -----/ +Terminate by signal 10 +*** RT_001: CHECK "Terminate by signal 10" + +*** CT_001 start ******************************* + [OK] sigaction with SA_RESETHAND + send 1st SIGUSR1 + [OK] invoked test_handler + send 2nd SIGUSR1 + [OK] child is killed by SIGUSR1 +*** CT_001 PASSED + +*** CT_002 start ******************************* + [OK] sigaction (no SA_RESETHAND) + send 1st SIGUSR1 + [OK] invoked test_handler + send 2nd SIGUSR1 + [OK] invoked test_handler again + [OK] child exited normaly +*** CT_002 PASSED + +*** CT_003 start ******************************* + [OK] sigaction with SA_RESETHAND + [OK] sigaction (rewrite no SA_RESETHAND) + send 1st SIGUSR1 + [OK] invoked test_handler + send 2nd SIGUSR1 + [OK] invoked test_handler again + [OK] child exited normaly +*** CT_003 PASSED + +*** CT_004 start ******************************* + [OK] sigaction with SA_RESETHAND to SIGUSR1 + [OK] sigaction with SA_RESETHAND to SIGUSR2 + send 1st SIGUSR1 + [OK] invoked test_handler + send 1st SIGUSR2 + [OK] invoked test_handler2 + send 2nd SIGUSR1 + [OK] child is killed by SIGUSR1 +*** CT_004 PASSED + +*** CT_005 start ******************************* + [OK] sigaction with SA_RESETHAND + send 1st SIGUSR1 + [OK] invoked test_handler + [OK] sigaction with SA_RESETHAND again + send 2nd SIGUSR1 + [OK] invoked test_handler again + send 3rd SIGUSR1 + [OK] child is killed by SIGUSR1 +*** CT_005 PASSED diff --git a/test/issues/1031/test_chk.h b/test/issues/1031/test_chk.h new file mode 100644 index 00000000..4cef42e8 --- /dev/null +++ b/test/issues/1031/test_chk.h @@ -0,0 +1,23 @@ +#ifndef HEADER_TEST_CHK_H +#define HEADER_TEST_CHK_H + +#define CHKANDJUMP(cond, ...) do {\ + if (cond) {\ + fprintf(stderr, " [NG] ");\ + fprintf(stderr, __VA_ARGS__);\ + fprintf(stderr, " failed\n");\ + goto fn_fail;\ + } \ + } while (0) + +#define OKNG(cond, ...) 
do {\ + if (cond) {\ + CHKANDJUMP(cond, __VA_ARGS__);\ + } else {\ + fprintf(stdout, " [OK] ");\ + fprintf(stdout, __VA_ARGS__);\ + fprintf(stdout, "\n");\ + } \ + } while (0) + +#endif diff --git a/test/issues/1036/C1036.sh b/test/issues/1036/C1036.sh new file mode 100644 index 00000000..7ec6079e --- /dev/null +++ b/test/issues/1036/C1036.sh @@ -0,0 +1,71 @@ +#!/bin/sh + +USELTP=1 +USEOSTEST=0 + +. ../../common.sh + +strace -f -c -o ./CT_001.strc $BINDIR/mcexec ./CT_001 + +tid=002 +echo "*** CT_$tid start *******************************" +echo "* Check syscall_time is not delegated to mcexec" +echo "* Result of strace -f -c (expect time is NOT contained)" +cat ./CT_001.strc + +grep -e "time$" ./CT_001.strc > /dev/null 2>&1 +if [ $? != 0 ]; then + echo "*** CT_$tid: PASSED" +else + echo "*** CT_$tid: FAILED" +fi +echo "" + +tid=001 +echo "*** LT_$tid start *******************************" +$BINDIR/mcexec $LTPDIR/bin/time01 2>&1 | tee ./LT_${tid}.txt +ok=`grep TPASS LT_${tid}.txt | wc -l` +ng=`grep TFAIL LT_${tid}.txt | wc -l` +if [ $ng = 0 ]; then + echo "*** LT_$tid: PASSED (ok:$ok)" +else + echo "*** LT_$tid: FAILED (ok:$ok, ng:$ng)" +fi +echo "" + +tid=002 +echo "*** LT_$tid start *******************************" +$BINDIR/mcexec $LTPDIR/bin/time02 2>&1 | tee ./LT_${tid}.txt +ok=`grep TPASS LT_${tid}.txt | wc -l` +ng=`grep TFAIL LT_${tid}.txt | wc -l` +if [ $ng = 0 ]; then + echo "*** LT_$tid: PASSED (ok:$ok)" +else + echo "*** LT_$tid: FAILED (ok:$ok, ng:$ng)" +fi +echo "" + +tid=003 +echo "*** LT_$tid start *******************************" +$BINDIR/mcexec $LTPDIR/bin/gettimeofday01 2>&1 | tee ./LT_${tid}.txt +ok=`grep TPASS LT_${tid}.txt | wc -l` +ng=`grep TFAIL LT_${tid}.txt | wc -l` +if [ $ng = 0 ]; then + echo "*** LT_$tid: PASSED (ok:$ok)" +else + echo "*** LT_$tid: FAILED (ok:$ok, ng:$ng)" +fi +echo "" + +tid=004 +echo "*** LT_$tid start *******************************" +$BINDIR/mcexec $LTPDIR/bin/gettimeofday02 2>&1 | tee ./LT_${tid}.txt +ok=`grep PASS 
LT_${tid}.txt | wc -l` +ng=`grep TFAIL LT_${tid}.txt | wc -l` +if [ $ng = 0 ]; then + echo "*** LT_$tid: PASSED (ok:$ok)" +else + echo "*** LT_$tid: FAILED (ok:$ok, ng:$ng)" +fi +echo "" + diff --git a/test/issues/1036/CT_001.c b/test/issues/1036/CT_001.c new file mode 100644 index 00000000..f31e89d9 --- /dev/null +++ b/test/issues/1036/CT_001.c @@ -0,0 +1,47 @@ +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <unistd.h> +#include <sys/syscall.h> + +#include "./test_chk.h" + +#define TEST_NAME "CT_001" + +int main(int argc, char *argv[]) +{ + time_t now; + long sys_ret; + int ng_flag = 0; + + printf("*** %s start *******************************\n", TEST_NAME); + + /* get seconds since the Epoch by glibc time() */ + now = time(NULL); + + /* get seconds since the Epoch by syscall_time */ + sys_ret = syscall(__NR_time, NULL); + + if (now != sys_ret) { + /* check again only once */ + now = time(NULL); + if (now != sys_ret) { + ng_flag = 1; + } + } + printf("glibc time(): %ld seconds\n", now); + printf("sys_time : %ld seconds\n", sys_ret); + + OKNG(ng_flag != 0, "check seconds since the Epoch"); + printf("*** %s PASSED\n\n", TEST_NAME); + + return 0; + +fn_fail: + + printf("*** %s FAILED\n\n", TEST_NAME); + + return -1; +} + diff --git a/test/issues/1036/Makefile b/test/issues/1036/Makefile new file mode 100644 index 00000000..75685285 --- /dev/null +++ b/test/issues/1036/Makefile @@ -0,0 +1,17 @@ +CC = gcc +TARGET=CT_001 + +CPPFLAGS = +LDFLAGS = + +all: $(TARGET) + +CT_001: CT_001.c + $(CC) -o $@ $^ $(LDFLAGS) + +test: all + @sh ./C1036.sh + +clean: + rm -f $(TARGET) *.o + diff --git a/test/issues/1036/README b/test/issues/1036/README new file mode 100644 index 00000000..f1bb9822 --- /dev/null +++ b/test/issues/1036/README @@ -0,0 +1,32 @@ +【Issue#1036 動作確認】 +□ テスト内容 +1. 
Issueで報告された要望が実現されていることの確認 +CT_001: timeシステムコールで取得される秒数の確認 + timeシステムコールで取得された秒数と、glibcのtime()関数で + 取得された秒数が等しいことを確認する +CT_002: timeシステムコールがLinuxに移譲されないことを確認 + straceコマンドを用いて、mcexecがtimeシステムコールを + 呼び出していないことを確認する + +2. 既存のtime機能に影響がないことをLTPを用いて確認 +LT_001: ltp-syscall_time01 + time(2)の基本動作の確認 +LT_002: ltp-syscall_time02 + time(2)の返り値と、引数に指定した領域に格納される値が一致することを確認 +LT_003: ltp-syscall_gettimeofday01 + 不正な引数を指定してgettimeofday(2)を実行した場合、エラー値が返り、 + errnoにEFAULTが設定されることを確認 +LT_004: ltp-syscall_gettimeofday02 + gettimeofday(2)で取得される時刻が増加し続けることを確認する + +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +□ 実行結果 +result.log 参照。 +すべての項目をPASSしていることを確認。 diff --git a/test/issues/1036/result.log b/test/issues/1036/result.log new file mode 100644 index 00000000..6156cccb --- /dev/null +++ b/test/issues/1036/result.log @@ -0,0 +1,73 @@ +*** CT_001 start ******************************* +glibc time(): 1536804078 seconds +sys_time : 1536804078 seconds + [OK] check seconds since the Epoch +*** CT_001 PASSED + +*** CT_002 start ******************************* +* Check syscall_time is not delegated to mcexec +* Result of strace -f -c (expect time is NOT contained) +% time seconds usecs/call calls errors syscall +------ ----------- ----------- --------- --------- ---------------- + 58.26 0.033991 1133 30 futex + 31.92 0.018621 71 264 ioctl + 4.48 0.002614 7 392 356 open + 1.44 0.000839 6 146 108 stat + 0.84 0.000491 5 97 mmap + 0.80 0.000468 6 81 read + 0.51 0.000299 5 66 mprotect + 0.26 0.000151 5 32 set_robust_list + 0.21 0.000120 4 30 gettid + 0.19 0.000110 3 35 fstat + 0.17 0.000099 3 33 close + 0.14 0.000079 16 5 3 access + 0.12 0.000071 2 30 clone + 0.10 0.000057 3 19 getrlimit + 0.08 0.000048 5 10 munmap + 0.07 0.000043 0 122 rt_sigaction + 0.06 0.000033 7 5 write + 0.05 0.000032 6 5 1 openat + 0.05 0.000027 3 9 lseek + 0.05 0.000027 3 8 getdents + 0.03 
0.000019 3 6 brk + 0.03 0.000018 4 5 personality + 0.03 0.000017 17 1 mlock + 0.03 0.000016 16 1 epoll_ctl + 0.02 0.000011 6 2 execve + 0.01 0.000007 7 1 pread + 0.01 0.000007 4 2 arch_prctl + 0.01 0.000006 6 1 lstat + 0.01 0.000004 2 2 rt_sigprocmask + 0.01 0.000004 4 1 getcwd + 0.01 0.000004 2 2 sched_getaffinity + 0.01 0.000003 3 1 getpgid + 0.01 0.000003 2 2 set_tid_address + 0.00 0.000000 0 1 epoll_create + 0.00 0.000000 0 1 eventfd2 +------ ----------- ----------- --------- --------- ---------------- +100.00 0.058339 1448 468 total +*** CT_002: PASSED + +*** LT_001 start ******************************* +time01 1 TPASS : time(0) returned 1536804078 +*** LT_001: PASSED (ok:1) + +*** LT_002 start ******************************* +time02 1 TPASS : time() returned value 1536804078, stored value 1536804078 are same +*** LT_002: PASSED (ok:1) + +*** LT_003 start ******************************* +gettimeofday01 1 TPASS : gettimeofday(2) set the errno EFAULT correctly +*** LT_003: PASSED (ok:1) + +*** LT_004 start ******************************* +tst_test.c:934: INFO: Timeout per run is 0h 05m 00s +gettimeofday02.c:89: INFO: gettimeofday() called 7968855 times +gettimeofday02.c:90: PASS: gettimeofday() monotonous in 10 seconds + +Summary: +passed 1 +failed 0 +skipped 0 +warnings 0 +*** LT_004: PASSED (ok:0) diff --git a/test/issues/1036/test_chk.h b/test/issues/1036/test_chk.h new file mode 100644 index 00000000..4cef42e8 --- /dev/null +++ b/test/issues/1036/test_chk.h @@ -0,0 +1,23 @@ +#ifndef HEADER_TEST_CHK_H +#define HEADER_TEST_CHK_H + +#define CHKANDJUMP(cond, ...) do {\ + if (cond) {\ + fprintf(stderr, " [NG] ");\ + fprintf(stderr, __VA_ARGS__);\ + fprintf(stderr, " failed\n");\ + goto fn_fail;\ + } \ + } while (0) + +#define OKNG(cond, ...) 
do {\ + if (cond) {\ + CHKANDJUMP(cond, __VA_ARGS__);\ + } else {\ + fprintf(stdout, " [OK] ");\ + fprintf(stdout, __VA_ARGS__);\ + fprintf(stdout, "\n");\ + } \ + } while (0) + +#endif diff --git a/test/issues/1039/C1039.sh b/test/issues/1039/C1039.sh new file mode 100755 index 00000000..b4c13ac4 --- /dev/null +++ b/test/issues/1039/C1039.sh @@ -0,0 +1,20 @@ +#!/bin/sh +USELTP=1 +USEOSTEST=0 + +BOOTPARAM="-c 1-7,17-23,9-15,25-31 -m 10G@0,10G@1" +. ../../common.sh + +################################################################################ +for i in fork02:01 fork03:02 execve01:03 execve02:04 execve03:05 mmap12:06; do + tp=`echo $i|sed 's/:.*//'` + id=`echo $i|sed 's/.*://'` + sudo sh -c "PATH=$LTPBIN:$PATH $MCEXEC $LTPBIN/$tp" 2>&1 | tee $tp.txt + ok=`grep TPASS $tp.txt | wc -l` + ng=`grep TFAIL $tp.txt | wc -l` + if [ $ng = 0 ]; then + echo "*** C1039T$id: $tp OK ($ok)" + else + echo "*** C1039T$id: $tp NG (ok=$ok ng=$ng)" + fi +done diff --git a/test/issues/1039/C1039.txt b/test/issues/1039/C1039.txt new file mode 100644 index 00000000..475764fd --- /dev/null +++ b/test/issues/1039/C1039.txt @@ -0,0 +1,37 @@ +Script started on Mon Aug 27 11:54:38 2018 +bash-4.2$ make test +sh ./C1039.sh +fork02 0 TINFO : Inside parent +fork02 0 TINFO : Inside child +fork02 0 TINFO : exit status of wait 0 +fork02 1 TPASS : test 1 PASSED +*** C1039T01: fork02 OK (1) +fork03 0 TINFO : process id in parent of child from fork : 29001 +fork03 1 TPASS : test 1 PASSED +*** C1039T02: fork03 OK (1) +execl01_child 1 TPASS : execve01_child executed +execve01 0 TINFO : Child process returned TPASS +*** C1039T03: execve01 OK (2) +Error: Failed to open execve_child +execve(): error loading ELF for file execve_child +execve02 1 TPASS : execve() failed expectedly: TEST_ERRNO=EACCES(13): Permission denied +execve02 0 TINFO : Child process returned TPASS +*** C1039T04: execve02 OK (2) +execve03 1 TPASS : execve failed as expected: TEST_ERRNO=ENAMETOOLONG(36): File name too long +execve03 2 
TPASS : execve failed as expected: TEST_ERRNO=ENOENT(2): No such file or directory +lookup_exec_path: lookup_exec_path(): error stat +execve03 3 TPASS : execve failed as expected: TEST_ERRNO=ENOTDIR(20): Not a directory +execve03 4 TPASS : execve failed as expected: TEST_ERRNO=EFAULT(14): Bad address +Error: /tmp/exe0BQ0Va/fake.29225 is not an executable?, errno: 13 +execve(): error loading ELF for file /tmp/exe0BQ0Va/fake.29225 +execve03 5 TPASS : execve failed as expected: TEST_ERRNO=EACCES(13): Permission denied +Error: file /tmp/exe0BQ0Va/execve03.29225 is zero length +execve(): error loading ELF for file /tmp/exe0BQ0Va/execve03.29225 +execve03 6 TPASS : execve failed as expected: TEST_ERRNO=ENOEXEC(8): Exec format error +*** C1039T05: execve03 OK (6) +mmap12 1 TPASS : Functionality of mmap() successful +*** C1039T06: mmap12 OK (1) +bash-4.2$ exit +exit + +Script done on Mon Aug 27 11:54:58 2018 diff --git a/test/issues/1039/Makefile b/test/issues/1039/Makefile new file mode 100644 index 00000000..e887268d --- /dev/null +++ b/test/issues/1039/Makefile @@ -0,0 +1,10 @@ +CC=gcc +TARGET= + +all:: $(TARGET) + +test:: $(TARGET) + sh ./C1039.sh + +clean:: + rm -f *.o $(TARGET) diff --git a/test/issues/1039/README b/test/issues/1039/README new file mode 100644 index 00000000..fb6485e0 --- /dev/null +++ b/test/issues/1039/README @@ -0,0 +1,26 @@ +【Issue#1039 動作確認】 +□ テスト内容 +調査の結果、Issue#1039の指摘は本来発生しない現象なことが判明し、 +Issue#1039向けパッチ(POSTK_TEMP_FIX_14)は不要なため、本パッチの除去を行った。 + +このため、パッチ除去により動作に影響が無いことをLTPを用いて確認した。 +page faultのcopy on write処理が呼び出される以下のテストプログラムを選定した。 + +C1039T01 fork02 +C1039T02 fork03 +C1039T03 execve01 +C1039T04 execve02 +C1039T05 execve03 +C1039T06 mmap12 + +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +□ 実行結果 +C1039.txt 参照。 +全ての項目が OK となっていることを確認。 diff --git a/test/issues/1050/C1050.sh b/test/issues/1050/C1050.sh new file 
mode 100644 index 00000000..6e3ffb85 --- /dev/null +++ b/test/issues/1050/C1050.sh @@ -0,0 +1,21 @@ +#!/bin/sh + +USELTP=0 +USEOSTEST=1 + +. ../../common.sh + +tid=001 +echo "*** RT_$tid start *******************************" +sudo ${MCEXEC} ${TESTMCK} -s setrlimit -n 15 2>&1 | tee ./RT_${tid}.txt +if grep "RESULT: ok" ./RT_${tid}.txt > /dev/null 2>&1 ; then + echo "*** RT_$tid: PASSED" +else + echo "*** RT_$tid: FAILED" +fi +echo "" + +sudo ${MCEXEC} ./CT_001 +sudo ${MCEXEC} ./CT_002 +sudo ${MCEXEC} ./CT_003 +sudo ${MCEXEC} ./CT_004 diff --git a/test/issues/1050/CT_001.c b/test/issues/1050/CT_001.c new file mode 100644 index 00000000..391a50c9 --- /dev/null +++ b/test/issues/1050/CT_001.c @@ -0,0 +1,66 @@ +#define _GNU_SOURCE 1 + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <sys/time.h> +#include <sys/resource.h> +#include "./test_chk.h" + +#define TEST_NAME "CT_001" + +int main(int argc, char **argv) +{ + struct rlimit get_rlim, set_rlim; + int rc, resource; + int __errno; + rlim_t set_max, set_cur; + + + printf("*** %s start *******************************\n", TEST_NAME); + CHKANDJUMP(geteuid() != 0, "Test needs to be run as root"); + + resource = RLIMIT_NPROC; + rc = getrlimit(resource, &get_rlim); + + OKNG(rc != 0, "getrlimit cur:%lx max:%lx", + get_rlim.rlim_cur, get_rlim.rlim_max); + + set_max = get_rlim.rlim_max - 10; + if (get_rlim.rlim_cur > set_max) { + set_cur = set_max; + } + else { + set_cur = get_rlim.rlim_cur; + } + + set_rlim.rlim_cur = set_cur; + set_rlim.rlim_max = set_max; + + errno = 0; + rc = setrlimit(resource, &set_rlim); + __errno = errno; + + OKNG(rc != 0, "setrlimit cur:%lx max:%lx returned %d" + " (expect return is 0)", + set_rlim.rlim_cur, set_rlim.rlim_max, rc); + + OKNG(__errno != 0, "errno after setrlimit :%d" + " (expect error is 0)", __errno); + + rc = getrlimit(resource, &get_rlim); + OKNG(get_rlim.rlim_max != set_max, "getrlimit cur:%lx max:%lx" + " (expect max 
is %lx)", + get_rlim.rlim_cur, get_rlim.rlim_max, set_max); + + printf("*** %s PASSED\n\n", TEST_NAME); + + return 0; + +fn_fail: + printf("*** %s FAILED\n\n", TEST_NAME); + + return -1; +} diff --git a/test/issues/1050/CT_002.c b/test/issues/1050/CT_002.c new file mode 100644 index 00000000..09a6dc70 --- /dev/null +++ b/test/issues/1050/CT_002.c @@ -0,0 +1,66 @@ +#define _GNU_SOURCE 1 + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <sys/time.h> +#include <sys/resource.h> +#include "./test_chk.h" + +#define TEST_NAME "CT_002" + +int main(int argc, char **argv) +{ + struct rlimit get_rlim, set_rlim; + int rc, resource; + int __errno; + rlim_t set_max, set_cur; + + + printf("*** %s start *******************************\n", TEST_NAME); + CHKANDJUMP(geteuid() != 0, "Test needs to be run as root"); + + resource = RLIMIT_NPROC; + rc = getrlimit(resource, &get_rlim); + + OKNG(rc != 0, "getrlimit cur:%lx max:%lx", + get_rlim.rlim_cur, get_rlim.rlim_max); + + set_max = get_rlim.rlim_max + 10; + if (get_rlim.rlim_cur > set_max) { + set_cur = set_max; + } + else { + set_cur = get_rlim.rlim_cur; + } + + set_rlim.rlim_cur = set_cur; + set_rlim.rlim_max = set_max; + + errno = 0; + rc = setrlimit(resource, &set_rlim); + __errno = errno; + + OKNG(rc != 0, "setrlimit cur:%lx max:%lx returned %d" + " (expect return is 0)", + set_rlim.rlim_cur, set_rlim.rlim_max, rc); + + OKNG(__errno != 0, "errno after setrlimit :%d" + " (expect error is 0)", __errno); + + rc = getrlimit(resource, &get_rlim); + OKNG(get_rlim.rlim_max != set_max, "getrlimit cur:%lx max:%lx" + " (expect max is %lx)", + get_rlim.rlim_cur, get_rlim.rlim_max, set_max); + + printf("*** %s PASSED\n\n", TEST_NAME); + + return 0; + +fn_fail: + printf("*** %s FAILED\n\n", TEST_NAME); + + return -1; +} diff --git a/test/issues/1050/CT_003.c b/test/issues/1050/CT_003.c new file mode 100644 index 00000000..e54ecf94 --- /dev/null +++ b/test/issues/1050/CT_003.c @@ 
-0,0 +1,66 @@ +#define _GNU_SOURCE 1 + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <sys/time.h> +#include <sys/resource.h> +#include "./test_chk.h" + +#define TEST_NAME "CT_003" + +int main(int argc, char **argv) +{ + struct rlimit get_rlim, set_rlim; + int rc, resource; + int __errno; + rlim_t set_max, set_cur; + + + printf("*** %s start *******************************\n", TEST_NAME); + CHKANDJUMP(geteuid() != 0, "Test needs to be run as root"); + + resource = RLIMIT_MSGQUEUE; + rc = getrlimit(resource, &get_rlim); + + OKNG(rc != 0, "getrlimit cur:%lx max:%lx", + get_rlim.rlim_cur, get_rlim.rlim_max); + + set_max = get_rlim.rlim_max - 10; + if (get_rlim.rlim_cur > set_max) { + set_cur = set_max; + } + else { + set_cur = get_rlim.rlim_cur; + } + + set_rlim.rlim_cur = set_cur; + set_rlim.rlim_max = set_max; + + errno = 0; + rc = setrlimit(resource, &set_rlim); + __errno = errno; + + OKNG(rc != 0, "setrlimit cur:%lx max:%lx returned %d" + " (expect return is 0)", + set_rlim.rlim_cur, set_rlim.rlim_max, rc); + + OKNG(__errno != 0, "errno after setrlimit :%d" + " (expect error is 0)", __errno); + + rc = getrlimit(resource, &get_rlim); + OKNG(get_rlim.rlim_max != set_max, "getrlimit cur:%lx max:%lx" + " (expect max is %lx)", + get_rlim.rlim_cur, get_rlim.rlim_max, set_max); + + printf("*** %s PASSED\n\n", TEST_NAME); + + return 0; + +fn_fail: + printf("*** %s FAILED\n\n", TEST_NAME); + + return -1; +} diff --git a/test/issues/1050/CT_004.c b/test/issues/1050/CT_004.c new file mode 100644 index 00000000..a23245ba --- /dev/null +++ b/test/issues/1050/CT_004.c @@ -0,0 +1,66 @@ +#define _GNU_SOURCE 1 + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <sys/time.h> +#include <sys/resource.h> +#include "./test_chk.h" + +#define TEST_NAME "CT_004" + +int main(int argc, char **argv) +{ + struct rlimit get_rlim, set_rlim; + int rc, resource; + int 
__errno; + rlim_t set_max, set_cur; + + + printf("*** %s start *******************************\n", TEST_NAME); + CHKANDJUMP(geteuid() != 0, "Test needs to be run as root"); + + resource = RLIMIT_MSGQUEUE; + rc = getrlimit(resource, &get_rlim); + + OKNG(rc != 0, "getrlimit cur:%lx max:%lx", + get_rlim.rlim_cur, get_rlim.rlim_max); + + set_max = get_rlim.rlim_max + 10; + if (get_rlim.rlim_cur > set_max) { + set_cur = set_max; + } + else { + set_cur = get_rlim.rlim_cur; + } + + set_rlim.rlim_cur = set_cur; + set_rlim.rlim_max = set_max; + + errno = 0; + rc = setrlimit(resource, &set_rlim); + __errno = errno; + + OKNG(rc != 0, "setrlimit cur:%lx max:%lx returned %d" + " (expect return is 0)", + set_rlim.rlim_cur, set_rlim.rlim_max, rc); + + OKNG(__errno != 0, "errno after setrlimit :%d" + " (expect error is 0)", __errno); + + rc = getrlimit(resource, &get_rlim); + OKNG(get_rlim.rlim_max != set_max, "getrlimit cur:%lx max:%lx" + " (expect max is %lx)", + get_rlim.rlim_cur, get_rlim.rlim_max, set_max); + + printf("*** %s PASSED\n\n", TEST_NAME); + + return 0; + +fn_fail: + printf("*** %s FAILED\n\n", TEST_NAME); + + return -1; +} diff --git a/test/issues/1050/Makefile b/test/issues/1050/Makefile new file mode 100644 index 00000000..5afa4b35 --- /dev/null +++ b/test/issues/1050/Makefile @@ -0,0 +1,26 @@ +CC = gcc +TARGET=CT_001 CT_002 CT_003 CT_004 + +CPPFLAGS = +LDFLAGS = + +all: $(TARGET) + +CT_001: CT_001.c + $(CC) -o $@ $^ $(LDFLAGS) + +CT_002: CT_002.c + $(CC) -o $@ $^ $(LDFLAGS) + +CT_003: CT_003.c + $(CC) -o $@ $^ $(LDFLAGS) + +CT_004: CT_004.c + $(CC) -o $@ $^ $(LDFLAGS) + +test: all + @sh ./C1050.sh + +clean: + rm -f $(TARGET) *.o + diff --git a/test/issues/1050/README b/test/issues/1050/README new file mode 100644 index 00000000..bdba0884 --- /dev/null +++ b/test/issues/1050/README @@ -0,0 +1,52 @@ +【Issue#1050 動作確認】 +□ テスト内容 +1. 
Issueで報告された再現プログラムでの確認 +RT_001: ostest-setrlimit.015 + rlim_max(ハードリミット)を超えるrlim_cur(ソフトリミット)を指定した場合、 + -1が返り、errnoにEINVALが設定されることを確認 + +2. 既存のset/get_rlimit機能に影響がないことをテストプログラムを用いて確認 + なお、以下のテストプログラムは特権ユーザ権限で実行する +CT_001: Linuxに処理移譲しないresource指定で、rlim_maxを減少させる操作の確認 + RLIMIT_NPROC をresourceに指定し、以下を確認 + 1. getrlimitで、現在のrlim_max の値を取得 + 2. setrlimitで、現在のrlim_max - 10 の値を設定 + 3. setrlimitが成功し、0を返すことを確認 + 4. errnoが変更されていないことを確認 + 5. getrlimitで、変更後の値が設定されていることを確認 + +CT_002: Linuxに処理移譲しないresource指定で、rlim_maxを増加させる操作の確認 + RLIMIT_NPROC をresourceに指定し、以下を確認 + 1. getrlimitで、現在のrlim_max の値を取得 + 2. setrlimitで、現在のrlim_max + 10 の値を設定 + 3. setrlimitが成功し、0を返すことを確認 + 4. errnoが変更されていないことを確認 + 5. getrlimitで、値が変更されていることを確認 + +CT_003: Linuxに処理移譲するresource指定で、rlim_maxを減少させる操作の確認 + RLIMIT_MSGQUEUE をresourceに指定し、以下を確認 + 1. getrlimitで、現在のrlim_max の値を取得 + 2. setrlimitで、現在のrlim_max - 10 の値を設定 + 3. setrlimitが成功し、0を返すことを確認 + 4. errnoが変更されていないことを確認 + 5. getrlimitで、変更後の値が設定されていることを確認 + +CT_004: Linuxに処理移譲するresource指定で、rlim_maxを増加させる操作の確認 + RLIMIT_MSGQUEUE をresourceに指定し、以下を確認 + 1. getrlimitで、現在のrlim_max の値を取得 + 2. setrlimitで、現在のrlim_max + 10 の値を設定 + 3. setrlimitが成功し、0を返すことを確認 + 4. errnoが変更されていないことを確認 + 5. 
getrlimitで、値が変更されていることを確認 + +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +□ 実行結果 +result.log 参照。 +すべての項目をPASSしていることを確認。 diff --git a/test/issues/1050/result.log b/test/issues/1050/result.log new file mode 100644 index 00000000..d0f1a3fc --- /dev/null +++ b/test/issues/1050/result.log @@ -0,0 +1,35 @@ +*** RT_001 start ******************************* +TEST_SUITE: setrlimit +TEST_NUMBER: 15 +ARGS: +setrlimit() = -1, errno = 22 +RESULT: ok +*** RT_001: PASSED + +*** CT_001 start ******************************* + [OK] getrlimit cur:3eb1e max:3eb1e + [OK] setrlimit cur:3eb14 max:3eb14 returned 0 (expect return is 0) + [OK] errno after setrlimit :0 (expect error is 0) + [OK] getrlimit cur:3eb14 max:3eb14 (expect max is 3eb14) +*** CT_001 PASSED + +*** CT_002 start ******************************* + [OK] getrlimit cur:3eb1e max:3eb1e + [OK] setrlimit cur:3eb1e max:3eb28 returned 0 (expect return is 0) + [OK] errno after setrlimit :0 (expect error is 0) + [OK] getrlimit cur:3eb1e max:3eb28 (expect max is 3eb28) +*** CT_002 PASSED + +*** CT_003 start ******************************* + [OK] getrlimit cur:c8000 max:c8000 + [OK] setrlimit cur:c7ff6 max:c7ff6 returned 0 (expect return is 0) + [OK] errno after setrlimit :0 (expect error is 0) + [OK] getrlimit cur:c7ff6 max:c7ff6 (expect max is c7ff6) +*** CT_003 PASSED + +*** CT_004 start ******************************* + [OK] getrlimit cur:c8000 max:c8000 + [OK] setrlimit cur:c8000 max:c800a returned 0 (expect return is 0) + [OK] errno after setrlimit :0 (expect error is 0) + [OK] getrlimit cur:c8000 max:c800a (expect max is c800a) +*** CT_004 PASSED diff --git a/test/issues/1050/test_chk.h b/test/issues/1050/test_chk.h new file mode 100644 index 00000000..4cef42e8 --- /dev/null +++ b/test/issues/1050/test_chk.h @@ -0,0 +1,23 @@ +#ifndef HEADER_TEST_CHK_H +#define HEADER_TEST_CHK_H + +#define 
CHKANDJUMP(cond, ...) do {\ + if (cond) {\ + fprintf(stderr, " [NG] ");\ + fprintf(stderr, __VA_ARGS__);\ + fprintf(stderr, " failed\n");\ + goto fn_fail;\ + } \ + } while (0) + +#define OKNG(cond, ...) do {\ + if (cond) {\ + CHKANDJUMP(cond, __VA_ARGS__);\ + } else {\ + fprintf(stdout, " [OK] ");\ + fprintf(stdout, __VA_ARGS__);\ + fprintf(stdout, "\n");\ + } \ + } while (0) + +#endif diff --git a/test/issues/1065/C1065.sh b/test/issues/1065/C1065.sh new file mode 100644 index 00000000..dc0b9d1c --- /dev/null +++ b/test/issues/1065/C1065.sh @@ -0,0 +1,163 @@ +#!/bin/sh + +USELTP=0 +USEOSTEST=0 + +. ../../common.sh + +tid=001 +echo "*** CT_${tid} start *******************" +fail=0 +map_path=`realpath ./dummy_file` +real_path=`realpath ${map_path}` + +echo "file map: ${map_path}" + +${MCEXEC} ./file_map ${map_path} | tee ./CT_${tid}.txt +if [ X$? != X0 ]; then + fail=1 +fi + +echo "" +echo "** grep ${real_path} from maps" +grep -a -e "${real_path}$" ./CT_${tid}.txt + +if [ X$? = X0 ]; then + echo "[OK] ${real_path} is found" +else + echo "[NG] ${real_path} is not found" + fail=1 +fi + +if [ X$fail = X0 ]; then + echo "*** CT_${tid} PASSED" +else + echo "*** CT_${tid} FAILED" +fi +echo "" + +tid=002 +echo "*** CT_${tid} start *******************" +fail=0 +map_path="./dummy_file" +real_path=`realpath ${map_path}` + +echo "file map: ${map_path}" + +${MCEXEC} ./file_map ${map_path} | tee ./CT_${tid}.txt +if [ X$? != X0 ]; then + fail=1 +fi + +echo "" +echo "** grep ${real_path} from maps" +grep -a -e "${real_path}$" ./CT_${tid}.txt + +if [ X$? 
= X0 ]; then + echo "[OK] ${real_path} is found" +else + echo "[NG] ${real_path} is not found" + fail=1 +fi + +if [ X$fail = X0 ]; then + echo "*** CT_${tid} PASSED" +else + echo "*** CT_${tid} FAILED" +fi +echo "" + +tid=003 +echo "*** CT_${tid} start *******************" +fail=0 +map_path=`realpath ./lnk_to_dummy` +real_path=`realpath ${map_path}` + +echo "file map: ${map_path}" + +${MCEXEC} ./file_map ${map_path} | tee ./CT_${tid}.txt +if [ X$? != X0 ]; then + fail=1 +fi + +echo "" +echo "** grep ${real_path} from maps" +grep -a -e "${real_path}$" ./CT_${tid}.txt + +if [ X$? = X0 ]; then + echo "[OK] ${real_path} is found" +else + echo "[NG] ${real_path} is not found" + fail=1 +fi + +if [ X$fail = X0 ]; then + echo "*** CT_${tid} PASSED" +else + echo "*** CT_${tid} FAILED" +fi +echo "" + +tid=004 +echo "*** CT_${tid} start *******************" +fail=0 +map_path="./lnk_to_dummy" +real_path=`realpath ${map_path}` + +echo "file map: ${map_path}" + +${MCEXEC} ./file_map ${map_path} | tee ./CT_${tid}.txt +if [ X$? != X0 ]; then + fail=1 +fi + +echo "" +echo "** grep ${real_path} from maps" +grep -a -e "${real_path}$" ./CT_${tid}.txt + +if [ X$? = X0 ]; then + echo "[OK] ${real_path} is found" +else + echo "[NG] ${real_path} is not found" + fail=1 +fi + +if [ X$fail = X0 ]; then + echo "*** CT_${tid} PASSED" +else + echo "*** CT_${tid} FAILED" +fi +echo "" + +tid=005 +echo "*** CT_${tid} start *******************" +fail=0 +map_path="./dummy_file" + +echo "check [vdso], [stack]" + +${MCEXEC} ./file_map ${map_path} | tee ./CT_${tid}.txt +if [ X$? != X0 ]; then + fail=1 +fi + +echo "" +for tgt in "\[vdso\]" "\[stack\]" +do + echo "** grep ${tgt} from maps" + grep -a -e "${tgt}$" ./CT_${tid}.txt + + if [ X$? 
= X0 ]; then + echo "[OK] ${tgt} is found" + else + echo "[NG] ${tgt} is not found" + fail=1 + fi +done + +if [ X$fail = X0 ]; then + echo "*** CT_${tid} PASSED" +else + echo "*** CT_${tid} FAILED" +fi +echo "" diff --git a/test/issues/1065/CT_001.sh b/test/issues/1065/CT_001.sh deleted file mode 100755 index 81d6bc7e..00000000 --- a/test/issues/1065/CT_001.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/sh - -TESTNAME=CT_001 -arg_path=`realpath ./dummy_file` - -. ./config - -fail=0 - -echo "*** ${TESTNAME} start *******************" -real_path=`realpath ${arg_path}` - -echo "file map: ${arg_path}" - -${MCEXEC} ./file_map ${arg_path} | tee ./${TESTNAME}.log -if [ X$? != X0 ]; then - fail=1 -fi - -echo "" -echo "** grep ${real_path} from maps" -grep -a -e "${real_path}$" ./${TESTNAME}.log - -if [ X$? = X0 ]; then - echo "[OK] ${real_path} is found" -else - echo "[NG] ${real_path} is not found" - fail=1 -fi - -if [ X$fail = X0 ]; then - echo "*** ${TESTNAME} PASSED" -else - echo "*** ${TESTNAME} FAILED" -fi -echo "" - -rm ./${TESTNAME}.log diff --git a/test/issues/1065/CT_002.sh b/test/issues/1065/CT_002.sh deleted file mode 100755 index 66cc58cb..00000000 --- a/test/issues/1065/CT_002.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/sh - -TESTNAME=CT_002 -arg_path=./dummy_file - -. ./config - -fail=0 - -echo "*** ${TESTNAME} start *******************" -real_path=`realpath ${arg_path}` - -echo "file map: ${arg_path}" - -${MCEXEC} ./file_map ${arg_path} | tee ./${TESTNAME}.log -if [ X$? != X0 ]; then - fail=1 -fi - -echo "" -echo "** grep ${real_path} from maps" -grep -a -e "${real_path}$" ./${TESTNAME}.log - -if [ X$? 
= X0 ]; then - echo "[OK] ${real_path} is found" -else - echo "[NG] ${real_path} is not found" - fail=1 -fi - -if [ X$fail = X0 ]; then - echo "*** ${TESTNAME} PASSED" -else - echo "*** ${TESTNAME} FAILED" -fi -echo "" - -rm ./${TESTNAME}.log diff --git a/test/issues/1065/CT_003.sh b/test/issues/1065/CT_003.sh deleted file mode 100755 index 47b19f8d..00000000 --- a/test/issues/1065/CT_003.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/sh - -TESTNAME=CT_003 -arg_path=./lnk_to_dummy - -. ./config - -fail=0 - -echo "*** ${TESTNAME} start *******************" -real_path=`realpath ${arg_path}` - -echo "file map: ${arg_path}" - -${MCEXEC} ./file_map ${arg_path} | tee ./${TESTNAME}.log -if [ X$? != X0 ]; then - fail=1 -fi - -echo "" -echo "** grep ${real_path} from maps" -grep -a -e "${real_path}$" ./${TESTNAME}.log - -if [ X$? = X0 ]; then - echo "[OK] ${real_path} is found" -else - echo "[NG] ${real_path} is not found" - fail=1 -fi - -if [ X$fail = X0 ]; then - echo "*** ${TESTNAME} PASSED" -else - echo "*** ${TESTNAME} FAILED" -fi -echo "" - -rm ./${TESTNAME}.log diff --git a/test/issues/1065/CT_004.sh b/test/issues/1065/CT_004.sh deleted file mode 100755 index 23dd66a5..00000000 --- a/test/issues/1065/CT_004.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/bin/sh - -TESTNAME=CT_004 -test_program=./print_maps - -. ./config - -fail=0 - -echo "*** ${TESTNAME} start *******************" -real_path=`realpath ${test_program}` -interp_path=`readelf -l ${test_program} | grep "interpreter:" | sed -r 's/.*\[.*interpreter:\s(.*)\].*/\1/'` -interp_real_path=`realpath ${interp_path}` - -echo "exec : ${test_program}" - -${MCEXEC} ${test_program} | tee ./${TESTNAME}.log -if [ X$? != X0 ]; then - fail=1 -fi - -echo "" -echo "** grep ${real_path} from maps" -grep -a -e "${real_path}$" ./${TESTNAME}.log - -if [ X$? 
= X0 ]; then - echo "[OK] ${real_path} is found" -else - echo "[NG] ${real_path} is not found" - fail=1 -fi - -echo "" -echo "** grep ${interp_real_path} from maps" -grep -a -e "${interp_real_path}$" ./${TESTNAME}.log - -if [ X$? = X0 ]; then - echo "[OK] ${interp_real_path} is found" -else - echo "[NG] ${interp_real_path} is not found" - fail=1 -fi - - -if [ X$fail = X0 ]; then - echo "*** ${TESTNAME} PASSED" -else - echo "*** ${TESTNAME} FAILED" -fi -echo "" - -rm ./${TESTNAME}.log diff --git a/test/issues/1065/CT_005.sh b/test/issues/1065/CT_005.sh deleted file mode 100755 index 5a181960..00000000 --- a/test/issues/1065/CT_005.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/sh - -TESTNAME=CT_005 -test_program=./static_print_maps -ref_program=./print_maps - -. ./config - -fail=0 - -echo "*** ${TESTNAME} start *******************" -real_path=`realpath ${test_program}` -interp_path=`readelf -l ${ref_program} | grep "interpreter:" | sed -r 's/.*\[.*interpreter:\s(.*)\].*/\1/'` -interp_real_path=`realpath ${interp_path}` - -echo "exec : ${test_program}" - -${MCEXEC} ${test_program} | tee ./${TESTNAME}.log -if [ X$? != X0 ]; then - fail=1 -fi - -echo "" -echo "** grep ${real_path} from maps" -grep -a -e "${real_path}$" ./${TESTNAME}.log - -if [ X$? = X0 ]; then - echo "[OK] ${real_path} is found" -else - echo "[NG] ${real_path} is not found" - fail=1 -fi - -echo "" -echo "** grep ${interp_real_path} from maps" -grep -a -e "${interp_real_path}$" ./${TESTNAME}.log - -if [ X$? != X0 ]; then - echo "[OK] ${interp_real_path} is not found" -else - echo "[NG] ${interp_real_path} is found" - fail=1 -fi - - -if [ X$fail = X0 ]; then - echo "*** ${TESTNAME} PASSED" -else - echo "*** ${TESTNAME} FAILED" -fi -echo "" - -rm ./${TESTNAME}.log diff --git a/test/issues/1065/CT_006.sh b/test/issues/1065/CT_006.sh deleted file mode 100755 index 91451170..00000000 --- a/test/issues/1065/CT_006.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/sh - -TESTNAME=CT_006 - -. 
./config - -fail=0 - -echo "*** ${TESTNAME} start *******************" -${MCEXEC} cat /proc/self/maps - -if [ $? = 0 ]; then - echo "[OK] shell script is running normaly" -else - fail=1 -fi - -if [ X$fail = X0 ]; then - echo "*** ${TESTNAME} PASSED" -else - echo "*** ${TESTNAME} FAILED" -fi -echo "" diff --git a/test/issues/1065/CT_007.sh b/test/issues/1065/CT_007.sh deleted file mode 100755 index c0aafd42..00000000 --- a/test/issues/1065/CT_007.sh +++ /dev/null @@ -1,67 +0,0 @@ -#!/bin/sh - -TESTNAME=CT_007 -exec_program="./print_maps_and_cmdline" -test_program="./call_execve ${exec_program}" - -. ./config - -fail=0 - -echo "*** ${TESTNAME} start *******************" -real_path=`realpath ${exec_program}` -interp_path=`readelf -l ${exec_program} | grep "interpreter:" | sed -r 's/.*\[.*interpreter:\s(.*)\].*/\1/'` -interp_real_path=`realpath ${interp_path}` - -echo "exec : ${test_program}" - -#${test_program} | tee ./${TESTNAME}.log -${MCEXEC} ${test_program} 1> ./${TESTNAME}_maps.log 2> ./${TESTNAME}_cmdline.log -if [ X$? != X0 ]; then - fail=1 -fi - -cat ./${TESTNAME}_maps.log -echo "" -echo "** grep ${real_path} from maps" -grep -a -e "${real_path}$" ./${TESTNAME}_maps.log - -if [ X$? = X0 ]; then - echo "[OK] ${real_path} is found" -else - echo "[NG] ${real_path} is not found" - fail=1 -fi - -echo "" -echo "** grep ${interp_real_path} from maps" -grep -a -e "${interp_real_path}$" ./${TESTNAME}_maps.log - -if [ X$? = X0 ]; then - echo "[OK] ${interp_real_path} is found" -else - echo "[NG] ${interp_real_path} is not found" - fail=1 -fi - -cat ./${TESTNAME}_cmdline.log -echo "" -echo "** grep ${exec_program} from cmdline" -grep -a -e "${exec_program}" ./${TESTNAME}_cmdline.log - -if [ X$? 
= X0 ]; then - echo "[OK] ${exec_program} is found" -else - echo "[NG] ${exec_program} is not found" - fail=1 -fi - -if [ X$fail = X0 ]; then - echo "*** ${TESTNAME} PASSED" -else - echo "*** ${TESTNAME} FAILED" -fi -echo "" - -rm ./${TESTNAME}_maps.log -rm ./${TESTNAME}_cmdline.log diff --git a/test/issues/1065/CT_008.sh b/test/issues/1065/CT_008.sh deleted file mode 100755 index d5dee261..00000000 --- a/test/issues/1065/CT_008.sh +++ /dev/null @@ -1,66 +0,0 @@ -#!/bin/sh - -TESTNAME=CT_008 -exec_program="./print_maps_and_cmdline" -test_program="./call_execve.sh ${exec_program}" - -. ./config - -fail=0 - -echo "*** ${TESTNAME} start *******************" -real_path=`realpath ${exec_program}` -interp_path=`readelf -l ${exec_program} | grep "interpreter:" | sed -r 's/.*\[.*interpreter:\s(.*)\].*/\1/'` -interp_real_path=`realpath ${interp_path}` - -echo "exec : ${test_program}" - -${MCEXEC} ${test_program} 1> ./${TESTNAME}_maps.log 2> ./${TESTNAME}_cmdline.log -if [ X$? != X0 ]; then - fail=1 -fi - -cat ./${TESTNAME}_maps.log -echo "" -echo "** grep ${real_path} from maps" -grep -a -e "${real_path}$" ./${TESTNAME}_maps.log - -if [ X$? = X0 ]; then - echo "[OK] ${real_path} is found" -else - echo "[NG] ${real_path} is not found" - fail=1 -fi - -echo "" -echo "** grep ${interp_real_path} from maps" -grep -a -e "${interp_real_path}$" ./${TESTNAME}_maps.log - -if [ X$? = X0 ]; then - echo "[OK] ${interp_real_path} is found" -else - echo "[NG] ${interp_real_path} is not found" - fail=1 -fi - -cat ./${TESTNAME}_cmdline.log -echo "" -echo "** grep ${exec_program} from cmdline" -grep -a -e "${exec_program}" ./${TESTNAME}_cmdline.log - -if [ X$? 
= X0 ]; then - echo "[OK] ${exec_program} is found" -else - echo "[NG] ${exec_program} is not found" - fail=1 -fi - -if [ X$fail = X0 ]; then - echo "*** ${TESTNAME} PASSED" -else - echo "*** ${TESTNAME} FAILED" -fi -echo "" - -#rm ./${TESTNAME}_maps.log -#rm ./${TESTNAME}_cmdline.log diff --git a/test/issues/1065/CT_008_cmdline.log b/test/issues/1065/CT_008_cmdline.log deleted file mode 100644 index e69de29b..00000000 diff --git a/test/issues/1065/CT_008_maps.log b/test/issues/1065/CT_008_maps.log deleted file mode 100644 index f49e1fb9..00000000 --- a/test/issues/1065/CT_008_maps.log +++ /dev/null @@ -1,3 +0,0 @@ -STKN:[main] This is shell script -STKN:[main_loop] filename:./call_execve shell:0x(nil) -STKN:[main_loop] filename:./print_maps_and_cmdline shell:0x(nil) diff --git a/test/issues/1065/Makefile b/test/issues/1065/Makefile index 01570fd8..f4ebb9c9 100644 --- a/test/issues/1065/Makefile +++ b/test/issues/1065/Makefile @@ -1,8 +1,5 @@ CC = gcc -MCK_DIR=/home/satoken/ppos - -MCEXEC=$(MCK_DIR)/bin/mcexec -TARGET= file_map print_maps static_print_maps call_execve print_maps_and_cmdline config +TARGET= file_map CPPFLAGS = LDFLAGS = @@ -12,34 +9,13 @@ all: $(TARGET) file_map: file_map.c $(CC) -o $@ $^ $(LDFLAGS) -print_maps: print_maps.c - $(CC) -o $@ $^ $(LDFLAGS) - -print_maps_and_cmdline: print_maps_and_cmdline.c - $(CC) -o $@ $^ $(LDFLAGS) - -static_print_maps: print_maps.c - $(CC) -o $@ $^ $(LDFLAGS) -static - -call_execve: call_execve.c - $(CC) -o $@ $^ $(LDFLAGS) - -config: - @echo "MCEXEC=$(MCEXEC)" > ./config - init: - ln -nfs ./dummy_file ./lnk_to_dummy + @echo "This is Test file for mmap" > ./dummy_file + @ln -nfs ./dummy_file ./lnk_to_dummy test: init all - ./CT_001.sh - ./CT_002.sh - ./CT_003.sh - ./CT_004.sh - ./CT_005.sh - ./CT_006.sh - ./CT_007.sh - ./CT_008.sh + @sh ./C1065.sh clean: - rm -f $(TARGET) *.o ./lnk_to_dummy + rm -f $(TARGET) *.o ./dummy_file ./lnk_to_dummy diff --git a/test/issues/1065/README b/test/issues/1065/README index 
be558240..78141b86 100644 --- a/test/issues/1065/README +++ b/test/issues/1065/README @@ -1,32 +1,33 @@ +【Issue#1065 動作確認】 +□ テスト内容 +1. Issueで報告された症状が解消されていることの確認 +CT_001: 絶対パス指定でのファイルマップ + 絶対パス指定でファイルマップを実行し、/proc/<PID>/maps に + 対象ファイルの絶対パスが表示されることを確認する -CT_001: - ファイル実体を指す絶対パスを指定したファイルマップを実行する - -> /proc/<PID>/maps にファイル実体の絶対パスで表示される +CT_002: 相対パス指定でのファイルマップ + 相対パス指定でファイルマップを実行し、/proc/<PID>/maps に + 対象ファイルの絶対パスが表示されることを確認する -CT_002: - ファイル実体を指す相対パスを指定したファイルマップを実行する - -> /proc/<PID>/maps にファイル実体の絶対パスで表示される +CT_003: シンボリックリンクの絶対パス指定でのファイルマップ + シンボリックリンクへの絶対パス指定でファイルマップを実行し、 + /proc/<PID>/maps に対象ファイルの実体の絶対パスが表示されることを確認する -CT_003: - ファイル実体を指すシンボリックリンクを指定したファイルマップを実行する - -> /proc/<PID>/maps にファイル実体の絶対パスで表示される +CT_004: シンボリックリンクの相対パス指定でのファイルマップ + シンボリックリンクへの相対パス指定でファイルマップを実行し、 + /proc/<PID>/maps に対象ファイルの実体の絶対パスが表示されることを確認する -CT_004: - ELF形式のプログラムを実行する - -> /proc/<PID>/maps に実行したプログラムと、そのinterpの絶対パスが表示される +CT_005: vdso, stack 領域の表示 + /proc/<PID>/maps に[vdso], [maps] が表示されることを確認する -CT_005: - static linkでビルドされたELF形式のプログラムを実行する - -> /proc/<PID>/maps に実行したプログラムと、そのinterpの絶対パスが表示される +□ 実行手順 +$ make test -CT_006: - シェルスクリプトを実行する - -> シェルスクリプトが正しく実行される +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する -CT_007: - execveを行うプログラムを実行する - -> execveされたプログラムが正常に実行され、/proc下のmaps, cmdline から新しい情報が取得できる - -CT_008: - execveを行うプログラムをシェルスクリプトから実行する - -> execveされたプログラムが正常に実行され、/proc下のmaps, cmdline から新しい情報が取得できる +□ 実行結果 +result.log 参照。 +すべての項目をPASSしていることを確認。 diff --git a/test/issues/1065/call_execve.c b/test/issues/1065/call_execve.c deleted file mode 100644 index 0673c5aa..00000000 --- a/test/issues/1065/call_execve.c +++ /dev/null @@ -1,28 +0,0 @@ -#include <stdio.h> -#include <unistd.h> -#include <stdlib.h> -#include <string.h> - -int main(int argc, char* argv[]) -{ - char* command; - char* exargv[] = {NULL, NULL}; - char* exenvp[] = {NULL}; - int rc; - - if (argc < 2) { - 
printf("Error: too few arguments\n"); - return -1; - } - - exargv[0] = argv[1]; - - rc = execve(argv[1], exargv, exenvp); - - /* Don't reach here */ - if (rc == -1) { - perror("Error: failed to execve"); - } - - return -1; -} diff --git a/test/issues/1065/call_execve.sh b/test/issues/1065/call_execve.sh deleted file mode 100755 index d98f04db..00000000 --- a/test/issues/1065/call_execve.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh - -./call_execve $* - diff --git a/test/issues/1065/dummy_file b/test/issues/1065/dummy_file deleted file mode 100644 index 0784a50d..00000000 --- a/test/issues/1065/dummy_file +++ /dev/null @@ -1 +0,0 @@ -This is Test file for mmap. diff --git a/test/issues/1065/print_maps.c b/test/issues/1065/print_maps.c deleted file mode 100644 index 9905a995..00000000 --- a/test/issues/1065/print_maps.c +++ /dev/null @@ -1,13 +0,0 @@ -#include <stdio.h> -#include <unistd.h> -#include <stdlib.h> - -int main(void) -{ - char command[128]; - - sprintf(command, "cat /proc/%d/maps", getpid()); - system(command); - - return 0; -} diff --git a/test/issues/1065/print_maps_and_cmdline.c b/test/issues/1065/print_maps_and_cmdline.c deleted file mode 100644 index 8757e5dc..00000000 --- a/test/issues/1065/print_maps_and_cmdline.c +++ /dev/null @@ -1,21 +0,0 @@ -#include <stdio.h> -#include <unistd.h> -#include <stdlib.h> -#include <string.h> - -#define CMD_SIZE 128 - -int main(void) -{ - char command[CMD_SIZE]; - - memset(command, '0', CMD_SIZE); - sprintf(command, "cat /proc/%d/maps", getpid()); - system(command); - - memset(command, '0', CMD_SIZE); - sprintf(command, "cat /proc/%d/cmdline 1>&2", getpid()); - system(command); - - return 0; -} diff --git a/test/issues/1065/result.log b/test/issues/1065/result.log index d360c578..2cbc00e4 100644 --- a/test/issues/1065/result.log +++ b/test/issues/1065/result.log @@ -1,14 +1,12 @@ -./CT_001.sh *** CT_001 start ******************* -file map: /home/satoken/2018_work/04/test/dummy_file -000000400000-000000401000 r-xs 0 
0:0 0 /home/satoken/2018_work/04/test/file_map -000000600000-000000601000 r--s 0 0:0 0 /home/satoken/2018_work/04/test/file_map -000000601000-000000602000 rw-s 0 0:0 0 /home/satoken/2018_work/04/test/file_map +file map: /home/satoken/mck_srcs/gerrit/mckernel/test/issues/1065/dummy_file +000000400000-000000401000 r-xs 0 0:0 0 +000000600000-000000601000 r--s 0 0:0 0 +000000601000-000000602000 rw-s 0 0:0 0 2aaaaa9f8000-2aaaaaa00000 rw-s 0 0:0 0 -2aaaaaa00000-2aaaaaa21000 r-xs 0 0:0 0 /usr/lib64/ld-2.17.so -2aaaaac21000-2aaaaac22000 r--s 0 0:0 0 /usr/lib64/ld-2.17.so -2aaaaac22000-2aaaaac23000 rw-s 0 0:0 0 /usr/lib64/ld-2.17.so -2aaaaac23000-2aaaaac24000 rw-p 0 0:0 0 +2aaaaaa00000-2aaaaaa21000 r-xs 0 0:0 0 +2aaaaac21000-2aaaaac22000 r--s 0 0:0 0 +2aaaaac22000-2aaaaac24000 rw-s 0 0:0 0 2aaaaac24000-2aaaaac26000 r-xs 0 0:0 0 [vdso] 2aaaaac26000-2aaaaac27000 rw-p 0 0:0 0 2aaaaac4d000-2aaaaac4e000 rw-p 0 0:0 0 @@ -18,25 +16,23 @@ file map: /home/satoken/2018_work/04/test/dummy_file 2aaaab00a000-2aaaab00c000 rw-p 0 0:0 0 /usr/lib64/libc-2.17.so 2aaaab00c000-2aaaab011000 rw-p 0 0:0 0 2aaaab011000-2aaaab013000 rw-p 0 0:0 0 -2aaaab013000-2aaaab014000 rw-s 0 0:0 0 /home/satoken/2018_work/04/test/dummy_file +2aaaab013000-2aaaab014000 rw-s 0 0:0 0 /home/satoken/mck_srcs/gerrit/mckernel/test/issues/1065/dummy_file 547fff800000-548000000000 rw-s 0 0:0 0 [stack] -** grep /home/satoken/2018_work/04/test/dummy_file from maps -2aaaab013000-2aaaab014000 rw-s 0 0:0 0 /home/satoken/2018_work/04/test/dummy_file -[OK] /home/satoken/2018_work/04/test/dummy_file is found +** grep /home/satoken/mck_srcs/gerrit/mckernel/test/issues/1065/dummy_file from maps +2aaaab013000-2aaaab014000 rw-s 0 0:0 0 /home/satoken/mck_srcs/gerrit/mckernel/test/issues/1065/dummy_file +[OK] /home/satoken/mck_srcs/gerrit/mckernel/test/issues/1065/dummy_file is found *** CT_001 PASSED -./CT_002.sh *** CT_002 start ******************* file map: ./dummy_file -000000400000-000000401000 r-xs 0 0:0 0 
/home/satoken/2018_work/04/test/file_map -000000600000-000000601000 r--s 0 0:0 0 /home/satoken/2018_work/04/test/file_map -000000601000-000000602000 rw-s 0 0:0 0 /home/satoken/2018_work/04/test/file_map +000000400000-000000401000 r-xs 0 0:0 0 +000000600000-000000601000 r--s 0 0:0 0 +000000601000-000000602000 rw-s 0 0:0 0 2aaaaa9f8000-2aaaaaa00000 rw-s 0 0:0 0 -2aaaaaa00000-2aaaaaa21000 r-xs 0 0:0 0 /usr/lib64/ld-2.17.so -2aaaaac21000-2aaaaac22000 r--s 0 0:0 0 /usr/lib64/ld-2.17.so -2aaaaac22000-2aaaaac23000 rw-s 0 0:0 0 /usr/lib64/ld-2.17.so -2aaaaac23000-2aaaaac24000 rw-p 0 0:0 0 +2aaaaaa00000-2aaaaaa21000 r-xs 0 0:0 0 +2aaaaac21000-2aaaaac22000 r--s 0 0:0 0 +2aaaaac22000-2aaaaac24000 rw-s 0 0:0 0 2aaaaac24000-2aaaaac26000 r-xs 0 0:0 0 [vdso] 2aaaaac26000-2aaaaac27000 rw-p 0 0:0 0 2aaaaac4d000-2aaaaac4e000 rw-p 0 0:0 0 @@ -46,25 +42,23 @@ file map: ./dummy_file 2aaaab00a000-2aaaab00c000 rw-p 0 0:0 0 /usr/lib64/libc-2.17.so 2aaaab00c000-2aaaab011000 rw-p 0 0:0 0 2aaaab011000-2aaaab013000 rw-p 0 0:0 0 -2aaaab013000-2aaaab014000 rw-s 0 0:0 0 /home/satoken/2018_work/04/test/dummy_file +2aaaab013000-2aaaab014000 rw-s 0 0:0 0 /home/satoken/mck_srcs/gerrit/mckernel/test/issues/1065/dummy_file 547fff800000-548000000000 rw-s 0 0:0 0 [stack] -** grep /home/satoken/2018_work/04/test/dummy_file from maps -2aaaab013000-2aaaab014000 rw-s 0 0:0 0 /home/satoken/2018_work/04/test/dummy_file -[OK] /home/satoken/2018_work/04/test/dummy_file is found +** grep /home/satoken/mck_srcs/gerrit/mckernel/test/issues/1065/dummy_file from maps +2aaaab013000-2aaaab014000 rw-s 0 0:0 0 /home/satoken/mck_srcs/gerrit/mckernel/test/issues/1065/dummy_file +[OK] /home/satoken/mck_srcs/gerrit/mckernel/test/issues/1065/dummy_file is found *** CT_002 PASSED -./CT_003.sh *** CT_003 start ******************* -file map: ./lnk_to_dummy -000000400000-000000401000 r-xs 0 0:0 0 /home/satoken/2018_work/04/test/file_map -000000600000-000000601000 r--s 0 0:0 0 /home/satoken/2018_work/04/test/file_map 
-000000601000-000000602000 rw-s 0 0:0 0 /home/satoken/2018_work/04/test/file_map +file map: /home/satoken/mck_srcs/gerrit/mckernel/test/issues/1065/dummy_file +000000400000-000000401000 r-xs 0 0:0 0 +000000600000-000000601000 r--s 0 0:0 0 +000000601000-000000602000 rw-s 0 0:0 0 2aaaaa9f8000-2aaaaaa00000 rw-s 0 0:0 0 -2aaaaaa00000-2aaaaaa21000 r-xs 0 0:0 0 /usr/lib64/ld-2.17.so -2aaaaac21000-2aaaaac22000 r--s 0 0:0 0 /usr/lib64/ld-2.17.so -2aaaaac22000-2aaaaac23000 rw-s 0 0:0 0 /usr/lib64/ld-2.17.so -2aaaaac23000-2aaaaac24000 rw-p 0 0:0 0 +2aaaaaa00000-2aaaaaa21000 r-xs 0 0:0 0 +2aaaaac21000-2aaaaac22000 r--s 0 0:0 0 +2aaaaac22000-2aaaaac24000 rw-s 0 0:0 0 2aaaaac24000-2aaaaac26000 r-xs 0 0:0 0 [vdso] 2aaaaac26000-2aaaaac27000 rw-p 0 0:0 0 2aaaaac4d000-2aaaaac4e000 rw-p 0 0:0 0 @@ -74,25 +68,23 @@ file map: ./lnk_to_dummy 2aaaab00a000-2aaaab00c000 rw-p 0 0:0 0 /usr/lib64/libc-2.17.so 2aaaab00c000-2aaaab011000 rw-p 0 0:0 0 2aaaab011000-2aaaab013000 rw-p 0 0:0 0 -2aaaab013000-2aaaab014000 rw-s 0 0:0 0 /home/satoken/2018_work/04/test/dummy_file +2aaaab013000-2aaaab014000 rw-s 0 0:0 0 /home/satoken/mck_srcs/gerrit/mckernel/test/issues/1065/dummy_file 547fff800000-548000000000 rw-s 0 0:0 0 [stack] -** grep /home/satoken/2018_work/04/test/dummy_file from maps -2aaaab013000-2aaaab014000 rw-s 0 0:0 0 /home/satoken/2018_work/04/test/dummy_file -[OK] /home/satoken/2018_work/04/test/dummy_file is found +** grep /home/satoken/mck_srcs/gerrit/mckernel/test/issues/1065/dummy_file from maps +2aaaab013000-2aaaab014000 rw-s 0 0:0 0 /home/satoken/mck_srcs/gerrit/mckernel/test/issues/1065/dummy_file +[OK] /home/satoken/mck_srcs/gerrit/mckernel/test/issues/1065/dummy_file is found *** CT_003 PASSED -./CT_004.sh *** CT_004 start ******************* -exec : ./print_maps -000000400000-000000401000 r-xs 0 0:0 0 /home/satoken/2018_work/04/test/print_maps -000000600000-000000601000 r--s 0 0:0 0 /home/satoken/2018_work/04/test/print_maps -000000601000-000000602000 rw-s 0 0:0 0 
/home/satoken/2018_work/04/test/print_maps +file map: ./lnk_to_dummy +000000400000-000000401000 r-xs 0 0:0 0 +000000600000-000000601000 r--s 0 0:0 0 +000000601000-000000602000 rw-s 0 0:0 0 2aaaaa9f8000-2aaaaaa00000 rw-s 0 0:0 0 -2aaaaaa00000-2aaaaaa21000 r-xs 0 0:0 0 /usr/lib64/ld-2.17.so -2aaaaac21000-2aaaaac22000 r--s 0 0:0 0 /usr/lib64/ld-2.17.so -2aaaaac22000-2aaaaac23000 rw-s 0 0:0 0 /usr/lib64/ld-2.17.so -2aaaaac23000-2aaaaac24000 rw-p 0 0:0 0 +2aaaaaa00000-2aaaaaa21000 r-xs 0 0:0 0 +2aaaaac21000-2aaaaac22000 r--s 0 0:0 0 +2aaaaac22000-2aaaaac24000 rw-s 0 0:0 0 2aaaaac24000-2aaaaac26000 r-xs 0 0:0 0 [vdso] 2aaaaac26000-2aaaaac27000 rw-p 0 0:0 0 2aaaaac4d000-2aaaaac4e000 rw-p 0 0:0 0 @@ -102,54 +94,23 @@ exec : ./print_maps 2aaaab00a000-2aaaab00c000 rw-p 0 0:0 0 /usr/lib64/libc-2.17.so 2aaaab00c000-2aaaab011000 rw-p 0 0:0 0 2aaaab011000-2aaaab013000 rw-p 0 0:0 0 +2aaaab013000-2aaaab014000 rw-s 0 0:0 0 /home/satoken/mck_srcs/gerrit/mckernel/test/issues/1065/dummy_file 547fff800000-548000000000 rw-s 0 0:0 0 [stack] -** grep /home/satoken/2018_work/04/test/print_maps from maps -000000400000-000000401000 r-xs 0 0:0 0 /home/satoken/2018_work/04/test/print_maps -000000600000-000000601000 r--s 0 0:0 0 /home/satoken/2018_work/04/test/print_maps -000000601000-000000602000 rw-s 0 0:0 0 /home/satoken/2018_work/04/test/print_maps -[OK] /home/satoken/2018_work/04/test/print_maps is found - -** grep /usr/lib64/ld-2.17.so from maps -2aaaaaa00000-2aaaaaa21000 r-xs 0 0:0 0 /usr/lib64/ld-2.17.so -2aaaaac21000-2aaaaac22000 r--s 0 0:0 0 /usr/lib64/ld-2.17.so -2aaaaac22000-2aaaaac23000 rw-s 0 0:0 0 /usr/lib64/ld-2.17.so -[OK] /usr/lib64/ld-2.17.so is found +** grep /home/satoken/mck_srcs/gerrit/mckernel/test/issues/1065/dummy_file from maps +2aaaab013000-2aaaab014000 rw-s 0 0:0 0 /home/satoken/mck_srcs/gerrit/mckernel/test/issues/1065/dummy_file +[OK] /home/satoken/mck_srcs/gerrit/mckernel/test/issues/1065/dummy_file is found *** CT_004 PASSED -./CT_005.sh *** CT_005 start 
******************* -exec : ./static_print_maps -000000400000-0000004b8000 r-xs 0 0:0 0 /home/satoken/2018_work/04/test/static_print_maps -0000006b7000-0000006ba000 rw-s 0 0:0 0 /home/satoken/2018_work/04/test/static_print_maps -0000006ba000-0000006bc000 rw-p 0 0:0 0 -000000800000-000000802000 rw-s 0 0:0 0 [heap] -000000802000-000000823000 rw-s 0 0:0 0 [heap] -000000823000-000000824000 rw-s 0 0:0 0 [heap] +check [vdso], [stack] +000000400000-000000401000 r-xs 0 0:0 0 +000000600000-000000601000 r--s 0 0:0 0 +000000601000-000000602000 rw-s 0 0:0 0 2aaaaa9f8000-2aaaaaa00000 rw-s 0 0:0 0 -2aaaaaa00000-2aaaaaa02000 r-xs 0 0:0 0 [vdso] -547fff800000-548000000000 rw-s 0 0:0 0 [stack] - -** grep /home/satoken/2018_work/04/test/static_print_maps from maps -000000400000-0000004b8000 r-xs 0 0:0 0 /home/satoken/2018_work/04/test/static_print_maps -0000006b7000-0000006ba000 rw-s 0 0:0 0 /home/satoken/2018_work/04/test/static_print_maps -[OK] /home/satoken/2018_work/04/test/static_print_maps is found - -** grep /usr/lib64/ld-2.17.so from maps -[OK] /usr/lib64/ld-2.17.so is not found -*** CT_005 PASSED - -./CT_006.sh -*** CT_006 start ******************* -000000400000-00000040b000 r-xs 0 0:0 0 /usr/bin/cat -00000060b000-00000060c000 r--s 0 0:0 0 /usr/bin/cat -00000060c000-00000060d000 rw-s 0 0:0 0 /usr/bin/cat -000000800000-000000821000 rw-s 0 0:0 0 [heap] -2aaaaa9f8000-2aaaaaa00000 rw-s 0 0:0 0 -2aaaaaa00000-2aaaaaa21000 r-xs 0 0:0 0 /usr/lib64/ld-2.17.so -2aaaaac21000-2aaaaac22000 r--s 0 0:0 0 /usr/lib64/ld-2.17.so -2aaaaac22000-2aaaaac23000 rw-s 0 0:0 0 /usr/lib64/ld-2.17.so -2aaaaac23000-2aaaaac24000 rw-p 0 0:0 0 +2aaaaaa00000-2aaaaaa21000 r-xs 0 0:0 0 +2aaaaac21000-2aaaaac22000 r--s 0 0:0 0 +2aaaaac22000-2aaaaac24000 rw-s 0 0:0 0 2aaaaac24000-2aaaaac26000 r-xs 0 0:0 0 [vdso] 2aaaaac26000-2aaaaac27000 rw-p 0 0:0 0 2aaaaac4d000-2aaaaac4e000 rw-p 0 0:0 0 @@ -159,67 +120,13 @@ exec : ./static_print_maps 2aaaab00a000-2aaaab00c000 rw-p 0 0:0 0 /usr/lib64/libc-2.17.so 
2aaaab00c000-2aaaab011000 rw-p 0 0:0 0 2aaaab011000-2aaaab013000 rw-p 0 0:0 0 -2aaaab013000-2aaab153c000 r--p 0 0:0 0 /usr/lib/locale/locale-archive +2aaaab013000-2aaaab014000 rw-s 0 0:0 0 /home/satoken/mck_srcs/gerrit/mckernel/test/issues/1065/dummy_file 547fff800000-548000000000 rw-s 0 0:0 0 [stack] -[OK] shell script is running normaly -*** CT_006 PASSED -./CT_007.sh -*** CT_007 start ******************* -exec : ./call_execve ./print_maps_and_cmdline -000000400000-000000401000 r-xs 0 0:0 0 /home/satoken/2018_work/04/test/print_maps_and_cmdline -000000600000-000000601000 r--s 0 0:0 0 /home/satoken/2018_work/04/test/print_maps_and_cmdline -000000601000-000000602000 rw-s 0 0:0 0 /home/satoken/2018_work/04/test/print_maps_and_cmdline -2aaaaa9f8000-2aaaaaa00000 rw-s 0 0:0 0 -2aaaaaa00000-2aaaaaa21000 r-xs 0 0:0 0 /usr/lib64/ld-2.17.so -2aaaaac21000-2aaaaac22000 r--s 0 0:0 0 /usr/lib64/ld-2.17.so -2aaaaac22000-2aaaaac23000 rw-s 0 0:0 0 /usr/lib64/ld-2.17.so -2aaaaac23000-2aaaaac24000 rw-p 0 0:0 0 +** grep \[vdso\] from maps 2aaaaac24000-2aaaaac26000 r-xs 0 0:0 0 [vdso] -2aaaaac26000-2aaaaac27000 rw-p 0 0:0 0 -2aaaaac4d000-2aaaaae05000 r-xp 0 0:0 0 /usr/lib64/libc-2.17.so -2aaaaae05000-2aaaab005000 ---p 0 0:0 0 /usr/lib64/libc-2.17.so -2aaaab005000-2aaaab009000 r--p 0 0:0 0 /usr/lib64/libc-2.17.so -2aaaab009000-2aaaab00b000 rw-p 0 0:0 0 /usr/lib64/libc-2.17.so -2aaaab00b000-2aaaab010000 rw-p 0 0:0 0 -2aaaab010000-2aaaab011000 rw-p 0 0:0 0 -2aaaab011000-2aaaab013000 rw-p 0 0:0 0 +[OK] \[vdso\] is found +** grep \[stack\] from maps 547fff800000-548000000000 rw-s 0 0:0 0 [stack] -STKN:[main_loop] filename:./print_maps_and_cmdline shell:0x(nil) -STKN:[main_loop] filename:/bin/sh shell:0x(nil) -STKN:[main_loop] filename:./print_maps_and_cmdline shell:0x(nil) - -** grep /home/satoken/2018_work/04/test/print_maps_and_cmdline from maps -000000400000-000000401000 r-xs 0 0:0 0 /home/satoken/2018_work/04/test/print_maps_and_cmdline -000000600000-000000601000 r--s 0 0:0 0 
/home/satoken/2018_work/04/test/print_maps_and_cmdline -000000601000-000000602000 rw-s 0 0:0 0 /home/satoken/2018_work/04/test/print_maps_and_cmdline -[OK] /home/satoken/2018_work/04/test/print_maps_and_cmdline is found - -** grep /usr/lib64/ld-2.17.so from maps -2aaaaaa00000-2aaaaaa21000 r-xs 0 0:0 0 /usr/lib64/ld-2.17.so -2aaaaac21000-2aaaaac22000 r--s 0 0:0 0 /usr/lib64/ld-2.17.so -2aaaaac22000-2aaaaac23000 rw-s 0 0:0 0 /usr/lib64/ld-2.17.so -[OK] /usr/lib64/ld-2.17.so is found -./print_maps_and_cmdlineso.6o. -** grep ./print_maps_and_cmdline from cmdline -./print_maps_and_cmdlineso.6o. -[OK] ./print_maps_and_cmdline is found -*** CT_007 PASSED - -./CT_008.sh -*** CT_008 start ******************* -exec : ./call_execve.sh ./print_maps_and_cmdline -./CT_008.sh: line 18: 25571 Segmentation fault (core dumped) ${MCEXEC} ${test_program} > ./${TESTNAME}_maps.log 2> ./${TESTNAME}_cmdline.log -STKN:[main] This is shell script -STKN:[main_loop] filename:./call_execve shell:0x(nil) -STKN:[main_loop] filename:./print_maps_and_cmdline shell:0x(nil) - -** grep /home/satoken/2018_work/04/test/print_maps_and_cmdline from maps -[NG] /home/satoken/2018_work/04/test/print_maps_and_cmdline is not found - -** grep /usr/lib64/ld-2.17.so from maps -[NG] /usr/lib64/ld-2.17.so is not found - -** grep ./print_maps_and_cmdline from cmdline -[NG] ./print_maps_and_cmdline is not found -*** CT_008 FAILED +[OK] \[stack\] is found +*** CT_005 PASSED diff --git a/test/issues/1102/C1102.sh b/test/issues/1102/C1102.sh new file mode 100644 index 00000000..744ab7b7 --- /dev/null +++ b/test/issues/1102/C1102.sh @@ -0,0 +1,188 @@ +#!/bin/sh + +USELTP=1 +USEOSTEST=1 + +. ../../common.sh + +tid=001 +echo "*** CT_${tid} start *******************" +fail=0 +REP=30 +echo "** exec ltp-syscall_mkdir09 ${REP} times" +echo -n "" > ./CT_${tid}.txt +for i in `seq 1 ${REP}` +do + ${MCEXEC} ${LTPBIN}/mkdir09 | tee -a ./CT_${tid}.txt +done + +grep -a -e "FAIL" ./CT_${tid}.txt + +if [ $? 
!= 0 ]; then + echo "[OK] ltp-syscall_mkdir09 ${REP} times all passed" +else + echo "[NG] ltp-syscall_mkdir09 failed" + fail=1 +fi + +if [ X$fail = X0 ]; then + echo "*** CT_${tid} PASSED" +else + echo "*** CT_${tid} FAILED" +fi +echo "" + +tid=002 +echo "*** CT_${tid} start *******************" +fail=0 +echo "** exec ostest siginfo_00" +${MCEXEC} ${TESTMCK} -s siginfo -n 0| tee -a ./CT_${tid}.txt + +tail -n 1 ./CT_${tid}.txt | grep -a -e "RESULT: ok" &> /dev/null + +if [ $? = 0 ]; then + echo "[OK] ostest siginfo_00 passed" +else + echo "[NG] ostest siginfo_00 failed" + fail=1 +fi + +if [ X$fail = X0 ]; then + echo "*** CT_${tid} PASSED" +else + echo "*** CT_${tid} FAILED" +fi +echo "" + +tid=003 +echo "*** CT_${tid} start *******************" +fail=0 +SIG_NAME=SIGHUP +SIG_NUM=1 +echo "** exec ostest siginfo_01 and then send ${SIG_NAME} to mcexec" +${MCEXEC} ${TESTMCK} -s siginfo -n 1 & +sleep 1 + +echo "** back ground process(mcexec): $!" +echo "** send ${SIG_NAME} to mcexec once" +kill -${SIG_NUM} $! +sleep 1 + +echo "** check existing of $!" +ps -p $! + +if [ $? = 0 ]; then + echo "[OK] $! exists yet" +else + echo "[NG] $! doesn't exist" + fail=1 +fi + +echo "** send ${SIG_NAME} to mcexec again" +kill -${SIG_NUM} $! +sleep 1 +echo "** check existing of $!" +ps -p $! + +if [ $? != 0 ]; then + echo "[OK] $! doesn't exist (be killed by signal)" +else + echo "[NG] exist yet" + fail=1 +fi + +if [ X$fail = X0 ]; then + echo "*** CT_${tid} PASSED" +else + echo "*** CT_${tid} FAILED" +fi +echo "" + +tid=004 +echo "*** CT_${tid} start *******************" +fail=0 +SIG_NAME=SIGINT +SIG_NUM=2 +echo "** exec ostest siginfo_01 and then send ${SIG_NAME} to mcexec" +${MCEXEC} ${TESTMCK} -s siginfo -n 1 & +sleep 1 + +echo "** back ground process(mcexec): $!" +echo "** send ${SIG_NAME} to mcexec once" +kill -${SIG_NUM} $! +sleep 1 + +echo "** check existing of $!" +ps -p $! + +if [ $? = 0 ]; then + echo "[OK] $! exists yet" +else + echo "[NG] $! 
doesn't exist" + fail=1 +fi + +echo "** send ${SIG_NAME} to mcexec again" +kill -${SIG_NUM} $! +sleep 1 +echo "** check existing of $!" +ps -p $! + +if [ $? != 0 ]; then + echo "[OK] $! doesn't exist (be killed by signal)" +else + echo "[NG] exist yet" + fail=1 +fi + +if [ X$fail = X0 ]; then + echo "*** CT_${tid} PASSED" +else + echo "*** CT_${tid} FAILED" +fi +echo "" + +tid=005 +echo "*** CT_${tid} start *******************" +fail=0 +SIG_NAME=SIGTERM +SIG_NUM=15 +echo "** exec ostest siginfo_01 and then send ${SIG_NAME} to mcexec" +${MCEXEC} ${TESTMCK} -s siginfo -n 1 & +sleep 1 + +echo "** back ground process(mcexec): $!" +echo "** send ${SIG_NAME} to mcexec once" +kill -${SIG_NUM} $! +sleep 1 + +echo "** check existing of $!" +ps -p $! + +if [ $? = 0 ]; then + echo "[OK] $! exists yet" +else + echo "[NG] $! doesn't exist" + fail=1 +fi + +echo "** send ${SIG_NAME} to mcexec again" +kill -${SIG_NUM} $! +sleep 1 +echo "** check existing of $!" +ps -p $! + +if [ $? != 0 ]; then + echo "[OK] $! doesn't exist (be killed by signal)" +else + echo "[NG] exist yet" + fail=1 +fi + +if [ X$fail = X0 ]; then + echo "*** CT_${tid} PASSED" +else + echo "*** CT_${tid} FAILED" +fi +echo "" + diff --git a/test/issues/1102/CT_001.sh b/test/issues/1102/CT_001.sh deleted file mode 100755 index 9e3e5f4f..00000000 --- a/test/issues/1102/CT_001.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/sh - -TESTNAME=CT_001 -REP=30 - -. ./config - -fail=0 - -echo "*** ${TESTNAME} start *******************" -echo "** exec ltp-syscall_mkdir09 ${REP} times" -echo -n "" > ./${TESTNAME}.log -for i in `seq 1 ${REP}` -do - ${MCEXEC} ${LTP_DIR}/testcases/bin/mkdir09 | tee -a ./${TESTNAME}.log -done - -grep -a -e "FAIL" ./${TESTNAME}.log - -if [ $? 
!= 0 ]; then - echo "[OK] ltp-syscall_mkdir09 ${REP} times all passed" -else - echo "[NG] ltp-syscall_mkdir09 failed" - fail=1 -fi - -if [ X$fail = X0 ]; then - echo "*** ${TESTNAME} PASSED" -else - echo "*** ${TESTNAME} FAILED" -fi -echo "" - -rm ./${TESTNAME}.log diff --git a/test/issues/1102/CT_002.sh b/test/issues/1102/CT_002.sh deleted file mode 100755 index 869e8158..00000000 --- a/test/issues/1102/CT_002.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/sh - -TESTNAME=CT_002 - -. ./config - -fail=0 - -echo "*** ${TESTNAME} start *******************" -echo "** exec ostest siginfo_00" -${MCEXEC} ${OSTEST_DIR}/bin/test_mck -s siginfo -n 0| tee -a ./${TESTNAME}.log - -tail -n 1 ./${TESTNAME}.log | grep -a -e "RESULT: ok" &> /dev/null - -if [ $? = 0 ]; then - echo "[OK] ostest siginfo_00 passed" -else - echo "[NG] ostest siginfo_00 failed" - fail=1 -fi - -if [ X$fail = X0 ]; then - echo "*** ${TESTNAME} PASSED" -else - echo "*** ${TESTNAME} FAILED" -fi -echo "" - -rm ./${TESTNAME}.log diff --git a/test/issues/1102/CT_003.sh b/test/issues/1102/CT_003.sh deleted file mode 100755 index a5fc8e23..00000000 --- a/test/issues/1102/CT_003.sh +++ /dev/null @@ -1,51 +0,0 @@ -#!/bin/sh - -TESTNAME=CT_003 - -. ./config - -SIG_NAME=SIGHUP -SIG_NUM=1 - -fail=0 - -echo "*** ${TESTNAME} start *******************" -echo "** exec ostest siginfo_01 and then send ${SIG_NAME} to mcexec" -${MCEXEC} ${OSTEST_DIR}/bin/test_mck -s siginfo -n 1 & -sleep 1 - -echo "** back ground process(mcexec): $!" -echo "** send ${SIG_NAME} to mcexec once" -kill -${SIG_NUM} $! -sleep 1 - -echo "** check existing of $!" -ps -p $! - -if [ $? = 0 ]; then - echo "[OK] $! exists yet" -else - echo "[NG] $! doesn't exist" - fail=1 -fi - -echo "** send ${SIG_NAME} to mcexec again" -kill -${SIG_NUM} $! -sleep 1 -echo "** check existing of $!" -ps -p $! - -if [ $? != 0 ]; then - echo "[OK] $! 
doesn't exist (be killed by signal)" -else - echo "[NG] exist yet" - fail=1 -fi - -if [ X$fail = X0 ]; then - echo "*** ${TESTNAME} PASSED" -else - echo "*** ${TESTNAME} FAILED" -fi -echo "" - diff --git a/test/issues/1102/CT_004.sh b/test/issues/1102/CT_004.sh deleted file mode 100755 index 4ce557e4..00000000 --- a/test/issues/1102/CT_004.sh +++ /dev/null @@ -1,51 +0,0 @@ -#!/bin/sh - -TESTNAME=CT_004 - -. ./config - -SIG_NAME=SIGINT -SIG_NUM=2 - -fail=0 - -echo "*** ${TESTNAME} start *******************" -echo "** exec ostest siginfo_01 and then send ${SIG_NAME} to mcexec" -${MCEXEC} ${OSTEST_DIR}/bin/test_mck -s siginfo -n 1 & -sleep 1 - -echo "** back ground process(mcexec): $!" -echo "** send ${SIG_NAME} to mcexec once" -kill -${SIG_NUM} $! -sleep 1 - -echo "** check existing of $!" -ps -p $! - -if [ $? = 0 ]; then - echo "[OK] $! exists yet" -else - echo "[NG] $! doesn't exist" - fail=1 -fi - -echo "** send ${SIG_NAME} to mcexec again" -kill -${SIG_NUM} $! -sleep 1 -echo "** check existing of $!" -ps -p $! - -if [ $? != 0 ]; then - echo "[OK] $! doesn't exist (be killed by signal)" -else - echo "[NG] exist yet" - fail=1 -fi - -if [ X$fail = X0 ]; then - echo "*** ${TESTNAME} PASSED" -else - echo "*** ${TESTNAME} FAILED" -fi -echo "" - diff --git a/test/issues/1102/CT_005.sh b/test/issues/1102/CT_005.sh deleted file mode 100755 index 1ffe6e67..00000000 --- a/test/issues/1102/CT_005.sh +++ /dev/null @@ -1,51 +0,0 @@ -#!/bin/sh - -TESTNAME=CT_005 - -. ./config - -SIG_NAME=SIGTERM -SIG_NUM=15 - -fail=0 - -echo "*** ${TESTNAME} start *******************" -echo "** exec ostest siginfo_01 and then send ${SIG_NAME} to mcexec" -${MCEXEC} ${OSTEST_DIR}/bin/test_mck -s siginfo -n 1 & -sleep 1 - -echo "** back ground process(mcexec): $!" -echo "** send ${SIG_NAME} to mcexec once" -kill -${SIG_NUM} $! -sleep 1 - -echo "** check existing of $!" -ps -p $! - -if [ $? = 0 ]; then - echo "[OK] $! exists yet" -else - echo "[NG] $! 
doesn't exist" - fail=1 -fi - -echo "** send ${SIG_NAME} to mcexec again" -kill -${SIG_NUM} $! -sleep 1 -echo "** check existing of $!" -ps -p $! - -if [ $? != 0 ]; then - echo "[OK] $! doesn't exist (be killed by signal)" -else - echo "[NG] exist yet" - fail=1 -fi - -if [ X$fail = X0 ]; then - echo "*** ${TESTNAME} PASSED" -else - echo "*** ${TESTNAME} FAILED" -fi -echo "" - diff --git a/test/issues/1102/Makefile b/test/issues/1102/Makefile index e1e492c1..3a0b3137 100644 --- a/test/issues/1102/Makefile +++ b/test/issues/1102/Makefile @@ -1,28 +1,14 @@ CC = gcc -MCK_DIR=/home/satoken/ppos -MCEXEC=$(MCK_DIR)/bin/mcexec -LTP_DIR=/home/satoken/ltp -OSTEST_DIR=/home/satoken/ostest - -TARGET=config +TARGET = CPPFLAGS = LDFLAGS = all: $(TARGET) -config: - @echo "MCEXEC=$(MCEXEC)" > ./config - @echo "LTP_DIR=$(LTP_DIR)" >> ./config - @echo "OSTEST_DIR=$(OSTEST_DIR)" >> ./config - test: all - ./CT_001.sh - ./CT_002.sh - ./CT_003.sh - ./CT_004.sh - ./CT_005.sh + @sh ./C1102.sh clean: rm -f $(TARGET) *.o diff --git a/test/issues/1102/README b/test/issues/1102/README index 332e5e87..dd15d78a 100644 --- a/test/issues/1102/README +++ b/test/issues/1102/README @@ -1,26 +1,37 @@ +【Issue#1102 動作確認】 +□ テスト内容 +1. Issueで報告された再現方法で症状が再現しないことを確認 +CT_001: ltp-syscall_mkdir09 + ltp-syscall_mkdir09 を30回繰り返し実行し、 + いずれの実行結果もFAILとならないことを確認する -CT_001: - ltp-syscall_mkdir09 を繰り返し実行する - -> いずれの実行結果もFAILとならない +2. 
既存のシグナル機能に影響がないことをテストプログラムで確認 +CT_002: ostest のsiginfo.000 + ostest_siginfo.000 を実行し、RESULT: ok が出力されることを確認する -CT_002: - ostest のsiginfo_00 を実行する - -> 実行結果がRESULT: ok となる +CT_003: ostest_siginfo.001 (SIGHUP) + 実行中のostest のsiginfo.001 にSIGHUPを通知し、以下を確認する + - 1度目のSIGHUPはハンドルされ、プロセスが残存する + - 2度目のSIGHUPにより、プロセスが終了する -CT_003: - ostest のsiginfo_01 を実行する - 動作の確認には、SIGHUP を用いる - -> 1度目のSIGHUPはハンドルされ、プロセスが残存する - 2度目のSIGHUPにより、プロセスが終了する +CT_004: ostest_siginfo.001 (SIGINT) + 実行中のostest のsiginfo.001 にSIGINTを通知し、以下を確認する + - 1度目のSIGINTはハンドルされ、プロセスが残存する + - 2度目のSIGINTにより、プロセスが終了する -CT_004: - ostest のsiginfo_01 を実行する - 動作の確認には、SIGINT を用いる - -> 1度目のSIGINTはハンドルされ、プロセスが残存する - 2度目のSIGINTにより、プロセスが終了する +CT_005: ostest_siginfo.001 (SIGTERM) + 実行中のostest のsiginfo.001 にSIGTERMを通知し、以下を確認する + - 1度目のSIGTERMはハンドルされ、プロセスが残存する + - 2度目のSIGTERMにより、プロセスが終了する -CT_005: - ostest のsiginfo_01 を実行する - 動作の確認には、SIGTERM を用いる - -> 1度目のSIGTERMはハンドルされ、プロセスが残存する - 2度目のSIGTERMにより、プロセスが終了する +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +□ 実行結果 +result.log 参照。 +すべての項目をPASSしていることを確認。 diff --git a/test/issues/1102/result.log b/test/issues/1102/result.log index 52c629ff..b51cdb50 100644 --- a/test/issues/1102/result.log +++ b/test/issues/1102/result.log @@ -1,4 +1,3 @@ -./CT_001.sh *** CT_001 start ******************* ** exec ltp-syscall_mkdir09 30 times mkdir09 1 TPASS : PASS @@ -34,7 +33,6 @@ mkdir09 1 TPASS : PASS [OK] ltp-syscall_mkdir09 30 times all passed *** CT_001 PASSED -./CT_002.sh *** CT_002 start ******************* ** exec ostest siginfo_00 TEST_SUITE: siginfo @@ -189,85 +187,82 @@ RESULT: ok [OK] ostest siginfo_00 passed *** CT_002 PASSED -./CT_003.sh *** CT_003 start ******************* ** exec ostest siginfo_01 and then send SIGHUP to mcexec TEST_SUITE: siginfo TEST_NUMBER: 1 ARGS: ================================================== -Please send signal to 
mcexec(pid=12295) from console. +Please send signal to mcexec(pid=15217) from console. Exit Once you throw twice the same signal. ================================================== -** back ground process(mcexec): 12295 +** back ground process(mcexec): 15217 ** send SIGHUP to mcexec once Catch signal #1 siginfo->si_signo = 1 siginfo->si_errno = 0 siginfo->si_code = 0x0 -** check existing of 12295 +** check existing of 15217 PID TTY TIME CMD -12295 pts/0 00:00:00 exe -[OK] 12295 exists yet +15217 pts/0 00:00:00 exe +[OK] 15217 exists yet ** send SIGHUP to mcexec again Terminate by signal 1 -./CT_003.sh: line 34: 12295 Hangup ${MCEXEC} ${OSTEST_DIR}/bin/test_mck -s siginfo -n 1 -** check existing of 12295 +./C1102.sh: line 84: 15217 Hangup ${MCEXEC} ${TESTMCK} -s siginfo -n 1 +** check existing of 15217 PID TTY TIME CMD -[OK] 12295 doesn't exist (be killed by signal) +[OK] 15217 doesn't exist (be killed by signal) *** CT_003 PASSED -./CT_004.sh *** CT_004 start ******************* ** exec ostest siginfo_01 and then send SIGINT to mcexec TEST_SUITE: siginfo TEST_NUMBER: 1 ARGS: ================================================== -Please send signal to mcexec(pid=12311) from console. +Please send signal to mcexec(pid=15254) from console. Exit Once you throw twice the same signal. 
================================================== -** back ground process(mcexec): 12311 +** back ground process(mcexec): 15254 ** send SIGINT to mcexec once Catch signal #2 siginfo->si_signo = 2 siginfo->si_errno = 0 siginfo->si_code = 0x0 -** check existing of 12311 +** check existing of 15254 PID TTY TIME CMD -12311 pts/0 00:00:00 exe -[OK] 12311 exists yet +15254 pts/0 00:00:00 exe +[OK] 15254 exists yet ** send SIGINT to mcexec again Terminate by signal 2 -** check existing of 12311 +** check existing of 15254 PID TTY TIME CMD -[OK] 12311 doesn't exist (be killed by signal) +[OK] 15254 doesn't exist (be killed by signal) *** CT_004 PASSED -./CT_005.sh *** CT_005 start ******************* ** exec ostest siginfo_01 and then send SIGTERM to mcexec TEST_SUITE: siginfo TEST_NUMBER: 1 ARGS: ================================================== -Please send signal to mcexec(pid=12327) from console. +Please send signal to mcexec(pid=15290) from console. Exit Once you throw twice the same signal. 
================================================== -** back ground process(mcexec): 12327 +** back ground process(mcexec): 15290 ** send SIGTERM to mcexec once Catch signal #15 siginfo->si_signo = 15 siginfo->si_errno = 0 siginfo->si_code = 0x0 -** check existing of 12327 +** check existing of 15290 PID TTY TIME CMD -12327 pts/0 00:00:00 exe -[OK] 12327 exists yet +15290 pts/0 00:00:00 exe +[OK] 15290 exists yet ** send SIGTERM to mcexec again Terminate by signal 15 -./CT_005.sh: line 34: 12327 Terminated ${MCEXEC} ${OSTEST_DIR}/bin/test_mck -s siginfo -n 1 -** check existing of 12327 +./C1102.sh: line 172: 15290 Terminated ${MCEXEC} ${TESTMCK} -s siginfo -n 1 +** check existing of 15290 PID TTY TIME CMD -[OK] 12327 doesn't exist (be killed by signal) +[OK] 15290 doesn't exist (be killed by signal) *** CT_005 PASSED diff --git a/test/issues/1109/C1109.sh b/test/issues/1109/C1109.sh new file mode 100644 index 00000000..7472ab4e --- /dev/null +++ b/test/issues/1109/C1109.sh @@ -0,0 +1,36 @@ +#!/bin/sh +USELTP=0 +USEOSTEST=1 + +BOOTPARAM="-c 1-7 -m 10G@0" +. 
../../common.sh + +maxmem=`$SBINDIR/ihkosctl 0 query mem | cut -d '@' -f 1` +mem95p=`expr $maxmem \* 95 / 100` +mem110p=`expr $maxmem \* 110 / 100` + +for i in 10240:9961472:01 2097152:2040109466:02 unlimited:$mem95p:03; do + ul=`echo $i|sed 's/:.*//'` + st=`echo $i|sed -e 's/^[^:]*://' -e 's/:[^:]*$//'` + id=`echo $i|sed 's/.*://'` + + sudo sh -c "ulimit -s $ul; $MCEXEC $TESTMCK -s mem_stack_limits -n 0 -- -s $st" 2>&1 | tee C1109T$id.txt + if grep "RESULT: ok" C1109T$id.txt > /dev/null 2>&1; then + echo "*** C1109T$id: OK" + else + echo "*** C1109T$id: NG" + fi +done + +for i in 10M:9961472:04 2G:2040109466:05 100000G:$mem95p:06; do + ul=`echo $i|sed 's/:.*//'` + st=`echo $i|sed -e 's/^[^:]*://' -e 's/:[^:]*$//'` + id=`echo $i|sed 's/.*://'` + + $MCEXEC -s 2M,$ul $TESTMCK -s mem_stack_limits -n 0 -- -s $st 2>&1 | tee C1109T$id.txt + if grep "RESULT: ok" C1109T$id.txt > /dev/null 2>&1; then + echo "*** C1109T$id: OK" + else + echo "*** C1109T$id: NG" + fi +done diff --git a/test/issues/1109/C1109.txt b/test/issues/1109/C1109.txt new file mode 100644 index 00000000..3e33fff3 --- /dev/null +++ b/test/issues/1109/C1109.txt @@ -0,0 +1,49 @@ +Script started on Thu Jun 28 09:14:18 2018 +bash-4.2$ make test +sh ./C1109.sh +TEST_SUITE: mem_stack_limits +TEST_NUMBER: 0 +ARGS: -s 9961472 +call: 0 GiB + 9 MiB + 512 KiB +used_stack size: 9986480 (0 GiB + 9 MiB + 536 KiB) +RESULT: ok +*** C1109T01: OK +TEST_SUITE: mem_stack_limits +TEST_NUMBER: 0 +ARGS: -s 2040109466 +call: 1 GiB + 921 MiB + 614 KiB +used_stack size: 2040182784 (1 GiB + 921 MiB + 686 KiB) +RESULT: ok +*** C1109T02: OK +TEST_SUITE: mem_stack_limits +TEST_NUMBER: 0 +ARGS: -s 10200547328 +call: 9 GiB + 512 MiB + 0 KiB +used_stack size: 10200572336 (9 GiB + 512 MiB + 24 KiB) +RESULT: ok +*** C1109T03: OK +TEST_SUITE: mem_stack_limits +TEST_NUMBER: 0 +ARGS: -s 9961472 +call: 0 GiB + 9 MiB + 512 KiB +used_stack size: 9986480 (0 GiB + 9 MiB + 536 KiB) +RESULT: ok +*** C1109T04: OK +TEST_SUITE: mem_stack_limits 
+TEST_NUMBER: 0 +ARGS: -s 2040109466 +call: 1 GiB + 921 MiB + 614 KiB +used_stack size: 2040182784 (1 GiB + 921 MiB + 686 KiB) +RESULT: ok +*** C1109T05: OK +TEST_SUITE: mem_stack_limits +TEST_NUMBER: 0 +ARGS: -s 10200547328 +call: 9 GiB + 512 MiB + 0 KiB +used_stack size: 10200572336 (9 GiB + 512 MiB + 24 KiB) +RESULT: ok +*** C1109T06: OK +bash-4.2$ exit +exit + +Script done on Thu Jun 28 09:14:32 2018 diff --git a/test/issues/1109/C1109T01.txt b/test/issues/1109/C1109T01.txt new file mode 100644 index 00000000..1cc5ac3a --- /dev/null +++ b/test/issues/1109/C1109T01.txt @@ -0,0 +1,6 @@ +TEST_SUITE: mem_stack_limits +TEST_NUMBER: 0 +ARGS: -s 9961472 +call: 0 GiB + 9 MiB + 512 KiB +used_stack size: 9986480 (0 GiB + 9 MiB + 536 KiB) +RESULT: ok diff --git a/test/issues/1109/C1109T02.txt b/test/issues/1109/C1109T02.txt new file mode 100644 index 00000000..d98ed049 --- /dev/null +++ b/test/issues/1109/C1109T02.txt @@ -0,0 +1,6 @@ +TEST_SUITE: mem_stack_limits +TEST_NUMBER: 0 +ARGS: -s 2040109466 +call: 1 GiB + 921 MiB + 614 KiB +used_stack size: 2040182784 (1 GiB + 921 MiB + 686 KiB) +RESULT: ok diff --git a/test/issues/1109/C1109T03.txt b/test/issues/1109/C1109T03.txt new file mode 100644 index 00000000..3875955c --- /dev/null +++ b/test/issues/1109/C1109T03.txt @@ -0,0 +1,6 @@ +TEST_SUITE: mem_stack_limits +TEST_NUMBER: 0 +ARGS: -s 10200547328 +call: 9 GiB + 512 MiB + 0 KiB +used_stack size: 10200572336 (9 GiB + 512 MiB + 24 KiB) +RESULT: ok diff --git a/test/issues/1109/C1109T04.txt b/test/issues/1109/C1109T04.txt new file mode 100644 index 00000000..1cc5ac3a --- /dev/null +++ b/test/issues/1109/C1109T04.txt @@ -0,0 +1,6 @@ +TEST_SUITE: mem_stack_limits +TEST_NUMBER: 0 +ARGS: -s 9961472 +call: 0 GiB + 9 MiB + 512 KiB +used_stack size: 9986480 (0 GiB + 9 MiB + 536 KiB) +RESULT: ok diff --git a/test/issues/1109/C1109T05.txt b/test/issues/1109/C1109T05.txt new file mode 100644 index 00000000..d98ed049 --- /dev/null +++ b/test/issues/1109/C1109T05.txt @@ -0,0 +1,6 @@ 
+TEST_SUITE: mem_stack_limits +TEST_NUMBER: 0 +ARGS: -s 2040109466 +call: 1 GiB + 921 MiB + 614 KiB +used_stack size: 2040182784 (1 GiB + 921 MiB + 686 KiB) +RESULT: ok diff --git a/test/issues/1109/C1109T06.txt b/test/issues/1109/C1109T06.txt new file mode 100644 index 00000000..3875955c --- /dev/null +++ b/test/issues/1109/C1109T06.txt @@ -0,0 +1,6 @@ +TEST_SUITE: mem_stack_limits +TEST_NUMBER: 0 +ARGS: -s 10200547328 +call: 9 GiB + 512 MiB + 0 KiB +used_stack size: 10200572336 (9 GiB + 512 MiB + 24 KiB) +RESULT: ok diff --git a/test/issues/1109/Makefile b/test/issues/1109/Makefile new file mode 100644 index 00000000..a8b97fda --- /dev/null +++ b/test/issues/1109/Makefile @@ -0,0 +1,6 @@ +all:: + +test:: + sh ./C1109.sh + +clean:: diff --git a/test/issues/1109/README b/test/issues/1109/README new file mode 100644 index 00000000..7fcb29a2 --- /dev/null +++ b/test/issues/1109/README @@ -0,0 +1,25 @@ +【Issue#1109 動作確認】 +□ テスト内容 +1. スタックサイズ制限を ulimit -s によって指定した場合の実行確認 +C1109T01 ulimit -s 10MB 指定し、9.5MBスタックを使用できることを確認 +C1109T02 ulimit -s 2GB 指定し、1.9GBスタックを使用できることを確認 +C1109T03 ulimit -s unlimited 指定し、全メモリの95%をスタックで使用できる + ことを確認 + +2. スタックサイズをmcexec -s オプションで指定した場合の実行確認 +C1109T04 mcexec -s 2M,10M 指定し、9.5MBスタックを使用できることを確認 +C1109T05 mcexec -s 2M,2G 指定し、1.9GBスタックを使用できることを確認 +C1109T06 mcexec -s 2M,100000G 指定し、全メモリの95%をスタックで使用できる + ことを確認 + +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +□ 実行結果 +C1109.txt 参照。 +全ての項目が OK となっていることを確認。 diff --git a/test/issues/1111/C1111.sh b/test/issues/1111/C1111.sh new file mode 100644 index 00000000..34254147 --- /dev/null +++ b/test/issues/1111/C1111.sh @@ -0,0 +1,23 @@ +#!/bin/sh + +USELTP=0 +USEOSTEST=1 + +. 
../../common.sh + +tid=001 +echo "*** RT_${tid} start *******************************" +sudo ${MCEXEC} ${TESTMCK} -s rt_sigaction -n 5 2>&1 | tee ./RT_${tid}.txt +if grep "RESULT: ok" ./RT_${tid}.txt > /dev/null 2>&1 ; then + echo "*** RT_${tid}: PASSED" +else + echo "*** RT_${tid}: FAILED" +fi +echo "" + +sudo ${MCEXEC} ./CT_001 +sudo ${MCEXEC} ./CT_002 +sudo ${MCEXEC} ./CT_003 +sudo ${MCEXEC} ./CT_004 +sudo ${MCEXEC} ./CT_005 + diff --git a/test/issues/1111/CT_001.c b/test/issues/1111/CT_001.c new file mode 100644 index 00000000..2c34d74a --- /dev/null +++ b/test/issues/1111/CT_001.c @@ -0,0 +1,50 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <signal.h> +#include "./test_chk.h" + +#define TEST_NAME "CT_001" + +int handled_cnt; + +void test_handler(int sig) +{ + handled_cnt++; +} + +int main(int argc, char **argv) +{ + int rc = 0; + int status; + int tmp_flag = 0; + struct sigaction sa, old_act; + + printf("*** %s start *******************************\n", TEST_NAME); + handled_cnt = 0; + + sa.sa_handler = test_handler; + sa.sa_flags = SA_RESETHAND; + + rc = sigaction(SIGUSR1, &sa, NULL); + OKNG(rc != 0, "sigaction with SA_RESETHAND"); + + rc = sigaction(SIGUSR1, NULL, &old_act); + OKNG(rc != 0, "sigaction to get current action"); + + if (old_act.sa_handler == test_handler && + old_act.sa_flags & SA_RESETHAND) { + tmp_flag = 1; + } + OKNG(tmp_flag != 1, "check current act"); + + printf("*** %s PASSED\n\n", TEST_NAME); + + return 0; + +fn_fail: + printf("*** %s FAILED\n\n", TEST_NAME); + + return -1; +} diff --git a/test/issues/1111/CT_002.c b/test/issues/1111/CT_002.c new file mode 100644 index 00000000..46615013 --- /dev/null +++ b/test/issues/1111/CT_002.c @@ -0,0 +1,53 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <signal.h> +#include "./test_chk.h" + +#define TEST_NAME "CT_002" + +int handled_cnt; + +void test_handler(int sig) +{ + handled_cnt++; +} + +int 
main(int argc, char **argv) +{ + int rc = 0; + int status; + int tmp_flag = 0; + struct sigaction sa, old_act; + + printf("*** %s start *******************************\n", TEST_NAME); + handled_cnt = 0; + + sa.sa_handler = test_handler; + sa.sa_flags = SA_RESETHAND; + + rc = sigaction(SIGUSR1, &sa, NULL); + OKNG(rc != 0, "sigaction with SA_RESETHAND"); + + printf(" send 1st SIGUSR1\n"); + kill(getpid(), SIGUSR1); + OKNG(handled_cnt != 1, "invoked test_handler"); + + rc = sigaction(SIGUSR1, NULL, &old_act); + OKNG(rc != 0, "sigaction to get current action"); + + if (old_act.sa_handler == SIG_DFL) { + tmp_flag = 1; + } + OKNG(tmp_flag != 1, "check current act (after reset)"); + + printf("*** %s PASSED\n\n", TEST_NAME); + + return 0; + +fn_fail: + printf("*** %s FAILED\n\n", TEST_NAME); + + return -1; +} diff --git a/test/issues/1111/CT_003.c b/test/issues/1111/CT_003.c new file mode 100644 index 00000000..dacc11aa --- /dev/null +++ b/test/issues/1111/CT_003.c @@ -0,0 +1,47 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <signal.h> +#include "./test_chk.h" + +#define TEST_NAME "CT_003" + +int handled_cnt; + +void test_handler(int sig) +{ + handled_cnt++; +} + +int main(int argc, char **argv) +{ + int rc = 0; + int status; + int tmp_flag = 0; + struct sigaction sa, old_act; + + printf("*** %s start *******************************\n", TEST_NAME); + handled_cnt = 0; + + rc = sigaction(0, &sa, NULL); + OKNG(rc != -1, "sigaction 0 failed"); + + rc = sigaction(_NSIG, &sa, NULL); + OKNG(rc != -1, "sigaction _NSIG failed"); + + rc = sigaction(SIGKILL, &sa, NULL); + OKNG(rc != -1, "sigaction SIGKILL failed"); + + rc = sigaction(SIGSTOP, &sa, NULL); + OKNG(rc != -1, "sigaction SIGSTOP failed"); + + printf("*** %s PASSED\n\n", TEST_NAME); + + return 0; + +fn_fail: + printf("*** %s FAILED\n\n", TEST_NAME); + + return -1; +} diff --git a/test/issues/1111/CT_004.c b/test/issues/1111/CT_004.c new file mode 100644 index 
00000000..9212fa95 --- /dev/null +++ b/test/issues/1111/CT_004.c @@ -0,0 +1,44 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <signal.h> +#include "./test_chk.h" + +#define TEST_NAME "CT_004" + +int main(int argc, char **argv) +{ + int rc = 0; + int status; + int tmp_flag = 0; + struct sigaction old_act; + + printf("*** %s start *******************************\n", TEST_NAME); + + rc = sigaction(SIGKILL, NULL, &old_act); + OKNG(rc != 0, "sigaction to get SIGKILL action"); + + if (old_act.sa_handler == SIG_DFL) { + tmp_flag = 1; + } + OKNG(tmp_flag != 1, "check SIGKILL act"); + + rc = sigaction(SIGSTOP, NULL, &old_act); + OKNG(rc != 0, "sigaction to get SIGSTOP action"); + + tmp_flag = 0; + if (old_act.sa_handler == SIG_DFL) { + tmp_flag = 1; + } + OKNG(tmp_flag != 1, "check SIGSTOP act"); + + printf("*** %s PASSED\n\n", TEST_NAME); + + return 0; + +fn_fail: + printf("*** %s FAILED\n\n", TEST_NAME); + + return -1; +} diff --git a/test/issues/1111/CT_005.c b/test/issues/1111/CT_005.c new file mode 100644 index 00000000..92f1734b --- /dev/null +++ b/test/issues/1111/CT_005.c @@ -0,0 +1,37 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <signal.h> +#include "./test_chk.h" + +#define TEST_NAME "CT_005" + +int main(int argc, char **argv) +{ + int rc = 0; + int status; + + printf("*** %s start *******************************\n", TEST_NAME); + + rc = sigaction(SIGUSR1, NULL, NULL); + OKNG(rc != 0, "SIGUSR1 is valid"); + + rc = sigaction(SIGKILL, NULL, NULL); + OKNG(rc != 0, "SIGKILL is valid"); + + rc = sigaction(SIGSTOP, NULL, NULL); + OKNG(rc != 0, "SIGSTOP is valid"); + + rc = sigaction(_NSIG, NULL, NULL); + OKNG(rc != -1, "_NSIG is invalid"); + + printf("*** %s PASSED\n\n", TEST_NAME); + + return 0; + +fn_fail: + printf("*** %s FAILED\n\n", TEST_NAME); + + return -1; +} diff --git a/test/issues/1111/Makefile b/test/issues/1111/Makefile new file mode 100644 index 
00000000..c863d42e --- /dev/null +++ b/test/issues/1111/Makefile @@ -0,0 +1,29 @@ +CC = gcc +TARGET=CT_001 CT_002 CT_003 CT_004 CT_005 + +CPPFLAGS = +LDFLAGS = + +all: $(TARGET) + +CT_001: CT_001.c + $(CC) -o $@ $^ $(LDFLAGS) + +CT_002: CT_002.c + $(CC) -o $@ $^ $(LDFLAGS) + +CT_003: CT_003.c + $(CC) -o $@ $^ $(LDFLAGS) + +CT_004: CT_004.c + $(CC) -o $@ $^ $(LDFLAGS) + +CT_005: CT_005.c + $(CC) -o $@ $^ $(LDFLAGS) + +test: all + @sh ./C1111.sh + +clean: + rm -f $(TARGET) *.o + diff --git a/test/issues/1111/README b/test/issues/1111/README new file mode 100644 index 00000000..f794dd4c --- /dev/null +++ b/test/issues/1111/README @@ -0,0 +1,44 @@ +【Issue#1111 動作確認】 +□ テスト内容 +1. Issueで報告された再現プログラムでの確認 +RT_001: ostest-rt_sigaction.005 による確認 + テストが正常に終了し、「RESULT: ok」が出力されることを確認する + +2. 既存のsigaction機能に影響がないことを確認 +CT_001: 設定中のハンドラ情報の取得 (上書き時) + 1. SA_RESETHANDを指定したsigaction()でSIGUSR1にハンドラを設定 + 2. sigaction(SIGUSR1, NULL, &act) で設定情報を取得できることを確認する + 3. SA_RESETHANDを指定しないsigaction()でSIGUSR1にデフォルトハンドラを設定 + 4. sigaction(SIGUSR1, NULL, &act) で3.で指定した設定情報を取得できることを確認する + +CT_002: 設定中のハンドラ情報の取得 (デフォルトに戻った時) + 1. SA_RESETHANDを指定したsigaction()でSIGUSR1にハンドラを設定 + 2. 自身にSIGUSR1を送る + 3. 1.で登録したハンドラが呼び出される + 4. sigaction(SIGUSR1, NULL, &act) で設定情報を取得できることを確認する + +CT_003: 不正なsig_numへのハンドラ登録 + 1. 範囲外(上限、下限)のsignumへのハンドラ登録が失敗する + 2. SIGKILL, SIGSTOPへのハンドラ登録が失敗する + +CT_004: SIGKILL, SIGSTOPのハンドラ情報の取得 + 1. sigaction(SIGKILL, NULL, &act) で設定情報を取得できる + 2. sigaction(SIGSTOP, NULL, &act) で設定情報を取得できる + +CT_005: sig_numの有効確認 + 1. sigaction(SIGUSR1, NULL, NULL) が成功する(有効) + 2. sigaction(SIGKILL, NULL, NULL) が成功する(有効) + 3. sigaction(SIGSTOP, NULL, NULL) が成功する(有効) + 4. 
sigaction(_NSIG, NULL, NULL) が失敗する(無効) + +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +□ 実行結果 +result.log 参照。 +すべての項目をPASSしていることを確認。 diff --git a/test/issues/1111/result.log b/test/issues/1111/result.log new file mode 100644 index 00000000..e8da9629 --- /dev/null +++ b/test/issues/1111/result.log @@ -0,0 +1,45 @@ +*** RT_001 start ******************************* +TEST_SUITE: rt_sigaction +TEST_NUMBER: 5 +ARGS: +sigaction(-1) = -1 (errno=22) +sigaction(65) = -1 (errno=22) +sigaction(9) = -1 (errno=22) +sigaction(19) = -1 (errno=22) +RESULT: ok +*** RT_001: PASSED + +*** CT_001 start ******************************* + [OK] sigaction with SA_RESETHAND + [OK] sigaction to get current action + [OK] check current act +*** CT_001 PASSED + +*** CT_002 start ******************************* + [OK] sigaction with SA_RESETHAND + send 1st SIGUSR1 + [OK] invoked test_handler + [OK] sigaction to get current action + [OK] check current act (after reset) +*** CT_002 PASSED + +*** CT_003 start ******************************* + [OK] sigaction 0 failed + [OK] sigaction _NSIG failed + [OK] sigaction SIGKILL failed + [OK] sigaction SIGSTOP failed +*** CT_003 PASSED + +*** CT_004 start ******************************* + [OK] sigaction to get SIGKILL action + [OK] check SIGKILL act + [OK] sigaction to get SIGSTOP action + [OK] check SIGSTOP act +*** CT_004 PASSED + +*** CT_005 start ******************************* + [OK] SIGUSR1 is valid + [OK] SIGKILL is valid + [OK] SIGSTOP is valid + [OK] _NSIG is invalid +*** CT_005 PASSED diff --git a/test/issues/1111/test_chk.h b/test/issues/1111/test_chk.h new file mode 100644 index 00000000..4cef42e8 --- /dev/null +++ b/test/issues/1111/test_chk.h @@ -0,0 +1,23 @@ +#ifndef HEADER_TEST_CHK_H +#define HEADER_TEST_CHK_H + +#define CHKANDJUMP(cond, ...) 
do {\ + if (cond) {\ + fprintf(stderr, " [NG] ");\ + fprintf(stderr, __VA_ARGS__);\ + fprintf(stderr, " failed\n");\ + goto fn_fail;\ + } \ + } while (0) + +#define OKNG(cond, ...) do {\ + if (cond) {\ + CHKANDJUMP(cond, __VA_ARGS__);\ + } else {\ + fprintf(stdout, " [OK] ");\ + fprintf(stdout, __VA_ARGS__);\ + fprintf(stdout, "\n");\ + } \ + } while (0) + +#endif diff --git a/test/issues/1112/C1112.sh b/test/issues/1112/C1112.sh new file mode 100644 index 00000000..626f670b --- /dev/null +++ b/test/issues/1112/C1112.sh @@ -0,0 +1,23 @@ +#!/bin/sh + +USELTP=0 +USEOSTEST=1 + +. ../../common.sh + +tid=001 +echo "*** RT_${tid} start *******************************" +sudo ${MCEXEC} ${TESTMCK} -s mremap_mmap_anon -n 1 2>&1 | tee ./RT_${tid}.txt +if grep "RESULT: ok" ./RT_${tid}.txt > /dev/null 2>&1 ; then + echo "*** RT_${tid}: PASSED" +else + echo "*** RT_${tid}: FAILED" +fi +echo "" + +sudo $BINDIR/mcexec ./CT_001 +sudo $BINDIR/mcexec ./CT_002 +sudo $BINDIR/mcexec ./CT_003 +sudo $BINDIR/mcexec ./CT_004 +sudo $BINDIR/mcexec ./CT_005 + diff --git a/test/issues/1112/CT_001.c b/test/issues/1112/CT_001.c new file mode 100644 index 00000000..df6c803a --- /dev/null +++ b/test/issues/1112/CT_001.c @@ -0,0 +1,43 @@ +#define _GNU_SOURCE 1 + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/mman.h> +#include <errno.h> +#include "./test_chk.h" + +#define TEST_NAME "CT_001" +#define MAP_SIZE 0x2000000 + +int main(int argc, char **argv) +{ + void *map, *remap; + int __errno; + + printf("*** %s start *******************************\n", TEST_NAME); + + map = mmap(0, MAP_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + OKNG(map == MAP_FAILED, "mmap returned :%p", map); + + errno = 0; + remap = mremap(map, MAP_SIZE, 0x3000000, MREMAP_MAYMOVE); + __errno = errno; + + OKNG(remap == MAP_FAILED, "mremap returned :%p" + " (expect return is valid addr)", remap); + OKNG(__errno != 0, "errno after mremap :%d" + " (expect 
error is 0)", __errno); + + printf("*** %s PASSED\n\n", TEST_NAME); + + return 0; + +fn_fail: + printf("*** %s FAILED\n\n", TEST_NAME); + + return -1; +} diff --git a/test/issues/1112/CT_002.c b/test/issues/1112/CT_002.c new file mode 100644 index 00000000..7bade138 --- /dev/null +++ b/test/issues/1112/CT_002.c @@ -0,0 +1,43 @@ +#define _GNU_SOURCE 1 + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/mman.h> +#include <errno.h> +#include "./test_chk.h" + +#define TEST_NAME "CT_002" +#define MAP_SIZE 0x2000000 + +int main(int argc, char **argv) +{ + void *map, *remap; + int __errno; + + printf("*** %s start *******************************\n", TEST_NAME); + + map = mmap(0, MAP_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + OKNG(map == MAP_FAILED, "mmap returned :%p", map); + + errno = 0; + remap = mremap(map, MAP_SIZE, 0x800000, 0); + __errno = errno; + + OKNG(remap != map, "mremap returned :%p" + " (expect return is %p)", remap, map); + OKNG(__errno != 0, "errno after mremap :%d" + " (expect error is 0)", __errno); + + printf("*** %s PASSED\n\n", TEST_NAME); + + return 0; + +fn_fail: + printf("*** %s FAILED\n\n", TEST_NAME); + + return -1; +} diff --git a/test/issues/1112/CT_003.c b/test/issues/1112/CT_003.c new file mode 100644 index 00000000..9d8ad623 --- /dev/null +++ b/test/issues/1112/CT_003.c @@ -0,0 +1,43 @@ +#define _GNU_SOURCE 1 + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/mman.h> +#include <errno.h> +#include "./test_chk.h" + +#define TEST_NAME "CT_003" +#define MAP_SIZE 0x2000000 + +int main(int argc, char **argv) +{ + void *map, *remap; + int __errno; + + printf("*** %s start *******************************\n", TEST_NAME); + + map = mmap(0, MAP_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + OKNG(map == MAP_FAILED, "mmap returned :%p", map); + + errno = 0; + remap = mremap(map, 0xffffffffffffe000, 
0xffffffffffffe000, 0); + __errno = errno; + + OKNG(remap != map, "mremap returned :%p" + " (expect return is %p)", remap, map); + OKNG(__errno != 0, "errno after mremap :%d" + " (expect error is 0)", __errno); + + printf("*** %s PASSED\n\n", TEST_NAME); + + return 0; + +fn_fail: + printf("*** %s FAILED\n\n", TEST_NAME); + + return -1; +} diff --git a/test/issues/1112/CT_004.c b/test/issues/1112/CT_004.c new file mode 100644 index 00000000..2abf86ee --- /dev/null +++ b/test/issues/1112/CT_004.c @@ -0,0 +1,44 @@ +#define _GNU_SOURCE 1 + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/mman.h> +#include <errno.h> +#include "./test_chk.h" + +#define TEST_NAME "CT_004" +#define MAP_SIZE 0x2000000 + +int main(int argc, char **argv) +{ + void *map, *remap; + int __errno; + + printf("*** %s start *******************************\n", TEST_NAME); + + map = mmap(0, MAP_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + OKNG(map == MAP_FAILED, "mmap returned :%p", map); + + errno = 0; + remap = mremap(map, 0xffffffffffffe000, 0xffffffffffffe000, + MREMAP_MAYMOVE | MREMAP_FIXED, map + 0x40000000); + __errno = errno; + + OKNG(remap != MAP_FAILED, "mremap returned :%p" + " (expect return is MAP_FAILED)", remap); + OKNG(__errno != EINVAL, "errno after mremap :%d" + " (expect error is EINVAL(%d))", __errno, EINVAL); + + printf("*** %s PASSED\n\n", TEST_NAME); + + return 0; + +fn_fail: + printf("*** %s FAILED\n\n", TEST_NAME); + + return -1; +} diff --git a/test/issues/1112/CT_005.c b/test/issues/1112/CT_005.c new file mode 100644 index 00000000..87d254d7 --- /dev/null +++ b/test/issues/1112/CT_005.c @@ -0,0 +1,43 @@ +#define _GNU_SOURCE 1 + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/mman.h> +#include <errno.h> +#include "./test_chk.h" + +#define TEST_NAME "CT_005" +#define MAP_SIZE 0x2000000 + +int main(int argc, char **argv) +{ + void *map, *remap; + int 
__errno; + + printf("*** %s start *******************************\n", TEST_NAME); + + map = mmap(0, MAP_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + OKNG(map == MAP_FAILED, "mmap returned :%p", map); + + errno = 0; + remap = mremap(map, MAP_SIZE, 0xffffffffffffe000, MREMAP_MAYMOVE); + __errno = errno; + + OKNG(remap != MAP_FAILED, "mremap returned :%p" + " (expect return is MAP_FAILED)", remap); + OKNG(__errno != ENOMEM, "errno after mremap :%d" + " (expect error is ENOMEM(%d))", __errno, ENOMEM); + + printf("*** %s PASSED\n\n", TEST_NAME); + + return 0; + +fn_fail: + printf("*** %s FAILED\n\n", TEST_NAME); + + return -1; +} diff --git a/test/issues/1112/Makefile b/test/issues/1112/Makefile new file mode 100644 index 00000000..aaf4b6f0 --- /dev/null +++ b/test/issues/1112/Makefile @@ -0,0 +1,29 @@ +CC = gcc +TARGET=CT_001 CT_002 CT_003 CT_004 CT_005 + +CPPFLAGS = +LDFLAGS = + +all: $(TARGET) + +CT_001: CT_001.c + $(CC) -o $@ $^ $(LDFLAGS) + +CT_002: CT_002.c + $(CC) -o $@ $^ $(LDFLAGS) + +CT_003: CT_003.c + $(CC) -o $@ $^ $(LDFLAGS) + +CT_004: CT_004.c + $(CC) -o $@ $^ $(LDFLAGS) + +CT_005: CT_005.c + $(CC) -o $@ $^ $(LDFLAGS) + +test: all + @sh ./C1112.sh + +clean: + rm -f $(TARGET) *.o + diff --git a/test/issues/1112/README b/test/issues/1112/README new file mode 100644 index 00000000..ad724f25 --- /dev/null +++ b/test/issues/1112/README @@ -0,0 +1,69 @@ +【Issue#1112 動作確認】 +□ テスト内容 +1. Issueで報告された再現プログラムでの確認 +RT_001: ostest-mremap_mmap_anon.001 による確認 + テストが正常に終了し、「RESULT: ok」が出力される + +2. 既存のmremap機能に影響がないことを確認 +CT_001: mremapによる領域の拡張 + 1. mmap で32MBの領域を確保 + 2. 下記条件でmremapを実行 + - old_addr : mmapで確保したアドレス + - old_size : 0x2000000 (32MB) + - new_size : 0x3000000 (48MB) + - flags : MREMAP_MAYMOVE + 3. mremap がMAP_FAILED以外の値を返す + 4. mremap がerrnoを設定しない + +CT_002: mremapによる領域の縮小 + 1. mmap で32MBの領域を確保 + 2. 下記条件でmremapを実行 + - old_addr : mmapで確保したアドレス + - old_size : 0x2000000 (32MB) + - new_size : 0x800000 (8MB) + - flags : 0 + 3. 
mremap がold_addr に指定した値を返す + 4. mremap がerrnoを設定しない + +CT_003: old/new_size 不正時の動作 + 1. mmap で32MBの領域を確保 + 2. 下記条件でmremapを実行 + - old_addr : mmapで確保したアドレス + - old_size : 0xffffffffffffe000 (remap後の領域がアドレス空間の最大値を超える値) + - new_size : 0xffffffffffffe000 (remap後の領域がアドレス空間の最大値を超える値) + - flags : 0 + 3. mremap がold_addr に指定した値を返す + 4. mremap がerrnoを設定しない + +CT_004: old/new_size 不正時の動作 + 1. mmap で32MBの領域を確保 + 2. 下記条件でmremapを実行 + - old_addr : mmapで確保したアドレス + - old_size : 0xffffffffffffe000 (remap後の領域がアドレス空間の最大値を超える値) + - new_size : 0xffffffffffffe000 (remap後の領域がアドレス空間の最大値を超える値) + - flags : MREMAP_MAYMOVE | MREMAP_FIXED + - new_addr : mmapで確保したアドレス + 0x40000000 (1GB) + 3. mremap がMAP_FAILED を返す + 4. mremap がerrnoにEINVALを設定する + +CT_005: old/new_size 不正時の動作 + 1. mmap で32MBの領域を確保 + 2. 下記条件でmremapを実行 + - old_addr : mmapで確保したアドレス + - old_size : 0x2000000 (32MB) + - new_size : 0xffffffffffffe000 (remap後の領域がアドレス空間の最大値を超える値) + - flags : MREMAP_MAYMOVE + 3. mremap がMAP_FAILED を返す + 4. mremap がerrnoにENOMEMを設定する + +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +□ 実行結果 +result.log 参照。 +すべての項目をPASSしていることを確認。 diff --git a/test/issues/1112/result.log b/test/issues/1112/result.log new file mode 100644 index 00000000..6c835e8a --- /dev/null +++ b/test/issues/1112/result.log @@ -0,0 +1,36 @@ +*** RT_001 start ******************************* +TEST_SUITE: mremap_mmap_anon +TEST_NUMBER: 1 +ARGS: +RESULT: ok +*** RT_001: PASSED + +*** CT_001 start ******************************* + [OK] mmap returned :0x2aaaab200000 + [OK] mremap returned :0x2aaaab200000 (expect return is valid addr) + [OK] errno after mremap :0 (expect error is 0) +*** CT_001 PASSED + +*** CT_002 start ******************************* + [OK] mmap returned :0x2aaaab200000 + [OK] mremap returned :0x2aaaab200000 (expect return is 0x2aaaab200000) + [OK] errno after mremap :0 (expect error is 0) +*** 
CT_002 PASSED + +*** CT_003 start ******************************* + [OK] mmap returned :0x2aaaab200000 + [OK] mremap returned :0x2aaaab200000 (expect return is 0x2aaaab200000) + [OK] errno after mremap :0 (expect error is 0) +*** CT_003 PASSED + +*** CT_004 start ******************************* + [OK] mmap returned :0x2aaaab200000 + [OK] mremap returned :0xffffffffffffffff (expect return is MAP_FAILED) + [OK] errno after mremap :22 (expect error is EINVAL(22)) +*** CT_004 PASSED + +*** CT_005 start ******************************* + [OK] mmap returned :0x2aaaab200000 + [OK] mremap returned :0xffffffffffffffff (expect return is MAP_FAILED) + [OK] errno after mremap :12 (expect error is ENOMEM(12)) +*** CT_005 PASSED diff --git a/test/issues/1112/test_chk.h b/test/issues/1112/test_chk.h new file mode 100644 index 00000000..4cef42e8 --- /dev/null +++ b/test/issues/1112/test_chk.h @@ -0,0 +1,23 @@ +#ifndef HEADER_TEST_CHK_H +#define HEADER_TEST_CHK_H + +#define CHKANDJUMP(cond, ...) do {\ + if (cond) {\ + fprintf(stderr, " [NG] ");\ + fprintf(stderr, __VA_ARGS__);\ + fprintf(stderr, " failed\n");\ + goto fn_fail;\ + } \ + } while (0) + +#define OKNG(cond, ...) 
do {\ + if (cond) {\ + CHKANDJUMP(cond, __VA_ARGS__);\ + } else {\ + fprintf(stdout, " [OK] ");\ + fprintf(stdout, __VA_ARGS__);\ + fprintf(stdout, "\n");\ + } \ + } while (0) + +#endif diff --git a/test/issues/1121/C1121.sh b/test/issues/1121/C1121.sh index e4baaa81..9e45eb25 100644 --- a/test/issues/1121/C1121.sh +++ b/test/issues/1121/C1121.sh @@ -1,64 +1,13 @@ #!/bin/sh -BIN= -SBIN= -OSTEST= -LTP= -BOOTPARAM="-c 1-7,9-15,17-23,25-31 -m 10G@0,10G@1 -r 1-7:0+9-15:8+17-23:16+25-31:24" -if [ -f ../../../config.h ]; then - str=`grep "^#define BINDIR " ../../../config.h | head -1 | sed 's/^#define BINDIR /BINDIR=/'` - eval $str -fi -if [ "x$BINDIR" = x ];then - BINDIR="$BIN" -fi +USELTP=1 +USEOSTEST=1 -if [ -f ../../../Makefile ]; then - str=`grep ^SBINDIR ../../../Makefile | head -1 | sed 's/ //g'` - eval $str -fi -if [ "x$SBINDIR" = x ];then - SBINDIR="$SBIN" -fi - -if [ -f $HOME/ltp/testcases/bin/sched_setaffinity01 ]; then - LTPDIR=$HOME/ltp/testcases -fi -if [ "x$LTPDIR" = x ]; then - LTPDIR="$LTP" -fi - -if [ -f $HOME/ostest/bin/test_mck ]; then - OSTESTDIR=$HOME/ostest/ -fi -if [ "x$OSTESTDIR" = x ]; then - OSTESTDIR="$OSTEST" -fi - -if [ ! -x $SBINDIR/mcstop+release.sh ]; then - echo mcstop+releas: not found >&2 - exit 1 -fi -echo -n "mcstop+release.sh ... " -sudo $SBINDIR/mcstop+release.sh -echo "done" - -if [ ! -x $SBINDIR/mcreboot.sh ]; then - echo mcreboot: not found >&2 - exit 1 -fi -echo -n "mcreboot.sh $BOOTPARAM ... " -sudo $SBINDIR/mcreboot.sh $BOOTPARAM -echo "done" - -if [ ! -x $BINDIR/mcexec ]; then - echo mcexec: not found >&2 - exit 1 -fi +. 
../../common.sh tid=001 echo "*** RT_$tid start *******************************" -sudo $BINDIR/mcexec $OSTESTDIR/bin/test_mck -s sched_setaffinity -n 0 -- -p 20 2>&1 | tee ./RT_${tid}.txt +sudo ${MCEXEC} ${TESTMCK} -s sched_setaffinity -n 0 -- -p 20 2>&1 | tee ./RT_${tid}.txt if grep "RESULT: ok" ./RT_${tid}.txt > /dev/null 2>&1 ; then echo "*** RT_$tid: PASSED" else @@ -68,7 +17,7 @@ echo "" tid=002 echo "*** RT_$tid start *******************************" -sudo $BINDIR/mcexec $OSTESTDIR/bin/test_mck -s sched_setaffinity -n 1 -- -p 20 2>&1 | tee ./RT_${tid}.txt +sudo ${MCEXEC} ${TESTMCK} -s sched_setaffinity -n 1 -- -p 20 2>&1 | tee ./RT_${tid}.txt if grep "RESULT: ok" ./RT_${tid}.txt > /dev/null 2>&1 ; then echo "*** RT_$tid: PASSED" else @@ -78,7 +27,7 @@ echo "" tid=003 echo "*** RT_$tid start *******************************" -sudo $BINDIR/mcexec $OSTESTDIR/bin/test_mck -s sched_getaffinity -n 3 -- -p 20 2>&1 | tee ./RT_${tid}.txt +sudo ${MCEXEC} ${TESTMCK} -s sched_getaffinity -n 3 -- -p 20 2>&1 | tee ./RT_${tid}.txt if grep "RESULT: ok" ./RT_${tid}.txt > /dev/null 2>&1 ; then echo "*** RT_$tid: PASSED" else @@ -88,7 +37,7 @@ echo "" tid=004 echo "*** RT_$tid start *******************************" -sudo $BINDIR/mcexec $OSTESTDIR/bin/test_mck -s sched_getaffinity -n 5 -- -p 20 2>&1 | tee ./RT_${tid}.txt +sudo ${MCEXEC} ${TESTMCK} -s sched_getaffinity -n 5 -- -p 20 2>&1 | tee ./RT_${tid}.txt if grep "RESULT: ok" ./RT_${tid}.txt > /dev/null 2>&1 ; then echo "*** RT_$tid: PASSED" else @@ -98,7 +47,7 @@ echo "" tid=001 echo "*** LT_$tid start *******************************" -sudo $BINDIR/mcexec $LTPDIR/bin/sched_setaffinity01 2>&1 | tee ./LT_${tid}.txt +sudo ${MCEXEC} ${LTPBIN}/sched_setaffinity01 2>&1 | tee ./LT_${tid}.txt ok=`grep TPASS LT_${tid}.txt | wc -l` ng=`grep TFAIL LT_${tid}.txt | wc -l` if [ $ng = 0 ]; then diff --git a/test/issues/1121/README b/test/issues/1121/README index 2a1d7bfb..fa276740 100644 --- a/test/issues/1121/README +++ 
b/test/issues/1121/README @@ -25,11 +25,10 @@ LT_001: ltp-sched_setaffinity01 □ 実行手順 $ make test -実行できない場合は、C1121.shの以下の行を適切に書き換えた後に実行。 -BIN= mcexec が存在するパス -SBIN= mcreboot.sh が存在するパス -OSTEST= OSTESTが存在するパス -LTP= LTPが存在するパス +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する □ 実行結果 result.log 参照。 diff --git a/test/issues/1122/C1122.sh b/test/issues/1122/C1122.sh new file mode 100644 index 00000000..6df6adc7 --- /dev/null +++ b/test/issues/1122/C1122.sh @@ -0,0 +1,56 @@ +#!/bin/sh + +USELTP=0 +USEOSTEST=1 + +. ../../common.sh + +tid=001 +echo "*** RT_$tid start *******************************" +sudo ${MCEXEC} ${TESTMCK} -s sched_getaffinity -n 1 2>&1 | tee ./RT_${tid}.txt +if grep "RESULT: ok" ./RT_${tid}.txt > /dev/null 2>&1 ; then + echo "*** RT_$tid: PASSED" +else + echo "*** RT_$tid: FAILED" +fi +echo "" + +tid=002 +echo "*** RT_$tid start *******************************" +sudo ${MCEXEC} ${TESTMCK} -s sched_getaffinity -n 0 -- -p 20 2>&1 | tee ./RT_${tid}.txt +if grep "RESULT: ok" ./RT_${tid}.txt > /dev/null 2>&1 ; then + echo "*** RT_$tid: PASSED" +else + echo "*** RT_$tid: FAILED" +fi +echo "" + +tid=003 +echo "*** RT_$tid start *******************************" +sudo ${MCEXEC} ${TESTMCK} -s sched_getaffinity -n 2 -- -p 20 2>&1 | tee ./RT_${tid}.txt +if grep "RESULT: ok" ./RT_${tid}.txt > /dev/null 2>&1 ; then + echo "*** RT_$tid: PASSED" +else + echo "*** RT_$tid: FAILED" +fi +echo "" + +tid=004 +echo "*** RT_$tid start *******************************" +sudo ${MCEXEC} ${TESTMCK} -s sched_getaffinity -n 3 -- -p 20 2>&1 | tee ./RT_${tid}.txt +if grep "RESULT: ok" ./RT_${tid}.txt > /dev/null 2>&1 ; then + echo "*** RT_$tid: PASSED" +else + echo "*** RT_$tid: FAILED" +fi +echo "" + +tid=005 +echo "*** RT_$tid start *******************************" +sudo ${MCEXEC} ${TESTMCK} -s sched_getaffinity -n 5 -- -p 20 2>&1 | tee ./RT_${tid}.txt +if grep "RESULT: ok" 
./RT_${tid}.txt > /dev/null 2>&1 ; then + echo "*** RT_$tid: PASSED" +else + echo "*** RT_$tid: FAILED" +fi +echo "" diff --git a/test/issues/1122/Makefile b/test/issues/1122/Makefile new file mode 100644 index 00000000..1246e9f4 --- /dev/null +++ b/test/issues/1122/Makefile @@ -0,0 +1,14 @@ +CC = gcc +TARGET= + +CPPFLAGS = +LDFLAGS = + +all: $(TARGET) + +test: all + @sh ./C1122.sh + +clean: + rm -f $(TARGET) *.o + diff --git a/test/issues/1122/README b/test/issues/1122/README new file mode 100644 index 00000000..a6e192ac --- /dev/null +++ b/test/issues/1122/README @@ -0,0 +1,34 @@ +【Issue#1122 動作確認】 +□ テスト内容 +1. Issueで報告された再現プログラムでの確認 +RT_001: ostest-sched_getaffinity.001 + affinityマスクの情報を受け取る第3引数に不正なアドレスを指定した場合、 + -1が返り、errnoにEFAULTが設定されることを確認 + +2. 既存のsched_getaffinity機能に影響がないことをOSTESTを用いて確認 +RT_002: ostest_schedgetaffinity.000 + 自プロセスのaffinityマスクを取得し、実行中のCPUがaffinityマスクに + 含まれていることを確認 + +RT_003: ostest_schedgetaffinity.002 + cpusetsizeに不正な値(0) を指定した場合、 + -1が返り、errnoにEINVALが設定されることを確認 + +RT_004: ostest_schedgetaffinity.003 + pidに存在しないプロセスIDを指定した場合、 + -1が返り、errnoにESRCHが設定されることを確認 + +RT_005: ostest_schedgetaffinity.005 + 子プロセスが親プロセスのaffinityマスクを引き継いでいることを確認 + +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +□ 実行結果 +result.log 参照。 +すべての項目をPASSしていることを確認。 diff --git a/test/issues/1122/result.log b/test/issues/1122/result.log new file mode 100644 index 00000000..71ca41d1 --- /dev/null +++ b/test/issues/1122/result.log @@ -0,0 +1,37 @@ +*** RT_001 start ******************************* +TEST_SUITE: sched_getaffinity +TEST_NUMBER: 1 +ARGS: +sched_getaffinity result:-1, errno:14 (expect error is "EFAULT"=14) +RESULT: ok +*** RT_001: PASSED + +*** RT_002 start ******************************* +TEST_SUITE: sched_getaffinity +TEST_NUMBER: 0 +ARGS: -p 20 +RESULT: ok +*** RT_002: PASSED + +*** RT_003 start ******************************* 
+TEST_SUITE: sched_getaffinity +TEST_NUMBER: 2 +ARGS: -p 20 +sched_getaffinity result:-1, errno:22 (expect error is "EINVAL"=22) +RESULT: ok +*** RT_003: PASSED + +*** RT_004 start ******************************* +TEST_SUITE: sched_getaffinity +TEST_NUMBER: 3 +ARGS: -p 20 +sched_getaffinity result:-1, errno:3 (expect error is "ESRCH"=3) +RESULT: ok +*** RT_004: PASSED + +*** RT_005 start ******************************* +TEST_SUITE: sched_getaffinity +TEST_NUMBER: 5 +ARGS: -p 20 +RESULT: ok +*** RT_005: PASSED diff --git a/test/issues/1141/1141_lin.c b/test/issues/1141/1141_lin.c new file mode 100644 index 00000000..f71bb582 --- /dev/null +++ b/test/issues/1141/1141_lin.c @@ -0,0 +1,88 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> +#include "ihklib.h" + +#define Q(x) #x +#define QUOTE(x) Q(x) + +#define _OKNG(verb, cond, fmt, args...) do { \ + if (cond) { \ + if (verb) \ + printf("[OK] " fmt "\n", ##args); \ + } else { \ + printf("[NG] " fmt "\n", ##args); \ + goto out; \ + } \ +} while (0) + +#define OKNG(args...) _OKNG(1, ##args) +#define NG(args...) 
_OKNG(0, ##args) + +#define CVAL(event, mask) \ + ((((event) & 0xf00) << 24) | ((mask) << 8) | ((event) & 0xff)) + +#define CVAL2(event, mask, inv, count) \ + ((((event) & 0xf00) << 24) | ((mask) << 8) | ((event) & 0xff) | \ + ((inv & 1) << 23) | ((count & 0xff) << 24)) + +/* Intel Nehalem specific */ +struct ihk_perf_event_attr attr[] = { + { .config = CVAL(0xc0, 0x00), .exclude_kernel = 1 }, /* INSTRUCTIONS */ + { .config = CVAL(0x3c, 0x00), .exclude_kernel = 1 }, /* CYCLE */ + { .config = CVAL(0x43, 0x01), .exclude_kernel = 1 }, /* L1D_REQUEST */ + { .config = CVAL(0x51, 0x01), .exclude_kernel = 1 }, /* L1D_MISS */ + { .config = CVAL(0x80, 0x03), .exclude_kernel = 1 }, /* L1I_REQUEST */ + { .config = CVAL(0x80, 0x02), .exclude_kernel = 1 }, /* L1I_MISS */ + { .config = CVAL(0x24, 0xaa), .exclude_kernel = 1 }, /* L2_MISS */ + { .config = CVAL(0x2e, 0x41), .exclude_kernel = 1 }, /* LLC_MISS */ + { .config = CVAL(0x49, 0x01), .exclude_kernel = 1 }, /* DTLB_MISS */ + { .config = CVAL(0x85, 0x01), .exclude_kernel = 1 }, /* ITLB_MISS */ + { .config = CVAL2(0x0e, 0x01, 1, 1), .exclude_kernel = 1 }, /* STALL */ +}; + +int main(int argc, char **argv) +{ + int ret; + int i, j; + int event_num = 0; + char *prefix = QUOTE(MCKDIR); + char cmd[1024]; + + unsigned long counter[4] = {0, 0, 0, 0}; + + // INVALID index + ret = ihk_os_setperfevent(99, attr, 1); + OKNG(ret < 0, "INVALID index ret: %d", ret); + + // event_num 1 + for (j = 1; j < 5; j++) { + printf("*** event_num %d ***************************\n", j); + ret = ihk_os_setperfevent(0, attr, j); + OKNG(ret == j, "setperfevent ret: %d", ret); + + event_num = ret; + + ret = ihk_os_perfctl(0, PERF_EVENT_ENABLE); + OKNG(ret == 0, "ENABLE ret: %d", ret); + + sprintf(cmd, "%s/bin/mcexec ./1141_mck > /dev/null", prefix); + ret = system(cmd); + OKNG(ret == 0, "system mcexec"); + + ret = ihk_os_perfctl(0, PERF_EVENT_DISABLE); + OKNG(ret == 0, "DISABLE ret: %d", ret); + + ret = ihk_os_getperfevent(0, counter, event_num); + 
OKNG(ret == 0, "getperfevent ret: %d", ret); + + for (i = 0; i < event_num; i++) { + printf("read_value[%d] %ld\n", i, counter[i]); + } + } + ret = 0; +out: + return ret; +} diff --git a/test/issues/1141/1141_mck.c b/test/issues/1141/1141_mck.c new file mode 100644 index 00000000..0bcd9825 --- /dev/null +++ b/test/issues/1141/1141_mck.c @@ -0,0 +1,143 @@ +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <unistd.h> +#include <string.h> +#include <pthread.h> +#include <errno.h> +#include <sys/mman.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + + +#define NTHR 1 + +struct thr_arg { + unsigned long delay; +}; + +struct thr_arg thr_arg[NTHR] = { { .delay = 1000000 } }; +pthread_t thr[NTHR]; + +#define DIFFNSEC(end, start) ((end.tv_sec - start.tv_sec) * 1000000000UL + \ + (end.tv_nsec - start.tv_nsec)) +#define TIMER_KIND CLOCK_MONOTONIC_RAW /* CLOCK_THREAD_CPUTIME_ID */ + +static int print_cpu_last_executed_on(void) +{ + int ret = 0; + char fn[256]; + char *result; + pid_t tid = syscall(SYS_gettid); + int fd; + int offset; + int amount = 0; + char *list; + char *token; + int i; + int cpu; + + sprintf(fn, "/proc/%d/task/%d/stat", getpid(), (int)tid); + fd = open(fn, O_RDONLY); + if (fd == -1) { + printf("open() failed\n"); + goto fn_fail; + } + + result = malloc(65536); + if (result == NULL) { + printf("malloc() failed"); + goto fn_fail; + } + + offset = 0; + while (1) { + amount = read(fd, result + offset, 65536); + // printf("amount=%d\n", amount); + if (amount == -1) { + printf("read() failed"); + goto fn_fail; + } + if (amount == 0) { + goto eof; + } + offset += amount; + } + eof:; + //printf("result:%s\n", result); + + list = result; + for (i = 0; i < 39; i++) { + token = strsep(&list, " "); + } + + cpu = sched_getcpu(); + if (cpu == -1) { + printf("getcpu() failed\n"); + goto fn_fail; + } + + printf("[INFO] stat-cpu=%02d,sched_getcpu=%02d,tid=%d\n", 
+ token ? atoi(token) : -1, cpu, tid); + fn_exit: + free(result); + return ret; + fn_fail: + ret = -1; + goto fn_exit; +} + +static inline void asm_loop(unsigned long n) +{ + int j; + + for (j = 0; j < (n); j++) { + asm volatile( + "movq $0, %%rcx\n\t" + "1:\t" + "addq $1, %%rcx\n\t" + "cmpq $99, %%rcx\n\t" + "jle 1b\n\t" + : + : + : "rcx", "cc"); + } +} + +void *util_thread(void *_arg) +{ + struct thr_arg *arg = (struct thr_arg *)_arg; + + print_cpu_last_executed_on(); + asm_loop(arg->delay); + pthread_exit(NULL); +} + +int main(int argc, char **argv) +{ + int i; + int ret = 0; + + for (i = 0; i < NTHR; i++) { + + if ((ret = pthread_create(&thr[i], NULL, + util_thread, &thr_arg[i]))) { + fprintf(stderr, "ERROR: pthread_create failed (%d)\n", + ret); + ret = -EINVAL; + goto out; + } + } + + for (i = 0; i < NTHR; i++) { + pthread_join(thr[i], NULL); + } + + out: + return ret; +} + diff --git a/test/issues/1141/Makefile b/test/issues/1141/Makefile new file mode 100644 index 00000000..dd88b496 --- /dev/null +++ b/test/issues/1141/Makefile @@ -0,0 +1,40 @@ +.SUFFIXES: # Clear suffixes +.SUFFIXES: .c + +MCKDIR=$(HOME)/project/os/install +CC=gcc + +CPPFLAGS_LIN=-I$(MCKDIR)/include -DMCKDIR=$(MCKDIR) +CCFLAGS_LIN=-g -Wall +LDFLAGS_LIN=-L$(MCKDIR)/lib -lihk -Wl,-rpath -Wl,$(MCKDIR)/lib -lbfd +SRCS_LIN=$(shell ls *_lin.c) +EXES_LIN=$(SRCS_LIN:.c=) +OBJS_LIN=$(SRCS_LIN:.c=.o) + +CPPFLAGS_MCK = +CCFLAGS_MCK=-g -Wall +LDFLAGS_MCK=-lpthread +SRCS_MCK=$(shell ls *_mck.c) +EXES_MCK=$(SRCS_MCK:.c=) +OBJS_MCK=$(SRCS_MCK:.c=.o) + +all: $(EXES_LIN) $(EXES_MCK) + +test: $(EXES_LIN) $(EXES_MCK) + sudo ./1141_lin + +%_lin: %_lin.o + $(CC) -o $@ $^ $(LDFLAGS_LIN) + +%_lin.o: %_lin.c + $(CC) $(CCFLAGS_LIN) $(CPPFLAGS_LIN) -c $< + +%_mck: %_mck.o + $(CC) -o $@ $^ $(LDFLAGS_MCK) + +%_mck.o: %_mck.c + $(CC) $(CCFLAGS_MCK) $(CPPFLAGS_MCK) -c $< + +clean: + rm -f core $(EXES_LIN) $(OBJS_LIN) $(EXES_MCK) $(OBJS_MCK) + diff --git a/test/issues/1141/README b/test/issues/1141/README new file mode 
100644 index 00000000..a3d9ca6e --- /dev/null +++ b/test/issues/1141/README @@ -0,0 +1,13 @@ +=========== +How to test +=========== +(1) Edit the following line in Makefile + + MCKDIR=$(HOME)/project/os/install + +(2) make test +(3) Check if the first value, number of instructions retired, + shows around 3 million. +(4) Check if the second value, number of cycles taken, + shows around 3 million / instructions-per-cycle + (1 - 1.5, depends on the processor). diff --git a/test/issues/1158/C1158.sh b/test/issues/1158/C1158.sh new file mode 100644 index 00000000..774b9aea --- /dev/null +++ b/test/issues/1158/C1158.sh @@ -0,0 +1,45 @@ +#!/bin/sh + +USELTP=0 +USEOSTEST=0 + +. ../../common.sh + +tid=001 +echo "*** CT_$tid start *******************************" +tgt_file=`find /sys/devices/ -name local_cpus | head -n 1` +echo "[Linux ] cat ${tgt_file}" +cat ${tgt_file} | tee ./CT_${tid}_lnx.txt +echo "[McKernel] mcexec cat ${tgt_file}" +${MCEXEC} cat ${tgt_file} | tee ./CT_${tid}_mck.txt + +diff ./CT_${tid}_lnx.txt ./CT_${tid}_mck.txt &> /dev/null + +if [ $? == 0 ]; then + echo "[OK] local_cpus is same between Linux and McKernel" + echo "*** CT_$tid: PASSED" +else + echo "[NG] local_cpus is NOT same between Linux and McKernel" + echo "*** CT_$tid: FAILED" +fi +echo "" + +tid=002 +echo "*** CT_$tid start *******************************" +tgt_file=`find /sys/devices/ -name local_cpulist | head -n 1` +echo "[Linux ] cat ${tgt_file}" +cat ${tgt_file} | tee ./CT_${tid}_lnx.txt +echo "[McKernel] mcexec cat ${tgt_file}" +${MCEXEC} cat ${tgt_file} | tee ./CT_${tid}_mck.txt + +diff ./CT_${tid}_lnx.txt ./CT_${tid}_mck.txt &> /dev/null + +if [ $? 
== 0 ]; then + echo "[OK] local_cpulist is same between Linux and McKernel" + echo "*** CT_$tid: PASSED" +else + echo "[NG] local_cpulist is NOT same between Linux and McKernel" + echo "*** CT_$tid: FAILED" +fi +echo "" + diff --git a/test/issues/1158/Makefile b/test/issues/1158/Makefile new file mode 100644 index 00000000..b6c65539 --- /dev/null +++ b/test/issues/1158/Makefile @@ -0,0 +1,14 @@ +CC = gcc +TARGET= + +CPPFLAGS = +LDFLAGS = + +all: $(TARGET) + +test: all + @sh ./C1158.sh + +clean: + rm -f $(TARGET) *.o + diff --git a/test/issues/1158/README b/test/issues/1158/README new file mode 100644 index 00000000..321fb329 --- /dev/null +++ b/test/issues/1158/README @@ -0,0 +1,32 @@ +【Issue#1158 動作確認】 +□ テスト内容 +Issueで報告された症状は現在発生しない。 +理由は、setup_pci_files()が実行されないためである。 + +なお、setup_pci_files()はsysfsのPCIリソースへのインタフェースのうち、 +local_cpus, local_cpulist のMcKernelでのビューを作成する目的で作成されたが、 +正常に動作しないためコメントアウトされている。 + +McKernelからのlocal_cpus, local_cpulistを参照した場合、 +Linuxと同様の情報が参照されることを確認する。 + +1. テストプログラムによる確認 +CT_001: local_cpus の参照 + /sys/devices/ 配下のlocal_cpusについて、 + Linuxと同様の情報が参照できることを確認する + +CT_002: local_cpulist の参照 + /sys/devices/ 配下のlocal_cpulistについて、 + Linuxと同様の情報が参照できることを確認する + +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +□ 実行結果 +result.log 参照。 +すべての項目をPASSしていることを確認。 diff --git a/test/issues/1158/result.log b/test/issues/1158/result.log new file mode 100644 index 00000000..de8ab128 --- /dev/null +++ b/test/issues/1158/result.log @@ -0,0 +1,15 @@ +*** CT_001 start ******************************* +[Linux ] cat /sys/devices/pci0000:00/0000:00:00.0/local_cpus +00010001 +[McKernel] mcexec cat /sys/devices/pci0000:00/0000:00:00.0/local_cpus +00010001 +[OK] local_cpus is same between Linux and McKernel +*** CT_001: PASSED + +*** CT_002 start ******************************* +[Linux ] cat /sys/devices/pci0000:00/0000:00:00.0/local_cpulist 
+0,16 +[McKernel] mcexec cat /sys/devices/pci0000:00/0000:00:00.0/local_cpulist +0,16 +[OK] local_cpulist is same between Linux and McKernel +*** CT_002: PASSED diff --git a/test/issues/1164/C1164.sh b/test/issues/1164/C1164.sh new file mode 100644 index 00000000..bb358951 --- /dev/null +++ b/test/issues/1164/C1164.sh @@ -0,0 +1,52 @@ +#!/bin/sh + +USELTP=0 +USEOSTEST=1 + +. ../../common.sh + +sudo /bin/sh ${OSTESTDIR}/util/insmod_test_drv.sh +${MCEXEC} ./CT_001 +sudo /bin/sh ${OSTESTDIR}/util/rmmod_test_drv.sh + + +tid=001 +echo "*** RT_$tid start *******************************" +sudo ${MCEXEC} ${TESTMCK} -s procfs -n 0 2>&1 | tee ./RT_${tid}.txt +if grep -v "RESULT: TP failed" ./RT_${tid}.txt > /dev/null 2>&1 ; then + echo "*** RT_$tid: PASSED" +else + echo "*** RT_$tid: FAILED" +fi +echo "" + +tid=002 +echo "*** RT_$tid start *******************************" +sudo ${MCEXEC} ${TESTMCK} -s procfs -n 1 2>&1 | tee ./RT_${tid}.txt +if grep -v "RESULT: TP failed" ./RT_${tid}.txt > /dev/null 2>&1 ; then + echo "*** RT_$tid: PASSED" +else + echo "*** RT_$tid: FAILED" +fi +echo "" + +tid=003 +echo "*** RT_$tid start *******************************" +sudo ${MCEXEC} ${TESTMCK} -s procfs -n 3 2>&1 | tee ./RT_${tid}.txt +if grep -v "RESULT: TP failed" ./RT_${tid}.txt > /dev/null 2>&1 ; then + echo "*** RT_$tid: PASSED" +else + echo "*** RT_$tid: FAILED" +fi +echo "" + +tid=004 +echo "*** RT_$tid start *******************************" +sudo ${MCEXEC} ${TESTMCK} -s procfs -n 6 2>&1 | tee ./RT_${tid}.txt +if grep -v "RESULT: TP failed" ./RT_${tid}.txt > /dev/null 2>&1 ; then + echo "*** RT_$tid: PASSED" +else + echo "*** RT_$tid: FAILED" +fi +echo "" + diff --git a/test/issues/1164/CT_001.c b/test/issues/1164/CT_001.c new file mode 100644 index 00000000..382f5ae0 --- /dev/null +++ b/test/issues/1164/CT_001.c @@ -0,0 +1,81 @@ +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <unistd.h> 
+#include <errno.h> + +#include "./test_chk.h" + +#define TEST_NAME "CT_001" + +#define PROCFILE_LEN 128 +#define MAP_LEN 4096 +#define DEV_NAME "/dev/test_mck/mmap_dev2" + +int main(int argc, char *argv[]) +{ + int dev_fd = 0, fd = 0, i = 0; + pid_t pid = getpid(); + char pfname[PROCFILE_LEN]; + void *dev_map = NULL; + unsigned long *read_buf = NULL; + off_t ret = 0; + + printf("*** %s start *******************************\n", TEST_NAME); + + /* open device file */ + dev_fd = open(DEV_NAME, O_RDONLY); + OKNG(dev_fd < 0, "open test_device_file:%s", DEV_NAME); + + /* mmap device file */ + dev_map = mmap(NULL, MAP_LEN, PROT_READ, MAP_SHARED, dev_fd, 0); + OKNG(dev_map == MAP_FAILED, "mmap device file"); + printf(" map dev_file to %p\n", dev_map); + + /* allocate read_buf */ + read_buf = malloc(MAP_LEN); + CHKANDJUMP(read_buf == NULL, "malloc read_buf"); + + /* generate proc_mem path */ + sprintf(pfname, "/proc/%d/mem", pid); + + /* open proc_mem */ + fd = open(pfname, O_RDONLY); + CHKANDJUMP(fd < 0, "open proc_mem"); + + /* lseek */ + ret = lseek(fd, (off_t)dev_map, SEEK_SET); + CHKANDJUMP(ret == -1, "lseek"); + + /* read */ + ret = read(fd, read_buf, MAP_LEN); + OKNG(ret != -1 || errno != EIO, "failed to read host's phys_memory"); + + free(read_buf); + close(dev_fd); + close(fd); + + printf("*** %s PASSED\n\n", TEST_NAME); + + return 0; + +fn_fail: + + if (read_buf) { + free(read_buf); + } + if (dev_fd > 0) { + close(dev_fd); + } + if (fd > 0) { + close(fd); + } + + printf("*** %s FAILED\n\n", TEST_NAME); + + return -1; + +} diff --git a/test/issues/1164/Makefile b/test/issues/1164/Makefile new file mode 100644 index 00000000..bb54bae0 --- /dev/null +++ b/test/issues/1164/Makefile @@ -0,0 +1,17 @@ +CC = gcc +TARGET=CT_001 + +CPPFLAGS = +LDFLAGS = + +all: $(TARGET) + +CT_001: CT_001.c + $(CC) -o $@ $^ $(LDFLAGS) + +test: all + @sh ./C1164.sh + +clean: + rm -f $(TARGET) *.o + diff --git a/test/issues/1164/README b/test/issues/1164/README new file mode 100644 index 
00000000..8c1a95ea --- /dev/null +++ b/test/issues/1164/README @@ -0,0 +1,29 @@ +【Issue#1164 動作確認】 +□ テスト内容 +1. Issueで報告された再現プログラムでの確認 +CT_001: Linux管理メモリ領域への/proc/<PID>/mem のread + Linux管理メモリ領域を持つデバイスファイルをマップする + 対象領域に対して/proc/<PID>/mem を介してreadし、 + readが失敗し、errnoにEIOが設定されていることを確認する + +2. 既存のprocfs機能に影響がないことをOSTESTを用いて確認 +RT_001: ostest_procfs.000 + /proc/<PID>/auxv の内容を取得できることを確認 +RT_002: ostest_procfs.001 + /proc/<PID>/mem の内容を取得できることを確認 +RT_003: ostest_procfs.003 + /proc/<PID>/stat の内容を取得できることを確認 +RT_004: ostest_procfs.006 + /proc/<PID>/cpuinfo の内容を取得できることを確認 + +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +□ 実行結果 +result.log 参照。 +すべての項目をPASSしていることを確認。 diff --git a/test/issues/1164/result.log b/test/issues/1164/result.log new file mode 100644 index 00000000..8ab1722c --- /dev/null +++ b/test/issues/1164/result.log @@ -0,0 +1,169 @@ +insmod /home/satoken/ostest/util/../bin/test_mck.ko +mknod: ‘/dev/test_mck/mmap_dev’: File exists +create character device /dev/test_mck/mmap_dev(244:0) +mknod: ‘/dev/test_mck/mmap_dev2’: File exists +create character device /dev/test_mck/mmap_dev2(244:1) +*** CT_001 start ******************************* + [OK] open test_device_file:/dev/test_mck/mmap_dev2 + [OK] mmap device file + map dev_file to 0x2aaaab014000 + [OK] failed to read host's phys_memory +*** CT_001 PASSED + +remove /dev/test_mck +rmmod /home/satoken/ostest/util/../bin/test_mck.ko +*** RT_001 start ******************************* +TEST_SUITE: procfs +TEST_NUMBER: 0 +ARGS: +dump /proc/17731/auxv: + 0x00000000000021 0x002aaaaac24000 (AT_SYSINFO_EHDR) + 0x00000000000019 0x00547fffffffe0 (AT_RANDOM) + 0x00000000000011 0x00000000000064 (AT_CLKTCK) + 0x00000000000006 0x00000000001000 (AT_PAGESZ) + 0x00000000000003 0x00000000400040 (AT_PHDR) + 0x00000000000004 0x00000000000038 (AT_PHENT) + 0x00000000000005 0x0000000000000a (AT_PHNUM) + 
0x00000000000009 0x00000000403430 (AT_ENTRY) + 0000000000000000 0000000000000000 (AT_NULL) +RESULT: you need check AUXV value +*** RT_001: PASSED + +*** RT_002 start ******************************* +TEST_SUITE: procfs +TEST_NUMBER: 1 +ARGS: +allocated: 0x00000000800010 +dump /proc/17765/mem(offset:0x00000000800010): + 0x00000000800010: 0000000000000000 0000000000000001 0000000000000002 0000000000000003 + 0x00000000800030: 0000000000000004 0000000000000005 0000000000000006 0000000000000007 + 0x00000000800050: 0000000000000008 0000000000000009 000000000000000a 000000000000000b + 0x00000000800070: 000000000000000c 000000000000000d 000000000000000e 000000000000000f + 0x00000000800090: 0000000000000010 0000000000000011 0000000000000012 0000000000000013 + 0x000000008000b0: 0000000000000014 0000000000000015 0000000000000016 0000000000000017 + 0x000000008000d0: 0000000000000018 0000000000000019 000000000000001a 000000000000001b + 0x000000008000f0: 000000000000001c 000000000000001d 000000000000001e 000000000000001f +RESULT: you need check MEM value +*** RT_002: PASSED + +*** RT_003 start ******************************* +TEST_SUITE: procfs +TEST_NUMBER: 3 +ARGS: +output /proc/17799/task/17829/stat +0 (exe) R 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 + +RESULT: you need check STAT value +*** RT_003: PASSED + +*** RT_004 start ******************************* +TEST_SUITE: procfs +TEST_NUMBER: 6 +ARGS: +output: /proc/cpuinfo +processor : 0 +vendor_id : GenuineIntel +cpu family : 6 +model : 62 +model name : Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz +stepping : 4 +microcode : 0x428 +cpu MHz : 2600.000 +cache size : 20480 KB +physical id : 0 +siblings : 2 +core id : 0 +cpu cores : 1 +apicid : 0 +initial apicid : 0 +fpu : yes +fpu_exception : yes +cpuid level : 13 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc 
arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm ida arat epb pln pts dtherm tpr_shadow vnmi flexpriority ept vpid fsgsbase smep erms xsaveopt +bogomips : 5200.17 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual +power management: + +processor : 8 +vendor_id : GenuineIntel +cpu family : 6 +model : 62 +model name : Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz +stepping : 4 +microcode : 0x428 +cpu MHz : 2599.898 +cache size : 20480 KB +physical id : 1 +siblings : 2 +core id : 0 +cpu cores : 1 +apicid : 32 +initial apicid : 32 +fpu : yes +fpu_exception : yes +cpuid level : 13 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm ida arat epb pln pts dtherm tpr_shadow vnmi flexpriority ept vpid fsgsbase smep erms xsaveopt +bogomips : 5205.44 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual +power management: + +processor : 16 +vendor_id : GenuineIntel +cpu family : 6 +model : 62 +model name : Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz +stepping : 4 +microcode : 0x428 +cpu MHz : 2599.898 +cache size : 20480 KB +physical id : 0 +siblings : 2 +core id : 0 +cpu cores : 1 +apicid : 1 +initial apicid : 1 +fpu : yes +fpu_exception : yes +cpuid level : 13 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon 
pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm ida arat epb pln pts dtherm tpr_shadow vnmi flexpriority ept vpid fsgsbase smep erms xsaveopt +bogomips : 5200.17 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual +power management: + +processor : 24 +vendor_id : GenuineIntel +cpu family : 6 +model : 62 +model name : Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz +stepping : 4 +microcode : 0x428 +cpu MHz : 2600.000 +cache size : 20480 KB +physical id : 1 +siblings : 2 +core id : 0 +cpu cores : 1 +apicid : 33 +initial apicid : 33 +fpu : yes +fpu_exception : yes +cpuid level : 13 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm ida arat epb pln pts dtherm tpr_shadow vnmi flexpriority ept vpid fsgsbase smep erms xsaveopt +bogomips : 5205.44 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual +power management: + +RESULT: you need check CPUINFO +*** RT_004: PASSED diff --git a/test/issues/1164/test_chk.h b/test/issues/1164/test_chk.h new file mode 100644 index 00000000..4cef42e8 --- /dev/null +++ b/test/issues/1164/test_chk.h @@ -0,0 +1,23 @@ +#ifndef HEADER_TEST_CHK_H +#define HEADER_TEST_CHK_H + +#define CHKANDJUMP(cond, ...) do {\ + if (cond) {\ + fprintf(stderr, " [NG] ");\ + fprintf(stderr, __VA_ARGS__);\ + fprintf(stderr, " failed\n");\ + goto fn_fail;\ + } \ + } while (0) + +#define OKNG(cond, ...) 
do {\ + if (cond) {\ + CHKANDJUMP(cond, __VA_ARGS__);\ + } else {\ + fprintf(stdout, " [OK] ");\ + fprintf(stdout, __VA_ARGS__);\ + fprintf(stdout, "\n");\ + } \ + } while (0) + +#endif diff --git a/test/issues/1165/C1165.sh b/test/issues/1165/C1165.sh new file mode 100644 index 00000000..002c5aeb --- /dev/null +++ b/test/issues/1165/C1165.sh @@ -0,0 +1,35 @@ +#!/bin/sh +USELTP=1 +USEOSTEST=1 + +BOOTPARAM="-c 1-7,17-23,9-15,25-31 -m 10G@0,10G@1" +. ../../common.sh + +################################################################################ +$MCEXEC ./C1165T01 + +sudo sh "$OSTESTDIR"/util/rmmod_test_drv.sh > /dev/null 2>&1 +sudo sh "$OSTESTDIR"/util/insmod_test_drv.sh +echo a > mmapfile +sudo timeout -s 9 3 $MCEXEC "$TESTMCK" -s force_exit -n 0 -- -d /dev/test_mck/mmap_dev -f mmapfile +rm -f mmapfile +sudo sh "$OSTESTDIR"/util/rmmod_test_drv.sh +"$SBINDIR"/ihkosctl 0 clear_kmsg +"$SBINDIR"/ihkosctl 0 ioctl 40000000 1 +"$SBINDIR"/ihkosctl 0 ioctl 40000000 2 +"$SBINDIR"/ihkosctl 0 kmsg | sed 's/[^:]*://' | awk '$2 == "processes" {p = $1} $2 == "threads" {t = $1}END{if (p != 0 || t != 0) {print "*** C1165T02 NG"} else {print "*** C1165T02 OK"}}' + +for i in clone01:03 clone03:04 clone04:05 clone06:06 clone07:07 fork01:08 \ + fork02:09 fork03:10 fork04:11 fork07:12 fork08:13 fork09:14 \ + fork10:15; do + tp=`echo $i|sed 's/:.*//'` + id=`echo $i|sed 's/.*://'` + sudo $MCEXEC $LTPBIN/$tp 2>&1 | tee $tp.txt + ok=`grep TPASS $tp.txt | wc -l` + ng=`grep TFAIL $tp.txt | wc -l` + if [ $ng = 0 ]; then + echo "*** C1165T$id: $tp OK ($ok)" + else + echo "*** C1165T$id: $tp NG (ok=$ok ng=$ng)" + fi +done diff --git a/test/issues/1165/C1165.txt b/test/issues/1165/C1165.txt new file mode 100644 index 00000000..e0549ddf --- /dev/null +++ b/test/issues/1165/C1165.txt @@ -0,0 +1,72 @@ +Script started on Tue Aug 28 13:52:33 2018 +bash-4.2$ make test +sh ./C1165.sh +mcstop+release.sh ... done +mcreboot.sh -c 1-7,17-23,9-15,25-31 -m 10G@0,10G@1 ... 
done +*** C1165T01 OK +insmod /home/shirasawa/ostest/util/../bin/test_mck.ko +create charcter device /dev/test_mck/mmap_dev(240:0) +create charcter device /dev/test_mck/mmap_dev2(240:1) +TEST_SUITE: force_exit +TEST_NUMBER: 0 +ARGS: -d /dev/test_mck/mmap_dev -f mmapfile +read 1 byte (a(97)) +mmap(0x2aaaac73b000) +remove /dev/test_mck +rmmod /home/shirasawa/ostest/util/../bin/test_mck.ko +*** C1165T02 OK +clone01 1 TPASS : clone returned 9933 +*** C1165T03: clone01 OK (1) +clone03 1 TPASS : Test passed +*** C1165T04: clone03 OK (1) +clone04 1 TPASS : expected failure; Got EINVAL +*** C1165T05: clone04 OK (1) +clone06 1 TPASS : Test Passed +*** C1165T06: clone06 OK (1) +clone07 1 TPASS : Use of return() in child did not cause SIGSEGV +*** C1165T07: clone07 OK (1) +fork01 1 TPASS : fork() returned 10278 +fork01 2 TPASS : child pid and fork() return agree: 10278 +*** C1165T08: fork01 OK (2) +fork02 0 TINFO : Inside parent +fork02 0 TINFO : Inside child +fork02 0 TINFO : exit status of wait 0 +fork02 1 TPASS : test 1 PASSED +*** C1165T09: fork02 OK (1) +fork03 0 TINFO : process id in parent of child from fork : 10428 +fork03 1 TPASS : test 1 PASSED +*** C1165T10: fork03 OK (1) +fork04 1 TPASS : Env var TERM unchanged after fork(): xterm +fork04 2 TPASS : Env var NoTSetzWq unchanged after fork(): getenv() does not find variable set +fork04 3 TPASS : Env var TESTPROG unchanged after fork(): FRKTCS04 +*** C1165T11: fork04 OK (3) +fork07 0 TINFO : Forking 100 children +fork07 0 TINFO : Forked all 100 children, now collecting +fork07 0 TINFO : Collected all 100 children +fork07 1 TPASS : 100/100 children read correctly from an inheritted fd +*** C1165T12: fork07 OK (1) +fork08 0 TINFO : parent forksval: 1 +fork08 0 TINFO : parent forksval: 2 +fork08 0 TINFO : exit status of wait expected 0 got 0 +fork08 1 TPASS : parent test PASSED +fork08 0 TINFO : second child got char: b +fork08 1 TPASS : Test passed in childnumber 2 +fork08 0 TINFO : exit status of wait expected 0 got 0 
+fork08 2 TPASS : parent test PASSED +fork08 0 TINFO : exit status of wait expected 0 got 0 +fork08 3 TPASS : parent test PASSED +fork08 0 TINFO : Number of processes forked is 2 +*** C1165T13: fork08 OK (4) +fork09 0 TINFO : OPEN_MAX is 1024 +fork09 0 TINFO : first file descriptor is 12 +fork09 0 TINFO : Parent reporting 1023 files open +fork09 0 TINFO : Child opened new file #1023 +fork09 1 TPASS : test 1 PASSED +*** C1165T14: fork09 OK (1) +fork10 0 TINFO : fork child A +fork10 1 TPASS : test 1 PASSED +*** C1165T15: fork10 OK (1) +bash-4.2$ exit +exit + +Script done on Tue Aug 28 13:53:01 2018 diff --git a/test/issues/1165/C1165T01.c b/test/issues/1165/C1165T01.c new file mode 100644 index 00000000..dc973d51 --- /dev/null +++ b/test/issues/1165/C1165T01.c @@ -0,0 +1,97 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <errno.h> +#include <fcntl.h> + +int +main(int argc, char **argv) +{ + pid_t pid; + char buf[1024]; + int fd; + int rc; + int st; + int i; + + fd = open("/dev/zero", O_RDONLY); + if (fd < 0) { + perror("open"); + goto err; + } + if ((rc = read(fd, buf, 1024)) != 1024) { + if (rc < 0) { + perror("read"); + } + else if (rc == 0) { + fprintf(stderr, "EOF\n"); + } + else { + fprintf(stderr, "read too short %d\n", rc); + } + goto err; + } + close(fd); + + for (i = 0; i < 1024; i++) + buf[i] = 0x55; + + pid = fork(); + if (!pid) { + for (i = 0; i < 1024; i++) + if (buf[i] != 0x55) { + exit(2); + } + fd = open("/dev/zero", O_RDONLY); + if (fd < 0) { + perror("open"); + exit(1); + } + if ((rc = read(fd, buf, 1024)) != 1024) { + exit(1); + } + close(fd); + for (i = 0; i < 1024; i++) + if (buf[i] != 0) { + exit(3); + } + exit(0); + } + + while (waitpid(pid, &st, 0) == -1 && errno == EINTR); + + if (!WIFEXITED(st)) { + fprintf(stderr, "child failed %08x\n", st); + goto err; + } + else if (WEXITSTATUS(st) != 0) { + if (WEXITSTATUS(st) == 1) { + fprintf(stderr, "child 
I/O error\n"); + } + else if (WEXITSTATUS(st) == 2) { + fprintf(stderr, "child memory error\n"); + } + else if (WEXITSTATUS(st) == 3) { + fprintf(stderr, "child read error\n"); + } + else { + fprintf(stderr, "child error %08x\n", st); + } + goto err; + } + + for (i = 0; i < 1024; i++) + if (buf[i] != 0x55) { + fprintf(stderr, "BAD value 0x%02x != 0x55\n", buf[i]); + goto err; + } + + fprintf(stderr, "*** C1165T01 OK\n"); + exit(0); +err: + fprintf(stderr, "*** C1165T01 NG\n"); + exit(1); +} diff --git a/test/issues/1165/Makefile b/test/issues/1165/Makefile new file mode 100644 index 00000000..b7193f36 --- /dev/null +++ b/test/issues/1165/Makefile @@ -0,0 +1,13 @@ +CC=gcc +TARGET=C1165T01 + +all:: $(TARGET) + +C1165T01: C1165T01.c + $(CC) -o C1165T01 C1165T01.c -Wall -g + +test:: $(TARGET) + sh ./C1165.sh + +clean:: + rm -f *.o $(TARGET) diff --git a/test/issues/1165/README b/test/issues/1165/README new file mode 100644 index 00000000..4e0bb039 --- /dev/null +++ b/test/issues/1165/README @@ -0,0 +1,53 @@ +【Issue#1165 動作確認】 +□ テスト内容 +Issue#1165 で指摘されている現象は、以下の理由により既に解消されている。 +・ kill および gettid のシステムコールデリゲートは、McKernel と同一PIDを持つ + mcexec に対して発行するようになっているため、他プロセスの情報を参照する + ことは無い。 +・ 対応する mcexec が終了している状態でシステムコールデリゲートしたとしても、 + mcctrl が適切にエラーを返すため、指摘の現象が発生することは無い。 + +以上を踏まえ、Issue#1165 では、以下の改修を行った。 +・ 他 PID の mcexec にシステムコールデリゲートは発生しない (または解消可能) + ので、do_syscall と send_syscall のインタフェースから PID 指定を削除する。 +・ 他 PID の mcexec にシステムコールデリゲートする以下の処理は、同一 PID の + mcexec にシステムコールデリゲートするように変更する。 + - fork 後、親プロセスから子プロセスの mcexec に PTE の初期化を依頼する処理 + +このため、Issue#1165に対して以下のテストを行った。 +1. fork 後、子プロセスのシステムコールデリゲートで親プロセスの領域が破壊 + されないことを確認する。 +C1165T01 親プロセスが read 後 fork し、子プロセスが同じアドレスの領域に read + して子プロセスの領域が書き換えられ、親プロセスの領域が書き換えられ + ないことを確認する。 + +2. 指摘されたプログラムを実行し、残留プロセスが存在しないことを確認する。 +C1165T02 ostest force_exit + +3. 
LTP を用いて、変更が既存処理に影響しないことを確認する (以下の LTP が PASS + すること)。 +C1165T03 clone01 +C1165T04 clone03 +C1165T05 clone04 +C1165T06 clone06 +C1165T07 clone07 +C1165T08 fork01 +C1165T09 fork02 +C1165T10 fork03 +C1165T11 fork04 +C1165T12 fork07 +C1165T13 fork08 +C1165T14 fork09 +C1165T15 fork10 + +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +□ 実行結果 +C1165.txt 参照。 +全ての項目が OK となっていることを確認。 diff --git a/test/issues/1176/C1176.sh b/test/issues/1176/C1176.sh new file mode 100644 index 00000000..7d3d7a9a --- /dev/null +++ b/test/issues/1176/C1176.sh @@ -0,0 +1,30 @@ +#!/bin/sh + +USELTP=1 +USEOSTEST=1 + +. ../../common.sh + +"$MCEXEC" "$TESTMCK" -s getrusage -n 2 2>&1 | tee C1176T01.txt +if grep "RESULT: you need check rusage value" C1176T01.txt > /dev/null 2>&1;then + echo "*** C1176T01: OK" +else + echo "*** C1176T01: NG" +fi + +"$MCEXEC" ./C1176T02 +"$MCEXEC" ./C1176T03 +"$MCEXEC" ./C1176T04 + +for i in kill01:05 kill12:06 pause02:07 sigaction01:08 ; do + tp=`echo $i|sed 's/:.*//'` + id=`echo $i|sed 's/.*://'` + $MCEXEC $LTPBIN/$tp 2>&1 | tee $tp.txt + ok=`grep TPASS $tp.txt | wc -l` + ng=`grep TFAIL $tp.txt | wc -l` + if [ $ng = 0 ]; then + echo "*** C1176T$id: $tp OK ($ok)" + else + echo "*** C1176T$id: $tp NG (ok=$ok ng=$ng)" + fi +done diff --git a/test/issues/1176/C1176.txt b/test/issues/1176/C1176.txt new file mode 100644 index 00000000..fd6a08cd --- /dev/null +++ b/test/issues/1176/C1176.txt @@ -0,0 +1,67 @@ +Script started on Tue Sep 11 14:51:16 2018 +bash-4.2$ make test +gcc -g -Wall -o C1176T02 C1176T02.c +gcc -g -Wall -o C1176T03 C1176T03.c +gcc -g -Wall -o C1176T04 C1176T04.c +sh ./C1176.sh +mcstop+release.sh ... done +mcreboot.sh -c 1-7,9-15,17-23,25-31 -m 10G@0,10G@1 -r 1-7:0+9-15:8+17-23:16+25-31:24 ... 
done +TEST_SUITE: getrusage +TEST_NUMBER: 2 +ARGS: +[parent before] +------------------------------ +show_rusage(): + ru_utime=0s + 582us + ru_stime=0s + 42089us + ru_maxrss=6316 +------------------------------ +[child before] +------------------------------ +show_rusage(): + ru_utime=0s + 6us + ru_stime=0s + 0us + ru_maxrss=14512 +------------------------------ +allocation memory 16777216 byte(16384 KiB) +alarm 2 seconds wait. +sleep 2 seconds wait. +free memory 16777216 byte(16384 KiB) +[child after] +------------------------------ +show_rusage(): + ru_utime=1s + 997934us + ru_stime=2s + 2895us + ru_maxrss=30900 +------------------------------ +[parent after] +------------------------------ +show_rusage(): + ru_utime=0s + 599us + ru_stime=4s + 44489us + ru_maxrss=30900 +------------------------------ +RESULT: you need check rusage value +*** C1176T01: OK +*** C1176T02: OK +*** C1176T03: OK +parent call sleep +child call sleep +parent return from sleep +child return from sleep +*** C1176T04: OK +kill01 1 TPASS : received expected signal 9 +*** C1176T05: kill01 OK (1) +kill12 1 TPASS : Test passed +*** C1176T06: kill12 OK (1) +pause02 1 TPASS : pause was interrupted correctly +*** C1176T07: pause02 OK (1) +sigaction01 1 TPASS : SA_RESETHAND did not cause SA_SIGINFO to be cleared +sigaction01 2 TPASS : SA_RESETHAND was masked when handler executed +sigaction01 3 TPASS : sig has been masked because sa_mask originally contained sig +sigaction01 4 TPASS : siginfo pointer non NULL +*** C1176T08: sigaction01 OK (4) +bash-4.2$ exit +exit + +Script done on Tue Sep 11 14:51:54 2018 diff --git a/test/issues/1176/C1176T02.c b/test/issues/1176/C1176T02.c new file mode 100644 index 00000000..b909ffd2 --- /dev/null +++ b/test/issues/1176/C1176T02.c @@ -0,0 +1,69 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <signal.h> +#include <errno.h> +#include <sys/wait.h> + +int sighandler; + +void +sig(int s) +{ + 
sighandler = 1; +} + +int +main(int argc, char **argv) +{ + pid_t pid; + int st; + + if ((pid = fork()) == 0) { + struct sigaction act; + char ch; + int rc; + + memset(&act, '\0', sizeof(act)); + act.sa_handler = sig; + sigaction(SIGINT, &act, NULL); + rc = read(0, &ch, 1); + if (rc != -1 || errno != EINTR) { + exit(rc == -1 ? 1 : 2); + } + if (sighandler == 0) { + exit(3); + } + exit(0); + } + sleep(1); + kill(pid, SIGINT); + if (waitpid(pid, &st, 0) == -1) { + fprintf(stderr, "*** C1176T02: NG wait %d\n", errno); + exit(1); + } + if (WIFSIGNALED(st)) { + fprintf(stderr, "*** C1176T02: NG termsig %d\n", WTERMSIG(st)); + exit(1); + } + if (WEXITSTATUS(st) == 1) { + fprintf(stderr, "*** C1176T02: NG BAD read\n"); + exit(1); + } + else if (WEXITSTATUS(st) == 2) { + fprintf(stderr, "*** C1176T02: NG BAD read error\n"); + exit(1); + } + else if (WEXITSTATUS(st) == 3) { + fprintf(stderr, "*** C1176T02: NG don't called sighandler\n"); + exit(1); + } + else if (WEXITSTATUS(st) != 0) { + fprintf(stderr, "*** C1176T02: NG unknown\n"); + exit(1); + } + fprintf(stderr, "*** C1176T02: OK\n"); + exit(0); +} diff --git a/test/issues/1176/C1176T03.c b/test/issues/1176/C1176T03.c new file mode 100644 index 00000000..2b8c8b65 --- /dev/null +++ b/test/issues/1176/C1176T03.c @@ -0,0 +1,74 @@ +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <signal.h> +#include <errno.h> +#include <sys/wait.h> +#include <linux/futex.h> +#include <sys/time.h> +#include <sys/syscall.h> + +int sighandler; + +void +sig(int s) +{ + sighandler = 1; +} + +int +main(int argc, char **argv) +{ + pid_t pid; + int st; + + if ((pid = fork()) == 0) { + struct sigaction act; + int wk; + int rc; + + memset(&act, '\0', sizeof(act)); + act.sa_handler = sig; + sigaction(SIGINT, &act, NULL); + wk = 0; + rc = syscall(SYS_futex, &wk, FUTEX_WAIT, 0, NULL, NULL, 0); + if (rc != -1 || errno != EINTR) { + exit(rc == -1 ? 
1 : 2); + } + if (sighandler == 0) { + exit(3); + } + exit(0); + } + sleep(1); + kill(pid, SIGINT); + if (waitpid(pid, &st, 0) == -1) { + fprintf(stderr, "*** C1176T03: NG wait %d\n", errno); + exit(1); + } + if (WIFSIGNALED(st)) { + fprintf(stderr, "*** C1176T03: NG termsig %d\n", WTERMSIG(st)); + exit(1); + } + if (WEXITSTATUS(st) == 1) { + fprintf(stderr, "*** C1176T03: NG BAD read\n"); + exit(1); + } + else if (WEXITSTATUS(st) == 2) { + fprintf(stderr, "*** C1176T03: NG BAD read error\n"); + exit(1); + } + else if (WEXITSTATUS(st) == 3) { + fprintf(stderr, "*** C1176T03: NG don't called sighandler\n"); + exit(1); + } + else if (WEXITSTATUS(st) != 0) { + fprintf(stderr, "*** C1176T03: NG unknown\n"); + exit(1); + } + fprintf(stderr, "*** C1176T03: OK\n"); + exit(0); +} diff --git a/test/issues/1176/C1176T04.c b/test/issues/1176/C1176T04.c new file mode 100644 index 00000000..aeea211a --- /dev/null +++ b/test/issues/1176/C1176T04.c @@ -0,0 +1,55 @@ +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <sched.h> +#include <errno.h> + +int +main(int argc, char **argv) +{ + pid_t pid; + cpu_set_t cpuset; + int st; + + CPU_ZERO(&cpuset); + CPU_SET(1, &cpuset); + + if (!(pid = fork())) { + if (sched_setaffinity(0, sizeof(cpuset), &cpuset)) { + fprintf(stderr, "*** C1176T04: NG\n"); + exit(1); + } + fprintf(stderr, "child call sleep\n"); + fflush(stderr); + sleep(1); + fprintf(stderr, "child return from sleep\n"); + fflush(stderr); + exit(0); + } + + if (sched_setaffinity(0, sizeof(cpuset), &cpuset)) { + fprintf(stderr, "*** C1176T04: NG\n"); + exit(1); + } + fprintf(stderr, "parent call sleep\n"); + fflush(stderr); + sleep(1); + fprintf(stderr, "parent return from sleep\n"); + fflush(stderr); + + if (waitpid(pid, &st, 0) == -1) { + fprintf(stderr, "*** C1176T04: NG %d\n", errno); + exit(1); + } + if (!WIFEXITED(st) || WEXITSTATUS(st)) { + fprintf(stderr, "*** 
C1176T04: NG %08x\n", st); + exit(1); + } + + fprintf(stderr, "*** C1176T04: OK\n"); + exit(0); +} diff --git a/test/issues/1176/Makefile b/test/issues/1176/Makefile new file mode 100644 index 00000000..38fa1729 --- /dev/null +++ b/test/issues/1176/Makefile @@ -0,0 +1,19 @@ +CC=gcc +TARGET=C1176T02 C1176T03 C1176T04 + +all:: $(TARGET) + +C1176T02: C1176T02.c + $(CC) -g -Wall -o $@ $^ + +C1176T03: C1176T03.c + $(CC) -g -Wall -o $@ $^ + +C1176T04: C1176T04.c + $(CC) -g -Wall -o $@ $^ + +test:: $(TARGET) + sh ./C1176.sh + +clean:: + rm -f $(TARGET) *.o diff --git a/test/issues/1176/README b/test/issues/1176/README new file mode 100644 index 00000000..34dda6e2 --- /dev/null +++ b/test/issues/1176/README @@ -0,0 +1,48 @@ +【Issue#1176 動作確認】 +□ テスト内容 +1. 指摘の現象が解消されていることを確認する。 + +C1176T01 指摘の現象が解消されていることを、テストプログラムを用いて確認 + mcexec test_mck -s getrusage -n 2 + +2. 修正が正しく動作することをテストプログラムを用いて確認する。 + 変更は以下の3点である。 + (1) システムコール出口で高速化のためシグナル処理を呼び出していなかったが、 + ロック無しでシグナル受信を判定後に、シグナル処理を呼び出すようにした。 + (2) futex_wait 処理で高速化のためシグナル受信の判定処理をしていなかったが、 + ロック無しでシグナル受信を判定し、受信していた場合にfutex_wait + 処理を脱出するようにした。(その後、システムコール出口でシグナルが + 処理される) + (3) 高速化のためシステムコール出口での再スケジュール処理を呼び出して + いなかったが、ロック無しで再スケジュール要否を判定し、再スケジュールが + 必要な場合は再スケジュール処理を行うようにした。 + +C1176T02 システムコールオフロード中にシグナル受信し、システムコールが中断 + してシグナル処理されることを確認 + +C1176T03 futex_wait 処理中にシグナル受信し、futex_waitが中断してシグナルが + 処理されることを確認 + +C1176T04 2つのプロセスを同一CPUに割り当てた状態で、各々1秒sleepするとき、 + それぞれのプロセスが正しく処理されることを確認。 + sleep完了後にシステムコール出口で再スケジュール処理が行われる + +3. 
修正が既存処理に影響しないことをLTPを用いて確認する。 + シグナル関連処理(正常系)を中心にテストプログラムを選定した。 + +C1176T05 kill01: kill の基本機能の確認 +C1176T06 kill12: kill, wait, signal の組み合わせ確認 +C1176T07 pause02: pause の基本機能の確認 +C1176T08 sigaction01: sigaction の基本機能の確認 + +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +□ 実行結果 +C1176.txt 参照。 +全ての項目が OK となっていることを確認。 diff --git a/test/issues/1180/C1180.sh b/test/issues/1180/C1180.sh new file mode 100644 index 00000000..f1ee1eed --- /dev/null +++ b/test/issues/1180/C1180.sh @@ -0,0 +1,56 @@ +#!/bin/sh + +USELTP=0 +USEOSTEST=1 + +. ../../common.sh + +tid=001 +echo "*** RT_$tid start *******************************" +sudo ${MCEXEC} ${TESTMCK} -s sched_setaffinity -n 8 -- -p 20 2>&1 | tee ./RT_${tid}.txt +if grep "RESULT: ok" ./RT_${tid}.txt > /dev/null 2>&1 ; then + echo "*** RT_$tid: PASSED" +else + echo "*** RT_$tid: FAILED" +fi +echo "" + +tid=002 +echo "*** RT_$tid start *******************************" +sudo ${MCEXEC} ${TESTMCK} -s sched_setaffinity -n 1 -- -p 20 2>&1 | tee ./RT_${tid}.txt +if grep "RESULT: ok" ./RT_${tid}.txt > /dev/null 2>&1 ; then + echo "*** RT_$tid: PASSED" +else + echo "*** RT_$tid: FAILED" +fi +echo "" + +tid=003 +echo "*** RT_$tid start *******************************" +sudo ${MCEXEC} ${TESTMCK} -s sched_setaffinity -n 2 -- -p 20 2>&1 | tee ./RT_${tid}.txt +if grep "RESULT: ok" ./RT_${tid}.txt > /dev/null 2>&1 ; then + echo "*** RT_$tid: PASSED" +else + echo "*** RT_$tid: FAILED" +fi +echo "" + +tid=004 +echo "*** RT_$tid start *******************************" +sudo ${MCEXEC} ${TESTMCK} -s sched_setaffinity -n 9 -- -p 20 2>&1 | tee ./RT_${tid}.txt +if grep "RESULT: ok" ./RT_${tid}.txt > /dev/null 2>&1 ; then + echo "*** RT_$tid: PASSED" +else + echo "*** RT_$tid: FAILED" +fi +echo "" + +tid=005 +echo "*** RT_$tid start *******************************" +sudo ${MCEXEC} 
${TESTMCK} -s sched_setaffinity -n 10 -- -p 20 2>&1 | tee ./RT_${tid}.txt +if grep "RESULT: ok" ./RT_${tid}.txt > /dev/null 2>&1 ; then + echo "*** RT_$tid: PASSED" +else + echo "*** RT_$tid: FAILED" +fi +echo "" diff --git a/test/issues/1180/Makefile b/test/issues/1180/Makefile new file mode 100644 index 00000000..94c38325 --- /dev/null +++ b/test/issues/1180/Makefile @@ -0,0 +1,14 @@ +CC = gcc +TARGET= + +CPPFLAGS = +LDFLAGS = + +all: $(TARGET) + +test: all + @sh ./C1180.sh + +clean: + rm -f $(TARGET) *.o + diff --git a/test/issues/1180/README b/test/issues/1180/README new file mode 100644 index 00000000..9571fa39 --- /dev/null +++ b/test/issues/1180/README @@ -0,0 +1,29 @@ +【Issue#1180 動作確認】 +□ テスト内容 +1. Issueで報告された再現プログラムでの確認 +RT_001: ostest_sched_setaffinity.008 + 親プロセスが子プロセスに対して、子プロセスが実行中のCPUを除いたcpusetを + 指定したsched_setaffinity()を実行し、即座にマイグレートすることを確認 + +2. 既存のマイグレーション機能に影響がないことをOSTESTを用いて確認 +RT_002: ostest_sched_setaffinity.001 + 自プロセスをマイグレートできることを確認 +RT_003: ostest_sched_setaffinity.002 + マイグレートした自プロセスのレジスタの値が保持されていることを確認 +RT_004: ostest_sched_setaffinity.009 + マイグレートした子プロセスのレジスタの値が保持されていることを確認 +RT_005: ostest_sched_setaffinity.010 + 複数の子プロセスを同一のCPU上にマイグレートしても、 + それぞれのレジスタの値が保持されていることを確認 + +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +□ 実行結果 +result.log 参照 +すべての項目をPASSしていることを確認 diff --git a/test/issues/1180/result.log b/test/issues/1180/result.log new file mode 100644 index 00000000..d990c005 --- /dev/null +++ b/test/issues/1180/result.log @@ -0,0 +1,40 @@ +*** RT_001 start ******************************* +TEST_SUITE: sched_setaffinity +TEST_NUMBER: 8 +ARGS: -p 20 +RESULT: ok +*** RT_001: PASSED + +*** RT_002 start ******************************* +TEST_SUITE: sched_setaffinity +TEST_NUMBER: 1 +ARGS: -p 20 +RESULT: ok +*** RT_002: PASSED + +*** RT_003 start ******************************* +TEST_SUITE: 
sched_setaffinity +TEST_NUMBER: 2 +ARGS: -p 20 +==before migrate process== +f0 = 3.140000, f1 = 42.195000, f2 = 53.849998, f3 = 909.908997 +==after migrate process== +f0 = 3.140000, f1 = 42.195000, f2 = 53.849998, f3 = 909.908997 +RESULT: ok +*** RT_003: PASSED + +*** RT_004 start ******************************* +TEST_SUITE: sched_setaffinity +TEST_NUMBER: 9 +ARGS: -p 20 +==Please check migrated child process keep datas== +RESULT: ok +*** RT_004: PASSED + +*** RT_005 start ******************************* +TEST_SUITE: sched_setaffinity +TEST_NUMBER: 10 +ARGS: -p 20 +==Please check migrated child process keep datas== +RESULT: ok +*** RT_005: PASSED diff --git a/test/issues/1203/C1203.sh b/test/issues/1203/C1203.sh new file mode 100644 index 00000000..e437f427 --- /dev/null +++ b/test/issues/1203/C1203.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +. ../../common.sh + +if "$MCEXEC" ./C1203T01; then + echo "*** C1203T01: OK" +else + echo "*** C1203T01: NG" +fi + +# to run as user, chmod 1777 /dev/hugepages +if sudo HUGETLB_VERBOSE=2 HUGETLB_ELFMAP=RW HUGETLB_DEBUG=1 "$MCEXEC" ./C1203T02; then + echo "*** C1203T02: OK" +else + echo "*** C1203T02: NG" +fi diff --git a/test/issues/1203/C1203T01.c b/test/issues/1203/C1203T01.c new file mode 100644 index 00000000..8993e98c --- /dev/null +++ b/test/issues/1203/C1203T01.c @@ -0,0 +1,101 @@ +#include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/wait.h> +#include <unistd.h> + +#define MAP_SIZE (2 * (2 * 1024 * 1024)) + +int main(int argc, char *argv[]) +{ + int fd; + long int *addr; + pid_t pid; + + + if ((fd = open("/dev/hugepages/foo", O_CREAT|O_RDWR, 0600)) < 0) { + perror("open"); + return -1; + } + unlink("/dev/hugepages/foo"); + if ((pid = fork()) == 0) { + if ((addr = mmap(NULL, MAP_SIZE, PROT_READ|PROT_WRITE, + MAP_SHARED, fd, 0)) == MAP_FAILED) { + perror("mmap"); + return -1; + } + for (int i = 0; i < MAP_SIZE / sizeof(long int); i++) { + if (addr[i] != 0) { + 
fprintf(stderr, + "memory wasn't zeroed at offset %lx\n", + i * sizeof(long int)); + return -1; + } + } + addr[42] = 12; + if (munmap(addr, MAP_SIZE) < 0) { + perror("munmap"); + return -1; + } + return 0; + } + if (pid < 0) { + perror("fork"); + return -1; + } + + if (waitpid(pid, NULL, 0) <= 0) { + perror("waitpid"); + return -1; + } + + /* bigger extent: check what was set is still here and rest is zero */ + if ((addr = mmap(NULL, 2 * MAP_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, + fd, 0)) == MAP_FAILED) { + perror("mmap, 2"); + return -1; + } + if (addr[42] != 12) { + perror("unexpected content"); + return -1; + } + for (int i = 0; i < MAP_SIZE / sizeof(long int); i++) { + if (addr[MAP_SIZE / sizeof(long int) + i] != 0) { + fprintf(stderr, "memory wasn't zeroed at offset %lx\n", + MAP_SIZE + i * sizeof(long int)); + return -1; + } + } + addr[MAP_SIZE / sizeof(long int) + 17] = 42; + if (munmap(addr, MAP_SIZE) < 0) { + perror("munmap, 2"); + return -1; + } + + /* same with offset */ + if ((addr = mmap(NULL, 2 * MAP_SIZE, PROT_READ|PROT_EXEC, + MAP_PRIVATE|MAP_NORESERVE, fd, MAP_SIZE)) + == MAP_FAILED) { + perror("mmap, 2"); + return -1; + } + if (addr[17] != 42) { + perror("unexpected content (2)"); + return -1; + } + for (int i = 0; i < MAP_SIZE / sizeof(long int); i++) { + if (addr[MAP_SIZE / sizeof(long int) + i] != 0) { + fprintf(stderr, "memory wasn't zeroed at offset %lx\n", + 2 * MAP_SIZE + i * sizeof(long int)); + return -1; + } + } + if (munmap(addr, MAP_SIZE) < 0) { + perror("munmap, 3"); + return -1; + } + + return 0; +} diff --git a/test/issues/1203/C1203T02.c b/test/issues/1203/C1203T02.c new file mode 100644 index 00000000..d693ec05 --- /dev/null +++ b/test/issues/1203/C1203T02.c @@ -0,0 +1,13 @@ +#include <unistd.h> + +#define __unused __attribute__((unused)) + +static __unused int data[1024*1024] = { 1, 0 }; +static __unused int data_zero[1024*1024] = { 0 }; +static __unused int const data_ro[1024*1024] = { 1, 0 }; +static __unused int const 
data_ro_zero[1024*1024] = { 0 }; + +int main(int argc, char *argv[]) +{ + return 0; +} diff --git a/test/issues/1203/Makefile b/test/issues/1203/Makefile new file mode 100644 index 00000000..3dcf1dc8 --- /dev/null +++ b/test/issues/1203/Makefile @@ -0,0 +1,12 @@ +TARGET = C1203T01 C1203T02 +SCRIPT = ./C1203.sh +C1203T02: LDFLAGS = -B /usr/share/libhugetlbfs -Wl,--hugetlbfs-align +CFLAGS = -Wall + +all: $(TARGET) + +test: all + bash $(SCRIPT) + +clean: + rm -f $(TARGET) *.o diff --git a/test/issues/731/C731.sh b/test/issues/731/C731.sh index 32eeaaa7..af5a9586 100644 --- a/test/issues/731/C731.sh +++ b/test/issues/731/C731.sh @@ -1,61 +1,21 @@ #!/bin/sh -BIN= -SBIN= -LTP= +USELTP=1 +USEOSTEST=0 + BOOTPARAM="-c 1-7,17-23,9-15,25-31 -m 10G@0,10G@1" +. ../../common.sh -if [ -f ../../../config.h ]; then - str=`grep "^#define BINDIR " ../../../config.h | head -1 | sed 's/^#define BINDIR /BINDIR=/'` - eval $str -fi -if [ "x$BINDIR" = x ];then - BINDIR="$BIN" -fi - -if [ -f ../../../Makefile ]; then - str=`grep ^SBINDIR ../../../Makefile | head -1 | sed 's/ //g'` - eval $str -fi -if [ "x$SBINDIR" = x ];then - SBINDIR="$SBIN" -fi - -if [ -f $HOME/ltp/testcases/bin/fork01 ]; then - LTPDIR=$HOME/ltp/testcases -fi -if [ "x$LTPDIR" = x ]; then - LTPDIR="$LTP" -fi - -if ! lsmod | grep mcctrl > /dev/null 2>&1; then - if [ ! -x $SBINDIR/mcreboot.sh ]; then - echo no mcreboot found >&2 - exit 1 - fi - sudo $SBINDIR/mcreboot.sh $BOOTPARAM -fi - -if [ ! 
-x $BINDIR/mcexec ]; then - echo no mcexec found >&2 - exit 1 -fi - -sudo $BINDIR/mcexec ./g310a 2>&1 | tee g310a.txt +sudo $MCEXEC ./g310a 2>&1 | tee g310a.txt if grep "fork: Permission denied" g310a.txt > /dev/null 2>&1 ; then echo "*** C731T001: g310a OK" else echo "*** C731T001: g310a NG" fi -if [ x$LTPDIR = x ]; then - echo no LTP found >&2 - exit 1 -fi - for i in 01:002 02:003 03:004 04:005 07:006 08:007; do tp=`echo $i|sed 's/:.*//'` id=`echo $i|sed 's/.*://'` - $BINDIR/mcexec $LTPDIR/bin/fork$tp 2>&1 | tee fork$tp.txt + $MCEXEC $LTPBIN/fork$tp 2>&1 | tee fork$tp.txt ok=`grep TPASS fork$tp.txt | wc -l` ng=`grep TFAIL fork$tp.txt | wc -l` if [ $ng = 0 ]; then diff --git a/test/issues/731/README b/test/issues/731/README index 49073d94..7aced615 100644 --- a/test/issues/731/README +++ b/test/issues/731/README @@ -25,10 +25,10 @@ C731T007 fork08 の実行確認 □ 実行手順 $ make test -実行できない場合は、C731.shの以下の行を適切に書き換えた後に実行。 -BIN= mcexec が存在するパス -SBIN= mcreboot.sh が存在するパス -LTP= LTPが存在するパス +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する □ 実行結果 C731.txt 参照。 diff --git a/test/issues/732/C732.sh b/test/issues/732/C732.sh new file mode 100644 index 00000000..80916c1d --- /dev/null +++ b/test/issues/732/C732.sh @@ -0,0 +1,116 @@ +#!/bin/sh + +USELTP=0 +USEOSTEST=0 + +. ../../common.sh + +tid=001 +echo "*** CT_${tid} start ***************************" +fail=0 +tgt_file=maps + +${MCEXEC} cat -v /proc/self/${tgt_file} | tee ./CT_${tid}.txt +tail -1 CT_${tid}.txt | grep -e "\^@$" + +if [ $? 
!= 0 ]; then + echo "[OK] end of /proc/<PID>/${tgt_file} is not NULL character" +else + echo "[NG] end of /proc/<PID>/${tgt_file} is unnecessary NULL character" + fail=1 +fi + +if [ X${fail} != X0 ]; then + echo "*** CT_${tid} FAILED" +else + echo "*** CT_${tid} PASSED" +fi +echo "" + +tid=002 +echo "*** CT_${tid} start ***************************" +fail=0 +tgt_file=stat + +${MCEXEC} cat -v /proc/self/${tgt_file} | tee ./CT_${tid}.txt +tail -1 CT_${tid}.txt | grep -e "\^@$" + +if [ $? != 0 ]; then + echo "[OK] end of /proc/<PID>/${tgt_file} is not NULL character" +else + echo "[NG] end of /proc/<PID>/${tgt_file} is unnecessary NULL character" + fail=1 +fi + +if [ X${fail} != X0 ]; then + echo "*** CT_${tid} FAILED" +else + echo "*** CT_${tid} PASSED" +fi +echo "" + +tid=003 +echo "*** CT_${tid} start ***************************" +fail=0 +tgt_file=status + +${MCEXEC} cat -v /proc/self/${tgt_file} | tee ./CT_${tid}.txt +tail -1 CT_${tid}.txt | grep -e "\^@$" + +if [ $? != 0 ]; then + echo "[OK] end of /proc/<PID>/${tgt_file} is not NULL character" +else + echo "[NG] end of /proc/<PID>/${tgt_file} is unnecessary NULL character" + fail=1 +fi + +if [ X${fail} != X0 ]; then + echo "*** CT_${tid} FAILED" +else + echo "*** CT_${tid} PASSED" +fi +echo "" + +tid=004 +echo "*** CT_${tid} start ***************************" +fail=0 +tgt_file=stack + +${MCEXEC} cat -v /proc/self/${tgt_file} | tee ./CT_${tid}.txt +tail -1 CT_${tid}.txt | grep -e "\^@$" + +if [ $? != 0 ]; then + echo "[OK] end of /proc/<PID>/${tgt_file} is not NULL character" +else + echo "[NG] end of /proc/<PID>/${tgt_file} is unnecessary NULL character" + fail=1 +fi + +if [ X${fail} != X0 ]; then + echo "*** CT_${tid} FAILED" +else + echo "*** CT_${tid} PASSED" +fi +echo "" + +tid=005 +echo "*** CT_${tid} start ***************************" +fail=0 +tgt_file=numa_maps + +${MCEXEC} cat -v /proc/self/${tgt_file} | tee ./CT_${tid}.txt +tail -1 CT_${tid}.txt | grep -e "\^@$" + +if [ $? 
!= 0 ]; then + echo "[OK] end of /proc/<PID>/${tgt_file} is not NULL character" +else + echo "[NG] end of /proc/<PID>/${tgt_file} is unnecessary NULL character" + fail=1 +fi + +if [ X${fail} != X0 ]; then + echo "*** CT_${tid} FAILED" +else + echo "*** CT_${tid} PASSED" +fi +echo "" diff --git a/test/issues/732/CT_001.sh b/test/issues/732/CT_001.sh deleted file mode 100755 index bcbda069..00000000 --- a/test/issues/732/CT_001.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/sh - -TESTNAME=CT_001 -tgt_file=maps - -fail=0 - -. ./config - -echo "*** ${TESTNAME} start ***************************" - -${MCEXEC} cat -v /proc/self/${tgt_file} | tee ./${TESTNAME}.log -tail -1 ${TESTNAME}.log | grep -e "\^@$" - -if [ $? != 0 ]; then - echo "[OK] end of /proc/<PID>/${tgt_file} is not NULL character" -else - echo "[NG] end of /proc/<PID>/${tgt_file} is unnecessary NULL character" - fail=1 -fi - -rm ./${TESTNAME}.log - -if [ X${fail} != X0 ]; then - echo "*** ${TESTNAME} FAILED" -else - echo "*** ${TESTNAME} PASSED" -fi -echo "" diff --git a/test/issues/732/CT_002.sh b/test/issues/732/CT_002.sh deleted file mode 100755 index e9f6a67b..00000000 --- a/test/issues/732/CT_002.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/sh - -TESTNAME=CT_002 -tgt_file=stat - -fail=0 - -. ./config - -echo "*** ${TESTNAME} start ***************************" - -${MCEXEC} cat -v /proc/self/${tgt_file} | tee ./${TESTNAME}.log -tail -1 ${TESTNAME}.log | grep -e "\^@$" - -if [ $? 
!= 0 ]; then - echo "[OK] end of /proc/<PID>/${tgt_file} is not NULL character" -else - echo "[NG] end of /proc/<PID>/${tgt_file} is unnecessary NULL character" - fail=1 -fi - -rm ./${TESTNAME}.log - -if [ X${fail} != X0 ]; then - echo "*** ${TESTNAME} FAILED" -else - echo "*** ${TESTNAME} PASSED" -fi -echo "" diff --git a/test/issues/732/CT_003.sh b/test/issues/732/CT_003.sh deleted file mode 100755 index c0ba87e6..00000000 --- a/test/issues/732/CT_003.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/sh - -TESTNAME=CT_003 -tgt_file=status - -fail=0 - -. ./config - -echo "*** ${TESTNAME} start ***************************" - -${MCEXEC} cat -v /proc/self/${tgt_file} | tee ./${TESTNAME}.log -tail -1 ${TESTNAME}.log | grep -e "\^@$" - -if [ $? != 0 ]; then - echo "[OK] end of /proc/<PID>/${tgt_file} is not NULL character" -else - echo "[NG] end of /proc/<PID>/${tgt_file} is unnecessary NULL character" - fail=1 -fi - -rm ./${TESTNAME}.log - -if [ X${fail} != X0 ]; then - echo "*** ${TESTNAME} FAILED" -else - echo "*** ${TESTNAME} PASSED" -fi -echo "" diff --git a/test/issues/732/CT_004.sh b/test/issues/732/CT_004.sh deleted file mode 100755 index be01812b..00000000 --- a/test/issues/732/CT_004.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/sh - -TESTNAME=CT_004 -tgt_file=stack - -fail=0 - -. ./config - -echo "*** ${TESTNAME} start ***************************" - -${MCEXEC} cat -v /proc/self/${tgt_file} | tee ./${TESTNAME}.log -tail -1 ${TESTNAME}.log | grep -e "\^@$" - -if [ $? 
!= 0 ]; then - echo "[OK] end of /proc/<PID>/${tgt_file} is not NULL character" -else - echo "[NG] end of /proc/<PID>/${tgt_file} is unnecessary NULL character" - fail=1 -fi - -rm ./${TESTNAME}.log - -if [ X${fail} != X0 ]; then - echo "*** ${TESTNAME} FAILED" -else - echo "*** ${TESTNAME} PASSED" -fi -echo "" diff --git a/test/issues/732/CT_005.sh b/test/issues/732/CT_005.sh deleted file mode 100755 index 87ebfd43..00000000 --- a/test/issues/732/CT_005.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/sh - -TESTNAME=CT_005 -tgt_file=numa_maps - -fail=0 - -. ./config - -echo "*** ${TESTNAME} start ***************************" - -${MCEXEC} cat -v /proc/self/${tgt_file} | tee ./${TESTNAME}.log -tail -1 ${TESTNAME}.log | grep -e "\^@$" - -if [ $? != 0 ]; then - echo "[OK] end of /proc/<PID>/${tgt_file} is not NULL character" -else - echo "[NG] end of /proc/<PID>/${tgt_file} is unnecessary NULL character" - fail=1 -fi - -rm ./${TESTNAME}.log - -if [ X${fail} != X0 ]; then - echo "*** ${TESTNAME} FAILED" -else - echo "*** ${TESTNAME} PASSED" -fi -echo "" diff --git a/test/issues/732/Makefile b/test/issues/732/Makefile index e616f4fb..f34f368b 100644 --- a/test/issues/732/Makefile +++ b/test/issues/732/Makefile @@ -1,23 +1,14 @@ CC = gcc -MCK_DIR=/home/satoken/ppos -MCEXEC=$(MCK_DIR)/bin/mcexec -TARGET=config +TARGET = CPPFLAGS = LDFLAGS = all: $(TARGET) -config: - @echo "MCEXEC=$(MCEXEC)" > ./config - test: all - ./CT_001.sh - ./CT_002.sh - ./CT_003.sh - ./CT_004.sh - ./CT_005.sh + @sh ./C732.sh clean: rm -f $(TARGET) *.o diff --git a/test/issues/732/README b/test/issues/732/README index 0df48fb9..1c2f7fc7 100644 --- a/test/issues/732/README +++ b/test/issues/732/README @@ -1,20 +1,35 @@ +【Issue#732 動作確認】 +□ テスト内容 +1. Issueで報告された症状が解消されていることを確認 +CT_001: /proc/<PID>/maps + /proc/<PID>/maps の内容を出力し、 + 出力結果の末尾に不要なNULL文字が存在しないことを確認する -CT_001: - /proc/<PID>/maps の内容を出力する - -> 出力結果の末尾に不要なNULL文字が存在しない +2. 
他のprocfsインタフェースで同様の症状が発生しないことを確認 +CT_002: /proc/<PID>/stat + /proc/<PID>/stat の内容を出力し、 + 出力結果の末尾に不要なNULL文字が存在しないことを確認する -CT_002: - /proc/<PID>/stat の内容を出力する - -> 出力結果の末尾に不要なNULL文字が存在しない +CT_003: /proc/<PID>/status + /proc/<PID>/status の内容を出力し、 + 出力結果の末尾に不要なNULL文字が存在しないことを確認する -CT_003: - /proc/<PID>/status の内容を出力する - -> 出力結果の末尾に不要なNULL文字が存在しない +CT_004: /proc/<PID>/stack + /proc/<PID>/stack の内容を出力し、 + 出力結果の末尾に不要なNULL文字が存在しないことを確認する -CT_004: - /proc/<PID>/stack の内容を出力する - -> 出力結果の末尾に不要なNULL文字が存在しない +CT_005: /proc/<PID>/numa_maps + /proc/<PID>/numa_maps の内容を出力し、 + 出力結果の末尾に不要なNULL文字が存在しないことを確認する -CT_005: - /proc/<PID>/numa_maps の内容を出力する - -> 出力結果の末尾に不要なNULL文字が存在しない +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +□ 実行結果 +result.log 参照。 +すべての項目をPASSしていることを確認。 diff --git a/test/issues/732/resul.log b/test/issues/732/resul.log deleted file mode 100644 index ca5fee1b..00000000 --- a/test/issues/732/resul.log +++ /dev/null @@ -1,127 +0,0 @@ -./CT_001.sh -*** CT_001 start *************************** -000000400000-00000040b000 r-xs 0 0:0 0 -00000060b000-00000060c000 r--s 0 0:0 0 -00000060c000-00000060d000 rw-s 0 0:0 0 -000000800000-000000821000 rw-s 0 0:0 0 [heap] -2aaaaa9f8000-2aaaaaa00000 rw-s 0 0:0 0 -2aaaaaa00000-2aaaaaa21000 r-xs 0 0:0 0 -2aaaaac21000-2aaaaac22000 r--s 0 0:0 0 -2aaaaac22000-2aaaaac24000 rw-s 0 0:0 0 -2aaaaac24000-2aaaaac26000 r-xs 0 0:0 0 [vdso] -2aaaaac26000-2aaaaac27000 rw-p 0 0:0 0 -2aaaaac4d000-2aaaaac4e000 rw-p 0 0:0 0 -2aaaaac4e000-2aaaaae06000 r-xp 0 0:0 0 -2aaaaae06000-2aaaab006000 ---p 0 0:0 0 -2aaaab006000-2aaaab00a000 r--p 0 0:0 0 -2aaaab00a000-2aaaab00c000 rw-p 0 0:0 0 -2aaaab00c000-2aaaab011000 rw-p 0 0:0 0 -2aaaab011000-2aaaab013000 rw-p 0 0:0 0 -2aaaab013000-2aaab153c000 r--p 0 0:0 0 -2aaab153c000-2aaab158e000 rw-p 0 0:0 0 -547fff800000-548000000000 rw-s 0 0:0 0 [stack] -[OK] end of /proc/<PID>/maps is not 
NULL character -*** CT_001 PASSED - -./CT_002.sh -*** CT_002 start *************************** -9360 (exe) S 9359 9342 21102 34816 9342 1073750272 661 0 0 0 0 0 0 0 20 0 10 0 51949304 92908821254144 772 18446744073709551615 93824992231424 93824992276796 140737488343392 140737488328480 140737345437527 0 0 0 2147155711 18446744073709551615 0 0 17 0 0 0 0 0 0 93824994377104 93824994379104 93824994390016 140737488344339 140737488344392 140737488344392 140737488351209 0 -[OK] end of /proc/<PID>/stat is not NULL character -*** CT_002 PASSED - -./CT_003.sh -*** CT_003 start *************************** -Uid: 3505 3505 3505 3505 -Gid: 3002 3002 3002 3002 -State: R (running) -VmLck: 0 kB -Cpus_allowed: ff -Cpus_allowed_list: 0-7 -Mems_allowed: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000001 -Mems_allowed_list: 0 -[OK] end of /proc/<PID>/status is not NULL character -*** CT_003 PASSED - -./CT_004.sh -*** CT_004 start *************************** -[<ffffffff810e26e4>] futex_wait_queue_me+0xc4/0x120 -[<ffffffff810e3259>] futex_wait+0x179/0x280 -[<ffffffff810e52ee>] do_futex+0xfe/0x5b0 -[<ffffffff810e5820>] SyS_futex+0x80/0x180 -[<ffffffff81646c49>] system_call_fastpath+0x16/0x1b -[<ffffffffffffffff>] 0xffffffffffffffff -[OK] end of /proc/<PID>/stack is not NULL character -*** CT_004 PASSED - -./CT_005.sh -*** CT_005 start *************************** -00000000 prefer:0 file=anon_inode:[mckernel] -0060b000 prefer:0 file=anon_inode:[mckernel] -0060c000 prefer:0 file=anon_inode:[mckernel] mapped=3 mapmax=3 active=0 N0=3 kernelpagesize_kB=4 -2aaaaac21000 prefer:0 file=anon_inode:[mckernel] -2aaaaac22000 prefer:0 file=anon_inode:[mckernel] mapped=1 mapmax=3 active=0 N0=1 kernelpagesize_kB=4 -2aaaaac27000 prefer:0 file=anon_inode:[mckernel] -2aaaaac4d000 prefer:0 file=anon_inode:[mckernel] -2aaaaac4e000 prefer:0 file=anon_inode:[mckernel] mapped=1 mapmax=3 active=0 N0=1 kernelpagesize_kB=4 -2aaaab006000 prefer:0 file=anon_inode:[mckernel] -2aaaab00a000 prefer:0 
file=anon_inode:[mckernel] dirty=1 mapmax=3 active=0 N0=1 kernelpagesize_kB=4 -2aaaab00c000 prefer:0 file=anon_inode:[mckernel] -2aaaab011000 prefer:0 file=anon_inode:[mckernel] -2aaaab013000 prefer:0 file=anon_inode:[mckernel] -2aaab153c000 prefer:0 file=anon_inode:[mckernel] dirty=3 mapped=512 mapmax=3 active=0 N0=512 kernelpagesize_kB=4 -555555554000 prefer:0 file=/home/satoken/mck_srcs/pposs/ppos_development/bin/mcexec mapped=10 N0=10 kernelpagesize_kB=4 -55555575f000 prefer:0 file=/home/satoken/mck_srcs/pposs/ppos_development/bin/mcexec anon=1 dirty=1 N1=1 kernelpagesize_kB=4 -555555760000 prefer:0 file=/home/satoken/mck_srcs/pposs/ppos_development/bin/mcexec anon=1 dirty=1 N1=1 kernelpagesize_kB=4 -555555761000 prefer:0 heap anon=11 dirty=11 N1=11 kernelpagesize_kB=4 -7ffff29c9000 prefer:0 -7ffff29ca000 prefer:0 stack:9416 anon=3 dirty=3 N0=2 N1=1 kernelpagesize_kB=4 -7ffff31ca000 prefer:0 -7ffff31cb000 prefer:0 stack:9415 anon=7 dirty=7 N0=6 N1=1 kernelpagesize_kB=4 -7ffff39cb000 prefer:0 -7ffff39cc000 prefer:0 stack:9414 anon=3 dirty=3 N0=2 N1=1 kernelpagesize_kB=4 -7ffff41cc000 prefer:0 -7ffff41cd000 prefer:0 stack:9413 anon=3 dirty=3 N0=3 kernelpagesize_kB=4 -7ffff49cd000 prefer:0 -7ffff49ce000 prefer:0 stack:9412 anon=3 dirty=3 N0=3 kernelpagesize_kB=4 -7ffff51ce000 prefer:0 -7ffff51cf000 prefer:0 stack:9411 anon=3 dirty=3 N0=3 kernelpagesize_kB=4 -7ffff59cf000 prefer:0 -7ffff59d0000 prefer:0 stack:9410 anon=3 dirty=3 N0=2 N1=1 kernelpagesize_kB=4 -7ffff61d0000 prefer:0 -7ffff61d1000 prefer:0 stack:9409 anon=3 dirty=3 N0=3 kernelpagesize_kB=4 -7ffff69d1000 prefer:0 -7ffff69d2000 prefer:0 stack:9408 anon=3 dirty=3 N0=2 N1=1 kernelpagesize_kB=4 -7ffff71d2000 prefer:0 file=/usr/lib64/libgcc_s-4.8.5-20150702.so.1 mapped=3 mapmax=25 N0=3 kernelpagesize_kB=4 -7ffff71e7000 prefer:0 file=/usr/lib64/libgcc_s-4.8.5-20150702.so.1 -7ffff73e6000 prefer:0 file=/usr/lib64/libgcc_s-4.8.5-20150702.so.1 anon=1 dirty=1 N1=1 kernelpagesize_kB=4 -7ffff73e7000 prefer:0 
file=/usr/lib64/libgcc_s-4.8.5-20150702.so.1 anon=1 dirty=1 N1=1 kernelpagesize_kB=4 -7ffff73e8000 prefer:0 file=/usr/lib64/libc-2.17.so mapped=111 mapmax=52 N0=105 N1=6 kernelpagesize_kB=4 -7ffff75a0000 prefer:0 file=/usr/lib64/libc-2.17.so -7ffff77a0000 prefer:0 file=/usr/lib64/libc-2.17.so anon=4 dirty=4 N1=4 kernelpagesize_kB=4 -7ffff77a4000 prefer:0 file=/usr/lib64/libc-2.17.so anon=2 dirty=2 N1=2 kernelpagesize_kB=4 -7ffff77a6000 prefer:0 anon=3 dirty=3 N1=3 kernelpagesize_kB=4 -7ffff77ab000 prefer:0 file=/usr/lib64/libpthread-2.17.so mapped=17 mapmax=42 N0=17 kernelpagesize_kB=4 -7ffff77c2000 prefer:0 file=/usr/lib64/libpthread-2.17.so -7ffff79c1000 prefer:0 file=/usr/lib64/libpthread-2.17.so anon=1 dirty=1 N1=1 kernelpagesize_kB=4 -7ffff79c2000 prefer:0 file=/usr/lib64/libpthread-2.17.so anon=1 dirty=1 N1=1 kernelpagesize_kB=4 -7ffff79c3000 prefer:0 anon=1 dirty=1 N1=1 kernelpagesize_kB=4 -7ffff79c7000 prefer:0 file=/usr/lib64/libnuma.so.1 mapped=8 mapmax=3 N1=8 kernelpagesize_kB=4 -7ffff79d1000 prefer:0 file=/usr/lib64/libnuma.so.1 -7ffff7bd1000 prefer:0 file=/usr/lib64/libnuma.so.1 anon=1 dirty=1 N1=1 kernelpagesize_kB=4 -7ffff7bd2000 prefer:0 file=/usr/lib64/libnuma.so.1 anon=1 dirty=1 N1=1 kernelpagesize_kB=4 -7ffff7bd3000 prefer:0 file=/usr/lib64/librt-2.17.so mapped=3 mapmax=33 N0=3 kernelpagesize_kB=4 -7ffff7bda000 prefer:0 file=/usr/lib64/librt-2.17.so -7ffff7dd9000 prefer:0 file=/usr/lib64/librt-2.17.so anon=1 dirty=1 N1=1 kernelpagesize_kB=4 -7ffff7dda000 prefer:0 file=/usr/lib64/librt-2.17.so anon=1 dirty=1 N1=1 kernelpagesize_kB=4 -7ffff7ddb000 prefer:0 file=/usr/lib64/ld-2.17.so mapped=28 mapmax=50 N0=28 kernelpagesize_kB=4 -7ffff7fce000 prefer:0 anon=5 dirty=5 N1=5 kernelpagesize_kB=4 -7ffff7ff2000 prefer:0 anon=1 dirty=1 N1=1 kernelpagesize_kB=4 -7ffff7ff3000 prefer:0 anon=6 dirty=6 N1=6 kernelpagesize_kB=4 -7ffff7ff9000 prefer:0 anon=1 dirty=1 N1=1 kernelpagesize_kB=4 -7ffff7ffa000 prefer:0 -7ffff7ffc000 prefer:0 file=/usr/lib64/ld-2.17.so 
anon=1 dirty=1 N1=1 kernelpagesize_kB=4 -7ffff7ffd000 prefer:0 file=/usr/lib64/ld-2.17.so anon=1 dirty=1 N1=1 kernelpagesize_kB=4 -7ffff7ffe000 prefer:0 anon=1 dirty=1 N1=1 kernelpagesize_kB=4 -7ffffffdc000 prefer:0 stack anon=9 dirty=9 N1=9 kernelpagesize_kB=4 -[OK] end of /proc/<PID>/numa_maps is not NULL character -*** CT_005 PASSED diff --git a/test/issues/732/result.log b/test/issues/732/result.log new file mode 100644 index 00000000..8cff7ae0 --- /dev/null +++ b/test/issues/732/result.log @@ -0,0 +1,178 @@ +*** CT_001 start *************************** +000000400000-00000040b000 r-xs 0 0:0 0 +00000060b000-00000060c000 r--s 0 0:0 0 +00000060c000-00000060d000 rw-s 0 0:0 0 +000000800000-000000821000 rw-s 0 0:0 0 [heap] +2aaaaa9f8000-2aaaaaa00000 rw-s 0 0:0 0 +2aaaaaa00000-2aaaaaa21000 r-xs 0 0:0 0 +2aaaaac21000-2aaaaac22000 r--s 0 0:0 0 +2aaaaac22000-2aaaaac24000 rw-s 0 0:0 0 +2aaaaac24000-2aaaaac26000 r-xs 0 0:0 0 [vdso] +2aaaaac26000-2aaaaac27000 rw-p 0 0:0 0 +2aaaaac4d000-2aaaaac4e000 rw-p 0 0:0 0 +2aaaaac4e000-2aaaaae06000 r-xp 0 0:0 0 /usr/lib64/libc-2.17.so +2aaaaae06000-2aaaab006000 ---p 0 0:0 0 /usr/lib64/libc-2.17.so +2aaaab006000-2aaaab00a000 r--p 0 0:0 0 /usr/lib64/libc-2.17.so +2aaaab00a000-2aaaab00c000 rw-p 0 0:0 0 /usr/lib64/libc-2.17.so +2aaaab00c000-2aaaab011000 rw-p 0 0:0 0 +2aaaab011000-2aaaab013000 rw-p 0 0:0 0 +2aaaab013000-2aaab153c000 r--p 0 0:0 0 /usr/lib/locale/locale-archive +2aaab153c000-2aaab158e000 rw-p 0 0:0 0 +547fff800000-548000000000 rw-s 0 0:0 0 [stack] +[OK] end of /proc/<PID>/maps is not NULL character +*** CT_001 PASSED + +*** CT_002 start *************************** +3144 (exe) S 2244 2243 15031 34816 2243 1073750272 917 0 0 0 0 0 0 0 20 0 31 0 34234557 92909005017088 851 18446744073709551615 93824992231424 93824992280252 140737488341744 140737488322800 140737342230359 0 0 0 2147155711 18446744073709551615 0 0 17 0 0 0 0 0 0 93824994381168 93824994383328 93824994394112 140737488342765 140737488342818 140737488342818 
140737488351209 0 +[OK] end of /proc/<PID>/stat is not NULL character +*** CT_002 PASSED + +*** CT_003 start *************************** +Pid: 3178 +Uid: 3505 3505 3505 3505 +Gid: 3002 3002 3002 3002 +State: R (running) +VmLck: 0 kB +Cpus_allowed: fffffff +Cpus_allowed_list: 0-27 +Mems_allowed: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000003 +Mems_allowed_list: 0-1 +[OK] end of /proc/<PID>/status is not NULL character +*** CT_003 PASSED + +*** CT_004 start *************************** +[<ffffffff810e26e4>] futex_wait_queue_me+0xc4/0x120 +[<ffffffff810e3259>] futex_wait+0x179/0x280 +[<ffffffff810e52ee>] do_futex+0xfe/0x5b0 +[<ffffffff810e5820>] SyS_futex+0x80/0x180 +[<ffffffff81646c49>] system_call_fastpath+0x16/0x1b +[<ffffffffffffffff>] 0xffffffffffffffff +[OK] end of /proc/<PID>/stack is not NULL character +*** CT_004 PASSED + +*** CT_005 start *************************** +00000000 prefer:1 file=anon_inode:[mckernel] +0060b000 prefer:1 file=anon_inode:[mckernel] +0060c000 prefer:1 file=anon_inode:[mckernel] mapped=3 mapmax=2 active=0 N0=3 kernelpagesize_kB=4 +2aaaaac21000 prefer:1 file=anon_inode:[mckernel] +2aaaaac22000 prefer:1 file=anon_inode:[mckernel] mapped=1 mapmax=2 active=0 N0=1 kernelpagesize_kB=4 +2aaaaac27000 prefer:1 file=anon_inode:[mckernel] +2aaaaac4d000 prefer:1 file=anon_inode:[mckernel] +2aaaaac4e000 prefer:1 file=anon_inode:[mckernel] mapped=1 mapmax=2 active=0 N0=1 kernelpagesize_kB=4 +2aaaab006000 prefer:1 file=anon_inode:[mckernel] +2aaaab00a000 prefer:1 file=anon_inode:[mckernel] dirty=1 mapmax=2 active=0 N0=1 kernelpagesize_kB=4 +2aaaab00c000 prefer:1 file=anon_inode:[mckernel] +2aaaab011000 prefer:1 file=anon_inode:[mckernel] +2aaaab013000 prefer:1 file=anon_inode:[mckernel] +2aaab153c000 prefer:1 file=anon_inode:[mckernel] dirty=2 mapped=512 mapmax=2 active=0 N0=512 kernelpagesize_kB=4 +555555554000 prefer:1 file=/home/satoken/mck_srcs/pposs/ppos_gerrit/bin/mcexec mapped=12 N1=12 kernelpagesize_kB=4 +555555760000 
prefer:1 file=/home/satoken/mck_srcs/pposs/ppos_gerrit/bin/mcexec anon=1 dirty=1 N0=1 kernelpagesize_kB=4 +555555761000 prefer:1 file=/home/satoken/mck_srcs/pposs/ppos_gerrit/bin/mcexec anon=1 dirty=1 N0=1 kernelpagesize_kB=4 +555555762000 prefer:1 heap anon=15 dirty=15 N0=15 kernelpagesize_kB=4 +7fffe7a8b000 prefer:1 +7fffe7a8c000 prefer:1 stack:3277 anon=3 dirty=3 N0=2 N1=1 kernelpagesize_kB=4 +7fffe828c000 prefer:1 +7fffe828d000 prefer:1 stack:3276 anon=3 dirty=3 N0=2 N1=1 kernelpagesize_kB=4 +7fffe8a8d000 prefer:1 +7fffe8a8e000 prefer:1 stack:3275 anon=3 dirty=3 N0=2 N1=1 kernelpagesize_kB=4 +7fffe928e000 prefer:1 +7fffe928f000 prefer:1 stack:3274 anon=3 dirty=3 N0=2 N1=1 kernelpagesize_kB=4 +7fffe9a8f000 prefer:1 +7fffe9a90000 prefer:1 stack:3273 anon=3 dirty=3 N0=2 N1=1 kernelpagesize_kB=4 +7fffea290000 prefer:1 +7fffea291000 prefer:1 stack:3272 anon=3 dirty=3 N0=2 N1=1 kernelpagesize_kB=4 +7fffeaa91000 prefer:1 +7fffeaa92000 prefer:1 stack:3271 anon=3 dirty=3 N0=2 N1=1 kernelpagesize_kB=4 +7fffeb292000 prefer:1 +7fffeb293000 prefer:1 stack:3270 anon=6 dirty=6 N0=2 N1=4 kernelpagesize_kB=4 +7fffeba93000 prefer:1 +7fffeba94000 prefer:1 stack:3269 anon=3 dirty=3 N0=2 N1=1 kernelpagesize_kB=4 +7fffec294000 prefer:1 +7fffec295000 prefer:1 stack:3268 anon=3 dirty=3 N0=2 N1=1 kernelpagesize_kB=4 +7fffeca95000 prefer:1 +7fffeca96000 prefer:1 stack:3267 anon=3 dirty=3 N0=2 N1=1 kernelpagesize_kB=4 +7fffed296000 prefer:1 +7fffed297000 prefer:1 stack:3266 anon=3 dirty=3 N0=2 N1=1 kernelpagesize_kB=4 +7fffeda97000 prefer:1 +7fffeda98000 prefer:1 stack:3265 anon=3 dirty=3 N0=2 N1=1 kernelpagesize_kB=4 +7fffee298000 prefer:1 +7fffee299000 prefer:1 stack:3264 anon=3 dirty=3 N0=2 N1=1 kernelpagesize_kB=4 +7fffeea99000 prefer:1 +7fffeea9a000 prefer:1 stack:3263 anon=5 dirty=5 N0=2 N1=3 kernelpagesize_kB=4 +7fffef29a000 prefer:1 +7fffef29b000 prefer:1 stack:3262 anon=3 dirty=3 N0=2 N1=1 kernelpagesize_kB=4 +7fffefa9b000 prefer:1 +7fffefa9c000 prefer:1 stack:3261 anon=3 
dirty=3 N0=2 N1=1 kernelpagesize_kB=4 +7ffff029c000 prefer:1 +7ffff029d000 prefer:1 stack:3260 anon=3 dirty=3 N0=2 N1=1 kernelpagesize_kB=4 +7ffff0a9d000 prefer:1 +7ffff0a9e000 prefer:1 stack:3259 anon=3 dirty=3 N0=2 N1=1 kernelpagesize_kB=4 +7ffff129e000 prefer:1 +7ffff129f000 prefer:1 stack:3258 anon=3 dirty=3 N0=2 N1=1 kernelpagesize_kB=4 +7ffff1a9f000 prefer:1 +7ffff1aa0000 prefer:1 stack:3257 anon=3 dirty=3 N0=2 N1=1 kernelpagesize_kB=4 +7ffff22a0000 prefer:1 +7ffff22a1000 prefer:1 stack:3256 anon=3 dirty=3 N0=2 N1=1 kernelpagesize_kB=4 +7ffff2aa1000 prefer:1 +7ffff2aa2000 prefer:1 stack:3255 anon=3 dirty=3 N0=2 N1=1 kernelpagesize_kB=4 +7ffff32a2000 prefer:1 +7ffff32a3000 prefer:1 stack:3254 anon=3 dirty=3 N0=2 N1=1 kernelpagesize_kB=4 +7ffff3aa3000 prefer:1 +7ffff3aa4000 prefer:1 stack:3253 anon=3 dirty=3 N0=3 kernelpagesize_kB=4 +7ffff42a4000 prefer:1 +7ffff42a5000 prefer:1 stack:3252 anon=3 dirty=3 N0=3 kernelpagesize_kB=4 +7ffff4aa5000 prefer:1 +7ffff4aa6000 prefer:1 stack:3251 anon=3 dirty=3 N0=2 N1=1 kernelpagesize_kB=4 +7ffff52a6000 prefer:1 +7ffff52a7000 prefer:1 stack:3250 anon=3 dirty=3 N0=3 kernelpagesize_kB=4 +7ffff5aa7000 prefer:1 +7ffff5aa8000 prefer:1 stack:3249 anon=3 dirty=3 N0=3 kernelpagesize_kB=4 +7ffff62a8000 prefer:1 +7ffff62a9000 prefer:1 stack:3248 anon=3 dirty=3 N0=2 N1=1 kernelpagesize_kB=4 +7ffff6aa9000 prefer:1 file=/usr/lib64/libdl-2.17.so mapped=2 mapmax=9 N0=2 kernelpagesize_kB=4 +7ffff6aab000 prefer:1 file=/usr/lib64/libdl-2.17.so +7ffff6cab000 prefer:1 file=/usr/lib64/libdl-2.17.so anon=1 dirty=1 N0=1 kernelpagesize_kB=4 +7ffff6cac000 prefer:1 file=/usr/lib64/libdl-2.17.so anon=1 dirty=1 N0=1 kernelpagesize_kB=4 +7ffff6cad000 prefer:1 file=/usr/lib64/libz.so.1.2.7 mapped=3 mapmax=5 N0=3 kernelpagesize_kB=4 +7ffff6cc2000 prefer:1 file=/usr/lib64/libz.so.1.2.7 +7ffff6ec1000 prefer:1 file=/usr/lib64/libz.so.1.2.7 anon=1 dirty=1 N0=1 kernelpagesize_kB=4 +7ffff6ec2000 prefer:1 file=/usr/lib64/libz.so.1.2.7 anon=1 dirty=1 N0=1 
kernelpagesize_kB=4 +7ffff6ec3000 prefer:1 file=/usr/lib64/libgcc_s-4.8.5-20150702.so.1 mapped=3 mapmax=30 N0=3 kernelpagesize_kB=4 +7ffff6ed8000 prefer:1 file=/usr/lib64/libgcc_s-4.8.5-20150702.so.1 +7ffff70d7000 prefer:1 file=/usr/lib64/libgcc_s-4.8.5-20150702.so.1 anon=1 dirty=1 N0=1 kernelpagesize_kB=4 +7ffff70d8000 prefer:1 file=/usr/lib64/libgcc_s-4.8.5-20150702.so.1 anon=1 dirty=1 N0=1 kernelpagesize_kB=4 +7ffff70d9000 prefer:1 file=/usr/lib64/libc-2.17.so mapped=112 mapmax=60 N0=112 kernelpagesize_kB=4 +7ffff7291000 prefer:1 file=/usr/lib64/libc-2.17.so +7ffff7491000 prefer:1 file=/usr/lib64/libc-2.17.so anon=4 dirty=4 N0=4 kernelpagesize_kB=4 +7ffff7495000 prefer:1 file=/usr/lib64/libc-2.17.so anon=2 dirty=2 N0=2 kernelpagesize_kB=4 +7ffff7497000 prefer:1 anon=3 dirty=3 N0=3 kernelpagesize_kB=4 +7ffff749c000 prefer:1 file=/usr/lib64/libpthread-2.17.so mapped=17 mapmax=49 N0=17 kernelpagesize_kB=4 +7ffff74b3000 prefer:1 file=/usr/lib64/libpthread-2.17.so +7ffff76b2000 prefer:1 file=/usr/lib64/libpthread-2.17.so anon=1 dirty=1 N0=1 kernelpagesize_kB=4 +7ffff76b3000 prefer:1 file=/usr/lib64/libpthread-2.17.so anon=1 dirty=1 N0=1 kernelpagesize_kB=4 +7ffff76b4000 prefer:1 anon=1 dirty=1 N0=1 kernelpagesize_kB=4 +7ffff76b8000 prefer:1 file=/home/satoken/mck_srcs/pposs/ppos_gerrit/lib/libihk.so mapped=50 N1=50 kernelpagesize_kB=4 +7ffff77ad000 prefer:1 file=/home/satoken/mck_srcs/pposs/ppos_gerrit/lib/libihk.so +7ffff79ac000 prefer:1 file=/home/satoken/mck_srcs/pposs/ppos_gerrit/lib/libihk.so anon=18 dirty=18 N0=18 kernelpagesize_kB=4 +7ffff79be000 prefer:1 file=/home/satoken/mck_srcs/pposs/ppos_gerrit/lib/libihk.so anon=5 dirty=5 N0=5 kernelpagesize_kB=4 +7ffff79c3000 prefer:1 +7ffff79c7000 prefer:1 file=/usr/lib64/libnuma.so.1 mapped=8 mapmax=3 N0=8 kernelpagesize_kB=4 +7ffff79d1000 prefer:1 file=/usr/lib64/libnuma.so.1 +7ffff7bd1000 prefer:1 file=/usr/lib64/libnuma.so.1 anon=1 dirty=1 N0=1 kernelpagesize_kB=4 +7ffff7bd2000 prefer:1 
file=/usr/lib64/libnuma.so.1 anon=1 dirty=1 N0=1 kernelpagesize_kB=4 +7ffff7bd3000 prefer:1 file=/usr/lib64/librt-2.17.so mapped=3 mapmax=38 N0=3 kernelpagesize_kB=4 +7ffff7bda000 prefer:1 file=/usr/lib64/librt-2.17.so +7ffff7dd9000 prefer:1 file=/usr/lib64/librt-2.17.so anon=1 dirty=1 N0=1 kernelpagesize_kB=4 +7ffff7dda000 prefer:1 file=/usr/lib64/librt-2.17.so anon=1 dirty=1 N0=1 kernelpagesize_kB=4 +7ffff7ddb000 prefer:1 file=/usr/lib64/ld-2.17.so mapped=28 mapmax=56 N0=28 kernelpagesize_kB=4 +7ffff7fcd000 prefer:1 anon=6 dirty=6 N0=6 kernelpagesize_kB=4 +7ffff7ff2000 prefer:1 anon=1 dirty=1 N0=1 kernelpagesize_kB=4 +7ffff7ff3000 prefer:1 anon=6 dirty=6 N0=6 kernelpagesize_kB=4 +7ffff7ff9000 prefer:1 anon=1 dirty=1 N0=1 kernelpagesize_kB=4 +7ffff7ffa000 prefer:1 +7ffff7ffc000 prefer:1 file=/usr/lib64/ld-2.17.so anon=1 dirty=1 N0=1 kernelpagesize_kB=4 +7ffff7ffd000 prefer:1 file=/usr/lib64/ld-2.17.so anon=1 dirty=1 N0=1 kernelpagesize_kB=4 +7ffff7ffe000 prefer:1 anon=1 dirty=1 N0=1 kernelpagesize_kB=4 +7ffffffdb000 prefer:1 stack anon=11 dirty=11 N0=11 kernelpagesize_kB=4 +[OK] end of /proc/<PID>/numa_maps is not NULL character +*** CT_005 PASSED diff --git a/test/issues/765/C765.c b/test/issues/765/C765.c new file mode 100644 index 00000000..102e8cfd --- /dev/null +++ b/test/issues/765/C765.c @@ -0,0 +1,113 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/mman.h> +#include <sys/syscall.h> +#include <sys/ipc.h> +#include <sys/shm.h> +#include <errno.h> + +#define SMALL_PAGE_SIZE 4096L +#define PAGE_MASK (~(SMALL_PAGE_SIZE - 1)) +#define GET_PAGE_INFO 750 + +int +is_small_page(long pageinfo) +{ + return (pageinfo & PAGE_MASK) == SMALL_PAGE_SIZE; +} + +int +is_shared(long pageinfo) +{ + return pageinfo & 1; +} + +void +print_test(char *id, char *msg, void *p, int valid_small, int valid_shared) +{ + long pageinfo = syscall(GET_PAGE_INFO, p); + int ng = 0; + int small_page = 
is_small_page(pageinfo); + int shared = is_shared(pageinfo); + char buf[80]; + + if (pageinfo == -1 && errno == ENOSYS) { + fprintf(stderr, "get_page_info: unsupported\n"); + exit(1); + } + + sprintf(buf, "%s %s addr=%p %s %s ", id, msg, p, + small_page ? "SMALL" : "LARGE", shared ? "SHARED" : "PRIVATE"); + + if (valid_small != -1 && + small_page != valid_small) { + ng = 1; + } + if (shared != valid_shared) { + ng = 1; + } + printf("%s %s\n", buf, ng ? "NG" : "OK"); +} + +int +main(int argc, char **argv) +{ + void *p; + char x[10]; + key_t key; + int shmid; + struct shmid_ds buf; + + p = x; + memset(p, '\0', 10); + print_test("C765T01", "stack", p, -1, 0); + + p = malloc(10); + memset(p, '\0', 10); + print_test("C765T02", "heap", p, -1, 0); + + p = mmap(NULL, 8 * 1024, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + memset(p, '\0', 8 * 1024); + print_test("C765T03", "private(8k)", p, 1, 0); + munmap(p, 8 * 1024); + + p = mmap(NULL, 2 * 1024 * 1024, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + memset(p, '\0', 2 * 1024 * 1024); + print_test("C765T04", "private(2M)", p, 0, 0); + munmap(p, 2 * 1024 * 1024); + + p = mmap(NULL, 8 * 1024, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + memset(p, '\0', 8 * 1024); + print_test("C765T05", "shared(8k)", p, 1, 1); + munmap(p, 8 * 1024); + + p = mmap(NULL, 2 * 1024 * 1024, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + memset(p, '\0', 2 * 1024 * 1024); + print_test("C765T06", "shared(2M)", p, 1, 1); + munmap(p, 2 * 1024 * 1024); + + key = ftok(argv[0], 1); + shmid = shmget(key, 8 * 1024, IPC_CREAT | 0660); + p = shmat(shmid, NULL, 0); + memset(p, '\0', 8 * 1024); + print_test("C765T07", "shm(8k)", p, 1, 1); + shmctl(shmid, IPC_RMID, &buf); + shmdt(p); + + key = ftok(argv[0], 2); + shmid = shmget(key, 2 * 1024 * 1024, IPC_CREAT | 0660); + p = shmat(shmid, NULL, 0); + memset(p, '\0', 2 * 1024 * 1024); + print_test("C765T08", "shm(2M)", p, 1, 1); + shmctl(shmid, 
IPC_RMID, &buf); + shmdt(p); + + exit(0); +} diff --git a/test/issues/765/C765.patch b/test/issues/765/C765.patch new file mode 100644 index 00000000..f43b1f23 --- /dev/null +++ b/test/issues/765/C765.patch @@ -0,0 +1,128 @@ +diff --git arch/x86_64/kernel/include/syscall_list.h arch/x86_64/kernel/include/syscall_list.h +index 6de0ccc..912af92 100644 +--- arch/x86_64/kernel/include/syscall_list.h ++++ arch/x86_64/kernel/include/syscall_list.h +@@ -162,6 +162,7 @@ SYSCALL_HANDLED(730, util_migrate_inter_kernel) + SYSCALL_HANDLED(731, util_indicate_clone) + SYSCALL_HANDLED(732, get_system) + SYSCALL_HANDLED(733, util_register_desc) ++SYSCALL_HANDLED(750, get_mem_info) + + /* McKernel Specific */ + SYSCALL_HANDLED(801, swapout) +diff --git kernel/mem.c kernel/mem.c +index fa2e347..3a6adc2 100644 +--- kernel/mem.c ++++ kernel/mem.c +@@ -1604,6 +1604,9 @@ int page_unmap(struct page *page) + return 1; + } + */ ++ if(ihk_atomic_read(&page->count) < 0) { ++ kprintf("page_unmap: BAD count\n"); ++ } + + dkprintf("page_unmap(%p %x %d): 1\n", page, page->mode, page->count); + list_del(&page->hash); +@@ -2538,3 +2541,31 @@ int ihk_mc_get_mem_user_page(void *arg0, page_table_t pt, pte_t *ptep, void *pga + + return 0; + } ++ ++void dbg_page_count(int init) ++{ ++ ++ int i; ++ struct page *page_iter; ++ unsigned long irqflags; ++ int cnt = 0; ++ int bad = 0; ++ ++ for (i = 0; i < PHYS_PAGE_HASH_SIZE; i++) { ++ irqflags = ihk_mc_spinlock_lock(&page_hash_locks[i]); ++ list_for_each_entry(page_iter, &page_hash[i], hash) { ++ cnt++; ++ if (ihk_atomic_read(&page_iter->count) < 0) ++ bad++; ++ } ++ ihk_mc_spinlock_unlock(&page_hash_locks[i], irqflags); ++ } ++ ++ if (init || bad) { ++ if (!bad) ++ kprintf("struct page # = %d\n", cnt); ++ else ++ kprintf("struct page # = %d, bad # = %d\n", cnt, bad); ++ } ++} ++ +diff --git kernel/process.c kernel/process.c +index fad3e91..8de718f 100644 +--- kernel/process.c ++++ kernel/process.c +@@ -84,6 +84,7 @@ extern ihk_spinlock_t cpuid_head_lock; 
+ int ptrace_detach(int pid, int data); + extern void procfs_create_thread(struct thread *); + extern void procfs_delete_thread(struct thread *); ++extern void dbg_page_count(int); + + struct list_head resource_set_list; + mcs_rwlock_lock_t resource_set_lock; +@@ -2773,6 +2774,8 @@ static void idle(void) + v->status = CPU_STATUS_IDLE; + cpu_enable_interrupt(); + ++ dbg_page_count(1); ++ + while (1) { + cpu_local_var(current)->status = PS_STOPPED; + schedule(); +@@ -3287,6 +3290,8 @@ void schedule(void) + + if ((last != NULL) && (last->status == PS_EXITED)) { + release_thread(last); ++ ++ dbg_page_count(0); + } + + /* Have we migrated to another core meanwhile? */ +diff --git kernel/syscall.c kernel/syscall.c +index 466ca89..5aa3449 100644 +--- kernel/syscall.c ++++ kernel/syscall.c +@@ -9287,6 +9287,35 @@ SYSCALL_DECLARE(util_register_desc) + return 0; + } + ++SYSCALL_DECLARE(get_mem_info) ++{ ++ unsigned long addr = ihk_mc_syscall_arg0(ctx); ++ struct thread *thread = cpu_local_var(current); ++ struct vm_range *range = lookup_process_memory_range(thread->vm, ++ addr, addr + 1); ++ struct process_vm *vm = thread->vm; ++ pte_t *ptep; ++ void *pgaddr; ++ size_t pgsize; ++ int p2align; ++ struct page *page = NULL; ++ ++ if (!range) ++ return -EINVAL; ++ ++ ihk_mc_spinlock_lock_noirq(&vm->page_table_lock); ++ ptep = ihk_mc_pt_lookup_pte(vm->address_space->page_table, ++ (void *)addr, range->pgshift, &pgaddr, &pgsize, &p2align); ++ if (ptep && !pte_is_null(ptep) && !pte_is_fileoff(ptep, pgsize)) { ++ unsigned long phys; ++ phys = pte_get_phys(ptep); ++ page = phys_to_page(phys); ++ } ++ ++ ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock); ++ return pgsize | (page ? 1 : 0); ++} ++ + void + reset_cputime() + { diff --git a/test/issues/765/C765.sh b/test/issues/765/C765.sh new file mode 100755 index 00000000..9915c368 --- /dev/null +++ b/test/issues/765/C765.sh @@ -0,0 +1,17 @@ +#!/bin/sh +USELTP=0 +USEOSTEST=1 + +BOOTPARAM="-c 1-7 -m 4G@0" +. 
../../common.sh + +$MCEXEC ./C765 + +$MCEXEC $TESTMCK -s mem_limits -n 0 -- -f mmap -s 7340032 -c 1 + +if $SBINDIR/ihkosctl 0 kmsg | grep -i bad > /dev/null 2>&1; then + $SBINDIR/ihkosctl 0 kmsg + echo C765T09 NG +else + echo C765T09 OK +fi diff --git a/test/issues/765/C765.txt b/test/issues/765/C765.txt new file mode 100644 index 00000000..5a5e1533 --- /dev/null +++ b/test/issues/765/C765.txt @@ -0,0 +1,22 @@ +Script started on Wed Aug 1 14:18:26 2018 +bash-4.2$ make test +gcc -o C765 C765.c -Wall -g +sh ./C765.sh +C765T01 stack addr=0x547ffffffc40 LARGE PRIVATE OK +C765T02 heap addr=0x802f30 SMALL PRIVATE OK +C765T03 private(8k) addr=0x2aaaac739000 SMALL PRIVATE OK +C765T04 private(2M) addr=0x2aaaac800000 LARGE PRIVATE OK +C765T05 shared(8k) addr=0x2aaaaca00000 SMALL SHARED OK +C765T06 shared(2M) addr=0x2aaaaca02000 SMALL SHARED OK +C765T07 shm(8k) addr=0x2aaaacc02000 SMALL SHARED OK +C765T08 shm(2M) addr=0x2aaaacc04000 SMALL SHARED OK +TEST_SUITE: mem_limits +TEST_NUMBER: 0 +ARGS: -f mmap -s 7340032 -c 1 +alloc#0: p=0x2aaaac800000 +RESULT: ok +C765T09 OK +bash-4.2$ exit +exit + +Script done on Wed Aug 1 14:18:40 2018 diff --git a/test/issues/765/Makefile b/test/issues/765/Makefile new file mode 100644 index 00000000..bff4e4be --- /dev/null +++ b/test/issues/765/Makefile @@ -0,0 +1,13 @@ +CC=gcc +TARGET=C765 + +all:: $(TARGET) + +C765: C765.c + $(CC) -o C765 C765.c -Wall -g + +test:: $(TARGET) + sh ./C765.sh + +clean:: + rm -f *.o $(TARGET) diff --git a/test/issues/765/README b/test/issues/765/README new file mode 100644 index 00000000..33a7a203 --- /dev/null +++ b/test/issues/765/README @@ -0,0 +1,49 @@ +【Issue#765 動作確認】 +□ テスト内容 +1. 
共有可能なページがラージページに割り当てられないことの確認 +Issue#765の問題は、共有可能なラージページをスモールページに分割したとき、 +共有情報が不正になることであった。 +しかし、McKernel は共有対象の領域をラージページに割り当てないため、共有 +可能なページが分割されることはあり得ず、Issueが顕在化することは無い。 +このことをテストプログラムを用いて確認する。 + +C765T01 スタック領域がラージページに割り当てられ、共有不能なことを確認する。 +C765T02 ヒープ領域がスモールページに割り当てられ、共有不能なことを確認する。 +C765T03 mmapで8kBのMAP_PRIVATE領域がスモールページに割り当てられ、共有不能な + ことを確認する。 +C765T04 mmapで2MBのMAP_PRIVATE領域がラージページに割り当てられ、共有不能な + ことを確認する。 +C765T05 mmapで8kBのMAP_SHARED領域がスモールページに割り当てられ、共有可能な + ことを確認する。 +C765T06 mmapで2MBのMAP_SHARED領域がスモールページに割り当てられ、共有可能な + ことを確認する。 +C765T07 shmatで8kBの領域がスモールページに割り当てられ、共有可能なことを + 確認する。 +C765T08 shmatで2MBの領域がスモールページに割り当てられ、共有可能なことを + 確認する。 + +2. 指摘プログラムで現象が発生しないことの確認 +ostestのmem_limitsを実行し、現象が発生しないことを確認する。 + +C765T09 struct pageの参照カウンタが負にならないことを確認する。 + +□ パッチ適用 +動作確認のため、McKernelにパッチを適用する。 +$ cd /path/to/mckernel +$ patch -p0 < test/issues/765/C765.patch +$ <configure, build, install> + +C765.patch 指定されたメモリのページサイズと共有状況を調べるシステムコールを + 追加するパッチ。 + +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +□ 実行結果 +C765.txt 参照。 +全ての項目が OK となっていることを確認。 diff --git a/test/issues/771+1179+1143/C771.sh b/test/issues/771+1179+1143/C771.sh new file mode 100755 index 00000000..95c17b3b --- /dev/null +++ b/test/issues/771+1179+1143/C771.sh @@ -0,0 +1,117 @@ +#!/bin/sh +USELTP=1 +USEOSTEST=1 +GDBBUILDDIR="$HOME/rpmbuild/BUILD/gdb-7.6.1/build-x86_64-redhat-linux-gnu" + +. 
../../common.sh + +#=============================================================================== +expect -c " +set timeout 60 +spawn "$MCEXEC" gdb ./C771T001 +expect \"(gdb)\" +send -- \"b thr\n\" + +expect \"(gdb)\" +send -- \"r\n\" + +expect \"(gdb)\" +send -- \"info threads\n\" + +expect \"(gdb)\" +send -- \"bt\n\" + +expect \"(gdb)\" +send -- \"n\n\" + +expect \"(gdb)\" +send -- \"thread 1\n\" + +expect \"(gdb)\" +send -- \"bt\n\" + +expect \"(gdb)\" +send -- \"thread 2\n\" + +expect \"(gdb)\" +send -- \"c\n\" + +expect \"(gdb)\" +send -- \"q\n\" + +expect eof +" | tee C771T001.txt + +echo checking result... +awk -f C771T001.awk C771T001.txt +rm -f C771T001.txt + +sleep 5 +"$SBIN"/ihkosctl 0 clear_kmsg +"$SBIN"/ihkosctl 0 ioctl 40000000 1 +"$SBIN"/ihkosctl 0 ioctl 40000000 2 +"$SBIN"/ihkosctl 0 kmsg | tee C771T012.txt +if grep ' 0 processes are found' C771T012.txt > /dev/null 2>&1 && \ + grep ' 0 threads are found' C771T012.txt > /dev/null 2>&1; then + echo "*** C771T012 no processes and threads found OK" +else + echo "*** C771T012 processes and threads are exists NG" +fi +rm -f C771T012.txt + +#=============================================================================== +if [ -x "$GDBBUILDDIR/gdb/testsuite/gdb.threads/bp_in_thread" ] ;then + if [ -d gdb-result ]; then + rm -rf gdb-result + fi + mkdir -p gdb-result/raw/linux gdb-result/raw/mck + mkdir -p gdb-result/linux gdb-result/mck + export gdb_builddir="$GDBBUILDDIR" + export MCEXEC + + id=13 + while read line; do + cat=`echo $line | awk '{print $1}'` + exp=`echo $line | awk '{print $2}'` + ./gdb_test.sh $cat $exp 2>&1 | tee $cat-$exp.txt + if grep "【PASS】" $cat-$exp.txt > /dev/null 2>&1; then + echo "*** C771T0$id: $cat-$exp OK" + else + echo "*** C771T0$id: $cat-$exp NG" + fi + rm -f $cat-$exp.txt + id=`expr $id + 1` + done < gdblist +else + echo '***' No GDB build dir. 
skip GDB tests >&2 +fi + +#=============================================================================== +$MCEXEC ./C771T033 + +#=============================================================================== +$MCEXEC "$TESTMCK" -s ptrace -n 19 | tee C771T036.txt +if grep "RESULT: ok" C771T036.txt > /dev/null 2>&1; then + echo "*** C771T036: ostest-ptrace-19 OK" +else + echo "*** C771T036: ostest-ptrace-19 NG" +fi +rm -f C771T036.txt + +#=============================================================================== +$MCEXEC ./C771T037 + +#=============================================================================== +id=43 +while read tp; do + sudo $MCEXEC $LTPBIN/$tp 2>&1 | tee $tp.txt + ok=`grep TPASS $tp.txt | wc -l` + ng=`grep TFAIL $tp.txt | wc -l` + if [ $ng = 0 ]; then + echo "*** C771T0$id: $tp OK ($ok)" + else + echo "*** C771T0$id: $tp NG (ok=$ok ng=$ng)" + fi + rm -f $tp.txt + id=`expr $id + 1` +done < ltplist diff --git a/test/issues/771+1179+1143/C771.txt b/test/issues/771+1179+1143/C771.txt new file mode 100644 index 00000000..16a32d99 --- /dev/null +++ b/test/issues/771+1179+1143/C771.txt @@ -0,0 +1,627 @@ +Script started on Mon Sep 24 08:49:43 2018 +bash-4.2$ make test +gcc -g -Wall -o C771T001 C771T001.c -pthread +gcc -g -Wall -o C771T033 C771T033.c -pthread +gcc -g -Wall -o C771T037 C771T037.c -pthread +mcstop+release.sh ... done +mcreboot.sh -c 1-7,9-15,17-23,25-31 -m 10G@0,10G@1 -r 1-7:0+9-15:8+17-23:16+25-31:24 ... done +spawn /home/shirasawa/wallaby11-smp-x86/issue771/mic/bin/mcexec gdb ./C771T001 +GNU gdb (GDB) Red Hat Enterprise Linux 7.6.1-94.el7 +Copyright (C) 2013 Free Software Foundation, Inc. +License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html> +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. Type "show copying" +and "show warranty" for details. +This GDB was configured as "x86_64-redhat-linux-gnu". 
+For bug reporting instructions, please see: +<http://www.gnu.org/software/gdb/bugs/>... +Reading symbols from /home/shirasawa/wallaby11-smp-x86/issue771/mckernel/test/issues/771+1179+1143/C771T001...done. +(gdb) b thr +Breakpoint 1 at 0x400989: file C771T001.c, line 20. +(gdb) r +Starting program: /home/shirasawa/wallaby11-smp-x86/issue771/mckernel/test/issues/771+1179+1143/./C771T001 +[Thread debugging using libthread_db enabled] +Using host libthread_db library "/lib64/libthread_db.so.1". +process start pid=5721 +[New Thread 0x2aaaad000700 (LWP 5750)] +pthread_create: 0 +[Switching to Thread 0x2aaaad000700 (LWP 5750)] + +Breakpoint 1, thr (arg=0x0) at C771T001.c:20 +20 fprintf(stderr, "thread start tid=%d\n", (int)syscall(SYS_gettid)); +Missing separate debuginfos, use: debuginfo-install glibc-2.17-222.el7.x86_64 libgcc-4.8.5-28.el7_5.1.x86_64 libgfortran-4.8.5-28.el7_5.1.x86_64 libquadmath-4.8.5-28.el7_5.1.x86_64 mpich-3.2-3.2-2.el7.x86_64 +(gdb) info threads + Id Target Id Frame +* 2 Thread 0x2aaaad000700 (LWP 5750) "exe" thr (arg=0x0) at C771T001.c:20 + 1 Thread 0x2aaaac735e80 (LWP 5721) "exe" 0x0000000000400b61 in main ( + argc=1, argv=0x547ffffffd08) at C771T001.c:52 +(gdb) bt +#0 thr (arg=0x0) at C771T001.c:20 +#1 0x00002aaaaae49e25 in start_thread () from /lib64/libpthread.so.0 +#2 0x00002aaaab15cbad in clone () from /lib64/libc.so.6 +(gdb) n +thread start tid=5750 +21 fflush(stderr); +(gdb) thread 1 +[Switching to thread 1 (Thread 0x2aaaac735e80 (LWP 5721))] +#0 0x0000000000400b61 in main (argc=1, argv=0x547ffffffd08) at C771T001.c:52 +52 while (!m); +(gdb) bt +#0 0x0000000000400b61 in main (argc=1, argv=0x547ffffffd08) at C771T001.c:52 +(gdb) thread 2 +[Switching to thread 2 (Thread 0x2aaaad000700 (LWP 5750))] +#0 thr (arg=0x0) at C771T001.c:21 +21 fflush(stderr); +(gdb) c +Continuing. 
+mmap m=0x2aaaad001000 errno=0 +update m=0x2aaaad001000 +update *m=1 +munmap rc=0, errno=0 +main done +[Thread 0x2aaaad000700 (LWP 5750) exited] +[Inferior 1 (process 5721) exited normally] +(gdb) q +checking result... +*** C771T001 gdb start OK +*** C771T002 breakpoint command OK +*** C771T003 run command OK +*** C771T004 info threads command OK +*** C771T005 backtrace command OK +*** C771T006 next command OK +*** C771T007 thread command OK +*** C771T008 thread command OK +*** C771T009 thread command OK +*** C771T010 continue command OK +*** C771T011 quit command OK +OK=11 NG=0 +[ 0]: 0 processes are found. +[ 0]: 0 threads are found. + +*** C771T012 no processes and threads found OK +======== a2-run ======== +【SAME】a2-run: Summary. +【SAME】a2-run : Log. +【PASS】a2-run +*** C771T013: base-a2-run OK +======== foll-fork ======== +【SAME】foll-fork: Summary. +【SAME】foll-fork : Log. +【PASS】foll-fork +*** C771T014: base-foll-fork OK +======== fork-detach ======== +【SAME】fork-detach: Summary. +【SAME】fork-detach : Log. 
+【PASS】fork-detach +*** C771T015: base-fork-detach OK +======== atomic-seq-threaded ======== +【DIFF】atomic-seq-threaded : Summary Difference --- +--- /home/shirasawa/wallaby11-smp-x86/issue771/mckernel/test/issues/771+1179+1143/gdb-result/linux/atomic-seq-threaded.sum 2018-09-24 08:52:39.889889082 +0900 ++++ /home/shirasawa/wallaby11-smp-x86/issue771/mckernel/test/issues/771+1179+1143/gdb-result/mck/atomic-seq-threaded.sum 2018-09-24 08:52:39.899889116 +0900 +@@ -11,5 +11,5 @@ + === gdb Summary === + + # of unsupported tests 2 +-/home/shirasawa/rpmbuild/BUILD/gdb-7.6.1/build-x86_64-redhat-linux-gnu/gdb/testsuite/../../gdb/gdb version 7.6.1-110.el7.centos -nw -nx -data-directory /home/shirasawa/rpmbuild/BUILD/gdb-7.6.1/build-x86_64-redhat-linux-gnu/gdb/testsuite/../data-directory ++/home/shirasawa/wallaby11-smp-x86/issue771/mckernel/test/issues/771+1179+1143/mcexec_gdb.sh version 7.6.1-94.el7 -nw -nx -data-directory /home/shirasawa/rpmbuild/BUILD/gdb-7.6.1/build-x86_64-redhat-linux-gnu/gdb/testsuite/../data-directory + +【SAME】atomic-seq-threaded : Log. +【PASS】atomic-seq-threaded +*** C771T016: threads-atomic-seq-threaded OK +======== bp_in_thread ======== +【SAME】bp_in_thread: Summary. +【SAME】bp_in_thread : Log. +【PASS】bp_in_thread +*** C771T017: threads-bp_in_thread OK +======== bt-clone-stop ======== +【SAME】bt-clone-stop: Summary. +【SAME】bt-clone-stop : Log. +【PASS】bt-clone-stop +*** C771T018: threads-bt-clone-stop OK +======== corethreads ======== +【SAME】corethreads: Summary. +【SAME】corethreads : Log. +【PASS】corethreads +*** C771T019: threads-corethreads OK +======== dlopen-libpthread ======== +【SAME】dlopen-libpthread: Summary. +【SAME】dlopen-libpthread : Log. +【PASS】dlopen-libpthread +*** C771T020: threads-dlopen-libpthread OK +======== fork-child-threads ======== +【SAME】fork-child-threads: Summary. +【SAME】fork-child-threads : Log. +【PASS】fork-child-threads +*** C771T021: threads-fork-child-threads OK +======== killed ======== +【SAME】killed: Summary. 
+【SAME】killed : Log. +【PASS】killed +*** C771T022: threads-killed OK +======== pthread_cond_wait ======== +【SAME】pthread_cond_wait: Summary. +【SAME】pthread_cond_wait : Log. +【PASS】pthread_cond_wait +*** C771T023: threads-pthread_cond_wait OK +======== switch-threads ======== +【SAME】switch-threads: Summary. +【SAME】switch-threads : Log. +【PASS】switch-threads +*** C771T024: threads-switch-threads OK +======== thread-specific ======== +【SAME】thread-specific: Summary. +【SAME】thread-specific : Log. +【PASS】thread-specific +*** C771T025: threads-thread-specific OK +======== thread_check ======== +【SAME】thread_check: Summary. +【DIFF】thread_check : Log Difference --- +--- /home/shirasawa/wallaby11-smp-x86/issue771/mckernel/test/issues/771+1179+1143/gdb-result/linux/thread_check.log 2018-09-24 09:00:11.257427754 +0900 ++++ /home/shirasawa/wallaby11-smp-x86/issue771/mckernel/test/issues/771+1179+1143/gdb-result/mck/thread_check.log 2018-09-24 09:00:11.270427798 +0900 +@@ -13,4 +13,4 @@ + (gdb) PASS: gdb.threads/thread_check.exp: continue to tf + (gdb) PASS: gdb.threads/thread_check.exp: backtrace from thread function + (gdb) info breakpoints +-(gdb) Quitting /home/shirasawa/rpmbuild/BUILD/gdb-7.6.1/build-x86_64-redhat-linux-gnu/gdb/testsuite/../../gdb/gdb -nw -nx -data-directory /home/shirasawa/rpmbuild/BUILD/gdb-7.6.1/build-x86_64-redhat-linux-gnu/gdb/testsuite/../data-directory ++(gdb) Quitting mcexec_gdb.sh -nw -nx -data-directory /home/shirasawa/rpmbuild/BUILD/gdb-7.6.1/build-x86_64-redhat-linux-gnu/gdb/testsuite/../data-directory +【PASS】thread_check +*** C771T026: threads-thread_check OK +======== thread_events ======== +【SAME】thread_events: Summary. 
+【DIFF】thread_events : Log Difference --- +--- /home/shirasawa/wallaby11-smp-x86/issue771/mckernel/test/issues/771+1179+1143/gdb-result/linux/thread_events.log 2018-09-24 09:01:01.132598675 +0900 ++++ /home/shirasawa/wallaby11-smp-x86/issue771/mckernel/test/issues/771+1179+1143/gdb-result/mck/thread_events.log 2018-09-24 09:01:01.141598706 +0900 +@@ -15,7 +15,7 @@ + (gdb) PASS: gdb.threads/thread_events.exp: continue to threadfunc with messages disabled + (gdb) PASS: gdb.threads/thread_events.exp: continue to after_join_func with messages disabled + (gdb) info breakpoints +-(gdb) Quitting /home/shirasawa/rpmbuild/BUILD/gdb-7.6.1/build-x86_64-redhat-linux-gnu/gdb/testsuite/../../gdb/gdb -nw -nx -data-directory /home/shirasawa/rpmbuild/BUILD/gdb-7.6.1/build-x86_64-redhat-linux-gnu/gdb/testsuite/../data-directory ++(gdb) Quitting mcexec_gdb.sh -nw -nx -data-directory /home/shirasawa/rpmbuild/BUILD/gdb-7.6.1/build-x86_64-redhat-linux-gnu/gdb/testsuite/../data-directory + (gdb) GDB initialized. + (gdb) set width 0 + (gdb) set build-id-verbose 0 +@@ -33,4 +33,4 @@ + (gdb) PASS: gdb.threads/thread_events.exp: continue to threadfunc with messages enabled + (gdb) PASS: gdb.threads/thread_events.exp: continue to after_join_func with messages enabled + (gdb) info breakpoints +-(gdb) Quitting /home/shirasawa/rpmbuild/BUILD/gdb-7.6.1/build-x86_64-redhat-linux-gnu/gdb/testsuite/../../gdb/gdb -nw -nx -data-directory /home/shirasawa/rpmbuild/BUILD/gdb-7.6.1/build-x86_64-redhat-linux-gnu/gdb/testsuite/../data-directory ++(gdb) Quitting mcexec_gdb.sh -nw -nx -data-directory /home/shirasawa/rpmbuild/BUILD/gdb-7.6.1/build-x86_64-redhat-linux-gnu/gdb/testsuite/../data-directory +【PASS】thread_events +*** C771T027: threads-thread_events OK +======== threaded-exec ======== +【SAME】threaded-exec: Summary. +【SAME】threaded-exec : Log. +【PASS】threaded-exec +*** C771T028: threads-threaded-exec OK +======== threxit-hop-specific ======== +【SAME】threxit-hop-specific: Summary. 
+【SAME】threxit-hop-specific : Log. +【PASS】threxit-hop-specific +*** C771T029: threads-threxit-hop-specific OK +======== tls-nodebug ======== +【SAME】tls-nodebug: Summary. +【SAME】tls-nodebug : Log. +【PASS】tls-nodebug +*** C771T030: threads-tls-nodebug OK +======== tls-sepdebug ======== +【SAME】tls-sepdebug: Summary. +【SAME】tls-sepdebug : Log. +【PASS】tls-sepdebug +*** C771T031: threads-tls-sepdebug OK +======== tls-var ======== +【SAME】tls-var: Summary. +【SAME】tls-var : Log. +【PASS】tls-var +*** C771T032: threads-tls-var OK +tid=23026 +*** C771T033 *** ATTACH OK +*** C771T034 *** SYSCALL OK +syscall enter n=9 +*** C771T034 *** SYSCALL OK +syscall return n=9 r=46912535269376 +*** C771T035 DETACH OK +mmap m=0x2aaaad001000 errno=0 +update m=0x2aaaad001000 +update *m=1 +munmap rc=0, errno=0 +main done +TEST_SUITE: ptrace +TEST_NUMBER: 19 +ARGS: +TEST_SUITE: ptrace +TEST_NUMBER: 19 +ARGS: +child is stopped. +RESULT: ok +*** C771T036: ostest-ptrace-19 OK +pid=23121 +tid=23151 +*** C771T037 wait4(pid) OK +*** C771T038 wait4(tid) OK +*** C771T039 wait4(pid, __WCLONE) OK +*** C771T040 wait4(tid, __WCLONE) OK +*** C771T041 wait4(pid, __WALL) OK +*** C771T042 wait4(tid, __WALL) OK +clone01 1 TPASS : clone returned 23215 +*** C771T043: clone01 OK (1) +clone03 1 TPASS : Test passed +*** C771T044: clone03 OK (1) +clone04 1 TPASS : expected failure; Got EINVAL +*** C771T045: clone04 OK (1) +clone06 1 TPASS : Test Passed +*** C771T046: clone06 OK (1) +clone07 1 TPASS : Use of return() in child did not cause SIGSEGV +*** C771T047: clone07 OK (1) +exit01 1 TPASS : exit() test PASSED +*** C771T048: exit01 OK (1) +exit02 1 TPASS : exit() test PASSED +*** C771T049: exit02 OK (1) +exit_group01 1 TPASS : exit_group() succeeded +*** C771T050: exit_group01 OK (1) +fork01 1 TPASS : fork() returned 23754 +fork01 2 TPASS : child pid and fork() return agree: 23754 +*** C771T051: fork01 OK (2) +fork02 0 TINFO : Inside parent +fork02 0 TINFO : Inside child +fork02 0 TINFO : exit status of wait 0 
+fork02 1 TPASS : test 1 PASSED +*** C771T052: fork02 OK (1) +fork03 0 TINFO : process id in parent of child from fork : 23896 +fork03 1 TPASS : test 1 PASSED +*** C771T053: fork03 OK (1) +fork04 1 TPASS : Env var TERM unchanged after fork(): xterm +fork04 2 TPASS : Env var NoTSetzWq unchanged after fork(): getenv() does not find variable set +fork04 3 TPASS : Env var TESTPROG unchanged after fork(): FRKTCS04 +*** C771T054: fork04 OK (3) +fork07 0 TINFO : Forking 100 children +fork07 0 TINFO : Forked all 100 children, now collecting +fork07 0 TINFO : Collected all 100 children +fork07 1 TPASS : 100/100 children read correctly from an inheritted fd +*** C771T055: fork07 OK (1) +fork08 0 TINFO : parent forksval: 1 +fork08 0 TINFO : parent forksval: 2 +fork08 0 TINFO : second child got char: b +fork08 1 TPASS : Test passed in childnumber 2 +fork08 0 TINFO : exit status of wait expected 0 got 0 +fork08 1 TPASS : parent test PASSED +fork08 0 TINFO : exit status of wait expected 0 got 0 +fork08 2 TPASS : parent test PASSED +fork08 0 TINFO : exit status of wait expected 0 got 0 +fork08 3 TPASS : parent test PASSED +fork08 0 TINFO : Number of processes forked is 2 +*** C771T056: fork08 OK (4) +fork09 0 TINFO : OPEN_MAX is 1024 +fork09 0 TINFO : first file descriptor is 12 +fork09 0 TINFO : Parent reporting 1023 files open +fork09 0 TINFO : Child opened new file #1023 +fork09 1 TPASS : test 1 PASSED +*** C771T057: fork09 OK (1) +fork10 0 TINFO : fork child A +fork10 1 TPASS : test 1 PASSED +*** C771T058: fork10 OK (1) +fork11 1 TPASS : fork test passed, 100 processes +*** C771T059: fork11 OK (1) +kill01 1 TPASS : received expected signal 9 +*** C771T060: kill01 OK (1) +kill02 1 TPASS : The signal was sent to all processes in the process group. +kill02 2 TPASS : The signal was not sent to selective processes that were not in the process group. 
+*** C771T061: kill02 OK (2) +kill03 1 TPASS : errno set to 22 : Invalid argument, as expected +*** C771T062: kill03 OK (1) +kill04 1 TPASS : errno set to 3 : No such process, as expected +*** C771T063: kill04 OK (1) +kill failed with EPERM +kill05 1 TPASS : received expected errno(EPERM) +*** C771T064: kill05 OK (1) +kill06 1 TPASS : received expected signal 9 +*** C771T065: kill06 OK (1) +kill07 0 TINFO : received expected signal 9 +kill07 1 TPASS : Did not catch signal as expected +*** C771T066: kill07 OK (1) +kill08 1 TPASS : received expected signal 9 +*** C771T067: kill08 OK (1) +kill09 1 TPASS : kill(31358, SIGKILL) returned 0 +*** C771T068: kill09 OK (1) +kill10 1 TPASS : All 2 pgrps received their signals +31429: All 10 children reported in +31460: All 10 children reported in +*** C771T069: kill10 OK (1) +kill11 1 TPASS : signal SIGHUP +kill11 2 TPASS : signal SIGINT +kill11 3 TPASS : signal SIGQUIT dumped core +kill11 4 TPASS : signal SIGILL dumped core +kill11 5 TPASS : signal SIGTRAP dumped core +kill11 6 TPASS : signal SIGIOT/SIGABRT dumped core +kill11 7 TPASS : signal SIGIOT/SIGABRT dumped core +kill11 8 TPASS : signal SIGBUS dumped core +kill11 9 TPASS : signal SIGFPE dumped core +kill11 10 TPASS : signal SIGKILL +kill11 11 TPASS : signal SIGUSR1 +kill11 12 TPASS : signal SIGSEGV dumped core +kill11 13 TPASS : signal SIGUSR2 +kill11 14 TPASS : signal SIGPIPE +kill11 15 TPASS : signal SIGALRM +kill11 16 TPASS : signal SIGTERM +kill11 17 TPASS : signal SIGXCPU dumped core +kill11 18 TPASS : signal SIGXFSZ dumped core +kill11 19 TPASS : signal SIGVTALRM +kill11 20 TPASS : signal SIGPROF +kill11 21 TPASS : signal SIGIO/SIGPOLL +kill11 22 TPASS : signal SIGPWR +kill11 23 TPASS : signal SIGSYS/SIGUNUSED dumped core +*** C771T070: kill11 OK (23) +kill12 1 TPASS : Test passed +*** C771T071: kill12 OK (1) +ptrace01 1 TPASS : Test Passed +ptrace01 2 TPASS : Test Passed +*** C771T072: ptrace01 OK (2) +ptrace02 1 TPASS : Test Passed +ptrace02 2 TPASS : Test 
Passed +*** C771T073: ptrace02 OK (2) +ptrace03 1 TCONF : ptrace03.c:137: this kernel allows to trace init +ptrace03 2 TPASS : Test Passed +ptrace03 3 TPASS : Test Passed +*** C771T074: ptrace03 OK (2) +ptrace05 0 TINFO : [child] Sending kill(.., 0) +ptrace05 1 TPASS : kill(.., 0) exited with 0, as expected. +ptrace05 0 TINFO : [child] Sending kill(.., 1) +ptrace05 2 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 2) +ptrace05 3 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 3) +ptrace05 4 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 4) +ptrace05 5 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 5) +ptrace05 6 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 6) +ptrace05 7 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 7) +ptrace05 8 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 8) +ptrace05 9 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 9) +ptrace05 10 TPASS : Killed with SIGKILL, as expected. 
+ptrace05 0 TINFO : [child] Sending kill(.., 10) +ptrace05 11 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 11) +ptrace05 12 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 12) +ptrace05 13 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 13) +ptrace05 14 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 14) +ptrace05 15 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 15) +ptrace05 16 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 16) +ptrace05 17 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 17) +ptrace05 18 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 18) +ptrace05 19 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 19) +ptrace05 20 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 20) +ptrace05 21 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 21) +ptrace05 22 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 22) +ptrace05 23 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 23) +ptrace05 24 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 24) +ptrace05 25 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 25) +ptrace05 26 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 26) +ptrace05 27 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 27) +ptrace05 28 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 28) +ptrace05 29 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 29) +ptrace05 30 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 30) +ptrace05 31 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 31) +ptrace05 32 TPASS : Stopped as expected +ptrace05 0 TINFO : 
[child] Sending kill(.., 32) +ptrace05 33 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 33) +ptrace05 34 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 34) +ptrace05 35 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 35) +ptrace05 36 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 36) +ptrace05 37 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 37) +ptrace05 38 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 38) +ptrace05 39 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 39) +ptrace05 40 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 40) +ptrace05 41 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 41) +ptrace05 42 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 42) +ptrace05 43 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 43) +ptrace05 44 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 44) +ptrace05 45 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 45) +ptrace05 46 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 46) +ptrace05 47 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 47) +ptrace05 48 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 48) +ptrace05 49 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 49) +ptrace05 50 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 50) +ptrace05 51 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 51) +ptrace05 52 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 52) +ptrace05 53 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 53) +ptrace05 54 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending 
kill(.., 54) +ptrace05 55 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 55) +ptrace05 56 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 56) +ptrace05 57 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 57) +ptrace05 58 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 58) +ptrace05 59 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 59) +ptrace05 60 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 60) +ptrace05 61 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 61) +ptrace05 62 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 62) +ptrace05 63 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 63) +ptrace05 64 TPASS : Stopped as expected +ptrace05 0 TINFO : [child] Sending kill(.., 64) +ptrace05 65 TPASS : Stopped as expected +*** C771T075: ptrace05 OK (65) +wait02 1 TPASS : wait(&status) returned 3201 +*** C771T076: wait02 OK (1) +wait401 1 TPASS : Received child pid as expected. 
+wait401 2 TPASS : wait401 call succeeded +*** C771T077: wait401 OK (2) +wait402 1 TPASS : received expected failure - errno = 10 - No child processes +*** C771T078: wait402 OK (1) +waitid01 0 TINFO : Process 3384 terminated: +waitid01 0 TINFO : code = 1 +waitid01 0 TINFO : exit value = 31488 +waitid01 0 TINFO : Process 3414 terminated: +waitid01 0 TINFO : code = 1 +waitid01 0 TINFO : exit value = 0 +waitid01 0 TINFO : Process 3444 terminated: +waitid01 0 TINFO : code = 2 +waitid01 0 TINFO : signal = 1 +waitid01 0 TINFO : Process 3474 terminated: +waitid01 0 TINFO : code = 1 +waitid01 0 TINFO : exit value = 31488 +waitid01 0 TINFO : Process 3504 terminated: +waitid01 0 TINFO : code = 1 +waitid01 0 TINFO : exit value = 0 +waitid01 0 TINFO : Process 3534 terminated: +waitid01 0 TINFO : code = 2 +waitid01 0 TINFO : signal = 1 +waitid01 0 TINFO : Process 3564 terminated: +waitid01 0 TINFO : code = 1 +waitid01 0 TINFO : exit value = 31488 +waitid01 0 TINFO : Process 3594 terminated: +waitid01 0 TINFO : code = 1 +waitid01 0 TINFO : exit value = 0 +waitid01 0 TINFO : Process 3624 terminated: +waitid01 0 TINFO : code = 2 +waitid01 0 TINFO : signal = 1 +waitid01 1 TPASS : waitid(): system call passed +*** C771T079: waitid01 OK (1) +waitid02 0 TINFO : WNOHANG +waitid02 0 TINFO : (3664) waitid(0, 0, 0x547ffffffcf0, 1) +waitid02 1 TPASS : exp_errno=22 +waitid02 0 TINFO : si_pid = 0 ; si_code = 0 ; si_status = 0 +waitid02 0 TINFO : WNOHANG | WEXITED no child +waitid02 0 TINFO : (3664) waitid(0, 0, 0x547ffffffcf0, 5) +waitid02 2 TPASS : exp_errno=10 +waitid02 0 TINFO : si_pid = 0 ; si_code = 0 ; si_status = 0 +waitid02 0 TINFO : WNOHANG | WEXITED with child +waitid02 0 TINFO : (3664) waitid(0, 0, 0x547ffffffcf0, 5) +waitid02 3 TPASS : ret: 0 +waitid02 0 TINFO : si_pid = 0 ; si_code = 0 ; si_status = 0 +waitid02 0 TINFO : P_PGID, WEXITED wait for child +waitid02 0 TINFO : (3664) waitid(2, 5365, 0x547ffffffcf0, 4) +waitid02 4 TPASS : ret: 0 +waitid02 0 TINFO : si_pid = 3725 ; 
si_code = 1 ; si_status = 0 +waitid02 0 TINFO : P_PID, WEXITED wait for child +waitid02 0 TINFO : (3664) waitid(1, 3755, 0x547ffffffcf0, 4) +waitid02 5 TPASS : ret: 0 +waitid02 0 TINFO : si_pid = 3755 ; si_code = 1 ; si_status = 0 +waitid02 0 TINFO : P_PID, WSTOPPED | WNOWAIT +waitid02 0 TINFO : (3664) waitid(1, 3785, 0x547ffffffcf0, 16777218) +waitid02 6 TPASS : ret: 0 +waitid02 0 TINFO : si_pid = 3785 ; si_code = 5 ; si_status = 4991 +waitid02 0 TINFO : P_PID, WCONTINUED +waitid02 0 TINFO : (3664) waitid(1, 3815, 0x547ffffffcf0, 8) +waitid02 7 TPASS : ret: 0 +waitid02 0 TINFO : si_pid = 3815 ; si_code = 6 ; si_status = 65535 +waitid02 0 TINFO : P_PID, WEXITED not a child of the calling process +waitid02 0 TINFO : (3664) waitid(1, 1, 0x547ffffffcf0, 4) +waitid02 8 TPASS : exp_errno=10 +waitid02 0 TINFO : si_pid = 0 ; si_code = 0 ; si_status = 0 +*** C771T080: waitid02 OK (8) +waitpid01 1 TPASS : recieved expected pid +waitpid01 2 TPASS : recieved expected signal +*** C771T081: waitpid01 OK (2) +waitpid02 1 TPASS : recieved expected pid +waitpid02 2 TPASS : recieved expected signal +waitpid02 3 TPASS : recieved expected exit value +*** C771T082: waitpid02 OK (3) +waitpid03 1 TPASS : Got correct child PID +waitpid03 2 TPASS : Condition 2 test passed +*** C771T083: waitpid03 OK (2) +waitpid04 1 TPASS : condition 1 test passed +waitpid04 2 TPASS : condition 2 test passed +waitpid04 3 TPASS : condition 3 test passed +*** C771T084: waitpid04 OK (3) +waitpid05 1 TPASS : received expected pid. +waitpid05 2 TPASS : received expected exit number. +waitpid05 3 TPASS : received expected pid. +waitpid05 4 TPASS : received expected exit number. +waitpid05 5 TPASS : received expected pid. +waitpid05 6 TPASS : received expected exit number. +waitpid05 7 TPASS : received expected pid. +waitpid05 8 TPASS : received expected exit number. +waitpid05 9 TPASS : received expected pid. +waitpid05 10 TPASS : received expected exit number. +waitpid05 11 TPASS : received expected pid. 
+waitpid05 12 TPASS : received expected exit number. +waitpid05 13 TPASS : received expected pid. +waitpid05 14 TPASS : received expected exit number. +waitpid05 15 TPASS : received expected pid. +waitpid05 16 TPASS : received expected exit number. +waitpid05 17 TPASS : received expected pid. +waitpid05 18 TPASS : received expected exit number. +waitpid05 19 TPASS : received expected pid. +waitpid05 20 TPASS : received expected exit number. +waitpid05 21 TPASS : received expected pid. +waitpid05 22 TPASS : received expected exit number. +waitpid05 23 TPASS : received expected pid. +waitpid05 24 TPASS : received expected exit number. +*** C771T085: waitpid05 OK (24) +waitpid06 1 TPASS : waitpid06 PASSED +*** C771T086: waitpid06 OK (1) +waitpid07 1 TPASS : waitpid07 PASSED +*** C771T087: waitpid07 OK (1) +waitpid08 1 TPASS : waitpid08 PASSED +*** C771T088: waitpid08 OK (1) +waitpid09 1 TPASS : case 1 PASSED +waitpid09 2 TPASS : case 2 PASSED +*** C771T089: waitpid09 OK (2) +waitpid10 1 TPASS : Test PASSED +*** C771T090: waitpid10 OK (1) +waitpid11 1 TPASS : Test PASSED +waitpid11 1 TPASS : waitpid11 PASSED +*** C771T091: waitpid11 OK (2) +waitpid12 1 TPASS : Test PASSED +waitpid12 1 TPASS : waitpid12 PASSED +*** C771T092: waitpid12 OK (2) +waitpid13 1 TPASS : Test PASSED +waitpid13 1 TPASS : waitpid13 PASSED +*** C771T093: waitpid13 OK (2) +bash-4.2$ exit +exit + +Script done on Mon Sep 24 09:07:34 2018 diff --git a/test/issues/771+1179+1143/C771T001.awk b/test/issues/771+1179+1143/C771T001.awk new file mode 100644 index 00000000..d2439d5d --- /dev/null +++ b/test/issues/771+1179+1143/C771T001.awk @@ -0,0 +1,176 @@ +#! /usr/bin/awk -f +BEGIN{ + ok = 0 + ng = 0 +} + +/^\(gdb\) b thr/{ + print "*** C771T001 gdb start OK" + ok++ + st = 2 + next +} + +/^\(gdb\) r/{ + st = 3 + next +} + +/^\(gdb\) info threads/{ + st = 4 + next +} + +/^\(gdb\) bt/{ + st = (bt == 0) ? 
5 : 8 + bt++ + next +} + +/^\(gdb\) n/{ + st = 6 + next +} + +/^\(gdb\) thread 1/{ + st = 7 + next +} + +/^\(gdb\) thread 2/{ + if (st != 0) { + printf("*** C771T%03d backtrace command NG\n", st) + ng++ + } + st = 9 + next +} + +/^\(gdb\) c/{ + st = 10 + next +} + +/^\(gdb\) q/{ + st = 11 + next +} + +/^\(gdb\)/ { + printf("*** C771T%03d NG\n", st) + ng++ + exit(1) +} + +st == 2 { + if ($0 ~/^Breakpoint 1 at/) { + print "*** C771T002 breakpoint command OK" + ok++ + } + else { + print "*** C771T002 breakpoint command NG" + ng++ + } + st = 0 +} + +st == 3 { + if ($0 ~/^Starting program/) { + print "*** C771T003 run command OK" + ok++ + } + else { + print "*** C771T003 run command NG" + ng++ + } + st = 0 +} + +st == 4 { + if ($0 ~/^ Id /) { + print "*** C771T004 info threads command OK" + ok++ + } + else { + print "*** C771T004 info threadsrun command NG" + ng++ + } + st = 0 +} + +st == 5 { + if ($0 ~/^#0 thr/) { + print "*** C771T005 backtrace command OK" + ok++ + } + else { + print "*** C771T005 backtrace command NG" + ng++ + } + st = 0 +} + +st == 6 { + if ($0 ~/^thread start tid=/) { + print "*** C771T006 next command OK" + ok++ + } + else { + print "*** C771T006 next command NG" + ng++ + } + st = 0 +} + +st == 7 { + if ($0 ~/^\[Switching to thread 1/) { + print "*** C771T007 thread command OK" + ok++ + } + else { + print "*** C771T007 thread command NG" + ng++ + } + st = 0 +} + +st == 8 { + if ($0 ~/ in main /) { + print "*** C771T008 thread command OK" + ok++ + st = 0 + } +} + +st == 9 { + if ($0 ~/^\[Switching to thread 2/) { + print "*** C771T009 thread command OK" + ok++ + } + else { + print "*** C771T009 thread command NG" + ng++ + } + st = 0 +} + +st == 10 { + if ($0 ~/^Continuing/) { + print "*** C771T010 continue command OK" + ok++ + } + else { + print "*** C771T010 continue command NG" + ng++ + } + st = 0 +} + +END{ + if (st == 11) { + print "*** C771T011 quit command OK" + ok++ + } + print "OK=" ok " NG=" ng + exit(ng > 0 ? 
1: 0) +} diff --git a/test/issues/771+1179+1143/C771T001.c b/test/issues/771+1179+1143/C771T001.c new file mode 100644 index 00000000..1cb98e02 --- /dev/null +++ b/test/issues/771+1179+1143/C771T001.c @@ -0,0 +1,63 @@ +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <pthread.h> +#include <errno.h> +#include <unistd.h> +#include <signal.h> +#include <sys/syscall.h> +#include <sys/mman.h> + +volatile char *m; + +void * +thr(void *arg) +{ + int rc; + char *mm; + + fprintf(stderr, "thread start tid=%d\n", (int)syscall(SYS_gettid)); + fflush(stderr); + errno = 0; + mm = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, + -1, 0); + fprintf(stderr, "mmap m=%p errno=%d\n", mm, errno); + fflush(stderr); + memset(mm, '\0', 4096); + m = mm; + *mm = '1'; + while (*m); + rc = munmap(mm, 4096); + fprintf(stderr, "munmap rc=%d, errno=%d\n", rc, errno); + fflush(stderr); + return NULL; +} + +int +main(int argc, char **argv) +{ + pthread_t th; + int rc; + + fprintf(stderr, "process start pid=%d\n", getpid()); + fflush(stderr); + rc = pthread_create(&th, NULL, thr, NULL); + if (rc) { + fprintf(stderr, "pthread_create: %d\n", rc); + exit(1); + } + fprintf(stderr, "pthread_create: %d\n", rc); + fflush(stderr); + while (!m); + fprintf(stderr, "update m=%p\n", m); + fflush(stderr); + while (!*m); + fprintf(stderr, "update *m=%c\n", *m); + fflush(stderr); + *m = '\0'; + pthread_join(th, NULL); + fprintf(stderr, "main done\n"); + fflush(stderr); + exit(0); +} diff --git a/test/issues/771+1179+1143/C771T033.c b/test/issues/771+1179+1143/C771T033.c new file mode 100644 index 00000000..0548544a --- /dev/null +++ b/test/issues/771+1179+1143/C771T033.c @@ -0,0 +1,175 @@ +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <pthread.h> +#include <errno.h> +#include <unistd.h> +#include <signal.h> +#include <sys/syscall.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/ptrace.h> 
+#include <sys/user.h> +#include <sys/wait.h> + +volatile char *m; +volatile int *x; + +void * +thr(void *arg) +{ + int rc; + pid_t tid; + char *mm; + + tid = syscall(SYS_gettid); + *x = tid; + while (*x == tid); + + errno = 0; + mm = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, + -1, 0); + fprintf(stderr, "mmap m=%p errno=%d\n", mm, errno); + fflush(stderr); + memset(mm, '\0', 4096); + m = mm; + *mm = '1'; + while (*m); + rc = munmap(mm, 4096); + fprintf(stderr, "munmap rc=%d, errno=%d\n", rc, errno); + fflush(stderr); + return NULL; +} + +int +main(int argc, char **argv) +{ + pthread_t th; + int rc; + pid_t tid; + pid_t pid; + int sig; + int st; + + x = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, + -1, 0); + + if (x == (void *)-1) { + perror("mmap"); + exit(1); + } + *x = 0; + + rc = pthread_create(&th, NULL, thr, NULL); + if (rc) { + fprintf(stderr, "pthread_create: %d\n", rc); + exit(1); + } + + while (*x == 0); + tid = *x; + fprintf(stderr, "tid=%d\n", tid); + + if ((pid = fork()) == 0) { + if (ptrace(PTRACE_ATTACH, tid, 0, 0) == -1) { + fprintf(stderr, "*** C771T033 *** ATTACH NG err=%d\n", + errno); + exit(1); + } + rc = waitpid(-1, &st, __WALL); + if (rc == tid) { + fprintf(stderr, "*** C771T033 *** ATTACH OK\n"); + } + else { + fprintf(stderr, "*** C771T033 *** ATTACH NG rc=%d\n", + rc); + exit(1); + } + if (ptrace(PTRACE_SETOPTIONS, tid, 0, PTRACE_O_TRACESYSGOOD) == + -1) { + fprintf(stderr, "PTRACE_SETOPTIONS errno=%d\n", errno); + exit(1); + } + *x = 0; + sig = 0; + for (;;) { + rc = ptrace(PTRACE_SYSCALL, tid, 0, sig); + if (rc == -1) { + fprintf(stderr, + "*** C771T034 *** SYSCALL NG err=%d\n", + errno); + exit(1); + } + rc = waitpid(-1, &st, __WALL); + if (rc == tid) { + fprintf(stderr, + "*** C771T034 *** SYSCALL OK\n"); + } + else { + fprintf(stderr, + "*** C771T034 *** SYSCALL NG rc=%d\n", + rc); + exit(1); + } + + if (WIFEXITED(st) || WIFSIGNALED(st)) { + fprintf(stderr, "thread terminated %08x\n", st); + 
exit(1); + } + if (!WIFSTOPPED(st)) { + fprintf(stderr, "warning: st=%08x\n", st); + continue; + } + if (WSTOPSIG(st) & 0x80) { // syscall + struct user_regs_struct arg; + int num; + long ret; + + if (ptrace(PTRACE_GETREGS, tid, NULL, &arg) == + -1) { + } + num = arg.orig_rax; + ret = arg.rax; + if (ret == -ENOSYS) { + fprintf(stderr, + "syscall enter n=%d\n", num); + } + else { + fprintf(stderr, + "syscall return n=%d r=%ld\n", + num, ret); + if (ptrace(PTRACE_DETACH, tid, NULL, + NULL) == -1) { + fprintf(stderr, + "*** C771T035 DETACH NG" + "err=%d\n", errno); + exit(1); + } + else { + fprintf(stderr, + "*** C771T035 DETACH OK" + "\n"); + exit(0); + } + } + } + else { // signal + sig = WSTOPSIG(st) & 0x7f; + } + } + } + + while (!m); + fprintf(stderr, "update m=%p\n", m); + fflush(stderr); + while (!*m); + fprintf(stderr, "update *m=%c\n", *m); + fflush(stderr); + *m = '\0'; + waitpid(pid, &st, 0); + pthread_join(th, NULL); + fprintf(stderr, "main done\n"); + fflush(stderr); + exit(0); +} diff --git a/test/issues/771+1179+1143/C771T037.c b/test/issues/771+1179+1143/C771T037.c new file mode 100644 index 00000000..2833b9e0 --- /dev/null +++ b/test/issues/771+1179+1143/C771T037.c @@ -0,0 +1,150 @@ +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <pthread.h> +#include <errno.h> +#include <unistd.h> +#include <signal.h> +#include <sys/syscall.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/ptrace.h> +#include <sys/user.h> +#include <sys/wait.h> + +volatile char *m; +volatile int *x; + +void * +thr(void *arg) +{ + pid_t tid; + + tid = syscall(SYS_gettid); + *x = tid; + while (*x == tid); + return NULL; +} + +int +main(int argc, char **argv) +{ + pthread_t th; + pid_t tid; + pid_t pid; + int st; + int rc; + + x = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, + -1, 0); + + if (x == (void *)-1) { + perror("mmap"); + exit(1); + } + *x = 0; + + rc = pthread_create(&th, NULL, thr, NULL); + if 
(rc) { + fprintf(stderr, "pthread_create: %d\n", rc); + exit(1); + } + + while (*x == 0); + tid = *x; + fprintf(stderr, "pid=%d\n", getpid()); + fprintf(stderr, "tid=%d\n", tid); + + if ((pid = fork()) == 0) { + pid = getppid(); + if (ptrace(PTRACE_ATTACH, tid, 0, 0) == -1) { + fprintf(stderr, "*** C771T037 ATTACH NG err=%d\n", + errno); + exit(1); + } + if (ptrace(PTRACE_ATTACH, pid, 0, 0) == -1) { + fprintf(stderr, "*** C771T037 ATTACH NG err=%d\n", + errno); + exit(1); + } + + rc = wait4(pid, &st, WNOHANG, NULL); + if (rc == -1) { + fprintf(stderr, "*** C771T037 wait4(pid) NG err=%d\n", + errno); + } + else { + fprintf(stderr, "*** C771T037 wait4(pid) OK\n"); + } + + rc = wait4(tid, &st, WNOHANG, NULL); + if (rc == -1 && errno == ECHILD) { + fprintf(stderr, "*** C771T038 wait4(tid) OK\n"); + } + else { + fprintf(stderr, "*** C771T038 wait4(tid) NG err=%d\n", + errno); + } + + rc = wait4(pid, &st, WNOHANG|__WCLONE, NULL); + if (rc == -1 && errno == ECHILD) { + fprintf(stderr, + "*** C771T039 wait4(pid, __WCLONE) OK\n"); + } + else { + fprintf(stderr, + "*** C771T039 wait4(pid, __WCLONE) NG err=%d\n", + errno); + } + + rc = wait4(tid, &st, WNOHANG|__WCLONE, NULL); + if (rc == -1) { + fprintf(stderr, + "*** C771T040 wait4(tid, __WCLONE) NG err=%d\n", + errno); + } + else { + fprintf(stderr, + "*** C771T040 wait4(tid, __WCLONE) OK\n"); + } + + rc = wait4(pid, &st, WNOHANG|__WALL, NULL); + if (rc == -1) { + fprintf(stderr, + "*** C771T041 wait4(pid, __WALL) NG err=%d\n", + errno); + } + else { + fprintf(stderr, + "*** C771T041 wait4(pid, __WALL) OK\n"); + } + + rc = wait4(tid, &st, WNOHANG|__WALL, NULL); + if (rc == -1) { + fprintf(stderr, + "*** C771T042 wait4(tid, __WALL) NG err=%d\n", + errno); + } + else { + fprintf(stderr, + "*** C771T042 wait4(tid, __WALL) OK\n"); + } + + if (ptrace(PTRACE_DETACH, tid, NULL, NULL) == -1) { + fprintf(stderr, "*** C771T042 DETACH NG err=%d\n", + errno); + exit(1); + } + if (ptrace(PTRACE_DETACH, pid, NULL, NULL) == -1) { + 
fprintf(stderr, "*** C771T042 DETACH NG err=%d\n", + errno); + exit(1); + } + *x = 0; + exit(0); + } + + while (*x == tid); + exit(0); +} diff --git a/test/issues/771+1179+1143/Makefile b/test/issues/771+1179+1143/Makefile new file mode 100644 index 00000000..6621057e --- /dev/null +++ b/test/issues/771+1179+1143/Makefile @@ -0,0 +1,23 @@ +CC = gcc +TARGET= C771T001 C771T033 C771T037 + +CPPFLAGS = +LDFLAGS = + +all: $(TARGET) + +C771T001: C771T001.c + $(CC) -g -Wall -o $@ $^ -pthread + +C771T033: C771T033.c + $(CC) -g -Wall -o $@ $^ -pthread + +C771T037: C771T037.c + $(CC) -g -Wall -o $@ $^ -pthread + +test: all + @sh ./C771.sh + +clean: + rm -f $(TARGET) *.o + rm -rf gdb-result diff --git a/test/issues/771+1179+1143/README b/test/issues/771+1179+1143/README new file mode 100644 index 00000000..9c198ed7 --- /dev/null +++ b/test/issues/771+1179+1143/README @@ -0,0 +1,144 @@ +【Issue#771 Issue#1179 Issue#1143 動作確認】 +□ テスト内容 +各 Issue は以下の理由により、一括して対応した。 +・ Issue#771 は ptrace のスレッドサポートである。 +・ Issue#1179 は PTRACE_ATTACH の不具合対応であるが、スレッドの PTRACE_ATTACH + 対応が Issue#771 に含まれるため、一括して対応することにした。 +・ Issue#1143 は wait4 の __WALL フラグサポートであるが、スレッドの wait4 対応 + (__WCLONEフラグサポート) が Issue#771 に含まれるため、一括して対応すること + にした。 + +テスト内容は以下の通りである。 + +1. gdb を用いてスレッドを使用するプログラムのデバッグが行えることを確認する。 +C771T001 mcexec gdb C771T001 を実行すると、McKernelでgdbの実行を開始し、 + (gdb) のプロンプトが表示されること +C771T002 b thr を実行し、スレッドにブレークポイントを設定できること +C771T003 r コマンド実行後、スレッドに設定したブレークポイントで停止できること +C771T004 info threads コマンドで、スレッド一覧が表示できること +C771T005 bt コマンドでスレッドのバックトレースが表示できること +C771T006 n コマンドでスレッドのステップ実行ができること +C771T007 thread 1 コマンドでメインスレッドに切り替えできること +C771T008 bt コマンドでメインスレッドのバックトレースが表示できること +C771T009 thread 2 コマンドで再びスレッドに切り替えできること +C771T010 c コマンドでスレッドとメインスレッドの実行が再開されること +C771T011 q コマンドで gdb を正しく終了できること +C771T012 McKernel にスレッドとプロセスの残留が無いこと + +2. 
gdb のテストケースを実行し、gdb が使用する ptrace の機能が動作することを + 確認する。thread を使用するテストを中心に、以下のテストケースを選定した。 +C771T013 a2-run が PASS すること +C771T014 foll-fork が PASS すること +C771T015 fork-detach が PASS すること +C771T016 atomic-seq-threaded が PASS すること +C771T017 bp_in_thread が PASS すること +C771T018 bt-clone-stop が PASS すること +C771T019 corethreads が PASS すること +C771T020 dlopen-libpthread が PASS すること +C771T021 fork-child-threads が PASS すること +C771T022 killed が PASS すること +C771T023 pthread_cond_wait が PASS すること +C771T024 switch-threads が PASS すること +C771T025 thread-specific が PASS すること +C771T026 thread_check が PASS すること +C771T027 thread_events が PASS すること +C771T028 threaded-exec が PASS すること +C771T029 threxit-hop-specific が PASS すること +C771T030 tls-nodebug が PASS すること +C771T031 tls-sepdebug が PASS すること +C771T032 tls-var が PASS すること + +3. ptrace のスレッドサポートの基本機能を確認する。 +C771T033 スレッドに PTRACE_ATTACH できること +C771T034 PTRACE_SYSCALL により、スレッドのシステムコールを捕捉できること +C771T035 PTRACE_DETACH により、スレッドのトレースを中止できること + +4. Issue#1179 の指摘プログラムが正常動作することを確認する。 +C771T036 test_mck -s ptrace -n 19 が PASS すること + +5. wait4 で __WALL フラグ、__WCLONE フラグが正しく機能することを確認する。 +C771T037 wait4 のフラグに __WALL, __WCLONE 共に指定しないとき、pid に + 子プロセスを指定してエラーにならないこと +C771T038 wait4 のフラグに __WALL, __WCLONE 共に指定しないとき、pid に + スレッドを指定してエラー(ECHILD)になること +C771T039 wait4 のフラグに __WCLONE を指定し、pid に子プロセスを指定して + エラー(ECHILD)になること +C771T040 wait4 のフラグに __WCLONE を指定し、pid にスレッドを指定して + エラーにならないこと +C771T041 wait4 のフラグに __WALL を指定し、pid に子プロセスを指定して + エラーにならないこと +C771T042 wait4 のフラグに __WALL を指定し、pid にスレッドを指定して + エラーにならないこと + +6. 
LTP を用いて変更が既存処理に影響しないことを確認する。 + プロセス関連のシステムコール(clone, exit, fork, kill, ptrace, waitなど) + を中心に以下のテストプログラムを選定した。 +C771T043 clone01 が PASS すること +C771T044 clone03 が PASS すること +C771T045 clone04 が PASS すること +C771T046 clone06 が PASS すること +C771T047 clone07 が PASS すること +C771T048 exit01 が PASS すること +C771T049 exit02 が PASS すること +C771T050 exit_group01 が PASS すること +C771T051 fork01 が PASS すること +C771T052 fork02 が PASS すること +C771T053 fork03 が PASS すること +C771T054 fork04 が PASS すること +C771T055 fork07 が PASS すること +C771T056 fork08 が PASS すること +C771T057 fork09 が PASS すること +C771T058 fork10 が PASS すること +C771T059 fork11 が PASS すること +C771T060 kill01 が PASS すること +C771T061 kill02 が PASS すること +C771T062 kill03 が PASS すること +C771T063 kill04 が PASS すること +C771T064 kill05 が PASS すること +C771T065 kill06 が PASS すること +C771T066 kill07 が PASS すること +C771T067 kill08 が PASS すること +C771T068 kill09 が PASS すること +C771T069 kill11 が PASS すること +C771T070 kill12 が PASS すること +C771T071 ptrace01 が PASS すること +C771T072 ptrace02 が PASS すること +C771T073 ptrace03 が PASS すること +C771T074 ptrace04 が PASS すること +C771T075 ptrace05 が PASS すること +C771T076 wait02 が PASS すること +C771T077 wait401 が PASS すること +C771T078 wait402 が PASS すること +C771T079 waitid01 が PASS すること +C771T080 waitid02 が PASS すること +C771T081 waitpid01 が PASS すること +C771T082 waitpid02 が PASS すること +C771T083 waitpid03 が PASS すること +C771T084 waitpid04 が PASS すること +C771T085 waitpid05 が PASS すること +C771T086 waitpid06 が PASS すること +C771T087 waitpid07 が PASS すること +C771T088 waitpid08 が PASS すること +C771T089 waitpid09 が PASS すること +C771T090 waitpid10 が PASS すること +C771T091 waitpid11 が PASS すること +C771T092 waitpid12 が PASS すること +C771T093 waitpid13 が PASS すること + +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 $HOME/.mck_test_config +を参照する。.mck_test_config は、McKernel をビルドした際に生成される +mck_test_config.sample ファイルを $HOME にコピーし、適宜編集すること。 + +また、C771.sh の以下の指定を適宜変更すること。 +GDBBUILDDIR= GDB を構築したパス ($HOME にて gdb を src.rpm から構築している + 場合は変更不要) + 
+gdb のテストケースでは、dejagnu パッケージに含まれる runtest コマンドを使用 +する。システムにインストールされていない場合は、予めインストールすること。 + +□ 実行結果 +C771.txt 参照。 +すべての項目をPASSしていることを確認。 diff --git a/test/issues/771+1179+1143/gdb_test.sh b/test/issues/771+1179+1143/gdb_test.sh new file mode 100755 index 00000000..396866b7 --- /dev/null +++ b/test/issues/771+1179+1143/gdb_test.sh @@ -0,0 +1,75 @@ +#!/bin/bash + +if [ $# -lt 2 ]; then + echo "$0 option error" >&2 + echo "Usage: $0 category test_exp" >&2 + exit 1 +fi + +cat=$1 +test_exp=$2 + +gdb_installdir=/usr +if [ "X$gdb_builddir" = X ];then + echo gdb_builddir was not set >&2 + exit 1 +fi +if [ "X$MCEXEC" = X ];then + echo MCEXEC was not set >&2 + exit 1 +fi + +log_dir="$gdb_builddir/gdb/testsuite" + +if ! which runtest > /dev/null 2>&1; then + echo no runtest found >&2 + exit 1 +fi + +result=`pwd`/gdb-result +result_raw=`pwd`/gdb-result/raw +export PATH=`pwd`:$PATH + +cd ${gdb_builddir} + +echo "======== ${test_exp} ========" +# exec by linux +make check RUNTESTFLAGS="--verbose gdb.${cat}/${test_exp}.exp" &> /dev/null +mv ${log_dir}/gdb.log ${result_raw}/linux/${test_exp}.log +mv ${log_dir}/gdb.sum ${result_raw}/linux/${test_exp}.sum + +# exec by mcexec +make check RUNTESTFLAGS="--verbose GDB=mcexec_gdb.sh gdb.${cat}/${test_exp}.exp" &> /dev/null +mv ${log_dir}/gdb.log ${result_raw}/mck/${test_exp}.log +mv ${log_dir}/gdb.sum ${result_raw}/mck/${test_exp}.sum + +# extract important part +sed -n '/gdb tests/,/expected passes/p' ${result_raw}/linux/${test_exp}.sum > ${result}/linux/${test_exp}.sum +sed -n '/gdb tests/,/expected passes/p' ${result_raw}/mck/${test_exp}.sum > ${result}/mck/${test_exp}.sum + +grep -e '^(gdb) [a-zA-Z0-9]' ${result_raw}/linux/${test_exp}.log > ${result}/linux/${test_exp}.log +grep -e '^(gdb) [a-zA-Z0-9]' ${result_raw}/mck/${test_exp}.log > ${result}/mck/${test_exp}.log + +diff -u ${result}/linux/${test_exp}.sum ${result}/mck/${test_exp}.sum > /dev/null +if [ $? -eq 0 ]; then + echo "【SAME】${test_exp}: Summary." 
+else + echo "【DIFF】${test_exp} : Summary Difference ---" + diff -u ${result}/linux/${test_exp}.sum ${result}/mck/${test_exp}.sum +fi + +diff -u ${result}/linux/${test_exp}.log ${result}/mck/${test_exp}.log > /dev/null +if [ $? -eq 0 ]; then + echo "【SAME】${test_exp} : Log." +else + echo "【DIFF】${test_exp} : Log Difference ---" + diff -u ${result}/linux/${test_exp}.log ${result}/mck/${test_exp}.log +fi + +diff -u <(grep 'of expected passes' ${result}/linux/${test_exp}.sum) <(grep 'of expected passes' ${result}/mck/${test_exp}.sum) > /dev/null +if [ $? -eq 0 ]; then + echo "【PASS】${test_exp}" +else + echo "【FAIL】${test_exp}" + diff -u <(grep 'of expected passes' ${result}/linux/${test_exp}.sum) <(grep 'of expected passes' ${result}/mck/${test_exp}.sum) +fi diff --git a/test/issues/771+1179+1143/gdblist b/test/issues/771+1179+1143/gdblist new file mode 100644 index 00000000..d7286eca --- /dev/null +++ b/test/issues/771+1179+1143/gdblist @@ -0,0 +1,20 @@ +base a2-run +base foll-fork +base fork-detach +threads atomic-seq-threaded +threads bp_in_thread +threads bt-clone-stop +threads corethreads +threads dlopen-libpthread +threads fork-child-threads +threads killed +threads pthread_cond_wait +threads switch-threads +threads thread-specific +threads thread_check +threads thread_events +threads threaded-exec +threads threxit-hop-specific +threads tls-nodebug +threads tls-sepdebug +threads tls-var diff --git a/test/issues/771+1179+1143/ltplist b/test/issues/771+1179+1143/ltplist new file mode 100644 index 00000000..84107844 --- /dev/null +++ b/test/issues/771+1179+1143/ltplist @@ -0,0 +1,51 @@ +clone01 +clone03 +clone04 +clone06 +clone07 +exit01 +exit02 +exit_group01 +fork01 +fork02 +fork03 +fork04 +fork07 +fork08 +fork09 +fork10 +fork11 +kill01 +kill02 +kill03 +kill04 +kill05 +kill06 +kill07 +kill08 +kill09 +kill10 +kill11 +kill12 +ptrace01 +ptrace02 +ptrace03 +ptrace05 +wait02 +wait401 +wait402 +waitid01 +waitid02 +waitpid01 +waitpid02 +waitpid03 +waitpid04 
+waitpid05 +waitpid06 +waitpid07 +waitpid08 +waitpid09 +waitpid10 +waitpid11 +waitpid12 +waitpid13 diff --git a/test/issues/771+1179+1143/mcexec_gdb.sh b/test/issues/771+1179+1143/mcexec_gdb.sh new file mode 100755 index 00000000..d53e64c7 --- /dev/null +++ b/test/issues/771+1179+1143/mcexec_gdb.sh @@ -0,0 +1,8 @@ +#!/bin/sh -x + +if [ x$MCEXEC = x ]; then + echo MCEXEC was not set >&2 + exit 1 +fi + +exec $MCEXEC gdb "$@" diff --git a/test/issues/840/C840.sh b/test/issues/840/C840.sh index 39b174cc..4a72f6ad 100644 --- a/test/issues/840/C840.sh +++ b/test/issues/840/C840.sh @@ -1,152 +1,214 @@ #!/bin/sh +USELTP=0 +USEOSTEST=0 + +BOOTPARAM="-c 1-7 -m 4G@0" +. ../../common.sh + if ! sudo ls /sys/kernel/debug | grep kmemleak > /dev/null 2>&1; then echo kmemleak: not found >&2 exit 1 fi -echo 'C840T01... ' -ng=0 +dd if=/dev/zero of=rpf.data bs=1M count=1024 sync -sudo /sbin/sysctl vm.drop_caches=3 > /dev/null 2>&1 -./ihkosctl 0 clear_kmsg -sudo dmesg -c > /dev/null -sudo sh -c 'echo clear > /sys/kernel/debug/kmemleak' -./mcexec ./C840T01 -if [ `sudo cat /sys/kernel/debug/kmemleak | wc -l` != 0 ]; then - echo 'C840T01: NG (kmemleak)' - ng=1 -fi -if ! dmesg | grep 'remote_page_fault:interrupted. -512' > /dev/null 2>&1; then - echo 'C840T01: WARN (remote_page_fault)' - ng=1 -fi -if ! ./ihkosctl 0 kmsg | grep 'is dead, terminate()' > /dev/null 2>&1; then - echo 'C840T01: WARN (syscall offloading)' - ng=1 -fi -if [ $ng = 0 ]; then - echo C840T01: OK -fi + +echo 'C840T01... ' +b= +while [ x$b = x ]; do + ng=0 + sync + sudo /sbin/sysctl vm.drop_caches=3 > /dev/null 2>&1 + sudo sh -c 'echo clear > /sys/kernel/debug/kmemleak' + sudo $SBINDIR/mcreboot.sh $BOOTPARAM + $SBINDIR/ihkosctl 0 clear_kmsg + sudo dmesg -c > /dev/null + $MCEXEC ./C840T01 + sleep 3 + rpf=`dmesg | grep 'remote_page_fault:interrupted. 
-512'` + offload=` $SBINDIR/ihkosctl 0 kmsg | grep 'is dead, terminate()'` + sudo $SBINDIR/mcstop+release.sh + sudo sh -c 'echo scan > /sys/kernel/debug/kmemleak' + if [ x"$rpf" = x ]; then + echo '*** C840T01: WARN (remote_page_fault)' + ng=1 + elif [ x"$offload" = x ]; then + echo '*** C840T01: WARN (syscall offloading)' + ng=1 + elif sudo cat /sys/kernel/debug/kmemleak | tee C840T01.kmemleak | grep 'mcctrl'; then + echo '*** C840T01: NG (kmemleak)' + ng=1 + b=1 + fi + if [ $ng = 0 ]; then + echo '*** C840T01: OK' + b=1 + fi +done echo 'C840T02... ' -ng=0 -sync -sudo /sbin/sysctl vm.drop_caches=3 > /dev/null 2>&1 -./ihkosctl 0 clear_kmsg -sudo dmesg -c > /dev/null -sudo sh -c 'echo clear > /sys/kernel/debug/kmemleak' -./mcexec ./C840T02 -if [ `sudo cat /sys/kernel/debug/kmemleak | wc -l` != 0 ]; then - echo 'C840T02: NG (kmemleak)' - ng=1 -fi -if dmesg | grep 'remote_page_fault:interrupted. -512' > /dev/null 2>&1; then - echo 'C840T02: WARN (remote_page_fault)' - ng=1 -fi -if ! ./ihkosctl 0 kmsg | grep 'is dead, terminate()' > /dev/null 2>&1; then - echo 'C840T02: WARN (syscall offloading)' - ng=1 -fi -if [ $ng = 0 ]; then - echo C840T02: OK -fi +b= +while [ x$b = x ]; do + ng=0 + sync + sudo /sbin/sysctl vm.drop_caches=3 > /dev/null 2>&1 + sudo sh -c 'echo clear > /sys/kernel/debug/kmemleak' + sudo $SBINDIR/mcreboot.sh $BOOTPARAM + $SBINDIR/ihkosctl 0 clear_kmsg + sudo dmesg -c > /dev/null + $MCEXEC ./C840T02 + sleep 3 + rpf=`dmesg | grep 'remote_page_fault:interrupted. 
-512'` + offload=` $SBINDIR/ihkosctl 0 kmsg | grep 'is dead, terminate()'` + sudo $SBINDIR/mcstop+release.sh + sudo sh -c 'echo scan > /sys/kernel/debug/kmemleak' + if [ x"$rpf" != x ]; then + echo '*** C840T02: WARN (remote_page_fault)' + ng=1 + elif [ x"$offload" = x ]; then + echo '*** C840T02: WARN (syscall offloading)' + ng=1 + elif sudo cat /sys/kernel/debug/kmemleak | tee C840T02.kmemleak | grep 'mcctrl'; then + echo '*** C840T02: NG (kmemleak)' + ng=1 + b=1 + fi + if [ $ng = 0 ]; then + echo '*** C840T02: OK' + b=1 + fi +done echo 'C840T03... ' -ng=0 -sync -sudo /sbin/sysctl vm.drop_caches=3 > /dev/null 2>&1 -./ihkosctl 0 clear_kmsg -sudo dmesg -c > /dev/null -sudo sh -c 'echo clear > /sys/kernel/debug/kmemleak' -./mcexec ./C840T03 -if [ `sudo cat /sys/kernel/debug/kmemleak | wc -l` != 0 ]; then - echo 'C840T03: NG (kmemleak)' - ng=1 -fi -if dmesg | grep 'remote_page_fault:interrupted. -512' > /dev/null 2>&1; then - echo 'C840T03: WARN (remote_page_fault)' - ng=1 -fi -if ./ihkosctl 0 kmsg | grep 'is dead, terminate()' > /dev/null 2>&1; then - echo 'C840T03: WARN (syscall offloading)' - ng=1 -fi -if [ $ng = 0 ]; then - echo C840T03: OK -fi +b= +while [ x$b = x ]; do + ng=0 + sync + sudo /sbin/sysctl vm.drop_caches=3 > /dev/null 2>&1 + sudo sh -c 'echo clear > /sys/kernel/debug/kmemleak' + sudo $SBINDIR/mcreboot.sh $BOOTPARAM + $SBINDIR/ihkosctl 0 clear_kmsg + sudo dmesg -c > /dev/null + $MCEXEC ./C840T03 + sleep 3 + rpf=`dmesg | grep 'remote_page_fault:interrupted. 
-512'` + offload=` $SBINDIR/ihkosctl 0 kmsg | grep 'is dead, terminate()'` + sudo $SBINDIR/mcstop+release.sh + sudo sh -c 'echo scan > /sys/kernel/debug/kmemleak' + if [ x"$rpf" != x ]; then + echo '*** C840T03: WARN (remote_page_fault)' + ng=1 + elif [ x"$offload" != x ]; then + echo '*** C840T03: WARN (syscall offloading)' + ng=1 + elif sudo cat /sys/kernel/debug/kmemleak | tee C840T03.kmemleak | grep 'mcctrl'; then + echo '*** C840T03: NG (kmemleak)' + ng=1 + b=1 + fi + if [ $ng = 0 ]; then + echo '*** C840T03: OK' + b=1 + fi +done echo 'C840T04... ' -ng=0 -sync -sudo /sbin/sysctl vm.drop_caches=3 > /dev/null 2>&1 -./ihkosctl 0 clear_kmsg -sudo dmesg -c > /dev/null -sudo sh -c 'echo clear > /sys/kernel/debug/kmemleak' -timeout -s 9 2 ./mcexec ./C840T04 -sleep 2 -if [ `sudo cat /sys/kernel/debug/kmemleak | wc -l` != 0 ]; then - echo 'C840T04: NG (kmemleak)' - ng=1 -fi -if ! dmesg | grep 'remote_page_fault:interrupted. -512' > /dev/null 2>&1; then - echo 'C840T04: WARN (remote_page_fault)' - ng=1 -fi -if ! ./ihkosctl 0 kmsg | grep 'is dead, terminate()' > /dev/null 2>&1; then - echo 'C840T04: WARN (syscall offloading)' - ng=1 -fi -if [ $ng = 0 ]; then - echo C840T04: OK -fi +b= +while [ x$b = x ]; do + ng=0 + sync + sudo /sbin/sysctl vm.drop_caches=3 > /dev/null 2>&1 + sudo sh -c 'echo clear > /sys/kernel/debug/kmemleak' + sudo $SBINDIR/mcreboot.sh $BOOTPARAM + $SBINDIR/ihkosctl 0 clear_kmsg + sudo dmesg -c > /dev/null + timeout -s 9 2 $MCEXEC ./C840T04 + sleep 3 + rpf=`dmesg | grep 'remote_page_fault:interrupted. 
-512'` + offload=` $SBINDIR/ihkosctl 0 kmsg | grep 'is dead, terminate()'` + sudo $SBINDIR/mcstop+release.sh + sudo sh -c 'echo scan > /sys/kernel/debug/kmemleak' + if [ x"$rpf" = x ]; then + echo '*** C840T04: WARN (remote_page_fault)' + ng=1 + elif [ x"$offload" = x ]; then + echo '*** C840T04: WARN (syscall offloading)' + ng=1 + elif sudo cat /sys/kernel/debug/kmemleak | tee C840T04.kmemleak | grep 'mcctrl'; then + echo '*** C840T04: NG (kmemleak)' + ng=1 + b=1 + fi + if [ $ng = 0 ]; then + echo '*** C840T04: OK' + b=1 + fi +done echo 'C840T05... ' -ng=0 -sync -sudo /sbin/sysctl vm.drop_caches=3 > /dev/null 2>&1 -./ihkosctl 0 clear_kmsg -sudo dmesg -c > /dev/null -sudo sh -c 'echo clear > /sys/kernel/debug/kmemleak' -timeout -s 9 2 ./mcexec ./C840T05 -sleep 2 -if [ `sudo cat /sys/kernel/debug/kmemleak | wc -l` != 0 ]; then - echo 'C840T05: NG (kmemleak)' - ng=1 -fi -if dmesg | grep 'remote_page_fault:interrupted. -512' > /dev/null 2>&1; then - echo 'C840T05: WARN (remote_page_fault)' - ng=1 -fi -if ! ./ihkosctl 0 kmsg | grep 'is dead, terminate()' > /dev/null 2>&1; then - echo 'C840T05: WARN (syscall offloading)' - ng=1 -fi -if [ $ng = 0 ]; then - echo C840T05: OK -fi +b= +while [ x$b = x ]; do + ng=0 + sync + sudo /sbin/sysctl vm.drop_caches=3 > /dev/null 2>&1 + sudo sh -c 'echo clear > /sys/kernel/debug/kmemleak' + sudo $SBINDIR/mcreboot.sh $BOOTPARAM + $SBINDIR/ihkosctl 0 clear_kmsg + sudo dmesg -c > /dev/null + timeout -s 9 2 $MCEXEC ./C840T05 + sleep 3 + rpf=`dmesg | grep 'remote_page_fault:interrupted. 
-512'` + offload=` $SBINDIR/ihkosctl 0 kmsg | grep 'is dead, terminate()'` + sudo $SBINDIR/mcstop+release.sh + sudo sh -c 'echo scan > /sys/kernel/debug/kmemleak' + if [ x"$rpf" != x ]; then + echo '*** C840T05: WARN (remote_page_fault)' + ng=1 + elif [ x"$offload" = x ]; then + echo '*** C840T05: WARN (syscall offloading)' + ng=1 + elif sudo cat /sys/kernel/debug/kmemleak | tee C840T05.kmemleak | grep 'mcctrl'; then + echo '*** C840T05: NG (kmemleak)' + ng=1 + b=1 + fi + if [ $ng = 0 ]; then + echo '*** C840T05: OK' + b=1 + fi +done echo 'C840T06... ' -ng=0 -sync -sudo /sbin/sysctl vm.drop_caches=3 > /dev/null 2>&1 -./ihkosctl 0 clear_kmsg -sudo dmesg -c > /dev/null -sudo sh -c 'echo clear > /sys/kernel/debug/kmemleak' -timeout -s 9 2 ./mcexec ./C840T06 -sleep 2 -if [ `sudo cat /sys/kernel/debug/kmemleak | wc -l` != 0 ]; then - echo 'C840T06: NG (kmemleak)' - ng=1 -fi -if dmesg | grep 'remote_page_fault:interrupted. -512' > /dev/null 2>&1; then - echo 'C840T06: WARN (remote_page_fault)' - ng=1 -fi -if ./ihkosctl 0 kmsg | grep 'is dead, terminate()' > /dev/null 2>&1; then - echo 'C840T06: WARN (syscall offloading)' - ng=1 -fi -if [ $ng = 0 ]; then - echo C840T06: OK -fi +b= +while [ x$b = x ]; do + ng=0 + sync + sudo /sbin/sysctl vm.drop_caches=3 > /dev/null 2>&1 + sudo sh -c 'echo clear > /sys/kernel/debug/kmemleak' + sudo $SBINDIR/mcreboot.sh $BOOTPARAM + $SBINDIR/ihkosctl 0 clear_kmsg + sudo dmesg -c > /dev/null + timeout -s 9 2 $MCEXEC ./C840T06 + sleep 3 + rpf=`dmesg | grep 'remote_page_fault:interrupted. 
-512'` + offload=` $SBINDIR/ihkosctl 0 kmsg | grep 'is dead, terminate()'` + sudo $SBINDIR/mcstop+release.sh + sudo sh -c 'echo scan > /sys/kernel/debug/kmemleak' + if [ x"$rpf" != x ]; then + echo '*** C840T06: WARN (remote_page_fault)' + ng=1 + elif [ x"$offload" != x ]; then + echo '*** C840T06: WARN (syscall offloading)' + ng=1 + elif sudo cat /sys/kernel/debug/kmemleak | tee C840T06.kmemleak | grep 'mcctrl'; then + echo '*** C840T06: NG (kmemleak)' + ng=1 + b=1 + fi + if [ $ng = 0 ]; then + echo '*** C840T06: OK' + b=1 + fi +done + +rm -f rpf.data rpf.out diff --git a/test/issues/840/C840.txt b/test/issues/840/C840.txt index 7795e8f6..4f8ddc72 100644 --- a/test/issues/840/C840.txt +++ b/test/issues/840/C840.txt @@ -1,28 +1,40 @@ -スクリプトは Wed Jun 6 14:38:21 2018 - に開始しました[?1034hbash-4.2$ sh C840.sh +Script started on Thu Jul 5 15:32:52 2018 +[?1034hbash-4.2$ make test +gcc -o C840T01 C840T01.c -Wall -g +gcc -o C840T02 C840T02.c -Wall -g +gcc -o C840T03 C840T03.c -Wall -g +gcc -o C840T04 C840T04.c -Wall -g +gcc -o C840T05 C840T05.c -Wall -g +gcc -o C840T06 C840T06.c -Wall -g +sh ./C840.sh +1024+0 records in +1024+0 records out +1073741824 bytes (1.1 GB) copied, 23.8845 s, 45.0 MB/s C840T01... Terminate by signal 9 -C840.sh: 14 行: 22464 強制終了 ./mcexec ./C840T01 -C840T01: OK +./C840.sh: line 67: 8889 Killed $BINDIR/mcexec ./C840T01 +*** C840T01: WARN (remote_page_fault) +Terminate by signal 9 +./C840.sh: line 67: 9231 Killed $BINDIR/mcexec ./C840T01 +*** C840T01: OK C840T02... Terminate by signal 9 -C840.sh: 38 行: 22500 強制終了 ./mcexec ./C840T02 -C840T02: OK +./C840.sh: line 100: 9580 Killed $BINDIR/mcexec ./C840T02 +*** C840T02: OK C840T03... Terminate by signal 9 -C840.sh: 62 行: 22535 強制終了 ./mcexec ./C840T03 -C840T03: OK +./C840.sh: line 133: 9949 Killed $BINDIR/mcexec ./C840T03 +*** C840T03: OK C840T04... 
-C840.sh: 86 行: 22570 強制終了 timeout -s 9 2 ./mcexec ./C840T04 -C840T04: OK +./C840.sh: line 166: 10301 Killed timeout -s 9 2 $BINDIR/mcexec ./C840T04 +*** C840T04: OK C840T05... -C840.sh: 111 行: 22598 強制終了 timeout -s 9 2 ./mcexec ./C840T05 -C840T05: OK +./C840.sh: line 199: 10644 Killed timeout -s 9 2 $BINDIR/mcexec ./C840T05 +*** C840T05: OK C840T06... -C840.sh: 136 行: 22626 強制終了 timeout -s 9 2 ./mcexec ./C840T06 -C840T06: OK +./C840.sh: line 232: 10975 Killed timeout -s 9 2 $BINDIR/mcexec ./C840T06 +*** C840T06: OK bash-4.2$ exit exit -スクリプトは Wed Jun 6 14:38:51 2018 - に終了しました \ No newline at end of file +Script done on Thu Jul 5 15:46:17 2018 diff --git a/test/issues/840/Makefile b/test/issues/840/Makefile new file mode 100644 index 00000000..7d3f0f1a --- /dev/null +++ b/test/issues/840/Makefile @@ -0,0 +1,33 @@ +CC=gcc +TARGET= C840T01 \ + C840T02 \ + C840T03 \ + C840T04 \ + C840T05 \ + C840T06 + +all:: $(TARGET) + +C840T01: C840T01.c + $(CC) -o C840T01 C840T01.c -Wall -g + +C840T02: C840T02.c + $(CC) -o C840T02 C840T02.c -Wall -g + +C840T03: C840T03.c + $(CC) -o C840T03 C840T03.c -Wall -g + +C840T04: C840T04.c + $(CC) -o C840T04 C840T04.c -Wall -g + +C840T05: C840T05.c + $(CC) -o C840T05 C840T05.c -Wall -g + +C840T06: C840T06.c + $(CC) -o C840T06 C840T06.c -Wall -g + +test:: $(TARGET) + sh ./C840.sh + +clean:: + rm -f *.o $(TARGET) *.kmemleak rpf.data rpf.out diff --git a/test/issues/840/README b/test/issues/840/README new file mode 100644 index 00000000..9565c0ca --- /dev/null +++ b/test/issues/840/README @@ -0,0 +1,29 @@ +【Issue#840 動作確認】 +□ テスト内容 +以下の条件のとき、mcctrlのシステムコール処理でメモリリークしないことを確認。 +C840T01 オフロード中のシステムコールでRPF処理中にMcKプロセスがSIGKILLを受ける +C840T02 オフロード中のシステムコールでRPF処理外にMcKプロセスがSIGKILLを受ける +C840T03 システムコールオフロードしていないときにMcKプロセスがSIGKILLを受ける +C840T04 オフロード中のシステムコールでRPF処理中にmcexecがSIGKILLを受ける +C840T05 オフロード中のシステムコールでRPF処理外にmcexecがSIGKILLを受ける +C840T06 システムコールオフロードしていないときにmcexecがSIGKILLを受ける + +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 
+$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +※注意 +・メモリリークのテストのため、kmemleakが使用可能なLinuxカーネルを用いること。 + kmemleakが使用不可の場合、make testは実行できない。 +・シグナルを受けるタイミングによっては、テスト条件を満たさないことがある + (WARNINGを表示)。 + WARNINGの場合、make testはテスト条件を満たすまで当該テストを繰り返す。 + +□ 実行結果 +C840.txt 参照。 +全ての項目が OK となっていることを確認。 + diff --git a/test/issues/926/C926.c b/test/issues/926/C926.c index 40b220bd..c458427b 100644 --- a/test/issues/926/C926.c +++ b/test/issues/926/C926.c @@ -12,6 +12,80 @@ #include <sys/ipc.h> #include <sys/shm.h> +void +additional_test() +{ + key_t key; + int shmid1; + int shmid2; + int shmid3; + int *sp1; + int *sp2; + int *sp3; + int *sp4; + struct shmid_ds buf; + + key = ftok("/", 1); + + if ((shmid1 = shmget(key, 4096, IPC_CREAT | 0660)) == -1) { + perror("shmget 1"); + exit(1); + } + if ((sp1 = shmat(shmid1, NULL, 0)) == (void *)-1) { + perror("shmat 1"); + exit(1); + } + if (shmctl(shmid1, IPC_RMID, &buf) == -1) { + perror("RMID 1"); + exit(1); + } + *sp1 = 1; + + if ((shmid2 = shmget(key, 4096, IPC_CREAT | 0660)) == -1) { + perror("shmget 2"); + exit(1); + } + if ((sp2 = shmat(shmid2, NULL, 0)) == (void *)-1) { + perror("shmat 2"); + exit(1); + } + *sp2 = 2; + + printf("C926T09... "); + if ((shmid3 = shmget(key, 4096, IPC_CREAT | 0660)) == -1) { + perror("shmget 3"); + exit(1); + } + if ((sp3 = shmat(shmid3, NULL, 0)) == (void *)-1) { + perror("shmat 3"); + exit(1); + } + if (shmid3 == shmid2 && *sp3 == 2) { + printf("OK\n"); + } + else { + if (shmid3 != shmid2) { + printf("NG shmid %d!=%d\n", shmid3, shmid2); + } + else { + printf("NG valie=%d\n", *sp3); + } + } + + printf("C926T10... 
"); + if ((sp4 = shmat(shmid1, NULL, 0)) == (void *)-1) { + perror("shmat 4"); + exit(1); + } + if (*sp4 == 1) { + printf("OK\n"); + } + else { + printf("NG valie=%d\n", *sp4); + } + shmctl(shmid2, IPC_RMID, &buf); +} + int main(int argc, char **argv) { @@ -85,6 +159,7 @@ main(int argc, char **argv) sp = shmat(shmid, NULL, 0); st = *sp == valid3? 1: 0; shmdt(sp); + shmctl(shmid, IPC_RMID, &buf); exit(st); } @@ -150,6 +225,7 @@ main(int argc, char **argv) sp = shmat(shmid, NULL, 0); st = *sp == valid3? 1: 0; shmdt(sp); + shmctl(shmid, IPC_RMID, &buf); exit(st); } @@ -223,6 +299,7 @@ main(int argc, char **argv) sp = shmat(shmid, NULL, 0); st = *sp == valid3? 1: 0; shmdt(sp); + shmctl(shmid, IPC_RMID, &buf); exit(st); } @@ -294,6 +371,7 @@ main(int argc, char **argv) sp = shmat(shmid, NULL, 0); st = *sp == valid3? 1: 0; shmdt(sp); + shmctl(shmid, IPC_RMID, &buf); exit(st); } @@ -362,6 +440,7 @@ main(int argc, char **argv) sp = shmat(shmid, NULL, 0); st = *sp == valid3? 1: 0; shmdt(sp); + shmctl(shmid, IPC_RMID, &buf); exit(st); } @@ -428,6 +507,7 @@ main(int argc, char **argv) sp = shmat(shmid, NULL, 0); st = *sp == valid3? 1: 0; shmdt(sp); + shmctl(shmid, IPC_RMID, &buf); exit(st); } @@ -499,6 +579,7 @@ main(int argc, char **argv) sp = shmat(shmid, NULL, 0); st = *sp == valid3? 1: 0; shmdt(sp); + shmctl(shmid, IPC_RMID, &buf); exit(st); } @@ -569,6 +650,7 @@ main(int argc, char **argv) sp = shmat(shmid, NULL, 0); st = *sp == valid3? 1: 0; shmdt(sp); + shmctl(shmid, IPC_RMID, &buf); exit(st); } @@ -583,5 +665,7 @@ main(int argc, char **argv) printf("NG\n"); } + additional_test(); + exit(0); } diff --git a/test/issues/926/C926.sh b/test/issues/926/C926.sh new file mode 100644 index 00000000..8f73ed33 --- /dev/null +++ b/test/issues/926/C926.sh @@ -0,0 +1,8 @@ +#!/bin/sh +USELTP=0 +USEOSTEST=0 + +BOOTPARAM="-c 1-7 -m 2G@0" +. 
../../common.sh + +$MCEXEC ./C926 diff --git a/test/issues/926/C926.txt b/test/issues/926/C926.txt index f77bf070..59d400dd 100644 --- a/test/issues/926/C926.txt +++ b/test/issues/926/C926.txt @@ -1,15 +1,18 @@ -スクリプトは Wed Jun 6 14:39:24 2018 - に開始しました[?1034hbash-4.2$ ./mcexec C./C926 -C926T01... OK -C926T02... OK -C926T03... OK -C926T04... OK -C926T05... OK -C926T06... OK -C926T07... OK -C926T08... OK -bash-4.2$ exit -exit +Script started on Thu Jun 28 09:11:00 2018 +bash-4.2$ make test +gcc -o C926 C926.c -Wall -g +sh ./C926.sh +C926T01... OK +C926T02... OK +C926T03... OK +C926T04... OK +C926T05... OK +C926T06... OK +C926T07... OK +C926T08... OK +C926T09... OK +C926T10... OK +bash-4.2$ exit +exit -スクリプトは Wed Jun 6 14:39:38 2018 - に終了しました \ No newline at end of file +Script done on Thu Jun 28 09:11:07 2018 diff --git a/test/issues/926/Makefile b/test/issues/926/Makefile new file mode 100644 index 00000000..6f9b09d5 --- /dev/null +++ b/test/issues/926/Makefile @@ -0,0 +1,13 @@ +CC=gcc +TARGET=C926 + +all:: $(TARGET) + +C926: C926.c + $(CC) -o C926 C926.c -Wall -g + +test:: $(TARGET) + sh ./C926.sh + +clean:: + rm -f *.o $(TARGET) diff --git a/test/issues/926/README b/test/issues/926/README new file mode 100644 index 00000000..980c32db --- /dev/null +++ b/test/issues/926/README @@ -0,0 +1,27 @@ +【Issue#926 動作確認】 +□ テスト内容 +1. shmat, RMID, shmdtの組み合せ確認。shmat直前にshmget実行。()内は別プロセス。 +C926T01 shmat->shmdt->(shmat->shmdt)で同じ領域が使われていることを確認 +C926T02 shmat->exit->(shmat->shmdt)で同じ領域が使われていることを確認 +C926T03 shmat->RMID->(shmat)->shmdt->(shmdt)で別領域が使われることを確認 +C926T04 shmat->RMID->(shmat->shmdt)->shmdtで別領域が使われることを確認 +C926T05 shmat->RMID->shmdt->(shmat->shmdt)で別領域が使われることを確認 +C926T06 shmat->RMID->exit->(shmat->shmdt)で別領域が使われることを確認 +C926T07 shmat->(shmat)->RMID->exit->(shmdt)で同じ領域が使われることを確認 +C926T08 shmat->(shmat)->exit->(shmdt)で同じ領域が使われることを確認 + +2. 
shmgetとshmatの組み合せ確認。shmgetのキーは同じ。 +C926T09 shmget->shmat->RMID->shmget->shmatで別領域が使われることを確認 +C926T10 shmget->shmat->RMID->shmatで同じ領域が使われることを確認 + +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +□ 実行結果 +C926.txt 参照。 +全ての項目が OK となっていることを確認。 diff --git a/test/issues/976/C976.sh b/test/issues/976/C976.sh new file mode 100644 index 00000000..b2c83e54 --- /dev/null +++ b/test/issues/976/C976.sh @@ -0,0 +1,59 @@ +#!/bin/sh + +USELTP=1 +USEOSTEST=0 + +. ../../common.sh + +${MCEXEC} ./CT_001 +${MCEXEC} ./CT_002 +${MCEXEC} ./CT_003 + +tid=001 +echo "*** LT_$tid start *******************************" +sudo PATH=${LTPBIN}:${PATH} ${MCEXEC} ${LTPBIN}/execve01 2>&1 | tee ./LT_${tid}.txt +ok=`grep TPASS LT_${tid}.txt | wc -l` +ng=`grep TFAIL LT_${tid}.txt | wc -l` +if [ $ng = 0 ]; then + echo "*** LT_$tid: PASSED (ok:$ok)" +else + echo "*** LT_$tid: FAILED (ok:$ok, ng:$ng)" +fi +echo "" + +tid=002 +echo "*** LT_$tid start *******************************" +sudo PATH=${LTPBIN}:${PATH} ${MCEXEC} ${LTPBIN}/execve02 2>&1 | tee ./LT_${tid}.txt +ok=`grep TPASS LT_${tid}.txt | wc -l` +ng=`grep TFAIL LT_${tid}.txt | wc -l` +if [ $ng = 0 ]; then + echo "*** LT_$tid: PASSED (ok:$ok)" +else + echo "*** LT_$tid: FAILED (ok:$ok, ng:$ng)" +fi +echo "" + +tid=003 +echo "*** LT_$tid start *******************************" +sudo PATH=${LTPBIN}:${PATH} ${MCEXEC} ${LTPBIN}/execve03 2>&1 | tee ./LT_${tid}.txt +ok=`grep TPASS LT_${tid}.txt | wc -l` +ng=`grep TFAIL LT_${tid}.txt | wc -l` +if [ $ng = 0 ]; then + echo "*** LT_$tid: PASSED (ok:$ok)" +else + echo "*** LT_$tid: FAILED (ok:$ok, ng:$ng)" +fi +echo "" + +tid=004 +echo "*** LT_$tid start *******************************" +sudo PATH=${LTPBIN}:${PATH} ${MCEXEC} ${LTPBIN}/execve05 20 ${LTPBIN}/execve05 ${LTPBIN}/execve05 4 2>&1 | tee ./LT_${tid}.txt +ok=`grep TPASS LT_${tid}.txt | wc -l` +ng=`grep TFAIL 
LT_${tid}.txt | wc -l` +if [ $ng = 0 ]; then + echo "*** LT_$tid: PASSED (ok:$ok)" +else + echo "*** LT_$tid: FAILED (ok:$ok, ng:$ng)" +fi +echo "" + diff --git a/test/issues/976/CT_001.c b/test/issues/976/CT_001.c new file mode 100644 index 00000000..69fcfef9 --- /dev/null +++ b/test/issues/976/CT_001.c @@ -0,0 +1,64 @@ +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <error.h> +#include <sys/mman.h> +#include "./test_chk.h" + +#include <signal.h> + +#define TEST_NAME "CT_001" + +int main(int argc, char *argv[]) +{ + int rc = 0; + stack_t cur_stack; + stack_t set_stack; + void *stack_area = NULL; + char *exargv[3] = {argv[0], "stop", NULL}; + char *exenvp[1] = {NULL}; + + printf("*** %s start ********************************\n", TEST_NAME); + rc = sigaltstack(NULL, &cur_stack); + OKNG(rc != 0, "sigaltstack() to get current returned %d" + "\n (expect return is 0)", rc); + + OKNG(cur_stack.ss_sp != NULL, "default ss_sp is %p" + "\n (expect ss_sp is NULL)", cur_stack.ss_sp); + + stack_area = mmap(0, MINSIGSTKSZ, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + OKNG(stack_area == MAP_FAILED, "alloc altstack area %p" + "\n (expect area is valid vaddr)", stack_area); + + set_stack.ss_sp = stack_area; + set_stack.ss_flags = 0; + set_stack.ss_size = MINSIGSTKSZ; + + rc = sigaltstack(&set_stack, NULL); + OKNG(rc != 0, "sigaltstack() to set new stack returned %d" + "\n (expect return is 0)", rc); + + rc = sigaltstack(NULL, &cur_stack); + OKNG(rc != 0, "sigaltstack() to get current returned %d" + "\n (expect return is 0)", rc); + + OKNG(cur_stack.ss_sp != stack_area, "new ss_sp is %p" + "\n (expect ss_sp is %p)", cur_stack.ss_sp, stack_area); + + if (argc < 2) { + printf("** Re-run by execve\n"); + execve(exargv[0], exargv, exenvp); + } + + printf("*** %s PASSED\n\n", TEST_NAME); + + return 0; + +fn_fail: + printf("*** %s FAILED\n\n", TEST_NAME); + + return -1; +} diff --git a/test/issues/976/CT_002.c 
b/test/issues/976/CT_002.c new file mode 100644 index 00000000..86d28e86 --- /dev/null +++ b/test/issues/976/CT_002.c @@ -0,0 +1,52 @@ +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include "./test_chk.h" + +#include <fenv.h> + +#define TEST_NAME "CT_002" + +int main(int argc, char *argv[]) +{ + fenv_t fenv; + int rc = 0; + int round = 0; + double dummy = 0; + char *exargv[3] = {argv[0], "stop", NULL}; + char *exenvp[1] = {NULL}; + + printf("*** %s start ********************************\n", TEST_NAME); + rc = fetestexcept(FE_ALL_EXCEPT); + OKNG(rc != 0, "fetestexcept(FE_ALL_EXCEPT) returned %d" + "\n (expect return is 0)", rc); + + dummy = (double)0 / 0; + rc = fetestexcept(FE_ALL_EXCEPT); + OKNG(rc != FE_INVALID, "fetestexcept(FE_ALL_EXCEPT) returned %d" + "\n (expect return is FE_INVALID(%d))", rc, FE_INVALID); + + rc = feraiseexcept(FE_ALL_EXCEPT); + OKNG(rc != 0, "feraiseexcept(FE_ALL_EXCEPT) returned %d" + "\n (expect return is 0)", rc); + + rc = fetestexcept(FE_ALL_EXCEPT); + OKNG(rc != FE_ALL_EXCEPT, "fetestexcept(FE_ALL_EXCEPT) returned %d" + "\n (expect return is FE_ALL_EXCEPT(%d))", + rc, FE_ALL_EXCEPT); + + if (argc < 2) { + printf("** Re-run by execve\n"); + execve(exargv[0], exargv, exenvp); + } + + printf("*** %s PASSED\n\n", TEST_NAME); + + return 0; + +fn_fail: + printf("*** %s FAILED\n\n", TEST_NAME); + + return -1; +} diff --git a/test/issues/976/CT_003.c b/test/issues/976/CT_003.c new file mode 100644 index 00000000..0059eaeb --- /dev/null +++ b/test/issues/976/CT_003.c @@ -0,0 +1,48 @@ +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include "./test_chk.h" + +#include <fenv.h> + +#define TEST_NAME "CT_003" + +int main(int argc, char *argv[]) +{ + fenv_t fenv; + int rc = 0; + int round = 0; + char *exargv[3] = {argv[0], "stop", NULL}; + char *exenvp[1] = {NULL}; + + printf("*** %s start ********************************\n", TEST_NAME); + round = fegetround(); + OKNG(round != 
FE_TONEAREST, "fegetround returned %d" + "\n (expect return is FE_TONEAREST(%d))", + round, FE_TONEAREST); + + rc = fesetround(FE_TOWARDZERO); + OKNG(rc != 0, "fesetround(FE_TOWARDZERO) returned %d" + "\n (expect return is 0)", rc); + + round = fegetround(); + OKNG(round != FE_TOWARDZERO, "fegetround returned %d" + "\n (expect return is FE_TOWARDZERO(%d))", + round, FE_TOWARDZERO); + + + if (argc < 2) { + printf("** Re-run by execve\n"); + execve(exargv[0], exargv, exenvp); + } + + printf("*** %s PASSED\n\n", TEST_NAME); + + return 0; + +fn_fail: + printf("*** %s FAILED\n\n", TEST_NAME); + + return -1; +} diff --git a/test/issues/976/Makefile b/test/issues/976/Makefile new file mode 100644 index 00000000..50bf79aa --- /dev/null +++ b/test/issues/976/Makefile @@ -0,0 +1,23 @@ +CC = gcc +TARGET=CT_001 CT_002 CT_003 + +CPPFLAGS = +LDFLAGS = + +all: $(TARGET) + +CT_001: CT_001.c + $(CC) -o $@ $^ $(LDFLAGS) + +CT_002: CT_002.c + $(CC) -o $@ $^ $(LDFLAGS) -lm + +CT_003: CT_003.c + $(CC) -o $@ $^ $(LDFLAGS) -lm + +test: all + @sh ./C976.sh + +clean: + rm -f $(TARGET) *.o + diff --git a/test/issues/976/README b/test/issues/976/README new file mode 100644 index 00000000..d26203e0 --- /dev/null +++ b/test/issues/976/README @@ -0,0 +1,49 @@ +【Issue#976 動作確認】 +□ テスト内容 +1. Issueで報告された再現プログラムでの確認 +CT_001: sigaltstackがexecve時に初期化されていることの確認 + 1. 自プロセスの代替シグナルスタック(ss_sp)がNULLであることを確認 + 2. sigaltstack()で新たに代替シグナルスタックを設定 + 3. execve で自身を再実行し、自プロセスの代替シグナルスタックがNULLであることを確認 + +CT_002: fenvの浮動小数点例外発生フラグがexecve時に初期化されていることを確認 + 1. fetestexcept(FE_ALL_EXCEPT)で自プロセスの浮動小数点例外の発生フラグが + すべて0であることを確認 + 2. 0除算を実行し、浮動小数点例外(FE_INVALID)を発生させる + 3. fetestexcept(FE_ALL_EXCEPT)で自プロセスのFE_INVALIDフラグが1に + なっていることを確認 + 4. feraiseexcept(FE_ALL_EXCEPT)で自プロセスの浮動小数点例外の発生フラグを + すべて1にする + 5. execve で自身を再実行し、浮動小数点例外の発生フラグがすべて0であることを確認 + +CT_003: fenvの浮動小数丸めの設定がexecve時に初期化されていることを確認 + 1. fegetround() で自プロセスの丸めモードを取得し、FE_TONEARESTであることを確認 + 2. fesetround(FE_TOWARDZERO) で自プロセスの丸めモードをFE_TOWARDZEROに変更する + 3. 
execve で自身を再実行し、自プロセスの丸めモードがFE_TONEARESTであることを確認 + +2. 既存のexecve機能に影響がないことをLTPを用いて確認 +LT_001: ltp-execve01 + 子プロセスがexecveを実行し、正常に終了することを確認 (TPASS 1件) + +LT_002: ltp-execve02 + rootのみに実行権限が付与された実行ファイルを、 + 一般ユーザがexecveした場合に失敗することを確認 (TPASS 1件) + +LT_003: ltp-execve03 + 下記の不正な引数でexecveを実行した場合、返り値と設定されるerrnoが + 正しいことを確認 (TPASS 6件) + +LT_004: ltp-execve05 + execveの標準的な動作を確認 (TPASS 8件) + +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +□ 実行結果 +result.log 参照。 +すべての項目をPASSしていることを確認。 diff --git a/test/issues/976/result.log b/test/issues/976/result.log new file mode 100644 index 00000000..87d71eae --- /dev/null +++ b/test/issues/976/result.log @@ -0,0 +1,103 @@ +*** CT_001 start ******************************** + [OK] sigaltstack() to get current returned 0 + (expect return is 0) + [OK] default ss_sp is (nil) + (expect ss_sp is NULL) + [OK] alloc altstack area 0x2aaaab014000 + (expect area is valid vaddr) + [OK] sigaltstack() to set new stack returned 0 + (expect return is 0) + [OK] sigaltstack() to get current returned 0 + (expect return is 0) + [OK] new ss_sp is 0x2aaaab014000 + (expect ss_sp is 0x2aaaab014000) +** Re-run by execve +*** CT_001 start ******************************** + [OK] sigaltstack() to get current returned 0 + (expect return is 0) + [OK] default ss_sp is (nil) + (expect ss_sp is NULL) + [OK] alloc altstack area 0x2aaaab014000 + (expect area is valid vaddr) + [OK] sigaltstack() to set new stack returned 0 + (expect return is 0) + [OK] sigaltstack() to get current returned 0 + (expect return is 0) + [OK] new ss_sp is 0x2aaaab014000 + (expect ss_sp is 0x2aaaab014000) +*** CT_001 PASSED + +*** CT_002 start ******************************** + [OK] fetestexcept(FE_ALL_EXCEPT) returned 0 + (expect return is 0) + [OK] fetestexcept(FE_ALL_EXCEPT) returned 1 + (expect return is FE_INVALID(1)) + [OK] feraiseexcept(FE_ALL_EXCEPT) 
returned 0 + (expect return is 0) + [OK] fetestexcept(FE_ALL_EXCEPT) returned 61 + (expect return is FE_ALL_EXCEPT(61)) +** Re-run by execve +*** CT_002 start ******************************** + [OK] fetestexcept(FE_ALL_EXCEPT) returned 0 + (expect return is 0) + [OK] fetestexcept(FE_ALL_EXCEPT) returned 1 + (expect return is FE_INVALID(1)) + [OK] feraiseexcept(FE_ALL_EXCEPT) returned 0 + (expect return is 0) + [OK] fetestexcept(FE_ALL_EXCEPT) returned 61 + (expect return is FE_ALL_EXCEPT(61)) +*** CT_002 PASSED + +*** CT_003 start ******************************** + [OK] fegetround returned 0 + (expect return is FE_TONEAREST(0)) + [OK] fesetround(FE_TOWARDZERO) returned 0 + (expect return is 0) + [OK] fegetround returned 3072 + (expect return is FE_TOWARDZERO(3072)) +** Re-run by execve +*** CT_003 start ******************************** + [OK] fegetround returned 0 + (expect return is FE_TONEAREST(0)) + [OK] fesetround(FE_TOWARDZERO) returned 0 + (expect return is 0) + [OK] fegetround returned 3072 + (expect return is FE_TOWARDZERO(3072)) +*** CT_003 PASSED + +*** LT_001 start ******************************* +execl01_child 1 TPASS : execve01_child executed +execve01 0 TINFO : Child process returned TPASS +*** LT_001: PASSED (ok:2) + +*** LT_002 start ******************************* +Error: Failed to open execve_child +execve(): error loading ELF for file execve_child +execve02 1 TPASS : execve() failed expectedly: TEST_ERRNO=EACCES(13): Permission denied +execve02 0 TINFO : Child process returned TPASS +*** LT_002: PASSED (ok:2) + +*** LT_003 start ******************************* +lookup_exec_path: lookup_exec_path(): error stat +Error: /tmp/exeiG2gZP/fake.11925 is not an executable?, errno: 13 +execve(): error loading ELF for file /tmp/exeiG2gZP/fake.11925 +Error: file /tmp/exeiG2gZP/execve03.11925 is zero length +execve(): error loading ELF for file /tmp/exeiG2gZP/execve03.11925 +execve03 1 TPASS : execve failed as expected: TEST_ERRNO=ENAMETOOLONG(36): File name 
too long +execve03 2 TPASS : execve failed as expected: TEST_ERRNO=ENOENT(2): No such file or directory +execve03 3 TPASS : execve failed as expected: TEST_ERRNO=ENOTDIR(20): Not a directory +execve03 4 TPASS : execve failed as expected: TEST_ERRNO=EFAULT(14): Bad address +execve03 5 TPASS : execve failed as expected: TEST_ERRNO=EACCES(13): Permission denied +execve03 6 TPASS : execve failed as expected: TEST_ERRNO=ENOEXEC(8): Exec format error +*** LT_003: PASSED (ok:6) + +*** LT_004 start ******************************* +execve05 1 TPASS : Test DONE, pid 11995, -- /home/satoken/ltp/testcases/bin/execve05 0 /home/satoken/ltp/testcases/bin/execve05 /home/satoken/ltp/testcases/bin/execve05 +execve05 1 TPASS : Test DONE, pid 12025, -- /home/satoken/ltp/testcases/bin/execve05 0 /home/satoken/ltp/testcases/bin/execve05 /home/satoken/ltp/testcases/bin/execve05 +execve05 1 TPASS : Test DONE, pid 12055, -- /home/satoken/ltp/testcases/bin/execve05 0 /home/satoken/ltp/testcases/bin/execve05 /home/satoken/ltp/testcases/bin/execve05 +execve05 1 TPASS : Test DONE, pid 12085, -- /home/satoken/ltp/testcases/bin/execve05 0 /home/satoken/ltp/testcases/bin/execve05 /home/satoken/ltp/testcases/bin/execve05 +execve05 1 TPASS : Test DONE, pid 12115, -- /home/satoken/ltp/testcases/bin/execve05 0 /home/satoken/ltp/testcases/bin/execve05 /home/satoken/ltp/testcases/bin/execve05 +execve05 1 TPASS : Test DONE, pid 12145, -- /home/satoken/ltp/testcases/bin/execve05 0 /home/satoken/ltp/testcases/bin/execve05 /home/satoken/ltp/testcases/bin/execve05 +execve05 1 TPASS : Test DONE, pid 12175, -- /home/satoken/ltp/testcases/bin/execve05 0 /home/satoken/ltp/testcases/bin/execve05 /home/satoken/ltp/testcases/bin/execve05 +execve05 1 TPASS : Test DONE, pid 12205, -- /home/satoken/ltp/testcases/bin/execve05 0 /home/satoken/ltp/testcases/bin/execve05 /home/satoken/ltp/testcases/bin/execve05 +*** LT_004: PASSED (ok:8) diff --git a/test/issues/976/test_chk.h b/test/issues/976/test_chk.h new file mode 
100644 index 00000000..4cef42e8 --- /dev/null +++ b/test/issues/976/test_chk.h @@ -0,0 +1,23 @@ +#ifndef HEADER_TEST_CHK_H +#define HEADER_TEST_CHK_H + +#define CHKANDJUMP(cond, ...) do {\ + if (cond) {\ + fprintf(stderr, " [NG] ");\ + fprintf(stderr, __VA_ARGS__);\ + fprintf(stderr, " failed\n");\ + goto fn_fail;\ + } \ + } while (0) + +#define OKNG(cond, ...) do {\ + if (cond) {\ + CHKANDJUMP(1, __VA_ARGS__); /* cond already evaluated once above */\ + } else {\ + fprintf(stdout, " [OK] ");\ + fprintf(stdout, __VA_ARGS__);\ + fprintf(stdout, "\n");\ + } \ + } while (0) + +#endif diff --git a/test/issues/995/C995.sh b/test/issues/995/C995.sh new file mode 100644 index 00000000..704c673c --- /dev/null +++ b/test/issues/995/C995.sh @@ -0,0 +1,74 @@ +#!/bin/sh + +USELTP=1 +USEOSTEST=0 + +. ../../common.sh + +tid=001 +echo "*** RT_$tid start *******************************" +sudo ${MCEXEC} ./call_execve ./test_01.sh | tee ./RT_${tid}.txt +if grep "argv\[0\]: ./syml_put_args" ./RT_${tid}.txt > /dev/null 2>&1 ; then + echo "*** RT_$tid: PASSED" +else + echo "*** RT_$tid: FAILED" +fi +echo "" + +tid=002 +echo "*** RT_$tid start *******************************" +sudo ${MCEXEC} ./call_execve ./test_02.sh | tee ./RT_${tid}.txt +if grep "argv\[0\]: `pwd`/syml_put_args" ./RT_${tid}.txt > /dev/null 2>&1 ; then + echo "*** RT_$tid: PASSED" +else + echo "*** RT_$tid: FAILED" +fi +echo "" + +tid=001 +echo "*** LT_$tid start *******************************" +sudo PATH=${LTPBIN}:${PATH} ${MCEXEC} ${LTPBIN}/execve01 2>&1 | tee ./LT_${tid}.txt +ok=`grep TPASS LT_${tid}.txt | wc -l` +ng=`grep TFAIL LT_${tid}.txt | wc -l` +if [ $ng = 0 ]; then + echo "*** LT_$tid: PASSED (ok:$ok)" +else + echo "*** LT_$tid: FAILED (ok:$ok, ng:$ng)" +fi +echo "" + +tid=002 +echo "*** LT_$tid start *******************************" +sudo PATH=${LTPBIN}:${PATH} ${MCEXEC} ${LTPBIN}/execve02 2>&1 | tee ./LT_${tid}.txt +ok=`grep TPASS LT_${tid}.txt | wc -l` +ng=`grep TFAIL LT_${tid}.txt | wc -l` +if [ $ng = 0 ]; then + echo "*** LT_$tid: PASSED 
(ok:$ok)" +else + echo "*** LT_$tid: FAILED (ok:$ok, ng:$ng)" +fi +echo "" + +tid=003 +echo "*** LT_$tid start *******************************" +sudo PATH=${LTPBIN}:${PATH} ${MCEXEC} ${LTPBIN}/execve03 2>&1 | tee ./LT_${tid}.txt +ok=`grep TPASS LT_${tid}.txt | wc -l` +ng=`grep TFAIL LT_${tid}.txt | wc -l` +if [ $ng = 0 ]; then + echo "*** LT_$tid: PASSED (ok:$ok)" +else + echo "*** LT_$tid: FAILED (ok:$ok, ng:$ng)" +fi +echo "" + +tid=004 +echo "*** LT_$tid start *******************************" +sudo PATH=${LTPBIN}:${PATH} ${MCEXEC} ${LTPBIN}/execve05 20 ${LTPBIN}/execve05 ${LTPBIN}/execve05 4 2>&1 | tee ./LT_${tid}.txt +ok=`grep TPASS LT_${tid}.txt | wc -l` +ng=`grep TFAIL LT_${tid}.txt | wc -l` +if [ $ng = 0 ]; then + echo "*** LT_$tid: PASSED (ok:$ok)" +else + echo "*** LT_$tid: FAILED (ok:$ok, ng:$ng)" +fi +echo "" diff --git a/test/issues/995/Makefile b/test/issues/995/Makefile new file mode 100644 index 00000000..29e29a9f --- /dev/null +++ b/test/issues/995/Makefile @@ -0,0 +1,23 @@ +CC = gcc + +TARGET=put_args call_execve + +all: $(TARGET) + +put_args: put_args.c + $(CC) -o $@ $^ + +call_execve: call_execve.c + $(CC) -o $@ $^ + +test: all + @ln -nfs ./put_args ./syml_put_args + @echo "#!./syml_put_args" > test_01.sh + @echo "#!`pwd`/syml_put_args" > test_02.sh + @chmod +x ./test_01.sh + @chmod +x ./test_02.sh + @sh ./C995.sh + +clean: + rm -f $(TARGET) *.o ./syml_put_args ./test_01.sh ./test_02.sh + diff --git a/test/issues/995/README b/test/issues/995/README new file mode 100644 index 00000000..b91aadc9 --- /dev/null +++ b/test/issues/995/README @@ -0,0 +1,37 @@ +【Issue#995 動作確認】 +□ テスト内容 +1. Issueで報告された再現プログラムでの確認 +CT_001: interpがシンボリックリンクを含む相対パスの場合 + execveで実行されたシェルスクリプトのinterpに渡されるargv[0]が + シンボリックリンク未解決の相対パスとして設定されることを確認 + +CT_002: interpがシンボリックリンクを含む絶対パスの場合 + execveで実行されたシェルスクリプトのinterpに渡されるargv[0]が + シンボリックリンク未解決の絶対パスとして設定されることを確認 + +2. 
既存のexecve機能に影響がないことをLTPを用いて確認 +LT_001: ltp-execve01 + 子プロセスがexecveを実行し、正常に終了することを確認 (TPASS 1件) + +LT_002: ltp-execve02 + rootのみに実行権限が付与された実行ファイルを、 + 一般ユーザがexecveした場合に失敗することを確認 (TPASS 1件) + +LT_003: ltp-execve03 + 下記の不正な引数でexecveを実行した場合、返り値と設定されるerrnoが + 正しいことを確認 (TPASS 6件) + +LT_004: ltp-execve05 + execveの標準的な動作を確認 (TPASS 8件) + +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +□ 実行結果 +result.log 参照。 +すべての項目をPASSしていることを確認。 diff --git a/test/issues/995/put_args.c b/test/issues/995/put_args.c new file mode 100644 index 00000000..2b6766b9 --- /dev/null +++ b/test/issues/995/put_args.c @@ -0,0 +1,12 @@ +#include <stdio.h> + +int main(int argc, char **argv) /* prints each argv[i]; exit status is always 0 */ +{ + int i; + + for (i = 0; i < argc; i++) { + printf("argv[%d]: %s\n", i, argv[i]); + } + + return 0; +} diff --git a/test/issues/995/result.log b/test/issues/995/result.log new file mode 100644 index 00000000..4e242067 --- /dev/null +++ b/test/issues/995/result.log @@ -0,0 +1,45 @@ +*** RT_001 start ******************************* +argv[0]: ./syml_put_args +argv[1]: ./test_01.sh +*** RT_001: PASSED + +*** RT_002 start ******************************* +argv[0]: /home/satoken/mck_srcs/for_tests/mckernel/test/issues/995/syml_put_args +argv[1]: ./test_02.sh +*** RT_002: PASSED + +*** LT_001 start ******************************* +execl01_child 1 TPASS : execve01_child executed +execve01 0 TINFO : Child process returned TPASS +*** LT_001: PASSED (ok:2) + +*** LT_002 start ******************************* +Error: Failed to open execve_child +execve(): error loading ELF for file execve_child +execve02 1 TPASS : execve() failed expectedly: TEST_ERRNO=EACCES(13): Permission denied +execve02 0 TINFO : Child process returned TPASS +*** LT_002: PASSED (ok:2) + +*** LT_003 start ******************************* +Error: /tmp/exehdVdU3/fake.20952 is not an executable?, errno: 13 +execve(): error loading ELF for 
file /tmp/exehdVdU3/fake.20952 +Error: file /tmp/exehdVdU3/execve03.20952 is zero length +execve(): error loading ELF for file /tmp/exehdVdU3/execve03.20952 +execve03 1 TPASS : execve failed as expected: TEST_ERRNO=ENAMETOOLONG(36): File name too long +execve03 2 TPASS : execve failed as expected: TEST_ERRNO=ENOENT(2): No such file or directory +execve03 3 TPASS : execve failed as expected: TEST_ERRNO=ENOTDIR(20): Not a directory +execve03 4 TPASS : execve failed as expected: TEST_ERRNO=EFAULT(14): Bad address +execve03 5 TPASS : execve failed as expected: TEST_ERRNO=EACCES(13): Permission denied +execve03 6 TPASS : execve failed as expected: TEST_ERRNO=ENOEXEC(8): Exec format error +*** LT_003: PASSED (ok:6) + +*** LT_004 start ******************************* +execve05 1 TPASS : Test DONE, pid 21022, -- /home/satoken/ltp/testcases/bin/execve05 0 /home/satoken/ltp/testcases/bin/execve05 /home/satoken/ltp/testcases/bin/execve05 +execve05 1 TPASS : Test DONE, pid 21052, -- /home/satoken/ltp/testcases/bin/execve05 0 /home/satoken/ltp/testcases/bin/execve05 /home/satoken/ltp/testcases/bin/execve05 +execve05 1 TPASS : Test DONE, pid 21082, -- /home/satoken/ltp/testcases/bin/execve05 0 /home/satoken/ltp/testcases/bin/execve05 /home/satoken/ltp/testcases/bin/execve05 +execve05 1 TPASS : Test DONE, pid 21112, -- /home/satoken/ltp/testcases/bin/execve05 0 /home/satoken/ltp/testcases/bin/execve05 /home/satoken/ltp/testcases/bin/execve05 +execve05 1 TPASS : Test DONE, pid 21142, -- /home/satoken/ltp/testcases/bin/execve05 0 /home/satoken/ltp/testcases/bin/execve05 /home/satoken/ltp/testcases/bin/execve05 +execve05 1 TPASS : Test DONE, pid 21172, -- /home/satoken/ltp/testcases/bin/execve05 0 /home/satoken/ltp/testcases/bin/execve05 /home/satoken/ltp/testcases/bin/execve05 +execve05 1 TPASS : Test DONE, pid 21202, -- /home/satoken/ltp/testcases/bin/execve05 0 /home/satoken/ltp/testcases/bin/execve05 /home/satoken/ltp/testcases/bin/execve05 +execve05 1 TPASS : Test DONE, pid 
21232, -- /home/satoken/ltp/testcases/bin/execve05 0 /home/satoken/ltp/testcases/bin/execve05 /home/satoken/ltp/testcases/bin/execve05 +*** LT_004: PASSED (ok:8) diff --git a/test/issues/998+999/C998+999.sh b/test/issues/998+999/C998+999.sh new file mode 100644 index 00000000..232bbc83 --- /dev/null +++ b/test/issues/998+999/C998+999.sh @@ -0,0 +1,22 @@ +#!/bin/sh +USELTP=0 +USEOSTEST=1 + +BOOTPARAM="-c 1-7,9-15,17-23,25-31 -m 10G@0,10G@1 -r 1-7:0+9-15:8+17-23:16+25-31:24" +. ../../common.sh + +for i in {1..10}; do + for j in {1..100}; do + $MCEXEC "$TESTMCK" -s wait4 -n 3 > testmck.log 2>&1 + if [ $? != 0 ]; then + echo "****** ERROR ******" + cat testmck.log + exit 1 + fi + echo -n . + done + echo + echo "*** $i"00" ****************************" +done + +echo "*** C998+999 OK ****************************" diff --git a/test/issues/998+999/C998+999.txt b/test/issues/998+999/C998+999.txt new file mode 100644 index 00000000..1fdc4420 --- /dev/null +++ b/test/issues/998+999/C998+999.txt @@ -0,0 +1,29 @@ +Script started on Wed Aug 1 15:08:25 2018 +bash-4.2$ make test +mcstop+release.sh ... done +mcreboot.sh -c 1-7,9-15,17-23,25-31 -m 10G@0,10G@1 -r 1-7:0+9-15:8+17-23:16+25-31:24 ... done +.................................................................................................... +*** 100 **************************** +.................................................................................................... +*** 200 **************************** +.................................................................................................... +*** 300 **************************** +.................................................................................................... +*** 400 **************************** +.................................................................................................... 
+*** 500 **************************** +.................................................................................................... +*** 600 **************************** +.................................................................................................... +*** 700 **************************** +.................................................................................................... +*** 800 **************************** +.................................................................................................... +*** 900 **************************** +.................................................................................................... +*** 1000 **************************** +*** C998+999 OK **************************** +bash-4.2$ exit +exit + +Script done on Wed Aug 1 15:48:56 2018 diff --git a/test/issues/998+999/Makefile b/test/issues/998+999/Makefile new file mode 100644 index 00000000..769e211b --- /dev/null +++ b/test/issues/998+999/Makefile @@ -0,0 +1,14 @@ +CC = gcc +TARGET= + +CPPFLAGS = +LDFLAGS = + +all: + +test: all + @sh ./C998+999.sh + +clean: + rm -f $(TARGET) *.o + diff --git a/test/issues/998+999/README b/test/issues/998+999/README new file mode 100644 index 00000000..07fdd60d --- /dev/null +++ b/test/issues/998+999/README @@ -0,0 +1,32 @@ +【Issue#998 Issue#999 動作確認】 +□ テスト内容 +Issue#998 で指摘された現象は、既に解消している。 +これは、IKC3のサポートによりシステムコールデリゲーションの実装が変更され、 +システムコール要求がキューによって管理されるようになったため取りこぼしが +発生しないようになっているためである。 + +Issue#999 で指摘された現象は、既に解消されている。 +これは、シグナルをMcKernelからmcexecに通知するスレッドをシグナル送信元から +シグナル受信先に変更されたことによって、シグナルの通知とシステムコール要求の +順序が入れ替わることが無くなったためである。 + +以上により、Issue#998とIssue#999に対するプログラム修正は不要であるが、 +現象が本当に解消されていることを指摘プログラムを用いて確認する。 + +C998+999 mcexec test_mck -s wait4 -n 3 を 1000 回(※)連続実行し、プログラムが + 正常終了することを確認する。 + +※ McKernelの動作タイミングによって Issue#998 と Issue#999 の現象が発生したり + しなかったりしていたため、十分な繰り返しを行い、再発しない確認とする。 + +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している 
+.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +□ 実行結果 +C998+999.txt 参照。 +すべての項目をPASSしていることを確認。 diff --git a/test/mck_test_config.sample.in b/test/mck_test_config.sample.in new file mode 100644 index 00000000..dc89d515 --- /dev/null +++ b/test/mck_test_config.sample.in @@ -0,0 +1,7 @@ +# Config file for McKernel tests +: ${BIN:=@prefix@/bin} +: ${SBIN:=@prefix@/sbin} +: ${OSTEST:=} +: ${LTP:=} +: ${BOOTPARAM:=-c 1-7,9-15,17-23,25-31 -m 10G@0,10G@1 -r 1-7:0+9-15:8+17-23:16+25-31:24} +: ${MCKERNEL_VERSION:=@MCKERNEL_VERSION@} diff --git a/test/mem_dest_prev/LTP_testcase.txt b/test/mem_dest_prev/LTP_testcase.txt new file mode 100644 index 00000000..f5757cb4 --- /dev/null +++ b/test/mem_dest_prev/LTP_testcase.txt @@ -0,0 +1,100 @@ +brk01 +clone01 +clone03 +clone04 +clone06 +clone07 +close01 +close02 +dup01 +dup02 +dup03 +dup04 +dup05 +dup06 +dup07 +fork01 +fork02 +fork03 +fork04 +fork07 +fork08 +fork09 +fork10 +fork11 +mbind01 +mem01 +mem02 +mem03 +memcpy01 +memfd_create02 +memset01 +mkdir01 +mkdir08 +mkdirat01 +mknodat01 +mmap001 +mmap01 +mmap02 +mmap03 +mmap04 +mmap06 +mmap07 +mmap08 +mmap09 +mmap12 +mmapstress02 +mmapstress04 +mmapstress05 +mremap01 +mremap05 +open01 +open03 +open04 +open06 +open07 +open09 +open13 +poll01 +posix_fadvise01 +read01 +read02 +read03 +read04 +sbrk01 +sbrk02 +sendfile02 +sendfile03 +sendfile04 +sendfile05 +sendfile06 +sendfile07 +sendfile08 +sendfile09 +semctl01 +semctl03 +semctl05 +socket01 +socket02 +stream01 +stream02 +stream03 +stream04 +stream05 +unlink05 +unlink06 +unlink07 +unlink08 +vfork01 +vfork02 +vma01 +vmsplice01 +vmsplice02 +write01 +write03 +write04 +write05 +writetest +writev01 +writev02 +writev07 diff --git a/test/mem_dest_prev/README b/test/mem_dest_prev/README new file mode 100644 index 00000000..f411617a --- /dev/null +++ b/test/mem_dest_prev/README @@ -0,0 +1,25 @@ +=================== +Advance preparation +=================== +1)Implement patch of test_memtest_destroy.patch 
+cd mckernel +patch -p0 < test_memtest_destroy.patch +make +make install + + +2)Compile command execution processing +cd mckernel/test/mem_dest_prev/mcexec_test_proc/ +make + +3)Write the LTP path to LTP_DIR in the configuration file +vi config + + ex) LTP_DIR=$HOME/test/mem_dest_prev/ltp/testcases/bin/ + + +========== +How to run +========== +./go_test_McKernal.sh + diff --git a/test/mem_dest_prev/config b/test/mem_dest_prev/config new file mode 100644 index 00000000..e703a0ac --- /dev/null +++ b/test/mem_dest_prev/config @@ -0,0 +1,13 @@ +MCMOD_DIR=$HOME/ppos +LTP_DIR=$HOME/test/mem_dest_prev/ltp/testcases/bin/ +LTP_TESTCASE_FILE=LTP_testcase.txt +MCRBT_OPT_LTP="-m 3G@0,3G@1 -s" +USR_PROC="mcexec_test_proc/memtest_destroy" +OS_IDX=0 + +export MCMOD_DIR +export LTP_DIR +export LTP_TESTCASE_FILE +export MCRBT_OPT_LTP +export USR_PROC +export OS_IDX diff --git a/test/mem_dest_prev/go_test_McKernal.sh b/test/mem_dest_prev/go_test_McKernal.sh new file mode 100755 index 00000000..88a1d289 --- /dev/null +++ b/test/mem_dest_prev/go_test_McKernal.sh @@ -0,0 +1,101 @@ +#!/bin/sh + +# read config +source ./config + +#logfile="./result/test_result.log" + +# mcexec processのkill +./utils/kill_mcexec.sh &> /dev/null + +for test_case in `ls -1 ./testcases/*.txt` +do + # read testcase param + source ${test_case} + case_name=`basename ${test_case} .txt` + echo "####################" + echo "Test No:${case_name}" + + # Out-of-range address Test(Before correspondence) + echo ">>> Out-of-range address Test(Before correspondence) Start" + + # stop mckernel + sudo ${MCMOD_DIR}/sbin/mcstop+release.sh + sleep 1 + # boot mckernel + echo "${MCMOD_DIR}/sbin/mcreboot.sh ${MCRBT_OPT_BEFORE%,}" + sudo ${MCMOD_DIR}/sbin/mcreboot.sh ${MCRBT_OPT_BEFORE%,} + sleep 1 + + echo " ${MCMOD_DIR}/bin/mcexec ${USR_PROC}" + timeout -sKILL 5 ${MCMOD_DIR}/bin/mcexec ${USR_PROC} + STATUS=$? 
+ + echo "${MCMOD_DIR}/sbin/ihkosctl ${OS_IDX} kmsg" + sudo ${MCMOD_DIR}/sbin/ihkosctl ${OS_IDX} kmsg + + if [ "$STATUS" -ne 0 ]; + then + echo ">>> Out-of-range address Test End(Timeout!!!)" + else + echo ">>> Out-of-range address Test End" + fi + + # Out-of-range address Test(After correspondence) + echo ">>> Out-of-range address(After correspondence) Test Start" + + # stop mckernel + sudo ${MCMOD_DIR}/sbin/mcstop+release.sh + sleep 1 + # boot mckernel + echo "${MCMOD_DIR}/sbin/mcreboot.sh ${MCRBT_OPT_AFTER%,}" + sudo ${MCMOD_DIR}/sbin/mcreboot.sh ${MCRBT_OPT_AFTER%,} + sleep 1 + + echo " ${MCMOD_DIR}/bin/mcexec ${USR_PROC}" + timeout -sKILL 5 ${MCMOD_DIR}/bin/mcexec ${USR_PROC} + STATUS=$? + + echo "${MCMOD_DIR}/sbin/ihkosctl ${OS_IDX} kmsg" + sudo ${MCMOD_DIR}/sbin/ihkosctl ${OS_IDX} kmsg + + if [ "$STATUS" -ne 0 ]; + then + echo ">>> Out-of-range address Test End(Timeout!!!)" + else + echo ">>> Out-of-range address Test End" + fi +done + +### LTP START ################################################## +# stop mckernel +sudo ${MCMOD_DIR}/sbin/mcstop+release.sh +sleep 1 + +# boot mckernel +echo "${MCMOD_DIR}/sbin/mcreboot.sh ${MCRBT_OPT_LTP%,}" +sudo ${MCMOD_DIR}/sbin/mcreboot.sh ${MCRBT_OPT_LTP%,} +sleep 1 + +if [ ! 
-e "/dev/mcos0" ]; then + echo "Error: failed to mcreboot" + exit 1 +fi + + +TEST_NUM=`wc -l ${LTP_TESTCASE_FILE} | awk '{print $1}'` +echo ">>> LTP Test Start( $TEST_NUM counts )" + +# exec mckernel test program +COUNT=0 +while read line +do + ((COUNT++)) + echo "$COUNT:${MCMOD_DIR}/bin/mcexec ${LTP_DIR}$line" +# ${MCMOD_DIR}/bin/mcexec ${LTP_DIR}$line &>> ${logfile} + ${MCMOD_DIR}/bin/mcexec ${LTP_DIR}$line +done < ${LTP_TESTCASE_FILE} + +echo ">>> LTP Test End" +### LTP END #################################################### + diff --git a/test/mem_dest_prev/mcexec_test_proc/Makefile b/test/mem_dest_prev/mcexec_test_proc/Makefile new file mode 100644 index 00000000..0f42b64f --- /dev/null +++ b/test/mem_dest_prev/mcexec_test_proc/Makefile @@ -0,0 +1,7 @@ +OBJS = memtest_destroy + +all:$(OBJS) + +clean: + rm $(OBJS) + diff --git a/test/mem_dest_prev/mcexec_test_proc/memtest_destroy.c b/test/mem_dest_prev/mcexec_test_proc/memtest_destroy.c new file mode 100644 index 00000000..d42def7c --- /dev/null +++ b/test/mem_dest_prev/mcexec_test_proc/memtest_destroy.c @@ -0,0 +1,13 @@ +#define _GNU_SOURCE +#include <stdio.h> +#include <unistd.h> +#include <sys/syscall.h> + +int main(void) { + int rst = 0; + + rst = syscall(950); + printf("mem_destroy result:%d\n",rst); + + return 0; /* explicit status: go_test_McKernal.sh treats 0 as normal end, anything else (e.g. 137 from timeout -sKILL) as timeout */ +} diff --git a/test/mem_dest_prev/test_memtest_destroy.patch b/test/mem_dest_prev/test_memtest_destroy.patch new file mode 100644 index 00000000..8497b30e --- /dev/null +++ b/test/mem_dest_prev/test_memtest_destroy.patch @@ -0,0 +1,50 @@ +diff --git arch/x86/kernel/include/syscall_list.h arch/x86/kernel/include/syscall_list.h +index 42d1e2e..f5769b8 100644 +--- arch/x86/kernel/include/syscall_list.h ++++ arch/x86/kernel/include/syscall_list.h +@@ -156,5 +156,7 @@ SYSCALL_HANDLED(__NR_profile, profile) + SYSCALL_HANDLED(730, util_migrate_inter_kernel) + SYSCALL_HANDLED(731, util_indicate_clone) + SYSCALL_HANDLED(732, get_system) ++/* McKernel Specific */ ++SYSCALL_HANDLED(950, mem_destroy) + + /**** End 
of File ****/ +diff --git arch/x86/kernel/syscall.c arch/x86/kernel/syscall.c +index 2260b66..e96776a 100644 +--- arch/x86/kernel/syscall.c ++++ arch/x86/kernel/syscall.c +@@ -1887,4 +1887,33 @@ save_uctx(void *uctx, struct x86_user_context *regs) + ctx->fregsize = 0; + } + ++ ++#define ADD_ADDR_VAL 0x400 ++SYSCALL_DECLARE(mem_destroy) ++{ ++ int rst = 0; ++ int mem_chunks_num, chunk_id, get_numa_id; ++ unsigned long get_start, get_end; ++ unsigned long *addr; ++ ++ mem_chunks_num = ihk_mc_get_nr_memory_chunks(); ++ kprintf("%s: memory chunk %d.\n", __FUNCTION__, mem_chunks_num); ++ ++ for (chunk_id = 0; chunk_id < mem_chunks_num; chunk_id++) { ++ rst = ihk_mc_get_memory_chunk(chunk_id, &get_start, &get_end, &get_numa_id); ++ kprintf("%s: mem chunk[%d] numa ID(%d)\n" ++ ,__FUNCTION__ ,chunk_id ,get_numa_id); ++ kprintf(" phys(0x%lx - 0x%lx) virt(0x%lx - 0x%lx)\n" ++ ,get_start ,get_end ,phys_to_virt(get_start) ,phys_to_virt(get_end)); ++ } ++ ++ addr = phys_to_virt(get_end + ADD_ADDR_VAL); ++#if 1 ++ *addr = 0x1; ++#endif ++ kprintf("%s: Address out of range 0x%lx(val:%d)\n",__FUNCTION__ ,addr ,*addr); ++ ++ return rst; ++} ++ + /*** End of File ***/ diff --git a/test/mem_dest_prev/testcases/0001.txt b/test/mem_dest_prev/testcases/0001.txt new file mode 100644 index 00000000..2284d336 --- /dev/null +++ b/test/mem_dest_prev/testcases/0001.txt @@ -0,0 +1,2 @@ +MCRBT_OPT_BEFORE="-m `./utils/gen_mem_chunks.sh "0 1" 3G 1`" +MCRBT_OPT_AFTER="-m `./utils/gen_mem_chunks.sh "0 1" 3G 1` -s" diff --git a/test/mem_dest_prev/testcases/0002.txt b/test/mem_dest_prev/testcases/0002.txt new file mode 100644 index 00000000..89e20223 --- /dev/null +++ b/test/mem_dest_prev/testcases/0002.txt @@ -0,0 +1,2 @@ +MCRBT_OPT_BEFORE="-m `./utils/gen_mem_chunks.sh "0 1" 32M 2`" +MCRBT_OPT_AFTER="-m `./utils/gen_mem_chunks.sh "0 1" 32M 2` -s" diff --git a/test/mem_dest_prev/testcases/0003.txt b/test/mem_dest_prev/testcases/0003.txt new file mode 100644 index 00000000..cef2deed --- /dev/null +++ 
b/test/mem_dest_prev/testcases/0003.txt @@ -0,0 +1,2 @@ +MCRBT_OPT_BEFORE="-m `./utils/gen_mem_chunks.sh "0 1" 32M 4`" +MCRBT_OPT_AFTER="-m `./utils/gen_mem_chunks.sh "0 1" 32M 4` -s" diff --git a/test/mem_dest_prev/testcases/0004.txt b/test/mem_dest_prev/testcases/0004.txt new file mode 100644 index 00000000..d16cf1b2 --- /dev/null +++ b/test/mem_dest_prev/testcases/0004.txt @@ -0,0 +1,2 @@ +MCRBT_OPT_BEFORE="-m `./utils/gen_mem_chunks.sh "0 1" 32M 8`" +MCRBT_OPT_AFTER="-m `./utils/gen_mem_chunks.sh "0 1" 32M 8` -s" diff --git a/test/mem_dest_prev/testcases/0005.txt b/test/mem_dest_prev/testcases/0005.txt new file mode 100644 index 00000000..af667d25 --- /dev/null +++ b/test/mem_dest_prev/testcases/0005.txt @@ -0,0 +1,2 @@ +MCRBT_OPT_BEFORE="-m `./utils/gen_mem_chunks.sh "0 1" 32M 16`" +MCRBT_OPT_AFTER="-m `./utils/gen_mem_chunks.sh "0 1" 32M 16` -s" diff --git a/test/mem_dest_prev/testcases/0006.txt b/test/mem_dest_prev/testcases/0006.txt new file mode 100644 index 00000000..29e875a3 --- /dev/null +++ b/test/mem_dest_prev/testcases/0006.txt @@ -0,0 +1,2 @@ +MCRBT_OPT_BEFORE="-m `./utils/gen_mem_chunks.sh "0 1" 32M 32`" +MCRBT_OPT_AFTER="-m `./utils/gen_mem_chunks.sh "0 1" 32M 32` -s" diff --git a/test/mem_dest_prev/testcases/0007.txt b/test/mem_dest_prev/testcases/0007.txt new file mode 100644 index 00000000..22377eb2 --- /dev/null +++ b/test/mem_dest_prev/testcases/0007.txt @@ -0,0 +1,2 @@ +MCRBT_OPT_BEFORE="-m `./utils/gen_mem_chunks.sh "0 1" 32M 48`" +MCRBT_OPT_AFTER="-m `./utils/gen_mem_chunks.sh "0 1" 32M 48` -s" diff --git a/test/mem_dest_prev/testcases/0008.txt b/test/mem_dest_prev/testcases/0008.txt new file mode 100644 index 00000000..38d99d51 --- /dev/null +++ b/test/mem_dest_prev/testcases/0008.txt @@ -0,0 +1,2 @@ +MCRBT_OPT_BEFORE="-m `./utils/gen_mem_chunks.sh "0 1" 32M 64`" +MCRBT_OPT_AFTER="-m `./utils/gen_mem_chunks.sh "0 1" 32M 64` -s" diff --git a/test/mem_dest_prev/testcases/0009.txt b/test/mem_dest_prev/testcases/0009.txt new file mode 100644 
index 00000000..7b365b05 --- /dev/null +++ b/test/mem_dest_prev/testcases/0009.txt @@ -0,0 +1,2 @@ +MCRBT_OPT_BEFORE="-m `./utils/gen_mem_chunks.sh "0 1" 32M 96`" +MCRBT_OPT_AFTER="-m `./utils/gen_mem_chunks.sh "0 1" 32M 96` -s" diff --git a/test/mem_dest_prev/testcases/0010.txt b/test/mem_dest_prev/testcases/0010.txt new file mode 100644 index 00000000..9a85ebb9 --- /dev/null +++ b/test/mem_dest_prev/testcases/0010.txt @@ -0,0 +1,2 @@ +MCRBT_OPT_BEFORE="-m `./utils/gen_mem_chunks.sh "0 1" 32M 128`" +MCRBT_OPT_AFTER="-m `./utils/gen_mem_chunks.sh "0 1" 32M 128` -s" diff --git a/test/mem_dest_prev/utils/gen_mem_chunks.sh b/test/mem_dest_prev/utils/gen_mem_chunks.sh new file mode 100755 index 00000000..ce81c872 --- /dev/null +++ b/test/mem_dest_prev/utils/gen_mem_chunks.sh @@ -0,0 +1,16 @@ +#!/bin/sh + +NUMAS=$1 +MEM_SIZE=$2 +REP=$3 +CHUNKS="" + +for numa in ${NUMAS} +do + for rep in `seq 1 ${REP}` + do + CHUNKS="${CHUNKS}${MEM_SIZE}@${numa}," + done +done + +echo ${CHUNKS%,} diff --git a/test/mem_dest_prev/utils/kill_mcexec.sh b/test/mem_dest_prev/utils/kill_mcexec.sh new file mode 100755 index 00000000..74c4d6d2 --- /dev/null +++ b/test/mem_dest_prev/utils/kill_mcexec.sh @@ -0,0 +1,10 @@ +#!/bin/sh + +count=`pgrep -c -f 'mcexec '` +if [ ${count} -gt 0 ] +then + echo "kill process :" ${count} + pgrep -l -f 'mcexec ' + pgrep -f 'mcexec ' | xargs kill -9 +fi + diff --git a/test/mng_mod/issues/898_928/CT_005.c b/test/mng_mod/issues/898_928/CT_005.c index ce454a7f..104c5b81 100644 --- a/test/mng_mod/issues/898_928/CT_005.c +++ b/test/mng_mod/issues/898_928/CT_005.c @@ -39,8 +39,8 @@ int main(int argc, char** argv) { char fn[256]; char kargs[256]; - int cpus[4] = {6, 7, 8, 9}; - int num_cpus = 4; + int cpus[3] = {1, 2, 3}; + int num_cpus = 3; struct ihk_mem_chunk mem_chunks[4]; int num_mem_chunks; @@ -53,7 +53,7 @@ int main(int argc, char** argv) { status = system(cmd); // ihk_os_destroy_pseudofs - ret_ihklib = ihk_os_destroy_pseudofs(0); + ret_ihklib = 
ihk_os_destroy_pseudofs(0, 0, 0); fp = popen("cat /proc/mounts | grep /tmp/mcos/mcos0_sys", "r"); nread = fread(buf, 1, sizeof(buf), fp); buf[nread] = 0; @@ -82,20 +82,16 @@ int main(int argc, char** argv) { //OKNG(ret_ihklib == 0, "ihk_os_assign_cpu\n"); // reserve mem 128m@0,128m@1 - num_mem_chunks = 2; + num_mem_chunks = 1; mem_chunks[0].size = 128*1024*1024ULL; mem_chunks[0].numa_node_number = 0; - mem_chunks[1].size = 128*1024*1024ULL; - mem_chunks[1].numa_node_number = 1; ret_ihklib = ihk_reserve_mem(0, mem_chunks, num_mem_chunks); //OKNG(ret_ihklib == 0, "ihk_reserve_mem (2)\n"); // assign mem 128m@0,128m@1 - num_mem_chunks = 2; + num_mem_chunks = 1; mem_chunks[0].size = 128*1024*1024ULL; mem_chunks[0].numa_node_number = 0; - mem_chunks[1].size = 128*1024*1024ULL; - mem_chunks[1].numa_node_number = 1; ret_ihklib = ihk_os_assign_mem(0, mem_chunks, num_mem_chunks); //OKNG(ret_ihklib == 0, "ihk_os_assign_mem (2)\n"); @@ -122,7 +118,7 @@ int main(int argc, char** argv) { usleep(100*1000); // create pseudofs - ret_ihklib = ihk_os_create_pseudofs(0); + ret_ihklib = ihk_os_create_pseudofs(0, 0, 0); fp = popen("cat /proc/mounts | grep /tmp/mcos/mcos0_sys", "r"); nread = fread(buf, 1, sizeof(buf), fp); buf[nread] = 0; @@ -162,7 +158,7 @@ destroy: status = system(cmd); // destroy pseudofs - ret_ihklib = ihk_os_destroy_pseudofs(0); + ret_ihklib = ihk_os_destroy_pseudofs(0, 0, 0); fp = popen("cat /proc/mounts | grep /tmp/mcos/mcos0_sys", "r"); nread = fread(buf, 1, sizeof(buf), fp); buf[nread] = 0; diff --git a/test/rusage/000_mck.c b/test/rusage/000_mck.c new file mode 100644 index 00000000..f16004e4 --- /dev/null +++ b/test/rusage/000_mck.c @@ -0,0 +1,44 @@ +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <string.h> +#include "util.h" + +#define DEBUG + +int sz_anon[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 +#define NUM_AREAS 1 + +int main(int argc, char **argv) +{ + 
int i; + int sz_index; + void *anon[NUM_AREAS]; + int ret = 0; + + CHKANDJUMP(argc != 2, 255, "%s <sz_index>\n", argv[0]); + sz_index = atoi(argv[1]); + + for (i = 0; i < NUM_AREAS; i++) { + anon[i] = mmap(0, sz_anon[sz_index], PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + CHKANDJUMP(anon[i] == MAP_FAILED, 255, "mmap failed\n"); + *((unsigned long *)anon[i]) = 0x123456789abcdef0; + } + + for (i = 0; i < NUM_AREAS; i++) { + munmap(anon[i], sz_anon[sz_index]); + } + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/001_mck.c b/test/rusage/001_mck.c new file mode 100644 index 00000000..94d170c2 --- /dev/null +++ b/test/rusage/001_mck.c @@ -0,0 +1,50 @@ +#include <unistd.h> +#include <stdio.h> +#include <sys/mman.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include "util.h" + +#define DEBUG + +int sz_mem[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 + +int main(int argc, char **argv) +{ + void *mem; + int ret = 0; + int fd; + unsigned long val; + + fd = open("./file", O_RDWR); + CHKANDJUMP(fd == -1, 255, "open failed\n"); + + mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ | PROT_WRITE, MAP_SHARED, + fd, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + + val = *((unsigned long *)mem); + printf("val=%lx\n", val); + // memset(mem, 0, sz_mem[SZ_INDEX]); + + *((unsigned long *)mem) = 0x123456789abcdef0; + // ret = msync(mem, sz_mem[SZ_INDEX], MS_SYNC); + // *((unsigned long *)mem) = 0x123456789abcdef0; + + munmap(mem, sz_mem[SZ_INDEX]); + // ret = close(fd); + // CHKANDJUMP(ret != 0, 255, "close failed\n"); + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/002_mck.c b/test/rusage/002_mck.c new file mode 100644 index 00000000..d66f1b0e --- /dev/null +++ b/test/rusage/002_mck.c @@ -0,0 +1,55 @@ +#include <unistd.h> +#include <stdio.h> +#include <sys/mman.h> +#include <string.h> +#include 
<sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include "util.h" + +#define DEBUG + +int sz_mem[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 + +int main(int argc, char **argv) +{ + void *mem; + int ret = 0; + int fd; + char fn[256] = "/dev/shm/Intel_MPI"; + + fd = open(fn, O_CREAT | O_TRUNC | O_RDWR, 0600); + CHKANDJUMP(fd == -1, 255, "shm_open failed,str=%s\n", strerror(errno)); + + ret = ftruncate(fd, sz_mem[SZ_INDEX]); + CHKANDJUMP(ret != 0, 255, "ftruncate failed\n"); + + mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ | PROT_WRITE, MAP_SHARED, + fd, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + + //unsigned long val = *((unsigned long *)mem); + //memset(mem, 0, sz_mem[SZ_INDEX]); + + *((unsigned long *)mem) = 0x123456789abcdef0; + // ret = msync(mem, sz_mem[SZ_INDEX], MS_SYNC); + // *((unsigned long *)mem) = 0x123456789abcdef0; + + munmap(mem, sz_mem[SZ_INDEX]); + ret = close(fd); + CHKANDJUMP(ret != 0, 255, "close failed\n"); + ret = unlink(fn); + CHKANDJUMP(ret != 0, 255, "shm_unlink failed\n"); + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/003_mck.c b/test/rusage/003_mck.c new file mode 100644 index 00000000..d66f1b0e --- /dev/null +++ b/test/rusage/003_mck.c @@ -0,0 +1,55 @@ +#include <unistd.h> +#include <stdio.h> +#include <sys/mman.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include "util.h" + +#define DEBUG + +int sz_mem[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 + +int main(int argc, char **argv) +{ + void *mem; + int ret = 0; + int fd; + char fn[256] = "/dev/shm/Intel_MPI"; + + fd = open(fn, O_CREAT | O_TRUNC | O_RDWR, 0600); + CHKANDJUMP(fd == -1, 255, "shm_open failed,str=%s\n", strerror(errno)); + + ret = ftruncate(fd, sz_mem[SZ_INDEX]); + CHKANDJUMP(ret != 0, 255, "ftruncate failed\n"); + + mem = 
mmap(0, sz_mem[SZ_INDEX], PROT_READ | PROT_WRITE, MAP_SHARED, + fd, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + + //unsigned long val = *((unsigned long *)mem); + //memset(mem, 0, sz_mem[SZ_INDEX]); + + *((unsigned long *)mem) = 0x123456789abcdef0; + // ret = msync(mem, sz_mem[SZ_INDEX], MS_SYNC); + // *((unsigned long *)mem) = 0x123456789abcdef0; + + munmap(mem, sz_mem[SZ_INDEX]); + ret = close(fd); + CHKANDJUMP(ret != 0, 255, "close failed\n"); + ret = unlink(fn); + CHKANDJUMP(ret != 0, 255, "shm_unlink failed\n"); + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/004_mck.c b/test/rusage/004_mck.c new file mode 100644 index 00000000..ff33a208 --- /dev/null +++ b/test/rusage/004_mck.c @@ -0,0 +1,56 @@ +#include <unistd.h> +#include <stdio.h> +#include <sys/mman.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <sys/wait.h> +#include "util.h" + +#define DEBUG + +int sz_mem[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 +#define NUM_AREAS 1 + +int main(int argc, char **argv) +{ + void *mem; + int ret = 0; + pid_t pid; + int status; + int fd; + unsigned long val; + + fd = open("./file", O_RDWR); + CHKANDJUMP(fd == -1, 255, "open failed\n"); + + mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ | PROT_WRITE, MAP_SHARED, + fd, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + + val = *((unsigned long *)mem); + printf("val=%lx\n", val); + + pid = fork(); + CHKANDJUMP(pid == -1, 255, "fork failed\n"); + if (pid == 0) { + _exit(123); + } else { + ret = waitpid(pid, &status, 0); + CHKANDJUMP(ret == -1, 255, "waitpid failed\n"); + printf("exit status=%d\n", WEXITSTATUS(status)); + } + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/006_mck.c b/test/rusage/006_mck.c new file mode 100644 index 00000000..6740923c --- /dev/null +++ b/test/rusage/006_mck.c @@ -0,0 +1,61 @@ 
+#include <unistd.h> +#include <stdio.h> +#include <sys/mman.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <sys/wait.h> +#include "util.h" + +#define DEBUG + +int sz_mem[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 +#define NUM_AREAS 1 + +int main(int argc, char **argv) +{ + void *mem; + int ret = 0; + pid_t pid; + int status; + int fd; + + char fn[256] = "/dev/shm/Intel_MPI"; + + fd = open(fn, O_CREAT | O_TRUNC | O_RDWR, 0600); + CHKANDJUMP(fd == -1, 255, "shm_open failed,str=%s\n", strerror(errno)); + + ret = ftruncate(fd, sz_mem[SZ_INDEX]); + CHKANDJUMP(ret != 0, 255, "ftruncate failed\n"); + + mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ | PROT_WRITE, MAP_SHARED, + fd, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + + //unsigned long val = *((unsigned long *)mem); + *((unsigned long *)mem) = 0x123456789abcdef0; + + pid = fork(); + CHKANDJUMP(pid == -1, 255, "fork failed\n"); + if (pid == 0) { + _exit(123); + } else { + ret = waitpid(pid, &status, 0); + CHKANDJUMP(ret == -1, 255, "waitpid failed\n"); + printf("exit status=%d\n", WEXITSTATUS(status)); + } + + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/007_mck.c b/test/rusage/007_mck.c new file mode 100644 index 00000000..6740923c --- /dev/null +++ b/test/rusage/007_mck.c @@ -0,0 +1,61 @@ +#include <unistd.h> +#include <stdio.h> +#include <sys/mman.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <sys/wait.h> +#include "util.h" + +#define DEBUG + +int sz_mem[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 +#define NUM_AREAS 1 + +int main(int argc, char **argv) +{ + void *mem; + int ret = 0; + pid_t pid; + int status; + int fd; + + char fn[256] = "/dev/shm/Intel_MPI"; + + fd = open(fn, O_CREAT | O_TRUNC | O_RDWR, 0600); + 
CHKANDJUMP(fd == -1, 255, "shm_open failed,str=%s\n", strerror(errno)); + + ret = ftruncate(fd, sz_mem[SZ_INDEX]); + CHKANDJUMP(ret != 0, 255, "ftruncate failed\n"); + + mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ | PROT_WRITE, MAP_SHARED, + fd, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + + //unsigned long val = *((unsigned long *)mem); + *((unsigned long *)mem) = 0x123456789abcdef0; + + pid = fork(); + CHKANDJUMP(pid == -1, 255, "fork failed\n"); + if (pid == 0) { + _exit(123); + } else { + ret = waitpid(pid, &status, 0); + CHKANDJUMP(ret == -1, 255, "waitpid failed\n"); + printf("exit status=%d\n", WEXITSTATUS(status)); + } + + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/008_mck.c b/test/rusage/008_mck.c new file mode 100644 index 00000000..bc2ee26b --- /dev/null +++ b/test/rusage/008_mck.c @@ -0,0 +1,66 @@ +#include <unistd.h> +#include <stdio.h> +#include <sys/mman.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <sys/wait.h> +#include "util.h" + +#define DEBUG + +int sz_mem[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 +#define NUM_AREAS 1 + +int main(int argc, char **argv) +{ + void *mem; + int ret = 0; + pid_t pid; + int status; + int fd; + unsigned long val; + + pid = fork(); + CHKANDJUMP(pid == -1, 255, "fork failed\n"); + if (pid == 0) { + fd = open("./file", O_RDWR); + CHKANDJUMP(fd == -1, 255, "open failed\n"); + + mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + + val = *((unsigned long *)mem); + printf("val=%lx\n", val); + + _exit(123); + } else { + fd = open("./file", O_RDWR); + CHKANDJUMP(fd == -1, 255, "open failed\n"); + + mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + + val = *((unsigned long *)mem); + 
printf("val=%lx\n", val); + + ret = waitpid(pid, &status, 0); + CHKANDJUMP(ret == -1, 255, "waitpid failed\n"); + printf("exit status=%d\n", WEXITSTATUS(status)); + } + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/009_mck.c b/test/rusage/009_mck.c new file mode 100644 index 00000000..1efaadbc --- /dev/null +++ b/test/rusage/009_mck.c @@ -0,0 +1,75 @@ +#include <unistd.h> +#include <stdio.h> +#include <sys/mman.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <sys/wait.h> +#include <sys/ipc.h> +#include <sys/shm.h> +#include "util.h" + +#define DEBUG + +int sz_mem[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 + +int main(int argc, char **argv) +{ + void *mem; + int ret = 0; + pid_t pid; + int status; + key_t key = ftok(argv[0], 0); + int shmid; + + shmid = shmget(key, sz_mem[SZ_INDEX], IPC_CREAT | 0660); + CHKANDJUMP(shmid == -1, 255, "shmget failed: %s\n", strerror(errno)); + + pid = fork(); + CHKANDJUMP(pid == -1, 255, "fork failed\n"); + if (pid == 0) { + mem = shmat(shmid, NULL, 0); + CHKANDJUMP(mem == (void *)-1, 255, "shmat failed: %s\n", + strerror(errno)); + + *((unsigned long *)mem) = 0x1234; + + ret = shmdt(mem); + CHKANDJUMP(ret == -1, 255, "shmdt failed\n"); + + _exit(123); + } else { + mem = shmat(shmid, NULL, 0); + CHKANDJUMP(mem == (void *)-1, 255, "shmat failed: %s\n", + strerror(errno)); + + + ret = waitpid(pid, &status, 0); + CHKANDJUMP(ret == -1, 255, "waitpid failed\n"); + + printf("%lx\n", *((unsigned long *)mem)); + +#if 0 + struct shmid_ds buf; + + ret = shmctl(shmid, IPC_RMID, &buf); + CHKANDJUMP(ret == -1, 255, "shmctl failed\n"); +#endif + + ret = shmdt(mem); + CHKANDJUMP(ret == -1, 255, "shmdt failed\n"); + } + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/010_mck.c b/test/rusage/010_mck.c new file mode 100644 index 00000000..f16004e4 --- /dev/null +++ 
b/test/rusage/010_mck.c @@ -0,0 +1,44 @@ +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <string.h> +#include "util.h" + +#define DEBUG + +int sz_anon[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 +#define NUM_AREAS 1 + +int main(int argc, char **argv) +{ + int i; + int sz_index; + void *anon[NUM_AREAS]; + int ret = 0; + + CHKANDJUMP(argc != 2, 255, "%s <sz_index>\n", argv[0]); + sz_index = atoi(argv[1]); + + for (i = 0; i < NUM_AREAS; i++) { + anon[i] = mmap(0, sz_anon[sz_index], PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + CHKANDJUMP(anon[i] == MAP_FAILED, 255, "mmap failed\n"); + *((unsigned long *)anon[i]) = 0x123456789abcdef0; + } + + for (i = 0; i < NUM_AREAS; i++) { + munmap(anon[i], sz_anon[sz_index]); + } + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/011_mck.c b/test/rusage/011_mck.c new file mode 100644 index 00000000..3dfc0d28 --- /dev/null +++ b/test/rusage/011_mck.c @@ -0,0 +1,108 @@ +#include <unistd.h> +#include <stdio.h> +#include <sys/mman.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <sys/wait.h> +#include <sys/ipc.h> +#include <sys/shm.h> +#include <xpmem.h> +#include "util.h" + +#define DEBUG + +int sz_mem[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 + +int main(int argc, char **argv) +{ + void *mem; + int ret = 0; + pid_t pid; + int status; + key_t key = ftok(argv[0], 0); + int shmid; + xpmem_segid_t segid; + + shmid = shmget(key, sz_mem[SZ_INDEX], IPC_CREAT | 0660); + CHKANDJUMP(shmid == -1, 255, "shmget failed: %s\n", strerror(errno)); + + mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ | PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + memset(mem, 0, sz_mem[SZ_INDEX]); + + pid = fork(); + CHKANDJUMP(pid == -1, 255,
"fork failed\n"); + if (pid == 0) { + void *shm = shmat(shmid, NULL, 0); + + CHKANDJUMP(shm == (void *)-1, 255, "shmat failed: %s\n", + strerror(errno)); + + while ((segid = *(xpmem_segid_t *)shm) == 0) { + }; + + ret = shmdt(shm); + CHKANDJUMP(ret == -1, 255, "shmdt failed\n"); + + xpmem_apid_t apid = xpmem_get(segid, XPMEM_RDWR, + XPMEM_PERMIT_MODE, NULL); + CHKANDJUMP(apid == -1, 255, "xpmem_get failed: %s\n", + strerror(errno)); + + struct xpmem_addr addr = { .apid = apid, .offset = 0 }; + void *attach = xpmem_attach(addr, sz_mem[SZ_INDEX], NULL); + + CHKANDJUMP(attach == (void *)-1, 255, + "xpmem_attach failed: %s\n", strerror(errno)); + + *((unsigned long *)attach) = 0x1234; + + ret = xpmem_detach(attach); + CHKANDJUMP(ret == -1, 255, "xpmem_detach failed\n"); + + _exit(123); + } else { + void *shm = shmat(shmid, NULL, 0); /* carries segid to child */ + + CHKANDJUMP(shm == (void *)-1, 255, "shmat failed: %s\n", + strerror(errno)); + + segid = xpmem_make(mem, sz_mem[SZ_INDEX], XPMEM_PERMIT_MODE, + (void *)0666); + CHKANDJUMP(segid == -1, 255, "xpmem_ioctl failed: %s\n", + strerror(errno)); + + *(xpmem_segid_t *)shm = segid; + + ret = waitpid(pid, &status, 0); + CHKANDJUMP(ret == -1, 255, "waitpid failed\n"); + + printf("%lx\n", *((unsigned long *)mem)); + + struct shmid_ds buf; + + ret = shmctl(shmid, IPC_RMID, &buf); + CHKANDJUMP(ret == -1, 255, "shmctl failed\n"); + + ret = shmdt(shm); + CHKANDJUMP(ret == -1, 255, "shmdt failed\n"); + + ret = xpmem_remove(segid); + CHKANDJUMP(ret == -1, 255, "xpmem_remove failed\n"); + } + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/012_mck.c b/test/rusage/012_mck.c new file mode 100644 index 00000000..c62b5594 --- /dev/null +++ b/test/rusage/012_mck.c @@ -0,0 +1,78 @@ +#include <unistd.h> +#include <stdio.h> +#include <sys/mman.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <sys/wait.h> +#include <sys/ipc.h> +#include <sys/shm.h> +#include
<xpmem.h> +#include "util.h" + +#define DEBUG + +int sz_mem[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 + +int main(int argc, char **argv) +{ + void *mem; + int ret = 0; + pid_t pid; + int status; + xpmem_segid_t segid; + + mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + memset(mem, 0, sz_mem[SZ_INDEX]); + + segid = xpmem_make(mem, sz_mem[SZ_INDEX], XPMEM_PERMIT_MODE, + (void *)0666); + CHKANDJUMP(segid == -1, 255, "xpmem_ioctl failed: %s\n", + strerror(errno)); + + pid = fork(); + CHKANDJUMP(pid == -1, 255, "fork failed\n"); + if (pid == 0) { + xpmem_apid_t apid = xpmem_get(segid, XPMEM_RDWR, + XPMEM_PERMIT_MODE, NULL); + + CHKANDJUMP(apid == -1, 255, "xpmem_get failed: %s\n", + strerror(errno)); + + struct xpmem_addr addr = { .apid = apid, .offset = 0 }; + void *attach = xpmem_attach(addr, sz_mem[SZ_INDEX], NULL); + + CHKANDJUMP(attach == (void *)-1, 255, + "xpmem_attach failed: %s\n", strerror(errno)); + + *((unsigned long *)attach) = 0x1234; + + ret = xpmem_detach(attach); + CHKANDJUMP(ret == -1, 255, "xpmem_detach failed\n"); + + _exit(123); + } else { + ret = waitpid(pid, &status, 0); + CHKANDJUMP(ret == -1, 255, "waitpid failed\n"); + + printf("%lx\n", *((unsigned long *)mem)); + + ret = xpmem_remove(segid); + CHKANDJUMP(ret == -1, 255, "xpmem_remove failed\n"); + } + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/013_mck.c b/test/rusage/013_mck.c new file mode 100644 index 00000000..03ff7974 --- /dev/null +++ b/test/rusage/013_mck.c @@ -0,0 +1,48 @@ +#include <unistd.h> +#include <stdio.h> +#include <sys/mman.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include "util.h" + +#define DEBUG + +int sz_mem[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 + +int main(int argc, char 
**argv) +{ + void *mem; + int ret = 0; + int fd; + + fd = open("./file", O_RDWR); + CHKANDJUMP(fd == -1, 255, "open failed\n"); + + mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ | PROT_WRITE, + MAP_PRIVATE, fd, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + + // unsigned long val = *((unsigned long *)mem); + // memset(mem, 0, sz_mem[SZ_INDEX]); + + *((unsigned long *)mem) = 0x123456789abcdef0; + // ret = msync(mem, sz_mem[SZ_INDEX], MS_SYNC); + // *((unsigned long *)mem) = 0x123456789abcdef0; + + munmap(mem, sz_mem[SZ_INDEX]); + // ret = close(fd); + // CHKANDJUMP(ret != 0, 255, "close failed\n"); + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/014_mck.c b/test/rusage/014_mck.c new file mode 100644 index 00000000..d9cca63f --- /dev/null +++ b/test/rusage/014_mck.c @@ -0,0 +1,51 @@ +#define _GNU_SOURCE /* See feature_test_macros(7) */ +#include <unistd.h> +#include <stdio.h> +#include <sys/mman.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include "util.h" + +#define DEBUG + +int sz_unit[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30)}; + +#define SZ_INDEX 0 + +int main(int argc, char **argv) +{ + void *mem; + int ret = 0; + int fd; + + fd = open("./file", O_RDWR); + CHKANDJUMP(fd == -1, 255, "open failed\n"); + + mem = mmap(0, 2 * sz_unit[SZ_INDEX], PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + + ret = remap_file_pages(mem + 0 * sz_unit[SZ_INDEX], + sz_unit[SZ_INDEX], 0, 1, MAP_SHARED); + CHKANDJUMP(ret != 0, 255, "remap_file_pages failed\n"); + + ret = remap_file_pages(mem + 1 * sz_unit[SZ_INDEX], + sz_unit[SZ_INDEX], 0, 0, MAP_SHARED); + CHKANDJUMP(ret != 0, 255, "remap_file_pages failed\n"); + + *((unsigned long *)mem) = 0x123456789abcdef0; + *(unsigned long *)((char *)mem + 1 * sz_unit[SZ_INDEX]) = + 0x123456789abcdef0; + + munmap(mem, 2 * sz_unit[SZ_INDEX]); + + fn_exit: + return ret; + fn_fail: + goto 
fn_exit; +} diff --git a/test/rusage/015_mck.c b/test/rusage/015_mck.c new file mode 100644 index 00000000..2d185801 --- /dev/null +++ b/test/rusage/015_mck.c @@ -0,0 +1,55 @@ +#define _GNU_SOURCE /* See feature_test_macros(7) */ +#include <unistd.h> +#include <stdio.h> +#include <sys/mman.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include "util.h" + +#define DEBUG + +int sz_unit[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30)}; + +#define SZ_INDEX 0 + +int main(int argc, char **argv) +{ + void *mem; + int ret = 0; + int fd; + + fd = open("./file", O_RDWR); + CHKANDJUMP(fd == -1, 255, "open failed\n"); + + mem = mmap(0, 2 * sz_unit[SZ_INDEX], PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + + *((unsigned long *)mem) = 0x123456789abcdef0; + *(unsigned long *)((char *)mem + 1 * sz_unit[SZ_INDEX]) = + 0xbeefbeefbeefbeef; + + ret = remap_file_pages(mem + 0 * sz_unit[SZ_INDEX], + sz_unit[SZ_INDEX], 0, 1, MAP_SHARED); + CHKANDJUMP(ret != 0, 255, "remap_file_pages failed\n"); + + ret = remap_file_pages(mem + 1 * sz_unit[SZ_INDEX], + sz_unit[SZ_INDEX], 0, 0, MAP_SHARED); + CHKANDJUMP(ret != 0, 255, "remap_file_pages failed\n"); + + *((unsigned long *)mem) = 0x123456789abcdef0; + *(unsigned long *)((char *)mem + 1 * sz_unit[SZ_INDEX]) = + 0xbeefbeefbeefbeef; + + munmap(mem, 2 * sz_unit[SZ_INDEX]); + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/016_mck.c b/test/rusage/016_mck.c new file mode 100644 index 00000000..e8812324 --- /dev/null +++ b/test/rusage/016_mck.c @@ -0,0 +1,43 @@ +#define _GNU_SOURCE /* See feature_test_macros(7) */ + +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <string.h> +#include "util.h" + +#define DEBUG + +int sz_anon[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 +#define NUM_AREAS 1 + +int main(int argc, 
char **argv) +{ + void *mem; + void *newmem; + int ret = 0; + + mem = mmap(0, sz_anon[SZ_INDEX], PROT_READ | PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + *((unsigned long *)mem) = 0x123456789abcdef0; + + newmem = mremap(mem, sz_anon[SZ_INDEX], sz_anon[SZ_INDEX + 1], + MREMAP_MAYMOVE); + CHKANDJUMP(newmem == MAP_FAILED, 255, "mremap failed\n"); + *((unsigned long *)newmem) = 0xbeefbeefbeefbeef; /* may have moved */ + + munmap(newmem, sz_anon[SZ_INDEX + 1]); + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/017_mck.c b/test/rusage/017_mck.c new file mode 100644 index 00000000..5b67712b --- /dev/null +++ b/test/rusage/017_mck.c @@ -0,0 +1,53 @@ +#define _GNU_SOURCE /* See feature_test_macros(7) */ + +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include "util.h" + +#define DEBUG + +int sz_mem[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 +#define NUM_AREAS 1 + +int main(int argc, char **argv) +{ + void *mem; + int ret = 0; + int fd; + + fd = open("./file", O_RDWR); + CHKANDJUMP(fd == -1, 255, "open failed\n"); + + mem = mmap(0, 3 * sz_mem[SZ_INDEX], PROT_READ | PROT_WRITE, + MAP_PRIVATE, fd, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + + *(unsigned long *)((char *)mem + 0) = 0x123456789abcdef0; + *(unsigned long *)((char *)mem + sz_mem[SZ_INDEX]) = + 0xbeefbeefbeefbeef; + *(unsigned long *)((char *)mem + 2 * sz_mem[SZ_INDEX]) = + 0xbeefbeefbeefbeef; + + ret = mprotect(mem + sz_mem[SZ_INDEX], sz_mem[SZ_INDEX], + PROT_READ | PROT_WRITE | PROT_EXEC); + CHKANDJUMP(ret != 0, 255, "mprotect failed\n"); + + munmap(mem, 3 * sz_mem[SZ_INDEX]); + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/018_mck.c b/test/rusage/018_mck.c new file mode 100644 index 00000000..80e38afc --- /dev/null +++
b/test/rusage/018_mck.c @@ -0,0 +1,49 @@ +#define _GNU_SOURCE /* See feature_test_macros(7) */ + +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include "util.h" + +#define DEBUG + +int sz_mem[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 1 +#define NUM_AREAS 1 + +int main(int argc, char **argv) +{ + void *mem; + int ret = 0; + + mem = mmap(0, 3 * sz_mem[SZ_INDEX], PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + + *(unsigned long *)((char *)mem + 0) = 0x123456789abcdef0; + *(unsigned long *)((char *)mem + sz_mem[SZ_INDEX]) = + 0xbeefbeefbeefbeef; + *(unsigned long *)((char *)mem + 2 * sz_mem[SZ_INDEX]) = + 0xbeefbeefbeefbeef; + + ret = mprotect(mem + sz_mem[SZ_INDEX - 1], sz_mem[SZ_INDEX - 1], + PROT_READ | PROT_WRITE | PROT_EXEC); + CHKANDJUMP(ret != 0, 255, "mprotect failed\n"); + + munmap(mem, 3 * sz_mem[SZ_INDEX]); + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/020_mck.c b/test/rusage/020_mck.c new file mode 100644 index 00000000..81406c04 --- /dev/null +++ b/test/rusage/020_mck.c @@ -0,0 +1,44 @@ +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <string.h> +#include "util.h" + +#define DEBUG + +int sz_anon[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 +#define NUM_AREAS 1 + +int main(int argc, char **argv) +{ + int i; + int sz_index; + void *anon[NUM_AREAS]; + int ret = 0; + + CHKANDJUMP(argc != 2, 255, "%s <sz_index>\n", argv[0]); + sz_index = atoi(argv[1]); + + for (i = 0; i < NUM_AREAS; i++) { + anon[i] = mmap(0, sz_anon[sz_index], PROT_READ | PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + CHKANDJUMP(anon[i] == MAP_FAILED, 255, "mmap failed\n"); + *((unsigned long *)anon[i]) = 
0x123456789abcdef0; + } + + for (i = 0; i < NUM_AREAS; i++) { + munmap(anon[i], sz_anon[sz_index]); + } + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/030_mck.c b/test/rusage/030_mck.c new file mode 100644 index 00000000..f16004e4 --- /dev/null +++ b/test/rusage/030_mck.c @@ -0,0 +1,44 @@ +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <string.h> +#include "util.h" + +#define DEBUG + +int sz_anon[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 +#define NUM_AREAS 1 + +int main(int argc, char **argv) +{ + int i; + int sz_index; + void *anon[NUM_AREAS]; + int ret = 0; + + CHKANDJUMP(argc != 2, 255, "%s <sz_index>\n", argv[0]); + sz_index = atoi(argv[1]); + + for (i = 0; i < NUM_AREAS; i++) { + anon[i] = mmap(0, sz_anon[sz_index], PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + CHKANDJUMP(anon[i] == MAP_FAILED, 255, "mmap failed\n"); + *((unsigned long *)anon[i]) = 0x123456789abcdef0; + } + + for (i = 0; i < NUM_AREAS; i++) { + munmap(anon[i], sz_anon[sz_index]); + } + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/rusage100.patch b/test/rusage/100.patch similarity index 100% rename from test/rusage/rusage100.patch rename to test/rusage/100.patch diff --git a/test/rusage/100_mck.c b/test/rusage/100_mck.c new file mode 100644 index 00000000..1f1e96e7 --- /dev/null +++ b/test/rusage/100_mck.c @@ -0,0 +1,70 @@ +#include <stdio.h> +#include <sys/mman.h> +#include <unistd.h> +#include <sys/syscall.h> /* For SYS_xxx definitions */ +#include "util.h" +#include "ihklib.h" +#include "mckernel/ihklib_rusage.h" + +#define DEBUG + +int sz_anon[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 +#define NLOOP 2 + +int main(int argc, char **argv) +{ + int i, j, ret = 0; + void *mem; + struct mckernel_rusage rusage; + + for (j = 0; j < NLOOP; j++) { + mem = 
mmap(0, sz_anon[SZ_INDEX], PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + *((unsigned long *)mem) = 0x123456789abcdef0; + + ret = syscall(900); + CHKANDJUMP(ret != 0, 255, "syscall failed\n"); + + ret = ihk_os_getrusage(0, &rusage, sizeof(rusage)); + CHKANDJUMP(ret != 0, 255, "ihk_os_getrusage failed\n"); + + for (i = 0; i < IHK_MAX_NUM_PGSIZES; i++) { + printf("memory_stat_rss[%d]=%ld\n", i, + rusage.memory_stat_rss[i]); + printf("memory_stat_mapped_file[%d]=%ld\n", i, + rusage.memory_stat_mapped_file[i]); + } + printf("memory_max_usage=%ld\n", rusage.memory_max_usage); + printf("memory_kmem_usage=%ld\n", rusage.memory_kmem_usage); + printf("memory_kmem_max_usage=%ld\n", + rusage.memory_kmem_max_usage); +#define NUM_NUMA_NODES 2 + for (i = 0; i < NUM_NUMA_NODES; i++) { + printf("memory_numa_stat[%d]=%ld\n", i, + rusage.memory_numa_stat[i]); + } +#define NUM_CPUS 2 + for (i = 0; i < NUM_CPUS; i++) { + printf("cpuacct_usage_percpu[%d]=%ld\n", i, + rusage.cpuacct_usage_percpu[i]); + } + printf("cpuacct_stat_system=%ld\n", + rusage.cpuacct_stat_system); + printf("cpuacct_stat_user=%ld\n", rusage.cpuacct_stat_user); + printf("cpuacct_usage=%ld\n", rusage.cpuacct_usage); + + printf("num_threads=%d\n", rusage.num_threads); + printf("max_num_threads=%d\n", rusage.max_num_threads); + } + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/101_mck.c b/test/rusage/101_mck.c new file mode 100644 index 00000000..f0df5d14 --- /dev/null +++ b/test/rusage/101_mck.c @@ -0,0 +1,88 @@ +#include <stdio.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> +#include <sys/syscall.h> /* For SYS_xxx definitions */ +#include "util.h" +#include "ihklib.h" +#include "mckernel/ihklib_rusage.h" + +#define DEBUG + +int sz_anon[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 +#define NLOOP 2 + +int main(int 
argc, char **argv) +{ + int i, j, ret = 0; + void *mem; + struct mckernel_rusage rusage; + pid_t pid; + int status; + + pid = fork(); + CHKANDJUMP(pid == -1, 255, "fork failed"); + if (pid == 0) { + + for (j = 0; j < NLOOP; j++) { + mem = mmap(0, sz_anon[SZ_INDEX], + PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + *((unsigned long *)mem) = 0x123456789abcdef0; + + ret = syscall(900); + CHKANDJUMP(ret != 0, 255, "syscall failed\n"); + + ret = ihk_os_getrusage(0, &rusage, sizeof(rusage)); + CHKANDJUMP(ret != 0, 255, "ihk_os_getrusage failed\n"); + + for (i = 0; i < IHK_MAX_NUM_PGSIZES; i++) { + printf("memory_stat_rss[%d]=%ld\n", + i, rusage.memory_stat_rss[i]); + printf("memory_stat_mapped_file[%d]=%ld\n", + i, rusage.memory_stat_mapped_file[i]); + } + printf("memory_max_usage=%ld\n", + rusage.memory_max_usage); + printf("memory_kmem_usage=%ld\n", + rusage.memory_kmem_usage); + printf("memory_kmem_max_usage=%ld\n", + rusage.memory_kmem_max_usage); +#define NUM_NUMA_NODES 2 + for (i = 0; i < NUM_NUMA_NODES; i++) { + printf("memory_numa_stat[%d]=%ld\n", + i, rusage.memory_numa_stat[i]); + } +#define NUM_CPUS 2 + for (i = 0; i < NUM_CPUS; i++) { + printf("cpuacct_usage_percpu[%d]=%ld\n", + i, rusage.cpuacct_usage_percpu[i]); + } + printf("cpuacct_stat_system=%ld\n", + rusage.cpuacct_stat_system); + printf("cpuacct_stat_user=%ld\n", + rusage.cpuacct_stat_user); + printf("cpuacct_usage=%ld\n", rusage.cpuacct_usage); + + printf("num_threads=%d\n", rusage.num_threads); + printf("max_num_threads=%d\n", rusage.max_num_threads); + } + _exit(123); + } else { + ret = waitpid(pid, &status, 0); + CHKANDJUMP(ret == -1, 255, "waitpid failed\n"); + } + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/102_mck.c b/test/rusage/102_mck.c new file mode 100644 index 00000000..1bb29b93 --- /dev/null +++ b/test/rusage/102_mck.c @@ -0,0 +1,78 @@ +#include <stdio.h> +#include <sys/mman.h> 
+#include <unistd.h> +#include <sys/syscall.h> /* For SYS_xxx definitions */ +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include "util.h" +#include "ihklib.h" +#include "mckernel/ihklib_rusage.h" + +#define DEBUG + +int sz_mem[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 +#define NLOOP 2 + +int main(int argc, char **argv) +{ + int i, j, ret = 0; + void *mem; + int fd; + struct mckernel_rusage rusage; + + fd = open("./file", O_RDWR); + CHKANDJUMP(fd == -1, 255, "open failed\n"); + + for (j = 0; j < NLOOP; j++) { + + mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ | PROT_WRITE, + MAP_SHARED, fd, sz_mem[SZ_INDEX] * j); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + *((unsigned long *)mem) = 0x123456789abcdef0; + + ret = syscall(900); + CHKANDJUMP(ret != 0, 255, "syscall failed\n"); + + ret = ihk_os_getrusage(0, &rusage, sizeof(rusage)); + CHKANDJUMP(ret != 0, 255, "ihk_os_getrusage failed\n"); + + for (i = 0; i < IHK_MAX_NUM_PGSIZES; i++) { + printf("memory_stat_rss[%d]=%ld\n", + i, rusage.memory_stat_rss[i]); + printf("memory_stat_mapped_file[%d]=%ld\n", + i, rusage.memory_stat_mapped_file[i]); + } + printf("memory_max_usage=%ld\n", rusage.memory_max_usage); + printf("memory_kmem_usage=%ld\n", rusage.memory_kmem_usage); + printf("memory_kmem_max_usage=%ld\n", + rusage.memory_kmem_max_usage); +#define NUM_NUMA_NODES 2 + for (i = 0; i < NUM_NUMA_NODES; i++) { + printf("memory_numa_stat[%d]=%ld\n", + i, rusage.memory_numa_stat[i]); + } +#define NUM_CPUS 2 + for (i = 0; i < NUM_CPUS; i++) { + printf("cpuacct_usage_percpu[%d]=%ld\n", + i, rusage.cpuacct_usage_percpu[i]); + } + printf("cpuacct_stat_system=%ld\n", + rusage.cpuacct_stat_system); + printf("cpuacct_stat_user=%ld\n", rusage.cpuacct_stat_user); + printf("cpuacct_usage=%ld\n", rusage.cpuacct_usage); + + printf("num_threads=%d\n", rusage.num_threads); + printf("max_num_threads=%d\n", rusage.max_num_threads); + } + fn_exit: + return 
ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/103_mck.c b/test/rusage/103_mck.c new file mode 100644 index 00000000..b0deb9f2 --- /dev/null +++ b/test/rusage/103_mck.c @@ -0,0 +1,70 @@ +#include <stdio.h> +#include <sys/mman.h> +#include <unistd.h> +#include <sys/syscall.h> /* For SYS_xxx definitions */ +#include "util.h" +#include "ihklib.h" +#include "mckernel/ihklib_rusage.h" + +#define DEBUG + +int sz_anon[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 +#define NLOOP 2 + +int main(int argc, char **argv) +{ + int i, j, ret = 0; + void *mem; + struct mckernel_rusage rusage; + + for (j = 0; j < NLOOP; j++) { + mem = mmap(0, sz_anon[SZ_INDEX], PROT_READ | PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + *((unsigned long *)mem) = 0x123456789abcdef0; + + ret = syscall(900); + CHKANDJUMP(ret != 0, 255, "syscall failed\n"); + + ret = ihk_os_getrusage(0, &rusage, sizeof(rusage)); + CHKANDJUMP(ret != 0, 255, "ihk_os_getrusage failed\n"); + + for (i = 0; i < IHK_MAX_NUM_PGSIZES; i++) { + printf("memory_stat_rss[%d]=%ld\n", + i, rusage.memory_stat_rss[i]); + printf("memory_stat_mapped_file[%d]=%ld\n", + i, rusage.memory_stat_mapped_file[i]); + } + printf("memory_max_usage=%ld\n", rusage.memory_max_usage); + printf("memory_kmem_usage=%ld\n", rusage.memory_kmem_usage); + printf("memory_kmem_max_usage=%ld\n", + rusage.memory_kmem_max_usage); +#define NUM_NUMA_NODES 2 + for (i = 0; i < NUM_NUMA_NODES; i++) { + printf("memory_numa_stat[%d]=%ld\n", + i, rusage.memory_numa_stat[i]); + } +#define NUM_CPUS 2 + for (i = 0; i < NUM_CPUS; i++) { + printf("cpuacct_usage_percpu[%d]=%ld\n", + i, rusage.cpuacct_usage_percpu[i]); + } + printf("cpuacct_stat_system=%ld\n", + rusage.cpuacct_stat_system); + printf("cpuacct_stat_user=%ld\n", rusage.cpuacct_stat_user); + printf("cpuacct_usage=%ld\n", rusage.cpuacct_usage); + + printf("num_threads=%d\n", rusage.num_threads); + 
printf("max_num_threads=%d\n", rusage.max_num_threads); + } + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/rusage104.patch b/test/rusage/200.patch similarity index 100% rename from test/rusage/rusage104.patch rename to test/rusage/200.patch diff --git a/test/rusage/rusage104_lin.c b/test/rusage/200_lin.c similarity index 97% rename from test/rusage/rusage104_lin.c rename to test/rusage/200_lin.c index 192c36a8..aec6f7fe 100644 --- a/test/rusage/rusage104_lin.c +++ b/test/rusage/200_lin.c @@ -20,7 +20,7 @@ int main(int argc, char **argv) if ((ret = ihk_os_getrusage(0, &rusage, sizeof(rusage)))) { fprintf(stderr, "%s: ihk_os_getrusage failed\n", __func__); ret = -EINVAL; - goto out; + goto fn_fail; } OKNG(WITHIN_RANGE(rusage.cpuacct_usage_percpu[1], DELAY0, SCALE), @@ -32,7 +32,7 @@ int main(int argc, char **argv) printf("All tests finished\n"); - out: + fn_fail: return ret; } diff --git a/test/rusage/rusage104_mck.c b/test/rusage/200_mck.c similarity index 99% rename from test/rusage/rusage104_mck.c rename to test/rusage/200_mck.c index a68f7d97..b3925e27 100644 --- a/test/rusage/rusage104_mck.c +++ b/test/rusage/200_mck.c @@ -12,6 +12,7 @@ #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> +#include "util.h" #define DELAY0 (100UL * 1000 * 1000) @@ -115,7 +116,6 @@ unsigned long nsec; void fwq_init(void) { struct timespec start, end; - int i; clock_gettime(TIMER_KIND, &start); #define N_INIT 10000000 diff --git a/test/rusage/Makefile b/test/rusage/Makefile index b10e3926..7b8f1bc4 100644 --- a/test/rusage/Makefile +++ b/test/rusage/Makefile @@ -1,179 +1,60 @@ -.SUFFIXES: # Clear suffixes -.SUFFIXES: .c +include $(HOME)/.mck_test_config.mk +XPMEM_DIR=$(HOME)/usr CC = gcc -CPPFLAGS = -I$(HOME)/project/os/install/include -CCFLAGS = -g -LDFLAGS = -L$(HOME)/project/os/install/lib -lihk -Wl,-rpath -Wl,$(HOME)/project/os/install/lib -lpthread -EXES = -SRCS = -OBJS = $(SRCS:.c=.o) +CPPFLAGSLIN = -I$(MCK_DIR)/include +CFLAGSLIN = 
-Wall -Werror -g +LDFLAGSLIN = -L$(MCK_DIR)/lib -lihk -Wl,-rpath -Wl,$(MCK_DIR)/lib -lpthread +SRCSLIN = $(shell ls *_lin.c) +EXESLIN = $(SRCSLIN:.c=) +OBJSLIN = $(SRCSLIN:.c=.o) -CPPFLAGSMCK = -I$(HOME)/usr/include -CCFLAGSMCK = -g -O0 -LDFLAGSMCK = -static -lpthread -SRCSMCK = $(shell ls rusage*.c) +CPPFLAGSMCK = +CFLAGSMCK = -Wall -Werror -g -O0 +LDFLAGSMCK = +SRCSMCK = $(shell ls *.c | grep -vE '_lin') EXESMCK = $(SRCSMCK:.c=) OBJSMCK = $(SRCSMCK:.c=.o) -all: $(EXES) $(EXESMCK) +all: $(EXESLIN) $(EXESMCK) -rusage000: rusage000.o - $(CC) -o $@ $^ $(LDFLAGSMCK) +011_mck: 011_mck.o + $(CC) -o $@ $^ $(LDFLAGSMCK) -L$(XPMEM_DIR)/lib -lxpmem -rusage000.o: rusage000.c - $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< +011_mck.o: 011_mck.c + $(CC) $(CFLAGSMCK) $(CPPFLAGSMCK) -I$(XPMEM_DIR)/include -c $< -rusage010: rusage010.o - $(CC) -o $@ $^ $(LDFLAGSMCK) +012_mck: 012_mck.o + $(CC) -o $@ $^ $(LDFLAGSMCK) -L$(XPMEM_DIR)/lib -lxpmem -Wl,-rpath -Wl,$(XPMEM_DIR)/lib -rusage010.o: rusage010.c - $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< +012_mck.o: 012_mck.c + $(CC) $(CFLAGSMCK) $(CPPFLAGSMCK) -I$(XPMEM_DIR)/include -c $< -rusage020: rusage020.o - $(CC) -o $@ $^ $(LDFLAGSMCK) +10%_mck: 10%_mck.o + $(CC) -o $@ $^ $(LDFLAGSMCK) -L$(MCK_DIR)/lib -lihk -Wl,-rpath -Wl,$(MCK_DIR)/lib -rusage020.o: rusage020.c - $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< +10%_mck.o:: 10%_mck.c + $(CC) $(CFLAGSMCK) $(CPPFLAGSMCK) -I$(MCK_DIR)/include -c $< -rusage030: rusage030.o - $(CC) -o $@ $^ $(LDFLAGSMCK) +20%_mck: 20%_mck.o + $(CC) -o $@ $^ $(LDFLAGSMCK) -lpthread -rusage030.o: rusage030.c - $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< - -rusage001: rusage001.o - $(CC) -o $@ $^ $(LDFLAGSMCK) - -rusage001.o: rusage001.c - $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< - -rusage002: rusage002.o - $(CC) -o $@ $^ $(LDFLAGSMCK) -lrt - -rusage002.o: rusage002.c - $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< - -rusage003: rusage003.o - $(CC) -o $@ $^ $(LDFLAGSMCK) -lrt - -rusage003.o: rusage003.c - $(CC) $(CCFLAGSMCK) 
$(CPPFLAGSMCK) -c $< - -rusage004: rusage004.o - $(CC) -o $@ $^ $(LDFLAGSMCK) - -rusage004.o: rusage004.c - $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< - -rusage006: rusage006.o - $(CC) -o $@ $^ $(LDFLAGSMCK) - -rusage006.o: rusage006.c - $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< - -rusage007: rusage007.o - $(CC) -o $@ $^ $(LDFLAGSMCK) - -rusage007.o: rusage007.c - $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< - -rusage008: rusage008.o - $(CC) -o $@ $^ $(LDFLAGSMCK) - -rusage008.o: rusage008.c - $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< - -rusage009: rusage009.o - $(CC) -o $@ $^ $(LDFLAGSMCK) - -rusage009.o: rusage009.c - $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< - -rusage011: rusage011.o - $(CC) -o $@ $^ $(LDFLAGSMCK) -L/home/takagi/usr/lib -lxpmem - -rusage011.o: rusage011.c - $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< - -rusage012: rusage012.o - $(CC) -o $@ $^ $(LDFLAGSMCK) -L/home/takagi/usr/lib -lxpmem - -rusage012.o: rusage012.c - $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< - -rusage013: rusage013.o - $(CC) -o $@ $^ $(LDFLAGSMCK) - -rusage013.o: rusage013.c - $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< - -rusage014: rusage014.o - $(CC) -o $@ $^ $(LDFLAGSMCK) - -rusage014.o: rusage014.c - $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< - -rusage015: rusage015.o - $(CC) -o $@ $^ $(LDFLAGSMCK) - -rusage015.o: rusage015.c - $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< - -rusage016: rusage016.o - $(CC) -o $@ $^ $(LDFLAGSMCK) - -rusage016.o: rusage016.c - $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< - -rusage017: rusage017.o - $(CC) -o $@ $^ $(LDFLAGSMCK) - -rusage017.o: rusage017.c - $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< - -rusage018: rusage018.o - $(CC) -o $@ $^ $(LDFLAGSMCK) - -rusage018.o: rusage018.c - $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< - -rusage100: rusage100.o - $(CC) -o $@ $^ $(LDFLAGS) - -rusage100.o: rusage100.c - $(CC) $(CCFLAGS) $(CPPFLAGS) -c $< - -rusage101: rusage101.o - $(CC) -o $@ $^ $(LDFLAGS) - -rusage101.o: rusage101.c - $(CC) $(CCFLAGS) $(CPPFLAGS) -c $< - -rusage102: 
rusage102.o - $(CC) -o $@ $^ $(LDFLAGS) - -rusage102.o: rusage102.c - $(CC) $(CCFLAGS) $(CPPFLAGS) -c $< - -rusage103: rusage103.o - $(CC) -o $@ $^ $(LDFLAGS) - -rusage103.o: rusage103.c - $(CC) $(CCFLAGS) $(CPPFLAGS) -c $< +20%_mck.o:: 20%_mck.c + $(CC) $(CFLAGSMCK) $(CPPFLAGSMCK) -c $< %_mck: %_mck.o $(CC) -o $@ $^ $(LDFLAGSMCK) %_mck.o:: %_mck.c - $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< + $(CC) $(CFLAGSMCK) $(CPPFLAGSMCK) -c $< + %_lin.o:: %_lin.c - $(CC) $(CCFLAGS) $(CPPFLAGS) -c $< + $(CC) $(CFLAGSLIN) $(CPPFLAGSLIN) -c $< %_lin: %_lin.o - $(CC) -o $@ $^ $(LDFLAGS) + $(CC) -o $@ $^ $(LDFLAGSLIN) clean: - rm -f core $(EXES) $(OBJS) $(EXESMCK) $(OBJSMCK) + rm -f core $(EXESLIN) $(OBJSLIN) $(EXESMCK) $(OBJSMCK) diff --git a/test/rusage/README b/test/rusage/README index 15ba045f..018bda98 100644 --- a/test/rusage/README +++ b/test/rusage/README @@ -1,10 +1,14 @@ ========== How to run ========== -./run.sh <rusagexxx> - -Example: -./run.sh rusage000 +(1) cp <build>/mckernel/mck_test_config.sample \ + ~/.mck_test_config +(2) cp <build>/ihk/mck_test_config.mk.sample \ + ~/.mck_test_config.mk +(3) Edit the following line in Makefile: + XPMEM_DIR=/usr/ +(4) ./run.sh <test_id> + Example: ./run.sh 000 =================================== How to judge it behaves as expected @@ -14,7 +18,7 @@ Follow the instruction given by run.sh. 
===================== Descripation of tests ===================== -rusage000: Excercise the following execution paths: +000: Exercise the following execution paths: args_envs()->set_range() text,data,arg,env[OK] args_envs()->set_range() !vdso [OK], args_envs()->set_range() stack[OK], @@ -29,89 +33,89 @@ rusage000: Excercise the following execution paths: exit_group()->free_process_memory_range()->!vdso[OK], exit_group()->free_process_memory_range()->stack[OK], -rusage010: Excercise the following execution paths: +010: Exercise the following execution paths: app->mmap() 2M,anon,pre-page ->set_range()->munmap()->free_process_memory_range()->clear_range()[OK] -rusage020: Excercise the following execution paths: +020: Exercise the following execution paths: app->mmap() 1G,anon,private,demand-page -> pf -> set_range() [NA (failed to get 1GB page)], app->mmap() 1G,anon,private,demand-page -> pf -> set_range()->munmap()->free_process_memory_range()->clear_range()[NA (failed to get 1GB page)] -rusage030: Excercise the following execution paths: +030: Exercise the following execution paths: app->mmap() 128M,anon,private,demand-page(=zeroobj) -> pf -> set_range() [OK], app->mmap() 128M,anon,private,demand-page(=zeroobj) -> pf -> set_range()->munmap()->free_process_memory_range()->clear_range()[OK] -rusage001: Excercise the following execution paths: +001: Exercise the following execution paths: do_mmap(),file,demand-page->get_page(),ld->set_pte()->munmap()->clear_range() [OK] do_mmap(),file,demand-page->get_page(),ld->set_pte()->st->munmap()->clear_range() [OK] do_mmap(),file,demand-page->get_page(),st->set_pte()->flush()->munmap()->clear_range() [OK] -rusage002: Excercise the following execution paths: +002: Exercise the following execution paths: do_mmap(),/dev/shm with --mpol_shm_premap,pre-page->get_page(),st->set_pte()->munmap()->clear_range() [OK] -rusage003: Excercise the following execution paths: +003: Exercise the following execution paths: 
do_mmap(),/dev/shm without --mpol_shm_premap,pre-page->get_page(),st->set_pte()->munmap()->clear_range() [OK] -rusage004: Excercise the following execution paths related to clone +004: Exercise the following execution paths related to clone load segments->copy_user_ranges()->clear_range() [OK] filemap(demand-paging)->copy_user_ranges()->clear_range() [OK] -rusage005: Excercise the following execution paths related to device file (ib ping-pong) +005: Exercise the following execution paths related to device file (ib ping-pong) devobj()->get_page()->pf->munmap()->clear_range() [OK] remote page fault->cow->clear_range() [OK] ld-linux.so->mmap private->cow->clear_range() [OK] -rusage006: Excercise the following execution paths related to clone +006: Exercise the following execution paths related to clone filemap,/dev/shm with --mpol_shm_premap->copy_user_ranges()->clear_range() [OK] -rusage007: Excercise the following execution paths related to clone +007: Exercise the following execution paths related to clone filemap,/dev/shm without --mpol_shm_premap->copy_user_ranges()->clear_range() [OK] -rusage008: Excercise the following execution paths related to sharing file-map page +008: Exercise the following execution paths related to sharing file-map page fork()->filemap->pf->clear_range() [OK] -rusage009: Excercise the following execution paths related to sharing shmget() page +009: Exercise the following execution paths related to sharing shmget() page fork()->shmat()->pf->clear_range() [OK] -rusage011: Excercise the following execution paths related to sharing xpmem page +011: Exercise the following execution paths related to sharing xpmem page fork()->xpmem_attach()->pf->clear_range() [OK] -rusage012: #925 +012: #925 -rusage013: Excercise the following execution paths: +013: Exercise the following execution paths: fileobj(prvate)->cow->set_pte()->clear_range() [OK] -rusage014: Excercise the following execution paths related to remap_file_pages +014: Exercise 
the following execution paths related to remap_file_pages do_mmap->fileobj-->remap_file_pages()->st->clear_range() [OK] -rusage015: Excercise the following execution paths related to remap_file_pages +015: Exercise the following execution paths related to remap_file_pages do_mmap->fileobj-->st-->remap_one_page()->clear_range() [OK] -rusage016: Excercise the following execution paths related to mremap +016: Exercise the following execution paths related to mremap mremap()->move_pte_range()-->clear_range() [OK] -rusage017: Excercise the following execution paths related to split_process_memory_range, file-map +017: Exercise the following execution paths related to split_process_memory_range, file-map mprotect()->split_process_memory_range()->change_prot_process_memory_range()->clear_range()/file_obj_release() [OK] -rusage018: Excercise the following execution paths related to split_process_memory_range, 2M-page +018: Exercise the following execution paths related to split_process_memory_range, 2M-page mprotect()->ihk_mc_pt_split()->change_prot_process_memory_range()->clear_range() [OK] -rusage019: Run npb bt-mz.S.2 +019: Run npb bt-mz.S.2 1-ppn x 2-tpn x 2-node[OK] -rusage021: Run npb bt-mz.S.4 +021: Run npb bt-mz.S.4 2-ppn x 2-tpn x 2-node (wallaby{14,15}) [OK] 2-ppn x 1-tpn x 2-node (polaris,kochab) [OK] -rusage100: Test ihk_os_getrusage() +100: Test ihk_os_getrusage(), call from McKernel process anon mmap,num_threads=1 [OK] -rusage101: Test ihk_os_getrusage() +101: Test ihk_os_getrusage(), call from McKernel process anon mmap,num_threads=2 [OK] -rusage102: Test ihk_os_getrusage() +102: Test ihk_os_getrusage(), call from McKernel process file map,num_threads=1 [OK] -rusage103: Test ihk_os_getrusage() +103: Test ihk_os_getrusage(), call from McKernel process anon mmap@numa#1 [OK] -rusage104: Test ihk_os_getrusage(), user time per CPU +200: Test ihk_os_getrusage(), user time per CPU diff --git a/test/rusage/run.sh b/test/rusage/run.sh index e54f4ee1..f0a06c95 
100755 --- a/test/rusage/run.sh +++ b/test/rusage/run.sh @@ -1,5 +1,7 @@ #!/usr/bin/bash +. ${HOME}/.mck_test_config + testname=$1 bootopt="-m 256M" mcexecopt="" @@ -7,21 +9,19 @@ testopt="" kill="n" dryrun="n" sleepopt="0.4" -home=$(eval echo \$\{HOME\}) -install=${home}/project/os/install echo Executing ${testname} case ${testname} in - rusage011 | rusage012) + 011 | 012) printf "*** Enable debug messages in rusage.h, memory.c, fileobj.c, shmobj.c, process.c by defining DEBUG macro, e.g. #define RUSAGE_DEBUG and then recompile IHK/McKernel.\n" printf "*** Install xpmem by git-clone https://github.com/hjelmn/xpmem.\n" ;; - rusage100 | rusage101 | rusage102 | rusage103) + 100 | 101 | 102 | 103) printf "*** Refer to rusage100.patch to add syscall #900 by editing syscall_list.h and syscall.c and recompile IHK/McKernel.\n" ;; - rusage104) - printf "*** Apply rusage104.patch to enable syscall #900" + 200) + printf "*** Apply 200.patch to enable syscall #900 " printf "which reports rusage values.\n" ;; *) @@ -32,33 +32,34 @@ esac read -p "*** Hit return when ready!" key case ${testname} in - rusage005) - ssh wallaby -c '(cd ${home}/project/src/rusage/verbs; make rdma_wr)' - bn=verbs/rdma_wr + 005) + ssh wallaby -c '(cd ${HOME}/project/src/rusage/verbs; make rdma_wr)' + bn_mck=verbs/rdma_wr ;; - rusage019) - #ssh wallaby -c '(cd ${home}/project/src/rusage/npb/NPB3.3.1-MZ/NPB3.3-MZ-MPI; make bt-mz CLASS=S NPROCS=2)' - bn=npb/NPB3.3.1-MZ/NPB3.3-MZ-MPI/bin/bt-mz.S.2 + 019) + #ssh wallaby -c '(cd ${HOME}/project/src/rusage/npb/NPB3.3.1-MZ/NPB3.3-MZ-MPI; make bt-mz CLASS=S NPROCS=2)' + bn_mck=npb/NPB3.3.1-MZ/NPB3.3-MZ-MPI/bin/bt-mz.S.2 perl -e 'print "wallaby14\nwallaby15\n"' > ./hostfile ;; - rusage021) + 021) if ! 
grep /var/log/local6 /etc/rsyslog.conf &>/dev/null; then echo "Insert a line of local6.* /var/log/local6 into /etc/rsyslog.conf" exit 255 fi - ssh wallaby bash -c '(cd ${home}/project/src/rusage/npb/NPB3.3.1-MZ/NPB3.3-MZ-MPI; make bt-mz CLASS=S NPROCS=4)' - bn=npb/NPB3.3.1-MZ/NPB3.3-MZ-MPI/bin/bt-mz.S.4 + ssh wallaby bash -c '(cd ${HOME}/project/src/rusage/npb/NPB3.3.1-MZ/NPB3.3-MZ-MPI; make bt-mz CLASS=S NPROCS=4)' + bn_mck=npb/NPB3.3.1-MZ/NPB3.3-MZ-MPI/bin/bt-mz.S.4 perl -e 'print "polaris:2\nkochab:2\n"' > ./hostfile ;; - rusage104) - bn=${testname} + 200) + bn_mck=${testname}_mck + bn_lin=${testname}_lin make clean > /dev/null 2> /dev/null - make ${bn}_mck ${bn}_lin + make $bn_mck $bn_lin ;; *) - bn=${testname} + bn_mck=${testname}_mck make clean > /dev/null 2> /dev/null - make ${bn} + make $bn_mck esac pid=`pidof mcexec` @@ -67,91 +68,82 @@ if [ "${pid}" != "" ]; then fi case ${testname} in - rusage000) + 000) testopt="0" ;; - rusage010) + 010) testopt="1" ;; - rusage020) + 020) bootopt="-m 256M@0,1G@0" testopt="2" kill="y" ;; - rusage030) + 030) testopt="3" ;; - rusage001) - cp ${bn} ./file + 001) + cp $bn_mck ./file kill="n" ;; - rusage002) + 002) mcexecopt="--mpol-shm-premap" ;; - rusage003) + 003) ;; - rusage004) - cp ${bn} ./file + 004) + cp $bn_mck ./file ;; - rusage005) - echo ssh wallaby15.aics-sys.riken.jp ${home}/project/src/verbs/rdma_wr -p 10000& + 005) + echo ssh wallaby15.aics-sys.riken.jp ${HOME}/project/src/verbs/rdma_wr -p 10000& read -p "Run rdma_wr on wallaby15 and enter the port number." 
port testopt="-s wallaby15.aics-sys.riken.jp -p ${port}" ;; - rusage006) + 006) mcexecopt="--mpol-shm-premap" ;; - rusage007) + 007) ;; - rusage008) - cp ${bn} ./file + 008) + cp $bn_mck ./file ;; - rusage009) + 009) ;; - rusage011) + 011) sudo insmod /home/takagi/usr/lib/module/xpmem.ko sudo chmod og+rw /dev/xpmem dryrun="n" kill="n" sleepopt="5" ;; - rusage012) + 012) sudo insmod /home/takagi/usr/lib/module/xpmem.ko sudo chmod og+rw /dev/xpmem dryrun="n" kill="n" sleepopt="5" ;; - rusage013) - cp ${bn} ./file + 013 | 014 | 015 | 017) + cp $bn_mck ./file ;; - rusage014) - cp ${bn} ./file + 016) ;; - rusage015) - cp ${bn} ./file + 018) ;; - rusage016) - ;; - rusage017) - cp ${bn} ./file - ;; - rusage018) - ;; - rusage019 | rusage021) + 019 | 021) bootopt="-k 1 -m 256M" ;; - rusage100) + 100) ;; - rusage101) + 101) ;; - rusage102) - cp ${bn} ./file + 102) + cp $bn_mck ./file ;; - rusage103) + 103) bootopt="-m 256M@1" ;; - rusage104) + 200) bootopt="-c 1,2,3 -m 256M" ;; *) @@ -164,7 +156,7 @@ exit fi case ${testname} in - rusage019 | rusage021) + 019 | 021) sudo rm /var/log/local6 sudo touch /var/log/local6 sudo chmod 600 /var/log/local6 @@ -175,60 +167,69 @@ case ${testname} in esac case ${testname} in - rusage019 | rusage021) - echo sudo ssh wallaby15 ${install}/sbin/mcstop+release.sh && - echo sudo ssh wallaby15 ${install}/sbin/mcreboot.sh + 019 | 021) + echo sudo ssh wallaby15 ${MCK_DIR}/sbin/mcstop+release.sh && + echo sudo ssh wallaby15 ${MCK_DIR}/sbin/mcreboot.sh read -p "Boot mckernel on wallaby15." 
ans ;; *) ;; esac -sudo ${install}/sbin/mcstop+release.sh && -sudo ${install}/sbin/mcreboot.sh ${bootopt} +sudo ${MCK_DIR}/sbin/mcstop+release.sh && +sudo ${MCK_DIR}/sbin/mcreboot.sh ${bootopt} if [ ${kill} == "y" ]; then - ${install}/bin/mcexec ${mcexecopt} ./${bn} ${testopt} & + ${MCK_DIR}/bin/mcexec ${mcexecopt} ./${bn_mck} ${testopt} & sleep ${sleepopt} - sudo ${install}/sbin/ihkosctl 0 kmsg > ./${testname}.log + sudo ${MCK_DIR}/sbin/ihkosctl 0 kmsg > ./${testname}.log pid=`pidof mcexec` if [ "${pid}" != "" ]; then kill -9 ${pid} > /dev/null 2> /dev/null fi else case ${testname} in - rusage005) - ${install}/bin/mcexec ${mcexecopt} ./${bn} ${testopt} + 005) + ${MCK_DIR}/bin/mcexec ${mcexecopt} ./${bn_mck} ${testopt} #read -p "Run rdma_wr." ans - sudo ${install}/sbin/ihkosctl 0 kmsg > ./${testname}.log + sudo ${MCK_DIR}/sbin/ihkosctl 0 kmsg > ./${testname}.log ;; - rusage019 | rusage021) - echo OMP_NUM_THREADS=2 mpiexec -machinefile ./hostfile ${install}/bin/mcexec ${mcexecopt} ./${bn} ${testopt} - read -p "Run ${bn} and hit return." ans + 019 | 021) + echo OMP_NUM_THREADS=2 mpiexec -machinefile ./hostfile ${MCK_DIR}/bin/mcexec ${mcexecopt} ./${bn_mck} ${testopt} + read -p "Run ${bn_mck} and hit return." 
ans sleep 1.5 sudo cat /var/log/local6 > ./${testname}.log ;; - rusage100 | rusage101 | rusage102 | rusage103) - ${install}/bin/mcexec ${mcexecopt} ./${bn} ${testopt} > ./${testname}.log + 100 | 101 | 102 | 103) + ${MCK_DIR}/bin/mcexec ${mcexecopt} ./${bn_mck} ${testopt} > ./${testname}.log echo "================================================" >> ./${testname}.log - sudo ${install}/sbin/ihkosctl 0 kmsg >> ./${testname}.log + sudo ${MCK_DIR}/sbin/ihkosctl 0 kmsg >> ./${testname}.log ;; - rusage104) - ${install}/bin/mcexec ${mcexecopt} ./${bn}_mck - ${install}/bin/mcexec ${mcexecopt} ./${bn}_lin - sudo ${install}/sbin/ihkosctl 0 kmsg > ./${testname}.log + 200) + ${MCK_DIR}/bin/mcexec ${mcexecopt} ./${bn_mck} + ${MCK_DIR}/bin/mcexec ${mcexecopt} ./${bn_lin} + sudo ${MCK_DIR}/sbin/ihkosctl 0 kmsg > ./${testname}.log grep user ./${testname}.log ;; *) - ${install}/bin/mcexec ${mcexecopt} ./${bn} ${testopt} - sudo ${install}/sbin/ihkosctl 0 kmsg > ./${testname}.log + ${MCK_DIR}/bin/mcexec ${mcexecopt} ./${bn_mck} ${testopt} + sudo ${MCK_DIR}/sbin/ihkosctl 0 kmsg > ./${testname}.log esac fi + case ${testname} in - rusage100 | rusage101 | rusage102 | rusage103) + 011 | 012) + sudo rmmod xpmem + ;; + *) + ;; +esac + +case ${testname} in + 100 | 101 | 102 | 103) printf "*** Check the ihk_os_getrusage() result (the first part of ${testname}.log) matches with the syscall #900 result (the second part) \n" ;; - rusage104) + 200) printf "*** It behaves as expected when there's no [NG] and " printf "\"All tests finished\" is shown\n" ;; @@ -238,4 +239,4 @@ case ${testname} in ;; esac -sudo ${install}/sbin/mcstop+release.sh +sudo ${MCK_DIR}/sbin/mcstop+release.sh diff --git a/test/rusage/rusage000.c b/test/rusage/rusage000.c deleted file mode 100644 index 19bc8e74..00000000 --- a/test/rusage/rusage000.c +++ /dev/null @@ -1,67 +0,0 @@ -#include <unistd.h> -#include <stdio.h> -#include <stdlib.h> -#include <sys/mman.h> -#include <string.h> - -#define DEBUG - -#ifdef DEBUG -#define 
dprintf(...) \ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#define eprintf(...) \ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#else -#define dprintf(...) do { } while (0) -#define eprintf(...) do { } while (0) -#endif - -#define CHKANDJUMP(cond, err, ...) \ - do { \ - if(cond) { \ - eprintf(__VA_ARGS__); \ - ret = err; \ - goto fn_fail; \ - } \ - } while(0) - -int sz_anon[] = { - 4 * (1ULL<<10), - 2 * (1ULL<<20), - 1 * (1ULL<<30), - 134217728}; - -#define SZ_INDEX 0 -#define NUM_AREAS 1 - -int main(int argc, char** argv) { - int i; - int sz_index; - void* anon[NUM_AREAS]; - int ret = 0; - CHKANDJUMP(argc != 2, 255, "%s <sz_index>\n", argv[0]); - sz_index = atoi(argv[1]); - - for(i = 0; i < NUM_AREAS; i++) { - anon[i] = mmap(0, sz_anon[sz_index], PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); - CHKANDJUMP(anon[i] == MAP_FAILED, 255, "mmap failed\n"); - *((unsigned long*)anon[i]) = 0x123456789abcdef0; - } - - for(i = 0; i < NUM_AREAS; i++) { - munmap(anon[i], sz_anon[sz_index]); - } - - fn_exit: - return ret; - fn_fail: - goto fn_exit; -} diff --git a/test/rusage/rusage001.c b/test/rusage/rusage001.c deleted file mode 100644 index cad64d48..00000000 --- a/test/rusage/rusage001.c +++ /dev/null @@ -1,72 +0,0 @@ -#include <unistd.h> -#include <stdio.h> -#include <sys/mman.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> - -#define DEBUG - -#ifdef DEBUG -#define dprintf(...) \ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#define eprintf(...) \ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#else -#define dprintf(...) do { } while (0) -#define eprintf(...) 
do { } while (0) -#endif - -#define CHKANDJUMP(cond, err, ...) \ - do { \ - if(cond) { \ - eprintf(__VA_ARGS__); \ - ret = err; \ - goto fn_fail; \ - } \ - } while(0) - -int sz_mem[] = { - 4 * (1ULL<<10), - 2 * (1ULL<<20), - 1 * (1ULL<<30), - 134217728}; - -#define SZ_INDEX 0 - -int main(int argc, char** argv) { - void* mem; - int ret = 0; - int fd; - - fd = open("./file", O_RDWR); - CHKANDJUMP(fd == -1, 255, "open failed\n"); - - mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); - CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); - - unsigned long val = *((unsigned long*)mem); - //memset(mem, 0, sz_mem[SZ_INDEX]); - - *((unsigned long*)mem) = 0x123456789abcdef0; -//ret = msync(mem, sz_mem[SZ_INDEX], MS_SYNC); -// *((unsigned long*)mem) = 0x123456789abcdef0; - - munmap(mem, sz_mem[SZ_INDEX]); - //ret = close(fd); - //CHKANDJUMP(ret != 0, 255, "close failed\n"); - - fn_exit: - return ret; - fn_fail: - goto fn_exit; -} diff --git a/test/rusage/rusage002.c b/test/rusage/rusage002.c deleted file mode 100644 index f613334e..00000000 --- a/test/rusage/rusage002.c +++ /dev/null @@ -1,79 +0,0 @@ -#include <unistd.h> -#include <stdio.h> -#include <sys/mman.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <errno.h> - -#define DEBUG - -#ifdef DEBUG -#define dprintf(...) \ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#define eprintf(...) \ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#else -#define dprintf(...) do { } while (0) -#define eprintf(...) do { } while (0) -#endif - -#define CHKANDJUMP(cond, err, ...) 
\ - do { \ - if(cond) { \ - eprintf(__VA_ARGS__); \ - ret = err; \ - goto fn_fail; \ - } \ - } while(0) - -int sz_mem[] = { - 4 * (1ULL<<10), - 2 * (1ULL<<20), - 1 * (1ULL<<30), - 134217728}; - -#define SZ_INDEX 0 - -int main(int argc, char** argv) { - void* mem; - int ret = 0; - int fd; - char fn[256] = "/dev/shm/Intel_MPI"; - - fd = open(fn, O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR); - CHKANDJUMP(fd == -1, 255, "shm_open failed,str=%s\n", strerror(errno)); - - ret = ftruncate(fd, sz_mem[SZ_INDEX]); - CHKANDJUMP(ret != 0, 255, "ftruncate failed\n"); - - mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); - CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); - - //unsigned long val = *((unsigned long*)mem); - //memset(mem, 0, sz_mem[SZ_INDEX]); - - *((unsigned long*)mem) = 0x123456789abcdef0; - // ret = msync(mem, sz_mem[SZ_INDEX], MS_SYNC); - // *((unsigned long*)mem) = 0x123456789abcdef0; - - munmap(mem, sz_mem[SZ_INDEX]); - ret = close(fd); - CHKANDJUMP(ret != 0, 255, "close failed\n"); - ret = unlink(fn); - CHKANDJUMP(ret != 0, 255, "shm_unlink failed\n"); - - fn_exit: - return ret; - fn_fail: - goto fn_exit; -} diff --git a/test/rusage/rusage003.c b/test/rusage/rusage003.c deleted file mode 120000 index 239a35e2..00000000 --- a/test/rusage/rusage003.c +++ /dev/null @@ -1 +0,0 @@ -rusage002.c \ No newline at end of file diff --git a/test/rusage/rusage004.c b/test/rusage/rusage004.c deleted file mode 100644 index bf5aea0f..00000000 --- a/test/rusage/rusage004.c +++ /dev/null @@ -1,78 +0,0 @@ -#include <unistd.h> -#include <stdio.h> -#include <sys/mman.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <errno.h> -#include <sys/wait.h> - -#define DEBUG - -#ifdef DEBUG -#define dprintf(...) \ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#define eprintf(...) 
\ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#else -#define dprintf(...) do { } while (0) -#define eprintf(...) do { } while (0) -#endif - -#define CHKANDJUMP(cond, err, ...) \ - do { \ - if(cond) { \ - eprintf(__VA_ARGS__); \ - ret = err; \ - goto fn_fail; \ - } \ - } while(0) - -int sz_mem[] = { - 4 * (1ULL<<10), - 2 * (1ULL<<20), - 1 * (1ULL<<30), - 134217728}; - -#define SZ_INDEX 0 -#define NUM_AREAS 1 - -int main(int argc, char** argv) { - void* mem; - int ret = 0; - pid_t pid; - int status; - int fd; - - fd = open("./file", O_RDWR); - CHKANDJUMP(fd == -1, 255, "open failed\n"); - - mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); - CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); - - unsigned long val = *((unsigned long*)mem); - - pid = fork(); - CHKANDJUMP(pid == -1, 255, "fork failed\n"); - if(pid == 0) { - _exit(123); - } else { - ret = waitpid(pid, &status, 0); - CHKANDJUMP(ret == -1, 255, "waitpid failed\n"); - printf("exit status=%d\n", WEXITSTATUS(status)); - } - - fn_exit: - return ret; - fn_fail: - goto fn_exit; -} diff --git a/test/rusage/rusage006.c b/test/rusage/rusage006.c deleted file mode 100644 index e76368f4..00000000 --- a/test/rusage/rusage006.c +++ /dev/null @@ -1,85 +0,0 @@ -#include <unistd.h> -#include <stdio.h> -#include <sys/mman.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <errno.h> -#include <sys/wait.h> - -#define DEBUG - -#ifdef DEBUG -#define dprintf(...) \ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#define eprintf(...) \ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#else -#define dprintf(...) do { } while (0) -#define eprintf(...) do { } while (0) -#endif - -#define CHKANDJUMP(cond, err, ...) 
\ - do { \ - if(cond) { \ - eprintf(__VA_ARGS__); \ - ret = err; \ - goto fn_fail; \ - } \ - } while(0) - -int sz_mem[] = { - 4 * (1ULL<<10), - 2 * (1ULL<<20), - 1 * (1ULL<<30), - 134217728}; - -#define SZ_INDEX 0 -#define NUM_AREAS 1 - -int main(int argc, char** argv) { - void* mem; - int ret = 0; - pid_t pid; - int status; - int fd; - - char fn[256] = "/dev/shm/Intel_MPI"; - - fd = open(fn, O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR); - CHKANDJUMP(fd == -1, 255, "shm_open failed,str=%s\n", strerror(errno)); - - ret = ftruncate(fd, sz_mem[SZ_INDEX]); - CHKANDJUMP(ret != 0, 255, "ftruncate failed\n"); - - mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); - CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); - - //unsigned long val = *((unsigned long*)mem); - *((unsigned long*)mem) = 0x123456789abcdef0; - - pid = fork(); - CHKANDJUMP(pid == -1, 255, "fork failed\n"); - if(pid == 0) { - _exit(123); - } else { - ret = waitpid(pid, &status, 0); - CHKANDJUMP(ret == -1, 255, "waitpid failed\n"); - printf("exit status=%d\n", WEXITSTATUS(status)); - } - - - fn_exit: - return ret; - fn_fail: - goto fn_exit; -} diff --git a/test/rusage/rusage007.c b/test/rusage/rusage007.c deleted file mode 120000 index 1a64aac5..00000000 --- a/test/rusage/rusage007.c +++ /dev/null @@ -1 +0,0 @@ -rusage006.c \ No newline at end of file diff --git a/test/rusage/rusage008.c b/test/rusage/rusage008.c deleted file mode 100644 index f28ecc89..00000000 --- a/test/rusage/rusage008.c +++ /dev/null @@ -1,86 +0,0 @@ -#include <unistd.h> -#include <stdio.h> -#include <sys/mman.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <errno.h> -#include <sys/wait.h> - -#define DEBUG - -#ifdef DEBUG -#define dprintf(...) \ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#define eprintf(...) 
\ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#else -#define dprintf(...) do { } while (0) -#define eprintf(...) do { } while (0) -#endif - -#define CHKANDJUMP(cond, err, ...) \ - do { \ - if(cond) { \ - eprintf(__VA_ARGS__); \ - ret = err; \ - goto fn_fail; \ - } \ - } while(0) - -int sz_mem[] = { - 4 * (1ULL<<10), - 2 * (1ULL<<20), - 1 * (1ULL<<30), - 134217728}; - -#define SZ_INDEX 0 -#define NUM_AREAS 1 - -int main(int argc, char** argv) { - void* mem; - int ret = 0; - pid_t pid; - int status; - int fd; - - pid = fork(); - CHKANDJUMP(pid == -1, 255, "fork failed\n"); - if(pid == 0) { - fd = open("./file", O_RDWR); - CHKANDJUMP(fd == -1, 255, "open failed\n"); - - mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); - CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); - - unsigned long val = *((unsigned long*)mem); - - _exit(123); - } else { - fd = open("./file", O_RDWR); - CHKANDJUMP(fd == -1, 255, "open failed\n"); - - mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); - CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); - - unsigned long val = *((unsigned long*)mem); - - ret = waitpid(pid, &status, 0); - CHKANDJUMP(ret == -1, 255, "waitpid failed\n"); - printf("exit status=%d\n", WEXITSTATUS(status)); - } - - fn_exit: - return ret; - fn_fail: - goto fn_exit; -} diff --git a/test/rusage/rusage009.c b/test/rusage/rusage009.c deleted file mode 100644 index 698d6f18..00000000 --- a/test/rusage/rusage009.c +++ /dev/null @@ -1,97 +0,0 @@ -#include <unistd.h> -#include <stdio.h> -#include <sys/mman.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <errno.h> -#include <sys/wait.h> -#include <sys/ipc.h> -#include <sys/shm.h> - -#define DEBUG - -#ifdef DEBUG -#define dprintf(...) 
\ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#define eprintf(...) \ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#else -#define dprintf(...) do { } while (0) -#define eprintf(...) do { } while (0) -#endif - -#define CHKANDJUMP(cond, err, ...) \ - do { \ - if(cond) { \ - eprintf(__VA_ARGS__); \ - ret = err; \ - goto fn_fail; \ - } \ - } while(0) - -int sz_mem[] = { - 4 * (1ULL<<10), - 2 * (1ULL<<20), - 1 * (1ULL<<30), - 134217728}; - -#define SZ_INDEX 0 - -int main(int argc, char** argv) { - void* mem; - int ret = 0; - pid_t pid; - int status; - key_t key = ftok(argv[0], 0); - int shmid; - - shmid = shmget(key, sz_mem[SZ_INDEX], IPC_CREAT | 0660); - CHKANDJUMP(shmid == -1, 255, "shmget failed: %s\n", strerror(errno)); - - pid = fork(); - CHKANDJUMP(pid == -1, 255, "fork failed\n"); - if(pid == 0) { - mem = shmat(shmid, NULL, 0); - CHKANDJUMP(mem == (void*)-1, 255, "shmat failed: %s\n", strerror(errno)); - - *((unsigned long*)mem) = 0x1234; - - ret = shmdt(mem); - CHKANDJUMP(ret == -1, 255, "shmdt failed\n"); - - _exit(123); - } else { - mem = shmat(shmid, NULL, 0); - CHKANDJUMP(mem == (void*)-1, 255, "shmat failed: %s\n", strerror(errno)); - - - ret = waitpid(pid, &status, 0); - CHKANDJUMP(ret == -1, 255, "waitpid failed\n"); - - printf("%lx\n", *((unsigned long*)mem)); - -#if 0 - struct shmid_ds buf; - ret = shmctl(shmid, IPC_RMID, &buf); - CHKANDJUMP(ret == -1, 255, "shmctl failed\n"); -#endif - - ret = shmdt(mem); - CHKANDJUMP(ret == -1, 255, "shmdt failed\n"); - } - - fn_exit: - return ret; - fn_fail: - goto fn_exit; -} diff --git a/test/rusage/rusage010.c b/test/rusage/rusage010.c deleted file mode 120000 index dcda4d36..00000000 --- a/test/rusage/rusage010.c +++ /dev/null @@ -1 +0,0 @@ -rusage000.c \ No newline at end of file diff --git a/test/rusage/rusage011.c b/test/rusage/rusage011.c deleted file 
mode 100644 index fd7ba260..00000000 --- a/test/rusage/rusage011.c +++ /dev/null @@ -1,126 +0,0 @@ -#include <unistd.h> -#include <stdio.h> -#include <sys/mman.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <errno.h> -#include <sys/wait.h> -#include <sys/ipc.h> -#include <sys/shm.h> -#include <xpmem.h> - -#define DEBUG - -#ifdef DEBUG -#define dprintf(...) \ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#define eprintf(...) \ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#else -#define dprintf(...) do { } while (0) -#define eprintf(...) do { } while (0) -#endif - -#define CHKANDJUMP(cond, err, ...) \ - do { \ - if(cond) { \ - eprintf(__VA_ARGS__); \ - ret = err; \ - goto fn_fail; \ - } \ - } while(0) - -int sz_mem[] = { - 4 * (1ULL<<10), - 2 * (1ULL<<20), - 1 * (1ULL<<30), - 134217728}; - -#define SZ_INDEX 0 - -int main(int argc, char** argv) { - void* mem; - int ret = 0; - pid_t pid; - int status; - key_t key = ftok(argv[0], 0); - int shmid; - xpmem_segid_t segid; - - shmid = shmget(key, sz_mem[SZ_INDEX], IPC_CREAT | 0660); - CHKANDJUMP(shmid == -1, 255, "shmget failed: %s\n", strerror(errno)); - - mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); - CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); - memset(mem, 0, sz_mem[SZ_INDEX]); - - pid = fork(); - CHKANDJUMP(pid == -1, 255, "fork failed\n"); - if(pid == 0) { - void *shm = shmat(shmid, NULL, 0); - CHKANDJUMP(shm == (void*)-1, 255, "shmat failed: %s\n", strerror(errno)); - - while((segid = *(xpmem_segid_t*)shm) == 0) { }; - - ret = shmdt(shm); - CHKANDJUMP(ret == -1, 255, "shmdt failed\n"); - - ret = xpmem_init(); - CHKANDJUMP(ret != 0, 255, "xpmem_init failed: %s\n", strerror(errno)); - - xpmem_apid_t apid = xpmem_get(segid, XPMEM_RDWR, XPMEM_PERMIT_MODE, 
NULL); - CHKANDJUMP(apid == -1, 255, "xpmem_get failed: %s\n", strerror(errno)); - - struct xpmem_addr addr = { .apid = apid, .offset = 0 }; - void* attach = xpmem_attach(addr, sz_mem[SZ_INDEX], NULL); - CHKANDJUMP(attach == (void*)-1, 255, "xpmem_attach failed: %s\n", strerror(errno)); - - *((unsigned long*)attach) = 0x1234; - - ret = xpmem_detach(attach); - CHKANDJUMP(ret == -1, 255, "xpmem_detach failed\n"); - - _exit(123); - } else { - void *shm = shmat(shmid, NULL, 0); - CHKANDJUMP(mem == (void*)-1, 255, "shmat failed: %s\n", strerror(errno)); - - ret = xpmem_init(); - CHKANDJUMP(ret != 0, 255, "xpmem_init failed: %s\n", strerror(errno)); - - segid = xpmem_make(mem, sz_mem[SZ_INDEX], XPMEM_PERMIT_MODE, (void*)0666); - CHKANDJUMP(segid == -1, 255, "xpmem_ioctl failed: %s\n", strerror(errno)); - - *(xpmem_segid_t*)shm = segid; - - ret = waitpid(pid, &status, 0); - CHKANDJUMP(ret == -1, 255, "waitpid failed\n"); - - printf("%lx\n", *((unsigned long*)mem)); - - struct shmid_ds buf; - ret = shmctl(shmid, IPC_RMID, &buf); - CHKANDJUMP(ret == -1, 255, "shmctl failed\n"); - - ret = shmdt(shm); - CHKANDJUMP(ret == -1, 255, "shmdt failed\n"); - - ret = xpmem_remove(segid); - CHKANDJUMP(ret == -1, 255, "xpmem_remove failed\n"); - } - - fn_exit: - return ret; - fn_fail: - goto fn_exit; -} diff --git a/test/rusage/rusage012.c b/test/rusage/rusage012.c deleted file mode 100644 index 663b7bb0..00000000 --- a/test/rusage/rusage012.c +++ /dev/null @@ -1,98 +0,0 @@ -#include <unistd.h> -#include <stdio.h> -#include <sys/mman.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <errno.h> -#include <sys/wait.h> -#include <sys/ipc.h> -#include <sys/shm.h> -#include <xpmem.h> - -#define DEBUG - -#ifdef DEBUG -#define dprintf(...) \ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#define eprintf(...) 
\ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#else -#define dprintf(...) do { } while (0) -#define eprintf(...) do { } while (0) -#endif - -#define CHKANDJUMP(cond, err, ...) \ - do { \ - if(cond) { \ - eprintf(__VA_ARGS__); \ - ret = err; \ - goto fn_fail; \ - } \ - } while(0) - -int sz_mem[] = { - 4 * (1ULL<<10), - 2 * (1ULL<<20), - 1 * (1ULL<<30), - 134217728}; - -#define SZ_INDEX 0 - -int main(int argc, char** argv) { - void* mem; - int ret = 0; - pid_t pid; - int status; - xpmem_segid_t segid; - - mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); - CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); - memset(mem, 0, sz_mem[SZ_INDEX]); - - ret = xpmem_init(); - CHKANDJUMP(ret != 0, 255, "xpmem_init failed: %s\n", strerror(errno)); - - segid = xpmem_make(mem, sz_mem[SZ_INDEX], XPMEM_PERMIT_MODE, (void*)0666); - CHKANDJUMP(segid == -1, 255, "xpmem_ioctl failed: %s\n", strerror(errno)); - - pid = fork(); - CHKANDJUMP(pid == -1, 255, "fork failed\n"); - if(pid == 0) { - xpmem_apid_t apid = xpmem_get(segid, XPMEM_RDWR, XPMEM_PERMIT_MODE, NULL); - CHKANDJUMP(apid == -1, 255, "xpmem_get failed: %s\n", strerror(errno)); - - struct xpmem_addr addr = { .apid = apid, .offset = 0 }; - void* attach = xpmem_attach(addr, sz_mem[SZ_INDEX], NULL); - CHKANDJUMP(attach == (void*)-1, 255, "xpmem_attach failed: %s\n", strerror(errno)); - - *((unsigned long*)attach) = 0x1234; - - ret = xpmem_detach(attach); - CHKANDJUMP(ret == -1, 255, "xpmem_detach failed\n"); - - _exit(123); - } else { - ret = waitpid(pid, &status, 0); - CHKANDJUMP(ret == -1, 255, "waitpid failed\n"); - - printf("%lx\n", *((unsigned long*)mem)); - - ret = xpmem_remove(segid); - CHKANDJUMP(ret == -1, 255, "xpmem_remove failed\n"); - } - - fn_exit: - return ret; - fn_fail: - goto fn_exit; -} diff --git a/test/rusage/rusage013.c b/test/rusage/rusage013.c deleted file mode 100644 index 
2ed29080..00000000 --- a/test/rusage/rusage013.c +++ /dev/null @@ -1,72 +0,0 @@ -#include <unistd.h> -#include <stdio.h> -#include <sys/mman.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> - -#define DEBUG - -#ifdef DEBUG -#define dprintf(...) \ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#define eprintf(...) \ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#else -#define dprintf(...) do { } while (0) -#define eprintf(...) do { } while (0) -#endif - -#define CHKANDJUMP(cond, err, ...) \ - do { \ - if(cond) { \ - eprintf(__VA_ARGS__); \ - ret = err; \ - goto fn_fail; \ - } \ - } while(0) - -int sz_mem[] = { - 4 * (1ULL<<10), - 2 * (1ULL<<20), - 1 * (1ULL<<30), - 134217728}; - -#define SZ_INDEX 0 - -int main(int argc, char** argv) { - void* mem; - int ret = 0; - int fd; - - fd = open("./file", O_RDWR); - CHKANDJUMP(fd == -1, 255, "open failed\n"); - - mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); - CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); - - //unsigned long val = *((unsigned long*)mem); - //memset(mem, 0, sz_mem[SZ_INDEX]); - - *((unsigned long*)mem) = 0x123456789abcdef0; -//ret = msync(mem, sz_mem[SZ_INDEX], MS_SYNC); -// *((unsigned long*)mem) = 0x123456789abcdef0; - - munmap(mem, sz_mem[SZ_INDEX]); - //ret = close(fd); - //CHKANDJUMP(ret != 0, 255, "close failed\n"); - - fn_exit: - return ret; - fn_fail: - goto fn_exit; -} diff --git a/test/rusage/rusage014.c b/test/rusage/rusage014.c deleted file mode 100644 index 932bc63f..00000000 --- a/test/rusage/rusage014.c +++ /dev/null @@ -1,71 +0,0 @@ -#include <unistd.h> -#include <stdio.h> -#include <sys/mman.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> - -#define DEBUG - -#ifdef DEBUG -#define dprintf(...) 
\ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#define eprintf(...) \ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#else -#define dprintf(...) do { } while (0) -#define eprintf(...) do { } while (0) -#endif - -#define CHKANDJUMP(cond, err, ...) \ - do { \ - if(cond) { \ - eprintf(__VA_ARGS__); \ - ret = err; \ - goto fn_fail; \ - } \ - } while(0) - -int sz_unit[] = { - 4 * (1ULL<<10), - 2 * (1ULL<<20), - 1 * (1ULL<<30)}; - -#define SZ_INDEX 0 - -int main(int argc, char** argv) { - void* mem; - int ret = 0; - int fd; - - fd = open("./file", O_RDWR); - CHKANDJUMP(fd == -1, 255, "open failed\n"); - - mem = mmap(0, 2 * sz_unit[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); - CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); - - ret = remap_file_pages(mem + 0 * sz_unit[SZ_INDEX], sz_unit[SZ_INDEX], 0, 1, MAP_SHARED); - CHKANDJUMP(ret != 0, 255, "remap_file_pages failed\n"); - - ret = remap_file_pages(mem + 1 * sz_unit[SZ_INDEX], sz_unit[SZ_INDEX], 0, 0, MAP_SHARED); - CHKANDJUMP(ret != 0, 255, "remap_file_pages failed\n"); - - *((unsigned long*)mem) = 0x123456789abcdef0; - *(unsigned long*)((char*)mem + 1 * sz_unit[SZ_INDEX]) = 0x123456789abcdef0; - - munmap(mem, 2 * sz_unit[SZ_INDEX]); - - fn_exit: - return ret; - fn_fail: - goto fn_exit; -} diff --git a/test/rusage/rusage015.c b/test/rusage/rusage015.c deleted file mode 100644 index 43975b13..00000000 --- a/test/rusage/rusage015.c +++ /dev/null @@ -1,74 +0,0 @@ -#include <unistd.h> -#include <stdio.h> -#include <sys/mman.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> - -#define DEBUG - -#ifdef DEBUG -#define dprintf(...) \ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#define eprintf(...) 
\ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#else -#define dprintf(...) do { } while (0) -#define eprintf(...) do { } while (0) -#endif - -#define CHKANDJUMP(cond, err, ...) \ - do { \ - if(cond) { \ - eprintf(__VA_ARGS__); \ - ret = err; \ - goto fn_fail; \ - } \ - } while(0) - -int sz_unit[] = { - 4 * (1ULL<<10), - 2 * (1ULL<<20), - 1 * (1ULL<<30)}; - -#define SZ_INDEX 0 - -int main(int argc, char** argv) { - void* mem; - int ret = 0; - int fd; - - fd = open("./file", O_RDWR); - CHKANDJUMP(fd == -1, 255, "open failed\n"); - - mem = mmap(0, 2 * sz_unit[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); - CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); - - *((unsigned long*)mem) = 0x123456789abcdef0; - *(unsigned long*)((char*)mem + 1 * sz_unit[SZ_INDEX]) = 0xbeefbeefbeefbeef; - - ret = remap_file_pages(mem + 0 * sz_unit[SZ_INDEX], sz_unit[SZ_INDEX], 0, 1, MAP_SHARED); - CHKANDJUMP(ret != 0, 255, "remap_file_pages failed\n"); - - ret = remap_file_pages(mem + 1 * sz_unit[SZ_INDEX], sz_unit[SZ_INDEX], 0, 0, MAP_SHARED); - CHKANDJUMP(ret != 0, 255, "remap_file_pages failed\n"); - - *((unsigned long*)mem) = 0x123456789abcdef0; - *(unsigned long*)((char*)mem + 1 * sz_unit[SZ_INDEX]) = 0xbeefbeefbeefbeef; - - munmap(mem, 2 * sz_unit[SZ_INDEX]); - - fn_exit: - return ret; - fn_fail: - goto fn_exit; -} diff --git a/test/rusage/rusage016.c b/test/rusage/rusage016.c deleted file mode 100644 index af4a4599..00000000 --- a/test/rusage/rusage016.c +++ /dev/null @@ -1,67 +0,0 @@ -#define _GNU_SOURCE /* See feature_test_macros(7) */ - -#include <unistd.h> -#include <stdio.h> -#include <stdlib.h> -#include <sys/mman.h> -#include <string.h> - -#define DEBUG - -#ifdef DEBUG -#define dprintf(...) \ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#define eprintf(...) 
\ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#else -#define dprintf(...) do { } while (0) -#define eprintf(...) do { } while (0) -#endif - -#define CHKANDJUMP(cond, err, ...) \ - do { \ - if(cond) { \ - eprintf(__VA_ARGS__); \ - ret = err; \ - goto fn_fail; \ - } \ - } while(0) - -int sz_anon[] = { - 4 * (1ULL<<10), - 2 * (1ULL<<20), - 1 * (1ULL<<30), - 134217728}; - -#define SZ_INDEX 0 -#define NUM_AREAS 1 - -int main(int argc, char** argv) { - int i; - void* mem; - void* newmem; - int ret = 0; - - mem = mmap(0, sz_anon[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); - CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); - *((unsigned long*)mem) = 0x123456789abcdef0; - - newmem = mremap(mem, sz_anon[SZ_INDEX], sz_anon[SZ_INDEX + 1], MREMAP_MAYMOVE); - CHKANDJUMP(newmem == MAP_FAILED, 255, "mmap failed\n"); - *((unsigned long*)mem) = 0xbeefbeefbeefbeef; - - munmap(newmem, sz_anon[SZ_INDEX + 1]); - - fn_exit: - return ret; - fn_fail: - goto fn_exit; -} diff --git a/test/rusage/rusage017.c b/test/rusage/rusage017.c deleted file mode 100644 index 00ffcefd..00000000 --- a/test/rusage/rusage017.c +++ /dev/null @@ -1,76 +0,0 @@ -#define _GNU_SOURCE /* See feature_test_macros(7) */ - -#include <unistd.h> -#include <stdio.h> -#include <stdlib.h> -#include <sys/mman.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> - -#define DEBUG - -#ifdef DEBUG -#define dprintf(...) \ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#define eprintf(...) \ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#else -#define dprintf(...) do { } while (0) -#define eprintf(...) do { } while (0) -#endif - -#define CHKANDJUMP(cond, err, ...) 
\ - do { \ - if(cond) { \ - eprintf(__VA_ARGS__); \ - ret = err; \ - goto fn_fail; \ - } \ - } while(0) - -int sz_mem[] = { - 4 * (1ULL<<10), - 2 * (1ULL<<20), - 1 * (1ULL<<30), - 134217728}; - -#define SZ_INDEX 0 -#define NUM_AREAS 1 - -int main(int argc, char** argv) { - int i; - void* mem; - void* newmem; - int ret = 0; - int fd; - - fd = open("./file", O_RDWR); - CHKANDJUMP(fd == -1, 255, "open failed\n"); - - mem = mmap(0, 3 * sz_mem[SZ_INDEX], PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); - CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); - - *(unsigned long*)((char*)mem + 0) = 0x123456789abcdef0; - *(unsigned long*)((char*)mem + sz_mem[SZ_INDEX]) = 0xbeefbeefbeefbeef; - *(unsigned long*)((char*)mem + 2 * sz_mem[SZ_INDEX]) = 0xbeefbeefbeefbeef; - - ret = mprotect(mem + sz_mem[SZ_INDEX], sz_mem[SZ_INDEX], PROT_READ | PROT_EXEC); - CHKANDJUMP(ret != 0, 255, "mprotect failed\n"); - - munmap(mem, 3 * sz_mem[SZ_INDEX]); - - fn_exit: - return ret; - fn_fail: - goto fn_exit; -} diff --git a/test/rusage/rusage018.c b/test/rusage/rusage018.c deleted file mode 100644 index 4737b3aa..00000000 --- a/test/rusage/rusage018.c +++ /dev/null @@ -1,73 +0,0 @@ -#define _GNU_SOURCE /* See feature_test_macros(7) */ - -#include <unistd.h> -#include <stdio.h> -#include <stdlib.h> -#include <sys/mman.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> - -#define DEBUG - -#ifdef DEBUG -#define dprintf(...) \ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#define eprintf(...) \ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#else -#define dprintf(...) do { } while (0) -#define eprintf(...) do { } while (0) -#endif - -#define CHKANDJUMP(cond, err, ...) 
\ - do { \ - if(cond) { \ - eprintf(__VA_ARGS__); \ - ret = err; \ - goto fn_fail; \ - } \ - } while(0) - -int sz_mem[] = { - 4 * (1ULL<<10), - 2 * (1ULL<<20), - 1 * (1ULL<<30), - 134217728}; - -#define SZ_INDEX 1 -#define NUM_AREAS 1 - -int main(int argc, char** argv) { - int i; - void* mem; - void* newmem; - int ret = 0; - int fd; - - mem = mmap(0, 3 * sz_mem[SZ_INDEX], PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); - - *(unsigned long*)((char*)mem + 0) = 0x123456789abcdef0; - *(unsigned long*)((char*)mem + sz_mem[SZ_INDEX]) = 0xbeefbeefbeefbeef; - *(unsigned long*)((char*)mem + 2 * sz_mem[SZ_INDEX]) = 0xbeefbeefbeefbeef; - - ret = mprotect(mem + sz_mem[SZ_INDEX - 1], sz_mem[SZ_INDEX - 1], PROT_READ | PROT_EXEC); - CHKANDJUMP(ret != 0, 255, "mprotect failed\n"); - - munmap(mem, 3 * sz_mem[SZ_INDEX]); - - fn_exit: - return ret; - fn_fail: - goto fn_exit; -} diff --git a/test/rusage/rusage020.c b/test/rusage/rusage020.c deleted file mode 120000 index dcda4d36..00000000 --- a/test/rusage/rusage020.c +++ /dev/null @@ -1 +0,0 @@ -rusage000.c \ No newline at end of file diff --git a/test/rusage/rusage030.c b/test/rusage/rusage030.c deleted file mode 120000 index dcda4d36..00000000 --- a/test/rusage/rusage030.c +++ /dev/null @@ -1 +0,0 @@ -rusage000.c \ No newline at end of file diff --git a/test/rusage/rusage100.c b/test/rusage/rusage100.c deleted file mode 100644 index 28177f4a..00000000 --- a/test/rusage/rusage100.c +++ /dev/null @@ -1,81 +0,0 @@ -#include <stdio.h> -#include <sys/mman.h> -#include <unistd.h> -#include <sys/syscall.h> /* For SYS_xxx definitions */ -#include "ihklib.h" -#include "mckernel/ihklib_rusage.h" - -#define DEBUG - -#ifdef DEBUG -#define dprintf(...) \ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#else -#define dprintf(...) do { } while (0) -#endif - -#define CHKANDJUMP(cond, err, ...) 
\ - do { \ - if(cond) { \ - dprintf(__VA_ARGS__); \ - ret = err; \ - goto fn_fail; \ - } \ - } while(0) - -int sz_anon[] = { - 4 * (1ULL<<10), - 2 * (1ULL<<20), - 1 * (1ULL<<30), - 134217728}; - -#define SZ_INDEX 0 -#define NLOOP 2 - -int main(int argc, char** argv) { - int i, j, ret = 0, ret_ihklib; - void* mem; - struct mckernel_rusage rusage; - - for (j = 0; j < NLOOP; j++) { - mem = mmap(0, sz_anon[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); - CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); - *((unsigned long*)mem) = 0x123456789abcdef0; - - ret = syscall(900); - CHKANDJUMP(ret != 0, 255, "syscall failed\n"); - - ret = ihk_os_getrusage(0, &rusage, sizeof(rusage)); - CHKANDJUMP(ret != 0, 255, "ihk_os_getrusage failed\n"); - - for (i = 0; i < IHK_MAX_NUM_PGSIZES; i++) { - printf("memory_stat_rss[%d]=%ld\n", i, rusage.memory_stat_rss[i]); - printf("memory_stat_mapped_file[%d]=%ld\n", i, rusage.memory_stat_mapped_file[i]); - } - printf("memory_max_usage=%ld\n", rusage.memory_max_usage); - printf("memory_kmem_usage=%ld\n", rusage.memory_kmem_usage); - printf("memory_kmem_max_usage=%ld\n", rusage.memory_kmem_max_usage); -#define NUM_NUMA_NODES 2 - for (i = 0; i < NUM_NUMA_NODES; i++) { - printf("memory_numa_stat[%d]=%ld\n", i, rusage.memory_numa_stat[i]); - } -#define NUM_CPUS 2 - for (i = 0; i < NUM_CPUS; i++) { - printf("cpuacct_usage_percpu[%d]=%ld\n", i, rusage.cpuacct_usage_percpu[i]); - } - printf("cpuacct_stat_system=%ld\n", rusage.cpuacct_stat_system); - printf("cpuacct_stat_user=%ld\n", rusage.cpuacct_stat_user); - printf("cpuacct_usage=%ld\n", rusage.cpuacct_usage); - - printf("num_threads=%d\n", rusage.num_threads); - printf("max_num_threads=%d\n", rusage.max_num_threads); - } - fn_exit: - return ret; - fn_fail: - goto fn_exit; -} diff --git a/test/rusage/rusage101.c b/test/rusage/rusage101.c deleted file mode 100644 index 2d2e29ab..00000000 --- a/test/rusage/rusage101.c +++ /dev/null @@ -1,93 +0,0 @@ -#include <stdio.h> 
-#include <sys/mman.h> -#include <unistd.h> -#include <sys/syscall.h> /* For SYS_xxx definitions */ -#include "ihklib.h" -#include "mckernel/ihklib_rusage.h" - -#define DEBUG - -#ifdef DEBUG -#define dprintf(...) \ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#else -#define dprintf(...) do { } while (0) -#endif - -#define CHKANDJUMP(cond, err, ...) \ - do { \ - if(cond) { \ - dprintf(__VA_ARGS__); \ - ret = err; \ - goto fn_fail; \ - } \ - } while(0) - -int sz_anon[] = { - 4 * (1ULL<<10), - 2 * (1ULL<<20), - 1 * (1ULL<<30), - 134217728}; - -#define SZ_INDEX 0 -#define NLOOP 2 - -int main(int argc, char** argv) { - int i, j, ret = 0, ret_ihklib; - void* mem; - struct mckernel_rusage rusage; - pid_t pid; - int status; - - pid = fork(); - CHKANDJUMP(pid == -1, 255, "fork failed"); - if (pid == 0) { - - for (j = 0; j < NLOOP; j++) { - mem = mmap(0, sz_anon[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); - CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); - *((unsigned long*)mem) = 0x123456789abcdef0; - - ret = syscall(900); - CHKANDJUMP(ret != 0, 255, "syscall failed\n"); - - ret = ihk_os_getrusage(0, &rusage, sizeof(rusage)); - CHKANDJUMP(ret != 0, 255, "ihk_os_getrusage failed\n"); - - for (i = 0; i < IHK_MAX_NUM_PGSIZES; i++) { - printf("memory_stat_rss[%d]=%ld\n", i, rusage.memory_stat_rss[i]); - printf("memory_stat_mapped_file[%d]=%ld\n", i, rusage.memory_stat_mapped_file[i]); - } - printf("memory_max_usage=%ld\n", rusage.memory_max_usage); - printf("memory_kmem_usage=%ld\n", rusage.memory_kmem_usage); - printf("memory_kmem_max_usage=%ld\n", rusage.memory_kmem_max_usage); -#define NUM_NUMA_NODES 2 - for (i = 0; i < NUM_NUMA_NODES; i++) { - printf("memory_numa_stat[%d]=%ld\n", i, rusage.memory_numa_stat[i]); - } -#define NUM_CPUS 2 - for (i = 0; i < NUM_CPUS; i++) { - printf("cpuacct_usage_percpu[%d]=%ld\n", i, rusage.cpuacct_usage_percpu[i]); - } - 
printf("cpuacct_stat_system=%ld\n", rusage.cpuacct_stat_system); - printf("cpuacct_stat_user=%ld\n", rusage.cpuacct_stat_user); - printf("cpuacct_usage=%ld\n", rusage.cpuacct_usage); - - printf("num_threads=%d\n", rusage.num_threads); - printf("max_num_threads=%d\n", rusage.max_num_threads); - } - _exit(123); - } else { - ret = waitpid(pid, &status, 0); - CHKANDJUMP(ret == -1, 255, "waitpid failed\n"); - } - - fn_exit: - return ret; - fn_fail: - goto fn_exit; -} diff --git a/test/rusage/rusage102.c b/test/rusage/rusage102.c deleted file mode 100644 index 53cf306c..00000000 --- a/test/rusage/rusage102.c +++ /dev/null @@ -1,89 +0,0 @@ -#include <stdio.h> -#include <sys/mman.h> -#include <unistd.h> -#include <sys/syscall.h> /* For SYS_xxx definitions */ -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include "ihklib.h" -#include "mckernel/ihklib_rusage.h" - -#define DEBUG - -#ifdef DEBUG -#define dprintf(...) \ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#else -#define dprintf(...) do { } while (0) -#endif - -#define CHKANDJUMP(cond, err, ...) 
\ - do { \ - if(cond) { \ - dprintf(__VA_ARGS__); \ - ret = err; \ - goto fn_fail; \ - } \ - } while(0) - -int sz_mem[] = { - 4 * (1ULL<<10), - 2 * (1ULL<<20), - 1 * (1ULL<<30), - 134217728}; - -#define SZ_INDEX 0 -#define NLOOP 2 - -int main(int argc, char** argv) { - int i, j, ret = 0, ret_ihklib; - void* mem; - int fd; - struct mckernel_rusage rusage; - - fd = open("./file", O_RDWR); - CHKANDJUMP(fd == -1, 255, "open failed\n"); - - for (j = 0; j < NLOOP; j++) { - - mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_SHARED, fd, sz_mem[SZ_INDEX] * j); - CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); - *((unsigned long*)mem) = 0x123456789abcdef0; - - ret = syscall(900); - CHKANDJUMP(ret != 0, 255, "syscall failed\n"); - - ret = ihk_os_getrusage(0, &rusage, sizeof(rusage)); - CHKANDJUMP(ret != 0, 255, "ihk_os_getrusage failed\n"); - - for (i = 0; i < IHK_MAX_NUM_PGSIZES; i++) { - printf("memory_stat_rss[%d]=%ld\n", i, rusage.memory_stat_rss[i]); - printf("memory_stat_mapped_file[%d]=%ld\n", i, rusage.memory_stat_mapped_file[i]); - } - printf("memory_max_usage=%ld\n", rusage.memory_max_usage); - printf("memory_kmem_usage=%ld\n", rusage.memory_kmem_usage); - printf("memory_kmem_max_usage=%ld\n", rusage.memory_kmem_max_usage); -#define NUM_NUMA_NODES 2 - for (i = 0; i < NUM_NUMA_NODES; i++) { - printf("memory_numa_stat[%d]=%ld\n", i, rusage.memory_numa_stat[i]); - } -#define NUM_CPUS 2 - for (i = 0; i < NUM_CPUS; i++) { - printf("cpuacct_usage_percpu[%d]=%ld\n", i, rusage.cpuacct_usage_percpu[i]); - } - printf("cpuacct_stat_system=%ld\n", rusage.cpuacct_stat_system); - printf("cpuacct_stat_user=%ld\n", rusage.cpuacct_stat_user); - printf("cpuacct_usage=%ld\n", rusage.cpuacct_usage); - - printf("num_threads=%d\n", rusage.num_threads); - printf("max_num_threads=%d\n", rusage.max_num_threads); - } - fn_exit: - return ret; - fn_fail: - goto fn_exit; -} diff --git a/test/rusage/rusage103.c b/test/rusage/rusage103.c deleted file mode 100644 index 
28177f4a..00000000 --- a/test/rusage/rusage103.c +++ /dev/null @@ -1,81 +0,0 @@ -#include <stdio.h> -#include <sys/mman.h> -#include <unistd.h> -#include <sys/syscall.h> /* For SYS_xxx definitions */ -#include "ihklib.h" -#include "mckernel/ihklib_rusage.h" - -#define DEBUG - -#ifdef DEBUG -#define dprintf(...) \ - do { \ - char msg[1024]; \ - sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ - } while (0); -#else -#define dprintf(...) do { } while (0) -#endif - -#define CHKANDJUMP(cond, err, ...) \ - do { \ - if(cond) { \ - dprintf(__VA_ARGS__); \ - ret = err; \ - goto fn_fail; \ - } \ - } while(0) - -int sz_anon[] = { - 4 * (1ULL<<10), - 2 * (1ULL<<20), - 1 * (1ULL<<30), - 134217728}; - -#define SZ_INDEX 0 -#define NLOOP 2 - -int main(int argc, char** argv) { - int i, j, ret = 0, ret_ihklib; - void* mem; - struct mckernel_rusage rusage; - - for (j = 0; j < NLOOP; j++) { - mem = mmap(0, sz_anon[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); - CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); - *((unsigned long*)mem) = 0x123456789abcdef0; - - ret = syscall(900); - CHKANDJUMP(ret != 0, 255, "syscall failed\n"); - - ret = ihk_os_getrusage(0, &rusage, sizeof(rusage)); - CHKANDJUMP(ret != 0, 255, "ihk_os_getrusage failed\n"); - - for (i = 0; i < IHK_MAX_NUM_PGSIZES; i++) { - printf("memory_stat_rss[%d]=%ld\n", i, rusage.memory_stat_rss[i]); - printf("memory_stat_mapped_file[%d]=%ld\n", i, rusage.memory_stat_mapped_file[i]); - } - printf("memory_max_usage=%ld\n", rusage.memory_max_usage); - printf("memory_kmem_usage=%ld\n", rusage.memory_kmem_usage); - printf("memory_kmem_max_usage=%ld\n", rusage.memory_kmem_max_usage); -#define NUM_NUMA_NODES 2 - for (i = 0; i < NUM_NUMA_NODES; i++) { - printf("memory_numa_stat[%d]=%ld\n", i, rusage.memory_numa_stat[i]); - } -#define NUM_CPUS 2 - for (i = 0; i < NUM_CPUS; i++) { - printf("cpuacct_usage_percpu[%d]=%ld\n", i, rusage.cpuacct_usage_percpu[i]); - } - 
printf("cpuacct_stat_system=%ld\n", rusage.cpuacct_stat_system); - printf("cpuacct_stat_user=%ld\n", rusage.cpuacct_stat_user); - printf("cpuacct_usage=%ld\n", rusage.cpuacct_usage); - - printf("num_threads=%d\n", rusage.num_threads); - printf("max_num_threads=%d\n", rusage.max_num_threads); - } - fn_exit: - return ret; - fn_fail: - goto fn_exit; -} diff --git a/test/rusage/util.h b/test/rusage/util.h index f33a1868..a21ea7ae 100644 --- a/test/rusage/util.h +++ b/test/rusage/util.h @@ -7,7 +7,7 @@ #define dprintf(...) do { \ char msg[1024]; \ sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __func__, msg); \ + fprintf(stderr, "%s: %s", __func__, msg); \ } while (0) #else #define dprintf(...) do { } while (0) @@ -16,7 +16,7 @@ #define eprintf(...) do { \ char msg[1024]; \ sprintf(msg, __VA_ARGS__); \ - fprintf(stderr, "%s,%s", __func__, msg); \ + fprintf(stderr, "%s: ERROR: %s", __func__, msg); \ } while (0) #define CHKANDJUMP(cond, err, ...) do { \ @@ -36,7 +36,7 @@ printf("[ NG ] "); \ printf(__VA_ARGS__); \ ret = -EINVAL; \ - goto out; \ + goto fn_fail; \ } \ } while (0) diff --git a/test/user_space/ReadMe b/test/user_space/ReadMe index 09681c3f..6baed158 100644 --- a/test/user_space/ReadMe +++ b/test/user_space/ReadMe @@ -1,6 +1,12 @@ User spaceテストについて 1.テストの準備 -1.1 パッチファイル +1.1 設定ファイル + +以下のコマンドでシェルスクリプトの変数設定用includeファイルを準備します。 + + cp -i <build>/mckernel/mck_test_config.sample ~/.mck_test_config + +1.2 パッチファイル テストは、修正部分にkprintfをパッチで追加し確認を行います。 パッチファイルは以下のディレクトリに格納しています。  mckernel/test/user_space/patch/ @@ -18,7 +24,7 @@ User spaceテストについて なし swapout so_023-so_025 qlmpilib.patch swapout so_026 -1.2 パッチの適用 +1.3 パッチの適用 mckernelディレクトリで以下コマンドを実行してパッチを当ててください。 パッチ実行後ビルドしてください。 @@ -26,7 +32,7 @@ User spaceテストについて (XXXX.patchはパッチファイル名) -1.3 その他プログラムのコンパイル +1.4 その他プログラムのコンパイル (1)user_space/swapoutディレクトリで、makeし、swaptest実行モジュールを作成してください。 (2)qlmpi/qlmpi_testsuiteディレクトリで、テスト用プログラムのコンパイルを実施してください。 (3)LTPプログラムの実行モジュールを準備する必要があります。 diff --git 
a/test/user_space/futex/config b/test/user_space/futex/config deleted file mode 100644 index e5f48348..00000000 --- a/test/user_space/futex/config +++ /dev/null @@ -1,2 +0,0 @@ -MCPATH=/home/iizuka/mckernel -LTP_EXE_DIR=/home/iizuka/LTP/futex diff --git a/test/user_space/futex/futex_test.sh b/test/user_space/futex/futex_test.sh index 2bafa7b8..470cba63 100755 --- a/test/user_space/futex/futex_test.sh +++ b/test/user_space/futex/futex_test.sh @@ -32,7 +32,7 @@ function ng_out() { function ltp_test() { TEST_NAME=$1 #LTP programを実行 logを保存 - sudo ${MCPATH}/bin/mcexec ${LTP_EXE_DIR}/${TEST_NAME} >./result/${TEST_NAME}.log + sudo ${MCK_DIR}/bin/mcexec ${LTP}/testcases/bin/${TEST_NAME} >./result/${TEST_NAME}.log #LTP log 確認 NUM=`cat ./test_cases/${TEST_NAME}.txt |wc -l` @@ -56,21 +56,16 @@ TEST_CODE=001 TEST_PREFIX=futex_ ME=`whoami` -if [ $# -ne 2 ]; then - source ./config -else - MCPATH=$1 - LTP_EXE_DIR=$2/futex -fi +source ${HOME}/.mck_test_config mkdir -p result reboot #LTP programを実行 logを保存 -mcexec ${LTP_EXE_DIR}/futex_wait01 >./result/futex_wait01.log +${MCK_DIR}/bin/mcexec ${LTP}/testcases/bin/futex_wait01 >./result/futex_wait01.log #kmsgを保存 -sudo ${MCPATH}/sbin/ihkosctl 0 kmsg >./result/futex_wait01.kmsg +sudo ${MCK_DIR}/sbin/ihkosctl 0 kmsg >./result/futex_wait01.kmsg #kmsgで結果を出力する。 #futex-001 アドレスが正しく引き継いでいることを確認 diff --git a/test/user_space/perf_event_open/config b/test/user_space/perf_event_open/config deleted file mode 100644 index 7692ff25..00000000 --- a/test/user_space/perf_event_open/config +++ /dev/null @@ -1,2 +0,0 @@ -MCPATH=/home/iizuka/mckernel -LTP_EXE_DIR=/home/iizuka/LTP/perf_event_open diff --git a/test/user_space/perf_event_open/perf_event_open_test.sh b/test/user_space/perf_event_open/perf_event_open_test.sh index f70e73cc..dffff8e7 100755 --- a/test/user_space/perf_event_open/perf_event_open_test.sh +++ b/test/user_space/perf_event_open/perf_event_open_test.sh @@ -32,7 +32,7 @@ function ng_out() { function ltp_test() { TEST_NAME=$1 #LTP 
programを実行 logを保存 - ${MCPATH}/bin/mcexec ${LTP_EXE_DIR}/${TEST_NAME} >./result/${TEST_NAME}.log + ${MCK_DIR}/bin/mcexec ${LTP}/testcases/bin/${TEST_NAME} >./result/${TEST_NAME}.log #LTP log 確認 NUM=`cat ./test_cases/${TEST_NAME}.txt |wc -l` @@ -55,20 +55,15 @@ TEST_CODE=001 TEST_PREFIX=perf_ ME=`whoami` -if [ $# -ne 2 ]; then - source ./config -else - MCPATH=$1 - LTP_EXE_DIR=$2/perf_event_open -fi +source ${HOME}/.mck_test_config mkdir -p ./result reboot #LTP programを実行 logを保存 -mcexec ${LTP_EXE_DIR}/perf_event_open01 >./result/perf_event_open01.log +${MCK_DIR}/bin/mcexec ${LTP}/testcases/bin/perf_event_open01 >./result/perf_event_open01.log #kmsgを保存 -sudo ${MCPATH}/sbin/ihkosctl 0 kmsg >./result/perf_event_open01.kmsg +sudo ${MCK_DIR}/sbin/ihkosctl 0 kmsg >./result/perf_event_open01.kmsg #kmsgで結果を出力する。 NUM=`cat ./test_cases/perd_event_open01.kmsg.txt |wc -l` diff --git a/test/user_space/swapout/swapout_copy_to_01.sh b/test/user_space/swapout/swapout_copy_to_01.sh index 23419919..52562979 100755 --- a/test/user_space/swapout/swapout_copy_to_01.sh +++ b/test/user_space/swapout/swapout_copy_to_01.sh @@ -1,5 +1,7 @@ #!/bin/sh +. ${HOME}/.mck_test_config + # Functions function reboot() { count=`pgrep -c -f 'mcexec '` @@ -99,8 +101,8 @@ fi #swapout011 do_pageout si->swphdr->version version=`grep "do_pageout:.*,si->swphdr->version" ./result/swapout_copy_to_01.kmsg | head -n 1 | sed s/"^.*copy_to_user .*,si->swphdr->version:\(.*\),si->swphdr->count_sarea:.*$"/"\1"/ ` #echo ${version} -if [ "${version}" = "0.9.0" ]; then - text=`echo "do_pageout:si->swphdr->version is 0.9.0"` +if [ "${version}" = "${MCKERNEL_VERSION}" ]; then + text=`echo "do_pageout:si->swphdr->version is ${MCKERNEL_VERSION}"` ok_out "${text} :(${version})" else ng_out "do_pageout: does not match the value of si->swphdr->version." 
diff --git a/test/uti/CT01.c b/test/uti/CT01.c new file mode 100644 index 00000000..5a209b47 --- /dev/null +++ b/test/uti/CT01.c @@ -0,0 +1,137 @@ +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <pthread.h> +#include <errno.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/mman.h> +#include <signal.h> + +pthread_mutex_t mutex1; +pthread_cond_t cond1; +pthread_mutex_t mutex2; +pthread_cond_t cond2; +char *m; +int flag1, flag2; + +int sigst; +pthread_t thr; + +void +sigsegv(int s) +{ + if (sigst == 1) { + fprintf(stderr, "CT01007 munmap OK (SIGSEGV)\n"); + pthread_join(thr, NULL); + fprintf(stderr, "CT01008 exit(pthread_join) OK\n"); + fprintf(stderr, "CT01009 futex (pthread_mutex/pthread_cond) OK\n"); + fprintf(stderr, "CT01010 END\n"); + exit(0); + } + printf("BAD SIGSEGV\n"); + exit(1); +} + +void * +util_thread(void *arg) +{ + int rc; + + rc = syscall(732); + if (rc == -1) + fprintf(stderr, "CT01003 running on Linux OK\n"); + else { + fprintf(stderr, "CT01003 running on McKernel NG\n"); /* format has no conversion, so no extra argument is passed */ + exit(1); + } + errno = 0; + m = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (m != (void *)-1) { + fprintf(stderr, "CT01004 mmap OK\n"); + } + else { + fprintf(stderr, "CT01004 mmap NG errno=%d\n", errno); + exit(1); + } + strcpy(m, "mmap OK"); + pthread_mutex_lock(&mutex1); + flag1 = 1; + pthread_cond_signal(&cond1); + pthread_mutex_unlock(&mutex1); + + pthread_mutex_lock(&mutex2); + while(!flag2) { + pthread_cond_wait(&cond2, &mutex2); + } + flag2 = 0; + pthread_mutex_unlock(&mutex2); + rc = munmap(m, 4096); + if (rc == 0) { + fprintf(stderr, "CT01006 munmap OK\n"); + } + else { + fprintf(stderr, "CT01006 munmap NG errno=%d\n", errno); + exit(1); + } + + pthread_mutex_lock(&mutex1); + flag1 = 1; + pthread_cond_signal(&cond1); + pthread_mutex_unlock(&mutex1); + return NULL; +} + +int +main(int argc, char **argv) +{ + int rc; + + signal(SIGSEGV, sigsegv); + 
pthread_mutex_init(&mutex1, NULL); + pthread_cond_init(&cond1, NULL); + pthread_mutex_init(&mutex2, NULL); + pthread_cond_init(&cond2, NULL); + + fprintf(stderr, "CT01001 mmap/munmap/futex/exit START\n"); + rc = syscall(731, 1, NULL); + if (rc) { + fprintf(stderr, "util_indicate_clone rc=%d, errno=%d\n", rc, errno); + fflush(stderr); + } + rc = pthread_create(&thr, NULL, util_thread, NULL); + if(rc){ + fprintf(stderr, "pthread_create: %d\n", rc); + exit(1); + } + fprintf(stderr, "CT01002 pthread_create OK\n"); + pthread_mutex_lock(&mutex1); + while(!flag1) { + pthread_cond_wait(&cond1, &mutex1); + } + flag1 = 0; + pthread_mutex_unlock(&mutex1); + + fprintf(stderr, "CT01005 %s\n", m); + pthread_mutex_lock(&mutex2); + flag2 = 1; + pthread_cond_signal(&cond2); + pthread_mutex_unlock(&mutex2); + + pthread_mutex_lock(&mutex1); + while(!flag1) { + pthread_cond_wait(&cond1, &mutex1); + } + flag1 = 0; + pthread_mutex_unlock(&mutex1); + + sigst = 1; + fprintf(stderr, "%s\n", m); + fprintf(stderr, "CT01007 munmap NG\n"); + pthread_join(thr, NULL); + fprintf(stderr, "CT01008 exit(pthread_join) OK\n"); + fprintf(stderr, "CT01009 futex (pthread_mutex/pthread_cond) OK\n"); + fprintf(stderr, "CT01010 END\n"); + exit(0); +} diff --git a/test/uti/CT01.sh b/test/uti/CT01.sh new file mode 100755 index 00000000..7756e8cb --- /dev/null +++ b/test/uti/CT01.sh @@ -0,0 +1,86 @@ +#!/usr/bin/bash + +#!/usr/bin/bash -x + +MYHOME=$HOME +UTI_TOP=${MYHOME}/project/os/mckernel/test/uti + +MCK=${MYHOME}/project/os/install +unset DISABLE_UTI + +cmdline="./CT01" + +stop=0 +reboot=0 +go=0 + +mck=0 +nloops=1 + +while getopts srgac:n:mdl: OPT +do + case ${OPT} in + s) stop=1 + ;; + r) reboot=1 + ;; + g) go=1 + ;; + a) async=1 + ;; + c) cmdline=$OPTARG + ;; + n) ndoubles=$OPTARG + ;; + m) + mck=1 + ;; + d) export DISABLE_UTI=1 + ;; + l) nloops=$OPTARG + ;; + *) echo "invalid option -${OPT}" >&2 + exit 1 + esac +done + +if [ ${mck} -eq 1 ]; then + MCEXEC="${MCK}/bin/mcexec" +else + MCEXEC= +fi + +if [ 
${stop} -eq 1 ]; then + if [ ${mck} -eq 1 ]; then + sudo ${MCK}/sbin/mcstop+release.sh + else + : + fi +fi + +if [ ${reboot} -eq 1 ]; then + if [ ${mck} -eq 1 ]; then + if hostname | grep ofp &>/dev/null; then + sudo ${MCK}/sbin/mcreboot.sh -c 2-17,70-85,138-153,206-221,20-35,88-103,156-171,224-239,36-51,104-119,172-187,240-255,52-67,120-135,188-203,256-271 -r 2-5,70-73,138-141,206-209:0+6-9,74-77,142-145,210-213:1+10-13,78-81,146-149,214-217:68+14-17,82-85,150-153,218-221:69+20-23,88-91,156-159,224-227:136+24-27,92-95,160-163,228-231:137+28-31,96-99,164-167,232-235:204+32-35,100-103,168-171,236-239:205+36-39,104-107,172-175,240-243:18+40-43,108-111,176-179,244-247:19+44-47,112-115,180-183,248-251:86+48-51,116-119,184-187,252-255:87+52-55,120-123,188-191,256-259:154+56-59,124-127,192-195,260-263:155+60-63,128-131,196-199,264-267:222+64-67,132-135,200-203,268-271:223 -m 32G@0,12G@1 + else + sudo ${MCK}/sbin/mcreboot.sh -c 1-15,65-79,129-143,193-207,17-31,81-95,145-159,209-223,33-47,97-111,161-175,225-239,49-63,113-127,177-191,241-255 -r 1-15:0+65-79:64+129-143:128+193-207:192+17-31:16+81-95:80+145-159:144+209-223:208+33-47:32+97-111:96+161-175:160+225-239:224+49-63:48+113-127:112+177-191:176+241-255:240 -m 12G@0,12G@1,12G@2,12G@3,3920M@4,3920M@5,3920M@6,3920M@7 + fi + else + : + fi +fi + +if [ ${go} -eq 1 ]; then + cd ${UTI_TOP} + make $cmdline + for i in `seq 1 ${nloops}`; do + ${MCK}/bin/mcexec --enable-uti $cmdline + wait + echo =====; + echo $i; + echo =====; i=$((i+1)); + done +fi + + + diff --git a/test/uti/CT02.c b/test/uti/CT02.c new file mode 100644 index 00000000..fdfe51d2 --- /dev/null +++ b/test/uti/CT02.c @@ -0,0 +1,162 @@ +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <pthread.h> +#include <errno.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/mman.h> +#include <signal.h> + +int flag1; +pthread_mutex_t mutex1; +pthread_cond_t cond1; + +int flag2; +pthread_mutex_t mutex2; +pthread_cond_t 
cond2; +char *m; + +int sigst; +pthread_t thr; + +void +sigsegv(int s) +{ + if (sigst == 1) { + fprintf(stderr, "CT02007 mremap OK (SIGSEGV)\n"); + pthread_mutex_lock(&mutex2); + flag2 = 1; + pthread_cond_signal(&cond2); + pthread_mutex_unlock(&mutex2); + pthread_join(thr, NULL); + fprintf(stderr, "CT02009 pthread_join OK\n"); + fprintf(stderr, "CT02010 END\n"); + exit(0); + } + printf("BAD SIGSEGV\n"); + exit(1); +} + +void * +util_thread(void *arg) +{ + int rc; + char *n; + + rc = syscall(732); + if (rc == -1) + fprintf(stderr, "CT02003 get_system OK\n"); + else { + fprintf(stderr, "CT02003 get_system NG get_system=%d\n", rc); + exit(1); + } + errno = 0; + m = mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (m != (void *)-1) { + fprintf(stderr, "CT02004 mmap OK\n"); + } + else { + fprintf(stderr, "CT02004 mmap NG errno=%d\n", errno); + exit(1); + } + strcpy(m + 4096, "mmap OK"); + pthread_mutex_lock(&mutex1); + flag1 = 1; + pthread_cond_signal(&cond1); + pthread_mutex_unlock(&mutex1); + pthread_mutex_lock(&mutex2); + while (!flag2) { + pthread_cond_wait(&cond2, &mutex2); + } + flag2 = 0; + pthread_mutex_unlock(&mutex2); + n = mremap(m, 8192, 4096, 0); + if (n == m) { + fprintf(stderr, "CT02006 mremap OK\n"); + } + else if (n != (void *)-1){ + fprintf(stderr, "CT02006 mremap remapped, test stop\n"); + exit(1); + } + else { + fprintf(stderr, "CT02006 mremap NG errno=%d\n", errno); + exit(1); + } + pthread_mutex_lock(&mutex1); + flag1 = 1; + pthread_cond_signal(&cond1); + pthread_mutex_unlock(&mutex1); + + pthread_mutex_lock(&mutex2); + while (!flag2) { + pthread_cond_wait(&cond2, &mutex2); + } + flag2 = 0; + pthread_mutex_unlock(&mutex2); + rc = munmap(m, 4096); + if (rc == 0) { + fprintf(stderr, "CT02008 munmap OK\n"); + } + else { + fprintf(stderr, "CT02008 munmap NG errno=%d\n", errno); + exit(1); + } + return NULL; +} + +int +main(int argc, char **argv) +{ + int rc; + + signal(SIGSEGV, sigsegv); + pthread_mutex_init(&mutex1, 
NULL); + pthread_cond_init(&cond1, NULL); + pthread_mutex_init(&mutex2, NULL); + pthread_cond_init(&cond2, NULL); + + fprintf(stderr, "CT02001 mremap START\n"); + rc = syscall(731, 1, NULL); + if (rc) { + fprintf(stderr, "util_indicate_clone rc=%d, errno=%d\n", rc, errno); + fflush(stderr); + } + rc = pthread_create(&thr, NULL, util_thread, NULL); + if(rc){ + fprintf(stderr, "pthread_create: %d\n", rc); + exit(1); + } + fprintf(stderr, "CT02002 pthread_create OK\n"); + pthread_mutex_lock(&mutex1); + while (!flag1) { + pthread_cond_wait(&cond1, &mutex1); + } + flag1 = 0; + pthread_mutex_unlock(&mutex1); + fprintf(stderr, "CT02005 %s\n", m + 4096); + + pthread_mutex_lock(&mutex2); + flag2 = 1; + pthread_cond_signal(&cond2); + pthread_mutex_unlock(&mutex2); + + pthread_mutex_lock(&mutex1); + while (!flag1) { + pthread_cond_wait(&cond1, &mutex1); + } + flag1 = 0; + pthread_mutex_unlock(&mutex1); + sigst = 1; + fprintf(stderr, "%s\n", m + 4096); + fprintf(stderr, "CT02007 mremap NG\n"); + pthread_mutex_lock(&mutex2); + flag2 = 1; + pthread_cond_signal(&cond2); + pthread_mutex_unlock(&mutex2); + pthread_join(thr, NULL); + fprintf(stderr, "CT02009 pthread_join OK\n"); + fprintf(stderr, "CT02010 END\n"); + exit(0); +} diff --git a/test/uti/CT03.c b/test/uti/CT03.c new file mode 100644 index 00000000..6f79b2d5 --- /dev/null +++ b/test/uti/CT03.c @@ -0,0 +1,171 @@ +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <pthread.h> +#include <errno.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/mman.h> +#include <signal.h> + +int flag1; +pthread_mutex_t mutex1; +pthread_cond_t cond1; + +int flag2; +pthread_mutex_t mutex2; +pthread_cond_t cond2; + +char *m; + +int sigst; +pthread_t thr; + +void +sigsegv(int s) +{ + if (sigst == 1) { + fprintf(stderr, "CT03007 mprotect OK (SIGSEGV)\n"); + + pthread_mutex_lock(&mutex2); + flag2 = 1; + pthread_cond_signal(&cond2); + pthread_mutex_unlock(&mutex2); + + pthread_join(thr, NULL); 
+ fprintf(stderr, "CT03009 pthread_join OK\n"); + fprintf(stderr, "CT03010 END\n"); + exit(0); + } + printf("BAD SIGSEGV\n"); + exit(1); +} + +void * +util_thread(void *arg) +{ + int rc; + + rc = syscall(732); + if (rc == -1) + fprintf(stderr, "CT03003 get_system OK\n"); + else { + fprintf(stderr, "CT03003 get_system NG get_system=%d\n", rc); + exit(1); + } + errno = 0; + m = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (m != (void *)-1) { + fprintf(stderr, "CT03004 mmap OK\n"); + } + else { + fprintf(stderr, "CT03004 mmap NG errno=%d\n", errno); + exit(1); + } + strcpy(m, "mmap OK"); + + pthread_mutex_lock(&mutex1); + flag1 = 1; + pthread_cond_signal(&cond1); + pthread_mutex_unlock(&mutex1); + + pthread_mutex_lock(&mutex2); + while (!flag2) { + pthread_cond_wait(&cond2, &mutex2); + } + flag2 = 0; + pthread_mutex_unlock(&mutex2); + + rc = mprotect(m, 4096, PROT_READ); + if (rc == 0) { + fprintf(stderr, "CT03006 mprotect OK\n"); + } + else { + fprintf(stderr, "CT03006 mprotect NG errno=%d\n", errno); + exit(1); + } + pthread_mutex_lock(&mutex1); + flag1 = 1; + pthread_cond_signal(&cond1); + pthread_mutex_unlock(&mutex1); + + pthread_mutex_lock(&mutex2); + while (!flag2) { + pthread_cond_wait(&cond2, &mutex2); + } + flag2 = 0; + pthread_mutex_unlock(&mutex2); + + rc = munmap(m, 4096); + if (rc == 0) { + fprintf(stderr, "CT03008 munmap OK\n"); + } + else { + fprintf(stderr, "CT03008 munmap NG errno=%d\n", errno); + exit(1); + } + return NULL; +} + +int +main(int argc, char **argv) +{ + int rc; + + signal(SIGSEGV, sigsegv); + pthread_mutex_init(&mutex1, NULL); + pthread_cond_init(&cond1, NULL); + pthread_mutex_init(&mutex2, NULL); + pthread_cond_init(&cond2, NULL); + + fprintf(stderr, "CT03001 mprotect START\n"); + rc = syscall(731, 1, NULL); + if (rc) { + fprintf(stderr, "util_indicate_clone rc=%d, errno=%d\n", rc, errno); + fflush(stderr); + } + rc = pthread_create(&thr, NULL, util_thread, NULL); + if(rc){ + fprintf(stderr, 
"pthread_create: %d\n", rc); + exit(1); + } + fprintf(stderr, "CT03002 pthread_create OK\n"); + + pthread_mutex_lock(&mutex1); + while (!flag1) { + pthread_cond_wait(&cond1, &mutex1); + } + flag1 = 0; + pthread_mutex_unlock(&mutex1); + + fprintf(stderr, "CT03005 %s\n", m); + + pthread_mutex_lock(&mutex2); + flag2 = 1; + pthread_cond_signal(&cond2); + pthread_mutex_unlock(&mutex2); + + + pthread_mutex_lock(&mutex1); + while (!flag1) { + pthread_cond_wait(&cond1, &mutex1); + } + flag1 = 0; + pthread_mutex_unlock(&mutex1); + + sigst = 1; + strcpy(m, "mprotect NG"); + fprintf(stderr, "%s\n", m); + fprintf(stderr, "CT03007 mprotect NG\n"); + + pthread_mutex_lock(&mutex2); + flag2 = 1; + pthread_cond_signal(&cond2); + pthread_mutex_unlock(&mutex2); + + pthread_join(thr, NULL); + fprintf(stderr, "CT03009 pthread_join OK\n"); + fprintf(stderr, "CT03010 END\n"); + exit(0); +} diff --git a/test/uti/CT04.c b/test/uti/CT04.c new file mode 100644 index 00000000..7ecd17b0 --- /dev/null +++ b/test/uti/CT04.c @@ -0,0 +1,106 @@ +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <pthread.h> +#include <errno.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/mman.h> +#include <signal.h> + +int flag1; +pthread_mutex_t mutex1; +pthread_cond_t cond1; + +int flag2; +pthread_mutex_t mutex2; +pthread_cond_t cond2; + +char *a; +char *b; +char *c; + + +void * +util_thread(void *arg) +{ + int rc; + + rc = syscall(732); + if (rc == -1) + fprintf(stderr, "CT04003 get_system OK\n"); + else { + fprintf(stderr, "CT04003 get_system NG get_system=%d\n", rc); + exit(1); + } + errno = 0; + a = sbrk(0); + fprintf(stderr, "CT04004 sbrk OK\n"); + b = sbrk(4096); + strcpy(a, "sbrk OK"); + + pthread_mutex_lock(&mutex1); + flag1 = 1; + pthread_cond_signal(&cond1); + pthread_mutex_unlock(&mutex1); + + pthread_mutex_lock(&mutex2); + while(!flag2) { + pthread_cond_wait(&cond2, &mutex2); + } + flag2 = 0; + pthread_mutex_unlock(&mutex2); + + b = sbrk(0); + 
if (c == b) { + fprintf(stderr, "CT04006 sbrk OK\n"); + } + else { + fprintf(stderr, "CT04006 sbrk NG %p != %p\n", c, b); + } + return NULL; +} + +int +main(int argc, char **argv) +{ + pthread_t thr; + int rc; + + pthread_mutex_init(&mutex1, NULL); + pthread_cond_init(&cond1, NULL); + pthread_mutex_init(&mutex2, NULL); + pthread_cond_init(&cond2, NULL); + + fprintf(stderr, "CT04001 brk START\n"); + rc = syscall(731, 1, NULL); + if (rc) { + fprintf(stderr, "util_indicate_clone rc=%d, errno=%d\n", rc, errno); + fflush(stderr); + } + rc = pthread_create(&thr, NULL, util_thread, NULL); + if(rc){ + fprintf(stderr, "pthread_create: %d\n", rc); + exit(1); + } + fprintf(stderr, "CT04002 pthread_create OK\n"); + + pthread_mutex_lock(&mutex1); + while(!flag1) { + pthread_cond_wait(&cond1, &mutex1); + } + flag1 = 0; + pthread_mutex_unlock(&mutex1); + fprintf(stderr, "CT04005 %s\n", a); + + c = sbrk(0); + pthread_mutex_lock(&mutex2); + flag2 = 1; + pthread_cond_signal(&cond2); + pthread_mutex_unlock(&mutex2); + pthread_join(thr, NULL); + fprintf(stderr, "CT04007 pthread_join OK\n"); + fprintf(stderr, "CT04008 END\n"); + exit(0); +} diff --git a/test/uti/CT05.c b/test/uti/CT05.c new file mode 100644 index 00000000..ad5d4918 --- /dev/null +++ b/test/uti/CT05.c @@ -0,0 +1,67 @@ +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <pthread.h> +#include <errno.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/mman.h> +#include <signal.h> + +void * +util_thread(void *arg) +{ + int rc; + int tid; + + rc = syscall(732); + if (rc == 0) + fprintf(stderr, "CT05003 get_system OK\n"); + else { + fprintf(stderr, "CT05003 get_system NG get_system=%d\n", rc); + exit(1); + } + tid = syscall(SYS_gettid); + fprintf(stderr, "CT05004 gettid OK %d\n", tid); + rc = syscall(730); + if (rc == 0) { + fprintf(stderr, "CT05005 util_migrate_inter_kernel OK\n"); + } + else { + fprintf(stderr, "CT05005 util_migrate_inter_kernel NG rc=%d errno=%d\n", rc, 
errno); + } + rc = syscall(732); + if (rc == -1) + fprintf(stderr, "CT05006 get_system OK\n"); + else { + fprintf(stderr, "CT05006 get_system NG get_system=%d\n", rc); + exit(1); + } + if ((rc = syscall(SYS_gettid)) == tid) { + fprintf(stderr, "CT05007 gettid OK %d\n", tid); + } + else { + fprintf(stderr, "CT05007 gettid NG %d\n", rc); + } + return NULL; +} + +int +main(int argc, char **argv) +{ + pthread_t thr; + int rc; + + fprintf(stderr, "CT05001 gettid START\n"); + rc = pthread_create(&thr, NULL, util_thread, NULL); + if(rc){ + fprintf(stderr, "pthread_create: %d\n", rc); + exit(1); + } + fprintf(stderr, "CT05002 pthread_create OK\n"); + pthread_join(thr, NULL); + fprintf(stderr, "CT05008 pthread_join OK\n"); + fprintf(stderr, "CT05009 END\n"); + exit(0); +} diff --git a/test/uti/CT06.c b/test/uti/CT06.c new file mode 100644 index 00000000..61d1d238 --- /dev/null +++ b/test/uti/CT06.c @@ -0,0 +1,80 @@ +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <pthread.h> +#include <errno.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/mman.h> +#include <signal.h> +#include <sys/wait.h> /* waitpid(), WIFEXITED(), WEXITSTATUS() used in main() */ + +void * +util_thread(void *arg) +{ + long rc; + + rc = syscall(732); + if (rc == -1) + fprintf(stderr, "CT06003 get_system OK\n"); + else { + fprintf(stderr, "CT06003 get_system NG get_system=%ld\n", rc); /* rc is long */ + exit(1); + } + + syscall(SYS_exit_group, 99); + + return NULL; +} + +int +main(int argc, char **argv) +{ + int rc; + pthread_t thr; + int st; + pid_t pid; + + fprintf(stderr, "CT06001 syscall error START\n"); + + pid = fork(); + if (pid) { + if (pid == -1) { + perror("fork"); + exit(1); + } + while ((rc = waitpid(pid, &st, 0)) == -1 && errno == EINTR); + if (rc == -1) { + fprintf(stderr, "CT06004 exit_group NG rc=%d errno=%d\n", rc, errno); + exit(1); + } + if (!WIFEXITED(st)) { + fprintf(stderr, "CT06004 exit_group NG st=%08x\n", st); + exit(1); + } + if (WEXITSTATUS(st) != 99) { + fprintf(stderr, "CT06004 exit_group NG st=%d\n", 
WEXITSTATUS(st)); + exit(1); + } + fprintf(stderr, "CT06004 exit_group OK\n"); + exit(0); + } + + rc = syscall(731, 1, NULL); + if (rc) { + fprintf(stderr, "util_indicate_clone rc=%d, errno=%d\n", rc, errno); + fflush(stderr); + } + rc = pthread_create(&thr, NULL, util_thread, NULL); + if(rc){ + fprintf(stderr, "pthread_create: %d\n", rc); + exit(1); + } + fprintf(stderr, "CT06002 pthread_create OK\n"); + + pthread_join(thr, NULL); + fprintf(stderr, "CT06004 pthread_join NG\n"); + fprintf(stderr, "CT06004 END\n"); + exit(0); +} diff --git a/test/uti/CT07.c b/test/uti/CT07.c new file mode 100644 index 00000000..9eff04ca --- /dev/null +++ b/test/uti/CT07.c @@ -0,0 +1,86 @@ +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <pthread.h> +#include <errno.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/mman.h> +#include <signal.h> +#include <sys/types.h> + +void * +util_thread(void *arg) +{ + long rc; + rc = syscall(732); + if (rc == -1) + fprintf(stderr, "CT07003 get_system OK\n"); + else { + fprintf(stderr, "CT07003 get_system NG get_system=%ld\n", rc); /* rc is long */ + exit(1); + } + + rc = syscall(SYS_clone); + if (rc == -1 && errno == ENOSYS) { + fprintf(stderr, "CT07004 clone OK\n"); + } + else { + fprintf(stderr, "CT07004 clone NG rc=%ld errno=%d\n", rc, errno); + } + + rc = syscall(SYS_fork); + if (rc == -1 && errno == ENOSYS) { + fprintf(stderr, "CT07005 fork OK\n"); + } + else { + fprintf(stderr, "CT07005 fork NG rc=%ld errno=%d\n", rc, errno); + } + +#if 0 /* It looks like syscall_intercept can't hook vfork */ + rc = syscall(SYS_vfork); + //rc = vfork(); + fprintf(stderr, "CT07006 vfork rc=%ld,errno=%d\n", rc, errno); + if (rc == -1 && errno == ENOSYS) { + fprintf(stderr, "CT07006 vfork OK\n"); + } + else { + fprintf(stderr, "CT07006 vfork NG rc=%ld errno=%d\n", rc, errno); + } +#endif + + rc = syscall(SYS_execve); + if (rc == -1 && errno == ENOSYS) { + fprintf(stderr, "CT07007 execve OK\n"); + } + else { + 
fprintf(stderr, "CT07007 execve NG rc=%ld errno=%d\n", rc, errno); + } + return NULL; +} + +int +main(int argc, char **argv) +{ + int rc; + pthread_t thr; + + fprintf(stderr, "CT07001 syscall error START\n"); + rc = syscall(731, 1, NULL); + if (rc) { + fprintf(stderr, "util_indicate_clone rc=%d, errno=%d\n", rc, errno); + fflush(stderr); + } + rc = pthread_create(&thr, NULL, util_thread, NULL); + if(rc){ + fprintf(stderr, "pthread_create: %d\n", rc); + exit(1); + } + fprintf(stderr, "CT07002 pthread_create OK\n"); + + pthread_join(thr, NULL); + fprintf(stderr, "CT07008 pthread_join OK\n"); + fprintf(stderr, "CT07010 END\n"); + exit(0); +} diff --git a/test/uti/CT08.c b/test/uti/CT08.c new file mode 100644 index 00000000..8ce75e2e --- /dev/null +++ b/test/uti/CT08.c @@ -0,0 +1,165 @@ +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <pthread.h> +#include <errno.h> +#include <unistd.h> +#include <string.h> +#include <stdint.h> +#include <sys/syscall.h> +#include <sys/mman.h> +#include <sched.h> + +#define UTI_FLAG_NUMA_SET (1ULL<<1) /* Indicates NUMA_SET is specified */ + +#define UTI_FLAG_SAME_NUMA_DOMAIN (1ULL<<2) +#define UTI_FLAG_DIFFERENT_NUMA_DOMAIN (1ULL<<3) + +#define UTI_FLAG_SAME_L1 (1ULL<<4) +#define UTI_FLAG_SAME_L2 (1ULL<<5) +#define UTI_FLAG_SAME_L3 (1ULL<<6) + +#define UTI_FLAG_DIFFERENT_L1 (1ULL<<7) +#define UTI_FLAG_DIFFERENT_L2 (1ULL<<8) +#define UTI_FLAG_DIFFERENT_L3 (1ULL<<9) + +#define UTI_FLAG_EXCLUSIVE_CPU (1ULL<<10) +#define UTI_FLAG_CPU_INTENSIVE (1ULL<<11) +#define UTI_FLAG_HIGH_PRIORITY (1ULL<<12) +#define UTI_FLAG_NON_COOPERATIVE (1ULL<<13) + +/* Linux default value is used */ +#define UTI_MAX_NUMA_DOMAINS (1024) + +typedef struct uti_attr { + /* UTI_CPU_SET environmental variable is used to denote the preferred + location of utility thread */ + uint64_t numa_set[(UTI_MAX_NUMA_DOMAINS + sizeof(uint64_t) * 8 - 1) / + (sizeof(uint64_t) * 8)]; + uint64_t flags; /* Representing location and behavior hints by bitmap */ +} 
uti_attr_t; + +void +print_sched() +{ + cpu_set_t cpuset; + int sched; + + sched_getaffinity(0, sizeof cpuset, &cpuset); + sched = sched_getscheduler(0); + fprintf(stderr, "\tsched cpu=%16lx sched=%d\n", *(long *)&cpuset, sched); +} + +void * +util_thread(void *arg) +{ + print_sched(); + return NULL; +} + +void +thread_test(uti_attr_t *attr, char *msg) +{ + pthread_t thr; + int rc; + + fprintf(stderr, "%s\n", msg); + rc = syscall(731, 1, attr); + if (rc) { + fprintf(stderr, "util_indicate_clone rc=%d, errno=%d\n", rc, errno); + fflush(stderr); + } + rc = pthread_create(&thr, NULL, util_thread, NULL); + if(rc){ + fprintf(stderr, "pthread_create: %d\n", rc); + exit(1); + } + pthread_join(thr, NULL); +} + +int +main(int argc, char **argv) +{ + uti_attr_t attr; + + memset(&attr, '\0', sizeof attr); + attr.numa_set[0] = 2; // NUMA domain == 1 + attr.flags = UTI_FLAG_NUMA_SET; + thread_test(&attr, "CT08001 UTI_FLAG_NUMA_SET"); + + memset(&attr, '\0', sizeof attr); + attr.numa_set[0] = 2; + attr.flags = UTI_FLAG_NUMA_SET | UTI_FLAG_EXCLUSIVE_CPU; + thread_test(&attr, "CT08002 UTI_FLAG_NUMA_SET|UTI_FLAG_EXCLUSIVE_CPU"); + + memset(&attr, '\0', sizeof attr); + attr.numa_set[0] = 2; + attr.flags = UTI_FLAG_NUMA_SET | UTI_FLAG_EXCLUSIVE_CPU; + thread_test(&attr, "CT08003 UTI_FLAG_NUMA_SET|UTI_FLAG_EXCLUSIVE_CPU(2)"); + + memset(&attr, '\0', sizeof attr); + attr.flags = UTI_FLAG_SAME_NUMA_DOMAIN; + thread_test(&attr, "CT08004 UTI_FLAG_SAME_NUMA_DOMAIN"); + + memset(&attr, '\0', sizeof attr); + attr.flags = UTI_FLAG_SAME_NUMA_DOMAIN | UTI_FLAG_CPU_INTENSIVE; + thread_test(&attr, "CT08005 UTI_FLAG_SAME_NUMA_DOMAIN|UTI_FLAG_CPU_INTENSIVE"); + + memset(&attr, '\0', sizeof attr); + attr.flags = UTI_FLAG_DIFFERENT_NUMA_DOMAIN; + thread_test(&attr, "CT08006 UTI_FLAG_DIFFERENT_NUMA_DOMAIN"); + + memset(&attr, '\0', sizeof attr); + attr.flags = UTI_FLAG_DIFFERENT_NUMA_DOMAIN | UTI_FLAG_HIGH_PRIORITY; + thread_test(&attr, "CT08007 UTI_FLAG_DIFFERENT_NUMA_DOMAIN|UTI_FLAG_HIGH_PRIORITY"); 
+ + memset(&attr, '\0', sizeof attr); + attr.flags = UTI_FLAG_SAME_L1; + thread_test(&attr, "CT08008 UTI_FLAG_SAME_L1"); + + memset(&attr, '\0', sizeof attr); + attr.flags = UTI_FLAG_SAME_L1 | UTI_FLAG_NON_COOPERATIVE; + thread_test(&attr, "CT08009 UTI_FLAG_SAME_L1|UTI_FLAG_NON_COOPERATIVE"); + + memset(&attr, '\0', sizeof attr); + attr.flags = UTI_FLAG_SAME_L2; + thread_test(&attr, "CT08010 UTI_FLAG_SAME_L2"); + + memset(&attr, '\0', sizeof attr); + attr.flags = UTI_FLAG_SAME_L2 | UTI_FLAG_CPU_INTENSIVE; + thread_test(&attr, "CT08011 UTI_FLAG_SAME_L2|UTI_FLAG_CPU_INTENSIVE"); + + memset(&attr, '\0', sizeof attr); + attr.flags = UTI_FLAG_SAME_L3; + thread_test(&attr, "CT08012 UTI_FLAG_SAME_L3"); + + memset(&attr, '\0', sizeof attr); + attr.flags = UTI_FLAG_SAME_L3 | UTI_FLAG_CPU_INTENSIVE; + thread_test(&attr, "CT08013 UTI_FLAG_SAME_L3|UTI_FLAG_CPU_INTENSIVE"); + + memset(&attr, '\0', sizeof attr); + attr.flags = UTI_FLAG_DIFFERENT_L1; + thread_test(&attr, "CT08014 UTI_FLAG_DIFFERENT_L1"); + + memset(&attr, '\0', sizeof attr); + attr.flags = UTI_FLAG_DIFFERENT_L1 | UTI_FLAG_CPU_INTENSIVE; + thread_test(&attr, "CT08015 UTI_FLAG_DIFFERENT_L1|UTI_FLAG_CPU_INTENSIVE"); + + memset(&attr, '\0', sizeof attr); + attr.flags = UTI_FLAG_DIFFERENT_L2; + thread_test(&attr, "CT08016 UTI_FLAG_DIFFERENT_L2"); + + memset(&attr, '\0', sizeof attr); + attr.flags = UTI_FLAG_DIFFERENT_L2 | UTI_FLAG_CPU_INTENSIVE; + thread_test(&attr, "CT08017 UTI_FLAG_DIFFERENT_L2|UTI_FLAG_CPU_INTENSIVE"); + + memset(&attr, '\0', sizeof attr); + attr.flags = UTI_FLAG_DIFFERENT_L3; + thread_test(&attr, "CT08018 UTI_FLAG_DIFFERENT_L3"); + + memset(&attr, '\0', sizeof attr); + attr.flags = UTI_FLAG_DIFFERENT_L3 | UTI_FLAG_CPU_INTENSIVE; + thread_test(&attr, "CT08019 UTI_FLAG_DIFFERENT_L3|UTI_FLAG_CPU_INTENSIVE"); + + exit(0); +} diff --git a/test/uti/CT09.c b/test/uti/CT09.c new file mode 100644 index 00000000..b8bed45b --- /dev/null +++ b/test/uti/CT09.c @@ -0,0 +1,278 @@ +#define _GNU_SOURCE +#include 
<stdlib.h> +#include <stdio.h> +#include <string.h> +#include <pthread.h> +#include <errno.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/mman.h> +#include <signal.h> + +#define DEBUG + +#ifdef DEBUG +#define dprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stdout, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#define eprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stdout, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#else +#define dprintf(...) do { } while (0) +#define eprintf(...) do { } while (0) +#endif + +#define TS2NS(sec, nsec) ((unsigned long)(sec) * 1000000000ULL + (unsigned long)(nsec)) +#define CALC_DELAY (98600) /* 98.6 usec */ +#define RTS_DELAY (1000) /* 1 usec, CPU time for sending Request-to-Send packet */ +#define NIC_DELAY (3000) /* 5 usec, RTS packet propagation time + RDMA-read on the responder side + CPU time for sending DONE packet + DONE packet network propagation time */ +#define POLL_DELAY ( 200) /* 0.2 usec, CPU time for checking DRAM event queue */ +#define COMPL_DELAY ( 200) /* 0.2 usec, CPU time for updates MPI_Request */ +#define NSPIN 1 +static inline void FIXED_SIZE_WORK(unsigned long *ptr) { +#if 0 + asm volatile("movq %0, %%rax\n\t" + "addq $1, %%rax\n\t" + "movq %%rax, %0\n\t" + : "+rm" (*ptr) + : + : "rax", "cc", "memory"); +#endif + asm volatile( + "movq $0, %%rcx\n\t" + "1:\t" + "addq $1, %%rcx\n\t" + "cmpq $99, %%rcx\n\t" + "jle 1b\n\t" + : + : + : "rcx", "cc"); +} + +static inline void BULK_FSW(unsigned long n, unsigned long *ptr) { + int j; + for (j = 0; j < (n); j++) { + FIXED_SIZE_WORK(ptr); + } +} + +pthread_mutex_t ep_lock; /* Ownership of channel instance */ + +struct thr_arg { + int bar_count; /* Barrier before entering loop */ + pthread_mutex_t bar_lock; + pthread_cond_t bar_cond; + pthread_t pthread; + unsigned long mem; /* Per-thread storage */ +}; + +struct thr_arg thr_args; + +unsigned long mem; /* Per-thread storage */ 
+volatile int nevents; +volatile int terminate; +int wps = 1; /* work per sec */ +double nspw; /* nsec per work */ + +#define N_INIT 10000000 + +void fwq_init(unsigned long *mem) { + struct timespec start, end; + unsigned long nsec; + int i; + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + BULK_FSW(N_INIT, mem); + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + nsec = (TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec)); + nspw = nsec / (double)N_INIT; + printf("nsec=%lu, nspw=%f\n", nsec, nspw); /* nsec is unsigned long */ +} + +void fwq(long delay_nsec, unsigned long* mem) { + if (delay_nsec < 0) { + printf("%s: delay_nsec<0\n", __FUNCTION__); + } + //printf("delay_nsec=%ld,count=%f\n", delay_nsec, delay_nsec / nspw); + BULK_FSW(delay_nsec / nspw, mem); +} + +void mydelay(long delay_nsec, long *mem) { + struct timespec start, end; + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + + while (1) { + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + if (TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec) > delay_nsec) { + break; + } + FIXED_SIZE_WORK((unsigned long *)mem); /* FIXED_SIZE_WORK takes unsigned long * */ + } +} + +void *progress_fn(void *_arg) { + struct thr_arg *arg = (struct thr_arg *)_arg; + int rc; + int spin_count = 0; + int i; + + rc = syscall(732); + if (rc == -1) + fprintf(stdout, "CT09100 progress_fn running on Linux OK\n"); + else { + fprintf(stdout, "CT09100 progress_fn running on McKernel NG\n"); /* format has no conversion, so no extra argument is passed */ + } + + printf("tid=%d,bar_count=%d\n", (int)syscall(__NR_gettid), arg->bar_count); /* syscall() returns long; %d needs int */ + + pthread_mutex_lock(&arg->bar_lock); + arg->bar_count++; + if (arg->bar_count == 2) { + if ((rc = pthread_cond_broadcast(&arg->bar_cond))) { + printf("pthread_cond_broadcast failed,rc=%d\n", rc); + } + } + while (arg->bar_count != 2) { + if ((rc = pthread_cond_wait(&arg->bar_cond, &arg->bar_lock))) { + printf("pthread_cond_wait failed,rc=%d\n", rc); + } + } + pthread_mutex_unlock(&arg->bar_lock); + + printf("after barrier\n"); + + /* Start progress */ + pthread_mutex_lock(&ep_lock); + while(1) { + if (terminate) { + 
break; + } + + fwq(POLL_DELAY, &arg->mem); + + /* Event found */ + if (nevents > 0) { + fwq(COMPL_DELAY, &arg->mem); /* Simulate MPI protocol response */ + nevents = 0; + } + + spin_count++; + if (spin_count >= NSPIN) { + spin_count = 0; + pthread_mutex_unlock(&ep_lock); + sched_yield(); + pthread_mutex_lock(&ep_lock); + } + } + return NULL; +} + +int main(int argc, char **argv) { + int rc; + int i; + char *uti_str; + int uti_val; + struct timespec start, end; + int disable_progress; + + fprintf(stdout, "CT09001 MPI progress thread skelton START\n"); + + rc = syscall(732); + if (rc == -1) + fprintf(stdout, "CT09002 main running on Linux INFO\n"); + else { + fprintf(stdout, "CT09002 main running on McKernel INFO\n"); + } + + fwq_init(&mem); + pthread_mutex_init(&ep_lock, NULL); + + thr_args.bar_count = 0; + pthread_cond_init(&thr_args.bar_cond, NULL); + pthread_mutex_init(&thr_args.bar_lock, NULL); + + disable_progress = (argc > 1 && strcmp(argv[1], "-d") == 0) ? 1 : 0; + + if (disable_progress) { + goto skip1; + } + + uti_str = getenv("DISABLE_UTI"); + uti_val = uti_str ? 
atoi(uti_str) : 0; + if (!uti_val) { + rc = syscall(731, 1, NULL); + if (rc) { + fprintf(stdout, "CT09003 INFO: uti not available (rc=%d)\n", rc); + } else { + fprintf(stdout, "CT09003 INFO: uti available\n"); + } + } else { + fprintf(stdout, "CT09003 INFO: uti disabled\n", rc); + } + + rc = pthread_create(&thr_args.pthread, NULL, progress_fn, &thr_args); + if (rc){ + fprintf(stdout, "pthread_create: %d\n", rc); + exit(1); + } + pthread_mutex_lock(&thr_args.bar_lock); + thr_args.bar_count++; + if (thr_args.bar_count == 2) { + if ((rc = pthread_cond_broadcast(&thr_args.bar_cond))) { + printf("pthread_cond_broadcast failed,rc=%d\n", rc); + } + } + while (thr_args.bar_count != 2) { + if ((rc = pthread_cond_wait(&thr_args.bar_cond, &thr_args.bar_lock))) { + printf("pthread_cond_wait failed,rc=%d\n", rc); + } + } + pthread_mutex_unlock(&thr_args.bar_lock); + + fprintf(stdout, "CT09004 pthread_create OK\n"); + skip1: + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + for (i = 0; i < 10000; i++) { /* It takes 1 sec */ + if(!disable_progress) { + + /* Acquire endpoint and send request-to-send packet */ + pthread_mutex_lock(&ep_lock); + fwq(RTS_DELAY, &mem); + pthread_mutex_unlock(&ep_lock); + + /* Start calculation */ + + /* Generate event on behaf of responder */ + fwq(NIC_DELAY, &mem); + nevents++; + + fwq(CALC_DELAY - NIC_DELAY, &mem); /* Overlap remainder */ + + /* Wait until async thread consumes the event */ + while (nevents > 0) { + FIXED_SIZE_WORK(&mem); + } + } else { + /* No overlap case */ + fwq(RTS_DELAY + CALC_DELAY + POLL_DELAY + COMPL_DELAY, &mem); + } + } + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + + if(!disable_progress) { + terminate = 1; + + pthread_join(thr_args.pthread, NULL); + } + fprintf(stderr, "total %ld nsec\n", TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec)); + fprintf(stdout, "CT09006 END\n"); + + + exit(0); +} diff --git a/test/uti/CT09.sh b/test/uti/CT09.sh new file mode 100755 index 00000000..b28b6d5e --- 
/dev/null +++ b/test/uti/CT09.sh @@ -0,0 +1,51 @@ +#!/usr/bin/bash +MYHOME="/work/gg10/e29005" +MCK="${MYHOME}/project/os/install" +MCEXEC= +export DISABLE_UTI=0 + +stop=0 +reset=0 +go=0 +nodes="c[8194]" + +while getopts srgmd OPT +do + case ${OPT} in + s) stop=1 + ;; + r) reset=1 + ;; + g) go=1 + ;; + m) MCEXEC="${MCK}/bin/mcexec" + ;; + d) export DISABLE_UTI=1 + ;; + *) echo "invalid option -${OPT}" >&2 + exit 1 + esac +done + +if [ ${stop} -eq 1 ]; then + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w ${nodes} \ + sudo mount /work + + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w ${nodes} \ + sudo ${MCK}/sbin/mcstop+release.sh +fi + +if [ ${reset} -eq 1 ]; then + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w ${nodes} \ + sudo mount /work + + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w ${nodes} \ + sudo ${MCK}/sbin/mcreboot.sh -c 2-17,20-35,36-51,52-67 -r 2-5:0+6-9:1+10-13:68+14-17:69+20-23:136+24-27:137+28-31:204+32-35:205+36-39:18+40-43:19+44-47:86+48-51:87+52-55:154+56-59:155+60-63:222+64-67:223 -m 32G@0,12G@1 +fi + +if [ ${go} -eq 1 ]; then + > ./log + for i in {1..10}; do (${MCEXEC} --enable-uti ./CT09 1>/dev/null 2>> ./log); done + #${MCEXEC} ./CT09 + perl CT11.pl < ./log +fi diff --git a/test/uti/CT10.c b/test/uti/CT10.c new file mode 100644 index 00000000..763e8a53 --- /dev/null +++ b/test/uti/CT10.c @@ -0,0 +1,103 @@ +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <pthread.h> +#include <errno.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/mman.h> +#include <signal.h> + +pthread_mutex_t mutex1; +pthread_cond_t cond1; +pthread_mutex_t mutex2; +pthread_cond_t cond2; +char *m; +int flag1, flag2; + +int sigst; +pthread_t thr; + +void * +util_thread(void *arg) +{ + int rc; + + rc = syscall(732); + if (rc == -1) + fprintf(stderr, "CT10100 running on Linux OK\n"); + else { + fprintf(stderr, "CT10100 running on Linux NG (%d)\n", rc); + } + errno = 0; + + pthread_mutex_lock(&mutex1); + flag1 = 1; + 
pthread_cond_signal(&cond1); + pthread_mutex_unlock(&mutex1); + + pthread_mutex_lock(&mutex2); + while(!flag2) { + pthread_cond_wait(&cond2, &mutex2); + } + flag2 = 0; + pthread_mutex_unlock(&mutex2); + + pthread_mutex_lock(&mutex1); + flag1 = 1; + pthread_cond_signal(&cond1); + pthread_mutex_unlock(&mutex1); + return NULL; +} + +int +main(int argc, char **argv) +{ + int rc; + + pthread_mutex_init(&mutex1, NULL); + pthread_cond_init(&cond1, NULL); + pthread_mutex_init(&mutex2, NULL); + pthread_cond_init(&cond2, NULL); + + fprintf(stderr, "CT10001 futex START\n"); +#if 1 + rc = syscall(731, 1, NULL); + if (rc) { + fprintf(stderr, "util_indicate_clone rc=%d, errno=%d\n", rc, errno); + fflush(stderr); + } +#endif + rc = pthread_create(&thr, NULL, util_thread, NULL); + if(rc){ + fprintf(stderr, "pthread_create: %d\n", rc); + exit(1); + } + fprintf(stderr, "CT10002 pthread_create OK\n"); + + pthread_mutex_lock(&mutex1); + while(!flag1) { + pthread_cond_wait(&cond1, &mutex1); + } + flag1 = 0; + pthread_mutex_unlock(&mutex1); + + pthread_mutex_lock(&mutex2); + flag2 = 1; + pthread_cond_signal(&cond2); + pthread_mutex_unlock(&mutex2); + + pthread_mutex_lock(&mutex1); + while(!flag1) { + pthread_cond_wait(&cond1, &mutex1); + } + flag1 = 0; + pthread_mutex_unlock(&mutex1); + + pthread_join(thr, NULL); + fprintf(stderr, "CT10003 pthread_join OK\n"); + + fprintf(stderr, "CT10004 END\n"); + exit(0); +} diff --git a/test/uti/CT11.c b/test/uti/CT11.c new file mode 100644 index 00000000..04d77e3e --- /dev/null +++ b/test/uti/CT11.c @@ -0,0 +1,275 @@ +#define _GNU_SOURCE +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> +#include <string.h> +#include <pthread.h> +#include <sys/syscall.h> /* For SYS_xxx definitions */ +#include <sys/mman.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <linux/futex.h> + +#define NLOOP 10 +#define TS2NS(sec, nsec) ((unsigned long)(sec) * 1000000000ULL + (unsigned 
long)(nsec)) +#define SZCHUNK 4096 +#define IHK_DEVICE_CREATE_OS 0x112900 +#define IHK_DEVICE_DESTROY_OS 0x112901 + +pthread_mutex_t mutex; +pthread_cond_t cond; +int sem; +int futex_flag; +pthread_t thr; + +struct syscall { + int number; + const char *name; +}; + +struct syscall syscalls[] = { + { .number = __NR_getuid, .name = "getuid" }, + { .number = __NR_ioctl, .name = "ioctl" }, + { .number = __NR_futex, .name = "futex" }, + { .number = __NR_mmap, .name = "mmap" }, + { .number = __NR_munmap, .name = "munmap" }, + { .number = __NR_brk, .name = "brk" }, + { .number = __NR_gettid, .name = "gettid" }, + { .number = __NR_mprotect, .name = "mprotect" }, + { .number = __NR_mremap, .name = "mremap" }, + { .number = __NR_open, .name = "open" }, + { .number = __NR_read, .name = "read" }, + { .number = __NR_write, .name = "write" } +}; + +void *util_thread(void *arg) { + int i, j; + int rc; + uid_t uid; + int osnum; + int fds[NLOOP]; + void *mems[NLOOP]; + void *memremaps[NLOOP]; + void *brk_cur; + char* buf = malloc(SZCHUNK*NLOOP); + struct timespec start, end; + long nsec; + + rc = syscall(732); + if (rc == -1) + fprintf(stdout, "[INFO] Child is running on Liux\n"); + else { + fprintf(stdout, "[INFO] Child is running on McKernel\n"); + } + errno = 0; + + for (i = 0; i < sizeof(syscalls) / sizeof(syscalls[0]); i++) { + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + + switch (syscalls[i].number) { + case __NR_brk: + brk_cur = sbrk(0); + break; + case __NR_mprotect: + if((mems[0] = mmap(0, SZCHUNK, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)) == (void*)-1) { + fprintf(stderr, "mmap failed: %s\n", strerror(errno)); + } + case __NR_munmap: + case __NR_mremap: + for (j = 0; j < NLOOP; j++) { + if((mems[j] = mmap(0, SZCHUNK, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)) == (void*)-1) { + fprintf(stderr, "mmap failed: %s\n", strerror(errno)); + } + } + break; + case __NR_ioctl: + if((fds[0] = open("/dev/hello", O_RDWR)) < 0) { + 
fprintf(stderr, "ioctl, open failed: %s\n", strerror(errno)); + exit(1); + } + break; + case __NR_read: + case __NR_write: + if((fds[0] = open("./file", O_RDWR)) < 0) { + fprintf(stderr, "write, open failed: %s\n", strerror(errno)); + exit(1); + } + break; + default: + break; + } + + for (j = 0; j < NLOOP; j++) { + switch (syscalls[i].number) { + case __NR_gettid: + if((rc = syscall(syscalls[i].number)) < 0) { + fprintf(stderr, "%s failed: %s\n", syscalls[i].name, strerror(errno)); + } + break; + case __NR_futex: + futex_flag = 1; + if((rc = syscall(__NR_futex, &futex_flag, FUTEX_WAKE, 1, NULL, NULL, 0)) < 0) { + fprintf(stderr, "%s failed: %s\n", syscalls[i].name, strerror(errno)); + } + break; + case __NR_brk: + if((rc = brk(brk_cur)) < 0) { + fprintf(stderr, "%s failed: %s\n", syscalls[i].name, strerror(errno)); + } + break; + case __NR_mmap: + if((mems[j] = mmap(0, SZCHUNK, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)) == (void*)-1) { + fprintf(stderr, "%s failed: %s\n", syscalls[i].name, strerror(errno)); + } + break; + case __NR_munmap: + if((rc = munmap(mems[j], SZCHUNK)) < 0) { + fprintf(stderr, "%s failed: %s\n", syscalls[i].name, strerror(errno)); + } + break; + case __NR_mprotect: + if((rc = mprotect(mems[0], SZCHUNK, PROT_READ)) < 0) { + fprintf(stderr, "%s failed: %s\n", syscalls[i].name, strerror(errno)); + } + break; + case __NR_mremap: + if((memremaps[j] = mremap(mems[j], SZCHUNK, 8192, MREMAP_MAYMOVE)) == (void*)-1) { + fprintf(stderr, "%s failed: %s\n", syscalls[i].name, strerror(errno)); + } + break; + case __NR_getuid: + if((uid = syscall(syscalls[i].number)) < 0) { + fprintf(stderr, "%s failed: uid=%d,%s\n", syscalls[i].name, uid, strerror(errno)); + } + break; + case __NR_open: + if((fds[j] = open("./file", O_RDONLY)) < 0) { + fprintf(stderr, "%s ./file failed: %s\n", syscalls[i].name, strerror(errno)); + } + break; + case __NR_ioctl: + if((rc = syscall(syscalls[i].number, fds[0], 0, 0)) < 0) { + fprintf(stderr, "%s failed: 
%s\n", syscalls[i].name, strerror(errno)); + } + break; + case __NR_read: + if((rc = read(fds[0], buf + j * SZCHUNK, SZCHUNK)) < 0) { + fprintf(stderr, "%s failed: %s\n", syscalls[i].name, strerror(errno)); + } + break; + case __NR_write: + if((rc = write(fds[0], buf + j * SZCHUNK, SZCHUNK)) < 0) { + fprintf(stderr, "%s failed: rc=%d,%s\n", syscalls[i].name, rc, strerror(errno)); + } + break; + } + } + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + nsec = (TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec)); + fprintf(stderr, "%s %ld nsec\n", syscalls[i].name, nsec / NLOOP); + + switch (syscalls[i].number) { + case __NR_mmap: + for (j = 0; j < NLOOP; j++) { + if((rc = munmap(mems[j], SZCHUNK)) < 0) { + fprintf(stderr, "munmap failed: %s\n", strerror(errno)); + } + } + break; + case __NR_mprotect: + if((rc = munmap(mems[0], SZCHUNK)) < 0) { + fprintf(stderr, "munmap failed: %s\n", strerror(errno)); + } + break; + case __NR_mremap: + for (j = 0; j < NLOOP; j++) { + if((rc = munmap(memremaps[j], SZCHUNK)) < 0) { + fprintf(stderr, "munmap failed: %s\n", strerror(errno)); + } + } + break; + case __NR_open: + for (j = 0; j < NLOOP; j++) { + if((rc = close(fds[j])) < 0) { + fprintf(stderr, "close failed: %s\n", strerror(errno)); + } + } + break; + case __NR_ioctl: + case __NR_read: + case __NR_write: + if((rc = close(fds[0])) < 0) { + fprintf(stderr, "close failed: %s\n", strerror(errno)); + } + break; + default: + break; + } + } + + pthread_mutex_lock(&mutex); + while (!sem) { + pthread_cond_wait(&cond, &mutex); + } + sem = 0; + pthread_mutex_unlock(&mutex); + + return NULL; +} + +int +main(int argc, char **argv) +{ + int rc; + char *uti_str; + int disable_syscall_intercept = 0; + int opt; + + while ((opt = getopt(argc, argv, "+I:")) != -1) { + switch (opt) { + case 'I': + disable_syscall_intercept = atoi(optarg); + break; + default: /* '?' 
*/ + printf("unknown option %c\n", optopt); + exit(1); + } + } + + if (disable_syscall_intercept == 0) { + rc = syscall(731, 1, NULL); + if (rc) { + fprintf(stdout, "CT11002 INFO: uti not available (rc=%d)\n", rc); + } else { + fprintf(stdout, "CT11002 INFO: uti available\n"); + } + } else { + fprintf(stdout, "CT11002 INFO: uti disabled\n", rc); + } + + rc = pthread_create(&thr, NULL, util_thread, NULL); + if (rc) { + fprintf(stderr, "pthread_create: %d\n", rc); + exit(1); + } + fprintf(stdout, "CT11003 pthread_create OK\n"); + + while (!futex_flag) { + rc = syscall(__NR_futex, &futex_flag, FUTEX_WAIT, 0, NULL, NULL, 0); + if (rc == -1) { + fprintf(stderr, "CT11101 FUTEX_WAIT ERROR: %s\n", strerror(errno)); + } + } + + pthread_mutex_lock(&mutex); + sem = 1; + pthread_cond_signal(&cond); + pthread_mutex_unlock(&mutex); + pthread_join(thr, NULL); + + fprintf(stdout, "CT10005 END\n"); + exit(0); +} diff --git a/test/uti/CT11.pl b/test/uti/CT11.pl new file mode 100755 index 00000000..8e64f98e --- /dev/null +++ b/test/uti/CT11.pl @@ -0,0 +1,17 @@ +#!/usr/bin/perl + +while(<>) { +# print $_; + @row = split(/\s+/, $_); +# print $row[0]."\n"; + $nsec{$row[0]} += $row[1]; + $count{$row[0]}++; + if ($bitmap{$row[0]} == "") { + push @names, ($row[0]); + } + $bitmap{$row[0]} = 1; +} + +foreach $name (@names) { + print $name . ',' . $nsec{$name} / $count{$name} . 
"\n"; +} diff --git a/test/uti/CT11.sh b/test/uti/CT11.sh new file mode 100755 index 00000000..80a48625 --- /dev/null +++ b/test/uti/CT11.sh @@ -0,0 +1,110 @@ +#!/usr/bin/bash + +MYHOME=$HOME + +MCK="${MYHOME}/project/os/install" + +stop=0 +reset=0 +go=0 +measure=0 + +mck=0 +disable_syscall_intercept=0 +nloops=1 + +while getopts srgmI:l:M OPT +do + case ${OPT} in + s) stop=1 + ;; + r) reset=1 + ;; + g) go=1 + ;; + m) mck=1 + ;; + I) disable_syscall_intercept=$OPTARG + ;; + l) nloops=$OPTARG + ;; + M) measure=1 + ;; + *) echo "invalid option -${OPT}" >&2 + exit 1 + esac +done + +if [ ${mck} -eq 1 ]; then + MCEXEC="${MCK}/bin/mcexec" + if [ ${disable_syscall_intercept} -eq 0 ]; then + mcexecopt="--enable-uti" + else + mcexecopt= + fi +else + MCEXEC= + mcexecopt= +fi + +if [ ${stop} -eq 1 ]; then +# sudo mount /work + + sudo ${MCK}/sbin/mcstop+release.sh +fi + +if [ ${reset} -eq 1 ]; then + if hostname | grep ofp &>/dev/null; then + #sudo mount /work + : + fi + + if hostname | grep ofp &>/dev/null; then + sudo ${MCK}/sbin/mcreboot.sh -c 2-17,70-85,138-153,206-221,20-35,88-103,156-171,224-239,36-51,104-119,172-187,240-255,52-67,120-135,188-203,256-271 -r 2-5,70-73,138-141,206-209:0+6-9,74-77,142-145,210-213:1+10-13,78-81,146-149,214-217:68+14-17,82-85,150-153,218-221:69+20-23,88-91,156-159,224-227:136+24-27,92-95,160-163,228-231:137+28-31,96-99,164-167,232-235:204+32-35,100-103,168-171,236-239:205+36-39,104-107,172-175,240-243:18+40-43,108-111,176-179,244-247:19+44-47,112-115,180-183,248-251:86+48-51,116-119,184-187,252-255:87+52-55,120-123,188-191,256-259:154+56-59,124-127,192-195,260-263:155+60-63,128-131,196-199,264-267:222+64-67,132-135,200-203,268-271:223 -m 32G@0,12G@1 + elif hostname | grep koala &>/dev/null; then + sudo ${MCK}/sbin/mcreboot.sh -c 1-15,65-79,129-143,193-207,17-31,81-95,145-159,209-223,33-47,97-111,161-175,225-239,49-63,113-127,177-191,241-255 -r 
1-15:0+65-79:64+129-143:128+193-207:192+17-31:16+81-95:80+145-159:144+209-223:208+33-47:32+97-111:96+161-175:160+225-239:224+49-63:48+113-127:112+177-191:176+241-255:240 -m 12G@0,12G@1,12G@2,12G@3,3920M@4,3920M@5,3920M@6,3920M@7 + else + sudo ${MCK}/sbin/mcreboot.sh -c 1,2 -m 512M + fi +fi + +function init_mod() { + if grep hello /proc/devices > /dev/null; then + sudo rm -f /dev/hello + sudo rmmod "driver/hello.ko" + fi + + (cd driver; make) + + if ! grep hello /proc/devices > /dev/null; then + sudo insmod "driver/hello.ko" + major=`grep hello /proc/devices | cut -d' ' -f 1` + sudo mknod /dev/hello c $major 0 + sudo chmod og+rw /dev/hello + fi +} + +if [ ${measure} -eq 1 ]; then + init_mod + + rm -f ./CT11 + make ./CT11 + + > ./log + for i in {1..10}; do (${MCEXEC} $mcexecopt ./CT11 -I $disable_syscall_intercept 1>/dev/null 2>> ./log); done + perl CT11.pl < ./log +fi + +if [ ${go} -eq 1 ]; then + init_mod + + rm -f ./CT11 + make ./CT11 + + for i in `seq 1 ${nloops}`; do + ${MCEXEC} $mcexecopt ./CT11 -I $disable_syscall_intercept + echo =====; + echo $i; + echo =====; i=$((i+1)); + done +fi diff --git a/test/uti/CT12.c b/test/uti/CT12.c new file mode 100644 index 00000000..b630b902 --- /dev/null +++ b/test/uti/CT12.c @@ -0,0 +1,118 @@ +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <pthread.h> +#include <errno.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/mman.h> +#include <unistd.h> +#include <sys/syscall.h> /* For SYS_xxx definitions */ +#include <linux/futex.h> +#include <sys/time.h> +#include <string.h> + +int passed = 0, sem = 0; +pthread_t thr; + +unsigned long mem; /* delay functions issue ld/st instructions on this address */ +double nspw; /* nsec per work */ + +/* Timer related macros */ +#define TS2NS(sec, nsec) ((unsigned long)(sec) * 1000000000ULL + (unsigned long)(nsec)) +#define N_INIT 10000000 + +static inline void fixed_size_work(unsigned long *ptr) { + asm volatile("movq %0, %%rax\n\t" 
+ "addq $1, %%rax\n\t" \ + "movq %%rax, %0\n\t" \ + : "+rm" (*ptr) \ + : \ + : "rax", "cc", "memory"); \ +} + +static inline void delay_loop(unsigned long n, unsigned long *ptr) { + int j; + for (j = 0; j < (n); j++) { + fixed_size_work(ptr); + } +} + +void delay_init(unsigned long *mem) { + struct timespec start, end; + unsigned long nsec; + int i; + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + delay_loop(N_INIT, mem); + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + nsec = (TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec)); + nspw = nsec / (double)N_INIT; + printf("nsec=%ld, nspw=%f\n", nsec, nspw); +} + +void delay_nsec(unsigned long delay_nsec, unsigned long* mem) { + //printf("delay_nsec=%ld,count=%f\n", delay_nsec, delay_nsec / nspw); + delay_loop(delay_nsec / nspw, mem); +} + +void *util_thread(void *arg) { + int rc; + + rc = syscall(732); + if (rc == -1) + fprintf(stderr, "CT12100 running on Linux CPU OK\n"); + else { + fprintf(stderr, "CT12100 running on Linux CPU NG (%d)\n", rc); + } + + passed = 1; + + rc = syscall(__NR_futex, &sem, FUTEX_WAIT, 0, NULL, NULL, 0); + if (rc != 0) { + fprintf(stderr, "CT12101 FUTEX_WAIT NG (%s)\n", strerror(errno)); + } else { + fprintf(stderr, "CT12101 FUTEX_WAIT OK\n"); + } + + return NULL; +} + +int +main(int argc, char **argv) +{ + int rc; + + fprintf(stderr, "CT12001 futex START\n"); + rc = syscall(731, 1, NULL); + if (rc) { + fprintf(stderr, "util_indicate_clone rc=%d, errno=%d\n", rc, errno); + fflush(stderr); + } + + rc = pthread_create(&thr, NULL, util_thread, NULL); + if (rc){ + fprintf(stderr, "pthread_create: %d\n", rc); + exit(1); + } + fprintf(stderr, "CT12002 pthread_create OK\n"); + + retry: + while (!passed) { + asm volatile("pause" ::: "memory"); + } + usleep(100000); + + rc = syscall(__NR_futex, &sem, FUTEX_WAKE, 1, NULL, NULL, 0); + if (rc != 1) { + fprintf(stderr, "CT12003 FUTEX_WAKE NG (%d,%s)\n", rc, strerror(errno)); + } else { + fprintf(stderr, "CT12003 FUTEX_WAKE OK\n"); + 
} + + pthread_join(thr, NULL); + fprintf(stderr, "CT12004 pthread_join OK\n"); + + fprintf(stderr, "CT12005 END\n"); + exit(0); +} diff --git a/test/uti/CT13.c b/test/uti/CT13.c new file mode 100644 index 00000000..1f6105c1 --- /dev/null +++ b/test/uti/CT13.c @@ -0,0 +1,74 @@ +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <pthread.h> +#include <errno.h> +#include <sys/mman.h> +#include <unistd.h> +#include <sys/syscall.h> /* For SYS_xxx definitions */ +#include <linux/futex.h> +#include <sys/time.h> + +int passed = 0, sem = 0; +pthread_t thr; + +void *util_thread(void *arg) { + int rc; + + rc = syscall(732); + if (rc == -1) + fprintf(stderr, "CT13100 running on Linux CPU OK\n"); + else { + fprintf(stderr, "CT13100 running on Linux CPU NG (%d)\n", rc); + } + + retry: + while (!passed) { + asm volatile("pause" ::: "memory"); + } + usleep(100000); /* debug messages via serial takes 0.05 sec */ + + rc = syscall(__NR_futex, &sem, FUTEX_WAKE, 1, NULL, NULL, 0); + if (rc != 1) { + fprintf(stderr, "CT13101 FUTEX_WAKE NG (%d,%s)\n", rc, strerror(errno)); + } else { + fprintf(stderr, "CT13101 FUTEX_WAKE OK\n"); + } + + return NULL; +} + +int main(int argc, char **argv) +{ + int rc; + + fprintf(stderr, "CT13001 futex START\n"); + rc = syscall(731, 1, NULL); + if (rc) { + fprintf(stderr, "util_indicate_clone rc=%d, errno=%d\n", rc, errno); + fflush(stderr); + } + + rc = pthread_create(&thr, NULL, util_thread, NULL); + if (rc){ + fprintf(stderr, "pthread_create: %d\n", rc); + exit(1); + } + fprintf(stderr, "CT13002 pthread_create OK\n"); + + passed = 1; + + rc = syscall(__NR_futex, &sem, FUTEX_WAIT, 0, NULL, NULL, 0); + if (rc != 0) { + fprintf(stderr, "CT13003 FUTEX_WAIT NG (%s)\n", strerror(errno)); + } else { + fprintf(stderr, "CT13003 FUTEX_WAIT OK\n"); + } + + pthread_join(thr, NULL); + fprintf(stderr, "CT13004 pthread_join OK\n"); + + fprintf(stderr, "CT13005 END\n"); + exit(0); +} diff --git a/test/uti/CT14.c 
b/test/uti/CT14.c
new file mode 100644
index 00000000..279613fe
--- /dev/null
+++ b/test/uti/CT14.c
@@ -0,0 +1,121 @@
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <pthread.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/mman.h>
+#include <signal.h>
+
+pthread_mutex_t mutex;
+int owned;
+pthread_t thr;
+
+#define TS2NS(sec, nsec) ((unsigned long)(sec) * 1000000000ULL + (unsigned long)(nsec))
+static inline void FIXED_SIZE_WORK(unsigned long *ptr) { /* one unit of work: load *ptr, add 1, store it back */
+    asm volatile("movq %0, %%rax\n\t"
+                 "addq $1, %%rax\n\t"
+                 "movq %%rax, %0\n\t"
+                 : "+rm" (*ptr)
+                 :
+                 : "rax", "cc", "memory");
+}
+/* Run FIXED_SIZE_WORK n times; the counter has n's type so counts above INT_MAX cannot overflow a signed int */
+static inline void BULK_FSW(unsigned long n, unsigned long *ptr) {
+    unsigned long j;
+    for (j = 0; j < (n); j++) {
+        FIXED_SIZE_WORK(ptr);
+    }
+}
+
+double nspw; /* nsec per work */
+
+#define N_INIT 10000000
+/* Calibrate nspw: time N_INIT work units on this thread's CPU clock and store the average cost of one unit */
+void fwq_init(unsigned long *mem) {
+    struct timespec start, end;
+    unsigned long nsec;
+
+    clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
+    BULK_FSW(N_INIT, mem);
+    clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
+    nsec = (TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec));
+    nspw = nsec / (double)N_INIT;
+    printf("nsec=%lu, nspw=%f\n", nsec, nspw);
+}
+/* Busy-wait for roughly delay_nsec of CPU time; fwq_init() must have set nspw first */
+void fwq(unsigned long delay_nsec, unsigned long *mem) {
+    /* nspw is nanoseconds per work unit, so delay_nsec / nspw is the number of units to run */
+    BULK_FSW(delay_nsec / nspw, mem);
+}
+/* Thread body: reports where it runs (CT14100), burns CPU, then takes mutex and checks main already set owned (CT14101) */
+void *
+util_thread(void *arg)
+{
+    int rc;
+    unsigned long mem = 0; /* FIXED_SIZE_WORK reads this before writing it */
+
+    rc = syscall(732);
+    if (rc == -1)
+        fprintf(stderr, "CT14100 running on Linux OK\n");
+    else {
+        fprintf(stderr, "CT14100 running on Linux NG (%d)\n", rc);
+    }
+    errno = 0;
+
+    fwq(500 * 1000 * 1000UL, &mem); /* Sending debug messages through serial takes 0.05 sec */
+
+    pthread_mutex_lock(&mutex);
+    if (owned) {
+        fprintf(stderr, "CT14101 lock second OK\n");
+    } else {
+        fprintf(stderr, "CT14101 lock second NG\n");
+    }
+    owned = 1;
+    pthread_mutex_unlock(&mutex);
+
+    return NULL;
+}
+ +int main(int argc, char **argv) { + int rc; + unsigned long mem; + + pthread_mutex_init(&mutex, NULL); + fwq_init(&mem); + + fprintf(stderr, "CT14001 futex START\n"); + + rc = syscall(731, 1, NULL); + if (rc) { + fprintf(stderr, "CT14002 util_indicate_clone NG (rc=%d, errno=%d)\n", rc, errno); + fflush(stderr); + } else { + fprintf(stderr, "CT14002 util_indicate_clone OK\n"); + } + + rc = pthread_create(&thr, NULL, util_thread, NULL); + if(rc){ + fprintf(stderr, "pthread_create: %d\n", rc); + exit(1); + } + fprintf(stderr, "CT14003 pthread_create OK\n"); + + pthread_mutex_lock(&mutex); + if (!owned) { + fprintf(stderr, "CT14004 lock first OK\n"); + } else { + fprintf(stderr, "CT14004 lock first NG\n"); + } + owned = 1; + fwq(2000 * 1000 * 1000UL, &mem); /* Need 2 sec to make child sleep */ + pthread_mutex_unlock(&mutex); + + pthread_join(thr, NULL); + fprintf(stderr, "CT14005 pthread_join OK\n"); + + fprintf(stderr, "CT14006 END\n"); + exit(0); +} diff --git a/test/uti/CT15.c b/test/uti/CT15.c new file mode 100644 index 00000000..3c6306b0 --- /dev/null +++ b/test/uti/CT15.c @@ -0,0 +1,121 @@ +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <pthread.h> +#include <errno.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/mman.h> +#include <signal.h> + +pthread_mutex_t mutex; +int owned; +pthread_t thr; + +#define TS2NS(sec, nsec) ((unsigned long)(sec) * 1000000000ULL + (unsigned long)(nsec)) +static inline void FIXED_SIZE_WORK(unsigned long *ptr) { + asm volatile("movq %0, %%rax\n\t" + "addq $1, %%rax\n\t" \ + "movq %%rax, %0\n\t" \ + : "+rm" (*ptr) \ + : \ + : "rax", "cc", "memory"); \ +} + +static inline void BULK_FSW(unsigned long n, unsigned long *ptr) { + int j; + for (j = 0; j < (n); j++) { + FIXED_SIZE_WORK(ptr); + } +} + +double nspw; /* nsec per work */ + +#define N_INIT 10000000 + +void fwq_init(unsigned long *mem) { + struct timespec start, end; + unsigned long nsec; + int i; + 
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + BULK_FSW(N_INIT, mem); + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + nsec = (TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec)); + nspw = nsec / (double)N_INIT; + printf("nsec=%ld, nspw=%f\n", nsec, nspw); +} + +void fwq(unsigned long delay_nsec, unsigned long* mem) { + //printf("delay_nsec=%ld,count=%f\n", delay_nsec, delay_nsec / nspw); + BULK_FSW(delay_nsec / nspw, mem); +} + +void * +util_thread(void *arg) +{ + int rc; + unsigned long mem; + + rc = syscall(732); + if (rc == -1) + fprintf(stderr, "CT14100 running on Linux OK\n"); + else { + fprintf(stderr, "CT14100 running on Linux NG (%d)\n", rc); + } + errno = 0; + + pthread_mutex_lock(&mutex); + if (!owned) { + fprintf(stderr, "CT14101 lock first OK\n"); + } else { + fprintf(stderr, "CT14101 lock first NG\n"); + } + owned = 1; + fwq(2000 * 1000 * 1000UL, &mem); /* Need 2 sec to make parent sleep */ + pthread_mutex_unlock(&mutex); + + return NULL; +} + +int main(int argc, char **argv) { + int rc; + unsigned long mem; + + pthread_mutex_init(&mutex, NULL); + fwq_init(&mem); + + fprintf(stderr, "CT14001 futex START\n"); + + rc = syscall(731, 1, NULL); + if (rc) { + fprintf(stderr, "CT14002 util_indicate_clone NG (rc=%d, errno=%d)\n", rc, errno); + fflush(stderr); + } else { + fprintf(stderr, "CT14002 util_indicate_clone OK\n"); + } + + rc = pthread_create(&thr, NULL, util_thread, NULL); + if(rc){ + fprintf(stderr, "pthread_create: %d\n", rc); + exit(1); + } + fprintf(stderr, "CT14003 pthread_create OK\n"); + + fwq(500 * 1000 * 1000UL, &mem); /* Sending debug messages through serial takes 0.05 sec */ + + pthread_mutex_lock(&mutex); + if (owned) { + fprintf(stderr, "CT14004 lock second OK\n"); + } else { + fprintf(stderr, "CT14004 lock second NG\n"); + } + owned = 1; + pthread_mutex_unlock(&mutex); + + pthread_join(thr, NULL); + fprintf(stderr, "CT14005 pthread_join OK\n"); + + fprintf(stderr, "CT14006 END\n"); + exit(0); +} diff --git 
a/test/uti/CT16.c b/test/uti/CT16.c new file mode 100644 index 00000000..ec29ccda --- /dev/null +++ b/test/uti/CT16.c @@ -0,0 +1,83 @@ +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <pthread.h> +#include <errno.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/mman.h> +#include <signal.h> + +pthread_mutex_t mutex; +pthread_cond_t cond; +int passed, flag; +pthread_t thr; + +void * +util_thread(void *arg) +{ + int rc; + unsigned long mem; + + rc = syscall(732); + if (rc == -1) + fprintf(stderr, "CT16101 running on Linux OK\n"); + else { + fprintf(stderr, "CT16101 running on Linux NG (%d)\n", rc); + } + errno = 0; + + passed = 1; + pthread_mutex_lock(&mutex); + while(!flag) { + pthread_cond_wait(&cond, &mutex); + } + flag = 0; + pthread_mutex_unlock(&mutex); + + fprintf(stderr, "CT16102 return from pthread_cond_wait() OK\n"); + + return NULL; +} + +int main(int argc, char **argv) { + int rc; + unsigned long mem; + + pthread_mutex_init(&mutex, NULL); + pthread_cond_init(&cond, NULL); + + fprintf(stderr, "CT16001 futex START\n"); + + rc = syscall(731, 1, NULL); + if (rc) { + fprintf(stderr, "CT16002 util_indicate_clone NG (rc=%d, errno=%d)\n", rc, errno); + fflush(stderr); + } else { + fprintf(stderr, "CT16002 util_indicate_clone OK\n"); + } + + rc = pthread_create(&thr, NULL, util_thread, NULL); + if(rc){ + fprintf(stderr, "pthread_create: %d\n", rc); + exit(1); + } + fprintf(stderr, "CT16003 pthread_create OK\n"); + + while (!passed) { + asm volatile("pause" ::: "memory"); + } + usleep(100 * 1000UL); /* Send debug message through serial takes 0.05 sec */ + + pthread_mutex_lock(&mutex); + flag = 1; + pthread_cond_signal(&cond); + pthread_mutex_unlock(&mutex); + + pthread_join(thr, NULL); + fprintf(stderr, "CT16004 pthread_join OK\n"); + + fprintf(stderr, "CT16005 END\n"); + exit(0); +} diff --git a/test/uti/CT17.c b/test/uti/CT17.c new file mode 100644 index 00000000..795a002a --- /dev/null +++ 
b/test/uti/CT17.c
@@ -0,0 +1,81 @@
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <pthread.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/mman.h>
+#include <signal.h>
+
+pthread_mutex_t mutex;
+pthread_cond_t cond;
+int passed, flag;
+pthread_t thr;
+
+/* Thread body: report the result of syscall 732, wait for main() to set passed, then raise flag and signal cond. */
+void *util_thread(void *arg) {
+	int rc;
+
+	rc = syscall(732);
+	if (rc == -1)
+		fprintf(stderr, "CT17100 running on Linux OK\n");
+	else {
+		fprintf(stderr, "CT17100 running on Linux NG (%d)\n", rc);
+	}
+
+	while (!passed) {
+		asm volatile("pause" ::: "memory"); /* "memory" clobber forces passed to be re-read */
+	}
+	usleep(100 * 1000UL); /* Send debug message through serial takes 0.05 sec */
+
+	pthread_mutex_lock(&mutex);
+	flag = 1;
+	pthread_cond_signal(&cond);
+	pthread_mutex_unlock(&mutex);
+
+	return NULL;
+}
+
+/* Start util_thread, set passed, then sleep on cond until the thread raises flag; finally join it. */
+int main(int argc, char **argv) {
+	int rc;
+
+	pthread_mutex_init(&mutex, NULL);
+	pthread_cond_init(&cond, NULL);
+
+	fprintf(stderr, "CT17001 futex START\n");
+
+	rc = syscall(731, 1, NULL);
+	if (rc) {
+		fprintf(stderr, "CT17002 util_indicate_clone NG (rc=%d, errno=%d)\n", rc, errno);
+		fflush(stderr);
+	} else {
+		fprintf(stderr, "CT17002 util_indicate_clone OK\n");
+	}
+
+	rc = pthread_create(&thr, NULL, util_thread, NULL);
+	if(rc){
+		fprintf(stderr, "pthread_create: %d\n", rc);
+		exit(1);
+	}
+	fprintf(stderr, "CT17003 pthread_create OK\n");
+
+	passed = 1;
+	pthread_mutex_lock(&mutex);
+	fprintf(stderr, "CT17004 lock on %p OK\n", (void *)&mutex); /* %p requires a void * argument */
+	while(!flag) {
+		pthread_cond_wait(&cond, &mutex);
+		fprintf(stderr, "CT17005 wake on %p OK\n", (void *)&cond);
+	}
+	flag = 0;
+
+	pthread_mutex_unlock(&mutex);
+
+	pthread_join(thr, NULL);
+	fprintf(stderr, "CT17006 pthread_join OK\n");
+
+	fprintf(stderr, "CT17007 END\n");
+	exit(0);
+}
diff --git a/test/uti/CT18.c b/test/uti/CT18.c
new file mode 100644
index 00000000..9ef11b78
--- /dev/null
+++ b/test/uti/CT18.c
@@ -0,0 +1,111 @@
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <pthread.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <sys/syscall.h>   /* For SYS_xxx definitions */
+#include <linux/futex.h>
+#include <sys/time.h>
+#include <string.h>
+
+int passed, sem, flag;
+pthread_t thr;
+#define TS2NS(sec, nsec) ((unsigned long)(sec) * 1000000000ULL + (unsigned long)(nsec))
+
+/* Thread body: wait on the futex word sem with a CLOCK_REALTIME deadline 0.8 s ahead and report how the wait ended. */
+void *util_thread(void *arg) {
+	int rc, futex_err; /* futex_err: errno of the futex call, 0 on success; rc is reused by clock_gettime() */
+	struct timespec start, timeout, end;
+	unsigned long elapsed;
+
+	rc = syscall(732);
+	if (rc == -1)
+		fprintf(stderr, "CT18101 running on Linux CPU OK\n");
+	else {
+		fprintf(stderr, "CT18101 running on Linux CPU NG (%d)\n", rc);
+	}
+
+	passed = 1;
+
+	rc = clock_gettime(CLOCK_REALTIME, &start);
+	if (rc != 0) {
+		fprintf(stderr, "clock_gettime failed\n");
+		return NULL;
+	}
+	fprintf(stderr, "start=%ld.%09ld\n", start.tv_sec, start.tv_nsec);
+
+	timeout.tv_sec = start.tv_sec;
+	timeout.tv_nsec = start.tv_nsec + 800UL * 1000 * 1000;
+	if (timeout.tv_nsec >= 1000UL * 1000 * 1000) { /* tv_nsec must stay below one second */
+		timeout.tv_sec += 1;
+		timeout.tv_nsec -= 1000UL * 1000* 1000;
+	}
+	futex_err = (syscall(__NR_futex, &sem, FUTEX_WAIT_BITSET | FUTEX_CLOCK_REALTIME, 0, &timeout, NULL, 0x12345678) == -1) ? errno : 0;
+	fprintf(stderr, "op=%x\n", FUTEX_WAIT_BITSET | FUTEX_CLOCK_REALTIME);
+
+	rc = clock_gettime(CLOCK_REALTIME, &end);
+	if (rc != 0) {
+		fprintf(stderr, "clock_gettime failed\n");
+		return NULL;
+	}
+	fprintf(stderr, "end=%ld.%09ld\n", end.tv_sec, end.tv_nsec);
+
+	if (futex_err != 0 && futex_err != ETIMEDOUT) { /* timing out is expected: main() wakes long after the deadline */
+		fprintf(stderr, "CT18102 FUTEX_WAIT NG (%s)\n", strerror(futex_err));
+	} else {
+		fprintf(stderr, "CT18102 FUTEX_WAIT OK\n");
+	}
+
+	elapsed = TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec);
+	if (flag == 0 || elapsed < 800UL * 1000 * 1000 + 80UL * 1000 * 1000) {
+		fprintf(stderr, "CT18103 timeout OK\n");
+	} else {
+		fprintf(stderr, "CT18103 timeout NG (%lx)\n", elapsed);
+	}
+
+	return NULL;
+}
+/* Start util_thread and issue FUTEX_WAKE_BITSET 8 s after it checks in; a return of 0 (nobody woken) is reported as OK. */
+int
+main(int argc, char **argv)
+{
+	int rc;
+
+	fprintf(stderr, "CT18001 futex START\n");
+	rc = syscall(731, 1, NULL);
+	if (rc) {
+		fprintf(stderr, "util_indicate_clone rc=%d, errno=%d\n", rc, errno);
+		fflush(stderr);
+	}
+
+	rc = pthread_create(&thr, NULL, util_thread, NULL);
+	if (rc){
+		fprintf(stderr, "pthread_create: %d\n", rc);
+		exit(1);
+	}
+	fprintf(stderr, "CT18002 pthread_create OK\n");
+
+	while (!passed) {
+		asm volatile("pause" ::: "memory");
+	}
+	usleep(800 * 1000UL * 10);
+
+	flag = 1;
+	rc = syscall(__NR_futex, &sem, FUTEX_WAKE_BITSET, 1, NULL, NULL, 0x12345678);
+	if (rc != 0) {
+		fprintf(stderr, "CT18003 FUTEX_WAKE missing the waiter NG (%d,%s)\n", rc, strerror(errno));
+	} else {
+		fprintf(stderr, "CT18003 FUTEX_WAKE missing the waiter OK\n");
+	}
+
+	pthread_join(thr, NULL);
+	fprintf(stderr, "CT18004 pthread_join OK\n");
+
+	fprintf(stderr, "CT18005 END\n");
+	exit(0);
+}
diff --git a/test/uti/CT19.c b/test/uti/CT19.c
new file mode 100644
index 00000000..0d90168d
--- /dev/null
+++ b/test/uti/CT19.c
@@ -0,0 +1,112 @@
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <pthread.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <sys/syscall.h>   /* For SYS_xxx definitions */
+#include <linux/futex.h>
+#include <sys/time.h>
+#include <string.h>
+
+int passed, sem, flag;
+pthread_t thr;
+#define TS2NS(sec, nsec) ((unsigned long)(sec) * 1000000000ULL + (unsigned long)(nsec))
+
+/* Thread body: wait on the futex word sem with a CLOCK_MONOTONIC deadline 0.8 s ahead and report how the wait ended. */
+void *util_thread(void *arg) {
+	int rc, futex_err; /* futex_err: errno of the futex call, 0 on success; rc is reused by clock_gettime() */
+	struct timespec start, timeout, end;
+	unsigned long elapsed;
+
+	rc = syscall(732);
+	if (rc == -1)
+		fprintf(stderr, "CT19100 running on Linux CPU OK\n");
+	else {
+		fprintf(stderr, "CT19100 running on Linux CPU NG (%d)\n", rc);
+	}
+
+	passed = 1;
+
+	rc = clock_gettime(CLOCK_MONOTONIC, &start);
+	if (rc != 0) {
+		fprintf(stderr, "clock_gettime failed\n");
+		return NULL;
+	}
+	fprintf(stderr, "start=%ld.%09ld\n", start.tv_sec, start.tv_nsec);
+
+	timeout.tv_sec = start.tv_sec;
+	timeout.tv_nsec = start.tv_nsec + 800UL * 1000 * 1000;
+	if (timeout.tv_nsec >= 1000UL * 1000 * 1000) { /* tv_nsec must stay below one second */
+		timeout.tv_sec += 1;
+		timeout.tv_nsec -= 1000UL * 1000* 1000;
+	}
+	/* FUTEX_WAIT_BITSET takes an absolute deadline; without FUTEX_CLOCK_REALTIME it is measured on CLOCK_MONOTONIC */
+	futex_err = (syscall(__NR_futex, &sem, FUTEX_WAIT_BITSET, 0, &timeout, NULL, 0x12345678) == -1) ? errno : 0;
+	fprintf(stderr, "op=%x\n", FUTEX_WAIT_BITSET);
+
+	rc = clock_gettime(CLOCK_MONOTONIC, &end);
+	if (rc != 0) {
+		fprintf(stderr, "clock_gettime failed\n");
+		return NULL;
+	}
+	fprintf(stderr, "end=%ld.%09ld\n", end.tv_sec, end.tv_nsec);
+
+	if (futex_err != 0 && futex_err != ETIMEDOUT) { /* timing out is expected: main() wakes after the deadline */
+		fprintf(stderr, "CT19101 FUTEX_WAIT NG (%s)\n", strerror(futex_err));
+	} else {
+		fprintf(stderr, "CT19101 FUTEX_WAIT OK\n");
+	}
+
+	elapsed = TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec);
+	if (flag == 0 || elapsed < 800UL * 1000 * 1000 + 80UL * 1000 * 1000) {
+		fprintf(stderr, "CT19102 timeout OK\n");
+	} else {
+		fprintf(stderr, "CT19102 timeout NG\n");
+	}
+
+	return NULL;
+}
+/* Start util_thread and issue FUTEX_WAKE_BITSET 2 s after it checks in; a return of 0 (nobody woken) is reported as OK. */
+int
+main(int argc, char **argv)
+{
+	int rc;
+
+	fprintf(stderr, "CT19001 futex START\n");
+	rc = syscall(731, 1, NULL);
+	if (rc) {
+		fprintf(stderr, "util_indicate_clone rc=%d, errno=%d\n", rc, errno);
+		fflush(stderr);
+	}
+
+	rc = pthread_create(&thr, NULL, util_thread, NULL);
+	if (rc){
+		fprintf(stderr, "pthread_create: %d\n", rc);
+		exit(1);
+	}
+	fprintf(stderr, "CT19002 pthread_create OK\n");
+
+	while (!passed) {
+		asm volatile("pause" ::: "memory");
+	}
+	usleep(2000 * 1000UL);
+
+	flag = 1;
+	rc = syscall(__NR_futex, &sem, FUTEX_WAKE_BITSET, 1, NULL, NULL, 0x12345678);
+	if (rc != 0) {
+		fprintf(stderr, "CT19003 FUTEX_WAKE missing the waiter NG (%d,%s)\n", rc, strerror(errno));
+	} else {
+		fprintf(stderr, "CT19003 FUTEX_WAKE missing the waiter OK\n");
+	}
+
+	pthread_join(thr, NULL);
+	fprintf(stderr, "CT19004 pthread_join OK\n");
+
+	fprintf(stderr, "CT19005 END\n");
+	exit(0);
+}
diff --git a/test/uti/CT20.c b/test/uti/CT20.c
new file mode 100644
index 00000000..42bdc973
--- /dev/null
+++ b/test/uti/CT20.c
@@ -0,0 +1,106 @@
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <pthread.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <sys/syscall.h>   /* For SYS_xxx definitions */
+#include <linux/futex.h>
+#include <sys/time.h>
+#include <string.h>
+
+int passed, sem, flag;
+pthread_t thr;
+#define TS2NS(sec, nsec) ((unsigned long)(sec) * 1000000000ULL + (unsigned long)(nsec))
+
+/* Thread body: FUTEX_WAIT on the futex word sem with a 0.8 s timeout and report how the wait ended. */
+void *util_thread(void *arg) {
+	int rc, futex_err; /* futex_err: errno of the futex call, 0 on success; rc is reused by clock_gettime() */
+	struct timespec start, timeout, end;
+	unsigned long elapsed;
+
+	rc = syscall(732);
+	if (rc == -1)
+		fprintf(stderr, "CT20100 running on Linux CPU OK\n");
+	else {
+		fprintf(stderr, "CT20100 running on Linux CPU NG (%d)\n", rc);
+	}
+
+	passed = 1;
+
+	rc = clock_gettime(CLOCK_REALTIME, &start);
+	if (rc != 0) {
+		fprintf(stderr, "clock_gettime failed\n");
+		return NULL;
+	}
+	fprintf(stderr, "start=%ld.%09ld\n", start.tv_sec, start.tv_nsec);
+
+	timeout.tv_sec = 0;
+	timeout.tv_nsec = 800ULL * 1000 * 1000; /* plain FUTEX_WAIT takes a relative timeout */
+	futex_err = (syscall(__NR_futex, &sem, FUTEX_WAIT, 0, &timeout, NULL, 0) == -1) ? errno : 0;
+
+	rc = clock_gettime(CLOCK_REALTIME, &end);
+	if (rc != 0) {
+		fprintf(stderr, "clock_gettime failed\n");
+		return NULL;
+	}
+	fprintf(stderr, "end=%ld.%09ld\n", end.tv_sec, end.tv_nsec);
+
+	if (futex_err != 0 && futex_err != ETIMEDOUT) { /* timing out is expected: main() wakes after the timeout */
+		fprintf(stderr, "CT20101 FUTEX_WAIT NG (%s)\n", strerror(futex_err));
+	} else {
+		fprintf(stderr, "CT20101 FUTEX_WAIT OK\n");
+	}
+
+	elapsed = TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec);
+	if (flag == 0 || elapsed < 800UL * 1000 * 1000 + 80UL * 1000 * 1000) {
+		fprintf(stderr, "CT20102 timeout OK\n");
+	} else {
+		fprintf(stderr, "CT20102 timeout NG\n");
+	}
+
+	return NULL;
+}
+/* Start util_thread and issue FUTEX_WAKE 2 s after it checks in; a return of 0 (nobody woken) is reported as OK. */
+int
+main(int argc, char **argv)
+{
+	int rc;
+
+	fprintf(stderr, "CT20001 futex START\n");
+	rc = syscall(731, 1, NULL);
+	if (rc) {
+		fprintf(stderr, "util_indicate_clone rc=%d, errno=%d\n", rc, errno);
+		fflush(stderr);
+	}
+
+	rc = pthread_create(&thr, NULL, util_thread, NULL);
+	if (rc){
+		fprintf(stderr, "pthread_create: %d\n", rc);
+		exit(1);
+	}
+	fprintf(stderr, "CT20002 pthread_create OK\n");
+
+	while (!passed) {
+		asm volatile("pause" ::: "memory");
+	}
+	usleep(2000 * 1000UL);
+
+	flag = 1;
+	rc = syscall(__NR_futex, &sem, FUTEX_WAKE, 1, NULL, NULL, 0);
+	if (rc != 0) {
+		fprintf(stderr, "CT20003 FUTEX_WAKE missing the waiter NG (%d,%s)\n", rc, strerror(errno));
+	} else {
+		fprintf(stderr, "CT20003 FUTEX_WAKE missing the waiter OK\n");
+	}
+
+	pthread_join(thr, NULL);
+	fprintf(stderr, "CT20004 pthread_join OK\n");
+
+	fprintf(stderr, "CT20005 END\n");
+	exit(0);
+}
diff --git a/test/uti/CT21.c b/test/uti/CT21.c
new file mode 100644
index 00000000..8c9552d4
--- /dev/null
+++ b/test/uti/CT21.c
@@ -0,0 +1,210 @@
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <pthread.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/mman.h>
+#include <signal.h>
+
+#define DEBUG
+
+#ifdef DEBUG
+#define dprintf(...) \
+	do { \
+		char msg[1024]; \
+		snprintf(msg, sizeof(msg), __VA_ARGS__); \
+		fprintf(stdout, "%s,%s", __FUNCTION__, msg); \
+	} while (0)
+#define eprintf(...) \
+	do { \
+		char msg[1024]; \
+		snprintf(msg, sizeof(msg), __VA_ARGS__); \
+		fprintf(stdout, "%s,%s", __FUNCTION__, msg); \
+	} while (0)
+#else
+#define dprintf(...) do { } while (0)
+#define eprintf(...)
do { } while (0)
+#endif
+
+#define NTHR 1
+#define TS2NS(sec, nsec) ((unsigned long)(sec) * 1000000000ULL + (unsigned long)(nsec))
+#define CALC_DELAY (93000) /* 93 usec */
+#define INIT_DELAY (2000) /* 2 usec, CPU sends CTS packet */
+#define NIC_DELAY (3000) /* 3 usec, NIC reads by RDMA-read */
+#define POLL_DELAY (200) /* .2 usec, CPU fetches event queue entry from DRAM */
+#define RESP_DELAY (2000) /* 2 usec, CPU sends DONE packet and updates MPI_Request */
+#define NSPIN 1
+static inline void FIXED_SIZE_WORK(unsigned long *ptr) { /* one unit of work: load *ptr, add 1, store it back */
+	asm volatile("movq %0, %%rax\n\t"
+		     "addq $1, %%rax\n\t"
+		     "movq %%rax, %0\n\t"
+		     : "+rm" (*ptr)
+		     :
+		     : "rax", "cc", "memory");
+}
+/* Run FIXED_SIZE_WORK() on *ptr n times. */
+static inline void BULK_FSW(unsigned long n, unsigned long *ptr) {
+	unsigned long j; /* same type as n, so the loop bound needs no signed/unsigned conversion */
+	for (j = 0; j < (n); j++) {
+		FIXED_SIZE_WORK(ptr);
+	}
+}
+
+
+pthread_mutex_t ep_lock; /* Ownership of channel instance */
+
+struct thr_arg {
+	int bar_count; /* Barrier before entering loop */
+	pthread_mutex_t bar_lock;
+	pthread_cond_t bar_cond;
+	pthread_t pthread;
+	unsigned long mem; /* Per-thread storage */
+};
+
+struct thr_arg thr_args[NTHR];
+
+unsigned long mem; /* Per-thread storage */
+volatile int nevents;
+volatile int terminate;
+int wps = 1; /* work per sec */
+double nspw; /* nsec per work */
+
+#define N_INIT 10000000
+/* Calibrate nspw by timing N_INIT units of work on *mem with the thread CPU-time clock. */
+void fwq_init(unsigned long *mem) {
+	struct timespec start, end;
+	unsigned long nsec;
+	/* must run before fwq(), which divides by nspw */
+	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
+	BULK_FSW(N_INIT, mem);
+	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
+	nsec = (TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec));
+	nspw = nsec / (double)N_INIT;
+	printf("nsec=%ld, nspw=%f\n", nsec, nspw);
+}
+/* Busy-work for roughly delay_nsec nanoseconds, based on the nspw calibration. */
+void fwq(unsigned long delay_nsec, unsigned long* mem) {
+	/* unit count = requested time / measured time per unit */
+	BULK_FSW(delay_nsec / nspw, mem);
+}
+/* Repeat FIXED_SIZE_WORK() until more than delay_nsec ns of thread CPU time have passed. */
+void mydelay(long delay_nsec, long *mem) {
+	struct timespec start, end;
+	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
+
+	while (1) {
+		clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
+		if (TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec) > delay_nsec) {
+			break;
+		}
+		FIXED_SIZE_WORK((unsigned long *)mem); /* the helper takes unsigned long * */
+	}
+}
+/* Progress thread: pass the start barrier, then clear nevents under ep_lock between fwq() delays until terminate is set. */
+void *progress_fn(void *_arg) {
+	struct thr_arg *arg = (struct thr_arg *)_arg;
+	int rc;
+
+	rc = syscall(732);
+	if (rc == -1)
+		fprintf(stdout, "CT09100 progress_fn running on Linux OK\n");
+	else {
+		fprintf(stdout, "CT09100 progress_fn running on McKernel NG\n");
+		return NULL;
+	}
+
+	pthread_mutex_lock(&arg->bar_lock);
+	while(arg->bar_count == 0) {
+		pthread_cond_wait(&arg->bar_cond, &arg->bar_lock);
+	}
+	pthread_mutex_unlock(&arg->bar_lock);
+
+	/* Start progress */
+	pthread_mutex_lock(&ep_lock);
+	while(1) {
+		if (terminate) {
+			break;
+		}
+
+		/* Event found */
+		if (nevents > 0) {
+			nevents = 0;
+		}
+
+		pthread_mutex_unlock(&ep_lock);
+		fwq(random() % 1000000000, &mem); /* 0 - 1 sec */
+		pthread_mutex_lock(&ep_lock);
+	}
+	pthread_mutex_unlock(&ep_lock); /* the loop is left with ep_lock held */
+	return NULL;
+}
+/* Event producer: post 10 events, spinning until the progress thread clears each one, then set terminate and join. */
+int main(int argc, char **argv) {
+	int rc;
+	int i;
+	struct timespec start, end;
+
+	fprintf(stdout, "CT09001 MPI progress thread skelton START\n");
+
+	rc = syscall(732);
+	if (rc == -1)
+		fprintf(stdout, "CT09002 main running on Linux INFO\n");
+	else {
+		fprintf(stdout, "CT09002 main running on McKernel INFO\n");
+	}
+
+	fwq_init(&mem);
+	pthread_mutex_init(&ep_lock, NULL);
+
+	for(i = 0; i < NTHR; i++) {
+		thr_args[i].bar_count = 0;
+		pthread_cond_init(&thr_args[i].bar_cond, NULL);
+		pthread_mutex_init(&thr_args[i].bar_lock, NULL);
+	}
+
+	rc = syscall(731, 1, NULL);
+	if (rc) {
+		fprintf(stdout, "util_indicate_clone rc=%d, errno=%d\n", rc, errno);
+		fflush(stdout);
+	}
+	for (i = 0; i < NTHR; i++) {
+		rc = pthread_create(&thr_args[i].pthread, NULL, progress_fn, &thr_args[i]);
+		if (rc){
+			fprintf(stdout, "pthread_create: %d\n", rc);
+			exit(1);
+		}
+	}
+	for (i = 0; i < NTHR; i++) {
+		pthread_mutex_lock(&thr_args[i].bar_lock);
+		thr_args[i].bar_count++;
+		pthread_cond_signal(&thr_args[i].bar_cond);
+		pthread_mutex_unlock(&thr_args[i].bar_lock);
+	}
+
+	fprintf(stdout, "CT09004 pthread_create OK\n");
+	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
+	for (i = 0; i < 10; i++) {
+		pthread_mutex_lock(&ep_lock);
+		nevents++;
+		fwq(random() % 1000000000, &mem); /* 0 - 1 sec */
+		pthread_mutex_unlock(&ep_lock);
+		while (nevents > 0) {
+			FIXED_SIZE_WORK(&mem);
+		}
+	}
+	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
+
+	terminate = 1;
+
+	for (i = 0; i < NTHR; i++) {
+		pthread_join(thr_args[i].pthread, NULL);
+	}
+	fprintf(stdout, "CT09005 takes %ld nsec INFO\n", TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec));
+	fprintf(stdout, "CT09006 END\n");
+
+
+	exit(0);
+}
diff --git a/test/uti/CT22.c b/test/uti/CT22.c
new file mode 100644
index 00000000..627b1beb
--- /dev/null
+++ b/test/uti/CT22.c
@@ -0,0 +1,210 @@
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <pthread.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/mman.h>
+#include <signal.h>
+
+#define DEBUG
+
+#ifdef DEBUG
+#define dprintf(...) \
+	do { \
+		char msg[1024]; \
+		snprintf(msg, sizeof(msg), __VA_ARGS__); \
+		fprintf(stdout, "%s,%s", __FUNCTION__, msg); \
+	} while (0)
+#define eprintf(...) \
+	do { \
+		char msg[1024]; \
+		snprintf(msg, sizeof(msg), __VA_ARGS__); \
+		fprintf(stdout, "%s,%s", __FUNCTION__, msg); \
+	} while (0)
+#else
+#define dprintf(...) do { } while (0)
+#define eprintf(...)
do { } while (0)
+#endif
+
+#define NTHR 1
+#define TS2NS(sec, nsec) ((unsigned long)(sec) * 1000000000ULL + (unsigned long)(nsec))
+#define CALC_DELAY (93000) /* 93 usec */
+#define INIT_DELAY (2000) /* 2 usec, CPU sends CTS packet */
+#define NIC_DELAY (3000) /* 3 usec, NIC reads by RDMA-read */
+#define POLL_DELAY (200) /* .2 usec, CPU fetches event queue entry from DRAM */
+#define RESP_DELAY (2000) /* 2 usec, CPU sends DONE packet and updates MPI_Request */
+#define NSPIN 1
+static inline void FIXED_SIZE_WORK(unsigned long *ptr) { /* one unit of work: load *ptr, add 1, store it back */
+	asm volatile("movq %0, %%rax\n\t"
+		     "addq $1, %%rax\n\t"
+		     "movq %%rax, %0\n\t"
+		     : "+rm" (*ptr)
+		     :
+		     : "rax", "cc", "memory");
+}
+/* Run FIXED_SIZE_WORK() on *ptr n times. */
+static inline void BULK_FSW(unsigned long n, unsigned long *ptr) {
+	unsigned long j; /* same type as n, so the loop bound needs no signed/unsigned conversion */
+	for (j = 0; j < (n); j++) {
+		FIXED_SIZE_WORK(ptr);
+	}
+}
+
+
+pthread_mutex_t ep_lock; /* Ownership of channel instance */
+
+struct thr_arg {
+	int bar_count; /* Barrier before entering loop */
+	pthread_mutex_t bar_lock;
+	pthread_cond_t bar_cond;
+	pthread_t pthread;
+	unsigned long mem; /* Per-thread storage */
+};
+
+struct thr_arg thr_args[NTHR];
+
+unsigned long mem; /* Per-thread storage */
+volatile int nevents;
+volatile int terminate;
+int wps = 1; /* work per sec */
+double nspw; /* nsec per work */
+
+#define N_INIT 10000000
+/* Calibrate nspw by timing N_INIT units of work on *mem with the thread CPU-time clock. */
+void fwq_init(unsigned long *mem) {
+	struct timespec start, end;
+	unsigned long nsec;
+	/* must run before fwq(), which divides by nspw */
+	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
+	BULK_FSW(N_INIT, mem);
+	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
+	nsec = (TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec));
+	nspw = nsec / (double)N_INIT;
+	printf("nsec=%ld, nspw=%f\n", nsec, nspw);
+}
+/* Busy-work for roughly delay_nsec nanoseconds, based on the nspw calibration. */
+void fwq(unsigned long delay_nsec, unsigned long* mem) {
+	/* unit count = requested time / measured time per unit */
+	BULK_FSW(delay_nsec / nspw, mem);
+}
+/* Repeat FIXED_SIZE_WORK() until more than delay_nsec ns of thread CPU time have passed. */
+void mydelay(long delay_nsec, long *mem) {
+	struct timespec start, end;
+	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
+
+	while (1) {
+		clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
+		if (TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec) > delay_nsec) {
+			break;
+		}
+		FIXED_SIZE_WORK((unsigned long *)mem); /* the helper takes unsigned long * */
+	}
+}
+/* Event producer thread: pass the start barrier, post 100 events and spin until main() clears each, then set terminate. */
+void *progress_fn(void *_arg) {
+	struct thr_arg *arg = (struct thr_arg *)_arg;
+	int rc;
+	int i;
+
+	rc = syscall(732);
+	if (rc == -1)
+		fprintf(stdout, "CT09100 progress_fn running on Linux OK\n");
+	else {
+		fprintf(stdout, "CT09100 progress_fn running on McKernel NG\n");
+		return NULL;
+	}
+
+	pthread_mutex_lock(&arg->bar_lock);
+	while(arg->bar_count == 0) {
+		pthread_cond_wait(&arg->bar_cond, &arg->bar_lock);
+	}
+	pthread_mutex_unlock(&arg->bar_lock);
+
+	for (i = 0; i < 100; i++) {
+		pthread_mutex_lock(&ep_lock);
+		nevents++;
+		fwq(random() % 100000000, &mem); /* 0 - 0.1 sec */
+		pthread_mutex_unlock(&ep_lock);
+		while (nevents > 0) {
+			FIXED_SIZE_WORK(&mem);
+		}
+	}
+	terminate = 1;
+	return NULL;
+}
+/* Progress loop in main(): clear nevents under ep_lock between fwq() delays until the thread sets terminate, then join. */
+int main(int argc, char **argv) {
+	int rc;
+	int i;
+	struct timespec start, end;
+
+	fprintf(stdout, "CT09001 MPI progress thread skelton START\n");
+
+	rc = syscall(732);
+	if (rc == -1)
+		fprintf(stdout, "CT09002 main running on Linux INFO\n");
+	else {
+		fprintf(stdout, "CT09002 main running on McKernel INFO\n");
+	}
+
+	fwq_init(&mem);
+	pthread_mutex_init(&ep_lock, NULL);
+
+	for(i = 0; i < NTHR; i++) {
+		thr_args[i].bar_count = 0;
+		pthread_cond_init(&thr_args[i].bar_cond, NULL);
+		pthread_mutex_init(&thr_args[i].bar_lock, NULL);
+	}
+
+	rc = syscall(731, 1, NULL);
+	if (rc) {
+		fprintf(stdout, "util_indicate_clone rc=%d, errno=%d\n", rc, errno);
+		fflush(stdout);
+	}
+	for (i = 0; i < NTHR; i++) {
+		rc = pthread_create(&thr_args[i].pthread, NULL, progress_fn, &thr_args[i]);
+		if (rc){
+			fprintf(stdout, "pthread_create: %d\n", rc);
+			exit(1);
+		}
+	}
+	for (i = 0; i < NTHR; i++) {
+		pthread_mutex_lock(&thr_args[i].bar_lock);
+		thr_args[i].bar_count++;
+		pthread_cond_signal(&thr_args[i].bar_cond);
+		pthread_mutex_unlock(&thr_args[i].bar_lock);
+	}
+
+	fprintf(stdout, "CT09004 pthread_create OK\n");
+	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
+	/* Start progress */
+	pthread_mutex_lock(&ep_lock);
+	while(1) {
+		if (terminate) {
+			break;
+		}
+
+		/* Event found */
+		if (nevents > 0) {
+			nevents = 0;
+		}
+
+		pthread_mutex_unlock(&ep_lock);
+		fwq(random() % 100000000, &mem); /* 0 - 0.1 sec */
+		pthread_mutex_lock(&ep_lock);
+	}
+	pthread_mutex_unlock(&ep_lock);
+	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
+
+	for (i = 0; i < NTHR; i++) {
+		pthread_join(thr_args[i].pthread, NULL);
+	}
+	fprintf(stdout, "CT09005 takes %ld nsec INFO\n", TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec));
+	fprintf(stdout, "CT09006 END\n");
+
+
+	exit(0);
+}
diff --git a/test/uti/CT23.c b/test/uti/CT23.c
new file mode 100644
index 00000000..69a19991
--- /dev/null
+++ b/test/uti/CT23.c
@@ -0,0 +1,212 @@
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <pthread.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/mman.h>
+#include <signal.h>
+
+#define DEBUG
+
+#ifdef DEBUG
+#define dprintf(...) \
+	do { \
+		char msg[1024]; \
+		snprintf(msg, sizeof(msg), __VA_ARGS__); \
+		fprintf(stdout, "%s,%s", __FUNCTION__, msg); \
+	} while (0)
+#define eprintf(...) \
+	do { \
+		char msg[1024]; \
+		snprintf(msg, sizeof(msg), __VA_ARGS__); \
+		fprintf(stdout, "%s,%s", __FUNCTION__, msg); \
+	} while (0)
+#else
+#define dprintf(...) do { } while (0)
+#define eprintf(...)
do { } while (0)
+#endif
+
+#define NTHR 1
+#define TS2NS(sec, nsec) ((unsigned long)(sec) * 1000000000ULL + (unsigned long)(nsec))
+#define CALC_DELAY (93000) /* 93 usec */
+#define INIT_DELAY (2000) /* 2 usec, CPU sends CTS packet */
+#define NIC_DELAY (3000) /* 3 usec, NIC reads by RDMA-read */
+#define POLL_DELAY (200) /* .2 usec, CPU fetches event queue entry from DRAM */
+#define RESP_DELAY (2000) /* 2 usec, CPU sends DONE packet and updates MPI_Request */
+#define NSPIN 1
+static inline void FIXED_SIZE_WORK(unsigned long *ptr) { /* one unit of work: load *ptr, add 1, store it back */
+	asm volatile("movq %0, %%rax\n\t"
+		     "addq $1, %%rax\n\t"
+		     "movq %%rax, %0\n\t"
+		     : "+rm" (*ptr)
+		     :
+		     : "rax", "cc", "memory");
+}
+/* Run FIXED_SIZE_WORK() on *ptr n times. */
+static inline void BULK_FSW(unsigned long n, unsigned long *ptr) {
+	unsigned long j; /* same type as n, so the loop bound needs no signed/unsigned conversion */
+	for (j = 0; j < (n); j++) {
+		FIXED_SIZE_WORK(ptr);
+	}
+}
+
+
+pthread_cond_t ep_cond;
+pthread_mutex_t ep_lock; /* Ownership of channel instance */
+
+struct thr_arg {
+	int bar_count; /* Barrier before entering loop */
+	pthread_mutex_t bar_lock;
+	pthread_cond_t bar_cond;
+	pthread_t pthread;
+	unsigned long mem; /* Per-thread storage */
+};
+
+struct thr_arg thr_args[NTHR];
+
+unsigned long mem; /* Per-thread storage */
+volatile int nevents;
+volatile int terminate;
+int wps = 1; /* work per sec */
+double nspw; /* nsec per work */
+
+#define N_INIT 10000000
+/* Calibrate nspw by timing N_INIT units of work on *mem with the thread CPU-time clock. */
+void fwq_init(unsigned long *mem) {
+	struct timespec start, end;
+	unsigned long nsec;
+	/* must run before fwq(), which divides by nspw */
+	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
+	BULK_FSW(N_INIT, mem);
+	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
+	nsec = (TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec));
+	nspw = nsec / (double)N_INIT;
+	printf("nsec=%ld, nspw=%f\n", nsec, nspw);
+}
+/* Busy-work for roughly delay_nsec nanoseconds, based on the nspw calibration. */
+void fwq(unsigned long delay_nsec, unsigned long* mem) {
+	/* unit count = requested time / measured time per unit */
+	BULK_FSW(delay_nsec / nspw, mem);
+}
+/* Repeat FIXED_SIZE_WORK() until more than delay_nsec ns of thread CPU time have passed. */
+void mydelay(long delay_nsec, long *mem) {
+	struct timespec start, end;
+	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
+
+	while (1) {
+		clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
+		if (TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec) > delay_nsec) {
+			break;
+		}
+		FIXED_SIZE_WORK((unsigned long *)mem); /* the helper takes unsigned long * */
+	}
+}
+/* Progress thread: pass the start barrier, then sleep on ep_cond and clear nevents for each event until terminate is set. */
+void *progress_fn(void *_arg) {
+	struct thr_arg *arg = (struct thr_arg *)_arg;
+	int rc;
+
+	rc = syscall(732);
+	if (rc == -1)
+		fprintf(stdout, "CT09100 progress_fn running on Linux OK\n");
+	else {
+		fprintf(stdout, "CT09100 progress_fn running on McKernel NG\n");
+		return NULL;
+	}
+
+	pthread_mutex_lock(&arg->bar_lock);
+	while(arg->bar_count == 0) {
+		pthread_cond_wait(&arg->bar_cond, &arg->bar_lock);
+	}
+	pthread_mutex_unlock(&arg->bar_lock);
+
+	/* Start progress */
+	pthread_mutex_lock(&ep_lock);
+	while(1) {
+		while (nevents == 0 && !terminate) { /* terminate must also end the wait, or the thread sleeps forever */
+			pthread_cond_wait(&ep_cond, &ep_lock);
+		}
+		if (terminate) {
+			break;
+		}
+		nevents = 0;
+		pthread_mutex_unlock(&ep_lock);
+		fwq(random() % 100000000, &mem); /* 0 - 0.1 sec */
+		pthread_mutex_lock(&ep_lock);
+	}
+	pthread_mutex_unlock(&ep_lock);
+	return NULL;
+}
+/* Event producer: post 100 events through ep_cond, spinning until each is cleared, then request termination and join. */
+int main(int argc, char **argv) {
+	int rc;
+	int i;
+	struct timespec start, end;
+
+	fprintf(stdout, "CT09001 MPI progress thread skelton START\n");
+
+	rc = syscall(732);
+	if (rc == -1)
+		fprintf(stdout, "CT09002 main running on Linux INFO\n");
+	else {
+		fprintf(stdout, "CT09002 main running on McKernel INFO\n");
+	}
+
+	fwq_init(&mem);
+	pthread_cond_init(&ep_cond, NULL);
+	pthread_mutex_init(&ep_lock, NULL);
+
+	for(i = 0; i < NTHR; i++) {
+		thr_args[i].bar_count = 0;
+		pthread_cond_init(&thr_args[i].bar_cond, NULL);
+		pthread_mutex_init(&thr_args[i].bar_lock, NULL);
+	}
+
+	rc = syscall(731, 1, NULL);
+	if (rc) {
+		fprintf(stdout, "util_indicate_clone rc=%d, errno=%d\n", rc, errno);
+		fflush(stdout);
+	}
+	for (i = 0; i < NTHR; i++) {
+		rc = pthread_create(&thr_args[i].pthread, NULL, progress_fn, &thr_args[i]);
+		if (rc){
+			fprintf(stdout, "pthread_create: %d\n", rc);
+			exit(1);
+		}
+	}
+	for (i = 0; i < NTHR; i++) {
+		pthread_mutex_lock(&thr_args[i].bar_lock);
+		thr_args[i].bar_count++;
+		pthread_cond_signal(&thr_args[i].bar_cond);
+		pthread_mutex_unlock(&thr_args[i].bar_lock);
+	}
+
+	fprintf(stdout, "CT09004 pthread_create OK\n");
+	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
+	for (i = 0; i < 100; i++) {
+		fwq(random() % 100000000, &mem); /* 0 - 0.1 sec */
+		pthread_mutex_lock(&ep_lock);
+		nevents++;
+		pthread_cond_signal(&ep_cond);
+		pthread_mutex_unlock(&ep_lock);
+		while (nevents > 0) {
+			FIXED_SIZE_WORK(&mem);
+		}
+	}
+	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
+
+	pthread_mutex_lock(&ep_lock); /* set terminate under the lock and signal: the thread may be blocked on ep_cond */
+	terminate = 1;
+	pthread_cond_signal(&ep_cond);
+	pthread_mutex_unlock(&ep_lock);
+	for (i = 0; i < NTHR; i++) {
+		pthread_join(thr_args[i].pthread, NULL);
+	}
+	fprintf(stdout, "CT09005 takes %ld nsec INFO\n", TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec));
+	fprintf(stdout, "CT09006 END\n");
+
+	exit(0);
+}
diff --git a/test/uti/CT24.c b/test/uti/CT24.c
new file mode 100644
index 00000000..fcde9496
--- /dev/null
+++ b/test/uti/CT24.c
@@ -0,0 +1,210 @@
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <pthread.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/mman.h>
+#include <signal.h>
+
+#define DEBUG
+
+#ifdef DEBUG
+#define dprintf(...) \
+	do { \
+		char msg[1024]; \
+		snprintf(msg, sizeof(msg), __VA_ARGS__); \
+		fprintf(stdout, "%s,%s", __FUNCTION__, msg); \
+	} while (0)
+#define eprintf(...) \
+	do { \
+		char msg[1024]; \
+		snprintf(msg, sizeof(msg), __VA_ARGS__); \
+		fprintf(stdout, "%s,%s", __FUNCTION__, msg); \
+	} while (0)
+#else
+#define dprintf(...) do { } while (0)
+#define eprintf(...)
do { } while (0)
+#endif
+
+#define NTHR 1
+#define TS2NS(sec, nsec) ((unsigned long)(sec) * 1000000000ULL + (unsigned long)(nsec))
+#define CALC_DELAY (93000) /* 93 usec */
+#define INIT_DELAY (2000) /* 2 usec, CPU sends CTS packet */
+#define NIC_DELAY (3000) /* 3 usec, NIC reads by RDMA-read */
+#define POLL_DELAY (200) /* .2 usec, CPU fetches event queue entry from DRAM */
+#define RESP_DELAY (2000) /* 2 usec, CPU sends DONE packet and updates MPI_Request */
+#define NSPIN 1
+static inline void FIXED_SIZE_WORK(unsigned long *ptr) { /* one unit of work: load *ptr, add 1, store it back */
+	asm volatile("movq %0, %%rax\n\t"
+		     "addq $1, %%rax\n\t"
+		     "movq %%rax, %0\n\t"
+		     : "+rm" (*ptr)
+		     :
+		     : "rax", "cc", "memory");
+}
+/* Run FIXED_SIZE_WORK() on *ptr n times. */
+static inline void BULK_FSW(unsigned long n, unsigned long *ptr) {
+	unsigned long j; /* same type as n, so the loop bound needs no signed/unsigned conversion */
+	for (j = 0; j < (n); j++) {
+		FIXED_SIZE_WORK(ptr);
+	}
+}
+
+pthread_cond_t ep_cond;
+pthread_mutex_t ep_lock; /* Ownership of channel instance */
+
+struct thr_arg {
+	int bar_count; /* Barrier before entering loop */
+	pthread_mutex_t bar_lock;
+	pthread_cond_t bar_cond;
+	pthread_t pthread;
+	unsigned long mem; /* Per-thread storage */
+};
+
+struct thr_arg thr_args[NTHR];
+
+unsigned long mem; /* Per-thread storage */
+volatile int nevents;
+volatile int terminate;
+int wps = 1; /* work per sec */
+double nspw; /* nsec per work */
+
+#define N_INIT 10000000
+/* Calibrate nspw by timing N_INIT units of work on *mem; must run before fwq(), which divides by nspw. */
+void fwq_init(unsigned long *mem) {
+	struct timespec start, end;
+	unsigned long nsec;
+	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
+	BULK_FSW(N_INIT, mem);
+	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
+	nsec = (TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec));
+	nspw = nsec / (double)N_INIT;
+	printf("nsec=%ld, nspw=%f\n", nsec, nspw);
+}
+/* Busy-work for roughly delay_nsec nanoseconds: unit count = requested time / measured time per unit. */
+void fwq(unsigned long delay_nsec, unsigned long* mem) {
+	BULK_FSW(delay_nsec / nspw, mem);
+}
+/* Repeat FIXED_SIZE_WORK() until more than delay_nsec ns of thread CPU time have passed. */
+void mydelay(long delay_nsec, long *mem) {
+	struct timespec start, end;
+	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
+
+	while (1) {
+		clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
+		if (TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec) > delay_nsec) {
+			break;
+		}
+		FIXED_SIZE_WORK((unsigned long *)mem); /* the helper takes unsigned long * */
+	}
+}
+/* Event producer thread: pass the start barrier, post 100 events through ep_cond, then request termination. */
+void *progress_fn(void *_arg) {
+	struct thr_arg *arg = (struct thr_arg *)_arg;
+	int rc;
+	int i;
+
+	rc = syscall(732);
+	if (rc == -1)
+		fprintf(stdout, "CT09100 progress_fn running on Linux OK\n");
+	else {
+		fprintf(stdout, "CT09100 progress_fn running on McKernel NG\n");
+		return NULL;
+	}
+
+	pthread_mutex_lock(&arg->bar_lock);
+	while(arg->bar_count == 0) {
+		pthread_cond_wait(&arg->bar_cond, &arg->bar_lock);
+	}
+	pthread_mutex_unlock(&arg->bar_lock);
+
+	for (i = 0; i < 100; i++) {
+		fwq(random() % 100000000, &mem); /* 0 - 0.1 sec */
+		pthread_mutex_lock(&ep_lock);
+		nevents++;
+		pthread_cond_signal(&ep_cond);
+		pthread_mutex_unlock(&ep_lock);
+		while (nevents > 0) {
+			FIXED_SIZE_WORK(&mem);
+		}
+	}
+	pthread_mutex_lock(&ep_lock); /* set terminate under the lock and signal: main() may be blocked on ep_cond */
+	terminate = 1;
+	pthread_cond_signal(&ep_cond);
+	pthread_mutex_unlock(&ep_lock);
+	return NULL;
+}
+/* Progress loop in main(): sleep on ep_cond and clear nevents for each event until the thread sets terminate, then join. */
+int main(int argc, char **argv) {
+	int rc;
+	int i;
+	struct timespec start, end;
+
+	fprintf(stdout, "CT09001 MPI progress thread skelton START\n");
+
+	rc = syscall(732);
+	if (rc == -1)
+		fprintf(stdout, "CT09002 main running on Linux INFO\n");
+	else {
+		fprintf(stdout, "CT09002 main running on McKernel INFO\n");
+	}
+
+	fwq_init(&mem);
+	pthread_cond_init(&ep_cond, NULL); /* ep_cond is waited on below and must be initialised first */
+	pthread_mutex_init(&ep_lock, NULL);
+
+	for(i = 0; i < NTHR; i++) {
+		thr_args[i].bar_count = 0;
+		pthread_cond_init(&thr_args[i].bar_cond, NULL);
+		pthread_mutex_init(&thr_args[i].bar_lock, NULL);
+	}
+
+	rc = syscall(731, 1, NULL);
+	if (rc) {
+		fprintf(stdout, "util_indicate_clone rc=%d, errno=%d\n", rc, errno);
+		fflush(stdout);
+	}
+	for (i = 0; i < NTHR; i++) {
+		rc = pthread_create(&thr_args[i].pthread, NULL, progress_fn, &thr_args[i]);
+		if (rc){
+			fprintf(stdout, "pthread_create: %d\n", rc);
+			exit(1);
+		}
+	}
+	for (i = 0; i < NTHR; i++) {
+		pthread_mutex_lock(&thr_args[i].bar_lock);
+		thr_args[i].bar_count++;
+		pthread_cond_signal(&thr_args[i].bar_cond);
+		pthread_mutex_unlock(&thr_args[i].bar_lock);
+	}
+
+	fprintf(stdout, "CT09004 pthread_create OK\n");
+	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
+	/* Start progress */
+	pthread_mutex_lock(&ep_lock);
+	while(1) {
+		while (nevents == 0 && !terminate) { /* terminate must also end the wait, or main() sleeps forever */
+			pthread_cond_wait(&ep_cond, &ep_lock);
+		}
+		if (terminate) {
+			break;
+		}
+		nevents = 0;
+		pthread_mutex_unlock(&ep_lock);
+		fwq(random() % 100000000, &mem); /* 0 - 0.1 sec */
+		pthread_mutex_lock(&ep_lock);
+	}
+	pthread_mutex_unlock(&ep_lock);
+	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
+
+	for (i = 0; i < NTHR; i++) {
+		pthread_join(thr_args[i].pthread, NULL);
+	}
+	fprintf(stdout, "CT09005 takes %ld nsec INFO\n", TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec));
+	fprintf(stdout, "CT09006 END\n");
+
+	exit(0);
+}
diff --git a/test/uti/CT25.c b/test/uti/CT25.c
new file mode 100644
index 00000000..1aa5dd4d
--- /dev/null
+++ b/test/uti/CT25.c
@@ -0,0 +1,163 @@
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <pthread.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <sys/syscall.h>   /* For SYS_xxx definitions */
+#include <linux/futex.h>
+#include <sys/time.h>
+#include <string.h>
+#include <semaphore.h>
+
+pthread_t thr;
+
+unsigned long mem; /* delay functions issue ld/st instructions on this address */
+double nspw; /* nsec per work */
+
+sem_t sem_kick, sem_report;
+int nentry, szentry;
+char **sendv, **recvv;
+
+
+
+/* Timer related macros */
+#define TS2NS(sec, nsec) ((unsigned long)(sec) * 1000000000ULL + (unsigned long)(nsec))
+#define N_INIT 10000000
+/* One unit of work: load *ptr, add 1, store it back. */
+static inline void fixed_size_work(unsigned long *ptr) {
+	asm volatile("movq %0, %%rax\n\t"
+		     "addq $1, %%rax\n\t"
+		     "movq %%rax, %0\n\t"
+		     : "+rm" (*ptr)
+		     :
+		     : "rax", "cc", "memory");
+}
+/* Run fixed_size_work() on *ptr n times. */
+static inline void delay_loop(unsigned long n, unsigned long *ptr) {
+	unsigned long j; /* same type as n, so the loop bound needs no signed/unsigned conversion */
+	for (j = 0; j < (n); j++) {
+		fixed_size_work(ptr);
+	}
+}
+/* Calibrate nspw by timing N_INIT units of work on *mem with the thread CPU-time clock. */
+void delay_init(unsigned long *mem) {
+	struct timespec start, end;
+	unsigned long nsec;
+	/* must run before delay_nsec(), which divides by nspw */
+	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
+	delay_loop(N_INIT, mem);
+	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
+	nsec = (TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec));
+	nspw = nsec / (double)N_INIT;
+	printf("nsec=%ld, nspw=%f\n", nsec, nspw);
+}
+/* Busy-work for roughly delay_nsec nanoseconds, based on the nspw calibration. */
+void delay_nsec(unsigned long delay_nsec, unsigned long* mem) {
+	/* unit count = requested time / measured time per unit */
+	delay_loop(delay_nsec / nspw, mem);
+}
+/* Thread body: wait for sem_kick, zero every recvv[] buffer, then post sem_report. */
+void *util_thread(void *arg) {
+	int rc;
+	int i;
+
+	rc = syscall(732);
+	if (rc == -1)
+		fprintf(stderr, "CT25101 running on Linux CPU OK\n");
+	else {
+		fprintf(stderr, "CT25101 running on Linux CPU NG (%d)\n", rc);
+	}
+
+	sem_wait(&sem_kick);
+
+	/* Cause remote page fault */
+	for (i = 0; i < nentry; i++) {
+		memset(recvv[i], 0, szentry);
+	}
+
+	sem_post(&sem_report);
+
+	return NULL;
+}
+/* Return the calling thread's id using the raw SYS_gettid system call. */
+pid_t gettid(void)
+{
+	return syscall(SYS_gettid);
+}
+/* Map nentry send and receive buffers of 2^argv[1] bytes, let util_thread zero the receive buffers, then join; exits 1 on any failure. */
+int
+main(int argc, char **argv)
+{
+	int ret = 0;
+	int rc;
+	int i;
+	pthread_attr_t attr;
+
+	if(argc == 3) {
+		szentry = (1ULL << atoi(argv[1])); /* NOTE: the exponent itself is not range-checked */
+		nentry = atoi(argv[2]);
+	}
+
+	if (argc != 3 || szentry <= 0 || nentry < 0) { /* <= 0 also catches sizes that do not fit in int */
+		fprintf(stderr, "usage: CT25 <log-size of one buffer entry> <# of entries>\n");
+		goto fn_fail;
+	}
+
+	sem_init(&sem_kick, 0, 0);
+	sem_init(&sem_report, 0, 0);
+
+	fprintf(stderr, "CT25001 START\n");
+	fprintf(stderr, "CT25001 INFO (pid=%d,tid=%d)\n", getpid(), gettid());
+
+	sendv = malloc(sizeof(char *) * nentry);
+	if(!sendv) { printf("malloc failed"); goto fn_fail; }
+	for (i = 0; i < nentry; i++) {
+		sendv[i] = (char*)mmap(0, szentry, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+		if(sendv[i] == MAP_FAILED) { printf("mmap failed"); goto fn_fail; }
+		memset(sendv[i], 0xaa, szentry);
+	}
+
+	recvv = malloc(sizeof(char *) * nentry);
+	if(!recvv) { printf("malloc failed"); goto fn_fail; }
+	for (i = 0; i < nentry; i++) {
+		recvv[i] = (char*)mmap(0, szentry, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+		if(recvv[i] == MAP_FAILED) { printf("mmap failed"); goto fn_fail; }
+		memset(recvv[i], 0, szentry);
+	}
+
+	rc = syscall(731, 1, NULL);
+	if (rc) {
+		fprintf(stderr, "CT25002 util_indicate_clone INFO (rc=%d, errno=%d)\n", rc, errno);
+	} else {
+		fprintf(stderr, "CT25002 util_indicate_clone OK\n");
+	}
+
+	pthread_attr_init(&attr);
+	//pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+	rc = pthread_create(&thr, &attr, util_thread, NULL);
+	if (rc){
+		fprintf(stderr, "pthread_create: %d\n", rc);
+		exit(1);
+	}
+	fprintf(stderr, "CT25002 pthread_create OK\n");
+
+	sem_post(&sem_kick);
+	sem_wait(&sem_report);
+
+	pthread_join(thr, NULL);
+
+	fprintf(stderr, "CT25003 END\n");
+	ret = 0;
+
+ fn_exit:
+	exit(ret);
+
+ fn_fail:
+	ret = 1; /* every failure path exits non-zero */
+	goto fn_exit;
+}
diff --git a/test/uti/CT26.c b/test/uti/CT26.c
new file mode 100644
index 00000000..4ca3a8b8
--- /dev/null
+++ b/test/uti/CT26.c
@@ -0,0 +1,139 @@
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <pthread.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <sys/syscall.h>   /* For SYS_xxx definitions */
+#include <linux/futex.h>
+#include <sys/time.h>
+#include <string.h>
+#include <signal.h>
+
+int passed = 0;
+pthread_t thr;
+
+unsigned long mem; /* delay functions issue ld/st instructions on this address */
+double nspw; /* nsec per work */
+
+/* Timer related macros */
+#define TS2NS(sec, nsec) ((unsigned long)(sec) * 1000000000ULL + (unsigned long)(nsec))
+#define N_INIT 10000000
+/* One unit of work: load *ptr, add 1, store it back. */
+static inline void fixed_size_work(unsigned long *ptr) {
+	asm volatile("movq %0, %%rax\n\t"
+		     "addq $1, %%rax\n\t"
+		     "movq %%rax, %0\n\t"
+		     : "+rm" (*ptr)
+		     :
+		     : "rax", "cc", "memory");
+}
+/* Run fixed_size_work() on *ptr n times. */
+static inline void delay_loop(unsigned long n, unsigned long *ptr) {
+	unsigned long j; /* same type as n, so the loop bound needs no signed/unsigned conversion */
+	for (j = 0; j < (n); j++) {
+		fixed_size_work(ptr);
+	}
+}
+/* Calibrate nspw by timing N_INIT units of work on *mem with the thread CPU-time clock. */
+void delay_init(unsigned long *mem) {
+	struct timespec start, end;
+	unsigned long nsec;
+	/* must run before delay_nsec(), which divides by nspw */
+	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
+	delay_loop(N_INIT, mem);
+	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
+	nsec = (TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec));
+	nspw = nsec / (double)N_INIT;
+	printf("nsec=%ld, nspw=%f\n", nsec, nspw);
+}
+/* Busy-work for roughly delay_nsec nanoseconds, based on the nspw calibration. */
+void delay_nsec(unsigned long delay_nsec, unsigned long* mem) {
+	/* unit count = requested time / measured time per unit */
+	delay_loop(delay_nsec / nspw, mem);
+}
+/* Signal handler installed for SIGINT: spins forever. */
+void halt(int sig) {
+	while(1) { }
+}
+/* Thread body: report the result of syscall 732, set passed, then report the result of syscall 888. */
+void *util_thread(void *arg) {
+	int rc;
+	fprintf(stderr, "CT12101 enter OK\n");
+
+	rc = syscall(732);
+	if (rc == -1)
+		fprintf(stderr, "CT12102 running on Linux CPU OK (tid=%d)\n", (int)syscall(__NR_gettid)); /* syscall() returns long; %d needs int */
+	else {
+		fprintf(stderr, "CT12102 running on Linux CPU NG (tid=%d,rc=%d)\n", (int)syscall(__NR_gettid), rc);
+	}
+
+	passed = 1;
+
+	rc = syscall(888);
+	if (rc != -1) {
+		fprintf(stderr, "CT12103 syscall(888) OK (%x)\n", rc);
+	} else {
+		fprintf(stderr, "CT12103 syscall(888) NG (%x)\n", rc);
+	}
+
+	return NULL;
+}
+
+int
+main(int argc, char **argv)
+{
+	int rc;
+	pthread_attr_t attr;
+	struct sigaction act;
+
+	fprintf(stderr, "CT12001 futex START (tid=%d)\n", syscall(__NR_gettid));
+#if 0
+	rc = syscall(731, 1, NULL);
+	if (rc) {
+		fprintf(stderr, "util_indicate_clone rc=%d, errno=%d\n", rc, errno);
+		fflush(stderr);
+	}
+#endif
+	sigaction(SIGINT, NULL, &act);
+	act.sa_handler = halt;
+	act.sa_flags &= ~(SA_RESTART);
+	sigaction(SIGINT, &act, NULL);
+
+	rc = pthread_attr_init(&attr);
+	if (rc){
+		fprintf(stderr, "pthread_attr_init: %d\n", rc);
+		exit(1);
+	}
+#if 0
+	rc = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+	if (rc){
+		fprintf(stderr, "pthread_attr_setdetachstate: %d\n", rc);
+		exit(1);
+	}
+#endif
+	rc = pthread_create(&thr, &attr,
util_thread, NULL); + if (rc){ + fprintf(stderr, "pthread_create: %d\n", rc); + exit(1); + } + fprintf(stderr, "CT12002 pthread_create OK\n"); + +#if 1 + while (!passed) { + asm volatile("pause" ::: "memory"); + } +#endif + usleep(200000); + +#if 1 + pthread_join(thr, NULL); + fprintf(stderr, "CT12004 pthread_join OK\n"); +#endif + //fprintf(stderr, "CT12005 END\n"); + exit(0); +} diff --git a/test/uti/CT27.c b/test/uti/CT27.c new file mode 100644 index 00000000..65df96df --- /dev/null +++ b/test/uti/CT27.c @@ -0,0 +1,497 @@ +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <pthread.h> +#include <errno.h> +#include <unistd.h> +#include <getopt.h> +#include <sys/syscall.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/shm.h> +#include <fcntl.h> +#include <signal.h> + +#define DEBUG + +#ifdef DEBUG +#define dprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stdout, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#define eprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stdout, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#else +#define dprintf(...) do { } while (0) +#define eprintf(...) do { } while (0) +#endif + +#define NPROC 1 +#define MAX_NOPS 10 +int NOPS=1;/* RDMA:1, accumulate:10 */ +#define TS2NS(sec, nsec) ((unsigned long)(sec) * 1000000000ULL + (unsigned long)(nsec)) +#define CALC_CPU (100000) /* 100,000 nsec, CPU time for calculation */ +#define I2R_OCC ( 200) /* 200 nsec, occupation time for for sending AM packet */ +#define I2R_NET (1000) /* 1,000 nsec, Network time for packet to arrive at responder */ +int R2I_OCC= (10200/*400*/); /* RDMA:10,200 nsec, accumulate:400ns, occupation time for perforing accumulate or RDMA-RD and sending ACK packet . 
Note that 10GB/s means 100KB/10,000 ns */ +#define R2I_NET (1000) /* 1000 nsec, Network time for packet to arrive at initiator */ +#define POLL_CPU ( 200) /* 200 nsec, CPU time for checking DRAM event queue */ +#define REQ_UPDATE_CPU ( 200) /* 200 nsec, CPU time for updates MPI_Request */ +#define NSPIN 1 +static inline void fixed_size_work() { + asm volatile( + "movq $0, %%rcx\n\t" + "1:\t" + "addq $1, %%rcx\n\t" + "cmpq $99, %%rcx\n\t" + "jle 1b\n\t" + : + : + : "rcx", "cc"); +} + +static inline void bulk_fsw(unsigned long n) { + int j; + for (j = 0; j < (n); j++) { + fixed_size_work(); + } +} + +struct thr_arg { + int rank; + volatile int bar_count; /* Barrier before entering loop */ + pthread_mutex_t bar_lock; + pthread_cond_t bar_cond; + pthread_t pthread; + + pthread_mutex_t ep_lock; /* mutex for endpoint manipulation */ + volatile long ini_ev[MAX_NOPS]; /* events on the responder */ + volatile long res_ev[MAX_NOPS]; /* events on the initiator */ + volatile int terminate; + long ini_busy; /* Initiator is busy sending AM packet or RTS packet etc. */ + long res_busy; /* Responder is busy doing accumulate or RDMA-RD etc. 
*/ +}; + +struct per_proc { + int rank; + struct thr_arg thr_arg; + long nsec; + +}; + +struct proc_glb { + struct per_proc per_procs[NPROC]; + volatile int bar_count; + pthread_mutex_t bar_lock; + pthread_cond_t bar_cond; +}; + +struct proc_glb *proc_glb; + +unsigned long mem; /* Per-thread storage */ +int wps = 1; /* work per sec */ +double nspw; /* nsec per work */ + +#define N_INIT 10000000 + +void fwq_init() { + struct timespec start, end; + unsigned long nsec; + int i; + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + bulk_fsw(N_INIT); + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + nsec = (TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec)); + nspw = nsec / (double)N_INIT; + printf("nsec=%ld, nspw=%f\n", nsec, nspw); +} + +void fwq(long delay_nsec) { + if (delay_nsec < 0) { + printf("%s: delay_nsec<0\n", __FUNCTION__); + } + //printf("delay_nsec=%ld,count=%f\n", delay_nsec, delay_nsec / nspw); + bulk_fsw(delay_nsec / nspw); +} +int progress_responder(struct thr_arg *thr_arg) { + int ret = 0; + int j; + struct timespec now_ts; + long now_long; + clock_gettime(CLOCK_REALTIME, &now_ts); + now_long = TS2NS(now_ts.tv_sec, now_ts.tv_nsec); + + pthread_mutex_lock(&thr_arg->ep_lock); /* This lock is for consistency */ + for (j = 0; j < NOPS; j++) { + if (thr_arg->res_busy <= now_long && thr_arg->res_ev[j] && thr_arg->res_ev[j] <= now_long) { + //if(thr_arg->rank == 0) { printf("res_ev=%ld,busy=%ld,now=%ld\n", thr_arg->res_ev[j] % 1000000000UL, thr_arg->res_busy % 1000000000UL, now_long % 1000000000UL); } + thr_arg->ini_ev[j] = now_long + R2I_OCC + R2I_NET; + thr_arg->res_ev[j] = 0; + thr_arg->res_busy = now_long + R2I_OCC; /* responder is busy for AM or RDMA-RD etc. 
*/ + ret = 1; + } + } + pthread_mutex_unlock(&thr_arg->ep_lock); + return ret; +} + +int progress_initiator(struct thr_arg* thr_arg) { + int ret = 0; + int j; + struct timespec now_ts; + long now_long; + clock_gettime(CLOCK_REALTIME, &now_ts); + now_long = TS2NS(now_ts.tv_sec, now_ts.tv_nsec); + + pthread_mutex_lock(&thr_arg->ep_lock); + for (j = 0; j < NOPS; j++) { + //if(thr_arg->rank == 0) { printf("ini_ev=%ld,now=%ld\n", thr_arg->ini_ev[j], now_long); } + if (thr_arg->ini_busy <= now_long && thr_arg->ini_ev[j] && thr_arg->ini_ev[j] <= now_long) { + fwq(POLL_CPU); /* Account for cache miss */ + fwq(REQ_UPDATE_CPU); + now_long += POLL_CPU + REQ_UPDATE_CPU; + thr_arg->ini_ev[j] = 0; /* Event is consumed */ + thr_arg->ini_busy = now_long; + ret = 1; + } + } + pthread_mutex_unlock(&thr_arg->ep_lock); + return ret; +} + +void *progress_fn(void *arg) { + struct thr_arg *thr_arg = (struct thr_arg *)arg; + int rc; + int spin_count = 0; + int i, j; + struct timespec now_ts; + long now_long; + + rc = syscall(732); + if (rc == -1) + fprintf(stdout, "CT09100 progress_fn running on Linux OK\n"); + else { + fprintf(stdout, "CT09100 progress_fn running on McKernel NG\n", rc); + } + + printf("progress,enter,rank=%d\n", thr_arg->rank); + + pthread_mutex_lock(&thr_arg->bar_lock); + thr_arg->bar_count++; + if (thr_arg->bar_count == 2) { + if ((rc = pthread_cond_broadcast(&thr_arg->bar_cond))) { + printf("[%d] pthread_cond_broadcast failed,rc=%d\n", thr_arg->rank, rc); + } + } + while (thr_arg->bar_count != 2) { + if ((rc = pthread_cond_wait(&thr_arg->bar_cond, &thr_arg->bar_lock))) { + printf("[%d] pthread_cond_wait failed,rc=%d\n", thr_arg->rank, rc); + } + } + pthread_mutex_unlock(&thr_arg->bar_lock); + + printf("[%d] progress,after barrier\n", thr_arg->rank); + //#define NO_ASYNC +#ifdef NO_ASYNC + return NULL; +#endif + /* Start progress */ + while(1) { + if (thr_arg->terminate) { + break; + } + + if (progress_responder(thr_arg)) { + //if (thr_arg->rank == 0) { 
printf("progress_fn, responder progressed\n"); } + } + + if (progress_initiator(thr_arg)) { + //if (thr_arg->rank == 0) { printf("progress_fn, initiator progressed\n"); } + } + + spin_count++; + if (spin_count >= NSPIN) { + spin_count = 0; + sched_yield(); + } + } + printf("progress,exit,rank=%d\n", thr_arg->rank); + return NULL; +} + +void parent_fn(struct per_proc *per_proc) { + int i, j; + int rc; + char* uti_str; + int uti_val; + struct timespec start, end; + pthread_condattr_t condattr; + pthread_mutexattr_t mutexattr; + struct timespec now_ts; + long now_long; + + printf("[%d] parent_fn,enter,proc_glb=%p,bar_count=%d\n", per_proc->rank, proc_glb, proc_glb->bar_count); + + pthread_mutex_lock(&proc_glb->bar_lock); + proc_glb->bar_count++; + if (proc_glb->bar_count == NPROC) { + if ((rc = pthread_cond_broadcast(&proc_glb->bar_cond))) { + printf("[%d] pthread_cond_broadcast failed,rc=%d\n", per_proc->rank, rc); + } + } + while (proc_glb->bar_count != NPROC) { + if ((rc = pthread_cond_wait(&proc_glb->bar_cond, &proc_glb->bar_lock))) { + printf("[%d] pthread_cond_wait failed,rc=%d\n", per_proc->rank, rc); + } + } + pthread_mutex_unlock(&proc_glb->bar_lock); + + //printf("[%d] parent,after barrier\n", per_proc->rank); + + pthread_mutexattr_init(&mutexattr); + //pthread_mutexattr_setpshared(&mutexattr, PTHREAD_PROCESS_SHARED); + pthread_mutex_init(&per_proc->thr_arg.ep_lock, &mutexattr); + + per_proc->thr_arg.bar_count = 0; + + pthread_condattr_init(&condattr); + //pthread_condattr_setpshared(&condattr, PTHREAD_PROCESS_SHARED); + pthread_cond_init(&per_proc->thr_arg.bar_cond, &condattr); + + pthread_mutexattr_init(&mutexattr); + //pthread_mutexattr_setpshared(&mutexattr, PTHREAD_PROCESS_SHARED); + pthread_mutex_init(&per_proc->thr_arg.bar_lock, &mutexattr); + + uti_str = getenv("DISABLE_UTI"); + uti_val = uti_str ? 
atoi(uti_str) : 0; + if (!uti_val) { + rc = syscall(731, 1, NULL); + if (rc) { + fprintf(stdout, "CT09003 INFO: uti not available (rc=%d)\n", rc); + } else { + fprintf(stdout, "CT09003 INFO: uti available\n"); + } + } else { + fprintf(stdout, "CT09003 INFO: uti disabled\n", rc); + } + + per_proc->thr_arg.rank = per_proc->rank; + rc = pthread_create(&per_proc->thr_arg.pthread, NULL, progress_fn, &per_proc->thr_arg); + if (rc){ + fprintf(stdout, "pthread_create: %d\n", rc); + exit(1); + } + + pthread_mutex_lock(&per_proc->thr_arg.bar_lock); + per_proc->thr_arg.bar_count++; + if (per_proc->thr_arg.bar_count == 2) { + if ((rc = pthread_cond_broadcast(&per_proc->thr_arg.bar_cond))) { + printf("[%d] pthread_cond_broadcast failed,rc=%d\n", per_proc->rank, rc); + } + } + while (per_proc->thr_arg.bar_count != 2) { + if ((rc = pthread_cond_wait(&per_proc->thr_arg.bar_cond, &per_proc->thr_arg.bar_lock))) { + printf("[%d] pthread_cond_wait failed,rc=%d\n", per_proc->rank, rc); + } + } + pthread_mutex_unlock(&per_proc->thr_arg.bar_lock); + + printf("[%d] parent,after barrier\n", per_proc->rank); + //fprintf(stdout, "CT09004 pthread_create OK\n"); + + //#define TIMER_KIND CLOCK_THREAD_CPUTIME_ID +#define TIMER_KIND CLOCK_REALTIME + clock_gettime(TIMER_KIND, &start); + for (i = 0; i < 10000; i++) { /* It takes 1 sec */ + + /* Send request-to-send packet */ + clock_gettime(CLOCK_REALTIME, &now_ts); + now_long = TS2NS(now_ts.tv_sec, now_ts.tv_nsec); + + for (j = 0; j < NOPS; j++) { + pthread_mutex_lock(&per_proc->thr_arg.ep_lock); /* Lock is taken per MPI_Accumulate() */ + fwq(I2R_OCC); + now_long += I2R_OCC; + per_proc->thr_arg.res_ev[j] = now_long + I2R_NET; + per_proc->thr_arg.ini_busy = now_long; + //printf("res_ev=%ld,ini_busy=%ld,now=%ld\n", per_proc->thr_arg.res_ev[j] % 1000000000UL, per_proc->thr_arg.ini_busy % 1000000000UL, now_long % 1000000000UL); + pthread_mutex_unlock(&per_proc->thr_arg.ep_lock); + } + + /* Start calculation */ + fwq(CALC_CPU); + + /* Progress 
responder and initiator */ + int more_reap_needed; + while (1) { + if (progress_responder(&per_proc->thr_arg)) { + //printf("parent_fn, responder progressed\n"); + } + + if (progress_initiator(&per_proc->thr_arg)) { + //printf("parent_fn, initiator progressed\n"); + } + + more_reap_needed = 0; + for (j = 0; j < NOPS; j++) { + if (per_proc->thr_arg.res_ev[j] || per_proc->thr_arg.ini_ev[j]) { + more_reap_needed = 1; + break; + } + } + if (!more_reap_needed) { + break; + } + } + } + clock_gettime(TIMER_KIND, &end); + + per_proc->thr_arg.terminate = 1; + pthread_join(per_proc->thr_arg.pthread, NULL); + + per_proc->nsec = TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec); +} + +static struct option options[] = { + { + .name = "ppn", + .has_arg = required_argument, + .flag = NULL, + .val = 'P', + }, + /* end */ + { NULL, 0, NULL, 0, }, +}; + +int main(int argc, char **argv) { + int rc; + int i; + char *uti_str; + int uti_val; + int st; + pid_t pid; + long max; + pthread_condattr_t condattr; + pthread_mutexattr_t mutexattr; + int fd; + key_t key = ftok(argv[0], 0); + int shmid; + int opt; + + while ((opt = getopt_long(argc, argv, "+ar", options, NULL)) != -1) { + switch (opt) { + case 'a': /* accumulate */ + NOPS = 10; /* ten accumulates */ + R2I_OCC = 400; /* 200 ns to accumulate, 200 ns to send ACK */ + break; + case 'r': + NOPS = 6; /* 3D stencil, RDMA */ + R2I_OCC = 10200; /* 10000 ns to RDMA-RD, 200 ns to send DONE */ + break; + default: /* '?' 
*/ + printf("usage: [-a] [-r]"); + exit(1); + } + } + + fprintf(stdout, "CT09001 MPI progress thread skelton START\n"); + + rc = syscall(732); + if (rc == -1) + fprintf(stdout, "CT09002 main running on Linux INFO\n"); + else { + fprintf(stdout, "CT09002 main running on McKernel INFO\n"); + } + + fwq_init(); + +#define SHMPOSIX 1 +#define SHMSYSV 2 +#define SHMANON 3 +#define SHM_METHOD SHMPOSIX +#if SHM_METHOD==SHMPOSIX + printf("posix1\n"); + if((fd = shm_open("/CT27", O_RDWR | O_CREAT, 0644)) == -1) { + fprintf(stdout, "shm_open failed\n"); + } + if(ftruncate(fd, sizeof(struct proc_glb))) { + fprintf(stdout, "ftruncate failed\n"); + } + proc_glb = mmap(0, sizeof(struct proc_glb), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (proc_glb == (void*)-1) { + fprintf(stdout, "mmap failed\n"); + exit(1); + } +#elif SHM_METHOD==SHMSYSV + printf("sysv1\n"); + if ((shmid = shmget(key, sizeof(struct proc_glb), IPC_CREAT | 0660)) == -1) { + fprintf(stdout, "shmget failed: %s\n", strerror(errno)); + } + proc_glb = shmat(shmid, NULL, 0); + if (proc_glb == (void*)-1) { + fprintf(stdout, "shmat failed\n"); + exit(1); + } +#elif SHM_METHOD==SHMANON + printf("anon1\n"); + proc_glb = mmap(0, sizeof(struct proc_glb), PROT_READ | PROT_WRITE, MAP_SHARED/* | MAP_ANONYMOUS*/, /*-1*/fd, 0); + if (proc_glb == (void*)-1) { + fprintf(stdout, "mmap failed\n"); + exit(1); + } +#endif + + memset(proc_glb, 0, sizeof(struct proc_glb)); + + proc_glb->bar_count = 0; + + pthread_condattr_init(&condattr); + pthread_condattr_setpshared(&condattr, PTHREAD_PROCESS_SHARED); + pthread_cond_init(&proc_glb->bar_cond, &condattr); + + pthread_mutexattr_init(&mutexattr); + pthread_mutexattr_setpshared(&mutexattr, PTHREAD_PROCESS_SHARED); + pthread_mutex_init(&proc_glb->bar_lock, &mutexattr); + + for (i = 0; i < NPROC; i++) { + proc_glb->per_procs[i].rank = i; + printf("[0] i=%d,rank=%d\n", i, proc_glb->per_procs[i].rank); + } + for (i = 1; i < NPROC; i++) { + pid = fork(); + if(pid < 0) { + fprintf(stdout, 
"fork failed: %s\n", strerror(errno)); + exit(1); + } else if (pid == 0) { +#if SHM_METHOD==SHMSYSV + printf("sysv2\n"); + proc_glb = shmat(shmid, NULL, 0); +#endif + printf("[%d] rank=%d\n", i, proc_glb->per_procs[i].rank); + parent_fn(&proc_glb->per_procs[i]); + exit(0); + } + } + parent_fn(&proc_glb->per_procs[0]); + + while ((pid = waitpid(-1, &st, __WALL)) > 0); + + max = -1; + for (i = 0; i < NPROC; i++) { + if (max < proc_glb->per_procs[i].nsec) { + max = proc_glb->per_procs[i].nsec; + } + } + + fprintf(stderr, "max %ld nsec\n", max); + fprintf(stdout, "CT09006 END\n"); +} + diff --git a/test/uti/CT27.sh b/test/uti/CT27.sh new file mode 100755 index 00000000..7561b523 --- /dev/null +++ b/test/uti/CT27.sh @@ -0,0 +1,64 @@ +#!/usr/bin/bash +MYHOME="/work/gg10/e29005" +MCK="${MYHOME}/project/os/install" +MCEXEC= +MCEXECOPT="--enable-uti" +export DISABLE_UTI=0 + +stop=0 +reset=0 +go=0 +acc=0 +nodes="c[8195]" + +while getopts srgamd OPT +do + case ${OPT} in + s) stop=1 + ;; + r) reset=1 + ;; + g) go=1 + ;; + a) acc=1 # accumulate, otherwise RDMA + ;; + m) + MCEXEC="${MCK}/bin/mcexec" + ;; + d) export DISABLE_UTI=1 + ;; + *) echo "invalid option -${OPT}" >&2 + exit 1 + esac +done + +if [ ${acc} -eq 1 ]; then + exeopt="-a" +else + exeopt="-r" +fi + +if [ ${stop} -eq 1 ]; then + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w ${nodes} \ + sudo mount /work + + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w ${nodes} \ + sudo ${MCK}/sbin/mcstop+release.sh +fi + +if [ ${reset} -eq 1 ]; then + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w ${nodes} \ + sudo mount /work + + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w ${nodes} \ + sudo ${MCK}/sbin/mcreboot.sh -c 2-17,20-35,36-51,52-67 -r 2-5:0+6-9:1+10-13:68+14-17:69+20-23:136+24-27:137+28-31:204+32-35:205+36-39:18+40-43:19+44-47:86+48-51:87+52-55:154+56-59:155+60-63:222+64-67:223 -m 32G@0,12G@1 +fi + +if [ ${go} -eq 1 ]; then + make + + > ./log + for i in {1..10}; do (${MCEXEC} ${MCEXECOPT} taskset -c 0-7 ./CT27 $exeopt 1>/dev/null 2>> 
./log); done + perl CT11.pl < ./log + #${MCEXEC} ${MCEXECOPT} taskset -c 0-7 ./CT27 $exeopt +fi diff --git a/test/uti/CT28.c b/test/uti/CT28.c new file mode 100644 index 00000000..d579fb52 --- /dev/null +++ b/test/uti/CT28.c @@ -0,0 +1,441 @@ +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <pthread.h> +#include <errno.h> +#include <unistd.h> +#include <getopt.h> +#include <sys/syscall.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/shm.h> +#include <fcntl.h> +#include <signal.h> + +#define DEBUG + +#ifdef DEBUG +#define dprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stdout, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#define eprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stdout, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#else +#define dprintf(...) do { } while (0) +#define eprintf(...) do { } while (0) +#endif + +#define NPROC 8 +#define NINC 10000 +#define TS2NS(sec, nsec) ((unsigned long)(sec) * 1000000000ULL + (unsigned long)(nsec)) +#define NSPIN 1 + +static inline void fixed_size_work() { + asm volatile( + "movq $0, %%rcx\n\t" + "1:\t" + "addq $1, %%rcx\n\t" + "cmpq $99, %%rcx\n\t" + "jle 1b\n\t" + : + : + : "rcx", "cc"); +} + +static inline void bulk_fsw(unsigned long n) { + int j; + for (j = 0; j < (n); j++) { + fixed_size_work(); + } +} + +struct thr_arg { + int rank; + volatile int bar_count; /* Barrier before entering loop */ + pthread_mutex_t bar_lock; + pthread_cond_t bar_cond; + pthread_t pthread; + + pthread_mutex_t ep_lock; /* mutex for endpoint manipulation */ + volatile long count; /* events on the responder */ + volatile int terminate; +}; + +struct per_proc { + int rank; + struct thr_arg thr_arg; + long nsec; +}; + +struct proc_glb { + struct per_proc per_procs[NPROC]; + volatile int bar_count; + pthread_mutex_t bar_lock; + pthread_cond_t bar_cond; +}; + +struct proc_glb 
*proc_glb; + +unsigned long mem; /* Per-thread storage */ +int wps = 1; /* work per sec */ +double nspw; /* nsec per work */ + +#define N_INIT 10000000 + +static int print_cpu_last_executed_on() { + char fn[256]; + char* result; + pid_t tid = syscall(SYS_gettid); + int fd; + int offset; + int mpi_errno = 0; + + sprintf(fn, "/proc/%d/task/%d/stat", getpid(), (int)tid); + //printf("fn=%s\n", fn); + fd = open(fn, O_RDONLY); + if(fd == -1) { + printf("open() failed\n"); + goto fn_fail; + } + + result = malloc(65536); + if(result == NULL) { + printf("malloc() failed"); + goto fn_fail; + } + + int amount = 0; + offset = 0; + while(1) { + amount = read(fd, result + offset, 65536); + // printf("amount=%d\n", amount); + if(amount == -1) { + printf("read() failed"); + goto fn_fail; + } + if(amount == 0) { + goto eof; + } + offset += amount; + } + eof:; + //printf("result:%s\n", result); + + char* next_delim = result; + char* field; + int i; + for(i = 0; i < 39; i++) { + field = strsep(&next_delim, " "); + } + + int cpu = sched_getcpu(); + if(cpu == -1) { + printf("getpu() failed\n"); + goto fn_fail; + } + + printf("stat-cpu=%02d,sched_getcpu=%02d,tid=%d\n", atoi(field), cpu, tid); fflush(stdout); + fn_exit: + free(result); + return mpi_errno; + fn_fail: + mpi_errno = -1; + goto fn_exit; +} + +void fwq_init() { + struct timespec start, end; + unsigned long nsec; + int i; + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + bulk_fsw(N_INIT); + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + nsec = (TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec)); + nspw = nsec / (double)N_INIT; + printf("nsec=%ld, nspw=%f\n", nsec, nspw); +} + +void fwq(long delay_nsec) { + if (delay_nsec < 0) { + printf("%s: delay_nsec<0\n", __FUNCTION__); + } + bulk_fsw(delay_nsec / nspw); +} + +void init_bar(struct thr_arg* thr_arg) { + pthread_mutex_lock(&thr_arg->bar_lock); + thr_arg->bar_count= 0; + pthread_mutex_unlock(&thr_arg->bar_lock); +} + +void bar(struct thr_arg* thr_arg) { + 
int rc; + pthread_mutex_lock(&thr_arg->bar_lock); + thr_arg->bar_count++; + if (thr_arg->bar_count == 2) { + if ((rc = pthread_cond_broadcast(&thr_arg->bar_cond))) { + printf("[%d] pthread_cond_broadcast failed,rc=%d\n", thr_arg->rank, rc); + } + } + while (thr_arg->bar_count != 2) { + if ((rc = pthread_cond_wait(&thr_arg->bar_cond, &thr_arg->bar_lock))) { + printf("[%d] pthread_cond_wait failed,rc=%d\n", thr_arg->rank, rc); + } + } + pthread_mutex_unlock(&thr_arg->bar_lock); +} + +void *progress_fn(void *arg) { + struct thr_arg *thr_arg = (struct thr_arg *)arg; + int rc; + int spin_count = 0; + int i, j; + struct timespec now_ts; + long now_long; + + rc = syscall(732); + if (rc == -1) + fprintf(stdout, "CT09100 progress_fn running on Linux OK\n"); + else { + fprintf(stdout, "CT09100 progress_fn running on McKernel NG\n", rc); + } + + printf("[%d] progress,enter,", thr_arg->rank); + print_cpu_last_executed_on(); + + bar(thr_arg); + + printf("[%d] progress,after barrier\n", thr_arg->rank); + + for (i = 0; i < NINC; i++) { + pthread_mutex_lock(&thr_arg->ep_lock); + thr_arg->count++; + pthread_mutex_unlock(&thr_arg->ep_lock); + sched_yield(); + } + + bar(thr_arg); + printf("progress,exit,rank=%d\n", thr_arg->rank); + + return NULL; +} + +#define TIMER_KIND CLOCK_THREAD_CPUTIME_ID +//#define TIMER_KIND CLOCK_REALTIME + +void parent_fn(struct per_proc *per_proc) { + int i, j; + int rc; + char* uti_str; + int uti_val; + struct timespec start, end; + pthread_condattr_t condattr; + pthread_mutexattr_t mutexattr; + struct timespec now_ts; + long now_long; + + printf("[%d] parent_fn,enter,", per_proc->rank); + print_cpu_last_executed_on(); + + pthread_mutex_lock(&proc_glb->bar_lock); + proc_glb->bar_count++; + if (proc_glb->bar_count == NPROC) { + if ((rc = pthread_cond_broadcast(&proc_glb->bar_cond))) { + printf("[%d] pthread_cond_broadcast failed,rc=%d\n", per_proc->rank, rc); + } + } + while (proc_glb->bar_count != NPROC) { + if ((rc = 
pthread_cond_wait(&proc_glb->bar_cond, &proc_glb->bar_lock))) { + printf("[%d] pthread_cond_wait failed,rc=%d\n", per_proc->rank, rc); + } + } + pthread_mutex_unlock(&proc_glb->bar_lock); + + + pthread_mutexattr_init(&mutexattr); + pthread_mutex_init(&per_proc->thr_arg.ep_lock, &mutexattr); + + per_proc->thr_arg.bar_count = 0; + + pthread_condattr_init(&condattr); + pthread_cond_init(&per_proc->thr_arg.bar_cond, &condattr); + + pthread_mutexattr_init(&mutexattr); + pthread_mutex_init(&per_proc->thr_arg.bar_lock, &mutexattr); + + uti_str = getenv("DISABLE_UTI"); + uti_val = uti_str ? atoi(uti_str) : 0; + if (!uti_val) { + rc = syscall(731, 1, NULL); + if (rc) { + fprintf(stdout, "CT09003 INFO: uti not available (rc=%d)\n", rc); + } else { + fprintf(stdout, "CT09003 INFO: uti available\n"); + } + } else { + fprintf(stdout, "CT09003 INFO: uti disabled\n", rc); + } + + per_proc->thr_arg.rank = per_proc->rank; + rc = pthread_create(&per_proc->thr_arg.pthread, NULL, progress_fn, &per_proc->thr_arg); + if (rc){ + fprintf(stdout, "pthread_create: %d\n", rc); + exit(1); + } + + init_bar(&per_proc->thr_arg); + bar(&per_proc->thr_arg); + + printf("[%d] parent,after barrier\n", per_proc->rank); + + clock_gettime(TIMER_KIND, &start); + for (i = 0; i < NINC; i++) { + pthread_mutex_lock(&per_proc->thr_arg.ep_lock); /* Lock is taken per MPI_Accumulate() */ + per_proc->thr_arg.count++; + pthread_mutex_unlock(&per_proc->thr_arg.ep_lock); + } + init_bar(&per_proc->thr_arg); + bar(&per_proc->thr_arg); + clock_gettime(TIMER_KIND, &end); + + pthread_join(per_proc->thr_arg.pthread, NULL); + + per_proc->nsec = TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec); +} + +static struct option options[] = { + { + .name = "ppn", + .has_arg = required_argument, + .flag = NULL, + .val = 'P', + }, + /* end */ + { NULL, 0, NULL, 0, }, +}; + +int main(int argc, char **argv) { + int rc; + int i; + char *uti_str; + int uti_val; + int st; + pid_t pid; + long max; + pthread_condattr_t 
condattr; + pthread_mutexattr_t mutexattr; + int fd; + key_t key = ftok(argv[0], 0); + int shmid; + int opt; + + while ((opt = getopt_long(argc, argv, "+", options, NULL)) != -1) { + switch (opt) { + default: /* '?' */ + printf("unknown option: %c\n", optopt); + exit(1); + } + } + + fprintf(stdout, "CT09001 MPI progress thread skelton START\n"); + + rc = syscall(732); + if (rc == -1) + fprintf(stdout, "CT09002 main running on Linux INFO\n"); + else { + fprintf(stdout, "CT09002 main running on McKernel INFO\n"); + } + + fwq_init(); + +#define SHMPOSIX 1 +#define SHMSYSV 2 +#define SHMANON 3 +#define SHM_METHOD SHMPOSIX +#if SHM_METHOD==SHMPOSIX + printf("posix1\n"); + if((fd = shm_open("/CT27", O_RDWR | O_CREAT, 0644)) == -1) { + fprintf(stdout, "shm_open failed\n"); + } + if(ftruncate(fd, sizeof(struct proc_glb))) { + fprintf(stdout, "ftruncate failed\n"); + } + proc_glb = mmap(0, sizeof(struct proc_glb), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (proc_glb == (void*)-1) { + fprintf(stdout, "mmap failed\n"); + exit(1); + } +#elif SHM_METHOD==SHMSYSV + printf("sysv1\n"); + if ((shmid = shmget(key, sizeof(struct proc_glb), IPC_CREAT | 0660)) == -1) { + fprintf(stdout, "shmget failed: %s\n", strerror(errno)); + } + proc_glb = shmat(shmid, NULL, 0); + if (proc_glb == (void*)-1) { + fprintf(stdout, "shmat failed\n"); + exit(1); + } +#elif SHM_METHOD==SHMANON + printf("anon1\n"); + proc_glb = mmap(0, sizeof(struct proc_glb), PROT_READ | PROT_WRITE, MAP_SHARED/* | MAP_ANONYMOUS*/, /*-1*/fd, 0); + if (proc_glb == (void*)-1) { + fprintf(stdout, "mmap failed\n"); + exit(1); + } +#endif + + memset(proc_glb, 0, sizeof(struct proc_glb)); + + proc_glb->bar_count = 0; + + pthread_condattr_init(&condattr); + pthread_condattr_setpshared(&condattr, PTHREAD_PROCESS_SHARED); + pthread_cond_init(&proc_glb->bar_cond, &condattr); + + pthread_mutexattr_init(&mutexattr); + pthread_mutexattr_setpshared(&mutexattr, PTHREAD_PROCESS_SHARED); + pthread_mutex_init(&proc_glb->bar_lock, 
&mutexattr); + + for (i = 0; i < NPROC; i++) { + proc_glb->per_procs[i].rank = i; + printf("[0] i=%d,rank=%d\n", i, proc_glb->per_procs[i].rank); + } + for (i = 1; i < NPROC; i++) { + pid = fork(); + if(pid < 0) { + fprintf(stdout, "fork failed: %s\n", strerror(errno)); + exit(1); + } else if (pid == 0) { +#if SHM_METHOD==SHMSYSV + printf("sysv2\n"); + proc_glb = shmat(shmid, NULL, 0); +#endif + printf("[%d] rank=%d\n", i, proc_glb->per_procs[i].rank); + parent_fn(&proc_glb->per_procs[i]); + exit(0); + } + } + parent_fn(&proc_glb->per_procs[0]); + + while ((pid = waitpid(-1, &st, __WALL)) > 0); + + max = -1; + for (i = 0; i < NPROC; i++) { + if (max < proc_glb->per_procs[i].nsec) { + max = proc_glb->per_procs[i].nsec; + } + } + + fprintf(stderr, "max %ld nsec\n", max); + fprintf(stdout, "CT09006 END\n"); +} + diff --git a/test/uti/CT28.sh b/test/uti/CT28.sh new file mode 100755 index 00000000..7ccef343 --- /dev/null +++ b/test/uti/CT28.sh @@ -0,0 +1,76 @@ +#!/usr/bin/bash -x +MYHOME="/work/gg10/e29005" +MCK="${MYHOME}/project/os/install" +MCEXECOPT="--enable-uti" +export DISABLE_UTI=0 + +stop=0 +reset=0 +go=0 +mck=0; +loop=0 +nodes="c[8195]" +NPROC=8 + +while getopts srglamd OPT +do + case ${OPT} in + s) stop=1 + ;; + r) reset=1 + ;; + g) go=1 + ;; + l) loop=1 + ;; + m) + mck=1 + ;; + d) export DISABLE_UTI=1 + ;; + *) echo "invalid option -${OPT}" >&2 + exit 1 + esac +done + + +if [ ${mck} -eq 1 ]; then + MCEXEC="${MCK}/bin/mcexec" + cpus="0-7" + NUMACTL= +else + MCEXEC= + cpus="2-9" + NUMACTL="numactl -C $cpus" +fi + +if [ ${stop} -eq 1 ]; then + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w ${nodes} \ + sudo mount /work + + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w ${nodes} \ + sudo ${MCK}/sbin/mcstop+release.sh +fi + +if [ ${reset} -eq 1 ]; then + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w ${nodes} \ + sudo mount /work + + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w ${nodes} \ + sudo ${MCK}/sbin/mcreboot.sh `${HOME}/project/src/tools/cpus.pl $NPROC` -m 32G@0,12G@1 
+ #sudo ${MCK}/sbin/mcreboot.sh -c 2-17,20-35,36-51,52-67 -r 2-5:0+6-9:1+10-13:68+14-17:69+20-23:136+24-27:137+28-31:204+32-35:205+36-39:18+40-43:19+44-47:86+48-51:87+52-55:154+56-59:155+60-63:222+64-67:223 -m 32G@0,12G@1 +fi + +if [ ${go} -eq 1 ]; then + cd $MYHOME/project/os/mckernel/test/uti + rm -f ./CT28 + make -DNPROC=$NPROC + + if [ ${loop} -eq 1 ]; then + > ./log + for i in {1..10}; do (${MCEXEC} ${MCEXECOPT} $NUMACTL ./CT28 1> ./log1 2>> ./log); done + perl CT11.pl < ./log + else + ${MCEXEC} ${MCEXECOPT} $NUMACTL ./CT28 + fi +fi diff --git a/test/uti/CT29.c b/test/uti/CT29.c new file mode 100644 index 00000000..d5f15ca7 --- /dev/null +++ b/test/uti/CT29.c @@ -0,0 +1,117 @@ +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <pthread.h> +#include <errno.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/mman.h> +#include <unistd.h> +#include <sys/syscall.h> /* For SYS_xxx definitions */ +#include <linux/futex.h> +#include <sys/time.h> +#include <string.h> +#include <signal.h> + +int passed = 0; +pthread_t thr; + +unsigned long mem; /* delay functions issue ld/st instructions on this address */ +double nspw; /* nsec per work */ + +/* Timer related macros */ +#define TS2NS(sec, nsec) ((unsigned long)(sec) * 1000000000ULL + (unsigned long)(nsec)) + +static inline void fixed_size_work() { + asm volatile( + "movq $0, %%rcx\n\t" + "1:\t" + "addq $1, %%rcx\n\t" + "cmpq $99, %%rcx\n\t" + "jle 1b\n\t" + : + : + : "rcx", "cc"); +} + +static inline void bulk_fsw(unsigned long n) { + int j; + for (j = 0; j < (n); j++) { + fixed_size_work(); + } +} + +#define N_INIT 1000000 + +void fwq_init() { + struct timespec start, end; + unsigned long nsec; + int i; + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + bulk_fsw(N_INIT); + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + nsec = (TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec)); + nspw = nsec / (double)N_INIT; + printf("nsec=%ld, nspw=%f\n", 
nsec, nspw); +} + +void fwq(long delay_nsec) { + if (delay_nsec < 0) { + printf("%s: delay_nsec<0\n", __FUNCTION__); + } + //printf("delay_nsec=%ld,count=%f\n", delay_nsec, delay_nsec / nspw); + bulk_fsw(delay_nsec / nspw); +} + +void *util_thread(void *arg) { + int rc; + + fwq(1000*1000); + + return NULL; +} + +int +main(int argc, char **argv) +{ + int rc; + pthread_attr_t attr; + struct sigaction act; + + fwq_init(); + + fprintf(stderr, "CT29001 INFO start (tid=%d)\n", syscall(__NR_gettid)); + rc = syscall(731, 1, NULL); + if (rc) { + fprintf(stderr, "CT29002 INFO uti not supported (rc=%d, errno=%d)\n", rc, errno); + fflush(stderr); + } + + rc = pthread_attr_init(&attr); + if (rc){ + fprintf(stderr, "pthread_attr_init: %d\n", rc); + exit(1); + } +#if 1 + rc = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + if (rc){ + fprintf(stderr, "pthread_attr_setdetachstate: %d\n", rc); + exit(1); + } +#endif + rc = pthread_create(&thr, &attr, util_thread, NULL); + if (rc){ + fprintf(stderr, "pthread_create: %d\n", rc); + exit(1); + } + fprintf(stderr, "CT29003 pthread_create OK\n"); + + fwq(100*1000*1000); + +#if 0 + pthread_join(thr, NULL); + fprintf(stderr, "CT29004 pthread_join OK\n"); +#endif + exit(0); +} diff --git a/test/uti/CT30.c b/test/uti/CT30.c new file mode 100644 index 00000000..34a97ef0 --- /dev/null +++ b/test/uti/CT30.c @@ -0,0 +1,177 @@ +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <pthread.h> +#include <errno.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/mman.h> +#include <signal.h> +#include "util.h" + +#define NTHR 1 +#define TS2NS(sec, nsec) ((unsigned long)(sec) * 1000000000ULL + (unsigned long)(nsec)) +#define CALC_DELAY (93000) /* 93 usec */ +#define INIT_DELAY (2000) /* 2 usec, CPU sends CTS packet */ +#define NIC_DELAY (3000) /* 3 usec, NIC reads by RDMA-read */ +#define POLL_DELAY (200) /* .2 usec, CPU fetces event queue entry from DRAM */ +#define RESP_DELAY (2000) 
/* 2 usec, CPU sends DONE packet and updates MPI_Request */ +#define NSPIN 1 +static inline void FIXED_SIZE_WORK(unsigned long *ptr) { + asm volatile("movq %0, %%rax\n\t" + "addq $1, %%rax\n\t" \ + "movq %%rax, %0\n\t" \ + : "+rm" (*ptr) \ + : \ + : "rax", "cc", "memory"); \ +} + +static inline void BULK_FSW(unsigned long n, unsigned long *ptr) { + int j; + for (j = 0; j < (n); j++) { + FIXED_SIZE_WORK(ptr); + } +} + + +pthread_mutex_t ep_lock; /* Ownership of channel instance */ +pthread_barrier_t bar; + +struct thr_arg { + pthread_t pthread; + unsigned long mem; /* Per-thread storage */ +}; + +struct thr_arg thr_args[NTHR]; + +unsigned long mem; /* Per-thread storage */ +volatile int nevents; +volatile int terminate; +int wps = 1; /* work per sec */ +double nspw; /* nsec per work */ + +#define N_INIT 10000000 + +void fwq_init(unsigned long *mem) { + struct timespec start, end; + unsigned long nsec; + int i; + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + BULK_FSW(N_INIT, mem); + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + nsec = (TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec)); + nspw = nsec / (double)N_INIT; + printf("[INFO] nsec=%ld, nspw=%f\n", nsec, nspw); +} + +void fwq(unsigned long delay_nsec, unsigned long* mem) { + //printf("delay_nsec=%ld,count=%f\n", delay_nsec, delay_nsec / nspw); + BULK_FSW(delay_nsec / nspw, mem); +} + +void fwq_omp(unsigned long delay_nsec, unsigned long* mem) { +#pragma omp parallel + { + BULK_FSW(delay_nsec / nspw, mem); + } +} + +void mydelay(long delay_nsec, long *mem) { + struct timespec start, end; + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + + while (1) { + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + if (TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec) > delay_nsec) { + break; + } + FIXED_SIZE_WORK(mem); + } +} + +void *util_fn(void *_arg) { + struct thr_arg *arg = (struct thr_arg *)_arg; + int ret; + int i; + + ret = syscall(732); + OKNGNOJUMP(ret == -1, "util_fn 
running on Linux, tid=%d\n", syscall(SYS_gettid)); + + pthread_barrier_wait(&bar); + + /* Start progress */ + while (1) { + pthread_mutex_lock(&ep_lock); + if (terminate) { + pthread_mutex_unlock(&ep_lock); + break; + } + + if (nevents > 0) { + nevents--; + fwq(random() % 100000000, &mem); /* 0 - 0.1 sec */ + } + pthread_mutex_unlock(&ep_lock); + } + + fn_fail: + return NULL; +} + +int main(int argc, char **argv) { + int ret; + int i; + struct timespec start, end; + + ret = syscall(732); + OKNGNOJUMP(ret != -1, "Master is running on McKernel\n"); + + fwq_init(&mem); + pthread_mutex_init(&ep_lock, NULL); + + pthread_barrier_init(&bar, NULL, NTHR + 1); + + if ((ret = syscall(731, 1, NULL))) { + fprintf(stdout, "Error: util_indicate_clone: %s\n", strerror(errno)); + } + + for (i = 0; i < NTHR; i++) { + if ((ret = pthread_create(&thr_args[i].pthread, NULL, util_fn, &thr_args[i]))) { + fprintf(stdout, "Error: pthread_create: %s\n", strerror(errno)); + exit(1); + } + } + + pthread_barrier_wait(&bar); + +#pragma omp parallel for + for (i = 0; i < omp_get_num_threads(); i++) { + printf("[INFO] thread_num=%d,tid=%d\n", i, syscall(SYS_gettid)); + } + + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + for (i = 0; i < 10; i++) { + pthread_mutex_lock(&ep_lock); + nevents++; + fwq_omp(random() % 100000000, &mem); /* 0 - 0.1 sec */ + pthread_mutex_unlock(&ep_lock); + + while (nevents > 0) { + FIXED_SIZE_WORK(&mem); + } + } + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + + terminate = 1; + + for (i = 0; i < NTHR; i++) { + pthread_join(thr_args[i].pthread, NULL); + } + + printf("[INFO] Time: %ld usec\n", (TS2NS(end.tv_sec, end.tv_nsec) - TS2NS(start.tv_sec, start.tv_nsec)) / 1000); + + ret = 0; + fn_fail: + return ret; +} diff --git a/test/uti/CT30.sh b/test/uti/CT30.sh new file mode 100755 index 00000000..fc0198fa --- /dev/null +++ b/test/uti/CT30.sh @@ -0,0 +1,92 @@ +#!/usr/bin/bash + +bn=`basename $0` +fn=`echo $bn | sed 's/.sh//'` + +stop=0 +reboot=0 +go=0 +mck=0 +NNODES=1 
+NPROC=$((16 * NNODES)) +LASTNODE=8200 + +while getopts srgmN:P:L: OPT +do + case ${OPT} in + s) stop=1 + ;; + r) reboot=1 + ;; + g) go=1 + ;; + m) mck=1 + ;; + N) NNODES=$OPTARG + ;; + P) NPROC=$OPTARG + ;; + L) LASTNODE=$OPTARG + ;; + *) echo "invalid option -${OPT}" >&2 + exit 1 + esac +done + +MYHOME=/work/gg10/e29005 +ABS_SRCDIR=${MYHOME}/project/os/mckernel/test/uti +MCK=${MYHOME}/project/os/install + +NODES=`echo $(seq -s ",c" $(($LASTNODE + 1 - $NNODES)) $LASTNODE) | sed 's/^/c/'` +PPN=$((NPROC / NNODES)) +echo NPROC=$NPROC NNODES=$NNODES PPN=$PPN NODES=$NODES + +if [ ${mck} -eq 1 ]; then + MCEXEC="${MCK}/bin/mcexec" + mcexecopt="--enable-uti" +else + MCEXEC= + mcexecopt= +fi + +if [ ${stop} -eq 1 ]; then + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $NODES \ + sudo mount /work + + if [ ${mck} -eq 1 ]; then + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $NODES \ + /sbin/pidof mcexec \| xargs -r kill -9 + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $NODES \ + sudo ${MCK}/sbin/mcstop+release.sh + else + : + fi +fi + +if [ ${reboot} -eq 1 ]; then + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $NODES \ + sudo mount /work + + if [ ${mck} -eq 1 ]; then + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $NODES \ + sudo ${MCK}/sbin/mcreboot.sh -c 2-17,70-85,138-153,206-221,20-35,88-103,156-171,224-239,36-51,104-119,172-187,240-255,52-67,120-135,188-203,256-271 -r 2-5,70-73,138-141,206-209:0+6-9,74-77,142-145,210-213:1+10-13,78-81,146-149,214-217:68+14-17,82-85,150-153,218-221:69+20-23,88-91,156-159,224-227:136+24-27,92-95,160-163,228-231:137+28-31,96-99,164-167,232-235:204+32-35,100-103,168-171,236-239:205+36-39,104-107,172-175,240-243:18+40-43,108-111,176-179,244-247:19+44-47,112-115,180-183,248-251:86+48-51,116-119,184-187,252-255:87+52-55,120-123,188-191,256-259:154+56-59,124-127,192-195,260-263:155+60-63,128-131,196-199,264-267:222+64-67,132-135,200-203,268-271:223 -m 32G@0,12G@1 + else + : + fi +fi + +if [ ${go} -eq 1 ]; then + cd $ABS_SRCDIR + PDSH_SSH_ARGS_APPEND="-tt 
-q" pdsh -t 2 -w $NODES \ + ulimit -u 16384; + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $NODES \ + ulimit -s unlimited + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $NODES \ + ulimit -c unlimited + + export KMP_STACKSIZE=64M + export OMP_NUM_THREADS=4 + + $MCEXEC $mcexecopt ./$fn +fi + diff --git a/test/uti/CT31.c b/test/uti/CT31.c new file mode 100644 index 00000000..e5f839de --- /dev/null +++ b/test/uti/CT31.c @@ -0,0 +1,158 @@ +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <pthread.h> +#include <errno.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/mman.h> +#include <signal.h> +#include <getopt.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include "util.h" + +#define WAITER_CPU 0 +#define WAKER_CPU 1 + +pthread_mutex_t mutex; +pthread_cond_t cond; +pthread_barrier_t bar; +int flag; +pthread_t thr; +long t_cond_wait, t_fwq; +long nloop; +long blocktime = 10L * 1000 * 1000; + +void *util_fn(void *arg) +{ + int i; + int ret; + long start, end; + + print_cpu_last_executed_on("Utility thread"); + + ret = syscall(732); + OKNGNOJUMP(ret == -1, "Utility thread is running on Linux\n"); + + pthread_barrier_wait(&bar); + for (i = 0; i < nloop; i++) { + start = rdtsc_light(); + + fwq(blocktime); + + end = rdtsc_light(); + t_fwq += end - start; + + pthread_mutex_lock(&mutex); + flag = 1; + pthread_cond_signal(&cond); + pthread_mutex_unlock(&mutex); + + } + + fn_fail: + return NULL; +} + +static struct option options[] = { + /* end */ + { NULL, 0, NULL, 0, } +}; + +int main(int argc, char **argv) +{ + int i; + int ret; + long start, end; + cpu_set_t cpuset; + pthread_attr_t attr; + pthread_barrierattr_t bar_attr; + struct sched_param param = { .sched_priority = 99 }; + int opt; + + while ((opt = getopt_long(argc, argv, "+b:", options, NULL)) != -1) { + switch (opt) { + case 'b': + blocktime = atoi(optarg); + break; + default: /* '?' 
*/ + printf("unknown option %c\n", optopt); + exit(1); + } + } + nloop = (10 * 1000000000UL) / blocktime; + printf("[INFO] nloop=%ld,blocktime=%ld\n", nloop, blocktime); + + + CPU_ZERO(&cpuset); + CPU_SET(WAITER_CPU, &cpuset); + if ((ret = sched_setaffinity(0, sizeof(cpu_set_t), &cpuset))) { + printf("Error: sched_setaffinity: %s\n", strerror(errno)); + goto fn_fail; + } + print_cpu_last_executed_on("Master thread"); + + fwq_init(); + + pthread_mutex_init(&mutex, NULL); + pthread_cond_init(&cond, NULL); + + pthread_barrierattr_init(&bar_attr); + pthread_barrier_init(&bar, &bar_attr, 2); + + ret = syscall(732); + OKNGNOJUMP(ret != -1, "Master thread is running on McKernel\n"); + + ret = syscall(731, 1, NULL); + OKNGNOJUMP(ret != -1, "util_indicate_clone\n"); + + if ((ret = pthread_attr_init(&attr))) { + printf("%s: Error: pthread_attr_init failed (%d)\n", __FUNCTION__, ret); + goto fn_fail; + } + + CPU_ZERO(&cpuset); + CPU_SET(WAKER_CPU, &cpuset); + + if ((ret = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset))) { + printf("%s: Error: pthread_attr_setaffinity_np failed (%d)\n", __FUNCTION__, ret); + goto fn_fail; + } + + if ((ret = pthread_create(&thr, &attr, util_fn, NULL))) { + fprintf(stderr, "Error: pthread_create failed (%d)\n", ret); + goto fn_fail; + } + + if ((ret = sched_setscheduler(0, SCHED_FIFO, &param))) { + fprintf(stderr, "Error: sched_setscheduler failed (%d)\n", ret); + goto fn_fail; + } + + syscall(701, 1 | 2); + pthread_barrier_wait(&bar); + for (i = 0; i < nloop; i++) { + start = rdtsc_light(); + + pthread_mutex_lock(&mutex); /* no futex */ + while(!flag) { + pthread_cond_wait(&cond, &mutex); /* 1st futex */ + } + flag = 0; + pthread_mutex_unlock(&mutex); /* 2nd futex */ + + end = rdtsc_light(); + t_cond_wait += end - start; + } + syscall(701, 4 | 8); + + pthread_join(thr, NULL); + printf("[INFO] waker: %ld cycles, waiter: %ld cycles, (waiter - waker) / nloop: %ld cycles\n", t_fwq, t_cond_wait, (t_cond_wait - t_fwq) / nloop); + + ret =
0; + fn_fail: + return ret; +} diff --git a/test/uti/CT31.sh b/test/uti/CT31.sh new file mode 100755 index 00000000..8a6a6e28 --- /dev/null +++ b/test/uti/CT31.sh @@ -0,0 +1,102 @@ +#!/usr/bin/bash + +bn=`basename $0` +fn=`echo $bn | sed 's/.sh//'` + +nloop=800 +stop=0 +reboot=0 +go=0 +mck=0 +NNODES=1 +NPROC=$((1 * NNODES)) +LASTNODE=8200 +use_hfi=0 + +while getopts srgmh:N:P:L: OPT +do + case ${OPT} in + s) stop=1 + ;; + r) reboot=1 + ;; + g) go=1 + ;; + m) mck=1 + ;; + h) use_hfi=1 + ;; + N) NNODES=$OPTARG + ;; + P) NPROC=$OPTARG + ;; + L) LASTNODE=$OPTARG + ;; + *) echo "invalid option -${OPT}" >&2 + exit 1 + esac +done + +MYHOME=/work/gg10/e29005 +ABS_SRCDIR=${MYHOME}/project/os/mckernel/test/uti +MCK=${MYHOME}/project/os/install + +NODES=`echo $(seq -s ",c" $(($LASTNODE + 1 - $NNODES)) $LASTNODE) | sed 's/^/c/'` +PPN=$((NPROC / NNODES)) +echo NPROC=$NPROC NNODES=$NNODES PPN=$PPN NODES=$NODES + +if [ ${mck} -eq 1 ]; then + MCEXEC="${MCK}/bin/mcexec" + mcexecopt="--enable-uti" + if [ ${use_hfi} -eq 1 ]; then + mcexecopt="--enable-hfi1 $mcexecopt" + fi +else + MCEXEC= + mcexecopt= +fi + +if [ ${stop} -eq 1 ]; then + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $NODES \ + sudo mount /work + + if [ ${mck} -eq 1 ]; then + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $NODES \ + /sbin/pidof mcexec \| xargs -r kill -9 + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $NODES \ + sudo ${MCK}/sbin/mcstop+release.sh + else + : + fi +fi + +if [ ${reboot} -eq 1 ]; then + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $NODES \ + sudo mount /work + + if [ ${mck} -eq 1 ]; then + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $NODES \ + sudo ${MCK}/sbin/mcreboot.sh -c 2-17,70-85,138-153,206-221,20-35,88-103,156-171,224-239,36-51,104-119,172-187,240-255,52-67,120-135,188-203,256-271 -r 
2-5,70-73,138-141,206-209:0+6-9,74-77,142-145,210-213:1+10-13,78-81,146-149,214-217:68+14-17,82-85,150-153,218-221:69+20-23,88-91,156-159,224-227:136+24-27,92-95,160-163,228-231:137+28-31,96-99,164-167,232-235:204+32-35,100-103,168-171,236-239:205+36-39,104-107,172-175,240-243:18+40-43,108-111,176-179,244-247:19+44-47,112-115,180-183,248-251:86+48-51,116-119,184-187,252-255:87+52-55,120-123,188-191,256-259:154+56-59,124-127,192-195,260-263:155+60-63,128-131,196-199,264-267:222+64-67,132-135,200-203,268-271:223 -m 32G@0,12G@1 + else + : + fi +fi + +if [ ${go} -eq 1 ]; then + cd $ABS_SRCDIR + make $fn + + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $NODES \ + ulimit -u 16384; + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $NODES \ + ulimit -s unlimited + + for((count=0;count<nloop;count++)); do + sudo $MCEXEC $mcexecopt ./$fn + echo ===== + echo $count + echo ===== + done + +fi + diff --git a/test/uti/CT32.c b/test/uti/CT32.c new file mode 100644 index 00000000..432ad91e --- /dev/null +++ b/test/uti/CT32.c @@ -0,0 +1,191 @@ +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <pthread.h> +#include <errno.h> +#include <sys/mman.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <linux/futex.h> +#include <getopt.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdint.h> +#include <uti.h> +#include "util.h" + +#define WAITER_CPU 0 +#define WAKER_CPU 1 + +int sem; +pthread_barrier_t bar; +int flag; +pthread_t thr; +long t_futex_wait, t_fwq; +long nloop; +long blocktime = 10L * 1000 * 1000; + +void *util_fn(void *arg) +{ + int i; + int ret; + long start, end; + int testid = 32101; + + print_cpu_last_executed_on("Utility thread"); + + ret = syscall(732); + OKNGNOJUMP(ret == -1, "Utility thread is running on Linux\n"); + + pthread_barrier_wait(&bar); + + for (i = 0; i < nloop; i++) { + start = rdtsc_light(); + + fwq(blocktime); + + end = rdtsc_light(); + t_fwq += end - start; + + if ((ret = 
syscall(__NR_futex, &sem, FUTEX_WAKE, 1, NULL, NULL, 0)) == -1) { + printf("Error: futex wake: %s\n", strerror(errno)); + } + + //pthread_barrier_wait(&bar); + + } + + ret = 0; + fn_fail: + return NULL; +} + +static struct option options[] = { + /* end */ + { NULL, 0, NULL, 0, } +}; + +int main(int argc, char **argv) +{ + int i; + int ret; + long start, end; + cpu_set_t cpuset; + pthread_attr_t attr; + pthread_barrierattr_t bar_attr; + struct sched_param param = { .sched_priority = 99 }; + int opt; + + while ((opt = getopt_long(argc, argv, "+b:", options, NULL)) != -1) { + switch (opt) { + case 'b': + blocktime = atoi(optarg); + break; + default: /* '?' */ + printf("unknown option %c\n", optopt); + exit(1); + } + } + nloop = (10 * 1000000000UL) / blocktime; + printf("[INFO] nloop=%ld,blocktime=%ld\n", nloop, blocktime); + + + CPU_ZERO(&cpuset); + CPU_SET(WAITER_CPU, &cpuset); + if ((ret = sched_setaffinity(0, sizeof(cpu_set_t), &cpuset))) { + printf("Error: sched_setaffinity: %s\n", strerror(errno)); + goto fn_fail; + } + print_cpu_last_executed_on("Master thread"); + + fwq_init(); + + pthread_barrierattr_init(&bar_attr); + pthread_barrier_init(&bar, &bar_attr, 2); + + if ((ret = pthread_attr_init(&attr))) { + printf("Error: pthread_attr_init: %s\n", strerror(errno)); + goto fn_fail; + } + +#if 0 + uti_attr_t uti_attr; + ret = uti_attr_init(&uti_attr); + if (ret) { + printf("%s: Error: uti_attr_init failed (%d)\n", __FUNCTION__, ret); + exit(1); + } + + /* Give a hint that it's beneficial to prioritize it in scheduling. 
*/ + ret = UTI_ATTR_HIGH_PRIORITY(&uti_attr); + if (ret) { + printf("%s: Error: UTI_ATTR_HIGH_PRIORITY failed (%d)\n", __FUNCTION__, ret); + exit(1); + } + + if ((ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED))) { + printf("%s: Error: pthread_attr_setdetachstate failed (%d)\n", __FUNCTION__, ret); + exit(1); + } + + if ((ret = uti_pthread_create(&thr, &attr, progress_function, NULL, &uti_attr))) { + printf("%s: Error: uti_pthread_create: %s\n", __FUNCTION__, strerror(errno)); + exit(1); + } + + if ((ret = uti_attr_destroy(&uti_attr))) { + printf("%s: Error: uti_attr_destroy failed (%d)\n", __FUNCTION__, ret); + exit(1); + } +#else + CPU_ZERO(&cpuset); + CPU_SET(WAKER_CPU, &cpuset); + + if ((ret = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset))) { + printf("Error: pthread_attr_setaffinity_np: %s\n", strerror(errno)); + goto fn_fail; + } + + ret = syscall(732); + OKNGNOJUMP(ret != -1, "Master thread is running on McKernel\n"); + + ret = syscall(731, 1, NULL); + OKNGNOJUMP(ret != -1, "util_indicate_clone\n"); + + if ((ret = pthread_create(&thr, &attr, util_fn, NULL))) { + printf("Error: pthread_create: %s\n", strerror(errno)); + goto fn_fail; + } + +#endif + + if ((ret = sched_setscheduler(0, SCHED_FIFO, &param))) { + printf("Error: sched_setscheduler: %s\n", strerror(errno)); + ret = -errno; + goto fn_fail; + } + + syscall(701, 1 | 2); + pthread_barrier_wait(&bar); + start = rdtsc_light(); + for (i = 0; i < nloop; i++) { + + if ((ret = syscall(__NR_futex, &sem, FUTEX_WAIT, 0, NULL, NULL, 0))) { + printf("Error: futex wait failed (%s)\n", strerror(errno)); + } + + //pthread_barrier_wait(&bar); /* 2nd futex */ + } + end = rdtsc_light(); + t_futex_wait += end - start; + syscall(701, 4 | 8); + + pthread_join(thr, NULL); + printf("[INFO] waiter: %ld cycles, waker: %ld cycles, (waiter - waker) / nloop: %ld cycles\n", t_fwq, t_futex_wait, (t_futex_wait - t_fwq) / nloop); + + ret = 0; + fn_fail: + return ret; +} diff --git a/test/uti/CT32.sh
b/test/uti/CT32.sh new file mode 100755 index 00000000..854cc27f --- /dev/null +++ b/test/uti/CT32.sh @@ -0,0 +1,104 @@ +#!/usr/bin/bash + +bn=`basename $0` +fn=`echo $bn | sed 's/.sh//'` + +stop=0 +reboot=0 +go=0 +mck=0 +disable_uti=1 +NNODES=1 +NPROC=$((1 * NNODES)) +LASTNODE=8200 +use_hfi=0 + +while getopts srgmh:N:P:L:d: OPT +do + case ${OPT} in + s) stop=1 + ;; + r) reboot=1 + ;; + g) go=1 + ;; + m) mck=1 + ;; + h) use_hfi=1 + ;; + d) disable_uti=$OPTARG + ;; + N) NNODES=$OPTARG + ;; + P) NPROC=$OPTARG + ;; + L) LASTNODE=$OPTARG + ;; + *) echo "invalid option -${OPT}" >&2 + exit 1 + esac +done + +MYHOME=/work/gg10/e29005 +ABS_SRCDIR=${MYHOME}/project/os/mckernel/test/uti +MCK=${MYHOME}/project/os/install + +NODES=`echo $(seq -s ",c" $(($LASTNODE + 1 - $NNODES)) $LASTNODE) | sed 's/^/c/'` +PPN=$((NPROC / NNODES)) +echo NPROC=$NPROC NNODES=$NNODES PPN=$PPN NODES=$NODES + +if [ $disable_uti -eq 1 ]; then + export DISABLE_UTI=1 +else + unset DISABLE_UTI +fi + +if [ ${mck} -eq 1 ]; then + MCEXEC="${MCK}/bin/mcexec" + mcexecopt="--enable-uti" + if [ ${use_hfi} -eq 1 ]; then + mcexecopt="--enable-hfi1 $mcexecopt" + fi +else + MCEXEC= + mcexecopt= +fi + +if [ ${stop} -eq 1 ]; then + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $NODES \ + sudo mount /work + + if [ ${mck} -eq 1 ]; then + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $NODES \ + /sbin/pidof mcexec \| xargs -r kill -9 + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $NODES \ + sudo ${MCK}/sbin/mcstop+release.sh + else + : + fi +fi + +if [ ${reboot} -eq 1 ]; then + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $NODES \ + sudo mount /work + + if [ ${mck} -eq 1 ]; then + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $NODES \ + sudo ${MCK}/sbin/mcreboot.sh -c 2-17,70-85,138-153,206-221,20-35,88-103,156-171,224-239,36-51,104-119,172-187,240-255,52-67,120-135,188-203,256-271 -r 
2-5,70-73,138-141,206-209:0+6-9,74-77,142-145,210-213:1+10-13,78-81,146-149,214-217:68+14-17,82-85,150-153,218-221:69+20-23,88-91,156-159,224-227:136+24-27,92-95,160-163,228-231:137+28-31,96-99,164-167,232-235:204+32-35,100-103,168-171,236-239:205+36-39,104-107,172-175,240-243:18+40-43,108-111,176-179,244-247:19+44-47,112-115,180-183,248-251:86+48-51,116-119,184-187,252-255:87+52-55,120-123,188-191,256-259:154+56-59,124-127,192-195,260-263:155+60-63,128-131,196-199,264-267:222+64-67,132-135,200-203,268-271:223 -m 32G@0,12G@1 + else + : + fi +fi + +if [ ${go} -eq 1 ]; then + cd $ABS_SRCDIR + make $fn + + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $NODES \ + ulimit -u 16384; + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $NODES \ + ulimit -s unlimited + + sudo $MCEXEC $mcexecopt ./$fn +fi + diff --git a/test/uti/CT33.c b/test/uti/CT33.c new file mode 100644 index 00000000..7a2a9f96 --- /dev/null +++ b/test/uti/CT33.c @@ -0,0 +1,167 @@ +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <pthread.h> +#include <errno.h> +#include <sys/mman.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <linux/futex.h> +#include <getopt.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdint.h> +#include "util.h" + +#define WAITER_CPU 0 +#define WAKER_CPU 1 + +int sem; +pthread_barrier_t bar; +int flag; +pthread_t thr; +long t_fwq, t_futex_wake, t_futex_wait; +long t_fwq2; +long nloop; +long blocktime = 10 * 1000 * 1000L; + +void *util_fn(void *arg) +{ + int i; + int ret; + long start, end; + long start2, end2; + + print_cpu_last_executed_on("Utility thread"); + + ret = syscall(732); + OKNGNOJUMP(ret == -1, "Utility thread is running on Linux\n"); + + /* Measure fwq time */ + start = rdtsc_light(); + for (i = 0; i < nloop; i++) { + fwq(blocktime); + } + end = rdtsc_light(); + t_fwq2 += end - start; + + /* Measure fwq + futex time */ + syscall(701, 1 | 2 | 0x80000000); + pthread_barrier_wait(&bar); + start 
= rdtsc_light(); + for (i = 0; i < nloop; i++) { + start2 = rdtsc_light(); + + fwq(blocktime); + + end2 = rdtsc_light(); + t_fwq += end2 - start2; + + if ((ret = syscall(__NR_futex, &sem, FUTEX_WAKE, 1, NULL, NULL, 0)) != 1) { + printf("Error: futex wake failed (%d,%s)\n", ret, strerror(errno)); + } + + //pthread_barrier_wait(&bar); + } + end = rdtsc_light(); + t_futex_wake += end - start; + + syscall(701, 4 | 8 | 0x80000000); + + fn_fail: + return NULL; +} + +static struct option options[] = { + /* end */ + { NULL, 0, NULL, 0, } +}; + +int main(int argc, char **argv) +{ + int i, j; + int ret; + long start, end; + cpu_set_t cpuset; + pthread_attr_t attr; + pthread_barrierattr_t bar_attr; + struct sched_param param = { .sched_priority = 99 }; + int opt; + + while ((opt = getopt_long(argc, argv, "+b:", options, NULL)) != -1) { + switch (opt) { + case 'b': + blocktime = atoi(optarg); + break; + default: /* '?' */ + printf("unknown option %c\n", optopt); + exit(1); + } + } + nloop = 10 * 1000000000UL / blocktime; + printf("[INFO] nloop=%ld,blocktime=%ld\n", nloop, blocktime); + + + CPU_ZERO(&cpuset); + CPU_SET(WAITER_CPU, &cpuset); + if ((ret = sched_setaffinity(0, sizeof(cpu_set_t), &cpuset))) { + printf("Error: sched_setaffinity: %s\n", strerror(errno)); + goto fn_fail; + } + print_cpu_last_executed_on("Master thread"); + + fwq_init(); + + pthread_barrierattr_init(&bar_attr); + pthread_barrier_init(&bar, &bar_attr, 2); + + ret = syscall(732); + OKNGNOJUMP(ret != -1, "Master thread is running on McKernel\n"); + + ret = syscall(731, 1, NULL); + OKNGNOJUMP(ret != -1, "util_indicate_clone\n"); + + if ((ret = pthread_attr_init(&attr))) { + printf("Error: pthread_attr_init failed: %s\n", strerror(errno)); + goto fn_fail; + } + + CPU_ZERO(&cpuset); + CPU_SET(WAKER_CPU, &cpuset); + + if ((ret = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset))) { + printf("Error: pthread_attr_setaffinity_np: %s\n", strerror(errno)); + goto fn_fail; + } + + if ((ret = 
pthread_create(&thr, &attr, util_fn, NULL))) { + printf("Error: pthread_create: %s\n", strerror(errno)); + goto fn_fail; + } + + if ((ret = sched_setscheduler(0, SCHED_FIFO, &param))) { + printf("Error: sched_setscheduler: %s\n", strerror(errno)); + goto fn_fail; + } + + pthread_barrier_wait(&bar); + start = rdtsc_light(); + for (i = 0; i < nloop; i++) { + + if ((ret = syscall(__NR_futex, &sem, FUTEX_WAIT, 0, NULL, NULL, 0))) { + printf("Error: futex wait: %s\n", strerror(errno)); + } + + //pthread_barrier_wait(&bar); + } + end = rdtsc_light(); + t_futex_wait += end - start; + + pthread_join(thr, NULL); + printf("[INFO] compute: %ld, wake: %ld, wait: %ld, wake - compute: %ld, wait - compute: %ld (cycles)\n", t_fwq, t_futex_wake, t_futex_wait, (t_futex_wake - t_fwq) / nloop, (t_futex_wait - t_fwq) / nloop); + + fn_fail: + return ret; +} diff --git a/test/uti/CT33.sh b/test/uti/CT33.sh new file mode 100755 index 00000000..5c83ba1c --- /dev/null +++ b/test/uti/CT33.sh @@ -0,0 +1,93 @@ +#!/usr/bin/bash + +bn=`basename $0` +fn=`echo $bn | sed 's/.sh//'` + +stop=0 +reboot=0 +go=0 +mck=0 +NNODES=1 +NPROC=$((1 * NNODES)) +LASTNODE=8200 +use_hfi=0 + +while getopts srgmh:N:P:L: OPT +do + case ${OPT} in + s) stop=1 + ;; + r) reboot=1 + ;; + g) go=1 + ;; + m) mck=1 + ;; + h) use_hfi=1 + ;; + N) NNODES=$OPTARG + ;; + P) NPROC=$OPTARG + ;; + L) LASTNODE=$OPTARG + ;; + *) echo "invalid option -${OPT}" >&2 + exit 1 + esac +done + +MYHOME=/work/gg10/e29005 +ABS_SRCDIR=${MYHOME}/project/os/mckernel/test/uti +MCK=${MYHOME}/project/os/install + +nodes=`echo $(seq -s ",c" $(($LASTNODE + 1 - $NNODES)) $LASTNODE) | sed 's/^/c/'` +PPN=$((NPROC / NNODES)) +echo NPROC=$NPROC NNODES=$NNODES PPN=$PPN nodes=$nodes + +if [ "`cat /etc/mtab | while read line; do cut -d" " -f 2; done | grep /work`" == "" ]; then + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes sudo mount /work +fi + +if [ ${mck} -eq 1 ]; then + MCEXEC="${MCK}/bin/mcexec" + mcexecopt="--enable-uti" + if [ ${use_hfi} -eq 1 ]; then +
mcexecopt="--enable-hfi1 $mcexecopt" + fi +else + MCEXEC= + mcexecopt= +fi + +if [ ${stop} -eq 1 ]; then + if [ ${mck} -eq 1 ]; then + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \ + /sbin/pidof mcexec \| xargs -r kill -9 + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \ + sudo ${MCK}/sbin/mcstop+release.sh + else + : + fi +fi + +if [ ${reboot} -eq 1 ]; then + if [ ${mck} -eq 1 ]; then + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \ + sudo ${MCK}/sbin/mcreboot.sh -c 2-17,70-85,138-153,206-221,20-35,88-103,156-171,224-239,36-51,104-119,172-187,240-255,52-67,120-135,188-203,256-271 -r 2-5,70-73,138-141,206-209:0+6-9,74-77,142-145,210-213:1+10-13,78-81,146-149,214-217:68+14-17,82-85,150-153,218-221:69+20-23,88-91,156-159,224-227:136+24-27,92-95,160-163,228-231:137+28-31,96-99,164-167,232-235:204+32-35,100-103,168-171,236-239:205+36-39,104-107,172-175,240-243:18+40-43,108-111,176-179,244-247:19+44-47,112-115,180-183,248-251:86+48-51,116-119,184-187,252-255:87+52-55,120-123,188-191,256-259:154+56-59,124-127,192-195,260-263:155+60-63,128-131,196-199,264-267:222+64-67,132-135,200-203,268-271:223 -m 32G@0,12G@1 + else + : + fi +fi + +if [ ${go} -eq 1 ]; then + cd $ABS_SRCDIR + make $fn + + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \ + ulimit -u 16384; + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \ + ulimit -s unlimited + + sudo $MCEXEC $mcexecopt ./$fn +fi + diff --git a/test/uti/CT34.c b/test/uti/CT34.c new file mode 100644 index 00000000..f4c8a98b --- /dev/null +++ b/test/uti/CT34.c @@ -0,0 +1,62 @@ +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <pthread.h> +#include <errno.h> +#include <unistd.h> +#include <string.h> +#include <stdint.h> +#include <sys/syscall.h> +#include <sys/mman.h> +#include <sched.h> +#include "util.h" + +void *util_fn(void *arg) +{ + int ret; + ret = syscall(732); + OKNGNOJUMP(ret == -1, "Utility thread is running on Linux\n"); + fn_fail: + return NULL; +} + +int my_thread_create() +{ + 
pthread_t thr; + int ret = 0; + + ret = syscall(731, 1, NULL); + OKNGNOJUMP(ret == 0, "util_indicate_clone,ret=%d,errno=%d\n", ret, errno); + + if ((ret = pthread_create(&thr, NULL, util_fn, NULL))) { + printf("Error: pthread_create: %s\n", strerror(errno)); + } + + if ((ret = pthread_join(thr, NULL))) { + printf("Error: pthread_join: %s\n", strerror(errno)); + } + + fn_exit: + return ret; + + fn_fail: + ret = -1; + goto fn_exit; +} + +int +main(int argc, char **argv) +{ + int ret = 0; + + if ((ret = my_thread_create())) { + printf("Error: my_thread_create,ret=%d\n", ret); + } + + fn_exit: + return ret; + + fn_fail: + ret = -1; + goto fn_exit; +} diff --git a/test/uti/CT35.sh b/test/uti/CT35.sh new file mode 100755 index 00000000..48bac59f --- /dev/null +++ b/test/uti/CT35.sh @@ -0,0 +1,36 @@ +#!/usr/bin/bash + +mck_dir=/work/gg10/e29005/project/os/install +nloop=800 +exe=CT26 +mck=1 + +mcexec="${mck_dir}/bin/mcexec" + +sudo ${mck_dir}/sbin/mcstop+release.sh +sudo ${mck_dir}/sbin/mcreboot.sh -c 1,2,3 -m 512M + +ulimit -c unlimited + +for((count=0;count<nloop;count++)); do + if [ $mck -eq 1 ]; then + export MCKERNEL_LD_PRELOAD=./preloadlib.so + # $mcexec --enable-uti ./$exe +# $mcexec gdb -batch -ex "run" -ex "bt" ./$exe + $mcexec ./$exe + else + export LD_PRELOAD=./preloadlib.so + ./$exe + fi + + rc=$? 
+ if [ $rc -ne 0 ]; then + echo mcexec returned $rc + exit + fi + + echo ===== + echo $count + echo ===== + +done diff --git a/test/uti/Makefile b/test/uti/Makefile new file mode 100644 index 00000000..2b0be066 --- /dev/null +++ b/test/uti/Makefile @@ -0,0 +1,46 @@ +.SUFFIXES: # Disable implicit rules + +SYSCALLL_INTERCEPT_DIR=$(HOME)/usr +UTI_DIR=$(HOME)/project/uti/install + +CC = gcc + +CPPFLAGS = -I$(UTI_DIR)/include +CCFLAGS = -g -O0 +LDFLAGS = -L$(UTI_DIR)/lib -Wl,-rpath,$(UTI_DIR)/lib -luti -lpthread -lrt + +SRCS = $(shell ls CT*.c) +EXES = $(SRCS:.c=) +OBJS = $(SRCS:.c=.o) + +CFLAGS_SO = -g -O2 -I$(SYSCALLL_INTERCEPT_DIR)/include +LDFLAGS_SO = -L$(SYSCALLL_INTERCEPT_DIR)/lib64 -Wl,-rpath,$(SYSCALLL_INTERCEPT_DIR)/lib64 -lsyscall_intercept -fpic -shared +SO_SRCS = preloadlib.c +SOS = $(SO_SRCS:.c=.so) + + +all: $(EXES) file $(SOS) + +file:: + dd bs=4096 count=1000 if=/dev/zero of=./file + +CT30.o:: CT30.c + icc $(CCFLAGS) -qopenmp $(CPPFLAGS) -c $< + +CT30: CT30.o + icc -o $@ $^ $(LDFLAGS) -qopenmp + +%.o:: %.c + $(CC) $(CCFLAGS) $(CPPFLAGS) -c $< + +%: %.o util.o + $(CC) -o $@ $^ $(LDFLAGS) + +util.o:: util.c + $(CC) $(CCFLAGS) $(CPPFLAGS) -c $< + +preloadlib.so: preloadlib.c + $(CC) $(CFLAGS_SO) $(LDFLAGS_SO) $^ -o $@ + +clean : + rm -f core $(EXES) $(OBJS) diff --git a/test/uti/README b/test/uti/README new file mode 100644 index 00000000..b3ce5467 --- /dev/null +++ b/test/uti/README @@ -0,0 +1,322 @@ +Linuxへのスレッド生成 +結合テスト仕様 + +CT01 システムコールテスト mmap/munmap/futex/exit +□ CT01001 mmap/munmap/futex/exit START + Linuxにスレッドを生成 (pthread_create)。成功 +□ CT01002 pthread_create OK + get_system() の戻り値が -1 (Linux で動作) +□ CT01003 get_system OK + mmap 発行。戻り値が (void *)-1 以外 +□ CT01004 mmap OK + mmap 領域に "mmap OK" を書き込む + Linuxスレッドからメインスレッドに対して cond_signal + メインスレッドにて、mmap 領域参照 (mmap 領域の内容を表示) +□ CT01005 mmap OK + メインスレッドからLinuxスレッドに cond_signal + Linux スレッドにて mmap 領域を munmap。戻り値が 0 +□ CT01006 munmap OK + Linuxスレッドからメインスレッドに対して cond_signal + メインスレッドが mmap 領域を参照 -> SIGSEGV発生 
+□ CT01007 munmap OK (SIGSEGV) + Linux スレッド終了 + メインスレッドにて pthread_join。成功 +□ CT01008 exit (pthread_join) OK +□ CT01009 futex (pthread_mutex/pthread_cond) OK +□ CT01010 END + +CT02 システムコールテスト mremap +□ CT02001 mremap START + Linuxにスレッドを生成 (pthread_create)。成功 +□ CT02002 pthread_create OK + get_system() の戻り値が -1 (Linux で動作) +□ CT02003 get_system OK + mmap 発行。戻り値が (void *)-1 以外 +□ CT02004 mmap OK + mmap 領域の縮小予定の領域に "mmap OK" を書き込む + Linuxスレッドからメインスレッドに対して cond_signal + メインスレッドにて、mmap 領域の書き込んだ領域を参照 (mmap 領域の内容を表示) +□ CT02005 mmap OK + メインスレッドからLinuxスレッドに cond_signal + Linux スレッドにて mmap 領域を mremap して縮小。戻り値が 0 +□ CT02006 mremap OK + Linuxスレッドからメインスレッドに対して cond_signal + メインスレッドが mmap 領域の縮小した領域を参照 -> SIGSEGV発生 +□ CT02007 mremap OK (SIGSEGV) + メインスレッドからLinuxスレッドに cond_signal + Linux スレッドにて mmap 領域を munmap。戻り値が 0 +□ CT02008 munmap OK + Linux スレッド終了 + メインスレッドにて pthread_join。成功 +□ CT02009 pthread_join OK +□ CT02010 END + +CT03 システムコールテスト mprotect +□ CT03001 mprotect START + Linuxにスレッドを生成 (pthread_create)。成功 +□ CT03002 pthread_create OK + get_system() の戻り値が -1 (Linux で動作) +□ CT03003 get_system OK + mmap 発行。戻り値が (void *)-1 以外 +□ CT03004 mmap OK + mmap 領域に "mmap OK" を書き込む + Linuxスレッドからメインスレッドに対して cond_signal + メインスレッドにて、mmap 領域を参照 (mmap 領域の内容を表示) +□ CT03005 mmap OK + メインスレッドからLinuxスレッドに cond_signal + Linux スレッドにて mmap 領域を mprotect して参照権のみ設定。戻り値が 0 +□ CT03006 mprotect OK + Linuxスレッドからメインスレッドに対して cond_signal + メインスレッドが mmap 領域に書き込み -> SIGSEGV発生 +□ CT03007 mremap OK (SIGSEGV) + メインスレッドからLinuxスレッドに cond_signal + Linux スレッドにて mmap 領域を munmap。戻り値が 0 +□ CT03008 munmap OK + Linux スレッド終了 + メインスレッドにて pthread_join。成功 +□ CT03009 pthread_join OK +□ CT03010 END + +CT04 システムコールテスト brk +□ CT04001 brk START + Linuxにスレッドを生成 (pthread_create)。成功 +□ CT04002 pthread_create OK + get_system() の戻り値が -1 (Linux で動作) +□ CT04003 get_system OK + sbrk(0)発行。戻り値を保存…(A) +□ CT04004 sbrk OK + sbrk(4096)発行。戻り値を保存…(B) + (A)の場所に "sbrk OK" を書き込む + Linuxスレッドからメインスレッドに対して cond_signal + メインスレッドにて、(A) 領域を参照 (領域の内容を表示) +□ 
CT04005 sbrk OK + メインスレッドにてsbrk(0)発行。戻り値を保存…(C) + メインスレッドからLinuxスレッドに cond_signal + Linuxスレッドでsbrk(0)発行。戻り値が(C)と一致している +□ CT04006 sbrk OK + Linux スレッド終了 + メインスレッドにて pthread_join。成功 +□ CT04007 pthread_join OK +□ CT04008 END + +CT05 システムコールテスト gettid +□ CT05001 gettid START + McKernelにスレッドを生成 (pthread_create)。成功 +□ CT05002 pthread_create OK + get_system() の戻り値が 0 (McKernel で動作) +□ CT05003 get_system OK + gettid() の戻り値を保存…(A) +□ CT05004 gettid OK %d + util_migrate_inter_kernel 発行。戻り値が 0 +□ CT05005 util_migrate_inter_kernel OK + get_system() の戻り値が -1 (Linux で動作) +□ CT05006 get_system OK + gettid() の戻り値が(A)と一致 +□ CT05007 gettid OK %d + Linux スレッド終了 + メインスレッドにて pthread_join。成功 +□ CT05008 pthread_join OK +□ CT05009 END + +CT06 システムコールテスト exit_group +□ CT06001 exit_group START + fork して子プロセス生成。以下、子プロセスの処理 + Linuxにスレッドを生成 (pthread_create)。成功 +□ CT06002 pthread_create OK + get_system() の戻り値が -1 (Linux で動作) +□ CT06003 get_system OK + Linuxスレッドが exit_group(99) +□ CT06004 pthread_join NG が表示されない + 親プロセスが wait。子プロセスの終了ステータスが 99 +□ CT06004 exit_group OK +□ CT06005 END + +CT07 システムコールテスト エラー系 +□ CT07001 error START + Linuxにスレッドを生成 (pthread_create)。成功 +□ CT07002 pthread_create OK + get_system() の戻り値が -1 (Linux で動作) +□ CT07003 get_system OK + clone() の戻り値が -1 で errno が ENOSYS +□ CT07004 clone OK %d + fork() の戻り値が -1 で errno が ENOSYS +□ CT07005 fork OK %d + vfork() の戻り値が -1 で errno が ENOSYS + +□ CT07006 vfork OK %d + execve() の戻り値が -1 で errno が ENOSYS +※ syscall_interceptの不具合によりvforkはSegmentation faultを起こすため、除外している + +□ CT07007 execve OK %d + Linux スレッド終了 + メインスレッドにて pthread_join。成功 +□ CT07008 pthread_join OK +□ CT07009 END + +CT08 uti_attr_t関連 +uti_attr_t の動作は実行環境によって変化するため、機械的にOK/NGの判断ができない。 +このため、affinityとschedulerを目視確認して、OK/NGを判断して下さい。 +UTI_FLAG_SAME_NUMA_DOMAIN のテストを容易にするため、mcreboot では特定のNUMA +ドメインにCPUを寄せて下さい。 +また、UTI_FLAG_SAME_L1のテストを容易にするために、論理コアを1つ以上空けるように +CPUを割り当てて下さい。 + +sched cpu には Linux に生成したスレッドの sched_getaffinity の結果を表示する。 +sched には同じく sched_getscheduler の結果を表示する。 + 
+□ CT08001 UTI_FLAG_NUMA_SET + sched cpu に NUMA domain 2 に属すLinux CPU集合が表示されること。 + sched=0 であること。 +□ CT08002 UTI_FLAG_NUMA_SET|UTI_FLAG_EXCLUSIVE_CPU + sched cpu に NUMA domain 2 に属すLinux CPUの内、1つが表示されること。 + (CT08001 のCPU集合のメンバであること) + sched=1 であること。 +□ CT08003 UTI_FLAG_NUMA_SET|UTI_FLAG_EXCLUSIVE_CPU(2) + sched cpu に NUMA domain 2 に属すLinux CPUの内、1つが表示されること。 + sched cpu は CT08002 とは異なるCPUが表示されていること(ラウンドロビン)。 + sched=1 であること。 +□ CT08004 UTI_FLAG_SAME_NUMA_DOMAIN + sched cpu にMcKernelに割り当てたCPUと同じNUMAドメインに属すLinux CPU集合が + 表示されること。 + sched=0 であること。 +□ CT08005 UTI_FLAG_SAME_NUMA_DOMAIN|UTI_FLAG_CPU_INTENSIVE + sched cpu に NUMA domain 2 に属すLinux CPUの内、1つが表示されること。 + (CT08004 のCPU集合のメンバであること) + sched=0 であること。 +□ CT08006 UTI_FLAG_DIFFERENT_NUMA_DOMAIN + sched cpu にMcKernelに割り当てたCPUと異なるNUMAドメインに属すLinux CPU集合が + 表示されること。 + sched=0 であること。 +□ CT08007 UTI_FLAG_DIFFERENT_NUMA_DOMAIN|UTI_FLAG_HIGH_PRIORITY + sched cpu にMcKernelに割り当てたCPUと異なるNUMAドメインに属すLinux CPU集合の + 内、1つが表示されること。(CT08006 のCPU集合のメンバであること) + sched=1 であること。 +□ CT08008 UTI_FLAG_SAME_L1 + sched cpu にMcKernelの親プロセスが実行するCPUとL1キャッシュを共有するLinuxの + CPU集合が表示されること。(McKernelへのCPU割り当て状態に依存するが、2論理コア + /物理コアの場合、高々1CPUのみが該当する。該当コアが存在しない場合は、全ての + コアが対象となる)。 + sched=0 であること。 +□ CT08009 UTI_FLAG_SAME_L1|UTI_FLAG_NON_COOPERATIVE + sched cpu にMcKernelの親プロセスが実行するCPUとL1キャッシュを共有するLinuxの + CPUの内1つが表示されること。(CT08008のCPU集合のメンバ。但し、CT08008で該当 + CPUが存在しない場合は、全てのコアが対象になる)。 + sched=0 であること。 +□ CT08010 UTI_FLAG_SAME_L2 + sched cpu にMcKernelの親プロセスが実行するCPUとL2キャッシュを共有するLinuxの + CPU集合が表示されること。(McKernelへのCPU割り当て状態に依存するが、2論理コア + /物理コアの場合、高々1CPUのみが該当する。該当コアが存在しない場合は、全ての + コアが対象となる)。 + sched=0 であること。 +□ CT08011 UTI_FLAG_SAME_L2|UTI_FLAG_CPU_INTENSIVE + sched cpu にMcKernelの親プロセスが実行するCPUとL2キャッシュを共有するLinuxの + CPUの内1つが表示されること。(CT08010のCPU集合のメンバ。但し、CT08010で該当 + CPUが存在しない場合は、全てのコアが対象になる)。 + sched=0 であること。 +□ CT08012 UTI_FLAG_SAME_L3 + sched cpu にMcKernelの親プロセスが実行するCPUとL3キャッシュを共有するLinuxの + CPU集合が表示されること。 + sched=0 であること。 +□ CT08013 
UTI_FLAG_SAME_L3|UTI_FLAG_CPU_INTENSIVE + sched cpu にMcKernelの親プロセスが実行するCPUとL3キャッシュを共有するLinuxの + CPUの内1つが表示されること。(CT08012のCPU集合のメンバ)。 + sched=0 であること。 +□ CT08014 UTI_FLAG_DIFFERENT_L1 + sched cpu にMcKernelの親プロセスが実行するCPUとL1キャッシュを共有しない + LinuxのCPU集合が表示されること。 + sched=0 であること。 +□ CT08015 UTI_FLAG_DIFFERENT_L1|UTI_FLAG_CPU_INTENSIVE + sched cpu にMcKernelの親プロセスが実行するCPUとL1キャッシュを共有しない + LinuxのCPUの内、1つが表示されること(CT08014のCPU集合のメンバ)。 + sched=0 であること。 +□ CT08016 UTI_FLAG_DIFFERENT_L2 + sched cpu にMcKernelの親プロセスが実行するCPUとL2キャッシュを共有しない + LinuxのCPU集合が表示されること。 + コアが対象となる)。 + sched=0 であること。 +□ CT08017 UTI_FLAG_DIFFERENT_L2|UTI_FLAG_CPU_INTENSIVE + sched cpu にMcKernelの親プロセスが実行するCPUとL2キャッシュを共有しない + LinuxのCPUの内、1つが表示されること(CT08016のCPU集合のメンバ)。 + sched=0 であること。 +□ CT08018 UTI_FLAG_DIFFERENT_L3 + sched cpu にMcKernelの親プロセスが実行するCPUとL3キャッシュを共有しない + LinuxのCPU集合が表示されること。 + sched=0 であること。 +□ CT08019 UTI_FLAG_DIFFERENT_L3|UTI_FLAG_CPU_INTENSIVE + sched cpu にMcKernelの親プロセスが実行するCPUとL3キャッシュを共有しない + LinuxのCPUの内、1つが表示されること(CT08018のCPU集合のメンバ)。 + sched=0 であること。 + +CT09 プログレス処理オーバーヘッド測定 + +MPI通信処理とMPIプログレス処理とのロック競合を模すことで、MPIプログレス処理の +オーバーヘッドを測定する。 + +MPI通信処理のステップは以下の通り。 +(1) 1usの間オブジェクトをロック +(2) 30usの間計算を行う +MPIプログレス処理のステップは以下の通り。 +(1) 10msに一回オブジェクトをロック +(2) 通信が終了したタイミングに重なった場合は2usの処理を行う。そうでない + 場合は直ちにアンロックする + +CT10 pthread_cond_{wait,signal}() [OK] + +CT11 measure time of system calls [OK] + +CT12 child (helper thread) futex() wait [OK] + +CT13 parent futex() wait [OK] + +CT14 child pthread_lock wait [OK] + +CT15 parent pthread_lock wait [OK] + +CT16 child pthread_cond_wait [OK] +Linuxはcondで起きる。その後mutexで起きたりしない。 + +CT17 parent pthread_cond_wait [OK] +McKernelはcondで起きる。その後mutexで起きる。 + +CT18 child (helper thread) futex() wait with FUTEX_WAIT_BITSET | FUTEX_CLOCK_REALTIME and non-zero timeout [OK] + +CT19 child (helper thread) futex() wait with FUTEX_WAIT_BITSET and non-zero timeout [OK] + +CT20 child (helper thread) futex() wait with FUTEX_WAIT and non-zero timeout [OK] + +CT21 
progress-threadのlockタイミングを変化させたテスト + +CT22 compute-threadのlockタイミングを変化させたテスト + +CT23 progress-threadのcond_waitタイミングを変化させたテスト + +CT24 compute-threadのcond_waitタイミングを変化させたテスト + +CT25 MPI_Isend()でのプロセス終了時メモリ破壊不具合のスケルトン。パラメタは以下の通り。 +* 1MB x 250 (./CT25 20 250) +* 128K x 1024 (./CT25 17 1024) + +CT26 終了時レースコンディションのテスト +* thread->statusがPS_EXITEDの場合もhold_thread()を呼んでデッドロックする不具合のテスト + +CT27 プログレス処理オーバーヘッド測定 +* CT09の複数プロセス版。async progressによってオーバーサブスクライブになった場合のオーバーヘッドを測定する。 + +CT28 taskset -c 0-7 lock-inc-lock x 10000 + +CT29 no reverse offload + +CT30 CT21にopenmpスレッドを追加したテスト + +CT31 pthread_cond_waitオーバーヘッド測定 +* waiterとwakerのCPUは、それぞれ、WAITER_CPU、WAKER_CPUで設定 + +CT32 futex waitオーバーヘッド測定 +* waiterとwakerのCPUは、それぞれ、WAITER_CPU、WAKER_CPUで設定 + +CT33 futex wakeオーバーヘッド測定 +* waiterとwakerのCPUは、それぞれ、WAITER_CPU、WAKER_CPUで設定 + +CT34 繰り返しpthread_create + +CT35 LD_PRELOADでsyscall_interceptを用いたsoをつけた場合のテスト \ No newline at end of file diff --git a/test/uti/driver/Makefile b/test/uti/driver/Makefile new file mode 100644 index 00000000..5d0a015b --- /dev/null +++ b/test/uti/driver/Makefile @@ -0,0 +1,12 @@ +obj-m += hello.o + +hello-y = driver.o + +.PHONY: clean install modules + +modules: + $(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules + +clean: + $(RM) .*.cmd *.mod.c *.o *.ko* Module.symvers modules.order -r .tmp* + diff --git a/test/uti/driver/driver.c b/test/uti/driver/driver.c new file mode 100644 index 00000000..262aaea9 --- /dev/null +++ b/test/uti/driver/driver.c @@ -0,0 +1,80 @@ +/* + * This file is created by mixing the following two codes. 
+ * + * URL: https://www.apriorit.com/dev-blog/195-simple-driver-for-linux-os + * Author: Danil Ishkov, Apriorit + * + * URL: http://www.linuxdevcenter.com/pub/a/linux/2007/07/05/devhelloworld-a-simple-introduction-to-device-drivers-under-linux.html + * Author: Valerie Henson <val@nmt.edu> + * + */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/fs.h> +#include <asm/uaccess.h> +#include <asm/errno.h> +#include <linux/init.h> + +static int hello_open(struct inode *inode, struct file *file) +{ + return 0; +} + +static int hello_release(struct inode *inode, struct file *file) +{ + return 0; +} + +static long hello_ioctl(struct file *file, unsigned int request, unsigned long arg) +{ + return 0; +} + +static struct file_operations fops = { + .open = hello_open, + .release = hello_release, + .unlocked_ioctl = hello_ioctl, +}; + +static int device_file_major_number = 0; +static const char device_name[] = "hello"; +static int register_device(void) +{ + int result = 0; + result = register_chrdev( 0, device_name, &fops ); + if( result < 0 ) { + printk( KERN_WARNING "hello: register_chrdev failed,result=%i", result ); + return result; + } + device_file_major_number = result; + printk( KERN_NOTICE "hello: major number=%i,try \"grep hello /proc/devices\"", device_file_major_number ); + return 0; +} + +void unregister_device(void) +{ + printk( KERN_NOTICE "hello: unregister_device() is called" ); + if(device_file_major_number != 0) { + unregister_chrdev(device_file_major_number, device_name); + } +} +/* Module entry point: registers the "hello" character device. Returns 0 on success or the negative value reported by register_device(). */ +static int __init hello_init(void) +{ + /* Hand the registration result to the module loader: always returning 0 left the module loaded without a device when register_chrdev() failed */ + return register_device(); +} + +module_init(hello_init); + +static void __exit hello_exit(void) +{ + unregister_device(); +} + +module_exit(hello_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR(" Danil Ishkov, Apriorit and Valerie Henson"); +MODULE_DESCRIPTION("Module that does nothing"); +MODULE_VERSION("1.0"); diff --git a/test/uti/mpi/001.c b/test/uti/mpi/001.c new file mode 100755 index 00000000..2584f3a0 ---
/dev/null +++ b/test/uti/mpi/001.c @@ -0,0 +1,216 @@ +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <sys/mman.h> +#include <mpi.h> +#include <unistd.h> +#include <sys/syscall.h> /* For SYS_xxx definitions */ +#include <sched.h> + +//#define DEBUG +#ifdef DEBUG +#define dprintf printf +#else +#define dprintf {} +#endif + +#define SZENTRY_DEFAULT (65536) /* Size of one slot */ +#define NENTRY_DEFAULT 10000 /* Number of slots */ + +#define DIFFNSEC(end, start) ((end.tv_sec - start.tv_sec) * 1000000000UL + (end.tv_nsec - start.tv_nsec)) +/* Print the 39th space-separated field of /proc/<pid>/task/<tid>/stat (the CPU the thread last ran on) next to sched_getcpu(). Returns 0 on success, -1 on failure. */ +static int print_cpu_last_executed_on() { + char fn[256]; + char* result = NULL; /* fn_exit frees it on every path, including open() failure */ + pid_t tid = syscall(SYS_gettid); + int fd = -1; + int offset; + int mpi_errno = 0; + + sprintf(fn, "/proc/%d/task/%d/stat", getpid(), (int)tid); + //printf("fn=%s\n", fn); + fd = open(fn, O_RDONLY); + if(fd == -1) { + printf("open() failed\n"); + goto fn_fail; + } + + result = malloc(65536); + if(result == NULL) { + printf("malloc() failed"); + goto fn_fail; + } + + int amount = 0; + offset = 0; + while(1) { + amount = read(fd, result + offset, 65536 - 1 - offset); /* stay inside the buffer and keep one byte for the terminator */ + if(amount == -1) { + printf("read() failed"); + goto fn_fail; + } + if(amount == 0) { + goto eof; + } + offset += amount; + } + eof: + result[offset] = '\0'; /* strsep() needs a NUL-terminated string */ + + char* next_delim = result; + char* field = NULL; + int i; + for(i = 0; i < 39; i++) { + field = strsep(&next_delim, " "); + } + if(field == NULL) { printf("stat has fewer than 39 fields\n"); goto fn_fail; } + int cpu = sched_getcpu(); + if(cpu == -1) { + printf("getpu() failed\n"); + goto fn_fail; + } + const char* pmi_rank = getenv("PMI_RANK"); /* NULL when the variable is not set; printed as -1 */ + printf("compute thread,pmi_rank=%02d,stat-cpu=%02d,sched_getcpu=%02d,tid=%d\n", pmi_rank ? atoi(pmi_rank) : -1, atoi(field), cpu, tid); fflush(stdout); + fn_exit: + if(fd != -1) { close(fd); } + free(result); + return mpi_errno; + fn_fail: + mpi_errno = -1; + goto fn_exit; +} + +void sendrecv(int rank, int nentry, char **sendv, char **recvv, int szentry, int src, int dest, MPI_Request* reqs,
MPI_Status* status, double usec) { + int i; + if(rank == 1) { + for(i = 0; i < nentry; i++) { + MPI_Isend(sendv[i], szentry, MPI_CHAR, dest, 0, MPI_COMM_WORLD, &reqs[i]); + if (nentry > 10 && i % (nentry / 10) == 0) { + printf("s"); fflush(stdout); + } + } + MPI_Waitall(nentry, reqs, status); + printf("w\n"); fflush(stdout); + } else { + for(i = 0; i < nentry; i++) { + MPI_Irecv(recvv[i], szentry, MPI_CHAR, src, 0, MPI_COMM_WORLD, &reqs[i]); + if (nentry > 10 && i % (nentry / 10) == 0) { + printf("r"); fflush(stdout); + } + } + usleep(usec); + MPI_Waitall(nentry, reqs, status); + printf("W\n"); fflush(stdout); + } +} + +int main(int argc, char **argv) { + int my_rank = -1, size = -1; + int i, j; + char **sendv, **recvv; + MPI_Status* status; + MPI_Request* reqs; + long szentry; + long nentry; + int src, dest; + struct timespec start, end; + double diffusec; + + if(argc == 3) { + szentry = atoi(argv[1]); + nentry = atoi(argv[2]); + } else { + szentry = SZENTRY_DEFAULT; + nentry = NENTRY_DEFAULT; + } + printf("szentry=%ld,nentry=%ld\n", szentry, nentry); + + status = (MPI_Status*)malloc(sizeof(MPI_Status) * nentry); + reqs = (MPI_Request*)malloc(sizeof(MPI_Request) * nentry); + + int actual; + + MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual); + printf("Thread support level is %d\n", actual); + + MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + + src = (size + my_rank - 1) % size; + dest = (my_rank + 1) % size; + + printf("rank=%d, size=%d, src=%d, dest=%d\n", my_rank, size, src, dest); + + sendv = malloc(sizeof(char *) * nentry); + if(!sendv) { printf("malloc failed"); goto fn_fail; } + for (i = 0; i < nentry; i++) { +#if 0 + int fd; + fd = open("./file", O_RDWR); + if(fd == -1) { printf("open failed\n"); goto fn_fail; } + sendv[i] = (char*)mmap(0, szentry, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); +#else + sendv[i] = (char*)mmap(0, szentry, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); +#endif + 
if(sendv[i] == MAP_FAILED) { printf("mmap failed"); goto fn_fail; } + dprintf("[%d] sendv[%d]=%p\n", my_rank, i, sendv[i]); + memset(sendv[i], 0xaa, szentry); + } + + recvv = malloc(sizeof(char *) * nentry); + if(!recvv) { printf("malloc failed"); goto fn_fail; } + for (i = 0; i < nentry; i++) { +#if 0 + int fd; + fd = open("./file", O_RDWR); + if(fd == -1) { printf("open failed\n"); goto fn_fail; } + recvv[i] = (char*)mmap(0, szentry, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); +#else + recvv[i] = (char*)mmap(0, szentry, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); +#endif + if(recvv[i] == MAP_FAILED) { printf("mmap failed"); goto fn_fail; } + dprintf("[%d] recvv[%d]=%p\n", my_rank, i, recvv[i]); + memset(recvv[i], 0, szentry); + } + + printf("after memset\n"); + + print_cpu_last_executed_on(); + + for (i = 0; i < 1; i++) { + MPI_Barrier(MPI_COMM_WORLD); + if(my_rank == 0) { + clock_gettime(CLOCK_REALTIME, &start); + } + sendrecv(my_rank, nentry, sendv, recvv, szentry, src, dest, reqs, status, 0); + MPI_Barrier(MPI_COMM_WORLD); + if(my_rank == 0) { + clock_gettime(CLOCK_REALTIME, &end); + diffusec = DIFFNSEC(end, start) / (double)1000; + printf("%4.4f sec\n", DIFFNSEC(end, start) / (double)1000000000); fflush(stdout); + } + + MPI_Barrier(MPI_COMM_WORLD); + if(my_rank == 0) { + clock_gettime(CLOCK_REALTIME, &start); + } + sendrecv(my_rank, nentry, sendv, recvv, szentry, src, dest, reqs, status, diffusec); + MPI_Barrier(MPI_COMM_WORLD); + if(my_rank == 0) { + clock_gettime(CLOCK_REALTIME, &end); + printf("%4.4f sec\n", DIFFNSEC(end, start) / (double)1000000000); fflush(stdout); + } + } + + fn_exit: + MPI_Finalize(); + return 0; + fn_fail: + goto fn_exit; +} + diff --git a/test/uti/mpi/002.c b/test/uti/mpi/002.c new file mode 100755 index 00000000..5a85014c --- /dev/null +++ b/test/uti/mpi/002.c @@ -0,0 +1,127 @@ +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> +#include <stdio.h> +#include <stdlib.h> +#include 
<string.h> +#include <stdint.h> +#include <sys/mman.h> +#include <mpi.h> +#include <unistd.h> +#include <sys/syscall.h> /* For SYS_xxx definitions */ +#include <sched.h> + +//#define DEBUG +#ifdef DEBUG +#define dprintf printf +#else +#define dprintf {} +#endif + +#define SZENTRY_DEFAULT (65536) /* Size of one slot */ +#define NENTRY_DEFAULT 10000 /* Number of slots */ + +#define DIFFNSEC(end, start) ((end.tv_sec - start.tv_sec) * 1000000000UL + (end.tv_nsec - start.tv_nsec)) +/* Print the 39th space-separated field of /proc/<pid>/task/<tid>/stat (the CPU the thread last ran on) next to sched_getcpu(). Returns 0 on success, -1 on failure. */ +static int print_cpu_last_executed_on() { + char fn[256]; + char* result = NULL; /* fn_exit frees it on every path, including open() failure */ + pid_t tid = syscall(SYS_gettid); + int fd = -1; + int offset; + int mpi_errno = 0; + + sprintf(fn, "/proc/%d/task/%d/stat", getpid(), (int)tid); + //printf("fn=%s\n", fn); + fd = open(fn, O_RDONLY); + if(fd == -1) { + printf("open() failed\n"); + goto fn_fail; + } + + result = malloc(65536); + if(result == NULL) { + printf("malloc() failed"); + goto fn_fail; + } + + int amount = 0; + offset = 0; + while(1) { + amount = read(fd, result + offset, 65536 - 1 - offset); /* stay inside the buffer and keep one byte for the terminator */ + if(amount == -1) { + printf("read() failed"); + goto fn_fail; + } + if(amount == 0) { + goto eof; + } + offset += amount; + } + eof: + result[offset] = '\0'; /* strsep() needs a NUL-terminated string */ + + char* next_delim = result; + char* field = NULL; + int i; + for(i = 0; i < 39; i++) { + field = strsep(&next_delim, " "); + } + if(field == NULL) { printf("stat has fewer than 39 fields\n"); goto fn_fail; } + int cpu = sched_getcpu(); + if(cpu == -1) { + printf("getpu() failed\n"); + goto fn_fail; + } + const char* pmi_rank = getenv("PMI_RANK"); /* NULL when the variable is not set; printed as -1 */ + printf("compute thread,pmi_rank=%02d,stat-cpu=%02d,sched_getcpu=%02d,tid=%d\n", pmi_rank ? atoi(pmi_rank) : -1, atoi(field), cpu, tid); fflush(stdout); + fn_exit: + if(fd != -1) { close(fd); } + free(result); + return mpi_errno; + fn_fail: + mpi_errno = -1; + goto fn_exit; +} + +int main(int argc, char **argv) { + int my_rank = -1, size = -1; + int i, j; + struct timespec start, end; + + int actual; + + printf("nloop=%d\n", atoi(argv[1])); + + MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual); + printf("Thread support level is %d\n", actual); + +
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + + print_cpu_last_executed_on(); + + printf("Before 1st barrier\n"); fflush(stdout); + MPI_Barrier(MPI_COMM_WORLD); + + printf("Before 2nd barrier\n"); fflush(stdout); + if(my_rank == 0) { + clock_gettime(CLOCK_REALTIME, &start); + } + for (i = 0; i < atoi(argv[1]); i++) { + MPI_Barrier(MPI_COMM_WORLD); + } + if(my_rank == 0) { + clock_gettime(CLOCK_REALTIME, &end); + printf("%4.4f sec\n", DIFFNSEC(end, start) / (double)1000000000); fflush(stdout); + } + + + fn_exit: + //MPI_Finalize(); + usleep(100000); + return 0; + fn_fail: + goto fn_exit; +} diff --git a/test/uti/mpi/003.c b/test/uti/mpi/003.c new file mode 100755 index 00000000..fa696ee2 --- /dev/null +++ b/test/uti/mpi/003.c @@ -0,0 +1,188 @@ +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <sys/mman.h> +#include <mpi.h> +#include <unistd.h> +#include <sys/syscall.h> /* For SYS_xxx definitions */ +#include <sched.h> + +//#define DEBUG +#ifdef DEBUG +#define dprintf printf +#else +#define dprintf {} +#endif + +#define SZENTRY_DEFAULT (65536) /* Size of one slot */ +#define NENTRY_DEFAULT 10000 /* Number of slots */ + +#define DIFFNSEC(end, start) ((end.tv_sec - start.tv_sec) * 1000000000UL + (end.tv_nsec - start.tv_nsec)) + +static int print_cpu_last_executed_on() { + char fn[256]; + char* result; + pid_t tid = syscall(SYS_gettid); + int fd; + int offset; + int mpi_errno = 0; + + sprintf(fn, "/proc/%d/task/%d/stat", getpid(), (int)tid); + //printf("fn=%s\n", fn); + fd = open(fn, O_RDONLY); + if(fd == -1) { + printf("open() failed\n"); + goto fn_fail; + } + + result = malloc(65536); + if(result == NULL) { + printf("malloc() failed"); + goto fn_fail; + } + + int amount = 0; + offset = 0; + while(1) { + amount = read(fd, result + offset, 65536); + // printf("amount=%d\n", amount); + if(amount == -1) { + 
printf("read() failed"); + goto fn_fail; + } + if(amount == 0) { + goto eof; + } + offset += amount; + } + eof:; + //printf("result:%s\n", result); + + char* next_delim = result; + char* field; + int i; + for(i = 0; i < 39; i++) { + field = strsep(&next_delim, " "); + } + + int cpu = sched_getcpu(); + if(cpu == -1) { + printf("getpu() failed\n"); + goto fn_fail; + } + + printf("compute thread,pmi_rank=%02d,stat-cpu=%02d,sched_getcpu=%02d,tid=%d\n", atoi(getenv("PMI_RANK")), atoi(field), cpu, tid); fflush(stdout); + fn_exit: + free(result); + return mpi_errno; + fn_fail: + mpi_errno = -1; + goto fn_exit; +} +/* Rank 1 posts nentry MPI_Isend()s of sendv[0]; every other rank posts nentry MPI_Irecv()s into recvv[0] and sleeps usec microseconds before waiting. Both sides finish with MPI_Waitall() and print a progress mark every nentry / 10 requests. */ +void sendrecv(int rank, int nentry, char **sendv, char **recvv, int szentry, int src, int dest, MPI_Request* reqs, MPI_Status* status, double usec) { + int i; + if(rank == 1) { + for(i = 0; i < nentry; i++) { + if (nentry >= 10 && i % (nentry / 10) == 0) { /* nentry / 10 is 0 below ten entries: skip the mark instead of dividing by zero */ + printf("s"); fflush(stdout); + } + MPI_Isend(sendv[0], szentry, MPI_CHAR, dest, 0, MPI_COMM_WORLD, &reqs[i]); + } + printf("\n"); fflush(stdout); + MPI_Waitall(nentry, reqs, status); + } else { + for(i = 0; i < nentry; i++) { + if (nentry >= 10 && i % (nentry / 10) == 0) { /* same guard as on the send side */ + printf("r"); fflush(stdout); + } + MPI_Irecv(recvv[0], szentry, MPI_CHAR, src, 0, MPI_COMM_WORLD, &reqs[i]); + } + usleep(usec); + MPI_Waitall(nentry, reqs, status); + } +} + +int main(int argc, char **argv) { + int my_rank = -1, size = -1; + int i, j; + char **sendv, **recvv; + MPI_Status* status; + MPI_Request* reqs; + long szentry; + long nentry; + int src, dest; + struct timespec start, end; + double diffusec; + + if(argc == 3) { + szentry = atoi(argv[1]); + nentry = atoi(argv[2]); + } else { + szentry = SZENTRY_DEFAULT; + nentry = NENTRY_DEFAULT; + } + + status = (MPI_Status*)malloc(sizeof(MPI_Status) * nentry); + reqs = (MPI_Request*)malloc(sizeof(MPI_Request) * nentry); + + int actual; + + MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual); + printf("Thread support level is %d\n", actual); + + MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); +
MPI_Comm_size(MPI_COMM_WORLD, &size); + + src = (size + my_rank - 1) % size; + dest = (my_rank + 1) % size; + + printf("rank=%d, size=%d, src=%d, dest=%d\n", my_rank, size, src, dest); + + sendv = malloc(sizeof(char *) * nentry); + if(!sendv) { printf("malloc failed"); goto fn_fail; } + for (i = 0; i < 1; i++) { + sendv[i] = (char*)mmap(0, szentry, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if(sendv[i] == MAP_FAILED) { printf("mmap failed"); goto fn_fail; } + dprintf("[%d] sendv[%d]=%p\n", my_rank, i, sendv[i]); + memset(sendv[i], 0xaa, szentry); + } + + recvv = malloc(sizeof(char *) * nentry); + if(!recvv) { printf("malloc failed"); goto fn_fail; } + for (i = 0; i < 1; i++) { + recvv[i] = (char*)mmap(0, szentry, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + if(recvv[i] == MAP_FAILED) { printf("mmap failed"); goto fn_fail; } + dprintf("[%d] recvv[%d]=%p\n", my_rank, i, recvv[i]); + memset(recvv[i], 0, szentry); + } + + printf("after memset\n"); + + print_cpu_last_executed_on(); + + printf("Before 1st barrier\n"); fflush(stdout); + MPI_Barrier(MPI_COMM_WORLD); + if(my_rank == 0) { + clock_gettime(CLOCK_REALTIME, &start); + } + sendrecv(my_rank, nentry, sendv, recvv, szentry, src, dest, reqs, status, 0); + printf("Before 2nd barrier\n"); fflush(stdout); + MPI_Barrier(MPI_COMM_WORLD); + if(my_rank == 0) { + clock_gettime(CLOCK_REALTIME, &end); + diffusec = DIFFNSEC(end, start) / (double)1000; + printf("%4.4f sec\n", DIFFNSEC(end, start) / (double)1000000000); fflush(stdout); + } + + fn_exit: + MPI_Finalize(); + return 0; + fn_fail: + goto fn_exit; +} diff --git a/test/uti/mpi/004.c b/test/uti/mpi/004.c new file mode 100755 index 00000000..bf92ca19 --- /dev/null +++ b/test/uti/mpi/004.c @@ -0,0 +1,281 @@ +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <sys/mman.h> +#include <mpi.h> +#include <unistd.h> 
+#include <getopt.h> +#include <sys/syscall.h> /* For SYS_xxx definitions */ +#include <sched.h> + +//#define DEBUG +#ifdef DEBUG +#define dprintf printf +#else +#define dprintf {} +#endif + +#define DIFFNSEC(end, start) ((end.tv_sec - start.tv_sec) * 1000000000UL + (end.tv_nsec - start.tv_nsec)) + +static inline void fixed_size_work() { + asm volatile( + "movq $0, %%rcx\n\t" + "1:\t" + "addq $1, %%rcx\n\t" + "cmpq $99, %%rcx\n\t" + "jle 1b\n\t" + : + : + : "rcx", "cc"); +} + +static inline void bulk_fsw(unsigned long n) { + int j; + for (j = 0; j < (n); j++) { + fixed_size_work(); + } +} + +double nspw; /* nsec per work */ +unsigned long nsec; + +void fwq_init() { + struct timespec start, end; + int i; + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); +#define N_INIT 10000000 + bulk_fsw(N_INIT); + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + nsec = DIFFNSEC(end, start); + nspw = nsec / (double)N_INIT; +} + +#if 1 +void fwq(long delay_nsec) { + if (delay_nsec < 0) { + return; + //printf("%s: delay_nsec < 0\n", __FUNCTION__); + } + bulk_fsw(delay_nsec / nspw); +} +#else /* For machines with large core-to-core performance variation (e.g. 
OFP) */ +void fwq(long delay_nsec) { + struct timespec start, end; + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + + while (1) { + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + if (DIFFNSEC(end, start) >= delay_nsec) { + break; + } + bulk_fsw(2); /* ~150 ns per iteration on FOP */ + } +} +#endif + +/* Print the 39th space-separated field of /proc/<pid>/task/<tid>/stat (the CPU the thread last ran on) next to sched_getcpu(). Returns 0 on success, -1 on failure. */ +static int print_cpu_last_executed_on() { + char fn[256]; + char* result = NULL; /* fn_exit frees it on every path, including open() failure */ + pid_t tid = syscall(SYS_gettid); + int fd = -1; + int offset; + int mpi_errno = 0; + + sprintf(fn, "/proc/%d/task/%d/stat", getpid(), (int)tid); + //printf("fn=%s\n", fn); + fd = open(fn, O_RDONLY); + if(fd == -1) { + printf("open() failed\n"); + goto fn_fail; + } + + result = malloc(65536); + if(result == NULL) { + printf("malloc() failed"); + goto fn_fail; + } + + int amount = 0; + offset = 0; + while(1) { + amount = read(fd, result + offset, 65536 - 1 - offset); /* stay inside the buffer and keep one byte for the terminator */ + if(amount == -1) { + printf("read() failed"); + goto fn_fail; + } + if(amount == 0) { + goto eof; + } + offset += amount; + } + eof: + result[offset] = '\0'; /* strsep() needs a NUL-terminated string */ + + char* next_delim = result; + char* field = NULL; + int i; + for(i = 0; i < 39; i++) { + field = strsep(&next_delim, " "); + } + if(field == NULL) { printf("stat has fewer than 39 fields\n"); goto fn_fail; } + int cpu = sched_getcpu(); + if(cpu == -1) { + printf("getpu() failed\n"); + goto fn_fail; + } + const char* pmi_rank = getenv("PMI_RANK"); /* NULL when the variable is not set; printed as -1 */ + printf("compute thread,pmi_rank=%02d,stat-cpu=%02d,sched_getcpu=%02d,pid=%d,tid=%d\n", pmi_rank ? atoi(pmi_rank) : -1, atoi(field), cpu, getpid(), tid); fflush(stdout); + fn_exit: + if(fd != -1) { close(fd); } + free(result); + return mpi_errno; + fn_fail: + mpi_errno = -1; + goto fn_exit; +} + +static inline int on_same_node(int ppn, int me, int you) { + return (me / ppn == you / ppn); +} + +/* isend-calc-wait */ +void my_send(int nproc, int ppn, int rank, double *sbuf, double *rbuf, int ndoubles, MPI_Request* reqs, long calc_nsec) { + int i; + int r = 0, s = 0; + int req = 0; + for (i = 0; i < nproc; i++) { + if (!on_same_node(ppn, rank, i)) { + MPI_Irecv(rbuf + r * ndoubles, ndoubles, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, &reqs[req]); + r++; + req++; +
MPI_Isend(sbuf + s * ndoubles, ndoubles, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, &reqs[req]); + s++; + req++; + } + } + fwq(calc_nsec); + MPI_Waitall(req, reqs, MPI_STATUSES_IGNORE); +} + +static struct option options[] = { + { + .name = "ppn", + .has_arg = required_argument, + .flag = NULL, + .val = 'P', + }, + /* end */ + { NULL, 0, NULL, 0, }, +}; + +int main(int argc, char **argv) { + int actual; + int ppn = -1; + int nproc; + int ndoubles = -1; + int my_rank = -1, size = -1; + int i, j; + double *sbuf, *rbuf; + MPI_Request* reqs; + struct timespec start, end; + long t_pure_l, t_overall_l; + long t_pure, t_overall; + int opt; + + fwq_init(); + + while ((opt = getopt_long(argc, argv, "+d:P:", options, NULL)) != -1) { + switch (opt) { + case 'd': + ndoubles = (1ULL << atoi(optarg)); + break; + case 'P': + ppn = atoi(optarg); + break; + default: /* '?' */ + printf("unknown option %c\n", optopt); + exit(1); + } + } + + if (ndoubles == -1 || ppn == -1) { + printf("specify ndoubles with -d and ppn with --ppn"); + exit(1); + } + + MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual); + if (actual != 3) { + printf("ERROR: Thread support level is %d (it should be 3)\n", actual); + exit(1); + } + + MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + MPI_Comm_size(MPI_COMM_WORLD, &nproc); + + if (my_rank == 0) { + printf("tid=%d,pid=%d,ndoubles=%d,nproc=%d\n", syscall(__NR_gettid), getpid(), ndoubles, nproc); + printf("nsec=%ld, nspw=%f\n", nsec, nspw); + } + + reqs = (MPI_Request*)malloc(sizeof(MPI_Request) * nproc * 2); + if(!reqs) { printf("malloc failed"); goto fn_fail; } + + sbuf = malloc(sizeof(double) * ndoubles * nproc); + if(!sbuf) { printf("malloc failed"); goto fn_fail; } + memset(sbuf, 0, sizeof(double) * ndoubles); + printf("tid=%d,pid=%d,sbuf=%p\n", syscall(__NR_gettid), getpid(), sbuf); + + rbuf = malloc(sizeof(double) * ndoubles * nproc); + if(!rbuf) { printf("malloc failed"); goto fn_fail; } + memset(rbuf, 0, sizeof(double) * ndoubles); + 
printf("tid=%d,pid=%d,rbuf=%p\n", syscall(__NR_gettid), getpid(), rbuf); + + print_cpu_last_executed_on(); + + /* Measure isend-wait time */ + MPI_Barrier(MPI_COMM_WORLD); +#define NSKIP 5 +#define NPURE 30 + for (i = 0; i < NPURE + NSKIP; i++) { + if (i == NSKIP) { + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + } + my_send(nproc, ppn, my_rank, sbuf, rbuf, ndoubles, reqs, 0); + } + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + t_pure_l = DIFFNSEC(end, start) / NPURE; + //printf("t_pure (local): %ld usec\n", t_pure_l / 1000UL); + MPI_Allreduce(&t_pure_l, &t_pure, 1, MPI_LONG, MPI_MAX, MPI_COMM_WORLD); + if (my_rank == 0) printf("t_pure (max): %ld usec\n", t_pure / 1000UL); + + /* Measure isend-calc-wait time */ + MPI_Barrier(MPI_COMM_WORLD); +#define NOVERALL 30 + for (i = 0; i < NOVERALL + NSKIP; i++) { + if (i == NSKIP) { + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + } + my_send(nproc, ppn, my_rank, sbuf, rbuf, ndoubles, reqs, t_pure); + } + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + t_overall_l = DIFFNSEC(end, start) / NOVERALL; + //printf("t_overall (local): %ld usec\n", t_overall_l / 1000UL); + MPI_Allreduce(&t_overall_l, &t_overall, 1, MPI_LONG, MPI_MAX, MPI_COMM_WORLD); + if (my_rank == 0) printf("t_overall (max): %ld usec\n", t_overall / 1000UL); + if (my_rank == 0) { + long t_abs = (t_pure * 2) - t_overall; + printf("overlap: %.2f %%\n", (t_abs * 100) / (double)t_pure); + } + + fn_exit: + MPI_Finalize(); + return 0; + fn_fail: + goto fn_exit; +} diff --git a/test/uti/mpi/005.c b/test/uti/mpi/005.c new file mode 100755 index 00000000..0803ebb8 --- /dev/null +++ b/test/uti/mpi/005.c @@ -0,0 +1,338 @@ +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <sys/mman.h> +#include <mpi.h> +#include <unistd.h> +#include <getopt.h> +#include <sys/syscall.h> /* For SYS_xxx definitions */ +#include <sched.h> + +//#define DEBUG 
+#ifdef DEBUG +#define dprintf printf +#else +#define dprintf {} +#endif + +#define DIFFNSEC(end, start) ((end.tv_sec - start.tv_sec) * 1000000000UL + (end.tv_nsec - start.tv_nsec)) + +#if 1 +#define BEGIN_EPOCH(win) do { MPI_Win_fence(0, win); } while(0) +#define END_EPOCH(win) do { MPI_Win_fence(0, win); } while(0) +#define BAR_EPOCH do { } while(0) +#else +#define BEGIN_EPOCH(win) do { MPI_Win_lock_all(0, win); } while(0) +#define END_EPOCH(win) do { MPI_Win_unlock_all(win); } while(0) +#define BAR_EPOCH do { MPI_Barrier(MPI_COMM_WORLD); } while(0) +#endif + + +static inline void fixed_size_work() { + asm volatile( + "movq $0, %%rcx\n\t" + "1:\t" + "addq $1, %%rcx\n\t" + "cmpq $99, %%rcx\n\t" + "jle 1b\n\t" + : + : + : "rcx", "cc"); +} + +static inline void bulk_fsw(unsigned long n) { + int j; + for (j = 0; j < (n); j++) { + fixed_size_work(); + } +} + +double nspw; /* nsec per work */ +unsigned long nsec; + +void fwq_init() { + struct timespec start, end; + int i; + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); +#define N_INIT 10000000 + bulk_fsw(N_INIT); + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + nsec = DIFFNSEC(end, start); + nspw = nsec / (double)N_INIT; +} + +#if 1 +void fwq(long delay_nsec) { + if (delay_nsec < 0) { + return; + //printf("%s: delay_nsec < 0\n", __FUNCTION__); + } + bulk_fsw(delay_nsec / nspw); +} +#else /* For machines with large core-to-core performance variation (e.g. 
OFP) */ +void fwq(long delay_nsec) { + struct timespec start, end; + + if (delay_nsec < 0) { return; } + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + + while (1) { + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + if (DIFFNSEC(end, start) >= delay_nsec) { + break; + } + bulk_fsw(2); /* ~150 ns per iteration on FOP */ + } +} +#endif + +/* Print the 39th space-separated field of /proc/<pid>/task/<tid>/stat (the CPU the thread last ran on) next to sched_getcpu(). Returns 0 on success, -1 on failure. */ +static int print_cpu_last_executed_on() { + char fn[256]; + char* result = NULL; /* fn_exit frees it on every path, including open() failure */ + pid_t tid = syscall(SYS_gettid); + int fd = -1; + int offset; + int mpi_errno = 0; + + sprintf(fn, "/proc/%d/task/%d/stat", getpid(), (int)tid); + //printf("fn=%s\n", fn); + fd = open(fn, O_RDONLY); + if(fd == -1) { + printf("open() failed\n"); + goto fn_fail; + } + + result = malloc(65536); + if(result == NULL) { + printf("malloc() failed"); + goto fn_fail; + } + + int amount = 0; + offset = 0; + while(1) { + amount = read(fd, result + offset, 65536 - 1 - offset); /* stay inside the buffer and keep one byte for the terminator */ + if(amount == -1) { + printf("read() failed"); + goto fn_fail; + } + if(amount == 0) { + goto eof; + } + offset += amount; + } + eof: + result[offset] = '\0'; /* strsep() needs a NUL-terminated string */ + + char* next_delim = result; + char* field = NULL; + int i; + for(i = 0; i < 39; i++) { + field = strsep(&next_delim, " "); + } + if(field == NULL) { printf("stat has fewer than 39 fields\n"); goto fn_fail; } + int cpu = sched_getcpu(); + if(cpu == -1) { + printf("getpu() failed\n"); + goto fn_fail; + } + const char* pmi_rank = getenv("PMI_RANK"); /* NULL when the variable is not set; printed as -1 */ + printf("compute thread,pmi_rank=%02d,stat-cpu=%02d,sched_getcpu=%02d,tid=%d\n", pmi_rank ? atoi(pmi_rank) : -1, atoi(field), cpu, tid); fflush(stdout); + fn_exit: + if(fd != -1) { close(fd); } + free(result); + return mpi_errno; + fn_fail: + mpi_errno = -1; + goto fn_exit; +} + +static inline int on_same_node(int ppn, int me, int you) { + return (me / ppn == you / ppn); +} + +/* fence-accumulate-calc-fence */ +void accumulate(int nproc, int ppn, int rank, double *wbuf, double *rbuf, int ndoubles, MPI_Win win, long calc_nsec) { + int i, j; + int r = 0, s = 0; + int req = 0; + BEGIN_EPOCH(win); + for (i = 0; i < nproc; i++) { + if (!on_same_node(ppn, rank, i)) { + for (j = 0; j < ndoubles; j++) { +
//printf("i=%d,j=%d,rbuf=%f,wbuf=%f\n", i, j, rbuf[i * ndoubles + j], wbuf[i * ndoubles + j]); + MPI_Accumulate(rbuf + i * ndoubles + j, 1, MPI_DOUBLE, i, i * ndoubles + j, 1, MPI_DOUBLE, MPI_SUM, win); + } + } + } + fwq(calc_nsec); + END_EPOCH(win); +} + +static struct option options[] = { + { + .name = "ppn", + .has_arg = required_argument, + .flag = NULL, + .val = 'P', + }, + /* end */ + { NULL, 0, NULL, 0, }, +}; + +int main(int argc, char **argv) { + int rc; + int actual; + int ppn = -1; + int nproc; + int ndoubles = -1; + int my_rank = -1, size = -1; + int i, j; + double *wbuf, *rbuf; + MPI_Win win; + struct timespec start, end; + long t_fence_l, t_pure_l, t_overall_l; + long t_fence, t_pure, t_overall; + int opt; + + fwq_init(); + + while ((opt = getopt_long(argc, argv, "+d:P:", options, NULL)) != -1) { + switch (opt) { + case 'd': + ndoubles = (1ULL << atoi(optarg)); + break; + case 'P': + ppn = atoi(optarg); + break; + default: /* '?' */ + printf("unknown option %c\n", optopt); + exit(1); + } + } + + if (ndoubles == -1 || ppn == -1) { + printf("specify ndoubles with -d and ppn with --ppn"); + exit(1); + } + + MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual); + if (actual != 3) { + printf("ERROR: MPI_THREAD_MULTIPLE not available (level was set to %d)\n", actual); + exit(1); + } + + MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + MPI_Comm_size(MPI_COMM_WORLD, &nproc); + + if (my_rank == 0) { + printf("ndoubles=%d,nproc=%d\n", ndoubles, nproc); + printf("nsec=%ld, nspw=%f\n", nsec, nspw); + } + + /* write-to buffer */ + wbuf = malloc(sizeof(double) * ndoubles * nproc); + if(!wbuf) { printf("malloc failed"); goto fn_fail; } + memset(wbuf, 0, sizeof(double) * ndoubles * nproc); + + /* read-from buffer */ + rbuf = malloc(sizeof(double) * ndoubles * nproc); + if(!rbuf) { printf("malloc failed"); goto fn_fail; } + memset(rbuf, 0, sizeof(double) * ndoubles * nproc); + + if (rc = MPI_Win_create(wbuf, sizeof(double) * ndoubles * nproc, sizeof(double), 
MPI_INFO_NULL, MPI_COMM_WORLD, &win)) { + printf("MPI_Win_create failed,rc=%d\n", rc); + } + + print_cpu_last_executed_on(); + + for (i = 0; i < nproc; i++) { + for (j = 0; j < ndoubles; j++) { + wbuf[i * ndoubles + j] = i + 1 + j; + rbuf[i * ndoubles + j] = (i + 1) * 2 + j; + } + } + +#if 0 + for (i = 0; i < nproc; i++) { + for (j = 0; j < ndoubles; j++) { + printf("wbuf,proc=%d,j=%d,val=%f\n", i, j, wbuf[i * ndoubles + j]); + printf("rbuf,proc=%d,j=%d,val=%f\n", i, j, rbuf[i * ndoubles + j]); + } + } +#endif + /* Measure fence-fence time */ + MPI_Barrier(MPI_COMM_WORLD); +#define NSKIP 5 +#define NFENCE 30 + for (i = 0; i < NFENCE + NSKIP; i++) { + if (i == NSKIP) { + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + } + BEGIN_EPOCH(win); + END_EPOCH(win); + } + BAR_EPOCH; + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + t_fence_l = DIFFNSEC(end, start) / NFENCE; + //printf("t_fence (local): %ld usec\n", t_fence_l / 1000UL); + MPI_Allreduce(&t_fence_l, &t_fence, 1, MPI_LONG, MPI_MAX, MPI_COMM_WORLD); + if (my_rank == 0) printf("t_fence (max): %ld usec\n", t_fence / 1000UL); + + /* Measure fence-acc-fence time */ + MPI_Barrier(MPI_COMM_WORLD); +#define NPURE 30 + for (i = 0; i < NPURE + NSKIP; i++) { + if (i == NSKIP) { + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); +} + accumulate(nproc, ppn, my_rank, wbuf, rbuf, ndoubles, win, 0); + } + BAR_EPOCH; + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + t_pure_l = DIFFNSEC(end, start) / NPURE; + //printf("t_pure (local): %ld usec\n", t_pure_l / 1000UL); + MPI_Allreduce(&t_pure_l, &t_pure, 1, MPI_LONG, MPI_MAX, MPI_COMM_WORLD); + if (my_rank == 0) printf("t_pure (max): %ld usec\n", t_pure / 1000UL); + +#if 0 + for (i = 0; i < nproc; i++) { + for (j = 0; j < ndoubles; j++) { + printf("wbuf,proc=%d,j=%d,val=%f\n", i, j, wbuf[i * ndoubles + j]); + printf("rbuf,proc=%d,j=%d,val=%f\n", i, j, rbuf[i * ndoubles + j]); + } + } +#endif + + /* Measure fenc-acc-calc-fence time */ + MPI_Barrier(MPI_COMM_WORLD); +#define 
NOVERALL 30 + for (i = 0; i < NOVERALL + NSKIP; i++) { + if (i == NSKIP) { + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); +} + accumulate(nproc, ppn, my_rank, wbuf, rbuf, ndoubles, win, t_pure - t_fence); + } + BAR_EPOCH; + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + t_overall_l = DIFFNSEC(end, start) / NOVERALL; + //printf("t_overall (local): %ld usec\n", t_overall_l / 1000UL); + MPI_Allreduce(&t_overall_l, &t_overall, 1, MPI_LONG, MPI_MAX, MPI_COMM_WORLD); + if (my_rank == 0) printf("t_overall (max): %ld usec\n", t_overall / 1000UL); + if (my_rank == 0) { + long t_abs = (t_pure * 2) - t_overall; + printf("overlap: %.2f %%\n", (t_abs * 100) / (double)t_pure); +} + + fn_exit: + MPI_Finalize(); + return 0; + fn_fail: + goto fn_exit; +} diff --git a/test/uti/mpi/005.sh b/test/uti/mpi/005.sh new file mode 100755 index 00000000..416e9ecc --- /dev/null +++ b/test/uti/mpi/005.sh @@ -0,0 +1,127 @@ +#!/usr/bin/bash + +#!/usr/bin/bash -x + +MYHOME=$HOME +UTI_MPI_TOP=${MYHOME}/project/os/mckernel/test/uti/mpi + +MCK=${MYHOME}/project/os/install +unset DISABLE_UTI + +cmdline="./005" + +stop=0 +reboot=0 +go=0 + +mck=0 +nloops=1 +ppn=1 + +while getopts srgac:n:mdl:P:o: OPT +do + case ${OPT} in + s) stop=1 + ;; + r) reboot=1 + ;; + g) go=1 + ;; + a) async=1 + ;; + c) cmdline=$OPTARG + ;; + n) ndoubles=$OPTARG + ;; + m) mck=1 + ;; + d) export DISABLE_UTI=1 + ;; + P) ppn=$OPTARG + ;; + o) omp_num_threads=$OPTARG + ;; + *) echo "invalid option -${OPT}" >&2 + exit 1 + esac +done + +if [ ${mck} -eq 1 ]; then + mcexec="${mck_dir}/bin/mcexec" + mcexecopt="--enable-uti --uti-thread-rank=$uti_thread_rank" + if [ ${use_hfi} -eq 1 ]; then + mcexecopt="--enable-hfi1 $mcexecopt" + fi + mcexecopt="-n $ppn -t $((256 / ppn + 4)) -m 1 $mcexecopt" +else + mcexec= + mcexecopt= +fi + +if [ ${mck} -eq 1 ]; then + i_mpi_pin=off +else + i_mpi_pin=on +fi + +if [ "$i_mpi_pin" == on ] ; then + i_mpi_pin_domain="export I_MPI_PIN_DOMAIN=$((omp_num_threads + 1)):scatter" +else + i_mpi_pin_domain= +fi 
+ +if [ $async -eq 0 ] || [ "$async_progress_pin" == "" ] ; then + i_mpi_async_progress_pin= +else + i_mpi_async_progress_pin="export I_MPI_ASYNC_PROGRESS_PIN=$async_progress_pin" +fi + +if [ ${stop} -eq 1 ]; then + if [ ${mck} -eq 1 ]; then + sudo ${MCK}/sbin/mcstop+release.sh + else + : + fi +fi + +if [ ${reboot} -eq 1 ]; then + if [ ${mck} -eq 1 ]; then + if hostname | grep ofp &>/dev/null; then + sudo ${MCK}/sbin/mcreboot.sh -s -c 2-17,70-85,138-153,206-221,20-35,88-103,156-171,224-239,36-51,104-119,172-187,240-255,52-67,120-135,188-203,256-271 -r 2-5,70-73,138-141,206-209:0+6-9,74-77,142-145,210-213:1+10-13,78-81,146-149,214-217:68+14-17,82-85,150-153,218-221:69+20-23,88-91,156-159,224-227:136+24-27,92-95,160-163,228-231:137+28-31,96-99,164-167,232-235:204+32-35,100-103,168-171,236-239:205+36-39,104-107,172-175,240-243:18+40-43,108-111,176-179,244-247:19+44-47,112-115,180-183,248-251:86+48-51,116-119,184-187,252-255:87+52-55,120-123,188-191,256-259:154+56-59,124-127,192-195,260-263:155+60-63,128-131,196-199,264-267:222+64-67,132-135,200-203,268-271:223 -m 32G@0,12G@1 + else + sudo ${MCK}/sbin/mcreboot.sh -s -c 1-15,65-79,129-143,193-207,17-31,81-95,145-159,209-223,33-47,97-111,161-175,225-239,49-63,113-127,177-191,241-255 -r 1-15:0+65-79:64+129-143:128+193-207:192+17-31:16+81-95:80+145-159:144+209-223:208+33-47:32+97-111:96+161-175:160+225-239:224+49-63:48+113-127:112+177-191:176+241-255:240 -m 12G@0,12G@1,12G@2,12G@3,3920M@4,3920M@5,3920M@6,3920M@7 + fi + else + : + fi +fi + +cd ${UTI_MPI_TOP} +( +cat <<EOF +#!/bin/sh + +export I_MPI_DEBUG=4 +export I_MPI_HYDRA_DEBUG=on +export PSM2_RCVTHREAD=0 + +export I_MPI_PIN=$i_mpi_pin +$i_mpi_pin_domain +export KMP_AFFINITY=granularity=thread,scatter + +export I_MPI_ASYNC_PROGRESS=$async +$i_mpi_async_progress_pin + + +${MCK}/bin/mcexec taskset -c 3 ./005 --ppn 16 +EOF +) > ./job.sh + +if [ ${go} -eq 1 ]; then + cd ${UTI_MPI_TOP} + make CC=gcc 008 + ./job.sh +fi + + + diff --git a/test/uti/mpi/006.c 
b/test/uti/mpi/006.c new file mode 100755 index 00000000..d7aa6e61 --- /dev/null +++ b/test/uti/mpi/006.c @@ -0,0 +1,625 @@ +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <sys/mman.h> +#include <mpi.h> +#include <unistd.h> +#include <getopt.h> +#include <sys/syscall.h> /* For SYS_xxx definitions */ +#include <sched.h> +#include <errno.h> + +#include <psm2.h> /* required for core PSM2 functions */ +#include <psm2_mq.h> /* required for PSM2 MQ functions (send, recv, etc) */ + +//#define DEBUG +#ifdef DEBUG +#define dprintf printf +#else +#define dprintf {} +#endif + +#define BUFFER_LENGTH 8000000 +#define CONNECT_ARRAY_SIZE 8 +void die(char *msg, int rc) { + fprintf(stderr, "%s: %d\n", msg, rc); +} + +#define DIFFNSEC(end, start) ((end.tv_sec - start.tv_sec) * 1000000000UL + (end.tv_nsec - start.tv_nsec)) + +static inline void fixed_size_work() { + asm volatile( + "movq $0, %%rcx\n\t" + "1:\t" + "addq $1, %%rcx\n\t" + "cmpq $99, %%rcx\n\t" + "jle 1b\n\t" + : + : + : "rcx", "cc"); +} + +static inline void bulk_fsw(unsigned long n) { + int j; + for (j = 0; j < (n); j++) { + fixed_size_work(); + } +} + +double nspw; /* nsec per work */ +unsigned long nsec; + +void fwq_init() { + struct timespec start, end; + int i; + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); +#define N_INIT 10000000 + bulk_fsw(N_INIT); + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + nsec = DIFFNSEC(end, start); + nspw = nsec / (double)N_INIT; +} + +#if 1 +void fwq(long delay_nsec) { + if (delay_nsec < 0) { + return; + //printf("%s: delay_nsec < 0\n", __FUNCTION__); + } + bulk_fsw(delay_nsec / nspw); +} +#else /* For machines with large core-to-core performance variation (e.g. 
OFP) */ +void fwq(long delay_nsec) { + struct timespec start, end; + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + + while (1) { + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + if (DIFFNSEC(end, start) >= delay_nsec) { + break; + } + bulk_fsw(2); /* ~150 ns per iteration on FOP */ + } +} +#endif + + +static int print_cpu_last_executed_on() { + char fn[256]; + char* result; + pid_t tid = syscall(SYS_gettid); + int fd; + int offset; + int mpi_errno = 0; + + sprintf(fn, "/proc/%d/task/%d/stat", getpid(), (int)tid); + //printf("fn=%s\n", fn); + fd = open(fn, O_RDONLY); + if(fd == -1) { + printf("open() failed\n"); + goto fn_fail; + } + + result = malloc(65536); + if(result == NULL) { + printf("malloc() failed"); + goto fn_fail; + } + + int amount = 0; + offset = 0; + while(1) { + amount = read(fd, result + offset, 65536); + // printf("amount=%d\n", amount); + if(amount == -1) { + printf("read() failed"); + goto fn_fail; + } + if(amount == 0) { + goto eof; + } + offset += amount; + } + eof:; + //printf("result:%s\n", result); + + char* next_delim = result; + char* field; + int i; + for(i = 0; i < 39; i++) { + field = strsep(&next_delim, " "); + } + + int cpu = sched_getcpu(); + if(cpu == -1) { + printf("getpu() failed\n"); + goto fn_fail; + } + + printf("compute thread,pmi_rank=%02d,stat-cpu=%02d,sched_getcpu=%02d,pid=%d,tid=%d\n", atoi(getenv("PMI_RANK")), atoi(field), cpu, getpid(), tid); fflush(stdout); + fn_exit: + free(result); + return mpi_errno; + fn_fail: + mpi_errno = -1; + goto fn_exit; +} + +static inline int on_same_node(int ppn, int me, int you) { + return (me / ppn == you / ppn); +} + +/* isend-calc-wait */ +void my_send(int nproc, int ppn, int rank, double *sbuf, double *rbuf, int ndoubles, MPI_Request* reqs, long calc_nsec) { + int i; + int r = 0, s = 0; + int req = 0; + for (i = 0; i < nproc; i++) { + if (!on_same_node(ppn, rank, i)) { + MPI_Irecv(rbuf + r * ndoubles, ndoubles, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, &reqs[req]); + r++; + req++; + 
MPI_Isend(sbuf + s * ndoubles, ndoubles, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, &reqs[req]); + s++; + req++; + } + } + fwq(calc_nsec); + MPI_Waitall(req, reqs, MPI_STATUSES_IGNORE); +} + + +/* Helper functions to find the server's PSM2 endpoint identifier (epid). */ +psm2_epid_t find_server(int rank) { + FILE *fp = NULL; + psm2_epid_t server_epid = 0; + char fn[256]; + sprintf(fn, "psm2-demo-server-epid-%d", rank); + printf("PSM2 client waiting for epid mapping file to appear...\n"); + while (!fp) { + sleep(1); + fp = fopen(fn, "r"); + } + fscanf(fp, "%lx", &server_epid); + fclose(fp); + printf("PSM2 client found server epid = 0x%lx\n", server_epid); + return server_epid; +} + +void write_epid_to_file(int rank, psm2_epid_t myepid) { + FILE *fp; + char fn[256]; + sprintf(fn, "psm2-demo-server-epid-%d", rank); + fp = fopen(fn, "w"); + if (!fp) { + fprintf(stderr, + "Exiting, couldn't write server's epid mapping file: "); + die(strerror(errno), errno); + } + fprintf(fp, "0x%lx", myepid); + fclose(fp); + printf("PSM2 server wrote epid = 0x%lx to file.\n", myepid); + return; +} + +int psm2_sendrecv(int rank, int sender, int receiver) { + struct psm2_ep_open_opts o; + psm2_uuid_t uuid; /* 16 byte */ + psm2_ep_t myep; + psm2_epid_t myepid; + psm2_epid_t server_epid; + psm2_epid_t epid_array[CONNECT_ARRAY_SIZE]; + int epid_array_mask[CONNECT_ARRAY_SIZE]; + psm2_error_t epid_connect_errors[CONNECT_ARRAY_SIZE]; + psm2_epaddr_t epaddr_array[CONNECT_ARRAY_SIZE]; + int rc; + int ver_major = PSM2_VERNO_MAJOR; + int ver_minor = PSM2_VERNO_MINOR; + char msgbuf[BUFFER_LENGTH]; + psm2_mq_t q; + psm2_mq_req_t req_mq; + memset(uuid, 0, sizeof(psm2_uuid_t)); /* Use a UUID of zero */ + *((int *)&uuid) = rand(); +/* Try to initialize PSM2 with the requested library version. + * * In this example, given the use of the PSM2_VERNO_MAJOR and MINOR + * * as defined in the PSM2 headers, ensure that we are linking with + * * the same version of PSM2 as we compiled against. 
*/ + + if ((rc = psm2_init(&ver_major, &ver_minor)) != PSM2_OK) { + die("couldn't init", rc); + return -1; + } + printf("PSM2 init done.\n"); + /* Setup the endpoint options struct */ + if ((rc = psm2_ep_open_opts_get_defaults(&o)) != PSM2_OK) { + die("couldn't set default opts", rc); + return -1; + } + printf("PSM2 opts_get_defaults done.\n"); + /* Attempt to open a PSM2 endpoint. This allocates hardware resources. */ + if ((rc = psm2_ep_open(uuid, &o, &myep, &myepid)) != PSM2_OK) { + die("couldn't psm2_ep_open()", rc); + return -1; + } + printf("PSM2 endpoint open done.\n"); + int is_server = (rank == receiver) ? 1 : 0; + if (is_server) { + write_epid_to_file(rank, myepid); + } else { + server_epid = find_server(receiver); + } + if (is_server) { + /* Server does nothing here. A connection does not have to be + * * established to receive messages. */ + printf("PSM2 server up.\n"); + } else { + /* Setup connection request info */ + /* PSM2 can connect to a single epid per request, + * * or an arbitrary number of epids in a single connect call. + * * For this example, use part of an array of + * * connection requests. */ + memset(epid_array_mask, 0, sizeof(int) * CONNECT_ARRAY_SIZE); + epid_array[0] = server_epid; + epid_array_mask[0] = 1; + /* Begin the connection process. + * * note that if a requested epid is not responding, + * * the connect call will still return OK. + * * The errors array will contain the state of individual + * * connection requests. 
*/ + if ((rc = psm2_ep_connect(myep, + CONNECT_ARRAY_SIZE, + epid_array, + epid_array_mask, + epid_connect_errors, + epaddr_array, + 0 /* no timeout */ + )) != PSM2_OK) { + die("couldn't ep_connect", rc); + return -1; + } + printf("PSM2 connect request processed.\n"); + /* Now check if our connection to the server is ready */ + if (epid_connect_errors[0] != PSM2_OK) { + die("couldn't connect to server", + epid_connect_errors[0]); + return -1; + } + printf("PSM2 client-server connection established.\n"); + } + /* Setup our PSM2 message queue */ + if ((rc = psm2_mq_init(myep, PSM2_MQ_ORDERMASK_NONE, NULL, 0, &q)) + != PSM2_OK) { + die("couldn't initialize PSM2 MQ", rc); + return -1; + } + printf("PSM2 MQ init done.\n"); + if (is_server) { + psm2_mq_tag_t t = {0xABCD}; + psm2_mq_tag_t tm = {-1}; + /* Post the receive request */ + if ((rc = psm2_mq_irecv2(q, PSM2_MQ_ANY_ADDR, + &t, /* message tag */ + &tm, /* message tag mask */ + 0, /* no flags */ + msgbuf, BUFFER_LENGTH, + NULL, /* no context to add */ + &req_mq /* track irecv status */ + )) != PSM2_OK) { + die("couldn't post psm2_mq_irecv()", rc); + return -1; + } + printf("PSM2 MQ irecv() posted\n"); + /* Wait until the message arrives */ + if ((rc = psm2_mq_wait(&req_mq, NULL)) != PSM2_OK) { + die("couldn't wait for the irecv", rc); + return -1; + } + printf("PSM2 MQ wait() done.\n"); + printf("Message from client:\n"); + printf("%s", msgbuf); + unlink("psm2-demo-server-epid"); + } else { + /* Say hello */ + snprintf(msgbuf, BUFFER_LENGTH, + "Hello world from epid=0x%lx, pid=%d.\n", + myepid, getpid()); + psm2_mq_tag_t t = {0xABCD}; + if ((rc = psm2_mq_send2(q, + epaddr_array[0], /* destination epaddr */ + PSM2_MQ_FLAG_SENDSYNC, /* no flags */ + &t, /* tag */ + msgbuf, BUFFER_LENGTH + )) != PSM2_OK) { + die("couldn't post psm2_mq_isend", rc); + return -1; + } + printf("PSM2 MQ send() done.\n"); + } +/* Close down the MQ */ + if ((rc = psm2_mq_finalize(q)) != PSM2_OK) { + die("couldn't psm2_mq_finalize()", rc); + 
return -1; + } + printf("PSM2 MQ finalized.\n"); +/* Close our ep, releasing all hardware resources. + * * Try to close all connections properly */ + if ((rc = psm2_ep_close(myep, PSM2_EP_CLOSE_GRACEFUL, + 0 /* no timeout */)) != PSM2_OK) { + die("couldn't psm2_ep_close()", rc); + return -1; + } + printf("PSM2 ep closed.\n"); + /* Release all local PSM2 resources */ + if ((rc = psm2_finalize()) != PSM2_OK) { + die("couldn't psm2_finalize()", rc); + return -1; + } + printf("PSM2 shut down, exiting.\n"); + return 0; +} + +static struct option options[] = { + { + .name = "ppn", + .has_arg = required_argument, + .flag = NULL, + .val = 'P', + }, + /* end */ + { NULL, 0, NULL, 0, }, +}; + +struct thr_arg { + volatile int bar_count; /* Barrier before entering loop */ + pthread_mutex_t bar_lock; + pthread_cond_t bar_cond; + pthread_t pthread; + int rank; + int ppn; + int nproc; +}; + +struct thr_arg thr_arg; + +void *progress_fn(void *arg) { + struct thr_arg *thr_arg = (struct thr_arg *)arg; + int rc; + int i; + + rc = syscall(732); + if (rc == -1) + fprintf(stdout, "CT09100 progress_fn running on Linux OK\n"); + else { + fprintf(stdout, "CT09100 progress_fn running on McKernel NG (%d)\n", rc); + } + + printf("progress,enter\n"); + + /* barrier */ + pthread_mutex_lock(&thr_arg->bar_lock); + thr_arg->bar_count++; + if (thr_arg->bar_count == 2) { + if ((rc = pthread_cond_broadcast(&thr_arg->bar_cond))) { + printf("pthread_cond_broadcast failed,rc=%d\n", rc); + } + } + while (thr_arg->bar_count != 2) { + if ((rc = pthread_cond_wait(&thr_arg->bar_cond, &thr_arg->bar_lock))) { + printf("pthread_cond_wait failed,rc=%d\n", rc); + } + } + pthread_mutex_unlock(&thr_arg->bar_lock); + +#if 0 + printf("progress,after barrier\n"); + for (i = 0; i < thr_arg->nproc; i++) { + if (!on_same_node(thr_arg->ppn, thr_arg->rank, i)) { + if (thr_arg->rank < i) { + psm2_sendrecv(thr_arg->rank, thr_arg->rank, i); + } else { + psm2_sendrecv(thr_arg->rank, i, thr_arg->rank); + } + } + } +#endif + + 
/* barrier */ + pthread_mutex_lock(&thr_arg->bar_lock); + thr_arg->bar_count--; + if (thr_arg->bar_count == 0) { + if ((rc = pthread_cond_broadcast(&thr_arg->bar_cond))) { + printf("pthread_cond_broadcast failed,rc=%d\n", rc); + } + } + while (thr_arg->bar_count != 0) { + if ((rc = pthread_cond_wait(&thr_arg->bar_cond, &thr_arg->bar_lock))) { + printf("pthread_cond_wait failed,rc=%d\n", rc); + } + } + pthread_mutex_unlock(&thr_arg->bar_lock); + + + printf("progress,exit\n"); + return NULL; +} + +int main(int argc, char **argv) { + int rc; + int actual; + int nproc; + int ppn = -1; + int ndoubles = -1; + int my_rank = -1, size = -1; + int i, j; + double *sbuf, *rbuf; + MPI_Request* reqs; + struct timespec start, end; + long t_pure_l, t_overall_l; + long t_pure, t_overall; + int opt; + pthread_condattr_t condattr; + pthread_mutexattr_t mutexattr; + + fwq_init(); + + while ((opt = getopt_long(argc, argv, "+d:P:", options, NULL)) != -1) { + switch (opt) { + case 'd': + ndoubles = (1ULL << atoi(optarg)); + break; + case 'P': + ppn = atoi(optarg); + break; + default: /* '?' 
*/ + printf("unknown option %c\n", optopt); + exit(1); + } + } + + if (ndoubles == -1 || ppn == -1) { + printf("specify ndoubles with -d and ppn with --ppn"); + exit(1); + } + + MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual); + if (actual != 3) { + printf("ERROR: Thread support level is %d (it should be 3)\n", actual); + exit(1); + } + + MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + MPI_Comm_size(MPI_COMM_WORLD, &nproc); + + if (my_rank == 0) { + printf("tid=%d,pid=%d,ndoubles=%d,nproc=%d\n", syscall(__NR_gettid), getpid(), ndoubles, nproc); + printf("nsec=%ld, nspw=%f\n", nsec, nspw); + } + + /* Spawn a thread */ + thr_arg.rank = my_rank; + thr_arg.ppn = ppn; + thr_arg.nproc = nproc; + thr_arg.bar_count = 0; + + pthread_condattr_init(&condattr); + pthread_cond_init(&thr_arg.bar_cond, &condattr); + + pthread_mutexattr_init(&mutexattr); + pthread_mutex_init(&thr_arg.bar_lock, &mutexattr); + + char *uti_str = getenv("DISABLE_UTI"); + int uti_val = uti_str ? atoi(uti_str) : 0; + if (!uti_val) { + rc = syscall(731, 1, NULL); + if (rc) { + fprintf(stdout, "CT09003 INFO: uti not available (rc=%d)\n", rc); + } else { + fprintf(stdout, "CT09003 INFO: uti available\n"); + } + } else { + fprintf(stdout, "CT09003 INFO: uti disabled\n"); + } + + rc = pthread_create(&thr_arg.pthread, NULL, progress_fn, &thr_arg); + if (rc){ + fprintf(stdout, "pthread_create: %d\n", rc); + exit(1); + } + + /* barrier */ + pthread_mutex_lock(&thr_arg.bar_lock); + thr_arg.bar_count++; + if (thr_arg.bar_count == 2) { + if ((rc = pthread_cond_broadcast(&thr_arg.bar_cond))) { + printf("pthread_cond_broadcast failed,rc=%d\n", rc); + } + } + while (thr_arg.bar_count != 2) { + if ((rc = pthread_cond_wait(&thr_arg.bar_cond, &thr_arg.bar_lock))) { + printf("pthread_cond_wait failed,rc=%d\n", rc); + } + } + pthread_mutex_unlock(&thr_arg.bar_lock); + + printf("parent,after barrier\n"); + + + reqs = (MPI_Request*)malloc(sizeof(MPI_Request) * nproc * 2); + if(!reqs) { printf("malloc failed"); goto 
fn_fail; } + + sbuf = malloc(sizeof(double) * ndoubles * nproc); + if(!sbuf) { printf("malloc failed"); goto fn_fail; } + memset(sbuf, 0, sizeof(double) * ndoubles); + printf("tid=%d,pid=%d,sbuf=%p\n", syscall(__NR_gettid), getpid(), sbuf); + + rbuf = malloc(sizeof(double) * ndoubles * nproc); + if(!rbuf) { printf("malloc failed"); goto fn_fail; } + memset(rbuf, 0, sizeof(double) * ndoubles); + printf("tid=%d,pid=%d,rbuf=%p\n", syscall(__NR_gettid), getpid(), rbuf); + + print_cpu_last_executed_on(); + + /* Measure isend-wait time */ + MPI_Barrier(MPI_COMM_WORLD); +#define NSKIP 5 +#define NPURE 30 + for (i = 0; i < NPURE + NSKIP; i++) { + if (i == NSKIP) { + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + } + my_send(nproc, ppn, my_rank, sbuf, rbuf, ndoubles, reqs, 0); + } + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + t_pure_l = DIFFNSEC(end, start) / NPURE; + //printf("t_pure (local): %ld usec\n", t_pure_l / 1000UL); + MPI_Allreduce(&t_pure_l, &t_pure, 1, MPI_LONG, MPI_MAX, MPI_COMM_WORLD); + if (my_rank == 0) printf("t_pure (max): %ld usec\n", t_pure / 1000UL); + + /* Measure isend-calc-wait time */ + MPI_Barrier(MPI_COMM_WORLD); +#define NOVERALL 30 + for (i = 0; i < NOVERALL + NSKIP; i++) { + if (i == NSKIP) { + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + } + my_send(nproc, ppn, my_rank, sbuf, rbuf, ndoubles, reqs, t_pure); + } + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + t_overall_l = DIFFNSEC(end, start) / NOVERALL; + //printf("t_overall (local): %ld usec\n", t_overall_l / 1000UL); + MPI_Allreduce(&t_overall_l, &t_overall, 1, MPI_LONG, MPI_MAX, MPI_COMM_WORLD); + if (my_rank == 0) printf("t_overall (max): %ld usec\n", t_overall / 1000UL); + if (my_rank == 0) { + long t_abs = (t_pure * 2) - t_overall; + printf("overlap: %.2f %%\n", (t_abs * 100) / (double)t_pure); + } + + /* barrier */ + pthread_mutex_lock(&thr_arg.bar_lock); + thr_arg.bar_count--; + if (thr_arg.bar_count == 0) { + if ((rc = pthread_cond_broadcast(&thr_arg.bar_cond))) { + 
printf("pthread_cond_broadcast failed,rc=%d\n", rc); + } + } + while (thr_arg.bar_count != 0) { + if ((rc = pthread_cond_wait(&thr_arg.bar_cond, &thr_arg.bar_lock))) { + printf("pthread_cond_wait failed,rc=%d\n", rc); + } + } + pthread_mutex_unlock(&thr_arg.bar_lock); + + + pthread_join(thr_arg.pthread, NULL); + + fn_exit: + MPI_Finalize(); + return 0; + fn_fail: + goto fn_exit; +} diff --git a/test/uti/mpi/007.c b/test/uti/mpi/007.c new file mode 100755 index 00000000..af31c581 --- /dev/null +++ b/test/uti/mpi/007.c @@ -0,0 +1,563 @@ +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <sys/mman.h> +#include <mpi.h> +#include <unistd.h> +#include <getopt.h> +#include <sys/syscall.h> /* For SYS_xxx definitions */ +#include <sched.h> +#include <errno.h> + +#include <psm2.h> /* required for core PSM2 functions */ +#include <psm2_mq.h> /* required for PSM2 MQ functions (send, recv, etc) */ + +//#define DEBUG +#ifdef DEBUG +#define dprintf printf +#else +#define dprintf {} +#endif + +#define BUFFER_LENGTH 8000000 +#define CONNECT_ARRAY_SIZE 8 +void die(char *msg, int rc) { + fprintf(stderr, "%s: %d\n", msg, rc); + exit(1); +} + +#define DIFFNSEC(end, start) ((end.tv_sec - start.tv_sec) * 1000000000UL + (end.tv_nsec - start.tv_nsec)) + +static inline void fixed_size_work() { + asm volatile( + "movq $0, %%rcx\n\t" + "1:\t" + "addq $1, %%rcx\n\t" + "cmpq $99, %%rcx\n\t" + "jle 1b\n\t" + : + : + : "rcx", "cc"); +} + +static inline void bulk_fsw(unsigned long n) { + int j; + for (j = 0; j < (n); j++) { + fixed_size_work(); + } +} + +double nspw; /* nsec per work */ +unsigned long nsec; + +void fwq_init() { + struct timespec start, end; + int i; + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); +#define N_INIT 10000000 + bulk_fsw(N_INIT); + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + nsec = DIFFNSEC(end, start); + nspw = nsec / (double)N_INIT; +} 
+ +#if 1 +void fwq(long delay_nsec) { + if (delay_nsec < 0) { + return; + //printf("%s: delay_nsec < 0\n", __FUNCTION__); + } + bulk_fsw(delay_nsec / nspw); +} +#else /* For machines with large core-to-core performance variation (e.g. OFP) */ +void fwq(long delay_nsec) { + struct timespec start, end; + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + + while (1) { + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + if (DIFFNSEC(end, start) >= delay_nsec) { + break; + } + bulk_fsw(2); /* ~150 ns per iteration on FOP */ + } +} +#endif + + +static int print_cpu_last_executed_on() { + char fn[256]; + char* result; + pid_t tid = syscall(SYS_gettid); + int fd; + int offset; + int mpi_errno = 0; + + sprintf(fn, "/proc/%d/task/%d/stat", getpid(), (int)tid); + //printf("fn=%s\n", fn); + fd = open(fn, O_RDONLY); + if(fd == -1) { + printf("open() failed\n"); + goto fn_fail; + } + + result = malloc(65536); + if(result == NULL) { + printf("malloc() failed"); + goto fn_fail; + } + + int amount = 0; + offset = 0; + while(1) { + amount = read(fd, result + offset, 65536); + // printf("amount=%d\n", amount); + if(amount == -1) { + printf("read() failed"); + goto fn_fail; + } + if(amount == 0) { + goto eof; + } + offset += amount; + } + eof:; + //printf("result:%s\n", result); + + char* next_delim = result; + char* field; + int i; + for(i = 0; i < 39; i++) { + field = strsep(&next_delim, " "); + } + + int cpu = sched_getcpu(); + if(cpu == -1) { + printf("getpu() failed\n"); + goto fn_fail; + } + + printf("compute thread,pmi_rank=%02d,stat-cpu=%02d,sched_getcpu=%02d,pid=%d,tid=%d\n", atoi(getenv("PMI_RANK")), atoi(field), cpu, getpid(), tid); fflush(stdout); + fn_exit: + free(result); + return mpi_errno; + fn_fail: + mpi_errno = -1; + goto fn_exit; +} + +static inline int on_same_node(int ppn, int me, int you) { + return (me / ppn == you / ppn); +} + +/* isend-calc-wait */ +void my_send(int nproc, int ppn, int rank, double *sbuf, double *rbuf, int ndoubles, MPI_Request* reqs, long 
calc_nsec) { + int i; + int r = 0, s = 0; + int req = 0; + for (i = 0; i < nproc; i++) { + if (!on_same_node(ppn, rank, i)) { + MPI_Irecv(rbuf + r * ndoubles, ndoubles, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, &reqs[req]); + r++; + req++; + MPI_Isend(sbuf + s * ndoubles, ndoubles, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, &reqs[req]); + s++; + req++; + } + } + fwq(calc_nsec); + MPI_Waitall(req, reqs, MPI_STATUSES_IGNORE); +} + + +/* Helper functions to find the server's PSM2 endpoint identifier (epid). */ +psm2_epid_t find_server(int rank) { + FILE *fp = NULL; + psm2_epid_t server_epid = 0; + char fn[256]; + sprintf(fn, "psm2-demo-server-epid-%d", rank); + printf("PSM2 client waiting for epid mapping file to appear...\n"); + while (!fp) { + sleep(1); + fp = fopen(fn, "r"); + } + fscanf(fp, "%lx", &server_epid); + fclose(fp); + printf("PSM2 client found server epid = 0x%lx\n", server_epid); + return server_epid; +} + +void write_epid_to_file(int rank, psm2_epid_t myepid) { + FILE *fp; + char fn[256]; + sprintf(fn, "psm2-demo-server-epid-%d", rank); + fp = fopen(fn, "w"); + if (!fp) { + fprintf(stderr, + "Exiting, couldn't write server's epid mapping file: "); + die(strerror(errno), errno); + } + fprintf(fp, "0x%lx", myepid); + fclose(fp); + printf("PSM2 server wrote epid = 0x%lx to file.\n", myepid); + return; +} + +int psm2_sendrecv(int rank, int sender, int receiver) { + struct psm2_ep_open_opts o; + psm2_uuid_t uuid; + psm2_ep_t myep; + psm2_epid_t myepid; + psm2_epid_t server_epid; + psm2_epid_t epid_array[CONNECT_ARRAY_SIZE]; + int epid_array_mask[CONNECT_ARRAY_SIZE]; + psm2_error_t epid_connect_errors[CONNECT_ARRAY_SIZE]; + psm2_epaddr_t epaddr_array[CONNECT_ARRAY_SIZE]; + int rc; + int ver_major = PSM2_VERNO_MAJOR; + int ver_minor = PSM2_VERNO_MINOR; + char msgbuf[BUFFER_LENGTH]; + psm2_mq_t q; + psm2_mq_req_t req_mq; + memset(uuid, 0, sizeof(psm2_uuid_t)); /* Use a UUID of zero */ +/* Try to initialize PSM2 with the requested library version. 
+ * * In this example, given the use of the PSM2_VERNO_MAJOR and MINOR + * * as defined in the PSM2 headers, ensure that we are linking with + * * the same version of PSM2 as we compiled against. */ + + if ((rc = psm2_init(&ver_major, &ver_minor)) != PSM2_OK) { + die("couldn't init", rc); + } + printf("PSM2 init done.\n"); + /* Setup the endpoint options struct */ + if ((rc = psm2_ep_open_opts_get_defaults(&o)) != PSM2_OK) { + die("couldn't set default opts", rc); + } + printf("PSM2 opts_get_defaults done.\n"); + /* Attempt to open a PSM2 endpoint. This allocates hardware resources. */ + if ((rc = psm2_ep_open(uuid, &o, &myep, &myepid)) != PSM2_OK) { + die("couldn't psm2_ep_open()", rc); + } + printf("PSM2 endpoint open done.\n"); + int is_server = (rank == receiver) ? 1 : 0; + if (is_server) { + write_epid_to_file(rank, myepid); + } else { + server_epid = find_server(receiver); + } + if (is_server) { + /* Server does nothing here. A connection does not have to be + * * established to receive messages. */ + printf("PSM2 server up.\n"); + } else { + /* Setup connection request info */ + /* PSM2 can connect to a single epid per request, + * * or an arbitrary number of epids in a single connect call. + * * For this example, use part of an array of + * * connection requests. */ + memset(epid_array_mask, 0, sizeof(int) * CONNECT_ARRAY_SIZE); + epid_array[0] = server_epid; + epid_array_mask[0] = 1; + /* Begin the connection process. + * * note that if a requested epid is not responding, + * * the connect call will still return OK. + * * The errors array will contain the state of individual + * * connection requests. 
*/ + if ((rc = psm2_ep_connect(myep, + CONNECT_ARRAY_SIZE, + epid_array, + epid_array_mask, + epid_connect_errors, + epaddr_array, + 0 /* no timeout */ + )) != PSM2_OK) { + die("couldn't ep_connect", rc); + } + printf("PSM2 connect request processed.\n"); + /* Now check if our connection to the server is ready */ + if (epid_connect_errors[0] != PSM2_OK) { + die("couldn't connect to server", + epid_connect_errors[0]); + } + printf("PSM2 client-server connection established.\n"); + } + /* Setup our PSM2 message queue */ + if ((rc = psm2_mq_init(myep, PSM2_MQ_ORDERMASK_NONE, NULL, 0, &q)) + != PSM2_OK) { + die("couldn't initialize PSM2 MQ", rc); + } + printf("PSM2 MQ init done.\n"); + if (is_server) { + psm2_mq_tag_t t = {0xABCD}; + psm2_mq_tag_t tm = {-1}; + /* Post the receive request */ + if ((rc = psm2_mq_irecv2(q, PSM2_MQ_ANY_ADDR, + &t, /* message tag */ + &tm, /* message tag mask */ + 0, /* no flags */ + msgbuf, BUFFER_LENGTH, + NULL, /* no context to add */ + &req_mq /* track irecv status */ + )) != PSM2_OK) { + die("couldn't post psm2_mq_irecv()", rc); + } + printf("PSM2 MQ irecv() posted\n"); + /* Wait until the message arrives */ + if ((rc = psm2_mq_wait(&req_mq, NULL)) != PSM2_OK) { + die("couldn't wait for the irecv", rc); + } + printf("PSM2 MQ wait() done.\n"); + printf("Message from client:\n"); + printf("%s", msgbuf); + unlink("psm2-demo-server-epid"); + } else { + /* Say hello */ + snprintf(msgbuf, BUFFER_LENGTH, + "Hello world from epid=0x%lx, pid=%d.\n", + myepid, getpid()); + psm2_mq_tag_t t = {0xABCD}; + if ((rc = psm2_mq_send2(q, + epaddr_array[0], /* destination epaddr */ + PSM2_MQ_FLAG_SENDSYNC, /* no flags */ + &t, /* tag */ + msgbuf, BUFFER_LENGTH + )) != PSM2_OK) { + die("couldn't post psm2_mq_isend", rc); + } + printf("PSM2 MQ send() done.\n"); + } +/* Close down the MQ */ + if ((rc = psm2_mq_finalize(q)) != PSM2_OK) { + die("couldn't psm2_mq_finalize()", rc); + } + printf("PSM2 MQ finalized.\n"); +/* Close our ep, releasing all hardware 
resources. + * * Try to close all connections properly */ + if ((rc = psm2_ep_close(myep, PSM2_EP_CLOSE_GRACEFUL, + 0 /* no timeout */)) != PSM2_OK) { + die("couldn't psm2_ep_close()", rc); + } + printf("PSM2 ep closed.\n"); + /* Release all local PSM2 resources */ + if ((rc = psm2_finalize()) != PSM2_OK) { + die("couldn't psm2_finalize()", rc); + } + printf("PSM2 shut down, exiting.\n"); + return 0; +} + +static struct option options[] = { + { + .name = "ppn", + .has_arg = required_argument, + .flag = NULL, + .val = 'P', + }, + /* end */ + { NULL, 0, NULL, 0, }, +}; + +struct thr_arg { + volatile int bar_count; /* Barrier before entering loop */ + pthread_mutex_t bar_lock; + pthread_cond_t bar_cond; + pthread_t pthread; + int rank; + int ppn; + int nproc; +}; + +struct thr_arg thr_arg; + +void *progress_fn(void *arg) { + struct thr_arg *thr_arg = (struct thr_arg *)arg; + int rc; + int i; + + rc = syscall(732); + if (rc == -1) + fprintf(stdout, "CT09100 progress_fn running on Linux OK\n"); + else { + fprintf(stdout, "CT09100 progress_fn running on McKernel NG (%d)\n", rc); + } + + printf("progress,enter\n"); + + /* barrier */ + pthread_mutex_lock(&thr_arg->bar_lock); + thr_arg->bar_count++; + if (thr_arg->bar_count == 2) { + if ((rc = pthread_cond_broadcast(&thr_arg->bar_cond))) { + printf("pthread_cond_broadcast failed,rc=%d\n", rc); + } + } + while (thr_arg->bar_count != 2) { + if ((rc = pthread_cond_wait(&thr_arg->bar_cond, &thr_arg->bar_lock))) { + printf("pthread_cond_wait failed,rc=%d\n", rc); + } + } + pthread_mutex_unlock(&thr_arg->bar_lock); + + printf("progress,after barrier\n"); +#if 1 + for (i = 0; i < thr_arg->nproc; i++) { + if (!on_same_node(thr_arg->ppn, thr_arg->rank, i)) { + if (thr_arg->rank < i) { + psm2_sendrecv(thr_arg->rank, thr_arg->rank, i); + } else { + psm2_sendrecv(thr_arg->rank, i, thr_arg->rank); + } + } + } +#endif + + /* barrier */ + pthread_mutex_lock(&thr_arg->bar_lock); + thr_arg->bar_count--; + if (thr_arg->bar_count == 0) { + if 
((rc = pthread_cond_broadcast(&thr_arg->bar_cond))) { + printf("pthread_cond_broadcast failed,rc=%d\n", rc); + } + } + while (thr_arg->bar_count != 0) { + if ((rc = pthread_cond_wait(&thr_arg->bar_cond, &thr_arg->bar_lock))) { + printf("pthread_cond_wait failed,rc=%d\n", rc); + } + } + pthread_mutex_unlock(&thr_arg->bar_lock); + + + printf("progress,exit\n"); + return NULL; +} + +int main(int argc, char **argv) { + int rc; + int actual; + int nproc; + int ppn = -1; + int ndoubles = -1; + int my_rank = -1, size = -1; + int i, j; + double *sbuf, *rbuf; + MPI_Request* reqs; + struct timespec start, end; + long t_pure_l, t_overall_l; + long t_pure, t_overall; + int opt; + pthread_condattr_t condattr; + pthread_mutexattr_t mutexattr; + + fwq_init(); + + while ((opt = getopt_long(argc, argv, "+d:P:", options, NULL)) != -1) { + switch (opt) { + case 'd': + ndoubles = (1ULL << atoi(optarg)); + break; + case 'P': + ppn = atoi(optarg); + break; + default: /* '?' */ + printf("unknown option %c\n", optopt); + exit(1); + } + } + + if (ndoubles == -1 || ppn == -1) { + printf("specify ndoubles with -d and ppn with --ppn"); + exit(1); + } + + char *rank_str = getenv("PMI_RANK"); + if (!rank_str) { + printf("getenv failed\n"); + exit(1); + } + my_rank = atoi(rank_str); + nproc = 2; + + if (my_rank == 0) { + printf("tid=%d,pid=%d,ndoubles=%d,nproc=%d\n", syscall(__NR_gettid), getpid(), ndoubles, nproc); + printf("nsec=%ld, nspw=%f\n", nsec, nspw); + } + + /* Spawn a thread */ + thr_arg.rank = my_rank; + thr_arg.ppn = ppn; + thr_arg.nproc = nproc; + thr_arg.bar_count = 0; + + pthread_condattr_init(&condattr); + pthread_cond_init(&thr_arg.bar_cond, &condattr); + + pthread_mutexattr_init(&mutexattr); + pthread_mutex_init(&thr_arg.bar_lock, &mutexattr); + + char *uti_str = getenv("DISABLE_UTI"); + int uti_val = uti_str ? 
atoi(uti_str) : 0; + if (!uti_val) { + rc = syscall(731, 1, NULL); + if (rc) { + fprintf(stdout, "CT09003 INFO: uti not available (rc=%d)\n", rc); + } else { + fprintf(stdout, "CT09003 INFO: uti available\n"); + } + } else { + fprintf(stdout, "CT09003 INFO: uti disabled\n"); + } + + rc = pthread_create(&thr_arg.pthread, NULL, progress_fn, &thr_arg); + if (rc){ + fprintf(stdout, "pthread_create: %d\n", rc); + exit(1); + } + + /* barrier */ + pthread_mutex_lock(&thr_arg.bar_lock); + thr_arg.bar_count++; + if (thr_arg.bar_count == 2) { + if ((rc = pthread_cond_broadcast(&thr_arg.bar_cond))) { + printf("pthread_cond_broadcast failed,rc=%d\n", rc); + } + } + while (thr_arg.bar_count != 2) { + if ((rc = pthread_cond_wait(&thr_arg.bar_cond, &thr_arg.bar_lock))) { + printf("pthread_cond_wait failed,rc=%d\n", rc); + } + } + pthread_mutex_unlock(&thr_arg.bar_lock); + + printf("parent,after barrier\n"); + + + print_cpu_last_executed_on(); + + /* barrier */ + pthread_mutex_lock(&thr_arg.bar_lock); + thr_arg.bar_count--; + if (thr_arg.bar_count == 0) { + if ((rc = pthread_cond_broadcast(&thr_arg.bar_cond))) { + printf("pthread_cond_broadcast failed,rc=%d\n", rc); + } + } + while (thr_arg.bar_count != 0) { + if ((rc = pthread_cond_wait(&thr_arg.bar_cond, &thr_arg.bar_lock))) { + printf("pthread_cond_wait failed,rc=%d\n", rc); + } + } + pthread_mutex_unlock(&thr_arg.bar_lock); + + + pthread_join(thr_arg.pthread, NULL); + + fn_exit: + return 0; + fn_fail: + goto fn_exit; +} diff --git a/test/uti/mpi/008.c b/test/uti/mpi/008.c new file mode 100755 index 00000000..6db6e3ae --- /dev/null +++ b/test/uti/mpi/008.c @@ -0,0 +1,589 @@ +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <sys/mman.h> +#include <unistd.h> +#include <getopt.h> +#include <sys/syscall.h> /* For SYS_xxx definitions */ +#include <sched.h> +#include <errno.h> + +#include <psm2.h> /* 
required for core PSM2 functions */ +#include <psm2_mq.h> /* required for PSM2 MQ functions (send, recv, etc) */ + +//#define DEBUG +#ifdef DEBUG +#define dprintf printf +#else +#define dprintf {} +#endif + +#define BUFFER_LENGTH /*8000000*/(1ULL<<12) +#define CONNECT_ARRAY_SIZE 8 +void die(char *msg, int rc) { + fprintf(stderr, "%s: %d\n", msg, rc); + fflush(stderr); +} + +#define DIFFNSEC(end, start) ((end.tv_sec - start.tv_sec) * 1000000000UL + (end.tv_nsec - start.tv_nsec)) + +static inline void fixed_size_work() { + asm volatile( + "movq $0, %%rcx\n\t" + "1:\t" + "addq $1, %%rcx\n\t" + "cmpq $99, %%rcx\n\t" + "jle 1b\n\t" + : + : + : "rcx", "cc"); +} + +static inline void bulk_fsw(unsigned long n) { + int j; + for (j = 0; j < (n); j++) { + fixed_size_work(); + } +} + +double nspw; /* nsec per work */ +unsigned long nsec; + +void fwq_init() { + struct timespec start, end; + int i; + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); +#define N_INIT 10000000 + bulk_fsw(N_INIT); + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + nsec = DIFFNSEC(end, start); + nspw = nsec / (double)N_INIT; +} + +#if 1 +void fwq(long delay_nsec) { + if (delay_nsec < 0) { + return; + //printf("%s: delay_nsec < 0\n", __FUNCTION__); + } + bulk_fsw(delay_nsec / nspw); +} +#else /* For machines with large core-to-core performance variation (e.g. 
OFP) */ +void fwq(long delay_nsec) { + struct timespec start, end; + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + + while (1) { + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + if (DIFFNSEC(end, start) >= delay_nsec) { + break; + } + bulk_fsw(2); /* ~150 ns per iteration on OFP */ + } +} +#endif + +/* Print the CPU the calling thread last ran on: the 39th space-separated field of /proc/<pid>/task/<tid>/stat next to sched_getcpu(). Returns 0, or -1 if open/malloc/read/sched_getcpu fails. */ +static int print_cpu_last_executed_on() { + char fn[256]; + char* result = NULL; /* stays NULL until malloc() so the free() at fn_exit is always safe */ + pid_t tid = syscall(SYS_gettid); + int fd = -1; + int offset; + int mpi_errno = 0; + + sprintf(fn, "/proc/%d/task/%d/stat", getpid(), (int)tid); + //printf("fn=%s\n", fn); + fd = open(fn, O_RDONLY); + if(fd == -1) { + printf("open() failed\n"); + goto fn_fail; + } + + result = malloc(65536); + if(result == NULL) { + printf("malloc() failed"); + goto fn_fail; + } + + int amount = 0; + offset = 0; + while(1) { + amount = read(fd, result + offset, 65536 - 1 - offset); /* never read past the buffer; keep one byte for the terminator */ + // printf("amount=%d\n", amount); + if(amount == -1) { + printf("read() failed"); + goto fn_fail; + } + if(amount == 0) { + goto eof; + } + offset += amount; + } + eof: result[offset] = '\0'; /* strsep() below needs a NUL-terminated string */ + //printf("result:%s\n", result); + + char* next_delim = result; + char* field; + int i; + for(i = 0; i < 39; i++) { + field = strsep(&next_delim, " "); /* after the loop: 39th token, or NULL if the line has fewer fields */ + } + + int cpu = sched_getcpu(); + if(cpu == -1) { + printf("getpu() failed\n"); + goto fn_fail; + } + + printf("compute thread,pmi_rank=%02d,stat-cpu=%02d,sched_getcpu=%02d,pid=%d,tid=%d\n", getenv("PMI_RANK") ? atoi(getenv("PMI_RANK")) : -1, field ? atoi(field) : -1, cpu, getpid(), tid); fflush(stdout); + fn_exit: + free(result); if (fd != -1) close(fd); + return mpi_errno; + fn_fail: + mpi_errno = -1; + goto fn_exit; +} + +static inline int on_same_node(int ppn, int me, int you) { + return (me / ppn == you / ppn); +} + +/* Helper functions to find the server's PSM2 endpoint identifier (epid). 
*/ +psm2_epid_t find_server(int rank) { + FILE *fp = NULL; + psm2_epid_t server_epid = 0; + char fn[256]; + printf("%s: enter\n", __FUNCTION__); fflush(stdout); + + sprintf(fn, "psm2-demo-server-epid-%d", rank); + printf("PSM2 client waiting for epid mapping file to appear...\n"); fflush(stdout); + while (!fp) { + usleep(250*1000); + fp = fopen(fn, "r"); + if (fp && fscanf(fp, "%lx", &server_epid) != 1) { fclose(fp); fp = NULL; } /* file exists but the epid is not written yet: poll again instead of returning 0 */ + } + fclose(fp); + printf("PSM2 client found server epid = 0x%lx\n", server_epid); + return server_epid; +} +/* Write myepid in hex to the file psm2-demo-server-epid-<rank> that find_server() polls for. Exits the process if the file cannot be created. */ +void write_epid_to_file(int rank, psm2_epid_t myepid) { + FILE *fp; + char fn[256]; + printf("%s: enter\n", __FUNCTION__); + sprintf(fn, "psm2-demo-server-epid-%d", rank); + fp = fopen(fn, "w"); + if (!fp) { int saved_errno = errno; /* the fprintf() below may overwrite errno */ + fprintf(stderr, + "Exiting, couldn't write server's epid mapping file: "); + die(strerror(saved_errno), saved_errno); exit(1); /* die() in this file only prints; stop here rather than pass a NULL stream to fprintf() */ + } + fprintf(fp, "0x%lx", myepid); + fclose(fp); + printf("PSM2 server wrote epid = 0x%lx to file.\n", myepid); + return; +} + +psm2_uuid_t uuid; +psm2_ep_t myep; +psm2_epid_t myepid; +psm2_epid_t server_epid; +psm2_epid_t epid_array[CONNECT_ARRAY_SIZE]; +int epid_array_mask[CONNECT_ARRAY_SIZE]; +psm2_error_t epid_connect_errors[CONNECT_ARRAY_SIZE]; +psm2_epaddr_t epaddr_array[CONNECT_ARRAY_SIZE]; + +int my_psm2_init(int my_rank, int server_rank) { + struct psm2_ep_open_opts o; + int rc; + int ver_major = PSM2_VERNO_MAJOR; + int ver_minor = PSM2_VERNO_MINOR; + memset(uuid, 0, sizeof(psm2_uuid_t)); /* Use a UUID of zero */ +/* Try to initialize PSM2 with the requested library version. + * * In this example, given the use of the PSM2_VERNO_MAJOR and MINOR + * * as defined in the PSM2 headers, ensure that we are linking with + * * the same version of PSM2 as we compiled against. 
*/ + + if ((rc = psm2_init(&ver_major, &ver_minor)) != PSM2_OK) { + die("couldn't init", rc); + } + printf("PSM2 init done.\n"); + /* Setup the endpoint options struct */ + if ((rc = psm2_ep_open_opts_get_defaults(&o)) != PSM2_OK) { + die("couldn't set default opts", rc); + } + printf("PSM2 opts_get_defaults done.\n"); + /* Attempt to open a PSM2 endpoint. This allocates hardware resources. */ + if ((rc = psm2_ep_open(uuid, &o, &myep, &myepid)) != PSM2_OK) { + die("couldn't psm2_ep_open()", rc); + } + printf("PSM2 endpoint open done.\n"); + + return 0; +} + +psm2_mq_t q; + +int my_psm2_connect(int my_rank, int server_rank) { + int rc; + int is_server = (my_rank == server_rank) ? 1 : 0; + printf("%s: enter\n", __FUNCTION__); fflush(stdout); + if (is_server) { + write_epid_to_file(my_rank, myepid); + } else { + server_epid = find_server(server_rank); + } + printf("%s: epid exchange done\n", __FUNCTION__); fflush(stdout); + if (is_server) { + /* Server does nothing here. A connection does not have to be + * * established to receive messages. */ + printf("PSM2 server up.\n"); + } else { + /* Setup connection request info */ + /* PSM2 can connect to a single epid per request, + * * or an arbitrary number of epids in a single connect call. + * * For this example, use part of an array of + * * connection requests. */ + memset(epid_array_mask, 0, sizeof(int) * CONNECT_ARRAY_SIZE); + epid_array[0] = server_epid; + epid_array_mask[0] = 1; + /* Begin the connection process. + * * note that if a requested epid is not responding, + * * the connect call will still return OK. + * * The errors array will contain the state of individual + * * connection requests. 
*/ + printf("calling ep_connect\n"); + int count = 0; + while ((rc = psm2_ep_connect(myep, + CONNECT_ARRAY_SIZE, + epid_array, + epid_array_mask, + epid_connect_errors, + epaddr_array, + 1 /* 0.5 sec timeout */ + )) != PSM2_OK) { + struct timespec ts = { .tv_sec = 0, .tv_nsec = 500*1000*1000 }; + nanosleep(&ts, NULL); + printf("."); fflush(stdout); + count++; + if (count > 30) { + break; + } + } + + if (rc != PSM2_OK) { + printf("psm2_ep_connect timed-out\n"); + return -1; + } + + printf("PSM2 connect request processed.\n"); + /* Now check if our connection to the server is ready */ + if (epid_connect_errors[0] != PSM2_OK) { + die("couldn't connect to server", epid_connect_errors[0]); + return -1; + } + printf("PSM2 client-server connection established.\n"); + } + + /* Setup our PSM2 message queue */ + if ((rc = psm2_mq_init(myep, PSM2_MQ_ORDERMASK_NONE, NULL, 0, &q)) + != PSM2_OK) { + die("couldn't initialize PSM2 MQ", rc); + } + printf("PSM2 MQ init done.\n"); + + return 0; +} +char msgbuf[BUFFER_LENGTH]; + +int my_psm2_sendrecv(int rank, int sender, int receiver) { + int is_server = (rank == receiver) ? 
1 : 0; + int rc; + psm2_mq_req_t req_mq; + //char msgbuf[BUFFER_LENGTH]; + + register long rsp asm ("rsp"); + printf("rsp=%lx.msgbuf=%p\n", rsp, msgbuf); fflush(stdout); + + memset(msgbuf, 0, BUFFER_LENGTH); + + if (is_server) { + psm2_mq_tag_t t = {0xABCD}; + psm2_mq_tag_t tm = {-1}; + /* Post the receive request */ + if ((rc = psm2_mq_irecv2(q, PSM2_MQ_ANY_ADDR, + &t, /* message tag */ + &tm, /* message tag mask */ + 0, /* no flags */ + msgbuf, BUFFER_LENGTH, + NULL, /* no context to add */ + &req_mq /* track irecv status */ + )) != PSM2_OK) { + die("couldn't post psm2_mq_irecv()", rc); + } + printf("PSM2 MQ irecv() posted\n"); + +#if 0 + /* Wait until the message arrives */ + if ((rc = psm2_mq_wait(&req_mq, NULL)) != PSM2_OK) { + die("couldn't wait for the irecv", rc); + } + printf("PSM2 MQ wait() done.\n"); + printf("Message from client:\n"); + printf("%s", msgbuf); + + if (is_server) { + char fn[256]; + sprintf(fn, "psm2-demo-server-epid-%d", rank); + unlink(fn); + } +#else + int count = 0; + while ((rc = psm2_mq_ipeek(q, &req_mq, NULL)) != PSM2_OK) { + struct timespec ts = { .tv_sec = 0, .tv_nsec = 500*1000*1000 }; + nanosleep(&ts, NULL); + printf("."); fflush(stdout); + count++; + if (count > 2) { + break; + } + } + if (rc == PSM2_OK) { + if ((rc = psm2_mq_test(&req_mq, NULL)) != PSM2_OK) { + printf("psm2_mq_test failed\n"); + } else { + printf("PSM2 MQ test() done.\n"); + printf("Message from client:\n"); + printf("%s", msgbuf); + } + char fn[256]; + sprintf(fn, "psm2-demo-server-epid-%d", rank); + unlink(fn); + } else { + printf("PSM2 MQ test() timed-out.\n"); + } +#endif + } else { + /* Say hello */ + snprintf(msgbuf, BUFFER_LENGTH, + "Hello world from epid=0x%lx, pid=%d.\n", + myepid, getpid()); + psm2_mq_tag_t t = {0xABCD}; +#if 0 + if ((rc = psm2_mq_send2(q, + epaddr_array[0], /* destination epaddr */ + PSM2_MQ_FLAG_SENDSYNC, /* no flags */ + &t, /* tag */ + msgbuf, BUFFER_LENGTH + )) != PSM2_OK) { + die("couldn't post psm2_mq_isend", rc); + } + 
printf("PSM2 MQ send() done.\n"); +#else + if ((rc = psm2_mq_isend2(q, + epaddr_array[0], /* destination epaddr */ + PSM2_MQ_FLAG_SENDSYNC, /* no flags */ + &t, /* tag */ + msgbuf, BUFFER_LENGTH, + NULL, /* no context to add */ + &req_mq /* track irecv status */ + )) != PSM2_OK) { + die("couldn't post psm2_mq_isend", rc); + } + printf("PSM2 MQ isend() posted\n"); + + int count = 0; + while ((rc = psm2_mq_ipeek2(q, &req_mq, NULL)) != PSM2_OK) { + struct timespec ts = { .tv_sec = 0, .tv_nsec = 500*1000*1000 }; + nanosleep(&ts, NULL); + printf("."); fflush(stdout); + count++; + if (count > 30) { + break; + } + } + if (rc == PSM2_OK) { + if ((rc = psm2_mq_test2(&req_mq, NULL)) != PSM2_OK) { + printf("PSM2 MQ test() failed.\n"); + } else { + printf("PSM2 MQ test() done.\n"); + } + } else { + printf("PSM2 MQ test() timeout.\n"); + } +#endif + } +/* Close down the MQ */ + if ((rc = psm2_mq_finalize(q)) != PSM2_OK) { + die("couldn't psm2_mq_finalize()", rc); + } + printf("PSM2 MQ finalized.\n"); +/* Close our ep, releasing all hardware resources. 
+ * * Try to close all connections properly */ + if ((rc = psm2_ep_close(myep, PSM2_EP_CLOSE_GRACEFUL, + 0 /* no timeout */)) != PSM2_OK) { + die("couldn't psm2_ep_close()", rc); + } + printf("PSM2 ep closed.\n"); + /* Release all local PSM2 resources */ + if ((rc = psm2_finalize()) != PSM2_OK) { + die("couldn't psm2_finalize()", rc); + } + printf("PSM2 shut down, exiting.\n"); + return 0; +} + +static struct option options[] = { + { + .name = "ppn", + .has_arg = required_argument, + .flag = NULL, + .val = 'P', + }, + /* end */ + { NULL, 0, NULL, 0, }, +}; + +struct thr_arg { + pthread_barrier_t bar; + pthread_t pthread; + int rank; + int ppn; + int nproc; +}; + +struct thr_arg thr_arg; + +void *progress_fn(void *arg) { + struct thr_arg *thr_arg = (struct thr_arg *)arg; + int rc; + int i; + + rc = syscall(732); + if (rc == -1) + fprintf(stdout, "CT09100 progress_fn running on Linux OK\n"); + else { + fprintf(stdout, "CT09100 progress_fn running on McKernel NG (%d)\n", rc); + } + + printf("progress,enter\n"); + + pthread_barrier_wait(&thr_arg->bar); + +#if 1 + for (i = 0; i < thr_arg->nproc; i++) { + if (!on_same_node(thr_arg->ppn, thr_arg->rank, i)) { + if (thr_arg->rank < i) { + my_psm2_sendrecv(thr_arg->rank, thr_arg->rank, i); + } else { + my_psm2_sendrecv(thr_arg->rank, i, thr_arg->rank); + } + } + } +#endif + + pthread_barrier_wait(&thr_arg->bar); + + +#if 0 + printf("progress,entering infinite loop\n"); + while(1) { } +#endif + printf("progress,returning\n"); + return NULL; +} + +int main(int argc, char **argv) { + int rc; + int actual; + int nproc; + int ppn = -1; + int my_rank = -1, size = -1; + int i, j; + struct timespec start, end; + long t_pure_l, t_overall_l; + long t_pure, t_overall; + int opt; + pthread_barrierattr_t barrierattr; + + fwq_init(); + + while ((opt = getopt_long(argc, argv, "+P:", options, NULL)) != -1) { + switch (opt) { + case 'P': + ppn = atoi(optarg); + break; + default: /* '?' 
*/ + printf("unknown option %c\n", optopt); + exit(1); + } + } + + if (ppn == -1) { + printf("specify ppn with --ppn"); + exit(1); + } + + char *rank_str = getenv("PMI_RANK"); + if (!rank_str) { + printf("getenv failed\n"); + exit(1); + } + my_rank = atoi(rank_str); + printf("my_rank=%d\n", my_rank); fflush(stdout); + + nproc = 2; + + if (my_rank == 0) { + printf("tid=%d,pid=%d,nproc=%d\n", syscall(__NR_gettid), getpid(), nproc); + printf("nsec=%ld, nspw=%f\n", nsec, nspw); + } + + int server_rank = ppn + (my_rank % ppn); + my_psm2_init(my_rank, server_rank); + my_psm2_connect(my_rank, server_rank); + + /* Spawn a thread */ + thr_arg.rank = my_rank; + thr_arg.ppn = ppn; + thr_arg.nproc = nproc; + + pthread_barrierattr_init(&barrierattr); + pthread_barrier_init(&thr_arg.bar, &barrierattr, nproc); + + char *uti_str = getenv("DISABLE_UTI"); + int uti_val = uti_str ? atoi(uti_str) : 0; + if (!uti_val) { + rc = syscall(731, 1, NULL); + if (rc) { + fprintf(stdout, "CT09003 INFO: uti not available (rc=%d)\n", rc); + } else { + fprintf(stdout, "CT09003 INFO: uti available\n"); + } + } else { + fprintf(stdout, "CT09003 INFO: uti disabled\n"); + } + + rc = pthread_create(&thr_arg.pthread, NULL, progress_fn, &thr_arg); + if (rc){ + fprintf(stdout, "pthread_create: %d\n", rc); + exit(1); + } + + pthread_barrier_wait(&thr_arg.bar); + + pthread_barrier_wait(&thr_arg.bar); + + pthread_join(thr_arg.pthread, NULL); + + fn_exit: + return 0; + fn_fail: + goto fn_exit; +} diff --git a/test/uti/mpi/008.sh b/test/uti/mpi/008.sh new file mode 100755 index 00000000..24a310cf --- /dev/null +++ b/test/uti/mpi/008.sh @@ -0,0 +1,89 @@ +#!/usr/bin/bash + +#!/usr/bin/bash -x + +MYHOME=$HOME +UTI_MPI_TOP=${MYHOME}/project/os/mckernel/test/uti/mpi + +MCK=${MYHOME}/project/os/install +unset DISABLE_UTI + +cmdline="./008" + +stop=0 +reboot=0 +go=0 + +mck=0 +nloops=1 + +while getopts srgac:n:mdl: OPT +do + case ${OPT} in + s) stop=1 + ;; + r) reboot=1 + ;; + g) go=1 + ;; + a) async=1 + ;; + c) 
cmdline=$OPTARG + ;; + n) ndoubles=$OPTARG + ;; + m) + mck=1 + ;; + d) export DISABLE_UTI=1 + ;; + l) nloops=$OPTARG + ;; + *) echo "invalid option -${OPT}" >&2 + exit 1 + esac +done + +if [ ${mck} -eq 1 ]; then + MCEXEC="${MCK}/bin/mcexec" +else + MCEXEC= +fi + +if [ ${stop} -eq 1 ]; then + if [ ${mck} -eq 1 ]; then + sudo ${MCK}/sbin/mcstop+release.sh + else + : + fi +fi + +if [ ${reboot} -eq 1 ]; then + if [ ${mck} -eq 1 ]; then + if hostname | grep ofp &>/dev/null; then + sudo ${MCK}/sbin/mcreboot.sh -s -c 2-17,70-85,138-153,206-221,20-35,88-103,156-171,224-239,36-51,104-119,172-187,240-255,52-67,120-135,188-203,256-271 -r 2-5,70-73,138-141,206-209:0+6-9,74-77,142-145,210-213:1+10-13,78-81,146-149,214-217:68+14-17,82-85,150-153,218-221:69+20-23,88-91,156-159,224-227:136+24-27,92-95,160-163,228-231:137+28-31,96-99,164-167,232-235:204+32-35,100-103,168-171,236-239:205+36-39,104-107,172-175,240-243:18+40-43,108-111,176-179,244-247:19+44-47,112-115,180-183,248-251:86+48-51,116-119,184-187,252-255:87+52-55,120-123,188-191,256-259:154+56-59,124-127,192-195,260-263:155+60-63,128-131,196-199,264-267:222+64-67,132-135,200-203,268-271:223 -m 32G@0,12G@1 + else + sudo ${MCK}/sbin/mcreboot.sh -s -c 1-15,65-79,129-143,193-207,17-31,81-95,145-159,209-223,33-47,97-111,161-175,225-239,49-63,113-127,177-191,241-255 -r 1-15:0+65-79:64+129-143:128+193-207:192+17-31:16+81-95:80+145-159:144+209-223:208+33-47:32+97-111:96+161-175:160+225-239:224+49-63:48+113-127:112+177-191:176+241-255:240 -m 12G@0,12G@1,12G@2,12G@3,3920M@4,3920M@5,3920M@6,3920M@7 + fi + else + : + fi +fi + +if [ ${go} -eq 1 ]; then + cd ${UTI_MPI_TOP} + make CC=gcc 008 + for i in `seq 1 ${nloops}`; do + rm -f psm2-demo-server-epid-* + #PSM2_RCVTHREAD=0 PMI_RANK=0 DISABLE_UTI=1 ${MCK}/bin/mcexec --enable-uti taskset -c 2 ./008 --ppn 1 & + PSM2_RCVTHREAD=0 PMI_RANK=1 DISABLE_UTI=0 ${MCK}/bin/mcexec --enable-uti taskset -c 3 ./008 --ppn 1 + #wait + echo =====; + echo $i; + echo =====; i=$((i+1)); + #sleep 2 + done +fi 
+ + + diff --git a/test/uti/mpi/009.c b/test/uti/mpi/009.c new file mode 100755 index 00000000..3a1209a5 --- /dev/null +++ b/test/uti/mpi/009.c @@ -0,0 +1,537 @@ +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <sys/mman.h> +#include <mpi.h> +#include <unistd.h> +#include <getopt.h> +#include <sys/syscall.h> /* For SYS_xxx definitions */ +#include <sched.h> +#include <errno.h> + +#include <psm2.h> /* required for core PSM2 functions */ +#include <psm2_mq.h> /* required for PSM2 MQ functions (send, recv, etc) */ + +//#define DEBUG +#ifdef DEBUG +#define dprintf printf +#else +#define dprintf {} +#endif + +#define BUFFER_LENGTH /*8000000*/(1ULL<<12) +#define CONNECT_ARRAY_SIZE 8 +void die(char *msg, int rc) { + fprintf(stderr, "%s: %d\n", msg, rc); + fflush(stderr); +} + +#define DIFFNSEC(end, start) ((end.tv_sec - start.tv_sec) * 1000000000UL + (end.tv_nsec - start.tv_nsec)) + +static inline void fixed_size_work() { + asm volatile( + "movq $0, %%rcx\n\t" + "1:\t" + "addq $1, %%rcx\n\t" + "cmpq $99, %%rcx\n\t" + "jle 1b\n\t" + : + : + : "rcx", "cc"); +} + +static inline void bulk_fsw(unsigned long n) { + int j; + for (j = 0; j < (n); j++) { + fixed_size_work(); + } +} + +double nspw; /* nsec per work */ +unsigned long nsec; + +void fwq_init() { + struct timespec start, end; + int i; + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); +#define N_INIT 10000000 + bulk_fsw(N_INIT); + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + nsec = DIFFNSEC(end, start); + nspw = nsec / (double)N_INIT; +} + +#if 1 +void fwq(long delay_nsec) { + if (delay_nsec < 0) { + return; + //printf("%s: delay_nsec < 0\n", __FUNCTION__); + } + bulk_fsw(delay_nsec / nspw); +} +#else /* For machines with large core-to-core performance variation (e.g. 
OFP) */ +void fwq(long delay_nsec) { + struct timespec start, end; + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + + while (1) { + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + if (DIFFNSEC(end, start) >= delay_nsec) { + break; + } + bulk_fsw(2); /* ~150 ns per iteration on OFP */ + } +} +#endif + +/* Print the CPU the calling thread last ran on: the 39th space-separated field of /proc/<pid>/task/<tid>/stat next to sched_getcpu(). Returns 0, or -1 if open/malloc/read/sched_getcpu fails. */ +static int print_cpu_last_executed_on() { + char fn[256]; + char* result = NULL; /* stays NULL until malloc() so the free() at fn_exit is always safe */ + pid_t tid = syscall(SYS_gettid); + int fd = -1; + int offset; + int mpi_errno = 0; + + sprintf(fn, "/proc/%d/task/%d/stat", getpid(), (int)tid); + //printf("fn=%s\n", fn); + fd = open(fn, O_RDONLY); + if(fd == -1) { + printf("open() failed\n"); + goto fn_fail; + } + + result = malloc(65536); + if(result == NULL) { + printf("malloc() failed"); + goto fn_fail; + } + + int amount = 0; + offset = 0; + while(1) { + amount = read(fd, result + offset, 65536 - 1 - offset); /* never read past the buffer; keep one byte for the terminator */ + // printf("amount=%d\n", amount); + if(amount == -1) { + printf("read() failed"); + goto fn_fail; + } + if(amount == 0) { + goto eof; + } + offset += amount; + } + eof: result[offset] = '\0'; /* strsep() below needs a NUL-terminated string */ + //printf("result:%s\n", result); + + char* next_delim = result; + char* field; + int i; + for(i = 0; i < 39; i++) { + field = strsep(&next_delim, " "); /* after the loop: 39th token, or NULL if the line has fewer fields */ + } + + int cpu = sched_getcpu(); + if(cpu == -1) { + printf("getpu() failed\n"); + goto fn_fail; + } + + printf("compute thread,pmi_rank=%02d,stat-cpu=%02d,sched_getcpu=%02d,pid=%d,tid=%d\n", getenv("PMI_RANK") ? atoi(getenv("PMI_RANK")) : -1, field ? atoi(field) : -1, cpu, getpid(), tid); fflush(stdout); + fn_exit: + free(result); if (fd != -1) close(fd); + return mpi_errno; + fn_fail: + mpi_errno = -1; + goto fn_exit; +} + +static inline int on_same_node(int ppn, int me, int you) { + return (me / ppn == you / ppn); +} + +/* isend-calc-wait */ +void my_send(int nproc, int ppn, int rank, double *sbuf, double *rbuf, int ndoubles, MPI_Request* reqs, long calc_nsec) { + int i; + int r = 0, s = 0; + int req = 0; + for (i = 0; i < nproc; i++) { + if (!on_same_node(ppn, rank, i)) { + MPI_Irecv(rbuf + r * ndoubles, ndoubles, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, &reqs[req]); + r++; + req++; + 
MPI_Isend(sbuf + s * ndoubles, ndoubles, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, &reqs[req]); + s++; + req++; + } + } + fwq(calc_nsec); + MPI_Waitall(req, reqs, MPI_STATUSES_IGNORE); +} + + +/* Helper functions to find the server's PSM2 endpoint identifier (epid). */ +psm2_epid_t find_server(int rank) { + FILE *fp = NULL; + psm2_epid_t server_epid = 0; + char fn[256]; + printf("%s: enter\n", __FUNCTION__); fflush(stdout); + + sprintf(fn, "psm2-demo-server-epid-%d", rank); + printf("PSM2 client waiting for epid mapping file to appear...\n"); fflush(stdout); + while (!fp) { + sleep(1); + fp = fopen(fn, "r"); + if (fp && fscanf(fp, "%lx", &server_epid) != 1) { fclose(fp); fp = NULL; } /* file exists but the epid is not written yet: poll again instead of returning 0 */ + } + fclose(fp); + printf("PSM2 client found server epid = 0x%lx\n", server_epid); + return server_epid; +} +/* Write myepid in hex to the file psm2-demo-server-epid-<rank> that find_server() polls for. Exits the process if the file cannot be created. */ +void write_epid_to_file(int rank, psm2_epid_t myepid) { + FILE *fp; + char fn[256]; + printf("%s: enter\n", __FUNCTION__); + sprintf(fn, "psm2-demo-server-epid-%d", rank); + fp = fopen(fn, "w"); + if (!fp) { int saved_errno = errno; /* the fprintf() below may overwrite errno */ + fprintf(stderr, + "Exiting, couldn't write server's epid mapping file: "); + die(strerror(saved_errno), saved_errno); exit(1); /* die() in this file only prints; stop here rather than pass a NULL stream to fprintf() */ + } + fprintf(fp, "0x%lx", myepid); + fclose(fp); + printf("PSM2 server wrote epid = 0x%lx to file.\n", myepid); + return; +} + +psm2_uuid_t uuid; +psm2_ep_t myep; +psm2_epid_t myepid; +psm2_epid_t server_epid; +psm2_epid_t epid_array[CONNECT_ARRAY_SIZE]; +int epid_array_mask[CONNECT_ARRAY_SIZE]; +psm2_error_t epid_connect_errors[CONNECT_ARRAY_SIZE]; +psm2_epaddr_t epaddr_array[CONNECT_ARRAY_SIZE]; + +int my_psm2_init(int my_rank, int server_rank) { + struct psm2_ep_open_opts o; + int rc; + int ver_major = PSM2_VERNO_MAJOR; + int ver_minor = PSM2_VERNO_MINOR; + + printf("%s: my_rank=%d,server_rank=%d\n", __FUNCTION__, my_rank, server_rank); fflush(stdout); + memset(uuid, 0, sizeof(psm2_uuid_t)); /* Use a UUID of zero */ +/* Try to initialize PSM2 with the requested library version. 
+ * * In this example, given the use of the PSM2_VERNO_MAJOR and MINOR + * * as defined in the PSM2 headers, ensure that we are linking with + * * the same version of PSM2 as we compiled against. */ + + if ((rc = psm2_init(&ver_major, &ver_minor)) != PSM2_OK) { + die("couldn't init", rc); + } + printf("PSM2 init done.\n"); + /* Setup the endpoint options struct */ + if ((rc = psm2_ep_open_opts_get_defaults(&o)) != PSM2_OK) { + die("couldn't set default opts", rc); + } + printf("PSM2 opts_get_defaults done.\n"); + /* Attempt to open a PSM2 endpoint. This allocates hardware resources. */ + if ((rc = psm2_ep_open(uuid, &o, &myep, &myepid)) != PSM2_OK) { + die("couldn't psm2_ep_open()", rc); + } + printf("PSM2 endpoint open done.\n"); + + return 0; +} +int my_psm2_connect(int my_rank, int server_rank) { + int rc; + int is_server = (my_rank == server_rank) ? 1 : 0; + printf("%s: my_rank=%d,server_rank=%d\n", __FUNCTION__, my_rank, server_rank); fflush(stdout); + if (is_server) { + write_epid_to_file(my_rank, myepid); + } else { + server_epid = find_server(server_rank); + } + printf("%s: epid exchange done\n", __FUNCTION__); fflush(stdout); + if (is_server) { + /* Server does nothing here. A connection does not have to be + * * established to receive messages. */ + printf("PSM2 server up.\n"); + } else { + /* Setup connection request info */ + /* PSM2 can connect to a single epid per request, + * * or an arbitrary number of epids in a single connect call. + * * For this example, use part of an array of + * * connection requests. */ + memset(epid_array_mask, 0, sizeof(int) * CONNECT_ARRAY_SIZE); + epid_array[0] = server_epid; + epid_array_mask[0] = 1; + /* Begin the connection process. + * * note that if a requested epid is not responding, + * * the connect call will still return OK. + * * The errors array will contain the state of individual + * * connection requests. 
*/ + if ((rc = psm2_ep_connect(myep, + CONNECT_ARRAY_SIZE, + epid_array, + epid_array_mask, + epid_connect_errors, + epaddr_array, + 0 /* no timeout */ + )) != PSM2_OK) { + die("couldn't ep_connect", rc); + return -1; + } + printf("PSM2 connect request processed.\n"); + /* Now check if our connection to the server is ready */ + if (epid_connect_errors[0] != PSM2_OK) { + die("couldn't connect to server", epid_connect_errors[0]); + return -1; + } + printf("PSM2 client-server connection established.\n"); + } + return 0; +} +char msgbuf[BUFFER_LENGTH]; + +int my_psm2_sendrecv(int rank, int sender, int receiver) { + int is_server = (rank == receiver) ? 1 : 0; + int rc; + psm2_mq_t q; + psm2_mq_req_t req_mq; + //char msgbuf[BUFFER_LENGTH]; + + register long rsp asm ("rsp"); + printf("rsp=%lx.msgbuf=%p\n", rsp, msgbuf); fflush(stdout); + + memset(msgbuf, 0, BUFFER_LENGTH); + + /* Setup our PSM2 message queue */ + if ((rc = psm2_mq_init(myep, PSM2_MQ_ORDERMASK_NONE, NULL, 0, &q)) + != PSM2_OK) { + die("couldn't initialize PSM2 MQ", rc); + } + printf("PSM2 MQ init done.\n"); + if (is_server) { + psm2_mq_tag_t t = {0xABCD}; + psm2_mq_tag_t tm = {-1}; + /* Post the receive request */ + if ((rc = psm2_mq_irecv2(q, PSM2_MQ_ANY_ADDR, + &t, /* message tag */ + &tm, /* message tag mask */ + 0, /* no flags */ + msgbuf, BUFFER_LENGTH, + NULL, /* no context to add */ + &req_mq /* track irecv status */ + )) != PSM2_OK) { + die("couldn't post psm2_mq_irecv()", rc); + } + printf("PSM2 MQ irecv() posted\n"); + /* Wait until the message arrives */ + if ((rc = psm2_mq_wait(&req_mq, NULL)) != PSM2_OK) { + die("couldn't wait for the irecv", rc); + } + printf("PSM2 MQ wait() done.\n"); + printf("Message from client:\n"); + printf("%s", msgbuf); + + if (is_server) { + char fn[256]; + sprintf(fn, "psm2-demo-server-epid-%d", rank); + unlink(fn); + } + } else { + /* Say hello */ + snprintf(msgbuf, BUFFER_LENGTH, + "Hello world from epid=0x%lx, pid=%d.\n", + myepid, getpid()); + psm2_mq_tag_t t = 
{0xABCD}; + if ((rc = psm2_mq_send2(q, + epaddr_array[0], /* destination epaddr */ + PSM2_MQ_FLAG_SENDSYNC, /* no flags */ + &t, /* tag */ + msgbuf, BUFFER_LENGTH + )) != PSM2_OK) { + die("couldn't post psm2_mq_isend", rc); + } + printf("PSM2 MQ send() done.\n"); + } +/* Close down the MQ */ + if ((rc = psm2_mq_finalize(q)) != PSM2_OK) { + die("couldn't psm2_mq_finalize()", rc); + } + printf("PSM2 MQ finalized.\n"); +/* Close our ep, releasing all hardware resources. + * * Try to close all connections properly */ + if ((rc = psm2_ep_close(myep, PSM2_EP_CLOSE_GRACEFUL, + 0 /* no timeout */)) != PSM2_OK) { + die("couldn't psm2_ep_close()", rc); + } + printf("PSM2 ep closed.\n"); + /* Release all local PSM2 resources */ + if ((rc = psm2_finalize()) != PSM2_OK) { + die("couldn't psm2_finalize()", rc); + } + printf("PSM2 shut down, exiting.\n"); + return 0; +} + +static struct option options[] = { + { + .name = "ppn", + .has_arg = required_argument, + .flag = NULL, + .val = 'P', + }, + /* end */ + { NULL, 0, NULL, 0, }, +}; + +struct thr_arg { + pthread_barrier_t bar; + pthread_t pthread; + int rank; + int ppn; + int nproc; + int server_rank; +}; + +struct thr_arg thr_arg; + +void *progress_fn(void *arg) { + struct thr_arg *thr_arg = (struct thr_arg *)arg; + int rc; + int i; + + rc = syscall(732); + if (rc == -1) + fprintf(stdout, "CT09100 progress_fn running on Linux OK\n"); + else { + fprintf(stdout, "CT09100 progress_fn running on McKernel NG (%d)\n", rc); + } + + printf("progress,enter\n"); + + pthread_barrier_wait(&thr_arg->bar); + +#if 1 + my_psm2_init(thr_arg->rank, thr_arg->server_rank); + my_psm2_connect(thr_arg->rank, thr_arg->server_rank); + + for (i = 0; i < thr_arg->nproc; i++) { + if (!on_same_node(thr_arg->ppn, thr_arg->rank, i)) { + if (thr_arg->rank < i) { + my_psm2_sendrecv(thr_arg->rank, thr_arg->rank, i); + } else { + my_psm2_sendrecv(thr_arg->rank, i, thr_arg->rank); + } + } + } +#endif + + pthread_barrier_wait(&thr_arg->bar); + + + 
printf("progress,exit\n"); + return NULL; +} + +int main(int argc, char **argv) { + int rc; + int actual; + int nproc; + int ppn = -1; + int ndoubles = -1; + int my_rank = -1, size = -1; + int i, j; + double *sbuf, *rbuf; + MPI_Request* reqs; + struct timespec start, end; + long t_pure_l, t_overall_l; + long t_pure, t_overall; + int opt; + pthread_barrierattr_t barrierattr; + + fwq_init(); + + while ((opt = getopt_long(argc, argv, "+d:P:", options, NULL)) != -1) { + switch (opt) { + case 'd': + ndoubles = (1ULL << atoi(optarg)); + break; + case 'P': + ppn = atoi(optarg); + break; + default: /* '?' */ + printf("unknown option %c\n", optopt); + exit(1); + } + } + + if (ndoubles == -1 || ppn == -1) { + printf("specify ndoubles with -d and ppn with --ppn"); + exit(1); + } + + char *rank_str = getenv("PMI_RANK"); + if (!rank_str) { + printf("getenv failed\n"); + exit(1); + } + my_rank = atoi(rank_str); + printf("my_rank=%d\n", my_rank); fflush(stdout); + + nproc = 2; + + if (my_rank == 0) { + printf("tid=%d,pid=%d,ndoubles=%d,nproc=%d\n", syscall(__NR_gettid), getpid(), ndoubles, nproc); + printf("nsec=%ld, nspw=%f\n", nsec, nspw); + } + + + /* Spawn a thread */ + thr_arg.rank = my_rank; + thr_arg.ppn = ppn; + thr_arg.nproc = nproc; + thr_arg.server_rank = ppn + (my_rank % ppn); + + pthread_barrierattr_init(&barrierattr); + pthread_barrier_init(&thr_arg.bar, &barrierattr, nproc); + + char *uti_str = getenv("DISABLE_UTI"); + int uti_val = uti_str ? 
atoi(uti_str) : 0; + if (!uti_val) { + rc = syscall(731, 1, NULL); + if (rc) { + fprintf(stdout, "CT09003 INFO: uti not available (rc=%d)\n", rc); + } else { + fprintf(stdout, "CT09003 INFO: uti available\n"); + } + } else { + fprintf(stdout, "CT09003 INFO: uti disabled\n"); + } + + rc = pthread_create(&thr_arg.pthread, NULL, progress_fn, &thr_arg); + if (rc){ + fprintf(stdout, "pthread_create: %d\n", rc); + exit(1); + } + + pthread_barrier_wait(&thr_arg.bar); + + pthread_barrier_wait(&thr_arg.bar); + + pthread_join(thr_arg.pthread, NULL); + + fn_exit: + return 0; + fn_fail: + goto fn_exit; +} diff --git a/test/uti/mpi/010.c b/test/uti/mpi/010.c new file mode 100755 index 00000000..65ed6d55 --- /dev/null +++ b/test/uti/mpi/010.c @@ -0,0 +1,508 @@ +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <sys/mman.h> +#include <unistd.h> +#include <getopt.h> +#include <sys/syscall.h> /* For SYS_xxx definitions */ +#include <sched.h> +#include <errno.h> + +#include <psm2.h> /* required for core PSM2 functions */ +#include <psm2_mq.h> /* required for PSM2 MQ functions (send, recv, etc) */ + +//#define DEBUG +#ifdef DEBUG +#define dprintf printf +#else +#define dprintf {} +#endif + +#define BUFFER_LENGTH /*8000000*/(1ULL<<12) +#define CONNECT_ARRAY_SIZE 8 +void die(char *msg, int rc) { + fprintf(stderr, "%s: %d\n", msg, rc); + fflush(stderr); +} + +#define DIFFNSEC(end, start) ((end.tv_sec - start.tv_sec) * 1000000000UL + (end.tv_nsec - start.tv_nsec)) + +static inline void fixed_size_work() { + asm volatile( + "movq $0, %%rcx\n\t" + "1:\t" + "addq $1, %%rcx\n\t" + "cmpq $99, %%rcx\n\t" + "jle 1b\n\t" + : + : + : "rcx", "cc"); +} + +static inline void bulk_fsw(unsigned long n) { + int j; + for (j = 0; j < (n); j++) { + fixed_size_work(); + } +} + +double nspw; /* nsec per work */ +unsigned long nsec; + +void fwq_init() { + struct timespec start, end; 
+ int i; + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); +#define N_INIT 10000000 + bulk_fsw(N_INIT); + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + nsec = DIFFNSEC(end, start); + nspw = nsec / (double)N_INIT; +} + +#if 1 +void fwq(long delay_nsec) { + if (delay_nsec < 0) { + return; + //printf("%s: delay_nsec < 0\n", __FUNCTION__); + } + bulk_fsw(delay_nsec / nspw); +} +#else /* For machines with large core-to-core performance variation (e.g. OFP) */ +void fwq(long delay_nsec) { + struct timespec start, end; + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + + while (1) { + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + if (DIFFNSEC(end, start) >= delay_nsec) { + break; + } + bulk_fsw(2); /* ~150 ns per iteration on FOP */ + } +} +#endif + + +static int print_cpu_last_executed_on() { + char fn[256]; + char* result; + pid_t tid = syscall(SYS_gettid); + int fd; + int offset; + int mpi_errno = 0; + + sprintf(fn, "/proc/%d/task/%d/stat", getpid(), (int)tid); + //printf("fn=%s\n", fn); + fd = open(fn, O_RDONLY); + if(fd == -1) { + printf("open() failed\n"); + goto fn_fail; + } + + result = malloc(65536); + if(result == NULL) { + printf("malloc() failed"); + goto fn_fail; + } + + int amount = 0; + offset = 0; + while(1) { + amount = read(fd, result + offset, 65536); + // printf("amount=%d\n", amount); + if(amount == -1) { + printf("read() failed"); + goto fn_fail; + } + if(amount == 0) { + goto eof; + } + offset += amount; + } + eof:; + //printf("result:%s\n", result); + + char* next_delim = result; + char* field; + int i; + for(i = 0; i < 39; i++) { + field = strsep(&next_delim, " "); + } + + int cpu = sched_getcpu(); + if(cpu == -1) { + printf("getpu() failed\n"); + goto fn_fail; + } + + printf("compute thread,pmi_rank=%02d,stat-cpu=%02d,sched_getcpu=%02d,pid=%d,tid=%d\n", atoi(getenv("PMI_RANK")), atoi(field), cpu, getpid(), tid); fflush(stdout); + fn_exit: + free(result); + return mpi_errno; + fn_fail: + mpi_errno = -1; + goto fn_exit; +} + +static inline int 
on_same_node(int ppn, int me, int you) { + return (me / ppn == you / ppn); +} + +/* Helper functions to find the server's PSM2 endpoint identifier (epid). */ +psm2_epid_t find_server(int rank) { + FILE *fp = NULL; + psm2_epid_t server_epid = 0; + char fn[256]; + printf("%s: enter\n", __FUNCTION__); fflush(stdout); + + sprintf(fn, "psm2-demo-server-epid-%d", rank); + printf("PSM2 client waiting for epid mapping file to appear...\n"); fflush(stdout); + while (!fp) { + sleep(1); + fp = fopen(fn, "r"); + } + fscanf(fp, "%lx", &server_epid); + fclose(fp); + printf("PSM2 client found server epid = 0x%lx\n", server_epid); + return server_epid; +} + +void write_epid_to_file(int rank, psm2_epid_t myepid) { + FILE *fp; + char fn[256]; + printf("%s: enter\n", __FUNCTION__); + sprintf(fn, "psm2-demo-server-epid-%d", rank); + fp = fopen(fn, "w"); + if (!fp) { + fprintf(stderr, + "Exiting, couldn't write server's epid mapping file: "); + die(strerror(errno), errno); + } + fprintf(fp, "0x%lx", myepid); + fclose(fp); + printf("PSM2 server wrote epid = 0x%lx to file.\n", myepid); + return; +} + +psm2_uuid_t uuid; +psm2_ep_t myep; +psm2_epid_t myepid; +psm2_epid_t server_epid; +psm2_epid_t epid_array[CONNECT_ARRAY_SIZE]; +int epid_array_mask[CONNECT_ARRAY_SIZE]; +psm2_error_t epid_connect_errors[CONNECT_ARRAY_SIZE]; +psm2_epaddr_t epaddr_array[CONNECT_ARRAY_SIZE]; + +int my_psm2_init(int my_rank, int server_rank) { + struct psm2_ep_open_opts o; + int rc; + int ver_major = PSM2_VERNO_MAJOR; + int ver_minor = PSM2_VERNO_MINOR; + + printf("%s: my_rank=%d,server_rank=%d\n", __FUNCTION__, my_rank, server_rank); fflush(stdout); + memset(uuid, 0, sizeof(psm2_uuid_t)); /* Use a UUID of zero */ +/* Try to initialize PSM2 with the requested library version. + * * In this example, given the use of the PSM2_VERNO_MAJOR and MINOR + * * as defined in the PSM2 headers, ensure that we are linking with + * * the same version of PSM2 as we compiled against. 
*/ + + if ((rc = psm2_init(&ver_major, &ver_minor)) != PSM2_OK) { + die("couldn't init", rc); + } + printf("PSM2 init done.\n"); + /* Setup the endpoint options struct */ + if ((rc = psm2_ep_open_opts_get_defaults(&o)) != PSM2_OK) { + die("couldn't set default opts", rc); + } + printf("PSM2 opts_get_defaults done.\n"); + /* Attempt to open a PSM2 endpoint. This allocates hardware resources. */ + if ((rc = psm2_ep_open(uuid, &o, &myep, &myepid)) != PSM2_OK) { + die("couldn't psm2_ep_open()", rc); + } + printf("PSM2 endpoint open done.\n"); + + return 0; +} +int my_psm2_connect(int my_rank, int server_rank) { + int rc; + int is_server = (my_rank == server_rank) ? 1 : 0; + printf("%s: my_rank=%d,server_rank=%d\n", __FUNCTION__, my_rank, server_rank); fflush(stdout); + if (is_server) { + write_epid_to_file(my_rank, myepid); + } else { + server_epid = find_server(server_rank); + } + printf("%s: epid exchange done\n", __FUNCTION__); fflush(stdout); + if (is_server) { + /* Server does nothing here. A connection does not have to be + * * established to receive messages. */ + printf("PSM2 server up.\n"); + } else { + /* Setup connection request info */ + /* PSM2 can connect to a single epid per request, + * * or an arbitrary number of epids in a single connect call. + * * For this example, use part of an array of + * * connection requests. */ + memset(epid_array_mask, 0, sizeof(int) * CONNECT_ARRAY_SIZE); + epid_array[0] = server_epid; + epid_array_mask[0] = 1; + /* Begin the connection process. + * * note that if a requested epid is not responding, + * * the connect call will still return OK. + * * The errors array will contain the state of individual + * * connection requests. 
*/ + if ((rc = psm2_ep_connect(myep, + CONNECT_ARRAY_SIZE, + epid_array, + epid_array_mask, + epid_connect_errors, + epaddr_array, + 0 /* no timeout */ + )) != PSM2_OK) { + die("couldn't ep_connect", rc); + return -1; + } + printf("PSM2 connect request processed.\n"); + /* Now check if our connection to the server is ready */ + if (epid_connect_errors[0] != PSM2_OK) { + die("couldn't connect to server", epid_connect_errors[0]); + return -1; + } + printf("PSM2 client-server connection established.\n"); + } + return 0; +} +char msgbuf[BUFFER_LENGTH]; + +int my_psm2_sendrecv(int rank, int sender, int receiver) { + int is_server = (rank == receiver) ? 1 : 0; + int rc; + psm2_mq_t q; + psm2_mq_req_t req_mq; + //char msgbuf[BUFFER_LENGTH]; + + register long rsp asm ("rsp"); + printf("rsp=%lx.msgbuf=%p\n", rsp, msgbuf); fflush(stdout); + + memset(msgbuf, 0, BUFFER_LENGTH); + + /* Setup our PSM2 message queue */ + if ((rc = psm2_mq_init(myep, PSM2_MQ_ORDERMASK_NONE, NULL, 0, &q)) + != PSM2_OK) { + die("couldn't initialize PSM2 MQ", rc); + } + printf("PSM2 MQ init done.\n"); + if (is_server) { + psm2_mq_tag_t t = {0xABCD}; + psm2_mq_tag_t tm = {-1}; + /* Post the receive request */ + if ((rc = psm2_mq_irecv2(q, PSM2_MQ_ANY_ADDR, + &t, /* message tag */ + &tm, /* message tag mask */ + 0, /* no flags */ + msgbuf, BUFFER_LENGTH, + NULL, /* no context to add */ + &req_mq /* track irecv status */ + )) != PSM2_OK) { + die("couldn't post psm2_mq_irecv()", rc); + } + printf("PSM2 MQ irecv() posted\n"); + /* Wait until the message arrives */ + if ((rc = psm2_mq_wait(&req_mq, NULL)) != PSM2_OK) { + die("couldn't wait for the irecv", rc); + } + printf("PSM2 MQ wait() done.\n"); + printf("Message from client:\n"); + printf("%s", msgbuf); + + if (is_server) { + char fn[256]; + sprintf(fn, "psm2-demo-server-epid-%d", rank); + unlink(fn); + } + } else { + /* Say hello */ + snprintf(msgbuf, BUFFER_LENGTH, + "Hello world from epid=0x%lx, pid=%d.\n", + myepid, getpid()); + psm2_mq_tag_t t = 
{0xABCD}; + if ((rc = psm2_mq_send2(q, + epaddr_array[0], /* destination epaddr */ + PSM2_MQ_FLAG_SENDSYNC, /* no flags */ + &t, /* tag */ + msgbuf, BUFFER_LENGTH + )) != PSM2_OK) { + die("couldn't post psm2_mq_isend", rc); + } + printf("PSM2 MQ send() done.\n"); + } +/* Close down the MQ */ + if ((rc = psm2_mq_finalize(q)) != PSM2_OK) { + die("couldn't psm2_mq_finalize()", rc); + } + printf("PSM2 MQ finalized.\n"); +/* Close our ep, releasing all hardware resources. + * * Try to close all connections properly */ + if ((rc = psm2_ep_close(myep, PSM2_EP_CLOSE_GRACEFUL, + 0 /* no timeout */)) != PSM2_OK) { + die("couldn't psm2_ep_close()", rc); + } + printf("PSM2 ep closed.\n"); + /* Release all local PSM2 resources */ + if ((rc = psm2_finalize()) != PSM2_OK) { + die("couldn't psm2_finalize()", rc); + } + printf("PSM2 shut down, exiting.\n"); + return 0; +} + +static struct option options[] = { + { + .name = "ppn", + .has_arg = required_argument, + .flag = NULL, + .val = 'P', + }, + /* end */ + { NULL, 0, NULL, 0, }, +}; + +struct thr_arg { + pthread_barrier_t bar; + pthread_t pthread; + int rank; + int ppn; + int nproc; + int server_rank; +}; + +struct thr_arg thr_arg; + +void *progress_fn(void *arg) { + struct thr_arg *thr_arg = (struct thr_arg *)arg; + int rc; + int i; + + rc = syscall(732); + if (rc == -1) + fprintf(stdout, "CT09100 progress_fn running on Linux OK\n"); + else { + fprintf(stdout, "CT09100 progress_fn running on McKernel NG (%d)\n", rc); + } + + printf("progress,enter\n"); + + pthread_barrier_wait(&thr_arg->bar); + + pthread_barrier_wait(&thr_arg->bar); + + + printf("progress,exit\n"); + return NULL; +} + +int main(int argc, char **argv) { + int rc; + int actual; + int nproc; + int ppn = -1; + int my_rank = -1, size = -1; + int i, j; + struct timespec start, end; + long t_pure_l, t_overall_l; + long t_pure, t_overall; + int opt; + pthread_barrierattr_t barrierattr; + + fwq_init(); + + while ((opt = getopt_long(argc, argv, "+P:", options, NULL)) != 
-1) { + switch (opt) { + case 'P': + ppn = atoi(optarg); + break; + default: /* '?' */ + printf("unknown option %c\n", optopt); + exit(1); + } + } + + if (ppn == -1) { + printf("specify ppn with --ppn"); + exit(1); + } + + char *rank_str = getenv("PMI_RANK"); + if (!rank_str) { + printf("getenv failed\n"); + exit(1); + } + my_rank = atoi(rank_str); + printf("my_rank=%d\n", my_rank); fflush(stdout); + + nproc = 2; + + if (my_rank == 0) { + printf("tid=%d,pid=%d,nproc=%d\n", syscall(__NR_gettid), getpid(), nproc); + printf("nsec=%ld, nspw=%f\n", nsec, nspw); + } + + + /* Spawn a thread */ + thr_arg.rank = my_rank; + thr_arg.ppn = ppn; + thr_arg.nproc = nproc; + thr_arg.server_rank = ppn + (my_rank % ppn); + + pthread_barrierattr_init(&barrierattr); + pthread_barrier_init(&thr_arg.bar, &barrierattr, nproc); + + char *uti_str = getenv("DISABLE_UTI"); + int uti_val = uti_str ? atoi(uti_str) : 0; + if (!uti_val) { + rc = syscall(731, 1, NULL); + if (rc) { + fprintf(stdout, "CT09003 INFO: uti not available (rc=%d)\n", rc); + } else { + fprintf(stdout, "CT09003 INFO: uti available\n"); + } + } else { + fprintf(stdout, "CT09003 INFO: uti disabled\n"); + } + + rc = pthread_create(&thr_arg.pthread, NULL, progress_fn, &thr_arg); + if (rc){ + fprintf(stdout, "pthread_create: %d\n", rc); + exit(1); + } + + pthread_barrier_wait(&thr_arg.bar); + + my_psm2_init(thr_arg.rank, thr_arg.server_rank); + my_psm2_connect(thr_arg.rank, thr_arg.server_rank); + + for (i = 0; i < thr_arg.nproc; i++) { + if (!on_same_node(thr_arg.ppn, thr_arg.rank, i)) { + if (thr_arg.rank < i) { + my_psm2_sendrecv(thr_arg.rank, thr_arg.rank, i); + } else { + my_psm2_sendrecv(thr_arg.rank, i, thr_arg.rank); + } + } + } + + pthread_barrier_wait(&thr_arg.bar); + + pthread_join(thr_arg.pthread, NULL); + + fn_exit: + return 0; + fn_fail: + goto fn_exit; +} diff --git a/test/uti/mpi/011.c b/test/uti/mpi/011.c new file mode 100755 index 00000000..0cc48cb3 --- /dev/null +++ b/test/uti/mpi/011.c @@ -0,0 +1,220 @@ 
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <sys/mman.h>
+#include <mpi.h>
+#include <unistd.h>
+#include <sys/syscall.h> /* For SYS_xxx definitions */
+#include <sched.h>
+
+//#define DEBUG
+#ifdef DEBUG
+#define dprintf printf
+#else
+#define dprintf {}
+#endif
+
+#define SZENTRY_DEFAULT (65536) /* Size of one slot */
+#define NENTRY_DEFAULT 10000 /* Number of slots */
+
+#define DIFFNSEC(end, start) ((end.tv_sec - start.tv_sec) * 1000000000UL + (end.tv_nsec - start.tv_nsec))
+
+/*
+ * Print the CPU the calling thread is associated with, taken from two
+ * sources: the 39th space-separated field of /proc/<pid>/task/<tid>/stat
+ * and the value returned by sched_getcpu().
+ *
+ * Returns 0 on success and -1 when the stat file cannot be opened, read or
+ * parsed, when the buffer cannot be allocated, or when sched_getcpu() fails.
+ */
+static int print_cpu_last_executed_on() {
+	enum {
+		STAT_BUF_SIZE = 65536,	/* capacity of the buffer holding the stat file */
+		STAT_CPU_FIELD = 39	/* 1-based index of the field printed as stat-cpu */
+	};
+	char fn[256];
+	char *result = NULL;	/* NULL until malloc() so fn_exit may free() unconditionally */
+	char *next_delim;
+	char *field = NULL;
+	char *rank_str;
+	pid_t tid = syscall(SYS_gettid);
+	ssize_t amount;
+	int fd = -1;
+	int offset = 0;
+	int cpu;
+	int i;
+	int mpi_errno = 0;
+
+	snprintf(fn, sizeof(fn), "/proc/%d/task/%d/stat", (int)getpid(), (int)tid);
+	//printf("fn=%s\n", fn);
+	fd = open(fn, O_RDONLY);
+	if (fd == -1) {
+		printf("open() failed\n");
+		goto fn_fail;
+	}
+
+	result = malloc(STAT_BUF_SIZE);
+	if (result == NULL) {
+		printf("malloc() failed");
+		goto fn_fail;
+	}
+
+	/*
+	 * Never ask read() for more than the space that is left, and keep one
+	 * byte back for the terminator written below.
+	 */
+	while (offset < STAT_BUF_SIZE - 1) {
+		amount = read(fd, result + offset, STAT_BUF_SIZE - 1 - offset);
+		// printf("amount=%d\n", amount);
+		if (amount == -1) {
+			printf("read() failed");
+			goto fn_fail;
+		}
+		if (amount == 0) {
+			break;
+		}
+		offset += amount;
+	}
+	result[offset] = '\0';	/* strsep() and atoi() require a C string */
+	//printf("result:%s\n", result);
+
+	/*
+	 * Walk to the STAT_CPU_FIELD-th field.
+	 * NOTE(review): splitting on spaces assumes the comm field (2nd field)
+	 * contains no space -- confirm for the binaries this test runs as.
+	 */
+	next_delim = result;
+	for (i = 0; i < STAT_CPU_FIELD; i++) {
+		field = strsep(&next_delim, " ");
+	}
+	if (field == NULL) {
+		printf("stat has fewer than %d fields\n", STAT_CPU_FIELD);
+		goto fn_fail;
+	}
+
+	cpu = sched_getcpu();
+	if (cpu == -1) {
+		printf("getpu() failed\n");
+		goto fn_fail;
+	}
+
+	/* PMI_RANK may be unset when the program is not started by the launcher. */
+	rank_str = getenv("PMI_RANK");
+	printf("compute thread,pmi_rank=%02d,stat-cpu=%02d,sched_getcpu=%02d,tid=%d\n", rank_str ? atoi(rank_str) : -1, atoi(field), cpu, (int)tid); fflush(stdout);
+ fn_exit:
+	free(result);
+	if (fd != -1) {
+		close(fd);
+	}
+	return mpi_errno;
+ fn_fail:
+	mpi_errno = -1;
+	goto fn_exit;
+}
+
+/*
+ * One round of the exchange.
+ *
+ * Rank 1 posts nentry non-blocking sends of szentry bytes to dest and waits
+ * for all of them. Every other rank posts nentry non-blocking receives from
+ * src, sleeps for usec microseconds, and then waits for all of them.
+ * A progress character is printed roughly every tenth of the requests when
+ * nentry is larger than 10.
+ *
+ * reqs and status must each have room for nentry elements.
+ */
+void sendrecv(int rank, int nentry, char **sendv, char **recvv, int szentry, int src, int dest, MPI_Request* reqs, MPI_Status* status, double usec) {
+	int i;
+	if(rank == 1) {
+		for(i = 0; i < nentry; i++) {
+			MPI_Isend(sendv[i], szentry, MPI_CHAR, dest, 0, MPI_COMM_WORLD, &reqs[i]);
+			if (nentry > 10 && i % (nentry / 10) == 0) {
+				printf("s"); fflush(stdout);
+			}
+		}
+		MPI_Waitall(nentry, reqs, status);
+		printf("w\n"); fflush(stdout);
+	} else {
+		for(i = 0; i < nentry; i++) {
+			MPI_Irecv(recvv[i], szentry, MPI_CHAR, src, 0, MPI_COMM_WORLD, &reqs[i]);
+			if (nentry > 10 && i % (nentry / 10) == 0) {
+				printf("r"); fflush(stdout);
+			}
+		}
+		usleep(usec);
+		MPI_Waitall(nentry, reqs, status);
+		printf("W\n"); fflush(stdout);
+	}
+}
+
+/*
+ * Usage: 011 [szentry nentry]
+ *
+ * Allocates nentry send and receive slots of szentry bytes each, then runs
+ * sendrecv() twice: first with no delay, then with the receiver sleeping for
+ * the duration rank 0 measured for the first round.
+ *
+ * Returns 0 on success and 1 when an allocation or mapping fails.
+ */
+int main(int argc, char **argv) {
+	int my_rank = -1, size = -1;
+	int i;
+	int ret = 0;
+	char **sendv, **recvv;
+	MPI_Status* status;
+	MPI_Request* reqs;
+	long szentry;
+	long nentry;
+	int src, dest;
+	struct timespec start, end;
+	/* Only rank 0 measures the first round; other ranks sleep for 0 usec. */
+	double diffusec = 0;
+
+	if(argc == 3) {
+		szentry = atoi(argv[1]);
+		nentry = atoi(argv[2]);
+	} else {
+		szentry = SZENTRY_DEFAULT;
+		nentry = NENTRY_DEFAULT;
+	}
+	printf("szentry=%ld,nentry=%ld\n", szentry, nentry);
+
+	status = malloc(sizeof(MPI_Status) * nentry);
+	reqs = malloc(sizeof(MPI_Request) * nentry);
+	if(!status || !reqs) {
+		/* MPI is not initialized yet, so do not go through fn_exit. */
+		printf("malloc failed");
+		return 1;
+	}
+
+	int actual;
+
+	MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual);
+	printf("Thread support level is %d\n", actual);
+
+	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
+	MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+	/* Ring neighbours */
+	src = (size + my_rank - 1) % size;
+	dest = (my_rank + 1) % size;
+
+	printf("rank=%d, size=%d, src=%d, dest=%d\n", my_rank, size, src, dest);
+
+	sendv = malloc(sizeof(char *) * nentry);
+	if(!sendv) { printf("malloc failed"); goto fn_fail; }
+	for (i = 0; i < nentry; i++) {
+#if 0
+		int fd;
+		fd = open("./file", O_RDWR);
+		if(fd == -1) { printf("open failed\n"); goto fn_fail; }
+		sendv[i] = (char*)mmap(0, szentry, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+#else
+		sendv[i] = (char*)mmap(0, szentry, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+#endif
+		if(sendv[i] == MAP_FAILED) { printf("mmap failed"); goto fn_fail; }
+		dprintf("[%d] sendv[%d]=%p\n", my_rank, i, sendv[i]);
+		memset(sendv[i], 0xaa, szentry);
+	}
+
+	recvv = malloc(sizeof(char *) * nentry);
+	if(!recvv) { printf("malloc failed"); goto fn_fail; }
+	for (i = 0; i < nentry; i++) {
+#if 0
+		int fd;
+		fd = open("./file", O_RDWR);
+		if(fd == -1) { printf("open failed\n"); goto fn_fail; }
+		recvv[i] = (char*)mmap(0, szentry, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
+#else
+		recvv[i] = (char*)mmap(0, szentry, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+#endif
+		if(recvv[i] == MAP_FAILED) { printf("mmap failed"); goto fn_fail; }
+		dprintf("[%d] recvv[%d]=%p\n", my_rank, i, recvv[i]);
+		memset(recvv[i], 0, szentry);
+	}
+
+	printf("after memset\n");
+
+	print_cpu_last_executed_on();
+
+#pragma omp parallel for
+	for (i = 0; i < omp_get_num_threads(); i++) {
+		/* syscall() returns long; convert to match the %d conversion */
+		printf("thread_num=%d,tid=%d\n", i, (int)syscall(SYS_gettid));
+	}
+
+	for (i = 0; i < 1; i++) {
+		/* Round 1: no delay on the receiving side; rank 0 times it */
+		MPI_Barrier(MPI_COMM_WORLD);
+		if(my_rank == 0) {
+			clock_gettime(CLOCK_REALTIME, &start);
+		}
+		sendrecv(my_rank, nentry, sendv, recvv, szentry, src, dest, reqs, status, 0);
+		MPI_Barrier(MPI_COMM_WORLD);
+		if(my_rank == 0) {
+			clock_gettime(CLOCK_REALTIME, &end);
+			diffusec = DIFFNSEC(end, start) / (double)1000;
+			printf("%4.4f sec\n", DIFFNSEC(end, start) / (double)1000000000); fflush(stdout);
+		}
+
+		/* Round 2: receiver sleeps for the round-1 duration before waiting */
+		MPI_Barrier(MPI_COMM_WORLD);
+		if(my_rank == 0) {
+			clock_gettime(CLOCK_REALTIME, &start);
+		}
+		sendrecv(my_rank, nentry, sendv, recvv, szentry, src, dest, reqs, status, diffusec);
+		MPI_Barrier(MPI_COMM_WORLD);
+		if(my_rank == 0) {
+			clock_gettime(CLOCK_REALTIME, &end);
+			printf("%4.4f sec\n", DIFFNSEC(end, start) / (double)1000000000); fflush(stdout);
+		}
+	}
+
+ fn_exit:
+	MPI_Finalize();
+	return ret;
+ fn_fail:
+	ret = 1;
+	goto fn_exit;
+}
diff --git a/test/uti/mpi/012.c b/test/uti/mpi/012.c
new file mode 100755
index 00000000..9510de5e
--- /dev/null
+++ b/test/uti/mpi/012.c
@@ -0,0 +1,338 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <sys/mman.h>
+#include <mpi.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <sys/syscall.h> /* For SYS_xxx definitions */
+#include <sched.h>
+
+//#define DEBUG
+#ifdef DEBUG
+#define dprintf printf
+#else
+#define dprintf {}
+#endif
+
+#define DIFFNSEC(end, start) ((end.tv_sec - start.tv_sec) * 1000000000UL + (end.tv_nsec - start.tv_nsec))
+
+#define BEGIN_EPOCH(win) do { MPI_Win_lock_all(0, win); } while(0)
+#define END_EPOCH(win) do { MPI_Win_unlock_all(win); } while(0)
+#define FLUSH(win) do { MPI_Win_flush_local_all(win); } while(0)
+
+
+/*
+ * One unit of busy work: clears rcx, then keeps adding 1 to it while the
+ * result is <= 99. x86-64 inline assembly.
+ */
+static inline void fixed_size_work() {
+	asm volatile(
+		"movq $0, %%rcx\n\t"
+		"1:\t"
+		"addq $1, %%rcx\n\t"
+		"cmpq $99, %%rcx\n\t"
+		"jle 1b\n\t"
+		:
+		:
+		: "rcx", "cc");
+}
+
+/* Run fixed_size_work() n times. */
+static inline void bulk_fsw(unsigned long n) {
+	/* Same type as n: an int counter would overflow for n > INT_MAX */
+	unsigned long j;
+	for (j = 0; j < (n); j++) {
+		fixed_size_work();
+	}
+}
+
+double nspw; /* nsec per work */
+unsigned long nsec;
+
+/*
+ * Calibrate the busy loop: measure the thread CPU time taken by N_INIT
+ * calls of fixed_size_work(), store the total in nsec and the per-call
+ * average in nspw.
+ */
+void fwq_init() {
+	struct timespec start, end;
+	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
+#define N_INIT 10000000
+	bulk_fsw(N_INIT);
+	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
+	nsec = DIFFNSEC(end, start);
+	nspw = nsec / (double)N_INIT;
+}
+
+#if 0
+/* Disabled variant: converts the delay into a call count using nspw. */
+void fwq(long delay_nsec) {
+	if (delay_nsec < 0) {
+		return;
+		//printf("%s: delay_nsec < 0\n", __FUNCTION__);
+	}
+	bulk_fsw(delay_nsec / nspw);
+}
+#else /* For machines with large core-to-core performance variation (e.g.
OFP) */ +void fwq(long delay_nsec) { + struct timespec start, end; + + if (delay_nsec < 0) { return; } + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + + while (1) { + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + if (DIFFNSEC(end, start) >= delay_nsec) { + break; + } + bulk_fsw(2); /* ~150 ns per iteration on FOP */ + } +} +#endif + + +static int print_cpu_last_executed_on() { + char fn[256]; + char* result; + pid_t tid = syscall(SYS_gettid); + int fd; + int offset; + int mpi_errno = 0; + + sprintf(fn, "/proc/%d/task/%d/stat", getpid(), (int)tid); + //printf("fn=%s\n", fn); + fd = open(fn, O_RDONLY); + if(fd == -1) { + printf("open() failed\n"); + goto fn_fail; + } + + result = malloc(65536); + if(result == NULL) { + printf("malloc() failed"); + goto fn_fail; + } + + int amount = 0; + offset = 0; + while(1) { + amount = read(fd, result + offset, 65536); + // printf("amount=%d\n", amount); + if(amount == -1) { + printf("read() failed"); + goto fn_fail; + } + if(amount == 0) { + goto eof; + } + offset += amount; + } + eof:; + //printf("result:%s\n", result); + + char* next_delim = result; + char* field; + int i; + for(i = 0; i < 39; i++) { + field = strsep(&next_delim, " "); + } + + int cpu = sched_getcpu(); + if(cpu == -1) { + printf("getpu() failed\n"); + goto fn_fail; + } + + printf("compute thread,pmi_rank=%02d,stat-cpu=%02d,sched_getcpu=%02d,tid=%d\n", atoi(getenv("PMI_RANK")), atoi(field), cpu, tid); fflush(stdout); + fn_exit: + free(result); + return mpi_errno; + fn_fail: + mpi_errno = -1; + goto fn_exit; +} + +static inline int on_same_node(int ppn, int me, int you) { + return (me / ppn == you / ppn); +} + +/* get_acc-calc-flush_local */ +void rma(int nproc, int ppn, int rank, double *wbuf, double *rbuf, double *result, int ndoubles, MPI_Win win, long calc_nsec) { + int i, j; + int r = 0, s = 0; + int req = 0; + for (i = 0; i < nproc; i++) { + if (!on_same_node(ppn, rank, i)) { + for (j = 0; j < ndoubles; j++) { + //printf("i=%d,j=%d,rbuf=%f,wbuf=%f\n", 
i, j, rbuf[i * ndoubles + j], wbuf[i * ndoubles + j]); + MPI_Get_accumulate(rbuf + i * ndoubles + j, 1, MPI_DOUBLE, + result + i * ndoubles + j, 1, MPI_DOUBLE, + i, i * ndoubles + j, 1, MPI_DOUBLE, + MPI_SUM, win); + } + } + } + fwq(calc_nsec); + FLUSH(win); +} + +static struct option options[] = { + { + .name = "ppn", + .has_arg = required_argument, + .flag = NULL, + .val = 'P', + }, + /* end */ + { NULL, 0, NULL, 0, }, +}; + +int main(int argc, char **argv) { + int rc; + int actual; + int ppn = -1; + int nproc; + int ndoubles = -1; + int my_rank = -1, size = -1; + int i, j; + double *wbuf, *rbuf, *result; + MPI_Win win; + struct timespec start, end; + long t_flush_l, t_pure_l, t_overall_l; + long t_flush, t_pure, t_overall; + int opt; + + fwq_init(); + + while ((opt = getopt_long(argc, argv, "+d:P:", options, NULL)) != -1) { + switch (opt) { + case 'd': + ndoubles = (1ULL << atoi(optarg)); + break; + case 'P': + ppn = atoi(optarg); + break; + default: /* '?' */ + printf("unknown option %c\n", optopt); + exit(1); + } + } + + if (ndoubles == -1 || ppn == -1) { + printf("specify ndoubles with -d and ppn with --ppn"); + exit(1); + } + + MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual); + if (actual != 3) { + printf("ERROR: MPI_THREAD_MULTIPLE not available (level was set to %d)\n", actual); + exit(1); + } + + MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + MPI_Comm_size(MPI_COMM_WORLD, &nproc); + + if (my_rank == 0) { + printf("ndoubles=%d,nproc=%d\n", ndoubles, nproc); + printf("nsec=%ld, nspw=%f\n", nsec, nspw); + } + + /* accumulate-to buffer */ + wbuf = malloc(sizeof(double) * ndoubles * nproc); + if(!wbuf) { printf("malloc failed"); goto fn_fail; } + memset(wbuf, 0, sizeof(double) * ndoubles * nproc); + + /* read-from buffer */ + rbuf = malloc(sizeof(double) * ndoubles * nproc); + if(!rbuf) { printf("malloc failed"); goto fn_fail; } + memset(rbuf, 0, sizeof(double) * ndoubles * nproc); + + /* fetch-to buffer */ + result = malloc(sizeof(double) * ndoubles * 
nproc); + if(!result) { printf("malloc failed"); goto fn_fail; } + memset(result, 0, sizeof(double) * ndoubles * nproc); + + /* Expose accumulate-to buffer*/ + if (rc = MPI_Win_create(wbuf, sizeof(double) * ndoubles * nproc, sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &win)) { + printf("MPI_Win_create failed,rc=%d\n", rc); + } + + //print_cpu_last_executed_on(); + + for (i = 0; i < nproc; i++) { + for (j = 0; j < ndoubles; j++) { + wbuf[i * ndoubles + j] = (i + 1) * 1000 + (j + 1); + rbuf[i * ndoubles + j] = (i + 1) * 10000 + (j + 1); + result[i * ndoubles + j] = (i + 1) * 100000 + (j + 1); + } + } + +#if 0 + for (i = 0; i < nproc; i++) { + for (j = 0; j < ndoubles; j++) { + printf("wbuf,proc=%d,j=%d,val=%f\n", i, j, wbuf[i * ndoubles + j]); + printf("rbuf,proc=%d,j=%d,val=%f\n", i, j, rbuf[i * ndoubles + j]); + printf("result,proc=%d,j=%d,val=%f\n", i, j, result[i * ndoubles + j]); + } + } +#endif + /* Measure flush time */ + MPI_Barrier(MPI_COMM_WORLD); +#define NFENCE 10 + BEGIN_EPOCH(win); + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + for (i = 0; i < NFENCE; i++) { + FLUSH(win); + } + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + END_EPOCH(win); + t_flush_l = DIFFNSEC(end, start) / NFENCE; + //printf("t_flush (local): %ld usec\n", t_flush_l / 1000UL); + MPI_Allreduce(&t_flush_l, &t_flush, 1, MPI_LONG, MPI_MAX, MPI_COMM_WORLD); + if (my_rank == 0) printf("t_flush (max): %ld usec\n", t_flush / 1000UL); + + /* Measure get_acc-flush time */ + MPI_Barrier(MPI_COMM_WORLD); +#define NPURE 10 + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + for (i = 0; i < NPURE; i++) { + BEGIN_EPOCH(win); + rma(nproc, ppn, my_rank, wbuf, rbuf, result, ndoubles, win, 0); + END_EPOCH(win); + } + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + t_pure_l = DIFFNSEC(end, start) / NPURE; + //printf("t_pure (local): %ld usec\n", t_pure_l / 1000UL); + MPI_Allreduce(&t_pure_l, &t_pure, 1, MPI_LONG, MPI_MAX, MPI_COMM_WORLD); + if (my_rank == 0) printf("t_pure (max): %ld usec\n", 
#!/usr/bin/bash

#!/usr/bin/bash -x

# Driver for the 012 MPI RMA overlap test.
#
# Generates ./job.sh (environment setup plus an mpiexec.hydra command line)
# in ${UTI_MPI_TOP} and, depending on the options, stops/reboots McKernel
# and builds and runs the test.
#
# Options:
#   -s        stop McKernel (only with -m)
#   -r        (re)boot McKernel (only with -m)
#   -g        build the test and run job.sh
#   -a <0|1>  value exported as I_MPI_ASYNC_PROGRESS
#   -n <n>    value passed to the test as -d
#   -m        run on McKernel through mcexec
#   -d        export DISABLE_UTI=1
#   -h        pass --enable-hfi1 to mcexec
#   -N <n>    number of nodes
#   -P <n>    processes per node
#   -o <n>    OMP_NUM_THREADS

MYHOME=/work/gg10/e29005
UTI_MPI_TOP=${MYHOME}/project/os/mckernel/test/uti/mpi

MCK=${MYHOME}/project/os/install
unset DISABLE_UTI

stop=0
reboot=0
go=0

async=0
mck=0
nnodes=2
LASTNODE=8200
ndoubles=10 #12-15
# use_hfi is tested below, so it needs a value even when -h is not given
use_hfi=0
omp_num_threads=1
ppn=1 #16
async_progress_pin=64,132,200,268,65,133,201,269,66,134,202,270,67,135,203,271
lpp=4 # logical-per-physical
ncpu_mt=256 # number of CPUs for main-thread

while getopts srga:c:n:mdl:N:P:ho: OPT
do
    case ${OPT} in
    s) stop=1
        ;;
    r) reboot=1
        ;;
    g) go=1
        ;;
    a) async=$OPTARG
        ;;
    n) ndoubles=$OPTARG
        ;;
    m) mck=1
        ;;
    d) export DISABLE_UTI=1
        ;;
    N) nnodes=$OPTARG
        ;;
    P) ppn=$OPTARG
        ;;
    h) use_hfi=1
        ;;
    o) omp_num_threads=$OPTARG
        ;;
    *) echo "invalid option -${OPT}" >&2
        exit 1
    esac
done

nprocs=$((ppn * nnodes))
nodes=`echo $(seq -s ",c" $(($LASTNODE + 1 - $nnodes)) $LASTNODE) | sed 's/^/c/'`
echo nprocs=$nprocs nnodes=$nnodes ppn=$ppn nodes=$nodes

if [ ${mck} -eq 1 ]; then
    # The install prefix is ${MCK} in this script; ${mck_dir} is not defined here.
    mcexec="${MCK}/bin/mcexec"
    nmcexecthr=$((omp_num_threads + 4))
    # NOTE(review): uti_thread_rank is not set anywhere in this script, so it
    # is taken from the environment (empty if unset) -- confirm intended value.
    mcexecopt="--uti-thread-rank=$uti_thread_rank"
    if [ ${use_hfi} -eq 1 ]; then
        mcexecopt="--enable-hfi1 $mcexecopt"
    fi
    mcexecopt="-n $ppn -t $nmcexecthr $mcexecopt"
else
    mcexec=
    mcexecopt=
fi

if [ ${mck} -eq 1 ]; then
    i_mpi_pin=off
    i_mpi_pin_domain=
    i_mpi_pin_order=
else
    # Let each domain have all logical cores and use KMP_AFFINITY=scatter if you want to use only physical cores
    i_mpi_pin=on
    if [ $((omp_num_threads * lpp * ppn)) -le $ncpu_mt ]; then
        domain=$((omp_num_threads * lpp)) # Prefer physical but adjacent physicals share L1
    else
        domain=$((ncpu_mt / ppn)) # Use logical as well
    fi
    i_mpi_pin_domain="export I_MPI_PIN_DOMAIN=$domain"
    i_mpi_pin_order="export I_MPI_PIN_ORDER=compact"
fi

if [ $async -eq 0 ] || [ "$async_progress_pin" == "" ] ; then
    i_mpi_async_progress_pin=
else
    i_mpi_async_progress_pin="export I_MPI_ASYNC_PROGRESS_PIN=$async_progress_pin"
fi

if [ ${stop} -eq 1 ]; then
    PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes sudo mount /work

    if [ ${mck} -eq 1 ]; then
        sudo ${MCK}/sbin/mcstop+release.sh
    else
        :
    fi
fi

if [ ${reboot} -eq 1 ]; then
    PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes sudo mount /work

    if [ ${mck} -eq 1 ]; then
        if hostname | grep ofp &>/dev/null; then
            sudo ${MCK}/sbin/mcreboot.sh -s -c 2-17,70-85,138-153,206-221,20-35,88-103,156-171,224-239,36-51,104-119,172-187,240-255,52-67,120-135,188-203,256-271 -r 2-5,70-73,138-141,206-209:0+6-9,74-77,142-145,210-213:1+10-13,78-81,146-149,214-217:68+14-17,82-85,150-153,218-221:69+20-23,88-91,156-159,224-227:136+24-27,92-95,160-163,228-231:137+28-31,96-99,164-167,232-235:204+32-35,100-103,168-171,236-239:205+36-39,104-107,172-175,240-243:18+40-43,108-111,176-179,244-247:19+44-47,112-115,180-183,248-251:86+48-51,116-119,184-187,252-255:87+52-55,120-123,188-191,256-259:154+56-59,124-127,192-195,260-263:155+60-63,128-131,196-199,264-267:222+64-67,132-135,200-203,268-271:223 -m 32G@0,12G@1
        else
            sudo ${MCK}/sbin/mcreboot.sh -s -c 1-15,65-79,129-143,193-207,17-31,81-95,145-159,209-223,33-47,97-111,161-175,225-239,49-63,113-127,177-191,241-255 -r 1-15:0+65-79:64+129-143:128+193-207:192+17-31:16+81-95:80+145-159:144+209-223:208+33-47:32+97-111:96+161-175:160+225-239:224+49-63:48+113-127:112+177-191:176+241-255:240 -m 12G@0,12G@1,12G@2,12G@3,3920M@4,3920M@5,3920M@6,3920M@7
        fi
    else
        :
    fi
fi

# job.sh is written relative to the current directory, so do not continue
# if the test directory cannot be entered.
cd "${UTI_MPI_TOP}" || exit 1
(
cat <<EOF
#!/bin/sh

export I_MPI_HYDRA_BOOTSTRAP_EXEC=/usr/bin/ssh
export I_MPI_HYDRA_BOOTSTRAP=ssh

export OMP_NUM_THREADS=$omp_num_threads
#export OMP_STACKSIZE=64M
export KMP_BLOCKTIME=1
export PSM2_RCVTHREAD=0

export I_MPI_PIN=$i_mpi_pin
$i_mpi_pin_domain
$i_mpi_pin_order

export HFI_NO_CPUAFFINITY=1
export I_MPI_COLL_INTRANODE_SHM_THRESHOLD=4194304
export I_MPI_FABRICS=shm:tmi
export PSM2_RCVTHREAD=0
export I_MPI_TMI_PROVIDER=psm2
export I_MPI_FALLBACK=0
export PSM2_MQ_RNDV_HFI_WINDOW=4194304
export PSM2_MQ_EAGER_SDMA_SZ=65536
export PSM2_MQ_RNDV_HFI_THRESH=200000

export MCKERNEL_RLIMIT_STACK=32M,16G
export KMP_STACKSIZE=64m
export KMP_AFFINITY=granularity=thread,scatter
#export KMP_HW_SUBSET=64c,1t

export I_MPI_ASYNC_PROGRESS=$async
$i_mpi_async_progress_pin

#export I_MPI_STATS=native:20,ipm
export I_MPI_STATS=ipm
export I_MPI_DEBUG=4
#export I_MPI_HYDRA_DEBUG=on

mpiexec.hydra -l -n $nprocs -ppn $ppn -hosts $nodes $ilpopt $mcexec $mcexecopt `pwd`/012 --ppn $ppn -d $ndoubles

EOF
) > ./job.sh
chmod u+x ./job.sh

if [ ${go} -eq 1 ]; then
    cd "${UTI_MPI_TOP}" || exit 1
    if [ $mck -eq 1 ]; then
        make clean && make 012
    else
        make clean && make CC=mpiicc 012
    fi
    ./job.sh
fi
/*
 * Tell whether ranks `me` and `you` fall into the same block of `ppn`
 * consecutive ranks (ranks [k * ppn, (k + 1) * ppn) share block k).
 *
 * ppn: processes per node as given on the command line
 * me, you: ranks to compare
 *
 * Returns non-zero when both ranks are in the same block, 0 otherwise.
 * A non-positive ppn cannot describe a block size (and ppn == 0 would
 * divide by zero), so in that case every rank is treated as being alone:
 * the result is non-zero only when me == you.
 */
static inline int on_same_node(int ppn, int me, int you) {
	if (ppn <= 0) {
		return me == you;
	}
	return (me / ppn == you / ppn);
}
i, j, rbuf[i * ndoubles + j], wbuf[i * ndoubles + j]); + if (!flush_only) { + MPI_Accumulate(rbuf + i * ndoubles + j, 1, MPI_DOUBLE, + i, i * ndoubles + j, 1, MPI_DOUBLE, + MPI_SUM, win); + } + MPI_Win_flush_local(i, win); + } + } + } + fwq(calc_nsec); +} + +static struct option options[] = { + { + .name = "ppn", + .has_arg = required_argument, + .flag = NULL, + .val = 'P', + }, + /* end */ + { NULL, 0, NULL, 0, }, +}; + +int main(int argc, char **argv) { + int rc; + int actual; + int ppn = -1; + int nproc; + int ndoubles = -1; + int my_rank = -1, size = -1; + int i, j; + double *wbuf, *rbuf; + MPI_Win win; + struct timespec start, end; + long t_flush_l, t_pure_l, t_overall_l; + long t_flush, t_pure, t_overall; + int opt; + + fwq_init(); + + while ((opt = getopt_long(argc, argv, "+d:P:", options, NULL)) != -1) { + switch (opt) { + case 'd': + ndoubles = (1ULL << atoi(optarg)); + break; + case 'P': + ppn = atoi(optarg); + break; + default: /* '?' */ + printf("unknown option %c\n", optopt); + exit(1); + } + } + + if (ndoubles == -1 || ppn == -1) { + printf("specify ndoubles with -d and ppn with --ppn"); + exit(1); + } + + MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual); + if (actual != 3) { + printf("ERROR: MPI_THREAD_MULTIPLE not available (level was set to %d)\n", actual); + exit(1); + } + + MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + MPI_Comm_size(MPI_COMM_WORLD, &nproc); + + if (my_rank == 0) { + printf("ndoubles=%d,nproc=%d\n", ndoubles, nproc); + printf("nsec=%ld, nspw=%f\n", nsec, nspw); + } + + /* accumulate-to buffer */ + wbuf = malloc(sizeof(double) * ndoubles * nproc); + if(!wbuf) { printf("malloc failed"); goto fn_fail; } + memset(wbuf, 0, sizeof(double) * ndoubles * nproc); + + /* read-from buffer */ + rbuf = malloc(sizeof(double) * ndoubles * nproc); + if(!rbuf) { printf("malloc failed"); goto fn_fail; } + memset(rbuf, 0, sizeof(double) * ndoubles * nproc); + + /* Expose accumulate-to buffer*/ + if (rc = MPI_Win_create(wbuf, sizeof(double) * 
ndoubles * nproc, sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &win)) { + printf("MPI_Win_create failed,rc=%d\n", rc); + } + + //print_cpu_last_executed_on(); + + for (i = 0; i < nproc; i++) { + for (j = 0; j < ndoubles; j++) { + wbuf[i * ndoubles + j] = (i + 1) * 1000 + (j + 1); + rbuf[i * ndoubles + j] = (i + 1) * 10000 + (j + 1); + } + } + +#if 0 + for (i = 0; i < nproc; i++) { + for (j = 0; j < ndoubles; j++) { + printf("wbuf,proc=%d,j=%d,val=%f\n", i, j, wbuf[i * ndoubles + j]); + printf("rbuf,proc=%d,j=%d,val=%f\n", i, j, rbuf[i * ndoubles + j]); + } + } +#endif + + /* Measure flush time */ + MPI_Barrier(MPI_COMM_WORLD); +#define NFENCE 10 + BEGIN_EPOCH(win); + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + for (i = 0; i < NFENCE; i++) { + rma(nproc, ppn, my_rank, wbuf, rbuf, ndoubles, win, 0, 1); + } + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + END_EPOCH(win); + t_flush_l = DIFFNSEC(end, start) / NFENCE; + //printf("t_flush (local): %ld usec\n", t_flush_l / 1000UL); + MPI_Allreduce(&t_flush_l, &t_flush, 1, MPI_LONG, MPI_MAX, MPI_COMM_WORLD); + if (my_rank == 0) printf("t_flush (max): %ld usec\n", t_flush / 1000UL); + + /* Measure get_acc-flush time */ + MPI_Barrier(MPI_COMM_WORLD); +#define NPURE 10 + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + //MPI_Pcontrol(1, "rma"); + for (i = 0; i < NPURE; i++) { + BEGIN_EPOCH(win); + rma(nproc, ppn, my_rank, wbuf, rbuf, ndoubles, win, 0, 0); + END_EPOCH(win); + } + //MPI_Pcontrol(-1, "rma"); + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + t_pure_l = DIFFNSEC(end, start) / NPURE; + //printf("t_pure (local): %ld usec\n", t_pure_l / 1000UL); + MPI_Allreduce(&t_pure_l, &t_pure, 1, MPI_LONG, MPI_MAX, MPI_COMM_WORLD); + if (my_rank == 0) printf("t_pure (max): %ld usec\n", t_pure / 1000UL); + +#if 0 + for (i = 0; i < nproc; i++) { + for (j = 0; j < ndoubles; j++) { + printf("wbuf,proc=%d,j=%d,val=%f\n", i, j, wbuf[i * ndoubles + j]); + printf("rbuf,proc=%d,j=%d,val=%f\n", i, j, rbuf[i * ndoubles + j]); + } + 
} +#endif + + /* Measure get_acc-calc-flush time */ + MPI_Barrier(MPI_COMM_WORLD); +#define NOVERALL 10 + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + //MPI_Pcontrol(1, "rma-calc"); + for (i = 0; i < NOVERALL; i++) { + BEGIN_EPOCH(win); + rma(nproc, ppn, my_rank, wbuf, rbuf, ndoubles, win, t_pure - t_flush, 0); + END_EPOCH(win); + } + //MPI_Pcontrol(-1, "rma-calc"); + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + t_overall_l = DIFFNSEC(end, start) / NOVERALL; + //printf("t_overall (local): %ld usec\n", t_overall_l / 1000UL); + MPI_Allreduce(&t_overall_l, &t_overall, 1, MPI_LONG, MPI_MAX, MPI_COMM_WORLD); + if (my_rank == 0) printf("t_overall (max): %ld usec\n", t_overall / 1000UL); + if (my_rank == 0) { + long t_abs = (t_pure * 2) - t_overall; + printf("overlap: %.2f %%\n", (t_abs * 100) / (double)t_pure); +} + + fn_exit: + MPI_Finalize(); + return 0; + fn_fail: + goto fn_exit; +} diff --git a/test/uti/mpi/013.sh b/test/uti/mpi/013.sh new file mode 100755 index 00000000..56edfe86 --- /dev/null +++ b/test/uti/mpi/013.sh @@ -0,0 +1,176 @@ +#!/usr/bin/bash + +#!/usr/bin/bash -x + +MYHOME=/work/gg10/e29005 +UTI_MPI_TOP=${MYHOME}/project/os/mckernel/test/uti/mpi + +MCK=${MYHOME}/project/os/install +unset DISABLE_UTI + +stop=0 +reboot=0 +go=0 + +async=0 +mck=0 +nnodes=4 +LASTNODE=8200 +ndoubles=10 #12-15 +omp_num_threads=1 +ppn=16 #16 +async_progress_pin=64,132,200,268,65,133,201,269,66,134,202,270,67,135,203,271 +lpp=4 # logical-per-physical +ncpu_mt=256 # number of CPUs for main-thread +exe=`basename $0 | sed 's/\.sh$//'` + +while getopts srga:c:n:mdl:N:P:o: OPT +do + case ${OPT} in + s) stop=1 + ;; + r) reboot=1 + ;; + g) go=1 + ;; + a) async=$OPTARG + ;; + n) ndoubles=$OPTARG + ;; + m) mck=1 + ;; + d) export DISABLE_UTI=1 + ;; + N) nnodes=$OPTARG + ;; + P) ppn=$OPTARG + ;; + o) omp_num_threads=$OPTARG + ;; + *) echo "invalid option -${OPT}" >&2 + exit 1 + esac +done + +nprocs=$((ppn * nnodes)) +nodes=`echo $(seq -s ",c" $(($LASTNODE + 1 - $nnodes)) $LASTNODE) 
| sed 's/^/c/'` +echo nprocs=$nprocs nnodes=$nnodes ppn=$ppn nodes=$nodes + +if [ ${mck} -eq 1 ]; then + mcexec="${mck_dir}/bin/mcexec" + nmcexecthr=$((omp_num_threads + 4)) + mcexecopt="--uti-thread-rank=$uti_thread_rank" + if [ ${use_hfi} -eq 1 ]; then + mcexecopt="--enable-hfi1 $mcexecopt" + fi + mcexecopt="-n $ppn -t $nmcexecthr $mcexecopt" +else + mcexec= + mcexecopt= +fi + +if [ ${mck} -eq 1 ]; then + i_mpi_pin=off + i_mpi_pin_domain= + i_mpi_pin_order= +else + # Let each domain have all logical cores and use KMP_AFFINITY=scatter if you want to use only physical cores + i_mpi_pin=on + if [ $((omp_num_threads * lpp * ppn)) -le $ncpu_mt ]; then + domain=$((omp_num_threads * lpp)) # Prefer physical but adjacent physicals share L1 + else + domain=$((ncpu_mt / ppn)) # Use logical as well + fi + i_mpi_pin_domain="export I_MPI_PIN_DOMAIN=$domain" + i_mpi_pin_order="export I_MPI_PIN_ORDER=compact" +fi + +if [ $async -eq 0 ] || [ "$async_progress_pin" == "" ] ; then + i_mpi_async_progress_pin= +else + i_mpi_async_progress_pin="export I_MPI_ASYNC_PROGRESS_PIN=$async_progress_pin" +fi + +if [ ${stop} -eq 1 ]; then + + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes sudo mount /work + + if [ ${mck} -eq 1 ]; then + sudo ${MCK}/sbin/mcstop+release.sh + else + : + fi +fi + +if [ ${reboot} -eq 1 ]; then + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes sudo mount /work + + if [ ${mck} -eq 1 ]; then + if hostname | grep ofp &>/dev/null; then + sudo ${MCK}/sbin/mcreboot.sh -s -c 2-17,70-85,138-153,206-221,20-35,88-103,156-171,224-239,36-51,104-119,172-187,240-255,52-67,120-135,188-203,256-271 -r 
2-5,70-73,138-141,206-209:0+6-9,74-77,142-145,210-213:1+10-13,78-81,146-149,214-217:68+14-17,82-85,150-153,218-221:69+20-23,88-91,156-159,224-227:136+24-27,92-95,160-163,228-231:137+28-31,96-99,164-167,232-235:204+32-35,100-103,168-171,236-239:205+36-39,104-107,172-175,240-243:18+40-43,108-111,176-179,244-247:19+44-47,112-115,180-183,248-251:86+48-51,116-119,184-187,252-255:87+52-55,120-123,188-191,256-259:154+56-59,124-127,192-195,260-263:155+60-63,128-131,196-199,264-267:222+64-67,132-135,200-203,268-271:223 -m 32G@0,12G@1 + else + sudo ${MCK}/sbin/mcreboot.sh -s -c 1-15,65-79,129-143,193-207,17-31,81-95,145-159,209-223,33-47,97-111,161-175,225-239,49-63,113-127,177-191,241-255 -r 1-15:0+65-79:64+129-143:128+193-207:192+17-31:16+81-95:80+145-159:144+209-223:208+33-47:32+97-111:96+161-175:160+225-239:224+49-63:48+113-127:112+177-191:176+241-255:240 -m 12G@0,12G@1,12G@2,12G@3,3920M@4,3920M@5,3920M@6,3920M@7 + fi + else + : + fi +fi + +cd ${UTI_MPI_TOP} +( +cat <<EOF +#!/bin/sh + +export I_MPI_HYDRA_BOOTSTRAP_EXEC=/usr/bin/ssh +export I_MPI_HYDRA_BOOTSTRAP=ssh + +export OMP_NUM_THREADS=$omp_num_threads +#export OMP_STACKSIZE=64M +export KMP_BLOCKTIME=1 +export PSM2_RCVTHREAD=0 + +export I_MPI_PIN=$i_mpi_pin +$i_mpi_pin_domain +$i_mpi_pin_order + +export HFI_NO_CPUAFFINITY=1 +export I_MPI_COLL_INTRANODE_SHM_THRESHOLD=4194304 +export I_MPI_FABRICS=shm:tmi +export PSM2_RCVTHREAD=0 +export I_MPI_TMI_PROVIDER=psm2 +export I_MPI_FALLBACK=0 +export PSM2_MQ_RNDV_HFI_WINDOW=4194304 +export PSM2_MQ_EAGER_SDMA_SZ=65536 +export PSM2_MQ_RNDV_HFI_THRESH=200000 + +export MCKERNEL_RLIMIT_STACK=32M,16G +export KMP_STACKSIZE=64m +export KMP_AFFINITY=granularity=thread,scatter +#export KMP_HW_SUBSET=64c,1t + +export I_MPI_ASYNC_PROGRESS=$async +$i_mpi_async_progress_pin + +#export I_MPI_STATS=native:20,ipm +export I_MPI_STATS=ipm +#export I_MPI_DEBUG=4 +#export I_MPI_HYDRA_DEBUG=on + +mpiexec.hydra -l -n $nprocs -ppn $ppn -hosts $nodes $ilpopt $mcexec $mcexecopt `pwd`/$exe --ppn $ppn 
-d $ndoubles + +EOF +) > ./job.sh +chmod u+x ./job.sh + +if [ ${go} -eq 1 ]; then + cd ${UTI_MPI_TOP} + if [ $mck -eq 1 ]; then + make clean && make $exe + else + make clean && make CC=mpiicc $exe + fi + ./job.sh +fi + + + diff --git a/test/uti/mpi/014.c b/test/uti/mpi/014.c new file mode 100755 index 00000000..6fa95045 --- /dev/null +++ b/test/uti/mpi/014.c @@ -0,0 +1,242 @@ +#define _GNU_SOURCE /* See feature_test_macros(7) */ +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <mpi.h> +#include <unistd.h> +#include <sys/syscall.h> /* For SYS_xxx definitions */ +#include <getopt.h> +#include <sched.h> +#include <sys/time.h> +#include <sys/resource.h> +#include "async_progress.h" +#include "util.h" + +//#define DEBUG +#ifdef DEBUG +#define dprintf printf +#else +#define dprintf {} +#endif + +static struct option options[] = { + /* end */ + { NULL, 0, NULL, 0, }, +}; + +int main(int argc, char **argv) { + int rc; + int actual; + int nproc; + int nsamples = -1; + int my_rank = -1, size = -1; + int i, j, k, l, m; + double *wbuf, *rbuf, *result; + MPI_Win win; + long start, end; + long t_pure_l, t_pure, t_pure0 = 0; + int opt; + int szbuf = 8; + struct rusage ru_start, ru_end; + struct timeval tv_start, tv_end; + + fwq_init(); + + while ((opt = getopt_long(argc, argv, "+n:", options, NULL)) != -1) { + switch (opt) { + case 'n': + nsamples = atoi(optarg); + break; + default: /* '?' 
*/ + printf("unknown option %c\n", optopt); + exit(1); + } + } + + if (nsamples == -1) { + printf("specify nsamples with -n"); + exit(1); + } + + MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual); + if (actual != 3) { + printf("ERROR: MPI_THREAD_MULTIPLE not available (level was set to %d)\n", actual); + exit(1); + } + + MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + MPI_Comm_size(MPI_COMM_WORLD, &nproc); + + if (my_rank == 0) { + printf("nsamples=%d,nproc=%d\n", nsamples, nproc); + } + + /* accumulate-to buffer */ + wbuf = malloc(sizeof(double) * szbuf); + if(!wbuf) { printf("malloc failed"); goto fn_fail; } + memset(wbuf, 0, sizeof(double) * szbuf); + + /* read-from buffer */ + rbuf = malloc(sizeof(double) * szbuf); + if(!rbuf) { printf("malloc failed"); goto fn_fail; } + memset(rbuf, 0, sizeof(double) * szbuf); + + /* fetch-to buffer */ + result = malloc(sizeof(double) * szbuf); + if(!result) { printf("malloc failed"); goto fn_fail; } + memset(result, 0, sizeof(double) * szbuf); + + /* Expose accumulate-to buffer*/ + if (rc = MPI_Win_create(wbuf, sizeof(double) * szbuf, sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &win)) { + printf("MPI_Win_create failed,rc=%d\n", rc); + } + + for (j = 0; j < szbuf; j++) { + wbuf[j] = j + 1; + rbuf[j] = 10000 + j + 1; + result[j] = 100000 + j + 1; + } + +#if 0 + for (j = 0; j < szbuf; j++) { + printf("wbuf,j=%d,val=%f\n", j, wbuf[j]); + printf("rbuf,j=%d,val=%f\n", j, rbuf[j]); + printf("result,j=%d,val=%f\n", j, result[j]); + } + } +#endif + + for (k = 0; k < 2; k++) { + + if (k == 1) { + + print_cpu_last_executed_on("main"); + + INIT_ASYNC_THREAD_(); + + if ((rc = getrusage(RUSAGE_THREAD, &ru_start))) { + printf("%s: ERROR: getrusage failed (%d)\n", __FUNCTION__, rc); + } + + if ((rc = gettimeofday(&tv_start, NULL))) { + printf("%s: ERROR: gettimeofday failed (%d)\n", __FUNCTION__, rc); + } + + syscall(701, 1 | 2 | 0x80000000); + } + + for (m = 0; m < 3; m++) { + + for (l = 0; l <= 10; l++) { + long calc_cyc = /*(k == 
1 && l == 0) ? (double)t_pure0 * 0.1 :*/ t_pure0 / 10 * l; + + MPI_Barrier(MPI_COMM_WORLD); + MPI_Win_lock_all(0, win); + //clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + + start = rdtsc_light(); + for (j = 0; j < nsamples; j++) { + for (i = 0; i < nproc; i++) { + int target = j % nproc; + if (target == my_rank) { + continue; + } +#if 0 + MPI_Get_accumulate(rbuf + j % szbuf, 1, MPI_DOUBLE, + result + j % szbuf, 1, MPI_DOUBLE, + i, + j % szbuf, 1, MPI_DOUBLE, + MPI_SUM, win); +#endif +#if 1 + MPI_Get_accumulate(rbuf, szbuf, MPI_DOUBLE, + result, szbuf, MPI_DOUBLE, + i, + 0, szbuf, MPI_DOUBLE, + MPI_SUM, win); +#endif +#if 0 + MPI_Accumulate(rbuf, szbuf, MPI_DOUBLE, + i, + 0, szbuf, MPI_DOUBLE, + MPI_SUM, win); +#endif +#if 0 + MPI_Get(rbuf + j % szbuf, 1, MPI_DOUBLE, + i, + j % szbuf, 1, MPI_DOUBLE, + win); +#endif + } + } + fwq(calc_cyc * nsamples); + MPI_Win_flush_local_all(win); + end = rdtsc_light(); + + //clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + MPI_Win_unlock_all(win); + MPI_Barrier(MPI_COMM_WORLD); + t_pure_l = (end - start) / nsamples; + //t_pure_l = DIFFNSEC(end, start) / nsamples; + + if (1||m == 2) { + MPI_Allreduce(&t_pure_l, &t_pure, 1, MPI_LONG, MPI_MAX, MPI_COMM_WORLD); + if (my_rank == 0) { + if (l == 0) { + printf("async: %d, trial: %d\n", k, m); + } + if (k == 0) { + printf("%ld\t%ld\n", calc_cyc, t_pure); + } else { + printf("%ld\n", t_pure); + } + } + } + + if (k == 0 && l == 0) { + t_pure0 = t_pure; + } +#if 0 + for (i = 0; i < nproc; i++) { + for (j = 0; j < sbuf; j++) { + printf("wbuf,j=%d,val=%f\n", j, wbuf[j]); + printf("rbuf,j=%d,val=%f\n", j, rbuf[j]); + printf("result,j=%d,val=%f\n", j, result[j]); + } + } +#endif + } + } + + if (k == 1) { + FINALIZE_ASYNC_THREAD_(); + +#if 0 + if ((rc = getrusage(RUSAGE_THREAD, &ru_end))) { + printf("%s: ERROR: getrusage failed (%d)\n", __FUNCTION__, rc); + } + + if ((rc = gettimeofday(&tv_end, NULL))) { + printf("%s: ERROR: gettimeofday failed (%d)\n", __FUNCTION__, rc); + } + + printf("%s: 
wall: %ld, user: %ld, sys: %ld\n", __FUNCTION__, + DIFFUSEC(tv_end, tv_start), + DIFFUSEC(ru_end.ru_utime, ru_start.ru_utime), + DIFFUSEC(ru_end.ru_stime, ru_start.ru_stime)); + syscall(701, 4 | 8 | 0x80000000); +#endif + } + } + + fn_exit: + MPI_Finalize(); + return 0; + fn_fail: + goto fn_exit; +} diff --git a/test/uti/mpi/014.sh b/test/uti/mpi/014.sh new file mode 100755 index 00000000..371e3e21 --- /dev/null +++ b/test/uti/mpi/014.sh @@ -0,0 +1,191 @@ +#!/usr/bin/bash + +#!/usr/bin/bash -x + +MYHOME=/work/gg10/e29005 +UTI_MPI_TOP=${MYHOME}/project/os/mckernel/test/uti/mpi + +mck_dir=${MYHOME}/project/os/install + +exe=`basename $0 | sed 's/\.sh//'` + +stop=0 +reboot=0 +go=0 + +async=0 +mck=0 +nnodes=2 +LASTNODE=8200 +nsamples=100 #2^12-15 +use_hfi=0 +omp_num_threads=1 +ppn=4 +lpp=4 # logical-per-physical +ncpu_mt=256 # number of CPUs for main-thread +myasync=1 +async_in_mck=0 + +while getopts srga:c:n:ml:N:P:ho:A:M: OPT +do + case ${OPT} in + s) stop=1 + ;; + r) reboot=1 + ;; + g) go=1 + ;; + a) async=$OPTARG + ;; + n) nsamples=$OPTARG + ;; + m) mck=1 + ;; + N) nnodes=$OPTARG + ;; + P) ppn=$OPTARG + ;; + h) use_hfi=1 + ;; + o) omp_num_threads=$OPTARG + ;; + A) myasync=$OPTARG + ;; + M) async_in_mck=$OPTARG + ;; + *) echo "invalid option -${OPT}" >&2 + exit 1 + esac +done + +nprocs=$((ppn * nnodes)) +nodes=`echo $(seq -s ",c" $(($LASTNODE + 1 - $nnodes)) $LASTNODE) | sed 's/^/c/'` +echo nprocs=$nprocs nnodes=$nnodes ppn=$ppn nodes=$nodes + +PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes bash -c \'if \[ \"\`cat /etc/mtab \| while read line\; do cut -d\" \" -f 2\; done \| grep /work\`\" == \"\" \]\; then sudo mount /work\; fi\' + +if [ $mck -eq 0 ] || [ $async_in_mck -eq 1 ]; then + export DISABLE_UTI=1 +else + unset DISABLE_UTI +fi + +if [ $mck -eq 0 ]; then + async_progress_pin=64,65,66,67,132,133,134,135,200,201,202,203,268,269,270,271 +else + async_progress_pin=`(for ((i=0;i<ppn;i++)) do printf "%d," $((i * (ncpu_mt / ppn) +1)); done) | sed 's/,$//'` + # 
same tile, different physical core +fi +echo async_progress_pin=$async_progress_pin + +if [ ${mck} -eq 1 ]; then + mcexec="${mck_dir}/bin/mcexec" + nmcexecthr=$((omp_num_threads + 4)) + mcexecopt="--uti-thread-rank=$uti_thread_rank" + if [ ${use_hfi} -eq 1 ]; then + mcexecopt="--enable-hfi1 $mcexecopt" + fi + mcexecopt="-n $ppn -t $nmcexecthr $mcexecopt" +else + mcexec= + mcexecopt= +fi + +if [ ${mck} -eq 1 ]; then + i_mpi_pin=off + i_mpi_pin_domain= + i_mpi_pin_order= +else + # Let each domain have all logical cores and use KMP_AFFINITY=scatter if you want to use only physical cores + i_mpi_pin=on + if [ $((omp_num_threads * lpp * ppn)) -le $ncpu_mt ]; then + domain=$((omp_num_threads * lpp)) # Prefer physical but adjacent physicals share L1 + else + domain=$((ncpu_mt / ppn)) # Use logical as well + fi + i_mpi_pin_domain="export I_MPI_PIN_DOMAIN=$domain" + i_mpi_pin_order="export I_MPI_PIN_ORDER=compact" +fi + +if [[ ($async -eq 1 && "$async_progress_pin" != "" ) || $myasync -eq 1 ]]; then + i_mpi_async_progress_pin="export I_MPI_ASYNC_PROGRESS_PIN=$async_progress_pin" +else + i_mpi_async_progress_pin= +fi + +if [ ${stop} -eq 1 ]; then + if [ ${mck} -eq 1 ]; then + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \ + sudo ${mck_dir}/sbin/mcstop+release.sh + else + : + fi +fi + +if [ ${reboot} -eq 1 ]; then + if [ ${mck} -eq 1 ]; then + if hostname | grep ofp &>/dev/null; then + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \ + sudo ${mck_dir}/sbin/mcreboot.sh -s -c 2-17,70-85,138-153,206-221,20-35,88-103,156-171,224-239,36-51,104-119,172-187,240-255,52-67,120-135,188-203,256-271 -r 
2-5,70-73,138-141,206-209:0+6-9,74-77,142-145,210-213:1+10-13,78-81,146-149,214-217:68+14-17,82-85,150-153,218-221:69+20-23,88-91,156-159,224-227:136+24-27,92-95,160-163,228-231:137+28-31,96-99,164-167,232-235:204+32-35,100-103,168-171,236-239:205+36-39,104-107,172-175,240-243:18+40-43,108-111,176-179,244-247:19+44-47,112-115,180-183,248-251:86+48-51,116-119,184-187,252-255:87+52-55,120-123,188-191,256-259:154+56-59,124-127,192-195,260-263:155+60-63,128-131,196-199,264-267:222+64-67,132-135,200-203,268-271:223 -m 32G@0,12G@1 + else + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \ + sudo ${mck_dir}/sbin/mcreboot.sh -s -c 1-15,65-79,129-143,193-207,17-31,81-95,145-159,209-223,33-47,97-111,161-175,225-239,49-63,113-127,177-191,241-255 -r 1-15:0+65-79:64+129-143:128+193-207:192+17-31:16+81-95:80+145-159:144+209-223:208+33-47:32+97-111:96+161-175:160+225-239:224+49-63:48+113-127:112+177-191:176+241-255:240 -m 12G@0,12G@1,12G@2,12G@3,3920M@4,3920M@5,3920M@6,3920M@7 + fi + else + : + fi +fi + +cd ${UTI_MPI_TOP} +( +cat <<EOF +#!/bin/sh + +export I_MPI_HYDRA_BOOTSTRAP_EXEC=/usr/bin/ssh +export I_MPI_HYDRA_BOOTSTRAP=ssh + +export OMP_NUM_THREADS=$omp_num_threads +#export OMP_STACKSIZE=64M +export KMP_BLOCKTIME=1 +export PSM2_RCVTHREAD=0 + +export I_MPI_PIN=$i_mpi_pin +$i_mpi_pin_domain +$i_mpi_pin_order + +export HFI_NO_CPUAFFINITY=1 +export I_MPI_COLL_INTRANODE_SHM_THRESHOLD=4194304 +export I_MPI_FABRICS=shm:tmi +export PSM2_RCVTHREAD=0 +export I_MPI_TMI_PROVIDER=psm2 +export I_MPI_FALLBACK=0 +export PSM2_MQ_RNDV_HFI_WINDOW=4194304 +export PSM2_MQ_EAGER_SDMA_SZ=65536 +export PSM2_MQ_RNDV_HFI_THRESH=200000 + +export MCKERNEL_RLIMIT_STACK=32M,16G +export KMP_STACKSIZE=64m +export KMP_AFFINITY=granularity=thread,scatter +#export KMP_HW_SUBSET=64c,1t + +export I_MPI_ASYNC_PROGRESS=$async +$i_mpi_async_progress_pin +export MY_ASYNC_PROGRESS=$myasync + +#export I_MPI_STATS=native:20,ipm +#export I_MPI_STATS=ipm +#export I_MPI_DEBUG=4 +#export I_MPI_HYDRA_DEBUG=on + 
/*
 * Call fixed_size_work() n times in a row.
 *
 * The counter has the same type as n: an int counter would overflow
 * (undefined behaviour) before reaching a bound larger than INT_MAX.
 */
static inline void bulk_fsw(unsigned long n) {
	unsigned long j;

	for (j = 0; j < n; j++) {
		fixed_size_work();
	}
}
/*
 * Print one line for the calling thread containing the PMI_RANK
 * environment value, field 39 of /proc/<pid>/task/<tid>/stat (labelled
 * stat-cpu), the result of sched_getcpu() and the thread id.
 *
 * When PMI_RANK is not set, -1 is printed as the rank.
 *
 * Returns 0 on success and -1 when the stat file cannot be opened, read
 * or parsed, when the buffer cannot be allocated or when sched_getcpu()
 * fails.
 */
static int print_cpu_last_executed_on() {
	enum { STAT_BUF_SIZE = 65536, STAT_CPU_FIELD = 39 };
	char fn[256];
	char *result = NULL;
	char *field;
	const char *rank_env;
	pid_t tid = syscall(SYS_gettid);
	int fd = -1;
	int offset = 0;
	int idx;
	int cpu;
	int mpi_errno = 0;

	snprintf(fn, sizeof(fn), "/proc/%d/task/%d/stat", (int)getpid(), (int)tid);
	//printf("fn=%s\n", fn);
	fd = open(fn, O_RDONLY);
	if (fd == -1) {
		printf("open() failed\n");
		goto fn_fail;
	}

	result = malloc(STAT_BUF_SIZE);
	if (result == NULL) {
		printf("malloc() failed");
		goto fn_fail;
	}

	/* Read until EOF, never past the buffer and leaving room for the NUL */
	while (offset < STAT_BUF_SIZE - 1) {
		ssize_t amount = read(fd, result + offset, STAT_BUF_SIZE - 1 - offset);

		// printf("amount=%d\n", amount);
		if (amount == -1) {
			printf("read() failed");
			goto fn_fail;
		}
		if (amount == 0) {
			break;
		}
		offset += amount;
	}
	result[offset] = '\0';
	//printf("result:%s\n", result);

	/*
	 * Field 2 (the command name) is enclosed in parentheses and may itself
	 * contain spaces, so fields are counted from its closing ')': the token
	 * following it is field 3.
	 */
	field = strrchr(result, ')');
	if (field == NULL) {
		printf("unexpected stat format\n");
		goto fn_fail;
	}
	for (idx = 2; idx < STAT_CPU_FIELD; idx++) {
		field = strchr(field, ' ');
		if (field == NULL) {
			printf("unexpected stat format\n");
			goto fn_fail;
		}
		field++;
	}

	cpu = sched_getcpu();
	if (cpu == -1) {
		printf("sched_getcpu() failed\n");
		goto fn_fail;
	}

	rank_env = getenv("PMI_RANK");
	printf("compute thread,pmi_rank=%02d,stat-cpu=%02d,sched_getcpu=%02d,tid=%d\n",
	       atoi(rank_env ? rank_env : "-1"), atoi(field), cpu, (int)tid);
	fflush(stdout);
 fn_exit:
	if (fd != -1) {
		close(fd);
	}
	free(result);
	return mpi_errno;
 fn_fail:
	mpi_errno = -1;
	goto fn_exit;
}
+ if (i != my_rank) { + for (j = 0; j < ndoubles; j++) { + MPI_Accumulate(rbuf + i * ndoubles + j, 1, MPI_DOUBLE, + i, i * ndoubles + j, 1, MPI_DOUBLE, + MPI_SUM, win); + MPI_Win_flush_local(i, win); /* ga_acc() calls flush_local() immediately */ + } + } + } + fwq(calc_nsec); +} + +static struct option options[] = { + { + .name = "ppn", + .has_arg = required_argument, + .flag = NULL, + .val = 'P', + }, + /* end */ + { NULL, 0, NULL, 0, }, +}; + +int main(int argc, char **argv) { + int rc; + int actual; + int ppn = -1; + int nproc; + int ndoubles = -1; + double add_rate = 1.0; + int my_rank = -1, size = -1; + int i, j, k, l; + double *wbuf, *rbuf, *result; + MPI_Win win; + long start, end; + //struct timespec start, end; + long t_pure_l, t_overall_l; + long t_pure, t_overall; + int opt; + + fwq_init(); + + while ((opt = getopt_long(argc, argv, "+d:P:R:", options, NULL)) != -1) { + switch (opt) { + case 'd': + ndoubles = atoi(optarg); + break; + case 'P': + ppn = atoi(optarg); + break; + case 'R': + add_rate = atof(optarg); + break; + default: /* '?' 
*/ + printf("unknown option %c\n", optopt); + exit(1); + } + } + + if (ndoubles == -1 || ppn == -1) { + printf("specify ndoubles with -d and ppn with --ppn"); + exit(1); + } + + MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual); + if (actual != 3) { + printf("ERROR: MPI_THREAD_MULTIPLE not available (level was set to %d)\n", actual); + exit(1); + } + + MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + MPI_Comm_size(MPI_COMM_WORLD, &nproc); + + if (my_rank == 0) { + printf("ndoubles=%d,nproc=%d,add_rate=%f\n", ndoubles, nproc, add_rate); + printf("cyc=%ld, cycpw=%ld\n", cyc, cycpw); + } + + /* accumulate-to buffer */ + wbuf = malloc(sizeof(double) * ndoubles * nproc); + if(!wbuf) { printf("malloc failed"); goto fn_fail; } + memset(wbuf, 0, sizeof(double) * ndoubles * nproc); + + /* read-from buffer */ + rbuf = malloc(sizeof(double) * ndoubles * nproc); + if(!rbuf) { printf("malloc failed"); goto fn_fail; } + memset(rbuf, 0, sizeof(double) * ndoubles * nproc); + + /* Expose accumulate-to buffer*/ + if (rc = MPI_Win_create(wbuf, sizeof(double) * ndoubles * nproc, sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &win)) { + printf("MPI_Win_create failed,rc=%d\n", rc); + } + + //print_cpu_last_executed_on(); + + for (i = 0; i < nproc; i++) { + for (j = 0; j < ndoubles; j++) { + wbuf[i * ndoubles + j] = (i + 1) * 1000 + (j + 1); + rbuf[i * ndoubles + j] = (i + 1) * 10000 + (j + 1); + } + } + +#if 0 + for (i = 0; i < nproc; i++) { + for (j = 0; j < ndoubles; j++) { + printf("wbuf,proc=%d,j=%d,val=%f\n", i, j, wbuf[i * ndoubles + j]); + printf("rbuf,proc=%d,j=%d,val=%f\n", i, j, rbuf[i * ndoubles + j]); + } + } +#endif + + for (k = 0; k < 2; k++) { + if (k == 1) { + INIT_ASYNC_THREAD_(); + } + + /* Measure get_acc-flush time */ + MPI_Barrier(MPI_COMM_WORLD); +#define NPURE 10 + //clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + start = rdtsc_light(); + MPI_Pcontrol(1, "rma"); + syscall(701, 1); + syscall(701, 2); + for (i = 0; i < NPURE; i++) { + BEGIN_EPOCH(win); + 
rma(nproc, my_rank, wbuf, rbuf, ndoubles, win, 0); + END_EPOCH(win); + } + MPI_Pcontrol(-1, "rma"); + syscall(701, 4); + syscall(701, 8); + end = rdtsc_light(); + //clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + MPI_Barrier(MPI_COMM_WORLD); + t_pure_l = (end - start) / NPURE; + //t_pure_l = DIFFNSEC(end, start) / NPURE; + //printf("t_pure (local): %ld usec\n", t_pure_l / 1000UL); + MPI_Allreduce(&t_pure_l, &t_pure, 1, MPI_LONG, MPI_MAX, MPI_COMM_WORLD); + if (my_rank == 0) printf("t_pure (max): %ld cycles\n", t_pure); + + +#if 1 + for (l = 1; l <= 10; l++) { + MPI_Barrier(MPI_COMM_WORLD); +#define NOVERALL 10 + start = rdtsc_light(); + for (i = 0; i < NOVERALL; i++) { + BEGIN_EPOCH(win); + rma(nproc, my_rank, wbuf, rbuf, ndoubles, win, 100UL * 1000000 * l); + END_EPOCH(win); + } + end = rdtsc_light(); + MPI_Barrier(MPI_COMM_WORLD); + t_overall_l = (end - start) / NOVERALL; + MPI_Allreduce(&t_overall_l, &t_overall, 1, MPI_LONG, MPI_MAX, MPI_COMM_WORLD); + if (my_rank == 0) printf("t_overall (max): %ld cycle\n", t_overall); + } +#endif + + if (k == 1) { + FINALIZE_ASYNC_THREAD_(); + } + +#if 0 + for (i = 0; i < nproc; i++) { + for (j = 0; j < ndoubles; j++) { + printf("wbuf,proc=%d,j=%d,val=%f\n", i, j, wbuf[i * ndoubles + j]); + printf("rbuf,proc=%d,j=%d,val=%f\n", i, j, rbuf[i * ndoubles + j]); + printf("result,proc=%d,j=%d,val=%f\n", i, j, result[i * ndoubles + j]); + } + } +#endif + } + + fn_exit: + MPI_Finalize(); + return 0; + fn_fail: + goto fn_exit; +} diff --git a/test/uti/mpi/015.sh b/test/uti/mpi/015.sh new file mode 100755 index 00000000..719cd6ba --- /dev/null +++ b/test/uti/mpi/015.sh @@ -0,0 +1,189 @@ +#!/usr/bin/bash + +#!/usr/bin/bash -x + +MYHOME=/work/gg10/e29005 +UTI_MPI_TOP=${MYHOME}/project/os/mckernel/test/uti/mpi + +mck_dir=${MYHOME}/project/os/install + +exe=`basename $0 | sed 's/\.sh//'` + +stop=0 +reboot=0 +go=0 + +async=0 +mck=0 +nnodes=2 +LASTNODE=8200 +ndoubles=16 #2^12-15 +add_rate="1.0" +disable_uti=0 +omp_num_threads=1 +ppn=16 #16 
+async_progress_pin=64,132,200,268,65,133,201,269,66,134,202,270,67,135,203,271 +lpp=4 # logical-per-physical +ncpu_mt=256 # number of CPUs for main-thread +myasync=1 +use_hfi=0 + +while getopts srga:c:n:md:l:N:P:o:A:R: OPT +do + case ${OPT} in + s) stop=1 + ;; + r) reboot=1 + ;; + g) go=1 + ;; + a) async=$OPTARG + ;; + n) ndoubles=$OPTARG + ;; + m) mck=1 + ;; + d) disable_uti=$OPTARG + ;; + N) nnodes=$OPTARG + ;; + P) ppn=$OPTARG + ;; + o) omp_num_threads=$OPTARG + ;; + A) myasync=$OPTARG + ;; + R) add_rate=$OPTARG + ;; + *) echo "invalid option -${OPT}" >&2 + exit 1 + esac +done + +nprocs=$((ppn * nnodes)) +nodes=`echo $(seq -s ",c" $(($LASTNODE + 1 - $nnodes)) $LASTNODE) | sed 's/^/c/'` +echo nprocs=$nprocs nnodes=$nnodes ppn=$ppn nodes=$nodes + +PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes bash -c \'if \[ \"\`cat /etc/mtab \| while read line\; do cut -d\" \" -f 2\; done \| grep /work\`\" == \"\" \]\; then sudo mount /work\; fi\' + +if [ $disable_uti -eq 1 ]; then + export DISABLE_UTI=1 +else + unset DISABLE_UTI +fi + +if [ ${mck} -eq 1 ]; then + mcexec="${mck_dir}/bin/mcexec" + nmcexecthr=$((omp_num_threads + 4)) + mcexecopt="--uti-thread-rank=$uti_thread_rank" + if [ ${use_hfi} -eq 1 ]; then + mcexecopt="--enable-hfi1 $mcexecopt" + fi + mcexecopt="-n $ppn -t $nmcexecthr $mcexecopt" +else + mcexec= + mcexecopt= +fi + +if [ ${mck} -eq 1 ]; then + i_mpi_pin=off + i_mpi_pin_domain= + i_mpi_pin_order= +else + # Let each domain have all logical cores and use KMP_AFFINITY=scatter if you want to use only physical cores + i_mpi_pin=on + if [ $((omp_num_threads * lpp * ppn)) -le $ncpu_mt ]; then + domain=$((omp_num_threads * lpp)) # Prefer physical but adjacent physicals share L1 + else + domain=$((ncpu_mt / ppn)) # Use logical as well + fi + i_mpi_pin_domain="export I_MPI_PIN_DOMAIN=$domain" + i_mpi_pin_order="export I_MPI_PIN_ORDER=compact" +fi + +if [[ ($async -eq 1 && "$async_progress_pin" != "" ) || $myasync -eq 1 ]]; then + i_mpi_async_progress_pin="export 
I_MPI_ASYNC_PROGRESS_PIN=$async_progress_pin" +else + i_mpi_async_progress_pin= +fi + +if [ ${stop} -eq 1 ]; then + if [ ${mck} -eq 1 ]; then + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes sudo ${mck_dir}/sbin/mcstop+release.sh + else + : + fi +fi + +if [ ${reboot} -eq 1 ]; then + if [ ${mck} -eq 1 ]; then + if hostname | grep ofp &>/dev/null; then + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes sudo ${mck_dir}/sbin/mcreboot.sh -s -c 2-17,70-85,138-153,206-221,20-35,88-103,156-171,224-239,36-51,104-119,172-187,240-255,52-67,120-135,188-203,256-271 -r 2-5,70-73,138-141,206-209:0+6-9,74-77,142-145,210-213:1+10-13,78-81,146-149,214-217:68+14-17,82-85,150-153,218-221:69+20-23,88-91,156-159,224-227:136+24-27,92-95,160-163,228-231:137+28-31,96-99,164-167,232-235:204+32-35,100-103,168-171,236-239:205+36-39,104-107,172-175,240-243:18+40-43,108-111,176-179,244-247:19+44-47,112-115,180-183,248-251:86+48-51,116-119,184-187,252-255:87+52-55,120-123,188-191,256-259:154+56-59,124-127,192-195,260-263:155+60-63,128-131,196-199,264-267:222+64-67,132-135,200-203,268-271:223 -m 32G@0,12G@1 + else + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes sudo ${mck_dir}/sbin/mcreboot.sh -s -c 1-15,65-79,129-143,193-207,17-31,81-95,145-159,209-223,33-47,97-111,161-175,225-239,49-63,113-127,177-191,241-255 -r 1-15:0+65-79:64+129-143:128+193-207:192+17-31:16+81-95:80+145-159:144+209-223:208+33-47:32+97-111:96+161-175:160+225-239:224+49-63:48+113-127:112+177-191:176+241-255:240 -m 12G@0,12G@1,12G@2,12G@3,3920M@4,3920M@5,3920M@6,3920M@7 + fi + else + : + fi +fi + +cd ${UTI_MPI_TOP} +( +cat <<EOF +#!/bin/sh + +export I_MPI_HYDRA_BOOTSTRAP_EXEC=/usr/bin/ssh +export I_MPI_HYDRA_BOOTSTRAP=ssh + +export OMP_NUM_THREADS=$omp_num_threads +#export OMP_STACKSIZE=64M +export KMP_BLOCKTIME=1 +export PSM2_RCVTHREAD=0 + +export I_MPI_PIN=$i_mpi_pin +$i_mpi_pin_domain +$i_mpi_pin_order + +export HFI_NO_CPUAFFINITY=1 +export I_MPI_COLL_INTRANODE_SHM_THRESHOLD=4194304 +export I_MPI_FABRICS=shm:tmi 
+export PSM2_RCVTHREAD=0 +export I_MPI_TMI_PROVIDER=psm2 +export I_MPI_FALLBACK=0 +export PSM2_MQ_RNDV_HFI_WINDOW=4194304 +export PSM2_MQ_EAGER_SDMA_SZ=65536 +export PSM2_MQ_RNDV_HFI_THRESH=200000 + +export MCKERNEL_RLIMIT_STACK=32M,16G +export KMP_STACKSIZE=64m +export KMP_AFFINITY=granularity=thread,scatter +#export KMP_HW_SUBSET=64c,1t + +export I_MPI_ASYNC_PROGRESS=$async +$i_mpi_async_progress_pin +export MY_ASYNC_PROGRESS=$myasync + +#export I_MPI_STATS=native:20,ipm +#export I_MPI_STATS=ipm +#export I_MPI_DEBUG=4 +#export I_MPI_HYDRA_DEBUG=on + +mpiexec.hydra -l -n $nprocs -ppn $ppn -hosts $nodes $ilpopt $mcexec $mcexecopt ./$exe --ppn $ppn -d $ndoubles -R $add_rate + +EOF +) > ./job.sh +chmod u+x ./job.sh + +if [ ${go} -eq 1 ]; then + cd ${UTI_MPI_TOP} + if [ $mck -eq 1 ]; then + make $exe + else + . /home/opt/local/cores/intel/compilers_and_libraries_2018.1.163/linux/bin/compilervars.sh intel64 + make CC=mpiicc $exe + fi + ./job.sh +fi + + + diff --git a/test/uti/mpi/016.c b/test/uti/mpi/016.c new file mode 100755 index 00000000..fc83c198 --- /dev/null +++ b/test/uti/mpi/016.c @@ -0,0 +1,349 @@ +#define _GNU_SOURCE /* See feature_test_macros(7) */ +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <mpi.h> +#include <unistd.h> +#include <sys/syscall.h> /* For SYS_xxx definitions */ +#include <getopt.h> +#include <sched.h> +#include <sys/time.h> +#include <sys/resource.h> +#include "async_progress.h" +#include "util.h" + +#define MYTIME_UNIT "usec" +#define MYTIME_TOUSEC 1000000 +#define MYTIME_TONSEC 1000000000 + +#define NROW 16 /* 0%, 10%, ..., 140% */ +#define NCOL 4 + +#define NSAMPLES_DROP 5/*10*/ +#define NSAMPLES_COMM 10/*20*/ +#define NSAMPLES_TOTAL 10/*20*/ +#define NSAMPLES_INNER 5 + +#define PROGRESS_CALC_PHASE_ONLY + +static inline double mytime() { + return /*rdtsc_light()*/MPI_Wtime(); +} + +static int ppn = -1; + 
/*
 * Fill the three RMA buffers with recognisable patterns.
 *
 * Every element encodes its 1-based position in the low digits; the higher
 * digits carry (rank + 1) and, for result only, (id + 1):
 *   origin_buf[j] = (rank + 1) * 100       + (j + 1)
 *   result[j]     = (id + 1)   * 100000000 + (rank + 1) * 10000 + (j + 1)
 *   target_buf[j] = (rank + 1) * 1000000   + (j + 1)
 *
 * szbuf is the number of doubles in each buffer; nothing is written when it
 * is zero or negative.
 */
void init_buf(double *origin_buf, double *result, double *target_buf, int szbuf, int rank, int id) {
	/* Parts that do not depend on the element index */
	const double origin_base = (rank + 1) * 100.0;
	const double result_base = (id + 1) * 100000000.0 + (rank + 1) * 10000.0;
	const double target_base = (rank + 1) * 1000000.0;
	int idx = 0;

	while (idx < szbuf) {
		const int ordinal = idx + 1;

		origin_buf[idx] = origin_base + ordinal;
		result[idx] = result_base + ordinal;
		target_buf[idx] = target_base + ordinal;
		idx++;
	}
}
MAX2(NSAMPLES_INNER * (nproc - 1) * (1.0 - pct_calc), nproc - 1) : NSAMPLES_INNER * (nproc - 1)); j++) { + //for (j = 0; j < MAX2(NSAMPLES_INNER * (nproc - 1) * (1.0 - pct_calc), nproc - 1); j++) { + if ((ret = MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &completed, MPI_STATUS_IGNORE)) != MPI_SUCCESS) { + pr_err("%s: error: MPI_Iprobe: %d\n", __func__, ret); + } + } +#endif + + MPI_Win_flush_local_all(win); +} + +double measure(int rank, int nproc, MPI_Win win, double *origin_buf, double* result, double *target_buf, int szbuf, long nsec_calc, int async_progress, int sync_progress, int nsamples, int nsamples_drop, double pct_calc) { + int i; + double t_l, t_g, t_sum = 0; + double start, end; + + for (i = 0; i < nsamples + nsamples_drop; i++) { + MPI_Barrier(MPI_COMM_WORLD); + MPI_Win_lock_all(0, win); + + /* Set parameter based on current IPC and frequency */ + ndelay_init(0); + + start = mytime(); + rma(rank, nproc, win, origin_buf, result, szbuf, nsec_calc, async_progress, sync_progress, pct_calc); + end = mytime(); + + MPI_Win_unlock_all(win); + MPI_Barrier(MPI_COMM_WORLD); + + t_l = end - start; + MPI_Allreduce(&t_l, &t_g, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + + if (i < nsamples_drop) { + continue; + } + + t_sum += t_g; + } + return t_sum / nsamples; +} + +int main(int argc, char **argv) +{ + int ret; + int actual; + int rank = -1; + int nproc; + int i, j, progress, l, m; + double *target_buf, *origin_buf, *result; + MPI_Win win; + double t_comm_l, t_comm_g, t_comm_sum, t_comm_ave; + double t_total_l, t_total_g, t_total_sum, t_total_ave; + double t_table[NROW][NCOL]; + int opt; + int szbuf = 1; /* Number of doubles to send */ + struct rusage ru_start, ru_end; + struct timeval tv_start, tv_end; + int disable_syscall_intercept = 0; + + cpu_set_t cpuset; + + //test_set_loglevel(TEST_LOGLEVEL_WARN); + ndelay_init(1); + + while ((opt = getopt(argc, argv, "+p:I:")) != -1) { + switch (opt) { + case 'p': + ppn = atoi(optarg); + break; + case 'I': + 
disable_syscall_intercept = atoi(optarg); + break; + default: /* '?' */ + printf("unknown option %c\n", optopt); + ret = -1; + goto out; + } + } + + if (ppn == -1) { + pr_err("Error: Specify processes-per-rank with -p"); + ret = -1; + goto out; + } + + MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual); + if (actual != MPI_THREAD_MULTIPLE) { + pr_err("Error: MPI_THREAD_MULTIPLE is not available\n"); + ret = -1; + goto out; + } + + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &nproc); + + if (rank == 0) { + printf("ndoubles=%d,nproc=%d\n", szbuf, nproc); + +#pragma omp parallel + { + //printf("%d cpu\n", sched_getcpu()); + if (omp_get_thread_num() == 0) { + printf("#threads=%d\n", omp_get_num_threads()); + } + } + } + + /* accumulate-to buffer */ + target_buf = malloc(sizeof(double) * szbuf); + if (!target_buf) { + pr_err("Error: allocating target_buf"); + ret = -1; + goto out; + } + memset(target_buf, 0, sizeof(double) * szbuf); + + /* read-from buffer */ + origin_buf = malloc(sizeof(double) * szbuf); + if (!origin_buf) { + pr_err("Error: alloacting origin_buf"); + ret = -1; + goto out; + } + memset(origin_buf, 0, sizeof(double) * szbuf); + + /* fetch-to buffer */ + result = malloc(sizeof(double) * szbuf); + if (!result) { + pr_err("Error: allocating result"); + ret = -1; + goto out; + } + memset(result, 0, sizeof(double) * szbuf); + + /* Expose accumulate-to buffer*/ + ret = MPI_Win_create(target_buf, sizeof(double) * szbuf, sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &win); + if (ret != 0) { + pr_err("Error: MPI_Win_create returned %d\n", ret); + ret = -1; + goto out; + } + + /* Measure RMA-only time */ + init_buf(origin_buf, result, target_buf, szbuf, rank, 99); + t_comm_ave = measure(rank, nproc, win, origin_buf, result, target_buf, szbuf, 0, 0, 1, NSAMPLES_COMM, NSAMPLES_DROP, 0); + + if (rank == 0) { + printf("t_comm_ave: %.0f %s\n", t_comm_ave * MYTIME_TOUSEC, MYTIME_UNIT); + } + +#ifdef PROFILE + syscall(701, 1 | 2 | 
0x80000000); /* syscall profile start */ +#endif + + /* 0: no progress, 1: progress, no uti, 2: progress, uti */ + for (progress = 0; progress <= (disable_syscall_intercept ? 0 : 2); progress += 1) { + + if (progress == 1) { + setenv("DISABLE_UTI", "1", 1); /* Don't use uti_attr and pin to Linux/McKernel CPUs */ + progress_init(); + } else if (progress == 2) { + progress_finalize(); + unsetenv("DISABLE_UTI"); + progress_init(); + } + + if (progress == 1 || progress == 2) { +#ifndef PROGRESS_CALC_PHASE_ONLY + //progress_start(); +#endif + } + + /* RMA-start, compute for T_{RMA} * l / 10, RMA-flush */ + for (l = 0; l <= NROW - 1; l += 1) { + long nsec_calc = (t_comm_ave * MYTIME_TONSEC * l) / 10; + + init_buf(origin_buf, result, target_buf, szbuf, rank, l); + //pr_buf(origin_buf, result, target_buf, szbuf, rank, nproc); + t_total_ave = measure(rank, nproc, win, origin_buf, result, target_buf, szbuf, nsec_calc, progress, 0, NSAMPLES_TOTAL, NSAMPLES_DROP, l / 10.0); + //pr_buf(origin_buf, result, target_buf, szbuf, rank, nproc); + + if (rank == 0) { + + if (l == 0) { + pr_debug("progress=%d\n", progress); + if (progress == 0) { + pr_debug("calc\ttotal\n"); + } else { + pr_debug("total\n"); + } + } + + t_table[l][0] = nsec_calc * (MYTIME_TOUSEC / (double)MYTIME_TONSEC); + if (progress == 0) { + pr_debug("%.0f\t%.0f\n", nsec_calc * (MYTIME_TOUSEC / (double)MYTIME_TONSEC), t_total_ave * MYTIME_TOUSEC); + t_table[l][progress + 1] = t_total_ave * MYTIME_TOUSEC; + } else { + pr_debug("%.0f\n", t_total_ave * MYTIME_TOUSEC); + t_table[l][progress + 1] = t_total_ave * MYTIME_TOUSEC; + } + } + } + + if (progress == 1 || progress == 2) { +#ifndef PROGRESS_CALC_PHASE_ONLY + //progress_stop(); +#endif + } + + } + +#ifdef PROFILE + syscall(701, 4 | 8 | 0x80000000); /* syscall profile report */ +#endif + + if (rank == 0) { + printf("calc,no prog,prog and no uti, prog and uti\n"); + for (l = 0; l <= NROW - 1; l++) { + for (i = 0; i < NCOL; i++) { + if (i > 0) { + printf(","); + } + 
printf("%.0f", t_table[l][i]); + } + printf("\n"); + } + } + + MPI_Barrier(MPI_COMM_WORLD); + + if (progress >= 1) { + progress_finalize(); + } + + MPI_Finalize(); + ret = 0; +out: + return ret; +} diff --git a/test/uti/mpi/016.sh b/test/uti/mpi/016.sh new file mode 100755 index 00000000..90d87107 --- /dev/null +++ b/test/uti/mpi/016.sh @@ -0,0 +1,272 @@ +#!/usr/bin/bash + +#!/usr/bin/bash -x + +MYHOME=/home/e29005 +test_dir=`pwd -P` +mck_dir=${MYHOME}/project/os/install +uti_dir_lin=${MYHOME}/project/uti/install_linux +uti_dir_mck=${MYHOME}/project/uti/install_mckernel + +exe=`basename $0 | sed 's/\.sh//'` + +stop=0 +reboot=0 +go=0 + +interactive=0 +pjsub=0 +gdb=0 +disable_syscall_intercept=0 +mck=0 +nnodes=2 +LASTNODE=8196 +use_hfi=0 +omp_num_threads=32 +ppn=4 + +while getopts srgc:ml:N:P:o:hGI:ipL: OPT +do + case ${OPT} in + s) stop=1 + ;; + r) reboot=1 + ;; + g) go=1 + ;; + m) mck=1 + ;; + N) nnodes=$OPTARG + ;; + P) ppn=$OPTARG + ;; + o) omp_num_threads=$OPTARG + ;; + h) use_hfi=1 + ;; + G) gdb=1 + ;; + I) disable_syscall_intercept=$OPTARG + ;; + i) interactive=1 + ;; + p) pjsub=1 + ;; + L) LASTNODE=$OPTARG + ;; + *) echo "invalid option -${OPT}" >&2 + exit 1 + esac +done + +nprocs=$((ppn * nnodes)) +nodes=`echo $(seq -s ",c" $(($LASTNODE + 1 - $nnodes)) $LASTNODE) | sed 's/^/c/'` + +# vertical cut, excluding phys loaded with Linux tasks +uti_cpu_set_lin=1,69,137,205,18-19,86-87,154-155,222-223 +exclude_list=0-1,68-69,136-137,204-205,18-19,86-87,154-155,222-223 +#64-67,132-135,200-203,268-271 + +uti_cpu_set_mck=1,69,137,205,18-19,86-87,154-155,222-223 + +# horizontal cut, excluding phys loaded with Linux tasks for mckernel +#uti_cpu_set_lin=204-271 +#uti_cpu_set_mck=1-67 + +if [ $mck -eq 0 ]; then + uti_cpu_set_str="export UTI_CPU_SET=$uti_cpu_set_lin" + i_mpi_pin_processor_exclude_list="export I_MPI_PIN_PROCESSOR_EXCLUDE_LIST=$exclude_list" +else + uti_cpu_set_str="export UTI_CPU_SET=$uti_cpu_set_mck" + i_mpi_pin_processor_exclude_list= +fi + +if [ ${mck} -eq 
1 ]; then + i_mpi_pin=off + i_mpi_pin_domain= + i_mpi_pin_order= +# if [ $omp_num_threads -eq 1 ]; then +# # Avoid binding main thread and uti thread to one CPU + kmp_affinity="export KMP_AFFINITY=disabled" +# else +# # Bind rank to OMP_NUM_THREAD-sized CPU-domain +# kmp_affinity="export KMP_AFFINITY=granularity=thread,scatter" +# fi +else + i_mpi_pin=on + domain=$omp_num_threads # Use 32 when you want to match mck's -n division + i_mpi_pin_domain="export I_MPI_PIN_DOMAIN=$domain" + i_mpi_pin_order="export I_MPI_PIN_ORDER=compact" + kmp_affinity="export KMP_AFFINITY=granularity=thread,scatter" +fi + +echo nprocs=$nprocs nnodes=$nnodes ppn=$ppn nodes=$nodes domain=$domain + +if [ ${mck} -eq 1 ]; then + makeopt="UTI_DIR=$uti_dir_mck" + use_mck="#PJM -x MCK=$mck_dir" + mck_mem="#PJM -x MCK_MEM=32G@0,8G@1" + mcexec="${mck_dir}/bin/mcexec" + nmcexecthr=$((omp_num_threads + 4)) + mcexecopt="-n $ppn --uti-use-last-cpu" # -t $nmcexecthr + + if [ ${use_hfi} -eq 1 ]; then + mcexecopt="--enable-hfi1 $mcexecopt" + fi + + if [ $disable_syscall_intercept -eq 0 ]; then + mcexecopt="--enable-uti $mcexecopt" + fi + +else + offline=`PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes lscpu \| grep Off 2>&1 | dshbak -c | grep Off` + if [ "$offline" != "" ]; then + echo "Error: Some CPUs are offline: $offline" + exit + fi + + makeopt="UTI_DIR=$uti_dir_lin" + use_mck= + mck_mem= + mcexec= + mcexecopt= +fi + +if [ $gdb -eq 1 ]; then + enable_x="-enable-x" + gdbcmd="xterm -display localhost:11 -hold -e gdb -ex run --args" +fi + +if [ $interactive -eq 1 ]; then + i_mpi_hydra_bootstrap_exec= + i_mpi_hydra_bootstrap= + hosts= + opt_dir=/opt/intel + ssh= +else +# PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes bash -c \'if \[ \"\`cat /etc/mtab \| while read line\; do cut -d\" \" -f 2\; done \| grep /work\`\" == \"\" \]\; then sudo mount /work\; fi\' + i_mpi_hydra_bootstrap_exec="export I_MPI_HYDRA_BOOTSTRAP_EXEC=/usr/bin/ssh" + i_mpi_hydra_bootstrap="export I_MPI_HYDRA_BOOTSTRAP=ssh" + 
hosts="-hosts $nodes" + opt_dir=/home/opt/local/cores/intel + ssh="ssh -A c$LASTNODE" +fi + +# If using ssh +# Latest versions are: 1.163, 2.199, 3.222 +if [ $pjsub -eq 0 ] && [ $interactive -eq 0 ]; then + compilervars=". ${opt_dir}/compilers_and_libraries_2018.2.199/linux/bin/compilervars.sh intel64" +else + compilervars= +fi + +if [ ${stop} -eq 1 ]; then + if [ ${mck} -eq 1 ]; then + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \ + /usr/sbin/pidof mcexec \| xargs -r sudo kill -9 + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \ + /usr/sbin/pidof $exe \| xargs -r sudo kill -9 + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \ + sudo ${mck_dir}/sbin/mcstop+release.sh + else + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \ + /usr/sbin/pidof $exe \| xargs -r sudo kill -9 + fi +fi + +if [ ${reboot} -eq 1 ]; then + if [ ${mck} -eq 1 ]; then + if hostname | grep ofp &>/dev/null; then + + # -h: Hide idle thread to prevent KNL CPU from mux-ing resource and halving throughput + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \ + sudo ${mck_dir}/sbin/mcreboot.sh -h -O -c 2-17,70-85,138-153,206-221,20-35,88-103,156-171,224-239,36-51,104-119,172-187,240-255,52-67,120-135,188-203,256-271 -r 2-5,70-73,138-141,206-209:0+6-9,74-77,142-145,210-213:1+10-13,78-81,146-149,214-217:68+14-17,82-85,150-153,218-221:69+20-23,88-91,156-159,224-227:136+24-27,92-95,160-163,228-231:137+28-31,96-99,164-167,232-235:204+32-35,100-103,168-171,236-239:205+36-39,104-107,172-175,240-243:18+40-43,108-111,176-179,244-247:19+44-47,112-115,180-183,248-251:86+48-51,116-119,184-187,252-255:87+52-55,120-123,188-191,256-259:154+56-59,124-127,192-195,260-263:155+60-63,128-131,196-199,264-267:222+64-67,132-135,200-203,268-271:223 -m 32G@0,12G@1 + +# perl -e 'for ($i=0;$i<68;$i++){if($i>0){print "+";}printf("%d,%d,%d:%d", $i+68,$i+136,$i+204,$i);}' + +# PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \ +# sudo ${mck_dir}/sbin/mcreboot.sh -O -c 68-271 -r 
68,136,204:0+69,137,205:1+70,138,206:2+71,139,207:3+72,140,208:4+73,141,209:5+74,142,210:6+75,143,211:7+76,144,212:8+77,145,213:9+78,146,214:10+79,147,215:11+80,148,216:12+81,149,217:13+82,150,218:14+83,151,219:15+84,152,220:16+85,153,221:17+86,154,222:18+87,155,223:19+88,156,224:20+89,157,225:21+90,158,226:22+91,159,227:23+92,160,228:24+93,161,229:25+94,162,230:26+95,163,231:27+96,164,232:28+97,165,233:29+98,166,234:30+99,167,235:31+100,168,236:32+101,169,237:33+102,170,238:34+103,171,239:35+104,172,240:36+105,173,241:37+106,174,242:38+107,175,243:39+108,176,244:40+109,177,245:41+110,178,246:42+111,179,247:43+112,180,248:44+113,181,249:45+114,182,250:46+115,183,251:47+116,184,252:48+117,185,253:49+118,186,254:50+119,187,255:51+120,188,256:52+121,189,257:53+122,190,258:54+123,191,259:55+124,192,260:56+125,193,261:57+126,194,262:58+127,195,263:59+128,196,264:60+129,197,265:61+130,198,266:62+131,199,267:63+132,200,268:64+133,201,269:65+134,202,270:66+135,203,271:67 -m 32G@0,12G@1 + else + echo "unkwon host type" + exit 1 + fi + else + : + fi +fi + +( +cat <<EOF +#!/bin/sh + +#PJM -L rscgrp=$rg +#PJM -L node=$nnodes +#PJM --mpi proc=$nprocs +#PJM -L elapse=$elapse +#PJM -L proc-crproc=16384 +#PJM -g gg10 +#PJM -j +#PJM -s +$use_mck +$mck_mem + +$i_mpi_hydra_bootstrap_exec +$i_mpi_hydra_bootstrap + +export OMP_NUM_THREADS=$omp_num_threads +#export OMP_STACKSIZE=64M +export KMP_BLOCKTIME=1 +export PSM2_RCVTHREAD=0 + +$uti_cpu_set_str +export I_MPI_PIN=$i_mpi_pin +$i_mpi_pin_processor_exclude_list +$i_mpi_pin_domain +$i_mpi_pin_order +$kmp_affinity + +export HFI_NO_CPUAFFINITY=1 +export I_MPI_COLL_INTRANODE_SHM_THRESHOLD=4194304 +export I_MPI_FABRICS=shm:tmi +export PSM2_RCVTHREAD=0 +export I_MPI_TMI_PROVIDER=psm2 +export I_MPI_FALLBACK=0 +export PSM2_MQ_RNDV_HFI_WINDOW=4194304 +export PSM2_MQ_EAGER_SDMA_SZ=65536 +export PSM2_MQ_RNDV_HFI_THRESH=200000 + +export MCKERNEL_RLIMIT_STACK=32M,16G +export KMP_STACKSIZE=64m +#export KMP_HW_SUBSET=64c,1t + +export 
I_MPI_ASYNC_PROGRESS=off + +#export I_MPI_STATS=native:20,ipm +#export I_MPI_STATS=ipm +#export I_MPI_DEBUG=4 +#export I_MPI_HYDRA_DEBUG=on + +ulimit -c unlimited + +$compilervars +mpiexec.hydra -n $nprocs -ppn $ppn $hosts $ilpopt $enable_x $gdbcmd $mcexec $mcexecopt ${test_dir}/$exe -p $ppn -I $disable_syscall_intercept +#-l + +EOF +) > ./job.sh +chmod u+x ./job.sh + +if [ ${go} -eq 1 ]; then + if [ $pjsub -eq 1 ]; then + pjsub ./job.sh + else + if [ $interactive -eq 0 ]; then + . ${opt_dir}/compilers_and_libraries_2018.2.199/linux/bin/compilervars.sh intel64 + fi + rm ./$exe + make $makeopt ./$exe + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \ + /usr/sbin/pidof $exe \| xargs -r sudo kill -9 + $ssh ${test_dir}/job.sh + fi +fi diff --git a/test/uti/mpi/Makefile b/test/uti/mpi/Makefile new file mode 100755 index 00000000..43418165 --- /dev/null +++ b/test/uti/mpi/Makefile @@ -0,0 +1,56 @@ +.SUFFIXES: # Clear suffixes + +MYHOME=/home/e29005 + +# Specify it via 016.sh +UTI_DIR=${MYHOME}/project/uti/install_linux + +CC=mpiicc +LD=$(CC) + +CFLAGS = -g -O0 -Wall +LDFLAGS = -lpthread -lpsm2 -L$(UTI_DIR)/lib -Wl,-rpath -Wl,$(UTI_DIR)/lib -luti +SRCS = $(shell ls *.c) +OBJS = $(SRCS:.c=.o) +EXES = $(SRCS:.c=) +TMPFILES = $(shell ls psm2-demo-* 2>/dev/null) + +all: $(EXES) file + +file: $(TMPFILES) + rm -f $(TMPFILES) + dd if=/dev/zero of=./file bs=1M count=1 + +async_progress.o:: async_progress.c util.h + $(CC) $(CFLAGS) -I$(UTI_DIR)/include -c $< + +util.o:: util.c util.h + $(CC) $(CFLAGS) -qopenmp -c $< + +014: 014.o async_progress.o util.o + $(LD) -o $@ $^ $(LDFLAGS) + +015: 015.o async_progress.o + $(LD) -o $@ $^ $(LDFLAGS) + +016: 016.o async_progress.o util.o + $(LD) -o $@ $^ $(LDFLAGS) -qopenmp + +016.o::016.c + $(CC) $(CFLAGS) -qopenmp -c $< + +011: 011.o + $(LD) -o $@ $^ $(LDFLAGS) -qopenmp + +011.o::011.c + $(CC) $(CFLAGS) -qopenmp -c $< + +%: %.o + $(LD) -o $@ $^ $(LDFLAGS) + +%.o::%.c + $(CC) $(CFLAGS) -c $< + +clean: + rm -f core $(EXES) $(OBJS) $(DSRCS) 
+ diff --git a/test/uti/mpi/README b/test/uti/mpi/README new file mode 100644 index 00000000..592b6d56 --- /dev/null +++ b/test/uti/mpi/README @@ -0,0 +1,25 @@ +001 isend 送受信に使用するバッファは毎回異なる +002 barrier +003 isend 送受信に使用するバッファは一つ、waitの前にsleepしない +004 isend-calc-wait, all-to-all +005 lockall-accumulate-calc-unlockall, all-to-all +006 parent isend-calc-wait, child does nothing --> crash +007 parent isend-calc-wait, child psm2 send/recv --> one ep per process +008 parent psm2-init and psm2-connect, child psm2-send/recv --> receiver side crash +009 parent does nothing, child psm2-init, psm2-connect, psm2-send/recv --> receiver side crash +010 parent psm2-init, psm2-connect, psm2-send/recv, child does nothing +011 001にopenmpスレッドを追加 +012 get_acc-calc-flush_local_all, all-to-all. Execute ./012.sh +013 acc-flush_local-calc, all-to-all, acc:flush_local=1:1 +014 012 + async progress thread. +015 013 + async progress thread + +016 MPI_Get_accumulate()のオーバーラップ + +* 通信パターンは全対全、 +* CPUはいくつかをprogress thread専用に割く +* ステップは以下の通り + (1) MPI_Get_accumulate() + (2) MPI_Get_accumulate()とMPI_Flush_local_all()だけを行った場合の +   時間の0.i倍の計算を実行 + (3) MPI_Flush_local_all() diff --git a/test/uti/mpi/async_progress.c b/test/uti/mpi/async_progress.c new file mode 100644 index 00000000..3034ee28 --- /dev/null +++ b/test/uti/mpi/async_progress.c @@ -0,0 +1,530 @@ +#define _GNU_SOURCE /* See feature_test_macros(7) */ +#include <unistd.h> +#include <sys/syscall.h> /* For SYS_xxx definitions */ +#include <sched.h> +#include <pthread.h> +#include <mpi.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <uti.h> +#include "util.h" +#include "async_progress.h" + +//#define PROFILE + +#define STOP_BY_MPI 0 +#define STOP_BY_MEM 1 +#define STOP_TYPE STOP_BY_MEM/*STOP_BY_MPI*/ + +#define POLL_BY_PROBE 0 +#define POLL_BY_WAIT 1 +#define POLL_BY_TEST 2 +#define POLL_TYPE POLL_BY_PROBE/*POLL_BY_WAIT*/ + +static int 
progress_rank, progress_world_rank, progress_world_nproc; +static pthread_t progress_thr; +static pthread_mutex_t progress_mutex; +static pthread_cond_t progress_cond_down; +static volatile int progress_flag_up, progress_flag_down; + +static enum progress_state progress_state; +static int progress_stop_flag; +static MPI_Comm progress_comm; +static int progress_refc; +#define WAKE_TAG 100 + +#define NROW_STAT 10 +#define NRANK_STAT 1 +#define RECORD_STAT(count, array, end, start) do { \ + if (count < NROW_STAT) { \ + array[count++] += (end - start); \ + } \ +} while(0) + +static int cyc_prog1_count, cyc_prog2_count, cyc_init1_count, cyc_init2_count, cyc_start_count, cyc_stop1_count, cyc_stop2_count, cyc_stop3_count, cyc_finalize_count; +static unsigned long cyc_prog1[NROW_STAT]; +static unsigned long cyc_prog2[NROW_STAT]; +static unsigned long cyc_init1[NROW_STAT]; +static unsigned long cyc_init2[NROW_STAT]; +static unsigned long cyc_start[NROW_STAT]; +static unsigned long cyc_stop1[NROW_STAT]; +static unsigned long cyc_stop2[NROW_STAT]; +static unsigned long cyc_stop3[NROW_STAT]; +static unsigned long cyc_finalize[NROW_STAT]; + +#define MIN2(x,y) ((x) < (y) ? 
(x) : (y)) + +void pr_stat(char *name, int count, unsigned long *array) { + int i; + + pr_debug("[%d] %s: ", progress_world_rank, name); + for (i = 0; i < MIN2(count, NROW_STAT); i++) { + if (i > 0) pr_debug(","); + pr_debug("%ld", array[i]); + } + pr_debug("\n"); +} + +static void *progress_fn(void* data) +{ + int ret; + MPI_Request req; + struct rusage ru_start, ru_end; + struct timeval tv_start, tv_end; + unsigned long start, end; + +#if 0 + ret = syscall(732); + if (ret == -1) { + pr_debug("Progress is running on Linux\n"); + } else { + pr_debug("Progress is running on McKernel\n"); + } + + if ((ret = getrusage(RUSAGE_THREAD, &ru_start))) { + pr_err("%s: error: getrusage failed (%d)\n", __func__, ret); + } + + if ((ret = gettimeofday(&tv_start, NULL))) { + pr_err("%s: error: gettimeofday failed (%d)\n", __func__, ret); + } + +#endif + +#if STOP_TYPE == STOP_BY_MEM && POLL_TYPE == POLL_BY_TEST + + if ((ret = MPI_Irecv(NULL, 0, MPI_CHAR, progress_rank, WAKE_TAG, progress_comm, &req)) != MPI_SUCCESS) { + pr_err("%s: error: MPI_Irecv: %d\n", __func__, ret); + } + +#endif + +init: +#ifdef PROFILE + start = rdtsc_light(); +#endif + + /* Wait for state transition */ + pthread_mutex_lock(&progress_mutex); + while (!progress_flag_down) { + pthread_cond_wait(&progress_cond_down, &progress_mutex); + } + progress_flag_down = 0; + + if (progress_state == PROGRESS_FINALIZE) { + pthread_mutex_unlock(&progress_mutex); + goto finalize; + } + + if (progress_state != PROGRESS_START) { + pr_err("%s: error: unexpected state: %d\n", __func__, progress_state); + pthread_mutex_unlock(&progress_mutex); + goto finalize; + } + + pthread_mutex_unlock(&progress_mutex); + +#ifdef PROFILE + end = rdtsc_light(); + RECORD_STAT(cyc_prog1_count, cyc_prog1, end, start); +#endif + + //if (progress_world_rank < 2) pr_debug("[%d] poll,cpu=%d\n", progress_world_rank, sched_getcpu()); + +#ifdef PROFILE + start = rdtsc_light(); +#endif + +#if STOP_TYPE == STOP_BY_MEM + +#if POLL_TYPE == POLL_BY_PROBE + 
+ int completed = 0; + while (!progress_stop_flag) { + if ((ret = MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &completed, MPI_STATUS_IGNORE)) != MPI_SUCCESS) { + pr_err("%s: error: MPI_Iprobe: %d\n", __func__, ret); + break; + } + //usleep(1); + } + +#elif POLL_TYPE == POLL_BY_TEST + + int completed = 0; + while (!completed && !progress_stop_flag) { + if ((ret = MPI_Test(&req, &completed, MPI_STATUS_IGNORE)) != MPI_SUCCESS) { + pr_err("%s: error: MPI_Iprobe: %d\n", __func__, ret); + break; + } + //usleep(1); + } + +#endif /* POLL_TYPE */ + +#elif STOP_TYPE == STOP_BY_MPI + + +#if POLL_TYPE == POLL_BY_WAIT + + if ((ret = MPI_Irecv(NULL, 0, MPI_CHAR, progress_rank, WAKE_TAG, progress_comm, &req)) != MPI_SUCCESS) { + pr_err("%s: error: MPI_Irecv: %d\n", __func__, ret); + } + + if ((ret = MPI_Wait(&req, MPI_STATUS_IGNORE)) != MPI_SUCCESS) { + pr_err("%s: error: MPI_Wait failed (%d)\n", __func__, ret); + } + +#elif POLL_TYPE == POLL_BY_PROBE + + int completed = 0; + while (!completed) { + if ((ret = MPI_Iprobe(progress_rank, WAKE_TAG, progress_comm, &completed, MPI_STATUS_IGNORE)) != MPI_SUCCESS) { + pr_err("%s: error: MPI_Iprobe: %d\n", __func__, ret); + break; + } + usleep(1); + } + + if ((ret = MPI_Recv(NULL, 0, MPI_CHAR, progress_rank, WAKE_TAG, progress_comm, MPI_STATUS_IGNORE)) != MPI_SUCCESS) { + pr_err("%s: error: MPI_Irecv: %d\n", __func__, ret); + } + +#endif /* POLL_TYPE */ +#endif /* STOP_TYPE */ + + progress_state = PROGRESS_INIT; + __sync_synchronize(); /* st-st barrier */ + progress_flag_up = 1; + +#ifdef PROFILE + end = rdtsc_light(); + RECORD_STAT(cyc_prog2_count, cyc_prog2, end, start); +#endif + goto init; + + finalize: + + if ((ret = getrusage(RUSAGE_THREAD, &ru_end))) { + pr_err("%s: error: getrusage failed (%d)\n", __func__, ret); + } + + if ((ret = gettimeofday(&tv_end, NULL))) { + pr_err("%s: error: gettimeofday failed (%d)\n", __func__, ret); + } + +#if 0 + pr_debug("%s: wall: %ld, user: %ld, sys: %ld\n", __func__, + DIFFUSEC(tv_end, 
tv_start), + DIFFUSEC(ru_end.ru_utime, ru_start.ru_utime), + DIFFUSEC(ru_end.ru_stime, ru_start.ru_stime)); +#endif + + progress_state = PROGRESS_INIT; + __sync_synchronize(); /* st-st barrier */ + progress_flag_up = 1; + + return NULL; +} + +void progress_init() +{ + int ret = 0; + pthread_attr_t pthread_attr; + uti_attr_t uti_attr; + unsigned long start, end; + +#ifdef PROFILE + start = rdtsc_light(); +#endif + MPI_Comm_rank(MPI_COMM_WORLD, &progress_world_rank); + MPI_Comm_size(MPI_COMM_WORLD, &progress_world_nproc); + + if (__sync_val_compare_and_swap(&progress_refc, 0, 1) == 1) { + return; + } + + /* printf costs much in MPI */ + uti_set_loglevel(UTI_LOGLEVEL_ERR); + + if ((ret = MPI_Comm_dup(MPI_COMM_SELF, &progress_comm))) { + pr_err("%s: error: MPI_Comm_dup failed (%d)\n", __func__, ret); + goto out; + } + + MPI_Comm_rank(progress_comm, &progress_rank); + + if ((ret = pthread_mutex_init(&progress_mutex, NULL))) { + pr_err("%s: error: pthread_mutex_init failed (%d)\n", __func__, ret); + goto out; + } + + if ((ret = pthread_cond_init(&progress_cond_down, NULL))) { + pr_err("%s: error: pthread_cond_init failed (%d)\n", __func__, ret); + goto out; + } + + if ((ret = pthread_attr_init(&pthread_attr))) { + pr_err("%s: error: pthread_attr_init failed (%d)\n", __func__, ret); + goto out; + } + + if ((ret = uti_attr_init(&uti_attr))) { + pr_err("%s: error: uti_attr_init failed (%d)\n", __func__, ret); + goto out; + } + +#if 0 + if ((ret = UTI_ATTR_SAME_L1(&uti_attr))) { + pr_err("%s: error: UTI_ATTR_SAME_L1 failed\n", __func__); + } +#endif + +#if 1 /* Expecting round-robin binding */ + if ((ret = UTI_ATTR_CPU_INTENSIVE(&uti_attr))) { + pr_err("%s: error: UTI_ATTR_CPU_INTENSIVE failed\n", __func__); + } + +#endif + +#ifdef PROFILE + end = rdtsc_light(); + RECORD_STAT(cyc_init1_count, cyc_init1, end, start); +#endif + +#ifdef PROFILE + start = rdtsc_light(); +#endif + + if ((ret = uti_pthread_create(&progress_thr, &pthread_attr, progress_fn, NULL, &uti_attr))) { + 
pr_err("%s: error: uti_pthread_create failed (%d)\n", __func__, ret); + goto out; + } + + ret = 0; + out: + if (ret) { + __sync_fetch_and_sub(&progress_refc, 1); + } + +#ifdef PROFILE + end = rdtsc_light(); + RECORD_STAT(cyc_init2_count, cyc_init2, end, start); +#endif +} + +void progress_start() +{ + unsigned long start, end; + + if (progress_refc == 0) { + progress_init(); + } + +#ifdef PROFILE + start = rdtsc_light(); +#endif + pthread_mutex_lock(&progress_mutex); + + if (progress_state == PROGRESS_FINALIZE) { + pr_warn("%s: warning: FINALIZE\n", __func__); + pthread_mutex_unlock(&progress_mutex); + return; + } + + if (progress_state == PROGRESS_START) { + //pr_warn("%s: warning: START\n", __func__); + pthread_mutex_unlock(&progress_mutex); + return; + } + + if (progress_state != PROGRESS_INIT) { + pr_err("%s: error: unexpected state: %d\n", __func__, progress_state); + pthread_mutex_unlock(&progress_mutex); + return; + } + + progress_state = PROGRESS_START; +#if STOP_TYPE == STOP_BY_MEM + progress_stop_flag = 0; +#endif + __sync_synchronize(); /* memory barrier instruction */ + progress_flag_down = 1; + pthread_cond_signal(&progress_cond_down); + pthread_mutex_unlock(&progress_mutex); + +#ifdef PROFILE + end = rdtsc_light(); + RECORD_STAT(cyc_start_count, cyc_start, end, start); +#endif +} + +void do_progress_stop() +{ + int ret; + unsigned long start, end; + + //if (progress_world_rank < 2) pr_debug("[%d] stop,cpu=%d\n", progress_world_rank, sched_getcpu()); + +#ifdef PROFILE + start = rdtsc_light(); +#endif + +#if STOP_TYPE == STOP_BY_MEM + + progress_stop_flag = 1; + __sync_synchronize(); /* st-st barrier */ + +#elif STOP_TYPE == STOP_BY_MPI + + if ((ret = MPI_Send(NULL, 0, MPI_CHAR, progress_rank, WAKE_TAG, progress_comm)) != MPI_SUCCESS) { + pr_err("%s: error: MPI_Send failed (%d)\n", __func__, ret); + return; + } + + +#endif /* STOP_TYPE */ + +#ifdef PROFILE + end = rdtsc_light(); + RECORD_STAT(cyc_stop2_count, cyc_stop2, end, start); + start = 
rdtsc_light(); +#endif + + /* Make sure the following command will observe INIT */ + while (!progress_flag_up) { + } + progress_flag_up = 0; + +#ifdef PROFILE + end = rdtsc_light(); + RECORD_STAT(cyc_stop3_count, cyc_stop3, end, start); +#endif +} + +void progress_stop() +{ + unsigned long start, end; + +#ifdef PROFILE + start = rdtsc_light(); +#endif + + if (progress_refc == 0) { + return; + } + + pthread_mutex_lock(&progress_mutex); + + if (progress_state == PROGRESS_INIT) { + pthread_mutex_unlock(&progress_mutex); + return; + } + + if (progress_state == PROGRESS_FINALIZE) { + pthread_mutex_unlock(&progress_mutex); + return; + } + + if (progress_state != PROGRESS_START) { + pr_err("%s: error: unexpected state: %d\n", __func__, progress_state); + pthread_mutex_unlock(&progress_mutex); + return; + } + + pthread_mutex_unlock(&progress_mutex); + +#ifdef PROFILE + end = rdtsc_light(); + RECORD_STAT(cyc_stop1_count, cyc_stop1, end, start); +#endif + + do_progress_stop(); +} + +void progress_finalize() +{ + int ret; + int i, j; + MPI_Request req; + unsigned long start, end; + int nproc; + + MPI_Comm_size(MPI_COMM_WORLD, &nproc); + +#ifdef PROFILE + start = rdtsc_light(); +#endif + + if (progress_refc == 0) { + return; + } + + retry: + pthread_mutex_lock(&progress_mutex); + + if (progress_state == PROGRESS_START) { + pthread_mutex_unlock(&progress_mutex); + do_progress_stop(); + goto retry; + } + + if (progress_state == PROGRESS_FINALIZE) { + pthread_mutex_unlock(&progress_mutex); + return; + } + + if (progress_state != PROGRESS_INIT) { + pr_err("%s: error: unexpected state: %d\n", __func__, progress_state); + pthread_mutex_unlock(&progress_mutex); + return; + } + + progress_state = PROGRESS_FINALIZE; + __sync_synchronize(); /* st-st barrier */ + progress_flag_down = 1; + pthread_cond_signal(&progress_cond_down); + pthread_mutex_unlock(&progress_mutex); + + /* Make sure the following command will observe INIT */ + while (!progress_flag_up) { + } + progress_flag_up = 0; + 
+ pthread_join(progress_thr, NULL); + + if ((ret = MPI_Comm_free(&progress_comm)) != MPI_SUCCESS) { + pr_err("%s: error: MPI_Comm_free failed (%d)\n", __func__, ret); + return; + } + + progress_refc = 0; + +#ifdef PROFILE + end = rdtsc_light(); + RECORD_STAT(cyc_finalize_count, cyc_finalize, end, start); + + for (j = 0; j < NRANK_STAT; j++) { + + MPI_Barrier(MPI_COMM_WORLD); + + if (j != progress_world_rank) { + usleep(1000000); + continue; + } + + pr_stat("cyc_prog1", cyc_prog1_count, cyc_prog1); + pr_stat("cyc_prog2", cyc_prog2_count, cyc_prog2); + pr_stat("cyc_init1", cyc_init1_count, cyc_init1); + pr_stat("cyc_init2", cyc_init2_count, cyc_init2); + pr_stat("cyc_start", cyc_start_count, cyc_start); + pr_stat("cyc_stop1", cyc_stop1_count, cyc_stop1); + pr_stat("cyc_stop2", cyc_stop2_count, cyc_stop2); + pr_stat("cyc_stop3", cyc_stop3_count, cyc_stop3); + pr_stat("cyc_finalize", cyc_finalize_count, cyc_finalize); + } +#endif +} diff --git a/test/uti/mpi/async_progress.h b/test/uti/mpi/async_progress.h new file mode 100644 index 00000000..bd0d39a2 --- /dev/null +++ b/test/uti/mpi/async_progress.h @@ -0,0 +1,15 @@ +#ifndef _ASYNC_PROGRESS_INCLUDED_ +#define _ASYNC_PROGRESS_INCLUDED_ + +enum progress_state { + PROGRESS_INIT = 0, + PROGRESS_START, + PROGRESS_FINALIZE +}; + +void progress_init(); +void progress_start(); +void progress_stop(); +void progress_finalize(); + +#endif diff --git a/test/uti/mpi/env_intel.sh b/test/uti/mpi/env_intel.sh new file mode 100644 index 00000000..5455e20f --- /dev/null +++ b/test/uti/mpi/env_intel.sh @@ -0,0 +1,17 @@ +export HYDRA_BOOTSTRAP_EXEC=/bin/pjrsh +export HYDRA_BOOTSTRAP=rsh +export HYDRA_PROXY_RETRY_COUNT=30 + +#export HYDRA_BRANCH_COUNT=4 + +export I_MPI_PIN=off +export HFI_NO_CPUAFFINITY=1 +export KMP_AFFINITY=granularity=thread,scatter +export I_MPI_COLL_INTRANODE_SHM_THRESHOLD=4194304 +export I_MPI_FABRICS=shm:tmi +export PSM2_RCVTHREAD=0 +export I_MPI_TMI_PROVIDER=psm2 +export I_MPI_FALLBACK=0 +export 
PSM2_MQ_RNDV_HFI_WINDOW=4194304 +export PSM2_MQ_EAGER_SDMA_SZ=65536 +export PSM2_MQ_RNDV_HFI_THRESH=200000 diff --git a/test/uti/mpi/env_mpich.sh b/test/uti/mpi/env_mpich.sh new file mode 100644 index 00000000..e61e71d8 --- /dev/null +++ b/test/uti/mpi/env_mpich.sh @@ -0,0 +1,5 @@ +export HYDRA_BOOTSTRAP_EXEC=/bin/pjrsh +export HYDRA_BOOTSTRAP=rsh +export HYDRA_PROXY_RETRY_COUNT=30 +export MPIR_CVAR_OFI_USE_PROVIDER=psm2 + diff --git a/test/uti/mpi/filter.pl b/test/uti/mpi/filter.pl new file mode 100755 index 00000000..e61c66ef --- /dev/null +++ b/test/uti/mpi/filter.pl @@ -0,0 +1,22 @@ +#!/usr/bin/perl + +while(<>) { # For each line of hostfile + open(); + $found = 0; + while(<>) { + if($_ =~ /progress_fn,enter,tid=(\d+)/) { + $tid = $1; + $found = 1; + # print 'tid='.$tid."\n" + } + if($found == 1 && $_ =~ /^$tid/) { + if($_ =~ /^$tid\s(\w+)/) { + # print $1."\n"; + $freq{$1}{$hostname}++; + } + } + } +} +foreach $key (sort(keys(%freq))) { + print $key.",".$freq{$key}."\n"; +} diff --git a/test/uti/mpi/mpi_progress_thread.pl b/test/uti/mpi/mpi_progress_thread.pl new file mode 100755 index 00000000..273f1d84 --- /dev/null +++ b/test/uti/mpi/mpi_progress_thread.pl @@ -0,0 +1,100 @@ +#!/usr/bin/perl + +# Usage ./mpi_progress.pl <#procs> <#nnodes> (mck|lin) (mpich|intel) + +use File::Basename; +use File::Copy "cp"; + +($nprocs, $nnodes, $os, $mpi) = @ARGV; +$ppn = $nprocs / $nnodes; + +@command = split /\s+/, basename($0); +@fn = split /\./, $command[0]; + +if($nnodes <= 16) { + $rg = 'MCK-FLAT-QUADRANT'; +} elsif($ARGV[1] <= 128) { + $rg = 'debug-flat'; +} else { + $rg = 'regular-flat'; +} + +%elapse = ( +'1', '00:10:00', +'2', '00:10:00', +'4', '00:10:00', +'8', '00:10:00', +'16', '00:10:00', +'32', '00:10:00', +'64', '00:05:00', +'128', '00:05:00', +'256', '00:10:00', +'512', '00:15:00', +'1024', '00:15:00', +'2048', '00:30:00', + ); + +if ($os eq 'lin') { + $use_mck = ''; + $mck_mem = ''; + $mcexec = ''; + $mcexecopt = ''; +} else { + $path_to_mck = 
'/work/gg10/e29005/project/os/install'; + $use_mck = '#PJM -x MCK='.$path_to_mck; + $mck_mem = '#PJM -x MCK_MEM=32G@0,8G@1'; + $mcexec = $path_to_mck.'/bin/mcexec'; + $mcexecopt = '-n '.$ppn; +} + +if ($mpi eq 'intel') { + $cc = 'mpiicc'; + $mpiexec = 'mpiexec'; + $genv = ''; + $progress = '-genv I_MPI_ASYNC_PROGRESS 1'; # -genv I_MPI_ASYNC_PROGRESS_PIN 1 +} else { + $mpi_lib = '/work/gg10/e29005/project/mpich/install'; + $cc = $mpi_lib.'/bin/mpicc'; + $mpiexec = $mpi_lib.'/bin/mpiexec'; + $genv = '-genv LD_LIBRARY_PATH '.$mpi_lib.'/lib:$LD_LIBRARY_PATH'; + $progress = '-genv MPIR_CVAR_ASYNC_PROGRESS 1'; +} + +system("make clean; make CC=$cc"); + +$dir=$ARGV[2].'_'.$ARGV[0].'_'.$ARGV[1].'_'.`date +%Y%m%d_%H%M%S`; +chomp($dir); +print 'less '.$dir.'/job.sh.o*'."\n"; + +mkdir $dir; +chdir $dir; +cp('../001', './001') or die 'copy failed'; +open(IN, "../$fn[0].sh.in"); +open(OUT, ">./job.sh"); +while(<IN>) { + s/\@rg@/$rg/g; + s/\@nnodes@/$nnodes/g; + s/\@nprocs@/$nprocs/g; + s/\@elapse@/$elapse{$nnodes}/g; + s/\@use_mck@/$use_mck/g; + s/\@mck_mem@/$mck_mem/g; + s/\@progress@/$progress/g; + s/\@genv@/$genv/g; + s/\@mpiexec@/$mpiexec/g; + s/\@mcexec@/$mcexec/g; + s/\@mcexecopt@/$mcexecopt/g; + if(/\@env@/) { + open(INCL, "../env_$mpi.sh"); + while(my $line = <INCL>) { + print OUT $line; + } + next; + } + print OUT $_; +} +close(IN); +close(OUT); + +$cmd = 'PJM_MCK_AVAILABLE=1 pjsub ./job.sh'; +#print $cmd."\n"; +exec($cmd); diff --git a/test/uti/mpi/mpi_progress_thread.sh.in b/test/uti/mpi/mpi_progress_thread.sh.in new file mode 100644 index 00000000..f149a1f8 --- /dev/null +++ b/test/uti/mpi/mpi_progress_thread.sh.in @@ -0,0 +1,16 @@ +#!/bin/sh + +#PJM -L rscgrp=@rg@ +#PJM -L node=@nnodes@ +#PJM --mpi proc=@nprocs@ +#PJM -L elapse=@elapse@ +#PJM -L proc-crproc=16384 +#PJM -g gg10 +#PJM -j +#PJM -s +@use_mck@ +@mck_mem@ + +@env@ + +@mpiexec@ @genv@ @progress@ -np @nprocs@ -machinefile ${PJM_O_NODEINF} @mcexec@ @mcexecopt@ ./001 1048576 1000 diff --git 
a/test/uti/mpi/util.c b/test/uti/mpi/util.c new file mode 100644 index 00000000..cdf51140 --- /dev/null +++ b/test/uti/mpi/util.c @@ -0,0 +1,186 @@ +#define _GNU_SOURCE /* See feature_test_macros(7) */ +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdint.h> +#include <sys/syscall.h> /* For SYS_xxx definitions */ +#include <sched.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <string.h> +#include <time.h> +#include "util.h" + +/* Messaging */ +enum test_loglevel test_loglevel = TEST_LOGLEVEL_DEBUG; + +/* Calculation */ +static inline void asmloop(unsigned long n) { + int j; + + for (j = 0; j < n; j++) { + asm volatile( + "movq $0, %%rcx\n\t" + "1:\t" + "addq $1, %%rcx\n\t" + "cmpq $99, %%rcx\n\t" + "jle 1b\n\t" + : + : + : "rcx", "cc"); + } +} + +#define N_INIT 10000000 +double nspw; /* nsec per work */ + +void ndelay_init(int verbose) { + struct timeval start, end; + + //clock_gettime(TIMER_KIND, &start); + gettimeofday(&start, NULL); + +#pragma omp parallel + { + asmloop(N_INIT); + } + + //clock_gettime(TIMER_KIND, &end); + gettimeofday(&end, NULL); + + nspw = DIFFUSEC(end, start) * 1000 / (double)N_INIT; + if (verbose) { + pr_debug("nspw=%f\n", nspw); + } +} + +#if 1 +void ndelay(long delay_nsec) { + if (delay_nsec < 0) { + printf("delay_nsec < 0\n"); + return; + } +#pragma omp parallel + { + asmloop(delay_nsec / nspw); + } +} +#else /* For machines with large core-to-core performance variation (e.g. 
OFP) */ +void ndelay(long delay_nsec) { + struct timespec start, end; + + if (delay_nsec < 0) { return; } + clock_gettime(TIMER_KIND, &start); + + while (1) { + clock_gettime(TIMER_KIND, &end); + if (DIFFNSEC(end, start) >= delay_nsec) { + break; + } + asmloop(2); /* ~150 ns per iteration on FOP */ + } +} +#endif + + +double cycpw; /* cyc per work */ + +void cdlay_init() { + unsigned long start, end; + + start = rdtsc_light(); +#define N_INIT 10000000 + asmloop(N_INIT); + end = rdtsc_light(); + cycpw = (end - start) / (double)N_INIT; +} + +#if 0 +void cdelay(long delay_cyc) { + if (delay_cyc < 0) { + return; + } + asmloop(delay_cyc / cycpw); +} +#else /* For machines with large core-to-core performance variation (e.g. OFP) */ +void cdelay(long delay_cyc) { + unsigned long start, end; + + if (delay_cyc < 0) { return; } + start = rdtsc_light(); + + while (1) { + end = rdtsc_light(); + if (end - start >= delay_cyc) { + break; + } + asmloop(2); + } +} +#endif + + +int print_cpu_last_executed_on(const char *name) { + char fn[256]; + char* result; + pid_t tid = syscall(SYS_gettid); + int fd; + int offset; + int mpi_errno = 0; + int rc; + + sprintf(fn, "/proc/%d/task/%d/stat", getpid(), (int)tid); + //printf("fn=%s\n", fn); + fd = open(fn, O_RDONLY); + if(fd == -1) { + printf("open() failed\n"); + goto fn_fail; + } + + result = malloc(65536); + if(result == NULL) { + printf("malloc() failed"); + goto fn_fail; + } + + int amount = 0; + offset = 0; + while(1) { + amount = read(fd, result + offset, 65536); + // printf("amount=%d\n", amount); + if(amount == -1) { + printf("read() failed"); + goto fn_fail; + } + if(amount == 0) { + goto eof; + } + offset += amount; + } + eof:; + //printf("result:%s\n", result); + + char* next_delim = result; + char* field; + int i; + for(i = 0; i < 39; i++) { + field = strsep(&next_delim, " "); + } + + int cpu = sched_getcpu(); + if(cpu == -1) { + printf("getpu() failed\n"); + goto fn_fail; + } + + rc = syscall(732); + + printf("%s: 
pmi_rank=%02d,os=%s,stat-cpu=%02d,sched_getcpu=%02d,tid=%d\n", name, atoi(getenv("PMI_RANK")), rc == -1 ? "lin" : "mck", atoi(field), cpu, tid); fflush(stdout); + fn_exit: + free(result); + return mpi_errno; + fn_fail: + mpi_errno = -1; + goto fn_exit; +} diff --git a/test/uti/mpi/util.h b/test/uti/mpi/util.h new file mode 100644 index 00000000..3482aae3 --- /dev/null +++ b/test/uti/mpi/util.h @@ -0,0 +1,73 @@ +#ifndef __UTIL_H_INCLUDED__ +#define __UTIL_H_INCLUDED__ + +#include <stdint.h> + +/* Messaging */ + +enum test_loglevel { + TEST_LOGLEVEL_ERR = 0, + TEST_LOGLEVEL_WARN, + TEST_LOGLEVEL_DEBUG +}; + +extern enum test_loglevel test_loglevel; +static inline void test_set_loglevel(enum test_loglevel level) +{ + test_loglevel = level; +} + +#define pr_level(level, fmt, args...) do { \ + if (test_loglevel >= level) { \ + fprintf(stdout, fmt, ##args); \ + } \ +} while (0) + +#define pr_err(fmt, args...) pr_level(TEST_LOGLEVEL_ERR, fmt, ##args) +#define pr_warn(fmt, args...) pr_level(TEST_LOGLEVEL_WARN, fmt, ##args) +#define pr_debug(fmt, args...) pr_level(TEST_LOGLEVEL_DEBUG, fmt, ##args) + +#define _OKNG(verb, jump, cond, fmt, args...) do { \ + if (cond) { \ + if (verb) \ + printf("[ OK ] " fmt, ##args); \ + } else { \ + printf("[ NG ] " fmt, ##args); \ + if (jump) { \ + ret = -1; \ + goto out; \ + } \ + } \ +} while (0) + +#define OKNG(args...) _OKNG(1, 1, ##args) +#define NG(args...) _OKNG(0, 1, ##args) +#define OKNGNOJUMP(args...) 
_OKNG(1, 0, ##args) + +/* Time */ +inline uint64_t rdtsc_light(void) +{ + uint64_t x; + __asm__ __volatile__("rdtscp;" /* rdtscp don't jump over earlier instructions */ + "shl $32, %%rdx;" + "or %%rdx, %%rax" : + "=a"(x) : + : + "%rcx", "%rdx", "memory"); + return x; +} + +#define DIFFUSEC(end, start) ((end.tv_sec - start.tv_sec) * 1000000UL + (end.tv_usec - start.tv_usec)) +#define DIFFNSEC(end, start) ((end.tv_sec - start.tv_sec) * 1000000000UL + (end.tv_nsec - start.tv_nsec)) +#define TIMER_KIND CLOCK_MONOTONIC_RAW /* CLOCK_THREAD_CPUTIME_ID */ + +/* Calculation emulation */ +void ndelay_init(); +void ndelay(long delay_nsec); +void cdelay_init(); +void cdelay(long delay_cyc); + +/* CPU location */ +int print_cpu_last_executed_on(); + +#endif diff --git a/test/uti/posix_aio/001.c b/test/uti/posix_aio/001.c new file mode 100644 index 00000000..2e09b9cf --- /dev/null +++ b/test/uti/posix_aio/001.c @@ -0,0 +1,517 @@ +#include <fcntl.h> +#include <limits.h> +#include <stdlib.h> +#include <unistd.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <aio.h> +#include <signal.h> +#include <libgen.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <mpi.h> +#include <linux/limits.h> +#include "util.h" + +#define NREQS 1 /* # of parallel I/O requests per process */ +#define SZBUF (1ULL<<23) + +#define MYTIME_TOUSEC 1000000 +#define MYTIME_TONSEC 1000000000 + +#define NROW 11 +#define NCOL 4 + +#define NSAMPLES_DROP 0/*10*/ +#define NSAMPLES_IO 2/*20*/ +#define NSAMPLES_TOTAL 2/*20*/ +#define NSAMPLES_INNER 1 + +#define Q(x) #x +#define QUOTE(x) Q(x) + +char test_srcdir[PATH_MAX]; + +static inline double mytime() { + return /*rdtsc_light()*/MPI_Wtime(); +} + +struct aioreq { + int rank; + int status; + struct aiocb *aiocbp; +}; + +static void aio_sighandler(int sig, siginfo_t *si, void *ucontext) +{ + if (si->si_code == SI_ASYNCIO) { + //struct aioreq *aioreq = si->si_value.sival_ptr; + //pr_debug("I/O completion signal received\n"); + } +} + 
+int my_aio_init(int nreqs, struct aioreq *iolist, struct aiocb *aiocblist, char *aiobufs[NREQS]) { + int j; + + for (j = 0; j < nreqs; j++) { + iolist[j].rank = j; + iolist[j].aiocbp = &aiocblist[j]; + iolist[j].aiocbp->aio_buf = aiobufs[j]; + iolist[j].aiocbp->aio_nbytes = SZBUF; + iolist[j].aiocbp->aio_reqprio = 0; + iolist[j].aiocbp->aio_offset = 0; + iolist[j].aiocbp->aio_sigevent.sigev_notify = SIGEV_SIGNAL; + iolist[j].aiocbp->aio_sigevent.sigev_signo = SIGUSR1; + iolist[j].aiocbp->aio_sigevent.sigev_value.sival_ptr = &iolist[j]; + } + + return 0; +} + + +int my_aio_evict(int nreqs, char **fn) { + int ret; + int i; + char cmd[PATH_MAX]; + + for (i = 0; i < NREQS; i++) { + + sprintf(cmd, "%s -e %s > /dev/null", QUOTE(VMTOUCH), fn[i]); + ret = system(cmd); + + if (ret == -1) { + pr_err("%s: error: system\n", + __func__); + goto out; + } + + if (WEXITSTATUS(ret)) { + pr_err("%s: error: system returned %d\n", + __func__, WEXITSTATUS(ret)); + ret = WEXITSTATUS(ret); + goto out; + } + } + ret = 0; + out: + return ret; +} +int my_aio_open(int nreqs, struct aioreq *iolist, char **fn) { + int ret; + int j; + + for (j = 0; j < NREQS; j++) { + iolist[j].aiocbp->aio_fildes = open(fn[j], O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH); + if (iolist[j].aiocbp->aio_fildes == -1) { + pr_err("%s: error: open %s: %s\n", + __func__, fn[j], strerror(errno)); + ret = 1; + goto out; + } + } + + ret = 0; + out: + return ret; +} + +int my_aio_check(int nreqs, char **fn, char **mem_data) { + int ret; + int i; + FILE *fp[NREQS] = { 0 }; + char *file_data[NREQS]; + + /* Check contents */ + for (i = 0; i < nreqs; i++) { + + if (!(file_data[i] = malloc(SZBUF))) { + pr_err("error: allocating data\n"); + ret = -ENOMEM; + goto out; + } + + if (!(fp[i] = fopen(fn[i], "r+"))) { + pr_err("error: fopen %s: %s\n", + fn[i], strerror(errno)); + ret = -errno; + goto out; + } + + if (fread(file_data[i], sizeof(char), SZBUF, fp[i]) != SZBUF) { + pr_err("error: fread: 
%s\n", + strerror(errno)); + ret = -1; + goto out; + } + + fclose(fp[i]); + + if (memcmp((const char *)file_data[i], mem_data[i], SZBUF)) { + pr_err("%s: file_data[%d] and mem_data[%d] doesn't match\n", + __func__, i, i); + ret = -1; + goto out; + } + + free(file_data[i]); + } + ret = 0; + out: + return ret; +} + +void my_aio_close(int nreqs, struct aioreq *iolist) { + int j; + + for (j = 0; j < NREQS; j++) { + close(iolist[j].aiocbp->aio_fildes); + iolist[j].aiocbp->aio_fildes = -1; + } +} + +int my_aio(int nreqs, struct aioreq *iolist, char **fn, long nsec_calc) { + int ret; + int i, j; + + /* Start async IO */ + for (j = 0; j < NSAMPLES_INNER; j++) { + int completion_count = 0; + + //pr_debug("debug: opening file\n"); + if ((ret = my_aio_open(nreqs, iolist, fn)) == -1) { + pr_err("%s: error: aio_read: %s\n", + __func__, strerror(errno)); + ret = -errno; + goto out; + } + + //pr_debug("debug: issuing write command\n"); + for (j = 0; j < nreqs; j++) { + + /* Reset completion notice */ + iolist[j].status = EINPROGRESS; + + if ((ret = aio_write(iolist[j].aiocbp)) == -1) { + pr_err("%s: error: aio_read: %s\n", + __func__, strerror(errno)); + ret = -errno; + goto out; + } + } + + /* Emulate calcuation phase */ + ndelay(nsec_calc); + + /* Wait for completion of async IO */ + //pr_debug("debug: waiting for completion\n"); + while (completion_count != nreqs) { + for (j = 0; j < nreqs; j++) { + if (iolist[j].status != EINPROGRESS) { + continue; + } + + iolist[j].status = aio_error(iolist[j].aiocbp); + + switch (iolist[j].status) { + case 0: /* Succeeded */ + goto completed; + case EINPROGRESS: + break; + case ECANCELED: + pr_err("%s: error: aio is cancelled\n", + __func__); + goto completed; + default: + pr_err("%s: error: unexpected status: %d\n", + __func__, iolist[j].status); + goto completed; + completed: + completion_count++; + break; + } + } + } + + /* Check write amount */ + for (j = 0; j < nreqs; j++) { + ssize_t size; + + if ((size = aio_return(iolist[j].aiocbp)) 
!= SZBUF) { + pr_err("%s: Expected to have written %ld B but reported to have written %ld B\n", + __func__, SZBUF, size); + ret = -1; + goto out; + } + } + + my_aio_close(nreqs, iolist); + } + ret = 0; + out: + return ret; +} + +int measure(double *result, int nsamples, int nsamples_drop, int nreqs, struct aioreq *iolist, char **fn, char **aiobufs, long nsec_calc) { + int ret; + int i; + double t_l, t_g, t_sum = 0; + double start, end; + + for (i = 0; i < nsamples + nsamples_drop; i++) { + +#if 0 + pr_debug("debug: evicting file cache\n"); + if ((ret = my_aio_evict(nreqs, fn))) { + pr_err("%s: error: my_aio_evict returned %d\n", + __func__, ret); + } +#endif + MPI_Barrier(MPI_COMM_WORLD); + + start = mytime(); + if ((ret = my_aio(nreqs, iolist, fn, nsec_calc))) { + pr_err("%s: error: my_aio_read returned %d\n", + __func__, ret); + } + end = mytime(); + + MPI_Barrier(MPI_COMM_WORLD); + + /* Check contents */ + if ((ret = my_aio_check(nreqs, fn, aiobufs))) { + pr_err("%s: error: my_aio_check returned %d\n", + __func__, ret); + } + + if (i < nsamples_drop) { + continue; + } + + /* Take max */ + t_l = end - start; + MPI_Allreduce(&t_l, &t_g, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + t_sum += t_g; + } + + *result = t_sum / nsamples; + ret = 0; + out: + return ret; +} + +int main(int argc, char **argv) +{ + int ret; + int i, j, progress, l; + int rank, nproc; + int disable_syscall_intercept = 0, ppn = -1; + struct aioreq *iolist; + struct aiocb *aiocblist; + struct sigaction sa; + double t_io_ave, t_total_ave; + double t_table[NROW][NCOL] = { 0 }; + int opt; + char *aiobufs[NREQS] = { 0 }; + char **fn; + + opterr = 0; /* Don't print out error when not recognizing option character */ + + while ((opt = getopt(argc, argv, ":I:p:")) != -1) { + switch (opt) { + case 'I': + disable_syscall_intercept = atoi(optarg); + break; + case 'p': + ppn = atoi(optarg); + break; + case '?': + pr_err("error: invalid option: -%c\n", + optopt); + ret = 1; + goto out; + case ':': + 
pr_err("error: option -%c requires an argument\n", + optopt); + ret = 1; + goto out; + } + } + + if (ppn == -1) { + pr_err("error: specify -p <PPN>\n"); + ret = -EINVAL; + goto out; + } + + /* Initialize MPI */ + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &nproc); + + /* Show parameters */ + if (rank == 0) { +#pragma omp parallel + { + if (omp_get_thread_num() == 0) { + printf("nproc: %d, ppn: %d, #threads: %d\n", nproc, ppn, omp_get_num_threads()); + } + } + } + + /* Set verbosity */ + //test_set_loglevel(TEST_LOGLEVEL_WARN); + + /* Initialize delay function */ + ndelay_init(); + + /* Prepare file names */ + +#define TEST_SRCDIR "/work/gg10/e29005" + sprintf(test_srcdir, "%s", /*TEST_SRCDIR*/dirname(argv[0])); + + if (!(fn = malloc(sizeof(char *) * NREQS))) { + pr_err("error: allocating fn\n"); + ret = -ENOMEM; + goto out; + } + + for (i = 0; i < NREQS; i++) { + if (!(fn[i] = malloc(PATH_MAX))) { + pr_err("error: allocating fn\n"); + ret = -ENOMEM; + goto out; + } + + sprintf(fn[i], "%s/rank%d-number%d", test_srcdir, rank, i); + if (rank == 0) pr_debug("debug: rank: %d, fn[%d]: %s\n", + rank, i, fn[i]); + } + + /* Allocate aio commands */ + if (!(iolist = calloc(NREQS, sizeof(struct aioreq)))) { + pr_err("%s: error: allocating iolist\n", + __func__); + ret = 1; + goto out; + } + + if (!(aiocblist = calloc(NREQS, sizeof(struct aiocb)))) { + pr_err("%s: error: allocating aiocblist\n", + __func__); + ret = 1; + goto out; + } + + /* Prepare contents to be written */ + for (i = 0; i < NREQS; i++) { + aiobufs[i] = malloc(SZBUF); + if (!aiobufs[i]) { + pr_err("%s: error: allocating aiobufs\n", + __func__); + ret = 1; + goto out; + } + + for (j = 0; j < SZBUF; j++) { + *(aiobufs[i] + j) = i + j + rank; + } + } + + /* Set signal handlers */ + sa.sa_flags = SA_RESTART | SA_SIGINFO; + sa.sa_sigaction = aio_sighandler; + if (sigaction(SIGUSR1, &sa, NULL) == -1) { + pr_err("%s: error: sigaction: %s\n", + __func__, 
strerror(errno)); + ret = 1; + goto out; + } + + /* Set aio parameters except fd and status */ + if ((ret = my_aio_init(NREQS, iolist, aiocblist, aiobufs))) { + pr_err("%s: error: my_aio_init returned %d\n", + __func__, ret); + goto out; + } + + /* Measure IO only time */ + //pr_debug("debug: measuring IO only time\n"); + if ((ret = measure(&t_io_ave, NSAMPLES_IO, NSAMPLES_DROP, NREQS, iolist, fn, aiobufs, 0))) { + pr_err("error: measure returned %d\n", ret); + goto out; + } + + if (rank == 0) { + printf("t_io_ave: %.0f usec, %.0f MB/s per node\n", + t_io_ave * MYTIME_TOUSEC, + SZBUF * ppn / t_io_ave / 1000000); + } + + /* Measure time with no progress, progress and no uti, progress and uti */ + for (progress = 0; progress <= (disable_syscall_intercept ? 0 : 0); progress += 1) { + + /* Spawn helper thread onto compute CPUs with ignoring uti_attr */ + if (progress == 1) { + setenv("DISABLE_UTI", "1", 1); + } + /* Spawn helper thread onto dedicated CPUs with respecting uti_attr */ + else if (progress == 2) { + unsetenv("DISABLE_UTI"); + } + + /* Measure with various calculation time */ + for (l = 0; l <= 10; l += 2) { + long nsec_calc = (t_io_ave * MYTIME_TONSEC * l) / 10; + + if ((ret = measure(&t_total_ave, NSAMPLES_TOTAL, NSAMPLES_DROP, NREQS, iolist, fn, aiobufs, nsec_calc))) { + pr_err("error: measure returned %d\n", ret); + goto out; + } + + if (rank == 0) { + if (l == 0) { + pr_debug("progress=%d\n", progress); + if (progress == 0) { + pr_debug("calc\ttotal\n"); + } else { + pr_debug("total\n"); + } + } + + t_table[l][0] = nsec_calc * (MYTIME_TOUSEC / (double)MYTIME_TONSEC); + if (progress == 0) { + pr_debug("%.0f\t%.0f\n", nsec_calc * (MYTIME_TOUSEC / (double)MYTIME_TONSEC), t_total_ave * MYTIME_TOUSEC); + t_table[l][progress + 1] = t_total_ave * MYTIME_TOUSEC; + } else { + pr_debug("%.0f\n", t_total_ave * MYTIME_TOUSEC); + t_table[l][progress + 1] = t_total_ave * MYTIME_TOUSEC; + } + } + } + } + + if (rank == 0) { + printf("calc,no prog,prog and no uti, prog 
and uti\n"); + for (l = 0; l <= 10; l++) { + for (i = 0; i < NCOL; i++) { + if (i > 0) { + printf(","); + } + printf("%.0f", t_table[l][i]); + } + printf("\n"); + } + } + + MPI_Barrier(MPI_COMM_WORLD); + + MPI_Finalize(); + + ret = 0; +out: + for (i = 0; i < NREQS; i++) { + free(aiobufs[i]); + } + return ret; +} diff --git a/test/uti/posix_aio/001.sh b/test/uti/posix_aio/001.sh new file mode 100755 index 00000000..6d3289d8 --- /dev/null +++ b/test/uti/posix_aio/001.sh @@ -0,0 +1,270 @@ +#!/usr/bin/bash + +#!/usr/bin/bash -x + +MYHOME=/home/e29005 +test_dir=`pwd -P` +mck_dir=${MYHOME}/project/os/install +uti_dir_lin=${MYHOME}/project/uti/install_linux +uti_dir_mck=${MYHOME}/project/uti/install_mckernel + +exe=`basename $0 | sed 's/\.sh//'` + +stop=0 +reboot=0 +go=0 + +interactive=0 +pjsub=0 +gdb=0 +disable_syscall_intercept=0 +mck=0 +nnodes=2 +LASTNODE=8196 +use_hfi=0 +omp_num_threads=1 +ppn=4 + +while getopts srgc:ml:N:P:o:hGI:ipL: OPT +do + case ${OPT} in + s) stop=1 + ;; + r) reboot=1 + ;; + g) go=1 + ;; + m) mck=1 + ;; + N) nnodes=$OPTARG + ;; + P) ppn=$OPTARG + ;; + o) omp_num_threads=$OPTARG + ;; + h) use_hfi=1 + ;; + G) gdb=1 + ;; + I) disable_syscall_intercept=$OPTARG + ;; + i) interactive=1 + ;; + p) pjsub=1 + ;; + L) LASTNODE=$OPTARG + ;; + *) echo "invalid option -${OPT}" >&2 + exit 1 + esac +done + +nprocs=$((ppn * nnodes)) +nodes=`echo $(seq -s ",c" $(($LASTNODE + 1 - $nnodes)) $LASTNODE) | sed 's/^/c/'` + +# vertical cut, excluding phys loaded with Linux tasks +uti_cpu_set_lin=1,69,137,205,18-19,86-87,154-155,222-223 +exclude_list=0-1,68-69,136-137,204-205,18-19,86-87,154-155,222-223 +#64-67,132-135,200-203,268-271 + +uti_cpu_set_mck=1,69,137,205,18-19,86-87,154-155,222-223 + +# horizontal cut, excluding phys loaded with Linux tasks for mckernel +#uti_cpu_set_lin=204-271 +#uti_cpu_set_mck=1-67 + +if [ $mck -eq 0 ]; then + uti_cpu_set_str="export UTI_CPU_SET=$uti_cpu_set_lin" + i_mpi_pin_processor_exclude_list="export 
I_MPI_PIN_PROCESSOR_EXCLUDE_LIST=$exclude_list" +else + uti_cpu_set_str="export UTI_CPU_SET=$uti_cpu_set_mck" + i_mpi_pin_processor_exclude_list= +fi + +if [ ${mck} -eq 1 ]; then + i_mpi_pin=off + i_mpi_pin_domain= + i_mpi_pin_order= +# if [ $omp_num_threads -eq 1 ]; then +# # Avoid binding main thread and uti thread to one CPU + kmp_affinity="export KMP_AFFINITY=disabled" +# else +# # Bind rank to OMP_NUM_THREAD-sized CPU-domain +# kmp_affinity="export KMP_AFFINITY=granularity=thread,scatter" +# fi +else + i_mpi_pin=on + domain=$omp_num_threads # Use 32 when you want to match mck's -n division + i_mpi_pin_domain="export I_MPI_PIN_DOMAIN=$domain" + i_mpi_pin_order="export I_MPI_PIN_ORDER=compact" + kmp_affinity="export KMP_AFFINITY=granularity=thread,scatter" +fi + +echo nprocs=$nprocs nnodes=$nnodes ppn=$ppn nodes=$nodes omp_num_threads=$omp_num_threads + +if [ ${mck} -eq 1 ]; then + makeopt="UTI_DIR=$uti_dir_mck" + use_mck="#PJM -x MCK=$mck_dir" + mck_mem="#PJM -x MCK_MEM=32G@0,8G@1" + mcexec="${mck_dir}/bin/mcexec" + nmcexecthr=$((omp_num_threads + 4)) + mcexecopt="-n $ppn --uti-use-last-cpu" # -t $nmcexecthr + + if [ ${use_hfi} -eq 1 ]; then + mcexecopt="--enable-hfi1 $mcexecopt" + fi + + if [ $disable_syscall_intercept -eq 0 ]; then + mcexecopt="--enable-uti $mcexecopt" + fi + +else + offline=`PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes lscpu \| grep Off 2>&1 | dshbak -c | grep Off` + if [ "$offline" != "" ]; then + echo "Error: Some CPUs are offline: $offline" + exit + fi + + makeopt="UTI_DIR=$uti_dir_lin" + use_mck= + mck_mem= + mcexec= + mcexecopt= +fi + +if [ $gdb -eq 1 ]; then + enable_x="-enable-x" + gdbcmd="xterm -display localhost:11 -hold -e gdb -ex run --args" +fi + +if [ $interactive -eq 1 ]; then + i_mpi_hydra_bootstrap_exec= + i_mpi_hydra_bootstrap= + hosts= + opt_dir=/opt/intel + ssh= +else +# PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes bash -c \'if \[ \"\`cat /etc/mtab \| while read line\; do cut -d\" \" -f 2\; done \| grep /work\`\" 
== \"\" \]\; then sudo mount /work\; fi\' + i_mpi_hydra_bootstrap_exec="export I_MPI_HYDRA_BOOTSTRAP_EXEC=/usr/bin/ssh" + i_mpi_hydra_bootstrap="export I_MPI_HYDRA_BOOTSTRAP=ssh" + hosts="-hosts $nodes" + opt_dir=/home/opt/local/cores/intel + ssh="ssh -A c$LASTNODE" +fi + +# If using ssh +# Latest versions are: 1.163, 2.199, 3.222 +if [ $pjsub -eq 0 ] && [ $interactive -eq 0 ]; then + compilervars=". ${opt_dir}/compilers_and_libraries_2018.2.199/linux/bin/compilervars.sh intel64" +else + compilervars= +fi + +if [ ${stop} -eq 1 ]; then + if [ ${mck} -eq 1 ]; then + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \ + /usr/sbin/pidof mcexec \| xargs -r sudo kill -9 + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \ + /usr/sbin/pidof $exe \| xargs -r sudo kill -9 + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \ + sudo ${mck_dir}/sbin/mcstop+release.sh + else + : + fi +fi + +if [ ${reboot} -eq 1 ]; then + if [ ${mck} -eq 1 ]; then + if hostname | grep ofp &>/dev/null; then + + # -h: Hide idle thread to prevent KNL CPU from mux-ing resource and halving throughput + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \ + sudo ${mck_dir}/sbin/mcreboot.sh -h -O -c 2-17,70-85,138-153,206-221,20-35,88-103,156-171,224-239,36-51,104-119,172-187,240-255,52-67,120-135,188-203,256-271 -r 2-5,70-73,138-141,206-209:0+6-9,74-77,142-145,210-213:1+10-13,78-81,146-149,214-217:68+14-17,82-85,150-153,218-221:69+20-23,88-91,156-159,224-227:136+24-27,92-95,160-163,228-231:137+28-31,96-99,164-167,232-235:204+32-35,100-103,168-171,236-239:205+36-39,104-107,172-175,240-243:18+40-43,108-111,176-179,244-247:19+44-47,112-115,180-183,248-251:86+48-51,116-119,184-187,252-255:87+52-55,120-123,188-191,256-259:154+56-59,124-127,192-195,260-263:155+60-63,128-131,196-199,264-267:222+64-67,132-135,200-203,268-271:223 -m 32G@0,12G@1 + +# perl -e 'for ($i=0;$i<68;$i++){if($i>0){print "+";}printf("%d,%d,%d:%d", $i+68,$i+136,$i+204,$i);}' + +# PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \ +# 
sudo ${mck_dir}/sbin/mcreboot.sh -O -c 68-271 -r 68,136,204:0+69,137,205:1+70,138,206:2+71,139,207:3+72,140,208:4+73,141,209:5+74,142,210:6+75,143,211:7+76,144,212:8+77,145,213:9+78,146,214:10+79,147,215:11+80,148,216:12+81,149,217:13+82,150,218:14+83,151,219:15+84,152,220:16+85,153,221:17+86,154,222:18+87,155,223:19+88,156,224:20+89,157,225:21+90,158,226:22+91,159,227:23+92,160,228:24+93,161,229:25+94,162,230:26+95,163,231:27+96,164,232:28+97,165,233:29+98,166,234:30+99,167,235:31+100,168,236:32+101,169,237:33+102,170,238:34+103,171,239:35+104,172,240:36+105,173,241:37+106,174,242:38+107,175,243:39+108,176,244:40+109,177,245:41+110,178,246:42+111,179,247:43+112,180,248:44+113,181,249:45+114,182,250:46+115,183,251:47+116,184,252:48+117,185,253:49+118,186,254:50+119,187,255:51+120,188,256:52+121,189,257:53+122,190,258:54+123,191,259:55+124,192,260:56+125,193,261:57+126,194,262:58+127,195,263:59+128,196,264:60+129,197,265:61+130,198,266:62+131,199,267:63+132,200,268:64+133,201,269:65+134,202,270:66+135,203,271:67 -m 32G@0,12G@1 + else + echo "unkwon host type" + exit 1 + fi + else + : + fi +fi + +( +cat <<EOF +#!/bin/sh + +#PJM -L rscgrp=$rg +#PJM -L node=$nnodes +#PJM --mpi proc=$nprocs +#PJM -L elapse=$elapse +#PJM -L proc-crproc=16384 +#PJM -g gg10 +#PJM -j +#PJM -s +$use_mck +$mck_mem + +$i_mpi_hydra_bootstrap_exec +$i_mpi_hydra_bootstrap + +export OMP_NUM_THREADS=$omp_num_threads +#export OMP_STACKSIZE=64M +export KMP_BLOCKTIME=1 +export PSM2_RCVTHREAD=0 + +$uti_cpu_set_str +export I_MPI_PIN=$i_mpi_pin +$i_mpi_pin_processor_exclude_list +$i_mpi_pin_domain +$i_mpi_pin_order +$kmp_affinity + +export HFI_NO_CPUAFFINITY=1 +export I_MPI_COLL_INTRANODE_SHM_THRESHOLD=4194304 +export I_MPI_FABRICS=shm:tmi +export PSM2_RCVTHREAD=0 +export I_MPI_TMI_PROVIDER=psm2 +export I_MPI_FALLBACK=0 +export PSM2_MQ_RNDV_HFI_WINDOW=4194304 +export PSM2_MQ_EAGER_SDMA_SZ=65536 +export PSM2_MQ_RNDV_HFI_THRESH=200000 + +export MCKERNEL_RLIMIT_STACK=32M,16G +export KMP_STACKSIZE=64m 
+#export KMP_HW_SUBSET=64c,1t + +export I_MPI_ASYNC_PROGRESS=off + +#export I_MPI_STATS=native:20,ipm +#export I_MPI_STATS=ipm +#export I_MPI_DEBUG=4 +#export I_MPI_HYDRA_DEBUG=on + +ulimit -c unlimited + +$compilervars +mpiexec.hydra -n $nprocs -ppn $ppn $hosts $ilpopt $enable_x $gdbcmd $mcexec $mcexecopt ${test_dir}/$exe -I $disable_syscall_intercept -p $ppn +#-l + +EOF +) > ./job.sh +chmod u+x ./job.sh + +if [ ${go} -eq 1 ]; then + if [ $pjsub -eq 1 ]; then + pjsub ./job.sh + else + if [ $interactive -eq 0 ]; then + . ${opt_dir}/compilers_and_libraries_2018.2.199/linux/bin/compilervars.sh intel64 + fi + #rm ./$exe + make $makeopt ./$exe + + $ssh ${test_dir}/job.sh + fi +fi diff --git a/test/uti/posix_aio/002.c b/test/uti/posix_aio/002.c new file mode 100644 index 00000000..f36ee18a --- /dev/null +++ b/test/uti/posix_aio/002.c @@ -0,0 +1,658 @@ +#define _GNU_SOURCE +#include <limits.h> +#include <stdlib.h> +#include <unistd.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <aio.h> +#include <signal.h> +#include <libgen.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <fcntl.h> +#include <mpi.h> +#include <linux/limits.h> +#include <semaphore.h> +#include "util.h" + +#define SZBUF (1ULL << 23)/*23*/ + +#define MYTIME_TOUSEC 1000000 +#define MYTIME_TONSEC 1000000000 + +#define NROW 16 +#define NCOL 4 + +#define NSAMPLES_PROFILE 3 +#define NSAMPLES_DROP 1/*10*/ +#define NSAMPLES_IO 5/*20*/ +#define NSAMPLES_TOTAL 5/*20*/ +#define NSAMPLES_INNER 1 + +#define WAIT_TYPE_BUSY_LOOP 0 +#define WAIT_TYPE_SEM 1 +#define WAIT_TYPE WAIT_TYPE_SEM + +static sem_t aio_sem; +volatile int completion_count; + +static inline double mytime() { + return /*rdtsc_light()*/MPI_Wtime(); +} + +struct aioreq { + int rank, aio_num_threads; + int status; + struct aiocb *aiocbp; +}; + +static void aio_handler(sigval_t sigval) +{ + struct aioreq *aioreq = sigval.sival_ptr; + int ret; + + //pr_debug("%s: debug: 
rank=%d\n", __func__, aioreq->rank); + ret = __sync_add_and_fetch(&completion_count, 1); + if (ret == aioreq->aio_num_threads) { + if (sem_post(&aio_sem)) { + pr_err("%s: error: sem_post: %s\n", + __func__, strerror(errno)); + } + } + + //pr_debug("%s: debug: completion_count: %d\n", __func__, ret); +} + +static void aio_sighandler(int sig, siginfo_t *si, void *ucontext) +{ + pr_debug("%s: debug: enter\n", __func__); +#if WAIT_TYPE == WAIT_TYPE_SEM + struct aioreq *aioreq = si->si_value.sival_ptr; + + if (si->si_code != SI_ASYNCIO) { + pr_err("%s: error: unexpected si_code: %d\n", + __func__, si->si_code); + } + + aioreq->status = aio_error(aioreq->aiocbp); + if (aioreq->status != 0) { + pr_err("%s: error: unexpected status: %d\n", + __func__, aioreq->status); + } + + if (__sync_add_and_fetch(&completion_count, 1) == aioreq->aio_num_threads) { + if (sem_post(&aio_sem)) { + pr_err("%s: error: sem_post: %s\n", + __func__, strerror(errno)); + } + } + + //pr_debug("%s: debug: completion_count: %d\n", __func__, completion_count); +#endif /* WAIT_TYPE */ +} + +int my_aio_init(int nreqs, struct aioreq *iolist, struct aiocb *aiocblist, char **aiobufs) { + int ret; + int i; + + for (i = 0; i < nreqs; i++) { + iolist[i].rank = i; + iolist[i].aio_num_threads = nreqs; + iolist[i].aiocbp = &aiocblist[i]; + iolist[i].aiocbp->aio_fildes = -1; + iolist[i].aiocbp->aio_buf = aiobufs[i]; + iolist[i].aiocbp->aio_nbytes = SZBUF; + iolist[i].aiocbp->aio_reqprio = 0; + iolist[i].aiocbp->aio_offset = 0; +#if 0 + iolist[i].aiocbp->aio_sigevent.sigev_notify = SIGEV_SIGNAL; + iolist[i].aiocbp->aio_sigevent.sigev_signo = SIGUSR1; + iolist[i].aiocbp->aio_sigevent.sigev_value.sival_ptr = &iolist[i]; +#else + iolist[i].aiocbp->aio_sigevent.sigev_notify = SIGEV_THREAD; + iolist[i].aiocbp->aio_sigevent.sigev_notify_function = aio_handler; + iolist[i].aiocbp->aio_sigevent.sigev_notify_attributes = NULL; + iolist[i].aiocbp->aio_sigevent.sigev_value.sival_ptr = &iolist[i]; +#endif + } + + ret = 0; + 
return ret;
+}
+
+int my_aio_open(int aio_num_threads, struct aioreq *iolist, char **fn) {
+	int ret;
+	int i;
+
+	for (i = 0; i < aio_num_threads; i++) {
+		iolist[i].aiocbp->aio_fildes = open(fn[i], O_RDWR | O_CREAT | O_TRUNC | O_DIRECT, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH);
+		if (iolist[i].aiocbp->aio_fildes == -1) {
+			pr_err("%s: error: open %s: %s\n",
+			       __func__, fn[i], strerror(errno));
+			ret = 1;
+			goto out;
+		}
+	}
+	ret = 0;
+ out:
+	return ret;
+}
+
+/*
+ * Read each file named in fn[] back and compare its first SZBUF bytes with
+ * the in-memory buffer that the matching request in iolist[] points to.
+ *
+ * Returns 0 when every file matches, -ENOMEM when the scratch buffer cannot
+ * be allocated, -errno when a file cannot be opened, and -1 on a short read
+ * or a content mismatch.
+ */
+int my_aio_check(struct aioreq *iolist, int aio_num_threads, char **fn) {
+	int ret;
+	int i;
+	FILE *fp = NULL;
+	char *data;
+
+	/* One scratch buffer is reused for all files */
+	if (!(data = malloc(SZBUF))) {
+		pr_err("error: allocating data\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/* Check contents */
+	for (i = 0; i < aio_num_threads; i++) {
+		if (!(fp = fopen(fn[i], "r+"))) {
+			/* Save errno before logging can overwrite it */
+			ret = -errno;
+			pr_err("%s: error: fopen %s: %s\n",
+			       __func__, fn[i], strerror(-ret));
+			goto out;
+		}
+
+		if (fread(data, sizeof(char), SZBUF, fp) != SZBUF) {
+			pr_err("%s: error: fread\n",
+			       __func__);
+			ret = -1;
+			goto out;
+		}
+
+		if (memcmp((const void*)iolist[i].aiocbp->aio_buf, data, SZBUF)) {
+			pr_err("%s: Data written to file %s differs from data in memory\n",
+			       __func__, fn[i]);
+			ret = -1;
+			goto out;
+		}
+
+		fclose(fp);
+		fp = NULL;
+	}
+	ret = 0;
+ out:
+	/* fp is non-NULL only when a check failed with its file still open */
+	if (fp) {
+		fclose(fp);
+	}
+	free(data);
+
+	return ret;
+}
+
+void my_aio_close(int aio_num_threads, struct aioreq *iolist) {
+	int ret;
+	int i;
+
+	for (i = 0; i < aio_num_threads; i++) {
+		if (iolist[i].aiocbp->aio_fildes != -1) {
+			close(iolist[i].aiocbp->aio_fildes);
+			iolist[i].aiocbp->aio_fildes = -1;
+		}
+	}
+}
+
+int my_aio(int aio_num_threads, struct aioreq *iolist, char **fn, long nsec_calc, int no_aio) {
+	int ret;
+	int i, j;
+
+	//pr_debug("%s: debug: enter\n", __func__);
+
+
+	/* Start async IO */
+	for (i = 0; i < NSAMPLES_INNER; i++) {
+		if (no_aio) goto
skip1; + + if ((ret = my_aio_open(aio_num_threads, iolist, fn)) == -1) { + pr_err("%s: error: my_aio_open: %s\n", + __func__, strerror(errno)); + ret = -errno; + goto out; + } + //pr_debug("%s: debug: after my_aio_open\n", __func__); + + + /* Reset completion */ + completion_count = 0; + __sync_synchronize(); + + for (j = 0; j < aio_num_threads; j++) { + iolist[j].status = EINPROGRESS; + + if ((ret = aio_write(iolist[j].aiocbp)) == -1) { + pr_err("%s: error: aio_write: %s\n", + __func__, strerror(errno)); + ret = -errno; + goto out; + } + + //pr_debug("%s: debug: after %d-th aio_write\n", __func__, j); + } + skip1: + /* Emulate calcuation phase */ + ndelay(nsec_calc); + if (no_aio) goto skip2; + +#if 0 + int k; + for (k = 0; k < 20; k++) { + char cmd[256]; + sprintf(cmd, "ls /proc/%d/task | wc -l", getpid()); + system(cmd); + usleep(200000); + } +#endif + + /* Wait for completion of async IO */ +#if WAIT_TYPE == WAIT_TYPE_SEM + + retry: + ret = sem_wait(&aio_sem); + if (ret == -1) { + if (errno == EINTR) { + pr_warn("%s: warning: sem_wait interrupted\n", + __func__); + goto retry; + } else { + pr_err("%s: error: sem_wait: %s\n", + __func__, strerror(errno)); + } + } + //pr_debug("%s: debug: completion_count: %d\n", __func__, completion_count); + +#elif WAIT_TYPE == WAIT_TYPE_BUSY_LOOP + + while (completion_count != aio_num_threads) { + for (j = 0; j < aio_num_threads; j++) { + if (iolist[j].status != EINPROGRESS) { + continue; + } + + iolist[j].status = aio_error(iolist[j].aiocbp); + + switch (iolist[j].status) { + case 0: /* Completed */ + goto completed; + case EINPROGRESS: + break; + case ECANCELED: + pr_err("%s: error: aio is cancelled\n", + __func__); + goto completed; + default: + pr_err("%s: error: aio_error: %s\n", + __func__, strerror(iolist[j].status)); + goto completed; + completed: + completion_count++; + break; + } + } + } +#endif /* WAIT_TYPE */ + /* Check amount read */ + for (j = 0; j < aio_num_threads; j++) { + ssize_t size; + + if ((size = 
aio_return(iolist[j].aiocbp)) != SZBUF) { + pr_err("%s: Expected to read %ld B but #%d has read %ld B\n", + __func__, SZBUF, j, size); + continue; + } + } + + my_aio_close(aio_num_threads, iolist); + skip2:; + } + ret = 0; + out: + my_aio_close(aio_num_threads, iolist); + return ret; +} + +int measure(double *result, int nsamples, int nsamples_drop, int aio_num_threads, struct aioreq *iolist, char **fn, long nsec_calc, int rank, int profile, int no_aio) { + int ret; + int i; + double t_l, t_g, t_sum = 0; + double start, end; + + for (i = 0; i < nsamples + nsamples_drop; i++) { + + MPI_Barrier(MPI_COMM_WORLD); + + /* Set parameter based on current IPC and frequency */ + ndelay_init(0); + + start = mytime(); + + struct rusage ru_start, ru_end; + struct timeval tv_start, tv_end; + + if (profile) { + if ((ret = getrusage(RUSAGE_SELF, &ru_start))) { + pr_err("%s: error: getrusage failed (%d)\n", __func__, ret); + } + + if ((ret = gettimeofday(&tv_start, NULL))) { + pr_err("%s: error: gettimeofday failed (%d)\n", __func__, ret); + } + } + + if ((ret = my_aio(aio_num_threads, iolist, fn, nsec_calc, no_aio))) { + pr_err("%s: error: my_aio returned %d\n", + __func__, ret); + } + + if (profile) { + if ((ret = getrusage(RUSAGE_SELF, &ru_end))) { + pr_err("%s: error: getrusage failed (%d)\n", __func__, ret); + } + + if ((ret = gettimeofday(&tv_end, NULL))) { + pr_err("%s: error: gettimeofday failed (%d)\n", __func__, ret); + } + + if (rank == 0) pr_debug("%s: wall: %ld, user: %ld, sys: %ld\n", __func__, + DIFFUSEC(tv_end, tv_start), + DIFFUSEC(ru_end.ru_utime, ru_start.ru_utime), + DIFFUSEC(ru_end.ru_stime, ru_start.ru_stime)); + } + + end = mytime(); + + MPI_Barrier(MPI_COMM_WORLD); + + /* Check contents */ + if ((ret = my_aio_check(iolist, aio_num_threads, fn))) { + pr_err("%s: error: my_aio_check returned %d\n", + __func__, ret); + } + + if (i < nsamples_drop) { + continue; + } + + /* Take max */ + t_l = end - start; + MPI_Allreduce(&t_l, &t_g, 1, MPI_DOUBLE, MPI_MAX, 
MPI_COMM_WORLD); + t_sum += t_g; + } + + *result = t_sum / nsamples; + ret = 0; + + return ret; +} + +int main(int argc, char **argv) +{ + int ret; + int i, j, progress, l; + int rank, nproc; + int ppn = -1; + int aio_num_threads = -1; + int disable_syscall_intercept = 0; + struct aioreq *iolist; + struct aiocb *aiocblist; + struct sigaction sa; + double t_io_ave, t_total_ave; + double t_table[NROW][NCOL] = { 0 }; + int opt; + char **aiobufs; + char **fn; + char src_dir[PATH_MAX]; + char *argv0; + + opterr = 0; /* Don't print out error when not recognizing option character */ + + while ((opt = getopt(argc, argv, ":I:p:t:")) != -1) { + switch (opt) { + case 'I': + disable_syscall_intercept = atoi(optarg); + break; + case 'p': + ppn = atoi(optarg); + break; + case 't': + aio_num_threads = atoi(optarg); + break; + case '?': + pr_err("error: invalid option: -%c\n", + optopt); + ret = 1; + goto out; + case ':': + pr_err("error: option -%c requires an argument\n", + optopt); + ret = 1; + goto out; + } + } + + if (ppn == -1) { + pr_err("error: specify ppn with -p <ppn>\n"); + ret = 1; + goto out; + } + + if (aio_num_threads == -1) { + pr_err("error: specify aio_num_threads with -p <aio_num_threads>\n"); + ret = 1; + goto out; + } + + /* Initialize MPI */ + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &nproc); + +#if 0 + int k; + for (k = 0; k < 20; k++) { + char cmd[256]; + sprintf(cmd, "ls /proc/%d/task | wc -l", getpid()); + system(cmd); + usleep(200000); + } +#endif + + /* Show parameters */ + if (rank == 0) { +#pragma omp parallel + { + if (omp_get_thread_num() == 0) { + printf("nproc=%d,#threads=%d\n", nproc, omp_get_num_threads()); + } + } + } + + /* Set verbosity */ + //test_set_loglevel(TEST_LOGLEVEL_WARN); + + /* Set parameter based on current IPC and frequency */ + ndelay_init(1); + + /* Initialize files */ + if (!(fn = malloc(sizeof(char *) * aio_num_threads))) { + pr_err("error: allocating fn\n"); + ret = 
-ENOMEM; + goto out; + } + + argv0 = strdup(argv[0]); + sprintf(src_dir, "%s", dirname(argv0)); + for (i = 0; i < aio_num_threads; i++) { + if (!(fn[i] = malloc(SZBUF))) { + pr_err("error: allocating data\n"); + ret = -ENOMEM; + goto out; + } + + sprintf(fn[i], "%s/rank%d-number%d", src_dir, rank, i); + if (rank < 2 && i < 2) { + pr_debug("debug: rank: %d, fn[%d]: %s\n", + rank, i, fn[i]); + } + } + + /* Allocate aio arrays */ + if (!(iolist = calloc(aio_num_threads, sizeof(struct aioreq)))) { + pr_err("%s: error: allocating iolist\n", + __func__); + ret = 1; + goto out; + } + + if (!(aiocblist = calloc(aio_num_threads, sizeof(struct aiocb)))) { + pr_err("%s: error: allocating aiocblist\n", + __func__); + ret = 1; + goto out; + } + + /* Prepare data to be written */ + if (!(aiobufs = malloc(sizeof(char *) * aio_num_threads))) { + pr_err("error: allocating aiobufs\n"); + ret = -ENOMEM; + goto out; + } + + for (i = 0; i < aio_num_threads; i++) { + aiobufs[i] = malloc(SZBUF); + if (!aiobufs[i]) { + pr_err("%s: error: allocating aiobufs\n", + __func__); + ret = 1; + goto out; + } + + for (j = 0; j < SZBUF; j++) { + *(aiobufs[i] + j) = i + j + rank; + } + } + + /* Initialize aio parameters except fd and status */ + if ((ret = my_aio_init(aio_num_threads, iolist, aiocblist, aiobufs))) { + pr_err("%s: error: my_aio_init returned %d\n", + __func__, ret); + goto out; + } + +#if 0 + /* Set signal handlers */ + sa.sa_flags = SA_RESTART | SA_SIGINFO; + sa.sa_sigaction = aio_sighandler; + if (sigaction(SIGUSR1, &sa, NULL) == -1) { + pr_err("%s: error: sigaction: %s\n", + __func__, strerror(errno)); + ret = 1; + goto out; + } +#endif + + /* Initialize semaphore */ + if ((ret = sem_init(&aio_sem, 0, 0))) { + pr_err("%s: error: sem_init: %s\n", __func__, strerror(errno)); + ret = -errno; + goto out; + } + + /* Take profile */ + if ((ret = measure(&t_io_ave, NSAMPLES_PROFILE, 0, aio_num_threads, iolist, fn, 0, rank, 1, 0))) { + pr_err("error: measure returned %d\n", ret); + goto 
out; + } + + /* Measure IO only time */ + if ((ret = measure(&t_io_ave, NSAMPLES_IO, NSAMPLES_DROP, aio_num_threads, iolist, fn, 0, rank, 0, 0))) { + pr_err("error: measure returned %d\n", ret); + goto out; + } + + if (rank == 0) { + printf("t_io_ave: %.0f usec, %.0f MB/s per node\n", + t_io_ave * MYTIME_TOUSEC, + SZBUF * ppn * aio_num_threads / t_io_ave / 1000000); + } + + /* Measure time with no progress, progress and no uti, progress and uti */ + for (progress = 0; progress <= (disable_syscall_intercept ? 0 : -1); progress += 1) { + + if (progress == 1) { + /* Ignore uti_attr, spawn a thread onto compute CPUs */ + setenv("DISABLE_UTI", "1", 1); + } else if (progress == 2) { + unsetenv("DISABLE_UTI"); + } + + /* Increasing calculation time up to 100% of IO time */ + for (l = 0; l <= NROW - 1; l += 1) { + long nsec_calc = (t_io_ave * MYTIME_TONSEC * l) / 10; + + if ((ret = measure(&t_total_ave, NSAMPLES_TOTAL, NSAMPLES_DROP, aio_num_threads, iolist, fn, nsec_calc, rank, 0, 0))) { + pr_err("error: measure returned %d\n", ret); + goto out; + } + + if (rank == 0) { + if (l == 0) { + pr_debug("progress=%d\n", progress); + if (progress == 0) { + pr_debug("calc\ttotal\n"); + } else { + pr_debug("total\n"); + } + } + + t_table[l][0] = nsec_calc * (MYTIME_TOUSEC / (double)MYTIME_TONSEC); + if (progress == 0) { + pr_debug("%.0f\t%.0f\n", nsec_calc * (MYTIME_TOUSEC / (double)MYTIME_TONSEC), t_total_ave * MYTIME_TOUSEC); + t_table[l][progress + 1] = t_total_ave * MYTIME_TOUSEC; + } else { + pr_debug("%.0f\n", t_total_ave * MYTIME_TOUSEC); + t_table[l][progress + 1] = t_total_ave * MYTIME_TOUSEC; + } + } + } + } + + if (rank == 0) { + printf("calc,no prog,prog and no uti, prog and uti\n"); + for (l = 0; l <= NROW - 1; l++) { + for (i = 0; i < NCOL; i++) { + if (i > 0) { + printf(","); + } + printf("%.0f", t_table[l][i]); + } + printf("\n"); + } + } + + MPI_Barrier(MPI_COMM_WORLD); + //pr_debug("after barrier\n"); + + MPI_Finalize(); + //pr_debug("after finalize\n"); + + ret = 
0; +out: + if ((ret = sem_destroy(&aio_sem))) { + pr_err("%s: error: sem_destroy: %s\n", __func__, strerror(errno)); + goto out; + } + + free(argv0); + return ret; +} diff --git a/test/uti/posix_aio/002.sh b/test/uti/posix_aio/002.sh new file mode 100755 index 00000000..6e09a7ab --- /dev/null +++ b/test/uti/posix_aio/002.sh @@ -0,0 +1,308 @@ +#!/usr/bin/bash + +test_dir=`pwd -P` +mck_dir=${HOME}/project/os/install +uti_dir_lin=${HOME}/project/uti/install_linux +uti_dir_mck=${HOME}/project/uti/install_mckernel + +exe=`basename $0 | sed 's/\.sh//'` + +stop=0 +reboot=0 +go=0 + +interactive=0 +pjsub=0 +gdb=0 +disable_syscall_intercept=0 +mck=0 +nnodes=2 +host_type=wallaby +LASTNODE=15 +use_hfi=0 +omp_num_threads=4 +ppn=4 +aio_num_threads=1 + +while getopts srgc:ml:N:P:o:hGI:ipL: OPT +do + case ${OPT} in + s) stop=1 + ;; + r) reboot=1 + ;; + g) go=1 + ;; + m) mck=1 + ;; + N) nnodes=$OPTARG + ;; + P) ppn=$OPTARG + ;; + o) omp_num_threads=$OPTARG + ;; + h) use_hfi=1 + ;; + G) gdb=1 + ;; + I) disable_syscall_intercept=$OPTARG + ;; + i) interactive=1 + ;; + p) pjsub=1 + ;; + L) LASTNODE=$OPTARG + ;; + *) echo "invalid option -${OPT}" >&2 + exit 1 + esac +done + +case $host_type in + wallaby) hnprefix=wallaby + ;; + ofp) hnprefix=c + ;; + *) echo "invalid host_type $host_type" + exit 1 +esac + +nprocs=$((ppn * nnodes)) +nodes="$hnprefix`echo $(seq -s ",$hnprefix" $(($LASTNODE + 1 - $nnodes)) $LASTNODE)`" + +case $host_type in + wallaby) + uti_cpu_set_lin=0,16,8,24 + exclude_list=0,16,8,24 + uti_cpu_set_mck=0,16,8,24 + ;; + ofp) + # vertical cut, excluding phys loaded with Linux tasks + uti_cpu_set_lin=1,69,137,205,18-19,86-87,154-155,222-223 + exclude_list=0-1,68-69,136-137,204-205,18-19,86-87,154-155,222-223 + #64-67,132-135,200-203,268-271 + + uti_cpu_set_mck=1,69,137,205,18-19,86-87,154-155,222-223 + + # horizontal cut, excluding phys loaded with Linux tasks for mckernel + #uti_cpu_set_lin=204-271 + #uti_cpu_set_mck=1-67 + ;; + *) echo "invalid host_type $host_type" + 
exit 1 +esac + +if [ $mck -eq 0 ]; then + uti_cpu_set_str="export UTI_CPU_SET=$uti_cpu_set_lin" + i_mpi_pin_processor_exclude_list="export I_MPI_PIN_PROCESSOR_EXCLUDE_LIST=$exclude_list" +else + uti_cpu_set_str="export UTI_CPU_SET=$uti_cpu_set_mck" + i_mpi_pin_processor_exclude_list= +fi + +if [ ${mck} -eq 1 ]; then + i_mpi_pin=off + i_mpi_pin_domain= + i_mpi_pin_order= +# if [ $omp_num_threads -eq 1 ]; then +# # Avoid binding main thread and uti thread to one CPU + kmp_affinity="export KMP_AFFINITY=disabled" +# else +# # Bind rank to OMP_NUM_THREAD-sized CPU-domain +# kmp_affinity="export KMP_AFFINITY=granularity=thread,scatter" +# fi +else + i_mpi_pin=on + domain=$omp_num_threads # Use 32 when you want to match mck's -n division + i_mpi_pin_domain="export I_MPI_PIN_DOMAIN=$domain" + i_mpi_pin_order="export I_MPI_PIN_ORDER=compact" + kmp_affinity="export KMP_AFFINITY=granularity=thread,scatter" +fi + +echo nprocs=$nprocs nnodes=$nnodes ppn=$ppn nodes=$nodes domain=$domain + +if [ ${mck} -eq 1 ]; then + makeopt="UTI_DIR=$uti_dir_mck" + use_mck="#PJM -x MCK=$mck_dir" + mck_mem="#PJM -x MCK_MEM=32G@0,8G@1" + mcexec="${mck_dir}/bin/mcexec" + nmcexecthr=$((omp_num_threads + 1 + aio_num_threads * 2 + 2)) + mcexecopt="-n $ppn -t $nmcexecthr" # --uti-use-last-cpu + + if [ ${use_hfi} -eq 1 ]; then + mcexecopt="--enable-hfi1 $mcexecopt" + fi + + if [ $disable_syscall_intercept -eq 0 ]; then + mcexecopt="--enable-uti $mcexecopt" + fi + +else + offline=`PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes lscpu 2>&1 | dshbak -c | grep Off-line` + if [ "$offline" != "" ]; then + echo "Error: Some CPUs are offline: $offline" + exit + fi + + makeopt="UTI_DIR=$uti_dir_lin" + use_mck= + mck_mem= + mcexec= + mcexecopt= +fi + +if [ $gdb -eq 1 ]; then + enable_x="-enable-x" + gdbcmd="xterm -display localhost:11 -hold -e gdb -ex run --args" +fi + +if [ $interactive -eq 1 ]; then + i_mpi_hydra_bootstrap_exec= + i_mpi_hydra_bootstrap= + hosts= + ssh= +else +# PDSH_SSH_ARGS_APPEND="-tt -q" 
pdsh -t 2 -w $nodes bash -c \'if \[ \"\`cat /etc/mtab \| while read line\; do cut -d\" \" -f 2\; done \| grep /work\`\" == \"\" \]\; then sudo mount /work\; fi\' + i_mpi_hydra_bootstrap_exec="export I_MPI_HYDRA_BOOTSTRAP_EXEC=/usr/bin/ssh" + i_mpi_hydra_bootstrap="export I_MPI_HYDRA_BOOTSTRAP=ssh" + hosts="-hosts $nodes" + ssh="ssh -A $(echo $nodes | cut -d',' -f1)" +fi + +case $host_type in + wallaby) + i_mpi_fabrics="export I_MPI_FABRICS=shm:dapl" + i_mpi_tmi_provider= + + opt_dir=/opt/intel + impiver=2018.3.222 # 1.163, 2.199, 3.222 + ;; + ofp) + i_mpi_fabrics="export I_MPI_FABRICS=shm:tmi" + i_mpi_tmi_provider="export I_MPI_TMI_PROVIDER=psm2" + + if [ $interactive -eq 1 ]; then + opt_dir=/opt/intel + else + opt_dir=/home/opt/local/cores/intel + fi + impiver=2018.1.163 # 1.163, 2.199, 3.222 + ;; + *) echo "invalid host_type $host_type" + exit 1 +esac + +# If using ssh +if [ $pjsub -eq 0 ] && [ $interactive -eq 0 ]; then + compilervars=". ${opt_dir}/compilers_and_libraries_${impiver}/linux/bin/compilervars.sh intel64" +else + compilervars= +fi + +if [ ${stop} -eq 1 ]; then + if [ ${mck} -eq 1 ]; then + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \ + /usr/sbin/pidof mcexec \| xargs -r sudo kill -9 + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \ + /usr/sbin/pidof $exe \| xargs -r sudo kill -9 + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \ + sudo ${mck_dir}/sbin/mcstop+release.sh + else + : + fi +fi + +if [ ${reboot} -eq 1 ]; then + if [ ${mck} -eq 1 ]; then + case $host_type in + wallaby) hnprefix=wallaby + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes sudo ${mck_dir}/sbin/mcreboot.sh -h -O -c 1-7,17-23,9-15,25-31 -r 1-7:0+17-23:16+9-15:8+25-31:24 -m 10G@0,10G@1 + #PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes sudo ${mck_dir}/sbin/mcreboot.sh -h -O -c 1-4 -r 1-4:0 -m 10G@0,10G@1 + ;; + ofp) + # -h: Prevent unnessary CPU resource division for KNL + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \ + sudo ${mck_dir}/sbin/mcreboot.sh -h -O 
-c 2-17,70-85,138-153,206-221,20-35,88-103,156-171,224-239,36-51,104-119,172-187,240-255,52-67,120-135,188-203,256-271 -r 2-5,70-73,138-141,206-209:0+6-9,74-77,142-145,210-213:1+10-13,78-81,146-149,214-217:68+14-17,82-85,150-153,218-221:69+20-23,88-91,156-159,224-227:136+24-27,92-95,160-163,228-231:137+28-31,96-99,164-167,232-235:204+32-35,100-103,168-171,236-239:205+36-39,104-107,172-175,240-243:18+40-43,108-111,176-179,244-247:19+44-47,112-115,180-183,248-251:86+48-51,116-119,184-187,252-255:87+52-55,120-123,188-191,256-259:154+56-59,124-127,192-195,260-263:155+60-63,128-131,196-199,264-267:222+64-67,132-135,200-203,268-271:223 -m 32G@0,12G@1 + ;; + *) echo "invalid host_type $host_type" + exit 1 + esac + else + : + fi +fi + +( +cat <<EOF +#!/bin/sh + +#PJM -L rscgrp=$rg +#PJM -L node=$nnodes +#PJM --mpi proc=$nprocs +#PJM -L elapse=$elapse +#PJM -L proc-crproc=16384 +#PJM -g gg10 +#PJM -j +#PJM -s +$use_mck +$mck_mem + +$i_mpi_hydra_bootstrap_exec +$i_mpi_hydra_bootstrap + +export OMP_NUM_THREADS=$omp_num_threads +#export OMP_STACKSIZE=64M +export KMP_BLOCKTIME=1 + +$uti_cpu_set_str +export I_MPI_PIN=$i_mpi_pin +$i_mpi_pin_processor_exclude_list +$i_mpi_pin_domain +$i_mpi_pin_order +$kmp_affinity + +export HFI_NO_CPUAFFINITY=1 +export I_MPI_COLL_INTRANODE_SHM_THRESHOLD=4194304 +$i_mpi_fabrics +$i_mpi_tmi_provider +export I_MPI_FALLBACK=0 +export PSM2_RCVTHREAD=0 +export PSM2_MQ_RNDV_HFI_WINDOW=4194304 +export PSM2_MQ_EAGER_SDMA_SZ=65536 +export PSM2_MQ_RNDV_HFI_THRESH=200000 + +export MCKERNEL_RLIMIT_STACK=32M,16G +export KMP_STACKSIZE=64m +#export KMP_HW_SUBSET=64c,1t + +export I_MPI_ASYNC_PROGRESS=off + +#export I_MPI_STATS=native:20,ipm +#export I_MPI_STATS=ipm +#export I_MPI_DEBUG=4 +#export I_MPI_HYDRA_DEBUG=on + +ulimit -c unlimited + +$compilervars +mpiexec.hydra -n $nprocs -ppn $ppn $hosts $ilpopt $enable_x $gdbcmd $mcexec $mcexecopt ${test_dir}/$exe -I $disable_syscall_intercept -p $ppn -t $aio_num_threads +#$gdbcmd $mcexec $mcexecopt ${test_dir}/$exe 
-I $disable_syscall_intercept -p $ppn -t $aio_num_threads +#-l + +EOF +) > ./job.sh +chmod u+x ./job.sh + +if [ ${go} -eq 1 ]; then + if [ $pjsub -eq 1 ]; then + pjsub ./job.sh + else + if [ $interactive -eq 0 ]; then + eval $compilervars + fi + make $makeopt ./$exe + PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \ + /usr/sbin/pidof $exe \| xargs -r sudo kill -9 + $ssh ${test_dir}/job.sh + fi +fi diff --git a/test/uti/posix_aio/Makefile b/test/uti/posix_aio/Makefile new file mode 100755 index 00000000..4f027e77 --- /dev/null +++ b/test/uti/posix_aio/Makefile @@ -0,0 +1,51 @@ +.SUFFIXES: # Clear suffixes +.ONESHELL: # Pack all the lines and pass it to shell + +VMTOUCH=$(HOME)/project/src/vmtouch/install/bin/vmtouch + +# Specify it via *.sh +UTI_DIR=${HOME}/project/uti/install_linux + +CC=mpiicc +LD=$(CC) + +CFLAGS = -g -O0 -Wall -DVMTOUCH=$(VMTOUCH) +LDFLAGS = -lpthread -L$(UTI_DIR)/lib -Wl,-rpath -Wl,$(UTI_DIR)/lib -luti -lrt +SRCS = $(shell ls 0*.c) +OBJS = $(SRCS:.c=.o) util.o +EXES = $(SRCS:.c=) + +define create_files = + for i in {1..2}; do + dd if=/dev/zero of=./data/$i bs=1M count=1 + done +endef + +all: $(EXES) + +file:: + $(value create_files) + +util.o:: util.c util.h + $(CC) $(CFLAGS) -qopenmp -c $< + +001: 001.o util.o + $(LD) -o $@ $^ $(LDFLAGS) -qopenmp + +001.o:: 001.c + $(CC) $(CFLAGS) -qopenmp -c $< + +002: 002.o util.o + $(LD) -o $@ $^ $(LDFLAGS) -qopenmp + +002.o:: 002.c + $(CC) $(CFLAGS) -qopenmp -c $< + +%: %.o + $(LD) -o $@ $^ $(LDFLAGS) + +%.o::%.c + $(CC) $(CFLAGS) -c $< + +clean: + rm -f core $(EXES) $(OBJS) $(DSRCS) diff --git a/test/uti/posix_aio/README b/test/uti/posix_aio/README new file mode 100644 index 00000000..097b1b40 --- /dev/null +++ b/test/uti/posix_aio/README @@ -0,0 +1,15 @@ +============================================= +Benchmarks of asynchronous I/O with busy CPUs +============================================= + +The purpose is to show the benefit of spawning the asynchronous threads onto dedicated CPUs. 
+ +--- +001 +--- +Write + +--- +002 +--- +Write, IO completion is notified by spawning thread diff --git a/test/uti/posix_aio/util.c b/test/uti/posix_aio/util.c new file mode 100644 index 00000000..673639ab --- /dev/null +++ b/test/uti/posix_aio/util.c @@ -0,0 +1,133 @@ +#define _GNU_SOURCE /* See feature_test_macros(7) */ +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdint.h> +#include <sys/syscall.h> /* For SYS_xxx definitions */ +#include <sched.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <string.h> +#include <time.h> +#include <mpi.h> +#include "util.h" + +/* Messaging */ +enum test_loglevel test_loglevel = TEST_LOGLEVEL_DEBUG; + +/* Calculation */ +static inline void asmloop(unsigned long n) { + int j; + + for (j = 0; j < n; j++) { + asm volatile( + "movq $0, %%rcx\n\t" + "1:\t" + "addq $1, %%rcx\n\t" + "cmpq $99, %%rcx\n\t" + "jle 1b\n\t" + : + : + : "rcx", "cc"); + } +} + +#define N_INIT 10000000 +double nspw; /* nsec per work */ + +void ndelay_init(int verbose) { + struct timeval start, end; + int rank, nproc; + double min, sum, max; + + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &nproc); + + //clock_gettime(TIMER_KIND, &start); + gettimeofday(&start, NULL); + +#pragma omp parallel + { + asmloop(N_INIT); + } + + //clock_gettime(TIMER_KIND, &end); + gettimeofday(&end, NULL); + + nspw = DIFFUSEC(end, start) * 1000 / (double)N_INIT; + + if (verbose) { + MPI_Reduce(&nspw, &min, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); + MPI_Reduce(&nspw, &sum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&nspw, &max, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); + if (rank == 0) { + pr_debug("nspw: min=%.0f, ave=%.0f, max=%.0f\n", min, sum / nproc, max); + } + } +} + +#if 1 +void ndelay(long delay_nsec) { + if (delay_nsec < 0) { + printf("delay_nsec < 0\n"); + return; + } +#pragma omp parallel + { + asmloop(delay_nsec / nspw); + } +} +#else /* For machines with large 
core-to-core performance variation (e.g. OFP) */ +void ndelay(long delay_nsec) { + struct timespec start, end; + + if (delay_nsec < 0) { return; } + clock_gettime(TIMER_KIND, &start); + + while (1) { + clock_gettime(TIMER_KIND, &end); + if (DIFFNSEC(end, start) >= delay_nsec) { + break; + } + asmloop(2); /* ~150 ns per iteration on FOP */ + } +} +#endif + + +double cycpw; /* cyc per work */ + +void cdlay_init() { + unsigned long start, end; + + start = rdtsc_light(); +#define N_INIT 10000000 + asmloop(N_INIT); + end = rdtsc_light(); + cycpw = (end - start) / (double)N_INIT; +} + +#if 0 +void cdelay(long delay_cyc) { + if (delay_cyc < 0) { + return; + } + asmloop(delay_cyc / cycpw); +} +#else /* For machines with large core-to-core performance variation (e.g. OFP) */ +void cdelay(long delay_cyc) { + unsigned long start, end; + + if (delay_cyc < 0) { return; } + start = rdtsc_light(); + + while (1) { + end = rdtsc_light(); + if (end - start >= delay_cyc) { + break; + } + asmloop(2); + } +} +#endif diff --git a/test/uti/posix_aio/util.h b/test/uti/posix_aio/util.h new file mode 100644 index 00000000..48b53fcd --- /dev/null +++ b/test/uti/posix_aio/util.h @@ -0,0 +1,70 @@ +#ifndef __UTIL_H_INCLUDED__ +#define __UTIL_H_INCLUDED__ + +#include <stdint.h> + +/* Messaging */ + +enum test_loglevel { + TEST_LOGLEVEL_ERR = 0, + TEST_LOGLEVEL_WARN, + TEST_LOGLEVEL_DEBUG +}; + +extern enum test_loglevel test_loglevel; +static inline void test_set_loglevel(enum test_loglevel level) +{ + test_loglevel = level; +} + +#define pr_level(level, fmt, args...) do { \ + if (test_loglevel >= level) { \ + fprintf(stdout, fmt, ##args); \ + } \ +} while (0) + +#define pr_err(fmt, args...) pr_level(TEST_LOGLEVEL_ERR, fmt, ##args) +#define pr_warn(fmt, args...) pr_level(TEST_LOGLEVEL_WARN, fmt, ##args) +#define pr_debug(fmt, args...) pr_level(TEST_LOGLEVEL_DEBUG, fmt, ##args) + +#define _OKNG(verb, jump, cond, fmt, args...) 
do { \ + if (cond) { \ + if (verb) \ + printf("[ OK ] " fmt, ##args); \ + } else { \ + printf("[ NG ] " fmt, ##args); \ + if (jump) { \ + ret = -1; \ + goto out; \ + } \ + } \ +} while (0) + +#define OKNG(args...) _OKNG(1, 1, ##args) +#define NG(args...) _OKNG(0, 1, ##args) +#define OKNGNOJUMP(args...) _OKNG(1, 0, ##args) + +/* Time */ +inline uint64_t rdtsc_light(void) +{ + uint64_t x; + __asm__ __volatile__("rdtscp;" /* rdtscp don't jump over earlier instructions */ + "shl $32, %%rdx;" + "or %%rdx, %%rax" : + "=a"(x) : + : + "%rcx", "%rdx", "memory"); + return x; +} + +#define DIFFUSEC(end, start) ((end.tv_sec - start.tv_sec) * 1000000UL + (end.tv_usec - start.tv_usec)) +#define DIFFNSEC(end, start) ((end.tv_sec - start.tv_sec) * 1000000000UL + (end.tv_nsec - start.tv_nsec)) +#define TIMER_KIND CLOCK_MONOTONIC_RAW /* CLOCK_THREAD_CPUTIME_ID */ + +/* Calculation emulation */ +void ndelay_init(); +void ndelay(long delay_nsec); +void cdelay_init(); +void cdelay(long delay_cyc); + +#endif diff --git a/test/uti/preloadlib.c b/test/uti/preloadlib.c new file mode 100644 index 00000000..7b8ba350 --- /dev/null +++ b/test/uti/preloadlib.c @@ -0,0 +1,40 @@ +#include <libsyscall_intercept_hook_point.h> +#include <syscall.h> +#include <errno.h> +#define __USE_GNU +#include <dlfcn.h> + +static int +hook(long syscall_number, + long arg0, long arg1, + long arg2, long arg3, + long arg4, long arg5, + long *result) +{ + if (syscall_number == SYS_getdents) { + /* + * Prevent the application from + * using the getdents syscall. From + * the point of view of the calling + * process, it is as if the kernel + * would return the ENOTSUP error + * code from the syscall. + */ + *result = -ENOTSUP; + return 0; + } else { + /* + * Ignore any other syscalls + * i.e.: pass them on to the kernel + * as would normally happen. 
+ */ + return 1; + } +} + +static __attribute__((constructor)) void +init(void) +{ + // Set up the callback function + intercept_hook_point = hook; +} diff --git a/test/uti/psm2/Makefile b/test/uti/psm2/Makefile new file mode 100755 index 00000000..4fcb442c --- /dev/null +++ b/test/uti/psm2/Makefile @@ -0,0 +1,27 @@ +.SUFFIXES: # Clear suffixes + +CC=gcc + +LD=$(CC) + +CFLAGS = -g -O2 +LDFLAGS = -lpthread -lpsm2 +SRCS = $(shell ls *.c) +OBJS = $(SRCS:.c=.o) +EXES = $(SRCS:.c=) +TMPFILES = $(shell ls psm2-demo-*) + +all: $(EXES) file + +file::$(TMPFILES) + rm -f $(TMPFILES) + +%: %.o + $(LD) -o $@ $^ $(LDFLAGS) + +%.o::%.c + $(CC) $(CFLAGS) -c $< + +clean: + rm -f core $(EXES) $(OBJS) $(DSRCS) + diff --git a/test/uti/psm2/psm2-demo.c b/test/uti/psm2/psm2-demo.c new file mode 100644 index 00000000..955d76bd --- /dev/null +++ b/test/uti/psm2/psm2-demo.c @@ -0,0 +1,212 @@ +/* + * PSM2 example program. + * Start two instances of this program from the same working directory. + * These processes can execute on the same host, or on two hosts connected + * with OPA. + * Compile with: gcc psm2-demo.c -o psm2-demo -lpsm2 + * Run as: ./psm2-demo -s # this is the server process + * and: ./psm2-demo # this is the client process + * Copyright(c) 2015 Intel Corporation. + * */ +#include <stdio.h> +#include <psm2.h> /* required for core PSM2 functions */ +#include <psm2_mq.h> /* required for PSM2 MQ functions (send, recv, etc) */ +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <fcntl.h> + +#define BUFFER_LENGTH 8000000 +#define CONNECT_ARRAY_SIZE 8 +void die(char *msg, int rc) { + fprintf(stderr, "%s: %d\n", msg, rc); + exit(1); +} + +/* Helper functions to find the server's PSM2 endpoint identifier (epid). 
*/ +psm2_epid_t find_server() { + FILE *fp = NULL; + psm2_epid_t server_epid = 0; + printf("PSM2 client waiting for epid mapping file to appear...\n"); + while (!fp) { + sleep(1); + fp = fopen("psm2-demo-server-epid", "r"); + } + fscanf(fp, "%lx", &server_epid); + fclose(fp); + printf("PSM2 client found server epid = 0x%lx\n", server_epid); + return server_epid; +} + +void write_epid_to_file(psm2_epid_t myepid) { + FILE *fp; + fp = fopen("psm2-demo-server-epid", "w"); + if (!fp) { + fprintf(stderr, + "Exiting, couldn't write server's epid mapping file: "); + die(strerror(errno), errno); + } + fprintf(fp, "0x%lx", myepid); + fclose(fp); + printf("PSM2 server wrote epid = 0x%lx to file.\n", myepid); + return; +} + +int main(int argc, char **argv) { + struct psm2_ep_open_opts o; + psm2_uuid_t uuid; + psm2_ep_t myep; + psm2_epid_t myepid; + psm2_epid_t server_epid; + psm2_epid_t epid_array[CONNECT_ARRAY_SIZE]; + int epid_array_mask[CONNECT_ARRAY_SIZE]; + psm2_error_t epid_connect_errors[CONNECT_ARRAY_SIZE]; + psm2_epaddr_t epaddr_array[CONNECT_ARRAY_SIZE]; + int rc; + int ver_major = PSM2_VERNO_MAJOR; + int ver_minor = PSM2_VERNO_MINOR; + char msgbuf[BUFFER_LENGTH]; + psm2_mq_t q; + psm2_mq_req_t req_mq; + int is_server = 0; + if (argc > 2) { + die("To run in server mode, invoke as ./psm2-demo -s\n" \ + "or run in client mode, invoke as ./psm2-demo\n" \ + "Wrong number of args", argc); + } + is_server = argc - 1; /* Assume any command line argument is -s */ + memset(uuid, 0, sizeof(psm2_uuid_t)); /* Use a UUID of zero */ +/* Try to initialize PSM2 with the requested library version. + * * In this example, given the use of the PSM2_VERNO_MAJOR and MINOR + * * as defined in the PSM2 headers, ensure that we are linking with + * * the same version of PSM2 as we compiled against. 
*/ + + if ((rc = psm2_init(&ver_major, &ver_minor)) != PSM2_OK) { + die("couldn't init", rc); + } + printf("PSM2 init done.\n"); + /* Setup the endpoint options struct */ + if ((rc = psm2_ep_open_opts_get_defaults(&o)) != PSM2_OK) { + die("couldn't set default opts", rc); + } + printf("PSM2 opts_get_defaults done.\n"); + /* Attempt to open a PSM2 endpoint. This allocates hardware resources. */ + if ((rc = psm2_ep_open(uuid, &o, &myep, &myepid)) != PSM2_OK) { + die("couldn't psm2_ep_open()", rc); + } + printf("PSM2 endpoint open done.\n"); + if (is_server) { + write_epid_to_file(myepid); + } else { + server_epid = find_server(); + } + if (is_server) { + /* Server does nothing here. A connection does not have to be + * * established to receive messages. */ + printf("PSM2 server up.\n"); + } else { + /* Setup connection request info */ + /* PSM2 can connect to a single epid per request, + * * or an arbitrary number of epids in a single connect call. + * * For this example, use part of an array of + * * connection requests. */ + memset(epid_array_mask, 0, sizeof(int) * CONNECT_ARRAY_SIZE); + epid_array[0] = server_epid; + epid_array_mask[0] = 1; + /* Begin the connection process. + * * note that if a requested epid is not responding, + * * the connect call will still return OK. + * * The errors array will contain the state of individual + * * connection requests. 
*/ + if ((rc = psm2_ep_connect(myep, + CONNECT_ARRAY_SIZE, + epid_array, + epid_array_mask, + epid_connect_errors, + epaddr_array, + 0 /* no timeout */ + )) != PSM2_OK) { + die("couldn't ep_connect", rc); + } + printf("PSM2 connect request processed.\n"); + /* Now check if our connection to the server is ready */ + if (epid_connect_errors[0] != PSM2_OK) { + die("couldn't connect to server", + epid_connect_errors[0]); + } + printf("PSM2 client-server connection established.\n"); + } + /* Setup our PSM2 message queue */ + if ((rc = psm2_mq_init(myep, PSM2_MQ_ORDERMASK_NONE, NULL, 0, &q)) + != PSM2_OK) { + die("couldn't initialize PSM2 MQ", rc); + } + printf("PSM2 MQ init done.\n"); + if (is_server) { + psm2_mq_tag_t t = {0xABCD}; + psm2_mq_tag_t tm = {-1}; + /* Post the receive request */ + if ((rc = psm2_mq_irecv2(q, PSM2_MQ_ANY_ADDR, + &t, /* message tag */ + &tm, /* message tag mask */ + 0, /* no flags */ + msgbuf, BUFFER_LENGTH, + NULL, /* no context to add */ + &req_mq /* track irecv status */ + )) != PSM2_OK) { + die("couldn't post psm2_mq_irecv()", rc); + } + printf("PSM2 MQ irecv() posted\n"); + /* Wait until the message arrives */ + if ((rc = psm2_mq_wait(&req_mq, NULL)) != PSM2_OK) { + die("couldn't wait for the irecv", rc); + } + printf("PSM2 MQ wait() done.\n"); + printf("Message from client:\n"); + printf("%s", msgbuf); + unlink("psm2-demo-server-epid"); + } else { + /* Say hello */ + snprintf(msgbuf, BUFFER_LENGTH, + "Hello world from epid=0x%lx, pid=%d.\n", + myepid, getpid()); + psm2_mq_tag_t t = {0xABCD}; + if ((rc = psm2_mq_send2(q, + epaddr_array[0], /* destination epaddr */ + PSM2_MQ_FLAG_SENDSYNC, /* no flags */ + &t, /* tag */ + msgbuf, BUFFER_LENGTH + )) != PSM2_OK) { + die("couldn't post psm2_mq_isend", rc); + } + printf("PSM2 MQ send() done.\n"); + } +/* Close down the MQ */ + if ((rc = psm2_mq_finalize(q)) != PSM2_OK) { + die("couldn't psm2_mq_finalize()", rc); + } + printf("PSM2 MQ finalized.\n"); +/* Close our ep, releasing all hardware 
resources. + * * Try to close all connections properly */ + if ((rc = psm2_ep_close(myep, PSM2_EP_CLOSE_GRACEFUL, + 0 /* no timeout */)) != PSM2_OK) { + die("couldn't psm2_ep_close()", rc); + } + printf("PSM2 ep closed.\n"); + /* Release all local PSM2 resources */ + if ((rc = psm2_finalize()) != PSM2_OK) { + die("couldn't psm2_finalize()", rc); + } + printf("PSM2 shut down, exiting.\n"); + return 0; +} + + + + + + + + + diff --git a/test/uti/util.c b/test/uti/util.c new file mode 100644 index 00000000..7e1965d5 --- /dev/null +++ b/test/uti/util.c @@ -0,0 +1,130 @@ +#define _GNU_SOURCE /* See feature_test_macros(7) */ +#include <unistd.h> +#include <sys/syscall.h> /* For SYS_xxx definitions */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> +#include "util.h" + +static inline void fixed_size_work() { + asm volatile( + "movq $0, %%rcx\n\t" + "1:\t" + "addq $1, %%rcx\n\t" + "cmpq $99, %%rcx\n\t" + "jle 1b\n\t" + : + : + : "rcx", "cc"); +} + +static inline void bulk_fsw(unsigned long n) { + int j; + for (j = 0; j < (n); j++) { + fixed_size_work(); + } +} + +double nspw; /* nsec per work */ +unsigned long nsec; + +void fwq_init() { + struct timespec start, end; + int i; + clock_gettime(TIMER_KIND, &start); +#define N_INIT 10000000 + bulk_fsw(N_INIT); + clock_gettime(TIMER_KIND, &end); + nsec = DIFFNSEC(end, start); + nspw = nsec / (double)N_INIT; +} + +#if 1 +void fwq(long delay_nsec) { + if (delay_nsec < 0) { + return; + } + bulk_fsw(delay_nsec / nspw); +} +#else /* For machines with large core-to-core performance variation (e.g. 
OFP) */ +void fwq(long delay_nsec) { + struct timespec start, end; + + if (delay_nsec < 0) { return; } + clock_gettime(TIMER_KIND, &start); + + while (1) { + clock_gettime(TIMER_KIND, &end); + if (DIFFNSEC(end, start) >= delay_nsec) { + break; + } + bulk_fsw(2); /* ~150 ns per iteration on FOP */ + } +} +#endif + +int print_cpu_last_executed_on(const char *name) { + char fn[256]; + char* result; + pid_t tid = syscall(SYS_gettid); + int fd; + int offset; + int mpi_errno = 0; + + sprintf(fn, "/proc/%d/task/%d/stat", getpid(), (int)tid); + //printf("fn=%s\n", fn); + fd = open(fn, O_RDONLY); + if(fd == -1) { + printf("open() failed\n"); + goto fn_fail; + } + + result = malloc(65536); + if(result == NULL) { + printf("malloc() failed"); + goto fn_fail; + } + + int amount = 0; + offset = 0; + while(1) { + amount = read(fd, result + offset, 65536); + // printf("amount=%d\n", amount); + if(amount == -1) { + printf("read() failed"); + goto fn_fail; + } + if(amount == 0) { + goto eof; + } + offset += amount; + } + eof:; + //printf("result:%s\n", result); + + char* next_delim = result; + char* field; + int i; + for(i = 0; i < 39; i++) { + field = strsep(&next_delim, " "); + } + + int cpu = sched_getcpu(); + if(cpu == -1) { + printf("getcpu() failed\n"); + goto fn_fail; + } + + printf("[INFO] %s (tid: %d) is running on %02d,%02d\n", name, tid, atoi(field), cpu); + fn_exit: + free(result); + return mpi_errno; + fn_fail: + mpi_errno = -1; + goto fn_exit; +} + diff --git a/test/uti/util.h b/test/uti/util.h new file mode 100644 index 00000000..396f5183 --- /dev/null +++ b/test/uti/util.h @@ -0,0 +1,70 @@ +#ifndef __UTIL_H_INCLUDED__ +#define __UTIL_H_INCLUDED__ + +#include <stdint.h> + +#define DEBUG + +#ifdef DEBUG +#define dprintf(...) do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __func__, msg); \ +} while (0) +#else +#define dprintf(...) do { } while (0) +#endif + +#define eprintf(...) 
do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __func__, msg); \ +} while (0) + +#define CHKANDJUMP(cond, err, ...) do { \ + if (cond) { \ + eprintf(__VA_ARGS__); \ + ret = err; \ + goto fn_fail; \ + } \ +} while (0) + +#define _OKNG(verb, jump, cond, fmt, args...) do { \ + if (cond) { \ + if (verb) \ + printf("[ OK ] " fmt, ##args); \ + } else { \ + printf("[ NG ] " fmt, ##args); \ + if (jump) \ + goto fn_fail; \ + } \ +} while (0) + +#define OKNG(args...) _OKNG(1, 1, ##args) +#define NG(args...) _OKNG(0, 1, ##args) +#define OKNGNOJUMP(args...) _OKNG(1, 0, ##args) + +#define DIFFNSEC(end, start) ((end.tv_sec - start.tv_sec) * 1000000000UL + (end.tv_nsec - start.tv_nsec)) +#define TIMER_KIND CLOCK_MONOTONIC_RAW /* CLOCK_THREAD_CPUTIME_ID */ + +static inline uint64_t rdtsc_light(void ) +{ + uint64_t x; + __asm__ __volatile__("rdtscp;" /* rdtscp works as instruction execution barrier */ + "shl $32, %%rdx;" + "or %%rdx, %%rax" : + "=a"(x) : + : + "%rcx", "%rdx", "memory"); + return x; +} + +extern double nspw; /* nsec per work */ +extern unsigned long nsec; + +void fwq_init(); +void fwq(long delay_nsec); +int print_cpu_last_executed_on(const char *name); + +#endif + diff --git a/tools/mcstat/Makefile.in b/tools/mcstat/Makefile.in index 459c7862..b834af06 100644 --- a/tools/mcstat/Makefile.in +++ b/tools/mcstat/Makefile.in @@ -26,4 +26,6 @@ clean:: $(RM) $(TARGET) *.o install:: + install -m 755 -d $(MANDIR)/man1 + install -m 644 mcstat.1 $(MANDIR)/man1/mcstat.1 install -m 755 mcstat $(BINDIR) diff --git a/tools/mcstat/mcstat.1in b/tools/mcstat/mcstat.1in index 6a5d29f0..a7d000fa 100644 --- a/tools/mcstat/mcstat.1in +++ b/tools/mcstat/mcstat.1in @@ -8,7 +8,7 @@ mcstat \- report McKernel statistics and status .\" ---------------------------- SYNOPSIS ---------------------------- .SH SYNOPSIS -.B mcstat \fR [\fIoptions\fR] \fR [\fI<delay>\fR \fI<count>\fR ] +.B mcstat \fR [\fIoptions\fR] \fR [\fI<delay>\fR \fI<count>\fR] .\" 
---------------------------- DESCRIPTION ---------------------------- .SH DESCRIPTION