diff --git a/arch/arm64/kernel/Makefile.arch.in b/arch/arm64/kernel/Makefile.arch.in index 1961e00e..5e1e2246 100644 --- a/arch/arm64/kernel/Makefile.arch.in +++ b/arch/arm64/kernel/Makefile.arch.in @@ -6,13 +6,15 @@ VDSO_SO_O = $(O)/vdso.so.o IHK_OBJS += assert.o cache.o cpu.o cputable.o context.o entry.o entry-fpsimd.o IHK_OBJS += fault.o head.o hyp-stub.o local.o perfctr.o perfctr_armv8pmu.o proc.o proc-macros.o IHK_OBJS += psci.o smp.o trampoline.o traps.o fpsimd.o -IHK_OBJS += debug-monitors.o hw_breakpoint.o ptrace.o +IHK_OBJS += debug-monitors.o hw_breakpoint.o ptrace.o timer.o IHK_OBJS += $(notdir $(VDSO_SO_O)) memory.o syscall.o vdso.o IHK_OBJS += irq-gic-v2.o irq-gic-v3.o IHK_OBJS += memcpy.o memset.o IHK_OBJS += cpufeature.o +IHK_OBJS += imp-sysreg.o + # POSTK_DEBUG_ARCH_DEP_18 coredump arch separation. # IHK_OBJS added coredump.o IHK_OBJS += coredump.o @@ -21,8 +23,8 @@ $(VDSO_SO_O): $(VDSO_BUILDDIR)/vdso.so $(VDSO_BUILDDIR)/vdso.so: FORCE $(call echo_cmd,BUILD VDSO,$(TARGET)) - @mkdir -p $(O)/vdso - @TARGETDIR="$(TARGETDIR)" $(submake) -C $(VDSO_BUILDDIR) $(SUBOPTS) prepare - @TARGETDIR="$(TARGETDIR)" $(submake) -C $(VDSO_BUILDDIR) $(SUBOPTS) + mkdir -p $(O)/vdso + TARGETDIR="$(TARGETDIR)" $(submake) -C $(VDSO_BUILDDIR) $(SUBOPTS) prepare + TARGETDIR="$(TARGETDIR)" $(submake) -C $(VDSO_BUILDDIR) $(SUBOPTS) FORCE: diff --git a/arch/arm64/kernel/assert.c b/arch/arm64/kernel/assert.c index 0d05b00c..fa8c5c20 100644 --- a/arch/arm64/kernel/assert.c +++ b/arch/arm64/kernel/assert.c @@ -50,3 +50,6 @@ STATIC_ASSERT(sizeof(struct sigcontext) - offsetof(struct sigcontext, __reserved ALIGN_UP(sizeof(struct _aarch64_ctx), 16) > sizeof(struct extra_context)); STATIC_ASSERT(SVE_PT_FPSIMD_OFFSET == sizeof(struct user_sve_header)); STATIC_ASSERT(SVE_PT_SVE_OFFSET == sizeof(struct user_sve_header)); + +/* assert for struct arm64_cpu_local_thread member offset define */ +STATIC_ASSERT(offsetof(struct arm64_cpu_local_thread, panic_regs) == 160); diff --git 
a/arch/arm64/kernel/cpu.c b/arch/arm64/kernel/cpu.c index 47984099..3895ff67 100644 --- a/arch/arm64/kernel/cpu.c +++ b/arch/arm64/kernel/cpu.c @@ -1,4 +1,4 @@ -/* cpu.c COPYRIGHT FUJITSU LIMITED 2015-2017 */ +/* cpu.c COPYRIGHT FUJITSU LIMITED 2015-2018 */ #include #include #include @@ -32,6 +32,7 @@ #include #include #include +#include //#define DEBUG_PRINT_CPU @@ -43,7 +44,6 @@ #endif struct cpuinfo_arm64 cpuinfo_data[NR_CPUS]; /* index is logical cpuid */ -static unsigned int per_cpu_timer_val[NR_CPUS] = { 0 }; static struct list_head handlers[1024]; static void cpu_init_interrupt_handler(void); @@ -114,137 +114,54 @@ static struct ihk_mc_interrupt_handler cpu_stop_handler = { .priv = NULL, }; -/* @ref.impl include/clocksource/arm_arch_timer.h */ -#define ARCH_TIMER_CTRL_ENABLE (1 << 0) -#define ARCH_TIMER_CTRL_IT_MASK (1 << 1) -#define ARCH_TIMER_CTRL_IT_STAT (1 << 2) - -static void physical_timer_handler(void *priv) +extern long freeze_thaw(void *nmi_ctx); +static void multi_nm_interrupt_handler(void *priv) { - unsigned int ctrl = 0; - int cpu = ihk_mc_get_processor_id(); - - dkprintf("CPU%d: catch physical timer\n", cpu); - - asm volatile("mrs %0, cntp_ctl_el0" : "=r" (ctrl)); - if (ctrl & ARCH_TIMER_CTRL_IT_STAT) { - unsigned int zero = 0; - unsigned int val = ctrl; - unsigned int clocks = per_cpu_timer_val[cpu]; - unsigned long irqstate; - struct cpu_local_var *v = get_this_cpu_local_var(); - - /* set resched flag */ - irqstate = ihk_mc_spinlock_lock(&v->runq_lock); - v->flags |= CPU_FLAG_NEED_RESCHED; - ihk_mc_spinlock_unlock(&v->runq_lock, irqstate); - - /* gen control register value */ - val &= ~(ARCH_TIMER_CTRL_IT_STAT | ARCH_TIMER_CTRL_IT_MASK); - val |= ARCH_TIMER_CTRL_ENABLE; - - /* set timer re-enable for periodic */ - asm volatile("msr cntp_ctl_el0, %0" : : "r" (zero)); - asm volatile("msr cntp_tval_el0, %0" : : "r" (clocks)); - asm volatile("msr cntp_ctl_el0, %0" : : "r" (val)); - } -} - -static struct ihk_mc_interrupt_handler phys_timer_handler 
= { - .func = physical_timer_handler, - .priv = NULL, -}; - -static void virtual_timer_handler(void *priv) -{ - unsigned int ctrl = 0; - int cpu = ihk_mc_get_processor_id(); - - dkprintf("CPU%d: catch virtual timer\n", cpu); - - asm volatile("mrs %0, cntv_ctl_el0" : "=r" (ctrl)); - if (ctrl & ARCH_TIMER_CTRL_IT_STAT) { - unsigned int zero = 0; - unsigned int val = ctrl; - unsigned int clocks = per_cpu_timer_val[cpu]; - unsigned long irqstate; - struct cpu_local_var *v = get_this_cpu_local_var(); - - /* set resched flag */ - irqstate = ihk_mc_spinlock_lock(&v->runq_lock); - v->flags |= CPU_FLAG_NEED_RESCHED; - ihk_mc_spinlock_unlock(&v->runq_lock, irqstate); - - /* gen control register value */ - val &= ~(ARCH_TIMER_CTRL_IT_STAT | ARCH_TIMER_CTRL_IT_MASK); - val |= ARCH_TIMER_CTRL_ENABLE; - - /* set timer re-enable for periodic */ - asm volatile("msr cntv_ctl_el0, %0" : : "r" (zero)); - asm volatile("msr cntv_tval_el0, %0" : : "r" (clocks)); - asm volatile("msr cntv_ctl_el0, %0" : : "r" (val)); - } -} - -static struct ihk_mc_interrupt_handler virt_timer_handler = { - .func = virtual_timer_handler, - .priv = NULL, -}; - -static void memdump_interrupt_handler(void *priv) -{ - struct pt_regs *regs; + extern int nmi_mode; + struct pt_regs *regs = (struct pt_regs *)priv; union arm64_cpu_local_variables *clv; - regs = cpu_local_var(current)->uctx; - clv = get_arm64_this_cpu_local(); + switch (nmi_mode) { + case 1: + case 2: + /* mode == 1or2, for FREEZER NMI */ + dkprintf("%s: freeze mode NMI catch. 
(nmi_mode=%d)\n", + __func__, nmi_mode); + freeze_thaw(NULL); + break; - if (regs && interrupt_from_user(regs)) { - memcpy(clv->arm64_cpu_local_thread.panic_regs, regs->regs, sizeof(regs->regs)); - clv->arm64_cpu_local_thread.panic_regs[31] = regs->sp; - clv->arm64_cpu_local_thread.panic_regs[32] = regs->pc; - clv->arm64_cpu_local_thread.panic_regs[33] = regs->pstate; - } - else { - asm volatile ( - "stp x0, x1, [%3, #16 * 0]\n" - "stp x2, x3, [%3, #16 * 1]\n" - "stp x4, x5, [%3, #16 * 2]\n" - "stp x6, x7, [%3, #16 * 3]\n" - "stp x8, x9, [%3, #16 * 4]\n" - "stp x10, x11, [%3, #16 * 5]\n" - "stp x12, x13, [%3, #16 * 6]\n" - "stp x14, x15, [%3, #16 * 7]\n" - "stp x16, x17, [%3, #16 * 8]\n" - "stp x18, x19, [%3, #16 * 9]\n" - "stp x20, x21, [%3, #16 * 10]\n" - "stp x22, x23, [%3, #16 * 11]\n" - "stp x24, x25, [%3, #16 * 12]\n" - "stp x26, x27, [%3, #16 * 13]\n" - "stp x28, x29, [%3, #16 * 14]\n" - "str x30, [%3, #16 * 15]\n" - "mov %0, sp\n" - "adr %1, 1f\n" - "mrs %2, spsr_el1\n" - "1:" - : "=r" (clv->arm64_cpu_local_thread.panic_regs[31]), /* sp */ - "=r" (clv->arm64_cpu_local_thread.panic_regs[32]), /* pc */ - "=r" (clv->arm64_cpu_local_thread.panic_regs[33]) /* spsr_el1 */ - : "r" (&clv->arm64_cpu_local_thread.panic_regs) - : "memory" - ); - } + case 0: + /* mode == 0, for MEMDUMP NMI */ + clv = get_arm64_this_cpu_local(); - clv->arm64_cpu_local_thread.paniced = 1; + if (regs) { + memcpy(clv->arm64_cpu_local_thread.panic_regs, + regs->regs, sizeof(regs->regs)); + clv->arm64_cpu_local_thread.panic_regs[31] = regs->sp; + clv->arm64_cpu_local_thread.panic_regs[32] = regs->pc; + clv->arm64_cpu_local_thread.panic_regs[33] = + regs->pstate; + } + clv->arm64_cpu_local_thread.paniced = 1; + ihk_mc_query_mem_areas(); + /* memdump-nmi is halted McKernel, break is unnecessary. 
*/ + /* fall through */ + case 3: + /* mode == 3, for SHUTDOWN-WAIT NMI */ + while (1) { + cpu_halt(); + } + break; - while(1) - { - cpu_halt(); + default: + ekprintf("%s: Unknown nmi-mode(%d) detected.\n", + __func__, nmi_mode); + break; } } -static struct ihk_mc_interrupt_handler memdump_handler = { - .func = memdump_interrupt_handler, +static struct ihk_mc_interrupt_handler multi_nmi_handler = { + .func = multi_nm_interrupt_handler, .priv = NULL, }; @@ -331,19 +248,160 @@ static void setup_processor(void) static char *trampoline_va, *first_page_va; -unsigned long is_use_virt_timer(void) +static inline uint64_t pwr_arm64hpc_read_imp_fj_core_uarch_restrection_el1(void) { - extern unsigned long ihk_param_use_virt_timer; + uint64_t reg; - switch (ihk_param_use_virt_timer) { - case 0: /* physical */ - case 1: /* virtual */ - break; - default: /* invalid */ - panic("PANIC: is_use_virt_timer(): timer select neither phys-timer nor virt-timer.\n"); - break; + asm volatile("mrs_s %0, " __stringify(IMP_FJ_CORE_UARCH_RESTRECTION_EL1) + : "=r" (reg) : : "memory"); + return reg; +} + +static ihk_spinlock_t imp_fj_core_uarch_restrection_el1_lock = + SPIN_LOCK_UNLOCKED; +static inline void pwr_arm64hpc_write_imp_fj_core_uarch_restrection_el1(uint64_t set_bit, + uint64_t clear_bit) +{ + uint64_t reg; + unsigned long flags; + + flags = ihk_mc_spinlock_lock(&imp_fj_core_uarch_restrection_el1_lock); + reg = pwr_arm64hpc_read_imp_fj_core_uarch_restrection_el1(); + reg = (reg & ~clear_bit) | set_bit; + asm volatile("msr_s " __stringify(IMP_FJ_CORE_UARCH_RESTRECTION_EL1) ", %0" + : : "r" (reg) : "memory"); + ihk_mc_spinlock_unlock(&imp_fj_core_uarch_restrection_el1_lock, flags); +} + +static inline uint64_t pwr_arm64hpc_read_imp_soc_standby_ctrl_el1(void) +{ + uint64_t reg; + + asm volatile("mrs_s %0, " __stringify(IMP_SOC_STANDBY_CTRL_EL1) + : "=r" (reg) : : "memory"); + return reg; +} + +static ihk_spinlock_t imp_soc_standby_ctrl_el1_lock = SPIN_LOCK_UNLOCKED; +static inline void 
pwr_arm64hpc_write_imp_soc_standby_ctrl_el1(uint64_t set_bit, + uint64_t clear_bit) +{ + unsigned long flags; + uint64_t reg; + + flags = ihk_mc_spinlock_lock(&imp_soc_standby_ctrl_el1_lock); + reg = pwr_arm64hpc_read_imp_soc_standby_ctrl_el1(); + reg = (reg & ~clear_bit) | set_bit; + asm volatile("msr_s " __stringify(IMP_SOC_STANDBY_CTRL_EL1) ", %0" + : : "r" (reg) : "memory"); + ihk_mc_spinlock_unlock(&imp_soc_standby_ctrl_el1_lock, flags); +} + +static unsigned long *retention_state_flag; + +static inline int is_hpcpwr_available(void) +{ + if (retention_state_flag) + return 1; + else + return 0; +} + +static inline void pwr_arm64hpc_map_retention_state_flag(void) +{ + extern unsigned long ihk_param_retention_state_flag_pa; + unsigned long size = BITS_TO_LONGS(NR_CPUS) * sizeof(unsigned long); + + if (!ihk_param_retention_state_flag_pa) { + return; } - return ihk_param_use_virt_timer; + retention_state_flag = map_fixed_area(ihk_param_retention_state_flag_pa, + size, 0); +} + +static inline int pwr_arm64hpc_retention_state_get(uint64_t *val) +{ + unsigned long linux_core_id; + int cpu = ihk_mc_get_processor_id(); + int ret; + + if (!is_hpcpwr_available()) { + *val = 0; + return 0; + } + + ret = ihk_mc_get_core(cpu, &linux_core_id, NULL, NULL); + if (ret) { + return ret; + } + *val = test_bit(linux_core_id, retention_state_flag); + return ret; +} + +static inline int pwr_arm64hpc_retention_state_set(uint64_t val) +{ + unsigned long linux_core_id; + int cpu = ihk_mc_get_processor_id(); + int ret; + + if (!is_hpcpwr_available()) { + return 0; + } + + ret = ihk_mc_get_core(cpu, &linux_core_id, NULL, NULL); + if (ret) { + return ret; + } + + if (val) { + set_bit(linux_core_id, retention_state_flag); + } else { + clear_bit(linux_core_id, retention_state_flag); + } + return ret; +} + +static inline int pwr_arm64hpc_enable_retention_state(void) +{ + if (!is_hpcpwr_available()) { + return 0; + } + + pwr_arm64hpc_write_imp_soc_standby_ctrl_el1(IMP_SOC_STANDBY_CTRL_EL1_RETENTION, + 0); + return 
0; +} + +static inline void init_power_management(void) +{ + int state; + uint64_t imp_fj_clear_bit = 0; + uint64_t imp_soc_clear_bit = 0; + + if (!is_hpcpwr_available()) { + return; + } + + /* retention state */ + state = pwr_arm64hpc_retention_state_set(0); + if (state) { + panic("error: initialize power management\n"); + } + + /* issue state */ + imp_fj_clear_bit |= IMP_FJ_CORE_UARCH_RESTRECTION_EL1_ISSUE_RESTRICTION; + + /* eco_state */ + imp_fj_clear_bit |= IMP_FJ_CORE_UARCH_RESTRECTION_EL1_FL_RESTRICT_TRANS; + imp_soc_clear_bit |= IMP_SOC_STANDBY_CTRL_EL1_ECO_MODE; + + /* ex_pipe_state */ + imp_fj_clear_bit |= IMP_FJ_CORE_UARCH_RESTRECTION_EL1_EX_RESTRICTION; + + /* write */ + pwr_arm64hpc_write_imp_fj_core_uarch_restrection_el1(0, + imp_fj_clear_bit); + pwr_arm64hpc_write_imp_soc_standby_ctrl_el1(0, imp_soc_clear_bit); } /*@ @@ -361,23 +419,19 @@ void ihk_mc_init_ap(void) kprintf("# of cpus : %d\n", cpu_info->ncpus); init_processors_local(cpu_info->ncpus); - kprintf("IKC IRQ vector: %d, IKC target CPU APIC: %d\n", - ihk_ikc_irq, ihk_ikc_irq_apicid); - /* Do initialization for THIS cpu (BSP) */ assign_processor_id(); ihk_mc_register_interrupt_handler(INTRID_CPU_STOP, &cpu_stop_handler); - ihk_mc_register_interrupt_handler(INTRID_MEMDUMP, &memdump_handler); + ihk_mc_register_interrupt_handler(INTRID_MULTI_NMI, &multi_nmi_handler); ihk_mc_register_interrupt_handler( - ihk_mc_get_vector(IHK_TLB_FLUSH_IRQ_VECTOR_START), &remote_tlb_flush_handler); + ihk_mc_get_vector(IHK_TLB_FLUSH_IRQ_VECTOR_START), + &remote_tlb_flush_handler); + ihk_mc_register_interrupt_handler(get_timer_intrid(), + get_timer_handler()); - if (is_use_virt_timer()) { - ihk_mc_register_interrupt_handler(get_virt_timer_intrid(), &virt_timer_handler); - } else { - ihk_mc_register_interrupt_handler(get_phys_timer_intrid(), &phys_timer_handler); - } init_smp_processor(); + init_power_management(); } extern void vdso_init(void); @@ -386,15 +440,13 @@ long (*__arm64_syscall_handler)(int, 
ihk_mc_user_context_t *); /* @ref.impl arch/arm64/include/asm/arch_timer.h::arch_timer_get_cntkctl */ static inline unsigned int arch_timer_get_cntkctl(void) { - unsigned int cntkctl; - asm volatile("mrs %0, cntkctl_el1" : "=r" (cntkctl)); - return cntkctl; + return read_sysreg(cntkctl_el1); } /* @ref.impl arch/arm64/include/asm/arch_timer.h::arch_timer_set_cntkctl */ static inline void arch_timer_set_cntkctl(unsigned int cntkctl) { - asm volatile("msr cntkctl_el1, %0" : : "r" (cntkctl)); + write_sysreg(cntkctl, cntkctl_el1); } #ifdef CONFIG_ARM_ARCH_TIMER_EVTSTREAM @@ -460,19 +512,19 @@ void init_cpu(void) { if(gic_enable) gic_enable(); + + arm64_init_per_cpu_perfctr(); arm64_enable_pmu(); + + if (xos_is_tchip()) { + vhbm_barrier_registers_init(); + scdrv_registers_init(); + hpc_registers_init(); + } + arm64_enable_user_access_pmu_regs(); } #ifdef CONFIG_ARM64_VHE -/* @ref.impl arch/arm64/include/asm/virt.h */ -static inline int is_kernel_in_hyp_mode(void) -{ - unsigned long el; - - asm("mrs %0, CurrentEL" : "=r" (el)); - return el == CurrentEL_EL2; -} - /* @ref.impl arch/arm64/kernel/smp.c */ /* Whether the boot CPU is running in HYP mode or not */ static int boot_cpu_hyp_mode; @@ -517,8 +569,12 @@ void setup_arm64(void) arm64_init_perfctr(); + arch_timer_init(); + gic_init(); + pwr_arm64hpc_map_retention_state_flag(); + init_cpu(); init_gettime_support(); @@ -561,6 +617,7 @@ void setup_arm64_ap(void (*next_func)(void)) debug_monitors_init(); arch_timer_configure_evtstream(); init_cpu(); + init_power_management(); call_ap_func(next_func); /* BUG */ @@ -590,6 +647,7 @@ static void show_context_stack(struct pt_regs *regs) max_loop = (stack_top - sp) / min_stack_frame_size; for (i = 0; i < max_loop; i++) { + extern char _head[], _end[]; uintptr_t *fp, *lr; fp = (uintptr_t *)sp; lr = (uintptr_t *)(sp + 8); @@ -598,7 +656,8 @@ static void show_context_stack(struct pt_regs *regs) break; } - if ((*lr < MAP_KERNEL_START) || (*lr > MAP_KERNEL_START + MAP_KERNEL_SIZE)) { + 
if ((*lr < (unsigned long)_head) || + (*lr > (unsigned long)_end)) { break; } @@ -623,7 +682,7 @@ void handle_IPI(unsigned int vector, struct pt_regs *regs) else { list_for_each_entry(h, &handlers[vector], list) { if (h->func) { - h->func(h->priv); + h->func(h->priv == NULL ? regs : h->priv); } } } @@ -639,7 +698,18 @@ static void __arm64_wakeup(int hw_cpuid, unsigned long entry) /** IHK Functions **/ /* send WFI(Wait For Interrupt) instruction */ -extern void cpu_do_idle(void); +static inline void cpu_do_idle(void) +{ + extern void __cpu_do_idle(void); + uint64_t retention; + int state; + + state = pwr_arm64hpc_retention_state_get(&retention); + if ((state == 0) && (retention != 0)) { + pwr_arm64hpc_enable_retention_state(); + } + __cpu_do_idle(); +} /* halt by WFI(Wait For Interrupt) */ void cpu_halt(void) @@ -1113,20 +1183,32 @@ long ihk_mc_show_cpuinfo(char *buf, size_t buf_size, unsigned long read_off, int int j = 0; /* generate strings */ - loff += snprintf(lbuf + loff, lbuf_size - loff, "processor\t: %d\n", cpuinfo->hwid); - loff += snprintf(lbuf + loff, lbuf_size - loff, "Features\t:"); + loff += scnprintf(lbuf + loff, lbuf_size - loff, + "processor\t: %d\n", cpuinfo->hwid); + loff += scnprintf(lbuf + loff, lbuf_size - loff, "Features\t:"); for (j = 0; hwcap_str[j]; j++) { if (elf_hwcap & (1 << j)) { - loff += snprintf(lbuf + loff, lbuf_size - loff, " %s", hwcap_str[j]); + loff += scnprintf(lbuf + loff, + lbuf_size - loff, + " %s", hwcap_str[j]); } } - loff += snprintf(lbuf + loff, lbuf_size - loff, "\n"); - loff += snprintf(lbuf + loff, lbuf_size - loff, "CPU implementer\t: 0x%02x\n", MIDR_IMPLEMENTOR(midr)); - loff += snprintf(lbuf + loff, lbuf_size - loff, "CPU architecture: 8\n"); - loff += snprintf(lbuf + loff, lbuf_size - loff, "CPU variant\t: 0x%x\n", MIDR_VARIANT(midr)); - loff += snprintf(lbuf + loff, lbuf_size - loff, "CPU part\t: 0x%03x\n", MIDR_PARTNUM(midr)); - loff += snprintf(lbuf + loff, lbuf_size - loff, "CPU revision\t: %d\n\n", 
MIDR_REVISION(midr)); + loff += scnprintf(lbuf + loff, lbuf_size - loff, "\n"); + loff += scnprintf(lbuf + loff, lbuf_size - loff, + "CPU implementer\t: 0x%02x\n", + MIDR_IMPLEMENTOR(midr)); + loff += scnprintf(lbuf + loff, lbuf_size - loff, + "CPU architecture: 8\n"); + loff += scnprintf(lbuf + loff, lbuf_size - loff, + "CPU variant\t: 0x%x\n", + MIDR_VARIANT(midr)); + loff += scnprintf(lbuf + loff, lbuf_size - loff, + "CPU part\t: 0x%03x\n", + MIDR_PARTNUM(midr)); + loff += scnprintf(lbuf + loff, lbuf_size - loff, + "CPU revision\t: %d\n\n", + MIDR_REVISION(midr)); /* check buffer depletion */ if ((i < num_processors - 1) && ((lbuf_size - loff) == 1)) { @@ -1408,6 +1490,10 @@ out: void save_fp_regs(struct thread *thread) { + if (thread == &cpu_local_var(idle)) { + return; + } + if (likely(elf_hwcap & (HWCAP_FP | HWCAP_ASIMD))) { if (check_and_allocate_fp_regs(thread) != 0) { // alloc error. @@ -1468,26 +1554,6 @@ restore_fp_regs(struct thread *thread) } } -void -lapic_timer_enable(unsigned int clocks) -{ - unsigned int val = 0; - - /* gen control register value */ - asm volatile("mrs %0, cntp_ctl_el0" : "=r" (val)); - val &= ~(ARCH_TIMER_CTRL_IT_STAT | ARCH_TIMER_CTRL_IT_MASK); - val |= ARCH_TIMER_CTRL_ENABLE; - - if (is_use_virt_timer()) { - asm volatile("msr cntv_tval_el0, %0" : : "r" (clocks)); - asm volatile("msr cntv_ctl_el0, %0" : : "r" (val)); - } else { - asm volatile("msr cntp_tval_el0, %0" : : "r" (clocks)); - asm volatile("msr cntp_ctl_el0, %0" : : "r" (val)); - } - per_cpu_timer_val[ihk_mc_get_processor_id()] = clocks; -} - void unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs) { @@ -1540,26 +1606,6 @@ unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs) return; } -void -lapic_timer_disable() -{ - unsigned int zero = 0; - unsigned int val = 0; - - /* gen control register value */ - asm volatile("mrs %0, cntp_ctl_el0" : "=r" (val)); - val &= ~(ARCH_TIMER_CTRL_IT_STAT | ARCH_TIMER_CTRL_IT_MASK | 
ARCH_TIMER_CTRL_ENABLE); - - if (is_use_virt_timer()) { - asm volatile("msr cntv_ctl_el0, %0" : : "r" (val)); - asm volatile("msr cntv_tval_el0, %0" : : "r" (zero)); - } else { - asm volatile("msr cntp_ctl_el0, %0" : : "r" (val)); - asm volatile("msr cntp_tval_el0, %0" : : "r" (zero)); - } - per_cpu_timer_val[ihk_mc_get_processor_id()] = 0; -} - void init_tick(void) { dkprintf("init_tick():\n"); @@ -1588,25 +1634,174 @@ void arch_start_pvclock(void) void mod_nmi_ctx(void *nmi_ctx, void (*func)()) { - /* TODO: skeleton for rusage */ + func(); +} + +extern void freeze(void); +void __freeze(void) +{ + freeze(); +} + + +#define SYSREG_READ_S(sys_reg) case (sys_reg): asm volatile("mrs_s %0, " __stringify(sys_reg) : "=r" (*val)); break +static inline int arch_cpu_mrs(uint32_t sys_reg, uint64_t *val) +{ + int ret = 0; + + switch (sys_reg) { + SYSREG_READ_S(IMP_FJ_TAG_ADDRESS_CTRL_EL1); + SYSREG_READ_S(IMP_SCCR_CTRL_EL1); + SYSREG_READ_S(IMP_SCCR_ASSIGN_EL1); + SYSREG_READ_S(IMP_SCCR_SET0_L2_EL1); + SYSREG_READ_S(IMP_SCCR_SET1_L2_EL1); + SYSREG_READ_S(IMP_SCCR_L1_EL0); + SYSREG_READ_S(IMP_PF_CTRL_EL1); + SYSREG_READ_S(IMP_PF_STREAM_DETECT_CTRL_EL0); + SYSREG_READ_S(IMP_PF_INJECTION_CTRL0_EL0); + SYSREG_READ_S(IMP_PF_INJECTION_CTRL1_EL0); + SYSREG_READ_S(IMP_PF_INJECTION_CTRL2_EL0); + SYSREG_READ_S(IMP_PF_INJECTION_CTRL3_EL0); + SYSREG_READ_S(IMP_PF_INJECTION_CTRL4_EL0); + SYSREG_READ_S(IMP_PF_INJECTION_CTRL5_EL0); + SYSREG_READ_S(IMP_PF_INJECTION_CTRL6_EL0); + SYSREG_READ_S(IMP_PF_INJECTION_CTRL7_EL0); + SYSREG_READ_S(IMP_PF_INJECTION_DISTANCE0_EL0); + SYSREG_READ_S(IMP_PF_INJECTION_DISTANCE1_EL0); + SYSREG_READ_S(IMP_PF_INJECTION_DISTANCE2_EL0); + SYSREG_READ_S(IMP_PF_INJECTION_DISTANCE3_EL0); + SYSREG_READ_S(IMP_PF_INJECTION_DISTANCE4_EL0); + SYSREG_READ_S(IMP_PF_INJECTION_DISTANCE5_EL0); + SYSREG_READ_S(IMP_PF_INJECTION_DISTANCE6_EL0); + SYSREG_READ_S(IMP_PF_INJECTION_DISTANCE7_EL0); + SYSREG_READ_S(IMP_BARRIER_CTRL_EL1); + SYSREG_READ_S(IMP_BARRIER_BST_BIT_EL1); + 
SYSREG_READ_S(IMP_BARRIER_INIT_SYNC_BB0_EL1); + SYSREG_READ_S(IMP_BARRIER_INIT_SYNC_BB1_EL1); + SYSREG_READ_S(IMP_BARRIER_INIT_SYNC_BB2_EL1); + SYSREG_READ_S(IMP_BARRIER_INIT_SYNC_BB3_EL1); + SYSREG_READ_S(IMP_BARRIER_INIT_SYNC_BB4_EL1); + SYSREG_READ_S(IMP_BARRIER_INIT_SYNC_BB5_EL1); + SYSREG_READ_S(IMP_BARRIER_ASSIGN_SYNC_W0_EL1); + SYSREG_READ_S(IMP_BARRIER_ASSIGN_SYNC_W1_EL1); + SYSREG_READ_S(IMP_BARRIER_ASSIGN_SYNC_W2_EL1); + SYSREG_READ_S(IMP_BARRIER_ASSIGN_SYNC_W3_EL1); + SYSREG_READ_S(IMP_SOC_STANDBY_CTRL_EL1); + SYSREG_READ_S(IMP_FJ_CORE_UARCH_CTRL_EL2); + SYSREG_READ_S(IMP_FJ_CORE_UARCH_RESTRECTION_EL1); + /* fall through */ + default: + return -EINVAL; + } + return ret; +} + +static inline int arch_cpu_read_register(struct ihk_os_cpu_register *desc) +{ + int ret = 0; + uint64_t value; + + if (desc->addr) { + panic("memory mapped register is not supported.\n"); + } else if (desc->addr_ext) { + ret = arch_cpu_mrs(desc->addr_ext, &value); + if (ret == 0) { + desc->val = value; + } + } else { + ret = -EINVAL; + } + return ret; +} + +#define SYSREG_WRITE_S(sys_reg) case (sys_reg): asm volatile("msr_s " __stringify(sys_reg) ", %0" :: "r" (val)); break +static inline int arch_cpu_msr(uint32_t sys_reg, uint64_t val) +{ + int ret = 0; + + switch (sys_reg) { + SYSREG_WRITE_S(IMP_FJ_TAG_ADDRESS_CTRL_EL1); + SYSREG_WRITE_S(IMP_SCCR_CTRL_EL1); + SYSREG_WRITE_S(IMP_SCCR_ASSIGN_EL1); + SYSREG_WRITE_S(IMP_SCCR_SET0_L2_EL1); + SYSREG_WRITE_S(IMP_SCCR_SET1_L2_EL1); + SYSREG_WRITE_S(IMP_SCCR_L1_EL0); + SYSREG_WRITE_S(IMP_PF_CTRL_EL1); + SYSREG_WRITE_S(IMP_PF_STREAM_DETECT_CTRL_EL0); + SYSREG_WRITE_S(IMP_PF_INJECTION_CTRL0_EL0); + SYSREG_WRITE_S(IMP_PF_INJECTION_CTRL1_EL0); + SYSREG_WRITE_S(IMP_PF_INJECTION_CTRL2_EL0); + SYSREG_WRITE_S(IMP_PF_INJECTION_CTRL3_EL0); + SYSREG_WRITE_S(IMP_PF_INJECTION_CTRL4_EL0); + SYSREG_WRITE_S(IMP_PF_INJECTION_CTRL5_EL0); + SYSREG_WRITE_S(IMP_PF_INJECTION_CTRL6_EL0); + SYSREG_WRITE_S(IMP_PF_INJECTION_CTRL7_EL0); + 
SYSREG_WRITE_S(IMP_PF_INJECTION_DISTANCE0_EL0); + SYSREG_WRITE_S(IMP_PF_INJECTION_DISTANCE1_EL0); + SYSREG_WRITE_S(IMP_PF_INJECTION_DISTANCE2_EL0); + SYSREG_WRITE_S(IMP_PF_INJECTION_DISTANCE3_EL0); + SYSREG_WRITE_S(IMP_PF_INJECTION_DISTANCE4_EL0); + SYSREG_WRITE_S(IMP_PF_INJECTION_DISTANCE5_EL0); + SYSREG_WRITE_S(IMP_PF_INJECTION_DISTANCE6_EL0); + SYSREG_WRITE_S(IMP_PF_INJECTION_DISTANCE7_EL0); + SYSREG_WRITE_S(IMP_BARRIER_CTRL_EL1); + SYSREG_WRITE_S(IMP_BARRIER_BST_BIT_EL1); + SYSREG_WRITE_S(IMP_BARRIER_INIT_SYNC_BB0_EL1); + SYSREG_WRITE_S(IMP_BARRIER_INIT_SYNC_BB1_EL1); + SYSREG_WRITE_S(IMP_BARRIER_INIT_SYNC_BB2_EL1); + SYSREG_WRITE_S(IMP_BARRIER_INIT_SYNC_BB3_EL1); + SYSREG_WRITE_S(IMP_BARRIER_INIT_SYNC_BB4_EL1); + SYSREG_WRITE_S(IMP_BARRIER_INIT_SYNC_BB5_EL1); + SYSREG_WRITE_S(IMP_BARRIER_ASSIGN_SYNC_W0_EL1); + SYSREG_WRITE_S(IMP_BARRIER_ASSIGN_SYNC_W1_EL1); + SYSREG_WRITE_S(IMP_BARRIER_ASSIGN_SYNC_W2_EL1); + SYSREG_WRITE_S(IMP_BARRIER_ASSIGN_SYNC_W3_EL1); + SYSREG_WRITE_S(IMP_FJ_CORE_UARCH_CTRL_EL2); + SYSREG_WRITE_S(IMP_FJ_CORE_UARCH_RESTRECTION_EL1); + /* fallthrough */ + case IMP_SOC_STANDBY_CTRL_EL1: + asm volatile("msr_s " __stringify(IMP_SOC_STANDBY_CTRL_EL1) ", %0" + : : "r" (val) : "memory"); + if (val & IMP_SOC_STANDBY_CTRL_EL1_MODE_CHANGE) { + wfe(); + } + break; + default: + return -EINVAL; + } + return ret; +} + +static inline int arch_cpu_write_register(struct ihk_os_cpu_register *desc) +{ + int ret = 0; + + if (desc->addr) { + panic("memory mapped register is not supported.\n"); + } else if (desc->addr_ext) { + ret = arch_cpu_msr(desc->addr_ext, desc->val); + } else { + ret = -EINVAL; + } + return ret; } int arch_cpu_read_write_register( struct ihk_os_cpu_register *desc, enum mcctrl_os_cpu_operation op) { - /* TODO: skeleton for patch:0676 */ + int ret; if (op == MCCTRL_OS_CPU_READ_REGISTER) { -// desc->val = rdmsr(desc->addr); + ret = arch_cpu_read_register(desc); } else if (op == MCCTRL_OS_CPU_WRITE_REGISTER) { -// wrmsr(desc->addr, desc->val); 
+ ret = arch_cpu_write_register(desc); } else { - return -1; + ret = -1; } - return 0; + return ret; } int smp_call_func(cpu_set_t *__cpu_set, smp_func_t __func, void *__arg) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 403c6a91..3d904372 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -52,6 +52,19 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { ARM64_FTR_END, }; +/* @ref.impl linux4.16.0 arch/arm64/kernel/cpufeature.c */ +static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, + ID_AA64ISAR1_LRCPC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, + ID_AA64ISAR1_FCMA_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, + ID_AA64ISAR1_JSCVT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, + ID_AA64ISAR1_DPB_SHIFT, 4, 0), + ARM64_FTR_END, +}; + /* @ref.impl arch/arm64/kernel/cpufeature.c */ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0), @@ -302,7 +315,7 @@ static const struct __ftr_reg_entry { /* Op1 = 0, CRn = 0, CRm = 6 */ ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0), - ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_raz), + ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1), /* Op1 = 0, CRn = 0, CRm = 7 */ ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0), diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index d8cb7e36..92c870ad 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -77,11 +77,11 @@ .macro kernel_exit, el, need_enable_step = 0 .if \el == 0 + bl check_need_resched // or reschedule is needed. mov x0, #0 mov x1, sp mov x2, #0 bl check_signal // check whether the signal is delivered - bl check_need_resched // or reschedule is needed. 
mov x0, #0 mov x1, sp mov x2, #0 @@ -550,9 +550,7 @@ ENTRY(ret_from_fork) blr x19 1: get_thread_info tsk bl release_runq_lock + bl utilthr_migrate b ret_to_user ENDPROC(ret_from_fork) -/* TODO: skeleton for rusage */ -ENTRY(__freeze) -ENDPROC(__freeze) diff --git a/arch/arm64/kernel/fault.c b/arch/arm64/kernel/fault.c index d8dce27e..0e7b583a 100644 --- a/arch/arm64/kernel/fault.c +++ b/arch/arm64/kernel/fault.c @@ -13,7 +13,6 @@ unsigned long __page_fault_handler_address; extern int interrupt_from_user(void *); -void set_signal(int sig, void *regs, struct siginfo *info); static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs); static int do_page_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs); static int do_translation_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs); @@ -105,12 +104,13 @@ void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs) { const struct fault_info *inf = fault_info + (esr & 63); struct siginfo info; + const int from_user = interrupt_from_user(regs); /* set_cputime called in inf->fn() */ if (!inf->fn(addr, esr, regs)) return; - set_cputime(interrupt_from_user(regs)? 1: 2); + set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN); kprintf("Unhandled fault: %s (0x%08x) at 0x%016lx\n", inf->name, esr, addr); info.si_signo = inf->sig; info.si_errno = 0; @@ -118,7 +118,7 @@ void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs) info._sifields._sigfault.si_addr = (void*)addr; arm64_notify_die("", regs, &info, esr); - set_cputime(0); + set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT); } /* @@ -127,21 +127,24 @@ void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs) void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs) { struct siginfo info; + const int from_user = interrupt_from_user(regs); - set_cputime(interrupt_from_user(regs)? 1: 2); + set_cputime(from_user ? 
CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN); info.si_signo = SIGBUS; info.si_errno = 0; info.si_code = BUS_ADRALN; info._sifields._sigfault.si_addr = (void*)addr; arm64_notify_die("", regs, &info, esr); - set_cputime(0); + set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT); } static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs) { struct siginfo info; - set_cputime(interrupt_from_user(regs) ? 1: 2); + const int from_user = interrupt_from_user(regs); + + set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN); /* * If we are in kernel mode at this point, we have no context to * handle this fault with. @@ -163,7 +166,7 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re (addr < PAGE_SIZE) ? "NULL pointer dereference" : "paging request", addr); panic("OOps."); } - set_cputime(0); + set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT); } static int is_el0_instruction_abort(unsigned int esr) @@ -192,6 +195,7 @@ static int do_page_fault(unsigned long addr, unsigned int esr, } } + /* set_cputime() call in page_fault_handler() */ page_fault_handler = (void *)__page_fault_handler_address; (*page_fault_handler)((void *)addr, reason, regs); @@ -252,10 +256,10 @@ int do_debug_exception(unsigned long addr, unsigned int esr, struct pt_regs *reg { const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr); struct siginfo info; - int from_user = interrupt_from_user(regs); + const int from_user = interrupt_from_user(regs); int ret = -1; - set_cputime(from_user ? 1: 2); + set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN); if (!inf->fn(addr, esr, regs)) { ret = 1; @@ -274,7 +278,7 @@ int do_debug_exception(unsigned long addr, unsigned int esr, struct pt_regs *reg ret = 0; out: - set_cputime(0); + set_cputime(from_user ? 
CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT); return ret; } @@ -283,7 +287,9 @@ out: */ static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs) { - set_cputime(interrupt_from_user(regs) ? 1: 2); - set_cputime(0); + const int from_user = interrupt_from_user(regs); + + set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN); + set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT); return 1; } diff --git a/arch/arm64/kernel/gencore.c b/arch/arm64/kernel/gencore.c index 238cbed0..1c0aa5fc 100644 --- a/arch/arm64/kernel/gencore.c +++ b/arch/arm64/kernel/gencore.c @@ -19,6 +19,12 @@ #define DDEBUG_DEFAULT DDEBUG_PRINT #endif +/* Exclude reserved (mckernel's internal use), device file, + * hole created by mprotect + */ +#define GENCORE_RANGE_IS_INACCESSIBLE(range) \ + ((range->flag & (VR_RESERVED | VR_MEMTYPE_UC | VR_DONTDUMP))) + /* * Generate a core file image, which consists of many chunks. * Returns an allocated table, an etnry of which is a pair of the address @@ -239,10 +245,10 @@ int gencore(struct thread *thread, void *regs, dkprintf("start:%lx end:%lx flag:%lx objoff:%lx\n", range->start, range->end, range->flag, range->objoff); - /* We omit reserved areas because they are only for - mckernel's internal use. */ - if (range->flag & VR_RESERVED) + + if (GENCORE_RANGE_IS_INACCESSIBLE(range)) { continue; + } /* We need a chunk for each page for a demand paging area. This can be optimized for spacial complexity but we would lose simplicity instead. */ @@ -331,8 +337,9 @@ int gencore(struct thread *thread, void *regs, unsigned long flag = range->flag; unsigned long size = range->end - range->start; - if (range->flag & VR_RESERVED) + if (GENCORE_RANGE_IS_INACCESSIBLE(range)) { continue; + } ph[i].p_type = PT_LOAD; ph[i].p_flags = ((flag & VR_PROT_READ) ? 
PF_R : 0) @@ -374,8 +381,9 @@ int gencore(struct thread *thread, void *regs, unsigned long phys; - if (range->flag & VR_RESERVED) + if (GENCORE_RANGE_IS_INACCESSIBLE(range)) { continue; + } if (range->flag & VR_DEMAND_PAGING) { /* Just an ad hoc kluge. */ unsigned long p, start, phys; diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 9a706887..f1b162cc 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -1,4 +1,4 @@ -/* head.S COPYRIGHT FUJITSU LIMITED 2015-2017 */ +/* head.S COPYRIGHT FUJITSU LIMITED 2015-2018 */ #include #include @@ -11,8 +11,6 @@ #include #define KERNEL_RAM_VADDR MAP_KERNEL_START -#define EARLY_ALLOC_VADDR MAP_EARLY_ALLOC -#define BOOT_PARAM_VADDR MAP_BOOT_PARAM //#ifndef CONFIG_SMP //# define PTE_FLAGS PTE_TYPE_PAGE | PTE_AF @@ -50,16 +48,6 @@ add \ttb0, \ttb0, \virt_to_phys .endm -#ifdef CONFIG_ARM64_64K_PAGES -# define BLOCK_SHIFT PAGE_SHIFT -# define BLOCK_SIZE PAGE_SIZE -# define TABLE_SHIFT PMD_SHIFT -#else -# define BLOCK_SHIFT SECTION_SHIFT -# define BLOCK_SIZE SECTION_SIZE -# define TABLE_SHIFT PUD_SHIFT -#endif - #define KERNEL_START KERNEL_RAM_VADDR #define KERNEL_END _end @@ -87,6 +75,7 @@ #define TRAMPOLINE_DATA_CPU_MAP_SIZE_SIZE 0x08 #define TRAMPOLINE_DATA_CPU_MAP_SIZE (NR_CPUS * 8) #define TRAMPOLINE_DATA_DATA_RDISTS_PA_SIZE (NR_CPUS * 8) +#define TRAMPOLINE_DATA_RETENTION_STATE_FLAG_PA_SIZE 0x08 #define TRAMPOLINE_DATA_NR_PMU_AFFI_SIZE 0x04 #define TRAMPOLINE_DATA_PMU_AFF_SIZE (CONFIG_SMP_MAX_CORES * 4) @@ -105,9 +94,9 @@ .globl ihk_param_gic_percpu_offset, ihk_param_gic_version .globl ihk_param_lpj, ihk_param_hz, ihk_param_psci_method .globl ihk_param_cpu_logical_map, ihk_param_gic_rdist_base_pa - .globl ihk_param_pmu_irq_affiniry, ihk_param_nr_pmu_irq_affiniry + .globl ihk_param_pmu_irq_affi, ihk_param_nr_pmu_irq_affi .globl ihk_param_use_virt_timer, ihk_param_evtstrm_timer_rate - .globl ihk_param_default_vl + .globl ihk_param_retention_state_flag_pa, ihk_param_default_vl 
ihk_param_head: ihk_param_param_addr: .quad 0 @@ -145,9 +134,11 @@ ihk_param_cpu_logical_map: .skip NR_CPUS * 8 /* array of the MPIDR and the core number */ ihk_param_gic_rdist_base_pa: .skip NR_CPUS * 8 /* per-cpu re-distributer PA */ -ihk_param_pmu_irq_affiniry: +ihk_param_retention_state_flag_pa: + .quad 0 +ihk_param_pmu_irq_affi: .skip CONFIG_SMP_MAX_CORES * 4 /* array of the pmu affinity list */ -ihk_param_nr_pmu_irq_affiniry: +ihk_param_nr_pmu_irq_affi: .word 0 /* number of pmu affinity list elements. */ /* @ref.impl arch/arm64/include/asm/kvm_arm.h */ @@ -265,13 +256,17 @@ ENTRY(arch_start) mov x16, #NR_CPUS /* calc next data */ lsl x16, x16, 3 add x0, x0, x16 - /* nr_pmu_irq_affiniry */ + /* retention_state_flag_pa */ + ldr x16, [x0], #TRAMPOLINE_DATA_RETENTION_STATE_FLAG_PA_SIZE + adr x15, ihk_param_retention_state_flag_pa + str x16, [x15] + /* nr_pmu_irq_affi */ ldr w16, [x0], #TRAMPOLINE_DATA_NR_PMU_AFFI_SIZE - adr x15, ihk_param_nr_pmu_irq_affiniry + adr x15, ihk_param_nr_pmu_irq_affi str w16, [x15] - /* pmu_irq_affiniry */ + /* pmu_irq_affi */ mov x18, x0 - adr x15, ihk_param_pmu_irq_affiniry + adr x15, ihk_param_pmu_irq_affi b 2f 1: ldr w17, [x18], #4 str w17, [x15], #4 @@ -410,14 +405,17 @@ __create_page_tables: * Map the early_alloc_pages area, kernel_img next block */ ldr x3, =KERNEL_END - add x3, x3, x28 // __pa(KERNEL_END) + add x3, x3, x28 // __pa(KERNEL_END) add x3, x3, #BLOCK_SIZE - sub x3, x3, #1 - bic x3, x3, #(BLOCK_SIZE - 1) // start PA calc. - ldr x5, =EARLY_ALLOC_VADDR // get start VA - mov x6, #1 - lsl x6, x6, #(PAGE_SHIFT + MAP_EARLY_ALLOC_SHIFT) + sub x3, x3, #1 + bic x3, x3, #(BLOCK_SIZE - 1) // start PA calc. + ldr x5, =KERNEL_END // get start VA + add x5, x5, #BLOCK_SIZE + sub x5, x5, #1 + bic x5, x5, #(BLOCK_SIZE - 1) // start VA calc. 
+ mov x6, #MAP_EARLY_ALLOC_SIZE add x6, x5, x6 // end VA calc + mov x23, x6 // save end VA sub x6, x6, #1 // inclusive range create_block_map x0, x7, x3, x5, x6 @@ -425,11 +423,13 @@ __create_page_tables: * Map the boot_param area */ adr x3, ihk_param_param_addr - ldr x3, [x3] // get boot_param PA - ldr x5, =BOOT_PARAM_VADDR // get boot_param VA - mov x6, #1 - lsl x6, x6, #MAP_BOOT_PARAM_SHIFT - add x6, x5, x6 // end VA calc + ldr x3, [x3] // get boot_param PA + mov x5, x23 // get start VA + add x5, x5, #BLOCK_SIZE + sub x5, x5, #1 + bic x5, x5, #(BLOCK_SIZE - 1) // start VA calc + mov x6, #MAP_BOOT_PARAM_SIZE + add x6, x5, x6 // end VA calc. sub x6, x6, #1 // inclusive range create_block_map x0, x7, x3, x5, x6 diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 28cfe6b0..cf98d023 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -7,6 +7,7 @@ #include #include #include +#include /* @ref.impl arch/arm64/kernel/hw_breakpoint.c::core_num_[brps|wrps] */ /* Number of BRP/WRP registers on this CPU. 
*/ diff --git a/arch/arm64/kernel/imp-sysreg.c b/arch/arm64/kernel/imp-sysreg.c new file mode 100644 index 00000000..fd6b3ec7 --- /dev/null +++ b/arch/arm64/kernel/imp-sysreg.c @@ -0,0 +1,131 @@ +/* imp-sysreg.c COPYRIGHT FUJITSU LIMITED 2018 */ +#include + +/* hpc */ +ACCESS_REG_FUNC(fj_tag_address_ctrl_el1, IMP_FJ_TAG_ADDRESS_CTRL_EL1); +ACCESS_REG_FUNC(pf_ctrl_el1, IMP_PF_CTRL_EL1); +ACCESS_REG_FUNC(pf_stream_detect_ctrl_el0, IMP_PF_STREAM_DETECT_CTRL_EL0); +ACCESS_REG_FUNC(pf_injection_ctrl0_el0, IMP_PF_INJECTION_CTRL0_EL0); +ACCESS_REG_FUNC(pf_injection_ctrl1_el0, IMP_PF_INJECTION_CTRL1_EL0); +ACCESS_REG_FUNC(pf_injection_ctrl2_el0, IMP_PF_INJECTION_CTRL2_EL0); +ACCESS_REG_FUNC(pf_injection_ctrl3_el0, IMP_PF_INJECTION_CTRL3_EL0); +ACCESS_REG_FUNC(pf_injection_ctrl4_el0, IMP_PF_INJECTION_CTRL4_EL0); +ACCESS_REG_FUNC(pf_injection_ctrl5_el0, IMP_PF_INJECTION_CTRL5_EL0); +ACCESS_REG_FUNC(pf_injection_ctrl6_el0, IMP_PF_INJECTION_CTRL6_EL0); +ACCESS_REG_FUNC(pf_injection_ctrl7_el0, IMP_PF_INJECTION_CTRL7_EL0); +ACCESS_REG_FUNC(pf_injection_distance0_el0, IMP_PF_INJECTION_DISTANCE0_EL0); +ACCESS_REG_FUNC(pf_injection_distance1_el0, IMP_PF_INJECTION_DISTANCE1_EL0); +ACCESS_REG_FUNC(pf_injection_distance2_el0, IMP_PF_INJECTION_DISTANCE2_EL0); +ACCESS_REG_FUNC(pf_injection_distance3_el0, IMP_PF_INJECTION_DISTANCE3_EL0); +ACCESS_REG_FUNC(pf_injection_distance4_el0, IMP_PF_INJECTION_DISTANCE4_EL0); +ACCESS_REG_FUNC(pf_injection_distance5_el0, IMP_PF_INJECTION_DISTANCE5_EL0); +ACCESS_REG_FUNC(pf_injection_distance6_el0, IMP_PF_INJECTION_DISTANCE6_EL0); +ACCESS_REG_FUNC(pf_injection_distance7_el0, IMP_PF_INJECTION_DISTANCE7_EL0); + +static void hpc_prefetch_regs_init(void) +{ + uint64_t reg = 0; + + /* PF_CTRL_EL1 */ + reg = IMP_PF_CTRL_EL1_EL1AE_ENABLE | IMP_PF_CTRL_EL1_EL0AE_ENABLE; + xos_access_pf_ctrl_el1(WRITE_ACCESS, ®); + + /* PF_STREAM_DETECT_CTRL */ + reg = 0; + xos_access_pf_stream_detect_ctrl_el0(WRITE_ACCESS, ®); + + /* PF_INJECTION_CTRL */ + reg = 0; + 
xos_access_pf_injection_ctrl0_el0(WRITE_ACCESS, ®); + xos_access_pf_injection_ctrl1_el0(WRITE_ACCESS, ®); + xos_access_pf_injection_ctrl2_el0(WRITE_ACCESS, ®); + xos_access_pf_injection_ctrl3_el0(WRITE_ACCESS, ®); + xos_access_pf_injection_ctrl4_el0(WRITE_ACCESS, ®); + xos_access_pf_injection_ctrl5_el0(WRITE_ACCESS, ®); + xos_access_pf_injection_ctrl6_el0(WRITE_ACCESS, ®); + xos_access_pf_injection_ctrl7_el0(WRITE_ACCESS, ®); + + /* PF_INJECTION_DISTANCE */ + reg = 0; + xos_access_pf_injection_distance0_el0(WRITE_ACCESS, ®); + xos_access_pf_injection_distance1_el0(WRITE_ACCESS, ®); + xos_access_pf_injection_distance2_el0(WRITE_ACCESS, ®); + xos_access_pf_injection_distance3_el0(WRITE_ACCESS, ®); + xos_access_pf_injection_distance4_el0(WRITE_ACCESS, ®); + xos_access_pf_injection_distance5_el0(WRITE_ACCESS, ®); + xos_access_pf_injection_distance6_el0(WRITE_ACCESS, ®); + xos_access_pf_injection_distance7_el0(WRITE_ACCESS, ®); +} + +static void hpc_tag_address_regs_init(void) +{ + uint64_t reg = IMP_FJ_TAG_ADDRESS_CTRL_EL1_TBO0_MASK | + IMP_FJ_TAG_ADDRESS_CTRL_EL1_SEC0_MASK | + IMP_FJ_TAG_ADDRESS_CTRL_EL1_PFE0_MASK; + + /* FJ_TAG_ADDRESS_CTRL */ + xos_access_fj_tag_address_ctrl_el1(WRITE_ACCESS, ®); +} + +void hpc_registers_init(void) +{ + hpc_prefetch_regs_init(); + hpc_tag_address_regs_init(); +} + +/* vhbm */ +ACCESS_REG_FUNC(barrier_ctrl_el1, IMP_BARRIER_CTRL_EL1); +ACCESS_REG_FUNC(barrier_bst_bit_el1, IMP_BARRIER_BST_BIT_EL1); +ACCESS_REG_FUNC(barrier_init_sync_bb0_el1, IMP_BARRIER_INIT_SYNC_BB0_EL1); +ACCESS_REG_FUNC(barrier_init_sync_bb1_el1, IMP_BARRIER_INIT_SYNC_BB1_EL1); +ACCESS_REG_FUNC(barrier_init_sync_bb2_el1, IMP_BARRIER_INIT_SYNC_BB2_EL1); +ACCESS_REG_FUNC(barrier_init_sync_bb3_el1, IMP_BARRIER_INIT_SYNC_BB3_EL1); +ACCESS_REG_FUNC(barrier_init_sync_bb4_el1, IMP_BARRIER_INIT_SYNC_BB4_EL1); +ACCESS_REG_FUNC(barrier_init_sync_bb5_el1, IMP_BARRIER_INIT_SYNC_BB5_EL1); +ACCESS_REG_FUNC(barrier_assign_sync_w0_el1, IMP_BARRIER_ASSIGN_SYNC_W0_EL1); 
+ACCESS_REG_FUNC(barrier_assign_sync_w1_el1, IMP_BARRIER_ASSIGN_SYNC_W1_EL1); +ACCESS_REG_FUNC(barrier_assign_sync_w2_el1, IMP_BARRIER_ASSIGN_SYNC_W2_EL1); +ACCESS_REG_FUNC(barrier_assign_sync_w3_el1, IMP_BARRIER_ASSIGN_SYNC_W3_EL1); + +void vhbm_barrier_registers_init(void) +{ + uint64_t reg = 0; + + reg = IMP_BARRIER_CTRL_EL1_EL1AE_ENABLE | + IMP_BARRIER_CTRL_EL1_EL0AE_ENABLE; + xos_access_barrier_ctrl_el1(WRITE_ACCESS, ®); + + reg = 0; + + xos_access_barrier_init_sync_bb0_el1(WRITE_ACCESS, ®); + xos_access_barrier_init_sync_bb1_el1(WRITE_ACCESS, ®); + xos_access_barrier_init_sync_bb2_el1(WRITE_ACCESS, ®); + xos_access_barrier_init_sync_bb3_el1(WRITE_ACCESS, ®); + xos_access_barrier_init_sync_bb4_el1(WRITE_ACCESS, ®); + xos_access_barrier_init_sync_bb5_el1(WRITE_ACCESS, ®); + xos_access_barrier_assign_sync_w0_el1(WRITE_ACCESS, ®); + xos_access_barrier_assign_sync_w1_el1(WRITE_ACCESS, ®); + xos_access_barrier_assign_sync_w2_el1(WRITE_ACCESS, ®); + xos_access_barrier_assign_sync_w3_el1(WRITE_ACCESS, ®); +} + +/* sccr */ +ACCESS_REG_FUNC(sccr_ctrl_el1, IMP_SCCR_CTRL_EL1); +ACCESS_REG_FUNC(sccr_assign_el1, IMP_SCCR_ASSIGN_EL1); +ACCESS_REG_FUNC(sccr_set0_l2_el1, IMP_SCCR_SET0_L2_EL1); +ACCESS_REG_FUNC(sccr_l1_el0, IMP_SCCR_L1_EL0); + +void scdrv_registers_init(void) +{ + uint64_t reg = 0; + + reg = IMP_SCCR_CTRL_EL1_EL1AE_MASK; + xos_access_sccr_ctrl_el1(WRITE_ACCESS, ®); + + reg = 0; + xos_access_sccr_assign_el1(WRITE_ACCESS, ®); + xos_access_sccr_l1_el0(WRITE_ACCESS, ®); + + reg = (14UL << IMP_SCCR_SET0_L2_EL1_L2_SEC0_SHIFT); + xos_access_sccr_set0_l2_el1(WRITE_ACCESS, ®); +} diff --git a/arch/arm64/kernel/include/arch-futex.h b/arch/arm64/kernel/include/arch-futex.h index efccba9f..4f548520 100644 --- a/arch/arm64/kernel/include/arch-futex.h +++ b/arch/arm64/kernel/include/arch-futex.h @@ -32,12 +32,13 @@ * @ref.impl * linux-linaro/arch/arm64/include/asm/futex.h:futex_atomic_op_inuser */ -static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) 
+static inline int futex_atomic_op_inuser(int encoded_op, + int __user *uaddr) { int op = (encoded_op >> 28) & 7; int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; + int oparg = (encoded_op & 0x00fff000) >> 12; + int cmparg = encoded_op & 0xfff; int oldval = 0, ret, tmp; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) diff --git a/arch/arm64/kernel/include/arch-lock.h b/arch/arm64/kernel/include/arch-lock.h index cd1a7691..5d4d092d 100644 --- a/arch/arm64/kernel/include/arch-lock.h +++ b/arch/arm64/kernel/include/arch-lock.h @@ -1,4 +1,4 @@ -/* arch-lock.h COPYRIGHT FUJITSU LIMITED 2015-2017 */ +/* arch-lock.h COPYRIGHT FUJITSU LIMITED 2015-2018 */ #ifndef __HEADER_ARM64_COMMON_ARCH_LOCK_H #define __HEADER_ARM64_COMMON_ARCH_LOCK_H @@ -21,14 +21,14 @@ int __kprintf(const char *format, ...); /* @ref.impl arch/arm64/include/asm/spinlock_types.h::arch_spinlock_t */ typedef struct { -//#ifdef __AARCH64EB__ -// uint16_t next; -// uint16_t owner; -//#else /* __AARCH64EB__ */ +#ifdef __AARCH64EB__ + uint16_t next; + uint16_t owner; +#else /* __AARCH64EB__ */ uint16_t owner; uint16_t next; -//#endif /* __AARCH64EB__ */ -} ihk_spinlock_t; +#endif /* __AARCH64EB__ */ +} __attribute__((aligned(4))) ihk_spinlock_t; extern void preempt_enable(void); extern void preempt_disable(void); @@ -36,14 +36,100 @@ extern void preempt_disable(void); /* @ref.impl arch/arm64/include/asm/spinlock_types.h::__ARCH_SPIN_LOCK_UNLOCKED */ #define SPIN_LOCK_UNLOCKED { 0, 0 } +/* @ref.impl arch/arm64/include/asm/barrier.h::__nops */ +#define __nops(n) ".rept " #n "\nnop\n.endr\n" + +/* @ref.impl ./arch/arm64/include/asm/lse.h::ARM64_LSE_ATOMIC_INSN */ +/* else defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS) */ +#define ARM64_LSE_ATOMIC_INSN(llsc, lse) llsc + /* initialized spinlock struct */ static void ihk_mc_spinlock_init(ihk_spinlock_t *lock) { *lock = (ihk_spinlock_t)SPIN_LOCK_UNLOCKED; } -/* @ref.impl 
arch/arm64/include/asm/spinlock.h::arch_spin_lock */ -/* spinlock lock */ +#ifdef DEBUG_SPINLOCK +#define ihk_mc_spinlock_trylock_noirq(l) { \ + int rc; \ + __kprintf("[%d] call ihk_mc_spinlock_trylock_noirq %p %s:%d\n", \ + ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \ + rc = __ihk_mc_spinlock_trylock_noirq(l); \ + __kprintf("[%d] ret ihk_mc_spinlock_trylock_noirq\n", \ + ihk_mc_get_processor_id()); \ + rc; \ +} +#else +#define ihk_mc_spinlock_trylock_noirq __ihk_mc_spinlock_trylock_noirq +#endif + +/* @ref.impl arch/arm64/include/asm/spinlock.h::arch_spin_trylock */ +/* spinlock trylock */ +static int __ihk_mc_spinlock_trylock_noirq(ihk_spinlock_t *lock) +{ + unsigned int tmp; + ihk_spinlock_t lockval; + int success; + + preempt_disable(); + + asm volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ + " prfm pstl1strm, %2\n" + "1: ldaxr %w0, %2\n" + " eor %w1, %w0, %w0, ror #16\n" + " cbnz %w1, 2f\n" + " add %w0, %w0, %3\n" + " stxr %w1, %w0, %2\n" + " cbnz %w1, 1b\n" + "2:", + /* LSE atomics */ + " ldr %w0, %2\n" + " eor %w1, %w0, %w0, ror #16\n" + " cbnz %w1, 1f\n" + " add %w1, %w0, %3\n" + " casa %w0, %w1, %2\n" + " sub %w1, %w1, %3\n" + " eor %w1, %w1, %w0\n" + "1:") + : "=&r" (lockval), "=&r" (tmp), "+Q" (*lock) + : "I" (1 << TICKET_SHIFT) + : "memory"); + + success = !tmp; + if (!success) { + preempt_enable(); + } + return success; +} + +#ifdef DEBUG_SPINLOCK +#define ihk_mc_spinlock_trylock(l, result) ({ \ + unsigned long rc; \ + __kprintf("[%d] call ihk_mc_spinlock_trylock %p %s:%d\n", \ + ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \ + rc = __ihk_mc_spinlock_trylock(l, result); \ + __kprintf("[%d] ret ihk_mc_spinlock_trylock\n", \ + ihk_mc_get_processor_id()); \ + rc; \ +}) +#else +#define ihk_mc_spinlock_trylock __ihk_mc_spinlock_trylock +#endif + +/* spinlock trylock & interrupt disable & PSTATE.DAIF save */ +static unsigned long __ihk_mc_spinlock_trylock(ihk_spinlock_t *lock, + int *result) +{ + unsigned long flags; + + flags = 
cpu_disable_interrupt_save(); + + *result = __ihk_mc_spinlock_trylock_noirq(lock); + + return flags; +} + #ifdef DEBUG_SPINLOCK #define ihk_mc_spinlock_lock_noirq(l) { \ __kprintf("[%d] call ihk_mc_spinlock_lock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \ @@ -54,6 +140,8 @@ __kprintf("[%d] ret ihk_mc_spinlock_lock_noirq\n", ihk_mc_get_processor_id()); \ #define ihk_mc_spinlock_lock_noirq __ihk_mc_spinlock_lock_noirq #endif +/* @ref.impl arch/arm64/include/asm/spinlock.h::arch_spin_lock */ +/* spinlock lock */ static void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock) { unsigned int tmp; @@ -63,11 +151,19 @@ static void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock) asm volatile( /* Atomically increment the next ticket. */ + ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ " prfm pstl1strm, %3\n" "1: ldaxr %w0, %3\n" " add %w1, %w0, %w5\n" " stxr %w2, %w1, %3\n" -" cbnz %w2, 1b\n" +" cbnz %w2, 1b\n", + /* LSE atomics */ +" mov %w2, %w5\n" +" ldadda %w2, %w0, %3\n" + __nops(3) + ) + /* Did we get the lock? 
*/ " eor %w1, %w0, %w0, ror #16\n" " cbz %w1, 3f\n" @@ -87,7 +183,6 @@ static void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock) : "memory"); } -/* spinlock lock & interrupt disable & PSTATE.DAIF save */ #ifdef DEBUG_SPINLOCK #define ihk_mc_spinlock_lock(l) ({ unsigned long rc;\ __kprintf("[%d] call ihk_mc_spinlock_lock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \ @@ -97,6 +192,8 @@ __kprintf("[%d] ret ihk_mc_spinlock_lock\n", ihk_mc_get_processor_id()); rc;\ #else #define ihk_mc_spinlock_lock __ihk_mc_spinlock_lock #endif + +/* spinlock lock & interrupt disable & PSTATE.DAIF save */ static unsigned long __ihk_mc_spinlock_lock(ihk_spinlock_t *lock) { unsigned long flags; @@ -108,8 +205,6 @@ static unsigned long __ihk_mc_spinlock_lock(ihk_spinlock_t *lock) return flags; } -/* @ref.impl arch/arm64/include/asm/spinlock.h::arch_spin_unlock */ -/* spinlock unlock */ #ifdef DEBUG_SPINLOCK #define ihk_mc_spinlock_unlock_noirq(l) { \ __kprintf("[%d] call ihk_mc_spinlock_unlock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \ @@ -119,12 +214,24 @@ __kprintf("[%d] ret ihk_mc_spinlock_unlock_noirq\n", ihk_mc_get_processor_id()); #else #define ihk_mc_spinlock_unlock_noirq __ihk_mc_spinlock_unlock_noirq #endif + +/* @ref.impl arch/arm64/include/asm/spinlock.h::arch_spin_unlock */ +/* spinlock unlock */ static void __ihk_mc_spinlock_unlock_noirq(ihk_spinlock_t *lock) { - asm volatile( -" stlrh %w1, %0\n" - : "=Q" (lock->owner) - : "r" (lock->owner + 1) + unsigned long tmp; + + asm volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ + " ldrh %w1, %0\n" + " add %w1, %w1, #1\n" + " stlrh %w1, %0", + /* LSE atomics */ + " mov %w1, #1\n" + " staddlh %w1, %0\n" + __nops(1)) + : "=Q" (lock->owner), "=&r" (tmp) + : : "memory"); preempt_enable(); @@ -606,16 +713,18 @@ __mcs_rwlock_reader_unlock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_ #endif } +#if defined(CONFIG_HAS_NMI) +#include static inline int 
irqflags_can_interrupt(unsigned long flags) { -#ifdef CONFIG_HAS_NMI -#warning irqflags_can_interrupt needs testing/fixing on such a target - return flags > ICC_PMR_EL1_MASKED; -#else - // PSTATE.DAIF I bit clear means interrupt is possible - return !(flags & (1 << 7)); -#endif + return (flags == ICC_PMR_EL1_UNMASKED); } +#else /* CONFIG_HAS_NMI */ +static inline int irqflags_can_interrupt(unsigned long flags) +{ + return !(flags & 0x2); +} +#endif /* CONFIG_HAS_NMI */ #endif /* !__HEADER_ARM64_COMMON_ARCH_LOCK_H */ diff --git a/arch/arm64/kernel/include/arch-memory.h b/arch/arm64/kernel/include/arch-memory.h index 44cb1c7b..5cd0f65c 100644 --- a/arch/arm64/kernel/include/arch-memory.h +++ b/arch/arm64/kernel/include/arch-memory.h @@ -1,4 +1,4 @@ -/* arch-memory.h COPYRIGHT FUJITSU LIMITED 2015-2017 */ +/* arch-memory.h COPYRIGHT FUJITSU LIMITED 2015-2018 */ #ifndef __HEADER_ARM64_COMMON_ARCH_MEMORY_H #define __HEADER_ARM64_COMMON_ARCH_MEMORY_H @@ -16,22 +16,45 @@ void panic(const char *); #define _SZ64KB (1UL<<16) #ifdef CONFIG_ARM64_64K_PAGES -# define GRANULE_SIZE _SZ64KB +# define GRANULE_SIZE _SZ64KB +# define BLOCK_SHIFT PAGE_SHIFT +# define BLOCK_SIZE PAGE_SIZE +# define TABLE_SHIFT PMD_SHIFT #else -# define GRANULE_SIZE _SZ4KB +# define GRANULE_SIZE _SZ4KB +# define BLOCK_SHIFT SECTION_SHIFT +# define BLOCK_SIZE SECTION_SIZE +# define TABLE_SHIFT PUD_SHIFT #endif + #define VA_BITS CONFIG_ARM64_VA_BITS /* * Address define */ -#define MAP_KERNEL_SHIFT 21 -#define MAP_KERNEL_SIZE (UL(1) << MAP_KERNEL_SHIFT) +/* early alloc area address */ +/* START:_end, SIZE:512 pages */ #define MAP_EARLY_ALLOC_SHIFT 9 #define MAP_EARLY_ALLOC_SIZE (UL(1) << (PAGE_SHIFT + MAP_EARLY_ALLOC_SHIFT)) + +#ifndef __ASSEMBLY__ +# define ALIGN_UP(x, align) ALIGN_DOWN((x) + (align) - 1, align) +# define ALIGN_DOWN(x, align) ((x) & ~((align) - 1)) +extern char _end[]; +# define MAP_EARLY_ALLOC (ALIGN_UP((unsigned long)_end, BLOCK_SIZE)) +# define MAP_EARLY_ALLOC_END (MAP_EARLY_ALLOC + 
MAP_EARLY_ALLOC_SIZE) +#endif /* !__ASSEMBLY__ */ + +/* bootparam area address */ +/* START:early alloc area end, SIZE:2MiB */ #define MAP_BOOT_PARAM_SHIFT 21 #define MAP_BOOT_PARAM_SIZE (UL(1) << MAP_BOOT_PARAM_SHIFT) +#ifndef __ASSEMBLY__ +# define MAP_BOOT_PARAM (ALIGN_UP(MAP_EARLY_ALLOC_END, BLOCK_SIZE)) +# define MAP_BOOT_PARAM_END (MAP_BOOT_PARAM + MAP_BOOT_PARAM_SIZE) +#endif /* !__ASSEMBLY__ */ + #if (VA_BITS == 39 && GRANULE_SIZE == _SZ4KB) # # define TASK_UNMAPPED_BASE UL(0x0000000800000000) @@ -40,12 +63,7 @@ void panic(const char *); # define MAP_VMAP_SIZE UL(0x0000000100000000) # define MAP_FIXED_START UL(0xffffffbffbdfd000) # define MAP_ST_START UL(0xffffffc000000000) -# define MAP_KERNEL_START UL(0xffffffffff800000) // 0xffff_ffff_ff80_0000 -# define MAP_ST_SIZE (MAP_KERNEL_START - MAP_ST_START) // 0x0000_003f_ff80_0000 -# define MAP_EARLY_ALLOC (MAP_KERNEL_START + MAP_KERNEL_SIZE) // 0xffff_ffff_ffa0_0000 -# define MAP_EARLY_ALLOC_END (MAP_EARLY_ALLOC + MAP_EARLY_ALLOC_SIZE) -# define MAP_BOOT_PARAM (MAP_EARLY_ALLOC_END) // 0xffff_ffff_ffc0_0000 -# define MAP_BOOT_PARAM_END (MAP_BOOT_PARAM + MAP_BOOT_PARAM_SIZE) // 0xffff_ffff_ffe0_0000 +# define MAP_KERNEL_START UL(0xffffffffff800000) # #elif (VA_BITS == 42 && GRANULE_SIZE == _SZ64KB) # @@ -55,12 +73,7 @@ void panic(const char *); # define MAP_VMAP_SIZE UL(0x0000000100000000) # define MAP_FIXED_START UL(0xfffffdfffbdd0000) # define MAP_ST_START UL(0xfffffe0000000000) -# define MAP_KERNEL_START UL(0xffffffffe0000000) // 0xffff_ffff_e000_0000 -# define MAP_ST_SIZE (MAP_KERNEL_START - MAP_ST_START) // 0x0000_01ff_e000_0000 -# define MAP_EARLY_ALLOC (MAP_KERNEL_START + MAP_KERNEL_SIZE) // 0xffff_ffff_e020_0000 -# define MAP_EARLY_ALLOC_END (MAP_EARLY_ALLOC + MAP_EARLY_ALLOC_SIZE) -# define MAP_BOOT_PARAM (MAP_EARLY_ALLOC_END) // 0xffff_ffff_e220_0000 -# define MAP_BOOT_PARAM_END (MAP_BOOT_PARAM + MAP_BOOT_PARAM_SIZE) // 0xffff_ffff_e240_0000 +# define MAP_KERNEL_START UL(0xffffffffe0000000) # #elif 
(VA_BITS == 48 && GRANULE_SIZE == _SZ4KB) # @@ -70,13 +83,7 @@ void panic(const char *); # define MAP_VMAP_SIZE UL(0x0000000100000000) # define MAP_FIXED_START UL(0xffff7ffffbdfd000) # define MAP_ST_START UL(0xffff800000000000) -# define MAP_KERNEL_START UL(0xffffffffff800000) // 0xffff_ffff_ff80_0000 -# define MAP_ST_SIZE (MAP_KERNEL_START - MAP_ST_START) // 0x0000_7fff_ff80_0000 -# define MAP_EARLY_ALLOC (MAP_KERNEL_START + MAP_KERNEL_SIZE) // 0xffff_ffff_ffa0_0000 -# define MAP_EARLY_ALLOC_END (MAP_EARLY_ALLOC + MAP_EARLY_ALLOC_SIZE) -# define MAP_BOOT_PARAM (MAP_EARLY_ALLOC_END) // 0xffff_ffff_ffc0_0000 -# define MAP_BOOT_PARAM_END (MAP_BOOT_PARAM + MAP_BOOT_PARAM_SIZE) // 0xffff_ffff_ffe0_0000 -# +# define MAP_KERNEL_START UL(0xffffffffff800000) # #elif (VA_BITS == 48 && GRANULE_SIZE == _SZ64KB) # @@ -86,18 +93,14 @@ void panic(const char *); # define MAP_VMAP_SIZE UL(0x0000000100000000) # define MAP_FIXED_START UL(0xffff7ffffbdd0000) # define MAP_ST_START UL(0xffff800000000000) -# define MAP_KERNEL_START UL(0xffffffffe0000000) // 0xffff_ffff_e000_0000 -# define MAP_ST_SIZE (MAP_KERNEL_START - MAP_ST_START) // 0x0000_7fff_e000_0000 -# define MAP_EARLY_ALLOC (MAP_KERNEL_START + MAP_KERNEL_SIZE) // 0xffff_ffff_e020_0000 -# define MAP_EARLY_ALLOC_END (MAP_EARLY_ALLOC + MAP_EARLY_ALLOC_SIZE) -# define MAP_BOOT_PARAM (MAP_EARLY_ALLOC_END) // 0xffff_ffff_e220_0000 -# define MAP_BOOT_PARAM_END (MAP_BOOT_PARAM + MAP_BOOT_PARAM_SIZE) // 0xffff_ffff_e240_0000 +# define MAP_KERNEL_START UL(0xffffffffe0000000) # #else # error address space is not defined. 
#endif -#define STACK_TOP(region) ((region)->user_end) +#define MAP_ST_SIZE (MAP_KERNEL_START - MAP_ST_START) +#define STACK_TOP(region) ((region)->user_end) /* * pagetable define @@ -129,7 +132,7 @@ void panic(const char *); # define __PTL2_CONT_SHIFT (__PTL2_SHIFT + 5) # define __PTL1_CONT_SHIFT (__PTL1_SHIFT + 7) #elif GRANULE_SIZE == _SZ64KB -# define __PTL4_SHIFT 0 +# define __PTL4_SHIFT 55 # define __PTL3_SHIFT 42 # define __PTL2_SHIFT 29 # define __PTL1_SHIFT 16 diff --git a/arch/arm64/kernel/include/arch-perfctr.h b/arch/arm64/kernel/include/arch-perfctr.h index a1eb501d..0e73b499 100644 --- a/arch/arm64/kernel/include/arch-perfctr.h +++ b/arch/arm64/kernel/include/arch-perfctr.h @@ -4,6 +4,13 @@ #include #include +#include + +struct per_cpu_arm_pmu { + int num_events; +#define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 + DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS); +}; /* @ref.impl arch/arm64/include/asm/pmu.h */ struct arm_pmu { @@ -19,9 +26,12 @@ struct arm_pmu { int (*disable_intens)(int); int (*set_event_filter)(unsigned long*, int); void (*write_evtype)(int, uint32_t); - int (*get_event_idx)(int, unsigned long); + int (*get_event_idx)(int num_events, unsigned long used_mask, + unsigned long config); int (*map_event)(uint32_t, uint64_t); - int num_events; + void (*enable_user_access_pmu_regs)(void); + void (*disable_user_access_pmu_regs)(void); + struct per_cpu_arm_pmu *per_cpu; }; static inline const struct arm_pmu* get_cpu_pmu(void) @@ -29,10 +39,21 @@ static inline const struct arm_pmu* get_cpu_pmu(void) extern struct arm_pmu cpu_pmu; return &cpu_pmu; } + +static inline const struct per_cpu_arm_pmu *get_per_cpu_pmu(void) +{ + const struct arm_pmu *cpu_pmu = get_cpu_pmu(); + + return &cpu_pmu->per_cpu[ihk_mc_get_processor_id()]; +} + int arm64_init_perfctr(void); +void arm64_init_per_cpu_perfctr(void); int arm64_enable_pmu(void); void arm64_disable_pmu(void); int armv8pmu_init(struct arm_pmu* cpu_pmu); +void armv8pmu_per_cpu_init(struct 
per_cpu_arm_pmu *per_cpu); +void arm64_enable_user_access_pmu_regs(void); +void arm64_disable_user_access_pmu_regs(void); -/* TODO[PMU]: 共通部に定義があっても良い。今後の動向を見てここの定義を削除する */ #endif diff --git a/arch/arm64/kernel/include/arch-timer.h b/arch/arm64/kernel/include/arch-timer.h index 8c3720cb..9f39ecf1 100644 --- a/arch/arm64/kernel/include/arch-timer.h +++ b/arch/arm64/kernel/include/arch-timer.h @@ -1,7 +1,9 @@ -/* arch-timer.h COPYRIGHT FUJITSU LIMITED 2016 */ +/* arch-timer.h COPYRIGHT FUJITSU LIMITED 2016-2018 */ #ifndef __HEADER_ARM64_COMMON_ARCH_TIMER_H #define __HEADER_ARM64_COMMON_ARCH_TIMER_H +#include + /* @ref.impl include/clocksource/arm_arch_timer.h */ #define ARCH_TIMER_USR_PCT_ACCESS_EN (1 << 0) /* physical counter */ #define ARCH_TIMER_USR_VCT_ACCESS_EN (1 << 1) /* virtual counter */ @@ -11,4 +13,19 @@ #define ARCH_TIMER_USR_VT_ACCESS_EN (1 << 8) /* virtual timer registers */ #define ARCH_TIMER_USR_PT_ACCESS_EN (1 << 9) /* physical timer registers */ +/* @ref.impl linux4.10.16 */ +/* include/clocksource/arm_arch_timer.h */ +#define ARCH_TIMER_CTRL_ENABLE (1 << 0) +#define ARCH_TIMER_CTRL_IT_MASK (1 << 1) +#define ARCH_TIMER_CTRL_IT_STAT (1 << 2) + +enum arch_timer_reg { + ARCH_TIMER_REG_CTRL, + ARCH_TIMER_REG_TVAL, +}; + +extern int get_timer_intrid(void); +extern void arch_timer_init(void); +extern struct ihk_mc_interrupt_handler *get_timer_handler(void); + #endif /* __HEADER_ARM64_COMMON_ARCH_TIMER_H */ diff --git a/arch/arm64/kernel/include/arch/cpu.h b/arch/arm64/kernel/include/arch/cpu.h index 5a5161be..803d6ad7 100644 --- a/arch/arm64/kernel/include/arch/cpu.h +++ b/arch/arm64/kernel/include/arch/cpu.h @@ -1,4 +1,4 @@ -/* cpu.h COPYRIGHT FUJITSU LIMITED 2016-2017 */ +/* cpu.h COPYRIGHT FUJITSU LIMITED 2016-2018 */ #ifndef __HEADER_ARM64_ARCH_CPU_H #define __HEADER_ARM64_ARCH_CPU_H @@ -12,6 +12,8 @@ #define dmb(opt) asm volatile("dmb " #opt : : : "memory") #define dsb(opt) asm volatile("dsb " #opt : : : "memory") +#include + #define mb() dsb(sy) 
#define rmb() dsb(ld) #define wmb() dsb(st) @@ -69,12 +71,10 @@ do { \ #define smp_mb__before_atomic() smp_mb() #define smp_mb__after_atomic() smp_mb() -/* @ref.impl linux-linaro/arch/arm64/include/asm/arch_timer.h::arch_counter_get_cntvct */ #define read_tsc() \ ({ \ unsigned long cval; \ - isb(); \ - asm volatile("mrs %0, cntvct_el0" : "=r" (cval)); \ + cval = rdtsc(); \ cval; \ }) diff --git a/arch/arm64/kernel/include/arch_rusage.h b/arch/arm64/kernel/include/arch_rusage.h index e8409e5a..2fb59636 100644 --- a/arch/arm64/kernel/include/arch_rusage.h +++ b/arch/arm64/kernel/include/arch_rusage.h @@ -3,30 +3,29 @@ #include -//#define DEBUG_RUSAGE +#define DEBUG_RUSAGE + +#define IHK_OS_PGSIZE_4KB 0 +#define IHK_OS_PGSIZE_2MB 1 +#define IHK_OS_PGSIZE_1GB 2 extern struct rusage_global *rusage; -#define IHK_OS_PGSIZE_4KB 0 -#define IHK_OS_PGSIZE_16KB 1 -#define IHK_OS_PGSIZE_64KB 2 - static inline int rusage_pgsize_to_pgtype(size_t pgsize) { int ret = IHK_OS_PGSIZE_4KB; - switch (pgsize) { - case __PTL1_SIZE: + + if (pgsize == PTL1_SIZE) { ret = IHK_OS_PGSIZE_4KB; - break; - case __PTL2_SIZE: - ret = IHK_OS_PGSIZE_16KB; - break; - case __PTL3_SIZE: - ret = IHK_OS_PGSIZE_64KB; - break; - default: + } + else if (pgsize == PTL2_SIZE) { + ret = IHK_OS_PGSIZE_2MB; + } + else if (pgsize == PTL3_SIZE) { + ret = IHK_OS_PGSIZE_1GB; + } + else { kprintf("%s: Error: Unknown pgsize=%ld\n", __FUNCTION__, pgsize); - break; } return ret; } diff --git a/arch/arm64/kernel/include/arm-gic-v3.h b/arch/arm64/kernel/include/arm-gic-v3.h index f4a2009c..ac037ad3 100644 --- a/arch/arm64/kernel/include/arm-gic-v3.h +++ b/arch/arm64/kernel/include/arm-gic-v3.h @@ -19,6 +19,7 @@ #ifndef __LINUX_IRQCHIP_ARM_GIC_V3_H #define __LINUX_IRQCHIP_ARM_GIC_V3_H +#include /* @ref.impl include/linux/irqchip/arm-gic-v3.h */ #include @@ -381,11 +382,4 @@ #define ICH_AP1R2_EL2 __AP1Rx_EL2(2) #define ICH_AP1R3_EL2 __AP1Rx_EL2(3) - -/** - * @ref.impl host-kernel/include/linux/stringify.h - */ -#define 
__stringify_1(x...) #x -#define __stringify(x...) __stringify_1(x) - #endif /* __LINUX_IRQCHIP_ARM_GIC_V3_H */ diff --git a/arch/arm64/kernel/include/ihk/context.h b/arch/arm64/kernel/include/ihk/context.h index bf799959..4bfb3419 100644 --- a/arch/arm64/kernel/include/ihk/context.h +++ b/arch/arm64/kernel/include/ihk/context.h @@ -65,17 +65,17 @@ static inline void pt_regs_write_reg(struct pt_regs *regs, int r, } /* temp */ -#define ihk_mc_syscall_arg0(uc) (uc)->regs[0] -#define ihk_mc_syscall_arg1(uc) (uc)->regs[1] -#define ihk_mc_syscall_arg2(uc) (uc)->regs[2] -#define ihk_mc_syscall_arg3(uc) (uc)->regs[3] -#define ihk_mc_syscall_arg4(uc) (uc)->regs[4] -#define ihk_mc_syscall_arg5(uc) (uc)->regs[5] +#define ihk_mc_syscall_arg0(uc) ((uc)->regs[0]) +#define ihk_mc_syscall_arg1(uc) ((uc)->regs[1]) +#define ihk_mc_syscall_arg2(uc) ((uc)->regs[2]) +#define ihk_mc_syscall_arg3(uc) ((uc)->regs[3]) +#define ihk_mc_syscall_arg4(uc) ((uc)->regs[4]) +#define ihk_mc_syscall_arg5(uc) ((uc)->regs[5]) -#define ihk_mc_syscall_ret(uc) (uc)->regs[0] -#define ihk_mc_syscall_number(uc) (uc)->regs[8] +#define ihk_mc_syscall_ret(uc) ((uc)->regs[0]) +#define ihk_mc_syscall_number(uc) ((uc)->regs[8]) -#define ihk_mc_syscall_pc(uc) (uc)->pc -#define ihk_mc_syscall_sp(uc) (uc)->sp +#define ihk_mc_syscall_pc(uc) ((uc)->pc) +#define ihk_mc_syscall_sp(uc) ((uc)->sp) #endif /* !__HEADER_ARM64_IHK_CONTEXT_H */ diff --git a/arch/arm64/kernel/include/imp-sysreg.h b/arch/arm64/kernel/include/imp-sysreg.h new file mode 100644 index 00000000..4bacdaa8 --- /dev/null +++ b/arch/arm64/kernel/include/imp-sysreg.h @@ -0,0 +1,102 @@ +/* imp-sysreg.h COPYRIGHT FUJITSU LIMITED 2016-2018 */ +#ifndef __ASM_IMP_SYSREG_H +#define __ASM_IMP_SYSREG_H + +#ifndef __ASSEMBLY__ + +/* register sys_reg list */ +#define IMP_FJ_TAG_ADDRESS_CTRL_EL1 sys_reg(3, 0, 11, 2, 0) +#define IMP_SCCR_CTRL_EL1 sys_reg(3, 0, 11, 8, 0) +#define IMP_SCCR_ASSIGN_EL1 sys_reg(3, 0, 11, 8, 1) +#define IMP_SCCR_SET0_L2_EL1 sys_reg(3, 0, 
15, 8, 2) +#define IMP_SCCR_SET1_L2_EL1 sys_reg(3, 0, 15, 8, 3) +#define IMP_SCCR_L1_EL0 sys_reg(3, 3, 11, 8, 2) +#define IMP_PF_CTRL_EL1 sys_reg(3, 0, 11, 4, 0) +#define IMP_PF_STREAM_DETECT_CTRL_EL0 sys_reg(3, 3, 11, 4, 0) +#define IMP_PF_INJECTION_CTRL0_EL0 sys_reg(3, 3, 11, 6, 0) +#define IMP_PF_INJECTION_CTRL1_EL0 sys_reg(3, 3, 11, 6, 1) +#define IMP_PF_INJECTION_CTRL2_EL0 sys_reg(3, 3, 11, 6, 2) +#define IMP_PF_INJECTION_CTRL3_EL0 sys_reg(3, 3, 11, 6, 3) +#define IMP_PF_INJECTION_CTRL4_EL0 sys_reg(3, 3, 11, 6, 4) +#define IMP_PF_INJECTION_CTRL5_EL0 sys_reg(3, 3, 11, 6, 5) +#define IMP_PF_INJECTION_CTRL6_EL0 sys_reg(3, 3, 11, 6, 6) +#define IMP_PF_INJECTION_CTRL7_EL0 sys_reg(3, 3, 11, 6, 7) +#define IMP_PF_INJECTION_DISTANCE0_EL0 sys_reg(3, 3, 11, 7, 0) +#define IMP_PF_INJECTION_DISTANCE1_EL0 sys_reg(3, 3, 11, 7, 1) +#define IMP_PF_INJECTION_DISTANCE2_EL0 sys_reg(3, 3, 11, 7, 2) +#define IMP_PF_INJECTION_DISTANCE3_EL0 sys_reg(3, 3, 11, 7, 3) +#define IMP_PF_INJECTION_DISTANCE4_EL0 sys_reg(3, 3, 11, 7, 4) +#define IMP_PF_INJECTION_DISTANCE5_EL0 sys_reg(3, 3, 11, 7, 5) +#define IMP_PF_INJECTION_DISTANCE6_EL0 sys_reg(3, 3, 11, 7, 6) +#define IMP_PF_INJECTION_DISTANCE7_EL0 sys_reg(3, 3, 11, 7, 7) +#define IMP_BARRIER_CTRL_EL1 sys_reg(3, 0, 11, 12, 0) +#define IMP_BARRIER_BST_BIT_EL1 sys_reg(3, 0, 11, 12, 4) +#define IMP_BARRIER_INIT_SYNC_BB0_EL1 sys_reg(3, 0, 15, 13, 0) +#define IMP_BARRIER_INIT_SYNC_BB1_EL1 sys_reg(3, 0, 15, 13, 1) +#define IMP_BARRIER_INIT_SYNC_BB2_EL1 sys_reg(3, 0, 15, 13, 2) +#define IMP_BARRIER_INIT_SYNC_BB3_EL1 sys_reg(3, 0, 15, 13, 3) +#define IMP_BARRIER_INIT_SYNC_BB4_EL1 sys_reg(3, 0, 15, 13, 4) +#define IMP_BARRIER_INIT_SYNC_BB5_EL1 sys_reg(3, 0, 15, 13, 5) +#define IMP_BARRIER_ASSIGN_SYNC_W0_EL1 sys_reg(3, 0, 15, 15, 0) +#define IMP_BARRIER_ASSIGN_SYNC_W1_EL1 sys_reg(3, 0, 15, 15, 1) +#define IMP_BARRIER_ASSIGN_SYNC_W2_EL1 sys_reg(3, 0, 15, 15, 2) +#define IMP_BARRIER_ASSIGN_SYNC_W3_EL1 sys_reg(3, 0, 15, 15, 3) +#define 
IMP_SOC_STANDBY_CTRL_EL1 sys_reg(3, 0, 11, 0, 0) +#define IMP_FJ_CORE_UARCH_CTRL_EL2 sys_reg(3, 4, 11, 0, 4) +#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1 sys_reg(3, 0, 11, 0, 5) + +/* macros */ +#define PWR_REG_MASK(reg, feild) (((UL(1) << ((reg##_##feild##_MSB) - (reg##_##feild##_LSB) + 1)) - 1) << (reg##_##feild##_LSB)) + +/* IMP_FJ_TAG_ADDRESS_CTRL_EL1 */ +#define IMP_FJ_TAG_ADDRESS_CTRL_EL1_TBO0_SHIFT (0) +#define IMP_FJ_TAG_ADDRESS_CTRL_EL1_SEC0_SHIFT (8) +#define IMP_FJ_TAG_ADDRESS_CTRL_EL1_PFE0_SHIFT (9) +#define IMP_FJ_TAG_ADDRESS_CTRL_EL1_TBO0_MASK (1UL << IMP_FJ_TAG_ADDRESS_CTRL_EL1_TBO0_SHIFT) +#define IMP_FJ_TAG_ADDRESS_CTRL_EL1_SEC0_MASK (1UL << IMP_FJ_TAG_ADDRESS_CTRL_EL1_SEC0_SHIFT) +#define IMP_FJ_TAG_ADDRESS_CTRL_EL1_PFE0_MASK (1UL << IMP_FJ_TAG_ADDRESS_CTRL_EL1_PFE0_SHIFT) + +/* IMP_SCCR_CTRL_EL1 */ +#define IMP_SCCR_CTRL_EL1_EL1AE_SHIFT (63) +#define IMP_SCCR_CTRL_EL1_EL1AE_MASK (1UL << IMP_SCCR_CTRL_EL1_EL1AE_SHIFT) + +/* IMP_SCCR_SET0_L2_EL1 */ +#define IMP_SCCR_SET0_L2_EL1_L2_SEC0_SHIFT (0) + +/* IMP_PF_CTRL_EL1 */ +#define IMP_PF_CTRL_EL1_EL1AE_ENABLE (1UL << 63) +#define IMP_PF_CTRL_EL1_EL0AE_ENABLE (1UL << 62) + +/* IMP_BARRIER_CTRL_EL1 */ +#define IMP_BARRIER_CTRL_EL1_EL1AE_ENABLE (1UL << 63) +#define IMP_BARRIER_CTRL_EL1_EL0AE_ENABLE (1UL << 62) + +/* IMP_SOC_STANDBY_CTRL_EL1 */ +#define IMP_SOC_STANDBY_CTRL_EL1_ECO_MODE_MSB 2 +#define IMP_SOC_STANDBY_CTRL_EL1_ECO_MODE_LSB 2 +#define IMP_SOC_STANDBY_CTRL_EL1_MODE_CHANGE_MSB 1 +#define IMP_SOC_STANDBY_CTRL_EL1_MODE_CHANGE_LSB 1 +#define IMP_SOC_STANDBY_CTRL_EL1_RETENTION_MSB 0 +#define IMP_SOC_STANDBY_CTRL_EL1_RETENTION_LSB 0 +#define IMP_SOC_STANDBY_CTRL_EL1_ECO_MODE PWR_REG_MASK(IMP_SOC_STANDBY_CTRL_EL1, ECO_MODE) +#define IMP_SOC_STANDBY_CTRL_EL1_MODE_CHANGE PWR_REG_MASK(IMP_SOC_STANDBY_CTRL_EL1, MODE_CHANGE) +#define IMP_SOC_STANDBY_CTRL_EL1_RETENTION PWR_REG_MASK(IMP_SOC_STANDBY_CTRL_EL1, RETENTION) + +/* IMP_FJ_CORE_UARCH_RESTRECTION_EL1 */ +#define 
IMP_FJ_CORE_UARCH_RESTRECTION_EL1_FL_RESTRICT_TRANS_MSB 33 +#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_FL_RESTRICT_TRANS_LSB 33 +#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_ISSUE_RESTRICTION_MSB 9 +#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_ISSUE_RESTRICTION_LSB 8 +#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_EX_RESTRICTION_MSB 0 +#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_EX_RESTRICTION_LSB 0 +#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_FL_RESTRICT_TRANS PWR_REG_MASK(IMP_FJ_CORE_UARCH_RESTRECTION_EL1, FL_RESTRICT_TRANS) +#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_ISSUE_RESTRICTION PWR_REG_MASK(IMP_FJ_CORE_UARCH_RESTRECTION_EL1, ISSUE_RESTRICTION) +#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_EX_RESTRICTION PWR_REG_MASK(IMP_FJ_CORE_UARCH_RESTRECTION_EL1, EX_RESTRICTION) + +void scdrv_registers_init(void); +void hpc_registers_init(void); +void vhbm_barrier_registers_init(void); + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_IMP_SYSREG_H */ diff --git a/arch/arm64/kernel/include/irq.h b/arch/arm64/kernel/include/irq.h index 91fd6ffd..17f641b8 100644 --- a/arch/arm64/kernel/include/irq.h +++ b/arch/arm64/kernel/include/irq.h @@ -1,4 +1,4 @@ -/* irq.h COPYRIGHT FUJITSU LIMITED 2015-2017 */ +/* irq.h COPYRIGHT FUJITSU LIMITED 2015-2018 */ #ifndef __HEADER_ARM64_IRQ_H #define __HEADER_ARM64_IRQ_H @@ -15,42 +15,15 @@ #define INTRID_CPU_STOP 3 #define INTRID_TLB_FLUSH 4 #define INTRID_STACK_TRACE 6 -#define INTRID_MEMDUMP 7 +#define INTRID_MULTI_NMI 7 /* use PPI interrupt number */ +#define INTRID_PERF_OVF 23 #define INTRID_HYP_PHYS_TIMER 26 /* cnthp */ #define INTRID_VIRT_TIMER 27 /* cntv */ #define INTRID_HYP_VIRT_TIMER 28 /* cnthv */ #define INTRID_PHYS_TIMER 30 /* cntp */ -/* timer intrid getter */ -static int get_virt_timer_intrid(void) -{ -#ifdef CONFIG_ARM64_VHE - unsigned long mmfr = read_cpuid(ID_AA64MMFR1_EL1); - - if ((mmfr >> ID_AA64MMFR1_VHE_SHIFT) & 1UL) { - return INTRID_HYP_VIRT_TIMER; - } -#endif /* CONFIG_ARM64_VHE */ - return INTRID_VIRT_TIMER; -} - -static int 
get_phys_timer_intrid(void) -{ -#ifdef CONFIG_ARM64_VHE - unsigned long mmfr = read_cpuid(ID_AA64MMFR1_EL1); - - if ((mmfr >> ID_AA64MMFR1_VHE_SHIFT) & 1UL) { - return INTRID_HYP_PHYS_TIMER; - } -#endif /* CONFIG_ARM64_VHE */ - return INTRID_PHYS_TIMER; -} - -/* use timer checker */ -extern unsigned long is_use_virt_timer(void); - /* Functions for GICv2 */ extern void gic_dist_init_gicv2(unsigned long dist_base_pa, unsigned long size); extern void gic_cpu_init_gicv2(unsigned long cpu_base_pa, unsigned long size); diff --git a/arch/arm64/kernel/include/registers.h b/arch/arm64/kernel/include/registers.h index 805768e2..11b5ac41 100644 --- a/arch/arm64/kernel/include/registers.h +++ b/arch/arm64/kernel/include/registers.h @@ -1,9 +1,10 @@ -/* registers.h COPYRIGHT FUJITSU LIMITED 2015-2016 */ +/* registers.h COPYRIGHT FUJITSU LIMITED 2015-2018 */ #ifndef __HEADER_ARM64_COMMON_REGISTERS_H #define __HEADER_ARM64_COMMON_REGISTERS_H #include #include +#include #define RFLAGS_CF (1 << 0) #define RFLAGS_PF (1 << 2) @@ -76,15 +77,12 @@ static unsigned long rdmsr(unsigned int index) return 0; } -/* @ref.impl linux-linaro/arch/arm64/include/asm/arch_timer.h::arch_counter_get_cntvct */ -static unsigned long rdtsc(void) +/* @ref.impl linux4.10.16 */ +/* arch/arm64/include/asm/arch_timer.h:arch_counter_get_cntvct() */ +static inline unsigned long rdtsc(void) { - unsigned long cval; - isb(); - asm volatile("mrs %0, cntvct_el0" : "=r" (cval)); - - return cval; + return read_sysreg(cntvct_el0); } static void set_perfctl(int counter, int event, int mask) diff --git a/arch/arm64/kernel/include/signal.h b/arch/arm64/kernel/include/signal.h index 2cce641a..99bc37cc 100644 --- a/arch/arm64/kernel/include/signal.h +++ b/arch/arm64/kernel/include/signal.h @@ -407,8 +407,6 @@ struct ucontext { }; void arm64_notify_die(const char *str, struct pt_regs *regs, struct siginfo *info, int err); -void set_signal(int sig, void *regs, struct siginfo *info); -void check_signal(unsigned long rc, void 
*regs, int num); void check_signal_irq_disabled(unsigned long rc, void *regs, int num); #endif /* __HEADER_ARM64_COMMON_SIGNAL_H */ diff --git a/arch/arm64/kernel/include/syscall_list.h b/arch/arm64/kernel/include/syscall_list.h index 8c440b55..0c08672f 100644 --- a/arch/arm64/kernel/include/syscall_list.h +++ b/arch/arm64/kernel/include/syscall_list.h @@ -5,6 +5,7 @@ SYSCALL_DELEGATED(17, getcwd) SYSCALL_DELEGATED(22, epoll_pwait) SYSCALL_DELEGATED(25, fcntl) SYSCALL_HANDLED(29, ioctl) +SYSCALL_DELEGATED(35, unlinkat) SYSCALL_DELEGATED(43, statfs) SYSCALL_DELEGATED(44, fstatfs) SYSCALL_HANDLED(56, openat) @@ -112,6 +113,8 @@ SYSCALL_HANDLED(238, migrate_pages) SYSCALL_HANDLED(239, move_pages) #ifdef PERF_ENABLE SYSCALL_HANDLED(241, perf_event_open) +#else // PERF_ENABLE +SYSCALL_DELEGATED(241, perf_event_open) #endif // PERF_ENABLE SYSCALL_HANDLED(260, wait4) SYSCALL_HANDLED(270, process_vm_readv) @@ -138,9 +141,8 @@ SYSCALL_HANDLED(804, resume_threads) SYSCALL_HANDLED(811, linux_spawn) SYSCALL_DELEGATED(1024, open) -SYSCALL_DELEGATED(1026, unlink) SYSCALL_DELEGATED(1035, readlink) SYSCALL_HANDLED(1045, signalfd) SYSCALL_DELEGATED(1049, stat) SYSCALL_DELEGATED(1060, getpgrp) -SYSCALL_DELEGATED(1062, time) +SYSCALL_HANDLED(1062, time) diff --git a/arch/arm64/kernel/include/sysreg.h b/arch/arm64/kernel/include/sysreg.h index abfdbef1..049267f4 100644 --- a/arch/arm64/kernel/include/sysreg.h +++ b/arch/arm64/kernel/include/sysreg.h @@ -1,4 +1,4 @@ -/* sysreg.h COPYRIGHT FUJITSU LIMITED 2016-2017 */ +/* sysreg.h COPYRIGHT FUJITSU LIMITED 2016-2018 */ /* * Macros for accessing system registers with older binutils. 
* @@ -23,6 +23,7 @@ #include #include +#include /* * ARMv8 ARM reserves the following encoding for system registers: @@ -56,12 +57,6 @@ #define sys_reg_CRm(id) (((id) >> CRm_shift) & CRm_mask) #define sys_reg_Op2(id) (((id) >> Op2_shift) & Op2_mask) -#ifdef __ASSEMBLY__ -#define __emit_inst(x).inst (x) -#else -#define __emit_inst(x)".inst " __stringify((x)) "\n\t" -#endif - #define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0) #define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5) #define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6) @@ -143,6 +138,12 @@ #define ID_AA64ISAR0_SHA1_SHIFT 8 #define ID_AA64ISAR0_AES_SHIFT 4 +/* id_aa64isar1 */ +#define ID_AA64ISAR1_LRCPC_SHIFT 20 +#define ID_AA64ISAR1_FCMA_SHIFT 16 +#define ID_AA64ISAR1_JSCVT_SHIFT 12 +#define ID_AA64ISAR1_DPB_SHIFT 0 + /* id_aa64pfr0 */ #define ID_AA64PFR0_SVE_SHIFT 32 #define ID_AA64PFR0_GIC_SHIFT 24 @@ -272,15 +273,46 @@ /* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */ #define SYS_MPIDR_SAFE_VAL (1UL << 31) -#ifdef __ASSEMBLY__ +/* SYS_MIDR_EL1 */ +//mask +#define SYS_MIDR_EL1_IMPLEMENTER_MASK (0xFFUL) +#define SYS_MIDR_EL1_PPNUM_MASK (0xFFFUL) +//shift +#define SYS_MIDR_EL1_IMPLEMENTER_SHIFT (24) +#define SYS_MIDR_EL1_PPNUM_SHIFT (0x4) +//val +#define SYS_MIDR_EL1_IMPLEMENTER_FJ (0x46) +#define SYS_MIDR_EL1_PPNUM_TCHIP (0x1) +#define READ_ACCESS (0) +#define WRITE_ACCESS (1) +#define ACCESS_REG_FUNC(name, reg) \ + static void xos_access_##name(uint8_t flag, uint64_t *reg_value) \ + { \ + if (flag == READ_ACCESS) { \ + __asm__ __volatile__("mrs_s %0," __stringify(reg) "\n\t" \ + :"=&r"(*reg_value)::); \ + } \ + else if (flag == WRITE_ACCESS) { \ + __asm__ __volatile__("msr_s" __stringify(reg) ", %0\n\t" \ + ::"r"(*reg_value):); \ + } else { \ + ; \ + } \ + } + +#define XOS_FALSE (0) +#define XOS_TRUE (1) + +#ifdef __ASSEMBLY__ +#define __emit_inst(x).inst (x) .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 .equ .L__reg_num_x\num, \num .endr .equ .L__reg_num_xzr, 31 
.macro mrs_s, rt, sreg - __emit_inst(0xd5200000|(\sreg)|(.L__reg_num_\rt)) + __emit_inst(0xd5200000|(\sreg)|(.L__reg_num_\rt)) .endm .macro msr_s, sreg, rt @@ -288,7 +320,7 @@ .endm #else - +#define __emit_inst(x)".inst " __stringify((x)) "\n\t" asm( " .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" " .equ .L__reg_num_x\\num, \\num\n" @@ -304,6 +336,28 @@ asm( " .endm\n" ); +ACCESS_REG_FUNC(midr_el1, SYS_MIDR_EL1); +static int xos_is_tchip(void) +{ + uint64_t reg = 0; + int ret = 0, impl = 0, part = 0; + + xos_access_midr_el1(READ_ACCESS, ®); + + impl = (reg >> SYS_MIDR_EL1_IMPLEMENTER_SHIFT) & + SYS_MIDR_EL1_IMPLEMENTER_MASK; + part = (reg >> SYS_MIDR_EL1_PPNUM_SHIFT) & SYS_MIDR_EL1_PPNUM_MASK; + + if ((impl == SYS_MIDR_EL1_IMPLEMENTER_FJ) && + (part == SYS_MIDR_EL1_PPNUM_TCHIP)) { + ret = XOS_TRUE; + } + else { + ret = XOS_FALSE; + } + + return ret; +} #endif /* @@ -344,4 +398,6 @@ asm( /* @ref.impl arch/arm64/include/asm/kvm_arm.h */ #define CPTR_EL2_TZ (1 << 8) +#include "imp-sysreg.h" + #endif /* __ASM_SYSREG_H */ diff --git a/arch/arm64/kernel/include/thread_info.h b/arch/arm64/kernel/include/thread_info.h index 069cdf0c..746c190a 100644 --- a/arch/arm64/kernel/include/thread_info.h +++ b/arch/arm64/kernel/include/thread_info.h @@ -1,15 +1,22 @@ -/* thread_info.h COPYRIGHT FUJITSU LIMITED 2015-2017 */ +/* thread_info.h COPYRIGHT FUJITSU LIMITED 2015-2018 */ #ifndef __HEADER_ARM64_COMMON_THREAD_INFO_H #define __HEADER_ARM64_COMMON_THREAD_INFO_H -#define KERNEL_STACK_SIZE 32768 /* 8 page */ +#define MIN_KERNEL_STACK_SHIFT 15 + +#include + +#if (MIN_KERNEL_STACK_SHIFT < PAGE_SHIFT) +#define KERNEL_STACK_SHIFT PAGE_SHIFT +#else +#define KERNEL_STACK_SHIFT MIN_KERNEL_STACK_SHIFT +#endif + +#define KERNEL_STACK_SIZE (UL(1) << KERNEL_STACK_SHIFT) #define THREAD_START_SP KERNEL_STACK_SIZE - 16 #ifndef __ASSEMBLY__ -#define ALIGN_UP(x, align) ALIGN_DOWN((x) + (align) - 1, align) -#define ALIGN_DOWN(x, align) ((x) & 
~((align) - 1)) - #include #include @@ -53,8 +60,8 @@ struct thread_info { struct arm64_cpu_local_thread { struct thread_info thread_info; - unsigned long paniced; /* 136 */ - uint64_t panic_regs[34]; /* 144 */ + unsigned long paniced; + uint64_t panic_regs[34]; }; union arm64_cpu_local_variables { diff --git a/arch/arm64/kernel/include/virt.h b/arch/arm64/kernel/include/virt.h index 0fbffe32..06d27c90 100644 --- a/arch/arm64/kernel/include/virt.h +++ b/arch/arm64/kernel/include/virt.h @@ -2,7 +2,21 @@ #ifndef __HEADER_ARM64_COMMON_VIRT_H #define __HEADER_ARM64_COMMON_VIRT_H +/* @ref.impl linux-v4.15-rc3 arch/arm64/include/asm/virt.h */ #define BOOT_CPU_MODE_EL1 (0xe11) #define BOOT_CPU_MODE_EL2 (0xe12) +#ifndef __ASSEMBLY__ + +#include +#include + +/* @ref.impl linux-v4.15-rc3 arch/arm64/include/asm/virt.h */ +static inline int is_kernel_in_hyp_mode(void) +{ + return read_sysreg(CurrentEL) == CurrentEL_EL2; +} + +#endif /* !__ASSEMBLY__ */ + #endif /* !__HEADER_ARM64_COMMON_VIRT_H */ diff --git a/arch/arm64/kernel/irq-gic-v2.c b/arch/arm64/kernel/irq-gic-v2.c index 995b3bb4..d9162cb2 100644 --- a/arch/arm64/kernel/irq-gic-v2.c +++ b/arch/arm64/kernel/irq-gic-v2.c @@ -1,13 +1,15 @@ -/* irq-gic-v2.c COPYRIGHT FUJITSU LIMITED 2015-2016 */ - +/* irq-gic-v2.c COPYRIGHT FUJITSU LIMITED 2015-2018 */ #include #include #include #include #include #include +#include #include #include +#include +#include // #define DEBUG_GICV2 @@ -52,17 +54,11 @@ static void arm64_raise_sgi_gicv2(unsigned int cpuid, unsigned int vector) * arm64_raise_spi_gicv2 * @ref.impl nothing. */ -extern unsigned int ihk_ikc_irq_apicid; static void arm64_raise_spi_gicv2(unsigned int cpuid, unsigned int vector) { uint64_t spi_reg_offset; uint32_t spi_set_pending_bitpos; - if (cpuid != ihk_ikc_irq_apicid) { - ekprintf("SPI(irq#%d) cannot send other than the host.\n", vector); - return; - } - /** * calculates register offset and bit position corresponding to the numbers. 
* @@ -109,8 +105,9 @@ extern int interrupt_from_user(void *); void handle_interrupt_gicv2(struct pt_regs *regs) { unsigned int irqstat, irqnr; + const int from_user = interrupt_from_user(regs); - set_cputime(interrupt_from_user(regs)? 1: 2); + set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN); do { // get GICC_IAR.InterruptID irqstat = readl_relaxed(cpu_base + GIC_CPU_INTACK); @@ -130,7 +127,13 @@ void handle_interrupt_gicv2(struct pt_regs *regs) */ break; } while (1); - set_cputime(0); + set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT); + + /* for migration by IPI */ + if (get_this_cpu_local_var()->flags & CPU_FLAG_NEED_MIGRATE) { + schedule(); + check_signal(0, regs, 0); + } } void gic_dist_init_gicv2(unsigned long dist_base_pa, unsigned long size) @@ -147,10 +150,6 @@ void gic_enable_gicv2(void) { unsigned int enable_ppi_sgi = 0; - if (is_use_virt_timer()) { - enable_ppi_sgi |= GICD_ENABLE << get_virt_timer_intrid(); - } else { - enable_ppi_sgi |= GICD_ENABLE << get_phys_timer_intrid(); - } + enable_ppi_sgi |= GICD_ENABLE << get_timer_intrid(); writel_relaxed(enable_ppi_sgi, dist_base + GIC_DIST_ENABLE_SET); } diff --git a/arch/arm64/kernel/irq-gic-v3.c b/arch/arm64/kernel/irq-gic-v3.c index 99a352bc..452cb27f 100644 --- a/arch/arm64/kernel/irq-gic-v3.c +++ b/arch/arm64/kernel/irq-gic-v3.c @@ -1,5 +1,4 @@ -/* irq-gic-v3.c COPYRIGHT FUJITSU LIMITED 2015-2017 */ - +/* irq-gic-v3.c COPYRIGHT FUJITSU LIMITED 2015-2018 */ #include #include #include @@ -8,6 +7,8 @@ #include #include #include +#include +#include //#define DEBUG_GICV3 @@ -264,6 +265,7 @@ static void arm64_raise_spi_gicv3(uint32_t cpuid, uint32_t vector) static void arm64_raise_lpi_gicv3(uint32_t cpuid, uint32_t vector) { // @todo.impl + ekprintf("%s called.\n", __func__); } void arm64_issue_ipi_gicv3(uint32_t cpuid, uint32_t vector) @@ -281,7 +283,7 @@ void arm64_issue_ipi_gicv3(uint32_t cpuid, uint32_t vector) // send LPI (allow only to host) arm64_raise_lpi_gicv3(cpuid, 
vector); } else { - ekprintf("#%d is bad irq number.", vector); + ekprintf("#%d is bad irq number.\n", vector); } } @@ -289,10 +291,11 @@ extern int interrupt_from_user(void *); void handle_interrupt_gicv3(struct pt_regs *regs) { uint64_t irqnr; + const int from_user = interrupt_from_user(regs); irqnr = gic_read_iar(); cpu_enable_nmi(); - set_cputime(interrupt_from_user(regs)? 1: 2); + set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN); while (irqnr != ICC_IAR1_EL1_SPURIOUS) { if ((irqnr < 1020) || (irqnr >= 8192)) { gic_write_eoir(irqnr); @@ -300,11 +303,51 @@ void handle_interrupt_gicv3(struct pt_regs *regs) } irqnr = gic_read_iar(); } - set_cputime(0); + set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT); + + /* for migration by IPI */ + if (get_this_cpu_local_var()->flags & CPU_FLAG_NEED_MIGRATE) { + schedule(); + check_signal(0, regs, 0); + } +} + +static uint64_t gic_mpidr_to_affinity(unsigned long mpidr) +{ + uint64_t aff; + + aff = ((uint64_t)MPIDR_AFFINITY_LEVEL(mpidr, 3) << 32 | + MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16 | + MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8 | + MPIDR_AFFINITY_LEVEL(mpidr, 0)); + return aff; +} + +static void init_spi_routing(uint32_t irq, uint32_t linux_cpu) +{ + uint64_t spi_route_reg_val, spi_route_reg_offset; + + if (irq < 32 || 1020 <= irq) { + ekprintf("%s: irq is not spi number. 
(irq=%d)\n", + __func__, irq); + return; + } + + /* write to GICD_IROUTER */ + spi_route_reg_offset = irq * 8; + spi_route_reg_val = gic_mpidr_to_affinity(cpu_logical_map(linux_cpu)); + + writeq_relaxed(spi_route_reg_val, + (void *)(dist_base + GICD_IROUTER + + spi_route_reg_offset)); } void gic_dist_init_gicv3(unsigned long dist_base_pa, unsigned long size) { + extern int spi_table[]; + extern int nr_spi_table; + int i; + dist_base = map_fixed_area(dist_base_pa, size, 1 /*non chachable*/); #ifdef USE_CAVIUM_THUNDER_X @@ -313,6 +356,14 @@ void gic_dist_init_gicv3(unsigned long dist_base_pa, unsigned long size) is_cavium_thunderx = 1; } #endif + + /* initialize spi routing */ + for (i = 0; i < nr_spi_table; i++) { + if (spi_table[i] == -1) { + continue; + } + init_spi_routing(spi_table[i], i); + } } void gic_cpu_init_gicv3(unsigned long cpu_base_pa, unsigned long size) @@ -349,11 +400,23 @@ void gic_enable_gicv3(void) void *rd_sgi_base = rbase + 0x10000 /* SZ_64K */; int i; unsigned int enable_ppi_sgi = GICD_INT_EN_SET_SGI; + extern int ihk_param_nr_pmu_irq_affi; + extern int ihk_param_pmu_irq_affi[CONFIG_SMP_MAX_CORES]; - if (is_use_virt_timer()) { - enable_ppi_sgi |= GICD_ENABLE << get_virt_timer_intrid(); - } else { - enable_ppi_sgi |= GICD_ENABLE << get_phys_timer_intrid(); + enable_ppi_sgi |= GICD_ENABLE << get_timer_intrid(); + + if (0 < ihk_param_nr_pmu_irq_affi) { + for (i = 0; i < ihk_param_nr_pmu_irq_affi; i++) { + if ((0 <= ihk_param_pmu_irq_affi[i]) && + (ihk_param_pmu_irq_affi[i] < + sizeof(enable_ppi_sgi) * BITS_PER_BYTE)) { + enable_ppi_sgi |= GICD_ENABLE << + ihk_param_pmu_irq_affi[i]; + } + } + } + else { + enable_ppi_sgi |= GICD_ENABLE << INTRID_PERF_OVF; } /* @@ -366,9 +429,10 @@ void gic_enable_gicv3(void) /* * Set priority on PPI and SGI interrupts */ - for (i = 0; i < 32; i += 4) + for (i = 0; i < 32; i += 4) { writel_relaxed(GICD_INT_DEF_PRI_X4, - rd_sgi_base + GIC_DIST_PRI + i * 4 / 4); + rd_sgi_base + GIC_DIST_PRI + i); + } /* sync wait */ 
gic_do_wait_for_rwp(rbase); @@ -404,9 +468,12 @@ void gic_enable_gicv3(void) gic_write_bpr1(0); /* Set specific IPI to NMI */ - writeb_relaxed(GICD_INT_NMI_PRI, rd_sgi_base + GIC_DIST_PRI + INTRID_CPU_STOP); - writeb_relaxed(GICD_INT_NMI_PRI, rd_sgi_base + GIC_DIST_PRI + INTRID_MEMDUMP); - writeb_relaxed(GICD_INT_NMI_PRI, rd_sgi_base + GIC_DIST_PRI + INTRID_STACK_TRACE); + writeb_relaxed(GICD_INT_NMI_PRI, + rd_sgi_base + GIC_DIST_PRI + INTRID_CPU_STOP); + writeb_relaxed(GICD_INT_NMI_PRI, + rd_sgi_base + GIC_DIST_PRI + INTRID_MULTI_NMI); + writeb_relaxed(GICD_INT_NMI_PRI, + rd_sgi_base + GIC_DIST_PRI + INTRID_STACK_TRACE); /* sync wait */ gic_do_wait_for_rwp(rbase); diff --git a/arch/arm64/kernel/local.c b/arch/arm64/kernel/local.c index 81a030b8..a71de78a 100644 --- a/arch/arm64/kernel/local.c +++ b/arch/arm64/kernel/local.c @@ -1,4 +1,4 @@ -/* local.c COPYRIGHT FUJITSU LIMITED 2015-2016 */ +/* local.c COPYRIGHT FUJITSU LIMITED 2015-2018 */ #include #include #include @@ -7,24 +7,31 @@ #include #include -#define LOCALS_SPAN (8 * PAGE_SIZE) - /* BSP initialized stack area */ union arm64_cpu_local_variables init_thread_info __attribute__((aligned(KERNEL_STACK_SIZE))); /* BSP/AP idle stack pointer head */ static union arm64_cpu_local_variables *locals; -size_t arm64_cpu_local_variables_span = LOCALS_SPAN; /* for debugger */ +size_t arm64_cpu_local_variables_span = KERNEL_STACK_SIZE; /* for debugger */ /* allocate & initialize BSP/AP idle stack */ void init_processors_local(int max_id) { int i = 0; - const int sz = (max_id + 1) * KERNEL_STACK_SIZE; union arm64_cpu_local_variables *tmp; + const int npages = ((max_id + 1) * + (ALIGN_UP(KERNEL_STACK_SIZE, PAGE_SIZE) >> + PAGE_SHIFT)); + + if (npages < 1) { + panic("idle kernel stack allocation failed."); + } /* allocate one more for alignment */ - locals = ihk_mc_alloc_pages(((sz + PAGE_SIZE - 1) / PAGE_SIZE), IHK_MC_AP_CRITICAL); + locals = ihk_mc_alloc_pages(npages, IHK_MC_AP_CRITICAL); + if (locals == NULL) { + 
panic("idle kernel stack allocation failed."); + } locals = (union arm64_cpu_local_variables *)ALIGN_UP((unsigned long)locals, KERNEL_STACK_SIZE); /* clear struct process, struct process_vm, struct thread_info area */ diff --git a/arch/arm64/kernel/memory.c b/arch/arm64/kernel/memory.c index 4cefc9f2..1168cfcc 100644 --- a/arch/arm64/kernel/memory.c +++ b/arch/arm64/kernel/memory.c @@ -1,4 +1,4 @@ -/* memory.c COPYRIGHT FUJITSU LIMITED 2015-2017 */ +/* memory.c COPYRIGHT FUJITSU LIMITED 2015-2018 */ #include #include #include @@ -15,10 +15,18 @@ #include #include #include +#include + +//#define DEBUG + +#ifdef DEBUG +#undef DDEBUG_DEFAULT +#define DDEBUG_DEFAULT DDEBUG_PRINT +#endif #define NOT_IMPLEMENTED() do { kprintf("%s is not implemented\n", __func__); while(1);} while(0) -static char *last_page = (void*)MAP_EARLY_ALLOC; +static char *last_page; extern char _head[], _end[]; char empty_zero_page[PAGE_SIZE] = { 0 }; @@ -27,14 +35,20 @@ extern unsigned long arm64_kernel_phys_base; extern unsigned long arm64_st_phys_base; extern unsigned long arm64_st_phys_size; +int safe_kernel_map; + /* Arch specific early allocation routine */ void *early_alloc_pages(int nr_pages) { void *p; - if (last_page == (void *)-1) { + if (last_page == NULL) { + last_page = (void *)MAP_EARLY_ALLOC; + } + else if (last_page == (void *)-1) { panic("Early allocator is already finalized. 
Do not use it.\n"); - } else if (MAP_EARLY_ALLOC_END <= (unsigned long)last_page) { + } + else if (MAP_EARLY_ALLOC_END <= (unsigned long)last_page) { panic("Early allocator is out of memory.\n"); } p = last_page; @@ -115,15 +129,22 @@ static inline void arch_flush_tlb_single(const int asid, const unsigned long add void flush_tlb_single(unsigned long addr) { + struct thread *thread = cpu_local_var(current); struct process_vm* vm = NULL; + struct address_space *adsp = NULL; struct page_table* pt = NULL; int asid = 0; - vm = cpu_local_var(current)->vm; - if (vm) { - pt = vm->address_space->page_table; - if (pt) { - asid = get_address_space_id(pt); + if (thread) { + vm = thread->vm; + if (vm) { + adsp = vm->address_space; + if (adsp) { + pt = adsp->page_table; + if (pt) { + asid = get_address_space_id(pt); + } + } } } arch_flush_tlb_single(asid, addr); @@ -1179,7 +1200,7 @@ static int __clear_pt_page(struct page_table *pt, void *virt, int largepage) uint64_t ihk_mc_pt_virt_to_pagemap(struct page_table *pt, unsigned long virt) { - uint64_t ret = 0; + uint64_t ret = PM_PSHIFT(PAGE_SHIFT); unsigned long v = (unsigned long)virt; pte_t* ptep; translation_table_t* tt; @@ -1676,31 +1697,43 @@ static int split_large_page(pte_t *ptep, size_t pgsize) } } tt[i] = pte; + if (pgsize == PTL3_SIZE) { + dkprintf("%lx+,%s: calling memory_stat_rss_add(),size=%ld,pgsize=%ld\n", + pte_is_fileoff(ptep, pgsize) ? + pte_get_off(&pte, pgsize) : + pte_get_phys(&pte), + __func__, PTL2_SIZE, PTL2_SIZE); + memory_stat_rss_add(PTL2_SIZE, PTL2_SIZE); + } + else if (pgsize == PTL2_SIZE) { + dkprintf("%lx+,%s: calling memory_stat_rss_add(),size=%ld,pgsize=%ld\n", + pte_is_fileoff(ptep, pgsize) ? 
+ pte_get_off(&pte, pgsize) : + pte_get_phys(&pte), + __func__, PTL1_SIZE, PTL1_SIZE); + memory_stat_rss_add(PTL1_SIZE, PTL1_SIZE); + } pte += under_pgsize; } - d_table = (pte_t)((unsigned long)tt_pa & PT_PHYSMASK) | PFL_PDIR_TBL_ATTR; + d_table = (pte_t)((unsigned long)tt_pa & PT_PHYSMASK) | + PFL_PDIR_TBL_ATTR; ptl_set(ptep, d_table, table_level); -#if 1 - // revert git:4c8f583c0c0bb6f6fb2b103a006caee67e6668be - // always page_unmap. - pgsize = PTL1_SIZE; -#endif + dkprintf("%lx-,%s: calling memory_stat_rss_sub(),size=%ld,pgsize=%ld\n", + phys_base, __func__, pgsize, pgsize); + memory_stat_rss_sub(pgsize, pgsize); - /* Do not do this check for large pages as they don't come from the zeroobj - * and are not actually mapped. - * TODO: clean up zeroobj as we don't really need it, anonymous mappings - * should be allocated for real */ - if (pgsize != PTL2_SIZE) { - if (phys_base != NOPHYS) { - page = phys_to_page(phys_base); - if (pgsize != PTL2_SIZE && page && page_unmap(page)) { - kprintf("split_large_page:page_unmap:%p\n", page); -#ifndef POSTK_DEBUG_TEMP_FIX_15 - panic("split_large_page:page_unmap\n"); -#endif /* POSTK_DEBUG_TEMP_FIX_15 */ - } + /* Do not do this check for large pages as they don't come from the + * zeroobj and are not actually mapped. 
+ * TODO: clean up zeroobj as we don't really need it, anonymous + * mappings should be allocated for real + */ + if (phys_base != NOPHYS) { + page = phys_to_page(phys_base); + if (page && page_unmap(page)) { + ekprintf("%s: error: page_unmap of %p returned true\n", + __func__, page); } } return 0; @@ -1830,12 +1863,266 @@ int visit_pte_range(page_table_t pt, void *start0, void *end0, int pgshift, return initial_lookup.walk(tt, 0, start, end, initial_lookup.callback, &args); } -int visit_pte_range_safe(page_table_t pt, void *start0, void *end0, int pgshift, - enum visit_pte_flag flags, pte_visitor_t *funcp, void *arg) +static int walk_pte_l1_safe(translation_table_t *tt, uint64_t base, + uint64_t start, uint64_t end, walk_pte_fn_t *funcp, + void *args) { - return 0; + int six; + int eix; + int ret; + int i; + int error; + uint64_t off; + unsigned long phys; + + if (!tt) + return 0; + + six = (start <= base) ? 0 : ((start - base) >> PTL1_SHIFT); + eix = ((end == 0) || ((base + PTL2_SIZE) <= end)) ? PTL1_ENTRIES + : (((end - base) + (PTL1_SIZE - 1)) >> PTL1_SHIFT); + + ret = -ENOENT; + for (i = six; i < eix; ++i) { + phys = ptl_phys(&tt[i], 1); + if (-1 == ihk_mc_chk_page_address(phys)) + continue; + off = i * PTL1_SIZE; + error = (*funcp)(args, &tt[i], base+off, start, end); + if (!error) { + ret = 0; + } + else if (error != -ENOENT) { + ret = error; + break; + } + } + + return ret; } +static int walk_pte_l2_safe(translation_table_t *tt, uint64_t base, + uint64_t start, uint64_t end, walk_pte_fn_t *funcp, + void *args) +{ + int six; + int eix; + int ret; + int i; + int error; + uint64_t off; + unsigned long phys; + + if (!tt) + return 0; + + six = (start <= base) ? 0 : ((start - base) >> PTL2_SHIFT); + eix = ((end == 0) || ((base + PTL3_SIZE) <= end)) ? 
PTL2_ENTRIES : + (((end - base) + (PTL2_SIZE - 1)) >> PTL2_SHIFT); + + ret = -ENOENT; + for (i = six; i < eix; ++i) { + phys = ptl_phys(&tt[i], 2); + if (-1 == ihk_mc_chk_page_address(phys)) + continue; + off = i * PTL2_SIZE; + error = (*funcp)(args, &tt[i], base+off, start, end); + if (!error) { + ret = 0; + } + else if (error != -ENOENT) { + ret = error; + break; + } + } + + return ret; +} + +static int walk_pte_l3_safe(translation_table_t *tt, uint64_t base, + uint64_t start, uint64_t end, walk_pte_fn_t *funcp, + void *args) +{ + int six; + int eix; + int ret; + int i; + int error; + uint64_t off; + unsigned long phys; + + if (!tt) + return 0; + + six = (start <= base) ? 0 : ((start - base) >> PTL3_SHIFT); + eix = ((end == 0) || ((base + PTL4_SIZE) <= end)) ? PTL3_ENTRIES : + (((end - base) + (PTL3_SIZE - 1)) >> PTL3_SHIFT); + + ret = -ENOENT; + for (i = six; i < eix; ++i) { + phys = ptl_phys(&tt[i], 3); + if (-1 == ihk_mc_chk_page_address(phys)) + continue; + off = i * PTL3_SIZE; + error = (*funcp)(args, &tt[i], base+off, start, end); + if (!error) { + ret = 0; + } + else if (error != -ENOENT) { + ret = error; + break; + } + } + + return ret; +} + +static int walk_pte_l4_safe(translation_table_t *tt, uint64_t base, + uint64_t start, uint64_t end, walk_pte_fn_t *funcp, + void *args) +{ + int six; + int eix; + int ret; + int i; + int error; + uint64_t off; + unsigned long phys; + + if (!tt) + return 0; + + six = (start <= base) ? 0 : ((start - base) >> PTL4_SHIFT); + eix = (end == 0) ? 
PTL4_ENTRIES : + (((end - base) + (PTL4_SIZE - 1)) >> PTL4_SHIFT); + + ret = -ENOENT; + for (i = six; i < eix; ++i) { + phys = ptl_phys(&tt[i], 4); + if (-1 == ihk_mc_chk_page_address(phys)) + continue; + off = i * PTL4_SIZE; + error = (*funcp)(args, &tt[i], base+off, start, end); + if (!error) { + ret = 0; + } + else if (error != -ENOENT) { + ret = error; + break; + } + } + + return ret; +} + +static int visit_pte_range_middle_safe(void *arg0, pte_t *ptep, uint64_t base, + uint64_t start, uint64_t end, int level); + +static int visit_pte_l1_safe(void *arg0, pte_t *ptep, uintptr_t base, + uintptr_t start, uintptr_t end) +{ + struct visit_pte_args *args = arg0; + + if (ptl1_null(ptep)) + return 0; + + return (*args->funcp)(args->arg, args->pt, ptep, (void *)base, + PTL1_SHIFT); +} + +static int visit_pte_l2_safe(void *arg0, pte_t *ptep, uintptr_t base, + uintptr_t start, uintptr_t end) +{ + return visit_pte_range_middle_safe(arg0, ptep, base, start, end, 2); +} + +static int visit_pte_l3_safe(void *arg0, pte_t *ptep, uintptr_t base, + uintptr_t start, uintptr_t end) +{ + return visit_pte_range_middle_safe(arg0, ptep, base, start, end, 3); +} + +static int visit_pte_l4_safe(void *arg0, pte_t *ptep, uintptr_t base, + uintptr_t start, uintptr_t end) +{ + return visit_pte_range_middle_safe(arg0, ptep, base, start, end, 4); +} + +static int visit_pte_range_middle_safe(void *arg0, pte_t *ptep, uint64_t base, + uint64_t start, uint64_t end, int level) +{ + const struct table { + walk_pte_t *walk; + walk_pte_fn_t *callback; + unsigned long pgsize; /* curent level page size */ + unsigned long pgshift; /* curent level page shift */ + } table[] = { + { walk_pte_l1_safe, visit_pte_l1_safe, PTL2_SIZE, PTL2_SHIFT }, /*PTL2*/ + { walk_pte_l2_safe, visit_pte_l2_safe, PTL3_SIZE, PTL3_SHIFT }, /*PTL3*/ + { walk_pte_l3_safe, visit_pte_l3_safe, PTL4_SIZE, PTL4_SHIFT }, /*PTL4*/ + }; + const struct table tbl = table[level-2]; + + int error; + struct visit_pte_args *args = arg0; + 
translation_table_t *tt; + + if (ptl_null(ptep, level)) + return 0; + + if (ptl_type_page(ptep, level) + && (start <= base) + && (((base + tbl.pgsize) <= end) + || (end == 0)) + && (!args->pgshift || (args->pgshift == tbl.pgshift))) { + error = (*args->funcp)(args->arg, args->pt, ptep, + (void *)base, tbl.pgshift); + if (error != -E2BIG) { + return error; + } + } + + if (ptl_type_page(ptep, level)) { + ekprintf("%s(level=%d):split large page\n", + __func__, level); + return -ENOMEM; + } + + tt = (translation_table_t *)phys_to_virt(ptl_phys(ptep, level)); + + return tbl.walk(tt, base, start, end, tbl.callback, arg0); +} + +int visit_pte_range_safe(page_table_t pt, void *start0, void *end0, + int pgshift, enum visit_pte_flag flags, + pte_visitor_t *funcp, void *arg) +{ + const struct table { + walk_pte_t *walk; + walk_pte_fn_t *callback; + } tables[] = { + { walk_pte_l2_safe, visit_pte_l2_safe }, /*second*/ + { walk_pte_l3_safe, visit_pte_l3_safe }, /*first*/ + { walk_pte_l4_safe, visit_pte_l4_safe }, /*zero*/ + }; + const struct table initial_lookup = + tables[CONFIG_ARM64_PGTABLE_LEVELS - 2]; + + const uintptr_t start = (uintptr_t)start0; + const uintptr_t end = (uintptr_t)end0; + struct visit_pte_args args; + translation_table_t *tt; + + args.pt = pt; + args.flags = flags; + args.funcp = funcp; + args.arg = arg; + args.pgshift = pgshift; + + tt = get_translation_table(pt); + return initial_lookup.walk(tt, 0, start, end, initial_lookup.callback, + &args); +} struct clear_range_args { int free_physical; struct memobj *memobj; @@ -1853,12 +2140,15 @@ static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base, struct page *page; pte_t old; + //dkprintf("%s: %lx,%lx,%lx\n", __FUNCTION__, base, start, end); + if (ptl1_null(ptep)) { return -ENOENT; } old = xchg(ptep, PTE_NULL); - arch_flush_tlb_single(get_address_space_id(args->vm->address_space->page_table), base); + arch_flush_tlb_single(get_address_space_id(args->vm->address_space->page_table), + base); page = 
NULL; if (!ptl1_fileoff(&old)) { @@ -1866,16 +2156,52 @@ static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base, page = phys_to_page(phys); } - if (page && page_is_in_memobj(page) && ptl1_dirty(&old) && (args->memobj) && - !(args->memobj->flags & MF_ZEROFILL)) { + if (page) { + dkprintf("%s: page=%p,is_in_memobj=%d,(old & PTE_DIRTY)=%lx,memobj=%p,args->memobj->flags=%x\n", + __func__, page, page_is_in_memobj(page), + (old & PTE_DIRTY), args->memobj, + args->memobj ? args->memobj->flags : -1); + } + if (page && page_is_in_memobj(page) && ptl1_dirty(&old) && + (args->memobj) && !(args->memobj->flags & MF_ZEROFILL)) { memobj_flush_page(args->memobj, phys, PTL1_SIZE); } - if (!ptl1_fileoff(&old) && args->free_physical) { - if (!page || (page && page_unmap(page))) { - int npages = PTL1_SIZE / PAGE_SIZE; - ihk_mc_free_pages_user(phys_to_virt(phys), npages); - dkprintf("%s: freeing regular page at 0x%lx\n", __FUNCTION__, base); + if (!ptl1_fileoff(&old)) { + if (args->free_physical) { + if (!page) { + /* Anonymous || !XPMEM attach */ + if (!args->memobj || + !(args->memobj->flags & MF_XPMEM)) { + ihk_mc_free_pages_user(phys_to_virt(phys), + 1); + dkprintf("%s: freeing regular page at 0x%lx\n", + __func__, base); + dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", + pte_get_phys(&old), + __func__, pte_get_phys(&old), + PTL1_SIZE, PTL1_SIZE); + memory_stat_rss_sub(PTL1_SIZE, + PTL1_SIZE); + } else { + dkprintf("%s: XPMEM attach,phys=%lx\n", + __func__, phys); + } + } else if (page_unmap(page)) { + ihk_mc_free_pages_user(phys_to_virt(phys), 1); + dkprintf("%s: freeing file-backed page at 0x%lx\n", + __func__, base); + /* Track page->count for !MF_PREMAP pages */ + dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", + pte_get_phys(&old), __func__, + pte_get_phys(&old), PTL1_SIZE, + PTL1_SIZE); + rusage_memory_stat_sub(args->memobj, + PTL1_SIZE, PTL1_SIZE); + } + } else { + dkprintf("%s: !calling 
memory_stat_rss_sub(),virt=%lx,phys=%lx\n", + __func__, base, pte_get_phys(&old)); } } @@ -1921,6 +2247,8 @@ static int clear_range_middle(void *args0, pte_t *ptep, uint64_t base, struct page *page; pte_t old; + //dkprintf("%s: %lx,%lx,%lx\n", __FUNCTION__, base, start, end); + if (ptl_null(ptep, level)) { return -ENOENT; } @@ -1949,12 +2277,42 @@ static int clear_range_middle(void *args0, pte_t *ptep, uint64_t base, memobj_flush_page(args->memobj, phys, tbl.pgsize); } - - if (!ptl_fileoff(&old, level) && args->free_physical) { - if (!page || (page && page_unmap(page))) { - int npages = tbl.pgsize / PAGE_SIZE; - ihk_mc_free_pages_user(phys_to_virt(phys), npages); - dkprintf("%s(level=%d): freeing large page at 0x%lx\n", __FUNCTION__, level, base); + if (!ptl_fileoff(&old, level)) { + if (args->free_physical) { + if (!page) { + /* Anonymous || !XPMEM attach */ + if (!args->memobj || + !(args->memobj->flags & MF_XPMEM)) { + ihk_mc_free_pages_user(phys_to_virt(phys), + tbl.pgsize / PAGE_SIZE); + dkprintf("%s: freeing large page at 0x%lx\n", + __func__, base); + dkprintf("%lx-,%s: memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", + pte_get_phys(&old), + __func__, + pte_get_phys(&old), + tbl.pgsize, + tbl.pgsize); + memory_stat_rss_sub(tbl.pgsize, + tbl.pgsize); + } else { + dkprintf("%s: XPMEM attach,phys=%lx\n", + __func__, phys); + } + } else if (page_unmap(page)) { + ihk_mc_free_pages_user(phys_to_virt(phys), + tbl.pgsize / PAGE_SIZE); + dkprintf("%s: having unmapped page-struct, freeing large page at 0x%lx\n", + __func__, base); + /* Track page->count for !MF_PREMAP pages */ + dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", + pte_get_phys(&old), __func__, + pte_get_phys(&old), + tbl.pgsize, tbl.pgsize); + rusage_memory_stat_sub(args->memobj, + tbl.pgsize, + tbl.pgsize); + } } } @@ -1996,6 +2354,9 @@ static int clear_range(struct page_table *pt, struct process_vm *vm, pte_t *ptep; size_t pgsize; + dkprintf("%s: 
%p,%lx,%lx,%d,%p\n", + __func__, pt, start, end, free_physical, memobj); + if ((start < vm->region.user_start) || (vm->region.user_end < end) || (end <= start)) { @@ -2356,6 +2717,15 @@ int set_range_l1(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start, ptl1_set(ptep, pte); error = 0; + // call memory_stat_rss_add() here because pgshift is resolved here + if (rusage_memory_stat_add(args->range, phys, PTL1_SIZE, PTL1_SIZE)) { + dkprintf("%lx+,%s: calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n", + phys, __func__, base, phys, PTL1_SIZE, PTL1_SIZE); + } else { + dkprintf("%s: !calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n", + __func__, base, phys, PTL1_SIZE, PTL1_SIZE); + } + out: dkprintf("set_range_l1(%lx,%lx,%lx): %d %lx\n", base, start, end, error, *ptep); @@ -2439,6 +2809,18 @@ retry: dkprintf("set_range_middle(%lx,%lx,%lx,%d):" "large page. %d %lx\n", base, start, end, level, error, *ptep); + // Call memory_stat_rss_add() here because pgshift is resolved here + if (rusage_memory_stat_add(args->range, phys, + tbl.pgsize, + tbl.pgsize)) { + dkprintf("%lx+,%s: calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n", + phys, __func__, base, phys, + tbl.pgsize, tbl.pgsize); + } else { + dkprintf("%s: !calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n", + __func__, base, phys, + tbl.pgsize, tbl.pgsize); + } goto out; } } @@ -2703,7 +3085,7 @@ int arch_get_smaller_page_size(void *args, size_t cursize, size_t *newsizep, out: dkprintf("arch_get_smaller_page_size(%p,%lx): %d %lx %d\n", - args, cursize, error, newsize, p2align); + args, cursize, error, newsize, p2align); return error; } @@ -2787,7 +3169,7 @@ out: } int move_pte_range(page_table_t pt, struct process_vm *vm, - void *src, void *dest, size_t size, struct vm_range *range) + void *src, void *dest, size_t size, struct vm_range *range) { int error; struct move_args args; @@ -2975,7 +3357,7 @@ void 
ihk_mc_reserve_arch_pages(struct ihk_page_allocator_desc *pa_allocator, unsigned long virt_to_phys(void *v) { unsigned long va = (unsigned long)v; - + if (MAP_KERNEL_START <= va) { return va - MAP_KERNEL_START + arm64_kernel_phys_base; } diff --git a/arch/arm64/kernel/mikc.c b/arch/arm64/kernel/mikc.c index 2d61af01..2c052b98 100644 --- a/arch/arm64/kernel/mikc.c +++ b/arch/arm64/kernel/mikc.c @@ -19,7 +19,7 @@ int ihk_mc_ikc_init_first_local(struct ihk_ikc_channel_desc *channel, memset(channel, 0, sizeof(struct ihk_ikc_channel_desc)); - mikc_queue_pages = ((2 * num_processors * MASTER_IKCQ_PKTSIZE) + mikc_queue_pages = ((4 * num_processors * MASTER_IKCQ_PKTSIZE) + (PAGE_SIZE - 1)) / PAGE_SIZE; /* Place both sides in this side */ diff --git a/arch/arm64/kernel/perfctr.c b/arch/arm64/kernel/perfctr.c index 7c3a7dc4..259014e3 100644 --- a/arch/arm64/kernel/perfctr.c +++ b/arch/arm64/kernel/perfctr.c @@ -6,32 +6,60 @@ #include #include #include +#include +#include /* * @ref.impl arch/arm64/kernel/perf_event.c * Set at runtime when we know what CPU type we are. 
*/ struct arm_pmu cpu_pmu; -extern int ihk_param_pmu_irq_affiniry[CONFIG_SMP_MAX_CORES]; -extern int ihk_param_nr_pmu_irq_affiniry; - +extern int ihk_param_pmu_irq_affi[CONFIG_SMP_MAX_CORES]; +extern int ihk_param_nr_pmu_irq_affi; int arm64_init_perfctr(void) { int ret; int i; + int pages; + const struct ihk_mc_cpu_info *cpu_info; memset(&cpu_pmu, 0, sizeof(cpu_pmu)); ret = armv8pmu_init(&cpu_pmu); - if (!ret) { + if (ret) { return ret; } - for (i = 0; i < ihk_param_nr_pmu_irq_affiniry; i++) { - ret = ihk_mc_register_interrupt_handler(ihk_param_pmu_irq_affiniry[i], cpu_pmu.handler); + + cpu_info = ihk_mc_get_cpu_info(); + pages = (sizeof(struct per_cpu_arm_pmu) * cpu_info->ncpus + + PAGE_SIZE - 1) >> PAGE_SHIFT; + cpu_pmu.per_cpu = ihk_mc_alloc_pages(pages, IHK_MC_AP_NOWAIT); + if (cpu_pmu.per_cpu == NULL) { + return -ENOMEM; + } + memset(cpu_pmu.per_cpu, 0, pages * PAGE_SIZE); + + if (0 < ihk_param_nr_pmu_irq_affi) { + for (i = 0; i < ihk_param_nr_pmu_irq_affi; i++) { + ret = ihk_mc_register_interrupt_handler(ihk_param_pmu_irq_affi[i], + cpu_pmu.handler); + if (ret) { + break; + } + } + } + else { + ret = ihk_mc_register_interrupt_handler(INTRID_PERF_OVF, + cpu_pmu.handler); } return ret; } +void arm64_init_per_cpu_perfctr(void) +{ + armv8pmu_per_cpu_init(&cpu_pmu.per_cpu[ihk_mc_get_processor_id()]); +} + int arm64_enable_pmu(void) { int ret; @@ -47,11 +75,21 @@ void arm64_disable_pmu(void) cpu_pmu.disable_pmu(); } +void arm64_enable_user_access_pmu_regs(void) +{ + cpu_pmu.enable_user_access_pmu_regs(); +} + +void arm64_disable_user_access_pmu_regs(void) +{ + cpu_pmu.disable_user_access_pmu_regs(); +} + extern unsigned int *arm64_march_perfmap; static int __ihk_mc_perfctr_init(int counter, uint32_t type, uint64_t config, int mode) { - int ret; + int ret = -1; unsigned long config_base = 0; int mapping; @@ -61,17 +99,17 @@ static int __ihk_mc_perfctr_init(int counter, uint32_t type, uint64_t config, in } ret = cpu_pmu.disable_counter(counter); - if (!ret) { + if 
(ret < 0) { return ret; } ret = cpu_pmu.enable_intens(counter); - if (!ret) { + if (ret < 0) { return ret; } ret = cpu_pmu.set_event_filter(&config_base, mode); - if (!ret) { + if (ret) { return ret; } config_base |= (unsigned long)mapping; @@ -95,22 +133,19 @@ int ihk_mc_perfctr_init(int counter, uint64_t config, int mode) int ihk_mc_perfctr_start(unsigned long counter_mask) { - int ret = 0; - int counter; - unsigned long counter_bit; + int ret = 0, i; - for (counter = 0, counter_bit = 1; - counter_bit < counter_mask; - counter++, counter_bit <<= 1) { - if (!(counter_mask & counter_bit)) - continue; - - ret = cpu_pmu.enable_counter(counter_mask); - if (ret < 0) - break; + for (i = 0; i < sizeof(counter_mask) * BITS_PER_BYTE; i++) { + if (counter_mask & (1UL << i)) { + ret = cpu_pmu.enable_counter(i); + if (ret < 0) { + kprintf("%s: enable failed(idx=%d)\n", + __func__, i); + break; + } + } } - - return ret < 0 ? ret : 0; + return ret; } int ihk_mc_perfctr_stop(unsigned long counter_mask) @@ -146,8 +181,7 @@ int ihk_mc_perfctr_reset(int counter) return 0; } -//int ihk_mc_perfctr_set(int counter, unsigned long val) -int ihk_mc_perfctr_set(int counter, long val) /* 0416_patchtemp */ +int ihk_mc_perfctr_set(int counter, long val) { // TODO[PMU]: 共通部でサンプリングレートの計算をして、設定するカウンタ値をvalに渡してくるようになると想定。サンプリングレートの扱いを見てから本実装。 uint32_t v = val; @@ -169,19 +203,31 @@ unsigned long ihk_mc_perfctr_read(int counter) return count; } -//int ihk_mc_perfctr_alloc_counter(unsigned long pmc_status) -int ihk_mc_perfctr_alloc_counter(unsigned int *type, unsigned long *config, unsigned long pmc_status) /* 0416_patchtemp */ +int ihk_mc_perfctr_alloc_counter(unsigned int *type, unsigned long *config, + unsigned long pmc_status) { int ret; - ret = cpu_pmu.get_event_idx(cpu_pmu.num_events, pmc_status); - return ret; -} -/* 0416_patchtemp */ -/* ihk_mc_perfctr_fixed_init() stub added. 
*/ -int ihk_mc_perfctr_fixed_init(int counter, int mode) -{ - return -1; + if (*type == PERF_TYPE_HARDWARE) { + switch (*config) { + case PERF_COUNT_HW_INSTRUCTIONS: + ret = cpu_pmu.map_event(*type, *config); + if (ret < 0) { + return -1; + } + *type = PERF_TYPE_RAW; + break; + default: + // Unexpected config + return -1; + } + } + else if (*type != PERF_TYPE_RAW) { + return -1; + } + ret = cpu_pmu.get_event_idx(get_per_cpu_pmu()->num_events, pmc_status, + *config); + return ret; } int ihk_mc_perf_counter_mask_check(unsigned long counter_mask) @@ -193,3 +239,9 @@ int ihk_mc_perf_get_num_counters(void) { return cpu_pmu.per_cpu[ihk_mc_get_processor_id()].num_events; } + +int ihk_mc_perfctr_set_extra(struct mc_perf_event *event) +{ + /* Nothing to do. */ + return 0; +} diff --git a/arch/arm64/kernel/perfctr_armv8pmu.c b/arch/arm64/kernel/perfctr_armv8pmu.c index 3bdcfa66..3ce970ed 100644 --- a/arch/arm64/kernel/perfctr_armv8pmu.c +++ b/arch/arm64/kernel/perfctr_armv8pmu.c @@ -5,6 +5,13 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include #define BIT(nr) (1UL << (nr)) @@ -16,240 +23,204 @@ /* - * @ref.impl arch/arm64/kernel/perf_event.c + * @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c * Perf Events' indices */ #define ARMV8_IDX_CYCLE_COUNTER 0 #define ARMV8_IDX_COUNTER0 1 -#define ARMV8_IDX_COUNTER_LAST (ARMV8_IDX_CYCLE_COUNTER + get_cpu_pmu()->num_events - 1) +#define ARMV8_IDX_COUNTER_LAST (ARMV8_IDX_CYCLE_COUNTER + get_per_cpu_pmu()->num_events - 1) -#define ARMV8_MAX_COUNTERS 32 -#define ARMV8_COUNTER_MASK (ARMV8_MAX_COUNTERS - 1) +/* @ref.impl linux-v4.15-rc3 arch/arm64/include/asm/perf_event.h */ +#define ARMV8_PMU_MAX_COUNTERS 32 +#define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1) /* * ARMv8 low level PMU access */ /* - * @ref.impl arch/arm64/kernel/perf_event.c + * @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c * Perf Event to low level counters mapping */ #define 
ARMV8_IDX_TO_COUNTER(x) \ - (((x) - ARMV8_IDX_COUNTER0) & ARMV8_COUNTER_MASK) + (((x) - ARMV8_IDX_COUNTER0) & ARMV8_PMU_COUNTER_MASK) /* - * @ref.impl arch/arm64/kernel/perf_event.c + * @ref.impl linux-v4.15-rc3 arch/arm64/include/asm/perf_event.h * Per-CPU PMCR: config reg */ -#define ARMV8_PMCR_E (1 << 0) /* Enable all counters */ -#define ARMV8_PMCR_P (1 << 1) /* Reset all counters */ -#define ARMV8_PMCR_C (1 << 2) /* Cycle counter reset */ -#define ARMV8_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */ -#define ARMV8_PMCR_X (1 << 4) /* Export to ETM */ -#define ARMV8_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ -#define ARMV8_PMCR_N_SHIFT 11 /* Number of counters supported */ -#define ARMV8_PMCR_N_MASK 0x1f -#define ARMV8_PMCR_MASK 0x3f /* Mask for writable bits */ +#define ARMV8_PMU_PMCR_E (1 << 0) /* Enable all counters */ +#define ARMV8_PMU_PMCR_P (1 << 1) /* Reset all counters */ +#define ARMV8_PMU_PMCR_C (1 << 2) /* Cycle counter reset */ +#define ARMV8_PMU_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */ +#define ARMV8_PMU_PMCR_X (1 << 4) /* Export to ETM */ +#define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ +#define ARMV8_PMU_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter */ +#define ARMV8_PMU_PMCR_N_SHIFT 11 /* Number of counters supported */ +#define ARMV8_PMU_PMCR_N_MASK 0x1f +#define ARMV8_PMU_PMCR_MASK 0x7f /* Mask for writable bits */ /* - * @ref.impl arch/arm64/kernel/perf_event.c + * @ref.impl linux-v4.15-rc3 arch/arm64/include/asm/perf_event.h * PMOVSR: counters overflow flag status reg */ -#define ARMV8_OVSR_MASK 0xffffffff /* Mask for writable bits */ -#define ARMV8_OVERFLOWED_MASK ARMV8_OVSR_MASK +#define ARMV8_PMU_OVSR_MASK 0xffffffff /* Mask for writable bits */ +#define ARMV8_PMU_OVERFLOWED_MASK ARMV8_PMU_OVSR_MASK /* - * @ref.impl arch/arm64/kernel/perf_event.c + * @ref.impl linux-v4.15-rc3 arch/arm64/include/asm/perf_event.h * PMXEVTYPER: Event selection reg */ -#define 
ARMV8_EVTYPE_MASK 0xc80003ff /* Mask for writable bits */ -#define ARMV8_EVTYPE_EVENT 0x3ff /* Mask for EVENT bits */ +#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */ +#define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */ /* - * @ref.impl arch/arm64/kernel/perf_event.c + * @ref.impl linux-v4.15-rc3 arch/arm64/include/asm/perf_event.h * Event filters for PMUv3 */ -#define ARMV8_EXCLUDE_EL1 (1 << 31) -#define ARMV8_EXCLUDE_EL0 (1 << 30) -#define ARMV8_INCLUDE_EL2 (1 << 27) +#define ARMV8_PMU_EXCLUDE_EL1 (1 << 31) +#define ARMV8_PMU_EXCLUDE_EL0 (1 << 30) +#define ARMV8_PMU_INCLUDE_EL2 (1 << 27) /* - * @ref.impl arch/arm64/kernel/perf_event.c - * ARMv8 PMUv3 Performance Events handling code. - * Common event types. + * @ref.impl linux-v4.15-rc3 arch/arm64/include/asm/perf_event.h + * PMUSERENR: user enable reg */ -enum armv8_pmuv3_perf_types { - /* Required events. */ - ARMV8_PMUV3_PERFCTR_PMNC_SW_INCR = 0x00, - ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL = 0x03, - ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS = 0x04, - ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED = 0x10, - ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES = 0x11, - ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED = 0x12, +#define ARMV8_PMU_USERENR_MASK 0xf /* Mask for writable bits */ +#define ARMV8_PMU_USERENR_EN (1 << 0) /* PMU regs can be accessed at EL0 */ +#define ARMV8_PMU_USERENR_SW (1 << 1) /* PMSWINC can be written at EL0 */ +#define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */ +#define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */ - /* At least one of the following is required. 
*/ - ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED = 0x08, - ARMV8_PMUV3_PERFCTR_OP_SPEC = 0x1B, +/* + * @ref.impl linux-v4.15-rc3 arch/arm64/include/asm/perf_event.h + * PMUv3 event types: required events + */ +#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x00 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x03 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x04 +#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x10 +#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x11 +#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x12 - /* Common architectural events. */ - ARMV8_PMUV3_PERFCTR_MEM_READ = 0x06, - ARMV8_PMUV3_PERFCTR_MEM_WRITE = 0x07, - ARMV8_PMUV3_PERFCTR_EXC_TAKEN = 0x09, - ARMV8_PMUV3_PERFCTR_EXC_EXECUTED = 0x0A, - ARMV8_PMUV3_PERFCTR_CID_WRITE = 0x0B, - ARMV8_PMUV3_PERFCTR_PC_WRITE = 0x0C, - ARMV8_PMUV3_PERFCTR_PC_IMM_BRANCH = 0x0D, - ARMV8_PMUV3_PERFCTR_PC_PROC_RETURN = 0x0E, - ARMV8_PMUV3_PERFCTR_MEM_UNALIGNED_ACCESS = 0x0F, - ARMV8_PMUV3_PERFCTR_TTBR_WRITE = 0x1C, +/* + * @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c + * ARMv8 PMUv3 Performance Events handling code. + * Common event types (some are defined in asm/perf_event.h). + */ - /* Common microarchitectural events. */ - ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL = 0x01, - ARMV8_PMUV3_PERFCTR_ITLB_REFILL = 0x02, - ARMV8_PMUV3_PERFCTR_DTLB_REFILL = 0x05, - ARMV8_PMUV3_PERFCTR_MEM_ACCESS = 0x13, - ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS = 0x14, - ARMV8_PMUV3_PERFCTR_L1_DCACHE_WB = 0x15, - ARMV8_PMUV3_PERFCTR_L2_CACHE_ACCESS = 0x16, - ARMV8_PMUV3_PERFCTR_L2_CACHE_REFILL = 0x17, - ARMV8_PMUV3_PERFCTR_L2_CACHE_WB = 0x18, - ARMV8_PMUV3_PERFCTR_BUS_ACCESS = 0x19, - ARMV8_PMUV3_PERFCTR_MEM_ERROR = 0x1A, - ARMV8_PMUV3_PERFCTR_BUS_CYCLES = 0x1D, -}; +/* At least one of the following is required. */ +#define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x08 +#define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x1B -/* @ref.impl arch/arm64/kernel/perf_event.c */ +/* Common architectural events. 
*/ +#define ARMV8_PMUV3_PERFCTR_LD_RETIRED 0x06 +#define ARMV8_PMUV3_PERFCTR_ST_RETIRED 0x07 +#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN 0x09 +#define ARMV8_PMUV3_PERFCTR_EXC_RETURN 0x0A +#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED 0x0B +#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED 0x0C +#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED 0x0D +#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED 0x0E +#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED 0x0F +#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED 0x1C +#define ARMV8_PMUV3_PERFCTR_CHAIN 0x1E +#define ARMV8_PMUV3_PERFCTR_BR_RETIRED 0x21 + +/* Common microarchitectural events. */ +#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL 0x01 +#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL 0x02 +#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL 0x05 +#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS 0x13 +#define ARMV8_PMUV3_PERFCTR_L1I_CACHE 0x14 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB 0x15 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE 0x16 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL 0x17 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB 0x18 +#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS 0x19 +#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR 0x1A +#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES 0x1D +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE 0x1F +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE 0x20 +#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED 0x22 +#define ARMV8_PMUV3_PERFCTR_STALL_FRONTEND 0x23 +#define ARMV8_PMUV3_PERFCTR_STALL_BACKEND 0x24 +#define ARMV8_PMUV3_PERFCTR_L1D_TLB 0x25 +#define ARMV8_PMUV3_PERFCTR_L1I_TLB 0x26 +#define ARMV8_PMUV3_PERFCTR_L2I_CACHE 0x27 +#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL 0x28 +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE 0x29 +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL 0x2A +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE 0x2B +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB 0x2C +#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL 0x2D +#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL 0x2E +#define ARMV8_PMUV3_PERFCTR_L2D_TLB 0x2F +#define ARMV8_PMUV3_PERFCTR_L2I_TLB 
0x30 + +/* @ref.impl linux-v4.15-rc3 include/linux/perf/arm_pmu.h */ #define HW_OP_UNSUPPORTED 0xFFFF - -/* @ref.impl arch/arm64/kernel/perf_event.c */ -#define C(_x) \ - PERF_COUNT_HW_CACHE_##_x - -/* @ref.impl arch/arm64/kernel/perf_event.c */ +#define C(_x) PERF_COUNT_HW_CACHE_##_x #define CACHE_OP_UNSUPPORTED 0xFFFF +#define PERF_MAP_ALL_UNSUPPORTED \ + [0 ... PERF_COUNT_HW_MAX - 1] = HW_OP_UNSUPPORTED + +#define PERF_CACHE_MAP_ALL_UNSUPPORTED \ +[0 ... C(MAX) - 1] = { \ + [0 ... C(OP_MAX) - 1] = { \ + [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \ + }, \ +} + +/* PMUv3 HW events mapping. */ + /* - * @ref.impl arch/arm64/kernel/perf_event.c - * PMUv3 HW events mapping. + * @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c + * ARMv8 Architectural defined events, not all of these may + * be supported on any given implementation. Undefined events will + * be disabled at run-time. */ static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_REF_CPU_CYCLES] = HW_OP_UNSUPPORTED, /* TODO[PMU]: PERF_COUNT_HW_REF_CPU_CYCLESはCentOSに無かったので確認.*/ + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, + 
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND, }; -/* @ref.impl arch/arm64/kernel/perf_event.c */ +/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */ static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = 
CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, - [C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, - [C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(NODE)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, + + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL, + [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = 
ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL, + [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, }; -/* @ref.impl arch/arm64/kernel/perf_event.c */ +/* @ref.impl linux-v4.15-rc3 drivers/perf/arm_pmu.c */ static int armpmu_map_cache_event(const unsigned (*cache_map) [PERF_COUNT_HW_CACHE_MAX] @@ -271,6 +242,9 @@ armpmu_map_cache_event(const unsigned (*cache_map) if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) return -EINVAL; + if (!cache_map) + return -ENOENT; + ret = (int)(*cache_map)[cache_type][cache_op][cache_result]; if (ret == CACHE_OP_UNSUPPORTED) @@ -279,38 +253,43 @@ armpmu_map_cache_event(const unsigned (*cache_map) return ret; } -/* @ref.impl arch/arm64/kernel/perf_event.c */ +/* @ref.impl linux-v4.15-rc3 drivers/perf/arm_pmu.c */ static int -armpmu_map_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], uint64_t config) +armpmu_map_hw_event(const unsigned int (*event_map)[PERF_COUNT_HW_MAX], + uint64_t config) { int mapping; if (config >= PERF_COUNT_HW_MAX) return -EINVAL; + if (!event_map) + return -ENOENT; + mapping = (*event_map)[config]; return mapping == HW_OP_UNSUPPORTED ? 
-ENOENT : mapping; } -/* @ref.impl arch/arm64/kernel/perf_event.c */ +/* @ref.impl linux-v4.15-rc3 drivers/perf/arm_pmu.c */ static int armpmu_map_raw_event(uint32_t raw_event_mask, uint64_t config) { return (int)(config & raw_event_mask); } -/* @ref.impl arch/arm64/kernel/perf_event.c */ -static int map_cpu_event(uint32_t type, uint64_t config, - const unsigned (*event_map)[PERF_COUNT_HW_MAX], - const unsigned (*cache_map) - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX], - uint32_t raw_event_mask) +/* @ref.impl linux-v4.15-rc3 drivers/perf/arm_pmu.c */ +static int +armpmu_map_event(uint32_t type, uint64_t config, + const unsigned int (*event_map)[PERF_COUNT_HW_MAX], + const unsigned int (*cache_map) + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX], + uint32_t raw_event_mask) { switch (type) { case PERF_TYPE_HARDWARE: - return armpmu_map_event(event_map, config); + return armpmu_map_hw_event(event_map, config); case PERF_TYPE_HW_CACHE: return armpmu_map_cache_event(cache_map, config); case PERF_TYPE_RAW: @@ -319,239 +298,273 @@ static int map_cpu_event(uint32_t type, uint64_t config, return -ENOENT; } -/* @ref.impl arch/arm64/kernel/perf_event.c */ +/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */ static inline int armv8pmu_counter_valid(int idx) { - return idx >= ARMV8_IDX_CYCLE_COUNTER && idx <= ARMV8_IDX_COUNTER_LAST; + return idx >= ARMV8_IDX_CYCLE_COUNTER && + idx <= ARMV8_IDX_COUNTER_LAST; } -/* @ref.impl arch/arm64/kernel/perf_event.c */ +/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */ static inline uint32_t armv8pmu_getreset_flags(void) { uint32_t value; /* Read */ - asm volatile("mrs %0, pmovsclr_el0" : "=r" (value)); + value = read_sysreg(pmovsclr_el0); /* Write to clear flags */ - value &= ARMV8_OVSR_MASK; - asm volatile("msr pmovsclr_el0, %0" :: "r" (value)); + value &= ARMV8_PMU_OVSR_MASK; + write_sysreg(value, pmovsclr_el0); return 
value; } -/* @ref.impl arch/arm64/kernel/perf_event.c */ +/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */ static inline int armv8pmu_has_overflowed(uint32_t pmovsr) { - return pmovsr & ARMV8_OVERFLOWED_MASK; + return pmovsr & ARMV8_PMU_OVERFLOWED_MASK; } -/* @ref.impl arch/arm64/kernel/perf_event.c */ -static inline int armv8pmu_counter_has_overflowed(uint32_t pmnc, int idx) +/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */ +static int __armv8_pmuv3_map_event(uint32_t type, uint64_t config, + const unsigned int (*extra_event_map) + [PERF_COUNT_HW_MAX], + const unsigned int (*extra_cache_map) + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]) { - int ret = 0; - uint32_t counter; + int hw_event_id; - if (!armv8pmu_counter_valid(idx)) { - ekprintf("CPU%u checking wrong counter %d overflow status\n", - ihk_mc_get_processor_id(), idx); - } else { - counter = ARMV8_IDX_TO_COUNTER(idx); - ret = pmnc & BIT(counter); + hw_event_id = armpmu_map_event(type, config, &armv8_pmuv3_perf_map, + &armv8_pmuv3_perf_cache_map, + ARMV8_PMU_EVTYPE_EVENT); + + /* Only expose micro/arch events supported by this PMU */ + if ((hw_event_id > 0) && (hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS) + && test_bit(hw_event_id, get_per_cpu_pmu()->pmceid_bitmap)) { + return hw_event_id; } - return ret; + return armpmu_map_event(type, config, extra_event_map, extra_cache_map, + ARMV8_PMU_EVTYPE_EVENT); } -/* @ref.impl arch/arm64/kernel/perf_event.c */ +/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */ static int armv8_pmuv3_map_event(uint32_t type, uint64_t config) { - return map_cpu_event(type, config, &armv8_pmuv3_perf_map, - &armv8_pmuv3_perf_cache_map, - ARMV8_EVTYPE_EVENT); + return __armv8_pmuv3_map_event(type, config, NULL, NULL); } -/* @ref.impl arch/arm64/kernel/perf_event.c */ +/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */ static inline uint32_t armv8pmu_pmcr_read(void) { - uint32_t val; - 
asm volatile("mrs %0, pmcr_el0" : "=r" (val)); - return val; + return read_sysreg(pmcr_el0); } -/* @ref.impl arch/arm64/kernel/perf_event.c */ +/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */ static inline void armv8pmu_pmcr_write(uint32_t val) { - val &= ARMV8_PMCR_MASK; + val &= ARMV8_PMU_PMCR_MASK; isb(); - asm volatile("msr pmcr_el0, %0" :: "r" (val)); + write_sysreg(val, pmcr_el0); } -/* @ref.impl arch/arm64/kernel/perf_event.c */ +/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */ static inline int armv8pmu_select_counter(int idx) { uint32_t counter; if (!armv8pmu_counter_valid(idx)) { - ekprintf("CPU%u selecting wrong PMNC counter %d\n", - ihk_mc_get_processor_id(), idx); + ekprintf("%s: The count_register#%d is not implemented.\n", + __func__, idx); return -EINVAL; } counter = ARMV8_IDX_TO_COUNTER(idx); - asm volatile("msr pmselr_el0, %0" :: "r" (counter)); + write_sysreg(counter, pmselr_el0); isb(); return idx; } -/* @ref.impl arch/arm64/kernel/perf_event.c */ +/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */ static inline uint32_t armv8pmu_read_counter(int idx) { uint32_t value = 0; - if (!armv8pmu_counter_valid(idx)) - ekprintf("CPU%u reading wrong counter %d\n", - ihk_mc_get_processor_id(), idx); - else if (idx == ARMV8_IDX_CYCLE_COUNTER) - asm volatile("mrs %0, pmccntr_el0" : "=r" (value)); - else if (armv8pmu_select_counter(idx) == idx) - asm volatile("mrs %0, pmxevcntr_el0" : "=r" (value)); + if (!armv8pmu_counter_valid(idx)) { + ekprintf("%s: The count_register#%d is not implemented.\n", + __func__, idx); + } + else if (idx == ARMV8_IDX_CYCLE_COUNTER) { + value = read_sysreg(pmccntr_el0); + } + else if (armv8pmu_select_counter(idx) == idx) { + value = read_sysreg(pmxevcntr_el0); + } return value; } -/* @ref.impl arch/arm64/kernel/perf_event.c */ +/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */ static inline void armv8pmu_write_counter(int idx, uint32_t value) { - if 
(!armv8pmu_counter_valid(idx)) - ekprintf("CPU%u writing wrong counter %d\n", - ihk_mc_get_processor_id(), idx); - else if (idx == ARMV8_IDX_CYCLE_COUNTER) - asm volatile("msr pmccntr_el0, %0" :: "r" (value)); - else if (armv8pmu_select_counter(idx) == idx) - asm volatile("msr pmxevcntr_el0, %0" :: "r" (value)); + if (!armv8pmu_counter_valid(idx)) { + ekprintf("%s: The count_register#%d is not implemented.\n", + __func__, idx); + } + else if (idx == ARMV8_IDX_CYCLE_COUNTER) { + /* + * Set the upper 32bits as this is a 64bit counter but we only + * count using the lower 32bits and we want an interrupt when + * it overflows. + */ + uint64_t value64 = 0xffffffff00000000ULL | value; + + write_sysreg(value64, pmccntr_el0); + } + else if (armv8pmu_select_counter(idx) == idx) { + write_sysreg(value, pmxevcntr_el0); + } } -/* @ref.impl arch/arm64/kernel/perf_event.c */ +/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */ static inline int armv8pmu_enable_intens(int idx) { uint32_t counter; if (!armv8pmu_counter_valid(idx)) { - ekprintf("CPU%u enabling wrong PMNC counter IRQ enable %d\n", - ihk_mc_get_processor_id(), idx); + ekprintf("%s: The count_register#%d is not implemented.\n", + __func__, idx); return -EINVAL; } counter = ARMV8_IDX_TO_COUNTER(idx); - asm volatile("msr pmintenset_el1, %0" :: "r" (BIT(counter))); + write_sysreg(BIT(counter), pmintenset_el1); return idx; } -/* @ref.impl arch/arm64/kernel/perf_event.c */ +/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */ static inline int armv8pmu_disable_intens(int idx) { - uint32_t counter; + uint32_t counter = ARMV8_IDX_TO_COUNTER(idx); - if (!armv8pmu_counter_valid(idx)) { - ekprintf("CPU%u disabling wrong PMNC counter IRQ enable %d\n", - ihk_mc_get_processor_id(), idx); - return -EINVAL; - } - - counter = ARMV8_IDX_TO_COUNTER(idx); - asm volatile("msr pmintenclr_el1, %0" :: "r" (BIT(counter))); + write_sysreg(BIT(counter), pmintenclr_el1); isb(); /* Clear the overflow flag in case an 
interrupt is pending. */ - asm volatile("msr pmovsclr_el0, %0" :: "r" (BIT(counter))); + write_sysreg(BIT(counter), pmovsclr_el0); isb(); + return idx; } -/* @ref.impl arch/arm64/kernel/perf_event.c */ -static int armv8pmu_set_event_filter(unsigned long* config_base, int mode) +/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */ +static int armv8pmu_set_event_filter(unsigned long *config_base, int mode) { - if (!(mode & PERFCTR_USER_MODE)) { - *config_base |= ARMV8_EXCLUDE_EL0; - } + /* exclude_idle is unused mode, unsupported */ +// if (attr->exclude_idle) +// return -EPERM; - if (!(mode & PERFCTR_KERNEL_MODE)) { - *config_base |= ARMV8_EXCLUDE_EL1; - } + /* + * If we're running in hyp mode, then we *are* the hypervisor. + * Therefore we ignore exclude_hv in this configuration, since + * there's no hypervisor to sample anyway. This is consistent + * with other architectures (x86 and Power). + */ + if (is_kernel_in_hyp_mode()) { + if (mode & PERFCTR_KERNEL_MODE) + *config_base |= ARMV8_PMU_INCLUDE_EL2; + } else { + if (!(mode & PERFCTR_KERNEL_MODE)) + *config_base |= ARMV8_PMU_EXCLUDE_EL1; + /* exclude_hv is unused mode, unsupported */ +// if (!attr->exclude_hv) +// config_base |= ARMV8_PMU_INCLUDE_EL2; + } + if (!(mode & PERFCTR_USER_MODE)) + *config_base |= ARMV8_PMU_EXCLUDE_EL0; - if (0) { - /* 共通部がexclude_hvを無視してくるので常にexcludeとする。 */ - *config_base |= ARMV8_INCLUDE_EL2; - } return 0; } -/* @ref.impl arch/arm64/kernel/perf_event.c */ +/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */ static inline void armv8pmu_write_evtype(int idx, uint32_t val) { if (armv8pmu_select_counter(idx) == idx) { - val &= ARMV8_EVTYPE_MASK; - asm volatile("msr pmxevtyper_el0, %0" :: "r" (val)); + val &= ARMV8_PMU_EVTYPE_MASK; + write_sysreg(val, pmxevtyper_el0); } } -/* @ref.impl arch/arm64/kernel/perf_event.c */ +/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */ static inline int armv8pmu_enable_counter(int idx) { uint32_t counter; if 
(!armv8pmu_counter_valid(idx)) { - ekprintf("CPU%u enabling wrong PMNC counter %d\n", - ihk_mc_get_processor_id(), idx); + ekprintf("%s: The count_register#%d is not implemented.\n", + __func__, idx); return -EINVAL; } counter = ARMV8_IDX_TO_COUNTER(idx); - asm volatile("msr pmcntenset_el0, %0" :: "r" (BIT(counter))); + write_sysreg(BIT(counter), pmcntenset_el0); return idx; } -/* @ref.impl arch/arm64/kernel/perf_event.c */ +/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */ static inline int armv8pmu_disable_counter(int idx) { uint32_t counter; if (!armv8pmu_counter_valid(idx)) { - ekprintf("CPU%u disabling wrong PMNC counter %d\n", - ihk_mc_get_processor_id(), idx); + ekprintf("%s: The count_register#%d is not implemented.\n", + __func__, idx); return -EINVAL; } counter = ARMV8_IDX_TO_COUNTER(idx); - asm volatile("msr pmcntenclr_el0, %0" :: "r" (BIT(counter))); + write_sysreg(BIT(counter), pmcntenclr_el0); return idx; } -/* @ref.impl arch/arm64/kernel/perf_event.c */ +/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */ +static ihk_spinlock_t pmu_lock = SPIN_LOCK_UNLOCKED; static int armv8pmu_start(void) { - /* Enable user-mode access to counters. */ - asm volatile("msr pmuserenr_el0, %0" :: "r"(1)); + unsigned long flags; + flags = ihk_mc_spinlock_lock(&pmu_lock); /* Enable all counters */ - armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMCR_E); + armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E); + ihk_mc_spinlock_unlock(&pmu_lock, flags); + return 0; } -/* @ref.impl arch/arm64/kernel/perf_event.c */ +/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */ static void armv8pmu_stop(void) { - /* Disable all counters */ - armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMCR_E); + unsigned long flags; - /* Disable user-mode access to counters. 
*/ - asm volatile("msr pmuserenr_el0, %0" :: "r" (0)); + flags = ihk_mc_spinlock_lock(&pmu_lock); + /* Disable all counters */ + armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E); + ihk_mc_spinlock_unlock(&pmu_lock, flags); } -/* @ref.impl arch/arm64/kernel/perf_event.c */ +/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */ static void armv8pmu_disable_event(int idx) { + unsigned long flags; + + /* + * Disable counter and interrupt + */ + flags = ihk_mc_spinlock_lock(&pmu_lock); + /* * Disable counter */ @@ -561,54 +574,81 @@ static void armv8pmu_disable_event(int idx) * Disable interrupt for this counter */ armv8pmu_disable_intens(idx); + + ihk_mc_spinlock_unlock(&pmu_lock, flags); } -/* @ref.impl arch/arm64/kernel/perf_event.c */ -static void armv8pmu_reset(void* info) +/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */ +static void armv8pmu_reset(void *info) { - struct arm_pmu* cpu_pmu = (struct arm_pmu*)info; - uint32_t idx, nb_cnt = cpu_pmu->num_events; - + struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; + uint32_t idx, nb_cnt = + cpu_pmu->per_cpu[ihk_mc_get_processor_id()].num_events; + /* The counter and interrupt enable registers are unknown at reset. */ - for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) - armv8pmu_disable_event(idx); - - /* Initialize & Reset PMNC: C and P bits. */ - armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C); - - /* Disable access from userspace. */ - asm volatile("msr pmuserenr_el0, %0" :: "r" (0)); + for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) { + armv8pmu_disable_counter(idx); + armv8pmu_disable_intens(idx); + } + + /* + * Initialize & Reset PMNC. Request overflow interrupt for + * 64 bit cycle counter but cheat in armv8pmu_write_counter(). 
+ */ + armv8pmu_pmcr_write(ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C | + ARMV8_PMU_PMCR_LC); } -/* @ref.impl arch/arm64/kernel/perf_event.c */ -static int armv8pmu_get_event_idx(int num_events, unsigned long used_mask) +/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */ +static int armv8pmu_get_event_idx(int num_events, unsigned long used_mask, + unsigned long config) { int idx; - for (idx = ARMV8_IDX_COUNTER0; idx < num_events; ++idx) { - if (!(used_mask & (1UL << idx))) { - return idx; - } + unsigned long evtype = config & ARMV8_PMU_EVTYPE_EVENT; + + /* Always prefer to place a cycle counter into the cycle counter. */ + if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) { + if (!(used_mask & (1UL << ARMV8_IDX_CYCLE_COUNTER))) + return ARMV8_IDX_CYCLE_COUNTER; } + + /* + * Otherwise use events counters + */ + for (idx = ARMV8_IDX_COUNTER0; idx < num_events; ++idx) { + if (!(used_mask & (1UL << idx))) + return idx; + } + /* The counters are all in use. */ return -EAGAIN; } -/* @ref.impl arch/arm64/kernel/perf_event.c */ -static uint32_t armv8pmu_read_num_pmnc_events(void) +/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c:__armv8pmu_probe_pmu() */ +/* Extract get num_events processing. 
*/ +static uint32_t armv8pmu_read_num_pmnc_events(void) { - uint32_t nb_cnt; + uint32_t num_events = 0; /* Read the nb of CNTx counters supported from PMNC */ - nb_cnt = (armv8pmu_pmcr_read() >> ARMV8_PMCR_N_SHIFT) & ARMV8_PMCR_N_MASK; + num_events = (armv8pmu_pmcr_read() >> ARMV8_PMU_PMCR_N_SHIFT) + & ARMV8_PMU_PMCR_N_MASK; - /* Add the CPU cycles counter and return */ - return nb_cnt + 1; + /* Add the CPU cycles counter */ + num_events += 1; + + return num_events; } -/* @ref.impl arch/arm64/kernel/perf_event.c */ static void armv8pmu_handle_irq(void *priv) { + struct siginfo info; uint32_t pmovsr; + struct thread *thread = cpu_local_var(current); + struct process *proc = thread->proc; + long irqstate; + struct mckfd *fdp; + struct pt_regs *regs = (struct pt_regs *)priv; /* * Get and reset the IRQ flags @@ -622,8 +662,57 @@ static void armv8pmu_handle_irq(void *priv) return; /* - * TODO[PMU]: Handle the counter(s) overflow(s) + * Handle the counter(s) overflow(s) */ + /* same as x86_64 mckernel */ + irqstate = ihk_mc_spinlock_lock(&proc->mckfd_lock); + for (fdp = proc->mckfd; fdp; fdp = fdp->next) { + if (fdp->sig_no > 0) + break; + } + ihk_mc_spinlock_unlock(&proc->mckfd_lock, irqstate); + + if (fdp) { + memset(&info, '\0', sizeof(info)); + info.si_signo = fdp->sig_no; + info._sifields._sigfault.si_addr = (void *)regs->pc; + info._sifields._sigpoll.si_fd = fdp->fd; + set_signal(fdp->sig_no, regs, &info); + } + else { + set_signal(SIGIO, regs, NULL); + } +} + +static void armv8pmu_enable_user_access_pmu_regs(void) +{ + uint32_t value = 0; + + value = read_sysreg(pmuserenr_el0); + write_sysreg(value | (ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR), + pmuserenr_el0); +} + +static void armv8pmu_disable_user_access_pmu_regs(void) +{ + uint32_t value = 0; + + value = read_sysreg(pmuserenr_el0); + write_sysreg(value & ~(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR), + pmuserenr_el0); +} + +static void armv8pmu_create_pmceid_bitmap(unsigned long *bitmap, uint32_t nbits) +{ 
+ uint32_t pmceid[2]; + + memset(bitmap, 0, BITS_TO_LONGS(nbits) * sizeof(unsigned long)); + + pmceid[0] = read_sysreg(pmceid0_el0); + bitmap[0] = (unsigned long)pmceid[0]; + + pmceid[1] = read_sysreg(pmceid1_el0); + bitmap[0] |= (unsigned long)pmceid[1] << 32; } static struct ihk_mc_interrupt_handler armv8pmu_handler = { @@ -633,19 +722,30 @@ static struct ihk_mc_interrupt_handler armv8pmu_handler = { int armv8pmu_init(struct arm_pmu* cpu_pmu) { - cpu_pmu->num_events = armv8pmu_read_num_pmnc_events(); cpu_pmu->read_counter = armv8pmu_read_counter; cpu_pmu->write_counter = armv8pmu_write_counter; - cpu_pmu->set_event_filter = armv8pmu_set_event_filter; - cpu_pmu->write_evtype = armv8pmu_write_evtype; - cpu_pmu->enable_intens = armv8pmu_enable_intens; - cpu_pmu->disable_intens = armv8pmu_disable_intens; - cpu_pmu->enable_counter = armv8pmu_enable_counter; - cpu_pmu->disable_counter = armv8pmu_disable_counter; + cpu_pmu->reset = armv8pmu_reset; cpu_pmu->enable_pmu = armv8pmu_start; cpu_pmu->disable_pmu = armv8pmu_stop; + cpu_pmu->enable_counter = armv8pmu_enable_counter; + cpu_pmu->disable_counter = armv8pmu_disable_counter; + cpu_pmu->enable_intens = armv8pmu_enable_intens; + cpu_pmu->disable_intens = armv8pmu_disable_intens; + cpu_pmu->set_event_filter = armv8pmu_set_event_filter; + cpu_pmu->write_evtype = armv8pmu_write_evtype; cpu_pmu->get_event_idx = armv8pmu_get_event_idx; cpu_pmu->map_event = armv8_pmuv3_map_event; + cpu_pmu->enable_user_access_pmu_regs = + armv8pmu_enable_user_access_pmu_regs; + cpu_pmu->disable_user_access_pmu_regs = + armv8pmu_disable_user_access_pmu_regs; cpu_pmu->handler = &armv8pmu_handler; return 0; } + +void armv8pmu_per_cpu_init(struct per_cpu_arm_pmu *per_cpu) +{ + per_cpu->num_events = armv8pmu_read_num_pmnc_events(); + armv8pmu_create_pmceid_bitmap(per_cpu->pmceid_bitmap, + ARMV8_PMUV3_MAX_COMMON_EVENTS); +} diff --git a/arch/arm64/kernel/proc.S b/arch/arm64/kernel/proc.S index bdc382d8..bbab38b4 100644 --- 
a/arch/arm64/kernel/proc.S +++ b/arch/arm64/kernel/proc.S @@ -30,7 +30,7 @@ */ #if defined(CONFIG_HAS_NMI) #include -ENTRY(cpu_do_idle) +ENTRY(__cpu_do_idle) mrs x0, daif // save I bit msr daifset, #2 // set I bit mrs_s x1, ICC_PMR_EL1 // save PMR @@ -41,13 +41,13 @@ ENTRY(cpu_do_idle) msr_s ICC_PMR_EL1, x1 // restore PMR msr daif, x0 // restore I bit ret -ENDPROC(cpu_do_idle) +ENDPROC(__cpu_do_idle) #else /* defined(CONFIG_HAS_NMI) */ -ENTRY(cpu_do_idle) +ENTRY(__cpu_do_idle) dsb sy // WFI may enter a low-power mode wfi ret -ENDPROC(cpu_do_idle) +ENDPROC(__cpu_do_idle) #endif /* defined(CONFIG_HAS_NMI) */ /* diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 306cece6..0bbd909d 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -23,7 +23,6 @@ #define NOT_IMPLEMENTED() do { kprintf("%s is not implemented\n", __func__); while(1);} while(0) extern void save_debugreg(unsigned long *debugreg); -extern unsigned long do_kill(struct thread *thread, int pid, int tid, int sig, struct siginfo *info, int ptracecont); extern int interrupt_from_user(void *); enum aarch64_regset { @@ -948,23 +947,32 @@ void ptrace_report_signal(struct thread *thread, int sig) } mcs_rwlock_writer_lock(&proc->update_lock, &lock); - if(!(proc->ptrace & PT_TRACED)){ + if (!(thread->ptrace & PT_TRACED)) { mcs_rwlock_writer_unlock(&proc->update_lock, &lock); return; } - thread->exit_status = sig; + /* Transition thread state */ - proc->status = PS_DELAY_TRACED; + thread->exit_status = sig; thread->status = PS_TRACED; - proc->ptrace &= ~PT_TRACE_SYSCALL; - if (sig == SIGSTOP || sig == SIGTSTP || - sig == SIGTTIN || sig == SIGTTOU) { - proc->signal_flags |= SIGNAL_STOP_STOPPED; - } else { - proc->signal_flags &= ~SIGNAL_STOP_STOPPED; - } - parent_pid = proc->parent->pid; + thread->ptrace &= ~PT_TRACE_SYSCALL; save_debugreg(thread->ptrace_debugreg); + if (sig == SIGSTOP || sig == SIGTSTP || + sig == SIGTTIN || sig == SIGTTOU) { + thread->signal_flags |= 
SIGNAL_STOP_STOPPED; + } + else { + thread->signal_flags &= ~SIGNAL_STOP_STOPPED; + } + + if (thread == proc->main_thread) { + proc->status = PS_DELAY_TRACED; + parent_pid = proc->parent->pid; + } + else { + parent_pid = thread->report_proc->pid; + waitq_wakeup(&thread->report_proc->waitpid_q); + } mcs_rwlock_writer_unlock(&proc->update_lock, &lock); memset(&info, '\0', sizeof info); diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 21c4966b..d3bd981a 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -13,8 +13,8 @@ #include #include #include -#include #include +#include #include extern void ptrace_report_signal(struct thread *thread, int sig); @@ -55,9 +55,13 @@ static int cpuid_head = 1; extern int num_processors; -int obtain_clone_cpuid(cpu_set_t *cpu_set, int use_last) { +int obtain_clone_cpuid(cpu_set_t *cpu_set, int use_last) +{ int min_queue_len = -1; int i, min_cpu = -1; + unsigned long irqstate; + + irqstate = ihk_mc_spinlock_lock(&runq_reservation_lock); /* cpu_head lock */ ihk_mc_spinlock_lock_noirq(&cpuid_head_lock); @@ -65,7 +69,6 @@ int obtain_clone_cpuid(cpu_set_t *cpu_set, int use_last) { /* Find the first allowed core with the shortest run queue */ for (i = 0; i < num_processors; cpuid_head++, i++) { struct cpu_local_var *v; - unsigned long irqstate; /* cpuid_head over cpu_info->ncpus, cpuid_head = BSP reset. 
*/ if (cpuid_head >= num_processors) { @@ -75,12 +78,15 @@ int obtain_clone_cpuid(cpu_set_t *cpu_set, int use_last) { if (!CPU_ISSET(cpuid_head, cpu_set)) continue; v = get_cpu_local_var(cpuid_head); - irqstate = ihk_mc_spinlock_lock(&v->runq_lock); - if (min_queue_len == -1 || v->runq_len < min_queue_len) { - min_queue_len = v->runq_len; + ihk_mc_spinlock_lock_noirq(&v->runq_lock); + dkprintf("%s: cpu=%d,runq_len=%d,runq_reserved=%d\n", + __func__, cpuid_head, v->runq_len, v->runq_reserved); + if (min_queue_len == -1 || + v->runq_len + v->runq_reserved < min_queue_len) { + min_queue_len = v->runq_len + v->runq_reserved; min_cpu = cpuid_head; } - ihk_mc_spinlock_unlock(&v->runq_lock, irqstate); + ihk_mc_spinlock_unlock_noirq(&v->runq_lock); if (min_queue_len == 0) { cpuid_head++; @@ -94,7 +100,11 @@ int obtain_clone_cpuid(cpu_set_t *cpu_set, int use_last) { if (min_cpu != -1) { if (get_cpu_local_var(min_cpu)->status != CPU_STATUS_RESERVED) get_cpu_local_var(min_cpu)->status = CPU_STATUS_RESERVED; + __sync_fetch_and_add(&get_cpu_local_var(min_cpu)->runq_reserved, + 1); } + ihk_mc_spinlock_unlock(&runq_reservation_lock, irqstate); + return min_cpu; } @@ -139,8 +149,6 @@ SYSCALL_DECLARE(rt_sigaction) struct k_sigaction new_sa, old_sa; int rc; - if(sig == SIGKILL || sig == SIGSTOP || sig <= 0 || sig > SIGRTMAX) - return -EINVAL; if (sigsetsize != sizeof(sigset_t)) return -EINVAL; @@ -536,8 +544,8 @@ SYSCALL_DECLARE(rt_sigreturn) regs->regs[0] = ksigsp.sigrc; clear_single_step(thread); set_signal(SIGTRAP, regs, &info); - check_signal(0, regs, 0); check_need_resched(); + check_signal(0, regs, -1); } return ksigsp.sigrc; @@ -552,7 +560,6 @@ bad_frame: } extern struct cpu_local_var *clv; -extern unsigned long do_kill(struct thread *thread, int pid, int tid, int sig, struct siginfo *info, int ptracecont); extern void interrupt_syscall(struct thread *, int sig); extern int num_processors; @@ -654,9 +661,15 @@ extern void coredump(struct thread *thread, void *regs); static 
int isrestart(int syscallno, unsigned long rc, int sig, int restart) { - if(syscallno == 0 || rc != -EINTR) + if (sig == SIGKILL || sig == SIGSTOP) return 0; + if (syscallno < 0 || rc != -EINTR) + return 0; + + if (sig == SIGCHLD) + return 1; + /* * The following interfaces are never restarted after being interrupted * by a signal handler, regardless of the use of SA_RESTART @@ -676,7 +689,7 @@ isrestart(int syscallno, unsigned long rc, int sig, int restart) * poll(2) -> ppoll * select(2) -> pselect6 */ - switch(syscallno){ + switch (syscallno) { case __NR_rt_sigsuspend: case __NR_rt_sigtimedwait: case __NR_epoll_pwait: @@ -692,9 +705,7 @@ isrestart(int syscallno, unsigned long rc, int sig, int restart) return 0; } - if(sig == SIGCHLD) - return 1; - if(restart) + if (restart) return 1; return 0; } @@ -1071,7 +1082,7 @@ static int setup_rt_frame(int usig, unsigned long rc, int to_restart, return err; } -void +int do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pending *pending, int syscallno) { struct pt_regs *regs = regs0; @@ -1083,14 +1094,15 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi int ptraceflag = 0; struct mcs_rwlock_node_irqsave lock; struct mcs_rwlock_node_irqsave mcs_rw_node; + int restart = 0; for(w = pending->sigmask.__val[0], sig = 0; w; sig++, w >>= 1); dkprintf("do_signal(): tid=%d, pid=%d, sig=%d\n", thread->tid, proc->pid, sig); orgsig = sig; - if((proc->ptrace & PT_TRACED) && - pending->ptracecont == 0 && - sig != SIGKILL) { + if ((thread->ptrace & PT_TRACED) && + pending->ptracecont == 0 && + sig != SIGKILL) { ptraceflag = 1; sig = SIGSTOP; } @@ -1108,28 +1120,24 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi if(k->sa.sa_handler == SIG_IGN){ kfree(pending); mcs_rwlock_writer_unlock(&thread->sigcommon->lock, &mcs_rw_node); - return; + goto out; } else if(k->sa.sa_handler){ - /* save signal frame */ - int to_restart = 0; - // check syscall to 
have restart ? - to_restart = isrestart(syscallno, rc, sig, k->sa.sa_flags & SA_RESTART); - if(syscallno != 0 && rc == -EINTR && sig == SIGCHLD) { - to_restart = 1; - } - if(to_restart == 1) { + restart = isrestart(syscallno, rc, sig, + k->sa.sa_flags & SA_RESTART); + if (restart == 1) { /* Prepare for system call restart. */ regs->regs[0] = regs->orig_x0; } - if (setup_rt_frame(sig, rc, to_restart, syscallno, k, pending, regs, thread)) { + if (setup_rt_frame(sig, rc, restart, syscallno, k, pending, + regs, thread)) { kfree(pending); mcs_rwlock_writer_unlock(&thread->sigcommon->lock, &mcs_rw_node); kprintf("do_signal,page_fault_thread_vm failed\n"); terminate(0, sig); - return; + goto out; } // check signal handler is ONESHOT @@ -1148,13 +1156,14 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi }; clear_single_step(thread); set_signal(SIGTRAP, regs, &info); - check_signal(0, regs, 0); check_need_resched(); + check_signal(0, regs, -1); } } else { int coredumped = 0; siginfo_t info; + int ptc = pending->ptracecont; if(ptraceflag){ if(thread->ptrace_recvsig) @@ -1181,22 +1190,37 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi info.si_code = CLD_STOPPED; info._sifields._sigchld.si_pid = thread->proc->pid; info._sifields._sigchld.si_status = (sig << 8) | 0x7f; - do_kill(cpu_local_var(current), thread->proc->parent->pid, -1, SIGCHLD, &info, 0); - dkprintf("do_signal,SIGSTOP,changing state\n"); + if (ptc == 2 && + thread != thread->proc->main_thread) { + thread->signal_flags = + SIGNAL_STOP_STOPPED; + thread->status = PS_STOPPED; + thread->exit_status = SIGSTOP; + do_kill(thread, + thread->report_proc->pid, -1, + SIGCHLD, &info, 0); + waitq_wakeup( + &thread->report_proc->waitpid_q); + } + else { + /* Update thread state in fork tree */ + mcs_rwlock_writer_lock( + &proc->update_lock, &lock); + proc->group_exit_status = SIGSTOP; - /* Update thread state in fork tree */ - 
mcs_rwlock_writer_lock(&proc->update_lock, &lock); - proc->group_exit_status = SIGSTOP; + /* Reap and set new signal_flags */ + proc->main_thread->signal_flags = + SIGNAL_STOP_STOPPED; - /* Reap and set new signal_flags */ - proc->signal_flags = SIGNAL_STOP_STOPPED; + proc->status = PS_DELAY_STOPPED; + thread->status = PS_STOPPED; + mcs_rwlock_writer_unlock( + &proc->update_lock, &lock); - proc->status = PS_DELAY_STOPPED; - thread->status = PS_STOPPED; - mcs_rwlock_writer_unlock(&proc->update_lock, &lock); - - dkprintf("do_signal(): pid: %d, tid: %d SIGSTOP, sleeping\n", - proc->pid, thread->tid); + do_kill(thread, + thread->proc->parent->pid, -1, + SIGCHLD, &info, 0); + } /* Sleep */ schedule(); dkprintf("SIGSTOP(): woken up\n"); @@ -1204,16 +1228,28 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi break; case SIGTRAP: dkprintf("do_signal,SIGTRAP\n"); - if(!(proc->ptrace & PT_TRACED)) { + if (!(thread->ptrace & PT_TRACED)) { goto core; } /* Update thread state in fork tree */ - mcs_rwlock_writer_lock(&proc->update_lock, &lock); thread->exit_status = SIGTRAP; - proc->status = PS_DELAY_TRACED; thread->status = PS_TRACED; - mcs_rwlock_writer_unlock(&proc->update_lock, &lock); + if (thread == proc->main_thread) { + mcs_rwlock_writer_lock(&proc->update_lock, + &lock); + proc->group_exit_status = SIGTRAP; + proc->status = PS_DELAY_TRACED; + mcs_rwlock_writer_unlock(&proc->update_lock, + &lock); + do_kill(thread, thread->proc->parent->pid, -1, + SIGCHLD, &info, 0); + } + else { + do_kill(thread, thread->report_proc->pid, -1, + SIGCHLD, &info, 0); + waitq_wakeup(&thread->report_proc->waitpid_q); + } /* Sleep */ dkprintf("do_signal,SIGTRAP,sleeping\n"); @@ -1228,7 +1264,7 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi info._sifields._sigchld.si_pid = proc->pid; info._sifields._sigchld.si_status = 0x0000ffff; do_kill(cpu_local_var(current), proc->parent->pid, -1, SIGCHLD, &info, 0); - proc->signal_flags 
= SIGNAL_STOP_CONTINUED; + proc->main_thread->signal_flags = SIGNAL_STOP_CONTINUED; proc->status = PS_RUNNING; dkprintf("do_signal,SIGCONT,do nothing\n"); break; @@ -1249,6 +1285,7 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi break; case SIGCHLD: case SIGURG: + case SIGWINCH: break; default: dkprintf("do_signal,default,terminate,sig=%d\n", sig); @@ -1256,6 +1293,8 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi break; } } +out: + return restart; } static struct sig_pending * @@ -1272,28 +1311,34 @@ getsigpending(struct thread *thread, int delflag){ lock = &thread->sigcommon->lock; head = &thread->sigcommon->sigpending; for(;;) { - if (delflag) + if (delflag) { mcs_rwlock_writer_lock(lock, &mcs_rw_node); - else + } + else { mcs_rwlock_reader_lock(lock, &mcs_rw_node); + } list_for_each_entry_safe(pending, next, head, list){ if(!(pending->sigmask.__val[0] & w)){ if(delflag) list_del(&pending->list); - if (delflag) + if (delflag) { mcs_rwlock_writer_unlock(lock, &mcs_rw_node); - else + } + else { mcs_rwlock_reader_unlock(lock, &mcs_rw_node); + } return pending; } } - if (delflag) + if (delflag) { mcs_rwlock_writer_unlock(lock, &mcs_rw_node); - else + } + else { mcs_rwlock_reader_unlock(lock, &mcs_rw_node); + } if(lock == &thread->sigpendinglock) return NULL; @@ -1308,6 +1353,11 @@ getsigpending(struct thread *thread, int delflag){ struct sig_pending * hassigpending(struct thread *thread) { + if (list_empty(&thread->sigpending) && + list_empty(&thread->sigcommon->sigpending)) { + return NULL; + } + return getsigpending(thread, 0); } @@ -1374,6 +1424,11 @@ __check_signal(unsigned long rc, void *regs0, int num, int irq_disabled) goto out; } + if (list_empty(&thread->sigpending) && + list_empty(&thread->sigcommon->sigpending)) { + goto out; + } + for(;;){ /* When this function called from check_signal_irq_disabled, * return with interrupt invalid. 
@@ -1390,7 +1445,9 @@ __check_signal(unsigned long rc, void *regs0, int num, int irq_disabled) if (irq_disabled == 1) { cpu_restore_interrupt(irqstate); } - do_signal(rc, regs, thread, pending, num); + if (do_signal(rc, regs, thread, pending, num)) { + num = -1; + } } out: @@ -1556,15 +1613,21 @@ done: mcs_rwlock_reader_lock_noirq(&tproc->update_lock, &updatelock); savelock = &tthread->sigpendinglock; head = &tthread->sigpending; - if(sig == SIGKILL || - (tproc->status != PS_EXITED && - tproc->status != PS_ZOMBIE && - tthread->status != PS_EXITED)){ - hold_thread(tthread); + mcs_rwlock_reader_lock_noirq(&tproc->threads_lock, &lock); + if (tthread->status != PS_EXITED && + (sig == SIGKILL || + (tproc->status != PS_EXITED && + tproc->status != PS_ZOMBIE))) { + if ((rc = hold_thread(tthread))) { + kprintf("%s: ERROR hold_thread returned %d,tid=%d\n", + __func__, rc, tthread->tid); + tthread = NULL; + } } else{ tthread = NULL; } + mcs_rwlock_reader_unlock_noirq(&tproc->threads_lock, &lock); mcs_rwlock_reader_unlock_noirq(&tproc->update_lock, &updatelock); mcs_rwlock_reader_unlock_noirq(&thash->lock[hash], &lock); } @@ -1591,7 +1654,9 @@ done: } if (tthread->uti_state == UTI_STATE_RUNNING_IN_LINUX) { - interrupt_syscall(tthread, sig); + if (!tthread->proc->nohost) { + interrupt_syscall(tthread, sig); + } release_thread(tthread); return 0; } @@ -1605,10 +1670,10 @@ done: in check_signal */ rc = 0; k = tthread->sigcommon->action + sig - 1; - if((sig != SIGKILL && (tproc->ptrace & PT_TRACED)) || - (k->sa.sa_handler != SIG_IGN && - (k->sa.sa_handler != NULL || - (sig != SIGCHLD && sig != SIGURG)))){ + if ((sig != SIGKILL && (tthread->ptrace & PT_TRACED)) || + (k->sa.sa_handler != SIG_IGN && + (k->sa.sa_handler != NULL || + (sig != SIGCHLD && sig != SIGURG)))) { struct sig_pending *pending = NULL; if (sig < SIGRTMIN) { // SIGRTMIN - SIGRTMAX list_for_each_entry(pending, head, list){ @@ -1626,6 +1691,7 @@ done: rc = -ENOMEM; } else{ + memset(pending, 0, sizeof(struct 
sig_pending)); pending->sigmask.__val[0] = mask; memcpy(&pending->info, info, sizeof(siginfo_t)); pending->ptracecont = ptracecont; @@ -1690,21 +1756,24 @@ set_signal(int sig, void *regs0, siginfo_t *info) ihk_mc_user_context_t *regs = regs0; struct thread *thread = cpu_local_var(current); - if(thread == NULL || thread->proc->pid == 0) + if (thread == NULL || thread->proc->pid == 0) return; - if((__sigmask(sig) & thread->sigmask.__val[0]) || - !interrupt_from_user(regs)){ + if (!interrupt_from_user(regs)) { + ihk_mc_debug_show_interrupt_context(regs); + panic("panic: kernel mode signal"); + } + + if ((__sigmask(sig) & thread->sigmask.__val[0])) { coredump(thread, regs0); terminate(0, sig | 0x80); } - do_kill(thread, thread->proc->pid, thread->tid, sig, info, 0); } SYSCALL_DECLARE(mmap) { - const int supported_flags = 0 + const unsigned int supported_flags = 0 | MAP_SHARED // 0x01 | MAP_PRIVATE // 0x02 | MAP_FIXED // 0x10 @@ -1712,7 +1781,7 @@ SYSCALL_DECLARE(mmap) | MAP_LOCKED // 0x2000 | MAP_POPULATE // 0x8000 | MAP_HUGETLB // 00040000 - | (0x3F << MAP_HUGE_SHIFT) // FC000000 + | (0x3FU << MAP_HUGE_SHIFT) // FC000000 ; const int ignored_flags = 0 | MAP_DENYWRITE // 0x0800 @@ -1870,7 +1939,23 @@ out: void save_uctx(void *uctx, struct pt_regs *regs) { - /* TODO: skeleton for UTI */ + struct trans_uctx { + volatile int cond; + int fregsize; + struct user_pt_regs regs; + unsigned long tls_baseaddr; + } *ctx = uctx; + + if (!regs) { + regs = current_pt_regs(); + } + + ctx->cond = 0; + ctx->fregsize = 0; + ctx->regs = regs->user_regs; + asm volatile( + " mrs %0, tpidr_el0" + : "=r" (ctx->tls_baseaddr)); } int do_process_vm_read_writev(int pid, @@ -1914,7 +1999,7 @@ int do_process_vm_read_writev(int pid, range = lookup_process_memory_range(lthread->vm, (uintptr_t)local_iov, - (uintptr_t)(local_iov + liovcnt * sizeof(struct iovec))); + (uintptr_t)(local_iov + liovcnt)); if (!range) { ret = -EFAULT; @@ -1923,7 +2008,7 @@ int do_process_vm_read_writev(int pid, range = 
lookup_process_memory_range(lthread->vm, (uintptr_t)remote_iov, - (uintptr_t)(remote_iov + riovcnt * sizeof(struct iovec))); + (uintptr_t)(remote_iov + riovcnt)); if (!range) { ret = -EFAULT; @@ -2196,110 +2281,100 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg) if (nr_cpus == 1) { switch (cpu_index) { - case 0: - memcpy(mpsr->virt_addr, mpsr->user_virt_addr, - sizeof(void *) * count); - memcpy(mpsr->status, mpsr->user_status, - sizeof(int) * count); - memcpy(mpsr->nodes, mpsr->user_nodes, - sizeof(int) * count); - memset(mpsr->ptep, 0, sizeof(pte_t) * count); - memset(mpsr->status, 0, sizeof(int) * count); - memset(mpsr->nr_pages, 0, sizeof(int) * count); - memset(mpsr->dst_phys, 0, - sizeof(unsigned long) * count); - mpsr->nodes_ready = 1; - break; + case 0: + memcpy(mpsr->virt_addr, mpsr->user_virt_addr, + sizeof(void *) * count); + memcpy(mpsr->nodes, mpsr->user_nodes, + sizeof(int) * count); + memset(mpsr->ptep, 0, sizeof(pte_t) * count); + memset(mpsr->status, 0, sizeof(int) * count); + memset(mpsr->nr_pages, 0, sizeof(int) * count); + memset(mpsr->dst_phys, 0, + sizeof(unsigned long) * count); + mpsr->nodes_ready = 1; + break; - default: - break; + default: + break; } } else if (nr_cpus > 1 && nr_cpus < 4) { switch (cpu_index) { - case 0: - memcpy(mpsr->virt_addr, mpsr->user_virt_addr, - sizeof(void *) * count); - memcpy(mpsr->status, mpsr->user_status, - sizeof(int) * count); - case 1: - memcpy(mpsr->nodes, mpsr->user_nodes, - sizeof(int) * count); - memset(mpsr->ptep, 0, sizeof(pte_t) * count); - memset(mpsr->status, 0, sizeof(int) * count); - memset(mpsr->nr_pages, 0, sizeof(int) * count); - memset(mpsr->dst_phys, 0, - sizeof(unsigned long) * count); - mpsr->nodes_ready = 1; - break; + case 0: + memcpy(mpsr->virt_addr, mpsr->user_virt_addr, + sizeof(void *) * count); + memcpy(mpsr->nodes, mpsr->user_nodes, + sizeof(int) * count); + mpsr->nodes_ready = 1; + break; + case 1: + memset(mpsr->ptep, 0, sizeof(pte_t) * count); + 
memset(mpsr->status, 0, sizeof(int) * count); + memset(mpsr->nr_pages, 0, sizeof(int) * count); + memset(mpsr->dst_phys, 0, + sizeof(unsigned long) * count); + break; - default: - break; + default: + break; } } - else if (nr_cpus >= 4 && nr_cpus < 8) { + else if (nr_cpus >= 4 && nr_cpus < 7) { switch (cpu_index) { - case 0: - memcpy(mpsr->virt_addr, mpsr->user_virt_addr, - sizeof(void *) * count); - break; - case 1: - memcpy(mpsr->status, mpsr->user_status, - sizeof(int) * count); - break; - case 2: - memcpy(mpsr->nodes, mpsr->user_nodes, - sizeof(int) * count); - mpsr->nodes_ready = 1; - break; - case 3: - memset(mpsr->ptep, 0, sizeof(pte_t) * count); - memset(mpsr->status, 0, sizeof(int) * count); - memset(mpsr->nr_pages, 0, sizeof(int) * count); - memset(mpsr->dst_phys, 0, - sizeof(unsigned long) * count); - break; + case 0: + memcpy(mpsr->virt_addr, mpsr->user_virt_addr, + sizeof(void *) * count); + break; + case 1: + memcpy(mpsr->nodes, mpsr->user_nodes, + sizeof(int) * count); + mpsr->nodes_ready = 1; + break; + case 2: + memset(mpsr->ptep, 0, sizeof(pte_t) * count); + memset(mpsr->status, 0, sizeof(int) * count); + break; + case 3: + memset(mpsr->nr_pages, 0, sizeof(int) * count); + memset(mpsr->dst_phys, 0, + sizeof(unsigned long) * count); + break; - default: - break; + default: + break; } } - else if (nr_cpus >= 8) { + else { switch (cpu_index) { - case 0: - memcpy(mpsr->virt_addr, mpsr->user_virt_addr, - sizeof(void *) * (count / 2)); - break; - case 1: - memcpy(mpsr->virt_addr + (count / 2), - mpsr->user_virt_addr + (count / 2), - sizeof(void *) * (count / 2)); - break; - case 2: - memcpy(mpsr->status, mpsr->user_status, - sizeof(int) * count); - break; - case 3: - memcpy(mpsr->nodes, mpsr->user_nodes, - sizeof(int) * count); - mpsr->nodes_ready = 1; - break; - case 4: - memset(mpsr->ptep, 0, sizeof(pte_t) * count); - break; - case 5: - memset(mpsr->status, 0, sizeof(int) * count); - break; - case 6: - memset(mpsr->nr_pages, 0, sizeof(int) * count); - 
break; - case 7: - memset(mpsr->dst_phys, 0, - sizeof(unsigned long) * count); - break; - - default: - break; + case 0: + memcpy(mpsr->virt_addr, mpsr->user_virt_addr, + sizeof(void *) * (count / 2)); + break; + case 1: + memcpy(mpsr->virt_addr + (count / 2), + mpsr->user_virt_addr + (count / 2), + sizeof(void *) * (count / 2)); + break; + case 2: + memcpy(mpsr->nodes, mpsr->user_nodes, + sizeof(int) * count); + mpsr->nodes_ready = 1; + break; + case 3: + memset(mpsr->ptep, 0, sizeof(pte_t) * count); + break; + case 4: + memset(mpsr->status, 0, sizeof(int) * count); + break; + case 5: + memset(mpsr->nr_pages, 0, sizeof(int) * count); + break; + case 6: + memset(mpsr->dst_phys, 0, + sizeof(unsigned long) * count); + break; + default: + break; } } @@ -2503,15 +2578,21 @@ out: return mpsr->phase_ret; } -time_t time(void) { +time_t time(void) +{ struct timespec ats; + time_t ret = 0; if (gettime_local_support) { calculate_time_from_tsc(&ats); - return ats.tv_sec; + ret = ats.tv_sec; } + return ret; +} - return (time_t)0; +SYSCALL_DECLARE(time) +{ + return time(); } /*** End of File ***/ diff --git a/arch/arm64/kernel/timer.c b/arch/arm64/kernel/timer.c new file mode 100644 index 00000000..55cbb4cd --- /dev/null +++ b/arch/arm64/kernel/timer.c @@ -0,0 +1,203 @@ +/* timer.c COPYRIGHT FUJITSU LIMITED 2018 */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +//#define DEBUG_PRINT_TIMER + +#ifdef DEBUG_PRINT_TIMER +#define dkprintf kprintf +#define ekprintf kprintf +#else +#define dkprintf(...) 
do { if (0) kprintf(__VA_ARGS__); } while (0) +#define ekprintf kprintf +#endif + +static unsigned int per_cpu_timer_val[NR_CPUS] = { 0 }; +static int timer_intrid = INTRID_VIRT_TIMER; + +static void arch_timer_virt_reg_write(enum arch_timer_reg reg, uint32_t val); +static void (*arch_timer_reg_write)(enum arch_timer_reg, uint32_t) = + arch_timer_virt_reg_write; + +static uint32_t arch_timer_virt_reg_read(enum arch_timer_reg reg); +static uint32_t (*arch_timer_reg_read)(enum arch_timer_reg) = + arch_timer_virt_reg_read; + +static void arch_timer_phys_reg_write(enum arch_timer_reg reg, uint32_t val) +{ + switch (reg) { + case ARCH_TIMER_REG_CTRL: + write_sysreg(val, cntp_ctl_el0); + break; + case ARCH_TIMER_REG_TVAL: + write_sysreg(val, cntp_tval_el0); + break; + } + isb(); +} + +static void arch_timer_virt_reg_write(enum arch_timer_reg reg, uint32_t val) +{ + switch (reg) { + case ARCH_TIMER_REG_CTRL: + write_sysreg(val, cntv_ctl_el0); + break; + case ARCH_TIMER_REG_TVAL: + write_sysreg(val, cntv_tval_el0); + break; + } + isb(); +} + +static uint32_t arch_timer_phys_reg_read(enum arch_timer_reg reg) +{ + uint32_t val = 0; + + switch (reg) { + case ARCH_TIMER_REG_CTRL: + val = read_sysreg(cntp_ctl_el0); + break; + case ARCH_TIMER_REG_TVAL: + val = read_sysreg(cntp_tval_el0); + break; + } + return val; +} + +static uint32_t arch_timer_virt_reg_read(enum arch_timer_reg reg) +{ + uint32_t val = 0; + + switch (reg) { + case ARCH_TIMER_REG_CTRL: + val = read_sysreg(cntv_ctl_el0); + break; + case ARCH_TIMER_REG_TVAL: + val = read_sysreg(cntv_tval_el0); + break; + } + return val; +} + +static void timer_handler(void *priv) +{ + unsigned long ctrl; + const int cpu = ihk_mc_get_processor_id(); + + dkprintf("CPU%d: catch %s timer\n", cpu, + ((timer_intrid == INTRID_PHYS_TIMER) || + (timer_intrid == INTRID_HYP_PHYS_TIMER)) ? 
+ "physical" : "virtual"); + + ctrl = arch_timer_reg_read(ARCH_TIMER_REG_CTRL); + if (ctrl & ARCH_TIMER_CTRL_IT_STAT) { + const unsigned int clocks = per_cpu_timer_val[cpu]; + struct cpu_local_var *v = get_this_cpu_local_var(); + unsigned long irqstate; + + /* set resched flag */ + irqstate = ihk_mc_spinlock_lock(&v->runq_lock); + v->flags |= CPU_FLAG_NEED_RESCHED; + ihk_mc_spinlock_unlock(&v->runq_lock, irqstate); + + /* gen control register value */ + ctrl &= ~ARCH_TIMER_CTRL_IT_STAT; + + /* set timer re-enable for periodic */ + arch_timer_reg_write(ARCH_TIMER_REG_TVAL, clocks); + arch_timer_reg_write(ARCH_TIMER_REG_CTRL, ctrl); + } +} + +static unsigned long is_use_virt_timer(void) +{ + extern unsigned long ihk_param_use_virt_timer; + + switch (ihk_param_use_virt_timer) { + case 0: /* physical */ + case 1: /* virtual */ + break; + default: /* invalid */ + panic("PANIC: is_use_virt_timer(): timer select neither phys-timer nor virt-timer.\n"); + break; + } + return ihk_param_use_virt_timer; +} + +static struct ihk_mc_interrupt_handler timer_interrupt_handler = { + .func = timer_handler, + .priv = NULL, +}; + +/* other source use functions */ + +struct ihk_mc_interrupt_handler *get_timer_handler(void) +{ + return &timer_interrupt_handler; +} + +void +lapic_timer_enable(unsigned int clocks) +{ + unsigned long ctrl = 0; + + /* gen control register value */ + ctrl = arch_timer_reg_read(ARCH_TIMER_REG_CTRL); + ctrl |= ARCH_TIMER_CTRL_ENABLE; + ctrl &= ~(ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_IT_STAT); + arch_timer_reg_write(ARCH_TIMER_REG_TVAL, clocks); + arch_timer_reg_write(ARCH_TIMER_REG_CTRL, ctrl); + + per_cpu_timer_val[ihk_mc_get_processor_id()] = clocks; +} + +void +lapic_timer_disable() +{ + unsigned long ctrl = 0; + + ctrl = arch_timer_reg_read(ARCH_TIMER_REG_CTRL); + ctrl &= ~ARCH_TIMER_CTRL_ENABLE; + arch_timer_reg_write(ARCH_TIMER_REG_CTRL, ctrl); + + per_cpu_timer_val[ihk_mc_get_processor_id()] = 0; +} + +int get_timer_intrid(void) +{ + return 
timer_intrid; +} + +void arch_timer_init(void) +{ + const unsigned long is_virt = is_use_virt_timer(); +#ifdef CONFIG_ARM64_VHE + const unsigned long mmfr = read_cpuid(ID_AA64MMFR1_EL1); +#endif /* CONFIG_ARM64_VHE */ + + if (is_virt) { + timer_intrid = INTRID_VIRT_TIMER; + arch_timer_reg_write = arch_timer_virt_reg_write; + arch_timer_reg_read = arch_timer_virt_reg_read; + } else { + timer_intrid = INTRID_PHYS_TIMER; + arch_timer_reg_write = arch_timer_phys_reg_write; + arch_timer_reg_read = arch_timer_phys_reg_read; + } +#ifdef CONFIG_ARM64_VHE + if ((mmfr >> ID_AA64MMFR1_VHE_SHIFT) & 1UL) { + if (is_virt) { + timer_intrid = INTRID_HYP_VIRT_TIMER; + } else { + timer_intrid = INTRID_HYP_PHYS_TIMER; + } + } +#endif /* CONFIG_ARM64_VHE */ +} diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 4fc4cbd8..d9471053 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -29,12 +29,14 @@ void arm64_notify_die(const char *str, struct pt_regs *regs, struct siginfo *inf */ void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs) { + const int from_user = interrupt_from_user(regs); + // /* TODO: implement lazy context saving/restoring */ - set_cputime(1); + set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN); // WARN_ON(1); kprintf("WARNING: CPU: %d PID: %d Trapped FP/ASIMD access.\n", ihk_mc_get_processor_id(), cpu_local_var(current)->proc->pid); - set_cputime(0); + set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT); } /* @@ -51,7 +53,9 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs) { siginfo_t info; unsigned int si_code = 0; - set_cputime(1); + const int from_user = interrupt_from_user(regs); + + set_cputime(from_user ? 
CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN); if (esr & FPEXC_IOF) si_code = FPE_FLTINV; @@ -70,7 +74,7 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs) info._sifields._sigfault.si_addr = (void*)regs->pc; set_signal(SIGFPE, regs, &info); - set_cputime(0); + set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT); } /* @ref.impl arch/arm64/kernel/traps.c */ @@ -133,8 +137,9 @@ exit: void do_undefinstr(struct pt_regs *regs) { siginfo_t info; + const int from_user = interrupt_from_user(regs); - set_cputime(interrupt_from_user(regs)? 1: 2); + set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN); if (call_undef_hook(regs) == 0) { goto out; @@ -147,7 +152,7 @@ void do_undefinstr(struct pt_regs *regs) arm64_notify_die("Oops - undefined instruction", regs, &info, 0); out: - set_cputime(0); + set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT); } /* @@ -157,7 +162,9 @@ out: void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) { siginfo_t info; - set_cputime(interrupt_from_user(regs)? 1: 2); + const int from_user = interrupt_from_user(regs); + + set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN); kprintf("entering bad_mode !! (regs:0x%p, reason:%d, esr:0x%x)\n", regs, reason, esr); kprintf("esr Analyse:\n"); @@ -173,5 +180,5 @@ void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) info._sifields._sigfault.si_addr = (void*)regs->pc; arm64_notify_die("Oops - bad mode", regs, &info, 0); - set_cputime(0); + set_cputime(from_user ? 
CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT); } diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 9dc21ba5..b082ce38 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -156,6 +156,7 @@ int arch_map_vdso(struct process_vm *vm) unsigned long start, end; unsigned long flag; int ret; + struct vm_range *range; vdso_text_len = vdso.vdso_npages << PAGE_SHIFT; /* Be sure to map the data page */ @@ -174,7 +175,7 @@ int arch_map_vdso(struct process_vm *vm) flag = VR_REMOTE | VR_PROT_READ; flag |= VRFLAG_PROT_TO_MAXPROT(flag); ret = add_process_memory_range(vm, start, end, vdso.vvar_phys, flag, - NULL, 0, PAGE_SHIFT, NULL); + NULL, 0, PAGE_SHIFT, &range); if (ret != 0){ dkprintf("ERROR: adding memory range for tod_data\n"); goto exit; @@ -186,7 +187,7 @@ int arch_map_vdso(struct process_vm *vm) flag = VR_REMOTE | VR_PROT_READ | VR_PROT_EXEC; flag |= VRFLAG_PROT_TO_MAXPROT(flag); ret = add_process_memory_range(vm, start, end, vdso.vdso_physlist[0], flag, - NULL, 0, PAGE_SHIFT, NULL); + NULL, 0, PAGE_SHIFT, &range); if (ret != 0) { dkprintf("ERROR: adding memory range for vdso_text\n"); diff --git a/arch/arm64/kernel/vdso/Makefile.in b/arch/arm64/kernel/vdso/Makefile.in index 55858572..e432fd52 100644 --- a/arch/arm64/kernel/vdso/Makefile.in +++ b/arch/arm64/kernel/vdso/Makefile.in @@ -2,6 +2,8 @@ # @ref.impl arch/arm64/kernel/vdso/Makefile # Building a vDSO image for AArch64. 
+RMDIR=rmdir + HOST_DIR=@KDIR@ HOST_CONFIG=$(HOST_DIR)/.config HOST_KERNEL_CONFIG_ARM64_4K_PAGES=$(shell grep -E "^CONFIG_ARM64_4K_PAGES=y" $(HOST_CONFIG) | sed 's|CONFIG_ARM64_4K_PAGES=||g') @@ -10,7 +12,6 @@ HOST_KERNEL_CONFIG_ARM64_64K_PAGES=$(shell grep -E "^CONFIG_ARM64_64K_PAGES=y" $ VDSOSRC = @abs_srcdir@ VDSOBUILD = @abs_builddir@ -INCDIR = $(VDSOSRC)/../include ECHO_SUFFIX = [VDSO] VDSOOBJS := gettimeofday.o @@ -23,11 +24,12 @@ $(if $(VDSOSRC),,$(error IHK output directory is not specified)) $(if $(TARGET),,$(error Target is not specified)) #CFLAGS := -nostdinc -mlittle-endian -Wall -mabi=lp64 -Wa,-gdwarf-2 -CFLAGS := -nostdinc -mlittle-endian -Wall -Wa,-gdwarf-2 -CFLAGS += -D__KERNEL__ -I$(SRC)/include -CFLAGS += -I$(SRC)/../lib/include -I$(INCDIR) -I$(IHKBASE)/smp/arm64/include -CFLAGS += $(foreach i, $(shell seq 1 100), $(addprefix -DPOSTK_DEBUG_ARCH_DEP_, $(i))) -CFLAGS += $(foreach i, $(shell seq 1 100), $(addprefix -DPOSTK_DEBUG_TEMP_FIX_, $(i))) +CFLAGS += -nostdinc -mlittle-endian -Wall -Wa,-gdwarf-2 +CFLAGS += -I$(SRC)/include -I$(SRC)/../lib/include -I$(VDSOSRC)/../include +CFLAGS += -I$(IHKBASE)/$(TARGETDIR)/include -Wno-unused-function -I$(IHKBASE)/../ikc/include -I$(IHKBASE)/../linux/include +CFLAGS += -DIHK_OS_MANYCORE -D__KERNEL__ +CFLAGS += $(foreach i, $(shell seq 6 120), $(addprefix -DPOSTK_DEBUG_ARCH_DEP_, $(i))) +CFLAGS += $(foreach i, $(shell seq 6 110), $(addprefix -DPOSTK_DEBUG_TEMP_FIX_, $(i))) LDFLAGS := -nostdinc -mlittle-endian -Wall -Wundef -Wstrict-prototypes LDFLAGS += -Wno-trigraphs -fno-strict-aliasing -fno-common @@ -66,6 +68,7 @@ CFLAGS_lds += -DPAGE_SIZE=0x10000 endif #load mckernel config (append CPPFLAGS) +#include @abs_top_builddir@/../ihk/cokernel/Makefile.common include @abs_top_builddir@/../ihk/cokernel/$(TARGETDIR)/Makefile.predefines default: all @@ -96,7 +99,9 @@ $(VDSOBUILD)/vdso.lds: $(VDSOSRC)/vdso.lds.S $(lds_cmd) clean: - $(rm_cmd) $(DESTOBJS) $(DESTASMOBJS) $(VDSOBUILD)/Makefile.dep $(VDSOBUILD)/vdso.* -r 
$(VDSOBUILD)/../include + $(rm_cmd) -f $(DESTOBJS) $(DESTASMOBJS) $(VDSOBUILD)/Makefile.dep $(VDSOBUILD)/vdso.lds + $(rm_cmd) -f $(VDSOBUILD)/vdso.so* $(VDSOBUILD)/../include/vdso-offsets.h $(VDSOBUILD)/../include/vdso-so-path.h + $(rmdir_cmd) --ignore-fail-on-non-empty $(VDSOBUILD)/../include depend: $(VDSOBUILD)/Makefile.dep @@ -121,6 +126,7 @@ cc_cmd = $(call echo_cmd,CC,$<)$(CC) $(CFLAGS) -c -o $@ ld_cmd = $(call echo_cmd,LD,$@)$(CC) $(LDFLAGS) $^ -o $@ dep_cmd = $(call echo_cmd,DEPEND,)$(CC) $(CFLAGS) -MM $1 > $@ rm_cmd = $(call echo_cmd,CLEAN,)$(RM) +rmdir_cmd = $(call echo_cmd,CLEAN,)$(RMDIR) objcopy_cmd = $(call echo_cmd,OBJCOPY,$<)$(OBJCOPY) $(OBJCOPYFLAGS) $< $@ lds_cmd = $(call echo_cmd,LDS,$<)$(CC) $(CFLAGS_lds) -c -o $@ $< diff --git a/arch/arm64/kernel/vdso/syscall.h b/arch/arm64/kernel/vdso/asm_syscall.h similarity index 88% rename from arch/arm64/kernel/vdso/syscall.h rename to arch/arm64/kernel/vdso/asm_syscall.h index 721b25a1..7deffbf8 100644 --- a/arch/arm64/kernel/vdso/syscall.h +++ b/arch/arm64/kernel/vdso/asm_syscall.h @@ -1,4 +1,4 @@ -/* syscall.h COPYRIGHT FUJITSU LIMITED 2016 */ +/* asm_syscall.h COPYRIGHT FUJITSU LIMITED 2016 */ #ifndef __HEADER_ARM64_VDSO_SYSCALL_H #define __HEADER_ARM64_VDSO_SYSCALL_H diff --git a/arch/arm64/kernel/vdso/gettimeofday.c b/arch/arm64/kernel/vdso/gettimeofday.c index d6616024..f18474b5 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.c +++ b/arch/arm64/kernel/vdso/gettimeofday.c @@ -1,58 +1,25 @@ -/* gettimeofday.c COPYRIGHT FUJITSU LIMITED 2016 */ - -#include -#include +/* gettimeofday.c COPYRIGHT FUJITSU LIMITED 2016-2018 */ #include +#include +#include #include #include #include -extern int __kernel_gettimeofday(struct timeval *tv, void *tz); - -static inline void cpu_pause_for_vsyscall(void) +#define UNUSED(x) ((void)(x)) +void vdso_gettimeofday_unused_funcs(void) { - asm volatile ("yield" ::: "memory"); - return; -} - -static inline void calculate_time_from_tsc(struct timespec *ts, - struct tod_data_s 
*tod_data) -{ - long ver; - unsigned long current_tsc; - __time_t sec_delta; - long ns_delta; - - for (;;) { - while ((ver = ihk_atomic64_read(&tod_data->version)) & 1) { - /* settimeofday() is in progress */ - cpu_pause_for_vsyscall(); - } - rmb(); - *ts = tod_data->origin; - rmb(); - if (ver == ihk_atomic64_read(&tod_data->version)) { - break; - } - - /* settimeofday() has intervened */ - cpu_pause_for_vsyscall(); - } - - current_tsc = rdtsc(); - sec_delta = current_tsc / tod_data->clocks_per_sec; - ns_delta = NS_PER_SEC * (current_tsc % tod_data->clocks_per_sec) - / tod_data->clocks_per_sec; - /* calc. of ns_delta overflows if clocks_per_sec exceeds 18.44 GHz */ - - ts->tv_sec += sec_delta; - ts->tv_nsec += ns_delta; - if (ts->tv_nsec >= NS_PER_SEC) { - ts->tv_nsec -= NS_PER_SEC; - ++ts->tv_sec; - } - - return; + UNUSED(xgetbv); + UNUSED(xsetbv); + UNUSED(rdpmc); + UNUSED(rdmsr); + UNUSED(set_perfctl); + UNUSED(start_perfctr); + UNUSED(stop_perfctr); + UNUSED(clear_perfctl); + UNUSED(set_perfctr); + UNUSED(read_perfctr); + UNUSED(xos_is_tchip); } static inline struct tod_data_s *get_tod_data_addr(void) @@ -82,7 +49,7 @@ int __kernel_gettimeofday(struct timeval *tv, void *tz) /* DO it locally if supported */ if (!tz && tod_data->do_local) { - calculate_time_from_tsc(&ats, tod_data); + calculate_time_from_tsc(&ats); tv->tv_sec = ats.tv_sec; tv->tv_usec = ats.tv_nsec / 1000; @@ -106,7 +73,6 @@ int __kernel_gettimeofday(struct timeval *tv, void *tz) return (int)ret; } - /* * The IDs of the various system clocks (for POSIX.1b interval timers): * @ref.impl include/uapi/linux/time.h @@ -146,7 +112,7 @@ int __kernel_clock_gettime(clockid_t clk_id, struct timespec *tp) /* DO it locally if supported */ if (tod_data->do_local && clk_id == CLOCK_REALTIME) { - calculate_time_from_tsc(&ats, tod_data); + calculate_time_from_tsc(&ats); tp->tv_sec = ats.tv_sec; tp->tv_nsec = ats.tv_nsec; diff --git a/arch/arm64/kernel/vdso/sigreturn.S b/arch/arm64/kernel/vdso/sigreturn.S index 
d4d3ae29..7c3bff13 100644 --- a/arch/arm64/kernel/vdso/sigreturn.S +++ b/arch/arm64/kernel/vdso/sigreturn.S @@ -22,7 +22,7 @@ */ #include -#include "syscall.h" +#include "asm_syscall.h" .text diff --git a/executer/kernel/mcctrl/Makefile.in b/executer/kernel/mcctrl/Makefile.in index 6567c0f6..b76b6405 100644 --- a/executer/kernel/mcctrl/Makefile.in +++ b/executer/kernel/mcctrl/Makefile.in @@ -31,8 +31,8 @@ mcctrl-y += sysfs.o sysfs_files.o arch/$(ARCH)/archdeps.o KBUILD_EXTRA_SYMBOLS = @abs_builddir@/../../../../ihk/linux/core/Module.symvers ifeq ($(ARCH), arm64) -EXTRA_CFLAGS += $(foreach i, $(shell seq 1 100), $(addprefix -DPOSTK_DEBUG_ARCH_DEP_, $(i))) -EXTRA_CFLAGS += $(foreach i, $(shell seq 1 100), $(addprefix -DPOSTK_DEBUG_TEMP_FIX_, $(i))) +EXTRA_CFLAGS += $(foreach i, $(shell seq 6 120), $(addprefix -DPOSTK_DEBUG_ARCH_DEP_, $(i))) +EXTRA_CFLAGS += $(foreach i, $(shell seq 6 110), $(addprefix -DPOSTK_DEBUG_TEMP_FIX_, $(i))) endif .PHONY: clean install modules diff --git a/executer/kernel/mcctrl/arch/arm64/include/archdeps.h b/executer/kernel/mcctrl/arch/arm64/include/archdeps.h index d49aaa90..0ef7fff2 100644 --- a/executer/kernel/mcctrl/arch/arm64/include/archdeps.h +++ b/executer/kernel/mcctrl/arch/arm64/include/archdeps.h @@ -2,6 +2,10 @@ #ifndef __HEADER_MCCTRL_ARM64_ARCHDEPS_H #define __HEADER_MCCTRL_ARM64_ARCHDEPS_H +#ifdef POSTK_DEBUG_ARCH_DEP_100 /* rus_mmap() setting vm_flags arch depend defined */ +#include +#endif /* POSTK_DEBUG_ARCH_DEP_100 */ + extern int translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long rva, unsigned long *rpap, unsigned long *pgsizep); diff --git a/executer/kernel/mcctrl/arch/x86_64/include/archdeps.h b/executer/kernel/mcctrl/arch/x86_64/include/archdeps.h index 46c16934..b20c2737 100644 --- a/executer/kernel/mcctrl/arch/x86_64/include/archdeps.h +++ b/executer/kernel/mcctrl/arch/x86_64/include/archdeps.h @@ -2,6 +2,10 @@ #ifndef __HEADER_MCCTRL_X86_64_ARCHDEPS_H #define __HEADER_MCCTRL_X86_64_ARCHDEPS_H 
+#ifdef POSTK_DEBUG_ARCH_DEP_100 /* rus_mmap() setting vm_flags arch depend defined */ +#include +#endif /* POSTK_DEBUG_ARCH_DEP_100 */ + extern int translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long rva, unsigned long *rpap, unsigned long *pgsizep); diff --git a/executer/kernel/mcctrl/mcctrl.h b/executer/kernel/mcctrl/mcctrl.h index 25ccb335..e5e7752a 100644 --- a/executer/kernel/mcctrl/mcctrl.h +++ b/executer/kernel/mcctrl/mcctrl.h @@ -449,7 +449,8 @@ int mcctrl_add_per_thread_data(struct mcctrl_per_proc_data *ppd, void *data); void mcctrl_put_per_thread_data_unsafe(struct mcctrl_per_thread_data *ptd); void mcctrl_put_per_thread_data(struct mcctrl_per_thread_data* ptd); #ifdef POSTK_DEBUG_ARCH_DEP_56 /* Strange how to use inline declaration fix. */ -inline struct mcctrl_per_thread_data *mcctrl_get_per_thread_data(struct mcctrl_per_proc_data *ppd, struct task_struct *task) +static inline struct mcctrl_per_thread_data *mcctrl_get_per_thread_data(struct mcctrl_per_proc_data *ppd, + struct task_struct *task) { struct mcctrl_per_thread_data *ptd_iter, *ptd = NULL; int hash = (((uint64_t)task >> 4) & MCCTRL_PER_THREAD_DATA_HASH_MASK); diff --git a/executer/user/Makefile.in b/executer/user/Makefile.in index bec3f947..11afaab1 100644 --- a/executer/user/Makefile.in +++ b/executer/user/Makefile.in @@ -1,4 +1,4 @@ -# Makefile.in COPYRIGHT FUJITSU LIMITED 2015-2016 +# Makefile.in COPYRIGHT FUJITSU LIMITED 2015-2018 CC=@CC@ MCC=mpicc BINDIR=@BINDIR@ @@ -35,8 +35,20 @@ ifeq ($(WITH_SYSCALL_INTERCEPT),yes) endif ifeq ($(ARCH), arm64) -CFLAGS += $(foreach i, $(shell seq 1 100), $(addprefix -DPOSTK_DEBUG_ARCH_DEP_, $(i))) -CFLAGS += $(foreach i, $(shell seq 1 100), $(addprefix -DPOSTK_DEBUG_TEMP_FIX_, $(i))) +CFLAGS += $(foreach i, $(shell seq 6 120), $(addprefix -DPOSTK_DEBUG_ARCH_DEP_, $(i))) +CFLAGS += $(foreach i, $(shell seq 6 110), $(addprefix -DPOSTK_DEBUG_TEMP_FIX_, $(i))) + +# get HOST-kernel memory settings +HOST_DIR=@KDIR@ 
+HOST_CONFIG=$(HOST_DIR)/.config +HOST_KERNEL_64K_PAGES=$(shell grep -E "^CONFIG_ARM64_64K_PAGES=y" $(HOST_CONFIG) | sed 's|CONFIG_ARM64_64K_PAGES=||g') +HOST_KERNEL_VA_BITS=$(shell grep -E "^CONFIG_ARM64_VA_BITS=" $(HOST_CONFIG) | sed 's|CONFIG_ARM64_VA_BITS=||g') + +ifeq ($(HOST_KERNEL_64K_PAGES), y) +CFLAGS += -DCONFIG_ARM64_64K_PAGES +endif +CFLAGS += -DCONFIG_ARM64_VA_BITS=$(HOST_KERNEL_VA_BITS) + endif all: $(TARGET) diff --git a/executer/user/arch/arm64/Makefile.in b/executer/user/arch/arm64/Makefile.in index b1bb74ae..61143789 100644 --- a/executer/user/arch/arm64/Makefile.in +++ b/executer/user/arch/arm64/Makefile.in @@ -13,8 +13,8 @@ all: $(TARGET) ../../libmcexec.a: archdep.o arch_syscall.o $(AR) cr ../../libmcexec.a archdep.o arch_syscall.o -archdep.o: archdep.S - $(CC) -c -I${KDIR} $(CFLAGS) $(EXTRA_CFLAGS) -fPIE -pie -pthread $< +archdep.o: archdep.c archdep.S + $(CC) -c -I${KDIR} $(CFLAGS) $(EXTRA_CFLAGS) -fPIE -pie -pthread $^ arch_syscall.o: arch_syscall.c $(CC) -c -I${KDIR} $(CFLAGS) $(EXTRA_CFLAGS) -fPIE -pie -pthread $< diff --git a/executer/user/arch/arm64/arch-eclair.c b/executer/user/arch/arm64/arch-eclair.c index a64f47ed..5f8ac8bb 100644 --- a/executer/user/arch/arm64/arch-eclair.c +++ b/executer/user/arch/arm64/arch-eclair.c @@ -1,4 +1,4 @@ -/* arch-eclair.c COPYRIGHT FUJITSU LIMITED 2016 */ +/* arch-eclair.c COPYRIGHT FUJITSU LIMITED 2016-2018 */ #include #include #include @@ -49,3 +49,20 @@ int print_kregs(char *rbp, size_t rbp_size, const struct arch_kregs *kregs) return total; } + +#ifdef POSTK_DEBUG_ARCH_DEP_34 +uintptr_t virt_to_phys(uintptr_t va) +{ + extern uintptr_t kernel_base; + + if (va >= MAP_KERNEL) { + return (va - MAP_KERNEL + kernel_base); + } + + if (va >= MAP_ST) { + return (va - MAP_ST); + } + return NOPHYS; +} /* virt_to_phys() */ +#endif /* POSTK_DEBUG_ARCH_DEP_34 */ + diff --git a/executer/user/arch/arm64/arch_args.h b/executer/user/arch/arm64/arch_args.h index 2df5c305..a34c67f0 100644 --- 
a/executer/user/arch/arm64/arch_args.h +++ b/executer/user/arch/arm64/arch_args.h @@ -4,123 +4,300 @@ #include -typedef struct user_pt_regs syscall_args; +#ifndef NT_ARM_SYSTEM_CALL +#define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */ +#endif /* !NT_ARM_SYSTEM_CALL */ + +typedef struct { + struct user_pt_regs regs; + unsigned long orig_x0; + unsigned long ret_value; + pid_t target_pid; + int bypass; +} syscall_args; + +enum ptrace_syscall_dir { + PTRACE_SYSCALL_ENTER = 0, + PTRACE_SYSCALL_EXIT, +}; + +static inline int +syscall_enter(syscall_args *args) +{ + if (!args) { + printf("%s: input args is NULL.\n", __func__); + return -1; + } + + switch (args->regs.regs[7]) { + case PTRACE_SYSCALL_ENTER: + return 1; + case PTRACE_SYSCALL_EXIT: + return 0; + default: + printf("%s: x7 is neither SYSCALL_ENTER nor SYSCALL_EXIT.\n", + __func__); + return -1; + } +} static inline int get_syscall_args(int pid, syscall_args *args) { - /* TODO: skeleton for UTI */ - return -1; + struct iovec iov; + long ret = -1; + + if (!args) { + printf("%s: input args is NULL.\n", __func__); + return -1; + } + args->target_pid = pid; + args->bypass = 0; + memset(&iov, 0, sizeof(iov)); + iov.iov_base = &args->regs; + iov.iov_len = sizeof(args->regs); + ret = ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &iov); + if (!ret) { + if (syscall_enter(args)) { + args->orig_x0 = args->regs.regs[0]; + args->ret_value = 0; + } + else { + /* orig_x0 is saved */ + args->ret_value = args->regs.regs[0]; + } + } + return ret; } static inline int set_syscall_args(int pid, syscall_args *args) { - /* TODO: skeleton for UTI */ - return -1; + struct iovec iov; + + if (!args) { + printf("%s: input args is NULL.\n", __func__); + return -1; + } + memset(&iov, 0, sizeof(iov)); + iov.iov_base = &args->regs; + iov.iov_len = sizeof(args->regs); + return ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &iov); } static inline unsigned long get_syscall_number(syscall_args *args) { - /* TODO: skeleton for UTI */ - return 0; 
-} + int sysno = -1; + long ret = -1; + struct iovec iov; -static inline unsigned long -get_syscall_return(syscall_args *args) -{ - /* TODO: skeleton for UTI */ - return 0; + if (!args) { + printf("%s: input args is NULL.\n", __func__); + return -1; + } + memset(&iov, 0, sizeof(iov)); + iov.iov_base = &sysno; + iov.iov_len = sizeof(sysno); + ret = ptrace(PTRACE_GETREGSET, args->target_pid, NT_ARM_SYSTEM_CALL, + &iov); + if (ret) { + printf("%s: ptrace(PTRACE_GETREGSET) failed. (%d)", + __func__, errno); + } + return sysno; } static inline unsigned long get_syscall_arg1(syscall_args *args) { - /* TODO: skeleton for UTI */ - return 0; + if (!args) { + printf("%s: input args is NULL.\n", __func__); + return 0; + } + return args->orig_x0; } static inline unsigned long get_syscall_arg2(syscall_args *args) { - /* TODO: skeleton for UTI */ - return 0; + if (!args) { + printf("%s: input args is NULL.\n", __func__); + return 0; + } + return args->regs.regs[1]; } static inline unsigned long get_syscall_arg3(syscall_args *args) { - /* TODO: skeleton for UTI */ - return 0; + if (!args) { + printf("%s: input args is NULL.\n", __func__); + return 0; + } + return args->regs.regs[2]; } static inline unsigned long get_syscall_arg4(syscall_args *args) { - /* TODO: skeleton for UTI */ - return 0; + if (!args) { + printf("%s: input args is NULL.\n", __func__); + return 0; + } + return args->regs.regs[3]; } static inline unsigned long get_syscall_arg5(syscall_args *args) { - /* TODO: skeleton for UTI */ - return 0; + if (!args) { + printf("%s: input args is NULL.\n", __func__); + return 0; + } + return args->regs.regs[4]; } static inline unsigned long get_syscall_arg6(syscall_args *args) { - /* TODO: skeleton for UTI */ - return 0; + if (!args) { + printf("%s: input args is NULL.\n", __func__); + return 0; + } + return args->regs.regs[5]; } static inline void set_syscall_number(syscall_args *args, unsigned long value) { - /* TODO: skeleton for UTI */ + int sysno = (int)value; + long 
ret = -1; + struct iovec iov; + + if (!args) { + printf("%s: input args is NULL.\n", __func__); + return; + } + memset(&iov, 0, sizeof(iov)); + iov.iov_base = &sysno; + iov.iov_len = sizeof(sysno); + ret = ptrace(PTRACE_SETREGSET, args->target_pid, NT_ARM_SYSTEM_CALL, + &iov); + if (ret) { + printf("%s: ptrace(PTRACE_GETREGSET) failed. (%d)", + __func__, errno); + } + else { + if (value == (unsigned long)-1) { + args->bypass = 1; + } + } +} + +static inline void +set_syscall_ret_or_arg1(syscall_args *args, unsigned long value, int ret_flag) +{ + /* called by set_syscall_return() */ + if (ret_flag == 1) { + /* stopped syscall-enter */ + if (syscall_enter(args) == 1) { + /* syscall no bypass */ + if (args->bypass != 1) { + /* no effect */ + goto out; + } + } + } + /* called by set_syscall_arg1() */ + else if (ret_flag == 0) { + /* stopped syscall-return */ + if (syscall_enter(args) == 0) { + /* no effect */ + goto out; + } + /* set original arg1 */ + args->orig_x0 = value; + } + /* illegal ret_flag */ + else { + /* no effect */ + goto out; + } + + /* set value */ + args->regs.regs[0] = value; +out: + return; } static inline void set_syscall_return(syscall_args *args, unsigned long value) { - /* TODO: skeleton for UTI */ + if (!args) { + printf("%s: input args is NULL.\n", __func__); + return; + } + set_syscall_ret_or_arg1(args, value, 1); } static inline void set_syscall_arg1(syscall_args *args, unsigned long value) { - /* TODO: skeleton for UTI */ + if (!args) { + printf("%s: input args is NULL.\n", __func__); + return; + } + set_syscall_ret_or_arg1(args, value, 0); } static inline void set_syscall_arg2(syscall_args *args, unsigned long value) { - /* TODO: skeleton for UTI */ + if (!args) { + printf("%s: input args is NULL.\n", __func__); + return; + } + args->regs.regs[1] = value; } static inline void set_syscall_arg3(syscall_args *args, unsigned long value) { - /* TODO: skeleton for UTI */ + if (!args) { + printf("%s: input args is NULL.\n", __func__); + return; + 
} + args->regs.regs[2] = value; } static inline void set_syscall_arg4(syscall_args *args, unsigned long value) { - /* TODO: skeleton for UTI */ + if (!args) { + printf("%s: input args is NULL.\n", __func__); + return; + } + args->regs.regs[3] = value; } static inline void set_syscall_arg5(syscall_args *args, unsigned long value) { - /* TODO: skeleton for UTI */ + if (!args) { + printf("%s: input args is NULL.\n", __func__); + return; + } + args->regs.regs[4] = value; } static inline void set_syscall_arg6(syscall_args *args, unsigned long value) { - /* TODO: skeleton for UTI */ + if (!args) { + printf("%s: input args is NULL.\n", __func__); + return; + } + args->regs.regs[5] = value; } #endif /* !ARCH_ARGS_H */ diff --git a/executer/user/arch/arm64/archdep.S b/executer/user/arch/arm64/archdep.S index 59685e1b..57f19660 100644 --- a/executer/user/arch/arm64/archdep.S +++ b/executer/user/arch/arm64/archdep.S @@ -1,16 +1,39 @@ /* archdep.S COPYRIGHT FUJITSU LIMITED 2017-2018 */ -/* TODO: skeleton for UTI */ + +#include + +/* + * int switch_ctx(int fd, unsigned long cmd, void **param, void *lctx, + * void *rctx); + * : + * x0 : int fd + * x1 : unsigned long cmd + * x2 : void **param + * x3 : void *lctx + * x4 : void *rctx + */ .global switch_ctx switch_ctx: + /* send ioctl(fd, cmd, param) */ + mov w8, #__NR_ioctl + svc #0x0 + + /* if (syscall_ret < 0 && -4095(-MAX_ERRNO) <= syscall_ret) goto 1f; */ + cmp x0, #0xfffffffffffff000 + b.hi 1f + + /* if (syscall_ret != 0) goto 2f; */ + cmp x0, #0x0 + b.ne 2f + + /* function_ret = 0 */ + mov x0, #0x0 ret -/* TODO: skeleton for UTI */ -.global compare_and_swap -compare_and_swap: - ret - -/* TODO: skeleton for UTI */ -.global compare_and_swap_int -compare_and_swap_int: + /* error case */ +1: + /* function_ret = -1 */ + mov x0, #0xffffffffffffffff +2: ret diff --git a/executer/user/arch/arm64/archdep.c b/executer/user/arch/arm64/archdep.c new file mode 100644 index 00000000..2000ac15 --- /dev/null +++ 
b/executer/user/arch/arm64/archdep.c @@ -0,0 +1,49 @@ +/* archdep.c COPYRIGHT FUJITSU LIMITED 2017 */ + +#include + +/* @ref.impl arch/arm64/include/asm/atomic_ll_sc.h::__ll_sc___cmpxchg_case_mb_8() */ +unsigned long compare_and_swap(unsigned long *addr, + unsigned long old, unsigned long new) +{ + unsigned long tmp, oldval; + + asm volatile( + " prfm pstl1strm, %2\n" + "1: ldxr %1, %2\n" + " eor %0, %1, %3\n" + " cbnz %0, 2f\n" + " stlxr %w0, %4, %2\n" + " cbnz %w0, 1b\n" + " dmb ish\n" + " mov %1, %3\n" + "2:" + : "=&r"(tmp), "=&r"(oldval), "+Q"(*addr) + : "Lr"(old), "r"(new) + : "memory"); + + return oldval; +} + +/* @ref.impl arch/arm64/include/asm/atomic_ll_sc.h::__ll_sc___cmpxchg_case_mb_4() */ +unsigned int compare_and_swap_int(unsigned int *addr, + unsigned int old, unsigned int new) +{ + unsigned long tmp, oldval; + + asm volatile( + " prfm pstl1strm, %2\n" + "1: ldxr %w1, %2\n" + " eor %w0, %w1, %w3\n" + " cbnz %w0, 2f\n" + " stlxr %w0, %w4, %2\n" + " cbnz %w0, 1b\n" + " dmb ish\n" + " mov %w1, %w3\n" + "2:" + : "=&r"(tmp), "=&r"(oldval), "+Q"(*addr) + : "Lr"(old), "r"(new) + : "memory"); + + return oldval; +} diff --git a/executer/user/arch/arm64/include/arch-eclair.h b/executer/user/arch/arm64/include/arch-eclair.h index bf73ac31..4702229a 100644 --- a/executer/user/arch/arm64/include/arch-eclair.h +++ b/executer/user/arch/arm64/include/arch-eclair.h @@ -1,11 +1,48 @@ -/* arch-eclair.h COPYRIGHT FUJITSU LIMITED 2016 */ +/* arch-eclair.h COPYRIGHT FUJITSU LIMITED 2016-2018 */ #ifndef HEADER_USER_ARM64_ECLAIR_H #define HEADER_USER_ARM64_ECLAIR_H -/* VA_BITS=48, 4K_PAGE address */ -#define MAP_KERNEL 0xffffffffff800000 -#define MAP_ST 0xffff800000000000 -#define MAP_KERNEL_TEXT "0xffffffffff800000" +#ifdef CONFIG_ARM64_64K_PAGES +# +# if (CONFIG_ARM64_VA_BITS == 42) +# /* VA_BITS=42, 64K_PAGE address */ +# define MAP_KERNEL 0xffffffffe0000000 +# define MAP_ST 0xfffffe0000000000 +# define MAP_KERNEL_TEXT "0xffffffffe0000000" +# +# elif (CONFIG_ARM64_VA_BITS 
== 48) +# /* VA_BITS=48, 64K_PAGE address */ +# define MAP_KERNEL 0xffffffffe0000000 +# define MAP_ST 0xffff800000000000 +# define MAP_KERNEL_TEXT "0xffffffffe0000000" +# +# else +# +# error "No support VA_BITS and PAGE_SIZE" +# +# endif +# +#else /* CONFIG_ARM64_64K_PAGES */ +# +# if (CONFIG_ARM64_VA_BITS == 39) +# /* VA_BITS=39, 4K_PAGE address */ +# define MAP_KERNEL 0xffffffffff800000 +# define MAP_ST 0xffffffc000000000 +# define MAP_KERNEL_TEXT "0xffffffffff800000" +# +# elif (CONFIG_ARM64_VA_BITS == 48) +# /* VA_BITS=48, 4K_PAGE address */ +# define MAP_KERNEL 0xffffffffff800000 +# define MAP_ST 0xffff800000000000 +# define MAP_KERNEL_TEXT "0xffffffffff800000" +# +# else +# +# error "No support VA_BITS and PAGE_SIZE" +# +# endif +# +#endif /* CONFIG_ARM64_64K_PAGES */ #define ARCH_CLV_SPAN "arm64_cpu_local_variables_span" @@ -13,7 +50,7 @@ #define ARCH_REGS 34 -#define PANIC_REGS_OFFSET 144 +#define PANIC_REGS_OFFSET 160 struct arch_kregs { unsigned long x19, x20, x21, x22, x23; @@ -21,4 +58,8 @@ struct arch_kregs { unsigned long fp, sp, pc; }; +#ifdef POSTK_DEBUG_ARCH_DEP_34 +uintptr_t virt_to_phys(uintptr_t va); +#endif /* POSTK_DEBUG_ARCH_DEP_34 */ + #endif /* HEADER_USER_ARM64_ECLAIR_H */ diff --git a/executer/user/arch/x86_64/arch-eclair.c b/executer/user/arch/x86_64/arch-eclair.c index 6b1b0d2a..3e17d559 100644 --- a/executer/user/arch/x86_64/arch-eclair.c +++ b/executer/user/arch/x86_64/arch-eclair.c @@ -99,3 +99,26 @@ int print_kregs(char *rbp, size_t rbp_size, const struct arch_kregs *kregs) return total; } + +#ifdef POSTK_DEBUG_ARCH_DEP_34 +static uintptr_t virt_to_phys(uintptr_t va) +{ + extern uintptr_t kernel_base; + + if (va >= MAP_KERNEL_START) { + return va - MAP_KERNEL_START + kernel_base; + } + else if (va >= LINUX_PAGE_OFFSET) { + return va - LINUX_PAGE_OFFSET; + } + else if (va >= MAP_FIXED_START) { + return va - MAP_FIXED_START; + } + else if (va >= MAP_ST_START) { + return va - MAP_ST_START; + } + + return NOPHYS; +} /* virt_to_phys() */ 
+#endif /* POSTK_DEBUG_ARCH_DEP_34 */ + diff --git a/executer/user/arch/x86_64/arch_args.h b/executer/user/arch/x86_64/arch_args.h index d9bb0a16..8cb498d0 100644 --- a/executer/user/arch/x86_64/arch_args.h +++ b/executer/user/arch/x86_64/arch_args.h @@ -25,11 +25,13 @@ get_syscall_number(syscall_args *args) return args->orig_rax; } +#ifndef POSTK_DEBUG_ARCH_DEP_90 /* syscall_enter check is arch depend. */ static inline unsigned long get_syscall_return(syscall_args *args) { return args->rax; } +#endif /* !POSTK_DEBUG_ARCH_DEP_90 */ static inline unsigned long get_syscall_arg1(syscall_args *args) @@ -120,4 +122,8 @@ set_syscall_arg6(syscall_args *args, unsigned long value) { args->r9 = value; } + +#ifdef POSTK_DEBUG_ARCH_DEP_90 /* syscall_enter check is arch depend. */ +#define syscall_enter(argsp) (get_syscall_return(argsp) == -ENOSYS) +#endif /* POSTK_DEBUG_ARCH_DEP_90 */ #endif diff --git a/executer/user/arch/x86_64/include/arch-eclair.h b/executer/user/arch/x86_64/include/arch-eclair.h index 99350247..1dfa09c3 100644 --- a/executer/user/arch/x86_64/include/arch-eclair.h +++ b/executer/user/arch/x86_64/include/arch-eclair.h @@ -31,4 +31,8 @@ struct arch_kregs { uintptr_t r15, rflags, rsp0; }; +#ifdef POSTK_DEBUG_ARCH_DEP_34 +uintptr_t virt_to_phys(uintptr_t va); +#endif /* POSTK_DEBUG_ARCH_DEP_34 */ + #endif /* HEADER_USER_x86_ECLAIR_H */ diff --git a/executer/user/eclair.c b/executer/user/eclair.c index 603f4838..a59ba9b4 100644 --- a/executer/user/eclair.c +++ b/executer/user/eclair.c @@ -73,7 +73,11 @@ static dump_mem_chunks_t *mem_chunks; static int num_processors = -1; static asymbol **symtab = NULL; static ssize_t nsyms; +#ifdef POSTK_DEBUG_ARCH_DEP_34 +uintptr_t kernel_base; +#else /* POSTK_DEBUG_ARCH_DEP_34 */ static uintptr_t kernel_base; +#endif /* POSTK_DEBUG_ARCH_DEP_34 */ static struct thread_info *tihead = NULL; static struct thread_info **titailp = &tihead; static struct thread_info *curr_thread = NULL; @@ -93,6 +97,7 @@ uintptr_t 
lookup_symbol(char *name) { return NOSYMBOL; } /* lookup_symbol() */ +#ifndef POSTK_DEBUG_ARCH_DEP_34 #define NOPHYS ((uintptr_t)-1) static uintptr_t virt_to_phys(uintptr_t va) { @@ -111,6 +116,7 @@ static uintptr_t virt_to_phys(uintptr_t va) { return NOPHYS; } /* virt_to_phys() */ +#endif /* !POSTK_DEBUG_ARCH_DEP_34 */ static int read_physmem(uintptr_t pa, void *buf, size_t size) { off_t off; diff --git a/executer/user/eclair.h b/executer/user/eclair.h index e0e81669..73ee5cd4 100644 --- a/executer/user/eclair.h +++ b/executer/user/eclair.h @@ -15,4 +15,8 @@ ssize_t print_bin(char *buf, size_t buf_size, void *data, size_t size); /* arch depend */ int print_kregs(char *rbp, size_t rbp_size, const struct arch_kregs *kregs); +#ifdef POSTK_DEBUG_ARCH_DEP_34 +#define NOPHYS ((uintptr_t)-1) +#endif /* POSTK_DEBUG_ARCH_DEP_34 */ + #endif /* HEADER_USER_COMMON_ECLAIR_H */ diff --git a/executer/user/ldump2mcdump.c b/executer/user/ldump2mcdump.c index 3ae0d6f8..5b094f5e 100644 --- a/executer/user/ldump2mcdump.c +++ b/executer/user/ldump2mcdump.c @@ -244,7 +244,11 @@ void cmd_ldump2mcdump(void) bfd_init(); +#ifdef POSTK_DEBUG_ARCH_DEP_34 /* use bfd_open target is NULL(automatic) */ + abfd = bfd_fopen(fname, NULL, "w", -1); +#else /* POSTK_DEBUG_ARCH_DEP_34 */ abfd = bfd_fopen(fname, "elf64-x86-64", "w", -1); +#endif /* POSTK_DEBUG_ARCH_DEP_34 */ if (!abfd) { bfd_perror("bfd_fopen"); return; diff --git a/kernel/Makefile.build.in b/kernel/Makefile.build.in index b48f2d57..2ca02ec7 100644 --- a/kernel/Makefile.build.in +++ b/kernel/Makefile.build.in @@ -8,13 +8,13 @@ OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o shmobj.o OBJS += zeroobj.o procfs.o devobj.o sysfs.o xpmem.o profile.o freeze.o OBJS += rbtree.o hugefileobj.o OBJS += pager.o -# POSTK_DEBUG_ARCH_DEP_18 coredump arch separation. 
DEPSRCS=$(wildcard $(SRC)/*.c) # OBJS added gencore.o ifeq ($(ARCH), arm64) OBJS += gencore.o -DEPSRCS += $(SRC)/../arch/arm64/kernel/gencore.c +# POSTK_DEBUG_ARCH_DEP_18 coredump arch separation, delete unnecessary code. +#DEPSRCS += $(SRC)/../arch/arm64/kernel/gencore.c endif CFLAGS += -I$(SRC)/include -I@abs_builddir@/../ -I@abs_builddir@/include -D__KERNEL__ -g -fno-omit-frame-pointer -fno-inline -fno-inline-small-functions @@ -77,8 +77,9 @@ $(IHKOBJ): FORCE %.o: $(SRC)/%.c $(cc_cmd) -gencore.o: ../arch/arm64/kernel/gencore.c - $(cc_cmd) +# POSTK_DEBUG_ARCH_DEP_18 coredump arch separation, delete unnecessary code. +#gencore.o: ../arch/arm64/kernel/gencore.c +# $(cc_cmd) FORCE: diff --git a/kernel/Makefile.in b/kernel/Makefile.in index 72013e0d..7bafb8ac 100644 --- a/kernel/Makefile.in +++ b/kernel/Makefile.in @@ -44,7 +44,9 @@ $(O): clean: rm -rf $(O) ifeq ($(ARCH), arm64) - @rm -f $(vdsodir)/*.o $(vdsodir)/vdso.* $(vdsodir)/Makefile.dep -r $(vdsodir)/../include + @rm -f $(vdsodir)/*.o $(vdsodir)/vdso.lds $(vdsodir)/vdso.so* $(vdsodir)/Makefile.dep + @rm -f $(vdsodir)/../include/vdso-offsets.h $(vdsodir)/../include/vdso-so-path.h + @if [ -d $(vdsodir)/../include ]; then rmdir --ignore-fail-on-non-empty $(vdsodir)/../include; fi endif install: diff --git a/kernel/include/syscall.h b/kernel/include/syscall.h index a7628905..58de29bf 100644 --- a/kernel/include/syscall.h +++ b/kernel/include/syscall.h @@ -379,13 +379,8 @@ struct coretable { /* table entry for a core chunk */ unsigned long addr; /* physical addr of the chunk */ }; -#ifdef POSTK_DEBUG_TEMP_FIX_1 -void create_proc_procfs_files(int pid, int tid, int cpuid); -void delete_proc_procfs_files(int pid, int tid); -#else /* POSTK_DEBUG_TEMP_FIX_1 */ void create_proc_procfs_files(int pid, int cpuid); void delete_proc_procfs_files(int pid); -#endif /* POSTK_DEBUG_TEMP_FIX_1 */ void create_os_procfs_files(void); void delete_os_procfs_files(void); diff --git a/kernel/mem.c b/kernel/mem.c index 9756d0cb..faa87642 
100644 --- a/kernel/mem.c +++ b/kernel/mem.c @@ -979,7 +979,6 @@ void coredump(struct thread *thread, void *regs) return; } -#ifndef POSTK_DEBUG_ARCH_DEP_18 ret = gencore(thread, regs, &coretable, &chunks); if (ret != 0) { dkprintf("could not generate a core file image\n"); @@ -996,7 +995,6 @@ void coredump(struct thread *thread, void *regs) kprintf("core dump failed.\n"); } freecore(&coretable); -#endif /* POSTK_DEBUG_ARCH_DEP_18 */ } #ifndef POSTK_DEBUG_ARCH_DEP_8 @@ -1665,7 +1663,7 @@ void *ihk_mc_map_virtual(unsigned long phys, int npages, flush_tlb_single((unsigned long)(p + (i << PAGE_SHIFT))); } - barrier(); + barrier(); /* Temporary fix for Thunder-X */ return (char *)p + offset; } diff --git a/kernel/process.c b/kernel/process.c index 77d519a5..b4fe76cf 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -1,4 +1,4 @@ -/* process.c COPYRIGHT FUJITSU LIMITED 2015-2017 */ +/* process.c COPYRIGHT FUJITSU LIMITED 2015-2018 */ /** * \file process.c * License details are found in the file LICENSE.