From dd37443fc7cd476ebd0a333b8601e34e8768c8c2 Mon Sep 17 00:00:00 2001 From: Ken Sato Date: Fri, 24 Jun 2016 13:50:12 +0900 Subject: [PATCH] PAPI support: handle performance counter overflow and add fcntl support for mckfd. --- arch/x86/kernel/cpu.c | 27 ++++- arch/x86/kernel/include/syscall_list.h | 2 +- arch/x86/kernel/perfctr.c | 73 ++++++++++--- kernel/include/process.h | 2 + kernel/syscall.c | 135 +++++++++++++++---------- lib/include/ihk/perfctr.h | 4 +- 6 files changed, 170 insertions(+), 73 deletions(-) diff --git a/arch/x86/kernel/cpu.c b/arch/x86/kernel/cpu.c index 7f847fbb..3e2ddd18 100644 --- a/arch/x86/kernel/cpu.c +++ b/arch/x86/kernel/cpu.c @@ -883,13 +883,36 @@ void handle_interrupt(int vector, struct x86_user_context *regs) dkprintf("timer[%lu]: CPU_FLAG_NEED_RESCHED \n", rdtsc()); } else if (vector == LOCAL_PERF_VECTOR) { + struct siginfo info; unsigned long value; + struct thread *thread = cpu_local_var(current); + struct process *proc = thread->proc; + long irqstate; + struct mckfd *fdp; + + lapic_write(LAPIC_LVTPC, LOCAL_PERF_VECTOR); value = rdmsr(MSR_PERF_GLOBAL_STATUS); wrmsr(MSR_PERF_GLOBAL_OVF_CTRL, value); wrmsr(MSR_PERF_GLOBAL_OVF_CTRL, 0); - //TODO: counter overflow signal - //set_signal(0x1d, regs, NULL); // SIGIO + + irqstate = ihk_mc_spinlock_lock(&proc->mckfd_lock); + for(fdp = proc->mckfd; fdp; fdp = fdp->next) { + if(fdp->sig_no > 0) + break; + } + ihk_mc_spinlock_unlock(&proc->mckfd_lock, irqstate); + + if(fdp) { + memset(&info, '\0', sizeof info); + info.si_signo = fdp->sig_no; + info._sifields._sigfault.si_addr = (void *)regs->gpr.rip; + info._sifields._sigpoll.si_fd = fdp->fd; + set_signal(fdp->sig_no, regs, &info); + } + else { + set_signal(SIGIO, regs, NULL); + } } else if (vector >= IHK_TLB_FLUSH_IRQ_VECTOR_START && vector < IHK_TLB_FLUSH_IRQ_VECTOR_END) { diff --git a/arch/x86/kernel/include/syscall_list.h b/arch/x86/kernel/include/syscall_list.h index 2a2b88a5..92ac7a77 100644 --- a/arch/x86/kernel/include/syscall_list.h +++ 
b/arch/x86/kernel/include/syscall_list.h @@ -66,7 +66,7 @@ SYSCALL_DELEGATED(65, semop) SYSCALL_HANDLED(67, shmdt) SYSCALL_DELEGATED(69, msgsnd) SYSCALL_DELEGATED(70, msgrcv) -SYSCALL_DELEGATED(72, fcntl) +SYSCALL_HANDLED(72, fcntl) SYSCALL_DELEGATED(79, getcwd) SYSCALL_DELEGATED(89, readlink) SYSCALL_HANDLED(96, gettimeofday) diff --git a/arch/x86/kernel/perfctr.c b/arch/x86/kernel/perfctr.c index 0cda9908..b2c2cd09 100644 --- a/arch/x86/kernel/perfctr.c +++ b/arch/x86/kernel/perfctr.c @@ -12,6 +12,7 @@ #include #include #include +#include extern unsigned int *x86_march_perfmap; extern int running_on_kvm(void); @@ -26,6 +27,7 @@ unsigned long X86_IA32_FIXED_PERF_COUNTERS_MASK = 0; void x86_init_perfctr(void) { + int i = 0; unsigned long reg; unsigned long value = 0; uint64_t op; @@ -61,6 +63,16 @@ void x86_init_perfctr(void) X86_IA32_NUM_PERF_COUNTERS, X86_IA32_NUM_FIXED_PERF_COUNTERS); } + /* Clear Fixed Counter Control */ + value = rdmsr(MSR_PERF_FIXED_CTRL); + value &= 0xfffffffffffff000L; + wrmsr(MSR_PERF_FIXED_CTRL, value); + + /* Clear Generic Counter Control */ + for(i = 0; i < X86_IA32_NUM_PERF_COUNTERS; i++) { + wrmsr(MSR_IA32_PERFEVTSEL0 + i, 0); + } + /* Enable PMC Control */ value = rdmsr(MSR_PERF_GLOBAL_CTRL); value |= X86_IA32_PERF_COUNTERS_MASK; @@ -98,7 +110,7 @@ static int set_perfctr_x86_direct(int counter, int mode, unsigned int value) return 0; } -static int set_pmc_x86_direct(int counter, unsigned long val) +static int set_pmc_x86_direct(int counter, long val) { unsigned long cnt_bit = 0; @@ -106,6 +118,8 @@ static int set_pmc_x86_direct(int counter, unsigned long val) return -EINVAL; } + val &= 0x000000ffffffffff; // 40bit Mask + cnt_bit = 1UL << counter; if ( cnt_bit & X86_IA32_PERF_COUNTERS_MASK ) { // set generic pmc @@ -132,7 +146,7 @@ static int set_perfctr_x86(int counter, int event, int mask, int inv, int count, static int set_fixed_counter(int counter, int mode) { unsigned long value = 0; - unsigned int ctr_mask = 0x7; + unsigned int 
ctr_mask = 0xf; int counter_idx = counter - X86_IA32_BASE_FIXED_PERF_COUNTERS ; unsigned int set_val = 0; @@ -213,6 +227,24 @@ int ihk_mc_perfctr_stop(unsigned long counter_mask) value &= ~counter_mask; wrmsr(MSR_PERF_GLOBAL_CTRL, value); + if(counter_mask >> 32 & 0x1) { + value = rdmsr(MSR_PERF_FIXED_CTRL); + value &= ~(0xf); + wrmsr(MSR_PERF_FIXED_CTRL, value); + } + + if(counter_mask >> 32 & 0x2) { + value = rdmsr(MSR_PERF_FIXED_CTRL); + value &= ~(0xf << 4); + wrmsr(MSR_PERF_FIXED_CTRL, value); + } + + if(counter_mask >> 32 & 0x4) { + value = rdmsr(MSR_PERF_FIXED_CTRL); + value &= ~(0xf << 8); + wrmsr(MSR_PERF_FIXED_CTRL, value); + } + return 0; } @@ -220,7 +252,7 @@ int ihk_mc_perfctr_stop(unsigned long counter_mask) int ihk_mc_perfctr_fixed_init(int counter, int mode) { unsigned long value = 0; - unsigned int ctr_mask = 0x7; + unsigned int ctr_mask = 0xf; int counter_idx = counter - X86_IA32_BASE_FIXED_PERF_COUNTERS ; unsigned int set_val = 0; @@ -240,6 +272,9 @@ int ihk_mc_perfctr_fixed_init(int counter, int mode) set_val |= 1; } + // enable PMI on overflow + set_val |= 1 << 3; + set_val <<= counter_idx * 4; value |= set_val; @@ -253,7 +288,7 @@ int ihk_mc_perfctr_reset(int counter) return set_pmc_x86_direct(counter, 0); } -int ihk_mc_perfctr_set(int counter, unsigned long val) +int ihk_mc_perfctr_set(int counter, long val) { return set_pmc_x86_direct(counter, val); } @@ -327,23 +362,33 @@ unsigned long ihk_mc_perfctr_read_msr(int counter) return retval; } -int ihk_mc_perfctr_alloc_counter(unsigned long pmc_status) +int ihk_mc_perfctr_alloc_counter(unsigned int *type, unsigned long *config, unsigned long pmc_status) { + int ret = -1; int i = 0; - int ret = -1; - // find avail generic counter - for(i = 0; i < X86_IA32_NUM_PERF_COUNTERS; i++) { + if(*type == PERF_TYPE_HARDWARE) { + switch(*config){ + case PERF_COUNT_HW_INSTRUCTIONS : + *type = PERF_TYPE_RAW; + *config = 0x5300c0; + break; + default : + // Unexpected config + return -1; + } + } + else if(*type 
!= PERF_TYPE_RAW) { + return -1; + } + + // find avail generic counter + for(i = 0; i < X86_IA32_NUM_PERF_COUNTERS; i++) { if(!(pmc_status & (1 << i))) { ret = i; - pmc_status |= (1 << i); break; } } - if(ret < 0){ - return ret; - } - - return ret; + return ret; } diff --git a/kernel/include/process.h b/kernel/include/process.h index b00a742a..eab37eee 100644 --- a/kernel/include/process.h +++ b/kernel/include/process.h @@ -319,12 +319,14 @@ struct process_vm; struct mckfd { struct mckfd *next; int fd; + int sig_no; long data; void *opt; long (*read_cb)(struct mckfd *, ihk_mc_user_context_t *); int (*ioctl_cb)(struct mckfd *, ihk_mc_user_context_t *); long (*mmap_cb)(struct mckfd *, ihk_mc_user_context_t *); int (*close_cb)(struct mckfd *, ihk_mc_user_context_t *); + int (*fcntl_cb)(struct mckfd *, ihk_mc_user_context_t *); }; #define SFD_CLOEXEC 02000000 diff --git a/kernel/syscall.c b/kernel/syscall.c index aa3290d8..3224f99c 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -2470,6 +2470,31 @@ SYSCALL_DECLARE(close) return rc; } +SYSCALL_DECLARE(fcntl) +{ + int fd = ihk_mc_syscall_arg0(ctx); + // int cmd = ihk_mc_syscall_arg1(ctx); + long rc; + struct thread *thread = cpu_local_var(current); + struct process *proc = thread->proc; + struct mckfd *fdp; + long irqstate; + + irqstate = ihk_mc_spinlock_lock(&proc->mckfd_lock); + for(fdp = proc->mckfd; fdp; fdp = fdp->next) + if(fdp->fd == fd) + break; + ihk_mc_spinlock_unlock(&proc->mckfd_lock, irqstate); + + if(fdp && fdp->fcntl_cb){ + rc = fdp->fcntl_cb(fdp, ctx); + } + else{ + rc = syscall_generic_forwarding(__NR_fcntl, ctx); + } + return rc; +} + SYSCALL_DECLARE(rt_sigprocmask) { int how = ihk_mc_syscall_arg0(ctx); @@ -2621,26 +2646,12 @@ perf_counter_alloc(struct mc_perf_event *event) struct perf_event_attr *attr = &event->attr; struct mc_perf_event *leader = event->group_leader; - if(attr->type == PERF_TYPE_HARDWARE) { - - event->counter_id = ihk_mc_perfctr_alloc_counter(leader->pmc_status); - - } else 
if(attr->type == PERF_TYPE_RAW) { - // PAPI_REF_CYC counted by fixed counter - if((attr->config & 0x0000ffff) == 0x00000300) { - event->counter_id = 2 + X86_IA32_BASE_FIXED_PERF_COUNTERS; - return ret; - } - - event->counter_id = ihk_mc_perfctr_alloc_counter(leader->pmc_status); - } else { - // Not supported type. - ret = -1; - } + ret = ihk_mc_perfctr_alloc_counter(&attr->type, &attr->config, leader->pmc_status); if(ret >= 0) { - leader->pmc_status |= 1UL << event->counter_id; + leader->pmc_status |= 1UL << ret; } + event->counter_id = ret; return ret; } @@ -2649,7 +2660,6 @@ int perf_counter_start(struct mc_perf_event *event) { int ret = 0; - enum ihk_perfctr_type type; struct perf_event_attr *attr = &event->attr; int mode = 0x00; @@ -2660,52 +2670,34 @@ perf_counter_start(struct mc_perf_event *event) mode |= PERFCTR_USER_MODE; } - if(attr->type == PERF_TYPE_HARDWARE) { - switch(attr->config){ - case PERF_COUNT_HW_CPU_CYCLES : - type = APT_TYPE_CYCLE; - break; - case PERF_COUNT_HW_INSTRUCTIONS : - type = APT_TYPE_INSTRUCTIONS; - break; - default : - // Not supported config. - type = PERFCTR_MAX_TYPE; - } - - ret = ihk_mc_perfctr_init(event->counter_id, type, mode); - ihk_mc_perfctr_set(event->counter_id, event->sample_freq * -1); - ihk_mc_perfctr_start(1UL << event->counter_id); - - } else if(attr->type == PERF_TYPE_RAW) { - // PAPI_REF_CYC counted by fixed counter - if(event->counter_id >= X86_IA32_BASE_FIXED_PERF_COUNTERS) { - ret = ihk_mc_perfctr_fixed_init(event->counter_id, mode); - ihk_mc_perfctr_set(event->counter_id, event->sample_freq * -1); - ihk_mc_perfctr_start(1UL << event->counter_id); - return ret; - } - + if(event->counter_id >= 0 && event->counter_id < X86_IA32_NUM_PERF_COUNTERS) { ret = ihk_mc_perfctr_init_raw(event->counter_id, attr->config, mode); - ihk_mc_perfctr_set(event->counter_id, event->sample_freq * -1); ihk_mc_perfctr_start(1UL << event->counter_id); - } else { - // Not supported type. 
+ } + else if(event->counter_id >= X86_IA32_BASE_FIXED_PERF_COUNTERS && + event->counter_id < X86_IA32_BASE_FIXED_PERF_COUNTERS + X86_IA32_NUM_FIXED_PERF_COUNTERS) { + ret = ihk_mc_perfctr_fixed_init(event->counter_id, mode); + ihk_mc_perfctr_start(1UL << event->counter_id); + } + else { ret = -1; } - + return ret; } unsigned long perf_event_read_value(struct mc_perf_event *event) { unsigned long rtn_count = 0; + unsigned long pmc_count = 0; int counter_id = event->counter_id; - if(event->pid == 0) - event->count = ihk_mc_perfctr_read(counter_id); + if(event->pid == 0) { + pmc_count = ihk_mc_perfctr_read(counter_id) + event->attr.sample_freq; + pmc_count &= 0x000000ffffffffffL; // 40bit MASK + } - rtn_count += event->count; + rtn_count += event->count + pmc_count; if(event->attr.inherit) rtn_count += event->child_count_total; @@ -2922,11 +2914,21 @@ perf_ioctl(struct mckfd *sfd, ihk_mc_user_context_t *ctx) break; case PERF_EVENT_IOC_RESET: // TODO: reset other process - ihk_mc_perfctr_reset(counter_id); + ihk_mc_perfctr_set(counter_id, event->attr.sample_freq * -1); + event->count = 0L; break; case PERF_EVENT_IOC_REFRESH: // TODO: refresh other process - ihk_mc_perfctr_set(counter_id, event->sample_freq * -1); + + // not supported on inherited events + if(event->attr.inherit) + return -EINVAL; + + event->count += event->attr.sample_freq; + ihk_mc_perfctr_set(counter_id, event->attr.sample_freq * -1); + + perf_start(event); + break; default : return -1; @@ -2945,6 +2947,28 @@ perf_close(struct mckfd *sfd, ihk_mc_user_context_t *ctx) return 0; } +static int +perf_fcntl(struct mckfd *sfd, ihk_mc_user_context_t *ctx) +{ + int cmd = ihk_mc_syscall_arg1(ctx); + long arg = ihk_mc_syscall_arg2(ctx); + int rc = 0; + + switch(cmd) { + case 10: // F_SETSIG + sfd->sig_no = arg; + break; + case 0xf: // F_SETOWN_EX + break; + default : + break; + } + + rc = syscall_generic_forwarding(__NR_fcntl, ctx); + + return rc; +} + static long perf_mmap(struct mckfd *sfd, 
ihk_mc_user_context_t *ctx) { @@ -2963,6 +2987,7 @@ perf_mmap(struct mckfd *sfd, ihk_mc_user_context_t *ctx) // setup perf_event_mmap_page page = (struct perf_event_mmap_page *)rc; + page->data_head = 16; page->cap_user_rdpmc = 1; return rc; @@ -3014,7 +3039,7 @@ SYSCALL_DECLARE(perf_event_open) event->sample_freq = attr->sample_freq; event->nr_siblings = 0; - event->count = 0; + event->count = 0L; event->child_count_total = 0; event->parent = NULL; event->pid = pid; @@ -3050,10 +3075,12 @@ SYSCALL_DECLARE(perf_event_open) if(!sfd) return -ENOMEM; sfd->fd = fd; + sfd->sig_no = -1; sfd->read_cb = perf_read; sfd->ioctl_cb = perf_ioctl; sfd->close_cb = perf_close; sfd->mmap_cb = perf_mmap; + sfd->fcntl_cb = perf_fcntl; sfd->data = (long)event; irqstate = ihk_mc_spinlock_lock(&proc->mckfd_lock); diff --git a/lib/include/ihk/perfctr.h b/lib/include/ihk/perfctr.h index 82221038..4df7d011 100644 --- a/lib/include/ihk/perfctr.h +++ b/lib/include/ihk/perfctr.h @@ -54,11 +54,11 @@ int ihk_mc_perfctr_start(unsigned long counter_mask); int ihk_mc_perfctr_stop(unsigned long counter_mask); int ihk_mc_perfctr_fixed_init(int counter, int mode); int ihk_mc_perfctr_reset(int counter); -int ihk_mc_perfctr_set(int counter, unsigned long value); +int ihk_mc_perfctr_set(int counter, long value); int ihk_mc_perfctr_read_mask(unsigned long counter_mask, unsigned long *value); unsigned long ihk_mc_perfctr_read(int counter); unsigned long ihk_mc_perfctr_read_msr(int counter); -int ihk_mc_perfctr_alloc_counter(unsigned long pmc_status); +int ihk_mc_perfctr_alloc_counter(unsigned int *type, unsigned long *config, unsigned long pmc_status); #endif