From aa191b87d3f27008732bcf0cde78ae578d9aa945 Mon Sep 17 00:00:00 2001
From: Balazs Gerofi
Date: Fri, 7 Aug 2015 08:41:00 +0900
Subject: [PATCH] schedule(): use XSAVE/XRSTOR and swap floating point registers in context switch

---
Review notes (this section is not part of the commit message):

 * init_fpu() records in xsave_available whether CPUID.01H:ECX bit 26 is set,
   and sets CR4 bit 18 in that case.
 * save_fp_regs() allocates and zeroes proc->fp_regs when it is still NULL,
   then executes XSAVE with EDX:EAX = 0x7 if xsave_available is set.
 * restore_fp_regs() executes XRSTOR with the same mask and no longer
   releases the save area afterwards.
 * release_fp_regs() returns early when proc is NULL or when no save area
   was allocated.  The guard is written as "!proc || !proc->fp_regs" so
   that a NULL proc is never dereferenced.
 * schedule() saves the registers of the outgoing process and restores
   those of the incoming one, skipping the per-CPU idle process.

 arch/x86/kernel/cpu.c               | 65 ++++++++++++++++++++++-------
 arch/x86/kernel/include/registers.h |  1 +
 kernel/process.c                    |  9 ++++
 3 files changed, 61 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/cpu.c b/arch/x86/kernel/cpu.c
index 803dde32..9fffcc38 100644
--- a/arch/x86/kernel/cpu.c
+++ b/arch/x86/kernel/cpu.c
@@ -143,6 +143,8 @@ static void init_idt(void)
 	reload_idt();
 }
 
+static int xsave_available = 0;
+
 void init_fpu(void)
 {
 	unsigned long reg;
@@ -170,20 +172,24 @@ void init_fpu(void)
 	reg |= ((1 << 9) | (1 << 10));
 	if(cpuid01_ecx & (1 << 26)) {
 		/* XSAVE set, enable access to xcr0 */
+		dkprintf("init_fpu(): XSAVE available\n");
+		xsave_available = 1;
 		reg |= (1 << 18);
 	}
 	asm volatile("movq %0, %%cr4" : : "r"(reg));
 
-	kprintf("init_fpu(): SSE init: CR4 = 0x%016lX; ", reg);
+	dkprintf("init_fpu(): SSE init: CR4 = 0x%016lX\n", reg);
 
 	/* Set xcr0[2:1] to enable avx ops */
 	if(cpuid01_ecx & (1 << 28)) {
 		reg = xgetbv(0);
 		reg |= 0x6;
 		xsetbv(0, reg);
+		dkprintf("init_fpu(): AVX init: XCR0 = 0x%016lX\n", reg);
 	}
 
-	kprintf("XCR0 = 0x%016lX\n", reg);
+	/* TODO: set MSR_IA32_XSS to enable xsaves/xrstors */
+
 #else
 	kprintf("init_fpu(): SSE not enabled\n");
 #endif
@@ -1186,10 +1192,11 @@ release_fp_regs(struct process *proc)
 {
 	int pages;
 
-	if (!proc->fp_regs)
+	if (!proc || !proc->fp_regs)
 		return;
+
 	pages = (sizeof(fp_regs_struct) + 4095) >> 12;
-	ihk_mc_free_pages(proc->fp_regs, 1);
+	ihk_mc_free_pages(proc->fp_regs, pages);
 	proc->fp_regs = NULL;
 }
 
@@ -1198,14 +1205,30 @@ save_fp_regs(struct process *proc)
 {
 	int pages;
 
-	if (proc->fp_regs)
-		return;
-	pages = (sizeof(fp_regs_struct) + 4095) >> 12;
-	proc->fp_regs = ihk_mc_alloc_pages(pages, IHK_MC_AP_NOWAIT);
-	if(!proc->fp_regs)
-		return;
-	memset(proc->fp_regs, 0, sizeof(fp_regs_struct));
-	// TODO: do xsave
+	if (!proc->fp_regs) {
+		pages = (sizeof(fp_regs_struct) + 4095) >> 12;
+		proc->fp_regs = ihk_mc_alloc_pages(pages, IHK_MC_AP_NOWAIT);
+
+		if (!proc->fp_regs) {
+			kprintf("error: allocating fp_regs pages\n");
+			return;
+		}
+
+		memset(proc->fp_regs, 0, sizeof(fp_regs_struct));
+	}
+
+	if (xsave_available) {
+		unsigned int low, high;
+
+		/* Request full save of x87, SSE and AVX states */
+		low = 0x7;
+		high = 0;
+
+		asm volatile("xsave %0" : : "m" (*proc->fp_regs), "a" (low), "d" (high)
+			: "memory");
+
+		dkprintf("fp_regs for TID %d saved\n", proc->ftn->tid);
+	}
 }
 
 void
@@ -1213,8 +1236,22 @@ restore_fp_regs(struct process *proc)
 {
 	if (!proc->fp_regs)
 		return;
-	// TODO: do xrstor
-	release_fp_regs(proc);
+
+	if (xsave_available) {
+		unsigned int low, high;
+
+		/* Request full restore of x87, SSE and AVX states */
+		low = 0x7;
+		high = 0;
+
+		asm volatile("xrstor %0" : : "m" (*proc->fp_regs),
+			"a" (low), "d" (high));
+
+		dkprintf("fp_regs for TID %d restored\n", proc->ftn->tid);
+	}
+
+	// XXX: why release??
+	//release_fp_regs(proc);
 }
 
 ihk_mc_user_context_t *lookup_user_context(struct process *proc)
diff --git a/arch/x86/kernel/include/registers.h b/arch/x86/kernel/include/registers.h
index 98eb1cb1..39eb2812 100644
--- a/arch/x86/kernel/include/registers.h
+++ b/arch/x86/kernel/include/registers.h
@@ -58,6 +58,7 @@
 #define MSR_IA32_ENERGY_PERF_BIAS	0x000001b0
 #define MSR_NHM_TURBO_RATIO_LIMIT	0x000001ad
 #define MSR_IA32_CR_PAT			0x00000277
+#define MSR_IA32_XSS			0xda0
 
 
 #define CVAL(event, mask) \
diff --git a/kernel/process.c b/kernel/process.c
index e6b05475..572e065b 100644
--- a/kernel/process.c
+++ b/kernel/process.c
@@ -2304,6 +2304,15 @@ redo:
 			restore_debugreg(next->ptrace_debugreg);
 		}
 
+		/* Take care of floating point registers except for idle process */
+		if (prev && prev != &cpu_local_var(idle)) {
+			save_fp_regs(prev);
+		}
+
+		if (next != &cpu_local_var(idle)) {
+			restore_fp_regs(next);
+		}
+
 		ihk_mc_load_page_table(next->vm->page_table);
 
 		dkprintf("[%d] schedule: tlsblock_base: 0x%lX\n",