Compare commits
147 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a029bcac37 | ||
|
|
bd913c503b | ||
|
|
e838affde8 | ||
|
|
59ee251e1c | ||
|
|
fa79db3bcc | ||
|
|
b7c5cba361 | ||
|
|
382614ddae | ||
|
|
aa959c6b34 | ||
|
|
aabc3d386d | ||
|
|
4ebe778ede | ||
|
|
fbb776e4fb | ||
|
|
41b85281a4 | ||
|
|
5532e3c663 | ||
|
|
2af2b1205f | ||
|
|
7d5a68be1b | ||
|
|
f4162dff52 | ||
|
|
a0d909af75 | ||
|
|
63669b7f71 | ||
|
|
4946964ed0 | ||
|
|
5f19842a6a | ||
|
|
9271d5346d | ||
|
|
7bba05cfa4 | ||
|
|
c2a1f933e8 | ||
|
|
055769254d | ||
|
|
786ae83380 | ||
|
|
8c662c83be | ||
|
|
4698bc40c2 | ||
|
|
f5d935b703 | ||
|
|
d53865ac5f | ||
|
|
8934eb91a4 | ||
|
|
ed6d94a358 | ||
|
|
fa923da0e3 | ||
|
|
1f8265efbc | ||
|
|
b553de7435 | ||
|
|
6a82412d64 | ||
|
|
fa29c34995 | ||
|
|
f84b5acf79 | ||
|
|
8b24f60861 | ||
|
|
f82bb284bb | ||
|
|
bf12a5c45e | ||
|
|
ea5681232e | ||
|
|
e6011be1af | ||
|
|
9946ccd6b1 | ||
|
|
daec7de828 | ||
|
|
9ad48083aa | ||
|
|
2eac58aab3 | ||
|
|
22d8d169b6 | ||
|
|
8db54c2637 | ||
|
|
063fa963c3 | ||
|
|
a6488adcc1 | ||
|
|
2239a6b09b | ||
|
|
8c179d506a | ||
|
|
377341ce5f | ||
|
|
8caeba7cba | ||
|
|
1d2f5d9893 | ||
|
|
e4f47df3c3 | ||
|
|
4751055ee4 | ||
|
|
305ebfed0e | ||
|
|
b66b950129 | ||
|
|
4aa8ba2eef | ||
|
|
fab2c2aa97 | ||
|
|
026164eda4 | ||
|
|
e91d1e5b7b | ||
|
|
73743eeeb0 | ||
|
|
c1c1fd578a | ||
|
|
f35cc66d18 | ||
|
|
d9cf1d49b1 | ||
|
|
3d426ada01 | ||
|
|
0307f6a6cc | ||
|
|
0dee04f16b | ||
|
|
0e98e87b95 | ||
|
|
d35e60c1a3 | ||
|
|
037e17c4ed | ||
|
|
2baf274dac | ||
|
|
3b04043f2a | ||
|
|
c0edb6fe6f | ||
|
|
bb137bc9bb | ||
|
|
16af976a71 | ||
|
|
6485578a7f | ||
|
|
d2d0fc6721 | ||
|
|
9574a28a5f | ||
|
|
dbe4ec3247 | ||
|
|
99debc548f | ||
|
|
fa15f6b106 | ||
|
|
8568a73f33 | ||
|
|
8b57b2ee57 | ||
|
|
9a36e7b84a | ||
|
|
d998691425 | ||
|
|
8cdf70c500 | ||
|
|
0e0bc548f6 | ||
|
|
d21ae28843 | ||
|
|
a4a806bef7 | ||
|
|
d30d8fe71c | ||
|
|
a5bdd41c3d | ||
|
|
5f5ab34559 | ||
|
|
b26fa4e87c | ||
|
|
bd5f43b119 | ||
|
|
f97f8dbab3 | ||
|
|
e30946f1f0 | ||
|
|
c3ade864d9 | ||
|
|
9c35935671 | ||
|
|
ed33ee65b2 | ||
|
|
d04b5a09bd | ||
|
|
08cc31f9bf | ||
|
|
cf2166f830 | ||
|
|
765de119dc | ||
|
|
d46110b4d9 | ||
|
|
74f0aec478 | ||
|
|
e3eb7e68bc | ||
|
|
912b8a886c | ||
|
|
e25d35a191 | ||
|
|
a9aad67541 | ||
|
|
cd6e663f48 | ||
|
|
5f095b3952 | ||
|
|
811a275176 | ||
|
|
b388f59ebd | ||
|
|
ff47261337 | ||
|
|
a91bf9a13d | ||
|
|
fcfa94cea1 | ||
|
|
55f7ee1526 | ||
|
|
b1b6fab7b8 | ||
|
|
391886a6f1 | ||
|
|
c810afe224 | ||
|
|
5566ed1a63 | ||
|
|
f0f91d2246 | ||
|
|
0942bf0ce0 | ||
|
|
9c94e90007 | ||
|
|
a6ac906105 | ||
|
|
d4ba4dc8b3 | ||
|
|
815d907ca4 | ||
|
|
3c24315f91 | ||
|
|
25f108bf78 | ||
|
|
cc9d30efbf | ||
|
|
af83f1be64 | ||
|
|
b2cab453f1 | ||
|
|
8909597499 | ||
|
|
86f2a9067b | ||
|
|
a5889fb5df | ||
|
|
f1a86cfbd3 | ||
|
|
c1cf630a94 | ||
|
|
8f30e16976 | ||
|
|
58e2e0a246 | ||
|
|
ea02628f2b | ||
|
|
89acf5c5d6 | ||
|
|
ac8e2a0c40 | ||
|
|
ab7aa3354f | ||
|
|
c4e0b84792 |
29
Makefile.in
29
Makefile.in
@@ -6,7 +6,7 @@ all::
|
||||
@(cd executer/kernel; make modules)
|
||||
@(cd executer/user; make)
|
||||
@case "$(TARGET)" in \
|
||||
attached-mic | builtin-x86 | builtin-mic) \
|
||||
attached-mic | builtin-x86 | builtin-mic | smp-x86) \
|
||||
(cd kernel; make) \
|
||||
;; \
|
||||
*) \
|
||||
@@ -19,7 +19,7 @@ install::
|
||||
@(cd executer/kernel; make install)
|
||||
@(cd executer/user; make install)
|
||||
@case "$(TARGET)" in \
|
||||
attached-mic | builtin-x86 | builtin-mic) \
|
||||
attached-mic | builtin-x86 | builtin-mic | smp-x86) \
|
||||
(cd kernel; make install) \
|
||||
;; \
|
||||
*) \
|
||||
@@ -27,19 +27,38 @@ install::
|
||||
exit 1 \
|
||||
;; \
|
||||
esac
|
||||
if [ "$(TARGET)" = attached-mic ]; then \
|
||||
@case "$(TARGET)" in \
|
||||
attached-mic) \
|
||||
mkdir -p -m 755 $(SBINDIR); \
|
||||
install -m 755 arch/x86/tools/mcreboot-attached-mic.sh $(SBINDIR)/mcreboot; \
|
||||
install -m 755 arch/x86/tools/mcshutdown-attached-mic.sh $(SBINDIR)/mcshutdown; \
|
||||
mkdir -p -m 755 $(MANDIR)/man1; \
|
||||
install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
|
||||
fi
|
||||
;; \
|
||||
builtin-x86) \
|
||||
mkdir -p -m 755 $(SBINDIR); \
|
||||
install -m 755 arch/x86/tools/mcreboot-builtin-x86.sh $(SBINDIR)/mcreboot; \
|
||||
install -m 755 arch/x86/tools/mcshutdown-builtin-x86.sh $(SBINDIR)/mcshutdown; \
|
||||
mkdir -p -m 755 $(MANDIR)/man1; \
|
||||
install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
|
||||
;; \
|
||||
smp-x86) \
|
||||
mkdir -p -m 755 $(SBINDIR); \
|
||||
install -m 755 arch/x86/tools/mcreboot-smp-x86.sh $(SBINDIR)/mcreboot; \
|
||||
mkdir -p -m 755 $(MANDIR)/man1; \
|
||||
install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
|
||||
;; \
|
||||
*) \
|
||||
echo "unknown target $(TARGET)" >&2 \
|
||||
exit 1 \
|
||||
;; \
|
||||
esac
|
||||
|
||||
clean::
|
||||
@(cd executer/kernel; make clean)
|
||||
@(cd executer/user; make clean)
|
||||
@case "$(TARGET)" in \
|
||||
attached-mic | builtin-x86 | builtin-mic) \
|
||||
attached-mic | builtin-x86 | builtin-mic | smp-x86) \
|
||||
(cd kernel; make clean) \
|
||||
;; \
|
||||
*) \
|
||||
|
||||
2
arch/x86/kernel/Makefile.arch
Normal file
2
arch/x86/kernel/Makefile.arch
Normal file
@@ -0,0 +1,2 @@
|
||||
IHK_OBJS += cpu.o interrupt.o memory.o trampoline.o local.o context.o
|
||||
IHK_OBJS += perfctr.o syscall.o vsyscall.o
|
||||
@@ -5,13 +5,18 @@
|
||||
* Control CPU.
|
||||
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2011 - 2012 Taku Shimosawa
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY
|
||||
* 2015/02/26: bgerofi - set pstate, turbo mode and power/perf bias MSRs
|
||||
* 2015/02/12: Dave - enable AVX if supported
|
||||
*/
|
||||
|
||||
#include <ihk/cpu.h>
|
||||
#include <ihk/debug.h>
|
||||
#include <ihk/mm.h>
|
||||
#include <types.h>
|
||||
#include <errno.h>
|
||||
#include <list.h>
|
||||
@@ -22,6 +27,7 @@
|
||||
#include <march.h>
|
||||
#include <signal.h>
|
||||
#include <process.h>
|
||||
#include <cls.h>
|
||||
|
||||
#define LAPIC_ID 0x020
|
||||
#define LAPIC_TIMER 0x320
|
||||
@@ -49,7 +55,7 @@
|
||||
#ifdef DEBUG_PRINT_CPU
|
||||
#define dkprintf kprintf
|
||||
#else
|
||||
#define dkprintf(...)
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#endif
|
||||
|
||||
|
||||
@@ -106,7 +112,12 @@ void reload_idt(void)
|
||||
}
|
||||
|
||||
static struct list_head handlers[256 - 32];
|
||||
extern char nmi[];
|
||||
extern char page_fault[], general_protection_exception[];
|
||||
extern char debug_exception[], int3_exception[];
|
||||
|
||||
uint64_t boot_pat_state = 0;
|
||||
int no_turbo = 0; /* May be updated by early parsing of kargs */
|
||||
|
||||
static void init_idt(void)
|
||||
{
|
||||
@@ -122,15 +133,20 @@ static void init_idt(void)
|
||||
set_idt_entry(i, generic_common_handlers[i]);
|
||||
}
|
||||
|
||||
set_idt_entry(2, (uintptr_t)nmi);
|
||||
set_idt_entry(13, (unsigned long)general_protection_exception);
|
||||
set_idt_entry(14, (unsigned long)page_fault);
|
||||
|
||||
set_idt_entry_trap_gate(1, (unsigned long)debug_exception);
|
||||
set_idt_entry_trap_gate(3, (unsigned long)int3_exception);
|
||||
|
||||
reload_idt();
|
||||
}
|
||||
|
||||
void init_fpu(void)
|
||||
{
|
||||
unsigned long reg;
|
||||
unsigned long cpuid01_ecx;
|
||||
|
||||
asm volatile("movq %%cr0, %0" : "=r"(reg));
|
||||
/* Unset EM and TS flag. */
|
||||
@@ -140,10 +156,36 @@ void init_fpu(void)
|
||||
asm volatile("movq %0, %%cr0" : : "r"(reg));
|
||||
|
||||
#ifdef ENABLE_SSE
|
||||
asm volatile("cpuid" : "=c" (cpuid01_ecx) : "a" (0x1) : "%rbx", "%rdx");
|
||||
asm volatile("movq %%cr4, %0" : "=r"(reg));
|
||||
/* Set OSFXSR flag. */
|
||||
reg |= (1 << 9);
|
||||
/* Cr4 flags:
|
||||
OSFXSR[b9] - enables SSE instructions
|
||||
OSXMMEXCPT[b10] - generate SIMD FP exception instead of invalid op
|
||||
OSXSAVE[b18] - enables access to xcr0
|
||||
|
||||
CPUID.01H:ECX flags:
|
||||
XSAVE[b26] - verify existence of extended crs/XSAVE
|
||||
AVX[b28] - verify existence of AVX instructions
|
||||
*/
|
||||
reg |= ((1 << 9) | (1 << 10));
|
||||
if(cpuid01_ecx & (1 << 26)) {
|
||||
/* XSAVE set, enable access to xcr0 */
|
||||
reg |= (1 << 18);
|
||||
}
|
||||
asm volatile("movq %0, %%cr4" : : "r"(reg));
|
||||
|
||||
kprintf("init_fpu(): SSE init: CR4 = 0x%016lX; ", reg);
|
||||
|
||||
/* Set xcr0[2:1] to enable avx ops */
|
||||
if(cpuid01_ecx & (1 << 28)) {
|
||||
reg = xgetbv(0);
|
||||
reg |= 0x6;
|
||||
xsetbv(0, reg);
|
||||
}
|
||||
|
||||
kprintf("XCR0 = 0x%016lX\n", reg);
|
||||
#else
|
||||
kprintf("init_fpu(): SSE not enabled\n");
|
||||
#endif
|
||||
|
||||
asm volatile("finit");
|
||||
@@ -203,6 +245,153 @@ void lapic_icr_write(unsigned int h, unsigned int l)
|
||||
lapic_write(LAPIC_ICR0, l);
|
||||
}
|
||||
|
||||
|
||||
/*
 * Dump one model-specific register through __kprintf(): the value in
 * decimal and in hexadecimal, then a row of bit indices (63 down to 0)
 * followed by a row holding the corresponding bit values.
 *
 * idx: register number handed to rdmsr().
 */
void print_msr(int idx)
{
	unsigned long long msr_val = rdmsr(idx);
	int pos;

	__kprintf("MSR 0x%x val (dec): %llu\n", idx, msr_val);
	__kprintf("MSR 0x%x val (hex): 0x%llx\n", idx, msr_val);

	/* Header row: bit positions, most significant first. */
	__kprintf(" ");
	pos = 64;
	while (pos-- > 0) {
		__kprintf("%3d", pos);
	}
	__kprintf("\n");

	/* Value row: one 0/1 per bit, in the same order as the header. */
	__kprintf("MSR 0x%x val (bin):", idx);
	pos = 64;
	while (pos-- > 0) {
		__kprintf("%3d", (int)((msr_val >> pos) & 1));
	}
	__kprintf("\n");
}
|
||||
|
||||
|
||||
void init_pstate_and_turbo(void)
|
||||
{
|
||||
uint64_t value;
|
||||
uint64_t eax, ecx;
|
||||
|
||||
asm volatile("cpuid" : "=a" (eax), "=c" (ecx) : "a" (0x6) : "%rbx", "%rdx");
|
||||
|
||||
/* Query and set max pstate value:
|
||||
*
|
||||
* IA32_PERF_CTL (0x199H) bit 15:0:
|
||||
* Target performance State Value
|
||||
*
|
||||
* The base operating ratio can be read
|
||||
* from MSR_PLATFORM_INFO[15:8].
|
||||
*/
|
||||
value = rdmsr(MSR_PLATFORM_INFO);
|
||||
value &= 0xFF00;
|
||||
|
||||
/* Turbo boost setting:
|
||||
* Bit 1 of EAX in Leaf 06H (i.e. CPUID.06H:EAX[1]) indicates opportunistic
|
||||
* processor performance operation, such as IDA, has been enabled by BIOS.
|
||||
*
|
||||
* IA32_PERF_CTL (0x199H) bit 32: IDA (i.e., turbo boost) Engage. (R/W)
|
||||
* When set to 1: disengages IDA
|
||||
* When set to 0: enables IDA
|
||||
*/
|
||||
if ((eax & (1 << 1))) {
|
||||
if (!no_turbo) {
|
||||
uint64_t turbo_value;
|
||||
|
||||
turbo_value = rdmsr(MSR_NHM_TURBO_RATIO_LIMIT);
|
||||
turbo_value &= 0xFF;
|
||||
value = turbo_value << 8;
|
||||
|
||||
/* Enable turbo boost */
|
||||
value &= ~((uint64_t)1 << 32);
|
||||
}
|
||||
/* Turbo boost feature is supported, but requested to be turned off */
|
||||
else {
|
||||
/* Disable turbo boost */
|
||||
value |= (uint64_t)1 << 32;
|
||||
}
|
||||
}
|
||||
|
||||
wrmsr(MSR_IA32_PERF_CTL, value);
|
||||
|
||||
/* IA32_ENERGY_PERF_BIAS (0x1B0H) bit 3:0:
|
||||
* (The processor supports this capability if CPUID.06H:ECX.SETBH[bit 3] is set.)
|
||||
* Power Policy Preference:
|
||||
* 0 indicates preference to highest performance.
|
||||
* 15 indicates preference to maximize energy saving.
|
||||
*
|
||||
* Set energy/perf bias to high performance
|
||||
*/
|
||||
if (ecx & (1 << 3)) {
|
||||
wrmsr(MSR_IA32_ENERGY_PERF_BIAS, 0);
|
||||
}
|
||||
|
||||
//print_msr(MSR_IA32_MISC_ENABLE);
|
||||
//print_msr(MSR_IA32_PERF_CTL);
|
||||
//print_msr(MSR_IA32_ENERGY_PERF_BIAS);
|
||||
}
|
||||
|
||||
enum {
|
||||
PAT_UC = 0, /* uncached */
|
||||
PAT_WC = 1, /* Write combining */
|
||||
PAT_WT = 4, /* Write Through */
|
||||
PAT_WP = 5, /* Write Protected */
|
||||
PAT_WB = 6, /* Write Back (default) */
|
||||
PAT_UC_MINUS = 7, /* UC, but can be overridden by MTRR */
|
||||
};
|
||||
|
||||
#define PAT(x, y) ((uint64_t)PAT_ ## y << ((x)*8))
|
||||
|
||||
void init_pat(void)
|
||||
{
|
||||
uint64_t pat;
|
||||
uint64_t edx;
|
||||
|
||||
/*
|
||||
* An operating system or executive can detect the availability of the
|
||||
* PAT by executing the CPUID instruction with a value of 1 in the EAX
|
||||
* register. Support for the PAT is indicated by the PAT flag (bit 16
|
||||
* of the values returned to EDX register). If the PAT is supported,
|
||||
* the operating system or executive can use the IA32_PAT MSR to program
|
||||
* the PAT. When memory types have been assigned to entries in the PAT,
|
||||
* software can then use of the PAT-index bit (PAT) in the page-table and
|
||||
* page-directory entries along with the PCD and PWT bits to assign memory
|
||||
* types from the PAT to individual pages.
|
||||
*/
|
||||
|
||||
asm volatile("cpuid" : "=d" (edx) : "a" (0x1) : "%rbx", "%rcx");
|
||||
if (!(edx & ((uint64_t)1 << 16))) {
|
||||
kprintf("PAT not supported.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Set PWT to Write-Combining. All other bits stay the same */
|
||||
/* (Based on Linux' settings)
|
||||
*
|
||||
* PTE encoding used in Linux:
|
||||
* PAT
|
||||
* |PCD
|
||||
* ||PWT
|
||||
* |||
|
||||
* 000 WB _PAGE_CACHE_WB
|
||||
* 001 WC _PAGE_CACHE_WC
|
||||
* 010 UC- _PAGE_CACHE_UC_MINUS
|
||||
* 011 UC _PAGE_CACHE_UC
|
||||
* PAT bit unused
|
||||
*/
|
||||
pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
|
||||
PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);
|
||||
|
||||
/* Boot CPU check */
|
||||
if (!boot_pat_state)
|
||||
boot_pat_state = rdmsr(MSR_IA32_CR_PAT);
|
||||
|
||||
wrmsr(MSR_IA32_CR_PAT, pat);
|
||||
kprintf("PAT support detected and reconfigured.\n");
|
||||
}
|
||||
|
||||
void init_lapic(void)
|
||||
{
|
||||
unsigned long baseaddr;
|
||||
@@ -262,16 +451,23 @@ static void init_smp_processor(void)
|
||||
|
||||
static char *trampoline_va, *first_page_va;
|
||||
|
||||
/*@
|
||||
@ assigns trampoline_va;
|
||||
@ assigns first_page_va;
|
||||
@*/
|
||||
void ihk_mc_init_ap(void)
|
||||
{
|
||||
struct ihk_mc_cpu_info *cpu_info = ihk_mc_get_cpu_info();
|
||||
|
||||
trampoline_va = map_fixed_area(AP_TRAMPOLINE, AP_TRAMPOLINE_SIZE,
|
||||
0);
|
||||
trampoline_va = map_fixed_area(ap_trampoline, AP_TRAMPOLINE_SIZE, 0);
|
||||
kprintf("Trampoline area: 0x%lx \n", ap_trampoline);
|
||||
first_page_va = map_fixed_area(0, PAGE_SIZE, 0);
|
||||
|
||||
kprintf("# of cpus : %d\n", cpu_info->ncpus);
|
||||
init_processors_local(cpu_info->ncpus);
|
||||
|
||||
kprintf("IKC IRQ vector: %d, IKC target CPU APIC: %d\n",
|
||||
ihk_ikc_irq, ihk_ikc_irq_apicid);
|
||||
|
||||
/* Do initialization for THIS cpu (BSP) */
|
||||
assign_processor_id();
|
||||
@@ -355,6 +551,8 @@ void init_cpu(void)
|
||||
init_lapic();
|
||||
init_syscall();
|
||||
x86_init_perfctr();
|
||||
init_pstate_and_turbo();
|
||||
init_pat();
|
||||
}
|
||||
|
||||
void setup_x86(void)
|
||||
@@ -409,29 +607,63 @@ void set_signal(int sig, void *regs, struct siginfo *info);
|
||||
void check_signal(unsigned long rc, void *regs);
|
||||
extern void tlb_flush_handler(int vector);
|
||||
|
||||
void handle_interrupt(int vector, struct x86_regs *regs)
|
||||
void handle_interrupt(int vector, struct x86_user_context *regs)
|
||||
{
|
||||
struct ihk_mc_interrupt_handler *h;
|
||||
|
||||
lapic_ack();
|
||||
|
||||
dkprintf("CPU[%d] got interrupt, vector: %d, RIP: 0x%lX\n",
|
||||
ihk_mc_get_processor_id(), vector, regs->rip);
|
||||
ihk_mc_get_processor_id(), vector, regs->gpr.rip);
|
||||
|
||||
if (vector < 0 || vector > 255) {
|
||||
panic("Invalid interrupt vector.");
|
||||
}
|
||||
else if (vector < 32) {
|
||||
if (vector == 8 ||
|
||||
(vector >= 10 && vector <= 15) || vector == 17) {
|
||||
struct siginfo info;
|
||||
switch(vector){
|
||||
case 0:
|
||||
memset(&info, '\0', sizeof info);
|
||||
info.si_signo = SIGFPE;
|
||||
info.si_code = FPE_INTDIV;
|
||||
info._sifields._sigfault.si_addr = (void *)regs->gpr.rip;
|
||||
set_signal(SIGFPE, regs, &info);
|
||||
break;
|
||||
case 9:
|
||||
case 16:
|
||||
case 19:
|
||||
set_signal(SIGFPE, regs, NULL);
|
||||
break;
|
||||
case 4:
|
||||
case 5:
|
||||
set_signal(SIGSEGV, regs, NULL);
|
||||
break;
|
||||
case 6:
|
||||
memset(&info, '\0', sizeof info);
|
||||
info.si_signo = SIGILL;
|
||||
info.si_code = ILL_ILLOPN;
|
||||
info._sifields._sigfault.si_addr = (void *)regs->gpr.rip;
|
||||
set_signal(SIGILL, regs, &info);
|
||||
break;
|
||||
case 10:
|
||||
set_signal(SIGSEGV, regs, NULL);
|
||||
break;
|
||||
case 11:
|
||||
case 12:
|
||||
set_signal(SIGBUS, regs, NULL);
|
||||
break;
|
||||
case 17:
|
||||
memset(&info, '\0', sizeof info);
|
||||
info.si_signo = SIGBUS;
|
||||
info.si_code = BUS_ADRALN;
|
||||
set_signal(SIGBUS, regs, &info);
|
||||
break;
|
||||
default:
|
||||
kprintf("Exception %d, rflags: 0x%lX CS: 0x%lX, RIP: 0x%lX\n",
|
||||
vector, regs->rflags, regs->cs, regs->rip);
|
||||
} else {
|
||||
kprintf("Exception %d, rflags: 0x%lX CS: 0x%lX, RIP: 0x%lX\n",
|
||||
vector, regs->rflags, regs->cs, regs->rip);
|
||||
vector, regs->gpr.rflags, regs->gpr.cs, regs->gpr.rip);
|
||||
arch_show_interrupt_context(regs);
|
||||
panic("Unhandled exception");
|
||||
}
|
||||
arch_show_interrupt_context(regs);
|
||||
panic("Unhandled exception");
|
||||
}
|
||||
else if (vector >= IHK_TLB_FLUSH_IRQ_VECTOR_START &&
|
||||
vector < IHK_TLB_FLUSH_IRQ_VECTOR_END) {
|
||||
@@ -450,20 +682,64 @@ void handle_interrupt(int vector, struct x86_regs *regs)
|
||||
check_need_resched();
|
||||
}
|
||||
|
||||
void gpe_handler(struct x86_regs *regs)
|
||||
void gpe_handler(struct x86_user_context *regs)
|
||||
{
|
||||
struct siginfo info;
|
||||
|
||||
kprintf("General protection fault (err: %lx, %lx:%lx)\n",
|
||||
regs->error, regs->cs, regs->rip);
|
||||
regs->gpr.error, regs->gpr.cs, regs->gpr.rip);
|
||||
arch_show_interrupt_context(regs);
|
||||
memset(&info, '\0', sizeof info);
|
||||
set_signal(SIGILL, regs, &info);
|
||||
if ((regs->gpr.cs & 3) == 0) {
|
||||
panic("gpe_handler");
|
||||
}
|
||||
set_signal(SIGSEGV, regs, NULL);
|
||||
check_signal(0, regs);
|
||||
check_need_resched();
|
||||
// panic("GPF");
|
||||
}
|
||||
|
||||
void debug_handler(struct x86_user_context *regs)
|
||||
{
|
||||
unsigned long db6;
|
||||
int si_code = 0;
|
||||
struct siginfo info;
|
||||
|
||||
#ifdef DEBUG_PRINT_CPU
|
||||
kprintf("debug exception (err: %lx, %lx:%lx)\n",
|
||||
regs->gpr.error, regs->gpr.cs, regs->gpr.rip);
|
||||
arch_show_interrupt_context(regs);
|
||||
#endif
|
||||
|
||||
asm("mov %%db6, %0" :"=r" (db6));
|
||||
if (db6 & DB6_BS) {
|
||||
regs->gpr.rflags &= ~RFLAGS_TF;
|
||||
si_code = TRAP_TRACE;
|
||||
} else if (db6 & (DB6_B3|DB6_B2|DB6_B1|DB6_B0)) {
|
||||
si_code = TRAP_HWBKPT;
|
||||
}
|
||||
|
||||
memset(&info, '\0', sizeof info);
|
||||
info.si_code = si_code;
|
||||
set_signal(SIGTRAP, regs, &info);
|
||||
check_signal(0, regs);
|
||||
check_need_resched();
|
||||
}
|
||||
|
||||
void int3_handler(struct x86_user_context *regs)
|
||||
{
|
||||
struct siginfo info;
|
||||
|
||||
#ifdef DEBUG_PRINT_CPU
|
||||
kprintf("int3 exception (err: %lx, %lx:%lx)\n",
|
||||
regs->gpr.error, regs->gpr.cs, regs->gpr.rip);
|
||||
arch_show_interrupt_context(regs);
|
||||
#endif
|
||||
|
||||
memset(&info, '\0', sizeof info);
|
||||
info.si_code = TRAP_BRKPT;
|
||||
set_signal(SIGTRAP, regs, &info);
|
||||
check_signal(0, regs);
|
||||
check_need_resched();
|
||||
}
|
||||
|
||||
void x86_issue_ipi(unsigned int apicid, unsigned int low)
|
||||
{
|
||||
lapic_icr_write(apicid << LAPIC_ICR_ID_SHIFT, low);
|
||||
@@ -524,31 +800,65 @@ void cpu_halt(void)
|
||||
asm volatile("hlt");
|
||||
}
|
||||
|
||||
/* Enable interrupts and halt, issued as a single "sti; hlt" asm statement. */
/*@
  @ assigns \nothing;
  @ ensures \interrupt_disabled == 0;
  @*/
void cpu_safe_halt(void)
{
	__asm__ __volatile__("sti; hlt");
}
|
||||
|
||||
/* Enable interrupts on the calling CPU by executing "sti". */
/*@
  @ assigns \nothing;
  @ ensures \interrupt_disabled == 0;
  @*/
void cpu_enable_interrupt(void)
{
	__asm__ __volatile__("sti");
}
|
||||
|
||||
/* Disable interrupts on the calling CPU by executing "cli". */
/*@
  @ assigns \nothing;
  @ ensures \interrupt_disabled > 0;
  @*/
void cpu_disable_interrupt(void)
{
	__asm__ __volatile__("cli");
}
|
||||
|
||||
/*
 * Load RFLAGS from a previously saved value by pushing it and executing
 * "popf"; per the contract below, the interrupt state afterwards follows
 * RFLAGS_IF in flags.
 *
 * flags: RFLAGS image to restore.
 */
/*@
  @ assigns \nothing;
  @ behavior to_enabled:
  @   assumes flags & RFLAGS_IF;
  @   ensures \interrupt_disabled == 0;
  @ behavior to_disabled:
  @   assumes !(flags & RFLAGS_IF);
  @   ensures \interrupt_disabled > 0;
  @*/
void cpu_restore_interrupt(unsigned long flags)
{
	__asm__ __volatile__("push %0; popf" : : "g"(flags) : "memory", "cc");
}
|
||||
|
||||
/*
 * Execute the "pause" instruction.  The "memory" clobber additionally makes
 * the statement a compiler-level memory barrier.
 */
/*@
  @ assigns \nothing;
  @*/
void cpu_pause(void)
{
	__asm__ __volatile__("pause" ::: "memory");
}
|
||||
|
||||
/*@
|
||||
@ assigns \nothing;
|
||||
@ ensures \interrupt_disabled > 0;
|
||||
@ behavior from_enabled:
|
||||
@ assumes \interrupt_disabled == 0;
|
||||
@ ensures \result & RFLAGS_IF;
|
||||
@ behavior from_disabled:
|
||||
@ assumes \interrupt_disabled > 0;
|
||||
@ ensures !(\result & RFLAGS_IF);
|
||||
@*/
|
||||
unsigned long cpu_disable_interrupt_save(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
@@ -558,6 +868,17 @@ unsigned long cpu_disable_interrupt_save(void)
|
||||
return flags;
|
||||
}
|
||||
|
||||
/*@
|
||||
@ behavior valid_vector:
|
||||
@ assumes 32 <= vector <= 255;
|
||||
@ requires \valid(h);
|
||||
@ assigns handlers[vector-32];
|
||||
@ ensures \result == 0;
|
||||
@ behavior invalid_vector:
|
||||
@ assumes (vector < 32) || (255 < vector);
|
||||
@ assigns \nothing;
|
||||
@ ensures \result == -EINVAL;
|
||||
@*/
|
||||
int ihk_mc_register_interrupt_handler(int vector,
|
||||
struct ihk_mc_interrupt_handler *h)
|
||||
{
|
||||
@@ -579,6 +900,11 @@ int ihk_mc_unregister_interrupt_handler(int vector,
|
||||
|
||||
extern unsigned long __page_fault_handler_address;
|
||||
|
||||
/*@
|
||||
@ requires \valid(h);
|
||||
@ assigns __page_fault_handler_address;
|
||||
@ ensures __page_fault_handler_address == h;
|
||||
@*/
|
||||
void ihk_mc_set_page_fault_handler(void (*h)(void *, uint64_t, void *))
|
||||
{
|
||||
__page_fault_handler_address = (unsigned long)h;
|
||||
@@ -588,6 +914,18 @@ extern char trampoline_code_data[], trampoline_code_data_end[];
|
||||
struct page_table *get_init_page_table(void);
|
||||
unsigned long get_transit_page_table(void);
|
||||
|
||||
/* reusable, but not reentrant */
|
||||
/*@
|
||||
@ requires \valid_apicid(cpuid); // valid APIC ID or not
|
||||
@ requires \valid(pc);
|
||||
@ requires \valid(trampoline_va);
|
||||
@ requires \valid(trampoline_code_data
|
||||
@ +(0..(trampoline_code_data_end - trampoline_code_data)));
|
||||
@ requires \valid_physical(ap_trampoline); // valid physical address or not
|
||||
@ assigns (char *)trampoline_va+(0..trampoline_code_data_end - trampoline_code_data);
|
||||
@ assigns cpu_boot_status;
|
||||
@ ensures cpu_boot_status != 0;
|
||||
@*/
|
||||
void ihk_mc_boot_cpu(int cpuid, unsigned long pc)
|
||||
{
|
||||
unsigned long *p;
|
||||
@@ -607,7 +945,7 @@ void ihk_mc_boot_cpu(int cpuid, unsigned long pc)
|
||||
|
||||
cpu_boot_status = 0;
|
||||
|
||||
__x86_wakeup(cpuid, AP_TRAMPOLINE);
|
||||
__x86_wakeup(cpuid, ap_trampoline);
|
||||
|
||||
/* XXX: Time out */
|
||||
while (!cpu_boot_status) {
|
||||
@@ -615,6 +953,11 @@ void ihk_mc_boot_cpu(int cpuid, unsigned long pc)
|
||||
}
|
||||
}
|
||||
|
||||
/*@
|
||||
@ requires \valid(new_ctx);
|
||||
@ requires (stack_pointer == NULL) || \valid((unsigned long *)stack_pointer-1);
|
||||
@ requires \valid(next_function);
|
||||
@*/
|
||||
void ihk_mc_init_context(ihk_mc_kernel_context_t *new_ctx,
|
||||
void *stack_pointer, void (*next_function)(void))
|
||||
{
|
||||
@@ -634,6 +977,15 @@ void ihk_mc_init_context(ihk_mc_kernel_context_t *new_ctx,
|
||||
|
||||
extern char enter_user_mode[];
|
||||
|
||||
/*@
|
||||
@ requires \valid(ctx);
|
||||
@ requires \valid(puctx);
|
||||
@ requires \valid((ihk_mc_user_context_t *)stack_pointer-1);
|
||||
@ requires \valid_user(new_pc); // valid user space address or not
|
||||
@ requires \valid_user(user_sp-1);
|
||||
@ assigns *((ihk_mc_user_context_t *)stack_pointer-1);
|
||||
@ assigns ctx->rsp0;
|
||||
@*/
|
||||
void ihk_mc_init_user_process(ihk_mc_kernel_context_t *ctx,
|
||||
ihk_mc_user_context_t **puctx,
|
||||
void *stack_pointer, unsigned long new_pc,
|
||||
@@ -649,49 +1001,95 @@ void ihk_mc_init_user_process(ihk_mc_kernel_context_t *ctx,
|
||||
*puctx = uctx;
|
||||
|
||||
memset(uctx, 0, sizeof(ihk_mc_user_context_t));
|
||||
uctx->cs = USER_CS;
|
||||
uctx->rip = new_pc;
|
||||
uctx->ss = USER_DS;
|
||||
uctx->rsp = user_sp;
|
||||
uctx->rflags = RFLAGS_IF;
|
||||
uctx->gpr.cs = USER_CS;
|
||||
uctx->gpr.rip = new_pc;
|
||||
uctx->gpr.ss = USER_DS;
|
||||
uctx->gpr.rsp = user_sp;
|
||||
uctx->gpr.rflags = RFLAGS_IF;
|
||||
uctx->is_gpr_valid = 1;
|
||||
|
||||
ihk_mc_init_context(ctx, sp, (void (*)(void))enter_user_mode);
|
||||
ctx->rsp0 = (unsigned long)stack_pointer;
|
||||
}
|
||||
|
||||
/*@
|
||||
@ behavior rsp:
|
||||
@ assumes reg == IHK_UCR_STACK_POINTER;
|
||||
@ requires \valid(uctx);
|
||||
@ assigns uctx->gpr.rsp;
|
||||
@ ensures uctx->gpr.rsp == value;
|
||||
@ behavior rip:
|
||||
@ assumes reg == IHK_UCR_PROGRAM_COUNTER;
|
||||
@ requires \valid(uctx);
|
||||
@ assigns uctx->gpr.rip;
|
||||
@ ensures uctx->gpr.rip == value;
|
||||
@*/
|
||||
void ihk_mc_modify_user_context(ihk_mc_user_context_t *uctx,
|
||||
enum ihk_mc_user_context_regtype reg,
|
||||
unsigned long value)
|
||||
{
|
||||
if (reg == IHK_UCR_STACK_POINTER) {
|
||||
uctx->rsp = value;
|
||||
uctx->gpr.rsp = value;
|
||||
} else if (reg == IHK_UCR_PROGRAM_COUNTER) {
|
||||
uctx->rip = value;
|
||||
uctx->gpr.rip = value;
|
||||
}
|
||||
}
|
||||
|
||||
void ihk_mc_print_user_context(ihk_mc_user_context_t *uctx)
|
||||
{
|
||||
kprintf("CS:RIP = %04lx:%16lx\n", uctx->cs, uctx->rip);
|
||||
kprintf("CS:RIP = %04lx:%16lx\n", uctx->gpr.cs, uctx->gpr.rip);
|
||||
kprintf("%16lx %16lx %16lx %16lx\n%16lx %16lx %16lx\n",
|
||||
uctx->rax, uctx->rbx, uctx->rcx, uctx->rdx,
|
||||
uctx->rsi, uctx->rdi, uctx->rsp);
|
||||
uctx->gpr.rax, uctx->gpr.rbx, uctx->gpr.rcx, uctx->gpr.rdx,
|
||||
uctx->gpr.rsi, uctx->gpr.rdi, uctx->gpr.rsp);
|
||||
}
|
||||
|
||||
/*@
|
||||
@ requires \valid(handler);
|
||||
@ assigns __x86_syscall_handler;
|
||||
@ ensures __x86_syscall_handler == handler;
|
||||
@*/
|
||||
void ihk_mc_set_syscall_handler(long (*handler)(int, ihk_mc_user_context_t *))
|
||||
{
|
||||
__x86_syscall_handler = handler;
|
||||
}
|
||||
|
||||
/*
 * Architecture-independent entry point for delays; forwards the request
 * unchanged to arch_delay().
 *
 * us: delay amount passed through to arch_delay().
 */
/*@
  @ assigns \nothing;
  @*/
void ihk_mc_delay_us(int us)
{
	arch_delay(us);
}
|
||||
|
||||
#define EXTENDED_ARCH_SHOW_CONTEXT
|
||||
#ifdef EXTENDED_ARCH_SHOW_CONTEXT
|
||||
void arch_show_extended_context(void)
|
||||
{
|
||||
unsigned long cr0, cr4, msr, xcr0;
|
||||
|
||||
/* Read and print CRs, MSR_EFER, XCR0 */
|
||||
asm volatile("movq %%cr0, %0" : "=r"(cr0));
|
||||
asm volatile("movq %%cr4, %0" : "=r"(cr4));
|
||||
msr = rdmsr(MSR_EFER);
|
||||
xcr0 = xgetbv(0);
|
||||
|
||||
__kprintf("\n CR0 CR4\n");
|
||||
__kprintf("%016lX %016lX\n", cr0, cr4);
|
||||
|
||||
__kprintf(" MSR_EFER\n");
|
||||
__kprintf("%016lX\n", msr);
|
||||
|
||||
__kprintf(" XCR0\n");
|
||||
__kprintf("%016lX\n", xcr0);
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
void arch_show_interrupt_context(const void *reg)
|
||||
{
|
||||
const struct x86_regs *regs = reg;
|
||||
int irqflags;
|
||||
const struct x86_user_context *uctx = reg;
|
||||
const struct x86_basic_regs *regs = &uctx->gpr;
|
||||
unsigned long irqflags;
|
||||
|
||||
irqflags = kprintf_lock();
|
||||
|
||||
@@ -711,10 +1109,22 @@ void arch_show_interrupt_context(const void *reg)
|
||||
__kprintf(" CS SS RFLAGS ERROR\n");
|
||||
__kprintf("%16lx %16lx %16lx %16lx\n",
|
||||
regs->cs, regs->ss, regs->rflags, regs->error);
|
||||
|
||||
|
||||
#ifdef EXTENDED_ARCH_SHOW_CONTEXT
|
||||
arch_show_extended_context();
|
||||
#endif
|
||||
|
||||
kprintf_unlock(irqflags);
|
||||
}
|
||||
|
||||
/*@
|
||||
@ behavior fs_base:
|
||||
@ assumes type == IHK_ASR_X86_FS;
|
||||
@ ensures \result == 0;
|
||||
@ behavior invalid_type:
|
||||
@ assumes type != IHK_ASR_X86_FS;
|
||||
@ ensures \result == -EINVAL;
|
||||
@*/
|
||||
int ihk_mc_arch_set_special_register(enum ihk_asr_type type,
|
||||
unsigned long value)
|
||||
{
|
||||
@@ -728,6 +1138,15 @@ int ihk_mc_arch_set_special_register(enum ihk_asr_type type,
|
||||
}
|
||||
}
|
||||
|
||||
/*@
|
||||
@ behavior fs_base:
|
||||
@ assumes type == IHK_ASR_X86_FS;
|
||||
@ requires \valid(value);
|
||||
@ ensures \result == 0;
|
||||
@ behavior invalid_type:
|
||||
@ assumes type != IHK_ASR_X86_FS;
|
||||
@ ensures \result == -EINVAL;
|
||||
@*/
|
||||
int ihk_mc_arch_get_special_register(enum ihk_asr_type type,
|
||||
unsigned long *value)
|
||||
{
|
||||
@@ -741,6 +1160,10 @@ int ihk_mc_arch_get_special_register(enum ihk_asr_type type,
|
||||
}
|
||||
}
|
||||
|
||||
/*@
|
||||
@ requires \valid_apicid(cpu); // valid APIC ID or not
|
||||
@ ensures \result == 0;
|
||||
@*/
|
||||
int ihk_mc_interrupt_cpu(int cpu, int vector)
|
||||
{
|
||||
dkprintf("[%d] ihk_mc_interrupt_cpu: %d\n", ihk_mc_get_processor_id(), cpu);
|
||||
@@ -749,3 +1172,68 @@ int ihk_mc_interrupt_cpu(int cpu, int vector)
|
||||
x86_issue_ipi(cpu, vector);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*@
|
||||
@ requires \valid(proc);
|
||||
@ ensures proc->fp_regs == NULL;
|
||||
@*/
|
||||
void
|
||||
release_fp_regs(struct process *proc)
|
||||
{
|
||||
int pages;
|
||||
|
||||
if (!proc->fp_regs)
|
||||
return;
|
||||
pages = (sizeof(fp_regs_struct) + 4095) >> 12;
|
||||
ihk_mc_free_pages(proc->fp_regs, 1);
|
||||
proc->fp_regs = NULL;
|
||||
}
|
||||
|
||||
void
|
||||
save_fp_regs(struct process *proc)
|
||||
{
|
||||
int pages;
|
||||
|
||||
if (proc->fp_regs)
|
||||
return;
|
||||
pages = (sizeof(fp_regs_struct) + 4095) >> 12;
|
||||
proc->fp_regs = ihk_mc_alloc_pages(pages, IHK_MC_AP_NOWAIT);
|
||||
if(!proc->fp_regs)
|
||||
return;
|
||||
memset(proc->fp_regs, 0, sizeof(fp_regs_struct));
|
||||
// TODO: do xsave
|
||||
}
|
||||
|
||||
void
|
||||
restore_fp_regs(struct process *proc)
|
||||
{
|
||||
if (!proc->fp_regs)
|
||||
return;
|
||||
// TODO: do xrstor
|
||||
release_fp_regs(proc);
|
||||
}
|
||||
|
||||
ihk_mc_user_context_t *lookup_user_context(struct process *proc)
|
||||
{
|
||||
ihk_mc_user_context_t *uctx = proc->uctx;
|
||||
|
||||
if ((!(proc->ftn->status & (PS_INTERRUPTIBLE | PS_UNINTERRUPTIBLE
|
||||
| PS_STOPPED | PS_TRACED))
|
||||
&& (proc != cpu_local_var(current)))
|
||||
|| !uctx->is_gpr_valid) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!uctx->is_sr_valid) {
|
||||
uctx->sr.fs_base = proc->thread.tlsblock_base;
|
||||
uctx->sr.gs_base = 0;
|
||||
uctx->sr.ds = 0;
|
||||
uctx->sr.es = 0;
|
||||
uctx->sr.fs = 0;
|
||||
uctx->sr.gs = 0;
|
||||
|
||||
uctx->is_sr_valid = 1;
|
||||
}
|
||||
|
||||
return uctx;
|
||||
} /* lookup_user_context() */
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
#define dkprintf(...) kprintf(__VA_ARGS__)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#else
|
||||
#define dkprintf(...)
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
@@ -86,7 +86,8 @@ void fill_prstatus(struct note *head, struct process *proc, void *regs0)
|
||||
{
|
||||
void *name;
|
||||
struct elf_prstatus64 *prstatus;
|
||||
struct x86_regs *regs = regs0;
|
||||
struct x86_user_context *uctx = regs0;
|
||||
struct x86_basic_regs *regs = &uctx->gpr;
|
||||
register unsigned long _r12 asm("r12");
|
||||
register unsigned long _r13 asm("r13");
|
||||
register unsigned long _r14 asm("r14");
|
||||
|
||||
@@ -5,6 +5,8 @@
|
||||
* Define and declare memory management macros and functions
|
||||
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2011 - 2012 Taku Shimosawa
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY
|
||||
@@ -117,6 +119,25 @@
|
||||
#define PTE_NULL ((pte_t)0)
|
||||
typedef unsigned long pte_t;
|
||||
|
||||
/*
 * pagemap kernel ABI bits
 *
 * One pagemap entry is a 64-bit word laid out as:
 *   bits 61-63  status     (PM_PRESENT, PM_SWAP)
 *   bits 55-60  page shift (PM_PSHIFT)
 *   bits  0-54  page frame (PM_PFRAME)
 *
 * Every mask is built from unsigned 64-bit constants.  Shifting a signed
 * value such as 7LL or 4LL into bit 63 overflows long long, which is
 * undefined behavior, and PM_STATUS() must not shift at the (possibly
 * 32-bit) type of its argument.
 */
#define	PM_ENTRY_BYTES		sizeof(uint64_t)
#define	PM_STATUS_BITS		3
#define	PM_STATUS_OFFSET	(64 - PM_STATUS_BITS)
#define	PM_STATUS_MASK		(((1ULL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
#define	PM_STATUS(nr)		(((uint64_t)(nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK)
#define	PM_PSHIFT_BITS		6
#define	PM_PSHIFT_OFFSET	(PM_STATUS_OFFSET - PM_PSHIFT_BITS)
#define	PM_PSHIFT_MASK		(((1ULL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
#define	PM_PSHIFT(x)		(((uint64_t)(x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
#define	PM_PFRAME_MASK		((1ULL << PM_PSHIFT_OFFSET) - 1)
#define	PM_PFRAME(x)		((uint64_t)(x) & PM_PFRAME_MASK)

#define	PM_PRESENT		PM_STATUS(4)
#define	PM_SWAP			PM_STATUS(2)
|
||||
|
||||
|
||||
/* For easy conversion, it is better to be the same as architecture's ones */
|
||||
enum ihk_mc_pt_attribute {
|
||||
PTATTR_ACTIVE = 0x01,
|
||||
@@ -128,6 +149,7 @@ enum ihk_mc_pt_attribute {
|
||||
PTATTR_NO_EXECUTE = 0x8000000000000000,
|
||||
PTATTR_UNCACHABLE = 0x10000,
|
||||
PTATTR_FOR_USER = 0x20000,
|
||||
PTATTR_WRITE_COMBINED = 0x40000,
|
||||
};
|
||||
|
||||
static inline int pte_is_null(pte_t *ptep)
|
||||
@@ -185,6 +207,12 @@ static inline off_t pte_get_off(pte_t *ptep, size_t pgsize)
|
||||
return (off_t)(*ptep & PAGE_MASK);
|
||||
}
|
||||
|
||||
/*
 * Reset the page table entry at ptep to PTE_NULL.
 * pgsize is accepted for symmetry with the other pte_* helpers but is
 * not consulted.
 */
static inline void pte_make_null(pte_t *ptep, size_t pgsize)
{
	(void)pgsize;
	*ptep = PTE_NULL;
}
|
||||
|
||||
static inline void pte_make_fileoff(off_t off,
|
||||
enum ihk_mc_pt_attribute ptattr, size_t pgsize, pte_t *ptep)
|
||||
{
|
||||
@@ -216,6 +244,36 @@ static inline void pte_xchg(pte_t *ptep, pte_t *valp)
|
||||
#define pte_xchg(p,vp) do { *(vp) = xchg((p), *(vp)); } while (0)
|
||||
#endif
|
||||
|
||||
/*
 * Atomically clear the dirty flag of the page table entry at ptep.
 *
 * pgsize selects which level's dirty bit is cleared; any size other than
 * PTL2_SIZE / PTL3_SIZE is handled as a PTL1 entry.
 *
 * The entry is both read and written by the locked AND, so it is passed
 * to the asm as a read-write ("+m") operand; listing it as an input only
 * would let the compiler assume *ptep is unchanged across the statement.
 */
static inline void pte_clear_dirty(pte_t *ptep, size_t pgsize)
{
	uint64_t mask;

	switch (pgsize) {
	default:	/* through */
	case PTL1_SIZE:	mask = ~PFL1_DIRTY;	break;
	case PTL2_SIZE:	mask = ~PFL2_DIRTY;	break;
	case PTL3_SIZE:	mask = ~PFL3_DIRTY;	break;
	}

	asm volatile ("lock andq %1,%0" : "+m"(*ptep) : "r"(mask) : "cc");
	return;
}
|
||||
|
||||
/*
 * Atomically set the dirty flag of the page table entry at ptep.
 *
 * pgsize selects which level's dirty bit is set; any size other than
 * PTL2_SIZE / PTL3_SIZE is handled as a PTL1 entry.
 *
 * The entry is both read and written by the locked OR, so it is passed
 * to the asm as a read-write ("+m") operand; listing it as an input only
 * would let the compiler assume *ptep is unchanged across the statement.
 */
static inline void pte_set_dirty(pte_t *ptep, size_t pgsize)
{
	uint64_t mask;

	switch (pgsize) {
	default:	/* through */
	case PTL1_SIZE:	mask = PFL1_DIRTY;	break;
	case PTL2_SIZE:	mask = PFL2_DIRTY;	break;
	case PTL3_SIZE:	mask = PFL3_DIRTY;	break;
	}

	asm volatile ("lock orq %1,%0" : "+m"(*ptep) : "r"(mask) : "cc");
	return;
}
|
||||
|
||||
struct page_table;
|
||||
void set_pte(pte_t *ppte, unsigned long phys, enum ihk_mc_pt_attribute attr);
|
||||
pte_t *get_pte(struct page_table *pt, void *virt, enum ihk_mc_pt_attribute attr);
|
||||
@@ -227,8 +285,9 @@ void flush_tlb_single(unsigned long addr);
|
||||
|
||||
void *map_fixed_area(unsigned long phys, unsigned long size, int uncachable);
|
||||
|
||||
#define AP_TRAMPOLINE 0x10000
|
||||
#define AP_TRAMPOLINE_SIZE 0x4000
|
||||
extern unsigned long ap_trampoline;
|
||||
//#define AP_TRAMPOLINE 0x10000
|
||||
#define AP_TRAMPOLINE_SIZE 0x2000
|
||||
|
||||
/* Local is cachable */
|
||||
#define IHK_IKC_QUEUE_PT_ATTR (PTATTR_NO_EXECUTE | PTATTR_WRITABLE | PTATTR_UNCACHABLE)
|
||||
|
||||
36
arch/x86/kernel/include/arch/mman.h
Normal file
36
arch/x86/kernel/include/arch/mman.h
Normal file
@@ -0,0 +1,36 @@
|
||||
/**
|
||||
* \file mman.h
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* memory management declarations
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
*/
|
||||
|
||||
#ifndef HEADER_ARCH_MMAN_H
|
||||
#define HEADER_ARCH_MMAN_H
|
||||
|
||||
/*
|
||||
* mapping flags
|
||||
*/
|
||||
#define MAP_32BIT 0x40
|
||||
#define MAP_GROWSDOWN 0x0100
|
||||
#define MAP_DENYWRITE 0x0800
|
||||
#define MAP_EXECUTABLE 0x1000
|
||||
#define MAP_LOCKED 0x2000
|
||||
#define MAP_NORESERVE 0x4000
|
||||
#define MAP_POPULATE 0x8000
|
||||
#define MAP_NONBLOCK 0x00010000
|
||||
#define MAP_STACK 0x00020000
|
||||
#define MAP_HUGETLB 0x00040000
|
||||
|
||||
/*
|
||||
* for mlockall()
|
||||
*/
|
||||
#define MCL_CURRENT 0x01
|
||||
#define MCL_FUTURE 0x02
|
||||
|
||||
#endif /* HEADER_ARCH_MMAN_H */
|
||||
40
arch/x86/kernel/include/arch/shm.h
Normal file
40
arch/x86/kernel/include/arch/shm.h
Normal file
@@ -0,0 +1,40 @@
|
||||
/**
|
||||
* \file shm.h
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* header file for System V shared memory
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
*/
|
||||
|
||||
#ifndef HEADER_ARCH_SHM_H
|
||||
#define HEADER_ARCH_SHM_H
|
||||
|
||||
struct ipc_perm {
|
||||
key_t key;
|
||||
uid_t uid;
|
||||
gid_t gid;
|
||||
uid_t cuid;
|
||||
gid_t cgid;
|
||||
uint16_t mode;
|
||||
uint8_t padding[2];
|
||||
uint16_t seq;
|
||||
uint8_t padding2[22];
|
||||
};
|
||||
|
||||
struct shmid_ds {
|
||||
struct ipc_perm shm_perm;
|
||||
size_t shm_segsz;
|
||||
time_t shm_atime;
|
||||
time_t shm_dtime;
|
||||
time_t shm_ctime;
|
||||
pid_t shm_cpid;
|
||||
pid_t shm_lpid;
|
||||
uint64_t shm_nattch;
|
||||
uint8_t padding[16];
|
||||
};
|
||||
|
||||
#endif /* HEADER_ARCH_SHM_H */
|
||||
@@ -42,7 +42,10 @@ struct x86_cpu_local_variables {
|
||||
uint64_t gdt[10];
|
||||
/* 128 */
|
||||
struct tss64 tss;
|
||||
|
||||
/* 232 */
|
||||
unsigned long paniced;
|
||||
uint64_t panic_regs[21];
|
||||
/* 408 */
|
||||
} __attribute__((packed));
|
||||
|
||||
struct x86_cpu_local_variables *get_x86_cpu_local_variable(int id);
|
||||
|
||||
@@ -22,19 +22,35 @@ struct x86_kregs {
|
||||
};
|
||||
|
||||
typedef struct x86_kregs ihk_mc_kernel_context_t;
|
||||
|
||||
/* XXX: User context should contain floating point registers */
|
||||
typedef struct x86_regs ihk_mc_user_context_t;
|
||||
struct x86_user_context {
|
||||
struct x86_sregs sr;
|
||||
|
||||
#define ihk_mc_syscall_arg0(uc) (uc)->rdi
|
||||
#define ihk_mc_syscall_arg1(uc) (uc)->rsi
|
||||
#define ihk_mc_syscall_arg2(uc) (uc)->rdx
|
||||
#define ihk_mc_syscall_arg3(uc) (uc)->r10
|
||||
#define ihk_mc_syscall_arg4(uc) (uc)->r8
|
||||
#define ihk_mc_syscall_arg5(uc) (uc)->r9
|
||||
/* 16-byte boundary here */
|
||||
uint8_t is_gpr_valid;
|
||||
uint8_t is_sr_valid;
|
||||
uint8_t spare_flags6;
|
||||
uint8_t spare_flags5;
|
||||
uint8_t spare_flags4;
|
||||
uint8_t spare_flags3;
|
||||
uint8_t spare_flags2;
|
||||
uint8_t spare_flags1;
|
||||
struct x86_basic_regs gpr; /* must be last */
|
||||
/* 16-byte boundary here */
|
||||
};
|
||||
typedef struct x86_user_context ihk_mc_user_context_t;
|
||||
|
||||
#define ihk_mc_syscall_ret(uc) (uc)->rax
|
||||
#define ihk_mc_syscall_arg0(uc) (uc)->gpr.rdi
|
||||
#define ihk_mc_syscall_arg1(uc) (uc)->gpr.rsi
|
||||
#define ihk_mc_syscall_arg2(uc) (uc)->gpr.rdx
|
||||
#define ihk_mc_syscall_arg3(uc) (uc)->gpr.r10
|
||||
#define ihk_mc_syscall_arg4(uc) (uc)->gpr.r8
|
||||
#define ihk_mc_syscall_arg5(uc) (uc)->gpr.r9
|
||||
|
||||
#define ihk_mc_syscall_pc(uc) (uc)->rip
|
||||
#define ihk_mc_syscall_sp(uc) (uc)->rsp
|
||||
#define ihk_mc_syscall_ret(uc) (uc)->gpr.rax
|
||||
|
||||
#define ihk_mc_syscall_pc(uc) (uc)->gpr.rip
|
||||
#define ihk_mc_syscall_sp(uc) (uc)->gpr.rsp
|
||||
|
||||
#endif
|
||||
|
||||
@@ -6,6 +6,8 @@
|
||||
* Machine Specific Registers (MSR)
|
||||
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2011 - 2012 Taku Shimosawa
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY
|
||||
@@ -16,7 +18,31 @@
|
||||
|
||||
#include <types.h>
|
||||
|
||||
#define RFLAGS_CF (1 << 0)
|
||||
#define RFLAGS_PF (1 << 2)
|
||||
#define RFLAGS_AF (1 << 4)
|
||||
#define RFLAGS_ZF (1 << 6)
|
||||
#define RFLAGS_SF (1 << 7)
|
||||
#define RFLAGS_TF (1 << 8)
|
||||
#define RFLAGS_IF (1 << 9)
|
||||
#define RFLAGS_DF (1 << 10)
|
||||
#define RFLAGS_OF (1 << 11)
|
||||
#define RFLAGS_IOPL (3 << 12)
|
||||
#define RFLAGS_NT (1 << 14)
|
||||
#define RFLAGS_RF (1 << 16)
|
||||
#define RFLAGS_VM (1 << 17)
|
||||
#define RFLAGS_AC (1 << 18)
|
||||
#define RFLAGS_VIF (1 << 19)
|
||||
#define RFLAGS_VIP (1 << 20)
|
||||
#define RFLAGS_ID (1 << 21)
|
||||
|
||||
#define DB6_B0 (1 << 0)
|
||||
#define DB6_B1 (1 << 1)
|
||||
#define DB6_B2 (1 << 2)
|
||||
#define DB6_B3 (1 << 3)
|
||||
#define DB6_BD (1 << 13)
|
||||
#define DB6_BS (1 << 14)
|
||||
#define DB6_BT (1 << 15)
|
||||
|
||||
#define MSR_EFER 0xc0000080
|
||||
#define MSR_STAR 0xc0000081
|
||||
@@ -26,6 +52,13 @@
|
||||
#define MSR_GS_BASE 0xc0000101
|
||||
|
||||
#define MSR_IA32_APIC_BASE 0x000000001b
|
||||
#define MSR_PLATFORM_INFO 0x000000ce
|
||||
#define MSR_IA32_PERF_CTL 0x00000199
|
||||
#define MSR_IA32_MISC_ENABLE 0x000001a0
|
||||
#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0
|
||||
#define MSR_NHM_TURBO_RATIO_LIMIT 0x000001ad
|
||||
#define MSR_IA32_CR_PAT 0x00000277
|
||||
|
||||
|
||||
#define CVAL(event, mask) \
|
||||
((((event) & 0xf00) << 24) | ((mask) << 8) | ((event) & 0xff))
|
||||
@@ -37,6 +70,25 @@
|
||||
#define MSR_PERF_CTL_0 0xc0010000
|
||||
#define MSR_PERF_CTR_0 0xc0010004
|
||||
|
||||
static unsigned long xgetbv(unsigned int index)
|
||||
{
|
||||
unsigned int low, high;
|
||||
|
||||
asm volatile("xgetbv" : "=a" (low), "=d" (high) : "c" (index));
|
||||
|
||||
return low | ((unsigned long)high << 32);
|
||||
}
|
||||
|
||||
static void xsetbv(unsigned int index, unsigned long val)
|
||||
{
|
||||
unsigned int low, high;
|
||||
|
||||
low = val;
|
||||
high = val >> 32;
|
||||
|
||||
asm volatile("xsetbv" : : "a" (low), "d" (high), "c" (index));
|
||||
}
|
||||
|
||||
static void wrmsr(unsigned int idx, unsigned long value){
|
||||
unsigned int high, low;
|
||||
|
||||
@@ -135,10 +187,19 @@ struct tss64 {
|
||||
unsigned short iomap_address;
|
||||
} __attribute__((packed));
|
||||
|
||||
struct x86_regs {
|
||||
unsigned long r15, r14, r13, r12, r11, r10, r9, r8;
|
||||
unsigned long rdi, rsi, rdx, rcx, rbx, rax, rbp;
|
||||
unsigned long error, rip, cs, rflags, rsp, ss;
|
||||
struct x86_basic_regs {
|
||||
unsigned long r15, r14, r13, r12, rbp, rbx, r11, r10;
|
||||
unsigned long r9, r8, rax, rcx, rdx, rsi, rdi, error;
|
||||
unsigned long rip, cs, rflags, rsp, ss;
|
||||
};
|
||||
|
||||
struct x86_sregs {
|
||||
unsigned long fs_base;
|
||||
unsigned long gs_base;
|
||||
unsigned long ds;
|
||||
unsigned long es;
|
||||
unsigned long fs;
|
||||
unsigned long gs;
|
||||
};
|
||||
|
||||
#define REGS_GET_STACK_POINTER(regs) (((struct x86_regs *)regs)->rsp)
|
||||
@@ -162,7 +223,72 @@ enum x86_pf_error_code {
|
||||
PF_RSVD = 1 << 3,
|
||||
PF_INSTR = 1 << 4,
|
||||
|
||||
PF_PATCH = 1 << 29,
|
||||
PF_POPULATE = 1 << 30,
|
||||
};
|
||||
|
||||
struct i387_fxsave_struct {
|
||||
unsigned short cwd;
|
||||
unsigned short swd;
|
||||
unsigned short twd;
|
||||
unsigned short fop;
|
||||
union {
|
||||
struct {
|
||||
unsigned long rip;
|
||||
unsigned long rdp;
|
||||
};
|
||||
struct {
|
||||
unsigned int fip;
|
||||
unsigned int fcs;
|
||||
unsigned int foo;
|
||||
unsigned int fos;
|
||||
};
|
||||
};
|
||||
unsigned int mxcsr;
|
||||
unsigned int mxcsr_mask;
|
||||
unsigned int st_space[32];
|
||||
unsigned int xmm_space[64];
|
||||
unsigned int padding[12];
|
||||
union {
|
||||
unsigned int padding1[12];
|
||||
unsigned int sw_reserved[12];
|
||||
};
|
||||
|
||||
} __attribute__((aligned(16)));
|
||||
|
||||
struct ymmh_struct {
|
||||
unsigned int ymmh_space[64];
|
||||
};
|
||||
|
||||
struct lwp_struct {
|
||||
unsigned char reserved[128];
|
||||
};
|
||||
|
||||
struct bndreg {
|
||||
unsigned long lower_bound;
|
||||
unsigned long upper_bound;
|
||||
} __attribute__((packed));
|
||||
|
||||
struct bndcsr {
|
||||
unsigned long bndcfgu;
|
||||
unsigned long bndstatus;
|
||||
} __attribute__((packed));
|
||||
|
||||
struct xsave_hdr_struct {
|
||||
unsigned long xstate_bv;
|
||||
unsigned long xcomp_bv;
|
||||
unsigned long reserved[6];
|
||||
} __attribute__((packed));
|
||||
|
||||
struct xsave_struct {
|
||||
struct i387_fxsave_struct i387;
|
||||
struct xsave_hdr_struct xsave_hdr;
|
||||
struct ymmh_struct ymmh;
|
||||
struct lwp_struct lwp;
|
||||
struct bndreg bndreg[4];
|
||||
struct bndcsr bndcsr;
|
||||
} __attribute__ ((packed, aligned (64)));
|
||||
|
||||
typedef struct xsave_struct fp_regs_struct;
|
||||
|
||||
#endif
|
||||
|
||||
@@ -23,7 +23,7 @@
|
||||
SYSCALL_DELEGATED(0, read)
|
||||
SYSCALL_DELEGATED(1, write)
|
||||
SYSCALL_DELEGATED(2, open)
|
||||
SYSCALL_DELEGATED(3, close)
|
||||
SYSCALL_HANDLED(3, close)
|
||||
SYSCALL_DELEGATED(4, stat)
|
||||
SYSCALL_DELEGATED(5, fstat)
|
||||
SYSCALL_DELEGATED(8, lseek)
|
||||
@@ -41,7 +41,12 @@ SYSCALL_DELEGATED(20, writev)
|
||||
SYSCALL_DELEGATED(21, access)
|
||||
SYSCALL_HANDLED(24, sched_yield)
|
||||
SYSCALL_HANDLED(25, mremap)
|
||||
SYSCALL_HANDLED(26, msync)
|
||||
SYSCALL_HANDLED(27, mincore)
|
||||
SYSCALL_HANDLED(28, madvise)
|
||||
SYSCALL_HANDLED(29, shmget)
|
||||
SYSCALL_HANDLED(30, shmat)
|
||||
SYSCALL_HANDLED(31, shmctl)
|
||||
SYSCALL_HANDLED(34, pause)
|
||||
SYSCALL_HANDLED(39, getpid)
|
||||
SYSCALL_HANDLED(56, clone)
|
||||
@@ -52,27 +57,48 @@ SYSCALL_HANDLED(60, exit)
|
||||
SYSCALL_HANDLED(61, wait4)
|
||||
SYSCALL_HANDLED(62, kill)
|
||||
SYSCALL_DELEGATED(63, uname)
|
||||
SYSCALL_HANDLED(67, shmdt)
|
||||
SYSCALL_DELEGATED(72, fcntl)
|
||||
SYSCALL_DELEGATED(79, getcwd)
|
||||
SYSCALL_DELEGATED(89, readlink)
|
||||
SYSCALL_DELEGATED(96, gettimeofday)
|
||||
SYSCALL_HANDLED(97, getrlimit)
|
||||
SYSCALL_HANDLED(101, ptrace)
|
||||
SYSCALL_DELEGATED(102, getuid)
|
||||
SYSCALL_DELEGATED(104, getgid)
|
||||
SYSCALL_DELEGATED(107, geteuid)
|
||||
SYSCALL_DELEGATED(108, getegid)
|
||||
SYSCALL_HANDLED(102, getuid)
|
||||
SYSCALL_HANDLED(104, getgid)
|
||||
SYSCALL_HANDLED(105, setuid)
|
||||
SYSCALL_HANDLED(106, setgid)
|
||||
SYSCALL_HANDLED(107, geteuid)
|
||||
SYSCALL_HANDLED(108, getegid)
|
||||
SYSCALL_HANDLED(109, setpgid)
|
||||
SYSCALL_DELEGATED(110, getppid)
|
||||
SYSCALL_HANDLED(110, getppid)
|
||||
SYSCALL_DELEGATED(111, getpgrp)
|
||||
SYSCALL_HANDLED(113, setreuid)
|
||||
SYSCALL_HANDLED(114, setregid)
|
||||
SYSCALL_HANDLED(117, setresuid)
|
||||
SYSCALL_HANDLED(118, getresuid)
|
||||
SYSCALL_HANDLED(119, setresgid)
|
||||
SYSCALL_HANDLED(120, getresgid)
|
||||
SYSCALL_HANDLED(122, setfsuid)
|
||||
SYSCALL_HANDLED(123, setfsgid)
|
||||
SYSCALL_HANDLED(127, rt_sigpending)
|
||||
SYSCALL_HANDLED(128, rt_sigtimedwait)
|
||||
SYSCALL_HANDLED(129, rt_sigqueueinfo)
|
||||
SYSCALL_HANDLED(130, rt_sigsuspend)
|
||||
SYSCALL_HANDLED(131, sigaltstack)
|
||||
SYSCALL_HANDLED(142, sched_setparam)
|
||||
SYSCALL_HANDLED(143, sched_getparam)
|
||||
SYSCALL_HANDLED(144, sched_setscheduler)
|
||||
SYSCALL_HANDLED(145, sched_getscheduler)
|
||||
SYSCALL_HANDLED(146, sched_get_priority_max)
|
||||
SYSCALL_HANDLED(147, sched_get_priority_min)
|
||||
SYSCALL_HANDLED(148, sched_rr_get_interval)
|
||||
SYSCALL_HANDLED(149, mlock)
|
||||
SYSCALL_HANDLED(150, munlock)
|
||||
SYSCALL_HANDLED(151, mlockall)
|
||||
SYSCALL_HANDLED(152, munlockall)
|
||||
SYSCALL_HANDLED(158, arch_prctl)
|
||||
SYSCALL_HANDLED(160, setrlimit)
|
||||
SYSCALL_HANDLED(186, gettid)
|
||||
SYSCALL_DELEGATED(201, time)
|
||||
SYSCALL_HANDLED(202, futex)
|
||||
@@ -83,12 +109,21 @@ SYSCALL_DELEGATED(217, getdents64)
|
||||
SYSCALL_HANDLED(218, set_tid_address)
|
||||
SYSCALL_HANDLED(231, exit_group)
|
||||
SYSCALL_HANDLED(234, tgkill)
|
||||
SYSCALL_HANDLED(237, mbind)
|
||||
SYSCALL_HANDLED(238, set_mempolicy)
|
||||
SYSCALL_HANDLED(239, get_mempolicy)
|
||||
SYSCALL_HANDLED(247, waitid)
|
||||
SYSCALL_HANDLED(256, migrate_pages)
|
||||
SYSCALL_HANDLED(273, set_robust_list)
|
||||
SYSCALL_HANDLED(279, move_pages)
|
||||
SYSCALL_HANDLED(282, signalfd)
|
||||
SYSCALL_HANDLED(289, signalfd4)
|
||||
#ifdef DCFA_KMOD
|
||||
SYSCALL_HANDLED(303, mod_call)
|
||||
#endif
|
||||
SYSCALL_HANDLED(309, getcpu)
|
||||
SYSCALL_HANDLED(310, process_vm_readv)
|
||||
SYSCALL_HANDLED(311, process_vm_writev)
|
||||
SYSCALL_HANDLED(601, pmc_init)
|
||||
SYSCALL_HANDLED(602, pmc_start)
|
||||
SYSCALL_HANDLED(603, pmc_stop)
|
||||
|
||||
@@ -24,39 +24,56 @@
|
||||
#define USER_CS (48 + 3)
|
||||
#define USER_DS (56 + 3)
|
||||
|
||||
#define PUSH_ALL_REGS \
|
||||
pushq %rbp; \
|
||||
pushq %rax; \
|
||||
pushq %rbx; \
|
||||
pushq %rcx; \
|
||||
pushq %rdx; \
|
||||
pushq %rsi; \
|
||||
pushq %rdi; \
|
||||
pushq %r8; \
|
||||
pushq %r9; \
|
||||
pushq %r10; \
|
||||
pushq %r11; \
|
||||
pushq %r12; \
|
||||
pushq %r13; \
|
||||
pushq %r14; \
|
||||
pushq %r15;
|
||||
#define POP_ALL_REGS \
|
||||
popq %r15; \
|
||||
popq %r14; \
|
||||
popq %r13; \
|
||||
popq %r12; \
|
||||
popq %r11; \
|
||||
popq %r10; \
|
||||
popq %r9; \
|
||||
popq %r8; \
|
||||
popq %rdi; \
|
||||
popq %rsi; \
|
||||
popq %rdx; \
|
||||
popq %rcx; \
|
||||
popq %rbx; \
|
||||
popq %rax; \
|
||||
popq %rbp
|
||||
|
||||
/* struct x86_user_context */
|
||||
#define X86_SREGS_BASE (0)
|
||||
#define X86_SREGS_SIZE 48
|
||||
|
||||
#define X86_FLAGS_BASE (X86_SREGS_BASE + X86_SREGS_SIZE)
|
||||
#define X86_FLAGS_SIZE 8
|
||||
|
||||
#define X86_REGS_BASE (X86_FLAGS_BASE + X86_FLAGS_SIZE)
|
||||
#define RAX_OFFSET (X86_REGS_BASE + 80)
|
||||
#define ERROR_OFFSET (X86_REGS_BASE + 120)
|
||||
#define RSP_OFFSET (X86_REGS_BASE + 152)
|
||||
|
||||
#define PUSH_ALL_REGS \
|
||||
pushq %rdi; \
|
||||
pushq %rsi; \
|
||||
pushq %rdx; \
|
||||
pushq %rcx; \
|
||||
pushq %rax; \
|
||||
pushq %r8; \
|
||||
pushq %r9; \
|
||||
pushq %r10; \
|
||||
pushq %r11; \
|
||||
pushq %rbx; \
|
||||
pushq %rbp; \
|
||||
pushq %r12; \
|
||||
pushq %r13; \
|
||||
pushq %r14; \
|
||||
pushq %r15; \
|
||||
pushq $1; /* is_gpr_valid is set, and others are cleared */ \
|
||||
subq $X86_FLAGS_BASE,%rsp /* for x86_sregs, etc. */
|
||||
|
||||
#define POP_ALL_REGS \
|
||||
movq $0,X86_FLAGS_BASE(%rsp); /* clear all flags */ \
|
||||
addq $X86_REGS_BASE,%rsp; /* discard x86_sregs, flags, etc. */ \
|
||||
popq %r15; \
|
||||
popq %r14; \
|
||||
popq %r13; \
|
||||
popq %r12; \
|
||||
popq %rbp; \
|
||||
popq %rbx; \
|
||||
popq %r11; \
|
||||
popq %r10; \
|
||||
popq %r9; \
|
||||
popq %r8; \
|
||||
popq %rax; \
|
||||
popq %rcx; \
|
||||
popq %rdx; \
|
||||
popq %rsi; \
|
||||
popq %rdi
|
||||
|
||||
.data
|
||||
.globl generic_common_handlers
|
||||
generic_common_handlers:
|
||||
@@ -75,7 +92,7 @@ vector=vector+1
|
||||
|
||||
common_interrupt:
|
||||
PUSH_ALL_REGS
|
||||
movq 120(%rsp), %rdi
|
||||
movq ERROR_OFFSET(%rsp), %rdi
|
||||
movq %rsp, %rsi
|
||||
call handle_interrupt /* Enter C code */
|
||||
POP_ALL_REGS
|
||||
@@ -91,7 +108,7 @@ page_fault:
|
||||
cld
|
||||
PUSH_ALL_REGS
|
||||
movq %cr2, %rdi
|
||||
movq 120(%rsp),%rsi
|
||||
movq ERROR_OFFSET(%rsp),%rsi
|
||||
movq %rsp,%rdx
|
||||
movq __page_fault_handler_address(%rip), %rax
|
||||
andq %rax, %rax
|
||||
@@ -113,10 +130,53 @@ general_protection_exception:
|
||||
addq $8, %rsp
|
||||
iretq
|
||||
|
||||
.globl nmi
|
||||
nmi:
|
||||
#define PANICED 232
|
||||
#define PANIC_REGS 240
|
||||
movq %rax,%gs:PANIC_REGS+0x00
|
||||
movq %rbx,%gs:PANIC_REGS+0x08
|
||||
movq %rcx,%gs:PANIC_REGS+0x10
|
||||
movq %rdx,%gs:PANIC_REGS+0x18
|
||||
movq %rsi,%gs:PANIC_REGS+0x20
|
||||
movq %rdi,%gs:PANIC_REGS+0x28
|
||||
movq %rbp,%gs:PANIC_REGS+0x30
|
||||
movq 0x18(%rsp),%rax /* rsp */
|
||||
movq %rax,%gs:PANIC_REGS+0x38
|
||||
movq %r8, %gs:PANIC_REGS+0x40
|
||||
movq %r9, %gs:PANIC_REGS+0x48
|
||||
movq %r10,%gs:PANIC_REGS+0x50
|
||||
movq %r11,%gs:PANIC_REGS+0x58
|
||||
movq %r12,%gs:PANIC_REGS+0x60
|
||||
movq %r13,%gs:PANIC_REGS+0x68
|
||||
movq %r14,%gs:PANIC_REGS+0x70
|
||||
movq %r15,%gs:PANIC_REGS+0x78
|
||||
movq 0x00(%rsp),%rax /* rip */
|
||||
movq %rax,%gs:PANIC_REGS+0x80
|
||||
movq 0x10(%rsp),%rax /* rflags */
|
||||
movl %eax,%gs:PANIC_REGS+0x88
|
||||
movq 0x08(%rsp),%rax /* cs */
|
||||
movl %eax,%gs:PANIC_REGS+0x8C
|
||||
movq 0x20(%rsp),%rax /* ss */
|
||||
movl %eax,%gs:PANIC_REGS+0x90
|
||||
xorq %rax,%rax
|
||||
movw %ds,%ax
|
||||
movl %eax,%gs:PANIC_REGS+0x94
|
||||
movw %es,%ax
|
||||
movl %eax,%gs:PANIC_REGS+0x98
|
||||
movw %fs,%ax
|
||||
movl %eax,%gs:PANIC_REGS+0x9C
|
||||
movw %gs,%ax
|
||||
movl %eax,%gs:PANIC_REGS+0xA0
|
||||
movq $1,%gs:PANICED
|
||||
1:
|
||||
hlt
|
||||
jmp 1b
|
||||
|
||||
.globl x86_syscall
|
||||
x86_syscall:
|
||||
cld
|
||||
movq %rsp, %gs:24
|
||||
movq %rsp, %gs:X86_CPU_LOCAL_OFFSET_USTACK
|
||||
movq %gs:(X86_CPU_LOCAL_OFFSET_SP0), %rsp
|
||||
|
||||
pushq $(USER_DS)
|
||||
@@ -124,21 +184,19 @@ x86_syscall:
|
||||
pushq %r11
|
||||
pushq $(USER_CS)
|
||||
pushq %rcx
|
||||
pushq $0
|
||||
movq %gs:24, %rcx
|
||||
movq %rcx, 32(%rsp)
|
||||
pushq %rax /* error code (= system call number) */
|
||||
PUSH_ALL_REGS
|
||||
movq 104(%rsp), %rdi
|
||||
movq %gs:X86_CPU_LOCAL_OFFSET_USTACK, %rcx
|
||||
movq %rcx, RSP_OFFSET(%rsp)
|
||||
movq RAX_OFFSET(%rsp), %rdi
|
||||
movw %ss, %ax
|
||||
movw %ax, %ds
|
||||
movq %rsp, %rsi
|
||||
callq *__x86_syscall_handler(%rip)
|
||||
1:
|
||||
movq %rax, 104(%rsp)
|
||||
movq %rax, RAX_OFFSET(%rsp)
|
||||
POP_ALL_REGS
|
||||
#ifdef USE_SYSRET
|
||||
movq 8(%rsp), %rcx
|
||||
movq 24(%rsp), %r11
|
||||
movq 32(%rsp), %rsp
|
||||
sysretq
|
||||
#else
|
||||
@@ -147,7 +205,32 @@ x86_syscall:
|
||||
#endif
|
||||
|
||||
.globl enter_user_mode
|
||||
enter_user_mode:
|
||||
enter_user_mode:
|
||||
movq $0, %rdi
|
||||
movq %rsp, %rsi
|
||||
call check_signal
|
||||
POP_ALL_REGS
|
||||
addq $8, %rsp
|
||||
iretq
|
||||
|
||||
.globl debug_exception
|
||||
debug_exception:
|
||||
cld
|
||||
pushq $0 /* error */
|
||||
PUSH_ALL_REGS
|
||||
movq %rsp, %rdi
|
||||
call debug_handler
|
||||
POP_ALL_REGS
|
||||
addq $8, %rsp
|
||||
iretq
|
||||
|
||||
.globl int3_exception
|
||||
int3_exception:
|
||||
cld
|
||||
pushq $0 /* error */
|
||||
PUSH_ALL_REGS
|
||||
movq %rsp, %rdi
|
||||
call int3_handler
|
||||
POP_ALL_REGS
|
||||
addq $8, %rsp
|
||||
iretq
|
||||
|
||||
@@ -6,6 +6,8 @@
|
||||
* resides in memory.
|
||||
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2011 - 2012 Taku Shimosawa
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY
|
||||
@@ -19,13 +21,19 @@
|
||||
#include <registers.h>
|
||||
#include <string.h>
|
||||
|
||||
#define LOCALS_SPAN (4 * PAGE_SIZE)
|
||||
|
||||
struct x86_cpu_local_variables *locals;
|
||||
size_t x86_cpu_local_variables_span = LOCALS_SPAN; /* for debugger */
|
||||
|
||||
void init_processors_local(int max_id)
|
||||
{
|
||||
size_t size;
|
||||
|
||||
size = LOCALS_SPAN * max_id;
|
||||
/* Is contiguous allocating adequate?? */
|
||||
locals = ihk_mc_alloc_pages(max_id, IHK_MC_AP_CRITICAL);
|
||||
memset(locals, 0, PAGE_SIZE * max_id);
|
||||
locals = ihk_mc_alloc_pages(size/PAGE_SIZE, IHK_MC_AP_CRITICAL);
|
||||
memset(locals, 0, size);
|
||||
|
||||
kprintf("locals = %p\n", locals);
|
||||
}
|
||||
@@ -33,12 +41,12 @@ void init_processors_local(int max_id)
|
||||
struct x86_cpu_local_variables *get_x86_cpu_local_variable(int id)
|
||||
{
|
||||
return (struct x86_cpu_local_variables *)
|
||||
((char *)locals + (id << PAGE_SHIFT));
|
||||
((char *)locals + (LOCALS_SPAN * id));
|
||||
}
|
||||
|
||||
static void *get_x86_cpu_local_kstack(int id)
|
||||
{
|
||||
return ((char *)locals + ((id + 1) << PAGE_SHIFT));
|
||||
return ((char *)locals + (LOCALS_SPAN * (id + 1)));
|
||||
}
|
||||
|
||||
struct x86_cpu_local_variables *get_x86_this_cpu_local(void)
|
||||
@@ -80,6 +88,15 @@ void assign_processor_id(void)
|
||||
v->processor_id = id;
|
||||
}
|
||||
|
||||
void init_boot_processor_local(void)
|
||||
{
|
||||
static struct x86_cpu_local_variables avar;
|
||||
|
||||
memset(&avar, -1, sizeof(avar));
|
||||
set_gs_base(&avar);
|
||||
return;
|
||||
}
|
||||
|
||||
/** IHK **/
|
||||
int ihk_mc_get_processor_id(void)
|
||||
{
|
||||
|
||||
@@ -5,6 +5,8 @@
|
||||
* Acquire physical pages and manipulate page table entries.
|
||||
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2011 - 2012 Taku Shimosawa
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY
|
||||
@@ -20,8 +22,9 @@
|
||||
#include <list.h>
|
||||
#include <process.h>
|
||||
#include <page.h>
|
||||
#include <cls.h>
|
||||
|
||||
#define dkprintf(...)
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
|
||||
static char *last_page;
|
||||
@@ -263,7 +266,11 @@ static unsigned long attr_to_l1attr(enum ihk_mc_pt_attribute attr)
|
||||
{
|
||||
if (attr & PTATTR_UNCACHABLE) {
|
||||
return (attr & ATTR_MASK) | PFL1_PCD | PFL1_PWT;
|
||||
} else {
|
||||
}
|
||||
else if (attr & PTATTR_WRITE_COMBINED) {
|
||||
return (attr & ATTR_MASK) | PFL1_PWT;
|
||||
}
|
||||
else {
|
||||
return (attr & ATTR_MASK);
|
||||
}
|
||||
}
|
||||
@@ -367,6 +374,7 @@ static int __set_pt_page(struct page_table *pt, void *virt, unsigned long phys,
|
||||
unsigned long init_pt_lock_flags;
|
||||
int ret = -ENOMEM;
|
||||
|
||||
init_pt_lock_flags = 0; /* for avoidance of warning */
|
||||
if (in_kernel) {
|
||||
init_pt_lock_flags = ihk_mc_spinlock_lock(&init_pt_lock);
|
||||
}
|
||||
@@ -494,8 +502,52 @@ static int __clear_pt_page(struct page_table *pt, void *virt, int largepage)
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t ihk_mc_pt_virt_to_pagemap(struct page_table *pt, unsigned long virt)
|
||||
{
|
||||
int l4idx, l3idx, l2idx, l1idx;
|
||||
unsigned long v = (unsigned long)virt;
|
||||
uint64_t ret = 0;
|
||||
|
||||
if (!pt) {
|
||||
pt = init_pt;
|
||||
}
|
||||
|
||||
GET_VIRT_INDICES(v, l4idx, l3idx, l2idx, l1idx);
|
||||
|
||||
if (!(pt->entry[l4idx] & PFL4_PRESENT)) {
|
||||
return ret;
|
||||
}
|
||||
pt = phys_to_virt(pt->entry[l4idx] & PAGE_MASK);
|
||||
|
||||
if (!(pt->entry[l3idx] & PFL3_PRESENT)) {
|
||||
return ret;
|
||||
}
|
||||
pt = phys_to_virt(pt->entry[l3idx] & PAGE_MASK);
|
||||
|
||||
if (!(pt->entry[l2idx] & PFL2_PRESENT)) {
|
||||
return ret;
|
||||
}
|
||||
if ((pt->entry[l2idx] & PFL2_SIZE)) {
|
||||
|
||||
ret = PM_PFRAME(((pt->entry[l2idx] & LARGE_PAGE_MASK) +
|
||||
(v & (LARGE_PAGE_SIZE - 1))) >> PAGE_SHIFT);
|
||||
ret |= PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
|
||||
return ret;
|
||||
}
|
||||
pt = phys_to_virt(pt->entry[l2idx] & PAGE_MASK);
|
||||
|
||||
if (!(pt->entry[l1idx] & PFL1_PRESENT)) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = PM_PFRAME((pt->entry[l1idx] & PT_PHYSMASK) >> PAGE_SHIFT);
|
||||
ret |= PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
int ihk_mc_pt_virt_to_phys(struct page_table *pt,
|
||||
void *virt, unsigned long *phys)
|
||||
const void *virt, unsigned long *phys)
|
||||
{
|
||||
int l4idx, l3idx, l2idx, l1idx;
|
||||
unsigned long v = (unsigned long)virt;
|
||||
@@ -1824,7 +1876,8 @@ enum ihk_mc_pt_attribute arch_vrflag_to_ptattr(unsigned long flag, uint64_t faul
|
||||
attr = common_vrflag_to_ptattr(flag, fault, ptep);
|
||||
|
||||
if ((fault & PF_PROT)
|
||||
|| ((fault & PF_POPULATE) && (flag & VR_PRIVATE))) {
|
||||
|| ((fault & (PF_POPULATE | PF_PATCH))
|
||||
&& (flag & VR_PRIVATE))) {
|
||||
attr |= PTATTR_DIRTY;
|
||||
}
|
||||
|
||||
@@ -2043,7 +2096,7 @@ void ihk_mc_reserve_arch_pages(unsigned long start, unsigned long end,
|
||||
/* Reserve Text + temporal heap */
|
||||
cb(virt_to_phys(_head), virt_to_phys(get_last_early_heap()), 0);
|
||||
/* Reserve trampoline area to boot the second ap */
|
||||
cb(AP_TRAMPOLINE, AP_TRAMPOLINE + AP_TRAMPOLINE_SIZE, 0);
|
||||
cb(ap_trampoline, ap_trampoline + AP_TRAMPOLINE_SIZE, 0);
|
||||
/* Reserve the null page */
|
||||
cb(0, PAGE_SIZE, 0);
|
||||
/* Micro-arch specific */
|
||||
@@ -2072,9 +2125,9 @@ void *phys_to_virt(unsigned long p)
|
||||
return (void *)(p + MAP_ST_START);
|
||||
}
|
||||
|
||||
int copy_from_user(struct process *proc, void *dst, const void *src, size_t siz)
|
||||
int copy_from_user(void *dst, const void *src, size_t siz)
|
||||
{
|
||||
struct process_vm *vm = proc->vm;
|
||||
struct process_vm *vm = cpu_local_var(current)->vm;
|
||||
struct vm_range *range;
|
||||
size_t pos;
|
||||
size_t wsiz;
|
||||
@@ -2101,9 +2154,62 @@ int copy_from_user(struct process *proc, void *dst, const void *src, size_t siz)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int copy_to_user(struct process *proc, void *dst, const void *src, size_t siz)
|
||||
int read_process_vm(struct process_vm *vm, void *kdst, const void *usrc, size_t siz)
|
||||
{
|
||||
struct process_vm *vm = proc->vm;
|
||||
const uintptr_t ustart = (uintptr_t)usrc;
|
||||
const uintptr_t uend = ustart + siz;
|
||||
uint64_t reason;
|
||||
uintptr_t addr;
|
||||
int error;
|
||||
const void *from;
|
||||
void *to;
|
||||
size_t remain;
|
||||
size_t cpsize;
|
||||
unsigned long pa;
|
||||
void *va;
|
||||
|
||||
if ((ustart < vm->region.user_start)
|
||||
|| (vm->region.user_end <= ustart)
|
||||
|| ((vm->region.user_end - ustart) < siz)) {
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
reason = PF_USER; /* page not present */
|
||||
for (addr = ustart & PAGE_MASK; addr < uend; addr += PAGE_SIZE) {
|
||||
error = page_fault_process_vm(vm, (void *)addr, reason);
|
||||
if (error) {
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
from = usrc;
|
||||
to = kdst;
|
||||
remain = siz;
|
||||
while (remain > 0) {
|
||||
cpsize = PAGE_SIZE - ((uintptr_t)from & (PAGE_SIZE - 1));
|
||||
if (cpsize > remain) {
|
||||
cpsize = remain;
|
||||
}
|
||||
|
||||
error = ihk_mc_pt_virt_to_phys(vm->page_table, from, &pa);
|
||||
if (error) {
|
||||
return error;
|
||||
}
|
||||
|
||||
va = phys_to_virt(pa);
|
||||
memcpy(to, va, cpsize);
|
||||
|
||||
from += cpsize;
|
||||
to += cpsize;
|
||||
remain -= cpsize;
|
||||
}
|
||||
|
||||
return 0;
|
||||
} /* read_process_vm() */
|
||||
|
||||
int copy_to_user(void *dst, const void *src, size_t siz)
|
||||
{
|
||||
struct process_vm *vm = cpu_local_var(current)->vm;
|
||||
struct vm_range *range;
|
||||
size_t pos;
|
||||
size_t wsiz;
|
||||
@@ -2130,3 +2236,114 @@ int copy_to_user(struct process *proc, void *dst, const void *src, size_t siz)
|
||||
memcpy(dst, src, siz);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Copy siz bytes from the kernel buffer ksrc to the user address udst of
 * the address space described by vm.
 *
 * The destination range must lie inside vm->region.user_start ..
 * vm->region.user_end, otherwise -EFAULT is returned.  Every page of the
 * range is first faulted in through page_fault_process_vm() with
 * PF_POPULATE | PF_WRITE | PF_USER; the data is then written page by page
 * through the address returned by phys_to_virt() for each page.
 *
 * Returns 0 on success, or the first error reported by
 * page_fault_process_vm() / ihk_mc_pt_virt_to_phys().
 */
int write_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t siz)
{
	const uintptr_t first = (uintptr_t)udst;
	const uintptr_t limit = first + siz;
	const uint64_t fault_flags = PF_POPULATE | PF_WRITE | PF_USER;
	const char *src = ksrc;
	char *dst = udst;
	size_t left = siz;
	uintptr_t page;
	int rc;

	/* Reject ranges that are not completely inside the user region. */
	if (first < vm->region.user_start) {
		return -EFAULT;
	}
	if (first >= vm->region.user_end) {
		return -EFAULT;
	}
	if (siz > (vm->region.user_end - first)) {
		return -EFAULT;
	}

	/* Make every page of the destination range present and writable. */
	for (page = first & PAGE_MASK; page < limit; page += PAGE_SIZE) {
		rc = page_fault_process_vm(vm, (void *)page, fault_flags);
		if (rc) {
			return rc;
		}
	}

	/* Copy one page-bounded chunk at a time. */
	while (left > 0) {
		unsigned long phys;
		size_t chunk = PAGE_SIZE - ((uintptr_t)dst & (PAGE_SIZE - 1));

		if (chunk > left) {
			chunk = left;
		}

		rc = ihk_mc_pt_virt_to_phys(vm->page_table, dst, &phys);
		if (rc) {
			return rc;
		}

		memcpy(phys_to_virt(phys), src, chunk);

		src += chunk;
		dst += chunk;
		left -= chunk;
	}

	return 0;
} /* write_process_vm() */
|
||||
|
||||
/*
 * Copy siz bytes from the kernel buffer ksrc to the user address udst of
 * the address space described by vm, faulting the pages in with
 * PF_PATCH | PF_WRITE | PF_USER.
 *
 * The destination range must lie inside vm->region.user_start ..
 * vm->region.user_end, otherwise -EFAULT is returned.  Entry, every
 * failure and the successful exit are logged with kprintf().
 *
 * Returns 0 on success, or the first error reported by
 * page_fault_process_vm() / ihk_mc_pt_virt_to_phys().
 */
int patch_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t siz)
{
	const uintptr_t first = (uintptr_t)udst;
	const uintptr_t limit = first + siz;
	const uint64_t fault_flags = PF_PATCH | PF_WRITE | PF_USER;
	const char *src = ksrc;
	char *dst = udst;
	size_t left = siz;
	uintptr_t page;
	int rc;

	kprintf("patch_process_vm(%p,%p,%p,%lx)\n", vm, udst, ksrc, siz);

	/* Reject ranges that are not completely inside the user region. */
	if ((first < vm->region.user_start)
			|| (first >= vm->region.user_end)
			|| (siz > (vm->region.user_end - first))) {
		kprintf("patch_process_vm(%p,%p,%p,%lx):not in user\n", vm, udst, ksrc, siz);
		return -EFAULT;
	}

	/* Fault in every page of the destination range. */
	for (page = first & PAGE_MASK; page < limit; page += PAGE_SIZE) {
		rc = page_fault_process_vm(vm, (void *)page, fault_flags);
		if (rc) {
			kprintf("patch_process_vm(%p,%p,%p,%lx):pf(%lx):%d\n", vm, udst, ksrc, siz, page, rc);
			return rc;
		}
	}

	/* Copy one page-bounded chunk at a time. */
	while (left > 0) {
		unsigned long phys;
		size_t chunk = PAGE_SIZE - ((uintptr_t)dst & (PAGE_SIZE - 1));

		if (chunk > left) {
			chunk = left;
		}

		rc = ihk_mc_pt_virt_to_phys(vm->page_table, dst, &phys);
		if (rc) {
			kprintf("patch_process_vm(%p,%p,%p,%lx):v2p(%p):%d\n", vm, udst, ksrc, siz, (void *)dst, rc);
			return rc;
		}

		memcpy(phys_to_virt(phys), src, chunk);

		src += chunk;
		dst += chunk;
		left -= chunk;
	}

	kprintf("patch_process_vm(%p,%p,%p,%lx):%d\n", vm, udst, ksrc, siz, 0);
	return 0;
} /* patch_process_vm() */
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -5,6 +5,8 @@
|
||||
* implements x86's vsyscall
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2014 Hitachi, Ltd.
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
@@ -58,3 +60,17 @@ long vsyscall_time(void *tp)
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
extern int vsyscall_getcpu(unsigned *cpup, unsigned *nodep, void *tcachep)
|
||||
__attribute__ ((section (".vsyscall.getcpu")));
|
||||
|
||||
int vsyscall_getcpu(unsigned *cpup, unsigned *nodep, void *tcachep)
|
||||
{
|
||||
int error;
|
||||
|
||||
asm ("syscall" : "=a" (error)
|
||||
: "a" (__NR_getcpu), "D" (cpup), "S" (nodep), "d" (tcachep)
|
||||
: "%rcx", "%r11", "memory");
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
46
arch/x86/tools/mcreboot-builtin-x86.sh.in
Executable file
46
arch/x86/tools/mcreboot-builtin-x86.sh.in
Executable file
@@ -0,0 +1,46 @@
|
||||
#!/bin/bash -x

# \file arch/x86/tools/mcreboot-builtin-x86.sh.in
#  License details are found in the file LICENSE.
# \brief
#  mckernel boot script
# \author Masamichi Takagi  <masamichi.takagi@riken.jp> \par
# 	Copyright (C) 2014  RIKEN AICS

# HISTORY:
#

prefix="@prefix@"
BINDIR="@BINDIR@"
SBINDIR="@SBINDIR@"
KMODDIR="@KMODDIR@"
KERNDIR="@KERNDIR@"

# Kill every running mcexec before the modules are unloaded.
kill -9 $(pidof mcexec)

# Unload the modules that are currently loaded, in this order.
# Abort if one of them cannot be removed.
for mod in mcctrl dcfa ihk_builtin ihk; do
	if lsmod | grep "$mod" > /dev/null 2>&1; then
		rmmod "$mod" || exit 1
	fi
done

# Load IHK and create OS instance 0.
insmod "$KMODDIR/ihk.ko" &&
insmod "$KMODDIR/ihk_builtin.ko" &&
"$SBINDIR/ihkconfig" 0 create &&
NCORE=$(dmesg | grep -E 'SHIMOS: CPU Status:' | awk '{split($0,a," "); for (i = 1; i <= length(a); i++) { if(a[i] ~ /2/) {count++}} print count;}')
# A quarter of the total memory reported by free, in GiB.
MEM=$(free -g | grep -E 'Mem:' | awk '{print int($2/4)}')

# Allocate resources, load and boot the kernel, then load the delegator.
"$SBINDIR/ihkosctl" 0 alloc "$NCORE" "$MEM"g &&
"$SBINDIR/ihkosctl" 0 load "$KERNDIR/mckernel.img" &&
"$SBINDIR/ihkosctl" 0 kargs hidos osnum=0 &&
"$SBINDIR/ihkosctl" 0 boot &&
sleep 1 &&
"$SBINDIR/ihkosctl" 0 kmsg &&
insmod "$KMODDIR/mcctrl.ko" &&
sleep 1 &&
"$SBINDIR/ihkosctl" 0 kmsg &&
exit 0
|
||||
79
arch/x86/tools/mcreboot-smp-x86.sh.in
Normal file
79
arch/x86/tools/mcreboot-smp-x86.sh.in
Normal file
@@ -0,0 +1,79 @@
|
||||
#!/bin/bash

# IHK SMP-x86 example boot script.
# author: Balazs Gerofi <bgerofi@riken.jp>
#      Copyright (C) 2014  RIKEN AICS
#
# This is an example script for loading IHK, configuring a partition and
# booting McKernel on it.
# The script reserves half of the CPU cores and 512MB of RAM from NUMA node 0
# when IHK is loaded for the first time, otherwise it destroys the current
# McKernel instance and reboots it using the same set of resources as it used
# previously.
# Note that the script does not output anything unless an error occurs.
# Fatal errors are written to stderr and end the script with exit status 1.

prefix="@prefix@"
BINDIR="@BINDIR@"
SBINDIR="@SBINDIR@"
KMODDIR="@KMODDIR@"
KERNDIR="@KERNDIR@"

mem="512M@0"

# Report a fatal error on stderr and abort with a failure status.
# (A bare "exit" after echo would return echo's status, i.e. success.)
die() {
	echo "error: $1" >&2
	exit 1
}

# Get the number of CPUs on NUMA node 0
nr_cpus=`lscpu --parse | awk -F"," '{if ($4 == 0) print $4}' | wc -l`

# Use the second half of the cores
let nr_cpus="$nr_cpus / 2"
cpus=`lscpu --parse | awk -F"," '{if ($4 == 0) print $1}' | tail -n $nr_cpus | xargs echo -n | sed 's/ /,/g'`
if [ "$cpus" == "" ]; then die "no available CPUs on NUMA node 0?"; fi

# Remove delegator if loaded
if [ "`lsmod | grep mcctrl`" != "" ]; then
	if ! rmmod mcctrl; then die "removing mcctrl"; fi
fi

# Load IHK if not loaded
if [ "`lsmod | grep ihk`" == "" ]; then
	if ! insmod ${KMODDIR}/ihk.ko; then die "loading ihk"; fi
fi

# Load IHK-SMP if not loaded and reserve CPUs and memory
if [ "`lsmod | grep ihk_smp_x86`" == "" ]; then
	# Pick the first vector in 64..255 that has no /proc/irq directory and
	# does not show up in /proc/interrupts.
	ihk_irq=""
	for i in `seq 64 255`; do
		if [ ! -d /proc/irq/$i ] && [ "`cat /proc/interrupts | grep ":" | awk '{print $1}' | grep -o '[0-9]*' | grep $i`" == "" ]; then
			ihk_irq=$i
			break
		fi
	done
	if [ "$ihk_irq" == "" ]; then die "no IRQ available"; fi
	if ! insmod ${KMODDIR}/ihk-smp-x86.ko ihk_start_irq=$ihk_irq; then die "loading ihk-smp-x86"; fi
	if ! ${SBINDIR}/ihkconfig 0 reserve cpu ${cpus}; then die "reserving CPUs"; fi
	if ! ${SBINDIR}/ihkconfig 0 reserve mem ${mem}; then die "reserving memory"; fi
fi

# Check for existing OS instance and destroy
if [ -c /dev/mcos0 ]; then
	# Query CPU cores and memory of OS instance so that the same values are used as previously
	cpus=`${SBINDIR}/ihkosctl 0 query cpu` || die "querying cpus"
	mem=`${SBINDIR}/ihkosctl 0 query mem` || die "querying memory"

	if ! ${SBINDIR}/ihkconfig 0 destroy 0; then echo "warning: destroy failed" >&2; fi
else
	# Otherwise query IHK-SMP for resources
	cpus=`${SBINDIR}/ihkconfig 0 query cpu` || die "querying cpus"
	mem=`${SBINDIR}/ihkconfig 0 query mem` || die "querying memory"
fi

if ! ${SBINDIR}/ihkconfig 0 create; then die "create"; fi
if ! ${SBINDIR}/ihkosctl 0 assign cpu ${cpus}; then die "assign CPUs"; fi
if ! ${SBINDIR}/ihkosctl 0 assign mem ${mem}; then die "assign memory"; fi
if ! ${SBINDIR}/ihkosctl 0 load ${KERNDIR}/mckernel.img; then die "loading kernel image"; fi
if ! ${SBINDIR}/ihkosctl 0 kargs hidos; then die "setting kernel arguments"; fi
if ! ${SBINDIR}/ihkosctl 0 boot; then die "booting"; fi
if ! insmod ${KMODDIR}/mcctrl.ko; then die "inserting mcctrl.ko"; fi
|
||||
16
arch/x86/tools/mcshutdown-builtin-x86.sh.in
Normal file
16
arch/x86/tools/mcshutdown-builtin-x86.sh.in
Normal file
@@ -0,0 +1,16 @@
|
||||
#!/bin/bash
|
||||
# \file arch/x86/tools/mcshutdown-builtin-x86.sh.in
|
||||
# License details are found in the file LICENSE.
|
||||
# \brief
|
||||
# mckernel shutdown script
|
||||
#
|
||||
# \author McKernel Development Team
|
||||
#
|
||||
|
||||
prefix="@prefix@"
|
||||
BINDIR="@BINDIR@"
|
||||
SBINDIR="@SBINDIR@"
|
||||
KMODDIR="@KMODDIR@"
|
||||
KERNDIR="@KERNDIR@"
|
||||
|
||||
"$SBINDIR/ihkosctl" 0 shutdown
|
||||
25
configure.ac
25
configure.ac
@@ -24,7 +24,7 @@ AC_ARG_WITH([kernelsrc],
|
||||
|
||||
AC_ARG_WITH([target],
|
||||
AC_HELP_STRING(
|
||||
[--with-target={attached-mic | builtin-mic | builtin-x86}],[target, default is attached-mic]),
|
||||
[--with-target={attached-mic | builtin-mic | builtin-x86 | smp-x86}],[target, default is attached-mic]),
|
||||
[WITH_TARGET=$withval],[WITH_TARGET=yes])
|
||||
|
||||
AC_ARG_ENABLE([dcfa],
|
||||
@@ -111,6 +111,26 @@ case $WITH_TARGET in
|
||||
MANDIR="$prefix/attached/man"
|
||||
fi
|
||||
;;
|
||||
smp-x86)
|
||||
ARCH=`uname -m`
|
||||
AC_PROG_CC
|
||||
XCC=$CC
|
||||
if test "X$KERNDIR" = X; then
|
||||
KERNDIR="$prefix/smp-x86/kernel"
|
||||
fi
|
||||
if test "X$BINDIR" = X; then
|
||||
BINDIR="$prefix/bin"
|
||||
fi
|
||||
if test "X$SBINDIR" = X; then
|
||||
SBINDIR="$prefix/sbin"
|
||||
fi
|
||||
if test "X$KMODDIR" = X; then
|
||||
KMODDIR="$prefix/kmod"
|
||||
fi
|
||||
if test "X$MANDIR" = X; then
|
||||
MANDIR="$prefix/smp-x86/man"
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
AC_MSG_ERROR([target $WITH_TARGET is unknwon])
|
||||
;;
|
||||
@@ -145,6 +165,9 @@ AC_CONFIG_FILES([
|
||||
kernel/Makefile.build
|
||||
arch/x86/tools/mcreboot-attached-mic.sh
|
||||
arch/x86/tools/mcshutdown-attached-mic.sh
|
||||
arch/x86/tools/mcreboot-builtin-x86.sh
|
||||
arch/x86/tools/mcreboot-smp-x86.sh
|
||||
arch/x86/tools/mcshutdown-builtin-x86.sh
|
||||
arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in
|
||||
])
|
||||
|
||||
|
||||
@@ -38,6 +38,9 @@
|
||||
#define MCEXEC_UP_SEND_SIGNAL 0x30a02906
|
||||
#define MCEXEC_UP_GET_CPU 0x30a02907
|
||||
#define MCEXEC_UP_STRNCPY_FROM_USER 0x30a02908
|
||||
#define MCEXEC_UP_NEW_PROCESS 0x30a02909
|
||||
#define MCEXEC_UP_GET_CRED 0x30a0290a
|
||||
#define MCEXEC_UP_GET_CREDV 0x30a0290b
|
||||
|
||||
#define MCEXEC_UP_PREPARE_DMA 0x30a02910
|
||||
#define MCEXEC_UP_FREE_DMA 0x30a02911
|
||||
@@ -45,6 +48,8 @@
|
||||
#define MCEXEC_UP_OPEN_EXEC 0x30a02912
|
||||
#define MCEXEC_UP_CLOSE_EXEC 0x30a02913
|
||||
|
||||
#define MCEXEC_UP_DEBUG_LOG 0x40000000
|
||||
|
||||
#define MCEXEC_UP_TRANSFER_TO_REMOTE 0
|
||||
#define MCEXEC_UP_TRANSFER_FROM_REMOTE 1
|
||||
|
||||
@@ -67,6 +72,7 @@ struct program_image_section {
|
||||
};
|
||||
|
||||
#define SHELL_PATH_MAX_LEN 1024
|
||||
#define MCK_RLIM_MAX 20
|
||||
|
||||
struct program_load_desc {
|
||||
int num_sections;
|
||||
@@ -76,6 +82,7 @@ struct program_load_desc {
|
||||
int err;
|
||||
int stack_prot;
|
||||
int pgid;
|
||||
int cred[8];
|
||||
unsigned long entry;
|
||||
unsigned long user_start;
|
||||
unsigned long user_end;
|
||||
@@ -90,8 +97,7 @@ struct program_load_desc {
|
||||
unsigned long args_len;
|
||||
char *envs;
|
||||
unsigned long envs_len;
|
||||
unsigned long rlimit_stack_cur;
|
||||
unsigned long rlimit_stack_max;
|
||||
struct rlimit rlimit[MCK_RLIM_MAX];
|
||||
unsigned long interp_align;
|
||||
char shell_path[SHELL_PATH_MAX_LEN];
|
||||
struct program_image_section sections[0];
|
||||
@@ -156,4 +162,8 @@ struct signal_desc {
|
||||
char info[128];
|
||||
};
|
||||
|
||||
struct newprocess_desc {
|
||||
int pid;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
@@ -2,13 +2,14 @@ KDIR ?= @KDIR@
|
||||
ARCH ?= @ARCH@
|
||||
src = @abs_srcdir@
|
||||
KMODDIR=@KMODDIR@
|
||||
BINDIR=@BINDIR@
|
||||
IHK_BASE=$(src)/../../../ihk
|
||||
|
||||
obj-m += mcctrl.o
|
||||
|
||||
ccflags-y := -I$(IHK_BASE)/linux/include -I$(IHK_BASE)/ikc/include -I$(IHK_BASE)/include -I$(src)/../include
|
||||
ccflags-y := -I$(IHK_BASE)/linux/include -I$(IHK_BASE)/ikc/include -I$(IHK_BASE)/include -I$(src)/../include -mcmodel=kernel -mno-red-zone -DMCEXEC_PATH=\"$(BINDIR)/mcexec\"
|
||||
|
||||
mcctrl-y := driver.o control.o ikc.o syscall.o procfs.o
|
||||
mcctrl-y := driver.o control.o ikc.o syscall.o procfs.o binfmt_mcexec.o
|
||||
|
||||
KBUILD_EXTRA_SYMBOLS = @abs_builddir@/../../../ihk/linux/core/Module.symvers
|
||||
|
||||
|
||||
276
executer/kernel/binfmt_mcexec.c
Normal file
276
executer/kernel/binfmt_mcexec.c
Normal file
@@ -0,0 +1,276 @@
|
||||
#include <linux/module.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/stat.h>
|
||||
#include <linux/binfmts.h>
|
||||
#include <linux/elfcore.h>
|
||||
#include <linux/elf.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/version.h>
|
||||
#include "mcctrl.h"
|
||||
|
||||
/*
 * Check whether the path "file" lies below one of the directories in
 * "list", a ':'-separated list of directory names.
 *
 * Trailing '/' characters of each list entry are ignored.  An entry
 * matches when it is a prefix of "file" and the next character of "file"
 * is '/'.  Consequently an entry that is empty after trimming (for
 * example "/") matches every path starting with '/'.
 *
 * Returns 1 when "list" is empty or when some entry matches, 0 otherwise.
 */
static int pathcheck(const char *file, const char *list)
{
	const char *entry = list;
	const char *end;
	size_t len;

	if (*list == '\0')
		return 1;

	for (;;) {
		end = strchr(entry, ':');
		if (end == NULL)
			end = entry + strlen(entry);

		/*
		 * Trim trailing slashes by length so that no pointer before
		 * the start of the list is ever formed.
		 */
		len = (size_t)(end - entry);
		while (len > 0 && entry[len - 1] == '/')
			len--;

		/* file[len] is readable: the first len bytes just matched. */
		if (strncmp(file, entry, len) == 0 && file[len] == '/')
			return 1;

		if (*end == '\0')
			break;
		entry = end + 1;
	}

	return 0;
}
|
||||
|
||||
static int load_elf(struct linux_binprm *bprm
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0)
|
||||
, struct pt_regs *regs
|
||||
#endif
|
||||
)
|
||||
{
|
||||
char mcexec[BINPRM_BUF_SIZE];
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36)
|
||||
const
|
||||
#endif
|
||||
char *wp;
|
||||
char *cp;
|
||||
struct file *file;
|
||||
int rc;
|
||||
struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
|
||||
typedef struct {
|
||||
char *name;
|
||||
char *val;
|
||||
int l;
|
||||
} envdata;
|
||||
envdata env[] = {
|
||||
{.name = "MCEXEC"},
|
||||
#define env_mcexec (env[0].val)
|
||||
{.name = "MCEXEC_WL"},
|
||||
#define env_mcexec_wl (env[1].val)
|
||||
{.name = "MCEXEC_BL"},
|
||||
#define env_mcexec_bl (env[2].val)
|
||||
{.name = NULL}
|
||||
};
|
||||
envdata *ep;
|
||||
unsigned long off = 0;
|
||||
struct page *page;
|
||||
char *addr = NULL;
|
||||
int i;
|
||||
unsigned long p;
|
||||
int st;
|
||||
int mode;
|
||||
int cnt[2];
|
||||
char buf[32];
|
||||
int l;
|
||||
int pass;
|
||||
|
||||
if(memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
|
||||
return -ENOEXEC;
|
||||
if(elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
|
||||
return -ENOEXEC;
|
||||
|
||||
if(elf_ex->e_ident[EI_CLASS] != ELFCLASS64)
|
||||
return -ENOEXEC;
|
||||
|
||||
cp = strrchr(bprm->interp, '/');
|
||||
if(!cp ||
|
||||
!strcmp(cp, "/mcexec") ||
|
||||
!strcmp(cp, "/ihkosctl") ||
|
||||
!strcmp(cp, "/ihkconfig"))
|
||||
return -ENOEXEC;
|
||||
|
||||
cnt[0] = bprm->argc;
|
||||
cnt[1] = bprm->envc;
|
||||
for(pass = 0; pass < 2; pass++){
|
||||
p = bprm->p;
|
||||
mode = cnt[0] == 0? (cnt[1] == 0? 2: 1): 0;
|
||||
if(pass == 1){
|
||||
for(ep = env; ep->name; ep++){
|
||||
if(ep->l)
|
||||
ep->val = kmalloc(ep->l, GFP_KERNEL);
|
||||
}
|
||||
}
|
||||
ep = NULL;
|
||||
l = 0;
|
||||
for(i = 0, st = 0; mode != 2;){
|
||||
if(st == 0){
|
||||
off = p & ~PAGE_MASK;
|
||||
rc = get_user_pages(current, bprm->mm,
|
||||
bprm->p, 1, 0, 1,
|
||||
&page, NULL);
|
||||
if(rc <= 0)
|
||||
return -EFAULT;
|
||||
addr = kmap_atomic(page
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0)
|
||||
, KM_USER0
|
||||
#endif
|
||||
);
|
||||
st = 1;
|
||||
}
|
||||
if(addr[off]){
|
||||
if(mode == 1){
|
||||
if(ep){
|
||||
if(pass == 1)
|
||||
ep->val[l] = addr[off];
|
||||
l++;
|
||||
}
|
||||
else if(addr[off] == '='){
|
||||
if(l < 32)
|
||||
buf[l] = '\0';
|
||||
buf[31] = '\0';
|
||||
for(ep = env; ep->name; ep++)
|
||||
if(!strcmp(ep->name, buf))
|
||||
break;
|
||||
if(ep->name)
|
||||
l = 0;
|
||||
else
|
||||
ep = NULL;
|
||||
}
|
||||
else{
|
||||
if(l < 32)
|
||||
buf[l] = addr[off];
|
||||
l++;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
else{
|
||||
if(mode == 1 && ep){
|
||||
if(pass == 0){
|
||||
ep->l = l + 1;
|
||||
}
|
||||
else{
|
||||
ep->val[l] = '\0';
|
||||
}
|
||||
}
|
||||
ep = NULL;
|
||||
l = 0;
|
||||
i++;
|
||||
if(i == cnt[mode]){
|
||||
i = 0;
|
||||
mode++;
|
||||
}
|
||||
}
|
||||
off++;
|
||||
p++;
|
||||
if(off == PAGE_SIZE || mode == 2){
|
||||
kunmap_atomic(addr
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0)
|
||||
, KM_USER0
|
||||
#endif
|
||||
);
|
||||
put_page(page);
|
||||
st = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(!env_mcexec || !strcmp(env_mcexec, "0") || !strcmp(env_mcexec, "off"))
|
||||
rc = 1;
|
||||
else{
|
||||
rc = 0;
|
||||
if(strchr(env_mcexec, '/') && strlen(env_mcexec) < BINPRM_BUF_SIZE)
|
||||
strcpy(mcexec, env_mcexec);
|
||||
else
|
||||
strcpy(mcexec, MCEXEC_PATH);
|
||||
}
|
||||
|
||||
if(rc);
|
||||
else if(env_mcexec_wl)
|
||||
rc = !pathcheck(bprm->interp, env_mcexec_wl);
|
||||
else if(env_mcexec_bl)
|
||||
rc = pathcheck(bprm->interp, env_mcexec_bl);
|
||||
else
|
||||
rc = pathcheck(bprm->interp, "/usr:/bin:/sbin:/opt");
|
||||
|
||||
for(ep = env; ep->name; ep++)
|
||||
if(ep->val)
|
||||
kfree(ep->val);
|
||||
if(rc)
|
||||
return -ENOEXEC;
|
||||
|
||||
file = open_exec(mcexec);
|
||||
if (IS_ERR(file))
|
||||
return -ENOEXEC;
|
||||
|
||||
rc = remove_arg_zero(bprm);
|
||||
if (rc){
|
||||
fput(file);
|
||||
return rc;
|
||||
}
|
||||
rc = copy_strings_kernel(1, &bprm->interp, bprm);
|
||||
if (rc < 0){
|
||||
fput(file);
|
||||
return rc;
|
||||
}
|
||||
bprm->argc++;
|
||||
wp = mcexec;
|
||||
rc = copy_strings_kernel(1, &wp, bprm);
|
||||
if (rc){
|
||||
fput(file);
|
||||
return rc;
|
||||
}
|
||||
bprm->argc++;
|
||||
#if 1
|
||||
rc = bprm_change_interp(mcexec, bprm);
|
||||
if (rc < 0){
|
||||
fput(file);
|
||||
return rc;
|
||||
}
|
||||
#else
|
||||
if(brpm->interp != bprm->filename)
|
||||
kfree(brpm->interp);
|
||||
kfree(brpm->filename);
|
||||
bprm->filename = bprm->interp = kstrdup(mcexec, GFP_KERNEL);
|
||||
if(!bprm->interp){
|
||||
fput(file);
|
||||
return -ENOMEM;
|
||||
}
|
||||
#endif
|
||||
|
||||
allow_write_access(bprm->file);
|
||||
fput(bprm->file);
|
||||
bprm->file = file;
|
||||
|
||||
rc = prepare_binprm(bprm);
|
||||
if (rc < 0){
|
||||
return rc;
|
||||
}
|
||||
return search_binary_handler(bprm
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0)
|
||||
, regs
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
static struct linux_binfmt mcexec_format = {
|
||||
.module = THIS_MODULE,
|
||||
.load_binary = load_elf,
|
||||
};
|
||||
|
||||
/* Insert the mcexec binary format into the kernel's format list. */
void __init
binfmt_mcexec_init(void)
{
	insert_binfmt(&mcexec_format);
}
|
||||
|
||||
/* Remove the mcexec binary format from the kernel's format list. */
void __exit
binfmt_mcexec_exit(void)
{
	unregister_binfmt(&mcexec_format);
}
|
||||
@@ -31,12 +31,15 @@
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/version.h>
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/delay.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/io.h>
|
||||
#include "mcctrl.h"
|
||||
|
||||
//#define DEBUG
|
||||
|
||||
#ifdef DEBUG
|
||||
#define dprintk printk
|
||||
#else
|
||||
@@ -242,19 +245,69 @@ int mcexec_transfer_image(ihk_os_t os, struct remote_transfer *__user upt)
|
||||
|
||||
//extern unsigned long last_thread_exec;
|
||||
|
||||
/* Argument handed to release_handler(): the pid to clean up. */
struct handlerinfo {
	int pid;	/* sent as isp.pid in SCD_MSG_CLEANUP_PROCESS */
};
|
||||
|
||||
/*
 * MCEXEC_UP_DEBUG_LOG: send an SCD_MSG_DEBUG_LOG packet carrying arg.
 * The result of the send is not examined; 0 is always returned.
 */
static long mcexec_debug_log(ihk_os_t os, unsigned long arg)
{
	struct ikc_scd_packet packet;

	memset(&packet, '\0', sizeof(packet));
	packet.arg = arg;
	packet.msg = SCD_MSG_DEBUG_LOG;

	mcctrl_ikc_send(os, 0, &packet);

	return 0;
}
|
||||
|
||||
/*
 * Release callback registered through ihk_os_register_release_handler().
 * param is a kmalloc'ed struct handlerinfo; an SCD_MSG_CLEANUP_PROCESS
 * packet for its pid is sent and param is freed.
 */
static void release_handler(ihk_os_t os, void *param)
{
	struct ikc_scd_packet packet;
	struct handlerinfo *hinfo = param;

	memset(&packet, '\0', sizeof(packet));
	packet.pid = hinfo->pid;
	packet.msg = SCD_MSG_CLEANUP_PROCESS;

	mcctrl_ikc_send(os, 0, &packet);

	kfree(hinfo);
}
|
||||
|
||||
static long mcexec_newprocess(ihk_os_t os,
|
||||
struct newprocess_desc *__user udesc,
|
||||
struct file *file)
|
||||
{
|
||||
struct newprocess_desc desc;
|
||||
struct handlerinfo *info;
|
||||
|
||||
if (copy_from_user(&desc, udesc, sizeof(struct newprocess_desc))) {
|
||||
return -EFAULT;
|
||||
}
|
||||
info = kmalloc(sizeof(struct handlerinfo), GFP_KERNEL);
|
||||
info->pid = desc.pid;
|
||||
ihk_os_register_release_handler(file, release_handler, info);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static long mcexec_start_image(ihk_os_t os,
|
||||
struct program_load_desc * __user udesc)
|
||||
struct program_load_desc * __user udesc,
|
||||
struct file *file)
|
||||
{
|
||||
struct program_load_desc desc;
|
||||
struct ikc_scd_packet isp;
|
||||
struct mcctrl_channel *c;
|
||||
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
|
||||
struct handlerinfo *info;
|
||||
|
||||
if (copy_from_user(&desc, udesc,
|
||||
sizeof(struct program_load_desc))) {
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
info = kmalloc(sizeof(struct handlerinfo), GFP_KERNEL);
|
||||
info->pid = desc.pid;
|
||||
ihk_os_register_release_handler(file, release_handler, info);
|
||||
|
||||
c = usrdata->channels + desc.cpu;
|
||||
|
||||
mcctrl_ikc_set_recv_cpu(os, desc.cpu);
|
||||
@@ -439,14 +492,15 @@ retry_alloc:
|
||||
|
||||
ret = wait_event_interruptible(wqhln->wq_syscall, wqhln->req);
|
||||
|
||||
if (ret) {
|
||||
return -EINTR;
|
||||
}
|
||||
|
||||
/* Remove per-process wait queue head */
|
||||
irqflags = ihk_ikc_spinlock_lock(&c->wq_list_lock);
|
||||
list_del(&wqhln->list);
|
||||
ihk_ikc_spinlock_unlock(&c->wq_list_lock, irqflags);
|
||||
if (ret) {
|
||||
kfree(wqhln);
|
||||
return -EINTR;
|
||||
}
|
||||
kfree(wqhln);
|
||||
|
||||
if (c->param.request_va->number == 61 &&
|
||||
@@ -723,7 +777,7 @@ long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg)
|
||||
}
|
||||
|
||||
LIST_HEAD(mckernel_exec_files);
|
||||
spinlock_t mckernel_exec_file_lock = SPIN_LOCK_UNLOCKED;
|
||||
DEFINE_SPINLOCK(mckernel_exec_file_lock);
|
||||
|
||||
|
||||
struct mckernel_exec_file {
|
||||
@@ -733,6 +787,47 @@ struct mckernel_exec_file {
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
|
||||
#define GUIDVAL(x) (x)
|
||||
#else
|
||||
#define GUIDVAL(x) ((x).val)
|
||||
#endif
|
||||
|
||||
|
||||
/*
 * MCEXEC_UP_GET_CRED: store the caller's credentials as eight ints
 * (uid, euid, suid, fsuid, gid, egid, sgid, fsgid) at the kernel mapping
 * of the physical address phys.  Always returns 0.
 *
 * NOTE(review): phys arrives as the ioctl argument and is passed to
 * phys_to_virt() without any validation visible here -- confirm that
 * callers are trusted.
 */
int
mcexec_getcred(unsigned long phys)
{
	int cred[8];

	cred[0] = GUIDVAL(current_uid());
	cred[1] = GUIDVAL(current_euid());
	cred[2] = GUIDVAL(current_suid());
	cred[3] = GUIDVAL(current_fsuid());
	cred[4] = GUIDVAL(current_gid());
	cred[5] = GUIDVAL(current_egid());
	cred[6] = GUIDVAL(current_sgid());
	cred[7] = GUIDVAL(current_fsgid());

	memcpy(phys_to_virt(phys), cred, sizeof(cred));
	return 0;
}
|
||||
|
||||
int
|
||||
mcexec_getcredv(int __user *virt)
|
||||
{
|
||||
int wk[8];
|
||||
|
||||
wk[0] = GUIDVAL(current_uid());
|
||||
wk[1] = GUIDVAL(current_euid());
|
||||
wk[2] = GUIDVAL(current_suid());
|
||||
wk[3] = GUIDVAL(current_fsuid());
|
||||
wk[4] = GUIDVAL(current_gid());
|
||||
wk[5] = GUIDVAL(current_egid());
|
||||
wk[6] = GUIDVAL(current_sgid());
|
||||
wk[7] = GUIDVAL(current_fsgid());
|
||||
if(copy_to_user(virt, wk, sizeof(int) * 8))
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mcexec_open_exec(ihk_os_t os, char * __user filename)
|
||||
{
|
||||
struct file *file;
|
||||
@@ -857,7 +952,8 @@ long mcexec_strncpy_from_user(ihk_os_t os, struct strncpy_from_user_desc * __use
|
||||
return 0;
|
||||
}
|
||||
|
||||
long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg)
|
||||
long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg,
|
||||
struct file *file)
|
||||
{
|
||||
switch (req) {
|
||||
case MCEXEC_UP_PREPARE_IMAGE:
|
||||
@@ -867,7 +963,7 @@ long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg)
|
||||
return mcexec_transfer_image(os, (struct remote_transfer *)arg);
|
||||
|
||||
case MCEXEC_UP_START_IMAGE:
|
||||
return mcexec_start_image(os, (struct program_load_desc *)arg);
|
||||
return mcexec_start_image(os, (struct program_load_desc *)arg, file);
|
||||
|
||||
case MCEXEC_UP_WAIT_SYSCALL:
|
||||
return mcexec_wait_syscall(os, (struct syscall_wait_desc *)arg);
|
||||
@@ -888,6 +984,10 @@ long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg)
|
||||
return mcexec_strncpy_from_user(os,
|
||||
(struct strncpy_from_user_desc *)arg);
|
||||
|
||||
case MCEXEC_UP_NEW_PROCESS:
|
||||
return mcexec_newprocess(os, (struct newprocess_desc *)arg,
|
||||
file);
|
||||
|
||||
case MCEXEC_UP_OPEN_EXEC:
|
||||
return mcexec_open_exec(os, (char *)arg);
|
||||
|
||||
@@ -899,6 +999,15 @@ long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg)
|
||||
|
||||
case MCEXEC_UP_FREE_DMA:
|
||||
return mcexec_free_region(os, (unsigned long *)arg);
|
||||
|
||||
case MCEXEC_UP_GET_CRED:
|
||||
return mcexec_getcred((unsigned long)arg);
|
||||
|
||||
case MCEXEC_UP_GET_CREDV:
|
||||
return mcexec_getcredv((int *)arg);
|
||||
|
||||
case MCEXEC_UP_DEBUG_LOG:
|
||||
return mcexec_debug_log(os, arg);
|
||||
}
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@@ -29,7 +29,8 @@
|
||||
|
||||
#define OS_MAX_MINOR 64
|
||||
|
||||
extern long __mcctrl_control(ihk_os_t, unsigned int, unsigned long);
|
||||
extern long __mcctrl_control(ihk_os_t, unsigned int, unsigned long,
|
||||
struct file *);
|
||||
extern int prepare_ikc_channels(ihk_os_t os);
|
||||
extern void destroy_ikc_channels(ihk_os_t os);
|
||||
#ifndef DO_USER_MODE
|
||||
@@ -38,11 +39,15 @@ extern void mcctrl_syscall_init(void);
|
||||
extern void procfs_init(int);
|
||||
extern void procfs_exit(int);
|
||||
|
||||
extern void rus_page_hash_init(void);
|
||||
extern void rus_page_hash_put_pages(void);
|
||||
extern void binfmt_mcexec_init(void);
|
||||
extern void binfmt_mcexec_exit(void);
|
||||
|
||||
static long mcctrl_ioctl(ihk_os_t os, unsigned int request, void *priv,
|
||||
unsigned long arg)
|
||||
unsigned long arg, struct file *file)
|
||||
{
|
||||
return __mcctrl_control(os, request, arg);
|
||||
return __mcctrl_control(os, request, arg, file);
|
||||
}
|
||||
|
||||
static struct ihk_os_user_call_handler mcctrl_uchs[] = {
|
||||
@@ -55,10 +60,14 @@ static struct ihk_os_user_call_handler mcctrl_uchs[] = {
|
||||
{ .request = MCEXEC_UP_SEND_SIGNAL, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_GET_CPU, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_STRNCPY_FROM_USER, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_NEW_PROCESS, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_PREPARE_DMA, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_FREE_DMA, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_OPEN_EXEC, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_CLOSE_EXEC, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_GET_CRED, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_GET_CREDV, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_DEBUG_LOG, .func = mcctrl_ioctl },
|
||||
};
|
||||
|
||||
static struct ihk_os_user_call mcctrl_uc_proto = {
|
||||
@@ -101,6 +110,8 @@ static int __init mcctrl_init(void)
|
||||
mcctrl_syscall_init();
|
||||
#endif
|
||||
|
||||
rus_page_hash_init();
|
||||
|
||||
for(i = 0; i < OS_MAX_MINOR; i++){
|
||||
if (os[i]) {
|
||||
memcpy(mcctrl_uc + i, &mcctrl_uc_proto, sizeof mcctrl_uc_proto);
|
||||
@@ -113,6 +124,8 @@ static int __init mcctrl_init(void)
|
||||
}
|
||||
}
|
||||
|
||||
binfmt_mcexec_init();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -120,6 +133,7 @@ static void __exit mcctrl_exit(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
binfmt_mcexec_exit();
|
||||
printk("mcctrl: unregistered.\n");
|
||||
for(i = 0; i < OS_MAX_MINOR; i++){
|
||||
if(os[i]){
|
||||
@@ -128,6 +142,8 @@ static void __exit mcctrl_exit(void)
|
||||
procfs_exit(i);
|
||||
}
|
||||
}
|
||||
|
||||
rus_page_hash_put_pages();
|
||||
}
|
||||
|
||||
MODULE_LICENSE("GPL v2");
|
||||
|
||||
@@ -48,12 +48,15 @@
|
||||
|
||||
#define SCD_MSG_SYSCALL_ONESIDE 0x4
|
||||
#define SCD_MSG_SEND_SIGNAL 0x8
|
||||
#define SCD_MSG_CLEANUP_PROCESS 0x9
|
||||
|
||||
#define SCD_MSG_PROCFS_CREATE 0x10
|
||||
#define SCD_MSG_PROCFS_DELETE 0x11
|
||||
#define SCD_MSG_PROCFS_REQUEST 0x12
|
||||
#define SCD_MSG_PROCFS_ANSWER 0x13
|
||||
|
||||
#define SCD_MSG_DEBUG_LOG 0x20
|
||||
|
||||
#define DMA_PIN_SHIFT 21
|
||||
|
||||
#define DO_USER_MODE
|
||||
|
||||
@@ -10,12 +10,15 @@
|
||||
* HISTORY:
|
||||
*/
|
||||
|
||||
#include <linux/slab.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/resource.h>
|
||||
#include "mcctrl.h"
|
||||
#include <linux/version.h>
|
||||
|
||||
//#define PROCFS_DEBUG
|
||||
|
||||
@@ -26,16 +29,16 @@
|
||||
#endif
|
||||
|
||||
static DECLARE_WAIT_QUEUE_HEAD(procfsq);
|
||||
|
||||
int mckernel_procfs_read(char *buffer, char **start, off_t offset,
|
||||
int count, int *peof, void *dat);
|
||||
static ssize_t mckernel_procfs_read(struct file *file, char __user *buf,
|
||||
size_t nbytes, loff_t *ppos);
|
||||
|
||||
/* A private data for the procfs driver. */
|
||||
struct procfs_list_entry;
|
||||
|
||||
struct procfs_list_entry {
|
||||
struct list_head list;
|
||||
struct proc_dir_entry *entry;
|
||||
struct proc_dir_entry *parent;
|
||||
struct procfs_list_entry *parent;
|
||||
ihk_os_t os;
|
||||
int osnum;
|
||||
int pid;
|
||||
@@ -53,6 +56,28 @@ struct procfs_list_entry {
|
||||
LIST_HEAD(procfs_file_list);
|
||||
static ihk_spinlock_t procfs_file_list_lock;
|
||||
|
||||
/*
 * llseek handler for McKernel procfs files.
 *
 * Supports SEEK_SET (0) and SEEK_CUR (1) only; any other origin yields
 * -EINVAL.  A request that would produce a negative file position is
 * rejected with -EINVAL and leaves file->f_pos untouched.
 *
 * Returns the new file position on success.
 */
loff_t mckernel_procfs_lseek(struct file *file, loff_t offset, int orig)
{
	loff_t pos;

	switch (orig) {
	case SEEK_SET:
		pos = offset;
		break;
	case SEEK_CUR:
		pos = file->f_pos + offset;
		break;
	default:
		return -EINVAL;
	}

	if (pos < 0)
		return -EINVAL;

	file->f_pos = pos;
	return pos;
}
|
||||
|
||||
static const struct file_operations mckernel_procfs_file_operations = {
|
||||
.llseek = mckernel_procfs_lseek,
|
||||
.read = mckernel_procfs_read,
|
||||
.write = NULL,
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* \brief Return specified procfs entry.
|
||||
*
|
||||
@@ -71,22 +96,22 @@ static ihk_spinlock_t procfs_file_list_lock;
|
||||
/*
|
||||
* XXX: Two or more entries which have same name can be created.
|
||||
*
|
||||
* get_procfs_entry() avoids creating an entry which has already been created.
|
||||
* get_procfs_list_entry() avoids creating an entry which has already been created.
|
||||
* But, it allows creating an entry which is being created by another thread.
|
||||
*
|
||||
* This problem occurred when two requests which created files with a common
|
||||
* ancestor directory which was not explicitly created were racing.
|
||||
*/
|
||||
|
||||
static struct proc_dir_entry *get_procfs_entry(char *p, int osnum, int mode)
|
||||
static struct procfs_list_entry *get_procfs_list_entry(char *p, int osnum, int mode)
|
||||
{
|
||||
char *r;
|
||||
struct proc_dir_entry *ret = NULL, *parent = NULL;
|
||||
struct procfs_list_entry *e;
|
||||
struct proc_dir_entry *pde = NULL;
|
||||
struct procfs_list_entry *e, *ret = NULL, *parent = NULL;
|
||||
char name[PROCFS_NAME_MAX];
|
||||
unsigned long irqflags;
|
||||
|
||||
dprintk("get_procfs_entry: %s for osnum %d mode %o\n", p, osnum, mode);
|
||||
dprintk("get_procfs_list_entry: %s for osnum %d mode %o\n", p, osnum, mode);
|
||||
irqflags = ihk_ikc_spinlock_lock(&procfs_file_list_lock);
|
||||
list_for_each_entry(e, &procfs_file_list, list) {
|
||||
if (e == NULL) {
|
||||
@@ -95,7 +120,8 @@ static struct proc_dir_entry *get_procfs_entry(char *p, int osnum, int mode)
|
||||
}
|
||||
if (strncmp(e->fname, p, PROCFS_NAME_MAX) == 0) {
|
||||
/* We found the entry */
|
||||
ret = e->entry;
|
||||
ret = e;
|
||||
break;
|
||||
}
|
||||
}
|
||||
ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflags);
|
||||
@@ -107,19 +133,19 @@ static struct proc_dir_entry *get_procfs_entry(char *p, int osnum, int mode)
|
||||
/* We have non-null parent dir. */
|
||||
strncpy(name, p, r - p);
|
||||
name[r - p] = '\0';
|
||||
parent = get_procfs_entry(name, osnum, 0);
|
||||
parent = get_procfs_list_entry(name, osnum, 0);
|
||||
if (parent == NULL) {
|
||||
/* We could not get a parent procfs entry. Give up. */
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
e = kmalloc(sizeof(struct procfs_list_entry), GFP_KERNEL);
|
||||
if (e == NULL) {
|
||||
ret = kmalloc(sizeof(struct procfs_list_entry), GFP_KERNEL);
|
||||
if (ret == NULL) {
|
||||
kprintf("ERROR: not enough memory to create PROCFS entry.\n");
|
||||
return NULL;
|
||||
}
|
||||
/* Fill the fname field of the entry */
|
||||
strncpy(e->fname, p, PROCFS_NAME_MAX);
|
||||
strncpy(ret->fname, p, PROCFS_NAME_MAX);
|
||||
|
||||
if (r != NULL) {
|
||||
strncpy(name, r + 1, p + PROCFS_NAME_MAX - r - 1);
|
||||
@@ -127,25 +153,38 @@ static struct proc_dir_entry *get_procfs_entry(char *p, int osnum, int mode)
|
||||
strncpy(name, p, PROCFS_NAME_MAX);
|
||||
}
|
||||
if (mode == 0) {
|
||||
ret = proc_mkdir(name, parent);
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
|
||||
pde = proc_mkdir(name, parent ? parent->entry : NULL);
|
||||
#else
|
||||
pde = proc_mkdir_data(name, 0555, parent ? parent->entry : NULL, ret);
|
||||
#endif
|
||||
} else {
|
||||
ret = create_proc_entry(name, mode, parent);
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
|
||||
pde = create_proc_entry(name, mode, parent->entry);
|
||||
if (pde)
|
||||
pde->proc_fops = &mckernel_procfs_file_operations;
|
||||
#else
|
||||
pde = proc_create_data(name, mode, parent->entry,
|
||||
&mckernel_procfs_file_operations, ret);
|
||||
#endif
|
||||
}
|
||||
if (ret == NULL) {
|
||||
if (pde == NULL) {
|
||||
kprintf("ERROR: cannot create a PROCFS entry for %s.\n", p);
|
||||
kfree(e);
|
||||
kfree(ret);
|
||||
return NULL;
|
||||
}
|
||||
ret->data = e;
|
||||
e->osnum = osnum;
|
||||
e->entry = ret;
|
||||
e->parent = parent;
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
|
||||
pde->data = ret;
|
||||
#endif
|
||||
ret->osnum = osnum;
|
||||
ret->entry = pde;
|
||||
ret->parent = parent;
|
||||
|
||||
irqflags = ihk_ikc_spinlock_lock(&procfs_file_list_lock);
|
||||
list_add(&(e->list), &procfs_file_list);
|
||||
list_add(&(ret->list), &procfs_file_list);
|
||||
ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflags);
|
||||
|
||||
dprintk("get_procfs_entry: %s done\n", p);
|
||||
dprintk("get_procfs_list_entry: %s done\n", p);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -161,7 +200,6 @@ static struct proc_dir_entry *get_procfs_entry(char *p, int osnum, int mode)
|
||||
|
||||
void procfs_create(void *__os, int ref, int osnum, int pid, unsigned long arg)
|
||||
{
|
||||
struct proc_dir_entry *entry;
|
||||
struct procfs_list_entry *e;
|
||||
ihk_device_t dev = ihk_os_to_dev(__os);
|
||||
unsigned long parg;
|
||||
@@ -183,18 +221,16 @@ void procfs_create(void *__os, int ref, int osnum, int pid, unsigned long arg)
|
||||
printk("ERROR: procfs_creat: file name not properly terminated.\n");
|
||||
goto quit;
|
||||
}
|
||||
entry = get_procfs_entry(name, osnum, mode);
|
||||
if (entry == NULL) {
|
||||
e = get_procfs_list_entry(name, osnum, mode);
|
||||
if (e == NULL) {
|
||||
printk("ERROR: could not create a procfs entry for %s.\n", name);
|
||||
goto quit;
|
||||
}
|
||||
|
||||
e = entry->data;
|
||||
e->os = __os;
|
||||
e->cpu = ref;
|
||||
e->pid = pid;
|
||||
|
||||
entry->read_proc = mckernel_procfs_read;
|
||||
quit:
|
||||
f->status = 1; /* Now the peer can free the data. */
|
||||
ihk_device_unmap_virtual(dev, f, sizeof(struct procfs_file));
|
||||
@@ -216,7 +252,7 @@ void procfs_delete(void *__os, int osnum, unsigned long arg)
|
||||
unsigned long parg;
|
||||
struct procfs_file *f;
|
||||
struct procfs_list_entry *e;
|
||||
struct proc_dir_entry *parent = NULL;
|
||||
struct procfs_list_entry *parent = NULL;
|
||||
char name[PROCFS_NAME_MAX];
|
||||
char *r;
|
||||
unsigned long irqflags;
|
||||
@@ -230,8 +266,10 @@ void procfs_delete(void *__os, int osnum, unsigned long arg)
|
||||
if ((strncmp(e->fname, f->fname, PROCFS_NAME_MAX) == 0) &&
|
||||
(e->osnum == osnum)) {
|
||||
list_del(&e->list);
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
|
||||
e->entry->read_proc = NULL;
|
||||
e->entry->data = NULL;
|
||||
#endif
|
||||
parent = e->parent;
|
||||
kfree(e);
|
||||
r = strrchr(f->fname, '/');
|
||||
@@ -241,7 +279,7 @@ void procfs_delete(void *__os, int osnum, unsigned long arg)
|
||||
strncpy(name, r + 1, PROCFS_NAME_MAX);
|
||||
}
|
||||
dprintk("found and remove %s from the list.\n", name);
|
||||
remove_proc_entry(name, parent);
|
||||
remove_proc_entry(name, parent->entry);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -271,27 +309,50 @@ void procfs_answer(unsigned int arg, int err)
|
||||
* This function conforms to the 2) way of fs/proc/generic.c
|
||||
* from linux-2.6.39.4.
|
||||
*/
|
||||
|
||||
int mckernel_procfs_read(char *buffer, char **start, off_t offset,
|
||||
int count, int *peof, void *dat)
|
||||
static ssize_t
|
||||
mckernel_procfs_read(struct file *file, char __user *buf, size_t nbytes,
|
||||
loff_t *ppos)
|
||||
{
|
||||
struct procfs_list_entry *e = dat;
|
||||
struct inode * inode = file->f_path.dentry->d_inode;
|
||||
char *kern_buffer;
|
||||
int order = 0;
|
||||
volatile struct procfs_read *r;
|
||||
struct ikc_scd_packet isp;
|
||||
int ret, retrycount = 0;
|
||||
unsigned long pbuf;
|
||||
unsigned long count = nbytes;
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
|
||||
struct proc_dir_entry *dp = PDE(inode);
|
||||
struct procfs_list_entry *e = dp->data;
|
||||
#else
|
||||
struct procfs_list_entry *e = PDE_DATA(inode);
|
||||
#endif
|
||||
loff_t offset = *ppos;
|
||||
|
||||
dprintk("mckernel_procfs_read: invoked for %s\n", e->fname);
|
||||
|
||||
if (count <= 0 || dat == NULL || offset < 0) {
|
||||
dprintk("mckernel_procfs_read: invoked for %s, offset: %lu, count: %d\n",
|
||||
e->fname, offset, count);
|
||||
|
||||
if (count <= 0 || offset < 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
pbuf = virt_to_phys(buffer);
|
||||
if (pbuf / PAGE_SIZE != (pbuf + count - 1) / PAGE_SIZE) {
|
||||
/* Truncate the read count upto the nearest page boundary */
|
||||
count = ((pbuf + count - 1) / PAGE_SIZE) * PAGE_SIZE - pbuf;
|
||||
|
||||
while ((1 << order) < count) ++order;
|
||||
if (order > 12) {
|
||||
order -= 12;
|
||||
}
|
||||
else {
|
||||
order = 1;
|
||||
}
|
||||
|
||||
/* NOTE: we need physically contiguous memory to pass through IKC */
|
||||
kern_buffer = (char *)__get_free_pages(GFP_KERNEL, order);
|
||||
if (!kern_buffer) {
|
||||
printk("mckernel_procfs_read(): ERROR: allocating kernel buffer\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
pbuf = virt_to_phys(kern_buffer);
|
||||
|
||||
r = kmalloc(sizeof(struct procfs_read), GFP_KERNEL);
|
||||
if (r == NULL) {
|
||||
return -ENOMEM;
|
||||
@@ -309,18 +370,23 @@ retry:
|
||||
isp.msg = SCD_MSG_PROCFS_REQUEST;
|
||||
isp.ref = e->cpu;
|
||||
isp.arg = virt_to_phys(r);
|
||||
|
||||
ret = mcctrl_ikc_send(e->os, e->cpu, &isp);
|
||||
|
||||
if (ret < 0) {
|
||||
goto out; /* error */
|
||||
}
|
||||
|
||||
/* Wait for a reply. */
|
||||
ret = -EIO; /* default exit code */
|
||||
dprintk("now wait for a relpy\n");
|
||||
|
||||
/* Wait for the status field of the procfs_read structure set ready. */
|
||||
if (wait_event_interruptible_timeout(procfsq, r->status != 0, HZ) == 0) {
|
||||
kprintf("ERROR: mckernel_procfs_read: timeout (1 sec).\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Wake up and check the result. */
|
||||
dprintk("mckernel_procfs_read: woke up. ret: %d, eof: %d\n", r->ret, r->eof);
|
||||
if ((r->ret == 0) && (r->eof != 1)) {
|
||||
@@ -335,13 +401,20 @@ retry:
|
||||
dprintk("retry\n");
|
||||
goto retry;
|
||||
}
|
||||
if (r->eof == 1) {
|
||||
dprintk("reached end of file.\n");
|
||||
*peof = 1;
|
||||
|
||||
if (r->ret > 0) {
|
||||
if (copy_to_user(buf, kern_buffer, r->ret)) {
|
||||
kprintf("ERROR: mckernel_procfs_read: copy_to_user failed.\n");
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
*ppos += r->ret;
|
||||
}
|
||||
*start = buffer;
|
||||
ret = r->ret;
|
||||
|
||||
out:
|
||||
free_pages((uintptr_t)kern_buffer, order);
|
||||
kfree((void *)r);
|
||||
|
||||
return ret;
|
||||
@@ -367,7 +440,7 @@ void procfs_exit(int osnum) {
|
||||
int error;
|
||||
mm_segment_t old_fs = get_fs();
|
||||
struct kstat stat;
|
||||
struct proc_dir_entry *parent;
|
||||
struct procfs_list_entry *parent;
|
||||
struct procfs_list_entry *e, *temp = NULL;
|
||||
unsigned long irqflags;
|
||||
|
||||
@@ -378,8 +451,10 @@ void procfs_exit(int osnum) {
|
||||
if (e->osnum == osnum) {
|
||||
dprintk("found entry for %s.\n", e->fname);
|
||||
list_del(&e->list);
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
|
||||
e->entry->read_proc = NULL;
|
||||
e->entry->data = NULL;
|
||||
#endif
|
||||
parent = e->parent;
|
||||
r = strrchr(e->fname, '/');
|
||||
if (r == NULL) {
|
||||
@@ -387,7 +462,9 @@ void procfs_exit(int osnum) {
|
||||
} else {
|
||||
r += 1;
|
||||
}
|
||||
remove_proc_entry(r, parent);
|
||||
if (parent) {
|
||||
remove_proc_entry(r, parent->entry);
|
||||
}
|
||||
dprintk("free the entry\n");
|
||||
kfree(e);
|
||||
}
|
||||
|
||||
@@ -13,6 +13,8 @@
|
||||
* Copyright (C) 2012 - 2013 Hitachi, Ltd.
|
||||
* \author Balazs Gerofi <bgerofi@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2013 The University of Tokyo
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2014 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
@@ -43,6 +45,7 @@
|
||||
#include <asm/delay.h>
|
||||
#include <asm/io.h>
|
||||
#include "mcctrl.h"
|
||||
#include <linux/version.h>
|
||||
|
||||
#define ALIGN_WAIT_BUF(z) (((z + 63) >> 6) << 6)
|
||||
|
||||
@@ -319,6 +322,109 @@ out:
|
||||
return error;
|
||||
}
|
||||
|
||||
#define RUS_PAGE_HASH_SHIFT 8
|
||||
#define RUS_PAGE_HASH_SIZE (1UL << RUS_PAGE_HASH_SHIFT)
|
||||
#define RUS_PAGE_HASH_MASK (RUS_PAGE_HASH_SIZE - 1)
|
||||
|
||||
struct list_head rus_page_hash[RUS_PAGE_HASH_SIZE];
|
||||
spinlock_t rus_page_hash_lock;
|
||||
|
||||
struct rus_page {
|
||||
struct list_head hash;
|
||||
struct page *page;
|
||||
int refcount;
|
||||
int put_page;
|
||||
};
|
||||
|
||||
void rus_page_hash_init(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
spin_lock_init(&rus_page_hash_lock);
|
||||
for (i = 0; i < RUS_PAGE_HASH_SIZE; ++i) {
|
||||
INIT_LIST_HEAD(&rus_page_hash[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/* rus_page_hash_lock must be held */
|
||||
struct rus_page *_rus_page_hash_lookup(struct page *page)
|
||||
{
|
||||
struct rus_page *rp = NULL;
|
||||
struct rus_page *rp_iter;
|
||||
|
||||
list_for_each_entry(rp_iter,
|
||||
&rus_page_hash[page_to_pfn(page) & RUS_PAGE_HASH_MASK], hash) {
|
||||
|
||||
if (rp_iter->page != page)
|
||||
continue;
|
||||
|
||||
rp = rp_iter;
|
||||
break;
|
||||
}
|
||||
|
||||
return rp;
|
||||
}
|
||||
|
||||
|
||||
static int rus_page_hash_insert(struct page *page)
|
||||
{
|
||||
int ret = 0;
|
||||
struct rus_page *rp;
|
||||
|
||||
spin_lock(&rus_page_hash_lock);
|
||||
|
||||
rp = _rus_page_hash_lookup(page);
|
||||
if (!rp) {
|
||||
rp = kmalloc(sizeof(*rp), GFP_ATOMIC);
|
||||
|
||||
if (!rp) {
|
||||
printk("rus_page_add_hash(): error allocating rp\n");
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
rp->page = page;
|
||||
rp->put_page = 0;
|
||||
|
||||
get_page(page);
|
||||
|
||||
rp->refcount = 0; /* Will be increased below */
|
||||
|
||||
list_add_tail(&rp->hash,
|
||||
&rus_page_hash[page_to_pfn(page) & RUS_PAGE_HASH_MASK]);
|
||||
}
|
||||
|
||||
++rp->refcount;
|
||||
|
||||
|
||||
out:
|
||||
spin_unlock(&rus_page_hash_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void rus_page_hash_put_pages(void)
|
||||
{
|
||||
int i;
|
||||
struct rus_page *rp_iter;
|
||||
struct rus_page *rp_iter_next;
|
||||
|
||||
spin_lock(&rus_page_hash_lock);
|
||||
|
||||
for (i = 0; i < RUS_PAGE_HASH_SIZE; ++i) {
|
||||
|
||||
list_for_each_entry_safe(rp_iter, rp_iter_next,
|
||||
&rus_page_hash[i], hash) {
|
||||
list_del(&rp_iter->hash);
|
||||
|
||||
put_page(rp_iter->page);
|
||||
kfree(rp_iter);
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock(&rus_page_hash_lock);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* By remap_pfn_range(), VM_PFN_AT_MMAP may be raised.
|
||||
* VM_PFN_AT_MMAP cause the following problems.
|
||||
@@ -329,6 +435,7 @@ out:
|
||||
* These problems may be solved in linux-3.7.
|
||||
* It uses vm_insert_pfn() until it is fixed.
|
||||
*/
|
||||
|
||||
#define USE_VM_INSERT_PFN 1
|
||||
|
||||
static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
@@ -409,15 +516,11 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
|
||||
if (pfn_valid(pfn+pix)) {
|
||||
page = pfn_to_page(pfn+pix);
|
||||
if (!page_count(page)) {
|
||||
get_page(page);
|
||||
/*
|
||||
* TODO:
|
||||
* The pages which get_page() has been called with
|
||||
* should be recorded. Because these pages have to
|
||||
* be passed to put_page() before they are freed.
|
||||
*/
|
||||
|
||||
if ((error = rus_page_hash_insert(page)) < 0) {
|
||||
printk("rus_vm_fault: error hashing page??\n");
|
||||
}
|
||||
|
||||
error = vm_insert_page(vma, rva+(pix*PAGE_SIZE), page);
|
||||
if (error) {
|
||||
printk("vm_insert_page: %d\n", error);
|
||||
@@ -448,7 +551,11 @@ static struct vm_operations_struct rus_vmops = {
|
||||
|
||||
static int rus_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
{
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0)
|
||||
vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND | VM_MIXEDMAP;
|
||||
#else
|
||||
vma->vm_flags |= VM_DONTDUMP | VM_DONTEXPAND | VM_MIXEDMAP;
|
||||
#endif
|
||||
vma->vm_ops = &rus_vmops;
|
||||
return 0;
|
||||
}
|
||||
@@ -491,9 +598,18 @@ int reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp, un
|
||||
if (vma) {
|
||||
end = (vma->vm_start - GAP_FOR_MCEXEC) & ~(GAP_FOR_MCEXEC - 1);
|
||||
}
|
||||
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
|
||||
start = do_mmap_pgoff(file, 0, end,
|
||||
PROT_READ|PROT_WRITE, MAP_FIXED|MAP_SHARED, 0);
|
||||
#endif
|
||||
|
||||
up_write(¤t->mm->mmap_sem);
|
||||
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0)
|
||||
start = vm_mmap(file, 0, end,
|
||||
PROT_READ|PROT_WRITE, MAP_FIXED|MAP_SHARED, 0);
|
||||
#endif
|
||||
|
||||
revert_creds(original);
|
||||
put_cred(promoted);
|
||||
@@ -782,19 +898,19 @@ static int pager_req_create(ihk_os_t os, int fd, uintptr_t result_pa)
|
||||
|
||||
error = vfs_fstat(fd, &st);
|
||||
if (error) {
|
||||
printk("pager_req_create(%d,%lx):vfs_stat failed. %d\n", fd, (long)result_pa, error);
|
||||
dprintk("pager_req_create(%d,%lx):vfs_stat failed. %d\n", fd, (long)result_pa, error);
|
||||
goto out;
|
||||
}
|
||||
if (!S_ISREG(st.mode)) {
|
||||
error = -ESRCH;
|
||||
printk("pager_req_create(%d,%lx):not VREG. %x\n", fd, (long)result_pa, st.mode);
|
||||
dprintk("pager_req_create(%d,%lx):not VREG. %x\n", fd, (long)result_pa, st.mode);
|
||||
goto out;
|
||||
}
|
||||
|
||||
file = fget(fd);
|
||||
if (!file) {
|
||||
error = -EBADF;
|
||||
printk("pager_req_create(%d,%lx):file not found. %d\n", fd, (long)result_pa, error);
|
||||
dprintk("pager_req_create(%d,%lx):file not found. %d\n", fd, (long)result_pa, error);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -817,7 +933,7 @@ static int pager_req_create(ihk_os_t os, int fd, uintptr_t result_pa)
|
||||
}
|
||||
if (!(maxprot & PROT_READ)) {
|
||||
error = -EACCES;
|
||||
printk("pager_req_create(%d,%lx):cannot read file. %d\n", fd, (long)result_pa, error);
|
||||
dprintk("pager_req_create(%d,%lx):cannot read file. %d\n", fd, (long)result_pa, error);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -1100,7 +1216,7 @@ static int pager_req_map(ihk_os_t os, int fd, size_t len, off_t off, uintptr_t r
|
||||
struct pager_map_result *resp;
|
||||
uintptr_t phys;
|
||||
|
||||
printk("pager_req_map(%p,%d,%lx,%lx,%lx)\n", os, fd, len, off, result_rpa);
|
||||
dprintk("pager_req_map(%p,%d,%lx,%lx,%lx)\n", os, fd, len, off, result_rpa);
|
||||
pager = kzalloc(sizeof(*pager), GFP_KERNEL);
|
||||
if (!pager) {
|
||||
error = -ENOMEM;
|
||||
@@ -1128,8 +1244,17 @@ static int pager_req_map(ihk_os_t os, int fd, size_t len, off_t off, uintptr_t r
|
||||
|
||||
down_write(¤t->mm->mmap_sem);
|
||||
#define ANY_WHERE 0
|
||||
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
|
||||
va = do_mmap_pgoff(file, ANY_WHERE, len, maxprot, MAP_SHARED, pgoff);
|
||||
#endif
|
||||
|
||||
up_write(¤t->mm->mmap_sem);
|
||||
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0)
|
||||
va = vm_mmap(file, ANY_WHERE, len, maxprot, MAP_SHARED, pgoff << PAGE_SHIFT);
|
||||
#endif
|
||||
|
||||
if (IS_ERR_VALUE(va)) {
|
||||
printk("pager_req_map(%p,%d,%lx,%lx,%lx):do_mmap_pgoff failed. %d\n", os, fd, len, off, result_rpa, (int)va);
|
||||
error = va;
|
||||
@@ -1140,6 +1265,9 @@ static int pager_req_map(ihk_os_t os, int fd, size_t len, off_t off, uintptr_t r
|
||||
pager->map_uaddr = va;
|
||||
pager->map_len = len;
|
||||
pager->map_off = off;
|
||||
|
||||
dprintk("pager_req_map(%s): 0x%lx - 0x%lx (len: %lu)\n",
|
||||
file->f_dentry->d_name.name, va, va + len, len);
|
||||
|
||||
phys = ihk_device_map_memory(dev, result_rpa, sizeof(*resp));
|
||||
resp = ihk_device_map_virtual(dev, phys, sizeof(*resp), NULL, 0);
|
||||
@@ -1158,10 +1286,11 @@ out:
|
||||
if (pager) {
|
||||
kfree(pager);
|
||||
}
|
||||
printk("pager_req_map(%p,%d,%lx,%lx,%lx): %d\n", os, fd, len, off, result_rpa, error);
|
||||
dprintk("pager_req_map(%p,%d,%lx,%lx,%lx): %d\n", os, fd, len, off, result_rpa, error);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
static int pager_req_pfn(ihk_os_t os, uintptr_t handle, off_t off, uintptr_t ppfn_rpa)
|
||||
{
|
||||
const ihk_device_t dev = ihk_os_to_dev(os);
|
||||
@@ -1176,7 +1305,7 @@ static int pager_req_pfn(ihk_os_t os, uintptr_t handle, off_t off, uintptr_t ppf
|
||||
uintptr_t phys;
|
||||
uintptr_t *ppfn;
|
||||
|
||||
printk("pager_req_pfn(%p,%lx,%lx)\n", os, handle, off);
|
||||
dprintk("pager_req_pfn(%p,%lx,%lx)\n", os, handle, off);
|
||||
|
||||
if ((off < pager->map_off) || ((pager->map_off+pager->map_len) < (off + PAGE_SIZE))) {
|
||||
error = -ERANGE;
|
||||
@@ -1201,6 +1330,12 @@ static int pager_req_pfn(ihk_os_t os, uintptr_t handle, off_t off, uintptr_t ppf
|
||||
pfn = (uintptr_t)pte_pfn(*pte) << PAGE_SHIFT;
|
||||
#define PFN_PRESENT ((uintptr_t)1 << 0)
|
||||
pfn |= PFN_VALID | PFN_PRESENT;
|
||||
|
||||
/* Check if mapping is write-combined */
|
||||
if ((pte_flags(*pte) & _PAGE_PWT) &&
|
||||
!(pte_flags(*pte) & _PAGE_PCD)) {
|
||||
pfn |= _PAGE_PWT;
|
||||
}
|
||||
}
|
||||
pte_unmap(pte);
|
||||
}
|
||||
@@ -1216,7 +1351,7 @@ static int pager_req_pfn(ihk_os_t os, uintptr_t handle, off_t off, uintptr_t ppf
|
||||
|
||||
error = 0;
|
||||
out:
|
||||
printk("pager_req_pfn(%p,%lx,%lx): %d %lx\n", os, handle, off, error, pfn);
|
||||
dprintk("pager_req_pfn(%p,%lx,%lx): %d %lx\n", os, handle, off, error, pfn);
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -1225,11 +1360,15 @@ static int pager_req_unmap(ihk_os_t os, uintptr_t handle)
|
||||
struct pager * const pager = (void *)handle;
|
||||
int error;
|
||||
|
||||
printk("pager_req_unmap(%p,%lx)\n", os, handle);
|
||||
dprintk("pager_req_unmap(%p,%lx)\n", os, handle);
|
||||
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
|
||||
down_write(¤t->mm->mmap_sem);
|
||||
error = do_munmap(current->mm, pager->map_uaddr, pager->map_len);
|
||||
up_write(¤t->mm->mmap_sem);
|
||||
#else
|
||||
error = vm_munmap(pager->map_uaddr, pager->map_len);
|
||||
#endif
|
||||
|
||||
if (error) {
|
||||
printk("pager_req_unmap(%p,%lx):do_munmap failed. %d\n", os, handle, error);
|
||||
@@ -1237,7 +1376,7 @@ static int pager_req_unmap(ihk_os_t os, uintptr_t handle)
|
||||
}
|
||||
|
||||
kfree(pager);
|
||||
printk("pager_req_unmap(%p,%lx): %d\n", os, handle, error);
|
||||
dprintk("pager_req_unmap(%p,%lx): %d\n", os, handle, error);
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -1325,9 +1464,18 @@ static int remap_user_space(uintptr_t rva, size_t len, int prot)
|
||||
start = rva;
|
||||
pgoff = vma->vm_pgoff + ((rva - vma->vm_start) >> PAGE_SHIFT);
|
||||
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
|
||||
map = do_mmap_pgoff(file, start, len,
|
||||
prot, MAP_FIXED|MAP_SHARED, pgoff);
|
||||
#endif
|
||||
|
||||
up_write(&mm->mmap_sem);
|
||||
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0)
|
||||
map = vm_mmap(file, start, len,
|
||||
prot, MAP_FIXED|MAP_SHARED, pgoff << PAGE_SHIFT);
|
||||
#endif
|
||||
|
||||
out:
|
||||
dprintk("remap_user_space(%lx,%lx,%x): %lx (%ld)\n",
|
||||
rva, len, prot, (long)map, (long)map);
|
||||
@@ -1469,6 +1617,8 @@ fail:
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
 * Check selectors carried in args[0] of an in-kernel
 * __NR_sched_setparam request.
 */
#define SCHED_CHECK_SAME_OWNER	0x01	/* caller's euid vs. target task's euid/uid */
#define SCHED_CHECK_ROOT	0x02	/* caller's euid is root */
|
||||
|
||||
int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall_request *sc)
|
||||
{
|
||||
@@ -1556,6 +1706,71 @@ int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall
|
||||
error = writecore(os, sc->args[1], sc->args[0]);
|
||||
ret = 0;
|
||||
break;
|
||||
|
||||
case __NR_sched_setparam: {
|
||||
|
||||
switch (sc->args[0]) {
|
||||
|
||||
case SCHED_CHECK_SAME_OWNER: {
|
||||
const struct cred *cred = current_cred();
|
||||
const struct cred *pcred;
|
||||
bool match;
|
||||
struct task_struct *p;
|
||||
int pid = sc->args[1];
|
||||
|
||||
rcu_read_lock();
|
||||
p = pid_task(find_get_pid(pid), PIDTYPE_PID);
|
||||
if (!p) {
|
||||
rcu_read_unlock();
|
||||
ret = -ESRCH;
|
||||
goto sched_setparam_out;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
rcu_read_lock();
|
||||
pcred = __task_cred(p);
|
||||
#if LINUX_VERSION_CODE > KERNEL_VERSION(3,4,0)
|
||||
match = (uid_eq(cred->euid, pcred->euid) ||
|
||||
uid_eq(cred->euid, pcred->uid));
|
||||
#else
|
||||
match = ((cred->euid == pcred->euid) ||
|
||||
(cred->euid == pcred->uid));
|
||||
#endif
|
||||
rcu_read_unlock();
|
||||
|
||||
if (match) {
|
||||
ret = 0;
|
||||
}
|
||||
else {
|
||||
ret = -EPERM;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case SCHED_CHECK_ROOT: {
|
||||
const struct cred *cred = current_cred();
|
||||
bool match;
|
||||
|
||||
#if LINUX_VERSION_CODE > KERNEL_VERSION(3,4,0)
|
||||
match = uid_eq(cred->euid, GLOBAL_ROOT_UID);
|
||||
#else
|
||||
match = (cred->euid == 0);
|
||||
#endif
|
||||
if (match) {
|
||||
ret = 0;
|
||||
}
|
||||
else {
|
||||
ret = -EPERM;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
sched_setparam_out:
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
error = -ENOSYS;
|
||||
|
||||
@@ -40,7 +40,6 @@
|
||||
#include <ctype.h>
|
||||
#include <sys/mman.h>
|
||||
#include <asm/unistd.h>
|
||||
#include "../include/uprotocol.h"
|
||||
#include <sched.h>
|
||||
|
||||
#include <termios.h>
|
||||
@@ -49,6 +48,7 @@
|
||||
#include <sys/stat.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/utsname.h>
|
||||
#include <sys/fsuid.h>
|
||||
#include <time.h>
|
||||
#include <sys/time.h>
|
||||
#include <signal.h>
|
||||
@@ -56,7 +56,10 @@
|
||||
#include <dirent.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <pthread.h>
|
||||
#include <semaphore.h>
|
||||
#include <signal.h>
|
||||
#include <sys/signalfd.h>
|
||||
#include "../include/uprotocol.h"
|
||||
|
||||
//#define DEBUG
|
||||
|
||||
@@ -97,6 +100,13 @@ typedef unsigned char cc_t;
|
||||
typedef unsigned int speed_t;
|
||||
typedef unsigned int tcflag_t;
|
||||
|
||||
/*
 * One pipe-backed signalfd managed by act_signalfd4().
 * sigpipe[0] is the descriptor handed back to the caller;
 * sigpipe[1] is the end signal records are written to.
 */
struct sigfd {
	struct sigfd *next;	/* next entry in the sigfdtop list */
	int sigpipe[2];		/* [0] read end, [1] write end */
};

/* Head of the singly linked list of live sigfd entries. */
struct sigfd *sigfdtop;
|
||||
|
||||
#ifdef NCCS
|
||||
#undef NCCS
|
||||
#endif
|
||||
@@ -111,14 +121,29 @@ struct kernel_termios {
|
||||
cc_t c_cc[NCCS]; /* control characters */
|
||||
};
|
||||
|
||||
int main_loop(int fd, int cpu, pthread_mutex_t *lock, int mcosid);
|
||||
int main_loop(int fd, int cpu, pthread_mutex_t *lock);
|
||||
|
||||
static int mcosid;
|
||||
static int fd;
|
||||
static char *exec_path = NULL;
|
||||
static char *altroot;
|
||||
static const char rlimit_stack_envname[] = "MCKERNEL_RLIMIT_STACK";
|
||||
static int ischild;
|
||||
|
||||
/*
 * Per-fork synchronisation record.
 * NOTE(review): users are not visible in this part of the file;
 * presumably shared between parent and child around fork -- confirm.
 */
struct fork_sync {
	pid_t pid;
	int status;
	sem_t sem;
};

/* List node that holds a pointer to one fork_sync record. */
struct fork_sync_container {
	struct fork_sync_container *next;
	struct fork_sync *fs;
};

/* Head of the fork_sync_container list, and the mutex declared for it. */
struct fork_sync_container *fork_sync_top;
pthread_mutex_t fork_sync_mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||
|
||||
pid_t gettid(void)
|
||||
{
|
||||
return syscall(SYS_gettid);
|
||||
@@ -218,7 +243,7 @@ struct program_load_desc *load_elf(FILE *fp, char **interp_pathp)
|
||||
desc->pid = getpid();
|
||||
desc->pgid = getpgid(0);
|
||||
desc->entry = hdr.e_entry;
|
||||
|
||||
ioctl(fd, MCEXEC_UP_GET_CREDV, desc->cred);
|
||||
desc->at_phdr = load_addr + hdr.e_phoff;
|
||||
desc->at_phent = sizeof(phdr);
|
||||
desc->at_phnum = hdr.e_phnum;
|
||||
@@ -546,11 +571,32 @@ int load_elf_desc(char *filename, struct program_load_desc **desc_p,
|
||||
/* Drop old name if exists */
|
||||
if (exec_path) {
|
||||
free(exec_path);
|
||||
exec_path = NULL;
|
||||
}
|
||||
|
||||
exec_path = strdup(filename);
|
||||
if (!exec_path) {
|
||||
fprintf(stderr, "WARNING: strdup(filename) failed\n");
|
||||
if (!strncmp("/", filename, 1)) {
|
||||
exec_path = strdup(filename);
|
||||
|
||||
if (!exec_path) {
|
||||
fprintf(stderr, "WARNING: strdup(filename) failed\n");
|
||||
return ENOMEM;
|
||||
}
|
||||
}
|
||||
else {
|
||||
char *cwd = getcwd(NULL, 0);
|
||||
if (!cwd) {
|
||||
fprintf(stderr, "Error: getting current working dir pathname\n");
|
||||
return ENOMEM;
|
||||
}
|
||||
|
||||
exec_path = malloc(strlen(cwd) + strlen(filename) + 2);
|
||||
if (!exec_path) {
|
||||
fprintf(stderr, "Error: allocating exec_path\n");
|
||||
return ENOMEM;
|
||||
}
|
||||
|
||||
sprintf(exec_path, "%s/%s", cwd, filename);
|
||||
free(cwd);
|
||||
}
|
||||
|
||||
desc = load_elf(fp, &interp_path);
|
||||
@@ -764,7 +810,6 @@ struct thread_data_s {
|
||||
pthread_t thread_id;
|
||||
int fd;
|
||||
int cpu;
|
||||
int mcosid;
|
||||
int ret;
|
||||
pid_t tid;
|
||||
int terminate;
|
||||
@@ -785,11 +830,13 @@ static void *main_loop_thread_func(void *arg)
|
||||
td->tid = gettid();
|
||||
td->remote_tid = (int)td->tid;
|
||||
pthread_barrier_wait(&init_ready);
|
||||
td->ret = main_loop(td->fd, td->cpu, td->lock, td->mcosid);
|
||||
td->ret = main_loop(td->fd, td->cpu, td->lock);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#define LOCALSIG SIGURG
|
||||
|
||||
void
|
||||
sendsig(int sig, siginfo_t *siginfo, void *context)
|
||||
{
|
||||
@@ -801,7 +848,10 @@ sendsig(int sig, siginfo_t *siginfo, void *context)
|
||||
struct signal_desc sigdesc;
|
||||
|
||||
if(siginfo->si_pid == pid &&
|
||||
siginfo->si_signo == SIGINT)
|
||||
siginfo->si_signo == LOCALSIG)
|
||||
return;
|
||||
|
||||
if(siginfo->si_signo == SIGCHLD)
|
||||
return;
|
||||
|
||||
for(i = 0; i < ncpu; i++){
|
||||
@@ -839,6 +889,94 @@ sendsig(int sig, siginfo_t *siginfo, void *context)
|
||||
}
|
||||
}
|
||||
|
||||
long
|
||||
act_signalfd4(struct syscall_wait_desc *w)
|
||||
{
|
||||
struct sigfd *sfd;
|
||||
struct sigfd *sb;
|
||||
int mode = w->sr.args[0];
|
||||
int flags;
|
||||
int tmp;
|
||||
int rc = 0;
|
||||
struct signalfd_siginfo *info;
|
||||
|
||||
switch(mode){
|
||||
case 0: /* new signalfd */
|
||||
sfd = malloc(sizeof(struct sigfd));
|
||||
tmp = w->sr.args[1];
|
||||
flags = 0;
|
||||
if(tmp & SFD_NONBLOCK)
|
||||
flags |= O_NONBLOCK;
|
||||
if(tmp & SFD_CLOEXEC)
|
||||
flags |= O_CLOEXEC;
|
||||
pipe2(sfd->sigpipe, flags);
|
||||
sfd->next = sigfdtop;
|
||||
sigfdtop = sfd;
|
||||
rc = sfd->sigpipe[0];
|
||||
break;
|
||||
case 1: /* close signalfd */
|
||||
tmp = w->sr.args[1];
|
||||
for(sfd = sigfdtop, sb = NULL; sfd; sb = sfd, sfd = sfd->next)
|
||||
if(sfd->sigpipe[0] == tmp)
|
||||
break;
|
||||
if(!sfd)
|
||||
rc = -EBADF;
|
||||
else{
|
||||
if(sb)
|
||||
sb->next = sfd->next;
|
||||
else
|
||||
sigfdtop = sfd->next;
|
||||
close(sfd->sigpipe[0]);
|
||||
close(sfd->sigpipe[1]);
|
||||
free(sfd);
|
||||
}
|
||||
break;
|
||||
case 2: /* push signal */
|
||||
tmp = w->sr.args[1];
|
||||
for(sfd = sigfdtop; sfd; sfd = sfd->next)
|
||||
if(sfd->sigpipe[0] == tmp)
|
||||
break;
|
||||
if(!sfd)
|
||||
rc = -EBADF;
|
||||
else{
|
||||
info = (struct signalfd_siginfo *)w->sr.args[2];
|
||||
write(sfd->sigpipe[1], info, sizeof(struct signalfd_siginfo));
|
||||
}
|
||||
break;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
void
|
||||
act_sigaction(struct syscall_wait_desc *w)
|
||||
{
|
||||
struct sigaction act;
|
||||
int sig;
|
||||
|
||||
sig = w->sr.args[0];
|
||||
if (sig == SIGCHLD || sig == LOCALSIG)
|
||||
return;
|
||||
memset(&act, '\0', sizeof act);
|
||||
if (w->sr.args[1] == (unsigned long)SIG_IGN)
|
||||
act.sa_handler = SIG_IGN;
|
||||
else{
|
||||
act.sa_sigaction = sendsig;
|
||||
act.sa_flags = SA_SIGINFO;
|
||||
}
|
||||
sigaction(sig, &act, NULL);
|
||||
}
|
||||
|
||||
void
|
||||
act_sigprocmask(struct syscall_wait_desc *w)
|
||||
{
|
||||
sigset_t set;
|
||||
|
||||
sigemptyset(&set);
|
||||
memcpy(&set, &w->sr.args[0], sizeof(unsigned long));
|
||||
sigdelset(&set, LOCALSIG);
|
||||
sigprocmask(SIG_SETMASK, &set, NULL);
|
||||
}
|
||||
|
||||
static int reduce_stack(struct rlimit *orig_rlim, char *argv[])
|
||||
{
|
||||
int n;
|
||||
@@ -891,8 +1029,7 @@ void init_sigaction(void)
|
||||
|
||||
master_tid = gettid();
|
||||
for (i = 1; i <= 64; i++) {
|
||||
if (i != SIGCHLD && i != SIGCONT && i != SIGSTOP &&
|
||||
i != SIGTSTP && i != SIGTTIN && i != SIGTTOU) {
|
||||
if (i != SIGKILL && i != SIGSTOP && i != SIGCHLD) {
|
||||
struct sigaction act;
|
||||
|
||||
sigaction(i, NULL, &act);
|
||||
@@ -904,7 +1041,7 @@ void init_sigaction(void)
|
||||
}
|
||||
}
|
||||
|
||||
void init_worker_threads(int fd, int mcosid)
|
||||
void init_worker_threads(int fd)
|
||||
{
|
||||
int i;
|
||||
|
||||
@@ -916,7 +1053,6 @@ void init_worker_threads(int fd, int mcosid)
|
||||
|
||||
thread_data[i].fd = fd;
|
||||
thread_data[i].cpu = i;
|
||||
thread_data[i].mcosid = mcosid;
|
||||
thread_data[i].lock = &lock;
|
||||
thread_data[i].init_ready = &init_ready;
|
||||
thread_data[i].terminate = 0;
|
||||
@@ -930,7 +1066,75 @@ void init_worker_threads(int fd, int mcosid)
|
||||
}
|
||||
|
||||
pthread_barrier_wait(&init_ready);
|
||||
}
|
||||
}
|
||||
|
||||
#define MCK_RLIMIT_AS 0
|
||||
#define MCK_RLIMIT_CORE 1
|
||||
#define MCK_RLIMIT_CPU 2
|
||||
#define MCK_RLIMIT_DATA 3
|
||||
#define MCK_RLIMIT_FSIZE 4
|
||||
#define MCK_RLIMIT_LOCKS 5
|
||||
#define MCK_RLIMIT_MEMLOCK 6
|
||||
#define MCK_RLIMIT_MSGQUEUE 7
|
||||
#define MCK_RLIMIT_NICE 8
|
||||
#define MCK_RLIMIT_NOFILE 9
|
||||
#define MCK_RLIMIT_NPROC 10
|
||||
#define MCK_RLIMIT_RSS 11
|
||||
#define MCK_RLIMIT_RTPRIO 12
|
||||
#define MCK_RLIMIT_RTTIME 13
|
||||
#define MCK_RLIMIT_SIGPENDING 14
|
||||
#define MCK_RLIMIT_STACK 15
|
||||
|
||||
static int rlimits[] = {
|
||||
#ifdef RLIMIT_AS
|
||||
RLIMIT_AS, MCK_RLIMIT_AS,
|
||||
#endif
|
||||
#ifdef RLIMIT_CORE
|
||||
RLIMIT_CORE, MCK_RLIMIT_CORE,
|
||||
#endif
|
||||
#ifdef RLIMIT_CPU
|
||||
RLIMIT_CPU, MCK_RLIMIT_CPU,
|
||||
#endif
|
||||
#ifdef RLIMIT_DATA
|
||||
RLIMIT_DATA, MCK_RLIMIT_DATA,
|
||||
#endif
|
||||
#ifdef RLIMIT_FSIZE
|
||||
RLIMIT_FSIZE, MCK_RLIMIT_FSIZE,
|
||||
#endif
|
||||
#ifdef RLIMIT_LOCKS
|
||||
RLIMIT_LOCKS, MCK_RLIMIT_LOCKS,
|
||||
#endif
|
||||
#ifdef RLIMIT_MEMLOCK
|
||||
RLIMIT_MEMLOCK, MCK_RLIMIT_MEMLOCK,
|
||||
#endif
|
||||
#ifdef RLIMIT_MSGQUEUE
|
||||
RLIMIT_MSGQUEUE,MCK_RLIMIT_MSGQUEUE,
|
||||
#endif
|
||||
#ifdef RLIMIT_NICE
|
||||
RLIMIT_NICE, MCK_RLIMIT_NICE,
|
||||
#endif
|
||||
#ifdef RLIMIT_NOFILE
|
||||
RLIMIT_NOFILE, MCK_RLIMIT_NOFILE,
|
||||
#endif
|
||||
#ifdef RLIMIT_NPROC
|
||||
RLIMIT_NPROC, MCK_RLIMIT_NPROC,
|
||||
#endif
|
||||
#ifdef RLIMIT_RSS
|
||||
RLIMIT_RSS, MCK_RLIMIT_RSS,
|
||||
#endif
|
||||
#ifdef RLIMIT_RTPRIO
|
||||
RLIMIT_RTPRIO, MCK_RLIMIT_RTPRIO,
|
||||
#endif
|
||||
#ifdef RLIMIT_RTTIME
|
||||
RLIMIT_RTTIME, MCK_RLIMIT_RTTIME,
|
||||
#endif
|
||||
#ifdef RLIMIT_SIGPENDING
|
||||
RLIMIT_SIGPENDING,MCK_RLIMIT_SIGPENDING,
|
||||
#endif
|
||||
#ifdef RLIMIT_STACK
|
||||
RLIMIT_STACK, MCK_RLIMIT_STACK,
|
||||
#endif
|
||||
};
|
||||
|
||||
char dev[64];
|
||||
|
||||
@@ -952,7 +1156,6 @@ int main(int argc, char **argv)
|
||||
unsigned long lcur;
|
||||
unsigned long lmax;
|
||||
int target_core = 0;
|
||||
int mcosid = 0;
|
||||
int opt;
|
||||
char path[1024];
|
||||
char *shell = NULL;
|
||||
@@ -1056,7 +1259,9 @@ int main(int argc, char **argv)
|
||||
if (shell) {
|
||||
argv[optind] = path;
|
||||
}
|
||||
|
||||
|
||||
for(i = 0; i < sizeof(rlimits) / sizeof(int); i += 2)
|
||||
getrlimit(rlimits[i], &desc->rlimit[rlimits[i + 1]]);
|
||||
desc->envs_len = envs_len;
|
||||
desc->envs = envs;
|
||||
//print_flat(envs);
|
||||
@@ -1091,8 +1296,8 @@ int main(int argc, char **argv)
|
||||
rlim_stack.rlim_cur = lcur;
|
||||
rlim_stack.rlim_max = lmax;
|
||||
}
|
||||
desc->rlimit_stack_cur = rlim_stack.rlim_cur;
|
||||
desc->rlimit_stack_max = rlim_stack.rlim_max;
|
||||
desc->rlimit[MCK_RLIMIT_STACK].rlim_cur = rlim_stack.rlim_cur;
|
||||
desc->rlimit[MCK_RLIMIT_STACK].rlim_max = rlim_stack.rlim_max;
|
||||
|
||||
ncpu = ioctl(fd, MCEXEC_UP_GET_CPU, 0);
|
||||
if(ncpu == -1){
|
||||
@@ -1173,7 +1378,7 @@ int main(int argc, char **argv)
|
||||
|
||||
init_sigaction();
|
||||
|
||||
init_worker_threads(fd, mcosid);
|
||||
init_worker_threads(fd);
|
||||
|
||||
if (ioctl(fd, MCEXEC_UP_START_IMAGE, (unsigned long)desc) != 0) {
|
||||
perror("exec");
|
||||
@@ -1244,13 +1449,13 @@ static void
|
||||
kill_thread(unsigned long cpu)
|
||||
{
|
||||
if(cpu >= 0 && cpu < ncpu){
|
||||
pthread_kill(thread_data[cpu].thread_id, SIGINT);
|
||||
pthread_kill(thread_data[cpu].thread_id, LOCALSIG);
|
||||
}
|
||||
else{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ncpu; ++i) {
|
||||
pthread_kill(thread_data[i].thread_id, SIGINT);
|
||||
pthread_kill(thread_data[i].thread_id, LOCALSIG);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1351,7 +1556,32 @@ int close_cloexec_fds(int mcos_fd)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main_loop(int fd, int cpu, pthread_mutex_t *lock, int mcosid)
|
||||
char *
|
||||
chgpath(char *in, char *buf)
|
||||
{
|
||||
char *fn = in;
|
||||
struct stat sb;
|
||||
|
||||
if (!strncmp(fn, "/proc/self/", 11)){
|
||||
sprintf(buf, "/proc/mcos%d/%d/%s", mcosid, getpid(), fn + 11);
|
||||
fn = buf;
|
||||
}
|
||||
else if(!strncmp(fn, "/proc/", 6)){
|
||||
sprintf(buf, "/proc/mcos%d/%s", mcosid, fn + 6);
|
||||
fn = buf;
|
||||
}
|
||||
else if(!strcmp(fn, "/sys/devices/system/cpu/online")){
|
||||
fn = "/admin/fs/attached/files/sys/devices/system/cpu/online";
|
||||
}
|
||||
else
|
||||
return in;
|
||||
|
||||
if(stat(fn, &sb) == -1)
|
||||
return in;
|
||||
return fn;
|
||||
}
|
||||
|
||||
int main_loop(int fd, int cpu, pthread_mutex_t *lock)
|
||||
{
|
||||
struct syscall_wait_desc w;
|
||||
long ret;
|
||||
@@ -1389,14 +1619,8 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock, int mcosid)
|
||||
}
|
||||
__dprintf("open: %s\n", pathbuf);
|
||||
|
||||
fn = pathbuf;
|
||||
if(!strncmp(fn, "/proc/", 6)){
|
||||
sprintf(tmpbuf, "/proc/mcos%d/%s", mcosid, fn + 6);
|
||||
fn = tmpbuf;
|
||||
}
|
||||
else if(!strcmp(fn, "/sys/devices/system/cpu/online")){
|
||||
fn = "/admin/fs/attached/files/sys/devices/system/cpu/online";
|
||||
}
|
||||
fn = chgpath(pathbuf, tmpbuf);
|
||||
|
||||
ret = open(fn, w.sr.args[1], w.sr.args[2]);
|
||||
SET_ERR(ret);
|
||||
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||
@@ -1505,112 +1729,156 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock, int mcosid)
|
||||
}
|
||||
|
||||
case __NR_fork: {
|
||||
int child;
|
||||
int sync_pipe_fd[2];
|
||||
char sync_msg;
|
||||
struct fork_sync *fs;
|
||||
struct fork_sync_container *fsc;
|
||||
struct fork_sync_container *fp;
|
||||
struct fork_sync_container *fb;
|
||||
int rc = -1;
|
||||
pid_t pid;
|
||||
|
||||
if (pipe(sync_pipe_fd) != 0) {
|
||||
fprintf(stderr, "fork(): error creating sync pipe\n");
|
||||
do_syscall_return(fd, cpu, -1, 0, 0, 0, 0);
|
||||
fsc = malloc(sizeof(struct fork_sync_container));
|
||||
memset(fsc, '\0', sizeof(struct fork_sync_container));
|
||||
pthread_mutex_lock(&fork_sync_mutex);
|
||||
fsc->next = fork_sync_top;
|
||||
fork_sync_top = fsc;
|
||||
pthread_mutex_unlock(&fork_sync_mutex);
|
||||
fsc->fs = fs = mmap(NULL, sizeof(struct fork_sync),
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
|
||||
if(fs == (void *)-1){
|
||||
goto fork_err;
|
||||
}
|
||||
|
||||
memset(fs, '\0', sizeof(struct fork_sync));
|
||||
sem_init(&fs->sem, 1, 0);
|
||||
|
||||
pid = fork();
|
||||
|
||||
switch (pid) {
|
||||
/* Error */
|
||||
case -1:
|
||||
fprintf(stderr, "fork(): error forking child process\n");
|
||||
rc = -errno;
|
||||
break;
|
||||
|
||||
/* Child process */
|
||||
case 0: {
|
||||
int i;
|
||||
int ret = 1;
|
||||
struct newprocess_desc npdesc;
|
||||
|
||||
ischild = 1;
|
||||
/* Reopen device fd */
|
||||
close(fd);
|
||||
fd = open(dev, O_RDWR);
|
||||
if (fd < 0) {
|
||||
fs->status = -errno;
|
||||
fprintf(stderr, "ERROR: opening %s\n", dev);
|
||||
|
||||
goto fork_child_sync_pipe;
|
||||
}
|
||||
|
||||
/* Reinit signals and syscall threads */
|
||||
init_sigaction();
|
||||
init_worker_threads(fd);
|
||||
|
||||
__dprintf("pid(%d): signals and syscall threads OK\n",
|
||||
getpid());
|
||||
|
||||
/* Hold executable also in the child process */
|
||||
if ((ret = ioctl(fd, MCEXEC_UP_OPEN_EXEC, exec_path))
|
||||
!= 0) {
|
||||
fprintf(stderr, "Error: open_exec() fails for %s: %d (fd: %d)\n",
|
||||
exec_path, ret, fd);
|
||||
fs->status = -errno;
|
||||
goto fork_child_sync_pipe;
|
||||
}
|
||||
|
||||
fork_child_sync_pipe:
|
||||
sem_post(&fs->sem);
|
||||
if (fs->status)
|
||||
exit(1);
|
||||
|
||||
for (fp = fork_sync_top; fp;) {
|
||||
fb = fp->next;
|
||||
if (fp->fs)
|
||||
munmap(fp->fs, sizeof(struct fork_sync));
|
||||
free(fp);
|
||||
fp = fb;
|
||||
}
|
||||
fork_sync_top = NULL;
|
||||
pthread_mutex_init(&fork_sync_mutex, NULL);
|
||||
|
||||
npdesc.pid = getpid();
|
||||
ioctl(fd, MCEXEC_UP_NEW_PROCESS, &npdesc);
|
||||
|
||||
/* TODO: does the forked thread run in a pthread context? */
|
||||
for (i = 0; i <= ncpu; ++i) {
|
||||
pthread_join(thread_data[i].thread_id, NULL);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Parent */
|
||||
default:
|
||||
fs->pid = pid;
|
||||
while ((rc = sem_trywait(&fs->sem)) == -1 && (errno == EAGAIN || errno == EINTR)) {
|
||||
int st;
|
||||
int wrc;
|
||||
|
||||
wrc = waitpid(pid, &st, WNOHANG);
|
||||
if(wrc == pid) {
|
||||
fs->status = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
sched_yield();
|
||||
}
|
||||
|
||||
if (fs->status != 0) {
|
||||
fprintf(stderr, "fork(): error with child process after fork\n");
|
||||
rc = fs->status;
|
||||
break;
|
||||
}
|
||||
|
||||
rc = pid;
|
||||
break;
|
||||
}
|
||||
|
||||
child = fork();
|
||||
|
||||
switch (child) {
|
||||
/* Error */
|
||||
case -1:
|
||||
fprintf(stderr, "fork(): error forking child process\n");
|
||||
close(sync_pipe_fd[0]);
|
||||
close(sync_pipe_fd[1]);
|
||||
do_syscall_return(fd, cpu, -1, 0, 0, 0, 0);
|
||||
break;
|
||||
|
||||
/* Child process */
|
||||
case 0: {
|
||||
int i;
|
||||
int ret = 1;
|
||||
|
||||
ischild = 1;
|
||||
/* Reopen device fd */
|
||||
close(fd);
|
||||
fd = open(dev, O_RDWR);
|
||||
if (fd < 0) {
|
||||
/* TODO: tell parent something went wrong? */
|
||||
fprintf(stderr, "ERROR: opening %s\n", dev);
|
||||
|
||||
/* Tell parent something went wrong */
|
||||
sync_msg = 1;
|
||||
goto fork_child_sync_pipe;
|
||||
}
|
||||
|
||||
/* Reinit signals and syscall threads */
|
||||
init_sigaction();
|
||||
init_worker_threads(fd, mcosid);
|
||||
|
||||
__dprintf("pid(%d): signals and syscall threads OK\n",
|
||||
getpid());
|
||||
|
||||
/* Hold executable also in the child process */
|
||||
if ((ret = ioctl(fd, MCEXEC_UP_OPEN_EXEC, exec_path))
|
||||
!= 0) {
|
||||
fprintf(stderr, "Error: open_exec() fails for %s: %d (fd: %d)\n",
|
||||
exec_path, ret, fd);
|
||||
goto fork_child_sync_pipe;
|
||||
}
|
||||
|
||||
/* Tell parent everything went OK */
|
||||
sync_msg = 0;
|
||||
fork_child_sync_pipe:
|
||||
if (write(sync_pipe_fd[1], &sync_msg, 1) != 1) {
|
||||
fprintf(stderr, "ERROR: writing sync pipe\n");
|
||||
goto fork_child_out;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
fork_child_out:
|
||||
close(sync_pipe_fd[0]);
|
||||
close(sync_pipe_fd[1]);
|
||||
|
||||
/* TODO: does the forked thread run in a pthread context? */
|
||||
for (i = 0; i <= ncpu; ++i) {
|
||||
pthread_join(thread_data[i].thread_id, NULL);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Parent */
|
||||
default:
|
||||
|
||||
if (read(sync_pipe_fd[0], &sync_msg, 1) != 1) {
|
||||
fprintf(stderr, "fork(): error reading sync message\n");
|
||||
child = -1;
|
||||
goto sync_out;
|
||||
}
|
||||
|
||||
if (sync_msg != 0) {
|
||||
fprintf(stderr, "fork(): error with child process after fork\n");
|
||||
child = -1;
|
||||
goto sync_out;
|
||||
}
|
||||
|
||||
sync_out:
|
||||
close(sync_pipe_fd[0]);
|
||||
close(sync_pipe_fd[1]);
|
||||
do_syscall_return(fd, cpu, child, 0, 0, 0, 0);
|
||||
sem_destroy(&fs->sem);
|
||||
munmap(fs, sizeof(struct fork_sync));
|
||||
fork_err:
|
||||
pthread_mutex_lock(&fork_sync_mutex);
|
||||
for(fp = fork_sync_top, fb = NULL; fp; fb = fp, fp = fp->next)
|
||||
if(fp == fsc)
|
||||
break;
|
||||
if(fp){
|
||||
if(fb)
|
||||
fb->next = fsc->next;
|
||||
else
|
||||
fork_sync_top = fsc->next;
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&fork_sync_mutex);
|
||||
do_syscall_return(fd, cpu, rc, 0, 0, 0, 0);
|
||||
break;
|
||||
}
|
||||
|
||||
case __NR_wait4: {
|
||||
int status;
|
||||
int ret;
|
||||
pid_t pid = w.sr.args[0];
|
||||
int options = w.sr.args[2];
|
||||
siginfo_t info;
|
||||
int opt;
|
||||
|
||||
if ((ret = waitpid(pid, &status, 0)) != pid) {
|
||||
opt = WEXITED | (options & WNOWAIT);
|
||||
memset(&info, '\0', sizeof info);
|
||||
while((ret = waitid(P_PID, pid, &info, opt)) == -1 &&
|
||||
errno == EINTR);
|
||||
if(ret == 0){
|
||||
ret = info.si_pid;
|
||||
}
|
||||
|
||||
if(ret != pid) {
|
||||
fprintf(stderr, "ERROR: waiting for %lu\n", w.sr.args[0]);
|
||||
}
|
||||
|
||||
@@ -1747,6 +2015,32 @@ return_execve2:
|
||||
break;
|
||||
}
|
||||
|
||||
case __NR_signalfd4:
|
||||
ret = act_signalfd4(&w);
|
||||
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||
break;
|
||||
|
||||
case __NR_rt_sigaction:
|
||||
act_sigaction(&w);
|
||||
do_syscall_return(fd, cpu, 0, 0, 0, 0, 0);
|
||||
break;
|
||||
|
||||
case __NR_rt_sigprocmask:
|
||||
act_sigprocmask(&w);
|
||||
do_syscall_return(fd, cpu, 0, 0, 0, 0, 0);
|
||||
break;
|
||||
|
||||
case __NR_setfsuid:
|
||||
if(w.sr.args[1] == 1){
|
||||
ioctl(fd, MCEXEC_UP_GET_CRED, w.sr.args[0]);
|
||||
ret = 0;
|
||||
}
|
||||
else{
|
||||
ret = setfsuid(w.sr.args[0]);
|
||||
}
|
||||
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||
break;
|
||||
|
||||
case __NR_close:
|
||||
if(w.sr.args[0] == fd)
|
||||
ret = -EBADF;
|
||||
@@ -1756,8 +2050,8 @@ return_execve2:
|
||||
break;
|
||||
|
||||
default:
|
||||
ret = do_generic_syscall(&w);
|
||||
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||
ret = do_generic_syscall(&w);
|
||||
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||
break;
|
||||
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o shmobj.o
|
||||
OBJS += zeroobj.o procfs.o devobj.o
|
||||
DEPSRCS=$(wildcard $(SRC)/*.c)
|
||||
|
||||
CFLAGS += -I$(SRC)/include -mcmodel=kernel -D__KERNEL__
|
||||
CFLAGS += -I$(SRC)/include -mcmodel=kernel -D__KERNEL__ -g
|
||||
LDFLAGS += -e arch_start
|
||||
IHKOBJ = ihk/ihk.o
|
||||
|
||||
|
||||
@@ -30,6 +30,9 @@ SECTIONS
|
||||
. = vsyscall_page + 0x400;
|
||||
*(.vsyscall.time)
|
||||
|
||||
. = vsyscall_page + 0x800;
|
||||
*(.vsyscall.getcpu)
|
||||
|
||||
. = ALIGN(4096);
|
||||
} : data = 0xf4
|
||||
|
||||
|
||||
@@ -30,6 +30,9 @@ SECTIONS
|
||||
. = vsyscall_page + 0x400;
|
||||
*(.vsyscall.time)
|
||||
|
||||
. = vsyscall_page + 0x800;
|
||||
*(.vsyscall.getcpu)
|
||||
|
||||
. = ALIGN(4096);
|
||||
} : data = 0xf4
|
||||
|
||||
|
||||
@@ -30,6 +30,9 @@ SECTIONS
|
||||
. = vsyscall_page + 0x400;
|
||||
*(.vsyscall.time)
|
||||
|
||||
. = vsyscall_page + 0x800;
|
||||
*(.vsyscall.getcpu)
|
||||
|
||||
. = ALIGN(4096);
|
||||
} : data = 0xf4
|
||||
|
||||
@@ -39,8 +42,4 @@ SECTIONS
|
||||
. = ALIGN(4096);
|
||||
_end = .;
|
||||
|
||||
/DISCARD/ : {
|
||||
*(.eh_frame)
|
||||
*(.note.gnu.build-id)
|
||||
}
|
||||
}
|
||||
|
||||
2
kernel/config/config.smp-x86
Normal file
2
kernel/config/config.smp-x86
Normal file
@@ -0,0 +1,2 @@
|
||||
CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow
|
||||
LDFLAGS += -T $(SRC)/config/smp-x86.lds
|
||||
49
kernel/config/smp-x86.lds
Normal file
49
kernel/config/smp-x86.lds
Normal file
@@ -0,0 +1,49 @@
|
||||
PHDRS
|
||||
{
|
||||
text PT_LOAD FLAGS(5);
|
||||
data PT_LOAD FLAGS(7);
|
||||
}
|
||||
SECTIONS
|
||||
{
|
||||
. = 0xffffffff80001000;
|
||||
_head = .;
|
||||
|
||||
.text : {
|
||||
*(.text);
|
||||
} : text
|
||||
|
||||
. = ALIGN(4096);
|
||||
.data : {
|
||||
*(.data)
|
||||
*(.data.*)
|
||||
} :data
|
||||
.rodata : {
|
||||
*(.rodata .rodata.*)
|
||||
} :data
|
||||
|
||||
.vsyscall : ALIGN(0x1000) {
|
||||
vsyscall_page = .;
|
||||
|
||||
. = vsyscall_page + 0x000;
|
||||
*(.vsyscall.gettimeofday)
|
||||
|
||||
. = vsyscall_page + 0x400;
|
||||
*(.vsyscall.time)
|
||||
|
||||
. = vsyscall_page + 0x800;
|
||||
*(.vsyscall.getcpu)
|
||||
|
||||
. = ALIGN(4096);
|
||||
} : data = 0xf4
|
||||
|
||||
.bss : {
|
||||
*(.bss .bss.*)
|
||||
}
|
||||
. = ALIGN(4096);
|
||||
_end = .;
|
||||
|
||||
/DISCARD/ : {
|
||||
*(.eh_frame)
|
||||
*(.note.gnu.build-id)
|
||||
}
|
||||
}
|
||||
@@ -45,12 +45,12 @@ void kputs(char *buf)
|
||||
|
||||
#define KPRINTF_LOCAL_BUF_LEN 1024
|
||||
|
||||
int kprintf_lock()
|
||||
unsigned long kprintf_lock(void)
|
||||
{
|
||||
return ihk_mc_spinlock_lock(&kmsg_lock);
|
||||
}
|
||||
|
||||
void kprintf_unlock(int irqflags)
|
||||
void kprintf_unlock(unsigned long irqflags)
|
||||
{
|
||||
ihk_mc_spinlock_unlock(&kmsg_lock, irqflags);
|
||||
}
|
||||
|
||||
@@ -3,7 +3,8 @@
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* memory mapped device pager client
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2014 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
@@ -32,9 +33,18 @@
|
||||
#include <pager.h>
|
||||
#include <string.h>
|
||||
#include <syscall.h>
|
||||
#include <process.h>
|
||||
|
||||
//#define DEBUG_PRINT_DEVOBJ
|
||||
|
||||
#ifdef DEBUG_PRINT_DEVOBJ
|
||||
#define dkprintf(...) kprintf(__VA_ARGS__)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#else
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
#define dkprintf(...)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
|
||||
struct devobj {
|
||||
struct memobj memobj; /* must be first */
|
||||
@@ -76,7 +86,7 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp
|
||||
struct devobj *obj = NULL;
|
||||
const size_t npages = (len + PAGE_SIZE - 1) / PAGE_SIZE;
|
||||
|
||||
kprintf("devobj_create(%d,%lx,%lx)\n", fd, len, off);
|
||||
dkprintf("devobj_create(%d,%lx,%lx)\n", fd, len, off);
|
||||
#define MAX_PAGES_IN_DEVOBJ (PAGE_SIZE / sizeof(uintptr_t))
|
||||
if (npages > MAX_PAGES_IN_DEVOBJ) {
|
||||
error = -EFBIG;
|
||||
@@ -111,8 +121,8 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp
|
||||
kprintf("devobj_create(%d,%lx,%lx):map failed. %d\n", fd, len, off, error);
|
||||
goto out;
|
||||
}
|
||||
kprintf("devobj_create:handle: %lx\n", result.handle);
|
||||
kprintf("devobj_create:maxprot: %x\n", result.maxprot);
|
||||
dkprintf("devobj_create:handle: %lx\n", result.handle);
|
||||
dkprintf("devobj_create:maxprot: %x\n", result.maxprot);
|
||||
|
||||
obj->memobj.ops = &devobj_ops;
|
||||
obj->memobj.flags = MF_HAS_PAGER;
|
||||
@@ -134,7 +144,7 @@ out:
|
||||
}
|
||||
kfree(obj);
|
||||
}
|
||||
kprintf("devobj_create(%d,%lx,%lx): %d %p %x%d\n", fd, len, off, error, *objp, *maxprotp);
|
||||
dkprintf("devobj_create(%d,%lx,%lx): %d %p %x%d\n", fd, len, off, error, *objp, *maxprotp);
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -142,7 +152,7 @@ static void devobj_ref(struct memobj *memobj)
|
||||
{
|
||||
struct devobj *obj = to_devobj(memobj);
|
||||
|
||||
kprintf("devobj_ref(%p %lx):\n", obj, obj->handle);
|
||||
dkprintf("devobj_ref(%p %lx):\n", obj, obj->handle);
|
||||
memobj_lock(&obj->memobj);
|
||||
++obj->ref;
|
||||
memobj_unlock(&obj->memobj);
|
||||
@@ -155,7 +165,7 @@ static void devobj_release(struct memobj *memobj)
|
||||
struct devobj *free_obj = NULL;
|
||||
uintptr_t handle;
|
||||
|
||||
kprintf("devobj_release(%p %lx)\n", obj, obj->handle);
|
||||
dkprintf("devobj_release(%p %lx)\n", obj, obj->handle);
|
||||
|
||||
memobj_lock(&obj->memobj);
|
||||
--obj->ref;
|
||||
@@ -187,12 +197,12 @@ static void devobj_release(struct memobj *memobj)
|
||||
kfree(free_obj);
|
||||
}
|
||||
|
||||
kprintf("devobj_release(%p %lx):free %p\n",
|
||||
dkprintf("devobj_release(%p %lx):free %p\n",
|
||||
obj, handle, free_obj);
|
||||
return;
|
||||
}
|
||||
|
||||
static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintptr_t *physp)
|
||||
static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintptr_t *physp, unsigned long *flag)
|
||||
{
|
||||
const off_t pgoff = off >> PAGE_SHIFT;
|
||||
struct devobj *obj = to_devobj(memobj);
|
||||
@@ -202,7 +212,7 @@ static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintpt
|
||||
ihk_mc_user_context_t ctx;
|
||||
int ix;
|
||||
|
||||
kprintf("devobj_get_page(%p %lx,%lx,%d)\n", memobj, obj->handle, off, p2align);
|
||||
dkprintf("devobj_get_page(%p %lx,%lx,%d)\n", memobj, obj->handle, off, p2align);
|
||||
|
||||
if ((pgoff < obj->pfn_pgoff) || ((obj->pfn_pgoff + obj->npages) <= pgoff)) {
|
||||
error = -EFBIG;
|
||||
@@ -210,7 +220,7 @@ static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintpt
|
||||
goto out;
|
||||
}
|
||||
ix = pgoff - obj->pfn_pgoff;
|
||||
kprintf("ix: %ld\n", ix);
|
||||
dkprintf("ix: %ld\n", ix);
|
||||
|
||||
memobj_lock(&obj->memobj);
|
||||
pfn = obj->pfn_table[ix];
|
||||
@@ -230,12 +240,20 @@ kprintf("ix: %ld\n", ix);
|
||||
|
||||
if (pfn & PFN_PRESENT) {
|
||||
/* convert remote physical into local physical */
|
||||
kprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT before %#lx\n", memobj, obj->handle, off, p2align, pfn);
|
||||
dkprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT before %#lx\n", memobj, obj->handle, off, p2align, pfn);
|
||||
attr = pfn & ~PFN_PFN;
|
||||
|
||||
/* TODO: do an arch dependent PTE to mapping flag conversion
|
||||
* instead of this inline check, also, we rely on having the
|
||||
* same PAT config as Linux here.. */
|
||||
if ((pfn & PFL1_PWT) && !(pfn & PFL1_PCD)) {
|
||||
*flag |= VR_WRITE_COMBINED;
|
||||
}
|
||||
|
||||
pfn = ihk_mc_map_memory(NULL, (pfn & PFN_PFN), PAGE_SIZE);
|
||||
pfn &= PFN_PFN;
|
||||
pfn |= attr;
|
||||
kprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT after %#lx\n", memobj, obj->handle, off, p2align, pfn);
|
||||
dkprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT after %#lx\n", memobj, obj->handle, off, p2align, pfn);
|
||||
}
|
||||
|
||||
memobj_lock(&obj->memobj);
|
||||
@@ -253,6 +271,6 @@ kprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT after %#lx\n", memobj, obj->
|
||||
*physp = pfn & PFN_PFN;
|
||||
|
||||
out:
|
||||
kprintf("devobj_get_page(%p %lx,%lx,%d): %d %lx\n", memobj, obj->handle, off, p2align, error, *physp);
|
||||
dkprintf("devobj_get_page(%p %lx,%lx,%d): %d %lx\n", memobj, obj->handle, off, p2align, error, *physp);
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -26,7 +26,7 @@
|
||||
#include <string.h>
|
||||
#include <syscall.h>
|
||||
|
||||
#define dkprintf(...)
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
|
||||
static ihk_spinlock_t fileobj_list_lock = SPIN_LOCK_UNLOCKED;
|
||||
@@ -46,6 +46,7 @@ static memobj_ref_func_t fileobj_ref;
|
||||
static memobj_get_page_func_t fileobj_get_page;
|
||||
static memobj_copy_page_func_t fileobj_copy_page;
|
||||
static memobj_flush_page_func_t fileobj_flush_page;
|
||||
static memobj_invalidate_page_func_t fileobj_invalidate_page;
|
||||
|
||||
static struct memobj_ops fileobj_ops = {
|
||||
.release = &fileobj_release,
|
||||
@@ -53,6 +54,7 @@ static struct memobj_ops fileobj_ops = {
|
||||
.get_page = &fileobj_get_page,
|
||||
.copy_page = &fileobj_copy_page,
|
||||
.flush_page = &fileobj_flush_page,
|
||||
.invalidate_page = &fileobj_invalidate_page,
|
||||
};
|
||||
|
||||
static struct fileobj *to_fileobj(struct memobj *memobj)
|
||||
@@ -383,7 +385,7 @@ out:
|
||||
return;
|
||||
}
|
||||
|
||||
static int fileobj_get_page(struct memobj *memobj, off_t off, int p2align, uintptr_t *physp)
|
||||
static int fileobj_get_page(struct memobj *memobj, off_t off, int p2align, uintptr_t *physp, unsigned long *pflag)
|
||||
{
|
||||
struct process *proc = cpu_local_var(current);
|
||||
struct fileobj *obj = to_fileobj(memobj);
|
||||
@@ -577,3 +579,33 @@ static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys,
|
||||
memobj_lock(&obj->memobj);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int fileobj_invalidate_page(struct memobj *memobj, uintptr_t phys,
|
||||
size_t pgsize)
|
||||
{
|
||||
struct fileobj *obj = to_fileobj(memobj);
|
||||
int error;
|
||||
struct page *page;
|
||||
|
||||
dkprintf("fileobj_invalidate_page(%p,%#lx,%#lx)\n",
|
||||
memobj, phys, pgsize);
|
||||
|
||||
if (!(page = phys_to_page(phys))
|
||||
|| !(page = page_list_lookup(obj, page->offset))) {
|
||||
error = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (ihk_atomic_read(&page->count) == 1) {
|
||||
if (page_unmap(page)) {
|
||||
ihk_mc_free_pages(phys_to_virt(phys),
|
||||
pgsize/PAGE_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
error = 0;
|
||||
out:
|
||||
dkprintf("fileobj_invalidate_page(%p,%#lx,%#lx):%d\n",
|
||||
memobj, phys, pgsize, error);
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -76,7 +76,7 @@
|
||||
#ifdef DEBUG_PRINT_FUTEX
|
||||
#define dkprintf kprintf
|
||||
#else
|
||||
#define dkprintf(...)
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#endif
|
||||
|
||||
int futex_cmpxchg_enabled;
|
||||
|
||||
@@ -28,13 +28,15 @@
|
||||
#include <process.h>
|
||||
#include <page.h>
|
||||
#include <mman.h>
|
||||
#include <init.h>
|
||||
#include <kmalloc.h>
|
||||
|
||||
//#define DEBUG_PRINT_HOST
|
||||
|
||||
#ifdef DEBUG_PRINT_HOST
|
||||
#define dkprintf kprintf
|
||||
#else
|
||||
#define dkprintf(...)
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#endif
|
||||
|
||||
void check_mapping_for_proc(struct process *proc, unsigned long addr)
|
||||
@@ -69,7 +71,8 @@ int prepare_process_ranges_args_envs(struct process *proc,
|
||||
unsigned long args_envs_p, args_envs_rp;
|
||||
unsigned long s, e, up;
|
||||
char **argv;
|
||||
int i, n, argc, envc, args_envs_npages;
|
||||
char **a;
|
||||
int i, n, argc, envc, args_envs_npages, l;
|
||||
char **env;
|
||||
int range_npages;
|
||||
void *up_v;
|
||||
@@ -173,23 +176,8 @@ int prepare_process_ranges_args_envs(struct process *proc,
|
||||
pn->entry);
|
||||
}
|
||||
|
||||
#if 1
|
||||
/*
|
||||
Fix for the problem where brk grows to hit .bss section
|
||||
when using dynamically linked executables.
|
||||
Test code resides in /home/takagi/project/mpich/src/brk_icc_mic.
|
||||
This is because when using
|
||||
ld.so (i.e. using shared objects), mckernel/kernel/host.c sets "brk" to
|
||||
the end of .bss of ld.so (e.g. 0x21f000), and then ld.so places a
|
||||
main-program after this (e.g. 0x400000), so "brk" will hit .bss
|
||||
eventually.
|
||||
*/
|
||||
proc->vm->region.brk_start = proc->vm->region.brk_end =
|
||||
(USER_END / 4) & LARGE_PAGE_MASK;
|
||||
#else
|
||||
proc->vm->region.brk_start = proc->vm->region.brk_end =
|
||||
proc->vm->region.data_end;
|
||||
#endif
|
||||
|
||||
/* Map, copy and update args and envs */
|
||||
flags = VR_PROT_READ | VR_PROT_WRITE;
|
||||
@@ -284,13 +272,21 @@ int prepare_process_ranges_args_envs(struct process *proc,
|
||||
dkprintf("argc: %d\n", argc);
|
||||
|
||||
argv = (char **)(args_envs + (sizeof(int)));
|
||||
while (*argv) {
|
||||
char **_argv = argv;
|
||||
dkprintf("%s\n", args_envs + (unsigned long)*argv);
|
||||
*argv = (char *)addr + (unsigned long)*argv; // Process' address space!
|
||||
argv = ++_argv;
|
||||
if(proc->saved_cmdline){
|
||||
kfree(proc->saved_cmdline);
|
||||
proc->saved_cmdline_len = 0;
|
||||
}
|
||||
for(a = argv, l = 0; *a; a++)
|
||||
l += strlen(args_envs + (unsigned long)*a) + 1;
|
||||
proc->saved_cmdline = kmalloc(p->args_len, IHK_MC_AP_NOWAIT);
|
||||
if(!proc->saved_cmdline)
|
||||
goto err;
|
||||
proc->saved_cmdline_len = l;
|
||||
for(a = argv, l = 0; *a; a++){
|
||||
strcpy(proc->saved_cmdline + l, args_envs + (unsigned long)*a);
|
||||
l += strlen(args_envs + (unsigned long)*a) + 1;
|
||||
*a = (char *)addr + (unsigned long)*a; // Process' address space!
|
||||
}
|
||||
argv = (char **)(args_envs + (sizeof(int)));
|
||||
|
||||
envc = *((int*)(args_envs + p->args_len));
|
||||
dkprintf("envc: %d\n", envc);
|
||||
@@ -308,7 +304,7 @@ int prepare_process_ranges_args_envs(struct process *proc,
|
||||
|
||||
p->rprocess = (unsigned long)proc;
|
||||
p->rpgtable = virt_to_phys(proc->vm->page_table);
|
||||
|
||||
|
||||
if (init_process_stack(proc, pn, argc, argv, envc, env) != 0) {
|
||||
goto err;
|
||||
}
|
||||
@@ -363,12 +359,21 @@ static int process_msg_prepare_process(unsigned long rphys)
|
||||
}
|
||||
proc->ftn->pid = pn->pid;
|
||||
proc->ftn->pgid = pn->pgid;
|
||||
|
||||
proc->ftn->ruid = pn->cred[0];
|
||||
proc->ftn->euid = pn->cred[1];
|
||||
proc->ftn->suid = pn->cred[2];
|
||||
proc->ftn->fsuid = pn->cred[3];
|
||||
proc->ftn->rgid = pn->cred[4];
|
||||
proc->ftn->egid = pn->cred[5];
|
||||
proc->ftn->sgid = pn->cred[6];
|
||||
proc->ftn->fsgid = pn->cred[7];
|
||||
|
||||
proc->vm->region.user_start = pn->user_start;
|
||||
proc->vm->region.user_end = pn->user_end;
|
||||
proc->vm->region.map_start = (USER_END / 3) & LARGE_PAGE_MASK;
|
||||
proc->vm->region.map_end = proc->vm->region.map_start;
|
||||
proc->rlimit_stack.rlim_cur = pn->rlimit_stack_cur;
|
||||
proc->rlimit_stack.rlim_max = pn->rlimit_stack_max;
|
||||
memcpy(proc->rlimit, pn->rlimit, sizeof(struct rlimit) * MCK_RLIM_MAX);
|
||||
|
||||
/* TODO: Clear it at the proper timing */
|
||||
cpu_local_var(scp).post_idx = 0;
|
||||
@@ -379,7 +384,7 @@ static int process_msg_prepare_process(unsigned long rphys)
|
||||
goto err;
|
||||
}
|
||||
|
||||
dkprintf("new process : %p [%d] / table : %p\n", proc, proc->pid,
|
||||
dkprintf("new process : %p [%d] / table : %p\n", proc, proc->ftn->pid,
|
||||
proc->vm->page_table);
|
||||
|
||||
ihk_mc_free(pn);
|
||||
@@ -387,6 +392,7 @@ static int process_msg_prepare_process(unsigned long rphys)
|
||||
ihk_mc_unmap_virtual(p, npages, 1);
|
||||
ihk_mc_unmap_memory(NULL, phys, sz);
|
||||
flush_tlb();
|
||||
|
||||
return 0;
|
||||
err:
|
||||
ihk_mc_free(pn);
|
||||
@@ -467,13 +473,15 @@ static void syscall_channel_send(struct ihk_ikc_channel_desc *c,
|
||||
ihk_ikc_send(c, packet, 0);
|
||||
}
|
||||
|
||||
extern unsigned long do_kill(int, int, int, struct siginfo *);
|
||||
extern unsigned long do_kill(int, int, int, struct siginfo *, int ptracecont);
|
||||
extern void settid(struct process *proc, int mode, int newcpuid, int oldcpuid);
|
||||
|
||||
extern void process_procfs_request(unsigned long rarg);
|
||||
extern int memcheckall();
|
||||
extern int freecheck(int runcount);
|
||||
extern int runcount;
|
||||
extern void terminate_host(int pid);
|
||||
extern void debug_log(long);
|
||||
|
||||
static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
|
||||
void *__packet, void *ihk_os)
|
||||
@@ -490,6 +498,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
|
||||
struct siginfo info;
|
||||
} *sp, info;
|
||||
unsigned long pp;
|
||||
int cpuid;
|
||||
|
||||
switch (packet->msg) {
|
||||
case SCD_MSG_INIT_CHANNEL_ACKED:
|
||||
@@ -521,11 +530,17 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
|
||||
return 0;
|
||||
|
||||
case SCD_MSG_SCHEDULE_PROCESS:
|
||||
cpuid = obtain_clone_cpuid();
|
||||
if(cpuid == -1){
|
||||
kprintf("No CPU available\n");
|
||||
return -1;
|
||||
}
|
||||
dkprintf("SCD_MSG_SCHEDULE_PROCESS: %lx\n", packet->arg);
|
||||
proc = (struct process *)packet->arg;
|
||||
|
||||
settid(proc, 0, ihk_mc_get_processor_id(), -1);
|
||||
runq_add_proc(proc, ihk_mc_get_processor_id());
|
||||
settid(proc, 0, cpuid, -1);
|
||||
proc->ftn->status = PS_RUNNING;
|
||||
runq_add_proc(proc, cpuid);
|
||||
|
||||
//cpu_local_var(next) = (struct process *)packet->arg;
|
||||
return 0;
|
||||
@@ -541,12 +556,20 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
|
||||
pckt.arg = packet->arg;
|
||||
syscall_channel_send(c, &pckt);
|
||||
|
||||
rc = do_kill(info.pid, info.tid, info.sig, &info.info);
|
||||
rc = do_kill(info.pid, info.tid, info.sig, &info.info, 0);
|
||||
kprintf("SCD_MSG_SEND_SIGNAL: do_kill(pid=%d, tid=%d, sig=%d)=%d\n", info.pid, info.tid, info.sig, rc);
|
||||
return 0;
|
||||
case SCD_MSG_PROCFS_REQUEST:
|
||||
process_procfs_request(packet->arg);
|
||||
return 0;
|
||||
case SCD_MSG_CLEANUP_PROCESS:
|
||||
dkprintf("SCD_MSG_CLEANUP_PROCESS pid=%d\n", packet->pid);
|
||||
terminate_host(packet->pid);
|
||||
return 0;
|
||||
case SCD_MSG_DEBUG_LOG:
|
||||
dkprintf("SCD_MSG_DEBUG_LOG code=%lx\n", packet->arg);
|
||||
debug_log(packet->arg);
|
||||
return 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -30,6 +30,7 @@ struct malloc_header {
|
||||
#define CPU_STATUS_DISABLE (0)
|
||||
#define CPU_STATUS_IDLE (1)
|
||||
#define CPU_STATUS_RUNNING (2)
|
||||
#define CPU_STATUS_RESERVED (3)
|
||||
extern ihk_spinlock_t cpu_status_lock;
|
||||
|
||||
#define CPU_FLAG_NEED_RESCHED 0x1U
|
||||
|
||||
@@ -18,11 +18,19 @@
|
||||
#include <ihk/lock.h>
|
||||
#include <errno.h>
|
||||
#include <list.h>
|
||||
#include <shm.h>
|
||||
|
||||
/* begin types.h */
|
||||
typedef int32_t key_t;
|
||||
typedef uint32_t uid_t;
|
||||
typedef uint32_t gid_t;
|
||||
typedef int64_t time_t;
|
||||
typedef int32_t pid_t;
|
||||
/* end types.h */
|
||||
|
||||
enum {
|
||||
/* for memobj.flags */
|
||||
MF_HAS_PAGER = 0x0001,
|
||||
MF_SHMDT_OK = 0x0002,
|
||||
};
|
||||
|
||||
struct memobj {
|
||||
@@ -34,9 +42,10 @@ struct memobj {
|
||||
|
||||
typedef void memobj_release_func_t(struct memobj *obj);
|
||||
typedef void memobj_ref_func_t(struct memobj *obj);
|
||||
typedef int memobj_get_page_func_t(struct memobj *obj, off_t off, int p2align, uintptr_t *physp);
|
||||
typedef int memobj_get_page_func_t(struct memobj *obj, off_t off, int p2align, uintptr_t *physp, unsigned long *flag);
|
||||
typedef uintptr_t memobj_copy_page_func_t(struct memobj *obj, uintptr_t orgphys, int p2align);
|
||||
typedef int memobj_flush_page_func_t(struct memobj *obj, uintptr_t phys, size_t pgsize);
|
||||
typedef int memobj_invalidate_page_func_t(struct memobj *obj, uintptr_t phys, size_t pgsize);
|
||||
|
||||
struct memobj_ops {
|
||||
memobj_release_func_t * release;
|
||||
@@ -44,6 +53,7 @@ struct memobj_ops {
|
||||
memobj_get_page_func_t * get_page;
|
||||
memobj_copy_page_func_t * copy_page;
|
||||
memobj_flush_page_func_t * flush_page;
|
||||
memobj_invalidate_page_func_t * invalidate_page;
|
||||
};
|
||||
|
||||
static inline void memobj_release(struct memobj *obj)
|
||||
@@ -61,10 +71,10 @@ static inline void memobj_ref(struct memobj *obj)
|
||||
}
|
||||
|
||||
static inline int memobj_get_page(struct memobj *obj, off_t off,
|
||||
int p2align, uintptr_t *physp)
|
||||
int p2align, uintptr_t *physp, unsigned long *pflag)
|
||||
{
|
||||
if (obj->ops->get_page) {
|
||||
return (*obj->ops->get_page)(obj, off, p2align, physp);
|
||||
return (*obj->ops->get_page)(obj, off, p2align, physp, pflag);
|
||||
}
|
||||
return -ENXIO;
|
||||
}
|
||||
@@ -86,6 +96,15 @@ static inline int memobj_flush_page(struct memobj *obj, uintptr_t phys, size_t p
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int memobj_invalidate_page(struct memobj *obj, uintptr_t phys,
|
||||
size_t pgsize)
|
||||
{
|
||||
if (obj->ops->invalidate_page) {
|
||||
return (*obj->ops->invalidate_page)(obj, phys, pgsize);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void memobj_lock(struct memobj *obj)
|
||||
{
|
||||
ihk_mc_spinlock_lock_noirq(&obj->lock);
|
||||
@@ -102,6 +121,7 @@ static inline int memobj_has_pager(struct memobj *obj)
|
||||
}
|
||||
|
||||
int fileobj_create(int fd, struct memobj **objp, int *maxprotp);
|
||||
struct shmid_ds;
|
||||
int shmobj_create(struct shmid_ds *ds, struct memobj **objp);
|
||||
int zeroobj_create(struct memobj **objp);
|
||||
int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxprotp);
|
||||
|
||||
@@ -5,6 +5,8 @@
|
||||
* memory management declarations
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2013 Hitachi, Ltd.
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
@@ -13,6 +15,8 @@
|
||||
#ifndef HEADER_MMAN_H
|
||||
#define HEADER_MMAN_H
|
||||
|
||||
#include <arch/mman.h>
|
||||
|
||||
/*
|
||||
* memory protection
|
||||
*/
|
||||
@@ -32,16 +36,6 @@
|
||||
#define MAP_PRIVATE 0x02
|
||||
#define MAP_FIXED 0x10
|
||||
#define MAP_ANONYMOUS 0x20
|
||||
#define MAP_32BIT 0x40
|
||||
#define MAP_GROWSDOWN 0x0100
|
||||
#define MAP_DENYWRITE 0x0800
|
||||
#define MAP_EXECUTABLE 0x1000
|
||||
#define MAP_LOCKED 0x2000
|
||||
#define MAP_NORESERVE 0x4000
|
||||
#define MAP_POPULATE 0x8000
|
||||
#define MAP_NONBLOCK 0x00010000
|
||||
#define MAP_STACK 0x00020000
|
||||
#define MAP_HUGETLB 0x00040000
|
||||
|
||||
/*
|
||||
* memory advice
|
||||
@@ -69,4 +63,11 @@
|
||||
#define MREMAP_MAYMOVE 0x01
|
||||
#define MREMAP_FIXED 0x02
|
||||
|
||||
/*
|
||||
* for msync()
|
||||
*/
|
||||
#define MS_ASYNC 0x01
|
||||
#define MS_INVALIDATE 0x02
|
||||
#define MS_SYNC 0x04
|
||||
|
||||
#endif /* HEADER_MMAN_H */
|
||||
|
||||
60
kernel/include/prio.h
Normal file
60
kernel/include/prio.h
Normal file
@@ -0,0 +1,60 @@
|
||||
#ifndef _SCHED_PRIO_H
|
||||
#define _SCHED_PRIO_H
|
||||
|
||||
#define MAX_NICE 19
|
||||
#define MIN_NICE -20
|
||||
#define NICE_WIDTH (MAX_NICE - MIN_NICE + 1)
|
||||
|
||||
/*
|
||||
* Priority of a process goes from 0..MAX_PRIO-1, valid RT
|
||||
* priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
|
||||
* tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
|
||||
* values are inverted: lower p->prio value means higher priority.
|
||||
*
|
||||
* The MAX_USER_RT_PRIO value allows the actual maximum
|
||||
* RT priority to be separate from the value exported to
|
||||
* user-space. This allows kernel threads to set their
|
||||
* priority to a value higher than any user task. Note:
|
||||
* MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
|
||||
*/
|
||||
|
||||
#define MAX_USER_RT_PRIO 100
|
||||
#define MAX_RT_PRIO MAX_USER_RT_PRIO
|
||||
|
||||
#define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH)
|
||||
#define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2)
|
||||
|
||||
/*
|
||||
* Convert user-nice values [ -20 ... 0 ... 19 ]
|
||||
* to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
|
||||
* and back.
|
||||
*/
|
||||
#define NICE_TO_PRIO(nice) ((nice) + DEFAULT_PRIO)
|
||||
#define PRIO_TO_NICE(prio) ((prio) - DEFAULT_PRIO)
|
||||
|
||||
/*
|
||||
* 'User priority' is the nice value converted to something we
|
||||
* can work with better when scaling various scheduler parameters,
|
||||
* it's a [ 0 ... 39 ] range.
|
||||
*/
|
||||
#define USER_PRIO(p) ((p)-MAX_RT_PRIO)
|
||||
#define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio)
|
||||
#define MAX_USER_PRIO (USER_PRIO(MAX_PRIO))
|
||||
|
||||
/*
|
||||
* Convert nice value [19,-20] to rlimit style value [1,40].
|
||||
*/
|
||||
static inline long nice_to_rlimit(long nice)
|
||||
{
|
||||
return (MAX_NICE - nice + 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert rlimit style value [1,40] to nice value [-20, 19].
|
||||
*/
|
||||
static inline long rlimit_to_nice(long prio)
|
||||
{
|
||||
return (MAX_NICE - prio + 1);
|
||||
}
|
||||
|
||||
#endif /* _SCHED_PRIO_H */
|
||||
@@ -21,12 +21,14 @@
|
||||
#include <signal.h>
|
||||
#include <memobj.h>
|
||||
#include <affinity.h>
|
||||
#include <syscall.h>
|
||||
|
||||
#define VR_NONE 0x0
|
||||
#define VR_STACK 0x1
|
||||
#define VR_RESERVED 0x2
|
||||
#define VR_IO_NOCACHE 0x100
|
||||
#define VR_REMOTE 0x200
|
||||
#define VR_WRITE_COMBINED 0x400
|
||||
#define VR_DEMAND_PAGING 0x1000
|
||||
#define VR_PRIVATE 0x2000
|
||||
#define VR_LOCKED 0x4000
|
||||
@@ -61,6 +63,9 @@
|
||||
|
||||
#define PT_TRACED 0x80 /* The process is ptraced */
|
||||
#define PT_TRACE_EXEC 0x100 /* Trace execve(2) */
|
||||
#define PT_TRACE_SYSCALL_ENTER 0x200 /* Trace syscall enter */
|
||||
#define PT_TRACE_SYSCALL_EXIT 0x400 /* Trace syscall exit */
|
||||
#define PT_TRACE_SYSCALL_MASK (PT_TRACE_SYSCALL_ENTER | PT_TRACE_SYSCALL_EXIT)
|
||||
|
||||
#define PTRACE_TRACEME 0
|
||||
#define PTRACE_PEEKTEXT 1
|
||||
@@ -106,6 +111,8 @@
|
||||
#define PTRACE_EVENT_VFORK_DONE 5
|
||||
#define PTRACE_EVENT_EXIT 6
|
||||
|
||||
#define NT_X86_XSTATE 0x202 /* x86 XSAVE extended state */
|
||||
|
||||
#define SIGNAL_STOP_STOPPED 0x1 /* The process has been stopped by SIGSTOP */
|
||||
#define SIGNAL_STOP_CONTINUED 0x2 /* The process has been resumed by SIGCONT */
|
||||
|
||||
@@ -118,6 +125,11 @@
|
||||
#define WNOWAIT 0x01000000 /* Don't reap, just poll status. */
|
||||
#define __WCLONE 0x80000000
|
||||
|
||||
/* idtype */
|
||||
#define P_ALL 0
|
||||
#define P_PID 1
|
||||
#define P_PGID 2
|
||||
|
||||
/* If WIFEXITED(STATUS), the low-order 8 bits of the status. */
|
||||
#define __WEXITSTATUS(status) (((status) & 0xff00) >> 8)
|
||||
|
||||
@@ -145,7 +157,6 @@
|
||||
|
||||
#include <waitq.h>
|
||||
#include <futex.h>
|
||||
#include <rlimit.h>
|
||||
|
||||
struct user_fpregs_struct
|
||||
{
|
||||
@@ -212,7 +223,7 @@ struct user
|
||||
unsigned long int u_debugreg [8];
|
||||
};
|
||||
|
||||
#define AUXV_LEN 14
|
||||
#define AUXV_LEN 16
|
||||
|
||||
struct vm_range {
|
||||
struct list_head list;
|
||||
@@ -233,9 +244,18 @@ struct vm_regions {
|
||||
|
||||
struct process_vm;
|
||||
|
||||
struct sigfd {
|
||||
struct sigfd *next;
|
||||
int fd;
|
||||
__sigset_t mask;
|
||||
};
|
||||
#define SFD_CLOEXEC 02000000
|
||||
#define SFD_NONBLOCK 04000
|
||||
|
||||
struct sig_handler {
|
||||
ihk_spinlock_t lock;
|
||||
ihk_atomic_t use;
|
||||
struct sigfd *sigfd;
|
||||
struct k_sigaction action[_NSIG];
|
||||
};
|
||||
|
||||
@@ -243,6 +263,7 @@ struct sig_pending {
|
||||
struct list_head list;
|
||||
sigset_t sigmask;
|
||||
siginfo_t info;
|
||||
int ptracecont;
|
||||
};
|
||||
|
||||
struct sig_shared {
|
||||
@@ -267,6 +288,14 @@ struct fork_tree_node {
|
||||
int pid;
|
||||
int tid;
|
||||
int pgid;
|
||||
int ruid;
|
||||
int euid;
|
||||
int suid;
|
||||
int fsuid;
|
||||
int rgid;
|
||||
int egid;
|
||||
int sgid;
|
||||
int fsgid;
|
||||
|
||||
struct fork_tree_node *parent;
|
||||
struct list_head children;
|
||||
@@ -295,6 +324,12 @@ struct fork_tree_node {
|
||||
*/
|
||||
int ptrace;
|
||||
|
||||
/* Store ptrace event message.
|
||||
PTRACE_O_xxx will store event message here.
|
||||
PTRACE_GETEVENTMSG will get from here.
|
||||
*/
|
||||
unsigned long ptrace_eventmsg;
|
||||
|
||||
/* Store event related to signal. For example,
|
||||
it represents that the proceess has been resumed by SIGCONT. */
|
||||
int signal_flags;
|
||||
@@ -306,6 +341,29 @@ struct fork_tree_node {
|
||||
void hold_fork_tree_node(struct fork_tree_node *ftn);
|
||||
void release_fork_tree_node(struct fork_tree_node *ftn);
|
||||
|
||||
/*
|
||||
* Scheduling policies
|
||||
*/
|
||||
#define SCHED_NORMAL 0
|
||||
#define SCHED_FIFO 1
|
||||
#define SCHED_RR 2
|
||||
#define SCHED_BATCH 3
|
||||
/* SCHED_ISO: reserved but not implemented yet */
|
||||
#define SCHED_IDLE 5
|
||||
#define SCHED_DEADLINE 6
|
||||
|
||||
/* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
|
||||
#define SCHED_RESET_ON_FORK 0x40000000
|
||||
|
||||
/*
|
||||
* For the sched_{set,get}attr() calls
|
||||
*/
|
||||
#define SCHED_FLAG_RESET_ON_FORK 0x01
|
||||
|
||||
struct sched_param {
|
||||
int sched_priority;
|
||||
};
|
||||
|
||||
struct process {
|
||||
int cpu_id;
|
||||
|
||||
@@ -317,6 +375,8 @@ struct process {
|
||||
|
||||
// Runqueue list entry
|
||||
struct list_head sched_list;
|
||||
int sched_policy;
|
||||
struct sched_param sched_param;
|
||||
|
||||
ihk_spinlock_t spin_sleep_lock;
|
||||
int spin_sleep;
|
||||
@@ -327,6 +387,8 @@ struct process {
|
||||
} thread;
|
||||
|
||||
volatile int sigevent;
|
||||
int nohost;
|
||||
int execed;
|
||||
sigset_t sigmask;
|
||||
stack_t sigstack;
|
||||
ihk_spinlock_t sigpendinglock;
|
||||
@@ -334,7 +396,7 @@ struct process {
|
||||
struct sig_shared *sigshared;
|
||||
struct sig_handler *sighandler;
|
||||
|
||||
struct rlimit rlimit_stack;
|
||||
struct rlimit rlimit[MCK_RLIM_MAX];
|
||||
pgio_func_t *pgio_fp;
|
||||
void *pgio_arg;
|
||||
|
||||
@@ -343,7 +405,12 @@ struct process {
|
||||
cpu_set_t cpu_set;
|
||||
unsigned long saved_auxv[AUXV_LEN];
|
||||
|
||||
struct user *userp;
|
||||
unsigned long *ptrace_debugreg; /* debug registers for ptrace */
|
||||
struct sig_pending *ptrace_recvsig;
|
||||
struct sig_pending *ptrace_sendsig;
|
||||
fp_regs_struct *fp_regs;
|
||||
char *saved_cmdline;
|
||||
long saved_cmdline_len;
|
||||
};
|
||||
|
||||
struct process_vm {
|
||||
@@ -364,6 +431,7 @@ struct process_vm {
|
||||
|
||||
cpu_set_t cpu_set;
|
||||
ihk_spinlock_t cpu_set_lock;
|
||||
int exiting;
|
||||
};
|
||||
|
||||
|
||||
@@ -393,6 +461,10 @@ int change_prot_process_memory_range(
|
||||
unsigned long newflag);
|
||||
int remap_process_memory_range(struct process_vm *vm, struct vm_range *range,
|
||||
uintptr_t start, uintptr_t end, off_t off);
|
||||
int sync_process_memory_range(struct process_vm *vm, struct vm_range *range,
|
||||
uintptr_t start, uintptr_t end);
|
||||
int invalidate_process_memory_range(struct process_vm *vm,
|
||||
struct vm_range *range, uintptr_t start, uintptr_t end);
|
||||
struct vm_range *lookup_process_memory_range(
|
||||
struct process_vm *vm, uintptr_t start, uintptr_t end);
|
||||
struct vm_range *next_process_memory_range(
|
||||
@@ -402,7 +474,8 @@ struct vm_range *previous_process_memory_range(
|
||||
int extend_up_process_memory_range(struct process_vm *vm,
|
||||
struct vm_range *range, uintptr_t newend);
|
||||
|
||||
int page_fault_process(struct process *proc, void *fault_addr, uint64_t reason);
|
||||
int page_fault_process_vm(struct process_vm *fault_vm, void *fault_addr,
|
||||
uint64_t reason);
|
||||
int remove_process_region(struct process *proc,
|
||||
unsigned long start, unsigned long end);
|
||||
struct program_load_desc;
|
||||
@@ -428,5 +501,6 @@ void cpu_clear(int cpu, cpu_set_t *cpu_set, ihk_spinlock_t *lock);
|
||||
|
||||
struct process *findthread_and_lock(int pid, int tid, ihk_spinlock_t **savelock, unsigned long *irqstate);
|
||||
void process_unlock(void *savelock, unsigned long irqstate);
|
||||
void release_cpuid(int cpuid);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -3,7 +3,8 @@
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* header file for System V shared memory
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2014 - 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
@@ -12,38 +13,71 @@
|
||||
#ifndef HEADER_SHM_H
|
||||
#define HEADER_SHM_H
|
||||
|
||||
/* begin types.h */
|
||||
typedef int32_t key_t;
|
||||
typedef uint32_t uid_t;
|
||||
typedef uint32_t gid_t;
|
||||
typedef int64_t time_t;
|
||||
typedef int32_t pid_t;
|
||||
/* end types.h */
|
||||
#include <list.h>
|
||||
#include <memobj.h>
|
||||
#include <arch/shm.h>
|
||||
|
||||
typedef uint64_t shmatt_t;
|
||||
enum {
|
||||
/* for key_t */
|
||||
IPC_PRIVATE = 0,
|
||||
|
||||
struct ipc_perm {
|
||||
key_t key;
|
||||
uid_t uid;
|
||||
gid_t gid;
|
||||
uid_t cuid;
|
||||
gid_t cgid;
|
||||
uint16_t mode;
|
||||
uint8_t padding[2];
|
||||
uint16_t seq;
|
||||
uint8_t padding2[22];
|
||||
/* for shmflg */
|
||||
IPC_CREAT = 01000,
|
||||
IPC_EXCL = 02000,
|
||||
|
||||
SHM_RDONLY = 010000,
|
||||
SHM_RND = 020000,
|
||||
SHM_REMAP = 040000,
|
||||
SHM_EXEC = 0100000,
|
||||
|
||||
/* for shm_mode */
|
||||
SHM_DEST = 01000,
|
||||
SHM_LOCKED = 02000,
|
||||
|
||||
/* for cmd of shmctl() */
|
||||
IPC_RMID = 0,
|
||||
IPC_SET = 1,
|
||||
IPC_STAT = 2,
|
||||
IPC_INFO = 3,
|
||||
|
||||
SHM_LOCK = 11,
|
||||
SHM_UNLOCK = 12,
|
||||
SHM_STAT = 13,
|
||||
SHM_INFO = 14,
|
||||
};
|
||||
|
||||
struct shmid_ds {
|
||||
struct ipc_perm shm_perm;
|
||||
size_t shm_segsz;
|
||||
time_t shm_atime;
|
||||
time_t shm_dtime;
|
||||
time_t shm_ctime;
|
||||
pid_t shm_cpid;
|
||||
pid_t shm_lpid;
|
||||
shmatt_t shm_nattch;
|
||||
uint8_t padding[16];
|
||||
struct shmobj {
|
||||
struct memobj memobj; /* must be first */
|
||||
int index;
|
||||
uint8_t padding[4];
|
||||
size_t real_segsz;
|
||||
struct shmid_ds ds;
|
||||
struct list_head page_list;
|
||||
struct list_head chain; /* shmobj_list */
|
||||
};
|
||||
|
||||
struct shminfo {
|
||||
uint64_t shmmax;
|
||||
uint64_t shmmin;
|
||||
uint64_t shmmni;
|
||||
uint64_t shmseg;
|
||||
uint64_t shmall;
|
||||
uint8_t padding[32];
|
||||
};
|
||||
|
||||
struct shm_info {
|
||||
int32_t used_ids;
|
||||
uint8_t padding[4];
|
||||
uint64_t shm_tot;
|
||||
uint64_t shm_rss;
|
||||
uint64_t shm_swp;
|
||||
uint64_t swap_attempts;
|
||||
uint64_t swap_successes;
|
||||
};
|
||||
|
||||
void shmobj_list_lock(void);
|
||||
void shmobj_list_unlock(void);
|
||||
int shmobj_create_indexed(struct shmid_ds *ds, struct shmobj **objp);
|
||||
void shmobj_destroy(struct shmobj *obj);
|
||||
|
||||
#endif /* HEADER_SHM_H */
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
|
||||
#include <ihk/context.h>
|
||||
#include <ihk/memconst.h>
|
||||
#include <rlimit.h>
|
||||
|
||||
#define NUM_SYSCALLS 255
|
||||
|
||||
@@ -34,12 +35,15 @@
|
||||
|
||||
#define SCD_MSG_SYSCALL_ONESIDE 0x4
|
||||
#define SCD_MSG_SEND_SIGNAL 0x8
|
||||
#define SCD_MSG_CLEANUP_PROCESS 0x9
|
||||
|
||||
#define SCD_MSG_PROCFS_CREATE 0x10
|
||||
#define SCD_MSG_PROCFS_DELETE 0x11
|
||||
#define SCD_MSG_PROCFS_REQUEST 0x12
|
||||
#define SCD_MSG_PROCFS_ANSWER 0x13
|
||||
|
||||
#define SCD_MSG_DEBUG_LOG 0x20
|
||||
|
||||
#define ARCH_SET_GS 0x1001
|
||||
#define ARCH_SET_FS 0x1002
|
||||
#define ARCH_GET_FS 0x1003
|
||||
@@ -109,6 +113,24 @@ struct program_image_section {
|
||||
};
|
||||
|
||||
#define SHELL_PATH_MAX_LEN 1024
|
||||
#define MCK_RLIM_MAX 20
|
||||
|
||||
#define MCK_RLIMIT_AS 0
|
||||
#define MCK_RLIMIT_CORE 1
|
||||
#define MCK_RLIMIT_CPU 2
|
||||
#define MCK_RLIMIT_DATA 3
|
||||
#define MCK_RLIMIT_FSIZE 4
|
||||
#define MCK_RLIMIT_LOCKS 5
|
||||
#define MCK_RLIMIT_MEMLOCK 6
|
||||
#define MCK_RLIMIT_MSGQUEUE 7
|
||||
#define MCK_RLIMIT_NICE 8
|
||||
#define MCK_RLIMIT_NOFILE 9
|
||||
#define MCK_RLIMIT_NPROC 10
|
||||
#define MCK_RLIMIT_RSS 11
|
||||
#define MCK_RLIMIT_RTPRIO 12
|
||||
#define MCK_RLIMIT_RTTIME 13
|
||||
#define MCK_RLIMIT_SIGPENDING 14
|
||||
#define MCK_RLIMIT_STACK 15
|
||||
|
||||
struct program_load_desc {
|
||||
int num_sections;
|
||||
@@ -118,6 +140,7 @@ struct program_load_desc {
|
||||
int err;
|
||||
int stack_prot;
|
||||
int pgid;
|
||||
int cred[8];
|
||||
unsigned long entry;
|
||||
unsigned long user_start;
|
||||
unsigned long user_end;
|
||||
@@ -132,8 +155,7 @@ struct program_load_desc {
|
||||
unsigned long args_len;
|
||||
char *envs;
|
||||
unsigned long envs_len;
|
||||
unsigned long rlimit_stack_cur;
|
||||
unsigned long rlimit_stack_max;
|
||||
struct rlimit rlimit[MCK_RLIM_MAX];
|
||||
unsigned long interp_align;
|
||||
char shell_path[SHELL_PATH_MAX_LEN];
|
||||
struct program_image_section sections[0];
|
||||
@@ -217,9 +239,9 @@ struct syscall_params {
|
||||
SYSCALL_ARG_##a2(2); SYSCALL_ARG_##a3(3); \
|
||||
SYSCALL_ARG_##a4(4); SYSCALL_ARG_##a5(5);
|
||||
|
||||
#define SYSCALL_FOOTER return do_syscall(&request, ctx, ihk_mc_get_processor_id(), 0)
|
||||
#define SYSCALL_FOOTER return do_syscall(&request, ihk_mc_get_processor_id(), 0)
|
||||
|
||||
extern long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx, int cpu, int pid);
|
||||
extern long do_syscall(struct syscall_request *req, int cpu, int pid);
|
||||
extern int obtain_clone_cpuid();
|
||||
extern long syscall_generic_forwarding(int n, ihk_mc_user_context_t *ctx);
|
||||
|
||||
|
||||
@@ -29,6 +29,7 @@
|
||||
#include <process.h>
|
||||
#include <init.h>
|
||||
#include <cls.h>
|
||||
#include <syscall.h>
|
||||
|
||||
//#define IOCTL_FUNC_EXTENSION
|
||||
#ifdef IOCTL_FUNC_EXTENSION
|
||||
@@ -40,7 +41,7 @@
|
||||
#ifdef DEBUG_PRINT_INIT
|
||||
#define dkprintf kprintf
|
||||
#else
|
||||
#define dkprintf(...)
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#endif
|
||||
|
||||
int osnum = 0;
|
||||
@@ -118,6 +119,27 @@ char *find_command_line(char *name)
|
||||
return strstr(cmdline, name);
|
||||
}
|
||||
|
||||
static void parse_kargs(void)
|
||||
{
|
||||
kprintf("KCommand Line: %s\n", ihk_mc_get_kernel_args());
|
||||
|
||||
if (1) {
|
||||
char *key = "osnum=";
|
||||
char *p;
|
||||
|
||||
p = find_command_line(key);
|
||||
if (p != NULL) {
|
||||
p += strlen(key);
|
||||
osnum = 0;
|
||||
while (('0' <= *p) && (*p <= '9')) {
|
||||
osnum *= 10;
|
||||
osnum += *p++ - '0';
|
||||
}
|
||||
kprintf("osnum: %d\n", osnum);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void pc_init(void)
|
||||
{
|
||||
int i;
|
||||
@@ -134,15 +156,6 @@ void pc_init(void)
|
||||
APT_TYPE_STALL, APT_TYPE_CYCLE }, // not updated for KNC
|
||||
};
|
||||
|
||||
p = find_command_line("osnum=");
|
||||
if (p != NULL) {
|
||||
while (('0' <= *p) && (*p <= '9')) {
|
||||
osnum *= 10;
|
||||
osnum += *p++ - '0';
|
||||
}
|
||||
}
|
||||
dkprintf("osnum: %d\n", osnum);
|
||||
|
||||
|
||||
if (!(p = find_command_line("perfctr"))) {
|
||||
dkprintf("perfctr not initialized.\n");
|
||||
@@ -189,10 +202,6 @@ static void pc_test(void)
|
||||
|
||||
static void rest_init(void)
|
||||
{
|
||||
char *cmdline;
|
||||
cmdline = ihk_mc_get_kernel_args();
|
||||
kprintf("KCommand Line: %s\n", cmdline);
|
||||
|
||||
handler_init();
|
||||
|
||||
#ifdef USE_DMA
|
||||
@@ -229,6 +238,7 @@ static void post_init(void)
|
||||
ihk_mc_spinlock_init(&syscall_lock);
|
||||
}
|
||||
ap_start();
|
||||
create_os_procfs_files();
|
||||
}
|
||||
#ifdef DCFA_RUN
|
||||
extern void user_main();
|
||||
@@ -247,6 +257,14 @@ int main(void)
|
||||
|
||||
arch_init();
|
||||
|
||||
/*
|
||||
* In attached-mic,
|
||||
* bootparam is not mapped until arch_init() is finished.
|
||||
* In builtin-mic and builtin-x86,
|
||||
* virtual address of bootparam is changed in arch_init().
|
||||
*/
|
||||
parse_kargs();
|
||||
|
||||
mem_init();
|
||||
|
||||
rest_init();
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
#define dkprintf(...) kprintf(__VA_ARGS__)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#else
|
||||
#define dkprintf(...)
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
|
||||
43
kernel/mem.c
43
kernel/mem.c
@@ -44,7 +44,7 @@
|
||||
#define dkprintf(...) kprintf(__VA_ARGS__)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#else
|
||||
#define dkprintf(...)
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
@@ -198,8 +198,7 @@ void coredump(struct process *proc, void *regs)
|
||||
request.args[0] = chunks;
|
||||
request.args[1] = virt_to_phys(coretable);
|
||||
/* no data for now */
|
||||
ret = do_syscall(&request, proc->uctx,
|
||||
proc->cpu_id, proc->ftn->pid);
|
||||
ret = do_syscall(&request, proc->cpu_id, proc->ftn->pid);
|
||||
if (ret == 0) {
|
||||
kprintf("dumped core.\n");
|
||||
} else {
|
||||
@@ -214,8 +213,8 @@ static void unhandled_page_fault(struct process *proc, void *fault_addr, void *r
|
||||
struct process_vm *vm = proc->vm;
|
||||
struct vm_range *range;
|
||||
char found;
|
||||
int irqflags;
|
||||
unsigned long error = ((struct x86_regs *)regs)->error;
|
||||
unsigned long irqflags;
|
||||
unsigned long error = ((struct x86_user_context *)regs)->gpr.error;
|
||||
|
||||
irqflags = kprintf_lock();
|
||||
dkprintf("[%d] Page fault for 0x%lX\n",
|
||||
@@ -371,20 +370,50 @@ static void page_fault_handler(void *fault_addr, uint64_t reason, void *regs)
|
||||
dkprintf("[%d]page_fault_handler(%p,%lx,%p)\n",
|
||||
ihk_mc_get_processor_id(), fault_addr, reason, regs);
|
||||
|
||||
error = page_fault_process(proc, fault_addr, reason);
|
||||
cpu_enable_interrupt();
|
||||
|
||||
error = page_fault_process_vm(proc->vm, fault_addr, reason);
|
||||
if (error) {
|
||||
struct siginfo info;
|
||||
|
||||
if (error == -ECANCELED) {
|
||||
kprintf("process is exiting, terminate.\n");
|
||||
|
||||
ihk_mc_spinlock_lock_noirq(&proc->ftn->lock);
|
||||
proc->ftn->status = PS_ZOMBIE;
|
||||
ihk_mc_spinlock_unlock_noirq(&proc->ftn->lock);
|
||||
release_fork_tree_node(proc->ftn->parent);
|
||||
release_fork_tree_node(proc->ftn);
|
||||
//release_process(proc);
|
||||
|
||||
schedule();
|
||||
}
|
||||
|
||||
kprintf("[%d]page_fault_handler(%p,%lx,%p):"
|
||||
"fault proc failed. %d\n",
|
||||
"fault vm failed. %d\n",
|
||||
ihk_mc_get_processor_id(), fault_addr,
|
||||
reason, regs, error);
|
||||
unhandled_page_fault(proc, fault_addr, regs);
|
||||
memset(&info, '\0', sizeof info);
|
||||
if (error == -ERANGE) {
|
||||
info.si_signo = SIGBUS;
|
||||
info.si_code = BUS_ADRERR;
|
||||
info._sifields._sigfault.si_addr = fault_addr;
|
||||
set_signal(SIGBUS, regs, &info);
|
||||
}
|
||||
else {
|
||||
struct process_vm *vm = proc->vm;
|
||||
struct vm_range *range;
|
||||
|
||||
info.si_signo = SIGSEGV;
|
||||
info.si_code = SEGV_MAPERR;
|
||||
list_for_each_entry(range, &vm->vm_range_list, list) {
|
||||
if (range->start <= (unsigned long)fault_addr && range->end > (unsigned long)fault_addr) {
|
||||
info.si_code = SEGV_ACCERR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
info._sifields._sigfault.si_addr = fault_addr;
|
||||
set_signal(SIGSEGV, regs, &info);
|
||||
}
|
||||
check_signal(0, regs);
|
||||
|
||||
397
kernel/process.c
397
kernel/process.c
@@ -38,14 +38,22 @@
|
||||
#define dkprintf(...) kprintf(__VA_ARGS__)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#else
|
||||
#define dkprintf(...)
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
extern long do_arch_prctl(unsigned long code, unsigned long address);
|
||||
extern long alloc_debugreg(struct process *proc);
|
||||
extern void save_debugreg(unsigned long *debugreg);
|
||||
extern void restore_debugreg(unsigned long *debugreg);
|
||||
extern void clear_debugreg(void);
|
||||
extern void clear_single_step(struct process *proc);
|
||||
static void insert_vm_range_list(struct process_vm *vm,
|
||||
struct vm_range *newrange);
|
||||
static int copy_user_ranges(struct process *proc, struct process *org);
|
||||
extern void release_fp_regs(struct process *proc);
|
||||
extern void save_fp_regs(struct process *proc);
|
||||
extern void restore_fp_regs(struct process *proc);
|
||||
void settid(struct process *proc, int mode, int newcpuid, int oldcpuid);
|
||||
|
||||
int refcount_fork_tree_node(struct fork_tree_node *ftn)
|
||||
@@ -98,6 +106,10 @@ void init_fork_tree_node(struct fork_tree_node *ftn,
|
||||
ftn->parent = NULL;
|
||||
if (parent) {
|
||||
ftn->parent = parent;
|
||||
ftn->pgid = parent->pgid;
|
||||
ftn->ruid = parent->ruid;
|
||||
ftn->euid = parent->euid;
|
||||
ftn->suid = parent->suid;
|
||||
}
|
||||
INIT_LIST_HEAD(&ftn->children);
|
||||
INIT_LIST_HEAD(&ftn->siblings_list);
|
||||
@@ -125,7 +137,8 @@ static int init_process_vm(struct process *owner, struct process_vm *vm)
|
||||
vm->owner_process = owner;
|
||||
memset(&vm->cpu_set, 0, sizeof(cpu_set_t));
|
||||
ihk_mc_spinlock_init(&vm->cpu_set_lock);
|
||||
|
||||
vm->exiting = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -149,6 +162,8 @@ struct process *create_process(unsigned long user_pc)
|
||||
}
|
||||
}
|
||||
|
||||
proc->sched_policy = SCHED_NORMAL;
|
||||
|
||||
proc->sighandler = kmalloc(sizeof(struct sig_handler), IHK_MC_AP_NOWAIT);
|
||||
if(!proc->sighandler){
|
||||
goto err_free_process;
|
||||
@@ -235,7 +250,7 @@ struct process *clone_process(struct process *org, unsigned long pc,
|
||||
ihk_mc_modify_user_context(proc->uctx, IHK_UCR_STACK_POINTER, sp);
|
||||
ihk_mc_modify_user_context(proc->uctx, IHK_UCR_PROGRAM_COUNTER, pc);
|
||||
|
||||
proc->rlimit_stack = org->rlimit_stack;
|
||||
memcpy(proc->rlimit, org->rlimit, sizeof(struct rlimit) * MCK_RLIM_MAX);
|
||||
proc->sigmask = org->sigmask;
|
||||
|
||||
proc->ftn = kmalloc(sizeof(struct fork_tree_node), IHK_MC_AP_NOWAIT);
|
||||
@@ -245,14 +260,13 @@ struct process *clone_process(struct process *org, unsigned long pc,
|
||||
|
||||
proc->ftn->termsig = termsig;
|
||||
|
||||
init_fork_tree_node(proc->ftn, (clone_flags & CLONE_VM) ? NULL : org->ftn,
|
||||
proc);
|
||||
init_fork_tree_node(proc->ftn, org->ftn, proc);
|
||||
|
||||
/* clone() */
|
||||
if (clone_flags & CLONE_VM) {
|
||||
ihk_atomic_inc(&org->vm->refcount);
|
||||
proc->vm = org->vm;
|
||||
proc->sched_policy = org->sched_policy;
|
||||
proc->sched_param.sched_priority = org->sched_param.sched_priority;
|
||||
|
||||
/* clone signal handlers */
|
||||
if (clone_flags & CLONE_SIGHAND) {
|
||||
proc->sigstack.ss_sp = NULL;
|
||||
proc->sigstack.ss_flags = SS_DISABLE;
|
||||
proc->sigstack.ss_size = 0;
|
||||
@@ -266,7 +280,7 @@ struct process *clone_process(struct process *org, unsigned long pc,
|
||||
ihk_mc_spinlock_init(&proc->sigpendinglock);
|
||||
INIT_LIST_HEAD(&proc->sigpending);
|
||||
}
|
||||
/* fork() */
|
||||
/* copy signal handlers (i.e., fork()) */
|
||||
else {
|
||||
dkprintf("fork(): sighandler\n");
|
||||
proc->sighandler = kmalloc(sizeof(struct sig_handler),
|
||||
@@ -291,7 +305,15 @@ struct process *clone_process(struct process *org, unsigned long pc,
|
||||
INIT_LIST_HEAD(&proc->sigshared->sigpending);
|
||||
ihk_mc_spinlock_init(&proc->sigpendinglock);
|
||||
INIT_LIST_HEAD(&proc->sigpending);
|
||||
}
|
||||
|
||||
/* clone VM */
|
||||
if (clone_flags & CLONE_VM) {
|
||||
ihk_atomic_inc(&org->vm->refcount);
|
||||
proc->vm = org->vm;
|
||||
}
|
||||
/* fork() */
|
||||
else {
|
||||
proc->vm = (struct process_vm *)(proc + 1);
|
||||
|
||||
dkprintf("fork(): init_process_vm()\n");
|
||||
@@ -309,18 +331,18 @@ struct process *clone_process(struct process *org, unsigned long pc,
|
||||
}
|
||||
|
||||
dkprintf("fork(): copy_user_ranges() OK\n");
|
||||
|
||||
/* Add proc's fork_tree_node to parent's children list */
|
||||
ihk_mc_spinlock_lock_noirq(&org->ftn->lock);
|
||||
list_add_tail(&proc->ftn->siblings_list, &org->ftn->children);
|
||||
ihk_mc_spinlock_unlock_noirq(&org->ftn->lock);
|
||||
|
||||
/* We hold a reference to parent */
|
||||
hold_fork_tree_node(proc->ftn->parent);
|
||||
|
||||
/* Parent holds a reference to us */
|
||||
hold_fork_tree_node(proc->ftn);
|
||||
}
|
||||
|
||||
/* Add thread/proc's fork_tree_node to parent's children list */
|
||||
ihk_mc_spinlock_lock_noirq(&org->ftn->lock);
|
||||
list_add_tail(&proc->ftn->siblings_list, &org->ftn->children);
|
||||
ihk_mc_spinlock_unlock_noirq(&org->ftn->lock);
|
||||
|
||||
/* We hold a reference to parent */
|
||||
hold_fork_tree_node(proc->ftn->parent);
|
||||
|
||||
/* Parent holds a reference to us */
|
||||
hold_fork_tree_node(proc->ftn);
|
||||
|
||||
ihk_mc_spinlock_init(&proc->spin_sleep_lock);
|
||||
proc->spin_sleep = 0;
|
||||
@@ -345,18 +367,15 @@ int ptrace_traceme(void){
|
||||
struct fork_tree_node *child, *next;
|
||||
dkprintf("ptrace_traceme,pid=%d,proc->ftn->parent=%p\n", proc->ftn->pid, proc->ftn->parent);
|
||||
|
||||
if (proc->ftn->parent == NULL) {
|
||||
if (proc->ftn->parent == NULL || proc->ftn->ptrace) {
|
||||
error = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
dkprintf("ptrace_traceme,parent->pid=%d\n", proc->ftn->parent->pid);
|
||||
|
||||
|
||||
ihk_mc_spinlock_lock_noirq(&proc->ftn->lock);
|
||||
|
||||
proc->ftn->ptrace = PT_TRACED | PT_TRACE_EXEC;
|
||||
proc->ftn->ppid_parent = proc->ftn->parent;
|
||||
|
||||
|
||||
ihk_mc_spinlock_lock_noirq(&proc->ftn->parent->lock);
|
||||
list_for_each_entry_safe(child, next, &proc->ftn->parent->children, siblings_list) {
|
||||
if(child == proc->ftn) {
|
||||
@@ -368,12 +387,20 @@ int ptrace_traceme(void){
|
||||
error = -EPERM;
|
||||
goto out_notfound;
|
||||
found:
|
||||
proc->ftn->ptrace = PT_TRACED | PT_TRACE_EXEC;
|
||||
proc->ftn->ppid_parent = proc->ftn->parent;
|
||||
|
||||
list_add_tail(&proc->ftn->ptrace_siblings_list, &proc->ftn->parent->ptrace_children);
|
||||
|
||||
|
||||
ihk_mc_spinlock_unlock_noirq(&proc->ftn->parent->lock);
|
||||
|
||||
ihk_mc_spinlock_unlock_noirq(&proc->ftn->lock);
|
||||
|
||||
if (proc->ptrace_debugreg == NULL) {
|
||||
error = alloc_debugreg(proc);
|
||||
}
|
||||
|
||||
clear_single_step(proc);
|
||||
|
||||
out:
|
||||
dkprintf("ptrace_traceme,returning,error=%d\n", error);
|
||||
return error;
|
||||
@@ -428,6 +455,15 @@ static int copy_user_ranges(struct process *proc, struct process *org)
|
||||
vaddr += PAGE_SIZE;
|
||||
continue;
|
||||
}
|
||||
if (1) {
|
||||
struct page *page;
|
||||
|
||||
page = phys_to_page(pte_get_phys(ptep));
|
||||
if (page && page_is_in_memobj(page)) {
|
||||
vaddr += PAGE_SIZE;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
dkprintf("copy_user_ranges(): 0x%lx PTE found\n", vaddr);
|
||||
|
||||
@@ -859,8 +895,8 @@ enum ihk_mc_pt_attribute common_vrflag_to_ptattr(unsigned long flag, uint64_t fa
|
||||
attr |= PTATTR_NO_EXECUTE;
|
||||
}
|
||||
|
||||
if ((flag & VR_MEMTYPE_MASK) == VR_MEMTYPE_UC) {
|
||||
attr |= PTATTR_UNCACHABLE;
|
||||
if (flag & VR_WRITE_COMBINED) {
|
||||
attr |= PTATTR_WRITE_COMBINED;
|
||||
}
|
||||
|
||||
return attr;
|
||||
@@ -1196,6 +1232,154 @@ out:
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
 * Argument bundle handed to sync_one_page() through visit_pte_range()
 * by sync_process_memory_range().
 */
struct sync_args {
	/* memory object that dirty pages are flushed to */
	struct memobj *memobj;
};
|
||||
|
||||
/*
 * visit_pte_range() callback: write one dirty page back to its memory object.
 *
 * arg0   - struct sync_args * supplying the target memobj
 * pt     - page table being walked (only logged here)
 * ptep   - PTE of the page under inspection
 * pgaddr - virtual address mapped by the PTE
 * pgsize - size of the mapping in bytes
 *
 * PTEs that are null, hold a file offset, or are not dirty are left
 * untouched.  Otherwise the dirty bit is cleared and the TLB entry flushed
 * before the page is handed to memobj_flush_page(); if that fails the dirty
 * bit is set again so the page is still seen as dirty afterwards.
 *
 * Returns 0 on success (or nothing to do), else the memobj_flush_page() error.
 */
static int sync_one_page(void *arg0, page_table_t pt, pte_t *ptep,
		void *pgaddr, size_t pgsize)
{
	struct sync_args *sync = arg0;
	int error = 0;

	dkprintf("sync_one_page(%p,%p,%p %#lx,%p,%#lx)\n",
			arg0, pt, ptep, *ptep, pgaddr, pgsize);

	if (!pte_is_null(ptep) && !pte_is_fileoff(ptep, pgsize)
			&& pte_is_dirty(ptep, pgsize)) {
		/* Clear the dirty bit first, then drop the stale TLB entry. */
		pte_clear_dirty(ptep, pgsize);
		flush_tlb_single((uintptr_t)pgaddr);	/* XXX: TLB flush */

		error = memobj_flush_page(sync->memobj, pte_get_phys(ptep),
				pgsize);
		if (error) {
			ekprintf("sync_one_page(%p,%p,%p %#lx,%p,%#lx):"
					"flush failed. %d\n",
					arg0, pt, ptep, *ptep, pgaddr, pgsize,
					error);
			/* The write-back failed: keep the page marked dirty. */
			pte_set_dirty(ptep, pgsize);
		}
	}

	dkprintf("sync_one_page(%p,%p,%p %#lx,%p,%#lx):%d\n",
			arg0, pt, ptep, *ptep, pgaddr, pgsize, error);
	return error;
}
|
||||
|
||||
int sync_process_memory_range(struct process_vm *vm, struct vm_range *range,
|
||||
uintptr_t start, uintptr_t end)
|
||||
{
|
||||
int error;
|
||||
struct sync_args args;
|
||||
|
||||
dkprintf("sync_process_memory_range(%p,%p,%#lx,%#lx)\n",
|
||||
vm, range, start, end);
|
||||
args.memobj = range->memobj;
|
||||
|
||||
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
|
||||
memobj_lock(range->memobj);
|
||||
error = visit_pte_range(vm->page_table, (void *)start, (void *)end,
|
||||
VPTEF_SKIP_NULL, &sync_one_page, &args);
|
||||
memobj_unlock(range->memobj);
|
||||
ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
|
||||
if (error) {
|
||||
ekprintf("sync_process_memory_range(%p,%p,%#lx,%#lx):"
|
||||
"visit failed%d\n",
|
||||
vm, range, start, end, error);
|
||||
goto out;
|
||||
}
|
||||
out:
|
||||
dkprintf("sync_process_memory_range(%p,%p,%#lx,%#lx):%d\n",
|
||||
vm, range, start, end, error);
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
 * Argument bundle handed to invalidate_one_page() through visit_pte_range()
 * by invalidate_process_memory_range().
 */
struct invalidate_args {
	/* range being invalidated; supplies start, objoff and memobj */
	struct vm_range *range;
};
|
||||
|
||||
/*
 * visit_pte_range() callback: unmap one page of a memobj-backed range and
 * report the page to the memory object as invalidated.
 *
 * arg0   - struct invalidate_args * carrying the vm_range being processed
 * pt     - page table being walked (only logged here)
 * ptep   - PTE of the page under inspection
 * pgaddr - virtual address mapped by the PTE
 * pgsize - size of the mapping in bytes
 *
 * PTEs that are already null or hold a file offset are skipped.  Otherwise
 * the PTE is replaced by
 *   - a null PTE when the page sits at the offset implied by its position
 *     in the range (linear mapping), or when no struct page exists for the
 *     physical address, and
 *   - a file-offset PTE recording page->offset in all other cases.
 *
 * Returns 0 on success, otherwise the memobj_invalidate_page() error.
 * Panics if page_unmap() returns non-zero.
 */
static int invalidate_one_page(void *arg0, page_table_t pt, pte_t *ptep,
		void *pgaddr, size_t pgsize)
{
	struct invalidate_args *args = arg0;
	struct vm_range *range = args->range;
	int error;
	uintptr_t phys;
	struct page *page;
	off_t linear_off;
	pte_t apte;

	dkprintf("invalidate_one_page(%p,%p,%p %#lx,%p,%#lx)\n",
			arg0, pt, ptep, *ptep, pgaddr, pgsize);
	if (pte_is_null(ptep) || pte_is_fileoff(ptep, pgsize)) {
		error = 0;
		goto out;
	}

	phys = pte_get_phys(ptep);
	page = phys_to_page(phys);
	linear_off = range->objoff + ((uintptr_t)pgaddr - range->start);
	/*
	 * phys_to_page() may return NULL (the code below already guards
	 * page_unmap() against it).  Without a struct page there is no stored
	 * offset to preserve, so fall back to a null PTE instead of
	 * dereferencing page->offset through a NULL pointer.
	 */
	if (!page || (page->offset == linear_off)) {
		pte_make_null(&apte, pgsize);
	}
	else {
		pte_make_fileoff(page->offset, 0, pgsize, &apte);
	}
	pte_xchg(ptep, &apte);
	flush_tlb_single((uintptr_t)pgaddr);	/* XXX: TLB flush */

	if (page && page_unmap(page)) {
		panic("invalidate_one_page");
	}

	error = memobj_invalidate_page(range->memobj, phys, pgsize);
	if (error) {
		ekprintf("invalidate_one_page(%p,%p,%p %#lx,%p,%#lx):"
				"invalidate failed. %d\n",
				arg0, pt, ptep, *ptep, pgaddr, pgsize, error);
		goto out;
	}

	error = 0;
out:
	dkprintf("invalidate_one_page(%p,%p,%p %#lx,%p,%#lx):%d\n",
			arg0, pt, ptep, *ptep, pgaddr, pgsize, error);
	return error;
}
|
||||
|
||||
int invalidate_process_memory_range(struct process_vm *vm,
|
||||
struct vm_range *range, uintptr_t start, uintptr_t end)
|
||||
{
|
||||
int error;
|
||||
struct invalidate_args args;
|
||||
|
||||
dkprintf("invalidate_process_memory_range(%p,%p,%#lx,%#lx)\n",
|
||||
vm, range, start, end);
|
||||
args.range = range;
|
||||
|
||||
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
|
||||
memobj_lock(range->memobj);
|
||||
error = visit_pte_range(vm->page_table, (void *)start, (void *)end,
|
||||
VPTEF_SKIP_NULL, &invalidate_one_page, &args);
|
||||
memobj_unlock(range->memobj);
|
||||
ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
|
||||
if (error) {
|
||||
ekprintf("invalidate_process_memory_range(%p,%p,%#lx,%#lx):"
|
||||
"visit failed%d\n",
|
||||
vm, range, start, end, error);
|
||||
goto out;
|
||||
}
|
||||
out:
|
||||
dkprintf("invalidate_process_memory_range(%p,%p,%#lx,%#lx):%d\n",
|
||||
vm, range, start, end, error);
|
||||
return error;
|
||||
}
|
||||
|
||||
static int page_fault_process_memory_range(struct process_vm *vm, struct vm_range *range, uintptr_t fault_addr, uint64_t reason)
|
||||
{
|
||||
int error;
|
||||
@@ -1206,12 +1390,13 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang
|
||||
enum ihk_mc_pt_attribute attr;
|
||||
uintptr_t phys;
|
||||
struct page *page = NULL;
|
||||
unsigned long memobj_flag = 0;
|
||||
|
||||
dkprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx)\n", vm, range->start, range->end, range->flag, fault_addr, reason);
|
||||
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
|
||||
/*****/
|
||||
ptep = ihk_mc_pt_lookup_pte(vm->page_table, (void *)fault_addr, &pgaddr, &pgsize, &p2align);
|
||||
if (!(reason & PF_PROT) && ptep && !pte_is_null(ptep)
|
||||
if (!(reason & (PF_PROT | PF_PATCH)) && ptep && !pte_is_null(ptep)
|
||||
&& !pte_is_fileoff(ptep, pgsize)) {
|
||||
if (!pte_is_present(ptep)) {
|
||||
error = -EFAULT;
|
||||
@@ -1232,7 +1417,6 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang
|
||||
pgsize = PAGE_SIZE;
|
||||
p2align = PAGE_P2ALIGN;
|
||||
}
|
||||
attr = arch_vrflag_to_ptattr(range->flag, reason, ptep);
|
||||
pgaddr = (void *)(fault_addr & ~(pgsize - 1));
|
||||
if (!ptep || pte_is_null(ptep) || pte_is_fileoff(ptep, pgsize)) {
|
||||
if (range->memobj) {
|
||||
@@ -1244,7 +1428,8 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang
|
||||
else {
|
||||
off = pte_get_off(ptep, pgsize);
|
||||
}
|
||||
error = memobj_get_page(range->memobj, off, p2align, &phys);
|
||||
error = memobj_get_page(range->memobj, off, p2align,
|
||||
&phys, &memobj_flag);
|
||||
if (error) {
|
||||
if (error != -ERESTART) {
|
||||
}
|
||||
@@ -1270,9 +1455,16 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang
|
||||
else {
|
||||
phys = pte_get_phys(ptep);
|
||||
}
|
||||
|
||||
page = phys_to_page(phys);
|
||||
|
||||
attr = arch_vrflag_to_ptattr(range->flag | memobj_flag, reason, ptep);
|
||||
|
||||
/*****/
|
||||
if ((range->flag & VR_PRIVATE) && (!page || page_is_in_memobj(page) || page_is_multi_mapped(page))) {
|
||||
if (((range->flag & VR_PRIVATE)
|
||||
|| ((reason & PF_PATCH)
|
||||
&& !(range->flag & VR_PROT_WRITE)))
|
||||
&& (!page || page_is_in_memobj(page) || page_is_multi_mapped(page))) {
|
||||
if (!(attr & PTATTR_DIRTY)) {
|
||||
attr &= ~PTATTR_WRITABLE;
|
||||
}
|
||||
@@ -1324,37 +1516,41 @@ out:
|
||||
return error;
|
||||
}
|
||||
|
||||
static int do_page_fault_process(struct process *proc, void *fault_addr0, uint64_t reason)
|
||||
static int do_page_fault_process_vm(struct process_vm *vm, void *fault_addr0, uint64_t reason)
|
||||
{
|
||||
struct process_vm *vm = proc->vm;
|
||||
int error;
|
||||
const uintptr_t fault_addr = (uintptr_t)fault_addr0;
|
||||
struct vm_range *range;
|
||||
|
||||
dkprintf("[%d]do_page_fault_process(%p,%lx,%lx)\n",
|
||||
ihk_mc_get_processor_id(), proc, fault_addr0, reason);
|
||||
dkprintf("[%d]do_page_fault_process_vm(%p,%lx,%lx)\n",
|
||||
ihk_mc_get_processor_id(), vm, fault_addr0, reason);
|
||||
|
||||
ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
|
||||
|
||||
if (vm->exiting) {
|
||||
error = -ECANCELED;
|
||||
goto out;
|
||||
}
|
||||
|
||||
range = lookup_process_memory_range(vm, fault_addr, fault_addr+1);
|
||||
if (range == NULL) {
|
||||
error = -EFAULT;
|
||||
kprintf("[%d]do_page_fault_process(%p,%lx,%lx):"
|
||||
kprintf("[%d]do_page_fault_process_vm(%p,%lx,%lx):"
|
||||
"out of range. %d\n",
|
||||
ihk_mc_get_processor_id(), proc,
|
||||
ihk_mc_get_processor_id(), vm,
|
||||
fault_addr0, reason, error);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (((range->flag & VR_PROT_MASK) == VR_PROT_NONE)
|
||||
|| ((reason & PF_WRITE)
|
||||
|| (((reason & PF_WRITE) && !(reason & PF_PATCH))
|
||||
&& !(range->flag & VR_PROT_WRITE))
|
||||
|| ((reason & PF_INSTR)
|
||||
&& !(range->flag & VR_PROT_EXEC))) {
|
||||
error = -EFAULT;
|
||||
kprintf("[%d]do_page_fault_process(%p,%lx,%lx):"
|
||||
kprintf("[%d]do_page_fault_process_vm(%p,%lx,%lx):"
|
||||
"access denied. %d\n",
|
||||
ihk_mc_get_processor_id(), proc,
|
||||
ihk_mc_get_processor_id(), vm,
|
||||
fault_addr0, reason, error);
|
||||
goto out;
|
||||
}
|
||||
@@ -1391,9 +1587,9 @@ static int do_page_fault_process(struct process *proc, void *fault_addr0, uint64
|
||||
goto out;
|
||||
}
|
||||
if (error) {
|
||||
kprintf("[%d]do_page_fault_process(%p,%lx,%lx):"
|
||||
kprintf("[%d]do_page_fault_process_vm(%p,%lx,%lx):"
|
||||
"fault range failed. %d\n",
|
||||
ihk_mc_get_processor_id(), proc,
|
||||
ihk_mc_get_processor_id(), vm,
|
||||
fault_addr0, reason, error);
|
||||
goto out;
|
||||
}
|
||||
@@ -1401,22 +1597,19 @@ static int do_page_fault_process(struct process *proc, void *fault_addr0, uint64
|
||||
error = 0;
|
||||
out:
|
||||
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
|
||||
dkprintf("[%d]do_page_fault_process(%p,%lx,%lx): %d\n",
|
||||
ihk_mc_get_processor_id(), proc, fault_addr0,
|
||||
dkprintf("[%d]do_page_fault_process_vm(%p,%lx,%lx): %d\n",
|
||||
ihk_mc_get_processor_id(), vm, fault_addr0,
|
||||
reason, error);
|
||||
return error;
|
||||
}
|
||||
|
||||
int page_fault_process(struct process *proc, void *fault_addr, uint64_t reason)
|
||||
int page_fault_process_vm(struct process_vm *fault_vm, void *fault_addr, uint64_t reason)
|
||||
{
|
||||
int error;
|
||||
|
||||
if (proc != cpu_local_var(current)) {
|
||||
panic("page_fault_process: other process");
|
||||
}
|
||||
struct process *proc = cpu_local_var(current);
|
||||
|
||||
for (;;) {
|
||||
error = do_page_fault_process(proc, fault_addr, reason);
|
||||
error = do_page_fault_process_vm(fault_vm, fault_addr, reason);
|
||||
if (error != -ERESTART) {
|
||||
break;
|
||||
}
|
||||
@@ -1445,10 +1638,11 @@ int init_process_stack(struct process *process, struct program_load_desc *pn,
|
||||
int error;
|
||||
unsigned long *p;
|
||||
unsigned long minsz;
|
||||
unsigned long at_rand;
|
||||
|
||||
/* create stack range */
|
||||
minsz = PAGE_SIZE;
|
||||
size = process->rlimit_stack.rlim_cur & PAGE_MASK;
|
||||
size = process->rlimit[MCK_RLIMIT_STACK].rlim_cur & PAGE_MASK;
|
||||
if (size > (USER_END / 2)) {
|
||||
size = USER_END / 2;
|
||||
}
|
||||
@@ -1487,6 +1681,12 @@ int init_process_stack(struct process *process, struct program_load_desc *pn,
|
||||
/* set up initial stack frame */
|
||||
p = (unsigned long *)(stack + minsz);
|
||||
s_ind = -1;
|
||||
|
||||
/* "random" 16 bytes on the very top */
|
||||
p[s_ind--] = 0x010101011;
|
||||
p[s_ind--] = 0x010101011;
|
||||
at_rand = end + sizeof(unsigned long) * s_ind;
|
||||
|
||||
/* auxiliary vector */
|
||||
/* If you add/delete entries, please increase/decrease
|
||||
AUXV_LEN in include/process.h. */
|
||||
@@ -1504,10 +1704,14 @@ int init_process_stack(struct process *process, struct program_load_desc *pn,
|
||||
p[s_ind--] = AT_PAGESZ;
|
||||
p[s_ind--] = pn->at_clktck; /* AT_CLKTCK */
|
||||
p[s_ind--] = AT_CLKTCK;
|
||||
p[s_ind--] = at_rand; /* AT_RANDOM */
|
||||
p[s_ind--] = AT_RANDOM;
|
||||
|
||||
/* Save auxiliary vector for later use. */
|
||||
memcpy(process->saved_auxv, &p[s_ind + 1],
|
||||
sizeof(process->saved_auxv));
|
||||
p[s_ind--] = 0; /* envp terminating NULL */
|
||||
|
||||
p[s_ind--] = 0; /* envp terminating NULL */
|
||||
/* envp */
|
||||
for (arg_ind = envc - 1; arg_ind > -1; --arg_ind) {
|
||||
p[s_ind--] = (unsigned long)env[arg_ind];
|
||||
@@ -1656,6 +1860,8 @@ void flush_process_memory(struct process *proc)
|
||||
|
||||
dkprintf("flush_process_memory(%p)\n", proc);
|
||||
ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
|
||||
/* Let concurrent page faults know the VM will be gone */
|
||||
vm->exiting = 1;
|
||||
list_for_each_entry_safe(range, next, &vm->vm_range_list, list) {
|
||||
if (range->memobj) {
|
||||
// XXX: temporary of temporary
|
||||
@@ -1736,9 +1942,9 @@ int populate_process_memory(struct process *proc, void *start, size_t len)
|
||||
|
||||
end = (uintptr_t)start + len;
|
||||
for (addr = (uintptr_t)start; addr < end; addr += PAGE_SIZE) {
|
||||
error = page_fault_process(proc, (void *)addr, reason);
|
||||
error = page_fault_process_vm(proc->vm, (void *)addr, reason);
|
||||
if (error) {
|
||||
ekprintf("populate_process_range:page_fault_process"
|
||||
ekprintf("populate_process_range:page_fault_process_vm"
|
||||
"(%p,%lx,%lx) failed %d\n",
|
||||
proc, addr, reason, error);
|
||||
goto out;
|
||||
@@ -1788,6 +1994,21 @@ void destroy_process(struct process *proc)
|
||||
list_del(&pending->list);
|
||||
kfree(pending);
|
||||
}
|
||||
if (proc->ptrace_debugreg) {
|
||||
kfree(proc->ptrace_debugreg);
|
||||
}
|
||||
if (proc->ptrace_recvsig) {
|
||||
kfree(proc->ptrace_recvsig);
|
||||
}
|
||||
if (proc->ptrace_sendsig) {
|
||||
kfree(proc->ptrace_sendsig);
|
||||
}
|
||||
if (proc->fp_regs) {
|
||||
release_fp_regs(proc);
|
||||
}
|
||||
if (proc->saved_cmdline) {
|
||||
kfree(proc->saved_cmdline);
|
||||
}
|
||||
ihk_mc_free_pages(proc, KERNEL_STACK_NR_PAGES);
|
||||
}
|
||||
|
||||
@@ -1820,7 +2041,8 @@ static void idle(void)
|
||||
{
|
||||
struct cpu_local_var *v = get_this_cpu_local_var();
|
||||
|
||||
v->status = CPU_STATUS_IDLE;
|
||||
if(v->status == CPU_STATUS_RUNNING)
|
||||
v->status = CPU_STATUS_IDLE;
|
||||
cpu_enable_interrupt();
|
||||
|
||||
while (1) {
|
||||
@@ -1845,7 +2067,8 @@ static void idle(void)
|
||||
* 4) The idle process was resumed, and halted for waiting for
|
||||
* the interrupt that had already been handled.
|
||||
*/
|
||||
if (v->status == CPU_STATUS_IDLE) {
|
||||
if (v->status == CPU_STATUS_IDLE ||
|
||||
v->status == CPU_STATUS_RESERVED) {
|
||||
long s;
|
||||
struct process *p;
|
||||
|
||||
@@ -1858,7 +2081,8 @@ static void idle(void)
|
||||
}
|
||||
ihk_mc_spinlock_unlock(&v->runq_lock, s);
|
||||
}
|
||||
if (v->status == CPU_STATUS_IDLE) {
|
||||
if (v->status == CPU_STATUS_IDLE ||
|
||||
v->status == CPU_STATUS_RESERVED) {
|
||||
cpu_safe_halt();
|
||||
}
|
||||
else {
|
||||
@@ -2014,7 +2238,7 @@ redo:
|
||||
/* No process? Run idle.. */
|
||||
if (!next) {
|
||||
next = &cpu_local_var(idle);
|
||||
v->status = CPU_STATUS_IDLE;
|
||||
v->status = v->runq_len? CPU_STATUS_RESERVED: CPU_STATUS_IDLE;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2026,7 +2250,17 @@ redo:
|
||||
if (switch_ctx) {
|
||||
dkprintf("[%d] schedule: %d => %d \n",
|
||||
ihk_mc_get_processor_id(),
|
||||
prev ? prev->tid : 0, next ? next->tid : 0);
|
||||
prev ? prev->ftn->tid : 0, next ? next->ftn->tid : 0);
|
||||
|
||||
if (prev && prev->ptrace_debugreg) {
|
||||
save_debugreg(prev->ptrace_debugreg);
|
||||
if (next->ptrace_debugreg == NULL) {
|
||||
clear_debugreg();
|
||||
}
|
||||
}
|
||||
if (next->ptrace_debugreg) {
|
||||
restore_debugreg(next->ptrace_debugreg);
|
||||
}
|
||||
|
||||
ihk_mc_load_page_table(next->vm->page_table);
|
||||
|
||||
@@ -2061,6 +2295,13 @@ redo:
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
release_cpuid(int cpuid)
|
||||
{
|
||||
if (!get_cpu_local_var(cpuid)->runq_len)
|
||||
get_cpu_local_var(cpuid)->status = CPU_STATUS_IDLE;
|
||||
}
|
||||
|
||||
void check_need_resched(void)
|
||||
{
|
||||
struct cpu_local_var *v = get_this_cpu_local_var();
|
||||
@@ -2168,11 +2409,11 @@ void __runq_add_proc(struct process *proc, int cpu_id)
|
||||
list_add_tail(&proc->sched_list, &v->runq);
|
||||
++v->runq_len;
|
||||
proc->cpu_id = cpu_id;
|
||||
proc->ftn->status = PS_RUNNING;
|
||||
//proc->ftn->status = PS_RUNNING; /* not set here */
|
||||
get_cpu_local_var(cpu_id)->status = CPU_STATUS_RUNNING;
|
||||
|
||||
dkprintf("runq_add_proc(): tid %d added to CPU[%d]'s runq\n",
|
||||
proc->tid, cpu_id);
|
||||
proc->ftn->tid, cpu_id);
|
||||
}
|
||||
|
||||
void runq_add_proc(struct process *proc, int cpu_id)
|
||||
@@ -2236,3 +2477,29 @@ process_unlock(void *savelock, unsigned long irqstate)
|
||||
{
|
||||
ihk_mc_spinlock_unlock((ihk_spinlock_t *)savelock, irqstate);
|
||||
}
|
||||
|
||||
void
|
||||
debug_log(unsigned long arg)
|
||||
{
|
||||
struct cpu_local_var *v;
|
||||
struct process *p;
|
||||
int i;
|
||||
extern int num_processors;
|
||||
unsigned long irqstate;
|
||||
|
||||
switch(arg){
|
||||
case 1:
|
||||
for(i = 0; i < num_processors; i++){
|
||||
v = get_cpu_local_var(i);
|
||||
irqstate = ihk_mc_spinlock_lock(&(v->runq_lock));
|
||||
list_for_each_entry(p, &(v->runq), sched_list){
|
||||
if(p->ftn->pid <= 0)
|
||||
continue;
|
||||
kprintf("cpu=%d pid=%d tid=%d status=%d\n",
|
||||
i, p->ftn->pid, p->ftn->tid, p->ftn->status);
|
||||
}
|
||||
ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
367
kernel/procfs.c
367
kernel/procfs.c
@@ -47,6 +47,9 @@ static void create_proc_procfs_file(int pid, char *fname, int mode, int cpuid);
|
||||
static void delete_proc_procfs_file(int pid, char *fname);
|
||||
static void operate_proc_procfs_file(int pid, char *fname, int msg, int mode, int cpuid);
|
||||
|
||||
int copy_from_user(void *dst, const void *src, size_t siz);
|
||||
int copy_to_user(void *dst, const void *src, size_t siz);
|
||||
|
||||
/**
|
||||
* \brief Create all procfs files for process.
|
||||
*
|
||||
@@ -63,9 +66,21 @@ void create_proc_procfs_files(int pid, int cpuid)
|
||||
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/auxv", osnum, pid);
|
||||
create_proc_procfs_file(pid, fname, 0400, cpuid);
|
||||
|
||||
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/cmdline", osnum, pid);
|
||||
create_proc_procfs_file(pid, fname, 0444, cpuid);
|
||||
|
||||
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/mem", osnum, pid);
|
||||
create_proc_procfs_file(pid, fname, 0400, cpuid);
|
||||
|
||||
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/maps", osnum, pid);
|
||||
create_proc_procfs_file(pid, fname, 0444, cpuid);
|
||||
|
||||
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/pagemap", osnum, pid);
|
||||
create_proc_procfs_file(pid, fname, 0444, cpuid);
|
||||
|
||||
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/status", osnum, pid);
|
||||
create_proc_procfs_file(pid, fname, 0444, cpuid);
|
||||
|
||||
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/task/%d/mem", osnum, pid, pid);
|
||||
create_proc_procfs_file(pid, fname, 0400, cpuid);
|
||||
|
||||
@@ -116,6 +131,18 @@ void delete_proc_procfs_files(int pid)
|
||||
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/mem", osnum, pid);
|
||||
delete_proc_procfs_file(pid, fname);
|
||||
|
||||
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/maps", osnum, pid);
|
||||
delete_proc_procfs_file(pid, fname);
|
||||
|
||||
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/status", osnum, pid);
|
||||
delete_proc_procfs_file(pid, fname);
|
||||
|
||||
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/pagemap", osnum, pid);
|
||||
delete_proc_procfs_file(pid, fname);
|
||||
|
||||
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/cmdline", osnum, pid);
|
||||
delete_proc_procfs_file(pid, fname);
|
||||
|
||||
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/auxv", osnum, pid);
|
||||
delete_proc_procfs_file(pid, fname);
|
||||
|
||||
@@ -139,6 +166,42 @@ static void delete_proc_procfs_file(int pid, char *fname)
|
||||
dprintf("delete procfs file: %s done\n", fname);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief create a procfs file for this operating system
|
||||
* \param fname relative path name from "host:/proc".
|
||||
* \param mode permissions of the file to be created
|
||||
*
|
||||
* Though operate_proc_procfs_file() is intended to create a process
|
||||
* specific file, it is reused to create a OS specific file by
|
||||
* specifying -1 as the pid parameter.
|
||||
*/
|
||||
static void create_os_procfs_file(char *fname, int mode)
|
||||
{
|
||||
const pid_t pid = -1;
|
||||
const int msg = SCD_MSG_PROCFS_CREATE;
|
||||
const int cpuid = ihk_mc_get_processor_id(); /* i.e. BSP */
|
||||
|
||||
operate_proc_procfs_file(pid, fname, msg, mode, cpuid);
|
||||
return;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief create all procfs files for this operating system
|
||||
*/
|
||||
void create_os_procfs_files(void)
|
||||
{
|
||||
char *fname = NULL;
|
||||
size_t n;
|
||||
|
||||
fname = kmalloc(PROCFS_NAME_MAX, IHK_MC_AP_CRITICAL);
|
||||
|
||||
n = snprintf(fname, PROCFS_NAME_MAX, "mcos%d/stat", osnum);
|
||||
if (n >= PROCFS_NAME_MAX) panic("/proc/stat");
|
||||
create_os_procfs_file(fname, 0444);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Create/delete a procfs file for process.
|
||||
*
|
||||
@@ -202,6 +265,10 @@ void process_procfs_request(unsigned long rarg)
|
||||
struct ihk_ikc_channel_desc *syscall_channel;
|
||||
ihk_spinlock_t *savelock;
|
||||
unsigned long irqstate;
|
||||
unsigned long offset;
|
||||
int count;
|
||||
int npages;
|
||||
int is_current = 1; /* is 'proc' same as 'current'? */
|
||||
|
||||
dprintf("process_procfs_request: invoked.\n");
|
||||
|
||||
@@ -221,7 +288,9 @@ void process_procfs_request(unsigned long rarg)
|
||||
dprintf("remote pbuf: %x\n", r->pbuf);
|
||||
pbuf = ihk_mc_map_memory(NULL, r->pbuf, r->count);
|
||||
dprintf("pbuf: %x\n", pbuf);
|
||||
buf = ihk_mc_map_virtual(pbuf, 1, PTATTR_WRITABLE | PTATTR_ACTIVE);
|
||||
count = r->count + ((uintptr_t)pbuf & (PAGE_SIZE - 1));
|
||||
npages = (count + (PAGE_SIZE - 1)) / PAGE_SIZE;
|
||||
buf = ihk_mc_map_virtual(pbuf, npages, PTATTR_WRITABLE | PTATTR_ACTIVE);
|
||||
dprintf("buf: %p\n", buf);
|
||||
if (buf == NULL) {
|
||||
kprintf("ERROR: process_procfs_request: got a null buffer.\n");
|
||||
@@ -229,6 +298,8 @@ void process_procfs_request(unsigned long rarg)
|
||||
goto bufunavail;
|
||||
}
|
||||
|
||||
count = r->count;
|
||||
offset = r->offset;
|
||||
dprintf("fname: %s, offset: %lx, count:%d.\n", r->fname, r->offset, r->count);
|
||||
|
||||
/*
|
||||
@@ -270,18 +341,56 @@ void process_procfs_request(unsigned long rarg)
|
||||
|
||||
dprintf("mismatched pid. We are %d, but requested pid is %d.\n",
|
||||
pid, cpu_local_var(current)->pid);
|
||||
if ((proc = findthread_and_lock(pid, tid, &savelock, &irqstate))){
|
||||
tid = pid; /* main thread */
|
||||
proc = findthread_and_lock(pid, tid, &savelock, &irqstate);
|
||||
if (!proc) {
|
||||
dprintf("We cannot find the proper cpu for requested pid.\n");
|
||||
goto end;
|
||||
}
|
||||
else if (proc->cpu_id != ihk_mc_get_processor_id()) {
|
||||
/* The target process has gone by migration. */
|
||||
r->newcpu = proc->cpu_id;
|
||||
dprintf("expected cpu id is %d.\n", proc->cpu_id);
|
||||
process_unlock(savelock, irqstate);
|
||||
ans = 0;
|
||||
} else {
|
||||
dprintf("We cannot find the proper cpu for requested pid.\n");
|
||||
goto end;
|
||||
}
|
||||
else {
|
||||
process_unlock(savelock, irqstate);
|
||||
/* 'proc' is not 'current' */
|
||||
is_current = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (!strcmp(p, "stat")) { /* "/proc/stat" */
|
||||
extern int num_processors; /* kernel/ap.c */
|
||||
char *p;
|
||||
size_t remain;
|
||||
int cpu;
|
||||
|
||||
if (offset > 0) {
|
||||
ans = 0;
|
||||
eof = 1;
|
||||
goto end;
|
||||
}
|
||||
} else {
|
||||
p = buf;
|
||||
remain = count;
|
||||
for (cpu = 0; cpu < num_processors; ++cpu) {
|
||||
size_t n;
|
||||
|
||||
n = snprintf(p, remain, "cpu%d\n", cpu);
|
||||
if (n >= remain) {
|
||||
ans = -ENOSPC;
|
||||
eof = 1;
|
||||
goto end;
|
||||
}
|
||||
p += n;
|
||||
}
|
||||
ans = p - buf;
|
||||
eof = 1;
|
||||
goto end;
|
||||
}
|
||||
else {
|
||||
goto end;
|
||||
}
|
||||
dprintf("matched PID: %d.\n", pid);
|
||||
@@ -297,19 +406,215 @@ void process_procfs_request(unsigned long rarg)
|
||||
struct vm_range *range;
|
||||
struct process_vm *vm = proc->vm;
|
||||
|
||||
list_for_each_entry(range, &vm->vm_range_list, list) {
|
||||
dprintf("range: %lx - %lx\n", range->start, range->end);
|
||||
if ((range->start <= r->offset) &&
|
||||
(r->offset < range->end)) {
|
||||
unsigned int len = r->count;
|
||||
if (range->end < r->offset + r->count) {
|
||||
len = range->end - r->offset;
|
||||
if (!is_current) {
|
||||
uint64_t reason = PF_POPULATE | PF_WRITE | PF_USER;
|
||||
unsigned long offset = r->offset;
|
||||
unsigned long left = r->count;
|
||||
int ret;
|
||||
|
||||
ans = 0;
|
||||
if(left == 0)
|
||||
goto end;
|
||||
|
||||
while(left){
|
||||
unsigned long pa;
|
||||
char *va;
|
||||
int pos = offset & (PAGE_SIZE - 1);
|
||||
int size = PAGE_SIZE - pos;
|
||||
|
||||
if(size > left)
|
||||
size = left;
|
||||
ret = page_fault_process_vm(proc->vm,
|
||||
(void *)offset, reason);
|
||||
if(ret){
|
||||
if(ans == 0)
|
||||
ans = -EIO;
|
||||
goto end;
|
||||
}
|
||||
memcpy((void *)buf, (void *)range->start, len);
|
||||
ans = len;
|
||||
ret = ihk_mc_pt_virt_to_phys(vm->page_table,
|
||||
(void *)offset, &pa);
|
||||
if(ret){
|
||||
if(ans == 0)
|
||||
ans = -EIO;
|
||||
goto end;
|
||||
}
|
||||
va = phys_to_virt(pa);
|
||||
memcpy(buf + ans, va, size);
|
||||
offset += size;
|
||||
left -= size;
|
||||
ans += size;
|
||||
}
|
||||
}
|
||||
else{
|
||||
unsigned long offset = r->offset;
|
||||
unsigned long left = r->count;
|
||||
unsigned long pos;
|
||||
unsigned long l;
|
||||
ans = 0;
|
||||
list_for_each_entry(range, &vm->vm_range_list, list) {
|
||||
dprintf("range: %lx - %lx\n", range->start, range->end);
|
||||
while (left &&
|
||||
(range->start <= offset) &&
|
||||
(offset < range->end)) {
|
||||
pos = offset & (PAGE_SIZE - 1);
|
||||
l = PAGE_SIZE - pos;
|
||||
if(l > left)
|
||||
l = left;
|
||||
if(copy_from_user(buf, (void *)offset, l)){
|
||||
if(ans == 0)
|
||||
ans = -EIO;
|
||||
goto end;
|
||||
}
|
||||
buf += l;
|
||||
ans += l;
|
||||
offset += l;
|
||||
left -= l;
|
||||
}
|
||||
}
|
||||
}
|
||||
goto end;
|
||||
}
|
||||
|
||||
/*
|
||||
* mcos%d/PID/maps
|
||||
*/
|
||||
if (strcmp(p, "maps") == 0) {
|
||||
struct vm_range *range;
|
||||
struct process_vm *vm = proc->vm;
|
||||
int left = r->count - 1; /* extra 1 for terminating NULL */
|
||||
int written = 0;
|
||||
char *_buf = buf;
|
||||
|
||||
/* Starting from the middle of a proc file is not supported for maps */
|
||||
if (offset > 0) {
|
||||
ans = 0;
|
||||
eof = 1;
|
||||
goto end;
|
||||
}
|
||||
|
||||
ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
|
||||
|
||||
list_for_each_entry(range, &vm->vm_range_list, list) {
|
||||
int written_now;
|
||||
|
||||
/* format is (from man proc):
|
||||
* address perms offset dev inode pathname
|
||||
* 08048000-08056000 r-xp 00000000 03:0c 64593 /usr/sbin/gpm
|
||||
*/
|
||||
written_now = snprintf(_buf, left,
|
||||
"%lx-%lx %s%s%s%s %lx %lx:%lx %d %s\n",
|
||||
range->start, range->end,
|
||||
range->flag & VR_PROT_READ ? "r" : "-",
|
||||
range->flag & VR_PROT_WRITE ? "w" : "-",
|
||||
range->flag & VR_PROT_EXEC ? "x" : "-",
|
||||
range->flag & VR_PRIVATE ? "p" : "s",
|
||||
/* TODO: fill in file details! */
|
||||
0UL,
|
||||
0UL,
|
||||
0UL,
|
||||
0,
|
||||
""
|
||||
);
|
||||
|
||||
left -= written_now;
|
||||
_buf += written_now;
|
||||
written += written_now;
|
||||
|
||||
if (left == 0) {
|
||||
kprintf("%s(): WARNING: buffer too small to fill proc/maps\n",
|
||||
__FUNCTION__);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
|
||||
|
||||
ans = written + 1;
|
||||
eof = 1;
|
||||
goto end;
|
||||
}
|
||||
|
||||
/*
|
||||
* mcos%d/PID/pagemap
|
||||
*/
|
||||
if (strcmp(p, "pagemap") == 0) {
|
||||
struct process_vm *vm = proc->vm;
|
||||
uint64_t *_buf = (uint64_t *)buf;
|
||||
uint64_t start, end;
|
||||
|
||||
if (offset < PAGE_SIZE) {
|
||||
kprintf("WARNING: /proc/pagemap queried for NULL page\n");
|
||||
ans = 0;
|
||||
goto end;
|
||||
}
|
||||
|
||||
/* Check alignment */
|
||||
if ((offset % sizeof(uint64_t) != 0) ||
|
||||
(count % sizeof(uint64_t) != 0)) {
|
||||
ans = 0;
|
||||
eof = 1;
|
||||
goto end;
|
||||
}
|
||||
|
||||
start = (offset / sizeof(uint64_t)) << PAGE_SHIFT;
|
||||
end = start + ((count / sizeof(uint64_t)) << PAGE_SHIFT);
|
||||
|
||||
ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
|
||||
|
||||
while (start < end) {
|
||||
*_buf = ihk_mc_pt_virt_to_pagemap(proc->vm->page_table, start);
|
||||
dprintf("PID: %d, /proc/pagemap: 0x%lx -> %lx\n", proc->ftn->pid,
|
||||
start, *_buf);
|
||||
start += PAGE_SIZE;
|
||||
++_buf;
|
||||
}
|
||||
|
||||
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
|
||||
|
||||
dprintf("/proc/pagemap: 0x%lx - 0x%lx, count: %d\n",
|
||||
start, end, count);
|
||||
|
||||
ans = count;
|
||||
goto end;
|
||||
}
|
||||
|
||||
/*
|
||||
* mcos%d/PID/status
|
||||
*/
|
||||
if (strcmp(p, "status") == 0) {
|
||||
struct vm_range *range;
|
||||
unsigned long lockedsize = 0;
|
||||
char tmp[1024];
|
||||
int len;
|
||||
struct fork_tree_node *ftn = proc->ftn;
|
||||
|
||||
ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock);
|
||||
list_for_each_entry(range, &proc->vm->vm_range_list, list) {
|
||||
if(range->flag & VR_LOCKED)
|
||||
lockedsize += range->end - range->start;
|
||||
}
|
||||
ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock);
|
||||
|
||||
sprintf(tmp,
|
||||
"Uid:\t%d\t%d\t%d\t%d\n"
|
||||
"Gid:\t%d\t%d\t%d\t%d\n"
|
||||
"VmLck:\t%9lu kB\n",
|
||||
ftn->ruid, ftn->euid, ftn->suid, ftn->fsuid,
|
||||
ftn->rgid, ftn->egid, ftn->sgid, ftn->fsgid,
|
||||
(lockedsize + 1023) >> 10);
|
||||
len = strlen(tmp);
|
||||
if (r->offset < len) {
|
||||
if (r->offset + r->count < len) {
|
||||
ans = r->count;
|
||||
} else {
|
||||
eof = 1;
|
||||
ans = len;
|
||||
}
|
||||
strncpy(buf, tmp + r->offset, ans);
|
||||
} else if (r->offset == len) {
|
||||
ans = 0;
|
||||
eof = 1;
|
||||
}
|
||||
goto end;
|
||||
}
|
||||
|
||||
@@ -335,6 +640,35 @@ void process_procfs_request(unsigned long rarg)
|
||||
goto end;
|
||||
}
|
||||
|
||||
/*
|
||||
* mcos%d/PID/cmdline
|
||||
*/
|
||||
if (strcmp(p, "cmdline") == 0) {
|
||||
unsigned int limit = proc->saved_cmdline_len;
|
||||
unsigned int len = r->count;
|
||||
|
||||
if(!proc->saved_cmdline){
|
||||
ans = 0;
|
||||
eof = 1;
|
||||
goto end;
|
||||
}
|
||||
|
||||
if (r->offset < limit) {
|
||||
if (limit < r->offset + r->count) {
|
||||
len = limit - r->offset;
|
||||
}
|
||||
memcpy((void *)buf, ((char *) proc->saved_cmdline) + r->offset, len);
|
||||
ans = len;
|
||||
if (r->offset + len == limit) {
|
||||
eof = 1;
|
||||
}
|
||||
} else if (r->offset == limit) {
|
||||
ans = 0;
|
||||
eof = 1;
|
||||
}
|
||||
goto end;
|
||||
}
|
||||
|
||||
/*
|
||||
* mcos%d/PID/task/PID/mem
|
||||
*
|
||||
@@ -351,6 +685,9 @@ void process_procfs_request(unsigned long rarg)
|
||||
struct vm_range *range;
|
||||
struct process_vm *vm = proc->vm;
|
||||
|
||||
if (!is_current) {
|
||||
goto end;
|
||||
}
|
||||
if (pid != tid) {
|
||||
/* We are not multithreaded yet. */
|
||||
goto end;
|
||||
@@ -445,7 +782,7 @@ void process_procfs_request(unsigned long rarg)
|
||||
*/
|
||||
dprintf("could not find a matching entry for %s.\n", p);
|
||||
end:
|
||||
ihk_mc_unmap_virtual(buf, 1, 0);
|
||||
ihk_mc_unmap_virtual(buf, npages, 0);
|
||||
dprintf("ret: %d, eof: %d\n", ans, eof);
|
||||
r->ret = ans;
|
||||
r->eof = eof;
|
||||
|
||||
24
kernel/script/mkimage.smp-x86
Normal file
24
kernel/script/mkimage.smp-x86
Normal file
@@ -0,0 +1,24 @@
|
||||
#!/bin/sh
#
# mkimage.smp-x86: produce the output image by copying the input file.
#
#   $1  source file to copy
#   $2  destination path
#   $3  referenced only by the disabled kboot/elfboot commands kept below
#
# Exit status: 0 when the copy succeeds, otherwise the status of cp.

# Quote the arguments so paths containing spaces or glob characters are
# passed through intact, and report a failed copy to the caller instead of
# unconditionally exiting 0.
cp "$1" "$2" || exit $?

exit 0

# Disabled legacy procedure (never reached; kept for reference only).
#set -e
#
#O=`pwd`
#
#make -C $3/../arch/x86/kboot O=$O clean
##make -C $3/../kboot O=$O KIMAGE="$O/$1" LOAD_PA=0x3a001000
#make -C $3/../arch/x86/kboot O=$O KIMAGE="$O/$1" LOAD_PA=0x101001000
##make -C $3/../kboot O=$O KIMAGE="$O/$1" LOAD_PA=0x901001000
#
#make -C $3/../arch/x86/elfboot O=$O clean
#make -C $3/../arch/x86/elfboot O=$O
#
#cat elfboot/elfboot kboot/kboot.elf > $2
#
#make -C $3/../arch/x86/kboot O=$O clean
##make -C $3/../kboot O=$O KIMAGE="$O/$1" LOAD_PA=0x3a001000
#make -C $3/../arch/x86/kboot O=$O KIMAGE="$O/$1" LOAD_PA=0x201001000
#cat elfboot/elfboot kboot/kboot.elf > $2.8G
|
||||
188
kernel/shmobj.c
188
kernel/shmobj.c
@@ -3,7 +3,8 @@
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* shared memory object
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2014 - 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
@@ -26,12 +27,8 @@
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#define fkprintf(...) kprintf(__VA_ARGS__)
|
||||
|
||||
struct shmobj {
|
||||
struct memobj memobj; /* must be first */
|
||||
long ref;
|
||||
struct shmid_ds ds;
|
||||
struct list_head page_list;
|
||||
};
|
||||
static LIST_HEAD(shmobj_list_head);
|
||||
static ihk_spinlock_t shmobj_list_lock_body = SPIN_LOCK_UNLOCKED;
|
||||
|
||||
static memobj_release_func_t shmobj_release;
|
||||
static memobj_ref_func_t shmobj_ref;
|
||||
@@ -98,6 +95,25 @@ static struct page *page_list_first(struct shmobj *obj)
|
||||
return list_first_entry(&obj->page_list, struct page, list);
|
||||
}
|
||||
|
||||
/***********************************************************************
|
||||
* shmobj_list
|
||||
*/
|
||||
void shmobj_list_lock(void)
|
||||
{
|
||||
ihk_mc_spinlock_lock_noirq(&shmobj_list_lock_body);
|
||||
return;
|
||||
}
|
||||
|
||||
void shmobj_list_unlock(void)
|
||||
{
|
||||
ihk_mc_spinlock_unlock_noirq(&shmobj_list_lock_body);
|
||||
return;
|
||||
}
|
||||
|
||||
/***********************************************************************
|
||||
* operations
|
||||
*/
|
||||
int the_seq = 0;
|
||||
int shmobj_create(struct shmid_ds *ds, struct memobj **objp)
|
||||
{
|
||||
struct shmobj *obj = NULL;
|
||||
@@ -114,8 +130,11 @@ int shmobj_create(struct shmid_ds *ds, struct memobj **objp)
|
||||
|
||||
memset(obj, 0, sizeof(*obj));
|
||||
obj->memobj.ops = &shmobj_ops;
|
||||
obj->ref = 1;
|
||||
obj->ds = *ds;
|
||||
obj->ds.shm_perm.seq = the_seq++;
|
||||
obj->ds.shm_nattch = 1;
|
||||
obj->index = -1;
|
||||
obj->real_segsz = (obj->ds.shm_segsz + PAGE_SIZE - 1) & PAGE_MASK;
|
||||
page_list_init(obj);
|
||||
ihk_mc_spinlock_init(&obj->memobj.lock);
|
||||
|
||||
@@ -127,65 +146,124 @@ out:
|
||||
if (obj) {
|
||||
kfree(obj);
|
||||
}
|
||||
dkprintf("shmobj_create(%p %#lx,%p):%d %p\n",
|
||||
dkprintf("shmobj_create_indexed(%p %#lx,%p):%d %p\n",
|
||||
ds, ds->shm_segsz, objp, error, *objp);
|
||||
return error;
|
||||
}
|
||||
|
||||
int shmobj_create_indexed(struct shmid_ds *ds, struct shmobj **objp)
|
||||
{
|
||||
int error;
|
||||
struct memobj *obj;
|
||||
|
||||
error = shmobj_create(ds, &obj);
|
||||
if (!error) {
|
||||
obj->flags |= MF_SHMDT_OK;
|
||||
*objp = to_shmobj(obj);
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
void shmobj_destroy(struct shmobj *obj)
|
||||
{
|
||||
extern struct shm_info the_shm_info;
|
||||
extern struct list_head kds_free_list;
|
||||
extern int the_maxi;
|
||||
|
||||
dkprintf("shmobj_destroy(%p [%d %o])\n", obj, obj->index, obj->ds.shm_perm.mode);
|
||||
/* zap page_list */
|
||||
for (;;) {
|
||||
struct page *page;
|
||||
int count;
|
||||
|
||||
page = page_list_first(obj);
|
||||
if (!page) {
|
||||
break;
|
||||
}
|
||||
page_list_remove(obj, page);
|
||||
|
||||
dkprintf("shmobj_destroy(%p):"
|
||||
"release page. %p %#lx %d %d",
|
||||
obj, page, page_to_phys(page),
|
||||
page->mode, page->count);
|
||||
count = ihk_atomic_sub_return(1, &page->count);
|
||||
if (!((page->mode == PM_MAPPED) && (count == 0))) {
|
||||
fkprintf("shmobj_destroy(%p): "
|
||||
"page %p phys %#lx mode %#x"
|
||||
" count %d off %#lx\n",
|
||||
obj, page,
|
||||
page_to_phys(page),
|
||||
page->mode, count,
|
||||
page->offset);
|
||||
panic("shmobj_release");
|
||||
}
|
||||
|
||||
/* XXX:NYI: large pages */
|
||||
page->mode = PM_NONE;
|
||||
free_pages(phys_to_virt(page_to_phys(page)), 1);
|
||||
}
|
||||
if (obj->index < 0) {
|
||||
kfree(obj);
|
||||
}
|
||||
else {
|
||||
list_del(&obj->chain);
|
||||
--the_shm_info.used_ids;
|
||||
|
||||
list_add(&obj->chain, &kds_free_list);
|
||||
for (;;) {
|
||||
struct shmobj *p;
|
||||
|
||||
list_for_each_entry(p, &kds_free_list, chain) {
|
||||
if (p->index == the_maxi) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (&p->chain == &kds_free_list) {
|
||||
break;
|
||||
}
|
||||
|
||||
list_del(&p->chain);
|
||||
kfree(p);
|
||||
--the_maxi;
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
static void shmobj_release(struct memobj *memobj)
|
||||
{
|
||||
struct shmobj *obj = to_shmobj(memobj);
|
||||
struct shmobj *freeobj = NULL;
|
||||
long newref;
|
||||
extern time_t time(void);
|
||||
extern pid_t getpid(void);
|
||||
|
||||
dkprintf("shmobj_release(%p)\n", memobj);
|
||||
memobj_lock(&obj->memobj);
|
||||
--obj->ref;
|
||||
if (obj->ref <= 0) {
|
||||
if (obj->ref < 0) {
|
||||
if (obj->index >= 0) {
|
||||
obj->ds.shm_dtime = time();
|
||||
obj->ds.shm_lpid = getpid();
|
||||
dkprintf("shmobj_release:drop shm_nattach %p %d\n", obj, obj->ds.shm_nattch);
|
||||
}
|
||||
newref = --obj->ds.shm_nattch;
|
||||
if (newref <= 0) {
|
||||
if (newref < 0) {
|
||||
fkprintf("shmobj_release(%p):ref %ld\n",
|
||||
memobj, obj->ref);
|
||||
memobj, newref);
|
||||
panic("shmobj_release:freeing free shmobj");
|
||||
}
|
||||
freeobj = obj;
|
||||
if (obj->ds.shm_perm.mode & SHM_DEST) {
|
||||
freeobj = obj;
|
||||
}
|
||||
}
|
||||
memobj_unlock(&obj->memobj);
|
||||
|
||||
if (freeobj) {
|
||||
/* zap page_list */
|
||||
for (;;) {
|
||||
struct page *page;
|
||||
int count;
|
||||
|
||||
page = page_list_first(obj);
|
||||
if (!page) {
|
||||
break;
|
||||
}
|
||||
page_list_remove(obj, page);
|
||||
|
||||
dkprintf("shmobj_release(%p):"
|
||||
"release page. %p %#lx %d %d",
|
||||
memobj, page, page_to_phys(page),
|
||||
page->mode, page->count);
|
||||
count = ihk_atomic_sub_return(1, &page->count);
|
||||
if (!((page->mode == PM_MAPPED) && (count == 0))) {
|
||||
fkprintf("shmobj_release(%p): "
|
||||
"page %p phys %#lx mode %#x"
|
||||
" count %d off %#lx\n",
|
||||
memobj, page,
|
||||
page_to_phys(page),
|
||||
page->mode, count,
|
||||
page->offset);
|
||||
panic("shmobj_release");
|
||||
}
|
||||
|
||||
/* XXX:NYI: large pages */
|
||||
page->mode = PM_NONE;
|
||||
free_pages(phys_to_virt(page_to_phys(page)), 1);
|
||||
}
|
||||
dkprintf("shmobj_release(%p):free shmobj", memobj);
|
||||
kfree(freeobj);
|
||||
shmobj_list_lock();
|
||||
shmobj_destroy(freeobj);
|
||||
shmobj_list_unlock();
|
||||
}
|
||||
dkprintf("shmobj_release(%p):\n", memobj);
|
||||
dkprintf("shmobj_release(%p): %ld\n", memobj, newref);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -193,17 +271,23 @@ static void shmobj_ref(struct memobj *memobj)
|
||||
{
|
||||
struct shmobj *obj = to_shmobj(memobj);
|
||||
long newref;
|
||||
extern time_t time(void);
|
||||
extern pid_t getpid(void);
|
||||
|
||||
dkprintf("shmobj_ref(%p)\n", memobj);
|
||||
memobj_lock(&obj->memobj);
|
||||
newref = ++obj->ref;
|
||||
newref = ++obj->ds.shm_nattch;
|
||||
if (obj->index >= 0) {
|
||||
obj->ds.shm_atime = time();
|
||||
obj->ds.shm_lpid = getpid();
|
||||
}
|
||||
memobj_unlock(&obj->memobj);
|
||||
dkprintf("shmobj_ref(%p): newref %ld\n", memobj, newref);
|
||||
return;
|
||||
}
|
||||
|
||||
static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align,
|
||||
uintptr_t *physp)
|
||||
uintptr_t *physp, unsigned long *pflag)
|
||||
{
|
||||
struct shmobj *obj = to_shmobj(memobj);
|
||||
int error;
|
||||
@@ -227,13 +311,13 @@ static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align,
|
||||
memobj, off, p2align, physp, error);
|
||||
goto out;
|
||||
}
|
||||
if (obj->ds.shm_segsz <= off) {
|
||||
if (obj->real_segsz <= off) {
|
||||
error = -ERANGE;
|
||||
ekprintf("shmobj_get_page(%p,%#lx,%d,%p):beyond the end. %d\n",
|
||||
memobj, off, p2align, physp, error);
|
||||
goto out;
|
||||
}
|
||||
if ((obj->ds.shm_segsz - off) < (PAGE_SIZE << p2align)) {
|
||||
if ((obj->real_segsz - off) < (PAGE_SIZE << p2align)) {
|
||||
error = -ENOSPC;
|
||||
ekprintf("shmobj_get_page(%p,%#lx,%d,%p):too large. %d\n",
|
||||
memobj, off, p2align, physp, error);
|
||||
|
||||
3023
kernel/syscall.c
3023
kernel/syscall.c
File diff suppressed because it is too large
Load Diff
@@ -38,7 +38,7 @@
|
||||
#ifdef DEBUG_PRINT_TIMER
|
||||
#define dkprintf kprintf
|
||||
#else
|
||||
#define dkprintf(...)
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#endif
|
||||
|
||||
#define LOOP_TIMEOUT 10
|
||||
@@ -167,7 +167,7 @@ void wake_timers_loop(void)
|
||||
list_del(&timer->list);
|
||||
|
||||
dkprintf("timers timeout occurred, waking up pid: %d\n",
|
||||
timer->proc->pid);
|
||||
timer->proc->ftn->pid);
|
||||
|
||||
waitq_wakeup(&timer->processes);
|
||||
}
|
||||
|
||||
@@ -3,7 +3,8 @@
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* read-only zeroed page object
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2014 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
@@ -165,7 +166,7 @@ out:
|
||||
}
|
||||
|
||||
static int zeroobj_get_page(struct memobj *memobj, off_t off, int p2align,
|
||||
uintptr_t *physp)
|
||||
uintptr_t *physp, unsigned long *pflag)
|
||||
{
|
||||
int error;
|
||||
struct zeroobj *obj = to_zeroobj(memobj);
|
||||
|
||||
@@ -99,4 +99,7 @@ enum ihk_asr_type {
|
||||
int ihk_mc_arch_set_special_register(enum ihk_asr_type, unsigned long value);
|
||||
int ihk_mc_arch_get_special_register(enum ihk_asr_type, unsigned long *value);
|
||||
|
||||
extern unsigned int ihk_ikc_irq;
|
||||
extern unsigned int ihk_ikc_irq_apicid;
|
||||
|
||||
#endif
|
||||
|
||||
@@ -22,8 +22,8 @@ struct ihk_kmsg_buf {
|
||||
};
|
||||
|
||||
extern int kprintf(const char *format, ...);
|
||||
extern int kprintf_lock();
|
||||
extern void kprintf_unlock(int irqflags);
|
||||
extern unsigned long kprintf_lock(void);
|
||||
extern void kprintf_unlock(unsigned long irqflags);
|
||||
extern int __kprintf(const char *format, ...);
|
||||
|
||||
extern void panic(const char *msg);
|
||||
|
||||
@@ -147,7 +147,8 @@ struct page_table *ihk_mc_pt_create(enum ihk_mc_ap_flag ap_flag);
|
||||
void ihk_mc_pt_destroy(struct page_table *pt);
|
||||
void ihk_mc_load_page_table(struct page_table *pt);
|
||||
int ihk_mc_pt_virt_to_phys(struct page_table *pt,
|
||||
void *virt, unsigned long *phys);
|
||||
const void *virt, unsigned long *phys);
|
||||
uint64_t ihk_mc_pt_virt_to_pagemap(struct page_table *pt, unsigned long virt);
|
||||
|
||||
void remote_flush_tlb_cpumask(struct process_vm *vm,
|
||||
unsigned long addr, int cpu_id);
|
||||
|
||||
Reference in New Issue
Block a user