Compare commits
234 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
eb6de9d1de | ||
|
|
42c8ef6539 | ||
|
|
780d4fc29b | ||
|
|
94fcc5bb9a | ||
|
|
e822fc47dd | ||
|
|
26492a2895 | ||
|
|
1a5ff7f535 | ||
|
|
4c181d7fc0 | ||
|
|
be78eb752e | ||
|
|
0ad7c8ac50 | ||
|
|
e9458a6cd3 | ||
|
|
9e3b0b5866 | ||
|
|
0eaa27291a | ||
|
|
0b07dd1b79 | ||
|
|
c25f8c7a39 | ||
|
|
9e53ae20d4 | ||
|
|
09c9ee58d1 | ||
|
|
153a59a6f4 | ||
|
|
cad72a8562 | ||
|
|
343bfbd30a | ||
|
|
4e4f1208f7 | ||
|
|
a325a78866 | ||
|
|
6ae99454da | ||
|
|
04e193de13 | ||
|
|
2ca46fabfd | ||
|
|
5b737b499d | ||
|
|
cb4f3a4d65 | ||
|
|
51789fcd38 | ||
|
|
9f50c5dc3a | ||
|
|
cd905f7ad1 | ||
|
|
79266f6b97 | ||
|
|
a666b69c2c | ||
|
|
47e8552eba | ||
|
|
8dd9175411 | ||
|
|
f08e0c0054 | ||
|
|
d862f345be | ||
|
|
a14768c49a | ||
|
|
56e57775e7 | ||
|
|
b3b752ba41 | ||
|
|
7b32f2f73b | ||
|
|
ea5a1a8693 | ||
|
|
92f8fb2b2b | ||
|
|
a3e440414d | ||
|
|
10ba03ccea | ||
|
|
ccb7c30a05 | ||
|
|
7dfeb8e7ce | ||
|
|
b1b706453f | ||
|
|
bd5708286d | ||
|
|
c8a13cf213 | ||
|
|
5ad0a03d18 | ||
|
|
3819eec03f | ||
|
|
40b8587a8a | ||
|
|
e7b1115572 | ||
|
|
e1a01803d0 | ||
|
|
69f4b0e1ad | ||
|
|
0909a5bed5 | ||
|
|
9dd224385e | ||
|
|
4176c59fd3 | ||
|
|
afeee5432f | ||
|
|
9ae5bcf46e | ||
|
|
b8f166e608 | ||
|
|
c85a9b99e1 | ||
|
|
7c816a6b73 | ||
|
|
5a0cd3f53f | ||
|
|
9fa62adfe7 | ||
|
|
f0ab8ec89a | ||
|
|
f4cc82578d | ||
|
|
9ba40dc0ff | ||
|
|
8d6c97ea5c | ||
|
|
386f59000a | ||
|
|
215cd370a1 | ||
|
|
0a0e2c04a0 | ||
|
|
aa191b87d3 | ||
|
|
d5c243571f | ||
|
|
328e69a335 | ||
|
|
b77755d0f7 | ||
|
|
d7bae14707 | ||
|
|
4e58d08f5c | ||
|
|
9b1e691588 | ||
|
|
3988b0fc61 | ||
|
|
54eb345847 | ||
|
|
bbe7aef95b | ||
|
|
1ff4cf68c2 | ||
|
|
1bc84d3feb | ||
|
|
f7d78c8b7d | ||
|
|
7647c99cc2 | ||
|
|
43a774fbfc | ||
|
|
a029bcac37 | ||
|
|
bd913c503b | ||
|
|
e838affde8 | ||
|
|
59ee251e1c | ||
|
|
fa79db3bcc | ||
|
|
b7c5cba361 | ||
|
|
382614ddae | ||
|
|
aa959c6b34 | ||
|
|
aabc3d386d | ||
|
|
4ebe778ede | ||
|
|
fbb776e4fb | ||
|
|
41b85281a4 | ||
|
|
5532e3c663 | ||
|
|
2af2b1205f | ||
|
|
7d5a68be1b | ||
|
|
f4162dff52 | ||
|
|
a0d909af75 | ||
|
|
63669b7f71 | ||
|
|
4946964ed0 | ||
|
|
5f19842a6a | ||
|
|
9271d5346d | ||
|
|
7bba05cfa4 | ||
|
|
c2a1f933e8 | ||
|
|
055769254d | ||
|
|
786ae83380 | ||
|
|
8c662c83be | ||
|
|
4698bc40c2 | ||
|
|
f5d935b703 | ||
|
|
d53865ac5f | ||
|
|
8934eb91a4 | ||
|
|
ed6d94a358 | ||
|
|
fa923da0e3 | ||
|
|
1f8265efbc | ||
|
|
b553de7435 | ||
|
|
6a82412d64 | ||
|
|
fa29c34995 | ||
|
|
f84b5acf79 | ||
|
|
8b24f60861 | ||
|
|
f82bb284bb | ||
|
|
bf12a5c45e | ||
|
|
ea5681232e | ||
|
|
e6011be1af | ||
|
|
9946ccd6b1 | ||
|
|
daec7de828 | ||
|
|
9ad48083aa | ||
|
|
2eac58aab3 | ||
|
|
22d8d169b6 | ||
|
|
8db54c2637 | ||
|
|
063fa963c3 | ||
|
|
a6488adcc1 | ||
|
|
2239a6b09b | ||
|
|
8c179d506a | ||
|
|
377341ce5f | ||
|
|
8caeba7cba | ||
|
|
1d2f5d9893 | ||
|
|
e4f47df3c3 | ||
|
|
4751055ee4 | ||
|
|
305ebfed0e | ||
|
|
b66b950129 | ||
|
|
4aa8ba2eef | ||
|
|
fab2c2aa97 | ||
|
|
026164eda4 | ||
|
|
e91d1e5b7b | ||
|
|
73743eeeb0 | ||
|
|
c1c1fd578a | ||
|
|
f35cc66d18 | ||
|
|
d9cf1d49b1 | ||
|
|
3d426ada01 | ||
|
|
0307f6a6cc | ||
|
|
0dee04f16b | ||
|
|
0e98e87b95 | ||
|
|
d35e60c1a3 | ||
|
|
037e17c4ed | ||
|
|
2baf274dac | ||
|
|
3b04043f2a | ||
|
|
c0edb6fe6f | ||
|
|
bb137bc9bb | ||
|
|
16af976a71 | ||
|
|
6485578a7f | ||
|
|
d2d0fc6721 | ||
|
|
9574a28a5f | ||
|
|
dbe4ec3247 | ||
|
|
99debc548f | ||
|
|
fa15f6b106 | ||
|
|
8568a73f33 | ||
|
|
8b57b2ee57 | ||
|
|
9a36e7b84a | ||
|
|
d998691425 | ||
|
|
8cdf70c500 | ||
|
|
0e0bc548f6 | ||
|
|
d21ae28843 | ||
|
|
a4a806bef7 | ||
|
|
d30d8fe71c | ||
|
|
a5bdd41c3d | ||
|
|
5f5ab34559 | ||
|
|
b26fa4e87c | ||
|
|
bd5f43b119 | ||
|
|
f97f8dbab3 | ||
|
|
e30946f1f0 | ||
|
|
c3ade864d9 | ||
|
|
9c35935671 | ||
|
|
ed33ee65b2 | ||
|
|
d04b5a09bd | ||
|
|
08cc31f9bf | ||
|
|
cf2166f830 | ||
|
|
765de119dc | ||
|
|
d46110b4d9 | ||
|
|
74f0aec478 | ||
|
|
e3eb7e68bc | ||
|
|
912b8a886c | ||
|
|
e25d35a191 | ||
|
|
a9aad67541 | ||
|
|
cd6e663f48 | ||
|
|
5f095b3952 | ||
|
|
811a275176 | ||
|
|
b388f59ebd | ||
|
|
ff47261337 | ||
|
|
a91bf9a13d | ||
|
|
fcfa94cea1 | ||
|
|
55f7ee1526 | ||
|
|
b1b6fab7b8 | ||
|
|
391886a6f1 | ||
|
|
c810afe224 | ||
|
|
5566ed1a63 | ||
|
|
f0f91d2246 | ||
|
|
0942bf0ce0 | ||
|
|
9c94e90007 | ||
|
|
a6ac906105 | ||
|
|
d4ba4dc8b3 | ||
|
|
815d907ca4 | ||
|
|
3c24315f91 | ||
|
|
25f108bf78 | ||
|
|
cc9d30efbf | ||
|
|
af83f1be64 | ||
|
|
b2cab453f1 | ||
|
|
8909597499 | ||
|
|
86f2a9067b | ||
|
|
a5889fb5df | ||
|
|
f1a86cfbd3 | ||
|
|
c1cf630a94 | ||
|
|
8f30e16976 | ||
|
|
58e2e0a246 | ||
|
|
ea02628f2b | ||
|
|
89acf5c5d6 | ||
|
|
ac8e2a0c40 | ||
|
|
ab7aa3354f | ||
|
|
c4e0b84792 |
30
Makefile.in
30
Makefile.in
@@ -6,7 +6,7 @@ all::
|
||||
@(cd executer/kernel; make modules)
|
||||
@(cd executer/user; make)
|
||||
@case "$(TARGET)" in \
|
||||
attached-mic | builtin-x86 | builtin-mic) \
|
||||
attached-mic | builtin-x86 | builtin-mic | smp-x86) \
|
||||
(cd kernel; make) \
|
||||
;; \
|
||||
*) \
|
||||
@@ -19,7 +19,7 @@ install::
|
||||
@(cd executer/kernel; make install)
|
||||
@(cd executer/user; make install)
|
||||
@case "$(TARGET)" in \
|
||||
attached-mic | builtin-x86 | builtin-mic) \
|
||||
attached-mic | builtin-x86 | builtin-mic | smp-x86) \
|
||||
(cd kernel; make install) \
|
||||
;; \
|
||||
*) \
|
||||
@@ -27,19 +27,39 @@ install::
|
||||
exit 1 \
|
||||
;; \
|
||||
esac
|
||||
if [ "$(TARGET)" = attached-mic ]; then \
|
||||
@case "$(TARGET)" in \
|
||||
attached-mic) \
|
||||
mkdir -p -m 755 $(SBINDIR); \
|
||||
install -m 755 arch/x86/tools/mcreboot-attached-mic.sh $(SBINDIR)/mcreboot; \
|
||||
install -m 755 arch/x86/tools/mcshutdown-attached-mic.sh $(SBINDIR)/mcshutdown; \
|
||||
mkdir -p -m 755 $(MANDIR)/man1; \
|
||||
install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
|
||||
fi
|
||||
;; \
|
||||
builtin-x86) \
|
||||
mkdir -p -m 755 $(SBINDIR); \
|
||||
install -m 755 arch/x86/tools/mcreboot-builtin-x86.sh $(SBINDIR)/mcreboot; \
|
||||
install -m 755 arch/x86/tools/mcshutdown-builtin-x86.sh $(SBINDIR)/mcshutdown; \
|
||||
mkdir -p -m 755 $(MANDIR)/man1; \
|
||||
install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
|
||||
;; \
|
||||
smp-x86) \
|
||||
mkdir -p -m 755 $(SBINDIR); \
|
||||
install -m 755 arch/x86/tools/mcreboot-smp-x86.sh $(SBINDIR)/mcreboot.sh; \
|
||||
install -m 755 arch/x86/tools/mcstop+release-smp-x86.sh $(SBINDIR)/mcstop+release.sh; \
|
||||
mkdir -p -m 755 $(MANDIR)/man1; \
|
||||
install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
|
||||
;; \
|
||||
*) \
|
||||
echo "unknown target $(TARGET)" >&2 \
|
||||
exit 1 \
|
||||
;; \
|
||||
esac
|
||||
|
||||
clean::
|
||||
@(cd executer/kernel; make clean)
|
||||
@(cd executer/user; make clean)
|
||||
@case "$(TARGET)" in \
|
||||
attached-mic | builtin-x86 | builtin-mic) \
|
||||
attached-mic | builtin-x86 | builtin-mic | smp-x86) \
|
||||
(cd kernel; make clean) \
|
||||
;; \
|
||||
*) \
|
||||
|
||||
2
arch/x86/kernel/Makefile.arch
Normal file
2
arch/x86/kernel/Makefile.arch
Normal file
@@ -0,0 +1,2 @@
|
||||
IHK_OBJS += cpu.o interrupt.o memory.o trampoline.o local.o context.o
|
||||
IHK_OBJS += perfctr.o syscall.o vsyscall.o
|
||||
@@ -5,13 +5,18 @@
|
||||
* Control CPU.
|
||||
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2011 - 2012 Taku Shimosawa
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY
|
||||
* 2015/02/26: bgerofi - set pstate, turbo mode and power/perf bias MSRs
|
||||
* 2015/02/12: Dave - enable AVX if supported
|
||||
*/
|
||||
|
||||
#include <ihk/cpu.h>
|
||||
#include <ihk/debug.h>
|
||||
#include <ihk/mm.h>
|
||||
#include <types.h>
|
||||
#include <errno.h>
|
||||
#include <list.h>
|
||||
@@ -22,6 +27,7 @@
|
||||
#include <march.h>
|
||||
#include <signal.h>
|
||||
#include <process.h>
|
||||
#include <cls.h>
|
||||
|
||||
#define LAPIC_ID 0x020
|
||||
#define LAPIC_TIMER 0x320
|
||||
@@ -33,6 +39,7 @@
|
||||
#define LAPIC_ICR0 0x300
|
||||
#define LAPIC_ICR2 0x310
|
||||
#define LAPIC_ESR 0x280
|
||||
#define LOCAL_TIMER_VECTOR 0xef
|
||||
|
||||
#define APIC_INT_LEVELTRIG 0x08000
|
||||
#define APIC_INT_ASSERT 0x04000
|
||||
@@ -42,6 +49,8 @@
|
||||
#define APIC_DM_NMI 0x00400
|
||||
#define APIC_DM_INIT 0x00500
|
||||
#define APIC_DM_STARTUP 0x00600
|
||||
#define APIC_DIVISOR 16
|
||||
#define APIC_LVT_TIMER_PERIODIC (1 << 17)
|
||||
|
||||
|
||||
//#define DEBUG_PRINT_CPU
|
||||
@@ -49,7 +58,7 @@
|
||||
#ifdef DEBUG_PRINT_CPU
|
||||
#define dkprintf kprintf
|
||||
#else
|
||||
#define dkprintf(...)
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#endif
|
||||
|
||||
|
||||
@@ -58,6 +67,7 @@ void assign_processor_id(void);
|
||||
void arch_delay(int);
|
||||
void x86_set_warm_reset(unsigned long ip, char *first_page_va);
|
||||
void x86_init_perfctr(void);
|
||||
int gettime_local_support = 0;
|
||||
|
||||
extern int kprintf(const char *format, ...);
|
||||
|
||||
@@ -106,7 +116,12 @@ void reload_idt(void)
|
||||
}
|
||||
|
||||
static struct list_head handlers[256 - 32];
|
||||
extern char nmi[];
|
||||
extern char page_fault[], general_protection_exception[];
|
||||
extern char debug_exception[], int3_exception[];
|
||||
|
||||
uint64_t boot_pat_state = 0;
|
||||
int no_turbo = 0; /* May be updated by early parsing of kargs */
|
||||
|
||||
static void init_idt(void)
|
||||
{
|
||||
@@ -122,15 +137,22 @@ static void init_idt(void)
|
||||
set_idt_entry(i, generic_common_handlers[i]);
|
||||
}
|
||||
|
||||
set_idt_entry(2, (uintptr_t)nmi);
|
||||
set_idt_entry(13, (unsigned long)general_protection_exception);
|
||||
set_idt_entry(14, (unsigned long)page_fault);
|
||||
|
||||
set_idt_entry_trap_gate(1, (unsigned long)debug_exception);
|
||||
set_idt_entry_trap_gate(3, (unsigned long)int3_exception);
|
||||
|
||||
reload_idt();
|
||||
}
|
||||
|
||||
static int xsave_available = 0;
|
||||
|
||||
void init_fpu(void)
|
||||
{
|
||||
unsigned long reg;
|
||||
unsigned long cpuid01_ecx;
|
||||
|
||||
asm volatile("movq %%cr0, %0" : "=r"(reg));
|
||||
/* Unset EM and TS flag. */
|
||||
@@ -140,10 +162,40 @@ void init_fpu(void)
|
||||
asm volatile("movq %0, %%cr0" : : "r"(reg));
|
||||
|
||||
#ifdef ENABLE_SSE
|
||||
asm volatile("cpuid" : "=c" (cpuid01_ecx) : "a" (0x1) : "%rbx", "%rdx");
|
||||
asm volatile("movq %%cr4, %0" : "=r"(reg));
|
||||
/* Set OSFXSR flag. */
|
||||
reg |= (1 << 9);
|
||||
/* Cr4 flags:
|
||||
OSFXSR[b9] - enables SSE instructions
|
||||
OSXMMEXCPT[b10] - generate SIMD FP exception instead of invalid op
|
||||
OSXSAVE[b18] - enables access to xcr0
|
||||
|
||||
CPUID.01H:ECX flags:
|
||||
XSAVE[b26] - verify existence of extended crs/XSAVE
|
||||
AVX[b28] - verify existence of AVX instructions
|
||||
*/
|
||||
reg |= ((1 << 9) | (1 << 10));
|
||||
if(cpuid01_ecx & (1 << 26)) {
|
||||
/* XSAVE set, enable access to xcr0 */
|
||||
dkprintf("init_fpu(): XSAVE available\n");
|
||||
xsave_available = 1;
|
||||
reg |= (1 << 18);
|
||||
}
|
||||
asm volatile("movq %0, %%cr4" : : "r"(reg));
|
||||
|
||||
dkprintf("init_fpu(): SSE init: CR4 = 0x%016lX\n", reg);
|
||||
|
||||
/* Set xcr0[2:1] to enable avx ops */
|
||||
if(cpuid01_ecx & (1 << 28)) {
|
||||
reg = xgetbv(0);
|
||||
reg |= 0x6;
|
||||
xsetbv(0, reg);
|
||||
dkprintf("init_fpu(): AVX init: XCR0 = 0x%016lX\n", reg);
|
||||
}
|
||||
|
||||
/* TODO: set MSR_IA32_XSS to enable xsaves/xrstors */
|
||||
|
||||
#else
|
||||
kprintf("init_fpu(): SSE not enabled\n");
|
||||
#endif
|
||||
|
||||
asm volatile("finit");
|
||||
@@ -203,6 +255,174 @@ void lapic_icr_write(unsigned int h, unsigned int l)
|
||||
lapic_write(LAPIC_ICR0, l);
|
||||
}
|
||||
|
||||
|
||||
void lapic_timer_enable(unsigned int clocks)
|
||||
{
|
||||
unsigned int lvtt_value;
|
||||
|
||||
lapic_write(LAPIC_TIMER_INITIAL, clocks / APIC_DIVISOR);
|
||||
lapic_write(LAPIC_TIMER_DIVIDE, 3);
|
||||
|
||||
/* initialize periodic timer */
|
||||
lvtt_value = LOCAL_TIMER_VECTOR | APIC_LVT_TIMER_PERIODIC;
|
||||
lapic_write(LAPIC_TIMER, lvtt_value);
|
||||
}
|
||||
|
||||
void lapic_timer_disable()
|
||||
{
|
||||
lapic_write(LAPIC_TIMER_INITIAL, 0);
|
||||
}
|
||||
|
||||
/*
 * Debug helper: dump one model-specific register.
 *
 * idx: MSR index passed to rdmsr().
 *
 * Prints the value in decimal and hexadecimal, then a header row with
 * the bit positions 63..0 followed by a row with each bit's value.
 */
void print_msr(int idx)
{
	unsigned long long msr_value = rdmsr(idx);
	int pos;

	__kprintf("MSR 0x%x val (dec): %llu\n", idx, msr_value);
	__kprintf("MSR 0x%x val (hex): 0x%llx\n", idx, msr_value);

	/* Header row: bit positions, most significant first. */
	__kprintf(" ");
	pos = 63;
	while (pos >= 0) {
		__kprintf("%3d", pos);
		pos--;
	}
	__kprintf("\n");

	/* Value row: one column per bit, aligned with the header. */
	__kprintf("MSR 0x%x val (bin):", idx);
	pos = 63;
	while (pos >= 0) {
		__kprintf("%3d", (int)((msr_value >> pos) & 1));
		pos--;
	}
	__kprintf("\n");
}
|
||||
|
||||
|
||||
void init_pstate_and_turbo(void)
|
||||
{
|
||||
uint64_t value;
|
||||
uint64_t eax, ecx;
|
||||
|
||||
asm volatile("cpuid" : "=a" (eax), "=c" (ecx) : "a" (0x6) : "%rbx", "%rdx");
|
||||
if (!(ecx & 0x01)) {
|
||||
/* P-states and/or Turbo Boost are not supported. */
|
||||
return;
|
||||
}
|
||||
|
||||
/* Query and set max pstate value:
|
||||
*
|
||||
* IA32_PERF_CTL (0x199H) bit 15:0:
|
||||
* Target performance State Value
|
||||
*
|
||||
* The base operating ratio can be read
|
||||
* from MSR_PLATFORM_INFO[15:8].
|
||||
*/
|
||||
value = rdmsr(MSR_PLATFORM_INFO);
|
||||
value &= 0xFF00;
|
||||
|
||||
/* Turbo boost setting:
|
||||
* Bit 1 of EAX in Leaf 06H (i.e. CPUID.06H:EAX[1]) indicates opportunistic
|
||||
* processor performance operation, such as IDA, has been enabled by BIOS.
|
||||
*
|
||||
* IA32_PERF_CTL (0x199H) bit 32: IDA (i.e., turbo boost) Engage. (R/W)
|
||||
* When set to 1: disengages IDA
|
||||
* When set to 0: enables IDA
|
||||
*/
|
||||
if ((eax & (1 << 1))) {
|
||||
if (!no_turbo) {
|
||||
uint64_t turbo_value;
|
||||
|
||||
turbo_value = rdmsr(MSR_NHM_TURBO_RATIO_LIMIT);
|
||||
turbo_value &= 0xFF;
|
||||
value = turbo_value << 8;
|
||||
|
||||
/* Enable turbo boost */
|
||||
value &= ~((uint64_t)1 << 32);
|
||||
}
|
||||
/* Turbo boost feature is supported, but requested to be turned off */
|
||||
else {
|
||||
/* Disable turbo boost */
|
||||
value |= (uint64_t)1 << 32;
|
||||
}
|
||||
}
|
||||
|
||||
wrmsr(MSR_IA32_PERF_CTL, value);
|
||||
|
||||
/* IA32_ENERGY_PERF_BIAS (0x1B0H) bit 3:0:
|
||||
* (The processor supports this capability if CPUID.06H:ECX.SETBH[bit 3] is set.)
|
||||
* Power Policy Preference:
|
||||
* 0 indicates preference to highest performance.
|
||||
* 15 indicates preference to maximize energy saving.
|
||||
*
|
||||
* Set energy/perf bias to high performance
|
||||
*/
|
||||
if (ecx & (1 << 3)) {
|
||||
wrmsr(MSR_IA32_ENERGY_PERF_BIAS, 0);
|
||||
}
|
||||
|
||||
//print_msr(MSR_IA32_MISC_ENABLE);
|
||||
//print_msr(MSR_IA32_PERF_CTL);
|
||||
//print_msr(MSR_IA32_ENERGY_PERF_BIAS);
|
||||
}
|
||||
|
||||
enum {
|
||||
PAT_UC = 0, /* uncached */
|
||||
PAT_WC = 1, /* Write combining */
|
||||
PAT_WT = 4, /* Write Through */
|
||||
PAT_WP = 5, /* Write Protected */
|
||||
PAT_WB = 6, /* Write Back (default) */
|
||||
PAT_UC_MINUS = 7, /* UC, but can be overridden by MTRR */
|
||||
};
|
||||
|
||||
#define PAT(x, y) ((uint64_t)PAT_ ## y << ((x)*8))
|
||||
|
||||
void init_pat(void)
|
||||
{
|
||||
uint64_t pat;
|
||||
uint64_t edx;
|
||||
|
||||
/*
|
||||
* An operating system or executive can detect the availability of the
|
||||
* PAT by executing the CPUID instruction with a value of 1 in the EAX
|
||||
* register. Support for the PAT is indicated by the PAT flag (bit 16
|
||||
* of the values returned to EDX register). If the PAT is supported,
|
||||
* the operating system or executive can use the IA32_PAT MSR to program
|
||||
* the PAT. When memory types have been assigned to entries in the PAT,
|
||||
* software can then use of the PAT-index bit (PAT) in the page-table and
|
||||
* page-directory entries along with the PCD and PWT bits to assign memory
|
||||
* types from the PAT to individual pages.
|
||||
*/
|
||||
|
||||
asm volatile("cpuid" : "=d" (edx) : "a" (0x1) : "%rbx", "%rcx");
|
||||
if (!(edx & ((uint64_t)1 << 16))) {
|
||||
kprintf("PAT not supported.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Set PWT to Write-Combining. All other bits stay the same */
|
||||
/* (Based on Linux' settings)
|
||||
*
|
||||
* PTE encoding used in Linux:
|
||||
* PAT
|
||||
* |PCD
|
||||
* ||PWT
|
||||
* |||
|
||||
* 000 WB _PAGE_CACHE_WB
|
||||
* 001 WC _PAGE_CACHE_WC
|
||||
* 010 UC- _PAGE_CACHE_UC_MINUS
|
||||
* 011 UC _PAGE_CACHE_UC
|
||||
* PAT bit unused
|
||||
*/
|
||||
pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
|
||||
PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);
|
||||
|
||||
/* Boot CPU check */
|
||||
if (!boot_pat_state)
|
||||
boot_pat_state = rdmsr(MSR_IA32_CR_PAT);
|
||||
|
||||
wrmsr(MSR_IA32_CR_PAT, pat);
|
||||
dkprintf("PAT support detected and reconfigured.\n");
|
||||
}
|
||||
|
||||
void init_lapic(void)
|
||||
{
|
||||
unsigned long baseaddr;
|
||||
@@ -262,16 +482,23 @@ static void init_smp_processor(void)
|
||||
|
||||
static char *trampoline_va, *first_page_va;
|
||||
|
||||
/*@
|
||||
@ assigns trampoline_va;
|
||||
@ assigns first_page_va;
|
||||
@*/
|
||||
void ihk_mc_init_ap(void)
|
||||
{
|
||||
struct ihk_mc_cpu_info *cpu_info = ihk_mc_get_cpu_info();
|
||||
|
||||
trampoline_va = map_fixed_area(AP_TRAMPOLINE, AP_TRAMPOLINE_SIZE,
|
||||
0);
|
||||
trampoline_va = map_fixed_area(ap_trampoline, AP_TRAMPOLINE_SIZE, 0);
|
||||
kprintf("Trampoline area: 0x%lx \n", ap_trampoline);
|
||||
first_page_va = map_fixed_area(0, PAGE_SIZE, 0);
|
||||
|
||||
kprintf("# of cpus : %d\n", cpu_info->ncpus);
|
||||
init_processors_local(cpu_info->ncpus);
|
||||
|
||||
kprintf("IKC IRQ vector: %d, IKC target CPU APIC: %d\n",
|
||||
ihk_ikc_irq, ihk_ikc_irq_apicid);
|
||||
|
||||
/* Do initialization for THIS cpu (BSP) */
|
||||
assign_processor_id();
|
||||
@@ -347,6 +574,29 @@ static void check_no_execute(void)
|
||||
return;
|
||||
}
|
||||
|
||||
void init_gettime_support(void)
|
||||
{
|
||||
uint64_t op;
|
||||
uint64_t eax;
|
||||
uint64_t ebx;
|
||||
uint64_t ecx;
|
||||
uint64_t edx;
|
||||
|
||||
/* Check if Invariant TSC supported.
|
||||
* Processor’s support for invariant TSC is indicated by
|
||||
* CPUID.80000007H:EDX[8].
|
||||
* See page 2498 of the Intel64 and IA-32 Architectures Software
|
||||
* Developer’s Manual - combined */
|
||||
|
||||
op = 0x80000007;
|
||||
asm volatile("cpuid" : "=a"(eax),"=b"(ebx),"=c"(ecx),"=d"(edx) : "a" (op));
|
||||
|
||||
if (edx & (1 << 8)) {
|
||||
gettime_local_support = 1;
|
||||
kprintf("Invariant TSC supported.\n");
|
||||
}
|
||||
}
|
||||
|
||||
void init_cpu(void)
|
||||
{
|
||||
enable_page_protection_fault();
|
||||
@@ -355,6 +605,8 @@ void init_cpu(void)
|
||||
init_lapic();
|
||||
init_syscall();
|
||||
x86_init_perfctr();
|
||||
init_pstate_and_turbo();
|
||||
init_pat();
|
||||
}
|
||||
|
||||
void setup_x86(void)
|
||||
@@ -371,6 +623,8 @@ void setup_x86(void)
|
||||
|
||||
init_cpu();
|
||||
|
||||
init_gettime_support();
|
||||
|
||||
kprintf("setup_x86 done.\n");
|
||||
}
|
||||
|
||||
@@ -406,32 +660,77 @@ void setup_x86_ap(void (*next_func)(void))
|
||||
|
||||
void arch_show_interrupt_context(const void *reg);
|
||||
void set_signal(int sig, void *regs, struct siginfo *info);
|
||||
void check_signal(unsigned long rc, void *regs);
|
||||
void check_signal(unsigned long, void *, int);
|
||||
extern void tlb_flush_handler(int vector);
|
||||
|
||||
void handle_interrupt(int vector, struct x86_regs *regs)
|
||||
void handle_interrupt(int vector, struct x86_user_context *regs)
|
||||
{
|
||||
struct ihk_mc_interrupt_handler *h;
|
||||
struct cpu_local_var *v = get_this_cpu_local_var();
|
||||
|
||||
lapic_ack();
|
||||
++v->in_interrupt;
|
||||
|
||||
dkprintf("CPU[%d] got interrupt, vector: %d, RIP: 0x%lX\n",
|
||||
ihk_mc_get_processor_id(), vector, regs->rip);
|
||||
ihk_mc_get_processor_id(), vector, regs->gpr.rip);
|
||||
|
||||
if (vector < 0 || vector > 255) {
|
||||
panic("Invalid interrupt vector.");
|
||||
}
|
||||
else if (vector < 32) {
|
||||
if (vector == 8 ||
|
||||
(vector >= 10 && vector <= 15) || vector == 17) {
|
||||
struct siginfo info;
|
||||
switch(vector){
|
||||
case 0:
|
||||
memset(&info, '\0', sizeof info);
|
||||
info.si_signo = SIGFPE;
|
||||
info.si_code = FPE_INTDIV;
|
||||
info._sifields._sigfault.si_addr = (void *)regs->gpr.rip;
|
||||
set_signal(SIGFPE, regs, &info);
|
||||
break;
|
||||
case 9:
|
||||
case 16:
|
||||
case 19:
|
||||
set_signal(SIGFPE, regs, NULL);
|
||||
break;
|
||||
case 4:
|
||||
case 5:
|
||||
set_signal(SIGSEGV, regs, NULL);
|
||||
break;
|
||||
case 6:
|
||||
memset(&info, '\0', sizeof info);
|
||||
info.si_signo = SIGILL;
|
||||
info.si_code = ILL_ILLOPN;
|
||||
info._sifields._sigfault.si_addr = (void *)regs->gpr.rip;
|
||||
set_signal(SIGILL, regs, &info);
|
||||
break;
|
||||
case 10:
|
||||
set_signal(SIGSEGV, regs, NULL);
|
||||
break;
|
||||
case 11:
|
||||
case 12:
|
||||
set_signal(SIGBUS, regs, NULL);
|
||||
break;
|
||||
case 17:
|
||||
memset(&info, '\0', sizeof info);
|
||||
info.si_signo = SIGBUS;
|
||||
info.si_code = BUS_ADRALN;
|
||||
set_signal(SIGBUS, regs, &info);
|
||||
break;
|
||||
default:
|
||||
kprintf("Exception %d, rflags: 0x%lX CS: 0x%lX, RIP: 0x%lX\n",
|
||||
vector, regs->rflags, regs->cs, regs->rip);
|
||||
} else {
|
||||
kprintf("Exception %d, rflags: 0x%lX CS: 0x%lX, RIP: 0x%lX\n",
|
||||
vector, regs->rflags, regs->cs, regs->rip);
|
||||
vector, regs->gpr.rflags, regs->gpr.cs, regs->gpr.rip);
|
||||
arch_show_interrupt_context(regs);
|
||||
panic("Unhandled exception");
|
||||
}
|
||||
arch_show_interrupt_context(regs);
|
||||
panic("Unhandled exception");
|
||||
}
|
||||
else if (vector == LOCAL_TIMER_VECTOR) {
|
||||
unsigned long irqstate;
|
||||
/* Timer interrupt, enabled only on oversubscribed CPU cores,
|
||||
* request reschedule */
|
||||
irqstate = ihk_mc_spinlock_lock(&v->runq_lock);
|
||||
v->flags |= CPU_FLAG_NEED_RESCHED;
|
||||
ihk_mc_spinlock_unlock(&v->runq_lock, irqstate);
|
||||
dkprintf("timer[%lu]: CPU_FLAG_NEED_RESCHED \n", rdtsc());
|
||||
}
|
||||
else if (vector >= IHK_TLB_FLUSH_IRQ_VECTOR_START &&
|
||||
vector < IHK_TLB_FLUSH_IRQ_VECTOR_END) {
|
||||
@@ -446,27 +745,85 @@ void handle_interrupt(int vector, struct x86_regs *regs)
|
||||
}
|
||||
}
|
||||
|
||||
check_signal(0, regs);
|
||||
check_signal(0, regs, 0);
|
||||
check_need_resched();
|
||||
|
||||
--v->in_interrupt;
|
||||
}
|
||||
|
||||
void gpe_handler(struct x86_regs *regs)
|
||||
void gpe_handler(struct x86_user_context *regs)
|
||||
{
|
||||
struct siginfo info;
|
||||
|
||||
kprintf("General protection fault (err: %lx, %lx:%lx)\n",
|
||||
regs->error, regs->cs, regs->rip);
|
||||
regs->gpr.error, regs->gpr.cs, regs->gpr.rip);
|
||||
arch_show_interrupt_context(regs);
|
||||
memset(&info, '\0', sizeof info);
|
||||
set_signal(SIGILL, regs, &info);
|
||||
check_signal(0, regs);
|
||||
if ((regs->gpr.cs & 3) == 0) {
|
||||
panic("gpe_handler");
|
||||
}
|
||||
set_signal(SIGSEGV, regs, NULL);
|
||||
check_signal(0, regs, 0);
|
||||
check_need_resched();
|
||||
// panic("GPF");
|
||||
}
|
||||
|
||||
void debug_handler(struct x86_user_context *regs)
|
||||
{
|
||||
unsigned long db6;
|
||||
int si_code = 0;
|
||||
struct siginfo info;
|
||||
|
||||
#ifdef DEBUG_PRINT_CPU
|
||||
kprintf("debug exception (err: %lx, %lx:%lx)\n",
|
||||
regs->gpr.error, regs->gpr.cs, regs->gpr.rip);
|
||||
arch_show_interrupt_context(regs);
|
||||
#endif
|
||||
|
||||
asm("mov %%db6, %0" :"=r" (db6));
|
||||
if (db6 & DB6_BS) {
|
||||
regs->gpr.rflags &= ~RFLAGS_TF;
|
||||
si_code = TRAP_TRACE;
|
||||
} else if (db6 & (DB6_B3|DB6_B2|DB6_B1|DB6_B0)) {
|
||||
si_code = TRAP_HWBKPT;
|
||||
}
|
||||
|
||||
memset(&info, '\0', sizeof info);
|
||||
info.si_code = si_code;
|
||||
set_signal(SIGTRAP, regs, &info);
|
||||
check_signal(0, regs, 0);
|
||||
check_need_resched();
|
||||
}
|
||||
|
||||
void int3_handler(struct x86_user_context *regs)
|
||||
{
|
||||
struct siginfo info;
|
||||
|
||||
#ifdef DEBUG_PRINT_CPU
|
||||
kprintf("int3 exception (err: %lx, %lx:%lx)\n",
|
||||
regs->gpr.error, regs->gpr.cs, regs->gpr.rip);
|
||||
arch_show_interrupt_context(regs);
|
||||
#endif
|
||||
|
||||
memset(&info, '\0', sizeof info);
|
||||
info.si_code = TRAP_BRKPT;
|
||||
set_signal(SIGTRAP, regs, &info);
|
||||
check_signal(0, regs, 0);
|
||||
check_need_resched();
|
||||
}
|
||||
|
||||
static void wait_icr_idle(void)
|
||||
{
|
||||
while (lapic_read(LAPIC_ICR0) & APIC_ICR_BUSY) {
|
||||
cpu_pause();
|
||||
}
|
||||
}
|
||||
|
||||
void x86_issue_ipi(unsigned int apicid, unsigned int low)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
flags = cpu_disable_interrupt_save();
|
||||
wait_icr_idle();
|
||||
lapic_icr_write(apicid << LAPIC_ICR_ID_SHIFT, low);
|
||||
cpu_restore_interrupt(flags);
|
||||
}
|
||||
|
||||
static void outb(uint8_t v, uint16_t port)
|
||||
@@ -479,13 +836,6 @@ static void set_warm_reset_vector(unsigned long ip)
|
||||
x86_set_warm_reset(ip, first_page_va);
|
||||
}
|
||||
|
||||
static void wait_icr_idle(void)
|
||||
{
|
||||
while (lapic_read(LAPIC_ICR0) & APIC_ICR_BUSY) {
|
||||
cpu_pause();
|
||||
}
|
||||
}
|
||||
|
||||
static void __x86_wakeup(int apicid, unsigned long ip)
|
||||
{
|
||||
int retry = 3;
|
||||
@@ -499,7 +849,6 @@ static void __x86_wakeup(int apicid, unsigned long ip)
|
||||
/* INIT */
|
||||
x86_issue_ipi(apicid,
|
||||
APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT);
|
||||
wait_icr_idle();
|
||||
|
||||
x86_issue_ipi(apicid,
|
||||
APIC_INT_LEVELTRIG | APIC_DM_INIT);
|
||||
@@ -524,31 +873,65 @@ void cpu_halt(void)
|
||||
asm volatile("hlt");
|
||||
}
|
||||
|
||||
/*@
|
||||
@ assigns \nothing;
|
||||
@ ensures \interrupt_disabled == 0;
|
||||
@*/
|
||||
void cpu_safe_halt(void)
|
||||
{
|
||||
asm volatile("sti; hlt");
|
||||
}
|
||||
|
||||
/*@
|
||||
@ assigns \nothing;
|
||||
@ ensures \interrupt_disabled == 0;
|
||||
@*/
|
||||
void cpu_enable_interrupt(void)
|
||||
{
|
||||
asm volatile("sti");
|
||||
}
|
||||
|
||||
/*@
|
||||
@ assigns \nothing;
|
||||
@ ensures \interrupt_disabled > 0;
|
||||
@*/
|
||||
void cpu_disable_interrupt(void)
|
||||
{
|
||||
asm volatile("cli");
|
||||
}
|
||||
|
||||
/*@
|
||||
@ assigns \nothing;
|
||||
@ behavior to_enabled:
|
||||
@ assumes flags & RFLAGS_IF;
|
||||
@ ensures \interrupt_disabled == 0;
|
||||
@ behavior to_disabled:
|
||||
@ assumes !(flags & RFLAGS_IF);
|
||||
@ ensures \interrupt_disabled > 0;
|
||||
@*/
|
||||
void cpu_restore_interrupt(unsigned long flags)
|
||||
{
|
||||
asm volatile("push %0; popf" : : "g"(flags) : "memory", "cc");
|
||||
}
|
||||
|
||||
/*@
|
||||
@ assigns \nothing;
|
||||
@*/
|
||||
void cpu_pause(void)
|
||||
{
|
||||
asm volatile("pause" ::: "memory");
|
||||
}
|
||||
|
||||
/*@
|
||||
@ assigns \nothing;
|
||||
@ ensures \interrupt_disabled > 0;
|
||||
@ behavior from_enabled:
|
||||
@ assumes \interrupt_disabled == 0;
|
||||
@ ensures \result & RFLAGS_IF;
|
||||
@ behavior from_disabled:
|
||||
@ assumes \interrupt_disabled > 0;
|
||||
@ ensures !(\result & RFLAGS_IF);
|
||||
@*/
|
||||
unsigned long cpu_disable_interrupt_save(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
@@ -558,6 +941,17 @@ unsigned long cpu_disable_interrupt_save(void)
|
||||
return flags;
|
||||
}
|
||||
|
||||
/*@
|
||||
@ behavior valid_vector:
|
||||
@ assumes 32 <= vector <= 255;
|
||||
@ requires \valid(h);
|
||||
@ assigns handlers[vector-32];
|
||||
@ ensures \result == 0;
|
||||
@ behavior invalid_vector:
|
||||
@ assumes (vector < 32) || (255 < vector);
|
||||
@ assigns \nothing;
|
||||
@ ensures \result == -EINVAL;
|
||||
@*/
|
||||
int ihk_mc_register_interrupt_handler(int vector,
|
||||
struct ihk_mc_interrupt_handler *h)
|
||||
{
|
||||
@@ -579,6 +973,11 @@ int ihk_mc_unregister_interrupt_handler(int vector,
|
||||
|
||||
extern unsigned long __page_fault_handler_address;
|
||||
|
||||
/*@
|
||||
@ requires \valid(h);
|
||||
@ assigns __page_fault_handler_address;
|
||||
@ ensures __page_fault_handler_address == h;
|
||||
@*/
|
||||
void ihk_mc_set_page_fault_handler(void (*h)(void *, uint64_t, void *))
|
||||
{
|
||||
__page_fault_handler_address = (unsigned long)h;
|
||||
@@ -588,6 +987,18 @@ extern char trampoline_code_data[], trampoline_code_data_end[];
|
||||
struct page_table *get_init_page_table(void);
|
||||
unsigned long get_transit_page_table(void);
|
||||
|
||||
/* reusable, but not reentrant */
|
||||
/*@
|
||||
@ requires \valid_apicid(cpuid); // valid APIC ID or not
|
||||
@ requires \valid(pc);
|
||||
@ requires \valid(trampoline_va);
|
||||
@ requires \valid(trampoline_code_data
|
||||
@ +(0..(trampoline_code_data_end - trampoline_code_data)));
|
||||
@ requires \valid_physical(ap_trampoline); // valid physical address or not
|
||||
@ assigns (char *)trampoline_va+(0..trampoline_code_data_end - trampoline_code_data);
|
||||
@ assigns cpu_boot_status;
|
||||
@ ensures cpu_boot_status != 0;
|
||||
@*/
|
||||
void ihk_mc_boot_cpu(int cpuid, unsigned long pc)
|
||||
{
|
||||
unsigned long *p;
|
||||
@@ -607,7 +1018,7 @@ void ihk_mc_boot_cpu(int cpuid, unsigned long pc)
|
||||
|
||||
cpu_boot_status = 0;
|
||||
|
||||
__x86_wakeup(cpuid, AP_TRAMPOLINE);
|
||||
__x86_wakeup(cpuid, ap_trampoline);
|
||||
|
||||
/* XXX: Time out */
|
||||
while (!cpu_boot_status) {
|
||||
@@ -615,6 +1026,11 @@ void ihk_mc_boot_cpu(int cpuid, unsigned long pc)
|
||||
}
|
||||
}
|
||||
|
||||
/*@
|
||||
@ requires \valid(new_ctx);
|
||||
@ requires (stack_pointer == NULL) || \valid((unsigned long *)stack_pointer-1);
|
||||
@ requires \valid(next_function);
|
||||
@*/
|
||||
void ihk_mc_init_context(ihk_mc_kernel_context_t *new_ctx,
|
||||
void *stack_pointer, void (*next_function)(void))
|
||||
{
|
||||
@@ -633,7 +1049,28 @@ void ihk_mc_init_context(ihk_mc_kernel_context_t *new_ctx,
|
||||
}
|
||||
|
||||
extern char enter_user_mode[];
|
||||
|
||||
/*
|
||||
* Release runq_lock before entering user space.
|
||||
* This is needed because schedule() holds the runq lock throughout
|
||||
* the context switch and when a new process is created it starts
|
||||
* execution in enter_user_mode, which in turn calls this function.
|
||||
*/
|
||||
void release_runq_lock(void)
|
||||
{
|
||||
ihk_mc_spinlock_unlock(&(cpu_local_var(runq_lock)),
|
||||
cpu_local_var(runq_irqstate));
|
||||
}
|
||||
|
||||
/*@
|
||||
@ requires \valid(ctx);
|
||||
@ requires \valid(puctx);
|
||||
@ requires \valid((ihk_mc_user_context_t *)stack_pointer-1);
|
||||
@ requires \valid_user(new_pc); // valid user space address or not
|
||||
@ requires \valid_user(user_sp-1);
|
||||
@ assigns *((ihk_mc_user_context_t *)stack_pointer-1);
|
||||
@ assigns ctx->rsp0;
|
||||
@*/
|
||||
void ihk_mc_init_user_process(ihk_mc_kernel_context_t *ctx,
|
||||
ihk_mc_user_context_t **puctx,
|
||||
void *stack_pointer, unsigned long new_pc,
|
||||
@@ -649,49 +1086,95 @@ void ihk_mc_init_user_process(ihk_mc_kernel_context_t *ctx,
|
||||
*puctx = uctx;
|
||||
|
||||
memset(uctx, 0, sizeof(ihk_mc_user_context_t));
|
||||
uctx->cs = USER_CS;
|
||||
uctx->rip = new_pc;
|
||||
uctx->ss = USER_DS;
|
||||
uctx->rsp = user_sp;
|
||||
uctx->rflags = RFLAGS_IF;
|
||||
uctx->gpr.cs = USER_CS;
|
||||
uctx->gpr.rip = new_pc;
|
||||
uctx->gpr.ss = USER_DS;
|
||||
uctx->gpr.rsp = user_sp;
|
||||
uctx->gpr.rflags = RFLAGS_IF;
|
||||
uctx->is_gpr_valid = 1;
|
||||
|
||||
ihk_mc_init_context(ctx, sp, (void (*)(void))enter_user_mode);
|
||||
ctx->rsp0 = (unsigned long)stack_pointer;
|
||||
}
|
||||
|
||||
/*@
|
||||
@ behavior rsp:
|
||||
@ assumes reg == IHK_UCR_STACK_POINTER;
|
||||
@ requires \valid(uctx);
|
||||
@ assigns uctx->gpr.rsp;
|
||||
@ ensures uctx->gpr.rsp == value;
|
||||
@ behavior rip:
|
||||
@ assumes reg == IHK_UCR_PROGRAM_COUNTER;
|
||||
@ requires \valid(uctx);
|
||||
@ assigns uctx->gpr.rip;
|
||||
@ ensures uctx->gpr.rip == value;
|
||||
@*/
|
||||
/*
 * Overwrite one register of a saved user context.
 *
 * uctx  : user context to modify.
 * reg   : IHK_UCR_STACK_POINTER selects gpr.rsp, IHK_UCR_PROGRAM_COUNTER
 *         selects gpr.rip; any other value leaves the context untouched.
 * value : new register value.
 *
 * Only the gpr.* copies are written, matching the `assigns` clauses of the
 * ACSL contract for this function.
 */
void ihk_mc_modify_user_context(ihk_mc_user_context_t *uctx,
                                enum ihk_mc_user_context_regtype reg,
                                unsigned long value)
{
	if (reg == IHK_UCR_STACK_POINTER) {
		uctx->gpr.rsp = value;
	} else if (reg == IHK_UCR_PROGRAM_COUNTER) {
		uctx->gpr.rip = value;
	}
}
|
||||
|
||||
/*
 * Print a short summary of a saved user context through kprintf():
 * first CS:RIP, then rax, rbx, rcx, rdx on one line and rsi, rdi, rsp on
 * the next.  All values are read from uctx->gpr.
 */
void ihk_mc_print_user_context(ihk_mc_user_context_t *uctx)
{
	kprintf("CS:RIP = %04lx:%16lx\n", uctx->gpr.cs, uctx->gpr.rip);
	kprintf("%16lx %16lx %16lx %16lx\n%16lx %16lx %16lx\n",
	        uctx->gpr.rax, uctx->gpr.rbx, uctx->gpr.rcx, uctx->gpr.rdx,
	        uctx->gpr.rsi, uctx->gpr.rdi, uctx->gpr.rsp);
}
|
||||
|
||||
/*@
|
||||
@ requires \valid(handler);
|
||||
@ assigns __x86_syscall_handler;
|
||||
@ ensures __x86_syscall_handler == handler;
|
||||
@*/
|
||||
/* Install `handler` as the system-call handler by storing it in __x86_syscall_handler. */
void ihk_mc_set_syscall_handler(long (*handler)(int, ihk_mc_user_context_t *))
|
||||
{
|
||||
__x86_syscall_handler = handler;
|
||||
}
|
||||
|
||||
/*@
|
||||
@ assigns \nothing;
|
||||
@*/
|
||||
/* Delay by forwarding `us` unchanged to arch_delay(); presumably microseconds, per the name - TODO confirm. */
void ihk_mc_delay_us(int us)
|
||||
{
|
||||
arch_delay(us);
|
||||
}
|
||||
|
||||
#define EXTENDED_ARCH_SHOW_CONTEXT
|
||||
#ifdef EXTENDED_ARCH_SHOW_CONTEXT
|
||||
void arch_show_extended_context(void)
|
||||
{
|
||||
unsigned long cr0, cr4, msr, xcr0;
|
||||
|
||||
/* Read and print CRs, MSR_EFER, XCR0 */
|
||||
asm volatile("movq %%cr0, %0" : "=r"(cr0));
|
||||
asm volatile("movq %%cr4, %0" : "=r"(cr4));
|
||||
msr = rdmsr(MSR_EFER);
|
||||
xcr0 = xgetbv(0);
|
||||
|
||||
__kprintf("\n CR0 CR4\n");
|
||||
__kprintf("%016lX %016lX\n", cr0, cr4);
|
||||
|
||||
__kprintf(" MSR_EFER\n");
|
||||
__kprintf("%016lX\n", msr);
|
||||
|
||||
__kprintf(" XCR0\n");
|
||||
__kprintf("%016lX\n", xcr0);
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
void arch_show_interrupt_context(const void *reg)
|
||||
{
|
||||
const struct x86_regs *regs = reg;
|
||||
int irqflags;
|
||||
const struct x86_user_context *uctx = reg;
|
||||
const struct x86_basic_regs *regs = &uctx->gpr;
|
||||
unsigned long irqflags;
|
||||
|
||||
irqflags = kprintf_lock();
|
||||
|
||||
@@ -711,10 +1194,22 @@ void arch_show_interrupt_context(const void *reg)
|
||||
__kprintf(" CS SS RFLAGS ERROR\n");
|
||||
__kprintf("%16lx %16lx %16lx %16lx\n",
|
||||
regs->cs, regs->ss, regs->rflags, regs->error);
|
||||
|
||||
|
||||
#ifdef EXTENDED_ARCH_SHOW_CONTEXT
|
||||
arch_show_extended_context();
|
||||
#endif
|
||||
|
||||
kprintf_unlock(irqflags);
|
||||
}
|
||||
|
||||
/*@
|
||||
@ behavior fs_base:
|
||||
@ assumes type == IHK_ASR_X86_FS;
|
||||
@ ensures \result == 0;
|
||||
@ behavior invalid_type:
|
||||
@ assumes type != IHK_ASR_X86_FS;
|
||||
@ ensures \result == -EINVAL;
|
||||
@*/
|
||||
int ihk_mc_arch_set_special_register(enum ihk_asr_type type,
|
||||
unsigned long value)
|
||||
{
|
||||
@@ -728,6 +1223,15 @@ int ihk_mc_arch_set_special_register(enum ihk_asr_type type,
|
||||
}
|
||||
}
|
||||
|
||||
/*@
|
||||
@ behavior fs_base:
|
||||
@ assumes type == IHK_ASR_X86_FS;
|
||||
@ requires \valid(value);
|
||||
@ ensures \result == 0;
|
||||
@ behavior invalid_type:
|
||||
@ assumes type != IHK_ASR_X86_FS;
|
||||
@ ensures \result == -EINVAL;
|
||||
@*/
|
||||
int ihk_mc_arch_get_special_register(enum ihk_asr_type type,
|
||||
unsigned long *value)
|
||||
{
|
||||
@@ -741,11 +1245,116 @@ int ihk_mc_arch_get_special_register(enum ihk_asr_type type,
|
||||
}
|
||||
}
|
||||
|
||||
/*@
|
||||
@ requires \valid_apicid(cpu); // valid APIC ID or not
|
||||
@ ensures \result == 0;
|
||||
@*/
|
||||
/*
 * Send interrupt `vector` to the CPU identified by `cpu`.
 * Always returns 0.
 */
int ihk_mc_interrupt_cpu(int cpu, int vector)
|
||||
{
|
||||
dkprintf("[%d] ihk_mc_interrupt_cpu: %d\n", ihk_mc_get_processor_id(), cpu);
|
||||
|
||||
/* wait_icr_idle() runs before the IPI is issued; presumably it waits for the ICR to become idle - TODO confirm */
wait_icr_idle();
|
||||
x86_issue_ipi(cpu, vector);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*@
|
||||
@ requires \valid(proc);
|
||||
@ ensures proc->fp_regs == NULL;
|
||||
@*/
|
||||
void
|
||||
release_fp_regs(struct thread *thread)
|
||||
{
|
||||
int pages;
|
||||
|
||||
if (thread && !thread->fp_regs)
|
||||
return;
|
||||
|
||||
pages = (sizeof(fp_regs_struct) + 4095) >> 12;
|
||||
ihk_mc_free_pages(thread->fp_regs, pages);
|
||||
thread->fp_regs = NULL;
|
||||
}
|
||||
|
||||
void
|
||||
save_fp_regs(struct thread *thread)
|
||||
{
|
||||
int pages;
|
||||
|
||||
if (!thread->fp_regs) {
|
||||
pages = (sizeof(fp_regs_struct) + 4095) >> 12;
|
||||
thread->fp_regs = ihk_mc_alloc_pages(pages, IHK_MC_AP_NOWAIT);
|
||||
|
||||
if (!thread->fp_regs) {
|
||||
kprintf("error: allocating fp_regs pages\n");
|
||||
return;
|
||||
}
|
||||
|
||||
memset(thread->fp_regs, 0, sizeof(fp_regs_struct));
|
||||
}
|
||||
|
||||
if (xsave_available) {
|
||||
unsigned int low, high;
|
||||
|
||||
/* Request full save of x87, SSE and AVX states */
|
||||
low = 0x7;
|
||||
high = 0;
|
||||
|
||||
asm volatile("xsave %0" : : "m" (*thread->fp_regs), "a" (low), "d" (high)
|
||||
: "memory");
|
||||
|
||||
dkprintf("fp_regs for TID %d saved\n", thread->tid);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
restore_fp_regs(struct thread *thread)
|
||||
{
|
||||
if (!thread->fp_regs)
|
||||
return;
|
||||
|
||||
if (xsave_available) {
|
||||
unsigned int low, high;
|
||||
|
||||
/* Request full restore of x87, SSE and AVX states */
|
||||
low = 0x7;
|
||||
high = 0;
|
||||
|
||||
asm volatile("xrstor %0" : : "m" (*thread->fp_regs),
|
||||
"a" (low), "d" (high));
|
||||
|
||||
dkprintf("fp_regs for TID %d restored\n", thread->tid);
|
||||
}
|
||||
|
||||
// XXX: why release??
|
||||
//release_fp_regs(thread);
|
||||
}
|
||||
|
||||
ihk_mc_user_context_t *lookup_user_context(struct thread *thread)
|
||||
{
|
||||
ihk_mc_user_context_t *uctx = thread->uctx;
|
||||
|
||||
if ((!(thread->status & (PS_INTERRUPTIBLE | PS_UNINTERRUPTIBLE
|
||||
| PS_STOPPED | PS_TRACED))
|
||||
&& (thread != cpu_local_var(current)))
|
||||
|| !uctx->is_gpr_valid) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!uctx->is_sr_valid) {
|
||||
uctx->sr.fs_base = thread->tlsblock_base;
|
||||
uctx->sr.gs_base = 0;
|
||||
uctx->sr.ds = 0;
|
||||
uctx->sr.es = 0;
|
||||
uctx->sr.fs = 0;
|
||||
uctx->sr.gs = 0;
|
||||
|
||||
uctx->is_sr_valid = 1;
|
||||
}
|
||||
|
||||
return uctx;
|
||||
} /* lookup_user_context() */
|
||||
|
||||
|
||||
/* Write 0 to MSR_IA32_TIME_STAMP_COUNTER on the executing CPU. */
void zero_tsc(void)
|
||||
{
|
||||
wrmsr(MSR_IA32_TIME_STAMP_COUNTER, 0);
|
||||
}
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
#define dkprintf(...) kprintf(__VA_ARGS__)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#else
|
||||
#define dkprintf(...)
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
@@ -78,15 +78,16 @@ int get_prstatus_size(void)
|
||||
* \brief Fill a prstatus structure.
|
||||
*
|
||||
* \param head A pointer to a note structure.
|
||||
* \param proc A pointer to the current process structure.
|
||||
* \param thread A pointer to the current thread structure.
|
||||
* \param regs0 A pointer to a x86_regs structure.
|
||||
*/
|
||||
|
||||
void fill_prstatus(struct note *head, struct process *proc, void *regs0)
|
||||
void fill_prstatus(struct note *head, struct thread *thread, void *regs0)
|
||||
{
|
||||
void *name;
|
||||
struct elf_prstatus64 *prstatus;
|
||||
struct x86_regs *regs = regs0;
|
||||
struct x86_user_context *uctx = regs0;
|
||||
struct x86_basic_regs *regs = &uctx->gpr;
|
||||
register unsigned long _r12 asm("r12");
|
||||
register unsigned long _r13 asm("r13");
|
||||
register unsigned long _r14 asm("r14");
|
||||
@@ -159,11 +160,11 @@ int get_prpsinfo_size(void)
|
||||
* \brief Fill a prpsinfo structure.
|
||||
*
|
||||
* \param head A pointer to a note structure.
|
||||
* \param proc A pointer to the current process structure.
|
||||
* \param thread A pointer to the current thread structure.
|
||||
* \param regs A pointer to a x86_regs structure.
|
||||
*/
|
||||
|
||||
void fill_prpsinfo(struct note *head, struct process *proc, void *regs)
|
||||
void fill_prpsinfo(struct note *head, struct thread *thread, void *regs)
|
||||
{
|
||||
void *name;
|
||||
struct elf_prpsinfo64 *prpsinfo;
|
||||
@@ -175,8 +176,8 @@ void fill_prpsinfo(struct note *head, struct process *proc, void *regs)
|
||||
memcpy(name, "CORE", sizeof("CORE"));
|
||||
prpsinfo = (struct elf_prpsinfo64 *)(name + align32(sizeof("CORE")));
|
||||
|
||||
prpsinfo->pr_state = proc->ftn->status;
|
||||
prpsinfo->pr_pid = proc->ftn->pid;
|
||||
prpsinfo->pr_state = thread->status;
|
||||
prpsinfo->pr_pid = thread->proc->pid;
|
||||
|
||||
/*
|
||||
We leave most of the fields unfilled.
|
||||
@@ -209,11 +210,11 @@ int get_auxv_size(void)
|
||||
* \brief Fill an AUXV structure.
|
||||
*
|
||||
* \param head A pointer to a note structure.
|
||||
* \param proc A pointer to the current process structure.
|
||||
* \param thread A pointer to the current thread structure.
|
||||
* \param regs A pointer to a x86_regs structure.
|
||||
*/
|
||||
|
||||
void fill_auxv(struct note *head, struct process *proc, void *regs)
|
||||
void fill_auxv(struct note *head, struct thread *thread, void *regs)
|
||||
{
|
||||
void *name;
|
||||
void *auxv;
|
||||
@@ -224,7 +225,7 @@ void fill_auxv(struct note *head, struct process *proc, void *regs)
|
||||
name = (void *) (head + 1);
|
||||
memcpy(name, "CORE", sizeof("CORE"));
|
||||
auxv = name + align32(sizeof("CORE"));
|
||||
memcpy(auxv, proc->saved_auxv, sizeof(unsigned long) * AUXV_LEN);
|
||||
memcpy(auxv, thread->proc->saved_auxv, sizeof(unsigned long) * AUXV_LEN);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -242,23 +243,23 @@ int get_note_size(void)
|
||||
* \brief Fill the NOTE segment.
|
||||
*
|
||||
* \param head A pointer to a note structure.
|
||||
* \param proc A pointer to the current process structure.
|
||||
* \param thread A pointer to the current thread structure.
|
||||
* \param regs A pointer to a x86_regs structure.
|
||||
*/
|
||||
|
||||
/*
 * Fill the NOTE segment: a prstatus note, then a prpsinfo note, then an
 * auxv note, laid out back to back starting at `note`.  The cursor is
 * advanced by get_prstatus_size() and get_prpsinfo_size() between the
 * sections.
 */
void fill_note(void *note, struct thread *thread, void *regs)
{
	fill_prstatus(note, thread, regs);
	note += get_prstatus_size();
	fill_prpsinfo(note, thread, regs);
	note += get_prpsinfo_size();
	fill_auxv(note, thread, regs);
}
|
||||
|
||||
/**
|
||||
* \brief Generate an image of the core file.
|
||||
*
|
||||
* \param proc A pointer to the current process structure.
|
||||
* \param thread A pointer to the current thread structure.
|
||||
* \param regs A pointer to a x86_regs structure.
|
||||
* \param coretable(out) An array of core chunks.
|
||||
* \param chunks(out) Number of the entries of coretable.
|
||||
@@ -270,7 +271,7 @@ void fill_note(void *note, struct process *proc, void *regs)
|
||||
* should be zero.
|
||||
*/
|
||||
|
||||
int gencore(struct process *proc, void *regs,
|
||||
int gencore(struct thread *thread, void *regs,
|
||||
struct coretable **coretable, int *chunks)
|
||||
{
|
||||
struct coretable *ct = NULL;
|
||||
@@ -278,7 +279,7 @@ int gencore(struct process *proc, void *regs,
|
||||
Elf64_Phdr *ph = NULL;
|
||||
void *note = NULL;
|
||||
struct vm_range *range;
|
||||
struct process_vm *vm = proc->vm;
|
||||
struct process_vm *vm = thread->vm;
|
||||
int segs = 1; /* the first one is for NOTE */
|
||||
int notesize, phsize, alignednotesize;
|
||||
unsigned int offset = 0;
|
||||
@@ -305,7 +306,7 @@ int gencore(struct process *proc, void *regs,
|
||||
unsigned long p, phys;
|
||||
int prevzero = 0;
|
||||
for (p = range->start; p < range->end; p += PAGE_SIZE) {
|
||||
if (ihk_mc_pt_virt_to_phys(proc->vm->page_table,
|
||||
if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table,
|
||||
(void *)p, &phys) != 0) {
|
||||
prevzero = 1;
|
||||
} else {
|
||||
@@ -325,7 +326,7 @@ int gencore(struct process *proc, void *regs,
|
||||
dkprintf("we have %d segs and %d chunks.\n\n", segs, *chunks);
|
||||
|
||||
{
|
||||
struct vm_regions region = proc->vm->region;
|
||||
struct vm_regions region = thread->vm->region;
|
||||
|
||||
dkprintf("text: %lx-%lx\n", region.text_start, region.text_end);
|
||||
dkprintf("data: %lx-%lx\n", region.data_start, region.data_end);
|
||||
@@ -363,7 +364,7 @@ int gencore(struct process *proc, void *regs,
|
||||
goto fail;
|
||||
}
|
||||
memset(note, 0, alignednotesize);
|
||||
fill_note(note, proc, regs);
|
||||
fill_note(note, thread, regs);
|
||||
|
||||
/* program header for NOTE segment is exceptional */
|
||||
ph[0].p_type = PT_NOTE;
|
||||
@@ -433,7 +434,7 @@ int gencore(struct process *proc, void *regs,
|
||||
|
||||
for (start = p = range->start;
|
||||
p < range->end; p += PAGE_SIZE) {
|
||||
if (ihk_mc_pt_virt_to_phys(proc->vm->page_table,
|
||||
if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table,
|
||||
(void *)p, &phys) != 0) {
|
||||
if (prevzero == 0) {
|
||||
/* We begin a new chunk */
|
||||
@@ -471,9 +472,9 @@ int gencore(struct process *proc, void *regs,
|
||||
i++;
|
||||
}
|
||||
} else {
|
||||
if ((proc->vm->region.user_start <= range->start) &&
|
||||
(range->end <= proc->vm->region.user_end)) {
|
||||
if (ihk_mc_pt_virt_to_phys(proc->vm->page_table,
|
||||
if ((thread->vm->region.user_start <= range->start) &&
|
||||
(range->end <= thread->vm->region.user_end)) {
|
||||
if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table,
|
||||
(void *)range->start, &phys) != 0) {
|
||||
dkprintf("could not convert user virtual address %lx"
|
||||
"to physical address", range->start);
|
||||
|
||||
@@ -5,15 +5,20 @@
|
||||
#define __HEADER_X86_COMMON_ARCH_LOCK
|
||||
|
||||
#include <ihk/cpu.h>
|
||||
#include <ihk/atomic.h>
|
||||
|
||||
//#define DEBUG_SPINLOCK
|
||||
//#define DEBUG_MCS_RWLOCK
|
||||
|
||||
#ifdef DEBUG_SPINLOCK
|
||||
#if defined(DEBUG_SPINLOCK) || defined(DEBUG_MCS_RWLOCK)
|
||||
int __kprintf(const char *format, ...);
|
||||
#endif
|
||||
|
||||
typedef int ihk_spinlock_t;
|
||||
|
||||
extern void preempt_enable(void);
|
||||
extern void preempt_disable(void);
|
||||
|
||||
#define IHK_STATIC_SPINLOCK_FUNCS
|
||||
|
||||
static void ihk_mc_spinlock_init(ihk_spinlock_t *lock)
|
||||
@@ -22,7 +27,17 @@ static void ihk_mc_spinlock_init(ihk_spinlock_t *lock)
|
||||
}
|
||||
#define SPIN_LOCK_UNLOCKED 0
|
||||
|
||||
/*
 * ihk_mc_spinlock_lock_noirq(): with DEBUG_SPINLOCK the call is traced via
 * __kprintf() before and after; otherwise it is an alias of
 * __ihk_mc_spinlock_lock_noirq().  The tracing form is wrapped in
 * do { } while (0) so that it behaves as a single statement.
 */
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_lock_noirq(l) do { \
	__kprintf("[%d] call ihk_mc_spinlock_lock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
	__ihk_mc_spinlock_lock_noirq(l); \
	__kprintf("[%d] ret ihk_mc_spinlock_lock_noirq\n", ihk_mc_get_processor_id()); \
} while (0)
#else
#define ihk_mc_spinlock_lock_noirq __ihk_mc_spinlock_lock_noirq
#endif
|
||||
|
||||
static void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
|
||||
{
|
||||
int inc = 0x00010000;
|
||||
int tmp;
|
||||
@@ -41,10 +56,8 @@ static void ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
|
||||
: "+Q" (inc), "+m" (*lock), "=r" (tmp) : : "memory", "cc");
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG_SPINLOCK
|
||||
__kprintf("[%d] trying to grab lock: 0x%lX\n",
|
||||
ihk_mc_get_processor_id(), lock);
|
||||
#endif
|
||||
preempt_disable();
|
||||
|
||||
asm volatile("lock; xaddl %0, %1\n"
|
||||
"movzwl %w0, %2\n\t"
|
||||
"shrl $16, %0\n\t"
|
||||
@@ -60,36 +73,431 @@ static void ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
|
||||
:
|
||||
: "memory", "cc");
|
||||
|
||||
#ifdef DEBUG_SPINLOCK
|
||||
__kprintf("[%d] holding lock: 0x%lX\n", ihk_mc_get_processor_id(), lock);
|
||||
#endif
|
||||
}
|
||||
|
||||
static unsigned long ihk_mc_spinlock_lock(ihk_spinlock_t *lock)
|
||||
#ifdef DEBUG_SPINLOCK
|
||||
#define ihk_mc_spinlock_lock(l) ({ unsigned long rc;\
|
||||
__kprintf("[%d] call ihk_mc_spinlock_lock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
|
||||
rc = __ihk_mc_spinlock_lock(l);\
|
||||
__kprintf("[%d] ret ihk_mc_spinlock_lock\n", ihk_mc_get_processor_id()); rc;\
|
||||
})
|
||||
#else
|
||||
#define ihk_mc_spinlock_lock __ihk_mc_spinlock_lock
|
||||
#endif
|
||||
static unsigned long __ihk_mc_spinlock_lock(ihk_spinlock_t *lock)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
flags = cpu_disable_interrupt_save();
|
||||
|
||||
ihk_mc_spinlock_lock_noirq(lock);
|
||||
__ihk_mc_spinlock_lock_noirq(lock);
|
||||
|
||||
return flags;
|
||||
}
|
||||
|
||||
/*
 * ihk_mc_spinlock_unlock_noirq(): traced via __kprintf() when
 * DEBUG_SPINLOCK is defined, otherwise an alias of
 * __ihk_mc_spinlock_unlock_noirq().  The tracing form is wrapped in
 * do { } while (0) so that it behaves as a single statement.
 */
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_unlock_noirq(l) do { \
	__kprintf("[%d] call ihk_mc_spinlock_unlock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
	__ihk_mc_spinlock_unlock_noirq(l); \
	__kprintf("[%d] ret ihk_mc_spinlock_unlock_noirq\n", ihk_mc_get_processor_id()); \
} while (0)
#else
#define ihk_mc_spinlock_unlock_noirq __ihk_mc_spinlock_unlock_noirq
#endif
/*
 * Release `lock` by atomically incrementing the 16-bit word at *lock, then
 * re-enable preemption.  Interrupt state is not touched.
 */
static void __ihk_mc_spinlock_unlock_noirq(ihk_spinlock_t *lock)
{
	asm volatile ("lock incw %0" : "+m"(*lock) : : "memory", "cc");

	preempt_enable();
}
|
||||
|
||||
/*
 * ihk_mc_spinlock_unlock(): traced via __kprintf() when DEBUG_SPINLOCK is
 * defined, otherwise an alias of __ihk_mc_spinlock_unlock().  The tracing
 * form is wrapped in do { } while (0) so that it behaves as a single
 * statement.
 */
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_unlock(l, f) do { \
	__kprintf("[%d] call ihk_mc_spinlock_unlock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
	__ihk_mc_spinlock_unlock((l), (f)); \
	__kprintf("[%d] ret ihk_mc_spinlock_unlock\n", ihk_mc_get_processor_id()); \
} while (0)
#else
#define ihk_mc_spinlock_unlock __ihk_mc_spinlock_unlock
#endif
/*
 * Release `lock` exactly once, then restore the interrupt state saved in
 * `flags` by the matching lock call.
 */
static void __ihk_mc_spinlock_unlock(ihk_spinlock_t *lock, unsigned long flags)
{
	__ihk_mc_spinlock_unlock_noirq(lock);

	cpu_restore_interrupt(flags);
}
|
||||
|
||||
/* An implementation of the Mellor-Crummey Scott (MCS) lock */
|
||||
/*
 * Queue node of the MCS lock.  The same type is used for the lock itself
 * (its `next` field is the target of the atomic exchange in mcs_lock_lock())
 * and for each caller-supplied node.
 */
typedef struct mcs_lock_node {
|
||||
unsigned long locked; /* set to 1 by a waiter before spinning; cleared to 0 by the releasing holder */
|
||||
struct mcs_lock_node *next; /* node queued behind this one, or NULL */
|
||||
} __attribute__((aligned(64))) mcs_lock_node_t;
|
||||
|
||||
static void mcs_lock_init(struct mcs_lock_node *node)
|
||||
{
|
||||
node->locked = 0;
|
||||
node->next = NULL;
|
||||
}
|
||||
|
||||
static void mcs_lock_lock(struct mcs_lock_node *lock,
|
||||
struct mcs_lock_node *node)
|
||||
{
|
||||
struct mcs_lock_node *pred;
|
||||
|
||||
node->next = NULL;
|
||||
node->locked = 0;
|
||||
pred = (struct mcs_lock_node *)xchg8((unsigned long *)&lock->next,
|
||||
(unsigned long)node);
|
||||
|
||||
if (pred) {
|
||||
node->locked = 1;
|
||||
pred->next = node;
|
||||
while (node->locked != 0) {
|
||||
cpu_pause();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void mcs_lock_unlock(struct mcs_lock_node *lock,
|
||||
struct mcs_lock_node *node)
|
||||
{
|
||||
if (node->next == NULL) {
|
||||
struct mcs_lock_node *old = (struct mcs_lock_node *)
|
||||
atomic_cmpxchg8((unsigned long *)&lock->next,
|
||||
(unsigned long)node, (unsigned long)0);
|
||||
|
||||
if (old == node) {
|
||||
return;
|
||||
}
|
||||
|
||||
while (node->next == NULL) {
|
||||
cpu_pause();
|
||||
}
|
||||
}
|
||||
|
||||
node->next->locked = 0;
|
||||
}
|
||||
|
||||
// reader/writer lock
|
||||
/*
 * Queue node of the MCS reader/writer lock.  Used both for per-caller nodes
 * and for the "common reader" node embedded in struct mcs_rwlock_lock.
 */
typedef struct mcs_rwlock_node {
|
||||
ihk_atomic_t count; // number of readers; only meaningful on the common reader node
|
||||
char type; // one of the MCS_RWLOCK_TYPE_* values below
|
||||
#define MCS_RWLOCK_TYPE_COMMON_READER 0
|
||||
#define MCS_RWLOCK_TYPE_READER 1
|
||||
#define MCS_RWLOCK_TYPE_WRITER 2
|
||||
char locked; // MCS_RWLOCK_LOCKED while the owner of this node is waiting
|
||||
#define MCS_RWLOCK_LOCKED 1
|
||||
#define MCS_RWLOCK_UNLOCKED 0
|
||||
char dmy1; // written with the processor id by __mcs_rwlock_reader_lock_noirq(); not read in the code shown here
|
||||
char dmy2; // unused
|
||||
struct mcs_rwlock_node *next; // node queued behind this one, or NULL
|
||||
} __attribute__((aligned(64))) mcs_rwlock_node_t;
|
||||
|
||||
/* Queue node plus the saved interrupt state, used by the lock variants that disable interrupts. */
typedef struct mcs_rwlock_node_irqsave {
|
||||
struct mcs_rwlock_node node; /* node handed to the *_noirq functions */
|
||||
unsigned long irqsave; /* value returned by cpu_disable_interrupt_save() at lock time */
|
||||
} __attribute__((aligned(64))) mcs_rwlock_node_irqsave_t;
|
||||
|
||||
/* The reader/writer lock itself. */
typedef struct mcs_rwlock_lock {
|
||||
struct mcs_rwlock_node reader; /* common reader lock */
|
||||
struct mcs_rwlock_node *node; /* base: target of the atomic exchange in the lock functions; NULL after init */
|
||||
} __attribute__((aligned(64))) mcs_rwlock_lock_t;
|
||||
|
||||
static void
|
||||
mcs_rwlock_init(struct mcs_rwlock_lock *lock)
|
||||
{
|
||||
ihk_atomic_set(&lock->reader.count, 0);
|
||||
lock->reader.type = MCS_RWLOCK_TYPE_COMMON_READER;
|
||||
lock->node = NULL;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_MCS_RWLOCK
|
||||
#define mcs_rwlock_writer_lock_noirq(l, n) { \
|
||||
__kprintf("[%d] call mcs_rwlock_writer_lock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
|
||||
__mcs_rwlock_writer_lock_noirq((l), (n)); \
|
||||
__kprintf("[%d] ret mcs_rwlock_writer_lock_noirq\n", ihk_mc_get_processor_id()); \
|
||||
}
|
||||
#else
|
||||
#define mcs_rwlock_writer_lock_noirq __mcs_rwlock_writer_lock_noirq
|
||||
#endif
|
||||
static void
|
||||
__mcs_rwlock_writer_lock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
|
||||
{
|
||||
struct mcs_rwlock_node *pred;
|
||||
|
||||
preempt_disable();
|
||||
|
||||
node->type = MCS_RWLOCK_TYPE_WRITER;
|
||||
node->next = NULL;
|
||||
|
||||
pred = (struct mcs_rwlock_node *)xchg8((unsigned long *)&lock->node,
|
||||
(unsigned long)node);
|
||||
|
||||
if (pred) {
|
||||
node->locked = MCS_RWLOCK_LOCKED;
|
||||
pred->next = node;
|
||||
while (node->locked != MCS_RWLOCK_UNLOCKED) {
|
||||
cpu_pause();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Release the reader nodes queued behind the common reader node.
 *
 * The queue is walked starting at lock->reader.next.  Every node of type
 * MCS_RWLOCK_TYPE_READER is unlinked from the queue.  The first such node
 * is remembered in `f` and marked unlocked only after the walk; each later
 * reader is marked unlocked as soon as it has been unlinked.
 * lock->reader.count is incremented once up front and once more for every
 * reader after the first.
 *
 * NOTE(review): `f` is dereferenced unconditionally at the end.  The callers
 * visible in this file only call this when lock->reader.next is a reader
 * node - confirm that holds for every caller.
 */
static void
|
||||
mcs_rwlock_unlock_readers(struct mcs_rwlock_lock *lock)
|
||||
{
|
||||
struct mcs_rwlock_node *p;
|
||||
struct mcs_rwlock_node *f = NULL;
|
||||
struct mcs_rwlock_node *n;
|
||||
|
||||
ihk_atomic_inc(&lock->reader.count); // protect to unlock reader
|
||||
for(p = &lock->reader; p->next; p = n){
|
||||
n = p->next;
|
||||
if(p->next->type == MCS_RWLOCK_TYPE_READER){
|
||||
/* unlink reader n from behind p */
p->next = n->next;
|
||||
if(lock->node == n){
|
||||
struct mcs_rwlock_node *old;
|
||||
|
||||
/* n looked like the tail: try to make p the tail instead */
old = (struct mcs_rwlock_node *)atomic_cmpxchg8(
|
||||
(unsigned long *)&lock->node,
|
||||
(unsigned long)n,
|
||||
(unsigned long)p);
|
||||
|
||||
if(old != n){ // couldn't change
|
||||
/* the tail moved on: wait until n->next is set, then relink */
while (n->next == NULL) {
|
||||
cpu_pause();
|
||||
}
|
||||
p->next = n->next;
|
||||
}
|
||||
}
|
||||
else if(p->next == NULL){
|
||||
/* n is not the tail but its successor link is not set yet */
while (n->next == NULL) {
|
||||
cpu_pause();
|
||||
}
|
||||
p->next = n->next;
|
||||
}
|
||||
if(f){
|
||||
ihk_atomic_inc(&lock->reader.count);
|
||||
n->locked = MCS_RWLOCK_UNLOCKED;
|
||||
}
|
||||
else
|
||||
f = n;
|
||||
/* stay on p so the node that replaced n is examined next */
n = p;
|
||||
}
|
||||
if(n->next == NULL && lock->node != n){
|
||||
while (n->next == NULL && lock->node != n) {
|
||||
cpu_pause();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* the first reader found is released last */
f->locked = MCS_RWLOCK_UNLOCKED;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_MCS_RWLOCK
|
||||
#define mcs_rwlock_writer_unlock_noirq(l, n) { \
|
||||
__kprintf("[%d] call mcs_rwlock_writer_unlock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
|
||||
__mcs_rwlock_writer_unlock_noirq((l), (n)); \
|
||||
__kprintf("[%d] ret mcs_rwlock_writer_unlock_noirq\n", ihk_mc_get_processor_id()); \
|
||||
}
|
||||
#else
|
||||
#define mcs_rwlock_writer_unlock_noirq __mcs_rwlock_writer_unlock_noirq
|
||||
#endif
|
||||
static void
|
||||
__mcs_rwlock_writer_unlock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
|
||||
{
|
||||
if (node->next == NULL) {
|
||||
struct mcs_rwlock_node *old = (struct mcs_rwlock_node *)
|
||||
atomic_cmpxchg8((unsigned long *)&lock->node,
|
||||
(unsigned long)node, (unsigned long)0);
|
||||
|
||||
if (old == node) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
while (node->next == NULL) {
|
||||
cpu_pause();
|
||||
}
|
||||
}
|
||||
|
||||
if(node->next->type == MCS_RWLOCK_TYPE_READER){
|
||||
lock->reader.next = node->next;
|
||||
mcs_rwlock_unlock_readers(lock);
|
||||
}
|
||||
else{
|
||||
node->next->locked = MCS_RWLOCK_UNLOCKED;
|
||||
}
|
||||
|
||||
out:
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
#ifdef DEBUG_MCS_RWLOCK
|
||||
#define mcs_rwlock_reader_lock_noirq(l, n) { \
|
||||
__kprintf("[%d] call mcs_rwlock_reader_lock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
|
||||
__mcs_rwlock_reader_lock_noirq((l), (n)); \
|
||||
__kprintf("[%d] ret mcs_rwlock_reader_lock_noirq\n", ihk_mc_get_processor_id()); \
|
||||
}
|
||||
#else
|
||||
#define mcs_rwlock_reader_lock_noirq __mcs_rwlock_reader_lock_noirq
|
||||
#endif
|
||||
static void
|
||||
__mcs_rwlock_reader_lock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
|
||||
{
|
||||
struct mcs_rwlock_node *pred;
|
||||
|
||||
preempt_disable();
|
||||
|
||||
node->type = MCS_RWLOCK_TYPE_READER;
|
||||
node->next = NULL;
|
||||
node->dmy1 = ihk_mc_get_processor_id();
|
||||
|
||||
pred = (struct mcs_rwlock_node *)xchg8((unsigned long *)&lock->node,
|
||||
(unsigned long)node);
|
||||
|
||||
if (pred) {
|
||||
if(pred == &lock->reader){
|
||||
if(ihk_atomic_inc_return(&pred->count) != 1){
|
||||
struct mcs_rwlock_node *old;
|
||||
|
||||
old = (struct mcs_rwlock_node *)atomic_cmpxchg8(
|
||||
(unsigned long *)&lock->node,
|
||||
(unsigned long)node,
|
||||
(unsigned long)pred);
|
||||
|
||||
if (old == node) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
while (node->next == NULL) {
|
||||
cpu_pause();
|
||||
}
|
||||
|
||||
pred->next = node->next;
|
||||
if(node->next->type == MCS_RWLOCK_TYPE_READER)
|
||||
mcs_rwlock_unlock_readers(lock);
|
||||
goto out;
|
||||
}
|
||||
ihk_atomic_dec(&pred->count);
|
||||
}
|
||||
node->locked = MCS_RWLOCK_LOCKED;
|
||||
pred->next = node;
|
||||
while (node->locked != MCS_RWLOCK_UNLOCKED) {
|
||||
cpu_pause();
|
||||
}
|
||||
}
|
||||
else {
|
||||
lock->reader.next = node;
|
||||
mcs_rwlock_unlock_readers(lock);
|
||||
}
|
||||
out:
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_MCS_RWLOCK
|
||||
#define mcs_rwlock_reader_unlock_noirq(l, n) { \
|
||||
__kprintf("[%d] call mcs_rwlock_reader_unlock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
|
||||
__mcs_rwlock_reader_unlock_noirq((l), (n)); \
|
||||
__kprintf("[%d] ret mcs_rwlock_reader_unlock_noirq\n", ihk_mc_get_processor_id()); \
|
||||
}
|
||||
#else
|
||||
#define mcs_rwlock_reader_unlock_noirq __mcs_rwlock_reader_unlock_noirq
|
||||
#endif
|
||||
static void
|
||||
__mcs_rwlock_reader_unlock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
|
||||
{
|
||||
if(ihk_atomic_dec_return(&lock->reader.count))
|
||||
goto out;
|
||||
|
||||
if (lock->reader.next == NULL) {
|
||||
struct mcs_rwlock_node *old;
|
||||
|
||||
old = (struct mcs_rwlock_node *)atomic_cmpxchg8(
|
||||
(unsigned long *)&lock->node,
|
||||
(unsigned long)&lock->reader,
|
||||
(unsigned long)0);
|
||||
|
||||
if (old == &lock->reader) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
while (lock->reader.next == NULL) {
|
||||
cpu_pause();
|
||||
}
|
||||
}
|
||||
|
||||
if(lock->reader.next->type == MCS_RWLOCK_TYPE_READER){
|
||||
mcs_rwlock_unlock_readers(lock);
|
||||
}
|
||||
else{
|
||||
lock->reader.next->locked = MCS_RWLOCK_UNLOCKED;
|
||||
}
|
||||
|
||||
out:
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
#ifdef DEBUG_MCS_RWLOCK
|
||||
#define mcs_rwlock_writer_lock(l, n) { \
|
||||
__kprintf("[%d] call mcs_rwlock_writer_lock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
|
||||
__mcs_rwlock_writer_lock((l), (n)); \
|
||||
__kprintf("[%d] ret mcs_rwlock_writer_lock\n", ihk_mc_get_processor_id()); \
|
||||
}
|
||||
#else
|
||||
#define mcs_rwlock_writer_lock __mcs_rwlock_writer_lock
|
||||
#endif
|
||||
static void
|
||||
__mcs_rwlock_writer_lock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
|
||||
{
|
||||
node->irqsave = cpu_disable_interrupt_save();
|
||||
__mcs_rwlock_writer_lock_noirq(lock, &node->node);
|
||||
}
|
||||
|
||||
#ifdef DEBUG_MCS_RWLOCK
|
||||
#define mcs_rwlock_writer_unlock(l, n) { \
|
||||
__kprintf("[%d] call mcs_rwlock_writer_unlock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
|
||||
__mcs_rwlock_writer_unlock((l), (n)); \
|
||||
__kprintf("[%d] ret mcs_rwlock_writer_unlock\n", ihk_mc_get_processor_id()); \
|
||||
}
|
||||
#else
|
||||
#define mcs_rwlock_writer_unlock __mcs_rwlock_writer_unlock
|
||||
#endif
|
||||
static void
|
||||
__mcs_rwlock_writer_unlock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
|
||||
{
|
||||
__mcs_rwlock_writer_unlock_noirq(lock, &node->node);
|
||||
cpu_restore_interrupt(node->irqsave);
|
||||
}
|
||||
|
||||
#ifdef DEBUG_MCS_RWLOCK
|
||||
#define mcs_rwlock_reader_lock(l, n) { \
|
||||
__kprintf("[%d] call mcs_rwlock_reader_lock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
|
||||
__mcs_rwlock_reader_lock((l), (n)); \
|
||||
__kprintf("[%d] ret mcs_rwlock_reader_lock\n", ihk_mc_get_processor_id()); \
|
||||
}
|
||||
#else
|
||||
#define mcs_rwlock_reader_lock __mcs_rwlock_reader_lock
|
||||
#endif
|
||||
static void
|
||||
__mcs_rwlock_reader_lock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
|
||||
{
|
||||
node->irqsave = cpu_disable_interrupt_save();
|
||||
__mcs_rwlock_reader_lock_noirq(lock, &node->node);
|
||||
}
|
||||
|
||||
#ifdef DEBUG_MCS_RWLOCK
|
||||
#define mcs_rwlock_reader_unlock(l, n) { \
|
||||
__kprintf("[%d] call mcs_rwlock_reader_unlock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
|
||||
__mcs_rwlock_reader_unlock((l), (n)); \
|
||||
__kprintf("[%d] ret mcs_rwlock_reader_unlock\n", ihk_mc_get_processor_id()); \
|
||||
}
|
||||
#else
|
||||
#define mcs_rwlock_reader_unlock __mcs_rwlock_reader_unlock
|
||||
#endif
|
||||
static void
|
||||
__mcs_rwlock_reader_unlock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
|
||||
{
|
||||
__mcs_rwlock_reader_unlock_noirq(lock, &node->node);
|
||||
cpu_restore_interrupt(node->irqsave);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -5,6 +5,8 @@
|
||||
* Define and declare memory management macros and functions
|
||||
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2011 - 2012 Taku Shimosawa
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY
|
||||
@@ -117,6 +119,25 @@
|
||||
#define PTE_NULL ((pte_t)0)
|
||||
typedef unsigned long pte_t;
|
||||
|
||||
/*
 * pagemap kernel ABI bits
 *
 * Layout of one 64-bit pagemap entry, as encoded by the macros below:
 *   bits 63..61  status (PM_STATUS)
 *   bits 60..55  page shift (PM_PSHIFT)
 *   bits 54..0   page frame (PM_PFRAME)
 *
 * All masks are built from unsigned 64-bit values, so that shifting into
 * bit 63 is well defined.
 */
#define PM_ENTRY_BYTES		(sizeof(uint64_t))
#define PM_STATUS_BITS		3
#define PM_STATUS_OFFSET	(64 - PM_STATUS_BITS)
#define PM_STATUS_MASK		(((1ULL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
#define PM_STATUS(nr)		(((uint64_t)(nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK)
#define PM_PSHIFT_BITS		6
#define PM_PSHIFT_OFFSET	(PM_STATUS_OFFSET - PM_PSHIFT_BITS)
#define PM_PSHIFT_MASK		(((1ULL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
#define PM_PSHIFT(x)		(((uint64_t) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
#define PM_PFRAME_MASK		((1ULL << PM_PSHIFT_OFFSET) - 1)
#define PM_PFRAME(x)		((x) & PM_PFRAME_MASK)

#define PM_PRESENT		PM_STATUS(4LL)
#define PM_SWAP			PM_STATUS(2LL)
|
||||
|
||||
|
||||
/* For easy conversion, it is better to be the same as architecture's ones */
|
||||
enum ihk_mc_pt_attribute {
|
||||
PTATTR_ACTIVE = 0x01,
|
||||
@@ -128,6 +149,7 @@ enum ihk_mc_pt_attribute {
|
||||
PTATTR_NO_EXECUTE = 0x8000000000000000,
|
||||
PTATTR_UNCACHABLE = 0x10000,
|
||||
PTATTR_FOR_USER = 0x20000,
|
||||
PTATTR_WRITE_COMBINED = 0x40000,
|
||||
};
|
||||
|
||||
static inline int pte_is_null(pte_t *ptep)
|
||||
@@ -185,6 +207,12 @@ static inline off_t pte_get_off(pte_t *ptep, size_t pgsize)
|
||||
return (off_t)(*ptep & PAGE_MASK);
|
||||
}
|
||||
|
||||
/*
 * Reset a page table entry to the empty value PTE_NULL.
 *
 * ptep:   entry to overwrite.
 * pgsize: not used by this implementation.
 */
static inline void pte_make_null(pte_t *ptep, size_t pgsize)
{
	(void)pgsize;
	*ptep = PTE_NULL;
}
|
||||
|
||||
static inline void pte_make_fileoff(off_t off,
|
||||
enum ihk_mc_pt_attribute ptattr, size_t pgsize, pte_t *ptep)
|
||||
{
|
||||
@@ -216,6 +244,36 @@ static inline void pte_xchg(pte_t *ptep, pte_t *valp)
|
||||
#define pte_xchg(p,vp) do { *(vp) = xchg((p), *(vp)); } while (0)
|
||||
#endif
|
||||
|
||||
/*
 * Atomically clear the dirty flag of a page table entry.
 *
 * ptep:   entry to modify in place.
 * pgsize: selects which level's dirty flag is cleared (PTL1_SIZE,
 *         PTL2_SIZE or PTL3_SIZE); any other value is treated like
 *         PTL1_SIZE.
 */
static inline void pte_clear_dirty(pte_t *ptep, size_t pgsize)
{
	uint64_t mask;

	switch (pgsize) {
	default:	/* fallthrough: unknown sizes use the level-1 flag */
	case PTL1_SIZE:	mask = ~PFL1_DIRTY;	break;
	case PTL2_SIZE:	mask = ~PFL2_DIRTY;	break;
	case PTL3_SIZE:	mask = ~PFL3_DIRTY;	break;
	}

	/*
	 * The locked AND both reads and writes *ptep, so the entry has to be
	 * a read-write ("+m") operand.  Declared as a plain input, the
	 * compiler would be entitled to assume the entry is unchanged and
	 * reuse a stale copy of it.
	 */
	asm volatile ("lock andq %1,%0" : "+m"(*ptep) : "r"(mask) : "cc");
}
|
||||
|
||||
/*
 * Atomically set the dirty flag of a page table entry.
 *
 * ptep:   entry to modify in place.
 * pgsize: selects which level's dirty flag is set (PTL1_SIZE, PTL2_SIZE
 *         or PTL3_SIZE); any other value is treated like PTL1_SIZE.
 */
static inline void pte_set_dirty(pte_t *ptep, size_t pgsize)
{
	uint64_t mask;

	switch (pgsize) {
	default:	/* fallthrough: unknown sizes use the level-1 flag */
	case PTL1_SIZE:	mask = PFL1_DIRTY;	break;
	case PTL2_SIZE:	mask = PFL2_DIRTY;	break;
	case PTL3_SIZE:	mask = PFL3_DIRTY;	break;
	}

	/*
	 * The locked OR both reads and writes *ptep, so the entry has to be
	 * a read-write ("+m") operand.  Declared as a plain input, the
	 * compiler would be entitled to assume the entry is unchanged and
	 * reuse a stale copy of it.
	 */
	asm volatile ("lock orq %1,%0" : "+m"(*ptep) : "r"(mask) : "cc");
}
|
||||
|
||||
struct page_table;
|
||||
void set_pte(pte_t *ppte, unsigned long phys, enum ihk_mc_pt_attribute attr);
|
||||
pte_t *get_pte(struct page_table *pt, void *virt, enum ihk_mc_pt_attribute attr);
|
||||
@@ -227,8 +285,9 @@ void flush_tlb_single(unsigned long addr);
|
||||
|
||||
void *map_fixed_area(unsigned long phys, unsigned long size, int uncachable);
|
||||
|
||||
#define AP_TRAMPOLINE 0x10000
|
||||
#define AP_TRAMPOLINE_SIZE 0x4000
|
||||
extern unsigned long ap_trampoline;
|
||||
//#define AP_TRAMPOLINE 0x10000
|
||||
#define AP_TRAMPOLINE_SIZE 0x2000
|
||||
|
||||
/* Local is cachable */
|
||||
#define IHK_IKC_QUEUE_PT_ATTR (PTATTR_NO_EXECUTE | PTATTR_WRITABLE | PTATTR_UNCACHABLE)
|
||||
|
||||
28
arch/x86/kernel/include/arch/cpu.h
Normal file
28
arch/x86/kernel/include/arch/cpu.h
Normal file
@@ -0,0 +1,28 @@
|
||||
/**
|
||||
* \file cpu.h
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* Declare architecture-dependent types and functions to control CPU.
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
|
||||
* Copyright (C) 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY
|
||||
*/
|
||||
|
||||
#ifndef ARCH_CPU_H
|
||||
#define ARCH_CPU_H
|
||||
|
||||
#include <ihk/cpu.h>
|
||||
|
||||
/*
 * Read memory barrier.
 *
 * Implemented as barrier() only; no fence instruction is emitted here.
 * NOTE(review): presumably this relies on the x86 memory model keeping
 * loads ordered against other loads for ordinary memory -- confirm
 * before using it around non-temporal or device accesses.
 */
static inline void rmb(void)
{
	barrier();
}
|
||||
|
||||
/*
 * Write memory barrier.
 *
 * Implemented as barrier() only; no fence instruction is emitted here.
 * NOTE(review): presumably this relies on the x86 memory model keeping
 * stores ordered against other stores for ordinary memory -- confirm
 * before using it around non-temporal or write-combined accesses.
 */
static inline void wmb(void)
{
	barrier();
}
|
||||
|
||||
#endif /* ARCH_CPU_H */
|
||||
36
arch/x86/kernel/include/arch/mman.h
Normal file
36
arch/x86/kernel/include/arch/mman.h
Normal file
@@ -0,0 +1,36 @@
|
||||
/**
|
||||
* \file mman.h
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* memory management declarations
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
*/
|
||||
|
||||
#ifndef HEADER_ARCH_MMAN_H
|
||||
#define HEADER_ARCH_MMAN_H
|
||||
|
||||
/*
|
||||
* mapping flags
|
||||
*/
|
||||
#define MAP_32BIT 0x40
|
||||
#define MAP_GROWSDOWN 0x0100
|
||||
#define MAP_DENYWRITE 0x0800
|
||||
#define MAP_EXECUTABLE 0x1000
|
||||
#define MAP_LOCKED 0x2000
|
||||
#define MAP_NORESERVE 0x4000
|
||||
#define MAP_POPULATE 0x8000
|
||||
#define MAP_NONBLOCK 0x00010000
|
||||
#define MAP_STACK 0x00020000
|
||||
#define MAP_HUGETLB 0x00040000
|
||||
|
||||
/*
|
||||
* for mlockall()
|
||||
*/
|
||||
#define MCL_CURRENT 0x01
|
||||
#define MCL_FUTURE 0x02
|
||||
|
||||
#endif /* HEADER_ARCH_MMAN_H */
|
||||
40
arch/x86/kernel/include/arch/shm.h
Normal file
40
arch/x86/kernel/include/arch/shm.h
Normal file
@@ -0,0 +1,40 @@
|
||||
/**
|
||||
* \file shm.h
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* header file for System V shared memory
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
*/
|
||||
|
||||
#ifndef HEADER_ARCH_SHM_H
|
||||
#define HEADER_ARCH_SHM_H
|
||||
|
||||
struct ipc_perm {
|
||||
key_t key;
|
||||
uid_t uid;
|
||||
gid_t gid;
|
||||
uid_t cuid;
|
||||
gid_t cgid;
|
||||
uint16_t mode;
|
||||
uint8_t padding[2];
|
||||
uint16_t seq;
|
||||
uint8_t padding2[22];
|
||||
};
|
||||
|
||||
struct shmid_ds {
|
||||
struct ipc_perm shm_perm;
|
||||
size_t shm_segsz;
|
||||
time_t shm_atime;
|
||||
time_t shm_dtime;
|
||||
time_t shm_ctime;
|
||||
pid_t shm_cpid;
|
||||
pid_t shm_lpid;
|
||||
uint64_t shm_nattch;
|
||||
uint8_t padding[16];
|
||||
};
|
||||
|
||||
#endif /* HEADER_ARCH_SHM_H */
|
||||
@@ -42,7 +42,10 @@ struct x86_cpu_local_variables {
|
||||
uint64_t gdt[10];
|
||||
/* 128 */
|
||||
struct tss64 tss;
|
||||
|
||||
/* 232 */
|
||||
unsigned long paniced;
|
||||
uint64_t panic_regs[21];
|
||||
/* 408 */
|
||||
} __attribute__((packed));
|
||||
|
||||
struct x86_cpu_local_variables *get_x86_cpu_local_variable(int id);
|
||||
|
||||
@@ -13,6 +13,10 @@
|
||||
#ifndef HEADER_X86_COMMON_IHK_ATOMIC_H
|
||||
#define HEADER_X86_COMMON_IHK_ATOMIC_H
|
||||
|
||||
/***********************************************************************
|
||||
* ihk_atomic_t
|
||||
*/
|
||||
|
||||
typedef struct {
|
||||
int counter;
|
||||
} ihk_atomic_t;
|
||||
@@ -95,6 +99,30 @@ static inline int ihk_atomic_sub_return(int i, ihk_atomic_t *v)
|
||||
#define ihk_atomic_inc_return(v) (ihk_atomic_add_return(1, v))
|
||||
#define ihk_atomic_dec_return(v) (ihk_atomic_sub_return(1, v))
|
||||
|
||||
/***********************************************************************
|
||||
* ihk_atomic64_t
|
||||
*/
|
||||
|
||||
typedef struct {
|
||||
long counter64;
|
||||
} ihk_atomic64_t;
|
||||
|
||||
#define IHK_ATOMIC64_INIT(i) { .counter64 = (i) }
|
||||
|
||||
static inline long ihk_atomic64_read(const ihk_atomic64_t *v)
|
||||
{
|
||||
return *(volatile long *)&(v)->counter64;
|
||||
}
|
||||
|
||||
/*
 * Increment a 64-bit atomic counter by one using a lock-prefixed incq.
 * The counter is passed as a read-write memory operand.
 */
static inline void ihk_atomic64_inc(ihk_atomic64_t *v)
{
	long *counter = &v->counter64;

	asm volatile ("lock incq %0" : "+m"(*counter));
}
|
||||
|
||||
/***********************************************************************
|
||||
* others
|
||||
*/
|
||||
|
||||
/*
|
||||
* Note: no "lock" prefix even on SMP: xchg always implies lock anyway
|
||||
* Note 2: xchg has side effect, so that attribute volatile is necessary,
|
||||
@@ -112,6 +140,17 @@ static inline int ihk_atomic_sub_return(int i, ihk_atomic_t *v)
|
||||
__x; \
|
||||
})
|
||||
|
||||
static inline unsigned long xchg8(unsigned long *ptr, unsigned long x)
|
||||
{
|
||||
unsigned long __x = (x);
|
||||
asm volatile("xchgq %0,%1"
|
||||
: "=r" (__x)
|
||||
: "m" (*(volatile unsigned long*)(ptr)), "0" (__x)
|
||||
: "memory");
|
||||
|
||||
return __x;
|
||||
}
|
||||
|
||||
#define __xchg(x, ptr, size) \
|
||||
({ \
|
||||
__typeof(*(ptr)) __x = (x); \
|
||||
@@ -150,5 +189,17 @@ static inline int ihk_atomic_sub_return(int i, ihk_atomic_t *v)
|
||||
#define xchg(ptr, v) \
|
||||
__xchg((v), (ptr), sizeof(*ptr))
|
||||
|
||||
static inline unsigned long atomic_cmpxchg8(unsigned long *addr,
|
||||
unsigned long oldval,
|
||||
unsigned long newval)
|
||||
{
|
||||
asm volatile("lock; cmpxchgq %2, %1\n"
|
||||
: "=a" (oldval), "+m" (*addr)
|
||||
: "r" (newval), "0" (oldval)
|
||||
: "memory"
|
||||
);
|
||||
|
||||
return oldval;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -22,19 +22,35 @@ struct x86_kregs {
|
||||
};
|
||||
|
||||
typedef struct x86_kregs ihk_mc_kernel_context_t;
|
||||
|
||||
/* XXX: User context should contain floating point registers */
|
||||
typedef struct x86_regs ihk_mc_user_context_t;
|
||||
struct x86_user_context {
|
||||
struct x86_sregs sr;
|
||||
|
||||
#define ihk_mc_syscall_arg0(uc) (uc)->rdi
|
||||
#define ihk_mc_syscall_arg1(uc) (uc)->rsi
|
||||
#define ihk_mc_syscall_arg2(uc) (uc)->rdx
|
||||
#define ihk_mc_syscall_arg3(uc) (uc)->r10
|
||||
#define ihk_mc_syscall_arg4(uc) (uc)->r8
|
||||
#define ihk_mc_syscall_arg5(uc) (uc)->r9
|
||||
/* 16-byte boundary here */
|
||||
uint8_t is_gpr_valid;
|
||||
uint8_t is_sr_valid;
|
||||
uint8_t spare_flags6;
|
||||
uint8_t spare_flags5;
|
||||
uint8_t spare_flags4;
|
||||
uint8_t spare_flags3;
|
||||
uint8_t spare_flags2;
|
||||
uint8_t spare_flags1;
|
||||
struct x86_basic_regs gpr; /* must be last */
|
||||
/* 16-byte boundary here */
|
||||
};
|
||||
typedef struct x86_user_context ihk_mc_user_context_t;
|
||||
|
||||
#define ihk_mc_syscall_ret(uc) (uc)->rax
|
||||
#define ihk_mc_syscall_arg0(uc) (uc)->gpr.rdi
|
||||
#define ihk_mc_syscall_arg1(uc) (uc)->gpr.rsi
|
||||
#define ihk_mc_syscall_arg2(uc) (uc)->gpr.rdx
|
||||
#define ihk_mc_syscall_arg3(uc) (uc)->gpr.r10
|
||||
#define ihk_mc_syscall_arg4(uc) (uc)->gpr.r8
|
||||
#define ihk_mc_syscall_arg5(uc) (uc)->gpr.r9
|
||||
|
||||
#define ihk_mc_syscall_pc(uc) (uc)->rip
|
||||
#define ihk_mc_syscall_sp(uc) (uc)->rsp
|
||||
#define ihk_mc_syscall_ret(uc) (uc)->gpr.rax
|
||||
|
||||
#define ihk_mc_syscall_pc(uc) (uc)->gpr.rip
|
||||
#define ihk_mc_syscall_sp(uc) (uc)->gpr.rsp
|
||||
|
||||
#endif
|
||||
|
||||
@@ -6,6 +6,8 @@
|
||||
* Machine Specific Registers (MSR)
|
||||
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2011 - 2012 Taku Shimosawa
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY
|
||||
@@ -16,7 +18,31 @@
|
||||
|
||||
#include <types.h>
|
||||
|
||||
#define RFLAGS_CF (1 << 0)
|
||||
#define RFLAGS_PF (1 << 2)
|
||||
#define RFLAGS_AF (1 << 4)
|
||||
#define RFLAGS_ZF (1 << 6)
|
||||
#define RFLAGS_SF (1 << 7)
|
||||
#define RFLAGS_TF (1 << 8)
|
||||
#define RFLAGS_IF (1 << 9)
|
||||
#define RFLAGS_DF (1 << 10)
|
||||
#define RFLAGS_OF (1 << 11)
|
||||
#define RFLAGS_IOPL (3 << 12)
|
||||
#define RFLAGS_NT (1 << 14)
|
||||
#define RFLAGS_RF (1 << 16)
|
||||
#define RFLAGS_VM (1 << 17)
|
||||
#define RFLAGS_AC (1 << 18)
|
||||
#define RFLAGS_VIF (1 << 19)
|
||||
#define RFLAGS_VIP (1 << 20)
|
||||
#define RFLAGS_ID (1 << 21)
|
||||
|
||||
#define DB6_B0 (1 << 0)
|
||||
#define DB6_B1 (1 << 1)
|
||||
#define DB6_B2 (1 << 2)
|
||||
#define DB6_B3 (1 << 3)
|
||||
#define DB6_BD (1 << 13)
|
||||
#define DB6_BS (1 << 14)
|
||||
#define DB6_BT (1 << 15)
|
||||
|
||||
#define MSR_EFER 0xc0000080
|
||||
#define MSR_STAR 0xc0000081
|
||||
@@ -26,6 +52,14 @@
|
||||
#define MSR_GS_BASE 0xc0000101
|
||||
|
||||
#define MSR_IA32_APIC_BASE 0x000000001b
|
||||
#define MSR_PLATFORM_INFO 0x000000ce
|
||||
#define MSR_IA32_PERF_CTL 0x00000199
|
||||
#define MSR_IA32_MISC_ENABLE 0x000001a0
|
||||
#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0
|
||||
#define MSR_NHM_TURBO_RATIO_LIMIT 0x000001ad
|
||||
#define MSR_IA32_CR_PAT 0x00000277
|
||||
#define MSR_IA32_XSS 0xda0
|
||||
|
||||
|
||||
#define CVAL(event, mask) \
|
||||
((((event) & 0xf00) << 24) | ((mask) << 8) | ((event) & 0xff))
|
||||
@@ -37,6 +71,25 @@
|
||||
#define MSR_PERF_CTL_0 0xc0010000
|
||||
#define MSR_PERF_CTR_0 0xc0010004
|
||||
|
||||
static unsigned long xgetbv(unsigned int index)
|
||||
{
|
||||
unsigned int low, high;
|
||||
|
||||
asm volatile("xgetbv" : "=a" (low), "=d" (high) : "c" (index));
|
||||
|
||||
return low | ((unsigned long)high << 32);
|
||||
}
|
||||
|
||||
static void xsetbv(unsigned int index, unsigned long val)
|
||||
{
|
||||
unsigned int low, high;
|
||||
|
||||
low = val;
|
||||
high = val >> 32;
|
||||
|
||||
asm volatile("xsetbv" : : "a" (low), "d" (high), "c" (index));
|
||||
}
|
||||
|
||||
static void wrmsr(unsigned int idx, unsigned long value){
|
||||
unsigned int high, low;
|
||||
|
||||
@@ -135,10 +188,19 @@ struct tss64 {
|
||||
unsigned short iomap_address;
|
||||
} __attribute__((packed));
|
||||
|
||||
struct x86_regs {
|
||||
unsigned long r15, r14, r13, r12, r11, r10, r9, r8;
|
||||
unsigned long rdi, rsi, rdx, rcx, rbx, rax, rbp;
|
||||
unsigned long error, rip, cs, rflags, rsp, ss;
|
||||
struct x86_basic_regs {
|
||||
unsigned long r15, r14, r13, r12, rbp, rbx, r11, r10;
|
||||
unsigned long r9, r8, rax, rcx, rdx, rsi, rdi, error;
|
||||
unsigned long rip, cs, rflags, rsp, ss;
|
||||
};
|
||||
|
||||
struct x86_sregs {
|
||||
unsigned long fs_base;
|
||||
unsigned long gs_base;
|
||||
unsigned long ds;
|
||||
unsigned long es;
|
||||
unsigned long fs;
|
||||
unsigned long gs;
|
||||
};
|
||||
|
||||
#define REGS_GET_STACK_POINTER(regs) (((struct x86_regs *)regs)->rsp)
|
||||
@@ -162,7 +224,72 @@ enum x86_pf_error_code {
|
||||
PF_RSVD = 1 << 3,
|
||||
PF_INSTR = 1 << 4,
|
||||
|
||||
PF_PATCH = 1 << 29,
|
||||
PF_POPULATE = 1 << 30,
|
||||
};
|
||||
|
||||
struct i387_fxsave_struct {
|
||||
unsigned short cwd;
|
||||
unsigned short swd;
|
||||
unsigned short twd;
|
||||
unsigned short fop;
|
||||
union {
|
||||
struct {
|
||||
unsigned long rip;
|
||||
unsigned long rdp;
|
||||
};
|
||||
struct {
|
||||
unsigned int fip;
|
||||
unsigned int fcs;
|
||||
unsigned int foo;
|
||||
unsigned int fos;
|
||||
};
|
||||
};
|
||||
unsigned int mxcsr;
|
||||
unsigned int mxcsr_mask;
|
||||
unsigned int st_space[32];
|
||||
unsigned int xmm_space[64];
|
||||
unsigned int padding[12];
|
||||
union {
|
||||
unsigned int padding1[12];
|
||||
unsigned int sw_reserved[12];
|
||||
};
|
||||
|
||||
} __attribute__((aligned(16)));
|
||||
|
||||
struct ymmh_struct {
|
||||
unsigned int ymmh_space[64];
|
||||
};
|
||||
|
||||
struct lwp_struct {
|
||||
unsigned char reserved[128];
|
||||
};
|
||||
|
||||
struct bndreg {
|
||||
unsigned long lower_bound;
|
||||
unsigned long upper_bound;
|
||||
} __attribute__((packed));
|
||||
|
||||
struct bndcsr {
|
||||
unsigned long bndcfgu;
|
||||
unsigned long bndstatus;
|
||||
} __attribute__((packed));
|
||||
|
||||
struct xsave_hdr_struct {
|
||||
unsigned long xstate_bv;
|
||||
unsigned long xcomp_bv;
|
||||
unsigned long reserved[6];
|
||||
} __attribute__((packed));
|
||||
|
||||
struct xsave_struct {
|
||||
struct i387_fxsave_struct i387;
|
||||
struct xsave_hdr_struct xsave_hdr;
|
||||
struct ymmh_struct ymmh;
|
||||
struct lwp_struct lwp;
|
||||
struct bndreg bndreg[4];
|
||||
struct bndcsr bndcsr;
|
||||
} __attribute__ ((packed, aligned (64)));
|
||||
|
||||
typedef struct xsave_struct fp_regs_struct;
|
||||
|
||||
#endif
|
||||
|
||||
@@ -23,9 +23,10 @@
|
||||
SYSCALL_DELEGATED(0, read)
|
||||
SYSCALL_DELEGATED(1, write)
|
||||
SYSCALL_DELEGATED(2, open)
|
||||
SYSCALL_DELEGATED(3, close)
|
||||
SYSCALL_HANDLED(3, close)
|
||||
SYSCALL_DELEGATED(4, stat)
|
||||
SYSCALL_DELEGATED(5, fstat)
|
||||
SYSCALL_DELEGATED(7, poll)
|
||||
SYSCALL_DELEGATED(8, lseek)
|
||||
SYSCALL_HANDLED(9, mmap)
|
||||
SYSCALL_HANDLED(10, mprotect)
|
||||
@@ -39,10 +40,17 @@ SYSCALL_DELEGATED(17, pread64)
|
||||
SYSCALL_DELEGATED(18, pwrite64)
|
||||
SYSCALL_DELEGATED(20, writev)
|
||||
SYSCALL_DELEGATED(21, access)
|
||||
SYSCALL_DELEGATED(23, select)
|
||||
SYSCALL_HANDLED(24, sched_yield)
|
||||
SYSCALL_HANDLED(25, mremap)
|
||||
SYSCALL_HANDLED(26, msync)
|
||||
SYSCALL_HANDLED(27, mincore)
|
||||
SYSCALL_HANDLED(28, madvise)
|
||||
SYSCALL_HANDLED(29, shmget)
|
||||
SYSCALL_HANDLED(30, shmat)
|
||||
SYSCALL_HANDLED(31, shmctl)
|
||||
SYSCALL_HANDLED(34, pause)
|
||||
SYSCALL_HANDLED(35, nanosleep)
|
||||
SYSCALL_HANDLED(39, getpid)
|
||||
SYSCALL_HANDLED(56, clone)
|
||||
SYSCALL_DELEGATED(57, fork)
|
||||
@@ -52,43 +60,84 @@ SYSCALL_HANDLED(60, exit)
|
||||
SYSCALL_HANDLED(61, wait4)
|
||||
SYSCALL_HANDLED(62, kill)
|
||||
SYSCALL_DELEGATED(63, uname)
|
||||
SYSCALL_DELEGATED(65, semop)
|
||||
SYSCALL_HANDLED(67, shmdt)
|
||||
SYSCALL_DELEGATED(69, msgsnd)
|
||||
SYSCALL_DELEGATED(70, msgrcv)
|
||||
SYSCALL_DELEGATED(72, fcntl)
|
||||
SYSCALL_DELEGATED(79, getcwd)
|
||||
SYSCALL_DELEGATED(89, readlink)
|
||||
SYSCALL_DELEGATED(96, gettimeofday)
|
||||
SYSCALL_HANDLED(96, gettimeofday)
|
||||
SYSCALL_HANDLED(97, getrlimit)
|
||||
SYSCALL_HANDLED(101, ptrace)
|
||||
SYSCALL_DELEGATED(102, getuid)
|
||||
SYSCALL_DELEGATED(104, getgid)
|
||||
SYSCALL_DELEGATED(107, geteuid)
|
||||
SYSCALL_DELEGATED(108, getegid)
|
||||
SYSCALL_HANDLED(102, getuid)
|
||||
SYSCALL_HANDLED(104, getgid)
|
||||
SYSCALL_HANDLED(105, setuid)
|
||||
SYSCALL_HANDLED(106, setgid)
|
||||
SYSCALL_HANDLED(107, geteuid)
|
||||
SYSCALL_HANDLED(108, getegid)
|
||||
SYSCALL_HANDLED(109, setpgid)
|
||||
SYSCALL_DELEGATED(110, getppid)
|
||||
SYSCALL_HANDLED(110, getppid)
|
||||
SYSCALL_DELEGATED(111, getpgrp)
|
||||
SYSCALL_HANDLED(113, setreuid)
|
||||
SYSCALL_HANDLED(114, setregid)
|
||||
SYSCALL_HANDLED(117, setresuid)
|
||||
SYSCALL_HANDLED(118, getresuid)
|
||||
SYSCALL_HANDLED(119, setresgid)
|
||||
SYSCALL_HANDLED(120, getresgid)
|
||||
SYSCALL_HANDLED(122, setfsuid)
|
||||
SYSCALL_HANDLED(123, setfsgid)
|
||||
SYSCALL_HANDLED(127, rt_sigpending)
|
||||
SYSCALL_HANDLED(128, rt_sigtimedwait)
|
||||
SYSCALL_HANDLED(129, rt_sigqueueinfo)
|
||||
SYSCALL_HANDLED(130, rt_sigsuspend)
|
||||
SYSCALL_HANDLED(131, sigaltstack)
|
||||
SYSCALL_HANDLED(142, sched_setparam)
|
||||
SYSCALL_HANDLED(143, sched_getparam)
|
||||
SYSCALL_HANDLED(144, sched_setscheduler)
|
||||
SYSCALL_HANDLED(145, sched_getscheduler)
|
||||
SYSCALL_HANDLED(146, sched_get_priority_max)
|
||||
SYSCALL_HANDLED(147, sched_get_priority_min)
|
||||
SYSCALL_HANDLED(148, sched_rr_get_interval)
|
||||
SYSCALL_HANDLED(149, mlock)
|
||||
SYSCALL_HANDLED(150, munlock)
|
||||
SYSCALL_HANDLED(151, mlockall)
|
||||
SYSCALL_HANDLED(152, munlockall)
|
||||
SYSCALL_HANDLED(158, arch_prctl)
|
||||
SYSCALL_HANDLED(160, setrlimit)
|
||||
SYSCALL_HANDLED(164, settimeofday)
|
||||
SYSCALL_HANDLED(186, gettid)
|
||||
SYSCALL_DELEGATED(201, time)
|
||||
SYSCALL_HANDLED(202, futex)
|
||||
SYSCALL_HANDLED(203, sched_setaffinity)
|
||||
SYSCALL_HANDLED(204, sched_getaffinity)
|
||||
SYSCALL_DELEGATED(208, io_getevents)
|
||||
SYSCALL_HANDLED(216, remap_file_pages)
|
||||
SYSCALL_DELEGATED(217, getdents64)
|
||||
SYSCALL_HANDLED(218, set_tid_address)
|
||||
SYSCALL_DELEGATED(220, semtimedop)
|
||||
SYSCALL_DELEGATED(230, clock_nanosleep)
|
||||
SYSCALL_HANDLED(231, exit_group)
|
||||
SYSCALL_DELEGATED(232, epoll_wait)
|
||||
SYSCALL_HANDLED(234, tgkill)
|
||||
SYSCALL_HANDLED(237, mbind)
|
||||
SYSCALL_HANDLED(238, set_mempolicy)
|
||||
SYSCALL_HANDLED(239, get_mempolicy)
|
||||
SYSCALL_HANDLED(247, waitid)
|
||||
SYSCALL_HANDLED(256, migrate_pages)
|
||||
SYSCALL_DELEGATED(270, pselect6)
|
||||
SYSCALL_DELEGATED(271, ppoll)
|
||||
SYSCALL_HANDLED(273, set_robust_list)
|
||||
SYSCALL_HANDLED(279, move_pages)
|
||||
SYSCALL_DELEGATED(281, epoll_pwait)
|
||||
SYSCALL_HANDLED(282, signalfd)
|
||||
SYSCALL_HANDLED(289, signalfd4)
|
||||
#ifdef DCFA_KMOD
|
||||
SYSCALL_HANDLED(303, mod_call)
|
||||
#endif
|
||||
SYSCALL_HANDLED(309, getcpu)
|
||||
SYSCALL_HANDLED(310, process_vm_readv)
|
||||
SYSCALL_HANDLED(311, process_vm_writev)
|
||||
SYSCALL_HANDLED(601, pmc_init)
|
||||
SYSCALL_HANDLED(602, pmc_start)
|
||||
SYSCALL_HANDLED(603, pmc_stop)
|
||||
|
||||
@@ -24,39 +24,56 @@
|
||||
#define USER_CS (48 + 3)
|
||||
#define USER_DS (56 + 3)
|
||||
|
||||
#define PUSH_ALL_REGS \
|
||||
pushq %rbp; \
|
||||
pushq %rax; \
|
||||
pushq %rbx; \
|
||||
pushq %rcx; \
|
||||
pushq %rdx; \
|
||||
pushq %rsi; \
|
||||
pushq %rdi; \
|
||||
pushq %r8; \
|
||||
pushq %r9; \
|
||||
pushq %r10; \
|
||||
pushq %r11; \
|
||||
pushq %r12; \
|
||||
pushq %r13; \
|
||||
pushq %r14; \
|
||||
pushq %r15;
|
||||
#define POP_ALL_REGS \
|
||||
popq %r15; \
|
||||
popq %r14; \
|
||||
popq %r13; \
|
||||
popq %r12; \
|
||||
popq %r11; \
|
||||
popq %r10; \
|
||||
popq %r9; \
|
||||
popq %r8; \
|
||||
popq %rdi; \
|
||||
popq %rsi; \
|
||||
popq %rdx; \
|
||||
popq %rcx; \
|
||||
popq %rbx; \
|
||||
popq %rax; \
|
||||
popq %rbp
|
||||
|
||||
/* struct x86_user_context */
|
||||
#define X86_SREGS_BASE (0)
|
||||
#define X86_SREGS_SIZE 48
|
||||
|
||||
#define X86_FLAGS_BASE (X86_SREGS_BASE + X86_SREGS_SIZE)
|
||||
#define X86_FLAGS_SIZE 8
|
||||
|
||||
#define X86_REGS_BASE (X86_FLAGS_BASE + X86_FLAGS_SIZE)
|
||||
#define RAX_OFFSET (X86_REGS_BASE + 80)
|
||||
#define ERROR_OFFSET (X86_REGS_BASE + 120)
|
||||
#define RSP_OFFSET (X86_REGS_BASE + 152)
|
||||
|
||||
#define PUSH_ALL_REGS \
|
||||
pushq %rdi; \
|
||||
pushq %rsi; \
|
||||
pushq %rdx; \
|
||||
pushq %rcx; \
|
||||
pushq %rax; \
|
||||
pushq %r8; \
|
||||
pushq %r9; \
|
||||
pushq %r10; \
|
||||
pushq %r11; \
|
||||
pushq %rbx; \
|
||||
pushq %rbp; \
|
||||
pushq %r12; \
|
||||
pushq %r13; \
|
||||
pushq %r14; \
|
||||
pushq %r15; \
|
||||
pushq $1; /* is_gpr_valid is set, and others are cleared */ \
|
||||
subq $X86_FLAGS_BASE,%rsp /* for x86_sregs, etc. */
|
||||
|
||||
#define POP_ALL_REGS \
|
||||
movq $0,X86_FLAGS_BASE(%rsp); /* clear all flags */ \
|
||||
addq $X86_REGS_BASE,%rsp; /* discard x86_sregs, flags, etc. */ \
|
||||
popq %r15; \
|
||||
popq %r14; \
|
||||
popq %r13; \
|
||||
popq %r12; \
|
||||
popq %rbp; \
|
||||
popq %rbx; \
|
||||
popq %r11; \
|
||||
popq %r10; \
|
||||
popq %r9; \
|
||||
popq %r8; \
|
||||
popq %rax; \
|
||||
popq %rcx; \
|
||||
popq %rdx; \
|
||||
popq %rsi; \
|
||||
popq %rdi
|
||||
|
||||
.data
|
||||
.globl generic_common_handlers
|
||||
generic_common_handlers:
|
||||
@@ -75,7 +92,7 @@ vector=vector+1
|
||||
|
||||
common_interrupt:
|
||||
PUSH_ALL_REGS
|
||||
movq 120(%rsp), %rdi
|
||||
movq ERROR_OFFSET(%rsp), %rdi
|
||||
movq %rsp, %rsi
|
||||
call handle_interrupt /* Enter C code */
|
||||
POP_ALL_REGS
|
||||
@@ -91,7 +108,7 @@ page_fault:
|
||||
cld
|
||||
PUSH_ALL_REGS
|
||||
movq %cr2, %rdi
|
||||
movq 120(%rsp),%rsi
|
||||
movq ERROR_OFFSET(%rsp),%rsi
|
||||
movq %rsp,%rdx
|
||||
movq __page_fault_handler_address(%rip), %rax
|
||||
andq %rax, %rax
|
||||
@@ -113,10 +130,53 @@ general_protection_exception:
|
||||
addq $8, %rsp
|
||||
iretq
|
||||
|
||||
.globl nmi
|
||||
nmi:
|
||||
#define PANICED 232
|
||||
#define PANIC_REGS 240
|
||||
movq %rax,%gs:PANIC_REGS+0x00
|
||||
movq %rbx,%gs:PANIC_REGS+0x08
|
||||
movq %rcx,%gs:PANIC_REGS+0x10
|
||||
movq %rdx,%gs:PANIC_REGS+0x18
|
||||
movq %rsi,%gs:PANIC_REGS+0x20
|
||||
movq %rdi,%gs:PANIC_REGS+0x28
|
||||
movq %rbp,%gs:PANIC_REGS+0x30
|
||||
movq 0x18(%rsp),%rax /* rsp */
|
||||
movq %rax,%gs:PANIC_REGS+0x38
|
||||
movq %r8, %gs:PANIC_REGS+0x40
|
||||
movq %r9, %gs:PANIC_REGS+0x48
|
||||
movq %r10,%gs:PANIC_REGS+0x50
|
||||
movq %r11,%gs:PANIC_REGS+0x58
|
||||
movq %r12,%gs:PANIC_REGS+0x60
|
||||
movq %r13,%gs:PANIC_REGS+0x68
|
||||
movq %r14,%gs:PANIC_REGS+0x70
|
||||
movq %r15,%gs:PANIC_REGS+0x78
|
||||
movq 0x00(%rsp),%rax /* rip */
|
||||
movq %rax,%gs:PANIC_REGS+0x80
|
||||
movq 0x10(%rsp),%rax /* rflags */
|
||||
movl %eax,%gs:PANIC_REGS+0x88
|
||||
movq 0x08(%rsp),%rax /* cs */
|
||||
movl %eax,%gs:PANIC_REGS+0x8C
|
||||
movq 0x20(%rsp),%rax /* ss */
|
||||
movl %eax,%gs:PANIC_REGS+0x90
|
||||
xorq %rax,%rax
|
||||
movw %ds,%ax
|
||||
movl %eax,%gs:PANIC_REGS+0x94
|
||||
movw %es,%ax
|
||||
movl %eax,%gs:PANIC_REGS+0x98
|
||||
movw %fs,%ax
|
||||
movl %eax,%gs:PANIC_REGS+0x9C
|
||||
movw %gs,%ax
|
||||
movl %eax,%gs:PANIC_REGS+0xA0
|
||||
movq $1,%gs:PANICED
|
||||
1:
|
||||
hlt
|
||||
jmp 1b
|
||||
|
||||
.globl x86_syscall
|
||||
x86_syscall:
|
||||
cld
|
||||
movq %rsp, %gs:24
|
||||
movq %rsp, %gs:X86_CPU_LOCAL_OFFSET_USTACK
|
||||
movq %gs:(X86_CPU_LOCAL_OFFSET_SP0), %rsp
|
||||
|
||||
pushq $(USER_DS)
|
||||
@@ -124,21 +184,19 @@ x86_syscall:
|
||||
pushq %r11
|
||||
pushq $(USER_CS)
|
||||
pushq %rcx
|
||||
pushq $0
|
||||
movq %gs:24, %rcx
|
||||
movq %rcx, 32(%rsp)
|
||||
pushq %rax /* error code (= system call number) */
|
||||
PUSH_ALL_REGS
|
||||
movq 104(%rsp), %rdi
|
||||
movq %gs:X86_CPU_LOCAL_OFFSET_USTACK, %rcx
|
||||
movq %rcx, RSP_OFFSET(%rsp)
|
||||
movq RAX_OFFSET(%rsp), %rdi
|
||||
movw %ss, %ax
|
||||
movw %ax, %ds
|
||||
movq %rsp, %rsi
|
||||
callq *__x86_syscall_handler(%rip)
|
||||
1:
|
||||
movq %rax, 104(%rsp)
|
||||
movq %rax, RAX_OFFSET(%rsp)
|
||||
POP_ALL_REGS
|
||||
#ifdef USE_SYSRET
|
||||
movq 8(%rsp), %rcx
|
||||
movq 24(%rsp), %r11
|
||||
movq 32(%rsp), %rsp
|
||||
sysretq
|
||||
#else
|
||||
@@ -147,7 +205,33 @@ x86_syscall:
|
||||
#endif
|
||||
|
||||
.globl enter_user_mode
|
||||
enter_user_mode:
|
||||
enter_user_mode:
|
||||
callq release_runq_lock
|
||||
movq $0, %rdi
|
||||
movq %rsp, %rsi
|
||||
call check_signal
|
||||
POP_ALL_REGS
|
||||
addq $8, %rsp
|
||||
iretq
|
||||
|
||||
.globl debug_exception
|
||||
debug_exception:
|
||||
cld
|
||||
pushq $0 /* error */
|
||||
PUSH_ALL_REGS
|
||||
movq %rsp, %rdi
|
||||
call debug_handler
|
||||
POP_ALL_REGS
|
||||
addq $8, %rsp
|
||||
iretq
|
||||
|
||||
.globl int3_exception
|
||||
int3_exception:
|
||||
cld
|
||||
pushq $0 /* error */
|
||||
PUSH_ALL_REGS
|
||||
movq %rsp, %rdi
|
||||
call int3_handler
|
||||
POP_ALL_REGS
|
||||
addq $8, %rsp
|
||||
iretq
|
||||
|
||||
@@ -6,6 +6,8 @@
|
||||
* resides in memory.
|
||||
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2011 - 2012 Taku Shimosawa
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY
|
||||
@@ -19,13 +21,19 @@
|
||||
#include <registers.h>
|
||||
#include <string.h>
|
||||
|
||||
#define LOCALS_SPAN (4 * PAGE_SIZE)
|
||||
|
||||
struct x86_cpu_local_variables *locals;
|
||||
size_t x86_cpu_local_variables_span = LOCALS_SPAN; /* for debugger */
|
||||
|
||||
void init_processors_local(int max_id)
|
||||
{
|
||||
size_t size;
|
||||
|
||||
size = LOCALS_SPAN * max_id;
|
||||
/* Is contiguous allocating adequate?? */
|
||||
locals = ihk_mc_alloc_pages(max_id, IHK_MC_AP_CRITICAL);
|
||||
memset(locals, 0, PAGE_SIZE * max_id);
|
||||
locals = ihk_mc_alloc_pages(size/PAGE_SIZE, IHK_MC_AP_CRITICAL);
|
||||
memset(locals, 0, size);
|
||||
|
||||
kprintf("locals = %p\n", locals);
|
||||
}
|
||||
@@ -33,12 +41,12 @@ void init_processors_local(int max_id)
|
||||
struct x86_cpu_local_variables *get_x86_cpu_local_variable(int id)
|
||||
{
|
||||
return (struct x86_cpu_local_variables *)
|
||||
((char *)locals + (id << PAGE_SHIFT));
|
||||
((char *)locals + (LOCALS_SPAN * id));
|
||||
}
|
||||
|
||||
static void *get_x86_cpu_local_kstack(int id)
|
||||
{
|
||||
return ((char *)locals + ((id + 1) << PAGE_SHIFT));
|
||||
return ((char *)locals + (LOCALS_SPAN * (id + 1)));
|
||||
}
|
||||
|
||||
struct x86_cpu_local_variables *get_x86_this_cpu_local(void)
|
||||
@@ -80,6 +88,15 @@ void assign_processor_id(void)
|
||||
v->processor_id = id;
|
||||
}
|
||||
|
||||
void init_boot_processor_local(void)
|
||||
{
|
||||
static struct x86_cpu_local_variables avar;
|
||||
|
||||
memset(&avar, -1, sizeof(avar));
|
||||
set_gs_base(&avar);
|
||||
return;
|
||||
}
|
||||
|
||||
/** IHK **/
|
||||
int ihk_mc_get_processor_id(void)
|
||||
{
|
||||
|
||||
@@ -5,6 +5,8 @@
|
||||
* Acquire physical pages and manipulate page table entries.
|
||||
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2011 - 2012 Taku Shimosawa
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY
|
||||
@@ -20,8 +22,9 @@
|
||||
#include <list.h>
|
||||
#include <process.h>
|
||||
#include <page.h>
|
||||
#include <cls.h>
|
||||
|
||||
#define dkprintf(...)
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
|
||||
static char *last_page;
|
||||
@@ -263,7 +266,11 @@ static unsigned long attr_to_l1attr(enum ihk_mc_pt_attribute attr)
|
||||
{
|
||||
if (attr & PTATTR_UNCACHABLE) {
|
||||
return (attr & ATTR_MASK) | PFL1_PCD | PFL1_PWT;
|
||||
} else {
|
||||
}
|
||||
else if (attr & PTATTR_WRITE_COMBINED) {
|
||||
return (attr & ATTR_MASK) | PFL1_PWT;
|
||||
}
|
||||
else {
|
||||
return (attr & ATTR_MASK);
|
||||
}
|
||||
}
|
||||
@@ -367,6 +374,7 @@ static int __set_pt_page(struct page_table *pt, void *virt, unsigned long phys,
|
||||
unsigned long init_pt_lock_flags;
|
||||
int ret = -ENOMEM;
|
||||
|
||||
init_pt_lock_flags = 0; /* for avoidance of warning */
|
||||
if (in_kernel) {
|
||||
init_pt_lock_flags = ihk_mc_spinlock_lock(&init_pt_lock);
|
||||
}
|
||||
@@ -494,8 +502,52 @@ static int __clear_pt_page(struct page_table *pt, void *virt, int largepage)
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t ihk_mc_pt_virt_to_pagemap(struct page_table *pt, unsigned long virt)
|
||||
{
|
||||
int l4idx, l3idx, l2idx, l1idx;
|
||||
unsigned long v = (unsigned long)virt;
|
||||
uint64_t ret = 0;
|
||||
|
||||
if (!pt) {
|
||||
pt = init_pt;
|
||||
}
|
||||
|
||||
GET_VIRT_INDICES(v, l4idx, l3idx, l2idx, l1idx);
|
||||
|
||||
if (!(pt->entry[l4idx] & PFL4_PRESENT)) {
|
||||
return ret;
|
||||
}
|
||||
pt = phys_to_virt(pt->entry[l4idx] & PAGE_MASK);
|
||||
|
||||
if (!(pt->entry[l3idx] & PFL3_PRESENT)) {
|
||||
return ret;
|
||||
}
|
||||
pt = phys_to_virt(pt->entry[l3idx] & PAGE_MASK);
|
||||
|
||||
if (!(pt->entry[l2idx] & PFL2_PRESENT)) {
|
||||
return ret;
|
||||
}
|
||||
if ((pt->entry[l2idx] & PFL2_SIZE)) {
|
||||
|
||||
ret = PM_PFRAME(((pt->entry[l2idx] & LARGE_PAGE_MASK) +
|
||||
(v & (LARGE_PAGE_SIZE - 1))) >> PAGE_SHIFT);
|
||||
ret |= PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
|
||||
return ret;
|
||||
}
|
||||
pt = phys_to_virt(pt->entry[l2idx] & PAGE_MASK);
|
||||
|
||||
if (!(pt->entry[l1idx] & PFL1_PRESENT)) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = PM_PFRAME((pt->entry[l1idx] & PT_PHYSMASK) >> PAGE_SHIFT);
|
||||
ret |= PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
int ihk_mc_pt_virt_to_phys(struct page_table *pt,
|
||||
void *virt, unsigned long *phys)
|
||||
const void *virt, unsigned long *phys)
|
||||
{
|
||||
int l4idx, l3idx, l2idx, l1idx;
|
||||
unsigned long v = (unsigned long)virt;
|
||||
@@ -1824,7 +1876,8 @@ enum ihk_mc_pt_attribute arch_vrflag_to_ptattr(unsigned long flag, uint64_t faul
|
||||
attr = common_vrflag_to_ptattr(flag, fault, ptep);
|
||||
|
||||
if ((fault & PF_PROT)
|
||||
|| ((fault & PF_POPULATE) && (flag & VR_PRIVATE))) {
|
||||
|| ((fault & (PF_POPULATE | PF_PATCH))
|
||||
&& (flag & VR_PRIVATE))) {
|
||||
attr |= PTATTR_DIRTY;
|
||||
}
|
||||
|
||||
@@ -2043,7 +2096,7 @@ void ihk_mc_reserve_arch_pages(unsigned long start, unsigned long end,
|
||||
/* Reserve Text + temporal heap */
|
||||
cb(virt_to_phys(_head), virt_to_phys(get_last_early_heap()), 0);
|
||||
/* Reserve trampoline area to boot the second ap */
|
||||
cb(AP_TRAMPOLINE, AP_TRAMPOLINE + AP_TRAMPOLINE_SIZE, 0);
|
||||
cb(ap_trampoline, ap_trampoline + AP_TRAMPOLINE_SIZE, 0);
|
||||
/* Reserve the null page */
|
||||
cb(0, PAGE_SIZE, 0);
|
||||
/* Micro-arch specific */
|
||||
@@ -2072,9 +2125,9 @@ void *phys_to_virt(unsigned long p)
|
||||
return (void *)(p + MAP_ST_START);
|
||||
}
|
||||
|
||||
int copy_from_user(struct process *proc, void *dst, const void *src, size_t siz)
|
||||
int copy_from_user(void *dst, const void *src, size_t siz)
|
||||
{
|
||||
struct process_vm *vm = proc->vm;
|
||||
struct process_vm *vm = cpu_local_var(current)->vm;
|
||||
struct vm_range *range;
|
||||
size_t pos;
|
||||
size_t wsiz;
|
||||
@@ -2101,9 +2154,62 @@ int copy_from_user(struct process *proc, void *dst, const void *src, size_t siz)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int copy_to_user(struct process *proc, void *dst, const void *src, size_t siz)
|
||||
int read_process_vm(struct process_vm *vm, void *kdst, const void *usrc, size_t siz)
|
||||
{
|
||||
struct process_vm *vm = proc->vm;
|
||||
const uintptr_t ustart = (uintptr_t)usrc;
|
||||
const uintptr_t uend = ustart + siz;
|
||||
uint64_t reason;
|
||||
uintptr_t addr;
|
||||
int error;
|
||||
const void *from;
|
||||
void *to;
|
||||
size_t remain;
|
||||
size_t cpsize;
|
||||
unsigned long pa;
|
||||
void *va;
|
||||
|
||||
if ((ustart < vm->region.user_start)
|
||||
|| (vm->region.user_end <= ustart)
|
||||
|| ((vm->region.user_end - ustart) < siz)) {
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
reason = PF_USER; /* page not present */
|
||||
for (addr = ustart & PAGE_MASK; addr < uend; addr += PAGE_SIZE) {
|
||||
error = page_fault_process_vm(vm, (void *)addr, reason);
|
||||
if (error) {
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
from = usrc;
|
||||
to = kdst;
|
||||
remain = siz;
|
||||
while (remain > 0) {
|
||||
cpsize = PAGE_SIZE - ((uintptr_t)from & (PAGE_SIZE - 1));
|
||||
if (cpsize > remain) {
|
||||
cpsize = remain;
|
||||
}
|
||||
|
||||
error = ihk_mc_pt_virt_to_phys(vm->address_space->page_table, from, &pa);
|
||||
if (error) {
|
||||
return error;
|
||||
}
|
||||
|
||||
va = phys_to_virt(pa);
|
||||
memcpy(to, va, cpsize);
|
||||
|
||||
from += cpsize;
|
||||
to += cpsize;
|
||||
remain -= cpsize;
|
||||
}
|
||||
|
||||
return 0;
|
||||
} /* read_process_vm() */
|
||||
|
||||
int copy_to_user(void *dst, const void *src, size_t siz)
|
||||
{
|
||||
struct process_vm *vm = cpu_local_var(current)->vm;
|
||||
struct vm_range *range;
|
||||
size_t pos;
|
||||
size_t wsiz;
|
||||
@@ -2130,3 +2236,114 @@ int copy_to_user(struct process *proc, void *dst, const void *src, size_t siz)
|
||||
memcpy(dst, src, siz);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int write_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t siz)
|
||||
{
|
||||
const uintptr_t ustart = (uintptr_t)udst;
|
||||
const uintptr_t uend = ustart + siz;
|
||||
uint64_t reason;
|
||||
uintptr_t addr;
|
||||
int error;
|
||||
const void *from;
|
||||
void *to;
|
||||
size_t remain;
|
||||
size_t cpsize;
|
||||
unsigned long pa;
|
||||
void *va;
|
||||
|
||||
if ((ustart < vm->region.user_start)
|
||||
|| (vm->region.user_end <= ustart)
|
||||
|| ((vm->region.user_end - ustart) < siz)) {
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
reason = PF_POPULATE | PF_WRITE | PF_USER;
|
||||
for (addr = ustart & PAGE_MASK; addr < uend; addr += PAGE_SIZE) {
|
||||
error = page_fault_process_vm(vm, (void *)addr, reason);
|
||||
if (error) {
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
from = ksrc;
|
||||
to = udst;
|
||||
remain = siz;
|
||||
while (remain > 0) {
|
||||
cpsize = PAGE_SIZE - ((uintptr_t)to & (PAGE_SIZE - 1));
|
||||
if (cpsize > remain) {
|
||||
cpsize = remain;
|
||||
}
|
||||
|
||||
error = ihk_mc_pt_virt_to_phys(vm->address_space->page_table, to, &pa);
|
||||
if (error) {
|
||||
return error;
|
||||
}
|
||||
|
||||
va = phys_to_virt(pa);
|
||||
memcpy(va, from, cpsize);
|
||||
|
||||
from += cpsize;
|
||||
to += cpsize;
|
||||
remain -= cpsize;
|
||||
}
|
||||
|
||||
return 0;
|
||||
} /* write_process_vm() */
|
||||
|
||||
int patch_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t siz)
|
||||
{
|
||||
const uintptr_t ustart = (uintptr_t)udst;
|
||||
const uintptr_t uend = ustart + siz;
|
||||
uint64_t reason;
|
||||
uintptr_t addr;
|
||||
int error;
|
||||
const void *from;
|
||||
void *to;
|
||||
size_t remain;
|
||||
size_t cpsize;
|
||||
unsigned long pa;
|
||||
void *va;
|
||||
|
||||
kprintf("patch_process_vm(%p,%p,%p,%lx)\n", vm, udst, ksrc, siz);
|
||||
if ((ustart < vm->region.user_start)
|
||||
|| (vm->region.user_end <= ustart)
|
||||
|| ((vm->region.user_end - ustart) < siz)) {
|
||||
kprintf("patch_process_vm(%p,%p,%p,%lx):not in user\n", vm, udst, ksrc, siz);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
reason = PF_PATCH | PF_WRITE | PF_USER;
|
||||
for (addr = ustart & PAGE_MASK; addr < uend; addr += PAGE_SIZE) {
|
||||
error = page_fault_process_vm(vm, (void *)addr, reason);
|
||||
if (error) {
|
||||
kprintf("patch_process_vm(%p,%p,%p,%lx):pf(%lx):%d\n", vm, udst, ksrc, siz, addr, error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
from = ksrc;
|
||||
to = udst;
|
||||
remain = siz;
|
||||
while (remain > 0) {
|
||||
cpsize = PAGE_SIZE - ((uintptr_t)to & (PAGE_SIZE - 1));
|
||||
if (cpsize > remain) {
|
||||
cpsize = remain;
|
||||
}
|
||||
|
||||
error = ihk_mc_pt_virt_to_phys(vm->address_space->page_table, to, &pa);
|
||||
if (error) {
|
||||
kprintf("patch_process_vm(%p,%p,%p,%lx):v2p(%p):%d\n", vm, udst, ksrc, siz, to, error);
|
||||
return error;
|
||||
}
|
||||
|
||||
va = phys_to_virt(pa);
|
||||
memcpy(va, from, cpsize);
|
||||
|
||||
from += cpsize;
|
||||
to += cpsize;
|
||||
remain -= cpsize;
|
||||
}
|
||||
|
||||
kprintf("patch_process_vm(%p,%p,%p,%lx):%d\n", vm, udst, ksrc, siz, 0);
|
||||
return 0;
|
||||
} /* patch_process_vm() */
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -5,6 +5,8 @@
|
||||
* implements x86's vsyscall
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2014 Hitachi, Ltd.
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
@@ -16,20 +18,93 @@
|
||||
*/
|
||||
|
||||
#include <syscall.h>
|
||||
#include <ihk/atomic.h>
|
||||
#include <arch/cpu.h>
|
||||
|
||||
extern int vsyscall_gettimeofday(void *tv, void *tz)
|
||||
extern int vsyscall_gettimeofday(struct timeval *tv, void *tz)
|
||||
__attribute__ ((section (".vsyscall.gettimeofday")));
|
||||
|
||||
int vsyscall_gettimeofday(void *tv, void *tz)
|
||||
struct tod_data_s tod_data
|
||||
__attribute__ ((section(".vsyscall.gettimeofday.data"))) = {
|
||||
.do_local = 0,
|
||||
.version = IHK_ATOMIC64_INIT(0),
|
||||
};
|
||||
|
||||
/*
 * Issue the x86 "pause" instruction; used inside the busy-wait loops of
 * this file. The "memory" clobber also makes it a compiler barrier.
 */
static inline void cpu_pause_for_vsyscall(void)
{
	__asm__ __volatile__("pause" : : : "memory");
} /* cpu_pause_for_vsyscall() */
|
||||
|
||||
static inline void calculate_time_from_tsc(struct timespec *ts)
|
||||
{
|
||||
long ver;
|
||||
unsigned long current_tsc;
|
||||
__time_t sec_delta;
|
||||
long ns_delta;
|
||||
|
||||
for (;;) {
|
||||
while ((ver = ihk_atomic64_read(&tod_data.version)) & 1) {
|
||||
/* settimeofday() is in progress */
|
||||
cpu_pause_for_vsyscall();
|
||||
}
|
||||
rmb();
|
||||
*ts = tod_data.origin;
|
||||
rmb();
|
||||
if (ver == ihk_atomic64_read(&tod_data.version)) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* settimeofday() has intervened */
|
||||
cpu_pause_for_vsyscall();
|
||||
}
|
||||
|
||||
current_tsc = rdtsc();
|
||||
sec_delta = current_tsc / tod_data.clocks_per_sec;
|
||||
ns_delta = NS_PER_SEC * (current_tsc % tod_data.clocks_per_sec)
|
||||
/ tod_data.clocks_per_sec;
|
||||
/* calc. of ns_delta overflows if clocks_per_sec exceeds 18.44 GHz */
|
||||
|
||||
ts->tv_sec += sec_delta;
|
||||
ts->tv_nsec += ns_delta;
|
||||
if (ts->tv_nsec >= NS_PER_SEC) {
|
||||
ts->tv_nsec -= NS_PER_SEC;
|
||||
++ts->tv_sec;
|
||||
}
|
||||
|
||||
return;
|
||||
} /* calculate_time_from_tsc() */
|
||||
|
||||
int vsyscall_gettimeofday(struct timeval *tv, void *tz)
|
||||
{
|
||||
int error;
|
||||
struct timespec ats;
|
||||
|
||||
if (!tv && !tz) {
|
||||
/* nothing to do */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Do it locally if supported */
|
||||
if (!tz && tod_data.do_local) {
|
||||
calculate_time_from_tsc(&ats);
|
||||
|
||||
tv->tv_sec = ats.tv_sec;
|
||||
tv->tv_usec = ats.tv_nsec / 1000;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Otherwise syscall */
|
||||
asm ("syscall" : "=a" (error)
|
||||
: "a" (__NR_gettimeofday), "D" (tv), "S" (tz)
|
||||
: "%rcx", "%r11", "memory");
|
||||
|
||||
if (error) {
|
||||
*(int *)0 = 0; /* i.e. raise(SIGSEGV) */
|
||||
}
|
||||
return error;
|
||||
}
|
||||
} /* vsyscall_gettimeofday() */
|
||||
|
||||
extern long vsyscall_time(void *tp)
|
||||
__attribute__ ((section (".vsyscall.time")));
|
||||
@@ -58,3 +133,17 @@ long vsyscall_time(void *tp)
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
extern int vsyscall_getcpu(unsigned *cpup, unsigned *nodep, void *tcachep)
|
||||
__attribute__ ((section (".vsyscall.getcpu")));
|
||||
|
||||
int vsyscall_getcpu(unsigned *cpup, unsigned *nodep, void *tcachep)
|
||||
{
|
||||
int error;
|
||||
|
||||
asm ("syscall" : "=a" (error)
|
||||
: "a" (__NR_getcpu), "D" (cpup), "S" (nodep), "d" (tcachep)
|
||||
: "%rcx", "%r11", "memory");
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
46
arch/x86/tools/mcreboot-builtin-x86.sh.in
Executable file
46
arch/x86/tools/mcreboot-builtin-x86.sh.in
Executable file
@@ -0,0 +1,46 @@
|
||||
#!/bin/bash -x

# \file arch/x86/tools/mcreboot-builtin-x86.sh.in
#  License details are found in the file LICENSE.
# \brief
#  mckernel boot script
# \author Masamichi Takagi  <masamichi.takagi@riken.jp> \par
#      Copyright (C) 2014  RIKEN AICS

# HISTORY:
#

prefix="@prefix@"
BINDIR="@BINDIR@"
SBINDIR="@SBINDIR@"
KMODDIR="@KMODDIR@"
KERNDIR="@KERNDIR@"

# Kill running mcexec instances, if any ("kill" without a PID is an error).
mcexec_pids=$(pidof mcexec)
if [ -n "$mcexec_pids" ]; then
	kill -9 $mcexec_pids
fi

# Unload the modules of a previous instance.
if lsmod | grep mcctrl > /dev/null 2>&1; then
	rmmod mcctrl || exit 1
fi
if lsmod | grep dcfa > /dev/null 2>&1; then
	rmmod dcfa || exit 1
fi
if lsmod | grep ihk_builtin > /dev/null 2>&1; then
	rmmod ihk_builtin || exit 1
fi
if lsmod | grep ihk > /dev/null 2>&1; then
	rmmod ihk || exit 1
fi

# Load IHK and create OS instance 0. Stop here on failure: the variable
# assignments below would otherwise end the && list and let the script
# carry on with the boot sequence.
insmod "$KMODDIR/ihk.ko" &&
insmod "$KMODDIR/ihk_builtin.ko" &&
"$SBINDIR/ihkconfig" 0 create || exit 1

# Number of cores: count the fields containing "2" in the
# "SHIMOS: CPU Status:" kernel message. Memory: a quarter of the total
# reported by free, in GiB.
NCORE=$(dmesg | grep -E 'SHIMOS: CPU Status:'|awk '{split($0,a," "); for (i = 1; i <= length(a); i++) { if(a[i] ~ /2/) {count++}} print count;}')
MEM=$(free -g | grep -E 'Mem:' | awk '{print int($2/4)}')

# Allocate resources, boot McKernel and load the delegator module. When a
# step fails the script ends with that step's exit status.
"$SBINDIR/ihkosctl" 0 alloc "$NCORE" "$MEM"g &&
"$SBINDIR/ihkosctl" 0 load "$KERNDIR/mckernel.img" &&
"$SBINDIR/ihkosctl" 0 kargs hidos osnum=0 &&
"$SBINDIR/ihkosctl" 0 boot &&
sleep 1 &&
"$SBINDIR/ihkosctl" 0 kmsg &&
insmod "$KMODDIR/mcctrl.ko" &&
sleep 1 &&
"$SBINDIR/ihkosctl" 0 kmsg &&
exit 0
|
||||
98
arch/x86/tools/mcreboot-smp-x86.sh.in
Normal file
98
arch/x86/tools/mcreboot-smp-x86.sh.in
Normal file
@@ -0,0 +1,98 @@
|
||||
#!/bin/bash

# IHK SMP-x86 example boot script.
# author: Balazs Gerofi  <bgerofi@riken.jp>
#      Copyright (C) 2014  RIKEN AICS
#
# This is an example script for loading IHK, configuring a partition and
# booting McKernel on it.
# The script reserves half of the CPU cores and 512MB of RAM from NUMA node 0
# when IHK is loaded for the first time, otherwise it destroys the current
# McKernel instance and reboots it using the same set of resources as it used
# previously.
# Note that the script does not output anything unless an error occurs.
# On error it prints a message and exits with status 1.

prefix="@prefix@"
BINDIR="@BINDIR@"
SBINDIR="@SBINDIR@"
KMODDIR="@KMODDIR@"
KERNDIR="@KERNDIR@"

mem="512M@0"
cpus=""
ihk_ikc_irq_core=0

# Print "error: <message>" and terminate with a failure status.
error_exit() {
	echo "error: $1"
	exit 1
}

# Succeeds when IRQ number $1 is listed in /proc/interrupts.
irq_in_use() {
	grep ":" /proc/interrupts | awk '{print $1}' | grep -o '[0-9]*' | grep -q -x -e "$1"
}

if [ "$cpus" == "" ]; then
	# Get the number of CPUs on NUMA node 0
	nr_cpus=`lscpu --parse | awk -F"," '{if ($4 == 0) print $4}' | wc -l`

	# Use the second half of the cores
	let nr_cpus="$nr_cpus / 2"
	cpus=`lscpu --parse | awk -F"," '{if ($4 == 0) print $1}' | tail -n $nr_cpus | xargs echo -n | sed 's/ /,/g'`
	if [ "$cpus" == "" ]; then error_exit "no available CPUs on NUMA node 0?"; fi
fi

# Remove delegator if loaded
if [ "`lsmod | grep mcctrl`" != "" ]; then
	if ! rmmod mcctrl; then error_exit "removing mcctrl"; fi
fi

# Load IHK if not loaded
if [ "`lsmod | grep ihk`" == "" ]; then
	if ! insmod ${KMODDIR}/ihk.ko; then error_exit "loading ihk"; fi
fi

# Load IHK-SMP if not loaded and reserve CPUs and memory
if [ "`lsmod | grep ihk_smp_x86`" == "" ]; then
	# Pick the first IRQ in 64..255 that has neither a /proc/irq entry
	# nor a line in /proc/interrupts.
	ihk_irq=""
	for i in `seq 64 255`; do
		if [ ! -d /proc/irq/$i ] && ! irq_in_use "$i"; then
			ihk_irq=$i
			break
		fi
	done
	if [ "$ihk_irq" == "" ]; then error_exit "no IRQ available"; fi
	if ! insmod ${KMODDIR}/ihk-smp-x86.ko ihk_start_irq=$ihk_irq ihk_ikc_irq_core=$ihk_ikc_irq_core; then error_exit "loading ihk-smp-x86"; fi
	if ! ${SBINDIR}/ihkconfig 0 reserve cpu ${cpus}; then error_exit "reserving CPUs"; fi
	if ! ${SBINDIR}/ihkconfig 0 reserve mem ${mem}; then error_exit "reserving memory"; fi
# If loaded, but no resources allocated, get CPUs and memory
else
	# NOTE(review): the values below are read with ihkosctl although the
	# preceding CPU check uses ihkconfig -- confirm which tool is intended.
	if ! ${SBINDIR}/ihkconfig 0 query cpu > /dev/null; then error_exit "querying cpus"; fi
	cpus_allocated=`${SBINDIR}/ihkosctl 0 query cpu`
	if [ "$cpus_allocated" == "" ]; then
		if ! ${SBINDIR}/ihkconfig 0 reserve cpu ${cpus}; then error_exit "reserving CPUs"; fi
	fi

	if ! ${SBINDIR}/ihkosctl 0 query mem > /dev/null; then error_exit "querying memory"; fi
	mem_allocated=`${SBINDIR}/ihkosctl 0 query mem`
	if [ "$mem_allocated" == "" ]; then
		if ! ${SBINDIR}/ihkconfig 0 reserve mem ${mem}; then error_exit "reserving memory"; fi
	fi
fi

# Check for existing OS instance and destroy
if [ -c /dev/mcos0 ]; then
	# Query CPU cores and memory of OS instance so that the same values are used as previously
	if ! ${SBINDIR}/ihkosctl 0 query cpu > /dev/null; then error_exit "querying cpus"; fi
	cpus=`${SBINDIR}/ihkosctl 0 query cpu`
	if ! ${SBINDIR}/ihkosctl 0 query mem > /dev/null; then error_exit "querying memory"; fi
	mem=`${SBINDIR}/ihkosctl 0 query mem`

	# A failed destroy is only reported; the script carries on.
	if ! ${SBINDIR}/ihkconfig 0 destroy 0; then echo "warning: destroy failed"; fi
else
	# Otherwise query IHK-SMP for resources
	if ! ${SBINDIR}/ihkconfig 0 query cpu > /dev/null; then error_exit "querying cpus"; fi
	cpus=`${SBINDIR}/ihkconfig 0 query cpu`
	if ! ${SBINDIR}/ihkconfig 0 query mem > /dev/null; then error_exit "querying memory"; fi
	mem=`${SBINDIR}/ihkconfig 0 query mem`
fi

if ! ${SBINDIR}/ihkconfig 0 create; then error_exit "create"; fi
if ! ${SBINDIR}/ihkosctl 0 assign cpu ${cpus}; then error_exit "assign CPUs"; fi
if ! ${SBINDIR}/ihkosctl 0 assign mem ${mem}; then error_exit "assign memory"; fi
if ! ${SBINDIR}/ihkosctl 0 load ${KERNDIR}/mckernel.img; then error_exit "loading kernel image"; fi
if ! ${SBINDIR}/ihkosctl 0 kargs hidos; then error_exit "setting kernel arguments"; fi
if ! ${SBINDIR}/ihkosctl 0 boot; then error_exit "booting"; fi
if ! insmod ${KMODDIR}/mcctrl.ko; then error_exit "inserting mcctrl.ko"; fi
if ! chown `logname` /dev/mcd* /dev/mcos*; then error_exit "chowning device files"; fi
|
||||
16
arch/x86/tools/mcshutdown-builtin-x86.sh.in
Normal file
16
arch/x86/tools/mcshutdown-builtin-x86.sh.in
Normal file
@@ -0,0 +1,16 @@
|
||||
#!/bin/bash
|
||||
# \file arch/x86/tools/mcshutdown-builtin-x86.sh.in
|
||||
# License details are found in the file LICENSE.
|
||||
# \brief
|
||||
# mckernel shutdown script
|
||||
#
|
||||
# \author McKernel Development Team
|
||||
#
|
||||
|
||||
prefix="@prefix@"
|
||||
BINDIR="@BINDIR@"
|
||||
SBINDIR="@SBINDIR@"
|
||||
KMODDIR="@KMODDIR@"
|
||||
KERNDIR="@KERNDIR@"
|
||||
|
||||
"$SBINDIR/ihkosctl" 0 shutdown
|
||||
47
arch/x86/tools/mcstop+release-smp-x86.sh.in
Normal file
47
arch/x86/tools/mcstop+release-smp-x86.sh.in
Normal file
@@ -0,0 +1,47 @@
|
||||
#!/bin/bash

# IHK SMP-x86 example McKernel unload script.
# author: Balazs Gerofi  <bgerofi@riken.jp>
#      Copyright (C) 2015  RIKEN AICS
#
# This is an example script for destroying McKernel and releasing IHK resources
# Note that the script does not output anything unless an error occurs.
# On error it prints a message and exits with status 1.

prefix="@prefix@"
BINDIR="@BINDIR@"
SBINDIR="@SBINDIR@"
KMODDIR="@KMODDIR@"
KERNDIR="@KERNDIR@"

mem=""
cpus=""

# No SMP module? Exit.
if [ "`lsmod | grep ihk_smp_x86`" == "" ]; then exit 0; fi

# Remove delegator if loaded
if [ "`lsmod | grep mcctrl`" != "" ]; then
	if ! rmmod mcctrl; then echo "error: removing mcctrl"; exit 1; fi
fi

# Destroy all LWK instances
for i in /dev/mcos*; do
	# An unmatched glob is left as the literal pattern; skip it.
	[ -e "$i" ] || continue
	# The instance index is what follows the 9-character "/dev/mcos" prefix.
	ind=`echo $i|cut -c10-`
	if ! ${SBINDIR}/ihkconfig 0 destroy $ind; then echo "error: destroying LWK instance $ind failed"; exit 1; fi
done

# Query IHK-SMP resources and release them
if ! ${SBINDIR}/ihkconfig 0 query cpu > /dev/null; then echo "error: querying cpus"; exit 1; fi
cpus=`${SBINDIR}/ihkconfig 0 query cpu`
if ! ${SBINDIR}/ihkconfig 0 release cpu $cpus > /dev/null; then echo "error: releasing CPUs"; exit 1; fi

if ! ${SBINDIR}/ihkconfig 0 query mem > /dev/null; then echo "error: querying memory"; exit 1; fi
mem=`${SBINDIR}/ihkconfig 0 query mem`
if ! ${SBINDIR}/ihkconfig 0 release mem $mem > /dev/null; then echo "error: releasing memory"; exit 1; fi

# Remove SMP module
if [ "`lsmod | grep ihk_smp_x86`" != "" ]; then
	if ! rmmod ihk_smp_x86; then echo "error: removing ihk_smp_x86"; exit 1; fi
fi
|
||||
|
||||
|
||||
26
configure.ac
26
configure.ac
@@ -24,7 +24,7 @@ AC_ARG_WITH([kernelsrc],
|
||||
|
||||
AC_ARG_WITH([target],
|
||||
AC_HELP_STRING(
|
||||
[--with-target={attached-mic | builtin-mic | builtin-x86}],[target, default is attached-mic]),
|
||||
[--with-target={attached-mic | builtin-mic | builtin-x86 | smp-x86}],[target, default is attached-mic]),
|
||||
[WITH_TARGET=$withval],[WITH_TARGET=yes])
|
||||
|
||||
AC_ARG_ENABLE([dcfa],
|
||||
@@ -111,6 +111,26 @@ case $WITH_TARGET in
|
||||
MANDIR="$prefix/attached/man"
|
||||
fi
|
||||
;;
|
||||
smp-x86)
|
||||
ARCH=`uname -m`
|
||||
AC_PROG_CC
|
||||
XCC=$CC
|
||||
if test "X$KERNDIR" = X; then
|
||||
KERNDIR="$prefix/smp-x86/kernel"
|
||||
fi
|
||||
if test "X$BINDIR" = X; then
|
||||
BINDIR="$prefix/bin"
|
||||
fi
|
||||
if test "X$SBINDIR" = X; then
|
||||
SBINDIR="$prefix/sbin"
|
||||
fi
|
||||
if test "X$KMODDIR" = X; then
|
||||
KMODDIR="$prefix/kmod"
|
||||
fi
|
||||
if test "X$MANDIR" = X; then
|
||||
MANDIR="$prefix/smp-x86/man"
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
AC_MSG_ERROR([target $WITH_TARGET is unknwon])
|
||||
;;
|
||||
@@ -145,6 +165,10 @@ AC_CONFIG_FILES([
|
||||
kernel/Makefile.build
|
||||
arch/x86/tools/mcreboot-attached-mic.sh
|
||||
arch/x86/tools/mcshutdown-attached-mic.sh
|
||||
arch/x86/tools/mcreboot-builtin-x86.sh
|
||||
arch/x86/tools/mcreboot-smp-x86.sh
|
||||
arch/x86/tools/mcstop+release-smp-x86.sh
|
||||
arch/x86/tools/mcshutdown-builtin-x86.sh
|
||||
arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in
|
||||
])
|
||||
|
||||
|
||||
@@ -38,6 +38,9 @@
|
||||
#define MCEXEC_UP_SEND_SIGNAL 0x30a02906
|
||||
#define MCEXEC_UP_GET_CPU 0x30a02907
|
||||
#define MCEXEC_UP_STRNCPY_FROM_USER 0x30a02908
|
||||
#define MCEXEC_UP_NEW_PROCESS 0x30a02909
|
||||
#define MCEXEC_UP_GET_CRED 0x30a0290a
|
||||
#define MCEXEC_UP_GET_CREDV 0x30a0290b
|
||||
|
||||
#define MCEXEC_UP_PREPARE_DMA 0x30a02910
|
||||
#define MCEXEC_UP_FREE_DMA 0x30a02911
|
||||
@@ -45,6 +48,8 @@
|
||||
#define MCEXEC_UP_OPEN_EXEC 0x30a02912
|
||||
#define MCEXEC_UP_CLOSE_EXEC 0x30a02913
|
||||
|
||||
#define MCEXEC_UP_DEBUG_LOG 0x40000000
|
||||
|
||||
#define MCEXEC_UP_TRANSFER_TO_REMOTE 0
|
||||
#define MCEXEC_UP_TRANSFER_FROM_REMOTE 1
|
||||
|
||||
@@ -67,6 +72,7 @@ struct program_image_section {
|
||||
};
|
||||
|
||||
#define SHELL_PATH_MAX_LEN 1024
|
||||
#define MCK_RLIM_MAX 20
|
||||
|
||||
struct program_load_desc {
|
||||
int num_sections;
|
||||
@@ -76,6 +82,7 @@ struct program_load_desc {
|
||||
int err;
|
||||
int stack_prot;
|
||||
int pgid;
|
||||
int cred[8];
|
||||
unsigned long entry;
|
||||
unsigned long user_start;
|
||||
unsigned long user_end;
|
||||
@@ -90,8 +97,7 @@ struct program_load_desc {
|
||||
unsigned long args_len;
|
||||
char *envs;
|
||||
unsigned long envs_len;
|
||||
unsigned long rlimit_stack_cur;
|
||||
unsigned long rlimit_stack_max;
|
||||
struct rlimit rlimit[MCK_RLIM_MAX];
|
||||
unsigned long interp_align;
|
||||
char shell_path[SHELL_PATH_MAX_LEN];
|
||||
struct program_image_section sections[0];
|
||||
@@ -156,4 +162,8 @@ struct signal_desc {
|
||||
char info[128];
|
||||
};
|
||||
|
||||
struct newprocess_desc {
|
||||
int pid;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
@@ -2,13 +2,14 @@ KDIR ?= @KDIR@
|
||||
ARCH ?= @ARCH@
|
||||
src = @abs_srcdir@
|
||||
KMODDIR=@KMODDIR@
|
||||
BINDIR=@BINDIR@
|
||||
IHK_BASE=$(src)/../../../ihk
|
||||
|
||||
obj-m += mcctrl.o
|
||||
|
||||
ccflags-y := -I$(IHK_BASE)/linux/include -I$(IHK_BASE)/ikc/include -I$(IHK_BASE)/include -I$(src)/../include
|
||||
ccflags-y := -I$(IHK_BASE)/linux/include -I$(IHK_BASE)/ikc/include -I$(IHK_BASE)/include -I$(src)/../include -mcmodel=kernel -mno-red-zone -DMCEXEC_PATH=\"$(BINDIR)/mcexec\"
|
||||
|
||||
mcctrl-y := driver.o control.o ikc.o syscall.o procfs.o
|
||||
mcctrl-y := driver.o control.o ikc.o syscall.o procfs.o binfmt_mcexec.o
|
||||
|
||||
KBUILD_EXTRA_SYMBOLS = @abs_builddir@/../../../ihk/linux/core/Module.symvers
|
||||
|
||||
|
||||
284
executer/kernel/binfmt_mcexec.c
Normal file
284
executer/kernel/binfmt_mcexec.c
Normal file
@@ -0,0 +1,284 @@
|
||||
#include <linux/module.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/stat.h>
|
||||
#include <linux/binfmts.h>
|
||||
#include <linux/elfcore.h>
|
||||
#include <linux/elf.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/version.h>
|
||||
#include "mcctrl.h"
|
||||
|
||||
/*
 * pathcheck() - test whether a path lies below one of the listed directories.
 *
 * @file: path to test
 * @list: colon-separated list of directory prefixes
 *
 * Trailing '/' characters of each list entry are ignored. An entry
 * matches when @file starts with it and the next character of @file is
 * '/'. An empty @list matches everything.
 *
 * Returns 1 on a match, 0 otherwise.
 */
static int pathcheck(const char *file, const char *list)
{
	const char *seg = list;

	if (*list == '\0')
		return 1;

	for (;;) {
		const char *end = strchr(seg, ':');
		size_t len;

		if (end == NULL)
			end = seg + strlen(seg);

		/* length of this entry without its trailing slashes */
		len = (size_t)(end - seg);
		while (len > 0 && seg[len - 1] == '/')
			len--;

		if (strncmp(file, seg, len) == 0 && file[len] == '/')
			return 1;

		if (*end == '\0')
			return 0;
		seg = end + 1;
	}
}
|
||||
|
||||
static int load_elf(struct linux_binprm *bprm
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0)
|
||||
, struct pt_regs *regs
|
||||
#endif
|
||||
)
|
||||
{
|
||||
char mcexec[BINPRM_BUF_SIZE];
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36)
|
||||
const
|
||||
#endif
|
||||
char *wp;
|
||||
char *cp;
|
||||
struct file *file;
|
||||
int rc;
|
||||
struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
|
||||
typedef struct {
|
||||
char *name;
|
||||
char *val;
|
||||
int l;
|
||||
} envdata;
|
||||
envdata env[] = {
|
||||
{.name = "MCEXEC"},
|
||||
#define env_mcexec (env[0].val)
|
||||
{.name = "MCEXEC_WL"},
|
||||
#define env_mcexec_wl (env[1].val)
|
||||
{.name = "MCEXEC_BL"},
|
||||
#define env_mcexec_bl (env[2].val)
|
||||
{.name = NULL}
|
||||
};
|
||||
envdata *ep;
|
||||
unsigned long off = 0;
|
||||
struct page *page;
|
||||
char *addr = NULL;
|
||||
int i;
|
||||
unsigned long p;
|
||||
int st;
|
||||
int mode;
|
||||
int cnt[2];
|
||||
char buf[32];
|
||||
int l;
|
||||
int pass;
|
||||
char pbuf[1024];
|
||||
const char *path;
|
||||
|
||||
if(bprm->envc == 0)
|
||||
return -ENOEXEC;
|
||||
if(memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
|
||||
return -ENOEXEC;
|
||||
if(elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
|
||||
return -ENOEXEC;
|
||||
|
||||
if(elf_ex->e_ident[EI_CLASS] != ELFCLASS64)
|
||||
return -ENOEXEC;
|
||||
|
||||
path = d_path(&bprm->file->f_path, pbuf, 1024);
|
||||
if(!path || IS_ERR(path))
|
||||
path = bprm->interp;
|
||||
|
||||
cp = strrchr(path, '/');
|
||||
if(!cp ||
|
||||
!strcmp(cp, "/mcexec") ||
|
||||
!strcmp(cp, "/ihkosctl") ||
|
||||
!strcmp(cp, "/ihkconfig"))
|
||||
return -ENOEXEC;
|
||||
|
||||
cnt[0] = bprm->argc;
|
||||
cnt[1] = bprm->envc;
|
||||
for(pass = 0; pass < 2; pass++){
|
||||
p = bprm->p;
|
||||
mode = cnt[0] == 0? 1: 0;
|
||||
if(pass == 1){
|
||||
for(ep = env; ep->name; ep++){
|
||||
if(ep->l)
|
||||
ep->val = kmalloc(ep->l, GFP_KERNEL);
|
||||
}
|
||||
}
|
||||
ep = NULL;
|
||||
l = 0;
|
||||
for(i = 0, st = 0; mode != 2;){
|
||||
if(st == 0){
|
||||
off = p & ~PAGE_MASK;
|
||||
rc = get_user_pages(current, bprm->mm,
|
||||
bprm->p, 1, 0, 1,
|
||||
&page, NULL);
|
||||
if(rc <= 0)
|
||||
return -EFAULT;
|
||||
addr = kmap_atomic(page
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0)
|
||||
, KM_USER0
|
||||
#endif
|
||||
);
|
||||
st = 1;
|
||||
}
|
||||
if(addr[off]){
|
||||
if(mode == 1){
|
||||
if(ep){
|
||||
if(pass == 1)
|
||||
ep->val[l] = addr[off];
|
||||
l++;
|
||||
}
|
||||
else if(addr[off] == '='){
|
||||
if(l < 32)
|
||||
buf[l] = '\0';
|
||||
buf[31] = '\0';
|
||||
for(ep = env; ep->name; ep++)
|
||||
if(!strcmp(ep->name, buf))
|
||||
break;
|
||||
if(ep->name)
|
||||
l = 0;
|
||||
else
|
||||
ep = NULL;
|
||||
}
|
||||
else{
|
||||
if(l < 32)
|
||||
buf[l] = addr[off];
|
||||
l++;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
else{
|
||||
if(mode == 1 && ep){
|
||||
if(pass == 0){
|
||||
ep->l = l + 1;
|
||||
}
|
||||
else{
|
||||
ep->val[l] = '\0';
|
||||
}
|
||||
}
|
||||
ep = NULL;
|
||||
l = 0;
|
||||
i++;
|
||||
if(i == cnt[mode]){
|
||||
i = 0;
|
||||
mode++;
|
||||
}
|
||||
}
|
||||
off++;
|
||||
p++;
|
||||
if(off == PAGE_SIZE || mode == 2){
|
||||
kunmap_atomic(addr
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0)
|
||||
, KM_USER0
|
||||
#endif
|
||||
);
|
||||
put_page(page);
|
||||
st = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(!env_mcexec || !strcmp(env_mcexec, "0") || !strcmp(env_mcexec, "off"))
|
||||
rc = 1;
|
||||
else{
|
||||
rc = 0;
|
||||
if(strchr(env_mcexec, '/') && strlen(env_mcexec) < BINPRM_BUF_SIZE)
|
||||
strcpy(mcexec, env_mcexec);
|
||||
else
|
||||
strcpy(mcexec, MCEXEC_PATH);
|
||||
}
|
||||
|
||||
if(rc);
|
||||
else if(env_mcexec_wl)
|
||||
rc = !pathcheck(path, env_mcexec_wl);
|
||||
else if(env_mcexec_bl)
|
||||
rc = pathcheck(path, env_mcexec_bl);
|
||||
else
|
||||
rc = pathcheck(path, "/usr:/bin:/sbin:/opt");
|
||||
|
||||
for(ep = env; ep->name; ep++)
|
||||
if(ep->val)
|
||||
kfree(ep->val);
|
||||
if(rc)
|
||||
return -ENOEXEC;
|
||||
|
||||
file = open_exec(mcexec);
|
||||
if (IS_ERR(file))
|
||||
return -ENOEXEC;
|
||||
|
||||
rc = remove_arg_zero(bprm);
|
||||
if (rc){
|
||||
fput(file);
|
||||
return rc;
|
||||
}
|
||||
rc = copy_strings_kernel(1, &bprm->interp, bprm);
|
||||
if (rc < 0){
|
||||
fput(file);
|
||||
return rc;
|
||||
}
|
||||
bprm->argc++;
|
||||
wp = mcexec;
|
||||
rc = copy_strings_kernel(1, &wp, bprm);
|
||||
if (rc){
|
||||
fput(file);
|
||||
return rc;
|
||||
}
|
||||
bprm->argc++;
|
||||
#if 1
|
||||
rc = bprm_change_interp(mcexec, bprm);
|
||||
if (rc < 0){
|
||||
fput(file);
|
||||
return rc;
|
||||
}
|
||||
#else
|
||||
if(brpm->interp != bprm->filename)
|
||||
kfree(brpm->interp);
|
||||
kfree(brpm->filename);
|
||||
bprm->filename = bprm->interp = kstrdup(mcexec, GFP_KERNEL);
|
||||
if(!bprm->interp){
|
||||
fput(file);
|
||||
return -ENOMEM;
|
||||
}
|
||||
#endif
|
||||
|
||||
allow_write_access(bprm->file);
|
||||
fput(bprm->file);
|
||||
bprm->file = file;
|
||||
|
||||
rc = prepare_binprm(bprm);
|
||||
if (rc < 0){
|
||||
return rc;
|
||||
}
|
||||
return search_binary_handler(bprm
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0)
|
||||
, regs
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
static struct linux_binfmt mcexec_format = {
|
||||
.module = THIS_MODULE,
|
||||
.load_binary = load_elf,
|
||||
};
|
||||
|
||||
/* Module-init helper: hands mcexec_format to insert_binfmt(). */
void __init binfmt_mcexec_init(void)
|
||||
{
|
||||
insert_binfmt(&mcexec_format);
|
||||
}
|
||||
|
||||
/* Module-exit helper: hands mcexec_format to unregister_binfmt(). */
void __exit binfmt_mcexec_exit(void)
|
||||
{
|
||||
unregister_binfmt(&mcexec_format);
|
||||
}
|
||||
@@ -31,12 +31,15 @@
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/version.h>
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/delay.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/io.h>
|
||||
#include "mcctrl.h"
|
||||
|
||||
//#define DEBUG
|
||||
|
||||
#ifdef DEBUG
|
||||
#define dprintk printk
|
||||
#else
|
||||
@@ -242,19 +245,69 @@ int mcexec_transfer_image(ihk_os_t os, struct remote_transfer *__user upt)
|
||||
|
||||
//extern unsigned long last_thread_exec;
|
||||
|
||||
struct handlerinfo {
|
||||
int pid;
|
||||
};
|
||||
|
||||
/*
 * Send an SCD_MSG_DEBUG_LOG packet carrying @arg through
 * mcctrl_ikc_send(os, 0, ...). The result of the send is not checked;
 * always returns 0.
 */
static long mcexec_debug_log(ihk_os_t os, unsigned long arg)
{
	struct ikc_scd_packet pkt;

	memset(&pkt, 0, sizeof(pkt));
	pkt.arg = arg;
	pkt.msg = SCD_MSG_DEBUG_LOG;

	mcctrl_ikc_send(os, 0, &pkt);

	return 0;
}
|
||||
|
||||
/*
 * Release callback registered via ihk_os_register_release_handler().
 *
 * @param is the struct handlerinfo allocated at registration time. An
 * SCD_MSG_CLEANUP_PROCESS packet for its pid is sent through
 * mcctrl_ikc_send(os, 0, ...), then the handlerinfo is freed.
 */
static void release_handler(ihk_os_t os, void *param)
{
	const struct handlerinfo *hinfo = param;
	struct ikc_scd_packet pkt;

	memset(&pkt, 0, sizeof(pkt));
	pkt.pid = hinfo->pid;
	pkt.msg = SCD_MSG_CLEANUP_PROCESS;

	mcctrl_ikc_send(os, 0, &pkt);

	kfree(param);
}
|
||||
|
||||
static long mcexec_newprocess(ihk_os_t os,
|
||||
struct newprocess_desc *__user udesc,
|
||||
struct file *file)
|
||||
{
|
||||
struct newprocess_desc desc;
|
||||
struct handlerinfo *info;
|
||||
|
||||
if (copy_from_user(&desc, udesc, sizeof(struct newprocess_desc))) {
|
||||
return -EFAULT;
|
||||
}
|
||||
info = kmalloc(sizeof(struct handlerinfo), GFP_KERNEL);
|
||||
info->pid = desc.pid;
|
||||
ihk_os_register_release_handler(file, release_handler, info);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static long mcexec_start_image(ihk_os_t os,
|
||||
struct program_load_desc * __user udesc)
|
||||
struct program_load_desc * __user udesc,
|
||||
struct file *file)
|
||||
{
|
||||
struct program_load_desc desc;
|
||||
struct ikc_scd_packet isp;
|
||||
struct mcctrl_channel *c;
|
||||
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
|
||||
struct handlerinfo *info;
|
||||
|
||||
if (copy_from_user(&desc, udesc,
|
||||
sizeof(struct program_load_desc))) {
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
info = kmalloc(sizeof(struct handlerinfo), GFP_KERNEL);
|
||||
info->pid = desc.pid;
|
||||
ihk_os_register_release_handler(file, release_handler, info);
|
||||
|
||||
c = usrdata->channels + desc.cpu;
|
||||
|
||||
mcctrl_ikc_set_recv_cpu(os, desc.cpu);
|
||||
@@ -366,10 +419,10 @@ retry_alloc:
|
||||
init_waitqueue_head(&wqhln->wq_syscall);
|
||||
list_add_tail(&wqhln->list, &c->wq_list);
|
||||
}
|
||||
ihk_ikc_spinlock_unlock(&c->wq_list_lock, flags);
|
||||
|
||||
wqhln->req = 1;
|
||||
wake_up(&wqhln->wq_syscall);
|
||||
ihk_ikc_spinlock_unlock(&c->wq_list_lock, flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -439,14 +492,15 @@ retry_alloc:
|
||||
|
||||
ret = wait_event_interruptible(wqhln->wq_syscall, wqhln->req);
|
||||
|
||||
if (ret) {
|
||||
return -EINTR;
|
||||
}
|
||||
|
||||
/* Remove per-process wait queue head */
|
||||
irqflags = ihk_ikc_spinlock_lock(&c->wq_list_lock);
|
||||
list_del(&wqhln->list);
|
||||
ihk_ikc_spinlock_unlock(&c->wq_list_lock, irqflags);
|
||||
if (ret && !wqhln->req) {
|
||||
kfree(wqhln);
|
||||
return -EINTR;
|
||||
}
|
||||
kfree(wqhln);
|
||||
|
||||
if (c->param.request_va->number == 61 &&
|
||||
@@ -723,7 +777,7 @@ long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg)
|
||||
}
|
||||
|
||||
LIST_HEAD(mckernel_exec_files);
|
||||
spinlock_t mckernel_exec_file_lock = SPIN_LOCK_UNLOCKED;
|
||||
DEFINE_SPINLOCK(mckernel_exec_file_lock);
|
||||
|
||||
|
||||
struct mckernel_exec_file {
|
||||
@@ -733,6 +787,47 @@ struct mckernel_exec_file {
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
|
||||
#define GUIDVAL(x) (x)
|
||||
#else
|
||||
#define GUIDVAL(x) ((x).val)
|
||||
#endif
|
||||
|
||||
|
||||
int
|
||||
mcexec_getcred(unsigned long phys)
|
||||
{
|
||||
int *virt = phys_to_virt(phys);
|
||||
|
||||
virt[0] = GUIDVAL(current_uid());
|
||||
virt[1] = GUIDVAL(current_euid());
|
||||
virt[2] = GUIDVAL(current_suid());
|
||||
virt[3] = GUIDVAL(current_fsuid());
|
||||
virt[4] = GUIDVAL(current_gid());
|
||||
virt[5] = GUIDVAL(current_egid());
|
||||
virt[6] = GUIDVAL(current_sgid());
|
||||
virt[7] = GUIDVAL(current_fsgid());
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
mcexec_getcredv(int __user *virt)
|
||||
{
|
||||
int wk[8];
|
||||
|
||||
wk[0] = GUIDVAL(current_uid());
|
||||
wk[1] = GUIDVAL(current_euid());
|
||||
wk[2] = GUIDVAL(current_suid());
|
||||
wk[3] = GUIDVAL(current_fsuid());
|
||||
wk[4] = GUIDVAL(current_gid());
|
||||
wk[5] = GUIDVAL(current_egid());
|
||||
wk[6] = GUIDVAL(current_sgid());
|
||||
wk[7] = GUIDVAL(current_fsgid());
|
||||
if(copy_to_user(virt, wk, sizeof(int) * 8))
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mcexec_open_exec(ihk_os_t os, char * __user filename)
|
||||
{
|
||||
struct file *file;
|
||||
@@ -857,7 +952,8 @@ long mcexec_strncpy_from_user(ihk_os_t os, struct strncpy_from_user_desc * __use
|
||||
return 0;
|
||||
}
|
||||
|
||||
long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg)
|
||||
long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg,
|
||||
struct file *file)
|
||||
{
|
||||
switch (req) {
|
||||
case MCEXEC_UP_PREPARE_IMAGE:
|
||||
@@ -867,7 +963,7 @@ long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg)
|
||||
return mcexec_transfer_image(os, (struct remote_transfer *)arg);
|
||||
|
||||
case MCEXEC_UP_START_IMAGE:
|
||||
return mcexec_start_image(os, (struct program_load_desc *)arg);
|
||||
return mcexec_start_image(os, (struct program_load_desc *)arg, file);
|
||||
|
||||
case MCEXEC_UP_WAIT_SYSCALL:
|
||||
return mcexec_wait_syscall(os, (struct syscall_wait_desc *)arg);
|
||||
@@ -888,6 +984,10 @@ long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg)
|
||||
return mcexec_strncpy_from_user(os,
|
||||
(struct strncpy_from_user_desc *)arg);
|
||||
|
||||
case MCEXEC_UP_NEW_PROCESS:
|
||||
return mcexec_newprocess(os, (struct newprocess_desc *)arg,
|
||||
file);
|
||||
|
||||
case MCEXEC_UP_OPEN_EXEC:
|
||||
return mcexec_open_exec(os, (char *)arg);
|
||||
|
||||
@@ -899,6 +999,15 @@ long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg)
|
||||
|
||||
case MCEXEC_UP_FREE_DMA:
|
||||
return mcexec_free_region(os, (unsigned long *)arg);
|
||||
|
||||
case MCEXEC_UP_GET_CRED:
|
||||
return mcexec_getcred((unsigned long)arg);
|
||||
|
||||
case MCEXEC_UP_GET_CREDV:
|
||||
return mcexec_getcredv((int *)arg);
|
||||
|
||||
case MCEXEC_UP_DEBUG_LOG:
|
||||
return mcexec_debug_log(os, arg);
|
||||
}
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@@ -29,7 +29,8 @@
|
||||
|
||||
#define OS_MAX_MINOR 64
|
||||
|
||||
extern long __mcctrl_control(ihk_os_t, unsigned int, unsigned long);
|
||||
extern long __mcctrl_control(ihk_os_t, unsigned int, unsigned long,
|
||||
struct file *);
|
||||
extern int prepare_ikc_channels(ihk_os_t os);
|
||||
extern void destroy_ikc_channels(ihk_os_t os);
|
||||
#ifndef DO_USER_MODE
|
||||
@@ -38,11 +39,15 @@ extern void mcctrl_syscall_init(void);
|
||||
extern void procfs_init(int);
|
||||
extern void procfs_exit(int);
|
||||
|
||||
extern void rus_page_hash_init(void);
|
||||
extern void rus_page_hash_put_pages(void);
|
||||
extern void binfmt_mcexec_init(void);
|
||||
extern void binfmt_mcexec_exit(void);
|
||||
|
||||
static long mcctrl_ioctl(ihk_os_t os, unsigned int request, void *priv,
|
||||
unsigned long arg)
|
||||
unsigned long arg, struct file *file)
|
||||
{
|
||||
return __mcctrl_control(os, request, arg);
|
||||
return __mcctrl_control(os, request, arg, file);
|
||||
}
|
||||
|
||||
static struct ihk_os_user_call_handler mcctrl_uchs[] = {
|
||||
@@ -55,10 +60,14 @@ static struct ihk_os_user_call_handler mcctrl_uchs[] = {
|
||||
{ .request = MCEXEC_UP_SEND_SIGNAL, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_GET_CPU, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_STRNCPY_FROM_USER, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_NEW_PROCESS, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_PREPARE_DMA, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_FREE_DMA, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_OPEN_EXEC, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_CLOSE_EXEC, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_GET_CRED, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_GET_CREDV, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_DEBUG_LOG, .func = mcctrl_ioctl },
|
||||
};
|
||||
|
||||
static struct ihk_os_user_call mcctrl_uc_proto = {
|
||||
@@ -101,6 +110,8 @@ static int __init mcctrl_init(void)
|
||||
mcctrl_syscall_init();
|
||||
#endif
|
||||
|
||||
rus_page_hash_init();
|
||||
|
||||
for(i = 0; i < OS_MAX_MINOR; i++){
|
||||
if (os[i]) {
|
||||
memcpy(mcctrl_uc + i, &mcctrl_uc_proto, sizeof mcctrl_uc_proto);
|
||||
@@ -113,6 +124,8 @@ static int __init mcctrl_init(void)
|
||||
}
|
||||
}
|
||||
|
||||
binfmt_mcexec_init();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -120,6 +133,7 @@ static void __exit mcctrl_exit(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
binfmt_mcexec_exit();
|
||||
printk("mcctrl: unregistered.\n");
|
||||
for(i = 0; i < OS_MAX_MINOR; i++){
|
||||
if(os[i]){
|
||||
@@ -128,6 +142,8 @@ static void __exit mcctrl_exit(void)
|
||||
procfs_exit(i);
|
||||
}
|
||||
}
|
||||
|
||||
rus_page_hash_put_pages();
|
||||
}
|
||||
|
||||
MODULE_LICENSE("GPL v2");
|
||||
|
||||
@@ -48,12 +48,15 @@
|
||||
|
||||
#define SCD_MSG_SYSCALL_ONESIDE 0x4
|
||||
#define SCD_MSG_SEND_SIGNAL 0x8
|
||||
#define SCD_MSG_CLEANUP_PROCESS 0x9
|
||||
|
||||
#define SCD_MSG_PROCFS_CREATE 0x10
|
||||
#define SCD_MSG_PROCFS_DELETE 0x11
|
||||
#define SCD_MSG_PROCFS_REQUEST 0x12
|
||||
#define SCD_MSG_PROCFS_ANSWER 0x13
|
||||
|
||||
#define SCD_MSG_DEBUG_LOG 0x20
|
||||
|
||||
#define DMA_PIN_SHIFT 21
|
||||
|
||||
#define DO_USER_MODE
|
||||
|
||||
@@ -10,12 +10,15 @@
|
||||
* HISTORY:
|
||||
*/
|
||||
|
||||
#include <linux/slab.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/resource.h>
|
||||
#include "mcctrl.h"
|
||||
#include <linux/version.h>
|
||||
|
||||
//#define PROCFS_DEBUG
|
||||
|
||||
@@ -26,16 +29,16 @@
|
||||
#endif
|
||||
|
||||
static DECLARE_WAIT_QUEUE_HEAD(procfsq);
|
||||
|
||||
int mckernel_procfs_read(char *buffer, char **start, off_t offset,
|
||||
int count, int *peof, void *dat);
|
||||
static ssize_t mckernel_procfs_read(struct file *file, char __user *buf,
|
||||
size_t nbytes, loff_t *ppos);
|
||||
|
||||
/* A private data for the procfs driver. */
|
||||
struct procfs_list_entry;
|
||||
|
||||
struct procfs_list_entry {
|
||||
struct list_head list;
|
||||
struct proc_dir_entry *entry;
|
||||
struct proc_dir_entry *parent;
|
||||
struct procfs_list_entry *parent;
|
||||
ihk_os_t os;
|
||||
int osnum;
|
||||
int pid;
|
||||
@@ -53,6 +56,28 @@ struct procfs_list_entry {
|
||||
LIST_HEAD(procfs_file_list);
|
||||
static ihk_spinlock_t procfs_file_list_lock;
|
||||
|
||||
/*
 * mckernel_procfs_lseek() - llseek handler for McKernel procfs files.
 *
 * file:   open file whose position is updated.
 * offset: requested offset, interpreted according to orig.
 * orig:   SEEK_SET (absolute) or SEEK_CUR (relative to the current position).
 *         Any other origin is rejected.
 *
 * Returns the new file position on success, or -EINVAL if orig is not
 * supported or the resulting position would be negative.  A negative
 * position is never stored: returning it would look like an error code to
 * the caller while the position had in fact been changed.
 */
loff_t mckernel_procfs_lseek(struct file *file, loff_t offset, int orig)
{
	loff_t newpos;

	switch (orig) {
	case SEEK_SET:
		newpos = offset;
		break;
	case SEEK_CUR:
		newpos = file->f_pos + offset;
		break;
	default:
		return -EINVAL;
	}

	if (newpos < 0)
		return -EINVAL;

	file->f_pos = newpos;
	return newpos;
}
|
||||
|
||||
static const struct file_operations mckernel_procfs_file_operations = {
|
||||
.llseek = mckernel_procfs_lseek,
|
||||
.read = mckernel_procfs_read,
|
||||
.write = NULL,
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* \brief Return specified procfs entry.
|
||||
*
|
||||
@@ -71,22 +96,22 @@ static ihk_spinlock_t procfs_file_list_lock;
|
||||
/*
|
||||
* XXX: Two or more entries which have same name can be created.
|
||||
*
|
||||
* get_procfs_entry() avoids creating an entry which has already been created.
|
||||
* get_procfs_list_entry() avoids creating an entry which has already been created.
|
||||
* But, it allows creating an entry which is being created by another thread.
|
||||
*
|
||||
* This problem occurred when two requests which created files with a common
|
||||
* ancestor directory which was not explicitly created were racing.
|
||||
*/
|
||||
|
||||
static struct proc_dir_entry *get_procfs_entry(char *p, int osnum, int mode)
|
||||
static struct procfs_list_entry *get_procfs_list_entry(char *p, int osnum, int mode)
|
||||
{
|
||||
char *r;
|
||||
struct proc_dir_entry *ret = NULL, *parent = NULL;
|
||||
struct procfs_list_entry *e;
|
||||
struct proc_dir_entry *pde = NULL;
|
||||
struct procfs_list_entry *e, *ret = NULL, *parent = NULL;
|
||||
char name[PROCFS_NAME_MAX];
|
||||
unsigned long irqflags;
|
||||
|
||||
dprintk("get_procfs_entry: %s for osnum %d mode %o\n", p, osnum, mode);
|
||||
dprintk("get_procfs_list_entry: %s for osnum %d mode %o\n", p, osnum, mode);
|
||||
irqflags = ihk_ikc_spinlock_lock(&procfs_file_list_lock);
|
||||
list_for_each_entry(e, &procfs_file_list, list) {
|
||||
if (e == NULL) {
|
||||
@@ -95,7 +120,8 @@ static struct proc_dir_entry *get_procfs_entry(char *p, int osnum, int mode)
|
||||
}
|
||||
if (strncmp(e->fname, p, PROCFS_NAME_MAX) == 0) {
|
||||
/* We found the entry */
|
||||
ret = e->entry;
|
||||
ret = e;
|
||||
break;
|
||||
}
|
||||
}
|
||||
ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflags);
|
||||
@@ -107,19 +133,19 @@ static struct proc_dir_entry *get_procfs_entry(char *p, int osnum, int mode)
|
||||
/* We have non-null parent dir. */
|
||||
strncpy(name, p, r - p);
|
||||
name[r - p] = '\0';
|
||||
parent = get_procfs_entry(name, osnum, 0);
|
||||
parent = get_procfs_list_entry(name, osnum, 0);
|
||||
if (parent == NULL) {
|
||||
/* We could not get a parent procfs entry. Give up. */
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
e = kmalloc(sizeof(struct procfs_list_entry), GFP_KERNEL);
|
||||
if (e == NULL) {
|
||||
ret = kmalloc(sizeof(struct procfs_list_entry), GFP_KERNEL);
|
||||
if (ret == NULL) {
|
||||
kprintf("ERROR: not enough memory to create PROCFS entry.\n");
|
||||
return NULL;
|
||||
}
|
||||
/* Fill the fname field of the entry */
|
||||
strncpy(e->fname, p, PROCFS_NAME_MAX);
|
||||
strncpy(ret->fname, p, PROCFS_NAME_MAX);
|
||||
|
||||
if (r != NULL) {
|
||||
strncpy(name, r + 1, p + PROCFS_NAME_MAX - r - 1);
|
||||
@@ -127,25 +153,38 @@ static struct proc_dir_entry *get_procfs_entry(char *p, int osnum, int mode)
|
||||
strncpy(name, p, PROCFS_NAME_MAX);
|
||||
}
|
||||
if (mode == 0) {
|
||||
ret = proc_mkdir(name, parent);
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
|
||||
pde = proc_mkdir(name, parent ? parent->entry : NULL);
|
||||
#else
|
||||
pde = proc_mkdir_data(name, 0555, parent ? parent->entry : NULL, ret);
|
||||
#endif
|
||||
} else {
|
||||
ret = create_proc_entry(name, mode, parent);
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
|
||||
pde = create_proc_entry(name, mode, parent->entry);
|
||||
if (pde)
|
||||
pde->proc_fops = &mckernel_procfs_file_operations;
|
||||
#else
|
||||
pde = proc_create_data(name, mode, parent->entry,
|
||||
&mckernel_procfs_file_operations, ret);
|
||||
#endif
|
||||
}
|
||||
if (ret == NULL) {
|
||||
if (pde == NULL) {
|
||||
kprintf("ERROR: cannot create a PROCFS entry for %s.\n", p);
|
||||
kfree(e);
|
||||
kfree(ret);
|
||||
return NULL;
|
||||
}
|
||||
ret->data = e;
|
||||
e->osnum = osnum;
|
||||
e->entry = ret;
|
||||
e->parent = parent;
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
|
||||
pde->data = ret;
|
||||
#endif
|
||||
ret->osnum = osnum;
|
||||
ret->entry = pde;
|
||||
ret->parent = parent;
|
||||
|
||||
irqflags = ihk_ikc_spinlock_lock(&procfs_file_list_lock);
|
||||
list_add(&(e->list), &procfs_file_list);
|
||||
list_add(&(ret->list), &procfs_file_list);
|
||||
ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflags);
|
||||
|
||||
dprintk("get_procfs_entry: %s done\n", p);
|
||||
dprintk("get_procfs_list_entry: %s done\n", p);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -161,7 +200,6 @@ static struct proc_dir_entry *get_procfs_entry(char *p, int osnum, int mode)
|
||||
|
||||
void procfs_create(void *__os, int ref, int osnum, int pid, unsigned long arg)
|
||||
{
|
||||
struct proc_dir_entry *entry;
|
||||
struct procfs_list_entry *e;
|
||||
ihk_device_t dev = ihk_os_to_dev(__os);
|
||||
unsigned long parg;
|
||||
@@ -183,18 +221,16 @@ void procfs_create(void *__os, int ref, int osnum, int pid, unsigned long arg)
|
||||
printk("ERROR: procfs_creat: file name not properly terminated.\n");
|
||||
goto quit;
|
||||
}
|
||||
entry = get_procfs_entry(name, osnum, mode);
|
||||
if (entry == NULL) {
|
||||
e = get_procfs_list_entry(name, osnum, mode);
|
||||
if (e == NULL) {
|
||||
printk("ERROR: could not create a procfs entry for %s.\n", name);
|
||||
goto quit;
|
||||
}
|
||||
|
||||
e = entry->data;
|
||||
e->os = __os;
|
||||
e->cpu = ref;
|
||||
e->pid = pid;
|
||||
|
||||
entry->read_proc = mckernel_procfs_read;
|
||||
quit:
|
||||
f->status = 1; /* Now the peer can free the data. */
|
||||
ihk_device_unmap_virtual(dev, f, sizeof(struct procfs_file));
|
||||
@@ -216,7 +252,7 @@ void procfs_delete(void *__os, int osnum, unsigned long arg)
|
||||
unsigned long parg;
|
||||
struct procfs_file *f;
|
||||
struct procfs_list_entry *e;
|
||||
struct proc_dir_entry *parent = NULL;
|
||||
struct procfs_list_entry *parent = NULL;
|
||||
char name[PROCFS_NAME_MAX];
|
||||
char *r;
|
||||
unsigned long irqflags;
|
||||
@@ -230,8 +266,10 @@ void procfs_delete(void *__os, int osnum, unsigned long arg)
|
||||
if ((strncmp(e->fname, f->fname, PROCFS_NAME_MAX) == 0) &&
|
||||
(e->osnum == osnum)) {
|
||||
list_del(&e->list);
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
|
||||
e->entry->read_proc = NULL;
|
||||
e->entry->data = NULL;
|
||||
#endif
|
||||
parent = e->parent;
|
||||
kfree(e);
|
||||
r = strrchr(f->fname, '/');
|
||||
@@ -241,7 +279,7 @@ void procfs_delete(void *__os, int osnum, unsigned long arg)
|
||||
strncpy(name, r + 1, PROCFS_NAME_MAX);
|
||||
}
|
||||
dprintk("found and remove %s from the list.\n", name);
|
||||
remove_proc_entry(name, parent);
|
||||
remove_proc_entry(name, parent->entry);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -271,27 +309,50 @@ void procfs_answer(unsigned int arg, int err)
|
||||
* This function conforms to the 2) way of fs/proc/generic.c
|
||||
* from linux-2.6.39.4.
|
||||
*/
|
||||
|
||||
int mckernel_procfs_read(char *buffer, char **start, off_t offset,
|
||||
int count, int *peof, void *dat)
|
||||
static ssize_t
|
||||
mckernel_procfs_read(struct file *file, char __user *buf, size_t nbytes,
|
||||
loff_t *ppos)
|
||||
{
|
||||
struct procfs_list_entry *e = dat;
|
||||
struct inode * inode = file->f_path.dentry->d_inode;
|
||||
char *kern_buffer;
|
||||
int order = 0;
|
||||
volatile struct procfs_read *r;
|
||||
struct ikc_scd_packet isp;
|
||||
int ret, retrycount = 0;
|
||||
unsigned long pbuf;
|
||||
unsigned long count = nbytes;
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
|
||||
struct proc_dir_entry *dp = PDE(inode);
|
||||
struct procfs_list_entry *e = dp->data;
|
||||
#else
|
||||
struct procfs_list_entry *e = PDE_DATA(inode);
|
||||
#endif
|
||||
loff_t offset = *ppos;
|
||||
|
||||
dprintk("mckernel_procfs_read: invoked for %s\n", e->fname);
|
||||
|
||||
if (count <= 0 || dat == NULL || offset < 0) {
|
||||
dprintk("mckernel_procfs_read: invoked for %s, offset: %lu, count: %d\n",
|
||||
e->fname, offset, count);
|
||||
|
||||
if (count <= 0 || offset < 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
pbuf = virt_to_phys(buffer);
|
||||
if (pbuf / PAGE_SIZE != (pbuf + count - 1) / PAGE_SIZE) {
|
||||
/* Truncate the read count upto the nearest page boundary */
|
||||
count = ((pbuf + count - 1) / PAGE_SIZE) * PAGE_SIZE - pbuf;
|
||||
|
||||
while ((1 << order) < count) ++order;
|
||||
if (order > 12) {
|
||||
order -= 12;
|
||||
}
|
||||
else {
|
||||
order = 1;
|
||||
}
|
||||
|
||||
/* NOTE: we need physically contiguous memory to pass through IKC */
|
||||
kern_buffer = (char *)__get_free_pages(GFP_KERNEL, order);
|
||||
if (!kern_buffer) {
|
||||
printk("mckernel_procfs_read(): ERROR: allocating kernel buffer\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
pbuf = virt_to_phys(kern_buffer);
|
||||
|
||||
r = kmalloc(sizeof(struct procfs_read), GFP_KERNEL);
|
||||
if (r == NULL) {
|
||||
return -ENOMEM;
|
||||
@@ -309,18 +370,23 @@ retry:
|
||||
isp.msg = SCD_MSG_PROCFS_REQUEST;
|
||||
isp.ref = e->cpu;
|
||||
isp.arg = virt_to_phys(r);
|
||||
|
||||
ret = mcctrl_ikc_send(e->os, e->cpu, &isp);
|
||||
|
||||
if (ret < 0) {
|
||||
goto out; /* error */
|
||||
}
|
||||
|
||||
/* Wait for a reply. */
|
||||
ret = -EIO; /* default exit code */
|
||||
dprintk("now wait for a relpy\n");
|
||||
|
||||
/* Wait for the status field of the procfs_read structure set ready. */
|
||||
if (wait_event_interruptible_timeout(procfsq, r->status != 0, HZ) == 0) {
|
||||
kprintf("ERROR: mckernel_procfs_read: timeout (1 sec).\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Wake up and check the result. */
|
||||
dprintk("mckernel_procfs_read: woke up. ret: %d, eof: %d\n", r->ret, r->eof);
|
||||
if ((r->ret == 0) && (r->eof != 1)) {
|
||||
@@ -335,13 +401,20 @@ retry:
|
||||
dprintk("retry\n");
|
||||
goto retry;
|
||||
}
|
||||
if (r->eof == 1) {
|
||||
dprintk("reached end of file.\n");
|
||||
*peof = 1;
|
||||
|
||||
if (r->ret > 0) {
|
||||
if (copy_to_user(buf, kern_buffer, r->ret)) {
|
||||
kprintf("ERROR: mckernel_procfs_read: copy_to_user failed.\n");
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
*ppos += r->ret;
|
||||
}
|
||||
*start = buffer;
|
||||
ret = r->ret;
|
||||
|
||||
out:
|
||||
free_pages((uintptr_t)kern_buffer, order);
|
||||
kfree((void *)r);
|
||||
|
||||
return ret;
|
||||
@@ -367,7 +440,7 @@ void procfs_exit(int osnum) {
|
||||
int error;
|
||||
mm_segment_t old_fs = get_fs();
|
||||
struct kstat stat;
|
||||
struct proc_dir_entry *parent;
|
||||
struct procfs_list_entry *parent;
|
||||
struct procfs_list_entry *e, *temp = NULL;
|
||||
unsigned long irqflags;
|
||||
|
||||
@@ -378,8 +451,10 @@ void procfs_exit(int osnum) {
|
||||
if (e->osnum == osnum) {
|
||||
dprintk("found entry for %s.\n", e->fname);
|
||||
list_del(&e->list);
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
|
||||
e->entry->read_proc = NULL;
|
||||
e->entry->data = NULL;
|
||||
#endif
|
||||
parent = e->parent;
|
||||
r = strrchr(e->fname, '/');
|
||||
if (r == NULL) {
|
||||
@@ -387,7 +462,9 @@ void procfs_exit(int osnum) {
|
||||
} else {
|
||||
r += 1;
|
||||
}
|
||||
remove_proc_entry(r, parent);
|
||||
if (parent) {
|
||||
remove_proc_entry(r, parent->entry);
|
||||
}
|
||||
dprintk("free the entry\n");
|
||||
kfree(e);
|
||||
}
|
||||
|
||||
@@ -13,6 +13,8 @@
|
||||
* Copyright (C) 2012 - 2013 Hitachi, Ltd.
|
||||
* \author Balazs Gerofi <bgerofi@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2013 The University of Tokyo
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2014 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
@@ -43,6 +45,7 @@
|
||||
#include <asm/delay.h>
|
||||
#include <asm/io.h>
|
||||
#include "mcctrl.h"
|
||||
#include <linux/version.h>
|
||||
|
||||
#define ALIGN_WAIT_BUF(z) (((z + 63) >> 6) << 6)
|
||||
|
||||
@@ -319,6 +322,109 @@ out:
|
||||
return error;
|
||||
}
|
||||
|
||||
#define RUS_PAGE_HASH_SHIFT 8
|
||||
#define RUS_PAGE_HASH_SIZE (1UL << RUS_PAGE_HASH_SHIFT)
|
||||
#define RUS_PAGE_HASH_MASK (RUS_PAGE_HASH_SIZE - 1)
|
||||
|
||||
struct list_head rus_page_hash[RUS_PAGE_HASH_SIZE];
|
||||
spinlock_t rus_page_hash_lock;
|
||||
|
||||
struct rus_page {
|
||||
struct list_head hash;
|
||||
struct page *page;
|
||||
int refcount;
|
||||
int put_page;
|
||||
};
|
||||
|
||||
void rus_page_hash_init(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
spin_lock_init(&rus_page_hash_lock);
|
||||
for (i = 0; i < RUS_PAGE_HASH_SIZE; ++i) {
|
||||
INIT_LIST_HEAD(&rus_page_hash[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/* rus_page_hash_lock must be held */
|
||||
struct rus_page *_rus_page_hash_lookup(struct page *page)
|
||||
{
|
||||
struct rus_page *rp = NULL;
|
||||
struct rus_page *rp_iter;
|
||||
|
||||
list_for_each_entry(rp_iter,
|
||||
&rus_page_hash[page_to_pfn(page) & RUS_PAGE_HASH_MASK], hash) {
|
||||
|
||||
if (rp_iter->page != page)
|
||||
continue;
|
||||
|
||||
rp = rp_iter;
|
||||
break;
|
||||
}
|
||||
|
||||
return rp;
|
||||
}
|
||||
|
||||
|
||||
static int rus_page_hash_insert(struct page *page)
|
||||
{
|
||||
int ret = 0;
|
||||
struct rus_page *rp;
|
||||
|
||||
spin_lock(&rus_page_hash_lock);
|
||||
|
||||
rp = _rus_page_hash_lookup(page);
|
||||
if (!rp) {
|
||||
rp = kmalloc(sizeof(*rp), GFP_ATOMIC);
|
||||
|
||||
if (!rp) {
|
||||
printk("rus_page_add_hash(): error allocating rp\n");
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
rp->page = page;
|
||||
rp->put_page = 0;
|
||||
|
||||
get_page(page);
|
||||
|
||||
rp->refcount = 0; /* Will be increased below */
|
||||
|
||||
list_add_tail(&rp->hash,
|
||||
&rus_page_hash[page_to_pfn(page) & RUS_PAGE_HASH_MASK]);
|
||||
}
|
||||
|
||||
++rp->refcount;
|
||||
|
||||
|
||||
out:
|
||||
spin_unlock(&rus_page_hash_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void rus_page_hash_put_pages(void)
|
||||
{
|
||||
int i;
|
||||
struct rus_page *rp_iter;
|
||||
struct rus_page *rp_iter_next;
|
||||
|
||||
spin_lock(&rus_page_hash_lock);
|
||||
|
||||
for (i = 0; i < RUS_PAGE_HASH_SIZE; ++i) {
|
||||
|
||||
list_for_each_entry_safe(rp_iter, rp_iter_next,
|
||||
&rus_page_hash[i], hash) {
|
||||
list_del(&rp_iter->hash);
|
||||
|
||||
put_page(rp_iter->page);
|
||||
kfree(rp_iter);
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock(&rus_page_hash_lock);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* By remap_pfn_range(), VM_PFN_AT_MMAP may be raised.
|
||||
* VM_PFN_AT_MMAP cause the following problems.
|
||||
@@ -329,6 +435,7 @@ out:
|
||||
* These problems may be solved in linux-3.7.
|
||||
* It uses vm_insert_pfn() until it is fixed.
|
||||
*/
|
||||
|
||||
#define USE_VM_INSERT_PFN 1
|
||||
|
||||
static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
@@ -409,15 +516,11 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
|
||||
if (pfn_valid(pfn+pix)) {
|
||||
page = pfn_to_page(pfn+pix);
|
||||
if (!page_count(page)) {
|
||||
get_page(page);
|
||||
/*
|
||||
* TODO:
|
||||
* The pages which get_page() has been called with
|
||||
* should be recorded. Because these pages have to
|
||||
* be passed to put_page() before they are freed.
|
||||
*/
|
||||
|
||||
if ((error = rus_page_hash_insert(page)) < 0) {
|
||||
printk("rus_vm_fault: error hashing page??\n");
|
||||
}
|
||||
|
||||
error = vm_insert_page(vma, rva+(pix*PAGE_SIZE), page);
|
||||
if (error) {
|
||||
printk("vm_insert_page: %d\n", error);
|
||||
@@ -448,7 +551,11 @@ static struct vm_operations_struct rus_vmops = {
|
||||
|
||||
static int rus_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
{
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0)
|
||||
vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND | VM_MIXEDMAP;
|
||||
#else
|
||||
vma->vm_flags |= VM_DONTDUMP | VM_DONTEXPAND | VM_MIXEDMAP;
|
||||
#endif
|
||||
vma->vm_ops = &rus_vmops;
|
||||
return 0;
|
||||
}
|
||||
@@ -491,9 +598,18 @@ int reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp, un
|
||||
if (vma) {
|
||||
end = (vma->vm_start - GAP_FOR_MCEXEC) & ~(GAP_FOR_MCEXEC - 1);
|
||||
}
|
||||
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
|
||||
start = do_mmap_pgoff(file, 0, end,
|
||||
PROT_READ|PROT_WRITE, MAP_FIXED|MAP_SHARED, 0);
|
||||
#endif
|
||||
|
||||
up_write(¤t->mm->mmap_sem);
|
||||
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0)
|
||||
start = vm_mmap(file, 0, end,
|
||||
PROT_READ|PROT_WRITE, MAP_FIXED|MAP_SHARED, 0);
|
||||
#endif
|
||||
|
||||
revert_creds(original);
|
||||
put_cred(promoted);
|
||||
@@ -782,19 +898,19 @@ static int pager_req_create(ihk_os_t os, int fd, uintptr_t result_pa)
|
||||
|
||||
error = vfs_fstat(fd, &st);
|
||||
if (error) {
|
||||
printk("pager_req_create(%d,%lx):vfs_stat failed. %d\n", fd, (long)result_pa, error);
|
||||
dprintk("pager_req_create(%d,%lx):vfs_stat failed. %d\n", fd, (long)result_pa, error);
|
||||
goto out;
|
||||
}
|
||||
if (!S_ISREG(st.mode)) {
|
||||
error = -ESRCH;
|
||||
printk("pager_req_create(%d,%lx):not VREG. %x\n", fd, (long)result_pa, st.mode);
|
||||
dprintk("pager_req_create(%d,%lx):not VREG. %x\n", fd, (long)result_pa, st.mode);
|
||||
goto out;
|
||||
}
|
||||
|
||||
file = fget(fd);
|
||||
if (!file) {
|
||||
error = -EBADF;
|
||||
printk("pager_req_create(%d,%lx):file not found. %d\n", fd, (long)result_pa, error);
|
||||
dprintk("pager_req_create(%d,%lx):file not found. %d\n", fd, (long)result_pa, error);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -817,7 +933,7 @@ static int pager_req_create(ihk_os_t os, int fd, uintptr_t result_pa)
|
||||
}
|
||||
if (!(maxprot & PROT_READ)) {
|
||||
error = -EACCES;
|
||||
printk("pager_req_create(%d,%lx):cannot read file. %d\n", fd, (long)result_pa, error);
|
||||
dprintk("pager_req_create(%d,%lx):cannot read file. %d\n", fd, (long)result_pa, error);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -1100,7 +1216,7 @@ static int pager_req_map(ihk_os_t os, int fd, size_t len, off_t off, uintptr_t r
|
||||
struct pager_map_result *resp;
|
||||
uintptr_t phys;
|
||||
|
||||
printk("pager_req_map(%p,%d,%lx,%lx,%lx)\n", os, fd, len, off, result_rpa);
|
||||
dprintk("pager_req_map(%p,%d,%lx,%lx,%lx)\n", os, fd, len, off, result_rpa);
|
||||
pager = kzalloc(sizeof(*pager), GFP_KERNEL);
|
||||
if (!pager) {
|
||||
error = -ENOMEM;
|
||||
@@ -1128,8 +1244,17 @@ static int pager_req_map(ihk_os_t os, int fd, size_t len, off_t off, uintptr_t r
|
||||
|
||||
down_write(¤t->mm->mmap_sem);
|
||||
#define ANY_WHERE 0
|
||||
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
|
||||
va = do_mmap_pgoff(file, ANY_WHERE, len, maxprot, MAP_SHARED, pgoff);
|
||||
#endif
|
||||
|
||||
up_write(¤t->mm->mmap_sem);
|
||||
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0)
|
||||
va = vm_mmap(file, ANY_WHERE, len, maxprot, MAP_SHARED, pgoff << PAGE_SHIFT);
|
||||
#endif
|
||||
|
||||
if (IS_ERR_VALUE(va)) {
|
||||
printk("pager_req_map(%p,%d,%lx,%lx,%lx):do_mmap_pgoff failed. %d\n", os, fd, len, off, result_rpa, (int)va);
|
||||
error = va;
|
||||
@@ -1140,6 +1265,9 @@ static int pager_req_map(ihk_os_t os, int fd, size_t len, off_t off, uintptr_t r
|
||||
pager->map_uaddr = va;
|
||||
pager->map_len = len;
|
||||
pager->map_off = off;
|
||||
|
||||
dprintk("pager_req_map(%s): 0x%lx - 0x%lx (len: %lu)\n",
|
||||
file->f_dentry->d_name.name, va, va + len, len);
|
||||
|
||||
phys = ihk_device_map_memory(dev, result_rpa, sizeof(*resp));
|
||||
resp = ihk_device_map_virtual(dev, phys, sizeof(*resp), NULL, 0);
|
||||
@@ -1158,10 +1286,11 @@ out:
|
||||
if (pager) {
|
||||
kfree(pager);
|
||||
}
|
||||
printk("pager_req_map(%p,%d,%lx,%lx,%lx): %d\n", os, fd, len, off, result_rpa, error);
|
||||
dprintk("pager_req_map(%p,%d,%lx,%lx,%lx): %d\n", os, fd, len, off, result_rpa, error);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
static int pager_req_pfn(ihk_os_t os, uintptr_t handle, off_t off, uintptr_t ppfn_rpa)
|
||||
{
|
||||
const ihk_device_t dev = ihk_os_to_dev(os);
|
||||
@@ -1176,7 +1305,7 @@ static int pager_req_pfn(ihk_os_t os, uintptr_t handle, off_t off, uintptr_t ppf
|
||||
uintptr_t phys;
|
||||
uintptr_t *ppfn;
|
||||
|
||||
printk("pager_req_pfn(%p,%lx,%lx)\n", os, handle, off);
|
||||
dprintk("pager_req_pfn(%p,%lx,%lx)\n", os, handle, off);
|
||||
|
||||
if ((off < pager->map_off) || ((pager->map_off+pager->map_len) < (off + PAGE_SIZE))) {
|
||||
error = -ERANGE;
|
||||
@@ -1201,6 +1330,12 @@ static int pager_req_pfn(ihk_os_t os, uintptr_t handle, off_t off, uintptr_t ppf
|
||||
pfn = (uintptr_t)pte_pfn(*pte) << PAGE_SHIFT;
|
||||
#define PFN_PRESENT ((uintptr_t)1 << 0)
|
||||
pfn |= PFN_VALID | PFN_PRESENT;
|
||||
|
||||
/* Check if mapping is write-combined */
|
||||
if ((pte_flags(*pte) & _PAGE_PWT) &&
|
||||
!(pte_flags(*pte) & _PAGE_PCD)) {
|
||||
pfn |= _PAGE_PWT;
|
||||
}
|
||||
}
|
||||
pte_unmap(pte);
|
||||
}
|
||||
@@ -1216,7 +1351,7 @@ static int pager_req_pfn(ihk_os_t os, uintptr_t handle, off_t off, uintptr_t ppf
|
||||
|
||||
error = 0;
|
||||
out:
|
||||
printk("pager_req_pfn(%p,%lx,%lx): %d %lx\n", os, handle, off, error, pfn);
|
||||
dprintk("pager_req_pfn(%p,%lx,%lx): %d %lx\n", os, handle, off, error, pfn);
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -1225,11 +1360,15 @@ static int pager_req_unmap(ihk_os_t os, uintptr_t handle)
|
||||
struct pager * const pager = (void *)handle;
|
||||
int error;
|
||||
|
||||
printk("pager_req_unmap(%p,%lx)\n", os, handle);
|
||||
dprintk("pager_req_unmap(%p,%lx)\n", os, handle);
|
||||
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
|
||||
down_write(¤t->mm->mmap_sem);
|
||||
error = do_munmap(current->mm, pager->map_uaddr, pager->map_len);
|
||||
up_write(¤t->mm->mmap_sem);
|
||||
#else
|
||||
error = vm_munmap(pager->map_uaddr, pager->map_len);
|
||||
#endif
|
||||
|
||||
if (error) {
|
||||
printk("pager_req_unmap(%p,%lx):do_munmap failed. %d\n", os, handle, error);
|
||||
@@ -1237,7 +1376,7 @@ static int pager_req_unmap(ihk_os_t os, uintptr_t handle)
|
||||
}
|
||||
|
||||
kfree(pager);
|
||||
printk("pager_req_unmap(%p,%lx): %d\n", os, handle, error);
|
||||
dprintk("pager_req_unmap(%p,%lx): %d\n", os, handle, error);
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -1325,9 +1464,18 @@ static int remap_user_space(uintptr_t rva, size_t len, int prot)
|
||||
start = rva;
|
||||
pgoff = vma->vm_pgoff + ((rva - vma->vm_start) >> PAGE_SHIFT);
|
||||
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
|
||||
map = do_mmap_pgoff(file, start, len,
|
||||
prot, MAP_FIXED|MAP_SHARED, pgoff);
|
||||
#endif
|
||||
|
||||
up_write(&mm->mmap_sem);
|
||||
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0)
|
||||
map = vm_mmap(file, start, len,
|
||||
prot, MAP_FIXED|MAP_SHARED, pgoff << PAGE_SHIFT);
|
||||
#endif
|
||||
|
||||
out:
|
||||
dprintk("remap_user_space(%lx,%lx,%x): %lx (%ld)\n",
|
||||
rva, len, prot, (long)map, (long)map);
|
||||
@@ -1469,6 +1617,8 @@ fail:
|
||||
return error;
|
||||
}
|
||||
|
||||
#define SCHED_CHECK_SAME_OWNER 0x01
|
||||
#define SCHED_CHECK_ROOT 0x02
|
||||
|
||||
int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall_request *sc)
|
||||
{
|
||||
@@ -1556,6 +1706,71 @@ int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall
|
||||
error = writecore(os, sc->args[1], sc->args[0]);
|
||||
ret = 0;
|
||||
break;
|
||||
|
||||
case __NR_sched_setparam: {
|
||||
|
||||
switch (sc->args[0]) {
|
||||
|
||||
case SCHED_CHECK_SAME_OWNER: {
|
||||
const struct cred *cred = current_cred();
|
||||
const struct cred *pcred;
|
||||
bool match;
|
||||
struct task_struct *p;
|
||||
int pid = sc->args[1];
|
||||
|
||||
rcu_read_lock();
|
||||
p = pid_task(find_get_pid(pid), PIDTYPE_PID);
|
||||
if (!p) {
|
||||
rcu_read_unlock();
|
||||
ret = -ESRCH;
|
||||
goto sched_setparam_out;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
rcu_read_lock();
|
||||
pcred = __task_cred(p);
|
||||
#if LINUX_VERSION_CODE > KERNEL_VERSION(3,4,0)
|
||||
match = (uid_eq(cred->euid, pcred->euid) ||
|
||||
uid_eq(cred->euid, pcred->uid));
|
||||
#else
|
||||
match = ((cred->euid == pcred->euid) ||
|
||||
(cred->euid == pcred->uid));
|
||||
#endif
|
||||
rcu_read_unlock();
|
||||
|
||||
if (match) {
|
||||
ret = 0;
|
||||
}
|
||||
else {
|
||||
ret = -EPERM;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case SCHED_CHECK_ROOT: {
|
||||
const struct cred *cred = current_cred();
|
||||
bool match;
|
||||
|
||||
#if LINUX_VERSION_CODE > KERNEL_VERSION(3,4,0)
|
||||
match = uid_eq(cred->euid, GLOBAL_ROOT_UID);
|
||||
#else
|
||||
match = (cred->euid == 0);
|
||||
#endif
|
||||
if (match) {
|
||||
ret = 0;
|
||||
}
|
||||
else {
|
||||
ret = -EPERM;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
sched_setparam_out:
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
error = -ENOSYS;
|
||||
|
||||
@@ -40,7 +40,6 @@
|
||||
#include <ctype.h>
|
||||
#include <sys/mman.h>
|
||||
#include <asm/unistd.h>
|
||||
#include "../include/uprotocol.h"
|
||||
#include <sched.h>
|
||||
|
||||
#include <termios.h>
|
||||
@@ -49,6 +48,7 @@
|
||||
#include <sys/stat.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/utsname.h>
|
||||
#include <sys/fsuid.h>
|
||||
#include <time.h>
|
||||
#include <sys/time.h>
|
||||
#include <signal.h>
|
||||
@@ -56,7 +56,10 @@
|
||||
#include <dirent.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <pthread.h>
|
||||
#include <semaphore.h>
|
||||
#include <signal.h>
|
||||
#include <sys/signalfd.h>
|
||||
#include "../include/uprotocol.h"
|
||||
|
||||
//#define DEBUG
|
||||
|
||||
@@ -97,6 +100,13 @@ typedef unsigned char cc_t;
|
||||
typedef unsigned int speed_t;
|
||||
typedef unsigned int tcflag_t;
|
||||
|
||||
struct sigfd {
|
||||
struct sigfd *next;
|
||||
int sigpipe[2];
|
||||
};
|
||||
|
||||
struct sigfd *sigfdtop;
|
||||
|
||||
#ifdef NCCS
|
||||
#undef NCCS
|
||||
#endif
|
||||
@@ -111,14 +121,29 @@ struct kernel_termios {
|
||||
cc_t c_cc[NCCS]; /* control characters */
|
||||
};
|
||||
|
||||
int main_loop(int fd, int cpu, pthread_mutex_t *lock, int mcosid);
|
||||
int main_loop(int fd, int cpu, pthread_mutex_t *lock);
|
||||
|
||||
static int mcosid;
|
||||
static int fd;
|
||||
static char *exec_path = NULL;
|
||||
static char *altroot;
|
||||
static const char rlimit_stack_envname[] = "MCKERNEL_RLIMIT_STACK";
|
||||
static int ischild;
|
||||
|
||||
struct fork_sync {
|
||||
pid_t pid;
|
||||
int status;
|
||||
sem_t sem;
|
||||
};
|
||||
|
||||
struct fork_sync_container {
|
||||
struct fork_sync_container *next;
|
||||
struct fork_sync *fs;
|
||||
};
|
||||
|
||||
struct fork_sync_container *fork_sync_top;
|
||||
pthread_mutex_t fork_sync_mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||
|
||||
pid_t gettid(void)
|
||||
{
|
||||
return syscall(SYS_gettid);
|
||||
@@ -158,6 +183,7 @@ struct program_load_desc *load_elf(FILE *fp, char **interp_pathp)
|
||||
|
||||
desc = malloc(sizeof(struct program_load_desc)
|
||||
+ sizeof(struct program_image_section) * nhdrs);
|
||||
desc->shell_path[0] = '\0';
|
||||
fseek(fp, hdr.e_phoff, SEEK_SET);
|
||||
j = 0;
|
||||
desc->num_sections = nhdrs;
|
||||
@@ -218,7 +244,7 @@ struct program_load_desc *load_elf(FILE *fp, char **interp_pathp)
|
||||
desc->pid = getpid();
|
||||
desc->pgid = getpgid(0);
|
||||
desc->entry = hdr.e_entry;
|
||||
|
||||
ioctl(fd, MCEXEC_UP_GET_CREDV, desc->cred);
|
||||
desc->at_phdr = load_addr + hdr.e_phoff;
|
||||
desc->at_phent = sizeof(phdr);
|
||||
desc->at_phnum = hdr.e_phnum;
|
||||
@@ -546,11 +572,32 @@ int load_elf_desc(char *filename, struct program_load_desc **desc_p,
|
||||
/* Drop old name if exists */
|
||||
if (exec_path) {
|
||||
free(exec_path);
|
||||
exec_path = NULL;
|
||||
}
|
||||
|
||||
exec_path = strdup(filename);
|
||||
if (!exec_path) {
|
||||
fprintf(stderr, "WARNING: strdup(filename) failed\n");
|
||||
if (!strncmp("/", filename, 1)) {
|
||||
exec_path = strdup(filename);
|
||||
|
||||
if (!exec_path) {
|
||||
fprintf(stderr, "WARNING: strdup(filename) failed\n");
|
||||
return ENOMEM;
|
||||
}
|
||||
}
|
||||
else {
|
||||
char *cwd = getcwd(NULL, 0);
|
||||
if (!cwd) {
|
||||
fprintf(stderr, "Error: getting current working dir pathname\n");
|
||||
return ENOMEM;
|
||||
}
|
||||
|
||||
exec_path = malloc(strlen(cwd) + strlen(filename) + 2);
|
||||
if (!exec_path) {
|
||||
fprintf(stderr, "Error: allocating exec_path\n");
|
||||
return ENOMEM;
|
||||
}
|
||||
|
||||
sprintf(exec_path, "%s/%s", cwd, filename);
|
||||
free(cwd);
|
||||
}
|
||||
|
||||
desc = load_elf(fp, &interp_path);
|
||||
@@ -764,7 +811,6 @@ struct thread_data_s {
|
||||
pthread_t thread_id;
|
||||
int fd;
|
||||
int cpu;
|
||||
int mcosid;
|
||||
int ret;
|
||||
pid_t tid;
|
||||
int terminate;
|
||||
@@ -785,11 +831,13 @@ static void *main_loop_thread_func(void *arg)
|
||||
td->tid = gettid();
|
||||
td->remote_tid = (int)td->tid;
|
||||
pthread_barrier_wait(&init_ready);
|
||||
td->ret = main_loop(td->fd, td->cpu, td->lock, td->mcosid);
|
||||
td->ret = main_loop(td->fd, td->cpu, td->lock);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#define LOCALSIG SIGURG
|
||||
|
||||
void
|
||||
sendsig(int sig, siginfo_t *siginfo, void *context)
|
||||
{
|
||||
@@ -801,7 +849,10 @@ sendsig(int sig, siginfo_t *siginfo, void *context)
|
||||
struct signal_desc sigdesc;
|
||||
|
||||
if(siginfo->si_pid == pid &&
|
||||
siginfo->si_signo == SIGINT)
|
||||
siginfo->si_signo == LOCALSIG)
|
||||
return;
|
||||
|
||||
if(siginfo->si_signo == SIGCHLD)
|
||||
return;
|
||||
|
||||
for(i = 0; i < ncpu; i++){
|
||||
@@ -839,6 +890,94 @@ sendsig(int sig, siginfo_t *siginfo, void *context)
|
||||
}
|
||||
}
|
||||
|
||||
long
|
||||
act_signalfd4(struct syscall_wait_desc *w)
|
||||
{
|
||||
struct sigfd *sfd;
|
||||
struct sigfd *sb;
|
||||
int mode = w->sr.args[0];
|
||||
int flags;
|
||||
int tmp;
|
||||
int rc = 0;
|
||||
struct signalfd_siginfo *info;
|
||||
|
||||
switch(mode){
|
||||
case 0: /* new signalfd */
|
||||
sfd = malloc(sizeof(struct sigfd));
|
||||
tmp = w->sr.args[1];
|
||||
flags = 0;
|
||||
if(tmp & SFD_NONBLOCK)
|
||||
flags |= O_NONBLOCK;
|
||||
if(tmp & SFD_CLOEXEC)
|
||||
flags |= O_CLOEXEC;
|
||||
pipe2(sfd->sigpipe, flags);
|
||||
sfd->next = sigfdtop;
|
||||
sigfdtop = sfd;
|
||||
rc = sfd->sigpipe[0];
|
||||
break;
|
||||
case 1: /* close signalfd */
|
||||
tmp = w->sr.args[1];
|
||||
for(sfd = sigfdtop, sb = NULL; sfd; sb = sfd, sfd = sfd->next)
|
||||
if(sfd->sigpipe[0] == tmp)
|
||||
break;
|
||||
if(!sfd)
|
||||
rc = -EBADF;
|
||||
else{
|
||||
if(sb)
|
||||
sb->next = sfd->next;
|
||||
else
|
||||
sigfdtop = sfd->next;
|
||||
close(sfd->sigpipe[0]);
|
||||
close(sfd->sigpipe[1]);
|
||||
free(sfd);
|
||||
}
|
||||
break;
|
||||
case 2: /* push signal */
|
||||
tmp = w->sr.args[1];
|
||||
for(sfd = sigfdtop; sfd; sfd = sfd->next)
|
||||
if(sfd->sigpipe[0] == tmp)
|
||||
break;
|
||||
if(!sfd)
|
||||
rc = -EBADF;
|
||||
else{
|
||||
info = (struct signalfd_siginfo *)w->sr.args[2];
|
||||
write(sfd->sigpipe[1], info, sizeof(struct signalfd_siginfo));
|
||||
}
|
||||
break;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
void
|
||||
act_sigaction(struct syscall_wait_desc *w)
|
||||
{
|
||||
struct sigaction act;
|
||||
int sig;
|
||||
|
||||
sig = w->sr.args[0];
|
||||
if (sig == SIGCHLD || sig == LOCALSIG)
|
||||
return;
|
||||
memset(&act, '\0', sizeof act);
|
||||
if (w->sr.args[1] == (unsigned long)SIG_IGN)
|
||||
act.sa_handler = SIG_IGN;
|
||||
else{
|
||||
act.sa_sigaction = sendsig;
|
||||
act.sa_flags = SA_SIGINFO;
|
||||
}
|
||||
sigaction(sig, &act, NULL);
|
||||
}
|
||||
|
||||
void
|
||||
act_sigprocmask(struct syscall_wait_desc *w)
|
||||
{
|
||||
sigset_t set;
|
||||
|
||||
sigemptyset(&set);
|
||||
memcpy(&set, &w->sr.args[0], sizeof(unsigned long));
|
||||
sigdelset(&set, LOCALSIG);
|
||||
sigprocmask(SIG_SETMASK, &set, NULL);
|
||||
}
|
||||
|
||||
static int reduce_stack(struct rlimit *orig_rlim, char *argv[])
|
||||
{
|
||||
int n;
|
||||
@@ -891,8 +1030,7 @@ void init_sigaction(void)
|
||||
|
||||
master_tid = gettid();
|
||||
for (i = 1; i <= 64; i++) {
|
||||
if (i != SIGCHLD && i != SIGCONT && i != SIGSTOP &&
|
||||
i != SIGTSTP && i != SIGTTIN && i != SIGTTOU) {
|
||||
if (i != SIGKILL && i != SIGSTOP && i != SIGCHLD) {
|
||||
struct sigaction act;
|
||||
|
||||
sigaction(i, NULL, &act);
|
||||
@@ -904,7 +1042,7 @@ void init_sigaction(void)
|
||||
}
|
||||
}
|
||||
|
||||
void init_worker_threads(int fd, int mcosid)
|
||||
void init_worker_threads(int fd)
|
||||
{
|
||||
int i;
|
||||
|
||||
@@ -916,7 +1054,6 @@ void init_worker_threads(int fd, int mcosid)
|
||||
|
||||
thread_data[i].fd = fd;
|
||||
thread_data[i].cpu = i;
|
||||
thread_data[i].mcosid = mcosid;
|
||||
thread_data[i].lock = &lock;
|
||||
thread_data[i].init_ready = &init_ready;
|
||||
thread_data[i].terminate = 0;
|
||||
@@ -930,7 +1067,75 @@ void init_worker_threads(int fd, int mcosid)
|
||||
}
|
||||
|
||||
pthread_barrier_wait(&init_ready);
|
||||
}
|
||||
}
|
||||
|
||||
#define MCK_RLIMIT_AS 0
|
||||
#define MCK_RLIMIT_CORE 1
|
||||
#define MCK_RLIMIT_CPU 2
|
||||
#define MCK_RLIMIT_DATA 3
|
||||
#define MCK_RLIMIT_FSIZE 4
|
||||
#define MCK_RLIMIT_LOCKS 5
|
||||
#define MCK_RLIMIT_MEMLOCK 6
|
||||
#define MCK_RLIMIT_MSGQUEUE 7
|
||||
#define MCK_RLIMIT_NICE 8
|
||||
#define MCK_RLIMIT_NOFILE 9
|
||||
#define MCK_RLIMIT_NPROC 10
|
||||
#define MCK_RLIMIT_RSS 11
|
||||
#define MCK_RLIMIT_RTPRIO 12
|
||||
#define MCK_RLIMIT_RTTIME 13
|
||||
#define MCK_RLIMIT_SIGPENDING 14
|
||||
#define MCK_RLIMIT_STACK 15
|
||||
|
||||
static int rlimits[] = {
|
||||
#ifdef RLIMIT_AS
|
||||
RLIMIT_AS, MCK_RLIMIT_AS,
|
||||
#endif
|
||||
#ifdef RLIMIT_CORE
|
||||
RLIMIT_CORE, MCK_RLIMIT_CORE,
|
||||
#endif
|
||||
#ifdef RLIMIT_CPU
|
||||
RLIMIT_CPU, MCK_RLIMIT_CPU,
|
||||
#endif
|
||||
#ifdef RLIMIT_DATA
|
||||
RLIMIT_DATA, MCK_RLIMIT_DATA,
|
||||
#endif
|
||||
#ifdef RLIMIT_FSIZE
|
||||
RLIMIT_FSIZE, MCK_RLIMIT_FSIZE,
|
||||
#endif
|
||||
#ifdef RLIMIT_LOCKS
|
||||
RLIMIT_LOCKS, MCK_RLIMIT_LOCKS,
|
||||
#endif
|
||||
#ifdef RLIMIT_MEMLOCK
|
||||
RLIMIT_MEMLOCK, MCK_RLIMIT_MEMLOCK,
|
||||
#endif
|
||||
#ifdef RLIMIT_MSGQUEUE
|
||||
RLIMIT_MSGQUEUE,MCK_RLIMIT_MSGQUEUE,
|
||||
#endif
|
||||
#ifdef RLIMIT_NICE
|
||||
RLIMIT_NICE, MCK_RLIMIT_NICE,
|
||||
#endif
|
||||
#ifdef RLIMIT_NOFILE
|
||||
RLIMIT_NOFILE, MCK_RLIMIT_NOFILE,
|
||||
#endif
|
||||
#ifdef RLIMIT_NPROC
|
||||
RLIMIT_NPROC, MCK_RLIMIT_NPROC,
|
||||
#endif
|
||||
#ifdef RLIMIT_RSS
|
||||
RLIMIT_RSS, MCK_RLIMIT_RSS,
|
||||
#endif
|
||||
#ifdef RLIMIT_RTPRIO
|
||||
RLIMIT_RTPRIO, MCK_RLIMIT_RTPRIO,
|
||||
#endif
|
||||
#ifdef RLIMIT_RTTIME
|
||||
RLIMIT_RTTIME, MCK_RLIMIT_RTTIME,
|
||||
#endif
|
||||
#ifdef RLIMIT_SIGPENDING
|
||||
RLIMIT_SIGPENDING,MCK_RLIMIT_SIGPENDING,
|
||||
#endif
|
||||
#ifdef RLIMIT_STACK
|
||||
RLIMIT_STACK, MCK_RLIMIT_STACK,
|
||||
#endif
|
||||
};
|
||||
|
||||
char dev[64];
|
||||
|
||||
@@ -952,7 +1157,6 @@ int main(int argc, char **argv)
|
||||
unsigned long lcur;
|
||||
unsigned long lmax;
|
||||
int target_core = 0;
|
||||
int mcosid = 0;
|
||||
int opt;
|
||||
char path[1024];
|
||||
char *shell = NULL;
|
||||
@@ -1056,7 +1260,9 @@ int main(int argc, char **argv)
|
||||
if (shell) {
|
||||
argv[optind] = path;
|
||||
}
|
||||
|
||||
|
||||
for(i = 0; i < sizeof(rlimits) / sizeof(int); i += 2)
|
||||
getrlimit(rlimits[i], &desc->rlimit[rlimits[i + 1]]);
|
||||
desc->envs_len = envs_len;
|
||||
desc->envs = envs;
|
||||
//print_flat(envs);
|
||||
@@ -1091,8 +1297,8 @@ int main(int argc, char **argv)
|
||||
rlim_stack.rlim_cur = lcur;
|
||||
rlim_stack.rlim_max = lmax;
|
||||
}
|
||||
desc->rlimit_stack_cur = rlim_stack.rlim_cur;
|
||||
desc->rlimit_stack_max = rlim_stack.rlim_max;
|
||||
desc->rlimit[MCK_RLIMIT_STACK].rlim_cur = rlim_stack.rlim_cur;
|
||||
desc->rlimit[MCK_RLIMIT_STACK].rlim_max = rlim_stack.rlim_max;
|
||||
|
||||
ncpu = ioctl(fd, MCEXEC_UP_GET_CPU, 0);
|
||||
if(ncpu == -1){
|
||||
@@ -1173,7 +1379,7 @@ int main(int argc, char **argv)
|
||||
|
||||
init_sigaction();
|
||||
|
||||
init_worker_threads(fd, mcosid);
|
||||
init_worker_threads(fd);
|
||||
|
||||
if (ioctl(fd, MCEXEC_UP_START_IMAGE, (unsigned long)desc) != 0) {
|
||||
perror("exec");
|
||||
@@ -1244,13 +1450,13 @@ static void
|
||||
kill_thread(unsigned long cpu)
|
||||
{
|
||||
if(cpu >= 0 && cpu < ncpu){
|
||||
pthread_kill(thread_data[cpu].thread_id, SIGINT);
|
||||
pthread_kill(thread_data[cpu].thread_id, LOCALSIG);
|
||||
}
|
||||
else{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ncpu; ++i) {
|
||||
pthread_kill(thread_data[i].thread_id, SIGINT);
|
||||
pthread_kill(thread_data[i].thread_id, LOCALSIG);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1351,7 +1557,32 @@ int close_cloexec_fds(int mcos_fd)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main_loop(int fd, int cpu, pthread_mutex_t *lock, int mcosid)
|
||||
char *
|
||||
chgpath(char *in, char *buf)
|
||||
{
|
||||
char *fn = in;
|
||||
struct stat sb;
|
||||
|
||||
if (!strncmp(fn, "/proc/self/", 11)){
|
||||
sprintf(buf, "/proc/mcos%d/%d/%s", mcosid, getpid(), fn + 11);
|
||||
fn = buf;
|
||||
}
|
||||
else if(!strncmp(fn, "/proc/", 6)){
|
||||
sprintf(buf, "/proc/mcos%d/%s", mcosid, fn + 6);
|
||||
fn = buf;
|
||||
}
|
||||
else if(!strcmp(fn, "/sys/devices/system/cpu/online")){
|
||||
fn = "/admin/fs/attached/files/sys/devices/system/cpu/online";
|
||||
}
|
||||
else
|
||||
return in;
|
||||
|
||||
if(stat(fn, &sb) == -1)
|
||||
return in;
|
||||
return fn;
|
||||
}
|
||||
|
||||
int main_loop(int fd, int cpu, pthread_mutex_t *lock)
|
||||
{
|
||||
struct syscall_wait_desc w;
|
||||
long ret;
|
||||
@@ -1389,14 +1620,8 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock, int mcosid)
|
||||
}
|
||||
__dprintf("open: %s\n", pathbuf);
|
||||
|
||||
fn = pathbuf;
|
||||
if(!strncmp(fn, "/proc/", 6)){
|
||||
sprintf(tmpbuf, "/proc/mcos%d/%s", mcosid, fn + 6);
|
||||
fn = tmpbuf;
|
||||
}
|
||||
else if(!strcmp(fn, "/sys/devices/system/cpu/online")){
|
||||
fn = "/admin/fs/attached/files/sys/devices/system/cpu/online";
|
||||
}
|
||||
fn = chgpath(pathbuf, tmpbuf);
|
||||
|
||||
ret = open(fn, w.sr.args[1], w.sr.args[2]);
|
||||
SET_ERR(ret);
|
||||
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||
@@ -1505,113 +1730,157 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock, int mcosid)
|
||||
}
|
||||
|
||||
case __NR_fork: {
|
||||
int child;
|
||||
int sync_pipe_fd[2];
|
||||
char sync_msg;
|
||||
struct fork_sync *fs;
|
||||
struct fork_sync_container *fsc;
|
||||
struct fork_sync_container *fp;
|
||||
struct fork_sync_container *fb;
|
||||
int rc = -1;
|
||||
pid_t pid;
|
||||
|
||||
if (pipe(sync_pipe_fd) != 0) {
|
||||
fprintf(stderr, "fork(): error creating sync pipe\n");
|
||||
do_syscall_return(fd, cpu, -1, 0, 0, 0, 0);
|
||||
fsc = malloc(sizeof(struct fork_sync_container));
|
||||
memset(fsc, '\0', sizeof(struct fork_sync_container));
|
||||
pthread_mutex_lock(&fork_sync_mutex);
|
||||
fsc->next = fork_sync_top;
|
||||
fork_sync_top = fsc;
|
||||
pthread_mutex_unlock(&fork_sync_mutex);
|
||||
fsc->fs = fs = mmap(NULL, sizeof(struct fork_sync),
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
|
||||
if(fs == (void *)-1){
|
||||
goto fork_err;
|
||||
}
|
||||
|
||||
memset(fs, '\0', sizeof(struct fork_sync));
|
||||
sem_init(&fs->sem, 1, 0);
|
||||
|
||||
pid = fork();
|
||||
|
||||
switch (pid) {
|
||||
/* Error */
|
||||
case -1:
|
||||
fprintf(stderr, "fork(): error forking child process\n");
|
||||
rc = -errno;
|
||||
break;
|
||||
|
||||
/* Child process */
|
||||
case 0: {
|
||||
int i;
|
||||
int ret = 1;
|
||||
struct newprocess_desc npdesc;
|
||||
|
||||
ischild = 1;
|
||||
/* Reopen device fd */
|
||||
close(fd);
|
||||
fd = open(dev, O_RDWR);
|
||||
if (fd < 0) {
|
||||
fs->status = -errno;
|
||||
fprintf(stderr, "ERROR: opening %s\n", dev);
|
||||
|
||||
goto fork_child_sync_pipe;
|
||||
}
|
||||
|
||||
/* Reinit signals and syscall threads */
|
||||
init_sigaction();
|
||||
init_worker_threads(fd);
|
||||
|
||||
__dprintf("pid(%d): signals and syscall threads OK\n",
|
||||
getpid());
|
||||
|
||||
/* Hold executable also in the child process */
|
||||
if ((ret = ioctl(fd, MCEXEC_UP_OPEN_EXEC, exec_path))
|
||||
!= 0) {
|
||||
fprintf(stderr, "Error: open_exec() fails for %s: %d (fd: %d)\n",
|
||||
exec_path, ret, fd);
|
||||
fs->status = -errno;
|
||||
goto fork_child_sync_pipe;
|
||||
}
|
||||
|
||||
fork_child_sync_pipe:
|
||||
sem_post(&fs->sem);
|
||||
if (fs->status)
|
||||
exit(1);
|
||||
|
||||
for (fp = fork_sync_top; fp;) {
|
||||
fb = fp->next;
|
||||
if (fp->fs)
|
||||
munmap(fp->fs, sizeof(struct fork_sync));
|
||||
free(fp);
|
||||
fp = fb;
|
||||
}
|
||||
fork_sync_top = NULL;
|
||||
pthread_mutex_init(&fork_sync_mutex, NULL);
|
||||
|
||||
npdesc.pid = getpid();
|
||||
ioctl(fd, MCEXEC_UP_NEW_PROCESS, &npdesc);
|
||||
|
||||
/* TODO: does the forked thread run in a pthread context? */
|
||||
for (i = 0; i <= ncpu; ++i) {
|
||||
pthread_join(thread_data[i].thread_id, NULL);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Parent */
|
||||
default:
|
||||
fs->pid = pid;
|
||||
while ((rc = sem_trywait(&fs->sem)) == -1 && (errno == EAGAIN || errno == EINTR)) {
|
||||
int st;
|
||||
int wrc;
|
||||
|
||||
wrc = waitpid(pid, &st, WNOHANG);
|
||||
if(wrc == pid) {
|
||||
fs->status = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
sched_yield();
|
||||
}
|
||||
|
||||
if (fs->status != 0) {
|
||||
fprintf(stderr, "fork(): error with child process after fork\n");
|
||||
rc = fs->status;
|
||||
break;
|
||||
}
|
||||
|
||||
rc = pid;
|
||||
break;
|
||||
}
|
||||
|
||||
child = fork();
|
||||
|
||||
switch (child) {
|
||||
/* Error */
|
||||
case -1:
|
||||
fprintf(stderr, "fork(): error forking child process\n");
|
||||
close(sync_pipe_fd[0]);
|
||||
close(sync_pipe_fd[1]);
|
||||
do_syscall_return(fd, cpu, -1, 0, 0, 0, 0);
|
||||
break;
|
||||
|
||||
/* Child process */
|
||||
case 0: {
|
||||
int i;
|
||||
int ret = 1;
|
||||
|
||||
ischild = 1;
|
||||
/* Reopen device fd */
|
||||
close(fd);
|
||||
fd = open(dev, O_RDWR);
|
||||
if (fd < 0) {
|
||||
/* TODO: tell parent something went wrong? */
|
||||
fprintf(stderr, "ERROR: opening %s\n", dev);
|
||||
|
||||
/* Tell parent something went wrong */
|
||||
sync_msg = 1;
|
||||
goto fork_child_sync_pipe;
|
||||
}
|
||||
|
||||
/* Reinit signals and syscall threads */
|
||||
init_sigaction();
|
||||
init_worker_threads(fd, mcosid);
|
||||
|
||||
__dprintf("pid(%d): signals and syscall threads OK\n",
|
||||
getpid());
|
||||
|
||||
/* Hold executable also in the child process */
|
||||
if ((ret = ioctl(fd, MCEXEC_UP_OPEN_EXEC, exec_path))
|
||||
!= 0) {
|
||||
fprintf(stderr, "Error: open_exec() fails for %s: %d (fd: %d)\n",
|
||||
exec_path, ret, fd);
|
||||
goto fork_child_sync_pipe;
|
||||
}
|
||||
|
||||
/* Tell parent everything went OK */
|
||||
sync_msg = 0;
|
||||
fork_child_sync_pipe:
|
||||
if (write(sync_pipe_fd[1], &sync_msg, 1) != 1) {
|
||||
fprintf(stderr, "ERROR: writing sync pipe\n");
|
||||
goto fork_child_out;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
fork_child_out:
|
||||
close(sync_pipe_fd[0]);
|
||||
close(sync_pipe_fd[1]);
|
||||
|
||||
/* TODO: does the forked thread run in a pthread context? */
|
||||
for (i = 0; i <= ncpu; ++i) {
|
||||
pthread_join(thread_data[i].thread_id, NULL);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Parent */
|
||||
default:
|
||||
|
||||
if (read(sync_pipe_fd[0], &sync_msg, 1) != 1) {
|
||||
fprintf(stderr, "fork(): error reading sync message\n");
|
||||
child = -1;
|
||||
goto sync_out;
|
||||
}
|
||||
|
||||
if (sync_msg != 0) {
|
||||
fprintf(stderr, "fork(): error with child process after fork\n");
|
||||
child = -1;
|
||||
goto sync_out;
|
||||
}
|
||||
|
||||
sync_out:
|
||||
close(sync_pipe_fd[0]);
|
||||
close(sync_pipe_fd[1]);
|
||||
do_syscall_return(fd, cpu, child, 0, 0, 0, 0);
|
||||
sem_destroy(&fs->sem);
|
||||
munmap(fs, sizeof(struct fork_sync));
|
||||
fork_err:
|
||||
pthread_mutex_lock(&fork_sync_mutex);
|
||||
for(fp = fork_sync_top, fb = NULL; fp; fb = fp, fp = fp->next)
|
||||
if(fp == fsc)
|
||||
break;
|
||||
if(fp){
|
||||
if(fb)
|
||||
fb->next = fsc->next;
|
||||
else
|
||||
fork_sync_top = fsc->next;
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&fork_sync_mutex);
|
||||
do_syscall_return(fd, cpu, rc, 0, 0, 0, 0);
|
||||
break;
|
||||
}
|
||||
|
||||
case __NR_wait4: {
|
||||
int status;
|
||||
int ret;
|
||||
pid_t pid = w.sr.args[0];
|
||||
int options = w.sr.args[2];
|
||||
siginfo_t info;
|
||||
int opt;
|
||||
|
||||
if ((ret = waitpid(pid, &status, 0)) != pid) {
|
||||
fprintf(stderr, "ERROR: waiting for %lu\n", w.sr.args[0]);
|
||||
opt = WEXITED | (options & WNOWAIT);
|
||||
memset(&info, '\0', sizeof info);
|
||||
while((ret = waitid(P_PID, pid, &info, opt)) == -1 &&
|
||||
errno == EINTR);
|
||||
if(ret == 0){
|
||||
ret = info.si_pid;
|
||||
}
|
||||
|
||||
if(ret != pid) {
|
||||
fprintf(stderr, "ERROR: waiting for %lu rc=%d errno=%d\n", w.sr.args[0], ret, errno);
|
||||
}
|
||||
|
||||
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||
@@ -1747,6 +2016,32 @@ return_execve2:
|
||||
break;
|
||||
}
|
||||
|
||||
case __NR_signalfd4:
|
||||
ret = act_signalfd4(&w);
|
||||
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||
break;
|
||||
|
||||
case __NR_rt_sigaction:
|
||||
act_sigaction(&w);
|
||||
do_syscall_return(fd, cpu, 0, 0, 0, 0, 0);
|
||||
break;
|
||||
|
||||
case __NR_rt_sigprocmask:
|
||||
act_sigprocmask(&w);
|
||||
do_syscall_return(fd, cpu, 0, 0, 0, 0, 0);
|
||||
break;
|
||||
|
||||
case __NR_setfsuid:
|
||||
if(w.sr.args[1] == 1){
|
||||
ioctl(fd, MCEXEC_UP_GET_CRED, w.sr.args[0]);
|
||||
ret = 0;
|
||||
}
|
||||
else{
|
||||
ret = setfsuid(w.sr.args[0]);
|
||||
}
|
||||
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||
break;
|
||||
|
||||
case __NR_close:
|
||||
if(w.sr.args[0] == fd)
|
||||
ret = -EBADF;
|
||||
@@ -1756,8 +2051,8 @@ return_execve2:
|
||||
break;
|
||||
|
||||
default:
|
||||
ret = do_generic_syscall(&w);
|
||||
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||
ret = do_generic_syscall(&w);
|
||||
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||
break;
|
||||
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o shmobj.o
|
||||
OBJS += zeroobj.o procfs.o devobj.o
|
||||
DEPSRCS=$(wildcard $(SRC)/*.c)
|
||||
|
||||
CFLAGS += -I$(SRC)/include -mcmodel=kernel -D__KERNEL__
|
||||
CFLAGS += -I$(SRC)/include -mcmodel=kernel -D__KERNEL__ -g
|
||||
LDFLAGS += -e arch_start
|
||||
IHKOBJ = ihk/ihk.o
|
||||
|
||||
|
||||
16
kernel/ap.c
16
kernel/ap.c
@@ -24,18 +24,21 @@
|
||||
#include <process.h>
|
||||
#include <init.h>
|
||||
#include <march.h>
|
||||
#include <cls.h>
|
||||
|
||||
int num_processors = 1;
|
||||
static volatile int ap_stop = 1;
|
||||
extern void zero_tsc(void);
|
||||
|
||||
static void ap_wait(void)
|
||||
{
|
||||
wrmsr(MSR_IA32_TIME_STAMP_COUNTER, 0);
|
||||
|
||||
while (ap_stop) {
|
||||
barrier();
|
||||
cpu_pause();
|
||||
}
|
||||
|
||||
zero_tsc();
|
||||
|
||||
kmalloc_init();
|
||||
sched_init();
|
||||
|
||||
@@ -64,8 +67,6 @@ void ap_init(void)
|
||||
|
||||
ihk_mc_init_ap();
|
||||
|
||||
wrmsr(MSR_IA32_TIME_STAMP_COUNTER, 0);
|
||||
|
||||
cpu_info = ihk_mc_get_cpu_info();
|
||||
bsp_hw_id = ihk_mc_get_hardware_processor_id();
|
||||
|
||||
@@ -74,18 +75,17 @@ void ap_init(void)
|
||||
return;
|
||||
}
|
||||
|
||||
kprintf("BSP HW ID = %d, ", bsp_hw_id);
|
||||
kprintf("AP Booting :");
|
||||
kprintf("BSP HW ID = %d\n", bsp_hw_id);
|
||||
|
||||
for (i = 0; i < cpu_info->ncpus; i++) {
|
||||
if (cpu_info->hw_ids[i] == bsp_hw_id) {
|
||||
continue;
|
||||
}
|
||||
kprintf("AP Booting: %d (HW ID: %d)\n", i, cpu_info->hw_ids[i]);
|
||||
ihk_mc_boot_cpu(cpu_info->hw_ids[i], (unsigned long)ap_wait);
|
||||
kprintf(" %d", cpu_info->hw_ids[i]);
|
||||
|
||||
num_processors++;
|
||||
}
|
||||
kprintf(" .. Done\n");
|
||||
kprintf("AP Booting: Done\n");
|
||||
}
|
||||
|
||||
|
||||
14
kernel/cls.c
14
kernel/cls.c
@@ -23,6 +23,7 @@
|
||||
extern int num_processors;
|
||||
|
||||
struct cpu_local_var *clv;
|
||||
static int cpu_local_var_initialized = 0;
|
||||
|
||||
void cpu_local_var_init(void)
|
||||
{
|
||||
@@ -33,9 +34,22 @@ void cpu_local_var_init(void)
|
||||
|
||||
clv = allocate_pages(z, IHK_MC_AP_CRITICAL);
|
||||
memset(clv, 0, z * PAGE_SIZE);
|
||||
cpu_local_var_initialized = 1;
|
||||
}
|
||||
|
||||
struct cpu_local_var *get_cpu_local_var(int id)
|
||||
{
|
||||
return clv + id;
|
||||
}
|
||||
|
||||
void preempt_enable(void)
|
||||
{
|
||||
if (cpu_local_var_initialized)
|
||||
--cpu_local_var(no_preempt);
|
||||
}
|
||||
|
||||
void preempt_disable(void)
|
||||
{
|
||||
if (cpu_local_var_initialized)
|
||||
++cpu_local_var(no_preempt);
|
||||
}
|
||||
|
||||
@@ -26,10 +26,14 @@ SECTIONS
|
||||
|
||||
. = vsyscall_page + 0x000;
|
||||
*(.vsyscall.gettimeofday)
|
||||
*(.vsyscall.gettimeofday.*)
|
||||
|
||||
. = vsyscall_page + 0x400;
|
||||
*(.vsyscall.time)
|
||||
|
||||
. = vsyscall_page + 0x800;
|
||||
*(.vsyscall.getcpu)
|
||||
|
||||
. = ALIGN(4096);
|
||||
} : data = 0xf4
|
||||
|
||||
|
||||
@@ -26,10 +26,14 @@ SECTIONS
|
||||
|
||||
. = vsyscall_page + 0x000;
|
||||
*(.vsyscall.gettimeofday)
|
||||
*(.vsyscall.gettimeofday.*)
|
||||
|
||||
. = vsyscall_page + 0x400;
|
||||
*(.vsyscall.time)
|
||||
|
||||
. = vsyscall_page + 0x800;
|
||||
*(.vsyscall.getcpu)
|
||||
|
||||
. = ALIGN(4096);
|
||||
} : data = 0xf4
|
||||
|
||||
|
||||
@@ -26,10 +26,14 @@ SECTIONS
|
||||
|
||||
. = vsyscall_page + 0x000;
|
||||
*(.vsyscall.gettimeofday)
|
||||
*(.vsyscall.gettimeofday.*)
|
||||
|
||||
. = vsyscall_page + 0x400;
|
||||
*(.vsyscall.time)
|
||||
|
||||
. = vsyscall_page + 0x800;
|
||||
*(.vsyscall.getcpu)
|
||||
|
||||
. = ALIGN(4096);
|
||||
} : data = 0xf4
|
||||
|
||||
@@ -39,8 +43,4 @@ SECTIONS
|
||||
. = ALIGN(4096);
|
||||
_end = .;
|
||||
|
||||
/DISCARD/ : {
|
||||
*(.eh_frame)
|
||||
*(.note.gnu.build-id)
|
||||
}
|
||||
}
|
||||
|
||||
2
kernel/config/config.smp-x86
Normal file
2
kernel/config/config.smp-x86
Normal file
@@ -0,0 +1,2 @@
|
||||
CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow
|
||||
LDFLAGS += -T $(SRC)/config/smp-x86.lds
|
||||
45
kernel/config/smp-x86.lds
Normal file
45
kernel/config/smp-x86.lds
Normal file
@@ -0,0 +1,45 @@
|
||||
PHDRS
|
||||
{
|
||||
text PT_LOAD FLAGS(5);
|
||||
data PT_LOAD FLAGS(7);
|
||||
}
|
||||
SECTIONS
|
||||
{
|
||||
. = 0xffffffff80001000;
|
||||
_head = .;
|
||||
|
||||
.text : {
|
||||
*(.text);
|
||||
} : text
|
||||
|
||||
. = ALIGN(4096);
|
||||
.data : {
|
||||
*(.data)
|
||||
*(.data.*)
|
||||
} :data
|
||||
.rodata : {
|
||||
*(.rodata .rodata.*)
|
||||
} :data
|
||||
|
||||
.vsyscall : ALIGN(0x1000) {
|
||||
vsyscall_page = .;
|
||||
|
||||
. = vsyscall_page + 0x000;
|
||||
*(.vsyscall.gettimeofday)
|
||||
*(.vsyscall.gettimeofday.*)
|
||||
|
||||
. = vsyscall_page + 0x400;
|
||||
*(.vsyscall.time)
|
||||
|
||||
. = vsyscall_page + 0x800;
|
||||
*(.vsyscall.getcpu)
|
||||
|
||||
. = ALIGN(4096);
|
||||
} : data = 0xf4
|
||||
|
||||
.bss : {
|
||||
*(.bss .bss.*)
|
||||
}
|
||||
. = ALIGN(4096);
|
||||
_end = .;
|
||||
}
|
||||
@@ -28,7 +28,7 @@ void kputs(char *buf)
|
||||
int len = strlen(buf);
|
||||
unsigned long flags;
|
||||
|
||||
flags = ihk_mc_spinlock_lock(&kmsg_lock);
|
||||
flags = __ihk_mc_spinlock_lock(&kmsg_lock);
|
||||
|
||||
if (len + kmsg_buf.tail > kmsg_buf.len) {
|
||||
kmsg_buf.tail = 0;
|
||||
@@ -40,19 +40,19 @@ void kputs(char *buf)
|
||||
memcpy(kmsg_buf.str + kmsg_buf.tail, buf, len);
|
||||
kmsg_buf.tail += len;
|
||||
|
||||
ihk_mc_spinlock_unlock(&kmsg_lock, flags);
|
||||
__ihk_mc_spinlock_unlock(&kmsg_lock, flags);
|
||||
}
|
||||
|
||||
#define KPRINTF_LOCAL_BUF_LEN 1024
|
||||
|
||||
int kprintf_lock()
|
||||
unsigned long kprintf_lock(void)
|
||||
{
|
||||
return ihk_mc_spinlock_lock(&kmsg_lock);
|
||||
return __ihk_mc_spinlock_lock(&kmsg_lock);
|
||||
}
|
||||
|
||||
void kprintf_unlock(int irqflags)
|
||||
void kprintf_unlock(unsigned long irqflags)
|
||||
{
|
||||
ihk_mc_spinlock_unlock(&kmsg_lock, irqflags);
|
||||
__ihk_mc_spinlock_unlock(&kmsg_lock, irqflags);
|
||||
}
|
||||
|
||||
/* Caller must hold kmsg_lock! */
|
||||
@@ -85,7 +85,7 @@ int kprintf(const char *format, ...)
|
||||
unsigned long flags;
|
||||
char buf[KPRINTF_LOCAL_BUF_LEN];
|
||||
|
||||
flags = ihk_mc_spinlock_lock(&kmsg_lock);
|
||||
flags = __ihk_mc_spinlock_lock(&kmsg_lock);
|
||||
|
||||
/* Copy into the local buf */
|
||||
len = sprintf(buf, "[%3d]: ", ihk_mc_get_processor_id());
|
||||
@@ -101,7 +101,7 @@ int kprintf(const char *format, ...)
|
||||
memcpy(kmsg_buf.str + kmsg_buf.tail, buf, len);
|
||||
kmsg_buf.tail += len;
|
||||
|
||||
ihk_mc_spinlock_unlock(&kmsg_lock, flags);
|
||||
__ihk_mc_spinlock_unlock(&kmsg_lock, flags);
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
@@ -3,7 +3,8 @@
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* memory mapped device pager client
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2014 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
@@ -32,9 +33,18 @@
|
||||
#include <pager.h>
|
||||
#include <string.h>
|
||||
#include <syscall.h>
|
||||
#include <process.h>
|
||||
|
||||
//#define DEBUG_PRINT_DEVOBJ
|
||||
|
||||
#ifdef DEBUG_PRINT_DEVOBJ
|
||||
#define dkprintf(...) kprintf(__VA_ARGS__)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#else
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
#define dkprintf(...)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
|
||||
struct devobj {
|
||||
struct memobj memobj; /* must be first */
|
||||
@@ -76,7 +86,7 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp
|
||||
struct devobj *obj = NULL;
|
||||
const size_t npages = (len + PAGE_SIZE - 1) / PAGE_SIZE;
|
||||
|
||||
kprintf("devobj_create(%d,%lx,%lx)\n", fd, len, off);
|
||||
dkprintf("devobj_create(%d,%lx,%lx)\n", fd, len, off);
|
||||
#define MAX_PAGES_IN_DEVOBJ (PAGE_SIZE / sizeof(uintptr_t))
|
||||
if (npages > MAX_PAGES_IN_DEVOBJ) {
|
||||
error = -EFBIG;
|
||||
@@ -111,8 +121,8 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp
|
||||
kprintf("devobj_create(%d,%lx,%lx):map failed. %d\n", fd, len, off, error);
|
||||
goto out;
|
||||
}
|
||||
kprintf("devobj_create:handle: %lx\n", result.handle);
|
||||
kprintf("devobj_create:maxprot: %x\n", result.maxprot);
|
||||
dkprintf("devobj_create:handle: %lx\n", result.handle);
|
||||
dkprintf("devobj_create:maxprot: %x\n", result.maxprot);
|
||||
|
||||
obj->memobj.ops = &devobj_ops;
|
||||
obj->memobj.flags = MF_HAS_PAGER;
|
||||
@@ -134,7 +144,7 @@ out:
|
||||
}
|
||||
kfree(obj);
|
||||
}
|
||||
kprintf("devobj_create(%d,%lx,%lx): %d %p %x%d\n", fd, len, off, error, *objp, *maxprotp);
|
||||
dkprintf("devobj_create(%d,%lx,%lx): %d %p %x%d\n", fd, len, off, error, *objp, *maxprotp);
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -142,7 +152,7 @@ static void devobj_ref(struct memobj *memobj)
|
||||
{
|
||||
struct devobj *obj = to_devobj(memobj);
|
||||
|
||||
kprintf("devobj_ref(%p %lx):\n", obj, obj->handle);
|
||||
dkprintf("devobj_ref(%p %lx):\n", obj, obj->handle);
|
||||
memobj_lock(&obj->memobj);
|
||||
++obj->ref;
|
||||
memobj_unlock(&obj->memobj);
|
||||
@@ -155,7 +165,7 @@ static void devobj_release(struct memobj *memobj)
|
||||
struct devobj *free_obj = NULL;
|
||||
uintptr_t handle;
|
||||
|
||||
kprintf("devobj_release(%p %lx)\n", obj, obj->handle);
|
||||
dkprintf("devobj_release(%p %lx)\n", obj, obj->handle);
|
||||
|
||||
memobj_lock(&obj->memobj);
|
||||
--obj->ref;
|
||||
@@ -187,12 +197,12 @@ static void devobj_release(struct memobj *memobj)
|
||||
kfree(free_obj);
|
||||
}
|
||||
|
||||
kprintf("devobj_release(%p %lx):free %p\n",
|
||||
dkprintf("devobj_release(%p %lx):free %p\n",
|
||||
obj, handle, free_obj);
|
||||
return;
|
||||
}
|
||||
|
||||
static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintptr_t *physp)
|
||||
static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintptr_t *physp, unsigned long *flag)
|
||||
{
|
||||
const off_t pgoff = off >> PAGE_SHIFT;
|
||||
struct devobj *obj = to_devobj(memobj);
|
||||
@@ -202,7 +212,7 @@ static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintpt
|
||||
ihk_mc_user_context_t ctx;
|
||||
int ix;
|
||||
|
||||
kprintf("devobj_get_page(%p %lx,%lx,%d)\n", memobj, obj->handle, off, p2align);
|
||||
dkprintf("devobj_get_page(%p %lx,%lx,%d)\n", memobj, obj->handle, off, p2align);
|
||||
|
||||
if ((pgoff < obj->pfn_pgoff) || ((obj->pfn_pgoff + obj->npages) <= pgoff)) {
|
||||
error = -EFBIG;
|
||||
@@ -210,7 +220,7 @@ static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintpt
|
||||
goto out;
|
||||
}
|
||||
ix = pgoff - obj->pfn_pgoff;
|
||||
kprintf("ix: %ld\n", ix);
|
||||
dkprintf("ix: %ld\n", ix);
|
||||
|
||||
memobj_lock(&obj->memobj);
|
||||
pfn = obj->pfn_table[ix];
|
||||
@@ -230,12 +240,20 @@ kprintf("ix: %ld\n", ix);
|
||||
|
||||
if (pfn & PFN_PRESENT) {
|
||||
/* convert remote physical into local physical */
|
||||
kprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT before %#lx\n", memobj, obj->handle, off, p2align, pfn);
|
||||
dkprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT before %#lx\n", memobj, obj->handle, off, p2align, pfn);
|
||||
attr = pfn & ~PFN_PFN;
|
||||
|
||||
/* TODO: do an arch dependent PTE to mapping flag conversion
|
||||
* instead of this inline check, also, we rely on having the
|
||||
* same PAT config as Linux here.. */
|
||||
if ((pfn & PFL1_PWT) && !(pfn & PFL1_PCD)) {
|
||||
*flag |= VR_WRITE_COMBINED;
|
||||
}
|
||||
|
||||
pfn = ihk_mc_map_memory(NULL, (pfn & PFN_PFN), PAGE_SIZE);
|
||||
pfn &= PFN_PFN;
|
||||
pfn |= attr;
|
||||
kprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT after %#lx\n", memobj, obj->handle, off, p2align, pfn);
|
||||
dkprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT after %#lx\n", memobj, obj->handle, off, p2align, pfn);
|
||||
}
|
||||
|
||||
memobj_lock(&obj->memobj);
|
||||
@@ -253,6 +271,6 @@ kprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT after %#lx\n", memobj, obj->
|
||||
*physp = pfn & PFN_PFN;
|
||||
|
||||
out:
|
||||
kprintf("devobj_get_page(%p %lx,%lx,%d): %d %lx\n", memobj, obj->handle, off, p2align, error, *physp);
|
||||
dkprintf("devobj_get_page(%p %lx,%lx,%d): %d %lx\n", memobj, obj->handle, off, p2align, error, *physp);
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -26,7 +26,7 @@
|
||||
#include <string.h>
|
||||
#include <syscall.h>
|
||||
|
||||
#define dkprintf(...)
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
|
||||
static ihk_spinlock_t fileobj_list_lock = SPIN_LOCK_UNLOCKED;
|
||||
@@ -46,6 +46,7 @@ static memobj_ref_func_t fileobj_ref;
|
||||
static memobj_get_page_func_t fileobj_get_page;
|
||||
static memobj_copy_page_func_t fileobj_copy_page;
|
||||
static memobj_flush_page_func_t fileobj_flush_page;
|
||||
static memobj_invalidate_page_func_t fileobj_invalidate_page;
|
||||
|
||||
static struct memobj_ops fileobj_ops = {
|
||||
.release = &fileobj_release,
|
||||
@@ -53,6 +54,7 @@ static struct memobj_ops fileobj_ops = {
|
||||
.get_page = &fileobj_get_page,
|
||||
.copy_page = &fileobj_copy_page,
|
||||
.flush_page = &fileobj_flush_page,
|
||||
.invalidate_page = &fileobj_invalidate_page,
|
||||
};
|
||||
|
||||
static struct fileobj *to_fileobj(struct memobj *memobj)
|
||||
@@ -383,9 +385,9 @@ out:
|
||||
return;
|
||||
}
|
||||
|
||||
static int fileobj_get_page(struct memobj *memobj, off_t off, int p2align, uintptr_t *physp)
|
||||
static int fileobj_get_page(struct memobj *memobj, off_t off, int p2align, uintptr_t *physp, unsigned long *pflag)
|
||||
{
|
||||
struct process *proc = cpu_local_var(current);
|
||||
struct thread *proc = cpu_local_var(current);
|
||||
struct fileobj *obj = to_fileobj(memobj);
|
||||
int error;
|
||||
void *virt = NULL;
|
||||
@@ -577,3 +579,33 @@ static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys,
|
||||
memobj_lock(&obj->memobj);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int fileobj_invalidate_page(struct memobj *memobj, uintptr_t phys,
|
||||
size_t pgsize)
|
||||
{
|
||||
struct fileobj *obj = to_fileobj(memobj);
|
||||
int error;
|
||||
struct page *page;
|
||||
|
||||
dkprintf("fileobj_invalidate_page(%p,%#lx,%#lx)\n",
|
||||
memobj, phys, pgsize);
|
||||
|
||||
if (!(page = phys_to_page(phys))
|
||||
|| !(page = page_list_lookup(obj, page->offset))) {
|
||||
error = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (ihk_atomic_read(&page->count) == 1) {
|
||||
if (page_unmap(page)) {
|
||||
ihk_mc_free_pages(phys_to_virt(phys),
|
||||
pgsize/PAGE_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
error = 0;
|
||||
out:
|
||||
dkprintf("fileobj_invalidate_page(%p,%#lx,%#lx):%d\n",
|
||||
memobj, phys, pgsize, error);
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -76,9 +76,11 @@
|
||||
#ifdef DEBUG_PRINT_FUTEX
|
||||
#define dkprintf kprintf
|
||||
#else
|
||||
#define dkprintf(...)
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#endif
|
||||
|
||||
extern struct sigpending *hassigpending(struct thread *thread);
|
||||
|
||||
int futex_cmpxchg_enabled;
|
||||
|
||||
/**
|
||||
@@ -103,7 +105,7 @@ int futex_cmpxchg_enabled;
|
||||
struct futex_q {
|
||||
struct plist_node list;
|
||||
|
||||
struct process *task;
|
||||
struct thread *task;
|
||||
ihk_spinlock_t *lock_ptr;
|
||||
union futex_key key;
|
||||
union futex_key *requeue_pi_key;
|
||||
@@ -243,7 +245,7 @@ static int get_futex_value_locked(uint32_t *dest, uint32_t *from)
|
||||
*/
|
||||
static void wake_futex(struct futex_q *q)
|
||||
{
|
||||
struct process *p = q->task;
|
||||
struct thread *p = q->task;
|
||||
|
||||
/*
|
||||
* We set q->lock_ptr = NULL _before_ we wake up the task. If
|
||||
@@ -263,7 +265,7 @@ static void wake_futex(struct futex_q *q)
|
||||
barrier();
|
||||
q->lock_ptr = NULL;
|
||||
|
||||
sched_wakeup_process(p, PS_NORMAL);
|
||||
sched_wakeup_thread(p, PS_NORMAL);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -658,7 +660,7 @@ static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q
|
||||
* queue_me() calls spin_unlock() upon completion, both serializing
|
||||
* access to the hash list and forcing another memory barrier.
|
||||
*/
|
||||
xchg4(&(cpu_local_var(current)->ftn->status), PS_INTERRUPTIBLE);
|
||||
xchg4(&(cpu_local_var(current)->status), PS_INTERRUPTIBLE);
|
||||
queue_me(q, hb);
|
||||
|
||||
if (!plist_node_empty(&q->list)) {
|
||||
@@ -674,7 +676,7 @@ static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q
|
||||
}
|
||||
|
||||
/* This does not need to be serialized */
|
||||
cpu_local_var(current)->ftn->status = PS_RUNNING;
|
||||
cpu_local_var(current)->status = PS_RUNNING;
|
||||
|
||||
return time_remain;
|
||||
}
|
||||
@@ -775,6 +777,11 @@ retry:
|
||||
if (timeout && !time_remain)
|
||||
goto out_put_key;
|
||||
|
||||
if(hassigpending(cpu_local_var(current))){
|
||||
ret = -EINTR;
|
||||
goto out_put_key;
|
||||
}
|
||||
|
||||
/* RIKEN: no signals */
|
||||
put_futex_key(fshared, &q.key);
|
||||
goto retry;
|
||||
|
||||
184
kernel/host.c
184
kernel/host.c
@@ -28,20 +28,22 @@
|
||||
#include <process.h>
|
||||
#include <page.h>
|
||||
#include <mman.h>
|
||||
#include <init.h>
|
||||
#include <kmalloc.h>
|
||||
|
||||
//#define DEBUG_PRINT_HOST
|
||||
|
||||
#ifdef DEBUG_PRINT_HOST
|
||||
#define dkprintf kprintf
|
||||
#else
|
||||
#define dkprintf(...)
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#endif
|
||||
|
||||
void check_mapping_for_proc(struct process *proc, unsigned long addr)
|
||||
void check_mapping_for_proc(struct thread *thread, unsigned long addr)
|
||||
{
|
||||
unsigned long __phys;
|
||||
|
||||
if (ihk_mc_pt_virt_to_phys(proc->vm->page_table, (void*)addr, &__phys)) {
|
||||
if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table, (void*)addr, &__phys)) {
|
||||
kprintf("check_map: no mapping for 0x%lX\n", addr);
|
||||
}
|
||||
else {
|
||||
@@ -58,7 +60,7 @@ void check_mapping_for_proc(struct process *proc, unsigned long addr)
|
||||
* NOTE: if args, args_len, envs, envs_len are zero,
|
||||
* the function constructs them based on the descriptor
|
||||
*/
|
||||
int prepare_process_ranges_args_envs(struct process *proc,
|
||||
int prepare_process_ranges_args_envs(struct thread *thread,
|
||||
struct program_load_desc *pn,
|
||||
struct program_load_desc *p,
|
||||
enum ihk_mc_pt_attribute attr,
|
||||
@@ -69,7 +71,8 @@ int prepare_process_ranges_args_envs(struct process *proc,
|
||||
unsigned long args_envs_p, args_envs_rp;
|
||||
unsigned long s, e, up;
|
||||
char **argv;
|
||||
int i, n, argc, envc, args_envs_npages;
|
||||
char **a;
|
||||
int i, n, argc, envc, args_envs_npages, l;
|
||||
char **env;
|
||||
int range_npages;
|
||||
void *up_v;
|
||||
@@ -77,6 +80,10 @@ int prepare_process_ranges_args_envs(struct process *proc,
|
||||
unsigned long flags;
|
||||
uintptr_t interp_obase = -1;
|
||||
uintptr_t interp_nbase = -1;
|
||||
size_t map_size;
|
||||
struct process *proc = thread->proc;
|
||||
struct process_vm *vm = proc->vm;
|
||||
struct address_space *as = vm->address_space;
|
||||
|
||||
n = p->num_sections;
|
||||
|
||||
@@ -85,7 +92,7 @@ int prepare_process_ranges_args_envs(struct process *proc,
|
||||
if (pn->sections[i].interp && (interp_nbase == (uintptr_t)-1)) {
|
||||
interp_obase = pn->sections[i].vaddr;
|
||||
interp_obase -= (interp_obase % pn->interp_align);
|
||||
interp_nbase = proc->vm->region.map_start;
|
||||
interp_nbase = vm->region.map_start;
|
||||
interp_nbase = (interp_nbase + pn->interp_align - 1)
|
||||
& ~(pn->interp_align - 1);
|
||||
}
|
||||
@@ -110,7 +117,7 @@ int prepare_process_ranges_args_envs(struct process *proc,
|
||||
}
|
||||
|
||||
up = virt_to_phys(up_v);
|
||||
if (add_process_memory_range(proc, s, e, up, flags, NULL, 0) != 0) {
|
||||
if (add_process_memory_range(vm, s, e, up, flags, NULL, 0) != 0) {
|
||||
ihk_mc_free_pages(up_v, range_npages);
|
||||
kprintf("ERROR: adding memory range for ELF section %i\n", i);
|
||||
goto err;
|
||||
@@ -119,14 +126,14 @@ int prepare_process_ranges_args_envs(struct process *proc,
|
||||
{
|
||||
void *_virt = (void *)s;
|
||||
unsigned long _phys;
|
||||
if (ihk_mc_pt_virt_to_phys(proc->vm->page_table,
|
||||
if (ihk_mc_pt_virt_to_phys(as->page_table,
|
||||
_virt, &_phys)) {
|
||||
kprintf("ERROR: no mapping for 0x%lX\n", _virt);
|
||||
}
|
||||
for (_virt = (void *)s + PAGE_SIZE;
|
||||
(unsigned long)_virt < e; _virt += PAGE_SIZE) {
|
||||
unsigned long __phys;
|
||||
if (ihk_mc_pt_virt_to_phys(proc->vm->page_table,
|
||||
if (ihk_mc_pt_virt_to_phys(as->page_table,
|
||||
_virt, &__phys)) {
|
||||
kprintf("ERROR: no mapping for 0x%lX\n", _virt);
|
||||
panic("mapping");
|
||||
@@ -145,23 +152,23 @@ int prepare_process_ranges_args_envs(struct process *proc,
|
||||
|
||||
/* TODO: Maybe we need flag */
|
||||
if (pn->sections[i].interp) {
|
||||
proc->vm->region.map_end = e;
|
||||
vm->region.map_end = e;
|
||||
}
|
||||
else if (i == 0) {
|
||||
proc->vm->region.text_start = s;
|
||||
proc->vm->region.text_end = e;
|
||||
vm->region.text_start = s;
|
||||
vm->region.text_end = e;
|
||||
}
|
||||
else if (i == 1) {
|
||||
proc->vm->region.data_start = s;
|
||||
proc->vm->region.data_end = e;
|
||||
vm->region.data_start = s;
|
||||
vm->region.data_end = e;
|
||||
}
|
||||
else {
|
||||
proc->vm->region.data_start =
|
||||
(s < proc->vm->region.data_start ?
|
||||
s : proc->vm->region.data_start);
|
||||
proc->vm->region.data_end =
|
||||
(e > proc->vm->region.data_end ?
|
||||
e : proc->vm->region.data_end);
|
||||
vm->region.data_start =
|
||||
(s < vm->region.data_start ?
|
||||
s : vm->region.data_start);
|
||||
vm->region.data_end =
|
||||
(e > vm->region.data_end ?
|
||||
e : vm->region.data_end);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -169,32 +176,17 @@ int prepare_process_ranges_args_envs(struct process *proc,
|
||||
pn->entry -= interp_obase;
|
||||
pn->entry += interp_nbase;
|
||||
p->entry = pn->entry;
|
||||
ihk_mc_modify_user_context(proc->uctx, IHK_UCR_PROGRAM_COUNTER,
|
||||
pn->entry);
|
||||
ihk_mc_modify_user_context(thread->uctx,
|
||||
IHK_UCR_PROGRAM_COUNTER,
|
||||
pn->entry);
|
||||
}
|
||||
|
||||
#if 1
|
||||
/*
|
||||
Fix for the problem where brk grows to hit .bss section
|
||||
when using dynamically linked executables.
|
||||
Test code resides in /home/takagi/project/mpich/src/brk_icc_mic.
|
||||
This is because when using
|
||||
ld.so (i.e. using shared objects), mckernel/kernel/host.c sets "brk" to
|
||||
the end of .bss of ld.so (e.g. 0x21f000), and then ld.so places a
|
||||
main-program after this (e.g. 0x400000), so "brk" will hit .bss
|
||||
eventually.
|
||||
*/
|
||||
proc->vm->region.brk_start = proc->vm->region.brk_end =
|
||||
(USER_END / 4) & LARGE_PAGE_MASK;
|
||||
#else
|
||||
proc->vm->region.brk_start = proc->vm->region.brk_end =
|
||||
proc->vm->region.data_end;
|
||||
#endif
|
||||
vm->region.brk_start = vm->region.brk_end = vm->region.data_end;
|
||||
|
||||
/* Map, copy and update args and envs */
|
||||
flags = VR_PROT_READ | VR_PROT_WRITE;
|
||||
flags |= VRFLAG_PROT_TO_MAXPROT(flags);
|
||||
addr = proc->vm->region.map_start - PAGE_SIZE * SCD_RESERVED_COUNT;
|
||||
addr = vm->region.map_start - PAGE_SIZE * SCD_RESERVED_COUNT;
|
||||
e = addr + PAGE_SIZE * ARGENV_PAGE_COUNT;
|
||||
|
||||
if((args_envs = ihk_mc_alloc_pages(ARGENV_PAGE_COUNT, IHK_MC_AP_NOWAIT)) == NULL){
|
||||
@@ -203,7 +195,7 @@ int prepare_process_ranges_args_envs(struct process *proc,
|
||||
}
|
||||
args_envs_p = virt_to_phys(args_envs);
|
||||
|
||||
if(add_process_memory_range(proc, addr, e, args_envs_p,
|
||||
if(add_process_memory_range(vm, addr, e, args_envs_p,
|
||||
flags, NULL, 0) != 0){
|
||||
ihk_mc_free_pages(args_envs, ARGENV_PAGE_COUNT);
|
||||
kprintf("ERROR: adding memory range for args/envs\n");
|
||||
@@ -217,7 +209,8 @@ int prepare_process_ranges_args_envs(struct process *proc,
|
||||
/* Only map remote address if it wasn't specified as an argument */
|
||||
if (!args) {
|
||||
// Map in remote physical addr of args and copy it
|
||||
args_envs_npages = (p->args_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
map_size = ((uintptr_t)p->args & (PAGE_SIZE - 1)) + p->args_len;
|
||||
args_envs_npages = (map_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
dkprintf("args_envs_npages: %d\n", args_envs_npages);
|
||||
args_envs_rp = ihk_mc_map_memory(NULL,
|
||||
(unsigned long)p->args, p->args_len);
|
||||
@@ -250,7 +243,8 @@ int prepare_process_ranges_args_envs(struct process *proc,
|
||||
/* Only map remote address if it wasn't specified as an argument */
|
||||
if (!envs) {
|
||||
// Map in remote physical addr of envs and copy it after args
|
||||
args_envs_npages = (p->envs_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
map_size = ((uintptr_t)p->envs & (PAGE_SIZE - 1)) + p->envs_len;
|
||||
args_envs_npages = (map_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
dkprintf("args_envs_npages: %d\n", args_envs_npages);
|
||||
args_envs_rp = ihk_mc_map_memory(NULL, (unsigned long)p->envs,
|
||||
p->envs_len);
|
||||
@@ -284,13 +278,21 @@ int prepare_process_ranges_args_envs(struct process *proc,
|
||||
dkprintf("argc: %d\n", argc);
|
||||
|
||||
argv = (char **)(args_envs + (sizeof(int)));
|
||||
while (*argv) {
|
||||
char **_argv = argv;
|
||||
dkprintf("%s\n", args_envs + (unsigned long)*argv);
|
||||
*argv = (char *)addr + (unsigned long)*argv; // Process' address space!
|
||||
argv = ++_argv;
|
||||
if(proc->saved_cmdline){
|
||||
kfree(proc->saved_cmdline);
|
||||
proc->saved_cmdline_len = 0;
|
||||
}
|
||||
for(a = argv, l = 0; *a; a++)
|
||||
l += strlen(args_envs + (unsigned long)*a) + 1;
|
||||
proc->saved_cmdline = kmalloc(p->args_len, IHK_MC_AP_NOWAIT);
|
||||
if(!proc->saved_cmdline)
|
||||
goto err;
|
||||
proc->saved_cmdline_len = l;
|
||||
for(a = argv, l = 0; *a; a++){
|
||||
strcpy(proc->saved_cmdline + l, args_envs + (unsigned long)*a);
|
||||
l += strlen(args_envs + (unsigned long)*a) + 1;
|
||||
*a = (char *)addr + (unsigned long)*a; // Process' address space!
|
||||
}
|
||||
argv = (char **)(args_envs + (sizeof(int)));
|
||||
|
||||
envc = *((int*)(args_envs + p->args_len));
|
||||
dkprintf("envc: %d\n", envc);
|
||||
@@ -306,10 +308,10 @@ int prepare_process_ranges_args_envs(struct process *proc,
|
||||
|
||||
dkprintf("env OK\n");
|
||||
|
||||
p->rprocess = (unsigned long)proc;
|
||||
p->rpgtable = virt_to_phys(proc->vm->page_table);
|
||||
|
||||
if (init_process_stack(proc, pn, argc, argv, envc, env) != 0) {
|
||||
p->rprocess = (unsigned long)thread;
|
||||
p->rpgtable = virt_to_phys(as->page_table);
|
||||
|
||||
if (init_process_stack(thread, pn, argc, argv, envc, env) != 0) {
|
||||
goto err;
|
||||
}
|
||||
|
||||
@@ -328,7 +330,9 @@ static int process_msg_prepare_process(unsigned long rphys)
|
||||
unsigned long phys, sz;
|
||||
struct program_load_desc *p, *pn;
|
||||
int npages, n;
|
||||
struct thread *thread;
|
||||
struct process *proc;
|
||||
struct process_vm *vm;
|
||||
enum ihk_mc_pt_attribute attr;
|
||||
|
||||
attr = PTATTR_NO_EXECUTE | PTATTR_WRITABLE | PTATTR_FOR_USER;
|
||||
@@ -355,45 +359,57 @@ static int process_msg_prepare_process(unsigned long rphys)
|
||||
memcpy_long(pn, p, sizeof(struct program_load_desc)
|
||||
+ sizeof(struct program_image_section) * n);
|
||||
|
||||
if((proc = create_process(p->entry)) == NULL){
|
||||
if((thread = create_thread(p->entry)) == NULL){
|
||||
ihk_mc_free(pn);
|
||||
ihk_mc_unmap_virtual(p, npages, 1);
|
||||
ihk_mc_unmap_memory(NULL, phys, sz);
|
||||
return -ENOMEM;
|
||||
}
|
||||
proc->ftn->pid = pn->pid;
|
||||
proc->ftn->pgid = pn->pgid;
|
||||
proc->vm->region.user_start = pn->user_start;
|
||||
proc->vm->region.user_end = pn->user_end;
|
||||
proc->vm->region.map_start = (USER_END / 3) & LARGE_PAGE_MASK;
|
||||
proc->vm->region.map_end = proc->vm->region.map_start;
|
||||
proc->rlimit_stack.rlim_cur = pn->rlimit_stack_cur;
|
||||
proc->rlimit_stack.rlim_max = pn->rlimit_stack_max;
|
||||
proc = thread->proc;
|
||||
vm = thread->vm;
|
||||
|
||||
proc->pid = pn->pid;
|
||||
proc->vm->address_space->pids[0] = pn->pid;
|
||||
proc->pgid = pn->pgid;
|
||||
proc->ruid = pn->cred[0];
|
||||
proc->euid = pn->cred[1];
|
||||
proc->suid = pn->cred[2];
|
||||
proc->fsuid = pn->cred[3];
|
||||
proc->rgid = pn->cred[4];
|
||||
proc->egid = pn->cred[5];
|
||||
proc->sgid = pn->cred[6];
|
||||
proc->fsgid = pn->cred[7];
|
||||
|
||||
vm->region.user_start = pn->user_start;
|
||||
vm->region.user_end = pn->user_end;
|
||||
vm->region.map_start = (USER_END / 3) & LARGE_PAGE_MASK;
|
||||
vm->region.map_end = proc->vm->region.map_start;
|
||||
memcpy(proc->rlimit, pn->rlimit, sizeof(struct rlimit) * MCK_RLIM_MAX);
|
||||
|
||||
/* TODO: Clear it at the proper timing */
|
||||
cpu_local_var(scp).post_idx = 0;
|
||||
|
||||
if (prepare_process_ranges_args_envs(proc, pn, p, attr,
|
||||
if (prepare_process_ranges_args_envs(thread, pn, p, attr,
|
||||
NULL, 0, NULL, 0) != 0) {
|
||||
kprintf("error: preparing process ranges, args, envs, stack\n");
|
||||
goto err;
|
||||
}
|
||||
|
||||
dkprintf("new process : %p [%d] / table : %p\n", proc, proc->pid,
|
||||
proc->vm->page_table);
|
||||
vm->address_space->page_table);
|
||||
|
||||
ihk_mc_free(pn);
|
||||
|
||||
ihk_mc_unmap_virtual(p, npages, 1);
|
||||
ihk_mc_unmap_memory(NULL, phys, sz);
|
||||
flush_tlb();
|
||||
|
||||
return 0;
|
||||
err:
|
||||
ihk_mc_free(pn);
|
||||
ihk_mc_unmap_virtual(p, npages, 1);
|
||||
ihk_mc_unmap_memory(NULL, phys, sz);
|
||||
free_process_memory(proc);
|
||||
destroy_process(proc);
|
||||
destroy_thread(thread);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
@@ -467,13 +483,15 @@ static void syscall_channel_send(struct ihk_ikc_channel_desc *c,
|
||||
ihk_ikc_send(c, packet, 0);
|
||||
}
|
||||
|
||||
extern unsigned long do_kill(int, int, int, struct siginfo *);
|
||||
extern void settid(struct process *proc, int mode, int newcpuid, int oldcpuid);
|
||||
extern unsigned long do_kill(struct thread *, int, int, int, struct siginfo *, int ptracecont);
|
||||
extern void settid(struct thread *proc, int mode, int newcpuid, int oldcpuid);
|
||||
|
||||
extern void process_procfs_request(unsigned long rarg);
|
||||
extern int memcheckall();
|
||||
extern int freecheck(int runcount);
|
||||
extern int runcount;
|
||||
extern void terminate_host(int pid);
|
||||
extern void debug_log(long);
|
||||
|
||||
static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
|
||||
void *__packet, void *ihk_os)
|
||||
@@ -481,6 +499,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
|
||||
struct ikc_scd_packet *packet = __packet;
|
||||
struct ikc_scd_packet pckt;
|
||||
int rc;
|
||||
struct thread *thread;
|
||||
struct process *proc;
|
||||
struct mcctrl_signal {
|
||||
int cond;
|
||||
@@ -490,6 +509,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
|
||||
struct siginfo info;
|
||||
} *sp, info;
|
||||
unsigned long pp;
|
||||
int cpuid;
|
||||
|
||||
switch (packet->msg) {
|
||||
case SCD_MSG_INIT_CHANNEL_ACKED:
|
||||
@@ -521,13 +541,23 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
|
||||
return 0;
|
||||
|
||||
case SCD_MSG_SCHEDULE_PROCESS:
|
||||
cpuid = obtain_clone_cpuid();
|
||||
if(cpuid == -1){
|
||||
kprintf("No CPU available\n");
|
||||
return -1;
|
||||
}
|
||||
dkprintf("SCD_MSG_SCHEDULE_PROCESS: %lx\n", packet->arg);
|
||||
proc = (struct process *)packet->arg;
|
||||
thread = (struct thread *)packet->arg;
|
||||
proc = thread->proc;
|
||||
|
||||
settid(proc, 0, ihk_mc_get_processor_id(), -1);
|
||||
runq_add_proc(proc, ihk_mc_get_processor_id());
|
||||
settid(thread, 0, cpuid, -1);
|
||||
proc->status = PS_RUNNING;
|
||||
thread->status = PS_RUNNING;
|
||||
chain_thread(thread);
|
||||
chain_process(proc);
|
||||
runq_add_thread(thread, cpuid);
|
||||
|
||||
//cpu_local_var(next) = (struct process *)packet->arg;
|
||||
//cpu_local_var(next) = (struct thread *)packet->arg;
|
||||
return 0;
|
||||
case SCD_MSG_SEND_SIGNAL:
|
||||
pp = ihk_mc_map_memory(NULL, packet->arg, sizeof(struct mcctrl_signal));
|
||||
@@ -541,12 +571,20 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
|
||||
pckt.arg = packet->arg;
|
||||
syscall_channel_send(c, &pckt);
|
||||
|
||||
rc = do_kill(info.pid, info.tid, info.sig, &info.info);
|
||||
rc = do_kill(NULL, info.pid, info.tid, info.sig, &info.info, 0);
|
||||
kprintf("SCD_MSG_SEND_SIGNAL: do_kill(pid=%d, tid=%d, sig=%d)=%d\n", info.pid, info.tid, info.sig, rc);
|
||||
return 0;
|
||||
case SCD_MSG_PROCFS_REQUEST:
|
||||
process_procfs_request(packet->arg);
|
||||
return 0;
|
||||
case SCD_MSG_CLEANUP_PROCESS:
|
||||
dkprintf("SCD_MSG_CLEANUP_PROCESS pid=%d\n", packet->pid);
|
||||
terminate_host(packet->pid);
|
||||
return 0;
|
||||
case SCD_MSG_DEBUG_LOG:
|
||||
dkprintf("SCD_MSG_DEBUG_LOG code=%lx\n", packet->arg);
|
||||
debug_log(packet->arg);
|
||||
return 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -30,6 +30,7 @@ struct malloc_header {
|
||||
#define CPU_STATUS_DISABLE (0)
|
||||
#define CPU_STATUS_IDLE (1)
|
||||
#define CPU_STATUS_RUNNING (2)
|
||||
#define CPU_STATUS_RESERVED (3)
|
||||
extern ihk_spinlock_t cpu_status_lock;
|
||||
|
||||
#define CPU_FLAG_NEED_RESCHED 0x1U
|
||||
@@ -40,12 +41,14 @@ struct cpu_local_var {
|
||||
struct malloc_header free_list;
|
||||
ihk_spinlock_t free_list_lock;
|
||||
|
||||
struct process idle;
|
||||
struct fork_tree_node idle_ftn;
|
||||
struct thread idle;
|
||||
struct process idle_proc;
|
||||
struct process_vm idle_vm;
|
||||
struct address_space idle_asp;
|
||||
|
||||
ihk_spinlock_t runq_lock;
|
||||
struct process *current;
|
||||
unsigned long runq_irqstate;
|
||||
struct thread *current;
|
||||
struct list_head runq;
|
||||
size_t runq_len;
|
||||
|
||||
@@ -56,6 +59,7 @@ struct cpu_local_var {
|
||||
struct ihk_ikc_channel_desc *syscall_channel2;
|
||||
struct syscall_params scp2;
|
||||
struct ikc_scd_init_param iip2;
|
||||
struct resource_set *resource_set;
|
||||
|
||||
int status;
|
||||
int fs;
|
||||
@@ -66,6 +70,9 @@ struct cpu_local_var {
|
||||
|
||||
ihk_spinlock_t migq_lock;
|
||||
struct list_head migq;
|
||||
int in_interrupt;
|
||||
int no_preempt;
|
||||
int timer_enabled;
|
||||
} __attribute__((aligned(64)));
|
||||
|
||||
|
||||
|
||||
@@ -14,8 +14,18 @@
|
||||
#define __HEADER_KMALLOC_H
|
||||
|
||||
#include <ihk/mm.h>
|
||||
#include <cls.h>
|
||||
|
||||
#define kmalloc(size, flag) _kmalloc(size, flag, __FILE__, __LINE__)
|
||||
void panic(const char *);
|
||||
int kprintf(const char *format, ...);
|
||||
|
||||
/*
 * kmalloc(size, flag) - allocate through _kmalloc(), recording the call site.
 *
 * The caller's __FILE__ and __LINE__ are passed to _kmalloc().  When the
 * allocation returns NULL, a diagnostic naming the call site and the current
 * CPU's no_preempt value is printed with kprintf(); the NULL is still
 * returned, so callers must keep checking the result.
 *
 * Implemented as a GNU statement expression.  The temporary has a
 * macro-specific name so that an argument expression mentioning a caller
 * variable (for example one called "r") is not captured by the macro's own
 * local, and both arguments are parenthesised before being passed on.
 */
#define kmalloc(size, flag) ({\
	void *kmalloc_ret_ = _kmalloc((size), (flag), __FILE__, __LINE__);\
	if (kmalloc_ret_ == NULL) {\
		kprintf("kmalloc: out of memory %s:%d no_preempt=%d\n", __FILE__, __LINE__, cpu_local_var(no_preempt)); \
	}\
	kmalloc_ret_;\
})
|
||||
#define kfree(ptr) _kfree(ptr, __FILE__, __LINE__)
|
||||
#define memcheck(ptr, msg) _memcheck(ptr, msg, __FILE__, __LINE__, 0)
|
||||
void *_kmalloc(int size, enum ihk_mc_ap_flag flag, char *file, int line);
|
||||
|
||||
@@ -18,11 +18,20 @@
|
||||
#include <ihk/lock.h>
|
||||
#include <errno.h>
|
||||
#include <list.h>
|
||||
#include <shm.h>
|
||||
|
||||
/* begin types.h */
|
||||
typedef int32_t key_t;
|
||||
typedef uint32_t uid_t;
|
||||
typedef uint32_t gid_t;
|
||||
typedef int64_t time_t;
|
||||
typedef int32_t pid_t;
|
||||
/* end types.h */
|
||||
|
||||
enum {
|
||||
/* for memobj.flags */
|
||||
MF_HAS_PAGER = 0x0001,
|
||||
MF_SHMDT_OK = 0x0002,
|
||||
MF_IS_REMOVABLE = 0x0004,
|
||||
};
|
||||
|
||||
struct memobj {
|
||||
@@ -34,9 +43,10 @@ struct memobj {
|
||||
|
||||
typedef void memobj_release_func_t(struct memobj *obj);
|
||||
typedef void memobj_ref_func_t(struct memobj *obj);
|
||||
typedef int memobj_get_page_func_t(struct memobj *obj, off_t off, int p2align, uintptr_t *physp);
|
||||
typedef int memobj_get_page_func_t(struct memobj *obj, off_t off, int p2align, uintptr_t *physp, unsigned long *flag);
|
||||
typedef uintptr_t memobj_copy_page_func_t(struct memobj *obj, uintptr_t orgphys, int p2align);
|
||||
typedef int memobj_flush_page_func_t(struct memobj *obj, uintptr_t phys, size_t pgsize);
|
||||
typedef int memobj_invalidate_page_func_t(struct memobj *obj, uintptr_t phys, size_t pgsize);
|
||||
|
||||
struct memobj_ops {
|
||||
memobj_release_func_t * release;
|
||||
@@ -44,6 +54,7 @@ struct memobj_ops {
|
||||
memobj_get_page_func_t * get_page;
|
||||
memobj_copy_page_func_t * copy_page;
|
||||
memobj_flush_page_func_t * flush_page;
|
||||
memobj_invalidate_page_func_t * invalidate_page;
|
||||
};
|
||||
|
||||
static inline void memobj_release(struct memobj *obj)
|
||||
@@ -61,10 +72,10 @@ static inline void memobj_ref(struct memobj *obj)
|
||||
}
|
||||
|
||||
static inline int memobj_get_page(struct memobj *obj, off_t off,
|
||||
int p2align, uintptr_t *physp)
|
||||
int p2align, uintptr_t *physp, unsigned long *pflag)
|
||||
{
|
||||
if (obj->ops->get_page) {
|
||||
return (*obj->ops->get_page)(obj, off, p2align, physp);
|
||||
return (*obj->ops->get_page)(obj, off, p2align, physp, pflag);
|
||||
}
|
||||
return -ENXIO;
|
||||
}
|
||||
@@ -86,6 +97,15 @@ static inline int memobj_flush_page(struct memobj *obj, uintptr_t phys, size_t p
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int memobj_invalidate_page(struct memobj *obj, uintptr_t phys,
|
||||
size_t pgsize)
|
||||
{
|
||||
if (obj->ops->invalidate_page) {
|
||||
return (*obj->ops->invalidate_page)(obj, phys, pgsize);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void memobj_lock(struct memobj *obj)
|
||||
{
|
||||
ihk_mc_spinlock_lock_noirq(&obj->lock);
|
||||
@@ -101,7 +121,13 @@ static inline int memobj_has_pager(struct memobj *obj)
|
||||
return !!(obj->flags & MF_HAS_PAGER);
|
||||
}
|
||||
|
||||
static inline int memobj_is_removable(struct memobj *obj)
|
||||
{
|
||||
return !!(obj->flags & MF_IS_REMOVABLE);
|
||||
}
|
||||
|
||||
int fileobj_create(int fd, struct memobj **objp, int *maxprotp);
|
||||
struct shmid_ds;
|
||||
int shmobj_create(struct shmid_ds *ds, struct memobj **objp);
|
||||
int zeroobj_create(struct memobj **objp);
|
||||
int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxprotp);
|
||||
|
||||
@@ -5,6 +5,8 @@
|
||||
* memory management declarations
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2013 Hitachi, Ltd.
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
@@ -13,6 +15,8 @@
|
||||
#ifndef HEADER_MMAN_H
|
||||
#define HEADER_MMAN_H
|
||||
|
||||
#include <arch/mman.h>
|
||||
|
||||
/*
|
||||
* memory protection
|
||||
*/
|
||||
@@ -32,16 +36,6 @@
|
||||
#define MAP_PRIVATE 0x02
|
||||
#define MAP_FIXED 0x10
|
||||
#define MAP_ANONYMOUS 0x20
|
||||
#define MAP_32BIT 0x40
|
||||
#define MAP_GROWSDOWN 0x0100
|
||||
#define MAP_DENYWRITE 0x0800
|
||||
#define MAP_EXECUTABLE 0x1000
|
||||
#define MAP_LOCKED 0x2000
|
||||
#define MAP_NORESERVE 0x4000
|
||||
#define MAP_POPULATE 0x8000
|
||||
#define MAP_NONBLOCK 0x00010000
|
||||
#define MAP_STACK 0x00020000
|
||||
#define MAP_HUGETLB 0x00040000
|
||||
|
||||
/*
|
||||
* memory advice
|
||||
@@ -69,4 +63,11 @@
|
||||
#define MREMAP_MAYMOVE 0x01
|
||||
#define MREMAP_FIXED 0x02
|
||||
|
||||
/*
|
||||
* for msync()
|
||||
*/
|
||||
#define MS_ASYNC 0x01
|
||||
#define MS_INVALIDATE 0x02
|
||||
#define MS_SYNC 0x04
|
||||
|
||||
#endif /* HEADER_MMAN_H */
|
||||
|
||||
@@ -66,4 +66,6 @@ static inline int page_is_multi_mapped(struct page *page)
|
||||
return (ihk_atomic_read(&page->count) > 1);
|
||||
}
|
||||
|
||||
/* Should we take page faults on ANONYMOUS mappings? */
|
||||
extern int anon_on_demand;
|
||||
#endif
|
||||
|
||||
60
kernel/include/prio.h
Normal file
60
kernel/include/prio.h
Normal file
@@ -0,0 +1,60 @@
|
||||
#ifndef _SCHED_PRIO_H
#define _SCHED_PRIO_H

/* Bounds of the user-visible nice range and the number of values in it. */
#define MAX_NICE	19
#define MIN_NICE	-20
#define NICE_WIDTH	(MAX_NICE - MIN_NICE + 1)

/*
 * Process priorities span 0..MAX_PRIO-1.  The real-time part of that
 * range is 0..MAX_RT_PRIO-1; SCHED_NORMAL/SCHED_BATCH tasks occupy
 * MAX_RT_PRIO..MAX_PRIO-1.  The scale is inverted: a numerically lower
 * p->prio means a higher priority.
 *
 * MAX_USER_RT_PRIO is kept separate from MAX_RT_PRIO so that the maximum
 * RT priority exported to user space may differ from the real maximum,
 * letting kernel threads run above any user task.  MAX_RT_PRIO must
 * never be smaller than MAX_USER_RT_PRIO.
 */

#define MAX_USER_RT_PRIO	100
#define MAX_RT_PRIO		MAX_USER_RT_PRIO

#define MAX_PRIO		(MAX_RT_PRIO + NICE_WIDTH)
#define DEFAULT_PRIO		(MAX_RT_PRIO + NICE_WIDTH / 2)

/*
 * Map a user nice value [ -20 ... 0 ... 19 ] onto the static priority
 * range [ MAX_RT_PRIO..MAX_PRIO-1 ], and map it back again.
 */
#define NICE_TO_PRIO(nice)	((nice) + DEFAULT_PRIO)
#define PRIO_TO_NICE(prio)	((prio) - DEFAULT_PRIO)

/*
 * "User priority" is the nice value shifted into a [ 0 ... 39 ] range,
 * which is more convenient when scaling scheduler parameters.
 */
#define USER_PRIO(p)		((p)-MAX_RT_PRIO)
#define TASK_USER_PRIO(p)	USER_PRIO((p)->static_prio)
#define MAX_USER_PRIO		(USER_PRIO(MAX_PRIO))

/*
 * nice_to_rlimit - translate a nice value [19,-20] into the rlimit-style
 * value [1,40].
 */
static inline long nice_to_rlimit(long nice)
{
	const long rlim = MAX_NICE - nice + 1;

	return rlim;
}

/*
 * rlimit_to_nice - translate an rlimit-style value [1,40] back into a
 * nice value [19,-20].
 */
static inline long rlimit_to_nice(long prio)
{
	const long nice = MAX_NICE - prio + 1;

	return nice;
}

#endif /* _SCHED_PRIO_H */
|
||||
@@ -21,12 +21,14 @@
|
||||
#include <signal.h>
|
||||
#include <memobj.h>
|
||||
#include <affinity.h>
|
||||
#include <syscall.h>
|
||||
|
||||
#define VR_NONE 0x0
|
||||
#define VR_STACK 0x1
|
||||
#define VR_RESERVED 0x2
|
||||
#define VR_IO_NOCACHE 0x100
|
||||
#define VR_REMOTE 0x200
|
||||
#define VR_WRITE_COMBINED 0x400
|
||||
#define VR_DEMAND_PAGING 0x1000
|
||||
#define VR_PRIVATE 0x2000
|
||||
#define VR_LOCKED 0x4000
|
||||
@@ -49,6 +51,7 @@
|
||||
#define VRFLAG_PROT_TO_MAXPROT(vrflag) (((vrflag) & VR_PROT_MASK) << 4)
|
||||
#define VRFLAG_MAXPROT_TO_PROT(vrflag) (((vrflag) & VR_MAXPROT_MASK) >> 4)
|
||||
|
||||
// struct process.status, struct thread.status
|
||||
#define PS_RUNNING 0x1
|
||||
#define PS_INTERRUPTIBLE 0x2
|
||||
#define PS_UNINTERRUPTIBLE 0x4
|
||||
@@ -56,12 +59,19 @@
|
||||
#define PS_EXITED 0x10
|
||||
#define PS_STOPPED 0x20
|
||||
#define PS_TRACED 0x40 /* Set to "not running" by a ptrace related event */
|
||||
#define PS_STOPPING 0x80
|
||||
#define PS_TRACING 0x100
|
||||
|
||||
#define PS_NORMAL (PS_INTERRUPTIBLE | PS_UNINTERRUPTIBLE)
|
||||
|
||||
// struct process.ptrace
|
||||
#define PT_TRACED 0x80 /* The process is ptraced */
|
||||
#define PT_TRACE_EXEC 0x100 /* Trace execve(2) */
|
||||
#define PT_TRACE_SYSCALL_ENTER 0x200 /* Trace syscall enter */
|
||||
#define PT_TRACE_SYSCALL_EXIT 0x400 /* Trace syscall exit */
|
||||
#define PT_TRACE_SYSCALL_MASK (PT_TRACE_SYSCALL_ENTER | PT_TRACE_SYSCALL_EXIT)
|
||||
|
||||
// ptrace(2) request
|
||||
#define PTRACE_TRACEME 0
|
||||
#define PTRACE_PEEKTEXT 1
|
||||
#define PTRACE_PEEKDATA 2
|
||||
@@ -90,6 +100,7 @@
|
||||
#define PTRACE_GETREGSET 0x4204
|
||||
#define PTRACE_SETREGSET 0x4205
|
||||
|
||||
// ptrace(2) options
|
||||
#define PTRACE_O_TRACESYSGOOD 1
|
||||
#define PTRACE_O_TRACEFORK 2
|
||||
#define PTRACE_O_TRACEVFORK 4
|
||||
@@ -99,6 +110,7 @@
|
||||
#define PTRACE_O_TRACEEXIT 0x40
|
||||
#define PTRACE_O_MASK 0x7f
|
||||
|
||||
// ptrace(2) events
|
||||
#define PTRACE_EVENT_FORK 1
|
||||
#define PTRACE_EVENT_VFORK 2
|
||||
#define PTRACE_EVENT_CLONE 3
|
||||
@@ -106,6 +118,8 @@
|
||||
#define PTRACE_EVENT_VFORK_DONE 5
|
||||
#define PTRACE_EVENT_EXIT 6
|
||||
|
||||
#define NT_X86_XSTATE 0x202 /* x86 XSAVE extended state */
|
||||
|
||||
#define SIGNAL_STOP_STOPPED 0x1 /* The process has been stopped by SIGSTOP */
|
||||
#define SIGNAL_STOP_CONTINUED 0x2 /* The process has been resumed by SIGCONT */
|
||||
|
||||
@@ -118,6 +132,11 @@
|
||||
#define WNOWAIT 0x01000000 /* Don't reap, just poll status. */
|
||||
#define __WCLONE 0x80000000
|
||||
|
||||
/* idtype */
|
||||
#define P_ALL 0
|
||||
#define P_PID 1
|
||||
#define P_PGID 2
|
||||
|
||||
/* If WIFEXITED(STATUS), the low-order 8 bits of the status. */
|
||||
#define __WEXITSTATUS(status) (((status) & 0xff00) >> 8)
|
||||
|
||||
@@ -145,7 +164,64 @@
|
||||
|
||||
#include <waitq.h>
|
||||
#include <futex.h>
|
||||
#include <rlimit.h>
|
||||
|
||||
struct resource_set;
|
||||
struct process_hash;
|
||||
struct thread_hash;
|
||||
struct address_space;
|
||||
struct process;
|
||||
struct thread;
|
||||
struct process_vm;
|
||||
struct vm_regions;
|
||||
struct vm_range;
|
||||
|
||||
#define HASH_SIZE 73
|
||||
|
||||
struct resource_set {
|
||||
struct list_head list;
|
||||
char *path;
|
||||
struct process_hash *process_hash;
|
||||
struct thread_hash *thread_hash;
|
||||
struct list_head phys_mem_list;
|
||||
mcs_rwlock_lock_t phys_mem_lock;
|
||||
cpu_set_t cpu_set;
|
||||
mcs_rwlock_lock_t cpu_set_lock;
|
||||
struct process *pid1;
|
||||
};
|
||||
|
||||
extern struct list_head resource_set_list;
|
||||
extern mcs_rwlock_lock_t resource_set_lock;
|
||||
|
||||
struct process_hash {
|
||||
struct list_head list[HASH_SIZE];
|
||||
mcs_rwlock_lock_t lock[HASH_SIZE];
|
||||
};
|
||||
|
||||
static inline int
|
||||
process_hash(int pid)
|
||||
{
|
||||
return pid % HASH_SIZE;
|
||||
}
|
||||
|
||||
static inline int
|
||||
thread_hash(int tid)
|
||||
{
|
||||
return tid % HASH_SIZE;
|
||||
}
|
||||
|
||||
struct thread_hash {
|
||||
struct list_head list[HASH_SIZE];
|
||||
mcs_rwlock_lock_t lock[HASH_SIZE];
|
||||
};
|
||||
|
||||
struct address_space {
|
||||
struct page_table *page_table;
|
||||
int type;
|
||||
#define ADDRESS_SPACE_NORMAL 1
|
||||
#define ADDRESS_SPACE_PVAS 2
|
||||
int nslots;
|
||||
int pids[];
|
||||
};
|
||||
|
||||
struct user_fpregs_struct
|
||||
{
|
||||
@@ -212,7 +288,7 @@ struct user
|
||||
unsigned long int u_debugreg [8];
|
||||
};
|
||||
|
||||
#define AUXV_LEN 14
|
||||
#define AUXV_LEN 16
|
||||
|
||||
struct vm_range {
|
||||
struct list_head list;
|
||||
@@ -223,6 +299,7 @@ struct vm_range {
|
||||
};
|
||||
|
||||
struct vm_regions {
|
||||
unsigned long vm_start, vm_end;
|
||||
unsigned long text_start, text_end;
|
||||
unsigned long data_start, data_end;
|
||||
unsigned long brk_start, brk_end;
|
||||
@@ -233,22 +310,27 @@ struct vm_regions {
|
||||
|
||||
struct process_vm;
|
||||
|
||||
struct sig_handler {
|
||||
struct sigfd {
|
||||
struct sigfd *next;
|
||||
int fd;
|
||||
__sigset_t mask;
|
||||
};
|
||||
#define SFD_CLOEXEC 02000000
|
||||
#define SFD_NONBLOCK 04000
|
||||
|
||||
struct sig_common {
|
||||
ihk_spinlock_t lock;
|
||||
ihk_atomic_t use;
|
||||
ihk_atomic_t use;
|
||||
struct sigfd *sigfd;
|
||||
struct k_sigaction action[_NSIG];
|
||||
struct list_head sigpending;
|
||||
};
|
||||
|
||||
struct sig_pending {
|
||||
struct list_head list;
|
||||
sigset_t sigmask;
|
||||
siginfo_t info;
|
||||
};
|
||||
|
||||
struct sig_shared {
|
||||
ihk_spinlock_t lock;
|
||||
ihk_atomic_t use;
|
||||
struct list_head sigpending;
|
||||
int ptracecont;
|
||||
};
|
||||
|
||||
typedef void pgio_func_t(void *arg);
|
||||
@@ -257,142 +339,232 @@ typedef void pgio_func_t(void *arg);
|
||||
* corresponding process exited due to references from the parent and/or
|
||||
* children and is used for implementing wait/waitpid without having a
|
||||
* special "init" process */
|
||||
struct fork_tree_node {
|
||||
ihk_spinlock_t lock;
|
||||
ihk_atomic_t refcount;
|
||||
int exit_status;
|
||||
int status;
|
||||
|
||||
struct process *owner;
|
||||
int pid;
|
||||
int tid;
|
||||
int pgid;
|
||||
|
||||
struct fork_tree_node *parent;
|
||||
struct list_head children;
|
||||
struct list_head siblings_list;
|
||||
|
||||
/* The ptracing process behaves as the parent of the ptraced process
|
||||
after using PTRACE_ATTACH except getppid. So we save it here. */
|
||||
struct fork_tree_node *ppid_parent;
|
||||
|
||||
/* Manage ptraced processes in the separate list to make it easy to
|
||||
restore the original parent child relationship when
|
||||
performing PTRACE_DETACH */
|
||||
struct list_head ptrace_children;
|
||||
struct list_head ptrace_siblings_list;
|
||||
|
||||
struct waitq waitpid_q;
|
||||
|
||||
/* Store exit_status for a group of threads when stopped by SIGSTOP.
|
||||
exit_status can't be used because values of exit_status of threads
|
||||
might diverge while the threads are exiting by group_exit(). */
|
||||
int group_exit_status;
|
||||
|
||||
/* Store ptrace flags.
|
||||
* The lower 8 bits are PTRACE_O_xxx of the PTRACE_SETOPTIONS request.
|
||||
* Other bits are for inner use of the McKernel.
|
||||
*/
|
||||
int ptrace;
|
||||
|
||||
/* Store event related to signal. For example,
|
||||
it represents that the process has been resumed by SIGCONT. */
|
||||
int signal_flags;
|
||||
|
||||
/* Store signal sent to parent when the process terminates. */
|
||||
int termsig;
|
||||
};
|
||||
|
||||
void hold_fork_tree_node(struct fork_tree_node *ftn);
|
||||
void release_fork_tree_node(struct fork_tree_node *ftn);
|
||||
|
||||
struct process {
|
||||
int cpu_id;
|
||||
struct list_head hash_list;
|
||||
mcs_rwlock_lock_t update_lock; // lock for parent, status, ...?
|
||||
|
||||
ihk_atomic_t refcount;
|
||||
// process vm
|
||||
struct process_vm *vm;
|
||||
|
||||
// threads and children
|
||||
struct list_head threads_list;
|
||||
mcs_rwlock_lock_t threads_lock; // lock for threads_list
|
||||
|
||||
/* The ptracing process behaves as the parent of the ptraced process
|
||||
after using PTRACE_ATTACH except getppid. So we save it here. */
|
||||
struct process *parent;
|
||||
struct process *ppid_parent;
|
||||
struct list_head children_list;
|
||||
struct list_head ptraced_children_list;
|
||||
mcs_rwlock_lock_t children_lock; // lock for children_list and ptraced_children_list
|
||||
struct list_head siblings_list; // lock parent
|
||||
struct list_head ptraced_siblings_list; // lock ppid_parent
|
||||
|
||||
ihk_atomic_t refcount;
|
||||
|
||||
// process status and exit status
|
||||
int status; // PS_RUNNING -> PS_EXITED -> PS_ZOMBIE
|
||||
// | ^ ^
|
||||
// | |---+ |
|
||||
// V | |
|
||||
// PS_STOPPING-)---+
|
||||
// (PS_TRACING)| |
|
||||
// | | |
|
||||
// V +---- |
|
||||
// PS_STOPPED -----+
|
||||
// (PS_TRACED)
|
||||
int exit_status;
|
||||
|
||||
/* Store exit_status for a group of threads when stopped by SIGSTOP.
|
||||
exit_status can't be used because values of exit_status of threads
|
||||
might diverge while the threads are exiting by group_exit(). */
|
||||
int group_exit_status;
|
||||
|
||||
/* Manage ptraced processes in the separate list to make it easy to
|
||||
restore the original parent child relationship when
|
||||
performing PTRACE_DETACH */
|
||||
struct waitq waitpid_q;
|
||||
|
||||
// process info and credentials etc.
|
||||
int pid;
|
||||
int pgid;
|
||||
int ruid;
|
||||
int euid;
|
||||
int suid;
|
||||
int fsuid;
|
||||
int rgid;
|
||||
int egid;
|
||||
int sgid;
|
||||
int fsgid;
|
||||
int execed;
|
||||
int nohost;
|
||||
struct rlimit rlimit[MCK_RLIM_MAX];
|
||||
unsigned long saved_auxv[AUXV_LEN];
|
||||
char *saved_cmdline;
|
||||
long saved_cmdline_len;
|
||||
|
||||
/* Store ptrace flags.
|
||||
* The lower 8 bits are PTRACE_O_xxx of the PTRACE_SETOPTIONS request.
|
||||
* Other bits are for inner use of the McKernel.
|
||||
*/
|
||||
int ptrace;
|
||||
|
||||
/* Store ptrace event message.
|
||||
* PTRACE_O_xxx will store event message here.
|
||||
* PTRACE_GETEVENTMSG will get from here.
|
||||
*/
|
||||
unsigned long ptrace_eventmsg;
|
||||
|
||||
/* Store event related to signal. For example,
|
||||
it represents that the process has been resumed by SIGCONT. */
|
||||
int signal_flags;
|
||||
|
||||
/* Store signal sent to parent when the process terminates. */
|
||||
int termsig;
|
||||
|
||||
};
|
||||
|
||||
void hold_thread(struct thread *ftn);
|
||||
void release_thread(struct thread *ftn);
|
||||
|
||||
/*
|
||||
* Scheduling policies
|
||||
*/
|
||||
#define SCHED_NORMAL 0
|
||||
#define SCHED_FIFO 1
|
||||
#define SCHED_RR 2
|
||||
#define SCHED_BATCH 3
|
||||
/* SCHED_ISO: reserved but not implemented yet */
|
||||
#define SCHED_IDLE 5
|
||||
#define SCHED_DEADLINE 6
|
||||
|
||||
/* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
|
||||
#define SCHED_RESET_ON_FORK 0x40000000
|
||||
|
||||
/*
|
||||
* For the sched_{set,get}attr() calls
|
||||
*/
|
||||
#define SCHED_FLAG_RESET_ON_FORK 0x01
|
||||
|
||||
struct sched_param {
|
||||
int sched_priority;
|
||||
};
|
||||
|
||||
struct thread {
|
||||
struct list_head hash_list;
|
||||
// thread info
|
||||
int cpu_id;
|
||||
int tid;
|
||||
int status; // PS_RUNNING -> PS_EXITED
|
||||
// | ^ ^
|
||||
// | | |
|
||||
// V | |
|
||||
// PS_STOPPED------+
|
||||
// PS_TRACED
|
||||
// PS_INTERRUPTIBLE
|
||||
// PS_UNINTERRUPTIBLE
|
||||
|
||||
// process vm
|
||||
struct process_vm *vm;
|
||||
|
||||
// context
|
||||
ihk_mc_kernel_context_t ctx;
|
||||
ihk_mc_user_context_t *uctx;
|
||||
|
||||
// sibling
|
||||
struct process *proc;
|
||||
struct list_head siblings_list; // lock process
|
||||
|
||||
// Runqueue list entry
|
||||
struct list_head sched_list;
|
||||
struct list_head sched_list; // lock cls
|
||||
int sched_policy;
|
||||
struct sched_param sched_param;
|
||||
|
||||
ihk_spinlock_t spin_sleep_lock;
|
||||
int spin_sleep;
|
||||
|
||||
struct thread {
|
||||
int *clear_child_tid;
|
||||
unsigned long tlsblock_base, tlsblock_limit;
|
||||
} thread;
|
||||
ihk_atomic_t refcount;
|
||||
|
||||
volatile int sigevent;
|
||||
int *clear_child_tid;
|
||||
unsigned long tlsblock_base, tlsblock_limit;
|
||||
|
||||
// thread info
|
||||
cpu_set_t cpu_set;
|
||||
fp_regs_struct *fp_regs;
|
||||
int in_syscall_offload;
|
||||
|
||||
// signal
|
||||
struct sig_common *sigcommon;
|
||||
sigset_t sigmask;
|
||||
stack_t sigstack;
|
||||
ihk_spinlock_t sigpendinglock;
|
||||
struct list_head sigpending;
|
||||
struct sig_shared *sigshared;
|
||||
struct sig_handler *sighandler;
|
||||
ihk_spinlock_t sigpendinglock;
|
||||
volatile int sigevent;
|
||||
|
||||
struct rlimit rlimit_stack;
|
||||
// pgio
|
||||
pgio_func_t *pgio_fp;
|
||||
void *pgio_arg;
|
||||
|
||||
struct fork_tree_node *ftn;
|
||||
|
||||
cpu_set_t cpu_set;
|
||||
unsigned long saved_auxv[AUXV_LEN];
|
||||
|
||||
struct user *userp;
|
||||
// for ptrace
|
||||
unsigned long *ptrace_debugreg; /* debug registers for ptrace */
|
||||
struct sig_pending *ptrace_recvsig;
|
||||
struct sig_pending *ptrace_sendsig;
|
||||
};
|
||||
|
||||
struct process_vm {
|
||||
ihk_atomic_t refcount;
|
||||
|
||||
struct page_table *page_table;
|
||||
struct address_space *address_space;
|
||||
struct list_head vm_range_list;
|
||||
struct vm_regions region;
|
||||
struct process *owner_process; /* process that reside on the same page */
|
||||
struct process *proc; /* process that reside on the same page */
|
||||
|
||||
ihk_spinlock_t page_table_lock;
|
||||
ihk_spinlock_t memory_range_lock;
|
||||
ihk_spinlock_t page_table_lock;
|
||||
ihk_spinlock_t memory_range_lock;
|
||||
// to protect the followings:
|
||||
// 1. addition of process "memory range" (extend_process_region, add_process_memory_range)
|
||||
// 2. addition of process page table (allocate_pages, update_process_page_table)
|
||||
// note that physical memory allocator (ihk_mc_alloc_pages, ihk_pagealloc_alloc)
|
||||
// is protected by its own lock (see ihk/manycore/generic/page_alloc.c)
|
||||
|
||||
ihk_atomic_t refcount;
|
||||
cpu_set_t cpu_set;
|
||||
ihk_spinlock_t cpu_set_lock;
|
||||
int exiting;
|
||||
};
|
||||
|
||||
|
||||
struct process *create_process(unsigned long user_pc);
|
||||
struct process *clone_process(struct process *org, unsigned long pc,
|
||||
struct thread *create_thread(unsigned long user_pc);
|
||||
struct thread *clone_thread(struct thread *org, unsigned long pc,
|
||||
unsigned long sp, int clone_flags);
|
||||
void destroy_process(struct process *proc);
|
||||
void hold_process(struct process *proc);
|
||||
void release_process(struct process *proc);
|
||||
void flush_process_memory(struct process *proc);
|
||||
void free_process_memory(struct process *proc);
|
||||
void free_process_memory_ranges(struct process *proc);
|
||||
int populate_process_memory(struct process *proc, void *start, size_t len);
|
||||
void destroy_thread(struct thread *thread);
|
||||
void hold_thread(struct thread *thread);
|
||||
void release_thread(struct thread *thread);
|
||||
void flush_process_memory(struct process_vm *vm);
|
||||
void hold_process_vm(struct process_vm *vm);
|
||||
void release_process_vm(struct process_vm *vm);
|
||||
void hold_process(struct process *);
|
||||
void release_process(struct process *);
|
||||
void free_process_memory_ranges(struct process_vm *vm);
|
||||
int populate_process_memory(struct process_vm *vm, void *start, size_t len);
|
||||
|
||||
int add_process_memory_range(struct process *process,
|
||||
int add_process_memory_range(struct process_vm *vm,
|
||||
unsigned long start, unsigned long end,
|
||||
unsigned long phys, unsigned long flag,
|
||||
struct memobj *memobj, off_t objoff);
|
||||
int remove_process_memory_range(struct process *process, unsigned long start,
|
||||
int remove_process_memory_range(struct process_vm *vm, unsigned long start,
|
||||
unsigned long end, int *ro_freedp);
|
||||
int split_process_memory_range(struct process *process,
|
||||
int split_process_memory_range(struct process_vm *vm,
|
||||
struct vm_range *range, uintptr_t addr, struct vm_range **splitp);
|
||||
int join_process_memory_range(struct process *process, struct vm_range *surviving,
|
||||
int join_process_memory_range(struct process_vm *vm, struct vm_range *surviving,
|
||||
struct vm_range *merging);
|
||||
int change_prot_process_memory_range(
|
||||
struct process *process, struct vm_range *range,
|
||||
struct process_vm *vm, struct vm_range *range,
|
||||
unsigned long newflag);
|
||||
int remap_process_memory_range(struct process_vm *vm, struct vm_range *range,
|
||||
uintptr_t start, uintptr_t end, off_t off);
|
||||
int sync_process_memory_range(struct process_vm *vm, struct vm_range *range,
|
||||
uintptr_t start, uintptr_t end);
|
||||
int invalidate_process_memory_range(struct process_vm *vm,
|
||||
struct vm_range *range, uintptr_t start, uintptr_t end);
|
||||
struct vm_range *lookup_process_memory_range(
|
||||
struct process_vm *vm, uintptr_t start, uintptr_t end);
|
||||
struct vm_range *next_process_memory_range(
|
||||
@@ -402,31 +574,41 @@ struct vm_range *previous_process_memory_range(
|
||||
int extend_up_process_memory_range(struct process_vm *vm,
|
||||
struct vm_range *range, uintptr_t newend);
|
||||
|
||||
int page_fault_process(struct process *proc, void *fault_addr, uint64_t reason);
|
||||
int remove_process_region(struct process *proc,
|
||||
int page_fault_process_vm(struct process_vm *fault_vm, void *fault_addr,
|
||||
uint64_t reason);
|
||||
int remove_process_region(struct process_vm *vm,
|
||||
unsigned long start, unsigned long end);
|
||||
struct program_load_desc;
|
||||
int init_process_stack(struct process *process, struct program_load_desc *pn,
|
||||
int init_process_stack(struct thread *thread, struct program_load_desc *pn,
|
||||
int argc, char **argv,
|
||||
int envc, char **env);
|
||||
unsigned long extend_process_region(struct process *proc,
|
||||
unsigned long extend_process_region(struct process_vm *vm,
|
||||
unsigned long start, unsigned long end,
|
||||
unsigned long address, unsigned long flag);
|
||||
extern enum ihk_mc_pt_attribute arch_vrflag_to_ptattr(unsigned long flag, uint64_t fault, pte_t *ptep);
|
||||
enum ihk_mc_pt_attribute common_vrflag_to_ptattr(unsigned long flag, uint64_t fault, pte_t *ptep);
|
||||
|
||||
void schedule(void);
|
||||
void runq_add_proc(struct process *proc, int cpu_id);
|
||||
void runq_del_proc(struct process *proc, int cpu_id);
|
||||
int sched_wakeup_process(struct process *proc, int valid_states);
|
||||
void runq_add_thread(struct thread *thread, int cpu_id);
|
||||
void runq_del_thread(struct thread *thread, int cpu_id);
|
||||
int sched_wakeup_thread(struct thread *thread, int valid_states);
|
||||
|
||||
void sched_request_migrate(int cpu_id, struct process *proc);
|
||||
void sched_request_migrate(int cpu_id, struct thread *thread);
|
||||
void check_need_resched(void);
|
||||
|
||||
void cpu_set(int cpu, cpu_set_t *cpu_set, ihk_spinlock_t *lock);
|
||||
void cpu_clear(int cpu, cpu_set_t *cpu_set, ihk_spinlock_t *lock);
|
||||
void cpu_clear_and_set(int c_cpu, int s_cpu,
|
||||
cpu_set_t *cpu_set, ihk_spinlock_t *lock);
|
||||
|
||||
struct process *findthread_and_lock(int pid, int tid, ihk_spinlock_t **savelock, unsigned long *irqstate);
|
||||
void process_unlock(void *savelock, unsigned long irqstate);
|
||||
void release_cpuid(int cpuid);
|
||||
|
||||
struct thread *find_thread(int pid, int tid, struct mcs_rwlock_node_irqsave *lock);
|
||||
void thread_unlock(struct thread *thread, struct mcs_rwlock_node_irqsave *lock);
|
||||
struct process *find_process(int pid, struct mcs_rwlock_node_irqsave *lock);
|
||||
void process_unlock(struct process *proc, struct mcs_rwlock_node_irqsave *lock);
|
||||
void chain_process(struct process *);
|
||||
void chain_thread(struct thread *);
|
||||
void proc_init();
|
||||
|
||||
#endif
|
||||
|
||||
@@ -3,7 +3,8 @@
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* header file for System V shared memory
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2014 - 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
@@ -12,38 +13,71 @@
|
||||
#ifndef HEADER_SHM_H
|
||||
#define HEADER_SHM_H
|
||||
|
||||
/* begin types.h */
|
||||
typedef int32_t key_t;
|
||||
typedef uint32_t uid_t;
|
||||
typedef uint32_t gid_t;
|
||||
typedef int64_t time_t;
|
||||
typedef int32_t pid_t;
|
||||
/* end types.h */
|
||||
#include <list.h>
|
||||
#include <memobj.h>
|
||||
#include <arch/shm.h>
|
||||
|
||||
typedef uint64_t shmatt_t;
|
||||
enum {
|
||||
/* for key_t */
|
||||
IPC_PRIVATE = 0,
|
||||
|
||||
struct ipc_perm {
|
||||
key_t key;
|
||||
uid_t uid;
|
||||
gid_t gid;
|
||||
uid_t cuid;
|
||||
gid_t cgid;
|
||||
uint16_t mode;
|
||||
uint8_t padding[2];
|
||||
uint16_t seq;
|
||||
uint8_t padding2[22];
|
||||
/* for shmflg */
|
||||
IPC_CREAT = 01000,
|
||||
IPC_EXCL = 02000,
|
||||
|
||||
SHM_RDONLY = 010000,
|
||||
SHM_RND = 020000,
|
||||
SHM_REMAP = 040000,
|
||||
SHM_EXEC = 0100000,
|
||||
|
||||
/* for shm_mode */
|
||||
SHM_DEST = 01000,
|
||||
SHM_LOCKED = 02000,
|
||||
|
||||
/* for cmd of shmctl() */
|
||||
IPC_RMID = 0,
|
||||
IPC_SET = 1,
|
||||
IPC_STAT = 2,
|
||||
IPC_INFO = 3,
|
||||
|
||||
SHM_LOCK = 11,
|
||||
SHM_UNLOCK = 12,
|
||||
SHM_STAT = 13,
|
||||
SHM_INFO = 14,
|
||||
};
|
||||
|
||||
struct shmid_ds {
|
||||
struct ipc_perm shm_perm;
|
||||
size_t shm_segsz;
|
||||
time_t shm_atime;
|
||||
time_t shm_dtime;
|
||||
time_t shm_ctime;
|
||||
pid_t shm_cpid;
|
||||
pid_t shm_lpid;
|
||||
shmatt_t shm_nattch;
|
||||
uint8_t padding[16];
|
||||
struct shmobj {
|
||||
struct memobj memobj; /* must be first */
|
||||
int index;
|
||||
uint8_t padding[4];
|
||||
size_t real_segsz;
|
||||
struct shmid_ds ds;
|
||||
struct list_head page_list;
|
||||
struct list_head chain; /* shmobj_list */
|
||||
};
|
||||
|
||||
struct shminfo {
|
||||
uint64_t shmmax;
|
||||
uint64_t shmmin;
|
||||
uint64_t shmmni;
|
||||
uint64_t shmseg;
|
||||
uint64_t shmall;
|
||||
uint8_t padding[32];
|
||||
};
|
||||
|
||||
struct shm_info {
|
||||
int32_t used_ids;
|
||||
uint8_t padding[4];
|
||||
uint64_t shm_tot;
|
||||
uint64_t shm_rss;
|
||||
uint64_t shm_swp;
|
||||
uint64_t swap_attempts;
|
||||
uint64_t swap_successes;
|
||||
};
|
||||
|
||||
void shmobj_list_lock(void);
|
||||
void shmobj_list_unlock(void);
|
||||
int shmobj_create_indexed(struct shmid_ds *ds, struct shmobj **objp);
|
||||
void shmobj_destroy(struct shmobj *obj);
|
||||
|
||||
#endif /* HEADER_SHM_H */
|
||||
|
||||
@@ -13,8 +13,11 @@
|
||||
#ifndef __HEADER_SYSCALL_H
|
||||
#define __HEADER_SYSCALL_H
|
||||
|
||||
#include <ihk/atomic.h>
|
||||
#include <ihk/context.h>
|
||||
#include <ihk/memconst.h>
|
||||
#include <rlimit.h>
|
||||
#include <time.h>
|
||||
|
||||
#define NUM_SYSCALLS 255
|
||||
|
||||
@@ -34,12 +37,15 @@
|
||||
|
||||
#define SCD_MSG_SYSCALL_ONESIDE 0x4
|
||||
#define SCD_MSG_SEND_SIGNAL 0x8
|
||||
#define SCD_MSG_CLEANUP_PROCESS 0x9
|
||||
|
||||
#define SCD_MSG_PROCFS_CREATE 0x10
|
||||
#define SCD_MSG_PROCFS_DELETE 0x11
|
||||
#define SCD_MSG_PROCFS_REQUEST 0x12
|
||||
#define SCD_MSG_PROCFS_ANSWER 0x13
|
||||
|
||||
#define SCD_MSG_DEBUG_LOG 0x20
|
||||
|
||||
#define ARCH_SET_GS 0x1001
|
||||
#define ARCH_SET_FS 0x1002
|
||||
#define ARCH_GET_FS 0x1003
|
||||
@@ -109,6 +115,24 @@ struct program_image_section {
|
||||
};
|
||||
|
||||
#define SHELL_PATH_MAX_LEN 1024
|
||||
#define MCK_RLIM_MAX 20
|
||||
|
||||
#define MCK_RLIMIT_AS 0
|
||||
#define MCK_RLIMIT_CORE 1
|
||||
#define MCK_RLIMIT_CPU 2
|
||||
#define MCK_RLIMIT_DATA 3
|
||||
#define MCK_RLIMIT_FSIZE 4
|
||||
#define MCK_RLIMIT_LOCKS 5
|
||||
#define MCK_RLIMIT_MEMLOCK 6
|
||||
#define MCK_RLIMIT_MSGQUEUE 7
|
||||
#define MCK_RLIMIT_NICE 8
|
||||
#define MCK_RLIMIT_NOFILE 9
|
||||
#define MCK_RLIMIT_NPROC 10
|
||||
#define MCK_RLIMIT_RSS 11
|
||||
#define MCK_RLIMIT_RTPRIO 12
|
||||
#define MCK_RLIMIT_RTTIME 13
|
||||
#define MCK_RLIMIT_SIGPENDING 14
|
||||
#define MCK_RLIMIT_STACK 15
|
||||
|
||||
struct program_load_desc {
|
||||
int num_sections;
|
||||
@@ -118,6 +142,7 @@ struct program_load_desc {
|
||||
int err;
|
||||
int stack_prot;
|
||||
int pgid;
|
||||
int cred[8];
|
||||
unsigned long entry;
|
||||
unsigned long user_start;
|
||||
unsigned long user_end;
|
||||
@@ -132,8 +157,7 @@ struct program_load_desc {
|
||||
unsigned long args_len;
|
||||
char *envs;
|
||||
unsigned long envs_len;
|
||||
unsigned long rlimit_stack_cur;
|
||||
unsigned long rlimit_stack_max;
|
||||
struct rlimit rlimit[MCK_RLIM_MAX];
|
||||
unsigned long interp_align;
|
||||
char shell_path[SHELL_PATH_MAX_LEN];
|
||||
struct program_image_section sections[0];
|
||||
@@ -217,9 +241,9 @@ struct syscall_params {
|
||||
SYSCALL_ARG_##a2(2); SYSCALL_ARG_##a3(3); \
|
||||
SYSCALL_ARG_##a4(4); SYSCALL_ARG_##a5(5);
|
||||
|
||||
#define SYSCALL_FOOTER return do_syscall(&request, ctx, ihk_mc_get_processor_id(), 0)
|
||||
#define SYSCALL_FOOTER return do_syscall(&request, ihk_mc_get_processor_id(), 0)
|
||||
|
||||
extern long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx, int cpu, int pid);
|
||||
extern long do_syscall(struct syscall_request *req, int cpu, int pid);
|
||||
extern int obtain_clone_cpuid();
|
||||
extern long syscall_generic_forwarding(int n, ihk_mc_user_context_t *ctx);
|
||||
|
||||
@@ -263,4 +287,15 @@ struct procfs_file {
|
||||
char fname[PROCFS_NAME_MAX]; /* procfs filename (request) */
|
||||
};
|
||||
|
||||
extern void terminate(int, int);
|
||||
|
||||
struct tod_data_s {
|
||||
int8_t do_local;
|
||||
int8_t padding[7];
|
||||
ihk_atomic64_t version;
|
||||
unsigned long clocks_per_sec;
|
||||
struct timespec origin; /* realtime when tsc=0 */
|
||||
};
|
||||
extern struct tod_data_s tod_data; /* residing in arch-dependent file */
|
||||
|
||||
#endif
|
||||
|
||||
@@ -19,6 +19,8 @@
|
||||
#ifndef __TIME_H
|
||||
#define __TIME_H
|
||||
|
||||
#define NS_PER_SEC 1000000000UL
|
||||
|
||||
typedef long int __time_t;
|
||||
|
||||
/* POSIX.1b structure for a time value. This is like a `struct timeval' but
|
||||
|
||||
@@ -36,7 +36,7 @@ struct timer {
|
||||
uint64_t timeout;
|
||||
struct waitq processes;
|
||||
struct list_head list;
|
||||
struct process *proc;
|
||||
struct thread *thread;
|
||||
};
|
||||
|
||||
uint64_t schedule_timeout(uint64_t timeout);
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
#include <ihk/lock.h>
|
||||
#include <list.h>
|
||||
|
||||
struct process;
|
||||
struct thread;
|
||||
struct waitq_entry;
|
||||
|
||||
typedef int (*waitq_func_t)(struct waitq_entry *wait, unsigned mode,
|
||||
@@ -58,7 +58,7 @@ typedef struct waitq_entry {
|
||||
}
|
||||
|
||||
extern void waitq_init(waitq_t *waitq);
|
||||
extern void waitq_init_entry(waitq_entry_t *entry, struct process *proc);
|
||||
extern void waitq_init_entry(waitq_entry_t *entry, struct thread *proc);
|
||||
extern int waitq_active(waitq_t *waitq);
|
||||
extern void waitq_add_entry(waitq_t *waitq, waitq_entry_t *entry);
|
||||
extern void waitq_add_entry_locked(waitq_t *waitq, waitq_entry_t *entry);
|
||||
|
||||
@@ -29,6 +29,7 @@
|
||||
#include <process.h>
|
||||
#include <init.h>
|
||||
#include <cls.h>
|
||||
#include <syscall.h>
|
||||
|
||||
//#define IOCTL_FUNC_EXTENSION
|
||||
#ifdef IOCTL_FUNC_EXTENSION
|
||||
@@ -40,7 +41,7 @@
|
||||
#ifdef DEBUG_PRINT_INIT
|
||||
#define dkprintf kprintf
|
||||
#else
|
||||
#define dkprintf(...)
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#endif
|
||||
|
||||
int osnum = 0;
|
||||
@@ -118,6 +119,27 @@ char *find_command_line(char *name)
|
||||
return strstr(cmdline, name);
|
||||
}
|
||||
|
||||
static void parse_kargs(void)
|
||||
{
|
||||
kprintf("KCommand Line: %s\n", ihk_mc_get_kernel_args());
|
||||
|
||||
if (1) {
|
||||
char *key = "osnum=";
|
||||
char *p;
|
||||
|
||||
p = find_command_line(key);
|
||||
if (p != NULL) {
|
||||
p += strlen(key);
|
||||
osnum = 0;
|
||||
while (('0' <= *p) && (*p <= '9')) {
|
||||
osnum *= 10;
|
||||
osnum += *p++ - '0';
|
||||
}
|
||||
kprintf("osnum: %d\n", osnum);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void pc_init(void)
|
||||
{
|
||||
int i;
|
||||
@@ -134,15 +156,6 @@ void pc_init(void)
|
||||
APT_TYPE_STALL, APT_TYPE_CYCLE }, // not updated for KNC
|
||||
};
|
||||
|
||||
p = find_command_line("osnum=");
|
||||
if (p != NULL) {
|
||||
while (('0' <= *p) && (*p <= '9')) {
|
||||
osnum *= 10;
|
||||
osnum += *p++ - '0';
|
||||
}
|
||||
}
|
||||
dkprintf("osnum: %d\n", osnum);
|
||||
|
||||
|
||||
if (!(p = find_command_line("perfctr"))) {
|
||||
dkprintf("perfctr not initialized.\n");
|
||||
@@ -187,12 +200,36 @@ static void pc_test(void)
|
||||
ed[1] - st[1], ed[2] - st[2], ed[3] - st[3]);
|
||||
}
|
||||
|
||||
extern void ihk_mc_get_boot_time(unsigned long *tv_sec, unsigned long *tv_nsec);
|
||||
extern unsigned long ihk_mc_get_ns_per_tsc(void);
|
||||
|
||||
static void time_init(void)
|
||||
{
|
||||
unsigned long tv_sec, tv_nsec;
|
||||
unsigned long ns_per_kclock;
|
||||
|
||||
ihk_mc_get_boot_time(&tv_sec, &tv_nsec);
|
||||
ns_per_kclock = ihk_mc_get_ns_per_tsc();
|
||||
|
||||
tod_data.origin.tv_sec = tv_sec;
|
||||
tod_data.origin.tv_nsec = tv_nsec;
|
||||
|
||||
if (ns_per_kclock) {
|
||||
tod_data.clocks_per_sec = (1000L * NS_PER_SEC) / ns_per_kclock;
|
||||
}
|
||||
|
||||
if (!ns_per_kclock) {
|
||||
gettime_local_support = 0;
|
||||
}
|
||||
|
||||
if (gettime_local_support) {
|
||||
tod_data.do_local = 1;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
static void rest_init(void)
|
||||
{
|
||||
char *cmdline;
|
||||
cmdline = ihk_mc_get_kernel_args();
|
||||
kprintf("KCommand Line: %s\n", cmdline);
|
||||
|
||||
handler_init();
|
||||
|
||||
#ifdef USE_DMA
|
||||
@@ -203,14 +240,19 @@ static void rest_init(void)
|
||||
|
||||
ap_init();
|
||||
cpu_local_var_init();
|
||||
time_init();
|
||||
kmalloc_init();
|
||||
|
||||
ikc_master_init();
|
||||
|
||||
proc_init();
|
||||
|
||||
sched_init();
|
||||
}
|
||||
|
||||
int host_ikc_inited = 0;
|
||||
extern int num_processors;
|
||||
extern void zero_tsc(void);
|
||||
|
||||
static void post_init(void)
|
||||
{
|
||||
@@ -228,7 +270,14 @@ static void post_init(void)
|
||||
init_host_syscall_channel2();
|
||||
ihk_mc_spinlock_init(&syscall_lock);
|
||||
}
|
||||
|
||||
/* Zero TSC.
|
||||
* All AP cores are spin-waiting for ap_start() and they will zero
|
||||
* their TSC immediately. */
|
||||
zero_tsc();
|
||||
ap_start();
|
||||
|
||||
create_os_procfs_files();
|
||||
}
|
||||
#ifdef DCFA_RUN
|
||||
extern void user_main();
|
||||
@@ -247,6 +296,14 @@ int main(void)
|
||||
|
||||
arch_init();
|
||||
|
||||
/*
|
||||
* In attached-mic,
|
||||
* bootparam is not mapped until arch_init() is finished.
|
||||
* In builtin-mic and builtin-x86,
|
||||
* virtual address of bootparam is changed in arch_init().
|
||||
*/
|
||||
parse_kargs();
|
||||
|
||||
mem_init();
|
||||
|
||||
rest_init();
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
#define dkprintf(...) kprintf(__VA_ARGS__)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#else
|
||||
#define dkprintf(...)
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
|
||||
78
kernel/mem.c
78
kernel/mem.c
@@ -44,7 +44,7 @@
|
||||
#define dkprintf(...) kprintf(__VA_ARGS__)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#else
|
||||
#define dkprintf(...)
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
@@ -56,6 +56,8 @@ extern int ihk_mc_pt_print_pte(struct page_table *pt, void *virt);
|
||||
|
||||
struct tlb_flush_entry tlb_flush_vector[IHK_TLB_FLUSH_IRQ_VECTOR_SIZE];
|
||||
|
||||
int anon_on_demand = 0;
|
||||
|
||||
static void reserve_pages(unsigned long start, unsigned long end, int type)
|
||||
{
|
||||
if (start < pa_start) {
|
||||
@@ -171,8 +173,8 @@ static struct ihk_mc_interrupt_handler query_free_mem_handler = {
|
||||
};
|
||||
|
||||
void set_signal(int sig, void *regs, struct siginfo *info);
|
||||
void check_signal(unsigned long rc, void *regs);
|
||||
int gencore(struct process *, void *, struct coretable **, int *);
|
||||
void check_signal(unsigned long, void *, int);
|
||||
int gencore(struct thread *, void *, struct coretable **, int *);
|
||||
void freecore(struct coretable **);
|
||||
|
||||
/**
|
||||
@@ -182,14 +184,14 @@ void freecore(struct coretable **);
|
||||
* \param regs A pointer to a x86_regs structure.
|
||||
*/
|
||||
|
||||
void coredump(struct process *proc, void *regs)
|
||||
void coredump(struct thread *thread, void *regs)
|
||||
{
|
||||
struct syscall_request request IHK_DMA_ALIGN;
|
||||
int ret;
|
||||
struct coretable *coretable;
|
||||
int chunks;
|
||||
|
||||
ret = gencore(proc, regs, &coretable, &chunks);
|
||||
ret = gencore(thread, regs, &coretable, &chunks);
|
||||
if (ret != 0) {
|
||||
dkprintf("could not generate a core file image\n");
|
||||
return;
|
||||
@@ -198,8 +200,7 @@ void coredump(struct process *proc, void *regs)
|
||||
request.args[0] = chunks;
|
||||
request.args[1] = virt_to_phys(coretable);
|
||||
/* no data for now */
|
||||
ret = do_syscall(&request, proc->uctx,
|
||||
proc->cpu_id, proc->ftn->pid);
|
||||
ret = do_syscall(&request, thread->cpu_id, thread->proc->pid);
|
||||
if (ret == 0) {
|
||||
kprintf("dumped core.\n");
|
||||
} else {
|
||||
@@ -208,14 +209,14 @@ void coredump(struct process *proc, void *regs)
|
||||
freecore(&coretable);
|
||||
}
|
||||
|
||||
static void unhandled_page_fault(struct process *proc, void *fault_addr, void *regs)
|
||||
static void unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs)
|
||||
{
|
||||
const uintptr_t address = (uintptr_t)fault_addr;
|
||||
struct process_vm *vm = proc->vm;
|
||||
struct process_vm *vm = thread->vm;
|
||||
struct vm_range *range;
|
||||
char found;
|
||||
int irqflags;
|
||||
unsigned long error = ((struct x86_regs *)regs)->error;
|
||||
unsigned long irqflags;
|
||||
unsigned long error = ((struct x86_user_context *)regs)->gpr.error;
|
||||
|
||||
irqflags = kprintf_lock();
|
||||
dkprintf("[%d] Page fault for 0x%lX\n",
|
||||
@@ -234,7 +235,7 @@ static void unhandled_page_fault(struct process *proc, void *fault_addr, void *r
|
||||
found = 1;
|
||||
dkprintf("address is in range, flag: 0x%X! \n",
|
||||
range->flag);
|
||||
ihk_mc_pt_print_pte(vm->page_table, (void*)address);
|
||||
ihk_mc_pt_print_pte(vm->address_space->page_table, (void*)address);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -365,33 +366,62 @@ void tlb_flush_handler(int vector)
|
||||
|
||||
static void page_fault_handler(void *fault_addr, uint64_t reason, void *regs)
|
||||
{
|
||||
struct process *proc = cpu_local_var(current);
|
||||
struct thread *thread = cpu_local_var(current);
|
||||
int error;
|
||||
|
||||
dkprintf("[%d]page_fault_handler(%p,%lx,%p)\n",
|
||||
ihk_mc_get_processor_id(), fault_addr, reason, regs);
|
||||
|
||||
error = page_fault_process(proc, fault_addr, reason);
|
||||
preempt_disable();
|
||||
|
||||
cpu_enable_interrupt();
|
||||
|
||||
error = page_fault_process_vm(thread->vm, fault_addr, reason);
|
||||
if (error) {
|
||||
struct siginfo info;
|
||||
|
||||
if (error == -ECANCELED) {
|
||||
dkprintf("process is exiting, terminate.\n");
|
||||
|
||||
preempt_enable();
|
||||
terminate(0, SIGSEGV);
|
||||
// no return
|
||||
}
|
||||
|
||||
kprintf("[%d]page_fault_handler(%p,%lx,%p):"
|
||||
"fault proc failed. %d\n",
|
||||
"fault vm failed. %d, TID: %d\n",
|
||||
ihk_mc_get_processor_id(), fault_addr,
|
||||
reason, regs, error);
|
||||
unhandled_page_fault(proc, fault_addr, regs);
|
||||
reason, regs, error, thread->tid);
|
||||
unhandled_page_fault(thread, fault_addr, regs);
|
||||
preempt_enable();
|
||||
memset(&info, '\0', sizeof info);
|
||||
if (error == -ERANGE) {
|
||||
info.si_signo = SIGBUS;
|
||||
info.si_code = BUS_ADRERR;
|
||||
info._sifields._sigfault.si_addr = fault_addr;
|
||||
set_signal(SIGBUS, regs, &info);
|
||||
}
|
||||
else {
|
||||
struct process_vm *vm = thread->vm;
|
||||
struct vm_range *range;
|
||||
|
||||
info.si_signo = SIGSEGV;
|
||||
info.si_code = SEGV_MAPERR;
|
||||
list_for_each_entry(range, &vm->vm_range_list, list) {
|
||||
if (range->start <= (unsigned long)fault_addr && range->end > (unsigned long)fault_addr) {
|
||||
info.si_code = SEGV_ACCERR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
info._sifields._sigfault.si_addr = fault_addr;
|
||||
set_signal(SIGSEGV, regs, &info);
|
||||
}
|
||||
check_signal(0, regs);
|
||||
check_signal(0, regs, 0);
|
||||
goto out;
|
||||
}
|
||||
|
||||
error = 0;
|
||||
preempt_enable();
|
||||
out:
|
||||
dkprintf("[%d]page_fault_handler(%p,%lx,%p): (%d)\n",
|
||||
ihk_mc_get_processor_id(), fault_addr, reason,
|
||||
@@ -648,6 +678,11 @@ void mem_init(void)
|
||||
|
||||
/* Prepare the kernel virtual map space */
|
||||
virtual_allocator_init();
|
||||
|
||||
if (find_command_line("anon_on_demand")) {
|
||||
kprintf("Demand paging on ANONYMOUS mappings enabled.\n");
|
||||
anon_on_demand = 1;
|
||||
}
|
||||
}
|
||||
|
||||
struct location {
|
||||
@@ -839,12 +874,10 @@ int memcheckall()
|
||||
struct alloc *ap;
|
||||
int r = 0;
|
||||
|
||||
kprintf("memcheckall\n");
|
||||
for(i = 0; i < HASHNUM; i++)
|
||||
for(ap = allochash[i]; ap; ap = ap->next)
|
||||
if(ap->p)
|
||||
r |= _memcheck(ap->p + 1, "memcheck", NULL, 0, 2);
|
||||
kprintf("done\n");
|
||||
return r;
|
||||
}
|
||||
|
||||
@@ -942,8 +975,11 @@ void *___kmalloc(int size, enum ihk_mc_ap_flag flag)
|
||||
>> PAGE_SHIFT;
|
||||
|
||||
h = allocate_pages(req_page, flag);
|
||||
if(h == NULL)
|
||||
if(h == NULL) {
|
||||
kprintf("kmalloc(%#x,%#x): out of memory\n", size, flag);
|
||||
ihk_mc_spinlock_unlock(&v->free_list_lock, flags);
|
||||
return NULL;
|
||||
}
|
||||
h->check = 0x5a5a5a5a;
|
||||
prev->next = h;
|
||||
h->size = (req_page * PAGE_SIZE) / sizeof(*h) - 2;
|
||||
|
||||
1802
kernel/process.c
1802
kernel/process.c
File diff suppressed because it is too large
Load Diff
389
kernel/procfs.c
389
kernel/procfs.c
@@ -47,6 +47,9 @@ static void create_proc_procfs_file(int pid, char *fname, int mode, int cpuid);
|
||||
static void delete_proc_procfs_file(int pid, char *fname);
|
||||
static void operate_proc_procfs_file(int pid, char *fname, int msg, int mode, int cpuid);
|
||||
|
||||
int copy_from_user(void *dst, const void *src, size_t siz);
|
||||
int copy_to_user(void *dst, const void *src, size_t siz);
|
||||
|
||||
/**
|
||||
* \brief Create all procfs files for process.
|
||||
*
|
||||
@@ -63,9 +66,21 @@ void create_proc_procfs_files(int pid, int cpuid)
|
||||
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/auxv", osnum, pid);
|
||||
create_proc_procfs_file(pid, fname, 0400, cpuid);
|
||||
|
||||
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/cmdline", osnum, pid);
|
||||
create_proc_procfs_file(pid, fname, 0444, cpuid);
|
||||
|
||||
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/mem", osnum, pid);
|
||||
create_proc_procfs_file(pid, fname, 0400, cpuid);
|
||||
|
||||
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/maps", osnum, pid);
|
||||
create_proc_procfs_file(pid, fname, 0444, cpuid);
|
||||
|
||||
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/pagemap", osnum, pid);
|
||||
create_proc_procfs_file(pid, fname, 0444, cpuid);
|
||||
|
||||
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/status", osnum, pid);
|
||||
create_proc_procfs_file(pid, fname, 0444, cpuid);
|
||||
|
||||
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/task/%d/mem", osnum, pid, pid);
|
||||
create_proc_procfs_file(pid, fname, 0400, cpuid);
|
||||
|
||||
@@ -116,6 +131,18 @@ void delete_proc_procfs_files(int pid)
|
||||
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/mem", osnum, pid);
|
||||
delete_proc_procfs_file(pid, fname);
|
||||
|
||||
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/maps", osnum, pid);
|
||||
delete_proc_procfs_file(pid, fname);
|
||||
|
||||
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/status", osnum, pid);
|
||||
delete_proc_procfs_file(pid, fname);
|
||||
|
||||
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/pagemap", osnum, pid);
|
||||
delete_proc_procfs_file(pid, fname);
|
||||
|
||||
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/cmdline", osnum, pid);
|
||||
delete_proc_procfs_file(pid, fname);
|
||||
|
||||
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/auxv", osnum, pid);
|
||||
delete_proc_procfs_file(pid, fname);
|
||||
|
||||
@@ -139,6 +166,42 @@ static void delete_proc_procfs_file(int pid, char *fname)
|
||||
dprintf("delete procfs file: %s done\n", fname);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief create a procfs file for this operating system
|
||||
* \param fname relative path name from "host:/proc".
|
||||
* \param mode permissions of the file to be created
|
||||
*
|
||||
* Though operate_proc_procfs_file() is intended to create a process
|
||||
* specific file, it is reused to create a OS specific file by
|
||||
* specifying -1 as the pid parameter.
|
||||
*/
|
||||
static void create_os_procfs_file(char *fname, int mode)
|
||||
{
|
||||
const pid_t pid = -1;
|
||||
const int msg = SCD_MSG_PROCFS_CREATE;
|
||||
const int cpuid = ihk_mc_get_processor_id(); /* i.e. BSP */
|
||||
|
||||
operate_proc_procfs_file(pid, fname, msg, mode, cpuid);
|
||||
return;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief create all procfs files for this operating system
|
||||
*/
|
||||
void create_os_procfs_files(void)
|
||||
{
|
||||
char *fname = NULL;
|
||||
size_t n;
|
||||
|
||||
fname = kmalloc(PROCFS_NAME_MAX, IHK_MC_AP_CRITICAL);
|
||||
|
||||
n = snprintf(fname, PROCFS_NAME_MAX, "mcos%d/stat", osnum);
|
||||
if (n >= PROCFS_NAME_MAX) panic("/proc/stat");
|
||||
create_os_procfs_file(fname, 0444);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Create/delete a procfs file for process.
|
||||
*
|
||||
@@ -194,14 +257,18 @@ static void operate_proc_procfs_file(int pid, char *fname, int msg, int mode, in
|
||||
void process_procfs_request(unsigned long rarg)
|
||||
{
|
||||
unsigned long parg, pbuf;
|
||||
struct process *proc = cpu_local_var(current);
|
||||
struct thread *thread = cpu_local_var(current);
|
||||
struct process *proc = thread->proc;
|
||||
struct procfs_read *r;
|
||||
struct ikc_scd_packet packet;
|
||||
int rosnum, ret, pid, tid, ans = -EIO, eof = 0;
|
||||
char *buf, *p;
|
||||
struct ihk_ikc_channel_desc *syscall_channel;
|
||||
ihk_spinlock_t *savelock;
|
||||
unsigned long irqstate;
|
||||
struct mcs_rwlock_node_irqsave lock;
|
||||
unsigned long offset;
|
||||
int count;
|
||||
int npages;
|
||||
int is_current = 1; /* is 'proc' same as 'current'? */
|
||||
|
||||
dprintf("process_procfs_request: invoked.\n");
|
||||
|
||||
@@ -221,7 +288,9 @@ void process_procfs_request(unsigned long rarg)
|
||||
dprintf("remote pbuf: %x\n", r->pbuf);
|
||||
pbuf = ihk_mc_map_memory(NULL, r->pbuf, r->count);
|
||||
dprintf("pbuf: %x\n", pbuf);
|
||||
buf = ihk_mc_map_virtual(pbuf, 1, PTATTR_WRITABLE | PTATTR_ACTIVE);
|
||||
count = r->count + ((uintptr_t)pbuf & (PAGE_SIZE - 1));
|
||||
npages = (count + (PAGE_SIZE - 1)) / PAGE_SIZE;
|
||||
buf = ihk_mc_map_virtual(pbuf, npages, PTATTR_WRITABLE | PTATTR_ACTIVE);
|
||||
dprintf("buf: %p\n", buf);
|
||||
if (buf == NULL) {
|
||||
kprintf("ERROR: process_procfs_request: got a null buffer.\n");
|
||||
@@ -229,6 +298,8 @@ void process_procfs_request(unsigned long rarg)
|
||||
goto bufunavail;
|
||||
}
|
||||
|
||||
count = r->count;
|
||||
offset = r->offset;
|
||||
dprintf("fname: %s, offset: %lx, count:%d.\n", r->fname, r->offset, r->count);
|
||||
|
||||
/*
|
||||
@@ -265,23 +336,62 @@ void process_procfs_request(unsigned long rarg)
|
||||
*/
|
||||
ret = sscanf(p, "%d/", &pid);
|
||||
if (ret == 1) {
|
||||
if (pid != cpu_local_var(current)->ftn->pid) {
|
||||
if (pid != cpu_local_var(current)->proc->pid) {
|
||||
/* We are not located in the proper cpu for some reason. */
|
||||
|
||||
dprintf("mismatched pid. We are %d, but requested pid is %d.\n",
|
||||
pid, cpu_local_var(current)->pid);
|
||||
if ((proc = findthread_and_lock(pid, tid, &savelock, &irqstate))){
|
||||
/* The target process has gone by migration. */
|
||||
r->newcpu = proc->cpu_id;
|
||||
dprintf("expected cpu id is %d.\n", proc->cpu_id);
|
||||
process_unlock(savelock, irqstate);
|
||||
ans = 0;
|
||||
} else {
|
||||
tid = pid; /* main thread */
|
||||
thread = find_thread(pid, tid, &lock);
|
||||
if (!thread) {
|
||||
dprintf("We cannot find the proper cpu for requested pid.\n");
|
||||
goto end;
|
||||
}
|
||||
else if (thread->cpu_id != ihk_mc_get_processor_id()) {
|
||||
/* The target process has gone by migration. */
|
||||
r->newcpu = thread->cpu_id;
|
||||
dprintf("expected cpu id is %d.\n", thread->cpu_id);
|
||||
thread_unlock(thread, &lock);
|
||||
ans = 0;
|
||||
goto end;
|
||||
}
|
||||
else {
|
||||
thread_unlock(thread, &lock);
|
||||
/* 'proc' is not 'current' */
|
||||
is_current = 0;
|
||||
}
|
||||
proc = thread->proc;
|
||||
}
|
||||
}
|
||||
else if (!strcmp(p, "stat")) { /* "/proc/stat" */
|
||||
extern int num_processors; /* kernel/ap.c */
|
||||
char *p;
|
||||
size_t remain;
|
||||
int cpu;
|
||||
|
||||
if (offset > 0) {
|
||||
ans = 0;
|
||||
eof = 1;
|
||||
goto end;
|
||||
}
|
||||
} else {
|
||||
p = buf;
|
||||
remain = count;
|
||||
for (cpu = 0; cpu < num_processors; ++cpu) {
|
||||
size_t n;
|
||||
|
||||
n = snprintf(p, remain, "cpu%d\n", cpu);
|
||||
if (n >= remain) {
|
||||
ans = -ENOSPC;
|
||||
eof = 1;
|
||||
goto end;
|
||||
}
|
||||
p += n;
|
||||
}
|
||||
ans = p - buf;
|
||||
eof = 1;
|
||||
goto end;
|
||||
}
|
||||
else {
|
||||
goto end;
|
||||
}
|
||||
dprintf("matched PID: %d.\n", pid);
|
||||
@@ -297,19 +407,214 @@ void process_procfs_request(unsigned long rarg)
|
||||
struct vm_range *range;
|
||||
struct process_vm *vm = proc->vm;
|
||||
|
||||
list_for_each_entry(range, &vm->vm_range_list, list) {
|
||||
dprintf("range: %lx - %lx\n", range->start, range->end);
|
||||
if ((range->start <= r->offset) &&
|
||||
(r->offset < range->end)) {
|
||||
unsigned int len = r->count;
|
||||
if (range->end < r->offset + r->count) {
|
||||
len = range->end - r->offset;
|
||||
if (!is_current) {
|
||||
uint64_t reason = PF_POPULATE | PF_WRITE | PF_USER;
|
||||
unsigned long offset = r->offset;
|
||||
unsigned long left = r->count;
|
||||
int ret;
|
||||
|
||||
ans = 0;
|
||||
if(left == 0)
|
||||
goto end;
|
||||
|
||||
while(left){
|
||||
unsigned long pa;
|
||||
char *va;
|
||||
int pos = offset & (PAGE_SIZE - 1);
|
||||
int size = PAGE_SIZE - pos;
|
||||
|
||||
if(size > left)
|
||||
size = left;
|
||||
ret = page_fault_process_vm(proc->vm,
|
||||
(void *)offset, reason);
|
||||
if(ret){
|
||||
if(ans == 0)
|
||||
ans = -EIO;
|
||||
goto end;
|
||||
}
|
||||
memcpy((void *)buf, (void *)range->start, len);
|
||||
ans = len;
|
||||
ret = ihk_mc_pt_virt_to_phys(vm->address_space->page_table,
|
||||
(void *)offset, &pa);
|
||||
if(ret){
|
||||
if(ans == 0)
|
||||
ans = -EIO;
|
||||
goto end;
|
||||
}
|
||||
va = phys_to_virt(pa);
|
||||
memcpy(buf + ans, va, size);
|
||||
offset += size;
|
||||
left -= size;
|
||||
ans += size;
|
||||
}
|
||||
}
|
||||
else{
|
||||
unsigned long offset = r->offset;
|
||||
unsigned long left = r->count;
|
||||
unsigned long pos;
|
||||
unsigned long l;
|
||||
ans = 0;
|
||||
list_for_each_entry(range, &vm->vm_range_list, list) {
|
||||
dprintf("range: %lx - %lx\n", range->start, range->end);
|
||||
while (left &&
|
||||
(range->start <= offset) &&
|
||||
(offset < range->end)) {
|
||||
pos = offset & (PAGE_SIZE - 1);
|
||||
l = PAGE_SIZE - pos;
|
||||
if(l > left)
|
||||
l = left;
|
||||
if(copy_from_user(buf, (void *)offset, l)){
|
||||
if(ans == 0)
|
||||
ans = -EIO;
|
||||
goto end;
|
||||
}
|
||||
buf += l;
|
||||
ans += l;
|
||||
offset += l;
|
||||
left -= l;
|
||||
}
|
||||
}
|
||||
}
|
||||
goto end;
|
||||
}
|
||||
|
||||
/*
|
||||
* mcos%d/PID/maps
|
||||
*/
|
||||
if (strcmp(p, "maps") == 0) {
|
||||
struct vm_range *range;
|
||||
struct process_vm *vm = proc->vm;
|
||||
int left = r->count - 1; /* extra 1 for terminating NULL */
|
||||
int written = 0;
|
||||
char *_buf = buf;
|
||||
|
||||
/* Starting from the middle of a proc file is not supported for maps */
|
||||
if (offset > 0) {
|
||||
ans = 0;
|
||||
eof = 1;
|
||||
goto end;
|
||||
}
|
||||
|
||||
ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
|
||||
|
||||
list_for_each_entry(range, &vm->vm_range_list, list) {
|
||||
int written_now;
|
||||
|
||||
/* format is (from man proc):
|
||||
* address perms offset dev inode pathname
|
||||
* 08048000-08056000 r-xp 00000000 03:0c 64593 /usr/sbin/gpm
|
||||
*/
|
||||
written_now = snprintf(_buf, left,
|
||||
"%lx-%lx %s%s%s%s %lx %lx:%lx %d %s\n",
|
||||
range->start, range->end,
|
||||
range->flag & VR_PROT_READ ? "r" : "-",
|
||||
range->flag & VR_PROT_WRITE ? "w" : "-",
|
||||
range->flag & VR_PROT_EXEC ? "x" : "-",
|
||||
range->flag & VR_PRIVATE ? "p" : "s",
|
||||
/* TODO: fill in file details! */
|
||||
0UL,
|
||||
0UL,
|
||||
0UL,
|
||||
0,
|
||||
""
|
||||
);
|
||||
|
||||
left -= written_now;
|
||||
_buf += written_now;
|
||||
written += written_now;
|
||||
|
||||
if (left == 0) {
|
||||
kprintf("%s(): WARNING: buffer too small to fill proc/maps\n",
|
||||
__FUNCTION__);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
|
||||
|
||||
ans = written + 1;
|
||||
eof = 1;
|
||||
goto end;
|
||||
}
|
||||
|
||||
/*
|
||||
* mcos%d/PID/pagemap
|
||||
*/
|
||||
if (strcmp(p, "pagemap") == 0) {
|
||||
struct process_vm *vm = proc->vm;
|
||||
uint64_t *_buf = (uint64_t *)buf;
|
||||
uint64_t start, end;
|
||||
|
||||
if (offset < PAGE_SIZE) {
|
||||
kprintf("WARNING: /proc/pagemap queried for NULL page\n");
|
||||
ans = 0;
|
||||
goto end;
|
||||
}
|
||||
|
||||
/* Check alignment */
|
||||
if ((offset % sizeof(uint64_t) != 0) ||
|
||||
(count % sizeof(uint64_t) != 0)) {
|
||||
ans = 0;
|
||||
eof = 1;
|
||||
goto end;
|
||||
}
|
||||
|
||||
start = (offset / sizeof(uint64_t)) << PAGE_SHIFT;
|
||||
end = start + ((count / sizeof(uint64_t)) << PAGE_SHIFT);
|
||||
|
||||
ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
|
||||
|
||||
while (start < end) {
|
||||
*_buf = ihk_mc_pt_virt_to_pagemap(proc->vm->address_space->page_table, start);
|
||||
dprintf("PID: %d, /proc/pagemap: 0x%lx -> %lx\n", proc->proc->pid,
|
||||
start, *_buf);
|
||||
start += PAGE_SIZE;
|
||||
++_buf;
|
||||
}
|
||||
|
||||
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
|
||||
|
||||
dprintf("/proc/pagemap: 0x%lx - 0x%lx, count: %d\n",
|
||||
start, end, count);
|
||||
|
||||
ans = count;
|
||||
goto end;
|
||||
}
|
||||
|
||||
/*
|
||||
* mcos%d/PID/status
|
||||
*/
|
||||
if (strcmp(p, "status") == 0) {
|
||||
struct vm_range *range;
|
||||
unsigned long lockedsize = 0;
|
||||
char tmp[1024];
|
||||
int len;
|
||||
|
||||
ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock);
|
||||
list_for_each_entry(range, &proc->vm->vm_range_list, list) {
|
||||
if(range->flag & VR_LOCKED)
|
||||
lockedsize += range->end - range->start;
|
||||
}
|
||||
ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock);
|
||||
|
||||
sprintf(tmp,
|
||||
"Uid:\t%d\t%d\t%d\t%d\n"
|
||||
"Gid:\t%d\t%d\t%d\t%d\n"
|
||||
"VmLck:\t%9lu kB\n",
|
||||
proc->ruid, proc->euid, proc->suid, proc->fsuid,
|
||||
proc->rgid, proc->egid, proc->sgid, proc->fsgid,
|
||||
(lockedsize + 1023) >> 10);
|
||||
len = strlen(tmp);
|
||||
if (r->offset < len) {
|
||||
if (r->offset + r->count < len) {
|
||||
ans = r->count;
|
||||
} else {
|
||||
eof = 1;
|
||||
ans = len;
|
||||
}
|
||||
strncpy(buf, tmp + r->offset, ans);
|
||||
} else if (r->offset == len) {
|
||||
ans = 0;
|
||||
eof = 1;
|
||||
}
|
||||
goto end;
|
||||
}
|
||||
|
||||
@@ -335,6 +640,35 @@ void process_procfs_request(unsigned long rarg)
|
||||
goto end;
|
||||
}
|
||||
|
||||
/*
|
||||
* mcos%d/PID/cmdline
|
||||
*/
|
||||
if (strcmp(p, "cmdline") == 0) {
|
||||
unsigned int limit = proc->saved_cmdline_len;
|
||||
unsigned int len = r->count;
|
||||
|
||||
if(!proc->saved_cmdline){
|
||||
ans = 0;
|
||||
eof = 1;
|
||||
goto end;
|
||||
}
|
||||
|
||||
if (r->offset < limit) {
|
||||
if (limit < r->offset + r->count) {
|
||||
len = limit - r->offset;
|
||||
}
|
||||
memcpy((void *)buf, ((char *) proc->saved_cmdline) + r->offset, len);
|
||||
ans = len;
|
||||
if (r->offset + len == limit) {
|
||||
eof = 1;
|
||||
}
|
||||
} else if (r->offset == limit) {
|
||||
ans = 0;
|
||||
eof = 1;
|
||||
}
|
||||
goto end;
|
||||
}
|
||||
|
||||
/*
|
||||
* mcos%d/PID/task/PID/mem
|
||||
*
|
||||
@@ -351,6 +685,9 @@ void process_procfs_request(unsigned long rarg)
|
||||
struct vm_range *range;
|
||||
struct process_vm *vm = proc->vm;
|
||||
|
||||
if (!is_current) {
|
||||
goto end;
|
||||
}
|
||||
if (pid != tid) {
|
||||
/* We are not multithreaded yet. */
|
||||
goto end;
|
||||
@@ -375,7 +712,7 @@ void process_procfs_request(unsigned long rarg)
|
||||
char tmp[1024];
|
||||
int len;
|
||||
|
||||
if ((proc = findthread_and_lock(pid, tid, &savelock, &irqstate))){
|
||||
if ((thread = find_thread(pid, tid, &lock))){
|
||||
dprintf("thread found! pid=%d tid=%d\n", pid, tid);
|
||||
/*
|
||||
* pid (comm) state ppid
|
||||
@@ -411,10 +748,10 @@ void process_procfs_request(unsigned long rarg)
|
||||
0L, 0L, 0L, 0L, // rsslim...
|
||||
0L, 0L, 0L, 0L, // kstkesp...
|
||||
0L, 0L, 0L, 0L, // sigignore...
|
||||
0L, 0, proc->cpu_id, 0, // cnswap...
|
||||
0L, 0, thread->cpu_id, 0, // cnswap...
|
||||
0, 0LL, 0L, 0L // policy...
|
||||
);
|
||||
process_unlock(savelock, irqstate);
|
||||
thread_unlock(thread, &lock);
|
||||
dprintf("tmp=%s\n", tmp);
|
||||
|
||||
len = strlen(tmp);
|
||||
@@ -445,7 +782,7 @@ void process_procfs_request(unsigned long rarg)
|
||||
*/
|
||||
dprintf("could not find a matching entry for %s.\n", p);
|
||||
end:
|
||||
ihk_mc_unmap_virtual(buf, 1, 0);
|
||||
ihk_mc_unmap_virtual(buf, npages, 0);
|
||||
dprintf("ret: %d, eof: %d\n", ans, eof);
|
||||
r->ret = ans;
|
||||
r->eof = eof;
|
||||
|
||||
24
kernel/script/mkimage.smp-x86
Normal file
24
kernel/script/mkimage.smp-x86
Normal file
@@ -0,0 +1,24 @@
|
||||
#!/bin/sh

# Copy the input file ($1) to the output file ($2).
# The operands are quoted so that paths containing spaces or glob
# characters are passed to cp unchanged, and a failed copy is reported
# to the caller instead of being hidden by the unconditional "exit 0".
cp "$1" "$2" || exit 1

exit 0
|
||||
|
||||
#set -e
|
||||
#
|
||||
#O=`pwd`
|
||||
#
|
||||
#make -C $3/../arch/x86/kboot O=$O clean
|
||||
##make -C $3/../kboot O=$O KIMAGE="$O/$1" LOAD_PA=0x3a001000
|
||||
#make -C $3/../arch/x86/kboot O=$O KIMAGE="$O/$1" LOAD_PA=0x101001000
|
||||
##make -C $3/../kboot O=$O KIMAGE="$O/$1" LOAD_PA=0x901001000
|
||||
#
|
||||
#make -C $3/../arch/x86/elfboot O=$O clean
|
||||
#make -C $3/../arch/x86/elfboot O=$O
|
||||
#
|
||||
#cat elfboot/elfboot kboot/kboot.elf > $2
|
||||
#
|
||||
#make -C $3/../arch/x86/kboot O=$O clean
|
||||
##make -C $3/../kboot O=$O KIMAGE="$O/$1" LOAD_PA=0x3a001000
|
||||
#make -C $3/../arch/x86/kboot O=$O KIMAGE="$O/$1" LOAD_PA=0x201001000
|
||||
#cat elfboot/elfboot kboot/kboot.elf > $2.8G
|
||||
217
kernel/shmobj.c
217
kernel/shmobj.c
@@ -3,7 +3,8 @@
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* shared memory object
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2014 - 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
@@ -26,21 +27,19 @@
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#define fkprintf(...) kprintf(__VA_ARGS__)
|
||||
|
||||
struct shmobj {
|
||||
struct memobj memobj; /* must be first */
|
||||
long ref;
|
||||
struct shmid_ds ds;
|
||||
struct list_head page_list;
|
||||
};
|
||||
static LIST_HEAD(shmobj_list_head);
|
||||
static ihk_spinlock_t shmobj_list_lock_body = SPIN_LOCK_UNLOCKED;
|
||||
|
||||
static memobj_release_func_t shmobj_release;
|
||||
static memobj_ref_func_t shmobj_ref;
|
||||
static memobj_get_page_func_t shmobj_get_page;
|
||||
static memobj_invalidate_page_func_t shmobj_invalidate_page;
|
||||
|
||||
static struct memobj_ops shmobj_ops = {
|
||||
.release = &shmobj_release,
|
||||
.ref = &shmobj_ref,
|
||||
.get_page = &shmobj_get_page,
|
||||
.invalidate_page = &shmobj_invalidate_page,
|
||||
};
|
||||
|
||||
static struct shmobj *to_shmobj(struct memobj *memobj)
|
||||
@@ -98,6 +97,25 @@ static struct page *page_list_first(struct shmobj *obj)
|
||||
return list_first_entry(&obj->page_list, struct page, list);
|
||||
}
|
||||
|
||||
/***********************************************************************
|
||||
* shmobj_list
|
||||
*/
|
||||
void shmobj_list_lock(void)
|
||||
{
|
||||
ihk_mc_spinlock_lock_noirq(&shmobj_list_lock_body);
|
||||
return;
|
||||
}
|
||||
|
||||
void shmobj_list_unlock(void)
|
||||
{
|
||||
ihk_mc_spinlock_unlock_noirq(&shmobj_list_lock_body);
|
||||
return;
|
||||
}
|
||||
|
||||
/***********************************************************************
|
||||
* operations
|
||||
*/
|
||||
int the_seq = 0;
|
||||
int shmobj_create(struct shmid_ds *ds, struct memobj **objp)
|
||||
{
|
||||
struct shmobj *obj = NULL;
|
||||
@@ -114,8 +132,11 @@ int shmobj_create(struct shmid_ds *ds, struct memobj **objp)
|
||||
|
||||
memset(obj, 0, sizeof(*obj));
|
||||
obj->memobj.ops = &shmobj_ops;
|
||||
obj->ref = 1;
|
||||
obj->ds = *ds;
|
||||
obj->ds.shm_perm.seq = the_seq++;
|
||||
obj->ds.shm_nattch = 1;
|
||||
obj->index = -1;
|
||||
obj->real_segsz = (obj->ds.shm_segsz + PAGE_SIZE - 1) & PAGE_MASK;
|
||||
page_list_init(obj);
|
||||
ihk_mc_spinlock_init(&obj->memobj.lock);
|
||||
|
||||
@@ -127,65 +148,124 @@ out:
|
||||
if (obj) {
|
||||
kfree(obj);
|
||||
}
|
||||
dkprintf("shmobj_create(%p %#lx,%p):%d %p\n",
|
||||
dkprintf("shmobj_create_indexed(%p %#lx,%p):%d %p\n",
|
||||
ds, ds->shm_segsz, objp, error, *objp);
|
||||
return error;
|
||||
}
|
||||
|
||||
int shmobj_create_indexed(struct shmid_ds *ds, struct shmobj **objp)
|
||||
{
|
||||
int error;
|
||||
struct memobj *obj;
|
||||
|
||||
error = shmobj_create(ds, &obj);
|
||||
if (!error) {
|
||||
obj->flags |= MF_SHMDT_OK | MF_IS_REMOVABLE;
|
||||
*objp = to_shmobj(obj);
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
void shmobj_destroy(struct shmobj *obj)
|
||||
{
|
||||
extern struct shm_info the_shm_info;
|
||||
extern struct list_head kds_free_list;
|
||||
extern int the_maxi;
|
||||
|
||||
dkprintf("shmobj_destroy(%p [%d %o])\n", obj, obj->index, obj->ds.shm_perm.mode);
|
||||
/* zap page_list */
|
||||
for (;;) {
|
||||
struct page *page;
|
||||
int count;
|
||||
|
||||
page = page_list_first(obj);
|
||||
if (!page) {
|
||||
break;
|
||||
}
|
||||
page_list_remove(obj, page);
|
||||
|
||||
dkprintf("shmobj_destroy(%p):"
|
||||
"release page. %p %#lx %d %d",
|
||||
obj, page, page_to_phys(page),
|
||||
page->mode, page->count);
|
||||
count = ihk_atomic_sub_return(1, &page->count);
|
||||
if (!((page->mode == PM_MAPPED) && (count == 0))) {
|
||||
fkprintf("shmobj_destroy(%p): "
|
||||
"page %p phys %#lx mode %#x"
|
||||
" count %d off %#lx\n",
|
||||
obj, page,
|
||||
page_to_phys(page),
|
||||
page->mode, count,
|
||||
page->offset);
|
||||
panic("shmobj_release");
|
||||
}
|
||||
|
||||
/* XXX:NYI: large pages */
|
||||
page->mode = PM_NONE;
|
||||
free_pages(phys_to_virt(page_to_phys(page)), 1);
|
||||
}
|
||||
if (obj->index < 0) {
|
||||
kfree(obj);
|
||||
}
|
||||
else {
|
||||
list_del(&obj->chain);
|
||||
--the_shm_info.used_ids;
|
||||
|
||||
list_add(&obj->chain, &kds_free_list);
|
||||
for (;;) {
|
||||
struct shmobj *p;
|
||||
|
||||
list_for_each_entry(p, &kds_free_list, chain) {
|
||||
if (p->index == the_maxi) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (&p->chain == &kds_free_list) {
|
||||
break;
|
||||
}
|
||||
|
||||
list_del(&p->chain);
|
||||
kfree(p);
|
||||
--the_maxi;
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
static void shmobj_release(struct memobj *memobj)
|
||||
{
|
||||
struct shmobj *obj = to_shmobj(memobj);
|
||||
struct shmobj *freeobj = NULL;
|
||||
long newref;
|
||||
extern time_t time(void);
|
||||
extern pid_t getpid(void);
|
||||
|
||||
dkprintf("shmobj_release(%p)\n", memobj);
|
||||
memobj_lock(&obj->memobj);
|
||||
--obj->ref;
|
||||
if (obj->ref <= 0) {
|
||||
if (obj->ref < 0) {
|
||||
if (obj->index >= 0) {
|
||||
obj->ds.shm_dtime = time();
|
||||
obj->ds.shm_lpid = getpid();
|
||||
dkprintf("shmobj_release:drop shm_nattach %p %d\n", obj, obj->ds.shm_nattch);
|
||||
}
|
||||
newref = --obj->ds.shm_nattch;
|
||||
if (newref <= 0) {
|
||||
if (newref < 0) {
|
||||
fkprintf("shmobj_release(%p):ref %ld\n",
|
||||
memobj, obj->ref);
|
||||
memobj, newref);
|
||||
panic("shmobj_release:freeing free shmobj");
|
||||
}
|
||||
freeobj = obj;
|
||||
if (obj->ds.shm_perm.mode & SHM_DEST) {
|
||||
freeobj = obj;
|
||||
}
|
||||
}
|
||||
memobj_unlock(&obj->memobj);
|
||||
|
||||
if (freeobj) {
|
||||
/* zap page_list */
|
||||
for (;;) {
|
||||
struct page *page;
|
||||
int count;
|
||||
|
||||
page = page_list_first(obj);
|
||||
if (!page) {
|
||||
break;
|
||||
}
|
||||
page_list_remove(obj, page);
|
||||
|
||||
dkprintf("shmobj_release(%p):"
|
||||
"release page. %p %#lx %d %d",
|
||||
memobj, page, page_to_phys(page),
|
||||
page->mode, page->count);
|
||||
count = ihk_atomic_sub_return(1, &page->count);
|
||||
if (!((page->mode == PM_MAPPED) && (count == 0))) {
|
||||
fkprintf("shmobj_release(%p): "
|
||||
"page %p phys %#lx mode %#x"
|
||||
" count %d off %#lx\n",
|
||||
memobj, page,
|
||||
page_to_phys(page),
|
||||
page->mode, count,
|
||||
page->offset);
|
||||
panic("shmobj_release");
|
||||
}
|
||||
|
||||
/* XXX:NYI: large pages */
|
||||
page->mode = PM_NONE;
|
||||
free_pages(phys_to_virt(page_to_phys(page)), 1);
|
||||
}
|
||||
dkprintf("shmobj_release(%p):free shmobj", memobj);
|
||||
kfree(freeobj);
|
||||
shmobj_list_lock();
|
||||
shmobj_destroy(freeobj);
|
||||
shmobj_list_unlock();
|
||||
}
|
||||
dkprintf("shmobj_release(%p):\n", memobj);
|
||||
dkprintf("shmobj_release(%p): %ld\n", memobj, newref);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -193,17 +273,23 @@ static void shmobj_ref(struct memobj *memobj)
|
||||
{
|
||||
struct shmobj *obj = to_shmobj(memobj);
|
||||
long newref;
|
||||
extern time_t time(void);
|
||||
extern pid_t getpid(void);
|
||||
|
||||
dkprintf("shmobj_ref(%p)\n", memobj);
|
||||
memobj_lock(&obj->memobj);
|
||||
newref = ++obj->ref;
|
||||
newref = ++obj->ds.shm_nattch;
|
||||
if (obj->index >= 0) {
|
||||
obj->ds.shm_atime = time();
|
||||
obj->ds.shm_lpid = getpid();
|
||||
}
|
||||
memobj_unlock(&obj->memobj);
|
||||
dkprintf("shmobj_ref(%p): newref %ld\n", memobj, newref);
|
||||
return;
|
||||
}
|
||||
|
||||
static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align,
|
||||
uintptr_t *physp)
|
||||
uintptr_t *physp, unsigned long *pflag)
|
||||
{
|
||||
struct shmobj *obj = to_shmobj(memobj);
|
||||
int error;
|
||||
@@ -227,13 +313,13 @@ static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align,
|
||||
memobj, off, p2align, physp, error);
|
||||
goto out;
|
||||
}
|
||||
if (obj->ds.shm_segsz <= off) {
|
||||
if (obj->real_segsz <= off) {
|
||||
error = -ERANGE;
|
||||
ekprintf("shmobj_get_page(%p,%#lx,%d,%p):beyond the end. %d\n",
|
||||
memobj, off, p2align, physp, error);
|
||||
goto out;
|
||||
}
|
||||
if ((obj->ds.shm_segsz - off) < (PAGE_SIZE << p2align)) {
|
||||
if ((obj->real_segsz - off) < (PAGE_SIZE << p2align)) {
|
||||
error = -ENOSPC;
|
||||
ekprintf("shmobj_get_page(%p,%#lx,%d,%p):too large. %d\n",
|
||||
memobj, off, p2align, physp, error);
|
||||
@@ -285,3 +371,30 @@ out:
|
||||
memobj, off, p2align, physp, error);
|
||||
return error;
|
||||
}
|
||||
|
||||
static int shmobj_invalidate_page(struct memobj *memobj, uintptr_t phys,
|
||||
size_t pgsize)
|
||||
{
|
||||
struct shmobj *obj = to_shmobj(memobj);
|
||||
int error;
|
||||
struct page *page;
|
||||
|
||||
dkprintf("shmobj_invalidate_page(%p,%#lx,%#lx)\n", memobj, phys, pgsize);
|
||||
|
||||
if (!(page = phys_to_page(phys))
|
||||
|| !(page = page_list_lookup(obj, page->offset))) {
|
||||
error = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (ihk_atomic_read(&page->count) == 1) {
|
||||
if (page_unmap(page)) {
|
||||
ihk_mc_free_pages(phys_to_virt(phys), pgsize/PAGE_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
error = 0;
|
||||
out:
|
||||
dkprintf("shmobj_invalidate_page(%p,%#lx,%#lx):%d\n", memobj, phys, pgsize, error);
|
||||
return error;
|
||||
}
|
||||
|
||||
4154
kernel/syscall.c
4154
kernel/syscall.c
File diff suppressed because it is too large
Load Diff
@@ -38,10 +38,10 @@
|
||||
#ifdef DEBUG_PRINT_TIMER
|
||||
#define dkprintf kprintf
|
||||
#else
|
||||
#define dkprintf(...)
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#endif
|
||||
|
||||
#define LOOP_TIMEOUT 10
|
||||
#define LOOP_TIMEOUT 500
|
||||
|
||||
struct list_head timers;
|
||||
ihk_spinlock_t timers_lock;
|
||||
@@ -57,23 +57,25 @@ uint64_t schedule_timeout(uint64_t timeout)
|
||||
{
|
||||
struct waitq_entry my_wait;
|
||||
struct timer my_timer;
|
||||
struct process *proc = cpu_local_var(current);
|
||||
struct thread *thread = cpu_local_var(current);
|
||||
int irqstate;
|
||||
int spin_sleep;
|
||||
|
||||
ihk_mc_spinlock_lock_noirq(&proc->spin_sleep_lock);
|
||||
irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
|
||||
dkprintf("schedule_timeout() spin sleep timeout: %lu\n", timeout);
|
||||
proc->spin_sleep = 1;
|
||||
ihk_mc_spinlock_unlock_noirq(&proc->spin_sleep_lock);
|
||||
spin_sleep = ++thread->spin_sleep;
|
||||
ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
|
||||
|
||||
/* Spin sleep.. */
|
||||
for (;;) {
|
||||
uint64_t t_s = rdtsc();
|
||||
uint64_t t_e;
|
||||
int spin_over = 0;
|
||||
|
||||
ihk_mc_spinlock_lock_noirq(&proc->spin_sleep_lock);
|
||||
|
||||
irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
|
||||
|
||||
/* Woken up by someone? */
|
||||
if (!proc->spin_sleep) {
|
||||
if (thread->spin_sleep < 1) {
|
||||
t_e = rdtsc();
|
||||
|
||||
spin_over = 1;
|
||||
@@ -85,32 +87,76 @@ uint64_t schedule_timeout(uint64_t timeout)
|
||||
}
|
||||
}
|
||||
|
||||
ihk_mc_spinlock_unlock_noirq(&proc->spin_sleep_lock);
|
||||
ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
|
||||
|
||||
t_s = rdtsc();
|
||||
if (!spin_over) {
|
||||
t_s = rdtsc();
|
||||
int need_schedule;
|
||||
struct cpu_local_var *v = get_this_cpu_local_var();
|
||||
int irqstate = ihk_mc_spinlock_lock(&(v->runq_lock));
|
||||
need_schedule = v->runq_len > 1 ? 1 : 0;
|
||||
ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
|
||||
|
||||
/* Give a chance to another thread (if any) in case the core is
|
||||
* oversubscribed, but make sure we will be re-scheduled */
|
||||
if (need_schedule) {
|
||||
xchg4(&(cpu_local_var(current)->status), PS_RUNNING);
|
||||
schedule();
|
||||
xchg4(&(cpu_local_var(current)->status),
|
||||
PS_INTERRUPTIBLE);
|
||||
}
|
||||
else {
|
||||
/* Spin wait */
|
||||
while ((rdtsc() - t_s) < LOOP_TIMEOUT) {
|
||||
cpu_pause();
|
||||
}
|
||||
|
||||
if (timeout < LOOP_TIMEOUT) {
|
||||
timeout = 0;
|
||||
spin_over = 1;
|
||||
}
|
||||
else {
|
||||
timeout -= LOOP_TIMEOUT;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
while ((rdtsc() - t_s) < LOOP_TIMEOUT) {
|
||||
cpu_pause();
|
||||
}
|
||||
|
||||
if (timeout < LOOP_TIMEOUT) {
|
||||
timeout = 0;
|
||||
spin_over = 1;
|
||||
}
|
||||
else {
|
||||
timeout -= LOOP_TIMEOUT;
|
||||
}
|
||||
|
||||
if (spin_over) {
|
||||
dkprintf("schedule_timeout() spin woken up, timeout: %lu\n",
|
||||
timeout);
|
||||
|
||||
/* Give a chance to another thread (if any) in case we timed out,
|
||||
* but make sure we will be re-scheduled */
|
||||
if (timeout == 0) {
|
||||
int need_schedule;
|
||||
struct cpu_local_var *v = get_this_cpu_local_var();
|
||||
|
||||
int irqstate =
|
||||
ihk_mc_spinlock_lock(&(v->runq_lock));
|
||||
need_schedule = v->runq_len > 1 ? 1 : 0;
|
||||
ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
|
||||
|
||||
if (need_schedule) {
|
||||
xchg4(&(cpu_local_var(current)->status), PS_RUNNING);
|
||||
schedule();
|
||||
xchg4(&(cpu_local_var(current)->status),
|
||||
PS_INTERRUPTIBLE);
|
||||
}
|
||||
}
|
||||
|
||||
irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
|
||||
if (spin_sleep == thread->spin_sleep) {
|
||||
--thread->spin_sleep;
|
||||
}
|
||||
ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
|
||||
|
||||
return timeout;
|
||||
}
|
||||
}
|
||||
|
||||
/* Init waitq and wait entry for this timer */
|
||||
my_timer.timeout = (timeout < LOOP_TIMEOUT) ? LOOP_TIMEOUT : timeout;
|
||||
my_timer.proc = cpu_local_var(current);
|
||||
my_timer.thread = cpu_local_var(current);
|
||||
waitq_init(&my_timer.processes);
|
||||
waitq_init_entry(&my_wait, cpu_local_var(current));
|
||||
|
||||
@@ -167,7 +213,7 @@ void wake_timers_loop(void)
|
||||
list_del(&timer->list);
|
||||
|
||||
dkprintf("timers timeout occurred, waking up pid: %d\n",
|
||||
timer->proc->pid);
|
||||
timer->thread->proc->pid);
|
||||
|
||||
waitq_wakeup(&timer->processes);
|
||||
}
|
||||
|
||||
@@ -19,7 +19,7 @@ int
|
||||
default_wake_function(waitq_entry_t *entry, unsigned mode,
|
||||
int flags, void *key)
|
||||
{
|
||||
return sched_wakeup_process(entry->private, PS_NORMAL);
|
||||
return sched_wakeup_thread(entry->private, PS_NORMAL);
|
||||
}
|
||||
|
||||
void
|
||||
@@ -30,7 +30,7 @@ waitq_init(waitq_t *waitq)
|
||||
}
|
||||
|
||||
void
|
||||
waitq_init_entry(waitq_entry_t *entry, struct process *proc)
|
||||
waitq_init_entry(waitq_entry_t *entry, struct thread *proc)
|
||||
{
|
||||
entry->private = proc;
|
||||
entry->func = default_wake_function;
|
||||
@@ -89,14 +89,14 @@ waitq_prepare_to_wait(waitq_t *waitq, waitq_entry_t *entry, int state)
|
||||
ihk_mc_spinlock_lock_noirq(&waitq->lock);
|
||||
if (list_empty(&entry->link))
|
||||
list_add(&entry->link, &waitq->waitq);
|
||||
cpu_local_var(current)->ftn->status = state;
|
||||
cpu_local_var(current)->status = state;
|
||||
ihk_mc_spinlock_unlock_noirq(&waitq->lock);
|
||||
}
|
||||
|
||||
void
|
||||
waitq_finish_wait(waitq_t *waitq, waitq_entry_t *entry)
|
||||
{
|
||||
cpu_local_var(current)->ftn->status = PS_RUNNING;
|
||||
cpu_local_var(current)->status = PS_RUNNING;
|
||||
waitq_remove_entry(waitq, entry);
|
||||
}
|
||||
|
||||
|
||||
@@ -3,7 +3,8 @@
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* read-only zeroed page object
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2014 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
@@ -165,7 +166,7 @@ out:
|
||||
}
|
||||
|
||||
static int zeroobj_get_page(struct memobj *memobj, off_t off, int p2align,
|
||||
uintptr_t *physp)
|
||||
uintptr_t *physp, unsigned long *pflag)
|
||||
{
|
||||
int error;
|
||||
struct zeroobj *obj = to_zeroobj(memobj);
|
||||
|
||||
@@ -99,4 +99,9 @@ enum ihk_asr_type {
|
||||
int ihk_mc_arch_set_special_register(enum ihk_asr_type, unsigned long value);
|
||||
int ihk_mc_arch_get_special_register(enum ihk_asr_type, unsigned long *value);
|
||||
|
||||
extern unsigned int ihk_ikc_irq;
|
||||
extern unsigned int ihk_ikc_irq_apicid;
|
||||
|
||||
extern int gettime_local_support;
|
||||
|
||||
#endif
|
||||
|
||||
@@ -22,8 +22,8 @@ struct ihk_kmsg_buf {
|
||||
};
|
||||
|
||||
extern int kprintf(const char *format, ...);
|
||||
extern int kprintf_lock();
|
||||
extern void kprintf_unlock(int irqflags);
|
||||
extern unsigned long kprintf_lock(void);
|
||||
extern void kprintf_unlock(unsigned long irqflags);
|
||||
extern int __kprintf(const char *format, ...);
|
||||
|
||||
extern void panic(const char *msg);
|
||||
|
||||
@@ -147,7 +147,8 @@ struct page_table *ihk_mc_pt_create(enum ihk_mc_ap_flag ap_flag);
|
||||
void ihk_mc_pt_destroy(struct page_table *pt);
|
||||
void ihk_mc_load_page_table(struct page_table *pt);
|
||||
int ihk_mc_pt_virt_to_phys(struct page_table *pt,
|
||||
void *virt, unsigned long *phys);
|
||||
const void *virt, unsigned long *phys);
|
||||
uint64_t ihk_mc_pt_virt_to_pagemap(struct page_table *pt, unsigned long virt);
|
||||
|
||||
void remote_flush_tlb_cpumask(struct process_vm *vm,
|
||||
unsigned long addr, int cpu_id);
|
||||
|
||||
Reference in New Issue
Block a user