Compare commits

78 Commits

| Author | SHA1 | Date |
|---|---|---|
| | 8d21846562 | |
| | 3e1367caa1 | |
| | 02536b7724 | |
| | e28725884f | |
| | c2b3fb7236 | |
| | 2f95f7cda8 | |
| | e551aa17ed | |
| | e6d4c160cd | |
| | 9390fe5d2c | |
| | 419f5e495b | |
| | 673deadf37 | |
| | 20ea65b38c | |
| | 84665ff699 | |
| | bfbc94dfb0 | |
| | f74dcfc2a1 | |
| | 7c562d0539 | |
| | b5e4459a34 | |
| | 782122b681 | |
| | d550bced78 | |
| | a7ee3f531b | |
| | b9439947a7 | |
| | 3b60a95f13 | |
| | 82ae6d7458 | |
| | 7ebc34ddcc | |
| | bd6a2c2311 | |
| | 5fd68eae54 | |
| | f5857cfc9e | |
| | 1ce1b17a85 | |
| | a2456c3ed2 | |
| | 01d2ea1605 | |
| | 15783f09a0 | |
| | 9efd568e07 | |
| | 1a207e19c2 | |
| | 73cf93727b | |
| | 4410e702d9 | |
| | f584e2ec25 | |
| | 3aa06444f4 | |
| | c897a56c34 | |
| | 5e9957da0f | |
| | 6ff2d4abe7 | |
| | e4239f1885 | |
| | fbbaaf5b54 | |
| | 3fa3920bb3 | |
| | 45e51fcc07 | |
| | 0884e3d543 | |
| | e3c7c9b890 | |
| | f4155cc9e8 | |
| | a01ae91051 | |
| | daca522d25 | |
| | ec521feb15 | |
| | d7bc947a02 | |
| | fb84d4ef11 | |
| | 5fbeee953a | |
| | 4cefb4333f | |
| | 689da07ac6 | |
| | 76981bcc18 | |
| | 6aae35cb3d | |
| | dac6f2883e | |
| | c484f766fa | |
| | 57690479bd | |
| | d0539a9cac | |
| | 4c8f583c0c | |
| | 6118faffa9 | |
| | dad6470c60 | |
| | 46c37fc8f3 | |
| | f6908f21a8 | |
| | 01d9d9a5ba | |
| | c43d993a4d | |
| | 7d9bbecd7a | |
| | d135731398 | |
| | 5c190beb04 | |
| | fc66556f9f | |
| | 648bacc90f | |
| | dd37443fc7 | |
| | e34322702a | |
| | e12997e6a9 | |
| | fabaa806d3 | |
| | a83ad620c8 | |
```diff
@@ -1,5 +1,6 @@
 TARGET = @TARGET@
 SBINDIR = @SBINDIR@
+ETCDIR = @ETCDIR@
 MANDIR = @MANDIR@
 
 all::
@@ -48,6 +49,9 @@ install::
     mkdir -p -m 755 $(SBINDIR); \
     install -m 755 arch/x86/tools/mcreboot-smp-x86.sh $(SBINDIR)/mcreboot.sh; \
     install -m 755 arch/x86/tools/mcstop+release-smp-x86.sh $(SBINDIR)/mcstop+release.sh; \
+    mkdir -p -m 755 $(ETCDIR); \
+    install -m 644 arch/x86/tools/irqbalance_mck.service $(ETCDIR)/irqbalance_mck.service; \
+    install -m 644 arch/x86/tools/irqbalance_mck.in $(ETCDIR)/irqbalance_mck.in; \
     mkdir -p -m 755 $(MANDIR)/man1; \
     install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
     ;; \
```
```diff
@@ -181,6 +181,8 @@ static void init_idt(void)
 }
 
 static int xsave_available = 0;
+static int xsave_size = 0;
+static uint64_t xsave_mask = 0x0;
 
 void init_fpu(void)
 {
@@ -224,6 +226,26 @@ void init_fpu(void)
         xsetbv(0, reg);
         dkprintf("init_fpu(): AVX init: XCR0 = 0x%016lX\n", reg);
     }
+    if(xsave_available){
+        unsigned long eax;
+        unsigned long ebx;
+        unsigned long ecx;
+        unsigned long edx;
+        asm volatile("cpuid" : "=a"(eax),"=b"(ebx),"=c"(ecx),"=d"(edx) : "a" (0x0d), "c" (0x00));
+        xsave_size = ecx;
+        dkprintf("init_fpu(): xsave_size = %d\n", xsave_size);
+
+        if ((eax & (1 << 5)) && (eax & (1 << 6)) && (eax & (1 << 7))) {
+            /* Set xcr0[7:5] to enable avx-512 ops */
+            reg = xgetbv(0);
+            reg |= 0xe6;
+            xsetbv(0, reg);
+            dkprintf("init_fpu(): AVX-512 init: XCR0 = 0x%016lX\n", reg);
+        }
+    }
+
+    xsave_mask = xgetbv(0);
+    dkprintf("init_fpu(): xsave_mask = 0x%016lX\n", xsave_mask);
 
     /* TODO: set MSR_IA32_XSS to enable xsaves/xrstors */
@@ -234,6 +256,17 @@ void init_fpu(void)
     asm volatile("finit");
 }
 
+int
+get_xsave_size()
+{
+    return xsave_size;
+}
+
+uint64_t get_xsave_mask()
+{
+    return xsave_mask;
+}
+
 void reload_gdt(struct x86_desc_ptr *gdt_ptr)
 {
     asm volatile("pushq %1\n"
@@ -883,13 +916,36 @@ void handle_interrupt(int vector, struct x86_user_context *regs)
         dkprintf("timer[%lu]: CPU_FLAG_NEED_RESCHED \n", rdtsc());
     }
     else if (vector == LOCAL_PERF_VECTOR) {
+        struct siginfo info;
         unsigned long value;
+        struct thread *thread = cpu_local_var(current);
+        struct process *proc = thread->proc;
+        long irqstate;
+        struct mckfd *fdp;
+
+        lapic_write(LAPIC_LVTPC, LOCAL_PERF_VECTOR);
+
         value = rdmsr(MSR_PERF_GLOBAL_STATUS);
         wrmsr(MSR_PERF_GLOBAL_OVF_CTRL, value);
         wrmsr(MSR_PERF_GLOBAL_OVF_CTRL, 0);
-        //TODO: counter overflow signal
-        //set_signal(0x1d, regs, NULL); // SIGIO
+        irqstate = ihk_mc_spinlock_lock(&proc->mckfd_lock);
+        for(fdp = proc->mckfd; fdp; fdp = fdp->next) {
+            if(fdp->sig_no > 0)
+                break;
+        }
+        ihk_mc_spinlock_unlock(&proc->mckfd_lock, irqstate);
+
+        if(fdp) {
+            memset(&info, '\0', sizeof info);
+            info.si_signo = fdp->sig_no;
+            info._sifields._sigfault.si_addr = (void *)regs->gpr.rip;
+            info._sifields._sigpoll.si_fd = fdp->fd;
+            set_signal(fdp->sig_no, regs, &info);
+        }
+        else {
+            set_signal(SIGIO, regs, NULL);
+        }
     }
     else if (vector >= IHK_TLB_FLUSH_IRQ_VECTOR_START &&
             vector < IHK_TLB_FLUSH_IRQ_VECTOR_END) {
@@ -998,9 +1054,8 @@ unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs)
     unsigned long error = ((struct x86_user_context *)regs)->gpr.error;
 
     irqflags = kprintf_lock();
-    dkprintf("[%d] Page fault for 0x%lX\n",
-            ihk_mc_get_processor_id(), address);
-    dkprintf("%s for %s access in %s mode (reserved bit %s set), "
+    __kprintf("Page fault for 0x%lx\n", address);
+    __kprintf("%s for %s access in %s mode (reserved bit %s set), "
             "it %s an instruction fetch\n",
             (error & PF_PROT ? "protection fault" : "no page found"),
             (error & PF_WRITE ? "write" : "read"),
@@ -1012,14 +1067,14 @@ unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs)
     list_for_each_entry(range, &vm->vm_range_list, list) {
         if (range->start <= address && range->end > address) {
             found = 1;
-            dkprintf("address is in range, flag: 0x%X! \n",
+            __kprintf("address is in range, flag: 0x%lx\n",
                     range->flag);
             ihk_mc_pt_print_pte(vm->address_space->page_table, (void*)address);
             break;
         }
     }
     if (!found) {
-        dkprintf("address is out of range! \n");
+        __kprintf("address is out of range! \n");
     }
 
     kprintf_unlock(irqflags);
@@ -1494,7 +1549,8 @@ release_fp_regs(struct thread *thread)
     if (thread && !thread->fp_regs)
         return;
 
-    pages = (sizeof(fp_regs_struct) + 4095) >> 12;
+    pages = (xsave_size + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+    dkprintf("release_fp_regs: pages=%d\n", pages);
     ihk_mc_free_pages(thread->fp_regs, pages);
     thread->fp_regs = NULL;
 }
@@ -1508,7 +1564,8 @@ save_fp_regs(struct thread *thread)
     int pages;
 
     if (!thread->fp_regs) {
-        pages = (sizeof(fp_regs_struct) + 4095) >> 12;
+        pages = (xsave_size + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+        dkprintf("save_fp_regs: pages=%d\n", pages);
         thread->fp_regs = ihk_mc_alloc_pages(pages, IHK_MC_AP_NOWAIT);
 
         if (!thread->fp_regs) {
@@ -1517,14 +1574,15 @@ save_fp_regs(struct thread *thread)
         }
 
-        memset(thread->fp_regs, 0, sizeof(fp_regs_struct));
+        memset(thread->fp_regs, 0, pages * PAGE_SIZE);
     }
 
     if (xsave_available) {
         unsigned int low, high;
 
-        /* Request full save of x87, SSE and AVX states */
-        low = 0x7;
-        high = 0;
+        /* Request full save of x87, SSE, AVX and AVX-512 states */
+        low = (unsigned int)xsave_mask;
+        high = (unsigned int)(xsave_mask >> 32);
 
         asm volatile("xsave %0" : : "m" (*thread->fp_regs), "a" (low), "d" (high)
             : "memory");
@@ -1546,9 +1604,9 @@ restore_fp_regs(struct thread *thread)
     if (xsave_available) {
         unsigned int low, high;
 
-        /* Request full restore of x87, SSE and AVX states */
-        low = 0x7;
-        high = 0;
+        /* Request full restore of x87, SSE, AVX and AVX-512 states */
+        low = (unsigned int)xsave_mask;
+        high = (unsigned int)(xsave_mask >> 32);
 
         asm volatile("xrstor %0" : : "m" (*thread->fp_regs),
             "a" (low), "d" (high));
```
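The init_fpu() hunks above size the per-thread XSAVE area and detect AVX-512 state support by issuing CPUID leaf 0x0D, sub-leaf 0. Below is a minimal user-space sketch of the same probe (plain C with GNU inline asm, not McKernel code), useful for checking what the patch would detect on a given machine:

```c
/* Sketch only: query CPUID.(EAX=0x0D, ECX=0), the leaf init_fpu() uses above.
 * ECX reports the largest XSAVE area any supported component set can need,
 * so it stays valid even if XCR0 is extended later, as the AVX-512 branch does.
 * EAX bits 5..7 advertise the AVX-512 opmask/ZMM_Hi256/Hi16_ZMM states. */
#include <stdio.h>

int main(void)
{
    unsigned int eax, ebx, ecx, edx;

    asm volatile("cpuid"
                 : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
                 : "a"(0x0d), "c"(0x00));

    printf("max xsave area: %u bytes, AVX-512 state bits: %s\n",
           ecx, (eax & 0xe0) == 0xe0 ? "present" : "absent");
    return 0;
}
```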
```diff
@@ -318,5 +318,5 @@ extern unsigned long ap_trampoline;
 #define AP_TRAMPOLINE_SIZE 0x2000
 
 /* Local is cachable */
-#define IHK_IKC_QUEUE_PT_ATTR (PTATTR_NO_EXECUTE | PTATTR_WRITABLE | PTATTR_UNCACHABLE)
+#define IHK_IKC_QUEUE_PT_ATTR (PTATTR_NO_EXECUTE | PTATTR_WRITABLE)
 #endif
```
```diff
@@ -66,7 +66,7 @@ SYSCALL_DELEGATED(65, semop)
 SYSCALL_HANDLED(67, shmdt)
 SYSCALL_DELEGATED(69, msgsnd)
 SYSCALL_DELEGATED(70, msgrcv)
-SYSCALL_DELEGATED(72, fcntl)
+SYSCALL_HANDLED(72, fcntl)
 SYSCALL_DELEGATED(79, getcwd)
 SYSCALL_DELEGATED(89, readlink)
 SYSCALL_HANDLED(96, gettimeofday)
```
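The hunk above flips fcntl from delegated (forwarded to the Linux side) to handled (implemented locally). Tables built from SYSCALL_HANDLED/SYSCALL_DELEGATED entries like this are commonly consumed as X-macros; the sketch below shows that general pattern with a three-entry illustrative list. The dispatch structure and helper names here are assumptions for illustration only, not McKernel's actual definitions:

```c
/* Generic X-macro sketch: one list, expanded into a table.  The macros and
 * the table contents below are illustrative, not the real syscall list. */
#include <stdio.h>

#define SYSCALL_LIST              \
    SYSCALL_HANDLED(67, shmdt)    \
    SYSCALL_DELEGATED(70, msgrcv) \
    SYSCALL_HANDLED(72, fcntl)

enum handling { HANDLED, DELEGATED };

struct entry { int num; const char *name; enum handling how; };

static const struct entry table[] = {
#define SYSCALL_HANDLED(n, name)   { n, #name, HANDLED },
#define SYSCALL_DELEGATED(n, name) { n, #name, DELEGATED },
    SYSCALL_LIST
#undef SYSCALL_HANDLED
#undef SYSCALL_DELEGATED
};

int main(void)
{
    for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); i++)
        printf("%3d %-8s %s\n", table[i].num, table[i].name,
               table[i].how == HANDLED ? "handled locally" : "delegated to Linux");
    return 0;
}
```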
```diff
@@ -23,6 +23,7 @@
 #include <process.h>
 #include <page.h>
 #include <cls.h>
+#include <kmalloc.h>
 
 #define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
 #define ekprintf(...) kprintf(__VA_ARGS__)
@@ -84,20 +85,22 @@ void ihk_mc_free_pages(void *p, int npages)
     pa_ops->free_page(p, npages);
 }
 
-void *ihk_mc_allocate(int size, enum ihk_mc_ap_flag flag)
+void *ihk_mc_allocate(int size, int flag)
 {
-    if (pa_ops && pa_ops->alloc)
-        return pa_ops->alloc(size, flag);
-    else
-        return ihk_mc_alloc_pages(1, flag);
+    if (!cpu_local_var(kmalloc_initialized)) {
+        kprintf("%s: error, kmalloc not yet initialized\n", __FUNCTION__);
+        return NULL;
+    }
+    return kmalloc(size, IHK_MC_AP_NOWAIT);
 }
 
 void ihk_mc_free(void *p)
 {
-    if (pa_ops && pa_ops->free)
-        return pa_ops->free(p);
-    else
-        return ihk_mc_free_pages(p, 1);
+    if (!cpu_local_var(kmalloc_initialized)) {
+        kprintf("%s: error, kmalloc not yet initialized\n", __FUNCTION__);
+        return;
+    }
+    kfree(p);
 }
 
 void *get_last_early_heap(void)
@@ -910,11 +913,17 @@ static int split_large_page(pte_t *ptep, size_t pgsize)
 
     *ptep = (virt_to_phys(pt) & PT_PHYSMASK) | PFL2_PDIR_ATTR;
 
-    if (phys_base != NOPHYS) {
-        page = phys_to_page(phys_base);
-        if (page && page_unmap(page)) {
-            kprintf("split_large_page:page_unmap:%p\n", page);
-            panic("split_large_page:page_unmap\n");
+    /* Do not do this check for large pages as they don't come from the zeroobj
+     * and are not actually mapped.
+     * TODO: clean up zeroobj as we don't really need it, anonymous mappings
+     * should be allocated for real */
+    if (pgsize != PTL2_SIZE) {
+        if (phys_base != NOPHYS) {
+            page = phys_to_page(phys_base);
+            if (pgsize != PTL2_SIZE && page && page_unmap(page)) {
+                kprintf("split_large_page:page_unmap:%p\n", page);
+                panic("split_large_page:page_unmap\n");
+            }
         }
     }
     return 0;
@@ -1105,6 +1114,7 @@ static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base,
     if (!(old & PFL1_FILEOFF) && args->free_physical) {
         if (page && page_unmap(page)) {
             ihk_mc_free_pages(phys_to_virt(phys), 1);
+            dkprintf("%s: freeing regular page at 0x%lx\n", __FUNCTION__, base);
         }
         args->vm->currss -= PTL1_SIZE;
     }
@@ -1153,6 +1163,7 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base,
     if (!(old & PFL2_FILEOFF) && args->free_physical) {
         if (page && page_unmap(page)) {
             ihk_mc_free_pages(phys_to_virt(phys), PTL2_SIZE/PTL1_SIZE);
+            dkprintf("%s: freeing large page at 0x%lx\n", __FUNCTION__, base);
         }
         args->vm->currss -= PTL2_SIZE;
     }
@@ -2261,13 +2272,18 @@ int read_process_vm(struct process_vm *vm, void *kdst, const void *usrc, size_t
     if ((ustart < vm->region.user_start)
             || (vm->region.user_end <= ustart)
             || ((vm->region.user_end - ustart) < siz)) {
+        kprintf("%s: error: out of user range\n", __FUNCTION__);
         return -EFAULT;
     }
 
     reason = PF_USER; /* page not present */
     for (addr = ustart & PAGE_MASK; addr < uend; addr += PAGE_SIZE) {
+        if (!addr)
+            return -EINVAL;
+
         error = page_fault_process_vm(vm, (void *)addr, reason);
         if (error) {
+            kprintf("%s: error: PF for %p failed\n", __FUNCTION__, addr);
             return error;
         }
     }
@@ -2283,11 +2299,22 @@ int read_process_vm(struct process_vm *vm, void *kdst, const void *usrc, size_t
 
         error = ihk_mc_pt_virt_to_phys(vm->address_space->page_table, from, &pa);
         if (error) {
+            kprintf("%s: error: resolving physical address or %p\n", __FUNCTION__, from);
             return error;
         }
 
-        va = phys_to_virt(pa);
-        memcpy(to, va, cpsize);
+        if (pa < ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0) ||
+                pa >= ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0)) {
+            dkprintf("%s: pa is outside of LWK memory, to: %p, pa: %p,"
+                    "cpsize: %d\n", __FUNCTION__, to, pa, cpsize);
+            va = ihk_mc_map_virtual(pa, 1, PTATTR_ACTIVE);
+            memcpy(to, va, cpsize);
+            ihk_mc_unmap_virtual(va, 1, 1);
+        }
+        else {
+            va = phys_to_virt(pa);
+            memcpy(to, va, cpsize);
+        }
 
         from += cpsize;
         to += cpsize;
@@ -2413,8 +2440,18 @@ int patch_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t
             return error;
         }
 
-        va = phys_to_virt(pa);
-        memcpy(va, from, cpsize);
+        if (pa < ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0) ||
+                pa >= ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0)) {
+            dkprintf("%s: pa is outside of LWK memory, from: %p,"
+                    "pa: %p, cpsize: %d\n", __FUNCTION__, from, pa, cpsize);
+            va = ihk_mc_map_virtual(pa, 1, PTATTR_ACTIVE);
+            memcpy(va, from, cpsize);
+            ihk_mc_unmap_virtual(va, 1, 1);
+        }
+        else {
+            va = phys_to_virt(pa);
+            memcpy(va, from, cpsize);
+        }
 
         from += cpsize;
         to += cpsize;
```
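The read_process_vm() hunks above only add diagnostics around the user-range check, but the check's form is worth noting: comparing (user_end - ustart) < siz rather than ustart + siz > user_end avoids integer wrap-around for ranges near the top of the address space. A stand-alone sketch with hypothetical bounds:

```c
/* Sketch of the range check read_process_vm() performs; the user_start/user_end
 * values below are hypothetical, not the real McKernel layout. */
#include <stdint.h>
#include <stdio.h>

static int in_user_range(uint64_t ustart, uint64_t siz,
                         uint64_t user_start, uint64_t user_end)
{
    if (ustart < user_start || user_end <= ustart ||
        (user_end - ustart) < siz)
        return 0;   /* the kernel code returns -EFAULT here */
    return 1;
}

int main(void)
{
    /* hypothetical layout: user area spans [4 KiB, 128 TiB) */
    uint64_t us = 0x1000, ue = 0x800000000000ULL;

    printf("%d\n", in_user_range(0x400000, 4096, us, ue));          /* 1: fits */
    printf("%d\n", in_user_range(ue - 4096, 8192, us, ue));         /* 0: runs past the end */
    printf("%d\n", in_user_range(UINT64_MAX - 100, 4096, us, ue));  /* 0: no wrap-around */
    return 0;
}
```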
```diff
@@ -38,7 +38,7 @@ int ihk_mc_ikc_init_first_local(struct ihk_ikc_channel_desc *channel,
     arch_master_channel_packet_handler = packet_handler;
 
     ihk_ikc_init_desc(channel, IKC_OS_HOST, 0, rq, wq,
-                      ihk_ikc_master_channel_packet_handler);
+                      ihk_ikc_master_channel_packet_handler, channel);
     ihk_ikc_enable_channel(channel);
 
     /* Set boot parameter */
```
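The hunk above passes the channel itself as an extra argument when the master channel descriptor is initialized. The sketch below illustrates the general callback-plus-context registration pattern this corresponds to; the type and function names are placeholders, not the IHK IKC API:

```c
/* Generic callback-with-context sketch (illustrative names, not IHK's). */
#include <stdio.h>

struct channel;
typedef int (*packet_handler_t)(struct channel *ch, void *packet, void *arg);

struct channel {
    packet_handler_t handler;
    void *handler_arg;          /* context handed back on every packet */
};

static void init_desc(struct channel *ch, packet_handler_t fn, void *arg)
{
    ch->handler = fn;
    ch->handler_arg = arg;      /* remember the caller-supplied context */
}

static int master_handler(struct channel *ch, void *packet, void *arg)
{
    printf("packet %p on channel %p (arg %p)\n", packet, (void *)ch, arg);
    return 0;
}

int main(void)
{
    struct channel ch;

    /* register the handler and pass the channel as its own context,
     * mirroring the extra 'channel' argument added in the diff */
    init_desc(&ch, master_handler, &ch);
    ch.handler(&ch, NULL, ch.handler_arg);
    return 0;
}
```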
```diff
@@ -12,16 +12,29 @@
 #include <errno.h>
 #include <ihk/debug.h>
 #include <registers.h>
+#include <mc_perf_event.h>
 
 extern unsigned int *x86_march_perfmap;
 extern int running_on_kvm(void);
 
 #define X86_CR4_PCE     0x00000100
 
+int perf_counters_discovered = 0;
+int X86_IA32_NUM_PERF_COUNTERS = 0;
+unsigned long X86_IA32_PERF_COUNTERS_MASK = 0;
+int X86_IA32_NUM_FIXED_PERF_COUNTERS = 0;
+unsigned long X86_IA32_FIXED_PERF_COUNTERS_MASK = 0;
+
 void x86_init_perfctr(void)
 {
+    int i = 0;
     unsigned long reg;
     unsigned long value = 0;
+    uint64_t op;
+    uint64_t eax;
+    uint64_t ebx;
+    uint64_t ecx;
+    uint64_t edx;
 
     /* Do not do it on KVM */
     if (running_on_kvm()) return;
@@ -30,12 +43,41 @@ void x86_init_perfctr(void)
     asm volatile("movq %%cr4, %0" : "=r"(reg));
     reg |= X86_CR4_PCE;
     asm volatile("movq %0, %%cr4" : : "r"(reg));
 
+    /* Detect number of supported performance counters */
+    if (!perf_counters_discovered) {
+        /* See Table 35.2 - Architectural MSRs in Vol 3C */
+        op = 0x0a;
+        asm volatile("cpuid" : "=a"(eax),"=b"(ebx),"=c"(ecx),"=d"(edx):"a"(op));
+
+        X86_IA32_NUM_PERF_COUNTERS = ((eax & 0xFF00) >> 8);
+        X86_IA32_PERF_COUNTERS_MASK = (1 << X86_IA32_NUM_PERF_COUNTERS) - 1;
+
+        X86_IA32_NUM_FIXED_PERF_COUNTERS = (edx & 0x0F);
+        X86_IA32_FIXED_PERF_COUNTERS_MASK =
+            ((1UL << X86_IA32_NUM_FIXED_PERF_COUNTERS) - 1) <<
+            X86_IA32_BASE_FIXED_PERF_COUNTERS;
+
+        perf_counters_discovered = 1;
+        kprintf("X86_IA32_NUM_PERF_COUNTERS: %d, X86_IA32_NUM_FIXED_PERF_COUNTERS: %d\n",
+            X86_IA32_NUM_PERF_COUNTERS, X86_IA32_NUM_FIXED_PERF_COUNTERS);
+    }
+
+    /* Clear Fixed Counter Control */
+    value = rdmsr(MSR_PERF_FIXED_CTRL);
+    value &= 0xfffffffffffff000L;
+    wrmsr(MSR_PERF_FIXED_CTRL, value);
+
+    /* Clear Generic Counter Control */
+    for(i = 0; i < X86_IA32_NUM_PERF_COUNTERS; i++) {
+        wrmsr(MSR_IA32_PERFEVTSEL0 + i, 0);
+    }
+
     /* Enable PMC Control */
     value = rdmsr(MSR_PERF_GLOBAL_CTRL);
     value |= X86_IA32_PERF_COUNTERS_MASK;
     value |= X86_IA32_FIXED_PERF_COUNTERS_MASK;
     wrmsr(MSR_PERF_GLOBAL_CTRL, value);
 }
 
 static int set_perfctr_x86_direct(int counter, int mode, unsigned int value)
@@ -63,12 +105,12 @@ static int set_perfctr_x86_direct(int counter, int mode, unsigned int value)
     wrmsr(MSR_IA32_PERFEVTSEL0 + counter, value);
 
     //kprintf("wrmsr: %d <= %x\n", MSR_PERF_GLOBAL_CTRL, 0);
-    kprintf("wrmsr: %d <= %x\n", MSR_IA32_PERFEVTSEL0 + counter, value);
+    //kprintf("wrmsr: %d <= %x\n", MSR_IA32_PERFEVTSEL0 + counter, value);
 
     return 0;
 }
 
-static int set_pmc_x86_direct(int counter, unsigned long val)
+static int set_pmc_x86_direct(int counter, long val)
 {
     unsigned long cnt_bit = 0;
 
@@ -76,6 +118,8 @@ static int set_pmc_x86_direct(int counter, unsigned long val)
         return -EINVAL;
     }
 
+    val &= 0x000000ffffffffff; // 40bit Mask
+
     cnt_bit = 1UL << counter;
     if ( cnt_bit & X86_IA32_PERF_COUNTERS_MASK ) {
         // set generic pmc
@@ -102,7 +146,7 @@ static int set_perfctr_x86(int counter, int event, int mask, int inv, int count,
 static int set_fixed_counter(int counter, int mode)
 {
     unsigned long value = 0;
-    unsigned int ctr_mask = 0x7;
+    unsigned int ctr_mask = 0xf;
     int counter_idx = counter - X86_IA32_BASE_FIXED_PERF_COUNTERS ;
     unsigned int set_val = 0;
 
@@ -183,6 +227,24 @@ int ihk_mc_perfctr_stop(unsigned long counter_mask)
     value &= ~counter_mask;
     wrmsr(MSR_PERF_GLOBAL_CTRL, value);
 
+    if(counter_mask >> 32 & 0x1) {
+        value = rdmsr(MSR_PERF_FIXED_CTRL);
+        value &= ~(0xf);
+        wrmsr(MSR_PERF_FIXED_CTRL, value);
+    }
+
+    if(counter_mask >> 32 & 0x2) {
+        value = rdmsr(MSR_PERF_FIXED_CTRL);
+        value &= ~(0xf << 4);
+        wrmsr(MSR_PERF_FIXED_CTRL, value);
+    }
+
+    if(counter_mask >> 32 & 0x4) {
+        value = rdmsr(MSR_PERF_FIXED_CTRL);
+        value &= ~(0xf << 8);
+        wrmsr(MSR_PERF_FIXED_CTRL, value);
+    }
+
     return 0;
 }
 
@@ -190,7 +252,7 @@ int ihk_mc_perfctr_stop(unsigned long counter_mask)
 int ihk_mc_perfctr_fixed_init(int counter, int mode)
 {
     unsigned long value = 0;
-    unsigned int ctr_mask = 0x7;
+    unsigned int ctr_mask = 0xf;
     int counter_idx = counter - X86_IA32_BASE_FIXED_PERF_COUNTERS ;
     unsigned int set_val = 0;
 
@@ -210,6 +272,9 @@ int ihk_mc_perfctr_fixed_init(int counter, int mode)
         set_val |= 1;
     }
 
+    // enable PMI on overflow
+    set_val |= 1 << 3;
+
     set_val <<= counter_idx * 4;
     value |= set_val;
 
@@ -223,7 +288,7 @@ int ihk_mc_perfctr_reset(int counter)
     return set_pmc_x86_direct(counter, 0);
 }
 
-int ihk_mc_perfctr_set(int counter, unsigned long val)
+int ihk_mc_perfctr_set(int counter, long val)
 {
     return set_pmc_x86_direct(counter, val);
 }
@@ -297,23 +362,33 @@ unsigned long ihk_mc_perfctr_read_msr(int counter)
     return retval;
 }
 
-int ihk_mc_perfctr_alloc_counter(unsigned long pmc_status)
+int ihk_mc_perfctr_alloc_counter(unsigned int *type, unsigned long *config, unsigned long pmc_status)
 {
+    int ret = -1;
     int i = 0;
-    int ret = -1;
 
-    // find avail generic counter
-    for(i = 0; i < X86_IA32_NUM_PERF_COUNTERS; i++) {
+    if(*type == PERF_TYPE_HARDWARE) {
+        switch(*config){
+        case PERF_COUNT_HW_INSTRUCTIONS :
+            *type = PERF_TYPE_RAW;
+            *config = 0x5300c0;
+            break;
+        default :
+            // Unexpected config
+            return -1;
+        }
+    }
+    else if(*type != PERF_TYPE_RAW) {
+        return -1;
+    }
+
+    // find avail generic counter
+    for(i = 0; i < X86_IA32_NUM_PERF_COUNTERS; i++) {
         if(!(pmc_status & (1 << i))) {
             ret = i;
-            pmc_status |= (1 << i);
             break;
         }
     }
 
-    if(ret < 0){
-        return ret;
-    }
-
-    return ret;
+    return ret;
 }
```
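The perfctr hunks above replace hard-coded counter masks with values discovered at boot from CPUID leaf 0x0A: EAX[15:8] gives the number of general-purpose counters, and the low bits of EDX give the fixed-function counter count (the patch masks EDX with 0x0f). A user-space sketch of the same probe, assuming an x86-64 compiler with GNU inline asm:

```c
/* Sketch only: mirror the CPUID.0x0A probe x86_init_perfctr() adds above. */
#include <stdio.h>

int main(void)
{
    unsigned int eax, ebx, ecx, edx;

    asm volatile("cpuid"
                 : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
                 : "a"(0x0a), "c"(0));

    unsigned int n_generic = (eax >> 8) & 0xff;  /* EAX[15:8] */
    unsigned int n_fixed   = edx & 0x0f;         /* same mask the patch uses */

    printf("generic counters: %u (mask 0x%lx), fixed counters: %u\n",
           n_generic, (1UL << n_generic) - 1, n_fixed);
    return 0;
}
```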
```diff
@@ -38,6 +38,8 @@ void set_signal(int sig, void *regs0, siginfo_t *info);
 void check_signal(unsigned long rc, void *regs0, int num);
 extern unsigned long do_fork(int, unsigned long, unsigned long, unsigned long,
                              unsigned long, unsigned long, unsigned long);
+extern int get_xsave_size();
+extern uint64_t get_xsave_mask();
 
 //#define DEBUG_PRINT_SC
 
@@ -54,6 +56,7 @@ uintptr_t debug_constants[] = {
     offsetof(struct cpu_local_var, current),
     offsetof(struct cpu_local_var, runq),
     offsetof(struct cpu_local_var, status),
+    offsetof(struct cpu_local_var, idle),
     offsetof(struct thread, ctx),
     offsetof(struct thread, sched_list),
     offsetof(struct thread, proc),
@@ -219,6 +222,7 @@ SYSCALL_DECLARE(rt_sigreturn)
     struct x86_user_context *regs;
     struct sigsp ksigsp;
     struct sigsp *sigsp;
+    int xsavesize = get_xsave_size();
 
     asm ("movq %%gs:(%1),%0"
          : "=r"(regs)
@@ -265,12 +269,31 @@ SYSCALL_DECLARE(rt_sigreturn)
         check_signal(0, regs, 0);
         check_need_resched();
     }
+
+    if(ksigsp.fpregs && xsavesize){
+        void *fpregs = kmalloc(xsavesize + 64, IHK_MC_AP_NOWAIT);
+
+        if(fpregs){
+            uint64_t xsave_mask = get_xsave_mask();
+            unsigned int low = (unsigned int)xsave_mask;
+            unsigned int high = (unsigned int)(xsave_mask >> 32);
+            struct xsave_struct *kfpregs;
+
+            kfpregs = (void *)((((unsigned long)fpregs) + 63) & ~63);
+
+            if(copy_from_user(kfpregs, ksigsp.fpregs, xsavesize))
+                return -EFAULT;
+            asm volatile("xrstor %0" : : "m"(*kfpregs), "a"(low), "d"(high) : "memory");
+            kfree(fpregs);
+        }
+    }
+
     return sigsp->sigrc;
 }
 
 extern struct cpu_local_var *clv;
 extern unsigned long do_kill(struct thread *thread, int pid, int tid, int sig, struct siginfo *info, int ptracecont);
-extern void interrupt_syscall(int all, int pid);
+extern void interrupt_syscall(int pid, int tid);
 extern int num_processors;
 
 #define RFLAGS_MASK (RFLAGS_CF | RFLAGS_PF | RFLAGS_AF | RFLAGS_ZF | \
@@ -707,6 +730,8 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
     unsigned long *usp; /* user stack */
     struct sigsp ksigsp;
     struct sigsp *sigsp;
+    int xsavesize = get_xsave_size();
+    unsigned long fpregs;
 
     if((k->sa.sa_flags & SA_ONSTACK) &&
        !(thread->sigstack.ss_flags & SS_DISABLE) &&
@@ -719,7 +744,8 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
     else{
         usp = (unsigned long *)regs->gpr.rsp;
     }
-    sigsp = ((struct sigsp *)usp) - 1;
+    fpregs = (unsigned long)usp - xsavesize;
+    sigsp = ((struct sigsp *)fpregs) - 1;
     sigsp = (struct sigsp *)((unsigned long)sigsp & 0xfffffffffffffff0UL);
     memset(&ksigsp, '\0', sizeof ksigsp);
 
@@ -751,6 +777,33 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
     ksigsp.restart = isrestart(num, rc, sig, k->sa.sa_flags & SA_RESTART);
     if(num != 0 && rc == -EINTR && sig == SIGCHLD)
         ksigsp.restart = 1;
+    if(xsavesize){
+        uint64_t xsave_mask = get_xsave_mask();
+        unsigned int low = (unsigned int)xsave_mask;
+        unsigned int high = (unsigned int)(xsave_mask >> 32);
+        void *_kfpregs = kmalloc(xsavesize + 64, IHK_MC_AP_NOWAIT);
+        struct xsave_struct *kfpregs;
+
+        if(!_kfpregs){
+            kfree(pending);
+            kfree(_kfpregs);
+            kprintf("do_signal,no space available\n");
+            terminate(0, sig);
+            return;
+        }
+        kfpregs = (void *)((((unsigned long)_kfpregs) + 63) & ~63);
+        memset(kfpregs, '\0', xsavesize);
+        asm volatile("xsave %0" : : "m"(*kfpregs), "a"(low), "d"(high) : "memory");
+        if(copy_to_user((void *)fpregs, kfpregs, xsavesize)){
+            kfree(pending);
+            kfree(_kfpregs);
+            kprintf("do_signal,write_process_vm failed\n");
+            terminate(0, sig);
+            return;
+        }
+        ksigsp.fpregs = (void *)fpregs;
+        kfree(_kfpregs);
+    }
     memcpy(&ksigsp.info, &pending->info, sizeof(siginfo_t));
 
     if(copy_to_user(sigsp, &ksigsp, sizeof ksigsp)){
@@ -761,9 +814,6 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
         return;
     }
 
-
-
-
     usp = (unsigned long *)sigsp;
     usp--;
     *usp = (unsigned long)k->sa.sa_restorer;
@@ -1240,7 +1290,7 @@ done:
     cpu_restore_interrupt(irqstate);
 
     if (doint && !(mask & tthread->sigmask.__val[0])) {
-        int cpuid = tthread->cpu_id;
+        int tid = tthread->tid;
         int pid = tproc->pid;
         int status = tthread->status;
 
@@ -1251,12 +1301,12 @@ done:
         }
 
         if(!tthread->proc->nohost)
-            interrupt_syscall(pid, cpuid);
+            interrupt_syscall(pid, tid);
 
         if (status != PS_RUNNING) {
             if(sig == SIGKILL){
                 /* Wake up the target only when stopped by ptrace-reporting */
-                sched_wakeup_thread(tthread, PS_TRACED | PS_STOPPED);
+                sched_wakeup_thread(tthread, PS_TRACED | PS_STOPPED | PS_INTERRUPTIBLE);
             }
             else if(sig == SIGCONT || ptracecont == 1){
                 /* Wake up the target only when stopped by SIGSTOP */
@@ -1387,9 +1437,8 @@ SYSCALL_DECLARE(mmap)
         goto out;
     }
 
-    if ((addr < region->user_start)
-            || (region->user_end <= addr)
-            || ((region->user_end - addr) < len)) {
+    if ((flags & MAP_FIXED) && ((addr < region->user_start)
+            || (region->user_end <= addr))) {
         ekprintf("sys_mmap(%lx,%lx,%x,%x,%x,%lx):ENOMEM\n",
                 addr0, len0, prot, flags0, fd, off0);
         error = -ENOMEM;
@@ -1513,6 +1562,7 @@ static int vdso_get_vdso_info(void)
     struct ihk_ikc_channel_desc *ch = cpu_local_var(syscall_channel);
 
     dkprintf("vdso_get_vdso_info()\n");
+    memset(&vdso, '\0', sizeof vdso);
     vdso.busy = 1;
     vdso.vdso_npages = 0;
 
```
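Both do_signal() and rt_sigreturn() above allocate xsavesize + 64 bytes and round the kernel-side buffer up to a 64-byte boundary, because XSAVE and XRSTOR fault on misaligned save areas. A stand-alone sketch of that alignment idiom (plain C, not kernel code; the size is an example value):

```c
/* Sketch of the (ptr + 63) & ~63 alignment idiom used by the signal hunks. */
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

int main(void)
{
    size_t xsavesize = 2688;            /* example XSAVE area size */
    void *raw = malloc(xsavesize + 64); /* slack for worst-case misalignment */
    assert(raw != NULL);

    uintptr_t aligned = ((uintptr_t)raw + 63) & ~(uintptr_t)63;

    /* aligned buffer starts on a 64-byte boundary and still fits in the
     * over-sized allocation */
    assert(aligned % 64 == 0);
    assert(aligned + xsavesize <= (uintptr_t)raw + xsavesize + 64);

    free(raw);
    return 0;
}
```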
arch/x86/tools/irqbalance_mck.in.in (new file, 28 lines)

```diff
@@ -0,0 +1,28 @@
+# irqbalance is a daemon process that distributes interrupts across
+# CPUS on SMP systems. The default is to rebalance once every 10
+# seconds. This is the environment file that is specified to systemd via the
+# EnvironmentFile key in the service unit file (or via whatever method the init
+# system you're using has.
+#
+# ONESHOT=yes
+# after starting, wait for a minute, then look at the interrupt
+# load and balance it once; after balancing exit and do not change
+# it again.
+#IRQBALANCE_ONESHOT=
+
+#
+# IRQBALANCE_BANNED_CPUS
+# 64 bit bitmask which allows you to indicate which cpu's should
+# be skipped when reblancing irqs. Cpu numbers which have their
+# corresponding bits set to one in this mask will not have any
+# irq's assigned to them on rebalance
+#
+IRQBALANCE_BANNED_CPUS=%mask%
+
+#
+# IRQBALANCE_ARGS
+# append any args here to the irqbalance daemon as documented in the man page
+#
+IRQBALANCE_ARGS=--banirq=%banirq%
+
+
```
arch/x86/tools/irqbalance_mck.service.in (new file, 10 lines)

```diff
@@ -0,0 +1,10 @@
+[Unit]
+Description=irqbalance daemon
+After=syslog.target
+
+[Service]
+EnvironmentFile=@ETCDIR@/irqbalance_mck
+ExecStart=/usr/sbin/irqbalance --foreground $IRQBALANCE_ARGS
+
+[Install]
+WantedBy=multi-user.target
```
@@ -13,27 +13,44 @@
|
|||||||
# Note that the script does not output anything unless an error occurs.
|
# Note that the script does not output anything unless an error occurs.
|
||||||
|
|
||||||
prefix="@prefix@"
|
prefix="@prefix@"
|
||||||
BINDIR="@BINDIR@"
|
BINDIR="${prefix}/bin"
|
||||||
SBINDIR="@SBINDIR@"
|
SBINDIR="${prefix}/sbin"
|
||||||
KMODDIR="@KMODDIR@"
|
ETCDIR=@ETCDIR@
|
||||||
KERNDIR="@KERNDIR@"
|
KMODDIR="${prefix}/kmod"
|
||||||
|
KERNDIR="${prefix}/@TARGET@/kernel"
|
||||||
ENABLE_MCOVERLAYFS="@ENABLE_MCOVERLAYFS@"
|
ENABLE_MCOVERLAYFS="@ENABLE_MCOVERLAYFS@"
|
||||||
|
|
||||||
|
mem="512M@0"
|
||||||
|
cpus=""
|
||||||
|
|
||||||
INTERVAL=1
|
INTERVAL=1
|
||||||
LOGMODE=0
|
LOGMODE=0
|
||||||
while getopts :i:k: OPT
|
facility="LOG_LOCAL6"
|
||||||
|
chown_option=`logname 2> /dev/null`
|
||||||
|
|
||||||
|
if [ "`systemctl status irqbalance_mck.service 2> /dev/null |grep -E 'Active: active'`" != "" -o "`systemctl status irqbalance.service 2> /dev/null |grep -E 'Active: active'`" != "" ]; then
|
||||||
|
irqbalance_used="yes"
|
||||||
|
else
|
||||||
|
irqbalance_used="no"
|
||||||
|
fi
|
||||||
|
|
||||||
|
while getopts :i:k:c:m:o:f: OPT
|
||||||
do
|
do
|
||||||
case ${OPT} in
|
case ${OPT} in
|
||||||
|
f) facility=${OPTARG}
|
||||||
|
;;
|
||||||
|
o) chown_option=${OPTARG}
|
||||||
|
;;
|
||||||
i) INTERVAL=${OPTARG}
|
i) INTERVAL=${OPTARG}
|
||||||
expr "${INTERVAL}" + 1 > /dev/null 2>&1
|
expr "${INTERVAL}" + 1 > /dev/null 2>&1
|
||||||
if [ $? -ge 2 ]
|
if [ $? -ge 2 ]
|
||||||
then
|
then
|
||||||
echo "invalid -i value"
|
echo "invalid -i value" >&2
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
if [ ${INTERVAL} -le 0 ]
|
if [ ${INTERVAL} -le 0 ]
|
||||||
then
|
then
|
||||||
echo "invalid -i value"
|
echo "invalid -i value" >&2
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
@@ -41,22 +58,24 @@ do
|
|||||||
expr "${LOGMODE}" + 1 > /dev/null 2>&1
|
expr "${LOGMODE}" + 1 > /dev/null 2>&1
|
||||||
if [ $? -ge 2 ]
|
if [ $? -ge 2 ]
|
||||||
then
|
then
|
||||||
echo "invalid -k value"
|
echo "invalid -k value" >&2
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
if [ ${LOGMODE} -lt 0 -o ${LOGMODE} -gt 2 ]
|
if [ ${LOGMODE} -lt 0 -o ${LOGMODE} -gt 2 ]
|
||||||
then
|
then
|
||||||
echo "invalid -k value"
|
echo "invalid -k value" >&2
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
*) echo "invalid option -${OPT}"
|
c) cpus=${OPTARG}
|
||||||
|
;;
|
||||||
|
m) mem=${OPTARG}
|
||||||
|
;;
|
||||||
|
*) echo "invalid option -${OPT}" >&2
|
||||||
exit 1
|
exit 1
|
||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
|
|
||||||
mem="512M@0"
|
|
||||||
cpus=""
|
|
||||||
ihk_ikc_irq_core=0
|
ihk_ikc_irq_core=0
|
||||||
|
|
||||||
release=`uname -r`
|
release=`uname -r`
|
||||||
@@ -66,10 +85,17 @@ patch=`echo ${release} | sed -e 's/^[0-9]*.[0-9]*.\([0-9]*\).*/\1/'`
|
|||||||
linux_version_code=`expr \( ${major} \* 65536 \) + \( ${minor} \* 256 \) + ${patch}`
|
linux_version_code=`expr \( ${major} \* 65536 \) + \( ${minor} \* 256 \) + ${patch}`
|
||||||
rhel_release=`echo ${release} | sed -e 's/^[0-9]*.[0-9]*.[0-9]*-\([0-9]*\).*/\1/'`
|
rhel_release=`echo ${release} | sed -e 's/^[0-9]*.[0-9]*.[0-9]*-\([0-9]*\).*/\1/'`
|
||||||
if [ "${release}" == "${rhel_release}" ]; then rhel_release=""; fi
|
if [ "${release}" == "${rhel_release}" ]; then rhel_release=""; fi
|
||||||
|
enable_mcoverlay="no"
|
||||||
if [ "${ENABLE_MCOVERLAYFS}" == "yes" ]; then
|
if [ "${ENABLE_MCOVERLAYFS}" == "yes" ]; then
|
||||||
enable_mcoverlay=`if ( [ ${linux_version_code} -ge 262144 ] && [ ${linux_version_code} -lt 262400 ] ); then echo "yes"; else echo "no"; fi`
|
if [ "${rhel_release}" == "" ]; then
|
||||||
else
|
if [ ${linux_version_code} -ge 262144 -a ${linux_version_code} -lt 262400 ]; then
|
||||||
enable_mcoverlay=no
|
enable_mcoverlay="yes"
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
if [ ${linux_version_code} -eq 199168 -a ${rhel_release} -ge 327 ]; then
|
||||||
|
enable_mcoverlay="yes"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "$cpus" == "" ]; then
|
if [ "$cpus" == "" ]; then
|
||||||
@@ -79,12 +105,7 @@ if [ "$cpus" == "" ]; then
|
|||||||
# Use the second half of the cores
|
# Use the second half of the cores
|
||||||
let nr_cpus="$nr_cpus / 2"
|
let nr_cpus="$nr_cpus / 2"
|
||||||
cpus=`lscpu --parse | awk -F"," '{if ($4 == 0) print $1}' | tail -n $nr_cpus | xargs echo -n | sed 's/ /,/g'`
|
cpus=`lscpu --parse | awk -F"," '{if ($4 == 0) print $1}' | tail -n $nr_cpus | xargs echo -n | sed 's/ /,/g'`
|
||||||
if [ "$cpus" == "" ]; then echo "error: no available CPUs on NUMA node 0?"; exit; fi
|
if [ "$cpus" == "" ]; then echo "error: no available CPUs on NUMA node 0?" >&2; exit 1; fi
|
||||||
fi
|
|
||||||
|
|
||||||
# Remove delegator if loaded
|
|
||||||
if [ "`lsmod | grep mcctrl`" != "" ]; then
|
|
||||||
if ! rmmod mcctrl; then echo "error: removing mcctrl"; exit; fi
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Remove mcoverlay if loaded
|
# Remove mcoverlay if loaded
|
||||||
@@ -95,13 +116,19 @@ if [ "$enable_mcoverlay" == "yes" ]; then
|
|||||||
if [ "`cat /proc/mounts | grep /tmp/mcos/linux_proc`" != "" ]; then umount -l /tmp/mcos/linux_proc; fi
|
if [ "`cat /proc/mounts | grep /tmp/mcos/linux_proc`" != "" ]; then umount -l /tmp/mcos/linux_proc; fi
|
||||||
if [ "`cat /proc/mounts | grep /tmp/mcos`" != "" ]; then umount -l /tmp/mcos; fi
|
if [ "`cat /proc/mounts | grep /tmp/mcos`" != "" ]; then umount -l /tmp/mcos; fi
|
||||||
if [ -e /tmp/mcos ]; then rm -rf /tmp/mcos; fi
|
if [ -e /tmp/mcos ]; then rm -rf /tmp/mcos; fi
|
||||||
if ! rmmod mcoverlay; then echo "error: removing mcoverlay"; exit; fi
|
if ! rmmod mcoverlay; then echo "error: removing mcoverlay" >&2; exit 1; fi
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Stop irqbalance
|
||||||
|
if [ "${irqbalance_used}" == "yes" ]; then
|
||||||
|
systemctl stop irqbalance_mck.service 2>/dev/null
|
||||||
|
if ! systemctl stop irqbalance.service 2>/dev/null ; then echo "error: stopping irqbalance" >&2; exit 1; fi;
|
||||||
|
fi
|
||||||
|
|
||||||
# Load IHK if not loaded
|
# Load IHK if not loaded
|
||||||
if [ "`lsmod | grep ihk`" == "" ]; then
|
if [ "`lsmod | grep ihk`" == "" ]; then
|
||||||
if ! insmod ${KMODDIR}/ihk.ko; then echo "error: loading ihk"; exit; fi;
|
if ! insmod ${KMODDIR}/ihk.ko; then echo "error: loading ihk" >&2; exit 1; fi;
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Load IHK-SMP if not loaded and reserve CPUs and memory
|
# Load IHK-SMP if not loaded and reserve CPUs and memory
|
||||||
@@ -113,57 +140,61 @@ if [ "`lsmod | grep ihk_smp_x86`" == "" ]; then
|
|||||||
break
|
break
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
if [ "$ihk_irq" == "" ]; then echo "error: no IRQ available"; exit; fi
|
if [ "$ihk_irq" == "" ]; then echo "error: no IRQ available" >&2; exit 1; fi
|
||||||
if ! insmod ${KMODDIR}/ihk-smp-x86.ko ihk_start_irq=$ihk_irq ihk_ikc_irq_core=$ihk_ikc_irq_core; then echo "error: loading ihk-smp-x86"; exit; fi;
|
if ! insmod ${KMODDIR}/ihk-smp-x86.ko ihk_start_irq=$ihk_irq ihk_ikc_irq_core=$ihk_ikc_irq_core; then echo "error: loading ihk-smp-x86" >&2; exit 1; fi;
|
||||||
if ! ${SBINDIR}/ihkconfig 0 reserve cpu ${cpus}; then echo "error: reserving CPUs"; exit; fi
|
if ! ${SBINDIR}/ihkconfig 0 reserve cpu ${cpus}; then echo "error: reserving CPUs" >&2; exit 1; fi
|
||||||
if ! ${SBINDIR}/ihkconfig 0 reserve mem ${mem}; then echo "error: reserving memory"; exit; fi
|
if ! ${SBINDIR}/ihkconfig 0 reserve mem ${mem}; then echo "error: reserving memory" >&2; exit 1; fi
|
||||||
# If loaded, but no resources allocated, get CPUs and memory
|
# If loaded, but no resources allocated, get CPUs and memory
|
||||||
else
|
else
|
||||||
if ! ${SBINDIR}/ihkconfig 0 query cpu > /dev/null; then echo "error: querying cpus"; exit; fi
|
if ! ${SBINDIR}/ihkconfig 0 query cpu > /dev/null; then echo "error: querying cpus" >&2; exit 1; fi
|
||||||
cpus_allocated=`${SBINDIR}/ihkosctl 0 query cpu`
|
cpus_allocated=`${SBINDIR}/ihkosctl 0 query cpu`
|
||||||
if [ "$cpus_allocated" == "" ]; then
|
if [ "$cpus_allocated" == "" ]; then
|
||||||
if ! ${SBINDIR}/ihkconfig 0 reserve cpu ${cpus}; then echo "error: reserving CPUs"; exit; fi
|
if ! ${SBINDIR}/ihkconfig 0 reserve cpu ${cpus}; then echo "error: reserving CPUs" >&2; exit 1; fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if ! ${SBINDIR}/ihkosctl 0 query mem > /dev/null; then echo "error: querying memory"; exit; fi
|
if ! ${SBINDIR}/ihkosctl 0 query mem > /dev/null; then echo "error: querying memory" >&2; exit 1; fi
|
||||||
mem_allocated=`${SBINDIR}/ihkosctl 0 query mem`
|
mem_allocated=`${SBINDIR}/ihkosctl 0 query mem`
|
||||||
if [ "$mem_allocated" == "" ]; then
|
if [ "$mem_allocated" == "" ]; then
|
||||||
if ! ${SBINDIR}/ihkconfig 0 reserve mem ${mem}; then echo "error: reserving memory"; exit; fi
|
if ! ${SBINDIR}/ihkconfig 0 reserve mem ${mem}; then echo "error: reserving memory" >&2; exit 1; fi
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Load mcctrl if not loaded
|
||||||
|
if [ "`lsmod | grep mcctrl`" == "" ]; then
|
||||||
|
if ! insmod ${KMODDIR}/mcctrl.ko; then echo "error: inserting mcctrl.ko" >&2; exit 1; fi
|
||||||
|
fi
|
||||||
|
|
||||||
# Check for existing OS instance and destroy
|
# Check for existing OS instance and destroy
|
||||||
if [ -c /dev/mcos0 ]; then
|
if [ -c /dev/mcos0 ]; then
|
||||||
# Query CPU cores and memory of OS instance so that the same values are used as previously
|
# Query CPU cores and memory of OS instance so that the same values are used as previously
|
||||||
if ! ${SBINDIR}/ihkosctl 0 query cpu > /dev/null; then echo "error: querying cpus"; exit; fi
|
if ! ${SBINDIR}/ihkosctl 0 query cpu > /dev/null; then echo "error: querying cpus" >&2; exit 1; fi
|
||||||
cpus=`${SBINDIR}/ihkosctl 0 query cpu`
|
cpus=`${SBINDIR}/ihkosctl 0 query cpu`
|
||||||
if ! ${SBINDIR}/ihkosctl 0 query mem > /dev/null; then echo "error: querying memory"; exit; fi
|
if ! ${SBINDIR}/ihkosctl 0 query mem > /dev/null; then echo "error: querying memory" >&2; exit 1; fi
|
||||||
mem=`${SBINDIR}/ihkosctl 0 query mem`
|
mem=`${SBINDIR}/ihkosctl 0 query mem`
|
||||||
|
|
||||||
if ! ${SBINDIR}/ihkconfig 0 destroy 0; then echo "warning: destroy failed"; fi
|
if ! ${SBINDIR}/ihkconfig 0 destroy 0; then echo "warning: destroy failed" >&2; fi
|
||||||
else
|
else
|
||||||
# Otherwise query IHK-SMP for resources
|
# Otherwise query IHK-SMP for resources
|
||||||
if ! ${SBINDIR}/ihkconfig 0 query cpu > /dev/null; then echo "error: querying cpus"; exit; fi
|
if ! ${SBINDIR}/ihkconfig 0 query cpu > /dev/null; then echo "error: querying cpus" >&2; exit 1; fi
|
||||||
cpus=`${SBINDIR}/ihkconfig 0 query cpu`
|
cpus=`${SBINDIR}/ihkconfig 0 query cpu`
|
||||||
if ! ${SBINDIR}/ihkconfig 0 query mem > /dev/null; then echo "error: querying memory"; exit; fi
|
if ! ${SBINDIR}/ihkconfig 0 query mem > /dev/null; then echo "error: querying memory" >&2; exit 1; fi
|
||||||
mem=`${SBINDIR}/ihkconfig 0 query mem`
|
mem=`${SBINDIR}/ihkconfig 0 query mem`
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if ! ${SBINDIR}/ihkconfig 0 create; then echo "error: create"; exit; fi
|
if ! ${SBINDIR}/ihkconfig 0 create; then echo "error: create" >&2; exit; fi
|
||||||
if ! ${SBINDIR}/ihkosctl 0 assign cpu ${cpus}; then echo "error: assign CPUs"; exit; fi
|
if ! ${SBINDIR}/ihkosctl 0 assign cpu ${cpus}; then echo "error: assign CPUs" >&2; exit 1; fi
|
||||||
if ! ${SBINDIR}/ihkosctl 0 assign mem ${mem}; then echo "error: assign memory"; exit; fi
|
if ! ${SBINDIR}/ihkosctl 0 assign mem ${mem}; then echo "error: assign memory" >&2; exit 1; fi
|
||||||
if ! ${SBINDIR}/ihkosctl 0 load ${KERNDIR}/mckernel.img; then echo "error: loading kernel image"; exit; fi
|
if ! ${SBINDIR}/ihkosctl 0 load ${KERNDIR}/mckernel.img; then echo "error: loading kernel image" >&2; exit 1; fi
|
||||||
if ! ${SBINDIR}/ihkosctl 0 kargs "hidos ksyslogd=${LOGMODE}"; then echo "error: setting kernel arguments"; exit; fi
|
if ! ${SBINDIR}/ihkosctl 0 kargs "hidos ksyslogd=${LOGMODE}"; then echo "error: setting kernel arguments" >&2; exit 1; fi
|
||||||
if ! ${SBINDIR}/ihkosctl 0 boot; then echo "error: booting"; exit; fi
|
if ! ${SBINDIR}/ihkosctl 0 boot; then echo "error: booting" >&2; exit 1; fi
|
||||||
if ! insmod ${KMODDIR}/mcctrl.ko; then echo "error: inserting mcctrl.ko"; exit; fi
|
if ! chown ${chown_option} /dev/mcd* /dev/mcos*; then echo "error: chowning device files" >&2; exit 1; fi
|
||||||
if ! chown `logname` /dev/mcd* /dev/mcos*; then echo "error: chowning device files"; exit; fi
|
|
||||||
|
|
||||||
if [ "$enable_mcoverlay" == "yes" ]; then
|
if [ "$enable_mcoverlay" == "yes" ]; then
|
||||||
if [ ! -e /tmp/mcos ]; then mkdir -p /tmp/mcos; fi
|
if [ ! -e /tmp/mcos ]; then mkdir -p /tmp/mcos; fi
|
||||||
if ! mount -t tmpfs tmpfs /tmp/mcos; then echo "error: mount /tmp/mcos"; exit; fi
|
if ! mount -t tmpfs tmpfs /tmp/mcos; then echo "error: mount /tmp/mcos" >&2; exit 1; fi
|
||||||
if [ ! -e /tmp/mcos/linux_proc ]; then mkdir -p /tmp/mcos/linux_proc; fi
|
if [ ! -e /tmp/mcos/linux_proc ]; then mkdir -p /tmp/mcos/linux_proc; fi
|
||||||
if ! mount --bind /proc /tmp/mcos/linux_proc; then echo "error: mount /tmp/mcos/linux_proc"; exit; fi
|
if ! mount --bind /proc /tmp/mcos/linux_proc; then echo "error: mount /tmp/mcos/linux_proc" >&2; exit 1; fi
|
||||||
if ! insmod ${KMODDIR}/mcoverlay.ko; then echo "error: inserting mcoverlay.ko"; exit; fi
|
if ! insmod ${KMODDIR}/mcoverlay.ko; then echo "error: inserting mcoverlay.ko" >&2; exit 1; fi
|
||||||
while [ ! -e /proc/mcos0 ]
|
while [ ! -e /proc/mcos0 ]
|
||||||
do
|
do
|
||||||
sleep 1
|
sleep 1
|
||||||
@@ -171,7 +202,7 @@ if [ "$enable_mcoverlay" == "yes" ]; then
|
|||||||
if [ ! -e /tmp/mcos/mcos0_proc ]; then mkdir -p /tmp/mcos/mcos0_proc; fi
|
if [ ! -e /tmp/mcos/mcos0_proc ]; then mkdir -p /tmp/mcos/mcos0_proc; fi
|
||||||
if [ ! -e /tmp/mcos/mcos0_proc_upper ]; then mkdir -p /tmp/mcos/mcos0_proc_upper; fi
|
if [ ! -e /tmp/mcos/mcos0_proc_upper ]; then mkdir -p /tmp/mcos/mcos0_proc_upper; fi
|
||||||
if [ ! -e /tmp/mcos/mcos0_proc_work ]; then mkdir -p /tmp/mcos/mcos0_proc_work; fi
|
if [ ! -e /tmp/mcos/mcos0_proc_work ]; then mkdir -p /tmp/mcos/mcos0_proc_work; fi
|
||||||
if ! mount -t mcoverlay mcoverlay -o lowerdir=/proc/mcos0:/proc,upperdir=/tmp/mcos/mcos0_proc_upper,workdir=/tmp/mcos/mcos0_proc_work,nocopyupw,nofscheck /tmp/mcos/mcos0_proc; then echo "error: mount /tmp/mcos/mcos0_proc"; exit; fi
|
+if ! mount -t mcoverlay mcoverlay -o lowerdir=/proc/mcos0:/proc,upperdir=/tmp/mcos/mcos0_proc_upper,workdir=/tmp/mcos/mcos0_proc_work,nocopyupw,nofscheck /tmp/mcos/mcos0_proc; then echo "error: mount /tmp/mcos/mcos0_proc" >&2; exit 1; fi
 mount --make-rprivate /proc
 while [ ! -e /sys/devices/virtual/mcos/mcos0/sys ]
 do
@@ -180,7 +211,7 @@ if [ "$enable_mcoverlay" == "yes" ]; then
 if [ ! -e /tmp/mcos/mcos0_sys ]; then mkdir -p /tmp/mcos/mcos0_sys; fi
 if [ ! -e /tmp/mcos/mcos0_sys_upper ]; then mkdir -p /tmp/mcos/mcos0_sys_upper; fi
 if [ ! -e /tmp/mcos/mcos0_sys_work ]; then mkdir -p /tmp/mcos/mcos0_sys_work; fi
-if ! mount -t mcoverlay mcoverlay -o lowerdir=/sys/devices/virtual/mcos/mcos0/sys:/sys,upperdir=/tmp/mcos/mcos0_sys_upper,workdir=/tmp/mcos/mcos0_sys_work,nocopyupw,nofscheck /tmp/mcos/mcos0_sys; then echo "error: mount /tmp/mcos/mcos0_sys"; exit; fi
+if ! mount -t mcoverlay mcoverlay -o lowerdir=/sys/devices/virtual/mcos/mcos0/sys:/sys,upperdir=/tmp/mcos/mcos0_sys_upper,workdir=/tmp/mcos/mcos0_sys_work,nocopyupw,nofscheck /tmp/mcos/mcos0_sys; then echo "error: mount /tmp/mcos/mcos0_sys" >&2; exit 1; fi
 mount --make-rprivate /sys
 for cpuid in `find /sys/devices/system/cpu/* -maxdepth 0 -name "cpu[0123456789]*" -printf "%f "`; do
 if [ ! -e "/sys/devices/virtual/mcos/mcos0/sys/devices/system/cpu/$cpuid" ]; then
@@ -195,5 +226,25 @@ if [ "$enable_mcoverlay" == "yes" ]; then
 fi
 if [ ${LOGMODE} -ne 0 ]
 then
-SBINDIR=${SBINDIR} ${SBINDIR}/mcklogd -i ${INTERVAL}
+# mcklogd survives when McKernel isn't shut down by mcstop+release.sh
+pkill mcklogd
+SBINDIR=${SBINDIR} ${SBINDIR}/mcklogd -i ${INTERVAL} -f ${facility}
 fi

+# Start irqbalance with CPUs and IRQ for McKernel banned
+if [ "${irqbalance_used}" == "yes" ]; then
+if ! etcdir=@ETCDIR@ perl -e 'use File::Copy qw(copy); $etcdir=$ENV{'etcdir'}; @files = grep { -f } glob "/proc/irq/*/smp_affinity"; foreach $file (@files) { $rel = substr($file, 1); $dir=substr($rel, 0, length($rel)-length("/smp_affinity")); if(0) { print "cp $file $etcdir/$rel\n";} if(system("mkdir -p $etcdir/$dir")){ exit 1;} if(!copy($file,"$etcdir/$rel")){ exit 1;} }' ; then echo "error: saving /proc/irq/*/smp_affinity" >&2; exit 1; fi;

+ncpus=`lscpu | grep -E '^CPU\(s\):' | awk '{print $2}'`

+smp_affinity_mask=`echo $cpus | ncpus=$ncpus perl -e 'while(<>){@tokens = split /,/;foreach $token (@tokens) {@nums = split /-/,$token; for($num = $nums[0]; $num <= $nums[$#nums]; $num++) {$ndx=int($num/32); $mask[$ndx] |= (1<<($num % 32))}}} $nint32s = int(($ENV{'ncpus'}+31)/32); for($j = $nint32s - 1; $j >= 0; $j--) { if($j != $nint32s - 1){print ",";} $nblks = $j == $nint32s - 1 ? int(($ENV{'ncpus'} % 32)/4) : 8; for($i = $nblks - 1;$i >= 0;$i--){ printf("%01x",($mask[$j] >> ($i*4)) & 0xf);}}'`

+if ! ncpus=$ncpus smp_affinity_mask=$smp_affinity_mask perl -e '@dirs = grep { -d } glob "/proc/irq/*"; foreach $dir (@dirs) { $hit = 0; $affinity_str = `cat $dir/smp_affinity`; chomp $affinity_str; @int32strs = split /,/, $affinity_str; @int32strs_mask=split /,/, $ENV{'smp_affinity_mask'}; for($i=0;$i <= $#int32strs_mask; $i++) { $int32strs_inv[$i] = sprintf("%08x",hex($int32strs_mask[$i])^0xffffffff); if($i == 0) { $len = int((($ENV{'ncpus'}%32)+3)/4); $int32strs_inv[$i] = substr($int32strs_inv[$i], -$len, $len); } } $inv = join(",", @int32strs_inv); $nint32s = int(($ENV{'ncpus'}+31)/32); for($j = $nint32s - 1; $j >= 0; $j--) { if(hex($int32strs[$nint32s - 1 - $j]) & hex($int32strs_mask[$nint32s - 1 - $j])) { $hit = 1; }} if($hit == 1) { $cmd = "echo $inv > $dir/smp_affinity 2>/dev/null"; system $cmd;}}'; then echo "error: modifying /proc/irq/*/smp_affinity" >&2; exit 1; fi;

+banirq=`cat /proc/interrupts| perl -e 'while(<>) { if(/^\s*(\d+).*IHK\-SMP\s*$/) {print $1;}}'`

+sed "s/%mask%/$smp_affinity_mask/g" $ETCDIR/irqbalance_mck.in | sed "s/%banirq%/$banirq/g" > $ETCDIR/irqbalance_mck
+if ! systemctl link $ETCDIR/irqbalance_mck.service >/dev/null 2>/dev/null; then echo "error: linking irqbalance_mck" >&2; exit 1; fi;
+if ! systemctl start irqbalance_mck.service 2>/dev/null ; then echo "error: starting irqbalance_mck" >&2; exit 1; fi;
+# echo cpus=$cpus mask=$smp_affinity_mask banirq=$banirq
+fi
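Note on the affinity mask format used above: the Perl one-liner packs the McKernel CPU list (for example 2-7,10-13) into the comma-separated groups of 32-bit hex words that /proc/irq/N/smp_affinity expects, highest group first. The standalone C sketch below only illustrates that packing; the names (cpulist_to_mask, NGROUPS) are hypothetical and it omits the trimming of the partial leading group that the script performs.

/* Illustrative sketch: how a CPU list such as "2-7,10-13" maps onto the
 * comma-separated 32-bit hex groups used by /proc/irq/N/smp_affinity.
 * Names are hypothetical; this is not part of the patch. */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define NGROUPS 4                       /* enough for 128 CPUs in this sketch */

static void cpulist_to_mask(const char *list, uint32_t mask[NGROUPS])
{
	char buf[256];
	char *tok, *save = NULL;

	memset(mask, 0, NGROUPS * sizeof(uint32_t));
	snprintf(buf, sizeof(buf), "%s", list);

	for (tok = strtok_r(buf, ",", &save); tok; tok = strtok_r(NULL, ",", &save)) {
		char *dash = strchr(tok, '-');
		long lo = strtol(tok, NULL, 10);
		long hi = dash ? strtol(dash + 1, NULL, 10) : lo;
		long cpu;

		for (cpu = lo; cpu <= hi && cpu < NGROUPS * 32; cpu++)
			mask[cpu / 32] |= 1u << (cpu % 32);   /* one bit per CPU */
	}
}

int main(void)
{
	uint32_t mask[NGROUPS];
	int i;

	cpulist_to_mask("2-7,10-13", mask);

	/* Highest 32-bit group first, groups separated by commas. */
	for (i = NGROUPS - 1; i >= 0; i--)
		printf("%08x%s", mask[i], i ? "," : "\n");

	return 0;
}

For the example list this prints 00000000,00000000,00000000,00003cfc, which is the shape of value the script writes into irqbalance_mck via the %mask% substitution.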
@@ -10,6 +10,7 @@
 prefix="@prefix@"
 BINDIR="@BINDIR@"
 SBINDIR="@SBINDIR@"
+ETCDIR=@ETCDIR@
 KMODDIR="@KMODDIR@"
 KERNDIR="@KERNDIR@"

@@ -17,31 +18,47 @@ mem=""
 cpus=""

 # No SMP module? Exit.
-if [ "`lsmod | grep ihk_smp_x86`" == "" ]; then exit; fi
+if [ "`lsmod | grep ihk_smp_x86`" == "" ]; then exit 0; fi

+# Destroy all LWK instances
+if ls /dev/mcos* 1>/dev/null 2>&1; then
+for i in /dev/mcos*; do
+ind=`echo $i|cut -c10-`;
+if ! ${SBINDIR}/ihkconfig 0 destroy $ind; then echo "error: destroying LWK instance $ind failed" >&2; exit 1; fi
+done
+fi

+# Query IHK-SMP resources and release them
+if ! ${SBINDIR}/ihkconfig 0 query cpu > /dev/null; then echo "error: querying cpus" >&2; exit 1; fi
+cpus=`${SBINDIR}/ihkconfig 0 query cpu`
+if ! ${SBINDIR}/ihkconfig 0 release cpu $cpus > /dev/null; then echo "error: releasing CPUs" >&2; exit 1; fi

+if ! ${SBINDIR}/ihkconfig 0 query mem > /dev/null; then echo "error: querying memory" >&2; exit 1; fi
+mem=`${SBINDIR}/ihkconfig 0 query mem`
+if ! ${SBINDIR}/ihkconfig 0 release mem $mem > /dev/null; then echo "error: releasing memory" >&2; exit 1; fi

 # Remove delegator if loaded
 if [ "`lsmod | grep mcctrl`" != "" ]; then
-if ! rmmod mcctrl; then echo "error: removing mcctrl"; exit; fi
+if ! rmmod mcctrl; then echo "error: removing mcctrl" >&2; exit 1; fi
 fi

-# Destroy all LWK instances
-for i in /dev/mcos*; do
-ind=`echo $i|cut -c10-`;
-if ! ${SBINDIR}/ihkconfig 0 destroy $ind; then echo "error: destroying LWK instance $ind failed"; exit; fi
-done
-
-# Query IHK-SMP resources and release them
-if ! ${SBINDIR}/ihkconfig 0 query cpu > /dev/null; then echo "error: querying cpus"; exit; fi
-cpus=`${SBINDIR}/ihkconfig 0 query cpu`
-if ! ${SBINDIR}/ihkconfig 0 release cpu $cpus > /dev/null; then echo "error: releasing CPUs"; exit; fi
-
-if ! ${SBINDIR}/ihkconfig 0 query mem > /dev/null; then echo "error: querying memory"; exit; fi
-mem=`${SBINDIR}/ihkconfig 0 query mem`
-if ! ${SBINDIR}/ihkconfig 0 release mem $mem > /dev/null; then echo "error: releasing memory"; exit; fi

 # Remove SMP module
 if [ "`lsmod | grep ihk_smp_x86`" != "" ]; then
-if ! rmmod ihk_smp_x86; then echo "error: removing ihk_smp_x86"; exit; fi
+if ! rmmod ihk_smp_x86; then echo "error: removing ihk_smp_x86" >&2; exit 1; fi
 fi

+# Remove core module
+if [ "`lsmod | grep -E 'ihk\s' | awk '{print $1}'`" != "" ]; then
+if ! rmmod ihk; then echo "error: removing ihk" >&2; exit 1; fi
+fi

+# Stop mcklogd
+pkill mcklogd

+# Start irqbalance with the original settings
+if [ "`systemctl status irqbalance_mck.service 2> /dev/null |grep -E 'Active: active'`" != "" ]; then
+if ! systemctl stop irqbalance_mck.service 2>/dev/null ; then echo "error: stopping irqbalance_mck" >&2; exit 1; fi;
+if ! systemctl disable irqbalance_mck.service >/dev/null 2>/dev/null; then echo "error: disabling irqbalance_mck" >&2; exit 1; fi;
+if ! etcdir=@ETCDIR@ perl -e '$etcdir=$ENV{'etcdir'}; @files = grep { -f } glob "$etcdir/proc/irq/*/smp_affinity"; foreach $file (@files) { $dest = substr($file, length($etcdir)); if(0) {print "cp $file $dest\n";} system("cp $file $dest 2>/dev/null"); }' ; then echo "error: restoring /proc/irq/*/smp_affinity" >&2; exit 1; fi;
+if ! systemctl start irqbalance.service; then echo "error: starting irqbalance" >&2; exit 1; fi;
+fi
configure (vendored)

@@ -632,6 +632,7 @@ ENABLE_MCOVERLAYFS
 MANDIR
 KERNDIR
 KMODDIR
+ETCDIR
 SBINDIR
 BINDIR
 TARGET
@@ -3031,6 +3032,9 @@ case $WITH_TARGET in
 if test "X$SBINDIR" = X; then
 SBINDIR="$prefix/sbin"
 fi
+if test "X$ETCDIR" = X; then
+ETCDIR="$prefix/etc"
+fi
 if test "X$KMODDIR" = X; then
 KMODDIR="$prefix/kmod"
 fi
@@ -3882,11 +3886,12 @@ fi






 ac_config_headers="$ac_config_headers executer/config.h"

-ac_config_files="$ac_config_files Makefile executer/user/Makefile executer/kernel/mcctrl/Makefile executer/kernel/mcctrl/arch/x86_64/Makefile executer/kernel/mcoverlayfs/Makefile kernel/Makefile kernel/Makefile.build arch/x86/tools/mcreboot-attached-mic.sh arch/x86/tools/mcshutdown-attached-mic.sh arch/x86/tools/mcreboot-builtin-x86.sh arch/x86/tools/mcreboot-smp-x86.sh arch/x86/tools/mcstop+release-smp-x86.sh arch/x86/tools/mcshutdown-builtin-x86.sh arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in"
+ac_config_files="$ac_config_files Makefile executer/user/Makefile executer/kernel/mcctrl/Makefile executer/kernel/mcctrl/arch/x86_64/Makefile executer/kernel/mcoverlayfs/Makefile executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/Makefile executer/kernel/mcoverlayfs/linux-4.0.9/Makefile kernel/Makefile kernel/Makefile.build arch/x86/tools/mcreboot-attached-mic.sh arch/x86/tools/mcshutdown-attached-mic.sh arch/x86/tools/mcreboot-builtin-x86.sh arch/x86/tools/mcreboot-smp-x86.sh arch/x86/tools/mcstop+release-smp-x86.sh arch/x86/tools/mcshutdown-builtin-x86.sh arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in arch/x86/tools/irqbalance_mck.service arch/x86/tools/irqbalance_mck.in"


 if test "x$enable_dcfa" = xyes; then :
@@ -4590,6 +4595,8 @@ do
 "executer/kernel/mcctrl/Makefile") CONFIG_FILES="$CONFIG_FILES executer/kernel/mcctrl/Makefile" ;;
 "executer/kernel/mcctrl/arch/x86_64/Makefile") CONFIG_FILES="$CONFIG_FILES executer/kernel/mcctrl/arch/x86_64/Makefile" ;;
 "executer/kernel/mcoverlayfs/Makefile") CONFIG_FILES="$CONFIG_FILES executer/kernel/mcoverlayfs/Makefile" ;;
+"executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/Makefile") CONFIG_FILES="$CONFIG_FILES executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/Makefile" ;;
+"executer/kernel/mcoverlayfs/linux-4.0.9/Makefile") CONFIG_FILES="$CONFIG_FILES executer/kernel/mcoverlayfs/linux-4.0.9/Makefile" ;;
 "kernel/Makefile") CONFIG_FILES="$CONFIG_FILES kernel/Makefile" ;;
 "kernel/Makefile.build") CONFIG_FILES="$CONFIG_FILES kernel/Makefile.build" ;;
 "arch/x86/tools/mcreboot-attached-mic.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcreboot-attached-mic.sh" ;;
@@ -4599,6 +4606,8 @@ do
 "arch/x86/tools/mcstop+release-smp-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcstop+release-smp-x86.sh" ;;
 "arch/x86/tools/mcshutdown-builtin-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcshutdown-builtin-x86.sh" ;;
 "arch/x86/tools/mcreboot.1") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in" ;;
+"arch/x86/tools/irqbalance_mck.service") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/irqbalance_mck.service" ;;
+"arch/x86/tools/irqbalance_mck.in") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/irqbalance_mck.in" ;;
 "kernel/Makefile.dcfa") CONFIG_FILES="$CONFIG_FILES kernel/Makefile.dcfa" ;;

 *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
@@ -146,6 +146,9 @@ case $WITH_TARGET in
 if test "X$SBINDIR" = X; then
 SBINDIR="$prefix/sbin"
 fi
+if test "X$ETCDIR" = X; then
+ETCDIR="$prefix/etc"
+fi
 if test "X$KMODDIR" = X; then
 KMODDIR="$prefix/kmod"
 fi
@@ -278,6 +281,7 @@ AC_SUBST(KDIR)
 AC_SUBST(TARGET)
 AC_SUBST(BINDIR)
 AC_SUBST(SBINDIR)
+AC_SUBST(ETCDIR)
 AC_SUBST(KMODDIR)
 AC_SUBST(KERNDIR)
 AC_SUBST(MANDIR)
@@ -298,6 +302,8 @@ AC_CONFIG_FILES([
 executer/kernel/mcctrl/Makefile
 executer/kernel/mcctrl/arch/x86_64/Makefile
 executer/kernel/mcoverlayfs/Makefile
+executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/Makefile
+executer/kernel/mcoverlayfs/linux-4.0.9/Makefile
 kernel/Makefile
 kernel/Makefile.build
 arch/x86/tools/mcreboot-attached-mic.sh
@@ -307,6 +313,8 @@ AC_CONFIG_FILES([
 arch/x86/tools/mcstop+release-smp-x86.sh
 arch/x86/tools/mcshutdown-builtin-x86.sh
 arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in
+arch/x86/tools/irqbalance_mck.service
+arch/x86/tools/irqbalance_mck.in
 ])

 AS_IF([test "x$enable_dcfa" = xyes], [
@@ -110,6 +110,13 @@ struct program_load_desc {
 };

 struct syscall_request {
+	/* TID of requesting thread */
+	int rtid;
+	/*
+	 * TID of target thread. Remote page fault response needs to designate the
+	 * thread that must serve the request, 0 indicates any thread from the pool
+	 */
+	int ttid;
 	unsigned long valid;
 	unsigned long number;
 	unsigned long args[6];
@@ -128,8 +135,17 @@ struct syscall_load_desc {
 	unsigned long size;
 };

+#define IHK_SCD_REQ_THREAD_SPINNING 0
+#define IHK_SCD_REQ_THREAD_TO_BE_WOKEN 1
+#define IHK_SCD_REQ_THREAD_DESCHEDULED 2
+
 struct syscall_response {
+	/* TID of the thread that requested the service */
+	int ttid;
+	/* TID of the mcexec thread that is serving or has served the request */
+	int stid;
 	unsigned long status;
+	unsigned long req_thread_status;
 	long ret;
 	unsigned long fault_address;
 	unsigned long fault_reason;
@@ -1,5 +1,5 @@
 #include <linux/version.h>
-#include "../../../../config.h"
+#include "../../config.h"
 #include "../../mcctrl.h"

 #ifdef MCCTRL_KSYM_vdso_image_64
@@ -100,8 +100,6 @@ void get_vdso_info(ihk_os_t os, long vdso_rpa)
 	vdso_pa = ihk_device_map_memory(dev, vdso_rpa, sizeof(*vdso));
 	vdso = ihk_device_map_virtual(dev, vdso_pa, sizeof(*vdso), NULL, 0);

-	memset(vdso, 0, sizeof(*vdso));
-
 	/* VDSO pages */
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
 	size = vdso_image->size;
@@ -255,7 +255,7 @@ void __init binfmt_mcexec_init(void)
 	insert_binfmt(&mcexec_format);
 }

-void __exit binfmt_mcexec_exit(void)
+void binfmt_mcexec_exit(void)
 {
 	unregister_binfmt(&mcexec_format);
 }
@@ -32,6 +32,8 @@
 #include <linux/fs.h>
 #include <linux/file.h>
 #include <linux/version.h>
+#include <linux/semaphore.h>
+#include <linux/interrupt.h>
 #include <asm/uaccess.h>
 #include <asm/delay.h>
 #include <asm/io.h>
@@ -80,7 +82,6 @@ static long mcexec_prepare_image(ihk_os_t os,
 	void *args, *envs;
 	long ret = 0;
 	struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
-	unsigned long flags;
 	struct mcctrl_per_proc_data *ppd = NULL;

 	if (copy_from_user(&desc, udesc,
@@ -123,52 +124,48 @@ static long mcexec_prepare_image(ihk_os_t os,
 	}

 	pdesc->args = (void*)virt_to_phys(args);
-	printk("args: 0x%lX\n", (unsigned long)pdesc->args);
+	dprintk("args: 0x%lX\n", (unsigned long)pdesc->args);
-	printk("argc: %ld\n", *(long *)args);
+	dprintk("argc: %ld\n", *(long *)args);
 	pdesc->envs = (void*)virt_to_phys(envs);
-	printk("envs: 0x%lX\n", (unsigned long)pdesc->envs);
+	dprintk("envs: 0x%lX\n", (unsigned long)pdesc->envs);
-	printk("envc: %ld\n", *(long *)envs);
+	dprintk("envc: %ld\n", *(long *)envs);

 	isp.msg = SCD_MSG_PREPARE_PROCESS;
 	isp.ref = pdesc->cpu;
 	isp.arg = virt_to_phys(pdesc);

-	printk("# of sections: %d\n", pdesc->num_sections);
+	dprintk("# of sections: %d\n", pdesc->num_sections);
-	printk("%p (%lx)\n", pdesc, isp.arg);
+	dprintk("%p (%lx)\n", pdesc, isp.arg);

 	pdesc->status = 0;
 	mcctrl_ikc_send(os, pdesc->cpu, &isp);

-	wait_event_interruptible(usrdata->wq_prepare, pdesc->status);
+	while (wait_event_interruptible(usrdata->wq_prepare, pdesc->status) != 0);

 	if(pdesc->err < 0){
 		ret = pdesc->err;
 		goto free_out;
 	}

-	ppd = kmalloc(sizeof(*ppd), GFP_ATOMIC);
+	ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current));
 	if (!ppd) {
-		printk("ERROR: allocating per process data\n");
+		printk("ERROR: no per process data for PID %d\n", task_tgid_vnr(current));
-		ret = -ENOMEM;
+		ret = -EINVAL;
 		goto free_out;
 	}

-	ppd->pid = pdesc->pid;
+	/* Update rpgtable */
 	ppd->rpgtable = pdesc->rpgtable;

-	flags = ihk_ikc_spinlock_lock(&usrdata->per_proc_list_lock);
-	list_add_tail(&ppd->list, &usrdata->per_proc_list);
-	ihk_ikc_spinlock_unlock(&usrdata->per_proc_list_lock, flags);
-
-	dprintk("pid %d, rpgtable: 0x%lx added\n",
-		ppd->pid, ppd->rpgtable);
-
 	if (copy_to_user(udesc, pdesc, sizeof(struct program_load_desc) +
 		sizeof(struct program_image_section) * desc.num_sections)) {
 		ret = -EFAULT;
 		goto free_out;
 	}

+	dprintk("%s: pid %d, rpgtable: 0x%lx added\n",
+		__FUNCTION__, ppd->pid, ppd->rpgtable);
+
 	ret = 0;

 free_out:
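For context on the lookup that replaces the kmalloc() above: mcexec_prepare_image() now assumes a per-process entry keyed by the PID already exists (it is created in mcexec_open_exec() later in this change set) and only refreshes rpgtable. The sketch below shows the general shape of such a PID-hashed lookup under a reader lock; the structure layout and names here are simplified illustrations, not the exact code added by this patch.

/* Simplified sketch of a PID-keyed lookup table; names are illustrative.
 * Each bucket is assumed to have been set up with INIT_LIST_HEAD() and
 * rwlock_init() when the table was created. */
#include <linux/list.h>
#include <linux/spinlock.h>

#define PP_HASH_SIZE	128
#define PP_HASH_MASK	(PP_HASH_SIZE - 1)

struct pp_data {
	int pid;
	unsigned long rpgtable;
	struct list_head hash;
};

static struct list_head pp_hash[PP_HASH_SIZE];
static rwlock_t pp_hash_lock[PP_HASH_SIZE];

static struct pp_data *pp_lookup(int pid)
{
	struct pp_data *p, *found = NULL;
	int bucket = pid & PP_HASH_MASK;
	unsigned long flags;

	/* Lookups take the bucket lock shared; insertion and removal
	 * (not shown) would take write_lock_irqsave() on the same lock. */
	read_lock_irqsave(&pp_hash_lock[bucket], flags);
	list_for_each_entry(p, &pp_hash[bucket], hash) {
		if (p->pid == pid) {
			found = p;
			break;
		}
	}
	read_unlock_irqrestore(&pp_hash_lock[bucket], flags);

	return found;
}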
@@ -416,19 +413,200 @@ static long mcexec_get_cpu(ihk_os_t os)
|
|||||||
return info->n_cpus;
|
return info->n_cpus;
|
||||||
}
|
}
|
||||||
|
|
||||||
int mcexec_syscall(struct mcctrl_channel *c, int pid, unsigned long arg)
|
int mcctrl_add_per_proc_data(struct mcctrl_usrdata *ud, int pid,
|
||||||
|
struct mcctrl_per_proc_data *ppd)
|
||||||
|
{
|
||||||
|
struct mcctrl_per_proc_data *ppd_iter;
|
||||||
|
int hash = (pid & MCCTRL_PER_PROC_DATA_HASH_MASK);
|
||||||
|
int ret = 0;
|
||||||
|
unsigned long flags;
|
||||||
|
|
||||||
|
/* Check if data for this thread exists and add if not */
|
||||||
|
write_lock_irqsave(&ud->per_proc_data_hash_lock[hash], flags);
|
||||||
|
list_for_each_entry(ppd_iter, &ud->per_proc_data_hash[hash], hash) {
|
||||||
|
if (ppd_iter->pid == pid) {
|
||||||
|
ret = -EBUSY;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
list_add_tail(&ppd->hash, &ud->per_proc_data_hash[hash]);
|
||||||
|
|
||||||
|
out:
|
||||||
|
write_unlock_irqrestore(&ud->per_proc_data_hash_lock[hash], flags);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
int mcctrl_delete_per_proc_data(struct mcctrl_usrdata *ud, int pid)
|
||||||
|
{
|
||||||
|
struct mcctrl_per_proc_data *ppd_iter, *ppd = NULL;
|
||||||
|
int hash = (pid & MCCTRL_PER_PROC_DATA_HASH_MASK);
|
||||||
|
int ret = 0;
|
||||||
|
unsigned long flags;
|
||||||
|
|
||||||
|
write_lock_irqsave(&ud->per_proc_data_hash_lock[hash], flags);
|
||||||
|
list_for_each_entry(ppd_iter, &ud->per_proc_data_hash[hash], hash) {
|
||||||
|
if (ppd_iter->pid == pid) {
|
||||||
|
ppd = ppd_iter;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!ppd) {
|
||||||
|
ret = -EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
list_del(&ppd->hash);
|
||||||
|
|
||||||
|
out:
|
||||||
|
write_unlock_irqrestore(&ud->per_proc_data_hash_lock[hash], flags);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline struct mcctrl_per_proc_data *mcctrl_get_per_proc_data(
|
||||||
|
struct mcctrl_usrdata *ud, int pid)
|
||||||
|
{
|
||||||
|
struct mcctrl_per_proc_data *ppd_iter, *ppd = NULL;
|
||||||
|
int hash = (pid & MCCTRL_PER_PROC_DATA_HASH_MASK);
|
||||||
|
unsigned long flags;
|
||||||
|
|
||||||
|
/* Check if data for this process exists and return it */
|
||||||
|
read_lock_irqsave(&ud->per_proc_data_hash_lock[hash], flags);
|
||||||
|
|
||||||
|
list_for_each_entry(ppd_iter, &ud->per_proc_data_hash[hash], hash) {
|
||||||
|
if (ppd_iter->pid == pid) {
|
||||||
|
ppd = ppd_iter;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
read_unlock_irqrestore(&ud->per_proc_data_hash_lock[hash], flags);
|
||||||
|
return ppd;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Called indirectly from the IKC message handler.
|
||||||
|
*/
|
||||||
|
int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet)
|
||||||
{
|
{
|
||||||
struct wait_queue_head_list_node *wqhln = NULL;
|
struct wait_queue_head_list_node *wqhln = NULL;
|
||||||
struct wait_queue_head_list_node *wqhln_iter;
|
struct wait_queue_head_list_node *wqhln_iter;
|
||||||
|
struct wait_queue_head_list_node *wqhln_alloc = NULL;
|
||||||
|
int pid = packet->pid;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
struct mcctrl_per_proc_data *ppd;
|
||||||
|
|
||||||
|
/* Look up per-process structure */
|
||||||
|
ppd = mcctrl_get_per_proc_data(ud, pid);
|
||||||
|
|
||||||
|
if (unlikely(!ppd)) {
|
||||||
|
kprintf("%s: ERROR: no per-process structure for PID %d??\n",
|
||||||
|
__FUNCTION__, task_tgid_vnr(current));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
dprintk("%s: (packet_handler) rtid: %d, ttid: %d, sys nr: %d\n",
|
||||||
|
__FUNCTION__,
|
||||||
|
packet->req.rtid,
|
||||||
|
packet->req.ttid,
|
||||||
|
packet->req.number);
|
||||||
|
/*
|
||||||
|
* Three scenarios are possible:
|
||||||
|
* - Find the designated thread if req->ttid is specified.
|
||||||
|
* - Find any available thread if req->ttid is zero.
|
||||||
|
* - Add a request element if no threads are available.
|
||||||
|
*/
|
||||||
|
flags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock);
|
||||||
|
|
||||||
|
/* Is this a request for a specific thread? See if it's waiting */
|
||||||
|
if (unlikely(packet->req.ttid)) {
|
||||||
|
list_for_each_entry(wqhln_iter, &ppd->wq_list_exact, list) {
|
||||||
|
if (packet->req.ttid != task_pid_vnr(wqhln_iter->task))
|
||||||
|
continue;
|
||||||
|
|
||||||
/* Look up per-process wait queue head with pid */
|
|
||||||
flags = ihk_ikc_spinlock_lock(&c->wq_list_lock);
|
|
||||||
list_for_each_entry(wqhln_iter, &c->wq_list, list) {
|
|
||||||
if (wqhln_iter->pid == pid) {
|
|
||||||
wqhln = wqhln_iter;
|
wqhln = wqhln_iter;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
if (!wqhln) {
|
||||||
|
printk("%s: WARNING: no target thread found for exact request??\n",
|
||||||
|
__FUNCTION__);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Is there any thread available? */
|
||||||
|
else {
|
||||||
|
list_for_each_entry(wqhln_iter, &ppd->wq_list, list) {
|
||||||
|
if (wqhln_iter->task && !wqhln_iter->req) {
|
||||||
|
wqhln = wqhln_iter;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If no match found, add request to pending request list */
|
||||||
|
if (unlikely(!wqhln)) {
|
||||||
|
retry_alloc:
|
||||||
|
wqhln_alloc = kmalloc(sizeof(*wqhln), GFP_ATOMIC);
|
||||||
|
if (!wqhln_alloc) {
|
||||||
|
printk("WARNING: coudln't alloc wait queue head, retrying..\n");
|
||||||
|
goto retry_alloc;
|
||||||
|
}
|
||||||
|
|
||||||
|
wqhln = wqhln_alloc;
|
||||||
|
wqhln->req = 0;
|
||||||
|
wqhln->task = NULL;
|
||||||
|
init_waitqueue_head(&wqhln->wq_syscall);
|
||||||
|
list_add_tail(&wqhln->list, &ppd->wq_req_list);
|
||||||
|
}
|
||||||
|
|
||||||
|
wqhln->packet = packet;
|
||||||
|
wqhln->req = 1;
|
||||||
|
wake_up(&wqhln->wq_syscall);
|
||||||
|
ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, flags);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Called from an mcexec thread via ioctl().
|
||||||
|
*/
|
||||||
|
int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req)
|
||||||
|
{
|
||||||
|
struct ikc_scd_packet *packet;
|
||||||
|
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
|
||||||
|
struct wait_queue_head_list_node *wqhln = NULL;
|
||||||
|
struct wait_queue_head_list_node *wqhln_iter;
|
||||||
|
int ret = 0;
|
||||||
|
unsigned long irqflags;
|
||||||
|
struct mcctrl_per_proc_data *ppd;
|
||||||
|
|
||||||
|
/* Look up per-process structure */
|
||||||
|
ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current));
|
||||||
|
|
||||||
|
if (unlikely(!ppd)) {
|
||||||
|
kprintf("%s: ERROR: no per-process structure for PID %d??\n",
|
||||||
|
__FUNCTION__, task_tgid_vnr(current));
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
packet = (struct ikc_scd_packet *)mcctrl_get_per_thread_data(ppd, current);
|
||||||
|
if (packet) {
|
||||||
|
printk("%s: ERROR: packet %p is already registered for thread %d\n",
|
||||||
|
__FUNCTION__, packet, task_pid_vnr(current));
|
||||||
|
return -EBUSY;
|
||||||
|
}
|
||||||
|
|
||||||
|
retry:
|
||||||
|
/* Prepare per-thread wait queue head or find a valid request */
|
||||||
|
irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock);
|
||||||
|
/* First see if there is a valid request already that is not yet taken */
|
||||||
|
list_for_each_entry(wqhln_iter, &ppd->wq_req_list, list) {
|
||||||
|
if (wqhln_iter->task == NULL && wqhln_iter->req) {
|
||||||
|
wqhln = wqhln_iter;
|
||||||
|
wqhln->task = current;
|
||||||
|
list_del(&wqhln->list);
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!wqhln) {
|
if (!wqhln) {
|
||||||
@@ -439,180 +617,86 @@ retry_alloc:
|
|||||||
goto retry_alloc;
|
goto retry_alloc;
|
||||||
}
|
}
|
||||||
|
|
||||||
wqhln->pid = pid;
|
wqhln->task = current;
|
||||||
wqhln->req = 0;
|
wqhln->req = 0;
|
||||||
init_waitqueue_head(&wqhln->wq_syscall);
|
init_waitqueue_head(&wqhln->wq_syscall);
|
||||||
list_add_tail(&wqhln->list, &c->wq_list);
|
|
||||||
|
/* Wait for a request.. */
|
||||||
|
list_add(&wqhln->list, &ppd->wq_list);
|
||||||
|
ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, irqflags);
|
||||||
|
|
||||||
|
ret = wait_event_interruptible(wqhln->wq_syscall, wqhln->req);
|
||||||
|
|
||||||
|
/* Remove per-thread wait queue head */
|
||||||
|
irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock);
|
||||||
|
list_del(&wqhln->list);
|
||||||
}
|
}
|
||||||
|
ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, irqflags);
|
||||||
|
|
||||||
wqhln->req = 1;
|
|
||||||
wake_up(&wqhln->wq_syscall);
|
|
||||||
ihk_ikc_spinlock_unlock(&c->wq_list_lock, flags);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifndef DO_USER_MODE
|
|
||||||
// static int remaining_job, base_cpu, job_pos;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// extern int num_channels;
|
|
||||||
// extern int mcctrl_dma_abort;
|
|
||||||
|
|
||||||
int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req)
|
|
||||||
{
|
|
||||||
struct syscall_wait_desc swd;
|
|
||||||
struct mcctrl_channel *c;
|
|
||||||
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
|
|
||||||
struct wait_queue_head_list_node *wqhln;
|
|
||||||
struct wait_queue_head_list_node *wqhln_iter;
|
|
||||||
int ret = 0;
|
|
||||||
unsigned long irqflags;
|
|
||||||
#ifndef DO_USER_MODE
|
|
||||||
unsigned long s, w, d;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
//printk("mcexec_wait_syscall swd=%p req=%p size=%d\n", &swd, req, sizeof(swd.cpu));
|
|
||||||
if (copy_from_user(&swd, req, sizeof(swd))) {
|
|
||||||
return -EFAULT;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (swd.cpu >= usrdata->num_channels)
|
|
||||||
return -EINVAL;
|
|
||||||
|
|
||||||
c = get_peer_channel(usrdata, current);
|
|
||||||
if (c) {
|
|
||||||
printk("mcexec_wait_syscall:already registered. task %p ch %p\n",
|
|
||||||
current, c);
|
|
||||||
return -EBUSY;
|
|
||||||
}
|
|
||||||
c = usrdata->channels + swd.cpu;
|
|
||||||
|
|
||||||
#ifdef DO_USER_MODE
|
|
||||||
retry:
|
|
||||||
/* Prepare per-process wait queue head */
|
|
||||||
retry_alloc:
|
|
||||||
wqhln = kmalloc(sizeof(*wqhln), GFP_KERNEL);
|
|
||||||
if (!wqhln) {
|
|
||||||
printk("WARNING: coudln't alloc wait queue head, retrying..\n");
|
|
||||||
goto retry_alloc;
|
|
||||||
}
|
|
||||||
|
|
||||||
wqhln->pid = swd.pid;
|
|
||||||
wqhln->req = 0;
|
|
||||||
init_waitqueue_head(&wqhln->wq_syscall);
|
|
||||||
|
|
||||||
irqflags = ihk_ikc_spinlock_lock(&c->wq_list_lock);
|
|
||||||
/* First see if there is one wait queue already */
|
|
||||||
list_for_each_entry(wqhln_iter, &c->wq_list, list) {
|
|
||||||
if (wqhln_iter->pid == task_tgid_vnr(current)) {
|
|
||||||
kfree(wqhln);
|
|
||||||
wqhln = wqhln_iter;
|
|
||||||
list_del(&wqhln->list);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
list_add_tail(&wqhln->list, &c->wq_list);
|
|
||||||
ihk_ikc_spinlock_unlock(&c->wq_list_lock, irqflags);
|
|
||||||
|
|
||||||
ret = wait_event_interruptible(wqhln->wq_syscall, wqhln->req);
|
|
||||||
|
|
||||||
|
|
||||||
/* Remove per-process wait queue head */
|
|
||||||
irqflags = ihk_ikc_spinlock_lock(&c->wq_list_lock);
|
|
||||||
list_del(&wqhln->list);
|
|
||||||
ihk_ikc_spinlock_unlock(&c->wq_list_lock, irqflags);
|
|
||||||
if (ret && !wqhln->req) {
|
if (ret && !wqhln->req) {
|
||||||
kfree(wqhln);
|
kfree(wqhln);
|
||||||
|
wqhln = NULL;
|
||||||
return -EINTR;
|
return -EINTR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
packet = wqhln->packet;
|
||||||
kfree(wqhln);
|
kfree(wqhln);
|
||||||
|
wqhln = NULL;
|
||||||
|
|
||||||
if (c->param.request_va->number == 61 &&
|
dprintk("%s: tid: %d request from CPU %d\n",
|
||||||
c->param.request_va->args[0] == swd.pid) {
|
__FUNCTION__, task_pid_vnr(current), packet->ref);
|
||||||
|
|
||||||
dprintk("pid: %d, tid: %d: SC %d, swd.cpu: %d, WARNING: wait4() for self?\n",
|
|
||||||
task_tgid_vnr(current),
|
|
||||||
task_pid_vnr(current);
|
|
||||||
c->param.request_va->number,
|
|
||||||
swd.cpu);
|
|
||||||
|
|
||||||
return -EINTR;
|
|
||||||
}
|
|
||||||
|
|
||||||
#if 1
|
|
||||||
mb();
|
mb();
|
||||||
if (!c->param.request_va->valid) {
|
if (!packet->req.valid) {
|
||||||
printk("mcexec_wait_syscall:stray wakeup\n");
|
printk("%s: ERROR: stray wakeup pid: %d, tid: %d: SC %lu\n",
|
||||||
|
__FUNCTION__,
|
||||||
|
task_tgid_vnr(current),
|
||||||
|
task_pid_vnr(current),
|
||||||
|
packet->req.number);
|
||||||
|
ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet,
|
||||||
|
(usrdata->channels + packet->ref)->c);
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
while (1) {
|
|
||||||
c = usrdata->channels + swd.cpu;
|
|
||||||
ihk_get_tsc(s);
|
|
||||||
if (!usrdata->remaining_job) {
|
|
||||||
while (!(*c->param.doorbell_va)) {
|
|
||||||
mb();
|
|
||||||
cpu_relax();
|
|
||||||
ihk_get_tsc(w);
|
|
||||||
if (w > s + 1024UL * 1024 * 1024 * 10) {
|
|
||||||
return -EINTR;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
d = (*c->param.doorbell_va) - 1;
|
|
||||||
*c->param.doorbell_va = 0;
|
|
||||||
|
|
||||||
if (d < 0 || d >= usrdata->num_channels) {
|
packet->req.valid = 0; /* ack */
|
||||||
d = 0;
|
dprintk("%s: system call: %d, args[0]: %lu, args[1]: %lu, args[2]: %lu, "
|
||||||
}
|
"args[3]: %lu, args[4]: %lu, args[5]: %lu\n",
|
||||||
usrdata->base_cpu = d;
|
__FUNCTION__,
|
||||||
usrdata->job_pos = 0;
|
packet->req.number,
|
||||||
usrdata->remaining_job = 1;
|
packet->req.args[0],
|
||||||
} else {
|
packet->req.args[1],
|
||||||
usrdata->job_pos++;
|
packet->req.args[2],
|
||||||
}
|
packet->req.args[3],
|
||||||
|
packet->req.args[4],
|
||||||
for (; usrdata->job_pos < usrdata->num_channels; usrdata->job_pos++) {
|
packet->req.args[5]);
|
||||||
if (base_cpu + job_pos >= num_channels) {
|
|
||||||
c = usrdata->channels +
|
if (mcctrl_add_per_thread_data(ppd, current, packet) < 0) {
|
||||||
(usrdata->base_cpu + usrdata->job_pos - usrdata->num_channels);
|
kprintf("%s: error adding per-thread data\n", __FUNCTION__);
|
||||||
} else {
|
return -EINVAL;
|
||||||
c = usrdata->channels + usrdata->base_cpu + usrdata->job_pos;
|
|
||||||
}
|
|
||||||
if (!c) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (c->param.request_va &&
|
|
||||||
c->param.request_va->valid) {
|
|
||||||
#endif
|
|
||||||
c->param.request_va->valid = 0; /* ack */
|
|
||||||
dprintk("SC #%lx, %lx\n",
|
|
||||||
c->param.request_va->number,
|
|
||||||
c->param.request_va->args[0]);
|
|
||||||
register_peer_channel(usrdata, current, c);
|
|
||||||
if (__do_in_kernel_syscall(os, c, c->param.request_va)) {
|
|
||||||
if (copy_to_user(&req->sr, c->param.request_va,
|
|
||||||
sizeof(struct syscall_request))) {
|
|
||||||
deregister_peer_channel(usrdata, current, c);
|
|
||||||
return -EFAULT;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
deregister_peer_channel(usrdata, current, c);
|
|
||||||
#ifdef DO_USER_MODE
|
|
||||||
goto retry;
|
|
||||||
#endif
|
|
||||||
#ifndef DO_USER_MODE
|
|
||||||
if (usrdata->mcctrl_dma_abort) {
|
|
||||||
return -2;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
usrdata->remaining_job = 0;
|
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
return 0;
|
if (__do_in_kernel_syscall(os, packet)) {
|
||||||
|
if (copy_to_user(&req->sr, &packet->req,
|
||||||
|
sizeof(struct syscall_request))) {
|
||||||
|
|
||||||
|
if (mcctrl_delete_per_thread_data(ppd, current) < 0) {
|
||||||
|
kprintf("%s: error deleting per-thread data\n", __FUNCTION__);
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
return -EFAULT;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet,
|
||||||
|
(usrdata->channels + packet->ref)->c);
|
||||||
|
|
||||||
|
if (mcctrl_delete_per_thread_data(ppd, current) < 0) {
|
||||||
|
kprintf("%s: error deleting per-thread data\n", __FUNCTION__);
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
goto retry;
|
||||||
}
|
}
|
||||||
|
|
||||||
long mcexec_pin_region(ihk_os_t os, unsigned long *__user arg)
|
long mcexec_pin_region(ihk_os_t os, unsigned long *__user arg)
|
||||||
@@ -695,33 +779,6 @@ long mcexec_load_syscall(ihk_os_t os, struct syscall_load_desc *__user arg)
 #endif

 	ihk_device_unmap_memory(ihk_os_to_dev(os), phys, desc.size);

-	/*
-	ihk_dma_channel_t channel;
-	struct ihk_dma_request request;
-	unsigned long dma_status = 0;
-
-	channel = ihk_device_get_dma_channel(ihk_os_to_dev(os), 0);
-	if (!channel) {
-		return -EINVAL;
-	}
-
-	memset(&request, 0, sizeof(request));
-	request.src_os = os;
-	request.src_phys = desc.src;
-	request.dest_os = NULL;
-	request.dest_phys = desc.dest;
-	request.size = desc.size;
-	request.notify = (void *)virt_to_phys(&dma_status);
-	request.priv = (void *)1;
-
-	ihk_dma_request(channel, &request);
-
-	while (!dma_status) {
-		mb();
-		udelay(1);
-	}
-	*/
-
 	return 0;
 }
@@ -729,80 +786,66 @@ long mcexec_load_syscall(ihk_os_t os, struct syscall_load_desc *__user arg)
|
|||||||
long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg)
|
long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg)
|
||||||
{
|
{
|
||||||
struct syscall_ret_desc ret;
|
struct syscall_ret_desc ret;
|
||||||
struct mcctrl_channel *mc;
|
struct ikc_scd_packet *packet;
|
||||||
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
|
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
|
||||||
#if 0
|
struct mcctrl_per_proc_data *ppd;
|
||||||
ihk_dma_channel_t channel;
|
|
||||||
struct ihk_dma_request request;
|
|
||||||
|
|
||||||
channel = ihk_device_get_dma_channel(ihk_os_to_dev(os), 0);
|
|
||||||
if (!channel) {
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (copy_from_user(&ret, arg, sizeof(struct syscall_ret_desc))) {
|
if (copy_from_user(&ret, arg, sizeof(struct syscall_ret_desc))) {
|
||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
}
|
}
|
||||||
mc = usrdata->channels + ret.cpu;
|
|
||||||
if (!mc) {
|
/* Look up per-process structure */
|
||||||
|
ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current));
|
||||||
|
if (!ppd) {
|
||||||
|
kprintf("%s: ERROR: no per-process structure for PID %d??\n",
|
||||||
|
__FUNCTION__, task_tgid_vnr(current));
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
deregister_peer_channel(usrdata, current, mc);
|
|
||||||
|
|
||||||
mc->param.response_va->ret = ret.ret;
|
packet = (struct ikc_scd_packet *)mcctrl_get_per_thread_data(ppd, current);
|
||||||
|
if (!packet) {
|
||||||
|
kprintf("%s: ERROR: no packet registered for TID %d\n",
|
||||||
|
__FUNCTION__, task_pid_vnr(current));
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
mcctrl_delete_per_thread_data(ppd, current);
|
||||||
|
|
||||||
if (ret.size > 0) {
|
if (ret.size > 0) {
|
||||||
/* Host => Accel. Write is fast. */
|
/* Host => Accel. Write is fast. */
|
||||||
unsigned long phys;
|
unsigned long phys;
|
||||||
void *rpm;
|
void *rpm;
|
||||||
|
|
||||||
phys = ihk_device_map_memory(ihk_os_to_dev(os), ret.dest,
|
phys = ihk_device_map_memory(ihk_os_to_dev(os), ret.dest, ret.size);
|
||||||
ret.size);
|
|
||||||
#ifdef CONFIG_MIC
|
#ifdef CONFIG_MIC
|
||||||
rpm = ioremap_wc(phys, ret.size);
|
rpm = ioremap_wc(phys, ret.size);
|
||||||
#else
|
#else
|
||||||
rpm = ihk_device_map_virtual(ihk_os_to_dev(os), phys,
|
rpm = ihk_device_map_virtual(ihk_os_to_dev(os), phys,
|
||||||
ret.size, NULL, 0);
|
ret.size, NULL, 0);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (copy_from_user(rpm, (void *__user)ret.src, ret.size)) {
|
if (copy_from_user(rpm, (void *__user)ret.src, ret.size)) {
|
||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
}
|
}
|
||||||
|
|
||||||
mb();
|
|
||||||
mc->param.response_va->status = 1;
|
|
||||||
|
|
||||||
#ifdef CONFIG_MIC
|
#ifdef CONFIG_MIC
|
||||||
iounmap(rpm);
|
iounmap(rpm);
|
||||||
#else
|
#else
|
||||||
ihk_device_unmap_virtual(ihk_os_to_dev(os), rpm, ret.size);
|
ihk_device_unmap_virtual(ihk_os_to_dev(os), rpm, ret.size);
|
||||||
#endif
|
#endif
|
||||||
ihk_device_unmap_memory(ihk_os_to_dev(os), phys, ret.size);
|
ihk_device_unmap_memory(ihk_os_to_dev(os), phys, ret.size);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
__return_syscall(os, packet, ret.ret, task_pid_vnr(current));
|
||||||
memset(&request, 0, sizeof(request));
|
|
||||||
request.src_os = NULL;
|
/* Free packet */
|
||||||
request.src_phys = ret.src;
|
ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet,
|
||||||
request.dest_os = os;
|
(usrdata->channels + packet->ref)->c);
|
||||||
request.dest_phys = ret.dest;
|
|
||||||
request.size = ret.size;
|
|
||||||
request.notify_os = os;
|
|
||||||
request.notify = (void *)mc->param.response_rpa;
|
|
||||||
request.priv = (void *)1;
|
|
||||||
|
|
||||||
ihk_dma_request(channel, &request);
|
|
||||||
*/
|
|
||||||
} else {
|
|
||||||
mb();
|
|
||||||
mc->param.response_va->status = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
LIST_HEAD(mckernel_exec_files);
|
LIST_HEAD(mckernel_exec_files);
|
||||||
DEFINE_SPINLOCK(mckernel_exec_file_lock);
|
DEFINE_SEMAPHORE(mckernel_exec_file_lock);
|
||||||
|
|
||||||
|
|
||||||
struct mckernel_exec_file {
|
struct mckernel_exec_file {
|
||||||
@@ -861,14 +904,53 @@ int mcexec_open_exec(ihk_os_t os, char * __user filename)
 	int retval;
 	int os_ind = ihk_host_os_get_index(os);
 	char *pathbuf, *fullpath;
+	struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
+	struct mcctrl_per_proc_data *ppd = NULL;
+	int i;

 	if (os_ind < 0) {
 		return EINVAL;
 	}

+	ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current));
+
+	if (!ppd) {
+		ppd = kmalloc(sizeof(*ppd), GFP_KERNEL);
+		if (!ppd) {
+			printk("ERROR: allocating per process data\n");
+			return -ENOMEM;
+		}
+
+		ppd->pid = task_tgid_vnr(current);
+		/*
+		 * XXX: rpgtable will be updated in __do_in_kernel_syscall()
+		 * under case __NR_munmap
+		 */
+		INIT_LIST_HEAD(&ppd->wq_list);
+		INIT_LIST_HEAD(&ppd->wq_req_list);
+		INIT_LIST_HEAD(&ppd->wq_list_exact);
+		spin_lock_init(&ppd->wq_list_lock);
+
+		for (i = 0; i < MCCTRL_PER_THREAD_DATA_HASH_SIZE; ++i) {
+			INIT_LIST_HEAD(&ppd->per_thread_data_hash[i]);
+			rwlock_init(&ppd->per_thread_data_hash_lock[i]);
+		}
+
+		if (mcctrl_add_per_proc_data(usrdata, ppd->pid, ppd) < 0) {
+			printk("%s: error adding per process data\n", __FUNCTION__);
+			retval = EINVAL;
+			goto out_free_ppd;
+		}
+	}
+	else {
+		/* Only deallocate in case of an error if we added it above */
+		ppd = NULL;
+	}

 	pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
 	if (!pathbuf) {
-		return ENOMEM;
+		retval = ENOMEM;
+		goto out_error_drop_ppd;
 	}

 	file = open_exec(filename);
@@ -889,7 +971,7 @@ int mcexec_open_exec(ihk_os_t os, char * __user filename)
 		goto out_put_file;
 	}

-	spin_lock_irq(&mckernel_exec_file_lock);
+	down(&mckernel_exec_file_lock);
 	/* Find previous file (if exists) and drop it */
 	list_for_each_entry(mcef_iter, &mckernel_exec_files, list) {
 		if (mcef_iter->os == os && mcef_iter->pid == task_tgid_vnr(current)) {
@@ -900,7 +982,7 @@ int mcexec_open_exec(ihk_os_t os, char * __user filename)
 			break;
 		}
 	}

 	/* Add new exec file to the list */
 	mcef->os = os;
 	mcef->pid = task_tgid_vnr(current);
@@ -910,19 +992,22 @@ int mcexec_open_exec(ihk_os_t os, char * __user filename)
 	/* Create /proc/self/exe entry */
 	add_pid_entry(os_ind, task_tgid_vnr(current));
 	proc_exe_link(os_ind, task_tgid_vnr(current), fullpath);
-	spin_unlock(&mckernel_exec_file_lock);
+	up(&mckernel_exec_file_lock);

 	dprintk("%d open_exec and holding file: %s\n", (int)task_tgid_vnr(current), filename);

 	kfree(pathbuf);

 	return 0;

 out_put_file:
 	fput(file);

 out_error_free:
 	kfree(pathbuf);
+out_error_drop_ppd:
+	if (ppd) mcctrl_delete_per_proc_data(usrdata, ppd->pid);
+out_free_ppd:
+	if (ppd) kfree(ppd);
 	return -retval;
 }

@@ -932,12 +1017,29 @@ int mcexec_close_exec(ihk_os_t os)
 	struct mckernel_exec_file *mcef = NULL;
 	int found = 0;
 	int os_ind = ihk_host_os_get_index(os);
+	struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
+	struct mcctrl_per_proc_data *ppd = NULL;
+
+	ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current));
+
+	if (ppd) {
+		mcctrl_delete_per_proc_data(usrdata, ppd->pid);
+
+		dprintk("pid: %d, tid: %d: rpgtable for %d (0x%lx) removed\n",
+			task_tgid_vnr(current), current->pid, ppd->pid, ppd->rpgtable);
+
+		kfree(ppd);
+	}
+	else {
+		printk("WARNING: no per process data for pid %d ?\n",
+			task_tgid_vnr(current));
+	}

 	if (os_ind < 0) {
 		return EINVAL;
 	}

-	spin_lock_irq(&mckernel_exec_file_lock);
+	down(&mckernel_exec_file_lock);
 	list_for_each_entry(mcef, &mckernel_exec_files, list) {
 		if (mcef->os == os && mcef->pid == task_tgid_vnr(current)) {
 			allow_write_access(mcef->fp);
@@ -950,7 +1052,7 @@ int mcexec_close_exec(ihk_os_t os)
 		}
 	}

-	spin_unlock(&mckernel_exec_file_lock);
+	up(&mckernel_exec_file_lock);

 	return (found ? 0 : EINVAL);
 }
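A short note on the locking change in the two functions above: mckernel_exec_file_lock becomes a semaphore because the protected section runs in process context and may block (open_exec()/fput() and the procfs updates), which code under a spinlock must never do. A minimal, self-contained sketch of the pattern, with hypothetical names:

#include <linux/semaphore.h>
#include <linux/list.h>

static LIST_HEAD(tracked_files);
static DEFINE_SEMAPHORE(tracked_files_lock);	/* binary semaphore, count = 1 */

struct tracked_file {
	int pid;
	struct list_head list;
};

/* Process context only: down() can sleep, which is exactly why a spinlock
 * would be unsafe if the body of the critical section can block. */
static void track_file(struct tracked_file *tf)
{
	down(&tracked_files_lock);
	list_add_tail(&tf->list, &tracked_files);
	up(&tracked_files_lock);
}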
|||||||
@@ -82,79 +82,109 @@ static struct ihk_os_user_call mcctrl_uc[OS_MAX_MINOR];
|
|||||||
|
|
||||||
static ihk_os_t os[OS_MAX_MINOR];
|
static ihk_os_t os[OS_MAX_MINOR];
|
||||||
|
|
||||||
ihk_os_t
|
ihk_os_t osnum_to_os(int n)
|
||||||
osnum_to_os(int n)
|
|
||||||
{
|
{
|
||||||
return os[n];
|
return os[n];
|
||||||
}
|
}
|
||||||
|
|
||||||
static int __init mcctrl_init(void)
|
/* OS event notifier implementation */
|
||||||
|
int mcctrl_os_boot_notifier(int os_index)
|
||||||
{
|
{
|
||||||
int i;
|
|
||||||
int rc;
|
int rc;
|
||||||
|
|
||||||
rc = -ENOENT;
|
os[os_index] = ihk_host_find_os(os_index, NULL);
|
||||||
for(i = 0; i < OS_MAX_MINOR; i++){
|
if (!os[os_index]) {
|
||||||
os[i] = ihk_host_find_os(i, NULL);
|
printk("mcctrl: error: OS ID %d couldn't be found\n", os_index);
|
||||||
if (os[i]) {
|
return -EINVAL;
|
||||||
printk("OS #%d found.\n", i);
|
|
||||||
rc = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if(rc){
|
|
||||||
printk("OS not found.\n");
|
|
||||||
return rc;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for(i = 0; i < OS_MAX_MINOR; i++){
|
if (prepare_ikc_channels(os[os_index]) != 0) {
|
||||||
if (os[i]) {
|
printk("mcctrl: error: preparing IKC channels for OS %d\n", os_index);
|
||||||
if (prepare_ikc_channels(os[i]) != 0) {
|
|
||||||
printk("Preparing syscall channels failed.\n");
|
os[os_index] = NULL;
|
||||||
os[i] = NULL;
|
return -EFAULT;
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
memcpy(mcctrl_uc + os_index, &mcctrl_uc_proto, sizeof mcctrl_uc_proto);
|
||||||
|
|
||||||
|
rc = ihk_os_register_user_call_handlers(os[os_index], mcctrl_uc + os_index);
|
||||||
|
if (rc < 0) {
|
||||||
|
destroy_ikc_channels(os[os_index]);
|
||||||
|
printk("mcctrl: error: registering callbacks for OS %d\n", os_index);
|
||||||
|
|
||||||
|
goto error_cleanup_channels;
|
||||||
|
}
|
||||||
|
|
||||||
|
procfs_init(os_index);
|
||||||
|
printk("mcctrl: OS ID %d boot event handled\n", os_index);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
error_cleanup_channels:
|
||||||
|
destroy_ikc_channels(os[os_index]);
|
||||||
|
|
||||||
|
os[os_index] = NULL;
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
int mcctrl_os_shutdown_notifier(int os_index)
|
||||||
|
{
|
||||||
|
sysfsm_cleanup(os[os_index]);
|
||||||
|
free_topology_info(os[os_index]);
|
||||||
|
ihk_os_unregister_user_call_handlers(os[os_index], mcctrl_uc + os_index);
|
||||||
|
destroy_ikc_channels(os[os_index]);
|
||||||
|
procfs_exit(os_index);
|
||||||
|
|
||||||
|
printk("mcctrl: OS ID %d shutdown event handled\n", os_index);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct ihk_os_notifier_ops mcctrl_os_notifier_ops = {
|
||||||
|
.boot = mcctrl_os_boot_notifier,
|
||||||
|
.shutdown = mcctrl_os_shutdown_notifier,
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct ihk_os_notifier mcctrl_os_notifier = {
|
||||||
|
.ops = &mcctrl_os_notifier_ops,
|
||||||
|
};
|
||||||
|
|
||||||
|
static int __init mcctrl_init(void)
|
||||||
|
{
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
#ifndef DO_USER_MODE
|
#ifndef DO_USER_MODE
|
||||||
mcctrl_syscall_init();
|
mcctrl_syscall_init();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
rus_page_hash_init();
|
rus_page_hash_init();
|
||||||
|
|
||||||
for(i = 0; i < OS_MAX_MINOR; i++){
|
|
||||||
if (os[i]) {
|
|
||||||
memcpy(mcctrl_uc + i, &mcctrl_uc_proto, sizeof mcctrl_uc_proto);
|
|
||||||
rc = ihk_os_register_user_call_handlers(os[i], mcctrl_uc + i);
|
|
||||||
if(rc < 0){
|
|
||||||
destroy_ikc_channels(os[i]);
|
|
||||||
os[i] = NULL;
|
|
||||||
}
|
|
||||||
procfs_init(i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
binfmt_mcexec_init();
|
binfmt_mcexec_init();
|
||||||
|
|
||||||
return 0;
|
if ((ret = ihk_host_register_os_notifier(&mcctrl_os_notifier)) != 0) {
|
||||||
|
printk("mcctrl: error: registering OS notifier\n");
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
printk("mcctrl: initialized successfully.\n");
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
error:
|
||||||
|
binfmt_mcexec_exit();
|
||||||
|
rus_page_hash_put_pages();
|
||||||
|
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __exit mcctrl_exit(void)
|
static void __exit mcctrl_exit(void)
|
||||||
{
|
{
|
||||||
int i;
|
if (ihk_host_deregister_os_notifier(&mcctrl_os_notifier) != 0) {
|
||||||
|
printk("mcctrl: warning: failed to deregister OS notifier??\n");
|
||||||
binfmt_mcexec_exit();
|
|
||||||
printk("mcctrl: unregistered.\n");
|
|
||||||
for(i = 0; i < OS_MAX_MINOR; i++){
|
|
||||||
if(os[i]){
|
|
||||||
sysfsm_cleanup(os[i]);
|
|
||||||
free_topology_info(os[i]);
|
|
||||||
ihk_os_unregister_user_call_handlers(os[i], mcctrl_uc + i);
|
|
||||||
destroy_ikc_channels(os[i]);
|
|
||||||
procfs_exit(i);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
binfmt_mcexec_exit();
|
||||||
rus_page_hash_put_pages();
|
rus_page_hash_put_pages();
|
||||||
|
|
||||||
|
printk("mcctrl: unregistered.\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
MODULE_LICENSE("GPL v2");
|
MODULE_LICENSE("GPL v2");
|
||||||
|
|||||||
@@ -27,6 +27,7 @@
 #include <linux/miscdevice.h>
 #include <linux/slab.h>
 #include <linux/string.h>
+#include <linux/interrupt.h>
 #include "mcctrl.h"
 #ifdef ATTACHED_MIC
 #include <sysdeps/mic/mic/micconst.h>
@@ -40,16 +41,18 @@

 void mcexec_prepare_ack(ihk_os_t os, unsigned long arg, int err);
 static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ihk_ikc_channel_desc *c);
-int mcexec_syscall(struct mcctrl_channel *c, int pid, unsigned long arg);
+int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet);
 void sig_done(unsigned long arg, int err);

+/* XXX: this runs in atomic context! */
 static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
 		void *__packet, void *__os)
 {
 	struct ikc_scd_packet *pisp = __packet;
 	struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(__os);
+	int msg = pisp->msg;

-	switch (pisp->msg) {
+	switch (msg) {
 	case SCD_MSG_INIT_CHANNEL:
 		mcctrl_ikc_init(__os, pisp->ref, pisp->arg, c);
 		break;
@@ -63,7 +66,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
 		break;

 	case SCD_MSG_SYSCALL_ONESIDE:
-		mcexec_syscall(usrdata->channels + pisp->ref, pisp->pid, pisp->arg);
+		mcexec_syscall(usrdata, pisp);
 		break;

 	case SCD_MSG_PROCFS_ANSWER:
@@ -88,11 +91,8 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
 		break;

 	case SCD_MSG_PROCFS_TID_CREATE:
-		add_tid_entry(ihk_host_os_get_index(__os), pisp->pid, pisp->arg);
-		break;
-
 	case SCD_MSG_PROCFS_TID_DELETE:
-		delete_tid_entry(ihk_host_os_get_index(__os), pisp->pid, pisp->arg);
+		procfsm_packet_handler(__os, pisp->msg, pisp->pid, pisp->arg);
 		break;

 	case SCD_MSG_GET_VDSO_INFO:
@@ -110,6 +110,14 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
 			pisp->err, pisp->arg);
 		break;
 	}

+	/*
+	 * SCD_MSG_SYSCALL_ONESIDE holds the packet and frees is it
+	 * mcexec_ret_syscall(), for the rest, free it here.
+	 */
+	if (msg != SCD_MSG_SYSCALL_ONESIDE) {
+		ihk_ikc_release_packet((struct ihk_ikc_free_packet *)__packet, c);
+	}
 	return 0;
 }
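The packet-ownership rule introduced above amounts to this: the handler frees every incoming IKC packet itself, except SCD_MSG_SYSCALL_ONESIDE, whose packet stays alive until mcexec_ret_syscall() has delivered the result. A condensed sketch of that rule is shown below; the types and helpers are the ones this diff itself uses, but the body is abbreviated and is not a drop-in replacement for the real handler.

/* Condensed sketch of the packet-ownership rule in the handler above. */
static int packet_handler_sketch(struct ihk_ikc_channel_desc *c,
				 void *__packet, void *__os)
{
	struct ikc_scd_packet *pisp = __packet;
	int msg = pisp->msg;	/* copy first: the packet may be freed below */

	switch (msg) {
	case SCD_MSG_SYSCALL_ONESIDE:
		/* Ownership passes to mcexec; freed in mcexec_ret_syscall(). */
		mcexec_syscall(ihk_host_os_get_usrdata(__os), pisp);
		break;
	default:
		/* All other message types are fully handled here. */
		break;
	}

	if (msg != SCD_MSG_SYSCALL_ONESIDE)
		ihk_ikc_release_packet((struct ihk_ikc_free_packet *)__packet, c);

	return 0;
}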
@@ -146,8 +154,6 @@ int mcctrl_ikc_set_recv_cpu(ihk_os_t os, int cpu)

 	ihk_ikc_channel_set_cpu(usrdata->channels[cpu].c,
 			ihk_ikc_get_processor_id());
-	kprintf("Setting the target to %d\n",
-		ihk_ikc_get_processor_id());
 	return 0;
 }

@@ -193,12 +199,13 @@ static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ih
 #endif

 	pmc->param.request_va =
-		(void *)__get_free_pages(GFP_KERNEL,
+		(void *)__get_free_pages(in_interrupt() ? GFP_ATOMIC : GFP_KERNEL,
 			REQUEST_SHIFT - PAGE_SHIFT);
 	pmc->param.request_pa = virt_to_phys(pmc->param.request_va);
 	pmc->param.doorbell_va = usrdata->mcctrl_doorbell_va;
 	pmc->param.doorbell_pa = usrdata->mcctrl_doorbell_pa;
-	pmc->param.post_va = (void *)__get_free_page(GFP_KERNEL);
+	pmc->param.post_va = (void *)__get_free_page(in_interrupt() ?
+			GFP_ATOMIC : GFP_KERNEL);
 	pmc->param.post_pa = virt_to_phys(pmc->param.post_va);
 	memset(pmc->param.doorbell_va, 0, PAGE_SIZE);
 	memset(pmc->param.request_va, 0, PAGE_SIZE);
@@ -218,8 +225,9 @@ static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ih
 			PAGE_SIZE, NULL, 0);
 #endif

-	pmc->dma_buf = (void *)__get_free_pages(GFP_KERNEL,
-		DMA_PIN_SHIFT - PAGE_SHIFT);
+	pmc->dma_buf = (void *)__get_free_pages(in_interrupt() ?
+			GFP_ATOMIC : GFP_KERNEL,
+			DMA_PIN_SHIFT - PAGE_SHIFT);

 	rpm->request_page = pmc->param.request_pa;
 	rpm->doorbell_page = pmc->param.doorbell_pa;
@@ -265,9 +273,6 @@ static int connect_handler(struct ihk_ikc_channel_info *param)
 	}
 	param->packet_handler = syscall_packet_handler;

-	INIT_LIST_HEAD(&usrdata->channels[cpu].wq_list);
-	spin_lock_init(&usrdata->channels[cpu].wq_list_lock);
-
 	usrdata->channels[cpu].c = c;
 	kprintf("syscall: MC CPU %d connected. c=%p\n", cpu, c);

@@ -286,9 +291,6 @@ static int connect_handler2(struct ihk_ikc_channel_info *param)

 	param->packet_handler = syscall_packet_handler;

-	INIT_LIST_HEAD(&usrdata->channels[cpu].wq_list);
-	spin_lock_init(&usrdata->channels[cpu].wq_list_lock);
-
 	usrdata->channels[cpu].c = c;
 	kprintf("syscall: MC CPU %d connected. c=%p\n", cpu, c);

@@ -315,7 +317,7 @@ int prepare_ikc_channels(ihk_os_t os)
 {
 	struct ihk_cpu_info *info;
 	struct mcctrl_usrdata *usrdata;
-	int error;
+	int i;

 	usrdata = kzalloc(sizeof(struct mcctrl_usrdata), GFP_KERNEL);
 	usrdata->mcctrl_doorbell_va = (void *)__get_free_page(GFP_KERNEL);
@@ -347,17 +349,14 @@ int prepare_ikc_channels(ihk_os_t os)
 	memcpy(&usrdata->listen_param2, &listen_param2, sizeof listen_param2);
 	ihk_ikc_listen_port(os, &usrdata->listen_param2);
|
||||||
|
|
||||||
INIT_LIST_HEAD(&usrdata->per_proc_list);
|
for (i = 0; i < MCCTRL_PER_PROC_DATA_HASH_SIZE; ++i) {
|
||||||
spin_lock_init(&usrdata->per_proc_list_lock);
|
INIT_LIST_HEAD(&usrdata->per_proc_data_hash[i]);
|
||||||
|
rwlock_init(&usrdata->per_proc_data_hash_lock[i]);
|
||||||
|
}
|
||||||
|
|
||||||
INIT_LIST_HEAD(&usrdata->cpu_topology_list);
|
INIT_LIST_HEAD(&usrdata->cpu_topology_list);
|
||||||
INIT_LIST_HEAD(&usrdata->node_topology_list);
|
INIT_LIST_HEAD(&usrdata->node_topology_list);
|
||||||
|
|
||||||
error = init_peer_channel_registry(usrdata);
|
|
||||||
if (error) {
|
|
||||||
return error;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -396,7 +395,6 @@ void destroy_ikc_channels(ihk_os_t os)
|
|||||||
}
|
}
|
||||||
free_page((unsigned long)usrdata->mcctrl_doorbell_va);
|
free_page((unsigned long)usrdata->mcctrl_doorbell_va);
|
||||||
|
|
||||||
destroy_peer_channel_registry(usrdata);
|
|
||||||
kfree(usrdata->channels);
|
kfree(usrdata->channels);
|
||||||
kfree(usrdata);
|
kfree(usrdata);
|
||||||
}
|
}
|
||||||
|
|||||||
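The hunks above switch the channel-parameter and DMA-buffer allocations to pick their GFP flags at run time because syscall_packet_handler(), and through it mcctrl_ikc_init(), can now be invoked from atomic IKC context (see the new "XXX: this runs in atomic context!" comment). A minimal sketch of the pattern, with a hypothetical alloc_param_pages() helper used only for illustration:

    #include <linux/gfp.h>
    #include <linux/hardirq.h>	/* in_interrupt() */

    /* Hypothetical helper: use a non-sleeping allocation when called
     * from interrupt/atomic context, a regular one otherwise. */
    static void *alloc_param_pages(unsigned int order)
    {
    	gfp_t gfp = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL;

    	/* GFP_ATOMIC never sleeps but can fail under memory pressure,
    	 * so callers still have to cope with a NULL return. */
    	return (void *)__get_free_pages(gfp, order);
    }

The new release rule at the end of the handler follows the same reasoning about who owns the packet: every message except SCD_MSG_SYSCALL_ONESIDE is returned to the IKC pool inside the handler itself, while one-sided syscall packets stay alive until mcexec_ret_syscall() has consumed them.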
@@ -41,6 +41,7 @@
 #include <ikc/master.h>
 #include <ihk/msr.h>
 #include <linux/semaphore.h>
+#include <linux/rwlock.h>
 #include <linux/threads.h>
 #include "sysfs.h"
 
@@ -48,6 +49,7 @@
 #define SCD_MSG_PREPARE_PROCESS_ACKED 0x2
 #define SCD_MSG_PREPARE_PROCESS_NACKED 0x7
 #define SCD_MSG_SCHEDULE_PROCESS 0x3
+#define SCD_MSG_WAKE_UP_SYSCALL_THREAD 0x14
 
 #define SCD_MSG_INIT_CHANNEL 0x5
 #define SCD_MSG_INIT_CHANNEL_ACKED 0x6
@@ -110,8 +112,9 @@ struct ikc_scd_packet {
 		int ref;
 		int osnum;
 		int pid;
-		int padding;
 		unsigned long arg;
+		struct syscall_request req;
+		unsigned long resp_pa;
 	};
 
 	/* for SCD_MSG_SYSFS_* */
@@ -120,7 +123,13 @@ struct ikc_scd_packet {
 		long sysfs_arg2;
 		long sysfs_arg3;
 	};
+
+	/* SCD_MSG_SCHEDULE_THREAD */
+	struct {
+		int ttid;
+	};
 	};
+	char padding[12];
 };
 
 struct mcctrl_priv {
@@ -154,8 +163,11 @@ struct syscall_params {
 struct wait_queue_head_list_node {
 	struct list_head list;
 	wait_queue_head_t wq_syscall;
-	int pid;
+	struct task_struct *task;
+	/* Denotes an exclusive wait for requester TID rtid */
+	int rtid;
 	int req;
+	struct ikc_scd_packet *packet;
 };
 
 struct mcctrl_channel {
@@ -163,15 +175,30 @@ struct mcctrl_channel {
 	struct syscall_params param;
 	struct ikc_scd_init_param init;
 	void *dma_buf;
 
-	struct list_head wq_list;
-	ihk_spinlock_t wq_list_lock;
 };
 
+struct mcctrl_per_thread_data {
+	struct list_head hash;
+	struct task_struct *task;
+	void *data;
+};
+
+#define MCCTRL_PER_THREAD_DATA_HASH_SHIFT	8
+#define MCCTRL_PER_THREAD_DATA_HASH_SIZE	(1 << MCCTRL_PER_THREAD_DATA_HASH_SHIFT)
+#define MCCTRL_PER_THREAD_DATA_HASH_MASK	(MCCTRL_PER_THREAD_DATA_HASH_SIZE - 1)
+
 struct mcctrl_per_proc_data {
-	struct list_head list;
+	struct list_head hash;
 	int pid;
 	unsigned long rpgtable;	/* per process, not per OS */
+
+	struct list_head wq_list;
+	struct list_head wq_req_list;
+	struct list_head wq_list_exact;
+	ihk_spinlock_t wq_list_lock;
+
+	struct list_head per_thread_data_hash[MCCTRL_PER_THREAD_DATA_HASH_SIZE];
+	rwlock_t per_thread_data_hash_lock[MCCTRL_PER_THREAD_DATA_HASH_SIZE];
 };
 
 struct sysfsm_req {
@@ -230,6 +257,10 @@ struct node_topology {
 
 #define CPU_LONGS (((NR_CPUS) + (BITS_PER_LONG) - 1) / (BITS_PER_LONG))
 
+#define MCCTRL_PER_PROC_DATA_HASH_SHIFT	7
+#define MCCTRL_PER_PROC_DATA_HASH_SIZE	(1 << MCCTRL_PER_PROC_DATA_HASH_SHIFT)
+#define MCCTRL_PER_PROC_DATA_HASH_MASK	(MCCTRL_PER_PROC_DATA_HASH_SIZE - 1)
+
 struct mcctrl_usrdata {
 	struct ihk_ikc_listen_param listen_param;
 	struct ihk_ikc_listen_param listen_param2;
@@ -245,8 +276,9 @@ struct mcctrl_usrdata {
 	unsigned long last_thread_exec;
 	wait_queue_head_t wq_prepare;
 
-	struct list_head per_proc_list;
-	ihk_spinlock_t per_proc_list_lock;
+	struct list_head per_proc_data_hash[MCCTRL_PER_PROC_DATA_HASH_SIZE];
+	rwlock_t per_proc_data_hash_lock[MCCTRL_PER_PROC_DATA_HASH_SIZE];
 
 	void **keys;
 	struct sysfsm_data sysfsm_data;
 	unsigned long cpu_online[CPU_LONGS];
@@ -273,12 +305,22 @@ int mcctrl_ikc_is_valid_thread(ihk_os_t os, int cpu);
 ihk_os_t osnum_to_os(int n);
 
 /* syscall.c */
-int init_peer_channel_registry(struct mcctrl_usrdata *ud);
-void destroy_peer_channel_registry(struct mcctrl_usrdata *ud);
-int register_peer_channel(struct mcctrl_usrdata *ud, void *key, struct mcctrl_channel *ch);
-int deregister_peer_channel(struct mcctrl_usrdata *ud, void *key, struct mcctrl_channel *ch);
-struct mcctrl_channel *get_peer_channel(struct mcctrl_usrdata *ud, void *key);
-int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall_request *sc);
+int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet);
+int mcctrl_add_per_proc_data(struct mcctrl_usrdata *ud, int pid,
+	struct mcctrl_per_proc_data *ppd);
+int mcctrl_delete_per_proc_data(struct mcctrl_usrdata *ud, int pid);
+inline struct mcctrl_per_proc_data *mcctrl_get_per_proc_data(
+	struct mcctrl_usrdata *ud, int pid);
+
+int mcctrl_add_per_thread_data(struct mcctrl_per_proc_data* ppd,
+	struct task_struct *task, void *data);
+int mcctrl_delete_per_thread_data(struct mcctrl_per_proc_data* ppd,
+	struct task_struct *task);
+inline struct mcctrl_per_thread_data *mcctrl_get_per_thread_data(
+	struct mcctrl_per_proc_data *ppd, struct task_struct *task);
+
+void __return_syscall(ihk_os_t os, struct ikc_scd_packet *packet,
+	long ret, int stid);
 
 #define PROCFS_NAME_MAX 1000
 
@@ -301,6 +343,7 @@ struct procfs_file {
 };
 
 void procfs_answer(unsigned int arg, int err);
+int procfsm_packet_handler(void *os, int msg, int pid, unsigned long arg);
 void add_tid_entry(int osnum, int pid, int tid);
 void add_pid_entry(int osnum, int pid);
 void delete_tid_entry(int osnum, int pid, int tid);
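The header change replaces the single per_proc_list (one list, one spinlock) with fixed-size hash tables: per-process data is hashed by PID under mcctrl_usrdata, and each process hashes its per-thread data by task, with one rwlock per bucket so concurrent lookups do not serialize. A minimal sketch of a lookup over such a bucket array, using a simplified stand-in entry type; deriving the bucket index from the low PID bits is an assumption of the sketch, and the real implementations are mcctrl_get_per_proc_data()/mcctrl_get_per_thread_data() in syscall.c:

    #include <linux/list.h>
    #include <linux/rwlock.h>

    #define PP_HASH_SHIFT	7
    #define PP_HASH_SIZE	(1 << PP_HASH_SHIFT)
    #define PP_HASH_MASK	(PP_HASH_SIZE - 1)

    struct proc_entry {			/* simplified stand-in for mcctrl_per_proc_data */
    	struct list_head hash;
    	int pid;
    };

    static struct list_head pp_buckets[PP_HASH_SIZE];
    static rwlock_t pp_bucket_locks[PP_HASH_SIZE];

    static struct proc_entry *lookup_proc_entry(int pid)
    {
    	struct proc_entry *p, *ret = NULL;
    	int b = pid & PP_HASH_MASK;	/* assumed bucket index: low PID bits */
    	unsigned long flags;

    	read_lock_irqsave(&pp_bucket_locks[b], flags);
    	list_for_each_entry(p, &pp_buckets[b], hash) {
    		if (p->pid == pid) {
    			ret = p;
    			break;
    		}
    	}
    	read_unlock_irqrestore(&pp_bucket_locks[b], flags);
    	return ret;
    }

Only the bucket that actually holds the PID is locked, and readers take the lock shared, which is the point of moving from one global spinlock to per-bucket rwlocks.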
@@ -17,8 +17,10 @@
 #include <linux/uaccess.h>
 #include <linux/fs.h>
 #include <linux/resource.h>
+#include <linux/interrupt.h>
 #include "mcctrl.h"
 #include <linux/version.h>
+#include <linux/semaphore.h>
 
 //#define PROCFS_DEBUG
 
@@ -81,7 +83,7 @@ struct procfs_list_entry {
  * file.
  */
 LIST_HEAD(procfs_file_list);
-static ihk_spinlock_t procfs_file_list_lock;
+DEFINE_SEMAPHORE(procfs_file_list_lock);
 
 static char *
 getpath(struct procfs_list_entry *e, char *buf, int bufsize)
@@ -375,67 +377,62 @@ _add_tid_entry(int osnum, int pid, int tid, const struct cred *cred)
 void
 add_tid_entry(int osnum, int pid, int tid)
 {
-	unsigned long irqflag;
 	const struct cred *cred = get_pid_cred(pid);
 
 	if(!cred)
 		return;
-	irqflag = ihk_ikc_spinlock_lock(&procfs_file_list_lock);
+	down(&procfs_file_list_lock);
 	_add_tid_entry(osnum, pid, tid, cred);
-	ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflag);
+	up(&procfs_file_list_lock);
 }
 
 void
 add_pid_entry(int osnum, int pid)
 {
 	struct procfs_list_entry *parent;
-	unsigned long irqflag;
 	const struct cred *cred = get_pid_cred(pid);
 
 	if(!cred)
 		return;
-	irqflag = ihk_ikc_spinlock_lock(&procfs_file_list_lock);
+	down(&procfs_file_list_lock);
 	parent = get_pid_entry(osnum, pid);
 	add_procfs_entries(parent, pid_entry_stuff, cred->uid, cred->gid);
 	_add_tid_entry(osnum, pid, pid, cred);
-	ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflag);
+	up(&procfs_file_list_lock);
 }
 
 void
 delete_tid_entry(int osnum, int pid, int tid)
 {
-	unsigned long irqflag;
 	struct procfs_list_entry *e;
 
-	irqflag = ihk_ikc_spinlock_lock(&procfs_file_list_lock);
+	down(&procfs_file_list_lock);
 	e = find_tid_entry(osnum, pid, tid);
 	if(e)
 		delete_procfs_entries(e);
-	ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflag);
+	up(&procfs_file_list_lock);
 }
 
 void
 delete_pid_entry(int osnum, int pid)
 {
-	unsigned long irqflag;
 	struct procfs_list_entry *e;
 
-	irqflag = ihk_ikc_spinlock_lock(&procfs_file_list_lock);
+	down(&procfs_file_list_lock);
 	e = find_pid_entry(osnum, pid);
 	if(e)
 		delete_procfs_entries(e);
-	ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflag);
+	up(&procfs_file_list_lock);
 }
 
 void
 proc_exe_link(int osnum, int pid, const char *path)
 {
 	struct procfs_list_entry *parent;
-	unsigned long irqflag;
 	kuid_t uid = KUIDT_INIT(0);
 	kgid_t gid = KGIDT_INIT(0);
 
-	irqflag = ihk_ikc_spinlock_lock(&procfs_file_list_lock);
+	down(&procfs_file_list_lock);
 	parent = find_pid_entry(osnum, pid);
 	if(parent){
 		struct procfs_list_entry *task;
@@ -451,7 +448,7 @@ proc_exe_link(int osnum, int pid, const char *path)
 			uid, gid, path);
 		}
 	}
-	ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflag);
+	up(&procfs_file_list_lock);
 }
 
 /**
@@ -463,14 +460,13 @@ void
 procfs_init(int osnum)
 {
 	struct procfs_list_entry *parent;
-	unsigned long irqflag;
 	kuid_t uid = KUIDT_INIT(0);
 	kgid_t gid = KGIDT_INIT(0);
 
-	irqflag = ihk_ikc_spinlock_lock(&procfs_file_list_lock);
+	down(&procfs_file_list_lock);
 	parent = get_base_entry(osnum);
 	add_procfs_entries(parent, base_entry_stuff, uid, gid);
-	ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflag);
+	up(&procfs_file_list_lock);
 }
 
 /**
@@ -481,14 +477,13 @@ procfs_init(int osnum)
 void
 procfs_exit(int osnum)
 {
-	unsigned long irqflag;
 	struct procfs_list_entry *e;
 
-	irqflag = ihk_ikc_spinlock_lock(&procfs_file_list_lock);
+	down(&procfs_file_list_lock);
 	e = find_base_entry(osnum);
 	if(e)
 		delete_procfs_entries(e);
-	ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflag);
+	up(&procfs_file_list_lock);
 }
 
 /**
@@ -719,6 +714,57 @@ mckernel_procfs_lseek(struct file *file, loff_t offset, int orig)
 	return file->f_pos;
 }
 
+struct procfs_work {
+	void *os;
+	int msg;
+	int pid;
+	unsigned long arg;
+	struct work_struct work;
+};
+
+static void procfsm_work_main(struct work_struct *work0)
+{
+	struct procfs_work *work = container_of(work0, struct procfs_work, work);
+
+	switch (work->msg) {
+	case SCD_MSG_PROCFS_TID_CREATE:
+		add_tid_entry(ihk_host_os_get_index(work->os), work->pid, work->arg);
+		break;
+
+	case SCD_MSG_PROCFS_TID_DELETE:
+		delete_tid_entry(ihk_host_os_get_index(work->os), work->pid, work->arg);
+		break;
+
+	default:
+		printk("%s: unknown work: msg: %d, pid: %d, arg: %lu)\n",
+			__FUNCTION__, work->msg, work->pid, work->arg);
+		break;
+	}
+
+	kfree(work);
+	return;
+}
+
+int procfsm_packet_handler(void *os, int msg, int pid, unsigned long arg)
+{
+	struct procfs_work *work = NULL;
+
+	work = kzalloc(sizeof(*work), GFP_ATOMIC);
+	if (!work) {
+		printk("%s: kzalloc failed\n", __FUNCTION__);
+		return -1;
+	}
+
+	work->os = os;
+	work->msg = msg;
+	work->pid = pid;
+	work->arg = arg;
+	INIT_WORK(&work->work, &procfsm_work_main);
+
+	schedule_work(&work->work);
+	return 0;
+}
+
 static const struct file_operations mckernel_forward_ro = {
 	.llseek = mckernel_procfs_lseek,
 	.read = mckernel_procfs_read,
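Turning procfs_file_list_lock into a semaphore means every path that takes it may now sleep, so the TID create/delete requests that arrive in the atomic IKC packet handler can no longer be served inline; procfsm_packet_handler() instead queues them onto the system workqueue and procfsm_work_main() runs them in process context. The general shape of that hand-off, reduced to its essentials with illustrative names (the real code is the pair of functions in the hunk above):

    #include <linux/workqueue.h>
    #include <linux/slab.h>

    struct deferred_req {
    	int msg;
    	struct work_struct work;
    };

    static void deferred_main(struct work_struct *w)
    {
    	struct deferred_req *req = container_of(w, struct deferred_req, work);

    	/* Runs in process context: taking a semaphore is allowed here. */
    	kfree(req);
    }

    /* Called from atomic context: allocate with GFP_ATOMIC and defer. */
    static int defer_request(int msg)
    {
    	struct deferred_req *req = kzalloc(sizeof(*req), GFP_ATOMIC);

    	if (!req)
    		return -ENOMEM;
    	req->msg = msg;
    	INIT_WORK(&req->work, deferred_main);
    	schedule_work(&req->work);
    	return 0;
    }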
(One file diff is suppressed here because it is too large.)
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/device.h>
 #include <linux/version.h>
+#include <linux/interrupt.h>
 #include "mcctrl.h"
 #include "sysfs_msg.h"
 
@@ -1,7 +1,3 @@
-KDIR ?= @KDIR@
-ARCH ?= @ARCH@
-KMODDIR=@KMODDIR@
-src = @abs_srcdir@
 ENABLE_MCOVERLAYFS=@ENABLE_MCOVERLAYFS@
 
 RELEASE=$(shell uname -r)
@@ -9,31 +5,36 @@ MAJOR=$(shell echo ${RELEASE} | sed -e 's/^\([0-9]*\).*/\1/')
 MINOR=$(shell echo ${RELEASE} | sed -e 's/^[0-9]*.\([0-9]*\).*/\1/')
 PATCH=$(shell echo ${RELEASE} | sed -e 's/^[0-9]*.[0-9]*.\([0-9]*\).*/\1/')
 LINUX_VERSION_CODE=$(shell expr \( ${MAJOR} \* 65536 \) + \( ${MINOR} \* 256 \) + ${PATCH})
-RHEL_RELEASE=$(shell echo ${RELEASE} | sed -e 's/^[0-9]*.[0-9]*.[0-9]*-\([0-9]*\).*/\1/')
-RHEL_RELEASE=$(shell if [ "${RELEASE}" == "${RHEL_RELEASE}" ]; then echo ""; else echo ${RHEL_RELEASE}; fi)
+RHEL_RELEASE_TMP=$(shell echo ${RELEASE} | sed -e 's/^[0-9]*.[0-9]*.[0-9]*-\([0-9]*\).*/\1/')
+RHEL_RELEASE=$(shell if [ "${RELEASE}" == "${RHEL_RELEASE_TMP}" ]; then echo ""; else echo ${RHEL_RELEASE_TMP}; fi)
+BUILD_MODULE_TMP=$(shell if [ "${RHEL_RELEASE}" == "" ]; then echo "org"; else echo "rhel"; fi)
+BUILD_MODULE=none
 ifeq ($(ENABLE_MCOVERLAYFS),yes)
-ENABLE_BUILD=$(shell if ( [ ${LINUX_VERSION_CODE} -ge 262144 ] && [ ${LINUX_VERSION_CODE} -lt 262400 ] ); then echo "yes"; else echo "no"; fi)
-else
-ENABLE_BUILD=no
+ifeq ($(BUILD_MODULE_TMP),org)
+ifeq ($(BUILD_MODULE),none)
+BUILD_MODULE=$(shell if [ ${LINUX_VERSION_CODE} -ge 262144 -a ${LINUX_VERSION_CODE} -lt 262400 ]; then echo "linux-4.0.9"; else echo "none"; fi)
+endif
+endif
+ifeq ($(BUILD_MODULE_TMP),rhel)
+ifeq ($(BUILD_MODULE),none)
+BUILD_MODULE=$(shell if [ ${LINUX_VERSION_CODE} -eq 199168 -a ${RHEL_RELEASE} -eq 327 ]; then echo "linux-3.10.0-327.36.1.el7"; else echo "none"; fi)
+endif
+endif
 endif
 
-obj-m += mcoverlay.o
-
-mcoverlay-y := copy_up.o dir.o inode.o readdir.o super.o
 
 .PHONY: clean install modules
 
 modules:
-ifeq ($(ENABLE_BUILD),yes)
-	$(MAKE) -C $(KDIR) M=$(PWD) SUBDIRS=$(PWD) ARCH=$(ARCH) modules
+ifneq ($(BUILD_MODULE),none)
+	@(cd $(BUILD_MODULE); make modules)
 endif
 
 clean:
-	$(RM) .*.cmd *.mod.c *.o *.ko* Module.symvers modules.order -r .tmp*
+	@(cd linux-3.10.0-327.36.1.el7; make clean)
+	@(cd linux-4.0.9; make clean)
 
 install:
-ifeq ($(ENABLE_BUILD),yes)
-	mkdir -p -m 755 $(KMODDIR)
-	install -m 644 mcoverlay.ko $(KMODDIR)
+ifneq ($(BUILD_MODULE),none)
+	@(cd $(BUILD_MODULE); make install)
 endif
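The Makefile selects the per-kernel source tree from LINUX_VERSION_CODE, which packs the running kernel's version as major*65536 + minor*256 + patch, i.e. the same encoding as the kernel's KERNEL_VERSION() macro: 262144 is 4.0.0, 262400 is 4.1.0, and 199168 is 3.10.0 (which additionally requires RHEL_RELEASE 327). A small stand-alone sanity check of the same comparison, written as a sketch in C rather than as part of the build:

    #include <stdio.h>

    /* Same encoding the Makefile computes with expr. */
    #define KVER(a, b, c)	(((a) << 16) + ((b) << 8) + (c))

    int main(void)
    {
    	int code = KVER(4, 0, 9);	/* e.g. a 4.0.9 kernel */

    	/* Mirrors: [ CODE -ge 262144 -a CODE -lt 262400 ] -> linux-4.0.9 */
    	if (code >= KVER(4, 0, 0) && code < KVER(4, 1, 0))
    		printf("build from linux-4.0.9\n");
    	else if (code == KVER(3, 10, 0))
    		printf("3.10.0 kernel: also check RHEL_RELEASE == 327\n");
    	else
    		printf("mcoverlayfs is not built for this kernel\n");
    	return 0;
    }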
@@ -0,0 +1,21 @@
+KDIR ?= @KDIR@
+ARCH ?= @ARCH@
+KMODDIR = @KMODDIR@
+src = @abs_srcdir@
+
+obj-m += mcoverlay.o
+
+mcoverlay-y := copy_up.o dir.o inode.o readdir.o super.o
+
+.PHONY: clean install modules
+
+modules:
+	$(MAKE) -C $(KDIR) M=$(PWD) SUBDIRS=$(PWD) ARCH=$(ARCH) modules
+
+clean:
+	$(RM) .*.cmd *.mod.c *.o *.ko* Module.symvers modules.order -r .tmp*
+
+install:
+	mkdir -p -m 755 $(KMODDIR)
+	install -m 644 mcoverlay.ko $(KMODDIR)
461	executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/copy_up.c	(new file)
@@ -0,0 +1,461 @@
[461-line copy_up.c source for the mcoverlay module, based on the overlayfs copy-up implementation for the 3.10.0-327.36.1.el7 kernel (Copyright (C) 2011 Novell Inc., GPL v2). It defines OVL_COPY_UP_CHUNK_SIZE, the check_copy_up module parameter, and the functions ovl_check_fd(), ovl_do_check_copy_up(), ovl_copy_xattr(), ovl_copy_up_data(), ovl_read_symlink(), ovl_set_timestamps(), ovl_set_attr(), ovl_copy_up_locked(), ovl_copy_up_one() and ovl_copy_up().]
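The core of the copy-up path in this file is ovl_copy_up_data(), which streams the lower file into the freshly created upper file in 1 MiB chunks via do_splice_direct(), checking for fatal signals between chunks so a killed process does not keep copying. An abridged excerpt reproduced from the added file (the setup and error handling around opening the two files is trimmed, so this is not a self-contained function):

    #define OVL_COPY_UP_CHUNK_SIZE	(1 << 20)

    	/* FIXME: copy up sparse files efficiently */
    	while (len) {
    		size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
    		long bytes;

    		if (len < this_len)
    			this_len = len;

    		if (signal_pending_state(TASK_KILLABLE, current)) {
    			error = -EINTR;
    			break;
    		}

    		bytes = do_splice_direct(old_file, &old_pos,
    				new_file, &new_pos,
    				this_len, SPLICE_F_MOVE);
    		if (bytes <= 0) {
    			error = bytes;
    			break;
    		}
    		WARN_ON(old_pos != new_pos);

    		len -= bytes;
    	}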
972	executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/dir.c	(new file)
@@ -0,0 +1,972 @@
[972-line dir.c source for the mcoverlay module, based on the overlayfs directory-operations implementation for the same kernel (Copyright (C) 2011 Novell Inc., GPL v2). The portion shown in this compare view covers ovl_cleanup(), ovl_lookup_temp(), ovl_whiteout(), ovl_create_real(), ovl_set_opaque(), ovl_remove_opaque(), ovl_dir_getattr(), ovl_create_upper(), ovl_lock_rename_workdir(), ovl_clear_empty(), ovl_check_empty_and_clear(), ovl_create_over_whiteout(), ovl_create_or_link(), ovl_create_object(), ovl_create(), ovl_mkdir(), ovl_mknod(), ovl_symlink(), ovl_link(), ovl_remove_and_whiteout(), ovl_remove_upper() and ovl_check_sticky(); the listing is cut off partway through the file.]
|
||||||
|
|
||||||
|
static int ovl_do_remove(struct dentry *dentry, bool is_dir)
|
||||||
|
{
|
||||||
|
enum ovl_path_type type;
|
||||||
|
int err;
|
||||||
|
|
||||||
|
err = ovl_check_sticky(dentry);
|
||||||
|
if (err)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
err = ovl_want_write(dentry);
|
||||||
|
if (err)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
err = ovl_copy_up(dentry->d_parent);
|
||||||
|
if (err)
|
||||||
|
goto out_drop_write;
|
||||||
|
|
||||||
|
type = ovl_path_type(dentry);
|
||||||
|
if (OVL_TYPE_PURE_UPPER(type)) {
|
||||||
|
err = ovl_remove_upper(dentry, is_dir);
|
||||||
|
} else {
|
||||||
|
const struct cred *old_cred;
|
||||||
|
struct cred *override_cred;
|
||||||
|
|
||||||
|
err = -ENOMEM;
|
||||||
|
override_cred = prepare_creds();
|
||||||
|
if (!override_cred)
|
||||||
|
goto out_drop_write;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir
|
||||||
|
* CAP_DAC_OVERRIDE for create in workdir, rename
|
||||||
|
* CAP_FOWNER for removing whiteout from sticky dir
|
||||||
|
* CAP_FSETID for chmod of opaque dir
|
||||||
|
* CAP_CHOWN for chown of opaque dir
|
||||||
|
*/
|
||||||
|
cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
|
||||||
|
cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
|
||||||
|
cap_raise(override_cred->cap_effective, CAP_FOWNER);
|
||||||
|
cap_raise(override_cred->cap_effective, CAP_FSETID);
|
||||||
|
cap_raise(override_cred->cap_effective, CAP_CHOWN);
|
||||||
|
old_cred = override_creds(override_cred);
|
||||||
|
|
||||||
|
err = ovl_remove_and_whiteout(dentry, is_dir);
|
||||||
|
|
||||||
|
revert_creds(old_cred);
|
||||||
|
put_cred(override_cred);
|
||||||
|
}
|
||||||
|
out_drop_write:
|
||||||
|
ovl_drop_write(dentry);
|
||||||
|
out:
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int ovl_unlink(struct inode *dir, struct dentry *dentry)
|
||||||
|
{
|
||||||
|
return ovl_do_remove(dentry, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int ovl_rmdir(struct inode *dir, struct dentry *dentry)
|
||||||
|
{
|
||||||
|
return ovl_do_remove(dentry, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int ovl_rename2(struct inode *olddir, struct dentry *old,
|
||||||
|
struct inode *newdir, struct dentry *new,
|
||||||
|
unsigned int flags)
|
||||||
|
{
|
||||||
|
int err;
|
||||||
|
enum ovl_path_type old_type;
|
||||||
|
enum ovl_path_type new_type;
|
||||||
|
struct dentry *old_upperdir;
|
||||||
|
struct dentry *new_upperdir;
|
||||||
|
struct dentry *olddentry;
|
||||||
|
struct dentry *newdentry;
|
||||||
|
struct dentry *trap;
|
||||||
|
bool old_opaque;
|
||||||
|
bool new_opaque;
|
||||||
|
bool new_create = false;
|
||||||
|
bool cleanup_whiteout = false;
|
||||||
|
bool overwrite = !(flags & RENAME_EXCHANGE);
|
||||||
|
bool is_dir = S_ISDIR(old->d_inode->i_mode);
|
||||||
|
bool new_is_dir = false;
|
||||||
|
struct dentry *opaquedir = NULL;
|
||||||
|
const struct cred *old_cred = NULL;
|
||||||
|
struct cred *override_cred = NULL;
|
||||||
|
|
||||||
|
err = -EINVAL;
|
||||||
|
if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE))
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
flags &= ~RENAME_NOREPLACE;
|
||||||
|
|
||||||
|
err = ovl_check_sticky(old);
|
||||||
|
if (err)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
/* Don't copy up directory trees */
|
||||||
|
old_type = ovl_path_type(old);
|
||||||
|
err = -EXDEV;
|
||||||
|
if (OVL_TYPE_MERGE_OR_LOWER(old_type) && is_dir)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
if (new->d_inode) {
|
||||||
|
err = ovl_check_sticky(new);
|
||||||
|
if (err)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
if (S_ISDIR(new->d_inode->i_mode))
|
||||||
|
new_is_dir = true;
|
||||||
|
|
||||||
|
new_type = ovl_path_type(new);
|
||||||
|
err = -EXDEV;
|
||||||
|
if (!overwrite && OVL_TYPE_MERGE_OR_LOWER(new_type) && new_is_dir)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
err = 0;
|
||||||
|
if (!OVL_TYPE_UPPER(new_type) && !OVL_TYPE_UPPER(old_type)) {
|
||||||
|
if (ovl_dentry_lower(old)->d_inode ==
|
||||||
|
ovl_dentry_lower(new)->d_inode)
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
if (OVL_TYPE_UPPER(new_type) && OVL_TYPE_UPPER(old_type)) {
|
||||||
|
if (ovl_dentry_upper(old)->d_inode ==
|
||||||
|
ovl_dentry_upper(new)->d_inode)
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (ovl_dentry_is_opaque(new))
|
||||||
|
new_type = __OVL_PATH_UPPER;
|
||||||
|
else
|
||||||
|
new_type = __OVL_PATH_UPPER | __OVL_PATH_PURE;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = ovl_want_write(old);
|
||||||
|
if (err)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
err = ovl_copy_up(old);
|
||||||
|
if (err)
|
||||||
|
goto out_drop_write;
|
||||||
|
|
||||||
|
err = ovl_copy_up(new->d_parent);
|
||||||
|
if (err)
|
||||||
|
goto out_drop_write;
|
||||||
|
if (!overwrite) {
|
||||||
|
err = ovl_copy_up(new);
|
||||||
|
if (err)
|
||||||
|
goto out_drop_write;
|
||||||
|
}
|
||||||
|
|
||||||
|
old_opaque = !OVL_TYPE_PURE_UPPER(old_type);
|
||||||
|
new_opaque = !OVL_TYPE_PURE_UPPER(new_type);
|
||||||
|
|
||||||
|
if (old_opaque || new_opaque) {
|
||||||
|
err = -ENOMEM;
|
||||||
|
override_cred = prepare_creds();
|
||||||
|
if (!override_cred)
|
||||||
|
goto out_drop_write;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir
|
||||||
|
* CAP_DAC_OVERRIDE for create in workdir
|
||||||
|
* CAP_FOWNER for removing whiteout from sticky dir
|
||||||
|
* CAP_FSETID for chmod of opaque dir
|
||||||
|
* CAP_CHOWN for chown of opaque dir
|
||||||
|
*/
|
||||||
|
cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
|
||||||
|
cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
|
||||||
|
cap_raise(override_cred->cap_effective, CAP_FOWNER);
|
||||||
|
cap_raise(override_cred->cap_effective, CAP_FSETID);
|
||||||
|
cap_raise(override_cred->cap_effective, CAP_CHOWN);
|
||||||
|
old_cred = override_creds(override_cred);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (overwrite && OVL_TYPE_MERGE_OR_LOWER(new_type) && new_is_dir) {
|
||||||
|
opaquedir = ovl_check_empty_and_clear(new);
|
||||||
|
err = PTR_ERR(opaquedir);
|
||||||
|
if (IS_ERR(opaquedir)) {
|
||||||
|
opaquedir = NULL;
|
||||||
|
goto out_revert_creds;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (overwrite) {
|
||||||
|
if (old_opaque) {
|
||||||
|
if (new->d_inode || !new_opaque) {
|
||||||
|
/* Whiteout source */
|
||||||
|
flags |= RENAME_WHITEOUT;
|
||||||
|
} else {
|
||||||
|
/* Switch whiteouts */
|
||||||
|
flags |= RENAME_EXCHANGE;
|
||||||
|
}
|
||||||
|
} else if (is_dir && !new->d_inode && new_opaque) {
|
||||||
|
flags |= RENAME_EXCHANGE;
|
||||||
|
cleanup_whiteout = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
old_upperdir = ovl_dentry_upper(old->d_parent);
|
||||||
|
new_upperdir = ovl_dentry_upper(new->d_parent);
|
||||||
|
|
||||||
|
trap = lock_rename(new_upperdir, old_upperdir);
|
||||||
|
|
||||||
|
|
||||||
|
olddentry = lookup_one_len(old->d_name.name, old_upperdir,
|
||||||
|
old->d_name.len);
|
||||||
|
err = PTR_ERR(olddentry);
|
||||||
|
if (IS_ERR(olddentry))
|
||||||
|
goto out_unlock;
|
||||||
|
|
||||||
|
err = -ESTALE;
|
||||||
|
if (olddentry != ovl_dentry_upper(old))
|
||||||
|
goto out_dput_old;
|
||||||
|
|
||||||
|
newdentry = lookup_one_len(new->d_name.name, new_upperdir,
|
||||||
|
new->d_name.len);
|
||||||
|
err = PTR_ERR(newdentry);
|
||||||
|
if (IS_ERR(newdentry))
|
||||||
|
goto out_dput_old;
|
||||||
|
|
||||||
|
err = -ESTALE;
|
||||||
|
if (ovl_dentry_upper(new)) {
|
||||||
|
if (opaquedir) {
|
||||||
|
if (newdentry != opaquedir)
|
||||||
|
goto out_dput;
|
||||||
|
} else {
|
||||||
|
if (newdentry != ovl_dentry_upper(new))
|
||||||
|
goto out_dput;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
new_create = true;
|
||||||
|
if (!d_is_negative(newdentry) &&
|
||||||
|
(!new_opaque || !ovl_is_whiteout(newdentry)))
|
||||||
|
goto out_dput;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (olddentry == trap)
|
||||||
|
goto out_dput;
|
||||||
|
if (newdentry == trap)
|
||||||
|
goto out_dput;
|
||||||
|
|
||||||
|
if (is_dir && !old_opaque && new_opaque) {
|
||||||
|
err = ovl_set_opaque(olddentry);
|
||||||
|
if (err)
|
||||||
|
goto out_dput;
|
||||||
|
}
|
||||||
|
if (!overwrite && new_is_dir && old_opaque && !new_opaque) {
|
||||||
|
err = ovl_set_opaque(newdentry);
|
||||||
|
if (err)
|
||||||
|
goto out_dput;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (old_opaque || new_opaque) {
|
||||||
|
err = ovl_do_rename(old_upperdir->d_inode, olddentry,
|
||||||
|
new_upperdir->d_inode, newdentry,
|
||||||
|
flags);
|
||||||
|
} else {
|
||||||
|
/* No debug for the plain case */
|
||||||
|
BUG_ON(flags & ~RENAME_EXCHANGE);
|
||||||
|
err = vfs_rename(old_upperdir->d_inode, olddentry,
|
||||||
|
new_upperdir->d_inode, newdentry,
|
||||||
|
NULL, flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (err) {
|
||||||
|
if (is_dir && !old_opaque && new_opaque)
|
||||||
|
ovl_remove_opaque(olddentry);
|
||||||
|
if (!overwrite && new_is_dir && old_opaque && !new_opaque)
|
||||||
|
ovl_remove_opaque(newdentry);
|
||||||
|
goto out_dput;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_dir && old_opaque && !new_opaque)
|
||||||
|
ovl_remove_opaque(olddentry);
|
||||||
|
if (!overwrite && new_is_dir && !old_opaque && new_opaque)
|
||||||
|
ovl_remove_opaque(newdentry);
|
||||||
|
|
||||||
|
if (old_opaque != new_opaque) {
|
||||||
|
ovl_dentry_set_opaque(old, new_opaque);
|
||||||
|
if (!overwrite)
|
||||||
|
ovl_dentry_set_opaque(new, old_opaque);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cleanup_whiteout)
|
||||||
|
ovl_cleanup(old_upperdir->d_inode, newdentry);
|
||||||
|
|
||||||
|
ovl_dentry_version_inc(old->d_parent);
|
||||||
|
ovl_dentry_version_inc(new->d_parent);
|
||||||
|
|
||||||
|
out_dput:
|
||||||
|
dput(newdentry);
|
||||||
|
out_dput_old:
|
||||||
|
dput(olddentry);
|
||||||
|
out_unlock:
|
||||||
|
unlock_rename(new_upperdir, old_upperdir);
|
||||||
|
out_revert_creds:
|
||||||
|
if (old_opaque || new_opaque) {
|
||||||
|
revert_creds(old_cred);
|
||||||
|
put_cred(override_cred);
|
||||||
|
}
|
||||||
|
out_drop_write:
|
||||||
|
ovl_drop_write(old);
|
||||||
|
out:
|
||||||
|
dput(opaquedir);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int ovl_rename(struct inode *olddir, struct dentry *old,
|
||||||
|
struct inode *newdir, struct dentry *new)
|
||||||
|
{
|
||||||
|
return ovl_rename2(olddir, old, newdir, new, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
const struct inode_operations_wrapper ovl_dir_inode_operations = {
|
||||||
|
.ops = {
|
||||||
|
.lookup = ovl_lookup,
|
||||||
|
.mkdir = ovl_mkdir,
|
||||||
|
.symlink = ovl_symlink,
|
||||||
|
.unlink = ovl_unlink,
|
||||||
|
.rmdir = ovl_rmdir,
|
||||||
|
.rename = ovl_rename,
|
||||||
|
.link = ovl_link,
|
||||||
|
.setattr = ovl_setattr,
|
||||||
|
.create = ovl_create,
|
||||||
|
.mknod = ovl_mknod,
|
||||||
|
.permission = ovl_permission,
|
||||||
|
.getattr = ovl_dir_getattr,
|
||||||
|
.setxattr = ovl_setxattr,
|
||||||
|
.getxattr = ovl_getxattr,
|
||||||
|
.listxattr = ovl_listxattr,
|
||||||
|
.removexattr = ovl_removexattr,
|
||||||
|
},
|
||||||
|
.rename2 = ovl_rename2,
|
||||||
|
};
|
||||||
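A note on the whiteouts that the removal and rename paths above plant in the upper layer: a whiteout is an ordinary character device node with device number 0/0, which is what ovl_is_whiteout() tests for. The following user-space sketch shows that check; the helper name and the idea of probing an upper-layer path directly are illustrative assumptions, not part of this patch set.

#include <sys/stat.h>
#include <sys/sysmacros.h>

/* Hypothetical helper: returns 1 if the given upper-layer entry looks like
 * an overlayfs whiteout (a 0/0 character device), 0 if not, -1 on error. */
static int looks_like_whiteout(const char *upper_path)
{
	struct stat st;

	if (lstat(upper_path, &st) != 0)
		return -1;
	return S_ISCHR(st.st_mode) && major(st.st_rdev) == 0 &&
	       minor(st.st_rdev) == 0;
}

Under that assumption, running the check against the upper directory entry after unlinking a lower-only file through the overlay should report a whiteout.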
executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/inode.c (new file, 442 lines)
@@ -0,0 +1,442 @@
/*
 *
 * Copyright (C) 2011 Novell Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 */

#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/xattr.h>
#include "overlayfs.h"

static int ovl_copy_up_last(struct dentry *dentry, struct iattr *attr,
			    bool no_data)
{
	int err;
	struct dentry *parent;
	struct kstat stat;
	struct path lowerpath;

	parent = dget_parent(dentry);
	err = ovl_copy_up(parent);
	if (err)
		goto out_dput_parent;

	ovl_path_lower(dentry, &lowerpath);
	err = vfs_getattr(&lowerpath, &stat);
	if (err)
		goto out_dput_parent;

	if (no_data)
		stat.size = 0;

	err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat, attr);

out_dput_parent:
	dput(parent);
	return err;
}

int ovl_setattr(struct dentry *dentry, struct iattr *attr)
{
	int err;
	struct dentry *upperdentry;

	err = ovl_want_write(dentry);
	if (err)
		goto out;

	err = ovl_copy_up(dentry);
	if (!err) {
		upperdentry = ovl_dentry_upper(dentry);

		mutex_lock(&upperdentry->d_inode->i_mutex);
		err = notify_change(upperdentry, attr, NULL);
		mutex_unlock(&upperdentry->d_inode->i_mutex);
	}
	ovl_drop_write(dentry);
out:
	return err;
}

static int ovl_getattr(struct vfsmount *mnt, struct dentry *dentry,
		       struct kstat *stat)
{
	struct path realpath;

	ovl_path_real(dentry, &realpath);
	return vfs_getattr(&realpath, stat);
}

int ovl_permission(struct inode *inode, int mask)
{
	struct ovl_entry *oe;
	struct dentry *alias = NULL;
	struct inode *realinode;
	struct dentry *realdentry;
	bool is_upper;
	int err;

	if (S_ISDIR(inode->i_mode)) {
		oe = inode->i_private;
	} else if (mask & MAY_NOT_BLOCK) {
		return -ECHILD;
	} else {
		/*
		 * For non-directories find an alias and get the info
		 * from there.
		 */
		alias = d_find_any_alias(inode);
		if (WARN_ON(!alias))
			return -ENOENT;

		oe = alias->d_fsdata;
	}

	realdentry = ovl_entry_real(oe, &is_upper);

	/* Careful in RCU walk mode */
	realinode = ACCESS_ONCE(realdentry->d_inode);
	if (!realinode) {
		WARN_ON(!(mask & MAY_NOT_BLOCK));
		err = -ENOENT;
		goto out_dput;
	}

	if (mask & MAY_WRITE) {
		umode_t mode = realinode->i_mode;

		/*
		 * Writes will always be redirected to upper layer, so
		 * ignore lower layer being read-only.
		 *
		 * If the overlay itself is read-only then proceed
		 * with the permission check, don't return EROFS.
		 * This will only happen if this is the lower layer of
		 * another overlayfs.
		 *
		 * If upper fs becomes read-only after the overlay was
		 * constructed return EROFS to prevent modification of
		 * upper layer.
		 */
		err = -EROFS;
		if (is_upper && !IS_RDONLY(inode) && IS_RDONLY(realinode) &&
		    (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
			goto out_dput;
	}

	err = __inode_permission(realinode, mask);
out_dput:
	dput(alias);
	return err;
}


struct ovl_link_data {
	struct dentry *realdentry;
	void *cookie;
};

static void *ovl_follow_link(struct dentry *dentry, struct nameidata *nd)
{
	void *ret;
	struct dentry *realdentry;
	struct inode *realinode;
	struct ovl_link_data *data = NULL;

	realdentry = ovl_dentry_real(dentry);
	realinode = realdentry->d_inode;

	if (WARN_ON(!realinode->i_op->follow_link))
		return ERR_PTR(-EPERM);

	if (realinode->i_op->put_link) {
		data = kmalloc(sizeof(struct ovl_link_data), GFP_KERNEL);
		if (!data)
			return ERR_PTR(-ENOMEM);
		data->realdentry = realdentry;
	}

	ret = realinode->i_op->follow_link(realdentry, nd);
	if (IS_ERR(ret)) {
		kfree(data);
		return ret;
	}

	if (data)
		data->cookie = ret;

	return data;
}

static void ovl_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
{
	struct inode *realinode;
	struct ovl_link_data *data = c;

	if (!data)
		return;

	realinode = data->realdentry->d_inode;
	realinode->i_op->put_link(data->realdentry, nd, data->cookie);
	kfree(data);
}

static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
{
	struct path realpath;
	struct inode *realinode;

	ovl_path_real(dentry, &realpath);
	realinode = realpath.dentry->d_inode;

	if (!realinode->i_op->readlink)
		return -EINVAL;

	touch_atime(&realpath);

	return realinode->i_op->readlink(realpath.dentry, buf, bufsiz);
}


static bool ovl_is_private_xattr(const char *name)
{
	return strncmp(name, OVL_XATTR_PRE_NAME, OVL_XATTR_PRE_LEN) == 0;
}

int ovl_setxattr(struct dentry *dentry, const char *name,
		 const void *value, size_t size, int flags)
{
	int err;
	struct dentry *upperdentry;

	err = ovl_want_write(dentry);
	if (err)
		goto out;

	err = -EPERM;
	if (ovl_is_private_xattr(name))
		goto out_drop_write;

	err = ovl_copy_up(dentry);
	if (err)
		goto out_drop_write;

	upperdentry = ovl_dentry_upper(dentry);
	err = vfs_setxattr(upperdentry, name, value, size, flags);

out_drop_write:
	ovl_drop_write(dentry);
out:
	return err;
}

static bool ovl_need_xattr_filter(struct dentry *dentry,
				  enum ovl_path_type type)
{
	if ((type & (__OVL_PATH_PURE | __OVL_PATH_UPPER)) == __OVL_PATH_UPPER)
		return S_ISDIR(dentry->d_inode->i_mode);
	else
		return false;
}

ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
		     void *value, size_t size)
{
	struct path realpath;
	enum ovl_path_type type = ovl_path_real(dentry, &realpath);

	if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name))
		return -ENODATA;

	return vfs_getxattr(realpath.dentry, name, value, size);
}

ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
{
	struct path realpath;
	enum ovl_path_type type = ovl_path_real(dentry, &realpath);
	ssize_t res;
	int off;

	res = vfs_listxattr(realpath.dentry, list, size);
	if (res <= 0 || size == 0)
		return res;

	if (!ovl_need_xattr_filter(dentry, type))
		return res;

	/* filter out private xattrs */
	for (off = 0; off < res;) {
		char *s = list + off;
		size_t slen = strlen(s) + 1;

		BUG_ON(off + slen > res);

		if (ovl_is_private_xattr(s)) {
			res -= slen;
			memmove(s, s + slen, res - off);
		} else {
			off += slen;
		}
	}

	return res;
}

int ovl_removexattr(struct dentry *dentry, const char *name)
{
	int err;
	struct path realpath;
	enum ovl_path_type type = ovl_path_real(dentry, &realpath);

	err = ovl_want_write(dentry);
	if (err)
		goto out;

	err = -ENODATA;
	if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name))
		goto out_drop_write;

	if (!OVL_TYPE_UPPER(type)) {
		err = vfs_getxattr(realpath.dentry, name, NULL, 0);
		if (err < 0)
			goto out_drop_write;

		err = ovl_copy_up(dentry);
		if (err)
			goto out_drop_write;

		ovl_path_upper(dentry, &realpath);
	}

	err = vfs_removexattr(realpath.dentry, name);
out_drop_write:
	ovl_drop_write(dentry);
out:
	return err;
}

static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,
				  struct dentry *realdentry)
{
	if (OVL_TYPE_UPPER(type))
		return false;

	if (special_file(realdentry->d_inode->i_mode))
		return false;

	if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC))
		return false;

	return true;
}

static int ovl_dentry_open(struct dentry *dentry, struct file *file,
			   const struct cred *cred)
{
	int err;
	struct path realpath;
	enum ovl_path_type type;
	bool want_write = false;

	type = ovl_path_real(dentry, &realpath);
	if (!ovl_is_nocopyupw(dentry)) {
		if (ovl_open_need_copy_up(file->f_flags, type,
					  realpath.dentry)) {
			want_write = true;
			err = ovl_want_write(dentry);
			if (err)
				goto out;

			if (file->f_flags & O_TRUNC)
				err = ovl_copy_up_last(dentry, NULL, true);
			else
				err = ovl_copy_up(dentry);
			if (err)
				goto out_drop_write;

			ovl_path_upper(dentry, &realpath);
		}
	}

	err = vfs_open(&realpath, file, cred);
out_drop_write:
	if (want_write)
		ovl_drop_write(dentry);
out:
	return err;
}

static const struct inode_operations_wrapper ovl_file_inode_operations = {
	.ops = {
		.setattr = ovl_setattr,
		.permission = ovl_permission,
		.getattr = ovl_getattr,
		.setxattr = ovl_setxattr,
		.getxattr = ovl_getxattr,
		.listxattr = ovl_listxattr,
		.removexattr = ovl_removexattr,
	},
	.dentry_open = ovl_dentry_open,
};

static const struct inode_operations ovl_symlink_inode_operations = {
	.setattr = ovl_setattr,
	.follow_link = ovl_follow_link,
	.put_link = ovl_put_link,
	.readlink = ovl_readlink,
	.getattr = ovl_getattr,
	.setxattr = ovl_setxattr,
	.getxattr = ovl_getxattr,
	.listxattr = ovl_listxattr,
	.removexattr = ovl_removexattr,
};

struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
			    struct ovl_entry *oe)
{
	struct inode *inode;

	inode = new_inode(sb);
	if (!inode)
		return NULL;

	mode &= S_IFMT;

	inode->i_ino = get_next_ino();
	inode->i_mode = mode;
	inode->i_flags |= S_NOATIME | S_NOCMTIME;

	switch (mode) {
	case S_IFDIR:
		inode->i_private = oe;
		inode->i_op = &ovl_dir_inode_operations.ops;
		inode->i_fop = &ovl_dir_operations;
		inode->i_flags |= S_IOPS_WRAPPER;
		break;

	case S_IFLNK:
		inode->i_op = &ovl_symlink_inode_operations;
		break;

	case S_IFREG:
	case S_IFSOCK:
	case S_IFBLK:
	case S_IFCHR:
	case S_IFIFO:
		inode->i_op = &ovl_file_inode_operations.ops;
		inode->i_flags |= S_IOPS_WRAPPER;
		break;

	default:
		WARN(1, "illegal file type: %i\n", mode);
		iput(inode);
		inode = NULL;
	}

	return inode;
}
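ovl_listxattr() above compacts the attribute list in place, using memmove() to slide the remaining names over each private "trusted.overlay.*" entry it drops. The same compaction, sketched as a standalone function over a NUL-separated name list, is shown below purely for illustration; the helper name and the user-space setting are assumptions.

#include <stddef.h>
#include <string.h>

/* Drop every name with the given prefix from a NUL-separated list of
 * 'len' bytes and return the new length, mirroring the in-place
 * compaction done by ovl_listxattr(). */
static size_t filter_prefixed_names(char *list, size_t len, const char *prefix)
{
	size_t plen = strlen(prefix);
	size_t off = 0;

	while (off < len) {
		size_t slen = strlen(list + off) + 1;

		if (strncmp(list + off, prefix, plen) == 0) {
			len -= slen;
			memmove(list + off, list + off + slen, len - off);
		} else {
			off += slen;
		}
	}
	return len;
}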
@@ -0,0 +1,200 @@
/*
 *
 * Copyright (C) 2011 Novell Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 */

#include <linux/kernel.h>

struct ovl_entry;

enum ovl_path_type {
	__OVL_PATH_PURE = (1 << 0),
	__OVL_PATH_UPPER = (1 << 1),
	__OVL_PATH_MERGE = (1 << 2),
};

#define OVL_TYPE_UPPER(type)	((type) & __OVL_PATH_UPPER)
#define OVL_TYPE_MERGE(type)	((type) & __OVL_PATH_MERGE)
#define OVL_TYPE_PURE_UPPER(type) ((type) & __OVL_PATH_PURE)
#define OVL_TYPE_MERGE_OR_LOWER(type) \
	(OVL_TYPE_MERGE(type) || !OVL_TYPE_UPPER(type))

#define OVL_XATTR_PRE_NAME "trusted.overlay."
#define OVL_XATTR_PRE_LEN  16
#define OVL_XATTR_OPAQUE   OVL_XATTR_PRE_NAME"opaque"

static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
{
	int err = vfs_rmdir(dir, dentry);
	pr_debug("rmdir(%pd2) = %i\n", dentry, err);
	return err;
}

static inline int ovl_do_unlink(struct inode *dir, struct dentry *dentry)
{
	int err = vfs_unlink(dir, dentry, NULL);
	pr_debug("unlink(%pd2) = %i\n", dentry, err);
	return err;
}

static inline int ovl_do_link(struct dentry *old_dentry, struct inode *dir,
			      struct dentry *new_dentry, bool debug)
{
	int err = vfs_link(old_dentry, dir, new_dentry, NULL);
	if (debug) {
		pr_debug("link(%pd2, %pd2) = %i\n",
			 old_dentry, new_dentry, err);
	}
	return err;
}

static inline int ovl_do_create(struct inode *dir, struct dentry *dentry,
				umode_t mode, bool debug)
{
	int err = vfs_create(dir, dentry, mode, true);
	if (debug)
		pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err);
	return err;
}

static inline int ovl_do_mkdir(struct inode *dir, struct dentry *dentry,
			       umode_t mode, bool debug)
{
	int err = vfs_mkdir(dir, dentry, mode);
	if (debug)
		pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err);
	return err;
}

static inline int ovl_do_mknod(struct inode *dir, struct dentry *dentry,
			       umode_t mode, dev_t dev, bool debug)
{
	int err = vfs_mknod(dir, dentry, mode, dev);
	if (debug) {
		pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n",
			 dentry, mode, dev, err);
	}
	return err;
}

static inline int ovl_do_symlink(struct inode *dir, struct dentry *dentry,
				 const char *oldname, bool debug)
{
	int err = vfs_symlink(dir, dentry, oldname);
	if (debug)
		pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, err);
	return err;
}

static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
				  const void *value, size_t size, int flags)
{
	int err = vfs_setxattr(dentry, name, value, size, flags);
	pr_debug("setxattr(%pd2, \"%s\", \"%*s\", 0x%x) = %i\n",
		 dentry, name, (int) size, (char *) value, flags, err);
	return err;
}

static inline int ovl_do_removexattr(struct dentry *dentry, const char *name)
{
	int err = vfs_removexattr(dentry, name);
	pr_debug("removexattr(%pd2, \"%s\") = %i\n", dentry, name, err);
	return err;
}

static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry,
				struct inode *newdir, struct dentry *newdentry,
				unsigned int flags)
{
	int err;

	pr_debug("rename2(%pd2, %pd2, 0x%x)\n",
		 olddentry, newdentry, flags);

	err = vfs_rename(olddir, olddentry, newdir, newdentry, NULL, flags);

	if (err) {
		pr_debug("...rename2(%pd2, %pd2, ...) = %i\n",
			 olddentry, newdentry, err);
	}
	return err;
}

static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
{
	int err = vfs_whiteout(dir, dentry);
	pr_debug("whiteout(%pd2) = %i\n", dentry, err);
	return err;
}

bool ovl_is_nocopyupw(struct dentry *dentry);
enum ovl_path_type ovl_path_type(struct dentry *dentry);
u64 ovl_dentry_version_get(struct dentry *dentry);
void ovl_dentry_version_inc(struct dentry *dentry);
void ovl_path_upper(struct dentry *dentry, struct path *path);
void ovl_path_lower(struct dentry *dentry, struct path *path);
enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
struct dentry *ovl_dentry_upper(struct dentry *dentry);
struct dentry *ovl_dentry_lower(struct dentry *dentry);
struct dentry *ovl_dentry_real(struct dentry *dentry);
struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper);
struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry);
void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache);
struct dentry *ovl_workdir(struct dentry *dentry);
int ovl_want_write(struct dentry *dentry);
void ovl_drop_write(struct dentry *dentry);
bool ovl_dentry_is_opaque(struct dentry *dentry);
void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque);
bool ovl_is_whiteout(struct dentry *dentry);
void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry);
struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
			  unsigned int flags);
struct file *ovl_path_open(struct path *path, int flags);

struct dentry *ovl_upper_create(struct dentry *upperdir, struct dentry *dentry,
				struct kstat *stat, const char *link);

/* readdir.c */
extern const struct file_operations ovl_dir_operations;
int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list);
void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list);
void ovl_cache_free(struct list_head *list);

/* inode.c */
int ovl_setattr(struct dentry *dentry, struct iattr *attr);
int ovl_permission(struct inode *inode, int mask);
int ovl_setxattr(struct dentry *dentry, const char *name,
		 const void *value, size_t size, int flags);
ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
		     void *value, size_t size);
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
int ovl_removexattr(struct dentry *dentry, const char *name);

struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
			    struct ovl_entry *oe);
static inline void ovl_copyattr(struct inode *from, struct inode *to)
{
	to->i_uid = from->i_uid;
	to->i_gid = from->i_gid;
}

/* dir.c */
extern const struct inode_operations_wrapper ovl_dir_inode_operations;
struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry);
int ovl_create_real(struct inode *dir, struct dentry *newdentry,
		    struct kstat *stat, const char *link,
		    struct dentry *hardlink, bool debug);
void ovl_cleanup(struct inode *dir, struct dentry *dentry);

/* copy_up.c */
int ovl_copy_up(struct dentry *dentry);
int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
		    struct path *lowerpath, struct kstat *stat,
		    struct iattr *attr);
int ovl_copy_xattr(struct dentry *old, struct dentry *new);
int ovl_set_attr(struct dentry *upper, struct kstat *stat);
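OVL_XATTR_OPAQUE above expands to "trusted.overlay.opaque", and an upper directory carrying that attribute with the value "y" hides the corresponding lower directory; that is the flag ovl_set_opaque() and ovl_dentry_is_opaque() revolve around. A small sketch of probing it from user space follows; reading the trusted namespace requires CAP_SYS_ADMIN, and the helper name is an assumption made for illustration.

#include <sys/types.h>
#include <sys/xattr.h>

/* Hypothetical probe: 1 if the upper directory is marked opaque,
 * 0 if not, -1 on error (including EPERM without CAP_SYS_ADMIN). */
static int upper_dir_is_opaque(const char *upper_dir)
{
	char val = 0;
	ssize_t n = lgetxattr(upper_dir, "trusted.overlay.opaque", &val, 1);

	if (n < 0)
		return -1;
	return n == 1 && val == 'y';
}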
executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/readdir.c (new file, 588 lines)
@@ -0,0 +1,588 @@
/*
 *
 * Copyright (C) 2011 Novell Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 */

#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/file.h>
#include <linux/xattr.h>
#include <linux/rbtree.h>
#include <linux/security.h>
#include <linux/cred.h>
#include "overlayfs.h"

struct ovl_cache_entry {
	unsigned int len;
	unsigned int type;
	u64 ino;
	struct list_head l_node;
	struct rb_node node;
	struct ovl_cache_entry *next_maybe_whiteout;
	bool is_whiteout;
	char name[];
};

struct ovl_dir_cache {
	long refcount;
	u64 version;
	struct list_head entries;
};

struct dir_context {
	const filldir_t actor;
	//loff_t pos;
};

struct ovl_readdir_data {
	struct dir_context ctx;
	bool is_merge;
	struct rb_root root;
	struct list_head *list;
	struct list_head middle;
	struct ovl_cache_entry *first_maybe_whiteout;
	int count;
	int err;
};

struct ovl_dir_file {
	bool is_real;
	bool is_upper;
	struct ovl_dir_cache *cache;
	struct list_head *cursor;
	struct file *realfile;
	struct file *upperfile;
};

static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
{
	return container_of(n, struct ovl_cache_entry, node);
}

static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
						    const char *name, int len)
{
	struct rb_node *node = root->rb_node;
	int cmp;

	while (node) {
		struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);

		cmp = strncmp(name, p->name, len);
		if (cmp > 0)
			node = p->node.rb_right;
		else if (cmp < 0 || len < p->len)
			node = p->node.rb_left;
		else
			return p;
	}

	return NULL;
}

static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
						   const char *name, int len,
						   u64 ino, unsigned int d_type)
{
	struct ovl_cache_entry *p;
	size_t size = offsetof(struct ovl_cache_entry, name[len + 1]);

	p = kmalloc(size, GFP_KERNEL);
	if (!p)
		return NULL;

	memcpy(p->name, name, len);
	p->name[len] = '\0';
	p->len = len;
	p->type = d_type;
	p->ino = ino;
	p->is_whiteout = false;

	if (d_type == DT_CHR) {
		p->next_maybe_whiteout = rdd->first_maybe_whiteout;
		rdd->first_maybe_whiteout = p;
	}
	return p;
}

static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
				  const char *name, int len, u64 ino,
				  unsigned int d_type)
{
	struct rb_node **newp = &rdd->root.rb_node;
	struct rb_node *parent = NULL;
	struct ovl_cache_entry *p;

	while (*newp) {
		int cmp;
		struct ovl_cache_entry *tmp;

		parent = *newp;
		tmp = ovl_cache_entry_from_node(*newp);
		cmp = strncmp(name, tmp->name, len);
		if (cmp > 0)
			newp = &tmp->node.rb_right;
		else if (cmp < 0 || len < tmp->len)
			newp = &tmp->node.rb_left;
		else
			return 0;
	}

	p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
	if (p == NULL)
		return -ENOMEM;

	list_add_tail(&p->l_node, rdd->list);
	rb_link_node(&p->node, parent, newp);
	rb_insert_color(&p->node, &rdd->root);

	return 0;
}

static int ovl_fill_lower(struct ovl_readdir_data *rdd,
			  const char *name, int namelen,
			  loff_t offset, u64 ino, unsigned int d_type)
{
	struct ovl_cache_entry *p;

	p = ovl_cache_entry_find(&rdd->root, name, namelen);
	if (p) {
		list_move_tail(&p->l_node, &rdd->middle);
	} else {
		p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
		if (p == NULL)
			rdd->err = -ENOMEM;
		else
			list_add_tail(&p->l_node, &rdd->middle);
	}

	return rdd->err;
}

void ovl_cache_free(struct list_head *list)
{
	struct ovl_cache_entry *p;
	struct ovl_cache_entry *n;

	list_for_each_entry_safe(p, n, list, l_node)
		kfree(p);

	INIT_LIST_HEAD(list);
}

static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry)
{
	struct ovl_dir_cache *cache = od->cache;

	WARN_ON(cache->refcount <= 0);
	cache->refcount--;
	if (!cache->refcount) {
		if (ovl_dir_cache(dentry) == cache)
			ovl_set_dir_cache(dentry, NULL);

		ovl_cache_free(&cache->entries);
		kfree(cache);
	}
}

static int ovl_fill_merge(void *buf, const char *name, int namelen,
			  loff_t offset, u64 ino, unsigned int d_type)
{
	struct dir_context *ctx = buf;
	struct ovl_readdir_data *rdd =
		container_of(ctx, struct ovl_readdir_data, ctx);

	rdd->count++;
	if (!rdd->is_merge)
		return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
	else
		return ovl_fill_lower(rdd, name, namelen, offset, ino, d_type);
}

static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd)
{
	int err;
	struct ovl_cache_entry *p;
	struct dentry *dentry;
	const struct cred *old_cred;
	struct cred *override_cred;

	override_cred = prepare_creds();
	if (!override_cred)
		return -ENOMEM;

	/*
	 * CAP_DAC_OVERRIDE for lookup
	 */
	cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
	old_cred = override_creds(override_cred);

	err = mutex_lock_killable(&dir->d_inode->i_mutex);
	if (!err) {
		while (rdd->first_maybe_whiteout) {
			p = rdd->first_maybe_whiteout;
			rdd->first_maybe_whiteout = p->next_maybe_whiteout;
			dentry = lookup_one_len(p->name, dir, p->len);
			if (!IS_ERR(dentry)) {
				p->is_whiteout = ovl_is_whiteout(dentry);
				dput(dentry);
			}
		}
		mutex_unlock(&dir->d_inode->i_mutex);
	}
	revert_creds(old_cred);
	put_cred(override_cred);

	return err;
}

static inline int ovl_dir_read(struct path *realpath,
			       struct ovl_readdir_data *rdd)
{
	struct file *realfile;
	int err;

	realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY);
	if (IS_ERR(realfile))
		return PTR_ERR(realfile);

	rdd->first_maybe_whiteout = NULL;
	//rdd->ctx.pos = 0;
	do {
		rdd->count = 0;
		rdd->err = 0;
		err = vfs_readdir(realfile, rdd->ctx.actor, rdd);
		if (err >= 0)
			err = rdd->err;
	} while (!err && rdd->count);

	if (!err && rdd->first_maybe_whiteout)
		err = ovl_check_whiteouts(realpath->dentry, rdd);

	fput(realfile);

	return err;
}

static void ovl_dir_reset(struct file *file)
{
	struct ovl_dir_file *od = file->private_data;
	struct ovl_dir_cache *cache = od->cache;
	struct dentry *dentry = file->f_path.dentry;
	enum ovl_path_type type = ovl_path_type(dentry);

	if (cache && ovl_dentry_version_get(dentry) != cache->version) {
		ovl_cache_put(od, dentry);
		od->cache = NULL;
		od->cursor = NULL;
	}
	WARN_ON(!od->is_real && !OVL_TYPE_MERGE(type));
	if (od->is_real && OVL_TYPE_MERGE(type))
		od->is_real = false;
}

static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list)
{
	int err;
	struct path realpath;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_fill_merge,
		.list = list,
		.root = RB_ROOT,
		.is_merge = false,
	};
	int idx, next;

	for (idx = 0; idx != -1; idx = next) {
		next = ovl_path_next(idx, dentry, &realpath);

		if (next != -1) {
			err = ovl_dir_read(&realpath, &rdd);
			if (err)
				break;
		} else {
			/*
			 * Insert lowest layer entries before upper ones, this
			 * allows offsets to be reasonably constant
			 */
			list_add(&rdd.middle, rdd.list);
			rdd.is_merge = true;
			err = ovl_dir_read(&realpath, &rdd);
			list_del(&rdd.middle);
		}
	}
	return err;
}

static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
{
	struct list_head *p;
	loff_t off = 0;

	list_for_each(p, &od->cache->entries) {
		if (off >= pos)
			break;
		off++;
	}
	/* Cursor is safe since the cache is stable */
	od->cursor = p;
}

static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
{
	int res;
	struct ovl_dir_cache *cache;

	cache = ovl_dir_cache(dentry);
	if (cache && ovl_dentry_version_get(dentry) == cache->version) {
		cache->refcount++;
		return cache;
	}
	ovl_set_dir_cache(dentry, NULL);

	cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
	if (!cache)
		return ERR_PTR(-ENOMEM);

	cache->refcount = 1;
	INIT_LIST_HEAD(&cache->entries);

	res = ovl_dir_read_merged(dentry, &cache->entries);
	if (res) {
		ovl_cache_free(&cache->entries);
		kfree(cache);
		return ERR_PTR(res);
	}

	cache->version = ovl_dentry_version_get(dentry);
	ovl_set_dir_cache(dentry, cache);

	return cache;
}

static int ovl_readdir(struct file *file, void *buf, filldir_t filler)
{
	struct ovl_dir_file *od = file->private_data;
	struct dentry *dentry = file->f_path.dentry;
	struct ovl_cache_entry *p;
	int res;

	if (!file->f_pos)
		ovl_dir_reset(file);

	if (od->is_real) {
		res = vfs_readdir(od->realfile, filler, buf);
		file->f_pos = od->realfile->f_pos;

		return res;
	}

	if (!od->cache) {
		struct ovl_dir_cache *cache;

		cache = ovl_cache_get(dentry);
		if (IS_ERR(cache))
			return PTR_ERR(cache);

		od->cache = cache;
		ovl_seek_cursor(od, file->f_pos);
	}

	while (od->cursor != &od->cache->entries) {
		p = list_entry(od->cursor, struct ovl_cache_entry, l_node);
		if (!p->is_whiteout)
			if (filler(buf, p->name, p->len, file->f_pos, p->ino, p->type))
				break;
		od->cursor = p->l_node.next;
		file->f_pos++;
	}
	return 0;
}

static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
{
	loff_t res;
	struct ovl_dir_file *od = file->private_data;

	mutex_lock(&file_inode(file)->i_mutex);
	if (!file->f_pos)
		ovl_dir_reset(file);

	if (od->is_real) {
		res = vfs_llseek(od->realfile, offset, origin);
		file->f_pos = od->realfile->f_pos;
	} else {
		res = -EINVAL;

		switch (origin) {
		case SEEK_CUR:
			offset += file->f_pos;
			break;
		case SEEK_SET:
			break;
		default:
			goto out_unlock;
		}
		if (offset < 0)
			goto out_unlock;

		if (offset != file->f_pos) {
			file->f_pos = offset;
			if (od->cache)
				ovl_seek_cursor(od, offset);
		}
		res = offset;
	}
out_unlock:
	mutex_unlock(&file_inode(file)->i_mutex);

	return res;
}

static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
			 int datasync)
{
	struct ovl_dir_file *od = file->private_data;
	struct dentry *dentry = file->f_path.dentry;
	struct file *realfile = od->realfile;

	/*
	 * Need to check if we started out being a lower dir, but got copied up
	 */
	if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) {
		struct inode *inode = file_inode(file);

		realfile = lockless_dereference(od->upperfile);
		if (!realfile) {
			struct path upperpath;

			ovl_path_upper(dentry, &upperpath);
			realfile = ovl_path_open(&upperpath, O_RDONLY);
			smp_mb__before_spinlock();
			mutex_lock(&inode->i_mutex);
			if (!od->upperfile) {
				if (IS_ERR(realfile)) {
					mutex_unlock(&inode->i_mutex);
					return PTR_ERR(realfile);
				}
				od->upperfile = realfile;
			} else {
				/* somebody has beaten us to it */
				if (!IS_ERR(realfile))
					fput(realfile);
				realfile = od->upperfile;
			}
			mutex_unlock(&inode->i_mutex);
		}
	}

	return vfs_fsync_range(realfile, start, end, datasync);
}

static int ovl_dir_release(struct inode *inode, struct file *file)
{
	struct ovl_dir_file *od = file->private_data;

	if (od->cache) {
		mutex_lock(&inode->i_mutex);
		ovl_cache_put(od, file->f_path.dentry);
		mutex_unlock(&inode->i_mutex);
	}
	fput(od->realfile);
	if (od->upperfile)
		fput(od->upperfile);
	kfree(od);

	return 0;
}

static int ovl_dir_open(struct inode *inode, struct file *file)
{
	struct path realpath;
	struct file *realfile;
	struct ovl_dir_file *od;
	enum ovl_path_type type;

	od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
	if (!od)
		return -ENOMEM;

	type = ovl_path_real(file->f_path.dentry, &realpath);
	realfile = ovl_path_open(&realpath, file->f_flags);
	if (IS_ERR(realfile)) {
		kfree(od);
		return PTR_ERR(realfile);
	}
	od->realfile = realfile;
	od->is_real = !OVL_TYPE_MERGE(type);
	od->is_upper = OVL_TYPE_UPPER(type);
	file->private_data = od;

	return 0;
}

const struct file_operations ovl_dir_operations = {
	.read = generic_read_dir,
	.open = ovl_dir_open,
	.readdir = ovl_readdir,
	.llseek = ovl_dir_llseek,
	.fsync = ovl_dir_fsync,
	.release = ovl_dir_release,
};

int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
{
	int err;
	struct ovl_cache_entry *p;

	err = ovl_dir_read_merged(dentry, list);
	if (err)
		return err;

	err = 0;

	list_for_each_entry(p, list, l_node) {
		if (p->is_whiteout)
			continue;

		if (p->name[0] == '.') {
			if (p->len == 1)
				continue;
			if (p->len == 2 && p->name[1] == '.')
				continue;
		}
		err = -ENOTEMPTY;
		break;
	}

	return err;
}

void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
{
	struct ovl_cache_entry *p;

	mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_CHILD);
	list_for_each_entry(p, list, l_node) {
		struct dentry *dentry;

		if (!p->is_whiteout)
			continue;

		dentry = lookup_one_len(p->name, upper, p->len);
		if (IS_ERR(dentry)) {
			pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n",
			       upper->d_name.name, p->len, p->name,
			       (int) PTR_ERR(dentry));
			continue;
		}
		ovl_cleanup(upper->d_inode, dentry);
		dput(dentry);
	}
	mutex_unlock(&upper->d_inode->i_mutex);
}
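The cache code above orders names with strncmp(name, p->name, len) plus a "len < p->len" tie-break, because names delivered through the filldir callback carry an explicit length and no NUL terminator. A self-contained sketch of that comparison rule, assuming the cached name is NUL-terminated as ovl_cache_entry_new() guarantees, is given below for illustration only.

#include <string.h>

/* Compare a (name, len) pair against a NUL-terminated cached name the way
 * ovl_cache_entry_find() does: prefix order first, and on a tie the
 * shorter lookup name sorts before the longer cached one. */
static int cache_name_cmp(const char *name, size_t len, const char *cached)
{
	int cmp = strncmp(name, cached, len);

	if (cmp)
		return cmp;
	return len < strlen(cached) ? -1 : 0;
}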
executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/super.c (new file, 1203 lines)
File diff suppressed because it is too large
executer/kernel/mcoverlayfs/linux-4.0.9/Makefile.in (new file, 21 lines)
@@ -0,0 +1,21 @@
KDIR ?= @KDIR@
ARCH ?= @ARCH@
KMODDIR = @KMODDIR@
src = @abs_srcdir@

obj-m += mcoverlay.o

mcoverlay-y := copy_up.o dir.o inode.o readdir.o super.o

.PHONY: clean install modules

modules:
	$(MAKE) -C $(KDIR) M=$(PWD) SUBDIRS=$(PWD) ARCH=$(ARCH) modules

clean:
	$(RM) .*.cmd *.mod.c *.o *.ko* Module.symvers modules.order -r .tmp*

install:
	mkdir -p -m 755 $(KMODDIR)
	install -m 644 mcoverlay.ko $(KMODDIR)
@@ -167,6 +167,7 @@ enum {
|
|||||||
CURRENT_OFFSET,
|
CURRENT_OFFSET,
|
||||||
RUNQ_OFFSET,
|
RUNQ_OFFSET,
|
||||||
CPU_STATUS_OFFSET,
|
CPU_STATUS_OFFSET,
|
||||||
|
IDLE_THREAD_OFFSET,
|
||||||
|
|
||||||
/* process */
|
/* process */
|
||||||
CTX_OFFSET,
|
CTX_OFFSET,
|
||||||
@@ -204,6 +205,7 @@ static int setup_constants(void) {
|
|||||||
printf("CURRENT_OFFSET: %ld\n", K(CURRENT_OFFSET));
|
printf("CURRENT_OFFSET: %ld\n", K(CURRENT_OFFSET));
|
||||||
printf("RUNQ_OFFSET: %ld\n", K(RUNQ_OFFSET));
|
printf("RUNQ_OFFSET: %ld\n", K(RUNQ_OFFSET));
|
||||||
printf("CPU_STATUS_OFFSET: %ld\n", K(CPU_STATUS_OFFSET));
|
printf("CPU_STATUS_OFFSET: %ld\n", K(CPU_STATUS_OFFSET));
|
||||||
|
printf("IDLE_THREAD_OFFSET: %ld\n", K(IDLE_THREAD_OFFSET));
|
||||||
printf("CTX_OFFSET: %ld\n", K(CTX_OFFSET));
|
printf("CTX_OFFSET: %ld\n", K(CTX_OFFSET));
|
||||||
printf("SCHED_LIST_OFFSET: %ld\n", K(SCHED_LIST_OFFSET));
|
printf("SCHED_LIST_OFFSET: %ld\n", K(SCHED_LIST_OFFSET));
|
||||||
printf("PROC_OFFSET: %ld\n", K(PROC_OFFSET));
|
printf("PROC_OFFSET: %ld\n", K(PROC_OFFSET));
|
||||||
@@ -251,6 +253,64 @@ static int setup_threads(void) {
|
|||||||
ihk_mc_switch_context = lookup_symbol("ihk_mc_switch_context");
|
ihk_mc_switch_context = lookup_symbol("ihk_mc_switch_context");
|
||||||
if (0) printf("ihk_mc_switch_context: %lx\n", ihk_mc_switch_context);
|
if (0) printf("ihk_mc_switch_context: %lx\n", ihk_mc_switch_context);
|
||||||
|
|
||||||
|
/* Set up idle threads first */
|
||||||
|
for (cpu = 0; cpu < num_processors; ++cpu) {
|
||||||
|
uintptr_t v;
|
||||||
|
uintptr_t thread;
|
||||||
|
uintptr_t proc;
|
||||||
|
int pid;
|
||||||
|
int tid;
|
||||||
|
struct thread_info *ti;
|
||||||
|
int status;
|
||||||
|
|
||||||
|
v = clv + (cpu * K(CPU_LOCAL_VAR_SIZE));
|
||||||
|
|
||||||
|
ti = malloc(sizeof(*ti));
|
||||||
|
if (!ti) {
|
||||||
|
perror("malloc");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
thread = v+K(IDLE_THREAD_OFFSET);
|
||||||
|
|
||||||
|
error = read_64(thread+K(PROC_OFFSET), &proc);
|
||||||
|
if (error) {
|
||||||
|
perror("proc");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
error = read_32(thread+K(STATUS_OFFSET), &status);
|
||||||
|
if (error) {
|
||||||
|
perror("status");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
error = read_32(proc+K(PID_OFFSET), &pid);
|
||||||
|
if (error) {
|
||||||
|
perror("pid");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
error = read_32(thread+K(TID_OFFSET), &tid);
|
||||||
|
if (error) {
|
||||||
|
perror("tid");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
ti->next = NULL;
|
||||||
|
ti->status = status;
|
||||||
|
ti->pid = pid;
|
||||||
|
ti->tid = tid;
|
||||||
|
ti->cpu = cpu;
|
||||||
|
ti->lcpu = cpu;
|
||||||
|
ti->process = thread;
|
||||||
|
ti->clv = v;
|
||||||
|
ti->x86_clv = locals + locals_span*cpu;
|
||||||
|
|
||||||
|
*titailp = ti;
|
||||||
|
titailp = &ti->next;
|
||||||
|
}
|
||||||
|
|
||||||
for (cpu = 0; cpu < num_processors; ++cpu) {
|
for (cpu = 0; cpu < num_processors; ++cpu) {
|
||||||
uintptr_t v;
|
uintptr_t v;
|
||||||
uintptr_t head;
|
uintptr_t head;
|
||||||
|
|||||||
@@ -101,6 +101,19 @@ int __glob_argc = -1;
|
|||||||
char **__glob_argv = 0;
|
char **__glob_argv = 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef ENABLE_MCOVERLAYFS
|
||||||
|
#undef ENABLE_MCOVERLAYFS
|
||||||
|
#ifndef RHEL_RELEASE_CODE
|
||||||
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,0,0) && LINUX_VERSION_CODE < KERNEL_VERSION(4,1,0)
|
||||||
|
#define ENABLE_MCOVERLAYFS 1
|
||||||
|
#endif // LINUX_VERSION_CODE == 4.0
|
||||||
|
#else
|
||||||
|
#if RHEL_RELEASE_CODE == RHEL_RELEASE_VERSION(7,2)
|
||||||
|
#define ENABLE_MCOVERLAYFS 1
|
||||||
|
#endif // RHEL_RELEASE_CODE == 7.2
|
||||||
|
#endif // RHEL_RELEASE_CODE
|
||||||
|
#endif // ENABLE_MCOVERLAYFS
|
||||||
|
|
||||||
typedef unsigned char cc_t;
|
typedef unsigned char cc_t;
|
||||||
typedef unsigned int speed_t;
|
typedef unsigned int speed_t;
|
||||||
typedef unsigned int tcflag_t;
|
typedef unsigned int tcflag_t;
|
||||||
@@ -375,7 +388,7 @@ struct program_load_desc *load_interp(struct program_load_desc *desc0, FILE *fp)
|
|||||||
|
|
||||||
unsigned char *dma_buf;
|
unsigned char *dma_buf;
|
||||||
|
|
||||||
int lookup_exec_path(char *filename, char *path, int max_len)
|
int lookup_exec_path(char *filename, char *path, int max_len, int execvp)
|
||||||
{
|
{
|
||||||
int found;
|
int found;
|
||||||
int error;
|
int error;
|
||||||
@@ -393,28 +406,27 @@ retry:
|
|||||||
|
|
||||||
char *token, *string, *tofree;
|
char *token, *string, *tofree;
|
||||||
char *PATH = getenv("COKERNEL_PATH");
|
char *PATH = getenv("COKERNEL_PATH");
|
||||||
if (!PATH) {
|
|
||||||
|
if (!execvp) {
|
||||||
|
if (strlen(filename) + 1 > max_len) {
|
||||||
|
return ENAMETOOLONG;
|
||||||
|
}
|
||||||
|
strcpy(path, filename);
|
||||||
|
error = access(path, X_OK);
|
||||||
|
if (error) {
|
||||||
|
return errno;
|
||||||
|
}
|
||||||
|
found = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!(PATH = getenv("COKERNEL_PATH"))) {
|
||||||
PATH = getenv("PATH");
|
PATH = getenv("PATH");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (strlen(filename) >= 255) {
|
if (strlen(filename) >= 255) {
|
||||||
return ENAMETOOLONG;
|
return ENAMETOOLONG;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* See first whether file is available in current working dir */
|
|
||||||
error = access(filename, X_OK);
|
|
||||||
if (error == 0) {
|
|
||||||
__dprintf("lookup_exec_path(): found %s in cwd\n", filename);
|
|
||||||
error = snprintf(path, max_len, "%s", filename);
|
|
||||||
|
|
||||||
if (error < 0 || error >= max_len) {
|
|
||||||
fprintf(stderr, "lookup_exec_path(): array too small?\n");
|
|
||||||
return ENOMEM;
|
|
||||||
}
|
|
||||||
|
|
||||||
found = 1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
__dprintf("PATH: %s\n", PATH);
|
__dprintf("PATH: %s\n", PATH);
|
||||||
|
|
||||||
@@ -442,6 +454,9 @@ retry:
|
|||||||
}
|
}
|
||||||
|
|
||||||
free(tofree);
|
free(tofree);
|
||||||
|
if(!found){
|
||||||
|
return ENOENT;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -654,7 +669,7 @@ int load_elf_desc(char *filename, struct program_load_desc **desc_p,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void transfer_image(int fd, struct program_load_desc *desc)
|
int transfer_image(int fd, struct program_load_desc *desc)
|
||||||
{
|
{
|
||||||
struct remote_transfer pt;
|
struct remote_transfer pt;
|
||||||
unsigned long s, e, flen, rpa;
|
unsigned long s, e, flen, rpa;
|
||||||
@@ -668,7 +683,10 @@ void transfer_image(int fd, struct program_load_desc *desc)
|
|||||||
+ PAGE_SIZE - 1) & PAGE_MASK;
|
+ PAGE_SIZE - 1) & PAGE_MASK;
|
||||||
rpa = desc->sections[i].remote_pa;
|
rpa = desc->sections[i].remote_pa;
|
||||||
|
|
||||||
fseek(fp, desc->sections[i].offset, SEEK_SET);
|
if (fseek(fp, desc->sections[i].offset, SEEK_SET) != 0) {
|
||||||
|
fprintf(stderr, "transfer_image(): error: seeking file position\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
flen = desc->sections[i].filesz;
|
flen = desc->sections[i].filesz;
|
||||||
|
|
||||||
__dprintf("seeked to %lx | size %ld\n",
|
__dprintf("seeked to %lx | size %ld\n",
|
||||||
@@ -690,7 +708,20 @@ void transfer_image(int fd, struct program_load_desc *desc)
|
|||||||
if (lr > flen) {
|
if (lr > flen) {
|
||||||
lr = flen;
|
lr = flen;
|
||||||
}
|
}
|
||||||
fread(dma_buf + l, 1, lr, fp);
|
if (fread(dma_buf + l, 1, lr, fp) != lr) {
|
||||||
|
if (ferror(fp) > 0) {
|
||||||
|
fprintf(stderr, "transfer_image(): error: accessing file\n");
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
else if (feof(fp) > 0) {
|
||||||
|
fprintf(stderr, "transfer_image(): file too short?\n");
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/* TODO: handle smaller reads.. */
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
}
|
||||||
flen -= lr;
|
flen -= lr;
|
||||||
}
|
}
|
||||||
else if (flen > 0) {
|
else if (flen > 0) {
|
||||||
@@ -699,7 +730,20 @@ void transfer_image(int fd, struct program_load_desc *desc)
|
|||||||
} else {
|
} else {
|
||||||
lr = flen;
|
lr = flen;
|
||||||
}
|
}
|
||||||
fread(dma_buf, 1, lr, fp);
|
if (fread(dma_buf, 1, lr, fp) != lr) {
|
||||||
|
if (ferror(fp) > 0) {
|
||||||
|
fprintf(stderr, "transfer_image(): error: accessing file\n");
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
else if (feof(fp) > 0) {
|
||||||
|
fprintf(stderr, "transfer_image(): file too short?\n");
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/* TODO: handle smaller reads.. */
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
}
|
||||||
flen -= lr;
|
flen -= lr;
|
||||||
}
|
}
|
||||||
s += PAGE_SIZE;
|
s += PAGE_SIZE;
|
||||||
@@ -715,6 +759,8 @@ void transfer_image(int fd, struct program_load_desc *desc)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_desc(struct program_load_desc *desc)
|
void print_desc(struct program_load_desc *desc)
|
||||||
@@ -837,7 +883,10 @@ struct thread_data_s {
|
|||||||
pthread_mutex_t *lock;
|
pthread_mutex_t *lock;
|
||||||
pthread_barrier_t *init_ready;
|
pthread_barrier_t *init_ready;
|
||||||
} *thread_data;
|
} *thread_data;
|
||||||
|
|
||||||
int ncpu;
|
int ncpu;
|
||||||
|
int n_threads;
|
||||||
|
|
||||||
pid_t master_tid;
|
pid_t master_tid;
|
||||||
|
|
||||||
pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
|
pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
|
||||||
@@ -848,7 +897,7 @@ static void *main_loop_thread_func(void *arg)
|
|||||||
struct thread_data_s *td = (struct thread_data_s *)arg;
|
struct thread_data_s *td = (struct thread_data_s *)arg;
|
||||||
|
|
||||||
td->tid = gettid();
|
td->tid = gettid();
|
||||||
td->remote_tid = (int)td->tid;
|
td->remote_tid = -1;
|
||||||
pthread_barrier_wait(&init_ready);
|
pthread_barrier_wait(&init_ready);
|
||||||
td->ret = main_loop(td->fd, td->cpu, td->lock);
|
td->ret = main_loop(td->fd, td->cpu, td->lock);
|
||||||
|
|
||||||
@@ -931,7 +980,10 @@ act_signalfd4(struct syscall_wait_desc *w)
|
|||||||
flags |= O_NONBLOCK;
|
flags |= O_NONBLOCK;
|
||||||
if(tmp & SFD_CLOEXEC)
|
if(tmp & SFD_CLOEXEC)
|
||||||
flags |= O_CLOEXEC;
|
flags |= O_CLOEXEC;
|
||||||
pipe2(sfd->sigpipe, flags);
|
if (pipe2(sfd->sigpipe, flags) < 0) {
|
||||||
|
perror("pipe2 failed:");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
sfd->next = sigfdtop;
|
sfd->next = sigfdtop;
|
||||||
sigfdtop = sfd;
|
sigfdtop = sfd;
|
||||||
rc = sfd->sigpipe[0];
|
rc = sfd->sigpipe[0];
|
||||||
@@ -962,7 +1014,11 @@ act_signalfd4(struct syscall_wait_desc *w)
|
|||||||
rc = -EBADF;
|
rc = -EBADF;
|
||||||
else{
|
else{
|
||||||
info = (struct signalfd_siginfo *)w->sr.args[2];
|
info = (struct signalfd_siginfo *)w->sr.args[2];
|
||||||
write(sfd->sigpipe[1], info, sizeof(struct signalfd_siginfo));
|
if (write(sfd->sigpipe[1], info, sizeof(struct signalfd_siginfo))
|
||||||
|
!= sizeof(struct signalfd_siginfo)) {
|
||||||
|
fprintf(stderr, "error: writing sigpipe\n");
|
||||||
|
rc = -EBADF;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -1068,9 +1124,9 @@ void init_worker_threads(int fd)
|
|||||||
int i;
|
int i;
|
||||||
|
|
||||||
pthread_mutex_init(&lock, NULL);
|
pthread_mutex_init(&lock, NULL);
|
||||||
pthread_barrier_init(&init_ready, NULL, ncpu + 2);
|
pthread_barrier_init(&init_ready, NULL, n_threads + 2);
|
||||||
|
|
||||||
for (i = 0; i <= ncpu; ++i) {
|
for (i = 0; i <= n_threads; ++i) {
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
thread_data[i].fd = fd;
|
thread_data[i].fd = fd;
|
||||||
@@ -1091,7 +1147,6 @@ void init_worker_threads(int fd)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef ENABLE_MCOVERLAYFS
|
#ifdef ENABLE_MCOVERLAYFS
|
||||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,0,0) && LINUX_VERSION_CODE < KERNEL_VERSION(4,1,0)
|
|
||||||
#define READ_BUFSIZE 1024
|
#define READ_BUFSIZE 1024
|
||||||
static int isunshare(void)
|
static int isunshare(void)
|
||||||
{
|
{
|
||||||
@@ -1163,7 +1218,6 @@ static int isunshare(void)
|
|||||||
__dprintf("err=%d\n", err);
|
__dprintf("err=%d\n", err);
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
#endif // ENABLE_MCOVERLAYFS
|
#endif // ENABLE_MCOVERLAYFS
|
||||||
|
|
||||||
#define MCK_RLIMIT_AS 0
|
#define MCK_RLIMIT_AS 0
|
||||||
@@ -1353,7 +1407,6 @@ int main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef ENABLE_MCOVERLAYFS
|
#ifdef ENABLE_MCOVERLAYFS
|
||||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,0,0) && LINUX_VERSION_CODE < KERNEL_VERSION(4,1,0)
|
|
||||||
__dprintf("mcoverlay enable\n");
|
__dprintf("mcoverlay enable\n");
|
||||||
char mcos_procdir[PATH_MAX];
|
char mcos_procdir[PATH_MAX];
|
||||||
char mcos_sysdir[PATH_MAX];
|
char mcos_sysdir[PATH_MAX];
|
||||||
@@ -1401,12 +1454,11 @@ int main(int argc, char **argv)
|
|||||||
} else if (error == -1) {
|
} else if (error == -1) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
#else
|
#else
|
||||||
__dprintf("mcoverlay disable\n");
|
__dprintf("mcoverlay disable\n");
|
||||||
#endif // ENABLE_MCOVERLAYFS
|
#endif // ENABLE_MCOVERLAYFS
|
||||||
|
|
||||||
if (lookup_exec_path(argv[optind], path, sizeof(path)) != 0) {
|
if (lookup_exec_path(argv[optind], path, sizeof(path), 1) != 0) {
|
||||||
fprintf(stderr, "error: finding file: %s\n", argv[optind]);
|
fprintf(stderr, "error: finding file: %s\n", argv[optind]);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@@ -1418,7 +1470,7 @@ int main(int argc, char **argv)
|
|||||||
|
|
||||||
/* Check whether shell script */
|
/* Check whether shell script */
|
||||||
if (shell) {
|
if (shell) {
|
||||||
if (lookup_exec_path(shell, shell_path, sizeof(shell_path)) != 0) {
|
if (lookup_exec_path(shell, shell_path, sizeof(shell_path), 0) != 0) {
|
||||||
fprintf(stderr, "error: finding file: %s\n", shell);
|
fprintf(stderr, "error: finding file: %s\n", shell);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@@ -1480,6 +1532,19 @@ int main(int argc, char **argv)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
n_threads = ncpu;
|
||||||
|
if (ncpu > 16) {
|
||||||
|
n_threads = 16;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* XXX: keep thread_data ncpu sized despite that there are only
|
||||||
|
* n_threads worker threads in the pool so that signaling code
|
||||||
|
* keeps working.
|
||||||
|
*
|
||||||
|
* TODO: fix signaling code to be independent of TIDs.
|
||||||
|
* TODO: implement dynaic thread pool resizing.
|
||||||
|
*/
|
||||||
thread_data = (struct thread_data_s *)malloc(sizeof(struct thread_data_s) * (ncpu + 1));
|
thread_data = (struct thread_data_s *)malloc(sizeof(struct thread_data_s) * (ncpu + 1));
|
||||||
memset(thread_data, '\0', sizeof(struct thread_data_s) * (ncpu + 1));
|
memset(thread_data, '\0', sizeof(struct thread_data_s) * (ncpu + 1));
|
||||||
|
|
||||||
@@ -1522,7 +1587,10 @@ int main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
print_desc(desc);
|
print_desc(desc);
|
||||||
transfer_image(fd, desc);
|
if (transfer_image(fd, desc) < 0) {
|
||||||
|
fprintf(stderr, "error: transferring image\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
fflush(stderr);
|
fflush(stderr);
|
||||||
|
|
||||||
@@ -1561,7 +1629,7 @@ int main(int argc, char **argv)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i <= ncpu; ++i) {
|
for (i = 0; i <= n_threads; ++i) {
|
||||||
pthread_join(thread_data[i].thread_id, NULL);
|
pthread_join(thread_data[i].thread_id, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1623,16 +1691,14 @@ do_generic_syscall(
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
kill_thread(unsigned long cpu)
|
kill_thread(unsigned long tid)
|
||||||
{
|
{
|
||||||
if(cpu >= 0 && cpu < ncpu){
|
int i;
|
||||||
pthread_kill(thread_data[cpu].thread_id, LOCALSIG);
|
|
||||||
}
|
|
||||||
else{
|
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0; i < ncpu; ++i) {
|
for (i = 0; i < n_threads; ++i) {
|
||||||
|
if(thread_data[i].remote_tid == tid){
|
||||||
pthread_kill(thread_data[i].thread_id, LOCALSIG);
|
pthread_kill(thread_data[i].thread_id, LOCALSIG);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1738,9 +1804,7 @@ char *
|
|||||||
chgpath(char *in, char *buf)
|
chgpath(char *in, char *buf)
|
||||||
{
|
{
|
||||||
#ifdef ENABLE_MCOVERLAYFS
|
#ifdef ENABLE_MCOVERLAYFS
|
||||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,0,0) && LINUX_VERSION_CODE < KERNEL_VERSION(4,1,0)
|
|
||||||
return in;
|
return in;
|
||||||
#endif
|
|
||||||
#endif // ENABLE_MCOVERLAYFS
|
#endif // ENABLE_MCOVERLAYFS
|
||||||
char *fn = in;
|
char *fn = in;
|
||||||
struct stat sb;
|
struct stat sb;
|
||||||
@@ -1791,6 +1855,8 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
|
|||||||
|
|
||||||
//pthread_mutex_lock(lock);
|
//pthread_mutex_lock(lock);
|
||||||
|
|
||||||
|
thread_data[cpu].remote_tid = w.sr.rtid;
|
||||||
|
|
||||||
switch (w.sr.number) {
|
switch (w.sr.number) {
|
||||||
case __NR_open:
|
case __NR_open:
|
||||||
ret = do_strncpy_from_user(fd, pathbuf, (void *)w.sr.args[0], PATH_MAX);
|
ret = do_strncpy_from_user(fd, pathbuf, (void *)w.sr.args[0], PATH_MAX);
|
||||||
@@ -1829,13 +1895,13 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
|
|||||||
sig = 0;
|
sig = 0;
|
||||||
term = 0;
|
term = 0;
|
||||||
|
|
||||||
|
do_syscall_return(fd, cpu, 0, 0, 0, 0, 0);
|
||||||
|
|
||||||
/* Drop executable file */
|
/* Drop executable file */
|
||||||
if ((ret = ioctl(fd, MCEXEC_UP_CLOSE_EXEC)) != 0) {
|
if ((ret = ioctl(fd, MCEXEC_UP_CLOSE_EXEC)) != 0) {
|
||||||
fprintf(stderr, "WARNING: close_exec() couldn't find exec file?\n");
|
fprintf(stderr, "WARNING: close_exec() couldn't find exec file?\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
do_syscall_return(fd, cpu, 0, 0, 0, 0, 0);
|
|
||||||
|
|
||||||
__dprintf("__NR_exit/__NR_exit_group: %ld (cpu_id: %d)\n",
|
__dprintf("__NR_exit/__NR_exit_group: %ld (cpu_id: %d)\n",
|
||||||
w.sr.args[0], cpu);
|
w.sr.args[0], cpu);
|
||||||
if(w.sr.number == __NR_exit_group){
|
if(w.sr.number == __NR_exit_group){
|
||||||
@@ -1903,6 +1969,39 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
|
|||||||
thread_data[oldcpuid].remote_tid = wtid;
|
thread_data[oldcpuid].remote_tid = wtid;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Number of TIDs and the remote physical address where TIDs are
|
||||||
|
* expected are passed in arg 4 and 5, respectively.
|
||||||
|
*/
|
||||||
|
if (w.sr.args[4] > 0) {
|
||||||
|
struct remote_transfer trans;
|
||||||
|
int i = 0;
|
||||||
|
int *tids = malloc(sizeof(int) * w.sr.args[4]);
|
||||||
|
if (!tids) {
|
||||||
|
fprintf(stderr, "__NR_gettid(): error allocating TIDs\n");
|
||||||
|
goto gettid_out;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < ncpu && i < w.sr.args[4]; ++i) {
|
||||||
|
tids[i] = thread_data[i].tid;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (; i < ncpu; ++i) {
|
||||||
|
tids[i] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
trans.userp = (void*)tids;
|
||||||
|
trans.rphys = w.sr.args[5];
|
||||||
|
trans.size = sizeof(int) * w.sr.args[4];
|
||||||
|
trans.direction = MCEXEC_UP_TRANSFER_TO_REMOTE;
|
||||||
|
|
||||||
|
if (ioctl(fd, MCEXEC_UP_TRANSFER, &trans) != 0) {
|
||||||
|
fprintf(stderr, "__NR_gettid(): error transfering TIDs\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
free(tids);
|
||||||
|
}
|
||||||
|
gettid_out:
|
||||||
do_syscall_return(fd, cpu, thread_data[newcpuid].remote_tid, 0, 0, 0, 0);
|
do_syscall_return(fd, cpu, thread_data[newcpuid].remote_tid, 0, 0, 0, 0);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -1945,7 +2044,9 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
|
|||||||
close(pipefds[0]);
|
close(pipefds[0]);
|
||||||
pid = fork();
|
pid = fork();
|
||||||
if(pid != 0){
|
if(pid != 0){
|
||||||
write(pipefds[1], &pid, sizeof pid);
|
if (write(pipefds[1], &pid, sizeof pid) != sizeof(pid)) {
|
||||||
|
fprintf(stderr, "error: writing pipefds\n");
|
||||||
|
}
|
||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1954,7 +2055,9 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
|
|||||||
int st;
|
int st;
|
||||||
|
|
||||||
close(pipefds[1]);
|
close(pipefds[1]);
|
||||||
read(pipefds[0], &npid, sizeof npid);
|
if (read(pipefds[0], &npid, sizeof npid) != sizeof(npid)) {
|
||||||
|
fprintf(stderr, "error: reading pipefds\n");
|
||||||
|
}
|
||||||
close(pipefds[0]);
|
close(pipefds[0]);
|
||||||
waitpid(pid, &st, 0);
|
waitpid(pid, &st, 0);
|
||||||
pid = npid;
|
pid = npid;
|
||||||
@@ -1994,7 +2097,6 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
|
|||||||
|
|
||||||
/* Reinit signals and syscall threads */
|
/* Reinit signals and syscall threads */
|
||||||
init_sigaction();
|
init_sigaction();
|
||||||
init_worker_threads(fd);
|
|
||||||
|
|
||||||
__dprintf("pid(%d): signals and syscall threads OK\n",
|
__dprintf("pid(%d): signals and syscall threads OK\n",
|
||||||
getpid());
|
getpid());
|
||||||
@@ -2008,6 +2110,8 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
|
|||||||
goto fork_child_sync_pipe;
|
goto fork_child_sync_pipe;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
init_worker_threads(fd);
|
||||||
|
|
||||||
fork_child_sync_pipe:
|
fork_child_sync_pipe:
|
||||||
sem_post(&fs->sem);
|
sem_post(&fs->sem);
|
||||||
if (fs->status)
|
if (fs->status)
|
||||||
@@ -2118,7 +2222,7 @@ fork_err:
|
|||||||
shell = NULL;
|
shell = NULL;
|
||||||
filename = (char *)w.sr.args[1];
|
filename = (char *)w.sr.args[1];
|
||||||
|
|
||||||
if ((ret = lookup_exec_path(filename, path, sizeof(path)))
|
if ((ret = lookup_exec_path(filename, path, sizeof(path), 0))
|
||||||
!= 0) {
|
!= 0) {
|
||||||
goto return_execve1;
|
goto return_execve1;
|
||||||
}
|
}
|
||||||
@@ -2132,7 +2236,7 @@ fork_err:
|
|||||||
/* Check whether shell script */
|
/* Check whether shell script */
|
||||||
if (shell) {
|
if (shell) {
|
||||||
if ((ret = lookup_exec_path(shell, shell_path,
|
if ((ret = lookup_exec_path(shell, shell_path,
|
||||||
sizeof(shell_path))) != 0) {
|
sizeof(shell_path), 0)) != 0) {
|
||||||
fprintf(stderr, "execve(): error: finding file: %s\n", shell);
|
fprintf(stderr, "execve(): error: finding file: %s\n", shell);
|
||||||
goto return_execve1;
|
goto return_execve1;
|
||||||
}
|
}
|
||||||
@@ -2153,6 +2257,7 @@ fork_err:
|
|||||||
strcpy(desc->shell_path, shell_path);
|
strcpy(desc->shell_path, shell_path);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
desc->enable_vdso = enable_vdso;
|
||||||
__dprintf("execve(): load_elf_desc() for %s OK, num sections: %d\n",
|
__dprintf("execve(): load_elf_desc() for %s OK, num sections: %d\n",
|
||||||
path, desc->num_sections);
|
path, desc->num_sections);
|
||||||
|
|
||||||
@@ -2210,7 +2315,10 @@ return_execve1:
|
|||||||
|
|
||||||
__dprintf("%s", "execve(): transfer ELF desc OK\n");
|
__dprintf("%s", "execve(): transfer ELF desc OK\n");
|
||||||
|
|
||||||
transfer_image(fd, desc);
|
if (transfer_image(fd, desc) != 0) {
|
||||||
|
fprintf(stderr, "error: transferring image\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
__dprintf("%s", "execve(): image transferred\n");
|
__dprintf("%s", "execve(): image transferred\n");
|
||||||
|
|
||||||
if (close_cloexec_fds(fd) < 0) {
|
if (close_cloexec_fds(fd) < 0) {
|
||||||
@@ -2262,6 +2370,53 @@ return_execve2:
|
|||||||
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case __NR_setresuid:
|
||||||
|
ret = setresuid(w.sr.args[0], w.sr.args[1], w.sr.args[2]);
|
||||||
|
if(ret == -1)
|
||||||
|
ret = -errno;
|
||||||
|
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case __NR_setreuid:
|
||||||
|
ret = setreuid(w.sr.args[0], w.sr.args[1]);
|
||||||
|
if(ret == -1)
|
||||||
|
ret = -errno;
|
||||||
|
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case __NR_setuid:
|
||||||
|
ret = setuid(w.sr.args[0]);
|
||||||
|
if(ret == -1)
|
||||||
|
ret = -errno;
|
||||||
|
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case __NR_setresgid:
|
||||||
|
ret = setresgid(w.sr.args[0], w.sr.args[1], w.sr.args[2]);
|
||||||
|
if(ret == -1)
|
||||||
|
ret = -errno;
|
||||||
|
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case __NR_setregid:
|
||||||
|
ret = setregid(w.sr.args[0], w.sr.args[1]);
|
||||||
|
if(ret == -1)
|
||||||
|
ret = -errno;
|
||||||
|
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case __NR_setgid:
|
||||||
|
ret = setgid(w.sr.args[0]);
|
||||||
|
if(ret == -1)
|
||||||
|
ret = -errno;
|
||||||
|
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case __NR_setfsgid:
|
||||||
|
ret = setfsgid(w.sr.args[0]);
|
||||||
|
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||||
|
break;
|
||||||
|
|
||||||
case __NR_close:
|
case __NR_close:
|
||||||
if(w.sr.args[0] == fd)
|
if(w.sr.args[0] == fd)
|
||||||
ret = -EBADF;
|
ret = -EBADF;
|
||||||
@@ -2295,7 +2450,9 @@ return_execve2:
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
thread_data[cpu].remote_tid = -1;
|
||||||
|
|
||||||
//pthread_mutex_unlock(lock);
|
//pthread_mutex_unlock(lock);
|
||||||
}
|
}
|
||||||
__dprint("timed out.\n");
|
__dprint("timed out.\n");
|
||||||
|
|||||||
@@ -110,6 +110,7 @@ int __kprintf(const char *format, ...)
|
|||||||
char buf[KPRINTF_LOCAL_BUF_LEN];
|
char buf[KPRINTF_LOCAL_BUF_LEN];
|
||||||
|
|
||||||
/* Copy into the local buf */
|
/* Copy into the local buf */
|
||||||
|
len = sprintf(buf, "[%3d]: ", ihk_mc_get_processor_id());
|
||||||
va_start(va, format);
|
va_start(va, format);
|
||||||
len += vsnprintf(buf + len, KPRINTF_LOCAL_BUF_LEN - len - 2, format, va);
|
len += vsnprintf(buf + len, KPRINTF_LOCAL_BUF_LEN - len - 2, format, va);
|
||||||
va_end(va);
|
va_end(va);
|
||||||
|
|||||||
@@ -78,51 +78,52 @@ static struct memobj *to_memobj(struct devobj *devobj)
|
|||||||
/***********************************************************************
|
/***********************************************************************
|
||||||
* devobj
|
* devobj
|
||||||
*/
|
*/
|
||||||
int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxprotp)
|
int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxprotp,
|
||||||
|
int prot, int populate_flags)
|
||||||
{
|
{
|
||||||
ihk_mc_user_context_t ctx;
|
ihk_mc_user_context_t ctx;
|
||||||
struct pager_map_result result; // XXX: assumes contiguous physical
|
struct pager_map_result result; // XXX: assumes contiguous physical
|
||||||
int error;
|
int error;
|
||||||
struct devobj *obj = NULL;
|
struct devobj *obj = NULL;
|
||||||
const size_t npages = (len + PAGE_SIZE - 1) / PAGE_SIZE;
|
const size_t npages = (len + PAGE_SIZE - 1) / PAGE_SIZE;
|
||||||
|
const size_t pfn_npages = (npages / (PAGE_SIZE / sizeof(uintptr_t))) + 1;
|
||||||
|
|
||||||
dkprintf("devobj_create(%d,%lx,%lx)\n", fd, len, off);
|
dkprintf("%s: fd: %d, len: %lu, off: %lu \n", __FUNCTION__, fd, len, off);
|
||||||
#define MAX_PAGES_IN_DEVOBJ (PAGE_SIZE / sizeof(uintptr_t))
|
|
||||||
if (npages > MAX_PAGES_IN_DEVOBJ) {
|
|
||||||
error = -EFBIG;
|
|
||||||
kprintf("devobj_create(%d,%lx,%lx):too large len. %d\n", fd, len, off, error);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
obj = kmalloc(sizeof(*obj), IHK_MC_AP_NOWAIT);
|
obj = kmalloc(sizeof(*obj), IHK_MC_AP_NOWAIT);
|
||||||
if (!obj) {
|
if (!obj) {
|
||||||
error = -ENOMEM;
|
error = -ENOMEM;
|
||||||
kprintf("devobj_create(%d,%lx,%lx):kmalloc failed. %d\n", fd, len, off, error);
|
kprintf("%s: error: fd: %d, len: %lu, off: %lu kmalloc failed.\n",
|
||||||
|
__FUNCTION__, fd, len, off);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
memset(obj, 0, sizeof(*obj));
|
memset(obj, 0, sizeof(*obj));
|
||||||
|
|
||||||
obj->pfn_table = allocate_pages(1, IHK_MC_AP_NOWAIT);
|
obj->pfn_table = ihk_mc_alloc_pages(pfn_npages, IHK_MC_AP_NOWAIT);
|
||||||
if (!obj->pfn_table) {
|
if (!obj->pfn_table) {
|
||||||
error = -ENOMEM;
|
error = -ENOMEM;
|
||||||
kprintf("devobj_create(%d,%lx,%lx):allocate_pages failed. %d\n", fd, len, off, error);
|
kprintf("%s: error: fd: %d, len: %lu, off: %lu allocating PFN failed.\n",
|
||||||
|
__FUNCTION__, fd, len, off);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
memset(obj->pfn_table, 0, 1*PAGE_SIZE);
|
memset(obj->pfn_table, 0, pfn_npages * PAGE_SIZE);
|
||||||
|
|
||||||
ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_MAP;
|
ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_MAP;
|
||||||
ihk_mc_syscall_arg1(&ctx) = fd;
|
ihk_mc_syscall_arg1(&ctx) = fd;
|
||||||
ihk_mc_syscall_arg2(&ctx) = len;
|
ihk_mc_syscall_arg2(&ctx) = len;
|
||||||
ihk_mc_syscall_arg3(&ctx) = off;
|
ihk_mc_syscall_arg3(&ctx) = off;
|
||||||
ihk_mc_syscall_arg4(&ctx) = virt_to_phys(&result);
|
ihk_mc_syscall_arg4(&ctx) = virt_to_phys(&result);
|
||||||
|
ihk_mc_syscall_arg5(&ctx) = prot | populate_flags;
|
||||||
|
|
||||||
error = syscall_generic_forwarding(__NR_mmap, &ctx);
|
error = syscall_generic_forwarding(__NR_mmap, &ctx);
|
||||||
if (error) {
|
if (error) {
|
||||||
kprintf("devobj_create(%d,%lx,%lx):map failed. %d\n", fd, len, off, error);
|
kprintf("%s: error: fd: %d, len: %lu, off: %lu map failed.\n",
|
||||||
|
__FUNCTION__, fd, len, off);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
dkprintf("devobj_create:handle: %lx\n", result.handle);
|
|
||||||
dkprintf("devobj_create:maxprot: %x\n", result.maxprot);
|
dkprintf("%s: fd: %d, len: %lu, off: %lu, handle: %p, maxprot: %x\n",
|
||||||
|
__FUNCTION__, fd, len, off, result.handle, result.maxprot);
|
||||||
|
|
||||||
obj->memobj.ops = &devobj_ops;
|
obj->memobj.ops = &devobj_ops;
|
||||||
obj->memobj.flags = MF_HAS_PAGER;
|
obj->memobj.flags = MF_HAS_PAGER;
|
||||||
@@ -140,11 +141,12 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp
|
|||||||
out:
|
out:
|
||||||
if (obj) {
|
if (obj) {
|
||||||
if (obj->pfn_table) {
|
if (obj->pfn_table) {
|
||||||
free_pages(obj->pfn_table, 1);
|
ihk_mc_free_pages(obj->pfn_table, pfn_npages);
|
||||||
}
|
}
|
||||||
kfree(obj);
|
kfree(obj);
|
||||||
}
|
}
|
||||||
dkprintf("devobj_create(%d,%lx,%lx): %d %p %x%d\n", fd, len, off, error, *objp, *maxprotp);
|
dkprintf("%s: ret: %d, fd: %d, len: %lu, off: %lu, handle: %p, maxprot: %x \n",
|
||||||
|
__FUNCTION__, error, fd, len, off, result.handle, result.maxprot);
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -164,6 +166,8 @@ static void devobj_release(struct memobj *memobj)
|
|||||||
struct devobj *obj = to_devobj(memobj);
|
struct devobj *obj = to_devobj(memobj);
|
||||||
struct devobj *free_obj = NULL;
|
struct devobj *free_obj = NULL;
|
||||||
uintptr_t handle;
|
uintptr_t handle;
|
||||||
|
const size_t pfn_npages =
|
||||||
|
(obj->npages / (PAGE_SIZE / sizeof(uintptr_t))) + 1;
|
||||||
|
|
||||||
dkprintf("devobj_release(%p %lx)\n", obj, obj->handle);
|
dkprintf("devobj_release(%p %lx)\n", obj, obj->handle);
|
||||||
|
|
||||||
@@ -192,7 +196,7 @@ static void devobj_release(struct memobj *memobj)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (obj->pfn_table) {
|
if (obj->pfn_table) {
|
||||||
free_pages(obj->pfn_table, 1);
|
ihk_mc_free_pages(obj->pfn_table, pfn_npages);
|
||||||
}
|
}
|
||||||
kfree(free_obj);
|
kfree(free_obj);
|
||||||
}
|
}
|
||||||
@@ -204,7 +208,7 @@ static void devobj_release(struct memobj *memobj)
|
|||||||
|
|
||||||
static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintptr_t *physp, unsigned long *flag)
|
static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintptr_t *physp, unsigned long *flag)
|
||||||
{
|
{
|
||||||
const off_t pgoff = off >> PAGE_SHIFT;
|
const off_t pgoff = off / PAGE_SIZE;
|
||||||
struct devobj *obj = to_devobj(memobj);
|
struct devobj *obj = to_devobj(memobj);
|
||||||
int error;
|
int error;
|
||||||
uintptr_t pfn;
|
uintptr_t pfn;
|
||||||
@@ -216,7 +220,7 @@ static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintpt
|
|||||||
|
|
||||||
if ((pgoff < obj->pfn_pgoff) || ((obj->pfn_pgoff + obj->npages) <= pgoff)) {
|
if ((pgoff < obj->pfn_pgoff) || ((obj->pfn_pgoff + obj->npages) <= pgoff)) {
|
||||||
error = -EFBIG;
|
error = -EFBIG;
|
||||||
kprintf("devobj_get_page(%p %lx,%lx,%d): out of range. %d\n", memobj, obj->handle, off, p2align, error);
|
kprintf("%s: error: out of range: off: %lu, page off: %lu obj->npages: %d\n", __FUNCTION__, off, pgoff, obj->npages);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
ix = pgoff - obj->pfn_pgoff;
|
ix = pgoff - obj->pfn_pgoff;
|
||||||
|
|||||||
@@ -182,7 +182,7 @@ int fileobj_create(int fd, struct memobj **objp, int *maxprotp)
|
|||||||
|
|
||||||
error = syscall_generic_forwarding(__NR_mmap, &ctx);
|
error = syscall_generic_forwarding(__NR_mmap, &ctx);
|
||||||
if (error) {
|
if (error) {
|
||||||
kprintf("fileobj_create(%d):create failed. %d\n", fd, error);
|
dkprintf("fileobj_create(%d):create failed. %d\n", fd, error);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -79,8 +79,6 @@
|
|||||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern struct sigpending *hassigpending(struct thread *thread);
|
|
||||||
|
|
||||||
int futex_cmpxchg_enabled;
|
int futex_cmpxchg_enabled;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -332,6 +332,9 @@ int prepare_process_ranges_args_envs(struct thread *thread,
|
|||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
vm->vdso_addr = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
p->rprocess = (unsigned long)thread;
|
p->rprocess = (unsigned long)thread;
|
||||||
p->rpgtable = virt_to_phys(as->page_table);
|
p->rpgtable = virt_to_phys(as->page_table);
|
||||||
@@ -373,10 +376,16 @@ static int process_msg_prepare_process(unsigned long rphys)
|
|||||||
}
|
}
|
||||||
|
|
||||||
n = p->num_sections;
|
n = p->num_sections;
|
||||||
|
if (n > 16) {
|
||||||
|
kprintf("%s: ERROR: more ELF sections than 16??\n",
|
||||||
|
__FUNCTION__);
|
||||||
|
return -ENOMEM;
|
||||||
|
}
|
||||||
dkprintf("# of sections: %d\n", n);
|
dkprintf("# of sections: %d\n", n);
|
||||||
|
|
||||||
if((pn = ihk_mc_allocate(sizeof(struct program_load_desc)
|
if((pn = kmalloc(sizeof(struct program_load_desc)
|
||||||
+ sizeof(struct program_image_section) * n, IHK_MC_AP_NOWAIT)) == NULL){
|
+ sizeof(struct program_image_section) * n,
|
||||||
|
IHK_MC_AP_NOWAIT)) == NULL){
|
||||||
ihk_mc_unmap_virtual(p, npages, 0);
|
ihk_mc_unmap_virtual(p, npages, 0);
|
||||||
ihk_mc_unmap_memory(NULL, phys, sz);
|
ihk_mc_unmap_memory(NULL, phys, sz);
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
@@ -385,7 +394,7 @@ static int process_msg_prepare_process(unsigned long rphys)
|
|||||||
+ sizeof(struct program_image_section) * n);
|
+ sizeof(struct program_image_section) * n);
|
||||||
|
|
||||||
if((thread = create_thread(p->entry)) == NULL){
|
if((thread = create_thread(p->entry)) == NULL){
|
||||||
ihk_mc_free(pn);
|
kfree(pn);
|
||||||
ihk_mc_unmap_virtual(p, npages, 1);
|
ihk_mc_unmap_virtual(p, npages, 1);
|
||||||
ihk_mc_unmap_memory(NULL, phys, sz);
|
ihk_mc_unmap_memory(NULL, phys, sz);
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
@@ -435,7 +444,7 @@ static int process_msg_prepare_process(unsigned long rphys)
|
|||||||
dkprintf("new process : %p [%d] / table : %p\n", proc, proc->pid,
|
dkprintf("new process : %p [%d] / table : %p\n", proc, proc->pid,
|
||||||
vm->address_space->page_table);
|
vm->address_space->page_table);
|
||||||
|
|
||||||
ihk_mc_free(pn);
|
kfree(pn);
|
||||||
|
|
||||||
ihk_mc_unmap_virtual(p, npages, 1);
|
ihk_mc_unmap_virtual(p, npages, 1);
|
||||||
ihk_mc_unmap_memory(NULL, phys, sz);
|
ihk_mc_unmap_memory(NULL, phys, sz);
|
||||||
@@ -443,7 +452,7 @@ static int process_msg_prepare_process(unsigned long rphys)
|
|||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
err:
|
err:
|
||||||
ihk_mc_free(pn);
|
kfree(pn);
|
||||||
ihk_mc_unmap_virtual(p, npages, 1);
|
ihk_mc_unmap_virtual(p, npages, 1);
|
||||||
ihk_mc_unmap_memory(NULL, phys, sz);
|
ihk_mc_unmap_memory(NULL, phys, sz);
|
||||||
destroy_thread(thread);
|
destroy_thread(thread);
|
||||||
@@ -452,7 +461,7 @@ err:
|
|||||||
|
|
||||||
static void process_msg_init(struct ikc_scd_init_param *pcp, struct syscall_params *lparam)
|
static void process_msg_init(struct ikc_scd_init_param *pcp, struct syscall_params *lparam)
|
||||||
{
|
{
|
||||||
lparam->response_va = allocate_pages(RESPONSE_PAGE_COUNT, 0);
|
lparam->response_va = ihk_mc_alloc_pages(RESPONSE_PAGE_COUNT, 0);
|
||||||
lparam->response_pa = virt_to_phys(lparam->response_va);
|
lparam->response_pa = virt_to_phys(lparam->response_va);
|
||||||
|
|
||||||
pcp->request_page = 0;
|
pcp->request_page = 0;
|
||||||
@@ -521,12 +530,7 @@ static void syscall_channel_send(struct ihk_ikc_channel_desc *c,
|
|||||||
}
|
}
|
||||||
|
|
||||||
extern unsigned long do_kill(struct thread *, int, int, int, struct siginfo *, int ptracecont);
|
extern unsigned long do_kill(struct thread *, int, int, int, struct siginfo *, int ptracecont);
|
||||||
extern void settid(struct thread *proc, int mode, int newcpuid, int oldcpuid);
|
|
||||||
|
|
||||||
extern void process_procfs_request(unsigned long rarg);
|
extern void process_procfs_request(unsigned long rarg);
|
||||||
extern int memcheckall();
|
|
||||||
extern int freecheck(int runcount);
|
|
||||||
extern int runcount;
|
|
||||||
extern void terminate_host(int pid);
|
extern void terminate_host(int pid);
|
||||||
extern void debug_log(long);
|
extern void debug_log(long);
|
||||||
|
|
||||||
@@ -561,6 +565,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
|
|||||||
struct ikc_scd_packet *packet = __packet;
|
struct ikc_scd_packet *packet = __packet;
|
||||||
struct ikc_scd_packet pckt;
|
struct ikc_scd_packet pckt;
|
||||||
int rc;
|
int rc;
|
||||||
|
struct mcs_rwlock_node_irqsave lock;
|
||||||
struct thread *thread;
|
struct thread *thread;
|
||||||
struct process *proc;
|
struct process *proc;
|
||||||
struct mcctrl_signal {
|
struct mcctrl_signal {
|
||||||
@@ -572,22 +577,17 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
|
|||||||
} *sp, info;
|
} *sp, info;
|
||||||
unsigned long pp;
|
unsigned long pp;
|
||||||
int cpuid;
|
int cpuid;
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
switch (packet->msg) {
|
switch (packet->msg) {
|
||||||
case SCD_MSG_INIT_CHANNEL_ACKED:
|
case SCD_MSG_INIT_CHANNEL_ACKED:
|
||||||
dkprintf("SCD_MSG_INIT_CHANNEL_ACKED\n");
|
dkprintf("SCD_MSG_INIT_CHANNEL_ACKED\n");
|
||||||
process_msg_init_acked(c, packet->arg);
|
process_msg_init_acked(c, packet->arg);
|
||||||
return 0;
|
ret = 0;
|
||||||
|
break;
|
||||||
|
|
||||||
case SCD_MSG_PREPARE_PROCESS:
|
case SCD_MSG_PREPARE_PROCESS:
|
||||||
|
|
||||||
if (find_command_line("memdebug")) {
|
|
||||||
memcheckall();
|
|
||||||
if (runcount)
|
|
||||||
freecheck(runcount);
|
|
||||||
runcount++;
|
|
||||||
}
|
|
||||||
|
|
||||||
if((rc = process_msg_prepare_process(packet->arg)) == 0){
|
if((rc = process_msg_prepare_process(packet->arg)) == 0){
|
||||||
pckt.msg = SCD_MSG_PREPARE_PROCESS_ACKED;
|
pckt.msg = SCD_MSG_PREPARE_PROCESS_ACKED;
|
||||||
pckt.err = 0;
|
pckt.err = 0;
|
||||||
@@ -600,19 +600,21 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
|
|||||||
pckt.arg = packet->arg;
|
pckt.arg = packet->arg;
|
||||||
syscall_channel_send(c, &pckt);
|
syscall_channel_send(c, &pckt);
|
||||||
|
|
||||||
return 0;
|
ret = 0;
|
||||||
|
break;
|
||||||
|
|
||||||
case SCD_MSG_SCHEDULE_PROCESS:
|
case SCD_MSG_SCHEDULE_PROCESS:
|
||||||
cpuid = obtain_clone_cpuid();
|
cpuid = obtain_clone_cpuid();
|
||||||
if(cpuid == -1){
|
if(cpuid == -1){
|
||||||
kprintf("No CPU available\n");
|
kprintf("No CPU available\n");
|
||||||
return -1;
|
ret = -1;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
dkprintf("SCD_MSG_SCHEDULE_PROCESS: %lx\n", packet->arg);
|
dkprintf("SCD_MSG_SCHEDULE_PROCESS: %lx\n", packet->arg);
|
||||||
thread = (struct thread *)packet->arg;
|
thread = (struct thread *)packet->arg;
|
||||||
proc = thread->proc;
|
proc = thread->proc;
|
||||||
|
|
||||||
settid(thread, 0, cpuid, -1);
|
settid(thread, 0, cpuid, -1, 0, NULL);
|
||||||
proc->status = PS_RUNNING;
|
proc->status = PS_RUNNING;
|
||||||
thread->status = PS_RUNNING;
|
thread->status = PS_RUNNING;
|
||||||
chain_thread(thread);
|
chain_thread(thread);
|
||||||
@@ -620,7 +622,29 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
|
|||||||
runq_add_thread(thread, cpuid);
|
runq_add_thread(thread, cpuid);
|
||||||
|
|
||||||
//cpu_local_var(next) = (struct thread *)packet->arg;
|
//cpu_local_var(next) = (struct thread *)packet->arg;
|
||||||
return 0;
|
ret = 0;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Used for syscall offload reply message to explicitly schedule in
|
||||||
|
* the waiting thread
|
||||||
|
*/
|
||||||
|
case SCD_MSG_WAKE_UP_SYSCALL_THREAD:
|
||||||
|
thread = find_thread(0, packet->ttid, &lock);
|
||||||
|
if (!thread) {
|
||||||
|
kprintf("%s: WARNING: no thread for SCD reply? TID: %d\n",
|
||||||
|
__FUNCTION__, packet->ttid);
|
||||||
|
ret = -EINVAL;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
thread_unlock(thread, &lock);
|
||||||
|
|
||||||
|
dkprintf("%s: SCD_MSG_WAKE_UP_SYSCALL_THREAD: waking up tid %d\n",
|
||||||
|
__FUNCTION__, packet->ttid);
|
||||||
|
waitq_wakeup(&thread->scd_wq);
|
||||||
|
ret = 0;
|
||||||
|
break;
|
||||||
|
|
||||||
case SCD_MSG_SEND_SIGNAL:
|
case SCD_MSG_SEND_SIGNAL:
|
||||||
pp = ihk_mc_map_memory(NULL, packet->arg, sizeof(struct mcctrl_signal));
|
pp = ihk_mc_map_memory(NULL, packet->arg, sizeof(struct mcctrl_signal));
|
||||||
sp = (struct mcctrl_signal *)ihk_mc_map_virtual(pp, 1, PTATTR_WRITABLE | PTATTR_ACTIVE);
|
sp = (struct mcctrl_signal *)ihk_mc_map_virtual(pp, 1, PTATTR_WRITABLE | PTATTR_ACTIVE);
|
||||||
@@ -635,18 +659,25 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
|
|||||||
|
|
||||||
rc = do_kill(NULL, info.pid, info.tid, info.sig, &info.info, 0);
|
rc = do_kill(NULL, info.pid, info.tid, info.sig, &info.info, 0);
|
||||||
kprintf("SCD_MSG_SEND_SIGNAL: do_kill(pid=%d, tid=%d, sig=%d)=%d\n", info.pid, info.tid, info.sig, rc);
|
kprintf("SCD_MSG_SEND_SIGNAL: do_kill(pid=%d, tid=%d, sig=%d)=%d\n", info.pid, info.tid, info.sig, rc);
|
||||||
return 0;
|
ret = 0;
|
||||||
|
break;
|
||||||
|
|
||||||
case SCD_MSG_PROCFS_REQUEST:
|
case SCD_MSG_PROCFS_REQUEST:
|
||||||
process_procfs_request(packet->arg);
|
process_procfs_request(packet->arg);
|
||||||
return 0;
|
ret = 0;
|
||||||
|
break;
|
||||||
|
|
||||||
case SCD_MSG_CLEANUP_PROCESS:
|
case SCD_MSG_CLEANUP_PROCESS:
|
||||||
dkprintf("SCD_MSG_CLEANUP_PROCESS pid=%d\n", packet->pid);
|
dkprintf("SCD_MSG_CLEANUP_PROCESS pid=%d\n", packet->pid);
|
||||||
terminate_host(packet->pid);
|
terminate_host(packet->pid);
|
||||||
return 0;
|
ret = 0;
|
||||||
|
break;
|
||||||
|
|
||||||
case SCD_MSG_DEBUG_LOG:
|
case SCD_MSG_DEBUG_LOG:
|
||||||
dkprintf("SCD_MSG_DEBUG_LOG code=%lx\n", packet->arg);
|
dkprintf("SCD_MSG_DEBUG_LOG code=%lx\n", packet->arg);
|
||||||
debug_log(packet->arg);
|
debug_log(packet->arg);
|
||||||
return 0;
|
ret = 0;
|
||||||
|
break;
|
||||||
|
|
||||||
case SCD_MSG_SYSFS_REQ_SHOW:
|
case SCD_MSG_SYSFS_REQ_SHOW:
|
||||||
case SCD_MSG_SYSFS_REQ_STORE:
|
case SCD_MSG_SYSFS_REQ_STORE:
|
||||||
@@ -654,7 +685,8 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
|
|||||||
sysfss_packet_handler(c, packet->msg, packet->err,
|
sysfss_packet_handler(c, packet->msg, packet->err,
|
||||||
packet->sysfs_arg1, packet->sysfs_arg2,
|
packet->sysfs_arg1, packet->sysfs_arg2,
|
||||||
packet->sysfs_arg3);
|
packet->sysfs_arg3);
|
||||||
return 0;
|
ret = 0;
|
||||||
|
break;
|
||||||
|
|
||||||
case SCD_MSG_GET_CPU_MAPPING:
|
case SCD_MSG_GET_CPU_MAPPING:
|
||||||
req_get_cpu_mapping(packet->arg);
|
req_get_cpu_mapping(packet->arg);
|
||||||
@@ -662,17 +694,21 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
|
|||||||
pckt.msg = SCD_MSG_REPLY_GET_CPU_MAPPING;
|
pckt.msg = SCD_MSG_REPLY_GET_CPU_MAPPING;
|
||||||
pckt.arg = packet->arg;
|
pckt.arg = packet->arg;
|
||||||
syscall_channel_send(c, &pckt);
|
syscall_channel_send(c, &pckt);
|
||||||
return 0;
|
ret = 0;
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
kprintf("syscall_pakcet_handler:unknown message "
|
kprintf("syscall_pakcet_handler:unknown message "
|
||||||
"(%d.%d.%d.%d.%d.%#lx)\n",
|
"(%d.%d.%d.%d.%d.%#lx)\n",
|
||||||
packet->msg, packet->ref, packet->osnum,
|
packet->msg, packet->ref, packet->osnum,
|
||||||
packet->pid, packet->err, packet->arg);
|
packet->pid, packet->err, packet->arg);
|
||||||
return 0;
|
ret = 0;
|
||||||
|
break;
|
||||||
|
|
||||||
}
|
}
|
||||||
return 0;
|
|
||||||
|
ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet, c);
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
void init_host_syscall_channel(void)
|
void init_host_syscall_channel(void)
|
||||||
|
|||||||
@@ -19,11 +19,13 @@
|
|||||||
* CPU Local Storage (cls)
|
* CPU Local Storage (cls)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
struct malloc_header {
|
struct kmalloc_header {
|
||||||
unsigned int check;
|
unsigned int front_magic;
|
||||||
unsigned int cpu_id;
|
unsigned int cpu_id;
|
||||||
struct malloc_header *next;
|
struct list_head list;
|
||||||
unsigned long size;
|
int size; /* The size of this chunk without the header */
|
||||||
|
unsigned int end_magic;
|
||||||
|
/* 32 bytes */
|
||||||
};
|
};
|
||||||
|
|
||||||
#include <ihk/lock.h>
|
#include <ihk/lock.h>
|
||||||
@@ -38,8 +40,9 @@ extern ihk_spinlock_t cpu_status_lock;
|
|||||||
|
|
||||||
struct cpu_local_var {
|
struct cpu_local_var {
|
||||||
/* malloc */
|
/* malloc */
|
||||||
struct malloc_header free_list;
|
struct list_head free_list;
|
||||||
struct malloc_header *remote_free_list;
|
struct list_head remote_free_list;
|
||||||
|
ihk_spinlock_t remote_free_list_lock;
|
||||||
|
|
||||||
struct thread idle;
|
struct thread idle;
|
||||||
struct process idle_proc;
|
struct process idle_proc;
|
||||||
@@ -73,6 +76,7 @@ struct cpu_local_var {
|
|||||||
int in_interrupt;
|
int in_interrupt;
|
||||||
int no_preempt;
|
int no_preempt;
|
||||||
int timer_enabled;
|
int timer_enabled;
|
||||||
|
int kmalloc_initialized;
|
||||||
} __attribute__((aligned(64)));
|
} __attribute__((aligned(64)));
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -32,11 +32,10 @@ void *_kmalloc(int size, enum ihk_mc_ap_flag flag, char *file, int line);
|
|||||||
void _kfree(void *ptr, char *file, int line);
|
void _kfree(void *ptr, char *file, int line);
|
||||||
void *__kmalloc(int size, enum ihk_mc_ap_flag flag);
|
void *__kmalloc(int size, enum ihk_mc_ap_flag flag);
|
||||||
void __kfree(void *ptr);
|
void __kfree(void *ptr);
|
||||||
void *___kmalloc(int size, enum ihk_mc_ap_flag flag);
|
|
||||||
void ___kfree(void *ptr);
|
|
||||||
|
|
||||||
int _memcheck(void *ptr, char *msg, char *file, int line, int free);
|
int _memcheck(void *ptr, char *msg, char *file, int line, int free);
|
||||||
int memcheckall();
|
int memcheckall();
|
||||||
int freecheck(int runcount);
|
int freecheck(int runcount);
|
||||||
|
void kmalloc_consolidate_free_list(void);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -141,6 +141,7 @@ int fileobj_create(int fd, struct memobj **objp, int *maxprotp);
|
|||||||
struct shmid_ds;
|
struct shmid_ds;
|
||||||
int shmobj_create(struct shmid_ds *ds, struct memobj **objp);
|
int shmobj_create(struct shmid_ds *ds, struct memobj **objp);
|
||||||
int zeroobj_create(struct memobj **objp);
|
int zeroobj_create(struct memobj **objp);
|
||||||
int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxprotp);
|
int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxprotp,
|
||||||
|
int prot, int populate_flags);
|
||||||
|
|
||||||
#endif /* HEADER_MEMOBJ_H */
|
#endif /* HEADER_MEMOBJ_H */
|
||||||
|
|||||||
@@ -29,6 +29,7 @@
|
|||||||
#define VR_IO_NOCACHE 0x100
|
#define VR_IO_NOCACHE 0x100
|
||||||
#define VR_REMOTE 0x200
|
#define VR_REMOTE 0x200
|
||||||
#define VR_WRITE_COMBINED 0x400
|
#define VR_WRITE_COMBINED 0x400
|
||||||
|
#define VR_DONTFORK 0x800
|
||||||
#define VR_DEMAND_PAGING 0x1000
|
#define VR_DEMAND_PAGING 0x1000
|
||||||
#define VR_PRIVATE 0x2000
|
#define VR_PRIVATE 0x2000
|
||||||
#define VR_LOCKED 0x4000
|
#define VR_LOCKED 0x4000
|
||||||
@@ -160,7 +161,7 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define USER_STACK_NR_PAGES 8192
|
#define USER_STACK_NR_PAGES 8192
|
||||||
#define KERNEL_STACK_NR_PAGES 25
|
#define KERNEL_STACK_NR_PAGES 32
|
||||||
|
|
||||||
#define NOPHYS ((uintptr_t)-1)
|
#define NOPHYS ((uintptr_t)-1)
|
||||||
|
|
||||||
@@ -319,12 +320,14 @@ struct process_vm;
|
|||||||
struct mckfd {
|
struct mckfd {
|
||||||
struct mckfd *next;
|
struct mckfd *next;
|
||||||
int fd;
|
int fd;
|
||||||
|
int sig_no;
|
||||||
long data;
|
long data;
|
||||||
void *opt;
|
void *opt;
|
||||||
long (*read_cb)(struct mckfd *, ihk_mc_user_context_t *);
|
long (*read_cb)(struct mckfd *, ihk_mc_user_context_t *);
|
||||||
int (*ioctl_cb)(struct mckfd *, ihk_mc_user_context_t *);
|
int (*ioctl_cb)(struct mckfd *, ihk_mc_user_context_t *);
|
||||||
long (*mmap_cb)(struct mckfd *, ihk_mc_user_context_t *);
|
long (*mmap_cb)(struct mckfd *, ihk_mc_user_context_t *);
|
||||||
int (*close_cb)(struct mckfd *, ihk_mc_user_context_t *);
|
int (*close_cb)(struct mckfd *, ihk_mc_user_context_t *);
|
||||||
|
int (*fcntl_cb)(struct mckfd *, ihk_mc_user_context_t *);
|
||||||
};
|
};
|
||||||
|
|
||||||
#define SFD_CLOEXEC 02000000
|
#define SFD_CLOEXEC 02000000
|
||||||
@@ -346,6 +349,11 @@ struct sig_pending {
|
|||||||
|
|
||||||
typedef void pgio_func_t(void *arg);
|
typedef void pgio_func_t(void *arg);
|
||||||
|
|
||||||
|
struct mcexec_tid {
|
||||||
|
int tid;
|
||||||
|
struct thread *thread;
|
||||||
|
};
|
||||||
|
|
||||||
/* Represents a node in the process fork tree, it may exist even after the
|
/* Represents a node in the process fork tree, it may exist even after the
|
||||||
* corresponding process exited due to references from the parent and/or
|
* corresponding process exited due to references from the parent and/or
|
||||||
* children and is used for implementing wait/waitpid without having a
|
* children and is used for implementing wait/waitpid without having a
|
||||||
@@ -360,6 +368,9 @@ struct process {
|
|||||||
// threads and children
|
// threads and children
|
||||||
struct list_head threads_list;
|
struct list_head threads_list;
|
||||||
mcs_rwlock_lock_t threads_lock; // lock for threads_list
|
mcs_rwlock_lock_t threads_lock; // lock for threads_list
|
||||||
|
/* TID set of proxy process */
|
||||||
|
struct mcexec_tid *tids;
|
||||||
|
int nr_tids;
|
||||||
|
|
||||||
/* The ptracing process behave as the parent of the ptraced process
|
/* The ptracing process behave as the parent of the ptraced process
|
||||||
after using PTRACE_ATTACH except getppid. So we save it here. */
|
after using PTRACE_ATTACH except getppid. So we save it here. */
|
||||||
@@ -556,6 +567,9 @@ struct thread {
|
|||||||
struct itimerval itimer_prof;
|
struct itimerval itimer_prof;
|
||||||
struct timespec itimer_virtual_value;
|
struct timespec itimer_virtual_value;
|
||||||
struct timespec itimer_prof_value;
|
struct timespec itimer_prof_value;
|
||||||
|
|
||||||
|
/* Syscall offload wait queue head */
|
||||||
|
struct waitq scd_wq;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct process_vm {
|
struct process_vm {
|
||||||
@@ -675,5 +689,8 @@ void chain_process(struct process *);
|
|||||||
void chain_thread(struct thread *);
|
void chain_thread(struct thread *);
|
||||||
void proc_init();
|
void proc_init();
|
||||||
void set_timer();
|
void set_timer();
|
||||||
|
struct sig_pending *hassigpending(struct thread *thread);
|
||||||
|
void settid(struct thread *thread, int mode, int newcpuid, int oldcpuid,
|
||||||
|
int nr_tids, int *tids);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -31,6 +31,7 @@
|
|||||||
#define SCD_MSG_PREPARE_PROCESS_ACKED 0x2
|
#define SCD_MSG_PREPARE_PROCESS_ACKED 0x2
|
||||||
#define SCD_MSG_PREPARE_PROCESS_NACKED 0x7
|
#define SCD_MSG_PREPARE_PROCESS_NACKED 0x7
|
||||||
#define SCD_MSG_SCHEDULE_PROCESS 0x3
|
#define SCD_MSG_SCHEDULE_PROCESS 0x3
|
||||||
|
#define SCD_MSG_WAKE_UP_SYSCALL_THREAD 0x14
|
||||||
|
|
||||||
#define SCD_MSG_INIT_CHANNEL 0x5
|
#define SCD_MSG_INIT_CHANNEL 0x5
|
||||||
#define SCD_MSG_INIT_CHANNEL_ACKED 0x6
|
#define SCD_MSG_INIT_CHANNEL_ACKED 0x6
|
||||||
@@ -117,28 +118,6 @@ struct user_desc {
|
|||||||
unsigned int lm:1;
|
unsigned int lm:1;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ikc_scd_packet {
|
|
||||||
int msg;
|
|
||||||
int err;
|
|
||||||
union {
|
|
||||||
/* for traditional SCD_MSG_* */
|
|
||||||
struct {
|
|
||||||
int ref;
|
|
||||||
int osnum;
|
|
||||||
int pid;
|
|
||||||
int padding;
|
|
||||||
unsigned long arg;
|
|
||||||
};
|
|
||||||
|
|
||||||
/* for SCD_MSG_SYSFS_* */
|
|
||||||
struct {
|
|
||||||
long sysfs_arg1;
|
|
||||||
long sysfs_arg2;
|
|
||||||
long sysfs_arg3;
|
|
||||||
};
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
struct program_image_section {
|
struct program_image_section {
|
||||||
unsigned long vaddr;
|
unsigned long vaddr;
|
||||||
unsigned long len;
|
unsigned long len;
|
||||||
@@ -210,13 +189,58 @@ struct ikc_scd_init_param {
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct syscall_request {
|
struct syscall_request {
|
||||||
|
/* TID of requesting thread */
|
||||||
|
int rtid;
|
||||||
|
/*
|
||||||
|
* TID of target thread. Remote page fault response needs to designate the
|
||||||
|
* thread that must serve the request, 0 indicates any thread from the pool
|
||||||
|
*/
|
||||||
|
int ttid;
|
||||||
unsigned long valid;
|
unsigned long valid;
|
||||||
unsigned long number;
|
unsigned long number;
|
||||||
unsigned long args[6];
|
unsigned long args[6];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct ikc_scd_packet {
|
||||||
|
int msg;
|
||||||
|
int err;
|
||||||
|
union {
|
||||||
|
/* for traditional SCD_MSG_* */
|
||||||
|
struct {
|
||||||
|
int ref;
|
||||||
|
int osnum;
|
||||||
|
int pid;
|
||||||
|
unsigned long arg;
|
||||||
|
struct syscall_request req;
|
||||||
|
unsigned long resp_pa;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* for SCD_MSG_SYSFS_* */
|
||||||
|
struct {
|
||||||
|
long sysfs_arg1;
|
||||||
|
long sysfs_arg2;
|
||||||
|
long sysfs_arg3;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* SCD_MSG_SCHEDULE_THREAD */
|
||||||
|
struct {
|
||||||
|
int ttid;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
char padding[12];
|
||||||
|
};
|
||||||
|
|
||||||
|
#define IHK_SCD_REQ_THREAD_SPINNING 0
|
||||||
|
#define IHK_SCD_REQ_THREAD_TO_BE_WOKEN 1
|
||||||
|
#define IHK_SCD_REQ_THREAD_DESCHEDULED 2
|
||||||
|
|
||||||
struct syscall_response {
|
struct syscall_response {
|
||||||
|
/* TID of the thread that requested the service */
|
||||||
|
int ttid;
|
||||||
|
/* TID of the mcexec thread that is serving the request */
|
||||||
|
int stid;
|
||||||
unsigned long status;
|
unsigned long status;
|
||||||
|
unsigned long req_thread_status;
|
||||||
long ret;
|
long ret;
|
||||||
unsigned long fault_address;
|
unsigned long fault_address;
|
||||||
unsigned long fault_reason;
|
unsigned long fault_reason;
|
||||||
|
|||||||
@@ -371,7 +371,7 @@ int main(void)
|
|||||||
}
|
}
|
||||||
kmsg_init(mode);
|
kmsg_init(mode);
|
||||||
|
|
||||||
kputs("MCK started.\n");
|
kputs("IHK/McKernel started.\n");
|
||||||
|
|
||||||
arch_init();
|
arch_init();
|
||||||
|
|
||||||
@@ -393,7 +393,7 @@ int main(void)
|
|||||||
|
|
||||||
futex_init();
|
futex_init();
|
||||||
|
|
||||||
kputs("MCK/IHK booted.\n");
|
kputs("IHK/McKernel booted.\n");
|
||||||
|
|
||||||
#ifdef DCFA_KMOD
|
#ifdef DCFA_KMOD
|
||||||
mc_cmd_client_init();
|
mc_cmd_client_init();
|
||||||
|
|||||||
769
kernel/mem.c
769
kernel/mem.c
@@ -156,13 +156,17 @@ void sbox_write(int offset, unsigned int value);
 
 static void query_free_mem_interrupt_handler(void *priv)
 {
-#ifdef ATTACHED_MIC
-	dkprintf("query free mem handler!\n");
-
 	int pages = ihk_pagealloc_query_free(pa_allocator);
 
-	dkprintf("free pages: %d\n", pages);
+	kprintf("McKernel free pages: %d\n", pages);
 
+	if (find_command_line("memdebug")) {
+		extern void kmalloc_memcheck(void);
+
+		kmalloc_memcheck();
+	}
+
+#ifdef ATTACHED_MIC
 	sbox_write(SBOX_SCRATCH0, pages);
 	sbox_write(SBOX_SCRATCH1, 1);
 #endif
@@ -265,6 +269,13 @@ void remote_flush_tlb_cpumask(struct process_vm *vm,
 	unsigned long tsc;
 	tsc = rdtsc() + 12884901888; /* 1.2GHz =>10 sec */
 #endif
+	if (flush_entry->addr) {
+		flush_tlb_single(flush_entry->addr & PAGE_MASK);
+	}
+	/* Zero address denotes full TLB flush */
+	else {
+		flush_tlb();
+	}
 
 	/* Wait for all cores */
 	while (ihk_atomic_read(&flush_entry->pending) != 0) {
@@ -335,10 +346,9 @@ static void page_fault_handler(void *fault_addr, uint64_t reason, void *regs)
 		// no return
 	}
 
-	kprintf("[%d]page_fault_handler(%p,%lx,%p):"
-		"fault vm failed. %d, TID: %d\n",
-		ihk_mc_get_processor_id(), fault_addr,
-		reason, regs, error, thread->tid);
+	kprintf("%s fault VM failed for TID: %d, addr: 0x%lx, "
+		"reason: %d, error: %d\n", __FUNCTION__,
+		thread->tid, fault_addr, reason, error);
 	unhandled_page_fault(thread, fault_addr, regs);
 	preempt_enable();
 	memset(&info, '\0', sizeof info);
@@ -425,8 +435,9 @@ static void page_allocator_init(void)
 
 	ihk_mc_reserve_arch_pages(pa_start, pa_end, reserve_pages);
 
-	kprintf("Available pages: %ld pages\n",
-		ihk_pagealloc_count(pa_allocator));
+	kprintf("Available memory: %ld bytes in %ld pages\n",
+		(ihk_pagealloc_count(pa_allocator) * PAGE_SIZE),
+		ihk_pagealloc_count(pa_allocator));
 
 	/* Notify the ihk to use my page allocator */
 	ihk_mc_set_page_allocator(&allocator);
@@ -507,6 +518,9 @@ static void page_init(void)
 
 static char *memdebug = NULL;
 
+static void *___kmalloc(int size, enum ihk_mc_ap_flag flag);
+static void ___kfree(void *ptr);
+
 void register_kmalloc(void)
 {
 	if(memdebug){
@@ -636,60 +650,100 @@ void mem_init(void)
 	}
 }
 
-struct location {
-	struct location *next;
-	int line;
-	int cnt;
-	char file[0];
-};
+#define KMALLOC_TRACK_HASH_SHIFT (8)
+#define KMALLOC_TRACK_HASH_SIZE (1 << KMALLOC_TRACK_HASH_SHIFT)
+#define KMALLOC_TRACK_HASH_MASK (KMALLOC_TRACK_HASH_SIZE - 1)
 
-struct alloc {
-	struct alloc *next;
-	struct malloc_header *p;
-	struct location *loc;
-	int size;
+struct list_head kmalloc_track_hash[KMALLOC_TRACK_HASH_SIZE];
+ihk_spinlock_t kmalloc_track_hash_locks[KMALLOC_TRACK_HASH_SIZE];
+struct list_head kmalloc_addr_hash[KMALLOC_TRACK_HASH_SIZE];
+ihk_spinlock_t kmalloc_addr_hash_locks[KMALLOC_TRACK_HASH_SIZE];
+
+int kmalloc_track_initialized = 0;
+int kmalloc_runcount = 0;
+
+struct kmalloc_track_addr_entry {
+	void *addr;
 	int runcount;
+	struct list_head list; /* track_entry's list */
+	struct kmalloc_track_entry *entry;
+	struct list_head hash; /* address hash */
 };
 
-#define HASHNUM 129
+struct kmalloc_track_entry {
+	char *file;
+	int line;
+	int size;
+	ihk_atomic_t alloc_count;
+	struct list_head hash;
+	struct list_head addr_list;
+	ihk_spinlock_t addr_list_lock;
+};
 
-static struct alloc *allochash[HASHNUM];
-static struct location *lochash[HASHNUM];
-static ihk_spinlock_t alloclock;
-int runcount;
-static unsigned char *page;
-static int space;
-
-static void *dalloc(unsigned long size)
+void kmalloc_init(void)
 {
-	void *r;
-	static int pos = 0;
-	unsigned long irqstate;
+	struct cpu_local_var *v = get_this_cpu_local_var();
 
-	irqstate = ihk_mc_spinlock_lock(&alloclock);
-	size = (size + 7) & 0xfffffffffffffff8L;
-	if (pos + size > space) {
-		page = allocate_pages(1, IHK_MC_AP_NOWAIT);
-		space = 4096;
-		pos = 0;
+	register_kmalloc();
+	INIT_LIST_HEAD(&v->free_list);
+	INIT_LIST_HEAD(&v->remote_free_list);
+	ihk_mc_spinlock_init(&v->remote_free_list_lock);
+	v->kmalloc_initialized = 1;
+
+	if (!kmalloc_track_initialized) {
+		int i;
+
+		memdebug = find_command_line("memdebug");
+
+		kmalloc_track_initialized = 1;
+		for (i = 0; i < KMALLOC_TRACK_HASH_SIZE; ++i) {
+			ihk_mc_spinlock_init(&kmalloc_track_hash_locks[i]);
+			INIT_LIST_HEAD(&kmalloc_track_hash[i]);
+			ihk_mc_spinlock_init(&kmalloc_addr_hash_locks[i]);
+			INIT_LIST_HEAD(&kmalloc_addr_hash[i]);
+		}
 	}
-	r = page + pos;
-	pos += size;
-	ihk_mc_spinlock_unlock(&alloclock, irqstate);
-
-	return r;
 }
 
+/* NOTE: Hash lock must be held */
+struct kmalloc_track_entry *__kmalloc_track_find_entry(
+	int size, char *file, int line)
+{
+	struct kmalloc_track_entry *entry_iter, *entry = NULL;
+	int hash = (strlen(file) + line + size) & KMALLOC_TRACK_HASH_MASK;
+
+	list_for_each_entry(entry_iter, &kmalloc_track_hash[hash], hash) {
+		if (!strcmp(entry_iter->file, file) &&
			entry_iter->size == size &&
			entry_iter->line == line) {
+			entry = entry_iter;
+			break;
+		}
+	}
+
+	if (entry) {
+		dkprintf("%s found entry %s:%d size: %d\n", __FUNCTION__,
+			file, line, size);
+	}
+	else {
+		dkprintf("%s couldn't find entry %s:%d size: %d\n", __FUNCTION__,
+			file, line, size);
+	}
+
+	return entry;
+}
+
+/* Top level routines called from macro */
 void *_kmalloc(int size, enum ihk_mc_ap_flag flag, char *file, int line)
 {
-	char *r = ___kmalloc(size, flag);
-	struct malloc_header *h;
-	unsigned long hash;
-	char *t;
-	struct location *lp;
-	struct alloc *ap;
-	unsigned long alcsize;
-	unsigned long chksize;
+	unsigned long irqflags;
+	struct kmalloc_track_entry *entry;
+	struct kmalloc_track_addr_entry *addr_entry;
+	int hash, addr_hash;
+	void *r = ___kmalloc(size, flag);
 
 	if (!memdebug)
 		return r;
@@ -697,177 +751,177 @@ void *_kmalloc(int size, enum ihk_mc_ap_flag flag, char *file, int line)
 	if (!r)
 		return r;
 
-	h = ((struct malloc_header *)r) - 1;
-	alcsize = h->size * sizeof(struct malloc_header);
-	chksize = alcsize - size;
-	memset(r + size, '\x5a', chksize);
+	hash = (strlen(file) + line + size) & KMALLOC_TRACK_HASH_MASK;
+	irqflags = ihk_mc_spinlock_lock(&kmalloc_track_hash_locks[hash]);
 
-	for (hash = 0, t = file; *t; t++) {
-		hash <<= 1;
-		hash += *t;
+	entry = __kmalloc_track_find_entry(size, file, line);
+	if (!entry) {
+		entry = ___kmalloc(sizeof(*entry), IHK_MC_AP_NOWAIT);
+		if (!entry) {
+			kprintf("%s: ERROR: allocating tracking entry\n");
+			goto out;
+		}
+
+		entry->line = line;
+		entry->size = size;
+		ihk_atomic_set(&entry->alloc_count, 0);
+		ihk_mc_spinlock_init(&entry->addr_list_lock);
+		INIT_LIST_HEAD(&entry->addr_list);
+
+		entry->file = ___kmalloc(strlen(file) + 1, IHK_MC_AP_NOWAIT);
+		if (!entry->file) {
+			kprintf("%s: ERROR: allocating file string\n");
+			___kfree(entry);
+			ihk_mc_spinlock_unlock(&kmalloc_track_hash_locks[hash], irqflags);
+			goto out;
+		}
+
+		strcpy(entry->file, file);
+		entry->file[strlen(file)] = 0;
+		list_add(&entry->hash, &kmalloc_track_hash[hash]);
+		dkprintf("%s entry %s:%d size: %d added\n", __FUNCTION__,
+			file, line, size);
 	}
-	hash += line;
-	hash %= HASHNUM;
-	for (lp = lochash[hash]; lp; lp = lp->next)
-		if (lp->line == line &&
-			!strcmp(lp->file, file))
-			break;
-	if (!lp) {
-		lp = dalloc(sizeof(struct location) + strlen(file) + 1);
-		memset(lp, '\0', sizeof(struct location));
-		lp->line = line;
-		strcpy(lp->file, file);
-		do {
-			lp->next = lochash[hash];
-		} while (!compare_and_swap(lochash + hash, (unsigned long)lp->next, (unsigned long)lp));
+	ihk_mc_spinlock_unlock(&kmalloc_track_hash_locks[hash], irqflags);
+
+	ihk_atomic_inc(&entry->alloc_count);
+
+	/* Add new addr entry for this allocation entry */
+	addr_entry = ___kmalloc(sizeof(*addr_entry), IHK_MC_AP_NOWAIT);
+	if (!addr_entry) {
+		kprintf("%s: ERROR: allocating addr entry\n");
+		goto out;
 	}
 
-	hash = (unsigned long)h % HASHNUM;
-	do {
-		for (ap = allochash[hash]; ap; ap = ap->next)
-			if (!ap->p)
-				break;
-	} while (ap && !compare_and_swap(&ap->p, 0UL, (unsigned long)h));
-	if (!ap) {
-		ap = dalloc(sizeof(struct alloc));
-		memset(ap, '\0', sizeof(struct alloc));
-		ap->p = h;
-		do {
-			ap->next = allochash[hash];
-		} while (!compare_and_swap(allochash + hash, (unsigned long)ap->next, (unsigned long)ap));
-	}
+	addr_entry->addr = r;
+	addr_entry->runcount = kmalloc_runcount;
+	addr_entry->entry = entry;
 
-	ap->loc = lp;
-	ap->size = size;
-	ap->runcount = runcount;
+	irqflags = ihk_mc_spinlock_lock(&entry->addr_list_lock);
+	list_add(&addr_entry->list, &entry->addr_list);
+	ihk_mc_spinlock_unlock(&entry->addr_list_lock, irqflags);
 
-	return r;
-}
+	/* Add addr entry to address hash */
+	addr_hash = ((unsigned long)r >> 5) & KMALLOC_TRACK_HASH_MASK;
+	irqflags = ihk_mc_spinlock_lock(&kmalloc_addr_hash_locks[addr_hash]);
+	list_add(&addr_entry->hash, &kmalloc_addr_hash[addr_hash]);
+	ihk_mc_spinlock_unlock(&kmalloc_addr_hash_locks[addr_hash], irqflags);
 
-int _memcheck(void *ptr, char *msg, char *file, int line, int flags)
-{
-	struct malloc_header *h = ((struct malloc_header *)ptr) - 1;
-	struct malloc_header *next;
-	unsigned long hash = (unsigned long)h % HASHNUM;
-	struct alloc *ap;
-	static unsigned long check = 0x5a5a5a5a5a5a5a5aUL;
-	unsigned long alcsize;
-	unsigned long chksize;
-
-	if (h->check != 0x5a5a5a5a) {
-		int i;
-		unsigned long max = 0;
-		unsigned long cur = (unsigned long)h;
-		struct alloc *maxap = NULL;
-
-		for (i = 0; i < HASHNUM; i++)
-			for (ap = allochash[i]; ap; ap = ap->next)
-				if ((unsigned long)ap->p < cur &&
-					(unsigned long)ap->p > max) {
-					max = (unsigned long)ap->p;
-					maxap = ap;
-				}
-
-		kprintf("%s: detect buffer overrun, alc=%s:%d size=%ld h=%p, s=%ld\n", msg, maxap->loc->file, maxap->loc->line, maxap->size, maxap->p, maxap->p->size);
-		kprintf("broken header: h=%p next=%p size=%ld cpu_id=%d\n", h, h->next, h->size, h->cpu_id);
-	}
-
-	for (ap = allochash[hash]; ap; ap = ap->next)
-		if (ap->p == h)
-			break;
-	if (!ap) {
-		if(file)
-			kprintf("%s: address not found, %s:%d p=%p\n", msg, file, line, ptr);
-		else
-			kprintf("%s: address not found p=%p\n", msg, ptr);
-		return 1;
-	}
-
-	alcsize = h->size * sizeof(struct malloc_header);
-	chksize = alcsize - ap->size;
-	if (chksize > 8)
-		chksize = 8;
-	next = (struct malloc_header *)((char *)ptr + alcsize);
-
-	if (next->check != 0x5a5a5a5a ||
-		memcmp((char *)ptr + ap->size, &check, chksize)) {
-		unsigned long buf = 0x5a5a5a5a5a5a5a5aUL;
-		unsigned char *p;
-		unsigned char *q;
-		memcpy(&buf, (char *)ptr + ap->size, chksize);
-		p = (unsigned char *)&(next->check);
-		q = (unsigned char *)&buf;
-
-		if (file)
-			kprintf("%s: broken, %s:%d alc=%s:%d %02x%02x%02x%02x%02x%02x%02x%02x %02x%02x%02x%02x size=%ld\n", msg, file, line, ap->loc->file, ap->loc->line, q[0], q[1], q[2], q[3], q[4], q[5], q[6], q[7], p[0], p[1], p[2], p[3], ap->size);
-		else
-			kprintf("%s: broken, alc=%s:%d %02x%02x%02x%02x%02x%02x%02x%02x %02x%02x%02x%02x size=%ld\n", msg, ap->loc->file, ap->loc->line, q[0], q[1], q[2], q[3], q[4], q[5], q[6], q[7], p[0], p[1], p[2], p[3], ap->size);
-
-		if (next->check != 0x5a5a5a5a)
-			kprintf("next->HEADER: next=%p size=%ld cpu_id=%d\n", next->next, next->size, next->cpu_id);
-
-		return 1;
-	}
-
-	if(flags & 1){
-		ap->p = NULL;
-		ap->loc = NULL;
-		ap->size = 0;
-	}
-	return 0;
-}
-
-int memcheckall()
-{
-	int i;
-	struct alloc *ap;
-	int r = 0;
-
-	for(i = 0; i < HASHNUM; i++)
-		for(ap = allochash[i]; ap; ap = ap->next)
-			if(ap->p)
-				r |= _memcheck(ap->p + 1, "memcheck", NULL, 0, 2);
-	return r;
-}
-
-int freecheck(int runcount)
-{
-	int i;
-	struct alloc *ap;
-	struct location *lp;
-	int r = 0;
-
-	for (i = 0; i < HASHNUM; i++)
-		for (lp = lochash[i]; lp; lp = lp->next)
-			lp->cnt = 0;
-
-	for (i = 0; i < HASHNUM; i++)
-		for (ap = allochash[i]; ap; ap = ap->next)
-			if (ap->p && ap->runcount == runcount) {
-				ap->loc->cnt++;
-				r++;
-			}
-
-	if (r) {
-		kprintf("memory leak?\n");
-		for (i = 0; i < HASHNUM; i++)
-			for (lp = lochash[i]; lp; lp = lp->next)
-				if (lp->cnt)
-					kprintf(" alc=%s:%d cnt=%d\n", lp->file, lp->line, lp->cnt);
-	}
-
+	dkprintf("%s addr_entry %p added\n", __FUNCTION__, r);
+
+out:
 	return r;
 }
 
 void _kfree(void *ptr, char *file, int line)
 {
-	if (memdebug)
-		_memcheck(ptr, "KFREE", file, line, 1);
+	unsigned long irqflags;
+	struct kmalloc_track_entry *entry;
+	struct kmalloc_track_addr_entry *addr_entry_iter, *addr_entry = NULL;
+	int hash;
+
+	if (!memdebug) {
+		goto out;
+	}
+
+	hash = ((unsigned long)ptr >> 5) & KMALLOC_TRACK_HASH_MASK;
+	irqflags = ihk_mc_spinlock_lock(&kmalloc_addr_hash_locks[hash]);
+	list_for_each_entry(addr_entry_iter,
			&kmalloc_addr_hash[hash], hash) {
+		if (addr_entry_iter->addr == ptr) {
+			addr_entry = addr_entry_iter;
+			break;
+		}
+	}
+
+	if (addr_entry) {
+		list_del(&addr_entry->hash);
+	}
+	ihk_mc_spinlock_unlock(&kmalloc_addr_hash_locks[hash], irqflags);
+
+	if (!addr_entry) {
+		kprintf("%s: ERROR: kfree()ing invalid pointer\n", __FUNCTION__);
+		panic("panic");
+	}
+
+	entry = addr_entry->entry;
+
+	irqflags = ihk_mc_spinlock_lock(&entry->addr_list_lock);
+	list_del(&addr_entry->list);
+	ihk_mc_spinlock_unlock(&entry->addr_list_lock, irqflags);
+
+	dkprintf("%s addr_entry %p removed\n", __FUNCTION__, addr_entry->addr);
+	___kfree(addr_entry);
+
+	/* Do we need to remove tracking entry as well? */
+	if (!ihk_atomic_dec_and_test(&entry->alloc_count)) {
+		goto out;
+	}
+
+	hash = (strlen(entry->file) + entry->line + entry->size) &
		KMALLOC_TRACK_HASH_MASK;
+	irqflags = ihk_mc_spinlock_lock(&kmalloc_track_hash_locks[hash]);
+	list_del(&entry->hash);
+	ihk_mc_spinlock_unlock(&kmalloc_track_hash_locks[hash], irqflags);
+
+	dkprintf("%s entry %s:%d size: %d removed\n", __FUNCTION__,
+		entry->file, entry->line, entry->size);
+	___kfree(entry->file);
+	___kfree(entry);
+
+out:
 	___kfree(ptr);
 }
 
+void kmalloc_memcheck(void)
+{
+	int i;
+	unsigned long irqflags;
+	struct kmalloc_track_entry *entry = NULL;
+
+	for (i = 0; i < KMALLOC_TRACK_HASH_SIZE; ++i) {
+		irqflags = ihk_mc_spinlock_lock(&kmalloc_track_hash_locks[i]);
+		list_for_each_entry(entry, &kmalloc_track_hash[i], hash) {
+			struct kmalloc_track_addr_entry *addr_entry = NULL;
+			int cnt = 0;
+
+			ihk_mc_spinlock_lock_noirq(&entry->addr_list_lock);
+			list_for_each_entry(addr_entry, &entry->addr_list, list) {
+
+				dkprintf("%s memory leak: %p @ %s:%d size: %d runcount: %d\n",
					__FUNCTION__,
					addr_entry->addr,
					entry->file,
					entry->line,
					entry->size,
					addr_entry->runcount);
+
+				if (kmalloc_runcount != addr_entry->runcount)
+					continue;
+
+				cnt++;
+			}
+			ihk_mc_spinlock_unlock_noirq(&entry->addr_list_lock);
+
+			if (!cnt)
+				continue;
+
+			kprintf("%s memory leak: %s:%d size: %d cnt: %d, runcount: %d\n",
				__FUNCTION__,
				entry->file,
				entry->line,
				entry->size,
				cnt,
				kmalloc_runcount);
+		}
+		ihk_mc_spinlock_unlock(&kmalloc_track_hash_locks[i], irqflags);
+	}
+
+	++kmalloc_runcount;
+}
+
+/* Redirection routines registered in alloc structure */
 void *__kmalloc(int size, enum ihk_mc_ap_flag flag)
 {
 	return kmalloc(size, flag);
@@ -878,160 +932,199 @@ void __kfree(void *ptr)
 	kfree(ptr);
 }
 
-void kmalloc_init(void)
+static void ___kmalloc_insert_chunk(struct list_head *free_list,
	struct kmalloc_header *chunk)
 {
-	struct cpu_local_var *v = get_this_cpu_local_var();
-	struct malloc_header *h = &v->free_list;
-	int i;
+	struct kmalloc_header *chunk_iter, *next_chunk = NULL;
 
-	h->check = 0x5a5a5a5a;
-	h->next = &v->free_list;
-	h->size = 0;
-
-	register_kmalloc();
-
-	memdebug = find_command_line("memdebug");
-	for (i = 0; i < HASHNUM; i++) {
-		allochash[i] = NULL;
-		lochash[i] = NULL;
-	}
-	page = allocate_pages(16, IHK_MC_AP_NOWAIT);
-	space = 16 * 4096;
-	ihk_mc_spinlock_init(&alloclock);
-}
-
-void ____kfree(struct cpu_local_var *v, struct malloc_header *p)
-{
-	struct malloc_header *h = &v->free_list;
-	int combined = 0;
-
-	h = h->next;
-
-	while ((p < h || p > h->next) && h != &v->free_list) {
-		h = h->next;
-	}
-
-	if (h + h->size + 1 == p && h->size != 0) {
-		combined = 1;
-		h->size += p->size + 1;
-		h->check = 0x5a5a5a5a;
-	}
-	if (h->next == p + p->size + 1 && h->next->size != 0) {
-		if (combined) {
-			h->check = 0x5a5a5a5a;
-			h->size += h->next->size + 1;
-			h->next = h->next->next;
-		} else {
-			p->check = 0x5a5a5a5a;
-			p->size += h->next->size + 1;
-			p->next = h->next->next;
-			h->next = p;
+	/* Find out where to insert */
+	list_for_each_entry(chunk_iter, free_list, list) {
+		if ((void *)chunk < (void *)chunk_iter) {
+			next_chunk = chunk_iter;
+			break;
 		}
-	} else if (!combined) {
-		p->next = h->next;
-		h->next = p;
 	}
 
+	/* Add in front of next */
+	if (next_chunk) {
+		list_add_tail(&chunk->list, &next_chunk->list);
+	}
+	/* Add after the head */
+	else {
+		list_add(&chunk->list, free_list);
+	}
+
+	return;
 }
 
-void *___kmalloc(int size, enum ihk_mc_ap_flag flag)
+static void ___kmalloc_init_chunk(struct kmalloc_header *h, int size)
 {
-	struct cpu_local_var *v = get_this_cpu_local_var();
-	struct malloc_header *h = &v->free_list, *prev, *p;
-	int u, req_page;
+	h->size = size;
+	h->front_magic = 0x5c5c5c5c;
+	h->end_magic = 0x6d6d6d6d;
+	h->cpu_id = ihk_mc_get_processor_id();
+}
 
-	p = (struct malloc_header *)xchg8((unsigned long *)&v->remote_free_list, 0L);
-	while(p){
-		struct malloc_header *n = p->next;
-		____kfree(v, p);
-		p = n;
+static void ___kmalloc_consolidate_list(struct list_head *list)
+{
+	struct kmalloc_header *chunk_iter, *chunk, *next_chunk;
+
+reiterate:
+	chunk_iter = NULL;
+	chunk = NULL;
+
+	list_for_each_entry(next_chunk, list, list) {
+
+		if (chunk_iter && (((void *)chunk_iter + sizeof(struct kmalloc_header)
				+ chunk_iter->size) == (void *)next_chunk)) {
+			chunk = chunk_iter;
+			break;
+		}
+
+		chunk_iter = next_chunk;
 	}
 
-	if (size >= PAGE_SIZE * 4) {
+	if (!chunk) {
+		return;
+	}
+
+	chunk->size += (next_chunk->size + sizeof(struct kmalloc_header));
+	list_del(&next_chunk->list);
+	goto reiterate;
+}
+
+void kmalloc_consolidate_free_list(void)
+{
+	struct kmalloc_header *chunk, *tmp;
+	unsigned long irqflags =
		ihk_mc_spinlock_lock(&cpu_local_var(remote_free_list_lock));
+
+	/* Clean up remotely deallocated chunks */
+	list_for_each_entry_safe(chunk, tmp,
			&cpu_local_var(remote_free_list), list) {
+
+		list_del(&chunk->list);
+		___kmalloc_insert_chunk(&cpu_local_var(free_list), chunk);
+	}
+
+	/* Free list lock ensures IRQs are disabled */
+	___kmalloc_consolidate_list(&cpu_local_var(free_list));
+
+	ihk_mc_spinlock_unlock(&cpu_local_var(remote_free_list_lock), irqflags);
+}
+
+#define KMALLOC_MIN_SHIFT (5)
+#define KMALLOC_MIN_SIZE (1 << KMALLOC_TRACK_HASH_SHIFT)
+#define KMALLOC_MIN_MASK (KMALLOC_MIN_SIZE - 1)
+
+/* Actual low-level allocation routines */
+static void *___kmalloc(int size, enum ihk_mc_ap_flag flag)
+{
+	struct kmalloc_header *chunk_iter;
+	struct kmalloc_header *chunk = NULL;
+	int npages;
+	unsigned long kmalloc_irq_flags = cpu_disable_interrupt_save();
+
+	/* KMALLOC_MIN_SIZE bytes aligned size. */
+	if (size & KMALLOC_MIN_MASK) {
+		size = ((size + KMALLOC_MIN_SIZE - 1) & ~(KMALLOC_MIN_MASK));
+	}
+
+	chunk = NULL;
+	/* Find a chunk that is big enough */
+	list_for_each_entry(chunk_iter, &cpu_local_var(free_list), list) {
+		if (chunk_iter->size >= size) {
+			chunk = chunk_iter;
+			break;
+		}
+	}
+
+split_and_return:
+	/* Did we find one? */
+	if (chunk) {
+		/* Do we need to split it? Only if there is enough space for
		 * another header and some actual content */
+		if (chunk->size > (size + sizeof(struct kmalloc_header))) {
+			struct kmalloc_header *leftover;
+
+			leftover = (struct kmalloc_header *)
				((void *)chunk + sizeof(struct kmalloc_header) + size);
+			___kmalloc_init_chunk(leftover,
				(chunk->size - size - sizeof(struct kmalloc_header)));
+			list_add(&leftover->list, &chunk->list);
+			chunk->size = size;
+		}
+
+		list_del(&chunk->list);
+		cpu_restore_interrupt(kmalloc_irq_flags);
+		return ((void *)chunk + sizeof(struct kmalloc_header));
+	}
+
+	/* Allocate new memory and add it to free list */
+	npages = (size + sizeof(struct kmalloc_header) + (PAGE_SIZE - 1))
		>> PAGE_SHIFT;
+	chunk = ihk_mc_alloc_pages(npages, flag);
+
+	if (!chunk) {
+		cpu_restore_interrupt(kmalloc_irq_flags);
 		return NULL;
 	}
 
-	u = (size + sizeof(*h) - 1) / sizeof(*h);
+	___kmalloc_init_chunk(chunk,
		(npages * PAGE_SIZE - sizeof(struct kmalloc_header)));
+	___kmalloc_insert_chunk(&cpu_local_var(free_list), chunk);
 
-	prev = h;
-	h = h->next;
-
-	while (1) {
-		if (h == &v->free_list) {
-			req_page = ((u + 2) * sizeof(*h) + PAGE_SIZE - 1)
-				>> PAGE_SHIFT;
-
-			h = allocate_pages(req_page, flag);
-			if(h == NULL) {
-				kprintf("kmalloc(%#x,%#x): out of memory\n", size, flag);
-				return NULL;
-			}
-			h->check = 0x5a5a5a5a;
-			prev->next = h;
-			h->size = (req_page * PAGE_SIZE) / sizeof(*h) - 2;
-			/* Guard entry */
-			p = h + h->size + 1;
-			p->check = 0x5a5a5a5a;
-			p->next = &v->free_list;
-			p->size = 0;
-			h->next = p;
-		}
-
-		if (h->size >= u) {
-			if (h->size == u || h->size == u + 1) {
-				prev->next = h->next;
-				h->cpu_id = ihk_mc_get_processor_id();
-
-				return h + 1;
-			} else { /* Divide */
-				h->size -= u + 1;
-
-				p = h + h->size + 1;
-				p->check = 0x5a5a5a5a;
-				p->size = u;
-				p->cpu_id = ihk_mc_get_processor_id();
-
-				return p + 1;
-			}
-		}
-		prev = h;
-		h = h->next;
-	}
+	goto split_and_return;
 }
 
-void ___kfree(void *ptr)
+static void ___kfree(void *ptr)
 {
-	struct malloc_header *p = (struct malloc_header *)ptr;
-	struct cpu_local_var *v = get_cpu_local_var((--p)->cpu_id);
+	struct kmalloc_header *chunk =
		(struct kmalloc_header*)(ptr - sizeof(struct kmalloc_header));
+	unsigned long kmalloc_irq_flags = cpu_disable_interrupt_save();
 
-	if(p->cpu_id == ihk_mc_get_processor_id()){
-		____kfree(v, p);
+	/* Sanity check */
+	if (chunk->front_magic != 0x5c5c5c5c || chunk->end_magic != 0x6d6d6d6d) {
+		kprintf("%s: memory corruption at address 0x%p\n", __FUNCTION__, ptr);
+		panic("panic");
 	}
-	else{
-		unsigned long oldval;
-		unsigned long newval;
-		unsigned long rval;
-		do{
-			p->next = v->remote_free_list;
-			oldval = (unsigned long)p->next;
-			newval = (unsigned long)p;
-			rval = atomic_cmpxchg8(
-				(unsigned long *)&v->remote_free_list,
-				oldval, newval);
-		}while(rval != oldval);
+
+	/* Does this chunk belong to this CPU? */
+	if (chunk->cpu_id == ihk_mc_get_processor_id()) {
+		___kmalloc_insert_chunk(&cpu_local_var(free_list), chunk);
+		___kmalloc_consolidate_list(&cpu_local_var(free_list));
 	}
+	else {
+		struct cpu_local_var *v = get_cpu_local_var(chunk->cpu_id);
+		unsigned long irqflags;
+
+		irqflags = ihk_mc_spinlock_lock(&v->remote_free_list_lock);
+		list_add(&chunk->list, &v->remote_free_list);
+		ihk_mc_spinlock_unlock(&v->remote_free_list_lock, irqflags);
+	}
+
+	cpu_restore_interrupt(kmalloc_irq_flags);
 }
 
-void print_free_list(void)
+void ___kmalloc_print_free_list(struct list_head *list)
 {
-	struct cpu_local_var *v = get_this_cpu_local_var();
-	struct malloc_header *h = &v->free_list;
+	struct kmalloc_header *chunk_iter;
+	unsigned long irqflags = kprintf_lock();
 
-	h = h->next;
-
-	kprintf("free_list : \n");
-	while (h != &v->free_list) {
-		kprintf(" %p : %p, %d ->\n", h, h->next, h->size);
-		h = h->next;
+	__kprintf("%s: [ \n", __FUNCTION__);
+	list_for_each_entry(chunk_iter, &cpu_local_var(free_list), list) {
+		__kprintf("%s: 0x%lx:%d (VA PFN: %lu, off: %lu)\n", __FUNCTION__,
			(unsigned long)chunk_iter,
			chunk_iter->size,
			(unsigned long)chunk_iter >> PAGE_SHIFT,
			(unsigned long)chunk_iter % PAGE_SIZE);
 	}
-	kprintf("\n");
+	__kprintf("%s: ] \n", __FUNCTION__);
+	kprintf_unlock(irqflags);
 }
 
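Note on the kernel/mem.c hunks above: the old ad-hoc allocation tracker (fixed HASHNUM chains of struct alloc/struct location) is replaced by two hash tables, one keyed per call site and one keyed per returned address, and the low-level allocator is rebuilt around list_head-linked kmalloc_header chunks with front/end magic words and a per-CPU remote free list. The snippet below only demonstrates the bucketing arithmetic; the constants and key formulas are copied from the diff, everything else is simplified userspace code for illustration.

#include <stdio.h>
#include <string.h>

#define KMALLOC_TRACK_HASH_SHIFT (8)
#define KMALLOC_TRACK_HASH_SIZE  (1 << KMALLOC_TRACK_HASH_SHIFT)
#define KMALLOC_TRACK_HASH_MASK  (KMALLOC_TRACK_HASH_SIZE - 1)

/* one bucket per (file, line, size) allocation site, as in _kmalloc() */
static int track_bucket(const char *file, int line, int size)
{
	return (int)((strlen(file) + line + size) & KMALLOC_TRACK_HASH_MASK);
}

/* allocations are also hashed by address so _kfree() can find them quickly */
static int addr_bucket(const void *addr)
{
	return (int)(((unsigned long)addr >> 5) & KMALLOC_TRACK_HASH_MASK);
}

int main(void)
{
	int x;
	printf("track bucket: %d\n", track_bucket("kernel/mem.c", 700, 128));
	printf("addr bucket:  %d\n", addr_bucket(&x));
	return 0;
}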
101 kernel/process.c
@@ -53,7 +53,6 @@ static int copy_user_ranges(struct process_vm *vm, struct process_vm *orgvm);
 extern void release_fp_regs(struct thread *proc);
 extern void save_fp_regs(struct thread *proc);
 extern void restore_fp_regs(struct thread *proc);
-void settid(struct thread *proc, int mode, int newcpuid, int oldcpuid);
 extern void __runq_add_proc(struct thread *proc, int cpu_id);
 extern void terminate_host(int pid);
 extern void lapic_timer_enable(unsigned int clocks);
@@ -338,6 +337,10 @@ clone_thread(struct thread *org, unsigned long pc, unsigned long sp,
 		proc = org->proc;
 		thread->vm = org->vm;
 		thread->proc = proc;
+
+		thread->sigstack.ss_sp = NULL;
+		thread->sigstack.ss_flags = SS_DISABLE;
+		thread->sigstack.ss_size = 0;
 	}
 	/* fork() */
 	else {
@@ -383,9 +386,15 @@ clone_thread(struct thread *org, unsigned long pc, unsigned long sp,
 			goto err_free_proc;
 		}
 
+		thread->vm->vdso_addr = org->vm->vdso_addr;
+		thread->vm->vvar_addr = org->vm->vvar_addr;
 		thread->proc->maxrss = org->proc->maxrss;
 		thread->vm->currss = org->vm->currss;
 
+		thread->sigstack.ss_sp = org->sigstack.ss_sp;
+		thread->sigstack.ss_flags = org->sigstack.ss_flags;
+		thread->sigstack.ss_size = org->sigstack.ss_size;
+
 		dkprintf("fork(): copy_user_ranges() OK\n");
 	}
 
@@ -413,9 +422,6 @@ clone_thread(struct thread *org, unsigned long pc, unsigned long sp,
 		INIT_LIST_HEAD(&thread->sigcommon->sigpending);
 		// TODO: copy signalfd
 	}
-	thread->sigstack.ss_sp = NULL;
-	thread->sigstack.ss_flags = SS_DISABLE;
-	thread->sigstack.ss_size = 0;
 	ihk_mc_spinlock_init(&thread->sigpendinglock);
 	INIT_LIST_HEAD(&thread->sigpending);
 	thread->sigmask = org->sigmask;
@@ -566,6 +572,9 @@ static int copy_user_ranges(struct process_vm *vm, struct process_vm *orgvm)
 			break;
 		}
 
+		if(src_range->flag & VR_DONTFORK)
+			continue;
+
 		range = kmalloc(sizeof(struct vm_range), IHK_MC_AP_NOWAIT);
 		if (!range) {
 			goto err_rollback;
@@ -735,7 +744,7 @@ int join_process_memory_range(struct process_vm *vm,
 		memobj_release(merging->memobj);
 	}
 	list_del(&merging->list);
-	ihk_mc_free(merging);
+	kfree(merging);
 
 	error = 0;
 out:
@@ -831,8 +840,9 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range)
 	if (range->memobj) {
 		memobj_release(range->memobj);
 	}
+
 	list_del(&range->list);
-	ihk_mc_free(range);
+	kfree(range);
 
 	dkprintf("free_process_memory_range(%p,%lx-%lx): 0\n",
 			vm, start0, end0);
@@ -958,7 +968,6 @@ enum ihk_mc_pt_attribute common_vrflag_to_ptattr(unsigned long flag, uint64_t fa
 	return attr;
 }
 
-/* XXX: インデントを揃える必要がある */
 int add_process_memory_range(struct process_vm *vm,
 		unsigned long start, unsigned long end,
 		unsigned long phys, unsigned long flag,
@@ -1529,6 +1538,8 @@ retry:
 			kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):cannot allocate new page. %d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error);
 			goto out;
 		}
+		dkprintf("%s: clearing 0x%lx:%lu\n",
			__FUNCTION__, pgaddr, pgsize);
 		memset(virt, 0, pgsize);
 		phys = virt_to_phys(virt);
 		page_map(phys_to_page(phys));
@@ -1561,6 +1572,8 @@ retry:
 			kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):cannot allocate copy page. %d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error);
 			goto out;
 		}
+		dkprintf("%s: copying 0x%lx:%lu\n",
			__FUNCTION__, pgaddr, pgsize);
 		memcpy(virt, phys_to_virt(phys), pgsize);
 
 		phys = virt_to_phys(virt);
@@ -1641,6 +1654,18 @@ static int do_page_fault_process_vm(struct process_vm *vm, void *fault_addr0, ui
 				"access denied. %d\n",
 				ihk_mc_get_processor_id(), vm,
 				fault_addr0, reason, error);
+		kprintf("%s: reason: %s%s%s%s%s%s%s\n", __FUNCTION__,
			(reason & PF_PROT) ? "PF_PROT " : "",
			(reason & PF_WRITE) ? "PF_WRITE " : "",
			(reason & PF_USER) ? "PF_USER " : "",
			(reason & PF_RSVD) ? "PF_RSVD " : "",
			(reason & PF_INSTR) ? "PF_INSTR " : "",
			(reason & PF_PATCH) ? "PF_PATCH " : "",
			(reason & PF_POPULATE) ? "PF_POPULATE " : "");
+		kprintf("%s: range->flag & (%s%s%s)\n", __FUNCTION__,
			(range->flag & VR_PROT_READ) ? "VR_PROT_READ " : "",
			(range->flag & VR_PROT_WRITE) ? "VR_PROT_WRITE " : "",
			(range->flag & VR_PROT_EXEC) ? "VR_PROT_EXEC " : "");
 		if (((range->flag & VR_PROT_MASK) == VR_PROT_NONE))
 			kprintf("if (((range->flag & VR_PROT_MASK) == VR_PROT_NONE))\n");
 		if (((reason & PF_WRITE) && !(reason & PF_PATCH)))
@@ -1868,14 +1893,14 @@ unsigned long extend_process_region(struct process_vm *vm,
 		aligned_end = (aligned_end + (LARGE_PAGE_SIZE - 1)) & LARGE_PAGE_MASK;
 		/* Fill in the gap between old_aligned_end and aligned_end
		 * with regular pages */
-		if((p = allocate_pages((aligned_end - old_aligned_end) >> PAGE_SHIFT,
+		if((p = ihk_mc_alloc_pages((aligned_end - old_aligned_end) >> PAGE_SHIFT,
					IHK_MC_AP_NOWAIT)) == NULL){
 			return end;
 		}
 		if((rc = add_process_memory_range(vm, old_aligned_end,
					aligned_end, virt_to_phys(p), flag,
					LARGE_PAGE_SHIFT)) != 0){
-			free_pages(p, (aligned_end - old_aligned_end) >> PAGE_SHIFT);
+			ihk_mc_free_pages(p, (aligned_end - old_aligned_end) >> PAGE_SHIFT);
 			return end;
 		}
 
@@ -1888,7 +1913,7 @@ unsigned long extend_process_region(struct process_vm *vm,
				(LARGE_PAGE_SIZE - 1)) & LARGE_PAGE_MASK;
 		address = aligned_new_end;
 
-		if((p = allocate_pages((aligned_new_end - aligned_end + LARGE_PAGE_SIZE) >> PAGE_SHIFT,
+		if((p = ihk_mc_alloc_pages((aligned_new_end - aligned_end + LARGE_PAGE_SIZE) >> PAGE_SHIFT,
					IHK_MC_AP_NOWAIT)) == NULL){
 			return end;
 		}
@@ -1896,16 +1921,16 @@ unsigned long extend_process_region(struct process_vm *vm,
 		p_aligned = ((unsigned long)p + (LARGE_PAGE_SIZE - 1)) & LARGE_PAGE_MASK;
 
 		if (p_aligned > (unsigned long)p) {
-			free_pages(p, (p_aligned - (unsigned long)p) >> PAGE_SHIFT);
+			ihk_mc_free_pages(p, (p_aligned - (unsigned long)p) >> PAGE_SHIFT);
 		}
-		free_pages(
+		ihk_mc_free_pages(
			(void *)(p_aligned + aligned_new_end - aligned_end),
			(LARGE_PAGE_SIZE - (p_aligned - (unsigned long)p)) >> PAGE_SHIFT);
 
 		if((rc = add_process_memory_range(vm, aligned_end,
					aligned_new_end, virt_to_phys((void *)p_aligned),
					flag, LARGE_PAGE_SHIFT)) != 0){
-			free_pages(p, (aligned_new_end - aligned_end + LARGE_PAGE_SIZE) >> PAGE_SHIFT);
+			ihk_mc_free_pages(p, (aligned_new_end - aligned_end + LARGE_PAGE_SIZE) >> PAGE_SHIFT);
 			return end;
 		}
 
@@ -1923,7 +1948,7 @@ unsigned long extend_process_region(struct process_vm *vm,
 		p=0;
 	}else{
 
-		p = allocate_pages((aligned_new_end - aligned_end) >> PAGE_SHIFT, IHK_MC_AP_NOWAIT);
+		p = ihk_mc_alloc_pages((aligned_new_end - aligned_end) >> PAGE_SHIFT, IHK_MC_AP_NOWAIT);
 
 		if (!p) {
 			return end;
@@ -1932,7 +1957,7 @@ unsigned long extend_process_region(struct process_vm *vm,
 	if((rc = add_process_memory_range(vm, aligned_end, aligned_new_end,
				(p==0?0:virt_to_phys(p)), flag, NULL, 0,
				PAGE_SHIFT)) != 0){
-		free_pages(p, (aligned_new_end - aligned_end) >> PAGE_SHIFT);
+		ihk_mc_free_pages(p, (aligned_new_end - aligned_end) >> PAGE_SHIFT);
 		return end;
 	}
 
@@ -2045,6 +2070,7 @@ release_process(struct process *proc)
 		mcs_rwlock_writer_unlock(&parent->children_lock, &lock);
 	}
 
+	if (proc->tids) kfree(proc->tids);
 	kfree(proc);
 }
 
@@ -2150,6 +2176,23 @@ release_sigcommon(struct sig_common *sigcommon)
 	kfree(sigcommon);
 }
 
+/*
+ * Release the TID from the process' TID set corresponding to this thread.
+ * NOTE: threads_lock must be held.
+ */
+void __release_tid(struct process *proc, struct thread *thread) {
+	int i;
+
+	for (i = 0; i < proc->nr_tids; ++i) {
+		if (proc->tids[i].thread != thread) continue;
+
+		proc->tids[i].thread = NULL;
+		dkprintf("%s: tid %d has been released by %p\n",
			__FUNCTION__, thread->tid, thread);
+		break;
+	}
+}
+
 void destroy_thread(struct thread *thread)
 {
 	struct sig_pending *pending;
@@ -2166,6 +2209,7 @@ void destroy_thread(struct thread *thread)
 
 	mcs_rwlock_writer_lock(&proc->threads_lock, &lock);
 	list_del(&thread->siblings_list);
+	__release_tid(proc, thread);
 	mcs_rwlock_writer_unlock(&proc->threads_lock, &lock);
 
 	cpu_clear(thread->cpu_id, &thread->vm->address_space->cpu_set,
@@ -2303,6 +2347,8 @@ static void idle(void)
 		}
 		if (v->status == CPU_STATUS_IDLE ||
		    v->status == CPU_STATUS_RESERVED) {
+			/* No work to do? Consolidate the kmalloc free list */
+			kmalloc_consolidate_free_list();
 			cpu_safe_halt();
 		}
 		else {
@@ -2491,7 +2537,6 @@ static void do_migrate(void)
 		cur_v->runq_len -= 1;
 		old_cpu_id = req->thread->cpu_id;
 		req->thread->cpu_id = cpu_id;
-		settid(req->thread, 2, cpu_id, old_cpu_id);
 		list_add_tail(&req->thread->sched_list, &v->runq);
 		v->runq_len += 1;
 
@@ -2506,6 +2551,7 @@ static void do_migrate(void)
 		v->flags |= CPU_FLAG_NEED_RESCHED;
 		ihk_mc_interrupt_cpu(get_x86_cpu_local_variable(cpu_id)->apic_id, 0xd1);
 		double_rq_unlock(cur_v, v, irqstate);
+		//settid(req->thread, 2, cpu_id, old_cpu_id, 0, NULL);
 
 ack:
 		waitq_wakeup(&req->wq);
@@ -2541,13 +2587,8 @@ void schedule(void)
 	struct thread *last;
 
 	if (cpu_local_var(no_preempt)) {
-		dkprintf("no schedule() while no preemption! \n");
-		return;
-	}
-
-	if (cpu_local_var(current)
-			&& cpu_local_var(current)->in_syscall_offload) {
-		dkprintf("no schedule() while syscall offload!\n");
+		kprintf("%s: WARNING can't schedule() while no preemption, cnt: %d\n",
			__FUNCTION__, cpu_local_var(no_preempt));
 		return;
 	}
 
@@ -2576,9 +2617,10 @@ redo:
 	if (v->flags & CPU_FLAG_NEED_MIGRATE) {
 		next = &cpu_local_var(idle);
 	} else {
-		/* Pick a new running process */
+		/* Pick a new running process or one that has a pending signal */
 		list_for_each_entry_safe(thread, tmp, &(v->runq), sched_list) {
-			if (thread->status == PS_RUNNING) {
+			if (thread->status == PS_RUNNING ||
				(thread->status == PS_INTERRUPTIBLE && hassigpending(thread))) {
 				next = thread;
 				break;
 			}
@@ -2704,9 +2746,11 @@ sched_wakeup_thread(struct thread *thread, int valid_states)
 	int spin_slept = 0;
 	unsigned long irqstate;
 	struct cpu_local_var *v = get_cpu_local_var(thread->cpu_id);
+	struct process *proc = thread->proc;
+	struct mcs_rwlock_node updatelock;
 
 	dkprintf("sched_wakeup_process,proc->pid=%d,valid_states=%08x,proc->status=%08x,proc->cpu_id=%d,my cpu_id=%d\n",
-			thread->proc->pid, valid_states, thread->status, thread->cpu_id, ihk_mc_get_processor_id());
+			proc->pid, valid_states, thread->status, thread->cpu_id, ihk_mc_get_processor_id());
 
 	irqstate = ihk_mc_spinlock_lock(&(thread->spin_sleep_lock));
 	if (thread->spin_sleep > 0) {
@@ -2726,7 +2770,10 @@ sched_wakeup_thread(struct thread *thread, int valid_states)
 	irqstate = ihk_mc_spinlock_lock(&(v->runq_lock));
 
 	if (thread->status & valid_states) {
-		xchg4((int *)(&thread->proc->status), PS_RUNNING);
+		mcs_rwlock_writer_lock_noirq(&proc->update_lock, &updatelock);
+		if(proc->status != PS_EXITED)
+			proc->status = PS_RUNNING;
+		mcs_rwlock_writer_unlock_noirq(&proc->update_lock, &updatelock);
 		xchg4((int *)(&thread->status), PS_RUNNING);
 		status = 0;
 	}
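Note on the kernel/process.c hunks above: schedule() now also selects a thread that is sleeping interruptibly but has a signal pending, the idle loop consolidates the per-CPU kmalloc free list, and destroy_thread() releases the thread's TID slot under threads_lock. The snippet below is only a stand-alone illustration of the new pick condition, with demo types in place of the kernel's struct thread and hassigpending().

#include <stdio.h>

enum ps { PS_RUNNING = 1, PS_INTERRUPTIBLE = 2 };

struct demo_thread {
	enum ps status;
	int sigpending;   /* stand-in for hassigpending(thread) != NULL */
};

/* mirrors the pick condition added to schedule() in the hunk above */
static int eligible(const struct demo_thread *t)
{
	return t->status == PS_RUNNING ||
		(t->status == PS_INTERRUPTIBLE && t->sigpending);
}

int main(void)
{
	struct demo_thread sleeping_with_signal = { PS_INTERRUPTIBLE, 1 };
	printf("picked: %d\n", eligible(&sleeping_with_signal));
	return 0;
}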
@@ -281,6 +281,13 @@ process_procfs_request(unsigned long rarg)
 		ans = -EIO;
 		goto end;
 	}
+
+	if (pa < ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0) ||
		pa >= ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0)) {
+		ans = -EIO;
+		goto end;
+	}
+
 	va = phys_to_virt(pa);
 	if(readwrite)
 		memcpy(va, buf + ans, size);
515 kernel/syscall.c
@@ -105,7 +105,6 @@ static void calculate_time_from_tsc(struct timespec *ts);
 void check_signal(unsigned long, void *, int);
 void do_signal(long rc, void *regs, struct thread *thread, struct sig_pending *pending, int num);
 extern unsigned long do_kill(struct thread *thread, int pid, int tid, int sig, struct siginfo *info, int ptracecont);
-extern struct sigpending *hassigpending(struct thread *thread);
 extern long alloc_debugreg(struct thread *thread);
 extern int num_processors;
 extern unsigned long ihk_mc_get_ns_per_tsc(void);
@@ -128,11 +127,9 @@ int prepare_process_ranges_args_envs(struct thread *thread,
 static void do_mod_exit(int status);
 #endif
 
-static void send_syscall(struct syscall_request *req, int cpu, int pid)
+static void send_syscall(struct syscall_request *req, int cpu, int pid, struct syscall_response *res)
 {
-	struct ikc_scd_packet packet;
-	struct syscall_response *res;
-	struct syscall_params *scp;
+	struct ikc_scd_packet packet IHK_DMA_ALIGN;
 	struct ihk_ikc_channel_desc *syscall_channel;
 	int ret;
 
@@ -141,7 +138,6 @@ static void send_syscall(struct syscall_request *req, int cpu, int pid)
		req->number == __NR_kill){ // interrupt syscall
 		extern int num_processors;
 
-		scp = &get_cpu_local_var(0)->scp2;
 		syscall_channel = get_cpu_local_var(0)->syscall_channel2;
 
 		/* XXX: is this really going to work if multiple processes
@@ -153,34 +149,22 @@ static void send_syscall(struct syscall_request *req, int cpu, int pid)
 			pid = req->args[1];
 	}
 	else{
-		scp = &get_cpu_local_var(cpu)->scp;
 		syscall_channel = get_cpu_local_var(cpu)->syscall_channel;
 	}
-	res = scp->response_va;
 
 	res->status = 0;
 	req->valid = 0;
 
-#ifdef USE_DMA
-	memcpy_async(scp->request_pa,
		virt_to_phys(req), sizeof(*req), 0, &fin);
-
-	memcpy_async_wait(&scp->post_fin);
-	scp->post_va->v[0] = scp->post_idx;
-	memcpy_async_wait(&fin);
-#else
-	memcpy(scp->request_va, req, sizeof(*req));
-#endif
+	memcpy(&packet.req, req, sizeof(*req));
 
 	barrier();
-	scp->request_va->valid = 1;
-	*(unsigned int *)scp->doorbell_va = cpu + 1;
+	packet.req.valid = 1;
 
#ifdef SYSCALL_BY_IKC
 	packet.msg = SCD_MSG_SYSCALL_ONESIDE;
 	packet.ref = cpu;
 	packet.pid = pid ? pid : cpu_local_var(current)->proc->pid;
-	packet.arg = scp->request_rpa;
+	packet.resp_pa = virt_to_phys(res);
 	dkprintf("send syscall, nr: %d, pid: %d\n", req->number, packet.pid);
 
 	ret = ihk_ikc_send(syscall_channel, &packet, 0);
@@ -194,9 +178,8 @@ ihk_spinlock_t syscall_lock;
 
 long do_syscall(struct syscall_request *req, int cpu, int pid)
 {
-	struct syscall_response *res;
+	struct syscall_response res;
 	struct syscall_request req2 IHK_DMA_ALIGN;
-	struct syscall_params *scp;
 	int error;
 	long rc;
 	int islock = 0;
@@ -207,6 +190,9 @@ long do_syscall(struct syscall_request *req, int cpu, int pid)
 	dkprintf("SC(%d)[%3d] sending syscall\n",
			ihk_mc_get_processor_id(),
			req->number);
 
+	irqstate = 0; /* for avoidance of warning */
+	barrier();
+
 	if(req->number != __NR_exit_group){
 		if(proc->nohost && // host is down
@@ -216,55 +202,102 @@ long do_syscall(struct syscall_request *req, int cpu, int pid)
 		++thread->in_syscall_offload;
 	}
 
-	irqstate = 0; /* for avoidance of warning */
 	if(req->number == __NR_exit_group ||
	   req->number == __NR_gettid ||
	   req->number == __NR_kill){ // interrupt syscall
-		scp = &get_cpu_local_var(0)->scp2;
 		islock = 1;
 		irqstate = ihk_mc_spinlock_lock(&syscall_lock);
 	}
-	else{
-		scp = &get_cpu_local_var(cpu)->scp;
-	}
-	res = scp->response_va;
-
-	send_syscall(req, cpu, pid);
-
-	dkprintf("SC(%d)[%3d] waiting for host.. \n",
			ihk_mc_get_processor_id(),
			req->number);
+	/* The current thread is the requester and any thread from
	 * the pool may serve the request */
+	req->rtid = cpu_local_var(current)->tid;
+	req->ttid = 0;
+	res.req_thread_status = IHK_SCD_REQ_THREAD_SPINNING;
+	send_syscall(req, cpu, pid, &res);
+
+	dkprintf("%s: syscall num: %d waiting for Linux.. \n",
		__FUNCTION__, req->number);
 
#define STATUS_IN_PROGRESS 0
#define STATUS_COMPLETED 1
#define STATUS_PAGE_FAULT 3
-	while (res->status != STATUS_COMPLETED) {
-		while (res->status == STATUS_IN_PROGRESS) {
+	while (res.status != STATUS_COMPLETED) {
+		while (res.status == STATUS_IN_PROGRESS) {
+			struct cpu_local_var *v;
+			int do_schedule = 0;
+			long runq_irqstate;
+			unsigned long flags;
+			DECLARE_WAITQ_ENTRY(scd_wq_entry, cpu_local_var(current));
+
 			cpu_pause();
+
+			/* Spin if not preemptable */
+			if (cpu_local_var(no_preempt) || !thread->tid) {
+				continue;
+			}
+
+			/* Spin by default, but if re-schedule is requested let
			 * the other thread run */
+			runq_irqstate =
				ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock));
+			v = get_this_cpu_local_var();
+
+			if (v->flags & CPU_FLAG_NEED_RESCHED) {
+				do_schedule = 1;
+			}
+
+			ihk_mc_spinlock_unlock(&v->runq_lock, runq_irqstate);
+
+			if (!do_schedule) {
+				continue;
+			}
+
+			flags = cpu_disable_interrupt_save();
+
+			/* Try to sleep until notified */
+			if (__sync_bool_compare_and_swap(&res.req_thread_status,
					IHK_SCD_REQ_THREAD_SPINNING,
					IHK_SCD_REQ_THREAD_DESCHEDULED)) {
+
+				dkprintf("%s: tid %d waiting for syscall reply...\n",
					__FUNCTION__, thread->tid);
+				waitq_init(&thread->scd_wq);
+				waitq_prepare_to_wait(&thread->scd_wq, &scd_wq_entry,
					PS_INTERRUPTIBLE);
+				cpu_restore_interrupt(flags);
+				schedule();
+				waitq_finish_wait(&thread->scd_wq, &scd_wq_entry);
+			}
+
+			cpu_restore_interrupt(flags);
 		}
 
-		if (res->status == STATUS_PAGE_FAULT) {
+		if (res.status == STATUS_PAGE_FAULT) {
 			dkprintf("STATUS_PAGE_FAULT in syscall, pid: %d\n",
					cpu_local_var(current)->proc->pid);
 			error = page_fault_process_vm(thread->vm,
-					(void *)res->fault_address,
-					res->fault_reason|PF_POPULATE);
+					(void *)res.fault_address,
+					res.fault_reason|PF_POPULATE);
 
 			/* send result */
 			req2.number = __NR_mmap;
#define PAGER_RESUME_PAGE_FAULT 0x0101
 			req2.args[0] = PAGER_RESUME_PAGE_FAULT;
 			req2.args[1] = error;
+			/* The current thread is the requester and only the waiting thread
			 * may serve the request */
+			req2.rtid = cpu_local_var(current)->tid;
+			req2.ttid = res.stid;
 
-			send_syscall(&req2, cpu, pid);
+			res.req_thread_status = IHK_SCD_REQ_THREAD_SPINNING;
+			send_syscall(&req2, cpu, pid, &res);
 		}
 	}
 
-	dkprintf("SC(%d)[%3d] got host reply: %d \n",
			ihk_mc_get_processor_id(),
			req->number, res->ret);
+	dkprintf("%s: syscall num: %d got host reply: %d \n",
		__FUNCTION__, req->number, res.ret);
 
-	rc = res->ret;
+	rc = res.ret;
 	if(islock){
 		ihk_mc_spinlock_unlock(&syscall_lock, irqstate);
 	}
@@ -795,7 +828,8 @@ terminate(int rc, int sig)
 	release_thread(mythread);
 	release_process_vm(vm);
 	schedule();
-	// no return
+	kprintf("%s: ERROR: returned from terminate() -> schedule()\n", __FUNCTION__);
+	panic("panic");
 }
 
 void
@@ -813,14 +847,15 @@ terminate_host(int pid)
 }
 
 void
 interrupt_syscall(int pid, int cpuid)
|
interrupt_syscall(int pid, int tid)
|
||||||
{
|
{
|
||||||
dkprintf("interrupt_syscall,target pid=%d,target cpuid=%d\n", pid, cpuid);
|
dkprintf("interrupt_syscall,target pid=%d,target tid=%d\n", pid, tid);
|
||||||
ihk_mc_user_context_t ctx;
|
ihk_mc_user_context_t ctx;
|
||||||
long lerror;
|
long lerror;
|
||||||
|
|
||||||
|
kprintf("interrupt_syscall pid=%d tid=%d\n", pid, tid);
|
||||||
ihk_mc_syscall_arg0(&ctx) = pid;
|
ihk_mc_syscall_arg0(&ctx) = pid;
|
||||||
ihk_mc_syscall_arg1(&ctx) = cpuid;
|
ihk_mc_syscall_arg1(&ctx) = tid;
|
||||||
|
|
||||||
lerror = syscall_generic_forwarding(__NR_kill, &ctx);
|
lerror = syscall_generic_forwarding(__NR_kill, &ctx);
|
||||||
if (lerror) {
|
if (lerror) {
|
||||||
@@ -883,8 +918,6 @@ static int do_munmap(void *addr, size_t len)
|
|||||||
begin_free_pages_pending();
|
begin_free_pages_pending();
|
||||||
error = remove_process_memory_range(cpu_local_var(current)->vm,
|
error = remove_process_memory_range(cpu_local_var(current)->vm,
|
||||||
(intptr_t)addr, (intptr_t)addr+len, &ro_freed);
|
(intptr_t)addr, (intptr_t)addr+len, &ro_freed);
|
||||||
// XXX: TLB flush
|
|
||||||
flush_tlb();
|
|
||||||
if (error || !ro_freed) {
|
if (error || !ro_freed) {
|
||||||
clear_host_pte((uintptr_t)addr, len);
|
clear_host_pte((uintptr_t)addr, len);
|
||||||
}
|
}
|
||||||
@@ -896,6 +929,8 @@ static int do_munmap(void *addr, size_t len)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
finish_free_pages_pending();
|
finish_free_pages_pending();
|
||||||
|
dkprintf("%s: 0x%lx:%lu, error: %ld\n",
|
||||||
|
__FUNCTION__, addr, len, error);
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1043,31 +1078,29 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
|
|||||||
vrflags |= PROT_TO_VR_FLAG(prot);
|
vrflags |= PROT_TO_VR_FLAG(prot);
|
||||||
vrflags |= (flags & MAP_PRIVATE)? VR_PRIVATE: 0;
|
vrflags |= (flags & MAP_PRIVATE)? VR_PRIVATE: 0;
|
||||||
vrflags |= (flags & MAP_LOCKED)? VR_LOCKED: 0;
|
vrflags |= (flags & MAP_LOCKED)? VR_LOCKED: 0;
|
||||||
|
vrflags |= VR_DEMAND_PAGING;
|
||||||
if (flags & MAP_ANONYMOUS) {
|
if (flags & MAP_ANONYMOUS) {
|
||||||
if (0) {
|
if (!anon_on_demand) {
|
||||||
/* dummy */
|
populated_mapping = 1;
|
||||||
}
|
}
|
||||||
#ifdef USE_NOCACHE_MMAP
|
#ifdef USE_NOCACHE_MMAP
|
||||||
#define X_MAP_NOCACHE MAP_32BIT
|
#define X_MAP_NOCACHE MAP_32BIT
|
||||||
else if (flags & X_MAP_NOCACHE) {
|
else if (flags & X_MAP_NOCACHE) {
|
||||||
|
vrflags &= ~VR_DEMAND_PAGING;
|
||||||
vrflags |= VR_IO_NOCACHE;
|
vrflags |= VR_IO_NOCACHE;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
else {
|
|
||||||
vrflags |= VR_DEMAND_PAGING;
|
|
||||||
if (!anon_on_demand) {
|
|
||||||
populated_mapping = 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
vrflags |= VR_DEMAND_PAGING;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flags & (MAP_POPULATE | MAP_LOCKED)) {
|
if (flags & (MAP_POPULATE | MAP_LOCKED)) {
|
||||||
populated_mapping = 1;
|
populated_mapping = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* XXX: Intel MPI 128MB mapping.. */
|
||||||
|
if (len == 134217728) {
|
||||||
|
populated_mapping = 0;
|
||||||
|
}
|
||||||
|
|
||||||
if (!(prot & PROT_WRITE)) {
|
if (!(prot & PROT_WRITE)) {
|
||||||
error = set_host_vma(addr, len, PROT_READ);
|
error = set_host_vma(addr, len, PROT_READ);
|
||||||
if (error) {
|
if (error) {
|
||||||
@@ -1097,7 +1130,7 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
if (error == -ESRCH) {
|
if (error == -ESRCH) {
|
||||||
kprintf("do_mmap:hit non VREG\n");
|
dkprintf("do_mmap:hit non VREG\n");
|
||||||
/*
|
/*
|
||||||
* XXX: temporary:
|
* XXX: temporary:
|
||||||
*
|
*
|
||||||
@@ -1108,10 +1141,17 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
|
|||||||
vrflags &= ~VR_MEMTYPE_MASK;
|
vrflags &= ~VR_MEMTYPE_MASK;
|
||||||
vrflags |= VR_MEMTYPE_UC;
|
vrflags |= VR_MEMTYPE_UC;
|
||||||
}
|
}
|
||||||
error = devobj_create(fd, len, off, &memobj, &maxprot);
|
error = devobj_create(fd, len, off, &memobj, &maxprot,
|
||||||
|
prot, (flags & (MAP_POPULATE | MAP_LOCKED)));
|
||||||
|
|
||||||
|
if (!error) {
|
||||||
|
dkprintf("%s: device fd: %d off: %lu mapping at %p - %p\n",
|
||||||
|
__FUNCTION__, fd, off, addr, addr + len);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (error) {
|
if (error) {
|
||||||
ekprintf("do_mmap:fileobj_create failed. %d\n", error);
|
kprintf("%s: error: file mapping failed, fd: %d, error: %d\n",
|
||||||
|
__FUNCTION__, error);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1125,6 +1165,8 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
|
|||||||
error = -ENOMEM;
|
error = -ENOMEM;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
dkprintf("%s: 0x%x:%lu allocated %d pages, p2align: %lx\n",
|
||||||
|
__FUNCTION__, addr, len, npages, p2align);
|
||||||
phys = virt_to_phys(p);
|
phys = virt_to_phys(p);
|
||||||
}
|
}
|
||||||
else if (flags & MAP_SHARED) {
|
else if (flags & MAP_SHARED) {
|
||||||
@@ -1160,10 +1202,10 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
|
|||||||
error = add_process_memory_range(thread->vm, addr, addr+len, phys,
|
error = add_process_memory_range(thread->vm, addr, addr+len, phys,
|
||||||
vrflags, memobj, off, pgshift);
|
vrflags, memobj, off, pgshift);
|
||||||
if (error) {
|
if (error) {
|
||||||
ekprintf("do_mmap:add_process_memory_range"
|
kprintf("%s: add_process_memory_range failed for 0x%lx:%lu"
|
||||||
"(%p,%lx,%lx,%lx,%lx,%d) failed %d\n",
|
" flags: %lx, vrflags: %lx, pgshift: %d, error: %d\n",
|
||||||
thread->vm, addr, addr+len,
|
__FUNCTION__, addr, addr+len,
|
||||||
virt_to_phys(p), vrflags, pgshift, error);
|
flags, vrflags, pgshift, error);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1181,9 +1223,12 @@ out:
|
|||||||
if (!error && populated_mapping) {
|
if (!error && populated_mapping) {
|
||||||
error = populate_process_memory(thread->vm, (void *)addr, len);
|
error = populate_process_memory(thread->vm, (void *)addr, len);
|
||||||
if (error) {
|
if (error) {
|
||||||
ekprintf("do_mmap:populate_process_memory"
|
ekprintf("%s: error :populate_process_memory"
|
||||||
"(%p,%p,%lx) failed %d\n",
|
"vm: %p, addr: %p, len: %d (flags: %s%s) failed %d\n", __FUNCTION__,
|
||||||
thread->vm, (void *)addr, len, error);
|
thread->vm, (void *)addr, len,
|
||||||
|
(flags & MAP_POPULATE) ? "MAP_POPULATE " : "",
|
||||||
|
(flags & MAP_LOCKED) ? "MAP_LOCKED ": "",
|
||||||
|
error);
|
||||||
/*
|
/*
|
||||||
* In this case,
|
* In this case,
|
||||||
* the mapping established by this call should be unmapped
|
* the mapping established by this call should be unmapped
|
||||||
@@ -1206,8 +1251,12 @@ out:
|
|||||||
if (memobj) {
|
if (memobj) {
|
||||||
memobj_release(memobj);
|
memobj_release(memobj);
|
||||||
}
|
}
|
||||||
dkprintf("do_mmap(%lx,%lx,%x,%x,%d,%lx): %ld %lx\n",
|
dkprintf("%s: 0x%lx:%8lu, (req: 0x%lx:%lu), prot: %x, flags: %x, "
|
||||||
addr0, len0, prot, flags, fd, off0, error, addr);
|
"fd: %d, off: %lu, error: %ld, addr: 0x%lx\n",
|
||||||
|
__FUNCTION__,
|
||||||
|
addr, len, addr0, len0, prot, flags,
|
||||||
|
fd, off0, error, addr);
|
||||||
|
|
||||||
return (!error)? addr: error;
|
return (!error)? addr: error;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1438,8 +1487,8 @@ SYSCALL_DECLARE(getppid)
|
|||||||
return thread->proc->ppid_parent->pid;
|
return thread->proc->ppid_parent->pid;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void settid(struct thread *thread, int mode, int newcpuid, int oldcpuid,
|
||||||
settid(struct thread *thread, int mode, int newcpuid, int oldcpuid)
|
int nr_tids, int *tids)
|
||||||
{
|
{
|
||||||
struct syscall_request request IHK_DMA_ALIGN;
|
struct syscall_request request IHK_DMA_ALIGN;
|
||||||
unsigned long rc;
|
unsigned long rc;
|
||||||
@@ -1449,6 +1498,12 @@ settid(struct thread *thread, int mode, int newcpuid, int oldcpuid)
|
|||||||
request.args[1] = thread->proc->pid;
|
request.args[1] = thread->proc->pid;
|
||||||
request.args[2] = newcpuid;
|
request.args[2] = newcpuid;
|
||||||
request.args[3] = oldcpuid;
|
request.args[3] = oldcpuid;
|
||||||
|
/*
|
||||||
|
* If nr_tids is non-zero, tids should point to an array of ints
|
||||||
|
* where the thread ids of the mcexec process are expected.
|
||||||
|
*/
|
||||||
|
request.args[4] = nr_tids;
|
||||||
|
request.args[5] = virt_to_phys(tids);
|
||||||
rc = do_syscall(&request, ihk_mc_get_processor_id(), thread->proc->pid);
|
rc = do_syscall(&request, ihk_mc_get_processor_id(), thread->proc->pid);
|
||||||
if (mode != 2) {
|
if (mode != 2) {
|
||||||
thread->tid = rc;
|
thread->tid = rc;
|
||||||
@@ -1853,7 +1908,61 @@ unsigned long do_fork(int clone_flags, unsigned long newsp,
|
|||||||
&new->vm->address_space->cpu_set_lock);
|
&new->vm->address_space->cpu_set_lock);
|
||||||
|
|
||||||
if (clone_flags & CLONE_VM) {
|
if (clone_flags & CLONE_VM) {
|
||||||
settid(new, 1, cpuid, -1);
|
int *tids = NULL;
|
||||||
|
int i;
|
||||||
|
struct mcs_rwlock_node_irqsave lock;
|
||||||
|
|
||||||
|
mcs_rwlock_writer_lock(&newproc->threads_lock, &lock);
|
||||||
|
/* Obtain mcexec TIDs if not known yet */
|
||||||
|
if (!newproc->nr_tids) {
|
||||||
|
tids = kmalloc(sizeof(int) * num_processors, IHK_MC_AP_NOWAIT);
|
||||||
|
if (!tids) {
|
||||||
|
mcs_rwlock_writer_unlock(&newproc->threads_lock, &lock);
|
||||||
|
release_cpuid(cpuid);
|
||||||
|
return -ENOMEM;
|
||||||
|
}
|
||||||
|
|
||||||
|
newproc->tids = kmalloc(sizeof(struct mcexec_tid) * num_processors, IHK_MC_AP_NOWAIT);
|
||||||
|
if (!newproc->tids) {
|
||||||
|
mcs_rwlock_writer_unlock(&newproc->threads_lock, &lock);
|
||||||
|
kfree(tids);
|
||||||
|
release_cpuid(cpuid);
|
||||||
|
return -ENOMEM;
|
||||||
|
}
|
||||||
|
|
||||||
|
settid(new, 1, cpuid, -1, num_processors, tids);
|
||||||
|
|
||||||
|
for (i = 0; (i < num_processors) && tids[i]; ++i) {
|
||||||
|
dkprintf("%s: tid[%d]: %d\n", __FUNCTION__, i, tids[i]);
|
||||||
|
newproc->tids[i].tid = tids[i];
|
||||||
|
newproc->tids[i].thread = NULL;
|
||||||
|
++newproc->nr_tids;
|
||||||
|
}
|
||||||
|
|
||||||
|
kfree(tids);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Find an unused TID */
|
||||||
|
retry_tid:
|
||||||
|
for (i = 0; i < newproc->nr_tids; ++i) {
|
||||||
|
if (!newproc->tids[i].thread) {
|
||||||
|
if (!__sync_bool_compare_and_swap(
|
||||||
|
&newproc->tids[i].thread, NULL, new)) {
|
||||||
|
goto retry_tid;
|
||||||
|
}
|
||||||
|
new->tid = newproc->tids[i].tid;
|
||||||
|
dkprintf("%s: tid %d assigned to %p\n", __FUNCTION__, new->tid, new);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* TODO: spawn more mcexec threads */
|
||||||
|
if (!new->tid) {
|
||||||
|
kprintf("%s: no more TIDs available\n");
|
||||||
|
panic("");
|
||||||
|
}
|
||||||
|
|
||||||
|
mcs_rwlock_writer_unlock(&newproc->threads_lock, &lock);
|
||||||
}
|
}
|
||||||
/* fork() a new process on the host */
|
/* fork() a new process on the host */
|
||||||
else {
|
else {
|
||||||
@@ -1873,7 +1982,7 @@ unsigned long do_fork(int clone_flags, unsigned long newsp,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* In a single threaded process TID equals to PID */
|
/* In a single threaded process TID equals to PID */
|
||||||
settid(new, 0, cpuid, -1);
|
new->tid = newproc->pid;
|
||||||
new->vm->address_space->pids[0] = new->proc->pid;
|
new->vm->address_space->pids[0] = new->proc->pid;
|
||||||
|
|
||||||
dkprintf("fork(): new pid: %d\n", new->proc->pid);
|
dkprintf("fork(): new pid: %d\n", new->proc->pid);
|
||||||
@@ -2242,9 +2351,8 @@ SYSCALL_DECLARE(setfsgid)
|
|||||||
unsigned long newfsgid;
|
unsigned long newfsgid;
|
||||||
struct syscall_request request IHK_DMA_ALIGN;
|
struct syscall_request request IHK_DMA_ALIGN;
|
||||||
|
|
||||||
request.number = __NR_setfsuid;
|
request.number = __NR_setfsgid;
|
||||||
request.args[0] = fsgid;
|
request.args[0] = fsgid;
|
||||||
request.args[1] = 0;
|
|
||||||
newfsgid = do_syscall(&request, ihk_mc_get_processor_id(), 0);
|
newfsgid = do_syscall(&request, ihk_mc_get_processor_id(), 0);
|
||||||
do_setresgid();
|
do_setresgid();
|
||||||
return newfsgid;
|
return newfsgid;
|
||||||
@@ -2470,6 +2578,31 @@ SYSCALL_DECLARE(close)
|
|||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SYSCALL_DECLARE(fcntl)
|
||||||
|
{
|
||||||
|
int fd = ihk_mc_syscall_arg0(ctx);
|
||||||
|
// int cmd = ihk_mc_syscall_arg1(ctx);
|
||||||
|
long rc;
|
||||||
|
struct thread *thread = cpu_local_var(current);
|
||||||
|
struct process *proc = thread->proc;
|
||||||
|
struct mckfd *fdp;
|
||||||
|
long irqstate;
|
||||||
|
|
||||||
|
irqstate = ihk_mc_spinlock_lock(&proc->mckfd_lock);
|
||||||
|
for(fdp = proc->mckfd; fdp; fdp = fdp->next)
|
||||||
|
if(fdp->fd == fd)
|
||||||
|
break;
|
||||||
|
ihk_mc_spinlock_unlock(&proc->mckfd_lock, irqstate);
|
||||||
|
|
||||||
|
if(fdp && fdp->fcntl_cb){
|
||||||
|
rc = fdp->fcntl_cb(fdp, ctx);
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
rc = syscall_generic_forwarding(__NR_fcntl, ctx);
|
||||||
|
}
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
SYSCALL_DECLARE(rt_sigprocmask)
|
SYSCALL_DECLARE(rt_sigprocmask)
|
||||||
{
|
{
|
||||||
int how = ihk_mc_syscall_arg0(ctx);
|
int how = ihk_mc_syscall_arg0(ctx);
|
||||||
@@ -2621,26 +2754,12 @@ perf_counter_alloc(struct mc_perf_event *event)
|
|||||||
struct perf_event_attr *attr = &event->attr;
|
struct perf_event_attr *attr = &event->attr;
|
||||||
struct mc_perf_event *leader = event->group_leader;
|
struct mc_perf_event *leader = event->group_leader;
|
||||||
|
|
||||||
if(attr->type == PERF_TYPE_HARDWARE) {
|
ret = ihk_mc_perfctr_alloc_counter(&attr->type, &attr->config, leader->pmc_status);
|
||||||
|
|
||||||
event->counter_id = ihk_mc_perfctr_alloc_counter(leader->pmc_status);
|
|
||||||
|
|
||||||
} else if(attr->type == PERF_TYPE_RAW) {
|
|
||||||
// PAPI_REF_CYC counted by fixed counter
|
|
||||||
if((attr->config & 0x0000ffff) == 0x00000300) {
|
|
||||||
event->counter_id = 2 + X86_IA32_BASE_FIXED_PERF_COUNTERS;
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
event->counter_id = ihk_mc_perfctr_alloc_counter(leader->pmc_status);
|
|
||||||
} else {
|
|
||||||
// Not supported type.
|
|
||||||
ret = -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if(ret >= 0) {
|
if(ret >= 0) {
|
||||||
leader->pmc_status |= 1UL << event->counter_id;
|
leader->pmc_status |= 1UL << ret;
|
||||||
}
|
}
|
||||||
|
event->counter_id = ret;
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@@ -2649,7 +2768,6 @@ int
|
|||||||
perf_counter_start(struct mc_perf_event *event)
|
perf_counter_start(struct mc_perf_event *event)
|
||||||
{
|
{
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
enum ihk_perfctr_type type;
|
|
||||||
struct perf_event_attr *attr = &event->attr;
|
struct perf_event_attr *attr = &event->attr;
|
||||||
int mode = 0x00;
|
int mode = 0x00;
|
||||||
|
|
||||||
@@ -2660,52 +2778,34 @@ perf_counter_start(struct mc_perf_event *event)
|
|||||||
mode |= PERFCTR_USER_MODE;
|
mode |= PERFCTR_USER_MODE;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(attr->type == PERF_TYPE_HARDWARE) {
|
if(event->counter_id >= 0 && event->counter_id < X86_IA32_NUM_PERF_COUNTERS) {
|
||||||
switch(attr->config){
|
|
||||||
case PERF_COUNT_HW_CPU_CYCLES :
|
|
||||||
type = APT_TYPE_CYCLE;
|
|
||||||
break;
|
|
||||||
case PERF_COUNT_HW_INSTRUCTIONS :
|
|
||||||
type = APT_TYPE_INSTRUCTIONS;
|
|
||||||
break;
|
|
||||||
default :
|
|
||||||
// Not supported config.
|
|
||||||
type = PERFCTR_MAX_TYPE;
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = ihk_mc_perfctr_init(event->counter_id, type, mode);
|
|
||||||
ihk_mc_perfctr_set(event->counter_id, event->sample_freq * -1);
|
|
||||||
ihk_mc_perfctr_start(1UL << event->counter_id);
|
|
||||||
|
|
||||||
} else if(attr->type == PERF_TYPE_RAW) {
|
|
||||||
// PAPI_REF_CYC counted by fixed counter
|
|
||||||
if(event->counter_id >= X86_IA32_BASE_FIXED_PERF_COUNTERS) {
|
|
||||||
ret = ihk_mc_perfctr_fixed_init(event->counter_id, mode);
|
|
||||||
ihk_mc_perfctr_set(event->counter_id, event->sample_freq * -1);
|
|
||||||
ihk_mc_perfctr_start(1UL << event->counter_id);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = ihk_mc_perfctr_init_raw(event->counter_id, attr->config, mode);
|
ret = ihk_mc_perfctr_init_raw(event->counter_id, attr->config, mode);
|
||||||
ihk_mc_perfctr_set(event->counter_id, event->sample_freq * -1);
|
|
||||||
ihk_mc_perfctr_start(1UL << event->counter_id);
|
ihk_mc_perfctr_start(1UL << event->counter_id);
|
||||||
} else {
|
}
|
||||||
// Not supported type.
|
else if(event->counter_id >= X86_IA32_BASE_FIXED_PERF_COUNTERS &&
|
||||||
|
event->counter_id < X86_IA32_BASE_FIXED_PERF_COUNTERS + X86_IA32_NUM_FIXED_PERF_COUNTERS) {
|
||||||
|
ret = ihk_mc_perfctr_fixed_init(event->counter_id, mode);
|
||||||
|
ihk_mc_perfctr_start(1UL << event->counter_id);
|
||||||
|
}
|
||||||
|
else {
|
||||||
ret = -1;
|
ret = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned long perf_event_read_value(struct mc_perf_event *event)
|
unsigned long perf_event_read_value(struct mc_perf_event *event)
|
||||||
{
|
{
|
||||||
unsigned long rtn_count = 0;
|
unsigned long rtn_count = 0;
|
||||||
|
unsigned long pmc_count = 0;
|
||||||
int counter_id = event->counter_id;
|
int counter_id = event->counter_id;
|
||||||
|
|
||||||
if(event->pid == 0)
|
if(event->pid == 0) {
|
||||||
event->count = ihk_mc_perfctr_read(counter_id);
|
pmc_count = ihk_mc_perfctr_read(counter_id) + event->attr.sample_freq;
|
||||||
|
pmc_count &= 0x000000ffffffffffL; // 40bit MASK
|
||||||
|
}
|
||||||
|
|
||||||
rtn_count += event->count;
|
rtn_count += event->count + pmc_count;
|
||||||
|
|
||||||
if(event->attr.inherit)
|
if(event->attr.inherit)
|
||||||
rtn_count += event->child_count_total;
|
rtn_count += event->child_count_total;
|
||||||
@@ -2922,11 +3022,21 @@ perf_ioctl(struct mckfd *sfd, ihk_mc_user_context_t *ctx)
|
|||||||
break;
|
break;
|
||||||
case PERF_EVENT_IOC_RESET:
|
case PERF_EVENT_IOC_RESET:
|
||||||
// TODO: reset other process
|
// TODO: reset other process
|
||||||
ihk_mc_perfctr_reset(counter_id);
|
ihk_mc_perfctr_set(counter_id, event->attr.sample_freq * -1);
|
||||||
|
event->count = 0L;
|
||||||
break;
|
break;
|
||||||
case PERF_EVENT_IOC_REFRESH:
|
case PERF_EVENT_IOC_REFRESH:
|
||||||
// TODO: refresh other process
|
// TODO: refresh other process
|
||||||
ihk_mc_perfctr_set(counter_id, event->sample_freq * -1);
|
|
||||||
|
// not supported on inherited events
|
||||||
|
if(event->attr.inherit)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
event->count += event->attr.sample_freq;
|
||||||
|
ihk_mc_perfctr_set(counter_id, event->attr.sample_freq * -1);
|
||||||
|
|
||||||
|
perf_start(event);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
default :
|
default :
|
||||||
return -1;
|
return -1;
|
||||||
@@ -2945,6 +3055,28 @@ perf_close(struct mckfd *sfd, ihk_mc_user_context_t *ctx)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
perf_fcntl(struct mckfd *sfd, ihk_mc_user_context_t *ctx)
|
||||||
|
{
|
||||||
|
int cmd = ihk_mc_syscall_arg1(ctx);
|
||||||
|
long arg = ihk_mc_syscall_arg2(ctx);
|
||||||
|
int rc = 0;
|
||||||
|
|
||||||
|
switch(cmd) {
|
||||||
|
case 10: // F_SETSIG
|
||||||
|
sfd->sig_no = arg;
|
||||||
|
break;
|
||||||
|
case 0xf: // F_SETOWN_EX
|
||||||
|
break;
|
||||||
|
default :
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
rc = syscall_generic_forwarding(__NR_fcntl, ctx);
|
||||||
|
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
static long
|
static long
|
||||||
perf_mmap(struct mckfd *sfd, ihk_mc_user_context_t *ctx)
|
perf_mmap(struct mckfd *sfd, ihk_mc_user_context_t *ctx)
|
||||||
{
|
{
|
||||||
@@ -2963,6 +3095,7 @@ perf_mmap(struct mckfd *sfd, ihk_mc_user_context_t *ctx)
|
|||||||
|
|
||||||
// setup perf_event_mmap_page
|
// setup perf_event_mmap_page
|
||||||
page = (struct perf_event_mmap_page *)rc;
|
page = (struct perf_event_mmap_page *)rc;
|
||||||
|
page->data_head = 16;
|
||||||
page->cap_user_rdpmc = 1;
|
page->cap_user_rdpmc = 1;
|
||||||
|
|
||||||
return rc;
|
return rc;
|
||||||
@@ -3014,7 +3147,7 @@ SYSCALL_DECLARE(perf_event_open)
|
|||||||
|
|
||||||
event->sample_freq = attr->sample_freq;
|
event->sample_freq = attr->sample_freq;
|
||||||
event->nr_siblings = 0;
|
event->nr_siblings = 0;
|
||||||
event->count = 0;
|
event->count = 0L;
|
||||||
event->child_count_total = 0;
|
event->child_count_total = 0;
|
||||||
event->parent = NULL;
|
event->parent = NULL;
|
||||||
event->pid = pid;
|
event->pid = pid;
|
||||||
@@ -3050,10 +3183,12 @@ SYSCALL_DECLARE(perf_event_open)
|
|||||||
if(!sfd)
|
if(!sfd)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
sfd->fd = fd;
|
sfd->fd = fd;
|
||||||
|
sfd->sig_no = -1;
|
||||||
sfd->read_cb = perf_read;
|
sfd->read_cb = perf_read;
|
||||||
sfd->ioctl_cb = perf_ioctl;
|
sfd->ioctl_cb = perf_ioctl;
|
||||||
sfd->close_cb = perf_close;
|
sfd->close_cb = perf_close;
|
||||||
sfd->mmap_cb = perf_mmap;
|
sfd->mmap_cb = perf_mmap;
|
||||||
|
sfd->fcntl_cb = perf_fcntl;
|
||||||
sfd->data = (long)event;
|
sfd->data = (long)event;
|
||||||
irqstate = ihk_mc_spinlock_lock(&proc->mckfd_lock);
|
irqstate = ihk_mc_spinlock_lock(&proc->mckfd_lock);
|
||||||
|
|
||||||
@@ -3413,6 +3548,90 @@ SYSCALL_DECLARE(mincore)
|
|||||||
return 0;
|
return 0;
|
||||||
} /* sys_mincore() */
|
} /* sys_mincore() */
|
||||||
|
|
||||||
|
static int
|
||||||
|
set_memory_range_flag(struct vm_range *range, unsigned long arg)
|
||||||
|
{
|
||||||
|
range->flag |= arg;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
clear_memory_range_flag(struct vm_range *range, unsigned long arg)
|
||||||
|
{
|
||||||
|
range->flag &= ~arg;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
change_attr_process_memory_range(struct process_vm *vm,
|
||||||
|
uintptr_t start, uintptr_t end,
|
||||||
|
int (*change_proc)(struct vm_range *,
|
||||||
|
unsigned long),
|
||||||
|
unsigned long arg)
|
||||||
|
{
|
||||||
|
uintptr_t addr;
|
||||||
|
int error;
|
||||||
|
struct vm_range *range;
|
||||||
|
struct vm_range *prev;
|
||||||
|
struct vm_range *next;
|
||||||
|
int join_flag = 0;
|
||||||
|
|
||||||
|
error = 0;
|
||||||
|
range = lookup_process_memory_range(vm, start, start + PAGE_SIZE);
|
||||||
|
if(!range){
|
||||||
|
error = -ENOMEM;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
prev = previous_process_memory_range(vm, range);
|
||||||
|
if(!prev)
|
||||||
|
prev = range;
|
||||||
|
for (addr = start; addr < end; addr = range->end) {
|
||||||
|
if (range->start < addr) {
|
||||||
|
if((error = split_process_memory_range(vm, range, addr, &range))) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (end < range->end) {
|
||||||
|
if((error = split_process_memory_range(vm, range, end, NULL))) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(!(error = change_proc(range, arg))){
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
range = next_process_memory_range(vm, range);
|
||||||
|
}
|
||||||
|
if(error){
|
||||||
|
next = next_process_memory_range(vm, range);
|
||||||
|
if(!next)
|
||||||
|
next = range;
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
next = range;
|
||||||
|
}
|
||||||
|
|
||||||
|
while(prev != next){
|
||||||
|
int wkerr;
|
||||||
|
|
||||||
|
range = next_process_memory_range(vm, prev);
|
||||||
|
if(!range)
|
||||||
|
break;
|
||||||
|
wkerr = join_process_memory_range(vm, prev, range);
|
||||||
|
if(range == next)
|
||||||
|
join_flag = 1;
|
||||||
|
if (wkerr) {
|
||||||
|
if(join_flag)
|
||||||
|
break;
|
||||||
|
prev = range;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
out:
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
SYSCALL_DECLARE(madvise)
|
SYSCALL_DECLARE(madvise)
|
||||||
{
|
{
|
||||||
const uintptr_t start = (uintptr_t)ihk_mc_syscall_arg0(ctx);
|
const uintptr_t start = (uintptr_t)ihk_mc_syscall_arg0(ctx);
|
||||||
@@ -3521,6 +3740,7 @@ SYSCALL_DECLARE(madvise)
|
|||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if(advice == MADV_DONTFORK || advice == MADV_DOFORK);
|
||||||
else if (!range->memobj || !memobj_has_pager(range->memobj)) {
|
else if (!range->memobj || !memobj_has_pager(range->memobj)) {
|
||||||
dkprintf("[%d]sys_madvise(%lx,%lx,%x):has not pager"
|
dkprintf("[%d]sys_madvise(%lx,%lx,%x):has not pager"
|
||||||
"[%lx-%lx) %lx\n",
|
"[%lx-%lx) %lx\n",
|
||||||
@@ -3565,6 +3785,27 @@ SYSCALL_DECLARE(madvise)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(advice == MADV_DONTFORK){
|
||||||
|
error = change_attr_process_memory_range(thread->vm, start, end,
|
||||||
|
set_memory_range_flag,
|
||||||
|
VR_DONTFORK);
|
||||||
|
if(error){
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(advice == MADV_DOFORK){
|
||||||
|
error = change_attr_process_memory_range(thread->vm, start, end,
|
||||||
|
clear_memory_range_flag,
|
||||||
|
VR_DONTFORK);
|
||||||
|
if(error){
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(advice == MADV_DONTFORK ||
|
||||||
|
advice == MADV_DOFORK){
|
||||||
|
error = syscall_generic_forwarding(__NR_madvise, ctx);
|
||||||
|
}
|
||||||
|
|
||||||
error = 0;
|
error = 0;
|
||||||
out:
|
out:
|
||||||
ihk_mc_spinlock_unlock_noirq(&thread->vm->memory_range_lock);
|
ihk_mc_spinlock_unlock_noirq(&thread->vm->memory_range_lock);
|
||||||
@@ -5540,6 +5781,10 @@ SYSCALL_DECLARE(sched_setaffinity)
|
|||||||
int empty_set = 1;
|
int empty_set = 1;
|
||||||
extern int num_processors;
|
extern int num_processors;
|
||||||
|
|
||||||
|
if (!u_cpu_set) {
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
if (sizeof(k_cpu_set) > len) {
|
if (sizeof(k_cpu_set) > len) {
|
||||||
memset(&k_cpu_set, 0, sizeof(k_cpu_set));
|
memset(&k_cpu_set, 0, sizeof(k_cpu_set));
|
||||||
}
|
}
|
||||||
@@ -5547,7 +5792,7 @@ SYSCALL_DECLARE(sched_setaffinity)
|
|||||||
len = MIN2(len, sizeof(k_cpu_set));
|
len = MIN2(len, sizeof(k_cpu_set));
|
||||||
|
|
||||||
if (copy_from_user(&k_cpu_set, u_cpu_set, len)) {
|
if (copy_from_user(&k_cpu_set, u_cpu_set, len)) {
|
||||||
kprintf("%s:%d copy_from_user failed.\n", __FILE__, __LINE__);
|
dkprintf("%s: error: copy_from_user failed for %p:%d\n", __FUNCTION__, u_cpu_set, len);
|
||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -75,7 +75,7 @@ sysfs_createf(struct sysfs_ops *ops, void *instance, int mode,
|
|||||||
dkprintf("sysfs_createf(%p,%p,%#o,%s,...)\n",
|
dkprintf("sysfs_createf(%p,%p,%#o,%s,...)\n",
|
||||||
ops, instance, mode, fmt);
|
ops, instance, mode, fmt);
|
||||||
|
|
||||||
param = allocate_pages(1, IHK_MC_AP_NOWAIT);
|
param = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT);
|
||||||
if (!param) {
|
if (!param) {
|
||||||
error = -ENOMEM;
|
error = -ENOMEM;
|
||||||
ekprintf("sysfs_createf:allocate_pages failed. %d\n", error);
|
ekprintf("sysfs_createf:allocate_pages failed. %d\n", error);
|
||||||
@@ -134,7 +134,7 @@ sysfs_createf(struct sysfs_ops *ops, void *instance, int mode,
|
|||||||
error = 0;
|
error = 0;
|
||||||
out:
|
out:
|
||||||
if (param) {
|
if (param) {
|
||||||
free_pages(param, 1);
|
ihk_mc_free_pages(param, 1);
|
||||||
}
|
}
|
||||||
if (error) {
|
if (error) {
|
||||||
ekprintf("sysfs_createf(%p,%p,%#o,%s,...): %d\n",
|
ekprintf("sysfs_createf(%p,%p,%#o,%s,...): %d\n",
|
||||||
@@ -156,7 +156,7 @@ sysfs_mkdirf(sysfs_handle_t *dirhp, const char *fmt, ...)
|
|||||||
|
|
||||||
dkprintf("sysfs_mkdirf(%p,%s,...)\n", dirhp, fmt);
|
dkprintf("sysfs_mkdirf(%p,%s,...)\n", dirhp, fmt);
|
||||||
|
|
||||||
param = allocate_pages(1, IHK_MC_AP_NOWAIT);
|
param = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT);
|
||||||
if (!param) {
|
if (!param) {
|
||||||
error = -ENOMEM;
|
error = -ENOMEM;
|
||||||
ekprintf("sysfs_mkdirf:allocate_pages failed. %d\n", error);
|
ekprintf("sysfs_mkdirf:allocate_pages failed. %d\n", error);
|
||||||
@@ -208,7 +208,7 @@ sysfs_mkdirf(sysfs_handle_t *dirhp, const char *fmt, ...)
|
|||||||
|
|
||||||
out:
|
out:
|
||||||
if (param) {
|
if (param) {
|
||||||
free_pages(param, 1);
|
ihk_mc_free_pages(param, 1);
|
||||||
}
|
}
|
||||||
if (error) {
|
if (error) {
|
||||||
ekprintf("sysfs_mkdirf(%p,%s,...): %d\n", dirhp, fmt, error);
|
ekprintf("sysfs_mkdirf(%p,%s,...): %d\n", dirhp, fmt, error);
|
||||||
@@ -229,7 +229,7 @@ sysfs_symlinkf(sysfs_handle_t targeth, const char *fmt, ...)
|
|||||||
|
|
||||||
dkprintf("sysfs_symlinkf(%#lx,%s,...)\n", targeth.handle, fmt);
|
dkprintf("sysfs_symlinkf(%#lx,%s,...)\n", targeth.handle, fmt);
|
||||||
|
|
||||||
param = allocate_pages(1, IHK_MC_AP_NOWAIT);
|
param = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT);
|
||||||
if (!param) {
|
if (!param) {
|
||||||
error = -ENOMEM;
|
error = -ENOMEM;
|
||||||
ekprintf("sysfs_symlinkf:allocate_pages failed. %d\n", error);
|
ekprintf("sysfs_symlinkf:allocate_pages failed. %d\n", error);
|
||||||
@@ -279,7 +279,7 @@ sysfs_symlinkf(sysfs_handle_t targeth, const char *fmt, ...)
|
|||||||
error = 0;
|
error = 0;
|
||||||
out:
|
out:
|
||||||
if (param) {
|
if (param) {
|
||||||
free_pages(param, 1);
|
ihk_mc_free_pages(param, 1);
|
||||||
}
|
}
|
||||||
if (error) {
|
if (error) {
|
||||||
ekprintf("sysfs_symlinkf(%#lx,%s,...): %d\n",
|
ekprintf("sysfs_symlinkf(%#lx,%s,...): %d\n",
|
||||||
@@ -301,7 +301,7 @@ sysfs_lookupf(sysfs_handle_t *objhp, const char *fmt, ...)
|
|||||||
|
|
||||||
dkprintf("sysfs_lookupf(%p,%s,...)\n", objhp, fmt);
|
dkprintf("sysfs_lookupf(%p,%s,...)\n", objhp, fmt);
|
||||||
|
|
||||||
param = allocate_pages(1, IHK_MC_AP_NOWAIT);
|
param = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT);
|
||||||
if (!param) {
|
if (!param) {
|
||||||
error = -ENOMEM;
|
error = -ENOMEM;
|
||||||
ekprintf("sysfs_lookupf:allocate_pages failed. %d\n", error);
|
ekprintf("sysfs_lookupf:allocate_pages failed. %d\n", error);
|
||||||
@@ -353,7 +353,7 @@ sysfs_lookupf(sysfs_handle_t *objhp, const char *fmt, ...)
|
|||||||
|
|
||||||
out:
|
out:
|
||||||
if (param) {
|
if (param) {
|
||||||
free_pages(param, 1);
|
ihk_mc_free_pages(param, 1);
|
||||||
}
|
}
|
||||||
if (error) {
|
if (error) {
|
||||||
ekprintf("sysfs_lookupf(%p,%s,...): %d\n", objhp, fmt, error);
|
ekprintf("sysfs_lookupf(%p,%s,...): %d\n", objhp, fmt, error);
|
||||||
@@ -374,7 +374,7 @@ sysfs_unlinkf(int flags, const char *fmt, ...)
|
|||||||
|
|
||||||
dkprintf("sysfs_unlinkf(%#x,%s,...)\n", flags, fmt);
|
dkprintf("sysfs_unlinkf(%#x,%s,...)\n", flags, fmt);
|
||||||
|
|
||||||
param = allocate_pages(1, IHK_MC_AP_NOWAIT);
|
param = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT);
|
||||||
if (!param) {
|
if (!param) {
|
||||||
error = -ENOMEM;
|
error = -ENOMEM;
|
||||||
ekprintf("sysfs_unlinkf:allocate_pages failed. %d\n", error);
|
ekprintf("sysfs_unlinkf:allocate_pages failed. %d\n", error);
|
||||||
@@ -423,7 +423,7 @@ sysfs_unlinkf(int flags, const char *fmt, ...)
|
|||||||
error = 0;
|
error = 0;
|
||||||
out:
|
out:
|
||||||
if (param) {
|
if (param) {
|
||||||
free_pages(param, 1);
|
ihk_mc_free_pages(param, 1);
|
||||||
}
|
}
|
||||||
if (error) {
|
if (error) {
|
||||||
ekprintf("sysfs_unlinkf(%#x,%s,...): %d\n", flags, fmt, error);
|
ekprintf("sysfs_unlinkf(%#x,%s,...): %d\n", flags, fmt, error);
|
||||||
@@ -601,14 +601,14 @@ sysfs_init(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
sysfs_data_bufsize = PAGE_SIZE;
|
sysfs_data_bufsize = PAGE_SIZE;
|
||||||
sysfs_data_buf = allocate_pages(1, IHK_MC_AP_NOWAIT);
|
sysfs_data_buf = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT);
|
||||||
if (!sysfs_data_buf) {
|
if (!sysfs_data_buf) {
|
||||||
error = -ENOMEM;
|
error = -ENOMEM;
|
||||||
ekprintf("sysfs_init:allocate_pages(buf) failed. %d\n", error);
|
ekprintf("sysfs_init:allocate_pages(buf) failed. %d\n", error);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
param = allocate_pages(1, IHK_MC_AP_NOWAIT);
|
param = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT);
|
||||||
if (!param) {
|
if (!param) {
|
||||||
error = -ENOMEM;
|
error = -ENOMEM;
|
||||||
ekprintf("sysfs_init:allocate_pages(param) failed. %d\n",
|
ekprintf("sysfs_init:allocate_pages(param) failed. %d\n",
|
||||||
@@ -644,7 +644,7 @@ sysfs_init(void)
|
|||||||
error = 0;
|
error = 0;
|
||||||
out:
|
out:
|
||||||
if (param) {
|
if (param) {
|
||||||
free_pages(param, 1);
|
ihk_mc_free_pages(param, 1);
|
||||||
}
|
}
|
||||||
if (error) {
|
if (error) {
|
||||||
ekprintf("sysfs_init(): %d\n", error);
|
ekprintf("sysfs_init(): %d\n", error);
|
||||||
|
|||||||
@@ -172,6 +172,10 @@ static int zeroobj_get_page(struct memobj *memobj, off_t off, int p2align,
|
|||||||
struct zeroobj *obj = to_zeroobj(memobj);
|
struct zeroobj *obj = to_zeroobj(memobj);
|
||||||
struct page *page;
|
struct page *page;
|
||||||
|
|
||||||
|
/* Don't bother about zero page, page fault handler will
|
||||||
|
* allocate and clear pages */
|
||||||
|
return 0;
|
||||||
|
|
||||||
dkprintf("zeroobj_get_page(%p,%#lx,%d,%p)\n",
|
dkprintf("zeroobj_get_page(%p,%#lx,%d,%p)\n",
|
||||||
memobj, off, p2align, physp);
|
memobj, off, p2align, physp);
|
||||||
if (off & ~PAGE_MASK) {
|
if (off & ~PAGE_MASK) {
|
||||||
|
|||||||
@@ -103,7 +103,7 @@ void ihk_mc_clean_micpa(void);
|
|||||||
void *ihk_mc_alloc_aligned_pages(int npages, int p2align, enum ihk_mc_ap_flag flag);
|
void *ihk_mc_alloc_aligned_pages(int npages, int p2align, enum ihk_mc_ap_flag flag);
|
||||||
void *ihk_mc_alloc_pages(int npages, enum ihk_mc_ap_flag flag);
|
void *ihk_mc_alloc_pages(int npages, enum ihk_mc_ap_flag flag);
|
||||||
void ihk_mc_free_pages(void *p, int npages);
|
void ihk_mc_free_pages(void *p, int npages);
|
||||||
void *ihk_mc_allocate(int size, enum ihk_mc_ap_flag flag);
|
void *ihk_mc_allocate(int size, int flag);
|
||||||
void ihk_mc_free(void *p);
|
void ihk_mc_free(void *p);
|
||||||
|
|
||||||
void *arch_alloc_page(enum ihk_mc_ap_flag flag);
|
void *arch_alloc_page(enum ihk_mc_ap_flag flag);
|
||||||
|
|||||||
@@ -54,11 +54,11 @@ int ihk_mc_perfctr_start(unsigned long counter_mask);
|
|||||||
int ihk_mc_perfctr_stop(unsigned long counter_mask);
|
int ihk_mc_perfctr_stop(unsigned long counter_mask);
|
||||||
int ihk_mc_perfctr_fixed_init(int counter, int mode);
|
int ihk_mc_perfctr_fixed_init(int counter, int mode);
|
||||||
int ihk_mc_perfctr_reset(int counter);
|
int ihk_mc_perfctr_reset(int counter);
|
||||||
int ihk_mc_perfctr_set(int counter, unsigned long value);
|
int ihk_mc_perfctr_set(int counter, long value);
|
||||||
int ihk_mc_perfctr_read_mask(unsigned long counter_mask, unsigned long *value);
|
int ihk_mc_perfctr_read_mask(unsigned long counter_mask, unsigned long *value);
|
||||||
unsigned long ihk_mc_perfctr_read(int counter);
|
unsigned long ihk_mc_perfctr_read(int counter);
|
||||||
unsigned long ihk_mc_perfctr_read_msr(int counter);
|
unsigned long ihk_mc_perfctr_read_msr(int counter);
|
||||||
int ihk_mc_perfctr_alloc_counter(unsigned long pmc_status);
|
int ihk_mc_perfctr_alloc_counter(unsigned int *type, unsigned long *config, unsigned long pmc_status);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||