Merge remote branch 'origin/master' into gdb
Conflicts: kernel/include/process.h
This commit is contained in:
@@ -21,6 +21,7 @@
|
||||
#include <cpulocal.h>
|
||||
#include <march.h>
|
||||
#include <signal.h>
|
||||
#include <process.h>
|
||||
|
||||
#define LAPIC_ID 0x020
|
||||
#define LAPIC_TIMER 0x320
|
||||
@@ -438,6 +439,7 @@ void handle_interrupt(int vector, struct x86_regs *regs)
|
||||
}
|
||||
|
||||
check_signal(0, regs);
|
||||
check_need_resched();
|
||||
}
|
||||
|
||||
void gpe_handler(struct x86_regs *regs)
|
||||
@@ -447,6 +449,7 @@ void gpe_handler(struct x86_regs *regs)
|
||||
arch_show_interrupt_context(regs);
|
||||
set_signal(SIGILL, regs);
|
||||
check_signal(0, regs);
|
||||
check_need_resched();
|
||||
// panic("GPF");
|
||||
}
|
||||
|
||||
|
||||
@@ -78,6 +78,8 @@
|
||||
#define PFL3_DIRTY ((pte_t)0x40)
|
||||
#define PFL3_SIZE ((pte_t)0x80) /* Used in 1G page */
|
||||
#define PFL3_GLOBAL ((pte_t)0x100)
|
||||
#define PFL3_IGNORED_11 ((pte_t)1 << 11)
|
||||
#define PFL3_FILEOFF PFL3_IGNORED_11
|
||||
|
||||
#define PFL2_PRESENT ((pte_t)0x01)
|
||||
#define PFL2_WRITABLE ((pte_t)0x02)
|
||||
@@ -88,6 +90,8 @@
|
||||
#define PFL2_DIRTY ((pte_t)0x40)
|
||||
#define PFL2_SIZE ((pte_t)0x80) /* Used in 2M page */
|
||||
#define PFL2_GLOBAL ((pte_t)0x100)
|
||||
#define PFL2_IGNORED_11 ((pte_t)1 << 11)
|
||||
#define PFL2_FILEOFF PFL2_IGNORED_11
|
||||
|
||||
#define PFL1_PRESENT ((pte_t)0x01)
|
||||
#define PFL1_WRITABLE ((pte_t)0x02)
|
||||
@@ -96,6 +100,8 @@
|
||||
#define PFL1_PCD ((pte_t)0x10)
|
||||
#define PFL1_ACCESSED ((pte_t)0x20)
|
||||
#define PFL1_DIRTY ((pte_t)0x40)
|
||||
#define PFL1_IGNORED_11 ((pte_t)1 << 11)
|
||||
#define PFL1_FILEOFF PFL1_IGNORED_11
|
||||
|
||||
/* We allow user programs to access all the memory */
|
||||
#define PFL4_KERN_ATTR (PFL4_PRESENT | PFL4_WRITABLE)
|
||||
@@ -108,6 +114,9 @@
|
||||
#define PFL3_PDIR_ATTR (PFL3_PRESENT | PFL3_WRITABLE | PFL3_USER)
|
||||
#define PFL2_PDIR_ATTR (PFL2_PRESENT | PFL2_WRITABLE | PFL2_USER)
|
||||
|
||||
#define PTE_NULL ((pte_t)0)
|
||||
typedef unsigned long pte_t;
|
||||
|
||||
/* For easy conversion, it is better to be the same as architecture's ones */
|
||||
enum ihk_mc_pt_attribute {
|
||||
PTATTR_ACTIVE = 0x01,
|
||||
@@ -115,14 +124,12 @@ enum ihk_mc_pt_attribute {
|
||||
PTATTR_USER = 0x04,
|
||||
PTATTR_DIRTY = 0x40,
|
||||
PTATTR_LARGEPAGE = 0x80,
|
||||
PTATTR_FILEOFF = PFL2_FILEOFF,
|
||||
PTATTR_NO_EXECUTE = 0x8000000000000000,
|
||||
PTATTR_UNCACHABLE = 0x10000,
|
||||
PTATTR_FOR_USER = 0x20000,
|
||||
};
|
||||
|
||||
#define PTE_NULL ((pte_t)0)
|
||||
typedef unsigned long pte_t;
|
||||
|
||||
static inline int pte_is_null(pte_t *ptep)
|
||||
{
|
||||
return (*ptep == PTE_NULL);
|
||||
@@ -138,11 +145,77 @@ static inline int pte_is_writable(pte_t *ptep)
|
||||
return !!(*ptep & PF_WRITABLE);
|
||||
}
|
||||
|
||||
static inline int pte_is_dirty(pte_t *ptep, size_t pgsize)
|
||||
{
|
||||
switch (pgsize) {
|
||||
case PTL1_SIZE: return !!(*ptep & PFL1_DIRTY);
|
||||
case PTL2_SIZE: return !!(*ptep & PFL2_DIRTY);
|
||||
case PTL3_SIZE: return !!(*ptep & PFL3_DIRTY);
|
||||
default:
|
||||
#if 0 /* XXX: workaround. cannot use panic() here */
|
||||
panic("pte_is_dirty");
|
||||
#else
|
||||
return !!(*ptep & PTATTR_DIRTY);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
static inline int pte_is_fileoff(pte_t *ptep, size_t pgsize)
|
||||
{
|
||||
switch (pgsize) {
|
||||
case PTL1_SIZE: return !!(*ptep & PFL1_FILEOFF);
|
||||
case PTL2_SIZE: return !!(*ptep & PFL2_FILEOFF);
|
||||
case PTL3_SIZE: return !!(*ptep & PFL3_FILEOFF);
|
||||
default:
|
||||
#if 0 /* XXX: workaround. cannot use panic() here */
|
||||
panic("pte_is_fileoff");
|
||||
#else
|
||||
return !!(*ptep & PTATTR_FILEOFF);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
static inline uintptr_t pte_get_phys(pte_t *ptep)
|
||||
{
|
||||
return (*ptep & PT_PHYSMASK);
|
||||
}
|
||||
|
||||
static inline off_t pte_get_off(pte_t *ptep, size_t pgsize)
|
||||
{
|
||||
return (off_t)(*ptep & PAGE_MASK);
|
||||
}
|
||||
|
||||
static inline void pte_make_fileoff(off_t off,
|
||||
enum ihk_mc_pt_attribute ptattr, size_t pgsize, pte_t *ptep)
|
||||
{
|
||||
uint64_t attr;
|
||||
|
||||
attr = ptattr & ~PAGE_MASK;
|
||||
|
||||
switch (pgsize) {
|
||||
case PTL1_SIZE: attr |= PFL1_FILEOFF; break;
|
||||
case PTL2_SIZE: attr |= PFL2_FILEOFF | PFL2_SIZE; break;
|
||||
case PTL3_SIZE: attr |= PFL3_FILEOFF | PFL3_SIZE; break;
|
||||
default:
|
||||
#if 0 /* XXX: workaround. cannot use panic() here */
|
||||
panic("pte_make_fileoff");
|
||||
#else
|
||||
attr |= PTATTR_FILEOFF;
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
*ptep = (off & PAGE_MASK) | attr;
|
||||
}
|
||||
|
||||
#if 0 /* XXX: workaround. cannot use panic() here */
|
||||
static inline void pte_xchg(pte_t *ptep, pte_t *valp)
|
||||
{
|
||||
*valp = xchg(ptep, *valp);
|
||||
}
|
||||
#else
|
||||
#define pte_xchg(p,vp) do { *(vp) = xchg((p), *(vp)); } while (0)
|
||||
#endif
|
||||
|
||||
struct page_table;
|
||||
void set_pte(pte_t *ppte, unsigned long phys, enum ihk_mc_pt_attribute attr);
|
||||
pte_t *get_pte(struct page_table *pt, void *virt, enum ihk_mc_pt_attribute attr);
|
||||
|
||||
@@ -136,7 +136,7 @@ struct tss64 {
|
||||
} __attribute__((packed));
|
||||
|
||||
struct x86_regs {
|
||||
unsigned long r11, r10, r9, r8;
|
||||
unsigned long r15, r14, r13, r12, r11, r10, r9, r8;
|
||||
unsigned long rdi, rsi, rdx, rcx, rbx, rax, rbp;
|
||||
unsigned long error, rip, cs, rflags, rsp, ss;
|
||||
};
|
||||
|
||||
@@ -11,6 +11,9 @@
|
||||
* 2012/02/11 bgerofi what kind of new features have been added
|
||||
*/
|
||||
|
||||
#ifndef __HEADER_X86_COMMON_SIGNAL_H
|
||||
#define __HEADER_X86_COMMON_SIGNAL_H
|
||||
|
||||
#define _NSIG 64
|
||||
#define _NSIG_BPW 64
|
||||
#define _NSIG_WORDS (_NSIG / _NSIG_BPW)
|
||||
@@ -149,3 +152,5 @@ typedef struct siginfo {
|
||||
#define SIGSYS 31
|
||||
#define SIGUNUSED 31
|
||||
#define SIGRTMIN 32
|
||||
|
||||
#endif /*__HEADER_X86_COMMON_SIGNAL_H*/
|
||||
|
||||
@@ -40,6 +40,7 @@ SYSCALL_DELEGATED(18, pwrite64)
|
||||
SYSCALL_DELEGATED(20, writev)
|
||||
SYSCALL_DELEGATED(21, access)
|
||||
SYSCALL_HANDLED(24, sched_yield)
|
||||
SYSCALL_HANDLED(25, mremap)
|
||||
SYSCALL_HANDLED(28, madvise)
|
||||
SYSCALL_HANDLED(34, pause)
|
||||
SYSCALL_HANDLED(39, getpid)
|
||||
@@ -75,6 +76,7 @@ SYSCALL_DELEGATED(201, time)
|
||||
SYSCALL_HANDLED(202, futex)
|
||||
SYSCALL_HANDLED(203, sched_setaffinity)
|
||||
SYSCALL_HANDLED(204, sched_getaffinity)
|
||||
SYSCALL_HANDLED(216, remap_file_pages)
|
||||
SYSCALL_DELEGATED(217, getdents64)
|
||||
SYSCALL_HANDLED(218, set_tid_address)
|
||||
SYSCALL_HANDLED(231, exit_group)
|
||||
@@ -87,5 +89,6 @@ SYSCALL_HANDLED(601, pmc_init)
|
||||
SYSCALL_HANDLED(602, pmc_start)
|
||||
SYSCALL_HANDLED(603, pmc_stop)
|
||||
SYSCALL_HANDLED(604, pmc_reset)
|
||||
SYSCALL_HANDLED(700, get_cpu_id)
|
||||
|
||||
/**** End of File ****/
|
||||
|
||||
@@ -35,8 +35,16 @@
|
||||
pushq %r8; \
|
||||
pushq %r9; \
|
||||
pushq %r10; \
|
||||
pushq %r11;
|
||||
pushq %r11; \
|
||||
pushq %r12; \
|
||||
pushq %r13; \
|
||||
pushq %r14; \
|
||||
pushq %r15;
|
||||
#define POP_ALL_REGS \
|
||||
popq %r15; \
|
||||
popq %r14; \
|
||||
popq %r13; \
|
||||
popq %r12; \
|
||||
popq %r11; \
|
||||
popq %r10; \
|
||||
popq %r9; \
|
||||
@@ -67,7 +75,7 @@ vector=vector+1
|
||||
|
||||
common_interrupt:
|
||||
PUSH_ALL_REGS
|
||||
movq 88(%rsp), %rdi
|
||||
movq 120(%rsp), %rdi
|
||||
movq %rsp, %rsi
|
||||
call handle_interrupt /* Enter C code */
|
||||
POP_ALL_REGS
|
||||
@@ -83,7 +91,7 @@ page_fault:
|
||||
cld
|
||||
PUSH_ALL_REGS
|
||||
movq %cr2, %rdi
|
||||
movq 88(%rsp),%rsi
|
||||
movq 120(%rsp),%rsi
|
||||
movq %rsp,%rdx
|
||||
movq __page_fault_handler_address(%rip), %rax
|
||||
andq %rax, %rax
|
||||
@@ -120,13 +128,13 @@ x86_syscall:
|
||||
movq %gs:24, %rcx
|
||||
movq %rcx, 32(%rsp)
|
||||
PUSH_ALL_REGS
|
||||
movq 72(%rsp), %rdi
|
||||
movq 104(%rsp), %rdi
|
||||
movw %ss, %ax
|
||||
movw %ax, %ds
|
||||
movq %rsp, %rsi
|
||||
callq *__x86_syscall_handler(%rip)
|
||||
1:
|
||||
movq %rax, 72(%rsp)
|
||||
movq %rax, 104(%rsp)
|
||||
POP_ALL_REGS
|
||||
#ifdef USE_SYSRET
|
||||
movq 8(%rsp), %rcx
|
||||
|
||||
@@ -219,7 +219,13 @@ static struct page_table *__alloc_new_pt(enum ihk_mc_ap_flag ap_flag)
|
||||
* but L2 and L1 do not!
|
||||
*/
|
||||
|
||||
static enum ihk_mc_pt_attribute attr_mask = PTATTR_WRITABLE | PTATTR_USER | PTATTR_ACTIVE;
|
||||
static enum ihk_mc_pt_attribute attr_mask
|
||||
= 0
|
||||
| PTATTR_FILEOFF
|
||||
| PTATTR_WRITABLE
|
||||
| PTATTR_USER
|
||||
| PTATTR_ACTIVE
|
||||
| 0;
|
||||
#define ATTR_MASK attr_mask
|
||||
|
||||
void enable_ptattr_no_execute(void)
|
||||
@@ -523,6 +529,7 @@ int ihk_mc_pt_print_pte(struct page_table *pt, void *virt)
|
||||
|
||||
if (!(pt->entry[l4idx] & PFL4_PRESENT)) {
|
||||
__kprintf("0x%lX l4idx not present! \n", (unsigned long)virt);
|
||||
__kprintf("l4 entry: 0x%lX\n", pt->entry[l4idx]);
|
||||
return -EFAULT;
|
||||
}
|
||||
pt = phys_to_virt(pt->entry[l4idx] & PAGE_MASK);
|
||||
@@ -530,6 +537,7 @@ int ihk_mc_pt_print_pte(struct page_table *pt, void *virt)
|
||||
__kprintf("l3 table: 0x%lX l3idx: %d \n", virt_to_phys(pt), l3idx);
|
||||
if (!(pt->entry[l3idx] & PFL3_PRESENT)) {
|
||||
__kprintf("0x%lX l3idx not present! \n", (unsigned long)virt);
|
||||
__kprintf("l3 entry: 0x%lX\n", pt->entry[l3idx]);
|
||||
return -EFAULT;
|
||||
}
|
||||
pt = phys_to_virt(pt->entry[l3idx] & PAGE_MASK);
|
||||
@@ -537,6 +545,7 @@ int ihk_mc_pt_print_pte(struct page_table *pt, void *virt)
|
||||
__kprintf("l2 table: 0x%lX l2idx: %d \n", virt_to_phys(pt), l2idx);
|
||||
if (!(pt->entry[l2idx] & PFL2_PRESENT)) {
|
||||
__kprintf("0x%lX l2idx not present! \n", (unsigned long)virt);
|
||||
__kprintf("l2 entry: 0x%lX\n", pt->entry[l2idx]);
|
||||
return -EFAULT;
|
||||
}
|
||||
if ((pt->entry[l2idx] & PFL2_SIZE)) {
|
||||
@@ -546,11 +555,12 @@ int ihk_mc_pt_print_pte(struct page_table *pt, void *virt)
|
||||
|
||||
__kprintf("l1 table: 0x%lX l1idx: %d \n", virt_to_phys(pt), l1idx);
|
||||
if (!(pt->entry[l1idx] & PFL1_PRESENT)) {
|
||||
__kprintf("0x%lX PTE (l1) not present! entry: 0x%lX\n",
|
||||
(unsigned long)virt, pt->entry[l1idx]);
|
||||
__kprintf("0x%lX l1idx not present! \n", (unsigned long)virt);
|
||||
__kprintf("l1 entry: 0x%lX\n", pt->entry[l1idx]);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
__kprintf("l1 entry: 0x%lX\n", pt->entry[l1idx]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -822,8 +832,16 @@ static int split_large_page(pte_t *ptep)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
phys = *ptep & PT_PHYSMASK;
|
||||
attr = *ptep & ~PFL2_SIZE;
|
||||
if (!(*ptep & PFL2_FILEOFF)) {
|
||||
phys = *ptep & PT_PHYSMASK;
|
||||
attr = *ptep & ~PT_PHYSMASK;
|
||||
attr &= ~PFL2_SIZE;
|
||||
}
|
||||
else {
|
||||
phys = *ptep & PAGE_MASK; /* file offset */
|
||||
attr = *ptep & ~PAGE_MASK;
|
||||
attr &= ~PFL2_SIZE;
|
||||
}
|
||||
|
||||
for (i = 0; i < PT_ENTRIES; ++i) {
|
||||
pt->entry[i] = (phys + (i * PTL1_SIZE)) | attr;
|
||||
@@ -833,6 +851,156 @@ static int split_large_page(pte_t *ptep)
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct visit_pte_args {
|
||||
page_table_t pt;
|
||||
enum visit_pte_flag flags;
|
||||
int padding;
|
||||
pte_visitor_t *funcp;
|
||||
void *arg;
|
||||
};
|
||||
|
||||
static int visit_pte_l1(void *arg0, pte_t *ptep, uintptr_t base,
|
||||
uintptr_t start, uintptr_t end)
|
||||
{
|
||||
struct visit_pte_args *args = arg0;
|
||||
|
||||
if ((*ptep == PTE_NULL) && (args->flags & VPTEF_SKIP_NULL)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return (*args->funcp)(args->arg, args->pt, ptep, (void *)base,
|
||||
PTL1_SIZE);
|
||||
}
|
||||
|
||||
static int visit_pte_l2(void *arg0, pte_t *ptep, uintptr_t base,
|
||||
uintptr_t start, uintptr_t end)
|
||||
{
|
||||
int error;
|
||||
struct visit_pte_args *args = arg0;
|
||||
struct page_table *pt;
|
||||
|
||||
if ((*ptep == PTE_NULL) && (args->flags & VPTEF_SKIP_NULL)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef USE_LARGE_PAGES
|
||||
if (((*ptep == PTE_NULL) || (*ptep & PFL2_SIZE))
|
||||
&& (start <= base)
|
||||
&& (((base + PTL2_SIZE) <= end)
|
||||
|| (end == 0))) {
|
||||
error = (*args->funcp)(args->arg, args->pt, ptep,
|
||||
(void *)base, PTL2_SIZE);
|
||||
if (error != -E2BIG) {
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
if (*ptep & PFL2_SIZE) {
|
||||
ekprintf("visit_pte_l2:split large page\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (*ptep == PTE_NULL) {
|
||||
pt = __alloc_new_pt(IHK_MC_AP_NOWAIT);
|
||||
if (!pt) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
*ptep = virt_to_phys(pt) | PFL2_PDIR_ATTR;
|
||||
}
|
||||
else {
|
||||
pt = phys_to_virt(*ptep & PT_PHYSMASK);
|
||||
}
|
||||
|
||||
error = walk_pte_l1(pt, base, start, end, &visit_pte_l1, arg0);
|
||||
return error;
|
||||
}
|
||||
|
||||
static int visit_pte_l3(void *arg0, pte_t *ptep, uintptr_t base,
|
||||
uintptr_t start, uintptr_t end)
|
||||
{
|
||||
int error;
|
||||
struct visit_pte_args *args = arg0;
|
||||
struct page_table *pt;
|
||||
|
||||
if ((*ptep == PTE_NULL) && (args->flags & VPTEF_SKIP_NULL)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef USE_LARGE_PAGES
|
||||
if (((*ptep == PTE_NULL) || (*ptep & PFL3_SIZE))
|
||||
&& (start <= base)
|
||||
&& (((base + PTL3_SIZE) <= end)
|
||||
|| (end == 0))) {
|
||||
error = (*args->funcp)(args->arg, args->pt, ptep,
|
||||
(void *)base, PTL3_SIZE);
|
||||
if (error != -E2BIG) {
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
if (*ptep & PFL3_SIZE) {
|
||||
ekprintf("visit_pte_l3:split large page\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (*ptep == PTE_NULL) {
|
||||
pt = __alloc_new_pt(IHK_MC_AP_NOWAIT);
|
||||
if (!pt) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
*ptep = virt_to_phys(pt) | PFL3_PDIR_ATTR;
|
||||
}
|
||||
else {
|
||||
pt = phys_to_virt(*ptep & PT_PHYSMASK);
|
||||
}
|
||||
|
||||
error = walk_pte_l2(pt, base, start, end, &visit_pte_l2, arg0);
|
||||
return error;
|
||||
}
|
||||
|
||||
static int visit_pte_l4(void *arg0, pte_t *ptep, uintptr_t base,
|
||||
uintptr_t start, uintptr_t end)
|
||||
{
|
||||
int error;
|
||||
struct visit_pte_args *args = arg0;
|
||||
struct page_table *pt;
|
||||
|
||||
if ((*ptep == PTE_NULL) && (args->flags & VPTEF_SKIP_NULL)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (*ptep == PTE_NULL) {
|
||||
pt = __alloc_new_pt(IHK_MC_AP_NOWAIT);
|
||||
if (!pt) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
*ptep = virt_to_phys(pt) | PFL4_PDIR_ATTR;
|
||||
}
|
||||
else {
|
||||
pt = phys_to_virt(*ptep & PT_PHYSMASK);
|
||||
}
|
||||
|
||||
error = walk_pte_l3(pt, base, start, end, &visit_pte_l3, arg0);
|
||||
return error;
|
||||
}
|
||||
|
||||
int visit_pte_range(page_table_t pt, void *start0, void *end0,
|
||||
enum visit_pte_flag flags, pte_visitor_t *funcp, void *arg)
|
||||
{
|
||||
const uintptr_t start = (uintptr_t)start0;
|
||||
const uintptr_t end = (uintptr_t)end0;
|
||||
struct visit_pte_args args;
|
||||
|
||||
args.pt = pt;
|
||||
args.flags = flags;
|
||||
args.funcp = funcp;
|
||||
args.arg = arg;
|
||||
|
||||
return walk_pte_l4(pt, 0, start, end, &visit_pte_l4, &args);
|
||||
}
|
||||
|
||||
struct clear_range_args {
|
||||
int free_physical;
|
||||
uint8_t padding[4];
|
||||
@@ -858,7 +1026,7 @@ static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base,
|
||||
memobj_flush_page(args->memobj, phys, PTL1_SIZE);
|
||||
}
|
||||
|
||||
if (args->free_physical) {
|
||||
if (!(old & PFL1_FILEOFF) && args->free_physical) {
|
||||
page = phys_to_page(phys);
|
||||
if (page && page_unmap(page)) {
|
||||
ihk_mc_free_pages(phys_to_virt(phys), 1);
|
||||
@@ -904,7 +1072,7 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base,
|
||||
memobj_flush_page(args->memobj, phys, PTL2_SIZE);
|
||||
}
|
||||
|
||||
if (args->free_physical) {
|
||||
if (!(old & PFL2_FILEOFF) && args->free_physical) {
|
||||
page = phys_to_page(phys);
|
||||
if (page && page_unmap(page)) {
|
||||
ihk_mc_free_pages(phys_to_virt(phys), PTL2_SIZE/PTL1_SIZE);
|
||||
@@ -998,7 +1166,7 @@ static int change_attr_range_l1(void *arg0, pte_t *ptep, uint64_t base,
|
||||
{
|
||||
struct change_attr_args *args = arg0;
|
||||
|
||||
if (*ptep == PTE_NULL) {
|
||||
if ((*ptep == PTE_NULL) || (*ptep & PFL1_FILEOFF)) {
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
@@ -1013,7 +1181,7 @@ static int change_attr_range_l2(void *arg0, pte_t *ptep, uint64_t base,
|
||||
int error;
|
||||
struct page_table *pt;
|
||||
|
||||
if (*ptep == PTE_NULL) {
|
||||
if ((*ptep == PTE_NULL) || (*ptep & PFL2_FILEOFF)) {
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
@@ -1032,7 +1200,9 @@ static int change_attr_range_l2(void *arg0, pte_t *ptep, uint64_t base,
|
||||
}
|
||||
|
||||
if (*ptep & PFL2_SIZE) {
|
||||
*ptep = (*ptep & ~args->clrpte) | args->setpte;
|
||||
if (!(*ptep & PFL2_FILEOFF)) {
|
||||
*ptep = (*ptep & ~args->clrpte) | args->setpte;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1045,7 +1215,7 @@ static int change_attr_range_l3(void *arg0, pte_t *ptep, uint64_t base,
|
||||
{
|
||||
struct page_table *pt;
|
||||
|
||||
if (*ptep == PTE_NULL) {
|
||||
if ((*ptep == PTE_NULL) || (*ptep & PFL3_FILEOFF)) {
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
@@ -1632,6 +1802,76 @@ enum ihk_mc_pt_attribute arch_vrflag_to_ptattr(unsigned long flag, uint64_t faul
|
||||
return attr;
|
||||
}
|
||||
|
||||
struct move_args {
|
||||
uintptr_t src;
|
||||
uintptr_t dest;
|
||||
};
|
||||
|
||||
static int move_one_page(void *arg0, page_table_t pt, pte_t *ptep, void *pgaddr, size_t pgsize)
|
||||
{
|
||||
int error;
|
||||
struct move_args *args = arg0;
|
||||
uintptr_t dest;
|
||||
pte_t apte;
|
||||
uintptr_t phys;
|
||||
enum ihk_mc_pt_attribute attr;
|
||||
|
||||
dkprintf("move_one_page(%p,%p,%p %#lx,%p,%#lx)\n",
|
||||
arg0, pt, ptep, *ptep, pgaddr, pgsize);
|
||||
if (pte_is_fileoff(ptep, pgsize)) {
|
||||
error = -ENOTSUPP;
|
||||
kprintf("move_one_page(%p,%p,%p %#lx,%p,%#lx):fileoff. %d\n",
|
||||
arg0, pt, ptep, *ptep, pgaddr, pgsize, error);
|
||||
goto out;
|
||||
}
|
||||
|
||||
dest = args->dest + ((uintptr_t)pgaddr - args->src);
|
||||
|
||||
apte = PTE_NULL;
|
||||
pte_xchg(ptep, &apte);
|
||||
|
||||
phys = apte & PT_PHYSMASK;
|
||||
attr = apte & ~PT_PHYSMASK;
|
||||
|
||||
error = ihk_mc_pt_set_range(pt, (void *)dest,
|
||||
(void *)(dest + pgsize), phys, attr);
|
||||
if (error) {
|
||||
kprintf("move_one_page(%p,%p,%p %#lx,%p,%#lx):"
|
||||
"set failed. %d\n",
|
||||
arg0, pt, ptep, *ptep, pgaddr, pgsize, error);
|
||||
goto out;
|
||||
}
|
||||
|
||||
error = 0;
|
||||
out:
|
||||
dkprintf("move_one_page(%p,%p,%p %#lx,%p,%#lx):%d\n",
|
||||
arg0, pt, ptep, *ptep, pgaddr, pgsize, error);
|
||||
return error;
|
||||
}
|
||||
|
||||
int move_pte_range(page_table_t pt, void *src, void *dest, size_t size)
|
||||
{
|
||||
int error;
|
||||
struct move_args args;
|
||||
|
||||
dkprintf("move_pte_range(%p,%p,%p,%#lx)\n", pt, src, dest, size);
|
||||
args.src = (uintptr_t)src;
|
||||
args.dest = (uintptr_t)dest;
|
||||
|
||||
error = visit_pte_range(pt, src, src+size, VPTEF_SKIP_NULL,
|
||||
&move_one_page, &args);
|
||||
flush_tlb(); /* XXX: TLB flush */
|
||||
if (error) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
error = 0;
|
||||
out:
|
||||
dkprintf("move_pte_range(%p,%p,%p,%#lx):%d\n",
|
||||
pt, src, dest, size, error);
|
||||
return error;
|
||||
}
|
||||
|
||||
void load_page_table(struct page_table *pt)
|
||||
{
|
||||
unsigned long pt_addr;
|
||||
|
||||
@@ -288,14 +288,16 @@ check_signal(unsigned long rc, void *regs0)
|
||||
unsigned long
|
||||
do_kill(int pid, int tid, int sig)
|
||||
{
|
||||
struct cpu_local_var *v;
|
||||
struct process *p;
|
||||
struct process *proc = cpu_local_var(current);
|
||||
struct process *tproc = NULL;
|
||||
int i;
|
||||
__sigset_t mask;
|
||||
struct sig_pending *pending;
|
||||
struct list_head *head;
|
||||
int irqstate;
|
||||
int rc;
|
||||
unsigned long irqstate;
|
||||
|
||||
if(proc == NULL || proc->pid == 0){
|
||||
return -ESRCH;
|
||||
@@ -314,37 +316,52 @@ do_kill(int pid, int tid, int sig)
|
||||
}
|
||||
else{
|
||||
for(i = 0; i < num_processors; i++){
|
||||
if(get_cpu_local_var(i)->current &&
|
||||
get_cpu_local_var(i)->current->pid == pid){
|
||||
tproc = get_cpu_local_var(i)->current;
|
||||
break;
|
||||
v = get_cpu_local_var(i);
|
||||
irqstate = ihk_mc_spinlock_lock(&(v->runq_lock));
|
||||
list_for_each_entry(p, &(v->runq), sched_list){
|
||||
if(p->pid == pid){
|
||||
tproc = p;
|
||||
break;
|
||||
}
|
||||
}
|
||||
ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if(pid == -1){
|
||||
for(i = 0; i < num_processors; i++)
|
||||
if(get_cpu_local_var(i)->current &&
|
||||
get_cpu_local_var(i)->current->pid > 0 &&
|
||||
get_cpu_local_var(i)->current->tid == tid){
|
||||
tproc = get_cpu_local_var(i)->current;
|
||||
break;
|
||||
for(i = 0; i < num_processors; i++){
|
||||
v = get_cpu_local_var(i);
|
||||
irqstate = ihk_mc_spinlock_lock(&(v->runq_lock));
|
||||
list_for_each_entry(p, &(v->runq), sched_list){
|
||||
if(p->pid > 0 &&
|
||||
p->tid == tid){
|
||||
tproc = p;
|
||||
break;
|
||||
}
|
||||
}
|
||||
ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
|
||||
}
|
||||
}
|
||||
else{
|
||||
if(pid == 0)
|
||||
return -ESRCH;
|
||||
for(i = 0; i < num_processors; i++)
|
||||
if(get_cpu_local_var(i)->current &&
|
||||
get_cpu_local_var(i)->current->pid == pid &&
|
||||
get_cpu_local_var(i)->current->tid == tid){
|
||||
tproc = get_cpu_local_var(i)->current;
|
||||
break;
|
||||
for(i = 0; i < num_processors; i++){
|
||||
v = get_cpu_local_var(i);
|
||||
irqstate = ihk_mc_spinlock_lock(&(v->runq_lock));
|
||||
list_for_each_entry(p, &(v->runq), sched_list){
|
||||
if(p->pid == pid &&
|
||||
p->tid == tid){
|
||||
tproc = p;
|
||||
break;
|
||||
}
|
||||
}
|
||||
ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
|
||||
}
|
||||
}
|
||||
|
||||
if(!tproc)
|
||||
if(!tproc){
|
||||
return -ESRCH;
|
||||
}
|
||||
if(sig == 0)
|
||||
return 0;
|
||||
|
||||
@@ -375,7 +392,7 @@ do_kill(int pid, int tid, int sig)
|
||||
}
|
||||
else{
|
||||
list_add_tail(&pending->list, head);
|
||||
proc->sigevent = 1;
|
||||
tproc->sigevent = 1;
|
||||
}
|
||||
}
|
||||
if(tid == -1){
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
IHKDIR=$(IHKBASE)/$(TARGETDIR)
|
||||
OBJS = init.o mem.o debug.o mikc.o listeners.o ap.o syscall.o cls.o host.o
|
||||
OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o
|
||||
OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o shmobj.o
|
||||
OBJS += zeroobj.o
|
||||
DEPSRCS=$(wildcard $(SRC)/*.c)
|
||||
|
||||
CFLAGS += -I$(SRC)/include -mcmodel=kernel -D__KERNEL__
|
||||
|
||||
@@ -184,6 +184,7 @@ int fileobj_create(int fd, struct memobj **objp, int *maxprotp)
|
||||
|
||||
memset(newobj, 0, sizeof(*newobj));
|
||||
newobj->memobj.ops = &fileobj_ops;
|
||||
newobj->memobj.flags = MF_HAS_PAGER;
|
||||
newobj->handle = result.handle;
|
||||
newobj->sref = 1;
|
||||
newobj->cref = 1;
|
||||
|
||||
@@ -30,6 +30,9 @@ struct malloc_header {
|
||||
#define CPU_STATUS_RUNNING (2)
|
||||
extern ihk_spinlock_t cpu_status_lock;
|
||||
|
||||
#define CPU_FLAG_NEED_RESCHED 0x1U
|
||||
#define CPU_FLAG_NEED_MIGRATE 0x2U
|
||||
|
||||
struct cpu_local_var {
|
||||
/* malloc */
|
||||
struct malloc_header free_list;
|
||||
@@ -54,6 +57,11 @@ struct cpu_local_var {
|
||||
int fs;
|
||||
|
||||
struct list_head pending_free_pages;
|
||||
|
||||
unsigned int flags;
|
||||
|
||||
ihk_spinlock_t migq_lock;
|
||||
struct list_head migq;
|
||||
} __attribute__((aligned(64)));
|
||||
|
||||
|
||||
|
||||
@@ -16,10 +16,19 @@
|
||||
#include <ihk/types.h>
|
||||
#include <ihk/atomic.h>
|
||||
#include <ihk/lock.h>
|
||||
#include <errno.h>
|
||||
#include <list.h>
|
||||
#include <shm.h>
|
||||
|
||||
enum {
|
||||
/* for memobj.flags */
|
||||
MF_HAS_PAGER = 0x0001,
|
||||
};
|
||||
|
||||
struct memobj {
|
||||
struct memobj_ops * ops;
|
||||
uint32_t flags;
|
||||
int8_t padding[4];
|
||||
ihk_spinlock_t lock;
|
||||
};
|
||||
|
||||
@@ -39,29 +48,42 @@ struct memobj_ops {
|
||||
|
||||
static inline void memobj_release(struct memobj *obj)
|
||||
{
|
||||
(*obj->ops->release)(obj);
|
||||
if (obj->ops->release) {
|
||||
(*obj->ops->release)(obj);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void memobj_ref(struct memobj *obj)
|
||||
{
|
||||
(*obj->ops->ref)(obj);
|
||||
if (obj->ops->ref) {
|
||||
(*obj->ops->ref)(obj);
|
||||
}
|
||||
}
|
||||
|
||||
static inline int memobj_get_page(struct memobj *obj, off_t off,
|
||||
int p2align, uintptr_t *physp)
|
||||
{
|
||||
return (*obj->ops->get_page)(obj, off, p2align, physp);
|
||||
if (obj->ops->get_page) {
|
||||
return (*obj->ops->get_page)(obj, off, p2align, physp);
|
||||
}
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
static inline uintptr_t memobj_copy_page(struct memobj *obj,
|
||||
uintptr_t orgphys, int p2align)
|
||||
{
|
||||
return (*obj->ops->copy_page)(obj, orgphys, p2align);
|
||||
if (obj->ops->copy_page) {
|
||||
return (*obj->ops->copy_page)(obj, orgphys, p2align);
|
||||
}
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
static inline int memobj_flush_page(struct memobj *obj, uintptr_t phys, size_t pgsize)
|
||||
{
|
||||
return (*obj->ops->flush_page)(obj, phys, pgsize);
|
||||
if (obj->ops->flush_page) {
|
||||
return (*obj->ops->flush_page)(obj, phys, pgsize);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void memobj_lock(struct memobj *obj)
|
||||
@@ -74,6 +96,13 @@ static inline void memobj_unlock(struct memobj *obj)
|
||||
ihk_mc_spinlock_unlock_noirq(&obj->lock);
|
||||
}
|
||||
|
||||
static inline int memobj_has_pager(struct memobj *obj)
|
||||
{
|
||||
return !!(obj->flags & MF_HAS_PAGER);
|
||||
}
|
||||
|
||||
int fileobj_create(int fd, struct memobj **objp, int *maxprotp);
|
||||
int shmobj_create(struct shmid_ds *ds, struct memobj **objp);
|
||||
int zeroobj_create(struct memobj **objp);
|
||||
|
||||
#endif /* HEADER_MEMOBJ_H */
|
||||
|
||||
@@ -63,4 +63,10 @@
|
||||
#define MADV_HWPOISON 100
|
||||
#define MADV_SOFT_OFFLINE 101
|
||||
|
||||
/*
|
||||
* for mremap()
|
||||
*/
|
||||
#define MREMAP_MAYMOVE 0x01
|
||||
#define MREMAP_FIXED 0x02
|
||||
|
||||
#endif /* HEADER_MMAN_H */
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
#include <list.h>
|
||||
#include <signal.h>
|
||||
#include <memobj.h>
|
||||
#include <affinity.h>
|
||||
|
||||
#define VR_NONE 0x0
|
||||
#define VR_STACK 0x1
|
||||
@@ -29,6 +30,7 @@
|
||||
#define VR_DEMAND_PAGING 0x1000
|
||||
#define VR_PRIVATE 0x2000
|
||||
#define VR_LOCKED 0x4000
|
||||
#define VR_FILEOFF 0x8000 /* remap_file_pages()ed range */
|
||||
#define VR_PROT_NONE 0x00000000
|
||||
#define VR_PROT_READ 0x00010000
|
||||
#define VR_PROT_WRITE 0x00020000
|
||||
@@ -186,6 +188,7 @@ struct process {
|
||||
void *pgio_arg;
|
||||
|
||||
struct fork_tree_node *ftn;
|
||||
cpu_set_t cpu_set;
|
||||
unsigned long saved_auxv[AUXV_LEN];
|
||||
};
|
||||
|
||||
@@ -231,12 +234,16 @@ int join_process_memory_range(struct process *process, struct vm_range *survivin
|
||||
int change_prot_process_memory_range(
|
||||
struct process *process, struct vm_range *range,
|
||||
unsigned long newflag);
|
||||
int remap_process_memory_range(struct process_vm *vm, struct vm_range *range,
|
||||
uintptr_t start, uintptr_t end, off_t off);
|
||||
struct vm_range *lookup_process_memory_range(
|
||||
struct process_vm *vm, uintptr_t start, uintptr_t end);
|
||||
struct vm_range *next_process_memory_range(
|
||||
struct process_vm *vm, struct vm_range *range);
|
||||
struct vm_range *previous_process_memory_range(
|
||||
struct process_vm *vm, struct vm_range *range);
|
||||
int extend_up_process_memory_range(struct process_vm *vm,
|
||||
struct vm_range *range, uintptr_t newend);
|
||||
|
||||
int page_fault_process(struct process *proc, void *fault_addr, uint64_t reason);
|
||||
int remove_process_region(struct process *proc,
|
||||
@@ -256,4 +263,7 @@ void runq_add_proc(struct process *proc, int cpu_id);
|
||||
void runq_del_proc(struct process *proc, int cpu_id);
|
||||
int sched_wakeup_process(struct process *proc, int valid_states);
|
||||
|
||||
void sched_request_migrate(int cpu_id, struct process *proc);
|
||||
void check_need_resched(void);
|
||||
|
||||
#endif
|
||||
|
||||
49
kernel/include/shm.h
Normal file
49
kernel/include/shm.h
Normal file
@@ -0,0 +1,49 @@
|
||||
/**
|
||||
* \file shm.h
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* header file for System V shared memory
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
*/
|
||||
|
||||
#ifndef HEADER_SHM_H
|
||||
#define HEADER_SHM_H
|
||||
|
||||
/* begin types.h */
|
||||
typedef int32_t key_t;
|
||||
typedef uint32_t uid_t;
|
||||
typedef uint32_t gid_t;
|
||||
typedef int64_t time_t;
|
||||
typedef int32_t pid_t;
|
||||
/* end types.h */
|
||||
|
||||
typedef uint64_t shmatt_t;
|
||||
|
||||
struct ipc_perm {
|
||||
key_t key;
|
||||
uid_t uid;
|
||||
gid_t gid;
|
||||
uid_t cuid;
|
||||
gid_t cgid;
|
||||
uint16_t mode;
|
||||
uint8_t padding[2];
|
||||
uint16_t seq;
|
||||
uint8_t padding2[22];
|
||||
};
|
||||
|
||||
struct shmid_ds {
|
||||
struct ipc_perm shm_perm;
|
||||
size_t shm_segsz;
|
||||
time_t shm_atime;
|
||||
time_t shm_dtime;
|
||||
time_t shm_ctime;
|
||||
pid_t shm_cpid;
|
||||
pid_t shm_lpid;
|
||||
shmatt_t shm_nattch;
|
||||
uint8_t padding[16];
|
||||
};
|
||||
|
||||
#endif /* HEADER_SHM_H */
|
||||
@@ -211,8 +211,11 @@ static void post_init(void)
|
||||
}
|
||||
|
||||
if (find_command_line("hidos")) {
|
||||
extern ihk_spinlock_t syscall_lock;
|
||||
|
||||
init_host_syscall_channel();
|
||||
init_host_syscall_channel2();
|
||||
ihk_mc_spinlock_init(&syscall_lock);
|
||||
}
|
||||
ap_start();
|
||||
}
|
||||
|
||||
@@ -18,6 +18,16 @@
|
||||
#include <ihk/ikc.h>
|
||||
#include <ikc/master.h>
|
||||
|
||||
//#define DEBUG_LISTENERS
|
||||
|
||||
#ifdef DEBUG_LISTENERS
|
||||
#define dkprintf(...) kprintf(__VA_ARGS__)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#else
|
||||
#define dkprintf(...)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
static unsigned long read_tsc(void)
|
||||
{
|
||||
unsigned int low, high;
|
||||
@@ -103,5 +113,5 @@ static struct ihk_ikc_listen_param test_listen_param = {
|
||||
void mc_ikc_test_init(void)
|
||||
{
|
||||
ihk_ikc_listen_port(NULL, &test_listen_param);
|
||||
kprintf("Listener registered port %d\n", 500);
|
||||
dkprintf("Listener registered port %d\n", 500);
|
||||
}
|
||||
|
||||
@@ -281,6 +281,7 @@ out:
|
||||
dkprintf("[%d]page_fault_handler(%p,%lx,%p): (%d)\n",
|
||||
ihk_mc_get_processor_id(), fault_addr, reason,
|
||||
regs, error);
|
||||
check_need_resched();
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
292
kernel/process.c
292
kernel/process.c
@@ -253,7 +253,7 @@ struct process *clone_process(struct process *org, unsigned long pc,
|
||||
goto err_free_sighandler;
|
||||
}
|
||||
|
||||
memset(proc->sighandler, '\0', sizeof(struct sig_handler));
|
||||
memcpy(proc->sighandler, org->sighandler, sizeof(struct sig_handler));
|
||||
ihk_atomic_set(&proc->sighandler->use, 1);
|
||||
ihk_mc_spinlock_init(&proc->sighandler->lock);
|
||||
ihk_atomic_set(&proc->sigshared->use, 1);
|
||||
@@ -936,6 +936,39 @@ struct vm_range *previous_process_memory_range(
|
||||
return prev;
|
||||
}
|
||||
|
||||
int extend_up_process_memory_range(struct process_vm *vm,
|
||||
struct vm_range *range, uintptr_t newend)
|
||||
{
|
||||
int error;
|
||||
struct vm_range *next;
|
||||
|
||||
dkprintf("exntend_up_process_memory_range(%p,%p %#lx-%#lx,%#lx)\n",
|
||||
vm, range, range->start, range->end, newend);
|
||||
if (newend <= range->end) {
|
||||
error = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (vm->region.user_end < newend) {
|
||||
error = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
next = next_process_memory_range(vm ,range);
|
||||
if (next && (next->start < newend)) {
|
||||
error = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
error = 0;
|
||||
range->end = newend;
|
||||
|
||||
out:
|
||||
dkprintf("exntend_up_process_memory_range(%p,%p %#lx-%#lx,%#lx):%d\n",
|
||||
vm, range, range->start, range->end, newend, error);
|
||||
return error;
|
||||
}
|
||||
|
||||
int change_prot_process_memory_range(struct process *proc,
|
||||
struct vm_range *range, unsigned long protflag)
|
||||
{
|
||||
@@ -997,6 +1030,94 @@ out:
|
||||
return error;
|
||||
}
|
||||
|
||||
struct rfp_args {
|
||||
off_t off;
|
||||
uintptr_t start;
|
||||
struct memobj *memobj;
|
||||
};
|
||||
|
||||
static int remap_one_page(void *arg0, page_table_t pt, pte_t *ptep,
|
||||
void *pgaddr, size_t pgsize)
|
||||
{
|
||||
struct rfp_args * const args = arg0;
|
||||
int error;
|
||||
off_t off;
|
||||
pte_t apte;
|
||||
uintptr_t phys;
|
||||
struct page *page;
|
||||
|
||||
dkprintf("remap_one_page(%p,%p,%p %#lx,%p,%#lx)\n",
|
||||
arg0, pt, ptep, *ptep, pgaddr, pgsize);
|
||||
|
||||
/* XXX: NYI: large pages */
|
||||
if (pgsize != PAGE_SIZE) {
|
||||
error = -E2BIG;
|
||||
ekprintf("remap_one_page(%p,%p,%p %#lx,%p,%#lx):%d\n",
|
||||
arg0, pt, ptep, *ptep, pgaddr, pgsize, error);
|
||||
goto out;
|
||||
}
|
||||
|
||||
off = args->off + ((uintptr_t)pgaddr - args->start);
|
||||
pte_make_fileoff(off, 0, pgsize, &apte);
|
||||
|
||||
pte_xchg(ptep, &apte);
|
||||
flush_tlb_single((uintptr_t)pgaddr); /* XXX: TLB flush */
|
||||
|
||||
if (pte_is_null(&apte) || pte_is_fileoff(&apte, pgsize)) {
|
||||
error = 0;
|
||||
goto out;
|
||||
}
|
||||
phys = pte_get_phys(&apte);
|
||||
|
||||
if (pte_is_dirty(&apte, pgsize)) {
|
||||
memobj_flush_page(args->memobj, phys, pgsize); /* XXX: in lock period */
|
||||
}
|
||||
|
||||
page = phys_to_page(phys);
|
||||
if (page && page_unmap(page)) {
|
||||
ihk_mc_free_pages(phys_to_virt(phys), pgsize/PAGE_SIZE);
|
||||
}
|
||||
|
||||
error = 0;
|
||||
out:
|
||||
dkprintf("remap_one_page(%p,%p,%p %#lx,%p,%#lx): %d\n",
|
||||
arg0, pt, ptep, *ptep, pgaddr, pgsize, error);
|
||||
return error;
|
||||
}
|
||||
|
||||
int remap_process_memory_range(struct process_vm *vm, struct vm_range *range,
|
||||
uintptr_t start, uintptr_t end, off_t off)
|
||||
{
|
||||
struct rfp_args args;
|
||||
int error;
|
||||
|
||||
dkprintf("remap_process_memory_range(%p,%p,%#lx,%#lx,%#lx)\n",
|
||||
vm, range, start, end, off);
|
||||
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
|
||||
memobj_lock(range->memobj);
|
||||
|
||||
args.start = start;
|
||||
args.off = off;
|
||||
args.memobj = range->memobj;
|
||||
|
||||
error = visit_pte_range(vm->page_table, (void *)start,
|
||||
(void *)end, VPTEF_DEFAULT, &remap_one_page, &args);
|
||||
if (error) {
|
||||
ekprintf("remap_process_memory_range(%p,%p,%#lx,%#lx,%#lx):"
|
||||
"visit pte failed %d\n",
|
||||
vm, range, start, end, off, error);
|
||||
goto out;
|
||||
}
|
||||
|
||||
error = 0;
|
||||
out:
|
||||
memobj_unlock(range->memobj);
|
||||
ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
|
||||
dkprintf("remap_process_memory_range(%p,%p,%#lx,%#lx,%#lx):%d\n",
|
||||
vm, range, start, end, off, error);
|
||||
return error;
|
||||
}
|
||||
|
||||
static int page_fault_process_memory_range(struct process_vm *vm, struct vm_range *range, uintptr_t fault_addr, uint64_t reason)
|
||||
{
|
||||
int error;
|
||||
@@ -1012,7 +1133,8 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang
|
||||
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
|
||||
/*****/
|
||||
ptep = ihk_mc_pt_lookup_pte(vm->page_table, (void *)fault_addr, &pgaddr, &pgsize, &p2align);
|
||||
if (!(reason & PF_PROT) && ptep && !pte_is_null(ptep)) {
|
||||
if (!(reason & PF_PROT) && ptep && !pte_is_null(ptep)
|
||||
&& !pte_is_fileoff(ptep, pgsize)) {
|
||||
if (!pte_is_present(ptep)) {
|
||||
error = -EFAULT;
|
||||
kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):PROT_NONE. %d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error);
|
||||
@@ -1034,11 +1156,16 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang
|
||||
}
|
||||
attr = arch_vrflag_to_ptattr(range->flag, reason, ptep);
|
||||
pgaddr = (void *)(fault_addr & ~(pgsize - 1));
|
||||
if (!ptep || pte_is_null(ptep)) {
|
||||
if (!ptep || pte_is_null(ptep) || pte_is_fileoff(ptep, pgsize)) {
|
||||
if (range->memobj) {
|
||||
off_t off;
|
||||
|
||||
off = range->objoff + ((uintptr_t)pgaddr - range->start);
|
||||
if (!ptep || !pte_is_fileoff(ptep, pgsize)) {
|
||||
off = range->objoff + ((uintptr_t)pgaddr - range->start);
|
||||
}
|
||||
else {
|
||||
off = pte_get_off(ptep, pgsize);
|
||||
}
|
||||
error = memobj_get_page(range->memobj, off, p2align, &phys);
|
||||
if (error) {
|
||||
if (error != -ERESTART) {
|
||||
@@ -1590,6 +1717,9 @@ void sched_init(void)
|
||||
cpu_local_var(runq_len) = 0;
|
||||
ihk_mc_spinlock_init(&cpu_local_var(runq_lock));
|
||||
|
||||
INIT_LIST_HEAD(&cpu_local_var(migq));
|
||||
ihk_mc_spinlock_init(&cpu_local_var(migq_lock));
|
||||
|
||||
#ifdef TIMER_CPU_ID
|
||||
if (ihk_mc_get_processor_id() == TIMER_CPU_ID) {
|
||||
init_timers();
|
||||
@@ -1598,6 +1728,72 @@ void sched_init(void)
|
||||
#endif
|
||||
}
|
||||
|
||||
static void double_rq_lock(struct cpu_local_var *v1, struct cpu_local_var *v2)
|
||||
{
|
||||
if (v1 < v2) {
|
||||
ihk_mc_spinlock_lock_noirq(&v1->runq_lock);
|
||||
ihk_mc_spinlock_lock_noirq(&v2->runq_lock);
|
||||
} else {
|
||||
ihk_mc_spinlock_lock_noirq(&v2->runq_lock);
|
||||
ihk_mc_spinlock_lock_noirq(&v1->runq_lock);
|
||||
}
|
||||
}
|
||||
|
||||
static void double_rq_unlock(struct cpu_local_var *v1, struct cpu_local_var *v2)
|
||||
{
|
||||
ihk_mc_spinlock_unlock_noirq(&v1->runq_lock);
|
||||
ihk_mc_spinlock_unlock_noirq(&v2->runq_lock);
|
||||
}
|
||||
|
||||
struct migrate_request {
|
||||
struct list_head list;
|
||||
struct process *proc;
|
||||
struct waitq wq;
|
||||
};
|
||||
|
||||
static void do_migrate(void)
|
||||
{
|
||||
int cur_cpu_id = ihk_mc_get_processor_id();
|
||||
struct cpu_local_var *cur_v = get_cpu_local_var(cur_cpu_id);
|
||||
struct migrate_request *req, *tmp;
|
||||
|
||||
ihk_mc_spinlock_lock_noirq(&cur_v->migq_lock);
|
||||
list_for_each_entry_safe(req, tmp, &cur_v->migq, list) {
|
||||
int cpu_id;
|
||||
struct cpu_local_var *v;
|
||||
|
||||
/* 0. check if migration is necessary */
|
||||
list_del(&req->list);
|
||||
if (req->proc->cpu_id != cur_cpu_id) /* already not here */
|
||||
goto ack;
|
||||
if (CPU_ISSET(cur_cpu_id, &req->proc->cpu_set)) /* good affinity */
|
||||
goto ack;
|
||||
|
||||
/* 1. select CPU */
|
||||
for (cpu_id = 0; cpu_id < CPU_SETSIZE; cpu_id++)
|
||||
if (CPU_ISSET(cpu_id, &req->proc->cpu_set))
|
||||
break;
|
||||
if (CPU_SETSIZE == cpu_id) /* empty affinity (bug?) */
|
||||
goto ack;
|
||||
|
||||
/* 2. migrate thread */
|
||||
v = get_cpu_local_var(cpu_id);
|
||||
double_rq_lock(cur_v, v);
|
||||
list_del(&req->proc->sched_list);
|
||||
cur_v->runq_len -= 1;
|
||||
req->proc->cpu_id = cpu_id;
|
||||
list_add_tail(&req->proc->sched_list, &v->runq);
|
||||
v->runq_len += 1;
|
||||
if (v->runq_len == 1)
|
||||
ihk_mc_interrupt_cpu(get_x86_cpu_local_variable(cpu_id)->apic_id, 0xd1);
|
||||
double_rq_unlock(cur_v, v);
|
||||
|
||||
ack:
|
||||
waitq_wakeup(&req->wq);
|
||||
}
|
||||
ihk_mc_spinlock_unlock_noirq(&cur_v->migq_lock);
|
||||
}
|
||||
|
||||
void schedule(void)
|
||||
{
|
||||
struct cpu_local_var *v = get_this_cpu_local_var();
|
||||
@@ -1606,6 +1802,7 @@ void schedule(void)
|
||||
unsigned long irqstate;
|
||||
struct process *last;
|
||||
|
||||
redo:
|
||||
irqstate = ihk_mc_spinlock_lock(&(v->runq_lock));
|
||||
|
||||
next = NULL;
|
||||
@@ -1621,25 +1818,26 @@ void schedule(void)
|
||||
list_add_tail(&prev->sched_list, &(v->runq));
|
||||
++v->runq_len;
|
||||
}
|
||||
}
|
||||
|
||||
if (!v->runq_len) {
|
||||
if (v->flags & CPU_FLAG_NEED_MIGRATE) {
|
||||
next = &cpu_local_var(idle);
|
||||
} else {
|
||||
/* Pick a new running process */
|
||||
list_for_each_entry_safe(proc, tmp, &(v->runq), sched_list) {
|
||||
if (proc->status == PS_RUNNING) {
|
||||
next = proc;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* No process? Run idle.. */
|
||||
if (!next) {
|
||||
next = &cpu_local_var(idle);
|
||||
v->status = CPU_STATUS_IDLE;
|
||||
}
|
||||
}
|
||||
|
||||
/* Pick a new running process */
|
||||
list_for_each_entry_safe(proc, tmp, &(v->runq), sched_list) {
|
||||
if (proc->status == PS_RUNNING) {
|
||||
next = proc;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* No process? Run idle.. */
|
||||
if (!next) {
|
||||
next = &cpu_local_var(idle);
|
||||
}
|
||||
|
||||
if (prev != next) {
|
||||
switch_ctx = 1;
|
||||
v->current = next;
|
||||
@@ -1675,6 +1873,21 @@ void schedule(void)
|
||||
else {
|
||||
ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
|
||||
}
|
||||
|
||||
if (v->flags & CPU_FLAG_NEED_MIGRATE) {
|
||||
v->flags &= ~CPU_FLAG_NEED_MIGRATE;
|
||||
do_migrate();
|
||||
goto redo;
|
||||
}
|
||||
}
|
||||
|
||||
void check_need_resched(void)
|
||||
{
|
||||
struct cpu_local_var *v = get_this_cpu_local_var();
|
||||
if (v->flags & CPU_FLAG_NEED_RESCHED) {
|
||||
v->flags &= ~CPU_FLAG_NEED_RESCHED;
|
||||
schedule();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1719,6 +1932,49 @@ int sched_wakeup_process(struct process *proc, int valid_states)
|
||||
return status;
|
||||
}
|
||||
|
||||
/*
|
||||
* 1. Add current process to waitq
|
||||
* 2. Queue migration request into the target CPU's queue
|
||||
* 3. Kick migration on the CPU
|
||||
* 4. Wait for completion of the migration
|
||||
*
|
||||
* struct migrate_request {
|
||||
* list //migq,
|
||||
* wq,
|
||||
* proc
|
||||
* }
|
||||
*
|
||||
* [expected processing of the target CPU]
|
||||
* 1. Interrupted by IPI
|
||||
* 2. call schedule() via check_resched()
|
||||
* 3. Do migration
|
||||
* 4. Wake up this thread
|
||||
*/
|
||||
void sched_request_migrate(int cpu_id, struct process *proc)
|
||||
{
|
||||
struct cpu_local_var *v = get_cpu_local_var(cpu_id);
|
||||
struct migrate_request req = { .proc = proc };
|
||||
unsigned long irqstate;
|
||||
DECLARE_WAITQ_ENTRY(entry, cpu_local_var(current));
|
||||
|
||||
waitq_init(&req.wq);
|
||||
waitq_prepare_to_wait(&req.wq, &entry, PS_UNINTERRUPTIBLE);
|
||||
|
||||
irqstate = ihk_mc_spinlock_lock(&v->migq_lock);
|
||||
list_add_tail(&req.list, &v->migq);
|
||||
ihk_mc_spinlock_unlock(&v->migq_lock, irqstate);
|
||||
|
||||
v->flags |= CPU_FLAG_NEED_RESCHED | CPU_FLAG_NEED_MIGRATE;
|
||||
v->status = CPU_STATUS_RUNNING;
|
||||
|
||||
if (cpu_id != ihk_mc_get_processor_id())
|
||||
ihk_mc_interrupt_cpu(/* Kick scheduler */
|
||||
get_x86_cpu_local_variable(cpu_id)->apic_id, 0xd1);
|
||||
|
||||
schedule();
|
||||
waitq_finish_wait(&req.wq, &entry);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Runq lock must be held here */
|
||||
|
||||
287
kernel/shmobj.c
Normal file
287
kernel/shmobj.c
Normal file
@@ -0,0 +1,287 @@
|
||||
/**
|
||||
* \file shmobj.c
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* shared memory object
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
*/
|
||||
|
||||
#include <ihk/atomic.h>
|
||||
#include <ihk/debug.h>
|
||||
#include <ihk/lock.h>
|
||||
#include <ihk/mm.h>
|
||||
#include <errno.h>
|
||||
#include <kmalloc.h>
|
||||
#include <list.h>
|
||||
#include <memobj.h>
|
||||
#include <memory.h>
|
||||
#include <page.h>
|
||||
#include <shm.h>
|
||||
#include <string.h>
|
||||
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#define fkprintf(...) kprintf(__VA_ARGS__)
|
||||
|
||||
struct shmobj {
|
||||
struct memobj memobj; /* must be first */
|
||||
long ref;
|
||||
struct shmid_ds ds;
|
||||
struct list_head page_list;
|
||||
};
|
||||
|
||||
static memobj_release_func_t shmobj_release;
|
||||
static memobj_ref_func_t shmobj_ref;
|
||||
static memobj_get_page_func_t shmobj_get_page;
|
||||
|
||||
static struct memobj_ops shmobj_ops = {
|
||||
.release = &shmobj_release,
|
||||
.ref = &shmobj_ref,
|
||||
.get_page = &shmobj_get_page,
|
||||
};
|
||||
|
||||
static struct shmobj *to_shmobj(struct memobj *memobj)
|
||||
{
|
||||
return (struct shmobj *)memobj;
|
||||
}
|
||||
|
||||
static struct memobj *to_memobj(struct shmobj *shmobj)
|
||||
{
|
||||
return &shmobj->memobj;
|
||||
}
|
||||
|
||||
/***********************************************************************
|
||||
* page_list
|
||||
*/
|
||||
static void page_list_init(struct shmobj *obj)
|
||||
{
|
||||
INIT_LIST_HEAD(&obj->page_list);
|
||||
return;
|
||||
}
|
||||
|
||||
static void page_list_insert(struct shmobj *obj, struct page *page)
|
||||
{
|
||||
list_add(&page->list, &obj->page_list);
|
||||
return;
|
||||
}
|
||||
|
||||
static void page_list_remove(struct shmobj *obj, struct page *page)
|
||||
{
|
||||
list_del(&page->list);
|
||||
return;
|
||||
}
|
||||
|
||||
static struct page *page_list_lookup(struct shmobj *obj, off_t off)
|
||||
{
|
||||
struct page *page;
|
||||
|
||||
list_for_each_entry(page, &obj->page_list, list) {
|
||||
if (page->offset == off) {
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
page = NULL;
|
||||
|
||||
out:
|
||||
return page;
|
||||
}
|
||||
|
||||
static struct page *page_list_first(struct shmobj *obj)
|
||||
{
|
||||
if (list_empty(&obj->page_list)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return list_first_entry(&obj->page_list, struct page, list);
|
||||
}
|
||||
|
||||
int shmobj_create(struct shmid_ds *ds, struct memobj **objp)
|
||||
{
|
||||
struct shmobj *obj = NULL;
|
||||
int error;
|
||||
|
||||
dkprintf("shmobj_create(%p %#lx,%p)\n", ds, ds->shm_segsz, objp);
|
||||
obj = kmalloc(sizeof(*obj), IHK_MC_AP_NOWAIT);
|
||||
if (!obj) {
|
||||
error = -ENOMEM;
|
||||
ekprintf("shmobj_create(%p %#lx,%p):kmalloc failed. %d\n",
|
||||
ds, ds->shm_segsz, objp, error);
|
||||
goto out;
|
||||
}
|
||||
|
||||
memset(obj, 0, sizeof(*obj));
|
||||
obj->memobj.ops = &shmobj_ops;
|
||||
obj->ref = 1;
|
||||
obj->ds = *ds;
|
||||
page_list_init(obj);
|
||||
ihk_mc_spinlock_init(&obj->memobj.lock);
|
||||
|
||||
error = 0;
|
||||
*objp = to_memobj(obj);
|
||||
obj = NULL;
|
||||
|
||||
out:
|
||||
if (obj) {
|
||||
kfree(obj);
|
||||
}
|
||||
dkprintf("shmobj_create(%p %#lx,%p):%d %p\n",
|
||||
ds, ds->shm_segsz, objp, error, *objp);
|
||||
return error;
|
||||
}
|
||||
|
||||
static void shmobj_release(struct memobj *memobj)
|
||||
{
|
||||
struct shmobj *obj = to_shmobj(memobj);
|
||||
struct shmobj *freeobj = NULL;
|
||||
|
||||
dkprintf("shmobj_release(%p)\n", memobj);
|
||||
memobj_lock(&obj->memobj);
|
||||
--obj->ref;
|
||||
if (obj->ref <= 0) {
|
||||
if (obj->ref < 0) {
|
||||
fkprintf("shmobj_release(%p):ref %ld\n",
|
||||
memobj, obj->ref);
|
||||
panic("shmobj_release:freeing free shmobj");
|
||||
}
|
||||
freeobj = obj;
|
||||
}
|
||||
memobj_unlock(&obj->memobj);
|
||||
|
||||
if (freeobj) {
|
||||
/* zap page_list */
|
||||
for (;;) {
|
||||
struct page *page;
|
||||
int count;
|
||||
|
||||
page = page_list_first(obj);
|
||||
if (!page) {
|
||||
break;
|
||||
}
|
||||
page_list_remove(obj, page);
|
||||
|
||||
dkprintf("shmobj_release(%p):"
|
||||
"release page. %p %#lx %d %d",
|
||||
memobj, page, page_to_phys(page),
|
||||
page->mode, page->count);
|
||||
count = ihk_atomic_sub_return(1, &page->count);
|
||||
if (!((page->mode == PM_MAPPED) && (count == 0))) {
|
||||
fkprintf("shmobj_release(%p): "
|
||||
"page %p phys %#lx mode %#x"
|
||||
" count %d off %#lx\n",
|
||||
memobj, page,
|
||||
page_to_phys(page),
|
||||
page->mode, count,
|
||||
page->offset);
|
||||
panic("shmobj_release");
|
||||
}
|
||||
|
||||
/* XXX:NYI: large pages */
|
||||
page->mode = PM_NONE;
|
||||
free_pages(phys_to_virt(page_to_phys(page)), 1);
|
||||
}
|
||||
dkprintf("shmobj_release(%p):free shmobj", memobj);
|
||||
kfree(freeobj);
|
||||
}
|
||||
dkprintf("shmobj_release(%p):\n", memobj);
|
||||
return;
|
||||
}
|
||||
|
||||
static void shmobj_ref(struct memobj *memobj)
|
||||
{
|
||||
struct shmobj *obj = to_shmobj(memobj);
|
||||
long newref;
|
||||
|
||||
dkprintf("shmobj_ref(%p)\n", memobj);
|
||||
memobj_lock(&obj->memobj);
|
||||
newref = ++obj->ref;
|
||||
memobj_unlock(&obj->memobj);
|
||||
dkprintf("shmobj_ref(%p): newref %ld\n", memobj, newref);
|
||||
return;
|
||||
}
|
||||
|
||||
static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align,
|
||||
uintptr_t *physp)
|
||||
{
|
||||
struct shmobj *obj = to_shmobj(memobj);
|
||||
int error;
|
||||
struct page *page;
|
||||
int npages;
|
||||
void *virt = NULL;
|
||||
uintptr_t phys = -1;
|
||||
|
||||
dkprintf("shmobj_get_page(%p,%#lx,%d,%p)\n",
|
||||
memobj, off, p2align, physp);
|
||||
memobj_lock(&obj->memobj);
|
||||
if (off & ~PAGE_MASK) {
|
||||
error = -EINVAL;
|
||||
ekprintf("shmobj_get_page(%p,%#lx,%d,%p):invalid argument. %d\n",
|
||||
memobj, off, p2align, physp, error);
|
||||
goto out;
|
||||
}
|
||||
if (p2align != PAGE_P2ALIGN) { /* XXX:NYI:large pages */
|
||||
error = -ENOMEM;
|
||||
ekprintf("shmobj_get_page(%p,%#lx,%d,%p):large page. %d\n",
|
||||
memobj, off, p2align, physp, error);
|
||||
goto out;
|
||||
}
|
||||
if (obj->ds.shm_segsz <= off) {
|
||||
error = -ERANGE;
|
||||
ekprintf("shmobj_get_page(%p,%#lx,%d,%p):beyond the end. %d\n",
|
||||
memobj, off, p2align, physp, error);
|
||||
goto out;
|
||||
}
|
||||
if ((obj->ds.shm_segsz - off) < (PAGE_SIZE << p2align)) {
|
||||
error = -ENOSPC;
|
||||
ekprintf("shmobj_get_page(%p,%#lx,%d,%p):too large. %d\n",
|
||||
memobj, off, p2align, physp, error);
|
||||
goto out;
|
||||
}
|
||||
|
||||
page = page_list_lookup(obj, off);
|
||||
if (!page) {
|
||||
npages = 1 << p2align;
|
||||
virt = ihk_mc_alloc_pages(npages, IHK_MC_AP_NOWAIT);
|
||||
if (!virt) {
|
||||
error = -ENOMEM;
|
||||
ekprintf("shmobj_get_page(%p,%#lx,%d,%p):"
|
||||
"alloc failed. %d\n",
|
||||
memobj, off, p2align, physp, error);
|
||||
goto out;
|
||||
}
|
||||
phys = virt_to_phys(virt);
|
||||
page = phys_to_page(phys);
|
||||
if (page->mode != PM_NONE) {
|
||||
fkprintf("shmobj_get_page(%p,%#lx,%d,%p):"
|
||||
"page %p %#lx %d %d %#lx\n",
|
||||
memobj, off, p2align, physp,
|
||||
page, page_to_phys(page), page->mode,
|
||||
page->count, page->offset);
|
||||
panic("shmobj_get_page()");
|
||||
}
|
||||
memset(virt, 0, npages*PAGE_SIZE);
|
||||
page->mode = PM_MAPPED;
|
||||
page->offset = off;
|
||||
ihk_atomic_set(&page->count, 1);
|
||||
page_list_insert(obj, page);
|
||||
virt = NULL;
|
||||
dkprintf("shmobj_get_page(%p,%#lx,%d,%p):alloc page. %p %#lx\n",
|
||||
memobj, off, p2align, physp, page, phys);
|
||||
}
|
||||
|
||||
ihk_atomic_inc(&page->count);
|
||||
|
||||
error = 0;
|
||||
*physp = page_to_phys(page);
|
||||
|
||||
out:
|
||||
memobj_unlock(&obj->memobj);
|
||||
if (virt) {
|
||||
ihk_mc_free_pages(virt, npages);
|
||||
}
|
||||
dkprintf("shmobj_get_page(%p,%#lx,%d,%p):%d\n",
|
||||
memobj, off, p2align, physp, error);
|
||||
return error;
|
||||
}
|
||||
484
kernel/syscall.c
484
kernel/syscall.c
@@ -45,6 +45,7 @@
|
||||
#include <mman.h>
|
||||
#include <kmalloc.h>
|
||||
#include <memobj.h>
|
||||
#include <shm.h>
|
||||
|
||||
/* Headers taken from kitten LWK */
|
||||
#include <lwk/stddef.h>
|
||||
@@ -168,6 +169,7 @@ static void send_syscall(struct syscall_request *req, int cpu, int pid)
|
||||
#endif
|
||||
}
|
||||
|
||||
ihk_spinlock_t syscall_lock;
|
||||
|
||||
long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx,
|
||||
int cpu, int pid)
|
||||
@@ -176,6 +178,9 @@ long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx,
|
||||
struct syscall_request req2 IHK_DMA_ALIGN;
|
||||
struct syscall_params *scp;
|
||||
int error;
|
||||
long rc;
|
||||
int islock = 0;
|
||||
unsigned long irqstate;
|
||||
|
||||
dkprintf("SC(%d)[%3d] sending syscall\n",
|
||||
ihk_mc_get_processor_id(),
|
||||
@@ -184,6 +189,8 @@ long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx,
|
||||
if(req->number == __NR_exit_group ||
|
||||
req->number == __NR_kill){ // interrupt syscall
|
||||
scp = &get_cpu_local_var(0)->scp2;
|
||||
islock = 1;
|
||||
irqstate = ihk_mc_spinlock_lock(&syscall_lock);
|
||||
}
|
||||
else{
|
||||
scp = &get_cpu_local_var(cpu)->scp;
|
||||
@@ -209,7 +216,7 @@ long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx,
|
||||
cpu_local_var(current)->pid);
|
||||
error = page_fault_process(get_cpu_local_var(cpu)->current,
|
||||
(void *)res->fault_address,
|
||||
res->fault_reason);
|
||||
res->fault_reason|PF_POPULATE);
|
||||
|
||||
/* send result */
|
||||
req2.number = __NR_mmap;
|
||||
@@ -225,7 +232,12 @@ long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx,
|
||||
ihk_mc_get_processor_id(),
|
||||
req->number, res->ret);
|
||||
|
||||
return res->ret;
|
||||
rc = res->ret;
|
||||
if(islock){
|
||||
ihk_mc_spinlock_unlock(&syscall_lock, irqstate);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
long syscall_generic_forwarding(int n, ihk_mc_user_context_t *ctx)
|
||||
@@ -630,12 +642,13 @@ SYSCALL_DECLARE(mmap)
|
||||
const int prot = ihk_mc_syscall_arg2(ctx);
|
||||
const int flags = ihk_mc_syscall_arg3(ctx);
|
||||
const int fd = ihk_mc_syscall_arg4(ctx);
|
||||
const off_t off = ihk_mc_syscall_arg5(ctx);
|
||||
const off_t off0 = ihk_mc_syscall_arg5(ctx);
|
||||
|
||||
struct process *proc = cpu_local_var(current);
|
||||
struct vm_regions *region = &proc->vm->region;
|
||||
intptr_t addr;
|
||||
size_t len;
|
||||
off_t off;
|
||||
int error;
|
||||
intptr_t npages;
|
||||
int p2align;
|
||||
@@ -646,10 +659,11 @@ SYSCALL_DECLARE(mmap)
|
||||
int maxprot;
|
||||
int denied;
|
||||
int ro_vma_mapped = 0;
|
||||
struct shmid_ds ads;
|
||||
|
||||
dkprintf("[%d]sys_mmap(%lx,%lx,%x,%x,%d,%lx)\n",
|
||||
ihk_mc_get_processor_id(),
|
||||
addr0, len0, prot, flags, fd, off);
|
||||
addr0, len0, prot, flags, fd, off0);
|
||||
|
||||
/* check constants for flags */
|
||||
if (1) {
|
||||
@@ -681,9 +695,9 @@ SYSCALL_DECLARE(mmap)
|
||||
|| ((region->user_end - len) < addr)
|
||||
|| !(flags & (MAP_SHARED | MAP_PRIVATE))
|
||||
|| ((flags & MAP_SHARED) && (flags & MAP_PRIVATE))
|
||||
|| (off & (PAGE_SIZE - 1))) {
|
||||
|| (off0 & (PAGE_SIZE - 1))) {
|
||||
ekprintf("sys_mmap(%lx,%lx,%x,%x,%x,%lx):EINVAL\n",
|
||||
addr0, len0, prot, flags, fd, off);
|
||||
addr0, len0, prot, flags, fd, off0);
|
||||
error = -EINVAL;
|
||||
goto out2;
|
||||
}
|
||||
@@ -692,7 +706,7 @@ SYSCALL_DECLARE(mmap)
|
||||
if ((flags & error_flags)
|
||||
|| (flags & ~(supported_flags | ignored_flags))) {
|
||||
ekprintf("sys_mmap(%lx,%lx,%x,%x,%x,%lx):unknown flags %x\n",
|
||||
addr0, len0, prot, flags, fd, off,
|
||||
addr0, len0, prot, flags, fd, off0,
|
||||
(flags & ~(supported_flags | ignored_flags)));
|
||||
error = -EINVAL;
|
||||
goto out2;
|
||||
@@ -754,8 +768,10 @@ SYSCALL_DECLARE(mmap)
|
||||
}
|
||||
|
||||
phys = 0;
|
||||
off = 0;
|
||||
maxprot = PROT_READ | PROT_WRITE | PROT_EXEC;
|
||||
if (!(flags & MAP_ANONYMOUS)) {
|
||||
off = off0;
|
||||
error = fileobj_create(fd, &memobj, &maxprot);
|
||||
if (error) {
|
||||
ekprintf("sys_mmap:fileobj_create failed. %d\n", error);
|
||||
@@ -781,6 +797,22 @@ SYSCALL_DECLARE(mmap)
|
||||
}
|
||||
phys = virt_to_phys(p);
|
||||
}
|
||||
else if (flags & MAP_SHARED) {
|
||||
memset(&ads, 0, sizeof(ads));
|
||||
ads.shm_segsz = len;
|
||||
error = shmobj_create(&ads, &memobj);
|
||||
if (error) {
|
||||
ekprintf("sys_mmap:shmobj_create failed. %d\n", error);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
else {
|
||||
error = zeroobj_create(&memobj);
|
||||
if (error) {
|
||||
ekprintf("sys_mmap:zeroobj_create failed. %d\n", error);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
if ((flags & MAP_PRIVATE) && (maxprot & PROT_READ)) {
|
||||
maxprot |= PROT_WRITE;
|
||||
@@ -844,7 +876,7 @@ out2:
|
||||
}
|
||||
dkprintf("[%d]sys_mmap(%lx,%lx,%x,%x,%d,%lx): %ld %lx\n",
|
||||
ihk_mc_get_processor_id(),
|
||||
addr0, len0, prot, flags, fd, off, error, addr);
|
||||
addr0, len0, prot, flags, fd, off0, error, addr);
|
||||
return (!error)? addr: error;
|
||||
}
|
||||
|
||||
@@ -1702,15 +1734,14 @@ SYSCALL_DECLARE(madvise)
|
||||
dkprintf("[%d]sys_madvise(%lx,%lx,%x):not contig "
|
||||
"%lx [%lx-%lx)\n",
|
||||
ihk_mc_get_processor_id(), start,
|
||||
len0, advice, addr, range->start,
|
||||
range->end);
|
||||
len0, advice, addr, range?range->start:0,
|
||||
range?range->end:0);
|
||||
error = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
#define MEMOBJ_IS_FILEOBJ(obj) ((obj) != NULL)
|
||||
if (!MEMOBJ_IS_FILEOBJ(range->memobj)) {
|
||||
dkprintf("[%d]sys_madvise(%lx,%lx,%x):not fileobj "
|
||||
if (!range->memobj || !memobj_has_pager(range->memobj)) {
|
||||
dkprintf("[%d]sys_madvise(%lx,%lx,%x):has not pager"
|
||||
"[%lx-%lx) %lx\n",
|
||||
ihk_mc_get_processor_id(), start,
|
||||
len0, advice, range->start,
|
||||
@@ -1888,56 +1919,104 @@ SYSCALL_DECLARE(ptrace)
|
||||
return -ENOSYS;
|
||||
}
|
||||
|
||||
#define MIN2(x,y) (x) < (y) ? (x) : (y)
|
||||
SYSCALL_DECLARE(sched_setaffinity)
|
||||
{
|
||||
#if 0
|
||||
int pid = (int)ihk_mc_syscall_arg0(ctx);
|
||||
unsigned int len = (unsigned int)ihk_mc_syscall_arg1(ctx);
|
||||
#endif
|
||||
cpu_set_t *mask = (cpu_set_t *)ihk_mc_syscall_arg2(ctx);
|
||||
unsigned long __phys;
|
||||
#if 0
|
||||
int i;
|
||||
#endif
|
||||
/* TODO: check mask is in user's page table */
|
||||
if(!mask) { return -EFAULT; }
|
||||
if (ihk_mc_pt_virt_to_phys(cpu_local_var(current)->vm->page_table,
|
||||
(void *)mask,
|
||||
&__phys)) {
|
||||
int tid = (int)ihk_mc_syscall_arg0(ctx);
|
||||
size_t len = (size_t)ihk_mc_syscall_arg1(ctx);
|
||||
cpu_set_t *u_cpu_set = (cpu_set_t *)ihk_mc_syscall_arg2(ctx);
|
||||
|
||||
cpu_set_t k_cpu_set, cpu_set;
|
||||
struct process *thread;
|
||||
int cpu_id;
|
||||
|
||||
if (sizeof(k_cpu_set) > len) {
|
||||
kprintf("%s:%d\n Too small buffer.", __FILE__, __LINE__);
|
||||
return -EINVAL;
|
||||
}
|
||||
len = MIN2(len, sizeof(k_cpu_set));
|
||||
|
||||
if (copy_from_user(cpu_local_var(current), &k_cpu_set, u_cpu_set, len)) {
|
||||
kprintf("%s:%d copy_from_user failed.\n", __FILE__, __LINE__);
|
||||
return -EFAULT;
|
||||
}
|
||||
#if 0
|
||||
dkprintf("sched_setaffinity,\n");
|
||||
for(i = 0; i < len/sizeof(__cpu_mask); i++) {
|
||||
dkprintf("mask[%d]=%lx,", i, mask->__bits[i]);
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
|
||||
// XXX: We should build something like cpu_available_mask in advance
|
||||
CPU_ZERO(&cpu_set);
|
||||
extern int num_processors;
|
||||
for (cpu_id = 0; cpu_id < num_processors; cpu_id++)
|
||||
if (CPU_ISSET(cpu_id, &k_cpu_set))
|
||||
CPU_SET(cpu_id, &cpu_set);
|
||||
|
||||
for (cpu_id = 0; cpu_id < num_processors; cpu_id++) {
|
||||
ihk_mc_spinlock_lock_noirq(&get_cpu_local_var(cpu_id)->runq_lock);
|
||||
list_for_each_entry(thread, &get_cpu_local_var(cpu_id)->runq, sched_list)
|
||||
if (thread->pid && thread->tid == tid)
|
||||
goto found; /* without unlocking runq_lock */
|
||||
ihk_mc_spinlock_unlock_noirq(&get_cpu_local_var(cpu_id)->runq_lock);
|
||||
}
|
||||
kprintf("%s:%d Thread not found.\n", __FILE__, __LINE__);
|
||||
return -ESRCH;
|
||||
|
||||
found:
|
||||
memcpy(&thread->cpu_set, &cpu_set, sizeof(cpu_set));
|
||||
|
||||
if (!CPU_ISSET(cpu_id, &thread->cpu_set)) {
|
||||
hold_process(thread);
|
||||
ihk_mc_spinlock_unlock_noirq(&get_cpu_local_var(cpu_id)->runq_lock);
|
||||
sched_request_migrate(cpu_id, thread);
|
||||
release_process(thread);
|
||||
return 0;
|
||||
} else {
|
||||
ihk_mc_spinlock_unlock_noirq(&get_cpu_local_var(cpu_id)->runq_lock);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
#define MIN2(x,y) (x) < (y) ? (x) : (y)
|
||||
#define MIN3(x,y,z) MIN2(MIN2((x),(y)),MIN2((y),(z)))
|
||||
// see linux-2.6.34.13/kernel/sched.c
|
||||
SYSCALL_DECLARE(sched_getaffinity)
|
||||
{
|
||||
//int pid = (int)ihk_mc_syscall_arg0(ctx);
|
||||
unsigned int len = (int)ihk_mc_syscall_arg1(ctx);
|
||||
//int cpu_id;
|
||||
cpu_set_t *mask = (cpu_set_t *)ihk_mc_syscall_arg2(ctx);
|
||||
struct ihk_mc_cpu_info *cpu_info = ihk_mc_get_cpu_info();
|
||||
if(len*8 < cpu_info->ncpus) { return -EINVAL; }
|
||||
if(len & (sizeof(unsigned long)-1)) { return -EINVAL; }
|
||||
int min_len = MIN2(len, sizeof(cpu_set_t));
|
||||
//int min_ncpus = MIN2(min_len*8, cpu_info->ncpus);
|
||||
int tid = (int)ihk_mc_syscall_arg0(ctx);
|
||||
size_t len = (size_t)ihk_mc_syscall_arg1(ctx);
|
||||
cpu_set_t k_cpu_set, *u_cpu_set = (cpu_set_t *)ihk_mc_syscall_arg2(ctx);
|
||||
|
||||
CPU_ZERO_S(min_len, mask);
|
||||
CPU_SET_S(ihk_mc_get_hardware_processor_id(), min_len, mask);
|
||||
//for (cpu_id = 0; cpu_id < min_ncpus; ++cpu_id)
|
||||
// CPU_SET_S(cpu_info->hw_ids[cpu_id], min_len, mask);
|
||||
int ret;
|
||||
int found = 0;
|
||||
int i;
|
||||
|
||||
// dkprintf("sched_getaffinity returns full mask\n");
|
||||
if (sizeof(k_cpu_set) > len) {
|
||||
kprintf("%s:%d Too small buffer.\n", __FILE__, __LINE__);
|
||||
return -EINVAL;
|
||||
}
|
||||
len = MIN2(len, sizeof(k_cpu_set));
|
||||
|
||||
return min_len;
|
||||
extern int num_processors;
|
||||
for (i = 0; i < num_processors && !found; i++) {
|
||||
struct process *thread;
|
||||
ihk_mc_spinlock_lock_noirq(&get_cpu_local_var(i)->runq_lock);
|
||||
list_for_each_entry(thread, &get_cpu_local_var(i)->runq, sched_list) {
|
||||
if (thread->pid && thread->tid == tid) {
|
||||
found = 1;
|
||||
memcpy(&k_cpu_set, &thread->cpu_set, sizeof(k_cpu_set));
|
||||
break;
|
||||
}
|
||||
}
|
||||
ihk_mc_spinlock_unlock_noirq(&get_cpu_local_var(i)->runq_lock);
|
||||
}
|
||||
if (!found) {
|
||||
kprintf("%s:%d Thread not found.\n", __FILE__, __LINE__);
|
||||
return -ESRCH;
|
||||
}
|
||||
ret = copy_to_user(cpu_local_var(current), u_cpu_set, &k_cpu_set, len);
|
||||
kprintf("%s %d %d\n", __FILE__, __LINE__, ret);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
return len;
|
||||
}
|
||||
|
||||
SYSCALL_DECLARE(get_cpu_id)
|
||||
{
|
||||
return ihk_mc_get_processor_id();
|
||||
}
|
||||
|
||||
SYSCALL_DECLARE(sched_yield)
|
||||
@@ -2035,7 +2114,8 @@ SYSCALL_DECLARE(mlock)
|
||||
dkprintf("[%d]sys_mlock(%lx,%lx):not contiguous."
|
||||
" %lx [%lx-%lx)\n",
|
||||
ihk_mc_get_processor_id(), start0,
|
||||
len0, addr, range->start, range->end);
|
||||
len0, addr, range?range->start:0,
|
||||
range?range->end:0);
|
||||
error = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
@@ -2209,7 +2289,8 @@ SYSCALL_DECLARE(munlock)
|
||||
dkprintf("[%d]sys_munlock(%lx,%lx):not contiguous."
|
||||
" %lx [%lx-%lx)\n",
|
||||
ihk_mc_get_processor_id(), start0,
|
||||
len0, addr, range->start, range->end);
|
||||
len0, addr, range?range->start:0,
|
||||
range?range->end:0);
|
||||
error = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
@@ -2271,6 +2352,302 @@ out2:
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
 * sys_remap_file_pages(start, size, prot, pgoff, flags):
 * rebind the pages of an existing shared file mapping so that they map
 * file offset (pgoff << PAGE_SHIFT) onward, without changing protections
 * (prot must be 0).  Returns 0 on success or a negative errno.
 */
SYSCALL_DECLARE(remap_file_pages)
{
	const uintptr_t start0 = ihk_mc_syscall_arg0(ctx);
	const size_t size = ihk_mc_syscall_arg1(ctx);
	const int prot = ihk_mc_syscall_arg2(ctx);
	const size_t pgoff = ihk_mc_syscall_arg3(ctx);
	const int flags = ihk_mc_syscall_arg4(ctx);
	int error;
	const uintptr_t start = start0 & PAGE_MASK;	/* page-align start */
	const uintptr_t end = start + size;
	const off_t off = (off_t)pgoff << PAGE_SHIFT;	/* new file offset, bytes */
	struct process * const proc = cpu_local_var(current);
	struct vm_range *range;
	int er;
	int need_populate = 0;

	dkprintf("sys_remap_file_pages(%#lx,%#lx,%#x,%#lx,%#x)\n",
			start0, size, prot, pgoff, flags);
	ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock);
	/* largest pgoff such that (pgoff << PAGE_SHIFT) still fits in off_t */
#define PGOFF_LIMIT ((off_t)1 << ((8*sizeof(off_t) - 1) - PAGE_SHIFT))
	/* Reject: zero/unaligned size, nonzero prot, pgoff or pgoff+npages
	 * beyond PGOFF_LIMIT, or a [start,end) span that wraps.
	 * NOTE(review): pgoff is size_t (unsigned), so (pgoff < 0) is always
	 * false; the PGOFF_LIMIT checks are what actually bound it. */
	if ((size <= 0) || (size & (PAGE_SIZE - 1)) || (prot != 0)
			|| (pgoff < 0) || (PGOFF_LIMIT <= pgoff)
			|| ((PGOFF_LIMIT - pgoff) < (size / PAGE_SIZE))
			|| !((start < end) || (end == 0))) {
		ekprintf("sys_remap_file_pages(%#lx,%#lx,%#x,%#lx,%#x):"
				"invalid args\n",
				start0, size, prot, pgoff, flags);
		error = -EINVAL;
		goto out;
	}

	/* The whole span must lie inside a single shared, file-backed VMR:
	 * not private, not remote/uncached/reserved, and with a memobj. */
	range = lookup_process_memory_range(proc->vm, start, end);
	if (!range || (start < range->start) || (range->end < end)
			|| (range->flag & VR_PRIVATE)
			|| (range->flag & (VR_REMOTE|VR_IO_NOCACHE|VR_RESERVED))
			|| !range->memobj) {
		ekprintf("sys_remap_file_pages(%#lx,%#lx,%#x,%#lx,%#x):"
				"invalid VMR:[%#lx-%#lx) %#lx %p\n",
				start0, size, prot, pgoff, flags,
				range?range->start:0, range?range->end:0,
				range?range->flag:0, range?range->memobj:NULL);
		error = -EINVAL;
		goto out;
	}

	/* Mark the range as carrying per-page file offsets, then rebind. */
	range->flag |= VR_FILEOFF;
	error = remap_process_memory_range(proc->vm, range, start, end, off);
	if (error) {
		ekprintf("sys_remap_file_pages(%#lx,%#lx,%#x,%#lx,%#x):"
				"remap failed %d\n",
				start0, size, prot, pgoff, flags, error);
		goto out;
	}
	clear_host_pte(start, size);	/* XXX: workaround */

	/* mlock()ed ranges must stay resident: refault them in below. */
	if (range->flag & VR_LOCKED) {
		need_populate = 1;
	}
	error = 0;
out:
	ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock);

	/* Populate after dropping memory_range_lock; failure is non-fatal. */
	if (need_populate
			&& (er = populate_process_memory(
					proc, (void *)start, size))) {
		ekprintf("sys_remap_file_pages(%#lx,%#lx,%#x,%#lx,%#x):"
				"populate failed %d\n",
				start0, size, prot, pgoff, flags, er);
		/* ignore populate error */
	}

	dkprintf("sys_remap_file_pages(%#lx,%#lx,%#x,%#lx,%#x): %d\n",
			start0, size, prot, pgoff, flags, error);
	return error;
}
|
||||
|
||||
/*
 * sys_mremap(oldaddr, oldsize, newsize, flags, newaddr):
 * grow, shrink, or relocate an existing mapping.  Supports
 * MREMAP_MAYMOVE and MREMAP_FIXED.  Returns the (possibly new) start
 * address on success, or a negative errno cast to uintptr_t on failure.
 */
SYSCALL_DECLARE(mremap)
{
	const uintptr_t oldaddr = ihk_mc_syscall_arg0(ctx);
	const size_t oldsize0 = ihk_mc_syscall_arg1(ctx);
	const size_t newsize0 = ihk_mc_syscall_arg2(ctx);
	const int flags = ihk_mc_syscall_arg3(ctx);
	const uintptr_t newaddr = ihk_mc_syscall_arg4(ctx);
	/* round both sizes up to whole pages */
	const ssize_t oldsize = (oldsize0 + PAGE_SIZE - 1) & PAGE_MASK;
	const ssize_t newsize = (newsize0 + PAGE_SIZE - 1) & PAGE_MASK;
	const uintptr_t oldstart = oldaddr;
	const uintptr_t oldend = oldstart + oldsize;
	struct process *proc = cpu_local_var(current);
	struct process_vm *vm = proc->vm;
	int error;
	struct vm_range *range;
	int need_relocate;
	uintptr_t newstart;
	uintptr_t newend;
	size_t size;
	uintptr_t ret;
	/* lckstart/lckend: span to re-populate for VR_LOCKED ranges.
	 * Initialized to -1 so (lckstart < lckend) is false until set. */
	uintptr_t lckstart = -1;
	uintptr_t lckend = -1;

	dkprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx)\n",
			oldaddr, oldsize0, newsize0, flags, newaddr);
	ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);

	/* Argument validation: aligned oldaddr, sane sizes, known flags,
	 * FIXED implies MAYMOVE, and a page-aligned newaddr when FIXED. */
	if ((oldaddr & ~PAGE_MASK)
			|| (oldsize < 0)
			|| (newsize <= 0)
			|| (flags & ~(MREMAP_MAYMOVE | MREMAP_FIXED))
			|| ((flags & MREMAP_FIXED)
				&& !(flags & MREMAP_MAYMOVE))
			|| ((flags & MREMAP_FIXED)
				&& (newaddr & ~PAGE_MASK))) {
		error = -EINVAL;
		ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):invalid. %d\n",
				oldaddr, oldsize0, newsize0, flags, newaddr,
				error);
		goto out;
	}

	/* check original mapping: the whole old span must sit inside one
	 * VMR that is neither file-offset-remapped nor special. */
	range = lookup_process_memory_range(vm, oldstart, oldstart+PAGE_SIZE);
	if (!range || (oldstart < range->start) || (range->end < oldend)
			|| (range->flag & (VR_FILEOFF))
			|| (range->flag & (VR_REMOTE|VR_IO_NOCACHE|VR_RESERVED))) {
		error = -EFAULT;
		ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
				"lookup failed. %d %p %#lx-%#lx %#lx\n",
				oldaddr, oldsize0, newsize0, flags, newaddr,
				error, range, range?range->start:0,
				range?range->end:0, range?range->flag:0);
		goto out;
	}

	/* oldstart + oldsize wrapped around the address space */
	if (oldend < oldstart) {
		error = -EINVAL;
		ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
				"old range overflow. %d\n",
				oldaddr, oldsize0, newsize0, flags, newaddr,
				error);
		goto out;
	}

	/* determine new mapping range */
	need_relocate = 0;
	if (flags & MREMAP_FIXED) {
		/* Caller dictates the destination; it must be above the
		 * minimum user address and must not overlap the source. */
		need_relocate = 1;
		newstart = newaddr;
		newend = newstart + newsize;
		if (newstart < vm->region.user_start) {
			error = -EPERM;
			ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
					"mmap_min_addr %#lx. %d\n",
					oldaddr, oldsize0, newsize0, flags,
					newaddr, vm->region.user_start,
					error);
			goto out;
		}
		if ((newstart < oldend) && (oldstart < newend)) {
			error = -EINVAL;
			ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
					"fixed:overlapped. %d\n",
					oldaddr, oldsize0, newsize0, flags,
					newaddr, error);
			goto out;
		}
	}
	else if (!(flags & MREMAP_FIXED) && (oldsize < newsize)) {
		/* Growing: first try to extend in place when the old span
		 * ends exactly at the VMR's end. */
		if (oldend == range->end) {
			newstart = oldstart;
			newend = newstart + newsize;
			error = extend_up_process_memory_range(vm, range,
					newend);
			if (!error) {
				/* grown in place; populate only the new tail
				 * if the range is locked */
				if (range->flag & VR_LOCKED) {
					lckstart = oldend;
					lckend = newend;
				}
				goto out;
			}
		}
		/* In-place growth failed; relocation needs MAYMOVE. */
		if (!(flags & MREMAP_MAYMOVE)) {
			error = -ENOMEM;
			ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
					"cannot relocate. %d\n",
					oldaddr, oldsize0, newsize0, flags,
					newaddr, error);
			goto out;
		}
		need_relocate = 1;
		error = search_free_space(newsize, vm->region.map_end,
				(intptr_t *)&newstart);
		if (error) {
			ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
					"search failed. %d\n",
					oldaddr, oldsize0, newsize0, flags,
					newaddr, error);
			goto out;
		}
		newend = newstart + newsize;
	}
	else {
		/* same size or shrinking: stay at the old address */
		newstart = oldstart;
		newend = newstart + newsize;
	}

	/* do the remap */
	if (need_relocate) {
		if (flags & MREMAP_FIXED) {
			/* MREMAP_FIXED silently unmaps any existing mapping
			 * at the destination first. */
			error = do_munmap((void *)newstart, newsize);
			if (error) {
				ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
						"fixed:munmap failed. %d\n",
						oldaddr, oldsize0, newsize0,
						flags, newaddr, error);
				goto out;
			}
		}
		/* The new VMR shares the old range's memobj; take a ref
		 * before add_process_memory_range stores it. */
		if (range->memobj) {
			memobj_ref(range->memobj);
		}
		error = add_process_memory_range(proc, newstart, newend, -1,
				range->flag, range->memobj,
				range->objoff + (oldstart - range->start));
		if (error) {
			ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
					"add failed. %d\n",
					oldaddr, oldsize0, newsize0, flags,
					newaddr, error);
			/* drop the reference taken above */
			if (range->memobj) {
				memobj_release(range->memobj);
			}
			goto out;
		}
		if (range->flag & VR_LOCKED) {
			lckstart = newstart;
			lckend = newend;
		}

		if (oldsize > 0) {
			/* Move the already-present PTEs across, then drop
			 * the old mapping. */
			size = (oldsize < newsize)? oldsize: newsize;
			ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
			error = move_pte_range(vm->page_table,
					(void *)oldstart, (void *)newstart,
					size);
			ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
			if (error) {
				ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
						"move failed. %d\n",
						oldaddr, oldsize0, newsize0,
						flags, newaddr, error);
				goto out;
			}

			error = do_munmap((void *)oldstart, oldsize);
			if (error) {
				ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
						"relocate:munmap failed. %d\n",
						oldaddr, oldsize0, newsize0,
						flags, newaddr, error);
				goto out;
			}
		}
	}
	else if (newsize < oldsize) {
		/* shrinking in place: unmap the trailing part */
		error = do_munmap((void *)newend, (oldend - newend));
		if (error) {
			ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
					"shrink:munmap failed. %d\n",
					oldaddr, oldsize0, newsize0, flags,
					newaddr, error);
			goto out;
		}
	}
	else {
		/* nothing to do */
	}

	error = 0;
out:
	ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
	/* For VR_LOCKED ranges, fault in the new/moved span now (outside
	 * memory_range_lock); populate failure is deliberately ignored. */
	if (!error && (lckstart < lckend)) {
		error = populate_process_memory(proc, (void *)lckstart, (lckend - lckstart));
		if (error) {
			ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
					"populate failed. %d %#lx-%#lx\n",
					oldaddr, oldsize0, newsize0, flags,
					newaddr, error, lckstart, lckend);
			error = 0; /* ignore error */
		}
	}
	/* negative errno or the new start address */
	ret = (error)? error: newstart;
	dkprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):%d %#lx\n",
			oldaddr, oldsize0, newsize0, flags, newaddr, error,
			ret);
	return ret;
}
|
||||
|
||||
#ifdef DCFA_KMOD
|
||||
|
||||
#ifdef CMD_DCFA
|
||||
@@ -2407,6 +2784,7 @@ long syscall(int num, ihk_mc_user_context_t *ctx)
|
||||
}
|
||||
|
||||
check_signal(l, NULL);
|
||||
check_need_resched();
|
||||
|
||||
return l;
|
||||
}
|
||||
|
||||
206
kernel/zeroobj.c
Normal file
206
kernel/zeroobj.c
Normal file
@@ -0,0 +1,206 @@
|
||||
/**
|
||||
* \file zeroobj.c
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* read-only zeroed page object
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
*/
|
||||
|
||||
#include <ihk/atomic.h>
|
||||
#include <ihk/debug.h>
|
||||
#include <ihk/lock.h>
|
||||
#include <ihk/mm.h>
|
||||
#include <errno.h>
|
||||
#include <kmalloc.h>
|
||||
#include <list.h>
|
||||
#include <memobj.h>
|
||||
#include <memory.h>
|
||||
#include <page.h>
|
||||
#include <string.h>
|
||||
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#define fkprintf(...) kprintf(__VA_ARGS__)
|
||||
|
||||
/* Singleton memory object backing read-only zero-filled pages. */
struct zeroobj {
	struct memobj memobj;		/* must be first (allows memobj <-> zeroobj casts) */
	struct list_head page_list;	/* pages owned by this object */
};
|
||||
|
||||
static ihk_spinlock_t the_zeroobj_lock = SPIN_LOCK_UNLOCKED;
|
||||
static struct zeroobj *the_zeroobj = NULL; /* singleton */
|
||||
|
||||
static memobj_get_page_func_t zeroobj_get_page;
|
||||
|
||||
static struct memobj_ops zeroobj_ops = {
|
||||
.get_page = &zeroobj_get_page,
|
||||
};
|
||||
|
||||
/* Downcast: valid because struct memobj is the first member of struct zeroobj. */
static struct zeroobj *to_zeroobj(struct memobj *memobj)
{
	struct zeroobj *obj = (struct zeroobj *)memobj;

	return obj;
}
|
||||
|
||||
static struct memobj *to_memobj(struct zeroobj *zeroobj)
|
||||
{
|
||||
return &zeroobj->memobj;
|
||||
}
|
||||
|
||||
/***********************************************************************
|
||||
* page_list
|
||||
*/
|
||||
static void page_list_init(struct zeroobj *obj)
|
||||
{
|
||||
INIT_LIST_HEAD(&obj->page_list);
|
||||
return;
|
||||
}
|
||||
|
||||
static void page_list_insert(struct zeroobj *obj, struct page *page)
|
||||
{
|
||||
list_add(&page->list, &obj->page_list);
|
||||
return;
|
||||
}
|
||||
|
||||
static struct page *page_list_first(struct zeroobj *obj)
|
||||
{
|
||||
if (list_empty(&obj->page_list)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return list_first_entry(&obj->page_list, struct page, list);
|
||||
}
|
||||
|
||||
/***********************************************************************
|
||||
* zeroobj
|
||||
*/
|
||||
/*
 * Allocate and install the zeroobj singleton (the_zeroobj) under
 * the_zeroobj_lock: one struct zeroobj plus one zero-filled page.
 * Idempotent — returns 0 immediately if the singleton already exists.
 * Returns 0 on success or a negative errno.
 */
static int alloc_zeroobj(void)
{
	int error;
	struct zeroobj *obj = NULL;	/* freed at out: unless ownership transferred */
	void *virt = NULL;		/* freed at out: unless ownership transferred */
	uintptr_t phys;
	struct page *page;

	dkprintf("alloc_zeroobj()\n");
	ihk_mc_spinlock_lock_noirq(&the_zeroobj_lock);
	/* another caller may have created it before we took the lock */
	if (the_zeroobj) {
		error = 0;
		dkprintf("alloc_zeroobj():already. %d\n", error);
		goto out;
	}

	obj = kmalloc(sizeof(*obj), IHK_MC_AP_NOWAIT);
	if (!obj) {
		error = -ENOMEM;
		ekprintf("alloc_zeroobj():kmalloc failed. %d\n", error);
		goto out;
	}

	memset(obj, 0, sizeof(*obj));
	obj->memobj.ops = &zeroobj_ops;
	page_list_init(obj);
	ihk_mc_spinlock_init(&obj->memobj.lock);

	virt = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT); /* XXX:NYI:large page */
	if (!virt) {
		error = -ENOMEM;
		ekprintf("alloc_zeroobj():alloc pages failed. %d\n", error);
		goto out;
	}
	phys = virt_to_phys(virt);
	page = phys_to_page(phys);

	/* a freshly allocated page must be unowned (PM_NONE) */
	if (page->mode != PM_NONE) {
		fkprintf("alloc_zeroobj():"
				"page %p %#lx %d %d %#lx\n",
				page, page_to_phys(page), page->mode,
				page->count, page->offset);
		panic("alloc_zeroobj:dup alloc");
	}

	/* zero the page and hand it to the object */
	memset(virt, 0, PAGE_SIZE);
	page->mode = PM_MAPPED;
	page->offset = 0;
	ihk_atomic_set(&page->count, 1);
	page_list_insert(obj, page);
	virt = NULL;	/* ownership moved to obj; don't free below */

	error = 0;
	the_zeroobj = obj;
	obj = NULL;	/* ownership moved to the singleton; don't free below */

out:
	ihk_mc_spinlock_unlock_noirq(&the_zeroobj_lock);
	/* error-path cleanup: only non-NULL when ownership was NOT transferred */
	if (virt) {
		ihk_mc_free_pages(virt, 1);
	}
	if (obj) {
		kfree(obj);
	}
	dkprintf("alloc_zeroobj():%d %p\n", error, the_zeroobj);
	return error;
}
|
||||
|
||||
int zeroobj_create(struct memobj **objp)
|
||||
{
|
||||
int error;
|
||||
|
||||
dkprintf("zeroobj_create(%p)\n", objp);
|
||||
if (!the_zeroobj) {
|
||||
error = alloc_zeroobj();
|
||||
if (error) {
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
error = 0;
|
||||
*objp = to_memobj(the_zeroobj);
|
||||
|
||||
out:
|
||||
dkprintf("zeroobj_create(%p):%d %p\n", objp, error, *objp);
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
 * memobj_ops.get_page for the zeroobj: every page-aligned offset maps
 * to the single shared zero page.  Takes a reference on the page and
 * stores its physical address in *physp.  Returns 0, or -EINVAL for an
 * unaligned offset, or -ENOMEM for an unsupported page size / missing page.
 */
static int zeroobj_get_page(struct memobj *memobj, off_t off, int p2align,
		uintptr_t *physp)
{
	int error;
	struct zeroobj *obj = to_zeroobj(memobj);
	struct page *page;

	dkprintf("zeroobj_get_page(%p,%#lx,%d,%p)\n",
			memobj, off, p2align, physp);
	/* offset must be page aligned */
	if (off & ~PAGE_MASK) {
		error = -EINVAL;
		ekprintf("zeroobj_get_page(%p,%#lx,%d,%p):invalid argument. %d\n",
				memobj, off, p2align, physp, error);
		goto out;
	}
	if (p2align != PAGE_P2ALIGN) { /* XXX:NYI:large pages */
		error = -ENOMEM;
		ekprintf("zeroobj_get_page(%p,%#lx,%d,%p):large page. %d\n",
				memobj, off, p2align, physp, error);
		goto out;
	}

	/* the singleton's one zero page, put there by alloc_zeroobj() */
	page = page_list_first(obj);
	if (!page) {
		error = -ENOMEM;
		ekprintf("zeroobj_get_page(%p,%#lx,%d,%p):page not found. %d\n",
				memobj, off, p2align, physp, error);
		goto out;
	}

	/* caller receives a reference on the shared page */
	ihk_atomic_inc(&page->count);

	error = 0;
	*physp = page_to_phys(page);

out:
	dkprintf("zeroobj_get_page(%p,%#lx,%d,%p):%d\n",
			memobj, off, p2align, physp, error);
	return error;
}
|
||||
@@ -47,6 +47,12 @@ enum ihk_mc_pt_prepare_flag {
|
||||
IHK_MC_PT_LAST_LEVEL,
|
||||
};
|
||||
|
||||
/* Flags controlling visit_pte_range() traversal. */
enum visit_pte_flag {
	VPTEF_SKIP_NULL = 0x0001, /* skip null PTEs */

	VPTEF_DEFAULT = 0,	/* visit every entry, including null PTEs */
};
|
||||
|
||||
struct ihk_mc_memory_area {
|
||||
unsigned long start;
|
||||
unsigned long size;
|
||||
@@ -123,6 +129,12 @@ int ihk_mc_pt_set_pte(page_table_t pt, pte_t *ptep, size_t pgsize, uintptr_t phy
|
||||
int ihk_mc_pt_prepare_map(page_table_t pt, void *virt, unsigned long size,
|
||||
enum ihk_mc_pt_prepare_flag);
|
||||
|
||||
typedef int pte_visitor_t(void *arg, page_table_t pt, pte_t *ptep,
|
||||
void *pgaddr, size_t pgsize);
|
||||
int visit_pte_range(page_table_t pt, void *start, void *end,
|
||||
enum visit_pte_flag flags, pte_visitor_t *funcp, void *arg);
|
||||
int move_pte_range(page_table_t pt, void *src, void *dest, size_t size);
|
||||
|
||||
struct page_table *ihk_mc_pt_create(enum ihk_mc_ap_flag ap_flag);
|
||||
/* XXX: proper use of struct page_table and page_table_t is unknown */
|
||||
void ihk_mc_pt_destroy(struct page_table *pt);
|
||||
|
||||
Reference in New Issue
Block a user