Merge remote branch 'origin/master' into gdb

Conflicts:
	kernel/include/process.h
This commit is contained in:
Naoki Hamada
2014-07-18 16:03:35 +09:00
23 changed files with 1723 additions and 117 deletions

View File

@@ -21,6 +21,7 @@
#include <cpulocal.h>
#include <march.h>
#include <signal.h>
#include <process.h>
#define LAPIC_ID 0x020
#define LAPIC_TIMER 0x320
@@ -438,6 +439,7 @@ void handle_interrupt(int vector, struct x86_regs *regs)
}
check_signal(0, regs);
check_need_resched();
}
void gpe_handler(struct x86_regs *regs)
@@ -447,6 +449,7 @@ void gpe_handler(struct x86_regs *regs)
arch_show_interrupt_context(regs);
set_signal(SIGILL, regs);
check_signal(0, regs);
check_need_resched();
// panic("GPF");
}

View File

@@ -78,6 +78,8 @@
#define PFL3_DIRTY ((pte_t)0x40)
#define PFL3_SIZE ((pte_t)0x80) /* Used in 1G page */
#define PFL3_GLOBAL ((pte_t)0x100)
#define PFL3_IGNORED_11 ((pte_t)1 << 11)
#define PFL3_FILEOFF PFL3_IGNORED_11
#define PFL2_PRESENT ((pte_t)0x01)
#define PFL2_WRITABLE ((pte_t)0x02)
@@ -88,6 +90,8 @@
#define PFL2_DIRTY ((pte_t)0x40)
#define PFL2_SIZE ((pte_t)0x80) /* Used in 2M page */
#define PFL2_GLOBAL ((pte_t)0x100)
#define PFL2_IGNORED_11 ((pte_t)1 << 11)
#define PFL2_FILEOFF PFL2_IGNORED_11
#define PFL1_PRESENT ((pte_t)0x01)
#define PFL1_WRITABLE ((pte_t)0x02)
@@ -96,6 +100,8 @@
#define PFL1_PCD ((pte_t)0x10)
#define PFL1_ACCESSED ((pte_t)0x20)
#define PFL1_DIRTY ((pte_t)0x40)
#define PFL1_IGNORED_11 ((pte_t)1 << 11)
#define PFL1_FILEOFF PFL1_IGNORED_11
/* We allow user programs to access all the memory */
#define PFL4_KERN_ATTR (PFL4_PRESENT | PFL4_WRITABLE)
@@ -108,6 +114,9 @@
#define PFL3_PDIR_ATTR (PFL3_PRESENT | PFL3_WRITABLE | PFL3_USER)
#define PFL2_PDIR_ATTR (PFL2_PRESENT | PFL2_WRITABLE | PFL2_USER)
#define PTE_NULL ((pte_t)0)
typedef unsigned long pte_t;
/* For easy conversion, it is better to be the same as architecture's ones */
enum ihk_mc_pt_attribute {
PTATTR_ACTIVE = 0x01,
@@ -115,14 +124,12 @@ enum ihk_mc_pt_attribute {
PTATTR_USER = 0x04,
PTATTR_DIRTY = 0x40,
PTATTR_LARGEPAGE = 0x80,
PTATTR_FILEOFF = PFL2_FILEOFF,
PTATTR_NO_EXECUTE = 0x8000000000000000,
PTATTR_UNCACHABLE = 0x10000,
PTATTR_FOR_USER = 0x20000,
};
#define PTE_NULL ((pte_t)0)
typedef unsigned long pte_t;
static inline int pte_is_null(pte_t *ptep)
{
return (*ptep == PTE_NULL);
@@ -138,11 +145,77 @@ static inline int pte_is_writable(pte_t *ptep)
return !!(*ptep & PF_WRITABLE);
}
static inline int pte_is_dirty(pte_t *ptep, size_t pgsize)
{
switch (pgsize) {
case PTL1_SIZE: return !!(*ptep & PFL1_DIRTY);
case PTL2_SIZE: return !!(*ptep & PFL2_DIRTY);
case PTL3_SIZE: return !!(*ptep & PFL3_DIRTY);
default:
#if 0 /* XXX: workaround. cannot use panic() here */
panic("pte_is_dirty");
#else
return !!(*ptep & PTATTR_DIRTY);
#endif
}
}
static inline int pte_is_fileoff(pte_t *ptep, size_t pgsize)
{
switch (pgsize) {
case PTL1_SIZE: return !!(*ptep & PFL1_FILEOFF);
case PTL2_SIZE: return !!(*ptep & PFL2_FILEOFF);
case PTL3_SIZE: return !!(*ptep & PFL3_FILEOFF);
default:
#if 0 /* XXX: workaround. cannot use panic() here */
panic("pte_is_fileoff");
#else
return !!(*ptep & PTATTR_FILEOFF);
#endif
}
}
static inline uintptr_t pte_get_phys(pte_t *ptep)
{
return (*ptep & PT_PHYSMASK);
}
static inline off_t pte_get_off(pte_t *ptep, size_t pgsize)
{
return (off_t)(*ptep & PAGE_MASK);
}
static inline void pte_make_fileoff(off_t off,
enum ihk_mc_pt_attribute ptattr, size_t pgsize, pte_t *ptep)
{
uint64_t attr;
attr = ptattr & ~PAGE_MASK;
switch (pgsize) {
case PTL1_SIZE: attr |= PFL1_FILEOFF; break;
case PTL2_SIZE: attr |= PFL2_FILEOFF | PFL2_SIZE; break;
case PTL3_SIZE: attr |= PFL3_FILEOFF | PFL3_SIZE; break;
default:
#if 0 /* XXX: workaround. cannot use panic() here */
panic("pte_make_fileoff");
#else
attr |= PTATTR_FILEOFF;
#endif
break;
}
*ptep = (off & PAGE_MASK) | attr;
}
#if 0 /* XXX: workaround. cannot use panic() here */
static inline void pte_xchg(pte_t *ptep, pte_t *valp)
{
*valp = xchg(ptep, *valp);
}
#else
#define pte_xchg(p,vp) do { *(vp) = xchg((p), *(vp)); } while (0)
#endif
struct page_table;
void set_pte(pte_t *ppte, unsigned long phys, enum ihk_mc_pt_attribute attr);
pte_t *get_pte(struct page_table *pt, void *virt, enum ihk_mc_pt_attribute attr);

View File

@@ -136,7 +136,7 @@ struct tss64 {
} __attribute__((packed));
struct x86_regs {
unsigned long r11, r10, r9, r8;
unsigned long r15, r14, r13, r12, r11, r10, r9, r8;
unsigned long rdi, rsi, rdx, rcx, rbx, rax, rbp;
unsigned long error, rip, cs, rflags, rsp, ss;
};

View File

@@ -11,6 +11,9 @@
* 2012/02/11 bgerofi what kind of new features have been added
*/
#ifndef __HEADER_X86_COMMON_SIGNAL_H
#define __HEADER_X86_COMMON_SIGNAL_H
#define _NSIG 64
#define _NSIG_BPW 64
#define _NSIG_WORDS (_NSIG / _NSIG_BPW)
@@ -149,3 +152,5 @@ typedef struct siginfo {
#define SIGSYS 31
#define SIGUNUSED 31
#define SIGRTMIN 32
#endif /*__HEADER_X86_COMMON_SIGNAL_H*/

View File

@@ -40,6 +40,7 @@ SYSCALL_DELEGATED(18, pwrite64)
SYSCALL_DELEGATED(20, writev)
SYSCALL_DELEGATED(21, access)
SYSCALL_HANDLED(24, sched_yield)
SYSCALL_HANDLED(25, mremap)
SYSCALL_HANDLED(28, madvise)
SYSCALL_HANDLED(34, pause)
SYSCALL_HANDLED(39, getpid)
@@ -75,6 +76,7 @@ SYSCALL_DELEGATED(201, time)
SYSCALL_HANDLED(202, futex)
SYSCALL_HANDLED(203, sched_setaffinity)
SYSCALL_HANDLED(204, sched_getaffinity)
SYSCALL_HANDLED(216, remap_file_pages)
SYSCALL_DELEGATED(217, getdents64)
SYSCALL_HANDLED(218, set_tid_address)
SYSCALL_HANDLED(231, exit_group)
@@ -87,5 +89,6 @@ SYSCALL_HANDLED(601, pmc_init)
SYSCALL_HANDLED(602, pmc_start)
SYSCALL_HANDLED(603, pmc_stop)
SYSCALL_HANDLED(604, pmc_reset)
SYSCALL_HANDLED(700, get_cpu_id)
/**** End of File ****/

View File

@@ -35,8 +35,16 @@
pushq %r8; \
pushq %r9; \
pushq %r10; \
pushq %r11;
pushq %r11; \
pushq %r12; \
pushq %r13; \
pushq %r14; \
pushq %r15;
#define POP_ALL_REGS \
popq %r15; \
popq %r14; \
popq %r13; \
popq %r12; \
popq %r11; \
popq %r10; \
popq %r9; \
@@ -67,7 +75,7 @@ vector=vector+1
common_interrupt:
PUSH_ALL_REGS
movq 88(%rsp), %rdi
movq 120(%rsp), %rdi
movq %rsp, %rsi
call handle_interrupt /* Enter C code */
POP_ALL_REGS
@@ -83,7 +91,7 @@ page_fault:
cld
PUSH_ALL_REGS
movq %cr2, %rdi
movq 88(%rsp),%rsi
movq 120(%rsp),%rsi
movq %rsp,%rdx
movq __page_fault_handler_address(%rip), %rax
andq %rax, %rax
@@ -120,13 +128,13 @@ x86_syscall:
movq %gs:24, %rcx
movq %rcx, 32(%rsp)
PUSH_ALL_REGS
movq 72(%rsp), %rdi
movq 104(%rsp), %rdi
movw %ss, %ax
movw %ax, %ds
movq %rsp, %rsi
callq *__x86_syscall_handler(%rip)
1:
movq %rax, 72(%rsp)
movq %rax, 104(%rsp)
POP_ALL_REGS
#ifdef USE_SYSRET
movq 8(%rsp), %rcx

View File

@@ -219,7 +219,13 @@ static struct page_table *__alloc_new_pt(enum ihk_mc_ap_flag ap_flag)
* but L2 and L1 do not!
*/
static enum ihk_mc_pt_attribute attr_mask = PTATTR_WRITABLE | PTATTR_USER | PTATTR_ACTIVE;
static enum ihk_mc_pt_attribute attr_mask
= 0
| PTATTR_FILEOFF
| PTATTR_WRITABLE
| PTATTR_USER
| PTATTR_ACTIVE
| 0;
#define ATTR_MASK attr_mask
void enable_ptattr_no_execute(void)
@@ -523,6 +529,7 @@ int ihk_mc_pt_print_pte(struct page_table *pt, void *virt)
if (!(pt->entry[l4idx] & PFL4_PRESENT)) {
__kprintf("0x%lX l4idx not present! \n", (unsigned long)virt);
__kprintf("l4 entry: 0x%lX\n", pt->entry[l4idx]);
return -EFAULT;
}
pt = phys_to_virt(pt->entry[l4idx] & PAGE_MASK);
@@ -530,6 +537,7 @@ int ihk_mc_pt_print_pte(struct page_table *pt, void *virt)
__kprintf("l3 table: 0x%lX l3idx: %d \n", virt_to_phys(pt), l3idx);
if (!(pt->entry[l3idx] & PFL3_PRESENT)) {
__kprintf("0x%lX l3idx not present! \n", (unsigned long)virt);
__kprintf("l3 entry: 0x%lX\n", pt->entry[l3idx]);
return -EFAULT;
}
pt = phys_to_virt(pt->entry[l3idx] & PAGE_MASK);
@@ -537,6 +545,7 @@ int ihk_mc_pt_print_pte(struct page_table *pt, void *virt)
__kprintf("l2 table: 0x%lX l2idx: %d \n", virt_to_phys(pt), l2idx);
if (!(pt->entry[l2idx] & PFL2_PRESENT)) {
__kprintf("0x%lX l2idx not present! \n", (unsigned long)virt);
__kprintf("l2 entry: 0x%lX\n", pt->entry[l2idx]);
return -EFAULT;
}
if ((pt->entry[l2idx] & PFL2_SIZE)) {
@@ -546,11 +555,12 @@ int ihk_mc_pt_print_pte(struct page_table *pt, void *virt)
__kprintf("l1 table: 0x%lX l1idx: %d \n", virt_to_phys(pt), l1idx);
if (!(pt->entry[l1idx] & PFL1_PRESENT)) {
__kprintf("0x%lX PTE (l1) not present! entry: 0x%lX\n",
(unsigned long)virt, pt->entry[l1idx]);
__kprintf("0x%lX l1idx not present! \n", (unsigned long)virt);
__kprintf("l1 entry: 0x%lX\n", pt->entry[l1idx]);
return -EFAULT;
}
__kprintf("l1 entry: 0x%lX\n", pt->entry[l1idx]);
return 0;
}
@@ -822,8 +832,16 @@ static int split_large_page(pte_t *ptep)
return -ENOMEM;
}
phys = *ptep & PT_PHYSMASK;
attr = *ptep & ~PFL2_SIZE;
if (!(*ptep & PFL2_FILEOFF)) {
phys = *ptep & PT_PHYSMASK;
attr = *ptep & ~PT_PHYSMASK;
attr &= ~PFL2_SIZE;
}
else {
phys = *ptep & PAGE_MASK; /* file offset */
attr = *ptep & ~PAGE_MASK;
attr &= ~PFL2_SIZE;
}
for (i = 0; i < PT_ENTRIES; ++i) {
pt->entry[i] = (phys + (i * PTL1_SIZE)) | attr;
@@ -833,6 +851,156 @@ static int split_large_page(pte_t *ptep)
return 0;
}
struct visit_pte_args {
page_table_t pt;
enum visit_pte_flag flags;
int padding;
pte_visitor_t *funcp;
void *arg;
};
static int visit_pte_l1(void *arg0, pte_t *ptep, uintptr_t base,
uintptr_t start, uintptr_t end)
{
struct visit_pte_args *args = arg0;
if ((*ptep == PTE_NULL) && (args->flags & VPTEF_SKIP_NULL)) {
return 0;
}
return (*args->funcp)(args->arg, args->pt, ptep, (void *)base,
PTL1_SIZE);
}
static int visit_pte_l2(void *arg0, pte_t *ptep, uintptr_t base,
uintptr_t start, uintptr_t end)
{
int error;
struct visit_pte_args *args = arg0;
struct page_table *pt;
if ((*ptep == PTE_NULL) && (args->flags & VPTEF_SKIP_NULL)) {
return 0;
}
#ifdef USE_LARGE_PAGES
if (((*ptep == PTE_NULL) || (*ptep & PFL2_SIZE))
&& (start <= base)
&& (((base + PTL2_SIZE) <= end)
|| (end == 0))) {
error = (*args->funcp)(args->arg, args->pt, ptep,
(void *)base, PTL2_SIZE);
if (error != -E2BIG) {
return error;
}
}
if (*ptep & PFL2_SIZE) {
ekprintf("visit_pte_l2:split large page\n");
return -ENOMEM;
}
#endif
if (*ptep == PTE_NULL) {
pt = __alloc_new_pt(IHK_MC_AP_NOWAIT);
if (!pt) {
return -ENOMEM;
}
*ptep = virt_to_phys(pt) | PFL2_PDIR_ATTR;
}
else {
pt = phys_to_virt(*ptep & PT_PHYSMASK);
}
error = walk_pte_l1(pt, base, start, end, &visit_pte_l1, arg0);
return error;
}
static int visit_pte_l3(void *arg0, pte_t *ptep, uintptr_t base,
uintptr_t start, uintptr_t end)
{
int error;
struct visit_pte_args *args = arg0;
struct page_table *pt;
if ((*ptep == PTE_NULL) && (args->flags & VPTEF_SKIP_NULL)) {
return 0;
}
#ifdef USE_LARGE_PAGES
if (((*ptep == PTE_NULL) || (*ptep & PFL3_SIZE))
&& (start <= base)
&& (((base + PTL3_SIZE) <= end)
|| (end == 0))) {
error = (*args->funcp)(args->arg, args->pt, ptep,
(void *)base, PTL3_SIZE);
if (error != -E2BIG) {
return error;
}
}
if (*ptep & PFL3_SIZE) {
ekprintf("visit_pte_l3:split large page\n");
return -ENOMEM;
}
#endif
if (*ptep == PTE_NULL) {
pt = __alloc_new_pt(IHK_MC_AP_NOWAIT);
if (!pt) {
return -ENOMEM;
}
*ptep = virt_to_phys(pt) | PFL3_PDIR_ATTR;
}
else {
pt = phys_to_virt(*ptep & PT_PHYSMASK);
}
error = walk_pte_l2(pt, base, start, end, &visit_pte_l2, arg0);
return error;
}
static int visit_pte_l4(void *arg0, pte_t *ptep, uintptr_t base,
uintptr_t start, uintptr_t end)
{
int error;
struct visit_pte_args *args = arg0;
struct page_table *pt;
if ((*ptep == PTE_NULL) && (args->flags & VPTEF_SKIP_NULL)) {
return 0;
}
if (*ptep == PTE_NULL) {
pt = __alloc_new_pt(IHK_MC_AP_NOWAIT);
if (!pt) {
return -ENOMEM;
}
*ptep = virt_to_phys(pt) | PFL4_PDIR_ATTR;
}
else {
pt = phys_to_virt(*ptep & PT_PHYSMASK);
}
error = walk_pte_l3(pt, base, start, end, &visit_pte_l3, arg0);
return error;
}
int visit_pte_range(page_table_t pt, void *start0, void *end0,
enum visit_pte_flag flags, pte_visitor_t *funcp, void *arg)
{
const uintptr_t start = (uintptr_t)start0;
const uintptr_t end = (uintptr_t)end0;
struct visit_pte_args args;
args.pt = pt;
args.flags = flags;
args.funcp = funcp;
args.arg = arg;
return walk_pte_l4(pt, 0, start, end, &visit_pte_l4, &args);
}
struct clear_range_args {
int free_physical;
uint8_t padding[4];
@@ -858,7 +1026,7 @@ static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base,
memobj_flush_page(args->memobj, phys, PTL1_SIZE);
}
if (args->free_physical) {
if (!(old & PFL1_FILEOFF) && args->free_physical) {
page = phys_to_page(phys);
if (page && page_unmap(page)) {
ihk_mc_free_pages(phys_to_virt(phys), 1);
@@ -904,7 +1072,7 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base,
memobj_flush_page(args->memobj, phys, PTL2_SIZE);
}
if (args->free_physical) {
if (!(old & PFL2_FILEOFF) && args->free_physical) {
page = phys_to_page(phys);
if (page && page_unmap(page)) {
ihk_mc_free_pages(phys_to_virt(phys), PTL2_SIZE/PTL1_SIZE);
@@ -998,7 +1166,7 @@ static int change_attr_range_l1(void *arg0, pte_t *ptep, uint64_t base,
{
struct change_attr_args *args = arg0;
if (*ptep == PTE_NULL) {
if ((*ptep == PTE_NULL) || (*ptep & PFL1_FILEOFF)) {
return -ENOENT;
}
@@ -1013,7 +1181,7 @@ static int change_attr_range_l2(void *arg0, pte_t *ptep, uint64_t base,
int error;
struct page_table *pt;
if (*ptep == PTE_NULL) {
if ((*ptep == PTE_NULL) || (*ptep & PFL2_FILEOFF)) {
return -ENOENT;
}
@@ -1032,7 +1200,9 @@ static int change_attr_range_l2(void *arg0, pte_t *ptep, uint64_t base,
}
if (*ptep & PFL2_SIZE) {
*ptep = (*ptep & ~args->clrpte) | args->setpte;
if (!(*ptep & PFL2_FILEOFF)) {
*ptep = (*ptep & ~args->clrpte) | args->setpte;
}
return 0;
}
@@ -1045,7 +1215,7 @@ static int change_attr_range_l3(void *arg0, pte_t *ptep, uint64_t base,
{
struct page_table *pt;
if (*ptep == PTE_NULL) {
if ((*ptep == PTE_NULL) || (*ptep & PFL3_FILEOFF)) {
return -ENOENT;
}
@@ -1632,6 +1802,76 @@ enum ihk_mc_pt_attribute arch_vrflag_to_ptattr(unsigned long flag, uint64_t faul
return attr;
}
struct move_args {
uintptr_t src;
uintptr_t dest;
};
static int move_one_page(void *arg0, page_table_t pt, pte_t *ptep, void *pgaddr, size_t pgsize)
{
int error;
struct move_args *args = arg0;
uintptr_t dest;
pte_t apte;
uintptr_t phys;
enum ihk_mc_pt_attribute attr;
dkprintf("move_one_page(%p,%p,%p %#lx,%p,%#lx)\n",
arg0, pt, ptep, *ptep, pgaddr, pgsize);
if (pte_is_fileoff(ptep, pgsize)) {
error = -ENOTSUPP;
kprintf("move_one_page(%p,%p,%p %#lx,%p,%#lx):fileoff. %d\n",
arg0, pt, ptep, *ptep, pgaddr, pgsize, error);
goto out;
}
dest = args->dest + ((uintptr_t)pgaddr - args->src);
apte = PTE_NULL;
pte_xchg(ptep, &apte);
phys = apte & PT_PHYSMASK;
attr = apte & ~PT_PHYSMASK;
error = ihk_mc_pt_set_range(pt, (void *)dest,
(void *)(dest + pgsize), phys, attr);
if (error) {
kprintf("move_one_page(%p,%p,%p %#lx,%p,%#lx):"
"set failed. %d\n",
arg0, pt, ptep, *ptep, pgaddr, pgsize, error);
goto out;
}
error = 0;
out:
dkprintf("move_one_page(%p,%p,%p %#lx,%p,%#lx):%d\n",
arg0, pt, ptep, *ptep, pgaddr, pgsize, error);
return error;
}
int move_pte_range(page_table_t pt, void *src, void *dest, size_t size)
{
int error;
struct move_args args;
dkprintf("move_pte_range(%p,%p,%p,%#lx)\n", pt, src, dest, size);
args.src = (uintptr_t)src;
args.dest = (uintptr_t)dest;
error = visit_pte_range(pt, src, src+size, VPTEF_SKIP_NULL,
&move_one_page, &args);
flush_tlb(); /* XXX: TLB flush */
if (error) {
goto out;
}
error = 0;
out:
dkprintf("move_pte_range(%p,%p,%p,%#lx):%d\n",
pt, src, dest, size, error);
return error;
}
void load_page_table(struct page_table *pt)
{
unsigned long pt_addr;

View File

@@ -288,14 +288,16 @@ check_signal(unsigned long rc, void *regs0)
unsigned long
do_kill(int pid, int tid, int sig)
{
struct cpu_local_var *v;
struct process *p;
struct process *proc = cpu_local_var(current);
struct process *tproc = NULL;
int i;
__sigset_t mask;
struct sig_pending *pending;
struct list_head *head;
int irqstate;
int rc;
unsigned long irqstate;
if(proc == NULL || proc->pid == 0){
return -ESRCH;
@@ -314,37 +316,52 @@ do_kill(int pid, int tid, int sig)
}
else{
for(i = 0; i < num_processors; i++){
if(get_cpu_local_var(i)->current &&
get_cpu_local_var(i)->current->pid == pid){
tproc = get_cpu_local_var(i)->current;
break;
v = get_cpu_local_var(i);
irqstate = ihk_mc_spinlock_lock(&(v->runq_lock));
list_for_each_entry(p, &(v->runq), sched_list){
if(p->pid == pid){
tproc = p;
break;
}
}
ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
}
}
}
else if(pid == -1){
for(i = 0; i < num_processors; i++)
if(get_cpu_local_var(i)->current &&
get_cpu_local_var(i)->current->pid > 0 &&
get_cpu_local_var(i)->current->tid == tid){
tproc = get_cpu_local_var(i)->current;
break;
for(i = 0; i < num_processors; i++){
v = get_cpu_local_var(i);
irqstate = ihk_mc_spinlock_lock(&(v->runq_lock));
list_for_each_entry(p, &(v->runq), sched_list){
if(p->pid > 0 &&
p->tid == tid){
tproc = p;
break;
}
}
ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
}
}
else{
if(pid == 0)
return -ESRCH;
for(i = 0; i < num_processors; i++)
if(get_cpu_local_var(i)->current &&
get_cpu_local_var(i)->current->pid == pid &&
get_cpu_local_var(i)->current->tid == tid){
tproc = get_cpu_local_var(i)->current;
break;
for(i = 0; i < num_processors; i++){
v = get_cpu_local_var(i);
irqstate = ihk_mc_spinlock_lock(&(v->runq_lock));
list_for_each_entry(p, &(v->runq), sched_list){
if(p->pid == pid &&
p->tid == tid){
tproc = p;
break;
}
}
ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
}
}
if(!tproc)
if(!tproc){
return -ESRCH;
}
if(sig == 0)
return 0;
@@ -375,7 +392,7 @@ do_kill(int pid, int tid, int sig)
}
else{
list_add_tail(&pending->list, head);
proc->sigevent = 1;
tproc->sigevent = 1;
}
}
if(tid == -1){

View File

@@ -1,6 +1,7 @@
IHKDIR=$(IHKBASE)/$(TARGETDIR)
OBJS = init.o mem.o debug.o mikc.o listeners.o ap.o syscall.o cls.o host.o
OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o
OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o shmobj.o
OBJS += zeroobj.o
DEPSRCS=$(wildcard $(SRC)/*.c)
CFLAGS += -I$(SRC)/include -mcmodel=kernel -D__KERNEL__

View File

@@ -184,6 +184,7 @@ int fileobj_create(int fd, struct memobj **objp, int *maxprotp)
memset(newobj, 0, sizeof(*newobj));
newobj->memobj.ops = &fileobj_ops;
newobj->memobj.flags = MF_HAS_PAGER;
newobj->handle = result.handle;
newobj->sref = 1;
newobj->cref = 1;

View File

@@ -30,6 +30,9 @@ struct malloc_header {
#define CPU_STATUS_RUNNING (2)
extern ihk_spinlock_t cpu_status_lock;
#define CPU_FLAG_NEED_RESCHED 0x1U
#define CPU_FLAG_NEED_MIGRATE 0x2U
struct cpu_local_var {
/* malloc */
struct malloc_header free_list;
@@ -54,6 +57,11 @@ struct cpu_local_var {
int fs;
struct list_head pending_free_pages;
unsigned int flags;
ihk_spinlock_t migq_lock;
struct list_head migq;
} __attribute__((aligned(64)));

View File

@@ -16,10 +16,19 @@
#include <ihk/types.h>
#include <ihk/atomic.h>
#include <ihk/lock.h>
#include <errno.h>
#include <list.h>
#include <shm.h>
enum {
/* for memobj.flags */
MF_HAS_PAGER = 0x0001,
};
struct memobj {
struct memobj_ops * ops;
uint32_t flags;
int8_t padding[4];
ihk_spinlock_t lock;
};
@@ -39,29 +48,42 @@ struct memobj_ops {
static inline void memobj_release(struct memobj *obj)
{
(*obj->ops->release)(obj);
if (obj->ops->release) {
(*obj->ops->release)(obj);
}
}
static inline void memobj_ref(struct memobj *obj)
{
(*obj->ops->ref)(obj);
if (obj->ops->ref) {
(*obj->ops->ref)(obj);
}
}
static inline int memobj_get_page(struct memobj *obj, off_t off,
int p2align, uintptr_t *physp)
{
return (*obj->ops->get_page)(obj, off, p2align, physp);
if (obj->ops->get_page) {
return (*obj->ops->get_page)(obj, off, p2align, physp);
}
return -ENXIO;
}
static inline uintptr_t memobj_copy_page(struct memobj *obj,
uintptr_t orgphys, int p2align)
{
return (*obj->ops->copy_page)(obj, orgphys, p2align);
if (obj->ops->copy_page) {
return (*obj->ops->copy_page)(obj, orgphys, p2align);
}
return -ENXIO;
}
static inline int memobj_flush_page(struct memobj *obj, uintptr_t phys, size_t pgsize)
{
return (*obj->ops->flush_page)(obj, phys, pgsize);
if (obj->ops->flush_page) {
return (*obj->ops->flush_page)(obj, phys, pgsize);
}
return 0;
}
static inline void memobj_lock(struct memobj *obj)
@@ -74,6 +96,13 @@ static inline void memobj_unlock(struct memobj *obj)
ihk_mc_spinlock_unlock_noirq(&obj->lock);
}
static inline int memobj_has_pager(struct memobj *obj)
{
return !!(obj->flags & MF_HAS_PAGER);
}
int fileobj_create(int fd, struct memobj **objp, int *maxprotp);
int shmobj_create(struct shmid_ds *ds, struct memobj **objp);
int zeroobj_create(struct memobj **objp);
#endif /* HEADER_MEMOBJ_H */

View File

@@ -63,4 +63,10 @@
#define MADV_HWPOISON 100
#define MADV_SOFT_OFFLINE 101
/*
* for mremap()
*/
#define MREMAP_MAYMOVE 0x01
#define MREMAP_FIXED 0x02
#endif /* HEADER_MMAN_H */

View File

@@ -20,6 +20,7 @@
#include <list.h>
#include <signal.h>
#include <memobj.h>
#include <affinity.h>
#define VR_NONE 0x0
#define VR_STACK 0x1
@@ -29,6 +30,7 @@
#define VR_DEMAND_PAGING 0x1000
#define VR_PRIVATE 0x2000
#define VR_LOCKED 0x4000
#define VR_FILEOFF 0x8000 /* remap_file_pages()ed range */
#define VR_PROT_NONE 0x00000000
#define VR_PROT_READ 0x00010000
#define VR_PROT_WRITE 0x00020000
@@ -186,6 +188,7 @@ struct process {
void *pgio_arg;
struct fork_tree_node *ftn;
cpu_set_t cpu_set;
unsigned long saved_auxv[AUXV_LEN];
};
@@ -231,12 +234,16 @@ int join_process_memory_range(struct process *process, struct vm_range *survivin
int change_prot_process_memory_range(
struct process *process, struct vm_range *range,
unsigned long newflag);
int remap_process_memory_range(struct process_vm *vm, struct vm_range *range,
uintptr_t start, uintptr_t end, off_t off);
struct vm_range *lookup_process_memory_range(
struct process_vm *vm, uintptr_t start, uintptr_t end);
struct vm_range *next_process_memory_range(
struct process_vm *vm, struct vm_range *range);
struct vm_range *previous_process_memory_range(
struct process_vm *vm, struct vm_range *range);
int extend_up_process_memory_range(struct process_vm *vm,
struct vm_range *range, uintptr_t newend);
int page_fault_process(struct process *proc, void *fault_addr, uint64_t reason);
int remove_process_region(struct process *proc,
@@ -256,4 +263,7 @@ void runq_add_proc(struct process *proc, int cpu_id);
void runq_del_proc(struct process *proc, int cpu_id);
int sched_wakeup_process(struct process *proc, int valid_states);
void sched_request_migrate(int cpu_id, struct process *proc);
void check_need_resched(void);
#endif

49
kernel/include/shm.h Normal file
View File

@@ -0,0 +1,49 @@
/**
* \file shm.h
* License details are found in the file LICENSE.
* \brief
* header file for System V shared memory
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
*/
/*
* HISTORY:
*/
#ifndef HEADER_SHM_H
#define HEADER_SHM_H
/* begin types.h */
typedef int32_t key_t;
typedef uint32_t uid_t;
typedef uint32_t gid_t;
typedef int64_t time_t;
typedef int32_t pid_t;
/* end types.h */
typedef uint64_t shmatt_t;
struct ipc_perm {
key_t key;
uid_t uid;
gid_t gid;
uid_t cuid;
gid_t cgid;
uint16_t mode;
uint8_t padding[2];
uint16_t seq;
uint8_t padding2[22];
};
struct shmid_ds {
struct ipc_perm shm_perm;
size_t shm_segsz;
time_t shm_atime;
time_t shm_dtime;
time_t shm_ctime;
pid_t shm_cpid;
pid_t shm_lpid;
shmatt_t shm_nattch;
uint8_t padding[16];
};
#endif /* HEADER_SHM_H */

View File

@@ -211,8 +211,11 @@ static void post_init(void)
}
if (find_command_line("hidos")) {
extern ihk_spinlock_t syscall_lock;
init_host_syscall_channel();
init_host_syscall_channel2();
ihk_mc_spinlock_init(&syscall_lock);
}
ap_start();
}

View File

@@ -18,6 +18,16 @@
#include <ihk/ikc.h>
#include <ikc/master.h>
//#define DEBUG_LISTENERS
#ifdef DEBUG_LISTENERS
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif
static unsigned long read_tsc(void)
{
unsigned int low, high;
@@ -103,5 +113,5 @@ static struct ihk_ikc_listen_param test_listen_param = {
void mc_ikc_test_init(void)
{
ihk_ikc_listen_port(NULL, &test_listen_param);
kprintf("Listener registered port %d\n", 500);
dkprintf("Listener registered port %d\n", 500);
}

View File

@@ -281,6 +281,7 @@ out:
dkprintf("[%d]page_fault_handler(%p,%lx,%p): (%d)\n",
ihk_mc_get_processor_id(), fault_addr, reason,
regs, error);
check_need_resched();
return;
}

View File

@@ -253,7 +253,7 @@ struct process *clone_process(struct process *org, unsigned long pc,
goto err_free_sighandler;
}
memset(proc->sighandler, '\0', sizeof(struct sig_handler));
memcpy(proc->sighandler, org->sighandler, sizeof(struct sig_handler));
ihk_atomic_set(&proc->sighandler->use, 1);
ihk_mc_spinlock_init(&proc->sighandler->lock);
ihk_atomic_set(&proc->sigshared->use, 1);
@@ -936,6 +936,39 @@ struct vm_range *previous_process_memory_range(
return prev;
}
int extend_up_process_memory_range(struct process_vm *vm,
struct vm_range *range, uintptr_t newend)
{
int error;
struct vm_range *next;
dkprintf("exntend_up_process_memory_range(%p,%p %#lx-%#lx,%#lx)\n",
vm, range, range->start, range->end, newend);
if (newend <= range->end) {
error = -EINVAL;
goto out;
}
if (vm->region.user_end < newend) {
error = -EPERM;
goto out;
}
next = next_process_memory_range(vm ,range);
if (next && (next->start < newend)) {
error = -ENOMEM;
goto out;
}
error = 0;
range->end = newend;
out:
dkprintf("exntend_up_process_memory_range(%p,%p %#lx-%#lx,%#lx):%d\n",
vm, range, range->start, range->end, newend, error);
return error;
}
int change_prot_process_memory_range(struct process *proc,
struct vm_range *range, unsigned long protflag)
{
@@ -997,6 +1030,94 @@ out:
return error;
}
struct rfp_args {
off_t off;
uintptr_t start;
struct memobj *memobj;
};
static int remap_one_page(void *arg0, page_table_t pt, pte_t *ptep,
void *pgaddr, size_t pgsize)
{
struct rfp_args * const args = arg0;
int error;
off_t off;
pte_t apte;
uintptr_t phys;
struct page *page;
dkprintf("remap_one_page(%p,%p,%p %#lx,%p,%#lx)\n",
arg0, pt, ptep, *ptep, pgaddr, pgsize);
/* XXX: NYI: large pages */
if (pgsize != PAGE_SIZE) {
error = -E2BIG;
ekprintf("remap_one_page(%p,%p,%p %#lx,%p,%#lx):%d\n",
arg0, pt, ptep, *ptep, pgaddr, pgsize, error);
goto out;
}
off = args->off + ((uintptr_t)pgaddr - args->start);
pte_make_fileoff(off, 0, pgsize, &apte);
pte_xchg(ptep, &apte);
flush_tlb_single((uintptr_t)pgaddr); /* XXX: TLB flush */
if (pte_is_null(&apte) || pte_is_fileoff(&apte, pgsize)) {
error = 0;
goto out;
}
phys = pte_get_phys(&apte);
if (pte_is_dirty(&apte, pgsize)) {
memobj_flush_page(args->memobj, phys, pgsize); /* XXX: in lock period */
}
page = phys_to_page(phys);
if (page && page_unmap(page)) {
ihk_mc_free_pages(phys_to_virt(phys), pgsize/PAGE_SIZE);
}
error = 0;
out:
dkprintf("remap_one_page(%p,%p,%p %#lx,%p,%#lx): %d\n",
arg0, pt, ptep, *ptep, pgaddr, pgsize, error);
return error;
}
int remap_process_memory_range(struct process_vm *vm, struct vm_range *range,
uintptr_t start, uintptr_t end, off_t off)
{
struct rfp_args args;
int error;
dkprintf("remap_process_memory_range(%p,%p,%#lx,%#lx,%#lx)\n",
vm, range, start, end, off);
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
memobj_lock(range->memobj);
args.start = start;
args.off = off;
args.memobj = range->memobj;
error = visit_pte_range(vm->page_table, (void *)start,
(void *)end, VPTEF_DEFAULT, &remap_one_page, &args);
if (error) {
ekprintf("remap_process_memory_range(%p,%p,%#lx,%#lx,%#lx):"
"visit pte failed %d\n",
vm, range, start, end, off, error);
goto out;
}
error = 0;
out:
memobj_unlock(range->memobj);
ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
dkprintf("remap_process_memory_range(%p,%p,%#lx,%#lx,%#lx):%d\n",
vm, range, start, end, off, error);
return error;
}
static int page_fault_process_memory_range(struct process_vm *vm, struct vm_range *range, uintptr_t fault_addr, uint64_t reason)
{
int error;
@@ -1012,7 +1133,8 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
/*****/
ptep = ihk_mc_pt_lookup_pte(vm->page_table, (void *)fault_addr, &pgaddr, &pgsize, &p2align);
if (!(reason & PF_PROT) && ptep && !pte_is_null(ptep)) {
if (!(reason & PF_PROT) && ptep && !pte_is_null(ptep)
&& !pte_is_fileoff(ptep, pgsize)) {
if (!pte_is_present(ptep)) {
error = -EFAULT;
kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):PROT_NONE. %d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error);
@@ -1034,11 +1156,16 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang
}
attr = arch_vrflag_to_ptattr(range->flag, reason, ptep);
pgaddr = (void *)(fault_addr & ~(pgsize - 1));
if (!ptep || pte_is_null(ptep)) {
if (!ptep || pte_is_null(ptep) || pte_is_fileoff(ptep, pgsize)) {
if (range->memobj) {
off_t off;
off = range->objoff + ((uintptr_t)pgaddr - range->start);
if (!ptep || !pte_is_fileoff(ptep, pgsize)) {
off = range->objoff + ((uintptr_t)pgaddr - range->start);
}
else {
off = pte_get_off(ptep, pgsize);
}
error = memobj_get_page(range->memobj, off, p2align, &phys);
if (error) {
if (error != -ERESTART) {
@@ -1590,6 +1717,9 @@ void sched_init(void)
cpu_local_var(runq_len) = 0;
ihk_mc_spinlock_init(&cpu_local_var(runq_lock));
INIT_LIST_HEAD(&cpu_local_var(migq));
ihk_mc_spinlock_init(&cpu_local_var(migq_lock));
#ifdef TIMER_CPU_ID
if (ihk_mc_get_processor_id() == TIMER_CPU_ID) {
init_timers();
@@ -1598,6 +1728,72 @@ void sched_init(void)
#endif
}
static void double_rq_lock(struct cpu_local_var *v1, struct cpu_local_var *v2)
{
if (v1 < v2) {
ihk_mc_spinlock_lock_noirq(&v1->runq_lock);
ihk_mc_spinlock_lock_noirq(&v2->runq_lock);
} else {
ihk_mc_spinlock_lock_noirq(&v2->runq_lock);
ihk_mc_spinlock_lock_noirq(&v1->runq_lock);
}
}
static void double_rq_unlock(struct cpu_local_var *v1, struct cpu_local_var *v2)
{
ihk_mc_spinlock_unlock_noirq(&v1->runq_lock);
ihk_mc_spinlock_unlock_noirq(&v2->runq_lock);
}
struct migrate_request {
struct list_head list;
struct process *proc;
struct waitq wq;
};
static void do_migrate(void)
{
int cur_cpu_id = ihk_mc_get_processor_id();
struct cpu_local_var *cur_v = get_cpu_local_var(cur_cpu_id);
struct migrate_request *req, *tmp;
ihk_mc_spinlock_lock_noirq(&cur_v->migq_lock);
list_for_each_entry_safe(req, tmp, &cur_v->migq, list) {
int cpu_id;
struct cpu_local_var *v;
/* 0. check if migration is necessary */
list_del(&req->list);
if (req->proc->cpu_id != cur_cpu_id) /* already not here */
goto ack;
if (CPU_ISSET(cur_cpu_id, &req->proc->cpu_set)) /* good affinity */
goto ack;
/* 1. select CPU */
for (cpu_id = 0; cpu_id < CPU_SETSIZE; cpu_id++)
if (CPU_ISSET(cpu_id, &req->proc->cpu_set))
break;
if (CPU_SETSIZE == cpu_id) /* empty affinity (bug?) */
goto ack;
/* 2. migrate thread */
v = get_cpu_local_var(cpu_id);
double_rq_lock(cur_v, v);
list_del(&req->proc->sched_list);
cur_v->runq_len -= 1;
req->proc->cpu_id = cpu_id;
list_add_tail(&req->proc->sched_list, &v->runq);
v->runq_len += 1;
if (v->runq_len == 1)
ihk_mc_interrupt_cpu(get_x86_cpu_local_variable(cpu_id)->apic_id, 0xd1);
double_rq_unlock(cur_v, v);
ack:
waitq_wakeup(&req->wq);
}
ihk_mc_spinlock_unlock_noirq(&cur_v->migq_lock);
}
void schedule(void)
{
struct cpu_local_var *v = get_this_cpu_local_var();
@@ -1606,6 +1802,7 @@ void schedule(void)
unsigned long irqstate;
struct process *last;
redo:
irqstate = ihk_mc_spinlock_lock(&(v->runq_lock));
next = NULL;
@@ -1621,25 +1818,26 @@ void schedule(void)
list_add_tail(&prev->sched_list, &(v->runq));
++v->runq_len;
}
}
if (!v->runq_len) {
if (v->flags & CPU_FLAG_NEED_MIGRATE) {
next = &cpu_local_var(idle);
} else {
/* Pick a new running process */
list_for_each_entry_safe(proc, tmp, &(v->runq), sched_list) {
if (proc->status == PS_RUNNING) {
next = proc;
break;
}
}
/* No process? Run idle.. */
if (!next) {
next = &cpu_local_var(idle);
v->status = CPU_STATUS_IDLE;
}
}
/* Pick a new running process */
list_for_each_entry_safe(proc, tmp, &(v->runq), sched_list) {
if (proc->status == PS_RUNNING) {
next = proc;
break;
}
}
/* No process? Run idle.. */
if (!next) {
next = &cpu_local_var(idle);
}
if (prev != next) {
switch_ctx = 1;
v->current = next;
@@ -1675,6 +1873,21 @@ void schedule(void)
else {
ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
}
if (v->flags & CPU_FLAG_NEED_MIGRATE) {
v->flags &= ~CPU_FLAG_NEED_MIGRATE;
do_migrate();
goto redo;
}
}
/* Reschedule the current CPU if its NEED_RESCHED flag was raised;
 * the flag is consumed (cleared) before calling schedule(). */
void check_need_resched(void)
{
	struct cpu_local_var *v = get_this_cpu_local_var();

	if (!(v->flags & CPU_FLAG_NEED_RESCHED)) {
		return;
	}
	v->flags &= ~CPU_FLAG_NEED_RESCHED;
	schedule();
}
@@ -1719,6 +1932,49 @@ int sched_wakeup_process(struct process *proc, int valid_states)
return status;
}
/*
 * Request that 'proc' be migrated off CPU 'cpu_id' (the CPU it is
 * currently queued on) and wait until the migration has completed.
 *
 * 1. Add current process to waitq
 * 2. Queue migration request into the target CPU's queue
 * 3. Kick migration on the CPU
 * 4. Wait for completion of the migration
 *
 * struct migrate_request {
 *	list //migq,
 *	wq,
 *	proc
 * }
 *
 * [expected processing of the target CPU]
 * 1. Interrupted by IPI
 * 2. call schedule() via check_need_resched()
 * 3. Do migration
 * 4. Wake up this thread
 */
void sched_request_migrate(int cpu_id, struct process *proc)
{
	struct cpu_local_var *v = get_cpu_local_var(cpu_id);
	/* The request lives on this stack frame; do_migrate() on the
	 * target CPU wakes req.wq when the thread has been moved. */
	struct migrate_request req = { .proc = proc };
	unsigned long irqstate;
	DECLARE_WAITQ_ENTRY(entry, cpu_local_var(current));

	/* Enter the wait state BEFORE publishing the request, so the
	 * wakeup cannot be lost even if the target CPU services the
	 * request immediately. */
	waitq_init(&req.wq);
	waitq_prepare_to_wait(&req.wq, &entry, PS_UNINTERRUPTIBLE);
	irqstate = ihk_mc_spinlock_lock(&v->migq_lock);
	list_add_tail(&req.list, &v->migq);
	ihk_mc_spinlock_unlock(&v->migq_lock, irqstate);
	/* NOTE(review): flags/status are modified without holding the
	 * target CPU's runq_lock -- presumably benign here, confirm. */
	v->flags |= CPU_FLAG_NEED_RESCHED | CPU_FLAG_NEED_MIGRATE;
	v->status = CPU_STATUS_RUNNING;
	if (cpu_id != ihk_mc_get_processor_id())
		ihk_mc_interrupt_cpu(/* Kick scheduler */
				get_x86_cpu_local_variable(cpu_id)->apic_id, 0xd1);
	/* We were marked PS_UNINTERRUPTIBLE above, so this puts us to
	 * sleep until the migration is acknowledged. */
	schedule();
	waitq_finish_wait(&req.wq, &entry);
}
/* Runq lock must be held here */

287
kernel/shmobj.c Normal file
View File

@@ -0,0 +1,287 @@
/**
* \file shmobj.c
* License details are found in the file LICENSE.
* \brief
* shared memory object
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
*/
/*
* HISTORY:
*/
#include <ihk/atomic.h>
#include <ihk/debug.h>
#include <ihk/lock.h>
#include <ihk/mm.h>
#include <errno.h>
#include <kmalloc.h>
#include <list.h>
#include <memobj.h>
#include <memory.h>
#include <page.h>
#include <shm.h>
#include <string.h>
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#define fkprintf(...) kprintf(__VA_ARGS__)
/* Shared-memory object backing MAP_SHARED anonymous mappings; exposed
 * through the generic struct memobj interface. */
struct shmobj {
	struct memobj memobj;		/* common part; must be first so that
					 * (struct memobj *) casts work */
	long ref;			/* reference count, guarded by memobj.lock */
	struct shmid_ds ds;		/* segment attributes; ds.shm_segsz = size */
	struct list_head page_list;	/* pages allocated for this object */
};
/* memobj operations implemented by this file (forward declarations) */
static memobj_release_func_t shmobj_release;
static memobj_ref_func_t shmobj_ref;
static memobj_get_page_func_t shmobj_get_page;

static struct memobj_ops shmobj_ops = {
	.release =	&shmobj_release,
	.ref =		&shmobj_ref,
	.get_page =	&shmobj_get_page,
};
/* Downcast a generic memobj to its enclosing shmobj; valid because
 * memobj is the first member, so the addresses coincide. */
static struct shmobj *to_shmobj(struct memobj *memobj)
{
	struct shmobj *obj = (struct shmobj *)memobj;

	return obj;
}
/* Upcast a shmobj to its embedded generic memobj. */
static struct memobj *to_memobj(struct shmobj *shmobj)
{
	struct memobj *memobj = &shmobj->memobj;

	return memobj;
}
/***********************************************************************
* page_list
*/
/* Initialize the object's (empty) page cache list. */
static void page_list_init(struct shmobj *obj)
{
	INIT_LIST_HEAD(&obj->page_list);
}
/* Add a newly allocated page to the object's page cache. */
static void page_list_insert(struct shmobj *obj, struct page *page)
{
	list_add(&page->list, &obj->page_list);
}
/* Unlink a page from the object's page cache. */
static void page_list_remove(struct shmobj *obj, struct page *page)
{
	list_del(&page->list);
}
/* Return the cached page at offset 'off', or NULL if none exists.
 * Linear scan; the caller must hold the object's lock. */
static struct page *page_list_lookup(struct shmobj *obj, off_t off)
{
	struct page *p;

	list_for_each_entry(p, &obj->page_list, list) {
		if (p->offset == off) {
			return p;
		}
	}
	return NULL;
}
/* Return any cached page (the head of the list), or NULL when empty. */
static struct page *page_list_first(struct shmobj *obj)
{
	struct list_head *head = &obj->page_list;

	if (list_empty(head)) {
		return NULL;
	}
	return list_first_entry(head, struct page, list);
}
/**
 * shmobj_create - allocate a shared memory object described by 'ds'.
 * @ds:   segment attributes to copy (ds->shm_segsz is the size).
 * @objp: out parameter; receives the new object on success.
 *
 * Returns 0 on success (caller holds the initial reference, ref == 1)
 * or -ENOMEM when allocation fails.
 */
int shmobj_create(struct shmid_ds *ds, struct memobj **objp)
{
	int error = 0;
	struct shmobj *obj;

	dkprintf("shmobj_create(%p %#lx,%p)\n", ds, ds->shm_segsz, objp);
	obj = kmalloc(sizeof(*obj), IHK_MC_AP_NOWAIT);
	if (!obj) {
		error = -ENOMEM;
		ekprintf("shmobj_create(%p %#lx,%p):kmalloc failed. %d\n",
				ds, ds->shm_segsz, objp, error);
	}
	else {
		memset(obj, 0, sizeof(*obj));
		obj->memobj.ops = &shmobj_ops;
		obj->ref = 1;	/* the caller's reference */
		obj->ds = *ds;
		page_list_init(obj);
		ihk_mc_spinlock_init(&obj->memobj.lock);
		*objp = to_memobj(obj);
	}
	dkprintf("shmobj_create(%p %#lx,%p):%d %p\n",
			ds, ds->shm_segsz, objp, error, *objp);
	return error;
}
/* Drop one reference on a shmobj. When the count reaches zero the
 * object and all of its cached pages are freed; a negative count is a
 * fatal refcount bug and panics. */
static void shmobj_release(struct memobj *memobj)
{
	struct shmobj *obj = to_shmobj(memobj);
	struct shmobj *freeobj = NULL;

	dkprintf("shmobj_release(%p)\n", memobj);
	memobj_lock(&obj->memobj);
	--obj->ref;
	if (obj->ref <= 0) {
		if (obj->ref < 0) {
			/* more releases than references: fatal */
			fkprintf("shmobj_release(%p):ref %ld\n",
					memobj, obj->ref);
			panic("shmobj_release:freeing free shmobj");
		}
		freeobj = obj;
	}
	memobj_unlock(&obj->memobj);
	if (freeobj) {
		/* zap page_list -- no lock needed anymore, nobody else
		 * can hold a reference to this object */
		for (;;) {
			struct page *page;
			int count;

			page = page_list_first(obj);
			if (!page) {
				break;
			}
			page_list_remove(obj, page);
			dkprintf("shmobj_release(%p):"
					"release page. %p %#lx %d %d",
					memobj, page, page_to_phys(page),
					page->mode, page->count);
			/* the object's reference must be the last one */
			count = ihk_atomic_sub_return(1, &page->count);
			if (!((page->mode == PM_MAPPED) && (count == 0))) {
				fkprintf("shmobj_release(%p): "
						"page %p phys %#lx mode %#x"
						" count %d off %#lx\n",
						memobj, page,
						page_to_phys(page),
						page->mode, count,
						page->offset);
				panic("shmobj_release");
			}
			/* XXX:NYI: large pages */
			page->mode = PM_NONE;
			/* NOTE(review): pages were allocated with
			 * ihk_mc_alloc_pages() but are returned via
			 * free_pages() -- presumably equivalent; confirm. */
			free_pages(phys_to_virt(page_to_phys(page)), 1);
		}
		dkprintf("shmobj_release(%p):free shmobj", memobj);
		kfree(freeobj);
	}
	dkprintf("shmobj_release(%p):\n", memobj);
	return;
}
/* Take an additional reference on a shmobj (memobj.ref operation). */
static void shmobj_ref(struct memobj *memobj)
{
	struct shmobj *obj = to_shmobj(memobj);
	long newref;

	dkprintf("shmobj_ref(%p)\n", memobj);
	memobj_lock(&obj->memobj);
	obj->ref++;
	newref = obj->ref;
	memobj_unlock(&obj->memobj);
	dkprintf("shmobj_ref(%p): newref %ld\n", memobj, newref);
}
/* memobj.get_page operation: return (in *physp) the physical address
 * of the page backing offset 'off', allocating and zero-filling it on
 * first access. The returned page carries an extra reference for the
 * caller. Returns 0 or a negative errno. */
static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align,
		uintptr_t *physp)
{
	struct shmobj *obj = to_shmobj(memobj);
	int error;
	struct page *page;
	int npages;
	void *virt = NULL;	/* non-NULL only while we own an unpublished page */
	uintptr_t phys = -1;

	dkprintf("shmobj_get_page(%p,%#lx,%d,%p)\n",
			memobj, off, p2align, physp);
	memobj_lock(&obj->memobj);
	/* offset must be page aligned */
	if (off & ~PAGE_MASK) {
		error = -EINVAL;
		ekprintf("shmobj_get_page(%p,%#lx,%d,%p):invalid argument. %d\n",
				memobj, off, p2align, physp, error);
		goto out;
	}
	if (p2align != PAGE_P2ALIGN) {	/* XXX:NYI:large pages */
		error = -ENOMEM;
		ekprintf("shmobj_get_page(%p,%#lx,%d,%p):large page. %d\n",
				memobj, off, p2align, physp, error);
		goto out;
	}
	/* offset must lie inside the segment... */
	if (obj->ds.shm_segsz <= off) {
		error = -ERANGE;
		ekprintf("shmobj_get_page(%p,%#lx,%d,%p):beyond the end. %d\n",
				memobj, off, p2align, physp, error);
		goto out;
	}
	/* ...and the whole requested page must fit before its end */
	if ((obj->ds.shm_segsz - off) < (PAGE_SIZE << p2align)) {
		error = -ENOSPC;
		ekprintf("shmobj_get_page(%p,%#lx,%d,%p):too large. %d\n",
				memobj, off, p2align, physp, error);
		goto out;
	}
	page = page_list_lookup(obj, off);
	if (!page) {
		/* first touch: allocate and zero-fill a fresh page */
		npages = 1 << p2align;
		virt = ihk_mc_alloc_pages(npages, IHK_MC_AP_NOWAIT);
		if (!virt) {
			error = -ENOMEM;
			ekprintf("shmobj_get_page(%p,%#lx,%d,%p):"
					"alloc failed. %d\n",
					memobj, off, p2align, physp, error);
			goto out;
		}
		phys = virt_to_phys(virt);
		page = phys_to_page(phys);
		/* a freshly allocated page must be unclaimed */
		if (page->mode != PM_NONE) {
			fkprintf("shmobj_get_page(%p,%#lx,%d,%p):"
					"page %p %#lx %d %d %#lx\n",
					memobj, off, p2align, physp,
					page, page_to_phys(page), page->mode,
					page->count, page->offset);
			panic("shmobj_get_page()");
		}
		memset(virt, 0, npages*PAGE_SIZE);
		page->mode = PM_MAPPED;
		page->offset = off;
		ihk_atomic_set(&page->count, 1);	/* the object's own reference */
		page_list_insert(obj, page);
		virt = NULL;	/* published; don't free on the way out */
		dkprintf("shmobj_get_page(%p,%#lx,%d,%p):alloc page. %p %#lx\n",
				memobj, off, p2align, physp, page, phys);
	}
	ihk_atomic_inc(&page->count);	/* caller's reference */
	error = 0;
	*physp = page_to_phys(page);
out:
	memobj_unlock(&obj->memobj);
	if (virt) {
		/* npages is always set before virt becomes non-NULL */
		ihk_mc_free_pages(virt, npages);
	}
	dkprintf("shmobj_get_page(%p,%#lx,%d,%p):%d\n",
			memobj, off, p2align, physp, error);
	return error;
}

View File

@@ -45,6 +45,7 @@
#include <mman.h>
#include <kmalloc.h>
#include <memobj.h>
#include <shm.h>
/* Headers taken from kitten LWK */
#include <lwk/stddef.h>
@@ -168,6 +169,7 @@ static void send_syscall(struct syscall_request *req, int cpu, int pid)
#endif
}
ihk_spinlock_t syscall_lock;
long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx,
int cpu, int pid)
@@ -176,6 +178,9 @@ long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx,
struct syscall_request req2 IHK_DMA_ALIGN;
struct syscall_params *scp;
int error;
long rc;
int islock = 0;
unsigned long irqstate;
dkprintf("SC(%d)[%3d] sending syscall\n",
ihk_mc_get_processor_id(),
@@ -184,6 +189,8 @@ long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx,
if(req->number == __NR_exit_group ||
req->number == __NR_kill){ // interrupt syscall
scp = &get_cpu_local_var(0)->scp2;
islock = 1;
irqstate = ihk_mc_spinlock_lock(&syscall_lock);
}
else{
scp = &get_cpu_local_var(cpu)->scp;
@@ -209,7 +216,7 @@ long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx,
cpu_local_var(current)->pid);
error = page_fault_process(get_cpu_local_var(cpu)->current,
(void *)res->fault_address,
res->fault_reason);
res->fault_reason|PF_POPULATE);
/* send result */
req2.number = __NR_mmap;
@@ -225,7 +232,12 @@ long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx,
ihk_mc_get_processor_id(),
req->number, res->ret);
return res->ret;
rc = res->ret;
if(islock){
ihk_mc_spinlock_unlock(&syscall_lock, irqstate);
}
return rc;
}
long syscall_generic_forwarding(int n, ihk_mc_user_context_t *ctx)
@@ -630,12 +642,13 @@ SYSCALL_DECLARE(mmap)
const int prot = ihk_mc_syscall_arg2(ctx);
const int flags = ihk_mc_syscall_arg3(ctx);
const int fd = ihk_mc_syscall_arg4(ctx);
const off_t off = ihk_mc_syscall_arg5(ctx);
const off_t off0 = ihk_mc_syscall_arg5(ctx);
struct process *proc = cpu_local_var(current);
struct vm_regions *region = &proc->vm->region;
intptr_t addr;
size_t len;
off_t off;
int error;
intptr_t npages;
int p2align;
@@ -646,10 +659,11 @@ SYSCALL_DECLARE(mmap)
int maxprot;
int denied;
int ro_vma_mapped = 0;
struct shmid_ds ads;
dkprintf("[%d]sys_mmap(%lx,%lx,%x,%x,%d,%lx)\n",
ihk_mc_get_processor_id(),
addr0, len0, prot, flags, fd, off);
addr0, len0, prot, flags, fd, off0);
/* check constants for flags */
if (1) {
@@ -681,9 +695,9 @@ SYSCALL_DECLARE(mmap)
|| ((region->user_end - len) < addr)
|| !(flags & (MAP_SHARED | MAP_PRIVATE))
|| ((flags & MAP_SHARED) && (flags & MAP_PRIVATE))
|| (off & (PAGE_SIZE - 1))) {
|| (off0 & (PAGE_SIZE - 1))) {
ekprintf("sys_mmap(%lx,%lx,%x,%x,%x,%lx):EINVAL\n",
addr0, len0, prot, flags, fd, off);
addr0, len0, prot, flags, fd, off0);
error = -EINVAL;
goto out2;
}
@@ -692,7 +706,7 @@ SYSCALL_DECLARE(mmap)
if ((flags & error_flags)
|| (flags & ~(supported_flags | ignored_flags))) {
ekprintf("sys_mmap(%lx,%lx,%x,%x,%x,%lx):unknown flags %x\n",
addr0, len0, prot, flags, fd, off,
addr0, len0, prot, flags, fd, off0,
(flags & ~(supported_flags | ignored_flags)));
error = -EINVAL;
goto out2;
@@ -754,8 +768,10 @@ SYSCALL_DECLARE(mmap)
}
phys = 0;
off = 0;
maxprot = PROT_READ | PROT_WRITE | PROT_EXEC;
if (!(flags & MAP_ANONYMOUS)) {
off = off0;
error = fileobj_create(fd, &memobj, &maxprot);
if (error) {
ekprintf("sys_mmap:fileobj_create failed. %d\n", error);
@@ -781,6 +797,22 @@ SYSCALL_DECLARE(mmap)
}
phys = virt_to_phys(p);
}
else if (flags & MAP_SHARED) {
memset(&ads, 0, sizeof(ads));
ads.shm_segsz = len;
error = shmobj_create(&ads, &memobj);
if (error) {
ekprintf("sys_mmap:shmobj_create failed. %d\n", error);
goto out;
}
}
else {
error = zeroobj_create(&memobj);
if (error) {
ekprintf("sys_mmap:zeroobj_create failed. %d\n", error);
goto out;
}
}
if ((flags & MAP_PRIVATE) && (maxprot & PROT_READ)) {
maxprot |= PROT_WRITE;
@@ -844,7 +876,7 @@ out2:
}
dkprintf("[%d]sys_mmap(%lx,%lx,%x,%x,%d,%lx): %ld %lx\n",
ihk_mc_get_processor_id(),
addr0, len0, prot, flags, fd, off, error, addr);
addr0, len0, prot, flags, fd, off0, error, addr);
return (!error)? addr: error;
}
@@ -1702,15 +1734,14 @@ SYSCALL_DECLARE(madvise)
dkprintf("[%d]sys_madvise(%lx,%lx,%x):not contig "
"%lx [%lx-%lx)\n",
ihk_mc_get_processor_id(), start,
len0, advice, addr, range->start,
range->end);
len0, advice, addr, range?range->start:0,
range?range->end:0);
error = -ENOMEM;
goto out;
}
#define MEMOBJ_IS_FILEOBJ(obj) ((obj) != NULL)
if (!MEMOBJ_IS_FILEOBJ(range->memobj)) {
dkprintf("[%d]sys_madvise(%lx,%lx,%x):not fileobj "
if (!range->memobj || !memobj_has_pager(range->memobj)) {
dkprintf("[%d]sys_madvise(%lx,%lx,%x):has not pager"
"[%lx-%lx) %lx\n",
ihk_mc_get_processor_id(), start,
len0, advice, range->start,
@@ -1888,56 +1919,104 @@ SYSCALL_DECLARE(ptrace)
return -ENOSYS;
}
#define MIN2(x,y) (x) < (y) ? (x) : (y)
SYSCALL_DECLARE(sched_setaffinity)
{
#if 0
int pid = (int)ihk_mc_syscall_arg0(ctx);
unsigned int len = (unsigned int)ihk_mc_syscall_arg1(ctx);
#endif
cpu_set_t *mask = (cpu_set_t *)ihk_mc_syscall_arg2(ctx);
unsigned long __phys;
#if 0
int i;
#endif
/* TODO: check mask is in user's page table */
if(!mask) { return -EFAULT; }
if (ihk_mc_pt_virt_to_phys(cpu_local_var(current)->vm->page_table,
(void *)mask,
&__phys)) {
int tid = (int)ihk_mc_syscall_arg0(ctx);
size_t len = (size_t)ihk_mc_syscall_arg1(ctx);
cpu_set_t *u_cpu_set = (cpu_set_t *)ihk_mc_syscall_arg2(ctx);
cpu_set_t k_cpu_set, cpu_set;
struct process *thread;
int cpu_id;
if (sizeof(k_cpu_set) > len) {
kprintf("%s:%d\n Too small buffer.", __FILE__, __LINE__);
return -EINVAL;
}
len = MIN2(len, sizeof(k_cpu_set));
if (copy_from_user(cpu_local_var(current), &k_cpu_set, u_cpu_set, len)) {
kprintf("%s:%d copy_from_user failed.\n", __FILE__, __LINE__);
return -EFAULT;
}
#if 0
dkprintf("sched_setaffinity,\n");
for(i = 0; i < len/sizeof(__cpu_mask); i++) {
dkprintf("mask[%d]=%lx,", i, mask->__bits[i]);
}
#endif
return 0;
// XXX: We should build something like cpu_available_mask in advance
CPU_ZERO(&cpu_set);
extern int num_processors;
for (cpu_id = 0; cpu_id < num_processors; cpu_id++)
if (CPU_ISSET(cpu_id, &k_cpu_set))
CPU_SET(cpu_id, &cpu_set);
for (cpu_id = 0; cpu_id < num_processors; cpu_id++) {
ihk_mc_spinlock_lock_noirq(&get_cpu_local_var(cpu_id)->runq_lock);
list_for_each_entry(thread, &get_cpu_local_var(cpu_id)->runq, sched_list)
if (thread->pid && thread->tid == tid)
goto found; /* without unlocking runq_lock */
ihk_mc_spinlock_unlock_noirq(&get_cpu_local_var(cpu_id)->runq_lock);
}
kprintf("%s:%d Thread not found.\n", __FILE__, __LINE__);
return -ESRCH;
found:
memcpy(&thread->cpu_set, &cpu_set, sizeof(cpu_set));
if (!CPU_ISSET(cpu_id, &thread->cpu_set)) {
hold_process(thread);
ihk_mc_spinlock_unlock_noirq(&get_cpu_local_var(cpu_id)->runq_lock);
sched_request_migrate(cpu_id, thread);
release_process(thread);
return 0;
} else {
ihk_mc_spinlock_unlock_noirq(&get_cpu_local_var(cpu_id)->runq_lock);
return 0;
}
}
#define MIN2(x,y) (x) < (y) ? (x) : (y)
#define MIN3(x,y,z) MIN2(MIN2((x),(y)),MIN2((y),(z)))
// see linux-2.6.34.13/kernel/sched.c
SYSCALL_DECLARE(sched_getaffinity)
{
//int pid = (int)ihk_mc_syscall_arg0(ctx);
unsigned int len = (int)ihk_mc_syscall_arg1(ctx);
//int cpu_id;
cpu_set_t *mask = (cpu_set_t *)ihk_mc_syscall_arg2(ctx);
struct ihk_mc_cpu_info *cpu_info = ihk_mc_get_cpu_info();
if(len*8 < cpu_info->ncpus) { return -EINVAL; }
if(len & (sizeof(unsigned long)-1)) { return -EINVAL; }
int min_len = MIN2(len, sizeof(cpu_set_t));
//int min_ncpus = MIN2(min_len*8, cpu_info->ncpus);
int tid = (int)ihk_mc_syscall_arg0(ctx);
size_t len = (size_t)ihk_mc_syscall_arg1(ctx);
cpu_set_t k_cpu_set, *u_cpu_set = (cpu_set_t *)ihk_mc_syscall_arg2(ctx);
CPU_ZERO_S(min_len, mask);
CPU_SET_S(ihk_mc_get_hardware_processor_id(), min_len, mask);
//for (cpu_id = 0; cpu_id < min_ncpus; ++cpu_id)
// CPU_SET_S(cpu_info->hw_ids[cpu_id], min_len, mask);
int ret;
int found = 0;
int i;
// dkprintf("sched_getaffinity returns full mask\n");
if (sizeof(k_cpu_set) > len) {
kprintf("%s:%d Too small buffer.\n", __FILE__, __LINE__);
return -EINVAL;
}
len = MIN2(len, sizeof(k_cpu_set));
return min_len;
extern int num_processors;
for (i = 0; i < num_processors && !found; i++) {
struct process *thread;
ihk_mc_spinlock_lock_noirq(&get_cpu_local_var(i)->runq_lock);
list_for_each_entry(thread, &get_cpu_local_var(i)->runq, sched_list) {
if (thread->pid && thread->tid == tid) {
found = 1;
memcpy(&k_cpu_set, &thread->cpu_set, sizeof(k_cpu_set));
break;
}
}
ihk_mc_spinlock_unlock_noirq(&get_cpu_local_var(i)->runq_lock);
}
if (!found) {
kprintf("%s:%d Thread not found.\n", __FILE__, __LINE__);
return -ESRCH;
}
ret = copy_to_user(cpu_local_var(current), u_cpu_set, &k_cpu_set, len);
kprintf("%s %d %d\n", __FILE__, __LINE__, ret);
if (ret < 0)
return ret;
return len;
}
SYSCALL_DECLARE(get_cpu_id)
{
return ihk_mc_get_processor_id();
}
SYSCALL_DECLARE(sched_yield)
@@ -2035,7 +2114,8 @@ SYSCALL_DECLARE(mlock)
dkprintf("[%d]sys_mlock(%lx,%lx):not contiguous."
" %lx [%lx-%lx)\n",
ihk_mc_get_processor_id(), start0,
len0, addr, range->start, range->end);
len0, addr, range?range->start:0,
range?range->end:0);
error = -ENOMEM;
goto out;
}
@@ -2209,7 +2289,8 @@ SYSCALL_DECLARE(munlock)
dkprintf("[%d]sys_munlock(%lx,%lx):not contiguous."
" %lx [%lx-%lx)\n",
ihk_mc_get_processor_id(), start0,
len0, addr, range->start, range->end);
len0, addr, range?range->start:0,
range?range->end:0);
error = -ENOMEM;
goto out;
}
@@ -2271,6 +2352,302 @@ out2:
return error;
}
/*
 * remap_file_pages(2): rebind the pages of an existing shared,
 * file-backed mapping to a new file offset without changing its
 * virtual address. Marks the range VR_FILEOFF (non-linear).
 */
SYSCALL_DECLARE(remap_file_pages)
{
	const uintptr_t start0 = ihk_mc_syscall_arg0(ctx);
	const size_t size = ihk_mc_syscall_arg1(ctx);
	const int prot = ihk_mc_syscall_arg2(ctx);
	const size_t pgoff = ihk_mc_syscall_arg3(ctx);
	const int flags = ihk_mc_syscall_arg4(ctx);
	int error;
	const uintptr_t start = start0 & PAGE_MASK;
	const uintptr_t end = start + size;
	const off_t off = (off_t)pgoff << PAGE_SHIFT;	/* byte offset */
	struct process * const proc = cpu_local_var(current);
	struct vm_range *range;
	int er;
	int need_populate = 0;	/* set if range is VR_LOCKED */

	dkprintf("sys_remap_file_pages(%#lx,%#lx,%#x,%#lx,%#x)\n",
			start0, size, prot, pgoff, flags);
	ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock);
	/* largest pgoff whose byte offset still fits in off_t */
#define	PGOFF_LIMIT	((off_t)1 << ((8*sizeof(off_t) - 1) - PAGE_SHIFT))
	/* NOTE(review): size and pgoff are size_t, so (size <= 0) means
	 * (size == 0) and (pgoff < 0) is always false -- harmless. */
	if ((size <= 0) || (size & (PAGE_SIZE - 1)) || (prot != 0)
			|| (pgoff < 0) || (PGOFF_LIMIT <= pgoff)
			|| ((PGOFF_LIMIT - pgoff) < (size / PAGE_SIZE))
			|| !((start < end) || (end == 0))) {
		ekprintf("sys_remap_file_pages(%#lx,%#lx,%#x,%#lx,%#x):"
				"invalid args\n",
				start0, size, prot, pgoff, flags);
		error = -EINVAL;
		goto out;
	}
	/* the range must exist, be shared, and have a backing object */
	range = lookup_process_memory_range(proc->vm, start, end);
	if (!range || (start < range->start) || (range->end < end)
			|| (range->flag & VR_PRIVATE)
			|| (range->flag & (VR_REMOTE|VR_IO_NOCACHE|VR_RESERVED))
			|| !range->memobj) {
		ekprintf("sys_remap_file_pages(%#lx,%#lx,%#x,%#lx,%#x):"
				"invalid VMR:[%#lx-%#lx) %#lx %p\n",
				start0, size, prot, pgoff, flags,
				range?range->start:0, range?range->end:0,
				range?range->flag:0, range?range->memobj:NULL);
		error = -EINVAL;
		goto out;
	}
	range->flag |= VR_FILEOFF;	/* mark range non-linear */
	error = remap_process_memory_range(proc->vm, range, start, end, off);
	if (error) {
		ekprintf("sys_remap_file_pages(%#lx,%#lx,%#x,%#lx,%#x):"
				"remap failed %d\n",
				start0, size, prot, pgoff, flags, error);
		goto out;
	}
	clear_host_pte(start, size);	/* XXX: workaround */
	if (range->flag & VR_LOCKED) {
		need_populate = 1;
	}
	error = 0;
out:
	ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock);
	/* populate after dropping the lock; failures are non-fatal */
	if (need_populate
			&& (er = populate_process_memory(
					proc, (void *)start, size))) {
		ekprintf("sys_remap_file_pages(%#lx,%#lx,%#x,%#lx,%#x):"
				"populate failed %d\n",
				start0, size, prot, pgoff, flags, er);
		/* ignore populate error */
	}
	dkprintf("sys_remap_file_pages(%#lx,%#lx,%#x,%#lx,%#x): %d\n",
			start0, size, prot, pgoff, flags, error);
	return error;
}
/*
 * mremap(2): grow, shrink, and/or move an existing mapping.
 * Returns the (possibly new) start address on success, or a negative
 * errno. Growing tries in-place extension first; otherwise the range
 * is relocated (new VMR added, PTEs moved, old range unmapped).
 */
SYSCALL_DECLARE(mremap)
{
	const uintptr_t oldaddr = ihk_mc_syscall_arg0(ctx);
	const size_t oldsize0 = ihk_mc_syscall_arg1(ctx);
	const size_t newsize0 = ihk_mc_syscall_arg2(ctx);
	const int flags = ihk_mc_syscall_arg3(ctx);
	const uintptr_t newaddr = ihk_mc_syscall_arg4(ctx);
	/* sizes rounded up to whole pages */
	const ssize_t oldsize = (oldsize0 + PAGE_SIZE - 1) & PAGE_MASK;
	const ssize_t newsize = (newsize0 + PAGE_SIZE - 1) & PAGE_MASK;
	const uintptr_t oldstart = oldaddr;
	const uintptr_t oldend = oldstart + oldsize;
	struct process *proc = cpu_local_var(current);
	struct process_vm *vm = proc->vm;
	int error;
	struct vm_range *range;
	int need_relocate;
	uintptr_t newstart;
	uintptr_t newend;
	size_t size;
	uintptr_t ret;
	/* [lckstart,lckend): pages to populate afterwards if VR_LOCKED */
	uintptr_t lckstart = -1;
	uintptr_t lckend = -1;

	dkprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx)\n",
			oldaddr, oldsize0, newsize0, flags, newaddr);
	ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);

	/* basic argument validation: alignment and flag combinations */
	if ((oldaddr & ~PAGE_MASK)
			|| (oldsize < 0)
			|| (newsize <= 0)
			|| (flags & ~(MREMAP_MAYMOVE | MREMAP_FIXED))
			|| ((flags & MREMAP_FIXED)
				&& !(flags & MREMAP_MAYMOVE))
			|| ((flags & MREMAP_FIXED)
				&& (newaddr & ~PAGE_MASK))) {
		error = -EINVAL;
		ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):invalid. %d\n",
				oldaddr, oldsize0, newsize0, flags, newaddr,
				error);
		goto out;
	}

	/* check original mapping: must be one range fully covering
	 * [oldstart,oldend) and remappable (no non-linear/special flags) */
	range = lookup_process_memory_range(vm, oldstart, oldstart+PAGE_SIZE);
	if (!range || (oldstart < range->start) || (range->end < oldend)
			|| (range->flag & (VR_FILEOFF))
			|| (range->flag & (VR_REMOTE|VR_IO_NOCACHE|VR_RESERVED))) {
		error = -EFAULT;
		ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
				"lookup failed. %d %p %#lx-%#lx %#lx\n",
				oldaddr, oldsize0, newsize0, flags, newaddr,
				error, range, range?range->start:0,
				range?range->end:0, range?range->flag:0);
		goto out;
	}
	/* address-space wraparound check */
	if (oldend < oldstart) {
		error = -EINVAL;
		ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
				"old range overflow. %d\n",
				oldaddr, oldsize0, newsize0, flags, newaddr,
				error);
		goto out;
	}

	/* determine new mapping range */
	need_relocate = 0;
	if (flags & MREMAP_FIXED) {
		need_relocate = 1;
		newstart = newaddr;
		newend = newstart + newsize;
		if (newstart < vm->region.user_start) {
			error = -EPERM;
			ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
					"mmap_min_addr %#lx. %d\n",
					oldaddr, oldsize0, newsize0, flags,
					newaddr, vm->region.user_start,
					error);
			goto out;
		}
		/* fixed destination may not overlap the source */
		if ((newstart < oldend) && (oldstart < newend)) {
			error = -EINVAL;
			ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
					"fixed:overlapped. %d\n",
					oldaddr, oldsize0, newsize0, flags,
					newaddr, error);
			goto out;
		}
	}
	else if (!(flags & MREMAP_FIXED) && (oldsize < newsize)) {
		/* growing: try in-place extension when at the range's end */
		if (oldend == range->end) {
			newstart = oldstart;
			newend = newstart + newsize;
			error = extend_up_process_memory_range(vm, range,
					newend);
			if (!error) {
				if (range->flag & VR_LOCKED) {
					lckstart = oldend;
					lckend = newend;
				}
				goto out;
			}
		}
		if (!(flags & MREMAP_MAYMOVE)) {
			error = -ENOMEM;
			ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
					"cannot relocate. %d\n",
					oldaddr, oldsize0, newsize0, flags,
					newaddr, error);
			goto out;
		}
		/* can't grow in place: pick a fresh destination */
		need_relocate = 1;
		error = search_free_space(newsize, vm->region.map_end,
				(intptr_t *)&newstart);
		if (error) {
			ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
					"search failed. %d\n",
					oldaddr, oldsize0, newsize0, flags,
					newaddr, error);
			goto out;
		}
		newend = newstart + newsize;
	}
	else {
		/* shrinking (or same size): stay in place */
		newstart = oldstart;
		newend = newstart + newsize;
	}

	/* do the remap */
	if (need_relocate) {
		if (flags & MREMAP_FIXED) {
			/* make room at the fixed destination */
			error = do_munmap((void *)newstart, newsize);
			if (error) {
				ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
						"fixed:munmap failed. %d\n",
						oldaddr, oldsize0, newsize0,
						flags, newaddr, error);
				goto out;
			}
		}
		/* the new VMR shares the old range's memory object */
		if (range->memobj) {
			memobj_ref(range->memobj);
		}
		error = add_process_memory_range(proc, newstart, newend, -1,
				range->flag, range->memobj,
				range->objoff + (oldstart - range->start));
		if (error) {
			ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
					"add failed. %d\n",
					oldaddr, oldsize0, newsize0, flags,
					newaddr, error);
			if (range->memobj) {
				memobj_release(range->memobj);
			}
			goto out;
		}
		if (range->flag & VR_LOCKED) {
			lckstart = newstart;
			lckend = newend;
		}
		if (oldsize > 0) {
			/* move existing translations, then drop the old range */
			size = (oldsize < newsize)? oldsize: newsize;
			ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
			error = move_pte_range(vm->page_table,
					(void *)oldstart, (void *)newstart,
					size);
			ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
			if (error) {
				ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
						"move failed. %d\n",
						oldaddr, oldsize0, newsize0,
						flags, newaddr, error);
				goto out;
			}
			error = do_munmap((void *)oldstart, oldsize);
			if (error) {
				ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
						"relocate:munmap failed. %d\n",
						oldaddr, oldsize0, newsize0,
						flags, newaddr, error);
				goto out;
			}
		}
	}
	else if (newsize < oldsize) {
		/* in-place shrink: unmap the tail */
		error = do_munmap((void *)newend, (oldend - newend));
		if (error) {
			ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
					"shrink:munmap failed. %d\n",
					oldaddr, oldsize0, newsize0, flags,
					newaddr, error);
			goto out;
		}
	}
	else {
		/* nothing to do */
	}

	error = 0;
out:
	ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
	/* honor mlock semantics: populate new pages of locked ranges;
	 * a populate failure is logged but does not fail the syscall */
	if (!error && (lckstart < lckend)) {
		error = populate_process_memory(proc, (void *)lckstart, (lckend - lckstart));
		if (error) {
			ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
					"populate failed. %d %#lx-%#lx\n",
					oldaddr, oldsize0, newsize0, flags,
					newaddr, error, lckstart, lckend);
			error = 0;	/* ignore error */
		}
	}
	ret = (error)? error: newstart;
	dkprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):%d %#lx\n",
			oldaddr, oldsize0, newsize0, flags, newaddr, error,
			ret);
	return ret;
}
#ifdef DCFA_KMOD
#ifdef CMD_DCFA
@@ -2407,6 +2784,7 @@ long syscall(int num, ihk_mc_user_context_t *ctx)
}
check_signal(l, NULL);
check_need_resched();
return l;
}

206
kernel/zeroobj.c Normal file
View File

@@ -0,0 +1,206 @@
/**
* \file zeroobj.c
* License details are found in the file LICENSE.
* \brief
* read-only zeroed page object
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
*/
/*
* HISTORY:
*/
#include <ihk/atomic.h>
#include <ihk/debug.h>
#include <ihk/lock.h>
#include <ihk/mm.h>
#include <errno.h>
#include <kmalloc.h>
#include <list.h>
#include <memobj.h>
#include <memory.h>
#include <page.h>
#include <string.h>
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#define fkprintf(...) kprintf(__VA_ARGS__)
/* Read-only object that backs every offset with one shared
 * zero-filled page; used for anonymous demand-zero mappings. */
struct zeroobj {
	struct memobj memobj;		/* common part; must be first so that
					 * (struct memobj *) casts work */
	struct list_head page_list;	/* holds the single zero page */
};
/* Process-wide singleton zeroobj, created lazily under the lock. */
static ihk_spinlock_t the_zeroobj_lock = SPIN_LOCK_UNLOCKED;
static struct zeroobj *the_zeroobj = NULL;	/* singleton */

static memobj_get_page_func_t zeroobj_get_page;

/* No release/ref ops: the singleton lives forever. */
static struct memobj_ops zeroobj_ops = {
	.get_page =	&zeroobj_get_page,
};
/* Downcast a generic memobj to its enclosing zeroobj; valid because
 * memobj is the first member, so the addresses coincide. */
static struct zeroobj *to_zeroobj(struct memobj *memobj)
{
	struct zeroobj *obj = (struct zeroobj *)memobj;

	return obj;
}
/* Upcast a zeroobj to its embedded generic memobj. */
static struct memobj *to_memobj(struct zeroobj *zeroobj)
{
	struct memobj *memobj = &zeroobj->memobj;

	return memobj;
}
/***********************************************************************
* page_list
*/
/* Initialize the object's (empty) page list. */
static void page_list_init(struct zeroobj *obj)
{
	INIT_LIST_HEAD(&obj->page_list);
}
/* Add a page to the object's page list. */
static void page_list_insert(struct zeroobj *obj, struct page *page)
{
	list_add(&page->list, &obj->page_list);
}
/* Return the head of the page list (the zero page), or NULL if empty. */
static struct page *page_list_first(struct zeroobj *obj)
{
	struct list_head *head = &obj->page_list;

	if (list_empty(head)) {
		return NULL;
	}
	return list_first_entry(head, struct page, list);
}
/***********************************************************************
* zeroobj
*/
/* Allocate and publish the singleton zeroobj: one zero-filled page on
 * its page_list. Takes the_zeroobj_lock and re-checks the singleton,
 * so concurrent callers are safe. Returns 0 (also when the object
 * already exists) or -ENOMEM. */
static int alloc_zeroobj(void)
{
	int error;
	struct zeroobj *obj = NULL;	/* non-NULL only while unpublished */
	void *virt = NULL;		/* non-NULL only while unpublished */
	uintptr_t phys;
	struct page *page;

	dkprintf("alloc_zeroobj()\n");
	ihk_mc_spinlock_lock_noirq(&the_zeroobj_lock);
	/* re-check under the lock: another CPU may have won the race */
	if (the_zeroobj) {
		error = 0;
		dkprintf("alloc_zeroobj():already. %d\n", error);
		goto out;
	}
	obj = kmalloc(sizeof(*obj), IHK_MC_AP_NOWAIT);
	if (!obj) {
		error = -ENOMEM;
		ekprintf("alloc_zeroobj():kmalloc failed. %d\n", error);
		goto out;
	}
	memset(obj, 0, sizeof(*obj));
	obj->memobj.ops = &zeroobj_ops;
	page_list_init(obj);
	ihk_mc_spinlock_init(&obj->memobj.lock);
	virt = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT);	/* XXX:NYI:large page */
	if (!virt) {
		error = -ENOMEM;
		ekprintf("alloc_zeroobj():alloc pages failed. %d\n", error);
		goto out;
	}
	phys = virt_to_phys(virt);
	page = phys_to_page(phys);
	/* a freshly allocated page must be unclaimed */
	if (page->mode != PM_NONE) {
		fkprintf("alloc_zeroobj():"
				"page %p %#lx %d %d %#lx\n",
				page, page_to_phys(page), page->mode,
				page->count, page->offset);
		panic("alloc_zeroobj:dup alloc");
	}
	memset(virt, 0, PAGE_SIZE);
	page->mode = PM_MAPPED;
	page->offset = 0;
	ihk_atomic_set(&page->count, 1);	/* the object's own reference */
	page_list_insert(obj, page);
	virt = NULL;	/* owned by obj now */
	error = 0;
	the_zeroobj = obj;	/* publish the singleton */
	obj = NULL;	/* don't free on the way out */
out:
	ihk_mc_spinlock_unlock_noirq(&the_zeroobj_lock);
	/* error-path cleanup of whatever was not published */
	if (virt) {
		ihk_mc_free_pages(virt, 1);
	}
	if (obj) {
		kfree(obj);
	}
	dkprintf("alloc_zeroobj():%d %p\n", error, the_zeroobj);
	return error;
}
/**
 * zeroobj_create - hand out the shared zero-page object.
 * @objp: out parameter; receives the singleton on success.
 *
 * Lazily allocates the singleton on first use. Returns 0 or a
 * negative errno from the allocation.
 */
int zeroobj_create(struct memobj **objp)
{
	int error = 0;

	dkprintf("zeroobj_create(%p)\n", objp);
	/* NOTE(review): unlocked fast-path read of the_zeroobj;
	 * alloc_zeroobj() re-checks under the lock -- confirm benign. */
	if (!the_zeroobj) {
		error = alloc_zeroobj();
	}
	if (!error) {
		*objp = to_memobj(the_zeroobj);
	}
	dkprintf("zeroobj_create(%p):%d %p\n", objp, error, *objp);
	return error;
}
static int zeroobj_get_page(struct memobj *memobj, off_t off, int p2align,
uintptr_t *physp)
{
int error;
struct zeroobj *obj = to_zeroobj(memobj);
struct page *page;
dkprintf("zeroobj_get_page(%p,%#lx,%d,%p)\n",
memobj, off, p2align, physp);
if (off & ~PAGE_MASK) {
error = -EINVAL;
ekprintf("zeroobj_get_page(%p,%#lx,%d,%p):invalid argument. %d\n",
memobj, off, p2align, physp, error);
goto out;
}
if (p2align != PAGE_P2ALIGN) { /* XXX:NYI:large pages */
error = -ENOMEM;
ekprintf("zeroobj_get_page(%p,%#lx,%d,%p):large page. %d\n",
memobj, off, p2align, physp, error);
goto out;
}
page = page_list_first(obj);
if (!page) {
error = -ENOMEM;
ekprintf("zeroobj_get_page(%p,%#lx,%d,%p):page not found. %d\n",
memobj, off, p2align, physp, error);
goto out;
}
ihk_atomic_inc(&page->count);
error = 0;
*physp = page_to_phys(page);
out:
dkprintf("zeroobj_get_page(%p,%#lx,%d,%p):%d\n",
memobj, off, p2align, physp, error);
return error;
}

View File

@@ -47,6 +47,12 @@ enum ihk_mc_pt_prepare_flag {
IHK_MC_PT_LAST_LEVEL,
};
enum visit_pte_flag {
VPTEF_SKIP_NULL = 0x0001, /* skip null PTEs */
VPTEF_DEFAULT = 0,
};
struct ihk_mc_memory_area {
unsigned long start;
unsigned long size;
@@ -123,6 +129,12 @@ int ihk_mc_pt_set_pte(page_table_t pt, pte_t *ptep, size_t pgsize, uintptr_t phy
int ihk_mc_pt_prepare_map(page_table_t pt, void *virt, unsigned long size,
enum ihk_mc_pt_prepare_flag);
typedef int pte_visitor_t(void *arg, page_table_t pt, pte_t *ptep,
void *pgaddr, size_t pgsize);
int visit_pte_range(page_table_t pt, void *start, void *end,
enum visit_pte_flag flags, pte_visitor_t *funcp, void *arg);
int move_pte_range(page_table_t pt, void *src, void *dest, size_t size);
struct page_table *ihk_mc_pt_create(enum ihk_mc_ap_flag ap_flag);
/* XXX: proper use of struct page_table and page_table_t is unknown */
void ihk_mc_pt_destroy(struct page_table *pt);