Merge remote branch 'origin/master' into gdb

Conflicts:
	kernel/include/process.h
Naoki Hamada committed on 2014-07-18 16:03:35 +09:00
23 changed files with 1723 additions and 117 deletions


@@ -1,6 +1,7 @@
IHKDIR=$(IHKBASE)/$(TARGETDIR)
OBJS = init.o mem.o debug.o mikc.o listeners.o ap.o syscall.o cls.o host.o
OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o
OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o shmobj.o
OBJS += zeroobj.o
DEPSRCS=$(wildcard $(SRC)/*.c)
CFLAGS += -I$(SRC)/include -mcmodel=kernel -D__KERNEL__


@@ -184,6 +184,7 @@ int fileobj_create(int fd, struct memobj **objp, int *maxprotp)
memset(newobj, 0, sizeof(*newobj));
newobj->memobj.ops = &fileobj_ops;
newobj->memobj.flags = MF_HAS_PAGER;
newobj->handle = result.handle;
newobj->sref = 1;
newobj->cref = 1;


@@ -30,6 +30,9 @@ struct malloc_header {
#define CPU_STATUS_RUNNING (2)
extern ihk_spinlock_t cpu_status_lock;
#define CPU_FLAG_NEED_RESCHED 0x1U
#define CPU_FLAG_NEED_MIGRATE 0x2U
struct cpu_local_var {
/* malloc */
struct malloc_header free_list;
@@ -54,6 +57,11 @@ struct cpu_local_var {
int fs;
struct list_head pending_free_pages;
unsigned int flags;
ihk_spinlock_t migq_lock;
struct list_head migq;
} __attribute__((aligned(64)));


@@ -16,10 +16,19 @@
#include <ihk/types.h>
#include <ihk/atomic.h>
#include <ihk/lock.h>
#include <errno.h>
#include <list.h>
#include <shm.h>
enum {
/* for memobj.flags */
MF_HAS_PAGER = 0x0001,
};
struct memobj {
struct memobj_ops * ops;
uint32_t flags;
int8_t padding[4];
ihk_spinlock_t lock;
};
@@ -39,29 +48,42 @@ struct memobj_ops {
static inline void memobj_release(struct memobj *obj)
{
(*obj->ops->release)(obj);
if (obj->ops->release) {
(*obj->ops->release)(obj);
}
}
static inline void memobj_ref(struct memobj *obj)
{
(*obj->ops->ref)(obj);
if (obj->ops->ref) {
(*obj->ops->ref)(obj);
}
}
static inline int memobj_get_page(struct memobj *obj, off_t off,
int p2align, uintptr_t *physp)
{
return (*obj->ops->get_page)(obj, off, p2align, physp);
if (obj->ops->get_page) {
return (*obj->ops->get_page)(obj, off, p2align, physp);
}
return -ENXIO;
}
static inline uintptr_t memobj_copy_page(struct memobj *obj,
uintptr_t orgphys, int p2align)
{
return (*obj->ops->copy_page)(obj, orgphys, p2align);
if (obj->ops->copy_page) {
return (*obj->ops->copy_page)(obj, orgphys, p2align);
}
return -ENXIO;
}
static inline int memobj_flush_page(struct memobj *obj, uintptr_t phys, size_t pgsize)
{
return (*obj->ops->flush_page)(obj, phys, pgsize);
if (obj->ops->flush_page) {
return (*obj->ops->flush_page)(obj, phys, pgsize);
}
return 0;
}
static inline void memobj_lock(struct memobj *obj)
@@ -74,6 +96,13 @@ static inline void memobj_unlock(struct memobj *obj)
ihk_mc_spinlock_unlock_noirq(&obj->lock);
}
static inline int memobj_has_pager(struct memobj *obj)
{
return !!(obj->flags & MF_HAS_PAGER);
}
int fileobj_create(int fd, struct memobj **objp, int *maxprotp);
int shmobj_create(struct shmid_ds *ds, struct memobj **objp);
int zeroobj_create(struct memobj **objp);
#endif /* HEADER_MEMOBJ_H */
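The wrappers above now tolerate missing operations instead of jumping through a NULL function pointer: a lookup-style op (get_page, copy_page) fails with -ENXIO, while a missing flush_page degrades to a no-op. That is what lets the new shmobj and zeroobj (added later in this commit) implement only the ops they need. A minimal user-space sketch of the same dispatch-with-default idiom; all names here are illustrative, not from the kernel:

#include <errno.h>
#include <stdio.h>

struct obj;
struct obj_ops {
	int (*get_page)(struct obj *o);   /* may be NULL */
	int (*flush_page)(struct obj *o); /* may be NULL */
};
struct obj { const struct obj_ops *ops; };

/* mirrors memobj_get_page(): a missing op is an error */
static int obj_get_page(struct obj *o)
{
	if (o->ops->get_page)
		return o->ops->get_page(o);
	return -ENXIO;
}

/* mirrors memobj_flush_page(): flushing an unflushable object is a no-op */
static int obj_flush_page(struct obj *o)
{
	if (o->ops->flush_page)
		return o->ops->flush_page(o);
	return 0;
}

static const struct obj_ops partial_ops = { 0 }; /* implements nothing */

int main(void)
{
	struct obj o = { &partial_ops };
	printf("get_page=%d flush_page=%d\n",
	       obj_get_page(&o), obj_flush_page(&o)); /* -6 (ENXIO) and 0 */
	return 0;
}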


@@ -63,4 +63,10 @@
#define MADV_HWPOISON 100
#define MADV_SOFT_OFFLINE 101
/*
* for mremap()
*/
#define MREMAP_MAYMOVE 0x01
#define MREMAP_FIXED 0x02
#endif /* HEADER_MMAN_H */


@@ -20,6 +20,7 @@
#include <list.h>
#include <signal.h>
#include <memobj.h>
#include <affinity.h>
#define VR_NONE 0x0
#define VR_STACK 0x1
@@ -29,6 +30,7 @@
#define VR_DEMAND_PAGING 0x1000
#define VR_PRIVATE 0x2000
#define VR_LOCKED 0x4000
#define VR_FILEOFF 0x8000 /* remap_file_pages()ed range */
#define VR_PROT_NONE 0x00000000
#define VR_PROT_READ 0x00010000
#define VR_PROT_WRITE 0x00020000
@@ -186,6 +188,7 @@ struct process {
void *pgio_arg;
struct fork_tree_node *ftn;
cpu_set_t cpu_set;
unsigned long saved_auxv[AUXV_LEN];
};
@@ -231,12 +234,16 @@ int join_process_memory_range(struct process *process, struct vm_range *survivin
int change_prot_process_memory_range(
struct process *process, struct vm_range *range,
unsigned long newflag);
int remap_process_memory_range(struct process_vm *vm, struct vm_range *range,
uintptr_t start, uintptr_t end, off_t off);
struct vm_range *lookup_process_memory_range(
struct process_vm *vm, uintptr_t start, uintptr_t end);
struct vm_range *next_process_memory_range(
struct process_vm *vm, struct vm_range *range);
struct vm_range *previous_process_memory_range(
struct process_vm *vm, struct vm_range *range);
int extend_up_process_memory_range(struct process_vm *vm,
struct vm_range *range, uintptr_t newend);
int page_fault_process(struct process *proc, void *fault_addr, uint64_t reason);
int remove_process_region(struct process *proc,
@@ -256,4 +263,7 @@ void runq_add_proc(struct process *proc, int cpu_id);
void runq_del_proc(struct process *proc, int cpu_id);
int sched_wakeup_process(struct process *proc, int valid_states);
void sched_request_migrate(int cpu_id, struct process *proc);
void check_need_resched(void);
#endif
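check_need_resched() pairs with the CPU_FLAG_NEED_RESCHED bit declared in cpulocal.h above: interrupt-level code (the migration IPI path, sched_request_migrate()) only raises the flag, and the flag is consumed at a safe point — the page-fault return and syscall return paths later in this diff — where calling schedule() is legal. A hedged user-space analog of the flag-then-safe-point idiom, using a C11 atomic in place of the per-CPU flags word:

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint cpu_flags;
#define NEED_RESCHED 0x1u

static void schedule(void) { puts("schedule()"); }

/* an IPI or timer handler would only mark the request... */
static void request_resched(void)
{
	atomic_fetch_or(&cpu_flags, NEED_RESCHED);
}

/* ...and the request is honored later, at a point where a
 * context switch is safe */
static void check_need_resched(void)
{
	if (atomic_fetch_and(&cpu_flags, ~NEED_RESCHED) & NEED_RESCHED)
		schedule();
}

int main(void)
{
	request_resched();
	check_need_resched(); /* runs schedule() once */
	check_need_resched(); /* flag already cleared: no-op */
	return 0;
}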

kernel/include/shm.h Normal file

@@ -0,0 +1,49 @@
/**
* \file shm.h
* License details are found in the file LICENSE.
* \brief
* header file for System V shared memory
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
*/
/*
* HISTORY:
*/
#ifndef HEADER_SHM_H
#define HEADER_SHM_H
/* begin types.h */
typedef int32_t key_t;
typedef uint32_t uid_t;
typedef uint32_t gid_t;
typedef int64_t time_t;
typedef int32_t pid_t;
/* end types.h */
typedef uint64_t shmatt_t;
struct ipc_perm {
key_t key;
uid_t uid;
gid_t gid;
uid_t cuid;
gid_t cgid;
uint16_t mode;
uint8_t padding[2];
uint16_t seq;
uint8_t padding2[22];
};
struct shmid_ds {
struct ipc_perm shm_perm;
size_t shm_segsz;
time_t shm_atime;
time_t shm_dtime;
time_t shm_ctime;
pid_t shm_cpid;
pid_t shm_lpid;
shmatt_t shm_nattch;
uint8_t padding[16];
};
#endif /* HEADER_SHM_H */
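These definitions mirror the Linux shmid_ds / ipc_perm ABI so System V shared-memory state can be tracked on the McKernel side. For reference, this is the standard user-space API that operates on the same structure (plain Linux calls; error checking omitted for brevity):

#include <stdio.h>
#include <string.h>
#include <sys/ipc.h>
#include <sys/shm.h>

int main(void)
{
	int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
	char *p = shmat(id, NULL, 0);
	struct shmid_ds ds;

	strcpy(p, "hello");
	shmctl(id, IPC_STAT, &ds);	/* fills shm_segsz, shm_nattch, ... */
	printf("segsz=%zu nattch=%lu\n",
	       (size_t)ds.shm_segsz, (unsigned long)ds.shm_nattch);
	shmdt(p);
	shmctl(id, IPC_RMID, NULL);
	return 0;
}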


@@ -211,8 +211,11 @@ static void post_init(void)
}
if (find_command_line("hidos")) {
extern ihk_spinlock_t syscall_lock;
init_host_syscall_channel();
init_host_syscall_channel2();
ihk_mc_spinlock_init(&syscall_lock);
}
ap_start();
}


@@ -18,6 +18,16 @@
#include <ihk/ikc.h>
#include <ikc/master.h>
//#define DEBUG_LISTENERS
#ifdef DEBUG_LISTENERS
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif
static unsigned long read_tsc(void)
{
unsigned int low, high;
@@ -103,5 +113,5 @@ static struct ihk_ikc_listen_param test_listen_param = {
void mc_ikc_test_init(void)
{
ihk_ikc_listen_port(NULL, &test_listen_param);
kprintf("Listener registered port %d\n", 500);
dkprintf("Listener registered port %d\n", 500);
}


@@ -281,6 +281,7 @@ out:
dkprintf("[%d]page_fault_handler(%p,%lx,%p): (%d)\n",
ihk_mc_get_processor_id(), fault_addr, reason,
regs, error);
check_need_resched();
return;
}


@@ -253,7 +253,7 @@ struct process *clone_process(struct process *org, unsigned long pc,
goto err_free_sighandler;
}
memset(proc->sighandler, '\0', sizeof(struct sig_handler));
memcpy(proc->sighandler, org->sighandler, sizeof(struct sig_handler));
ihk_atomic_set(&proc->sighandler->use, 1);
ihk_mc_spinlock_init(&proc->sighandler->lock);
ihk_atomic_set(&proc->sigshared->use, 1);
@@ -936,6 +936,39 @@ struct vm_range *previous_process_memory_range(
return prev;
}
int extend_up_process_memory_range(struct process_vm *vm,
struct vm_range *range, uintptr_t newend)
{
int error;
struct vm_range *next;
dkprintf("exntend_up_process_memory_range(%p,%p %#lx-%#lx,%#lx)\n",
vm, range, range->start, range->end, newend);
if (newend <= range->end) {
error = -EINVAL;
goto out;
}
if (vm->region.user_end < newend) {
error = -EPERM;
goto out;
}
next = next_process_memory_range(vm, range);
if (next && (next->start < newend)) {
error = -ENOMEM;
goto out;
}
error = 0;
range->end = newend;
out:
dkprintf("exntend_up_process_memory_range(%p,%p %#lx-%#lx,%#lx):%d\n",
vm, range, range->start, range->end, newend, error);
return error;
}
int change_prot_process_memory_range(struct process *proc,
struct vm_range *range, unsigned long protflag)
{
@@ -997,6 +1030,94 @@ out:
return error;
}
struct rfp_args {
off_t off;
uintptr_t start;
struct memobj *memobj;
};
static int remap_one_page(void *arg0, page_table_t pt, pte_t *ptep,
void *pgaddr, size_t pgsize)
{
struct rfp_args * const args = arg0;
int error;
off_t off;
pte_t apte;
uintptr_t phys;
struct page *page;
dkprintf("remap_one_page(%p,%p,%p %#lx,%p,%#lx)\n",
arg0, pt, ptep, *ptep, pgaddr, pgsize);
/* XXX: NYI: large pages */
if (pgsize != PAGE_SIZE) {
error = -E2BIG;
ekprintf("remap_one_page(%p,%p,%p %#lx,%p,%#lx):%d\n",
arg0, pt, ptep, *ptep, pgaddr, pgsize, error);
goto out;
}
off = args->off + ((uintptr_t)pgaddr - args->start);
pte_make_fileoff(off, 0, pgsize, &apte);
pte_xchg(ptep, &apte);
flush_tlb_single((uintptr_t)pgaddr); /* XXX: TLB flush */
if (pte_is_null(&apte) || pte_is_fileoff(&apte, pgsize)) {
error = 0;
goto out;
}
phys = pte_get_phys(&apte);
if (pte_is_dirty(&apte, pgsize)) {
memobj_flush_page(args->memobj, phys, pgsize); /* XXX: in lock period */
}
page = phys_to_page(phys);
if (page && page_unmap(page)) {
ihk_mc_free_pages(phys_to_virt(phys), pgsize/PAGE_SIZE);
}
error = 0;
out:
dkprintf("remap_one_page(%p,%p,%p %#lx,%p,%#lx): %d\n",
arg0, pt, ptep, *ptep, pgaddr, pgsize, error);
return error;
}
int remap_process_memory_range(struct process_vm *vm, struct vm_range *range,
uintptr_t start, uintptr_t end, off_t off)
{
struct rfp_args args;
int error;
dkprintf("remap_process_memory_range(%p,%p,%#lx,%#lx,%#lx)\n",
vm, range, start, end, off);
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
memobj_lock(range->memobj);
args.start = start;
args.off = off;
args.memobj = range->memobj;
error = visit_pte_range(vm->page_table, (void *)start,
(void *)end, VPTEF_DEFAULT, &remap_one_page, &args);
if (error) {
ekprintf("remap_process_memory_range(%p,%p,%#lx,%#lx,%#lx):"
"visit pte failed %d\n",
vm, range, start, end, off, error);
goto out;
}
error = 0;
out:
memobj_unlock(range->memobj);
ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
dkprintf("remap_process_memory_range(%p,%p,%#lx,%#lx,%#lx):%d\n",
vm, range, start, end, off, error);
return error;
}
static int page_fault_process_memory_range(struct process_vm *vm, struct vm_range *range, uintptr_t fault_addr, uint64_t reason)
{
int error;
@@ -1012,7 +1133,8 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
/*****/
ptep = ihk_mc_pt_lookup_pte(vm->page_table, (void *)fault_addr, &pgaddr, &pgsize, &p2align);
if (!(reason & PF_PROT) && ptep && !pte_is_null(ptep)) {
if (!(reason & PF_PROT) && ptep && !pte_is_null(ptep)
&& !pte_is_fileoff(ptep, pgsize)) {
if (!pte_is_present(ptep)) {
error = -EFAULT;
kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):PROT_NONE. %d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error);
@@ -1034,11 +1156,16 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang
}
attr = arch_vrflag_to_ptattr(range->flag, reason, ptep);
pgaddr = (void *)(fault_addr & ~(pgsize - 1));
if (!ptep || pte_is_null(ptep)) {
if (!ptep || pte_is_null(ptep) || pte_is_fileoff(ptep, pgsize)) {
if (range->memobj) {
off_t off;
off = range->objoff + ((uintptr_t)pgaddr - range->start);
if (!ptep || !pte_is_fileoff(ptep, pgsize)) {
off = range->objoff + ((uintptr_t)pgaddr - range->start);
}
else {
off = pte_get_off(ptep, pgsize);
}
error = memobj_get_page(range->memobj, off, p2align, &phys);
if (error) {
if (error != -ERESTART) {
@@ -1590,6 +1717,9 @@ void sched_init(void)
cpu_local_var(runq_len) = 0;
ihk_mc_spinlock_init(&cpu_local_var(runq_lock));
INIT_LIST_HEAD(&cpu_local_var(migq));
ihk_mc_spinlock_init(&cpu_local_var(migq_lock));
#ifdef TIMER_CPU_ID
if (ihk_mc_get_processor_id() == TIMER_CPU_ID) {
init_timers();
@@ -1598,6 +1728,72 @@ void sched_init(void)
#endif
}
static void double_rq_lock(struct cpu_local_var *v1, struct cpu_local_var *v2)
{
if (v1 < v2) {
ihk_mc_spinlock_lock_noirq(&v1->runq_lock);
ihk_mc_spinlock_lock_noirq(&v2->runq_lock);
} else {
ihk_mc_spinlock_lock_noirq(&v2->runq_lock);
ihk_mc_spinlock_lock_noirq(&v1->runq_lock);
}
}
static void double_rq_unlock(struct cpu_local_var *v1, struct cpu_local_var *v2)
{
ihk_mc_spinlock_unlock_noirq(&v1->runq_lock);
ihk_mc_spinlock_unlock_noirq(&v2->runq_lock);
}
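double_rq_lock() is the classic address-ordered double lock: whichever CPU initiates a migration, the two runqueue locks are always taken in the same global order, so two concurrent migrations between the same pair of CPUs cannot deadlock (no AB-BA). The same idiom in portable pthread form, assuming the two locks are distinct, as do_migrate() guarantees by skipping requests for processes already off this CPU:

#include <pthread.h>

/* acquire two locks in a global (address) order to rule out
 * AB-BA deadlock; assumes a != b */
static void double_lock(pthread_mutex_t *a, pthread_mutex_t *b)
{
	if (a < b) {
		pthread_mutex_lock(a);
		pthread_mutex_lock(b);
	} else {
		pthread_mutex_lock(b);
		pthread_mutex_lock(a);
	}
}

static void double_unlock(pthread_mutex_t *a, pthread_mutex_t *b)
{
	/* unlock order does not matter for correctness */
	pthread_mutex_unlock(a);
	pthread_mutex_unlock(b);
}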
struct migrate_request {
struct list_head list;
struct process *proc;
struct waitq wq;
};
static void do_migrate(void)
{
int cur_cpu_id = ihk_mc_get_processor_id();
struct cpu_local_var *cur_v = get_cpu_local_var(cur_cpu_id);
struct migrate_request *req, *tmp;
ihk_mc_spinlock_lock_noirq(&cur_v->migq_lock);
list_for_each_entry_safe(req, tmp, &cur_v->migq, list) {
int cpu_id;
struct cpu_local_var *v;
/* 0. check if migration is necessary */
list_del(&req->list);
if (req->proc->cpu_id != cur_cpu_id) /* already not here */
goto ack;
if (CPU_ISSET(cur_cpu_id, &req->proc->cpu_set)) /* good affinity */
goto ack;
/* 1. select CPU */
for (cpu_id = 0; cpu_id < CPU_SETSIZE; cpu_id++)
if (CPU_ISSET(cpu_id, &req->proc->cpu_set))
break;
if (CPU_SETSIZE == cpu_id) /* empty affinity (bug?) */
goto ack;
/* 2. migrate thread */
v = get_cpu_local_var(cpu_id);
double_rq_lock(cur_v, v);
list_del(&req->proc->sched_list);
cur_v->runq_len -= 1;
req->proc->cpu_id = cpu_id;
list_add_tail(&req->proc->sched_list, &v->runq);
v->runq_len += 1;
if (v->runq_len == 1)
ihk_mc_interrupt_cpu(get_x86_cpu_local_variable(cpu_id)->apic_id, 0xd1);
double_rq_unlock(cur_v, v);
ack:
waitq_wakeup(&req->wq);
}
ihk_mc_spinlock_unlock_noirq(&cur_v->migq_lock);
}
void schedule(void)
{
struct cpu_local_var *v = get_this_cpu_local_var();
@@ -1606,6 +1802,7 @@ void schedule(void)
unsigned long irqstate;
struct process *last;
redo:
irqstate = ihk_mc_spinlock_lock(&(v->runq_lock));
next = NULL;
@@ -1621,25 +1818,26 @@ void schedule(void)
list_add_tail(&prev->sched_list, &(v->runq));
++v->runq_len;
}
}
if (!v->runq_len) {
if (v->flags & CPU_FLAG_NEED_MIGRATE) {
next = &cpu_local_var(idle);
} else {
/* Pick a new running process */
list_for_each_entry_safe(proc, tmp, &(v->runq), sched_list) {
if (proc->status == PS_RUNNING) {
next = proc;
break;
}
}
/* No process? Run idle.. */
if (!next) {
next = &cpu_local_var(idle);
v->status = CPU_STATUS_IDLE;
}
}
/* Pick a new running process */
list_for_each_entry_safe(proc, tmp, &(v->runq), sched_list) {
if (proc->status == PS_RUNNING) {
next = proc;
break;
}
}
/* No process? Run idle.. */
if (!next) {
next = &cpu_local_var(idle);
}
if (prev != next) {
switch_ctx = 1;
v->current = next;
@@ -1675,6 +1873,21 @@ void schedule(void)
else {
ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
}
if (v->flags & CPU_FLAG_NEED_MIGRATE) {
v->flags &= ~CPU_FLAG_NEED_MIGRATE;
do_migrate();
goto redo;
}
}
void check_need_resched(void)
{
struct cpu_local_var *v = get_this_cpu_local_var();
if (v->flags & CPU_FLAG_NEED_RESCHED) {
v->flags &= ~CPU_FLAG_NEED_RESCHED;
schedule();
}
}
@@ -1719,6 +1932,49 @@ int sched_wakeup_process(struct process *proc, int valid_states)
return status;
}
/*
* 1. Add current process to waitq
* 2. Queue migration request into the target CPU's queue
* 3. Kick migration on the CPU
* 4. Wait for completion of the migration
*
* struct migrate_request {
* list //migq,
* wq,
* proc
* }
*
* [expected processing of the target CPU]
* 1. Interrupted by IPI
* 2. call schedule() via check_need_resched()
* 3. Do migration
* 4. Wake up this thread
*/
void sched_request_migrate(int cpu_id, struct process *proc)
{
struct cpu_local_var *v = get_cpu_local_var(cpu_id);
struct migrate_request req = { .proc = proc };
unsigned long irqstate;
DECLARE_WAITQ_ENTRY(entry, cpu_local_var(current));
waitq_init(&req.wq);
waitq_prepare_to_wait(&req.wq, &entry, PS_UNINTERRUPTIBLE);
irqstate = ihk_mc_spinlock_lock(&v->migq_lock);
list_add_tail(&req.list, &v->migq);
ihk_mc_spinlock_unlock(&v->migq_lock, irqstate);
v->flags |= CPU_FLAG_NEED_RESCHED | CPU_FLAG_NEED_MIGRATE;
v->status = CPU_STATUS_RUNNING;
if (cpu_id != ihk_mc_get_processor_id())
ihk_mc_interrupt_cpu(/* Kick scheduler */
get_x86_cpu_local_variable(cpu_id)->apic_id, 0xd1);
schedule();
waitq_finish_wait(&req.wq, &entry);
}
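The handshake above (queue the request, raise the flags, IPI the target, sleep on req.wq until do_migrate() acks) is effectively a synchronous cross-CPU call. A user-space sketch of the same request/ack shape, with a condition variable standing in for the waitq and a thread standing in for the target CPU; names are illustrative only:

#include <pthread.h>
#include <stdio.h>

struct request {
	pthread_mutex_t lock;
	pthread_cond_t ack;
	int done;
};

static struct request req = {
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0
};

static void *worker(void *arg)
{
	/* "do_migrate()": perform the work, then wake the requester */
	pthread_mutex_lock(&req.lock);
	puts("worker: migrating");
	req.done = 1;
	pthread_cond_signal(&req.ack);		/* waitq_wakeup() */
	pthread_mutex_unlock(&req.lock);
	return NULL;
}

int main(void)
{
	pthread_t t;
	pthread_create(&t, NULL, worker, NULL);	/* "IPI" the target */
	pthread_mutex_lock(&req.lock);
	while (!req.done)			/* prepare_to_wait + sleep */
		pthread_cond_wait(&req.ack, &req.lock);
	pthread_mutex_unlock(&req.lock);
	pthread_join(t, NULL);
	return 0;
}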
/* Runq lock must be held here */

kernel/shmobj.c Normal file

@@ -0,0 +1,287 @@
/**
* \file shmobj.c
* License details are found in the file LICENSE.
* \brief
* shared memory object
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
*/
/*
* HISTORY:
*/
#include <ihk/atomic.h>
#include <ihk/debug.h>
#include <ihk/lock.h>
#include <ihk/mm.h>
#include <errno.h>
#include <kmalloc.h>
#include <list.h>
#include <memobj.h>
#include <memory.h>
#include <page.h>
#include <shm.h>
#include <string.h>
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#define fkprintf(...) kprintf(__VA_ARGS__)
struct shmobj {
struct memobj memobj; /* must be first */
long ref;
struct shmid_ds ds;
struct list_head page_list;
};
static memobj_release_func_t shmobj_release;
static memobj_ref_func_t shmobj_ref;
static memobj_get_page_func_t shmobj_get_page;
static struct memobj_ops shmobj_ops = {
.release = &shmobj_release,
.ref = &shmobj_ref,
.get_page = &shmobj_get_page,
};
static struct shmobj *to_shmobj(struct memobj *memobj)
{
return (struct shmobj *)memobj;
}
static struct memobj *to_memobj(struct shmobj *shmobj)
{
return &shmobj->memobj;
}
/***********************************************************************
* page_list
*/
static void page_list_init(struct shmobj *obj)
{
INIT_LIST_HEAD(&obj->page_list);
return;
}
static void page_list_insert(struct shmobj *obj, struct page *page)
{
list_add(&page->list, &obj->page_list);
return;
}
static void page_list_remove(struct shmobj *obj, struct page *page)
{
list_del(&page->list);
return;
}
static struct page *page_list_lookup(struct shmobj *obj, off_t off)
{
struct page *page;
list_for_each_entry(page, &obj->page_list, list) {
if (page->offset == off) {
goto out;
}
}
page = NULL;
out:
return page;
}
static struct page *page_list_first(struct shmobj *obj)
{
if (list_empty(&obj->page_list)) {
return NULL;
}
return list_first_entry(&obj->page_list, struct page, list);
}
int shmobj_create(struct shmid_ds *ds, struct memobj **objp)
{
struct shmobj *obj = NULL;
int error;
dkprintf("shmobj_create(%p %#lx,%p)\n", ds, ds->shm_segsz, objp);
obj = kmalloc(sizeof(*obj), IHK_MC_AP_NOWAIT);
if (!obj) {
error = -ENOMEM;
ekprintf("shmobj_create(%p %#lx,%p):kmalloc failed. %d\n",
ds, ds->shm_segsz, objp, error);
goto out;
}
memset(obj, 0, sizeof(*obj));
obj->memobj.ops = &shmobj_ops;
obj->ref = 1;
obj->ds = *ds;
page_list_init(obj);
ihk_mc_spinlock_init(&obj->memobj.lock);
error = 0;
*objp = to_memobj(obj);
obj = NULL;
out:
if (obj) {
kfree(obj);
}
dkprintf("shmobj_create(%p %#lx,%p):%d %p\n",
ds, ds->shm_segsz, objp, error, *objp);
return error;
}
static void shmobj_release(struct memobj *memobj)
{
struct shmobj *obj = to_shmobj(memobj);
struct shmobj *freeobj = NULL;
dkprintf("shmobj_release(%p)\n", memobj);
memobj_lock(&obj->memobj);
--obj->ref;
if (obj->ref <= 0) {
if (obj->ref < 0) {
fkprintf("shmobj_release(%p):ref %ld\n",
memobj, obj->ref);
panic("shmobj_release:freeing free shmobj");
}
freeobj = obj;
}
memobj_unlock(&obj->memobj);
if (freeobj) {
/* zap page_list */
for (;;) {
struct page *page;
int count;
page = page_list_first(obj);
if (!page) {
break;
}
page_list_remove(obj, page);
dkprintf("shmobj_release(%p):"
"release page. %p %#lx %d %d",
memobj, page, page_to_phys(page),
page->mode, page->count);
count = ihk_atomic_sub_return(1, &page->count);
if (!((page->mode == PM_MAPPED) && (count == 0))) {
fkprintf("shmobj_release(%p): "
"page %p phys %#lx mode %#x"
" count %d off %#lx\n",
memobj, page,
page_to_phys(page),
page->mode, count,
page->offset);
panic("shmobj_release");
}
/* XXX:NYI: large pages */
page->mode = PM_NONE;
free_pages(phys_to_virt(page_to_phys(page)), 1);
}
dkprintf("shmobj_release(%p):free shmobj", memobj);
kfree(freeobj);
}
dkprintf("shmobj_release(%p):\n", memobj);
return;
}
static void shmobj_ref(struct memobj *memobj)
{
struct shmobj *obj = to_shmobj(memobj);
long newref;
dkprintf("shmobj_ref(%p)\n", memobj);
memobj_lock(&obj->memobj);
newref = ++obj->ref;
memobj_unlock(&obj->memobj);
dkprintf("shmobj_ref(%p): newref %ld\n", memobj, newref);
return;
}
static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align,
uintptr_t *physp)
{
struct shmobj *obj = to_shmobj(memobj);
int error;
struct page *page;
int npages;
void *virt = NULL;
uintptr_t phys = -1;
dkprintf("shmobj_get_page(%p,%#lx,%d,%p)\n",
memobj, off, p2align, physp);
memobj_lock(&obj->memobj);
if (off & ~PAGE_MASK) {
error = -EINVAL;
ekprintf("shmobj_get_page(%p,%#lx,%d,%p):invalid argument. %d\n",
memobj, off, p2align, physp, error);
goto out;
}
if (p2align != PAGE_P2ALIGN) { /* XXX:NYI:large pages */
error = -ENOMEM;
ekprintf("shmobj_get_page(%p,%#lx,%d,%p):large page. %d\n",
memobj, off, p2align, physp, error);
goto out;
}
if (obj->ds.shm_segsz <= off) {
error = -ERANGE;
ekprintf("shmobj_get_page(%p,%#lx,%d,%p):beyond the end. %d\n",
memobj, off, p2align, physp, error);
goto out;
}
if ((obj->ds.shm_segsz - off) < (PAGE_SIZE << p2align)) {
error = -ENOSPC;
ekprintf("shmobj_get_page(%p,%#lx,%d,%p):too large. %d\n",
memobj, off, p2align, physp, error);
goto out;
}
page = page_list_lookup(obj, off);
if (!page) {
npages = 1 << p2align;
virt = ihk_mc_alloc_pages(npages, IHK_MC_AP_NOWAIT);
if (!virt) {
error = -ENOMEM;
ekprintf("shmobj_get_page(%p,%#lx,%d,%p):"
"alloc failed. %d\n",
memobj, off, p2align, physp, error);
goto out;
}
phys = virt_to_phys(virt);
page = phys_to_page(phys);
if (page->mode != PM_NONE) {
fkprintf("shmobj_get_page(%p,%#lx,%d,%p):"
"page %p %#lx %d %d %#lx\n",
memobj, off, p2align, physp,
page, page_to_phys(page), page->mode,
page->count, page->offset);
panic("shmobj_get_page()");
}
memset(virt, 0, npages*PAGE_SIZE);
page->mode = PM_MAPPED;
page->offset = off;
ihk_atomic_set(&page->count, 1);
page_list_insert(obj, page);
virt = NULL;
dkprintf("shmobj_get_page(%p,%#lx,%d,%p):alloc page. %p %#lx\n",
memobj, off, p2align, physp, page, phys);
}
ihk_atomic_inc(&page->count);
error = 0;
*physp = page_to_phys(page);
out:
memobj_unlock(&obj->memobj);
if (virt) {
ihk_mc_free_pages(virt, npages);
}
dkprintf("shmobj_get_page(%p,%#lx,%d,%p):%d\n",
memobj, off, p2align, physp, error);
return error;
}
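shmobj gives anonymous MAP_SHARED mappings real backing pages that live in the object rather than in any single process, which is what the new MAP_SHARED branch added to sys_mmap() (below) relies on. The user-visible behavior it implements, as a small demo (error checks omitted):

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	/* anonymous shared memory: one set of pages seen by both
	 * sides of fork(), instead of copy-on-write private pages */
	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
	               MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	if (fork() == 0) {
		strcpy(p, "written by child");
		_exit(0);
	}
	wait(NULL);
	printf("parent sees: %s\n", p);	/* shared, not copied */
	return 0;
}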


@@ -45,6 +45,7 @@
#include <mman.h>
#include <kmalloc.h>
#include <memobj.h>
#include <shm.h>
/* Headers taken from kitten LWK */
#include <lwk/stddef.h>
@@ -168,6 +169,7 @@ static void send_syscall(struct syscall_request *req, int cpu, int pid)
#endif
}
ihk_spinlock_t syscall_lock;
long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx,
int cpu, int pid)
@@ -176,6 +178,9 @@ long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx,
struct syscall_request req2 IHK_DMA_ALIGN;
struct syscall_params *scp;
int error;
long rc;
int islock = 0;
unsigned long irqstate;
dkprintf("SC(%d)[%3d] sending syscall\n",
ihk_mc_get_processor_id(),
@@ -184,6 +189,8 @@ long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx,
if(req->number == __NR_exit_group ||
req->number == __NR_kill){ // interrupt syscall
scp = &get_cpu_local_var(0)->scp2;
islock = 1;
irqstate = ihk_mc_spinlock_lock(&syscall_lock);
}
else{
scp = &get_cpu_local_var(cpu)->scp;
@@ -209,7 +216,7 @@ long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx,
cpu_local_var(current)->pid);
error = page_fault_process(get_cpu_local_var(cpu)->current,
(void *)res->fault_address,
res->fault_reason);
res->fault_reason|PF_POPULATE);
/* send result */
req2.number = __NR_mmap;
@@ -225,7 +232,12 @@ long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx,
ihk_mc_get_processor_id(),
req->number, res->ret);
return res->ret;
rc = res->ret;
if(islock){
ihk_mc_spinlock_unlock(&syscall_lock, irqstate);
}
return rc;
}
long syscall_generic_forwarding(int n, ihk_mc_user_context_t *ctx)
@@ -630,12 +642,13 @@ SYSCALL_DECLARE(mmap)
const int prot = ihk_mc_syscall_arg2(ctx);
const int flags = ihk_mc_syscall_arg3(ctx);
const int fd = ihk_mc_syscall_arg4(ctx);
const off_t off = ihk_mc_syscall_arg5(ctx);
const off_t off0 = ihk_mc_syscall_arg5(ctx);
struct process *proc = cpu_local_var(current);
struct vm_regions *region = &proc->vm->region;
intptr_t addr;
size_t len;
off_t off;
int error;
intptr_t npages;
int p2align;
@@ -646,10 +659,11 @@ SYSCALL_DECLARE(mmap)
int maxprot;
int denied;
int ro_vma_mapped = 0;
struct shmid_ds ads;
dkprintf("[%d]sys_mmap(%lx,%lx,%x,%x,%d,%lx)\n",
ihk_mc_get_processor_id(),
addr0, len0, prot, flags, fd, off);
addr0, len0, prot, flags, fd, off0);
/* check constants for flags */
if (1) {
@@ -681,9 +695,9 @@ SYSCALL_DECLARE(mmap)
|| ((region->user_end - len) < addr)
|| !(flags & (MAP_SHARED | MAP_PRIVATE))
|| ((flags & MAP_SHARED) && (flags & MAP_PRIVATE))
|| (off & (PAGE_SIZE - 1))) {
|| (off0 & (PAGE_SIZE - 1))) {
ekprintf("sys_mmap(%lx,%lx,%x,%x,%x,%lx):EINVAL\n",
addr0, len0, prot, flags, fd, off);
addr0, len0, prot, flags, fd, off0);
error = -EINVAL;
goto out2;
}
@@ -692,7 +706,7 @@ SYSCALL_DECLARE(mmap)
if ((flags & error_flags)
|| (flags & ~(supported_flags | ignored_flags))) {
ekprintf("sys_mmap(%lx,%lx,%x,%x,%x,%lx):unknown flags %x\n",
addr0, len0, prot, flags, fd, off,
addr0, len0, prot, flags, fd, off0,
(flags & ~(supported_flags | ignored_flags)));
error = -EINVAL;
goto out2;
@@ -754,8 +768,10 @@ SYSCALL_DECLARE(mmap)
}
phys = 0;
off = 0;
maxprot = PROT_READ | PROT_WRITE | PROT_EXEC;
if (!(flags & MAP_ANONYMOUS)) {
off = off0;
error = fileobj_create(fd, &memobj, &maxprot);
if (error) {
ekprintf("sys_mmap:fileobj_create failed. %d\n", error);
@@ -781,6 +797,22 @@ SYSCALL_DECLARE(mmap)
}
phys = virt_to_phys(p);
}
else if (flags & MAP_SHARED) {
memset(&ads, 0, sizeof(ads));
ads.shm_segsz = len;
error = shmobj_create(&ads, &memobj);
if (error) {
ekprintf("sys_mmap:shmobj_create failed. %d\n", error);
goto out;
}
}
else {
error = zeroobj_create(&memobj);
if (error) {
ekprintf("sys_mmap:zeroobj_create failed. %d\n", error);
goto out;
}
}
if ((flags & MAP_PRIVATE) && (maxprot & PROT_READ)) {
maxprot |= PROT_WRITE;
@@ -844,7 +876,7 @@ out2:
}
dkprintf("[%d]sys_mmap(%lx,%lx,%x,%x,%d,%lx): %ld %lx\n",
ihk_mc_get_processor_id(),
addr0, len0, prot, flags, fd, off, error, addr);
addr0, len0, prot, flags, fd, off0, error, addr);
return (!error)? addr: error;
}
@@ -1702,15 +1734,14 @@ SYSCALL_DECLARE(madvise)
dkprintf("[%d]sys_madvise(%lx,%lx,%x):not contig "
"%lx [%lx-%lx)\n",
ihk_mc_get_processor_id(), start,
len0, advice, addr, range->start,
range->end);
len0, advice, addr, range?range->start:0,
range?range->end:0);
error = -ENOMEM;
goto out;
}
#define MEMOBJ_IS_FILEOBJ(obj) ((obj) != NULL)
if (!MEMOBJ_IS_FILEOBJ(range->memobj)) {
dkprintf("[%d]sys_madvise(%lx,%lx,%x):not fileobj "
if (!range->memobj || !memobj_has_pager(range->memobj)) {
dkprintf("[%d]sys_madvise(%lx,%lx,%x):has not pager"
"[%lx-%lx) %lx\n",
ihk_mc_get_processor_id(), start,
len0, advice, range->start,
@@ -1888,56 +1919,104 @@ SYSCALL_DECLARE(ptrace)
return -ENOSYS;
}
#define MIN2(x,y) ((x) < (y) ? (x) : (y))
SYSCALL_DECLARE(sched_setaffinity)
{
#if 0
int pid = (int)ihk_mc_syscall_arg0(ctx);
unsigned int len = (unsigned int)ihk_mc_syscall_arg1(ctx);
#endif
cpu_set_t *mask = (cpu_set_t *)ihk_mc_syscall_arg2(ctx);
unsigned long __phys;
#if 0
int i;
#endif
/* TODO: check mask is in user's page table */
if(!mask) { return -EFAULT; }
if (ihk_mc_pt_virt_to_phys(cpu_local_var(current)->vm->page_table,
(void *)mask,
&__phys)) {
int tid = (int)ihk_mc_syscall_arg0(ctx);
size_t len = (size_t)ihk_mc_syscall_arg1(ctx);
cpu_set_t *u_cpu_set = (cpu_set_t *)ihk_mc_syscall_arg2(ctx);
cpu_set_t k_cpu_set, cpu_set;
struct process *thread;
int cpu_id;
if (sizeof(k_cpu_set) > len) {
kprintf("%s:%d\n Too small buffer.", __FILE__, __LINE__);
return -EINVAL;
}
len = MIN2(len, sizeof(k_cpu_set));
if (copy_from_user(cpu_local_var(current), &k_cpu_set, u_cpu_set, len)) {
kprintf("%s:%d copy_from_user failed.\n", __FILE__, __LINE__);
return -EFAULT;
}
#if 0
dkprintf("sched_setaffinity,\n");
for(i = 0; i < len/sizeof(__cpu_mask); i++) {
dkprintf("mask[%d]=%lx,", i, mask->__bits[i]);
}
#endif
return 0;
// XXX: We should build something like cpu_available_mask in advance
CPU_ZERO(&cpu_set);
extern int num_processors;
for (cpu_id = 0; cpu_id < num_processors; cpu_id++)
if (CPU_ISSET(cpu_id, &k_cpu_set))
CPU_SET(cpu_id, &cpu_set);
for (cpu_id = 0; cpu_id < num_processors; cpu_id++) {
ihk_mc_spinlock_lock_noirq(&get_cpu_local_var(cpu_id)->runq_lock);
list_for_each_entry(thread, &get_cpu_local_var(cpu_id)->runq, sched_list)
if (thread->pid && thread->tid == tid)
goto found; /* without unlocking runq_lock */
ihk_mc_spinlock_unlock_noirq(&get_cpu_local_var(cpu_id)->runq_lock);
}
kprintf("%s:%d Thread not found.\n", __FILE__, __LINE__);
return -ESRCH;
found:
memcpy(&thread->cpu_set, &cpu_set, sizeof(cpu_set));
if (!CPU_ISSET(cpu_id, &thread->cpu_set)) {
hold_process(thread);
ihk_mc_spinlock_unlock_noirq(&get_cpu_local_var(cpu_id)->runq_lock);
sched_request_migrate(cpu_id, thread);
release_process(thread);
return 0;
} else {
ihk_mc_spinlock_unlock_noirq(&get_cpu_local_var(cpu_id)->runq_lock);
return 0;
}
}
#define MIN2(x,y) ((x) < (y) ? (x) : (y))
#define MIN3(x,y,z) MIN2(MIN2((x),(y)),MIN2((y),(z)))
// see linux-2.6.34.13/kernel/sched.c
SYSCALL_DECLARE(sched_getaffinity)
{
//int pid = (int)ihk_mc_syscall_arg0(ctx);
unsigned int len = (int)ihk_mc_syscall_arg1(ctx);
//int cpu_id;
cpu_set_t *mask = (cpu_set_t *)ihk_mc_syscall_arg2(ctx);
struct ihk_mc_cpu_info *cpu_info = ihk_mc_get_cpu_info();
if(len*8 < cpu_info->ncpus) { return -EINVAL; }
if(len & (sizeof(unsigned long)-1)) { return -EINVAL; }
int min_len = MIN2(len, sizeof(cpu_set_t));
//int min_ncpus = MIN2(min_len*8, cpu_info->ncpus);
int tid = (int)ihk_mc_syscall_arg0(ctx);
size_t len = (size_t)ihk_mc_syscall_arg1(ctx);
cpu_set_t k_cpu_set, *u_cpu_set = (cpu_set_t *)ihk_mc_syscall_arg2(ctx);
CPU_ZERO_S(min_len, mask);
CPU_SET_S(ihk_mc_get_hardware_processor_id(), min_len, mask);
//for (cpu_id = 0; cpu_id < min_ncpus; ++cpu_id)
// CPU_SET_S(cpu_info->hw_ids[cpu_id], min_len, mask);
int ret;
int found = 0;
int i;
// dkprintf("sched_getaffinity returns full mask\n");
if (sizeof(k_cpu_set) > len) {
kprintf("%s:%d Too small buffer.\n", __FILE__, __LINE__);
return -EINVAL;
}
len = MIN2(len, sizeof(k_cpu_set));
return min_len;
extern int num_processors;
for (i = 0; i < num_processors && !found; i++) {
struct process *thread;
ihk_mc_spinlock_lock_noirq(&get_cpu_local_var(i)->runq_lock);
list_for_each_entry(thread, &get_cpu_local_var(i)->runq, sched_list) {
if (thread->pid && thread->tid == tid) {
found = 1;
memcpy(&k_cpu_set, &thread->cpu_set, sizeof(k_cpu_set));
break;
}
}
ihk_mc_spinlock_unlock_noirq(&get_cpu_local_var(i)->runq_lock);
}
if (!found) {
kprintf("%s:%d Thread not found.\n", __FILE__, __LINE__);
return -ESRCH;
}
ret = copy_to_user(cpu_local_var(current), u_cpu_set, &k_cpu_set, len);
kprintf("%s %d %d\n", __FILE__, __LINE__, ret);
if (ret < 0)
return ret;
return len;
}
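The rewritten pair now behaves like the Linux calls: len must cover a cpu_set_t, the mask is copied between user and kernel space, the target thread is located on a runqueue, and setting an affinity that excludes the thread's current CPU triggers sched_request_migrate(). Standard user-space usage for reference (glibc, Linux):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(0, &set);	/* allow CPU 0 only */
	if (sched_setaffinity(0, sizeof(set), &set)) /* 0 = this thread */
		perror("sched_setaffinity");

	CPU_ZERO(&set);
	sched_getaffinity(0, sizeof(set), &set);
	printf("now on cpu %d, mask allows %d cpu(s)\n",
	       sched_getcpu(), CPU_COUNT(&set));
	return 0;
}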
SYSCALL_DECLARE(get_cpu_id)
{
return ihk_mc_get_processor_id();
}
SYSCALL_DECLARE(sched_yield)
@@ -2035,7 +2114,8 @@ SYSCALL_DECLARE(mlock)
dkprintf("[%d]sys_mlock(%lx,%lx):not contiguous."
" %lx [%lx-%lx)\n",
ihk_mc_get_processor_id(), start0,
len0, addr, range->start, range->end);
len0, addr, range?range->start:0,
range?range->end:0);
error = -ENOMEM;
goto out;
}
@@ -2209,7 +2289,8 @@ SYSCALL_DECLARE(munlock)
dkprintf("[%d]sys_munlock(%lx,%lx):not contiguous."
" %lx [%lx-%lx)\n",
ihk_mc_get_processor_id(), start0,
len0, addr, range->start, range->end);
len0, addr, range?range->start:0,
range?range->end:0);
error = -ENOMEM;
goto out;
}
@@ -2271,6 +2352,302 @@ out2:
return error;
}
SYSCALL_DECLARE(remap_file_pages)
{
const uintptr_t start0 = ihk_mc_syscall_arg0(ctx);
const size_t size = ihk_mc_syscall_arg1(ctx);
const int prot = ihk_mc_syscall_arg2(ctx);
const size_t pgoff = ihk_mc_syscall_arg3(ctx);
const int flags = ihk_mc_syscall_arg4(ctx);
int error;
const uintptr_t start = start0 & PAGE_MASK;
const uintptr_t end = start + size;
const off_t off = (off_t)pgoff << PAGE_SHIFT;
struct process * const proc = cpu_local_var(current);
struct vm_range *range;
int er;
int need_populate = 0;
dkprintf("sys_remap_file_pages(%#lx,%#lx,%#x,%#lx,%#x)\n",
start0, size, prot, pgoff, flags);
ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock);
#define PGOFF_LIMIT ((off_t)1 << ((8*sizeof(off_t) - 1) - PAGE_SHIFT))
if ((size <= 0) || (size & (PAGE_SIZE - 1)) || (prot != 0)
|| (pgoff < 0) || (PGOFF_LIMIT <= pgoff)
|| ((PGOFF_LIMIT - pgoff) < (size / PAGE_SIZE))
|| !((start < end) || (end == 0))) {
ekprintf("sys_remap_file_pages(%#lx,%#lx,%#x,%#lx,%#x):"
"invalid args\n",
start0, size, prot, pgoff, flags);
error = -EINVAL;
goto out;
}
range = lookup_process_memory_range(proc->vm, start, end);
if (!range || (start < range->start) || (range->end < end)
|| (range->flag & VR_PRIVATE)
|| (range->flag & (VR_REMOTE|VR_IO_NOCACHE|VR_RESERVED))
|| !range->memobj) {
ekprintf("sys_remap_file_pages(%#lx,%#lx,%#x,%#lx,%#x):"
"invalid VMR:[%#lx-%#lx) %#lx %p\n",
start0, size, prot, pgoff, flags,
range?range->start:0, range?range->end:0,
range?range->flag:0, range?range->memobj:NULL);
error = -EINVAL;
goto out;
}
range->flag |= VR_FILEOFF;
error = remap_process_memory_range(proc->vm, range, start, end, off);
if (error) {
ekprintf("sys_remap_file_pages(%#lx,%#lx,%#x,%#lx,%#x):"
"remap failed %d\n",
start0, size, prot, pgoff, flags, error);
goto out;
}
clear_host_pte(start, size); /* XXX: workaround */
if (range->flag & VR_LOCKED) {
need_populate = 1;
}
error = 0;
out:
ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock);
if (need_populate
&& (er = populate_process_memory(
proc, (void *)start, size))) {
ekprintf("sys_remap_file_pages(%#lx,%#lx,%#x,%#lx,%#x):"
"populate failed %d\n",
start0, size, prot, pgoff, flags, er);
/* ignore populate error */
}
dkprintf("sys_remap_file_pages(%#lx,%#lx,%#x,%#lx,%#x): %d\n",
start0, size, prot, pgoff, flags, error);
return error;
}
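For context, remap_file_pages() rewires the page-to-file-offset mapping inside an existing MAP_SHARED region without creating new VMAs; the kernel side above records the new offsets as file-offset PTEs (VR_FILEOFF) that the page-fault path resolves later. The Linux call has been deprecated since 3.16, but this is the interface implemented here; a minimal caller's-eye sketch:

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	size_t pg = (size_t)sysconf(_SC_PAGESIZE);
	FILE *f = tmpfile();
	char *p;

	ftruncate(fileno(f), 2 * pg);
	p = mmap(NULL, 2 * pg, PROT_READ | PROT_WRITE,
	         MAP_SHARED, fileno(f), 0);
	/* make virtual page 0 show file page 1 instead of file page 0 */
	if (remap_file_pages(p, pg, 0, 1, 0))
		perror("remap_file_pages");
	return 0;
}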
SYSCALL_DECLARE(mremap)
{
const uintptr_t oldaddr = ihk_mc_syscall_arg0(ctx);
const size_t oldsize0 = ihk_mc_syscall_arg1(ctx);
const size_t newsize0 = ihk_mc_syscall_arg2(ctx);
const int flags = ihk_mc_syscall_arg3(ctx);
const uintptr_t newaddr = ihk_mc_syscall_arg4(ctx);
const ssize_t oldsize = (oldsize0 + PAGE_SIZE - 1) & PAGE_MASK;
const ssize_t newsize = (newsize0 + PAGE_SIZE - 1) & PAGE_MASK;
const uintptr_t oldstart = oldaddr;
const uintptr_t oldend = oldstart + oldsize;
struct process *proc = cpu_local_var(current);
struct process_vm *vm = proc->vm;
int error;
struct vm_range *range;
int need_relocate;
uintptr_t newstart;
uintptr_t newend;
size_t size;
uintptr_t ret;
uintptr_t lckstart = -1;
uintptr_t lckend = -1;
dkprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx)\n",
oldaddr, oldsize0, newsize0, flags, newaddr);
ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
if ((oldaddr & ~PAGE_MASK)
|| (oldsize < 0)
|| (newsize <= 0)
|| (flags & ~(MREMAP_MAYMOVE | MREMAP_FIXED))
|| ((flags & MREMAP_FIXED)
&& !(flags & MREMAP_MAYMOVE))
|| ((flags & MREMAP_FIXED)
&& (newaddr & ~PAGE_MASK))) {
error = -EINVAL;
ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):invalid. %d\n",
oldaddr, oldsize0, newsize0, flags, newaddr,
error);
goto out;
}
/* check original mapping */
range = lookup_process_memory_range(vm, oldstart, oldstart+PAGE_SIZE);
if (!range || (oldstart < range->start) || (range->end < oldend)
|| (range->flag & (VR_FILEOFF))
|| (range->flag & (VR_REMOTE|VR_IO_NOCACHE|VR_RESERVED))) {
error = -EFAULT;
ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
"lookup failed. %d %p %#lx-%#lx %#lx\n",
oldaddr, oldsize0, newsize0, flags, newaddr,
error, range, range?range->start:0,
range?range->end:0, range?range->flag:0);
goto out;
}
if (oldend < oldstart) {
error = -EINVAL;
ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
"old range overflow. %d\n",
oldaddr, oldsize0, newsize0, flags, newaddr,
error);
goto out;
}
/* determine new mapping range */
need_relocate = 0;
if (flags & MREMAP_FIXED) {
need_relocate = 1;
newstart = newaddr;
newend = newstart + newsize;
if (newstart < vm->region.user_start) {
error = -EPERM;
ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
"mmap_min_addr %#lx. %d\n",
oldaddr, oldsize0, newsize0, flags,
newaddr, vm->region.user_start,
error);
goto out;
}
if ((newstart < oldend) && (oldstart < newend)) {
error = -EINVAL;
ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
"fixed:overlapped. %d\n",
oldaddr, oldsize0, newsize0, flags,
newaddr, error);
goto out;
}
}
else if (!(flags & MREMAP_FIXED) && (oldsize < newsize)) {
if (oldend == range->end) {
newstart = oldstart;
newend = newstart + newsize;
error = extend_up_process_memory_range(vm, range,
newend);
if (!error) {
if (range->flag & VR_LOCKED) {
lckstart = oldend;
lckend = newend;
}
goto out;
}
}
if (!(flags & MREMAP_MAYMOVE)) {
error = -ENOMEM;
ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
"cannot relocate. %d\n",
oldaddr, oldsize0, newsize0, flags,
newaddr, error);
goto out;
}
need_relocate = 1;
error = search_free_space(newsize, vm->region.map_end,
(intptr_t *)&newstart);
if (error) {
ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
"search failed. %d\n",
oldaddr, oldsize0, newsize0, flags,
newaddr, error);
goto out;
}
newend = newstart + newsize;
}
else {
newstart = oldstart;
newend = newstart + newsize;
}
/* do the remap */
if (need_relocate) {
if (flags & MREMAP_FIXED) {
error = do_munmap((void *)newstart, newsize);
if (error) {
ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
"fixed:munmap failed. %d\n",
oldaddr, oldsize0, newsize0,
flags, newaddr, error);
goto out;
}
}
if (range->memobj) {
memobj_ref(range->memobj);
}
error = add_process_memory_range(proc, newstart, newend, -1,
range->flag, range->memobj,
range->objoff + (oldstart - range->start));
if (error) {
ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
"add failed. %d\n",
oldaddr, oldsize0, newsize0, flags,
newaddr, error);
if (range->memobj) {
memobj_release(range->memobj);
}
goto out;
}
if (range->flag & VR_LOCKED) {
lckstart = newstart;
lckend = newend;
}
if (oldsize > 0) {
size = (oldsize < newsize)? oldsize: newsize;
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
error = move_pte_range(vm->page_table,
(void *)oldstart, (void *)newstart,
size);
ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
if (error) {
ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
"move failed. %d\n",
oldaddr, oldsize0, newsize0,
flags, newaddr, error);
goto out;
}
error = do_munmap((void *)oldstart, oldsize);
if (error) {
ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
"relocate:munmap failed. %d\n",
oldaddr, oldsize0, newsize0,
flags, newaddr, error);
goto out;
}
}
}
else if (newsize < oldsize) {
error = do_munmap((void *)newend, (oldend - newend));
if (error) {
ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
"shrink:munmap failed. %d\n",
oldaddr, oldsize0, newsize0, flags,
newaddr, error);
goto out;
}
}
else {
/* nothing to do */
}
error = 0;
out:
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
if (!error && (lckstart < lckend)) {
error = populate_process_memory(proc, (void *)lckstart, (lckend - lckstart));
if (error) {
ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
"populate failed. %d %#lx-%#lx\n",
oldaddr, oldsize0, newsize0, flags,
newaddr, error, lckstart, lckend);
error = 0; /* ignore error */
}
}
ret = (error)? error: newstart;
dkprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):%d %#lx\n",
oldaddr, oldsize0, newsize0, flags, newaddr, error,
ret);
return ret;
}
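sys_mremap() first tries to grow in place via extend_up_process_memory_range(); only when the range cannot extend (or MREMAP_FIXED is given) does it relocate — duplicating the range, moving PTEs with move_pte_range(), and unmapping the old range. Typical user-space call for the common grow case:

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	/* grow an anonymous mapping; the kernel extends in place when
	 * it can, and relocates only because MREMAP_MAYMOVE allows it */
	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
	               MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	void *q = mremap(p, 4096, 8192, MREMAP_MAYMOVE);
	if (q == MAP_FAILED)
		perror("mremap");
	else
		printf("%p -> %p\n", p, q);
	return 0;
}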
#ifdef DCFA_KMOD
#ifdef CMD_DCFA
@@ -2407,6 +2784,7 @@ long syscall(int num, ihk_mc_user_context_t *ctx)
}
check_signal(l, NULL);
check_need_resched();
return l;
}

kernel/zeroobj.c Normal file

@@ -0,0 +1,206 @@
/**
* \file zeroobj.c
* License details are found in the file LICENSE.
* \brief
* read-only zeroed page object
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
*/
/*
* HISTORY:
*/
#include <ihk/atomic.h>
#include <ihk/debug.h>
#include <ihk/lock.h>
#include <ihk/mm.h>
#include <errno.h>
#include <kmalloc.h>
#include <list.h>
#include <memobj.h>
#include <memory.h>
#include <page.h>
#include <string.h>
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#define fkprintf(...) kprintf(__VA_ARGS__)
struct zeroobj {
struct memobj memobj; /* must be first */
struct list_head page_list;
};
static ihk_spinlock_t the_zeroobj_lock = SPIN_LOCK_UNLOCKED;
static struct zeroobj *the_zeroobj = NULL; /* singleton */
static memobj_get_page_func_t zeroobj_get_page;
static struct memobj_ops zeroobj_ops = {
.get_page = &zeroobj_get_page,
};
static struct zeroobj *to_zeroobj(struct memobj *memobj)
{
return (struct zeroobj *)memobj;
}
static struct memobj *to_memobj(struct zeroobj *zeroobj)
{
return &zeroobj->memobj;
}
/***********************************************************************
* page_list
*/
static void page_list_init(struct zeroobj *obj)
{
INIT_LIST_HEAD(&obj->page_list);
return;
}
static void page_list_insert(struct zeroobj *obj, struct page *page)
{
list_add(&page->list, &obj->page_list);
return;
}
static struct page *page_list_first(struct zeroobj *obj)
{
if (list_empty(&obj->page_list)) {
return NULL;
}
return list_first_entry(&obj->page_list, struct page, list);
}
/***********************************************************************
* zeroobj
*/
static int alloc_zeroobj(void)
{
int error;
struct zeroobj *obj = NULL;
void *virt = NULL;
uintptr_t phys;
struct page *page;
dkprintf("alloc_zeroobj()\n");
ihk_mc_spinlock_lock_noirq(&the_zeroobj_lock);
if (the_zeroobj) {
error = 0;
dkprintf("alloc_zeroobj():already. %d\n", error);
goto out;
}
obj = kmalloc(sizeof(*obj), IHK_MC_AP_NOWAIT);
if (!obj) {
error = -ENOMEM;
ekprintf("alloc_zeroobj():kmalloc failed. %d\n", error);
goto out;
}
memset(obj, 0, sizeof(*obj));
obj->memobj.ops = &zeroobj_ops;
page_list_init(obj);
ihk_mc_spinlock_init(&obj->memobj.lock);
virt = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT); /* XXX:NYI:large page */
if (!virt) {
error = -ENOMEM;
ekprintf("alloc_zeroobj():alloc pages failed. %d\n", error);
goto out;
}
phys = virt_to_phys(virt);
page = phys_to_page(phys);
if (page->mode != PM_NONE) {
fkprintf("alloc_zeroobj():"
"page %p %#lx %d %d %#lx\n",
page, page_to_phys(page), page->mode,
page->count, page->offset);
panic("alloc_zeroobj:dup alloc");
}
memset(virt, 0, PAGE_SIZE);
page->mode = PM_MAPPED;
page->offset = 0;
ihk_atomic_set(&page->count, 1);
page_list_insert(obj, page);
virt = NULL;
error = 0;
the_zeroobj = obj;
obj = NULL;
out:
ihk_mc_spinlock_unlock_noirq(&the_zeroobj_lock);
if (virt) {
ihk_mc_free_pages(virt, 1);
}
if (obj) {
kfree(obj);
}
dkprintf("alloc_zeroobj():%d %p\n", error, the_zeroobj);
return error;
}
int zeroobj_create(struct memobj **objp)
{
int error;
dkprintf("zeroobj_create(%p)\n", objp);
if (!the_zeroobj) {
error = alloc_zeroobj();
if (error) {
goto out;
}
}
error = 0;
*objp = to_memobj(the_zeroobj);
out:
dkprintf("zeroobj_create(%p):%d %p\n", objp, error, *objp);
return error;
}
static int zeroobj_get_page(struct memobj *memobj, off_t off, int p2align,
uintptr_t *physp)
{
int error;
struct zeroobj *obj = to_zeroobj(memobj);
struct page *page;
dkprintf("zeroobj_get_page(%p,%#lx,%d,%p)\n",
memobj, off, p2align, physp);
if (off & ~PAGE_MASK) {
error = -EINVAL;
ekprintf("zeroobj_get_page(%p,%#lx,%d,%p):invalid argument. %d\n",
memobj, off, p2align, physp, error);
goto out;
}
if (p2align != PAGE_P2ALIGN) { /* XXX:NYI:large pages */
error = -ENOMEM;
ekprintf("zeroobj_get_page(%p,%#lx,%d,%p):large page. %d\n",
memobj, off, p2align, physp, error);
goto out;
}
page = page_list_first(obj);
if (!page) {
error = -ENOMEM;
ekprintf("zeroobj_get_page(%p,%#lx,%d,%p):page not found. %d\n",
memobj, off, p2align, physp, error);
goto out;
}
ihk_atomic_inc(&page->count);
error = 0;
*physp = page_to_phys(page);
out:
dkprintf("zeroobj_get_page(%p,%#lx,%d,%p):%d\n",
memobj, off, p2align, physp, error);
return error;
}
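zeroobj_create() and alloc_zeroobj() form a check-lock-recheck singleton: the unlocked test in zeroobj_create() is an optimization, and alloc_zeroobj() repeats the test under the_zeroobj_lock so only one CPU ever builds the object. A user-space rendition of the idiom (illustrative names; portable code would want atomics or pthread_once for the unlocked read, where the kernel relies on its spinlock and x86 ordering):

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t the_lock = PTHREAD_MUTEX_INITIALIZER;
static void *the_obj;	/* singleton */

static int obj_create(void **objp)
{
	if (!the_obj) {			/* fast path, racy by design */
		pthread_mutex_lock(&the_lock);
		if (!the_obj)		/* recheck: we may have lost the race */
			the_obj = calloc(1, 64);
		pthread_mutex_unlock(&the_lock);
		if (!the_obj)
			return -1;	/* allocation failed */
	}
	*objp = the_obj;
	return 0;
}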