From a8a226a443419329516a55ce688619f7a64731e1 Mon Sep 17 00:00:00 2001 From: NAKAMURA Gou Date: Wed, 2 Jul 2014 19:58:13 +0900 Subject: [PATCH 1/7] use PF_POPULATE for resolving delegated page faults Since a host side PTE does not follow McKernel's copying a COW page, COW pages cannot be used for resolving delegated page faults. Therefore, to copy pages eagerly, PF_POPULATE should be used. --- kernel/syscall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/syscall.c b/kernel/syscall.c index 5fb5cc13..ad813f62 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -209,7 +209,7 @@ long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx, cpu_local_var(current)->pid); error = page_fault_process(get_cpu_local_var(cpu)->current, (void *)res->fault_address, - res->fault_reason); + res->fault_reason|PF_POPULATE); /* send result */ req2.number = __NR_mmap; From d59628e131cebfaeb517c7de390539269c8e14ee Mon Sep 17 00:00:00 2001 From: NAKAMURA Gou Date: Wed, 2 Jul 2014 20:01:05 +0900 Subject: [PATCH 2/7] fix debug prints to avoid NULL dereferences --- kernel/syscall.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/syscall.c b/kernel/syscall.c index ad813f62..6856ab12 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -1702,8 +1702,8 @@ SYSCALL_DECLARE(madvise) dkprintf("[%d]sys_madvise(%lx,%lx,%x):not contig " "%lx [%lx-%lx)\n", ihk_mc_get_processor_id(), start, - len0, advice, addr, range->start, - range->end); + len0, advice, addr, range?range->start:0, + range?range->end:0); error = -ENOMEM; goto out; } @@ -2035,7 +2035,8 @@ SYSCALL_DECLARE(mlock) dkprintf("[%d]sys_mlock(%lx,%lx):not contiguous." " %lx [%lx-%lx)\n", ihk_mc_get_processor_id(), start0, - len0, addr, range->start, range->end); + len0, addr, range?range->start:0, + range?range->end:0); error = -ENOMEM; goto out; } @@ -2209,7 +2210,8 @@ SYSCALL_DECLARE(munlock) dkprintf("[%d]sys_munlock(%lx,%lx):not contiguous." " %lx [%lx-%lx)\n", ihk_mc_get_processor_id(), start0, - len0, addr, range->start, range->end); + len0, addr, range?range->start:0, + range?range->end:0); error = -ENOMEM; goto out; } From 9efb5e4fc5be2300acc88f4c0225110e165362b3 Mon Sep 17 00:00:00 2001 From: NAKAMURA Gou Date: Thu, 3 Jul 2014 12:06:29 +0900 Subject: [PATCH 3/7] add memobj_has_pager() --- kernel/fileobj.c | 1 + kernel/include/memobj.h | 12 ++++++++++++ kernel/syscall.c | 5 ++--- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/kernel/fileobj.c b/kernel/fileobj.c index 18a27046..df83646d 100644 --- a/kernel/fileobj.c +++ b/kernel/fileobj.c @@ -184,6 +184,7 @@ int fileobj_create(int fd, struct memobj **objp, int *maxprotp) memset(newobj, 0, sizeof(*newobj)); newobj->memobj.ops = &fileobj_ops; + newobj->memobj.flags = MF_HAS_PAGER; newobj->handle = result.handle; newobj->sref = 1; newobj->cref = 1; diff --git a/kernel/include/memobj.h b/kernel/include/memobj.h index c846ad09..7a7e99c3 100644 --- a/kernel/include/memobj.h +++ b/kernel/include/memobj.h @@ -18,8 +18,15 @@ #include #include +enum { + /* for memobj.flags */ + MF_HAS_PAGER = 0x0001, +}; + struct memobj { struct memobj_ops * ops; + uint32_t flags; + int8_t padding[4]; ihk_spinlock_t lock; }; @@ -74,6 +81,11 @@ static inline void memobj_unlock(struct memobj *obj) ihk_mc_spinlock_unlock_noirq(&obj->lock); } +static inline int memobj_has_pager(struct memobj *obj) +{ + return !!(obj->flags & MF_HAS_PAGER); +} + int fileobj_create(int fd, struct memobj **objp, int *maxprotp); #endif /* HEADER_MEMOBJ_H */ diff --git a/kernel/syscall.c b/kernel/syscall.c index 6856ab12..8fb35986 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -1708,9 +1708,8 @@ SYSCALL_DECLARE(madvise) goto out; } -#define MEMOBJ_IS_FILEOBJ(obj) ((obj) != NULL) - if (!MEMOBJ_IS_FILEOBJ(range->memobj)) { - dkprintf("[%d]sys_madvise(%lx,%lx,%x):not fileobj " + if (!range->memobj || !memobj_has_pager(range->memobj)) { + dkprintf("[%d]sys_madvise(%lx,%lx,%x):has not pager" "[%lx-%lx) %lx\n", ihk_mc_get_processor_id(), start, len0, advice, range->start, From 9057268f0eaa9a3787fba39f2d6525d3e65c33c0 Mon Sep 17 00:00:00 2001 From: NAKAMURA Gou Date: Thu, 3 Jul 2014 12:11:02 +0900 Subject: [PATCH 4/7] add memobj's default action --- kernel/include/memobj.h | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/kernel/include/memobj.h b/kernel/include/memobj.h index 7a7e99c3..35350c60 100644 --- a/kernel/include/memobj.h +++ b/kernel/include/memobj.h @@ -16,6 +16,7 @@ #include #include #include +#include #include enum { @@ -46,29 +47,42 @@ struct memobj_ops { static inline void memobj_release(struct memobj *obj) { - (*obj->ops->release)(obj); + if (obj->ops->release) { + (*obj->ops->release)(obj); + } } static inline void memobj_ref(struct memobj *obj) { - (*obj->ops->ref)(obj); + if (obj->ops->ref) { + (*obj->ops->ref)(obj); + } } static inline int memobj_get_page(struct memobj *obj, off_t off, int p2align, uintptr_t *physp) { - return (*obj->ops->get_page)(obj, off, p2align, physp); + if (obj->ops->get_page) { + return (*obj->ops->get_page)(obj, off, p2align, physp); + } + return -ENXIO; } static inline uintptr_t memobj_copy_page(struct memobj *obj, uintptr_t orgphys, int p2align) { - return (*obj->ops->copy_page)(obj, orgphys, p2align); + if (obj->ops->copy_page) { + return (*obj->ops->copy_page)(obj, orgphys, p2align); + } + return -ENXIO; } static inline int memobj_flush_page(struct memobj *obj, uintptr_t phys, size_t pgsize) { - return (*obj->ops->flush_page)(obj, phys, pgsize); + if (obj->ops->flush_page) { + return (*obj->ops->flush_page)(obj, phys, pgsize); + } + return 0; } static inline void memobj_lock(struct memobj *obj) From 380fcbda7368689d70c78ed1650e7330546c6f68 Mon Sep 17 00:00:00 2001 From: NAKAMURA Gou Date: Thu, 3 Jul 2014 13:55:26 +0900 Subject: [PATCH 5/7] add shmobj for shared anonymous mappings --- kernel/Makefile.build | 2 +- kernel/include/memobj.h | 2 + kernel/include/shm.h | 49 +++++++ kernel/shmobj.c | 287 ++++++++++++++++++++++++++++++++++++++++ kernel/syscall.c | 26 +++- 5 files changed, 359 insertions(+), 7 deletions(-) create mode 100644 kernel/include/shm.h create mode 100644 kernel/shmobj.c diff --git a/kernel/Makefile.build b/kernel/Makefile.build index 1a7f6438..ded7f58d 100644 --- a/kernel/Makefile.build +++ b/kernel/Makefile.build @@ -1,6 +1,6 @@ IHKDIR=$(IHKBASE)/$(TARGETDIR) OBJS = init.o mem.o debug.o mikc.o listeners.o ap.o syscall.o cls.o host.o -OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o +OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o shmobj.o DEPSRCS=$(wildcard $(SRC)/*.c) CFLAGS += -I$(SRC)/include -mcmodel=kernel -D__KERNEL__ diff --git a/kernel/include/memobj.h b/kernel/include/memobj.h index 35350c60..0ba9c838 100644 --- a/kernel/include/memobj.h +++ b/kernel/include/memobj.h @@ -18,6 +18,7 @@ #include #include #include +#include enum { /* for memobj.flags */ @@ -101,5 +102,6 @@ static inline int memobj_has_pager(struct memobj *obj) } int fileobj_create(int fd, struct memobj **objp, int *maxprotp); +int shmobj_create(struct shmid_ds *ds, struct memobj **objp); #endif /* HEADER_MEMOBJ_H */ diff --git a/kernel/include/shm.h b/kernel/include/shm.h new file mode 100644 index 00000000..3117aba5 --- /dev/null +++ b/kernel/include/shm.h @@ -0,0 +1,49 @@ +/** + * \file shm.h + * License details are found in the file LICENSE. + * \brief + * header file for System V shared memory + * \author Gou Nakamura + */ +/* + * HISTORY: + */ + +#ifndef HEADER_SHM_H +#define HEADER_SHM_H + +/* begin types.h */ +typedef int32_t key_t; +typedef uint32_t uid_t; +typedef uint32_t gid_t; +typedef int64_t time_t; +typedef int32_t pid_t; +/* end types.h */ + +typedef uint64_t shmatt_t; + +struct ipc_perm { + key_t key; + uid_t uid; + gid_t gid; + uid_t cuid; + gid_t cgid; + uint16_t mode; + uint8_t padding[2]; + uint16_t seq; + uint8_t padding2[22]; +}; + +struct shmid_ds { + struct ipc_perm shm_perm; + size_t shm_segsz; + time_t shm_atime; + time_t shm_dtime; + time_t shm_ctime; + pid_t shm_cpid; + pid_t shm_lpid; + shmatt_t shm_nattch; + uint8_t padding[16]; +}; + +#endif /* HEADER_SHM_H */ diff --git a/kernel/shmobj.c b/kernel/shmobj.c new file mode 100644 index 00000000..b0c03942 --- /dev/null +++ b/kernel/shmobj.c @@ -0,0 +1,287 @@ +/** + * \file shmobj.c + * License details are found in the file LICENSE. + * \brief + * shared memory object + * \author Gou Nakamura + */ +/* + * HISTORY: + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) +#define ekprintf(...) kprintf(__VA_ARGS__) +#define fkprintf(...) kprintf(__VA_ARGS__) + +struct shmobj { + struct memobj memobj; /* must be first */ + long ref; + struct shmid_ds ds; + struct list_head page_list; +}; + +static memobj_release_func_t shmobj_release; +static memobj_ref_func_t shmobj_ref; +static memobj_get_page_func_t shmobj_get_page; + +static struct memobj_ops shmobj_ops = { + .release = &shmobj_release, + .ref = &shmobj_ref, + .get_page = &shmobj_get_page, +}; + +static struct shmobj *to_shmobj(struct memobj *memobj) +{ + return (struct shmobj *)memobj; +} + +static struct memobj *to_memobj(struct shmobj *shmobj) +{ + return &shmobj->memobj; +} + +/*********************************************************************** + * page_list + */ +static void page_list_init(struct shmobj *obj) +{ + INIT_LIST_HEAD(&obj->page_list); + return; +} + +static void page_list_insert(struct shmobj *obj, struct page *page) +{ + list_add(&page->list, &obj->page_list); + return; +} + +static void page_list_remove(struct shmobj *obj, struct page *page) +{ + list_del(&page->list); + return; +} + +static struct page *page_list_lookup(struct shmobj *obj, off_t off) +{ + struct page *page; + + list_for_each_entry(page, &obj->page_list, list) { + if (page->offset == off) { + goto out; + } + } + page = NULL; + +out: + return page; +} + +static struct page *page_list_first(struct shmobj *obj) +{ + if (list_empty(&obj->page_list)) { + return NULL; + } + + return list_first_entry(&obj->page_list, struct page, list); +} + +int shmobj_create(struct shmid_ds *ds, struct memobj **objp) +{ + struct shmobj *obj = NULL; + int error; + + dkprintf("shmobj_create(%p %#lx,%p)\n", ds, ds->shm_segsz, objp); + obj = kmalloc(sizeof(*obj), IHK_MC_AP_NOWAIT); + if (!obj) { + error = -ENOMEM; + ekprintf("shmobj_create(%p %#lx,%p):kmalloc failed. %d\n", + ds, ds->shm_segsz, objp, error); + goto out; + } + + memset(obj, 0, sizeof(*obj)); + obj->memobj.ops = &shmobj_ops; + obj->ref = 1; + obj->ds = *ds; + page_list_init(obj); + ihk_mc_spinlock_init(&obj->memobj.lock); + + error = 0; + *objp = to_memobj(obj); + obj = NULL; + +out: + if (obj) { + kfree(obj); + } + dkprintf("shmobj_create(%p %#lx,%p):%d %p\n", + ds, ds->shm_segsz, objp, error, *objp); + return error; +} + +static void shmobj_release(struct memobj *memobj) +{ + struct shmobj *obj = to_shmobj(memobj); + struct shmobj *freeobj = NULL; + + dkprintf("shmobj_release(%p)\n", memobj); + memobj_lock(&obj->memobj); + --obj->ref; + if (obj->ref <= 0) { + if (obj->ref < 0) { + fkprintf("shmobj_release(%p):ref %ld\n", + memobj, obj->ref); + panic("shmobj_release:freeing free shmobj"); + } + freeobj = obj; + } + memobj_unlock(&obj->memobj); + + if (freeobj) { + /* zap page_list */ + for (;;) { + struct page *page; + int count; + + page = page_list_first(obj); + if (!page) { + break; + } + page_list_remove(obj, page); + + dkprintf("shmobj_release(%p):" + "release page. %p %#lx %d %d", + memobj, page, page_to_phys(page), + page->mode, page->count); + count = ihk_atomic_sub_return(1, &page->count); + if (!((page->mode == PM_MAPPED) && (count == 0))) { + fkprintf("shmobj_release(%p): " + "page %p phys %#lx mode %#x" + " count %d off %#lx\n", + memobj, page, + page_to_phys(page), + page->mode, count, + page->offset); + panic("shmobj_release"); + } + + /* XXX:NYI: large pages */ + page->mode = PM_NONE; + free_pages(phys_to_virt(page_to_phys(page)), 1); + } + dkprintf("shmobj_release(%p):free shmobj", memobj); + kfree(freeobj); + } + dkprintf("shmobj_release(%p):\n", memobj); + return; +} + +static void shmobj_ref(struct memobj *memobj) +{ + struct shmobj *obj = to_shmobj(memobj); + long newref; + + dkprintf("shmobj_ref(%p)\n", memobj); + memobj_lock(&obj->memobj); + newref = ++obj->ref; + memobj_unlock(&obj->memobj); + dkprintf("shmobj_ref(%p): newref %ld\n", memobj, newref); + return; +} + +static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align, + uintptr_t *physp) +{ + struct shmobj *obj = to_shmobj(memobj); + int error; + struct page *page; + int npages; + void *virt = NULL; + uintptr_t phys = -1; + + dkprintf("shmobj_get_page(%p,%#lx,%d,%p)\n", + memobj, off, p2align, physp); + memobj_lock(&obj->memobj); + if (off & ~PAGE_MASK) { + error = -EINVAL; + ekprintf("shmobj_get_page(%p,%#lx,%d,%p):invalid argument. %d\n", + memobj, off, p2align, physp, error); + goto out; + } + if (p2align != PAGE_P2ALIGN) { /* XXX:NYI:large pages */ + error = -ENOMEM; + ekprintf("shmobj_get_page(%p,%#lx,%d,%p):large page. %d\n", + memobj, off, p2align, physp, error); + goto out; + } + if (obj->ds.shm_segsz <= off) { + error = -ERANGE; + ekprintf("shmobj_get_page(%p,%#lx,%d,%p):beyond the end. %d\n", + memobj, off, p2align, physp, error); + goto out; + } + if ((obj->ds.shm_segsz - off) < (PAGE_SIZE << p2align)) { + error = -ENOSPC; + ekprintf("shmobj_get_page(%p,%#lx,%d,%p):too large. %d\n", + memobj, off, p2align, physp, error); + goto out; + } + + page = page_list_lookup(obj, off); + if (!page) { + npages = 1 << p2align; + virt = ihk_mc_alloc_pages(npages, IHK_MC_AP_NOWAIT); + if (!virt) { + error = -ENOMEM; + ekprintf("shmobj_get_page(%p,%#lx,%d,%p):" + "alloc failed. %d\n", + memobj, off, p2align, physp, error); + goto out; + } + phys = virt_to_phys(virt); + page = phys_to_page(phys); + if (page->mode != PM_NONE) { + fkprintf("shmobj_get_page(%p,%#lx,%d,%p):" + "page %p %#lx %d %d %#lx\n", + memobj, off, p2align, physp, + page, page_to_phys(page), page->mode, + page->count, page->offset); + panic("shmobj_get_page()"); + } + memset(virt, 0, npages*PAGE_SIZE); + page->mode = PM_MAPPED; + page->offset = off; + ihk_atomic_set(&page->count, 1); + page_list_insert(obj, page); + virt = NULL; + dkprintf("shmobj_get_page(%p,%#lx,%d,%p):alloc page. %p %#lx\n", + memobj, off, p2align, physp, page, phys); + } + + ihk_atomic_inc(&page->count); + + error = 0; + *physp = page_to_phys(page); + +out: + memobj_unlock(&obj->memobj); + if (virt) { + ihk_mc_free_pages(virt, npages); + } + dkprintf("shmobj_get_page(%p,%#lx,%d,%p):%d\n", + memobj, off, p2align, physp, error); + return error; +} diff --git a/kernel/syscall.c b/kernel/syscall.c index 8fb35986..3187f35e 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -45,6 +45,7 @@ #include #include #include +#include /* Headers taken from kitten LWK */ #include @@ -630,12 +631,13 @@ SYSCALL_DECLARE(mmap) const int prot = ihk_mc_syscall_arg2(ctx); const int flags = ihk_mc_syscall_arg3(ctx); const int fd = ihk_mc_syscall_arg4(ctx); - const off_t off = ihk_mc_syscall_arg5(ctx); + const off_t off0 = ihk_mc_syscall_arg5(ctx); struct process *proc = cpu_local_var(current); struct vm_regions *region = &proc->vm->region; intptr_t addr; size_t len; + off_t off; int error; intptr_t npages; int p2align; @@ -646,10 +648,11 @@ SYSCALL_DECLARE(mmap) int maxprot; int denied; int ro_vma_mapped = 0; + struct shmid_ds ads; dkprintf("[%d]sys_mmap(%lx,%lx,%x,%x,%d,%lx)\n", ihk_mc_get_processor_id(), - addr0, len0, prot, flags, fd, off); + addr0, len0, prot, flags, fd, off0); /* check constants for flags */ if (1) { @@ -681,9 +684,9 @@ SYSCALL_DECLARE(mmap) || ((region->user_end - len) < addr) || !(flags & (MAP_SHARED | MAP_PRIVATE)) || ((flags & MAP_SHARED) && (flags & MAP_PRIVATE)) - || (off & (PAGE_SIZE - 1))) { + || (off0 & (PAGE_SIZE - 1))) { ekprintf("sys_mmap(%lx,%lx,%x,%x,%x,%lx):EINVAL\n", - addr0, len0, prot, flags, fd, off); + addr0, len0, prot, flags, fd, off0); error = -EINVAL; goto out2; } @@ -692,7 +695,7 @@ SYSCALL_DECLARE(mmap) if ((flags & error_flags) || (flags & ~(supported_flags | ignored_flags))) { ekprintf("sys_mmap(%lx,%lx,%x,%x,%x,%lx):unknown flags %x\n", - addr0, len0, prot, flags, fd, off, + addr0, len0, prot, flags, fd, off0, (flags & ~(supported_flags | ignored_flags))); error = -EINVAL; goto out2; @@ -754,8 +757,10 @@ SYSCALL_DECLARE(mmap) } phys = 0; + off = 0; maxprot = PROT_READ | PROT_WRITE | PROT_EXEC; if (!(flags & MAP_ANONYMOUS)) { + off = off0; error = fileobj_create(fd, &memobj, &maxprot); if (error) { ekprintf("sys_mmap:fileobj_create failed. %d\n", error); @@ -781,6 +786,15 @@ SYSCALL_DECLARE(mmap) } phys = virt_to_phys(p); } + else if (flags & MAP_SHARED) { + memset(&ads, 0, sizeof(ads)); + ads.shm_segsz = len; + error = shmobj_create(&ads, &memobj); + if (error) { + ekprintf("sys_mmap:shmobj_create failed. %d\n", error); + goto out; + } + } if ((flags & MAP_PRIVATE) && (maxprot & PROT_READ)) { maxprot |= PROT_WRITE; @@ -844,7 +858,7 @@ out2: } dkprintf("[%d]sys_mmap(%lx,%lx,%x,%x,%d,%lx): %ld %lx\n", ihk_mc_get_processor_id(), - addr0, len0, prot, flags, fd, off, error, addr); + addr0, len0, prot, flags, fd, off0, error, addr); return (!error)? addr: error; } From 36cff84e05620271fa1b5d56d3c7ccf17017f12b Mon Sep 17 00:00:00 2001 From: NAKAMURA Gou Date: Thu, 3 Jul 2014 13:58:05 +0900 Subject: [PATCH 6/7] add zeroobj for private anonymous mappings --- kernel/Makefile.build | 1 + kernel/include/memobj.h | 1 + kernel/syscall.c | 7 ++ kernel/zeroobj.c | 206 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 215 insertions(+) create mode 100644 kernel/zeroobj.c diff --git a/kernel/Makefile.build b/kernel/Makefile.build index ded7f58d..49e40193 100644 --- a/kernel/Makefile.build +++ b/kernel/Makefile.build @@ -1,6 +1,7 @@ IHKDIR=$(IHKBASE)/$(TARGETDIR) OBJS = init.o mem.o debug.o mikc.o listeners.o ap.o syscall.o cls.o host.o OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o shmobj.o +OBJS += zeroobj.o DEPSRCS=$(wildcard $(SRC)/*.c) CFLAGS += -I$(SRC)/include -mcmodel=kernel -D__KERNEL__ diff --git a/kernel/include/memobj.h b/kernel/include/memobj.h index 0ba9c838..6bfd2a0a 100644 --- a/kernel/include/memobj.h +++ b/kernel/include/memobj.h @@ -103,5 +103,6 @@ static inline int memobj_has_pager(struct memobj *obj) int fileobj_create(int fd, struct memobj **objp, int *maxprotp); int shmobj_create(struct shmid_ds *ds, struct memobj **objp); +int zeroobj_create(struct memobj **objp); #endif /* HEADER_MEMOBJ_H */ diff --git a/kernel/syscall.c b/kernel/syscall.c index 3187f35e..b0d86109 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -795,6 +795,13 @@ SYSCALL_DECLARE(mmap) goto out; } } + else { + error = zeroobj_create(&memobj); + if (error) { + ekprintf("sys_mmap:zeroobj_create failed. %d\n", error); + goto out; + } + } if ((flags & MAP_PRIVATE) && (maxprot & PROT_READ)) { maxprot |= PROT_WRITE; diff --git a/kernel/zeroobj.c b/kernel/zeroobj.c new file mode 100644 index 00000000..2305ea6a --- /dev/null +++ b/kernel/zeroobj.c @@ -0,0 +1,206 @@ +/** + * \file zeroobj.c + * License details are found in the file LICENSE. + * \brief + * read-only zeroed page object + * \author Gou Nakamura + */ +/* + * HISTORY: + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) +#define ekprintf(...) kprintf(__VA_ARGS__) +#define fkprintf(...) kprintf(__VA_ARGS__) + +struct zeroobj { + struct memobj memobj; /* must be first */ + struct list_head page_list; +}; + +static ihk_spinlock_t the_zeroobj_lock = SPIN_LOCK_UNLOCKED; +static struct zeroobj *the_zeroobj = NULL; /* singleton */ + +static memobj_get_page_func_t zeroobj_get_page; + +static struct memobj_ops zeroobj_ops = { + .get_page = &zeroobj_get_page, +}; + +static struct zeroobj *to_zeroobj(struct memobj *memobj) +{ + return (struct zeroobj *)memobj; +} + +static struct memobj *to_memobj(struct zeroobj *zeroobj) +{ + return &zeroobj->memobj; +} + +/*********************************************************************** + * page_list + */ +static void page_list_init(struct zeroobj *obj) +{ + INIT_LIST_HEAD(&obj->page_list); + return; +} + +static void page_list_insert(struct zeroobj *obj, struct page *page) +{ + list_add(&page->list, &obj->page_list); + return; +} + +static struct page *page_list_first(struct zeroobj *obj) +{ + if (list_empty(&obj->page_list)) { + return NULL; + } + + return list_first_entry(&obj->page_list, struct page, list); +} + +/*********************************************************************** + * zeroobj + */ +static int alloc_zeroobj(void) +{ + int error; + struct zeroobj *obj = NULL; + void *virt = NULL; + uintptr_t phys; + struct page *page; + + dkprintf("alloc_zeroobj()\n"); + ihk_mc_spinlock_lock_noirq(&the_zeroobj_lock); + if (the_zeroobj) { + error = 0; + dkprintf("alloc_zeroobj():already. %d\n", error); + goto out; + } + + obj = kmalloc(sizeof(*obj), IHK_MC_AP_NOWAIT); + if (!obj) { + error = -ENOMEM; + ekprintf("alloc_zeroobj():kmalloc failed. %d\n", error); + goto out; + } + + memset(obj, 0, sizeof(*obj)); + obj->memobj.ops = &zeroobj_ops; + page_list_init(obj); + ihk_mc_spinlock_init(&obj->memobj.lock); + + virt = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT); /* XXX:NYI:large page */ + if (!virt) { + error = -ENOMEM; + ekprintf("alloc_zeroobj():alloc pages failed. %d\n", error); + goto out; + } + phys = virt_to_phys(virt); + page = phys_to_page(phys); + + if (page->mode != PM_NONE) { + fkprintf("alloc_zeroobj():" + "page %p %#lx %d %d %#lx\n", + page, page_to_phys(page), page->mode, + page->count, page->offset); + panic("alloc_zeroobj:dup alloc"); + } + + memset(virt, 0, PAGE_SIZE); + page->mode = PM_MAPPED; + page->offset = 0; + ihk_atomic_set(&page->count, 1); + page_list_insert(obj, page); + virt = NULL; + + error = 0; + the_zeroobj = obj; + obj = NULL; + +out: + ihk_mc_spinlock_unlock_noirq(&the_zeroobj_lock); + if (virt) { + ihk_mc_free_pages(virt, 1); + } + if (obj) { + kfree(obj); + } + dkprintf("alloc_zeroobj():%d %p\n", error, the_zeroobj); + return error; +} + +int zeroobj_create(struct memobj **objp) +{ + int error; + + dkprintf("zeroobj_create(%p)\n", objp); + if (!the_zeroobj) { + error = alloc_zeroobj(); + if (error) { + goto out; + } + } + + error = 0; + *objp = to_memobj(the_zeroobj); + +out: + dkprintf("zeroobj_create(%p):%d %p\n", objp, error, *objp); + return error; +} + +static int zeroobj_get_page(struct memobj *memobj, off_t off, int p2align, + uintptr_t *physp) +{ + int error; + struct zeroobj *obj = to_zeroobj(memobj); + struct page *page; + + dkprintf("zeroobj_get_page(%p,%#lx,%d,%p)\n", + memobj, off, p2align, physp); + if (off & ~PAGE_MASK) { + error = -EINVAL; + ekprintf("zeroobj_get_page(%p,%#lx,%d,%p):invalid argument. %d\n", + memobj, off, p2align, physp, error); + goto out; + } + if (p2align != PAGE_P2ALIGN) { /* XXX:NYI:large pages */ + error = -ENOMEM; + ekprintf("zeroobj_get_page(%p,%#lx,%d,%p):large page. %d\n", + memobj, off, p2align, physp, error); + goto out; + } + + page = page_list_first(obj); + if (!page) { + error = -ENOMEM; + ekprintf("zeroobj_get_page(%p,%#lx,%d,%p):page not found. %d\n", + memobj, off, p2align, physp, error); + goto out; + } + + ihk_atomic_inc(&page->count); + + error = 0; + *physp = page_to_phys(page); + +out: + dkprintf("zeroobj_get_page(%p,%#lx,%d,%p):%d\n", + memobj, off, p2align, physp, error); + return error; +} From 31a605f94b9ed20ff849214730697f9fab00fe02 Mon Sep 17 00:00:00 2001 From: "Balazs Gerofi bgerofi@riken.jp" Date: Thu, 10 Jul 2014 13:53:12 +0900 Subject: [PATCH 7/7] push/pop r15 when entering/leaving kernel space (fix for bug #53: r15 wasn't propagated during fork()) --- arch/x86/kernel/include/registers.h | 2 +- arch/x86/kernel/interrupt.S | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/include/registers.h b/arch/x86/kernel/include/registers.h index 97889b6e..0e9dca07 100644 --- a/arch/x86/kernel/include/registers.h +++ b/arch/x86/kernel/include/registers.h @@ -136,7 +136,7 @@ struct tss64 { } __attribute__((packed)); struct x86_regs { - unsigned long r11, r10, r9, r8; + unsigned long r15, r11, r10, r9, r8; unsigned long rdi, rsi, rdx, rcx, rbx, rax, rbp; unsigned long error, rip, cs, rflags, rsp, ss; }; diff --git a/arch/x86/kernel/interrupt.S b/arch/x86/kernel/interrupt.S index e814afa4..d0a0838b 100644 --- a/arch/x86/kernel/interrupt.S +++ b/arch/x86/kernel/interrupt.S @@ -35,8 +35,10 @@ pushq %r8; \ pushq %r9; \ pushq %r10; \ - pushq %r11; + pushq %r11; \ + pushq %r15; #define POP_ALL_REGS \ + popq %r15; \ popq %r11; \ popq %r10; \ popq %r9; \ @@ -67,7 +69,7 @@ vector=vector+1 common_interrupt: PUSH_ALL_REGS - movq 88(%rsp), %rdi + movq 96(%rsp), %rdi movq %rsp, %rsi call handle_interrupt /* Enter C code */ POP_ALL_REGS @@ -83,7 +85,7 @@ page_fault: cld PUSH_ALL_REGS movq %cr2, %rdi - movq 88(%rsp),%rsi + movq 96(%rsp),%rsi movq %rsp,%rdx movq __page_fault_handler_address(%rip), %rax andq %rax, %rax @@ -120,13 +122,13 @@ x86_syscall: movq %gs:24, %rcx movq %rcx, 32(%rsp) PUSH_ALL_REGS - movq 72(%rsp), %rdi + movq 80(%rsp), %rdi movw %ss, %ax movw %ax, %ds movq %rsp, %rsi callq *__x86_syscall_handler(%rip) 1: - movq %rax, 72(%rsp) + movq %rax, 80(%rsp) POP_ALL_REGS #ifdef USE_SYSRET movq 8(%rsp), %rcx