diff --git a/arch/x86/kernel/include/arch/shm.h b/arch/x86/kernel/include/arch/shm.h new file mode 100644 index 00000000..cc8d1e0a --- /dev/null +++ b/arch/x86/kernel/include/arch/shm.h @@ -0,0 +1,40 @@ +/** + * \file shm.h + * License details are found in the file LICENSE. + * \brief + * header file for System V shared memory + * \author Gou Nakamura \par + * Copyright (C) 2015 RIKEN-AICS + */ +/* + * HISTORY: + */ + +#ifndef HEADER_ARCH_SHM_H +#define HEADER_ARCH_SHM_H + +struct ipc_perm { + key_t key; + uid_t uid; + gid_t gid; + uid_t cuid; + gid_t cgid; + uint16_t mode; + uint8_t padding[2]; + uint16_t seq; + uint8_t padding2[22]; +}; + +struct shmid_ds { + struct ipc_perm shm_perm; + size_t shm_segsz; + time_t shm_atime; + time_t shm_dtime; + time_t shm_ctime; + pid_t shm_cpid; + pid_t shm_lpid; + uint64_t shm_nattch; + uint8_t padding[16]; +}; + +#endif /* HEADER_ARCH_SHM_H */ diff --git a/arch/x86/kernel/include/syscall_list.h b/arch/x86/kernel/include/syscall_list.h index acd966f4..d11184d9 100644 --- a/arch/x86/kernel/include/syscall_list.h +++ b/arch/x86/kernel/include/syscall_list.h @@ -43,6 +43,9 @@ SYSCALL_HANDLED(24, sched_yield) SYSCALL_HANDLED(25, mremap) SYSCALL_HANDLED(26, msync) SYSCALL_HANDLED(28, madvise) +SYSCALL_HANDLED(29, shmget) +SYSCALL_HANDLED(30, shmat) +SYSCALL_HANDLED(31, shmctl) SYSCALL_HANDLED(34, pause) SYSCALL_HANDLED(39, getpid) SYSCALL_HANDLED(56, clone) @@ -53,6 +56,7 @@ SYSCALL_HANDLED(60, exit) SYSCALL_HANDLED(61, wait4) SYSCALL_HANDLED(62, kill) SYSCALL_DELEGATED(63, uname) +SYSCALL_HANDLED(67, shmdt) SYSCALL_DELEGATED(72, fcntl) SYSCALL_DELEGATED(79, getcwd) SYSCALL_DELEGATED(89, readlink) diff --git a/kernel/include/memobj.h b/kernel/include/memobj.h index beb63deb..34347c6f 100644 --- a/kernel/include/memobj.h +++ b/kernel/include/memobj.h @@ -18,11 +18,19 @@ #include #include #include -#include + +/* begin types.h */ +typedef int32_t key_t; +typedef uint32_t uid_t; +typedef uint32_t gid_t; +typedef int64_t time_t; 
+typedef int32_t pid_t; +/* end types.h */ enum { /* for memobj.flags */ MF_HAS_PAGER = 0x0001, + MF_SHMDT_OK = 0x0002, }; struct memobj { @@ -113,6 +121,7 @@ static inline int memobj_has_pager(struct memobj *obj) } int fileobj_create(int fd, struct memobj **objp, int *maxprotp); +struct shmid_ds; int shmobj_create(struct shmid_ds *ds, struct memobj **objp); int zeroobj_create(struct memobj **objp); int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxprotp); diff --git a/kernel/include/shm.h b/kernel/include/shm.h index 3117aba5..b6a146ab 100644 --- a/kernel/include/shm.h +++ b/kernel/include/shm.h @@ -12,38 +12,71 @@ #ifndef HEADER_SHM_H #define HEADER_SHM_H -/* begin types.h */ -typedef int32_t key_t; -typedef uint32_t uid_t; -typedef uint32_t gid_t; -typedef int64_t time_t; -typedef int32_t pid_t; -/* end types.h */ +#include +#include +#include -typedef uint64_t shmatt_t; +enum { + /* for key_t */ + IPC_PRIVATE = 0, -struct ipc_perm { - key_t key; - uid_t uid; - gid_t gid; - uid_t cuid; - gid_t cgid; - uint16_t mode; - uint8_t padding[2]; - uint16_t seq; - uint8_t padding2[22]; + /* for shmflg */ + IPC_CREAT = 01000, + IPC_EXCL = 02000, + + SHM_RDONLY = 010000, + SHM_RND = 020000, + SHM_REMAP = 040000, + SHM_EXEC = 0100000, + + /* for shm_mode */ + SHM_DEST = 01000, + SHM_LOCKED = 02000, + + /* for cmd of shmctl() */ + IPC_RMID = 0, + IPC_SET = 1, + IPC_STAT = 2, + IPC_INFO = 3, + + SHM_LOCK = 11, + SHM_UNLOCK = 12, + SHM_STAT = 13, + SHM_INFO = 14, }; -struct shmid_ds { - struct ipc_perm shm_perm; - size_t shm_segsz; - time_t shm_atime; - time_t shm_dtime; - time_t shm_ctime; - pid_t shm_cpid; - pid_t shm_lpid; - shmatt_t shm_nattch; - uint8_t padding[16]; +struct shmobj { + struct memobj memobj; /* must be first */ + int index; + uint8_t padding[4]; + size_t real_segsz; + struct shmid_ds ds; + struct list_head page_list; + struct list_head chain; /* shmobj_list */ }; +struct shminfo { + uint64_t shmmax; + uint64_t shmmin; + uint64_t 
shmmni; + uint64_t shmseg; + uint64_t shmall; + uint8_t padding[32]; +}; + +struct shm_info { + int32_t used_ids; + uint8_t padding[4]; + uint64_t shm_tot; + uint64_t shm_rss; + uint64_t shm_swp; + uint64_t swap_attempts; + uint64_t swap_successes; +}; + +void shmobj_list_lock(void); +void shmobj_list_unlock(void); +int shmobj_create_indexed(struct shmid_ds *ds, struct shmobj **objp); +void shmobj_destroy(struct shmobj *obj); + #endif /* HEADER_SHM_H */ diff --git a/kernel/process.c b/kernel/process.c index 4a1e5ded..fbc3ec40 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -454,6 +454,15 @@ static int copy_user_ranges(struct process *proc, struct process *org) vaddr += PAGE_SIZE; continue; } + if (1) { + struct page *page; + + page = phys_to_page(pte_get_phys(ptep)); + if (page && page_is_in_memobj(page)) { + vaddr += PAGE_SIZE; + continue; + } + } dkprintf("copy_user_ranges(): 0x%lx PTE found\n", vaddr); diff --git a/kernel/shmobj.c b/kernel/shmobj.c index f989c5f6..aa88232d 100644 --- a/kernel/shmobj.c +++ b/kernel/shmobj.c @@ -26,12 +26,8 @@ #define ekprintf(...) kprintf(__VA_ARGS__) #define fkprintf(...) 
kprintf(__VA_ARGS__) -struct shmobj { - struct memobj memobj; /* must be first */ - long ref; - struct shmid_ds ds; - struct list_head page_list; -}; +static LIST_HEAD(shmobj_list_head); +static ihk_spinlock_t shmobj_list_lock_body = SPIN_LOCK_UNLOCKED; static memobj_release_func_t shmobj_release; static memobj_ref_func_t shmobj_ref; @@ -98,6 +94,25 @@ static struct page *page_list_first(struct shmobj *obj) return list_first_entry(&obj->page_list, struct page, list); } +/*********************************************************************** + * shmobj_list + */ +void shmobj_list_lock(void) +{ + ihk_mc_spinlock_lock_noirq(&shmobj_list_lock_body); + return; +} + +void shmobj_list_unlock(void) +{ + ihk_mc_spinlock_unlock_noirq(&shmobj_list_lock_body); + return; +} + +/*********************************************************************** + * operations + */ +int the_seq = 0; int shmobj_create(struct shmid_ds *ds, struct memobj **objp) { struct shmobj *obj = NULL; @@ -114,8 +129,11 @@ int shmobj_create(struct shmid_ds *ds, struct memobj **objp) memset(obj, 0, sizeof(*obj)); obj->memobj.ops = &shmobj_ops; - obj->ref = 1; obj->ds = *ds; + obj->ds.shm_perm.seq = the_seq++; + obj->ds.shm_nattch = 1; + obj->index = -1; + obj->real_segsz = (obj->ds.shm_segsz + PAGE_SIZE - 1) & PAGE_MASK; page_list_init(obj); ihk_mc_spinlock_init(&obj->memobj.lock); @@ -127,65 +145,124 @@ out: if (obj) { kfree(obj); } dkprintf("shmobj_create(%p %#lx,%p):%d %p\n", ds, ds->shm_segsz, objp, error, *objp); return error; } +int shmobj_create_indexed(struct shmid_ds *ds, struct shmobj **objp) +{ + int error; + struct memobj *obj = NULL; /* shmobj_create() prints *objp on every path, so it must not be indeterminate */ + + error = shmobj_create(ds, &obj); + if (!error) { + obj->flags |= MF_SHMDT_OK; + *objp = to_shmobj(obj); + } + return error; +} + +void shmobj_destroy(struct shmobj *obj) +{ + extern struct shm_info the_shm_info; + extern struct list_head kds_free_list; + extern int the_maxi; + + dkprintf("shmobj_destroy(%p [%d 
%o])\n", obj, obj->index, obj->ds.shm_perm.mode); + /* zap page_list */ + for (;;) { + struct page *page; + int count; + + page = page_list_first(obj); + if (!page) { + break; + } + page_list_remove(obj, page); + + dkprintf("shmobj_destroy(%p):" + "release page. %p %#lx %d %d\n", + obj, page, page_to_phys(page), + page->mode, page->count); + count = ihk_atomic_sub_return(1, &page->count); + if (!((page->mode == PM_MAPPED) && (count == 0))) { + fkprintf("shmobj_destroy(%p): " + "page %p phys %#lx mode %#x" + " count %d off %#lx\n", + obj, page, + page_to_phys(page), + page->mode, count, + page->offset); + panic("shmobj_destroy"); + } + + /* XXX:NYI: large pages */ + page->mode = PM_NONE; + free_pages(phys_to_virt(page_to_phys(page)), 1); + } + if (obj->index < 0) { + kfree(obj); + } + else { + list_del(&obj->chain); + --the_shm_info.used_ids; + + list_add(&obj->chain, &kds_free_list); + for (;;) { + struct shmobj *p; + + list_for_each_entry(p, &kds_free_list, chain) { + if (p->index == the_maxi) { + break; + } + } + if (&p->chain == &kds_free_list) { + break; + } + + list_del(&p->chain); + kfree(p); + --the_maxi; + } + } + return; +} + static void shmobj_release(struct memobj *memobj) { struct shmobj *obj = to_shmobj(memobj); struct shmobj *freeobj = NULL; + long newref; + extern time_t time(void); + extern pid_t getpid(void); dkprintf("shmobj_release(%p)\n", memobj); memobj_lock(&obj->memobj); - --obj->ref; - if (obj->ref <= 0) { - if (obj->ref < 0) { + if (obj->index >= 0) { + obj->ds.shm_dtime = time(); + obj->ds.shm_lpid = getpid(); + dkprintf("shmobj_release:drop shm_nattach %p %ld\n", obj, (long)obj->ds.shm_nattch); /* shm_nattch is uint64_t; cast to match the conversion */ + } + newref = --obj->ds.shm_nattch; + if (newref <= 0) { + if (newref < 0) { fkprintf("shmobj_release(%p):ref %ld\n", - memobj, obj->ref); + memobj, newref); panic("shmobj_release:freeing free shmobj"); } - freeobj = obj; + if (obj->ds.shm_perm.mode & SHM_DEST) { + freeobj = obj; + } } memobj_unlock(&obj->memobj); if (freeobj) { - /* zap page_list */ - for 
(;;) { - struct page *page; - int count; - - page = page_list_first(obj); - if (!page) { - break; - } - page_list_remove(obj, page); - - dkprintf("shmobj_release(%p):" - "release page. %p %#lx %d %d", - memobj, page, page_to_phys(page), - page->mode, page->count); - count = ihk_atomic_sub_return(1, &page->count); - if (!((page->mode == PM_MAPPED) && (count == 0))) { - fkprintf("shmobj_release(%p): " - "page %p phys %#lx mode %#x" - " count %d off %#lx\n", - memobj, page, - page_to_phys(page), - page->mode, count, - page->offset); - panic("shmobj_release"); - } - - /* XXX:NYI: large pages */ - page->mode = PM_NONE; - free_pages(phys_to_virt(page_to_phys(page)), 1); - } - dkprintf("shmobj_release(%p):free shmobj", memobj); - kfree(freeobj); + shmobj_list_lock(); + shmobj_destroy(freeobj); + shmobj_list_unlock(); } - dkprintf("shmobj_release(%p):\n", memobj); + dkprintf("shmobj_release(%p): %ld\n", memobj, newref); return; } @@ -193,10 +270,16 @@ static void shmobj_ref(struct memobj *memobj) { struct shmobj *obj = to_shmobj(memobj); long newref; + extern time_t time(void); + extern pid_t getpid(void); dkprintf("shmobj_ref(%p)\n", memobj); memobj_lock(&obj->memobj); - newref = ++obj->ref; + newref = ++obj->ds.shm_nattch; + if (obj->index >= 0) { + obj->ds.shm_atime = time(); + obj->ds.shm_lpid = getpid(); + } memobj_unlock(&obj->memobj); dkprintf("shmobj_ref(%p): newref %ld\n", memobj, newref); return; @@ -227,13 +310,13 @@ static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align, memobj, off, p2align, physp, error); goto out; } - if (obj->ds.shm_segsz <= off) { + if (obj->real_segsz <= off) { error = -ERANGE; ekprintf("shmobj_get_page(%p,%#lx,%d,%p):beyond the end. %d\n", memobj, off, p2align, physp, error); goto out; } - if ((obj->ds.shm_segsz - off) < (PAGE_SIZE << p2align)) { + if ((obj->real_segsz - off) < (PAGE_SIZE << p2align)) { error = -ENOSPC; ekprintf("shmobj_get_page(%p,%#lx,%d,%p):too large. 
%d\n", memobj, off, p2align, physp, error); diff --git a/kernel/syscall.c b/kernel/syscall.c index c69355f5..572baebd 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -1123,6 +1123,7 @@ SYSCALL_DECLARE(mmap) else if (flags & MAP_SHARED) { memset(&ads, 0, sizeof(ads)); ads.shm_segsz = len; + ads.shm_perm.mode = SHM_DEST; error = shmobj_create(&ads, &memobj); if (error) { ekprintf("sys_mmap:shmobj_create failed. %d\n", error); @@ -2644,6 +2645,590 @@ out2: return error; } +struct kshmid_ds { + int destroy; + int padding; + struct shmobj *obj; + struct memobj *memobj; + struct list_head chain; +}; + +int the_maxi = -1; +LIST_HEAD(kds_list); +LIST_HEAD(kds_free_list); +struct shminfo the_shminfo = { + .shmmax = 64L * 1024 * 1024 * 1024, + .shmmin = 1, + .shmmni = 4 * 1024, + .shmall = 4L * 1024 * 1024 * 1024, +}; +struct shm_info the_shm_info = { 0, }; + +static uid_t geteuid(void) { + struct syscall_request sreq IHK_DMA_ALIGN; + struct process *proc = cpu_local_var(current); + + sreq.number = __NR_geteuid; + return (uid_t)do_syscall(&sreq, ihk_mc_get_processor_id(), proc->ftn->pid); +} + +static gid_t getegid(void) { + struct syscall_request sreq IHK_DMA_ALIGN; + struct process *proc = cpu_local_var(current); + + sreq.number = __NR_getegid; + return (gid_t)do_syscall(&sreq, ihk_mc_get_processor_id(), proc->ftn->pid); +} + +time_t time(void) { + struct syscall_request sreq IHK_DMA_ALIGN; + struct process *proc = cpu_local_var(current); + + sreq.number = __NR_time; + sreq.args[0] = (uintptr_t)NULL; + return (time_t)do_syscall(&sreq, ihk_mc_get_processor_id(), proc->ftn->pid); +} + +pid_t getpid(void) { + struct process *proc = cpu_local_var(current); + + return proc->ftn->pid; +} + +static int make_shmid(struct shmobj *obj) +{ + return ((int)obj->index << 16) | obj->ds.shm_perm.seq; +} /* make_shmid() */ + +static int shmid_to_index(int shmid) +{ + return (shmid >> 16); +} /* shmid_to_index() */ + +static int shmid_to_seq(int shmid) +{ + return (shmid & ((1 << 
16) - 1)); +} /* shmid_to_seq() */ + +int shmobj_list_lookup(int shmid, struct shmobj **objp) +{ + int index; + int seq; + struct shmobj *obj; + + index = shmid_to_index(shmid); + seq = shmid_to_seq(shmid); + + list_for_each_entry(obj, &kds_list, chain) { + if (obj->index == index) { + break; + } + } + if (&obj->chain == &kds_list) { + return -EINVAL; + } + if (obj->ds.shm_perm.seq != seq) { + return -EIDRM; + } + + *objp = obj; + return 0; +} /* shmobj_list_lookup() */ + +int shmobj_list_lookup_by_key(key_t key, struct shmobj **objp) +{ + struct shmobj *obj; + + list_for_each_entry(obj, &kds_list, chain) { + if (obj->ds.shm_perm.key == key) { + break; + } + } + if (&obj->chain == &kds_list) { + return -EINVAL; + } + + *objp = obj; + return 0; +} /* shmobj_list_lookup_by_key() */ + +int shmobj_list_lookup_by_index(int index, struct shmobj **objp) +{ + struct shmobj *obj; + + list_for_each_entry(obj, &kds_list, chain) { + if (obj->index == index) { + break; + } + } + if (&obj->chain == &kds_list) { + return -EINVAL; + } + + *objp = obj; + return 0; +} /* shmobj_list_lookup_by_index() */ + +SYSCALL_DECLARE(shmget) +{ + const key_t key = ihk_mc_syscall_arg0(ctx); + const size_t size = ihk_mc_syscall_arg1(ctx); + const int shmflg = ihk_mc_syscall_arg2(ctx); + uid_t euid = geteuid(); + gid_t egid = getegid(); + time_t now = time(); + struct process *proc = cpu_local_var(current); + int shmid; + int error; + struct shmid_ds ads; + struct shmobj *obj; + + dkprintf("shmget(%#lx,%#lx,%#x)\n", key, size, shmflg); + + if (size < the_shminfo.shmmin) { + dkprintf("shmget(%#lx,%#lx,%#x): -EINVAL\n", key, size, shmflg); + return -EINVAL; + } + + shmobj_list_lock(); + obj = NULL; + if (key != IPC_PRIVATE) { + error = shmobj_list_lookup_by_key(key, &obj); + if (error == -EINVAL) { + obj = NULL; + } + else if (error) { + shmobj_list_unlock(); + dkprintf("shmget(%#lx,%#lx,%#x): lookup: %d\n", key, size, shmflg, error); + return error; + } + if (!obj && !(shmflg & IPC_CREAT)) { + 
shmobj_list_unlock(); + dkprintf("shmget(%#lx,%#lx,%#x): -ENOENT\n", key, size, shmflg); + return -ENOENT; + } + if (obj && (shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) { + shmobj_list_unlock(); + dkprintf("shmget(%#lx,%#lx,%#x): -EEXIST\n", key, size, shmflg); + return -EEXIST; + } + } + + if (obj) { + if (euid) { + int req; + + req = (shmflg | (shmflg << 3) | (shmflg << 6)) & 0700; + if ((obj->ds.shm_perm.uid == euid) + || (obj->ds.shm_perm.cuid == euid)) { + /* nothing to do */ + } + else if ((obj->ds.shm_perm.gid == egid) + || (obj->ds.shm_perm.cgid == egid)) { + /* + * XXX: need to check supplementary group IDs + */ + req >>= 3; + } + else { + req >>= 6; + } + if (req & ~obj->ds.shm_perm.mode) { + shmobj_list_unlock(); + dkprintf("shmget(%#lx,%#lx,%#x): -EACCES\n", key, size, shmflg); + return -EACCES; /* a requested permission bit is not granted by the segment's mode */ + } + } + if (obj->ds.shm_segsz < size) { + shmobj_list_unlock(); + dkprintf("shmget(%#lx,%#lx,%#x): -EINVAL\n", key, size, shmflg); + return -EINVAL; + } + shmid = make_shmid(obj); + shmobj_list_unlock(); + dkprintf("shmget(%#lx,%#lx,%#x): %d\n", key, size, shmflg, shmid); + return shmid; + } + + if (the_shm_info.used_ids >= the_shminfo.shmmni) { + shmobj_list_unlock(); + dkprintf("shmget(%#lx,%#lx,%#x): -ENOSPC\n", key, size, shmflg); + return -ENOSPC; + } + + memset(&ads, 0, sizeof(ads)); + ads.shm_perm.key = key; + ads.shm_perm.uid = euid; + ads.shm_perm.cuid = euid; + ads.shm_perm.gid = egid; + ads.shm_perm.cgid = egid; + ads.shm_perm.mode = shmflg & 0777; + ads.shm_segsz = size; + ads.shm_ctime = now; + ads.shm_cpid = proc->ftn->pid; + + error = shmobj_create_indexed(&ads, &obj); + if (error) { + shmobj_list_unlock(); + dkprintf("shmget(%#lx,%#lx,%#x): shmobj_create: %d\n", key, size, shmflg, error); + return error; + } + + obj->index = ++the_maxi; + + list_add(&obj->chain, &kds_list); + ++the_shm_info.used_ids; + + shmid = make_shmid(obj); + shmobj_list_unlock(); + memobj_release(&obj->memobj); + + dkprintf("shmget(%#lx,%#lx,%#x): %d\n", key, size, 
shmflg, shmid); + return shmid; +} /* sys_shmget() */ + +SYSCALL_DECLARE(shmat) +{ + const int shmid = ihk_mc_syscall_arg0(ctx); + void * const shmaddr = (void *)ihk_mc_syscall_arg1(ctx); + const int shmflg = ihk_mc_syscall_arg2(ctx); + struct process *proc = cpu_local_var(current); + size_t len; + int error; + struct vm_regions *region = &proc->vm->region; + intptr_t addr; + int prot; + int vrflags; + int req; + uid_t euid = geteuid(); + gid_t egid = getegid(); + struct shmobj *obj; + + dkprintf("shmat(%#x,%p,%#x)\n", shmid, shmaddr, shmflg); + + shmobj_list_lock(); + error = shmobj_list_lookup(shmid, &obj); + if (error) { + shmobj_list_unlock(); + dkprintf("shmat(%#x,%p,%#x): lookup: %d\n", shmid, shmaddr, shmflg, error); + return error; + } + + if (shmaddr && ((uintptr_t)shmaddr & (PAGE_SIZE - 1)) && !(shmflg & SHM_RND)) { + shmobj_list_unlock(); + dkprintf("shmat(%#x,%p,%#x): -EINVAL\n", shmid, shmaddr, shmflg); + return -EINVAL; + } + addr = (uintptr_t)shmaddr & PAGE_MASK; + len = (obj->ds.shm_segsz + PAGE_SIZE - 1) & PAGE_MASK; + + prot = PROT_READ; + req = 4; + if (!(shmflg & SHM_RDONLY)) { + prot |= PROT_WRITE; + req |= 2; + } + + if (!euid) { + req = 0; + } + else if ((euid == obj->ds.shm_perm.uid) || (euid == obj->ds.shm_perm.cuid)) { + req <<= 6; + } + else if ((egid == obj->ds.shm_perm.gid) || (egid == obj->ds.shm_perm.cgid)) { + req <<= 3; + } + else { + req <<= 0; + } + if (~obj->ds.shm_perm.mode & req) { + shmobj_list_unlock(); + dkprintf("shmat(%#x,%p,%#x): -EACCES\n", shmid, shmaddr, shmflg); + return -EACCES; /* read (and, unless SHM_RDONLY, write) permission is missing for this caller class */ + } + + ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock); + + if (addr) { + if (lookup_process_memory_range(proc->vm, addr, addr+len)) { + ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); + shmobj_list_unlock(); + dkprintf("shmat(%#x,%p,%#x):lookup_process_memory_range succeeded. 
-ENOMEM\n", shmid, shmaddr, shmflg); + return -ENOMEM; + } + } + else { + error = search_free_space(len, region->map_end, &addr); + if (error) { + ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); + shmobj_list_unlock(); + dkprintf("shmat(%#x,%p,%#x):search_free_space failed. %d\n", shmid, shmaddr, shmflg, error); + return error; + } + region->map_end = addr + len; + } + + vrflags = VR_NONE; + vrflags |= VR_DEMAND_PAGING; + vrflags |= PROT_TO_VR_FLAG(prot); + vrflags |= VRFLAG_PROT_TO_MAXPROT(vrflags); + + if (!(prot & PROT_WRITE)) { + error = set_host_vma(addr, len, PROT_READ); + if (error) { + ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); + shmobj_list_unlock(); + dkprintf("shmat(%#x,%p,%#x):set_host_vma failed. %d\n", shmid, shmaddr, shmflg, error); + return error; + } + } + + memobj_ref(&obj->memobj); + + error = add_process_memory_range(proc, addr, addr+len, -1, vrflags, &obj->memobj, 0); + if (error) { + if (!(prot & PROT_WRITE)) { + (void)set_host_vma(addr, len, PROT_READ|PROT_WRITE); + } + memobj_release(&obj->memobj); + ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); + shmobj_list_unlock(); + dkprintf("shmat(%#x,%p,%#x):add_process_memory_range failed. %d\n", shmid, shmaddr, shmflg, error); + return error; + } + + ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); + shmobj_list_unlock(); + + dkprintf("shmat:bump shm_nattach %p %d\n", obj, obj->ds.shm_nattch); + dkprintf("shmat(%#x,%p,%#x): 0x%lx. 
\n", shmid, shmaddr, shmflg, addr); + return addr; +} /* sys_shmat() */ + +SYSCALL_DECLARE(shmctl) +{ + const int shmid = ihk_mc_syscall_arg0(ctx); + const int cmd = ihk_mc_syscall_arg1(ctx); + struct shmid_ds * const buf = (void *)ihk_mc_syscall_arg2(ctx); + int error; + struct shmid_ds ads; + uid_t euid = geteuid(); + gid_t egid = getegid(); + time_t now = time(); + int req; + int maxi; + struct shmobj *obj; + + dkprintf("shmctl(%#x,%d,%p)\n", shmid, cmd, buf); + if (0) ; + else if (cmd == IPC_RMID) { + shmobj_list_lock(); + error = shmobj_list_lookup(shmid, &obj); + if (error) { + shmobj_list_unlock(); + dkprintf("shmctl(%#x,%d,%p): lookup: %d\n", shmid, cmd, buf, error); + return error; + } + if (euid && (obj->ds.shm_perm.uid != euid) + && (obj->ds.shm_perm.cuid != euid)) { + shmobj_list_unlock(); + dkprintf("shmctl(%#x,%d,%p): -EPERM\n", shmid, cmd, buf); + return -EPERM; /* caller is neither euid 0 nor the segment's owner/creator */ + } + obj->ds.shm_perm.mode |= SHM_DEST; + if (obj->ds.shm_nattch <= 0) { + shmobj_destroy(obj); + } + shmobj_list_unlock(); + + dkprintf("shmctl(%#x,%d,%p): 0\n", shmid, cmd, buf); + return 0; + } + else if (cmd == IPC_SET) { + shmobj_list_lock(); + error = shmobj_list_lookup(shmid, &obj); + if (error) { + shmobj_list_unlock(); + dkprintf("shmctl(%#x,%d,%p): lookup: %d\n", shmid, cmd, buf, error); + return error; + } + if (euid && (obj->ds.shm_perm.uid != euid) + && (obj->ds.shm_perm.cuid != euid)) { + shmobj_list_unlock(); + dkprintf("shmctl(%#x,%d,%p): -EPERM\n", shmid, cmd, buf); + return -EPERM; /* caller is neither euid 0 nor the segment's owner/creator */ + } + error = copy_from_user(&ads, buf, sizeof(ads)); + if (error) { + shmobj_list_unlock(); + dkprintf("shmctl(%#x,%d,%p): %d\n", shmid, cmd, buf, error); + return error; + } + obj->ds.shm_perm.uid = ads.shm_perm.uid; + obj->ds.shm_perm.gid = ads.shm_perm.gid; + obj->ds.shm_perm.mode &= ~0777; + obj->ds.shm_perm.mode |= ads.shm_perm.mode & 0777; + obj->ds.shm_ctime = now; + + shmobj_list_unlock(); + dkprintf("shmctl(%#x,%d,%p): 0\n", shmid, cmd, buf); + return 0; + } + else if (cmd == IPC_STAT) { + 
shmobj_list_lock(); + error = shmobj_list_lookup(shmid, &obj); + if (error) { + shmobj_list_unlock(); + dkprintf("shmctl(%#x,%d,%p): lookup: %d\n", shmid, cmd, buf, error); + return error; + } + if (!euid) { + req = 0; + } + else if ((euid == obj->ds.shm_perm.uid) || (euid == obj->ds.shm_perm.cuid)) { + req = 0400; + } + else if ((egid == obj->ds.shm_perm.gid) || (egid == obj->ds.shm_perm.cgid)) { + req = 0040; + } + else { + req = 0004; + } + if (req & ~obj->ds.shm_perm.mode) { + shmobj_list_unlock(); + dkprintf("shmctl(%#x,%d,%p): -EACCES\n", shmid, cmd, buf); + return -EACCES; + } + error = copy_to_user(buf, &obj->ds, sizeof(*buf)); + if (error) { + shmobj_list_unlock(); + dkprintf("shmctl(%#x,%d,%p): %d\n", shmid, cmd, buf, error); + return error; + } + + shmobj_list_unlock(); + dkprintf("shmctl(%#x,%d,%p): 0\n", shmid, cmd, buf); + return 0; + } + else if (cmd == IPC_INFO) { + shmobj_list_lock(); + /* + * IPC_INFO reports the system-wide limits in the_shminfo, + * so shmid is not looked up: as with SHM_INFO, the request + * must succeed even when shmid names no live segment. + * The list lock is kept because the_maxi is read below. + */ + error = copy_to_user(buf, &the_shminfo, sizeof(the_shminfo)); + if (error) { + shmobj_list_unlock(); + dkprintf("shmctl(%#x,%d,%p): %d\n", shmid, cmd, buf, error); + return error; + } + + maxi = the_maxi; + if (maxi < 0) { + maxi = 0; + } + shmobj_list_unlock(); + dkprintf("shmctl(%#x,%d,%p): %d\n", shmid, cmd, buf, maxi); + return maxi; + } + else if (cmd == SHM_LOCK) { + shmobj_list_lock(); + error = shmobj_list_lookup(shmid, &obj); + if (error) { + shmobj_list_unlock(); + dkprintf("shmctl(%#x,%d,%p): lookup: %d\n", shmid, cmd, buf, error); + return error; + } + obj->ds.shm_perm.mode |= SHM_LOCKED; + shmobj_list_unlock(); + + dkprintf("shmctl(%#x,%d,%p): 0\n", shmid, cmd, buf); + return 0; + } + else if (cmd == SHM_UNLOCK) { + shmobj_list_lock(); + error = shmobj_list_lookup(shmid, &obj); + if (error) { + shmobj_list_unlock(); + dkprintf("shmctl(%#x,%d,%p): lookup: %d\n", shmid, cmd, buf, 
error); + return error; + } + obj->ds.shm_perm.mode &= ~SHM_LOCKED; + shmobj_list_unlock(); + dkprintf("shmctl(%#x,%d,%p): 0\n", shmid, cmd, buf); + return 0; + } + else if (cmd == SHM_STAT) { + shmobj_list_lock(); + error = shmobj_list_lookup_by_index(shmid, &obj); + if (error) { + shmobj_list_unlock(); + dkprintf("shmctl(%#x,%d,%p): lookup: %d\n", shmid, cmd, buf, error); + return error; + } + error = copy_to_user(buf, &obj->ds, sizeof(*buf)); + if (!error) { + /* success: SHM_STAT returns the identifier of the + * segment found at this index rather than 0 */ + error = make_shmid(obj); + } + shmobj_list_unlock(); + dkprintf("shmctl(%#x,%d,%p): %d\n", shmid, cmd, buf, error); + return error; + } + else if (cmd == SHM_INFO) { + shmobj_list_lock(); + error = copy_to_user(buf, &the_shm_info, sizeof(the_shm_info)); + if (error) { + shmobj_list_unlock(); + dkprintf("shmctl(%#x,%d,%p): %d\n", shmid, cmd, buf, error); + return error; + } + + maxi = the_maxi; + if (maxi < 0) { + maxi = 0; + } + shmobj_list_unlock(); + dkprintf("shmctl(%#x,%d,%p): %d\n", shmid, cmd, buf, maxi); + return maxi; + } + + dkprintf("shmctl(%#x,%d,%p): EINVAL\n", shmid, cmd, buf); + return -EINVAL; +} /* sys_shmctl() */ + +SYSCALL_DECLARE(shmdt) +{ + void * const shmaddr = (void *)ihk_mc_syscall_arg0(ctx); + struct process *proc = cpu_local_var(current); + struct vm_range *range; + int error; + + dkprintf("shmdt(%p)\n", shmaddr); + ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock); + range = lookup_process_memory_range(proc->vm, (uintptr_t)shmaddr, (uintptr_t)shmaddr+1); + if (!range || (range->start != (uintptr_t)shmaddr) || !range->memobj + || !(range->memobj->flags & MF_SHMDT_OK)) { + ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); + dkprintf("shmdt(%p): -EINVAL\n", shmaddr); + return -EINVAL; + } + + error = do_munmap((void *)range->start, (range->end - range->start)); + if (error) { + ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); + dkprintf("shmdt(%p): %d\n", shmaddr, error); + return error; + } + 
+ ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); + dkprintf("shmdt(%p): 0\n", shmaddr); + return 0; +} /* sys_shmdt() */ + SYSCALL_DECLARE(futex) { uint64_t timeout = 0; // No timeout