diff --git a/executer/kernel/mcctrl/syscall.c b/executer/kernel/mcctrl/syscall.c index 21f1b549..a8bbb60b 100644 --- a/executer/kernel/mcctrl/syscall.c +++ b/executer/kernel/mcctrl/syscall.c @@ -1109,7 +1109,7 @@ reserve_user_space_common(struct mcctrl_usrdata *usrdata, unsigned long start, u struct pager { struct list_head list; struct inode * inode; - int ref; + uint64_t ref; /* needs same type as fileobj->sref */ struct file * rofile; struct file * rwfile; uintptr_t map_uaddr; @@ -1411,14 +1411,14 @@ out: return error; } -static int pager_req_release(ihk_os_t os, uintptr_t handle, int unref) +static int pager_req_release(ihk_os_t os, uintptr_t handle, uint64_t sref) { int error; struct pager *p; struct pager *free_pager = NULL; unsigned long flags; - dprintk("pager_req_relase(%p,%lx,%d)\n", os, handle, unref); + dprintk("%s(%p,%lx)\n", __func__, os, handle); spin_lock_irqsave(&pager_lock, flags); @@ -1426,11 +1426,11 @@ static int pager_req_release(ihk_os_t os, uintptr_t handle, int unref) list_for_each_entry(p, &pager_list, list) { if ((uintptr_t)p == handle) { error = 0; - p->ref -= unref; - if (p->ref <= 0) { - list_del(&p->list); - free_pager = p; - } + p->ref -= sref; + if (p->ref > 0) + break; + list_del(&p->list); + free_pager = p; break; } } @@ -1438,7 +1438,8 @@ static int pager_req_release(ihk_os_t os, uintptr_t handle, int unref) spin_unlock_irqrestore(&pager_lock, flags); if (error) { - printk("pager_req_release(%p,%lx,%d):pager not found. %d\n", os, handle, unref, error); + pr_err("%s(%p,%lx):pager not found. 
%d\n", + __func__, os, handle, error); goto out; } @@ -1454,7 +1455,7 @@ static int pager_req_release(ihk_os_t os, uintptr_t handle, int unref) error = 0; out: - dprintk("pager_req_release(%p,%lx,%d): %d\n", os, handle, unref, error); + dprintk("%s(%p,%lx): %d\n", __func__, os, handle, error); return error; } diff --git a/kernel/devobj.c b/kernel/devobj.c index f280ac05..7898a725 100644 --- a/kernel/devobj.c +++ b/kernel/devobj.c @@ -52,16 +52,15 @@ struct devobj { uintptr_t handle; off_t pfn_pgoff; uintptr_t * pfn_table; + ihk_spinlock_t pfn_table_lock; size_t npages; }; -static memobj_release_func_t devobj_release; -static memobj_ref_func_t devobj_ref; +static memobj_free_func_t devobj_free; static memobj_get_page_func_t devobj_get_page; static struct memobj_ops devobj_ops = { - .release = &devobj_release, - .ref = &devobj_ref, + .free = &devobj_free, .get_page = &devobj_get_page, }; @@ -132,6 +131,7 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp obj->memobj.ops = &devobj_ops; obj->memobj.flags = MF_HAS_PAGER | MF_DEV_FILE; obj->memobj.size = len; + ihk_atomic_set(&obj->memobj.refcnt, 1); obj->handle = result.handle; dkprintf("%s: path=%s\n", __FUNCTION__, result.path); @@ -145,10 +145,9 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp strncpy(obj->memobj.path, result.path, PATH_MAX); } - obj->ref = 1; obj->pfn_pgoff = off >> PAGE_SHIFT; obj->npages = npages; - ihk_mc_spinlock_init(&obj->memobj.lock); + ihk_mc_spinlock_init(&obj->pfn_table_lock); error = 0; *objp = to_memobj(obj); @@ -167,68 +166,44 @@ out: return error; } -static void devobj_ref(struct memobj *memobj) +static void devobj_free(struct memobj *memobj) { struct devobj *obj = to_devobj(memobj); - - dkprintf("devobj_ref(%p %lx):\n", obj, obj->handle); - memobj_lock(&obj->memobj); - ++obj->ref; - memobj_unlock(&obj->memobj); - return; -} - -static void devobj_release(struct memobj *memobj) -{ - struct devobj *obj = 
to_devobj(memobj); - struct devobj *free_obj = NULL; uintptr_t handle; const size_t uintptr_per_page = (PAGE_SIZE / sizeof(uintptr_t)); const size_t pfn_npages = (obj->npages + uintptr_per_page - 1) / uintptr_per_page; + int error; + ihk_mc_user_context_t ctx; - dkprintf("devobj_release(%p %lx)\n", obj, obj->handle); + dkprintf("%s(%p %lx)\n", __func__, obj, obj->handle); - memobj_lock(&obj->memobj); - --obj->ref; - if (obj->ref <= 0) { - free_obj = obj; - } handle = obj->handle; - memobj_unlock(&obj->memobj); - if (free_obj) { - if (!(free_obj->memobj.flags & MF_HOST_RELEASED)) { - int error; - ihk_mc_user_context_t ctx; + ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_UNMAP; + ihk_mc_syscall_arg1(&ctx) = handle; + ihk_mc_syscall_arg2(&ctx) = 1; - ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_UNMAP; - ihk_mc_syscall_arg1(&ctx) = handle; - ihk_mc_syscall_arg2(&ctx) = 1; - - error = syscall_generic_forwarding(__NR_mmap, &ctx); - if (error) { - kprintf("devobj_release(%p %lx):" - "release failed. %d\n", - free_obj, handle, error); - /* through */ - } - } - - if (obj->pfn_table) { - // Don't call memory_stat_rss_sub() because devobj related pages don't reside in main memory - ihk_mc_free_pages(obj->pfn_table, pfn_npages); - } - - if (to_memobj(free_obj)->path) { - kfree(to_memobj(free_obj)->path); - } - - kfree(free_obj); + error = syscall_generic_forwarding(__NR_mmap, &ctx); + if (error) { + kprintf("%s(%p %lx): release failed. 
%d\n", + __func__, obj, handle, error); + /* through */ } - dkprintf("devobj_release(%p %lx):free %p\n", - obj, handle, free_obj); + if (obj->pfn_table) { + // Don't call memory_stat_rss_sub() because devobj related + // pages don't reside in main memory + ihk_mc_free_pages(obj->pfn_table, pfn_npages); + } + + if (to_memobj(obj)->path) { + kfree(to_memobj(obj)->path); + } + + kfree(obj); + + dkprintf("%s(%p %lx):free\n", __func__, obj, handle); return; } @@ -252,14 +227,11 @@ static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintpt ix = pgoff - obj->pfn_pgoff; dkprintf("ix: %ld\n", ix); - memobj_lock(&obj->memobj); - pfn = obj->pfn_table[ix]; #ifdef PROFILE_ENABLE profile_event_add(PROFILE_page_fault_dev_file, PAGE_SIZE); #endif // PROFILE_ENABLE + pfn = obj->pfn_table[ix]; if (!(pfn & PFN_VALID)) { - memobj_unlock(&obj->memobj); - ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_PFN; ihk_mc_syscall_arg1(&ctx) = obj->handle; ihk_mc_syscall_arg2(&ctx) = off & ~(PAGE_SIZE - 1); @@ -293,11 +265,9 @@ static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintpt dkprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT after %#lx\n", memobj, obj->handle, off, p2align, pfn); } - memobj_lock(&obj->memobj); obj->pfn_table[ix] = pfn; // Don't call memory_stat_rss_add() because devobj related pages don't reside in main memory } - memobj_unlock(&obj->memobj); if (!(pfn & PFN_PRESENT)) { kprintf("devobj_get_page(%p %lx,%lx,%d):not present. 
%lx\n", memobj, obj->handle, off, p2align, pfn); diff --git a/kernel/fileobj.c b/kernel/fileobj.c index 7ea15044..c9ff8279 100644 --- a/kernel/fileobj.c +++ b/kernel/fileobj.c @@ -45,24 +45,21 @@ static LIST_HEAD(fileobj_list); struct fileobj { struct memobj memobj; /* must be first */ - long sref; - long cref; + uint64_t sref; uintptr_t handle; struct list_head list; struct list_head page_hash[FILEOBJ_PAGE_HASH_SIZE]; mcs_lock_t page_hash_locks[FILEOBJ_PAGE_HASH_SIZE]; }; -static memobj_release_func_t fileobj_release; -static memobj_ref_func_t fileobj_ref; +static memobj_free_func_t fileobj_free; static memobj_get_page_func_t fileobj_get_page; static memobj_flush_page_func_t fileobj_flush_page; static memobj_invalidate_page_func_t fileobj_invalidate_page; static memobj_lookup_page_func_t fileobj_lookup_page; static struct memobj_ops fileobj_ops = { - .release = &fileobj_release, - .ref = &fileobj_ref, + .free = &fileobj_free, .get_page = &fileobj_get_page, .copy_page = NULL, .flush_page = &fileobj_flush_page, @@ -168,22 +165,22 @@ static void obj_list_remove(struct fileobj *obj) /* return NULL or locked fileobj */ static struct fileobj *obj_list_lookup(uintptr_t handle) { - struct fileobj *obj; struct fileobj *p; - obj = NULL; list_for_each_entry(p, &fileobj_list, list) { if (p->handle == handle) { - memobj_lock(&p->memobj); - if (p->cref > 0) { - obj = p; - break; + /* for the interval between last put and fileobj_free + * taking list_lock + */ + if (memobj_ref(&p->memobj) <= 1) { + ihk_atomic_dec(&p->memobj.refcnt); + continue; } - memobj_unlock(&p->memobj); + return p; } } - return obj; + return NULL; } /*********************************************************************** @@ -236,10 +233,7 @@ int fileobj_create(int fd, struct memobj **objp, int *maxprotp, uintptr_t virt_a newobj->memobj.flags = MF_HAS_PAGER | MF_REG_FILE; newobj->handle = result.handle; - newobj->sref = 1; - newobj->cref = 1; fileobj_page_hash_init(newobj); - 
ihk_mc_spinlock_init(&newobj->memobj.lock); mcs_lock_lock_noirq(&fileobj_list_lock, &node); obj = obj_list_lookup(result.handle); @@ -249,6 +243,8 @@ int fileobj_create(int fd, struct memobj **objp, int *maxprotp, uintptr_t virt_a to_memobj(obj)->size = result.size; to_memobj(obj)->flags |= result.flags; to_memobj(obj)->status = MEMOBJ_READY; + ihk_atomic_set(&to_memobj(obj)->refcnt, 1); + obj->sref = 1; if (to_memobj(obj)->flags & MF_PREFETCH) { to_memobj(obj)->status = MEMOBJ_TO_BE_PREFETCHED; } @@ -317,21 +313,17 @@ error_cleanup: } newobj = NULL; - dkprintf("%s: new obj 0x%lx cref: %d, %s\n", + dkprintf("%s: new obj 0x%lx %s\n", __FUNCTION__, obj, - obj->cref, to_memobj(obj)->flags & MF_ZEROFILL ? "zerofill" : ""); } else { found: - ++obj->sref; - ++obj->cref; - memobj_unlock(&obj->memobj); /* locked by obj_list_lookup() */ - dkprintf("%s: existing obj 0x%lx cref: %d, %s\n", + obj->sref++; + dkprintf("%s: existing obj 0x%lx, %s\n", __FUNCTION__, obj, - obj->cref, to_memobj(obj)->flags & MF_ZEROFILL ? 
"zerofill" : ""); } @@ -349,147 +341,107 @@ out: return error; } -static void fileobj_ref(struct memobj *memobj) +static void fileobj_free(struct memobj *memobj) { struct fileobj *obj = to_fileobj(memobj); - - dkprintf("fileobj_ref(%p %lx):\n", obj, obj->handle); - memobj_lock(&obj->memobj); - ++obj->cref; - memobj_unlock(&obj->memobj); - return; -} - -static void fileobj_release(struct memobj *memobj) -{ - struct fileobj *obj = to_fileobj(memobj); - long free_sref = 0; - uintptr_t free_handle; - struct fileobj *free_obj = NULL; struct mcs_lock_node node; + int error; + ihk_mc_user_context_t ctx; - dkprintf("fileobj_release(%p %lx)\n", obj, obj->handle); - memobj_lock(&obj->memobj); - --obj->cref; - if (obj->cref <= 0) { - free_sref = obj->sref; - free_obj = obj; - } - obj->sref -= free_sref; - free_handle = obj->handle; - memobj_unlock(&obj->memobj); - if (obj->memobj.flags & MF_HOST_RELEASED) { - free_sref = 0; // don't call syscall_generic_forwarding - } + dkprintf("%s: free obj 0x%lx, %s\n", __func__, + obj, to_memobj(obj)->flags & MF_ZEROFILL ? "zerofill" : ""); - if (free_obj) { - dkprintf("%s: release obj 0x%lx cref: %d, free_obj: 0x%lx, %s\n", - __FUNCTION__, - obj, - obj->cref, - free_obj, - to_memobj(obj)->flags & MF_ZEROFILL ? "zerofill" : ""); - mcs_lock_lock_noirq(&fileobj_list_lock, &node); - /* zap page_list */ - for (;;) { - struct page *page; - void *page_va; - uintptr_t phys; + mcs_lock_lock_noirq(&fileobj_list_lock, &node); + obj_list_remove(obj); + mcs_lock_unlock_noirq(&fileobj_list_lock, &node); - page = fileobj_page_hash_first(obj); - if (!page) { - break; - } - __fileobj_page_hash_remove(page); - phys = page_to_phys(page); - page_va = phys_to_virt(phys); + /* zap page_list */ + for (;;) { + struct page *page; + void *page_va; + uintptr_t phys; - /* Count must be one because set to one on the first get_page() invoking fileobj_do_pageio and - incremented by the second get_page() reaping the pageio and decremented by clear_range(). 
+ page = fileobj_page_hash_first(obj); + if (!page) { + break; + } + __fileobj_page_hash_remove(page); + phys = page_to_phys(page); + page_va = phys_to_virt(phys); + /* Count must be one because set to one on the first + * get_page() invoking fileobj_do_pageio and incremented by + * the second get_page() reaping the pageio and decremented + * by clear_range(). + */ + if (ihk_atomic_read(&page->count) != 1) { + kprintf("%s: WARNING: page count is %d for phys 0x%lx is invalid, flags: 0x%lx\n", + __func__, ihk_atomic_read(&page->count), + page->phys, to_memobj(obj)->flags); + } + else if (page_unmap(page)) { + ihk_mc_free_pages_user(page_va, 1); + /* Track change in page->count for !MF_PREMAP pages. + * It is decremented here or in clear_range() */ - if (ihk_atomic_read(&page->count) != 1) { - kprintf("%s: WARNING: page count is %d for phys 0x%lx is invalid, flags: 0x%lx\n", - __FUNCTION__, - ihk_atomic_read(&page->count), - page->phys, - to_memobj(free_obj)->flags); - } - else if (page_unmap(page)) { - ihk_mc_free_pages_user(page_va, 1); - /* Track change in page->count for !MF_PREMAP pages. It is decremented here or in clear_range() */ - dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", phys, __FUNCTION__, phys, PAGE_SIZE, PAGE_SIZE); - rusage_memory_stat_mapped_file_sub(PAGE_SIZE, PAGE_SIZE); - } -#if 0 - count = ihk_atomic_sub_return(1, &page->count); - - if (!((page->mode == PM_WILL_PAGEIO) - || (page->mode == PM_DONE_PAGEIO) - || (page->mode == PM_PAGEIO_EOF) - || (page->mode == PM_PAGEIO_ERROR) - || ((page->mode == PM_MAPPED) - && (count <= 0)))) { - kprintf("fileobj_release(%p %lx): " - "mode %x, count %d, off %lx\n", - obj, obj->handle, page->mode, - count, page->offset); - panic("fileobj_release"); - } - - page->mode = PM_NONE; -#endif - } - - /* Pre-mapped? 
*/ - if (to_memobj(free_obj)->flags & MF_PREMAP) { - int i; - for (i = 0; i < to_memobj(free_obj)->nr_pages; ++i) { - if (to_memobj(free_obj)->pages[i]) { - dkprintf("%s: pages[i]=%p\n", __FUNCTION__, i, to_memobj(free_obj)->pages[i]); - // Track change in fileobj->pages[] for MF_PREMAP pages - // Note that page_unmap() isn't called for MF_PREMAP in - // free_process_memory_range() --> ihk_mc_pt_free_range() - dkprintf("%lx-,%s: memory_stat_rss_sub,phys=%lx,size=%ld,pgsize=%ld\n", - virt_to_phys(to_memobj(free_obj)->pages[i]), __FUNCTION__, virt_to_phys(to_memobj(free_obj)->pages[i]), PAGE_SIZE, PAGE_SIZE); - rusage_memory_stat_mapped_file_sub(PAGE_SIZE, PAGE_SIZE); - ihk_mc_free_pages_user(to_memobj(free_obj)->pages[i], 1); - } - } - - kfree(to_memobj(free_obj)->pages); - } - - if (to_memobj(free_obj)->path) { - dkprintf("%s: %s\n", __FUNCTION__, to_memobj(free_obj)->path); - kfree(to_memobj(free_obj)->path); - } - - obj_list_remove(free_obj); - mcs_lock_unlock_noirq(&fileobj_list_lock, &node); - kfree(free_obj); - } - - if (free_sref) { - int error; - ihk_mc_user_context_t ctx; - - ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_RELEASE; - ihk_mc_syscall_arg1(&ctx) = free_handle; - ihk_mc_syscall_arg2(&ctx) = free_sref; - - error = syscall_generic_forwarding(__NR_mmap, &ctx); - if (error) { - kprintf("fileobj_release(%p %lx):" - "release %ld failed. %d\n", - obj, free_handle, free_sref, error); - /* through */ + dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", + phys, __func__, phys, PAGE_SIZE, PAGE_SIZE); + rusage_memory_stat_mapped_file_sub(PAGE_SIZE, + PAGE_SIZE); } } - dkprintf("fileobj_release(%p %lx):free %ld %p\n", - obj, free_handle, free_sref, free_obj); + /* Pre-mapped? 
*/ + if (to_memobj(obj)->flags & MF_PREMAP) { + int i; + + for (i = 0; i < to_memobj(obj)->nr_pages; ++i) { + if (to_memobj(obj)->pages[i]) { + dkprintf("%s: pages[i]=%p\n", __func__, i, + to_memobj(obj)->pages[i]); + // Track change in fileobj->pages[] for MF_PREMAP pages + // Note that page_unmap() isn't called for MF_PREMAP in + // free_process_memory_range() --> ihk_mc_pt_free_range() + dkprintf("%lx-,%s: memory_stat_rss_sub,phys=%lx,size=%ld,pgsize=%ld\n", + virt_to_phys(to_memobj(obj)->pages[i]), + __func__, + virt_to_phys(to_memobj(obj)->pages[i]), + PAGE_SIZE, PAGE_SIZE); + rusage_memory_stat_mapped_file_sub(PAGE_SIZE, + PAGE_SIZE); + ihk_mc_free_pages_user(to_memobj(obj)->pages[i], + 1); + } + } + + kfree(to_memobj(obj)->pages); + } + + if (to_memobj(obj)->path) { + dkprintf("%s: %s\n", __func__, to_memobj(obj)->path); + kfree(to_memobj(obj)->path); + } + + /* linux side + * sref is necessary because handle is used as key, so there could + * be a new mckernel pager with the same handle being created as + * this one is being destroyed + */ + ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_RELEASE; + ihk_mc_syscall_arg1(&ctx) = obj->handle; + ihk_mc_syscall_arg2(&ctx) = obj->sref; + + error = syscall_generic_forwarding(__NR_mmap, &ctx); + if (error) { + kprintf("%s(%p %lx): free failed. 
%d\n", __func__, + obj, obj->handle, error); + /* through */ + } + + dkprintf("%s(%p %lx):free\n", __func__, obj, obj->handle); + kfree(obj); return; + } struct pageio_args { @@ -582,7 +534,7 @@ static void fileobj_do_pageio(void *args0) out: mcs_lock_unlock_noirq(&obj->page_hash_locks[hash], &mcs_node); - fileobj_release(&obj->memobj); /* got fileobj_get_page() */ + memobj_unref(&obj->memobj); /* got fileobj_get_page() */ kfree(args0); dkprintf("fileobj_do_pageio(%p,%lx,%lx):\n", obj, off, pgsize); return; @@ -695,9 +647,7 @@ static int fileobj_get_page(struct memobj *memobj, off_t off, page->mode = PM_WILL_PAGEIO; } - memobj_lock(&obj->memobj); - ++obj->cref; /* for fileobj_do_pageio() */ - memobj_unlock(&obj->memobj); + memobj_ref(&obj->memobj); args->fileobj = obj; args->objoff = off; @@ -758,10 +708,6 @@ static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys, return 0; } - if (memobj->flags & MF_HOST_RELEASED) { - return 0; - } - page = phys_to_page(phys); if (!page) { kprintf("%s: warning: tried to flush non-existing page for phys addr: 0x%lx\n", @@ -769,8 +715,6 @@ static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys, return 0; } - memobj_unlock(&obj->memobj); - ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_WRITE; ihk_mc_syscall_arg1(&ctx) = obj->handle; ihk_mc_syscall_arg2(&ctx) = page->offset; @@ -785,7 +729,6 @@ static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys, /* through */ } - memobj_lock(&obj->memobj); return 0; } diff --git a/kernel/include/memobj.h b/kernel/include/memobj.h index b164e670..2dd2f3d2 100644 --- a/kernel/include/memobj.h +++ b/kernel/include/memobj.h @@ -44,7 +44,6 @@ enum { MF_XPMEM = 0x10000, /* To identify XPMEM attachment pages for rusage accounting */ MF_ZEROOBJ = 0x20000, /* To identify pages of anonymous, on-demand paging ranges for rusage accounting */ MF_SHM = 0x40000, - MF_HOST_RELEASED = 0x80000000, MF_END }; @@ -56,7 +55,7 @@ struct memobj { uint32_t flags; uint32_t status; size_t size; 
- ihk_spinlock_t lock; + ihk_atomic_t refcnt; /* For pre-mapped memobjects */ void **pages; @@ -64,8 +63,7 @@ struct memobj { char *path; }; -typedef void memobj_release_func_t(struct memobj *obj); -typedef void memobj_ref_func_t(struct memobj *obj); +typedef void memobj_free_func_t(struct memobj *obj); typedef int memobj_get_page_func_t(struct memobj *obj, off_t off, int p2align, uintptr_t *physp, unsigned long *flag, uintptr_t virt_addr); typedef uintptr_t memobj_copy_page_func_t(struct memobj *obj, uintptr_t orgphys, int p2align); typedef int memobj_flush_page_func_t(struct memobj *obj, uintptr_t phys, size_t pgsize); @@ -73,26 +71,23 @@ typedef int memobj_invalidate_page_func_t(struct memobj *obj, uintptr_t phys, si typedef int memobj_lookup_page_func_t(struct memobj *obj, off_t off, int p2align, uintptr_t *physp, unsigned long *flag); struct memobj_ops { - memobj_release_func_t * release; - memobj_ref_func_t * ref; - memobj_get_page_func_t * get_page; - memobj_copy_page_func_t * copy_page; - memobj_flush_page_func_t * flush_page; - memobj_invalidate_page_func_t * invalidate_page; - memobj_lookup_page_func_t * lookup_page; + memobj_free_func_t *free; + memobj_get_page_func_t *get_page; + memobj_copy_page_func_t *copy_page; + memobj_flush_page_func_t *flush_page; + memobj_invalidate_page_func_t *invalidate_page; + memobj_lookup_page_func_t *lookup_page; }; -static inline void memobj_release(struct memobj *obj) +static inline int memobj_ref(struct memobj *obj) { - if (obj->ops->release) { - (*obj->ops->release)(obj); - } + return ihk_atomic_inc_return(&obj->refcnt); } -static inline void memobj_ref(struct memobj *obj) +static inline void memobj_unref(struct memobj *obj) { - if (obj->ops->ref) { - (*obj->ops->ref)(obj); + if (ihk_atomic_dec_return(&obj->refcnt) == 0) { + (*obj->ops->free)(obj); } } @@ -140,16 +135,6 @@ static inline int memobj_lookup_page(struct memobj *obj, off_t off, return -ENXIO; } -static inline void memobj_lock(struct memobj *obj) -{ - 
ihk_mc_spinlock_lock_noirq(&obj->lock); -} - -static inline void memobj_unlock(struct memobj *obj) -{ - ihk_mc_spinlock_unlock_noirq(&obj->lock); -} - static inline int memobj_has_pager(struct memobj *obj) { return !!(obj->flags & MF_HAS_PAGER); diff --git a/kernel/include/shm.h b/kernel/include/shm.h index 4071e578..d8b136b6 100644 --- a/kernel/include/shm.h +++ b/kernel/include/shm.h @@ -57,6 +57,7 @@ struct shmobj { struct shmlock_user * user; struct shmid_ds ds; struct list_head page_list; + ihk_spinlock_t page_list_lock; struct list_head chain; /* shmobj_list */ }; @@ -104,7 +105,6 @@ static inline void shmlock_users_unlock(void) void shmobj_list_lock(void); void shmobj_list_unlock(void); int shmobj_create_indexed(struct shmid_ds *ds, struct shmobj **objp); -void shmobj_destroy(struct shmobj *obj); void shmlock_user_free(struct shmlock_user *user); int shmlock_user_get(uid_t ruid, struct shmlock_user **userp); diff --git a/kernel/process.c b/kernel/process.c index ccb4b22c..6e6eb1f4 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -882,7 +882,7 @@ int join_process_memory_range(struct process_vm *vm, surviving->end = merging->end; if (merging->memobj) { - memobj_release(merging->memobj); + memobj_unref(merging->memobj); } rb_erase(&merging->vm_rb_node, &vm->vm_range_tree); for (i = 0; i < VM_RANGE_CACHE_SIZE; ++i) { @@ -955,13 +955,13 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range) ihk_mc_spinlock_lock_noirq(&vm->page_table_lock); if (range->memobj) { - memobj_lock(range->memobj); + memobj_ref(range->memobj); } error = ihk_mc_pt_free_range(vm->address_space->page_table, vm, (void *)start, (void *)end, (range->flag & VR_PRIVATE)? 
NULL: range->memobj); if (range->memobj) { - memobj_unlock(range->memobj); + memobj_unref(range->memobj); } ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock); if (error && (error != -ENOENT)) { @@ -988,7 +988,7 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range) } if (range->memobj) { - memobj_release(range->memobj); + memobj_unref(range->memobj); } rb_erase(&range->vm_rb_node, &vm->vm_range_tree); @@ -1520,7 +1520,7 @@ int remap_process_memory_range(struct process_vm *vm, struct vm_range *range, dkprintf("remap_process_memory_range(%p,%p,%#lx,%#lx,%#lx)\n", vm, range, start, end, off); ihk_mc_spinlock_lock_noirq(&vm->page_table_lock); - memobj_lock(range->memobj); + memobj_ref(range->memobj); args.start = start; args.off = off; @@ -1545,7 +1545,7 @@ int remap_process_memory_range(struct process_vm *vm, struct vm_range *range, error = 0; out: - memobj_unlock(range->memobj); + memobj_unref(range->memobj); ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock); dkprintf("remap_process_memory_range(%p,%p,%#lx,%#lx,%#lx):%d\n", vm, range, start, end, off, error); @@ -1610,7 +1610,7 @@ int sync_process_memory_range(struct process_vm *vm, struct vm_range *range, ihk_mc_spinlock_lock_noirq(&vm->page_table_lock); if (!(range->memobj->flags & MF_ZEROFILL)) { - memobj_lock(range->memobj); + memobj_ref(range->memobj); } error = visit_pte_range(vm->address_space->page_table, (void *)start, @@ -1618,7 +1618,7 @@ int sync_process_memory_range(struct process_vm *vm, struct vm_range *range, &sync_one_page, &args); if (!(range->memobj->flags & MF_ZEROFILL)) { - memobj_unlock(range->memobj); + memobj_unref(range->memobj); } ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock); @@ -1700,11 +1700,11 @@ int invalidate_process_memory_range(struct process_vm *vm, args.range = range; ihk_mc_spinlock_lock_noirq(&vm->page_table_lock); - memobj_lock(range->memobj); + memobj_ref(range->memobj); error = visit_pte_range(vm->address_space->page_table, (void *)start, (void 
*)end, range->pgshift, VPTEF_SKIP_NULL, &invalidate_one_page, &args); - memobj_unlock(range->memobj); + memobj_unref(range->memobj); ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock); if (error) { ekprintf("invalidate_process_memory_range(%p,%p,%#lx,%#lx):" diff --git a/kernel/shmobj.c b/kernel/shmobj.c index 4da2194a..1b3a7d77 100644 --- a/kernel/shmobj.c +++ b/kernel/shmobj.c @@ -29,15 +29,13 @@ static LIST_HEAD(shmobj_list_head); static ihk_spinlock_t shmobj_list_lock_body = SPIN_LOCK_UNLOCKED; -static memobj_release_func_t shmobj_release; -static memobj_ref_func_t shmobj_ref; +static memobj_free_func_t shmobj_free; static memobj_get_page_func_t shmobj_get_page; static memobj_invalidate_page_func_t shmobj_invalidate_page; static memobj_lookup_page_func_t shmobj_lookup_page; static struct memobj_ops shmobj_ops = { - .release = &shmobj_release, - .ref = &shmobj_ref, + .free = &shmobj_free, .get_page = &shmobj_get_page, .invalidate_page = &shmobj_invalidate_page, .lookup_page = &shmobj_lookup_page, @@ -59,9 +57,20 @@ static struct memobj *to_memobj(struct shmobj *shmobj) static void page_list_init(struct shmobj *obj) { INIT_LIST_HEAD(&obj->page_list); + ihk_mc_spinlock_init(&obj->page_list_lock); return; } +static void page_list_lock(struct shmobj *obj) +{ + ihk_mc_spinlock_lock_noirq(&obj->page_list_lock); +} + +static void page_list_unlock(struct shmobj *obj) +{ + ihk_mc_spinlock_unlock_noirq(&obj->page_list_lock); +} + static void page_list_insert(struct shmobj *obj, struct page *page) { list_add(&page->list, &obj->page_list); @@ -180,15 +189,14 @@ int shmobj_create(struct shmid_ds *ds, struct memobj **objp) obj->memobj.ops = &shmobj_ops; obj->memobj.flags = MF_SHM; obj->memobj.size = ds->shm_segsz; + ihk_atomic_set(&obj->memobj.refcnt, 1); obj->ds = *ds; obj->ds.shm_perm.seq = the_seq++; - obj->ds.shm_nattch = 1; obj->ds.init_pgshift = 0; obj->index = -1; obj->pgshift = pgshift; obj->real_segsz = (obj->ds.shm_segsz + pgsize - 1) & ~(pgsize - 1); 
page_list_init(obj); - ihk_mc_spinlock_init(&obj->memobj.lock); error = 0; *objp = to_memobj(obj); @@ -216,7 +224,7 @@ int shmobj_create_indexed(struct shmid_ds *ds, struct shmobj **objp) return error; } -void shmobj_destroy(struct shmobj *obj) +static void shmobj_destroy(struct shmobj *obj) { extern struct shm_info the_shm_info; extern struct list_head kds_free_list; @@ -244,6 +252,7 @@ void shmobj_destroy(struct shmobj *obj) void *page_va; uintptr_t phys; + /* no lock required as obj is inaccessible */ page = page_list_first(obj); if (!page) { break; @@ -314,61 +323,22 @@ void shmobj_destroy(struct shmobj *obj) return; } -static void shmobj_release(struct memobj *memobj) +static void shmobj_free(struct memobj *memobj) { struct shmobj *obj = to_shmobj(memobj); - struct thread *thread = cpu_local_var(current); - struct process *proc = thread->proc; - struct shmobj *freeobj = NULL; - long newref; extern time_t time(void); - dkprintf("shmobj_release(%p)\n", memobj); - memobj_lock(&obj->memobj); - if (obj->index >= 0) { - obj->ds.shm_dtime = time(); - obj->ds.shm_lpid = proc->pid; - dkprintf("shmobj_release:drop shm_nattach %p %d\n", obj, obj->ds.shm_nattch); - } - newref = --obj->ds.shm_nattch; - if (newref <= 0) { - if (newref < 0) { - ekprintf("shmobj_release(%p):ref %ld\n", - memobj, newref); - panic("shmobj_release:freeing free shmobj"); - } - if (obj->ds.shm_perm.mode & SHM_DEST) { - freeobj = obj; - } - } - memobj_unlock(&obj->memobj); + dkprintf("%s(%p)\n", __func__, memobj); - if (freeobj) { - shmobj_list_lock(); - shmobj_destroy(freeobj); - shmobj_list_unlock(); + shmobj_list_lock(); + if (!(obj->ds.shm_perm.mode & SHM_DEST)) { + ekprintf("%s called without going through rmid?", __func__); } - dkprintf("shmobj_release(%p): %ld\n", memobj, newref); - return; -} -static void shmobj_ref(struct memobj *memobj) -{ - struct shmobj *obj = to_shmobj(memobj); - struct thread *thread = cpu_local_var(current); - struct process *proc = thread->proc; - long newref; - 
extern time_t time(void); + shmobj_destroy(obj); + shmobj_list_unlock(); - dkprintf("shmobj_ref(%p)\n", memobj); - memobj_lock(&obj->memobj); - newref = ++obj->ds.shm_nattch; - if (obj->index >= 0) { - obj->ds.shm_atime = time(); - obj->ds.shm_lpid = proc->pid; - } - memobj_unlock(&obj->memobj); - dkprintf("shmobj_ref(%p): newref %ld\n", memobj, newref); + dkprintf("%s(%p)\n", __func__, memobj); return; } @@ -384,7 +354,7 @@ static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align, dkprintf("shmobj_get_page(%p,%#lx,%d,%p)\n", memobj, off, p2align, physp); - memobj_lock(&obj->memobj); + memobj_ref(memobj); if (off & ~PAGE_MASK) { error = -EINVAL; ekprintf("shmobj_get_page(%p,%#lx,%d,%p):invalid argument. %d\n", @@ -410,12 +380,14 @@ static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align, goto out; } + page_list_lock(obj); page = page_list_lookup(obj, off); if (!page) { npages = 1 << p2align; virt = ihk_mc_alloc_aligned_pages_user(npages, p2align, IHK_MC_AP_NOWAIT, virt_addr); if (!virt) { + page_list_unlock(obj); error = -ENOMEM; ekprintf("shmobj_get_page(%p,%#lx,%d,%p):" "alloc failed. %d\n", @@ -445,6 +417,7 @@ static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align, dkprintf("shmobj_get_page(%p,%#lx,%d,%p):alloc page. 
%p %#lx\n", memobj, off, p2align, physp, page, phys); } + page_list_unlock(obj); ihk_atomic_inc(&page->count); @@ -452,7 +425,7 @@ static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align, *physp = page_to_phys(page); out: - memobj_unlock(&obj->memobj); + memobj_unref(memobj); if (virt) { ihk_mc_free_pages_user(virt, npages); } @@ -470,11 +443,14 @@ static int shmobj_invalidate_page(struct memobj *memobj, uintptr_t phys, dkprintf("shmobj_invalidate_page(%p,%#lx,%#lx)\n", memobj, phys, pgsize); + page_list_lock(obj); if (!(page = phys_to_page(phys)) || !(page = page_list_lookup(obj, page->offset))) { + page_list_unlock(obj); error = 0; goto out; } + page_list_unlock(obj); if (ihk_atomic_read(&page->count) == 1) { if (page_unmap(page)) { @@ -503,7 +479,7 @@ static int shmobj_lookup_page(struct memobj *memobj, off_t off, int p2align, dkprintf("shmobj_lookup_page(%p,%#lx,%d,%p)\n", memobj, off, p2align, physp); - memobj_lock(&obj->memobj); + memobj_ref(&obj->memobj); if (off & ~PAGE_MASK) { error = -EINVAL; ekprintf("shmobj_lookup_page(%p,%#lx,%d,%p):invalid argument. %d\n", @@ -529,7 +505,9 @@ static int shmobj_lookup_page(struct memobj *memobj, off_t off, int p2align, goto out; } + page_list_lock(obj); page = page_list_lookup(obj, off); + page_list_unlock(obj); if (!page) { error = -ENOENT; dkprintf("shmobj_lookup_page(%p,%#lx,%d,%p):page not found. 
%d\n", @@ -544,7 +522,7 @@ static int shmobj_lookup_page(struct memobj *memobj, off_t off, int p2align, } out: - memobj_unlock(&obj->memobj); + memobj_unref(&obj->memobj); dkprintf("shmobj_lookup_page(%p,%#lx,%d,%p):%d %#lx\n", memobj, off, p2align, physp, error, phys); return error; diff --git a/kernel/syscall.c b/kernel/syscall.c index b360fbff..1c726822 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -1679,13 +1679,11 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot, populate_len = len; if (!(flags & MAP_ANONYMOUS)) { - memobj_lock(memobj); - if (memobj->status == MEMOBJ_TO_BE_PREFETCHED) { - memobj->status = MEMOBJ_READY; + if (atomic_cmpxchg4(&memobj->status, MEMOBJ_TO_BE_PREFETCHED, + MEMOBJ_READY)) { populated_mapping = 1; populate_len = memobj->size; } - memobj_unlock(memobj); /* Update PTEs for pre-mapped memory object */ if ((memobj->flags & MF_PREMAP) && @@ -1763,7 +1761,7 @@ out: ihk_mc_free_pages_user(p, npages); } if (memobj) { - memobj_release(memobj); + memobj_unref(memobj); } dkprintf("%s: 0x%lx:%8lu, (req: 0x%lx:%lu), prot: %x, flags: %x, " "fd: %d, off: %lu, error: %ld, addr: 0x%lx\n", @@ -4796,6 +4794,7 @@ int shmobj_list_lookup(int shmid, struct shmobj **objp) return -EIDRM; } + memobj_ref(&obj->memobj); *objp = obj; return 0; } /* shmobj_list_lookup() */ @@ -4814,6 +4813,7 @@ int shmobj_list_lookup_by_key(key_t key, struct shmobj **objp) return -EINVAL; } + memobj_ref(&obj->memobj); *objp = obj; return 0; } /* shmobj_list_lookup_by_key() */ @@ -4831,6 +4831,7 @@ int shmobj_list_lookup_by_index(int index, struct shmobj **objp) return -EINVAL; } + memobj_ref(&obj->memobj); *objp = obj; return 0; } /* shmobj_list_lookup_by_index() */ @@ -4872,6 +4873,7 @@ int do_shmget(const key_t key, const size_t size, const int shmflg) } if (obj && (shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) { shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("do_shmget(%#lx,%#lx,%#x): -EEXIST\n", key, size, shmflg); return -EEXIST; } @@ 
-4898,12 +4900,14 @@ int do_shmget(const key_t key, const size_t size, const int shmflg) } if (req & ~obj->ds.shm_perm.mode) { shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("do_shmget(%#lx,%#lx,%#x): -EINVAL\n", key, size, shmflg); return -EACCES; } } if (obj->ds.shm_segsz < size) { shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("do_shmget(%#lx,%#lx,%#x): -EINVAL\n", key, size, shmflg); return -EINVAL; } @@ -4950,7 +4954,6 @@ int do_shmget(const key_t key, const size_t size, const int shmflg) shmid = make_shmid(obj); shmobj_list_unlock(); - memobj_release(&obj->memobj); dkprintf("do_shmget(%#lx,%#lx,%#x): %d\n", key, size, shmflg, shmid); return shmid; @@ -4986,6 +4989,7 @@ SYSCALL_DECLARE(shmat) pgsize = (size_t)1 << obj->pgshift; if (shmaddr && ((uintptr_t)shmaddr & (pgsize - 1)) && !(shmflg & SHM_RND)) { shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmat(%#x,%p,%#x): -EINVAL\n", shmid, shmaddr, shmflg); return -EINVAL; } @@ -5015,6 +5019,7 @@ SYSCALL_DECLARE(shmat) } if (~obj->ds.shm_perm.mode & req) { shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmat(%#x,%p,%#x): -EINVAL\n", shmid, shmaddr, shmflg); return -EACCES; } @@ -5025,6 +5030,7 @@ SYSCALL_DECLARE(shmat) if (lookup_process_memory_range(vm, addr, addr+len)) { ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmat(%#x,%p,%#x):lookup_process_memory_range succeeded. -ENOMEM\n", shmid, shmaddr, shmflg); return -ENOMEM; } @@ -5034,6 +5040,7 @@ SYSCALL_DECLARE(shmat) if (error) { ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmat(%#x,%p,%#x):search_free_space failed. 
%d\n", shmid, shmaddr, shmflg, error); return error; } @@ -5049,20 +5056,19 @@ SYSCALL_DECLARE(shmat) if (error) { ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmat(%#x,%p,%#x):set_host_vma failed. %d\n", shmid, shmaddr, shmflg, error); return error; } } - memobj_ref(&obj->memobj); - error = add_process_memory_range(vm, addr, addr+len, -1, vrflags, &obj->memobj, 0, obj->pgshift, NULL); if (error) { if (!(prot & PROT_WRITE)) { (void)set_host_vma(addr, len, PROT_READ | PROT_WRITE | PROT_EXEC, 1/* holding memory_range_lock */); } - memobj_release(&obj->memobj); + memobj_unref(&obj->memobj); ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); shmobj_list_unlock(); dkprintf("shmat(%#x,%p,%#x):add_process_memory_range failed. %d\n", shmid, shmaddr, shmflg, error); @@ -5072,7 +5078,6 @@ SYSCALL_DECLARE(shmat) ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); shmobj_list_unlock(); - dkprintf("shmat:bump shm_nattach %p %d\n", obj, obj->ds.shm_nattch); dkprintf("shmat(%#x,%p,%#x): 0x%lx. 
%d\n", shmid, shmaddr, shmflg, addr); return addr; } /* sys_shmat() */ @@ -5094,10 +5099,11 @@ SYSCALL_DECLARE(shmctl) size_t size; struct shmlock_user *user; uid_t ruid = proc->ruid; + uint16_t oldmode; dkprintf("shmctl(%#x,%d,%p)\n", shmid, cmd, buf); - if (0) ; - else if (cmd == IPC_RMID) { + switch (cmd) { + case IPC_RMID: shmobj_list_lock(); error = shmobj_list_lookup(shmid, &obj); if (error) { @@ -5109,19 +5115,21 @@ SYSCALL_DECLARE(shmctl) && (obj->ds.shm_perm.uid != proc->euid) && (obj->ds.shm_perm.cuid != proc->euid)) { shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmctl(%#x,%d,%p): -EPERM\n", shmid, cmd, buf); return -EPERM; } + oldmode = obj->ds.shm_perm.mode; obj->ds.shm_perm.mode |= SHM_DEST; - if (obj->ds.shm_nattch <= 0) { - shmobj_destroy(obj); - } shmobj_list_unlock(); + // unref twice if this is the first time rmid is called + if (!(oldmode & SHM_DEST)) + memobj_unref(&obj->memobj); + memobj_unref(&obj->memobj); dkprintf("shmctl(%#x,%d,%p): 0\n", shmid, cmd, buf); return 0; - } - else if (cmd == IPC_SET) { + case IPC_SET: shmobj_list_lock(); error = shmobj_list_lookup(shmid, &obj); if (error) { @@ -5132,12 +5140,14 @@ SYSCALL_DECLARE(shmctl) if ((obj->ds.shm_perm.uid != proc->euid) && (obj->ds.shm_perm.cuid != proc->euid)) { shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmctl(%#x,%d,%p): -EPERM\n", shmid, cmd, buf); return -EPERM; } error = copy_from_user(&ads, buf, sizeof(ads)); if (error) { shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmctl(%#x,%d,%p): %d\n", shmid, cmd, buf, error); return error; } @@ -5148,48 +5158,66 @@ SYSCALL_DECLARE(shmctl) obj->ds.shm_ctime = now; shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmctl(%#x,%d,%p): 0\n", shmid, cmd, buf); return 0; - } - else if (cmd == IPC_STAT) { + case IPC_STAT: + case SHM_STAT: shmobj_list_lock(); - error = shmobj_list_lookup(shmid, &obj); + if (cmd == IPC_STAT) { + error = shmobj_list_lookup(shmid, &obj); + } else { // 
SHM_STAT + error = shmobj_list_lookup_by_index(shmid, &obj); + } if (error) { shmobj_list_unlock(); dkprintf("shmctl(%#x,%d,%p): lookup: %d\n", shmid, cmd, buf, error); return error; } - if (!proc->euid) { - req = 0; + + if (cmd == IPC_STAT) { + if (!proc->euid) { + req = 0; + } else if ((proc->euid == obj->ds.shm_perm.uid) || + (proc->euid == obj->ds.shm_perm.cuid)) { + req = 0400; + } else if ((proc->egid == obj->ds.shm_perm.gid) || + (proc->egid == obj->ds.shm_perm.cgid)) { + req = 0040; + } else { + req = 0004; + } + if (req & ~obj->ds.shm_perm.mode) { + shmobj_list_unlock(); + memobj_unref(&obj->memobj); + dkprintf("shmctl(%#x,%d,%p): -EACCES\n", shmid, + cmd, buf); + return -EACCES; + } } - else if ((proc->euid == obj->ds.shm_perm.uid) - || (proc->euid == obj->ds.shm_perm.cuid)) { - req = 0400; - } - else if ((proc->egid == obj->ds.shm_perm.gid) - || (proc->egid == obj->ds.shm_perm.cgid)) { - req = 0040; - } - else { - req = 0004; - } - if (req & ~obj->ds.shm_perm.mode) { - shmobj_list_unlock(); - dkprintf("shmctl(%#x,%d,%p): -EACCES\n", shmid, cmd, buf); - return -EACCES; + + /* This could potentially be higher than required if some other + * thread holds a ref at this point. + * Minus one here is because we hold a ref... + */ + obj->ds.shm_nattch = ihk_atomic_read(&obj->memobj.refcnt) - 1; + /* ... 
And one for sentinel unless RMID has been called */ + if (!(obj->ds.shm_perm.mode & SHM_DEST)) { + obj->ds.shm_nattch--; } + error = copy_to_user(buf, &obj->ds, sizeof(*buf)); if (error) { shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmctl(%#x,%d,%p): %d\n", shmid, cmd, buf, error); return error; } - shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmctl(%#x,%d,%p): 0\n", shmid, cmd, buf); return 0; - } - else if (cmd == IPC_INFO) { + case IPC_INFO: shmobj_list_lock(); error = shmobj_list_lookup(shmid, &obj); if (error) { @@ -5200,6 +5228,7 @@ SYSCALL_DECLARE(shmctl) error = copy_to_user(buf, &the_shminfo, sizeof(the_shminfo)); if (error) { shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmctl(%#x,%d,%p): %d\n", shmid, cmd, buf, error); return error; } @@ -5209,10 +5238,10 @@ SYSCALL_DECLARE(shmctl) maxi = 0; } shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmctl(%#x,%d,%p): %d\n", shmid, cmd, buf, maxi); return maxi; - } - else if (cmd == SHM_LOCK) { + case SHM_LOCK: shmobj_list_lock(); error = shmobj_list_lookup(shmid, &obj); if (error) { @@ -5224,12 +5253,14 @@ SYSCALL_DECLARE(shmctl) && (obj->ds.shm_perm.cuid != proc->euid) && (obj->ds.shm_perm.uid != proc->euid)) { shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmctl(%#x,%d,%p): perm shm: %d\n", shmid, cmd, buf, error); return -EPERM; } rlim = &proc->rlimit[MCK_RLIMIT_MEMLOCK]; if (!rlim->rlim_cur && !has_cap_ipc_lock(thread)) { shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmctl(%#x,%d,%p): perm proc: %d\n", shmid, cmd, buf, error); return -EPERM; } @@ -5240,6 +5271,7 @@ SYSCALL_DECLARE(shmctl) error = shmlock_user_get(ruid, &user); if (error) { shmlock_users_unlock(); + memobj_unref(&obj->memobj); shmobj_list_unlock(); ekprintf("shmctl(%#x,%d,%p): user lookup: %d\n", shmid, cmd, buf, error); return -ENOMEM; @@ -5250,6 +5282,7 @@ SYSCALL_DECLARE(shmctl) && ((rlim->rlim_cur < user->locked) || ((rlim->rlim_cur - 
user->locked) < size))) { shmlock_users_unlock(); + memobj_unref(&obj->memobj); shmobj_list_unlock(); dkprintf("shmctl(%#x,%d,%p): too large: %d\n", shmid, cmd, buf, error); return -ENOMEM; @@ -5260,11 +5293,11 @@ SYSCALL_DECLARE(shmctl) shmlock_users_unlock(); } shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmctl(%#x,%d,%p): 0\n", shmid, cmd, buf); return 0; - } - else if (cmd == SHM_UNLOCK) { + case SHM_UNLOCK: shmobj_list_lock(); error = shmobj_list_lookup(shmid, &obj); if (error) { @@ -5276,6 +5309,7 @@ SYSCALL_DECLARE(shmctl) && (obj->ds.shm_perm.cuid != proc->euid) && (obj->ds.shm_perm.uid != proc->euid)) { shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmctl(%#x,%d,%p): perm shm: %d\n", shmid, cmd, buf, error); return -EPERM; } @@ -5294,28 +5328,10 @@ SYSCALL_DECLARE(shmctl) obj->ds.shm_perm.mode &= ~SHM_LOCKED; } shmobj_list_unlock(); + memobj_unref(&obj->memobj); dkprintf("shmctl(%#x,%d,%p): 0\n", shmid, cmd, buf); return 0; - } - else if (cmd == SHM_STAT) { - shmobj_list_lock(); - error = shmobj_list_lookup_by_index(shmid, &obj); - if (error) { - shmobj_list_unlock(); - dkprintf("shmctl(%#x,%d,%p): lookup: %d\n", shmid, cmd, buf, error); - return error; - } - error = copy_to_user(buf, &obj->ds, sizeof(*buf)); - if (error) { - shmobj_list_unlock(); - dkprintf("shmctl(%#x,%d,%p): %d\n", shmid, cmd, buf, error); - return error; - } - shmobj_list_unlock(); - dkprintf("shmctl(%#x,%d,%p): 0\n", shmid, cmd, buf); - return 0; - } - else if (cmd == SHM_INFO) { + case SHM_INFO: shmobj_list_lock(); error = copy_to_user(buf, &the_shm_info, sizeof(the_shm_info)); if (error) { @@ -5331,10 +5347,10 @@ SYSCALL_DECLARE(shmctl) shmobj_list_unlock(); dkprintf("shmctl(%#x,%d,%p): %d\n", shmid, cmd, buf, maxi); return maxi; + default: + dkprintf("shmctl(%#x,%d,%p): EINVAL\n", shmid, cmd, buf); + return -EINVAL; } - - dkprintf("shmctl(%#x,%d,%p): EINVAL\n", shmid, cmd, buf); - return -EINVAL; } /* sys_shmctl() */ SYSCALL_DECLARE(shmdt) @@ 
-7962,7 +7978,7 @@ SYSCALL_DECLARE(mremap) oldaddr, oldsize0, newsize0, flags, newaddr, error); if (range->memobj) { - memobj_release(range->memobj); + memobj_unref(range->memobj); } goto out; } diff --git a/kernel/xpmem.c b/kernel/xpmem.c index 070b9e7d..5b633426 100644 --- a/kernel/xpmem.c +++ b/kernel/xpmem.c @@ -1399,7 +1399,7 @@ static int xpmem_free_process_memory_range( } if (range->memobj) { - memobj_release(range->memobj); + memobj_unref(range->memobj); } rb_erase(&range->vm_rb_node, &vm->vm_range_tree); @@ -1732,7 +1732,8 @@ int xpmem_remove_process_memory_range( remaining_vmr->private_data = NULL; /* This function is always followed by xpmem_free_process_memory_range() - which in turn calls memobj_release() */ + * which in turn calls memobj_unref() + */ remaining_vaddr = att->at_vaddr; } @@ -1755,7 +1756,8 @@ int xpmem_remove_process_memory_range( vmr->private_data = NULL; /* This function is always followed by [xpmem_]free_process_memory_range() - which in turn calls memobj_release() */ + * which in turn calls memobj_unref() + */ out: mcs_rwlock_writer_unlock(&att->at_lock, &at_lock); diff --git a/kernel/zeroobj.c b/kernel/zeroobj.c index 5987cbee..c937ba5b 100644 --- a/kernel/zeroobj.c +++ b/kernel/zeroobj.c @@ -32,9 +32,11 @@ static ihk_spinlock_t the_zeroobj_lock = SPIN_LOCK_UNLOCKED; static struct zeroobj *the_zeroobj = NULL; /* singleton */ static memobj_get_page_func_t zeroobj_get_page; +static memobj_free_func_t zeroobj_free; static struct memobj_ops zeroobj_ops = { .get_page = &zeroobj_get_page, + .free = &zeroobj_free, }; static struct zeroobj *to_zeroobj(struct memobj *memobj) @@ -74,6 +76,12 @@ static struct page *page_list_first(struct zeroobj *obj) /*********************************************************************** * zeroobj */ + +static void zeroobj_free(struct memobj *obj) +{ + kprintf("trying to free zeroobj, this should never happen\n"); +} + static int alloc_zeroobj(void) { int error; @@ -101,8 +109,8 @@ static int alloc_zeroobj(void) 
obj->memobj.ops = &zeroobj_ops; obj->memobj.flags = MF_ZEROOBJ; obj->memobj.size = 0; + ihk_atomic_set(&obj->memobj.refcnt, 2); // never reaches 0 page_list_init(obj); - ihk_mc_spinlock_init(&obj->memobj.lock); virt = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT); /* XXX:NYI:large page */ if (!virt) { @@ -159,6 +167,7 @@ int zeroobj_create(struct memobj **objp) error = 0; *objp = to_memobj(the_zeroobj); + memobj_ref(*objp); out: dkprintf("zeroobj_create(%p):%d %p\n", objp, error, *objp);