memobj: transform memobj lock to refcounting
We had a deadlock between:

- free_process_memory_range (takes the lock) -> ihk_mc_pt_free_range -> ... -> remote_flush_tlb_array_cpumask -> "/* Wait for all cores */", and
- obj_list_lookup() under fileobj_list_lock, which disabled IRQs and thus never acknowledged the remote flush.

The rework is quite big, but it removes the need for the big lock, although devobj and shmobj each needed a new, smaller lock to be introduced. The new locks are used much more locally and should not cause problems.

On the bright side, moving refcounting to the memobj level means we could remove the refcounting implemented separately in each object type, which simplifies the code a bit.

Change-Id: I6bc8438a98b1d8edddc91c4ac33c11b88e097ebb
This commit is contained in:
@@ -52,16 +52,15 @@ struct devobj {
|
||||
uintptr_t handle;
|
||||
off_t pfn_pgoff;
|
||||
uintptr_t * pfn_table;
|
||||
ihk_spinlock_t pfn_table_lock;
|
||||
size_t npages;
|
||||
};
|
||||
|
||||
static memobj_release_func_t devobj_release;
|
||||
static memobj_ref_func_t devobj_ref;
|
||||
static memobj_free_func_t devobj_free;
|
||||
static memobj_get_page_func_t devobj_get_page;
|
||||
|
||||
static struct memobj_ops devobj_ops = {
|
||||
.release = &devobj_release,
|
||||
.ref = &devobj_ref,
|
||||
.free = &devobj_free,
|
||||
.get_page = &devobj_get_page,
|
||||
};
|
||||
|
||||
@@ -132,6 +131,7 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp
|
||||
obj->memobj.ops = &devobj_ops;
|
||||
obj->memobj.flags = MF_HAS_PAGER | MF_DEV_FILE;
|
||||
obj->memobj.size = len;
|
||||
ihk_atomic_set(&obj->memobj.refcnt, 1);
|
||||
obj->handle = result.handle;
|
||||
|
||||
dkprintf("%s: path=%s\n", __FUNCTION__, result.path);
|
||||
@@ -145,10 +145,9 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp
|
||||
strncpy(obj->memobj.path, result.path, PATH_MAX);
|
||||
}
|
||||
|
||||
obj->ref = 1;
|
||||
obj->pfn_pgoff = off >> PAGE_SHIFT;
|
||||
obj->npages = npages;
|
||||
ihk_mc_spinlock_init(&obj->memobj.lock);
|
||||
ihk_mc_spinlock_init(&obj->pfn_table_lock);
|
||||
|
||||
error = 0;
|
||||
*objp = to_memobj(obj);
|
||||
@@ -167,68 +166,44 @@ out:
|
||||
return error;
|
||||
}
|
||||
|
||||
static void devobj_ref(struct memobj *memobj)
|
||||
static void devobj_free(struct memobj *memobj)
|
||||
{
|
||||
struct devobj *obj = to_devobj(memobj);
|
||||
|
||||
dkprintf("devobj_ref(%p %lx):\n", obj, obj->handle);
|
||||
memobj_lock(&obj->memobj);
|
||||
++obj->ref;
|
||||
memobj_unlock(&obj->memobj);
|
||||
return;
|
||||
}
|
||||
|
||||
static void devobj_release(struct memobj *memobj)
|
||||
{
|
||||
struct devobj *obj = to_devobj(memobj);
|
||||
struct devobj *free_obj = NULL;
|
||||
uintptr_t handle;
|
||||
const size_t uintptr_per_page = (PAGE_SIZE / sizeof(uintptr_t));
|
||||
const size_t pfn_npages =
|
||||
(obj->npages + uintptr_per_page - 1) / uintptr_per_page;
|
||||
int error;
|
||||
ihk_mc_user_context_t ctx;
|
||||
|
||||
dkprintf("devobj_release(%p %lx)\n", obj, obj->handle);
|
||||
dkprintf("%s(%p %lx)\n", __func__, obj, obj->handle);
|
||||
|
||||
memobj_lock(&obj->memobj);
|
||||
--obj->ref;
|
||||
if (obj->ref <= 0) {
|
||||
free_obj = obj;
|
||||
}
|
||||
handle = obj->handle;
|
||||
memobj_unlock(&obj->memobj);
|
||||
|
||||
if (free_obj) {
|
||||
if (!(free_obj->memobj.flags & MF_HOST_RELEASED)) {
|
||||
int error;
|
||||
ihk_mc_user_context_t ctx;
|
||||
ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_UNMAP;
|
||||
ihk_mc_syscall_arg1(&ctx) = handle;
|
||||
ihk_mc_syscall_arg2(&ctx) = 1;
|
||||
|
||||
ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_UNMAP;
|
||||
ihk_mc_syscall_arg1(&ctx) = handle;
|
||||
ihk_mc_syscall_arg2(&ctx) = 1;
|
||||
|
||||
error = syscall_generic_forwarding(__NR_mmap, &ctx);
|
||||
if (error) {
|
||||
kprintf("devobj_release(%p %lx):"
|
||||
"release failed. %d\n",
|
||||
free_obj, handle, error);
|
||||
/* through */
|
||||
}
|
||||
}
|
||||
|
||||
if (obj->pfn_table) {
|
||||
// Don't call memory_stat_rss_sub() because devobj related pages don't reside in main memory
|
||||
ihk_mc_free_pages(obj->pfn_table, pfn_npages);
|
||||
}
|
||||
|
||||
if (to_memobj(free_obj)->path) {
|
||||
kfree(to_memobj(free_obj)->path);
|
||||
}
|
||||
|
||||
kfree(free_obj);
|
||||
error = syscall_generic_forwarding(__NR_mmap, &ctx);
|
||||
if (error) {
|
||||
kprintf("%s(%p %lx): release failed. %d\n",
|
||||
__func__, obj, handle, error);
|
||||
/* through */
|
||||
}
|
||||
|
||||
dkprintf("devobj_release(%p %lx):free %p\n",
|
||||
obj, handle, free_obj);
|
||||
if (obj->pfn_table) {
|
||||
// Don't call memory_stat_rss_sub() because devobj related
|
||||
// pages don't reside in main memory
|
||||
ihk_mc_free_pages(obj->pfn_table, pfn_npages);
|
||||
}
|
||||
|
||||
if (to_memobj(obj)->path) {
|
||||
kfree(to_memobj(obj)->path);
|
||||
}
|
||||
|
||||
kfree(obj);
|
||||
|
||||
dkprintf("%s(%p %lx):free\n", __func__, obj, handle);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -252,14 +227,11 @@ static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintpt
|
||||
ix = pgoff - obj->pfn_pgoff;
|
||||
dkprintf("ix: %ld\n", ix);
|
||||
|
||||
memobj_lock(&obj->memobj);
|
||||
pfn = obj->pfn_table[ix];
|
||||
#ifdef PROFILE_ENABLE
|
||||
profile_event_add(PROFILE_page_fault_dev_file, PAGE_SIZE);
|
||||
#endif // PROFILE_ENABLE
|
||||
pfn = obj->pfn_table[ix];
|
||||
if (!(pfn & PFN_VALID)) {
|
||||
memobj_unlock(&obj->memobj);
|
||||
|
||||
ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_PFN;
|
||||
ihk_mc_syscall_arg1(&ctx) = obj->handle;
|
||||
ihk_mc_syscall_arg2(&ctx) = off & ~(PAGE_SIZE - 1);
|
||||
@@ -293,11 +265,9 @@ static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintpt
|
||||
dkprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT after %#lx\n", memobj, obj->handle, off, p2align, pfn);
|
||||
}
|
||||
|
||||
memobj_lock(&obj->memobj);
|
||||
obj->pfn_table[ix] = pfn;
|
||||
// Don't call memory_stat_rss_add() because devobj related pages don't reside in main memory
|
||||
}
|
||||
memobj_unlock(&obj->memobj);
|
||||
|
||||
if (!(pfn & PFN_PRESENT)) {
|
||||
kprintf("devobj_get_page(%p %lx,%lx,%d):not present. %lx\n", memobj, obj->handle, off, p2align, pfn);
|
||||
|
||||
269
kernel/fileobj.c
269
kernel/fileobj.c
@@ -45,24 +45,21 @@ static LIST_HEAD(fileobj_list);
|
||||
|
||||
struct fileobj {
|
||||
struct memobj memobj; /* must be first */
|
||||
long sref;
|
||||
long cref;
|
||||
uint64_t sref;
|
||||
uintptr_t handle;
|
||||
struct list_head list;
|
||||
struct list_head page_hash[FILEOBJ_PAGE_HASH_SIZE];
|
||||
mcs_lock_t page_hash_locks[FILEOBJ_PAGE_HASH_SIZE];
|
||||
};
|
||||
|
||||
static memobj_release_func_t fileobj_release;
|
||||
static memobj_ref_func_t fileobj_ref;
|
||||
static memobj_free_func_t fileobj_free;
|
||||
static memobj_get_page_func_t fileobj_get_page;
|
||||
static memobj_flush_page_func_t fileobj_flush_page;
|
||||
static memobj_invalidate_page_func_t fileobj_invalidate_page;
|
||||
static memobj_lookup_page_func_t fileobj_lookup_page;
|
||||
|
||||
static struct memobj_ops fileobj_ops = {
|
||||
.release = &fileobj_release,
|
||||
.ref = &fileobj_ref,
|
||||
.free = &fileobj_free,
|
||||
.get_page = &fileobj_get_page,
|
||||
.copy_page = NULL,
|
||||
.flush_page = &fileobj_flush_page,
|
||||
@@ -168,22 +165,22 @@ static void obj_list_remove(struct fileobj *obj)
|
||||
/* return NULL or locked fileobj */
|
||||
static struct fileobj *obj_list_lookup(uintptr_t handle)
|
||||
{
|
||||
struct fileobj *obj;
|
||||
struct fileobj *p;
|
||||
|
||||
obj = NULL;
|
||||
list_for_each_entry(p, &fileobj_list, list) {
|
||||
if (p->handle == handle) {
|
||||
memobj_lock(&p->memobj);
|
||||
if (p->cref > 0) {
|
||||
obj = p;
|
||||
break;
|
||||
/* for the interval between last put and fileobj_free
|
||||
* taking list_lock
|
||||
*/
|
||||
if (memobj_ref(&p->memobj) <= 1) {
|
||||
ihk_atomic_dec(&p->memobj.refcnt);
|
||||
continue;
|
||||
}
|
||||
memobj_unlock(&p->memobj);
|
||||
return p;
|
||||
}
|
||||
}
|
||||
|
||||
return obj;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/***********************************************************************
|
||||
@@ -236,10 +233,7 @@ int fileobj_create(int fd, struct memobj **objp, int *maxprotp, uintptr_t virt_a
|
||||
newobj->memobj.flags = MF_HAS_PAGER | MF_REG_FILE;
|
||||
newobj->handle = result.handle;
|
||||
|
||||
newobj->sref = 1;
|
||||
newobj->cref = 1;
|
||||
fileobj_page_hash_init(newobj);
|
||||
ihk_mc_spinlock_init(&newobj->memobj.lock);
|
||||
|
||||
mcs_lock_lock_noirq(&fileobj_list_lock, &node);
|
||||
obj = obj_list_lookup(result.handle);
|
||||
@@ -249,6 +243,8 @@ int fileobj_create(int fd, struct memobj **objp, int *maxprotp, uintptr_t virt_a
|
||||
to_memobj(obj)->size = result.size;
|
||||
to_memobj(obj)->flags |= result.flags;
|
||||
to_memobj(obj)->status = MEMOBJ_READY;
|
||||
ihk_atomic_set(&to_memobj(obj)->refcnt, 1);
|
||||
obj->sref = 1;
|
||||
if (to_memobj(obj)->flags & MF_PREFETCH) {
|
||||
to_memobj(obj)->status = MEMOBJ_TO_BE_PREFETCHED;
|
||||
}
|
||||
@@ -317,21 +313,17 @@ error_cleanup:
|
||||
}
|
||||
|
||||
newobj = NULL;
|
||||
dkprintf("%s: new obj 0x%lx cref: %d, %s\n",
|
||||
dkprintf("%s: new obj 0x%lx %s\n",
|
||||
__FUNCTION__,
|
||||
obj,
|
||||
obj->cref,
|
||||
to_memobj(obj)->flags & MF_ZEROFILL ? "zerofill" : "");
|
||||
}
|
||||
else {
|
||||
found:
|
||||
++obj->sref;
|
||||
++obj->cref;
|
||||
memobj_unlock(&obj->memobj); /* locked by obj_list_lookup() */
|
||||
dkprintf("%s: existing obj 0x%lx cref: %d, %s\n",
|
||||
obj->sref++;
|
||||
dkprintf("%s: existing obj 0x%lx, %s\n",
|
||||
__FUNCTION__,
|
||||
obj,
|
||||
obj->cref,
|
||||
to_memobj(obj)->flags & MF_ZEROFILL ? "zerofill" : "");
|
||||
}
|
||||
|
||||
@@ -349,147 +341,107 @@ out:
|
||||
return error;
|
||||
}
|
||||
|
||||
static void fileobj_ref(struct memobj *memobj)
|
||||
static void fileobj_free(struct memobj *memobj)
|
||||
{
|
||||
struct fileobj *obj = to_fileobj(memobj);
|
||||
|
||||
dkprintf("fileobj_ref(%p %lx):\n", obj, obj->handle);
|
||||
memobj_lock(&obj->memobj);
|
||||
++obj->cref;
|
||||
memobj_unlock(&obj->memobj);
|
||||
return;
|
||||
}
|
||||
|
||||
static void fileobj_release(struct memobj *memobj)
|
||||
{
|
||||
struct fileobj *obj = to_fileobj(memobj);
|
||||
long free_sref = 0;
|
||||
uintptr_t free_handle;
|
||||
struct fileobj *free_obj = NULL;
|
||||
struct mcs_lock_node node;
|
||||
int error;
|
||||
ihk_mc_user_context_t ctx;
|
||||
|
||||
dkprintf("fileobj_release(%p %lx)\n", obj, obj->handle);
|
||||
|
||||
memobj_lock(&obj->memobj);
|
||||
--obj->cref;
|
||||
if (obj->cref <= 0) {
|
||||
free_sref = obj->sref;
|
||||
free_obj = obj;
|
||||
}
|
||||
obj->sref -= free_sref;
|
||||
free_handle = obj->handle;
|
||||
memobj_unlock(&obj->memobj);
|
||||
if (obj->memobj.flags & MF_HOST_RELEASED) {
|
||||
free_sref = 0; // don't call syscall_generic_forwarding
|
||||
}
|
||||
dkprintf("%s: free obj 0x%lx, %s\n", __func__,
|
||||
obj, to_memobj(obj)->flags & MF_ZEROFILL ? "zerofill" : "");
|
||||
|
||||
if (free_obj) {
|
||||
dkprintf("%s: release obj 0x%lx cref: %d, free_obj: 0x%lx, %s\n",
|
||||
__FUNCTION__,
|
||||
obj,
|
||||
obj->cref,
|
||||
free_obj,
|
||||
to_memobj(obj)->flags & MF_ZEROFILL ? "zerofill" : "");
|
||||
mcs_lock_lock_noirq(&fileobj_list_lock, &node);
|
||||
/* zap page_list */
|
||||
for (;;) {
|
||||
struct page *page;
|
||||
void *page_va;
|
||||
uintptr_t phys;
|
||||
mcs_lock_lock_noirq(&fileobj_list_lock, &node);
|
||||
obj_list_remove(obj);
|
||||
mcs_lock_unlock_noirq(&fileobj_list_lock, &node);
|
||||
|
||||
page = fileobj_page_hash_first(obj);
|
||||
if (!page) {
|
||||
break;
|
||||
}
|
||||
__fileobj_page_hash_remove(page);
|
||||
phys = page_to_phys(page);
|
||||
page_va = phys_to_virt(phys);
|
||||
/* zap page_list */
|
||||
for (;;) {
|
||||
struct page *page;
|
||||
void *page_va;
|
||||
uintptr_t phys;
|
||||
|
||||
/* Count must be one because set to one on the first get_page() invoking fileobj_do_pageio and
|
||||
incremented by the second get_page() reaping the pageio and decremented by clear_range().
|
||||
page = fileobj_page_hash_first(obj);
|
||||
if (!page) {
|
||||
break;
|
||||
}
|
||||
__fileobj_page_hash_remove(page);
|
||||
phys = page_to_phys(page);
|
||||
page_va = phys_to_virt(phys);
|
||||
/* Count must be one because set to one on the first
|
||||
* get_page() invoking fileobj_do_pageio and incremented by
|
||||
* the second get_page() reaping the pageio and decremented
|
||||
* by clear_range().
|
||||
*/
|
||||
if (ihk_atomic_read(&page->count) != 1) {
|
||||
kprintf("%s: WARNING: page count is %d for phys 0x%lx is invalid, flags: 0x%lx\n",
|
||||
__func__, ihk_atomic_read(&page->count),
|
||||
page->phys, to_memobj(obj)->flags);
|
||||
}
|
||||
else if (page_unmap(page)) {
|
||||
ihk_mc_free_pages_user(page_va, 1);
|
||||
/* Track change in page->count for !MF_PREMAP pages.
|
||||
* It is decremented here or in clear_range()
|
||||
*/
|
||||
if (ihk_atomic_read(&page->count) != 1) {
|
||||
kprintf("%s: WARNING: page count is %d for phys 0x%lx is invalid, flags: 0x%lx\n",
|
||||
__FUNCTION__,
|
||||
ihk_atomic_read(&page->count),
|
||||
page->phys,
|
||||
to_memobj(free_obj)->flags);
|
||||
}
|
||||
else if (page_unmap(page)) {
|
||||
ihk_mc_free_pages_user(page_va, 1);
|
||||
/* Track change in page->count for !MF_PREMAP pages. It is decremented here or in clear_range() */
|
||||
dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", phys, __FUNCTION__, phys, PAGE_SIZE, PAGE_SIZE);
|
||||
rusage_memory_stat_mapped_file_sub(PAGE_SIZE, PAGE_SIZE);
|
||||
}
|
||||
#if 0
|
||||
count = ihk_atomic_sub_return(1, &page->count);
|
||||
|
||||
if (!((page->mode == PM_WILL_PAGEIO)
|
||||
|| (page->mode == PM_DONE_PAGEIO)
|
||||
|| (page->mode == PM_PAGEIO_EOF)
|
||||
|| (page->mode == PM_PAGEIO_ERROR)
|
||||
|| ((page->mode == PM_MAPPED)
|
||||
&& (count <= 0)))) {
|
||||
kprintf("fileobj_release(%p %lx): "
|
||||
"mode %x, count %d, off %lx\n",
|
||||
obj, obj->handle, page->mode,
|
||||
count, page->offset);
|
||||
panic("fileobj_release");
|
||||
}
|
||||
|
||||
page->mode = PM_NONE;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Pre-mapped? */
|
||||
if (to_memobj(free_obj)->flags & MF_PREMAP) {
|
||||
int i;
|
||||
for (i = 0; i < to_memobj(free_obj)->nr_pages; ++i) {
|
||||
if (to_memobj(free_obj)->pages[i]) {
|
||||
dkprintf("%s: pages[i]=%p\n", __FUNCTION__, i, to_memobj(free_obj)->pages[i]);
|
||||
// Track change in fileobj->pages[] for MF_PREMAP pages
|
||||
// Note that page_unmap() isn't called for MF_PREMAP in
|
||||
// free_process_memory_range() --> ihk_mc_pt_free_range()
|
||||
dkprintf("%lx-,%s: memory_stat_rss_sub,phys=%lx,size=%ld,pgsize=%ld\n",
|
||||
virt_to_phys(to_memobj(free_obj)->pages[i]), __FUNCTION__, virt_to_phys(to_memobj(free_obj)->pages[i]), PAGE_SIZE, PAGE_SIZE);
|
||||
rusage_memory_stat_mapped_file_sub(PAGE_SIZE, PAGE_SIZE);
|
||||
ihk_mc_free_pages_user(to_memobj(free_obj)->pages[i], 1);
|
||||
}
|
||||
}
|
||||
|
||||
kfree(to_memobj(free_obj)->pages);
|
||||
}
|
||||
|
||||
if (to_memobj(free_obj)->path) {
|
||||
dkprintf("%s: %s\n", __FUNCTION__, to_memobj(free_obj)->path);
|
||||
kfree(to_memobj(free_obj)->path);
|
||||
}
|
||||
|
||||
obj_list_remove(free_obj);
|
||||
mcs_lock_unlock_noirq(&fileobj_list_lock, &node);
|
||||
kfree(free_obj);
|
||||
}
|
||||
|
||||
if (free_sref) {
|
||||
int error;
|
||||
ihk_mc_user_context_t ctx;
|
||||
|
||||
ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_RELEASE;
|
||||
ihk_mc_syscall_arg1(&ctx) = free_handle;
|
||||
ihk_mc_syscall_arg2(&ctx) = free_sref;
|
||||
|
||||
error = syscall_generic_forwarding(__NR_mmap, &ctx);
|
||||
if (error) {
|
||||
kprintf("fileobj_release(%p %lx):"
|
||||
"release %ld failed. %d\n",
|
||||
obj, free_handle, free_sref, error);
|
||||
/* through */
|
||||
dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n",
|
||||
phys, __func__, phys, PAGE_SIZE, PAGE_SIZE);
|
||||
rusage_memory_stat_mapped_file_sub(PAGE_SIZE,
|
||||
PAGE_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
dkprintf("fileobj_release(%p %lx):free %ld %p\n",
|
||||
obj, free_handle, free_sref, free_obj);
|
||||
/* Pre-mapped? */
|
||||
if (to_memobj(obj)->flags & MF_PREMAP) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < to_memobj(obj)->nr_pages; ++i) {
|
||||
if (to_memobj(obj)->pages[i]) {
|
||||
dkprintf("%s: pages[i]=%p\n", __func__, i,
|
||||
to_memobj(obj)->pages[i]);
|
||||
// Track change in fileobj->pages[] for MF_PREMAP pages
|
||||
// Note that page_unmap() isn't called for MF_PREMAP in
|
||||
// free_process_memory_range() --> ihk_mc_pt_free_range()
|
||||
dkprintf("%lx-,%s: memory_stat_rss_sub,phys=%lx,size=%ld,pgsize=%ld\n",
|
||||
virt_to_phys(to_memobj(obj)->pages[i]),
|
||||
__func__,
|
||||
virt_to_phys(to_memobj(obj)->pages[i]),
|
||||
PAGE_SIZE, PAGE_SIZE);
|
||||
rusage_memory_stat_mapped_file_sub(PAGE_SIZE,
|
||||
PAGE_SIZE);
|
||||
ihk_mc_free_pages_user(to_memobj(obj)->pages[i],
|
||||
1);
|
||||
}
|
||||
}
|
||||
|
||||
kfree(to_memobj(obj)->pages);
|
||||
}
|
||||
|
||||
if (to_memobj(obj)->path) {
|
||||
dkprintf("%s: %s\n", __func__, to_memobj(obj)->path);
|
||||
kfree(to_memobj(obj)->path);
|
||||
}
|
||||
|
||||
/* linux side
|
||||
* sref is necessary because handle is used as key, so there could
|
||||
* be a new mckernel pager with the same handle being created as
|
||||
* this one is being destroyed
|
||||
*/
|
||||
ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_RELEASE;
|
||||
ihk_mc_syscall_arg1(&ctx) = obj->handle;
|
||||
ihk_mc_syscall_arg2(&ctx) = obj->sref;
|
||||
|
||||
error = syscall_generic_forwarding(__NR_mmap, &ctx);
|
||||
if (error) {
|
||||
kprintf("%s(%p %lx): free failed. %d\n", __func__,
|
||||
obj, obj->handle, error);
|
||||
/* through */
|
||||
}
|
||||
|
||||
dkprintf("%s(%p %lx):free\n", __func__, obj, obj->handle);
|
||||
kfree(obj);
|
||||
return;
|
||||
|
||||
}
|
||||
|
||||
struct pageio_args {
|
||||
@@ -582,7 +534,7 @@ static void fileobj_do_pageio(void *args0)
|
||||
out:
|
||||
mcs_lock_unlock_noirq(&obj->page_hash_locks[hash],
|
||||
&mcs_node);
|
||||
fileobj_release(&obj->memobj); /* got fileobj_get_page() */
|
||||
memobj_unref(&obj->memobj); /* got fileobj_get_page() */
|
||||
kfree(args0);
|
||||
dkprintf("fileobj_do_pageio(%p,%lx,%lx):\n", obj, off, pgsize);
|
||||
return;
|
||||
@@ -695,9 +647,7 @@ static int fileobj_get_page(struct memobj *memobj, off_t off,
|
||||
page->mode = PM_WILL_PAGEIO;
|
||||
}
|
||||
|
||||
memobj_lock(&obj->memobj);
|
||||
++obj->cref; /* for fileobj_do_pageio() */
|
||||
memobj_unlock(&obj->memobj);
|
||||
memobj_ref(&obj->memobj);
|
||||
|
||||
args->fileobj = obj;
|
||||
args->objoff = off;
|
||||
@@ -758,10 +708,6 @@ static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys,
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (memobj->flags & MF_HOST_RELEASED) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
page = phys_to_page(phys);
|
||||
if (!page) {
|
||||
kprintf("%s: warning: tried to flush non-existing page for phys addr: 0x%lx\n",
|
||||
@@ -769,8 +715,6 @@ static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys,
|
||||
return 0;
|
||||
}
|
||||
|
||||
memobj_unlock(&obj->memobj);
|
||||
|
||||
ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_WRITE;
|
||||
ihk_mc_syscall_arg1(&ctx) = obj->handle;
|
||||
ihk_mc_syscall_arg2(&ctx) = page->offset;
|
||||
@@ -785,7 +729,6 @@ static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys,
|
||||
/* through */
|
||||
}
|
||||
|
||||
memobj_lock(&obj->memobj);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -44,7 +44,6 @@ enum {
|
||||
MF_XPMEM = 0x10000, /* To identify XPMEM attachment pages for rusage accounting */
|
||||
MF_ZEROOBJ = 0x20000, /* To identify pages of anonymous, on-demand paging ranges for rusage accounting */
|
||||
MF_SHM = 0x40000,
|
||||
MF_HOST_RELEASED = 0x80000000,
|
||||
MF_END
|
||||
};
|
||||
|
||||
@@ -56,7 +55,7 @@ struct memobj {
|
||||
uint32_t flags;
|
||||
uint32_t status;
|
||||
size_t size;
|
||||
ihk_spinlock_t lock;
|
||||
ihk_atomic_t refcnt;
|
||||
|
||||
/* For pre-mapped memobjects */
|
||||
void **pages;
|
||||
@@ -64,8 +63,7 @@ struct memobj {
|
||||
char *path;
|
||||
};
|
||||
|
||||
typedef void memobj_release_func_t(struct memobj *obj);
|
||||
typedef void memobj_ref_func_t(struct memobj *obj);
|
||||
typedef void memobj_free_func_t(struct memobj *obj);
|
||||
typedef int memobj_get_page_func_t(struct memobj *obj, off_t off, int p2align, uintptr_t *physp, unsigned long *flag, uintptr_t virt_addr);
|
||||
typedef uintptr_t memobj_copy_page_func_t(struct memobj *obj, uintptr_t orgphys, int p2align);
|
||||
typedef int memobj_flush_page_func_t(struct memobj *obj, uintptr_t phys, size_t pgsize);
|
||||
@@ -73,26 +71,23 @@ typedef int memobj_invalidate_page_func_t(struct memobj *obj, uintptr_t phys, si
|
||||
typedef int memobj_lookup_page_func_t(struct memobj *obj, off_t off, int p2align, uintptr_t *physp, unsigned long *flag);
|
||||
|
||||
struct memobj_ops {
|
||||
memobj_release_func_t * release;
|
||||
memobj_ref_func_t * ref;
|
||||
memobj_get_page_func_t * get_page;
|
||||
memobj_copy_page_func_t * copy_page;
|
||||
memobj_flush_page_func_t * flush_page;
|
||||
memobj_invalidate_page_func_t * invalidate_page;
|
||||
memobj_lookup_page_func_t * lookup_page;
|
||||
memobj_free_func_t *free;
|
||||
memobj_get_page_func_t *get_page;
|
||||
memobj_copy_page_func_t *copy_page;
|
||||
memobj_flush_page_func_t *flush_page;
|
||||
memobj_invalidate_page_func_t *invalidate_page;
|
||||
memobj_lookup_page_func_t *lookup_page;
|
||||
};
|
||||
|
||||
static inline void memobj_release(struct memobj *obj)
|
||||
static inline int memobj_ref(struct memobj *obj)
|
||||
{
|
||||
if (obj->ops->release) {
|
||||
(*obj->ops->release)(obj);
|
||||
}
|
||||
return ihk_atomic_inc_return(&obj->refcnt);
|
||||
}
|
||||
|
||||
static inline void memobj_ref(struct memobj *obj)
|
||||
static inline void memobj_unref(struct memobj *obj)
|
||||
{
|
||||
if (obj->ops->ref) {
|
||||
(*obj->ops->ref)(obj);
|
||||
if (ihk_atomic_dec_return(&obj->refcnt) == 0) {
|
||||
(*obj->ops->free)(obj);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -140,16 +135,6 @@ static inline int memobj_lookup_page(struct memobj *obj, off_t off,
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
static inline void memobj_lock(struct memobj *obj)
|
||||
{
|
||||
ihk_mc_spinlock_lock_noirq(&obj->lock);
|
||||
}
|
||||
|
||||
static inline void memobj_unlock(struct memobj *obj)
|
||||
{
|
||||
ihk_mc_spinlock_unlock_noirq(&obj->lock);
|
||||
}
|
||||
|
||||
static inline int memobj_has_pager(struct memobj *obj)
|
||||
{
|
||||
return !!(obj->flags & MF_HAS_PAGER);
|
||||
|
||||
@@ -57,6 +57,7 @@ struct shmobj {
|
||||
struct shmlock_user * user;
|
||||
struct shmid_ds ds;
|
||||
struct list_head page_list;
|
||||
ihk_spinlock_t page_list_lock;
|
||||
struct list_head chain; /* shmobj_list */
|
||||
};
|
||||
|
||||
@@ -104,7 +105,6 @@ static inline void shmlock_users_unlock(void)
|
||||
void shmobj_list_lock(void);
|
||||
void shmobj_list_unlock(void);
|
||||
int shmobj_create_indexed(struct shmid_ds *ds, struct shmobj **objp);
|
||||
void shmobj_destroy(struct shmobj *obj);
|
||||
void shmlock_user_free(struct shmlock_user *user);
|
||||
int shmlock_user_get(uid_t ruid, struct shmlock_user **userp);
|
||||
|
||||
|
||||
@@ -882,7 +882,7 @@ int join_process_memory_range(struct process_vm *vm,
|
||||
surviving->end = merging->end;
|
||||
|
||||
if (merging->memobj) {
|
||||
memobj_release(merging->memobj);
|
||||
memobj_unref(merging->memobj);
|
||||
}
|
||||
rb_erase(&merging->vm_rb_node, &vm->vm_range_tree);
|
||||
for (i = 0; i < VM_RANGE_CACHE_SIZE; ++i) {
|
||||
@@ -955,13 +955,13 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range)
|
||||
|
||||
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
|
||||
if (range->memobj) {
|
||||
memobj_lock(range->memobj);
|
||||
memobj_ref(range->memobj);
|
||||
}
|
||||
error = ihk_mc_pt_free_range(vm->address_space->page_table, vm,
|
||||
(void *)start, (void *)end,
|
||||
(range->flag & VR_PRIVATE)? NULL: range->memobj);
|
||||
if (range->memobj) {
|
||||
memobj_unlock(range->memobj);
|
||||
memobj_unref(range->memobj);
|
||||
}
|
||||
ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
|
||||
if (error && (error != -ENOENT)) {
|
||||
@@ -988,7 +988,7 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range)
|
||||
}
|
||||
|
||||
if (range->memobj) {
|
||||
memobj_release(range->memobj);
|
||||
memobj_unref(range->memobj);
|
||||
}
|
||||
|
||||
rb_erase(&range->vm_rb_node, &vm->vm_range_tree);
|
||||
@@ -1520,7 +1520,7 @@ int remap_process_memory_range(struct process_vm *vm, struct vm_range *range,
|
||||
dkprintf("remap_process_memory_range(%p,%p,%#lx,%#lx,%#lx)\n",
|
||||
vm, range, start, end, off);
|
||||
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
|
||||
memobj_lock(range->memobj);
|
||||
memobj_ref(range->memobj);
|
||||
|
||||
args.start = start;
|
||||
args.off = off;
|
||||
@@ -1545,7 +1545,7 @@ int remap_process_memory_range(struct process_vm *vm, struct vm_range *range,
|
||||
|
||||
error = 0;
|
||||
out:
|
||||
memobj_unlock(range->memobj);
|
||||
memobj_unref(range->memobj);
|
||||
ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
|
||||
dkprintf("remap_process_memory_range(%p,%p,%#lx,%#lx,%#lx):%d\n",
|
||||
vm, range, start, end, off, error);
|
||||
@@ -1610,7 +1610,7 @@ int sync_process_memory_range(struct process_vm *vm, struct vm_range *range,
|
||||
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
|
||||
|
||||
if (!(range->memobj->flags & MF_ZEROFILL)) {
|
||||
memobj_lock(range->memobj);
|
||||
memobj_ref(range->memobj);
|
||||
}
|
||||
|
||||
error = visit_pte_range(vm->address_space->page_table, (void *)start,
|
||||
@@ -1618,7 +1618,7 @@ int sync_process_memory_range(struct process_vm *vm, struct vm_range *range,
|
||||
&sync_one_page, &args);
|
||||
|
||||
if (!(range->memobj->flags & MF_ZEROFILL)) {
|
||||
memobj_unlock(range->memobj);
|
||||
memobj_unref(range->memobj);
|
||||
}
|
||||
|
||||
ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
|
||||
@@ -1700,11 +1700,11 @@ int invalidate_process_memory_range(struct process_vm *vm,
|
||||
args.range = range;
|
||||
|
||||
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
|
||||
memobj_lock(range->memobj);
|
||||
memobj_ref(range->memobj);
|
||||
error = visit_pte_range(vm->address_space->page_table, (void *)start,
|
||||
(void *)end, range->pgshift, VPTEF_SKIP_NULL,
|
||||
&invalidate_one_page, &args);
|
||||
memobj_unlock(range->memobj);
|
||||
memobj_unref(range->memobj);
|
||||
ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
|
||||
if (error) {
|
||||
ekprintf("invalidate_process_memory_range(%p,%p,%#lx,%#lx):"
|
||||
|
||||
@@ -29,15 +29,13 @@
|
||||
static LIST_HEAD(shmobj_list_head);
|
||||
static ihk_spinlock_t shmobj_list_lock_body = SPIN_LOCK_UNLOCKED;
|
||||
|
||||
static memobj_release_func_t shmobj_release;
|
||||
static memobj_ref_func_t shmobj_ref;
|
||||
static memobj_free_func_t shmobj_free;
|
||||
static memobj_get_page_func_t shmobj_get_page;
|
||||
static memobj_invalidate_page_func_t shmobj_invalidate_page;
|
||||
static memobj_lookup_page_func_t shmobj_lookup_page;
|
||||
|
||||
static struct memobj_ops shmobj_ops = {
|
||||
.release = &shmobj_release,
|
||||
.ref = &shmobj_ref,
|
||||
.free = &shmobj_free,
|
||||
.get_page = &shmobj_get_page,
|
||||
.invalidate_page = &shmobj_invalidate_page,
|
||||
.lookup_page = &shmobj_lookup_page,
|
||||
@@ -59,9 +57,20 @@ static struct memobj *to_memobj(struct shmobj *shmobj)
|
||||
static void page_list_init(struct shmobj *obj)
|
||||
{
|
||||
INIT_LIST_HEAD(&obj->page_list);
|
||||
ihk_mc_spinlock_init(&obj->page_list_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
static void page_list_lock(struct shmobj *obj)
|
||||
{
|
||||
ihk_mc_spinlock_lock_noirq(&obj->page_list_lock);
|
||||
}
|
||||
|
||||
static void page_list_unlock(struct shmobj *obj)
|
||||
{
|
||||
ihk_mc_spinlock_unlock_noirq(&obj->page_list_lock);
|
||||
}
|
||||
|
||||
static void page_list_insert(struct shmobj *obj, struct page *page)
|
||||
{
|
||||
list_add(&page->list, &obj->page_list);
|
||||
@@ -180,15 +189,14 @@ int shmobj_create(struct shmid_ds *ds, struct memobj **objp)
|
||||
obj->memobj.ops = &shmobj_ops;
|
||||
obj->memobj.flags = MF_SHM;
|
||||
obj->memobj.size = ds->shm_segsz;
|
||||
ihk_atomic_set(&obj->memobj.refcnt, 1);
|
||||
obj->ds = *ds;
|
||||
obj->ds.shm_perm.seq = the_seq++;
|
||||
obj->ds.shm_nattch = 1;
|
||||
obj->ds.init_pgshift = 0;
|
||||
obj->index = -1;
|
||||
obj->pgshift = pgshift;
|
||||
obj->real_segsz = (obj->ds.shm_segsz + pgsize - 1) & ~(pgsize - 1);
|
||||
page_list_init(obj);
|
||||
ihk_mc_spinlock_init(&obj->memobj.lock);
|
||||
|
||||
error = 0;
|
||||
*objp = to_memobj(obj);
|
||||
@@ -216,7 +224,7 @@ int shmobj_create_indexed(struct shmid_ds *ds, struct shmobj **objp)
|
||||
return error;
|
||||
}
|
||||
|
||||
void shmobj_destroy(struct shmobj *obj)
|
||||
static void shmobj_destroy(struct shmobj *obj)
|
||||
{
|
||||
extern struct shm_info the_shm_info;
|
||||
extern struct list_head kds_free_list;
|
||||
@@ -244,6 +252,7 @@ void shmobj_destroy(struct shmobj *obj)
|
||||
void *page_va;
|
||||
uintptr_t phys;
|
||||
|
||||
/* no lock required as obj is inaccessible */
|
||||
page = page_list_first(obj);
|
||||
if (!page) {
|
||||
break;
|
||||
@@ -314,61 +323,22 @@ void shmobj_destroy(struct shmobj *obj)
|
||||
return;
|
||||
}
|
||||
|
||||
static void shmobj_release(struct memobj *memobj)
|
||||
static void shmobj_free(struct memobj *memobj)
|
||||
{
|
||||
struct shmobj *obj = to_shmobj(memobj);
|
||||
struct thread *thread = cpu_local_var(current);
|
||||
struct process *proc = thread->proc;
|
||||
struct shmobj *freeobj = NULL;
|
||||
long newref;
|
||||
extern time_t time(void);
|
||||
|
||||
dkprintf("shmobj_release(%p)\n", memobj);
|
||||
memobj_lock(&obj->memobj);
|
||||
if (obj->index >= 0) {
|
||||
obj->ds.shm_dtime = time();
|
||||
obj->ds.shm_lpid = proc->pid;
|
||||
dkprintf("shmobj_release:drop shm_nattach %p %d\n", obj, obj->ds.shm_nattch);
|
||||
}
|
||||
newref = --obj->ds.shm_nattch;
|
||||
if (newref <= 0) {
|
||||
if (newref < 0) {
|
||||
ekprintf("shmobj_release(%p):ref %ld\n",
|
||||
memobj, newref);
|
||||
panic("shmobj_release:freeing free shmobj");
|
||||
}
|
||||
if (obj->ds.shm_perm.mode & SHM_DEST) {
|
||||
freeobj = obj;
|
||||
}
|
||||
}
|
||||
memobj_unlock(&obj->memobj);
|
||||
dkprintf("%s(%p)\n", __func__, memobj);
|
||||
|
||||
if (freeobj) {
|
||||
shmobj_list_lock();
|
||||
shmobj_destroy(freeobj);
|
||||
shmobj_list_unlock();
|
||||
shmobj_list_lock();
|
||||
if (!(obj->ds.shm_perm.mode & SHM_DEST)) {
|
||||
ekprintf("%s called without going through rmid?", __func__);
|
||||
}
|
||||
dkprintf("shmobj_release(%p): %ld\n", memobj, newref);
|
||||
return;
|
||||
}
|
||||
|
||||
static void shmobj_ref(struct memobj *memobj)
|
||||
{
|
||||
struct shmobj *obj = to_shmobj(memobj);
|
||||
struct thread *thread = cpu_local_var(current);
|
||||
struct process *proc = thread->proc;
|
||||
long newref;
|
||||
extern time_t time(void);
|
||||
shmobj_destroy(obj);
|
||||
shmobj_list_unlock();
|
||||
|
||||
dkprintf("shmobj_ref(%p)\n", memobj);
|
||||
memobj_lock(&obj->memobj);
|
||||
newref = ++obj->ds.shm_nattch;
|
||||
if (obj->index >= 0) {
|
||||
obj->ds.shm_atime = time();
|
||||
obj->ds.shm_lpid = proc->pid;
|
||||
}
|
||||
memobj_unlock(&obj->memobj);
|
||||
dkprintf("shmobj_ref(%p): newref %ld\n", memobj, newref);
|
||||
dkprintf("%s(%p)\n", __func__, memobj);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -384,7 +354,7 @@ static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align,
|
||||
|
||||
dkprintf("shmobj_get_page(%p,%#lx,%d,%p)\n",
|
||||
memobj, off, p2align, physp);
|
||||
memobj_lock(&obj->memobj);
|
||||
memobj_ref(memobj);
|
||||
if (off & ~PAGE_MASK) {
|
||||
error = -EINVAL;
|
||||
ekprintf("shmobj_get_page(%p,%#lx,%d,%p):invalid argument. %d\n",
|
||||
@@ -410,12 +380,14 @@ static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align,
|
||||
goto out;
|
||||
}
|
||||
|
||||
page_list_lock(obj);
|
||||
page = page_list_lookup(obj, off);
|
||||
if (!page) {
|
||||
npages = 1 << p2align;
|
||||
virt = ihk_mc_alloc_aligned_pages_user(npages, p2align,
|
||||
IHK_MC_AP_NOWAIT, virt_addr);
|
||||
if (!virt) {
|
||||
page_list_unlock(obj);
|
||||
error = -ENOMEM;
|
||||
ekprintf("shmobj_get_page(%p,%#lx,%d,%p):"
|
||||
"alloc failed. %d\n",
|
||||
@@ -445,6 +417,7 @@ static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align,
|
||||
dkprintf("shmobj_get_page(%p,%#lx,%d,%p):alloc page. %p %#lx\n",
|
||||
memobj, off, p2align, physp, page, phys);
|
||||
}
|
||||
page_list_unlock(obj);
|
||||
|
||||
ihk_atomic_inc(&page->count);
|
||||
|
||||
@@ -452,7 +425,7 @@ static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align,
|
||||
*physp = page_to_phys(page);
|
||||
|
||||
out:
|
||||
memobj_unlock(&obj->memobj);
|
||||
memobj_unref(memobj);
|
||||
if (virt) {
|
||||
ihk_mc_free_pages_user(virt, npages);
|
||||
}
|
||||
@@ -470,11 +443,14 @@ static int shmobj_invalidate_page(struct memobj *memobj, uintptr_t phys,
|
||||
|
||||
dkprintf("shmobj_invalidate_page(%p,%#lx,%#lx)\n", memobj, phys, pgsize);
|
||||
|
||||
page_list_lock(obj);
|
||||
if (!(page = phys_to_page(phys))
|
||||
|| !(page = page_list_lookup(obj, page->offset))) {
|
||||
page_list_unlock(obj);
|
||||
error = 0;
|
||||
goto out;
|
||||
}
|
||||
page_list_unlock(obj);
|
||||
|
||||
if (ihk_atomic_read(&page->count) == 1) {
|
||||
if (page_unmap(page)) {
|
||||
@@ -503,7 +479,7 @@ static int shmobj_lookup_page(struct memobj *memobj, off_t off, int p2align,
|
||||
|
||||
dkprintf("shmobj_lookup_page(%p,%#lx,%d,%p)\n",
|
||||
memobj, off, p2align, physp);
|
||||
memobj_lock(&obj->memobj);
|
||||
memobj_ref(&obj->memobj);
|
||||
if (off & ~PAGE_MASK) {
|
||||
error = -EINVAL;
|
||||
ekprintf("shmobj_lookup_page(%p,%#lx,%d,%p):invalid argument. %d\n",
|
||||
@@ -529,7 +505,9 @@ static int shmobj_lookup_page(struct memobj *memobj, off_t off, int p2align,
|
||||
goto out;
|
||||
}
|
||||
|
||||
page_list_lock(obj);
|
||||
page = page_list_lookup(obj, off);
|
||||
page_list_unlock(obj);
|
||||
if (!page) {
|
||||
error = -ENOENT;
|
||||
dkprintf("shmobj_lookup_page(%p,%#lx,%d,%p):page not found. %d\n",
|
||||
@@ -544,7 +522,7 @@ static int shmobj_lookup_page(struct memobj *memobj, off_t off, int p2align,
|
||||
}
|
||||
|
||||
out:
|
||||
memobj_unlock(&obj->memobj);
|
||||
memobj_unref(&obj->memobj);
|
||||
dkprintf("shmobj_lookup_page(%p,%#lx,%d,%p):%d %#lx\n",
|
||||
memobj, off, p2align, physp, error, phys);
|
||||
return error;
|
||||
|
||||
152
kernel/syscall.c
152
kernel/syscall.c
@@ -1679,13 +1679,11 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
|
||||
populate_len = len;
|
||||
|
||||
if (!(flags & MAP_ANONYMOUS)) {
|
||||
memobj_lock(memobj);
|
||||
if (memobj->status == MEMOBJ_TO_BE_PREFETCHED) {
|
||||
memobj->status = MEMOBJ_READY;
|
||||
if (atomic_cmpxchg4(&memobj->status, MEMOBJ_TO_BE_PREFETCHED,
|
||||
MEMOBJ_READY)) {
|
||||
populated_mapping = 1;
|
||||
populate_len = memobj->size;
|
||||
}
|
||||
memobj_unlock(memobj);
|
||||
|
||||
/* Update PTEs for pre-mapped memory object */
|
||||
if ((memobj->flags & MF_PREMAP) &&
|
||||
@@ -1763,7 +1761,7 @@ out:
|
||||
ihk_mc_free_pages_user(p, npages);
|
||||
}
|
||||
if (memobj) {
|
||||
memobj_release(memobj);
|
||||
memobj_unref(memobj);
|
||||
}
|
||||
dkprintf("%s: 0x%lx:%8lu, (req: 0x%lx:%lu), prot: %x, flags: %x, "
|
||||
"fd: %d, off: %lu, error: %ld, addr: 0x%lx\n",
|
||||
@@ -4796,6 +4794,7 @@ int shmobj_list_lookup(int shmid, struct shmobj **objp)
|
||||
return -EIDRM;
|
||||
}
|
||||
|
||||
memobj_ref(&obj->memobj);
|
||||
*objp = obj;
|
||||
return 0;
|
||||
} /* shmobj_list_lookup() */
|
||||
@@ -4814,6 +4813,7 @@ int shmobj_list_lookup_by_key(key_t key, struct shmobj **objp)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
memobj_ref(&obj->memobj);
|
||||
*objp = obj;
|
||||
return 0;
|
||||
} /* shmobj_list_lookup_by_key() */
|
||||
@@ -4831,6 +4831,7 @@ int shmobj_list_lookup_by_index(int index, struct shmobj **objp)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
memobj_ref(&obj->memobj);
|
||||
*objp = obj;
|
||||
return 0;
|
||||
} /* shmobj_list_lookup_by_index() */
|
||||
@@ -4872,6 +4873,7 @@ int do_shmget(const key_t key, const size_t size, const int shmflg)
|
||||
}
|
||||
if (obj && (shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) {
|
||||
shmobj_list_unlock();
|
||||
memobj_unref(&obj->memobj);
|
||||
dkprintf("do_shmget(%#lx,%#lx,%#x): -EEXIST\n", key, size, shmflg);
|
||||
return -EEXIST;
|
||||
}
|
||||
@@ -4898,12 +4900,14 @@ int do_shmget(const key_t key, const size_t size, const int shmflg)
|
||||
}
|
||||
if (req & ~obj->ds.shm_perm.mode) {
|
||||
shmobj_list_unlock();
|
||||
memobj_unref(&obj->memobj);
|
||||
dkprintf("do_shmget(%#lx,%#lx,%#x): -EINVAL\n", key, size, shmflg);
|
||||
return -EACCES;
|
||||
}
|
||||
}
|
||||
if (obj->ds.shm_segsz < size) {
|
||||
shmobj_list_unlock();
|
||||
memobj_unref(&obj->memobj);
|
||||
dkprintf("do_shmget(%#lx,%#lx,%#x): -EINVAL\n", key, size, shmflg);
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -4950,7 +4954,6 @@ int do_shmget(const key_t key, const size_t size, const int shmflg)
|
||||
|
||||
shmid = make_shmid(obj);
|
||||
shmobj_list_unlock();
|
||||
memobj_release(&obj->memobj);
|
||||
|
||||
dkprintf("do_shmget(%#lx,%#lx,%#x): %d\n", key, size, shmflg, shmid);
|
||||
return shmid;
|
||||
@@ -4986,6 +4989,7 @@ SYSCALL_DECLARE(shmat)
|
||||
pgsize = (size_t)1 << obj->pgshift;
|
||||
if (shmaddr && ((uintptr_t)shmaddr & (pgsize - 1)) && !(shmflg & SHM_RND)) {
|
||||
shmobj_list_unlock();
|
||||
memobj_unref(&obj->memobj);
|
||||
dkprintf("shmat(%#x,%p,%#x): -EINVAL\n", shmid, shmaddr, shmflg);
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -5015,6 +5019,7 @@ SYSCALL_DECLARE(shmat)
|
||||
}
|
||||
if (~obj->ds.shm_perm.mode & req) {
|
||||
shmobj_list_unlock();
|
||||
memobj_unref(&obj->memobj);
|
||||
dkprintf("shmat(%#x,%p,%#x): -EINVAL\n", shmid, shmaddr, shmflg);
|
||||
return -EACCES;
|
||||
}
|
||||
@@ -5025,6 +5030,7 @@ SYSCALL_DECLARE(shmat)
|
||||
if (lookup_process_memory_range(vm, addr, addr+len)) {
|
||||
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
|
||||
shmobj_list_unlock();
|
||||
memobj_unref(&obj->memobj);
|
||||
dkprintf("shmat(%#x,%p,%#x):lookup_process_memory_range succeeded. -ENOMEM\n", shmid, shmaddr, shmflg);
|
||||
return -ENOMEM;
|
||||
}
|
||||
@@ -5034,6 +5040,7 @@ SYSCALL_DECLARE(shmat)
|
||||
if (error) {
|
||||
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
|
||||
shmobj_list_unlock();
|
||||
memobj_unref(&obj->memobj);
|
||||
dkprintf("shmat(%#x,%p,%#x):search_free_space failed. %d\n", shmid, shmaddr, shmflg, error);
|
||||
return error;
|
||||
}
|
||||
@@ -5049,20 +5056,19 @@ SYSCALL_DECLARE(shmat)
|
||||
if (error) {
|
||||
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
|
||||
shmobj_list_unlock();
|
||||
memobj_unref(&obj->memobj);
|
||||
dkprintf("shmat(%#x,%p,%#x):set_host_vma failed. %d\n", shmid, shmaddr, shmflg, error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
memobj_ref(&obj->memobj);
|
||||
|
||||
error = add_process_memory_range(vm, addr, addr+len, -1,
|
||||
vrflags, &obj->memobj, 0, obj->pgshift, NULL);
|
||||
if (error) {
|
||||
if (!(prot & PROT_WRITE)) {
|
||||
(void)set_host_vma(addr, len, PROT_READ | PROT_WRITE | PROT_EXEC, 1/* holding memory_range_lock */);
|
||||
}
|
||||
memobj_release(&obj->memobj);
|
||||
memobj_unref(&obj->memobj);
|
||||
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
|
||||
shmobj_list_unlock();
|
||||
dkprintf("shmat(%#x,%p,%#x):add_process_memory_range failed. %d\n", shmid, shmaddr, shmflg, error);
|
||||
@@ -5072,7 +5078,6 @@ SYSCALL_DECLARE(shmat)
|
||||
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
|
||||
shmobj_list_unlock();
|
||||
|
||||
dkprintf("shmat:bump shm_nattach %p %d\n", obj, obj->ds.shm_nattch);
|
||||
dkprintf("shmat(%#x,%p,%#x): 0x%lx. %d\n", shmid, shmaddr, shmflg, addr);
|
||||
return addr;
|
||||
} /* sys_shmat() */
|
||||
@@ -5094,10 +5099,11 @@ SYSCALL_DECLARE(shmctl)
|
||||
size_t size;
|
||||
struct shmlock_user *user;
|
||||
uid_t ruid = proc->ruid;
|
||||
uint16_t oldmode;
|
||||
|
||||
dkprintf("shmctl(%#x,%d,%p)\n", shmid, cmd, buf);
|
||||
if (0) ;
|
||||
else if (cmd == IPC_RMID) {
|
||||
switch (cmd) {
|
||||
case IPC_RMID:
|
||||
shmobj_list_lock();
|
||||
error = shmobj_list_lookup(shmid, &obj);
|
||||
if (error) {
|
||||
@@ -5109,19 +5115,21 @@ SYSCALL_DECLARE(shmctl)
|
||||
&& (obj->ds.shm_perm.uid != proc->euid)
|
||||
&& (obj->ds.shm_perm.cuid != proc->euid)) {
|
||||
shmobj_list_unlock();
|
||||
memobj_unref(&obj->memobj);
|
||||
dkprintf("shmctl(%#x,%d,%p): -EPERM\n", shmid, cmd, buf);
|
||||
return -EPERM;
|
||||
}
|
||||
oldmode = obj->ds.shm_perm.mode;
|
||||
obj->ds.shm_perm.mode |= SHM_DEST;
|
||||
if (obj->ds.shm_nattch <= 0) {
|
||||
shmobj_destroy(obj);
|
||||
}
|
||||
shmobj_list_unlock();
|
||||
// unref twice if this is the first time rmid is called
|
||||
if (!(oldmode & SHM_DEST))
|
||||
memobj_unref(&obj->memobj);
|
||||
memobj_unref(&obj->memobj);
|
||||
|
||||
dkprintf("shmctl(%#x,%d,%p): 0\n", shmid, cmd, buf);
|
||||
return 0;
|
||||
}
|
||||
else if (cmd == IPC_SET) {
|
||||
case IPC_SET:
|
||||
shmobj_list_lock();
|
||||
error = shmobj_list_lookup(shmid, &obj);
|
||||
if (error) {
|
||||
@@ -5132,12 +5140,14 @@ SYSCALL_DECLARE(shmctl)
|
||||
if ((obj->ds.shm_perm.uid != proc->euid)
|
||||
&& (obj->ds.shm_perm.cuid != proc->euid)) {
|
||||
shmobj_list_unlock();
|
||||
memobj_unref(&obj->memobj);
|
||||
dkprintf("shmctl(%#x,%d,%p): -EPERM\n", shmid, cmd, buf);
|
||||
return -EPERM;
|
||||
}
|
||||
error = copy_from_user(&ads, buf, sizeof(ads));
|
||||
if (error) {
|
||||
shmobj_list_unlock();
|
||||
memobj_unref(&obj->memobj);
|
||||
dkprintf("shmctl(%#x,%d,%p): %d\n", shmid, cmd, buf, error);
|
||||
return error;
|
||||
}
|
||||
@@ -5148,48 +5158,66 @@ SYSCALL_DECLARE(shmctl)
|
||||
obj->ds.shm_ctime = now;
|
||||
|
||||
shmobj_list_unlock();
|
||||
memobj_unref(&obj->memobj);
|
||||
dkprintf("shmctl(%#x,%d,%p): 0\n", shmid, cmd, buf);
|
||||
return 0;
|
||||
}
|
||||
else if (cmd == IPC_STAT) {
|
||||
case IPC_STAT:
|
||||
case SHM_STAT:
|
||||
shmobj_list_lock();
|
||||
error = shmobj_list_lookup(shmid, &obj);
|
||||
if (cmd == IPC_STAT) {
|
||||
error = shmobj_list_lookup(shmid, &obj);
|
||||
} else { // SHM_STAT
|
||||
error = shmobj_list_lookup_by_index(shmid, &obj);
|
||||
}
|
||||
if (error) {
|
||||
shmobj_list_unlock();
|
||||
dkprintf("shmctl(%#x,%d,%p): lookup: %d\n", shmid, cmd, buf, error);
|
||||
return error;
|
||||
}
|
||||
if (!proc->euid) {
|
||||
req = 0;
|
||||
|
||||
if (cmd == IPC_STAT) {
|
||||
if (!proc->euid) {
|
||||
req = 0;
|
||||
} else if ((proc->euid == obj->ds.shm_perm.uid) ||
|
||||
(proc->euid == obj->ds.shm_perm.cuid)) {
|
||||
req = 0400;
|
||||
} else if ((proc->egid == obj->ds.shm_perm.gid) ||
|
||||
(proc->egid == obj->ds.shm_perm.cgid)) {
|
||||
req = 0040;
|
||||
} else {
|
||||
req = 0004;
|
||||
}
|
||||
if (req & ~obj->ds.shm_perm.mode) {
|
||||
shmobj_list_unlock();
|
||||
memobj_unref(&obj->memobj);
|
||||
dkprintf("shmctl(%#x,%d,%p): -EACCES\n", shmid,
|
||||
cmd, buf);
|
||||
return -EACCES;
|
||||
}
|
||||
}
|
||||
else if ((proc->euid == obj->ds.shm_perm.uid)
|
||||
|| (proc->euid == obj->ds.shm_perm.cuid)) {
|
||||
req = 0400;
|
||||
}
|
||||
else if ((proc->egid == obj->ds.shm_perm.gid)
|
||||
|| (proc->egid == obj->ds.shm_perm.cgid)) {
|
||||
req = 0040;
|
||||
}
|
||||
else {
|
||||
req = 0004;
|
||||
}
|
||||
if (req & ~obj->ds.shm_perm.mode) {
|
||||
shmobj_list_unlock();
|
||||
dkprintf("shmctl(%#x,%d,%p): -EACCES\n", shmid, cmd, buf);
|
||||
return -EACCES;
|
||||
|
||||
/* This could potentially be higher than required if some other
|
||||
* thread holds a ref at this point.
|
||||
* Minus one here is because we hold a ref...
|
||||
*/
|
||||
obj->ds.shm_nattch = ihk_atomic_read(&obj->memobj.refcnt) - 1;
|
||||
/* ... And one for sentinel unless RMID has been called */
|
||||
if (!(obj->ds.shm_perm.mode & SHM_DEST)) {
|
||||
obj->ds.shm_nattch--;
|
||||
}
|
||||
|
||||
error = copy_to_user(buf, &obj->ds, sizeof(*buf));
|
||||
if (error) {
|
||||
shmobj_list_unlock();
|
||||
memobj_unref(&obj->memobj);
|
||||
dkprintf("shmctl(%#x,%d,%p): %d\n", shmid, cmd, buf, error);
|
||||
return error;
|
||||
}
|
||||
|
||||
shmobj_list_unlock();
|
||||
memobj_unref(&obj->memobj);
|
||||
dkprintf("shmctl(%#x,%d,%p): 0\n", shmid, cmd, buf);
|
||||
return 0;
|
||||
}
|
||||
else if (cmd == IPC_INFO) {
|
||||
case IPC_INFO:
|
||||
shmobj_list_lock();
|
||||
error = shmobj_list_lookup(shmid, &obj);
|
||||
if (error) {
|
||||
@@ -5200,6 +5228,7 @@ SYSCALL_DECLARE(shmctl)
|
||||
error = copy_to_user(buf, &the_shminfo, sizeof(the_shminfo));
|
||||
if (error) {
|
||||
shmobj_list_unlock();
|
||||
memobj_unref(&obj->memobj);
|
||||
dkprintf("shmctl(%#x,%d,%p): %d\n", shmid, cmd, buf, error);
|
||||
return error;
|
||||
}
|
||||
@@ -5209,10 +5238,10 @@ SYSCALL_DECLARE(shmctl)
|
||||
maxi = 0;
|
||||
}
|
||||
shmobj_list_unlock();
|
||||
memobj_unref(&obj->memobj);
|
||||
dkprintf("shmctl(%#x,%d,%p): %d\n", shmid, cmd, buf, maxi);
|
||||
return maxi;
|
||||
}
|
||||
else if (cmd == SHM_LOCK) {
|
||||
case SHM_LOCK:
|
||||
shmobj_list_lock();
|
||||
error = shmobj_list_lookup(shmid, &obj);
|
||||
if (error) {
|
||||
@@ -5224,12 +5253,14 @@ SYSCALL_DECLARE(shmctl)
|
||||
&& (obj->ds.shm_perm.cuid != proc->euid)
|
||||
&& (obj->ds.shm_perm.uid != proc->euid)) {
|
||||
shmobj_list_unlock();
|
||||
memobj_unref(&obj->memobj);
|
||||
dkprintf("shmctl(%#x,%d,%p): perm shm: %d\n", shmid, cmd, buf, error);
|
||||
return -EPERM;
|
||||
}
|
||||
rlim = &proc->rlimit[MCK_RLIMIT_MEMLOCK];
|
||||
if (!rlim->rlim_cur && !has_cap_ipc_lock(thread)) {
|
||||
shmobj_list_unlock();
|
||||
memobj_unref(&obj->memobj);
|
||||
dkprintf("shmctl(%#x,%d,%p): perm proc: %d\n", shmid, cmd, buf, error);
|
||||
return -EPERM;
|
||||
}
|
||||
@@ -5240,6 +5271,7 @@ SYSCALL_DECLARE(shmctl)
|
||||
error = shmlock_user_get(ruid, &user);
|
||||
if (error) {
|
||||
shmlock_users_unlock();
|
||||
memobj_unref(&obj->memobj);
|
||||
shmobj_list_unlock();
|
||||
ekprintf("shmctl(%#x,%d,%p): user lookup: %d\n", shmid, cmd, buf, error);
|
||||
return -ENOMEM;
|
||||
@@ -5250,6 +5282,7 @@ SYSCALL_DECLARE(shmctl)
|
||||
&& ((rlim->rlim_cur < user->locked)
|
||||
|| ((rlim->rlim_cur - user->locked) < size))) {
|
||||
shmlock_users_unlock();
|
||||
memobj_unref(&obj->memobj);
|
||||
shmobj_list_unlock();
|
||||
dkprintf("shmctl(%#x,%d,%p): too large: %d\n", shmid, cmd, buf, error);
|
||||
return -ENOMEM;
|
||||
@@ -5260,11 +5293,11 @@ SYSCALL_DECLARE(shmctl)
|
||||
shmlock_users_unlock();
|
||||
}
|
||||
shmobj_list_unlock();
|
||||
memobj_unref(&obj->memobj);
|
||||
|
||||
dkprintf("shmctl(%#x,%d,%p): 0\n", shmid, cmd, buf);
|
||||
return 0;
|
||||
}
|
||||
else if (cmd == SHM_UNLOCK) {
|
||||
case SHM_UNLOCK:
|
||||
shmobj_list_lock();
|
||||
error = shmobj_list_lookup(shmid, &obj);
|
||||
if (error) {
|
||||
@@ -5276,6 +5309,7 @@ SYSCALL_DECLARE(shmctl)
|
||||
&& (obj->ds.shm_perm.cuid != proc->euid)
|
||||
&& (obj->ds.shm_perm.uid != proc->euid)) {
|
||||
shmobj_list_unlock();
|
||||
memobj_unref(&obj->memobj);
|
||||
dkprintf("shmctl(%#x,%d,%p): perm shm: %d\n", shmid, cmd, buf, error);
|
||||
return -EPERM;
|
||||
}
|
||||
@@ -5294,28 +5328,10 @@ SYSCALL_DECLARE(shmctl)
|
||||
obj->ds.shm_perm.mode &= ~SHM_LOCKED;
|
||||
}
|
||||
shmobj_list_unlock();
|
||||
memobj_unref(&obj->memobj);
|
||||
dkprintf("shmctl(%#x,%d,%p): 0\n", shmid, cmd, buf);
|
||||
return 0;
|
||||
}
|
||||
else if (cmd == SHM_STAT) {
|
||||
shmobj_list_lock();
|
||||
error = shmobj_list_lookup_by_index(shmid, &obj);
|
||||
if (error) {
|
||||
shmobj_list_unlock();
|
||||
dkprintf("shmctl(%#x,%d,%p): lookup: %d\n", shmid, cmd, buf, error);
|
||||
return error;
|
||||
}
|
||||
error = copy_to_user(buf, &obj->ds, sizeof(*buf));
|
||||
if (error) {
|
||||
shmobj_list_unlock();
|
||||
dkprintf("shmctl(%#x,%d,%p): %d\n", shmid, cmd, buf, error);
|
||||
return error;
|
||||
}
|
||||
shmobj_list_unlock();
|
||||
dkprintf("shmctl(%#x,%d,%p): 0\n", shmid, cmd, buf);
|
||||
return 0;
|
||||
}
|
||||
else if (cmd == SHM_INFO) {
|
||||
case SHM_INFO:
|
||||
shmobj_list_lock();
|
||||
error = copy_to_user(buf, &the_shm_info, sizeof(the_shm_info));
|
||||
if (error) {
|
||||
@@ -5331,10 +5347,10 @@ SYSCALL_DECLARE(shmctl)
|
||||
shmobj_list_unlock();
|
||||
dkprintf("shmctl(%#x,%d,%p): %d\n", shmid, cmd, buf, maxi);
|
||||
return maxi;
|
||||
default:
|
||||
dkprintf("shmctl(%#x,%d,%p): EINVAL\n", shmid, cmd, buf);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
dkprintf("shmctl(%#x,%d,%p): EINVAL\n", shmid, cmd, buf);
|
||||
return -EINVAL;
|
||||
} /* sys_shmctl() */
|
||||
|
||||
SYSCALL_DECLARE(shmdt)
|
||||
@@ -7962,7 +7978,7 @@ SYSCALL_DECLARE(mremap)
|
||||
oldaddr, oldsize0, newsize0, flags,
|
||||
newaddr, error);
|
||||
if (range->memobj) {
|
||||
memobj_release(range->memobj);
|
||||
memobj_unref(range->memobj);
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -1399,7 +1399,7 @@ static int xpmem_free_process_memory_range(
|
||||
}
|
||||
|
||||
if (range->memobj) {
|
||||
memobj_release(range->memobj);
|
||||
memobj_unref(range->memobj);
|
||||
}
|
||||
|
||||
rb_erase(&range->vm_rb_node, &vm->vm_range_tree);
|
||||
@@ -1732,7 +1732,8 @@ int xpmem_remove_process_memory_range(
|
||||
|
||||
remaining_vmr->private_data = NULL;
|
||||
/* This function is always followed by xpmem_free_process_memory_range()
|
||||
which in turn calls memobj_release() */
|
||||
* which in turn calls memobj_put()
|
||||
*/
|
||||
remaining_vaddr = att->at_vaddr;
|
||||
}
|
||||
|
||||
@@ -1755,7 +1756,8 @@ int xpmem_remove_process_memory_range(
|
||||
|
||||
vmr->private_data = NULL;
|
||||
/* This function is always followed by [xpmem_]free_process_memory_range()
|
||||
which in turn calls memobj_release() */
|
||||
* which in turn calls memobj_put()
|
||||
*/
|
||||
|
||||
out:
|
||||
mcs_rwlock_writer_unlock(&att->at_lock, &at_lock);
|
||||
|
||||
@@ -32,9 +32,11 @@ static ihk_spinlock_t the_zeroobj_lock = SPIN_LOCK_UNLOCKED;
|
||||
static struct zeroobj *the_zeroobj = NULL; /* singleton */
|
||||
|
||||
static memobj_get_page_func_t zeroobj_get_page;
|
||||
static memobj_free_func_t zeroobj_free;
|
||||
|
||||
static struct memobj_ops zeroobj_ops = {
|
||||
.get_page = &zeroobj_get_page,
|
||||
.free = &zeroobj_free,
|
||||
};
|
||||
|
||||
static struct zeroobj *to_zeroobj(struct memobj *memobj)
|
||||
@@ -74,6 +76,12 @@ static struct page *page_list_first(struct zeroobj *obj)
|
||||
/***********************************************************************
|
||||
* zeroobj
|
||||
*/
|
||||
|
||||
static void zeroobj_free(struct memobj *obj)
|
||||
{
|
||||
kprintf("trying to free zeroobj, this should never happen\n");
|
||||
}
|
||||
|
||||
static int alloc_zeroobj(void)
|
||||
{
|
||||
int error;
|
||||
@@ -101,8 +109,8 @@ static int alloc_zeroobj(void)
|
||||
obj->memobj.ops = &zeroobj_ops;
|
||||
obj->memobj.flags = MF_ZEROOBJ;
|
||||
obj->memobj.size = 0;
|
||||
ihk_atomic_set(&obj->memobj.refcnt, 2); // never reaches 0
|
||||
page_list_init(obj);
|
||||
ihk_mc_spinlock_init(&obj->memobj.lock);
|
||||
|
||||
virt = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT); /* XXX:NYI:large page */
|
||||
if (!virt) {
|
||||
@@ -159,6 +167,7 @@ int zeroobj_create(struct memobj **objp)
|
||||
|
||||
error = 0;
|
||||
*objp = to_memobj(the_zeroobj);
|
||||
memobj_ref(*objp);
|
||||
|
||||
out:
|
||||
dkprintf("zeroobj_create(%p):%d %p\n", objp, error, *objp);
|
||||
|
||||
Reference in New Issue
Block a user