xpmem: porting xpmem v2.6.3
implement xpmem_get, xpmem_release, xpmem_attach, xpmem_detach
This commit is contained in:
@@ -1101,7 +1101,7 @@ static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base,
|
|||||||
page = phys_to_page(phys);
|
page = phys_to_page(phys);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (page && page_is_in_memobj(page) && (old & PFL1_DIRTY) &&
|
if (page && page_is_in_memobj(page) && (old & PFL1_DIRTY) && (args->memobj) &&
|
||||||
!(args->memobj->flags & MF_ZEROFILL)) {
|
!(args->memobj->flags & MF_ZEROFILL)) {
|
||||||
memobj_flush_page(args->memobj, phys, PTL1_SIZE);
|
memobj_flush_page(args->memobj, phys, PTL1_SIZE);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2636,6 +2636,24 @@ return_execve2:
|
|||||||
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case __NR_stat:
|
||||||
|
ret = do_strncpy_from_user(fd, pathbuf, (void *)w.sr.args[0], PATH_MAX);
|
||||||
|
if (ret >= PATH_MAX) {
|
||||||
|
ret = -ENAMETOOLONG;
|
||||||
|
}
|
||||||
|
if (ret < 0) {
|
||||||
|
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn = chgpath(pathbuf, tmpbuf);
|
||||||
|
|
||||||
|
ret = stat(fn, (struct stat *)w.sr.args[1]);
|
||||||
|
__dprintf("stat: path=%s, ret=%ld\n", fn, ret);
|
||||||
|
SET_ERR(ret);
|
||||||
|
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
ret = do_generic_syscall(&w);
|
ret = do_generic_syscall(&w);
|
||||||
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||||
|
|||||||
@@ -389,6 +389,7 @@ struct vm_range {
|
|||||||
off_t objoff;
|
off_t objoff;
|
||||||
int pgshift; /* page size. 0 means THP */
|
int pgshift; /* page size. 0 means THP */
|
||||||
int padding;
|
int padding;
|
||||||
|
void *private_data;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct vm_range_numa_policy {
|
struct vm_range_numa_policy {
|
||||||
|
|||||||
@@ -11,11 +11,16 @@
|
|||||||
#ifndef _XPMEM_H
|
#ifndef _XPMEM_H
|
||||||
#define _XPMEM_H
|
#define _XPMEM_H
|
||||||
|
|
||||||
|
#include <process.h>
|
||||||
#include <ihk/context.h>
|
#include <ihk/context.h>
|
||||||
|
|
||||||
#define XPMEM_DEV_PATH "/dev/xpmem"
|
#define XPMEM_DEV_PATH "/dev/xpmem"
|
||||||
|
|
||||||
extern int xpmem_open(ihk_mc_user_context_t *ctx);
|
extern int xpmem_open(ihk_mc_user_context_t *ctx);
|
||||||
|
extern int xpmem_remove_process_memory_range(struct process_vm *vm,
|
||||||
|
struct vm_range *vmr);
|
||||||
|
extern int xpmem_fault_process_memory_range(struct process_vm *vm,
|
||||||
|
struct vm_range *vmr, unsigned long vaddr, uint64_t reason);
|
||||||
|
|
||||||
#endif /* _XPMEM_H */
|
#endif /* _XPMEM_H */
|
||||||
|
|
||||||
|
|||||||
@@ -160,7 +160,7 @@ static inline int xpmem_ap_hashtable_index(xpmem_apid_t apid)
|
|||||||
|
|
||||||
index = ((xpmem_id_t *)&apid)->xpmem_id.uniq % XPMEM_AP_HASHTABLE_SIZE;
|
index = ((xpmem_id_t *)&apid)->xpmem_id.uniq % XPMEM_AP_HASHTABLE_SIZE;
|
||||||
|
|
||||||
XPMEM_DEBUG("return: apid=%lu, index=%d", apid, index);
|
XPMEM_DEBUG("return: apid=0x%lx, index=%d", apid, index);
|
||||||
|
|
||||||
return index;
|
return index;
|
||||||
}
|
}
|
||||||
@@ -174,22 +174,20 @@ struct xpmem_thread_group {
|
|||||||
uid_t uid; /* tg's uid */
|
uid_t uid; /* tg's uid */
|
||||||
gid_t gid; /* tg's gid */
|
gid_t gid; /* tg's gid */
|
||||||
volatile int flags; /* tg attributes and state */
|
volatile int flags; /* tg attributes and state */
|
||||||
ihk_atomic_t uniq_segid;
|
ihk_atomic_t uniq_segid; /* segid uniq */
|
||||||
ihk_atomic_t uniq_apid;
|
ihk_atomic_t uniq_apid; /* apid uniq */
|
||||||
mcs_rwlock_lock_t seg_list_lock;
|
mcs_rwlock_lock_t seg_list_lock; /* tg's list of segs lock */
|
||||||
struct list_head seg_list; /* tg's list of segs */
|
struct list_head seg_list; /* tg's list of segs */
|
||||||
ihk_atomic_t refcnt; /* references to tg */
|
ihk_atomic_t refcnt; /* references to tg */
|
||||||
ihk_atomic_t n_pinned; /* #of pages pinned by this tg */
|
ihk_atomic_t n_pinned; /* #of pages pinned by this tg */
|
||||||
struct list_head tg_hashlist; /* tg hash list */
|
struct list_head tg_hashlist; /* tg hash list */
|
||||||
struct thread *group_leader; /* thread group leader */
|
struct thread *group_leader; /* thread group leader */
|
||||||
struct process_vm *vm; /* tg's mm */
|
struct process_vm *vm; /* tg's process_vm */
|
||||||
ihk_atomic_t n_recall_PFNs; /* #of recall of PFNs in progress */
|
|
||||||
struct xpmem_hashlist ap_hashtable[]; /* locks + ap hash lists */
|
struct xpmem_hashlist ap_hashtable[]; /* locks + ap hash lists */
|
||||||
};
|
};
|
||||||
|
|
||||||
struct xpmem_segment {
|
struct xpmem_segment {
|
||||||
ihk_spinlock_t lock; /* seg lock */
|
ihk_spinlock_t lock; /* seg lock */
|
||||||
mcs_rwlock_lock_t seg_lock; /* seg sema */
|
|
||||||
xpmem_segid_t segid; /* unique segid */
|
xpmem_segid_t segid; /* unique segid */
|
||||||
unsigned long vaddr; /* starting address */
|
unsigned long vaddr; /* starting address */
|
||||||
size_t size; /* size of seg */
|
size_t size; /* size of seg */
|
||||||
@@ -216,18 +214,16 @@ struct xpmem_access_permit {
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct xpmem_attachment {
|
struct xpmem_attachment {
|
||||||
mcs_rwlock_lock_t at_lock; /* att lock for serialization */
|
mcs_rwlock_lock_t at_lock; /* att lock */
|
||||||
struct mcs_rwlock_node_irqsave at_irqsave; /* att lock for serialization */
|
|
||||||
unsigned long vaddr; /* starting address of seg attached */
|
unsigned long vaddr; /* starting address of seg attached */
|
||||||
unsigned long at_vaddr; /* address where seg is attached */
|
unsigned long at_vaddr; /* address where seg is attached */
|
||||||
size_t at_size; /* size of seg attachment */
|
size_t at_size; /* size of seg attachment */
|
||||||
struct vm_range *at_vma; /* vma where seg is attachment */
|
struct vm_range *at_vmr; /* vm_range where seg is attachment */
|
||||||
volatile int flags; /* att attributes and state */
|
volatile int flags; /* att attributes and state */
|
||||||
ihk_atomic_t refcnt; /* references to att */
|
ihk_atomic_t refcnt; /* references to att */
|
||||||
struct xpmem_access_permit *ap; /* associated access permit */
|
struct xpmem_access_permit *ap; /* associated access permit */
|
||||||
struct list_head att_list; /* atts linked to access permit */
|
struct list_head att_list; /* atts linked to access permit */
|
||||||
struct process_vm *vm; /* mm struct attached to */
|
struct process_vm *vm; /* process_vm attached to */
|
||||||
mcs_rwlock_lock_t invalidate_lock; /* to serialize page table invalidates */
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct xpmem_partition {
|
struct xpmem_partition {
|
||||||
@@ -249,8 +245,10 @@ struct xpmem_perm {
|
|||||||
#define XPMEM_PERM_IRUSR 00400
|
#define XPMEM_PERM_IRUSR 00400
|
||||||
#define XPMEM_PERM_IWUSR 00200
|
#define XPMEM_PERM_IWUSR 00200
|
||||||
|
|
||||||
|
extern struct xpmem_partition *xpmem_my_part;
|
||||||
|
|
||||||
static int xpmem_ioctl(struct mckfd *mckfd, ihk_mc_user_context_t *ctx);
|
static int xpmem_ioctl(struct mckfd *mckfd, ihk_mc_user_context_t *ctx);
|
||||||
static int xpmem_close( struct mckfd *mckfd, ihk_mc_user_context_t *ctx);
|
static int xpmem_close(struct mckfd *mckfd, ihk_mc_user_context_t *ctx);
|
||||||
|
|
||||||
static int xpmem_init(void);
|
static int xpmem_init(void);
|
||||||
static void xpmem_exit(void);
|
static void xpmem_exit(void);
|
||||||
@@ -263,10 +261,47 @@ static xpmem_segid_t xpmem_make_segid(struct xpmem_thread_group *);
|
|||||||
static int xpmem_remove(xpmem_segid_t);
|
static int xpmem_remove(xpmem_segid_t);
|
||||||
static void xpmem_remove_seg(struct xpmem_thread_group *,
|
static void xpmem_remove_seg(struct xpmem_thread_group *,
|
||||||
struct xpmem_segment *);
|
struct xpmem_segment *);
|
||||||
|
static void xpmem_remove_segs_of_tg(struct xpmem_thread_group *seg_tg);
|
||||||
|
|
||||||
|
static int xpmem_get(xpmem_segid_t, int, int, void *, xpmem_apid_t *);
|
||||||
|
static int xpmem_check_permit_mode(int, struct xpmem_segment *);
|
||||||
|
static int xpmem_perms(struct xpmem_perm *, short);
|
||||||
|
static xpmem_apid_t xpmem_make_apid(struct xpmem_thread_group *);
|
||||||
|
|
||||||
|
static int xpmem_release(xpmem_apid_t);
|
||||||
|
static void xpmem_release_ap(struct xpmem_thread_group *,
|
||||||
|
struct xpmem_access_permit *);
|
||||||
|
static void xpmem_release_aps_of_tg(struct xpmem_thread_group *ap_tg);
|
||||||
|
|
||||||
|
static int xpmem_attach(struct mckfd *, xpmem_apid_t, off_t, size_t,
|
||||||
|
unsigned long, int, int, unsigned long *);
|
||||||
|
|
||||||
|
static int xpmem_detach(unsigned long);
|
||||||
|
static int xpmem_vm_munmap(struct process_vm *vm, void *addr, size_t len);
|
||||||
|
static int xpmem_remove_process_range(struct process_vm *vm,
|
||||||
|
unsigned long start, unsigned long end, int *ro_freedp);
|
||||||
|
static int xpmem_free_process_memory_range(struct process_vm *vm,
|
||||||
|
struct vm_range *range);
|
||||||
|
static void xpmem_detach_att(struct xpmem_access_permit *,
|
||||||
|
struct xpmem_attachment *);
|
||||||
static void xpmem_clear_PTEs(struct xpmem_segment *);
|
static void xpmem_clear_PTEs(struct xpmem_segment *);
|
||||||
|
static void xpmem_clear_PTEs_range(struct xpmem_segment *, unsigned long,
|
||||||
|
unsigned long);
|
||||||
|
static void xpmem_clear_PTEs_of_ap(struct xpmem_access_permit *, unsigned long,
|
||||||
|
unsigned long);
|
||||||
|
static void xpmem_clear_PTEs_of_att(struct xpmem_attachment *, unsigned long,
|
||||||
|
unsigned long);
|
||||||
|
|
||||||
extern struct xpmem_partition *xpmem_my_part;
|
static int xpmem_remap_pte(struct process_vm *, struct vm_range *,
|
||||||
|
unsigned long, uint64_t, struct xpmem_segment *, unsigned long);
|
||||||
|
|
||||||
|
static int xpmem_ensure_valid_page(struct xpmem_segment *, unsigned long);
|
||||||
|
static pte_t * xpmem_vaddr_to_pte(struct process_vm *, unsigned long,
|
||||||
|
size_t *pgsize);
|
||||||
|
static int xpmem_pin_page(struct xpmem_thread_group *, struct thread *,
|
||||||
|
struct process_vm *, unsigned long);
|
||||||
|
static void xpmem_unpin_pages(struct xpmem_segment *, struct process_vm *,
|
||||||
|
unsigned long, size_t);
|
||||||
|
|
||||||
static struct xpmem_thread_group * __xpmem_tg_ref_by_tgid_nolock_internal(
|
static struct xpmem_thread_group * __xpmem_tg_ref_by_tgid_nolock_internal(
|
||||||
pid_t, int, int);
|
pid_t, int, int);
|
||||||
@@ -317,10 +352,17 @@ static inline struct xpmem_thread_group *__xpmem_tg_ref_by_tgid_nolock(
|
|||||||
#define xpmem_tg_ref_by_tgid_all_nolock(t) __xpmem_tg_ref_by_tgid_nolock(t, 1)
|
#define xpmem_tg_ref_by_tgid_all_nolock(t) __xpmem_tg_ref_by_tgid_nolock(t, 1)
|
||||||
|
|
||||||
static struct xpmem_thread_group * xpmem_tg_ref_by_segid(xpmem_segid_t);
|
static struct xpmem_thread_group * xpmem_tg_ref_by_segid(xpmem_segid_t);
|
||||||
|
static struct xpmem_thread_group * xpmem_tg_ref_by_apid(xpmem_apid_t);
|
||||||
static void xpmem_tg_deref(struct xpmem_thread_group *);
|
static void xpmem_tg_deref(struct xpmem_thread_group *);
|
||||||
static struct xpmem_segment *xpmem_seg_ref_by_segid(struct xpmem_thread_group *,
|
static struct xpmem_segment *xpmem_seg_ref_by_segid(struct xpmem_thread_group *,
|
||||||
xpmem_segid_t);
|
xpmem_segid_t);
|
||||||
static void xpmem_seg_deref(struct xpmem_segment *);
|
static void xpmem_seg_deref(struct xpmem_segment *);
|
||||||
|
static struct xpmem_access_permit * xpmem_ap_ref_by_apid(
|
||||||
|
struct xpmem_thread_group *, xpmem_apid_t);
|
||||||
|
static void xpmem_ap_deref(struct xpmem_access_permit *);
|
||||||
|
static void xpmem_att_deref(struct xpmem_attachment *);
|
||||||
|
static int xpmem_validate_access(struct xpmem_access_permit *, off_t, size_t,
|
||||||
|
int, unsigned long *);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Inlines that mark an internal driver structure as being destroyable or not.
|
* Inlines that mark an internal driver structure as being destroyable or not.
|
||||||
@@ -363,6 +405,42 @@ static inline void xpmem_seg_destroyable(
|
|||||||
XPMEM_DEBUG("return: ");
|
XPMEM_DEBUG("return: ");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void xpmem_ap_not_destroyable(
|
||||||
|
struct xpmem_access_permit *ap)
|
||||||
|
{
|
||||||
|
ihk_atomic_set(&ap->refcnt, 1);
|
||||||
|
|
||||||
|
XPMEM_DEBUG("return: ap->refcnt=%d", ap->refcnt);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Mark an access permit as destroyable by handing it to
 * xpmem_ap_deref().  Entry and exit are traced with XPMEM_DEBUG.
 *
 * @ap: access permit passed to xpmem_ap_deref().
 */
static inline void xpmem_ap_destroyable(struct xpmem_access_permit *ap)
{
	XPMEM_DEBUG("call: ");
	xpmem_ap_deref(ap);
	XPMEM_DEBUG("return: ");
}
|
||||||
|
|
||||||
|
static inline void xpmem_att_not_destroyable(
|
||||||
|
struct xpmem_attachment *att)
|
||||||
|
{
|
||||||
|
ihk_atomic_set(&att->refcnt, 1);
|
||||||
|
|
||||||
|
XPMEM_DEBUG("return: att->refcnt=%d", att->refcnt);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Mark an attachment as destroyable by handing it to
 * xpmem_att_deref().  Entry and exit are traced with XPMEM_DEBUG.
 *
 * @att: attachment passed to xpmem_att_deref().
 */
static inline void xpmem_att_destroyable(struct xpmem_attachment *att)
{
	XPMEM_DEBUG("call: ");
	xpmem_att_deref(att);
	XPMEM_DEBUG("return: ");
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Inlines that increment the refcnt for the specified structure.
|
* Inlines that increment the refcnt for the specified structure.
|
||||||
*/
|
*/
|
||||||
@@ -384,5 +462,29 @@ static inline void xpmem_seg_ref(
|
|||||||
XPMEM_DEBUG("return: seg->refcnt=%d", seg->refcnt);
|
XPMEM_DEBUG("return: seg->refcnt=%d", seg->refcnt);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void xpmem_ap_ref(
|
||||||
|
struct xpmem_access_permit *ap)
|
||||||
|
{
|
||||||
|
DBUG_ON(ihk_atomic_read(&ap->refcnt) <= 0);
|
||||||
|
ihk_atomic_inc(&ap->refcnt);
|
||||||
|
|
||||||
|
XPMEM_DEBUG("return: ap->refcnt=%d", ap->refcnt);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void xpmem_att_ref(
|
||||||
|
struct xpmem_attachment *att)
|
||||||
|
{
|
||||||
|
DBUG_ON(ihk_atomic_read(&att->refcnt) <= 0);
|
||||||
|
ihk_atomic_inc(&att->refcnt);
|
||||||
|
|
||||||
|
XPMEM_DEBUG("return: att->refcnt=%d", att->refcnt);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int xpmem_is_private_data(
|
||||||
|
struct vm_range *vmr)
|
||||||
|
{
|
||||||
|
return (vmr->private_data != NULL);
|
||||||
|
}
|
||||||
|
|
||||||
#endif /* _XPMEM_PRIVATE_H */
|
#endif /* _XPMEM_PRIVATE_H */
|
||||||
|
|
||||||
|
|||||||
@@ -31,6 +31,7 @@
|
|||||||
#include <auxvec.h>
|
#include <auxvec.h>
|
||||||
#include <timer.h>
|
#include <timer.h>
|
||||||
#include <mman.h>
|
#include <mman.h>
|
||||||
|
#include <xpmem.h>
|
||||||
|
|
||||||
//#define DEBUG_PRINT_PROCESS
|
//#define DEBUG_PRINT_PROCESS
|
||||||
|
|
||||||
@@ -637,6 +638,7 @@ static int copy_user_ranges(struct process_vm *vm, struct process_vm *orgvm)
|
|||||||
range->memobj = src_range->memobj;
|
range->memobj = src_range->memobj;
|
||||||
range->objoff = src_range->objoff;
|
range->objoff = src_range->objoff;
|
||||||
range->pgshift = src_range->pgshift;
|
range->pgshift = src_range->pgshift;
|
||||||
|
range->private_data = src_range->private_data;
|
||||||
if (range->memobj) {
|
if (range->memobj) {
|
||||||
memobj_ref(range->memobj);
|
memobj_ref(range->memobj);
|
||||||
}
|
}
|
||||||
@@ -734,6 +736,7 @@ int split_process_memory_range(struct process_vm *vm, struct vm_range *range,
|
|||||||
newrange->end = range->end;
|
newrange->end = range->end;
|
||||||
newrange->flag = range->flag;
|
newrange->flag = range->flag;
|
||||||
newrange->pgshift = range->pgshift;
|
newrange->pgshift = range->pgshift;
|
||||||
|
newrange->private_data = range->private_data;
|
||||||
|
|
||||||
if (range->memobj) {
|
if (range->memobj) {
|
||||||
memobj_ref(range->memobj);
|
memobj_ref(range->memobj);
|
||||||
@@ -953,6 +956,10 @@ int remove_process_memory_range(struct process_vm *vm,
|
|||||||
ro_freed = 1;
|
ro_freed = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (freerange->private_data) {
|
||||||
|
xpmem_remove_process_memory_range(vm, freerange);
|
||||||
|
}
|
||||||
|
|
||||||
error = free_process_memory_range(vm, freerange);
|
error = free_process_memory_range(vm, freerange);
|
||||||
if (error) {
|
if (error) {
|
||||||
ekprintf("remove_process_memory_range(%p,%lx,%lx):"
|
ekprintf("remove_process_memory_range(%p,%lx,%lx):"
|
||||||
@@ -1058,6 +1065,7 @@ int add_process_memory_range(struct process_vm *vm,
|
|||||||
range->memobj = memobj;
|
range->memobj = memobj;
|
||||||
range->objoff = offset;
|
range->objoff = offset;
|
||||||
range->pgshift = pgshift;
|
range->pgshift = pgshift;
|
||||||
|
range->private_data = NULL;
|
||||||
|
|
||||||
rc = 0;
|
rc = 0;
|
||||||
if (phys == NOPHYS) {
|
if (phys == NOPHYS) {
|
||||||
@@ -1793,7 +1801,12 @@ static int do_page_fault_process_vm(struct process_vm *vm, void *fault_addr0, ui
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
error = page_fault_process_memory_range(vm, range, fault_addr, reason);
|
if (!range->private_data) {
|
||||||
|
error = page_fault_process_memory_range(vm, range, fault_addr, reason);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
error = xpmem_fault_process_memory_range(vm, range, fault_addr, reason);
|
||||||
|
}
|
||||||
if (error == -ERESTART) {
|
if (error == -ERESTART) {
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
@@ -2209,6 +2222,19 @@ release_process_vm(struct process_vm *vm)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
long irqstate;
|
||||||
|
struct mckfd *fdp;
|
||||||
|
|
||||||
|
irqstate = ihk_mc_spinlock_lock(&proc->mckfd_lock);
|
||||||
|
for (fdp = proc->mckfd; fdp; fdp = fdp->next) {
|
||||||
|
if (fdp->close_cb) {
|
||||||
|
fdp->close_cb(fdp, NULL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ihk_mc_spinlock_unlock(&proc->mckfd_lock, irqstate);
|
||||||
|
}
|
||||||
|
|
||||||
if(vm->free_cb)
|
if(vm->free_cb)
|
||||||
vm->free_cb(vm, vm->opt);
|
vm->free_cb(vm, vm->opt);
|
||||||
|
|
||||||
|
|||||||
1682
kernel/xpmem.c
1682
kernel/xpmem.c
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user