From ed33ee65b24ffb040bea8b85f30decfae47ac070 Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Tue, 20 Jan 2015 02:02:46 +0900 Subject: [PATCH] CentOS7 spinlock, procfs and vm_munmap support (i.e., Linux kernel 3.10) --- executer/kernel/control.c | 2 +- executer/kernel/procfs.c | 235 +++++++++++++++++++++----------------- executer/kernel/syscall.c | 15 ++- 3 files changed, 144 insertions(+), 108 deletions(-) diff --git a/executer/kernel/control.c b/executer/kernel/control.c index c6552d55..ddbcdb28 100644 --- a/executer/kernel/control.c +++ b/executer/kernel/control.c @@ -775,7 +775,7 @@ long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg) } LIST_HEAD(mckernel_exec_files); -spinlock_t mckernel_exec_file_lock = SPIN_LOCK_UNLOCKED; +DEFINE_SPINLOCK(mckernel_exec_file_lock); struct mckernel_exec_file { diff --git a/executer/kernel/procfs.c b/executer/kernel/procfs.c index c2652bb5..d65e29e4 100644 --- a/executer/kernel/procfs.c +++ b/executer/kernel/procfs.c @@ -10,6 +10,7 @@ * HISTORY: */ +#include #include #include #include @@ -17,6 +18,7 @@ #include #include #include "mcctrl.h" +#include //#define PROCFS_DEBUG @@ -31,11 +33,12 @@ static ssize_t mckernel_procfs_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos); /* A private data for the procfs driver. */ +struct procfs_list_entry; struct procfs_list_entry { struct list_head list; struct proc_dir_entry *entry; - struct proc_dir_entry *parent; + struct procfs_list_entry *parent; ihk_os_t os; int osnum; int pid; @@ -53,102 +56,6 @@ struct procfs_list_entry { LIST_HEAD(procfs_file_list); static ihk_spinlock_t procfs_file_list_lock; -/** - * \brief Return specified procfs entry. - * - * \param p a name of the procfs file - * \param osnum os number - * \param mode if zero create a directory otherwise a file - * - * return value: NULL: Something wrong has occurred. 
- * otherwise: address of the proc_dir_entry structure of the procfs file - * - * p should not be NULL nor terminated by "/". - * - * We create a procfs entry if there is not already one. - * This process is recursive to the root of the procfs tree. - */ -/* - * XXX: Two or more entries which have same name can be created. - * - * get_procfs_entry() avoids creating an entry which has already been created. - * But, it allows creating an entry which is being created by another thread. - * - * This problem occurred when two requests which created files with a common - * ancestor directory which was not explicitly created were racing. - */ - -static struct proc_dir_entry *get_procfs_entry(char *p, int osnum, int mode) -{ - char *r; - struct proc_dir_entry *ret = NULL, *parent = NULL; - struct procfs_list_entry *e; - char name[PROCFS_NAME_MAX]; - unsigned long irqflags; - - dprintk("get_procfs_entry: %s for osnum %d mode %o\n", p, osnum, mode); - irqflags = ihk_ikc_spinlock_lock(&procfs_file_list_lock); - list_for_each_entry(e, &procfs_file_list, list) { - if (e == NULL) { - kprintf("ERROR: The procfs_file_list has a null entry.\n"); - return NULL; - } - if (strncmp(e->fname, p, PROCFS_NAME_MAX) == 0) { - /* We found the entry */ - ret = e->entry; - } - } - ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflags); - if (ret != NULL) { - return ret; - } - r = strrchr(p, '/'); - if (r != NULL) { - /* We have non-null parent dir. */ - strncpy(name, p, r - p); - name[r - p] = '\0'; - parent = get_procfs_entry(name, osnum, 0); - if (parent == NULL) { - /* We counld not get a parent procfs entry. 
Give up.*/ - return NULL; - } - } - e = kmalloc(sizeof(struct procfs_list_entry), GFP_KERNEL); - if (e == NULL) { - kprintf("ERROR: not enough memory to create PROCFS entry.\n"); - return NULL; - } - /* Fill the fname field of the entry */ - strncpy(e->fname, p, PROCFS_NAME_MAX); - - if (r != NULL) { - strncpy(name, r + 1, p + PROCFS_NAME_MAX - r - 1); - } else { - strncpy(name, p, PROCFS_NAME_MAX); - } - if (mode == 0) { - ret = proc_mkdir(name, parent); - } else { - ret = create_proc_entry(name, mode, parent); - } - if (ret == NULL) { - kprintf("ERROR: cannot create a PROCFS entry for %s.\n", p); - kfree(e); - return NULL; - } - ret->data = e; - e->osnum = osnum; - e->entry = ret; - e->parent = parent; - - irqflags = ihk_ikc_spinlock_lock(&procfs_file_list_lock); - list_add(&(e->list), &procfs_file_list); - ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflags); - - dprintk("get_procfs_entry: %s done\n", p); - return ret; -} - loff_t mckernel_procfs_lseek(struct file *file, loff_t offset, int orig) { switch (orig) { @@ -170,6 +77,117 @@ static const struct file_operations mckernel_procfs_file_operations = { .write = NULL, }; + +/** + * \brief Return specified procfs entry. + * + * \param p a name of the procfs file + * \param osnum os number + * \param mode if zero create a directory otherwise a file + * + * return value: NULL: Something wrong has occurred. + * otherwise: address of the proc_dir_entry structure of the procfs file + * + * p should not be NULL nor terminated by "/". + * + * We create a procfs entry if there is not already one. + * This process is recursive to the root of the procfs tree. + */ +/* + * XXX: Two or more entries which have same name can be created. + * + * get_procfs_list_entry() avoids creating an entry which has already been created. + * But, it allows creating an entry which is being created by another thread. 
+ * + * This problem occurred when two requests which created files with a common + * ancestor directory which was not explicitly created were racing. + */ + +static struct procfs_list_entry *get_procfs_list_entry(char *p, int osnum, int mode) +{ + char *r; + struct proc_dir_entry *pde = NULL; + struct procfs_list_entry *e, *ret = NULL, *parent = NULL; + char name[PROCFS_NAME_MAX]; + unsigned long irqflags; + + dprintk("get_procfs_list_entry: %s for osnum %d mode %o\n", p, osnum, mode); + irqflags = ihk_ikc_spinlock_lock(&procfs_file_list_lock); + list_for_each_entry(e, &procfs_file_list, list) { + if (e == NULL) { + kprintf("ERROR: The procfs_file_list has a null entry.\n"); + return NULL; /* NOTE(review): list_for_each_entry() should never yield NULL; if this path were taken it would return with procfs_file_list_lock still held */ + } + if (strncmp(e->fname, p, PROCFS_NAME_MAX) == 0) { + /* We found the entry */ + ret = e; + break; + } + } + ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflags); + if (ret != NULL) { + return ret; + } + r = strrchr(p, '/'); + if (r != NULL) { + /* We have non-null parent dir. */ + strncpy(name, p, r - p); + name[r - p] = '\0'; + parent = get_procfs_list_entry(name, osnum, 0); + if (parent == NULL) { + /* We could not get a parent procfs entry. Give up. */ + return NULL; + } + } + ret = kmalloc(sizeof(struct procfs_list_entry), GFP_KERNEL); + if (ret == NULL) { + kprintf("ERROR: not enough memory to create PROCFS entry.\n"); + return NULL; + } + /* Fill the fname field of the entry */ + strncpy(ret->fname, p, PROCFS_NAME_MAX); + + if (r != NULL) { + strncpy(name, r + 1, p + PROCFS_NAME_MAX - r - 1); + } else { + strncpy(name, p, PROCFS_NAME_MAX); + } + if (mode == 0) { +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) + pde = proc_mkdir(name, parent ? parent->entry : NULL); +#else + pde = proc_mkdir_data(name, 0555, parent ? 
parent->entry : NULL, ret); +#endif + } else { +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) + pde = create_proc_entry(name, mode, parent ? parent->entry : NULL); /* parent stays NULL when p contains no '/' */ + if (pde) + pde->proc_fops = &mckernel_procfs_file_operations; +#else + pde = proc_create_data(name, mode, parent ? parent->entry : NULL, + &mckernel_procfs_file_operations, ret); +#endif + } + if (pde == NULL) { + kprintf("ERROR: cannot create a PROCFS entry for %s.\n", p); + kfree(ret); + return NULL; + } +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) + pde->data = ret; +#endif + ret->osnum = osnum; + ret->entry = pde; + ret->parent = parent; + + irqflags = ihk_ikc_spinlock_lock(&procfs_file_list_lock); + list_add(&(ret->list), &procfs_file_list); + ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflags); + + dprintk("get_procfs_list_entry: %s done\n", p); + return ret; +} + /** * \brief Create a procfs entry. * @@ -182,7 +200,6 @@ static const struct file_operations mckernel_procfs_file_operations = { void procfs_create(void *__os, int ref, int osnum, int pid, unsigned long arg) { - struct proc_dir_entry *entry; struct procfs_list_entry *e; ihk_device_t dev = ihk_os_to_dev(__os); unsigned long parg; @@ -204,18 +221,16 @@ void procfs_create(void *__os, int ref, int osnum, int pid, unsigned long arg) printk("ERROR: procfs_creat: file name not properly terminated.\n"); goto quit; } - entry = get_procfs_entry(name, osnum, mode); - if (entry == NULL) { + e = get_procfs_list_entry(name, osnum, mode); + if (e == NULL) { printk("ERROR: could not create a procfs entry for %s.\n", name); goto quit; } - e = entry->data; e->os = __os; e->cpu = ref; e->pid = pid; - entry->proc_fops = &mckernel_procfs_file_operations; quit: f->status = 1; /* Now the peer can free the data. 
*/ ihk_device_unmap_virtual(dev, f, sizeof(struct procfs_file)); @@ -237,7 +252,7 @@ void procfs_delete(void *__os, int osnum, unsigned long arg) unsigned long parg; struct procfs_file *f; struct procfs_list_entry *e; - struct proc_dir_entry *parent = NULL; + struct procfs_list_entry *parent = NULL; char name[PROCFS_NAME_MAX]; char *r; unsigned long irqflags; @@ -251,8 +266,10 @@ void procfs_delete(void *__os, int osnum, unsigned long arg) if ((strncmp(e->fname, f->fname, PROCFS_NAME_MAX) == 0) && (e->osnum == osnum)) { list_del(&e->list); +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) e->entry->read_proc = NULL; e->entry->data = NULL; +#endif parent = e->parent; kfree(e); r = strrchr(f->fname, '/'); @@ -262,7 +279,7 @@ void procfs_delete(void *__os, int osnum, unsigned long arg) strncpy(name, r + 1, PROCFS_NAME_MAX); } dprintk("found and remove %s from the list.\n", name); - remove_proc_entry(name, parent); + remove_proc_entry(name, parent ? parent->entry : NULL); /* e->parent is NULL for a top-level entry */ break; } } @@ -304,8 +321,12 @@ mckernel_procfs_read(struct file *file, char __user *buf, size_t nbytes, int ret, retrycount = 0; unsigned long pbuf; unsigned long count = nbytes; +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) struct proc_dir_entry *dp = PDE(inode); struct procfs_list_entry *e = dp->data; +#else + struct procfs_list_entry *e = PDE_DATA(inode); +#endif loff_t offset = *ppos; dprintk("mckernel_procfs_read: invoked for %s, offset: %lu, count: %d\n", @@ -419,7 +440,7 @@ void procfs_exit(int osnum) { int error; mm_segment_t old_fs = get_fs(); struct kstat stat; - struct proc_dir_entry *parent; + struct procfs_list_entry *parent; struct procfs_list_entry *e, *temp = NULL; unsigned long irqflags; @@ -430,8 +451,10 @@ void procfs_exit(int osnum) { if (e->osnum == osnum) { dprintk("found entry for %s.\n", e->fname); list_del(&e->list); +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) e->entry->read_proc = NULL; e->entry->data = NULL; +#endif parent = e->parent; r = strrchr(e->fname, '/'); if (r == NULL) { @@ 
-439,7 +462,7 @@ void procfs_exit(int osnum) { } else { r += 1; } - remove_proc_entry(r, parent); + remove_proc_entry(r, parent ? parent->entry : NULL); /* e->parent is NULL for a top-level entry */ dprintk("free the entry\n"); kfree(e); } diff --git a/executer/kernel/syscall.c b/executer/kernel/syscall.c index f90b38ab..af05cf98 100644 --- a/executer/kernel/syscall.c +++ b/executer/kernel/syscall.c @@ -452,7 +452,7 @@ static int rus_mmap(struct file *file, struct vm_area_struct *vma) #if LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0) vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND | VM_MIXEDMAP; #else - vma->vm_flags |= VM_IO | VM_DONTDUMP | VM_DONTEXPAND | VM_PFNMAP; + vma->vm_flags |= VM_DONTDUMP | VM_DONTEXPAND | VM_MIXEDMAP; #endif vma->vm_ops = &rus_vmops; return 0; @@ -1142,8 +1142,17 @@ static int pager_req_map(ihk_os_t os, int fd, size_t len, off_t off, uintptr_t r down_write(&current->mm->mmap_sem); #define ANY_WHERE 0 + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) va = do_mmap_pgoff(file, ANY_WHERE, len, maxprot, MAP_SHARED, pgoff); +#endif + up_write(&current->mm->mmap_sem); + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0) + va = vm_mmap(file, ANY_WHERE, len, maxprot, MAP_SHARED, pgoff << PAGE_SHIFT); +#endif + if (IS_ERR_VALUE(va)) { printk("pager_req_map(%p,%d,%lx,%lx,%lx):do_mmap_pgoff failed. %d\n", os, fd, len, off, result_rpa, (int)va); error = va; @@ -1241,9 +1250,13 @@ static int pager_req_unmap(ihk_os_t os, uintptr_t handle) printk("pager_req_unmap(%p,%lx)\n", os, handle); +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) down_write(&current->mm->mmap_sem); error = do_munmap(current->mm, pager->map_uaddr, pager->map_len); up_write(&current->mm->mmap_sem); +#else + error = vm_munmap(pager->map_uaddr, pager->map_len); +#endif if (error) { printk("pager_req_unmap(%p,%lx):do_munmap failed. %d\n", os, handle, error);