From 583cb94667041e41a08e768504614063bca31f1c Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Mon, 19 Nov 2018 17:27:26 +0900 Subject: [PATCH] mcctrl: remove in-kernel calls to syscalls Since 4.17.0, the kernel cannot call syscalls directly because the calling convention can be different on x86_64, as explained in this email: https://lore.kernel.org/lkml/20180325162527.GA17492@light.dominikbrodowski.net Use the ksys_* alternatives instead when possible, or for readlink use do_readlinkat (and use readlinkat all the time to simplify ifdefs). It might be possible to change some of these without ifdefs, but for example ksys_unshare only got introduced in 4.17, so we need to keep calling some syscalls directly on older kernels. Change-Id: Ic47e184b29ef8b21731b2eae6193b0af2548b872 --- executer/kernel/mcctrl/driver.c | 30 +++++++++++++++++++++------- executer/kernel/mcctrl/mcctrl.h | 2 +- executer/kernel/mcctrl/sysfs_files.c | 2 +- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/executer/kernel/mcctrl/driver.c b/executer/kernel/mcctrl/driver.c index 0a341ffc..7ce341aa 100644 --- a/executer/kernel/mcctrl/driver.c +++ b/executer/kernel/mcctrl/driver.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "mcctrl.h" #include @@ -222,7 +223,7 @@ long (*mcctrl_sched_setaffinity)(pid_t pid, const struct cpumask *in_mask); int (*mcctrl_sched_setscheduler_nocheck)(struct task_struct *p, int policy, const struct sched_param *param); -ssize_t (*mcctrl_sys_readlink)(const char *path, char *buf, +ssize_t (*mcctrl_sys_readlinkat)(int dfd, const char *path, char *buf, size_t bufsiz); void (*mcctrl_zap_page_range)(struct vm_area_struct *vma, unsigned long start, @@ -234,29 +235,41 @@ struct inode_operations *mcctrl_hugetlbfs_inode_operations; static int symbols_init(void) { +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,17,0) + mcctrl_sys_mount = (void *) kallsyms_lookup_name("ksys_mount"); +#else mcctrl_sys_mount = (void *) kallsyms_lookup_name("sys_mount"); #if 
defined(CONFIG_X86_64_SMP) if (!mcctrl_sys_mount) mcctrl_sys_mount = (void *) kallsyms_lookup_name("__x64_sys_mount"); +#endif #endif if (WARN_ON(!mcctrl_sys_mount)) return -EFAULT; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,17,0) + mcctrl_sys_umount = (void *) kallsyms_lookup_name("ksys_umount"); +#else mcctrl_sys_umount = (void *) kallsyms_lookup_name("sys_umount"); #if defined(CONFIG_X86_64_SMP) if (!mcctrl_sys_umount) mcctrl_sys_umount = (void *) kallsyms_lookup_name("__x64_sys_umount"); +#endif #endif if (WARN_ON(!mcctrl_sys_umount)) return -EFAULT; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,17,0) + mcctrl_sys_unshare = (void *) kallsyms_lookup_name("ksys_unshare"); +#else mcctrl_sys_unshare = (void *) kallsyms_lookup_name("sys_unshare"); #if defined(CONFIG_X86_64_SMP) if (!mcctrl_sys_unshare) mcctrl_sys_unshare = (void *) kallsyms_lookup_name("__x64_sys_unshare"); +#endif #endif if (WARN_ON(!mcctrl_sys_unshare)) return -EFAULT; @@ -271,14 +284,17 @@ static int symbols_init(void) if (WARN_ON(!mcctrl_sched_setscheduler_nocheck)) return -EFAULT; - mcctrl_sys_readlink = - (void *) kallsyms_lookup_name("sys_readlink"); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,17,0) + mcctrl_sys_readlinkat = (void *)kallsyms_lookup_name("do_readlinkat"); +#else + mcctrl_sys_readlinkat = (void *)kallsyms_lookup_name("sys_readlinkat"); #if defined(CONFIG_X86_64_SMP) - if (!mcctrl_sys_readlink) - mcctrl_sys_readlink = - (void *) kallsyms_lookup_name("__x64_sys_readlink"); + if (!mcctrl_sys_readlinkat) + mcctrl_sys_readlinkat = + (void *) kallsyms_lookup_name("__x64_sys_readlinkat"); #endif - if (WARN_ON(!mcctrl_sys_readlink)) +#endif + if (WARN_ON(!mcctrl_sys_readlinkat)) return -EFAULT; mcctrl_zap_page_range = diff --git a/executer/kernel/mcctrl/mcctrl.h b/executer/kernel/mcctrl/mcctrl.h index 811a58b8..fd3a54e6 100644 --- a/executer/kernel/mcctrl/mcctrl.h +++ b/executer/kernel/mcctrl/mcctrl.h @@ -422,7 +422,7 @@ extern long (*mcctrl_sched_setaffinity)(pid_t pid, extern int 
(*mcctrl_sched_setscheduler_nocheck)(struct task_struct *p, int policy, const struct sched_param *param); -extern ssize_t (*mcctrl_sys_readlink)(const char *path, char *buf, +extern ssize_t (*mcctrl_sys_readlinkat)(int dfd, const char *path, char *buf, size_t bufsiz); extern void (*mcctrl_zap_page_range)(struct vm_area_struct *vma, unsigned long start, diff --git a/executer/kernel/mcctrl/sysfs_files.c b/executer/kernel/mcctrl/sysfs_files.c index aab42d4b..fb68765d 100644 --- a/executer/kernel/mcctrl/sysfs_files.c +++ b/executer/kernel/mcctrl/sysfs_files.c @@ -921,7 +921,7 @@ static int read_link(char *buf, size_t bufsize, char *fmt, ...) old_fs = get_fs(); set_fs(KERNEL_DS); - ss = mcctrl_sys_readlink(filename, buf, bufsize); + ss = mcctrl_sys_readlinkat(AT_FDCWD, filename, buf, bufsize); set_fs(old_fs); if (ss < 0) { error = ss;