mcctrl: move mcctrl to executer/kernel/mcctrl

This commit is contained in:
Yoichi Umezawa
2015-11-24 15:42:04 +09:00
parent 828a3ea57a
commit 12eb8a9bb0
11 changed files with 9 additions and 9 deletions

View File

@@ -0,0 +1,26 @@
# Kbuild makefile template for the mcctrl kernel module.
# The @...@ tokens are placeholders; presumably they are substituted by
# configure when the real Makefile is generated -- TODO confirm.

# Kernel build tree and target architecture (overridable from the command line).
KDIR ?= @KDIR@
ARCH ?= @ARCH@
src = @abs_srcdir@
# Installation directory of the module and directory that holds mcexec.
KMODDIR=@KMODDIR@
BINDIR=@BINDIR@
# IHK source tree, located relative to this directory.
IHK_BASE=$(src)/../../../../ihk
obj-m += mcctrl.o
# MCEXEC_PATH is compiled into the module as the default mcexec location.
ccflags-y := -I$(IHK_BASE)/linux/include -I$(IHK_BASE)/ikc/include -I$(IHK_BASE)/include -I$(src)/../../include -mcmodel=kernel -mno-red-zone -DMCEXEC_PATH=\"$(BINDIR)/mcexec\"
mcctrl-y := driver.o control.o ikc.o syscall.o procfs.o binfmt_mcexec.o
# Symbol versions exported by the IHK core module that mcctrl links against.
KBUILD_EXTRA_SYMBOLS = @abs_builddir@/../../../../ihk/linux/core/Module.symvers
.PHONY: clean install modules
# Build mcctrl.ko through the kernel build system.
modules:
$(MAKE) -C $(KDIR) M=$(PWD) SUBDIRS=$(PWD) ARCH=$(ARCH) modules
# Remove build products.
clean:
$(RM) .*.cmd *.mod.c *.o *.ko* Module.symvers modules.order -r .tmp*
# Copy the built module into KMODDIR.
install:
mkdir -p -m 755 $(KMODDIR)
install -m 644 mcctrl.ko $(KMODDIR)

View File

@@ -0,0 +1,284 @@
#include <linux/module.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/binfmts.h>
#include <linux/elfcore.h>
#include <linux/elf.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/version.h>
#include "mcctrl.h"
/**
 * \brief Check whether a path lies beneath one of the directories in a list.
 *
 * \param file path to test
 * \param list ':'-separated list of directory prefixes; trailing '/'
 *             characters of every entry are ignored
 *
 * \return 1 if list is empty, or if file starts with one of the entries
 *         immediately followed by '/'; 0 otherwise.
 *
 * An entry that is empty or consists only of slashes has length zero after
 * trimming and therefore matches every path that starts with '/'.
 */
static int pathcheck(const char *file, const char *list)
{
	const char *entry;
	const char *end;
	size_t len;

	if (!*list)
		return 1;

	entry = list;
	for (;;) {
		end = strchr(entry, ':');
		if (!end)
			end = entry + strlen(entry);

		/*
		 * Trim trailing slashes by length rather than by walking a
		 * pointer backwards, so no pointer before the start of the
		 * list is ever formed.
		 */
		len = (size_t)(end - entry);
		while (len > 0 && entry[len - 1] == '/')
			len--;

		if (!strncmp(file, entry, len) && file[len] == '/')
			return 1;

		if (!*end)
			return 0;
		entry = end + 1;
	}
}
static int load_elf(struct linux_binprm *bprm
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0)
, struct pt_regs *regs
#endif
)
{
char mcexec[BINPRM_BUF_SIZE];
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36)
const
#endif
char *wp;
char *cp;
struct file *file;
int rc;
struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
typedef struct {
char *name;
char *val;
int l;
} envdata;
envdata env[] = {
{.name = "MCEXEC"},
#define env_mcexec (env[0].val)
{.name = "MCEXEC_WL"},
#define env_mcexec_wl (env[1].val)
{.name = "MCEXEC_BL"},
#define env_mcexec_bl (env[2].val)
{.name = NULL}
};
envdata *ep;
unsigned long off = 0;
struct page *page;
char *addr = NULL;
int i;
unsigned long p;
int st;
int mode;
int cnt[2];
char buf[32];
int l;
int pass;
char pbuf[1024];
const char *path;
if(bprm->envc == 0)
return -ENOEXEC;
if(memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
return -ENOEXEC;
if(elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
return -ENOEXEC;
if(elf_ex->e_ident[EI_CLASS] != ELFCLASS64)
return -ENOEXEC;
path = d_path(&bprm->file->f_path, pbuf, 1024);
if(!path || IS_ERR(path))
path = bprm->interp;
cp = strrchr(path, '/');
if(!cp ||
!strcmp(cp, "/mcexec") ||
!strcmp(cp, "/ihkosctl") ||
!strcmp(cp, "/ihkconfig"))
return -ENOEXEC;
cnt[0] = bprm->argc;
cnt[1] = bprm->envc;
for(pass = 0; pass < 2; pass++){
p = bprm->p;
mode = cnt[0] == 0? 1: 0;
if(pass == 1){
for(ep = env; ep->name; ep++){
if(ep->l)
ep->val = kmalloc(ep->l, GFP_KERNEL);
}
}
ep = NULL;
l = 0;
for(i = 0, st = 0; mode != 2;){
if(st == 0){
off = p & ~PAGE_MASK;
rc = get_user_pages(current, bprm->mm,
bprm->p, 1, 0, 1,
&page, NULL);
if(rc <= 0)
return -EFAULT;
addr = kmap_atomic(page
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0)
, KM_USER0
#endif
);
st = 1;
}
if(addr[off]){
if(mode == 1){
if(ep){
if(pass == 1)
ep->val[l] = addr[off];
l++;
}
else if(addr[off] == '='){
if(l < 32)
buf[l] = '\0';
buf[31] = '\0';
for(ep = env; ep->name; ep++)
if(!strcmp(ep->name, buf))
break;
if(ep->name)
l = 0;
else
ep = NULL;
}
else{
if(l < 32)
buf[l] = addr[off];
l++;
}
}
}
else{
if(mode == 1 && ep){
if(pass == 0){
ep->l = l + 1;
}
else{
ep->val[l] = '\0';
}
}
ep = NULL;
l = 0;
i++;
if(i == cnt[mode]){
i = 0;
mode++;
}
}
off++;
p++;
if(off == PAGE_SIZE || mode == 2){
kunmap_atomic(addr
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0)
, KM_USER0
#endif
);
put_page(page);
st = 0;
}
}
}
if(!env_mcexec || !strcmp(env_mcexec, "0") || !strcmp(env_mcexec, "off"))
rc = 1;
else{
rc = 0;
if(strchr(env_mcexec, '/') && strlen(env_mcexec) < BINPRM_BUF_SIZE)
strcpy(mcexec, env_mcexec);
else
strcpy(mcexec, MCEXEC_PATH);
}
if(rc);
else if(env_mcexec_wl)
rc = !pathcheck(path, env_mcexec_wl);
else if(env_mcexec_bl)
rc = pathcheck(path, env_mcexec_bl);
else
rc = pathcheck(path, "/usr:/bin:/sbin:/opt");
for(ep = env; ep->name; ep++)
if(ep->val)
kfree(ep->val);
if(rc)
return -ENOEXEC;
file = open_exec(mcexec);
if (IS_ERR(file))
return -ENOEXEC;
rc = remove_arg_zero(bprm);
if (rc){
fput(file);
return rc;
}
rc = copy_strings_kernel(1, &bprm->interp, bprm);
if (rc < 0){
fput(file);
return rc;
}
bprm->argc++;
wp = mcexec;
rc = copy_strings_kernel(1, &wp, bprm);
if (rc){
fput(file);
return rc;
}
bprm->argc++;
#if 1
rc = bprm_change_interp(mcexec, bprm);
if (rc < 0){
fput(file);
return rc;
}
#else
if(brpm->interp != bprm->filename)
kfree(brpm->interp);
kfree(brpm->filename);
bprm->filename = bprm->interp = kstrdup(mcexec, GFP_KERNEL);
if(!bprm->interp){
fput(file);
return -ENOMEM;
}
#endif
allow_write_access(bprm->file);
fput(bprm->file);
bprm->file = file;
rc = prepare_binprm(bprm);
if (rc < 0){
return rc;
}
return search_binary_handler(bprm
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0)
, regs
#endif
);
}
/* Binary format descriptor whose load_binary hook is load_elf(). */
static struct linux_binfmt mcexec_format = {
.module = THIS_MODULE,
.load_binary = load_elf,
};
/**
 * \brief Register the mcexec binary format.
 *
 * insert_binfmt() is used for the registration; presumably this places the
 * format ahead of the already registered ones -- TODO confirm.
 */
void __init binfmt_mcexec_init(void)
{
	insert_binfmt(&mcexec_format);
}
/**
 * \brief Unregister the mcexec binary format registered by
 *        binfmt_mcexec_init().
 */
void __exit binfmt_mcexec_exit(void)
{
	unregister_binfmt(&mcexec_format);
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,151 @@
/**
* \file executer/kernel/mcctrl/driver.c
* License details are found in the file LICENSE.
* \brief
* kernel module entry
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
* \author Balazs Gerofi <bgerofi@riken.jp> \par
* Copyright (C) 2012 RIKEN AICS
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2012 - 2013 Hitachi, Ltd.
* \author Tomoki Shirasawa <tomoki.shirasawa.kk@hitachi-solutions.com> \par
* Copyright (C) 2012 - 2013 Hitachi, Ltd.
* \author Balazs Gerofi <bgerofi@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2013 The University of Tokyo
*/
/*
* HISTORY:
* 2013/09/02 shirasawa add terminate thread
* 2013/08/19 shirasawa mcexec forward signal to MIC process
*/
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/miscdevice.h>
#include <linux/slab.h>
#include "mcctrl.h"
#define OS_MAX_MINOR 64
extern long __mcctrl_control(ihk_os_t, unsigned int, unsigned long,
struct file *);
extern int prepare_ikc_channels(ihk_os_t os);
extern void destroy_ikc_channels(ihk_os_t os);
#ifndef DO_USER_MODE
extern void mcctrl_syscall_init(void);
#endif
extern void procfs_init(int);
extern void procfs_exit(int);
extern void rus_page_hash_init(void);
extern void rus_page_hash_put_pages(void);
extern void binfmt_mcexec_init(void);
extern void binfmt_mcexec_exit(void);
/**
 * \brief User-call handler shared by every request in mcctrl_uchs.
 *
 * \param os      target OS instance
 * \param request request code
 * \param priv    not used by this handler
 * \param arg     request argument
 * \param file    file the request was issued on
 *
 * \return whatever __mcctrl_control() returns.
 */
static long mcctrl_ioctl(ihk_os_t os, unsigned int request, void *priv,
                         unsigned long arg, struct file *file)
{
	long ret;

	ret = __mcctrl_control(os, request, arg, file);
	return ret;
}
/*
 * Table of the user-call requests served by mcctrl.  Every request is routed
 * to mcctrl_ioctl(), which forwards it to __mcctrl_control().
 */
static struct ihk_os_user_call_handler mcctrl_uchs[] = {
{ .request = MCEXEC_UP_PREPARE_IMAGE, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_TRANSFER, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_START_IMAGE, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_WAIT_SYSCALL, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_RET_SYSCALL, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_LOAD_SYSCALL, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_SEND_SIGNAL, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_GET_CPU, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_STRNCPY_FROM_USER, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_NEW_PROCESS, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_PREPARE_DMA, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_FREE_DMA, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_OPEN_EXEC, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_CLOSE_EXEC, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_GET_CRED, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_GET_CREDV, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_DEBUG_LOG, .func = mcctrl_ioctl },
};
/* Prototype descriptor; mcctrl_init() copies it once per detected OS. */
static struct ihk_os_user_call mcctrl_uc_proto = {
.num_handlers = sizeof(mcctrl_uchs) / sizeof(mcctrl_uchs[0]),
.handlers = mcctrl_uchs,
};
/* Per-OS copy of mcctrl_uc_proto, indexed by OS number. */
static struct ihk_os_user_call mcctrl_uc[OS_MAX_MINOR];
/* OS handles found by mcctrl_init(); NULL for absent or failed instances. */
static ihk_os_t os[OS_MAX_MINOR];
/**
 * \brief Module entry point.
 *
 * Looks up every OS instance (0 .. OS_MAX_MINOR - 1), prepares its IKC
 * channels, registers the user-call handlers and the procfs support for it,
 * and finally registers the mcexec binary format.
 *
 * An OS whose channel preparation or handler registration fails is dropped
 * (its os[] slot is cleared) and the remaining instances are still served.
 *
 * \return 0 on success, -ENOENT if no OS instance was found.
 */
static int __init mcctrl_init(void)
{
	int i;
	int rc;

	rc = -ENOENT;
	for (i = 0; i < OS_MAX_MINOR; i++) {
		os[i] = ihk_host_find_os(i, NULL);
		if (os[i]) {
			printk("OS #%d found.\n", i);
			rc = 0;
		}
	}
	if (rc) {
		printk("OS not found.\n");
		return rc;
	}

	for (i = 0; i < OS_MAX_MINOR; i++) {
		if (!os[i])
			continue;
		if (prepare_ikc_channels(os[i]) != 0) {
			printk("Preparing syscall channels failed.\n");
			os[i] = NULL;
		}
	}

#ifndef DO_USER_MODE
	mcctrl_syscall_init();
#endif

	rus_page_hash_init();

	for (i = 0; i < OS_MAX_MINOR; i++) {
		if (!os[i])
			continue;

		memcpy(mcctrl_uc + i, &mcctrl_uc_proto, sizeof mcctrl_uc_proto);
		rc = ihk_os_register_user_call_handlers(os[i], mcctrl_uc + i);
		if (rc < 0) {
			printk("mcctrl: registering handlers for OS #%d failed (%d).\n",
			       i, rc);
			destroy_ikc_channels(os[i]);
			os[i] = NULL;
			/*
			 * Do not set up procfs for a dropped OS:
			 * mcctrl_exit() skips cleared slots and would never
			 * call the matching procfs_exit().
			 */
			continue;
		}

		procfs_init(i);
	}

	binfmt_mcexec_init();

	return 0;
}
/**
 * \brief Module exit point.
 *
 * Unregisters the mcexec binary format, then for every OS still recorded in
 * os[] unregisters the user-call handlers, destroys the IKC channels and
 * tears down procfs, and finally releases the pages held by the rus page
 * hash.
 */
static void __exit mcctrl_exit(void)
{
	int minor;

	binfmt_mcexec_exit();
	printk("mcctrl: unregistered.\n");

	for (minor = 0; minor < OS_MAX_MINOR; minor++) {
		if (!os[minor])
			continue;

		ihk_os_unregister_user_call_handlers(os[minor], &mcctrl_uc[minor]);
		destroy_ikc_channels(os[minor]);
		procfs_exit(minor);
	}

	rus_page_hash_put_pages();
}
MODULE_LICENSE("GPL v2");
module_init(mcctrl_init);
module_exit(mcctrl_exit);

View File

@@ -0,0 +1,373 @@
/**
* \file executer/kernel/mcctrl/ikc.c
* License details are found in the file LICENSE.
* \brief
* inter kernel communication
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
* \author Balazs Gerofi <bgerofi@riken.jp> \par
* Copyright (C) 2012 RIKEN AICS
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2012 - 2013 Hitachi, Ltd.
* \author Tomoki Shirasawa <tomoki.shirasawa.kk@hitachi-solutions.com> \par
* Copyright (C) 2012 - 2013 Hitachi, Ltd.
* \author Balazs Gerofi <bgerofi@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2013 The University of Tokyo
*/
/*
* HISTORY:
* 2013/09/02 shirasawa add terminate thread
* 2013/08/07 nakamura add page fault forwarding
* 2013/06/06 shirasawa propagate error code for prepare image
* 2013/06/02 shirasawa add error handling for prepare_process
*/
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/miscdevice.h>
#include <linux/slab.h>
#include <linux/string.h>
#include "mcctrl.h"
#ifdef ATTACHED_MIC
#include <sysdeps/mic/mic/micconst.h>
#endif
#define REQUEST_SHIFT 16
//int num_channels;
//struct mcctrl_channel *channels;
void mcexec_prepare_ack(ihk_os_t os, unsigned long arg, int err);
static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ihk_ikc_channel_desc *c);
int mcexec_syscall(struct mcctrl_channel *c, int pid, unsigned long arg);
void procfs_create(void *__os, int ref, int osnum, int pid, unsigned long arg);
void procfs_delete(void *__os, int osnum, unsigned long arg);
void procfs_answer(unsigned long arg, int err);
void sig_done(unsigned long arg, int err);
static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
void *__packet, void *__os)
{
struct ikc_scd_packet *pisp = __packet;
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(__os);
switch (pisp->msg) {
case SCD_MSG_INIT_CHANNEL:
mcctrl_ikc_init(__os, pisp->ref, pisp->arg, c);
break;
case SCD_MSG_PREPARE_PROCESS_ACKED:
mcexec_prepare_ack(__os, pisp->arg, 0);
break;
case SCD_MSG_PREPARE_PROCESS_NACKED:
mcexec_prepare_ack(__os, pisp->arg, pisp->err);
break;
case SCD_MSG_SYSCALL_ONESIDE:
mcexec_syscall(usrdata->channels + pisp->ref, pisp->pid, pisp->arg);
break;
case SCD_MSG_PROCFS_CREATE:
procfs_create(__os, pisp->ref, pisp->osnum, pisp->pid, pisp->arg);
break;
case SCD_MSG_PROCFS_DELETE:
procfs_delete(__os, pisp->osnum, pisp->arg);
break;
case SCD_MSG_PROCFS_ANSWER:
procfs_answer(pisp->arg, pisp->err);
break;
case SCD_MSG_SEND_SIGNAL:
sig_done(pisp->arg, pisp->err);
break;
}
return 0;
}
/**
 * \brief Send a prepared packet over the IKC channel of a CPU.
 *
 * \param os   target OS instance
 * \param cpu  index of the channel to use
 * \param pisp packet to send
 *
 * \return result of ihk_ikc_send(), or -EINVAL if os is NULL, no mcctrl data
 *         is attached to it, cpu is out of range or the channel is not
 *         connected.
 */
int mcctrl_ikc_send(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp)
{
	struct mcctrl_usrdata *usrdata;

	/* Validate os before it is handed to the usrdata lookup. */
	if (os == NULL)
		return -EINVAL;

	usrdata = ihk_host_os_get_usrdata(os);
	if (usrdata == NULL || cpu < 0 ||
	    cpu >= usrdata->num_channels || !usrdata->channels[cpu].c) {
		return -EINVAL;
	}
	return ihk_ikc_send(usrdata->channels[cpu].c, pisp, 0);
}
/**
 * \brief Build and send a packet over the IKC channel of a CPU.
 *
 * \param os  target OS instance
 * \param cpu index of the channel to use
 * \param msg message code (SCD_MSG_*)
 * \param ref value for the packet's ref field
 * \param arg value for the packet's arg field
 *
 * \return result of ihk_ikc_send(), or -EINVAL if no mcctrl data is attached
 *         to os, cpu is out of range or the channel is not connected.
 */
int mcctrl_ikc_send_msg(ihk_os_t os, int cpu, int msg, int ref, unsigned long arg)
{
	struct ikc_scd_packet packet;
	struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);

	if (usrdata == NULL || cpu < 0 ||
	    cpu >= usrdata->num_channels || !usrdata->channels[cpu].c) {
		return -EINVAL;
	}

	/*
	 * Clear the whole packet first so that the fields not set below
	 * (osnum, pid, err) and any padding do not carry stale stack data to
	 * the peer.
	 */
	memset(&packet, 0, sizeof(packet));
	packet.msg = msg;
	packet.ref = ref;
	packet.arg = arg;

	return ihk_ikc_send(usrdata->channels[cpu].c, &packet, 0);
}
/**
 * \brief Make the calling processor the receiver of a channel.
 *
 * \param os  target OS instance
 * \param cpu index of the channel to retarget
 *
 * \return 0 on success, -EINVAL if no mcctrl data is attached to os, cpu is
 *         out of range or the channel is not connected (same validation as
 *         mcctrl_ikc_send()).
 */
int mcctrl_ikc_set_recv_cpu(ihk_os_t os, int cpu)
{
	struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);

	if (usrdata == NULL || cpu < 0 ||
	    cpu >= usrdata->num_channels || !usrdata->channels[cpu].c) {
		return -EINVAL;
	}

	ihk_ikc_channel_set_cpu(usrdata->channels[cpu].c,
	                        ihk_ikc_get_processor_id());
	kprintf("Setting the target to %d\n",
	        ihk_ikc_get_processor_id());
	return 0;
}
/**
 * \brief Tell whether a CPU index refers to a connected channel.
 *
 * \param os  target OS instance
 * \param cpu channel index to test
 *
 * \return 1 if cpu is within [0, num_channels) and its channel descriptor is
 *         set, 0 otherwise.
 */
int mcctrl_ikc_is_valid_thread(ihk_os_t os, int cpu)
{
	struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);

	if (cpu < 0)
		return 0;
	if (cpu >= usrdata->num_channels)
		return 0;

	return usrdata->channels[cpu].c ? 1 : 0;
}
//unsigned long *mcctrl_doorbell_va;
//unsigned long mcctrl_doorbell_pa;
/**
 * \brief Handle SCD_MSG_INIT_CHANNEL: set up the syscall pages of a channel.
 *
 * Maps the peer's struct ikc_scd_init_param at rphys, allocates the request,
 * post and DMA buffers of the channel, maps the peer's response page, writes
 * the physical addresses of the request/doorbell/post pages back into the
 * init parameter block and answers with SCD_MSG_INIT_CHANNEL_ACKED.
 *
 * \param os    OS instance the channel belongs to
 * \param cpu   channel index sent by the peer (packet ref field)
 * \param rphys remote physical address of the init parameter block
 * \param c     channel the request arrived on; port 502 always selects the
 *              last channel slot
 *
 * On an invalid index, a failed mapping or a failed allocation the request
 * is logged and dropped without sending the ACK.
 */
static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ihk_ikc_channel_desc *c)
{
	struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
	struct ikc_scd_packet packet;
	struct mcctrl_channel *pmc;
	unsigned long phys;
	struct ikc_scd_init_param *rpm;

	if (!usrdata) {
		printk("IKC init: no mcctrl data attached to the OS.\n");
		return;
	}

	/* cpu comes from the peer; validate it before indexing channels[]. */
	if (c->port == 502) {
		pmc = usrdata->channels + usrdata->num_channels - 1;
	} else if (cpu >= 0 && cpu < usrdata->num_channels) {
		pmc = usrdata->channels + cpu;
	} else {
		printk("IKC init: invalid cpu %d\n", cpu);
		return;
	}

	printk("IKC init: cpu=%d port=%d\n", cpu, c->port);

	phys = ihk_device_map_memory(ihk_os_to_dev(os), rphys,
	                             sizeof(struct ikc_scd_init_param));
#ifdef CONFIG_MIC
	rpm = ioremap_wc(phys, sizeof(struct ikc_scd_init_param));
#else
	rpm = ihk_device_map_virtual(ihk_os_to_dev(os), phys,
	                             sizeof(struct ikc_scd_init_param),
	                             NULL, 0);
#endif
	if (!rpm) {
		printk("IKC init: cannot map init parameters.\n");
		goto out_unmap_phys;
	}

	pmc->param.request_va =
		(void *)__get_free_pages(GFP_KERNEL,
		                         REQUEST_SHIFT - PAGE_SHIFT);
	pmc->param.post_va = (void *)__get_free_page(GFP_KERNEL);
	pmc->dma_buf = (void *)__get_free_pages(GFP_KERNEL,
	                                        DMA_PIN_SHIFT - PAGE_SHIFT);
	if (!pmc->param.request_va || !pmc->param.post_va || !pmc->dma_buf) {
		printk("IKC init: cannot allocate channel buffers.\n");
		/* free_pages()/free_page() ignore a zero address. */
		free_pages((unsigned long)pmc->param.request_va,
		           REQUEST_SHIFT - PAGE_SHIFT);
		free_page((unsigned long)pmc->param.post_va);
		free_pages((unsigned long)pmc->dma_buf,
		           DMA_PIN_SHIFT - PAGE_SHIFT);
		pmc->param.request_va = NULL;
		pmc->param.post_va = NULL;
		pmc->dma_buf = NULL;
		goto out_unmap;
	}

	pmc->param.request_pa = virt_to_phys(pmc->param.request_va);
	pmc->param.doorbell_va = usrdata->mcctrl_doorbell_va;
	pmc->param.doorbell_pa = usrdata->mcctrl_doorbell_pa;
	pmc->param.post_pa = virt_to_phys(pmc->param.post_va);

	memset(pmc->param.doorbell_va, 0, PAGE_SIZE);
	memset(pmc->param.request_va, 0, PAGE_SIZE);
	memset(pmc->param.post_va, 0, PAGE_SIZE);

	pmc->param.response_rpa = rpm->response_page;
	pmc->param.response_pa
		= ihk_device_map_memory(ihk_os_to_dev(os),
		                        pmc->param.response_rpa,
		                        PAGE_SIZE);
#ifdef CONFIG_MIC
	pmc->param.response_va = ioremap_cache(pmc->param.response_pa,
	                                       PAGE_SIZE);
#else
	pmc->param.response_va = ihk_device_map_virtual(ihk_os_to_dev(os),
	                                                pmc->param.response_pa,
	                                                PAGE_SIZE, NULL, 0);
#endif

	/* Tell the peer where our pages are. */
	rpm->request_page = pmc->param.request_pa;
	rpm->doorbell_page = pmc->param.doorbell_pa;
	rpm->post_page = pmc->param.post_pa;

	/* Clear first so that unused fields do not carry stale stack data. */
	memset(&packet, 0, sizeof(packet));
	packet.msg = SCD_MSG_INIT_CHANNEL_ACKED;
	packet.ref = cpu;
	packet.arg = rphys;

	printk("Request: %lx, Response: %lx, Doorbell: %lx\n",
	       pmc->param.request_pa, pmc->param.response_rpa,
	       pmc->param.doorbell_pa);
	printk("Request: %p, Response: %p, Doorbell: %p\n",
	       pmc->param.request_va, pmc->param.response_va,
	       pmc->param.doorbell_va);

	ihk_ikc_send(pmc->c, &packet, 0);

out_unmap:
#ifdef CONFIG_MIC
	iounmap(rpm);
#else
	ihk_device_unmap_virtual(ihk_os_to_dev(os), rpm,
	                         sizeof(struct ikc_scd_init_param));
#endif
out_unmap_phys:
	ihk_device_unmap_memory(ihk_os_to_dev(os), phys,
	                        sizeof(struct ikc_scd_init_param));
}
static int connect_handler(struct ihk_ikc_channel_info *param)
{
struct ihk_ikc_channel_desc *c;
int cpu;
ihk_os_t os = param->channel->remote_os;
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
c = param->channel;
cpu = c->send.queue->read_cpu;
if (cpu < 0 || cpu >= usrdata->num_channels) {
kprintf("Invalid connect source processor: %d\n", cpu);
return 1;
}
param->packet_handler = syscall_packet_handler;
INIT_LIST_HEAD(&usrdata->channels[cpu].wq_list);
spin_lock_init(&usrdata->channels[cpu].wq_list_lock);
usrdata->channels[cpu].c = c;
kprintf("syscall: MC CPU %d connected. c=%p\n", cpu, c);
return 0;
}
static int connect_handler2(struct ihk_ikc_channel_info *param)
{
struct ihk_ikc_channel_desc *c;
int cpu;
ihk_os_t os = param->channel->remote_os;
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
c = param->channel;
cpu = usrdata->num_channels - 1;
param->packet_handler = syscall_packet_handler;
INIT_LIST_HEAD(&usrdata->channels[cpu].wq_list);
spin_lock_init(&usrdata->channels[cpu].wq_list_lock);
usrdata->channels[cpu].c = c;
kprintf("syscall: MC CPU %d connected. c=%p\n", cpu, c);
return 0;
}
/*
 * Template for the per-CPU listen port.  prepare_ikc_channels() copies it
 * into each mcctrl_usrdata before listening; connections are handled by
 * connect_handler().
 */
static struct ihk_ikc_listen_param listen_param = {
.port = 501,
.handler = connect_handler,
.pkt_size = sizeof(struct ikc_scd_packet),
.queue_size = PAGE_SIZE,
/* NOTE(review): presumably has to match the value used by the peer -- confirm. */
.magic = 0x1129,
};
/*
 * Template for the second listen port; connections are handled by
 * connect_handler2(), which always uses the last channel slot.
 */
static struct ihk_ikc_listen_param listen_param2 = {
.port = 502,
.handler = connect_handler2,
.pkt_size = sizeof(struct ikc_scd_packet),
.queue_size = PAGE_SIZE,
/* NOTE(review): presumably has to match the value used by the peer -- confirm. */
.magic = 0x1329,
};
/**
 * \brief Allocate the mcctrl data of an OS instance and start listening for
 *        IKC connections.
 *
 * Allocates struct mcctrl_usrdata, the doorbell page and one channel slot per
 * CPU plus one extra slot (used by the port-502 channel), attaches the data
 * to the OS, initialises the peer channel registry and finally listens on
 * ports 501 and 502.
 *
 * \param os OS instance to prepare
 *
 * \return 0 on success, -EINVAL if the CPU information is unavailable or
 *         invalid, -ENOMEM on allocation failure, or the error returned by
 *         init_peer_channel_registry().  On failure everything allocated
 *         here is released and no data stays attached to the OS.
 */
int prepare_ikc_channels(ihk_os_t os)
{
	struct ihk_cpu_info *info;
	struct mcctrl_usrdata *usrdata;
	int error;

	/* Validate the inputs before anything is allocated. */
	info = ihk_os_get_cpu_info(os);
	if (!info) {
		printk("Error: cannot retrieve CPU info.\n");
		return -EINVAL;
	}
	if (info->n_cpus < 1) {
		printk("Error: # of cpu is invalid.\n");
		return -EINVAL;
	}

	usrdata = kzalloc(sizeof(struct mcctrl_usrdata), GFP_KERNEL);
	if (!usrdata) {
		printk("Error: cannot allocate mcctrl data.\n");
		return -ENOMEM;
	}

	usrdata->mcctrl_doorbell_va = (void *)__get_free_page(GFP_KERNEL);
	if (!usrdata->mcctrl_doorbell_va) {
		printk("Error: cannot allocate doorbell page.\n");
		error = -ENOMEM;
		goto out_free_usrdata;
	}
	usrdata->mcctrl_doorbell_pa = virt_to_phys(usrdata->mcctrl_doorbell_va);

	usrdata->num_channels = info->n_cpus + 1;
	usrdata->channels = kcalloc(usrdata->num_channels,
	                            sizeof(struct mcctrl_channel),
	                            GFP_KERNEL);
	if (!usrdata->channels) {
		printk("Error: cannot allocate channels.\n");
		error = -ENOMEM;
		goto out_free_doorbell;
	}

	usrdata->os = os;
	init_waitqueue_head(&usrdata->wq_prepare);
	INIT_LIST_HEAD(&usrdata->per_proc_list);
	spin_lock_init(&usrdata->per_proc_list_lock);
	memcpy(&usrdata->listen_param, &listen_param, sizeof listen_param);
	memcpy(&usrdata->listen_param2, &listen_param2, sizeof listen_param2);

	/* The connect handlers look the data up through the OS handle. */
	ihk_host_os_set_usrdata(os, usrdata);

	error = init_peer_channel_registry(usrdata);
	if (error) {
		goto out_detach;
	}

	/*
	 * Listen last: once a port is open a connection may arrive at any
	 * time, so everything the handlers touch must be ready by now.
	 */
	ihk_ikc_listen_port(os, &usrdata->listen_param);
	ihk_ikc_listen_port(os, &usrdata->listen_param2);

	return 0;

out_detach:
	ihk_host_os_set_usrdata(os, NULL);
	kfree(usrdata->channels);
out_free_doorbell:
	free_page((unsigned long)usrdata->mcctrl_doorbell_va);
out_free_usrdata:
	kfree(usrdata);
	return error;
}
/**
 * \brief Release the buffers and mappings that mcctrl_ikc_init() set up for
 *        one channel.
 *
 * \param os  OS instance the channel belongs to
 * \param pmc channel whose request/post/DMA buffers are freed and whose
 *            response page is unmapped
 */
void __destroy_ikc_channel(ihk_os_t os, struct mcctrl_channel *pmc)
{
	struct syscall_params *prm = &pmc->param;

	/* locally allocated pages */
	free_pages((unsigned long)prm->request_va, REQUEST_SHIFT - PAGE_SHIFT);
	free_page((unsigned long)prm->post_va);

	/* mapping of the peer's response page */
#ifdef CONFIG_MIC
	iounmap(prm->response_va);
#else
	ihk_device_unmap_virtual(ihk_os_to_dev(os), prm->response_va,
	                         PAGE_SIZE);
#endif
	ihk_device_unmap_memory(ihk_os_to_dev(os),
	                        prm->response_pa, PAGE_SIZE);

	free_pages((unsigned long)pmc->dma_buf, DMA_PIN_SHIFT - PAGE_SHIFT);
}
/**
 * \brief Tear down every connected channel of an OS instance and free its
 *        mcctrl data.
 *
 * The data is detached from the OS handle first, then each connected channel
 * is freed together with its buffers, and finally the doorbell page, the
 * channel array and the data structure itself are released.
 *
 * \param os OS instance prepared by prepare_ikc_channels()
 */
void destroy_ikc_channels(ihk_os_t os)
{
	struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
	struct mcctrl_channel *ch;
	int idx;

	ihk_host_os_set_usrdata(os, NULL);

	for (idx = 0; idx < usrdata->num_channels; idx++) {
		ch = &usrdata->channels[idx];
		if (!ch->c)
			continue;

		/* NOTE: ihk_ikc_disconnect() is not called; the channel is only freed. */
		ihk_ikc_free_channel(ch->c);
		__destroy_ikc_channel(os, ch);
		printk("Channel #%d freed.\n", idx);
	}

	free_page((unsigned long)usrdata->mcctrl_doorbell_va);
	kfree(usrdata->channels);
	kfree(usrdata);
}

View File

@@ -0,0 +1,191 @@
/**
* \file mcctrl.h
* License details are found in the file LICENSE.
* \brief
* define data structure
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
* \author Balazs Gerofi <bgerofi@riken.jp> \par
* Copyright (C) 2012 RIKEN AICS
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2012 - 2013 Hitachi, Ltd.
* \author Tomoki Shirasawa <tomoki.shirasawa.kk@hitachi-solutions.com> \par
* Copyright (C) 2012 - 2013 Hitachi, Ltd.
* \author Balazs Gerofi <bgerofi@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2013 The University of Tokyo
*/
/*
* HISTORY:
* 2013/11/07 hamada added <sys/resource.h> which is required by getrlimit(2)
* 2013/10/21 nakamura exclude interpreter's segment from data region
* 2013/10/11 nakamura mcexec: add a upper limit of the stack size
* 2013/10/11 nakamura mcexec: add a path prefix for interpreter search
* 2013/10/11 nakamura mcexec: add a interpreter invocation
* 2013/10/08 nakamura add a AT_ENTRY entry to the auxiliary vector
* 2013/09/02 shirasawa add terminate thread
* 2013/08/19 shirasawa mcexec forward signal to MIC process
* 2013/08/07 nakamura add page fault forwarding
* 2013/07/26 shirasawa mcexec print signum or exit status
* 2013/07/17 nakamura create more mcexec thread so that all cpu to be serviced
* 2013/04/17 nakamura add generic system call forwarding
*/
#ifndef HEADER_MCCTRL_H
#define HEADER_MCCTRL_H
#include <ihk/ihk_host_driver.h>
#include <uprotocol.h>
#include <linux/wait.h>
#include <ihk/ikc.h>
#include <ikc/master.h>
/*
 * Message codes carried in ikc_scd_packet.msg.  The values are not in
 * numerical order; they are grouped by purpose.
 */
#define SCD_MSG_PREPARE_PROCESS 0x1
#define SCD_MSG_PREPARE_PROCESS_ACKED 0x2
#define SCD_MSG_PREPARE_PROCESS_NACKED 0x7
#define SCD_MSG_SCHEDULE_PROCESS 0x3
#define SCD_MSG_INIT_CHANNEL 0x5
#define SCD_MSG_INIT_CHANNEL_ACKED 0x6
#define SCD_MSG_SYSCALL_ONESIDE 0x4
#define SCD_MSG_SEND_SIGNAL 0x8
#define SCD_MSG_CLEANUP_PROCESS 0x9
#define SCD_MSG_PROCFS_CREATE 0x10
#define SCD_MSG_PROCFS_DELETE 0x11
#define SCD_MSG_PROCFS_REQUEST 0x12
#define SCD_MSG_PROCFS_ANSWER 0x13
#define SCD_MSG_DEBUG_LOG 0x20
/* log2 of the size of the per-channel DMA buffer (mcctrl_channel.dma_buf). */
#define DMA_PIN_SHIFT 21
/*
 * Defined unconditionally, so code guarded by "#ifndef DO_USER_MODE" (for
 * example the mcctrl_syscall_init() call in driver.c) is compiled out.
 */
#define DO_USER_MODE
/* NOTE(review): presumably a pseudo system call number for core dumps -- confirm. */
#define __NR_coredump 999
/*
 * A length/address pair.
 * NOTE(review): presumably describes one chunk of a core image -- confirm.
 */
struct coretable {
int len;
unsigned long addr;
};
/* Packet exchanged over the mcctrl IKC channels. */
struct ikc_scd_packet {
/* message code, one of SCD_MSG_* */
int msg;
/* message-specific reference; used as the CPU/channel index */
int ref;
/* OS number (passed to the procfs handlers) */
int osnum;
/* process id (passed to the syscall and procfs handlers) */
int pid;
/* error code (used by NACK, procfs answer and signal completion) */
int err;
/* message-specific argument, e.g. a physical address */
unsigned long arg;
};
/*
 * Pairs an OS handle with a program load descriptor.
 * NOTE(review): presumably kept as per-open private data -- confirm.
 */
struct mcctrl_priv {
ihk_os_t os;
struct program_load_desc *desc;
};
/*
 * Parameter block exchanged during SCD_MSG_INIT_CHANNEL.  mcctrl_ikc_init()
 * reads response_page from the peer and writes the other three fields.
 */
struct ikc_scd_init_param {
unsigned long request_page;
unsigned long response_page;
unsigned long doorbell_page;
unsigned long post_page;
};
/* Contents of a channel's post page: eight machine words. */
struct syscall_post {
unsigned long v[8];
};
/*
 * Addresses of the pages used by one channel; filled in by mcctrl_ikc_init().
 * "_pa" fields are physical addresses, "_va" fields the matching kernel
 * virtual addresses.
 */
struct syscall_params {
unsigned long request_pa;
struct syscall_request *request_va;
/* response_rpa: address as reported by the peer; response_pa: after mapping */
unsigned long response_rpa, response_pa;
struct syscall_response *response_va;
unsigned long post_pa;
struct syscall_post *post_va;
/* shared by all channels of an OS (copied from mcctrl_usrdata) */
unsigned long doorbell_pa;
unsigned long *doorbell_va;
};
/*
 * List node that carries a wait queue together with a pid and a request flag.
 * NOTE(review): presumably linked on mcctrl_channel.wq_list -- confirm.
 */
struct wait_queue_head_list_node {
struct list_head list;
wait_queue_head_t wq_syscall;
int pid;
int req;
};
/* State of one IKC channel (one slot of mcctrl_usrdata.channels). */
struct mcctrl_channel {
/* channel descriptor; NULL until a connect handler binds the slot */
struct ihk_ikc_channel_desc *c;
struct syscall_params param;
struct ikc_scd_init_param init;
/* buffer of 2^DMA_PIN_SHIFT bytes allocated by mcctrl_ikc_init() */
void *dma_buf;
/* list and lock initialised by the connect handlers */
struct list_head wq_list;
ihk_spinlock_t wq_list_lock;
};
/*
 * Per-process record.
 * NOTE(review): presumably linked on mcctrl_usrdata.per_proc_list -- confirm.
 */
struct mcctrl_per_proc_data {
struct list_head list;
int pid;
unsigned long rpgtable; /* per process, not per OS */
};
/*
 * Per-OS state of mcctrl.  Allocated by prepare_ikc_channels(), attached to
 * the OS handle with ihk_host_os_set_usrdata() and released by
 * destroy_ikc_channels().
 */
struct mcctrl_usrdata {
/* private copies of the listen templates for ports 501 and 502 */
struct ihk_ikc_listen_param listen_param;
struct ihk_ikc_listen_param listen_param2;
ihk_os_t os;
/* number of CPUs of the OS plus one */
int num_channels;
/* array of num_channels channel slots */
struct mcctrl_channel *channels;
/* doorbell page shared by all channels */
unsigned long *mcctrl_doorbell_va;
unsigned long mcctrl_doorbell_pa;
int remaining_job;
int base_cpu;
int job_pos;
int mcctrl_dma_abort;
unsigned long last_thread_exec;
wait_queue_head_t wq_prepare;
struct list_head per_proc_list;
ihk_spinlock_t per_proc_list_lock;
/* NOTE(review): presumably the peer channel registry set up in syscall.c -- confirm. */
void **keys;
};
/*
 * Signal description.
 * NOTE(review): presumably used for forwarding a signal to the peer -- confirm.
 */
struct mcctrl_signal {
int cond;
int sig;
int pid;
int tid;
char info[128];
};
/* ikc.c */
int mcctrl_ikc_send(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp);
int mcctrl_ikc_send_msg(ihk_os_t os, int cpu, int msg, int ref, unsigned long arg);
int mcctrl_ikc_is_valid_thread(ihk_os_t os, int cpu);
/* NOTE(review): the definition of reserve_user_space() is not in ikc.c or driver.c -- locate it. */
int reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp,
unsigned long *endp);
/* syscall.c */
int init_peer_channel_registry(struct mcctrl_usrdata *ud);
int register_peer_channel(struct mcctrl_usrdata *ud, void *key, struct mcctrl_channel *ch);
int deregister_peer_channel(struct mcctrl_usrdata *ud, void *key, struct mcctrl_channel *ch);
struct mcctrl_channel *get_peer_channel(struct mcctrl_usrdata *ud, void *key);
int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall_request *sc);
/* Size of the file name buffers used by the procfs messages below. */
#define PROCFS_NAME_MAX 1000
/*
 * Read request and answer for a procfs file.  Fields marked (request) are
 * filled in by the requester, fields marked (answer) by the responder.
 */
struct procfs_read {
unsigned long pbuf; /* physical address of the host buffer (request) */
unsigned long offset; /* offset to read (request) */
int count; /* bytes to read (request) */
int eof; /* if eof is detected, 1 otherwise 0. (answer)*/
int ret; /* read bytes (answer) */
int status; /* non-zero if done (answer) */
int newcpu; /* migrated new cpu (answer) */
char fname[PROCFS_NAME_MAX]; /* procfs filename (request) */
};
/*
 * Create/delete request for a procfs entry.  procfs_create() and
 * procfs_delete() set status to 1 once they are done with the request.
 */
struct procfs_file {
int status; /* status of processing (answer) */
int mode; /* file mode (request) */
char fname[PROCFS_NAME_MAX]; /* procfs filename (request) */
};
#endif

View File

@@ -0,0 +1,488 @@
/**
* \file procfs.c
* License details are found in the file LICENSE.
* \brief
* mcctrl procfs
* \author Naoki Hamada <nao@axe.bz> \par
* Copyright (C) 2014 AXE, Inc.
*/
/*
* HISTORY:
*/
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/proc_fs.h>
#include <linux/list.h>
#include <linux/uaccess.h>
#include <linux/fs.h>
#include <linux/resource.h>
#include "mcctrl.h"
#include <linux/version.h>
//#define PROCFS_DEBUG
#ifdef PROCFS_DEBUG
#define dprintk(...) printk(__VA_ARGS__)
#else
#define dprintk(...)
#endif
static DECLARE_WAIT_QUEUE_HEAD(procfsq);
static ssize_t mckernel_procfs_read(struct file *file, char __user *buf,
size_t nbytes, loff_t *ppos);
/* A private data for the procfs driver. */
struct procfs_list_entry;
/* One McKernel procfs file or directory that mcctrl has created. */
struct procfs_list_entry {
/* link in procfs_file_list */
struct list_head list;
/* the Linux proc entry backing this node */
struct proc_dir_entry *entry;
/* entry of the containing directory, NULL for a top-level node */
struct procfs_list_entry *parent;
/* os, cpu and pid are filled in by procfs_create() */
ihk_os_t os;
int osnum;
int pid;
int cpu;
/* full path name, used as the lookup key */
char fname[PROCFS_NAME_MAX];
};
/*
* In the procfs_file_list, mckernel procfs files are
* listed in the manner that the leaf file is located
* always nearer to the list top than its parent node
* file.
*/
LIST_HEAD(procfs_file_list);
static ihk_spinlock_t procfs_file_list_lock;
/**
 * \brief llseek handler of the McKernel procfs files.
 *
 * \param file   file whose position is changed
 * \param offset new position (orig 0) or displacement (orig 1)
 * \param orig   0: absolute position, 1: relative to the current position
 *
 * \return the new file position, or -EINVAL for any other orig value.
 */
loff_t mckernel_procfs_lseek(struct file *file, loff_t offset, int orig)
{
	if (orig == 0) {
		file->f_pos = offset;
	} else if (orig == 1) {
		file->f_pos += offset;
	} else {
		return -EINVAL;
	}

	return file->f_pos;
}
/* File operations of the McKernel procfs files: seekable and read-only. */
static const struct file_operations mckernel_procfs_file_operations = {
.llseek = mckernel_procfs_lseek,
.read = mckernel_procfs_read,
.write = NULL,
};
/**
 * \brief Return specified procfs entry.
 *
 * \param p a name of the procfs file
 * \param osnum os number
 * \param mode if zero create a directory otherwise a file
 *
 * return value: NULL: Something wrong has occurred.
 *               otherwise: address of the procfs_list_entry of the procfs file
 *
 * p should not be NULL nor terminated by "/", and must be NUL-terminated
 * within PROCFS_NAME_MAX bytes.
 *
 * We create a procfs entry if there is not already one.
 * This process is recursive to the root of the procfs tree.
 */
/*
 * XXX: Two or more entries which have same name can be created.
 *
 * get_procfs_list_entry() avoids creating an entry which has already been created.
 * But, it allows creating an entry which is being created by another thread.
 *
 * This problem occurred when two requests which created files with a common
 * ancestor directory which was not explicitly created were racing.
 */
/*
 * NOTE(review): every recursion level keeps a PROCFS_NAME_MAX byte buffer on
 * the kernel stack, so deeply nested paths are expensive; consider a heap
 * buffer if deep hierarchies are expected.
 */
static struct procfs_list_entry *get_procfs_list_entry(char *p, int osnum, int mode)
{
	char *r;
	const char *leaf;
	struct proc_dir_entry *pde = NULL;
	struct proc_dir_entry *parent_pde;
	struct procfs_list_entry *e, *ret = NULL, *parent = NULL;
	char name[PROCFS_NAME_MAX];
	unsigned long irqflags;

	dprintk("get_procfs_list_entry: %s for osnum %d mode %o\n", p, osnum, mode);

	/* Look for an existing entry first. */
	irqflags = ihk_ikc_spinlock_lock(&procfs_file_list_lock);
	list_for_each_entry(e, &procfs_file_list, list) {
		if (strncmp(e->fname, p, PROCFS_NAME_MAX) == 0) {
			/* We found the entry */
			ret = e;
			break;
		}
	}
	ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflags);
	if (ret != NULL) {
		return ret;
	}

	r = strrchr(p, '/');
	if (r != NULL) {
		/* We have non-null parent dir. */
		strncpy(name, p, r - p);
		name[r - p] = '\0';
		parent = get_procfs_list_entry(name, osnum, 0);
		if (parent == NULL) {
			/* We could not get a parent procfs entry. Give up.*/
			return NULL;
		}
	}
	/* A name without '/' is created directly below the procfs root. */
	parent_pde = parent ? parent->entry : NULL;
	leaf = r ? r + 1 : p;

	/* Zeroed so that os, pid and cpu are defined for implicit directories. */
	ret = kzalloc(sizeof(struct procfs_list_entry), GFP_KERNEL);
	if (ret == NULL) {
		kprintf("ERROR: not enough memory to create PROCFS entry.\n");
		return NULL;
	}
	/* Fill the fname field of the entry */
	strncpy(ret->fname, p, PROCFS_NAME_MAX);
	ret->fname[PROCFS_NAME_MAX - 1] = '\0';

	if (mode == 0) {
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
		pde = proc_mkdir(leaf, parent_pde);
#else
		pde = proc_mkdir_data(leaf, 0555, parent_pde, ret);
#endif
	} else {
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
		pde = create_proc_entry(leaf, mode, parent_pde);
		if (pde)
			pde->proc_fops = &mckernel_procfs_file_operations;
#else
		pde = proc_create_data(leaf, mode, parent_pde,
		                       &mckernel_procfs_file_operations, ret);
#endif
	}
	if (pde == NULL) {
		kprintf("ERROR: cannot create a PROCFS entry for %s.\n", p);
		kfree(ret);
		return NULL;
	}
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
	pde->data = ret;
#endif
	ret->osnum = osnum;
	ret->entry = pde;
	ret->parent = parent;

	/* New entries go to the head, so a leaf always precedes its parent. */
	irqflags = ihk_ikc_spinlock_lock(&procfs_file_list_lock);
	list_add(&(ret->list), &procfs_file_list);
	ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflags);

	dprintk("get_procfs_list_entry: %s done\n", p);
	return ret;
}
/**
 * \brief Create a procfs entry.
 *
 * Maps the struct procfs_file request at arg, creates (or looks up) the
 * entry named by it and records the requesting os/cpu/pid in the entry.
 * The request's status field is set to 1 in every case so that the peer can
 * release the request.
 *
 * \param __os (opaque) os variable
 * \param ref cpuid of the requesting mckernel process
 * \param osnum osnum of the requesting mckernel process
 * \param pid pid of the requesting mckernel process
 * \param arg sent argument
 */
void procfs_create(void *__os, int ref, int osnum, int pid, unsigned long arg)
{
	ihk_device_t dev = ihk_os_to_dev(__os);
	struct procfs_list_entry *entry;
	struct procfs_file *req;
	unsigned long mapped;
	char path[PROCFS_NAME_MAX];
	int mode;

	dprintk("procfs_create: osnum: %d, cpu: %d, pid: %d\n", osnum, ref, pid);

	mapped = ihk_device_map_memory(dev, arg, sizeof(struct procfs_file));
	req = ihk_device_map_virtual(dev, mapped, sizeof(struct procfs_file), NULL, 0);

	dprintk("name: %s mode: %o\n", req->fname, req->mode);

	/* Work on a private copy of the request. */
	strncpy(path, req->fname, PROCFS_NAME_MAX);
	mode = req->mode;

	if (path[PROCFS_NAME_MAX - 1] != '\0') {
		printk("ERROR: procfs_creat: file name not properly terminated.\n");
	} else {
		entry = get_procfs_list_entry(path, osnum, mode);
		if (entry == NULL) {
			printk("ERROR: could not create a procfs entry for %s.\n", path);
		} else {
			entry->os = __os;
			entry->cpu = ref;
			entry->pid = pid;
		}
	}

	req->status = 1; /* Now the peer can free the data. */
	ihk_device_unmap_virtual(dev, req, sizeof(struct procfs_file));
	ihk_device_unmap_memory(dev, mapped, sizeof(struct procfs_file));

	dprintk("procfs_create: done\n");
}
/**
 * \brief Delete a procfs entry.
 *
 * Looks up the list entry whose name and os number match the request,
 * unlinks it from procfs_file_list, removes the corresponding proc entry
 * from its parent directory and frees the list entry.  The request is
 * acknowledged (status = 1) whether or not a matching entry was found.
 *
 * \param __os (opaque) os variable
 * \param osnum os number
 * \param arg sent argument
 */
void procfs_delete(void *__os, int osnum, unsigned long arg)
{
	ihk_device_t dev = ihk_os_to_dev(__os);
	unsigned long parg;
	struct procfs_file *f;
	struct procfs_list_entry *e;
	struct procfs_list_entry *parent = NULL;
	char name[PROCFS_NAME_MAX];
	char *r;
	unsigned long irqflags;

	dprintk("procfs_delete: \n");

	parg = ihk_device_map_memory(dev, arg, sizeof(struct procfs_file));
	f = ihk_device_map_virtual(dev, parg, sizeof(struct procfs_file), NULL, 0);

	dprintk("fname: %s.\n", f->fname);

	irqflags = ihk_ikc_spinlock_lock(&procfs_file_list_lock);
	list_for_each_entry(e, &procfs_file_list, list) {
		if ((strncmp(e->fname, f->fname, PROCFS_NAME_MAX) == 0) &&
		    (e->osnum == osnum)) {
			list_del(&e->list);
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
			e->entry->read_proc = NULL;
			e->entry->data = NULL;
#endif
			/* Save the parent before the entry is released. */
			parent = e->parent;
			kfree(e);

			/* The proc entry is registered under its last path component. */
			r = strrchr(f->fname, '/');
			strncpy(name, (r == NULL) ? f->fname : r + 1,
				PROCFS_NAME_MAX);
			/* strncpy() does not terminate when the source fills the buffer. */
			name[PROCFS_NAME_MAX - 1] = '\0';

			dprintk("found and remove %s from the list.\n", name);

			/*
			 * An entry may have no parent (procfs_exit() guards
			 * against that as well); dereferencing it
			 * unconditionally would oops.
			 * NOTE(review): like procfs_exit(), the proc entry is
			 * left alone in that case -- confirm this is intended.
			 */
			if (parent != NULL) {
				remove_proc_entry(name, parent->entry);
			}
			break;
		}
	}
	ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflags);

	f->status = 1; /* Now the peer can free the data. */

	ihk_device_unmap_virtual(dev, f, sizeof(struct procfs_file));
	ihk_device_unmap_memory(dev, parg, sizeof(struct procfs_file));

	dprintk("procfs_delete: done\n");
}
/**
 * \brief Handle an SCD_MSG_PROCFS_ANSWER message.
 *
 * Wakes up the tasks sleeping on the procfsq wait queue.  Neither
 * parameter influences that; err is only reported in the debug output.
 *
 * \param arg sent argument (not used here)
 * \param err error info (redundant)
 */
void procfs_answer(unsigned int arg, int err)
{
	dprintk("procfs: received SCD_MSG_PROCFS_ANSWER message(err = %d).\n", err);

	/* Let the waiters re-evaluate their wake-up condition. */
	wake_up_interruptible(&procfsq);
}
/**
 * \brief The callback function for McKernel procfs
 *
 * This function conforms to the 2) way of fs/proc/generic.c
 * from linux-2.6.39.4.
 *
 * A physically contiguous bounce buffer and a request descriptor are
 * allocated, the request is sent with mcctrl_ikc_send(), and the caller
 * sleeps on procfsq until the descriptor's status field becomes non-zero
 * (at most one second).  The answer is then copied to user space.
 *
 * \param file   file being read
 * \param buf    user buffer receiving the data
 * \param nbytes size of the user buffer
 * \param ppos   file position; advanced by the number of bytes returned
 *
 * \return number of bytes read (0 for an empty request or negative
 *         offset), -ENOMEM on allocation failure, -EFAULT if the copy to
 *         user space fails, the negative value reported by
 *         wait_event_interruptible_timeout() when interrupted, the error
 *         from mcctrl_ikc_send(), or -EIO on timeout / excessive retries.
 */
static ssize_t
mckernel_procfs_read(struct file *file, char __user *buf, size_t nbytes,
		     loff_t *ppos)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	char *kern_buffer;
	int order = 0;
	volatile struct procfs_read *r;
	struct ikc_scd_packet isp;
	int ret, retrycount = 0;
	long waited;
	unsigned long pbuf;
	unsigned long count = nbytes;
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
	struct proc_dir_entry *dp = PDE(inode);
	struct procfs_list_entry *e = dp->data;
#else
	struct procfs_list_entry *e = PDE_DATA(inode);
#endif
	loff_t offset = *ppos;

	dprintk("mckernel_procfs_read: invoked for %s, offset: %lld, count: %lu\n",
		e->fname, (long long)offset, count);

	if (count == 0 || offset < 0) {
		return 0;
	}

	/*
	 * Smallest power of two covering count, expressed as a page order
	 * (12 == log2 of the 4 KiB page size assumed here).  The shift is
	 * done on an unsigned long so that it stays well defined for large
	 * requests.
	 */
	while (order < BITS_PER_LONG - 1 && (1UL << order) < count)
		++order;
	if (order > 12) {
		order -= 12;
	}
	else {
		order = 1;
	}

	/* NOTE: we need physically contiguous memory to pass through IKC */
	kern_buffer = (char *)__get_free_pages(GFP_KERNEL, order);
	if (!kern_buffer) {
		printk("mckernel_procfs_read(): ERROR: allocating kernel buffer\n");
		return -ENOMEM;
	}
	pbuf = virt_to_phys(kern_buffer);

	r = kmalloc(sizeof(struct procfs_read), GFP_KERNEL);
	if (r == NULL) {
		/* Do not leak the bounce buffer allocated above. */
		ret = -ENOMEM;
		goto out_free_buffer;
	}

retry:
	dprintk("offset: %llx, count: %lu, cpu: %d\n",
		(unsigned long long)offset, count, e->cpu);

	r->pbuf = pbuf;
	r->eof = 0;
	r->ret = -EIO; /* default */
	r->status = 0;
	r->offset = offset;
	r->count = count;
	strncpy((char *)r->fname, e->fname, PROCFS_NAME_MAX);

	isp.msg = SCD_MSG_PROCFS_REQUEST;
	isp.ref = e->cpu;
	isp.arg = virt_to_phys(r);

	ret = mcctrl_ikc_send(e->os, e->cpu, &isp);
	if (ret < 0) {
		goto out; /* error */
	}

	/* Wait for a reply. */
	ret = -EIO; /* default exit code */
	dprintk("now wait for a relpy\n");

	/* Wait for the status field of the procfs_read structure set ready. */
	waited = wait_event_interruptible_timeout(procfsq, r->status != 0, HZ);
	if (waited == 0) {
		kprintf("ERROR: mckernel_procfs_read: timeout (1 sec).\n");
		goto out;
	}
	if (waited < 0) {
		/*
		 * Interrupted by a signal before any answer arrived; report
		 * that instead of interpreting the untouched descriptor.
		 */
		ret = (int)waited;
		goto out;
	}

	/* Wake up and check the result. */
	dprintk("mckernel_procfs_read: woke up. ret: %d, eof: %d\n", r->ret, r->eof);

	if ((r->ret == 0) && (r->eof != 1)) {
		/* A miss-hit caused by migration has occurred.
		 * We simply retry the query with a new CPU.
		 */
		if (retrycount++ > 10) {
			kprintf("ERROR: mckernel_procfs_read: excessive retry.\n");
			goto out;
		}
		e->cpu = r->newcpu;
		dprintk("retry\n");
		goto retry;
	}

	if (r->ret > 0) {
		if (copy_to_user(buf, kern_buffer, r->ret)) {
			kprintf("ERROR: mckernel_procfs_read: copy_to_user failed.\n");
			ret = -EFAULT;
			goto out;
		}
		*ppos += r->ret;
	}
	ret = r->ret;

out:
	kfree((void *)r);
out_free_buffer:
	free_pages((unsigned long)kern_buffer, order);
	return ret;
}
/**
 * \brief Initialization for procfs
 *
 * Currently there is nothing to set up; the function is intentionally
 * empty.
 *
 * \param osnum os number (unused)
 */
void procfs_init(int osnum)
{
	/* Nothing to do. */
}
/**
* \brief Finalization for procfs
*
* \param osnum os number
*/
void procfs_exit(int osnum) {
char buf[20], *r;
int error;
mm_segment_t old_fs = get_fs();
struct kstat stat;
struct procfs_list_entry *parent;
struct procfs_list_entry *e, *temp = NULL;
unsigned long irqflags;
dprintk("remove remaining mckernel procfs files.\n");
irqflags = ihk_ikc_spinlock_lock(&procfs_file_list_lock);
list_for_each_entry_safe(e, temp, &procfs_file_list, list) {
if (e->osnum == osnum) {
dprintk("found entry for %s.\n", e->fname);
list_del(&e->list);
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
e->entry->read_proc = NULL;
e->entry->data = NULL;
#endif
parent = e->parent;
r = strrchr(e->fname, '/');
if (r == NULL) {
r = e->fname;
} else {
r += 1;
}
if (parent) {
remove_proc_entry(r, parent->entry);
}
dprintk("free the entry\n");
kfree(e);
}
dprintk("iterate it.\n");
}
ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflags);
sprintf(buf, "/proc/mcos%d", osnum);
set_fs(KERNEL_DS);
error = vfs_stat (buf, &stat);
set_fs(old_fs);
if (error != 0) {
return;
}
printk("procfs_exit: We have to remove unexpectedly remaining %s.\n", buf);
/* remove remnant of previous mcos%d */
remove_proc_entry(buf + 6, NULL);
}

File diff suppressed because it is too large Load Diff