Merge branch 'master' of postpeta.pccluster.org:mckernel

This commit is contained in:
Tomoki Shirasawa
2014-04-13 14:13:46 +09:00
18 changed files with 524 additions and 127 deletions

View File

@@ -52,8 +52,6 @@
#endif
struct x86_cpu_local_variables *get_x86_this_cpu_local(void);
void *get_x86_this_cpu_kstack(void);
void init_processors_local(int max_id);
void assign_processor_id(void);
void arch_delay(int);
@@ -313,9 +311,45 @@ static void enable_page_protection_fault(void)
return;
}
static int no_execute_available = 0;
/*
 * Turn on the no-execute page protection feature on the calling CPU by
 * setting the NXE bit in the EFER MSR.  Does nothing unless
 * no_execute_available has been set.
 */
static void enable_no_execute(void)
{
#define IA32_EFER_NXE (1UL << 11)
	if (no_execute_available) {
		/* Read-modify-write so every other EFER bit keeps its value. */
		unsigned long msr_val = rdmsr(MSR_EFER);

		msr_val |= IA32_EFER_NXE;
		wrmsr(MSR_EFER, msr_val);
	}
}
/*
 * Query CPUID leaf 0x80000001 and record in no_execute_available whether
 * EDX bit 20 (Execute Disable Bit) is set.  When it is, page-table
 * attribute handling is told to honour PTATTR_NO_EXECUTE via
 * enable_ptattr_no_execute().
 */
static void check_no_execute(void)
{
	/*
	 * CPUID overwrites EAX as well as EBX/ECX/EDX, so EAX must be a
	 * read-write operand; an input-only operand would let the compiler
	 * assume the register still holds the leaf number afterwards.
	 */
	uint32_t eax = 0x80000001;
	uint32_t edx;
	extern void enable_ptattr_no_execute(void);

	/* check Execute Disable Bit available bit */
	asm ("cpuid" : "+a" (eax), "=d" (edx) : : "%rbx", "%rcx");
	no_execute_available = (edx & (1U << 20)) ? 1 : 0;
	kprintf("no_execute_available: %d\n", no_execute_available);

	if (no_execute_available) {
		enable_ptattr_no_execute();
	}

	return;
}
void init_cpu(void)
{
enable_page_protection_fault();
enable_no_execute();
init_fpu();
init_lapic();
init_syscall();
@@ -332,6 +366,8 @@ void setup_x86(void)
init_page_table();
check_no_execute();
init_cpu();
kprintf("setup_x86 done.\n");
@@ -368,8 +404,8 @@ void setup_x86_ap(void (*next_func)(void))
}
void arch_show_interrupt_context(const void *reg);
void set_signal(int, void *);
void check_signal(long, void *);
void set_signal(int sig, void *regs);
void check_signal(unsigned long rc, void *regs);
void handle_interrupt(int vector, struct x86_regs *regs)
{

View File

@@ -114,6 +114,7 @@ enum ihk_mc_pt_attribute {
PTATTR_WRITABLE = 0x02,
PTATTR_USER = 0x04,
PTATTR_LARGEPAGE = 0x80,
PTATTR_NO_EXECUTE = 0x8000000000000000,
PTATTR_UNCACHABLE = 0x10000,
PTATTR_FOR_USER = 0x20000,
};
@@ -142,8 +143,8 @@ static inline uintptr_t pte_get_phys(pte_t *ptep)
}
struct page_table;
void set_pte(pte_t *ppte, unsigned long phys, int attr);
pte_t *get_pte(struct page_table *pt, void *virt, int attr);
void set_pte(pte_t *ppte, unsigned long phys, enum ihk_mc_pt_attribute attr);
pte_t *get_pte(struct page_table *pt, void *virt, enum ihk_mc_pt_attribute attr);
void *early_alloc_page(void);
void *get_last_early_heap(void);
@@ -156,5 +157,5 @@ void *map_fixed_area(unsigned long phys, unsigned long size, int uncachable);
#define AP_TRAMPOLINE_SIZE 0x4000
/* Local is cachable */
#define IHK_IKC_QUEUE_PT_ATTR (PTATTR_WRITABLE | PTATTR_UNCACHABLE)
#define IHK_IKC_QUEUE_PT_ATTR (PTATTR_NO_EXECUTE | PTATTR_WRITABLE | PTATTR_UNCACHABLE)
#endif

View File

@@ -46,6 +46,8 @@ struct x86_cpu_local_variables {
} __attribute__((packed));
struct x86_cpu_local_variables *get_x86_cpu_local_variable(int id);
struct x86_cpu_local_variables *get_x86_this_cpu_local(void);
void *get_x86_this_cpu_kstack(void);
#endif

View File

@@ -137,7 +137,7 @@ struct tss64 {
struct x86_regs {
unsigned long r11, r10, r9, r8;
unsigned long rdi, rsi, rdx, rcx, rbx, rax;
unsigned long rdi, rsi, rdx, rcx, rbx, rax, rbp;
unsigned long error, rip, cs, rflags, rsp, ss;
};

View File

@@ -44,6 +44,7 @@ SYSCALL_HANDLED(28, madvise)
SYSCALL_HANDLED(34, pause)
SYSCALL_HANDLED(39, getpid)
SYSCALL_HANDLED(56, clone)
SYSCALL_DELEGATED(57, fork)
SYSCALL_HANDLED(59, execve)
SYSCALL_HANDLED(60, exit)
SYSCALL_HANDLED(62, kill)

View File

@@ -8,6 +8,9 @@
*/
/*
* HISTORY
*
* 2014/04 - bgerofi: save/restore rbp when entering/leaving kernel (for glibc)
* 2013/?? - bgerofi + shimosawa: handle rsp correctly for nested interrupts
*/
#define X86_CPU_LOCAL_OFFSET_TSS 128
@@ -22,6 +25,7 @@
#define USER_DS (56 + 3)
#define PUSH_ALL_REGS \
pushq %rbp; \
pushq %rax; \
pushq %rbx; \
pushq %rcx; \
@@ -42,7 +46,8 @@
popq %rdx; \
popq %rcx; \
popq %rbx; \
popq %rax
popq %rax; \
popq %rbp
.data
.globl generic_common_handlers
@@ -62,7 +67,7 @@ vector=vector+1
common_interrupt:
PUSH_ALL_REGS
movq 80(%rsp), %rdi
movq 88(%rsp), %rdi
movq %rsp, %rsi
call handle_interrupt /* Enter C code */
POP_ALL_REGS
@@ -78,7 +83,7 @@ page_fault:
cld
PUSH_ALL_REGS
movq %cr2, %rdi
movq 80(%rsp),%rsi
movq 88(%rsp),%rsi
movq %rsp,%rdx
movq __page_fault_handler_address(%rip), %rax
andq %rax, %rax

View File

@@ -219,7 +219,15 @@ static struct page_table *__alloc_new_pt(enum ihk_mc_ap_flag ap_flag)
* but L2 and L1 do not!
*/
#define ATTR_MASK (PTATTR_WRITABLE | PTATTR_USER | PTATTR_ACTIVE)
static enum ihk_mc_pt_attribute attr_mask = PTATTR_WRITABLE | PTATTR_USER | PTATTR_ACTIVE;
#define ATTR_MASK attr_mask
/*
 * Add PTATTR_NO_EXECUTE to attr_mask, the value ATTR_MASK expands to.
 * Called once the CPU has been found to support the no-execute bit.
 */
void enable_ptattr_no_execute(void)
{
	attr_mask = attr_mask | PTATTR_NO_EXECUTE;
}
#if 0
static unsigned long attr_to_l4attr(enum ihk_mc_pt_attribute attr)
{
@@ -266,7 +274,7 @@ static unsigned long attr_to_l1attr(enum ihk_mc_pt_attribute attr)
| ((uint64_t)(l1i) << PTL1_SHIFT) \
)
void set_pte(pte_t *ppte, unsigned long phys, int attr)
void set_pte(pte_t *ppte, unsigned long phys, enum ihk_mc_pt_attribute attr)
{
if (attr & PTATTR_LARGEPAGE) {
*ppte = phys | attr_to_l2attr(attr) | PFL2_SIZE;
@@ -285,7 +293,7 @@ void set_pte(pte_t *ppte, unsigned long phys, int attr)
* and returns a pointer to the PTE corresponding to the
* virtual address.
*/
pte_t *get_pte(struct page_table *pt, void *virt, int attr, enum ihk_mc_ap_flag ap_flag)
pte_t *get_pte(struct page_table *pt, void *virt, enum ihk_mc_pt_attribute attr, enum ihk_mc_ap_flag ap_flag)
{
int l4idx, l3idx, l2idx, l1idx;
unsigned long v = (unsigned long)virt;
@@ -341,7 +349,7 @@ pte_t *get_pte(struct page_table *pt, void *virt, int attr, enum ihk_mc_ap_flag
#endif
static int __set_pt_page(struct page_table *pt, void *virt, unsigned long phys,
int attr)
enum ihk_mc_pt_attribute attr)
{
int l4idx, l3idx, l2idx, l1idx;
unsigned long v = (unsigned long)virt;
@@ -1666,21 +1674,25 @@ void *map_fixed_area(unsigned long phys, unsigned long size, int uncachable)
{
unsigned long poffset, paligned;
int i, npages;
int flag = PTATTR_WRITABLE | PTATTR_ACTIVE;
void *v = (void *)fixed_virt;
enum ihk_mc_pt_attribute attr;
poffset = phys & (PAGE_SIZE - 1);
paligned = phys & PAGE_MASK;
npages = (poffset + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
attr = PTATTR_WRITABLE | PTATTR_ACTIVE;
#if 0 /* In the case of LAPIC MMIO, something will happen */
attr |= PTATTR_NO_EXECUTE;
#endif
if (uncachable) {
flag |= PTATTR_UNCACHABLE;
attr |= PTATTR_UNCACHABLE;
}
kprintf("map_fixed: %lx => %p (%d pages)\n", paligned, v, npages);
for (i = 0; i < npages; i++) {
if(__set_pt_page(init_pt, (void *)fixed_virt, paligned, flag)){
if(__set_pt_page(init_pt, (void *)fixed_virt, paligned, attr)){
return NULL;
}
@@ -1695,7 +1707,7 @@ void *map_fixed_area(unsigned long phys, unsigned long size, int uncachable)
void init_low_area(struct page_table *pt)
{
set_pt_large_page(pt, 0, 0, PTATTR_WRITABLE);
set_pt_large_page(pt, 0, 0, PTATTR_NO_EXECUTE|PTATTR_WRITABLE);
}
static void init_vsyscall_area(struct page_table *pt)

View File

@@ -17,6 +17,7 @@
#include <ihk/cpu.h>
#include <ihk/debug.h>
#include <cls.h>
#include <cpulocal.h>
#include <syscall.h>
#include <process.h>
#include <string.h>
@@ -89,10 +90,12 @@ int obtain_clone_cpuid() {
SYSCALL_DECLARE(rt_sigreturn)
{
struct process *proc = cpu_local_var(current);
unsigned long *regs;
struct x86_cpu_local_variables *v = get_x86_this_cpu_local();
struct x86_regs *regs;
regs = (struct x86_regs *)v->kernel_stack;
--regs;
asm volatile ("movq %%gs:132,%0" : "=r" (regs));
regs -= 16;
memcpy(regs, proc->sigstack, 128);
proc->sigmask.__val[0] = proc->supmask.__val[0];
@@ -106,8 +109,9 @@ extern void interrupt_syscall(int all);
extern int num_processors;
void
do_signal(unsigned long rc, unsigned long *regs, struct process *proc, struct sig_pending *pending)
do_signal(unsigned long rc, void *regs0, struct process *proc, struct sig_pending *pending)
{
struct x86_regs *regs = regs0;
struct k_sigaction *k;
int sig;
__sigset_t w;
@@ -116,15 +120,17 @@ do_signal(unsigned long rc, unsigned long *regs, struct process *proc, struct si
for(w = pending->sigmask.__val[0], sig = 0; w; sig++, w >>= 1);
if(sig == SIGKILL || sig == SIGTERM)
terminate(0, sig, (ihk_mc_user_context_t *)regs[14]);
terminate(0, sig, (ihk_mc_user_context_t *)regs->rsp);
irqstate = ihk_mc_spinlock_lock(&proc->sighandler->lock);
if(regs == NULL){ /* call from syscall */
asm volatile ("movq %%gs:132,%0" : "=r" (regs));
regs -= 16;
struct x86_cpu_local_variables *v = get_x86_this_cpu_local();
regs = (struct x86_regs *)v->kernel_stack;
--regs;
}
else{
rc = regs[9]; /* rax */
rc = regs->rax;
}
k = proc->sighandler->action + sig - 1;
@@ -136,15 +142,15 @@ do_signal(unsigned long rc, unsigned long *regs, struct process *proc, struct si
else if(k->sa.sa_handler){
unsigned long *usp; /* user stack */
usp = (void *)regs[14];
usp = (void *)regs->rsp;
memcpy(proc->sigstack, regs, 128);
proc->sigrc = rc;
usp--;
*usp = (unsigned long)k->sa.sa_restorer;
regs[4] = (unsigned long)sig;
regs[11] = (unsigned long)k->sa.sa_handler;
regs[14] = (unsigned long)usp;
regs->rdi = (unsigned long)sig;
regs->rip = (unsigned long)k->sa.sa_handler;
regs->rsp = (unsigned long)usp;
kfree(pending);
proc->sigmask.__val[0] |= pending->sigmask.__val[0];
ihk_mc_spinlock_unlock(&proc->sighandler->lock, irqstate);
@@ -154,13 +160,14 @@ do_signal(unsigned long rc, unsigned long *regs, struct process *proc, struct si
ihk_mc_spinlock_unlock(&proc->sighandler->lock, irqstate);
if(sig == SIGCHLD || sig == SIGURG)
return;
terminate(0, sig, (ihk_mc_user_context_t *)regs[14]);
terminate(0, sig, (ihk_mc_user_context_t *)regs->rsp);
}
}
void
check_signal(unsigned long rc, unsigned long *regs)
check_signal(unsigned long rc, void *regs0)
{
struct x86_regs *regs = regs0;
struct process *proc;
struct sig_pending *pending;
struct sig_pending *next;
@@ -175,7 +182,7 @@ check_signal(unsigned long rc, unsigned long *regs)
if(proc == NULL || proc->pid == 0)
return;
if(regs != NULL && (regs[14] & 0x8000000000000000))
if(regs != NULL && (regs->rsp & 0x8000000000000000))
return;
for(;;){
@@ -310,16 +317,17 @@ do_kill(int pid, int tid, int sig)
}
void
set_signal(int sig, unsigned long *regs)
set_signal(int sig, void *regs0)
{
struct x86_regs *regs = regs0;
struct process *proc = cpu_local_var(current);
if(proc == NULL || proc->pid == 0)
return;
if((__sigmask(sig) & proc->sigmask.__val[0]) ||
(regs[14] & 0x8000000000000000))
terminate(0, sig, (ihk_mc_user_context_t *)regs[14]);
(regs->rsp & 0x8000000000000000))
terminate(0, sig, (ihk_mc_user_context_t *)regs->rsp);
else
do_kill(proc->pid, proc->tid, sig);
}

View File

@@ -65,6 +65,7 @@ struct program_load_desc {
int cpu;
int pid;
int err;
int stack_prot;
unsigned long entry;
unsigned long user_start;
unsigned long user_end;

View File

@@ -358,7 +358,6 @@ retry_alloc:
kfree(wqhln);
wqhln = wqhln_iter;
list_del(&wqhln->list);
printk("DEBUG: wait queue head was already available in syscall wait\n");
break;
}
}

View File

@@ -1126,10 +1126,11 @@ static void clear_pte_range(uintptr_t start, uintptr_t len)
}
if (addr < end) {
zap_vma_ptes(vma, addr, end-addr);
dprintk("clear_pte_range() 0x%lx - 0x%lx OK\n",
vma->vm_start, vma->vm_end);
}
addr = end;
}
up_read(&mm->mmap_sem);
return;
}
@@ -1138,6 +1139,7 @@ int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall
{
int error;
long ret = -1;
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
dprintk("__do_in_kernel_syscall(%p,%p,%ld %lx)\n", os, c, sc->number, sc->args[0]);
switch (sc->number) {
@@ -1146,6 +1148,11 @@ int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall
break;
case __NR_munmap:
/* Set new remote page table if not zero */
if (sc->args[2]) {
usrdata->rpgtable = sc->args[2];
}
clear_pte_range(sc->args[0], sc->args[1]);
ret = 0;
break;

View File

@@ -159,6 +159,7 @@ struct program_load_desc *load_elf(FILE *fp, char **interp_pathp)
fseek(fp, hdr.e_phoff, SEEK_SET);
j = 0;
desc->num_sections = nhdrs;
desc->stack_prot = PROT_READ | PROT_WRITE | PROT_EXEC; /* default */
for (i = 0; i < hdr.e_phnum; i++) {
if (fread(&phdr, sizeof(phdr), 1, fp) < 1) {
__eprintf("Loading phdr failed (%d)\n", i);
@@ -205,6 +206,12 @@ struct program_load_desc *load_elf(FILE *fp, char **interp_pathp)
load_addr = phdr.p_vaddr - phdr.p_offset;
}
}
if (phdr.p_type == PT_GNU_STACK) {
desc->stack_prot = PROT_NONE;
desc->stack_prot |= (phdr.p_flags & PF_R)? PROT_READ: 0;
desc->stack_prot |= (phdr.p_flags & PF_W)? PROT_WRITE: 0;
desc->stack_prot |= (phdr.p_flags & PF_X)? PROT_EXEC: 0;
}
}
desc->pid = getpid();
desc->entry = hdr.e_entry;
@@ -495,14 +502,19 @@ struct thread_data_s {
int cpu;
int ret;
pthread_mutex_t *lock;
pthread_barrier_t *init_ready;
} *thread_data;
int ncpu;
pid_t master_tid;
pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
pthread_barrier_t init_ready;
static void *main_loop_thread_func(void *arg)
{
struct thread_data_s *td = (struct thread_data_s *)arg;
pthread_barrier_wait(&init_ready);
td->ret = main_loop(td->fd, td->cpu, td->lock);
return NULL;
@@ -570,6 +582,53 @@ void print_usage(char **argv)
fprintf(stderr, "Usage: %s [-c target_core] [<mcos-id>] (program) [args...]\n", argv[0]);
}
/*
 * Record the calling thread's id in master_tid and install sendsig() as
 * the SA_SIGINFO handler for signals 1..64, except the job-control and
 * child-status signals (SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, SIGTTIN,
 * SIGTTOU), whose current disposition is left untouched.
 */
void init_sigaction(void)
{
	int i;

	master_tid = gettid();

	for (i = 1; i <= 64; i++) {
		struct sigaction act;

		if (i == SIGCHLD || i == SIGCONT || i == SIGSTOP ||
		    i == SIGTSTP || i == SIGTTIN || i == SIGTTOU) {
			continue;
		}

		/*
		 * Start from the current action so its mask and other flags
		 * are preserved.  If the query fails (e.g. EINVAL for a
		 * signal number the C library does not expose), act was never
		 * filled in and must not be read or installed.
		 */
		if (sigaction(i, NULL, &act) != 0) {
			continue;
		}

		act.sa_sigaction = sendsig;
		act.sa_flags &= ~(SA_RESTART);
		act.sa_flags |= SA_SIGINFO;

		/*
		 * Best effort: installing a handler is expected to fail for
		 * signals that cannot be caught (e.g. SIGKILL), so the result
		 * is deliberately ignored.
		 */
		sigaction(i, &act, NULL);
	}
}
/*
 * Start one syscall-serving thread per entry 0..ncpu of thread_data and
 * wait until all of them have reached the init_ready barrier.
 *
 * fd is the device descriptor handed to every worker.  The barrier is
 * sized ncpu + 2: ncpu + 1 workers plus the calling thread.
 * Exits the process if a worker cannot be created.
 */
void init_worker_threads(int fd)
{
	int i;

	pthread_mutex_init(&lock, NULL);
	pthread_barrier_init(&init_ready, NULL, ncpu + 2);

	for (i = 0; i <= ncpu; ++i) {
		int ret;

		thread_data[i].fd = fd;
		thread_data[i].cpu = i;
		thread_data[i].lock = &lock;
		thread_data[i].init_ready = &init_ready;
		ret = pthread_create(&thread_data[i].thread_id, NULL,
		                     &main_loop_thread_func, &thread_data[i]);

		/*
		 * pthread_create() reports failure with a positive error
		 * number, never a negative value.  Missing a failure here
		 * would leave the barrier below waiting for a thread that
		 * does not exist.
		 */
		if (ret != 0) {
			printf("ERROR: creating syscall threads\n");
			exit(1);
		}
	}

	pthread_barrier_wait(&init_ready);
}
char dev[64];
int main(int argc, char **argv)
{
// int fd;
@@ -582,11 +641,9 @@ int main(int argc, char **argv)
int envs_len;
char *envs;
char *args;
char dev[64];
char **a;
char *p;
int i;
pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
FILE *interp = NULL;
char *interp_path;
char *path;
@@ -666,9 +723,9 @@ int main(int argc, char **argv)
__dprintf("target_core: %d, device: %s, command: ", target_core, dev);
for (i = 1; i < argc; ++i) {
printf("%s ", argv[i]);
__dprintf("%s ", argv[i]);
}
printf("\n");
__dprintf("\n");
fp = fopen(argv[1], "rb");
if (!fp) {
@@ -825,33 +882,9 @@ int main(int argc, char **argv)
__dprint("mccmd server initialized\n");
#endif
master_tid = gettid();
for (i = 1; i <= 64; i++)
if (i != SIGCHLD && i != SIGCONT && i != SIGSTOP &&
i != SIGTSTP && i != SIGTTIN && i != SIGTTOU){
struct sigaction act;
init_sigaction();
sigaction(i, NULL, &act);
act.sa_sigaction = sendsig;
act.sa_flags &= ~(SA_RESTART);
act.sa_flags |= SA_SIGINFO;
sigaction(i, &act, NULL);
}
for (i = 0; i <= ncpu; ++i) {
int ret;
thread_data[i].fd = fd;
thread_data[i].cpu = i;
thread_data[i].lock = &lock;
ret = pthread_create(&thread_data[i].thread_id, NULL,
&main_loop_thread_func, &thread_data[i]);
if (ret < 0) {
printf("ERROR: creating syscall threads\n");
exit(1);
}
}
init_worker_threads(fd);
if (ioctl(fd, MCEXEC_UP_START_IMAGE, (unsigned long)desc) != 0) {
perror("exec");
@@ -933,7 +966,6 @@ kill_thread(unsigned long cpu)
}
}
#if 0
static long do_strncpy_from_user(int fd, void *dest, void *src, unsigned long n)
{
struct strncpy_from_user_desc desc;
@@ -952,7 +984,6 @@ static long do_strncpy_from_user(int fd, void *dest, void *src, unsigned long n)
return desc.result;
}
#endif
#define SET_ERR(ret) if (ret == -1) ret = -errno
@@ -964,6 +995,7 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
int sig;
int term;
struct timeval tv;
char pathbuf[PATH_MAX];
w.cpu = cpu;
w.pid = getpid();
@@ -982,9 +1014,17 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
switch (w.sr.number) {
case __NR_open:
__dprintf("open: %s\n", (char *)w.sr.args[0]);
ret = do_strncpy_from_user(fd, pathbuf, (void *)w.sr.args[0], PATH_MAX);
if (ret >= PATH_MAX) {
ret = -ENAMETOOLONG;
}
if (ret < 0) {
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
break;
}
__dprintf("open: %s\n", pathbuf);
fn = (char *)w.sr.args[0];
fn = pathbuf;
if(!strcmp(fn, "/proc/meminfo")){
fn = "/admin/fs/attached/files/proc/meminfo";
}
@@ -1067,6 +1107,55 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
break;
}
#endif
case __NR_fork: {
int child;
child = fork();
switch (child) {
/* Error */
case -1:
do_syscall_return(fd, cpu, -1, 0, 0, 0, 0);
break;
/* Child process */
case 0: {
int i;
/* Reopen device fd */
close(fd);
fd = open(dev, O_RDWR);
if (fd < 0) {
/* TODO: tell parent something went wrong? */
fprintf(stderr, "ERROR: opening %s\n", dev);
return 1;
}
/* Reinit signals and syscall threads */
init_sigaction();
init_worker_threads(fd);
__dprintf("pid(%d): signals and syscall threads OK\n",
getpid());
/* TODO: does the forked thread run in a pthread context? */
for (i = 0; i <= ncpu; ++i) {
pthread_join(thread_data[i].thread_id, NULL);
}
return 0;
}
/* Parent */
default:
do_syscall_return(fd, cpu, child, 0, 0, 0, 0);
break;
}
break;
}
default:
ret = do_generic_syscall(&w);
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);

View File

@@ -69,13 +69,16 @@ static int process_msg_prepare_process(unsigned long rphys)
unsigned long flags;
uintptr_t interp_obase = -1;
uintptr_t interp_nbase = -1;
enum ihk_mc_pt_attribute attr;
attr = PTATTR_NO_EXECUTE | PTATTR_WRITABLE | PTATTR_FOR_USER;
sz = sizeof(struct program_load_desc)
+ sizeof(struct program_image_section) * 16;
npages = ((rphys + sz - 1) >> PAGE_SHIFT) - (rphys >> PAGE_SHIFT) + 1;
phys = ihk_mc_map_memory(NULL, rphys, sz);
if((p = ihk_mc_map_virtual(phys, npages, PTATTR_WRITABLE | PTATTR_FOR_USER)) == NULL){
if((p = ihk_mc_map_virtual(phys, npages, attr)) == NULL){
ihk_mc_unmap_memory(NULL, phys, sz);
return -ENOMEM;
}
@@ -264,7 +267,7 @@ static int process_msg_prepare_process(unsigned long rphys)
args_envs_rp = ihk_mc_map_memory(NULL, (unsigned long)p->args, p->args_len);
dkprintf("args_envs_rp: 0x%lX\n", args_envs_rp);
if((args_envs_r = (char *)ihk_mc_map_virtual(args_envs_rp, args_envs_npages,
PTATTR_WRITABLE | PTATTR_FOR_USER)) == NULL){
attr)) == NULL){
goto err;
}
dkprintf("args_envs_r: 0x%lX\n", args_envs_r);
@@ -285,7 +288,7 @@ static int process_msg_prepare_process(unsigned long rphys)
args_envs_rp = ihk_mc_map_memory(NULL, (unsigned long)p->envs, p->envs_len);
dkprintf("args_envs_rp: 0x%lX\n", args_envs_rp);
if((args_envs_r = (char *)ihk_mc_map_virtual(args_envs_rp, args_envs_npages,
PTATTR_WRITABLE | PTATTR_FOR_USER)) == NULL){
attr)) == NULL){
goto err;
}
dkprintf("args_envs_r: 0x%lX\n", args_envs_r);
@@ -363,6 +366,9 @@ static void process_msg_init_acked(struct ihk_ikc_channel_desc *c, unsigned long
{
struct ikc_scd_init_param *param = (void *)pphys;
struct syscall_params *lparam;
enum ihk_mc_pt_attribute attr;
attr = PTATTR_NO_EXECUTE | PTATTR_WRITABLE | PTATTR_FOR_USER;
lparam = &cpu_local_var(scp);
if(cpu_local_var(syscall_channel2) == c)
@@ -372,7 +378,7 @@ static void process_msg_init_acked(struct ihk_ikc_channel_desc *c, unsigned long
REQUEST_PAGE_COUNT * PAGE_SIZE);
if((lparam->request_va = ihk_mc_map_virtual(lparam->request_pa,
REQUEST_PAGE_COUNT,
PTATTR_WRITABLE | PTATTR_FOR_USER)) == NULL){
attr)) == NULL){
// TODO:
panic("ENOMEM");
}
@@ -383,7 +389,7 @@ static void process_msg_init_acked(struct ihk_ikc_channel_desc *c, unsigned long
PAGE_SIZE);
if((lparam->doorbell_va = ihk_mc_map_virtual(lparam->doorbell_pa,
DOORBELL_PAGE_COUNT,
PTATTR_WRITABLE | PTATTR_FOR_USER)) == NULL){
attr)) == NULL){
// TODO:
panic("ENOMEM");
}
@@ -392,7 +398,7 @@ static void process_msg_init_acked(struct ihk_ikc_channel_desc *c, unsigned long
lparam->post_pa = ihk_mc_map_memory(NULL, param->post_page,
PAGE_SIZE);
if((lparam->post_va = ihk_mc_map_virtual(lparam->post_pa, 1,
PTATTR_WRITABLE | PTATTR_FOR_USER)) == NULL){
attr)) == NULL){
// TODO:
panic("ENOMEM");
}

View File

@@ -157,8 +157,8 @@ struct process_vm {
struct process *create_process(unsigned long user_pc);
struct process *clone_process(struct process *org,
unsigned long pc, unsigned long sp);
struct process *clone_process(struct process *org, unsigned long pc,
unsigned long sp, int clone_flags);
void destroy_process(struct process *proc);
void hold_process(struct process *proc);
void free_process(struct process *proc);

View File

@@ -109,6 +109,7 @@ struct program_load_desc {
int cpu;
int pid;
int err;
int stack_prot;
unsigned long entry;
unsigned long user_start;
unsigned long user_end;
@@ -206,9 +207,9 @@ struct syscall_params {
SYSCALL_ARG_##a2(2); SYSCALL_ARG_##a3(3); \
SYSCALL_ARG_##a4(4); SYSCALL_ARG_##a5(5);
#define SYSCALL_FOOTER return do_syscall(&request, ctx, ihk_mc_get_processor_id())
#define SYSCALL_FOOTER return do_syscall(&request, ctx, ihk_mc_get_processor_id(), 0)
extern long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx, int cpu);
extern long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx, int cpu, int pid);
extern int obtain_clone_cpuid();
extern long syscall_generic_forwarding(int n, ihk_mc_user_context_t *ctx);

View File

@@ -164,8 +164,8 @@ static struct ihk_mc_interrupt_handler query_free_mem_handler = {
.priv = NULL,
};
void set_signal(int, unsigned long *);
void check_signal(long, unsigned long *);
void set_signal(int sig, void *regs);
void check_signal(unsigned long rc, void *regs);
static void unhandled_page_fault(struct process *proc, void *fault_addr, void *regs)
{

View File

@@ -47,6 +47,10 @@
#define KERNEL_STACK_NR_PAGES 24
extern long do_arch_prctl(unsigned long code, unsigned long address);
static void insert_vm_range_list(struct process_vm *vm,
struct vm_range *newrange);
static int copy_user_ranges(struct process *proc, struct process *org);
enum ihk_mc_pt_attribute vrflag_to_ptattr(unsigned long flag);
static int init_process_vm(struct process *owner, struct process_vm *vm)
{
@@ -105,6 +109,7 @@ struct process *create_process(unsigned long user_pc)
proc->vm = (struct process_vm *)(proc + 1);
if(init_process_vm(proc, proc->vm) != 0){
kfree(proc->sigshared);
kfree(proc->sighandler);
ihk_mc_free_pages(proc, KERNEL_STACK_NR_PAGES);
return NULL;
@@ -117,11 +122,12 @@ struct process *create_process(unsigned long user_pc)
}
struct process *clone_process(struct process *org, unsigned long pc,
unsigned long sp)
unsigned long sp, int clone_flags)
{
struct process *proc;
if((proc = ihk_mc_alloc_pages(KERNEL_STACK_NR_PAGES, IHK_MC_AP_NOWAIT)) == NULL){
if ((proc = ihk_mc_alloc_pages(KERNEL_STACK_NR_PAGES,
IHK_MC_AP_NOWAIT)) == NULL) {
return NULL;
}
@@ -136,24 +142,198 @@ struct process *clone_process(struct process *org, unsigned long pc,
memcpy(proc->uctx, org->uctx, sizeof(*org->uctx));
ihk_mc_modify_user_context(proc->uctx, IHK_UCR_STACK_POINTER, sp);
ihk_mc_modify_user_context(proc->uctx, IHK_UCR_PROGRAM_COUNTER, pc);
ihk_atomic_inc(&org->vm->refcount);
proc->vm = org->vm;
proc->rlimit_stack = org->rlimit_stack;
proc->sighandler = org->sighandler;
ihk_atomic_inc(&org->sighandler->use);
/* clone() */
if (clone_flags & CLONE_VM) {
ihk_atomic_inc(&org->vm->refcount);
proc->vm = org->vm;
proc->sighandler = org->sighandler;
ihk_atomic_inc(&org->sighandler->use);
proc->sigshared = org->sigshared;
ihk_atomic_inc(&org->sigshared->use);
proc->sigshared = org->sigshared;
ihk_atomic_inc(&org->sigshared->use);
ihk_mc_spinlock_init(&proc->sigpendinglock);
INIT_LIST_HEAD(&proc->sigpending);
ihk_mc_spinlock_init(&proc->sigpendinglock);
INIT_LIST_HEAD(&proc->sigpending);
}
/* fork() */
else {
dkprintf("fork(): sighandler\n");
proc->sighandler = kmalloc(sizeof(struct sig_handler),
IHK_MC_AP_NOWAIT);
if (!proc->sighandler) {
goto err_free_proc;
}
dkprintf("fork(): sigshared\n");
proc->sigshared = kmalloc(sizeof(struct sig_shared), IHK_MC_AP_NOWAIT);
if (!proc->sigshared) {
goto err_free_sighandler;
}
memset(proc->sighandler, '\0', sizeof(struct sig_handler));
ihk_atomic_set(&proc->sighandler->use, 1);
ihk_mc_spinlock_init(&proc->sighandler->lock);
ihk_atomic_set(&proc->sigshared->use, 1);
ihk_mc_spinlock_init(&proc->sigshared->lock);
INIT_LIST_HEAD(&proc->sigshared->sigpending);
ihk_mc_spinlock_init(&proc->sigpendinglock);
INIT_LIST_HEAD(&proc->sigpending);
proc->vm = (struct process_vm *)(proc + 1);
dkprintf("fork(): init_process_vm()\n");
if (init_process_vm(proc, proc->vm) != 0) {
goto err_free_sigshared;
}
memcpy(&proc->vm->region, &org->vm->region, sizeof(struct vm_regions));
dkprintf("fork(): copy_user_ranges()\n");
/* Copy user-space mappings.
* TODO: do this with COW later? */
if (copy_user_ranges(proc, org) != 0) {
goto err_free_sigshared;
}
dkprintf("fork(): copy_user_ranges() OK\n");
}
ihk_mc_spinlock_init(&proc->spin_sleep_lock);
proc->spin_sleep = 0;
return proc;
err_free_sigshared:
kfree(proc->sigshared);
err_free_sighandler:
ihk_mc_free_pages(proc->sighandler, KERNEL_STACK_NR_PAGES);
err_free_proc:
ihk_mc_free_pages(proc, KERNEL_STACK_NR_PAGES);
return NULL;
}
/*
 * Duplicate the user memory ranges of org into proc (used by fork()).
 *
 * For every vm_range on org's list a new vm_range with the same start,
 * end, flag, memobj and objoff is allocated (taking a reference on the
 * memobj if there is one).  Each present PTE in the range is then backed
 * by a freshly allocated page in proc's page table, filled with a copy
 * of the source contents, and the new range is inserted into proc->vm.
 *
 * org->vm->memory_range_lock is held for the whole walk.
 *
 * Returns 0 on success and -1 on failure.  On failure, ranges and pages
 * already installed in proc are NOT rolled back, and the memobj
 * reference taken for the failing range is not dropped (see TODO below).
 */
static int copy_user_ranges(struct process *proc, struct process *org)
{
	struct vm_range *src_range;
	struct vm_range *range;

	ihk_mc_spinlock_lock_noirq(&org->vm->memory_range_lock);

	/* Iterate original process' vm_range list and take a copy one-by-one */
	list_for_each_entry(src_range, &org->vm->vm_range_list, list) {
		void *ptepgaddr;
		size_t ptepgsize;
		int ptep2align;
		void *pg_vaddr;
		size_t pgsize;
		void *vaddr;
		int p2align;
		enum ihk_mc_pt_attribute attr;
		pte_t *ptep;

		range = kmalloc(sizeof(struct vm_range), IHK_MC_AP_NOWAIT);
		if (!range) {
			goto err_rollback;
		}

		INIT_LIST_HEAD(&range->list);
		range->start = src_range->start;
		range->end = src_range->end;
		range->flag = src_range->flag;
		range->memobj = src_range->memobj;
		range->objoff = src_range->objoff;
		if (range->memobj) {
			memobj_ref(range->memobj);
		}

		/* Copy actual mappings */
		vaddr = (void *)range->start;
		while ((unsigned long)vaddr < range->end) {
			/* Get source PTE */
			ptep = ihk_mc_pt_lookup_pte(org->vm->page_table, vaddr,
					&ptepgaddr, &ptepgsize, &ptep2align);

			/* Nothing mapped here in the source: skip one base page */
			if (!ptep || pte_is_null(ptep) || !pte_is_present(ptep)) {
				vaddr += PAGE_SIZE;
				continue;
			}

			dkprintf("copy_user_ranges(): 0x%lx PTE found\n",
					(unsigned long)vaddr);

			/* Page size */
			if (arch_get_smaller_page_size(NULL, -1, &ptepgsize,
						&ptep2align)) {
				kprintf("ERROR: copy_user_ranges() "
						"(%p,%lx-%lx %lx,%lx):"
						"get pgsize failed\n", org->vm,
						range->start, range->end,
						range->flag, (unsigned long)vaddr);

				goto err_free_range_rollback;
			}

			pgsize = ptepgsize;
			p2align = ptep2align;
			dkprintf("copy_user_ranges(): page size: %ld\n",
					(long)pgsize);

			/*
			 * Get physical page.
			 * NOTE(review): a single page is requested while pgsize
			 * bytes are copied into it below -- confirm that pgsize
			 * can never exceed what one page of this alignment holds.
			 */
			pg_vaddr = ihk_mc_alloc_aligned_pages(1, p2align, IHK_MC_AP_NOWAIT);
			if (!pg_vaddr) {
				kprintf("ERROR: copy_user_ranges() allocating new page\n");
				goto err_free_range_rollback;
			}
			dkprintf("copy_user_ranges(): phys page allocated\n");

			/* Copy content */
			memcpy(pg_vaddr, vaddr, pgsize);
			dkprintf("copy_user_ranges(): memcpy OK\n");

			/* Set up new PTE */
			attr = vrflag_to_ptattr(range->flag);
			if (ihk_mc_pt_set_range(proc->vm->page_table, vaddr,
						vaddr + pgsize, virt_to_phys(pg_vaddr), attr)) {
				kprintf("ERROR: copy_user_ranges() "
						"(%p,%lx-%lx %lx,%lx):"
						"set range failed.\n",
						org->vm, range->start, range->end,
						range->flag, (unsigned long)vaddr);

				/* The page never made it into the page table: release it */
				ihk_mc_free_pages(pg_vaddr, 1);
				goto err_free_range_rollback;
			}
			dkprintf("copy_user_ranges(): new PTE set\n");

			vaddr += pgsize;
		}

		insert_vm_range_list(proc->vm, range);
	}

	ihk_mc_spinlock_unlock_noirq(&org->vm->memory_range_lock);

	return 0;

err_free_range_rollback:
	kfree(range);

err_rollback:

	/* TODO: implement rollback (installed ranges/pages, memobj reference) */

	ihk_mc_spinlock_unlock_noirq(&org->vm->memory_range_lock);

	return -1;
}
int update_process_page_table(struct process *process,
@@ -167,6 +347,7 @@ int update_process_page_table(struct process *process,
attr = flag | PTATTR_USER | PTATTR_FOR_USER;
attr |= (range->flag & VR_PROT_WRITE)? PTATTR_WRITABLE: 0;
attr |= (range->flag & VR_PROT_EXEC)? 0: PTATTR_NO_EXECUTE;
p = range->start;
while (p < range->end) {
@@ -509,6 +690,10 @@ enum ihk_mc_pt_attribute vrflag_to_ptattr(unsigned long flag)
attr |= PTATTR_WRITABLE;
}
if (!(flag & VR_PROT_EXEC)) {
attr |= PTATTR_NO_EXECUTE;
}
return attr;
}
@@ -549,10 +734,9 @@ int add_process_memory_range(struct process *process,
range->start, range->end, range->end - range->start,
range->flag);
} else {
dkprintf("range: 0x%lX - 0x%lX => 0x%lX - 0x%lX (%ld) [%lx]\n",
range->start, range->end, range->phys, range->phys +
range->end - range->start, range->end - range->start,
range->flag);
dkprintf("range: 0x%lX - 0x%lX (%ld) [%lx]\n",
range->start, range->end, range->end - range->start,
range->flag);
}
if (flag & VR_REMOTE) {
@@ -1018,7 +1202,9 @@ static int do_page_fault_process(struct process *proc, void *fault_addr0, uint64
if (((range->flag & VR_PROT_MASK) == VR_PROT_NONE)
|| ((reason & PF_WRITE)
&& !(range->flag & VR_PROT_WRITE))) {
&& !(range->flag & VR_PROT_WRITE))
|| ((reason & PF_INSTR)
&& !(range->flag & VR_PROT_EXEC))) {
error = -EFAULT;
kprintf("[%d]do_page_fault_process(%p,%lx,%lx):"
"access denied. %d\n",
@@ -1146,8 +1332,8 @@ int init_process_stack(struct process *process, struct program_load_desc *pn,
start = end - size;
vrflag = VR_STACK | VR_DEMAND_PAGING;
vrflag |= VR_PROT_READ | VR_PROT_WRITE | VR_PROT_EXEC;
vrflag |= VRFLAG_PROT_TO_MAXPROT(vrflag);
vrflag |= PROT_TO_VR_FLAG(pn->stack_prot);
vrflag |= VR_MAXPROT_READ | VR_MAXPROT_WRITE | VR_MAXPROT_EXEC;
#define NOPHYS ((uintptr_t)-1)
if ((rc = add_process_memory_range(process, start, end, NOPHYS,
vrflag, NULL, 0)) != 0) {

View File

@@ -92,8 +92,8 @@ static char *syscall_name[] MCKERNEL_UNUSED = {
#undef SYSCALL_DELEGATED
};
void check_signal(long rc, unsigned long *regs);
void do_signal(long rc, unsigned long *regs, struct process *proc, struct sig_pending *pending);
void check_signal(unsigned long rc, void *regs);
void do_signal(long rc, void *regs, struct process *proc, struct sig_pending *pending);
int copy_from_user(struct process *, void *, const void *, size_t);
int copy_to_user(struct process *, void *, const void *, size_t);
@@ -101,7 +101,7 @@ int copy_to_user(struct process *, void *, const void *, size_t);
static void do_mod_exit(int status);
#endif
static void send_syscall(struct syscall_request *req, int cpu)
static void send_syscall(struct syscall_request *req, int cpu, int pid)
{
struct ikc_scd_packet packet;
struct syscall_response *res;
@@ -147,7 +147,7 @@ static void send_syscall(struct syscall_request *req, int cpu)
#ifdef SYSCALL_BY_IKC
packet.msg = SCD_MSG_SYSCALL_ONESIDE;
packet.ref = cpu;
packet.pid = cpu_local_var(current)->pid;
packet.pid = pid ? pid : cpu_local_var(current)->pid;
packet.arg = scp->request_rpa;
dkprintf("send syscall, nr: %d, pid: %d\n", req->number, packet.pid);
ihk_ikc_send(syscall_channel, &packet, 0);
@@ -155,7 +155,8 @@ static void send_syscall(struct syscall_request *req, int cpu)
}
long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx, int cpu)
long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx,
int cpu, int pid)
{
struct syscall_response *res;
struct syscall_request req2 IHK_DMA_ALIGN;
@@ -175,7 +176,7 @@ long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx, int cpu
}
res = scp->response_va;
send_syscall(req, cpu);
send_syscall(req, cpu, pid);
dkprintf("SC(%d)[%3d] waiting for host.. \n",
ihk_mc_get_processor_id(),
@@ -202,7 +203,7 @@ long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx, int cpu
req2.args[0] = PAGER_RESUME_PAGE_FAULT;
req2.args[1] = error;
send_syscall(&req2, cpu);
send_syscall(&req2, cpu, pid);
}
}
@@ -237,7 +238,7 @@ terminate(int rc, int sig, ihk_mc_user_context_t *ctx)
/* XXX: send SIGKILL to all threads in this process */
flush_process_memory(proc); /* temporary hack */
do_syscall(&request, ctx, ihk_mc_get_processor_id());
do_syscall(&request, ctx, ihk_mc_get_processor_id(), 0);
#define IS_DETACHED_PROCESS(proc) (1) /* should be implemented in the future */
proc->status = PS_ZOMBIE;
@@ -282,7 +283,7 @@ SYSCALL_DECLARE(exit_group)
/* XXX: send SIGKILL to all threads in this process */
do_syscall(&request, ctx, ihk_mc_get_processor_id());
do_syscall(&request, ctx, ihk_mc_get_processor_id(), 0);
#define IS_DETACHED_PROCESS(proc) (1) /* should be implemented in the future */
proc->status = PS_ZOMBIE;
@@ -306,6 +307,8 @@ static void clear_host_pte(uintptr_t addr, size_t len)
ihk_mc_syscall_arg0(&ctx) = addr;
ihk_mc_syscall_arg1(&ctx) = len;
/* NOTE: 3rd parameter denotes new rpgtable of host process (if not zero) */
ihk_mc_syscall_arg2(&ctx) = 0;
lerror = syscall_generic_forwarding(__NR_munmap, &ctx);
if (lerror) {
@@ -961,30 +964,56 @@ SYSCALL_DECLARE(clone)
ihk_mc_user_context_t ctx1;
struct syscall_request request1 IHK_DMA_ALIGN;
if(clone_flags == 0x1200011){
// fork()
return -EOPNOTSUPP;
}
dkprintf("[%d] clone(): stack_pointr: 0x%lX\n",
ihk_mc_get_processor_id(),
(unsigned long)ihk_mc_syscall_arg1(ctx));
dkprintf("clone(): stack_pointr passed in: 0x%lX, stack pointer of caller: 0x%lx\n",
(unsigned long)ihk_mc_syscall_arg1(ctx),
(unsigned long)ihk_mc_syscall_sp(ctx));
cpuid = obtain_clone_cpuid();
new = clone_process(cpu_local_var(current), ihk_mc_syscall_pc(ctx),
ihk_mc_syscall_arg1(ctx));
ihk_mc_syscall_arg1(ctx) ? ihk_mc_syscall_arg1(ctx) :
ihk_mc_syscall_sp(ctx),
clone_flags);
if (!new) {
return -ENOMEM;
}
// /* Allocate new pid */
// new->pid = ihk_atomic_inc_return(&pid_cnt);
if (clone_flags & CLONE_VM) {
new->pid = cpu_local_var(current)->pid;
}
/* fork() a new process on the host */
else {
request1.number = __NR_fork;
new->pid = do_syscall(&request1, &ctx1, ihk_mc_get_processor_id(), 0);
if (new->pid == -1) {
kprintf("ERROR: forking host process\n");
/* TODO: clean-up new */
return -EFAULT;
}
new->pid = cpu_local_var(current)->pid;
dkprintf("fork(): new pid: %d\n", new->pid);
/* clear user space PTEs and set new rpgtable so that subsequent
* page faults will look up the right mappings */
request1.number = __NR_munmap;
request1.args[0] = new->vm->region.user_start;
request1.args[1] = new->vm->region.user_end -
new->vm->region.user_start;
/* 3rd parameter denotes new rpgtable of host process */
request1.args[2] = virt_to_phys(new->vm->page_table);
dkprintf("fork(): requesting PTE clear and rpgtable (0x%lx) update\n",
request1.args[2]);
if (do_syscall(&request1, &ctx1, ihk_mc_get_processor_id(), new->pid)) {
kprintf("ERROR: clearing PTEs in host process\n");
}
}
request1.number = __NR_gettid;
new->tid = do_syscall(&request1, &ctx1, cpuid);
new->tid = do_syscall(&request1, &ctx1, cpuid, new->pid);
if (clone_flags & CLONE_PARENT_SETTID) {
dkprintf("clone_flags & CLONE_PARENT_SETTID: 0x%lX\n",
@@ -1000,6 +1029,20 @@ SYSCALL_DECLARE(clone)
new->thread.clear_child_tid = (int*)ihk_mc_syscall_arg3(ctx);
}
if (clone_flags & CLONE_CHILD_SETTID) {
unsigned long phys;
dkprintf("clone_flags & CLONE_CHILD_SETTID: 0x%lX\n",
(unsigned long)ihk_mc_syscall_arg3(ctx));
if (ihk_mc_pt_virt_to_phys(new->vm->page_table,
(void *)ihk_mc_syscall_arg3(ctx), &phys)) {
kprintf("ERROR: looking up physical addr for child process\n");
return -EFAULT;
}
*((int*)phys_to_virt(phys)) = new->tid;
}
if (clone_flags & CLONE_SETTLS) {
dkprintf("clone_flags & CLONE_SETTLS: 0x%lX\n",
(unsigned long)ihk_mc_syscall_arg4(ctx));
@@ -1023,7 +1066,7 @@ SYSCALL_DECLARE(clone)
SYSCALL_DECLARE(set_tid_address)
{
cpu_local_var(current)->thread.clear_child_tid =
(int*)ihk_mc_syscall_arg2(ctx);
(int*)ihk_mc_syscall_arg0(ctx);
return cpu_local_var(current)->pid;
}
@@ -1485,7 +1528,7 @@ SYSCALL_DECLARE(futex)
request.args[0] = __phys;
int r = do_syscall(&request, ctx, ihk_mc_get_processor_id());
int r = do_syscall(&request, ctx, ihk_mc_get_processor_id(), 0);
if (r < 0) {
return -EFAULT;