diff --git a/arch/x86/kernel/cpu.c b/arch/x86/kernel/cpu.c
index 82ee53f7..cd15aae3 100644
--- a/arch/x86/kernel/cpu.c
+++ b/arch/x86/kernel/cpu.c
@@ -438,6 +438,7 @@ void handle_interrupt(int vector, struct x86_regs *regs)
 	}
 
 	check_signal(0, regs);
+	check_need_resched();
 }
 
 void gpe_handler(struct x86_regs *regs)
@@ -447,6 +448,7 @@ void gpe_handler(struct x86_regs *regs)
 	arch_show_interrupt_context(regs);
 	set_signal(SIGILL, regs);
 	check_signal(0, regs);
+	check_need_resched();
 	// panic("GPF");
 }
diff --git a/kernel/include/cls.h b/kernel/include/cls.h
index 77170294..b8d7f52e 100644
--- a/kernel/include/cls.h
+++ b/kernel/include/cls.h
@@ -30,6 +30,9 @@ struct malloc_header {
 #define CPU_STATUS_RUNNING (2)
 extern ihk_spinlock_t cpu_status_lock;
 
+#define CPU_FLAG_NEED_RESCHED	0x1U
+#define CPU_FLAG_NEED_MIGRATE	0x2U
+
 struct cpu_local_var {
 	/* malloc */
 	struct malloc_header free_list;
@@ -54,6 +57,11 @@ struct cpu_local_var {
 	int fs;
 
 	struct list_head pending_free_pages;
+
+	unsigned int flags;
+
+	ihk_spinlock_t migq_lock;
+	struct list_head migq;
 } __attribute__((aligned(64)));
 
diff --git a/kernel/mem.c b/kernel/mem.c
index 90d492c7..d84e043c 100644
--- a/kernel/mem.c
+++ b/kernel/mem.c
@@ -248,6 +248,7 @@ out:
 	dkprintf("[%d]page_fault_handler(%p,%lx,%p): (%d)\n",
 	         ihk_mc_get_processor_id(), fault_addr, reason, regs, error);
+	check_need_resched();
 	return;
 }
 
diff --git a/kernel/process.c b/kernel/process.c
index ae824878..a3af4a4c 100644
--- a/kernel/process.c
+++ b/kernel/process.c
@@ -1711,6 +1711,9 @@ void sched_init(void)
 	cpu_local_var(runq_len) = 0;
 	ihk_mc_spinlock_init(&cpu_local_var(runq_lock));
 
+	INIT_LIST_HEAD(&cpu_local_var(migq));
+	ihk_mc_spinlock_init(&cpu_local_var(migq_lock));
+
 #ifdef TIMER_CPU_ID
 	if (ihk_mc_get_processor_id() == TIMER_CPU_ID) {
 		init_timers();
@@ -1719,6 +1722,72 @@ void sched_init(void)
 #endif
 }
 
+static void double_rq_lock(struct cpu_local_var *v1, struct cpu_local_var *v2)
+{
+	if (v1 < v2) {
+		ihk_mc_spinlock_lock_noirq(&v1->runq_lock);
+		ihk_mc_spinlock_lock_noirq(&v2->runq_lock);
+	} else {
+		ihk_mc_spinlock_lock_noirq(&v2->runq_lock);
+		ihk_mc_spinlock_lock_noirq(&v1->runq_lock);
+	}
+}
+
+static void double_rq_unlock(struct cpu_local_var *v1, struct cpu_local_var *v2)
+{
+	ihk_mc_spinlock_unlock_noirq(&v1->runq_lock);
+	ihk_mc_spinlock_unlock_noirq(&v2->runq_lock);
+}
+
+struct migrate_request {
+	struct list_head list;
+	struct process *proc;
+	struct waitq wq;
+};
+
+static void do_migrate(void)
+{
+	int cur_cpu_id = ihk_mc_get_processor_id();
+	struct cpu_local_var *cur_v = get_cpu_local_var(cur_cpu_id);
+	struct migrate_request *req, *tmp;
+
+	ihk_mc_spinlock_lock_noirq(&cur_v->migq_lock);
+	list_for_each_entry_safe(req, tmp, &cur_v->migq, list) {
+		int cpu_id;
+		struct cpu_local_var *v;
+
+		/* 0. check if migration is necessary */
+		list_del(&req->list);
+		if (req->proc->cpu_id != cur_cpu_id)	/* already not here */
+			goto ack;
+		if (CPU_ISSET(cur_cpu_id, &req->proc->cpu_set))	/* good affinity */
+			goto ack;
+
+		/* 1. select CPU */
+		for (cpu_id = 0; cpu_id < CPU_SETSIZE; cpu_id++)
+			if (CPU_ISSET(cpu_id, &req->proc->cpu_set))
+				break;
+		if (CPU_SETSIZE == cpu_id)	/* empty affinity (bug?) */
+			goto ack;
+
+		/* 2. migrate thread */
+		v = get_cpu_local_var(cpu_id);
+		double_rq_lock(cur_v, v);
+		list_del(&req->proc->sched_list);
+		cur_v->runq_len -= 1;
+		req->proc->cpu_id = cpu_id;
+		list_add_tail(&req->proc->sched_list, &v->runq);
+		v->runq_len += 1;
+		if (v->runq_len == 1)
+			ihk_mc_interrupt_cpu(get_x86_cpu_local_variable(cpu_id)->apic_id, 0xd1);
+		double_rq_unlock(cur_v, v);
+
+ack:
+		waitq_wakeup(&req->wq);
+	}
+	ihk_mc_spinlock_unlock_noirq(&cur_v->migq_lock);
+}
+
 void schedule(void)
 {
 	struct cpu_local_var *v = get_this_cpu_local_var();
@@ -1727,6 +1796,7 @@ void schedule(void)
 	unsigned long irqstate;
 	struct process *last;
 
+redo:
 	irqstate = ihk_mc_spinlock_lock(&(v->runq_lock));
 
 	next = NULL;
@@ -1744,18 +1814,22 @@ void schedule(void)
 		}
 	}
 
-	/* Pick a new running process */
-	list_for_each_entry_safe(proc, tmp, &(v->runq), sched_list) {
-		if (proc->status == PS_RUNNING) {
-			next = proc;
-			break;
-		}
-	}
-
-	/* No process? Run idle.. */
-	if (!next) {
+	if (v->flags & CPU_FLAG_NEED_MIGRATE) {
 		next = &cpu_local_var(idle);
-		v->status = CPU_STATUS_IDLE;
+	} else {
+		/* Pick a new running process */
+		list_for_each_entry_safe(proc, tmp, &(v->runq), sched_list) {
+			if (proc->status == PS_RUNNING) {
+				next = proc;
+				break;
+			}
+		}
+
+		/* No process? Run idle.. */
+		if (!next) {
+			next = &cpu_local_var(idle);
+			v->status = CPU_STATUS_IDLE;
+		}
 	}
 
 	if (prev != next) {
@@ -1793,6 +1867,21 @@ void schedule(void)
 	else {
 		ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
 	}
+
+	if (v->flags & CPU_FLAG_NEED_MIGRATE) {
+		v->flags &= ~CPU_FLAG_NEED_MIGRATE;
+		do_migrate();
+		goto redo;
+	}
+}
+
+void check_need_resched(void)
+{
+	struct cpu_local_var *v = get_this_cpu_local_var();
+	if (v->flags & CPU_FLAG_NEED_RESCHED) {
+		v->flags &= ~CPU_FLAG_NEED_RESCHED;
+		schedule();
+	}
 }
@@ -1837,6 +1926,49 @@ int sched_wakeup_process(struct process *proc, int valid_states)
 	return status;
 }
 
+/*
+ * 1. Add the current process to the waitq
+ * 2. Queue a migration request into the target CPU's queue
+ * 3. Kick migration on the CPU
+ * 4. Wait for completion of the migration
+ *
+ * struct migrate_request {
+ *     list;	// linked on the target CPU's migq
+ *     wq;
+ *     proc;
+ * }
+ *
+ * [expected processing on the target CPU]
+ * 1. Interrupted by IPI
+ * 2. Call schedule() via check_need_resched()
+ * 3. Do migration
+ * 4. Wake up this thread
+ */
+void sched_request_migrate(int cpu_id, struct process *proc)
+{
+	struct cpu_local_var *v = get_cpu_local_var(cpu_id);
+	struct migrate_request req = { .proc = proc };
+	unsigned long irqstate;
+	DECLARE_WAITQ_ENTRY(entry, cpu_local_var(current));
+
+	waitq_init(&req.wq);
+	waitq_prepare_to_wait(&req.wq, &entry, PS_UNINTERRUPTIBLE);
+
+	irqstate = ihk_mc_spinlock_lock(&v->migq_lock);
+	list_add_tail(&req.list, &v->migq);
+	ihk_mc_spinlock_unlock(&v->migq_lock, irqstate);
+
+	v->flags |= CPU_FLAG_NEED_RESCHED | CPU_FLAG_NEED_MIGRATE;
+	v->status = CPU_STATUS_RUNNING;
+
+	if (cpu_id != ihk_mc_get_processor_id())
+		ihk_mc_interrupt_cpu(/* Kick scheduler */
+				get_x86_cpu_local_variable(cpu_id)->apic_id, 0xd1);
+
+	schedule();
+	waitq_finish_wait(&req.wq, &entry);
+}
+
 /* Runq lock must be held here */
diff --git a/kernel/syscall.c b/kernel/syscall.c
index 01e3839e..fd2764b6 100644
--- a/kernel/syscall.c
+++ b/kernel/syscall.c
@@ -1926,45 +1926,51 @@ SYSCALL_DECLARE(sched_setaffinity)
 	size_t len = (size_t)ihk_mc_syscall_arg1(ctx);
 	cpu_set_t *u_cpu_set = (cpu_set_t *)ihk_mc_syscall_arg2(ctx);
-	cpu_set_t cpu_set;
+	cpu_set_t k_cpu_set, cpu_set;
 	struct process *thread;
-	int i;
+	int cpu_id;
 
-	if (sizeof(cpu_set_t) > len) {
-		kprintf("%s %d\n", __FILE__, __LINE__);
+	if (sizeof(k_cpu_set) > len) {
+		kprintf("%s:%d Too small buffer.\n", __FILE__, __LINE__);
 		return -EINVAL;
 	}
-	len = MIN2(len, sizeof(cpu_set_t));
+	len = MIN2(len, sizeof(k_cpu_set));
 
-	if (copy_from_user(cpu_local_var(current), &cpu_set, u_cpu_set, len)) {
-		kprintf("%s %d\n", __FILE__, __LINE__);
+	if (copy_from_user(cpu_local_var(current), &k_cpu_set, u_cpu_set, len)) {
+		kprintf("%s:%d copy_from_user failed.\n", __FILE__, __LINE__);
 		return -EFAULT;
 	}
 
-	thread = NULL;
+	// XXX: We should build something like cpu_available_mask in advance
+	CPU_ZERO(&cpu_set);
 	extern int num_processors;
-	for (i = 0; i < num_processors; i++) {
-		struct process *tmp_proc;
-		ihk_mc_spinlock_lock_noirq(&get_cpu_local_var(i)->runq_lock);
-		list_for_each_entry(tmp_proc, &get_cpu_local_var(i)->runq, sched_list) {
-			if (tmp_proc && tmp_proc->pid && tmp_proc->tid == tid) {
-				thread = tmp_proc;
-				hold_process(thread);
-				break;
-			}
-		}
-		ihk_mc_spinlock_unlock_noirq(&get_cpu_local_var(i)->runq_lock);
-		if (thread)
-			break;
-	}
-	if (!thread) {
-		kprintf("%s %d\n", __FILE__, __LINE__);
-		return -ESRCH;
+	for (cpu_id = 0; cpu_id < num_processors; cpu_id++)
+		if (CPU_ISSET(cpu_id, &k_cpu_set))
+			CPU_SET(cpu_id, &cpu_set);
+
+	for (cpu_id = 0; cpu_id < num_processors; cpu_id++) {
+		ihk_mc_spinlock_lock_noirq(&get_cpu_local_var(cpu_id)->runq_lock);
+		list_for_each_entry(thread, &get_cpu_local_var(cpu_id)->runq, sched_list)
+			if (thread->pid && thread->tid == tid)
+				goto found;	/* without unlocking runq_lock */
+		ihk_mc_spinlock_unlock_noirq(&get_cpu_local_var(cpu_id)->runq_lock);
 	}
+	kprintf("%s:%d Thread not found.\n", __FILE__, __LINE__);
+	return -ESRCH;
+
+found:
 	memcpy(&thread->cpu_set, &cpu_set, sizeof(cpu_set));
-	release_process(thread);
-	kprintf("%s %d\n", __FILE__, __LINE__);
-	return 0;
+
+	if (!CPU_ISSET(cpu_id, &thread->cpu_set)) {
+		hold_process(thread);
+		ihk_mc_spinlock_unlock_noirq(&get_cpu_local_var(cpu_id)->runq_lock);
+		sched_request_migrate(cpu_id, thread);
+		release_process(thread);
+		return 0;
+	} else {
+		ihk_mc_spinlock_unlock_noirq(&get_cpu_local_var(cpu_id)->runq_lock);
+		return 0;
+	}
 }
 
 // see linux-2.6.34.13/kernel/sched.c
@@ -1972,40 +1978,36 @@ SYSCALL_DECLARE(sched_getaffinity)
 {
 	int tid = (int)ihk_mc_syscall_arg0(ctx);
 	size_t len = (size_t)ihk_mc_syscall_arg1(ctx);
-	cpu_set_t *u_cpu_set = (cpu_set_t *)ihk_mc_syscall_arg2(ctx);
+	cpu_set_t k_cpu_set, *u_cpu_set = (cpu_set_t *)ihk_mc_syscall_arg2(ctx);
 	int ret;
-	struct process *thread;
+	int found = 0;
 	int i;
 
-	if (sizeof(cpu_set_t) > len) {
-		kprintf("%s %d\n", __FILE__, __LINE__);
+	if (sizeof(k_cpu_set) > len) {
+		kprintf("%s:%d Too small buffer.\n", __FILE__, __LINE__);
 		return -EINVAL;
 	}
-	len = MIN2(len, sizeof(cpu_set_t));
+	len = MIN2(len, sizeof(k_cpu_set));
 
-	thread = NULL;
 	extern int num_processors;
-	for (i = 0; i < num_processors; i++) {
-		struct process *tmp_proc;
+	for (i = 0; i < num_processors && !found; i++) {
+		struct process *thread;
 		ihk_mc_spinlock_lock_noirq(&get_cpu_local_var(i)->runq_lock);
-		list_for_each_entry(tmp_proc, &get_cpu_local_var(i)->runq, sched_list) {
-			if (tmp_proc && tmp_proc->pid && tmp_proc->tid == tid) {
-				thread = tmp_proc;
-				hold_process(thread);
+		list_for_each_entry(thread, &get_cpu_local_var(i)->runq, sched_list) {
+			if (thread->pid && thread->tid == tid) {
+				found = 1;
+				memcpy(&k_cpu_set, &thread->cpu_set, sizeof(k_cpu_set));
 				break;
 			}
 		}
 		ihk_mc_spinlock_unlock_noirq(&get_cpu_local_var(i)->runq_lock);
-		if (thread)
-			break;
 	}
-	if (!thread) {
-		kprintf("%s %d\n", __FILE__, __LINE__);
+	if (!found) {
+		kprintf("%s:%d Thread not found.\n", __FILE__, __LINE__);
 		return -ESRCH;
 	}
 
-	ret = copy_to_user(cpu_local_var(current), u_cpu_set, &thread->cpu_set, len);
-	release_process(thread);
+	ret = copy_to_user(cpu_local_var(current), u_cpu_set, &k_cpu_set, len);
 	kprintf("%s %d %d\n", __FILE__, __LINE__, ret);
 	if (ret < 0)
 		return ret;
@@ -2777,6 +2779,7 @@ long syscall(int num, ihk_mc_user_context_t *ctx)
 	}
 
 	check_signal(l, NULL);
+	check_need_resched();
 
 	return l;
 }
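
Testing note: below is a minimal user-space sketch (not part of the patch) that would exercise the new path end to end. Each sched_setaffinity() call with a single-CPU mask should queue a migrate_request, block in sched_request_migrate() on the request's wait queue, and return only once the victim CPU's do_migrate() has moved the thread and called waitq_wakeup(). It is illustrative only: it assumes the glibc affinity wrappers are forwarded to this kernel and that the CPU count reported by sysconf() matches the CPUs this kernel manages. Note that the lookup above matches on the explicit tid and has no Linux-style "pid 0 means the calling thread" shortcut, so the real tid is passed.

	#define _GNU_SOURCE
	#include <sched.h>
	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>

	int main(void)
	{
		/* sched_setaffinity above searches runqueues for this tid */
		pid_t tid = (pid_t)syscall(SYS_gettid);
		long ncpus = sysconf(_SC_NPROCESSORS_ONLN);
		int cpu;

		for (cpu = 0; cpu < ncpus; cpu++) {
			cpu_set_t set, out;

			CPU_ZERO(&set);
			CPU_SET(cpu, &set);

			/* Expected to block in sched_request_migrate() until
			 * the victim CPU's do_migrate() moves this thread. */
			if (sched_setaffinity(tid, sizeof(set), &set)) {
				perror("sched_setaffinity");
				return 1;
			}

			/* The mask should round-trip via sched_getaffinity(). */
			if (sched_getaffinity(tid, sizeof(out), &out)) {
				perror("sched_getaffinity");
				return 1;
			}
			if (!CPU_ISSET(cpu, &out)) {
				fprintf(stderr, "CPU %d missing from mask\n", cpu);
				return 1;
			}
		}
		printf("pinned to each of %ld CPUs\n", ncpus);
		return 0;
	}

If this loop hangs, the first suspects are a lost 0xd1 IPI or a return path that never reaches check_need_resched(), since migration only completes after the victim CPU re-enters schedule().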