schedule(): hold runq lock for the entire duration of context switching

Releasing the runq lock after loading page tables but before the actual
context switch can leave execution in an inconsistent state if the current
process is descheduled from an IRQ between these two steps.
This patch keeps the runq lock held with IRQs disabled so that the context
switch becomes a single atomic operation.
Balazs Gerofi
2015-08-10 12:37:12 +09:00
parent 8d6c97ea5c
commit 9ba40dc0ff
4 changed files with 34 additions and 5 deletions
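
For context, the window described in the commit message looked roughly like this in the pre-patch schedule() (a simplified sketch pieced together from the hunks below, not the literal source):

    /* Pre-patch ordering in schedule(), condensed: */
    irqstate = ihk_mc_spinlock_lock(&(v->runq_lock));       /* IRQs off     */
    /* ... pick next, load next's page tables, set up TLS ... */
    ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);       /* IRQs back on */
    /*
     * A timer IRQ arriving here can invoke schedule() again and
     * deschedule "prev" while the CPU is already running on "next"'s
     * page tables but still on "prev"'s registers and stack.
     */
    last = ihk_mc_switch_context(&prev->ctx, &next->ctx, prev);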


@@ -1012,6 +1012,18 @@ void ihk_mc_init_context(ihk_mc_kernel_context_t *new_ctx,
}
extern char enter_user_mode[];
/*
* Release runq_lock before entering user space.
* This is needed because schedule() holds the runq lock throughout
* the context switch; when a new process is created, it starts
* execution in enter_user_mode, which in turn calls this function.
*/
void release_runq_lock(void)
{
ihk_mc_spinlock_unlock(&(cpu_local_var(runq_lock)),
cpu_local_var(runq_irqstate));
}
/*@
@ requires \valid(ctx);

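The comment above describes a lock hand-off: schedule() acquires the runq lock, and a different execution context, the newly created process starting in enter_user_mode, is the one that releases it. The self-contained userspace sketch below illustrates that pattern with a toy spinlock and a pthread standing in for the new process; every name in it is invented for the illustration and none of it is McKernel code:

    /* handoff.c -- toy illustration of the release_runq_lock() hand-off.
     * Build: cc -std=c11 -pthread handoff.c */
    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_flag runq_lock = ATOMIC_FLAG_INIT;   /* stand-in for the runq spinlock */

    static void lock(void)
    {
    	while (atomic_flag_test_and_set_explicit(&runq_lock, memory_order_acquire))
    		;                                   /* spin until the flag is clear */
    }

    static void unlock(void)
    {
    	atomic_flag_clear_explicit(&runq_lock, memory_order_release);
    }

    /* Stand-in for a new process whose first step is
     * enter_user_mode -> release_runq_lock(). */
    static void *new_task(void *arg)
    {
    	(void)arg;
    	unlock();                                   /* release the lock we inherited */
    	printf("new task: runq lock released, continuing\n");
    	return NULL;
    }

    int main(void)
    {
    	pthread_t t;

    	lock();                                     /* "schedule()" takes the lock ...    */
    	pthread_create(&t, NULL, new_task, NULL);   /* ... and "switches" to the new task */
    	pthread_join(t, NULL);

    	lock();                                     /* succeeds only because new_task unlocked */
    	unlock();
    	printf("hand-off complete\n");
    	return 0;
    }

In the kernel the hand-off also carries the saved IRQ flags, which is what the new cpu_local_var(runq_irqstate) field in the hunks below is for.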

@@ -206,6 +206,7 @@ x86_syscall:
.globl enter_user_mode
enter_user_mode:
callq release_runq_lock
movq $0, %rdi
movq %rsp, %rsi
call check_signal


@@ -46,6 +46,7 @@ struct cpu_local_var {
struct process_vm idle_vm;
ihk_spinlock_t runq_lock;
unsigned long runq_irqstate;
struct process *current;
struct list_head runq;
size_t runq_len;


@@ -2062,6 +2062,12 @@ static void do_migrate(void);
static void idle(void)
{
struct cpu_local_var *v = get_this_cpu_local_var();
/* Release runq_lock before starting the idle loop.
* See comments at release_runq_lock().
*/
ihk_mc_spinlock_unlock(&(cpu_local_var(runq_lock)),
cpu_local_var(runq_irqstate));
if(v->status == CPU_STATUS_RUNNING)
v->status = CPU_STATUS_IDLE;
@@ -2236,7 +2242,6 @@ void schedule(void)
struct cpu_local_var *v;
struct process *next, *prev, *proc, *tmp = NULL;
int switch_ctx = 0;
unsigned long irqstate;
struct process *last;
if (cpu_local_var(no_preempt)) {
@@ -2250,7 +2255,8 @@ void schedule(void)
}
redo:
irqstate = ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock));
cpu_local_var(runq_irqstate) =
ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock));
v = get_this_cpu_local_var();
next = NULL;
@@ -2337,14 +2343,22 @@ redo:
/* Set up new TLS.. */
do_arch_prctl(ARCH_SET_FS, next->thread.tlsblock_base);
ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
if (prev) {
last = ihk_mc_switch_context(&prev->ctx, &next->ctx, prev);
}
else {
last = ihk_mc_switch_context(NULL, &next->ctx, prev);
}
/*
* We must hold the lock throughout the context switch; otherwise
* an IRQ could deschedule this process between page table loading and
* context switching and leave execution in an inconsistent state.
* Since we may have been migrated to another core in the meantime,
* we refer to the lock directly through cpu_local_var().
*/
ihk_mc_spinlock_unlock(&(cpu_local_var(runq_lock)),
cpu_local_var(runq_irqstate));
/* Have we migrated to another core meanwhile? */
if (v != get_this_cpu_local_var()) {
@@ -2358,7 +2372,8 @@ redo:
}
}
else {
ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
ihk_mc_spinlock_unlock(&(cpu_local_var(runq_lock)),
cpu_local_var(runq_irqstate));
}
}
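
Taken together, the new protocol can be summarized as follows (a sketch condensed from the hunks above, with error paths and the migration check omitted):

    /* schedule(): take the lock with IRQs off and keep it across the switch. */
    cpu_local_var(runq_irqstate) =
    	ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock));
    /* ... pick next, load next's page tables, set up TLS ... */
    last = ihk_mc_switch_context(&prev->ctx, &next->ctx, prev);
    /*
     * Whatever starts running on this CPU drops that CPU's runq lock:
     *   - a task resuming here, right after ihk_mc_switch_context() returns,
     *   - idle(), at the top of the idle loop,
     *   - a brand-new process, via enter_user_mode -> release_runq_lock().
     */
    ihk_mc_spinlock_unlock(&(cpu_local_var(runq_lock)),
    	cpu_local_var(runq_irqstate));

Because prev may resume on a different core, the release always goes through cpu_local_var() rather than the v pointer captured before the switch, and for the same reason the saved IRQ flags now live in struct cpu_local_var (runq_irqstate) instead of the old irqstate stack variable.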