runq_lock: Fix deadlock due to cpu migration.
Symptom and analysis:
runq_lock of the migration source is acquired on
the migration destination CPU.
This happens in the following steps:
(1) The thread loads the value of cpu_local_var(runq_lock)
into a register while trying to perform
ihk_mc_spinlock_lock() on the lock variable.
(2) The thread takes an IPI and is migrated to another CPU.
(3) The thread resumes execution and acquires the wrong lock
(the one belonging to the migration-source CPU).
Solution:
* Disable interrupts before getting the value of
cpu_local_var(runq_lock)
Change-Id: Ia0ea450b97f872dd6116252537e4a79f85adfc88
Refs: #1400
This commit is contained in:
Committed by: Masamichi Takagi
Parent commit: 1a204b6674
This commit: edf7b36669
@@ -3375,15 +3375,17 @@ void spin_sleep_or_schedule(void)
 	for (;;) {
 		/* Check if we need to reschedule */
-		irqstate =
-			ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock));
+		irqstate = cpu_disable_interrupt_save();
+		ihk_mc_spinlock_lock_noirq(
+			&(get_this_cpu_local_var()->runq_lock));
 		v = get_this_cpu_local_var();

 		if (v->flags & CPU_FLAG_NEED_RESCHED || v->runq_len > 1) {
 			do_schedule = 1;
 		}

-		ihk_mc_spinlock_unlock(&v->runq_lock, irqstate);
+		ihk_mc_spinlock_unlock_noirq(&v->runq_lock);
+		cpu_restore_interrupt(irqstate);

 		/* Check if we were woken up */
 		irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
@@ -3425,6 +3427,7 @@ void schedule(void)
 	int switch_ctx = 0;
 	struct thread *last;
 	int prevpid;
+	unsigned long irqstate = 0;

 	if (cpu_local_var(no_preempt)) {
 		kprintf("%s: WARNING can't schedule() while no preemption, cnt: %d\n",
@@ -3432,8 +3435,9 @@ void schedule(void)
 		return;
 	}

-	cpu_local_var(runq_irqstate) =
-		ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock));
+	irqstate = cpu_disable_interrupt_save();
+	ihk_mc_spinlock_lock_noirq(&(get_this_cpu_local_var()->runq_lock));
+	cpu_local_var(runq_irqstate) = irqstate;
 	v = get_this_cpu_local_var();

 	next = NULL;
@@ -3541,8 +3545,8 @@ void schedule(void)
 	 * Since we may be migrated to another core meanwhile, we refer
 	 * directly to cpu_local_var.
 	 */
-	ihk_mc_spinlock_unlock(&(cpu_local_var(runq_lock)),
-			cpu_local_var(runq_irqstate));
+	ihk_mc_spinlock_unlock_noirq(&(cpu_local_var(runq_lock)));
+	cpu_restore_interrupt(cpu_local_var(runq_irqstate));

 	if ((last != NULL) && (last->status == PS_EXITED)) {
 		v->prevpid = 0;
@@ -3576,8 +3580,8 @@ void schedule(void)
 		}
 	}
 	else {
-		ihk_mc_spinlock_unlock(&(cpu_local_var(runq_lock)),
-				cpu_local_var(runq_irqstate));
+		ihk_mc_spinlock_unlock_noirq(&(cpu_local_var(runq_lock)));
+		cpu_restore_interrupt(cpu_local_var(runq_irqstate));
	}
 }

@@ -251,8 +251,9 @@ long do_syscall(struct syscall_request *req, int cpu)

 	/* Spin by default, but if re-schedule is requested let
 	 * the other thread run */
-	runq_irqstate =
-		ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock));
+	runq_irqstate = cpu_disable_interrupt_save();
+	ihk_mc_spinlock_lock_noirq(
+		&(get_this_cpu_local_var()->runq_lock));
 	v = get_this_cpu_local_var();

 	if (v->flags & CPU_FLAG_NEED_RESCHED ||
@@ -261,7 +262,8 @@ long do_syscall(struct syscall_request *req, int cpu)
 		do_schedule = 1;
 	}

-	ihk_mc_spinlock_unlock(&v->runq_lock, runq_irqstate);
+	ihk_mc_spinlock_unlock_noirq(&v->runq_lock);
+	cpu_restore_interrupt(runq_irqstate);

 	if (!do_schedule) {
 		continue;
@@ -2672,9 +2674,13 @@ end:
 	ihk_mc_free_pages(desc, 4);

 	if (!ret) {
+		unsigned long irqstate;
+
 		/* Lock run queue because enter_user_mode expects to release it */
-		cpu_local_var(runq_irqstate) =
-			ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock));
+		irqstate = cpu_disable_interrupt_save();
+		ihk_mc_spinlock_lock_noirq(
+			&(get_this_cpu_local_var()->runq_lock));
+		cpu_local_var(runq_irqstate) = irqstate;
 		preempt_enable();

 		ihk_mc_switch_context(NULL, &thread->ctx, thread);
@@ -4957,15 +4963,17 @@ do_sigsuspend(struct thread *thread, const sigset_t *set)
 	long runq_irqstate;

 	thread->status = PS_INTERRUPTIBLE;
-	runq_irqstate =
-		ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock));
+	runq_irqstate = cpu_disable_interrupt_save();
+	ihk_mc_spinlock_lock_noirq(
+		&(get_this_cpu_local_var()->runq_lock));
 	v = get_this_cpu_local_var();

 	if (v->flags & CPU_FLAG_NEED_RESCHED) {
 		do_schedule = 1;
 	}

-	ihk_mc_spinlock_unlock(&v->runq_lock, runq_irqstate);
+	ihk_mc_spinlock_unlock_noirq(&v->runq_lock);
+	cpu_restore_interrupt(runq_irqstate);

 	if (do_schedule) {
 		schedule();

Reference in New Issue
Block a user