runq_lock: Fix deadlock due to cpu migration.

Symptom and analysis:
runq_lock of the migration source CPU is acquired on the
migration destination CPU. This happens in the following steps:
(1) The thread evaluates &cpu_local_var(runq_lock) and holds the
    resulting address in a register while performing
    ihk_mc_spinlock_lock() on the lock variable.
(2) The thread takes an IPI and is migrated to another CPU.
(3) The thread resumes execution on the destination CPU and
    acquires the wrong lock, i.e., the runq_lock of the source CPU.

Solution:
* Disable interrupts before taking the address of
  cpu_local_var(runq_lock), and use the _noirq lock/unlock
  variants while interrupts are disabled.
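
As an illustration (a sketch, not part of the change itself), the
broken and the fixed locking patterns; this assumes only what the
diff shows, namely that cpu_local_var() resolves through a per-CPU
base and that ihk_mc_spinlock_lock() evaluates its argument before
it disables interrupts:

    /* Broken: &cpu_local_var(runq_lock) is evaluated while interrupts
     * are still enabled, so the address of CPU A's lock can sit in a
     * register when an IPI migrates the thread to CPU B. */
    irqstate = ihk_mc_spinlock_lock(&(cpu_local_var(runq_lock)));
    /* ... possibly holding CPU A's runq_lock while running on CPU B ... */

    /* Fixed: interrupts are disabled first, so the thread cannot be
     * migrated between taking the per-CPU address and acquiring it. */
    irqstate = cpu_disable_interrupt_save();
    ihk_mc_spinlock_lock_noirq(&(cpu_local_var(runq_lock)));
    /* ... critical section on this CPU's run queue ... */
    ihk_mc_spinlock_unlock_noirq(&(cpu_local_var(runq_lock)));
    cpu_restore_interrupt(irqstate);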
Change-Id: Ia0ea450b97f872dd6116252537e4a79f85adfc88
Refs: #1400
committed by: Masamichi Takagi
parent: 1a204b6674
commit: edf7b36669
@@ -1448,7 +1448,9 @@ __check_signal(unsigned long rc, void *regs0, int num, int irq_disabled)
 	if(thread == NULL || thread->proc->pid == 0){
 		struct thread *t;
 
-		irqstate = ihk_mc_spinlock_lock(&(cpu_local_var(runq_lock)));
+		irqstate = cpu_disable_interrupt_save();
+		ihk_mc_spinlock_lock_noirq(&(cpu_local_var(runq_lock)));
 		list_for_each_entry(t, &(cpu_local_var(runq)), sched_list){
 			if(t->proc->pid <= 0)
 				continue;
@@ -1458,7 +1460,8 @@ __check_signal(unsigned long rc, void *regs0, int num, int irq_disabled)
 				break;
 			}
 		}
-		ihk_mc_spinlock_unlock(&(cpu_local_var(runq_lock)), irqstate);
+		ihk_mc_spinlock_unlock_noirq(&(cpu_local_var(runq_lock)));
+		cpu_restore_interrupt(irqstate);
 		goto out;
 	}
 
@@ -1151,7 +1151,8 @@ check_signal(unsigned long rc, void *regs0, int num)
 	if(thread == NULL || thread == &cpu_local_var(idle)){
 		struct thread *t;
 
-		irqstate = ihk_mc_spinlock_lock(&(cpu_local_var(runq_lock)));
+		irqstate = cpu_disable_interrupt_save();
+		ihk_mc_spinlock_lock_noirq(&(cpu_local_var(runq_lock)));
 		list_for_each_entry(t, &(cpu_local_var(runq)), sched_list){
 			if(t == &cpu_local_var(idle))
 				continue;
@@ -1161,7 +1162,8 @@ check_signal(unsigned long rc, void *regs0, int num)
 				break;
 			}
 		}
-		ihk_mc_spinlock_unlock(&(cpu_local_var(runq_lock)), irqstate);
+		ihk_mc_spinlock_unlock_noirq(&(cpu_local_var(runq_lock)));
+		cpu_restore_interrupt(irqstate);
 		goto out;
 	}
 
@@ -3375,15 +3375,17 @@ void spin_sleep_or_schedule(void)
 
 	for (;;) {
 		/* Check if we need to reschedule */
-		irqstate =
-			ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock));
+		irqstate = cpu_disable_interrupt_save();
+		ihk_mc_spinlock_lock_noirq(
+			&(get_this_cpu_local_var()->runq_lock));
 		v = get_this_cpu_local_var();
 
 		if (v->flags & CPU_FLAG_NEED_RESCHED || v->runq_len > 1) {
 			do_schedule = 1;
 		}
 
-		ihk_mc_spinlock_unlock(&v->runq_lock, irqstate);
+		ihk_mc_spinlock_unlock_noirq(&v->runq_lock);
+		cpu_restore_interrupt(irqstate);
 
 		/* Check if we were woken up */
 		irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
@@ -3425,6 +3427,7 @@ void schedule(void)
 	int switch_ctx = 0;
 	struct thread *last;
 	int prevpid;
+	unsigned long irqstate = 0;
 
 	if (cpu_local_var(no_preempt)) {
 		kprintf("%s: WARNING can't schedule() while no preemption, cnt: %d\n",
@@ -3432,8 +3435,9 @@ void schedule(void)
 		return;
 	}
 
-	cpu_local_var(runq_irqstate) =
-		ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock));
+	irqstate = cpu_disable_interrupt_save();
+	ihk_mc_spinlock_lock_noirq(&(get_this_cpu_local_var()->runq_lock));
+	cpu_local_var(runq_irqstate) = irqstate;
 	v = get_this_cpu_local_var();
 
 	next = NULL;
@@ -3541,8 +3545,8 @@ void schedule(void)
 	 * Since we may be migrated to another core meanwhile, we refer
 	 * directly to cpu_local_var.
 	 */
-	ihk_mc_spinlock_unlock(&(cpu_local_var(runq_lock)),
-		cpu_local_var(runq_irqstate));
+	ihk_mc_spinlock_unlock_noirq(&(cpu_local_var(runq_lock)));
+	cpu_restore_interrupt(cpu_local_var(runq_irqstate));
 
 	if ((last != NULL) && (last->status == PS_EXITED)) {
 		v->prevpid = 0;
@@ -3576,8 +3580,8 @@ void schedule(void)
 		}
 	}
 	else {
-		ihk_mc_spinlock_unlock(&(cpu_local_var(runq_lock)),
-			cpu_local_var(runq_irqstate));
+		ihk_mc_spinlock_unlock_noirq(&(cpu_local_var(runq_lock)));
+		cpu_restore_interrupt(cpu_local_var(runq_irqstate));
 	}
 }
 
@@ -251,8 +251,9 @@ long do_syscall(struct syscall_request *req, int cpu)
 
 			/* Spin by default, but if re-schedule is requested let
			 * the other thread run */
-			runq_irqstate =
-				ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock));
+			runq_irqstate = cpu_disable_interrupt_save();
+			ihk_mc_spinlock_lock_noirq(
+				&(get_this_cpu_local_var()->runq_lock));
 			v = get_this_cpu_local_var();
 
 			if (v->flags & CPU_FLAG_NEED_RESCHED ||
@@ -261,7 +262,8 @@ long do_syscall(struct syscall_request *req, int cpu)
 				do_schedule = 1;
 			}
 
-			ihk_mc_spinlock_unlock(&v->runq_lock, runq_irqstate);
+			ihk_mc_spinlock_unlock_noirq(&v->runq_lock);
+			cpu_restore_interrupt(runq_irqstate);
 
 			if (!do_schedule) {
 				continue;
@@ -2672,9 +2674,13 @@ end:
 	ihk_mc_free_pages(desc, 4);
 
 	if (!ret) {
+		unsigned long irqstate;
+
 		/* Lock run queue because enter_user_mode expects to release it */
-		cpu_local_var(runq_irqstate) =
-			ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock));
+		irqstate = cpu_disable_interrupt_save();
+		ihk_mc_spinlock_lock_noirq(
+			&(get_this_cpu_local_var()->runq_lock));
+		cpu_local_var(runq_irqstate) = irqstate;
 		preempt_enable();
 
 		ihk_mc_switch_context(NULL, &thread->ctx, thread);
@@ -4957,15 +4963,17 @@ do_sigsuspend(struct thread *thread, const sigset_t *set)
 	long runq_irqstate;
 
 	thread->status = PS_INTERRUPTIBLE;
-	runq_irqstate =
-		ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock));
+	runq_irqstate = cpu_disable_interrupt_save();
+	ihk_mc_spinlock_lock_noirq(
+		&(get_this_cpu_local_var()->runq_lock));
 	v = get_this_cpu_local_var();
 
 	if (v->flags & CPU_FLAG_NEED_RESCHED) {
 		do_schedule = 1;
 	}
 
-	ihk_mc_spinlock_unlock(&v->runq_lock, runq_irqstate);
+	ihk_mc_spinlock_unlock_noirq(&v->runq_lock);
+	cpu_restore_interrupt(runq_irqstate);
 
 	if (do_schedule) {
 		schedule();