runq_lock: Fix deadlock due to CPU migration.

Symptom and analysis:
A thread acquires the runq_lock of its migration-source CPU
while it is already running on the migration-destination CPU.

This happens in the following steps:
 (1) The thread loads the address of cpu_local_var(runq_lock)
     into a register when about to perform
     ihk_mc_spinlock_lock() on the lock variable.
 (2) The thread takes a migration IPI and migrates to another CPU.
 (3) The thread resumes execution and acquires the wrong lock,
     namely the runq_lock of the source CPU instead of its own.

Solution:
* Disable interrupts before taking the address of
  cpu_local_var(runq_lock), so the thread cannot be migrated
  between resolving the per-CPU address and acquiring the lock
  (see the before/after sketch below).
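
As a pattern, every changed call site goes from a single
interrupt-saving lock call to an explicit disable/lock pair:
ihk_mc_spinlock_lock() disables interrupts only after its lock
argument has already been evaluated, which leaves the migration
window of step (2) open. A minimal before/after sketch, using the
primitives from the hunks below (the critical-section comments are
placeholders, not code from this commit):

  /* Before: &(cpu_local_var(runq_lock)) may be evaluated on the
   * source CPU, then a migration IPI moves the thread before the
   * lock is taken, so the destination CPU ends up holding the
   * source CPU's runq_lock. */
  irqstate = ihk_mc_spinlock_lock(&(cpu_local_var(runq_lock)));
  /* ... critical section ... */
  ihk_mc_spinlock_unlock(&(cpu_local_var(runq_lock)), irqstate);

  /* After: interrupts are disabled first, so no migration IPI can
   * be delivered between resolving the per-CPU address and
   * acquiring the lock; lock and unlock now refer to the same CPU. */
  irqstate = cpu_disable_interrupt_save();
  ihk_mc_spinlock_lock_noirq(&(cpu_local_var(runq_lock)));
  /* ... critical section ... */
  ihk_mc_spinlock_unlock_noirq(&(cpu_local_var(runq_lock)));
  cpu_restore_interrupt(irqstate);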

Change-Id: Ia0ea450b97f872dd6116252537e4a79f85adfc88
Refs: #1400
Author:       Shiratori, Takehiro
Date:         2020-02-13 13:02:40 +09:00
Committed by: Masamichi Takagi
Parent:       1a204b6674
Commit:       edf7b36669

4 changed files with 38 additions and 21 deletions


@@ -1448,7 +1448,9 @@ __check_signal(unsigned long rc, void *regs0, int num, int irq_disabled)
 	if(thread == NULL || thread->proc->pid == 0){
 		struct thread *t;
 
-		irqstate = ihk_mc_spinlock_lock(&(cpu_local_var(runq_lock)));
+		irqstate = cpu_disable_interrupt_save();
+		ihk_mc_spinlock_lock_noirq(&(cpu_local_var(runq_lock)));
+
 		list_for_each_entry(t, &(cpu_local_var(runq)), sched_list){
 			if(t->proc->pid <= 0)
 				continue;
@@ -1458,7 +1460,8 @@ __check_signal(unsigned long rc, void *regs0, int num, int irq_disabled)
 				break;
 			}
 		}
 
-		ihk_mc_spinlock_unlock(&(cpu_local_var(runq_lock)), irqstate);
+		ihk_mc_spinlock_unlock_noirq(&(cpu_local_var(runq_lock)));
+		cpu_restore_interrupt(irqstate);
 		goto out;
 	}


@@ -1151,7 +1151,8 @@ check_signal(unsigned long rc, void *regs0, int num)
 	if(thread == NULL || thread == &cpu_local_var(idle)){
 		struct thread *t;
 
-		irqstate = ihk_mc_spinlock_lock(&(cpu_local_var(runq_lock)));
+		irqstate = cpu_disable_interrupt_save();
+		ihk_mc_spinlock_lock_noirq(&(cpu_local_var(runq_lock)));
 		list_for_each_entry(t, &(cpu_local_var(runq)), sched_list){
 			if(t == &cpu_local_var(idle))
 				continue;
@@ -1161,7 +1162,8 @@ check_signal(unsigned long rc, void *regs0, int num)
 				break;
 			}
 		}
 
-		ihk_mc_spinlock_unlock(&(cpu_local_var(runq_lock)), irqstate);
+		ihk_mc_spinlock_unlock_noirq(&(cpu_local_var(runq_lock)));
+		cpu_restore_interrupt(irqstate);
 		goto out;
 	}


@@ -3375,15 +3375,17 @@ void spin_sleep_or_schedule(void)
 
 	for (;;) {
 		/* Check if we need to reschedule */
-		irqstate =
-			ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock));
+		irqstate = cpu_disable_interrupt_save();
+		ihk_mc_spinlock_lock_noirq(
+			&(get_this_cpu_local_var()->runq_lock));
 		v = get_this_cpu_local_var();
 
 		if (v->flags & CPU_FLAG_NEED_RESCHED || v->runq_len > 1) {
 			do_schedule = 1;
 		}
 
-		ihk_mc_spinlock_unlock(&v->runq_lock, irqstate);
+		ihk_mc_spinlock_unlock_noirq(&v->runq_lock);
+		cpu_restore_interrupt(irqstate);
 
 		/* Check if we were woken up */
 		irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
@@ -3425,6 +3427,7 @@ void schedule(void)
 	int switch_ctx = 0;
 	struct thread *last;
 	int prevpid;
+	unsigned long irqstate = 0;
 
 	if (cpu_local_var(no_preempt)) {
 		kprintf("%s: WARNING can't schedule() while no preemption, cnt: %d\n",
@@ -3432,8 +3435,9 @@ void schedule(void)
 		return;
 	}
 
-	cpu_local_var(runq_irqstate) =
-		ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock));
+	irqstate = cpu_disable_interrupt_save();
+	ihk_mc_spinlock_lock_noirq(&(get_this_cpu_local_var()->runq_lock));
+	cpu_local_var(runq_irqstate) = irqstate;
 	v = get_this_cpu_local_var();
 
 	next = NULL;
@@ -3541,8 +3545,8 @@ void schedule(void)
 		 * Since we may be migrated to another core meanwhile, we refer
 		 * directly to cpu_local_var.
 		 */
-		ihk_mc_spinlock_unlock(&(cpu_local_var(runq_lock)),
-			cpu_local_var(runq_irqstate));
+		ihk_mc_spinlock_unlock_noirq(&(cpu_local_var(runq_lock)));
+		cpu_restore_interrupt(cpu_local_var(runq_irqstate));
 
 		if ((last != NULL) && (last->status == PS_EXITED)) {
 			v->prevpid = 0;
@@ -3576,8 +3580,8 @@ void schedule(void)
 		}
 	}
 	else {
-		ihk_mc_spinlock_unlock(&(cpu_local_var(runq_lock)),
-			cpu_local_var(runq_irqstate));
+		ihk_mc_spinlock_unlock_noirq(&(cpu_local_var(runq_lock)));
+		cpu_restore_interrupt(cpu_local_var(runq_irqstate));
 	}
 }


@@ -251,8 +251,9 @@ long do_syscall(struct syscall_request *req, int cpu)
 
 		/* Spin by default, but if re-schedule is requested let
 		 * the other thread run */
-		runq_irqstate =
-			ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock));
+		runq_irqstate = cpu_disable_interrupt_save();
+		ihk_mc_spinlock_lock_noirq(
+			&(get_this_cpu_local_var()->runq_lock));
 		v = get_this_cpu_local_var();
 
 		if (v->flags & CPU_FLAG_NEED_RESCHED ||
@@ -261,7 +262,8 @@ long do_syscall(struct syscall_request *req, int cpu)
 			do_schedule = 1;
 		}
 
-		ihk_mc_spinlock_unlock(&v->runq_lock, runq_irqstate);
+		ihk_mc_spinlock_unlock_noirq(&v->runq_lock);
+		cpu_restore_interrupt(runq_irqstate);
 
 		if (!do_schedule) {
 			continue;
@@ -2672,9 +2674,13 @@ end:
 	ihk_mc_free_pages(desc, 4);
 
 	if (!ret) {
+		unsigned long irqstate;
+
 		/* Lock run queue because enter_user_mode expects to release it */
-		cpu_local_var(runq_irqstate) =
-			ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock));
+		irqstate = cpu_disable_interrupt_save();
+		ihk_mc_spinlock_lock_noirq(
+			&(get_this_cpu_local_var()->runq_lock));
+		cpu_local_var(runq_irqstate) = irqstate;
 
 		preempt_enable();
 		ihk_mc_switch_context(NULL, &thread->ctx, thread);
@@ -4957,15 +4963,17 @@ do_sigsuspend(struct thread *thread, const sigset_t *set)
 		long runq_irqstate;
 		thread->status = PS_INTERRUPTIBLE;
 
-		runq_irqstate =
-			ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock));
+		runq_irqstate = cpu_disable_interrupt_save();
+		ihk_mc_spinlock_lock_noirq(
+			&(get_this_cpu_local_var()->runq_lock));
 		v = get_this_cpu_local_var();
 
 		if (v->flags & CPU_FLAG_NEED_RESCHED) {
 			do_schedule = 1;
 		}
 
-		ihk_mc_spinlock_unlock(&v->runq_lock, runq_irqstate);
+		ihk_mc_spinlock_unlock_noirq(&v->runq_lock);
+		cpu_restore_interrupt(runq_irqstate);
 
 		if (do_schedule) {
 			schedule();