do_syscall(): allow descheduling threads in offloaded syscalls if the CPU core is oversubscribed
This commit is contained in:
@@ -559,6 +559,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
|
|||||||
struct ikc_scd_packet *packet = __packet;
|
struct ikc_scd_packet *packet = __packet;
|
||||||
struct ikc_scd_packet pckt;
|
struct ikc_scd_packet pckt;
|
||||||
int rc;
|
int rc;
|
||||||
|
struct mcs_rwlock_node_irqsave lock;
|
||||||
struct thread *thread;
|
struct thread *thread;
|
||||||
struct process *proc;
|
struct process *proc;
|
||||||
struct mcctrl_signal {
|
struct mcctrl_signal {
|
||||||
@@ -625,6 +626,26 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
|
|||||||
ret = 0;
|
ret = 0;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Used for syscall offload reply message to explicitly schedule in
|
||||||
|
* the waiting thread
|
||||||
|
*/
|
||||||
|
case SCD_MSG_WAKE_UP_SYSCALL_THREAD:
|
||||||
|
thread = find_thread(0, packet->ttid, &lock);
|
||||||
|
if (!thread) {
|
||||||
|
kprintf("%s: WARNING: no thread for SCD reply? TID: %d\n",
|
||||||
|
__FUNCTION__, packet->ttid);
|
||||||
|
ret = -EINVAL;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
thread_unlock(thread, &lock);
|
||||||
|
|
||||||
|
dkprintf("%s: SCD_MSG_WAKE_UP_SYSCALL_THREAD: waking up tid %d\n",
|
||||||
|
__FUNCTION__, packet->ttid);
|
||||||
|
waitq_wakeup(&thread->scd_wq);
|
||||||
|
ret = 0;
|
||||||
|
break;
|
||||||
|
|
||||||
case SCD_MSG_SEND_SIGNAL:
|
case SCD_MSG_SEND_SIGNAL:
|
||||||
pp = ihk_mc_map_memory(NULL, packet->arg, sizeof(struct mcctrl_signal));
|
pp = ihk_mc_map_memory(NULL, packet->arg, sizeof(struct mcctrl_signal));
|
||||||
sp = (struct mcctrl_signal *)ihk_mc_map_virtual(pp, 1, PTATTR_WRITABLE | PTATTR_ACTIVE);
|
sp = (struct mcctrl_signal *)ihk_mc_map_virtual(pp, 1, PTATTR_WRITABLE | PTATTR_ACTIVE);
|
||||||
|
|||||||
@@ -566,6 +566,9 @@ struct thread {
|
|||||||
struct itimerval itimer_prof;
|
struct itimerval itimer_prof;
|
||||||
struct timespec itimer_virtual_value;
|
struct timespec itimer_virtual_value;
|
||||||
struct timespec itimer_prof_value;
|
struct timespec itimer_prof_value;
|
||||||
|
|
||||||
|
/* Syscall offload wait queue head */
|
||||||
|
struct waitq scd_wq;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct process_vm {
|
struct process_vm {
|
||||||
|
|||||||
@@ -2576,13 +2576,8 @@ void schedule(void)
|
|||||||
struct thread *last;
|
struct thread *last;
|
||||||
|
|
||||||
if (cpu_local_var(no_preempt)) {
|
if (cpu_local_var(no_preempt)) {
|
||||||
dkprintf("no schedule() while no preemption! \n");
|
kprintf("%s: WARNING can't schedule() while no preemption, cnt: %d\n",
|
||||||
return;
|
__FUNCTION__, cpu_local_var(no_preempt));
|
||||||
}
|
|
||||||
|
|
||||||
if (cpu_local_var(current)
|
|
||||||
&& cpu_local_var(current)->in_syscall_offload) {
|
|
||||||
dkprintf("no schedule() while syscall offload!\n");
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -186,6 +186,7 @@ long do_syscall(struct syscall_request *req, int cpu, int pid)
|
|||||||
unsigned long irqstate;
|
unsigned long irqstate;
|
||||||
struct thread *thread = cpu_local_var(current);
|
struct thread *thread = cpu_local_var(current);
|
||||||
struct process *proc = thread->proc;
|
struct process *proc = thread->proc;
|
||||||
|
DECLARE_WAITQ_ENTRY(scd_wq_entry, thread);
|
||||||
|
|
||||||
dkprintf("SC(%d)[%3d] sending syscall\n",
|
dkprintf("SC(%d)[%3d] sending syscall\n",
|
||||||
ihk_mc_get_processor_id(),
|
ihk_mc_get_processor_id(),
|
||||||
@@ -212,7 +213,7 @@ long do_syscall(struct syscall_request *req, int cpu, int pid)
|
|||||||
* the pool may serve the request */
|
* the pool may serve the request */
|
||||||
req->rtid = cpu_local_var(current)->tid;
|
req->rtid = cpu_local_var(current)->tid;
|
||||||
req->ttid = 0;
|
req->ttid = 0;
|
||||||
|
res.req_thread_status = IHK_SCD_REQ_THREAD_SPINNING;
|
||||||
send_syscall(req, cpu, pid, &res);
|
send_syscall(req, cpu, pid, &res);
|
||||||
|
|
||||||
dkprintf("%s: syscall num: %d waiting for Linux.. \n",
|
dkprintf("%s: syscall num: %d waiting for Linux.. \n",
|
||||||
@@ -224,34 +225,53 @@ long do_syscall(struct syscall_request *req, int cpu, int pid)
|
|||||||
while (res.status != STATUS_COMPLETED) {
|
while (res.status != STATUS_COMPLETED) {
|
||||||
while (res.status == STATUS_IN_PROGRESS) {
|
while (res.status == STATUS_IN_PROGRESS) {
|
||||||
struct cpu_local_var *v;
|
struct cpu_local_var *v;
|
||||||
int call_schedule = 0;
|
int do_schedule = 0;
|
||||||
long runq_irqstate;
|
long runq_irqstate;
|
||||||
|
unsigned long flags;
|
||||||
|
|
||||||
|
DECLARE_WAITQ_ENTRY(scd_wq_entry, cpu_local_var(current));
|
||||||
|
|
||||||
cpu_pause();
|
cpu_pause();
|
||||||
|
|
||||||
/* XXX: Intel MPI + Intel OpenMP situation:
|
/* Spin if not preemptable */
|
||||||
* While the MPI helper thread waits in a poll() call the OpenMP master
|
if (cpu_local_var(no_preempt) || !thread->tid) {
|
||||||
* thread is iterating through the CPU cores using setaffinity().
|
continue;
|
||||||
* Unless we give a chance to it on this core the two threads seem to
|
}
|
||||||
* hang in deadlock. If the new thread would make a system call on this
|
|
||||||
* core we would be in trouble. For now, allow it, but in the future
|
/* Spin by default, but if re-schedule is requested let
|
||||||
* we should have syscall channels for each thread instead of per core,
|
* the other thread run */
|
||||||
* or we should multiplex syscall threads in mcexec */
|
|
||||||
runq_irqstate =
|
runq_irqstate =
|
||||||
ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock));
|
ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock));
|
||||||
v = get_this_cpu_local_var();
|
v = get_this_cpu_local_var();
|
||||||
|
|
||||||
if (v->flags & CPU_FLAG_NEED_RESCHED) {
|
if (v->flags & CPU_FLAG_NEED_RESCHED) {
|
||||||
call_schedule = 1;
|
do_schedule = 1;
|
||||||
--thread->in_syscall_offload;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ihk_mc_spinlock_unlock(&v->runq_lock, runq_irqstate);
|
ihk_mc_spinlock_unlock(&v->runq_lock, runq_irqstate);
|
||||||
|
|
||||||
if (call_schedule) {
|
if (!do_schedule) {
|
||||||
schedule();
|
continue;
|
||||||
++thread->in_syscall_offload;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
flags = cpu_disable_interrupt_save();
|
||||||
|
|
||||||
|
/* Try to sleep until notified */
|
||||||
|
if (__sync_bool_compare_and_swap(&res.req_thread_status,
|
||||||
|
IHK_SCD_REQ_THREAD_SPINNING,
|
||||||
|
IHK_SCD_REQ_THREAD_DESCHEDULED)) {
|
||||||
|
|
||||||
|
dkprintf("%s: tid %d waiting for syscall reply...\n",
|
||||||
|
__FUNCTION__, thread->tid);
|
||||||
|
waitq_init(&thread->scd_wq);
|
||||||
|
waitq_prepare_to_wait(&thread->scd_wq, &scd_wq_entry,
|
||||||
|
PS_INTERRUPTIBLE);
|
||||||
|
cpu_restore_interrupt(flags);
|
||||||
|
schedule();
|
||||||
|
waitq_finish_wait(&thread->scd_wq, &scd_wq_entry);
|
||||||
|
}
|
||||||
|
|
||||||
|
cpu_restore_interrupt(flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (res.status == STATUS_PAGE_FAULT) {
|
if (res.status == STATUS_PAGE_FAULT) {
|
||||||
@@ -271,6 +291,7 @@ long do_syscall(struct syscall_request *req, int cpu, int pid)
|
|||||||
req2.rtid = cpu_local_var(current)->tid;
|
req2.rtid = cpu_local_var(current)->tid;
|
||||||
req2.ttid = res.stid;
|
req2.ttid = res.stid;
|
||||||
|
|
||||||
|
res.req_thread_status = IHK_SCD_REQ_THREAD_SPINNING;
|
||||||
send_syscall(&req2, cpu, pid, &res);
|
send_syscall(&req2, cpu, pid, &res);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -809,7 +830,8 @@ terminate(int rc, int sig)
|
|||||||
release_thread(mythread);
|
release_thread(mythread);
|
||||||
release_process_vm(vm);
|
release_process_vm(vm);
|
||||||
schedule();
|
schedule();
|
||||||
// no return
|
kprintf("%s: ERROR: returned from terminate() -> schedule()\n", __FUNCTION__);
|
||||||
|
panic("panic");
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|||||||
Reference in New Issue
Block a user