coredump: Support threads

Change-Id: Id75ade6c87b15abcff5d772d90f77950376a32c1
Refs: #1219
This commit is contained in:
Masamichi Takagi
2019-02-19 17:08:39 +09:00
parent 12aef0b578
commit 11ef2f8092
33 changed files with 1438 additions and 76 deletions

View File

@@ -91,7 +91,7 @@ int get_prpsinfo_size(void)
* \param proc A pointer to the current process structure.
* \param regs0 A pointer to a ihk_mc_user_context_t structure.
*/
void fill_prstatus(struct note *head, struct thread *thread, void *regs0)
void fill_prstatus(struct note *head, struct thread *thread)
{
void *name;
struct elf_prstatus64 *prstatus;
@@ -103,7 +103,7 @@ void fill_prstatus(struct note *head, struct thread *thread, void *regs0)
memcpy(name, "CORE", sizeof("CORE"));
prstatus = (struct elf_prstatus64 *)(name + align32(sizeof("CORE")));
arch_fill_prstatus(prstatus, thread, regs0);
arch_fill_prstatus(prstatus, thread, thread->coredump_regs);
}
/**
@@ -114,7 +114,7 @@ void fill_prstatus(struct note *head, struct thread *thread, void *regs0)
* \param regs A pointer to a ihk_mc_user_context_t structure.
*/
void fill_prpsinfo(struct note *head, struct thread *thread, void *regs)
void fill_prpsinfo(struct note *head, struct process *proc, char *cmdline)
{
void *name;
struct elf_prpsinfo64 *prpsinfo;
@@ -126,8 +126,10 @@ void fill_prpsinfo(struct note *head, struct thread *thread, void *regs)
memcpy(name, "CORE", sizeof("CORE"));
prpsinfo = (struct elf_prpsinfo64 *)(name + align32(sizeof("CORE")));
prpsinfo->pr_state = thread->status;
prpsinfo->pr_pid = thread->proc->pid;
prpsinfo->pr_state = proc->status;
prpsinfo->pr_pid = proc->pid;
memcpy(prpsinfo->pr_fname, cmdline, 16);
/* TODO: Fill the following fields:
* char pr_sname;
@@ -161,7 +163,7 @@ int get_auxv_size(void)
* \param regs A pointer to a ihk_mc_user_context_t structure.
*/
void fill_auxv(struct note *head, struct thread *thread, void *regs)
void fill_auxv(struct note *head, struct process *proc)
{
void *name;
void *auxv;
@@ -172,7 +174,7 @@ void fill_auxv(struct note *head, struct thread *thread, void *regs)
name = (void *) (head + 1);
memcpy(name, "CORE", sizeof("CORE"));
auxv = name + align32(sizeof("CORE"));
memcpy(auxv, thread->proc->saved_auxv,
memcpy(auxv, proc->saved_auxv,
sizeof(unsigned long) * AUXV_LEN);
}
@@ -181,10 +183,25 @@ void fill_auxv(struct note *head, struct thread *thread, void *regs)
*
*/
int get_note_size(void)
int get_note_size(struct process *proc)
{
return get_prstatus_size() + arch_get_thread_core_info_size()
+ get_prpsinfo_size() + get_auxv_size();
int note = 0;
struct thread *thread_iter;
struct mcs_rwlock_node lock;
mcs_rwlock_reader_lock_noirq(&proc->threads_lock, &lock);
list_for_each_entry(thread_iter, &proc->threads_list, siblings_list) {
note += get_prstatus_size();
note += arch_get_thread_core_info_size();
if (thread_iter->tid == proc->pid) {
note += get_prpsinfo_size();
note += get_auxv_size();
}
}
mcs_rwlock_reader_unlock_noirq(&proc->threads_lock, &lock);
return note;
}
/**
@@ -195,18 +212,48 @@ int get_note_size(void)
* \param regs A pointer to a ihk_mc_user_context_t structure.
*/
void fill_note(void *note, struct thread *thread, void *regs)
void fill_note(void *note, struct process *proc, char *cmdline)
{
fill_prstatus(note, thread, regs);
note += get_prstatus_size();
struct thread *thread_iter;
struct mcs_rwlock_node lock;
arch_fill_thread_core_info(note, thread, regs);
note += arch_get_thread_core_info_size();
mcs_rwlock_reader_lock_noirq(&proc->threads_lock, &lock);
list_for_each_entry(thread_iter, &proc->threads_list, siblings_list) {
fill_prstatus(note, thread_iter);
note += get_prstatus_size();
fill_prpsinfo(note, thread, regs);
note += get_prpsinfo_size();
arch_fill_thread_core_info(note, thread_iter,
thread_iter->coredump_regs);
note += arch_get_thread_core_info_size();
if (thread_iter->tid == proc->pid) {
fill_prpsinfo(note, proc, cmdline);
note += get_prpsinfo_size();
#if 0
fill_siginfo(note, proc);
note += get_siginfo_size();
#endif
fill_auxv(note, proc);
note += get_auxv_size();
#if 0
fill_file(note, proc);
note += get_file_size();
#endif
}
#if 0
fill_fpregset(note, thread);
note += get_fpregset_size();
fill_x86_xstate(note, thread);
note += get_x86_xstate_size();
#endif
}
mcs_rwlock_reader_unlock_noirq(&proc->threads_lock, &lock);
fill_auxv(note, thread, regs);
}
/**
@@ -224,15 +271,16 @@ void fill_note(void *note, struct thread *thread, void *regs)
* should be zero.
*/
int gencore(struct thread *thread, void *regs,
struct coretable **coretable, int *chunks)
int gencore(struct process *proc, struct coretable **coretable, int *chunks,
char *cmdline)
{
int error = 0;
struct coretable *ct = NULL;
Elf64_Ehdr *eh = NULL;
Elf64_Phdr *ph = NULL;
void *note = NULL;
struct vm_range *range, *next;
struct process_vm *vm = thread->vm;
struct process_vm *vm = proc->vm;
int segs = 1; /* the first one is for NOTE */
int notesize, phsize, alignednotesize;
unsigned int offset = 0;
@@ -241,8 +289,9 @@ int gencore(struct thread *thread, void *regs,
*chunks = 3; /* Elf header , header table and NOTE segment */
if (vm == NULL) {
dkprintf("no vm found.\n");
return -1;
kprintf("%s: ERROR: vm not found\n", __func__);
error = -EINVAL;
goto fail;
}
next = lookup_process_memory_range(vm, 0, -1);
@@ -264,8 +313,9 @@ int gencore(struct thread *thread, void *regs,
int prevzero = 0;
for (p = range->start; p < range->end; p += PAGE_SIZE) {
if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table,
(void *)p, &phys) != 0) {
if (ihk_mc_pt_virt_to_phys(
vm->address_space->page_table,
(void *)p, &phys) != 0) {
prevzero = 1;
} else {
if (prevzero == 1)
@@ -284,7 +334,7 @@ int gencore(struct thread *thread, void *regs,
dkprintf("we have %d segs and %d chunks.\n\n", segs, *chunks);
{
struct vm_regions region = thread->vm->region;
struct vm_regions region = vm->region;
dkprintf("text: %lx-%lx\n", region.text_start,
region.text_end);
@@ -303,6 +353,7 @@ int gencore(struct thread *thread, void *regs,
eh = kmalloc(sizeof(*eh), IHK_MC_AP_NOWAIT);
if (eh == NULL) {
dkprintf("could not alloc a elf header table.\n");
error = -ENOMEM;
goto fail;
}
memset(eh, 0, sizeof(*eh));
@@ -314,7 +365,8 @@ int gencore(struct thread *thread, void *regs,
phsize = sizeof(Elf64_Phdr) * segs;
ph = kmalloc(phsize, IHK_MC_AP_NOWAIT);
if (ph == NULL) {
dkprintf("could not alloc a program header table.\n");
kprintf("%s: ERROR: allocating program header\n", __func__);
error = -ENOMEM;
goto fail;
}
memset(ph, 0, phsize);
@@ -325,15 +377,16 @@ int gencore(struct thread *thread, void *regs,
* To align the next segment page-sized, we prepare a padded
* region for our NOTE segment.
*/
notesize = get_note_size();
notesize = get_note_size(proc);
alignednotesize = alignpage(notesize + offset) - offset;
note = kmalloc(alignednotesize, IHK_MC_AP_NOWAIT);
if (note == NULL) {
dkprintf("could not alloc NOTE for core.\n");
kprintf("%s: ERROR: allocating NOTE\n", __func__);
error = -ENOMEM;
goto fail;
}
memset(note, 0, alignednotesize);
fill_note(note, thread, regs);
fill_note(note, proc, cmdline);
/* program header for NOTE segment is exceptional */
ph[0].p_type = PT_NOTE;
@@ -377,10 +430,11 @@ int gencore(struct thread *thread, void *regs,
/* coretable to send to host */
ct = kmalloc(sizeof(struct coretable) * (*chunks), IHK_MC_AP_NOWAIT);
if (!ct) {
dkprintf("could not alloc a coretable.\n");
kprintf("%s: ERROR: allocating coretable\n", __func__);
error = -ENOMEM;
goto fail;
}
memset(ct, 0, sizeof(*ct));
memset(ct, 0, sizeof(struct coretable) * (*chunks));
ct[0].addr = virt_to_phys(eh); /* ELF header */
ct[0].len = 64;
@@ -406,6 +460,7 @@ int gencore(struct thread *thread, void *regs,
}
if (range->flag & VR_DEMAND_PAGING) {
/* Just an ad hoc kluge. */
unsigned long p, start, phys;
int prevzero = 0;
@@ -413,8 +468,9 @@ int gencore(struct thread *thread, void *regs,
for (start = p = range->start;
p < range->end; p += PAGE_SIZE) {
if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table,
(void *)p, &phys) != 0) {
if (ihk_mc_pt_virt_to_phys(
vm->address_space->page_table,
(void *)p, &phys) != 0) {
if (prevzero == 0) {
/* Start a new chunk */
size = PAGE_SIZE;
@@ -452,16 +508,21 @@ int gencore(struct thread *thread, void *regs,
i++;
}
} else {
if ((thread->vm->region.user_start <= range->start) &&
(range->end <= thread->vm->region.user_end)) {
if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table,
(void *)range->start,
&phys) != 0) {
dkprintf("could not convert user "
"virtual address %lx "
"to physical address\n",
range->start);
goto fail;
if ((vm->region.user_start <= range->start) &&
(range->end <= vm->region.user_end)) {
error = ihk_mc_pt_virt_to_phys(
vm->address_space->page_table,
(void *)range->start, &phys);
if (error) {
if (error != -EFAULT) {
kprintf("%s: error: ihk_mc_pt_virt_to_phys for %lx failed (%d)\n",
__func__, range->start,
error);
goto fail;
}
/* VR_PROT_NONE range */
phys = 0;
error = 0;
}
} else {
phys = virt_to_phys((void *)range->start);
@@ -475,13 +536,14 @@ int gencore(struct thread *thread, void *regs,
}
*coretable = ct;
return 0;
return error;
fail:
kfree(eh);
kfree(ct);
kfree(ph);
kfree(note);
return -1;
return error;
}
/**

View File

@@ -573,6 +573,8 @@ struct process {
#endif // PROFILE_ENABLE
int nr_processes; /* For partitioned execution */
int process_rank; /* Rank in partition */
int coredump_barrier_count, coredump_barrier_count2;
mcs_rwlock_lock_t coredump_lock; // lock for coredump
};
/*
@@ -722,6 +724,11 @@ struct thread {
// for performance counter
unsigned long pmc_alloc_map;
unsigned long extra_reg_alloc_map;
/* coredump */
void *coredump_regs;
struct waitq coredump_wq;
int coredump_status;
};
#define VM_RANGE_CACHE_SIZE 4

View File

@@ -622,4 +622,9 @@ extern long (*linux_wait_event)(void *_resp, unsigned long nsec_timeout);
extern int (*linux_printk)(const char *fmt, ...);
extern int (*linux_clock_gettime)(clockid_t clk_id, struct timespec *tp);
/* coredump */
#define COREDUMP_RUNNING 0
#define COREDUMP_DESCHEDULED 1
#define COREDUMP_TO_BE_WOKEN 2
#endif

View File

@@ -957,8 +957,44 @@ static struct ihk_mc_interrupt_handler query_free_mem_handler = {
.priv = NULL,
};
int gencore(struct thread *, void *, struct coretable **, int *);
int gencore(struct process *proc, struct coretable **coretable,
int *chunks, char *cmdline);
void freecore(struct coretable **);
struct siginfo;
typedef struct siginfo siginfo_t;
unsigned long do_kill(struct thread *thread, int pid, int tid,
int sig, siginfo_t *info, int ptracecont);
/*
 * Park the calling thread while a sibling drives core-dump generation.
 *
 * Atomically moves thread->coredump_status from COREDUMP_RUNNING to
 * COREDUMP_DESCHEDULED; only on success does it sleep on the per-thread
 * coredump_wq.  If the CAS fails, a wakeup already raced in (status is
 * COREDUMP_TO_BE_WOKEN) and we return without blocking, so the wakeup
 * cannot be lost.
 *
 * \param thread  The thread to put to sleep (presumably the current
 *                thread -- the waitq entry is built from
 *                cpu_local_var(current); TODO confirm callers only pass
 *                the running thread here).
 */
void coredump_wait(struct thread *thread)
{
unsigned long flags;
DECLARE_WAITQ_ENTRY(coredump_wq_entry, cpu_local_var(current));
/* RUNNING -> DESCHEDULED: claim the right to sleep.  A concurrent
 * coredump_wakeup() flips DESCHEDULED -> TO_BE_WOKEN, so exactly one
 * side of the race wins. */
if (__sync_bool_compare_and_swap(&thread->coredump_status,
COREDUMP_RUNNING,
COREDUMP_DESCHEDULED)) {
/* Interrupts are disabled across waitq setup so the enqueue is
 * not preempted; restored before actually scheduling away. */
flags = cpu_disable_interrupt_save();
dkprintf("%s: sleeping,tid=%d\n", __func__, thread->tid);
waitq_init(&thread->coredump_wq);
waitq_prepare_to_wait(&thread->coredump_wq, &coredump_wq_entry,
PS_INTERRUPTIBLE);
cpu_restore_interrupt(flags);
schedule();
waitq_finish_wait(&thread->coredump_wq, &coredump_wq_entry);
/* Back on CPU: rearm for the next barrier round. */
thread->coredump_status = COREDUMP_RUNNING;
dkprintf("%s: woken up,tid=%d\n", __func__, thread->tid);
}
}
/*
 * Wake a thread parked in coredump_wait().
 *
 * Atomically moves thread->coredump_status from COREDUMP_DESCHEDULED to
 * COREDUMP_TO_BE_WOKEN and, only when that CAS succeeds, signals the
 * thread's coredump_wq.  A failed CAS means the target either never
 * slept (still COREDUMP_RUNNING) or was already claimed by another
 * waker, so the wakeup is issued at most once per sleep.
 *
 * \param thread  The thread to wake.
 */
void coredump_wakeup(struct thread *thread)
{
if (__sync_bool_compare_and_swap(&thread->coredump_status,
COREDUMP_DESCHEDULED,
COREDUMP_TO_BE_WOKEN)) {
dkprintf("%s: waking up tid %d\n", __func__, thread->tid);
waitq_wakeup(&thread->coredump_wq);
}
}
/**
* \brief Generate a core file and tell the host to write it out.
@@ -967,33 +1003,133 @@ void freecore(struct coretable **);
* \param regs A pointer to a x86_regs structure.
*/
void coredump(struct thread *thread, void *regs)
int coredump(struct thread *thread, void *regs, int sig)
{
struct process *proc = thread->proc;
struct syscall_request request IHK_DMA_ALIGN;
int ret;
struct coretable *coretable;
int chunks;
struct mcs_rwlock_node_irqsave lock, lock_dump;
struct thread *thread_iter;
int i, n, rank;
int *ids = NULL;
if (thread->proc->rlimit[MCK_RLIMIT_CORE].rlim_cur == 0) {
return;
dkprintf("%s: pid=%d,tid=%d,coredump_barrier_count=%d\n",
__func__, proc->pid, thread->tid, proc->coredump_barrier_count);
if (proc->rlimit[MCK_RLIMIT_CORE].rlim_cur == 0) {
ret = -EBUSY;
goto out;
}
ret = gencore(thread, regs, &coretable, &chunks);
if (ret != 0) {
dkprintf("could not generate a core file image\n");
return;
/* Wait until all threads have saved their registers. */
/* mutex coredump */
mcs_rwlock_reader_lock(&proc->coredump_lock, &lock_dump);
rank = __sync_fetch_and_add(&proc->coredump_barrier_count, 1);
if (rank == 0) {
n = 0;
mcs_rwlock_reader_lock(&proc->threads_lock, &lock);
list_for_each_entry(thread_iter, &proc->threads_list,
siblings_list) {
if (thread_iter != thread) {
n++;
}
}
if (n) {
ids = kmalloc(sizeof(int) * n, IHK_MC_AP_NOWAIT);
if (!ids) {
mcs_rwlock_reader_unlock(&proc->threads_lock,
&lock);
kprintf("%s: ERROR: allocating tid table\n",
__func__);
ret = -ENOMEM;
goto out;
}
i = 0;
list_for_each_entry(thread_iter, &proc->threads_list,
siblings_list) {
if (thread_iter != thread) {
ids[i] = thread_iter->tid;
i++;
}
}
}
mcs_rwlock_reader_unlock(&proc->threads_lock, &lock);
/* Note that when the target is sleeping on the source CPU,
* it will wake up and handle the signal when this thread yields
* in coredump_wait()
*/
for (i = 0; i < n; i++) {
dkprintf("%s: calling do_kill, target tid=%d\n",
__func__, ids[i]);
do_kill(thread, proc->pid, ids[i], sig, NULL, 0);
}
}
mcs_rwlock_reader_unlock(&proc->coredump_lock, &lock_dump);
while (1) {
n = 0;
mcs_rwlock_reader_lock(&proc->threads_lock, &lock);
list_for_each_entry(thread_iter, &proc->threads_list,
siblings_list) {
n++;
}
mcs_rwlock_reader_unlock(&proc->threads_lock, &lock);
if (n == proc->coredump_barrier_count) {
list_for_each_entry(thread_iter, &proc->threads_list,
siblings_list) {
coredump_wakeup(thread_iter);
}
break;
}
coredump_wait(thread);
}
/* Followers wait until dump is done to keep struct thread alive */
if (rank != 0) {
ret = 0;
goto skip;
}
if ((ret = gencore(proc, &coretable, &chunks, proc->saved_cmdline))) {
kprintf("%s: ERROR: gencore returned %d\n", __func__, ret);
goto out;
}
request.number = __NR_coredump;
request.args[0] = chunks;
request.args[1] = virt_to_phys(coretable);
request.args[2] = virt_to_phys(thread->proc->saved_cmdline);
request.args[3] = (unsigned long)thread->proc->saved_cmdline_len;
/* no data for now */
ret = do_syscall(&request, thread->cpu_id);
if (ret == 0) {
kprintf("dumped core.\n");
kprintf("%s: INFO: coredump done\n", __func__);
} else {
kprintf("core dump failed.\n");
kprintf("%s: ERROR: do_syscall failed (%d)\n",
__func__, ret);
}
freecore(&coretable);
skip:
__sync_fetch_and_add(&proc->coredump_barrier_count2, 1);
while (1) {
if (n == proc->coredump_barrier_count2) {
list_for_each_entry(thread_iter, &proc->threads_list,
siblings_list) {
coredump_wakeup(thread_iter);
}
break;
}
coredump_wait(thread);
}
out:
kfree(ids);
return ret;
}
void remote_flush_tlb_cpumask(struct process_vm *vm,

View File

@@ -128,6 +128,7 @@ init_process(struct process *proc, struct process *parent)
INIT_LIST_HEAD(&proc->ptraced_children_list);
mcs_rwlock_init(&proc->threads_lock);
mcs_rwlock_init(&proc->children_lock);
mcs_rwlock_init(&proc->coredump_lock);
ihk_mc_spinlock_init(&proc->mckfd_lock);
waitq_init(&proc->waitpid_q);
ihk_atomic_set(&proc->refcount, 2);
@@ -2854,6 +2855,7 @@ void destroy_thread(struct thread *thread)
if (thread->fp_regs) {
release_fp_regs(thread);
}
kfree(thread->coredump_regs);
release_sigcommon(thread->sigcommon);

View File

@@ -2713,6 +2713,10 @@ unsigned long do_fork(int clone_flags, unsigned long newsp,
return -EINVAL;
}
if (oldproc->coredump_barrier_count) {
return -EINVAL;
}
/* N-th creation put the new on Linux CPU. It's turned off when zero is
set to uti_thread_rank. */
if (oldproc->uti_thread_rank) {