add page fault forwarding

This commit is contained in:
NAKAMURA Gou
2013-08-07 19:48:01 +09:00
parent 480f6d4c2f
commit 591f398768
8 changed files with 310 additions and 64 deletions

View File

@@ -71,6 +71,8 @@ struct syscall_load_desc {
/*
 * Reply slot for one forwarded system call.
 */
struct syscall_response {
/* Progress marker: this change stores 1 on completion and 3 (STATUS_PAGE_FAULT) to ask for page-fault handling. */
unsigned long status;
/* Result value of the system call. */
long ret;
/* Faulting address; written before status is set to STATUS_PAGE_FAULT. */
unsigned long fault_address;
/* Fault reason bits accompanying fault_address (PF_WRITE, 0x02, is set for write faults). */
unsigned long fault_reason;
};
struct syscall_ret_desc {

View File

@@ -218,8 +218,6 @@ int mcexec_syscall(struct mcctrl_channel *c, unsigned long arg)
}
#ifndef DO_USER_MODE
int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c,
struct syscall_request *sc);
// static int remaining_job, base_cpu, job_pos;
#endif
@@ -242,11 +240,27 @@ int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req)
if(swd.cpu >= usrdata->num_channels)return -EINVAL;
c = get_peer_channel(usrdata, current);
if (c) {
printk("mcexec_wait_syscall:already registered. task %p ch %p\n",
current, c);
return -EBUSY;
}
c = usrdata->channels + swd.cpu;
#ifdef DO_USER_MODE
wait_event_interruptible(c->wq_syscall, c->req);
retry:
if (wait_event_interruptible(c->wq_syscall, c->req)) {
return -EINTR;
}
c->req = 0;
#if 1
mb();
if (!c->param.request_va->valid) {
printk("mcexec_wait_syscall:stray wakeup\n");
goto retry;
}
#endif
#else
while (1) {
c = usrdata->channels + swd.cpu;
@@ -285,22 +299,28 @@ if(swd.cpu >= usrdata->num_channels)return -EINVAL;
}
if (c->param.request_va &&
c->param.request_va->valid) {
#endif
c->param.request_va->valid = 0; /* ack */
dprintk("SC #%lx, %lx\n",
c->param.request_va->number,
c->param.request_va->args[0]);
if (__do_in_kernel_syscall(os, c, c->param.request_va)) {
register_peer_channel(usrdata, current, c);
if (__do_in_kernel_syscall(os, c, c->param.request_va)) {
if (copy_to_user(&req->sr, c->param.request_va,
sizeof(struct syscall_request))) {
deregister_peer_channel(usrdata, current, c);
return -EFAULT;
}
return 0;
}
deregister_peer_channel(usrdata, current, c);
#ifdef DO_USER_MODE
goto retry;
#endif
if (copy_to_user(&req->sr, c->param.request_va,
sizeof(struct syscall_request))) {
return -EFAULT;
}
#ifndef DO_USER_MODE
return 0;
}
if (usrdata->mcctrl_dma_abort) {
return -2;
}
if (usrdata->mcctrl_dma_abort) {
return -2;
}
}
}
usrdata->remaining_job = 0;
@@ -442,6 +462,7 @@ long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg)
if (!mc) {
return -EINVAL;
}
deregister_peer_channel(usrdata, current, mc);
mc->param.response_va->ret = ret.ret;
@@ -463,6 +484,7 @@ long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg)
return -EFAULT;
}
mb();
mc->param.response_va->status = 1;
#ifdef CONFIG_MIC
@@ -486,6 +508,7 @@ long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg)
ihk_dma_request(channel, &request);
*/
} else {
mb();
mc->param.response_va->status = 1;
}

View File

@@ -213,6 +213,7 @@ int prepare_ikc_channels(ihk_os_t os)
{
struct ihk_cpu_info *info;
struct mcctrl_usrdata *usrdata;
int error;
usrdata = kzalloc(sizeof(struct mcctrl_usrdata), GFP_KERNEL);
usrdata->mcctrl_doorbell_va = (void *)__get_free_page(GFP_KERNEL);
@@ -241,6 +242,12 @@ int prepare_ikc_channels(ihk_os_t os)
ihk_host_os_set_usrdata(os, usrdata);
memcpy(&usrdata->listen_param, &listen_param, sizeof listen_param);
ihk_ikc_listen_port(os, &usrdata->listen_param);
error = init_peer_channel_registry(usrdata);
if (error) {
return error;
}
return 0;
}

View File

@@ -80,6 +80,7 @@ struct mcctrl_usrdata {
unsigned long last_thread_exec;
wait_queue_head_t wq_prepare;
unsigned long rpgtable; /* per process, not per OS */
void **keys;
};
int mcctrl_ikc_send(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp);
@@ -88,4 +89,11 @@ int mcctrl_ikc_is_valid_thread(ihk_os_t os, int cpu);
int reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp,
unsigned long *endp);
/* syscall.c */
/* Allocate the per-channel key table ud->keys; returns 0 or -ENOMEM. */
int init_peer_channel_registry(struct mcctrl_usrdata *ud);
/* Store key in the slot of channel ch; -EINVAL if ch is outside ud->channels, -EBUSY if the slot is already occupied. */
int register_peer_channel(struct mcctrl_usrdata *ud, void *key, struct mcctrl_channel *ch);
/* Clear the slot of channel ch; -EINVAL if ch is outside ud->channels, -EBUSY if the slot holds a different key. */
int deregister_peer_channel(struct mcctrl_usrdata *ud, void *key, struct mcctrl_channel *ch);
/* Return the channel whose slot holds key, or NULL when no slot matches. */
struct mcctrl_channel *get_peer_channel(struct mcctrl_usrdata *ud, void *key);
/* Handle request sc inside the host kernel; returns 0 once a reply was posted, or a negative errno (-ENOSYS for unhandled numbers). */
int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall_request *sc);
#endif

View File

@@ -36,8 +36,79 @@ static void print_dma_lastreq(void)
}
#endif
/*
 * Allocate the peer-channel registry of @ud.
 *
 * The registry is a zero-filled array with one key slot per channel
 * (ud->num_channels entries).  A NULL slot means that no key is
 * registered for the corresponding channel.
 *
 * Returns 0 on success, or -ENOMEM if the array cannot be allocated.
 */
int init_peer_channel_registry(struct mcctrl_usrdata *ud)
{
	/*
	 * kcalloc() zeroes the array like kzalloc() did, and additionally
	 * fails cleanly if count * element size would overflow.
	 */
	ud->keys = kcalloc(ud->num_channels, sizeof(*ud->keys), GFP_KERNEL);
	if (!ud->keys) {
		printk("Error: cannot allocate usrdata.keys[].\n");
		return -ENOMEM;
	}

	return 0;
}
/*
 * Record @key as the owner of syscall channel @ch.
 *
 * The slot index is the position of @ch inside ud->channels.
 *
 * Returns 0 on success, -EINVAL if @ch does not lie within
 * ud->channels[0 .. num_channels-1], or -EBUSY if the slot already
 * holds a key.
 */
int register_peer_channel(struct mcctrl_usrdata *ud, void *key, struct mcctrl_channel *ch)
{
	int slot = ch - ud->channels;

	/* Reject pointers that are not one of this OS instance's channels. */
	if ((slot < 0) || (slot >= ud->num_channels)) {
		printk("register_peer_channel(%p,%p,%p):"
				"not a syscall channel. cpu=%d\n",
				ud, key, ch, slot);
		return -EINVAL;
	}

	/* A non-NULL slot is in use; do not overwrite it. */
	if (ud->keys[slot]) {
		printk("register_peer_channel(%p,%p,%p):"
				"already registered. cpu=%d\n",
				ud, key, ch, slot);
		return -EBUSY;
	}

	ud->keys[slot] = key;
	return 0;
}
/*
 * Remove the registration of @key from syscall channel @ch.
 *
 * The slot index is the position of @ch inside ud->channels.  An
 * already empty slot is accepted and simply left empty.
 *
 * Returns 0 on success, -EINVAL if @ch does not lie within
 * ud->channels[0 .. num_channels-1], or -EBUSY if the slot is held by
 * a key other than @key (the slot is left untouched in that case).
 */
int deregister_peer_channel(struct mcctrl_usrdata *ud, void *key, struct mcctrl_channel *ch)
{
	int cpu;

	cpu = ch - ud->channels;
	if ((cpu < 0) || (ud->num_channels <= cpu)) {
		printk("deregister_peer_channel(%p,%p,%p):"
				"not a syscall channel. cpu=%d\n",
				ud, key, ch, cpu);
		return -EINVAL;
	}

	/* Only the key that owns the slot may release it. */
	if (ud->keys[cpu] && (ud->keys[cpu] != key)) {
		/* Report under this function's own name, not register_peer_channel. */
		printk("deregister_peer_channel(%p,%p,%p):"
				"not registered. cpu=%d\n",
				ud, key, ch, cpu);
		return -EBUSY;
	}

	ud->keys[cpu] = NULL;
	return 0;
}
/*
 * Look up the channel registered for @key.
 *
 * Scans the key table from slot 0 upwards and returns the channel of
 * the first slot whose stored key equals @key, or NULL when no slot
 * matches.
 */
struct mcctrl_channel *get_peer_channel(struct mcctrl_usrdata *ud, void *key)
{
	int slot = 0;

	while (slot < ud->num_channels) {
		if (ud->keys[slot] == key) {
			return ud->channels + slot;
		}
		++slot;
	}

	return NULL;
}
#if 1 /* x86 depend, host OS side */
unsigned long translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long rva)
int translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long rva,
unsigned long *rpap, unsigned long *pgsizep)
{
unsigned long rpa;
int offsh;
@@ -45,9 +116,12 @@ unsigned long translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long
int ix;
unsigned long phys;
unsigned long *pt;
int error;
unsigned long pgsize;
rpa = rpt;
offsh = 39;
pgsize = 0;
/* i = 0: PML4, 1: PDPT, 2: PDT, 3: PT */
for (i = 0; i < 4; ++i) {
ix = (rva >> offsh) & 0x1FF;
@@ -60,16 +134,19 @@ unsigned long translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long
if (!(pt[ix] & PTE_P)) {
ihk_device_unmap_virtual(ihk_os_to_dev(os), pt, PAGE_SIZE);
ihk_device_unmap_memory(ihk_os_to_dev(os), phys, PAGE_SIZE);
return -EFAULT;
error = -EFAULT;
goto out;
}
#define PTE_PS 0x080
if (pt[ix] & PTE_PS) {
rpa = pt[ix] & ((1UL << 52) - 1) & ~((1UL << offsh) - 1);
rpa |= rva & ((1UL << offsh) - 1);
pgsize = 1UL << offsh;
rpa = pt[ix] & ((1UL << 52) - 1) & ~(pgsize - 1);
rpa |= rva & (pgsize - 1);
ihk_device_unmap_virtual(ihk_os_to_dev(os), pt, PAGE_SIZE);
ihk_device_unmap_memory(ihk_os_to_dev(os), phys, PAGE_SIZE);
goto out;
error = 0;
goto found;
}
rpa = pt[ix] & ((1UL << 52) - 1) & ~((1UL << 12) - 1);
@@ -77,13 +154,88 @@ unsigned long translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long
ihk_device_unmap_virtual(ihk_os_to_dev(os), pt, PAGE_SIZE);
ihk_device_unmap_memory(ihk_os_to_dev(os), phys, PAGE_SIZE);
}
rpa |= rva & ((1UL << 12) - 1);
pgsize = 1UL << 12;
rpa |= rva & (pgsize - 1);
found:
error = 0;
*rpap = rpa;
*pgsizep = pgsize;
out:
dprintk("translate_rva_to_rpa: rva %#lx --> rpa %#lx\n", rva, rpa);
return rpa;
dprintk("translate_rva_to_rpa: %d rva %#lx --> rpa %#lx (%lx)\n",
error, rva, rpa, pgsize);
return error;
}
#endif
/*
 * Forward a page fault at fault_addr to the peer that is waiting on the
 * channel registered for the current task, then sleep until a reply
 * request arrives on that channel.
 *
 * reason is stored as-is into the response's fault_reason field.
 *
 * Returns 0 when the reply reports success.  Otherwise returns:
 *   -ENOENT  no channel is registered for current,
 *   the non-zero value from wait_event_interruptible() if the wait was
 *            interrupted,
 *   -EIO     the reply is not number __NR_mmap with args[0] == 0x0101,
 *   args[1]  of the reply when it is non-zero.
 */
static int remote_page_fault(struct mcctrl_usrdata *usrdata, void *fault_addr, uint64_t reason)
{
struct mcctrl_channel *channel;
struct syscall_request *req;
struct syscall_response *resp;
int error;
dprintk("remote_page_fault(%p,%p,%llx)\n", usrdata, fault_addr, reason);
/* The registry is keyed by task; find the channel this task is serving. */
channel = get_peer_channel(usrdata, current);
if (!channel) {
error = -ENOENT;
printk("remote_page_fault(%p,%p,%llx):channel not found. %d\n",
usrdata, fault_addr, reason, error);
goto out;
}
req = channel->param.request_va;
resp = channel->param.response_va;
/* Post the fault request: fill in the payload first, publish status last. */
resp->ret = -EFAULT;
resp->fault_address = (unsigned long)fault_addr;
resp->fault_reason = reason;
#define STATUS_PAGE_FAULT 3
/* Clear valid so that the reply can be told apart from the old request. */
req->valid = 0;
/* Make the payload and the cleared valid flag visible before status changes. */
mb();
resp->status = STATUS_PAGE_FAULT;
/* Sleep until channel->req becomes non-zero or a signal arrives. */
error = wait_event_interruptible(channel->wq_syscall, channel->req);
if (error) {
printk("remote_page_fault:interrupted. %d\n", error);
goto out;
}
channel->req = 0;
/* NOTE(review): an invalid request is only logged here; the reply is still parsed below. */
/* NOTE(review): no mb() precedes this read of req->valid, unlike the check in mcexec_wait_syscall - confirm that is intended. */
if (!req->valid) {
printk("remote_page_fault:not valid\n");
}
/* Acknowledge the reply request. */
req->valid = 0;
/* The reply must be __NR_mmap with args[0] == 0x0101; args[1] carries the result. */
if (req->number != __NR_mmap) {
printk("remote_page_fault:unexpected response. %lx %lx\n",
req->number, req->args[0]);
error = -EIO;
goto out;
}
else if (req->args[0] != 0x0101) {
printk("remote_page_fault:unexpected response. %lx %lx\n",
req->number, req->args[0]);
error = -EIO;
goto out;
}
else if (req->args[1] != 0) {
/* Non-zero args[1] is returned to the caller as the error code. */
error = req->args[1];
printk("remote_page_fault:response %d\n", error);
goto out;
}
error = 0;
out:
dprintk("remote_page_fault(%p,%p,%llx): %d\n", usrdata, fault_addr, reason, error);
return error;
}
static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct mcctrl_usrdata * usrdata = vma->vm_file->private_data;
@@ -91,21 +243,48 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
unsigned long rpa;
unsigned long phys;
int error;
int try;
uint64_t reason;
unsigned long pgsize;
unsigned long rva;
unsigned long pfn;
dprintk("mcctrl:page fault:flags %#x pgoff %#lx va %p page %p\n",
vmf->flags, vmf->pgoff, vmf->virtual_address, vmf->page);
rpa = translate_rva_to_rpa(usrdata->os, usrdata->rpgtable,
(unsigned long)vmf->virtual_address);
if ((long)rpa < 0) {
for (try = 1; ; ++try) {
error = translate_rva_to_rpa(usrdata->os, usrdata->rpgtable,
(unsigned long)vmf->virtual_address,
&rpa, &pgsize);
#define NTRIES 2
if (!error || (try >= NTRIES)) {
break;
}
reason = 0;
if (vmf->flags & FAULT_FLAG_WRITE) {
#define PF_WRITE 0x02
reason |= PF_WRITE;
}
error = remote_page_fault(usrdata, vmf->virtual_address, reason);
if (error) {
printk("forward_page_fault failed. %d\n", error);
break;
}
}
if (error) {
printk("mcctrl:page fault:flags %#x pgoff %#lx va %p page %p\n",
vmf->flags, vmf->pgoff, vmf->virtual_address, vmf->page);
return VM_FAULT_SIGBUS;
}
phys = ihk_device_map_memory(dev, rpa, PAGE_SIZE);
error = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, phys>>PAGE_SHIFT);
ihk_device_unmap_memory(dev, phys, PAGE_SIZE);
rva = (unsigned long)vmf->virtual_address & ~(pgsize - 1);
rpa = rpa & ~(pgsize - 1);
phys = ihk_device_map_memory(dev, rpa, pgsize);
pfn = phys >> PAGE_SHIFT;
error = remap_pfn_range(vma, rva, pfn, pgsize, PAGE_SHARED);
ihk_device_unmap_memory(dev, phys, pgsize);
if (error) {
printk("mcctrl:page fault:flags %#x pgoff %#lx va %p page %p\n",
vmf->flags, vmf->pgoff, vmf->virtual_address, vmf->page);
@@ -250,12 +429,6 @@ static void clear_wait(unsigned char *p, int size)
p[size] = 0;
}
/*
 * Post the result of a system call handled on this side to channel @c.
 *
 * @ret is stored into the response's ret field and the response status
 * is then set to 1 (completed).
 */
static void __return_syscall(struct mcctrl_channel *c, int ret)
{
	c->param.response_va->ret = ret;
	/*
	 * Order the result store before the status store, as the other
	 * completion paths in this module do, so that a reader that sees
	 * status == 1 also sees the final ret value.
	 */
	mb();
	c->param.response_va->status = 1;
}
static unsigned long translate_remote_va(struct mcctrl_channel *c,
unsigned long rva)
{
@@ -282,6 +455,7 @@ static unsigned long translate_remote_va(struct mcctrl_channel *c,
//extern struct mcctrl_channel *channels;
#if 0
int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c,
struct syscall_request *sc)
{
@@ -397,4 +571,33 @@ int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c,
}
}
}
#endif
#endif /* !DO_USER_MODE */
static void __return_syscall(struct mcctrl_channel *c, int ret)
{
c->param.response_va->ret = ret;
mb();
c->param.response_va->status = 1;
}
/*
 * Try to service system call request @sc inside the host kernel.
 *
 * A request number with a case in the switch below is expected to
 * compute its result in ret and break; the result is then posted to
 * channel @c through __return_syscall().  No request number is handled
 * at present, so every request takes the default branch.
 *
 * Returns 0 when the request was handled and a reply was posted, or
 * -ENOSYS when the request is not handled here (no reply is posted in
 * that case).
 */
int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall_request *sc)
{
	int error;
	/*
	 * Start from a defined value so that a case added later that breaks
	 * out without assigning ret cannot post an indeterminate result.
	 */
	long ret = -ENOSYS;

	dprintk("__do_in_kernel_syscall(%p,%p,%p %ld)\n", os, c, sc, sc->number);

	switch (sc->number) {
	default:
		error = -ENOSYS;
		goto out;
	}

	__return_syscall(c, ret);

	error = 0;
out:
	dprintk("__do_in_kernel_syscall(%p,%p,%p %ld): %d\n", os, c, sc, sc->number, error);
	return error;
}

View File

@@ -635,33 +635,12 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
pthread_mutex_unlock(lock);
return w.sr.args[0];
case __NR_mmap: {
// w.sr.args[0] is converted to MIC physical address
__dprintf("mcexec.c,mmap,MIC-paddr=%lx,len=%lx,prot=%lx,flags=%lx,fd=%lx,offset=%lx\n",
w.sr.args[0], w.sr.args[1], w.sr.args[2], w.sr.args[3], w.sr.args[4], w.sr.args[5]);
off_t old_off = lseek(w.sr.args[4], 0, SEEK_CUR);
if(old_off == -1) { __dprint("mcexec.c,mmap,lseek failed\n"); ret = -errno; goto mmap_out; }
off_t rlseek = lseek(w.sr.args[4], w.sr.args[5], SEEK_SET);
if(rlseek == -1) { __dprint("mcexec.c,mmap,lseek failed\n"); ret = -errno; goto mmap_out; }
ssize_t toread = w.sr.args[1];
ret = 0;
while(toread > 0) {
__dprintf("mcexec.c,mmap,read,addr=%lx,len=%lx\n", (long int)((void *)dma_buf + w.sr.args[1] - toread), toread);
ssize_t rread = read(w.sr.args[4], (void *)dma_buf + w.sr.args[1] - toread, toread);
if(rread == 0) {
__dprint("mcexec.c,mmap,read==0\n");
goto mmap_zero_out;
} else if(rread < 0) {
__dprint("mcexec.c,mmap,read failed\n"); ret = -errno; break;
}
toread -= rread;
}
mmap_zero_out:
rlseek = lseek(w.sr.args[4], old_off, SEEK_SET);
if(rlseek == -1) { __dprint("mcexec.c,mmap,lseek failed\n"); ret = -errno; }
mmap_out:
do_syscall_return(fd, cpu, ret, 1, (unsigned long)dma_buf, w.sr.args[0], w.sr.args[1]);
break; }
case __NR_mmap:
case __NR_munmap:
case __NR_mprotect:
/* reserved for internal use */
do_syscall_return(fd, cpu, -ENOSYS, 0, 0, 0, 0);
break;
#ifdef USE_SYSCALL_MOD_CALL
case 303:{

View File

@@ -126,6 +126,8 @@ struct syscall_request {
/*
 * Reply slot for one forwarded system call, polled by do_syscall().
 */
struct syscall_response {
/* Progress marker: 0 = in progress, 1 = completed, 3 = page fault requested (STATUS_* values used by do_syscall()). */
unsigned long status;
/* Result value of the system call. */
long ret;
/* Faulting address; passed to page_fault_process() when status is STATUS_PAGE_FAULT. */
unsigned long fault_address;
/* Fault reason; passed to page_fault_process() together with fault_address. */
unsigned long fault_reason;
};
struct syscall_post {

View File

@@ -92,6 +92,7 @@ static void send_syscall(struct syscall_request *req)
memcpy_async_wait(&fin);
barrier();
cpu_local_var(scp).request_va->valid = 1;
*(unsigned int *)cpu_local_var(scp).doorbell_va = w;
@@ -108,6 +109,8 @@ static void send_syscall(struct syscall_request *req)
int do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx)
{
struct syscall_response *res = cpu_local_var(scp).response_va;
struct syscall_request req2;
int error;
dkprintf("SC(%d)[%3d] sending syscall\n",
ihk_mc_get_processor_id(),
@@ -119,10 +122,29 @@ int do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx)
ihk_mc_get_processor_id(),
req->number);
while (!res->status) {
cpu_pause();
}
#define STATUS_IN_PROGRESS 0
#define STATUS_COMPLETED 1
#define STATUS_PAGE_FAULT 3
while (res->status != STATUS_COMPLETED) {
while (res->status == STATUS_IN_PROGRESS) {
cpu_pause();
}
if (res->status == STATUS_PAGE_FAULT) {
error = page_fault_process(cpu_local_var(current),
(void *)res->fault_address,
res->fault_reason);
/* send result */
req2.number = __NR_mmap;
#define PAGER_RESUME_PAGE_FAULT 0x0101
req2.args[0] = PAGER_RESUME_PAGE_FAULT;
req2.args[1] = error;
send_syscall(&req2);
}
}
dkprintf("SC(%d)[%3d] got host reply: %d \n",
ihk_mc_get_processor_id(),
req->number, res->ret);
@@ -399,7 +421,7 @@ SYSCALL_DECLARE(mmap)
vrflags |= VR_IO_NOCACHE;
}
#endif
else if ((len == 64*1024*1024) || (len == 128*1024*1024)) {
else {
vrflags |= VR_DEMAND_PAGING;
}
}