refuse writes to read-only memory

Read-only memory could be modified through a system call that is
forwarded to the host.

To fix this, a read-only mmap() now places a read-only vm_area in
mcexec's address space.
This commit is contained in:
NAKAMURA Gou
2013-11-06 16:02:30 +09:00
parent 89be2af09a
commit bdc02bb687
4 changed files with 161 additions and 42 deletions

View File

@@ -256,6 +256,18 @@ out:
return error;
}
/*
* remap_pfn_range() may set VM_PFN_AT_MMAP on the vma.
* VM_PFN_AT_MMAP causes the following problems:
*
* 1) vm_pgoff is changed. As a result, the i_mmap tree is corrupted.
* 2) Duplicate free_memtype() calls occur.
*
* These problems may be solved in linux-3.7.
* Until they are fixed, vm_insert_pfn() is used instead.
*/
#define USE_VM_INSERT_PFN 1
static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct mcctrl_usrdata * usrdata = vma->vm_file->private_data;
@@ -268,6 +280,9 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
unsigned long pgsize;
unsigned long rva;
unsigned long pfn;
#if USE_VM_INSERT_PFN
size_t pix;
#endif
dprintk("mcctrl:page fault:flags %#x pgoff %#lx va %p page %p\n",
vmf->flags, vmf->pgoff, vmf->virtual_address, vmf->page);
@@ -303,7 +318,16 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
phys = ihk_device_map_memory(dev, rpa, pgsize);
pfn = phys >> PAGE_SHIFT;
error = remap_pfn_range(vma, rva, pfn, pgsize, PAGE_SHARED);
#if USE_VM_INSERT_PFN
for (pix = 0; pix < (pgsize / PAGE_SIZE); ++pix) {
error = vm_insert_pfn(vma, rva+(pix*PAGE_SIZE), pfn+pix);
if (error) {
break;
}
}
#else
error = remap_pfn_range(vma, rva, pfn, pgsize, vma->vm_page_prot);
#endif
ihk_device_unmap_memory(dev, phys, pgsize);
if (error) {
printk("mcctrl:page fault:flags %#x pgoff %#lx va %p page %p\n",
@@ -898,32 +922,67 @@ static void __return_syscall(struct mcctrl_channel *c, int ret)
c->param.response_va->status = 1;
}
static void clear_pte_range(uintptr_t addr, uintptr_t len)
/*
 * Re-create the mapping of [rva, rva+len) in the current process with the
 * protection bits given in `prot`.
 *
 * The VMA found for `rva` supplies the backing file and the page offset,
 * and the range is mapped again on top of it with MAP_FIXED|MAP_SHARED.
 *
 * rva:  start address of the range to remap
 * len:  length of the range in bytes
 * prot: protection bits handed to do_mmap_pgoff()
 *
 * Returns 0 on success, -ENOMEM when no VMA contains `rva`, or the error
 * value produced by do_mmap_pgoff().
 */
static int remap_user_space(uintptr_t rva, size_t len, int prot)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	uintptr_t map;

	dprintk("remap_user_space(%lx,%lx,%x)\n", rva, len, prot);

	/* mmap_sem is held for writing across both the lookup and the remap */
	down_write(&mm->mmap_sem);

	vma = find_vma(mm, rva);
	if (vma && (vma->vm_start <= rva)) {
		/* file offset (in pages) that corresponds to rva inside this VMA */
		pgoff_t pgoff = vma->vm_pgoff
			+ ((rva - vma->vm_start) >> PAGE_SHIFT);

		map = do_mmap_pgoff(vma->vm_file, rva, len,
				prot, MAP_FIXED|MAP_SHARED, pgoff);
	}
	else {
		/* nothing found, or the VMA that was found starts above rva */
		printk("remap_user_space(%lx,%lx,%x):find_vma failed. %p %lx %lx\n",
				rva, len, prot, vma,
				(vma)? vma->vm_start: -1,
				(vma)? vma->vm_end: 0);
		map = -ENOMEM;
	}

	up_write(&mm->mmap_sem);

	dprintk("remap_user_space(%lx,%lx,%x): %lx (%ld)\n",
			rva, len, prot, (long)map, (long)map);

	if (IS_ERR_VALUE(map)) {
		return (int)map;
	}
	return 0;
}
static void clear_pte_range(uintptr_t start, uintptr_t len)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
uintptr_t addr;
uintptr_t end;
down_read(&mm->mmap_sem);
vma = find_vma(mm, 0);
if (!vma) {
printk("clear_pte_range(%lx,%lx):find_vma(0) failed\n",
addr, len);
up_read(&mm->mmap_sem);
return;
}
addr = start;
while (addr < (start + len)) {
vma = find_vma(mm, addr);
if (!vma) {
break;
}
start = addr;
end = addr + len;
if (start < vma->vm_start) {
start = vma->vm_start;
}
if (vma->vm_end < end) {
end = vma->vm_end;
}
if (start < end) {
zap_vma_ptes(vma, start, end-start);
if (addr < vma->vm_start) {
addr = vma->vm_start;
}
end = start + len;
if (vma->vm_end < end) {
end = vma->vm_end;
}
if (addr < end) {
zap_vma_ptes(vma, addr, end-addr);
}
addr = end;
}
up_read(&mm->mmap_sem);
@@ -946,6 +1005,10 @@ int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall
ret = 0;
break;
case __NR_mprotect:
ret = remap_user_space(sc->args[0], sc->args[1], sc->args[2]);
break;
default:
error = -ENOSYS;
goto out;

View File

@@ -140,8 +140,8 @@ int add_process_memory_range(struct process *process,
unsigned long start, unsigned long end,
unsigned long phys, unsigned long flag,
struct memobj *memobj, off_t objoff);
int remove_process_memory_range(
struct process *process, unsigned long start, unsigned long end);
int remove_process_memory_range(struct process *process, unsigned long start,
unsigned long end, int *ro_freedp);
int split_process_memory_range(struct process *process,
struct vm_range *range, uintptr_t addr, struct vm_range **splitp);
int join_process_memory_range(struct process *process, struct vm_range *surviving,

View File

@@ -361,13 +361,14 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range)
}
int remove_process_memory_range(struct process *process,
unsigned long start, unsigned long end)
unsigned long start, unsigned long end, int *ro_freedp)
{
struct process_vm * const vm = process->vm;
struct vm_range *range;
struct vm_range *next;
int error;
struct vm_range *freerange;
int ro_freed = 0;
dkprintf("remove_process_memory_range(%p,%lx,%lx)\n",
process, start, end);
@@ -401,6 +402,10 @@ int remove_process_memory_range(struct process *process,
}
}
if (!(freerange->flag & VR_PROT_WRITE)) {
ro_freed = 1;
}
error = free_process_memory_range(process->vm, freerange);
if (error) {
ekprintf("remove_process_memory_range(%p,%lx,%lx):"
@@ -411,8 +416,11 @@ int remove_process_memory_range(struct process *process,
}
dkprintf("remove_process_memory_range(%p,%lx,%lx): 0\n",
process, start, end);
if (ro_freedp) {
*ro_freedp = ro_freed;
}
dkprintf("remove_process_memory_range(%p,%lx,%lx): 0 %d\n",
process, start, end, ro_freed);
return 0;
}

View File

@@ -274,15 +274,47 @@ static void clear_host_pte(uintptr_t addr, size_t len)
return;
}
/*
 * Ask the host to apply protection `prot` to [addr, addr+len) by
 * forwarding an __NR_mprotect request through syscall_generic_forwarding().
 *
 * addr: start address of the range
 * len:  length of the range in bytes
 * prot: protection bits passed as the third syscall argument
 *
 * Returns 0 on success; otherwise the forwarded call's result, converted
 * to int, after logging the failure.
 */
static int set_host_vma(uintptr_t addr, size_t len, int prot)
{
	ihk_mc_user_context_t ctx;
	long rc;

	ihk_mc_syscall_arg0(&ctx) = addr;
	ihk_mc_syscall_arg1(&ctx) = len;
	ihk_mc_syscall_arg2(&ctx) = prot;

	rc = syscall_generic_forwarding(__NR_mprotect, &ctx);
	if (rc != 0) {
		kprintf("set_host_vma(%lx,%lx,%x) failed. %ld\n",
				addr, len, prot, rc);
	}

	return (int)rc;
}
/*
 * Remove [addr, addr+len) from the current process's memory ranges and
 * update the host side to match.
 *
 * remove_process_memory_range() reports through ro_freed whether any of
 * the freed ranges lacked write permission:
 *  - on error, or when no such range was freed, the host PTEs for the
 *    range are cleared with clear_host_pte();
 *  - otherwise the host vma is switched back to PROT_READ|PROT_WRITE with
 *    set_host_vma().
 *
 * The callers visible in this file invoke it with
 * proc->vm->memory_range_lock held.
 *
 * Returns 0 on success, or the error from remove_process_memory_range()
 * or set_host_vma().
 */
static int do_munmap(void *addr, size_t len)
{
int error;
int ro_freed;
/* page frees are bracketed by begin/finish_free_pages_pending() */
begin_free_pages_pending();
/*
 * NOTE(review): two argument lines follow the call below; the first looks
 * like the pre-change form (without &ro_freed) left over from the diff
 * rendering -- confirm against the real source file.
 */
error = remove_process_memory_range(cpu_local_var(current),
(intptr_t)addr, (intptr_t)addr+len);
(intptr_t)addr, (intptr_t)addr+len, &ro_freed);
// XXX: TLB flush
flush_tlb();
/* ro_freed is only read when error == 0 (short-circuit evaluation) */
if (error || !ro_freed) {
clear_host_pte((uintptr_t)addr, len);
}
else {
error = set_host_vma((uintptr_t)addr, len, PROT_READ|PROT_WRITE);
if (error) {
kprintf("sys_munmap:set_host_vma failed. %d\n", error);
/* fall through: the error is returned to the caller below */
}
}
finish_free_pages_pending();
return error;
}
@@ -376,10 +408,10 @@ SYSCALL_DECLARE(mmap)
void *p = NULL;
int vrflags;
intptr_t phys;
int unmapped = 0;
struct memobj *memobj = NULL;
int maxprot;
int denied;
int ro_vma_mapped = 0;
dkprintf("[%d]sys_mmap(%lx,%lx,%x,%x,%d,%lx)\n",
ihk_mc_get_processor_id(),
@@ -419,7 +451,7 @@ SYSCALL_DECLARE(mmap)
ekprintf("sys_mmap(%lx,%lx,%x,%x,%x,%lx):EINVAL\n",
addr0, len0, prot, flags, fd, off);
error = -EINVAL;
goto out;
goto out2;
}
/* check not supported requests */
@@ -429,7 +461,7 @@ SYSCALL_DECLARE(mmap)
addr0, len0, prot, flags, fd, off,
(flags & ~(supported_flags | ignored_flags)));
error = -EINVAL;
goto out;
goto out2;
}
if ((flags & MAP_SHARED) && !(flags & MAP_ANONYMOUS)) {
@@ -437,19 +469,17 @@ SYSCALL_DECLARE(mmap)
addr0, len0, prot, flags, fd, off,
(flags & ~(supported_flags | ignored_flags)));
error = -EINVAL;
goto out;
goto out2;
}
ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock);
if (flags & MAP_FIXED) {
/* clear specified address range */
unmapped = 1;
error = do_munmap((void *)addr, len);
if (error) {
ekprintf("sys_mmap:do_munmap(%lx,%lx) failed. %d\n",
addr, len, error);
ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock);
goto out;
}
}
@@ -459,7 +489,6 @@ SYSCALL_DECLARE(mmap)
if (error) {
ekprintf("sys_mmap:search_free_space(%lx,%lx) failed. %d\n",
len, region->map_end, error);
ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock);
goto out;
}
region->map_end = addr + len;
@@ -487,13 +516,22 @@ SYSCALL_DECLARE(mmap)
vrflags |= VR_DEMAND_PAGING;
}
if (!(prot & PROT_WRITE)) {
error = set_host_vma(addr, len, PROT_READ);
if (error) {
kprintf("sys_mmap:set_host_vma failed. %d\n", error);
goto out;
}
ro_vma_mapped = 1;
}
phys = 0;
maxprot = PROT_READ | PROT_WRITE | PROT_EXEC;
if (!(flags & MAP_ANONYMOUS)) {
error = fileobj_create(fd, &memobj, &maxprot);
if (error) {
ekprintf("sys_mmap:fileobj_create failed. %d\n", error);
ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock);
goto out;
}
}
@@ -511,7 +549,6 @@ SYSCALL_DECLARE(mmap)
if (p == NULL) {
ekprintf("sys_mmap:allocate_pages(%d,%d) failed.\n",
npages, p2align);
ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock);
error = -ENOMEM;
goto out;
}
@@ -524,7 +561,6 @@ SYSCALL_DECLARE(mmap)
denied = prot & ~maxprot;
if (denied) {
ekprintf("sys_mmap:denied %x. %x %x\n", denied, prot, maxprot);
ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock);
error = (denied == PROT_EXEC)? -EPERM: -EACCES;
goto out;
}
@@ -536,26 +572,27 @@ SYSCALL_DECLARE(mmap)
"(%p,%lx,%lx,%lx,%lx) failed %d\n",
proc, addr, addr+len,
virt_to_phys(p), vrflags, error);
ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock);
goto out;
}
ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock);
error = 0;
p = NULL;
memobj = NULL;
ro_vma_mapped = 0;
out:
if (ro_vma_mapped) {
(void)set_host_vma(addr, len, PROT_READ|PROT_WRITE);
}
ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock);
out2:
if (p) {
ihk_mc_free_pages(p, npages);
}
if (memobj) {
memobj_release(memobj);
}
if (unmapped) {
clear_host_pte(addr, len);
}
dkprintf("[%d]sys_mmap(%lx,%lx,%x,%x,%d,%lx): %ld %lx\n",
ihk_mc_get_processor_id(),
addr0, len0, prot, flags, fd, off, error, addr);
@@ -588,7 +625,6 @@ SYSCALL_DECLARE(munmap)
ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock);
error = do_munmap((void *)addr, len);
ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock);
clear_host_pte(addr, len);
out:
dkprintf("[%d]sys_munmap(%lx,%lx): %d\n",
@@ -612,6 +648,7 @@ SYSCALL_DECLARE(mprotect)
struct vm_range *changed;
const unsigned long protflags = PROT_TO_VR_FLAG(prot);
unsigned long denied;
int ro_changed = 0;
dkprintf("[%d]sys_mprotect(%lx,%lx,%x)\n",
ihk_mc_get_processor_id(), start, len0, prot);
@@ -718,6 +755,10 @@ SYSCALL_DECLARE(mprotect)
}
}
if ((range->flag ^ protflags) & VR_PROT_WRITE) {
ro_changed = 1;
}
error = change_prot_process_memory_range(proc, range, protflags);
if (error) {
ekprintf("sys_mprotect(%lx,%lx,%x):change failed. %d\n",
@@ -743,6 +784,13 @@ SYSCALL_DECLARE(mprotect)
out:
// XXX: TLB flush
flush_tlb();
if (ro_changed && !error) {
error = set_host_vma(start, len, prot & (PROT_READ|PROT_WRITE));
if (error) {
kprintf("sys_mprotect:set_host_vma failed. %d\n", error);
/* through */
}
}
ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock);
dkprintf("[%d]sys_mprotect(%lx,%lx,%x): %d\n",
ihk_mc_get_processor_id(), start, len0, prot, error);