implement mmap(MAP_POPULATE)
The populate_process_memory() function is not efficient, because it checks whether every small page is present.
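
For context, MAP_POPULATE asks the kernel to pre-fault a mapping's pages at mmap() time, so that later accesses do not take demand-paging faults. A minimal user-space illustration of the behavior this commit implements (standard Linux API; not code from this repository):

#define _GNU_SOURCE             /* MAP_POPULATE needs this with glibc */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
    size_t len = 16 * 1024 * 1024;      /* 16 MiB */

    /* Anonymous private mapping, pre-populated at mmap() time. */
    char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, -1, 0);
    if (buf == MAP_FAILED) {
        perror("mmap");
        return EXIT_FAILURE;
    }

    /* This first touch should not fault: the pages were populated above. */
    memset(buf, 0, len);

    munmap(buf, len);
    return EXIT_SUCCESS;
}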
@@ -62,6 +62,7 @@
 #define PT_PHYSMASK (((1UL << 52) - 1) & PAGE_MASK)
 
 #define PF_PRESENT ((pte_t)0x01) /* entry is valid */
+#define PF_WRITABLE ((pte_t)0x02)
 #define PF_SIZE ((pte_t)0x80) /* entry points large page */
 
 #define PFL4_PRESENT ((pte_t)0x01)
@@ -130,6 +131,11 @@ static inline int pte_is_present(pte_t *ptep)
     return !!(*ptep & PF_PRESENT);
 }
+
+static inline int pte_is_writable(pte_t *ptep)
+{
+    return !!(*ptep & PF_WRITABLE);
+}
 
 static inline uintptr_t pte_get_phys(pte_t *ptep)
 {
     return (*ptep & PT_PHYSMASK);
@@ -151,6 +151,9 @@ struct x86_regs {
  * bit 2 == 0: kernel-mode access 1: user-mode access
  * bit 3 == 1: use of reserved bit detected
  * bit 4 == 1: fault was an instruction fetch
+ *
+ * internal use:
+ * bit 30 == 1: don't use COW page to resolve page fault.
  */
 enum x86_pf_error_code {
     PF_PROT = 1 << 0,
@@ -158,6 +161,8 @@ enum x86_pf_error_code {
     PF_USER = 1 << 2,
     PF_RSVD = 1 << 3,
     PF_INSTR = 1 << 4,
+
+    PF_DONTCOW = 1 << 30,
 };
 
 #endif
@@ -163,6 +163,7 @@ void hold_process(struct process *proc);
 void free_process(struct process *proc);
 void flush_process_memory(struct process *proc);
 void free_process_memory(struct process *proc);
+int populate_process_memory(struct process *proc, void *start, size_t len);
 
 int add_process_memory_range(struct process *process,
                              unsigned long start, unsigned long end,
@@ -1037,6 +1037,41 @@ static int do_page_fault_process(struct process *proc, void *fault_addr0, uint64
             goto out;
         }
     }
+    else if (reason & PF_DONTCOW) {
+        pte_t *ptep;
+        void *ptepgaddr;
+        size_t ptepgsize;
+        int ptep2align;
+
+        ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
+        ptep = ihk_mc_pt_lookup_pte(vm->page_table, fault_addr0,
+                &ptepgaddr, &ptepgsize, &ptep2align);
+        ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
+
+        if (!ptep || pte_is_null(ptep)) {
+            error = page_fault_process_memory_range(vm, range, fault_addr);
+            if (error == -ERESTART) {
+                goto out;
+            }
+            else if (error) {
+                kprintf("[%d]do_page_fault_process(%p,%lx,%lx):"
+                        "fault range failed. %d\n",
+                        ihk_mc_get_processor_id(), proc,
+                        fault_addr0, reason, error);
+                goto out;
+            }
+        }
+        else if (!pte_is_writable(ptep) && (range->flag & VR_PROT_WRITE)) {
+            error = protection_fault_process_memory_range(vm, range, fault_addr);
+            if (error) {
+                kprintf("[%d]do_page_fault_process(%p,%lx,%lx):"
+                        "protection range failed. %d\n",
+                        ihk_mc_get_processor_id(), proc,
+                        fault_addr0, reason, error);
+                goto out;
+            }
+        }
+    }
     else {
         error = page_fault_process_memory_range(vm, range, fault_addr);
         if (error == -ERESTART) {
@@ -1345,6 +1380,29 @@ void free_process_memory(struct process *proc)
     free_process(vm->owner_process);
 }
+
+int populate_process_memory(struct process *proc, void *start, size_t len)
+{
+    int error;
+    const int reason = PF_USER | PF_DONTCOW;
+    uintptr_t end;
+    uintptr_t addr;
+
+    end = (uintptr_t)start + len;
+    for (addr = (uintptr_t)start; addr < end; addr += PAGE_SIZE) {
+        error = page_fault_process(proc, (void *)addr, reason);
+        if (error) {
+            ekprintf("populate_process_range:page_fault_process"
+                    "(%p,%lx,%lx) failed %d\n",
+                    proc, addr, reason, error);
+            goto out;
+        }
+    }
+
+    error = 0;
+out:
+    return error;
+}
 
 void hold_process(struct process *proc)
 {
     if (proc->status & (PS_ZOMBIE | PS_EXITED)) {
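The populate_process_memory() added above faults every 4 KiB page unconditionally, which is the inefficiency the commit message points out: even when a range is already backed by present (possibly large) pages, each small page is still checked one at a time. A hedged sketch of one possible refinement, not part of this commit, reusing the lookup helpers from the PF_DONTCOW path; the function name, the use of proc->vm, and the exact locking are assumptions:

/* Hypothetical variant: skip ranges that are already mapped, advancing
 * by the size of the page the lookup reports instead of one small page. */
int populate_process_memory_sparse(struct process *proc, void *start, size_t len)
{
    const int reason = PF_USER | PF_DONTCOW;
    uintptr_t addr = (uintptr_t)start;
    uintptr_t end = (uintptr_t)start + len;
    int error = 0;

    while (addr < end) {
        pte_t *ptep;
        void *pgaddr = (void *)addr;
        size_t pgsize = PAGE_SIZE;
        int p2align;

        ihk_mc_spinlock_lock_noirq(&proc->vm->page_table_lock);
        ptep = ihk_mc_pt_lookup_pte(proc->vm->page_table, (void *)addr,
                &pgaddr, &pgsize, &p2align);
        ihk_mc_spinlock_unlock_noirq(&proc->vm->page_table_lock);

        if (ptep && pte_is_present(ptep)) {
            /* Already populated: step over the whole page,
             * which may be a large page. */
            addr = (uintptr_t)pgaddr + pgsize;
            continue;
        }

        error = page_fault_process(proc, (void *)addr, reason);
        if (error) {
            break;      /* caller logs the failure */
        }
        addr += PAGE_SIZE;
    }
    return error;
}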
@@ -404,6 +404,7 @@ SYSCALL_DECLARE(mmap)
         | MAP_PRIVATE // 02
         | MAP_FIXED // 10
         | MAP_ANONYMOUS // 20
+        | MAP_POPULATE // 8000
         ;
     const int ignored_flags = 0
 #ifdef USE_NOCACHE_MMAP
@@ -420,7 +421,6 @@ SYSCALL_DECLARE(mmap)
         | MAP_GROWSDOWN // 0100
         | MAP_EXECUTABLE // 1000
         | MAP_LOCKED // 2000
-        | MAP_POPULATE // 8000
         | MAP_NONBLOCK // 00010000
         | MAP_HUGETLB // 00040000
         ;
@@ -612,6 +612,28 @@ out:
     }
     ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock);
 
+    if (!error && (flags & MAP_POPULATE)) {
+        error = populate_process_memory(proc, (void *)addr, len);
+        if (error) {
+            ekprintf("sys_mmap:populate_process_memory"
+                    "(%p,%p,%lx) failed %d\n",
+                    proc, (void *)addr, len, error);
+            /*
+             * In this case, the mapping established by this call
+             * should be unmapped before mmap() returns with an
+             * error.
+             *
+             * However, the mapping cannot simply be unmapped,
+             * because other threads may modify it now that
+             * memory_range_lock has been released.
+             *
+             * For the moment, as in linux-2.6.38-8, the physical
+             * page allocation failure is ignored.
+             */
+            error = 0;
+        }
+    }
+
 out2:
     if (p) {
         ihk_mc_free_pages(p, npages);