implement mmap(MAP_POPULATE)
The populate_process_memory() function is not efficient, because it checks every small page individually to see whether it is present.
This commit is contained in:
@@ -62,6 +62,7 @@
|
|||||||
#define PT_PHYSMASK (((1UL << 52) - 1) & PAGE_MASK)
|
#define PT_PHYSMASK (((1UL << 52) - 1) & PAGE_MASK)
|
||||||
|
|
||||||
#define PF_PRESENT ((pte_t)0x01) /* entry is valid */
|
#define PF_PRESENT ((pte_t)0x01) /* entry is valid */
|
||||||
|
#define PF_WRITABLE ((pte_t)0x02)
|
||||||
#define PF_SIZE ((pte_t)0x80) /* entry points large page */
|
#define PF_SIZE ((pte_t)0x80) /* entry points large page */
|
||||||
|
|
||||||
#define PFL4_PRESENT ((pte_t)0x01)
|
#define PFL4_PRESENT ((pte_t)0x01)
|
||||||
@@ -130,6 +131,11 @@ static inline int pte_is_present(pte_t *ptep)
|
|||||||
return !!(*ptep & PF_PRESENT);
|
return !!(*ptep & PF_PRESENT);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Return 1 when the PF_WRITABLE bit is set in the entry at ptep, else 0. */
static inline int pte_is_writable(pte_t *ptep)
{
	const pte_t writable_bit = *ptep & PF_WRITABLE;

	return writable_bit != 0;
}
|
||||||
|
|
||||||
static inline uintptr_t pte_get_phys(pte_t *ptep)
|
static inline uintptr_t pte_get_phys(pte_t *ptep)
|
||||||
{
|
{
|
||||||
return (*ptep & PT_PHYSMASK);
|
return (*ptep & PT_PHYSMASK);
|
||||||
|
|||||||
@@ -151,6 +151,9 @@ struct x86_regs {
|
|||||||
* bit 2 == 0: kernel-mode access 1: user-mode access
|
* bit 2 == 0: kernel-mode access 1: user-mode access
|
||||||
* bit 3 == 1: use of reserved bit detected
|
* bit 3 == 1: use of reserved bit detected
|
||||||
* bit 4 == 1: fault was an instruction fetch
|
* bit 4 == 1: fault was an instruction fetch
|
||||||
|
*
|
||||||
|
* internal use:
|
||||||
|
* bit 30 == 1: don't use COW page to resolve page fault.
|
||||||
*/
|
*/
|
||||||
enum x86_pf_error_code {
|
enum x86_pf_error_code {
|
||||||
PF_PROT = 1 << 0,
|
PF_PROT = 1 << 0,
|
||||||
@@ -158,6 +161,8 @@ enum x86_pf_error_code {
|
|||||||
PF_USER = 1 << 2,
|
PF_USER = 1 << 2,
|
||||||
PF_RSVD = 1 << 3,
|
PF_RSVD = 1 << 3,
|
||||||
PF_INSTR = 1 << 4,
|
PF_INSTR = 1 << 4,
|
||||||
|
|
||||||
|
PF_DONTCOW = 1 << 30,
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -163,6 +163,7 @@ void hold_process(struct process *proc);
|
|||||||
void free_process(struct process *proc);
|
void free_process(struct process *proc);
|
||||||
void flush_process_memory(struct process *proc);
|
void flush_process_memory(struct process *proc);
|
||||||
void free_process_memory(struct process *proc);
|
void free_process_memory(struct process *proc);
|
||||||
|
int populate_process_memory(struct process *proc, void *start, size_t len);
|
||||||
|
|
||||||
int add_process_memory_range(struct process *process,
|
int add_process_memory_range(struct process *process,
|
||||||
unsigned long start, unsigned long end,
|
unsigned long start, unsigned long end,
|
||||||
|
|||||||
@@ -1037,6 +1037,41 @@ static int do_page_fault_process(struct process *proc, void *fault_addr0, uint64
|
|||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (reason & PF_DONTCOW) {
|
||||||
|
pte_t *ptep;
|
||||||
|
void *ptepgaddr;
|
||||||
|
size_t ptepgsize;
|
||||||
|
int ptep2align;
|
||||||
|
|
||||||
|
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
|
||||||
|
ptep = ihk_mc_pt_lookup_pte(vm->page_table, fault_addr0,
|
||||||
|
&ptepgaddr, &ptepgsize, &ptep2align);
|
||||||
|
ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
|
||||||
|
|
||||||
|
if (!ptep || pte_is_null(ptep)) {
|
||||||
|
error = page_fault_process_memory_range(vm, range, fault_addr);
|
||||||
|
if (error == -ERESTART) {
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
else if (error) {
|
||||||
|
kprintf("[%d]do_page_fault_process(%p,%lx,%lx):"
|
||||||
|
"fault range failed. %d\n",
|
||||||
|
ihk_mc_get_processor_id(), proc,
|
||||||
|
fault_addr0, reason, error);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (!pte_is_writable(ptep) && (range->flag & VR_PROT_WRITE)) {
|
||||||
|
error = protection_fault_process_memory_range(vm, range, fault_addr);
|
||||||
|
if (error) {
|
||||||
|
kprintf("[%d]do_page_fault_process(%p,%lx,%lx):"
|
||||||
|
"protection range failed. %d\n",
|
||||||
|
ihk_mc_get_processor_id(), proc,
|
||||||
|
fault_addr0, reason, error);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
error = page_fault_process_memory_range(vm, range, fault_addr);
|
error = page_fault_process_memory_range(vm, range, fault_addr);
|
||||||
if (error == -ERESTART) {
|
if (error == -ERESTART) {
|
||||||
@@ -1345,6 +1380,29 @@ void free_process_memory(struct process *proc)
|
|||||||
free_process(vm->owner_process);
|
free_process(vm->owner_process);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int populate_process_memory(struct process *proc, void *start, size_t len)
|
||||||
|
{
|
||||||
|
int error;
|
||||||
|
const int reason = PF_USER | PF_DONTCOW;
|
||||||
|
uintptr_t end;
|
||||||
|
uintptr_t addr;
|
||||||
|
|
||||||
|
end = (uintptr_t)start + len;
|
||||||
|
for (addr = (uintptr_t)start; addr < end; addr += PAGE_SIZE) {
|
||||||
|
error = page_fault_process(proc, (void *)addr, reason);
|
||||||
|
if (error) {
|
||||||
|
ekprintf("populate_process_range:page_fault_process"
|
||||||
|
"(%p,%lx,%lx) failed %d\n",
|
||||||
|
proc, addr, reason, error);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
error = 0;
|
||||||
|
out:
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
void hold_process(struct process *proc)
|
void hold_process(struct process *proc)
|
||||||
{
|
{
|
||||||
if (proc->status & (PS_ZOMBIE | PS_EXITED)) {
|
if (proc->status & (PS_ZOMBIE | PS_EXITED)) {
|
||||||
|
|||||||
@@ -404,6 +404,7 @@ SYSCALL_DECLARE(mmap)
|
|||||||
| MAP_PRIVATE // 02
|
| MAP_PRIVATE // 02
|
||||||
| MAP_FIXED // 10
|
| MAP_FIXED // 10
|
||||||
| MAP_ANONYMOUS // 20
|
| MAP_ANONYMOUS // 20
|
||||||
|
| MAP_POPULATE // 8000
|
||||||
;
|
;
|
||||||
const int ignored_flags = 0
|
const int ignored_flags = 0
|
||||||
#ifdef USE_NOCACHE_MMAP
|
#ifdef USE_NOCACHE_MMAP
|
||||||
@@ -420,7 +421,6 @@ SYSCALL_DECLARE(mmap)
|
|||||||
| MAP_GROWSDOWN // 0100
|
| MAP_GROWSDOWN // 0100
|
||||||
| MAP_EXECUTABLE // 1000
|
| MAP_EXECUTABLE // 1000
|
||||||
| MAP_LOCKED // 2000
|
| MAP_LOCKED // 2000
|
||||||
| MAP_POPULATE // 8000
|
|
||||||
| MAP_NONBLOCK // 00010000
|
| MAP_NONBLOCK // 00010000
|
||||||
| MAP_HUGETLB // 00040000
|
| MAP_HUGETLB // 00040000
|
||||||
;
|
;
|
||||||
@@ -612,6 +612,28 @@ out:
|
|||||||
}
|
}
|
||||||
ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock);
|
ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock);
|
||||||
|
|
||||||
|
if (!error && (flags & MAP_POPULATE)) {
|
||||||
|
error = populate_process_memory(proc, (void *)addr, len);
|
||||||
|
if (error) {
|
||||||
|
ekprintf("sys_mmap:populate_process_memory"
|
||||||
|
"(%p,%p,%lx) failed %d\n",
|
||||||
|
proc, (void *)addr, len, error);
|
||||||
|
/*
|
||||||
|
* In this case,
|
||||||
|
* the mapping established by this call should be unmapped
|
||||||
|
* before mmap() returns with error.
|
||||||
|
*
|
||||||
|
* However, the mapping cannot simply be unmapped,
|
||||||
|
* because the mapping can be modified by other thread
|
||||||
|
* because memory_range_lock has been released.
|
||||||
|
*
|
||||||
|
* For the moment, like linux-2.6.38-8,
|
||||||
|
* the physical page allocation failure is ignored.
|
||||||
|
*/
|
||||||
|
error = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
out2:
|
out2:
|
||||||
if (p) {
|
if (p) {
|
||||||
ihk_mc_free_pages(p, npages);
|
ihk_mc_free_pages(p, npages);
|
||||||
|
|||||||
Reference in New Issue
Block a user