/*
 * Patch summary: adds a FILEOFF flag (bit 11) for x86 page-table entries.
 *  - arch-memory.h: defines PFLn_IGNORED_11 and PFLn_FILEOFF for n = 1..3 and
 *    PTATTR_FILEOFF, adds pte_is_fileoff() and pte_get_off(), and moves
 *    PTE_NULL and the pte_t typedef above enum ihk_mc_pt_attribute.
 *  - memory.c: adds PTATTR_FILEOFF to attr_mask, makes ihk_mc_pt_print_pte()
 *    print the raw entry at each level, and makes split_large_page(),
 *    clear_range_l1(), clear_range_l2() and change_attr_range_l1() to _l3()
 *    test the flag.
 *  - process.c: page_fault_process_memory_range() takes the memobj offset
 *    from the PTE when the flag is set.
 */
diff --git a/arch/x86/kernel/include/arch-memory.h b/arch/x86/kernel/include/arch-memory.h index 1181b0e3..9fda8e41 100644 --- a/arch/x86/kernel/include/arch-memory.h +++ b/arch/x86/kernel/include/arch-memory.h @@ -78,6 +78,8 @@ #define PFL3_DIRTY ((pte_t)0x40) #define PFL3_SIZE ((pte_t)0x80) /* Used in 1G page */ #define PFL3_GLOBAL ((pte_t)0x100) +#define PFL3_IGNORED_11 ((pte_t)1 << 11) +#define PFL3_FILEOFF PFL3_IGNORED_11 /* FILEOFF flag is bit 11 at every level (PFL3, PFL2, PFL1) */ #define PFL2_PRESENT ((pte_t)0x01) #define PFL2_WRITABLE ((pte_t)0x02) @@ -88,6 +90,8 @@ #define PFL2_DIRTY ((pte_t)0x40) #define PFL2_SIZE ((pte_t)0x80) /* Used in 2M page */ #define PFL2_GLOBAL ((pte_t)0x100) +#define PFL2_IGNORED_11 ((pte_t)1 << 11) +#define PFL2_FILEOFF PFL2_IGNORED_11 #define PFL1_PRESENT ((pte_t)0x01) #define PFL1_WRITABLE ((pte_t)0x02) @@ -96,6 +100,8 @@ #define PFL1_PCD ((pte_t)0x10) #define PFL1_ACCESSED ((pte_t)0x20) #define PFL1_DIRTY ((pte_t)0x40) +#define PFL1_IGNORED_11 ((pte_t)1 << 11) +#define PFL1_FILEOFF PFL1_IGNORED_11 /* We allow user programs to access all the memory */ #define PFL4_KERN_ATTR (PFL4_PRESENT | PFL4_WRITABLE) @@ -108,6 +114,9 @@ #define PFL3_PDIR_ATTR (PFL3_PRESENT | PFL3_WRITABLE | PFL3_USER) #define PFL2_PDIR_ATTR (PFL2_PRESENT | PFL2_WRITABLE | PFL2_USER) +#define PTE_NULL ((pte_t)0) +typedef unsigned long pte_t; + /* For easy conversion, it is better to be the same as architecture's ones */ enum ihk_mc_pt_attribute { PTATTR_ACTIVE = 0x01, @@ -115,14 +124,12 @@ enum ihk_mc_pt_attribute { PTATTR_USER = 0x04, PTATTR_DIRTY = 0x40, PTATTR_LARGEPAGE = 0x80, + PTATTR_FILEOFF = PFL2_FILEOFF, /* same bit value as PFL1_FILEOFF and PFL3_FILEOFF */ PTATTR_NO_EXECUTE = 0x8000000000000000, PTATTR_UNCACHABLE = 0x10000, PTATTR_FOR_USER = 0x20000, }; -#define PTE_NULL ((pte_t)0) -typedef unsigned long pte_t; - static inline int pte_is_null(pte_t *ptep) { return (*ptep == PTE_NULL); @@ -153,11 +160,31 @@ static inline int pte_is_dirty(pte_t *ptep, size_t pgsize) } } +static inline int pte_is_fileoff(pte_t *ptep, size_t pgsize) /* returns 1 when the FILEOFF bit is set in *ptep and 0 otherwise; pgsize selects the per-level macro and any other size tests PTATTR_FILEOFF */ +{ + switch (pgsize) { + case 
PTL1_SIZE: return !!(*ptep & PFL1_FILEOFF); + case PTL2_SIZE: return !!(*ptep & PFL2_FILEOFF); + case PTL3_SIZE: return !!(*ptep & PFL3_FILEOFF); + default: +#if 0 /* XXX: workaround. cannot use panic() here */ + panic("pte_is_fileoff"); +#else + return !!(*ptep & PTATTR_FILEOFF); +#endif + } +} + static inline uintptr_t pte_get_phys(pte_t *ptep) { return (*ptep & PT_PHYSMASK); } +static inline off_t pte_get_off(pte_t *ptep, size_t pgsize) /* returns *ptep masked with PAGE_MASK, cast to off_t; pgsize is not used */ +{ + return (off_t)(*ptep & PAGE_MASK); +} + #if 0 /* XXX: workaround. cannot use panic() here */ static inline void pte_xchg(pte_t *ptep, pte_t *valp) { diff --git a/arch/x86/kernel/memory.c b/arch/x86/kernel/memory.c index 97f02658..d1118c00 100644 --- a/arch/x86/kernel/memory.c +++ b/arch/x86/kernel/memory.c @@ -219,7 +219,13 @@ static struct page_table *__alloc_new_pt(enum ihk_mc_ap_flag ap_flag) * but L2 and L1 do not! */ -static enum ihk_mc_pt_attribute attr_mask = PTATTR_WRITABLE | PTATTR_USER | PTATTR_ACTIVE; +static enum ihk_mc_pt_attribute attr_mask + = 0 + | PTATTR_FILEOFF + | PTATTR_WRITABLE + | PTATTR_USER + | PTATTR_ACTIVE + | 0; #define ATTR_MASK attr_mask void enable_ptattr_no_execute(void) @@ -523,6 +529,7 @@ int ihk_mc_pt_print_pte(struct page_table *pt, void *virt) if (!(pt->entry[l4idx] & PFL4_PRESENT)) { __kprintf("0x%lX l4idx not present! \n", (unsigned long)virt); + __kprintf("l4 entry: 0x%lX\n", pt->entry[l4idx]); return -EFAULT; } pt = phys_to_virt(pt->entry[l4idx] & PAGE_MASK); @@ -530,6 +537,7 @@ int ihk_mc_pt_print_pte(struct page_table *pt, void *virt) __kprintf("l3 table: 0x%lX l3idx: %d \n", virt_to_phys(pt), l3idx); if (!(pt->entry[l3idx] & PFL3_PRESENT)) { __kprintf("0x%lX l3idx not present! 
\n", (unsigned long)virt); + __kprintf("l3 entry: 0x%lX\n", pt->entry[l3idx]); return -EFAULT; } pt = phys_to_virt(pt->entry[l3idx] & PAGE_MASK); @@ -537,6 +545,7 @@ int ihk_mc_pt_print_pte(struct page_table *pt, void *virt) __kprintf("l2 table: 0x%lX l2idx: %d \n", virt_to_phys(pt), l2idx); if (!(pt->entry[l2idx] & PFL2_PRESENT)) { __kprintf("0x%lX l2idx not present! \n", (unsigned long)virt); + __kprintf("l2 entry: 0x%lX\n", pt->entry[l2idx]); return -EFAULT; } if ((pt->entry[l2idx] & PFL2_SIZE)) { @@ -546,11 +555,12 @@ int ihk_mc_pt_print_pte(struct page_table *pt, void *virt) __kprintf("l1 table: 0x%lX l1idx: %d \n", virt_to_phys(pt), l1idx); if (!(pt->entry[l1idx] & PFL1_PRESENT)) { - __kprintf("0x%lX PTE (l1) not present! entry: 0x%lX\n", - (unsigned long)virt, pt->entry[l1idx]); + __kprintf("0x%lX l1idx not present! \n", (unsigned long)virt); + __kprintf("l1 entry: 0x%lX\n", pt->entry[l1idx]); return -EFAULT; } + __kprintf("l1 entry: 0x%lX\n", pt->entry[l1idx]); return 0; } @@ -822,8 +832,16 @@ static int split_large_page(pte_t *ptep) return -ENOMEM; } - phys = *ptep & PT_PHYSMASK; - attr = *ptep & ~PFL2_SIZE; + if (!(*ptep & PFL2_FILEOFF)) { /* no FILEOFF flag: separate the address and attribute bits with PT_PHYSMASK */ + phys = *ptep & PT_PHYSMASK; + attr = *ptep & ~PT_PHYSMASK; + attr &= ~PFL2_SIZE; + } + else { /* FILEOFF flag set: separate them with PAGE_MASK instead */ + phys = *ptep & PAGE_MASK; /* file offset */ + attr = *ptep & ~PAGE_MASK; + attr &= ~PFL2_SIZE; + } for (i = 0; i < PT_ENTRIES; ++i) { pt->entry[i] = (phys + (i * PTL1_SIZE)) | attr; @@ -1008,7 +1026,7 @@ static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base, memobj_flush_page(args->memobj, phys, PTL1_SIZE); } - if (args->free_physical) { + if (!(old & PFL1_FILEOFF) && args->free_physical) { /* page_unmap and ihk_mc_free_pages are skipped when the old entry carried FILEOFF */ page = phys_to_page(phys); if (page && page_unmap(page)) { ihk_mc_free_pages(phys_to_virt(phys), 1); @@ -1054,7 +1072,7 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base, memobj_flush_page(args->memobj, phys, PTL2_SIZE); } - if (args->free_physical) { + if (!(old & PFL2_FILEOFF) && args->free_physical) { 
page = phys_to_page(phys); if (page && page_unmap(page)) { ihk_mc_free_pages(phys_to_virt(phys), PTL2_SIZE/PTL1_SIZE); @@ -1148,7 +1166,7 @@ static int change_attr_range_l1(void *arg0, pte_t *ptep, uint64_t base, { struct change_attr_args *args = arg0; - if (*ptep == PTE_NULL) { + if ((*ptep == PTE_NULL) || (*ptep & PFL1_FILEOFF)) { /* FILEOFF entries return -ENOENT, the same as empty entries */ return -ENOENT; } @@ -1163,7 +1181,7 @@ static int change_attr_range_l2(void *arg0, pte_t *ptep, uint64_t base, int error; struct page_table *pt; - if (*ptep == PTE_NULL) { + if ((*ptep == PTE_NULL) || (*ptep & PFL2_FILEOFF)) { return -ENOENT; } @@ -1182,7 +1200,9 @@ static int change_attr_range_l2(void *arg0, pte_t *ptep, uint64_t base, } if (*ptep & PFL2_SIZE) { - *ptep = (*ptep & ~args->clrpte) | args->setpte; + if (!(*ptep & PFL2_FILEOFF)) { /* NOTE(review): FILEOFF entries already returned -ENOENT at the top of this function, so this test looks always true unless the code between the two hunks can set the flag - confirm */ + *ptep = (*ptep & ~args->clrpte) | args->setpte; + } return 0; } @@ -1195,7 +1215,7 @@ static int change_attr_range_l3(void *arg0, pte_t *ptep, uint64_t base, { struct page_table *pt; - if (*ptep == PTE_NULL) { + if ((*ptep == PTE_NULL) || (*ptep & PFL3_FILEOFF)) { return -ENOENT; } diff --git a/kernel/process.c b/kernel/process.c index 1c6f75b2..373d3b71 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -1012,7 +1012,8 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang ihk_mc_spinlock_lock_noirq(&vm->page_table_lock); /*****/ ptep = ihk_mc_pt_lookup_pte(vm->page_table, (void *)fault_addr, &pgaddr, &pgsize, &p2align); - if (!(reason & PF_PROT) && ptep && !pte_is_null(ptep)) { + if (!(reason & PF_PROT) && ptep && !pte_is_null(ptep) + && !pte_is_fileoff(ptep, pgsize)) { /* FILEOFF entries skip this branch and are handled together with null entries below */ if (!pte_is_present(ptep)) { error = -EFAULT; kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):PROT_NONE. 
%d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error); @@ -1034,11 +1035,16 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang } attr = arch_vrflag_to_ptattr(range->flag, reason, ptep); pgaddr = (void *)(fault_addr & ~(pgsize - 1)); - if (!ptep || pte_is_null(ptep)) { + if (!ptep || pte_is_null(ptep) || pte_is_fileoff(ptep, pgsize)) { if (range->memobj) { off_t off; - off = range->objoff + ((uintptr_t)pgaddr - range->start); + if (!ptep || !pte_is_fileoff(ptep, pgsize)) { + off = range->objoff + ((uintptr_t)pgaddr - range->start); + } + else { + off = pte_get_off(ptep, pgsize); /* FILEOFF entry: use the offset stored in the PTE instead of computing it from the range */ + } error = memobj_get_page(range->memobj, off, p2align, &phys); if (error) { if (error != -ERESTART) {