support large pages

This commit is contained in:
NAKAMURA Gou
2016-04-21 22:38:29 +09:00
parent b3bec32e99
commit d4a0b32f06
8 changed files with 469 additions and 252 deletions

View File

@@ -39,7 +39,8 @@ struct shmid_ds {
pid_t shm_cpid;
pid_t shm_lpid;
uint64_t shm_nattch;
uint8_t padding[16];
uint8_t padding[12];
int init_pgshift;
};
#endif /* HEADER_ARCH_SHM_H */

View File

@@ -124,11 +124,8 @@ struct page_table {
static struct page_table *init_pt;
static ihk_spinlock_t init_pt_lock;
#ifdef USE_LARGE_PAGES
static int use_1gb_page = 0;
#endif
#ifdef USE_LARGE_PAGES
static void check_available_page_size(void)
{
uint32_t edx;
@@ -139,7 +136,6 @@ static void check_available_page_size(void)
return;
}
#endif
static unsigned long setup_l2(struct page_table *pt,
unsigned long page_head, unsigned long start,
@@ -534,28 +530,33 @@ int ihk_mc_pt_virt_to_phys(struct page_table *pt,
if (!(pt->entry[l4idx] & PFL4_PRESENT)) {
return -EFAULT;
}
pt = phys_to_virt(pt->entry[l4idx] & PAGE_MASK);
pt = phys_to_virt(pte_get_phys(&pt->entry[l4idx]));
if (!(pt->entry[l3idx] & PFL3_PRESENT)) {
return -EFAULT;
}
pt = phys_to_virt(pt->entry[l3idx] & PAGE_MASK);
if ((pt->entry[l3idx] & PFL3_SIZE)) {
*phys = pte_get_phys(&pt->entry[l3idx])
| (v & (PTL3_SIZE - 1));
return 0;
}
pt = phys_to_virt(pte_get_phys(&pt->entry[l3idx]));
if (!(pt->entry[l2idx] & PFL2_PRESENT)) {
return -EFAULT;
}
if ((pt->entry[l2idx] & PFL2_SIZE)) {
*phys = (pt->entry[l2idx] & LARGE_PAGE_MASK) |
(v & (LARGE_PAGE_SIZE - 1));
*phys = pte_get_phys(&pt->entry[l2idx])
| (v & (PTL2_SIZE - 1));
return 0;
}
pt = phys_to_virt(pt->entry[l2idx] & PAGE_MASK);
pt = phys_to_virt(pte_get_phys(&pt->entry[l2idx]));
if (!(pt->entry[l1idx] & PFL1_PRESENT)) {
return -EFAULT;
}
*phys = (pt->entry[l1idx] & PT_PHYSMASK) | (v & (PAGE_SIZE - 1));
*phys = pte_get_phys(&pt->entry[l1idx]) | (v & (PTL1_SIZE - 1));
return 0;
}
@@ -862,12 +863,19 @@ static int walk_pte_l4(struct page_table *pt, uint64_t base, uint64_t start,
return ret;
}
static int split_large_page(pte_t *ptep)
static int split_large_page(pte_t *ptep, size_t pgsize)
{
struct page_table *pt;
uint64_t phys;
pte_t attr;
uintptr_t phys_base;
int i;
uintptr_t phys;
struct page *page;
pte_t pte;
if ((pgsize != PTL3_SIZE) && (pgsize != PTL2_SIZE)) {
ekprintf("split_large_page:invalid pgsize %#lx\n", pgsize);
return -EINVAL;
}
pt = __alloc_new_pt(IHK_MC_AP_NOWAIT);
if (pt == NULL) {
@@ -875,29 +883,47 @@ static int split_large_page(pte_t *ptep)
return -ENOMEM;
}
if (!(*ptep & PFL2_FILEOFF)) {
phys = *ptep & PT_PHYSMASK;
attr = *ptep & ~PT_PHYSMASK;
attr &= ~PFL2_SIZE;
pte = *ptep;
if (pgsize == PTL2_SIZE) {
/* break down to basic page size */
pte &= ~PFL2_SIZE;
}
if (pte_is_fileoff(ptep, pgsize)) {
phys_base = NOPHYS;
}
else {
phys = *ptep & PAGE_MASK; /* file offset */
attr = *ptep & ~PAGE_MASK;
attr &= ~PFL2_SIZE;
phys_base = pte_get_phys(ptep);
}
for (i = 0; i < PT_ENTRIES; ++i) {
pt->entry[i] = (phys + (i * PTL1_SIZE)) | attr;
if (phys_base != NOPHYS) {
phys = phys_base + (i * pgsize / PT_ENTRIES);
page = phys_to_page(phys);
if (page) {
page_map(page);
}
}
pt->entry[i] = pte;
pte += pgsize / PT_ENTRIES;
}
*ptep = (virt_to_phys(pt) & PT_PHYSMASK) | PFL2_PDIR_ATTR;
if (phys_base != NOPHYS) {
page = phys_to_page(phys_base);
if (page && page_unmap(page)) {
kprintf("split_large_page:page_unmap:%p\n", page);
panic("split_large_page:page_unmap\n");
}
}
return 0;
}
struct visit_pte_args {
page_table_t pt;
enum visit_pte_flag flags;
int padding;
int pgshift;
pte_visitor_t *funcp;
void *arg;
};
@@ -926,11 +952,11 @@ static int visit_pte_l2(void *arg0, pte_t *ptep, uintptr_t base,
return 0;
}
#ifdef USE_LARGE_PAGES
if (((*ptep == PTE_NULL) || (*ptep & PFL2_SIZE))
&& (start <= base)
&& (((base + PTL2_SIZE) <= end)
|| (end == 0))) {
|| (end == 0))
&& (!args->pgshift || (args->pgshift == PTL2_SHIFT))) {
error = (*args->funcp)(args->arg, args->pt, ptep,
(void *)base, PTL2_SHIFT);
if (error != -E2BIG) {
@@ -942,7 +968,6 @@ static int visit_pte_l2(void *arg0, pte_t *ptep, uintptr_t base,
ekprintf("visit_pte_l2:split large page\n");
return -ENOMEM;
}
#endif
if (*ptep == PTE_NULL) {
pt = __alloc_new_pt(IHK_MC_AP_NOWAIT);
@@ -970,11 +995,12 @@ static int visit_pte_l3(void *arg0, pte_t *ptep, uintptr_t base,
return 0;
}
#ifdef USE_LARGE_PAGES
if (((*ptep == PTE_NULL) || (*ptep & PFL3_SIZE))
&& (start <= base)
&& (((base + PTL3_SIZE) <= end)
|| (end == 0))) {
|| (end == 0))
&& (!args->pgshift || (args->pgshift == PTL3_SHIFT))
&& use_1gb_page) {
error = (*args->funcp)(args->arg, args->pt, ptep,
(void *)base, PTL3_SHIFT);
if (error != -E2BIG) {
@@ -986,7 +1012,6 @@ static int visit_pte_l3(void *arg0, pte_t *ptep, uintptr_t base,
ekprintf("visit_pte_l3:split large page\n");
return -ENOMEM;
}
#endif
if (*ptep == PTE_NULL) {
pt = __alloc_new_pt(IHK_MC_AP_NOWAIT);
@@ -1029,7 +1054,7 @@ static int visit_pte_l4(void *arg0, pte_t *ptep, uintptr_t base,
return error;
}
int visit_pte_range(page_table_t pt, void *start0, void *end0,
int visit_pte_range(page_table_t pt, void *start0, void *end0, int pgshift,
enum visit_pte_flag flags, pte_visitor_t *funcp, void *arg)
{
const uintptr_t start = (uintptr_t)start0;
@@ -1040,6 +1065,7 @@ int visit_pte_range(page_table_t pt, void *start0, void *end0,
args.flags = flags;
args.funcp = funcp;
args.arg = arg;
args.pgshift = pgshift;
return walk_pte_l4(pt, 0, start, end, &visit_pte_l4, &args);
}
@@ -1063,23 +1089,26 @@ static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base,
return -ENOENT;
}
phys = *ptep & PT_PHYSMASK;
old = xchg(ptep, PTE_NULL);
remote_flush_tlb_cpumask(args->vm, base, ihk_mc_get_processor_id());
if ((old & PFL1_DIRTY) && args->memobj) {
page = NULL;
if (!pte_is_fileoff(&old, PTL1_SIZE)) {
phys = pte_get_phys(&old);
page = phys_to_page(phys);
}
if (page && page_is_in_memobj(page) && (old & PFL1_DIRTY)) {
memobj_flush_page(args->memobj, phys, PTL1_SIZE);
}
if (!(old & PFL1_FILEOFF) && args->free_physical) {
page = phys_to_page(phys);
if (page && page_unmap(page)) {
ihk_mc_free_pages(phys_to_virt(phys), 1);
}
args->vm->currss -= PAGE_SIZE;
args->vm->currss -= PTL1_SIZE;
}
remote_flush_tlb_cpumask(args->vm, base, ihk_mc_get_processor_id());
return 0;
}
@@ -1099,36 +1128,35 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base,
if ((*ptep & PFL2_SIZE)
&& ((base < start) || (end < (base + PTL2_SIZE)))) {
error = split_large_page(ptep);
if (error) {
ekprintf("clear_range_l2(%p,%p,%lx,%lx,%lx):"
"split failed. %d\n",
args0, ptep, base, start, end, error);
return error;
}
if (*ptep & PFL2_SIZE) {
panic("clear_range_l2:split");
}
error = -EINVAL;
ekprintf("clear_range_l2(%p,%p,%lx,%lx,%lx):"
"split page. %d\n",
args0, ptep, base, start, end, error);
return error;
}
if (*ptep & PFL2_SIZE) {
phys = *ptep & PT_PHYSMASK;
old = xchg(ptep, PTE_NULL);
remote_flush_tlb_cpumask(args->vm, base,
ihk_mc_get_processor_id());
if ((old & PFL2_DIRTY) && args->memobj) {
page = NULL;
if (!pte_is_fileoff(&old, PTL2_SIZE)) {
phys = pte_get_phys(&old);
page = phys_to_page(phys);
}
if (page && page_is_in_memobj(page) && (old & PFL2_DIRTY)) {
memobj_flush_page(args->memobj, phys, PTL2_SIZE);
}
if (!(old & PFL2_FILEOFF) && args->free_physical) {
page = phys_to_page(phys);
if (page && page_unmap(page)) {
ihk_mc_free_pages(phys_to_virt(phys), PTL2_SIZE/PTL1_SIZE);
}
args->vm->currss -= LARGE_PAGE_SIZE;
args->vm->currss -= PTL2_SIZE;
}
remote_flush_tlb_cpumask(args->vm, base, ihk_mc_get_processor_id());
return 0;
}
@@ -1140,6 +1168,8 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base,
if ((start <= base) && ((base + PTL2_SIZE) <= end)) {
*ptep = PTE_NULL;
remote_flush_tlb_cpumask(args->vm, base,
ihk_mc_get_processor_id());
arch_free_page(pt);
}
@@ -1149,14 +1179,65 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base,
static int clear_range_l3(void *args0, pte_t *ptep, uint64_t base,
uint64_t start, uint64_t end)
{
struct clear_range_args *args = args0;
int error;
uint64_t phys;
pte_t old;
struct page *page;
struct page_table *pt;
if (*ptep == PTE_NULL) {
return -ENOENT;
}
if ((*ptep & PFL3_SIZE)
&& ((base < start) || (end < (base + PTL3_SIZE)))) {
error = -EINVAL;
ekprintf("clear_range_l3(%p,%p,%lx,%lx,%lx):"
"split page. %d\n",
args0, ptep, base, start, end, error);
return error;
}
if (*ptep & PFL3_SIZE) {
old = xchg(ptep, PTE_NULL);
remote_flush_tlb_cpumask(args->vm, base,
ihk_mc_get_processor_id());
page = NULL;
if (!pte_is_fileoff(&old, PTL3_SIZE)) {
phys = pte_get_phys(&old);
page = phys_to_page(phys);
}
if (page && page_is_in_memobj(page) && (old & PFL3_DIRTY)) {
memobj_flush_page(args->memobj, phys, PTL3_SIZE);
}
if (!(old & PFL3_FILEOFF) && args->free_physical) {
if (page && page_unmap(page)) {
ihk_mc_free_pages(phys_to_virt(phys), PTL3_SIZE/PTL1_SIZE);
}
args->vm->currss -= PTL3_SIZE;
}
return 0;
}
pt = phys_to_virt(*ptep & PT_PHYSMASK);
return walk_pte_l2(pt, base, start, end, &clear_range_l2, args0);
error = walk_pte_l2(pt, base, start, end, &clear_range_l2, args0);
if (error && (error != -ENOENT)) {
return error;
}
if (use_1gb_page && (start <= base) && ((base + PTL3_SIZE) <= end)) {
*ptep = PTE_NULL;
remote_flush_tlb_cpumask(args->vm, base,
ihk_mc_get_processor_id());
arch_free_page(pt);
}
return 0;
}
static int clear_range_l4(void *args0, pte_t *ptep, uint64_t base,
@@ -1179,7 +1260,9 @@ static int clear_range(struct page_table *pt, struct process_vm *vm,
int error;
struct clear_range_args args;
if ((USER_END <= start) || (USER_END < end) || (end <= start)) {
if ((start < vm->region.user_start)
|| (vm->region.user_end < end)
|| (end <= start)) {
ekprintf("clear_range(%p,%p,%p,%x):"
"invalid start and/or end.\n",
pt, start, end, free_physical);
@@ -1241,16 +1324,11 @@ static int change_attr_range_l2(void *arg0, pte_t *ptep, uint64_t base,
if ((*ptep & PFL2_SIZE)
&& ((base < start) || (end < (base + PTL2_SIZE)))) {
error = split_large_page(ptep);
if (error) {
ekprintf("change_attr_range_l2(%p,%p,%lx,%lx,%lx):"
"split failed. %d\n",
arg0, ptep, base, start, end, error);
return error;
}
if (*ptep & PFL2_SIZE) {
panic("change_attr_range_l2:split");
}
error = -EINVAL;
ekprintf("change_attr_range_l2(%p,%p,%lx,%lx,%lx):"
"split page. %d\n",
arg0, ptep, base, start, end, error);
return error;
}
if (*ptep & PFL2_SIZE) {
@@ -1267,12 +1345,30 @@ static int change_attr_range_l2(void *arg0, pte_t *ptep, uint64_t base,
/*
 * Level-3 step of the attribute-change walk.
 *
 * arg0 is a struct change_attr_args; its clrpte bits are cleared from and
 * its setpte bits are OR-ed into a large (PFL3_SIZE) entry.  Entries that
 * are not large are treated as pointers to a lower-level table and the walk
 * continues with change_attr_range_l2.
 *
 * Returns:
 *   -ENOENT  the entry is PTE_NULL or has PFL3_FILEOFF set
 *   -EINVAL  a large entry is only partially covered by [start, end);
 *            no splitting is attempted here
 *   0        a large entry was processed
 *   otherwise whatever walk_pte_l2() returns for the lower level
 */
static int change_attr_range_l3(void *arg0, pte_t *ptep, uint64_t base,
		uint64_t start, uint64_t end)
{
	struct change_attr_args *args = arg0;
	struct page_table *pt;
	int error;

	if ((*ptep == PTE_NULL) || (*ptep & PFL3_FILEOFF)) {
		return -ENOENT;
	}

	if (!(*ptep & PFL3_SIZE)) {
		/* ordinary directory entry: descend into the next level */
		pt = phys_to_virt(*ptep & PT_PHYSMASK);
		return walk_pte_l2(pt, base, start, end,
				&change_attr_range_l2, arg0);
	}

	/* large entry: the requested range has to cover all of it */
	if ((base < start) || (end < (base + PTL3_SIZE))) {
		error = -EINVAL;
		ekprintf("change_attr_range_l3(%p,%p,%lx,%lx,%lx):"
				"split page. %d\n",
				arg0, ptep, base, start, end, error);
		return error;
	}

	if (!(*ptep & PFL3_FILEOFF)) {
		*ptep = (*ptep & ~args->clrpte) | args->setpte;
	}
	return 0;
}
@@ -1303,7 +1399,7 @@ int ihk_mc_pt_change_attr_range(page_table_t pt, void *start0, void *end0,
return walk_pte_l4(pt, 0, start, end, &change_attr_range_l4, &args);
}
static pte_t *lookup_pte(struct page_table *pt, uintptr_t virt,
static pte_t *lookup_pte(struct page_table *pt, uintptr_t virt, int pgshift,
uintptr_t *basep, size_t *sizep, int *p2alignp)
{
int l4idx, l3idx, l2idx, l1idx;
@@ -1314,63 +1410,46 @@ static pte_t *lookup_pte(struct page_table *pt, uintptr_t virt,
GET_VIRT_INDICES(virt, l4idx, l3idx, l2idx, l1idx);
#ifdef USE_LARGE_PAGES
if (use_1gb_page) {
ptep = NULL;
base = GET_INDICES_VIRT(l4idx, 0, 0, 0);
size = PTL3_SIZE;
p2align = PTL3_SHIFT - PTL1_SHIFT;
}
else {
ptep = NULL;
base = GET_INDICES_VIRT(l4idx, l3idx, 0, 0);
size = PTL2_SIZE;
p2align = PTL2_SHIFT - PTL1_SHIFT;
}
#else
ptep = NULL;
base = GET_INDICES_VIRT(l4idx, l3idx, l2idx, l1idx);
size = PTL1_SIZE;
p2align = PTL1_SHIFT - PTL1_SHIFT;
#endif
if (!pgshift) {
pgshift = (use_1gb_page)? PTL3_SHIFT: PTL2_SHIFT;
}
if (pt->entry[l4idx] == PTE_NULL) {
if (pgshift > PTL3_SHIFT) {
pgshift = PTL3_SHIFT;
}
goto out;
}
pt = phys_to_virt(pt->entry[l4idx] & PT_PHYSMASK);
pt = phys_to_virt(pte_get_phys(&pt->entry[l4idx]));
if ((pt->entry[l3idx] == PTE_NULL)
|| (pt->entry[l3idx] & PFL3_SIZE)) {
#ifdef USE_LARGE_PAGES
if (use_1gb_page) {
if (pgshift >= PTL3_SHIFT) {
ptep = &pt->entry[l3idx];
base = GET_INDICES_VIRT(l4idx, l3idx, 0, 0);
size = PTL3_SIZE;
p2align = PTL3_SHIFT - PTL1_SHIFT;
pgshift = PTL3_SHIFT;
}
#endif
goto out;
}
pt = phys_to_virt(pt->entry[l3idx] & PT_PHYSMASK);
pt = phys_to_virt(pte_get_phys(&pt->entry[l3idx]));
if ((pt->entry[l2idx] == PTE_NULL)
|| (pt->entry[l2idx] & PFL2_SIZE)) {
#ifdef USE_LARGE_PAGES
ptep = &pt->entry[l2idx];
base = GET_INDICES_VIRT(l4idx, l3idx, l2idx, 0);
size = PTL2_SIZE;
p2align = PTL2_SHIFT - PTL1_SHIFT;
#endif
if (pgshift >= PTL2_SHIFT) {
ptep = &pt->entry[l2idx];
pgshift = PTL2_SHIFT;
}
goto out;
}
pt = phys_to_virt(pt->entry[l2idx] & PT_PHYSMASK);
pt = phys_to_virt(pte_get_phys(&pt->entry[l2idx]));
ptep = &pt->entry[l1idx];
base = GET_INDICES_VIRT(l4idx, l3idx, l2idx, l1idx);
size = PTL1_SIZE;
p2align = PTL1_SHIFT - PTL1_SHIFT;
pgshift = PTL1_SHIFT;
out:
size = (size_t)1 << pgshift;
base = virt & ~(size - 1);
p2align = pgshift - PAGE_SHIFT;
if (basep) *basep = base;
if (sizep) *sizep = size;
if (p2alignp) *p2alignp = p2align;
@@ -1378,21 +1457,21 @@ out:
return ptep;
}
pte_t *ihk_mc_pt_lookup_pte(page_table_t pt, void *virt, void **basep,
size_t *sizep, int *p2alignp)
pte_t *ihk_mc_pt_lookup_pte(page_table_t pt, void *virt, int pgshift,
void **basep, size_t *sizep, int *p2alignp)
{
pte_t *ptep;
uintptr_t base;
size_t size;
int p2align;
dkprintf("ihk_mc_pt_lookup_pte(%p,%p)\n", pt, virt);
ptep = lookup_pte(pt, (uintptr_t)virt, &base, &size, &p2align);
dkprintf("ihk_mc_pt_lookup_pte(%p,%p,%d)\n", pt, virt, pgshift);
ptep = lookup_pte(pt, (uintptr_t)virt, pgshift, &base, &size, &p2align);
if (basep) *basep = (void *)base;
if (sizep) *sizep = size;
if (p2alignp) *p2alignp = p2align;
dkprintf("ihk_mc_pt_lookup_pte(%p,%p): %p %lx %lx %d\n",
pt, virt, ptep, base, size, p2align);
dkprintf("ihk_mc_pt_lookup_pte(%p,%p,%d): %p %lx %lx %d\n",
pt, virt, pgshift, ptep, base, size, p2align);
return ptep;
}
@@ -1400,7 +1479,7 @@ struct set_range_args {
page_table_t pt;
uintptr_t phys;
enum ihk_mc_pt_attribute attr;
int padding;
int pgshift;
uintptr_t diff;
struct process_vm *vm;
};
@@ -1438,39 +1517,50 @@ int set_range_l2(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start,
struct set_range_args *args = args0;
int error;
struct page_table *pt;
#ifdef USE_LARGE_PAGES
uintptr_t phys;
#endif
struct page_table *newpt = NULL;
pte_t pte;
dkprintf("set_range_l2(%lx,%lx,%lx)\n", base, start, end);
retry:
if (*ptep == PTE_NULL) {
#ifdef USE_LARGE_PAGES
if ((start <= base) && ((base + PTL2_SIZE) <= end)
&& ((args->diff & (PTL2_SIZE - 1)) == 0)) {
&& ((args->diff & (PTL2_SIZE - 1)) == 0)
&& (!args->pgshift
|| (args->pgshift == PTL2_SHIFT))) {
phys = args->phys + (base - start);
*ptep = phys | attr_to_l2attr(
args->attr|PTATTR_LARGEPAGE);
error = 0;
dkprintf("set_range_l2(%lx,%lx,%lx):"
"large page. %d %lx\n",
"2MiB page. %d %lx\n",
base, start, end, error, *ptep);
goto out;
}
#endif
pt = __alloc_new_pt(IHK_MC_AP_NOWAIT);
if (pt == NULL) {
error = -ENOMEM;
ekprintf("set_range_l2(%lx,%lx,%lx):"
"__alloc_new_pt failed. %d %lx\n",
base, start, end, error, *ptep);
(void)clear_range(args->pt, args->vm, start, base,
KEEP_PHYSICAL, NULL);
goto out;
if (!newpt) {
newpt = __alloc_new_pt(IHK_MC_AP_NOWAIT);
if (newpt == NULL) {
error = -ENOMEM;
ekprintf("set_range_l2(%lx,%lx,%lx):"
"__alloc_new_pt failed. %d %lx\n",
base, start, end, error, *ptep);
(void)clear_range(args->pt, args->vm, start, base,
KEEP_PHYSICAL, NULL);
goto out;
}
}
*ptep = virt_to_phys(pt) | PFL2_PDIR_ATTR;
pte = virt_to_phys(newpt) | PFL2_PDIR_ATTR;
pte = atomic_cmpxchg8(ptep, PTE_NULL, pte);
if (pte != PTE_NULL) {
/* failed to set PDTe */
goto retry;
}
pt = newpt;
newpt = NULL;
}
else if (*ptep & PFL2_SIZE) {
error = -EBUSY;
@@ -1494,6 +1584,9 @@ int set_range_l2(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start,
error = 0;
out:
if (newpt) {
arch_free_page(newpt);
}
dkprintf("set_range_l2(%lx,%lx,%lx): %d %lx\n",
base, start, end, error, *ptep);
return error;
@@ -1506,18 +1599,17 @@ int set_range_l3(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start,
pte_t pte;
struct page_table *pt;
int error;
#ifdef USE_LARGE_PAGES
struct set_range_args *args = args0;
uintptr_t phys;
#endif
dkprintf("set_range_l3(%lx,%lx,%lx)\n", base, start, end);
retry:
if (*ptep == PTE_NULL) {
#ifdef USE_LARGE_PAGES
if ((start <= base) && ((base + PTL3_SIZE) <= end)
&& ((args->diff & (PTL3_SIZE - 1)) == 0)
&& (!args->pgshift
|| (args->pgshift == PTL3_SHIFT))
&& use_1gb_page) {
phys = args->phys + (base - start);
*ptep = phys | attr_to_l3attr(
@@ -1528,7 +1620,6 @@ retry:
base, start, end, error, *ptep);
goto out;
}
#endif
if (!newpt) {
newpt = __alloc_new_pt(IHK_MC_AP_NOWAIT);
@@ -1537,6 +1628,8 @@ retry:
ekprintf("set_range_l3(%lx,%lx,%lx):"
"__alloc_new_pt failed. %d %lx\n",
base, start, end, error, *ptep);
(void)clear_range(args->pt, args->vm, start,
base, KEEP_PHYSICAL, NULL);
goto out;
}
}
@@ -1556,6 +1649,8 @@ retry:
ekprintf("set_range_l3(%lx,%lx,%lx):"
"page exists. %d %lx\n",
base, start, end, error, *ptep);
(void)clear_range(args->pt, args->vm, start, base,
KEEP_PHYSICAL, NULL);
goto out;
}
else {
@@ -1583,6 +1678,7 @@ out:
int set_range_l4(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start,
uintptr_t end)
{
struct set_range_args *args = args0;
struct page_table *newpt = NULL;
pte_t pte;
struct page_table *pt;
@@ -1599,6 +1695,8 @@ retry:
ekprintf("set_range_l4(%lx,%lx,%lx):"
"__alloc_new_pt failed. %d %lx\n",
base, start, end, error, *ptep);
(void)clear_range(args->pt, args->vm, start,
base, KEEP_PHYSICAL, NULL);
goto out;
}
}
@@ -1636,7 +1734,8 @@ out:
}
int ihk_mc_pt_set_range(page_table_t pt, struct process_vm *vm, void *start,
void *end, uintptr_t phys, enum ihk_mc_pt_attribute attr)
void *end, uintptr_t phys, enum ihk_mc_pt_attribute attr,
int pgshift)
{
int error;
struct set_range_args args;
@@ -1649,6 +1748,7 @@ int ihk_mc_pt_set_range(page_table_t pt, struct process_vm *vm, void *start,
args.attr = attr;
args.diff = (uintptr_t)start ^ phys;
args.vm = vm;
args.pgshift = pgshift;
error = walk_pte_l4(pt, 0, (uintptr_t)start, (uintptr_t)end,
&set_range_l4, &args);
@@ -1677,14 +1777,12 @@ int ihk_mc_pt_set_pte(page_table_t pt, pte_t *ptep, size_t pgsize,
if (pgsize == PTL1_SIZE) {
*ptep = phys | attr_to_l1attr(attr);
}
#ifdef USE_LARGE_PAGES
else if (pgsize == PTL2_SIZE) {
*ptep = phys | attr_to_l2attr(attr | PTATTR_LARGEPAGE);
}
else if ((pgsize == PTL3_SIZE) && (use_1gb_page)) {
*ptep = phys | attr_to_l3attr(attr | PTATTR_LARGEPAGE);
}
#endif
else {
error = -EINVAL;
ekprintf("ihk_mc_pt_set_pte(%p,%p,%lx,%lx,%x):"
@@ -1701,6 +1799,46 @@ out:
return error;
}
/*
 * Break down large mappings until a page boundary falls exactly on addr.
 *
 * Each pass looks up the entry covering addr.  The loop ends successfully
 * when there is no entry, the entry is null, or the mapping already starts
 * at addr.  Otherwise the entry is split one level with split_large_page()
 * and the TLB entry for the old page base is flushed before looking again.
 *
 * Pages that belong to a memobj or are mapped more than once are not
 * broken down (not yet implemented); -EINVAL is returned for them.
 *
 * Returns 0 on success, -EINVAL as above, or the error reported by
 * split_large_page().
 */
int ihk_mc_pt_split(page_table_t pt, struct process_vm *vm, void *addr)
{
	int error = 0;

	for (;;) {
		pte_t *ptep;
		void *pgaddr;
		size_t pgsize;
		intptr_t phys;
		struct page *page = NULL;

		ptep = ihk_mc_pt_lookup_pte(pt, addr, 0, &pgaddr, &pgsize, NULL);
		if (!ptep || pte_is_null(ptep) || (pgaddr == addr)) {
			/* nothing mapped here, or already split at addr */
			error = 0;
			break;
		}

		/* file-offset entries carry no physical page to inspect */
		if (!pte_is_fileoff(ptep, pgsize)) {
			phys = pte_get_phys(ptep);
			page = phys_to_page(phys);
		}

		if (page && (page_is_in_memobj(page)
					|| page_is_multi_mapped(page))) {
			error = -EINVAL;
			kprintf("ihk_mc_pt_split:NYI:page break down\n");
			break;
		}

		error = split_large_page(ptep, pgsize);
		if (error) {
			kprintf("ihk_mc_pt_split:split_large_page failed. %d\n", error);
			break;
		}

		remote_flush_tlb_cpumask(vm, (intptr_t)pgaddr,
				ihk_mc_get_processor_id());
		/* look again: the new, smaller page may still straddle addr */
	}

	return error;
} /* ihk_mc_pt_split() */
int arch_get_smaller_page_size(void *args, size_t cursize, size_t *newsizep,
int *p2alignp)
{
@@ -1712,7 +1850,6 @@ int arch_get_smaller_page_size(void *args, size_t cursize, size_t *newsizep,
/* dummy */
panic("not reached");
}
#ifdef USE_LARGE_PAGES
else if ((cursize > PTL3_SIZE) && use_1gb_page) {
/* 1GiB */
newsize = PTL3_SIZE;
@@ -1723,7 +1860,6 @@ int arch_get_smaller_page_size(void *args, size_t cursize, size_t *newsizep,
newsize = PTL2_SIZE;
p2align = PTL2_SHIFT - PTL1_SHIFT;
}
#endif
else if (cursize > PTL1_SIZE) {
/* 4KiB : basic page size */
newsize = PTL1_SIZE;
@@ -1796,7 +1932,7 @@ static int move_one_page(void *arg0, page_table_t pt, pte_t *ptep,
attr = apte & ~PT_PHYSMASK;
error = ihk_mc_pt_set_range(pt, args->vm, (void *)dest,
(void *)(dest + pgsize), phys, attr);
(void *)(dest + pgsize), phys, attr, pgshift);
if (error) {
kprintf("move_one_page(%p,%p,%p %#lx,%p,%d):"
"set failed. %d\n",
@@ -1822,7 +1958,7 @@ int move_pte_range(page_table_t pt, struct process_vm *vm,
args.dest = (uintptr_t)dest;
args.vm = vm;
error = visit_pte_range(pt, src, src+size, VPTEF_SKIP_NULL,
error = visit_pte_range(pt, src, src+size, 0, VPTEF_SKIP_NULL,
&move_one_page, &args);
flush_tlb(); /* XXX: TLB flush */
if (error) {
@@ -1946,9 +2082,7 @@ static void init_vsyscall_area(struct page_table *pt)
void init_page_table(void)
{
#ifdef USE_LARGE_PAGES
check_available_page_size();
#endif
init_pt = arch_alloc_page(IHK_MC_AP_CRITICAL);
ihk_mc_spinlock_init(&init_pt_lock);

View File

@@ -1327,6 +1327,7 @@ SYSCALL_DECLARE(mmap)
intptr_t addr;
size_t len;
int flags = flags0;
size_t pgsize;
dkprintf("sys_mmap(%lx,%lx,%x,%x,%d,%lx)\n",
addr0, len0, prot, flags0, fd, off0);
@@ -1350,14 +1351,36 @@ SYSCALL_DECLARE(mmap)
}
/* check arguments */
#define VALID_DUMMY_ADDR (region->user_start)
pgsize = PAGE_SIZE;
if (flags & MAP_HUGETLB) {
switch (flags & (0x3F << MAP_HUGE_SHIFT)) {
case 0:
flags |= MAP_HUGE_2MB; /* default hugepage size */
break;
case MAP_HUGE_2MB:
case MAP_HUGE_1GB:
break;
default:
ekprintf("sys_mmap(%lx,%lx,%x,%x,%x,%lx):"
"not supported page size.\n",
addr0, len0, prot, flags0, fd, off0);
error = -EINVAL;
goto out;
}
pgsize = (size_t)1 << ((flags >> MAP_HUGE_SHIFT) & 0x3F);
}
#define VALID_DUMMY_ADDR ((region->user_start + PTL3_SIZE - 1) & ~(PTL3_SIZE - 1))
addr = (flags & MAP_FIXED)? addr0: VALID_DUMMY_ADDR;
len = (len0 + PAGE_SIZE - 1) & PAGE_MASK;
if ((addr & (PAGE_SIZE - 1))
len = (len0 + pgsize - 1) & ~(pgsize - 1);
if ((addr & (pgsize - 1))
|| (len == 0)
|| !(flags & (MAP_SHARED | MAP_PRIVATE))
|| ((flags & MAP_SHARED) && (flags & MAP_PRIVATE))
|| (off0 & (PAGE_SIZE - 1))) {
|| (off0 & (pgsize - 1))) {
ekprintf("sys_mmap(%lx,%lx,%x,%x,%x,%lx):EINVAL\n",
addr0, len0, prot, flags0, fd, off0);
error = -EINVAL;
@@ -1383,25 +1406,6 @@ SYSCALL_DECLARE(mmap)
goto out;
}
if (flags & MAP_HUGETLB) {
switch (flags & (0x3F << MAP_HUGE_SHIFT)) {
case 0:
flags |= MAP_HUGE_2MB; /* default hugepage size */
break;
case MAP_HUGE_2MB:
case MAP_HUGE_1GB:
break;
default:
ekprintf("sys_mmap(%lx,%lx,%x,%x,%x,%lx):"
"not supported page size.\n",
addr0, len0, prot, flags0, fd, off0);
error = -EINVAL;
goto out;
}
}
addr = do_mmap(addr, len, prot, flags, fd, off0);
error = 0;
@@ -1714,7 +1718,8 @@ int arch_map_vdso(struct process_vm *vm)
for (i = 0; i < vdso.vdso_npages; ++i) {
s = vm->vdso_addr + (i * PAGE_SIZE);
e = s + PAGE_SIZE;
error = ihk_mc_pt_set_range(pt, vm, s, e, vdso.vdso_physlist[i], attr);
error = ihk_mc_pt_set_range(pt, vm, s, e,
vdso.vdso_physlist[i], attr, 0);
if (error) {
ekprintf("ihk_mc_pt_set_range failed. %d\n", error);
goto out;
@@ -1744,7 +1749,8 @@ int arch_map_vdso(struct process_vm *vm)
s = vm->vdso_addr + (intptr_t)vdso.vvar_virt;
e = s + PAGE_SIZE;
attr = PTATTR_ACTIVE | PTATTR_USER | PTATTR_NO_EXECUTE;
error = ihk_mc_pt_set_range(pt, vm, s, e, vdso.vvar_phys, attr);
error = ihk_mc_pt_set_range(pt, vm, s, e,
vdso.vvar_phys, attr, 0);
if (error) {
ekprintf("ihk_mc_pt_set_range failed. %d\n", error);
goto out;
@@ -1754,7 +1760,8 @@ int arch_map_vdso(struct process_vm *vm)
s = vm->vdso_addr + (intptr_t)vdso.hpet_virt;
e = s + PAGE_SIZE;
attr = PTATTR_ACTIVE | PTATTR_USER | PTATTR_NO_EXECUTE | PTATTR_UNCACHABLE;
error = ihk_mc_pt_set_range(pt, vm, s, e, vdso.hpet_phys, attr);
error = ihk_mc_pt_set_range(pt, vm, s, e,
vdso.hpet_phys, attr, 0);
if (error) {
ekprintf("ihk_mc_pt_set_range failed. %d\n", error);
goto out;
@@ -1764,7 +1771,8 @@ int arch_map_vdso(struct process_vm *vm)
s = vm->vdso_addr + (intptr_t)vdso.pvti_virt;
e = s + PAGE_SIZE;
attr = PTATTR_ACTIVE | PTATTR_USER | PTATTR_NO_EXECUTE;
error = ihk_mc_pt_set_range(pt, vm, s, e, vdso.pvti_phys, attr);
error = ihk_mc_pt_set_range(pt, vm, s, e,
vdso.pvti_phys, attr, 0);
if (error) {
ekprintf("ihk_mc_pt_set_range failed. %d\n", error);
goto out;

View File

@@ -522,7 +522,9 @@ static int copy_user_pte(void *arg0, page_table_t src_pt, pte_t *src_ptep, void
attr = arch_vrflag_to_ptattr(args->new_vrflag, PF_POPULATE, NULL);
}
error = ihk_mc_pt_set_range(args->new_vm->address_space->page_table, args->new_vm, pgaddr, pgaddr+pgsize, phys, attr);
error = ihk_mc_pt_set_range(args->new_vm->address_space->page_table,
args->new_vm, pgaddr, pgaddr+pgsize, phys, attr,
pgshift);
if (error) {
args->fault_addr = (intptr_t)pgaddr;
goto out;
@@ -572,6 +574,7 @@ static int copy_user_ranges(struct process_vm *vm, struct process_vm *orgvm)
range->flag = src_range->flag;
range->memobj = src_range->memobj;
range->objoff = src_range->objoff;
range->pgshift = src_range->pgshift;
if (range->memobj) {
memobj_ref(range->memobj);
}
@@ -583,7 +586,8 @@ static int copy_user_ranges(struct process_vm *vm, struct process_vm *orgvm)
error = visit_pte_range(orgvm->address_space->page_table,
(void *)range->start, (void *)range->end,
VPTEF_SKIP_NULL, &copy_user_pte, &args);
range->pgshift, VPTEF_SKIP_NULL,
&copy_user_pte, &args);
if (error) {
if (args.fault_addr != -1) {
kprintf("ERROR: copy_user_ranges() "
@@ -626,7 +630,8 @@ int update_process_page_table(struct process_vm *vm,
attr = arch_vrflag_to_ptattr(range->flag, PF_POPULATE, NULL);
flags = ihk_mc_spinlock_lock(&vm->page_table_lock);
error = ihk_mc_pt_set_range(vm->address_space->page_table, vm,
(void *)range->start, (void *)range->end, phys, attr);
(void *)range->start, (void *)range->end, phys, attr,
range->pgshift);
if (error) {
kprintf("update_process_page_table:ihk_mc_pt_set_range failed. %d\n", error);
goto out;
@@ -647,6 +652,13 @@ int split_process_memory_range(struct process_vm *vm, struct vm_range *range,
dkprintf("split_process_memory_range(%p,%lx-%lx,%lx,%p)\n",
vm, range->start, range->end, addr, splitp);
error = ihk_mc_pt_split(vm->address_space->page_table, vm, (void *)addr);
if (error) {
ekprintf("split_process_memory_range:"
"ihk_mc_pt_split failed. %d\n", error);
goto out;
}
newrange = kmalloc(sizeof(struct vm_range), IHK_MC_AP_NOWAIT);
if (!newrange) {
ekprintf("split_process_memory_range(%p,%lx-%lx,%lx,%p):"
@@ -659,6 +671,7 @@ int split_process_memory_range(struct process_vm *vm, struct vm_range *range,
newrange->start = addr;
newrange->end = range->end;
newrange->flag = range->flag;
newrange->pgshift = range->pgshift;
if (range->memobj) {
memobj_ref(range->memobj);
@@ -735,11 +748,10 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range)
int error;
intptr_t start;
intptr_t end;
#ifdef USE_LARGE_PAGES
struct vm_range *neighbor;
intptr_t lpstart;
intptr_t lpend;
#endif /* USE_LARGE_PAGES */
size_t pgsize;
dkprintf("free_process_memory_range(%p, 0x%lx - 0x%lx)\n",
vm, range->start, range->end);
@@ -747,25 +759,40 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range)
start = range->start;
end = range->end;
if (!(range->flag & (VR_REMOTE | VR_IO_NOCACHE | VR_RESERVED))) {
#ifdef USE_LARGE_PAGES
lpstart = start & LARGE_PAGE_MASK;
lpend = (end + LARGE_PAGE_SIZE - 1) & LARGE_PAGE_MASK;
if (lpstart < start) {
neighbor = previous_process_memory_range(vm, range);
if ((neighbor == NULL) || (neighbor->end <= lpstart)) {
neighbor = previous_process_memory_range(vm, range);
pgsize = -1;
for (;;) {
error = arch_get_smaller_page_size(
NULL, pgsize, &pgsize, NULL);
if (error) {
kprintf("free_process_memory_range:"
"arch_get_smaller_page_size failed."
" %d\n", error);
break;
}
lpstart = start & ~(pgsize - 1);
if (!neighbor || (neighbor->end <= lpstart)) {
start = lpstart;
break;
}
}
if (end < lpend) {
neighbor = next_process_memory_range(vm, range);
if ((neighbor == NULL) || (lpend <= neighbor->start)) {
neighbor = next_process_memory_range(vm, range);
pgsize = -1;
for (;;) {
error = arch_get_smaller_page_size(
NULL, pgsize, &pgsize, NULL);
if (error) {
kprintf("free_process_memory_range:"
"arch_get_smaller_page_size failed."
" %d\n", error);
break;
}
lpend = (end + pgsize - 1) & ~(pgsize - 1);
if (!neighbor || (lpend <= neighbor->start)) {
end = lpend;
break;
}
}
#endif /* USE_LARGE_PAGES */
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
if (range->memobj) {
@@ -928,6 +955,7 @@ enum ihk_mc_pt_attribute common_vrflag_to_ptattr(unsigned long flag, uint64_t fa
return attr;
}
/* XXX: the indentation needs to be aligned */
int add_process_memory_range(struct process_vm *vm,
unsigned long start, unsigned long end,
unsigned long phys, unsigned long flag,
@@ -1236,7 +1264,8 @@ int remap_process_memory_range(struct process_vm *vm, struct vm_range *range,
args.memobj = range->memobj;
error = visit_pte_range(vm->address_space->page_table, (void *)start,
(void *)end, VPTEF_DEFAULT, &remap_one_page, &args);
(void *)end, range->pgshift, VPTEF_DEFAULT,
&remap_one_page, &args);
if (error) {
ekprintf("remap_process_memory_range(%p,%p,%#lx,%#lx,%#lx):"
"visit pte failed %d\n",
@@ -1306,8 +1335,8 @@ int sync_process_memory_range(struct process_vm *vm, struct vm_range *range,
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
memobj_lock(range->memobj);
error = visit_pte_range(vm->address_space->page_table, (void *)start,
(void *)end, VPTEF_SKIP_NULL, &sync_one_page,
&args);
(void *)end, range->pgshift, VPTEF_SKIP_NULL,
&sync_one_page, &args);
memobj_unlock(range->memobj);
ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
if (error) {
@@ -1389,7 +1418,7 @@ int invalidate_process_memory_range(struct process_vm *vm,
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
memobj_lock(range->memobj);
error = visit_pte_range(vm->address_space->page_table, (void *)start,
(void *)end, VPTEF_SKIP_NULL,
(void *)end, range->pgshift, VPTEF_SKIP_NULL,
&invalidate_one_page, &args);
memobj_unlock(range->memobj);
ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
@@ -1421,8 +1450,8 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
/*****/
ptep = ihk_mc_pt_lookup_pte(vm->address_space->page_table,
(void *)fault_addr, &pgaddr, &pgsize,
&p2align);
(void *)fault_addr, range->pgshift, &pgaddr, &pgsize,
&p2align);
if (!(reason & (PF_PROT | PF_PATCH)) && ptep && !pte_is_null(ptep)
&& !pte_is_fileoff(ptep, pgsize)) {
if (!pte_is_present(ptep)) {
@@ -1439,13 +1468,19 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang
goto out;
}
/*****/
if (!ptep || (pgsize != PAGE_SIZE)) {
while (((uintptr_t)pgaddr < range->start)
|| (range->end < ((uintptr_t)pgaddr + pgsize))) {
ptep = NULL;
pgsize = PAGE_SIZE;
p2align = PAGE_P2ALIGN;
error = arch_get_smaller_page_size(NULL, pgsize, &pgsize, &p2align);
if (error) {
kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):arch_get_smaller_page_size(pte) failed. %d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error);
goto out;
}
pgaddr = (void *)(fault_addr & ~(pgsize - 1));
}
pgaddr = (void *)(fault_addr & ~(pgsize - 1));
/*****/
if (!ptep || pte_is_null(ptep) || pte_is_fileoff(ptep, pgsize)) {
phys = NOPHYS;
if (range->memobj) {
off_t off;
@@ -1458,17 +1493,34 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang
error = memobj_get_page(range->memobj, off, p2align,
&phys, &memobj_flag);
if (error) {
if (error != -ERESTART) {
struct memobj *obj;
if (zeroobj_create(&obj)) {
panic("PFPMR: zeroobj_crate");
}
if (range->memobj != obj) {
goto out;
}
goto out;
}
}
else {
if (phys == NOPHYS) {
void *virt;
size_t npages;
retry:
npages = pgsize / PAGE_SIZE;
virt = ihk_mc_alloc_aligned_pages(npages, p2align, IHK_MC_AP_NOWAIT);
if (!virt && !range->pgshift && (pgsize != PAGE_SIZE)) {
error = arch_get_smaller_page_size(NULL, pgsize, &pgsize, &p2align);
if (error) {
kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):arch_get_smaller_page_size(anon) failed. %d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error);
goto out;
}
ptep = NULL;
pgaddr = (void *)(fault_addr & ~(pgsize - 1));
goto retry;
}
if (!virt) {
error = -ENOMEM;
kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):cannot allocate new page. %d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error);
@@ -1527,18 +1579,20 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang
else {
error = ihk_mc_pt_set_range(vm->address_space->page_table, vm,
pgaddr, pgaddr + pgsize, phys,
attr);
attr, range->pgshift);
if (error) {
kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):set_range failed. %d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error);
goto out;
}
}
flush_tlb_single(fault_addr);
error = 0;
page = NULL;
vm->currss += PAGE_SIZE;
vm->currss += pgsize;
if(vm->currss > vm->proc->maxrss)
vm->proc->maxrss = vm->currss;
error = 0;
page = NULL;
out:
ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
if (page) {
@@ -1712,7 +1766,7 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
thread->vm, (void *)(end-minsz),
(void *)end, virt_to_phys(stack),
arch_vrflag_to_ptattr(vrflag, PF_POPULATE,
NULL));
NULL), 0);
if (error) {
kprintf("init_process_stack:"
"set range %lx-%lx %lx failed. %d\n",

View File

@@ -159,8 +159,16 @@ int shmobj_create(struct shmid_ds *ds, struct memobj **objp)
{
struct shmobj *obj = NULL;
int error;
int pgshift;
size_t pgsize;
dkprintf("shmobj_create(%p %#lx,%p)\n", ds, ds->shm_segsz, objp);
pgshift = ds->init_pgshift;
if (!pgshift) {
pgshift = PAGE_SHIFT;
}
pgsize = (size_t)1 << pgshift;
obj = kmalloc(sizeof(*obj), IHK_MC_AP_NOWAIT);
if (!obj) {
error = -ENOMEM;
@@ -174,9 +182,10 @@ int shmobj_create(struct shmid_ds *ds, struct memobj **objp)
obj->ds = *ds;
obj->ds.shm_perm.seq = the_seq++;
obj->ds.shm_nattch = 1;
obj->ds.init_pgshift = 0;
obj->index = -1;
obj->pgshift = PAGE_SHIFT;
obj->real_segsz = (obj->ds.shm_segsz + PAGE_SIZE - 1) & PAGE_MASK;
obj->pgshift = pgshift;
obj->real_segsz = (obj->ds.shm_segsz + pgsize - 1) & ~(pgsize - 1);
page_list_init(obj);
ihk_mc_spinlock_init(&obj->memobj.lock);
@@ -213,13 +222,14 @@ void shmobj_destroy(struct shmobj *obj)
extern int the_maxi;
struct shmlock_user *user;
size_t size;
int npages;
dkprintf("shmobj_destroy(%p [%d %o])\n", obj, obj->index, obj->ds.shm_perm.mode);
if (obj->user) {
user = obj->user;
obj->user = NULL;
shmlock_users_lock();
size = (obj->ds.shm_segsz + PAGE_SIZE - 1) & PAGE_MASK;
size = obj->real_segsz;
user->locked -= size;
if (!user->locked) {
shmlock_user_free(user);
@@ -227,6 +237,7 @@ void shmobj_destroy(struct shmobj *obj)
shmlock_users_unlock();
}
/* zap page_list */
npages = (size_t)1 << (obj->pgshift - PAGE_SHIFT);
for (;;) {
struct page *page;
int count;
@@ -253,9 +264,8 @@ void shmobj_destroy(struct shmobj *obj)
panic("shmobj_release");
}
/* XXX:NYI: large pages */
page->mode = PM_NONE;
free_pages(phys_to_virt(page_to_phys(page)), 1);
free_pages(phys_to_virt(page_to_phys(page)), npages);
}
if (obj->index < 0) {
kfree(obj);
@@ -362,9 +372,9 @@ static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align,
memobj, off, p2align, physp, error);
goto out;
}
if (p2align != PAGE_P2ALIGN) { /* XXX:NYI:large pages */
if (p2align != (obj->pgshift - PAGE_SHIFT)) {
error = -ENOMEM;
ekprintf("shmobj_get_page(%p,%#lx,%d,%p):large page. %d\n",
ekprintf("shmobj_get_page(%p,%#lx,%d,%p):pgsize mismatch. %d\n",
memobj, off, p2align, physp, error);
goto out;
}
@@ -384,7 +394,8 @@ static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align,
page = page_list_lookup(obj, off);
if (!page) {
npages = 1 << p2align;
virt = ihk_mc_alloc_pages(npages, IHK_MC_AP_NOWAIT);
virt = ihk_mc_alloc_aligned_pages(npages, p2align,
IHK_MC_AP_NOWAIT);
if (!virt) {
error = -ENOMEM;
ekprintf("shmobj_get_page(%p,%#lx,%d,%p):"
@@ -460,7 +471,7 @@ static int shmobj_lookup_page(struct memobj *memobj, off_t off, int p2align,
struct shmobj *obj = to_shmobj(memobj);
int error;
struct page *page;
uintptr_t phys;
uintptr_t phys = NOPHYS;
dkprintf("shmobj_lookup_page(%p,%#lx,%d,%p)\n",
memobj, off, p2align, physp);
@@ -471,9 +482,9 @@ static int shmobj_lookup_page(struct memobj *memobj, off_t off, int p2align,
memobj, off, p2align, physp, error);
goto out;
}
if (p2align != PAGE_P2ALIGN) { /* XXX:NYI:large pages */
if (p2align != (obj->pgshift - PAGE_SHIFT)) {
error = -ENOMEM;
ekprintf("shmobj_lookup_page(%p,%#lx,%d,%p):large page. %d\n",
ekprintf("shmobj_lookup_page(%p,%#lx,%d,%p):pgsize mismatch. %d\n",
memobj, off, p2align, physp, error);
goto out;
}

View File

@@ -895,24 +895,20 @@ static int do_munmap(void *addr, size_t len)
return error;
}
static int search_free_space(size_t len, intptr_t hint, intptr_t *addrp)
static int search_free_space(size_t len, intptr_t hint, int pgshift, intptr_t *addrp)
{
struct thread *thread = cpu_local_var(current);
struct vm_regions *region = &thread->vm->region;
intptr_t addr;
int error;
struct vm_range *range;
size_t pgsize = (size_t)1 << pgshift;
dkprintf("search_free_space(%lx,%lx,%p)\n", len, hint, addrp);
dkprintf("search_free_space(%lx,%lx,%d,%p)\n", len, hint, pgshift, addrp);
addr = hint;
for (;;) {
#ifdef USE_LARGE_PAGES
if (len >= LARGE_PAGE_SIZE) {
addr = (addr + LARGE_PAGE_SIZE - 1) & LARGE_PAGE_MASK;
}
#endif /* USE_LARGE_PAGES */
addr = (addr + pgsize - 1) & ~(pgsize - 1);
if ((region->user_end <= addr)
|| ((region->user_end - len) < addr)) {
ekprintf("search_free_space(%lx,%lx,%p):"
@@ -934,8 +930,8 @@ static int search_free_space(size_t len, intptr_t hint, intptr_t *addrp)
*addrp = addr;
out:
dkprintf("search_free_space(%lx,%lx,%p): %d %lx\n",
len, hint, addrp, error, addr);
dkprintf("search_free_space(%lx,%lx,%d,%p): %d %lx\n",
len, hint, pgshift, addrp, error, addr);
return error;
}
@@ -994,6 +990,27 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
flush_nfo_tlb();
if (flags & MAP_HUGETLB) {
pgshift = (flags >> MAP_HUGE_SHIFT) & 0x3F;
p2align = pgshift - PAGE_SHIFT;
}
else if ((flags & MAP_PRIVATE) && (flags & MAP_ANONYMOUS)) {
pgshift = 0; /* transparent huge page */
p2align = PAGE_P2ALIGN;
if (len > PAGE_SIZE) {
error = arch_get_smaller_page_size(NULL, len+1, NULL, &p2align);
if (error) {
ekprintf("do_mmap:arch_get_smaller_page_size failed. %d\n", error);
goto out;
}
}
}
else {
pgshift = PAGE_SHIFT; /* basic page size */
p2align = PAGE_P2ALIGN;
}
ihk_mc_spinlock_lock_noirq(&thread->vm->memory_range_lock);
if (flags & MAP_FIXED) {
@@ -1007,10 +1024,11 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
}
else {
/* choose mapping address */
error = search_free_space(len, region->map_end, &addr);
error = search_free_space(len, region->map_end,
PAGE_SHIFT+p2align, &addr);
if (error) {
ekprintf("do_mmap:search_free_space(%lx,%lx) failed. %d\n",
len, region->map_end, error);
ekprintf("do_mmap:search_free_space(%lx,%lx,%d) failed. %d\n",
len, region->map_end, p2align, error);
goto out;
}
region->map_end = addr + len;
@@ -1096,13 +1114,6 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
else if (!(vrflags & VR_DEMAND_PAGING)
&& ((vrflags & VR_PROT_MASK) != VR_PROT_NONE)) {
npages = len >> PAGE_SHIFT;
p2align = PAGE_P2ALIGN;
#ifdef USE_LARGE_PAGES
if ((len >= LARGE_PAGE_SIZE)
&& ((addr & (LARGE_PAGE_SIZE - 1)) == 0)) {
p2align = LARGE_PAGE_P2ALIGN;
}
#endif /* USE_LARGE_PAGES */
p = ihk_mc_alloc_aligned_pages(npages, p2align, IHK_MC_AP_NOWAIT);
if (p == NULL) {
ekprintf("do_mmap:allocate_pages(%d,%d) failed.\n",
@@ -1116,6 +1127,7 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
memset(&ads, 0, sizeof(ads));
ads.shm_segsz = len;
ads.shm_perm.mode = SHM_DEST;
ads.init_pgshift = PAGE_SHIFT;
error = shmobj_create(&ads, &memobj);
if (error) {
ekprintf("do_mmap:shmobj_create failed. %d\n", error);
@@ -1141,13 +1153,6 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
}
vrflags |= VRFLAG_PROT_TO_MAXPROT(PROT_TO_VR_FLAG(maxprot));
if (flags & MAP_HUGETLB) {
pgshift = (flags >> MAP_HUGE_SHIFT) & 0x3F;
}
else {
pgshift = PAGE_SHIFT; /* basic page size */
}
error = add_process_memory_range(thread->vm, addr, addr+len, phys,
vrflags, memobj, off, pgshift);
if (error) {
@@ -3238,7 +3243,7 @@ SYSCALL_DECLARE(mincore)
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
ptep = ihk_mc_pt_lookup_pte(vm->address_space->page_table,
(void *)addr, NULL, NULL, NULL);
(void *)addr, 0, NULL, NULL, NULL);
if (ptep && pte_is_present(ptep)) {
value = 1;
}
@@ -3630,6 +3635,7 @@ int do_shmget(const key_t key, const size_t size, const int shmflg)
ads.shm_segsz = size;
ads.shm_ctime = now;
ads.shm_cpid = proc->pid;
ads.init_pgshift = pgshift;
error = shmobj_create_indexed(&ads, &obj);
if (error) {
@@ -3639,7 +3645,6 @@ int do_shmget(const key_t key, const size_t size, const int shmflg)
}
obj->index = ++the_maxi;
obj->pgshift = pgshift;
list_add(&obj->chain, &kds_list);
++the_shm_info.used_ids;
@@ -3668,6 +3673,7 @@ SYSCALL_DECLARE(shmat)
int vrflags;
int req;
struct shmobj *obj;
size_t pgsize;
dkprintf("shmat(%#x,%p,%#x)\n", shmid, shmaddr, shmflg);
@@ -3679,13 +3685,14 @@ SYSCALL_DECLARE(shmat)
return error;
}
if (shmaddr && ((uintptr_t)shmaddr & (PAGE_SIZE - 1)) && !(shmflg & SHM_RND)) {
pgsize = (size_t)1 << obj->pgshift;
if (shmaddr && ((uintptr_t)shmaddr & (pgsize - 1)) && !(shmflg & SHM_RND)) {
shmobj_list_unlock();
dkprintf("shmat(%#x,%p,%#x): -EINVAL\n", shmid, shmaddr, shmflg);
return -EINVAL;
}
addr = (uintptr_t)shmaddr & PAGE_MASK;
len = (obj->ds.shm_segsz + PAGE_SIZE - 1) & PAGE_MASK;
addr = (uintptr_t)shmaddr & ~(pgsize - 1);
len = obj->real_segsz;
prot = PROT_READ;
req = 4;
@@ -3725,7 +3732,7 @@ SYSCALL_DECLARE(shmat)
}
}
else {
error = search_free_space(len, region->map_end, &addr);
error = search_free_space(len, region->map_end, obj->pgshift, &addr);
if (error) {
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
shmobj_list_unlock();
@@ -3753,7 +3760,7 @@ SYSCALL_DECLARE(shmat)
memobj_ref(&obj->memobj);
error = add_process_memory_range(vm, addr, addr+len, -1,
vrflags, &obj->memobj, 0, PAGE_SHIFT);
vrflags, &obj->memobj, 0, obj->pgshift);
if (error) {
if (!(prot & PROT_WRITE)) {
(void)set_host_vma(addr, len, PROT_READ|PROT_WRITE);
@@ -3940,7 +3947,7 @@ SYSCALL_DECLARE(shmctl)
ekprintf("shmctl(%#x,%d,%p): user lookup: %d\n", shmid, cmd, buf, error);
return -ENOMEM;
}
size = (obj->ds.shm_segsz + PAGE_SIZE - 1) & PAGE_MASK;
size = obj->real_segsz;
if (!has_cap_ipc_lock(thread)
&& (rlim->rlim_cur != (rlim_t)-1)
&& ((rlim->rlim_cur < user->locked)
@@ -3978,7 +3985,7 @@ SYSCALL_DECLARE(shmctl)
if ((obj->ds.shm_perm.mode & SHM_LOCKED)
&& ((obj->pgshift == 0)
|| (obj->pgshift == PAGE_SHIFT))) {
size = (obj->ds.shm_segsz + PAGE_SIZE - 1) & PAGE_MASK;
size = obj->real_segsz;
shmlock_users_lock();
user = obj->user;
obj->user = NULL;
@@ -6433,7 +6440,7 @@ SYSCALL_DECLARE(mremap)
}
need_relocate = 1;
error = search_free_space(newsize, vm->region.map_end,
(intptr_t *)&newstart);
range->pgshift, (intptr_t *)&newstart);
if (error) {
ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
"search failed. %d\n",

View File

@@ -182,7 +182,7 @@ static int zeroobj_get_page(struct memobj *memobj, off_t off, int p2align,
}
if (p2align != PAGE_P2ALIGN) { /* XXX:NYI:large pages */
error = -ENOMEM;
ekprintf("zeroobj_get_page(%p,%#lx,%d,%p):large page. %d\n",
dkprintf("zeroobj_get_page(%p,%#lx,%d,%p):large page. %d\n",
memobj, off, p2align, physp, error);
goto out;
}

View File

@@ -127,16 +127,18 @@ int ihk_mc_pt_free_range(page_table_t pt, struct process_vm *vm,
/*
 * Page-table manipulation prototypes.
 *
 * NOTE(review): this span is diff text whose +/- markers were lost, so some
 * declarations below appear twice (what looks like the pre-change form
 * followed by the post-change form that adds an `int pgshift` parameter).
 * Confirm against the real header before relying on either form.
 */
/* Takes a [start, end) style address pair plus two attribute sets named
 * clrattr and setattr; presumably clears the former and sets the latter on
 * the range -- TODO confirm against the implementation. */
int ihk_mc_pt_change_attr_range(page_table_t pt, void *start, void *end,
enum ihk_mc_pt_attribute clrattr,
enum ihk_mc_pt_attribute setattr);
/*
 * Look up the PTE for `virt`. Callers visible in this change test the
 * returned pointer against NULL and with pte_is_null()/pte_is_present().
 * The page-fault path passes range->pgshift and receives pgaddr/pgsize/
 * p2align through the three out-pointers; mincore passes pgshift 0 and NULL
 * for all three out-pointers, so they appear to be optional -- TODO confirm.
 * NOTE(review): the first declaration (without pgshift) looks like the
 * pre-change form; the second is the one the updated callers match.
 */
pte_t *ihk_mc_pt_lookup_pte(page_table_t pt, void *virt, void **pgbasep, size_t *pgsizep, int *p2alignp);
pte_t *ihk_mc_pt_lookup_pte(page_table_t pt, void *virt, int pgshift, void **pgbasep, size_t *pgsizep, int *p2alignp);
/*
 * Map [start, end) to `phys` with attributes `attr`. Callers treat a
 * non-zero return as an error. The trailing `pgshift` is supplied as
 * range->pgshift by the page-fault path and as 0 by init_process_stack.
 * NOTE(review): do_mmap comments describe pgshift 0 as "transparent huge
 * page" and PAGE_SHIFT as "basic page size"; presumably the same convention
 * applies here -- verify.
 * NOTE(review): the parameter-list tail is present twice below (old form
 * ending in `attr);`, new form ending in `attr, int pgshift);`).
 */
int ihk_mc_pt_set_range(page_table_t pt, struct process_vm *vm, void *start,
void *end, uintptr_t phys, enum ihk_mc_pt_attribute attr);
void *end, uintptr_t phys, enum ihk_mc_pt_attribute attr,
int pgshift);
int ihk_mc_pt_set_pte(page_table_t pt, pte_t *ptep, size_t pgsize, uintptr_t phys, enum ihk_mc_pt_attribute attr);
int ihk_mc_pt_prepare_map(page_table_t pt, void *virt, unsigned long size,
enum ihk_mc_pt_prepare_flag);
int ihk_mc_pt_split(page_table_t pt, struct process_vm *vm, void *addr);
/* Callback type for visit_pte_range(): receives the caller's `arg`, the page
 * table, the PTE pointer, and the page address and page shift for that
 * entry. Returns int; presumably an error code -- TODO confirm. */
typedef int pte_visitor_t(void *arg, page_table_t pt, pte_t *ptep,
void *pgaddr, int pgshift);
/*
 * Walk the PTEs covering [start, end) and invoke `funcp` with `arg`.
 * invalidate_process_memory_range calls this with range->pgshift,
 * VPTEF_SKIP_NULL and &invalidate_one_page, storing the result as an error
 * code.
 * NOTE(review): the first line of the declaration is present twice below;
 * the second (with `int pgshift` before `flags`) matches the updated caller.
 */
int visit_pte_range(page_table_t pt, void *start, void *end,
int visit_pte_range(page_table_t pt, void *start, void *end, int pgshift,
enum visit_pte_flag flags, pte_visitor_t *funcp, void *arg);
int move_pte_range(page_table_t pt, struct process_vm *vm,
void *src, void *dest, size_t size);