diff --git a/arch/x86/kernel/include/owano-memory.h b/arch/x86/kernel/include/owano-memory.h
new file mode 100644
index 00000000..21e6e460
--- /dev/null
+++ b/arch/x86/kernel/include/owano-memory.h
@@ -0,0 +1,812 @@
+#ifndef OWANO_MEMORY_H
+#define OWANO_MEMORY_H
+
+/** \brief Show the current mappings of the given page table */
+void ihk_mc_pt_show(page_table_t pt);
+
+/** \brief Show the differences between the two given page tables */
+void ihk_mc_pt_diff(page_table_t oldpt, page_table_t newpt);
+
+/** \brief Create a copy of the given page table (NULL if __alloc_new_pt() returned NULL) */
+page_table_t ihk_mc_pt_snap(page_table_t srcpt);
+
+/** \brief Release a copy created by ihk_mc_pt_snap(); a NULL argument is ignored */
+void ihk_mc_pt_destroy_snap(page_table_t pt);
+
+#endif /* OWANO_MEMORY_H */
+
+#ifdef OWANO_IMPLEMENTATION
+static struct page_table *__alloc_new_pt(enum ihk_mc_ap_flag ap_flag);
+/* Split a virtual address into its four page-table indices (L4..L1); usable as one expression. */
+#define OM_GET_VIRT_INDICES(virt, l4i, l3i, l2i, l1i) \
+	((l4i) = ((virt) >> PTL4_SHIFT) & (PT_ENTRIES - 1), \
+	 (l3i) = ((virt) >> PTL3_SHIFT) & (PT_ENTRIES - 1), \
+	 (l2i) = ((virt) >> PTL2_SHIFT) & (PT_ENTRIES - 1), \
+	 (l1i) = ((virt) >> PTL1_SHIFT) & (PT_ENTRIES - 1))
+/* Rebuild a virtual address by OR-ing the four shifted indices (no sign extension is applied). */
+#define OM_GET_INDICES_VIRT(l4i, l3i, l2i, l1i) \
+	( ((uint64_t)(l4i) << PTL4_SHIFT) \
+	| ((uint64_t)(l3i) << PTL3_SHIFT) \
+	| ((uint64_t)(l2i) << PTL2_SHIFT) \
+	| ((uint64_t)(l1i) << PTL1_SHIFT) \
+	)
+/* Find the PTE slot for virt in pt: -ENOENT if the L4 or L3 entry is absent, else 0 with *ptep, *pgbasep, *pgsizep set. */
+static int snap_lookup_pte(struct page_table *pt, void *virt, pte_t **ptep, void **pgbasep, uint64_t *pgsizep)
+{
+	int l4idx, l3idx, l2idx, l1idx;
+
+	OM_GET_VIRT_INDICES((uint64_t)virt, l4idx, l3idx, l2idx, l1idx);
+
+	if (!(pt->entry[l4idx] & PFL4_PRESENT)) {
+		return -ENOENT;
+	}
+
+	pt = phys_to_virt(pt->entry[l4idx] & PT_PHYSMASK);
+	if (!(pt->entry[l3idx] & PFL3_PRESENT)) {
+		return -ENOENT;
+	}
+	/* A non-present or large-page L2 entry is itself returned as the slot (presence is not required). */
+	pt = phys_to_virt(pt->entry[l3idx] & PT_PHYSMASK);
+	if (!(pt->entry[l2idx] & PFL2_PRESENT) || (pt->entry[l2idx] & PFL2_SIZE)) {
+		*ptep = &pt->entry[l2idx];
+		*pgbasep = (void *)OM_GET_INDICES_VIRT(l4idx, l3idx, l2idx, 0);
+		*pgsizep = PTL2_SIZE;
+		return 0;
+	}
+	/* Otherwise return the L1 slot, whether or not that entry is present. */
+	pt = phys_to_virt(pt->entry[l2idx] & PT_PHYSMASK);
+	*ptep = &pt->entry[l1idx];
+	*pgbasep = (void *)OM_GET_INDICES_VIRT(l4idx, l3idx, l2idx, l1idx);
+	*pgsizep = PTL1_SIZE;
+
+	return 0;
+}
+/* Per-entry walk callback: gets the PTE pointer, the entry's base virtual address and the walk range. */
+typedef int snap_walk_pte_fn_t(void *args, pte_t *ptep, uint64_t base,
+		uint64_t start, uint64_t end);
+/* Call funcp on each L1 entry of pt overlapping [start, end); end == 0 means "to the end of the table". */
+static int snap_walk_pte_l1(struct page_table *pt, uint64_t base, uint64_t start,
+		uint64_t end, snap_walk_pte_fn_t *funcp, void *args)
+{
+	int six;
+	int eix;
+	int ret;
+	int i;
+	int error;
+	uint64_t off;
+
+	six = (start <= base)? 0: ((start - base) >> PTL1_SHIFT);
+	eix = ((end == 0) || ((base + PTL2_SIZE) <= end))? PT_ENTRIES
+		: (((end - base) + (PTL1_SIZE - 1)) >> PTL1_SHIFT);
+	/* Result: 0 if any callback succeeded, -ENOENT if none did; any other error stops the walk. */
+	ret = -ENOENT;
+	for (i = six; i < eix; ++i) {
+		off = i * PTL1_SIZE;
+		error = (*funcp)(args, &pt->entry[i], base+off, start, end);
+		if (!error) {
+			ret = 0;
+		}
+		else if (error != -ENOENT) {
+			ret = error;
+			break;
+		}
+	}
+
+	return ret;
+}
+/* Same as snap_walk_pte_l1(), but over the L2 entries of pt. */
+static int snap_walk_pte_l2(struct page_table *pt, uint64_t base, uint64_t start,
+		uint64_t end, snap_walk_pte_fn_t *funcp, void *args)
+{
+	int six;
+	int eix;
+	int ret;
+	int i;
+	int error;
+	uint64_t off;
+
+	six = (start <= base)? 0: ((start - base) >> PTL2_SHIFT);
+	eix = ((end == 0) || ((base + PTL3_SIZE) <= end))? PT_ENTRIES
+		: (((end - base) + (PTL2_SIZE - 1)) >> PTL2_SHIFT);
+
+	ret = -ENOENT;
+	for (i = six; i < eix; ++i) {
+		off = i * PTL2_SIZE;
+		error = (*funcp)(args, &pt->entry[i], base+off, start, end);
+		if (!error) {
+			ret = 0;
+		}
+		else if (error != -ENOENT) {
+			ret = error;
+			break;
+		}
+	}
+
+	return ret;
+}
+/* Same as snap_walk_pte_l1(), but over the L3 entries of pt. */
+static int snap_walk_pte_l3(struct page_table *pt, uint64_t base, uint64_t start,
+		uint64_t end, snap_walk_pte_fn_t *funcp, void *args)
+{
+	int six;
+	int eix;
+	int ret;
+	int i;
+	int error;
+	uint64_t off;
+
+	six = (start <= base)? 0: ((start - base) >> PTL3_SHIFT);
+	eix = ((end == 0) || ((base + PTL4_SIZE) <= end))? PT_ENTRIES
+		: (((end - base) + (PTL3_SIZE - 1)) >> PTL3_SHIFT);
+
+	ret = -ENOENT;
+	for (i = six; i < eix; ++i) {
+		off = i * PTL3_SIZE;
+		error = (*funcp)(args, &pt->entry[i], base+off, start, end);
+		if (!error) {
+			ret = 0;
+		}
+		else if (error != -ENOENT) {
+			ret = error;
+			break;
+		}
+	}
+
+	return ret;
+}
+/* Same as snap_walk_pte_l1(), but over the L4 (top-level) entries of pt. */
+static int snap_walk_pte_l4(struct page_table *pt, uint64_t base, uint64_t start,
+		uint64_t end, snap_walk_pte_fn_t *funcp, void *args)
+{
+	int six;
+	int eix;
+	int ret;
+	int i;
+	int error;
+	uint64_t off;
+
+	six = (start <= base)? 0: ((start - base) >> PTL4_SHIFT);
+	eix = (end == 0)? PT_ENTRIES
+		:(((end - base) + (PTL4_SIZE - 1)) >> PTL4_SHIFT);
+
+	ret = -ENOENT;
+	for (i = six; i < eix; ++i) {
+		off = i * PTL4_SIZE;
+		error = (*funcp)(args, &pt->entry[i], base+off, start, end);
+		if (!error) {
+			ret = 0;
+		}
+		else if (error != -ENOENT) {
+			ret = error;
+			break;
+		}
+	}
+
+	return ret;
+}
+/* State of ihk_mc_pt_show(): the run of contiguous mappings currently being merged (start == -1: none). */
+struct show_args {
+	struct page_table *pt;
+
+	int nrange;
+	int final;
+	uint64_t start;
+	uint64_t end;
+	uint64_t pstart;
+	uint64_t pend;
+	uint64_t pgsize;
+};
+/* Print the pending run, preceded by a header on the first call and followed by a trailer when args->final is set. */
+static void show_show_args(struct show_args *args)
+{
+	if (++args->nrange == 1) {
+		kprintf("ihk_mc_pt_show(%p):\n", args->pt);
+		if (args->start == -1) {
+			kprintf("no active pages\n");
+			goto final;
+		}
+	}
+	else if (args->start == -1) {
+		goto final;	/* nothing pending, but the trailer may still be due */
+	}
+
+	kprintf("%012lx-%012lx: %08lx-%08lx [%06lx] (%lx)\n",
+		args->start, args->end, args->pstart, args->pend,
+		args->pgsize, (args->end - args->start));
+
+final:
+	if (args->final) {
+		kprintf("-------- end of show\n");
+	}
+	return;
+}
+/* L1 callback: merge a present page into the pending run if contiguous, else print the run and start a new one. */
+static int show_l1(void *args0, pte_t *ptep, uint64_t base, uint64_t start, uint64_t end)
+{
+	struct show_args *args = args0;
+	uint64_t phys;
+
+	if (*ptep & PFL1_PRESENT) {
+		phys = *ptep & PT_PHYSMASK;
+		if (args->start != -1) {
+			if ((args->end == base) && (args->pend == phys)
+					&& (args->pgsize == PTL1_SIZE)) {
+				args->end += args->pgsize;
+				args->pend += args->pgsize;
+				return 0;
+			}
+			show_show_args(args);
+		}
+
+		args->start = base;
+		args->end = base + PTL1_SIZE;
+		args->pstart = phys;
+		args->pend = phys + PTL1_SIZE;
+		args->pgsize = PTL1_SIZE;
+		return 0;
+	}
+	return 0;
+}
+/* L2 callback: treat a large page like show_l1() does a page; otherwise descend into the L1 table. */
+static int show_l2(void *args0, pte_t *ptep, uint64_t base, uint64_t start, uint64_t end)
+{
+	struct show_args *args = args0;
+	struct page_table *pt;
+	uint64_t phys;
+
+	if ((*ptep & PFL2_PRESENT) && (*ptep & PFL2_SIZE)) {
+		phys = *ptep & PT_PHYSMASK & ~(PTL2_SIZE - 1);
+		if (args->start != -1) {
+			if ((args->end == base) && (args->pend == phys)
+					&& (args->pgsize == PTL2_SIZE)) {
+				args->end += args->pgsize;
+				args->pend += args->pgsize;
+				return 0;
+			}
+			show_show_args(args);
+		}
+
+		args->start = base;
+		args->end = base + PTL2_SIZE;
+		args->pstart = phys;
+		args->pend = phys + PTL2_SIZE;
+		args->pgsize = PTL2_SIZE;
+		return 0;
+	}
+	if (*ptep & PFL2_PRESENT) {
+		pt = phys_to_virt(*ptep & PT_PHYSMASK);
+		snap_walk_pte_l1(pt, base, start, end, &show_l1, args0);
+	}
+	return 0;
+}
+/* L3 callback: descend into the L2 table of a present entry. */
+static int show_l3(void *args0, pte_t *ptep, uint64_t base, uint64_t start, uint64_t end)
+{
+	struct page_table *pt;
+
+	if (*ptep & PFL3_PRESENT) {
+		pt = phys_to_virt(*ptep & PT_PHYSMASK);
+		snap_walk_pte_l2(pt, base, start, end, &show_l2, args0);
+	}
+	return 0;
+}
+/* L4 callback: descend into the L3 table of a present entry. */
+static int show_l4(void *args0, pte_t *ptep, uint64_t base, uint64_t start, uint64_t end)
+{
+	struct page_table *pt;
+
+	if (*ptep & PFL4_PRESENT) {
+		pt = phys_to_virt(*ptep & PT_PHYSMASK);
+		snap_walk_pte_l3(pt, base, start, end, &show_l3, args0);
+	}
+	return 0;
+}
+/* Walk the whole of pt and print every run of contiguous mappings via kprintf(). */
+void ihk_mc_pt_show(page_table_t pt)
+{
+	struct show_args args;
+
+	memset(&args, 0, sizeof(args));
+	args.pt = pt;
+	args.start = -1;
+
+	snap_walk_pte_l4(pt, 0, 0, 0, &show_l4, &args);
+	/* Flush the last pending run and emit the trailer. */
+	args.final = 1;
+	show_show_args(&args);
+
+	return;
+}
+/* Argument block of the snapshot walk: the page table being built. */
+struct snap_args {
+	struct page_table *pt;
+};
+/* L1 callback: replicate a present source PTE into the snapshot; returns 0 or the first error encountered. */
+static int snap_l1(void *args0, pte_t *ptep, uint64_t base, uint64_t start, uint64_t end)
+{
+	struct snap_args *args = args0;
+	uint64_t phys;
+	int error;
+	pte_t *ptep2;
+	void *pgbase;
+	uint64_t pgsize;
+
+	if (*ptep & PFL1_PRESENT) {
+		phys = *ptep & PT_PHYSMASK;
+		/*
+		 * Install a mapping in the snapshot table first, then replace
+		 * the resulting entry with a verbatim copy of the source PTE.
+		 */
+		error = ihk_mc_pt_set_page(args->pt, (void *)base, phys, PTATTR_FOR_USER);
+		if (error) {
+			kprintf("snap_l1:ihk_mc_pt_set_page failed %d\n", error);
+			return error;
+		}
+		error = snap_lookup_pte(args->pt, (void *)base, &ptep2, &pgbase, &pgsize);
+		if (error) {
+			kprintf("snap_l1:snap_lookup_pte failed %d\n", error);
+			return error;
+		}
+		/* Copy the raw entry so that all attribute bits match the source. */
+		*ptep2 = *ptep;
+	}
+	return 0;
+}
+static int snap_l2(void *args0, pte_t *ptep, uint64_t base, uint64_t start, uint64_t end)
+{
+	struct snap_args *args = args0;
+	uint64_t phys;
+	int error;
+	pte_t *ptep2;
+	void *pgbase;
+	uint64_t pgsize;
+	struct page_table *pt;
+	/* L2 callback: copy a large-page entry into the snapshot, or descend into the L1 table. */
+	if (*ptep & PFL2_PRESENT) {
+		if (*ptep & PFL2_SIZE) {
+			/*
+			 * Large page: use the physical bits of the PTE with the
+			 * in-page offset bits cleared (same mask as show_l2()).
+			 */
+			phys = *ptep & PT_PHYSMASK & ~(PTL2_SIZE - 1);
+			error = ihk_mc_pt_set_large_page(args->pt, (void *)base,
+					phys, PTATTR_FOR_USER);
+			if (error) {
+				kprintf("snap_l2:ihk_mc_pt_set_large_page failed %d\n", error);
+				return error;
+			}
+			error = snap_lookup_pte(args->pt, (void *)base, &ptep2, &pgbase, &pgsize);
+			if (error) {
+				kprintf("snap_l2:snap_lookup_pte failed %d\n", error);
+				return error;
+			}
+			/* Copy the raw entry so that all attribute bits match the source. */
+			*ptep2 = *ptep;
+		}
+		else {
+			pt = phys_to_virt(*ptep & PT_PHYSMASK);
+			snap_walk_pte_l1(pt, base, start, end, &snap_l1, args0);
+		}
+	}
+	return 0;
+}
+static int snap_l3(void *args0, pte_t *ptep, uint64_t base, uint64_t start, uint64_t end)
+{
+	struct page_table *pt;
+	/* L3 callback: descend into the L2 table; the sub-walk result is not propagated (errors are only logged). */
+	if (*ptep & PFL3_PRESENT) {
+		pt = phys_to_virt(*ptep & PT_PHYSMASK);
+		snap_walk_pte_l2(pt, base, start, end, &snap_l2, args0);
+	}
+	return 0;
+}
+static int snap_l4(void *args0, pte_t *ptep, uint64_t base, uint64_t start, uint64_t end)
+{
+	struct page_table *pt;
+	/* L4 callback: descend into the L3 table; the sub-walk result is not propagated (errors are only logged). */
+	if (*ptep & PFL4_PRESENT) {
+		pt = phys_to_virt(*ptep & PT_PHYSMASK);
+		snap_walk_pte_l3(pt, base, start, end, &snap_l3, args0);
+	}
+	return 0;
+}
+/* Build a snapshot of srcpt in a freshly allocated table; the walk is skipped if the allocation returned NULL. */
+page_table_t ihk_mc_pt_snap(page_table_t srcpt)
+{
+	struct snap_args args;
+	args.pt = __alloc_new_pt(IHK_MC_AP_CRITICAL);
+	if (args.pt != NULL) {
+		snap_walk_pte_l4(srcpt, 0, 0, 0, &snap_l4, &args);
+	}
+	kprintf("ihk_mc_pt_snap(%p): %p\n", srcpt, args.pt);
+	return args.pt;
+}
+/* State of ihk_mc_pt_diff(): the pending record; mode is PFL1_PRESENT (mapped), PFL1_WRITABLE (changed) or 0 (unmapped). */
+struct diff_args {
+	struct page_table * oldpt;
+	struct page_table * newpt;
+
+	int nrange;
+	int final;
+	uint64_t mode;
+	uint64_t start;
+	uint64_t end;
+	uint64_t pstart;
+	uint64_t pend;
+	uint64_t pgsize;
+};
+/* Print the pending record, preceded by a header on the first call and followed by a trailer when args->final is set. */
+static void show_diff_args(struct diff_args *args)
+{
+	if (++args->nrange == 1) {
+		kprintf("ihk_mc_pt_diff(%p,%p):\n", args->oldpt, args->newpt);
+		if (args->start == -1) {
+			kprintf("no difference\n");
+			goto final;
+		}
+	}
+	else if (args->start == -1) {
+		goto final;	/* nothing pending, but the trailer may still be due */
+	}
+
+	if (args->mode & PFL1_PRESENT) {
+		kprintf("[%d] %012lx-%012lx: mapped %08lx-%08lx [%06lx] (%lx)\n",
+			args->nrange,
+			args->start, args->end, args->pstart, args->pend,
+			args->pgsize, (args->end - args->start));
+	}
+	else if (args->mode & PFL1_WRITABLE) {
+		kprintf("[%d] %012lx-%012lx: changed %08lx --> %08lx [%06lx]\n",
+			args->nrange,
+			args->start, args->end, args->pstart, args->pend,
+			args->pgsize);
+	}
+	else {
+		kprintf("[%d] %012lx-%012lx: unmapped %08lx-%08lx [%06lx] (%lx)\n",
+			args->nrange,
+			args->start, args->end, args->pstart, args->pend,
+			args->pgsize, (args->end - args->start));
+	}
+
+final:
+	if (args->final) {
+		kprintf("-------- end of diff\n");
+	}
+	return;
+}
+/* Compare two L1 tables entry by entry (a NULL table counts as all-zero) and record mapped/unmapped/changed pages. */
+static void diff_pte_l1(struct page_table *oldpt, struct page_table *newpt, uint64_t base, struct diff_args *args)
+{
+	int i;
+	uint64_t off;
+	pte_t diff;
+	uint64_t phys;
+	pte_t oldpte;
+	pte_t newpte;
+	/* Contiguous pages with the same mode are merged; the last run is left pending in args on return. */
+	for (i = 0; i < PT_ENTRIES; ++i) {
+		off = i * PTL1_SIZE;
+		oldpte = (oldpt == NULL)? 0: oldpt->entry[i];
+		newpte = (newpt == NULL)? 0: newpt->entry[i];
+		diff = oldpte ^ newpte;
+		if (diff & PFL1_PRESENT) {
+			if (oldpte & PFL1_PRESENT) {
+				phys = oldpte & PT_PHYSMASK;
+				if (args->start != -1) {
+					if (!(args->mode & PFL1_PRESENT)
+							&& (args->end == (base + off))
+							&& (args->pend == phys)
+							&& (args->pgsize == PTL1_SIZE)) {
+						args->end += args->pgsize;
+						args->pend += args->pgsize;
+						continue;
+					}
+					show_diff_args(args);
+				}
+				args->mode = 0;
+				args->start = base + off;
+				args->end = base + off + PTL1_SIZE;
+				args->pstart = phys;
+				args->pend = phys + PTL1_SIZE;
+				args->pgsize = PTL1_SIZE;
+			}
+			else {
+				phys = newpte & PT_PHYSMASK;
+				if (args->start != -1) {
+					if ((args->mode & PFL1_PRESENT)
+							&& (args->end == (base + off))
+							&& (args->pend == phys)
+							&& (args->pgsize == PTL1_SIZE)) {
+						args->end += args->pgsize;
+						args->pend += args->pgsize;
+						continue;
+					}
+					show_diff_args(args);
+				}
+				args->mode = PFL1_PRESENT;
+				args->start = base + off;
+				args->end = base + off + PTL1_SIZE;
+				args->pstart = phys;
+				args->pend = phys + PTL1_SIZE;
+				args->pgsize = PTL1_SIZE;
+			}
+		}
+		else if ((oldpte & PFL1_PRESENT) && diff) {
+			if (args->start != -1) {
+				show_diff_args(args);
+				args->start = -1;
+			}
+			/* Present on both sides but different: report the raw old and new PTE values at once. */
+			args->mode = PFL1_WRITABLE;
+			args->start = base + off;
+			args->end = base + off + PTL1_SIZE;
+			args->pstart = oldpte;
+			args->pend = newpte;
+			args->pgsize = PTL1_SIZE;
+
+			show_diff_args(args);
+			args->start = -1;
+		}
+	}
+	return;
+}
+/* Compare two L2 tables (a NULL table counts as all-zero): large pages are recorded here, L1 tables are descended into. */
+static void diff_pte_l2(struct page_table *oldpt, struct page_table *newpt, uint64_t base, struct diff_args *args)
+{
+	int i;
+	uint64_t off;
+	pte_t oldpte;
+	pte_t newpte;
+	pte_t diff;
+	struct page_table *p;
+	struct page_table *q;
+	uint64_t phys;
+	pte_t pte;
+	uint64_t mode;
+
+	for (i = 0; i < PT_ENTRIES; ++i) {
+		off = i * PTL2_SIZE;
+		oldpte = (oldpt == NULL)? 0: oldpt->entry[i];
+		newpte = (newpt == NULL)? 0: newpt->entry[i];
+		diff = oldpte ^ newpte;
+		if (diff & PFL2_PRESENT) {
+			pte = (oldpte & PFL2_PRESENT)? oldpte: newpte;
+			mode = (oldpte & PFL2_PRESENT)? 0: PF_PRESENT;
+			phys = pte & PT_PHYSMASK;
+			if (pte & PFL2_SIZE) {
+				phys &= ~(PTL2_SIZE - 1);
+				if (args->start != -1) {
+					if ((args->mode == mode)
+							&& (args->end == (base + off))
+							&& (args->pend == phys)
+							&& (args->pgsize == PTL2_SIZE)) {
+						args->end += args->pgsize;
+						args->pend += args->pgsize;
+						continue;
+					}
+					show_diff_args(args);
+				}
+				args->mode = mode;
+				args->start = base + off;
+				args->end = base + off + PTL2_SIZE;
+				args->pstart = phys;
+				args->pend = phys + PTL2_SIZE;
+				args->pgsize = PTL2_SIZE;
+			}
+			else {
+				p = !(oldpte & PFL2_PRESENT)? NULL
+					: phys_to_virt(oldpte & PT_PHYSMASK);
+				q = !(newpte & PFL2_PRESENT)? NULL
+					: phys_to_virt(newpte & PT_PHYSMASK);
+				diff_pte_l1(p, q, base+off, args);
+			}
+		}
+		else if (oldpte & PFL2_PRESENT) {
+			if (diff & PFL2_SIZE) {
+				if (args->start != -1) {
+					show_diff_args(args);
+					args->start = -1;
+				}
+				if (oldpte & PFL2_SIZE) {
+					phys = oldpte & PT_PHYSMASK & ~(PTL2_SIZE - 1);
+					/* Large page replaced by an L1 table: report the old page as unmapped, then the new pages. */
+					args->mode = 0;
+					args->start = base + off;
+					args->end = base + off + PTL2_SIZE;
+					args->pstart = phys;
+					args->pend = phys + PTL2_SIZE;
+					args->pgsize = PTL2_SIZE;
+
+					show_diff_args(args);
+					args->start = -1;
+					q = phys_to_virt(newpte & PT_PHYSMASK);
+					diff_pte_l1(NULL, q, base+off, args);
+				}
+				else {	/* L1 table replaced by a large page */
+					p = phys_to_virt(oldpte & PT_PHYSMASK);
+					diff_pte_l1(p, NULL, base+off, args);
+					if (args->start != -1) {	/* print the run left pending by diff_pte_l1() */
+						show_diff_args(args);
+					}
+					phys = newpte & PT_PHYSMASK & ~(PTL2_SIZE - 1);
+					args->mode = PFL1_PRESENT;
+					args->start = base + off;
+					args->end = base + off + PTL2_SIZE;
+					args->pstart = phys;
+					args->pend = phys + PTL2_SIZE;
+					args->pgsize = PTL2_SIZE;
+
+					show_diff_args(args);
+					args->start = -1;
+				}
+				if (args->start != -1) {
+					show_diff_args(args);
+					args->start = -1;
+				}
+			}
+			else if (!(oldpte & PFL2_SIZE)) {
+				p = phys_to_virt(oldpte & PT_PHYSMASK);
+				q = phys_to_virt(newpte & PT_PHYSMASK);
+				diff_pte_l1(p, q, base+off, args);
+			}
+			else if (diff) {
+				if (args->start != -1) {
+					show_diff_args(args);
+					args->start = -1;
+				}
+				/* Large page on both sides but different: report the raw old and new PTE values. */
+				args->mode = PFL1_WRITABLE;
+				args->start = base + off;
+				args->end = base + off + PTL2_SIZE;
+				args->pstart = oldpte;
+				args->pend = newpte;
+				args->pgsize = PTL2_SIZE;
+
+				show_diff_args(args);
+				args->start = -1;
+			}
+		}
+	}
+	return;
+}
+/* Compare two L3 tables (a NULL table counts as all-zero); L3 large pages are never descended into. */
+static void diff_pte_l3(struct page_table *oldpt, struct page_table *newpt, uint64_t base, struct diff_args *args)
+{
+	int i;
+	uint64_t off;
+	pte_t oldpte;
+	pte_t newpte;
+	pte_t diff;
+	struct page_table *p;
+	struct page_table *q;
+	uint64_t phys;
+	pte_t pte;
+	uint64_t mode;
+
+	for (i = 0; i < PT_ENTRIES; ++i) {
+		off = i * PTL3_SIZE;
+		oldpte = (oldpt == NULL)? 0: oldpt->entry[i];
+		newpte = (newpt == NULL)? 0: newpt->entry[i];
+		diff = oldpte ^ newpte;
+		if (diff & PFL3_PRESENT) {
+			pte = (oldpte & PFL3_PRESENT)? oldpte: newpte;
+			mode = (oldpte & PFL3_PRESENT)? 0: PF_PRESENT;
+			phys = pte & PT_PHYSMASK;
+			if (pte & PFL3_SIZE) {
+				phys &= ~(PTL3_SIZE - 1);
+				if (args->start != -1) {
+					if ((args->mode == mode)
+							&& (args->end == (base + off))
+							&& (args->pend == phys)
+							&& (args->pgsize == PTL3_SIZE)) {
+						args->end += args->pgsize;
+						args->pend += args->pgsize;
+						continue;
+					}
+					show_diff_args(args);
+				}
+				args->mode = mode;
+				args->start = base + off;
+				args->end = base + off + PTL3_SIZE;
+				args->pstart = phys;
+				args->pend = phys + PTL3_SIZE;
+				args->pgsize = PTL3_SIZE;
+			}
+			else {
+				p = !(oldpte & PFL3_PRESENT)? NULL
+					: phys_to_virt(oldpte & PT_PHYSMASK);
+				q = !(newpte & PFL3_PRESENT)? NULL
+					: phys_to_virt(newpte & PT_PHYSMASK);
+				diff_pte_l2(p, q, base+off, args);
+			}
+		}
+		else if (oldpte & PFL3_PRESENT) {
+			if ((diff & PFL3_SIZE) || ((oldpte & PFL3_SIZE) && diff)) {
+				kprintf("%lx:L3:changed\n", base+off);
+			}
+			else if (!(oldpte & PFL3_SIZE)) {	/* both entries point to L2 tables */
+				p = !(oldpte & PFL3_PRESENT)? NULL
+					: phys_to_virt(oldpte & PT_PHYSMASK);
+				q = !(newpte & PFL3_PRESENT)? NULL
+					: phys_to_virt(newpte & PT_PHYSMASK);
+				diff_pte_l2(p, q, base+off, args);
+			}
+		}
+	}
+	return;
+}
+/* Compare two top-level tables; both oldpt and newpt must be non-NULL. */
+static void diff_pte_l4(struct page_table *oldpt, struct page_table *newpt, uint64_t base, struct diff_args *args)
+{
+	int i;
+	uint64_t off;
+	struct page_table *p;
+	struct page_table *q;
+
+	for (i = 0; i < PT_ENTRIES; ++i) {
+		off = i * PTL4_SIZE;
+		p = !(oldpt->entry[i] & PFL4_PRESENT)? NULL
+			: phys_to_virt(oldpt->entry[i] & PT_PHYSMASK);
+		q = !(newpt->entry[i] & PFL4_PRESENT)? NULL
+			: phys_to_virt(newpt->entry[i] & PT_PHYSMASK);
+		diff_pte_l3(p, q, base+off, args);
+	}
+	return;
+}
+/* Print, via kprintf(), every mapping that was added, removed or changed between oldpt and newpt. */
+void ihk_mc_pt_diff(page_table_t oldpt, page_table_t newpt)
+{
+	struct diff_args args;
+
+	memset(&args, 0, sizeof(args));
+	args.oldpt = oldpt;
+	args.newpt = newpt;
+	args.start = -1;
+
+	diff_pte_l4(oldpt, newpt, 0, &args);
+	/* Flush the last pending record and emit the trailer. */
+	args.final = 1;
+	show_diff_args(&args);
+}
+/* Free every table page of a snapshot (L1, L2, L3, then L4); pages referenced by leaf entries are not freed. */
+void ihk_mc_pt_destroy_snap(page_table_t pt)
+{
+	struct page_table *l4pt;
+	int l4ix;
+	struct page_table *l3pt;
+	int l3ix;
+	struct page_table *l2pt;
+	int l2ix;
+	struct page_table *l1pt;
+
+	if (pt == NULL) {
+		return;
+	}
+	l4pt = pt;
+	for (l4ix = 0; l4ix < PT_ENTRIES; ++l4ix) {
+		if (!(l4pt->entry[l4ix] & PFL4_PRESENT)) {
+			continue;
+		}
+		l3pt = phys_to_virt(l4pt->entry[l4ix] & PT_PHYSMASK);
+		for (l3ix = 0; l3ix < PT_ENTRIES; ++l3ix) {
+			if (!(l3pt->entry[l3ix] & PFL3_PRESENT)
+					|| (l3pt->entry[l3ix] & PFL3_SIZE)) {
+				continue;
+			}
+			l2pt = phys_to_virt(l3pt->entry[l3ix] & PT_PHYSMASK);
+			for (l2ix = 0; l2ix < PT_ENTRIES; ++l2ix) {
+				if (!(l2pt->entry[l2ix] & PFL2_PRESENT)
+						|| (l2pt->entry[l2ix] & PFL2_SIZE)) {
+					continue;
+				}
+
+				l1pt = phys_to_virt(l2pt->entry[l2ix] & PT_PHYSMASK);
+				arch_free_page(l1pt);
+			}
+			arch_free_page(l2pt);
+		}
+		arch_free_page(l3pt);
+	}
+	arch_free_page(l4pt);
+}
+#endif /* OWANO_IMPLEMENTATION */
diff --git a/arch/x86/kernel/include/owano-process.h b/arch/x86/kernel/include/owano-process.h
new file mode 100644
index 00000000..be2c3e15
--- /dev/null
+++ b/arch/x86/kernel/include/owano-process.h
@@ -0,0 +1,287 @@
+#ifndef OWANO_PROCESS_H
+#define OWANO_PROCESS_H
+/* Debug helpers that validate, snapshot, print and compare the vm_range list of a process_vm. */
+extern void check_vm_range_list(char *msg, struct process_vm *vm);
+extern struct process_vm *snap_vm_range_list(struct process_vm *vm);
+extern void destroy_vm_range_list_snap(struct process_vm *snap);
+extern void show_vm_range_list(struct process_vm *vm, struct vm_range *stop);
+extern void diff_vm_range_list(struct process_vm *oldvm, struct process_vm *newvm);
+extern void cmp_vm_range_list(struct process_vm *oldvm, struct process_vm *newvm, struct vm_range *except);
+
+#ifdef OWANO_IMPLEMENTATION
+/* Validate vm's range list (bounds, link consistency, length, overlaps); panics on the first violation found. */
+void check_vm_range_list(char *msg, struct process_vm *vm) {
+	struct vm_regions *region = &vm->region;
+	struct vm_range *range;
+	struct vm_range *next;
+	const int max = 1000000;
+	int n;
+	struct vm_range *p;
+	struct vm_range *q;
+
+	kprintf("check_vm_range_list(%p,%p): %s\n", msg, vm, msg);
+	n = 0;
+	list_for_each_entry_safe(range, next, &vm->vm_range_list, list) {
+		/* range check: every vm_range must lie inside the user region */
+		if ((range->start < region->user_start) || (region->user_end < range->end)) {
+			kprintf("out of range:%s\n", msg);
+			kprintf("%p: %lx-%lx %lx\n", range, range->start, range->end, range->flag);
+			panic("out of range\n");
+			/* no return */
+		}
+
+		/* link consistency: next->prev and prev->next must point back here */
+		if (range->list.next->prev != &range->list) {
+			kprintf("vm_range_list corrupt:next:%s\n", msg);
+			show_vm_range_list(vm, next);
+			panic("check_vm_range_list");
+			/* no return */
+		}
+		if (range->list.prev->next != &range->list) {
+			kprintf("vm_range_list corrupt:prev:%s\n", msg);
+			show_vm_range_list(vm, range);
+			panic("check_vm_range_list");
+			/* no return */
+		}
+
+		/* loop check: an implausibly long list is treated as a cycle */
+		++n;
+		if (n > max) {
+			kprintf("vm_range_list corrupt:too many:%s\n", msg);
+			show_vm_range_list(vm, range);
+			panic("check_vm_range_list");
+			/* no return */
+		}
+	}
+
+	/* range overlap: compare every pair of distinct ranges */
+	list_for_each_entry(p, &vm->vm_range_list, list) {
+		list_for_each_entry(q, &vm->vm_range_list, list) {
+			if (p == q) {
+				continue;
+			}
+			if ((p->start < q->end) && (q->start < p->end)) {
+				kprintf("overlapped vm_range:%s\n", msg);
+				kprintf("%p: %lx-%lx %lx\n", p, p->start, p->end, p->flag);
+				kprintf("%p: %lx-%lx %lx\n", q, q->start, q->end, q->flag);
+				panic("overlapped vm_range\n");
+				/* no return */
+			}
+		}
+	}
+	return;
+}
+/* Return a kmalloc'ed copy of vm's region and range list, or NULL if an allocation fails. */
+struct process_vm *snap_vm_range_list(struct process_vm *vm) {
+	struct process_vm *snap = NULL;
+	struct vm_range *orig;
+	struct vm_range *range;
+
+	kprintf("snap_vm_range_list(%p)\n", vm);
+	snap = kmalloc(sizeof(*snap), IHK_MC_AP_NOWAIT);
+	if (snap == NULL) {
+		kprintf("snap_vm_range_list:kmalloc failed\n");
+		return NULL;
+	}
+	memset(snap, 0, sizeof(*snap));
+	INIT_LIST_HEAD(&snap->vm_range_list);
+	snap->region = vm->region;
+	/* Each range is copied bytewise and then linked into the snapshot's own list. */
+	list_for_each_entry(orig, &vm->vm_range_list, list) {
+		range = kmalloc(sizeof(*range), IHK_MC_AP_NOWAIT);
+		if (range == NULL) {
+			kprintf("snap_vm_range_list:kmalloc(range) failed\n");
+			destroy_vm_range_list_snap(snap);
+			return NULL;
+		}
+		memcpy(range, orig, sizeof(*range));
+		list_add_tail(&range->list, &snap->vm_range_list);
+	}
+
+	check_vm_range_list("snap_vm_range_list", snap);
+	return snap;
+}
+/* Validate a snapshot, then free each of its ranges and the snapshot itself. */
+void destroy_vm_range_list_snap(struct process_vm *snap) {
+	struct vm_range *range;
+	struct vm_range *next;
+
+	check_vm_range_list("destroy_vm_range_list_snap", snap);
+	list_for_each_entry_safe(range, next, &snap->vm_range_list, list) {
+		list_del(&range->list);
+		kfree(range);
+	}
+
+	kfree(snap);
+	return;
+}
+/* Print each range of vm; if stop is non-NULL, printing ends after that range has been shown. */
+void show_vm_range_list(struct process_vm *vm, struct vm_range *stop) {
+	struct vm_range *range;
+	struct vm_range *next;
+
+	kprintf("vm_range_list: %p\n", &vm->vm_range_list);
+	list_for_each_entry_safe(range, next, &vm->vm_range_list, list) {
+		kprintf("%p: n %p p %p %lx-%lx %lx\n",
+			range,
+			range->list.next,
+			range->list.prev,
+			range->start,
+			range->end,
+			range->flag);
+		if ((stop != NULL) && (range == stop)) {
+			break;
+		}
+	}
+
+	return;
+}
+/* Two ranges are considered the same when start, end and flag all match. */
+static int is_same_vm_range(struct vm_range *lhs, struct vm_range *rhs) {
+	return (1
+		&& (lhs->start == rhs->start)
+		&& (lhs->end == rhs->end)
+		&& (lhs->flag == rhs->flag)
+		);
+}
+/* Print a unified-diff-like listing of two range lists, with up to 3 unchanged ranges of context around each change. */
+void diff_vm_range_list(struct process_vm *oldvm, struct process_vm *newvm) {
+	struct vm_range *oldrange;
+	struct vm_range *newrange;
+	int tail = 0;
+	int pending = 0;
+	struct vm_range *pending_range = NULL;
+
+	kprintf("vm_range_list: %p %p\n", oldvm, newvm);
+
+	oldrange = list_first_entry(&oldvm->vm_range_list, struct vm_range, list);
+	newrange = list_first_entry(&newvm->vm_range_list, struct vm_range, list);
+	for (;;) {
+		if ((&oldrange->list == &oldvm->vm_range_list)
+				&& (&newrange->list == &newvm->vm_range_list)) {
+			break;
+		}
+
+#define is_list_end(e,h,m) (&(e)->m == (h))
+		if (!is_list_end(oldrange, &oldvm->vm_range_list, list)
+				&& !is_list_end(newrange, &newvm->vm_range_list, list)
+				&& is_same_vm_range(oldrange, newrange)) {
+			/* same: print as trailing context, or remember as leading context (at most 3) */
+			if (tail > 0) {
+				--tail;
+				kprintf(" %012lx-%012lx %lx\n",
+					oldrange->start,
+					oldrange->end,
+					oldrange->flag);
+			}
+			else if (pending <= 0) {
+				pending_range = oldrange;
+				pending = 1;
+			}
+			else if (pending <= 2) {
+				++pending;
+			}
+			else {
+				pending_range = list_entry(pending_range->list.next, struct vm_range, list);
+			}
+			oldrange = list_entry(oldrange->list.next, struct vm_range, list);
+			newrange = list_entry(newrange->list.next, struct vm_range, list);
+		}
+		else {
+			while (pending > 0) {
+				kprintf(" %012lx-%012lx %lx\n",
+					pending_range->start,
+					pending_range->end,
+					pending_range->flag);
+				pending_range = list_entry(pending_range->list.next, struct vm_range, list);
+				--pending;
+			}
+
+			if ((!is_list_end(oldrange, &oldvm->vm_range_list, list)
+						&& is_list_end(newrange, &newvm->vm_range_list, list))
+					|| (!is_list_end(oldrange, &oldvm->vm_range_list, list)
+						&& !is_list_end(newrange, &newvm->vm_range_list, list)
+						&& (oldrange->start <= newrange->start))) {
+				/* delete */
+				kprintf("- %012lx-%012lx %lx\n",
+					oldrange->start,
+					oldrange->end,
+					oldrange->flag);
+				oldrange = list_entry(oldrange->list.next, struct vm_range, list);
+			}
+			else {
+				/* add */
+				kprintf("+ %012lx-%012lx %lx\n",
+					newrange->start,
+					newrange->end,
+					newrange->flag);
+				newrange = list_entry(newrange->list.next, struct vm_range, list);
+			}
+			tail = 3;
+		}
+	}
+
+	return;
+}
+/* Panic if the two lists differ by any range not fully contained in "except" (by any range at all if except is NULL). */
+void cmp_vm_range_list(struct process_vm *oldvm, struct process_vm *newvm, struct vm_range *except) {
+	struct vm_range *oldrange;
+	struct vm_range *newrange;
+
+	oldrange = list_first_entry(&oldvm->vm_range_list, struct vm_range, list);
+	newrange = list_first_entry(&newvm->vm_range_list, struct vm_range, list);
+	for (;;) {
+		if ((&oldrange->list == &oldvm->vm_range_list)
+				&& (&newrange->list == &newvm->vm_range_list)) {
+			break;
+		}
+
+#define is_list_end(e,h,m) (&(e)->m == (h))
+		if (!is_list_end(oldrange, &oldvm->vm_range_list, list)
+				&& !is_list_end(newrange, &newvm->vm_range_list, list)
+				&& is_same_vm_range(oldrange, newrange)) {
+			/* same */
+			oldrange = list_entry(oldrange->list.next, struct vm_range, list);
+			newrange = list_entry(newrange->list.next, struct vm_range, list);
+		}
+		else if ((!is_list_end(oldrange, &oldvm->vm_range_list, list)
+					&& is_list_end(newrange, &newvm->vm_range_list, list))
+				|| (!is_list_end(oldrange, &oldvm->vm_range_list, list)
+					&& !is_list_end(newrange, &newvm->vm_range_list, list)
+					&& (oldrange->start <= newrange->start))) {
+			/* delete */
+			if ((except == NULL)
+					|| (oldrange->start < except->start)
+					|| (except->end < oldrange->end))
+			{
+				kprintf("vm_range_list: %p %p\n", oldvm, newvm);
+				kprintf("- %012lx-%012lx %lx\n",
+					oldrange->start,
+					oldrange->end,
+					oldrange->flag);
+				panic("cmp_vm_range_list:deleted\n");
+			}
+			oldrange = list_entry(oldrange->list.next, struct vm_range, list);
+		}
+		else {
+			/* add */
+			if ((except == NULL)
+					|| (newrange->start < except->start)
+					|| (except->end < newrange->end))
+			{
+				kprintf("vm_range_list: %p %p\n", oldvm, newvm);
+				kprintf("+ %012lx-%012lx %lx\n",
+					newrange->start,
+					newrange->end,
+					newrange->flag);
+				panic("cmp_vm_range_list:added\n");
+			}
+			newrange = list_entry(newrange->list.next, struct vm_range, list);
+		}
+	}
+
+	return;
+}
+
+#endif /* OWANO_IMPLEMENTATION */
+#endif /* OWANO_PROCESS_H */
diff --git a/arch/x86/kernel/memory.c b/arch/x86/kernel/memory.c
index 511edb15..f62da2c3 100644
--- a/arch/x86/kernel/memory.c
+++ b/arch/x86/kernel/memory.c
@@ -124,6 +124,10 @@ static struct page_table *init_pt;
 static int use_1gb_page = 0;
 #endif
 
+#define OWANO_IMPLEMENTATION
+#include <owano-memory.h>
+#undef OWANO_IMPLEMENTATION
+
 #ifdef USE_LARGE_PAGES
 static void check_available_page_size(void)
 {
diff --git a/kernel/process.c b/kernel/process.c
index 733cd9de..f46a28d6 100644
--- a/kernel/process.c
+++ b/kernel/process.c
@@ -42,6 +42,10 @@
 #define ekprintf(...) kprintf(__VA_ARGS__)
 #endif
 
+#define OWANO_IMPLEMENTATION
+#include <owano-process.h>
+#undef OWANO_IMPLEMENTATION
+
 extern long do_arch_prctl(unsigned long code, unsigned long address);
 
 static void insert_vm_range_list(struct process_vm *vm, struct vm_range *newrange);