From 9f39d1cd88cb5193dfd6244d00dbdf0461bce9c6 Mon Sep 17 00:00:00 2001 From: Ken Sato Date: Mon, 7 Dec 2020 14:41:29 +0900 Subject: [PATCH] move_pages: Fix and support some specs for LTP. 1. When the nodes array is NULL, move_pages doesn't move any pages; instead it returns, in the status array, the node where each page currently resides. 2. Check whether all specified nodes are online. Change-Id: Ie3534997833d797e2a9f595d1107b07d46e1c6cf Refs: #1523 --- arch/arm64/kernel/syscall.c | 70 +++++++++++++++++++++-------- arch/x86_64/kernel/syscall.c | 61 ++++++++++++++++++------- kernel/syscall.c | 35 ++++++++++----- test/issues/1523/C1523.sh | 30 +++++++++++++ test/issues/1523/Makefile | 11 +++++ test/issues/1523/README | 21 +++++++++ test/issues/1523/aarch64_result.log | 25 +++++++++++ test/issues/1523/x86_64_result.log | 25 +++++++++++ 8 files changed, 235 insertions(+), 43 deletions(-) create mode 100755 test/issues/1523/C1523.sh create mode 100644 test/issues/1523/Makefile create mode 100644 test/issues/1523/README create mode 100644 test/issues/1523/aarch64_result.log create mode 100644 test/issues/1523/x86_64_result.log diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 6380f076..2ea28136 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -16,6 +16,7 @@ #include #include #include +#include #include void terminate_mcexec(int, int); @@ -2250,8 +2251,10 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg) case 0: memcpy(mpsr->virt_addr, mpsr->user_virt_addr, sizeof(void *) * count); - memcpy(mpsr->nodes, mpsr->user_nodes, - sizeof(int) * count); + if (mpsr->user_nodes) { + memcpy(mpsr->nodes, mpsr->user_nodes, + sizeof(int) * count); + } memset(mpsr->ptep, 0, sizeof(pte_t) * count); memset(mpsr->status, 0, sizeof(int) * count); memset(mpsr->nr_pages, 0, sizeof(int) * count); @@ -2269,8 +2272,10 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg) case 0: memcpy(mpsr->virt_addr, 
mpsr->user_virt_addr, sizeof(void *) * count); - memcpy(mpsr->nodes, mpsr->user_nodes, - sizeof(int) * count); + if (mpsr->user_nodes) { + memcpy(mpsr->nodes, mpsr->user_nodes, + sizeof(int) * count); + } mpsr->nodes_ready = 1; break; case 1: @@ -2292,8 +2297,10 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg) sizeof(void *) * count); break; case 1: - memcpy(mpsr->nodes, mpsr->user_nodes, - sizeof(int) * count); + if (mpsr->user_nodes) { + memcpy(mpsr->nodes, mpsr->user_nodes, + sizeof(int) * count); + } mpsr->nodes_ready = 1; break; case 2: @@ -2322,8 +2329,10 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg) sizeof(void *) * (count / 2)); break; case 2: - memcpy(mpsr->nodes, mpsr->user_nodes, - sizeof(int) * count); + if (mpsr->user_nodes) { + memcpy(mpsr->nodes, mpsr->user_nodes, + sizeof(int) * count); + } mpsr->nodes_ready = 1; break; case 3: @@ -2349,13 +2358,15 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg) } /* NUMA verification in parallel */ - for (i = i_s; i < i_e; i++) { - if (mpsr->nodes[i] < 0 || - mpsr->nodes[i] >= ihk_mc_get_nr_numa_nodes() || - !test_bit(mpsr->nodes[i], - mpsr->proc->vm->numa_mask)) { - mpsr->phase_ret = -EINVAL; - break; + if (mpsr->user_nodes) { + for (i = i_s; i < i_e; i++) { + if (mpsr->nodes[i] < 0 || + mpsr->nodes[i] >= ihk_mc_get_nr_numa_nodes() || + !test_bit(mpsr->nodes[i], + mpsr->proc->vm->numa_mask)) { + mpsr->phase_ret = -EINVAL; + break; + } } } @@ -2387,7 +2398,7 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg) /* PTE valid? */ if (!mpsr->ptep[i] || !pte_is_present(mpsr->ptep[i])) { - mpsr->status[i] = -ENOENT; + mpsr->status[i] = -EFAULT; mpsr->ptep[i] = NULL; continue; } @@ -2451,6 +2462,26 @@ pte_out: dkprintf("%s: phase %d done\n", __FUNCTION__, phase); ++phase; + /* + * When nodes array is NULL, move_pages doesn't move any pages, + * instead will return the node where each page + * currently resides by status array. 
+ */ + if (!mpsr->user_nodes) { + /* get nid in parallel */ + for (i = i_s; i < i_e; i++) { + if (mpsr->status[i] < 0) { + continue; + } + mpsr->status[i] = phys_to_nid( + pte_get_phys(mpsr->ptep[i])); + } + mpsr->phase_ret = 0; + goto out; // return node information + } + + /* Processing of move pages */ + if (cpu_index == 0) { /* Allocate new pages on target NUMA nodes */ for (i = 0; i < count; i++) { @@ -2463,8 +2494,11 @@ pte_out: /* TODO: store pgalign info in an array as well? */ if (mpsr->nr_pages[i] > 1) { - if (mpsr->nr_pages[i] * PAGE_SIZE == PTL2_SIZE) - pgalign = PTL2_SHIFT - PTL1_SHIFT; + int nr_pages; + + for (pgalign = 0, nr_pages = mpsr->nr_pages[i]; + nr_pages != 1; pgalign++, nr_pages >>= 1) { + } } dst = ihk_mc_alloc_aligned_pages_node(mpsr->nr_pages[i], diff --git a/arch/x86_64/kernel/syscall.c b/arch/x86_64/kernel/syscall.c index fe207f47..fe5a0dc5 100644 --- a/arch/x86_64/kernel/syscall.c +++ b/arch/x86_64/kernel/syscall.c @@ -32,6 +32,7 @@ #include #include #include +#include #include void terminate_mcexec(int, int); @@ -2302,8 +2303,10 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg) case 0: memcpy(mpsr->virt_addr, mpsr->user_virt_addr, sizeof(void *) * count); - memcpy(mpsr->nodes, mpsr->user_nodes, - sizeof(int) * count); + if (mpsr->user_nodes) { + memcpy(mpsr->nodes, mpsr->user_nodes, + sizeof(int) * count); + } memset(mpsr->ptep, 0, sizeof(pte_t) * count); memset(mpsr->status, 0, sizeof(int) * count); memset(mpsr->nr_pages, 0, sizeof(int) * count); @@ -2321,8 +2324,10 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg) case 0: memcpy(mpsr->virt_addr, mpsr->user_virt_addr, sizeof(void *) * count); - memcpy(mpsr->nodes, mpsr->user_nodes, - sizeof(int) * count); + if (mpsr->user_nodes) { + memcpy(mpsr->nodes, mpsr->user_nodes, + sizeof(int) * count); + } mpsr->nodes_ready = 1; break; case 1: @@ -2344,8 +2349,10 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg) sizeof(void *) * count); 
break; case 1: - memcpy(mpsr->nodes, mpsr->user_nodes, - sizeof(int) * count); + if (mpsr->user_nodes) { + memcpy(mpsr->nodes, mpsr->user_nodes, + sizeof(int) * count); + } mpsr->nodes_ready = 1; break; case 2: @@ -2374,8 +2381,10 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg) sizeof(void *) * (count / 2)); break; case 2: - memcpy(mpsr->nodes, mpsr->user_nodes, - sizeof(int) * count); + if (mpsr->user_nodes) { + memcpy(mpsr->nodes, mpsr->user_nodes, + sizeof(int) * count); + } mpsr->nodes_ready = 1; break; case 3: @@ -2401,13 +2410,15 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg) } /* NUMA verification in parallel */ - for (i = i_s; i < i_e; i++) { - if (mpsr->nodes[i] < 0 || - mpsr->nodes[i] >= ihk_mc_get_nr_numa_nodes() || - !test_bit(mpsr->nodes[i], - mpsr->proc->vm->numa_mask)) { - mpsr->phase_ret = -EINVAL; - break; + if (mpsr->user_nodes) { + for (i = i_s; i < i_e; i++) { + if (mpsr->nodes[i] < 0 || + mpsr->nodes[i] >= ihk_mc_get_nr_numa_nodes() || + !test_bit(mpsr->nodes[i], + mpsr->proc->vm->numa_mask)) { + mpsr->phase_ret = -EINVAL; + break; + } } } @@ -2503,6 +2514,26 @@ pte_out: dkprintf("%s: phase %d done\n", __FUNCTION__, phase); ++phase; + /* + * When nodes array is NULL, move_pages doesn't move any pages, + * instead will return the node where each page + * currently resides by status array. 
+ */ + if (!mpsr->user_nodes) { + /* get nid in parallel */ + for (i = i_s; i < i_e; i++) { + if (mpsr->status[i] < 0) { + continue; + } + mpsr->status[i] = phys_to_nid( + pte_get_phys(mpsr->ptep[i])); + } + mpsr->phase_ret = 0; + goto out; // return node information + } + + /* Processing of move pages */ + if (cpu_index == 0) { /* Allocate new pages on target NUMA nodes */ for (i = 0; i < count; i++) { diff --git a/kernel/syscall.c b/kernel/syscall.c index 5b708e61..4831478c 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -10250,7 +10250,7 @@ SYSCALL_DECLARE(move_pages) struct move_pages_smp_req mpsr; struct process_vm *vm = cpu_local_var(current)->vm; - int ret = 0; + int i, ret = 0; unsigned long t_s, t_e; @@ -10260,18 +10260,20 @@ SYSCALL_DECLARE(move_pages) if (pid) { kprintf("%s: ERROR: only self (pid == 0)" " is supported\n", __FUNCTION__); - return -EINVAL; + ret = -EINVAL; + goto out; } - switch (flags) { - case MPOL_MF_MOVE_ALL: + /* Check flags */ + if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL)) { + ret = -EINVAL; + goto out; + } + if (flags & MPOL_MF_MOVE_ALL) { kprintf("%s: ERROR: MPOL_MF_MOVE_ALL" " not supported\n", __func__); - return -EINVAL; - case MPOL_MF_MOVE: - break; - default: - return -EINVAL; + ret = -EINVAL; + goto out; } /* Allocate kernel arrays */ @@ -10319,7 +10321,7 @@ t_e = rdtsc(); kprintf("%s: init malloc: %lu \n", __FUNCTION__, t_e - t_s); t_s goto dealloc_out; } - if (verify_process_vm(cpu_local_var(current)->vm, + if (user_nodes && verify_process_vm(cpu_local_var(current)->vm, user_nodes, sizeof(int) * count)) { ret = -EFAULT; goto dealloc_out; @@ -10330,6 +10332,18 @@ t_e = rdtsc(); kprintf("%s: init malloc: %lu \n", __FUNCTION__, t_e - t_s); t_s ret = -EFAULT; goto dealloc_out; } + + /* Check node ID */ + if (user_nodes) { + copy_from_user(nodes, user_nodes, sizeof(int) * count); + for (i = 0; i < count; i++) { + if (nodes[i] < 0 || nodes[i] >= ihk_mc_get_nr_numa_nodes()) { + ret = -ENODEV; + goto dealloc_out; + } + } + 
} + t_e = rdtsc(); kprintf("%s: init verify: %lu \n", __FUNCTION__, t_e - t_s); t_s = t_e; #if 0 @@ -10422,6 +10436,7 @@ dealloc_out: kfree(ptep); kfree(dst_phys); +out: return ret; } diff --git a/test/issues/1523/C1523.sh b/test/issues/1523/C1523.sh new file mode 100755 index 00000000..343903f9 --- /dev/null +++ b/test/issues/1523/C1523.sh @@ -0,0 +1,30 @@ +#!/bin/sh + +USELTP=1 +USEOSTEST=0 + +MCREBOOT=0 +. ../../common.sh + +BOOTPARAM="${BOOTPARAM} -e anon_on_demand" +mcreboot + +issue="1523" +tid=01 + +for tp in move_pages01 move_pages02 move_pages04 move_pages06 move_pages09 move_pages10 +do + tname=`printf "C${issue}T%02d" ${tid}` + echo "*** ${tname} start *******************************" + sudo $MCEXEC $LTPBIN/$tp 2>&1 | tee $tp.txt + ok=`grep PASS $tp.txt | wc -l` + ng=`grep FAIL $tp.txt | wc -l` + if [ $ng = 0 ]; then + echo "*** ${tname} PASSED ($ok)" + else + echo "*** ${tname} FAILED (ok=$ok ng=$ng)" + fi + tid=$((tid + 1)) + echo "" +done + diff --git a/test/issues/1523/Makefile b/test/issues/1523/Makefile new file mode 100644 index 00000000..bbf53a9f --- /dev/null +++ b/test/issues/1523/Makefile @@ -0,0 +1,11 @@ +CFLAGS=-g +LDFLAGS= + +TARGET= + +all: $(TARGET) + +test: all + ./C1523.sh +clean: + rm -f $(TARGET) *.o *.txt diff --git a/test/issues/1523/README b/test/issues/1523/README new file mode 100644 index 00000000..d84df554 --- /dev/null +++ b/test/issues/1523/README @@ -0,0 +1,21 @@ +【Issue#1523 動作確認】 +□ テスト内容 +1. 
以下のLTPがPASSすることを確認する + - move_pages01 + - move_pages02 + - move_pages04 + - move_pages06 + - move_pages09 + - move_pages10 + +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +□ 実行結果 +x86_64_result.log aarch64_result.log 参照。 +すべての項目をPASSしていることを確認。 diff --git a/test/issues/1523/aarch64_result.log b/test/issues/1523/aarch64_result.log new file mode 100644 index 00000000..82efa89f --- /dev/null +++ b/test/issues/1523/aarch64_result.log @@ -0,0 +1,25 @@ +mcstop+release.sh ... done +mcreboot.sh -c 37-43,49-55 -m 2G@2,2G@3 -r 37-43:36+49-55:48 -O -e anon_on_demand ... done +*** C1523T01 start ******************************* +move_pages01 1 TPASS : pages are present in expected nodes +*** C1523T01 PASSED (1) + +*** C1523T02 start ******************************* +move_pages02 1 TPASS : pages are present in expected nodes +*** C1523T02 PASSED (1) + +*** C1523T03 start ******************************* +move_pages04 1 TPASS : status[1] has expected value +*** C1523T03 PASSED (1) + +*** C1523T04 start ******************************* +move_pages06 1 TPASS : move_pages failed with ENODEV as expected +*** C1523T04 PASSED (1) + +*** C1523T05 start ******************************* +move_pages09 1 TPASS : move_pages succeeded +*** C1523T05 PASSED (1) + +*** C1523T06 start ******************************* +move_pages10 1 TPASS : move_pages failed with EINVAL as expected +*** C1523T06 PASSED (1) diff --git a/test/issues/1523/x86_64_result.log b/test/issues/1523/x86_64_result.log new file mode 100644 index 00000000..4d07bd47 --- /dev/null +++ b/test/issues/1523/x86_64_result.log @@ -0,0 +1,25 @@ +mcstop+release.sh ... done +mcreboot.sh -c 1-7,9-15,17-23,25-31 -m 10G@0,10G@1 -r 1-7:0+9-15:8+17-23:16+25-31:24 -O -e anon_on_demand ... 
done +*** C1523T01 start ******************************* +move_pages01 1 TPASS : pages are present in expected nodes +*** C1523T01 PASSED (1) + +*** C1523T02 start ******************************* +move_pages02 1 TPASS : pages are present in expected nodes +*** C1523T02 PASSED (1) + +*** C1523T03 start ******************************* +move_pages04 1 TPASS : status[1] has expected value +*** C1523T03 PASSED (1) + +*** C1523T04 start ******************************* +move_pages06 1 TPASS : move_pages failed with ENODEV as expected +*** C1523T04 PASSED (1) + +*** C1523T05 start ******************************* +move_pages09 1 TPASS : move_pages succeeded +*** C1523T05 PASSED (1) + +*** C1523T06 start ******************************* +move_pages10 1 TPASS : move_pages failed with EINVAL as expected +*** C1523T06 PASSED (1)