diff --git a/executer/kernel/mcctrl/mcctrl.h b/executer/kernel/mcctrl/mcctrl.h index 70688b6f..8c95e0b3 100644 --- a/executer/kernel/mcctrl/mcctrl.h +++ b/executer/kernel/mcctrl/mcctrl.h @@ -298,6 +298,7 @@ struct mcctrl_cpu_topology { //struct mcctrl_usrdata *udp; struct ihk_cpu_topology *saved; int mckernel_cpu_id; + int mckernel_core_id; cpumask_t core_siblings; cpumask_t thread_siblings; diff --git a/executer/kernel/mcctrl/sysfs_files.c b/executer/kernel/mcctrl/sysfs_files.c index cf7024c8..e03a27dc 100644 --- a/executer/kernel/mcctrl/sysfs_files.c +++ b/executer/kernel/mcctrl/sysfs_files.c @@ -12,6 +12,7 @@ */ #include +#include #include #include #include @@ -23,6 +24,14 @@ #define wprintk(...) do { if (1) printk(KERN_WARNING __VA_ARGS__); } while (0) #define eprintk(...) do { if (1) printk(KERN_ERR __VA_ARGS__); } while (0) +struct physical_core_id { + int linux_core_id; + int mckernel_core_id; + struct hlist_node next; +}; + +DEFINE_HASHTABLE(physical_core_id_map, 10); + static ssize_t show_int(struct sysfsm_ops *ops, void *instance, void *buf, size_t size) { @@ -188,6 +197,9 @@ static void free_cpu_topology(struct mcctrl_usrdata *udp) void free_topology_info(ihk_os_t os) { struct mcctrl_usrdata *udp = ihk_host_os_get_usrdata(os); + int bkt; + struct hlist_node *tmp; + struct physical_core_id *cur; if (!udp) { pr_warn("%s: warning: mcctrl_usrdata not found\n", __func__); @@ -197,6 +209,11 @@ void free_topology_info(ihk_os_t os) free_node_topology(udp); free_cpu_topology(udp); + hash_for_each_safe(physical_core_id_map, bkt, tmp, cur, next) { + hash_del(&cur->next); + kfree(cur); + } + return; } /* free_topology_info() */ @@ -348,6 +365,11 @@ static struct mcctrl_cpu_topology *get_one_cpu_topology(struct mcctrl_usrdata *u struct mcctrl_cpu_topology *topology = NULL; struct cache_topology *cache; struct ihk_cache_topology *saved_cache; + int linux_core_id; + int mckernel_core_id; + struct physical_core_id *entry; + struct physical_core_id *cur; + static int nr_mckernel_core; dprintk("get_one_cpu_topology(%p,%d)\n", udp, index); topology = kmalloc(sizeof(*topology), GFP_KERNEL); @@ -391,6 +413,22 @@ static struct mcctrl_cpu_topology *get_one_cpu_topology(struct mcctrl_usrdata *u goto out; } + linux_core_id = topology->saved->core_id; + mckernel_core_id = -1; + hash_for_each_possible(physical_core_id_map, cur, next, linux_core_id) { + mckernel_core_id = cur->mckernel_core_id; + break; + } + if (mckernel_core_id < 0) { + mckernel_core_id = nr_mckernel_core++; + entry = kmalloc(sizeof(struct physical_core_id), GFP_KERNEL); + entry->linux_core_id = linux_core_id; + entry->mckernel_core_id = mckernel_core_id; + hash_add(physical_core_id_map, + &entry->next, entry->linux_core_id); + } + topology->mckernel_core_id = mckernel_core_id; + list_for_each_entry(saved_cache, &topology->saved->cache_topology_list, chain) { cache = get_cache_topology(udp, topology, saved_cache); @@ -512,7 +550,7 @@ static void setup_cpu_sysfs_files(struct mcctrl_usrdata *udp, "%s/cpu%d/topology/physical_package_id", prefix, cpu_number); sysfsm_createf(udp->os, SYSFS_SNOOPING_OPS_d32, - &cpu->saved->core_id, 0444, + &cpu->mckernel_core_id, 0444, "%s/cpu%d/topology/core_id", prefix, cpu_number); diff --git a/test/issues/1439/C1439.py b/test/issues/1439/C1439.py new file mode 100644 index 00000000..89e064a6 --- /dev/null +++ b/test/issues/1439/C1439.py @@ -0,0 +1,92 @@ +# +# Test script for issue #1439 +# + +import os +import sys +import subprocess + +mckdir = os.getenv('MCK_DIR') +mckbin = mckdir + '/bin' +mcksbin = mckdir + '/sbin' +mcreboot = mcksbin + '/mcreboot.sh' +mcstop = mcksbin + '/mcstop+release.sh' +mcexec = mckbin + '/mcexec' + + +def get_command_result(cmd): + results = subprocess.Popen( + cmd, stdout=subprocess.PIPE, + shell=True).stdout.readlines() + return [str(x).rstrip("\n") for x in results] + +def enumerate_cpu(cpu_list): + allcpus = [] + for ranged_cpu in cpu_list.split(','): + try: + cpu_begin, cpu_end = ranged_cpu.split('-') + except ValueError: + cpu_begin = cpu_end = ranged_cpu + for i in range(int(cpu_begin), int(cpu_end) + 1): + allcpus.append(i) + allcpus.sort() + return allcpus + +def bind_cpu_core(catcmd, allcpus): + cpucores = {} + for cpu in allcpus: + sysfile = '/sys/devices/system/cpu/cpu%d/topology/core_id' % cpu + core_id = get_command_result(catcmd + ' ' + sysfile)[0] + cpucores[cpu] = int(core_id) + return cpucores + +def compare_cores(linuxcpucores, mckernelcpucores): + linuxcpus = linuxcpucores.keys() + linuxcpus.sort() + linuxcores = [] + for linuxcpu in linuxcpus: + linuxcores.append(linuxcpucores[linuxcpu]) + mckernelcpus = mckernelcpucores.keys() + mckernelcpus.sort() + mckernelcores = [] + for mckernelcpu in mckernelcpus: + mckernelcores.append(mckernelcpucores[mckernelcpu]) + coremap = {} + seq = 0 + for i in range(len(linuxcores)): + linuxcore = linuxcores[i] + mckernelcore = mckernelcores[i] + if linuxcore in coremap: + if mckernelcore != coremap[linuxcore]: + print 'FAIL' + quit() + else: + if seq != mckernelcore: + print 'FAIL' + quit() + seq = seq + 1 + coremap[linuxcore] = mckernelcore + +def main(): + argvs = sys.argv + argc = len(argvs) + if (argc != 2): + print 'Usage: python %s ' % argvs[0] + quit() + print 'cpu_list = %s' % argvs[1] + cpulist = argvs[1] + linuxcpus = enumerate_cpu(cpulist) + linuxcpucores = bind_cpu_core('cat', linuxcpus) + print 'linux: ' + print linuxcpucores + get_command_result('sudo ' + mcreboot + ' -c ' + cpulist) + mckernelcpus = [i for i in range(0, len(linuxcpus))] + mckernelcpucores = bind_cpu_core(mcexec + ' cat', mckernelcpus) + print 'mckernel: ' + print mckernelcpucores + get_command_result('sudo ' + mcstop) + compare_cores(linuxcpucores, mckernelcpucores) + print 'SUCCESS' + +if __name__ == '__main__': + main() diff --git a/test/issues/1439/Makefile b/test/issues/1439/Makefile new file mode 100644 index 00000000..b60eb2f3 --- /dev/null +++ b/test/issues/1439/Makefile @@ -0,0 +1,16 @@ + +all:: show-omp-cpu-affinity + +show-omp-cpu-affinity:: show-omp-cpu-affinity.c + fcc -Nclang -fopenmp -Nlibomp -o $@ $< + +test:: test-core, test-omp + +test-core:: + . ${HOME}/.mck_test_config && export MCK_DIR && python C1439.py 1-4,30-33,56-61 + +test-omp:: show-omp-cpu-affinity + ./test-affinity.sh + +clean:: + rm show-omp-cpu-affinity diff --git a/test/issues/1439/README b/test/issues/1439/README new file mode 100644 index 00000000..710afc84 --- /dev/null +++ b/test/issues/1439/README @@ -0,0 +1,50 @@ +□ テスト内容 + +/sys/devices/system/cpu/cpuX/topology/core_idについて、以下を確かめる。 + +(A) 物理コア番号の振り方について以下を確かめる + 1) 物理コアの第1の論理コアのcore_idはCPU番号(=cpu_id)と一致する + 2) 論理コアのcore_idは、それが属する物理コアの第1の論理コアのcore_idと一致する + +(B) McKernelがcpu_idをrenumberすることによる悪影響がないか確認する + 1) 富士通OpenMPで、OMP_PROC_BINDの + close(物理コアをラウンドロビンで選ぶ)と + spread(CPUトポロジ上なるべく離れるように配置する) + について、期待通りのバインディングになることを確認する + なお、apolloでのテストでは、OMP_NUM_THREADSは2から物理コア数*2の間で、 + OMP_NUM_THREADSと物理コア数のうち、大きいほうが小さい方で + 割り切れる関係にあるものに設定した。 + +□ 実行手順 + +(1) $HOME/.mck_test_configを、MCK_DIRがMcKernelのインストール先を指すように編集する + +(2) apolloログインノードにおいて一般ユーザで以下のコマンドを実行し、 + 富士通コンパイラでテストに使うOpenMPアプリケーションバイナリを作成する。 +  ソースコードはmckernelユーザでcheckoutしていると思われるが、その場合、 + 本ディレクトリを、その一般ユーザが書き込めるようにwrite permissionを + つけておく必要がある。 + + $ make + +(3) (2)で作られたバイナリを(mckernelユーザの)apolloの計算ノードにて + 動かすために、富士通コンパイラのDLLを当該ユーザが読める場所に + コピーして、その場所にLD_LIBRARY_PATHを通す。 + コピー元のディレクトリは富士通コンパイラ0.36では以下のディレクトリである。 + + /usr/local/FJSVxtclang/fujitsu_compilers_sve_own_20191226/sve_own/lib64 + +(4) apolloの計算ノードにおいてmckernelユーザで以下のコマンドを実行し、 + (A)(B)のテストを実行する。 + + $ make test-core # .... (A) + $ make test-omp # .... (B) + + +□ 確認方法 + +(A) (B) いずれにおいてもSUCCESSが出力され、FAILが出力されていないこと。 + +サンプル出力は以下のとおり + (A) aarch64_result_core.log + (B) aarch64_result_omp.log diff --git a/test/issues/1439/aarch64_result_core.log b/test/issues/1439/aarch64_result_core.log new file mode 100644 index 00000000..8856f717 --- /dev/null +++ b/test/issues/1439/aarch64_result_core.log @@ -0,0 +1,8 @@ +$ make test +python C1439.py 1-4,30-33,56-61 +cpu_list = 1-4,30-33,56-61 +linux: +{32: 4, 1: 1, 2: 2, 3: 3, 4: 4, 33: 5, 56: 256, 57: 257, 58: 258, 59: 259, 60: 260, 61: 261, 30: 2, 31: 3} +mckernel: +{0: 0, 1: 1, 2: 2, 3: 3, 4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7, 11: 8, 12: 9, 13: 10} +SUCCESS diff --git a/test/issues/1439/aarch64_result_omp.log b/test/issues/1439/aarch64_result_omp.log new file mode 100644 index 00000000..3af9a15d --- /dev/null +++ b/test/issues/1439/aarch64_result_omp.log @@ -0,0 +1,108 @@ +./test-affinity.sh +sudo /home/toshi/install/dev2-mckernel/sbin/mcreboot.sh -m 1G@0,1G@1 -c 4-27,32-55 -O +/home/toshi/install/dev2-mckernel/bin/mcexec python -u test-affinity.py +CPUS: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47] +PLACES: [[0, 24], [1, 25], [2, 26], [3, 27], [4, 28], [5, 29], [6, 30], [7, 31], [8, 32], [9, 33], [10, 34], [11, 35], [12, 36], [13, 37], [14, 38], [15, 39], [16, 40], [17, 41], [18, 42], [19, 43], [20, 44], [21, 45], [22, 46], [23, 47]] +MAP_CPU_TO_PLACE {0: [0, 24], 1: [1, 25], 2: [2, 26], 3: [3, 27], 4: [4, 28], 5: [5, 29], 6: [6, 30], 7: [7, 31], 8: [8, 32], 9: [9, 33], 10: [10, 34], 11: [11, 35], 12: [12, 36], 13: [13, 37], 14: [14, 38], 15: [15, 39], 16: [16, 40], 17: [17, 41], 18: [18, 42], 19: [19, 43], 20: [20, 44], 21: [21, 45], 22: [22, 46], 23: [23, 47], 24: [0, 24], 25: [1, 25], 26: [2, 26], 27: [3, 27], 28: [4, 28], 29: [5, 29], 30: [6, 30], 31: [7, 31], 32: [8, 32], 33: [9, 33], 34: [10, 34], 35: [11, 35], 36: [12, 36], 37: [13, 37], 38: [14, 38], 39: [15, 39], 40: [16, 40], 41: [17, 41], 42: [18, 42], 43: [19, 43], 44: [20, 44], 45: [21, 45], 46: [22, 46], 47: [23, 47]} + +./show-omp-cpu-affinity +MAP_THREAD_TO_PLACE(close): {0: [0, 24], 1: [1, 25]} +RESULT_MAP_THREAD_TO_CPU: {0: 0, 1: 1} +#CPU: 48, #PLACE: 24, OMP_PLACES: cores, OMP_PROC_BIND: close, OMP_NUM_THREAD: 2, RESULT: SUCCESS + +./show-omp-cpu-affinity +MAP_THREAD_TO_PLACE(spread): {0: [0, 24], 1: [12, 36]} +RESULT_MAP_THREAD_TO_CPU: {0: 0, 1: 12} +#CPU: 48, #PLACE: 24, OMP_PLACES: cores, OMP_PROC_BIND: spread, OMP_NUM_THREAD: 2, RESULT: SUCCESS + +./show-omp-cpu-affinity +MAP_THREAD_TO_PLACE(close): {0: [0, 24], 1: [1, 25], 2: [2, 26]} +RESULT_MAP_THREAD_TO_CPU: {0: 0, 1: 1, 2: 2} +#CPU: 48, #PLACE: 24, OMP_PLACES: cores, OMP_PROC_BIND: close, OMP_NUM_THREAD: 3, RESULT: SUCCESS + +./show-omp-cpu-affinity +MAP_THREAD_TO_PLACE(spread): {0: [0, 24], 1: [8, 32], 2: [16, 40]} +RESULT_MAP_THREAD_TO_CPU: {0: 0, 1: 8, 2: 16} +#CPU: 48, #PLACE: 24, OMP_PLACES: cores, OMP_PROC_BIND: spread, OMP_NUM_THREAD: 3, RESULT: SUCCESS + +./show-omp-cpu-affinity +MAP_THREAD_TO_PLACE(close): {0: [0, 24], 1: [1, 25], 2: [2, 26], 3: [3, 27]} +RESULT_MAP_THREAD_TO_CPU: {0: 0, 1: 1, 2: 2, 3: 3} +#CPU: 48, #PLACE: 24, OMP_PLACES: cores, OMP_PROC_BIND: close, OMP_NUM_THREAD: 4, RESULT: SUCCESS + +./show-omp-cpu-affinity +MAP_THREAD_TO_PLACE(spread): {0: [0, 24], 1: [6, 30], 2: [12, 36], 3: [18, 42]} +RESULT_MAP_THREAD_TO_CPU: {0: 0, 1: 6, 2: 12, 3: 18} +#CPU: 48, #PLACE: 24, OMP_PLACES: cores, OMP_PROC_BIND: spread, OMP_NUM_THREAD: 4, RESULT: SUCCESS + +./show-omp-cpu-affinity +MAP_THREAD_TO_PLACE(close): {0: [0, 24], 1: [1, 25], 2: [2, 26], 3: [3, 27], 4: [4, 28], 5: [5, 29]} +RESULT_MAP_THREAD_TO_CPU: {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5} +#CPU: 48, #PLACE: 24, OMP_PLACES: cores, OMP_PROC_BIND: close, OMP_NUM_THREAD: 6, RESULT: SUCCESS + +./show-omp-cpu-affinity +MAP_THREAD_TO_PLACE(spread): {0: [0, 24], 1: [4, 28], 2: [8, 32], 3: [12, 36], 4: [16, 40], 5: [20, 44]} +RESULT_MAP_THREAD_TO_CPU: {0: 0, 1: 4, 2: 8, 3: 12, 4: 16, 5: 20} +#CPU: 48, #PLACE: 24, OMP_PLACES: cores, OMP_PROC_BIND: spread, OMP_NUM_THREAD: 6, RESULT: SUCCESS + +./show-omp-cpu-affinity +MAP_THREAD_TO_PLACE(close): {0: [0, 24], 1: [1, 25], 2: [2, 26], 3: [3, 27], 4: [4, 28], 5: [5, 29], 6: [6, 30], 7: [7, 31]} +RESULT_MAP_THREAD_TO_CPU: {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} +#CPU: 48, #PLACE: 24, OMP_PLACES: cores, OMP_PROC_BIND: close, OMP_NUM_THREAD: 8, RESULT: SUCCESS + +./show-omp-cpu-affinity +MAP_THREAD_TO_PLACE(spread): {0: [0, 24], 1: [3, 27], 2: [6, 30], 3: [9, 33], 4: [12, 36], 5: [15, 39], 6: [18, 42], 7: [21, 45]} +RESULT_MAP_THREAD_TO_CPU: {0: 0, 1: 3, 2: 6, 3: 9, 4: 12, 5: 15, 6: 18, 7: 21} +#CPU: 48, #PLACE: 24, OMP_PLACES: cores, OMP_PROC_BIND: spread, OMP_NUM_THREAD: 8, RESULT: SUCCESS + +./show-omp-cpu-affinity +MAP_THREAD_TO_PLACE(close): {0: [0, 24], 1: [1, 25], 2: [2, 26], 3: [3, 27], 4: [4, 28], 5: [5, 29], 6: [6, 30], 7: [7, 31], 8: [8, 32], 9: [9, 33], 10: [10, 34], 11: [11, 35]} +RESULT_MAP_THREAD_TO_CPU: {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11} +#CPU: 48, #PLACE: 24, OMP_PLACES: cores, OMP_PROC_BIND: close, OMP_NUM_THREAD: 12, RESULT: SUCCESS + +./show-omp-cpu-affinity +MAP_THREAD_TO_PLACE(spread): {0: [0, 24], 1: [2, 26], 2: [4, 28], 3: [6, 30], 4: [8, 32], 5: [10, 34], 6: [12, 36], 7: [14, 38], 8: [16, 40], 9: [18, 42], 10: [20, 44], 11: [22, 46]} +RESULT_MAP_THREAD_TO_CPU: {0: 0, 1: 2, 2: 4, 3: 6, 4: 8, 5: 10, 6: 12, 7: 14, 8: 16, 9: 18, 10: 20, 11: 22} +#CPU: 48, #PLACE: 24, OMP_PLACES: cores, OMP_PROC_BIND: spread, OMP_NUM_THREAD: 12, RESULT: SUCCESS + +./show-omp-cpu-affinity +MAP_THREAD_TO_PLACE(close): {0: [0, 24], 1: [1, 25], 2: [2, 26], 3: [3, 27], 4: [4, 28], 5: [5, 29], 6: [6, 30], 7: [7, 31], 8: [8, 32], 9: [9, 33], 10: [10, 34], 11: [11, 35], 12: [12, 36], 13: [13, 37], 14: [14, 38], 15: [15, 39], 16: [16, 40], 17: [17, 41], 18: [18, 42], 19: [19, 43], 20: [20, 44], 21: [21, 45], 22: [22, 46], 23: [23, 47]} +RESULT_MAP_THREAD_TO_CPU: {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23} +#CPU: 48, #PLACE: 24, OMP_PLACES: cores, OMP_PROC_BIND: close, OMP_NUM_THREAD: 24, RESULT: SUCCESS + +./show-omp-cpu-affinity +MAP_THREAD_TO_PLACE(spread): {0: [0, 24], 1: [1, 25], 2: [2, 26], 3: [3, 27], 4: [4, 28], 5: [5, 29], 6: [6, 30], 7: [7, 31], 8: [8, 32], 9: [9, 33], 10: [10, 34], 11: [11, 35], 12: [12, 36], 13: [13, 37], 14: [14, 38], 15: [15, 39], 16: [16, 40], 17: [17, 41], 18: [18, 42], 19: [19, 43], 20: [20, 44], 21: [21, 45], 22: [22, 46], 23: [23, 47]} +RESULT_MAP_THREAD_TO_CPU: {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23} +#CPU: 48, #PLACE: 24, OMP_PLACES: cores, OMP_PROC_BIND: spread, OMP_NUM_THREAD: 24, RESULT: SUCCESS + +./show-omp-cpu-affinity +MAP_THREAD_TO_PLACE(close): {0: [0, 24], 1: [0, 24], 2: [1, 25], 3: [1, 25], 4: [2, 26], 5: [2, 26], 6: [3, 27], 7: [3, 27], 8: [4, 28], 9: [4, 28], 10: [5, 29], 11: [5, 29], 12: [6, 30], 13: [6, 30], 14: [7, 31], 15: [7, 31], 16: [8, 32], 17: [8, 32], 18: [9, 33], 19: [9, 33], 20: [10, 34], 21: [10, 34], 22: [11, 35], 23: [11, 35], 24: [12, 36], 25: [12, 36], 26: [13, 37], 27: [13, 37], 28: [14, 38], 29: [14, 38], 30: [15, 39], 31: [15, 39], 32: [16, 40], 33: [16, 40], 34: [17, 41], 35: [17, 41], 36: [18, 42], 37: [18, 42], 38: [19, 43], 39: [19, 43], 40: [20, 44], 41: [20, 44], 42: [21, 45], 43: [21, 45], 44: [22, 46], 45: [22, 46], 46: [23, 47], 47: [23, 47]} +RESULT_MAP_THREAD_TO_CPU: {0: 0, 1: 0, 2: 1, 3: 1, 4: 2, 5: 2, 6: 3, 7: 3, 8: 4, 9: 4, 10: 5, 11: 5, 12: 6, 13: 6, 14: 7, 15: 7, 16: 8, 17: 8, 18: 9, 19: 9, 20: 10, 21: 10, 22: 11, 23: 11, 24: 12, 25: 12, 26: 13, 27: 13, 28: 14, 29: 14, 30: 15, 31: 15, 32: 16, 33: 16, 34: 17, 35: 17, 36: 18, 37: 18, 38: 19, 39: 19, 40: 20, 41: 20, 42: 21, 43: 21, 44: 22, 45: 22, 46: 23, 47: 47} +#CPU: 48, #PLACE: 24, OMP_PLACES: cores, OMP_PROC_BIND: close, OMP_NUM_THREAD: 48, RESULT: SUCCESS + +./show-omp-cpu-affinity +MAP_THREAD_TO_PLACE(spread): {0: [0, 24], 1: [0, 24], 2: [1, 25], 3: [1, 25], 4: [2, 26], 5: [2, 26], 6: [3, 27], 7: [3, 27], 8: [4, 28], 9: [4, 28], 10: [5, 29], 11: [5, 29], 12: [6, 30], 13: [6, 30], 14: [7, 31], 15: [7, 31], 16: [8, 32], 17: [8, 32], 18: [9, 33], 19: [9, 33], 20: [10, 34], 21: [10, 34], 22: [11, 35], 23: [11, 35], 24: [12, 36], 25: [12, 36], 26: [13, 37], 27: [13, 37], 28: [14, 38], 29: [14, 38], 30: [15, 39], 31: [15, 39], 32: [16, 40], 33: [16, 40], 34: [17, 41], 35: [17, 41], 36: [18, 42], 37: [18, 42], 38: [19, 43], 39: [19, 43], 40: [20, 44], 41: [20, 44], 42: [21, 45], 43: [21, 45], 44: [22, 46], 45: [22, 46], 46: [23, 47], 47: [23, 47]} +RESULT_MAP_THREAD_TO_CPU: {0: 0, 1: 0, 2: 1, 3: 1, 4: 2, 5: 2, 6: 3, 7: 3, 8: 4, 9: 4, 10: 5, 11: 5, 12: 6, 13: 6, 14: 7, 15: 7, 16: 8, 17: 8, 18: 9, 19: 9, 20: 10, 21: 10, 22: 11, 23: 11, 24: 12, 25: 12, 26: 13, 27: 13, 28: 14, 29: 14, 30: 15, 31: 15, 32: 16, 33: 16, 34: 17, 35: 17, 36: 18, 37: 18, 38: 19, 39: 19, 40: 20, 41: 20, 42: 21, 43: 21, 44: 22, 45: 22, 46: 23, 47: 47} +#CPU: 48, #PLACE: 24, OMP_PLACES: cores, OMP_PROC_BIND: spread, OMP_NUM_THREAD: 48, RESULT: SUCCESS + +./show-omp-cpu-affinity +MAP_THREAD_TO_PLACE(close): {0: [0, 24], 1: [0, 24], 2: [0, 24], 3: [1, 25], 4: [1, 25], 5: [1, 25], 6: [2, 26], 7: [2, 26], 8: [2, 26], 9: [3, 27], 10: [3, 27], 11: [3, 27], 12: [4, 28], 13: [4, 28], 14: [4, 28], 15: [5, 29], 16: [5, 29], 17: [5, 29], 18: [6, 30], 19: [6, 30], 20: [6, 30], 21: [7, 31], 22: [7, 31], 23: [7, 31], 24: [8, 32], 25: [8, 32], 26: [8, 32], 27: [9, 33], 28: [9, 33], 29: [9, 33], 30: [10, 34], 31: [10, 34], 32: [10, 34], 33: [11, 35], 34: [11, 35], 35: [11, 35], 36: [12, 36], 37: [12, 36], 38: [12, 36], 39: [13, 37], 40: [13, 37], 41: [13, 37], 42: [14, 38], 43: [14, 38], 44: [14, 38], 45: [15, 39], 46: [15, 39], 47: [15, 39], 48: [16, 40], 49: [16, 40], 50: [16, 40], 51: [17, 41], 52: [17, 41], 53: [17, 41], 54: [18, 42], 55: [18, 42], 56: [18, 42], 57: [19, 43], 58: [19, 43], 59: [19, 43], 60: [20, 44], 61: [20, 44], 62: [20, 44], 63: [21, 45], 64: [21, 45], 65: [21, 45], 66: [22, 46], 67: [22, 46], 68: [22, 46], 69: [23, 47], 70: [23, 47], 71: [23, 47]} +RESULT_MAP_THREAD_TO_CPU: {0: 0, 1: 0, 2: 0, 3: 1, 4: 1, 5: 1, 6: 2, 7: 2, 8: 2, 9: 3, 10: 3, 11: 3, 12: 4, 13: 4, 14: 4, 15: 5, 16: 5, 17: 5, 18: 6, 19: 6, 20: 6, 21: 7, 22: 7, 23: 7, 24: 8, 25: 8, 26: 8, 27: 9, 28: 9, 29: 9, 30: 10, 31: 10, 32: 10, 33: 11, 34: 11, 35: 35, 36: 36, 37: 12, 38: 12, 39: 13, 40: 13, 41: 13, 42: 14, 43: 14, 44: 14, 45: 15, 46: 15, 47: 15, 48: 16, 49: 16, 50: 16, 51: 17, 52: 17, 53: 17, 54: 18, 55: 18, 56: 18, 57: 19, 58: 19, 59: 19, 60: 20, 61: 20, 62: 20, 63: 21, 64: 21, 65: 21, 66: 22, 67: 22, 68: 22, 69: 23, 70: 23, 71: 23} +#CPU: 48, #PLACE: 24, OMP_PLACES: cores, OMP_PROC_BIND: close, OMP_NUM_THREAD: 72, RESULT: SUCCESS + +./show-omp-cpu-affinity +MAP_THREAD_TO_PLACE(spread): {0: [0, 24], 1: [0, 24], 2: [0, 24], 3: [1, 25], 4: [1, 25], 5: [1, 25], 6: [2, 26], 7: [2, 26], 8: [2, 26], 9: [3, 27], 10: [3, 27], 11: [3, 27], 12: [4, 28], 13: [4, 28], 14: [4, 28], 15: [5, 29], 16: [5, 29], 17: [5, 29], 18: [6, 30], 19: [6, 30], 20: [6, 30], 21: [7, 31], 22: [7, 31], 23: [7, 31], 24: [8, 32], 25: [8, 32], 26: [8, 32], 27: [9, 33], 28: [9, 33], 29: [9, 33], 30: [10, 34], 31: [10, 34], 32: [10, 34], 33: [11, 35], 34: [11, 35], 35: [11, 35], 36: [12, 36], 37: [12, 36], 38: [12, 36], 39: [13, 37], 40: [13, 37], 41: [13, 37], 42: [14, 38], 43: [14, 38], 44: [14, 38], 45: [15, 39], 46: [15, 39], 47: [15, 39], 48: [16, 40], 49: [16, 40], 50: [16, 40], 51: [17, 41], 52: [17, 41], 53: [17, 41], 54: [18, 42], 55: [18, 42], 56: [18, 42], 57: [19, 43], 58: [19, 43], 59: [19, 43], 60: [20, 44], 61: [20, 44], 62: [20, 44], 63: [21, 45], 64: [21, 45], 65: [21, 45], 66: [22, 46], 67: [22, 46], 68: [22, 46], 69: [23, 47], 70: [23, 47], 71: [23, 47]} +RESULT_MAP_THREAD_TO_CPU: {0: 0, 1: 0, 2: 0, 3: 1, 4: 1, 5: 1, 6: 2, 7: 2, 8: 2, 9: 3, 10: 3, 11: 3, 12: 4, 13: 4, 14: 4, 15: 5, 16: 5, 17: 5, 18: 6, 19: 6, 20: 6, 21: 7, 22: 7, 23: 7, 24: 8, 25: 8, 26: 8, 27: 9, 28: 9, 29: 9, 30: 10, 31: 10, 32: 10, 33: 11, 34: 11, 35: 35, 36: 36, 37: 12, 38: 12, 39: 13, 40: 13, 41: 13, 42: 14, 43: 14, 44: 14, 45: 15, 46: 15, 47: 15, 48: 16, 49: 16, 50: 16, 51: 17, 52: 17, 53: 17, 54: 18, 55: 18, 56: 18, 57: 19, 58: 19, 59: 19, 60: 20, 61: 20, 62: 20, 63: 21, 64: 21, 65: 21, 66: 22, 67: 22, 68: 22, 69: 23, 70: 23, 71: 23} +#CPU: 48, #PLACE: 24, OMP_PLACES: cores, OMP_PROC_BIND: spread, OMP_NUM_THREAD: 72, RESULT: SUCCESS + +./show-omp-cpu-affinity +MAP_THREAD_TO_PLACE(close): {0: [0, 24], 1: [0, 24], 2: [0, 24], 3: [0, 24], 4: [1, 25], 5: [1, 25], 6: [1, 25], 7: [1, 25], 8: [2, 26], 9: [2, 26], 10: [2, 26], 11: [2, 26], 12: [3, 27], 13: [3, 27], 14: [3, 27], 15: [3, 27], 16: [4, 28], 17: [4, 28], 18: [4, 28], 19: [4, 28], 20: [5, 29], 21: [5, 29], 22: [5, 29], 23: [5, 29], 24: [6, 30], 25: [6, 30], 26: [6, 30], 27: [6, 30], 28: [7, 31], 29: [7, 31], 30: [7, 31], 31: [7, 31], 32: [8, 32], 33: [8, 32], 34: [8, 32], 35: [8, 32], 36: [9, 33], 37: [9, 33], 38: [9, 33], 39: [9, 33], 40: [10, 34], 41: [10, 34], 42: [10, 34], 43: [10, 34], 44: [11, 35], 45: [11, 35], 46: [11, 35], 47: [11, 35], 48: [12, 36], 49: [12, 36], 50: [12, 36], 51: [12, 36], 52: [13, 37], 53: [13, 37], 54: [13, 37], 55: [13, 37], 56: [14, 38], 57: [14, 38], 58: [14, 38], 59: [14, 38], 60: [15, 39], 61: [15, 39], 62: [15, 39], 63: [15, 39], 64: [16, 40], 65: [16, 40], 66: [16, 40], 67: [16, 40], 68: [17, 41], 69: [17, 41], 70: [17, 41], 71: [17, 41], 72: [18, 42], 73: [18, 42], 74: [18, 42], 75: [18, 42], 76: [19, 43], 77: [19, 43], 78: [19, 43], 79: [19, 43], 80: [20, 44], 81: [20, 44], 82: [20, 44], 83: [20, 44], 84: [21, 45], 85: [21, 45], 86: [21, 45], 87: [21, 45], 88: [22, 46], 89: [22, 46], 90: [22, 46], 91: [22, 46], 92: [23, 47], 93: [23, 47], 94: [23, 47], 95: [23, 47]} +RESULT_MAP_THREAD_TO_CPU: {0: 0, 1: 0, 2: 0, 3: 0, 4: 1, 5: 1, 6: 1, 7: 1, 8: 2, 9: 2, 10: 2, 11: 2, 12: 3, 13: 3, 14: 3, 15: 3, 16: 4, 17: 4, 18: 4, 19: 4, 20: 5, 21: 5, 22: 5, 23: 5, 24: 6, 25: 6, 26: 6, 27: 6, 28: 7, 29: 7, 30: 7, 31: 31, 32: 32, 33: 8, 34: 8, 35: 8, 36: 9, 37: 9, 38: 9, 39: 9, 40: 10, 41: 10, 42: 10, 43: 10, 44: 11, 45: 11, 46: 11, 47: 11, 48: 12, 49: 12, 50: 12, 51: 12, 52: 13, 53: 13, 54: 13, 55: 13, 56: 14, 57: 14, 58: 14, 59: 14, 60: 15, 61: 15, 62: 15, 63: 15, 64: 16, 65: 16, 66: 16, 67: 16, 68: 17, 69: 17, 70: 17, 71: 17, 72: 18, 73: 18, 74: 18, 75: 18, 76: 19, 77: 19, 78: 19, 79: 19, 80: 20, 81: 20, 82: 20, 83: 20, 84: 21, 85: 21, 86: 21, 87: 21, 88: 22, 89: 22, 90: 22, 91: 22, 92: 23, 93: 23, 94: 47, 95: 23} +#CPU: 48, #PLACE: 24, OMP_PLACES: cores, OMP_PROC_BIND: close, OMP_NUM_THREAD: 96, RESULT: SUCCESS + +./show-omp-cpu-affinity +MAP_THREAD_TO_PLACE(spread): {0: [0, 24], 1: [0, 24], 2: [0, 24], 3: [0, 24], 4: [1, 25], 5: [1, 25], 6: [1, 25], 7: [1, 25], 8: [2, 26], 9: [2, 26], 10: [2, 26], 11: [2, 26], 12: [3, 27], 13: [3, 27], 14: [3, 27], 15: [3, 27], 16: [4, 28], 17: [4, 28], 18: [4, 28], 19: [4, 28], 20: [5, 29], 21: [5, 29], 22: [5, 29], 23: [5, 29], 24: [6, 30], 25: [6, 30], 26: [6, 30], 27: [6, 30], 28: [7, 31], 29: [7, 31], 30: [7, 31], 31: [7, 31], 32: [8, 32], 33: [8, 32], 34: [8, 32], 35: [8, 32], 36: [9, 33], 37: [9, 33], 38: [9, 33], 39: [9, 33], 40: [10, 34], 41: [10, 34], 42: [10, 34], 43: [10, 34], 44: [11, 35], 45: [11, 35], 46: [11, 35], 47: [11, 35], 48: [12, 36], 49: [12, 36], 50: [12, 36], 51: [12, 36], 52: [13, 37], 53: [13, 37], 54: [13, 37], 55: [13, 37], 56: [14, 38], 57: [14, 38], 58: [14, 38], 59: [14, 38], 60: [15, 39], 61: [15, 39], 62: [15, 39], 63: [15, 39], 64: [16, 40], 65: [16, 40], 66: [16, 40], 67: [16, 40], 68: [17, 41], 69: [17, 41], 70: [17, 41], 71: [17, 41], 72: [18, 42], 73: [18, 42], 74: [18, 42], 75: [18, 42], 76: [19, 43], 77: [19, 43], 78: [19, 43], 79: [19, 43], 80: [20, 44], 81: [20, 44], 82: [20, 44], 83: [20, 44], 84: [21, 45], 85: [21, 45], 86: [21, 45], 87: [21, 45], 88: [22, 46], 89: [22, 46], 90: [22, 46], 91: [22, 46], 92: [23, 47], 93: [23, 47], 94: [23, 47], 95: [23, 47]} +RESULT_MAP_THREAD_TO_CPU: {0: 0, 1: 0, 2: 0, 3: 0, 4: 1, 5: 1, 6: 1, 7: 1, 8: 2, 9: 2, 10: 2, 11: 2, 12: 3, 13: 3, 14: 3, 15: 3, 16: 4, 17: 4, 18: 4, 19: 4, 20: 5, 21: 5, 22: 5, 23: 5, 24: 6, 25: 6, 26: 6, 27: 6, 28: 7, 29: 7, 30: 7, 31: 31, 32: 32, 33: 8, 34: 8, 35: 8, 36: 9, 37: 9, 38: 9, 39: 9, 40: 10, 41: 10, 42: 10, 43: 10, 44: 11, 45: 11, 46: 11, 47: 11, 48: 12, 49: 12, 50: 12, 51: 12, 52: 13, 53: 13, 54: 13, 55: 13, 56: 14, 57: 14, 58: 14, 59: 14, 60: 15, 61: 15, 62: 15, 63: 15, 64: 16, 65: 16, 66: 16, 67: 16, 68: 17, 69: 17, 70: 17, 71: 17, 72: 18, 73: 18, 74: 18, 75: 18, 76: 19, 77: 19, 78: 19, 79: 19, 80: 20, 81: 20, 82: 20, 83: 20, 84: 21, 85: 21, 86: 21, 87: 21, 88: 22, 89: 22, 90: 22, 91: 22, 92: 23, 93: 23, 94: 47, 95: 23} +#CPU: 48, #PLACE: 24, OMP_PLACES: cores, OMP_PROC_BIND: spread, OMP_NUM_THREAD: 96, RESULT: SUCCESS + +sudo /home/toshi/install/dev2-mckernel/sbin/mcstop+release.sh diff --git a/test/issues/1439/show-omp-cpu-affinity.c b/test/issues/1439/show-omp-cpu-affinity.c new file mode 100644 index 00000000..7c43ee98 --- /dev/null +++ b/test/issues/1439/show-omp-cpu-affinity.c @@ -0,0 +1,19 @@ +#define _GNU_SOURCE + +#include +#include +#include + +int main(int argc, char **argv) +{ +#pragma omp parallel + { + int thread_num = omp_get_thread_num(); + int cpu_num = sched_getcpu(); + + printf("Thread %d CPU %d\n", thread_num, cpu_num); + } + + return 0; +} + diff --git a/test/issues/1439/test-affinity.py b/test/issues/1439/test-affinity.py new file mode 100644 index 00000000..15d80fea --- /dev/null +++ b/test/issues/1439/test-affinity.py @@ -0,0 +1,190 @@ +import os +import sys +import subprocess +import math + +def get_command_result(cmd): + print cmd + results = subprocess.Popen( + cmd, stdout=subprocess.PIPE, + shell=True).stdout.readlines() + return [str(x).rstrip("\n") for x in results] + +def get_cpus(): + cpus = [] + online_file = open('/sys/devices/system/cpu/online', 'r') + for cpurange in online_file.readlines()[0].strip().split(','): + try: + cpurange_start, cpurange_end = cpurange.split('-') + except ValueError: + cpurange_start = cpurange_end = cpurange + for cpu in range(int(cpurange_start), int(cpurange_end) + 1): + cpus.append(cpu) + return cpus + +def get_omp_places_cores(cpus): + places = [] + map_cpu_to_place = {} + for cpu in cpus: + if cpu not in map_cpu_to_place: + siblings_file = open('/sys/devices/system/cpu/cpu{0}/topology/thread_siblings_list'.format(cpu)) + place = [] + siblings = siblings_file.readlines()[0].strip().split(',') + for sibling in siblings: + place.append(int(sibling)) + places.append(place) + for sibling in siblings: + map_cpu_to_place[int(sibling)] = place + return places, map_cpu_to_place + +def index_of_place(places, cpu): + i = 0 + for place in places: + if cpu in place: + return i + i = i + 1 + return -1 + +def index_of_subpartition(subpartition, place): + i = 0 + for placelist in subpartition: + if place in placelist: + return i + i = i + 1 + return -1 + +def get_estimated_bind(omp_proc_bind, nthreads, places): + if omp_proc_bind == 'close': + return get_estimated_bind_close(0, 0, nthreads, places) + elif omp_proc_bind == 'spread': + return get_estimated_bind_spread(0, 0, nthreads, places) + return None + +def get_estimated_bind_close(master_thread, master_cpu, nthreads, places): + map_thread_to_place = {} + nplaces = len(places) +# print 'nthreads =', nthreads +# print 'nplaces =', nplaces + if nthreads <= nplaces: + place_idx = index_of_place(places, master_cpu) +# print 'place_idx =', place_idx + for i in range(nthreads): + thread = (master_thread + i) % nthreads + map_thread_to_place[thread] = places[(place_idx + i) % nplaces] + else: + s = [0] * nplaces + for p in range(nplaces): + if nplaces - p <= nthreads % nplaces: # implementation defined + s[p] = nthreads / nplaces + 1 # ceil + else: + s[p] = nthreads / nplaces # floor +# print 's[', p, '] =', s[p] + i_begin = 0 + place_idx = index_of_place(places, master_cpu) + for p in range(nplaces): + for i in range(i_begin, i_begin + s[p]): + thread = (master_thread + i) % nthreads + map_thread_to_place[thread] = places[(place_idx + p) % nplaces] + i_begin = i_begin + s[p] + return map_thread_to_place + +def get_estimated_bind_spread(master_thread, master_cpu, nthreads, places): + map_thread_to_place = {} + nplaces = len(places) + if nthreads <= nplaces: + places_subpartition = [] + p_begin = 0 + for i in range(nthreads): + if nthreads - i <= nplaces % nthreads: # implementation defined + size_places_subpartition = nplaces / nthreads + 1 # ceil + else: + size_places_subpartition = nplaces / nthreads # floor + places_subpartition.append(places[p_begin:p_begin + size_places_subpartition]) + p_begin = p_begin + size_places_subpartition + place_idx = index_of_place(places, master_cpu) + places_subpartition_idx = index_of_subpartition(places_subpartition, places[place_idx]) + for i in range(nthreads): + thread = (master_thread + i) % nthreads + if thread == master_thread: + map_thread_to_place[thread] = places[place_idx] + else: + map_thread_to_place[thread] = places_subpartition[(places_subpartition_idx + i) % nthreads][0] + else: + threads = [] + for i in range(nthreads): + threads.append(i) + threads_subpartition = [] + i_begin = 0 + for p in range(nplaces): + if nplaces - p <= nthreads % nplaces: # implementation defined + size_threads_subpartition = nthreads / nplaces + 1 # ceil + else: + size_threads_subpartition = nthreads / nplaces # floor + threads_subpartition.append(threads[i_begin:i_begin + size_threads_subpartition]) + i_begin = i_begin + size_threads_subpartition + place_idx = index_of_place(places, master_cpu) + for p in range(nplaces): + for i in threads_subpartition[p]: + thread = (master_thread + i) % nthreads + map_thread_to_place[thread] = places[p] + return map_thread_to_place + +def run_and_get_omp_cpu_affinity(omp_proc_bind, nthreads): + os.environ['NODES'] = '1' + os.environ['PPN'] = '1' + os.environ['HWLOC_HIDE_ERRORS'] = '1' + os.environ['OMP_PLACES'] = 'cores' + os.environ['OMP_PROC_BIND'] = omp_proc_bind + os.environ['OMP_NUM_THREADS'] = str(nthreads) + command = './show-omp-cpu-affinity' + result_map_thread_to_cpu = {} + for line in get_command_result(command): + outputs = line.split(' ') + thread = outputs[1] + cpu = outputs[3] + result_map_thread_to_cpu[int(thread)] = int(cpu) + return result_map_thread_to_cpu + +def compare_result(nthreads, map_thread_to_place, result_map_thread_to_cpu): + try: + for thread in range(nthreads): + place = map_thread_to_place[thread] + if result_map_thread_to_cpu[thread] not in place: + return False + return True + except KeyError: + return False + +def test_cpu_affinity(cpus, omp_proc_bind, nthreads, places): + map_thread_to_place = get_estimated_bind(omp_proc_bind, nthreads, places) + result_map_thread_to_cpu = run_and_get_omp_cpu_affinity(omp_proc_bind, nthreads) + if compare_result(nthreads, map_thread_to_place, result_map_thread_to_cpu): + result = 'SUCCESS' + else: + result = 'FAIL' + print "MAP_THREAD_TO_PLACE({0}): {1}".format(omp_proc_bind, map_thread_to_place) + print "RESULT_MAP_THREAD_TO_CPU: {0}".format(result_map_thread_to_cpu) + print "#CPU: {}, #PLACE: {}, OMP_PLACES: cores, OMP_PROC_BIND: {}, OMP_NUM_THREAD: {}, RESULT: {}".format(len(cpus), len(places), omp_proc_bind, nthreads, result) + +def main(): + cpus = get_cpus() + print 'CPUS:', cpus + places, map_cpu_to_place = get_omp_places_cores(cpus) + print 'PLACES:', places + print 'MAP_CPU_TO_PLACE', map_cpu_to_place + print + + nplaces = len(places) + for nthreads in range(2, nplaces * 4 + 1): + if nthreads < nplaces and nplaces % nthreads > 0: + continue + if nthreads >= nplaces and nthreads % nplaces > 0: + continue + test_cpu_affinity(cpus, 'close', nthreads, places) + print + test_cpu_affinity(cpus, 'spread', nthreads, places) + print + +if __name__ == '__main__': + main() + diff --git a/test/issues/1439/test-affinity.sh b/test/issues/1439/test-affinity.sh new file mode 100755 index 00000000..6ac28b2a --- /dev/null +++ b/test/issues/1439/test-affinity.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +. ${HOME}/.mck_test_config +export MCK_DIR +echo sudo ${MCK_DIR}/sbin/mcreboot.sh -m 1G@0,1G@1 -c 4-27,32-55 -O +sudo ${MCK_DIR}/sbin/mcreboot.sh -m 1G@0,1G@1 -c 4-27,32-55 -O +#sudo ${MCK_DIR}/sbin/mcreboot.sh -m 1G@0,1G@1 -c 4-27,32-55,60-83,88-111 -r 4-7,32-35:0+8-11,36-39:1+12-15,40-43:2+16-19,44-47:3+20-23,48-51:28+24-27,52-55:29+60-63,88-91:56+64-67,92-95:57+68-71,96-99:58+72-75,100-103:59+76-79,104-107:84+80-83,108-111:85 -O +echo ${MCK_DIR}/bin/mcexec python -u test-affinity.py +${MCK_DIR}/bin/mcexec python -u test-affinity.py +echo sudo ${MCK_DIR}/sbin/mcstop+release.sh +sudo ${MCK_DIR}/sbin/mcstop+release.sh + +