Add qlmpi and swap to mckernel (This is rebase commit for merging to development)

This commit is contained in:
Yutaka Ishikawa
2017-07-23 21:19:15 +09:00
committed by Ken Sato
parent 74f15783d2
commit 236a072311
61 changed files with 6638 additions and 24 deletions

127
test/qlmpi/dump-pages.c Normal file
View File

@@ -0,0 +1,127 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include "swapfmt.h"
struct swap_header header;
struct swap_areainfo *meminfo, *lckinfo;
/*
 * Print cnt 32-bit words from data as zero-padded hex, space separated,
 * on one tab-indented line.
 *
 * Fixes:
 *  - "while (--cnt)" printed only cnt-1 words; the loop now prints all cnt.
 *  - "%08lx" expected an unsigned long but received an unsigned int
 *    (undefined behavior in printf); use "%08x" to match the argument.
 */
void
show(unsigned *data, int cnt)
{
	printf("\t");
	while (cnt-- > 0) {
		printf("%08x ", *data++);
	}
	printf("\n");
}
/*
 * Parse a hexadecimal string (e.g. "1a2B") into an unsigned long.
 * Parsing stops at the first newline, NUL, or non-hex character and
 * returns the value accumulated so far (0 for an empty string).
 *
 * Fix: the original called isdigit()/isupper()/isxdigit() without
 * including <ctype.h> (implicit declaration) and passed a plain char
 * (UB for negative values).  Explicit character-range tests give the
 * same result, are locale-independent, and need no extra header.
 */
unsigned long
convhex(char *cp)
{
	unsigned long val = 0;

	while (*cp != '\n' && *cp != 0) {
		char c = *cp;

		if (c >= '0' && c <= '9') {
			val = (val << 4) + (c - '0');
		} else if (c >= 'A' && c <= 'F') {
			val = (val << 4) + (c - 'A' + 10);
		} else if (c >= 'a' && c <= 'f') {
			val = (val << 4) + (c - 'a' + 10);
		} else {
			break;
		}
		cp++;
	}
	return val;
}
ssize_t
findpos(unsigned long addr)
{
int i;
ssize_t pos = 0;
for (i = 0; i < header.count_sarea; i++) {
if (addr >= meminfo[i].start && addr < meminfo[i].end) {
pos = meminfo[i].pos;
pos += addr - meminfo[i].start;
}
}
return pos;
}
/*
 * dump-pages: inspect a swap image written by the qlmpi swap code.
 *
 * Usage: dump-pages [file [interactive]]
 *   file         swap image to read (default /tmp/pages)
 *   interactive  any extra argument enables a prompt where
 *                "s <hexaddr>" dumps 64 bytes at that address as a
 *                string and any other command letter dumps them as
 *                hex words.
 *
 * Fixes over the original:
 *  - every fread() result is checked (a truncated file was previously
 *    dumped as uninitialized garbage),
 *  - malloc() results are checked,
 *  - stray double semicolon after "char cmd" removed,
 *  - fread(&data, ...) on an array replaced by fread(data, ...),
 *  - local "interractive" renamed to "interactive".
 */
int
main(int argc, char **argv)
{
	FILE *fp;
	char *fname, *cp;
	int interactive = 0;
	int i;

	if (argc >= 2) {
		fname = argv[1];
		if (argc >= 3)
			interactive = 1;
	} else {
		fname = "/tmp/pages";
	}
	if ((fp = fopen(fname, "r")) == 0) {
		fprintf(stderr, "Cannot open file: %s\n", fname);
		exit(-1);
	}
	/* Header carries the magic, version and the two table sizes. */
	if (fread(&header, sizeof(header), 1, fp) != 1) {
		fprintf(stderr, "Cannot read header: %s\n", fname);
		exit(-1);
	}
	printf("magic : %s\n", header.magic);
	printf("version : %d\n", header.version);
	printf("swap area count : %d\n", header.count_sarea);
	printf("mlock area count: %d\n", header.count_marea);
	printf("SWAP:\n");
	printf("\t start end : file position (flags)\n");
	meminfo = malloc(sizeof(struct swap_areainfo)* header.count_sarea);
	lckinfo = malloc(sizeof(struct swap_areainfo)* header.count_marea);
	if (meminfo == 0 || lckinfo == 0) {
		fprintf(stderr, "Cannot allocate area tables\n");
		exit(-1);
	}
	if (fread(meminfo, sizeof(struct swap_areainfo),
		  header.count_sarea, fp) != (size_t)header.count_sarea ||
	    fread(lckinfo, sizeof(struct swap_areainfo),
		  header.count_marea, fp) != (size_t)header.count_marea) {
		fprintf(stderr, "Cannot read area tables: %s\n", fname);
		exit(-1);
	}
	for (i = 0; i < header.count_sarea; i++) {
		printf("\t%016lx -- %016lx : %010lx (%lx)\n",
		meminfo[i].start, meminfo[i].end, meminfo[i].pos, meminfo[i].flag);
	}
	printf("MLOCK:\n");
	printf("\t start end : physical address (flags)\n");
	for (i = 0; i < header.count_marea; i++) {
		printf("\t%016lx -- %016lx : %010lx (%lx)\n",
		lckinfo[i].start, lckinfo[i].end, lckinfo[i].pos, lckinfo[i].flag);
	}
	if (!interactive) goto ending;
	do {
		char buf1[128], buf2[128], data[8*8 + 1];
		char cmd;
		ssize_t sz;
		int cc;
		unsigned long addr;
		ssize_t fpos;

		fprintf(stdout, "> "); fflush(stdout);
		cp = fgets(buf1, 128, stdin);
		if (cp == NULL) break;
		cc = sscanf(buf1, "%c %s", &cmd, buf2);
		if (cc != 2) continue;
		addr = convhex(buf2);
		fpos = findpos(addr);
		if (fpos == 0) continue;	/* address not in any swap area */
		printf("%lx (fpos(%lx)):\n", addr, fpos);
		fseek(fp, fpos, SEEK_SET);
		if ((sz = fread(data, 8*8, 1, fp)) != 1) goto err;
		if (cmd == 's') {
			data[8*8] = 0;	/* NUL-terminate for %s */
			printf("\t%s", data);
		} else {
			show((unsigned*) data, 8);
		}
	} while (cp != NULL);
err:
ending:
	fclose(fp);
	return 0;
}

78
test/qlmpi/qlmpi_sample.c Normal file
View File

@@ -0,0 +1,78 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#define BUF_SIZE (16*1024)
#include <qlmpilib.h>
int data[1024*1024];
char sym2[1024*1024] = { 10, 20, 30, 0 };
char *sym1 = "aaaaaa";
char buffer[BUF_SIZE];
char *ptr1, *ptr2;
/*
 * Thin wrapper around the project-specific swap-out syscall (number
 * 801 on the McKernel side).  Returns the raw syscall result.
 */
int
swapout(char *fname, void *buf, size_t sz, int flag)
{
	return syscall(801, fname, buf, sz, flag);
}
/*
 * Thin wrapper around the project-specific Linux-side mlock syscall
 * (number 802).  Returns the raw syscall result.
 */
int
linux_mlock(const void *addr, size_t len)
{
	return syscall(802, addr, len);
}
/*
 * qlmpi_sample: exercises the qlmpi suspend/resume cycle together with
 * McKernel-side mlock, Linux-side mlock (syscall 802) and the swap
 * path.  ql_client() returns QL_CONTINUE when the program is resumed
 * and should re-run its work loop from ql_loop.
 *
 * Fix: the second pointer printf printed ptr2 but labelled it "ptr1".
 */
int
main(int argc, char **argv)
{
	int rc;
	int i;

	MPI_Init(&argc, &argv);
ql_loop:
	printf("***** Arguments Info ****************\n");
	printf(" argc: %d\n", argc);
	for (i = 0; i < argc; i++) {
		printf(" argv[%d]: %s\n", i, argv[i]);
	}
	printf("QL_SUCCESS:%d\n", QL_SUCCESS);
	printf("************************************\n\n");
	/* Dump a few addresses so swap/mlock behavior can be correlated
	 * with dump-pages output. */
	printf("&data = %p\n", data);
	printf("&sym1 = %p\n", &sym1);
	printf("&sym2 = %p\n", sym2);
	printf("&rc = %p\n", &rc);
	ptr1 = malloc(1024);
	ptr2 = malloc(1024*1024);
	printf("ptr1 = %p\n", ptr1);
	printf("ptr2 = %p\n", ptr2);
	/*
	 * testing mlock in mckernel side
	 */
	rc = mlock(data, 16*1024);
	printf("McKernel mlock returns: %d\n", rc);
	/*
	 * testing mlock in linux side
	 */
	sprintf((char*) data, "hello\n");
	rc = linux_mlock(data, 16*1024);
	printf("linux_mlock returns: %d\n", rc);
	rc = ql_client(&argc, &argv);
	printf("ql_client returns: %d\n", rc);
	if (rc == QL_CONTINUE) {
		goto ql_loop;	/* resumed: run the whole cycle again */
	}
	MPI_Finalize();
	printf("qlmpi_sample finished!!\n");
	return 0;
}

View File

@@ -0,0 +1,73 @@
c---+c---1----+----2----+----3----+----4----+----5----+----6----+----7--!!!!!!!!
c CT20: two-rank MPI send/receive check used by the qlmpi tests.
c Rank 1 fills a 512M-element INTEGER*4 array with the value given on
c the command line and sends it to rank 0, which verifies every
c element.  ql_client() then reports whether the program was resumed
c (repeat from label 1000) or should finalize.
c NOTE(review): the bare "c"-prefixed lines after MPI_RECV/MPI_SEND
c appear to be fixed-form continuation lines whose column-6 marker was
c lost when this file was extracted -- confirm against the original
c source before changing column layout.
include 'mpif.h'
integer dsize
parameter(dsize=536870912)
character val*10
integer ival
integer ierr
integer i
integer*4 dat(dsize)
common dat
integer rank
integer size
integer st(MPI_STATUS_SIZE)
call MPI_INIT(ierr)
c label 1000: re-entry point when ql_client() requests a re-run
1000 continue
call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr)
call MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr)
c size check
if(size.ne.2)then
if(rank.eq.0)then
print*,'bad MPI size'
endif
call MPI_FINALIZE(ierr)
stop 1
endif
c read argument
iargs = iargc()
if(iargs.ne.1)then
print *,'bad argument'
call MPI_FINALIZE(ierr)
stop 1
endif
call getarg(1, val)
read(val, '(i10)')ival
print *,'val=',ival
c test
if(rank.eq.0)then
c rank 0: poison the buffer, receive from rank 1, verify every element
do 10 i=1, dsize
dat(i) = -1
10 continue
print *,'r1 val=',ival
call MPI_RECV(dat, dsize, MPI_INTEGER4, 1, 0, MPI_COMM_WORLD,
c st, ierr)
print *,'r2 val=',ival
do 20 i=1, dsize
if(dat(i).ne.ival)then
print *,'*** bad value idx=',i,', dat=',dat(i),
c ' , val=',ival
goto 100
endif
20 continue
print *,'*** MPI_Send/Recv OK *** '
100 continue
else
c rank 1: fill the buffer with the expected value and send it
do 30 i=1, dsize
dat(i) = ival
30 continue
call MPI_SEND(dat, dsize, MPI_INTEGER4, 0, 0, MPI_COMM_WORLD,
c ierr)
endif
c repeat?
call ql_client(ierr)
if(ierr.eq.1)then
print *,'repeat'
goto 1000
endif
call MPI_FINALIZE(ierr)
end

View File

@@ -0,0 +1,27 @@
#!/bin/sh
# CT20: device mapping test driver.
# Alternates two instances of the CT20 MPI program (CT20a/CT20b) via
# ql_mpiexec_start (suspend/resume) and ql_mpiexec_finalize; each run
# is expected to print "MPI_Send/Recv OK".
PPOSDIR=/home/satoken/ppos
# BUG FIX: "$(PPOSDIR)" is Makefile syntax -- in a shell it is command
# substitution and tries to RUN a command named PPOSDIR.  Use ${PPOSDIR}.
export PATH=${PPOSDIR}/bin:$PATH
echo CT20001 device mapping program test START
echo CT20002 program 1 START
echo CT20003 check '"MPI_Send/Recv OK"'
ql_mpiexec_start -machinefile hostfile20 ./CT20a 1
echo CT20004 program 1 suspend
echo CT20005 program 2 START
echo CT20006 check '"MPI_Send/Recv OK"'
ql_mpiexec_start -machinefile hostfile20 ./CT20b 2
echo CT20007 program 2 suspend
echo CT20008 program 1 resume
echo CT20009 check '"MPI_Send/Recv OK"'
ql_mpiexec_start -machinefile hostfile20 ./CT20a 3
echo CT20010 program 1 suspend
echo CT20011 program 2 resume
echo CT20012 check '"MPI_Send/Recv OK"'
ql_mpiexec_start -machinefile hostfile20 ./CT20b 4
echo CT20013 program 2 suspend
echo CT20014 program 1 resume
ql_mpiexec_finalize -machinefile hostfile20 ./CT20a
echo CT20015 program 1 END
echo CT20016 program 2 resume
ql_mpiexec_finalize -machinefile hostfile20 ./CT20b
echo CT20017 program 2 END
echo CT20018 device mapping program test END

View File

@@ -0,0 +1,62 @@
c---+c---1----+----2----+----3----+----4----+----5----+----6----+----7--!!!!!!!!
c CT21: mcexec page-table update check for qlmpi.  Reads a 512M-element
c INTEGER*4 array from the file named on the command line and verifies
c every element equals the expected value (second argument).  On resume
c (ql_client sets ierr=1) the whole read/verify cycle repeats.
c NOTE(review): the bare "c"-prefixed line after OPEN appears to be a
c fixed-form continuation line whose column-6 marker was lost during
c extraction -- confirm against the original source.
include 'mpif.h'
integer size
parameter(size=536870912)
character file*10
character val*10
integer ival
integer ierr
integer i
integer*4 dat(size)
common dat
character myname*10
call getarg(0, myname)
call MPI_INIT(ierr)
c label 1000: re-entry point when ql_client() requests a re-run
1000 continue
iargs = iargc()
if(iargs.ne.2)then
print *,'bad argument'
call MPI_FINALIZE(ierr)
stop 1
endif
call getarg(1, file)
call getarg(2, val)
read(val, '(i10)')ival
print *,' file=',file,', val=',ival
open(1, file=file, status='old', form='unformatted',
c access='stream', err=999)
c poison the buffer before reading so stale data cannot pass the check
do 10 i=1, size
dat(i) = -1
10 continue
read(1, err=998)(dat(i), i=1, size)
do 20 i=1, size
if(dat(i).ne.ival)then
print *,'*** FAIL *** BAD VALUE idx=',i,', val=',dat(i)
goto 100
endif
20 continue
print *,' *** data read OK ***'
100 continue
close(1)
c repeat the cycle when resumed by qlmpi
call ql_client(ierr)
if(ierr.eq.1)then
print *,'resume'
goto 1000
endif
call MPI_FINALIZE(ierr)
stop 0
c error exits: 998 = read failure, 999 = open failure
998 continue
close(1)
print *,'read error'
goto 9999
999 continue
print *,'open error'
goto 9999
9999 continue
call MPI_FINALIZE(ierr)
stop 1
end

View File

@@ -0,0 +1,27 @@
#!/bin/sh
# CT21: mcexec page-table update test driver.
# Alternates two instances of the CT21 MPI program (CT21a/CT21b) via
# ql_mpiexec_start (suspend/resume) and ql_mpiexec_finalize; each run
# reads a data file and is expected to print "data read OK".
export PPOSDIR=/home/satoken/ppos
export PATH=$PPOSDIR/bin:$PATH
echo CT21001 mcexec page table update test START
echo CT21002 program 1 START
echo CT21003 check '"data read OK"'
ql_mpiexec_start -machinefile hostfile21 -n 1 ./CT21a file1 1
echo CT21004 program 1 suspend
echo CT21005 program 2 START
echo CT21006 check '"data read OK"'
ql_mpiexec_start -machinefile hostfile21 -n 1 ./CT21b file1 1
echo CT21007 program 2 suspend
echo CT21008 program 1 resume
echo CT21009 check '"data read OK"'
ql_mpiexec_start -machinefile hostfile21 -n 1 ./CT21a file2 2
echo CT21010 program 1 suspend
echo CT21011 program 2 resume
echo CT21012 check '"data read OK"'
ql_mpiexec_start -machinefile hostfile21 -n 1 ./CT21b file2 2
echo CT21013 program 2 suspend
echo CT21014 program 1 resume
ql_mpiexec_finalize -machinefile hostfile21 -n 1 ./CT21a
echo CT21015 program 1 END
echo CT21016 program 2 resume
ql_mpiexec_finalize -machinefile hostfile21 -n 1 ./CT21b
echo CT21017 program 2 END
echo CT21018 mcexec page table update test END

View File

@@ -0,0 +1,27 @@
c---+c---1----+----2----+----3----+----4----+----5----+----6----+----7--!!!!!!!!
c CT22: hybrid MPI+OpenMP check for qlmpi.  Every MPI rank spawns an
c OpenMP parallel region and prints "mpi=<rank>/<size>, omp=<tid>/<nthreads>"
c so the log shows that all ranks and threads survive suspend/resume.
c ql_client() decides whether to repeat (resume) or finalize.
!$ use omp_lib
include 'mpif.h'
integer rank
integer size
external omp_get_thread_num
external omp_get_num_threads
integer omp_get_thread_num
integer omp_get_num_threads
call MPI_INIT(ierr)
c label 1000: re-entry point when ql_client() requests a re-run
1000 continue
call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr)
call MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr)
!$omp parallel
print '(1h ,4hmpi=,i2,1h/,i2,6h, omp=,i2,1h/,i2)',
c rank, size, omp_get_thread_num(), omp_get_num_threads()
!$omp end parallel
c repeat?
call ql_client(ierr)
if(ierr.eq.1)then
print *,'repeat'
goto 1000
endif
call MPI_FINALIZE(ierr)
end

View File

@@ -0,0 +1,27 @@
#!/bin/sh
# CT22: MPI+OpenMP test driver.
# Alternates two instances of the CT22 program with OMP_NUM_THREADS=4,
# checking the rank/thread info lines across suspend/resume cycles.
PPOSDIR=/home/satoken/ppos
# BUG FIX: "$(PPOSDIR)" is Makefile syntax -- in a shell it is command
# substitution and tries to RUN a command named PPOSDIR.  Use ${PPOSDIR}.
export PATH=${PPOSDIR}/bin:$PATH
echo CT22001 OMP test START
echo CT22002 program 1 START
echo CT22003 check rank info
ql_mpiexec_start -genv OMP_NUM_THREADS 4 -machinefile hostfile22 ./CT22a
echo CT22004 program 1 suspend
echo CT22005 program 2 START
echo CT22006 check rank info
ql_mpiexec_start -genv OMP_NUM_THREADS 4 -machinefile hostfile22 ./CT22b
echo CT22007 program 2 suspend
echo CT22008 program 1 resume
echo CT22009 check rank info
ql_mpiexec_start -genv OMP_NUM_THREADS 4 -machinefile hostfile22 ./CT22a
echo CT22010 program 1 suspend
echo CT22011 program 2 resume
echo CT22012 check rank info
ql_mpiexec_start -genv OMP_NUM_THREADS 4 -machinefile hostfile22 ./CT22b
echo CT22013 program 2 suspend
echo CT22014 program 1 resume
ql_mpiexec_finalize -genv OMP_NUM_THREADS 4 -machinefile hostfile22 ./CT22a
echo CT22015 program 1 END
echo CT22016 program 2 resume
ql_mpiexec_finalize -genv OMP_NUM_THREADS 4 -machinefile hostfile22 ./CT22b
echo CT22017 program 2 END
echo CT22018 OMP test END

View File

@@ -0,0 +1,57 @@
# Build the qlmpi test programs and their 2GiB data files.
# PPOSDIR: McKernel/qlmpi install prefix; MPIDIR: MPICH install prefix.
PPOSDIR=/home/satoken/ppos
MPIDIR=/usr/lib64/mpich-3.2
MPIBINDIR=$(MPIDIR)/bin
MPILIBDIR=$(MPIDIR)/lib
CC=gcc
MPIF90=$(MPIBINDIR)/mpif90
MPICC=$(MPIBINDIR)/mpicc
LIBDIR=$(PPOSDIR)/lib
# -lqlmpi is listed here once.  Libraries must follow the objects on
# the link line (required when the linker runs with --as-needed), so
# $(LDFLAGS) is placed after the sources in every rule below; the
# extra -lqlmpi that was duplicated on the C rules has been removed.
LDFLAGS=-L$(LIBDIR) -lqlmpi -Wl,-rpath=$(LIBDIR) -Wl,-rpath,$(MPILIBDIR)
CFLAGS= -I$(PPOSDIR)/include
TARGETS= usr_prg_A usr_prg_B usr_prg_C usr_prg_irreg CT20a CT20b CT21a CT21b file1 file2 CT22a CT22b
all:: $(TARGETS)
usr_prg_A: usr_prg_A.c
	$(MPICC) $(CFLAGS) -o $@ $@.c $(LDFLAGS)
usr_prg_B: usr_prg_B.c
	$(MPICC) $(CFLAGS) -o $@ $@.c $(LDFLAGS)
usr_prg_C: usr_prg_C.c
	$(MPICC) $(CFLAGS) -o $@ $@.c $(LDFLAGS)
usr_prg_irreg: usr_prg_irreg.c
	$(MPICC) $(CFLAGS) -o $@ $@.c $(LDFLAGS)
CT20a: CT20.f
	$(MPIF90) -o $@ $< $(LDFLAGS)
CT20b: CT20.f
	$(MPIF90) -o $@ $< $(LDFLAGS)
CT21a: CT21.f
	$(MPIF90) -o $@ $< $(LDFLAGS)
CT21b: CT21.f
	$(MPIF90) -o $@ $< $(LDFLAGS)
CT22a: CT22.f
	$(MPIF90) -O -fopenmp -o $@ $< $(LDFLAGS)
CT22b: CT22.f
	$(MPIF90) -O -fopenmp -o $@ $< $(LDFLAGS)
# Test data: file1/file2 hold 512M int32 values of 1 resp. 2.
file1: gendata
	./gendata 1 536870912 > $@
file2: gendata
	./gendata 2 536870912 > $@
gendata: gendata.c
	$(CC) -o $@ $<
clean::
	rm -f $(TARGETS) gendata

View File

@@ -0,0 +1,9 @@
# Shared settings sourced by the qlmpi test scripts (not executed).
MCMOD_DIR=/home/satoken/ppos
# ql_mpiexec wrapper commands under the McKernel install tree.
START=${MCMOD_DIR}/bin/ql_mpiexec_start
FINALIZE=${MCMOD_DIR}/bin/ql_mpiexec_finalize
# Test MPI programs driven by ql_normal.sh / ql_irreg.sh.
USR_PRG_A=./usr_prg_A
USR_PRG_B=./usr_prg_B
USR_PRG_C=./usr_prg_C
USR_PRG_IRREG=./usr_prg_irreg

View File

@@ -0,0 +1,40 @@
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>	/* write(), close() */
/*
 * gendata: write <count> copies of the 32-bit value <data> to stdout.
 * Usage: gendata <data> <count>
 * Used by the Makefile to generate the file1/file2 test inputs.
 *
 * Fixes over the original:
 *  - write() reports errors as -1 with errno set; the old code compared
 *    the return value against -EINTR, so an interrupted write aborted,
 *  - the remaining-byte counter was an int and overflowed for counts
 *    around 512M ints (sizeof(int)*count > INT_MAX); use size_t,
 *  - malloc() result is checked,
 *  - the write-error message was missing its newline.
 */
int
main(int argc, char **argv)
{
	int data;
	long count;
	long i;
	int *buf;
	size_t r;
	char *p;

	if (argc != 3) {
		fprintf(stderr, "BAD argument\n");
		exit(1);
	}
	data = atoi(argv[1]);
	count = atol(argv[2]);
	fprintf(stderr, "data=%d count=%ld\n", data, count);
	buf = malloc(sizeof(int) * count);
	if (buf == NULL) {
		fprintf(stderr, "out of memory\n");
		exit(1);
	}
	for (i = 0; i < count; i++)
		buf[i] = data;
	/* write() may return short counts; loop until everything is out,
	 * retrying on EINTR. */
	for (r = sizeof(int) * count, p = (char *)buf; r;) {
		ssize_t rc = write(1, p, r);
		if (rc < 0 && errno == EINTR)
			continue;
		if (rc <= 0) {
			fprintf(stderr, "write error: %d\n", errno);
			exit(1);
		}
		r -= rc;
		p += rc;
	}
	free(buf);
	close(1);
	exit(0);
}

View File

@@ -0,0 +1,10 @@
#!/bin/sh
# Run every CT* normal test case under ./test_cases, logging each run,
# then the irregular-case test.
#
# Fixes: "source" is a bashism (the shebang is /bin/sh) -- use the
# POSIX "." builtin; iterate the glob directly instead of parsing
# `ls -1` output (robust and avoids a subshell).
for test_param in ./test_cases/CT*.txt
do
	# ql_normal.sh sources the param file itself; source it here too
	# so TEST_PREFIX is available for the log file name.
	. ${test_param}
	./ql_normal.sh ${test_param} 2>&1 | tee ./result/${TEST_PREFIX}.log
done
./ql_irreg.sh ./test_cases/ECT91.txt

View File

@@ -0,0 +1,2 @@
wallaby14:1
wallaby15:1

View File

@@ -0,0 +1 @@
wallaby14

View File

@@ -0,0 +1,2 @@
wallaby14:1
wallaby15:1

View File

@@ -0,0 +1,210 @@
#!/bin/bash
# NOTE: this script relies on bash-only features (the "function"
# keyword, "(( ))" arithmetic, "declare -a" arrays), so it must run
# under bash rather than plain /bin/sh.
# Functions
function ok_out() {
	# Report a passing step, then advance the test counter/code.
	echo "[OK] ${TEST_PREFIX}$(printf %03d ${TEST_NUM}) $1"
	TEST_NUM=$((TEST_NUM + 1))
	TEST_CODE=$(printf %03d ${TEST_NUM})
}
function ng_out() {
	# Report a failing step and abort the entire test run.
	echo "[NG] ${TEST_PREFIX}$(printf %03d ${TEST_NUM}) $1"
	exit 1
}
function ng_out_cont {
	# Report a failing step but keep going (non-fatal variant of ng_out).
	echo "[NG] ${TEST_PREFIX}$(printf %03d ${TEST_NUM}) $1"
	TEST_NUM=$((TEST_NUM + 1))
	TEST_CODE=$(printf %03d ${TEST_NUM})
}
if [ $# -lt 1 ]; then
	echo "too few arguments."
	echo "usage: `basename $0` <param_file>"
	# BUG FIX: the script previously printed usage and then kept
	# running without a parameter file; abort instead.
	exit 1
fi
TEST_PARAM_FILE=$1
TEST_NUM=1
TEST_CODE=001
ME=`whoami`
# read config
source ./config
# read test param
source ${TEST_PARAM_FILE}
# make machinefile
mkdir ./machinefiles &> /dev/null
MFILE=./machinefiles/mfile_${TEST_PREFIX}
echo ${MASTER}:${PROC_PER_NODE} > ${MFILE}
for slave in ${SLAVE}
do
echo ${slave}:${PROC_PER_NODE} >> ${MFILE}
done
PROC_NUM=`expr ${PROC_PER_NODE} \* ${MPI_NODE_NUM}`
# read machinefile
declare -a node_arry
while read line
do
node_arry+=(${line%:*})
done < ${MFILE}
MASTER=${node_arry[0]}
# make result directory
RESULT_DIR=./result/${TEST_PREFIX}
mkdir -p ${RESULT_DIR}
RANK_MAX=`expr ${PROC_NUM} - 1`
# Log files
start_1st_A_log=${RESULT_DIR}/exec_1st_A.log
start_1st_B_log=${RESULT_DIR}/exec_1st_B.log
start_1st_C_log=${RESULT_DIR}/exec_1st_C.log
start_2nd_A_log=${RESULT_DIR}/exec_2nd_A.log
start_2nd_B_log=${RESULT_DIR}/exec_2nd_B.log
start_2nd_C_log=${RESULT_DIR}/exec_2nd_C.log
finalize_A_log=${RESULT_DIR}/finalize_A.log
finalize_B_log=${RESULT_DIR}/finalize_B.log
finalize_C_log=${RESULT_DIR}/finalize_C.log
# Arguments
args_1st_A="1234 hoge 02hoge"
args_2nd_A="foo 99bar test"
# Env
envs_1st_A="1st_exec_A"
envs_2nd_A="This_is_2nd_exec_A"
BK_PATH=${PATH}
### テスト開始時点でql_serverとテスト用MPIプログラムが各ノードで実行されていない
for node in ${node_arry[@]}
do
cnt=`ssh $node "pgrep -u ${ME} -c 'ql_(server|talker)'"`
if [ ${cnt} -gt 0 ]; then
ng_out "ql_server is running on ${node}"
fi
cnt=`ssh $node "pgrep -u ${ME} -c 'mpiexec'"`
if [ ${cnt} -gt 0 ]; then
ng_out "other MPI program is running on ${node}"
fi
done
### machinefile is not specified
env QL_TEST=${envs_1st_A} ${START} -n ${PROC_NUM} ${USR_PRG_A} ${args_1st_A} > ${RESULT_DIR}/${TEST_CODE}.log
rc=$?
if [ ${rc} -eq 0 ]; then
ng_out_cont "machinefile is not specified, but ql_mpiexec_start returns 0"
else
ok_out "machinefile is not specified, so ql_mpiexec_start returns not 0. returns ${rc}"
fi
### MPI program is not specified
env QL_TEST=${envs_1st_A} ${START} -n ${PROC_NUM} > ${RESULT_DIR}/${TEST_CODE}.log
rc=$?
if [ ${rc} -eq 0 ]; then
ng_out_cont "MPI program is not specified, but ql_mpiexec_start returns 0"
else
ok_out "MPI program is not specified, so ql_mpiexec_start returns not 0. returns ${rc}"
fi
### specified machinefile does not exist
env QL_TEST=${envs_1st_A} ${START} -machinefile dose_not_exist -n ${PROC_NUM} ${USR_PRG_A} ${args_1st_A} > ${RESULT_DIR}/${TEST_CODE}.log
rc=$?
if [ ${rc} -eq 0 ]; then
ng_out_cont "specified machinefile does not exist, but ql_mpiexec_start returns 0"
else
ok_out "specified machinefile does not exist, so ql_mpiexec_start returns not 0. returns ${rc}"
fi
### specified MPI program does not exist
env QL_TEST=${envs_1st_A} ${START} -machinefile ${MFILE} -n ${PROC_NUM} dose_not_exist ${args_1st_A} > ${RESULT_DIR}/${TEST_CODE}.log
rc=$?
if [ ${rc} -eq 0 ]; then
ng_out_cont "specified MPI program does not exist, but ql_mpiexec_start returns 0"
else
ok_out "specified MPI program does not exist, so ql_mpiexec_start returns not 0. returns ${rc}"
fi
### mpiexec is not found
PATH="/usr/bin"
env QL_TEST=${envs_1st_A} ${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_A} ${args_1st_A} > ${RESULT_DIR}/${TEST_CODE}.log
rc=$?
if [ ${rc} -eq 0 ]; then
ng_out_cont "mpiexec is not found, but ql_mpiexec_start returns 0"
else
ok_out "mpiexec is not found, so ql_mpiexec_start returns not 0. returns ${rc}"
fi
PATH=${BK_PATH}
### mpiexec abort
PATH="./util:/usr/bin"
env QL_TEST=${envs_1st_A} ${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_A} ${args_1st_A} > ${RESULT_DIR}/${TEST_CODE}.log
rc=$?
if [ ${rc} -eq 0 ]; then
ng_out_cont "mpiexec abort, but ql_mpiexec_start returns 0"
else
ok_out "mpiexec abort, so ql_mpiexec_start returns not 0. returns ${rc}"
fi
PATH=${BK_PATH}
### machinefile is not specified
env QL_TEST=${envs_1st_A} ${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_A} ${args_1st_A} > ${RESULT_DIR}/${TEST_CODE}.log
${FINALIZE} -n ${PROC_NUM} ${USR_PRG_A} >> ${RESULT_DIR}/${TEST_CODE}.log
rc=$?
if [ ${rc} -eq 0 ]; then
ng_out_cont "machinefile is not specified, but ql_mpiexec_finalize returns 0"
else
ok_out "machinefile is not specified, so ql_mpiexec_finalize returns not 0. returns ${rc}"
fi
### MPI program is not specified
env QL_TEST=${envs_1st_A} ${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_A} ${args_1st_A} > ${RESULT_DIR}/${TEST_CODE}.log
${FINALIZE} -machinefile ${MFILE} -n ${PROC_NUM} >> ${RESULT_DIR}/${TEST_CODE}.log
rc=$?
if [ ${rc} -eq 0 ]; then
ng_out_cont "MPI program is not specified, but ql_mpiexec_finalize returns 0"
else
ok_out "MPI program is not specified, so ql_mpiexec_finalize returns not 0. returns ${rc}"
fi
### specified machinefile is wrong
env QL_TEST=${envs_1st_A} ${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_A} ${args_1st_A} > ${RESULT_DIR}/${TEST_CODE}.log
${FINALIZE} -machinefile ./util/wrong_mfile -n ${PROC_NUM} ${USR_PRG_A} >> ${RESULT_DIR}/${TEST_CODE}.log
rc=$?
if [ ${rc} -eq 0 ]; then
ng_out_cont "specified machinefile is wrong, but ql_mpiexec_finalize returns 0"
else
ok_out "specified machinefile is wrong, so ql_mpiexec_finalize returns not 0. returns ${rc}"
fi
### specified MPI program name is wrong
env QL_TEST=${envs_1st_A} ${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_A} ${args_1st_A} > ${RESULT_DIR}/${TEST_CODE}.log
${FINALIZE} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_B} >> ${RESULT_DIR}/${TEST_CODE}.log
rc=$?
if [ ${rc} -eq 0 ]; then
ng_out_cont "specified MPI program name is wrong, but ql_mpiexec_finalize returns 0"
else
ok_out "specified MPI program name is wrong, so ql_mpiexec_finalize returns not 0. returns ${rc}"
fi
${FINALIZE} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_A} > /dev/null
### one of MPI process aborts
abort_rank=`expr ${PROC_NUM} - 1`
env QL_TEST=${envs_1st_A} ${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_IRREG} 0 > ${RESULT_DIR}/${TEST_CODE}.log
rc=$?
if [ ${rc} -eq 0 ]; then
ng_out_cont "one of MPI processes aborts, but ql_mpiexec_start returns 0"
else
ok_out "one of MPI processes aborts, so ql_mpiexec_start returns not 0. returns ${rc}"
fi

View File

@@ -0,0 +1,348 @@
#!/bin/bash
# NOTE: this script relies on bash-only features (the "function"
# keyword, "(( ))" arithmetic, "declare -a" arrays), so it must run
# under bash rather than plain /bin/sh.
# Functions
function ok_out() {
	# Report a passing step, then advance the test counter/code.
	echo "[OK] ${TEST_PREFIX}$(printf %03d ${TEST_NUM}) $1"
	TEST_NUM=$((TEST_NUM + 1))
	TEST_CODE=$(printf %03d ${TEST_NUM})
}
function ng_out() {
	# Report a failing step and abort the entire test run.
	echo "[NG] ${TEST_PREFIX}`printf %03d ${TEST_NUM}` $1"
	exit 1
}
if [ $# -lt 1 ]; then
	echo "too few arguments."
	echo "usage: `basename $0` <param_file>"
	# BUG FIX: the script previously printed usage and then kept
	# running without a parameter file; abort instead.
	exit 1
fi
TEST_PARAM_FILE=$1
TEST_NUM=1
TEST_CODE=001
ME=`whoami`
# read config
source ./config
# read test param
source ${TEST_PARAM_FILE}
# make machinefile
mkdir ./machinefiles &> /dev/null
MFILE=./machinefiles/mfile_${TEST_PREFIX}
echo ${MASTER}:${PROC_PER_NODE} > ${MFILE}
for slave in ${SLAVE}
do
echo ${slave}:${PROC_PER_NODE} >> ${MFILE}
done
PROC_NUM=`expr ${PROC_PER_NODE} \* ${MPI_NODE_NUM}`
# read machinefile
declare -a node_arry
while read line
do
node_arry+=(${line%:*})
done < ${MFILE}
MASTER=${node_arry[0]}
# make result directory
RESULT_DIR=./result/${TEST_PREFIX}
mkdir -p ${RESULT_DIR}
RANK_MAX=`expr ${PROC_NUM} - 1`
# Log files
start_1st_A_log=${RESULT_DIR}/exec_1st_A.log
start_1st_B_log=${RESULT_DIR}/exec_1st_B.log
start_1st_C_log=${RESULT_DIR}/exec_1st_C.log
start_2nd_A_log=${RESULT_DIR}/exec_2nd_A.log
start_2nd_B_log=${RESULT_DIR}/exec_2nd_B.log
start_2nd_C_log=${RESULT_DIR}/exec_2nd_C.log
finalize_A_log=${RESULT_DIR}/finalize_A.log
finalize_B_log=${RESULT_DIR}/finalize_B.log
finalize_C_log=${RESULT_DIR}/finalize_C.log
# Arguments
args_1st_A="1234 hoge 02hoge"
args_2nd_A="foo 99bar test"
# Env
envs_1st_A="1st_exec_A"
envs_2nd_A="This_is_2nd_exec_A"
### テスト開始時点でql_serverとテスト用MPIプログラムが各ノードで実行されていない
for node in ${node_arry[@]}
do
cnt=`ssh $node "pgrep -u ${ME} -c 'ql_(server|talker)'"`
if [ ${cnt} -gt 0 ]; then
ng_out "ql_server is running on ${node}"
fi
cnt=`ssh $node "pgrep -u ${ME} -c 'mpiexec'"`
if [ ${cnt} -gt 0 ]; then
ng_out "other MPI program is running on ${node}"
fi
done
ok_out "ql_server and usr_prgs are not running on each node"
### usr_prg_A を実行するql_mpiexec_start の返り値が0 (成功)
env QL_TEST=${envs_1st_A} ${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_A} ${args_1st_A} > ${start_1st_A_log}
rc=$?
if [ ${rc} -eq 0 ]; then
ok_out "ql_mpiexec_start usr_prg_A (first exec) returns 0"
else
ng_out "ql_mpiexec_start usr_prg_A (first exec) returns ${rc}"
fi
### 初回実行後、マスターノード上でql_serverが動作している
cnt=`ssh ${MASTER} "pgrep -u ${ME} -c 'ql_server'"`
if [ ${cnt} -ne 1 ]; then
ng_out "ql_server is not running on master node"
else
ok_out "ql_server is running on master node"
fi
### 各ノードのusr_prg_A の引数が実行時に指定したものと一致している
for rank in `seq 0 ${RANK_MAX}`
do
line=`grep -e "^${rank}:" ${start_1st_A_log} | grep -e "argv="`
tgt=${line#*argv=}
if [ "X${tgt}" != "X${USR_PRG_A} ${args_1st_A}" ]; then
ng_out "usr_prg_A's args is incorrect on rank:${rank}\n ${line}"
fi
done
ok_out "usr_prg_A's args are correct on each node"
### 各ノードのusr_prg_A テスト用に指定した環境変数が実行時に指定したものと一致している
for rank in `seq 0 ${RANK_MAX}`
do
line=`grep -e "^${rank}:" ${start_1st_A_log} | grep -e "QL_TEST="`
tgt=${line#*QL_TEST=}
if [ "X${tgt}" != "X${envs_1st_A}" ]; then
ng_out "usr_prg_A's env (QL_TEST) is incorrect on each node:${rank}\n ${line}"
fi
done
ok_out "usr_prg_A's env (QL_TEST) is correct on each node"
### 各ノードのusr_prg_A の計算処理が完了
for rank in `seq 0 ${RANK_MAX}`
do
line=`grep -e "^${rank}:" ${start_1st_A_log} | grep -e "done="`
tgt=${line#*done=}
if [ "X${tgt}" != "Xyes" ]; then
ng_out "usr_prg_A's calculation is not done on rank:${rank}"
fi
done
ok_out "usr_prg_A's calculation is done on each node"
### ql_mpiexec_start の完了後、usr_prg_A が再開指示待ちになっている
for node in ${node_arry[@]}
do
cnt=`ssh $node "pgrep -u ${ME} -fl 'usr_prg_A'" | grep " exe" | wc -l`
if [ ${cnt} -eq 0 ]; then
ng_out "usr_prg_A is not running on ${node}"
else
echo " ${cnt} programs is waiting on ${node}"
fi
done
ok_out "usr_prg_A is waiting for resume-req on each node"
### usr_prg_B を実行するql_mpiexec_start の返り値が0 (成功)
${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_B} 1 2 3 > ${start_1st_B_log}
rc=$?
if [ ${rc} -eq 0 ]; then
ok_out "ql_mpiexec_start usr_prg_B (first exec) returns 0"
else
ng_out "ql_mpiexec_start usr_prg_B (first exec) returns ${rc}"
fi
### 各ノードのusr_prg_B の計算処理が完了
for rank in `seq 0 ${RANK_MAX}`
do
line=`grep -e "^${rank}:" ${start_1st_B_log} | grep -e "done="`
tgt=${line#*done=}
if [ "X${tgt}" != "Xyes" ]; then
ng_out "usr_prg_B's calculation is not done on rank:${rank}"
fi
done
ok_out "usr_prg_B's calculation is done on each node"
### ql_mpiexec_start の完了後、usr_prg_B が再開指示待ちになっている
for node in ${node_arry[@]}
do
cnt=`ssh $node "pgrep -u ${ME} -fl 'usr_prg_B'" | grep " exe" | wc -l`
if [ ${cnt} -eq 0 ]; then
ng_out "usr_prg_B is not running on ${node}"
else
echo " ${cnt} programs is waiting on ${node}"
fi
done
ok_out "usr_prg_B is waiting for resume-req on each node"
### usr_prg_C を実行するql_mpiexec_start の返り値が0 (成功)
${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_C} a b c > ${start_1st_C_log}
rc=$?
if [ ${rc} -eq 0 ]; then
ok_out "ql_mpiexec_start usr_prg_C (first exec) returns 0"
else
ng_out "ql_mpiexec_start usr_prg_C (first exec) returns ${rc}"
fi
### 各ノードのusr_prg_C の計算処理が完了
for rank in `seq 0 ${RANK_MAX}`
do
line=`grep -e "^${rank}:" ${start_1st_C_log} | grep -e "done="`
tgt=${line#*done=}
if [ "X${tgt}" != "Xyes" ]; then
ng_out "usr_prg_C's calculation is not done on rank:${rank}"
fi
done
ok_out "usr_prg_C's calculation is done on each node"
### ql_mpiexec_start の完了後、usr_prg_C が再開指示待ちになっている
for node in ${node_arry[@]}
do
cnt=`ssh $node "pgrep -u ${ME} -fl 'usr_prg_C'" | grep " exe" | wc -l`
if [ ${cnt} -eq 0 ]; then
ng_out "usr_prg_C is not running on ${node}"
else
echo " ${cnt} programs is waiting on ${node}"
fi
done
ok_out "usr_prg_C is waiting for resume-req on each node"
### usr_prg_A を再実行するql_mpiexec_start の返り値が0 (成功)
env QL_TEST=${envs_2nd_A} ${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_A} ${args_2nd_A} > ${start_2nd_A_log}
rc=$?
if [ ${rc} -eq 0 ]; then
ok_out "(again) ql_mpiexec_start usr_prg_A returns 0"
else
ng_out "(again) ql_mpiexec_start usr_prg_A returns ${rc}"
fi
### 各ノードのusr_prg_A の引数が再実行時に指定したものと一致している
for rank in `seq 0 ${RANK_MAX}`
do
line=`grep -e "^${rank}:" ${start_2nd_A_log} | grep -e "argv="`
tgt=${line#*argv=}
if [ "X${tgt}" != "X${USR_PRG_A} ${args_2nd_A}" ]; then
ng_out "usr_prg_A's args is incorrect on rank:${rank}\n ${line}"
fi
done
ok_out "(again) usr_prg_A's args are correct on each node"
### 各ノードのusr_prg_A テスト用に指定した環境変数が再実行時に指定したものと一致している
for rank in `seq 0 ${RANK_MAX}`
do
line=`grep -e "^${rank}:" ${start_2nd_A_log} | grep -e "QL_TEST="`
tgt=${line#*QL_TEST=}
if [ "X${tgt}" != "X${envs_2nd_A}" ]; then
ng_out "usr_prg_A's env (QL_TEST) is incorrect on each node:${rank}\n ${line}"
fi
done
ok_out "(again) usr_prg_A's env (QL_TEST) is correct on each node"
### 各ノードのusr_prg_A の計算処理が完了
for rank in `seq 0 ${RANK_MAX}`
do
line=`grep -e "^${rank}:" ${start_2nd_A_log} | grep -e "done="`
tgt=${line#*done=}
if [ "X${tgt}" != "Xyes" ]; then
ng_out "usr_prg_A's calculation is not done on rank:${rank}"
fi
done
ok_out "(again) usr_prg_A's calculation is done on each node"
### ql_mpiexec_start の完了後、usr_prg_A が再開指示待ちになっている
for node in ${node_arry[@]}
do
cnt=`ssh $node "pgrep -u ${ME} -fl 'usr_prg_A'" | grep " exe" | wc -l`
if [ ${cnt} -eq 0 ]; then
ng_out "usr_prg_A is not running on ${node}"
else
echo " ${cnt} programs is waiting on ${node}"
fi
done
ok_out "(again) usr_prg_A is waiting for resume-req on each node"
### usr_prg_B を再実行するql_mpiexec_start の返り値が0 (成功)
${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_B} 10 20 30 40 > ${start_2nd_B_log}
rc=$?
if [ ${rc} -eq 0 ]; then
ok_out "(again) ql_mpiexec_start usr_prg_B returns 0"
else
ng_out "(again) ql_mpiexec_start usr_prg_B returns ${rc}"
fi
### 各ノードのusr_prg_B の計算処理が完了
for rank in `seq 0 ${RANK_MAX}`
do
line=`grep -e "^${rank}:" ${start_2nd_B_log} | grep -e "done="`
tgt=${line#*done=}
if [ "X${tgt}" != "Xyes" ]; then
ng_out "usr_prg_B's calculation is not done on rank:${rank}"
fi
done
ok_out "(again) usr_prg_B's calculation is done on each node"
### ql_mpiexec_start の完了後、usr_prg_B が再開指示待ちになっている
for node in ${node_arry[@]}
do
cnt=`ssh $node "pgrep -u ${ME} -fl 'usr_prg_B'" | grep " exe" | wc -l`
if [ ${cnt} -eq 0 ]; then
ng_out "usr_prg_B is not running on ${node}"
else
echo " ${cnt} programs is waiting on ${node}"
fi
done
ok_out "(again) usr_prg_B is waiting for resume-req on each node"
### usr_prg_A を終了するql_mpiexec_finalize の返り値が0 (成功)
${FINALIZE} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_A} > ${finalize_A_log}
rc=$?
if [ ${rc} -eq 0 ]; then
ok_out "ql_mpiexec_finalize usr_prg_A return 0"
else
ng_out "ql_mpiexec_finalize usr_prg_A return ${rc}"
fi
### usr_prg_B を終了するql_mpiexec_finalize の返り値が0 (成功)
${FINALIZE} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_B} > ${finalize_B_log}
rc=$?
if [ ${rc} -eq 0 ]; then
ok_out "ql_mpiexec_finalize usr_prg_B return 0"
else
ng_out "ql_mpiexec_finalize usr_prg_B return ${rc}"
fi
### usr_prg_Bの終了後、ql_serverがマスターノード上で動作している
cnt=`ssh ${MASTER} "pgrep -u ${ME} -c 'ql_server'"`
if [ ${cnt} -ne 1 ]; then
ng_out "ql_server is not running on master node"
else
ok_out "ql_server is still running on master node"
fi
### usr_prg_C を終了するql_mpiexec_finalize の返り値が0 (成功)
${FINALIZE} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_C} > ${finalize_C_log}
rc=$?
if [ ${rc} -eq 0 ]; then
ok_out "ql_mpiexec_finalize usr_prg_C return 0"
else
ng_out "ql_mpiexec_finalize usr_prg_C return ${rc}"
fi
### すべてのMPIプログラムが終了したので、ql_serverが終了した
cnt=`ssh ${MASTER} "pgrep -u ${ME} -c 'ql_server'"`
sleep 1
if [ ${cnt} -eq 0 ]; then
ok_out "ql_server is not running on master node"
else
ng_out "ql_server is still running on master node"
fi

View File

@@ -0,0 +1,230 @@
プロセス高速起動
結合テスト仕様 (ql_mpiexec_start/finalize)
下記の条件を組み合わせた構成で動作を検証する
・MPIード数
1, 2
・プロセス数/ノード
1, 2, MAX(mckernelのCPU割り当て数)
・ql_mpiexecコマンドの実行ード
master, not-master
組み合わせ:
MPIード数 proc/node mpiexec実行ード
パターン01: 1 1 master
パターン02: 1 1 not-master
パターン03: 1 2 master
パターン04: 1 2 not-master
パターン05: 1 MAX master
パターン06: 1 MAX not-master
パターン07: 2 1 master
パターン08: 2 1 not-master
パターン09: 2 2 master
パターン10: 2 2 not-master
パターン11: 2 MAX master
パターン12: 2 MAX not-master
CTxx 3つのMPIプログラム(A, B, C) をA, B, C, A, B の順に実行する
□ CTxx001 ql_server and usr_prgs are not running on each node
テスト開始時点でql_serverとテスト用MPIプログラムが各ードで実行されていない
□ CTxx002 ql_mpiexec_start usr_prg_A (first exec) returns 0
usr_prg_A を実行するql_mpiexec_start の返り値が0 (成功)
□ CTxx003 ql_server is running on master node
初回実行後、マスターード上でql_serverが動作している
□ CTxx004 usr_prg_A's args are correct on each node
ードのusr_prg_A の引数が実行時に指定したものと一致している
□ CTxx005 usr_prg_A's env (QL_TEST) is correct on each node
ードのusr_prg_A テスト用に指定した環境変数が実行時に指定したものと一致している
□ CTxx006 usr_prg_A's calculation is done on each node
ードのusr_prg_A の計算処理が完了
□ CTxx007 usr_prg_A is waiting for resume-req on each node
ql_mpiexec_start の完了後、usr_prg_A が再開指示待ちになっている
□ CTxx008 ql_mpiexec_start usr_prg_B (first exec) returns 0
usr_prg_B を実行するql_mpiexec_start の返り値が0 (成功)
□ CTxx009 usr_prg_B's calculation is done on each node
ードのusr_prg_B の計算処理が完了
□ CTxx010 usr_prg_B is waiting for resume-req on each node
ql_mpiexec_start の完了後、usr_prg_B が再開指示待ちになっている
□ CTxx011 ql_mpiexec_start usr_prg_C (first exec) returns 0
usr_prg_C を実行するql_mpiexec_start の返り値が0 (成功)
□ CTxx012 usr_prg_C's calculation is done on each node
ードのusr_prg_C の計算処理が完了
□ CTxx013 usr_prg_C is waiting for resume-req on each node
ql_mpiexec_start の完了後、usr_prg_C が再開指示待ちになっている
□ CTxx014 (again) ql_mpiexec_start usr_prg_A returns 0
usr_prg_A を再実行するql_mpiexec_start の返り値が0 (成功)
□ CTxx015 (again) usr_prg_A's args are correct on each node
ードのusr_prg_A の引数が再実行時に指定したものと一致している
□ CTxx016 (again) usr_prg_A's env (QL_TEST) is correct on each node
ードのusr_prg_A テスト用に指定した環境変数が実行時に指定したものと一致している
□ CTxx017 (again) usr_prg_A's calculation is done on each node
ードのusr_prg_A の計算処理が完了
□ CTxx018 (again) usr_prg_A is waiting for resume-req on each node
ql_mpiexec_start の完了後、usr_prg_A が再開指示待ちになっている
□ CTxx019 (again) ql_mpiexec_start usr_prg_B returns 0
usr_prg_B を再実行するql_mpiexec_start の返り値が0 (成功)
□ CTxx020 (again) usr_prg_B's calculation is done on each node
ードのusr_prg_B の計算処理が完了
□ CTxx021 (again) usr_prg_B is waiting for resume-req on each node
ql_mpiexec_start の完了後、usr_prg_B が再開指示待ちになっている
□ CTxx022 ql_mpiexec_finalize usr_prg_A return 0
usr_prg_A を終了するql_mpiexec_finalize の返り値が0 (成功)
□ CTxx023 ql_mpiexec_finalize usr_prg_B return 0
usr_prg_B を終了するql_mpiexec_finalize の返り値が0 (成功)
□ CTxx024 ql_server is still running on master node
usr_prg_Bの終了後、ql_serverがマスターード上で動作している
□ CTxx025 ql_mpiexec_finalize usr_prg_C return 0
usr_prg_C を終了するql_mpiexec_finalize の返り値が0 (成功)
□ CTxx026 ql_server is not running on master node
すべてのMPIプログラムが終了したので、ql_serverが終了した
CT20 デバイスマッピング (IBのバッファ)
IBを使用するMPIプログラムを2本作成する。
send/recvのバッファはcommon領域に2GB使用する。
送信バッファは送信毎に異なる内容(送信回数の値など)を設定し、
受信側で検証できるようにする。
デバイスマッピングに異常があると、検証で失敗する。
尚、McKernelに割り当てるメモリ量は3GBとする。
□ CT20001 device mapping program test START
□ CT20002 program 1 START
qlmpi_start によってテストプログラム 1 起動 (1プロセス/ノード x 2ード)
□ CT20003 MPI_Send/Recv OK
2 プロセス間でMPI_Send/Recvを実行
□ CT20004 program 1 suspend
テストプログラム 1 が停止
□ CT20005 program 2 START
qlmpi_start によってテストプログラム 2 起動 (1プロセス/ノード x 2ード)
□ CT20006 MPI_Send/Recv OK
2 プロセス間でMPI_Send/Recvを実行
□ CT20007 program 2 suspend
テストプログラム 2 が停止
□ CT20008 program 1 resume
qlmpi_start によってテストプログラム 1 が再開
□ CT20009 MPI_Send/Recv OK
2 プロセス間でMPI_Send/Recvを実行
□ CT20010 program 1 suspend
テストプログラム 1 が停止
□ CT20011 program 2 resume
qlmpi_start によってテストプログラム 2 が再開
□ CT20012 MPI_Send/Recv OK
2 プロセス間でMPI_Send/Recvを実行
□ CT20013 program 2 suspend
テストプログラム 2 が停止
□ CT20014 program 1 resume
qlmpi_finalize によってテストプログラム 1 が再開
□ CT20015 program 1 END
テストプログラム1が終了
□ CT20016 program 2 resume
qlmpi_finalize によってテストプログラム 2 が再開
□ CT20017 program 2 END
テストプログラム2が終了
□ CT20018 device mapping program test END
CT21 mcexecのページテーブル更新確認
ファイルをreadするプログラムを作成する。
ファイル名によって、ファイル内容が確定できるようにする。
(例えば、ファイル名が"1"のファイルは"1"で埋め尽くされているなど)
入力領域はcommon領域に2GB確保する。
ファイルのサイズも2GBとする。
qlmpi_start毎にファイルを切り替える(コマンドラインに渡すなど)。
入力結果を検証する。
もし、ページインで最初と異なる物理ページにバッファが割り当てられ、且つ、
mcexecのページテーブルが更新されていない場合は、readによって関係無いページが
破壊される。また、ファイルの読み込み結果も検証で失敗する。
尚、McKernelに割り当てるメモリ量は3GBとする。
□ CT21001 mcexec page table update test START
□ CT21002 program 1 START
qlmpi_start によってテストプログラム 1 起動 (1プロセス)
□ CT21003 data read OK
ファイルを読み込んで内容確認した結果、問題なし
□ CT21004 program 1 suspend
テストプログラム 1 が停止
□ CT21005 program 2 START
qlmpi_start によってテストプログラム 2 起動 (1プロセス)
□ CT21006 data read OK
ファイルを読み込んで内容確認した結果、問題なし
□ CT21007 program 2 suspend
テストプログラム 2 が停止
□ CT21008 program 1 resume
qlmpi_start によってテストプログラム 1 が再開
□ CT21009 data read OK
ファイルを読み込んで内容確認した結果、問題なし
□ CT21010 program 1 suspend
テストプログラム 1 が停止
□ CT21011 program 2 resume
qlmpi_start によってテストプログラム 2 が再開
□ CT21012 data read OK
ファイルを読み込んで内容確認した結果、問題なし
□ CT21013 program 2 suspend
テストプログラム 2 が停止
□ CT21014 program 1 resume
qlmpi_finalize によってテストプログラム 1 が再開
□ CT21015 program 1 END
テストプログラム1が終了
□ CT21016 program 2 resume
qlmpi_finalize によってテストプログラム 2 が再開
□ CT21017 program 2 END
テストプログラム2が終了
□ CT21018 mcexec page table update test END
CT22 OMP
OMP で複数のスレッドを使用する状況のテスト。
□ CT22001 OMP program test START
□ CT22002 program 1 START
qlmpi_start によってテストプログラム 1 起動 (1プロセス/ノード x 2ード)
□ CT22003 check rank info
MPIとOMPのプロセス情報が出力されていることを確認する。
以下のように出力されればOK(順不同)。
mpi= 0/ 2, omp= 1/ 4
mpi= 0/ 2, omp= 3/ 4
mpi= 0/ 2, omp= 0/ 4
mpi= 0/ 2, omp= 2/ 4
mpi= 1/ 2, omp= 1/ 4
mpi= 1/ 2, omp= 0/ 4
mpi= 1/ 2, omp= 3/ 4
mpi= 1/ 2, omp= 2/ 4
□ CT22004 program 1 suspend
テストプログラム 1 が停止
□ CT22005 program 2 START
qlmpi_start によってテストプログラム 2 起動 (1プロセス/ノード x 2ード)
□ CT22006 check rank info
MPIとOMPのプロセス情報が出力されていることを確認する。
□ CT22007 program 2 suspend
テストプログラム 2 が停止
□ CT22008 program 1 resume
qlmpi_start によってテストプログラム 1 が再開
□ CT22009 check rank info
MPIとOMPのプロセス情報が出力されていることを確認する。
□ CT22010 program 1 suspend
テストプログラム 1 が停止
□ CT22011 program 2 resume
qlmpi_start によってテストプログラム 2 が再開
□ CT22012 check rank info
MPIとOMPのプロセス情報が出力されていることを確認する。
□ CT22013 program 2 suspend
テストプログラム 2 が停止
□ CT22014 program 1 resume
qlmpi_finalize によってテストプログラム 1 が再開
□ CT22015 program 1 END
テストプログラム1が終了
□ CT22016 program 2 resume
qlmpi_finalize によってテストプログラム 2 が再開
□ CT22017 program 2 END
テストプログラム2が終了
□ CT22018 OMP program test END
CT91 異常系
□ CT91001 machinefile is not specified, so ql_mpiexec_start returns not 0
□ CT91002 MPI program is not specified, so ql_mpiexec_start returns not 0
□ CT91003 specified machinefile does not exist, so ql_mpiexec_start returns not 0
□ CT91004 specified MPI program does not exist, so ql_mpiexec_start returns not 0
□ CT91005 mpiexec is not found, so ql_mpiexec_start returns not 0
□ CT91006 mpiexec abort, so ql_mpiexec_start returns not 0
□ CT91007 machinefile is not specified, so ql_mpiexec_finalize returns not 0
□ CT91008 MPI program is not specified, so ql_mpiexec_finalize returns not 0
□ CT91009 specified machinefile is wrong, so ql_mpiexec_finalize returns not 0
□ CT91010 specified MPI program name is wrong, so ql_mpiexec_finalize returns not 0
□ CT91011 one of MPI processes aborts, so ql_mpiexec_start returns not 0

View File

@@ -0,0 +1,5 @@
TEST_PREFIX=CT01
MPI_NODE_NUM=1
PROC_PER_NODE=1
MASTER=wallaby15
SLAVE=

View File

@@ -0,0 +1,5 @@
TEST_PREFIX=CT02
MPI_NODE_NUM=1
PROC_PER_NODE=1
MASTER=wallaby14
SLAVE=

View File

@@ -0,0 +1,5 @@
TEST_PREFIX=CT03
MPI_NODE_NUM=1
PROC_PER_NODE=2
MASTER=wallaby15
SLAVE=

View File

@@ -0,0 +1,5 @@
TEST_PREFIX=CT04
MPI_NODE_NUM=1
PROC_PER_NODE=2
MASTER=wallaby14
SLAVE=

View File

@@ -0,0 +1,5 @@
TEST_PREFIX=CT05
MPI_NODE_NUM=1
PROC_PER_NODE=8
MASTER=wallaby15
SLAVE=

View File

@@ -0,0 +1,5 @@
TEST_PREFIX=CT06
MPI_NODE_NUM=1
PROC_PER_NODE=8
MASTER=wallaby14
SLAVE=

View File

@@ -0,0 +1,5 @@
TEST_PREFIX=CT07
MPI_NODE_NUM=2
PROC_PER_NODE=1
MASTER=wallaby15
SLAVE=wallaby14

View File

@@ -0,0 +1,5 @@
TEST_PREFIX=CT08
MPI_NODE_NUM=2
PROC_PER_NODE=1
MASTER=wallaby14
SLAVE=wallaby15

View File

@@ -0,0 +1,5 @@
TEST_PREFIX=CT09
MPI_NODE_NUM=2
PROC_PER_NODE=2
MASTER=wallaby15
SLAVE=wallaby14

View File

@@ -0,0 +1,5 @@
TEST_PREFIX=CT10
MPI_NODE_NUM=2
PROC_PER_NODE=2
MASTER=wallaby14
SLAVE=wallaby15

View File

@@ -0,0 +1,5 @@
TEST_PREFIX=CT11
MPI_NODE_NUM=2
PROC_PER_NODE=8
MASTER=wallaby15
SLAVE=wallaby14

View File

@@ -0,0 +1,5 @@
TEST_PREFIX=CT12
MPI_NODE_NUM=2
PROC_PER_NODE=8
MASTER=wallaby14
SLAVE=wallaby15

View File

@@ -0,0 +1,5 @@
TEST_PREFIX=ECT91
MPI_NODE_NUM=2
PROC_PER_NODE=8
MASTER=wallaby14
SLAVE=wallaby15

View File

@@ -0,0 +1,59 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <mpi.h>
#include <qlmpilib.h>
/*
 * usr_prg_A: MPI test program for the ql_mpiexec tests.  On every
 * (re)start it reports host/rank/pid, echoes argc/argv and the QL_TEST
 * environment variable so the driver script can verify them, then blocks
 * in ql_client() waiting for a resume or finalize request.
 */
int
main(int argc, char **argv)
{
	int rc;
	int i;
	int num_procs, my_rank;
	char hname[128];
	char argv_str[1024];
	size_t off;
	const char *ql_test;
	gethostname(hname, 128);
	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
ql_loop:
	printf("INFO This is A. host=%s, rank:%d, pid:%d\n", hname, my_rank, getpid());
	memset(argv_str, '\0', sizeof(argv_str));
	printf("%d:argc=%d\n", my_rank, argc);
	/*
	 * Join argv into one space-separated string.  snprintf() is used
	 * instead of strcat() so an overly long argument list is truncated
	 * rather than overflowing argv_str.
	 */
	off = 0;
	for (i = 0; i < argc; i++) {
		int n = snprintf(argv_str + off, sizeof(argv_str) - off,
				 "%s%s", i > 0 ? " " : "", argv[i]);
		if (n < 0 || (size_t)n >= sizeof(argv_str) - off) {
			break;	/* buffer full: keep the truncated prefix */
		}
		off += (size_t)n;
	}
	printf("%d:argv=%s\n", my_rank, argv_str);
	/* Guard against an unset QL_TEST: %s with NULL is undefined behavior
	 * (glibc happens to print "(null)", which this makes explicit). */
	ql_test = getenv("QL_TEST");
	printf("%d:QL_TEST=%s\n", my_rank, ql_test ? ql_test : "(null)");
	printf("%d:done=yes\n", my_rank);
	fflush(stdout);
	/* Block until the ql_server tells us to resume or finalize. */
	rc = ql_client(&argc, &argv);
	if (rc == QL_CONTINUE) {
		printf("%d:resume=go_back\n", my_rank);
		goto ql_loop;
	}
	else {
		printf("%d:resume=go_finalize\n", my_rank);
	}
	MPI_Finalize();
	printf("%d:finish=yes\n", my_rank);
	return 0;
}

View File

@@ -0,0 +1,45 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <mpi.h>
#include <qlmpilib.h>
/*
 * usr_prg_B: minimal MPI test program for the ql_mpiexec tests.  Each
 * iteration reports host/rank/pid, marks its "calculation" done, then
 * blocks in ql_client() until a resume or finalize request arrives.
 */
int
main(int argc, char **argv)
{
	int rc;
	int num_procs, my_rank;
	char hname[128];
	gethostname(hname, 128);
	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
	/* Repeat the work phase until ql_client() requests finalization. */
	do {
		printf("INFO This is B. host=%s, rank:%d, pid:%d\n",
		       hname, my_rank, getpid());
		printf("%d:done=yes\n", my_rank);
		fflush(stdout);
		rc = ql_client(&argc, &argv);
		if (rc == QL_CONTINUE) {
			printf("%d:resume=go_back\n", my_rank);
		}
	} while (rc == QL_CONTINUE);
	printf("%d:resume=go_finalize\n", my_rank);
	MPI_Finalize();
	printf("%d:finish=yes\n", my_rank);
	return 0;
}

View File

@@ -0,0 +1,45 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <mpi.h>
#include <qlmpilib.h>
/*
 * usr_prg_C: minimal MPI test program for the ql_mpiexec tests.  Each
 * pass reports host/rank/pid and "done=yes", then blocks in ql_client()
 * waiting for the ql_server to request a resume or finalize.
 */
int
main(int argc, char **argv)
{
	int rc;
	int num_procs, my_rank;
	char hname[128];
	gethostname(hname, 128);
	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
ql_loop:
	/* The driver script greps these lines to verify the run. */
	printf("INFO This is C. host=%s, rank:%d, pid:%d\n", hname, my_rank, getpid());
	printf("%d:done=yes\n", my_rank);
	fflush(stdout);
	/* Block until resumed; QL_CONTINUE means "run another iteration". */
	rc = ql_client(&argc, &argv);
	//printf("ql_client returns: %d\n", rc);
	if (rc == QL_CONTINUE) {
		printf("%d:resume=go_back\n", my_rank);
		goto ql_loop;
	}
	else {
		printf("%d:resume=go_finalize\n", my_rank);
	}
	MPI_Finalize();
	printf("%d:finish=yes\n", my_rank);
	return 0;
}

View File

@@ -0,0 +1,56 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <mpi.h>
#include <qlmpilib.h>
/*
 * usr_prg_irreg: MPI test program for the irregular (abort) test case.
 * One rank (argv[1], default rank 0) calls MPI_Abort() instead of
 * completing its work, so the driver can verify that ql_mpiexec_start
 * propagates the failure.
 */
int
main(int argc, char **argv)
{
	int rc;
	int num_procs, my_rank;
	char hname[128];
	int abort_rank = 0;
	gethostname(hname, 128);
	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
ql_loop:
	printf("INFO This is irreg. host=%s, rank:%d, pid:%d\n", hname, my_rank, getpid());
	/*
	 * argv[1] (optional) selects the aborting rank.  The original test
	 * was "argc > 2", which silently ignored a single argument even
	 * though argv[1] is valid whenever argc > 1.
	 */
	if (argc > 1) {
		abort_rank = atoi(argv[1]);
	}
	if (my_rank != abort_rank) {
		printf("%d:done=yes\n", my_rank);
		fflush(stdout);
	}
	else {
		/* The chosen rank deliberately brings the job down. */
		printf("%d:done=abort\n", my_rank);
		fflush(stdout);
		MPI_Abort(MPI_COMM_WORLD, -1);
	}
	rc = ql_client(&argc, &argv);
	//printf("ql_client returns: %d\n", rc);
	if (rc == QL_CONTINUE) {
		printf("%d:resume=go_back\n", my_rank);
		goto ql_loop;
	}
	else {
		printf("%d:resume=go_finalize\n", my_rank);
	}
	MPI_Finalize();
	printf("%d:finish=yes\n", my_rank);
	return 0;
}

View File

@@ -0,0 +1,5 @@
#!/bin/sh
# Dummy mpiexec used by the irregular-path tests: it always fails so
# that ql_mpiexec_start's error handling can be exercised.
printf '%s\n' "dummy mpiexec abort!!" >&2
exit 1

View File

@@ -0,0 +1,2 @@
foo
bar

81
test/qlmpi/swaptest.c Normal file
View File

@@ -0,0 +1,81 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BUF_SIZE (32*1024)
int data[1024*1024];
char sym2[1024*1024] = { 10, 20, 30, 0 };
char sym3[1024*1024] = { 10, 20, 30, 0 };
char *sym1 = "aaaaaa";
char buffer[BUF_SIZE];
char *ptr1, *ptr2;
char fnamebuf[1024];
/*
 * Thin wrapper around the McKernel-specific swapout system call
 * (syscall number 801).  Returns the raw syscall result truncated to
 * int: 0/positive on success, -1 with errno set on failure (ENOSYS on
 * a plain Linux kernel).
 */
int
swapout(char *fname, void *buf, size_t sz, int flag)
{
	return (int)syscall(801, fname, buf, sz, flag);
}
/*
 * Thin wrapper around syscall number 802, which the qlmpi/swap patch
 * uses to request an mlock on the Linux side.  Returns the raw syscall
 * result truncated to int (-1 with errno on failure; ENOSYS on a plain
 * Linux kernel).
 */
int
linux_mlock(const void *addr, size_t len)
{
	return (int)syscall(802, addr, len);
}
/*
 * swaptest driver: exercises the McKernel swapout path.  It prints the
 * addresses of data/bss/stack/heap objects, mlocks a region both via
 * the native mlock() and via the Linux-side syscall wrapper, then calls
 * swapout() and checks that the locked/initialized data survived.
 *
 * argv[1] (optional) is a debug flag passed through to swapout:
 *   1 = skip real paging, just call swapout in Linux
 *   2 = skip calling swapout in Linux
 */
int
main(int argc, char **argv)
{
	int cc;
	int flag = 0;
	if (argc == 2) {
		flag = atoi(argv[1]);
	}
	switch (flag) {
	case 1:
		printf("skipping real paging for debugging and just calling swapout in Linux\n");
		break;
	case 2:
		printf("skipping calling swapout in Linux\n");
		break;
	}
	/* Show where each object lives (data, bss, stack, heap) so the
	 * swap dump can be correlated with addresses by hand. */
	printf("&data = %p\n", data);
	printf("&sym1 = %p\n", &sym1);
	printf("&sym2 = %p\n", sym2);
	printf("&sym3 = %p\n", sym3);
	printf("&cc = %p\n", &cc);
	ptr1 = malloc(1024);
	ptr2 = malloc(1024*1024);
	printf("ptr1 = %p\n", ptr1);
	printf("ptr2 = %p\n", ptr2);
	sprintf((char*) data, "hello\n");
	/*
	 * testing mlock in mckernel side
	 */
	cc = mlock(data, 16*1024);
	printf("McKernel mlock returns: %d\n", cc);
	/*
	 * testing mlock in linux side
	 */
	cc = linux_mlock(data, 16*1024);
	printf("linux_mlock returns: %d\n", cc);
	/* sym2/sym3 double as printf format strings below, so a swap bug
	 * that corrupts them is immediately visible in the output. */
	strcpy(sym2, "returns: %d\n");
	strcpy(sym3, "data = %d\n");
	/* buf area will be used in swapout systemcall for debugging */
	strcpy(fnamebuf, "/tmp/pages");
	cc = swapout(fnamebuf, buffer, BUF_SIZE, flag);
	printf("swapout returns: %d\n", cc);
	/* Verify the string written into data[] before the swapout. */
	printf("data = %s", data);
	/* Non-literal format strings: intentional here -- the formats were
	 * just written into sym2/sym3 above and double as a corruption check. */
	printf(sym2, cc);
	printf(sym3, data);
	return 0;
}