uti: Add/Modify test programs
Change-Id: I27a39d6b11af5243f93d07c31c2ef80f6727dd53
This commit is contained in:
517
test/uti/posix_aio/001.c
Normal file
517
test/uti/posix_aio/001.c
Normal file
@@ -0,0 +1,517 @@
|
||||
#include <fcntl.h>
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <aio.h>
|
||||
#include <signal.h>
|
||||
#include <libgen.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <mpi.h>
|
||||
#include <linux/limits.h>
|
||||
#include "util.h"
|
||||
|
||||
#define NREQS 1 /* # of parallel I/O requests per process */
|
||||
#define SZBUF (1ULL<<23)
|
||||
|
||||
#define MYTIME_TOUSEC 1000000
|
||||
#define MYTIME_TONSEC 1000000000
|
||||
|
||||
#define NROW 11
|
||||
#define NCOL 4
|
||||
|
||||
#define NSAMPLES_DROP 0/*10*/
|
||||
#define NSAMPLES_IO 2/*20*/
|
||||
#define NSAMPLES_TOTAL 2/*20*/
|
||||
#define NSAMPLES_INNER 1
|
||||
|
||||
#define Q(x) #x
|
||||
#define QUOTE(x) Q(x)
|
||||
|
||||
char test_srcdir[PATH_MAX];
|
||||
|
||||
/*
 * Return the current wall-clock time in seconds as reported by MPI_Wtime().
 * All timings in this test are differences of two such timestamps.
 */
static inline double mytime()
{
	double now = MPI_Wtime();

	return now;
}
|
||||
|
||||
/* Book-keeping for one asynchronous I/O request. */
struct aioreq {
	/* Index of this request inside the request list */
	int rank;
	/* Last value obtained from aio_error(); EINPROGRESS while outstanding */
	int status;
	/* Control block handed to the aio_*() calls for this request */
	struct aiocb *aiocbp;
};
|
||||
|
||||
/*
 * Handler installed for the AIO completion signal.
 *
 * It deliberately does nothing: my_aio() detects completion by polling
 * aio_error(), so the handler only has to exist so that the signal is
 * consumed.
 */
static void aio_sighandler(int sig, siginfo_t *si, void *ucontext)
{
	if (si->si_code != SI_ASYNCIO) {
		return;
	}

	/* I/O completion signal received; nothing to do here. */
}
|
||||
|
||||
int my_aio_init(int nreqs, struct aioreq *iolist, struct aiocb *aiocblist, char *aiobufs[NREQS]) {
|
||||
int j;
|
||||
|
||||
for (j = 0; j < nreqs; j++) {
|
||||
iolist[j].rank = j;
|
||||
iolist[j].aiocbp = &aiocblist[j];
|
||||
iolist[j].aiocbp->aio_buf = aiobufs[j];
|
||||
iolist[j].aiocbp->aio_nbytes = SZBUF;
|
||||
iolist[j].aiocbp->aio_reqprio = 0;
|
||||
iolist[j].aiocbp->aio_offset = 0;
|
||||
iolist[j].aiocbp->aio_sigevent.sigev_notify = SIGEV_SIGNAL;
|
||||
iolist[j].aiocbp->aio_sigevent.sigev_signo = SIGUSR1;
|
||||
iolist[j].aiocbp->aio_sigevent.sigev_value.sival_ptr = &iolist[j];
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int my_aio_evict(int nreqs, char **fn) {
|
||||
int ret;
|
||||
int i;
|
||||
char cmd[PATH_MAX];
|
||||
|
||||
for (i = 0; i < NREQS; i++) {
|
||||
|
||||
sprintf(cmd, "%s -e %s > /dev/null", QUOTE(VMTOUCH), fn[i]);
|
||||
ret = system(cmd);
|
||||
|
||||
if (ret == -1) {
|
||||
pr_err("%s: error: system\n",
|
||||
__func__);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (WEXITSTATUS(ret)) {
|
||||
pr_err("%s: error: system returned %d\n",
|
||||
__func__, WEXITSTATUS(ret));
|
||||
ret = WEXITSTATUS(ret);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
int my_aio_open(int nreqs, struct aioreq *iolist, char **fn) {
|
||||
int ret;
|
||||
int j;
|
||||
|
||||
for (j = 0; j < NREQS; j++) {
|
||||
iolist[j].aiocbp->aio_fildes = open(fn[j], O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH);
|
||||
if (iolist[j].aiocbp->aio_fildes == -1) {
|
||||
pr_err("%s: error: open %s: %s\n",
|
||||
__func__, fn[j], strerror(errno));
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int my_aio_check(int nreqs, char **fn, char **mem_data) {
|
||||
int ret;
|
||||
int i;
|
||||
FILE *fp[NREQS] = { 0 };
|
||||
char *file_data[NREQS];
|
||||
|
||||
/* Check contents */
|
||||
for (i = 0; i < nreqs; i++) {
|
||||
|
||||
if (!(file_data[i] = malloc(SZBUF))) {
|
||||
pr_err("error: allocating data\n");
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!(fp[i] = fopen(fn[i], "r+"))) {
|
||||
pr_err("error: fopen %s: %s\n",
|
||||
fn[i], strerror(errno));
|
||||
ret = -errno;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (fread(file_data[i], sizeof(char), SZBUF, fp[i]) != SZBUF) {
|
||||
pr_err("error: fread: %s\n",
|
||||
strerror(errno));
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
fclose(fp[i]);
|
||||
|
||||
if (memcmp((const char *)file_data[i], mem_data[i], SZBUF)) {
|
||||
pr_err("%s: file_data[%d] and mem_data[%d] doesn't match\n",
|
||||
__func__, i, i);
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
free(file_data[i]);
|
||||
}
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
void my_aio_close(int nreqs, struct aioreq *iolist) {
|
||||
int j;
|
||||
|
||||
for (j = 0; j < NREQS; j++) {
|
||||
close(iolist[j].aiocbp->aio_fildes);
|
||||
iolist[j].aiocbp->aio_fildes = -1;
|
||||
}
|
||||
}
|
||||
|
||||
int my_aio(int nreqs, struct aioreq *iolist, char **fn, long nsec_calc) {
|
||||
int ret;
|
||||
int i, j;
|
||||
|
||||
/* Start async IO */
|
||||
for (j = 0; j < NSAMPLES_INNER; j++) {
|
||||
int completion_count = 0;
|
||||
|
||||
//pr_debug("debug: opening file\n");
|
||||
if ((ret = my_aio_open(nreqs, iolist, fn)) == -1) {
|
||||
pr_err("%s: error: aio_read: %s\n",
|
||||
__func__, strerror(errno));
|
||||
ret = -errno;
|
||||
goto out;
|
||||
}
|
||||
|
||||
//pr_debug("debug: issuing write command\n");
|
||||
for (j = 0; j < nreqs; j++) {
|
||||
|
||||
/* Reset completion notice */
|
||||
iolist[j].status = EINPROGRESS;
|
||||
|
||||
if ((ret = aio_write(iolist[j].aiocbp)) == -1) {
|
||||
pr_err("%s: error: aio_read: %s\n",
|
||||
__func__, strerror(errno));
|
||||
ret = -errno;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
/* Emulate calcuation phase */
|
||||
ndelay(nsec_calc);
|
||||
|
||||
/* Wait for completion of async IO */
|
||||
//pr_debug("debug: waiting for completion\n");
|
||||
while (completion_count != nreqs) {
|
||||
for (j = 0; j < nreqs; j++) {
|
||||
if (iolist[j].status != EINPROGRESS) {
|
||||
continue;
|
||||
}
|
||||
|
||||
iolist[j].status = aio_error(iolist[j].aiocbp);
|
||||
|
||||
switch (iolist[j].status) {
|
||||
case 0: /* Succeeded */
|
||||
goto completed;
|
||||
case EINPROGRESS:
|
||||
break;
|
||||
case ECANCELED:
|
||||
pr_err("%s: error: aio is cancelled\n",
|
||||
__func__);
|
||||
goto completed;
|
||||
default:
|
||||
pr_err("%s: error: unexpected status: %d\n",
|
||||
__func__, iolist[j].status);
|
||||
goto completed;
|
||||
completed:
|
||||
completion_count++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Check write amount */
|
||||
for (j = 0; j < nreqs; j++) {
|
||||
ssize_t size;
|
||||
|
||||
if ((size = aio_return(iolist[j].aiocbp)) != SZBUF) {
|
||||
pr_err("%s: Expected to have written %ld B but reported to have written %ld B\n",
|
||||
__func__, SZBUF, size);
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
my_aio_close(nreqs, iolist);
|
||||
}
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int measure(double *result, int nsamples, int nsamples_drop, int nreqs, struct aioreq *iolist, char **fn, char **aiobufs, long nsec_calc) {
|
||||
int ret;
|
||||
int i;
|
||||
double t_l, t_g, t_sum = 0;
|
||||
double start, end;
|
||||
|
||||
for (i = 0; i < nsamples + nsamples_drop; i++) {
|
||||
|
||||
#if 0
|
||||
pr_debug("debug: evicting file cache\n");
|
||||
if ((ret = my_aio_evict(nreqs, fn))) {
|
||||
pr_err("%s: error: my_aio_evict returned %d\n",
|
||||
__func__, ret);
|
||||
}
|
||||
#endif
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
|
||||
start = mytime();
|
||||
if ((ret = my_aio(nreqs, iolist, fn, nsec_calc))) {
|
||||
pr_err("%s: error: my_aio_read returned %d\n",
|
||||
__func__, ret);
|
||||
}
|
||||
end = mytime();
|
||||
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
|
||||
/* Check contents */
|
||||
if ((ret = my_aio_check(nreqs, fn, aiobufs))) {
|
||||
pr_err("%s: error: my_aio_check returned %d\n",
|
||||
__func__, ret);
|
||||
}
|
||||
|
||||
if (i < nsamples_drop) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Take max */
|
||||
t_l = end - start;
|
||||
MPI_Allreduce(&t_l, &t_g, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
|
||||
t_sum += t_g;
|
||||
}
|
||||
|
||||
*result = t_sum / nsamples;
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int ret;
|
||||
int i, j, progress, l;
|
||||
int rank, nproc;
|
||||
int disable_syscall_intercept = 0, ppn = -1;
|
||||
struct aioreq *iolist;
|
||||
struct aiocb *aiocblist;
|
||||
struct sigaction sa;
|
||||
double t_io_ave, t_total_ave;
|
||||
double t_table[NROW][NCOL] = { 0 };
|
||||
int opt;
|
||||
char *aiobufs[NREQS] = { 0 };
|
||||
char **fn;
|
||||
|
||||
opterr = 0; /* Don't print out error when not recognizing option character */
|
||||
|
||||
while ((opt = getopt(argc, argv, ":I:p:")) != -1) {
|
||||
switch (opt) {
|
||||
case 'I':
|
||||
disable_syscall_intercept = atoi(optarg);
|
||||
break;
|
||||
case 'p':
|
||||
ppn = atoi(optarg);
|
||||
break;
|
||||
case '?':
|
||||
pr_err("error: invalid option: -%c\n",
|
||||
optopt);
|
||||
ret = 1;
|
||||
goto out;
|
||||
case ':':
|
||||
pr_err("error: option -%c requires an argument\n",
|
||||
optopt);
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
if (ppn == -1) {
|
||||
pr_err("error: specify -p <PPN>\n");
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Initialize MPI */
|
||||
MPI_Init(&argc, &argv);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
|
||||
|
||||
/* Show parameters */
|
||||
if (rank == 0) {
|
||||
#pragma omp parallel
|
||||
{
|
||||
if (omp_get_thread_num() == 0) {
|
||||
printf("nproc: %d, ppn: %d, #threads: %d\n", nproc, ppn, omp_get_num_threads());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Set verbosity */
|
||||
//test_set_loglevel(TEST_LOGLEVEL_WARN);
|
||||
|
||||
/* Initialize delay function */
|
||||
ndelay_init();
|
||||
|
||||
/* Prepare file names */
|
||||
|
||||
#define TEST_SRCDIR "/work/gg10/e29005"
|
||||
sprintf(test_srcdir, "%s", /*TEST_SRCDIR*/dirname(argv[0]));
|
||||
|
||||
if (!(fn = malloc(sizeof(char *) * NREQS))) {
|
||||
pr_err("error: allocating fn\n");
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (i = 0; i < NREQS; i++) {
|
||||
if (!(fn[i] = malloc(PATH_MAX))) {
|
||||
pr_err("error: allocating fn\n");
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
sprintf(fn[i], "%s/rank%d-number%d", test_srcdir, rank, i);
|
||||
if (rank == 0) pr_debug("debug: rank: %d, fn[%d]: %s\n",
|
||||
rank, i, fn[i]);
|
||||
}
|
||||
|
||||
/* Allocate aio commands */
|
||||
if (!(iolist = calloc(NREQS, sizeof(struct aioreq)))) {
|
||||
pr_err("%s: error: allocating iolist\n",
|
||||
__func__);
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!(aiocblist = calloc(NREQS, sizeof(struct aiocb)))) {
|
||||
pr_err("%s: error: allocating aiocblist\n",
|
||||
__func__);
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Prepare contents to be written */
|
||||
for (i = 0; i < NREQS; i++) {
|
||||
aiobufs[i] = malloc(SZBUF);
|
||||
if (!aiobufs[i]) {
|
||||
pr_err("%s: error: allocating aiobufs\n",
|
||||
__func__);
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (j = 0; j < SZBUF; j++) {
|
||||
*(aiobufs[i] + j) = i + j + rank;
|
||||
}
|
||||
}
|
||||
|
||||
/* Set signal handlers */
|
||||
sa.sa_flags = SA_RESTART | SA_SIGINFO;
|
||||
sa.sa_sigaction = aio_sighandler;
|
||||
if (sigaction(SIGUSR1, &sa, NULL) == -1) {
|
||||
pr_err("%s: error: sigaction: %s\n",
|
||||
__func__, strerror(errno));
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Set aio parameters except fd and status */
|
||||
if ((ret = my_aio_init(NREQS, iolist, aiocblist, aiobufs))) {
|
||||
pr_err("%s: error: my_aio_init returned %d\n",
|
||||
__func__, ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Measure IO only time */
|
||||
//pr_debug("debug: measuring IO only time\n");
|
||||
if ((ret = measure(&t_io_ave, NSAMPLES_IO, NSAMPLES_DROP, NREQS, iolist, fn, aiobufs, 0))) {
|
||||
pr_err("error: measure returned %d\n", ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (rank == 0) {
|
||||
printf("t_io_ave: %.0f usec, %.0f MB/s per node\n",
|
||||
t_io_ave * MYTIME_TOUSEC,
|
||||
SZBUF * ppn / t_io_ave / 1000000);
|
||||
}
|
||||
|
||||
/* Measure time with no progress, progress and no uti, progress and uti */
|
||||
for (progress = 0; progress <= (disable_syscall_intercept ? 0 : 0); progress += 1) {
|
||||
|
||||
/* Spawn helper thread onto compute CPUs with ignoring uti_attr */
|
||||
if (progress == 1) {
|
||||
setenv("DISABLE_UTI", "1", 1);
|
||||
}
|
||||
/* Spawn helper thread onto dedicated CPUs with respecting uti_attr */
|
||||
else if (progress == 2) {
|
||||
unsetenv("DISABLE_UTI");
|
||||
}
|
||||
|
||||
/* Measure with various calculation time */
|
||||
for (l = 0; l <= 10; l += 2) {
|
||||
long nsec_calc = (t_io_ave * MYTIME_TONSEC * l) / 10;
|
||||
|
||||
if ((ret = measure(&t_total_ave, NSAMPLES_TOTAL, NSAMPLES_DROP, NREQS, iolist, fn, aiobufs, nsec_calc))) {
|
||||
pr_err("error: measure returned %d\n", ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (rank == 0) {
|
||||
if (l == 0) {
|
||||
pr_debug("progress=%d\n", progress);
|
||||
if (progress == 0) {
|
||||
pr_debug("calc\ttotal\n");
|
||||
} else {
|
||||
pr_debug("total\n");
|
||||
}
|
||||
}
|
||||
|
||||
t_table[l][0] = nsec_calc * (MYTIME_TOUSEC / (double)MYTIME_TONSEC);
|
||||
if (progress == 0) {
|
||||
pr_debug("%.0f\t%.0f\n", nsec_calc * (MYTIME_TOUSEC / (double)MYTIME_TONSEC), t_total_ave * MYTIME_TOUSEC);
|
||||
t_table[l][progress + 1] = t_total_ave * MYTIME_TOUSEC;
|
||||
} else {
|
||||
pr_debug("%.0f\n", t_total_ave * MYTIME_TOUSEC);
|
||||
t_table[l][progress + 1] = t_total_ave * MYTIME_TOUSEC;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (rank == 0) {
|
||||
printf("calc,no prog,prog and no uti, prog and uti\n");
|
||||
for (l = 0; l <= 10; l++) {
|
||||
for (i = 0; i < NCOL; i++) {
|
||||
if (i > 0) {
|
||||
printf(",");
|
||||
}
|
||||
printf("%.0f", t_table[l][i]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
|
||||
MPI_Finalize();
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
for (i = 0; i < NREQS; i++) {
|
||||
free(aiobufs[i]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
270
test/uti/posix_aio/001.sh
Executable file
270
test/uti/posix_aio/001.sh
Executable file
@@ -0,0 +1,270 @@
|
||||
#!/usr/bin/bash

# Launcher for the posix_aio test: parses options, optionally stops/reboots
# McKernel on the target nodes, writes ./job.sh and runs or submits it.
# For tracing, run the script with "bash -x".

# Installation locations
MYHOME=/home/e29005
test_dir=$(pwd -P)
mck_dir=${MYHOME}/project/os/install
uti_dir_lin=${MYHOME}/project/uti/install_linux
uti_dir_mck=${MYHOME}/project/uti/install_mckernel

# Name of the test executable: this script's name without the .sh suffix
exe=$(basename -- "$0" .sh)

# Actions (-s, -r, -g)
stop=0
reboot=0
go=0

# Defaults for the remaining options
interactive=0
pjsub=0
gdb=0
disable_syscall_intercept=0
mck=0
nnodes=2
LASTNODE=8196
use_hfi=0
omp_num_threads=1
ppn=4
|
||||
|
||||
# Option parsing.  The leading ':' selects silent error reporting so that
# the offending option letter is available in OPTARG for our own messages.
#   -s stop McKernel      -r reboot McKernel   -g run the job
#   -m use McKernel       -N <nodes>           -P <procs per node>
#   -o <OpenMP threads>   -h enable HFI        -G run under gdb
#   -I <0|1> disable syscall intercept
#   -i interactive        -p submit with pjsub -L <last node number>
while getopts :srgmN:P:o:hGI:ipL: OPT
do
	case ${OPT} in
	s) stop=1
		;;
	r) reboot=1
		;;
	g) go=1
		;;
	m) mck=1
		;;
	N) nnodes=$OPTARG
		;;
	P) ppn=$OPTARG
		;;
	o) omp_num_threads=$OPTARG
		;;
	h) use_hfi=1
		;;
	G) gdb=1
		;;
	I) disable_syscall_intercept=$OPTARG
		;;
	i) interactive=1
		;;
	p) pjsub=1
		;;
	L) LASTNODE=$OPTARG
		;;
	:) echo "option -${OPTARG} requires an argument" >&2
		exit 1
		;;
	*) echo "invalid option -${OPTARG}" >&2
		exit 1
		;;
	esac
done
|
||||
|
||||
# Total number of ranks and the host list c<first>,...,c<LASTNODE>
nprocs=$((ppn * nnodes))
nodes="c$(seq -s ",c" $((LASTNODE + 1 - nnodes)) "$LASTNODE")"

# vertical cut, excluding phys loaded with Linux tasks
uti_cpu_set_lin=1,69,137,205,18-19,86-87,154-155,222-223
exclude_list=0-1,68-69,136-137,204-205,18-19,86-87,154-155,222-223
#64-67,132-135,200-203,268-271

uti_cpu_set_mck=1,69,137,205,18-19,86-87,154-155,222-223

# horizontal cut, excluding phys loaded with Linux tasks for mckernel
#uti_cpu_set_lin=204-271
#uti_cpu_set_mck=1-67

# Pinning-related lines that are later pasted into job.sh
if [ "$mck" -eq 1 ]; then
	uti_cpu_set_str="export UTI_CPU_SET=$uti_cpu_set_mck"
	i_mpi_pin_processor_exclude_list=
	i_mpi_pin=off
	i_mpi_pin_domain=
	i_mpi_pin_order=
	# Avoid binding main thread and uti thread to one CPU.
	# (With several OpenMP threads one could instead use
	#  KMP_AFFINITY=granularity=thread,scatter.)
	kmp_affinity="export KMP_AFFINITY=disabled"
else
	uti_cpu_set_str="export UTI_CPU_SET=$uti_cpu_set_lin"
	i_mpi_pin_processor_exclude_list="export I_MPI_PIN_PROCESSOR_EXCLUDE_LIST=$exclude_list"
	i_mpi_pin=on
	domain=$omp_num_threads # Use 32 when you want to match mck's -n division
	i_mpi_pin_domain="export I_MPI_PIN_DOMAIN=$domain"
	i_mpi_pin_order="export I_MPI_PIN_ORDER=compact"
	kmp_affinity="export KMP_AFFINITY=granularity=thread,scatter"
fi

echo nprocs=$nprocs nnodes=$nnodes ppn=$ppn nodes=$nodes omp_num_threads=$omp_num_threads
|
||||
|
||||
# Build and launch settings: McKernel (mcexec) versus plain Linux
if [ "${mck}" -eq 1 ]; then
	makeopt="UTI_DIR=$uti_dir_mck"
	use_mck="#PJM -x MCK=$mck_dir"
	mck_mem="#PJM -x MCK_MEM=32G@0,8G@1"
	mcexec="${mck_dir}/bin/mcexec"
	nmcexecthr=$((omp_num_threads + 4))
	mcexecopt="-n $ppn --uti-use-last-cpu" # -t $nmcexecthr

	if [ "${use_hfi}" -eq 1 ]; then
		mcexecopt="--enable-hfi1 $mcexecopt"
	fi

	if [ "$disable_syscall_intercept" -eq 0 ]; then
		mcexecopt="--enable-uti $mcexecopt"
	fi
else
	# Refuse to run on Linux while any CPU of the target nodes is offline
	offline=$(PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w "$nodes" lscpu \| grep Off 2>&1 | dshbak -c | grep Off)
	if [ -n "$offline" ]; then
		echo "Error: Some CPUs are offline: $offline" >&2
		exit 1
	fi

	makeopt="UTI_DIR=$uti_dir_lin"
	use_mck=
	mck_mem=
	mcexec=
	mcexecopt=
fi
|
||||
|
||||
# Run every rank under gdb in its own xterm when -G is given
if [ "$gdb" -eq 1 ]; then
	enable_x="-enable-x"
	gdbcmd="xterm -display localhost:11 -hold -e gdb -ex run --args"
fi

# Interactive runs start on the current host; otherwise go through ssh
case "$interactive" in
1)
	i_mpi_hydra_bootstrap_exec=
	i_mpi_hydra_bootstrap=
	hosts=
	opt_dir=/opt/intel
	ssh=
	;;
*)
	# PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes bash -c \'if \[ \"\`cat /etc/mtab \| while read line\; do cut -d\" \" -f 2\; done \| grep /work\`\" == \"\" \]\; then sudo mount /work\; fi\'
	i_mpi_hydra_bootstrap_exec="export I_MPI_HYDRA_BOOTSTRAP_EXEC=/usr/bin/ssh"
	i_mpi_hydra_bootstrap="export I_MPI_HYDRA_BOOTSTRAP=ssh"
	hosts="-hosts $nodes"
	opt_dir=/home/opt/local/cores/intel
	ssh="ssh -A c$LASTNODE"
	;;
esac

# If using ssh
# Latest versions are: 1.163, 2.199, 3.222
compilervars=
if [ "$pjsub" -eq 0 ] && [ "$interactive" -eq 0 ]; then
	compilervars=". ${opt_dir}/compilers_and_libraries_2018.2.199/linux/bin/compilervars.sh intel64"
fi
|
||||
|
||||
# -s: kill leftover processes and shut McKernel down (McKernel mode only)
if [ "${stop}" -eq 1 ] && [ "${mck}" -eq 1 ]; then
	PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \
		/usr/sbin/pidof mcexec \| xargs -r sudo kill -9
	PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \
		/usr/sbin/pidof $exe \| xargs -r sudo kill -9
	PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \
		sudo ${mck_dir}/sbin/mcstop+release.sh
fi

# -r: boot McKernel on the target nodes (McKernel mode only)
if [ "${reboot}" -eq 1 ] && [ "${mck}" -eq 1 ]; then
	# The CPU/IRQ layout below is only defined for hosts named *ofp*
	if ! hostname | grep ofp &>/dev/null; then
		echo "unknown host type" >&2
		exit 1
	fi

	# -h: Hide idle thread to prevent KNL CPU from mux-ing resource and halving throughput
	PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \
		sudo ${mck_dir}/sbin/mcreboot.sh -h -O -c 2-17,70-85,138-153,206-221,20-35,88-103,156-171,224-239,36-51,104-119,172-187,240-255,52-67,120-135,188-203,256-271 -r 2-5,70-73,138-141,206-209:0+6-9,74-77,142-145,210-213:1+10-13,78-81,146-149,214-217:68+14-17,82-85,150-153,218-221:69+20-23,88-91,156-159,224-227:136+24-27,92-95,160-163,228-231:137+28-31,96-99,164-167,232-235:204+32-35,100-103,168-171,236-239:205+36-39,104-107,172-175,240-243:18+40-43,108-111,176-179,244-247:19+44-47,112-115,180-183,248-251:86+48-51,116-119,184-187,252-255:87+52-55,120-123,188-191,256-259:154+56-59,124-127,192-195,260-263:155+60-63,128-131,196-199,264-267:222+64-67,132-135,200-203,268-271:223 -m 32G@0,12G@1

	# Alternative layout: "-O -c 68-271 -m 32G@0,12G@1" with the -r argument
	# generated by:
	# perl -e 'for ($i=0;$i<68;$i++){if($i>0){print "+";}printf("%d,%d,%d:%d", $i+68,$i+136,$i+204,$i);}'
fi
|
||||
|
||||
# Write the batch/launch script.  Variables are expanded now, so job.sh
# contains the concrete settings chosen above.
cat > ./job.sh <<EOF
#!/bin/sh

#PJM -L rscgrp=$rg
#PJM -L node=$nnodes
#PJM --mpi proc=$nprocs
#PJM -L elapse=$elapse
#PJM -L proc-crproc=16384
#PJM -g gg10
#PJM -j
#PJM -s
$use_mck
$mck_mem

$i_mpi_hydra_bootstrap_exec
$i_mpi_hydra_bootstrap

export OMP_NUM_THREADS=$omp_num_threads
#export OMP_STACKSIZE=64M
export KMP_BLOCKTIME=1
export PSM2_RCVTHREAD=0

$uti_cpu_set_str
export I_MPI_PIN=$i_mpi_pin
$i_mpi_pin_processor_exclude_list
$i_mpi_pin_domain
$i_mpi_pin_order
$kmp_affinity

export HFI_NO_CPUAFFINITY=1
export I_MPI_COLL_INTRANODE_SHM_THRESHOLD=4194304
export I_MPI_FABRICS=shm:tmi
export PSM2_RCVTHREAD=0
export I_MPI_TMI_PROVIDER=psm2
export I_MPI_FALLBACK=0
export PSM2_MQ_RNDV_HFI_WINDOW=4194304
export PSM2_MQ_EAGER_SDMA_SZ=65536
export PSM2_MQ_RNDV_HFI_THRESH=200000

export MCKERNEL_RLIMIT_STACK=32M,16G
export KMP_STACKSIZE=64m
#export KMP_HW_SUBSET=64c,1t

export I_MPI_ASYNC_PROGRESS=off

#export I_MPI_STATS=native:20,ipm
#export I_MPI_STATS=ipm
#export I_MPI_DEBUG=4
#export I_MPI_HYDRA_DEBUG=on

ulimit -c unlimited

$compilervars
mpiexec.hydra -n $nprocs -ppn $ppn $hosts $ilpopt $enable_x $gdbcmd $mcexec $mcexecopt ${test_dir}/$exe -I $disable_syscall_intercept -p $ppn
#-l

EOF
chmod u+x ./job.sh
|
||||
|
||||
# -g: submit the job through the batch system (-p), or build the test and
# run job.sh directly / via ssh.
if [ "${go}" -eq 1 ]; then
	if [ "$pjsub" -eq 1 ]; then
		pjsub ./job.sh
	else
		if [ "$interactive" -eq 0 ]; then
			. ${opt_dir}/compilers_and_libraries_2018.2.199/linux/bin/compilervars.sh intel64
		fi

		# Do not launch a stale or missing binary when the build fails
		if ! make $makeopt ./$exe; then
			echo "Error: building ./$exe failed" >&2
			exit 1
		fi

		$ssh ${test_dir}/job.sh
	fi
fi
|
||||
658
test/uti/posix_aio/002.c
Normal file
658
test/uti/posix_aio/002.c
Normal file
@@ -0,0 +1,658 @@
|
||||
#define _GNU_SOURCE
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <aio.h>
|
||||
#include <signal.h>
|
||||
#include <libgen.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/resource.h>
|
||||
#include <fcntl.h>
|
||||
#include <mpi.h>
|
||||
#include <linux/limits.h>
|
||||
#include <semaphore.h>
|
||||
#include "util.h"
|
||||
|
||||
#define SZBUF (1ULL << 23)/*23*/
|
||||
|
||||
#define MYTIME_TOUSEC 1000000
|
||||
#define MYTIME_TONSEC 1000000000
|
||||
|
||||
#define NROW 16
|
||||
#define NCOL 4
|
||||
|
||||
#define NSAMPLES_PROFILE 3
|
||||
#define NSAMPLES_DROP 1/*10*/
|
||||
#define NSAMPLES_IO 5/*20*/
|
||||
#define NSAMPLES_TOTAL 5/*20*/
|
||||
#define NSAMPLES_INNER 1
|
||||
|
||||
#define WAIT_TYPE_BUSY_LOOP 0
|
||||
#define WAIT_TYPE_SEM 1
|
||||
#define WAIT_TYPE WAIT_TYPE_SEM
|
||||
|
||||
static sem_t aio_sem;
|
||||
volatile int completion_count;
|
||||
|
||||
/*
 * Return the current wall-clock time in seconds as reported by MPI_Wtime().
 */
static inline double mytime()
{
	double now = MPI_Wtime();

	return now;
}
|
||||
|
||||
/* Book-keeping for one asynchronous I/O request. */
struct aioreq {
	/* Index of this request inside the request list */
	int rank;
	/* Total number of requests issued together with this one */
	int aio_num_threads;
	/* Last value obtained from aio_error(); EINPROGRESS while outstanding */
	int status;
	/* Control block handed to the aio_*() calls for this request */
	struct aiocb *aiocbp;
};
|
||||
|
||||
/*
 * Completion callback used with SIGEV_THREAD notification.
 *
 * Atomically counts the completed request and, when the count reaches the
 * number of requests recorded in the request descriptor, posts aio_sem to
 * wake the waiter.
 *
 * sigval: carries a pointer to the struct aioreq of the finished request
 */
static void aio_handler(sigval_t sigval)
{
	struct aioreq *req = sigval.sival_ptr;
	int done = __sync_add_and_fetch(&completion_count, 1);

	/* Only the last completion wakes the waiter */
	if (done != req->aio_num_threads) {
		return;
	}

	if (sem_post(&aio_sem) != 0) {
		pr_err("%s: error: sem_post: %s\n",
		       __func__, strerror(errno));
	}
}
|
||||
|
||||
/*
 * Signal handler for SIGEV_SIGNAL completion notification.
 *
 * When WAIT_TYPE is WAIT_TYPE_SEM it records the request's final status,
 * counts the completion and posts aio_sem once all requests are done.
 * A signal that does not originate from AIO completion is reported and
 * ignored, because its si_value does not point to a request.
 */
static void aio_sighandler(int sig, siginfo_t *si, void *ucontext)
{
	pr_debug("%s: debug: enter\n", __func__);
#if WAIT_TYPE == WAIT_TYPE_SEM
	struct aioreq *aioreq;

	if (si->si_code != SI_ASYNCIO) {
		pr_err("%s: error: unexpected si_code: %d\n",
		       __func__, si->si_code);
		return;
	}

	aioreq = si->si_value.sival_ptr;

	aioreq->status = aio_error(aioreq->aiocbp);
	if (aioreq->status != 0) {
		pr_err("%s: error: unexpected status: %d\n",
		       __func__, aioreq->status);
	}

	/* Only the last completion wakes the waiter */
	if (__sync_add_and_fetch(&completion_count, 1) == aioreq->aio_num_threads) {
		if (sem_post(&aio_sem)) {
			pr_err("%s: error: sem_post: %s\n",
			       __func__, strerror(errno));
		}
	}
#endif /* WAIT_TYPE */
}
|
||||
|
||||
int my_aio_init(int nreqs, struct aioreq *iolist, struct aiocb *aiocblist, char **aiobufs) {
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < nreqs; i++) {
|
||||
iolist[i].rank = i;
|
||||
iolist[i].aio_num_threads = nreqs;
|
||||
iolist[i].aiocbp = &aiocblist[i];
|
||||
iolist[i].aiocbp->aio_fildes = -1;
|
||||
iolist[i].aiocbp->aio_buf = aiobufs[i];
|
||||
iolist[i].aiocbp->aio_nbytes = SZBUF;
|
||||
iolist[i].aiocbp->aio_reqprio = 0;
|
||||
iolist[i].aiocbp->aio_offset = 0;
|
||||
#if 0
|
||||
iolist[i].aiocbp->aio_sigevent.sigev_notify = SIGEV_SIGNAL;
|
||||
iolist[i].aiocbp->aio_sigevent.sigev_signo = SIGUSR1;
|
||||
iolist[i].aiocbp->aio_sigevent.sigev_value.sival_ptr = &iolist[i];
|
||||
#else
|
||||
iolist[i].aiocbp->aio_sigevent.sigev_notify = SIGEV_THREAD;
|
||||
iolist[i].aiocbp->aio_sigevent.sigev_notify_function = aio_handler;
|
||||
iolist[i].aiocbp->aio_sigevent.sigev_notify_attributes = NULL;
|
||||
iolist[i].aiocbp->aio_sigevent.sigev_value.sival_ptr = &iolist[i];
|
||||
#endif
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
int my_aio_open(int aio_num_threads, struct aioreq *iolist, char **fn) {
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < aio_num_threads; i++) {
|
||||
iolist[i].aiocbp->aio_fildes = open(fn[i], O_RDWR | O_CREAT | O_TRUNC | O_DIRECT, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH);
|
||||
if (iolist[i].aiocbp->aio_fildes == -1) {
|
||||
pr_err("%s: error: open %s: %s\n",
|
||||
__func__, fn[i], strerror(errno));
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int my_aio_check(struct aioreq *iolist, int aio_num_threads, char **fn) {
|
||||
int ret;
|
||||
int i;
|
||||
FILE **fp = { 0 };
|
||||
char *data;
|
||||
|
||||
if (!(fp = malloc(sizeof(FILE *) * aio_num_threads))) {
|
||||
pr_err("error: allocating fp\n");
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Check contents */
|
||||
for (i = 0; i < aio_num_threads; i++) {
|
||||
if (!(data = malloc(SZBUF))) {
|
||||
pr_err("error: allocating data\n");
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!(fp[i] = fopen(fn[i], "r+"))) {
|
||||
pr_err("%s: error: fopen %s: %s\n",
|
||||
__func__, fn[i], strerror(errno));
|
||||
ret = -errno;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (fread(data, sizeof(char), SZBUF, fp[i]) != SZBUF) {
|
||||
pr_err("%s: error: fread\n",
|
||||
__func__);
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (memcmp((const void*)iolist[i].aiocbp->aio_buf, data, SZBUF)) {
|
||||
pr_err("%s: Data written to file %s differs from data in memory\n",
|
||||
__func__, fn[i]);
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
ret = 0;
|
||||
out:
|
||||
for (i = 0; i < aio_num_threads; i++) {
|
||||
fclose(fp[i]);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void my_aio_close(int aio_num_threads, struct aioreq *iolist) {
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < aio_num_threads; i++) {
|
||||
if (iolist[i].aiocbp->aio_fildes != -1) {
|
||||
close(iolist[i].aiocbp->aio_fildes);
|
||||
iolist[i].aiocbp->aio_fildes = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int my_aio(int aio_num_threads, struct aioreq *iolist, char **fn, long nsec_calc, int no_aio) {
|
||||
int ret;
|
||||
int i, j;
|
||||
|
||||
//pr_debug("%s: debug: enter\n", __func__);
|
||||
|
||||
|
||||
/* Start async IO */
|
||||
for (i = 0; i < NSAMPLES_INNER; i++) {
|
||||
if (no_aio) goto skip1;
|
||||
|
||||
if ((ret = my_aio_open(aio_num_threads, iolist, fn)) == -1) {
|
||||
pr_err("%s: error: my_aio_open: %s\n",
|
||||
__func__, strerror(errno));
|
||||
ret = -errno;
|
||||
goto out;
|
||||
}
|
||||
//pr_debug("%s: debug: after my_aio_open\n", __func__);
|
||||
|
||||
|
||||
/* Reset completion */
|
||||
completion_count = 0;
|
||||
__sync_synchronize();
|
||||
|
||||
for (j = 0; j < aio_num_threads; j++) {
|
||||
iolist[j].status = EINPROGRESS;
|
||||
|
||||
if ((ret = aio_write(iolist[j].aiocbp)) == -1) {
|
||||
pr_err("%s: error: aio_write: %s\n",
|
||||
__func__, strerror(errno));
|
||||
ret = -errno;
|
||||
goto out;
|
||||
}
|
||||
|
||||
//pr_debug("%s: debug: after %d-th aio_write\n", __func__, j);
|
||||
}
|
||||
skip1:
|
||||
/* Emulate calcuation phase */
|
||||
ndelay(nsec_calc);
|
||||
if (no_aio) goto skip2;
|
||||
|
||||
#if 0
|
||||
int k;
|
||||
for (k = 0; k < 20; k++) {
|
||||
char cmd[256];
|
||||
sprintf(cmd, "ls /proc/%d/task | wc -l", getpid());
|
||||
system(cmd);
|
||||
usleep(200000);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Wait for completion of async IO */
|
||||
#if WAIT_TYPE == WAIT_TYPE_SEM
|
||||
|
||||
retry:
|
||||
ret = sem_wait(&aio_sem);
|
||||
if (ret == -1) {
|
||||
if (errno == EINTR) {
|
||||
pr_warn("%s: warning: sem_wait interrupted\n",
|
||||
__func__);
|
||||
goto retry;
|
||||
} else {
|
||||
pr_err("%s: error: sem_wait: %s\n",
|
||||
__func__, strerror(errno));
|
||||
}
|
||||
}
|
||||
//pr_debug("%s: debug: completion_count: %d\n", __func__, completion_count);
|
||||
|
||||
#elif WAIT_TYPE == WAIT_TYPE_BUSY_LOOP
|
||||
|
||||
while (completion_count != aio_num_threads) {
|
||||
for (j = 0; j < aio_num_threads; j++) {
|
||||
if (iolist[j].status != EINPROGRESS) {
|
||||
continue;
|
||||
}
|
||||
|
||||
iolist[j].status = aio_error(iolist[j].aiocbp);
|
||||
|
||||
switch (iolist[j].status) {
|
||||
case 0: /* Completed */
|
||||
goto completed;
|
||||
case EINPROGRESS:
|
||||
break;
|
||||
case ECANCELED:
|
||||
pr_err("%s: error: aio is cancelled\n",
|
||||
__func__);
|
||||
goto completed;
|
||||
default:
|
||||
pr_err("%s: error: aio_error: %s\n",
|
||||
__func__, strerror(iolist[j].status));
|
||||
goto completed;
|
||||
completed:
|
||||
completion_count++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif /* WAIT_TYPE */
|
||||
/* Check amount read */
|
||||
for (j = 0; j < aio_num_threads; j++) {
|
||||
ssize_t size;
|
||||
|
||||
if ((size = aio_return(iolist[j].aiocbp)) != SZBUF) {
|
||||
pr_err("%s: Expected to read %ld B but #%d has read %ld B\n",
|
||||
__func__, SZBUF, j, size);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
my_aio_close(aio_num_threads, iolist);
|
||||
skip2:;
|
||||
}
|
||||
ret = 0;
|
||||
out:
|
||||
my_aio_close(aio_num_threads, iolist);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int measure(double *result, int nsamples, int nsamples_drop, int aio_num_threads, struct aioreq *iolist, char **fn, long nsec_calc, int rank, int profile, int no_aio) {
|
||||
int ret;
|
||||
int i;
|
||||
double t_l, t_g, t_sum = 0;
|
||||
double start, end;
|
||||
|
||||
for (i = 0; i < nsamples + nsamples_drop; i++) {
|
||||
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
|
||||
/* Set parameter based on current IPC and frequency */
|
||||
ndelay_init(0);
|
||||
|
||||
start = mytime();
|
||||
|
||||
struct rusage ru_start, ru_end;
|
||||
struct timeval tv_start, tv_end;
|
||||
|
||||
if (profile) {
|
||||
if ((ret = getrusage(RUSAGE_SELF, &ru_start))) {
|
||||
pr_err("%s: error: getrusage failed (%d)\n", __func__, ret);
|
||||
}
|
||||
|
||||
if ((ret = gettimeofday(&tv_start, NULL))) {
|
||||
pr_err("%s: error: gettimeofday failed (%d)\n", __func__, ret);
|
||||
}
|
||||
}
|
||||
|
||||
if ((ret = my_aio(aio_num_threads, iolist, fn, nsec_calc, no_aio))) {
|
||||
pr_err("%s: error: my_aio returned %d\n",
|
||||
__func__, ret);
|
||||
}
|
||||
|
||||
if (profile) {
|
||||
if ((ret = getrusage(RUSAGE_SELF, &ru_end))) {
|
||||
pr_err("%s: error: getrusage failed (%d)\n", __func__, ret);
|
||||
}
|
||||
|
||||
if ((ret = gettimeofday(&tv_end, NULL))) {
|
||||
pr_err("%s: error: gettimeofday failed (%d)\n", __func__, ret);
|
||||
}
|
||||
|
||||
if (rank == 0) pr_debug("%s: wall: %ld, user: %ld, sys: %ld\n", __func__,
|
||||
DIFFUSEC(tv_end, tv_start),
|
||||
DIFFUSEC(ru_end.ru_utime, ru_start.ru_utime),
|
||||
DIFFUSEC(ru_end.ru_stime, ru_start.ru_stime));
|
||||
}
|
||||
|
||||
end = mytime();
|
||||
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
|
||||
/* Check contents */
|
||||
if ((ret = my_aio_check(iolist, aio_num_threads, fn))) {
|
||||
pr_err("%s: error: my_aio_check returned %d\n",
|
||||
__func__, ret);
|
||||
}
|
||||
|
||||
if (i < nsamples_drop) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Take max */
|
||||
t_l = end - start;
|
||||
MPI_Allreduce(&t_l, &t_g, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
|
||||
t_sum += t_g;
|
||||
}
|
||||
|
||||
*result = t_sum / nsamples;
|
||||
ret = 0;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int ret;
|
||||
int i, j, progress, l;
|
||||
int rank, nproc;
|
||||
int ppn = -1;
|
||||
int aio_num_threads = -1;
|
||||
int disable_syscall_intercept = 0;
|
||||
struct aioreq *iolist;
|
||||
struct aiocb *aiocblist;
|
||||
struct sigaction sa;
|
||||
double t_io_ave, t_total_ave;
|
||||
double t_table[NROW][NCOL] = { 0 };
|
||||
int opt;
|
||||
char **aiobufs;
|
||||
char **fn;
|
||||
char src_dir[PATH_MAX];
|
||||
char *argv0;
|
||||
|
||||
opterr = 0; /* Don't print out error when not recognizing option character */
|
||||
|
||||
while ((opt = getopt(argc, argv, ":I:p:t:")) != -1) {
|
||||
switch (opt) {
|
||||
case 'I':
|
||||
disable_syscall_intercept = atoi(optarg);
|
||||
break;
|
||||
case 'p':
|
||||
ppn = atoi(optarg);
|
||||
break;
|
||||
case 't':
|
||||
aio_num_threads = atoi(optarg);
|
||||
break;
|
||||
case '?':
|
||||
pr_err("error: invalid option: -%c\n",
|
||||
optopt);
|
||||
ret = 1;
|
||||
goto out;
|
||||
case ':':
|
||||
pr_err("error: option -%c requires an argument\n",
|
||||
optopt);
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
if (ppn == -1) {
|
||||
pr_err("error: specify ppn with -p <ppn>\n");
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (aio_num_threads == -1) {
|
||||
pr_err("error: specify aio_num_threads with -p <aio_num_threads>\n");
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Initialize MPI */
|
||||
MPI_Init(&argc, &argv);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
|
||||
|
||||
#if 0
|
||||
int k;
|
||||
for (k = 0; k < 20; k++) {
|
||||
char cmd[256];
|
||||
sprintf(cmd, "ls /proc/%d/task | wc -l", getpid());
|
||||
system(cmd);
|
||||
usleep(200000);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Show parameters */
|
||||
if (rank == 0) {
|
||||
#pragma omp parallel
|
||||
{
|
||||
if (omp_get_thread_num() == 0) {
|
||||
printf("nproc=%d,#threads=%d\n", nproc, omp_get_num_threads());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Set verbosity */
|
||||
//test_set_loglevel(TEST_LOGLEVEL_WARN);
|
||||
|
||||
/* Set parameter based on current IPC and frequency */
|
||||
ndelay_init(1);
|
||||
|
||||
/* Initialize files */
|
||||
if (!(fn = malloc(sizeof(char *) * aio_num_threads))) {
|
||||
pr_err("error: allocating fn\n");
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
argv0 = strdup(argv[0]);
|
||||
sprintf(src_dir, "%s", dirname(argv0));
|
||||
for (i = 0; i < aio_num_threads; i++) {
|
||||
if (!(fn[i] = malloc(SZBUF))) {
|
||||
pr_err("error: allocating data\n");
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
sprintf(fn[i], "%s/rank%d-number%d", src_dir, rank, i);
|
||||
if (rank < 2 && i < 2) {
|
||||
pr_debug("debug: rank: %d, fn[%d]: %s\n",
|
||||
rank, i, fn[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Allocate aio arrays */
|
||||
if (!(iolist = calloc(aio_num_threads, sizeof(struct aioreq)))) {
|
||||
pr_err("%s: error: allocating iolist\n",
|
||||
__func__);
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!(aiocblist = calloc(aio_num_threads, sizeof(struct aiocb)))) {
|
||||
pr_err("%s: error: allocating aiocblist\n",
|
||||
__func__);
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Prepare data to be written */
|
||||
if (!(aiobufs = malloc(sizeof(char *) * aio_num_threads))) {
|
||||
pr_err("error: allocating aiobufs\n");
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (i = 0; i < aio_num_threads; i++) {
|
||||
aiobufs[i] = malloc(SZBUF);
|
||||
if (!aiobufs[i]) {
|
||||
pr_err("%s: error: allocating aiobufs\n",
|
||||
__func__);
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (j = 0; j < SZBUF; j++) {
|
||||
*(aiobufs[i] + j) = i + j + rank;
|
||||
}
|
||||
}
|
||||
|
||||
/* Initialize aio parameters except fd and status */
|
||||
if ((ret = my_aio_init(aio_num_threads, iolist, aiocblist, aiobufs))) {
|
||||
pr_err("%s: error: my_aio_init returned %d\n",
|
||||
__func__, ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
#if 0
|
||||
/* Set signal handlers */
|
||||
sa.sa_flags = SA_RESTART | SA_SIGINFO;
|
||||
sa.sa_sigaction = aio_sighandler;
|
||||
if (sigaction(SIGUSR1, &sa, NULL) == -1) {
|
||||
pr_err("%s: error: sigaction: %s\n",
|
||||
__func__, strerror(errno));
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Initialize semaphore */
|
||||
if ((ret = sem_init(&aio_sem, 0, 0))) {
|
||||
pr_err("%s: error: sem_init: %s\n", __func__, strerror(errno));
|
||||
ret = -errno;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Take profile */
|
||||
if ((ret = measure(&t_io_ave, NSAMPLES_PROFILE, 0, aio_num_threads, iolist, fn, 0, rank, 1, 0))) {
|
||||
pr_err("error: measure returned %d\n", ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Measure IO only time */
|
||||
if ((ret = measure(&t_io_ave, NSAMPLES_IO, NSAMPLES_DROP, aio_num_threads, iolist, fn, 0, rank, 0, 0))) {
|
||||
pr_err("error: measure returned %d\n", ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (rank == 0) {
|
||||
printf("t_io_ave: %.0f usec, %.0f MB/s per node\n",
|
||||
t_io_ave * MYTIME_TOUSEC,
|
||||
SZBUF * ppn * aio_num_threads / t_io_ave / 1000000);
|
||||
}
|
||||
|
||||
/* Measure time with no progress, progress and no uti, progress and uti */
|
||||
for (progress = 0; progress <= (disable_syscall_intercept ? 0 : -1); progress += 1) {
|
||||
|
||||
if (progress == 1) {
|
||||
/* Ignore uti_attr, spawn a thread onto compute CPUs */
|
||||
setenv("DISABLE_UTI", "1", 1);
|
||||
} else if (progress == 2) {
|
||||
unsetenv("DISABLE_UTI");
|
||||
}
|
||||
|
||||
/* Increasing calculation time up to 100% of IO time */
|
||||
for (l = 0; l <= NROW - 1; l += 1) {
|
||||
long nsec_calc = (t_io_ave * MYTIME_TONSEC * l) / 10;
|
||||
|
||||
if ((ret = measure(&t_total_ave, NSAMPLES_TOTAL, NSAMPLES_DROP, aio_num_threads, iolist, fn, nsec_calc, rank, 0, 0))) {
|
||||
pr_err("error: measure returned %d\n", ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (rank == 0) {
|
||||
if (l == 0) {
|
||||
pr_debug("progress=%d\n", progress);
|
||||
if (progress == 0) {
|
||||
pr_debug("calc\ttotal\n");
|
||||
} else {
|
||||
pr_debug("total\n");
|
||||
}
|
||||
}
|
||||
|
||||
t_table[l][0] = nsec_calc * (MYTIME_TOUSEC / (double)MYTIME_TONSEC);
|
||||
if (progress == 0) {
|
||||
pr_debug("%.0f\t%.0f\n", nsec_calc * (MYTIME_TOUSEC / (double)MYTIME_TONSEC), t_total_ave * MYTIME_TOUSEC);
|
||||
t_table[l][progress + 1] = t_total_ave * MYTIME_TOUSEC;
|
||||
} else {
|
||||
pr_debug("%.0f\n", t_total_ave * MYTIME_TOUSEC);
|
||||
t_table[l][progress + 1] = t_total_ave * MYTIME_TOUSEC;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (rank == 0) {
|
||||
printf("calc,no prog,prog and no uti, prog and uti\n");
|
||||
for (l = 0; l <= NROW - 1; l++) {
|
||||
for (i = 0; i < NCOL; i++) {
|
||||
if (i > 0) {
|
||||
printf(",");
|
||||
}
|
||||
printf("%.0f", t_table[l][i]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
//pr_debug("after barrier\n");
|
||||
|
||||
MPI_Finalize();
|
||||
//pr_debug("after finalize\n");
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
if ((ret = sem_destroy(&aio_sem))) {
|
||||
pr_err("%s: error: sem_destroy: %s\n", __func__, strerror(errno));
|
||||
goto out;
|
||||
}
|
||||
|
||||
free(argv0);
|
||||
return ret;
|
||||
}
|
||||
308
test/uti/posix_aio/002.sh
Executable file
308
test/uti/posix_aio/002.sh
Executable file
@@ -0,0 +1,308 @@
|
||||
#!/usr/bin/bash

# Driver for the posix_aio benchmark named after this script.  It always
# generates ./job.sh; the options select what else is done:
#
#   -s        stop: kill mcexec/benchmark and release McKernel (only with -m)
#   -r        reboot McKernel on the nodes (only with -m)
#   -g        go: submit job.sh with pjsub (-p) or build and run it via ssh
#   -m        run on McKernel (through mcexec) instead of Linux
#   -N <n>    number of nodes
#   -P <n>    processes per node
#   -o <n>    OMP_NUM_THREADS
#   -h        pass --enable-hfi1 to mcexec
#   -G        run each process under gdb in an xterm
#   -I <0|1>  passed to the benchmark as -I; 0 also adds --enable-uti to mcexec
#   -i        interactive: no ssh bootstrap, no explicit host list
#   -p        submit with pjsub
#   -L <n>    number of the last node to use
#
# NOTE(review): rg, elapse and ilpopt are never assigned here and expand to
# whatever the environment provides (or to nothing) - confirm that is intended.

test_dir=$(pwd -P)
mck_dir=${HOME}/project/os/install
uti_dir_lin=${HOME}/project/uti/install_linux
uti_dir_mck=${HOME}/project/uti/install_mckernel

exe=$(basename $0 | sed 's/\.sh//')

stop=0
reboot=0
go=0

interactive=0
pjsub=0
gdb=0
disable_syscall_intercept=0
mck=0
nnodes=2
host_type=wallaby
LASTNODE=15
use_hfi=0
omp_num_threads=4
ppn=4
aio_num_threads=1

# Only set conditionally below; start empty so that nothing leaks in from
# the caller's environment.
domain=
enable_x=
gdbcmd=

while getopts srgc:ml:N:P:o:hGI:ipL: OPT
do
    case ${OPT} in
	s) stop=1
	    ;;
	r) reboot=1
	    ;;
	g) go=1
	    ;;
	m) mck=1
	    ;;
	N) nnodes=$OPTARG
	    ;;
	P) ppn=$OPTARG
	    ;;
	o) omp_num_threads=$OPTARG
	    ;;
	h) use_hfi=1
	    ;;
	G) gdb=1
	    ;;
	I) disable_syscall_intercept=$OPTARG
	    ;;
	i) interactive=1
	    ;;
	p) pjsub=1
	    ;;
	L) LASTNODE=$OPTARG
	    ;;
	*) echo "invalid option -${OPT}" >&2
	    exit 1
	    ;;
    esac
done

case $host_type in
    wallaby) hnprefix=wallaby
	;;
    ofp) hnprefix=c
	;;
    *) echo "invalid host_type $host_type"
	exit 1
	;;
esac

nprocs=$((ppn * nnodes))
# Comma-separated names of the last $nnodes nodes, ending at $LASTNODE
nodes="$hnprefix$(seq -s ",$hnprefix" $(($LASTNODE + 1 - $nnodes)) $LASTNODE)"

case $host_type in
    wallaby)
	uti_cpu_set_lin=0,16,8,24
	exclude_list=0,16,8,24
	uti_cpu_set_mck=0,16,8,24
	;;
    ofp)
	# vertical cut, excluding phys loaded with Linux tasks
	uti_cpu_set_lin=1,69,137,205,18-19,86-87,154-155,222-223
	exclude_list=0-1,68-69,136-137,204-205,18-19,86-87,154-155,222-223
	#64-67,132-135,200-203,268-271

	uti_cpu_set_mck=1,69,137,205,18-19,86-87,154-155,222-223

	# horizontal cut, excluding phys loaded with Linux tasks for mckernel
	#uti_cpu_set_lin=204-271
	#uti_cpu_set_mck=1-67
	;;
    *) echo "invalid host_type $host_type"
	exit 1
	;;
esac

if [ $mck -eq 0 ]; then
    uti_cpu_set_str="export UTI_CPU_SET=$uti_cpu_set_lin"
    i_mpi_pin_processor_exclude_list="export I_MPI_PIN_PROCESSOR_EXCLUDE_LIST=$exclude_list"
else
    uti_cpu_set_str="export UTI_CPU_SET=$uti_cpu_set_mck"
    i_mpi_pin_processor_exclude_list=
fi

if [ ${mck} -eq 1 ]; then
    i_mpi_pin=off
    i_mpi_pin_domain=
    i_mpi_pin_order=
#    if [ $omp_num_threads -eq 1 ]; then
#	# Avoid binding main thread and uti thread to one CPU
	kmp_affinity="export KMP_AFFINITY=disabled"
#    else
#	# Bind rank to OMP_NUM_THREAD-sized CPU-domain
#	kmp_affinity="export KMP_AFFINITY=granularity=thread,scatter"
#    fi
else
    i_mpi_pin=on
    domain=$omp_num_threads # Use 32 when you want to match mck's -n division
    i_mpi_pin_domain="export I_MPI_PIN_DOMAIN=$domain"
    i_mpi_pin_order="export I_MPI_PIN_ORDER=compact"
    kmp_affinity="export KMP_AFFINITY=granularity=thread,scatter"
fi

echo nprocs=$nprocs nnodes=$nnodes ppn=$ppn nodes=$nodes domain=$domain

if [ ${mck} -eq 1 ]; then
    makeopt="UTI_DIR=$uti_dir_mck"
    use_mck="#PJM -x MCK=$mck_dir"
    mck_mem="#PJM -x MCK_MEM=32G@0,8G@1"
    mcexec="${mck_dir}/bin/mcexec"
    nmcexecthr=$((omp_num_threads + 1 + aio_num_threads * 2 + 2))
    mcexecopt="-n $ppn -t $nmcexecthr" # --uti-use-last-cpu

    if [ ${use_hfi} -eq 1 ]; then
	mcexecopt="--enable-hfi1 $mcexecopt"
    fi

    if [ $disable_syscall_intercept -eq 0 ]; then
	mcexecopt="--enable-uti $mcexecopt"
    fi

else
    offline=$(PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes lscpu 2>&1 | dshbak -c | grep Off-line)
    if [ "$offline" != "" ]; then
	echo "Error: Some CPUs are offline: $offline"
	# A bare "exit" would return echo's status (0) and hide the failure
	exit 1
    fi

    makeopt="UTI_DIR=$uti_dir_lin"
    use_mck=
    mck_mem=
    mcexec=
    mcexecopt=
fi

if [ $gdb -eq 1 ]; then
    enable_x="-enable-x"
    gdbcmd="xterm -display localhost:11 -hold -e gdb -ex run --args"
fi

if [ $interactive -eq 1 ]; then
    i_mpi_hydra_bootstrap_exec=
    i_mpi_hydra_bootstrap=
    hosts=
    ssh=
else
#    PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes bash -c \'if \[ \"\`cat /etc/mtab \| while read line\; do cut -d\" \" -f 2\; done \| grep /work\`\" == \"\" \]\; then sudo mount /work\; fi\'
    i_mpi_hydra_bootstrap_exec="export I_MPI_HYDRA_BOOTSTRAP_EXEC=/usr/bin/ssh"
    i_mpi_hydra_bootstrap="export I_MPI_HYDRA_BOOTSTRAP=ssh"
    hosts="-hosts $nodes"
    # job.sh is started on the first node of the list
    ssh="ssh -A $(echo $nodes | cut -d',' -f1)"
fi

case $host_type in
    wallaby)
	i_mpi_fabrics="export I_MPI_FABRICS=shm:dapl"
	i_mpi_tmi_provider=

	opt_dir=/opt/intel
	impiver=2018.3.222 # 1.163, 2.199, 3.222
	;;
    ofp)
	i_mpi_fabrics="export I_MPI_FABRICS=shm:tmi"
	i_mpi_tmi_provider="export I_MPI_TMI_PROVIDER=psm2"

	if [ $interactive -eq 1 ]; then
	    opt_dir=/opt/intel
	else
	    opt_dir=/home/opt/local/cores/intel
	fi
	impiver=2018.1.163 # 1.163, 2.199, 3.222
	;;
    *) echo "invalid host_type $host_type"
	exit 1
	;;
esac

# If using ssh
if [ $pjsub -eq 0 ] && [ $interactive -eq 0 ]; then
    compilervars=". ${opt_dir}/compilers_and_libraries_${impiver}/linux/bin/compilervars.sh intel64"
else
    compilervars=
fi

if [ ${stop} -eq 1 ]; then
    if [ ${mck} -eq 1 ]; then
	PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \
	    /usr/sbin/pidof mcexec \| xargs -r sudo kill -9
	PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \
	    /usr/sbin/pidof $exe \| xargs -r sudo kill -9
	PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \
	    sudo ${mck_dir}/sbin/mcstop+release.sh
    else
	:
    fi
fi

if [ ${reboot} -eq 1 ]; then
    if [ ${mck} -eq 1 ]; then
	case $host_type in
	    wallaby) hnprefix=wallaby
		PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes sudo ${mck_dir}/sbin/mcreboot.sh -h -O -c 1-7,17-23,9-15,25-31 -r 1-7:0+17-23:16+9-15:8+25-31:24 -m 10G@0,10G@1
		#PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes sudo ${mck_dir}/sbin/mcreboot.sh -h -O -c 1-4 -r 1-4:0 -m 10G@0,10G@1
		;;
	    ofp)
		# -h: Prevent unnecessary CPU resource division for KNL
		PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \
		    sudo ${mck_dir}/sbin/mcreboot.sh -h -O -c 2-17,70-85,138-153,206-221,20-35,88-103,156-171,224-239,36-51,104-119,172-187,240-255,52-67,120-135,188-203,256-271 -r 2-5,70-73,138-141,206-209:0+6-9,74-77,142-145,210-213:1+10-13,78-81,146-149,214-217:68+14-17,82-85,150-153,218-221:69+20-23,88-91,156-159,224-227:136+24-27,92-95,160-163,228-231:137+28-31,96-99,164-167,232-235:204+32-35,100-103,168-171,236-239:205+36-39,104-107,172-175,240-243:18+40-43,108-111,176-179,244-247:19+44-47,112-115,180-183,248-251:86+48-51,116-119,184-187,252-255:87+52-55,120-123,188-191,256-259:154+56-59,124-127,192-195,260-263:155+60-63,128-131,196-199,264-267:222+64-67,132-135,200-203,268-271:223 -m 32G@0,12G@1
		;;
	    *) echo "invalid host_type $host_type"
		exit 1
		;;
	esac
    else
	:
    fi
fi

# Generate the job script; the variables below are expanded now, not at run time
cat > ./job.sh <<EOF
#!/bin/sh

#PJM -L rscgrp=$rg
#PJM -L node=$nnodes
#PJM --mpi proc=$nprocs
#PJM -L elapse=$elapse
#PJM -L proc-crproc=16384
#PJM -g gg10
#PJM -j
#PJM -s
$use_mck
$mck_mem

$i_mpi_hydra_bootstrap_exec
$i_mpi_hydra_bootstrap

export OMP_NUM_THREADS=$omp_num_threads
#export OMP_STACKSIZE=64M
export KMP_BLOCKTIME=1

$uti_cpu_set_str
export I_MPI_PIN=$i_mpi_pin
$i_mpi_pin_processor_exclude_list
$i_mpi_pin_domain
$i_mpi_pin_order
$kmp_affinity

export HFI_NO_CPUAFFINITY=1
export I_MPI_COLL_INTRANODE_SHM_THRESHOLD=4194304
$i_mpi_fabrics
$i_mpi_tmi_provider
export I_MPI_FALLBACK=0
export PSM2_RCVTHREAD=0
export PSM2_MQ_RNDV_HFI_WINDOW=4194304
export PSM2_MQ_EAGER_SDMA_SZ=65536
export PSM2_MQ_RNDV_HFI_THRESH=200000

export MCKERNEL_RLIMIT_STACK=32M,16G
export KMP_STACKSIZE=64m
#export KMP_HW_SUBSET=64c,1t

export I_MPI_ASYNC_PROGRESS=off

#export I_MPI_STATS=native:20,ipm
#export I_MPI_STATS=ipm
#export I_MPI_DEBUG=4
#export I_MPI_HYDRA_DEBUG=on

ulimit -c unlimited

$compilervars
mpiexec.hydra -n $nprocs -ppn $ppn $hosts $ilpopt $enable_x $gdbcmd $mcexec $mcexecopt ${test_dir}/$exe -I $disable_syscall_intercept -p $ppn -t $aio_num_threads
#$gdbcmd $mcexec $mcexecopt ${test_dir}/$exe -I $disable_syscall_intercept -p $ppn -t $aio_num_threads
#-l

EOF
chmod u+x ./job.sh

if [ ${go} -eq 1 ]; then
    if [ $pjsub -eq 1 ]; then
	pjsub ./job.sh
    else
	if [ $interactive -eq 0 ]; then
	    eval $compilervars
	fi
	make $makeopt ./$exe
	# Kill leftovers of a previous run before starting a new one
	PDSH_SSH_ARGS_APPEND="-tt -q" pdsh -t 2 -w $nodes \
	    /usr/sbin/pidof $exe \| xargs -r sudo kill -9
	$ssh ${test_dir}/job.sh
    fi
fi
|
||||
51
test/uti/posix_aio/Makefile
Executable file
51
test/uti/posix_aio/Makefile
Executable file
@@ -0,0 +1,51 @@
|
||||
.SUFFIXES:	# Clear suffixes
.ONESHELL:	# Pack all the lines and pass it to shell

# Targets that do not name a file to be built
.PHONY: all file clean

VMTOUCH=$(HOME)/project/src/vmtouch/install/bin/vmtouch

# Specify it via *.sh
UTI_DIR=${HOME}/project/uti/install_linux

CC=mpiicc
LD=$(CC)

CFLAGS = -g -O0 -Wall -DVMTOUCH=$(VMTOUCH)
LDFLAGS = -lpthread -L$(UTI_DIR)/lib -Wl,-rpath -Wl,$(UTI_DIR)/lib -luti -lrt
SRCS = $(shell ls 0*.c)
OBJS = $(SRCS:.c=.o) util.o
EXES = $(SRCS:.c=)

# Create the 1 MiB data files ./data/1 and ./data/2.  The list is spelled
# out because make runs recipes with /bin/sh, where the bash-only brace
# expansion {1..2} is not guaranteed to work.
define create_files =
	mkdir -p ./data
	for i in 1 2; do
		dd if=/dev/zero of=./data/$i bs=1M count=1
	done
endef

all: $(EXES)

file::
	$(value create_files)

util.o:: util.c util.h
	$(CC) $(CFLAGS) -qopenmp -c $<

001: 001.o util.o
	$(LD) -o $@ $^ $(LDFLAGS) -qopenmp

001.o:: 001.c
	$(CC) $(CFLAGS) -qopenmp -c $<

002: 002.o util.o
	$(LD) -o $@ $^ $(LDFLAGS) -qopenmp

002.o:: 002.c
	$(CC) $(CFLAGS) -qopenmp -c $<

%: %.o
	$(LD) -o $@ $^ $(LDFLAGS)

%.o::%.c
	$(CC) $(CFLAGS) -c $<

clean:
	rm -f core $(EXES) $(OBJS)
|
||||
15
test/uti/posix_aio/README
Normal file
15
test/uti/posix_aio/README
Normal file
@@ -0,0 +1,15 @@
|
||||
=============================================
|
||||
Benchmarks of asynchronous I/O with busy CPUs
|
||||
=============================================
|
||||
|
||||
The purpose is to show the benefit of spawning the asynchronous threads onto dedicated CPUs.
|
||||
|
||||
---
|
||||
001
|
||||
---
|
||||
Write
|
||||
|
||||
---
|
||||
002
|
||||
---
|
||||
Write; I/O completion is notified by spawning a thread
|
||||
133
test/uti/posix_aio/util.c
Normal file
133
test/uti/posix_aio/util.c
Normal file
@@ -0,0 +1,133 @@
|
||||
#define _GNU_SOURCE /* See feature_test_macros(7) */
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <sys/syscall.h> /* For SYS_xxx definitions */
|
||||
#include <sched.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <mpi.h>
|
||||
#include "util.h"
|
||||
|
||||
/* Messaging */
|
||||
enum test_loglevel test_loglevel = TEST_LOGLEVEL_DEBUG;
|
||||
|
||||
/* Calculation */
|
||||
/*
 * Burn CPU time for n units of work.  One unit is a 100-iteration counting
 * loop on %rcx that touches no memory (x86-64 only).
 *
 * The loop counter has the same type as n: with an int counter, any
 * n > INT_MAX overflowed the counter instead of terminating the loop.
 */
static inline void asmloop(unsigned long n) {
	unsigned long j;

	for (j = 0; j < n; j++) {
		/* __asm__ spelling also builds in strict ISO C mode, as in util.h */
		__asm__ __volatile__(
			"movq $0, %%rcx\n\t"
			"1:\t"
			"addq $1, %%rcx\n\t"
			"cmpq $99, %%rcx\n\t"
			"jle 1b\n\t"
			:
			:
			: "rcx", "cc");
	}
}
|
||||
|
||||
#define N_INIT 10000000
|
||||
double nspw; /* nsec per work */
|
||||
|
||||
void ndelay_init(int verbose) {
|
||||
struct timeval start, end;
|
||||
int rank, nproc;
|
||||
double min, sum, max;
|
||||
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
|
||||
|
||||
//clock_gettime(TIMER_KIND, &start);
|
||||
gettimeofday(&start, NULL);
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
asmloop(N_INIT);
|
||||
}
|
||||
|
||||
//clock_gettime(TIMER_KIND, &end);
|
||||
gettimeofday(&end, NULL);
|
||||
|
||||
nspw = DIFFUSEC(end, start) * 1000 / (double)N_INIT;
|
||||
|
||||
if (verbose) {
|
||||
MPI_Reduce(&nspw, &min, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
|
||||
MPI_Reduce(&nspw, &sum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
|
||||
MPI_Reduce(&nspw, &max, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
|
||||
if (rank == 0) {
|
||||
pr_debug("nspw: min=%.0f, ave=%.0f, max=%.0f\n", min, sum / nproc, max);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if 1
|
||||
void ndelay(long delay_nsec) {
|
||||
if (delay_nsec < 0) {
|
||||
printf("delay_nsec < 0\n");
|
||||
return;
|
||||
}
|
||||
#pragma omp parallel
|
||||
{
|
||||
asmloop(delay_nsec / nspw);
|
||||
}
|
||||
}
|
||||
#else /* For machines with large core-to-core performance variation (e.g. OFP) */
|
||||
void ndelay(long delay_nsec) {
|
||||
struct timespec start, end;
|
||||
|
||||
if (delay_nsec < 0) { return; }
|
||||
clock_gettime(TIMER_KIND, &start);
|
||||
|
||||
while (1) {
|
||||
clock_gettime(TIMER_KIND, &end);
|
||||
if (DIFFNSEC(end, start) >= delay_nsec) {
|
||||
break;
|
||||
}
|
||||
asmloop(2); /* ~150 ns per iteration on FOP */
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
double cycpw; /* cyc per work */

/* Normally already defined earlier in this file; identical value */
#ifndef N_INIT
#define N_INIT 10000000
#endif

/*
 * Calibrate cycpw: measure with rdtsc_light() how many cycles N_INIT units
 * of asmloop() take and store the cycles per unit.
 *
 * This is the name declared in util.h; the definition used to be spelled
 * cdlay_init(), which left the declared function without a definition.
 */
void cdelay_init(void) {
	unsigned long start, end;

	start = rdtsc_light();
	asmloop(N_INIT);
	end = rdtsc_light();
	cycpw = (end - start) / (double)N_INIT;
}

/* Former (misspelled) name, kept so that existing callers still link */
void cdlay_init(void) {
	cdelay_init();
}
|
||||
|
||||
#if 0
/*
 * Spin for delay_cyc cycles by running delay_cyc / cycpw units of
 * asmloop().  A negative delay returns immediately.
 */
void cdelay(long delay_cyc) {
	if (delay_cyc >= 0) {
		asmloop(delay_cyc / cycpw);
	}
}
#else /* For machines with large core-to-core performance variation (e.g. OFP) */
/*
 * Counter-driven variant: spin in small asmloop() steps until
 * rdtsc_light() has advanced by at least delay_cyc.  A negative delay
 * returns immediately.
 */
void cdelay(long delay_cyc) {
	unsigned long tsc_begin, tsc_now;

	if (delay_cyc < 0) {
		return;
	}

	tsc_begin = rdtsc_light();
	tsc_now = rdtsc_light();

	while (tsc_now - tsc_begin < delay_cyc) {
		asmloop(2);
		tsc_now = rdtsc_light();
	}
}
#endif
|
||||
70
test/uti/posix_aio/util.h
Normal file
70
test/uti/posix_aio/util.h
Normal file
@@ -0,0 +1,70 @@
|
||||
#ifndef __UTIL_H_INCLUDED__
#define __UTIL_H_INCLUDED__

/* Helpers shared by the posix_aio benchmarks: logging, OK/NG checks, timing. */

#include <stdint.h>
#include <stdio.h> /* fprintf()/printf() used by the macros below */

/* Messaging */

enum test_loglevel {
	TEST_LOGLEVEL_ERR = 0,
	TEST_LOGLEVEL_WARN,
	TEST_LOGLEVEL_DEBUG
};

/* Current verbosity; defined in util.c */
extern enum test_loglevel test_loglevel;

/* Messages with a level above the given one are suppressed afterwards. */
static inline void test_set_loglevel(enum test_loglevel level)
{
	test_loglevel = level;
}

/* Print to stdout if the current verbosity is at least `level`. */
#define pr_level(level, fmt, args...) do {	\
		if (test_loglevel >= level) {	\
			fprintf(stdout, fmt, ##args);	\
		}				\
	} while (0)

#define pr_err(fmt, args...) pr_level(TEST_LOGLEVEL_ERR, fmt, ##args)
#define pr_warn(fmt, args...) pr_level(TEST_LOGLEVEL_WARN, fmt, ##args)
#define pr_debug(fmt, args...) pr_level(TEST_LOGLEVEL_DEBUG, fmt, ##args)

/*
 * Report the outcome of a check.  If cond holds, "[ OK ]" is printed when
 * verb is non-zero; otherwise "[ NG ]" is printed and, when jump is
 * non-zero, `ret` is set to -1 and control goes to the label `out` (both
 * must exist in the calling function).
 */
#define _OKNG(verb, jump, cond, fmt, args...) do {	\
		if (cond) {				\
			if (verb)			\
				printf("[ OK ] " fmt, ##args);	\
		} else {				\
			printf("[ NG ] " fmt, ##args);	\
			if (jump) {			\
				ret = -1;		\
				goto out;		\
			}				\
		}					\
	} while (0)

#define OKNG(args...) _OKNG(1, 1, ##args)
#define NG(args...) _OKNG(0, 1, ##args)
#define OKNGNOJUMP(args...) _OKNG(1, 0, ##args)

/* Time */

/*
 * Read the time-stamp counter (x86-64 only).
 *
 * static inline: a plain `inline` definition in a header provides no
 * external definition in C99 and later (undefined reference when the call
 * is not inlined) and a duplicate one under GNU89 inline semantics.
 */
static inline uint64_t rdtsc_light(void)
{
	uint64_t x;
	__asm__ __volatile__("rdtscp;" /* rdtscp don't jump over earlier instructions */
			     "shl $32, %%rdx;"
			     "or %%rdx, %%rax" :
			     "=a"(x) :
			     :
			     "%rcx", "%rdx", "memory");
	return x;
}

/* Difference end - start of two struct timeval, in microseconds */
#define DIFFUSEC(end, start) (((end).tv_sec - (start).tv_sec) * 1000000UL + ((end).tv_usec - (start).tv_usec))
/* Difference end - start of two struct timespec, in nanoseconds */
#define DIFFNSEC(end, start) (((end).tv_sec - (start).tv_sec) * 1000000000UL + ((end).tv_nsec - (start).tv_nsec))
#define TIMER_KIND CLOCK_MONOTONIC_RAW /* CLOCK_THREAD_CPUTIME_ID */

/* Calculation emulation */
void ndelay_init(int verbose);
void ndelay(long delay_nsec);
void cdelay_init(void);
void cdelay(long delay_cyc);

#endif
|
||||
Reference in New Issue
Block a user