qlmpi: add testcase to qlmpi (rusage for swap)

This commit is contained in:
Ken Sato
2017-08-31 15:43:28 +09:00
parent 4b3f220659
commit f4c32e5507
32 changed files with 2953 additions and 0 deletions

View File

@@ -0,0 +1,53 @@
.SUFFIXES: # Clear suffixes
.SUFFIXES: .c
CC = gcc
MCMOD_DIR=$(HOME)/ppos
XPMEM_DIR=$(HOME)/install/xpmem-master
CPPFLAGS = -I$(MCMOD_DIR)/include
CCFLAGS = -g
LDFLAGS = -L$(MCMOD_DIR)/lib -lihk -Wl,-rpath=$(MCMOD_DIR)/ppos/lib
EXES =
SRCS =
OBJS = $(SRCS:.c=.o)
CPPFLAGSMCK = -I$(HOME)/usr/include
CCFLAGSMCK = -g -O0
#LDFLAGSMCK = -static
LDFLAGSMCK =
SRCSMCK = $(shell ls rusage*.c)
EXESMCK = $(SRCSMCK:.c=)
OBJSMCK = $(SRCSMCK:.c=.o)
all: $(EXES) $(EXESMCK)
rusage010: rusage010.o
$(CC) -o $@ $^ $(LDFLAGSMCK)
rusage010.o: rusage010.c
$(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $<
rusage008: rusage008.o
$(CC) -o $@ $^ $(LDFLAGSMCK)
rusage008.o: rusage008.c
$(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $<
rusage009: rusage009.o
$(CC) -o $@ $^ $(LDFLAGSMCK)
rusage009.o: rusage009.c
$(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $<
rusage011: rusage011.o
$(CC) -o $@ $^ $(LDFLAGSMCK) -I$(XPMEM_DIR)/include -L$(XPMEM_DIR)/lib -lxpmem -Wl,-rpath=$(XPMEM_DIR)/lib
rusage011.o: rusage011.c
$(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $<
clean:
rm -f core $(EXES) $(OBJS) $(EXESMCK) $(OBJSMCK)

21
test/qlmpi/rusage/README Normal file
View File

@@ -0,0 +1,21 @@
===========
Test matrix
===========
rusage010:
app->mmap() 2M,anon,pre-page ->set_range()->munmap()->free_process_memory_range()->clear_range()[OK]
rusage005: device file (ib ping-pong)
devobj()->get_page()->pf->munmap()->clear_range() [OK]
remote page fault->cow->clear_range() [OK]
ld-linux.so->mmap private->cow->clear_range() [OK]
rusage008: sharing file-map page
fork()->filemap->pf->clear_range() [OK]
rusage009: sharing shmget() page
fork()->shmat()->pf->clear_range() [OK]
rusage011: sharing xpmem page
fork()->xpmem_attach()->pf->clear_range() [OK]

View File

@@ -0,0 +1,10 @@
#include <unistd.h>
#define BUF_SIZE (32 * 1024)
int do_swap(char *fname, void *buffer) {
int rc = -1;
rc = syscall(801, fname, buffer, BUF_SIZE, 2);
printf("%s: swap returns %d , %s\n", __FUNCTION__, rc, fname);
return rc;
}

97
test/qlmpi/rusage/run.sh Executable file
View File

@@ -0,0 +1,97 @@
#!/usr/bin/bash
testname=$1
bootopt="-m 256M"
mcexecopt=""
testopt=""
kill="n"
dryrun="n"
sleepopt="0.4"
home=$(eval echo \$\{HOME\})
install=${home}/ppos
rusage=work/rusage/for_ql
walb=wallaby14
echo Executing ${testname}
case ${testname} in
rusage005)
#ssh wallaby -c '(cd ${home}/${rusage}/verbs; make rdma_wr)'
bn=verbs/rdma_wr
;;
*)
bn=${testname}
make clean > /dev/null 2> /dev/null
make ${bn}
esac
pid=`pidof mcexec`
if [ "${pid}" != "" ]; then
kill -9 ${pid} > /dev/null 2> /dev/null
fi
case ${testname} in
rusage010)
testopt="1"
;;
rusage005)
ssh ${walb}.aics-sys.riken.jp "${home}/${rusage}/verbs/rdma_wr -p 9999" > ${testname}_rcvside.txt &
echo "Running 'rdma_wr -p 9999' on ${walb}..."
read -p "please enter to go on."
port=9999
testopt="-s ${walb}.aics-sys.riken.jp -p ${port}"
;;
rusage008)
cp ${bn} ./file
;;
rusage009)
;;
rusage011)
if [ `lsmod | grep xpmem | wc -l` -eq 0 ]; then
sudo insmod /home/satoken/install/xpmem-master/lib/module/xpmem.ko
sudo chmod og+rw /dev/xpmem
fi
dryrun="n"
kill="n"
sleepopt="5"
;;
*)
echo Unknown test case
exit 255
esac
if [ ${dryrun} == "y" ]; then
exit
fi
sudo ${install}/sbin/mcstop+release.sh &&
sudo ${install}/sbin/mcreboot.sh ${bootopt}
if [ ${kill} == "y" ]; then
${install}/bin/mcexec ${mcexecopt} ./${bn} ${testopt} &
sleep ${sleepopt}
sudo ${install}/sbin/ihkosctl 0 kmsg > ./${testname}.log
pid=`pidof mcexec`
if [ "${pid}" != "" ]; then
kill -9 ${pid} > /dev/null 2> /dev/null
fi
else
case ${testname} in
rusage005)
echo "**** message of sender side **************************"
${install}/bin/mcexec ${mcexecopt} ./${bn} ${testopt}
echo "******************************************************"
echo "**** message of reciever side ************************"
cat ${testname}_rcvside.txt
echo "******************************************************"
#read -p "Run rdma_wr." ans
sudo ${install}/sbin/ihkosctl 0 kmsg > ./${testname}.log
;;
*)
${install}/bin/mcexec ${mcexecopt} ./${bn} ${testopt}
sudo ${install}/sbin/ihkosctl 0 kmsg > ./${testname}.log
esac
fi
sudo ${install}/sbin/mcstop+release.sh

View File

@@ -0,0 +1,100 @@
#include <unistd.h>
#include <stdio.h>
#include <sys/mman.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/wait.h>
#include "qltest.h"
#define DEBUG
#ifdef DEBUG
#define dprintf(...) \
do { \
char msg[1024]; \
sprintf(msg, __VA_ARGS__); \
fprintf(stderr, "%s,%s", __FUNCTION__, msg); \
} while (0);
#define eprintf(...) \
do { \
char msg[1024]; \
sprintf(msg, __VA_ARGS__); \
fprintf(stderr, "%s,%s", __FUNCTION__, msg); \
} while (0);
#else
#define dprintf(...) do { } while (0)
#define eprintf(...) do { } while (0)
#endif
#define CHKANDJUMP(cond, err, ...) \
do { \
if(cond) { \
eprintf(__VA_ARGS__); \
ret = err; \
goto fn_fail; \
} \
} while(0)
int sz_mem[] = {
4 * (1ULL<<10),
2 * (1ULL<<20),
1 * (1ULL<<30),
134217728};
#define SZ_INDEX 0
#define NUM_AREAS 1
int main(int argc, char** argv) {
void* mem;
int ret = 0;
pid_t pid;
int status;
int fd;
// for swap_test
int swap_rc = 0;
char buffer[BUF_SIZE];
pid = fork();
CHKANDJUMP(pid == -1, 255, "fork failed\n");
if(pid == 0) {
fd = open("./file", O_RDWR);
CHKANDJUMP(fd == -1, 255, "open failed\n");
mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n");
unsigned long val = *((unsigned long*)mem);
// for swap_test
swap_rc = do_swap("/tmp/rusage008_c.swp", buffer);
if (swap_rc < 0) {
printf("[NG] swap in child is failed\n");
}
_exit(123);
} else {
fd = open("./file", O_RDWR);
CHKANDJUMP(fd == -1, 255, "open failed\n");
mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n");
unsigned long val = *((unsigned long*)mem);
ret = waitpid(pid, &status, 0);
CHKANDJUMP(ret == -1, 255, "waitpid failed\n");
// for swap_test
swap_rc = do_swap("/tmp/rusage008_p.swp", buffer);
if (swap_rc < 0) {
printf("[NG] swap in parent is failed\n");
}
printf("exit status=%d\n", WEXITSTATUS(status));
}
fn_exit:
return ret;
fn_fail:
goto fn_exit;
}

View File

@@ -0,0 +1,111 @@
#include <unistd.h>
#include <stdio.h>
#include <sys/mman.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/wait.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include "qltest.h"
#define DEBUG
#ifdef DEBUG
#define dprintf(...) \
do { \
char msg[1024]; \
sprintf(msg, __VA_ARGS__); \
fprintf(stderr, "%s,%s", __FUNCTION__, msg); \
} while (0);
#define eprintf(...) \
do { \
char msg[1024]; \
sprintf(msg, __VA_ARGS__); \
fprintf(stderr, "%s,%s", __FUNCTION__, msg); \
} while (0);
#else
#define dprintf(...) do { } while (0)
#define eprintf(...) do { } while (0)
#endif
#define CHKANDJUMP(cond, err, ...) \
do { \
if(cond) { \
eprintf(__VA_ARGS__); \
ret = err; \
goto fn_fail; \
} \
} while(0)
int sz_mem[] = {
4 * (1ULL<<10),
2 * (1ULL<<20),
1 * (1ULL<<30),
134217728};
#define SZ_INDEX 0
int main(int argc, char** argv) {
void* mem;
int ret = 0;
pid_t pid;
int status;
key_t key = ftok(argv[0], 0);
int shmid;
// for swap_test
int swap_rc = 0;
char buffer[BUF_SIZE];
shmid = shmget(key, sz_mem[SZ_INDEX], IPC_CREAT | 0660);
CHKANDJUMP(shmid == -1, 255, "shmget failed: %s\n", strerror(errno));
pid = fork();
CHKANDJUMP(pid == -1, 255, "fork failed\n");
if(pid == 0) {
mem = shmat(shmid, NULL, 0);
CHKANDJUMP(mem == (void*)-1, 255, "shmat failed: %s\n", strerror(errno));
// for swap_test
swap_rc = do_swap("/tmp/rusage009_c.swp", buffer);
if (swap_rc < 0) {
printf("[NG] swap in child is failed\n");
}
*((unsigned long*)mem) = 0x1234;
ret = shmdt(mem);
CHKANDJUMP(ret == -1, 255, "shmdt failed\n");
_exit(123);
} else {
mem = shmat(shmid, NULL, 0);
CHKANDJUMP(mem == (void*)-1, 255, "shmat failed: %s\n", strerror(errno));
ret = waitpid(pid, &status, 0);
CHKANDJUMP(ret == -1, 255, "waitpid failed\n");
// for swap_test
swap_rc = do_swap("/tmp/rusage009_p.swp", buffer);
if (swap_rc < 0) {
printf("[NG] swap in parent is failed\n");
}
printf("%lx\n", *((unsigned long*)mem));
#if 0
struct shmid_ds buf;
ret = shmctl(shmid, IPC_RMID, &buf);
CHKANDJUMP(ret == -1, 255, "shmctl failed\n");
#endif
ret = shmdt(mem);
CHKANDJUMP(ret == -1, 255, "shmdt failed\n");
}
fn_exit:
return ret;
fn_fail:
goto fn_exit;
}

View File

@@ -0,0 +1,77 @@
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <string.h>
#include "qltest.h"
#define DEBUG
#ifdef DEBUG
#define dprintf(...) \
do { \
char msg[1024]; \
sprintf(msg, __VA_ARGS__); \
fprintf(stderr, "%s,%s", __FUNCTION__, msg); \
} while (0);
#define eprintf(...) \
do { \
char msg[1024]; \
sprintf(msg, __VA_ARGS__); \
fprintf(stderr, "%s,%s", __FUNCTION__, msg); \
} while (0);
#else
#define dprintf(...) do { } while (0)
#define eprintf(...) do { } while (0)
#endif
#define CHKANDJUMP(cond, err, ...) \
do { \
if(cond) { \
eprintf(__VA_ARGS__); \
ret = err; \
goto fn_fail; \
} \
} while(0)
int sz_anon[] = {
4 * (1ULL<<10),
2 * (1ULL<<20),
1 * (1ULL<<30),
134217728};
#define SZ_INDEX 0
#define NUM_AREAS 1
int main(int argc, char** argv) {
int i;
int sz_index;
void* anon[NUM_AREAS];
int ret = 0;
// for qlmpi test
int swap_rc = 0;
char buffer[BUF_SIZE];
CHKANDJUMP(argc != 2, 255, "%s <sz_index>\n", argv[0]);
sz_index = atoi(argv[1]);
for(i = 0; i < NUM_AREAS; i++) {
anon[i] = mmap(0, sz_anon[sz_index], PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
CHKANDJUMP(anon[i] == MAP_FAILED, 255, "mmap failed\n");
memset(anon[i], 0, sz_anon[sz_index]);
}
// for qlmpi test
swap_rc = do_swap("/tmp/rusage010.swp", buffer);
if (swap_rc < 0) {
printf("[NG] swap is failed.\n");
}
for(i = 0; i < NUM_AREAS; i++) {
munmap(anon[i], sz_anon[sz_index]);
}
fn_exit:
return ret;
fn_fail:
goto fn_exit;
}

View File

@@ -0,0 +1,141 @@
#include <unistd.h>
#include <stdio.h>
#include <sys/mman.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/wait.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <xpmem.h>
#include "qltest.h"
#define DEBUG
#ifdef DEBUG
#define dprintf(...) \
do { \
char msg[1024]; \
sprintf(msg, __VA_ARGS__); \
fprintf(stderr, "%s,%s", __FUNCTION__, msg); \
} while (0);
#define eprintf(...) \
do { \
char msg[1024]; \
sprintf(msg, __VA_ARGS__); \
fprintf(stderr, "%s,%s", __FUNCTION__, msg); \
} while (0);
#else
#define dprintf(...) do { } while (0)
#define eprintf(...) do { } while (0)
#endif
#define CHKANDJUMP(cond, err, ...) \
do { \
if(cond) { \
eprintf(__VA_ARGS__); \
ret = err; \
goto fn_fail; \
} \
} while(0)
int sz_mem[] = {
4 * (1ULL<<10),
2 * (1ULL<<20),
1 * (1ULL<<30),
134217728};
#define SZ_INDEX 0
int main(int argc, char** argv) {
void* mem;
int ret = 0;
pid_t pid;
int status;
key_t key = ftok(argv[0], 0);
int shmid;
xpmem_segid_t segid;
// for swap_test
int swap_rc = 0;
char buffer[BUF_SIZE];
shmid = shmget(key, sz_mem[SZ_INDEX], IPC_CREAT | 0660);
CHKANDJUMP(shmid == -1, 255, "shmget failed: %s\n", strerror(errno));
mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n");
memset(mem, 0, sz_mem[SZ_INDEX]);
pid = fork();
CHKANDJUMP(pid == -1, 255, "fork failed\n");
if(pid == 0) {
void *shm = shmat(shmid, NULL, 0);
CHKANDJUMP(shm == (void*)-1, 255, "shmat failed: %s\n", strerror(errno));
while((segid = *(xpmem_segid_t*)shm) == 0) { };
ret = shmdt(shm);
CHKANDJUMP(ret == -1, 255, "shmdt failed\n");
ret = xpmem_init();
CHKANDJUMP(ret != 0, 255, "xpmem_init failed: %s\n", strerror(errno));
xpmem_apid_t apid = xpmem_get(segid, XPMEM_RDWR, XPMEM_PERMIT_MODE, NULL);
CHKANDJUMP(apid == -1, 255, "xpmem_get failed: %s\n", strerror(errno));
struct xpmem_addr addr = { .apid = apid, .offset = 0 };
void* attach = xpmem_attach(addr, sz_mem[SZ_INDEX], NULL);
CHKANDJUMP(attach == (void*)-1, 255, "xpmem_attach failed: %s\n", strerror(errno));
*((unsigned long*)attach) = 0x1234;
// for swap_test
swap_rc = do_swap("/tmp/rusage011_c.swp", buffer);
if (swap_rc < 0) {
printf("[NG] swap in child is failed\n");
}
ret = xpmem_detach(attach);
CHKANDJUMP(ret == -1, 255, "xpmem_detach failed\n");
_exit(123);
} else {
void *shm = shmat(shmid, NULL, 0);
CHKANDJUMP(mem == (void*)-1, 255, "shmat failed: %s\n", strerror(errno));
ret = xpmem_init();
CHKANDJUMP(ret != 0, 255, "xpmem_init failed: %s\n", strerror(errno));
segid = xpmem_make(mem, sz_mem[SZ_INDEX], XPMEM_PERMIT_MODE, (void*)0666);
CHKANDJUMP(segid == -1, 255, "xpmem_ioctl failed: %s\n", strerror(errno));
*(xpmem_segid_t*)shm = segid;
ret = waitpid(pid, &status, 0);
CHKANDJUMP(ret == -1, 255, "waitpid failed\n");
// for swap_test
swap_rc = do_swap("/tmp/rusage011_p.swp", buffer);
if (swap_rc < 0) {
printf("[NG] swap in parent is failed\n");
}
printf("%lx\n", *((unsigned long*)mem));
struct shmid_ds buf;
ret = shmctl(shmid, IPC_RMID, &buf);
CHKANDJUMP(ret == -1, 255, "shmctl failed\n");
ret = shmdt(shm);
CHKANDJUMP(ret == -1, 255, "shmdt failed\n");
ret = xpmem_remove(segid);
CHKANDJUMP(ret == -1, 255, "xpmem_remove failed\n");
}
fn_exit:
return ret;
fn_fail:
goto fn_exit;
}

View File

@@ -0,0 +1,21 @@
/*
* aal_host.h
*
* Created on: 2011/08/09
* Author: simin
*/
#ifndef AAL_HOST_H_
#define AAL_HOST_H_
#define MAX_DEVNO 2
extern int aal_host_init();
extern int aal_host_dev_init(int dev_no);
extern void* aal_host_mem_alloc(int dev_no, int size);
extern void aal_host_mem_free(void * addr, int size);
extern int aal_host_dev_exit(int dev_no);
extern int aal_host_exit();
extern void* aal_host_mem_va2pa(int dev_no, void *virtual_addr);
#endif /* AAL_HOST_H_ */

View File

@@ -0,0 +1,11 @@
#include <stdio.h>
#include <sys/time.h>
#define CURTIME_LIB 1
double cur_time(){
struct timeval tp;
gettimeofday(&tp, NULL);
return tp.tv_sec + tp.tv_usec * 1.0E-6;
}

View File

@@ -0,0 +1,38 @@
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include "ibcomm.h"
#include "debug.h"
//#define DEBUG_DEBUG
#ifdef DEBUG_DEBUG
#define dprintf printf
#else
#define dprintf(...)
#endif
void debug_print_qp_conn_info(resource_t res, qpinfo_t qpinfo, config_t *config) {
uint8_t *p;
dprintf("local.qp_num=0x%x\n", qpinfo.qp->qp_num);
dprintf("local.lid=0x%x\n", res.port_attr->lid);
dprintf("local.sock[0]=%d\n", qpinfo.sock[0]);
if (res.rdma_mr.mr != NULL) {
dprintf("local.addr=0x%lx\n", (uint64_t)res.rdma_mr.buf);
dprintf("local.rkey=0x%x\n\n", res.rdma_mr.mr->rkey);
}
int i;
for(i = 0; i < (qpinfo.listenfd == -1 ? 1 : config->nremote); i++) {
dprintf("remote.qp_num=0x%x\n", qpinfo.remote_conn_info[i].qp_num);
dprintf("remote.lid=0x%x\n", qpinfo.remote_conn_info[i].lid);
p = qpinfo.remote_conn_info[i].gid;
dprintf(
"remote.gid = %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n",
p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
if (qpinfo.remote_conn_info[i].addr) {
dprintf("remote.addr=0x%lx\n", qpinfo.remote_conn_info[i].addr);
dprintf("remote.rkey=0x%x\n", qpinfo.remote_conn_info[i].rkey);
}
}
}

View File

@@ -0,0 +1,31 @@
#ifndef MYLIB_H
#define MYLIB_H
#ifndef NULL
#define NULL ((void *) 0)
#endif
#ifdef DEBUG
#define debug_printf(fmt,arg...) {printf("[DEBUG] " fmt, ##arg);}
#define debug_print_mem(arg...) {fprintf(stderr, "[DEBUG] ");print_mem(arg);}
#else
#define debug_printf(fmt,arg...) {}
#define debug_print_mem(arg...) {}
#endif
#ifdef ERROR
#define error_printf(fmt,arg...) {fprintf(stderr, "[ERROR] " fmt, ##arg);}
#define error_perror(arg...) {fprintf(stderr, "[ERROR] "); perror(arg);}
#else
#define error_printf(fmt,arg...) {}
#define error_perror(fmt,arg...) {}
#endif
#include "mtype.h"
/**
* get current time(sec)
*/
extern double cur_time();
extern void print_mem(addr_t addr, int size);
#endif

View File

@@ -0,0 +1,209 @@
#ifndef IBCOMM_H
#define IBCOMM_H
#include <byteswap.h>
#include "infiniband/verbs.h"
#include "sock.h"
#include "list.h"
#define _MAX_FIX_BUF_SIZE 64
#define _MAX_SQ_CAPACITY /*512*/256/*12*/
#define _MAX_RQ_CAPACITY /*512*/256/*1*/
#define _MAX_SGE_CAPACITY /*20*/3
#define _MAX_CQ_CAPACITY /*512*/256/*1*/
#define IBCOM_INLINE_DATA /*(128*4-64)*//*(512-64)*//*884*/512
#define IBCOM_RDMABUF_SZSEG (16384+4096)
#define IBCOM_MAGIC 0x55aa55aa
#define NCHAIN 2
#define SEND_CQ_FLG 1
#define RECV_CQ_FLG 2
#define IBCOMM_ERR_CODE -1
#define ibcomm_return_code_num 30
#if __BYTE_ORDER == __LITTLE_ENDIAN
static inline uint64_t htonll(uint64_t x) { return bswap_64(x); }
static inline uint64_t ntohll(uint64_t x) { return bswap_64(x); }
#elif __BYTE_ORDER == __BIG_ENDIAN
static inline uint64_t htonll(uint64_t x) { return x; }
static inline uint64_t ntohll(uint64_t x) { return x; }
#else
#error __BYTE_ORDER is neither __LITTLE_ENDIAN nor __BIG_ENDIAN
#endif
/* ERROR definition*/
enum ibcomm_return_code{
_IBCOMM_RETCODE_SUCCESS ,
_IBCOMM_ERRCODE_DEVICE_FOUND,
_IBCOMM_ERRCODE_NO_DEVICE,
_IBCOMM_ERRCODE_DEVICE_OPEN,
_IBCOMM_ERRCODE_CREATE_RES,
_IBCOMM_ERRCODE_DEVICE_QUERY_PORT,
_IBCOMM_ERRCODE_PD_ALLOC,
_IBCOMM_ERRCODE_CQ_CREATE,
_IBCOMM_ERRCODE_QP_CREATE,
_IBCOMM_ERRCODE_MR_CREATE,
_IBCOMM_ERRCODE_QP_DESTROY,
_IBCOMM_ERRCODE_CQ_DESTROY,
_IBCOMM_ERRCODE_MR_DESTROY,
_IBCOMM_ERRCODE_PD_DEALLOC,
_IBCOMM_ERRCODE_DEVICE_CLOSE,
_IBCOMM_ERRCODE_SOCK_CONN,
_IBCOMM_ERRCODE_SOCK_SYNC,
_IBCOMM_ERRCODE_SOCK_CLOSE,
_IBCOMM_ERRCODE_QP_QUERY_GID,
_IBCOMM_ERRCODE_INIT_QP,
_IBCOMM_ERRCODE_RTR_QP,
_IBCOMM_ERRCODE_RTS_QP,
_IBCOMM_ERRCODE_POLL_CQ_ERR,
_IBCOMM_ERRCODE_POLL_CQ_ZERO_RESULT
};
typedef struct config{
char *dev_name; /*IB device name*/
char *server_name; /*server host name*/
u_int32_t tcp_port; /*server TCP port*/
int ib_port; /*local IB port*/
int gid_idx; /*gid index*/
int use_rdma; /*rdma flag*/
int buf_size;
int server_flg;
int pci_buf_flg;
int pci_cq_flg;
int nremote; /* number of remote nodes */
}config_t;
typedef struct qp_conn_info{
uint64_t addr; /*Buffer address*/
uint32_t rkey; /*Remote key*/
uint32_t qp_num; /*QP number*/
uint16_t lid; /*LID of the IB port*/
uint8_t gid[16];/*GID of the IB port*/
}qp_conn_info_t;
typedef struct qp_conn_info_ud{
uint16_t lid;
union ibv_gid gid;
uint32_t qp_num;
uint32_t qkey;
} qp_conn_info_ud_t;
typedef struct mrinfo{
struct ibv_mr *mr;
char *buf; /*Registered buf*/
int buf_size;
}mrinfo_t;
#define NREMOTE 4
typedef struct qpinfo{
struct ibv_qp *qp;
struct ibv_cq *scq; /*Send cq*/
struct ibv_cq *rcq; /*Receive cq*/
qp_conn_info_t remote_conn_info[NREMOTE]; /*Remote info*/
int sock[NREMOTE]; /* exchange remote_conn_info using TCP */
int listenfd; /* exchange remote_conn_info using TCP */
int sr_num;
int rr_num;
int max_inline_data; /*if data smaller than it, use inline send*/
}qpinfo_t;
typedef struct pdinfo{
struct ibv_pd *pd;
}pdinfo_t;
typedef struct resource{
struct ibv_context *ib_ctx;/*HCA handle*/
struct ibv_port_attr *port_attr; /*IB port attributes*/
list_t *pdinfo_list;
list_t *mrinfo_list;
list_t *qpinfo_list;
/* RDMA buffers */
mrinfo_t rdma_mr;
}resource_t;
/**
* create resource
* connect TCP socket
*/
extern int resource_create(config_t config, resource_t *res);
/**
* create a pd and register it to resource
*/
extern int pd_create(resource_t *res, pdinfo_t *pdinfo);
/**
* creete a qp and register it to pd
* -create send cq
* -create recv cq
* -assign send cq to sq
* -assign recv cq to rq
*/
extern int qp_create(resource_t *res, pdinfo_t *pdinfo, qpinfo_t *qpinfo);
extern int qp_create_ud(resource_t *res, pdinfo_t *pdinfo, qpinfo_t *qpinfo);
/**
* 1.create a mr and register it to pd
* 2.register buf to this mr
*/
extern int mr_create(resource_t *res, pdinfo_t *pdinfo, int buf_size, char *buf, mrinfo_t *mrinfo);
/**
* destroy all resources
*/
extern int resource_destroy(config_t *config, resource_t *res);
/**
* connect to remote qp by exchanging addr info
*/
extern int connect_qp(config_t config, resource_t *res, qpinfo_t *qpinfo);
/**
* change qp status
*/
extern int init_qp(config_t config, qpinfo_t *qpinfo);
extern int init_qp_ud(config_t config, qpinfo_t *qpinfo);
extern int rtr_qp(config_t config, qpinfo_t *qpinfo);
extern int rtr_qp_ud(config_t config, qpinfo_t *qpinfo);
extern int rts_qp(config_t config, qpinfo_t *qpinfo);
extern int rts_qp_ud(config_t config, qpinfo_t *qpinfo);
extern int modify_dest_qp(config_t config, qpinfo_t *qpinfo, qp_conn_info_t* remote_conn_info);
extern int post_send_req(qpinfo_t *qpinfo, mrinfo_t *mrinfo, int opcode, int tag, qp_conn_info_t* remote_conn_info, uint32_t imm_data);
int post_send_req2(qpinfo_t *qpinfo, mrinfo_t *mrinfo, int opcode, qp_conn_info_t* remote_conn_info, uint32_t imm_data, uint32_t seq_num);
extern int post_send_req_ud(qpinfo_t *qpinfo, mrinfo_t *mrinfo, int opcode, qp_conn_info_ud_t* remote_conn_info, struct ibv_ah *ah);
extern int post_recv_req(qpinfo_t *qpinfo, mrinfo_t *mrinfo_list, int tag);
extern int post_recv_req_ud(qpinfo_t *qpinfo, mrinfo_t *mrinfo, uint64_t wr_id);
extern int poll_cq(qpinfo_t *qpinfo, int cq_flg, int *tag);
extern int poll_cq2(qpinfo_t *qpinfo, int cq_flg, int *tag, int *result);
extern int poll_cq2_ud(qpinfo_t *qpinfo, int cq_flg, int *result);
extern void print_qp_status(qpinfo_t *qpinfo);
extern void debug_print_qp_conn_info(resource_t res, qpinfo_t qpinfo, config_t *config);
extern int read_config(config_t *config, int argc, char **argv);
#endif
#define ERR_CHKANDJUMP(cond, errno, stmt) if(cond) { stmt; rc = errno; goto fn_fail; }
#define IBCOM_ERR_CHKANDJUMP(cond, errno, stmt) if(cond) { stmt; ibcom_errno = errno; goto fn_fail; }
#define VERBS_ERR_CHKANDJUMP(cond, errno, stmt) if(cond) { stmt; verbs_errno = errno; goto fn_fail; }
static inline int show_resident(int step) {
unsigned long size, resident, share, text, lib, data, dirty;
FILE* fp = fopen("/proc/self/statm", "r");
fscanf(fp, "%ld %ld %ld %ld %ld %ld %ld", &size, &resident, &share, &text, &lib, &data, &dirty);
printf("step=%d,resident=%ldKB\n", step, resident * 4);
return 0;
}

View File

@@ -0,0 +1,82 @@
/*
* list.c
*
* Created on: 2011/10/19
* Author: simin
*/
#include <stdio.h>
#include "list.h"
#include <stdlib.h>
void* list_get(list_t *list, int idx) {
int i;
list_element_t *e;
if (list->cnt <= 0 || idx < 0 || idx >= list->cnt)
return NULL;
e = list->head;
for (i = 0; i < idx; i++)
e = e->next;
return e->data;
}
void list_add(list_t *list, void *data) {
list_element_t *e;
e = malloc(sizeof(list_element_t));
e->data = data;
e->next = NULL;
if(list->tail != NULL)
list->tail->next = e;
list->tail = e;
if (list->cnt == 0)
list->head = list->tail;
list->cnt++;
}
void* list_remove(list_t *list, int idx) {
int i;
list_element_t *e, *pe, *ne;
void *data;
e = pe = ne = NULL;
if (list->cnt <= 0 || idx < 0 || idx >= list->cnt)
return NULL;
e = list->head;
i = 0;
if(idx > 0){
while(i++ < idx-1){
e = e->next;
}
pe = e;
i--;
}
while(i++ < idx)
e = e->next;
if(idx < list->cnt)
ne = e->next;
if(pe != NULL)
pe->next = ne;
else
list->head = ne;
if(ne == NULL)
list->tail = pe;
list->cnt--;
data = e->data;
free(e);
return data;
}
void* list_pop(list_t *list){
return list_remove(list, list->cnt-1);
}

View File

@@ -0,0 +1,26 @@
/*
* list.h
*
* Created on: 2011/10/19
* Author: simin
*/
#ifndef LIST_H_
#define LIST_H_
typedef struct list_element_t{
void *data;
struct list_element_t *next;
}list_element_t;
typedef struct list_t{
list_element_t *head;
list_element_t *tail;
int cnt;
}list_t;
extern void* list_get(list_t *list, int idx);
extern void list_add(list_t *list, void *e);
extern void* list_remove(list_t *list, int idx);
extern void* list_pop(list_t *list);
#endif /* LIST_H_ */

View File

@@ -0,0 +1,30 @@
VPATH =
CC = icc
CFLAGS = -Wall -O0 -g -DDEBUG -DERROR
LD = $(CC)
LFLAGS = -libverbs
SRCS = list.c curtime.c printmem.c debug.c post.c qp.c read_config.c resource.c rdma_wr.c
DSRCS = $(SRCS:.c=.d)
OBJS = $(SRCS:.c=.o)
EXECS = rdma_wr
MODULES = list.o curtime.o printmem.o debug.o post.o qp.o read_config.o resource.o sock.o
CWD := $(abspath $(dir $(lastword $(MAKEFILE_LIST))))
all: $(EXECS)
rdma_wr: rdma_wr.o $(MODULES)
$(LD) -o $@ $^ $(LFLAGS)
%.o: %.c
$(CC) $(CFLAGS) -c $<
%.d: %.c
$(CC) -MM $< > $*.d
clean:
rm -f $(EXECS) $(OBJS) $(DSRCS)
-include $(DSRCS)

View File

@@ -0,0 +1,23 @@
/*
* mbuf.h
*
* Created on: 2011/10/19
* Author: simin
*/
#ifndef MBUF_H_
#define MBUF_H_
enum buf_type{
HOST_BUF_TYPE,
PCI_BUF_TYPE
};
typedef struct buf_t{
void *buf;
int size;
enum buf_type type;
} buf_t;
#define buf_free(buf_p) {if(buf_p->type) free(buf_p->buf);}
#endif /* MBUF_H_ */

View File

@@ -0,0 +1,9 @@
#ifndef MCONS_H_
#define MCONS_H_
#include "mtype.h"
#define ERR_RET -1
#define ERR_ADDR (addr_t)-1
#endif /* MCONS_H_ */

View File

@@ -0,0 +1,12 @@
#ifndef MM_CORE_H_
#define MM_CORE_H_
#include "mtype.h"
//4kB
#define MIC_PAGE_SIZE 4096
int mm_core_read(addr_t offset, int size, void *buf);
int mm_core_write(addr_t offset, int size, void *buf);
#endif /* MM_CORE_H_ */

View File

@@ -0,0 +1,60 @@
#ifndef MIC_MEM_H_
#define MIC_MEM_H_
#include "mtype.h"
/*#### MMIO ####*/
#define MIC_PCI_MMIO_BASE_ADDR 0xc2300000
/*## GTT ##*/
#define GTT_START_OFFSET 0x40000
#define MIC_PCI_GTT_START_ADDR (MIC_PCI_MMIO_BASE_ADDR + GTT_START_OFFSET)
#define MIC_PCI_GTT_ETT_MAX 65536
#define MIC_GTT_ETT_SIZE 4
/*## SBOX ##*/
#define SBOX_START_OFFSET 0x10000
#define MIC_PCI_SBOX_START_ADDR (MIC_PCI_MMIO_BASE_ADDR + SBOX_START_OFFSET)
#define MIC_PCI_SBOX_SIZE 0x30000
#define SBOX_SBQ_FLUSH_REG 0x0000B1A0
#define SBOX_TLB_FLUSH_REG 0x0000B1A4
/*## APERTURE ##*/
#define MIC_PCI_APERTURE_BASE_ADDR 0xb0000000
//256MB
#define MIC_PCI_APERTURE_SIZE 0x10000000
//4kB
#define MIC_PAGE_SIZE 4096
static inline addr_t _mic_map2mic(addr_t addr){
return addr >> 1 << 1 << 11;
}
#define MIC_MAP2MIC _mic_map2mic
extern int mm_host_init();
extern int mm_host_exit();
extern addr_t mm_host_get_vaddr(int page_no, int offset);
extern addr_t mm_host_get_paddr(int page_no, int offset);
/**
* map a page to MIC memory(set GTT[page_no])
*/
extern int mm_host_page_init(int pg_no, addr_t map_addr, int size, int flush_flg);
/**
* read or write a initialized page
*/
extern int mm_host_page_read(int pg_no, int offset, int size, void *data);
extern int mm_host_page_write(int pg_no, int offset, int size, void *data);
extern int mm_host_dump_gtt();
/**
* flush GTT table
* If only set 1 page, you can call mm_host_page_init with flush_flg=1
* If set several pages, you can call mm_host_page_init with flush_flg=0, and call mm_host_gtt_flush after all page_init
*/
extern int mm_host_gtt_flush();
#endif /* MIC_MEM_H_ */

View File

@@ -0,0 +1,52 @@
/*
* mm_ib_test.h
*
* Created on: 2011/10/14
* Author: simin
*/
#ifndef MM_IB_TEST_H_
#define MM_IB_TEST_H_
//#define USE_1_SERVER 1
//#define TEST_BUF_SIZE 16
#define TEST_SERVER_BUF_NUM 2
#define TEST_COMM_HOST_BASE_ADDR 0x20001
#define TEST_COMM_CORE_BASE_ADDR (0x20000 << 11)
/* MR buffer setting info */
#define TEST_HOST_MR_PAGE_NO 0
#define TEST_MR_BUF_OFFSET 0
#define TEST_MR_HOST_BUF_SIZE 4096
#define TEST_MR_HOST_BUF_ADDR (TEST_COMM_HOST_BASE_ADDR + TEST_MR_BUF_OFFSET)
#define TEST_MR_CORE_BUF_ADDR (TEST_COMM_CORE_BASE_ADDR + TEST_MR_BUF_OFFSET)
/*
#define TEST_S2_HOST_MR_PAGE_NO 1
#define TEST_S2_COMM_HOST_BASE_ADDR 0x30001
#define TEST_S2_COMM_CORE_BASE_ADDR (0x30000 << 11)
#define TEST_S2_MR_HOST_BUF_ADDR TEST_S2_COMM_HOST_BASE_ADDR + TEST_MR_BUF_OFFSET
#define TEST_S2_MR_CORE_BUF_ADDR TEST_S2_COMM_CORE_BASE_ADDR + TEST_MR_BUF_OFFSET
*/
/* CQ buffer setting info */
#define TEST_HOST_CQ_PAGE_NO 1
#define TEST_CQ_BUF_OFFSET (TEST_MR_BUF_OFFSET + TEST_MR_HOST_BUF_SIZE)
#define TEST_CQ_HOST_BUF_SIZE 4096*2 // SCQ + RCQ
#define TEST_CQ_HOST_BUF_ADDR (TEST_COMM_HOST_BASE_ADDR + TEST_CQ_BUF_OFFSET)
#define TEST_CQ_CORE_BUF_ADDR (TEST_CQ_HOST_BUF_ADDR >> 1 << 1 << 11)
/* QP buffer setting info */
#define TEST_HOST_QP_PAGE_NO 3
#define TEST_QP_BUF_OFFSET (TEST_CQ_BUF_OFFSET + TEST_CQ_HOST_BUF_SIZE)
#define TEST_QP_HOST_BUF_SIZE 4096
#define TEST_QP_HOST_BUF_ADDR (TEST_COMM_HOST_BASE_ADDR + TEST_QP_BUF_OFFSET)
#define TEST_QP_CORE_BUF_ADDR (TEST_QP_HOST_BUF_ADDR >> 1 << 1 << 11)
#endif /* MM_IB_TEST_H_ */

View File

@@ -0,0 +1,54 @@
/*
* mmib.h
*
* Created on: 2011/10/19
* Author: simin
*/
#ifndef MMIB_H_
#define MMIB_H_
#include "mtype.h"
#include "ibcomm.h"
enum mmib_buf_type{
MMIB_MR_BUF,
MMIB_CQ_BUF,
MMIB_QP_BUF,
};
enum mmib_buf_pool_state{
MMIB_BUF_POOL_RESET,
MMIB_BUF_POOL_ACTIVE
};
struct mmib_buf_pool{
addr_t offset;
int page_no; // start page_no
int size;
addr_t cur_start; // offset in page
enum mmib_buf_pool_state state;
};
typedef struct mmib_mrinfo{
struct ibv_mr *mr;
buf_t *buf; /*Registered buf*/
}mmib_mrinfo_t;
extern int mmib_pool_init();
extern buf_t *mmib_new_buf(int size, enum mmib_buf_type buf_type);
extern void mmib_destroy_buf(buf_t *buf);
extern void mmib_pool_destroy();
extern void* mmib_qp_buf_alloc(int size);
extern void* mmib_cq_buf_alloc(int size);
extern void mmib_buf_free(void* buf);
extern int mmib_resource_create(config_t config, resource_t *res);
extern int mmib_pd_create(resource_t *res, pdinfo_t *pdinfo);
extern int mmib_qp_create(resource_t *res, pdinfo_t *pdinfo, qpinfo_t *qpinfo);
extern int mmib_mr_create(resource_t *res, pdinfo_t *pdinfo, buf_t *buf, mmib_mrinfo_t *mrinfo);
extern int mmib_post_send_req(qpinfo_t *qpinfo, mmib_mrinfo_t *mrinfo_list, int opcode, int tag);
extern int mmib_post_recv_req(qpinfo_t *qpinfo, mmib_mrinfo_t *mrinfo_list, int tag);
extern int mmib_poll_cq(qpinfo_t *qpinfo, int cq_flg, int *tag);
extern int mmib_resource_destroy(config_t *config, resource_t *res);
#endif /* MMIB_H_ */

View File

@@ -0,0 +1,29 @@
/*
* type.h
*
* Created on: 2011/10/08
* Author: simin
*/
#ifndef TYPE_H_
#define TYPE_H_
#include <stdio.h>
typedef unsigned long int addr_t;
enum buf_type{
HOST_BUF_TYPE,
PCI_BUF_TYPE
};
typedef struct buf{
void *buf;
int size;
enum buf_type type;
} buf_t;
#define free_buf(buf_p) {if(buf_p->type == HOST_BUF_TYPE) free(buf_p->buf); buf_p=NULL;}
#endif /* TYPE_H_ */

View File

@@ -0,0 +1,16 @@
/*
* pm_buf.h
*
* Created on: 2011/10/21
* Author: simin
*/
#ifndef PM_BUF_H_
#define PM_BUF_H_
struct pm_buf_ops {
void* (*alloc_buf)(int size);
void (*free_buf)(void *buf);
};
#endif /* PM_BUF_H_ */

View File

@@ -0,0 +1,453 @@
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include "ibcomm.h"
#include "debug.h"
//#define DEBUG_POST
#ifdef DEBUG_POST
#define dprintf printf
#else
#define dprintf(...)
#endif
static unsigned long rdtsc() {
unsigned long x;
__asm__ __volatile__("xorl %%eax, %%eax; cpuid;" : : : "%rax", "%rbx", "%rcx", "%rdx"); /* rdtsc cannot be executed earlier than this */
__asm__ __volatile__("rdtsc; shl $32, %%rdx; or %%rdx, %%rax" : "=a"(x) : : "memory"); /* rdtsc cannot be executed earlier than here */
__asm__ __volatile__("xorl %%eax, %%eax; cpuid;" : : : "%rax", "%rbx", "%rcx", "%rdx"); /* following instructions cannot be executed earlier than this */
return x;
}
#define MAX_POLL_TIME (1000000ULL * 1000000)
int swr_id_tag_map[1000];
int rwr_id_tag_map[1000];
void put_swr_id_tag(int wr_id, int tag){
swr_id_tag_map[wr_id] = tag;
}
int get_swr_id_tag(int wr_id){
int tag = swr_id_tag_map[wr_id];
return tag;
}
void put_rwr_id_tag(int wr_id, int tag){
rwr_id_tag_map[wr_id] = tag;
}
int get_rwr_id_tag(int wr_id){
int tag = rwr_id_tag_map[wr_id];
return tag;
}
int post_send_req(qpinfo_t *qpinfo, mrinfo_t *mrinfo, int opcode, int tag, qp_conn_info_t* remote_conn_info, uint32_t imm_data){
struct ibv_send_wr sr, *bad_wr = NULL;
struct ibv_sge sge[1];
int ret = 0;
/* Create sge*/
sge[0].addr = (uintptr_t)mrinfo->buf;
sge[0].length = mrinfo->buf_size;
sge[0].lkey = mrinfo->mr->lkey;
/* Create a SR */
memset(&sr, 0, sizeof(struct ibv_send_wr));
sr.next = NULL;
sr.wr_id = ++qpinfo->sr_num;
sr.sg_list = sge;
sr.num_sge = 1;
sr.opcode = opcode;
sr.imm_data = imm_data;
sr.send_flags = IBV_SEND_SIGNALED;
if(opcode != IBV_WR_RDMA_READ && mrinfo->buf_size <= qpinfo->max_inline_data) { sr.send_flags |= IBV_SEND_INLINE; }
put_swr_id_tag(sr.wr_id, tag);
// set addr and key if is RDMA op
if(opcode != IBV_WR_SEND){
sr.wr.rdma.remote_addr = remote_conn_info->addr;
sr.wr.rdma.rkey = remote_conn_info->rkey;
}
/* Post SR to SQ */
ret = ibv_post_send(qpinfo->qp, &sr, &bad_wr);
if(ret){
error_perror("ibv_post_send");
error_printf("ibv_post_send return %d\n", ret);
return IBCOMM_ERR_CODE;
}
return 0;
}
/* write to addr + sz * seq_num */
int post_send_req2(qpinfo_t *qpinfo, mrinfo_t *mrinfo, int opcode, qp_conn_info_t* remote_conn_info, uint32_t imm_data, uint32_t seq_num) {
struct ibv_send_wr sr, *bad_wr = NULL;
struct ibv_sge sge[1];
int ret = 0;
/* prepare sge*/
sge[0].addr = (uintptr_t)mrinfo->buf;
sge[0].length = mrinfo->buf_size;
sge[0].lkey = mrinfo->mr->lkey;
dprintf("post_send_req2,sge[0].addr=%lx,sz=%d\n", (unsigned long)sge[0].addr, sge[0].length = mrinfo->buf_size);
/* prepare send request or work request */
//memset(&sr, 0, sizeof(struct ibv_send_wr));
sr.next = NULL;
sr.wr_id = 0;
sr.sg_list = sge;
sr.num_sge = 1;
sr.opcode = opcode;
sr.imm_data = imm_data;
sr.send_flags = IBV_SEND_SIGNALED;
if(opcode != IBV_WR_RDMA_READ && mrinfo->buf_size <= qpinfo->max_inline_data) {
sr.send_flags |= IBV_SEND_INLINE;
}
if(opcode == IBV_WR_RDMA_WRITE || opcode == IBV_WR_RDMA_WRITE_WITH_IMM) {
sr.wr.rdma.remote_addr = remote_conn_info->addr + IBCOM_RDMABUF_SZSEG * seq_num;
sr.wr.rdma.rkey = remote_conn_info->rkey;
dprintf("post_send_req2,raddr=%lx\n", sr.wr.rdma.remote_addr);
}
//__asm__ __volatile__("" ::: "memory");
ret = ibv_post_send(qpinfo->qp, &sr, &bad_wr);
if(ret){
printf("ibv_post_send return %d\n", ret);
return IBCOMM_ERR_CODE;
}
return 0;
}
int ibcom_isend_chain(qpinfo_t *qpinfo, mrinfo_t *mrinfo, int opcode, qp_conn_info_t* remote_conn_info, uint32_t imm_data, uint32_t seq_num) {
int ibcom_errno = 0;
int ib_errno;
int i;
struct ibv_send_wr sr[NCHAIN], *bad_wr = NULL;
struct ibv_sge sge[NCHAIN];
for(i = 0; i < NCHAIN; i++) {
sge[i].addr = (uintptr_t)mrinfo->buf + IBCOM_INLINE_DATA * i;
sge[i].length = IBCOM_INLINE_DATA;
sge[i].lkey = mrinfo->mr->lkey;
sr[i].next = (i == NCHAIN - 1) ? NULL : &sr[i+1];
//sr[i].wr_id = 0;
sr[i].sg_list = &sge[i];
sr[i].num_sge = 1;
#define SKIP_POLL_RCQ
#ifdef SKIP_POLL_RCQ /* if you want all to be IBV_WR_RDMA_WRITE */
sr[i].opcode = opcode;
#else
sr[i].opcode = (i == NCHAIN - 1) ? IBV_WR_RDMA_WRITE_WITH_IMM : IBV_WR_RDMA_WRITE;
#endif
sr[i].imm_data = imm_data;
sr[i].send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE;
sr[i].wr.rdma.remote_addr = remote_conn_info->addr + IBCOM_INLINE_DATA * NCHAIN * seq_num + IBCOM_INLINE_DATA * i;
sr[i].wr.rdma.rkey = remote_conn_info->rkey;
}
ib_errno = ibv_post_send(qpinfo->qp, &sr[0], &bad_wr);
IBCOM_ERR_CHKANDJUMP(ib_errno, -1, printf("ibv_post_send\n"));
fn_exit:
return ibcom_errno;
fn_fail:
goto fn_exit;
}
/* write to addr + sz * seq_num */
int post_send_req4(qpinfo_t *qpinfo, mrinfo_t *mrinfo, int opcode, qp_conn_info_t* remote_conn_info, uint32_t imm_data, uint32_t seq_num, uint32_t offset) {
int ibcom_errno = 0;
int ib_errno;
struct ibv_send_wr sr, *bad_wr = NULL;
struct ibv_sge sge[1];
sge[0].addr = (uintptr_t)mrinfo->buf + offset;
sge[0].length = IBCOM_INLINE_DATA;
sge[0].lkey = mrinfo->mr->lkey;
sr.next = NULL;
//sr.wr_id = 0;
sr.sg_list = sge;
sr.num_sge = 1;
sr.opcode = opcode;
sr.imm_data = imm_data;
sr.send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE;
sr.wr.rdma.remote_addr = remote_conn_info->addr + IBCOM_INLINE_DATA * seq_num;
sr.wr.rdma.rkey = remote_conn_info->rkey;
ib_errno = ibv_post_send(qpinfo->qp, &sr, &bad_wr);
IBCOM_ERR_CHKANDJUMP(ib_errno, -1, printf("ibv_post_send\n"));
fn_exit:
return ibcom_errno;
fn_fail:
goto fn_exit;
}
int post_send_req_ud(qpinfo_t *qpinfo, mrinfo_t *mrinfo, int opcode, qp_conn_info_ud_t* remote_conn_info, struct ibv_ah *ah) {
struct ibv_send_wr sr, *bad_wr;
struct ibv_sge sge[1];
int ibcom_errno = 0, ib_errno;
/* Create sge*/
/* addr to addr + length - 1 will be on the payload, but see "post_send_req_ud" part */
if(mrinfo->buf_size <= 40) { printf("buf_size too short\n"); ibcom_errno = -1; goto fn_fail; }
sge[0].addr = (uintptr_t)mrinfo->buf + 40;
sge[0].length = mrinfo->buf_size - 40;
sge[0].lkey = mrinfo->mr->lkey;
/* Create a SR */
//memset(&sr, 0, sizeof(struct ibv_send_wr));
sr.next = NULL;
sr.wr_id = 0;
sr.sg_list = sge;
sr.num_sge = 1;
sr.opcode = opcode;
//sr.imm_data = 0;
sr.send_flags = IBV_SEND_SIGNALED;
#if 0
if(mrinfo->buf_size <= qpinfo->max_inline_data){
sr.send_flags |= IBV_SEND_INLINE;
}
#endif
sr.wr.ud.ah = ah;
sr.wr.ud.remote_qpn = remote_conn_info->qp_num;
sr.wr.ud.remote_qkey = remote_conn_info->qkey;
dprintf("ibv_post_send,qpn=%08x,qkey=%08x\n", sr.wr.ud.remote_qpn, sr.wr.ud.remote_qkey);
// printf("ibv_post_send,dlid=%02x,is_global=%02x\n", ah->dlid, ah->is_global);
ib_errno = ibv_post_send(qpinfo->qp, &sr, &bad_wr);
if(ib_errno) {
error_perror("ibv_post_send");
printf("ib_errno=%d\n", ib_errno);
ibcom_errno = IBCOMM_ERR_CODE;
goto fn_fail;
}
fn_exit:
return ibcom_errno;
fn_fail:
goto fn_exit;
}
int post_recv_req(qpinfo_t *qpinfo, mrinfo_t *mrinfo, int tag){
struct ibv_recv_wr *rr;
struct ibv_sge *sge;
struct ibv_recv_wr *bad_wr;
int ret = 0;
/* Prepare scatter/gather entry list */
sge = malloc(sizeof(struct ibv_sge));
memset(sge, 0, sizeof(struct ibv_sge));
sge->addr = (uintptr_t)mrinfo->buf;
sge->length = mrinfo->buf_size;
sge->lkey = mrinfo->mr->lkey;
/* Create RR list */
rr = malloc(sizeof(*rr));
memset(rr, 0, sizeof(*rr));
rr->next = NULL;
rr->wr_id = ++qpinfo->rr_num;
rr->sg_list = sge;
rr->num_sge = 1;
put_rwr_id_tag(rr->wr_id, tag);
/* Post RR to RQ */
ret = ibv_post_recv(qpinfo->qp, rr, &bad_wr);
if(ret){
dprintf("ibv_post_recv ret=%d\n", ret);
free(sge);
free(rr);
return IBCOMM_ERR_CODE;
} else {
dprintf("ibv_post_recv ret=%d\n", ret);
}
free(sge);
free(rr);
return 0;
}
int ibcom_irecv(qpinfo_t *qpinfo, uint64_t wr_id){
struct ibv_recv_wr rr;
struct ibv_recv_wr *bad_wr;
int ibcom_errno = 0;
int ib_errno;
rr.next = NULL;
rr.sg_list = NULL;
rr.num_sge = 0;
rr.wr_id = wr_id;
/* post rr */
ib_errno = ibv_post_recv(qpinfo->qp, &rr, &bad_wr);
IBCOM_ERR_CHKANDJUMP(ib_errno, -1, printf("ibv_post_recv\n"));
fn_exit:
return ibcom_errno;
fn_fail:
goto fn_exit;
}
int post_recv_req_ud(qpinfo_t *qpinfo, mrinfo_t *mrinfo, uint64_t wr_id){
struct ibv_recv_wr rr, *bad_wr;
struct ibv_sge sge[1];
int ibcom_errno = 0, ib_errno;
/* Prepare scatter/gather entry list */
memset(sge, 0, sizeof(struct ibv_sge));
/* addr to addr + 39 are not filled, addr + 40 to addr + length - 1 are filled with payload */
if(mrinfo->buf_size <= 40) { printf("buf_size too short\n"); ibcom_errno = -1; goto fn_fail; }
sge[0].addr = (uintptr_t)mrinfo->buf;
sge[0].length = mrinfo->buf_size;
sge[0].lkey = mrinfo->mr->lkey;
/* Create RR list */
memset(&rr, 0, sizeof(struct ibv_recv_wr));
rr.next = NULL;
rr.wr_id = wr_id;
rr.sg_list = sge;
rr.num_sge = 1;
/* Post RR to RQ */
ib_errno = ibv_post_recv(qpinfo->qp, &rr, &bad_wr);
if(ib_errno){
printf("ibv_post_recv ib_errno=%d\n", ib_errno);
ibcom_errno = IBCOMM_ERR_CODE;
goto fn_fail;
}
fn_exit:
return ibcom_errno;
fn_fail:
goto fn_exit;
}
int poll_cq(qpinfo_t *qpinfo, int cq_flg, int *tag) {
struct ibv_wc wc;
int wc_num = 0, time=0, rc = IBCOMM_ERR_CODE;
// wc = malloc(sizeof(struct ibv_wc));
memset(&wc, 0, sizeof(struct ibv_wc));
switch(cq_flg){
case SEND_CQ_FLG:
do{
wc_num = ibv_poll_cq(qpinfo->scq, 1, &wc);
}while(!wc_num && ++time < MAX_POLL_TIME);
break;
case RECV_CQ_FLG:
do{
wc_num = ibv_poll_cq(qpinfo->rcq, 1, &wc);
}while(!wc_num && ++time < MAX_POLL_TIME);
break;
}
if(wc_num < 0){
error_perror("ibv_poll_cq");
goto poll_cq_exit;
}
if(wc_num == 0){
error_printf("no wc is found\n");
goto poll_cq_exit;
}
if (wc.status != IBV_WC_SUCCESS){
error_printf("wrong wc state: %d, %s\n", wc.status, ibv_wc_status_str(wc.status));
goto poll_cq_exit;
}
switch(cq_flg){
case SEND_CQ_FLG:
*tag = get_swr_id_tag(wc.wr_id);
break;
case RECV_CQ_FLG:
*tag = get_rwr_id_tag(wc.wr_id);
break;
}
rc = 0;
poll_cq_exit:
return rc;
}
int poll_cq2(qpinfo_t *qpinfo, int cq_flg, int *tag, int *result) {
struct ibv_wc cqe;
int rc = 0;
switch(cq_flg){
case SEND_CQ_FLG:
*result = ibv_poll_cq(qpinfo->scq, 1, &cqe);
break;
case RECV_CQ_FLG:
*result = ibv_poll_cq(qpinfo->rcq, 1, &cqe);
break;
}
if(*result < 0){
error_perror("ibv_poll_cq");
rc = *result;
goto fn_fail;
}
if(*result > 0 && cqe.status != IBV_WC_SUCCESS){
error_printf("cqe status=%08x,%s\n", cqe.status, ibv_wc_status_str(cqe.status));
rc = -1;
goto fn_fail;
}
if(*result > 0) {
dprintf("cqe.imm_data=%d\n", cqe.imm_data);
switch(cq_flg){
case SEND_CQ_FLG:
*tag = get_swr_id_tag(cqe.wr_id);
break;
case RECV_CQ_FLG:
*tag = get_rwr_id_tag(cqe.wr_id);
break;
}
}
fn_exit:
return rc;
fn_fail:
goto fn_exit;
}
int poll_cq2_ud(qpinfo_t *qpinfo, int cq_flg, int *result) {
struct ibv_wc cqe;
int rc = 0;
switch(cq_flg){
case SEND_CQ_FLG: {
unsigned long tscs = rdtsc();
*result = ibv_poll_cq(qpinfo->scq, 1, &cqe);
unsigned long tsce = rdtsc();
printf("poll_cq,send,%ld\n", tsce-tscs);
break; }
case RECV_CQ_FLG:
*result = ibv_poll_cq(qpinfo->rcq, 1, &cqe);
break;
}
if(*result < 0){
error_perror("ibv_poll_cq");
rc = *result;
goto fn_fail;
}
if(*result > 0 && cqe.status != IBV_WC_SUCCESS){
error_printf("cqe status=%08x,%s\n", cqe.status, ibv_wc_status_str(cqe.status));
rc = -1;
goto fn_fail;
}
fn_exit:
return rc;
fn_fail:
goto fn_exit;
}

View File

@@ -0,0 +1,12 @@
#include <stdio.h>
#include "mtype.h"
void print_mem(addr_t addr, int size){
int i;
printf("print memory[0x%lx]\n", addr);
for(i = 0; i < size; i++){
printf("%02x ", *(unsigned char *)(addr+i));
}
printf("\n");
}

View File

@@ -0,0 +1,297 @@
#include <stdio.h>
#include <stdlib.h>
#include <asm/byteorder.h>
#include <arpa/inet.h>
#include <string.h>
#include <unistd.h>
#include "ibcomm.h"
#include "sock.h"
#include "debug.h"
#define DEBUG_QP
#ifdef DEBUG_QP
#define dprintf printf
#else
#define dprintf(...)
#endif
int connect_qp(config_t config, resource_t *res, qpinfo_t *qpinfo){
union ibv_gid gid;
qp_conn_info_t local_conn_info;
int rc = IBCOMM_ERR_CODE;
// get GID for this connection
memset(&gid, 0, sizeof(union ibv_gid));
if(ibv_query_gid(res->ib_ctx, config.ib_port, config.gid_idx, &gid)){
error_perror("ibv_query_gid");
goto connect_qp_exit;
}
dprintf("port=%08x\n", config.ib_port);
// set local qp conn info
memset(&local_conn_info, 0, sizeof(qp_conn_info_t));
memset(qpinfo->remote_conn_info, 0, sizeof(qpinfo->remote_conn_info));
local_conn_info.qp_num = htonl(qpinfo->qp->qp_num);
local_conn_info.lid = htons(res->port_attr->lid);
memcpy(local_conn_info.gid, &gid, 16);
dprintf("qp_num=%08x, lid=%08x\n", local_conn_info.qp_num, local_conn_info.lid);
// set rdma address
if(config.use_rdma == 1){
local_conn_info.addr = htonll((uint64_t) res->rdma_mr.mr->addr);
local_conn_info.rkey = htonl((uint32_t) res->rdma_mr.mr->lkey);
printf("my lkey=%08x\n", res->rdma_mr.mr->lkey);
printf("my rkey=%08x\n", res->rdma_mr.mr->rkey);
//local_conn_info.rkey = htonl((uint32_t) res->rdma_mr.mr->rkey);
}
if(config.server_flg) { qpinfo->listenfd = -1; } // if listenfd != -1, then listen(listenfd)
int i;
for(i = 0; i < (config.server_flg ? config.nremote : 1); i++) {
// server accepts connection from NREMOTE clients
// NREMOTE clients connect to one server
// sock connect
qpinfo->sock[i] = sock_connect(config.server_name, config.tcp_port, &(qpinfo->listenfd));
if(qpinfo->sock[i] < 0) {
error_perror("sock_connect"); goto connect_qp_exit;
}
dprintf("connect_qp, after sock_connect\n");
// send local_conn_info, receive remote_conn_info
if(sock_sync_data(qpinfo->sock[i], sizeof(qp_conn_info_t), (char*)&local_conn_info, (char*)&qpinfo->remote_conn_info[i])){
error_perror("sock_sync_data");
goto connect_qp_exit;
}
dprintf("connect_qp, after sock_sync_data\n");
qpinfo->remote_conn_info[i].qp_num = ntohl(qpinfo->remote_conn_info[i].qp_num);
qpinfo->remote_conn_info[i].lid = ntohs(qpinfo->remote_conn_info[i].lid);
// set rdma address
if(config.use_rdma == 1){
qpinfo->remote_conn_info[i].addr = ntohll(qpinfo->remote_conn_info[i].addr);
qpinfo->remote_conn_info[i].rkey = ntohl(qpinfo->remote_conn_info[i].rkey);
printf("your rkey=%08x\n", qpinfo->remote_conn_info[i].rkey);
}
}
rc = 0;
connect_qp_exit:
if(rc) {
int i;
for(i = 0; i < (config.server_flg ? config.nremote : 1); i++) {
if(qpinfo->sock[i] > 0) { close(qpinfo->sock[i]); }
}
}
return rc;
}
int init_qp(config_t config, qpinfo_t *qpinfo){
struct ibv_qp_attr attr;
int flags;
int rc = 0;
memset(&attr, 0, sizeof(attr));
attr.qp_state = IBV_QPS_INIT;
attr.port_num = config.ib_port;
attr.pkey_index = 0;
attr.qp_access_flags = IBV_ACCESS_LOCAL_WRITE;
if(config.use_rdma)
attr.qp_access_flags |= IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_ATOMIC;
flags = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS;
if(ibv_modify_qp(qpinfo->qp, &attr, flags)){
error_perror("ibv_modify_qp");
rc = IBCOMM_ERR_CODE;
}
return rc;
}
int init_qp_ud(config_t config, qpinfo_t *qpinfo){
struct ibv_qp_attr attr;
int flags;
int ibcom_errno = 0, ib_errno;
memset(&attr, 0, sizeof(attr));
attr.qp_state = IBV_QPS_INIT;
attr.port_num = config.ib_port;
attr.pkey_index = 0;
attr.qkey = 0x11111111;
flags = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_QKEY;
ib_errno = ibv_modify_qp(qpinfo->qp, &attr, flags);
if(ib_errno) {
dprintf("ib_errno=%d\n", ib_errno);
error_perror("ibv_modify_qp");
ibcom_errno = IBCOMM_ERR_CODE;
goto fn_fail;
}
fn_exit:
return ibcom_errno;
fn_fail:
goto fn_exit;
}
int rtr_qp(config_t config, qpinfo_t *qpinfo){
struct ibv_qp_attr attr;
int flags;
int rc = 0;
memset(&attr, 0, sizeof(attr));
attr.qp_state = IBV_QPS_RTR;
attr.path_mtu = IBV_MTU_2048/*IBV_MTU_2048*//*IBV_MTU_512*/;
attr.ah_attr.dlid = qpinfo->remote_conn_info[0].lid;
attr.ah_attr.port_num = config.ib_port;
attr.dest_qp_num = qpinfo->remote_conn_info[0].qp_num;
attr.rq_psn = 0;
attr.min_rnr_timer = 0x12;
attr.max_dest_rd_atomic = /*0*/1;
if(config.use_rdma)
attr.max_dest_rd_atomic = 1;
flags = IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER;
if(ibv_modify_qp(qpinfo->qp, &attr, flags)){
error_perror("ibv_modify_qp");
rc = IBCOMM_ERR_CODE;
}
return rc;
}
int rtr_qp_ud(config_t config, qpinfo_t *qpinfo){
struct ibv_qp_attr attr;
int flags;
int ibcom_errno = 0, ib_errno;
memset(&attr, 0, sizeof(attr));
attr.qp_state = IBV_QPS_RTR;
flags = IBV_QP_STATE;
ib_errno = ibv_modify_qp(qpinfo->qp, &attr, flags);
if(ib_errno) { error_perror("ibv_modify_qp"); ibcom_errno = IBCOMM_ERR_CODE; goto fn_fail; }
fn_exit:
return ibcom_errno;
fn_fail:
goto fn_exit;
}
int rts_qp(config_t config, qpinfo_t *qpinfo){
struct ibv_qp_attr attr;
int flags;
int rc = 0;
memset(&attr, 0, sizeof(attr));
attr.qp_state = IBV_QPS_RTS;
attr.timeout = 0x14;
attr.retry_cnt = 7;
attr.rnr_retry = 7;
attr.sq_psn = 0;
attr.max_rd_atomic = /*0*/1; // num of outstanding RDMA reads and atomic op allowed
if(config.use_rdma)
attr.max_rd_atomic = 1;
flags = IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC;
if(ibv_modify_qp(qpinfo->qp, &attr, flags)){
error_perror("ibv_modify_qp");
rc = IBCOMM_ERR_CODE;
}
return rc;
}
int rts_qp_ud(config_t config, qpinfo_t *qpinfo){
struct ibv_qp_attr attr;
int flags;
int ibcom_errno = 0, ib_errno;
memset(&attr, 0, sizeof(attr));
attr.qp_state = IBV_QPS_RTS;
attr.sq_psn = 0;
flags = IBV_QP_STATE | IBV_QP_SQ_PSN;
ib_errno = ibv_modify_qp(qpinfo->qp, &attr, flags);
if(ib_errno) { error_perror("ibv_modify_qp"); ibcom_errno = IBCOMM_ERR_CODE; goto fn_fail; }
fn_exit:
return ibcom_errno;
fn_fail:
goto fn_exit;
}
/* modify address vector and dest qpn and reset sq_psn */
int modify_dest_qp(config_t config, qpinfo_t *qpinfo, qp_conn_info_t* remote_conn_info){
struct ibv_qp_attr attr;
int flags;
int rc = 0;
memset(&attr, 0, sizeof(attr));
attr.qp_state = IBV_QPS_RTS;
attr.ah_attr.dlid = remote_conn_info->lid;
attr.ah_attr.port_num = config.ib_port;
attr.dest_qp_num = remote_conn_info->qp_num;
attr.sq_psn = 0;
attr.max_rd_atomic = 0;
attr.retry_cnt = 7;
attr.rnr_retry = 7;
attr.timeout = 0x14;
#if 0
flags = IBV_QP_STATE | IBV_QP_AV | IBV_QP_DEST_QPN | IBV_QP_SQ_PSN |
IBV_QP_MAX_QP_RD_ATOMIC |
IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_TIMEOUT;
#else
flags = IBV_QP_STATE | IBV_QP_AV;
#endif
if(ibv_modify_qp(qpinfo->qp, &attr, flags)){
error_perror("ibv_modify_qp");
rc = IBCOMM_ERR_CODE;
}
return rc;
}
void print_qp_status(qpinfo_t *qpinfo){
struct ibv_qp_attr *attr;
struct ibv_qp_init_attr *init_attr;
int flags = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS;
int rc;
attr = malloc(sizeof(struct ibv_qp_attr));
init_attr = malloc(sizeof(struct ibv_qp_init_attr));
rc = ibv_query_qp(qpinfo->qp, attr, flags, init_attr);
if(rc){
fprintf(stderr, "query qp error\n");
}
else{
switch(attr->cur_qp_state){
case IBV_QPS_RESET:
dprintf("attr=IBV_QPS_RESET\n");
break;
case IBV_QPS_INIT:
dprintf("attr=IBV_QPS_INIT\n");
break;
case IBV_QPS_RTR:
dprintf("attr=IBV_QPS_RTR\n");
break;
case IBV_QPS_RTS:
dprintf("attr=IBV_QPS_RTS\n");
break;
case IBV_QPS_SQD:
dprintf("attr=IBV_QPS_SQD\n");
break;
case IBV_QPS_SQE:
dprintf("attr=IBV_QPS_SQE\n");
break;
case IBV_QPS_ERR:
dprintf("attr=IBV_QPS_ERR\n");
break;
}
}
free(attr);
free(init_attr);
}

View File

@@ -0,0 +1,218 @@
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <sys/mman.h>
#include <unistd.h>
#include "ibcomm.h"
#include "debug.h"
#include "mtype.h"
#include "mcons.h"
#include "mm_ib_test.h"
//#define DEBUG_RDMA_WR
#ifdef DEBUG_RDMA_WR
#define dprintf printf
#else
#define dprintf(...)
#endif
#define TEST_SEND_BUF_NUM 3
#define TEST_RDMA_FLG_SIZE (sizeof(unsigned short))
#define NTRIAL 1 /* 120 */
#define PPOLLS 1 /* sweet spot is around 10 */
#define NSKIPS (PPOLLS*0)
#define PPOLLR 1 /* sweet spot is around 10 */
#define NSKIPR (PPOLLR*0)
#define IBCOM_MAGIC 0x55aa55aa
typedef struct tailmagic_t {
uint32_t magic;
} tailmagic_t;
enum rdma_buf_flg{
RDMA_BUF_RESET_FLG = 0,
RDMA_BUF_WRITE_FLG = 1,
};
static unsigned long rdtsc() {
unsigned long x;
__asm__ __volatile__("xorl %%eax, %%eax; cpuid;" : : : "%rax", "%rbx", "%rcx", "%rdx"); /* rdtsc cannot be executed earlier than this */
__asm__ __volatile__("rdtsc; shl $32, %%rdx; or %%rdx, %%rax" : "=a"(x) : : "memory"); /* rdtsc cannot be executed earlier than here */
__asm__ __volatile__("xorl %%eax, %%eax; cpuid;" : : : "%rax", "%rbx", "%rcx", "%rdx"); /* following instructions cannot be executed earlier than this */
return x;
}
volatile int k;
int main(int argc, char **argv) {
config_t config;
unsigned long i, j;
int ibcom_errno = 0;
char sync_res;
unsigned long tscs, tsce;
resource_t res;
pdinfo_t pdinfo;
qpinfo_t qpinfo;
mrinfo_t *loc_mr_list = NULL;
int entry;
int ibv_errno;
if (read_config(&config, argc, argv)) {
goto fn_exit;
}
config.use_rdma = 1;
unsigned long buf_size;
char* str_env = getenv("BUF_SIZE");
buf_size = str_env ? atol(str_env) : 4096/*48,1073741824ULL * 1 + 4*/;
if(buf_size == 0) { printf("set buf_size"); goto fn_fail; }
if(resource_create(config, &res) || pd_create(&res, &pdinfo)) { printf("qp_create failed\n"); goto fn_fail; }
ibv_errno = qp_create(&res, &pdinfo, &qpinfo);
IBCOM_ERR_CHKANDJUMP(ibv_errno, -1, printf("qp_create failed\n"));
/* create MR buffers */
// rdma-write-to buffer
#if 1
void *rdma_buf = mmap(0, buf_size * NTRIAL, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
memset(rdma_buf, 0, buf_size * NTRIAL);
#else
void *rdma_buf = calloc(buf_size * NTRIAL, sizeof(char));
#endif
if(!rdma_buf) { printf("mmap failed\n"); goto fn_fail; }
if(mr_create(&res, &pdinfo, buf_size * NTRIAL, rdma_buf, &res.rdma_mr)) { printf("mr_create failed\n"); goto fn_fail; }
#if 0
// TLB prefetch
for (i = 0; i < NTRIAL; i++) {
if(!config.server_flg) {
*((uint32_t *)(rdma_buf + buf_size * i + buf_size - sizeof(uint32_t))) = 0;
}
}
#endif
// local data buffers
loc_mr_list = calloc(sizeof(mrinfo_t) * NTRIAL, sizeof(char));
for (i = 0; i < NTRIAL; i++) {
void *loc_buf = mmap(0, buf_size, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
if(loc_buf == MAP_FAILED) { printf("mmap failed\n"); goto fn_fail; }
if(config.server_flg) {
for(j = 0; j < buf_size; j++) {
*((unsigned char*)loc_buf + j) = (char)i;
}
*((uint32_t *)(loc_buf + buf_size - sizeof(uint32_t))) = 0 + IBCOM_MAGIC;
}
dprintf("magic addr=%lx\n", (unsigned long)(loc_buf + buf_size - TEST_RDMA_FLG_SIZE));
if(mr_create(&res, &pdinfo, buf_size, loc_buf, &loc_mr_list[i])) { printf("mr_create failed\n"); goto fn_fail; }
}
if(!config.server_flg) { dprintf("res->rdma_mr.mr->addr=%lx\n", (unsigned long)res.rdma_mr.mr->addr); }
/* exchange gid, lid, qpn, raddr, rkey */
if(connect_qp(config, &res, &qpinfo)) { printf("connect_qp failed\n"); goto fn_fail; }
debug_print_qp_conn_info(res, qpinfo, &config);
printf("connect_qp done\n"); fflush(stdout);
if(config.server_flg) { dprintf("qpinfo->remote_conn_info[0].addr=%lx\n", qpinfo.remote_conn_info[0].addr); }
/* make qp RTS */
if(init_qp(config, &qpinfo) || rtr_qp(config, &qpinfo) || rts_qp(config, &qpinfo)) { printf("rts failed\n"); goto fn_fail; }
printf("rts done\n"); fflush(stdout);
/* barrier */
for(i = 0; i < (config.server_flg ? config.nremote : 1); i++) {
if(sock_sync_data(qpinfo.sock[i], 1, "R", &sync_res)) { perror("sock_sync_data"); }
}
printf("barrier done\n"); fflush(stdout);
if(config.server_flg) { /* sender side */
//usleep(500000);
if(NTRIAL % PPOLLS != 0) { printf("set NTRIAL multiple of PPOLLS\n"); goto fn_fail; }
if(NTRIAL <= NSKIPS) { printf("set NTRIAL > NSKIP\n"); goto fn_fail; }
for (i = 0; i < NTRIAL; i++) {
if(i == NSKIPS) { tscs = rdtsc(); }
post_send_req2(&qpinfo, &loc_mr_list[0], IBV_WR_RDMA_WRITE, &qpinfo.remote_conn_info[0], 0, i);
#if 0
int nfound = 0;
if(i % PPOLLS == PPOLLS - 1) {
k = 0;
while(1) {
int result;
struct ibv_wc cqe[PPOLLS];
result = ibv_poll_cq(qpinfo.scq, PPOLLS, &cqe[0]);
if(result < 0) { printf("ibv_poll_cq"); goto fn_fail; }
if(result > 0) {
for(j = 0; j < result; j++) {
if(cqe[j].status != IBV_WC_SUCCESS) { printf("cqe status,%s\n", ibv_wc_status_str(cqe[j].status)); goto fn_fail; }
}
//debug_print_mem((addr_t)loc_mr_list[entry].buf, buf_size);
nfound += result;
if(nfound >= PPOLLS) { break; }
}
k++;
}
}
#endif
}
tsce = rdtsc(); printf("send,%.0f\n", (tsce-tscs)/(double)(NTRIAL-NSKIPS));
#if 1
int nfound = 0;
k = 0;
while(1) {
int result;
struct ibv_wc cqe[NTRIAL];
result = ibv_poll_cq(qpinfo.scq, NTRIAL, &cqe[0]);
if(result < 0) { printf("ibv_poll_cq"); goto fn_fail; }
if(result > 0) {
for(j = 0; j < result; j++) {
if(cqe[j].status != IBV_WC_SUCCESS) { printf("cqe status,%s\n", ibv_wc_status_str(cqe[j].status)); goto fn_fail; }
}
//debug_print_mem((addr_t)loc_mr_list[entry].buf, buf_size);
nfound += result;
if(nfound >= NTRIAL) { break; }
}
k++;
}
#endif
} else { /* receiver side */
if(NSKIPR % PPOLLR !=0) { printf("set NSKIP multiple of PPOLL\n"); goto fn_fail; }
for (i = 0; i < NTRIAL; i++) {
if(i == NSKIPR) { tscs = rdtsc(); }
// poll on magic
dprintf("res.rdma_mr.buf=%lx\n", (unsigned long)res.rdma_mr.buf);
dprintf("poll addr=%lx\n", (unsigned long)(rdma_buf + buf_size * i + buf_size - sizeof(uint32_t)));
//k = 0;
volatile uint32_t *ptr = (volatile uint32_t *)(rdma_buf + buf_size * i + buf_size - sizeof(uint32_t));
while(*ptr != IBCOM_MAGIC) {
//k++; if(i >= NSKIPR && k % 65536 == 65535) { printf("i=%d,poll value=%x\n", i, *((uint32_t *)(rdma_buf + buf_size * i + buf_size - sizeof(uint32_t)))); }
__asm__ __volatile__("pause");
}
//debug_print_mem((addr_t)res.rdma_mr.buf, buf_size);
}
tsce = rdtsc(); printf("recv,%.0f\n", (tsce-tscs)/(double)(NTRIAL-NSKIPR));
}
fn_exit:
/*Can free all resources*/
#if 0
if (resource_destroy(&config, &res)) {
fprintf(stderr, "resource destroy failed\n");
} else {
dprintf("destroy all successfully..\n");
}
if(loc_mr_list) { free(loc_mr_list); }
#endif
return ibcom_errno;
fn_fail:
goto fn_exit;
}

View File

@@ -0,0 +1,79 @@
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <unistd.h>
#include "ibcomm.h"
/*
int read_config(config_t *config, int argc, char **argv){
memset(config, 0, sizeof(config_t));
config->server_name = NULL;
config->ib_port = 1;
config->dev_name = NULL;
// client mode
if(argc == 4){
config->server_name = argv[1];
config->tcp_port = strtoul(argv[2], NULL, 0);
config->buf_size = strtoul(argv[3], NULL, 0);
}
// server mode
else if(argc == 3){
config->tcp_port = strtoul(argv[1], NULL, 0);
config->buf_size = strtoul(argv[2], NULL, 0);
config->server_flg = 1;
}
else{
printf("usage: ./main <server_name> <port> <size>\n");
return IBCOMM_ERR_CODE;
}
if(config->tcp_port <=0 )
return IBCOMM_ERR_CODE;
return 0;
}
*/
int read_config(config_t *config, int argc, char **argv) {
memset(config, 0, sizeof(config_t));
config->server_name = NULL;
config->ib_port = 1;
config->dev_name = NULL;
config->server_flg = 1;
config->nremote = 1;
config->buf_size = 40 + 8; /* UD requires more than 40 byte */
config->tcp_port = 5256;
while (1) {
int oc = getopt(argc, argv, "s:p:m:n:h");
if (oc == -1)
break;
switch (oc) {
case 's': /* name for IP for exchanging LID and QPN */
config->server_name = optarg;
config->server_flg = 0;
break;
case 'p': /* TCP port for exchange LID and QPN */
config->tcp_port = atoi(optarg);
break;
case 'm':
config->buf_size = atoi(optarg);
break;
case 'n': /* number of remote nodes */
config->nremote = atoi(optarg);
break;
case 'h':
default:
printf("usage: ./main [-s <server_name>] [-p <tcp_port>] [-m <size>]\n"
"Example: ssh cn01 ./main -p 10000 & ./main -s cn01 -p 10000\n");
exit(-1);
break;
}
}
// if (config->tcp_port <= 0) { return IBCOMM_ERR_CODE; }
// no need to set tcp_port for IB
return 0;
}

View File

@@ -0,0 +1,390 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "ibcomm.h"
#include "debug.h"
#include "list.h"
#include <infiniband/verbs.h>
//#define DEBUG_RESOURCE
#ifdef DEBUG_RESOURCE
#define dprintf printf
#else
#define dprintf(...)
#endif
int resource_create(config_t config, resource_t *res){
struct ibv_device **dev_list = NULL, *ib_dev = NULL;
int dev_num;
int i, rc = IBCOMM_ERR_CODE;
/*Init*/
memset(res, 0, sizeof(resource_t));
res->pdinfo_list = malloc(sizeof(list_t));
res->qpinfo_list = malloc(sizeof(list_t));
res->mrinfo_list = malloc(sizeof(list_t));
res->ib_ctx = NULL;
res->port_attr = NULL;
/*Get IB device list*/
dev_list = ibv_get_device_list(&dev_num);
printf("resource_create,dev_num=%d\n", dev_num);
ERR_CHKANDJUMP(!dev_list, -1, error_perror("ibv_get_device_list"));
if(!dev_num){
error_printf("no devices are found\n");
goto resource_create_exit;
}
/*Find requested HCA*/
if(!config.dev_name) {
config.dev_name = strdup(ibv_get_device_name(dev_list[0]));
}
printf("Trying to open device %s\n", config.dev_name);
for(i=0; i< dev_num; i++){
if(!strcmp(ibv_get_device_name(dev_list[i]), config.dev_name)){
ib_dev = dev_list[i];
break;
}
}
if(ib_dev == NULL){
error_printf("no devices are found\n");
goto resource_create_exit;
}
/*Open HCA*/
res->ib_ctx = ibv_open_device(ib_dev);
if(!res->ib_ctx){
error_perror("resource_create,ibv_open_device");
goto resource_create_exit;
}
struct ibv_device_attr device_attr;
int ib_errno;
ib_errno = ibv_query_device(res->ib_ctx, &device_attr);
if(ib_errno) { printf("ibv_query_device failed\n"); goto resource_create_exit; }
printf("atomic_cap=%08x\n", device_attr.atomic_cap);
printf("max_qp_rd_atom=%08x\n", device_attr.max_qp_rd_atom);
printf("max_ee_rd_atom=%08x\n", device_attr.max_ee_rd_atom);
printf("max_res_rd_atom=%08x\n", device_attr.max_res_rd_atom);
printf("max_qp_init_rd_atom=%08x\n", device_attr.max_qp_init_rd_atom);
printf("max_ee_init_rd_atom=%08x\n", device_attr.max_ee_init_rd_atom);
/*Query Port Attr*/
res->port_attr = malloc(sizeof(struct ibv_port_attr));
memset(res->port_attr, 0 , sizeof(struct ibv_port_attr));
if(ibv_query_port(res->ib_ctx, config.ib_port, res->port_attr)){
error_perror("ibv_query_port");
goto resource_create_exit;
}
printf("res->port_attr.max_msg_sz=%d\n", res->port_attr->max_msg_sz);
rc = 0;
fn_exit:
return rc;
fn_fail:
resource_create_exit:
/*if error, destroy HCA handle*/
if(rc){
if(res->ib_ctx){
ibv_close_device(res->ib_ctx);
res->ib_ctx = NULL;
}
if(res->port_attr){
free(res->port_attr);
}
res = NULL;
}
// free other
ib_dev = NULL;
if(dev_list){
ibv_free_device_list(dev_list);
dev_list = NULL;
}
goto fn_exit;
}
int pd_create(resource_t *res, pdinfo_t *pdinfo){
int rc = IBCOMM_ERR_CODE;
/*Init*/
memset(pdinfo, 0, sizeof(pdinfo_t));
pdinfo->pd = NULL;
/*Alloc on HCA handle*/
pdinfo->pd = ibv_alloc_pd(res->ib_ctx);
if(pdinfo->pd == NULL){
error_perror("ibv_alloc_pd");
goto pd_create_exit;
}
/*Register to res*/
list_add(res->pdinfo_list, pdinfo);
rc = 0;
pd_create_exit:
if(rc)
pdinfo = NULL;
return rc;
}
int qp_create(resource_t *res, pdinfo_t *pdinfo, qpinfo_t *qpinfo){
struct ibv_qp_init_attr qp_init_attr;
int rc = IBCOMM_ERR_CODE;
int ibv_errno;
/*Init*/
memset(qpinfo, 0, sizeof(qpinfo_t));
int i;
for(i = 0; i < NREMOTE; i++) {
qpinfo->sock[i] = -1; // not connected
}
qpinfo->sr_num = 0;
qpinfo->rr_num = 0;
/*Create cq*/
qpinfo->scq = ibv_create_cq(res->ib_ctx, _MAX_CQ_CAPACITY, NULL, NULL, 0);
qpinfo->rcq = ibv_create_cq(res->ib_ctx, _MAX_CQ_CAPACITY, NULL, NULL, 0);
if(!qpinfo->scq || !qpinfo->rcq){
error_perror("qp_create,ibv_create_cq");
goto qp_create_exit;
}
/*Create qp*/
memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr));
qp_init_attr.qp_type = IBV_QPT_RC;
qp_init_attr.sq_sig_all = 1;
qp_init_attr.send_cq = qpinfo->scq;
qp_init_attr.recv_cq = qpinfo->rcq;
// max SR/RR num in SQ/RQ
qp_init_attr.cap.max_send_wr = _MAX_SQ_CAPACITY;
qp_init_attr.cap.max_recv_wr = _MAX_RQ_CAPACITY;
// max SGE num
qp_init_attr.cap.max_send_sge = _MAX_SGE_CAPACITY;
qp_init_attr.cap.max_recv_sge = _MAX_SGE_CAPACITY;
qp_init_attr.cap.max_inline_data = IBCOM_INLINE_DATA;
#if 0
ibv_errno = show_resident(0);
#endif
qpinfo->qp = ibv_create_qp(pdinfo->pd, &qp_init_attr);
if(qpinfo->qp == NULL){
error_perror("ibv_create_qp");
goto qp_create_exit;
}
#if 0
ibv_errno = show_resident(1);
qpinfo->qp = ibv_create_qp(pdinfo->pd, &qp_init_attr);
ibv_errno = show_resident(2);
qpinfo->qp = ibv_create_qp(pdinfo->pd, &qp_init_attr);
ibv_errno = show_resident(3);
qpinfo->qp = ibv_create_qp(pdinfo->pd, &qp_init_attr);
ibv_errno = show_resident(4);
qpinfo->qp = ibv_create_qp(pdinfo->pd, &qp_init_attr);
ibv_errno = show_resident(5);
qpinfo->qp = ibv_create_qp(pdinfo->pd, &qp_init_attr);
ibv_errno = show_resident(6);
#endif
qpinfo->max_inline_data = qp_init_attr.cap.max_inline_data;
printf("max_send_wr=%d,max_recv_wr=%d,inline_data=%d,max_send_sge=%d,max_recv_sge=%d\n", qp_init_attr.cap.max_send_wr, qp_init_attr.cap.max_recv_wr, qp_init_attr.cap.max_inline_data, qp_init_attr.cap.max_send_sge, qp_init_attr.cap.max_recv_sge);
/*Register to res*/
list_add(res->qpinfo_list, qpinfo);
rc = 0;
qp_create_exit:
if(rc){
if(qpinfo->scq){
ibv_destroy_cq(qpinfo->scq);
qpinfo->scq = NULL;
}
if(qpinfo->rcq){
ibv_destroy_cq(qpinfo->rcq);
qpinfo->rcq = NULL;
}
if(qpinfo->qp){
ibv_destroy_qp(qpinfo->qp);
qpinfo->qp = NULL;
}
qpinfo = NULL;
}
fn_exit:
return rc;
fn_fail:
goto fn_exit;
}
int qp_create_ud(resource_t *res, pdinfo_t *pdinfo, qpinfo_t *qpinfo){
struct ibv_qp_init_attr qp_init_attr;
int rc = IBCOMM_ERR_CODE;
int ibv_errno;
/*Init*/
memset(qpinfo, 0, sizeof(qpinfo_t));
int i;
for(i = 0; i < NREMOTE; i++) {
qpinfo->sock[i] = -1; // not connected
}
qpinfo->sr_num = 0;
qpinfo->rr_num = 0;
/*Create cq*/
qpinfo->scq = ibv_create_cq(res->ib_ctx, _MAX_CQ_CAPACITY, NULL, NULL, 0);
qpinfo->rcq = ibv_create_cq(res->ib_ctx, _MAX_CQ_CAPACITY, NULL, NULL, 0);
if(!qpinfo->scq || !qpinfo->rcq){
error_perror("ibv_create_cq");
goto qp_create_exit;
}
/*Create qp*/
memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr));
qp_init_attr.qp_type = IBV_QPT_UD;
//qp_init_attr.sq_sig_all = 1;
qp_init_attr.send_cq = qpinfo->scq;
qp_init_attr.recv_cq = qpinfo->rcq;
// max SR/RR num in SQ/RQ
qp_init_attr.cap.max_send_wr = _MAX_SQ_CAPACITY;
qp_init_attr.cap.max_recv_wr = _MAX_RQ_CAPACITY;
// max SGE num
qp_init_attr.cap.max_send_sge = _MAX_SGE_CAPACITY;
qp_init_attr.cap.max_recv_sge = _MAX_SGE_CAPACITY;
qpinfo->qp = ibv_create_qp(pdinfo->pd, &qp_init_attr);
if(qpinfo->qp == NULL){
error_perror("ibv_create_qp");
goto qp_create_exit;
}
qpinfo->max_inline_data = qp_init_attr.cap.max_inline_data;
printf("max_send_wr=%d,max_recv_wr=%d,max_send_sge=%d,max_recv_sge=%d,\n", qp_init_attr.cap.max_send_wr, qp_init_attr.cap.max_recv_wr, qp_init_attr.cap.max_send_sge, qp_init_attr.cap.max_recv_sge);
/*Register to res*/
list_add(res->qpinfo_list, qpinfo);
rc = 0;
qp_create_exit:
if(rc){
if(qpinfo->scq){
ibv_destroy_cq(qpinfo->scq);
qpinfo->scq = NULL;
}
if(qpinfo->rcq){
ibv_destroy_cq(qpinfo->rcq);
qpinfo->rcq = NULL;
}
if(qpinfo->qp){
ibv_destroy_qp(qpinfo->qp);
qpinfo->qp = NULL;
}
qpinfo = NULL;
}
return rc;
}
int mr_create(resource_t *res, pdinfo_t *pdinfo, int buf_size, char *buf, mrinfo_t *mrinfo) {
int mr_flags;
int rc = IBCOMM_ERR_CODE;
/*Init*/
memset(mrinfo, 0, sizeof(mrinfo_t));
mrinfo->buf = buf;
mrinfo->buf_size = buf_size;
dprintf("mr_create,mrinfo->buf=%lx\n", (unsigned long)mrinfo->buf);
/*Create mr*/
mr_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_ATOMIC;
mrinfo->mr = ibv_reg_mr(pdinfo->pd, buf, buf_size, mr_flags);
if(mrinfo->mr == NULL){
perror("ibv_reg_mr");
goto mr_create_exit;
}
/*Register to res*/
list_add(res->mrinfo_list, mrinfo);
rc = 0;
mr_create_exit:
if(rc) {
if(mrinfo->mr) { ibv_dereg_mr(mrinfo->mr); }
if(mrinfo) { mrinfo = NULL; }
}
return rc;
}
int resource_destroy(config_t *config, resource_t *res){
int rc = 0;
//config.dev_name
if(config->dev_name){
free(config->dev_name);
}
// qp
qpinfo_t *qpinfo = NULL;
while((qpinfo = (qpinfo_t *)list_pop(res->qpinfo_list)) != NULL){
// qp
if(qpinfo->qp && ibv_destroy_qp(qpinfo->qp)){
error_perror("ibv_destroy_qp");
rc = IBCOMM_ERR_CODE;
}
qpinfo->qp = NULL;
// scq
if(qpinfo->scq && ibv_destroy_cq(qpinfo->scq)){
error_perror("ibv_destroy_cq");
rc = IBCOMM_ERR_CODE;
}
qpinfo->scq = NULL;
// rcq
if(qpinfo->rcq && ibv_destroy_cq(qpinfo->rcq)){
error_perror("ibv_destroy_cq");
rc = IBCOMM_ERR_CODE;
}
qpinfo->rcq = NULL;
// sock
int i;
for(i = 0; i < (config->server_flg ? config->nremote : 1); i++) {
if(qpinfo->sock[i] >= 0 && close(qpinfo->sock[i])){
error_perror("close");
rc = IBCOMM_ERR_CODE;
}
}
qpinfo = NULL;
}
// mr
mrinfo_t *mrinfo = NULL;
while ((mrinfo = (mrinfo_t *) list_pop(res->mrinfo_list)) != NULL) {
if (mrinfo->mr && ibv_dereg_mr(mrinfo->mr)) {
error_perror("ibv_dereg_mr");
rc = IBCOMM_ERR_CODE;
}
mrinfo->mr = NULL;
if (mrinfo->buf) {
if (config->pci_buf_flg) {
//aal_host_mem_free(mrinfo->buf);
} else {
munmap(mrinfo->buf, mrinfo->buf_size);
}
}
mrinfo = NULL;
}
// pd
pdinfo_t *pdinfo = NULL;
while((pdinfo = (pdinfo_t *)list_pop(res->pdinfo_list)) != NULL){
if(pdinfo->pd && ibv_dealloc_pd(pdinfo->pd)){
error_perror("ibv_dealloc_pd");
rc = IBCOMM_ERR_CODE;
}
pdinfo = NULL;
}
if (res->ib_ctx && ibv_close_device(res->ib_ctx)) {
error_perror("ibv_close_device");
rc = IBCOMM_ERR_CODE;
}
if(res->port_attr){
free(res->port_attr);
}
res = NULL;
return rc;
}

View File

@@ -0,0 +1,180 @@
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <memory.h>
#include <unistd.h>
#include "sock.h"
#include "debug.h"
#define DEBUG_SOCK
#ifdef DEBUG_SOCK
#define dprintf printf
#else
#define dprintf(...)
#endif
int sock_connect(char *server_name, int port, int *listenfd){
struct addrinfo hints;
struct addrinfo *result = NULL, *rp = NULL;
int rc = 0, sockfd = -1;
// fd for search, after accept (server)
// for for search, after connect (client)
char service[6];
char addrstr[256];
dprintf("sock_connect,enter\n");
if(!server_name && *listenfd != -1) { sockfd = *listenfd; goto reuse_listenfd; }
// set port as service name
if (sprintf(service, "%d", port) < 0)
goto sock_connect_exit;
memset(&hints, 0, sizeof(struct addrinfo));
if(server_name == NULL){
hints.ai_flags = AI_PASSIVE;
}
hints.ai_family = AF_UNSPEC;// IPv4 or IPv6
hints.ai_socktype = SOCK_STREAM;//TCP
hints.ai_protocol = 0; // any protocol
hints.ai_canonname = NULL;
hints.ai_addr = NULL;
hints.ai_next = NULL;
// get a list of addresses
rc = getaddrinfo(server_name, service, &hints, &result);
if(rc){
dprintf("%s\n", gai_strerror(rc));
goto sock_connect_exit;
}
dprintf("result=%p\n", result);
// find a usable address
for(rp = result; rp != NULL; rp = rp->ai_next){
inet_ntop(rp->ai_family, rp->ai_addr->sa_data, addrstr, 100);
void *ptr;
switch(rp->ai_family) {
case AF_INET:
printf("ai_family=AF_INET\n");
ptr= &((struct sockaddr_in *)rp->ai_addr)->sin_addr;
break;
default:
dprintf("ai_family=%08x\n", rp->ai_family);
}
inet_ntop(rp->ai_family, ptr, addrstr, 100);
printf("trying to use addr=%s,port=%d\n", addrstr,port);
}
for(rp = result; rp != NULL; rp = rp->ai_next){
sockfd = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol);
if(sockfd == -1)
continue;
// set socket reusable
int on = 1;
if(setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) != 0)
continue;
// server mode
if(server_name == NULL){
inet_ntop(rp->ai_family, rp->ai_addr->sa_data, addrstr, 100);
void *ptr;
switch(rp->ai_family) {
case AF_INET:
dprintf("ai_family=AF_INET\n");
ptr= &((struct sockaddr_in *)rp->ai_addr)->sin_addr;
break;
default:
dprintf("ai_family=%08x\n", rp->ai_family);
}
inet_ntop(rp->ai_family, ptr, addrstr, 100);
printf("server mode,addr=%s,port=%d\n", addrstr,port);
if(bind(sockfd, rp->ai_addr, rp->ai_addrlen) != 0)
continue;
reuse_listenfd:
printf("listen=%d\n", sockfd);
if(listen(sockfd, 1) != 0)
continue;
/* connect successfully */
if(*listenfd == -1) { *listenfd = sockfd; }
sockfd = accept(sockfd, NULL, NULL);
printf("accept=%d\n", sockfd);
goto sock_connect_success;
// client mode
}else{
inet_ntop(rp->ai_family, rp->ai_addr->sa_data, addrstr, 100);
void *ptr;
switch(rp->ai_family) {
case AF_INET:
printf("ai_family=AF_INET\n");
ptr= &((struct sockaddr_in *)rp->ai_addr)->sin_addr;
break;
default:
dprintf("ai_family=%08x\n", rp->ai_family);
}
inet_ntop(rp->ai_family, ptr, addrstr, 100);
printf("client mode,addr=%s,port=%d\n", addrstr,port);
rc = connect(sockfd, rp->ai_addr, rp->ai_addrlen);
if(rc == 0) {
printf("connect succeeded,fd=%d\n", sockfd);
goto sock_connect_success;
} else {
printf("connect failed, trying to use next\n");
}
}
}
//sock_connect_failure:
if(rp == NULL){
error_printf("All trial failed\n");
sockfd = -1;
goto sock_connect_exit;
}
sock_connect_success:
sock_connect_exit:
#if 0
if(listenfd > 0)
close(listenfd);
#endif
if(result)
freeaddrinfo(result);
return sockfd;
}
int sock_sync_data(int sock, int data_bytes, char *local_data, char *remote_data){
int rc = 0;
int read_bytes = 0;
// write to sock
rc = write(sock, local_data, data_bytes);
if(rc != data_bytes){
rc =_SOCK_WRITE_ERR;
goto sock_sync_data_exit;
}
// read from sock
rc = 0;
while(!rc && read_bytes < data_bytes){
rc = read(sock, remote_data, data_bytes);
if(rc > 0){
read_bytes += rc;
rc = 0;
}else{
rc =_SOCK_READ_ERR;
goto sock_sync_data_exit;
}
}
sock_sync_data_exit:
return rc;
}

View File

@@ -0,0 +1,11 @@
#ifndef SOCK_H
#define SOCK_H
enum sock_return_code{
_SOCK_SUCCESS,
_SOCK_CONN_ERR,
_SOCK_WRITE_ERR,
_SOCK_READ_ERR
};
extern int sock_connect(char *server_name, int port, int *listenfd);
extern int sock_sync_data(int sock, int data_size, char *local_data, char *remote_data);
#endif