From f4c32e5507d4407bceb53797510a6de452f7f96f Mon Sep 17 00:00:00 2001 From: Ken Sato Date: Thu, 31 Aug 2017 15:43:28 +0900 Subject: [PATCH] qlmpi: add testcase to qlmpi (rusage for swap) --- test/qlmpi/rusage/Makefile | 53 +++ test/qlmpi/rusage/README | 21 ++ test/qlmpi/rusage/qltest.h | 10 + test/qlmpi/rusage/run.sh | 97 ++++++ test/qlmpi/rusage/rusage008.c | 100 ++++++ test/qlmpi/rusage/rusage009.c | 111 +++++++ test/qlmpi/rusage/rusage010.c | 77 +++++ test/qlmpi/rusage/rusage011.c | 141 ++++++++ test/qlmpi/rusage/verbs/aal_host.h | 21 ++ test/qlmpi/rusage/verbs/curtime.c | 11 + test/qlmpi/rusage/verbs/debug.c | 38 +++ test/qlmpi/rusage/verbs/debug.h | 31 ++ test/qlmpi/rusage/verbs/ibcomm.h | 209 ++++++++++++ test/qlmpi/rusage/verbs/list.c | 82 +++++ test/qlmpi/rusage/verbs/list.h | 26 ++ test/qlmpi/rusage/verbs/makefile | 30 ++ test/qlmpi/rusage/verbs/mbuf.h | 23 ++ test/qlmpi/rusage/verbs/mcons.h | 9 + test/qlmpi/rusage/verbs/mm_core.h | 12 + test/qlmpi/rusage/verbs/mm_host.h | 60 ++++ test/qlmpi/rusage/verbs/mm_ib_test.h | 52 +++ test/qlmpi/rusage/verbs/mmib.h | 54 +++ test/qlmpi/rusage/verbs/mtype.h | 29 ++ test/qlmpi/rusage/verbs/pm_buf.h | 16 + test/qlmpi/rusage/verbs/post.c | 453 ++++++++++++++++++++++++++ test/qlmpi/rusage/verbs/printmem.c | 12 + test/qlmpi/rusage/verbs/qp.c | 297 +++++++++++++++++ test/qlmpi/rusage/verbs/rdma_wr.c | 218 +++++++++++++ test/qlmpi/rusage/verbs/read_config.c | 79 +++++ test/qlmpi/rusage/verbs/resource.c | 390 ++++++++++++++++++++++ test/qlmpi/rusage/verbs/sock.c | 180 ++++++++++ test/qlmpi/rusage/verbs/sock.h | 11 + 32 files changed, 2953 insertions(+) create mode 100644 test/qlmpi/rusage/Makefile create mode 100644 test/qlmpi/rusage/README create mode 100644 test/qlmpi/rusage/qltest.h create mode 100755 test/qlmpi/rusage/run.sh create mode 100644 test/qlmpi/rusage/rusage008.c create mode 100644 test/qlmpi/rusage/rusage009.c create mode 100644 test/qlmpi/rusage/rusage010.c create mode 100644 test/qlmpi/rusage/rusage011.c create mode 100644 test/qlmpi/rusage/verbs/aal_host.h create mode 100644 test/qlmpi/rusage/verbs/curtime.c create mode 100644 test/qlmpi/rusage/verbs/debug.c create mode 100644 test/qlmpi/rusage/verbs/debug.h create mode 100644 test/qlmpi/rusage/verbs/ibcomm.h create mode 100644 test/qlmpi/rusage/verbs/list.c create mode 100644 test/qlmpi/rusage/verbs/list.h create mode 100644 test/qlmpi/rusage/verbs/makefile create mode 100644 test/qlmpi/rusage/verbs/mbuf.h create mode 100644 test/qlmpi/rusage/verbs/mcons.h create mode 100644 test/qlmpi/rusage/verbs/mm_core.h create mode 100644 test/qlmpi/rusage/verbs/mm_host.h create mode 100644 test/qlmpi/rusage/verbs/mm_ib_test.h create mode 100644 test/qlmpi/rusage/verbs/mmib.h create mode 100644 test/qlmpi/rusage/verbs/mtype.h create mode 100644 test/qlmpi/rusage/verbs/pm_buf.h create mode 100644 test/qlmpi/rusage/verbs/post.c create mode 100644 test/qlmpi/rusage/verbs/printmem.c create mode 100644 test/qlmpi/rusage/verbs/qp.c create mode 100644 test/qlmpi/rusage/verbs/rdma_wr.c create mode 100644 test/qlmpi/rusage/verbs/read_config.c create mode 100644 test/qlmpi/rusage/verbs/resource.c create mode 100644 test/qlmpi/rusage/verbs/sock.c create mode 100644 test/qlmpi/rusage/verbs/sock.h diff --git a/test/qlmpi/rusage/Makefile b/test/qlmpi/rusage/Makefile new file mode 100644 index 00000000..aee00914 --- /dev/null +++ b/test/qlmpi/rusage/Makefile @@ -0,0 +1,53 @@ +.SUFFIXES: # Clear suffixes +.SUFFIXES: .c + +CC = gcc + +MCMOD_DIR=$(HOME)/ppos +XPMEM_DIR=$(HOME)/install/xpmem-master + +CPPFLAGS = -I$(MCMOD_DIR)/include +CCFLAGS = -g +LDFLAGS = -L$(MCMOD_DIR)/lib -lihk -Wl,-rpath=$(MCMOD_DIR)/ppos/lib +EXES = +SRCS = +OBJS = $(SRCS:.c=.o) + +CPPFLAGSMCK = -I$(HOME)/usr/include +CCFLAGSMCK = -g -O0 +#LDFLAGSMCK = -static +LDFLAGSMCK = +SRCSMCK = $(shell ls rusage*.c) +EXESMCK = $(SRCSMCK:.c=) +OBJSMCK = $(SRCSMCK:.c=.o) + +all: $(EXES) $(EXESMCK) + +rusage010: rusage010.o + $(CC) -o $@ $^ $(LDFLAGSMCK) + +rusage010.o: rusage010.c + $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< + + +rusage008: rusage008.o + $(CC) -o $@ $^ $(LDFLAGSMCK) + +rusage008.o: rusage008.c + $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< + +rusage009: rusage009.o + $(CC) -o $@ $^ $(LDFLAGSMCK) + +rusage009.o: rusage009.c + $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< + +rusage011: rusage011.o + $(CC) -o $@ $^ $(LDFLAGSMCK) -I$(XPMEM_DIR)/include -L$(XPMEM_DIR)/lib -lxpmem -Wl,-rpath=$(XPMEM_DIR)/lib + +rusage011.o: rusage011.c + $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< + + +clean: + rm -f core $(EXES) $(OBJS) $(EXESMCK) $(OBJSMCK) diff --git a/test/qlmpi/rusage/README b/test/qlmpi/rusage/README new file mode 100644 index 00000000..12b3b442 --- /dev/null +++ b/test/qlmpi/rusage/README @@ -0,0 +1,21 @@ +=========== +Test matrix +=========== + +rusage010: + app->mmap() 2M,anon,pre-page ->set_range()->munmap()->free_process_memory_range()->clear_range()[OK] + +rusage005: device file (ib ping-pong) + devobj()->get_page()->pf->munmap()->clear_range() [OK] + remote page fault->cow->clear_range() [OK] + ld-linux.so->mmap private->cow->clear_range() [OK] + +rusage008: sharing file-map page +fork()->filemap->pf->clear_range() [OK] + +rusage009: sharing shmget() page +fork()->shmat()->pf->clear_range() [OK] + +rusage011: sharing xpmem page +fork()->xpmem_attach()->pf->clear_range() [OK] + diff --git a/test/qlmpi/rusage/qltest.h b/test/qlmpi/rusage/qltest.h new file mode 100644 index 00000000..e460eab1 --- /dev/null +++ b/test/qlmpi/rusage/qltest.h @@ -0,0 +1,10 @@ +#include + +#define BUF_SIZE (32 * 1024) + +int do_swap(char *fname, void *buffer) { + int rc = -1; + rc = syscall(801, fname, buffer, BUF_SIZE, 2); + printf("%s: swap returns %d , %s\n", __FUNCTION__, rc, fname); + return rc; +} diff --git a/test/qlmpi/rusage/run.sh b/test/qlmpi/rusage/run.sh new file mode 100755 index 00000000..1befba69 --- /dev/null +++ b/test/qlmpi/rusage/run.sh @@ -0,0 +1,97 @@ +#!/usr/bin/bash + +testname=$1 +bootopt="-m 256M" +mcexecopt="" +testopt="" +kill="n" +dryrun="n" +sleepopt="0.4" +home=$(eval echo \$\{HOME\}) +install=${home}/ppos +rusage=work/rusage/for_ql +walb=wallaby14 + +echo Executing ${testname} + +case ${testname} in + rusage005) + #ssh wallaby -c '(cd ${home}/${rusage}/verbs; make rdma_wr)' + bn=verbs/rdma_wr + ;; + *) + bn=${testname} + make clean > /dev/null 2> /dev/null + make ${bn} +esac + +pid=`pidof mcexec` +if [ "${pid}" != "" ]; then + kill -9 ${pid} > /dev/null 2> /dev/null +fi + +case ${testname} in + rusage010) + testopt="1" + ;; + rusage005) + ssh ${walb}.aics-sys.riken.jp "${home}/${rusage}/verbs/rdma_wr -p 9999" > ${testname}_rcvside.txt & + echo "Running 'rdma_wr -p 9999' on ${walb}..." + read -p "please enter to go on." + port=9999 + testopt="-s ${walb}.aics-sys.riken.jp -p ${port}" + ;; + rusage008) + cp ${bn} ./file + ;; + rusage009) + ;; + rusage011) + if [ `lsmod | grep xpmem | wc -l` -eq 0 ]; then + sudo insmod /home/satoken/install/xpmem-master/lib/module/xpmem.ko + sudo chmod og+rw /dev/xpmem + fi + dryrun="n" + kill="n" + sleepopt="5" + ;; + *) + echo Unknown test case + exit 255 +esac + +if [ ${dryrun} == "y" ]; then +exit +fi + +sudo ${install}/sbin/mcstop+release.sh && +sudo ${install}/sbin/mcreboot.sh ${bootopt} + +if [ ${kill} == "y" ]; then + ${install}/bin/mcexec ${mcexecopt} ./${bn} ${testopt} & + sleep ${sleepopt} + sudo ${install}/sbin/ihkosctl 0 kmsg > ./${testname}.log + pid=`pidof mcexec` + if [ "${pid}" != "" ]; then + kill -9 ${pid} > /dev/null 2> /dev/null + fi +else + case ${testname} in + rusage005) + echo "**** message of sender side **************************" + ${install}/bin/mcexec ${mcexecopt} ./${bn} ${testopt} + echo "******************************************************" + + echo "**** message of reciever side ************************" + cat ${testname}_rcvside.txt + echo "******************************************************" + #read -p "Run rdma_wr." ans + sudo ${install}/sbin/ihkosctl 0 kmsg > ./${testname}.log + ;; + *) + ${install}/bin/mcexec ${mcexecopt} ./${bn} ${testopt} + sudo ${install}/sbin/ihkosctl 0 kmsg > ./${testname}.log + esac +fi + +sudo ${install}/sbin/mcstop+release.sh diff --git a/test/qlmpi/rusage/rusage008.c b/test/qlmpi/rusage/rusage008.c new file mode 100644 index 00000000..8c395255 --- /dev/null +++ b/test/qlmpi/rusage/rusage008.c @@ -0,0 +1,100 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "qltest.h" + +#define DEBUG + +#ifdef DEBUG +#define dprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#define eprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#else +#define dprintf(...) do { } while (0) +#define eprintf(...) do { } while (0) +#endif + +#define CHKANDJUMP(cond, err, ...) \ + do { \ + if(cond) { \ + eprintf(__VA_ARGS__); \ + ret = err; \ + goto fn_fail; \ + } \ + } while(0) + +int sz_mem[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 +#define NUM_AREAS 1 + +int main(int argc, char** argv) { + void* mem; + int ret = 0; + pid_t pid; + int status; + int fd; +// for swap_test + int swap_rc = 0; + char buffer[BUF_SIZE]; + + pid = fork(); + CHKANDJUMP(pid == -1, 255, "fork failed\n"); + if(pid == 0) { + fd = open("./file", O_RDWR); + CHKANDJUMP(fd == -1, 255, "open failed\n"); + + mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + + unsigned long val = *((unsigned long*)mem); + +// for swap_test + swap_rc = do_swap("/tmp/rusage008_c.swp", buffer); + if (swap_rc < 0) { + printf("[NG] swap in child is failed\n"); + } + _exit(123); + } else { + fd = open("./file", O_RDWR); + CHKANDJUMP(fd == -1, 255, "open failed\n"); + + mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + + unsigned long val = *((unsigned long*)mem); + + ret = waitpid(pid, &status, 0); + CHKANDJUMP(ret == -1, 255, "waitpid failed\n"); +// for swap_test + swap_rc = do_swap("/tmp/rusage008_p.swp", buffer); + if (swap_rc < 0) { + printf("[NG] swap in parent is failed\n"); + } + printf("exit status=%d\n", WEXITSTATUS(status)); + } + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/qlmpi/rusage/rusage009.c b/test/qlmpi/rusage/rusage009.c new file mode 100644 index 00000000..2f352d92 --- /dev/null +++ b/test/qlmpi/rusage/rusage009.c @@ -0,0 +1,111 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "qltest.h" + +#define DEBUG + +#ifdef DEBUG +#define dprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#define eprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#else +#define dprintf(...) do { } while (0) +#define eprintf(...) do { } while (0) +#endif + +#define CHKANDJUMP(cond, err, ...) \ + do { \ + if(cond) { \ + eprintf(__VA_ARGS__); \ + ret = err; \ + goto fn_fail; \ + } \ + } while(0) + +int sz_mem[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 + +int main(int argc, char** argv) { + void* mem; + int ret = 0; + pid_t pid; + int status; + key_t key = ftok(argv[0], 0); + int shmid; +// for swap_test + int swap_rc = 0; + char buffer[BUF_SIZE]; + + shmid = shmget(key, sz_mem[SZ_INDEX], IPC_CREAT | 0660); + CHKANDJUMP(shmid == -1, 255, "shmget failed: %s\n", strerror(errno)); + + pid = fork(); + CHKANDJUMP(pid == -1, 255, "fork failed\n"); + if(pid == 0) { + mem = shmat(shmid, NULL, 0); + CHKANDJUMP(mem == (void*)-1, 255, "shmat failed: %s\n", strerror(errno)); + +// for swap_test + swap_rc = do_swap("/tmp/rusage009_c.swp", buffer); + if (swap_rc < 0) { + printf("[NG] swap in child is failed\n"); + } + *((unsigned long*)mem) = 0x1234; + + ret = shmdt(mem); + CHKANDJUMP(ret == -1, 255, "shmdt failed\n"); + + _exit(123); + } else { + mem = shmat(shmid, NULL, 0); + CHKANDJUMP(mem == (void*)-1, 255, "shmat failed: %s\n", strerror(errno)); + + ret = waitpid(pid, &status, 0); + CHKANDJUMP(ret == -1, 255, "waitpid failed\n"); + +// for swap_test + swap_rc = do_swap("/tmp/rusage009_p.swp", buffer); + if (swap_rc < 0) { + printf("[NG] swap in parent is failed\n"); + } + + printf("%lx\n", *((unsigned long*)mem)); + +#if 0 + struct shmid_ds buf; + ret = shmctl(shmid, IPC_RMID, &buf); + CHKANDJUMP(ret == -1, 255, "shmctl failed\n"); +#endif + + ret = shmdt(mem); + CHKANDJUMP(ret == -1, 255, "shmdt failed\n"); + } + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/qlmpi/rusage/rusage010.c b/test/qlmpi/rusage/rusage010.c new file mode 100644 index 00000000..25b8bd78 --- /dev/null +++ b/test/qlmpi/rusage/rusage010.c @@ -0,0 +1,77 @@ +#include +#include +#include +#include +#include +#include "qltest.h" + +#define DEBUG + +#ifdef DEBUG +#define dprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#define eprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#else +#define dprintf(...) do { } while (0) +#define eprintf(...) do { } while (0) +#endif + +#define CHKANDJUMP(cond, err, ...) \ + do { \ + if(cond) { \ + eprintf(__VA_ARGS__); \ + ret = err; \ + goto fn_fail; \ + } \ + } while(0) + +int sz_anon[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 +#define NUM_AREAS 1 + +int main(int argc, char** argv) { + int i; + int sz_index; + void* anon[NUM_AREAS]; + int ret = 0; +// for qlmpi test + int swap_rc = 0; + char buffer[BUF_SIZE]; + + CHKANDJUMP(argc != 2, 255, "%s \n", argv[0]); + sz_index = atoi(argv[1]); + + for(i = 0; i < NUM_AREAS; i++) { + anon[i] = mmap(0, sz_anon[sz_index], PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + CHKANDJUMP(anon[i] == MAP_FAILED, 255, "mmap failed\n"); + memset(anon[i], 0, sz_anon[sz_index]); + } +// for qlmpi test + swap_rc = do_swap("/tmp/rusage010.swp", buffer); + if (swap_rc < 0) { + printf("[NG] swap is failed.\n"); + } + + for(i = 0; i < NUM_AREAS; i++) { + munmap(anon[i], sz_anon[sz_index]); + } + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/qlmpi/rusage/rusage011.c b/test/qlmpi/rusage/rusage011.c new file mode 100644 index 00000000..db614435 --- /dev/null +++ b/test/qlmpi/rusage/rusage011.c @@ -0,0 +1,141 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "qltest.h" + +#define DEBUG + +#ifdef DEBUG +#define dprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#define eprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#else +#define dprintf(...) do { } while (0) +#define eprintf(...) do { } while (0) +#endif + +#define CHKANDJUMP(cond, err, ...) \ + do { \ + if(cond) { \ + eprintf(__VA_ARGS__); \ + ret = err; \ + goto fn_fail; \ + } \ + } while(0) + +int sz_mem[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 + +int main(int argc, char** argv) { + void* mem; + int ret = 0; + pid_t pid; + int status; + key_t key = ftok(argv[0], 0); + int shmid; + xpmem_segid_t segid; +// for swap_test + int swap_rc = 0; + char buffer[BUF_SIZE]; + + shmid = shmget(key, sz_mem[SZ_INDEX], IPC_CREAT | 0660); + CHKANDJUMP(shmid == -1, 255, "shmget failed: %s\n", strerror(errno)); + + mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + memset(mem, 0, sz_mem[SZ_INDEX]); + + pid = fork(); + CHKANDJUMP(pid == -1, 255, "fork failed\n"); + if(pid == 0) { + void *shm = shmat(shmid, NULL, 0); + CHKANDJUMP(shm == (void*)-1, 255, "shmat failed: %s\n", strerror(errno)); + + while((segid = *(xpmem_segid_t*)shm) == 0) { }; + + ret = shmdt(shm); + CHKANDJUMP(ret == -1, 255, "shmdt failed\n"); + + ret = xpmem_init(); + CHKANDJUMP(ret != 0, 255, "xpmem_init failed: %s\n", strerror(errno)); + + xpmem_apid_t apid = xpmem_get(segid, XPMEM_RDWR, XPMEM_PERMIT_MODE, NULL); + CHKANDJUMP(apid == -1, 255, "xpmem_get failed: %s\n", strerror(errno)); + + struct xpmem_addr addr = { .apid = apid, .offset = 0 }; + void* attach = xpmem_attach(addr, sz_mem[SZ_INDEX], NULL); + CHKANDJUMP(attach == (void*)-1, 255, "xpmem_attach failed: %s\n", strerror(errno)); + + *((unsigned long*)attach) = 0x1234; +// for swap_test + swap_rc = do_swap("/tmp/rusage011_c.swp", buffer); + if (swap_rc < 0) { + printf("[NG] swap in child is failed\n"); + } + + ret = xpmem_detach(attach); + CHKANDJUMP(ret == -1, 255, "xpmem_detach failed\n"); + + _exit(123); + } else { + void *shm = shmat(shmid, NULL, 0); + CHKANDJUMP(mem == (void*)-1, 255, "shmat failed: %s\n", strerror(errno)); + + ret = xpmem_init(); + CHKANDJUMP(ret != 0, 255, "xpmem_init failed: %s\n", strerror(errno)); + + segid = xpmem_make(mem, sz_mem[SZ_INDEX], XPMEM_PERMIT_MODE, (void*)0666); + CHKANDJUMP(segid == -1, 255, "xpmem_ioctl failed: %s\n", strerror(errno)); + + *(xpmem_segid_t*)shm = segid; + + ret = waitpid(pid, &status, 0); + CHKANDJUMP(ret == -1, 255, "waitpid failed\n"); + +// for swap_test + swap_rc = do_swap("/tmp/rusage011_p.swp", buffer); + if (swap_rc < 0) { + printf("[NG] swap in parent is failed\n"); + } + + printf("%lx\n", *((unsigned long*)mem)); + + struct shmid_ds buf; + ret = shmctl(shmid, IPC_RMID, &buf); + CHKANDJUMP(ret == -1, 255, "shmctl failed\n"); + + ret = shmdt(shm); + CHKANDJUMP(ret == -1, 255, "shmdt failed\n"); + + ret = xpmem_remove(segid); + CHKANDJUMP(ret == -1, 255, "xpmem_remove failed\n"); + } + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/qlmpi/rusage/verbs/aal_host.h b/test/qlmpi/rusage/verbs/aal_host.h new file mode 100644 index 00000000..4162166d --- /dev/null +++ b/test/qlmpi/rusage/verbs/aal_host.h @@ -0,0 +1,21 @@ +/* + * aal_host.h + * + * Created on: 2011/08/09 + * Author: simin + */ + +#ifndef AAL_HOST_H_ +#define AAL_HOST_H_ + +#define MAX_DEVNO 2 + +extern int aal_host_init(); +extern int aal_host_dev_init(int dev_no); +extern void* aal_host_mem_alloc(int dev_no, int size); +extern void aal_host_mem_free(void * addr, int size); +extern int aal_host_dev_exit(int dev_no); +extern int aal_host_exit(); +extern void* aal_host_mem_va2pa(int dev_no, void *virtual_addr); + +#endif /* AAL_HOST_H_ */ diff --git a/test/qlmpi/rusage/verbs/curtime.c b/test/qlmpi/rusage/verbs/curtime.c new file mode 100644 index 00000000..56ea70b3 --- /dev/null +++ b/test/qlmpi/rusage/verbs/curtime.c @@ -0,0 +1,11 @@ +#include +#include + +#define CURTIME_LIB 1 + +double cur_time(){ + struct timeval tp; + gettimeofday(&tp, NULL); + return tp.tv_sec + tp.tv_usec * 1.0E-6; +} + diff --git a/test/qlmpi/rusage/verbs/debug.c b/test/qlmpi/rusage/verbs/debug.c new file mode 100644 index 00000000..9797a7a3 --- /dev/null +++ b/test/qlmpi/rusage/verbs/debug.c @@ -0,0 +1,38 @@ +#include +#include +#include +#include "ibcomm.h" +#include "debug.h" + +//#define DEBUG_DEBUG +#ifdef DEBUG_DEBUG +#define dprintf printf +#else +#define dprintf(...) +#endif + +void debug_print_qp_conn_info(resource_t res, qpinfo_t qpinfo, config_t *config) { + uint8_t *p; + dprintf("local.qp_num=0x%x\n", qpinfo.qp->qp_num); + dprintf("local.lid=0x%x\n", res.port_attr->lid); + dprintf("local.sock[0]=%d\n", qpinfo.sock[0]); + if (res.rdma_mr.mr != NULL) { + dprintf("local.addr=0x%lx\n", (uint64_t)res.rdma_mr.buf); + dprintf("local.rkey=0x%x\n\n", res.rdma_mr.mr->rkey); + } + + int i; + for(i = 0; i < (qpinfo.listenfd == -1 ? 1 : config->nremote); i++) { + dprintf("remote.qp_num=0x%x\n", qpinfo.remote_conn_info[i].qp_num); + dprintf("remote.lid=0x%x\n", qpinfo.remote_conn_info[i].lid); + p = qpinfo.remote_conn_info[i].gid; + dprintf( + "remote.gid = %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n", + p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); + if (qpinfo.remote_conn_info[i].addr) { + dprintf("remote.addr=0x%lx\n", qpinfo.remote_conn_info[i].addr); + dprintf("remote.rkey=0x%x\n", qpinfo.remote_conn_info[i].rkey); + } + } +} + diff --git a/test/qlmpi/rusage/verbs/debug.h b/test/qlmpi/rusage/verbs/debug.h new file mode 100644 index 00000000..f67dbc4f --- /dev/null +++ b/test/qlmpi/rusage/verbs/debug.h @@ -0,0 +1,31 @@ +#ifndef MYLIB_H +#define MYLIB_H + +#ifndef NULL +#define NULL ((void *) 0) +#endif + +#ifdef DEBUG +#define debug_printf(fmt,arg...) {printf("[DEBUG] " fmt, ##arg);} +#define debug_print_mem(arg...) {fprintf(stderr, "[DEBUG] ");print_mem(arg);} +#else +#define debug_printf(fmt,arg...) {} +#define debug_print_mem(arg...) {} +#endif + +#ifdef ERROR +#define error_printf(fmt,arg...) {fprintf(stderr, "[ERROR] " fmt, ##arg);} +#define error_perror(arg...) {fprintf(stderr, "[ERROR] "); perror(arg);} +#else +#define error_printf(fmt,arg...) {} +#define error_perror(fmt,arg...) {} +#endif + +#include "mtype.h" + +/** + * get current time(sec) + */ +extern double cur_time(); +extern void print_mem(addr_t addr, int size); +#endif diff --git a/test/qlmpi/rusage/verbs/ibcomm.h b/test/qlmpi/rusage/verbs/ibcomm.h new file mode 100644 index 00000000..817eed59 --- /dev/null +++ b/test/qlmpi/rusage/verbs/ibcomm.h @@ -0,0 +1,209 @@ +#ifndef IBCOMM_H +#define IBCOMM_H +#include +#include "infiniband/verbs.h" +#include "sock.h" +#include "list.h" + +#define _MAX_FIX_BUF_SIZE 64 +#define _MAX_SQ_CAPACITY /*512*/256/*12*/ +#define _MAX_RQ_CAPACITY /*512*/256/*1*/ +#define _MAX_SGE_CAPACITY /*20*/3 +#define _MAX_CQ_CAPACITY /*512*/256/*1*/ + +#define IBCOM_INLINE_DATA /*(128*4-64)*//*(512-64)*//*884*/512 +#define IBCOM_RDMABUF_SZSEG (16384+4096) +#define IBCOM_MAGIC 0x55aa55aa +#define NCHAIN 2 + +#define SEND_CQ_FLG 1 +#define RECV_CQ_FLG 2 +#define IBCOMM_ERR_CODE -1 + +#define ibcomm_return_code_num 30 + +#if __BYTE_ORDER == __LITTLE_ENDIAN +static inline uint64_t htonll(uint64_t x) { return bswap_64(x); } +static inline uint64_t ntohll(uint64_t x) { return bswap_64(x); } +#elif __BYTE_ORDER == __BIG_ENDIAN +static inline uint64_t htonll(uint64_t x) { return x; } +static inline uint64_t ntohll(uint64_t x) { return x; } +#else +#error __BYTE_ORDER is neither __LITTLE_ENDIAN nor __BIG_ENDIAN +#endif + +/* ERROR definition*/ +enum ibcomm_return_code{ + _IBCOMM_RETCODE_SUCCESS , + _IBCOMM_ERRCODE_DEVICE_FOUND, + _IBCOMM_ERRCODE_NO_DEVICE, + _IBCOMM_ERRCODE_DEVICE_OPEN, + _IBCOMM_ERRCODE_CREATE_RES, + _IBCOMM_ERRCODE_DEVICE_QUERY_PORT, + + _IBCOMM_ERRCODE_PD_ALLOC, + _IBCOMM_ERRCODE_CQ_CREATE, + _IBCOMM_ERRCODE_QP_CREATE, + _IBCOMM_ERRCODE_MR_CREATE, + + _IBCOMM_ERRCODE_QP_DESTROY, + _IBCOMM_ERRCODE_CQ_DESTROY, + _IBCOMM_ERRCODE_MR_DESTROY, + _IBCOMM_ERRCODE_PD_DEALLOC, + _IBCOMM_ERRCODE_DEVICE_CLOSE, + + _IBCOMM_ERRCODE_SOCK_CONN, + _IBCOMM_ERRCODE_SOCK_SYNC, + _IBCOMM_ERRCODE_SOCK_CLOSE, + + _IBCOMM_ERRCODE_QP_QUERY_GID, + _IBCOMM_ERRCODE_INIT_QP, + _IBCOMM_ERRCODE_RTR_QP, + _IBCOMM_ERRCODE_RTS_QP, + + _IBCOMM_ERRCODE_POLL_CQ_ERR, + _IBCOMM_ERRCODE_POLL_CQ_ZERO_RESULT +}; +typedef struct config{ + char *dev_name; /*IB device name*/ + char *server_name; /*server host name*/ + u_int32_t tcp_port; /*server TCP port*/ + int ib_port; /*local IB port*/ + int gid_idx; /*gid index*/ + int use_rdma; /*rdma flag*/ + int buf_size; + int server_flg; + int pci_buf_flg; + int pci_cq_flg; + int nremote; /* number of remote nodes */ +}config_t; + +typedef struct qp_conn_info{ + uint64_t addr; /*Buffer address*/ + uint32_t rkey; /*Remote key*/ + uint32_t qp_num; /*QP number*/ + uint16_t lid; /*LID of the IB port*/ + uint8_t gid[16];/*GID of the IB port*/ +}qp_conn_info_t; + +typedef struct qp_conn_info_ud{ + uint16_t lid; + union ibv_gid gid; + uint32_t qp_num; + uint32_t qkey; +} qp_conn_info_ud_t; + +typedef struct mrinfo{ + struct ibv_mr *mr; + char *buf; /*Registered buf*/ + int buf_size; +}mrinfo_t; + +#define NREMOTE 4 +typedef struct qpinfo{ + struct ibv_qp *qp; + struct ibv_cq *scq; /*Send cq*/ + struct ibv_cq *rcq; /*Receive cq*/ + qp_conn_info_t remote_conn_info[NREMOTE]; /*Remote info*/ + int sock[NREMOTE]; /* exchange remote_conn_info using TCP */ + int listenfd; /* exchange remote_conn_info using TCP */ + int sr_num; + int rr_num; + int max_inline_data; /*if data smaller than it, use inline send*/ +}qpinfo_t; + +typedef struct pdinfo{ + struct ibv_pd *pd; +}pdinfo_t; + +typedef struct resource{ + struct ibv_context *ib_ctx;/*HCA handle*/ + struct ibv_port_attr *port_attr; /*IB port attributes*/ + + list_t *pdinfo_list; + list_t *mrinfo_list; + list_t *qpinfo_list; + + /* RDMA buffers */ + mrinfo_t rdma_mr; +}resource_t; + +/** + * create resource + * connect TCP socket + */ +extern int resource_create(config_t config, resource_t *res); + +/** + * create a pd and register it to resource + */ +extern int pd_create(resource_t *res, pdinfo_t *pdinfo); + +/** + * creete a qp and register it to pd + * -create send cq + * -create recv cq + * -assign send cq to sq + * -assign recv cq to rq + */ +extern int qp_create(resource_t *res, pdinfo_t *pdinfo, qpinfo_t *qpinfo); +extern int qp_create_ud(resource_t *res, pdinfo_t *pdinfo, qpinfo_t *qpinfo); + +/** + * 1.create a mr and register it to pd + * 2.register buf to this mr + */ +extern int mr_create(resource_t *res, pdinfo_t *pdinfo, int buf_size, char *buf, mrinfo_t *mrinfo); +/** + * destroy all resources + */ +extern int resource_destroy(config_t *config, resource_t *res); + + +/** + * connect to remote qp by exchanging addr info + */ +extern int connect_qp(config_t config, resource_t *res, qpinfo_t *qpinfo); + +/** + * change qp status + */ +extern int init_qp(config_t config, qpinfo_t *qpinfo); +extern int init_qp_ud(config_t config, qpinfo_t *qpinfo); + +extern int rtr_qp(config_t config, qpinfo_t *qpinfo); +extern int rtr_qp_ud(config_t config, qpinfo_t *qpinfo); + +extern int rts_qp(config_t config, qpinfo_t *qpinfo); +extern int rts_qp_ud(config_t config, qpinfo_t *qpinfo); + +extern int modify_dest_qp(config_t config, qpinfo_t *qpinfo, qp_conn_info_t* remote_conn_info); + +extern int post_send_req(qpinfo_t *qpinfo, mrinfo_t *mrinfo, int opcode, int tag, qp_conn_info_t* remote_conn_info, uint32_t imm_data); +int post_send_req2(qpinfo_t *qpinfo, mrinfo_t *mrinfo, int opcode, qp_conn_info_t* remote_conn_info, uint32_t imm_data, uint32_t seq_num); +extern int post_send_req_ud(qpinfo_t *qpinfo, mrinfo_t *mrinfo, int opcode, qp_conn_info_ud_t* remote_conn_info, struct ibv_ah *ah); + +extern int post_recv_req(qpinfo_t *qpinfo, mrinfo_t *mrinfo_list, int tag); +extern int post_recv_req_ud(qpinfo_t *qpinfo, mrinfo_t *mrinfo, uint64_t wr_id); + +extern int poll_cq(qpinfo_t *qpinfo, int cq_flg, int *tag); +extern int poll_cq2(qpinfo_t *qpinfo, int cq_flg, int *tag, int *result); +extern int poll_cq2_ud(qpinfo_t *qpinfo, int cq_flg, int *result); + +extern void print_qp_status(qpinfo_t *qpinfo); + +extern void debug_print_qp_conn_info(resource_t res, qpinfo_t qpinfo, config_t *config); +extern int read_config(config_t *config, int argc, char **argv); +#endif + +#define ERR_CHKANDJUMP(cond, errno, stmt) if(cond) { stmt; rc = errno; goto fn_fail; } +#define IBCOM_ERR_CHKANDJUMP(cond, errno, stmt) if(cond) { stmt; ibcom_errno = errno; goto fn_fail; } +#define VERBS_ERR_CHKANDJUMP(cond, errno, stmt) if(cond) { stmt; verbs_errno = errno; goto fn_fail; } + +static inline int show_resident(int step) { + unsigned long size, resident, share, text, lib, data, dirty; + FILE* fp = fopen("/proc/self/statm", "r"); + fscanf(fp, "%ld %ld %ld %ld %ld %ld %ld", &size, &resident, &share, &text, &lib, &data, &dirty); + printf("step=%d,resident=%ldKB\n", step, resident * 4); + return 0; +} diff --git a/test/qlmpi/rusage/verbs/list.c b/test/qlmpi/rusage/verbs/list.c new file mode 100644 index 00000000..6a0126ea --- /dev/null +++ b/test/qlmpi/rusage/verbs/list.c @@ -0,0 +1,82 @@ +/* + * list.c + * + * Created on: 2011/10/19 + * Author: simin + */ + +#include +#include "list.h" +#include + +void* list_get(list_t *list, int idx) { + int i; + list_element_t *e; + + if (list->cnt <= 0 || idx < 0 || idx >= list->cnt) + return NULL; + + e = list->head; + for (i = 0; i < idx; i++) + e = e->next; + + return e->data; +} + +void list_add(list_t *list, void *data) { + list_element_t *e; + e = malloc(sizeof(list_element_t)); + e->data = data; + e->next = NULL; + + if(list->tail != NULL) + list->tail->next = e; + list->tail = e; + + if (list->cnt == 0) + list->head = list->tail; + + list->cnt++; +} + +void* list_remove(list_t *list, int idx) { + int i; + list_element_t *e, *pe, *ne; + void *data; + e = pe = ne = NULL; + + if (list->cnt <= 0 || idx < 0 || idx >= list->cnt) + return NULL; + + e = list->head; + i = 0; + if(idx > 0){ + while(i++ < idx-1){ + e = e->next; + } + pe = e; + i--; + } + while(i++ < idx) + e = e->next; + if(idx < list->cnt) + ne = e->next; + + if(pe != NULL) + pe->next = ne; + else + list->head = ne; + if(ne == NULL) + list->tail = pe; + + list->cnt--; + + data = e->data; + free(e); + + return data; +} + +void* list_pop(list_t *list){ + return list_remove(list, list->cnt-1); +} diff --git a/test/qlmpi/rusage/verbs/list.h b/test/qlmpi/rusage/verbs/list.h new file mode 100644 index 00000000..9b6dff0c --- /dev/null +++ b/test/qlmpi/rusage/verbs/list.h @@ -0,0 +1,26 @@ +/* + * list.h + * + * Created on: 2011/10/19 + * Author: simin + */ + +#ifndef LIST_H_ +#define LIST_H_ + +typedef struct list_element_t{ + void *data; + struct list_element_t *next; +}list_element_t; + +typedef struct list_t{ + list_element_t *head; + list_element_t *tail; + int cnt; +}list_t; + +extern void* list_get(list_t *list, int idx); +extern void list_add(list_t *list, void *e); +extern void* list_remove(list_t *list, int idx); +extern void* list_pop(list_t *list); +#endif /* LIST_H_ */ diff --git a/test/qlmpi/rusage/verbs/makefile b/test/qlmpi/rusage/verbs/makefile new file mode 100644 index 00000000..5371cb46 --- /dev/null +++ b/test/qlmpi/rusage/verbs/makefile @@ -0,0 +1,30 @@ +VPATH = + +CC = icc +CFLAGS = -Wall -O0 -g -DDEBUG -DERROR +LD = $(CC) +LFLAGS = -libverbs + +SRCS = list.c curtime.c printmem.c debug.c post.c qp.c read_config.c resource.c rdma_wr.c +DSRCS = $(SRCS:.c=.d) +OBJS = $(SRCS:.c=.o) +EXECS = rdma_wr +MODULES = list.o curtime.o printmem.o debug.o post.o qp.o read_config.o resource.o sock.o + +CWD := $(abspath $(dir $(lastword $(MAKEFILE_LIST)))) + +all: $(EXECS) + +rdma_wr: rdma_wr.o $(MODULES) + $(LD) -o $@ $^ $(LFLAGS) + +%.o: %.c + $(CC) $(CFLAGS) -c $< + +%.d: %.c + $(CC) -MM $< > $*.d + +clean: + rm -f $(EXECS) $(OBJS) $(DSRCS) + +-include $(DSRCS) diff --git a/test/qlmpi/rusage/verbs/mbuf.h b/test/qlmpi/rusage/verbs/mbuf.h new file mode 100644 index 00000000..6c8b8571 --- /dev/null +++ b/test/qlmpi/rusage/verbs/mbuf.h @@ -0,0 +1,23 @@ +/* + * mbuf.h + * + * Created on: 2011/10/19 + * Author: simin + */ + +#ifndef MBUF_H_ +#define MBUF_H_ + +enum buf_type{ + HOST_BUF_TYPE, + PCI_BUF_TYPE +}; + +typedef struct buf_t{ + void *buf; + int size; + enum buf_type type; +} buf_t; + +#define buf_free(buf_p) {if(buf_p->type) free(buf_p->buf);} +#endif /* MBUF_H_ */ diff --git a/test/qlmpi/rusage/verbs/mcons.h b/test/qlmpi/rusage/verbs/mcons.h new file mode 100644 index 00000000..325d3362 --- /dev/null +++ b/test/qlmpi/rusage/verbs/mcons.h @@ -0,0 +1,9 @@ +#ifndef MCONS_H_ +#define MCONS_H_ + +#include "mtype.h" + +#define ERR_RET -1 +#define ERR_ADDR (addr_t)-1 + +#endif /* MCONS_H_ */ diff --git a/test/qlmpi/rusage/verbs/mm_core.h b/test/qlmpi/rusage/verbs/mm_core.h new file mode 100644 index 00000000..af183184 --- /dev/null +++ b/test/qlmpi/rusage/verbs/mm_core.h @@ -0,0 +1,12 @@ +#ifndef MM_CORE_H_ +#define MM_CORE_H_ + +#include "mtype.h" + +//4kB +#define MIC_PAGE_SIZE 4096 + +int mm_core_read(addr_t offset, int size, void *buf); +int mm_core_write(addr_t offset, int size, void *buf); + +#endif /* MM_CORE_H_ */ diff --git a/test/qlmpi/rusage/verbs/mm_host.h b/test/qlmpi/rusage/verbs/mm_host.h new file mode 100644 index 00000000..b3257671 --- /dev/null +++ b/test/qlmpi/rusage/verbs/mm_host.h @@ -0,0 +1,60 @@ +#ifndef MIC_MEM_H_ +#define MIC_MEM_H_ + +#include "mtype.h" + +/*#### MMIO ####*/ +#define MIC_PCI_MMIO_BASE_ADDR 0xc2300000 + +/*## GTT ##*/ +#define GTT_START_OFFSET 0x40000 +#define MIC_PCI_GTT_START_ADDR (MIC_PCI_MMIO_BASE_ADDR + GTT_START_OFFSET) +#define MIC_PCI_GTT_ETT_MAX 65536 +#define MIC_GTT_ETT_SIZE 4 + +/*## SBOX ##*/ +#define SBOX_START_OFFSET 0x10000 +#define MIC_PCI_SBOX_START_ADDR (MIC_PCI_MMIO_BASE_ADDR + SBOX_START_OFFSET) +#define MIC_PCI_SBOX_SIZE 0x30000 +#define SBOX_SBQ_FLUSH_REG 0x0000B1A0 +#define SBOX_TLB_FLUSH_REG 0x0000B1A4 + +/*## APERTURE ##*/ +#define MIC_PCI_APERTURE_BASE_ADDR 0xb0000000 +//256MB +#define MIC_PCI_APERTURE_SIZE 0x10000000 +//4kB +#define MIC_PAGE_SIZE 4096 + +static inline addr_t _mic_map2mic(addr_t addr){ + return addr >> 1 << 1 << 11; +} + +#define MIC_MAP2MIC _mic_map2mic + +extern int mm_host_init(); +extern int mm_host_exit(); + +extern addr_t mm_host_get_vaddr(int page_no, int offset); +extern addr_t mm_host_get_paddr(int page_no, int offset); + +/** + * map a page to MIC memory(set GTT[page_no]) + */ +extern int mm_host_page_init(int pg_no, addr_t map_addr, int size, int flush_flg); +/** + * read or write a initialized page + */ +extern int mm_host_page_read(int pg_no, int offset, int size, void *data); +extern int mm_host_page_write(int pg_no, int offset, int size, void *data); + +extern int mm_host_dump_gtt(); + +/** + * flush GTT table + * If only set 1 page, you can call mm_host_page_init with flush_flg=1 + * If set several pages, you can call mm_host_page_init with flush_flg=0, and call mm_host_gtt_flush after all page_init + */ +extern int mm_host_gtt_flush(); + +#endif /* MIC_MEM_H_ */ diff --git a/test/qlmpi/rusage/verbs/mm_ib_test.h b/test/qlmpi/rusage/verbs/mm_ib_test.h new file mode 100644 index 00000000..9c169222 --- /dev/null +++ b/test/qlmpi/rusage/verbs/mm_ib_test.h @@ -0,0 +1,52 @@ +/* + * mm_ib_test.h + * + * Created on: 2011/10/14 + * Author: simin + */ + +#ifndef MM_IB_TEST_H_ +#define MM_IB_TEST_H_ + +//#define USE_1_SERVER 1 + + +//#define TEST_BUF_SIZE 16 +#define TEST_SERVER_BUF_NUM 2 + +#define TEST_COMM_HOST_BASE_ADDR 0x20001 +#define TEST_COMM_CORE_BASE_ADDR (0x20000 << 11) + +/* MR buffer setting info */ +#define TEST_HOST_MR_PAGE_NO 0 +#define TEST_MR_BUF_OFFSET 0 +#define TEST_MR_HOST_BUF_SIZE 4096 + +#define TEST_MR_HOST_BUF_ADDR (TEST_COMM_HOST_BASE_ADDR + TEST_MR_BUF_OFFSET) +#define TEST_MR_CORE_BUF_ADDR (TEST_COMM_CORE_BASE_ADDR + TEST_MR_BUF_OFFSET) + +/* +#define TEST_S2_HOST_MR_PAGE_NO 1 +#define TEST_S2_COMM_HOST_BASE_ADDR 0x30001 +#define TEST_S2_COMM_CORE_BASE_ADDR (0x30000 << 11) +#define TEST_S2_MR_HOST_BUF_ADDR TEST_S2_COMM_HOST_BASE_ADDR + TEST_MR_BUF_OFFSET +#define TEST_S2_MR_CORE_BUF_ADDR TEST_S2_COMM_CORE_BASE_ADDR + TEST_MR_BUF_OFFSET +*/ + +/* CQ buffer setting info */ +#define TEST_HOST_CQ_PAGE_NO 1 +#define TEST_CQ_BUF_OFFSET (TEST_MR_BUF_OFFSET + TEST_MR_HOST_BUF_SIZE) +#define TEST_CQ_HOST_BUF_SIZE 4096*2 // SCQ + RCQ + +#define TEST_CQ_HOST_BUF_ADDR (TEST_COMM_HOST_BASE_ADDR + TEST_CQ_BUF_OFFSET) +#define TEST_CQ_CORE_BUF_ADDR (TEST_CQ_HOST_BUF_ADDR >> 1 << 1 << 11) + +/* QP buffer setting info */ +#define TEST_HOST_QP_PAGE_NO 3 +#define TEST_QP_BUF_OFFSET (TEST_CQ_BUF_OFFSET + TEST_CQ_HOST_BUF_SIZE) +#define TEST_QP_HOST_BUF_SIZE 4096 + +#define TEST_QP_HOST_BUF_ADDR (TEST_COMM_HOST_BASE_ADDR + TEST_QP_BUF_OFFSET) +#define TEST_QP_CORE_BUF_ADDR (TEST_QP_HOST_BUF_ADDR >> 1 << 1 << 11) + +#endif /* MM_IB_TEST_H_ */ diff --git a/test/qlmpi/rusage/verbs/mmib.h b/test/qlmpi/rusage/verbs/mmib.h new file mode 100644 index 00000000..c4aadd37 --- /dev/null +++ b/test/qlmpi/rusage/verbs/mmib.h @@ -0,0 +1,54 @@ +/* + * mmib.h + * + * Created on: 2011/10/19 + * Author: simin + */ + +#ifndef MMIB_H_ +#define MMIB_H_ + +#include "mtype.h" +#include "ibcomm.h" + +enum mmib_buf_type{ + MMIB_MR_BUF, + MMIB_CQ_BUF, + MMIB_QP_BUF, +}; +enum mmib_buf_pool_state{ + MMIB_BUF_POOL_RESET, + MMIB_BUF_POOL_ACTIVE +}; + +struct mmib_buf_pool{ + addr_t offset; + int page_no; // start page_no + int size; + addr_t cur_start; // offset in page + enum mmib_buf_pool_state state; +}; + +typedef struct mmib_mrinfo{ + struct ibv_mr *mr; + buf_t *buf; /*Registered buf*/ +}mmib_mrinfo_t; + +extern int mmib_pool_init(); +extern buf_t *mmib_new_buf(int size, enum mmib_buf_type buf_type); +extern void mmib_destroy_buf(buf_t *buf); +extern void mmib_pool_destroy(); + +extern void* mmib_qp_buf_alloc(int size); +extern void* mmib_cq_buf_alloc(int size); +extern void mmib_buf_free(void* buf); + +extern int mmib_resource_create(config_t config, resource_t *res); +extern int mmib_pd_create(resource_t *res, pdinfo_t *pdinfo); +extern int mmib_qp_create(resource_t *res, pdinfo_t *pdinfo, qpinfo_t *qpinfo); +extern int mmib_mr_create(resource_t *res, pdinfo_t *pdinfo, buf_t *buf, mmib_mrinfo_t *mrinfo); +extern int mmib_post_send_req(qpinfo_t *qpinfo, mmib_mrinfo_t *mrinfo_list, int opcode, int tag); +extern int mmib_post_recv_req(qpinfo_t *qpinfo, mmib_mrinfo_t *mrinfo_list, int tag); +extern int mmib_poll_cq(qpinfo_t *qpinfo, int cq_flg, int *tag); +extern int mmib_resource_destroy(config_t *config, resource_t *res); +#endif /* MMIB_H_ */ diff --git a/test/qlmpi/rusage/verbs/mtype.h b/test/qlmpi/rusage/verbs/mtype.h new file mode 100644 index 00000000..74b05972 --- /dev/null +++ b/test/qlmpi/rusage/verbs/mtype.h @@ -0,0 +1,29 @@ +/* + * type.h + * + * Created on: 2011/10/08 + * Author: simin + */ + +#ifndef TYPE_H_ +#define TYPE_H_ + +#include + +typedef unsigned long int addr_t; + +enum buf_type{ + HOST_BUF_TYPE, + PCI_BUF_TYPE +}; + +typedef struct buf{ + void *buf; + int size; + enum buf_type type; +} buf_t; + +#define free_buf(buf_p) {if(buf_p->type == HOST_BUF_TYPE) free(buf_p->buf); buf_p=NULL;} + + +#endif /* TYPE_H_ */ diff --git a/test/qlmpi/rusage/verbs/pm_buf.h b/test/qlmpi/rusage/verbs/pm_buf.h new file mode 100644 index 00000000..8c2426de --- /dev/null +++ b/test/qlmpi/rusage/verbs/pm_buf.h @@ -0,0 +1,16 @@ +/* + * pm_buf.h + * + * Created on: 2011/10/21 + * Author: simin + */ + +#ifndef PM_BUF_H_ +#define PM_BUF_H_ + +struct pm_buf_ops { + void* (*alloc_buf)(int size); + void (*free_buf)(void *buf); +}; + +#endif /* PM_BUF_H_ */ diff --git a/test/qlmpi/rusage/verbs/post.c b/test/qlmpi/rusage/verbs/post.c new file mode 100644 index 00000000..e0d891e9 --- /dev/null +++ b/test/qlmpi/rusage/verbs/post.c @@ -0,0 +1,453 @@ +#include +#include +#include +#include "ibcomm.h" +#include "debug.h" + +//#define DEBUG_POST +#ifdef DEBUG_POST +#define dprintf printf +#else +#define dprintf(...) +#endif + +static unsigned long rdtsc() { + unsigned long x; + __asm__ __volatile__("xorl %%eax, %%eax; cpuid;" : : : "%rax", "%rbx", "%rcx", "%rdx"); /* rdtsc cannot be executed earlier than this */ + __asm__ __volatile__("rdtsc; shl $32, %%rdx; or %%rdx, %%rax" : "=a"(x) : : "memory"); /* rdtsc cannot be executed earlier than here */ + __asm__ __volatile__("xorl %%eax, %%eax; cpuid;" : : : "%rax", "%rbx", "%rcx", "%rdx"); /* following instructions cannot be executed earlier than this */ + return x; +} + +#define MAX_POLL_TIME (1000000ULL * 1000000) +int swr_id_tag_map[1000]; +int rwr_id_tag_map[1000]; + +void put_swr_id_tag(int wr_id, int tag){ + swr_id_tag_map[wr_id] = tag; +} +int get_swr_id_tag(int wr_id){ + int tag = swr_id_tag_map[wr_id]; + return tag; +} +void put_rwr_id_tag(int wr_id, int tag){ + rwr_id_tag_map[wr_id] = tag; +} +int get_rwr_id_tag(int wr_id){ + int tag = rwr_id_tag_map[wr_id]; + return tag; +} +int post_send_req(qpinfo_t *qpinfo, mrinfo_t *mrinfo, int opcode, int tag, qp_conn_info_t* remote_conn_info, uint32_t imm_data){ + struct ibv_send_wr sr, *bad_wr = NULL; + struct ibv_sge sge[1]; + int ret = 0; + + /* Create sge*/ + sge[0].addr = (uintptr_t)mrinfo->buf; + sge[0].length = mrinfo->buf_size; + sge[0].lkey = mrinfo->mr->lkey; + + /* Create a SR */ + memset(&sr, 0, sizeof(struct ibv_send_wr)); + sr.next = NULL; + sr.wr_id = ++qpinfo->sr_num; + sr.sg_list = sge; + sr.num_sge = 1; + sr.opcode = opcode; + sr.imm_data = imm_data; + sr.send_flags = IBV_SEND_SIGNALED; + + if(opcode != IBV_WR_RDMA_READ && mrinfo->buf_size <= qpinfo->max_inline_data) { sr.send_flags |= IBV_SEND_INLINE; } + put_swr_id_tag(sr.wr_id, tag); + + // set addr and key if is RDMA op + if(opcode != IBV_WR_SEND){ + sr.wr.rdma.remote_addr = remote_conn_info->addr; + sr.wr.rdma.rkey = remote_conn_info->rkey; + } + + /* Post SR to SQ */ + ret = ibv_post_send(qpinfo->qp, &sr, &bad_wr); + if(ret){ + error_perror("ibv_post_send"); + error_printf("ibv_post_send return %d\n", ret); + return IBCOMM_ERR_CODE; + } + + return 0; +} + +/* write to addr + sz * seq_num */ +int post_send_req2(qpinfo_t *qpinfo, mrinfo_t *mrinfo, int opcode, qp_conn_info_t* remote_conn_info, uint32_t imm_data, uint32_t seq_num) { + struct ibv_send_wr sr, *bad_wr = NULL; + struct ibv_sge sge[1]; + int ret = 0; + + /* prepare sge*/ + sge[0].addr = (uintptr_t)mrinfo->buf; + sge[0].length = mrinfo->buf_size; + sge[0].lkey = mrinfo->mr->lkey; + + dprintf("post_send_req2,sge[0].addr=%lx,sz=%d\n", (unsigned long)sge[0].addr, sge[0].length = mrinfo->buf_size); + + /* prepare send request or work request */ + //memset(&sr, 0, sizeof(struct ibv_send_wr)); + sr.next = NULL; + sr.wr_id = 0; + sr.sg_list = sge; + sr.num_sge = 1; + sr.opcode = opcode; + sr.imm_data = imm_data; + sr.send_flags = IBV_SEND_SIGNALED; + + if(opcode != IBV_WR_RDMA_READ && mrinfo->buf_size <= qpinfo->max_inline_data) { + sr.send_flags |= IBV_SEND_INLINE; + } + + if(opcode == IBV_WR_RDMA_WRITE || opcode == IBV_WR_RDMA_WRITE_WITH_IMM) { + sr.wr.rdma.remote_addr = remote_conn_info->addr + IBCOM_RDMABUF_SZSEG * seq_num; + sr.wr.rdma.rkey = remote_conn_info->rkey; + dprintf("post_send_req2,raddr=%lx\n", sr.wr.rdma.remote_addr); + } + + //__asm__ __volatile__("" ::: "memory"); + + ret = ibv_post_send(qpinfo->qp, &sr, &bad_wr); + if(ret){ + printf("ibv_post_send return %d\n", ret); + return IBCOMM_ERR_CODE; + } + + return 0; +} + +int ibcom_isend_chain(qpinfo_t *qpinfo, mrinfo_t *mrinfo, int opcode, qp_conn_info_t* remote_conn_info, uint32_t imm_data, uint32_t seq_num) { + int ibcom_errno = 0; + int ib_errno; + int i; + struct ibv_send_wr sr[NCHAIN], *bad_wr = NULL; + struct ibv_sge sge[NCHAIN]; + + for(i = 0; i < NCHAIN; i++) { + sge[i].addr = (uintptr_t)mrinfo->buf + IBCOM_INLINE_DATA * i; + sge[i].length = IBCOM_INLINE_DATA; + sge[i].lkey = mrinfo->mr->lkey; + + sr[i].next = (i == NCHAIN - 1) ? NULL : &sr[i+1]; + //sr[i].wr_id = 0; + sr[i].sg_list = &sge[i]; + sr[i].num_sge = 1; +#define SKIP_POLL_RCQ +#ifdef SKIP_POLL_RCQ /* if you want all to be IBV_WR_RDMA_WRITE */ + sr[i].opcode = opcode; +#else + sr[i].opcode = (i == NCHAIN - 1) ? IBV_WR_RDMA_WRITE_WITH_IMM : IBV_WR_RDMA_WRITE; +#endif + sr[i].imm_data = imm_data; + sr[i].send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE; + + sr[i].wr.rdma.remote_addr = remote_conn_info->addr + IBCOM_INLINE_DATA * NCHAIN * seq_num + IBCOM_INLINE_DATA * i; + sr[i].wr.rdma.rkey = remote_conn_info->rkey; + } + + ib_errno = ibv_post_send(qpinfo->qp, &sr[0], &bad_wr); + IBCOM_ERR_CHKANDJUMP(ib_errno, -1, printf("ibv_post_send\n")); + + fn_exit: + return ibcom_errno; + fn_fail: + goto fn_exit; +} + +/* write to addr + sz * seq_num */ +int post_send_req4(qpinfo_t *qpinfo, mrinfo_t *mrinfo, int opcode, qp_conn_info_t* remote_conn_info, uint32_t imm_data, uint32_t seq_num, uint32_t offset) { + int ibcom_errno = 0; + int ib_errno; + + struct ibv_send_wr sr, *bad_wr = NULL; + struct ibv_sge sge[1]; + + sge[0].addr = (uintptr_t)mrinfo->buf + offset; + sge[0].length = IBCOM_INLINE_DATA; + sge[0].lkey = mrinfo->mr->lkey; + + sr.next = NULL; + //sr.wr_id = 0; + sr.sg_list = sge; + sr.num_sge = 1; + sr.opcode = opcode; + sr.imm_data = imm_data; + sr.send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE; + + sr.wr.rdma.remote_addr = remote_conn_info->addr + IBCOM_INLINE_DATA * seq_num; + sr.wr.rdma.rkey = remote_conn_info->rkey; + + ib_errno = ibv_post_send(qpinfo->qp, &sr, &bad_wr); + IBCOM_ERR_CHKANDJUMP(ib_errno, -1, printf("ibv_post_send\n")); + + fn_exit: + return ibcom_errno; + fn_fail: + goto fn_exit; +} + +int post_send_req_ud(qpinfo_t *qpinfo, mrinfo_t *mrinfo, int opcode, qp_conn_info_ud_t* remote_conn_info, struct ibv_ah *ah) { + struct ibv_send_wr sr, *bad_wr; + struct ibv_sge sge[1]; + int ibcom_errno = 0, ib_errno; + + /* Create sge*/ + /* addr to addr + length - 1 will be on the payload, but see "post_send_req_ud" part */ + if(mrinfo->buf_size <= 40) { printf("buf_size too short\n"); ibcom_errno = -1; goto fn_fail; } + + sge[0].addr = (uintptr_t)mrinfo->buf + 40; + sge[0].length = mrinfo->buf_size - 40; + sge[0].lkey = mrinfo->mr->lkey; + + /* Create a SR */ + //memset(&sr, 0, sizeof(struct ibv_send_wr)); + sr.next = NULL; + sr.wr_id = 0; + sr.sg_list = sge; + sr.num_sge = 1; + sr.opcode = opcode; + //sr.imm_data = 0; + sr.send_flags = IBV_SEND_SIGNALED; + +#if 0 + if(mrinfo->buf_size <= qpinfo->max_inline_data){ + sr.send_flags |= IBV_SEND_INLINE; + } +#endif + + sr.wr.ud.ah = ah; + sr.wr.ud.remote_qpn = remote_conn_info->qp_num; + sr.wr.ud.remote_qkey = remote_conn_info->qkey; + dprintf("ibv_post_send,qpn=%08x,qkey=%08x\n", sr.wr.ud.remote_qpn, sr.wr.ud.remote_qkey); + // printf("ibv_post_send,dlid=%02x,is_global=%02x\n", ah->dlid, ah->is_global); + + ib_errno = ibv_post_send(qpinfo->qp, &sr, &bad_wr); + if(ib_errno) { + error_perror("ibv_post_send"); + printf("ib_errno=%d\n", ib_errno); + ibcom_errno = IBCOMM_ERR_CODE; + goto fn_fail; + } + + fn_exit: + return ibcom_errno; + fn_fail: + goto fn_exit; +} + +int post_recv_req(qpinfo_t *qpinfo, mrinfo_t *mrinfo, int tag){ + struct ibv_recv_wr *rr; + struct ibv_sge *sge; + struct ibv_recv_wr *bad_wr; + int ret = 0; + + /* Prepare scatter/gather entry list */ + sge = malloc(sizeof(struct ibv_sge)); + memset(sge, 0, sizeof(struct ibv_sge)); + sge->addr = (uintptr_t)mrinfo->buf; + sge->length = mrinfo->buf_size; + sge->lkey = mrinfo->mr->lkey; + + /* Create RR list */ + rr = malloc(sizeof(*rr)); + memset(rr, 0, sizeof(*rr)); + rr->next = NULL; + rr->wr_id = ++qpinfo->rr_num; + rr->sg_list = sge; + rr->num_sge = 1; + put_rwr_id_tag(rr->wr_id, tag); + + /* Post RR to RQ */ + ret = ibv_post_recv(qpinfo->qp, rr, &bad_wr); + if(ret){ + dprintf("ibv_post_recv ret=%d\n", ret); + free(sge); + free(rr); + return IBCOMM_ERR_CODE; + } else { + dprintf("ibv_post_recv ret=%d\n", ret); + } + + free(sge); + free(rr); + return 0; +} + +int ibcom_irecv(qpinfo_t *qpinfo, uint64_t wr_id){ + struct ibv_recv_wr rr; + struct ibv_recv_wr *bad_wr; + int ibcom_errno = 0; + int ib_errno; + + rr.next = NULL; + rr.sg_list = NULL; + rr.num_sge = 0; + rr.wr_id = wr_id; + + /* post rr */ + ib_errno = ibv_post_recv(qpinfo->qp, &rr, &bad_wr); + IBCOM_ERR_CHKANDJUMP(ib_errno, -1, printf("ibv_post_recv\n")); + + fn_exit: + return ibcom_errno; + fn_fail: + goto fn_exit; +} + +int post_recv_req_ud(qpinfo_t *qpinfo, mrinfo_t *mrinfo, uint64_t wr_id){ + struct ibv_recv_wr rr, *bad_wr; + struct ibv_sge sge[1]; + int ibcom_errno = 0, ib_errno; + + /* Prepare scatter/gather entry list */ + memset(sge, 0, sizeof(struct ibv_sge)); + /* addr to addr + 39 are not filled, addr + 40 to addr + length - 1 are filled with payload */ + if(mrinfo->buf_size <= 40) { printf("buf_size too short\n"); ibcom_errno = -1; goto fn_fail; } + sge[0].addr = (uintptr_t)mrinfo->buf; + sge[0].length = mrinfo->buf_size; + sge[0].lkey = mrinfo->mr->lkey; + + /* Create RR list */ + memset(&rr, 0, sizeof(struct ibv_recv_wr)); + rr.next = NULL; + rr.wr_id = wr_id; + rr.sg_list = sge; + rr.num_sge = 1; + + /* Post RR to RQ */ + ib_errno = ibv_post_recv(qpinfo->qp, &rr, &bad_wr); + if(ib_errno){ + printf("ibv_post_recv ib_errno=%d\n", ib_errno); + ibcom_errno = IBCOMM_ERR_CODE; + goto fn_fail; + } + fn_exit: + return ibcom_errno; + fn_fail: + goto fn_exit; +} + +int poll_cq(qpinfo_t *qpinfo, int cq_flg, int *tag) { + struct ibv_wc wc; + int wc_num = 0, time=0, rc = IBCOMM_ERR_CODE; +// wc = malloc(sizeof(struct ibv_wc)); + memset(&wc, 0, sizeof(struct ibv_wc)); + + switch(cq_flg){ + case SEND_CQ_FLG: + do{ + wc_num = ibv_poll_cq(qpinfo->scq, 1, &wc); + }while(!wc_num && ++time < MAX_POLL_TIME); + break; + + case RECV_CQ_FLG: + do{ + wc_num = ibv_poll_cq(qpinfo->rcq, 1, &wc); + }while(!wc_num && ++time < MAX_POLL_TIME); + break; + } + + if(wc_num < 0){ + error_perror("ibv_poll_cq"); + goto poll_cq_exit; + } + if(wc_num == 0){ + error_printf("no wc is found\n"); + goto poll_cq_exit; + } + if (wc.status != IBV_WC_SUCCESS){ + error_printf("wrong wc state: %d, %s\n", wc.status, ibv_wc_status_str(wc.status)); + goto poll_cq_exit; + } + switch(cq_flg){ + case SEND_CQ_FLG: + *tag = get_swr_id_tag(wc.wr_id); + break; + case RECV_CQ_FLG: + *tag = get_rwr_id_tag(wc.wr_id); + break; + } + rc = 0; + + poll_cq_exit: + + return rc; +} + +int poll_cq2(qpinfo_t *qpinfo, int cq_flg, int *tag, int *result) { + struct ibv_wc cqe; + int rc = 0; + + switch(cq_flg){ + case SEND_CQ_FLG: + *result = ibv_poll_cq(qpinfo->scq, 1, &cqe); + break; + + case RECV_CQ_FLG: + *result = ibv_poll_cq(qpinfo->rcq, 1, &cqe); + break; + } + + if(*result < 0){ + error_perror("ibv_poll_cq"); + rc = *result; + goto fn_fail; + } + if(*result > 0 && cqe.status != IBV_WC_SUCCESS){ + error_printf("cqe status=%08x,%s\n", cqe.status, ibv_wc_status_str(cqe.status)); + rc = -1; + goto fn_fail; + } + if(*result > 0) { + dprintf("cqe.imm_data=%d\n", cqe.imm_data); + switch(cq_flg){ + case SEND_CQ_FLG: + *tag = get_swr_id_tag(cqe.wr_id); + break; + case RECV_CQ_FLG: + *tag = get_rwr_id_tag(cqe.wr_id); + break; + } + } + fn_exit: + return rc; + fn_fail: + goto fn_exit; +} + +int poll_cq2_ud(qpinfo_t *qpinfo, int cq_flg, int *result) { + struct ibv_wc cqe; + int rc = 0; + + switch(cq_flg){ + case SEND_CQ_FLG: { + unsigned long tscs = rdtsc(); + *result = ibv_poll_cq(qpinfo->scq, 1, &cqe); + unsigned long tsce = rdtsc(); + printf("poll_cq,send,%ld\n", tsce-tscs); + break; } + case RECV_CQ_FLG: + *result = ibv_poll_cq(qpinfo->rcq, 1, &cqe); + break; + } + + if(*result < 0){ + error_perror("ibv_poll_cq"); + rc = *result; + goto fn_fail; + } + if(*result > 0 && cqe.status != IBV_WC_SUCCESS){ + error_printf("cqe status=%08x,%s\n", cqe.status, ibv_wc_status_str(cqe.status)); + rc = -1; + goto fn_fail; + } + fn_exit: + return rc; + fn_fail: + goto fn_exit; +} diff --git a/test/qlmpi/rusage/verbs/printmem.c b/test/qlmpi/rusage/verbs/printmem.c new file mode 100644 index 00000000..a3ae4273 --- /dev/null +++ b/test/qlmpi/rusage/verbs/printmem.c @@ -0,0 +1,12 @@ +#include +#include "mtype.h" + +void print_mem(addr_t addr, int size){ + int i; + printf("print memory[0x%lx]\n", addr); + for(i = 0; i < size; i++){ + printf("%02x ", *(unsigned char *)(addr+i)); + } + printf("\n"); +} + diff --git a/test/qlmpi/rusage/verbs/qp.c b/test/qlmpi/rusage/verbs/qp.c new file mode 100644 index 00000000..f766b956 --- /dev/null +++ b/test/qlmpi/rusage/verbs/qp.c @@ -0,0 +1,297 @@ +#include +#include +#include +#include +#include +#include +#include "ibcomm.h" +#include "sock.h" +#include "debug.h" + +#define DEBUG_QP +#ifdef DEBUG_QP +#define dprintf printf +#else +#define dprintf(...) +#endif + +int connect_qp(config_t config, resource_t *res, qpinfo_t *qpinfo){ + union ibv_gid gid; + qp_conn_info_t local_conn_info; + int rc = IBCOMM_ERR_CODE; + + // get GID for this connection + memset(&gid, 0, sizeof(union ibv_gid)); + if(ibv_query_gid(res->ib_ctx, config.ib_port, config.gid_idx, &gid)){ + error_perror("ibv_query_gid"); + goto connect_qp_exit; + } + dprintf("port=%08x\n", config.ib_port); + + // set local qp conn info + memset(&local_conn_info, 0, sizeof(qp_conn_info_t)); + memset(qpinfo->remote_conn_info, 0, sizeof(qpinfo->remote_conn_info)); + local_conn_info.qp_num = htonl(qpinfo->qp->qp_num); + local_conn_info.lid = htons(res->port_attr->lid); + memcpy(local_conn_info.gid, &gid, 16); + dprintf("qp_num=%08x, lid=%08x\n", local_conn_info.qp_num, local_conn_info.lid); + + // set rdma address + if(config.use_rdma == 1){ + local_conn_info.addr = htonll((uint64_t) res->rdma_mr.mr->addr); + local_conn_info.rkey = htonl((uint32_t) res->rdma_mr.mr->lkey); + printf("my lkey=%08x\n", res->rdma_mr.mr->lkey); + printf("my rkey=%08x\n", res->rdma_mr.mr->rkey); + //local_conn_info.rkey = htonl((uint32_t) res->rdma_mr.mr->rkey); + } + + if(config.server_flg) { qpinfo->listenfd = -1; } // if listenfd != -1, then listen(listenfd) + int i; + for(i = 0; i < (config.server_flg ? config.nremote : 1); i++) { + // server accepts connection from NREMOTE clients + // NREMOTE clients connect to one server + + // sock connect + qpinfo->sock[i] = sock_connect(config.server_name, config.tcp_port, &(qpinfo->listenfd)); + if(qpinfo->sock[i] < 0) { + error_perror("sock_connect"); goto connect_qp_exit; + } + dprintf("connect_qp, after sock_connect\n"); + + // send local_conn_info, receive remote_conn_info + if(sock_sync_data(qpinfo->sock[i], sizeof(qp_conn_info_t), (char*)&local_conn_info, (char*)&qpinfo->remote_conn_info[i])){ + error_perror("sock_sync_data"); + goto connect_qp_exit; + } + dprintf("connect_qp, after sock_sync_data\n"); + qpinfo->remote_conn_info[i].qp_num = ntohl(qpinfo->remote_conn_info[i].qp_num); + qpinfo->remote_conn_info[i].lid = ntohs(qpinfo->remote_conn_info[i].lid); + + // set rdma address + if(config.use_rdma == 1){ + qpinfo->remote_conn_info[i].addr = ntohll(qpinfo->remote_conn_info[i].addr); + qpinfo->remote_conn_info[i].rkey = ntohl(qpinfo->remote_conn_info[i].rkey); + printf("your rkey=%08x\n", qpinfo->remote_conn_info[i].rkey); + } + } + rc = 0; + +connect_qp_exit: + if(rc) { + int i; + for(i = 0; i < (config.server_flg ? config.nremote : 1); i++) { + if(qpinfo->sock[i] > 0) { close(qpinfo->sock[i]); } + } + } + return rc; +} + +int init_qp(config_t config, qpinfo_t *qpinfo){ + struct ibv_qp_attr attr; + int flags; + int rc = 0; + + memset(&attr, 0, sizeof(attr)); + attr.qp_state = IBV_QPS_INIT; + attr.port_num = config.ib_port; + attr.pkey_index = 0; + attr.qp_access_flags = IBV_ACCESS_LOCAL_WRITE; + if(config.use_rdma) + attr.qp_access_flags |= IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_ATOMIC; + + flags = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS; + if(ibv_modify_qp(qpinfo->qp, &attr, flags)){ + error_perror("ibv_modify_qp"); + rc = IBCOMM_ERR_CODE; + } + return rc; +} + +int init_qp_ud(config_t config, qpinfo_t *qpinfo){ + struct ibv_qp_attr attr; + int flags; + int ibcom_errno = 0, ib_errno; + + memset(&attr, 0, sizeof(attr)); + attr.qp_state = IBV_QPS_INIT; + attr.port_num = config.ib_port; + attr.pkey_index = 0; + attr.qkey = 0x11111111; + + flags = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_QKEY; + + ib_errno = ibv_modify_qp(qpinfo->qp, &attr, flags); + if(ib_errno) { + dprintf("ib_errno=%d\n", ib_errno); + error_perror("ibv_modify_qp"); + ibcom_errno = IBCOMM_ERR_CODE; + goto fn_fail; + } + fn_exit: + return ibcom_errno; + fn_fail: + goto fn_exit; +} + +int rtr_qp(config_t config, qpinfo_t *qpinfo){ + struct ibv_qp_attr attr; + int flags; + int rc = 0; + + memset(&attr, 0, sizeof(attr)); + attr.qp_state = IBV_QPS_RTR; + attr.path_mtu = IBV_MTU_2048/*IBV_MTU_2048*//*IBV_MTU_512*/; + attr.ah_attr.dlid = qpinfo->remote_conn_info[0].lid; + attr.ah_attr.port_num = config.ib_port; + attr.dest_qp_num = qpinfo->remote_conn_info[0].qp_num; + attr.rq_psn = 0; + attr.min_rnr_timer = 0x12; + attr.max_dest_rd_atomic = /*0*/1; + + if(config.use_rdma) + attr.max_dest_rd_atomic = 1; + + flags = IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER; + if(ibv_modify_qp(qpinfo->qp, &attr, flags)){ + error_perror("ibv_modify_qp"); + rc = IBCOMM_ERR_CODE; + } + return rc; +} + +int rtr_qp_ud(config_t config, qpinfo_t *qpinfo){ + struct ibv_qp_attr attr; + int flags; + int ibcom_errno = 0, ib_errno; + + memset(&attr, 0, sizeof(attr)); + attr.qp_state = IBV_QPS_RTR; + + flags = IBV_QP_STATE; + ib_errno = ibv_modify_qp(qpinfo->qp, &attr, flags); + if(ib_errno) { error_perror("ibv_modify_qp"); ibcom_errno = IBCOMM_ERR_CODE; goto fn_fail; } + + fn_exit: + return ibcom_errno; + fn_fail: + goto fn_exit; +} + +int rts_qp(config_t config, qpinfo_t *qpinfo){ + struct ibv_qp_attr attr; + int flags; + int rc = 0; + + memset(&attr, 0, sizeof(attr)); + attr.qp_state = IBV_QPS_RTS; + attr.timeout = 0x14; + attr.retry_cnt = 7; + attr.rnr_retry = 7; + attr.sq_psn = 0; + attr.max_rd_atomic = /*0*/1; // num of outstanding RDMA reads and atomic op allowed + if(config.use_rdma) + attr.max_rd_atomic = 1; + + flags = IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC; + + if(ibv_modify_qp(qpinfo->qp, &attr, flags)){ + error_perror("ibv_modify_qp"); + rc = IBCOMM_ERR_CODE; + } + return rc; +} + +int rts_qp_ud(config_t config, qpinfo_t *qpinfo){ + struct ibv_qp_attr attr; + int flags; + int ibcom_errno = 0, ib_errno; + + memset(&attr, 0, sizeof(attr)); + attr.qp_state = IBV_QPS_RTS; + attr.sq_psn = 0; + + flags = IBV_QP_STATE | IBV_QP_SQ_PSN; + + ib_errno = ibv_modify_qp(qpinfo->qp, &attr, flags); + if(ib_errno) { error_perror("ibv_modify_qp"); ibcom_errno = IBCOMM_ERR_CODE; goto fn_fail; } + fn_exit: + return ibcom_errno; + fn_fail: + goto fn_exit; +} + +/* modify address vector and dest qpn and reset sq_psn */ +int modify_dest_qp(config_t config, qpinfo_t *qpinfo, qp_conn_info_t* remote_conn_info){ + struct ibv_qp_attr attr; + int flags; + int rc = 0; + + memset(&attr, 0, sizeof(attr)); + attr.qp_state = IBV_QPS_RTS; + attr.ah_attr.dlid = remote_conn_info->lid; + attr.ah_attr.port_num = config.ib_port; + attr.dest_qp_num = remote_conn_info->qp_num; + attr.sq_psn = 0; + + attr.max_rd_atomic = 0; + attr.retry_cnt = 7; + attr.rnr_retry = 7; + attr.timeout = 0x14; + +#if 0 + flags = IBV_QP_STATE | IBV_QP_AV | IBV_QP_DEST_QPN | IBV_QP_SQ_PSN | + IBV_QP_MAX_QP_RD_ATOMIC | + IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_TIMEOUT; +#else + flags = IBV_QP_STATE | IBV_QP_AV; +#endif + + if(ibv_modify_qp(qpinfo->qp, &attr, flags)){ + error_perror("ibv_modify_qp"); + rc = IBCOMM_ERR_CODE; + } + return rc; +} + +void print_qp_status(qpinfo_t *qpinfo){ + struct ibv_qp_attr *attr; + struct ibv_qp_init_attr *init_attr; + int flags = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS; + int rc; + + attr = malloc(sizeof(struct ibv_qp_attr)); + init_attr = malloc(sizeof(struct ibv_qp_init_attr)); + + rc = ibv_query_qp(qpinfo->qp, attr, flags, init_attr); + if(rc){ + fprintf(stderr, "query qp error\n"); + } + else{ + switch(attr->cur_qp_state){ + case IBV_QPS_RESET: + dprintf("attr=IBV_QPS_RESET\n"); + break; + case IBV_QPS_INIT: + dprintf("attr=IBV_QPS_INIT\n"); + break; + case IBV_QPS_RTR: + dprintf("attr=IBV_QPS_RTR\n"); + break; + case IBV_QPS_RTS: + dprintf("attr=IBV_QPS_RTS\n"); + break; + case IBV_QPS_SQD: + dprintf("attr=IBV_QPS_SQD\n"); + break; + case IBV_QPS_SQE: + dprintf("attr=IBV_QPS_SQE\n"); + break; + case IBV_QPS_ERR: + dprintf("attr=IBV_QPS_ERR\n"); + break; + } + } + free(attr); + free(init_attr); +} + diff --git a/test/qlmpi/rusage/verbs/rdma_wr.c b/test/qlmpi/rusage/verbs/rdma_wr.c new file mode 100644 index 00000000..f0afcdab --- /dev/null +++ b/test/qlmpi/rusage/verbs/rdma_wr.c @@ -0,0 +1,218 @@ +#include +#include +#include +#include +#include +#include "ibcomm.h" +#include "debug.h" +#include "mtype.h" +#include "mcons.h" +#include "mm_ib_test.h" + +//#define DEBUG_RDMA_WR +#ifdef DEBUG_RDMA_WR +#define dprintf printf +#else +#define dprintf(...) +#endif + +#define TEST_SEND_BUF_NUM 3 +#define TEST_RDMA_FLG_SIZE (sizeof(unsigned short)) +#define NTRIAL 1 /* 120 */ +#define PPOLLS 1 /* sweet spot is around 10 */ +#define NSKIPS (PPOLLS*0) +#define PPOLLR 1 /* sweet spot is around 10 */ +#define NSKIPR (PPOLLR*0) + +#define IBCOM_MAGIC 0x55aa55aa + +typedef struct tailmagic_t { + uint32_t magic; +} tailmagic_t; + +enum rdma_buf_flg{ + RDMA_BUF_RESET_FLG = 0, + RDMA_BUF_WRITE_FLG = 1, +}; + +static unsigned long rdtsc() { + unsigned long x; + __asm__ __volatile__("xorl %%eax, %%eax; cpuid;" : : : "%rax", "%rbx", "%rcx", "%rdx"); /* rdtsc cannot be executed earlier than this */ + __asm__ __volatile__("rdtsc; shl $32, %%rdx; or %%rdx, %%rax" : "=a"(x) : : "memory"); /* rdtsc cannot be executed earlier than here */ + __asm__ __volatile__("xorl %%eax, %%eax; cpuid;" : : : "%rax", "%rbx", "%rcx", "%rdx"); /* following instructions cannot be executed earlier than this */ + return x; +} + +volatile int k; +int main(int argc, char **argv) { + config_t config; + unsigned long i, j; + int ibcom_errno = 0; + char sync_res; + unsigned long tscs, tsce; + resource_t res; + pdinfo_t pdinfo; + qpinfo_t qpinfo; + mrinfo_t *loc_mr_list = NULL; + int entry; + int ibv_errno; + + if (read_config(&config, argc, argv)) { + goto fn_exit; + } + + config.use_rdma = 1; + + unsigned long buf_size; + char* str_env = getenv("BUF_SIZE"); + buf_size = str_env ? atol(str_env) : 4096/*48,1073741824ULL * 1 + 4*/; + + if(buf_size == 0) { printf("set buf_size"); goto fn_fail; } + + if(resource_create(config, &res) || pd_create(&res, &pdinfo)) { printf("qp_create failed\n"); goto fn_fail; } + + ibv_errno = qp_create(&res, &pdinfo, &qpinfo); + IBCOM_ERR_CHKANDJUMP(ibv_errno, -1, printf("qp_create failed\n")); + + /* create MR buffers */ + + // rdma-write-to buffer +#if 1 + void *rdma_buf = mmap(0, buf_size * NTRIAL, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + memset(rdma_buf, 0, buf_size * NTRIAL); +#else + void *rdma_buf = calloc(buf_size * NTRIAL, sizeof(char)); +#endif + if(!rdma_buf) { printf("mmap failed\n"); goto fn_fail; } + if(mr_create(&res, &pdinfo, buf_size * NTRIAL, rdma_buf, &res.rdma_mr)) { printf("mr_create failed\n"); goto fn_fail; } + +#if 0 + // TLB prefetch + for (i = 0; i < NTRIAL; i++) { + if(!config.server_flg) { + *((uint32_t *)(rdma_buf + buf_size * i + buf_size - sizeof(uint32_t))) = 0; + } + } +#endif + + // local data buffers + loc_mr_list = calloc(sizeof(mrinfo_t) * NTRIAL, sizeof(char)); + for (i = 0; i < NTRIAL; i++) { + void *loc_buf = mmap(0, buf_size, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + if(loc_buf == MAP_FAILED) { printf("mmap failed\n"); goto fn_fail; } + if(config.server_flg) { + for(j = 0; j < buf_size; j++) { + *((unsigned char*)loc_buf + j) = (char)i; + } + *((uint32_t *)(loc_buf + buf_size - sizeof(uint32_t))) = 0 + IBCOM_MAGIC; + } + + dprintf("magic addr=%lx\n", (unsigned long)(loc_buf + buf_size - TEST_RDMA_FLG_SIZE)); + + if(mr_create(&res, &pdinfo, buf_size, loc_buf, &loc_mr_list[i])) { printf("mr_create failed\n"); goto fn_fail; } + } + + if(!config.server_flg) { dprintf("res->rdma_mr.mr->addr=%lx\n", (unsigned long)res.rdma_mr.mr->addr); } + /* exchange gid, lid, qpn, raddr, rkey */ + if(connect_qp(config, &res, &qpinfo)) { printf("connect_qp failed\n"); goto fn_fail; } + debug_print_qp_conn_info(res, qpinfo, &config); + printf("connect_qp done\n"); fflush(stdout); + + if(config.server_flg) { dprintf("qpinfo->remote_conn_info[0].addr=%lx\n", qpinfo.remote_conn_info[0].addr); } + + /* make qp RTS */ + if(init_qp(config, &qpinfo) || rtr_qp(config, &qpinfo) || rts_qp(config, &qpinfo)) { printf("rts failed\n"); goto fn_fail; } + printf("rts done\n"); fflush(stdout); + + /* barrier */ + for(i = 0; i < (config.server_flg ? config.nremote : 1); i++) { + if(sock_sync_data(qpinfo.sock[i], 1, "R", &sync_res)) { perror("sock_sync_data"); } + } + printf("barrier done\n"); fflush(stdout); + + if(config.server_flg) { /* sender side */ + //usleep(500000); + if(NTRIAL % PPOLLS != 0) { printf("set NTRIAL multiple of PPOLLS\n"); goto fn_fail; } + if(NTRIAL <= NSKIPS) { printf("set NTRIAL > NSKIP\n"); goto fn_fail; } + + for (i = 0; i < NTRIAL; i++) { + if(i == NSKIPS) { tscs = rdtsc(); } + + post_send_req2(&qpinfo, &loc_mr_list[0], IBV_WR_RDMA_WRITE, &qpinfo.remote_conn_info[0], 0, i); + +#if 0 + int nfound = 0; + if(i % PPOLLS == PPOLLS - 1) { + k = 0; + while(1) { + int result; + struct ibv_wc cqe[PPOLLS]; + result = ibv_poll_cq(qpinfo.scq, PPOLLS, &cqe[0]); + if(result < 0) { printf("ibv_poll_cq"); goto fn_fail; } + if(result > 0) { + for(j = 0; j < result; j++) { + if(cqe[j].status != IBV_WC_SUCCESS) { printf("cqe status,%s\n", ibv_wc_status_str(cqe[j].status)); goto fn_fail; } + } + //debug_print_mem((addr_t)loc_mr_list[entry].buf, buf_size); + nfound += result; + if(nfound >= PPOLLS) { break; } + } + k++; + } + } +#endif + + } + tsce = rdtsc(); printf("send,%.0f\n", (tsce-tscs)/(double)(NTRIAL-NSKIPS)); +#if 1 + int nfound = 0; + k = 0; + while(1) { + int result; + struct ibv_wc cqe[NTRIAL]; + result = ibv_poll_cq(qpinfo.scq, NTRIAL, &cqe[0]); + if(result < 0) { printf("ibv_poll_cq"); goto fn_fail; } + if(result > 0) { + for(j = 0; j < result; j++) { + if(cqe[j].status != IBV_WC_SUCCESS) { printf("cqe status,%s\n", ibv_wc_status_str(cqe[j].status)); goto fn_fail; } + } + //debug_print_mem((addr_t)loc_mr_list[entry].buf, buf_size); + nfound += result; + if(nfound >= NTRIAL) { break; } + } + k++; + } +#endif + } else { /* receiver side */ + if(NSKIPR % PPOLLR !=0) { printf("set NSKIP multiple of PPOLL\n"); goto fn_fail; } + for (i = 0; i < NTRIAL; i++) { + if(i == NSKIPR) { tscs = rdtsc(); } + + // poll on magic + dprintf("res.rdma_mr.buf=%lx\n", (unsigned long)res.rdma_mr.buf); + dprintf("poll addr=%lx\n", (unsigned long)(rdma_buf + buf_size * i + buf_size - sizeof(uint32_t))); + //k = 0; + volatile uint32_t *ptr = (volatile uint32_t *)(rdma_buf + buf_size * i + buf_size - sizeof(uint32_t)); + while(*ptr != IBCOM_MAGIC) { + //k++; if(i >= NSKIPR && k % 65536 == 65535) { printf("i=%d,poll value=%x\n", i, *((uint32_t *)(rdma_buf + buf_size * i + buf_size - sizeof(uint32_t)))); } + __asm__ __volatile__("pause"); + } + //debug_print_mem((addr_t)res.rdma_mr.buf, buf_size); + } + tsce = rdtsc(); printf("recv,%.0f\n", (tsce-tscs)/(double)(NTRIAL-NSKIPR)); + } + + fn_exit: + /*Can free all resources*/ +#if 0 + if (resource_destroy(&config, &res)) { + fprintf(stderr, "resource destroy failed\n"); + } else { + dprintf("destroy all successfully..\n"); + } + if(loc_mr_list) { free(loc_mr_list); } +#endif + return ibcom_errno; + fn_fail: + goto fn_exit; +} diff --git a/test/qlmpi/rusage/verbs/read_config.c b/test/qlmpi/rusage/verbs/read_config.c new file mode 100644 index 00000000..c17daa9c --- /dev/null +++ b/test/qlmpi/rusage/verbs/read_config.c @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include "ibcomm.h" +/* + int read_config(config_t *config, int argc, char **argv){ + memset(config, 0, sizeof(config_t)); + config->server_name = NULL; + config->ib_port = 1; + config->dev_name = NULL; + + // client mode + if(argc == 4){ + config->server_name = argv[1]; + config->tcp_port = strtoul(argv[2], NULL, 0); + config->buf_size = strtoul(argv[3], NULL, 0); + } + // server mode + else if(argc == 3){ + config->tcp_port = strtoul(argv[1], NULL, 0); + config->buf_size = strtoul(argv[2], NULL, 0); + config->server_flg = 1; + } + else{ + printf("usage: ./main \n"); + return IBCOMM_ERR_CODE; + } + + if(config->tcp_port <=0 ) + return IBCOMM_ERR_CODE; + + return 0; + } + + */ + +int read_config(config_t *config, int argc, char **argv) { + memset(config, 0, sizeof(config_t)); + config->server_name = NULL; + config->ib_port = 1; + config->dev_name = NULL; + config->server_flg = 1; + config->nremote = 1; + config->buf_size = 40 + 8; /* UD requires more than 40 byte */ + config->tcp_port = 5256; + + while (1) { + int oc = getopt(argc, argv, "s:p:m:n:h"); + if (oc == -1) + break; + switch (oc) { + case 's': /* name for IP for exchanging LID and QPN */ + config->server_name = optarg; + config->server_flg = 0; + break; + case 'p': /* TCP port for exchange LID and QPN */ + config->tcp_port = atoi(optarg); + break; + case 'm': + config->buf_size = atoi(optarg); + break; + case 'n': /* number of remote nodes */ + config->nremote = atoi(optarg); + break; + case 'h': + default: + printf("usage: ./main [-s ] [-p ] [-m ]\n" + "Example: ssh cn01 ./main -p 10000 & ./main -s cn01 -p 10000\n"); + exit(-1); + break; + } + } + + // if (config->tcp_port <= 0) { return IBCOMM_ERR_CODE; } + // no need to set tcp_port for IB + + return 0; +} diff --git a/test/qlmpi/rusage/verbs/resource.c b/test/qlmpi/rusage/verbs/resource.c new file mode 100644 index 00000000..5423604a --- /dev/null +++ b/test/qlmpi/rusage/verbs/resource.c @@ -0,0 +1,390 @@ +#include +#include +#include +#include +#include "ibcomm.h" +#include "debug.h" +#include "list.h" +#include + +//#define DEBUG_RESOURCE +#ifdef DEBUG_RESOURCE +#define dprintf printf +#else +#define dprintf(...) +#endif + +int resource_create(config_t config, resource_t *res){ + struct ibv_device **dev_list = NULL, *ib_dev = NULL; + int dev_num; + int i, rc = IBCOMM_ERR_CODE; + + /*Init*/ + memset(res, 0, sizeof(resource_t)); + res->pdinfo_list = malloc(sizeof(list_t)); + res->qpinfo_list = malloc(sizeof(list_t)); + res->mrinfo_list = malloc(sizeof(list_t)); + + res->ib_ctx = NULL; + res->port_attr = NULL; + + /*Get IB device list*/ + dev_list = ibv_get_device_list(&dev_num); + printf("resource_create,dev_num=%d\n", dev_num); + ERR_CHKANDJUMP(!dev_list, -1, error_perror("ibv_get_device_list")); + if(!dev_num){ + error_printf("no devices are found\n"); + goto resource_create_exit; + } + /*Find requested HCA*/ + if(!config.dev_name) { + config.dev_name = strdup(ibv_get_device_name(dev_list[0])); + } + printf("Trying to open device %s\n", config.dev_name); + for(i=0; i< dev_num; i++){ + if(!strcmp(ibv_get_device_name(dev_list[i]), config.dev_name)){ + ib_dev = dev_list[i]; + break; + } + } + if(ib_dev == NULL){ + error_printf("no devices are found\n"); + goto resource_create_exit; + } + /*Open HCA*/ + res->ib_ctx = ibv_open_device(ib_dev); + if(!res->ib_ctx){ + error_perror("resource_create,ibv_open_device"); + goto resource_create_exit; + } + + struct ibv_device_attr device_attr; + int ib_errno; + ib_errno = ibv_query_device(res->ib_ctx, &device_attr); + if(ib_errno) { printf("ibv_query_device failed\n"); goto resource_create_exit; } + printf("atomic_cap=%08x\n", device_attr.atomic_cap); + printf("max_qp_rd_atom=%08x\n", device_attr.max_qp_rd_atom); + printf("max_ee_rd_atom=%08x\n", device_attr.max_ee_rd_atom); + printf("max_res_rd_atom=%08x\n", device_attr.max_res_rd_atom); + printf("max_qp_init_rd_atom=%08x\n", device_attr.max_qp_init_rd_atom); + printf("max_ee_init_rd_atom=%08x\n", device_attr.max_ee_init_rd_atom); + + /*Query Port Attr*/ + res->port_attr = malloc(sizeof(struct ibv_port_attr)); + memset(res->port_attr, 0 , sizeof(struct ibv_port_attr)); + if(ibv_query_port(res->ib_ctx, config.ib_port, res->port_attr)){ + error_perror("ibv_query_port"); + goto resource_create_exit; + } + printf("res->port_attr.max_msg_sz=%d\n", res->port_attr->max_msg_sz); + rc = 0; + + fn_exit: + return rc; + fn_fail: + resource_create_exit: + /*if error, destroy HCA handle*/ + if(rc){ + if(res->ib_ctx){ + ibv_close_device(res->ib_ctx); + res->ib_ctx = NULL; + } + if(res->port_attr){ + free(res->port_attr); + } + res = NULL; + } + // free other + ib_dev = NULL; + if(dev_list){ + ibv_free_device_list(dev_list); + dev_list = NULL; + } + goto fn_exit; +} + +int pd_create(resource_t *res, pdinfo_t *pdinfo){ + int rc = IBCOMM_ERR_CODE; + + /*Init*/ + memset(pdinfo, 0, sizeof(pdinfo_t)); + pdinfo->pd = NULL; + + /*Alloc on HCA handle*/ + pdinfo->pd = ibv_alloc_pd(res->ib_ctx); + if(pdinfo->pd == NULL){ + error_perror("ibv_alloc_pd"); + goto pd_create_exit; + } + + /*Register to res*/ + list_add(res->pdinfo_list, pdinfo); + rc = 0; + + pd_create_exit: + if(rc) + pdinfo = NULL; + + return rc; +} + +int qp_create(resource_t *res, pdinfo_t *pdinfo, qpinfo_t *qpinfo){ + struct ibv_qp_init_attr qp_init_attr; + int rc = IBCOMM_ERR_CODE; + int ibv_errno; + + /*Init*/ + memset(qpinfo, 0, sizeof(qpinfo_t)); + int i; + for(i = 0; i < NREMOTE; i++) { + qpinfo->sock[i] = -1; // not connected + } + qpinfo->sr_num = 0; + qpinfo->rr_num = 0; + + /*Create cq*/ + qpinfo->scq = ibv_create_cq(res->ib_ctx, _MAX_CQ_CAPACITY, NULL, NULL, 0); + qpinfo->rcq = ibv_create_cq(res->ib_ctx, _MAX_CQ_CAPACITY, NULL, NULL, 0); + if(!qpinfo->scq || !qpinfo->rcq){ + error_perror("qp_create,ibv_create_cq"); + goto qp_create_exit; + } + + /*Create qp*/ + memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr)); + qp_init_attr.qp_type = IBV_QPT_RC; + qp_init_attr.sq_sig_all = 1; + qp_init_attr.send_cq = qpinfo->scq; + qp_init_attr.recv_cq = qpinfo->rcq; + // max SR/RR num in SQ/RQ + qp_init_attr.cap.max_send_wr = _MAX_SQ_CAPACITY; + qp_init_attr.cap.max_recv_wr = _MAX_RQ_CAPACITY; + // max SGE num + qp_init_attr.cap.max_send_sge = _MAX_SGE_CAPACITY; + qp_init_attr.cap.max_recv_sge = _MAX_SGE_CAPACITY; + qp_init_attr.cap.max_inline_data = IBCOM_INLINE_DATA; +#if 0 + ibv_errno = show_resident(0); +#endif + qpinfo->qp = ibv_create_qp(pdinfo->pd, &qp_init_attr); + if(qpinfo->qp == NULL){ + error_perror("ibv_create_qp"); + goto qp_create_exit; + } +#if 0 + ibv_errno = show_resident(1); + qpinfo->qp = ibv_create_qp(pdinfo->pd, &qp_init_attr); + ibv_errno = show_resident(2); + qpinfo->qp = ibv_create_qp(pdinfo->pd, &qp_init_attr); + ibv_errno = show_resident(3); + qpinfo->qp = ibv_create_qp(pdinfo->pd, &qp_init_attr); + ibv_errno = show_resident(4); + qpinfo->qp = ibv_create_qp(pdinfo->pd, &qp_init_attr); + ibv_errno = show_resident(5); + qpinfo->qp = ibv_create_qp(pdinfo->pd, &qp_init_attr); + ibv_errno = show_resident(6); +#endif + + qpinfo->max_inline_data = qp_init_attr.cap.max_inline_data; + printf("max_send_wr=%d,max_recv_wr=%d,inline_data=%d,max_send_sge=%d,max_recv_sge=%d\n", qp_init_attr.cap.max_send_wr, qp_init_attr.cap.max_recv_wr, qp_init_attr.cap.max_inline_data, qp_init_attr.cap.max_send_sge, qp_init_attr.cap.max_recv_sge); + + /*Register to res*/ + list_add(res->qpinfo_list, qpinfo); + rc = 0; + + qp_create_exit: + if(rc){ + if(qpinfo->scq){ + ibv_destroy_cq(qpinfo->scq); + qpinfo->scq = NULL; + } + if(qpinfo->rcq){ + ibv_destroy_cq(qpinfo->rcq); + qpinfo->rcq = NULL; + } + if(qpinfo->qp){ + ibv_destroy_qp(qpinfo->qp); + qpinfo->qp = NULL; + } + qpinfo = NULL; + } + fn_exit: + return rc; + fn_fail: + goto fn_exit; +} + +int qp_create_ud(resource_t *res, pdinfo_t *pdinfo, qpinfo_t *qpinfo){ + struct ibv_qp_init_attr qp_init_attr; + int rc = IBCOMM_ERR_CODE; + int ibv_errno; + + /*Init*/ + memset(qpinfo, 0, sizeof(qpinfo_t)); + int i; + for(i = 0; i < NREMOTE; i++) { + qpinfo->sock[i] = -1; // not connected + } + qpinfo->sr_num = 0; + qpinfo->rr_num = 0; + + /*Create cq*/ + qpinfo->scq = ibv_create_cq(res->ib_ctx, _MAX_CQ_CAPACITY, NULL, NULL, 0); + qpinfo->rcq = ibv_create_cq(res->ib_ctx, _MAX_CQ_CAPACITY, NULL, NULL, 0); + if(!qpinfo->scq || !qpinfo->rcq){ + error_perror("ibv_create_cq"); + goto qp_create_exit; + } + + /*Create qp*/ + memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr)); + qp_init_attr.qp_type = IBV_QPT_UD; + //qp_init_attr.sq_sig_all = 1; + qp_init_attr.send_cq = qpinfo->scq; + qp_init_attr.recv_cq = qpinfo->rcq; + // max SR/RR num in SQ/RQ + qp_init_attr.cap.max_send_wr = _MAX_SQ_CAPACITY; + qp_init_attr.cap.max_recv_wr = _MAX_RQ_CAPACITY; + // max SGE num + qp_init_attr.cap.max_send_sge = _MAX_SGE_CAPACITY; + qp_init_attr.cap.max_recv_sge = _MAX_SGE_CAPACITY; + + qpinfo->qp = ibv_create_qp(pdinfo->pd, &qp_init_attr); + if(qpinfo->qp == NULL){ + error_perror("ibv_create_qp"); + goto qp_create_exit; + } + qpinfo->max_inline_data = qp_init_attr.cap.max_inline_data; + printf("max_send_wr=%d,max_recv_wr=%d,max_send_sge=%d,max_recv_sge=%d,\n", qp_init_attr.cap.max_send_wr, qp_init_attr.cap.max_recv_wr, qp_init_attr.cap.max_send_sge, qp_init_attr.cap.max_recv_sge); + + /*Register to res*/ + list_add(res->qpinfo_list, qpinfo); + rc = 0; + + qp_create_exit: + if(rc){ + if(qpinfo->scq){ + ibv_destroy_cq(qpinfo->scq); + qpinfo->scq = NULL; + } + if(qpinfo->rcq){ + ibv_destroy_cq(qpinfo->rcq); + qpinfo->rcq = NULL; + } + if(qpinfo->qp){ + ibv_destroy_qp(qpinfo->qp); + qpinfo->qp = NULL; + } + qpinfo = NULL; + } + return rc; +} + +int mr_create(resource_t *res, pdinfo_t *pdinfo, int buf_size, char *buf, mrinfo_t *mrinfo) { + int mr_flags; + int rc = IBCOMM_ERR_CODE; + + /*Init*/ + memset(mrinfo, 0, sizeof(mrinfo_t)); + mrinfo->buf = buf; + mrinfo->buf_size = buf_size; + dprintf("mr_create,mrinfo->buf=%lx\n", (unsigned long)mrinfo->buf); + + /*Create mr*/ + mr_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_ATOMIC; + mrinfo->mr = ibv_reg_mr(pdinfo->pd, buf, buf_size, mr_flags); + if(mrinfo->mr == NULL){ + perror("ibv_reg_mr"); + goto mr_create_exit; + } + + /*Register to res*/ + list_add(res->mrinfo_list, mrinfo); + rc = 0; + + mr_create_exit: + if(rc) { + if(mrinfo->mr) { ibv_dereg_mr(mrinfo->mr); } + if(mrinfo) { mrinfo = NULL; } + } + return rc; +} + +int resource_destroy(config_t *config, resource_t *res){ + int rc = 0; + + //config.dev_name + if(config->dev_name){ + free(config->dev_name); + } + // qp + qpinfo_t *qpinfo = NULL; + while((qpinfo = (qpinfo_t *)list_pop(res->qpinfo_list)) != NULL){ + // qp + if(qpinfo->qp && ibv_destroy_qp(qpinfo->qp)){ + error_perror("ibv_destroy_qp"); + rc = IBCOMM_ERR_CODE; + } + qpinfo->qp = NULL; + // scq + if(qpinfo->scq && ibv_destroy_cq(qpinfo->scq)){ + error_perror("ibv_destroy_cq"); + rc = IBCOMM_ERR_CODE; + } + qpinfo->scq = NULL; + // rcq + if(qpinfo->rcq && ibv_destroy_cq(qpinfo->rcq)){ + error_perror("ibv_destroy_cq"); + rc = IBCOMM_ERR_CODE; + } + qpinfo->rcq = NULL; + // sock + int i; + for(i = 0; i < (config->server_flg ? config->nremote : 1); i++) { + if(qpinfo->sock[i] >= 0 && close(qpinfo->sock[i])){ + error_perror("close"); + rc = IBCOMM_ERR_CODE; + } + } + qpinfo = NULL; + } + + // mr + mrinfo_t *mrinfo = NULL; + while ((mrinfo = (mrinfo_t *) list_pop(res->mrinfo_list)) != NULL) { + if (mrinfo->mr && ibv_dereg_mr(mrinfo->mr)) { + error_perror("ibv_dereg_mr"); + rc = IBCOMM_ERR_CODE; + } + mrinfo->mr = NULL; + if (mrinfo->buf) { + if (config->pci_buf_flg) { + //aal_host_mem_free(mrinfo->buf); + } else { + munmap(mrinfo->buf, mrinfo->buf_size); + } + } + mrinfo = NULL; + } + // pd + pdinfo_t *pdinfo = NULL; + while((pdinfo = (pdinfo_t *)list_pop(res->pdinfo_list)) != NULL){ + if(pdinfo->pd && ibv_dealloc_pd(pdinfo->pd)){ + error_perror("ibv_dealloc_pd"); + rc = IBCOMM_ERR_CODE; + } + pdinfo = NULL; + } + + if (res->ib_ctx && ibv_close_device(res->ib_ctx)) { + error_perror("ibv_close_device"); + rc = IBCOMM_ERR_CODE; + } + if(res->port_attr){ + free(res->port_attr); + } + res = NULL; + + return rc; +} + diff --git a/test/qlmpi/rusage/verbs/sock.c b/test/qlmpi/rusage/verbs/sock.c new file mode 100644 index 00000000..847859e8 --- /dev/null +++ b/test/qlmpi/rusage/verbs/sock.c @@ -0,0 +1,180 @@ +#include +#include +#include +#include +#include +#include +#include +#include "sock.h" +#include "debug.h" + +#define DEBUG_SOCK +#ifdef DEBUG_SOCK +#define dprintf printf +#else +#define dprintf(...) +#endif + +int sock_connect(char *server_name, int port, int *listenfd){ + struct addrinfo hints; + struct addrinfo *result = NULL, *rp = NULL; + int rc = 0, sockfd = -1; + // fd for search, after accept (server) + // for for search, after connect (client) + char service[6]; + char addrstr[256]; + + dprintf("sock_connect,enter\n"); + + if(!server_name && *listenfd != -1) { sockfd = *listenfd; goto reuse_listenfd; } + + // set port as service name + if (sprintf(service, "%d", port) < 0) + goto sock_connect_exit; + + memset(&hints, 0, sizeof(struct addrinfo)); + if(server_name == NULL){ + hints.ai_flags = AI_PASSIVE; + } + hints.ai_family = AF_UNSPEC;// IPv4 or IPv6 + hints.ai_socktype = SOCK_STREAM;//TCP + hints.ai_protocol = 0; // any protocol + hints.ai_canonname = NULL; + hints.ai_addr = NULL; + hints.ai_next = NULL; + + // get a list of addresses + rc = getaddrinfo(server_name, service, &hints, &result); + if(rc){ + dprintf("%s\n", gai_strerror(rc)); + goto sock_connect_exit; + } + dprintf("result=%p\n", result); + + // find a usable address + for(rp = result; rp != NULL; rp = rp->ai_next){ + + inet_ntop(rp->ai_family, rp->ai_addr->sa_data, addrstr, 100); + void *ptr; + switch(rp->ai_family) { + case AF_INET: + printf("ai_family=AF_INET\n"); + ptr= &((struct sockaddr_in *)rp->ai_addr)->sin_addr; + break; + default: + dprintf("ai_family=%08x\n", rp->ai_family); + } + inet_ntop(rp->ai_family, ptr, addrstr, 100); + + printf("trying to use addr=%s,port=%d\n", addrstr,port); + } + + for(rp = result; rp != NULL; rp = rp->ai_next){ + + sockfd = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol); + if(sockfd == -1) + continue; + + // set socket reusable + int on = 1; + if(setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) != 0) + continue; + + // server mode + if(server_name == NULL){ + inet_ntop(rp->ai_family, rp->ai_addr->sa_data, addrstr, 100); + void *ptr; + switch(rp->ai_family) { + case AF_INET: + dprintf("ai_family=AF_INET\n"); + ptr= &((struct sockaddr_in *)rp->ai_addr)->sin_addr; + break; + default: + dprintf("ai_family=%08x\n", rp->ai_family); + } + inet_ntop(rp->ai_family, ptr, addrstr, 100); + + printf("server mode,addr=%s,port=%d\n", addrstr,port); + if(bind(sockfd, rp->ai_addr, rp->ai_addrlen) != 0) + continue; + reuse_listenfd: + printf("listen=%d\n", sockfd); + if(listen(sockfd, 1) != 0) + continue; + /* connect successfully */ + if(*listenfd == -1) { *listenfd = sockfd; } + sockfd = accept(sockfd, NULL, NULL); + printf("accept=%d\n", sockfd); + goto sock_connect_success; + + // client mode + }else{ + inet_ntop(rp->ai_family, rp->ai_addr->sa_data, addrstr, 100); + void *ptr; + switch(rp->ai_family) { + case AF_INET: + printf("ai_family=AF_INET\n"); + ptr= &((struct sockaddr_in *)rp->ai_addr)->sin_addr; + break; + default: + dprintf("ai_family=%08x\n", rp->ai_family); + } + inet_ntop(rp->ai_family, ptr, addrstr, 100); + + printf("client mode,addr=%s,port=%d\n", addrstr,port); + rc = connect(sockfd, rp->ai_addr, rp->ai_addrlen); + + if(rc == 0) { + printf("connect succeeded,fd=%d\n", sockfd); + goto sock_connect_success; + } else { + printf("connect failed, trying to use next\n"); + } + } + } + + //sock_connect_failure: + if(rp == NULL){ + error_printf("All trial failed\n"); + sockfd = -1; + goto sock_connect_exit; + } + sock_connect_success: + sock_connect_exit: +#if 0 + if(listenfd > 0) + close(listenfd); +#endif + if(result) + freeaddrinfo(result); + return sockfd; +} + +int sock_sync_data(int sock, int data_bytes, char *local_data, char *remote_data){ + int rc = 0; + int read_bytes = 0; + // write to sock + rc = write(sock, local_data, data_bytes); + if(rc != data_bytes){ + rc =_SOCK_WRITE_ERR; + goto sock_sync_data_exit; + } + + // read from sock + rc = 0; + while(!rc && read_bytes < data_bytes){ + rc = read(sock, remote_data, data_bytes); + if(rc > 0){ + read_bytes += rc; + rc = 0; + }else{ + rc =_SOCK_READ_ERR; + goto sock_sync_data_exit; + } + } + + sock_sync_data_exit: + return rc; +} + + diff --git a/test/qlmpi/rusage/verbs/sock.h b/test/qlmpi/rusage/verbs/sock.h new file mode 100644 index 00000000..7a56526e --- /dev/null +++ b/test/qlmpi/rusage/verbs/sock.h @@ -0,0 +1,11 @@ +#ifndef SOCK_H +#define SOCK_H +enum sock_return_code{ + _SOCK_SUCCESS, + _SOCK_CONN_ERR, + _SOCK_WRITE_ERR, + _SOCK_READ_ERR +}; +extern int sock_connect(char *server_name, int port, int *listenfd); +extern int sock_sync_data(int sock, int data_size, char *local_data, char *remote_data); +#endif