From 4d9a1628f26e70aad576ca089cae28353dbb9927 Mon Sep 17 00:00:00 2001 From: Masamichi Takagi Date: Wed, 20 Sep 2017 19:48:32 +0900 Subject: [PATCH] Add test programs for ihk_os_getrusage() --- test/rusage/Makefile | 167 +++++++ test/rusage/README | 116 +++++ test/rusage/devobj.pl | 25 + test/rusage/match.pl | 20 + test/rusage/run.sh | 219 +++++++++ test/rusage/rusage000.c | 67 +++ test/rusage/rusage001.c | 72 +++ test/rusage/rusage002.c | 79 +++ test/rusage/rusage003.c | 1 + test/rusage/rusage004.c | 78 +++ test/rusage/rusage006.c | 85 ++++ test/rusage/rusage007.c | 1 + test/rusage/rusage008.c | 86 ++++ test/rusage/rusage009.c | 97 ++++ test/rusage/rusage010.c | 1 + test/rusage/rusage011.c | 126 +++++ test/rusage/rusage012.c | 98 ++++ test/rusage/rusage013.c | 72 +++ test/rusage/rusage014.c | 71 +++ test/rusage/rusage015.c | 74 +++ test/rusage/rusage016.c | 67 +++ test/rusage/rusage017.c | 76 +++ test/rusage/rusage018.c | 73 +++ test/rusage/rusage020.c | 1 + test/rusage/rusage030.c | 1 + test/rusage/rusage100.c | 81 ++++ test/rusage/rusage100.patch | 49 ++ test/rusage/rusage101.c | 93 ++++ test/rusage/rusage102.c | 89 ++++ test/rusage/rusage103.c | 81 ++++ test/rusage/verbs/aal_host.h | 21 + test/rusage/verbs/cas.c | 189 ++++++++ test/rusage/verbs/curtime.c | 11 + test/rusage/verbs/debug.c | 38 ++ test/rusage/verbs/debug.h | 31 ++ test/rusage/verbs/ibcomm.h | 209 ++++++++ test/rusage/verbs/list.c | 82 ++++ test/rusage/verbs/list.h | 26 + test/rusage/verbs/makefile | 68 +++ test/rusage/verbs/mbuf.h | 23 + test/rusage/verbs/mcons.h | 9 + test/rusage/verbs/mm_core.h | 12 + test/rusage/verbs/mm_host.h | 60 +++ test/rusage/verbs/mm_ib_test.h | 52 ++ test/rusage/verbs/mmib.h | 54 +++ test/rusage/verbs/mtype.h | 29 ++ test/rusage/verbs/pm_buf.h | 16 + test/rusage/verbs/post.c | 453 ++++++++++++++++++ test/rusage/verbs/printmem.c | 12 + test/rusage/verbs/qp.c | 297 ++++++++++++ test/rusage/verbs/rdma_imm_chain.c | 261 ++++++++++ test/rusage/verbs/rdma_rd.c | 155 ++++++ 
test/rusage/verbs/rdma_rdv.c | 194 ++++++++ test/rusage/verbs/rdma_wr.c | 218 +++++++++ test/rusage/verbs/rdma_wr_chain.c | 331 +++++++++++++ .../rusage/verbs/rdma_wr_with_imm_modify_qp.c | 228 +++++++++ test/rusage/verbs/read_config.c | 79 +++ test/rusage/verbs/reg_mr.c | 153 ++++++ test/rusage/verbs/resource.c | 390 +++++++++++++++ test/rusage/verbs/send.c | 148 ++++++ test/rusage/verbs/sock.c | 180 +++++++ test/rusage/verbs/sock.h | 11 + test/rusage/verbs/ud.c | 255 ++++++++++ 63 files changed, 6461 insertions(+) create mode 100644 test/rusage/Makefile create mode 100644 test/rusage/README create mode 100644 test/rusage/devobj.pl create mode 100644 test/rusage/match.pl create mode 100755 test/rusage/run.sh create mode 100644 test/rusage/rusage000.c create mode 100644 test/rusage/rusage001.c create mode 100644 test/rusage/rusage002.c create mode 120000 test/rusage/rusage003.c create mode 100644 test/rusage/rusage004.c create mode 100644 test/rusage/rusage006.c create mode 120000 test/rusage/rusage007.c create mode 100644 test/rusage/rusage008.c create mode 100644 test/rusage/rusage009.c create mode 120000 test/rusage/rusage010.c create mode 100644 test/rusage/rusage011.c create mode 100644 test/rusage/rusage012.c create mode 100644 test/rusage/rusage013.c create mode 100644 test/rusage/rusage014.c create mode 100644 test/rusage/rusage015.c create mode 100644 test/rusage/rusage016.c create mode 100644 test/rusage/rusage017.c create mode 100644 test/rusage/rusage018.c create mode 120000 test/rusage/rusage020.c create mode 120000 test/rusage/rusage030.c create mode 100644 test/rusage/rusage100.c create mode 100644 test/rusage/rusage100.patch create mode 100644 test/rusage/rusage101.c create mode 100644 test/rusage/rusage102.c create mode 100644 test/rusage/rusage103.c create mode 100755 test/rusage/verbs/aal_host.h create mode 100755 test/rusage/verbs/cas.c create mode 100755 test/rusage/verbs/curtime.c create mode 100755 test/rusage/verbs/debug.c create mode 100755 
test/rusage/verbs/debug.h create mode 100755 test/rusage/verbs/ibcomm.h create mode 100755 test/rusage/verbs/list.c create mode 100755 test/rusage/verbs/list.h create mode 100755 test/rusage/verbs/makefile create mode 100755 test/rusage/verbs/mbuf.h create mode 100755 test/rusage/verbs/mcons.h create mode 100755 test/rusage/verbs/mm_core.h create mode 100755 test/rusage/verbs/mm_host.h create mode 100755 test/rusage/verbs/mm_ib_test.h create mode 100755 test/rusage/verbs/mmib.h create mode 100755 test/rusage/verbs/mtype.h create mode 100755 test/rusage/verbs/pm_buf.h create mode 100755 test/rusage/verbs/post.c create mode 100755 test/rusage/verbs/printmem.c create mode 100755 test/rusage/verbs/qp.c create mode 100755 test/rusage/verbs/rdma_imm_chain.c create mode 100755 test/rusage/verbs/rdma_rd.c create mode 100755 test/rusage/verbs/rdma_rdv.c create mode 100755 test/rusage/verbs/rdma_wr.c create mode 100755 test/rusage/verbs/rdma_wr_chain.c create mode 100755 test/rusage/verbs/rdma_wr_with_imm_modify_qp.c create mode 100755 test/rusage/verbs/read_config.c create mode 100644 test/rusage/verbs/reg_mr.c create mode 100755 test/rusage/verbs/resource.c create mode 100755 test/rusage/verbs/send.c create mode 100755 test/rusage/verbs/sock.c create mode 100755 test/rusage/verbs/sock.h create mode 100755 test/rusage/verbs/ud.c diff --git a/test/rusage/Makefile b/test/rusage/Makefile new file mode 100644 index 00000000..93fe4728 --- /dev/null +++ b/test/rusage/Makefile @@ -0,0 +1,167 @@ +.SUFFIXES: # Clear suffixes +.SUFFIXES: .c + +CC = gcc + +CPPFLAGS = -I$(HOME)/project/os/install/include +CCFLAGS = -g +LDFLAGS = -L$(HOME)/project/os/install/lib -lihk -Wl,-rpath -Wl,$(HOME)/project/os/install/lib +EXES = +SRCS = +OBJS = $(SRCS:.c=.o) + +CPPFLAGSMCK = -I$(HOME)/usr/include +CCFLAGSMCK = -g -O0 +LDFLAGSMCK = -static +SRCSMCK = $(shell ls rusage*.c) +EXESMCK = $(SRCSMCK:.c=) +OBJSMCK = $(SRCSMCK:.c=.o) + +all: $(EXES) $(EXESMCK) + +rusage000: rusage000.o + $(CC) -o $@ $^ 
$(LDFLAGSMCK) + +rusage000.o: rusage000.c + $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< + +rusage010: rusage010.o + $(CC) -o $@ $^ $(LDFLAGSMCK) + +rusage010.o: rusage010.c + $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< + +rusage020: rusage020.o + $(CC) -o $@ $^ $(LDFLAGSMCK) + +rusage020.o: rusage020.c + $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< + +rusage030: rusage030.o + $(CC) -o $@ $^ $(LDFLAGSMCK) + +rusage030.o: rusage030.c + $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< + +rusage001: rusage001.o + $(CC) -o $@ $^ $(LDFLAGSMCK) + +rusage001.o: rusage001.c + $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< + +rusage002: rusage002.o + $(CC) -o $@ $^ $(LDFLAGSMCK) -lrt + +rusage002.o: rusage002.c + $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< + +rusage003: rusage003.o + $(CC) -o $@ $^ $(LDFLAGSMCK) -lrt + +rusage003.o: rusage003.c + $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< + +rusage004: rusage004.o + $(CC) -o $@ $^ $(LDFLAGSMCK) + +rusage004.o: rusage004.c + $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< + +rusage006: rusage006.o + $(CC) -o $@ $^ $(LDFLAGSMCK) + +rusage006.o: rusage006.c + $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< + +rusage007: rusage007.o + $(CC) -o $@ $^ $(LDFLAGSMCK) + +rusage007.o: rusage007.c + $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< + +rusage008: rusage008.o + $(CC) -o $@ $^ $(LDFLAGSMCK) + +rusage008.o: rusage008.c + $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< + +rusage009: rusage009.o + $(CC) -o $@ $^ $(LDFLAGSMCK) + +rusage009.o: rusage009.c + $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< + +rusage011: rusage011.o + $(CC) -o $@ $^ $(LDFLAGSMCK) -L/home/takagi/usr/lib -lxpmem + +rusage011.o: rusage011.c + $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< + +rusage012: rusage012.o + $(CC) -o $@ $^ $(LDFLAGSMCK) -L/home/takagi/usr/lib -lxpmem + +rusage012.o: rusage012.c + $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< + +rusage013: rusage013.o + $(CC) -o $@ $^ $(LDFLAGSMCK) + +rusage013.o: rusage013.c + $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< + +rusage014: rusage014.o + $(CC) -o $@ $^ 
$(LDFLAGSMCK) + +rusage014.o: rusage014.c + $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< + +rusage015: rusage015.o + $(CC) -o $@ $^ $(LDFLAGSMCK) + +rusage015.o: rusage015.c + $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< + +rusage016: rusage016.o + $(CC) -o $@ $^ $(LDFLAGSMCK) + +rusage016.o: rusage016.c + $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< + +rusage017: rusage017.o + $(CC) -o $@ $^ $(LDFLAGSMCK) + +rusage017.o: rusage017.c + $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< + +rusage018: rusage018.o + $(CC) -o $@ $^ $(LDFLAGSMCK) + +rusage018.o: rusage018.c + $(CC) $(CCFLAGSMCK) $(CPPFLAGSMCK) -c $< + +rusage100: rusage100.o + $(CC) -o $@ $^ $(LDFLAGS) + +rusage100.o: rusage100.c + $(CC) $(CCFLAGS) $(CPPFLAGS) -c $< + +rusage101: rusage101.o + $(CC) -o $@ $^ $(LDFLAGS) + +rusage101.o: rusage101.c + $(CC) $(CCFLAGS) $(CPPFLAGS) -c $< + +rusage102: rusage102.o + $(CC) -o $@ $^ $(LDFLAGS) + +rusage102.o: rusage102.c + $(CC) $(CCFLAGS) $(CPPFLAGS) -c $< + +rusage103: rusage103.o + $(CC) -o $@ $^ $(LDFLAGS) + +rusage103.o: rusage103.c + $(CC) $(CCFLAGS) $(CPPFLAGS) -c $< + +clean: + rm -f core $(EXES) $(OBJS) $(EXESMCK) $(OBJSMCK) diff --git a/test/rusage/README b/test/rusage/README new file mode 100644 index 00000000..55f15daf --- /dev/null +++ b/test/rusage/README @@ -0,0 +1,116 @@ +========== +How to run +========== +./run.sh + +Example: +./run.sh rusage000 + +=================================== +How to judge it behaves as expected +=================================== +Follow the instruction given by run.sh. 
+ +===================== +Description of tests +===================== +rusage000: Exercise the following execution paths: + args_envs()->set_range() text,data,arg,env[OK] + args_envs()->set_range() !vdso [OK], + args_envs()->set_range() stack[OK], + glibc->bss [OK], + glibc->brk()->extend_process_region()->add_process_memory_range()->update_process_page_table->ihk_mc_pt_set_range() [OK] + + app->mmap() 4K,anon,pre-page ->set_range() [OK], + app->mmap(), 4K,anon,pre-page->set_range()->st->munmap [OK] + + app->munmap()->free_process_memory_range()->[OK] + exit_group()->free_process_memory_range()->text,data,bss,heap,anon|private[OK], + exit_group()->free_process_memory_range()->!vdso[OK], + exit_group()->free_process_memory_range()->stack[OK], + +rusage010: Exercise the following execution paths: + app->mmap() 2M,anon,pre-page ->set_range()->munmap()->free_process_memory_range()->clear_range()[OK] + +rusage020: Exercise the following execution paths: + app->mmap() 1G,anon,private,demand-page -> pf -> set_range() [NA (failed to get 1GB page)], + app->mmap() 1G,anon,private,demand-page -> pf -> set_range()->munmap()->free_process_memory_range()->clear_range()[NA (failed to get 1GB page)] + +rusage030: Exercise the following execution paths: + app->mmap() 128M,anon,private,demand-page(=zeroobj) -> pf -> set_range() [OK], + app->mmap() 128M,anon,private,demand-page(=zeroobj) -> pf -> set_range()->munmap()->free_process_memory_range()->clear_range()[OK] + +rusage001: Exercise the following execution paths: + do_mmap(),file,demand-page->get_page(),ld->set_pte()->munmap()->clear_range() [OK] + do_mmap(),file,demand-page->get_page(),ld->set_pte()->st->munmap()->clear_range() [OK] + do_mmap(),file,demand-page->get_page(),st->set_pte()->flush()->munmap()->clear_range() [OK] + +rusage002: Exercise the following execution paths: + do_mmap(),/dev/shm with --mpol-shm-premap,pre-page->get_page(),st->set_pte()->munmap()->clear_range() [OK] + +rusage003: Exercise the following 
execution paths: + do_mmap(),/dev/shm without --mpol-shm-premap,pre-page->get_page(),st->set_pte()->munmap()->clear_range() [OK] + +rusage004: Exercise the following execution paths related to clone + load segments->copy_user_ranges()->clear_range() [OK] + filemap(demand-paging)->copy_user_ranges()->clear_range() [OK] + +rusage005: Exercise the following execution paths related to device file (ib ping-pong) + devobj()->get_page()->pf->munmap()->clear_range() [OK] + remote page fault->cow->clear_range() [OK] + ld-linux.so->mmap private->cow->clear_range() [OK] + +rusage006: Exercise the following execution paths related to clone + filemap,/dev/shm with --mpol-shm-premap->copy_user_ranges()->clear_range() [OK] + +rusage007: Exercise the following execution paths related to clone + filemap,/dev/shm without --mpol-shm-premap->copy_user_ranges()->clear_range() [OK] + +rusage008: Exercise the following execution paths related to sharing file-map page +fork()->filemap->pf->clear_range() [OK] + +rusage009: Exercise the following execution paths related to sharing shmget() page +fork()->shmat()->pf->clear_range() [OK] + +rusage011: Exercise the following execution paths related to sharing xpmem page +fork()->xpmem_attach()->pf->clear_range() [OK] + +rusage012: #925 + +rusage013: Exercise the following execution paths: +fileobj(private)->cow->set_pte()->clear_range() [OK] + +rusage014: Exercise the following execution paths related to remap_file_pages +do_mmap->fileobj-->remap_file_pages()->st->clear_range() [OK] + +rusage015: Exercise the following execution paths related to remap_file_pages +do_mmap->fileobj-->st-->remap_one_page()->clear_range() [OK] + +rusage016: Exercise the following execution paths related to mremap +mremap()->move_pte_range()-->clear_range() [OK] + +rusage017: Exercise the following execution paths related to split_process_memory_range, file-map 
+mprotect()->split_process_memory_range()->change_prot_process_memory_range()->clear_range()/file_obj_release() [OK] + +rusage018: Excercise the following execution paths related to split_process_memory_range, 2M-page +mprotect()->ihk_mc_pt_split()->change_prot_process_memory_range()->clear_range() [OK] + +rusage019: Run npb bt-mz.S.2 +1-ppn x 2-tpn x 2-node[OK] + +rusage021: Run npb bt-mz.S.4 +2-ppn x 2-tpn x 2-node (wallaby{14,15}) [OK] +2-ppn x 1-tpn x 2-node (polaris,kochab) [OK] + +rusage100: Test ihk_os_getrusage() +anon mmap,num_threads=1 [OK] + +rusage101: Test ihk_os_getrusage() +anon mmap,num_threads=2 [OK] + +rusage102: Test ihk_os_getrusage() +file map,num_threads=1 [OK] + +rusage103: Test ihk_os_getrusage() +anon mmap@numa#1 [OK] + diff --git a/test/rusage/devobj.pl b/test/rusage/devobj.pl new file mode 100644 index 00000000..905ffa51 --- /dev/null +++ b/test/rusage/devobj.pl @@ -0,0 +1,25 @@ +#!/usr/bin/perl + +while(<>) { + if(/\[\s+\d+\]\:\s([^+]+)\+,/) { + $addr = $1; + $countplus{$addr}++; + #print $addr . "\n"; + } + if(/\[\s+\d+\]\:\s([^-]+)\-,/) { + $addr = $1; + $countminus{$addr}--; + #print $addr . "\n"; + } + if(/devobj_get_page\([^)]+\):\s\S+\s(\S+)/) { + $addr = $1; + $devobj{$addr}++; + #print $addr . "\n"; + } +} + +foreach $key (sort keys(%devobj)) { + if($countplus{$key} != 0 || $countminus{$key} != 0) { + print $key.",count=".$countplus{$key}.",".$countminus{$key}."\n"; + } +} diff --git a/test/rusage/match.pl b/test/rusage/match.pl new file mode 100644 index 00000000..8a3d0903 --- /dev/null +++ b/test/rusage/match.pl @@ -0,0 +1,20 @@ +#!/usr/bin/perl + +while(<>) { + if(/\[\s+\d+\]\:\s([^+^ ]+)\+,/) { + $addr = $1; + $count{$addr}++; + #print $addr . "\n"; + } + if(/\[\s+\d+\]\:\s([^-^ ]+)\-,/) { + $addr = $1; + $count{$addr}--; + #print $addr . 
"\n"; + } +} + +foreach $key (sort keys(%count)) { + if($count{$key} != 0) { + print $key.",count=".$count{$key}."\n"; + } +} diff --git a/test/rusage/run.sh b/test/rusage/run.sh new file mode 100755 index 00000000..8f5d7035 --- /dev/null +++ b/test/rusage/run.sh @@ -0,0 +1,219 @@ +#!/usr/bin/bash + +testname=$1 +bootopt="-m 256M" +mcexecopt="" +testopt="" +kill="n" +dryrun="n" +sleepopt="0.4" +home=$(eval echo \$\{HOME\}) +install=${home}/project/os/install + +echo Executing ${testname} + +case ${testname} in + rusage011 | rusage012) + printf "*** Enable debug messages in rusage.h, memory.c, fileobj.c, shmobj.c, process.c by defining DEBUG macro, e.g. #define RUSAGE_DEBUG and then recompile IHK/McKernel.\n" + printf "*** Install xpmem by git-clone https://github.com/hjelmn/xpmem.\n" + ;; + rusage10?) + printf "*** Refer to rusage100.patch to add syscall #900 by editing syscall_list.h and syscall.c and recompile IHK/McKernel.\n" + ;; + *) + printf "*** Enable debug messages in rusage.h, memory.c, fileobj.c, shmobj.c, process.c by defining DEBUG macro, e.g. #define RUSAGE_DEBUG and then recompile IHK/McKernel.\n" + ;; +esac +read -p "*** Hit return when ready!" key + +case ${testname} in + rusage005) + ssh wallaby -c '(cd ${home}/project/src/rusage/verbs; make rdma_wr)' + bn=verbs/rdma_wr + ;; + rusage019) + #ssh wallaby -c '(cd ${home}/project/src/rusage/npb/NPB3.3.1-MZ/NPB3.3-MZ-MPI; make bt-mz CLASS=S NPROCS=2)' + bn=npb/NPB3.3.1-MZ/NPB3.3-MZ-MPI/bin/bt-mz.S.2 + perl -e 'print "wallaby14\nwallaby15\n"' > ./hostfile + ;; + rusage021) + if ! 
grep /var/log/local6 /etc/rsyslog.conf &>/dev/null; then + echo "Insert a line of local6.* /var/log/local6 into /etc/rsyslog.conf" + exit 255 + fi + ssh wallaby bash -c '(cd ${home}/project/src/rusage/npb/NPB3.3.1-MZ/NPB3.3-MZ-MPI; make bt-mz CLASS=S NPROCS=4)' + bn=npb/NPB3.3.1-MZ/NPB3.3-MZ-MPI/bin/bt-mz.S.4 + perl -e 'print "polaris:2\nkochab:2\n"' > ./hostfile + ;; + *) + bn=${testname} + make clean > /dev/null 2> /dev/null + make ${bn} +esac + +pid=`pidof mcexec` +if [ "${pid}" != "" ]; then + kill -9 ${pid} > /dev/null 2> /dev/null +fi + +case ${testname} in + rusage000) + testopt="0" + ;; + rusage010) + testopt="1" + ;; + rusage020) + bootopt="-m 256M@0,1G@0" + testopt="2" + kill="y" + ;; + rusage030) + testopt="3" + ;; + rusage001) + cp ${bn} ./file + kill="n" + ;; + rusage002) + mcexecopt="--mpol-shm-premap" + ;; + rusage003) + ;; + rusage004) + cp ${bn} ./file + ;; + rusage005) + echo ssh wallaby15.aics-sys.riken.jp ${home}/project/src/verbs/rdma_wr -p 10000& + read -p "Run rdma_wr on wallaby15 and enter the port number." 
port + testopt="-s wallaby15.aics-sys.riken.jp -p ${port}" + ;; + rusage006) + mcexecopt="--mpol-shm-premap" + ;; + rusage007) + ;; + rusage008) + cp ${bn} ./file + ;; + rusage009) + ;; + rusage011) + sudo insmod /home/takagi/usr/lib/module/xpmem.ko + sudo chmod og+rw /dev/xpmem + dryrun="n" + kill="n" + sleepopt="5" + ;; + rusage012) + sudo insmod /home/takagi/usr/lib/module/xpmem.ko + sudo chmod og+rw /dev/xpmem + dryrun="n" + kill="n" + sleepopt="5" + ;; + rusage013) + cp ${bn} ./file + ;; + rusage014) + cp ${bn} ./file + ;; + rusage015) + cp ${bn} ./file + ;; + rusage016) + ;; + rusage017) + cp ${bn} ./file + ;; + rusage018) + ;; + rusage019 | rusage021) + bootopt="-k 1 -m 256M" + ;; + rusage100) + ;; + rusage101) + ;; + rusage102) + cp ${bn} ./file + ;; + rusage103) + bootopt="-m 256M@1" + ;; + *) + echo Unknown test case + exit 255 +esac + +if [ ${dryrun} == "y" ]; then +exit +fi + +case ${testname} in + rusage019 | rusage021) + sudo rm /var/log/local6 + sudo touch /var/log/local6 + sudo chmod 600 /var/log/local6 + sudo systemctl restart rsyslog + ;; + *) + ;; +esac + +case ${testname} in + rusage019 | rusage021) + echo sudo ssh wallaby15 ${install}/sbin/mcstop+release.sh && + echo sudo ssh wallaby15 ${install}/sbin/mcreboot.sh + read -p "Boot mckernel on wallaby15." ans + ;; + *) + ;; +esac +sudo ${install}/sbin/mcstop+release.sh && +sudo ${install}/sbin/mcreboot.sh ${bootopt} + +if [ ${kill} == "y" ]; then + ${install}/bin/mcexec ${mcexecopt} ./${bn} ${testopt} & + sleep ${sleepopt} + sudo ${install}/sbin/ihkosctl 0 kmsg > ./${testname}.log + pid=`pidof mcexec` + if [ "${pid}" != "" ]; then + kill -9 ${pid} > /dev/null 2> /dev/null + fi +else + case ${testname} in + rusage005) + ${install}/bin/mcexec ${mcexecopt} ./${bn} ${testopt} + #read -p "Run rdma_wr." 
ans + sudo ${install}/sbin/ihkosctl 0 kmsg > ./${testname}.log + ;; + rusage019 | rusage021) + echo OMP_NUM_THREADS=2 mpiexec -machinefile ./hostfile ${install}/bin/mcexec ${mcexecopt} ./${bn} ${testopt} + read -p "Run ${bn} and hit return." ans + sleep 1.5 + sudo cat /var/log/local6 > ./${testname}.log + ;; + rusage100 | rusage101 | rusage102 | rusage103) + ${install}/bin/mcexec ${mcexecopt} ./${bn} ${testopt} > ./${testname}.log + echo "================================================" >> ./${testname}.log + sudo ${install}/sbin/ihkosctl 0 kmsg >> ./${testname}.log + ;; + *) + ${install}/bin/mcexec ${mcexecopt} ./${bn} ${testopt} + sudo ${install}/sbin/ihkosctl 0 kmsg > ./${testname}.log + esac +fi + +case ${testname} in + rusage10?) + printf "*** Check the ihk_os_getrusage() result (the first part of ${testname}.log) matches with the syscall #900 result (the second part) \n" + ;; + + *) + printf "*** cat ${testname}.log (kmsg) > ./match.pl to confirm there's no stray add/sub.\n" + printf "*** Look ${testname}.log (kmsg) to confirm memory_stat_*[*] returned to zero when the last thread exits.\n" + ;; +esac + +sudo ${install}/sbin/mcstop+release.sh diff --git a/test/rusage/rusage000.c b/test/rusage/rusage000.c new file mode 100644 index 00000000..19bc8e74 --- /dev/null +++ b/test/rusage/rusage000.c @@ -0,0 +1,67 @@ +#include +#include +#include +#include +#include + +#define DEBUG + +#ifdef DEBUG +#define dprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#define eprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#else +#define dprintf(...) do { } while (0) +#define eprintf(...) do { } while (0) +#endif + +#define CHKANDJUMP(cond, err, ...) 
\ + do { \ + if(cond) { \ + eprintf(__VA_ARGS__); \ + ret = err; \ + goto fn_fail; \ + } \ + } while(0) + +int sz_anon[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 +#define NUM_AREAS 1 + +int main(int argc, char** argv) { + int i; + int sz_index; + void* anon[NUM_AREAS]; + int ret = 0; + CHKANDJUMP(argc != 2, 255, "%s \n", argv[0]); + sz_index = atoi(argv[1]); + + for(i = 0; i < NUM_AREAS; i++) { + anon[i] = mmap(0, sz_anon[sz_index], PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + CHKANDJUMP(anon[i] == MAP_FAILED, 255, "mmap failed\n"); + *((unsigned long*)anon[i]) = 0x123456789abcdef0; + } + + for(i = 0; i < NUM_AREAS; i++) { + munmap(anon[i], sz_anon[sz_index]); + } + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/rusage001.c b/test/rusage/rusage001.c new file mode 100644 index 00000000..cad64d48 --- /dev/null +++ b/test/rusage/rusage001.c @@ -0,0 +1,72 @@ +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG + +#ifdef DEBUG +#define dprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#define eprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#else +#define dprintf(...) do { } while (0) +#define eprintf(...) do { } while (0) +#endif + +#define CHKANDJUMP(cond, err, ...) 
\ + do { \ + if(cond) { \ + eprintf(__VA_ARGS__); \ + ret = err; \ + goto fn_fail; \ + } \ + } while(0) + +int sz_mem[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 + +int main(int argc, char** argv) { + void* mem; + int ret = 0; + int fd; + + fd = open("./file", O_RDWR); + CHKANDJUMP(fd == -1, 255, "open failed\n"); + + mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + + unsigned long val = *((unsigned long*)mem); + //memset(mem, 0, sz_mem[SZ_INDEX]); + + *((unsigned long*)mem) = 0x123456789abcdef0; +//ret = msync(mem, sz_mem[SZ_INDEX], MS_SYNC); +// *((unsigned long*)mem) = 0x123456789abcdef0; + + munmap(mem, sz_mem[SZ_INDEX]); + //ret = close(fd); + //CHKANDJUMP(ret != 0, 255, "close failed\n"); + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/rusage002.c b/test/rusage/rusage002.c new file mode 100644 index 00000000..f613334e --- /dev/null +++ b/test/rusage/rusage002.c @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG + +#ifdef DEBUG +#define dprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#define eprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#else +#define dprintf(...) do { } while (0) +#define eprintf(...) do { } while (0) +#endif + +#define CHKANDJUMP(cond, err, ...) 
\ + do { \ + if(cond) { \ + eprintf(__VA_ARGS__); \ + ret = err; \ + goto fn_fail; \ + } \ + } while(0) + +int sz_mem[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 + +int main(int argc, char** argv) { + void* mem; + int ret = 0; + int fd; + char fn[256] = "/dev/shm/Intel_MPI"; + + fd = open(fn, O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR); + CHKANDJUMP(fd == -1, 255, "shm_open failed,str=%s\n", strerror(errno)); + + ret = ftruncate(fd, sz_mem[SZ_INDEX]); + CHKANDJUMP(ret != 0, 255, "ftruncate failed\n"); + + mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + + //unsigned long val = *((unsigned long*)mem); + //memset(mem, 0, sz_mem[SZ_INDEX]); + + *((unsigned long*)mem) = 0x123456789abcdef0; + // ret = msync(mem, sz_mem[SZ_INDEX], MS_SYNC); + // *((unsigned long*)mem) = 0x123456789abcdef0; + + munmap(mem, sz_mem[SZ_INDEX]); + ret = close(fd); + CHKANDJUMP(ret != 0, 255, "close failed\n"); + ret = unlink(fn); + CHKANDJUMP(ret != 0, 255, "shm_unlink failed\n"); + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/rusage003.c b/test/rusage/rusage003.c new file mode 120000 index 00000000..239a35e2 --- /dev/null +++ b/test/rusage/rusage003.c @@ -0,0 +1 @@ +rusage002.c \ No newline at end of file diff --git a/test/rusage/rusage004.c b/test/rusage/rusage004.c new file mode 100644 index 00000000..bf5aea0f --- /dev/null +++ b/test/rusage/rusage004.c @@ -0,0 +1,78 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG + +#ifdef DEBUG +#define dprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#define eprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#else +#define dprintf(...) 
do { } while (0) +#define eprintf(...) do { } while (0) +#endif + +#define CHKANDJUMP(cond, err, ...) \ + do { \ + if(cond) { \ + eprintf(__VA_ARGS__); \ + ret = err; \ + goto fn_fail; \ + } \ + } while(0) + +int sz_mem[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 +#define NUM_AREAS 1 + +int main(int argc, char** argv) { + void* mem; + int ret = 0; + pid_t pid; + int status; + int fd; + + fd = open("./file", O_RDWR); + CHKANDJUMP(fd == -1, 255, "open failed\n"); + + mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + + unsigned long val = *((unsigned long*)mem); + + pid = fork(); + CHKANDJUMP(pid == -1, 255, "fork failed\n"); + if(pid == 0) { + _exit(123); + } else { + ret = waitpid(pid, &status, 0); + CHKANDJUMP(ret == -1, 255, "waitpid failed\n"); + printf("exit status=%d\n", WEXITSTATUS(status)); + } + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/rusage006.c b/test/rusage/rusage006.c new file mode 100644 index 00000000..e76368f4 --- /dev/null +++ b/test/rusage/rusage006.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG + +#ifdef DEBUG +#define dprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#define eprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#else +#define dprintf(...) do { } while (0) +#define eprintf(...) do { } while (0) +#endif + +#define CHKANDJUMP(cond, err, ...) 
\ + do { \ + if(cond) { \ + eprintf(__VA_ARGS__); \ + ret = err; \ + goto fn_fail; \ + } \ + } while(0) + +int sz_mem[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 +#define NUM_AREAS 1 + +int main(int argc, char** argv) { + void* mem; + int ret = 0; + pid_t pid; + int status; + int fd; + + char fn[256] = "/dev/shm/Intel_MPI"; + + fd = open(fn, O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR); + CHKANDJUMP(fd == -1, 255, "shm_open failed,str=%s\n", strerror(errno)); + + ret = ftruncate(fd, sz_mem[SZ_INDEX]); + CHKANDJUMP(ret != 0, 255, "ftruncate failed\n"); + + mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + + //unsigned long val = *((unsigned long*)mem); + *((unsigned long*)mem) = 0x123456789abcdef0; + + pid = fork(); + CHKANDJUMP(pid == -1, 255, "fork failed\n"); + if(pid == 0) { + _exit(123); + } else { + ret = waitpid(pid, &status, 0); + CHKANDJUMP(ret == -1, 255, "waitpid failed\n"); + printf("exit status=%d\n", WEXITSTATUS(status)); + } + + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/rusage007.c b/test/rusage/rusage007.c new file mode 120000 index 00000000..1a64aac5 --- /dev/null +++ b/test/rusage/rusage007.c @@ -0,0 +1 @@ +rusage006.c \ No newline at end of file diff --git a/test/rusage/rusage008.c b/test/rusage/rusage008.c new file mode 100644 index 00000000..f28ecc89 --- /dev/null +++ b/test/rusage/rusage008.c @@ -0,0 +1,86 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG + +#ifdef DEBUG +#define dprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#define eprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#else +#define dprintf(...) 
do { } while (0) +#define eprintf(...) do { } while (0) +#endif + +#define CHKANDJUMP(cond, err, ...) \ + do { \ + if(cond) { \ + eprintf(__VA_ARGS__); \ + ret = err; \ + goto fn_fail; \ + } \ + } while(0) + +int sz_mem[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 +#define NUM_AREAS 1 + +int main(int argc, char** argv) { + void* mem; + int ret = 0; + pid_t pid; + int status; + int fd; + + pid = fork(); + CHKANDJUMP(pid == -1, 255, "fork failed\n"); + if(pid == 0) { + fd = open("./file", O_RDWR); + CHKANDJUMP(fd == -1, 255, "open failed\n"); + + mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + + unsigned long val = *((unsigned long*)mem); + + _exit(123); + } else { + fd = open("./file", O_RDWR); + CHKANDJUMP(fd == -1, 255, "open failed\n"); + + mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + + unsigned long val = *((unsigned long*)mem); + + ret = waitpid(pid, &status, 0); + CHKANDJUMP(ret == -1, 255, "waitpid failed\n"); + printf("exit status=%d\n", WEXITSTATUS(status)); + } + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/rusage009.c b/test/rusage/rusage009.c new file mode 100644 index 00000000..698d6f18 --- /dev/null +++ b/test/rusage/rusage009.c @@ -0,0 +1,97 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG + +#ifdef DEBUG +#define dprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#define eprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#else +#define dprintf(...) do { } while (0) +#define eprintf(...) 
do { } while (0) +#endif + +#define CHKANDJUMP(cond, err, ...) \ + do { \ + if(cond) { \ + eprintf(__VA_ARGS__); \ + ret = err; \ + goto fn_fail; \ + } \ + } while(0) + +int sz_mem[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 + +int main(int argc, char** argv) { /* creates a SysV shm segment keyed on argv[0]; the child attaches and stores 0x1234, the parent attaches, waits for the child and prints the word; returns 0 on success, 255 on failure */ + void* mem; + int ret = 0; + pid_t pid; + int status; + key_t key = ftok(argv[0], 0); + int shmid; + CHKANDJUMP(key == (key_t)-1, 255, "ftok failed: %s\n", strerror(errno)); /* ftok() reports failure as (key_t)-1; do not hand that to shmget() as a key */ + shmid = shmget(key, sz_mem[SZ_INDEX], IPC_CREAT | 0660); + CHKANDJUMP(shmid == -1, 255, "shmget failed: %s\n", strerror(errno)); + + pid = fork(); + CHKANDJUMP(pid == -1, 255, "fork failed\n"); + if(pid == 0) { + mem = shmat(shmid, NULL, 0); + CHKANDJUMP(mem == (void*)-1, 255, "shmat failed: %s\n", strerror(errno)); + + *((unsigned long*)mem) = 0x1234; + + ret = shmdt(mem); + CHKANDJUMP(ret == -1, 255, "shmdt failed\n"); + + _exit(123); + } else { + mem = shmat(shmid, NULL, 0); + CHKANDJUMP(mem == (void*)-1, 255, "shmat failed: %s\n", strerror(errno)); + + + ret = waitpid(pid, &status, 0); + CHKANDJUMP(ret == -1, 255, "waitpid failed\n"); + + printf("%lx\n", *((unsigned long*)mem)); + +#if 0 + struct shmid_ds buf; + ret = shmctl(shmid, IPC_RMID, &buf); + CHKANDJUMP(ret == -1, 255, "shmctl failed\n"); +#endif + + ret = shmdt(mem); + CHKANDJUMP(ret == -1, 255, "shmdt failed\n"); + } + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/rusage010.c b/test/rusage/rusage010.c new file mode 120000 index 00000000..dcda4d36 --- /dev/null +++ b/test/rusage/rusage010.c @@ -0,0 +1 @@ +rusage000.c \ No newline at end of file diff --git a/test/rusage/rusage011.c b/test/rusage/rusage011.c new file mode 100644 index 00000000..fd7ba260 --- /dev/null +++ b/test/rusage/rusage011.c @@ -0,0 +1,126 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG + +#ifdef DEBUG +#define dprintf(...) 
\ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#define eprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#else +#define dprintf(...) do { } while (0) +#define eprintf(...) do { } while (0) +#endif + +#define CHKANDJUMP(cond, err, ...) \ + do { \ + if(cond) { \ + eprintf(__VA_ARGS__); \ + ret = err; \ + goto fn_fail; \ + } \ + } while(0) + +int sz_mem[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 + +int main(int argc, char** argv) { /* parent exports an anonymous mapping through xpmem and passes the segid to the child via a SysV shm word; the child attaches the segment and stores 0x1234 into it */ + void* mem; + int ret = 0; + pid_t pid; + int status; + key_t key = ftok(argv[0], 0); + int shmid; + xpmem_segid_t segid; + CHKANDJUMP(key == (key_t)-1, 255, "ftok failed: %s\n", strerror(errno)); /* ftok() reports failure as (key_t)-1; do not hand that to shmget() as a key */ + shmid = shmget(key, sz_mem[SZ_INDEX], IPC_CREAT | 0660); + CHKANDJUMP(shmid == -1, 255, "shmget failed: %s\n", strerror(errno)); + + mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + memset(mem, 0, sz_mem[SZ_INDEX]); + + pid = fork(); + CHKANDJUMP(pid == -1, 255, "fork failed\n"); + if(pid == 0) { + void *shm = shmat(shmid, NULL, 0); + CHKANDJUMP(shm == (void*)-1, 255, "shmat failed: %s\n", strerror(errno)); + + while((segid = *(volatile xpmem_segid_t*)shm) == 0) { }; /* volatile: the parent writes this word from another process, so the load must be repeated on every iteration */ + + ret = shmdt(shm); + CHKANDJUMP(ret == -1, 255, "shmdt failed\n"); + + ret = xpmem_init(); + CHKANDJUMP(ret != 0, 255, "xpmem_init failed: %s\n", strerror(errno)); + + xpmem_apid_t apid = xpmem_get(segid, XPMEM_RDWR, XPMEM_PERMIT_MODE, NULL); + CHKANDJUMP(apid == -1, 255, "xpmem_get failed: %s\n", strerror(errno)); + + struct xpmem_addr addr = { .apid = apid, .offset = 0 }; + void* attach = xpmem_attach(addr, sz_mem[SZ_INDEX], NULL); + CHKANDJUMP(attach == (void*)-1, 255, "xpmem_attach failed: %s\n", strerror(errno)); + + *((unsigned long*)attach) = 0x1234; + + ret = xpmem_detach(attach); + CHKANDJUMP(ret == -1, 255, "xpmem_detach failed\n"); + 
+ _exit(123); + } else { + void *shm = shmat(shmid, NULL, 0); + CHKANDJUMP(shm == (void*)-1, 255, "shmat failed: %s\n", strerror(errno)); /* check the pointer shmat() just returned (was testing mem, so a failed attach went unnoticed) */ + + ret = xpmem_init(); + CHKANDJUMP(ret != 0, 255, "xpmem_init failed: %s\n", strerror(errno)); + + segid = xpmem_make(mem, sz_mem[SZ_INDEX], XPMEM_PERMIT_MODE, (void*)0666); + CHKANDJUMP(segid == -1, 255, "xpmem_ioctl failed: %s\n", strerror(errno)); + + *(xpmem_segid_t*)shm = segid; + + ret = waitpid(pid, &status, 0); + CHKANDJUMP(ret == -1, 255, "waitpid failed\n"); + + printf("%lx\n", *((unsigned long*)mem)); + + struct shmid_ds buf; + ret = shmctl(shmid, IPC_RMID, &buf); + CHKANDJUMP(ret == -1, 255, "shmctl failed\n"); + + ret = shmdt(shm); + CHKANDJUMP(ret == -1, 255, "shmdt failed\n"); + + ret = xpmem_remove(segid); + CHKANDJUMP(ret == -1, 255, "xpmem_remove failed\n"); + } + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/rusage012.c b/test/rusage/rusage012.c new file mode 100644 index 00000000..663b7bb0 --- /dev/null +++ b/test/rusage/rusage012.c @@ -0,0 +1,98 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG + +#ifdef DEBUG +#define dprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#define eprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#else +#define dprintf(...) do { } while (0) +#define eprintf(...) do { } while (0) +#endif + +#define CHKANDJUMP(cond, err, ...) 
\ + do { \ + if(cond) { \ + eprintf(__VA_ARGS__); \ + ret = err; \ + goto fn_fail; \ + } \ + } while(0) + +int sz_mem[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 + +int main(int argc, char** argv) { + void* mem; + int ret = 0; + pid_t pid; + int status; + xpmem_segid_t segid; + + mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + memset(mem, 0, sz_mem[SZ_INDEX]); + + ret = xpmem_init(); + CHKANDJUMP(ret != 0, 255, "xpmem_init failed: %s\n", strerror(errno)); + + segid = xpmem_make(mem, sz_mem[SZ_INDEX], XPMEM_PERMIT_MODE, (void*)0666); + CHKANDJUMP(segid == -1, 255, "xpmem_ioctl failed: %s\n", strerror(errno)); + + pid = fork(); + CHKANDJUMP(pid == -1, 255, "fork failed\n"); + if(pid == 0) { + xpmem_apid_t apid = xpmem_get(segid, XPMEM_RDWR, XPMEM_PERMIT_MODE, NULL); + CHKANDJUMP(apid == -1, 255, "xpmem_get failed: %s\n", strerror(errno)); + + struct xpmem_addr addr = { .apid = apid, .offset = 0 }; + void* attach = xpmem_attach(addr, sz_mem[SZ_INDEX], NULL); + CHKANDJUMP(attach == (void*)-1, 255, "xpmem_attach failed: %s\n", strerror(errno)); + + *((unsigned long*)attach) = 0x1234; + + ret = xpmem_detach(attach); + CHKANDJUMP(ret == -1, 255, "xpmem_detach failed\n"); + + _exit(123); + } else { + ret = waitpid(pid, &status, 0); + CHKANDJUMP(ret == -1, 255, "waitpid failed\n"); + + printf("%lx\n", *((unsigned long*)mem)); + + ret = xpmem_remove(segid); + CHKANDJUMP(ret == -1, 255, "xpmem_remove failed\n"); + } + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/rusage013.c b/test/rusage/rusage013.c new file mode 100644 index 00000000..2ed29080 --- /dev/null +++ b/test/rusage/rusage013.c @@ -0,0 +1,72 @@ +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG + +#ifdef DEBUG +#define dprintf(...) 
\ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#define eprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#else +#define dprintf(...) do { } while (0) +#define eprintf(...) do { } while (0) +#endif + +#define CHKANDJUMP(cond, err, ...) \ + do { \ + if(cond) { \ + eprintf(__VA_ARGS__); \ + ret = err; \ + goto fn_fail; \ + } \ + } while(0) + +int sz_mem[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 + +int main(int argc, char** argv) { + void* mem; + int ret = 0; + int fd; + + fd = open("./file", O_RDWR); + CHKANDJUMP(fd == -1, 255, "open failed\n"); + + mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + + //unsigned long val = *((unsigned long*)mem); + //memset(mem, 0, sz_mem[SZ_INDEX]); + + *((unsigned long*)mem) = 0x123456789abcdef0; +//ret = msync(mem, sz_mem[SZ_INDEX], MS_SYNC); +// *((unsigned long*)mem) = 0x123456789abcdef0; + + munmap(mem, sz_mem[SZ_INDEX]); + //ret = close(fd); + //CHKANDJUMP(ret != 0, 255, "close failed\n"); + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/rusage014.c b/test/rusage/rusage014.c new file mode 100644 index 00000000..932bc63f --- /dev/null +++ b/test/rusage/rusage014.c @@ -0,0 +1,71 @@ +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG + +#ifdef DEBUG +#define dprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#define eprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#else +#define dprintf(...) do { } while (0) +#define eprintf(...) 
do { } while (0) +#endif + +#define CHKANDJUMP(cond, err, ...) \ + do { \ + if(cond) { \ + eprintf(__VA_ARGS__); \ + ret = err; \ + goto fn_fail; \ + } \ + } while(0) + +int sz_unit[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30)}; + +#define SZ_INDEX 0 + +int main(int argc, char** argv) { + void* mem; + int ret = 0; + int fd; + + fd = open("./file", O_RDWR); + CHKANDJUMP(fd == -1, 255, "open failed\n"); + + mem = mmap(0, 2 * sz_unit[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + + ret = remap_file_pages(mem + 0 * sz_unit[SZ_INDEX], sz_unit[SZ_INDEX], 0, 1, MAP_SHARED); + CHKANDJUMP(ret != 0, 255, "remap_file_pages failed\n"); + + ret = remap_file_pages(mem + 1 * sz_unit[SZ_INDEX], sz_unit[SZ_INDEX], 0, 0, MAP_SHARED); + CHKANDJUMP(ret != 0, 255, "remap_file_pages failed\n"); + + *((unsigned long*)mem) = 0x123456789abcdef0; + *(unsigned long*)((char*)mem + 1 * sz_unit[SZ_INDEX]) = 0x123456789abcdef0; + + munmap(mem, 2 * sz_unit[SZ_INDEX]); + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/rusage015.c b/test/rusage/rusage015.c new file mode 100644 index 00000000..43975b13 --- /dev/null +++ b/test/rusage/rusage015.c @@ -0,0 +1,74 @@ +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG + +#ifdef DEBUG +#define dprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#define eprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#else +#define dprintf(...) do { } while (0) +#define eprintf(...) do { } while (0) +#endif + +#define CHKANDJUMP(cond, err, ...) 
\ + do { \ + if(cond) { \ + eprintf(__VA_ARGS__); \ + ret = err; \ + goto fn_fail; \ + } \ + } while(0) + +int sz_unit[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30)}; + +#define SZ_INDEX 0 + +int main(int argc, char** argv) { + void* mem; + int ret = 0; + int fd; + + fd = open("./file", O_RDWR); + CHKANDJUMP(fd == -1, 255, "open failed\n"); + + mem = mmap(0, 2 * sz_unit[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + + *((unsigned long*)mem) = 0x123456789abcdef0; + *(unsigned long*)((char*)mem + 1 * sz_unit[SZ_INDEX]) = 0xbeefbeefbeefbeef; + + ret = remap_file_pages(mem + 0 * sz_unit[SZ_INDEX], sz_unit[SZ_INDEX], 0, 1, MAP_SHARED); + CHKANDJUMP(ret != 0, 255, "remap_file_pages failed\n"); + + ret = remap_file_pages(mem + 1 * sz_unit[SZ_INDEX], sz_unit[SZ_INDEX], 0, 0, MAP_SHARED); + CHKANDJUMP(ret != 0, 255, "remap_file_pages failed\n"); + + *((unsigned long*)mem) = 0x123456789abcdef0; + *(unsigned long*)((char*)mem + 1 * sz_unit[SZ_INDEX]) = 0xbeefbeefbeefbeef; + + munmap(mem, 2 * sz_unit[SZ_INDEX]); + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/rusage016.c b/test/rusage/rusage016.c new file mode 100644 index 00000000..af4a4599 --- /dev/null +++ b/test/rusage/rusage016.c @@ -0,0 +1,67 @@ +#define _GNU_SOURCE /* See feature_test_macros(7) */ + +#include +#include +#include +#include +#include + +#define DEBUG + +#ifdef DEBUG +#define dprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#define eprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#else +#define dprintf(...) do { } while (0) +#define eprintf(...) do { } while (0) +#endif + +#define CHKANDJUMP(cond, err, ...) 
\ + do { \ + if(cond) { \ + eprintf(__VA_ARGS__); \ + ret = err; \ + goto fn_fail; \ + } \ + } while(0) + +int sz_anon[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 +#define NUM_AREAS 1 + +int main(int argc, char** argv) { /* maps sz_anon[SZ_INDEX] anonymous bytes, grows the mapping to sz_anon[SZ_INDEX + 1] with mremap(), writes one word before and after, then unmaps; returns 0 on success, 255 on failure */ + int i; + void* mem; + void* newmem; + int ret = 0; + + mem = mmap(0, sz_anon[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + *((unsigned long*)mem) = 0x123456789abcdef0; + + newmem = mremap(mem, sz_anon[SZ_INDEX], sz_anon[SZ_INDEX + 1], MREMAP_MAYMOVE); + CHKANDJUMP(newmem == MAP_FAILED, 255, "mremap failed\n"); + *((unsigned long*)newmem) = 0xbeefbeefbeefbeef; /* MREMAP_MAYMOVE may relocate the mapping, so the old address in mem must not be used after mremap() */ + + munmap(newmem, sz_anon[SZ_INDEX + 1]); + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/rusage017.c b/test/rusage/rusage017.c new file mode 100644 index 00000000..00ffcefd --- /dev/null +++ b/test/rusage/rusage017.c @@ -0,0 +1,76 @@ +#define _GNU_SOURCE /* See feature_test_macros(7) */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG + +#ifdef DEBUG +#define dprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#define eprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#else +#define dprintf(...) do { } while (0) +#define eprintf(...) do { } while (0) +#endif + +#define CHKANDJUMP(cond, err, ...) 
\ + do { \ + if(cond) { \ + eprintf(__VA_ARGS__); \ + ret = err; \ + goto fn_fail; \ + } \ + } while(0) + +int sz_mem[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 +#define NUM_AREAS 1 + +int main(int argc, char** argv) { + int i; + void* mem; + void* newmem; + int ret = 0; + int fd; + + fd = open("./file", O_RDWR); + CHKANDJUMP(fd == -1, 255, "open failed\n"); + + mem = mmap(0, 3 * sz_mem[SZ_INDEX], PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + + *(unsigned long*)((char*)mem + 0) = 0x123456789abcdef0; + *(unsigned long*)((char*)mem + sz_mem[SZ_INDEX]) = 0xbeefbeefbeefbeef; + *(unsigned long*)((char*)mem + 2 * sz_mem[SZ_INDEX]) = 0xbeefbeefbeefbeef; + + ret = mprotect(mem + sz_mem[SZ_INDEX], sz_mem[SZ_INDEX], PROT_READ | PROT_EXEC); + CHKANDJUMP(ret != 0, 255, "mprotect failed\n"); + + munmap(mem, 3 * sz_mem[SZ_INDEX]); + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/rusage018.c b/test/rusage/rusage018.c new file mode 100644 index 00000000..4737b3aa --- /dev/null +++ b/test/rusage/rusage018.c @@ -0,0 +1,73 @@ +#define _GNU_SOURCE /* See feature_test_macros(7) */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG + +#ifdef DEBUG +#define dprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#define eprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#else +#define dprintf(...) do { } while (0) +#define eprintf(...) do { } while (0) +#endif + +#define CHKANDJUMP(cond, err, ...) 
\ + do { \ + if(cond) { \ + eprintf(__VA_ARGS__); \ + ret = err; \ + goto fn_fail; \ + } \ + } while(0) + +int sz_mem[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 1 +#define NUM_AREAS 1 + +int main(int argc, char** argv) { + int i; + void* mem; + void* newmem; + int ret = 0; + int fd; + + mem = mmap(0, 3 * sz_mem[SZ_INDEX], PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + + *(unsigned long*)((char*)mem + 0) = 0x123456789abcdef0; + *(unsigned long*)((char*)mem + sz_mem[SZ_INDEX]) = 0xbeefbeefbeefbeef; + *(unsigned long*)((char*)mem + 2 * sz_mem[SZ_INDEX]) = 0xbeefbeefbeefbeef; + + ret = mprotect(mem + sz_mem[SZ_INDEX - 1], sz_mem[SZ_INDEX - 1], PROT_READ | PROT_EXEC); + CHKANDJUMP(ret != 0, 255, "mprotect failed\n"); + + munmap(mem, 3 * sz_mem[SZ_INDEX]); + + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/rusage020.c b/test/rusage/rusage020.c new file mode 120000 index 00000000..dcda4d36 --- /dev/null +++ b/test/rusage/rusage020.c @@ -0,0 +1 @@ +rusage000.c \ No newline at end of file diff --git a/test/rusage/rusage030.c b/test/rusage/rusage030.c new file mode 120000 index 00000000..dcda4d36 --- /dev/null +++ b/test/rusage/rusage030.c @@ -0,0 +1 @@ +rusage000.c \ No newline at end of file diff --git a/test/rusage/rusage100.c b/test/rusage/rusage100.c new file mode 100644 index 00000000..28177f4a --- /dev/null +++ b/test/rusage/rusage100.c @@ -0,0 +1,81 @@ +#include +#include +#include +#include /* For SYS_xxx definitions */ +#include "ihklib.h" +#include "mckernel/ihklib_rusage.h" + +#define DEBUG + +#ifdef DEBUG +#define dprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#else +#define dprintf(...) do { } while (0) +#endif + +#define CHKANDJUMP(cond, err, ...) 
\ + do { \ + if(cond) { \ + dprintf(__VA_ARGS__); \ + ret = err; \ + goto fn_fail; \ + } \ + } while(0) + +int sz_anon[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 +#define NLOOP 2 + +int main(int argc, char** argv) { + int i, j, ret = 0, ret_ihklib; + void* mem; + struct mckernel_rusage rusage; + + for (j = 0; j < NLOOP; j++) { + mem = mmap(0, sz_anon[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + *((unsigned long*)mem) = 0x123456789abcdef0; + + ret = syscall(900); + CHKANDJUMP(ret != 0, 255, "syscall failed\n"); + + ret = ihk_os_getrusage(0, &rusage, sizeof(rusage)); + CHKANDJUMP(ret != 0, 255, "ihk_os_getrusage failed\n"); + + for (i = 0; i < IHK_MAX_NUM_PGSIZES; i++) { + printf("memory_stat_rss[%d]=%ld\n", i, rusage.memory_stat_rss[i]); + printf("memory_stat_mapped_file[%d]=%ld\n", i, rusage.memory_stat_mapped_file[i]); + } + printf("memory_max_usage=%ld\n", rusage.memory_max_usage); + printf("memory_kmem_usage=%ld\n", rusage.memory_kmem_usage); + printf("memory_kmem_max_usage=%ld\n", rusage.memory_kmem_max_usage); +#define NUM_NUMA_NODES 2 + for (i = 0; i < NUM_NUMA_NODES; i++) { + printf("memory_numa_stat[%d]=%ld\n", i, rusage.memory_numa_stat[i]); + } +#define NUM_CPUS 2 + for (i = 0; i < NUM_CPUS; i++) { + printf("cpuacct_usage_percpu[%d]=%ld\n", i, rusage.cpuacct_usage_percpu[i]); + } + printf("cpuacct_stat_system=%ld\n", rusage.cpuacct_stat_system); + printf("cpuacct_stat_user=%ld\n", rusage.cpuacct_stat_user); + printf("cpuacct_usage=%ld\n", rusage.cpuacct_usage); + + printf("num_threads=%d\n", rusage.num_threads); + printf("max_num_threads=%d\n", rusage.max_num_threads); + } + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/rusage100.patch b/test/rusage/rusage100.patch new file mode 100644 index 00000000..84277a84 --- /dev/null +++ b/test/rusage/rusage100.patch @@ -0,0 +1,49 @@ +diff --git 
a/arch/x86/kernel/include/syscall_list.h b/arch/x86/kernel/include/syscall_list.h +index 7c6edcb..52c1ff3 100644 +--- a/mckernel/arch/x86/kernel/include/syscall_list.h ++++ b/mckernel/arch/x86/kernel/include/syscall_list.h +@@ -161,6 +161,7 @@ SYSCALL_HANDLED(__NR_profile, profile) + SYSCALL_HANDLED(730, util_migrate_inter_kernel) + SYSCALL_HANDLED(731, util_indicate_clone) + SYSCALL_HANDLED(732, get_system) ++SYSCALL_HANDLED(900, dump_rusage) + + /* McKernel Specific */ + SYSCALL_HANDLED(801, swapout) +diff --git a/kernel/syscall.c b/kernel/syscall.c +index 78a832b..eae43cf 100644 +--- a/mckernel/kernel/syscall.c ++++ b/mckernel/kernel/syscall.c +@@ -10030,3 +10030,32 @@ long syscall(int num, ihk_mc_user_context_t *ctx) + + return l; + } ++ ++SYSCALL_DECLARE(dump_rusage) ++{ ++#if 1 ++ /* rusage debug */ ++ int i; ++ for(i = 0; i < IHK_MAX_NUM_PGSIZES; i++) { ++ kprintf("memory_stat_rss[%d]=%ld\n", i, rusage->memory_stat_rss[i]); ++ } ++ for(i = 0; i < IHK_MAX_NUM_PGSIZES; i++) { ++ kprintf("memory_stat_mapped_file[%d]=%ld\n", i, rusage->memory_stat_mapped_file[i]); ++ } ++ kprintf("memory_max_usage=%ld\n", rusage->memory_max_usage); ++ kprintf("memory_kmem_usage=%ld\n", rusage->memory_kmem_usage); ++ kprintf("memory_kmem_max_usage=%ld\n", rusage->memory_kmem_max_usage); ++ for (i = 0; i < rusage->num_numa_nodes; i++) { ++ kprintf("memory_numa_stat[%d]=%ld\n", i, rusage->memory_numa_stat[i]); ++ } ++ kprintf("ns_per_tsc=%ld\n", rusage->ns_per_tsc); ++ for (i = 0; i < rusage->num_processors; i++) { ++ kprintf("cpu[%d].user_tsc=%ld\n", i, rusage->cpu[i].user_tsc); ++ kprintf("cpu[%d].system_tsc=%ld\n", i, rusage->cpu[i].system_tsc); ++ } ++ ++ kprintf("num_threads=%d\n", rusage->num_threads); ++ kprintf("max_num_threads=%d\n", rusage->max_num_threads); ++#endif ++ return 0; ++} diff --git a/test/rusage/rusage101.c b/test/rusage/rusage101.c new file mode 100644 index 00000000..2d2e29ab --- /dev/null +++ b/test/rusage/rusage101.c @@ -0,0 +1,93 @@ +#include +#include 
+#include +#include /* For SYS_xxx definitions */ +#include "ihklib.h" +#include "mckernel/ihklib_rusage.h" + +#define DEBUG + +#ifdef DEBUG +#define dprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#else +#define dprintf(...) do { } while (0) +#endif + +#define CHKANDJUMP(cond, err, ...) \ + do { \ + if(cond) { \ + dprintf(__VA_ARGS__); \ + ret = err; \ + goto fn_fail; \ + } \ + } while(0) + +int sz_anon[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 +#define NLOOP 2 + +int main(int argc, char** argv) { + int i, j, ret = 0, ret_ihklib; + void* mem; + struct mckernel_rusage rusage; + pid_t pid; + int status; + + pid = fork(); + CHKANDJUMP(pid == -1, 255, "fork failed"); + if (pid == 0) { + + for (j = 0; j < NLOOP; j++) { + mem = mmap(0, sz_anon[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + *((unsigned long*)mem) = 0x123456789abcdef0; + + ret = syscall(900); + CHKANDJUMP(ret != 0, 255, "syscall failed\n"); + + ret = ihk_os_getrusage(0, &rusage, sizeof(rusage)); + CHKANDJUMP(ret != 0, 255, "ihk_os_getrusage failed\n"); + + for (i = 0; i < IHK_MAX_NUM_PGSIZES; i++) { + printf("memory_stat_rss[%d]=%ld\n", i, rusage.memory_stat_rss[i]); + printf("memory_stat_mapped_file[%d]=%ld\n", i, rusage.memory_stat_mapped_file[i]); + } + printf("memory_max_usage=%ld\n", rusage.memory_max_usage); + printf("memory_kmem_usage=%ld\n", rusage.memory_kmem_usage); + printf("memory_kmem_max_usage=%ld\n", rusage.memory_kmem_max_usage); +#define NUM_NUMA_NODES 2 + for (i = 0; i < NUM_NUMA_NODES; i++) { + printf("memory_numa_stat[%d]=%ld\n", i, rusage.memory_numa_stat[i]); + } +#define NUM_CPUS 2 + for (i = 0; i < NUM_CPUS; i++) { + printf("cpuacct_usage_percpu[%d]=%ld\n", i, rusage.cpuacct_usage_percpu[i]); + } + printf("cpuacct_stat_system=%ld\n", rusage.cpuacct_stat_system); 
+ printf("cpuacct_stat_user=%ld\n", rusage.cpuacct_stat_user); + printf("cpuacct_usage=%ld\n", rusage.cpuacct_usage); + + printf("num_threads=%d\n", rusage.num_threads); + printf("max_num_threads=%d\n", rusage.max_num_threads); + } + _exit(123); + } else { + ret = waitpid(pid, &status, 0); + CHKANDJUMP(ret == -1, 255, "waitpid failed\n"); + } + ret = 0; /* waitpid() left the child's pid in ret; reset it so a successful run exits with 0 */ + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/rusage102.c b/test/rusage/rusage102.c new file mode 100644 index 00000000..53cf306c --- /dev/null +++ b/test/rusage/rusage102.c @@ -0,0 +1,89 @@ +#include +#include +#include +#include /* For SYS_xxx definitions */ +#include +#include +#include +#include "ihklib.h" +#include "mckernel/ihklib_rusage.h" + +#define DEBUG + +#ifdef DEBUG +#define dprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#else +#define dprintf(...) do { } while (0) +#endif + +#define CHKANDJUMP(cond, err, ...) 
\ + do { \ + if(cond) { \ + dprintf(__VA_ARGS__); \ + ret = err; \ + goto fn_fail; \ + } \ + } while(0) + +int sz_mem[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 +#define NLOOP 2 + +int main(int argc, char** argv) { + int i, j, ret = 0, ret_ihklib; + void* mem; + int fd; + struct mckernel_rusage rusage; + + fd = open("./file", O_RDWR); + CHKANDJUMP(fd == -1, 255, "open failed\n"); + + for (j = 0; j < NLOOP; j++) { + + mem = mmap(0, sz_mem[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_SHARED, fd, sz_mem[SZ_INDEX] * j); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + *((unsigned long*)mem) = 0x123456789abcdef0; + + ret = syscall(900); + CHKANDJUMP(ret != 0, 255, "syscall failed\n"); + + ret = ihk_os_getrusage(0, &rusage, sizeof(rusage)); + CHKANDJUMP(ret != 0, 255, "ihk_os_getrusage failed\n"); + + for (i = 0; i < IHK_MAX_NUM_PGSIZES; i++) { + printf("memory_stat_rss[%d]=%ld\n", i, rusage.memory_stat_rss[i]); + printf("memory_stat_mapped_file[%d]=%ld\n", i, rusage.memory_stat_mapped_file[i]); + } + printf("memory_max_usage=%ld\n", rusage.memory_max_usage); + printf("memory_kmem_usage=%ld\n", rusage.memory_kmem_usage); + printf("memory_kmem_max_usage=%ld\n", rusage.memory_kmem_max_usage); +#define NUM_NUMA_NODES 2 + for (i = 0; i < NUM_NUMA_NODES; i++) { + printf("memory_numa_stat[%d]=%ld\n", i, rusage.memory_numa_stat[i]); + } +#define NUM_CPUS 2 + for (i = 0; i < NUM_CPUS; i++) { + printf("cpuacct_usage_percpu[%d]=%ld\n", i, rusage.cpuacct_usage_percpu[i]); + } + printf("cpuacct_stat_system=%ld\n", rusage.cpuacct_stat_system); + printf("cpuacct_stat_user=%ld\n", rusage.cpuacct_stat_user); + printf("cpuacct_usage=%ld\n", rusage.cpuacct_usage); + + printf("num_threads=%d\n", rusage.num_threads); + printf("max_num_threads=%d\n", rusage.max_num_threads); + } + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/rusage103.c b/test/rusage/rusage103.c new file mode 100644 index 00000000..28177f4a 
--- /dev/null +++ b/test/rusage/rusage103.c @@ -0,0 +1,81 @@ +#include +#include +#include +#include /* For SYS_xxx definitions */ +#include "ihklib.h" +#include "mckernel/ihklib_rusage.h" + +#define DEBUG + +#ifdef DEBUG +#define dprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + fprintf(stderr, "%s,%s", __FUNCTION__, msg); \ + } while (0); +#else +#define dprintf(...) do { } while (0) +#endif + +#define CHKANDJUMP(cond, err, ...) \ + do { \ + if(cond) { \ + dprintf(__VA_ARGS__); \ + ret = err; \ + goto fn_fail; \ + } \ + } while(0) + +int sz_anon[] = { + 4 * (1ULL<<10), + 2 * (1ULL<<20), + 1 * (1ULL<<30), + 134217728}; + +#define SZ_INDEX 0 +#define NLOOP 2 + +int main(int argc, char** argv) { + int i, j, ret = 0, ret_ihklib; + void* mem; + struct mckernel_rusage rusage; + + for (j = 0; j < NLOOP; j++) { + mem = mmap(0, sz_anon[SZ_INDEX], PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + CHKANDJUMP(mem == MAP_FAILED, 255, "mmap failed\n"); + *((unsigned long*)mem) = 0x123456789abcdef0; + + ret = syscall(900); + CHKANDJUMP(ret != 0, 255, "syscall failed\n"); + + ret = ihk_os_getrusage(0, &rusage, sizeof(rusage)); + CHKANDJUMP(ret != 0, 255, "ihk_os_getrusage failed\n"); + + for (i = 0; i < IHK_MAX_NUM_PGSIZES; i++) { + printf("memory_stat_rss[%d]=%ld\n", i, rusage.memory_stat_rss[i]); + printf("memory_stat_mapped_file[%d]=%ld\n", i, rusage.memory_stat_mapped_file[i]); + } + printf("memory_max_usage=%ld\n", rusage.memory_max_usage); + printf("memory_kmem_usage=%ld\n", rusage.memory_kmem_usage); + printf("memory_kmem_max_usage=%ld\n", rusage.memory_kmem_max_usage); +#define NUM_NUMA_NODES 2 + for (i = 0; i < NUM_NUMA_NODES; i++) { + printf("memory_numa_stat[%d]=%ld\n", i, rusage.memory_numa_stat[i]); + } +#define NUM_CPUS 2 + for (i = 0; i < NUM_CPUS; i++) { + printf("cpuacct_usage_percpu[%d]=%ld\n", i, rusage.cpuacct_usage_percpu[i]); + } + printf("cpuacct_stat_system=%ld\n", rusage.cpuacct_stat_system); + 
printf("cpuacct_stat_user=%ld\n", rusage.cpuacct_stat_user); + printf("cpuacct_usage=%ld\n", rusage.cpuacct_usage); + + printf("num_threads=%d\n", rusage.num_threads); + printf("max_num_threads=%d\n", rusage.max_num_threads); + } + fn_exit: + return ret; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/verbs/aal_host.h b/test/rusage/verbs/aal_host.h new file mode 100755 index 00000000..4162166d --- /dev/null +++ b/test/rusage/verbs/aal_host.h @@ -0,0 +1,21 @@ +/* + * aal_host.h + * + * Created on: 2011/08/09 + * Author: simin + */ + +#ifndef AAL_HOST_H_ +#define AAL_HOST_H_ + +#define MAX_DEVNO 2 + +extern int aal_host_init(); +extern int aal_host_dev_init(int dev_no); +extern void* aal_host_mem_alloc(int dev_no, int size); +extern void aal_host_mem_free(void * addr, int size); +extern int aal_host_dev_exit(int dev_no); +extern int aal_host_exit(); +extern void* aal_host_mem_va2pa(int dev_no, void *virtual_addr); + +#endif /* AAL_HOST_H_ */ diff --git a/test/rusage/verbs/cas.c b/test/rusage/verbs/cas.c new file mode 100755 index 00000000..dc99ecc0 --- /dev/null +++ b/test/rusage/verbs/cas.c @@ -0,0 +1,189 @@ +#include +#include +#include +#include +#include +#include "ibcomm.h" +#include "debug.h" +#include "mtype.h" +#include "mcons.h" +#include "mm_ib_test.h" + +//#define DEBUG_CAS +#ifdef DEBUG_CAS +#define dprintf printf +#else +#define dprintf(...) 
+#endif
+
+#define NTRIAL 10
+#define NSKIPS 0
+#define NSKIPR 0
+
+/* ibcomm.h also defines ERR_CHKANDJUMP (it stores into "rc"); drop that
+ * definition before installing the variant that records into main_errno. */
+#undef ERR_CHKANDJUMP
+#define ERR_CHKANDJUMP(cond, errno, stmt) if(cond) { stmt; main_errno = errno; goto fn_fail; }
+
+/*
+ * Read the time-stamp counter. cpuid is issued before and after rdtsc so
+ * that neither earlier nor later instructions are reordered across it.
+ */
+static unsigned long rdtsc(void) {
+    unsigned long x;
+    __asm__ __volatile__("xorl %%eax, %%eax; cpuid;" : : : "%rax", "%rbx", "%rcx", "%rdx"); /* rdtsc cannot be executed earlier than this */
+    /* rdtsc writes edx and shl/or rewrite rdx and the flags, so both are declared clobbered */
+    __asm__ __volatile__("rdtsc; shl $32, %%rdx; or %%rdx, %%rax" : "=a"(x) : : "%rdx", "cc", "memory"); /* rdtsc cannot be executed earlier than here */
+    __asm__ __volatile__("xorl %%eax, %%eax; cpuid;" : : : "%rax", "%rbx", "%rcx", "%rdx"); /* following instructions cannot be executed earlier than this */
+    return x;
+}
+
+volatile int k;
+/*
+ * Compare-and-swap test over a connected QP.
+ *
+ * Both sides register an 8-byte RDMA target buffer and an 8-byte local
+ * buffer, exchange connection info, bring the QP to RTS and synchronize
+ * over the socket. The server side then posts NTRIAL atomic
+ * compare-and-swap requests (compare i, swap i+1) and polls the send CQ
+ * for each completion; the other side spins until the value in its RDMA
+ * buffer differs from i. Both print the average TSC count per trial.
+ *
+ * Returns 0 on success and -1 when a step checked by ERR_CHKANDJUMP fails.
+ */
+int main(int argc, char **argv) {
+    config_t config;
+    int i;
+    int j;
+    int main_errno = 0;
+    char sync_res;
+    unsigned long tscs = 0, tsce;
+    resource_t res;
+    pdinfo_t pdinfo;
+    qpinfo_t qpinfo;
+    mrinfo_t *mr_local = NULL;
+    int ib_errno;
+
+    if (read_config(&config, argc, argv)) {
+        goto fn_exit;
+    }
+
+    config.use_rdma = 1;
+
+    if(config.buf_size != 8) {
+        printf("set buf_size to 8\n");
+        config.buf_size = 8;
+    }
+
+    if(resource_create(config, &res) || pd_create(&res, &pdinfo) || qp_create(&res, &pdinfo, &qpinfo)) { printf("qp_create failed\n"); goto fn_fail; }
+
+    /* rdma-write-to buffer */
+    void *buf_rdma = mmap(0, 8/*IBCOM_RDMABUF_SZSEG*/, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+    //void *buf_rdma = calloc(8, 1);
+    /* mmap reports failure with MAP_FAILED, not NULL */
+    ERR_CHKANDJUMP(buf_rdma == MAP_FAILED, -1, printf("mmap failed\n"));
+    memset(buf_rdma, 0, 8/*IBCOM_RDMABUF_SZSEG*/);
+
+    //ib_errno = mr_create(&res, &pdinfo, 8/*IBCOM_RDMABUF_SZSEG*/, buf_rdma, &res.rdma_mr);
+    //ERR_CHKANDJUMP(ib_errno, -1, printf("mr_create failed\n"));
+
+    memset(&res.rdma_mr, 0, sizeof(mrinfo_t));
+    res.rdma_mr.buf = buf_rdma;
+    res.rdma_mr.buf_size = 8;
+    res.rdma_mr.mr = ibv_reg_mr(pdinfo.pd, buf_rdma, 8, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_ATOMIC);
+    ERR_CHKANDJUMP(!res.rdma_mr.mr, -1, printf("ibv_reg_mr failed\n"));
+
+
+
+    mr_local = calloc(1, sizeof(*mr_local));
+    ERR_CHKANDJUMP(!mr_local, -1, printf("calloc failed\n"));
+
+    void *buf_local = mmap(0, config.buf_size, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+    ERR_CHKANDJUMP(buf_local == MAP_FAILED, -1, printf("mmap failed\n"));
+    memset(buf_local, config.server_flg ? 0x55 : 0xaa, config.buf_size);
+
+    //ib_errno = mr_create(&res, &pdinfo, config.buf_size, buf_local, mr_local);
+    //ERR_CHKANDJUMP(ib_errno, -1, printf("mr_create fail\n"));
+    memset(mr_local, 0, sizeof(mrinfo_t));
+    mr_local->buf = buf_local;
+    mr_local->buf_size = 8;
+    mr_local->mr = ibv_reg_mr(pdinfo.pd, buf_local, 8, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_ATOMIC);
+    ERR_CHKANDJUMP(!mr_local->mr, -1, printf("ibv_reg_mr failed\n"));
+
+    /* exchange gid, lid, qpn, raddr, rkey */
+    if(connect_qp(config, &res, &qpinfo)) { printf("connect_qp failed\n"); goto fn_fail; }
+    debug_print_qp_conn_info(res, qpinfo, &config);
+    printf("connect_qp done\n"); fflush(stdout);
+
+    /* make qp RTS */
+    if(init_qp(config, &qpinfo) || rtr_qp(config, &qpinfo) || rts_qp(config, &qpinfo)) { printf("rts failed\n"); goto fn_fail; }
+    printf("rts done\n"); fflush(stdout);
+
+    /* barrier */
+    for(i = 0; i < (config.server_flg ? config.nremote : 1); i++) {
+        if(sock_sync_data(qpinfo.sock[i], 1, "R", &sync_res)) { perror("sock_sync_data"); }
+    }
+    printf("barrier done\n"); fflush(stdout);
+
+    if(config.server_flg) { /* sender side */
+        for (i = 0; i < NTRIAL; i++) {
+            if(i == NSKIPS) { tscs = rdtsc(); }
+
+            struct ibv_send_wr sr;
+            memset(&sr, 0, sizeof(struct ibv_send_wr));
+            struct ibv_send_wr *bad_wr = NULL;
+
+            struct ibv_sge sge[1];
+            memset(&sge[0], 0, sizeof(struct ibv_sge));
+
+            sr.next = NULL;
+            sr.wr_id = 0;
+            sr.sg_list = sge;
+            //sr.imm_data = imm_data;
+            sr.send_flags = IBV_SEND_SIGNALED;
+
+            sge[0].addr = (uintptr_t)mr_local->buf;
+            printf("local_addr=%08lx\n", (unsigned long)sge[0].addr);
+            sge[0].length = mr_local->buf_size;
+            printf("length=%d\n", sge[0].length);
+            sge[0].lkey = mr_local->mr->lkey;
+            sr.num_sge = 1;
+
+#if 1
+            sr.opcode = IBV_WR_ATOMIC_CMP_AND_SWP;
+            //sr.opcode = IBV_WR_ATOMIC_FETCH_AND_ADD;
+            sr.wr.atomic.compare_add = (uint64_t)i;
+            sr.wr.atomic.swap = (uint64_t)i+1;
+            sr.wr.atomic.remote_addr = /*255*/qpinfo.remote_conn_info[0].addr;
+            printf("remote_addr=%08lx\n", (unsigned long)sr.wr.atomic.remote_addr);
+            sr.wr.atomic.rkey = /*123*/qpinfo.remote_conn_info[0].rkey;
+            /* rkey is a 32-bit value, so it must not be printed with %lx */
+            printf("rkey=%08x\n", sr.wr.atomic.rkey);
+#else
+            sr.opcode = IBV_WR_RDMA_WRITE;
+            sr.wr.rdma.remote_addr = qpinfo.remote_conn_info[0].addr;
+            sr.wr.rdma.rkey = qpinfo.remote_conn_info[0].rkey;
+#endif
+            dprintf("ibv_post_send,raddr=%lx\n", sr.wr.rdma.remote_addr);
+
+            ib_errno = ibv_post_send(qpinfo.qp, &sr, &bad_wr);
+            ERR_CHKANDJUMP(ib_errno, -1, printf("ibv_post_send return %d\n", ib_errno));
+            /* spin on the send CQ until the completion of this request shows up */
+            while(1) {
+                int result;
+                struct ibv_wc cqe[1];
+                result = ibv_poll_cq(qpinfo.scq, 1, &cqe[0]);
+                ERR_CHKANDJUMP(result < 0, -1, printf("ibv_poll_cq"));
+                if(result > 0) {
+                    for(j = 0; j < result; j++) {
+                        printf("cqe.imm_data=%08x\n", cqe[j].imm_data);
+                        printf("buf_local=%lx\n", (unsigned long)*((uint64_t*)buf_local));
+                        ERR_CHKANDJUMP(cqe[j].status != IBV_WC_SUCCESS, -1, printf("cqe status,%s\n", ibv_wc_status_str(cqe[j].status)));
+                    }
+                    break;
+                }
+            }
+
+        }
+        tsce = rdtsc(); printf("send,%.0f\n", (tsce-tscs)/(double)(NTRIAL-NSKIPS));
+
+    } else { /* receiver side */
+
+        for (i = 0; i < NTRIAL; i++) {
+            if(i == NSKIPR) { tscs = rdtsc(); }
+
+            printf("res.rdma_mr.buf=%lx\n", (unsigned long)res.rdma_mr.buf);
+            printf("poll addr=%lx\n", (unsigned long)(buf_rdma));
+
+            /* wait until the value in the RDMA buffer is no longer i */
+            volatile uint64_t *ptr = (volatile uint64_t *)buf_rdma;
+            while(*ptr == i) {
+                __asm__ __volatile__("pause");
+            }
+            printf("*ptr=%08lx\n", (unsigned long)*ptr);
+        }
+        tsce = rdtsc(); printf("recv,%.0f\n", (tsce-tscs)/(double)(NTRIAL-NSKIPR));
+    }
+
+ fn_exit:
+    return main_errno;
+ fn_fail:
+    goto fn_exit;
+}
diff --git a/test/rusage/verbs/curtime.c b/test/rusage/verbs/curtime.c
new file mode 100755
index 00000000..56ea70b3
--- /dev/null
+++ b/test/rusage/verbs/curtime.c
@@ -0,0 +1,11 @@
+#include
+#include
+
+#define CURTIME_LIB 1
+
+double cur_time(){
+    struct timeval tp;
+    gettimeofday(&tp, NULL);
+    return tp.tv_sec + tp.tv_usec * 1.0E-6;
+}
+
diff --git a/test/rusage/verbs/debug.c b/test/rusage/verbs/debug.c
new file mode 100755
index 00000000..9797a7a3
--- /dev/null
+++ b/test/rusage/verbs/debug.c
@@ -0,0 +1,38 @@
+#include
+#include
+#include
+#include "ibcomm.h"
+#include "debug.h"
+
+//#define DEBUG_DEBUG
+#ifdef DEBUG_DEBUG
+#define dprintf printf
+#else
+#define dprintf(...)
+#endif
+
+void debug_print_qp_conn_info(resource_t res, qpinfo_t qpinfo, config_t *config) {
+    uint8_t *p;
+    dprintf("local.qp_num=0x%x\n", qpinfo.qp->qp_num);
+    dprintf("local.lid=0x%x\n", res.port_attr->lid);
+    dprintf("local.sock[0]=%d\n", qpinfo.sock[0]);
+    if (res.rdma_mr.mr != NULL) {
+        dprintf("local.addr=0x%lx\n", (uint64_t)res.rdma_mr.buf);
+        dprintf("local.rkey=0x%x\n\n", res.rdma_mr.mr->rkey);
+    }
+
+    int i;
+    for(i = 0; i < (qpinfo.listenfd == -1 ? 
1 : config->nremote); i++) { + dprintf("remote.qp_num=0x%x\n", qpinfo.remote_conn_info[i].qp_num); + dprintf("remote.lid=0x%x\n", qpinfo.remote_conn_info[i].lid); + p = qpinfo.remote_conn_info[i].gid; + dprintf( + "remote.gid = %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n", + p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); + if (qpinfo.remote_conn_info[i].addr) { + dprintf("remote.addr=0x%lx\n", qpinfo.remote_conn_info[i].addr); + dprintf("remote.rkey=0x%x\n", qpinfo.remote_conn_info[i].rkey); + } + } +} + diff --git a/test/rusage/verbs/debug.h b/test/rusage/verbs/debug.h new file mode 100755 index 00000000..f67dbc4f --- /dev/null +++ b/test/rusage/verbs/debug.h @@ -0,0 +1,31 @@ +#ifndef MYLIB_H +#define MYLIB_H + +#ifndef NULL +#define NULL ((void *) 0) +#endif + +#ifdef DEBUG +#define debug_printf(fmt,arg...) {printf("[DEBUG] " fmt, ##arg);} +#define debug_print_mem(arg...) {fprintf(stderr, "[DEBUG] ");print_mem(arg);} +#else +#define debug_printf(fmt,arg...) {} +#define debug_print_mem(arg...) {} +#endif + +#ifdef ERROR +#define error_printf(fmt,arg...) {fprintf(stderr, "[ERROR] " fmt, ##arg);} +#define error_perror(arg...) {fprintf(stderr, "[ERROR] "); perror(arg);} +#else +#define error_printf(fmt,arg...) {} +#define error_perror(fmt,arg...) 
{} +#endif + +#include "mtype.h" + +/** + * get current time(sec) + */ +extern double cur_time(); +extern void print_mem(addr_t addr, int size); +#endif diff --git a/test/rusage/verbs/ibcomm.h b/test/rusage/verbs/ibcomm.h new file mode 100755 index 00000000..817eed59 --- /dev/null +++ b/test/rusage/verbs/ibcomm.h @@ -0,0 +1,209 @@ +#ifndef IBCOMM_H +#define IBCOMM_H +#include +#include "infiniband/verbs.h" +#include "sock.h" +#include "list.h" + +#define _MAX_FIX_BUF_SIZE 64 +#define _MAX_SQ_CAPACITY /*512*/256/*12*/ +#define _MAX_RQ_CAPACITY /*512*/256/*1*/ +#define _MAX_SGE_CAPACITY /*20*/3 +#define _MAX_CQ_CAPACITY /*512*/256/*1*/ + +#define IBCOM_INLINE_DATA /*(128*4-64)*//*(512-64)*//*884*/512 +#define IBCOM_RDMABUF_SZSEG (16384+4096) +#define IBCOM_MAGIC 0x55aa55aa +#define NCHAIN 2 + +#define SEND_CQ_FLG 1 +#define RECV_CQ_FLG 2 +#define IBCOMM_ERR_CODE -1 + +#define ibcomm_return_code_num 30 + +#if __BYTE_ORDER == __LITTLE_ENDIAN +static inline uint64_t htonll(uint64_t x) { return bswap_64(x); } +static inline uint64_t ntohll(uint64_t x) { return bswap_64(x); } +#elif __BYTE_ORDER == __BIG_ENDIAN +static inline uint64_t htonll(uint64_t x) { return x; } +static inline uint64_t ntohll(uint64_t x) { return x; } +#else +#error __BYTE_ORDER is neither __LITTLE_ENDIAN nor __BIG_ENDIAN +#endif + +/* ERROR definition*/ +enum ibcomm_return_code{ + _IBCOMM_RETCODE_SUCCESS , + _IBCOMM_ERRCODE_DEVICE_FOUND, + _IBCOMM_ERRCODE_NO_DEVICE, + _IBCOMM_ERRCODE_DEVICE_OPEN, + _IBCOMM_ERRCODE_CREATE_RES, + _IBCOMM_ERRCODE_DEVICE_QUERY_PORT, + + _IBCOMM_ERRCODE_PD_ALLOC, + _IBCOMM_ERRCODE_CQ_CREATE, + _IBCOMM_ERRCODE_QP_CREATE, + _IBCOMM_ERRCODE_MR_CREATE, + + _IBCOMM_ERRCODE_QP_DESTROY, + _IBCOMM_ERRCODE_CQ_DESTROY, + _IBCOMM_ERRCODE_MR_DESTROY, + _IBCOMM_ERRCODE_PD_DEALLOC, + _IBCOMM_ERRCODE_DEVICE_CLOSE, + + _IBCOMM_ERRCODE_SOCK_CONN, + _IBCOMM_ERRCODE_SOCK_SYNC, + _IBCOMM_ERRCODE_SOCK_CLOSE, + + _IBCOMM_ERRCODE_QP_QUERY_GID, + _IBCOMM_ERRCODE_INIT_QP, + 
_IBCOMM_ERRCODE_RTR_QP, + _IBCOMM_ERRCODE_RTS_QP, + + _IBCOMM_ERRCODE_POLL_CQ_ERR, + _IBCOMM_ERRCODE_POLL_CQ_ZERO_RESULT +}; +typedef struct config{ + char *dev_name; /*IB device name*/ + char *server_name; /*server host name*/ + u_int32_t tcp_port; /*server TCP port*/ + int ib_port; /*local IB port*/ + int gid_idx; /*gid index*/ + int use_rdma; /*rdma flag*/ + int buf_size; + int server_flg; + int pci_buf_flg; + int pci_cq_flg; + int nremote; /* number of remote nodes */ +}config_t; + +typedef struct qp_conn_info{ + uint64_t addr; /*Buffer address*/ + uint32_t rkey; /*Remote key*/ + uint32_t qp_num; /*QP number*/ + uint16_t lid; /*LID of the IB port*/ + uint8_t gid[16];/*GID of the IB port*/ +}qp_conn_info_t; + +typedef struct qp_conn_info_ud{ + uint16_t lid; + union ibv_gid gid; + uint32_t qp_num; + uint32_t qkey; +} qp_conn_info_ud_t; + +typedef struct mrinfo{ + struct ibv_mr *mr; + char *buf; /*Registered buf*/ + int buf_size; +}mrinfo_t; + +#define NREMOTE 4 +typedef struct qpinfo{ + struct ibv_qp *qp; + struct ibv_cq *scq; /*Send cq*/ + struct ibv_cq *rcq; /*Receive cq*/ + qp_conn_info_t remote_conn_info[NREMOTE]; /*Remote info*/ + int sock[NREMOTE]; /* exchange remote_conn_info using TCP */ + int listenfd; /* exchange remote_conn_info using TCP */ + int sr_num; + int rr_num; + int max_inline_data; /*if data smaller than it, use inline send*/ +}qpinfo_t; + +typedef struct pdinfo{ + struct ibv_pd *pd; +}pdinfo_t; + +typedef struct resource{ + struct ibv_context *ib_ctx;/*HCA handle*/ + struct ibv_port_attr *port_attr; /*IB port attributes*/ + + list_t *pdinfo_list; + list_t *mrinfo_list; + list_t *qpinfo_list; + + /* RDMA buffers */ + mrinfo_t rdma_mr; +}resource_t; + +/** + * create resource + * connect TCP socket + */ +extern int resource_create(config_t config, resource_t *res); + +/** + * create a pd and register it to resource + */ +extern int pd_create(resource_t *res, pdinfo_t *pdinfo); + +/** + * creete a qp and register it to pd + * -create send cq + 
* -create recv cq + * -assign send cq to sq + * -assign recv cq to rq + */ +extern int qp_create(resource_t *res, pdinfo_t *pdinfo, qpinfo_t *qpinfo); +extern int qp_create_ud(resource_t *res, pdinfo_t *pdinfo, qpinfo_t *qpinfo); + +/** + * 1.create a mr and register it to pd + * 2.register buf to this mr + */ +extern int mr_create(resource_t *res, pdinfo_t *pdinfo, int buf_size, char *buf, mrinfo_t *mrinfo); +/** + * destroy all resources + */ +extern int resource_destroy(config_t *config, resource_t *res); + + +/** + * connect to remote qp by exchanging addr info + */ +extern int connect_qp(config_t config, resource_t *res, qpinfo_t *qpinfo); + +/** + * change qp status + */ +extern int init_qp(config_t config, qpinfo_t *qpinfo); +extern int init_qp_ud(config_t config, qpinfo_t *qpinfo); + +extern int rtr_qp(config_t config, qpinfo_t *qpinfo); +extern int rtr_qp_ud(config_t config, qpinfo_t *qpinfo); + +extern int rts_qp(config_t config, qpinfo_t *qpinfo); +extern int rts_qp_ud(config_t config, qpinfo_t *qpinfo); + +extern int modify_dest_qp(config_t config, qpinfo_t *qpinfo, qp_conn_info_t* remote_conn_info); + +extern int post_send_req(qpinfo_t *qpinfo, mrinfo_t *mrinfo, int opcode, int tag, qp_conn_info_t* remote_conn_info, uint32_t imm_data); +int post_send_req2(qpinfo_t *qpinfo, mrinfo_t *mrinfo, int opcode, qp_conn_info_t* remote_conn_info, uint32_t imm_data, uint32_t seq_num); +extern int post_send_req_ud(qpinfo_t *qpinfo, mrinfo_t *mrinfo, int opcode, qp_conn_info_ud_t* remote_conn_info, struct ibv_ah *ah); + +extern int post_recv_req(qpinfo_t *qpinfo, mrinfo_t *mrinfo_list, int tag); +extern int post_recv_req_ud(qpinfo_t *qpinfo, mrinfo_t *mrinfo, uint64_t wr_id); + +extern int poll_cq(qpinfo_t *qpinfo, int cq_flg, int *tag); +extern int poll_cq2(qpinfo_t *qpinfo, int cq_flg, int *tag, int *result); +extern int poll_cq2_ud(qpinfo_t *qpinfo, int cq_flg, int *result); + +extern void print_qp_status(qpinfo_t *qpinfo); + +extern void 
debug_print_qp_conn_info(resource_t res, qpinfo_t qpinfo, config_t *config);
+extern int read_config(config_t *config, int argc, char **argv);
+
+#define ERR_CHKANDJUMP(cond, errno, stmt) if(cond) { stmt; rc = errno; goto fn_fail; }
+#define IBCOM_ERR_CHKANDJUMP(cond, errno, stmt) if(cond) { stmt; ibcom_errno = errno; goto fn_fail; }
+#define VERBS_ERR_CHKANDJUMP(cond, errno, stmt) if(cond) { stmt; verbs_errno = errno; goto fn_fail; }
+
+/*
+ * Print the resident-set figure read from /proc/self/statm, labelled
+ * with the caller-supplied step number.
+ *
+ * Returns 0 on success, -1 if the file cannot be opened or parsed.
+ */
+static inline int show_resident(int step) {
+    unsigned long size, resident, share, text, lib, data, dirty;
+    int nread;
+    FILE* fp = fopen("/proc/self/statm", "r");
+    if (fp == NULL) {
+        perror("fopen /proc/self/statm");
+        return -1;
+    }
+    nread = fscanf(fp, "%lu %lu %lu %lu %lu %lu %lu", &size, &resident, &share, &text, &lib, &data, &dirty);
+    /* close on every path so repeated calls do not leak a stream */
+    fclose(fp);
+    if (nread != 7) {
+        fprintf(stderr, "unexpected /proc/self/statm format\n");
+        return -1;
+    }
+    /* NOTE(review): the factor 4 presumably converts 4 KiB pages to KB - confirm page size */
+    printf("step=%d,resident=%luKB\n", step, resident * 4);
+    return 0;
+}
+/* the include guard closes here so that the definitions above are not repeated on a second inclusion */
+#endif
diff --git a/test/rusage/verbs/list.c b/test/rusage/verbs/list.c
new file mode 100755
index 00000000..6a0126ea
--- /dev/null
+++ b/test/rusage/verbs/list.c
@@ -0,0 +1,82 @@
+/*
+ * list.c
+ *
+ * Created on: 2011/10/19
+ * Author: simin
+ */
+
+#include
+#include "list.h"
+#include
+
+void* list_get(list_t *list, int idx) {
+    int i;
+    list_element_t *e;
+
+    if (list->cnt <= 0 || idx < 0 || idx >= list->cnt)
+        return NULL;
+
+    e = list->head;
+    for (i = 0; i < idx; i++)
+        e = e->next;
+
+    return e->data;
+}
+
+void list_add(list_t *list, void *data) {
+    list_element_t *e;
+    e = malloc(sizeof(list_element_t));
+    e->data = data;
+    e->next = NULL;
+
+    if(list->tail != NULL)
+        list->tail->next = e;
+    list->tail = e;
+
+    if (list->cnt == 0)
+        list->head = list->tail;
+
+    list->cnt++;
+}
+
+void* list_remove(list_t *list, int idx) {
+    int i;
+    list_element_t *e, *pe, *ne;
+    void *data;
+    e = pe = ne = NULL;
+
+    if (list->cnt <= 0 || idx < 0 || idx >= list->cnt)
+        return NULL;
+
+    e = list->head;
+    i = 0;
+    if(idx > 0){
+        while(i++ < idx-1){
+            e = e->next;
+        }
+        pe = e;
+        i--;
+    }
+    while(i++ < idx)
+        e = e->next;
+    if(idx < list->cnt)
+        ne = e->next;
+
+    if(pe != NULL)
+        pe->next = ne;
+    else
+        list->head = ne;
+    
if(ne == NULL)
+        list->tail = pe;
+
+    list->cnt--;
+
+    data = e->data;
+    free(e);
+
+    return data;
+}
+
+void* list_pop(list_t *list){
+    return list_remove(list, list->cnt-1);
+}
diff --git a/test/rusage/verbs/list.h b/test/rusage/verbs/list.h
new file mode 100755
index 00000000..9b6dff0c
--- /dev/null
+++ b/test/rusage/verbs/list.h
@@ -0,0 +1,26 @@
+/*
+ * list.h
+ *
+ * Created on: 2011/10/19
+ * Author: simin
+ */
+
+#ifndef LIST_H_
+#define LIST_H_
+
+typedef struct list_element_t{
+    void *data;
+    struct list_element_t *next;
+}list_element_t;
+
+typedef struct list_t{
+    list_element_t *head;
+    list_element_t *tail;
+    int cnt;
+}list_t;
+
+extern void* list_get(list_t *list, int idx);
+extern void list_add(list_t *list, void *e);
+extern void* list_remove(list_t *list, int idx);
+extern void* list_pop(list_t *list);
+#endif /* LIST_H_ */
diff --git a/test/rusage/verbs/makefile b/test/rusage/verbs/makefile
new file mode 100755
index 00000000..f0713020
--- /dev/null
+++ b/test/rusage/verbs/makefile
@@ -0,0 +1,68 @@
+VPATH =
+
+CC = icc
+CFLAGS = -Wall -O0 -g -DDEBUG -DERROR
+LD = $(CC)
+LFLAGS = -libverbs
+
+# every source that has a link rule or is part of MODULES, each listed once,
+# so that "clean" and the generated .d files cover all objects
+SRCS = list.c curtime.c printmem.c debug.c post.c qp.c read_config.c resource.c sock.c ud.c cas.c reg_mr.c rdma_wr.c rdma_wr_chain.c rdma_rd.c rdma_rdv.c send.c
+DSRCS = $(SRCS:.c=.d)
+OBJS = $(SRCS:.c=.o)
+EXECS = send rdma_wr rdma_rd rdma_rdv rdma_wr_chain ud cas
+MODULES = list.o curtime.o printmem.o debug.o post.o qp.o read_config.o resource.o sock.o
+
+CWD := $(abspath $(dir $(lastword $(MAKEFILE_LIST))))
+
+all: $(EXECS)
+
+# "%" (not "$*") names the stem in a prerequisite list: automatic variables
+# are empty there, so the program would not be rebuilt before running it
+run-%: %
+	sudo ssh kncc10 "sh -c 'echo 1024 > /proc/sys/vm/nr_hugepages'"
+	sudo ssh kncc11 "sh -c 'echo 1024 > /proc/sys/vm/nr_hugepages'"
+	sudo ssh kncc10 "echo userspace > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor"
+	sudo ssh kncc10 "cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq > /sys/devices/system/cpu/cpu0/cpufreq/scaling_setspeed"
+	sudo ssh kncc11 "echo userspace > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor"
+	sudo ssh kncc11 "cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq > /sys/devices/system/cpu/cpu0/cpufreq/scaling_setspeed"
+	sudo ssh kncc10 "service cpuspeed stop"
+	sudo ssh kncc11 "service cpuspeed stop"
+	ssh kncc11 "for i in {1..5}; do taskset 0x00ff $(CWD)/$*; done" &
+	ssh kncc10 "for i in {1..5}; do taskset 0x00ff $(CWD)/$* -s kncc11; done"
+
+kill-%:
+	ssh kncc11 "~/project/mpich/src/tools/kill.pl $*"
+	ssh kncc10 "~/project/mpich/src/tools/kill.pl $*"
+
+
+cas: cas.o $(MODULES)
+	$(LD) -o $@ $^ $(LFLAGS)
+
+ud: ud.o $(MODULES)
+	$(LD) -o $@ $^ $(LFLAGS)
+
+rdma_wr_chain: rdma_wr_chain.o $(MODULES)
+	$(LD) -o $@ $^ $(LFLAGS)
+
+rdma_wr: rdma_wr.o $(MODULES)
+	$(LD) -o $@ $^ $(LFLAGS)
+
+rdma_rd: rdma_rd.o $(MODULES)
+	$(LD) -o $@ $^ $(LFLAGS)
+
+rdma_rdv: rdma_rdv.o $(MODULES)
+	$(LD) -o $@ $^ $(LFLAGS)
+
+send: send.o $(MODULES)
+	$(LD) -o $@ $^ $(LFLAGS)
+
+reg_mr: reg_mr.o $(MODULES)
+	$(LD) -o $@ $^ $(LFLAGS)
+
+%.o: %.c
+	$(CC) $(CFLAGS) -c $<
+
+%.d: %.c
+	$(CC) -MM $< > $*.d
+
+clean:
+	rm -f $(EXECS) $(OBJS) $(DSRCS)
+
+-include $(DSRCS)
diff --git a/test/rusage/verbs/mbuf.h b/test/rusage/verbs/mbuf.h
new file mode 100755
index 00000000..6c8b8571
--- /dev/null
+++ b/test/rusage/verbs/mbuf.h
@@ -0,0 +1,23 @@
+/*
+ * mbuf.h
+ *
+ * Created on: 2011/10/19
+ * Author: simin
+ */
+
+#ifndef MBUF_H_
+#define MBUF_H_
+
+enum buf_type{
+    HOST_BUF_TYPE,
+    PCI_BUF_TYPE
+};
+
+typedef struct buf_t{
+    void *buf;
+    int size;
+    enum buf_type type;
+} buf_t;
+
+/* NOTE(review): frees only HOST_BUF_TYPE buffers, matching free_buf in mtype.h; the previous test freed the PCI type instead - confirm intent */
+#define buf_free(buf_p) do { if((buf_p)->type == HOST_BUF_TYPE) free((buf_p)->buf); } while(0)
+#endif /* MBUF_H_ */
diff --git a/test/rusage/verbs/mcons.h b/test/rusage/verbs/mcons.h
new file mode 100755
index 00000000..325d3362
--- /dev/null
+++ b/test/rusage/verbs/mcons.h
@@ -0,0 +1,9 @@
+#ifndef MCONS_H_
+#define MCONS_H_
+
+#include "mtype.h"
+
+#define ERR_RET (-1)
+#define ERR_ADDR ((addr_t)-1)
+
+#endif /* MCONS_H_ */
diff --git a/test/rusage/verbs/mm_core.h 
b/test/rusage/verbs/mm_core.h new file mode 100755 index 00000000..af183184 --- /dev/null +++ b/test/rusage/verbs/mm_core.h @@ -0,0 +1,12 @@ +#ifndef MM_CORE_H_ +#define MM_CORE_H_ + +#include "mtype.h" + +//4kB +#define MIC_PAGE_SIZE 4096 + +int mm_core_read(addr_t offset, int size, void *buf); +int mm_core_write(addr_t offset, int size, void *buf); + +#endif /* MM_CORE_H_ */ diff --git a/test/rusage/verbs/mm_host.h b/test/rusage/verbs/mm_host.h new file mode 100755 index 00000000..b3257671 --- /dev/null +++ b/test/rusage/verbs/mm_host.h @@ -0,0 +1,60 @@ +#ifndef MIC_MEM_H_ +#define MIC_MEM_H_ + +#include "mtype.h" + +/*#### MMIO ####*/ +#define MIC_PCI_MMIO_BASE_ADDR 0xc2300000 + +/*## GTT ##*/ +#define GTT_START_OFFSET 0x40000 +#define MIC_PCI_GTT_START_ADDR (MIC_PCI_MMIO_BASE_ADDR + GTT_START_OFFSET) +#define MIC_PCI_GTT_ETT_MAX 65536 +#define MIC_GTT_ETT_SIZE 4 + +/*## SBOX ##*/ +#define SBOX_START_OFFSET 0x10000 +#define MIC_PCI_SBOX_START_ADDR (MIC_PCI_MMIO_BASE_ADDR + SBOX_START_OFFSET) +#define MIC_PCI_SBOX_SIZE 0x30000 +#define SBOX_SBQ_FLUSH_REG 0x0000B1A0 +#define SBOX_TLB_FLUSH_REG 0x0000B1A4 + +/*## APERTURE ##*/ +#define MIC_PCI_APERTURE_BASE_ADDR 0xb0000000 +//256MB +#define MIC_PCI_APERTURE_SIZE 0x10000000 +//4kB +#define MIC_PAGE_SIZE 4096 + +static inline addr_t _mic_map2mic(addr_t addr){ + return addr >> 1 << 1 << 11; +} + +#define MIC_MAP2MIC _mic_map2mic + +extern int mm_host_init(); +extern int mm_host_exit(); + +extern addr_t mm_host_get_vaddr(int page_no, int offset); +extern addr_t mm_host_get_paddr(int page_no, int offset); + +/** + * map a page to MIC memory(set GTT[page_no]) + */ +extern int mm_host_page_init(int pg_no, addr_t map_addr, int size, int flush_flg); +/** + * read or write a initialized page + */ +extern int mm_host_page_read(int pg_no, int offset, int size, void *data); +extern int mm_host_page_write(int pg_no, int offset, int size, void *data); + +extern int mm_host_dump_gtt(); + +/** + * flush GTT table + * If only set 1 
page, you can call mm_host_page_init with flush_flg=1
+ * If set several pages, you can call mm_host_page_init with flush_flg=0, and call mm_host_gtt_flush after all page_init
+ */
+extern int mm_host_gtt_flush();
+
+#endif /* MIC_MEM_H_ */
diff --git a/test/rusage/verbs/mm_ib_test.h b/test/rusage/verbs/mm_ib_test.h
new file mode 100755
index 00000000..9c169222
--- /dev/null
+++ b/test/rusage/verbs/mm_ib_test.h
@@ -0,0 +1,52 @@
+/*
+ * mm_ib_test.h
+ *
+ * Created on: 2011/10/14
+ * Author: simin
+ */
+
+#ifndef MM_IB_TEST_H_
+#define MM_IB_TEST_H_
+
+//#define USE_1_SERVER 1
+
+
+//#define TEST_BUF_SIZE 16
+#define TEST_SERVER_BUF_NUM 2
+
+#define TEST_COMM_HOST_BASE_ADDR 0x20001
+#define TEST_COMM_CORE_BASE_ADDR (0x20000 << 11)
+
+/* MR buffer setting info */
+#define TEST_HOST_MR_PAGE_NO 0
+#define TEST_MR_BUF_OFFSET 0
+#define TEST_MR_HOST_BUF_SIZE 4096
+
+#define TEST_MR_HOST_BUF_ADDR (TEST_COMM_HOST_BASE_ADDR + TEST_MR_BUF_OFFSET)
+#define TEST_MR_CORE_BUF_ADDR (TEST_COMM_CORE_BASE_ADDR + TEST_MR_BUF_OFFSET)
+
+/*
+#define TEST_S2_HOST_MR_PAGE_NO 1
+#define TEST_S2_COMM_HOST_BASE_ADDR 0x30001
+#define TEST_S2_COMM_CORE_BASE_ADDR (0x30000 << 11)
+#define TEST_S2_MR_HOST_BUF_ADDR TEST_S2_COMM_HOST_BASE_ADDR + TEST_MR_BUF_OFFSET
+#define TEST_S2_MR_CORE_BUF_ADDR TEST_S2_COMM_CORE_BASE_ADDR + TEST_MR_BUF_OFFSET
+*/
+
+/* CQ buffer setting info */
+#define TEST_HOST_CQ_PAGE_NO 1
+#define TEST_CQ_BUF_OFFSET (TEST_MR_BUF_OFFSET + TEST_MR_HOST_BUF_SIZE)
+#define TEST_CQ_HOST_BUF_SIZE (4096*2) // SCQ + RCQ; parenthesized so the product stays one operand in any expression
+
+#define TEST_CQ_HOST_BUF_ADDR (TEST_COMM_HOST_BASE_ADDR + TEST_CQ_BUF_OFFSET)
+#define TEST_CQ_CORE_BUF_ADDR (TEST_CQ_HOST_BUF_ADDR >> 1 << 1 << 11)
+
+/* QP buffer setting info */
+#define TEST_HOST_QP_PAGE_NO 3
+#define TEST_QP_BUF_OFFSET (TEST_CQ_BUF_OFFSET + TEST_CQ_HOST_BUF_SIZE)
+#define TEST_QP_HOST_BUF_SIZE 4096
+
+#define TEST_QP_HOST_BUF_ADDR (TEST_COMM_HOST_BASE_ADDR + TEST_QP_BUF_OFFSET)
+#define TEST_QP_CORE_BUF_ADDR (TEST_QP_HOST_BUF_ADDR >> 1 << 1 << 
11) + +#endif /* MM_IB_TEST_H_ */ diff --git a/test/rusage/verbs/mmib.h b/test/rusage/verbs/mmib.h new file mode 100755 index 00000000..c4aadd37 --- /dev/null +++ b/test/rusage/verbs/mmib.h @@ -0,0 +1,54 @@ +/* + * mmib.h + * + * Created on: 2011/10/19 + * Author: simin + */ + +#ifndef MMIB_H_ +#define MMIB_H_ + +#include "mtype.h" +#include "ibcomm.h" + +enum mmib_buf_type{ + MMIB_MR_BUF, + MMIB_CQ_BUF, + MMIB_QP_BUF, +}; +enum mmib_buf_pool_state{ + MMIB_BUF_POOL_RESET, + MMIB_BUF_POOL_ACTIVE +}; + +struct mmib_buf_pool{ + addr_t offset; + int page_no; // start page_no + int size; + addr_t cur_start; // offset in page + enum mmib_buf_pool_state state; +}; + +typedef struct mmib_mrinfo{ + struct ibv_mr *mr; + buf_t *buf; /*Registered buf*/ +}mmib_mrinfo_t; + +extern int mmib_pool_init(); +extern buf_t *mmib_new_buf(int size, enum mmib_buf_type buf_type); +extern void mmib_destroy_buf(buf_t *buf); +extern void mmib_pool_destroy(); + +extern void* mmib_qp_buf_alloc(int size); +extern void* mmib_cq_buf_alloc(int size); +extern void mmib_buf_free(void* buf); + +extern int mmib_resource_create(config_t config, resource_t *res); +extern int mmib_pd_create(resource_t *res, pdinfo_t *pdinfo); +extern int mmib_qp_create(resource_t *res, pdinfo_t *pdinfo, qpinfo_t *qpinfo); +extern int mmib_mr_create(resource_t *res, pdinfo_t *pdinfo, buf_t *buf, mmib_mrinfo_t *mrinfo); +extern int mmib_post_send_req(qpinfo_t *qpinfo, mmib_mrinfo_t *mrinfo_list, int opcode, int tag); +extern int mmib_post_recv_req(qpinfo_t *qpinfo, mmib_mrinfo_t *mrinfo_list, int tag); +extern int mmib_poll_cq(qpinfo_t *qpinfo, int cq_flg, int *tag); +extern int mmib_resource_destroy(config_t *config, resource_t *res); +#endif /* MMIB_H_ */ diff --git a/test/rusage/verbs/mtype.h b/test/rusage/verbs/mtype.h new file mode 100755 index 00000000..74b05972 --- /dev/null +++ b/test/rusage/verbs/mtype.h @@ -0,0 +1,29 @@ +/* + * type.h + * + * Created on: 2011/10/08 + * Author: simin + */ + +#ifndef TYPE_H_ 
+#define TYPE_H_ + +#include + +typedef unsigned long int addr_t; + +enum buf_type{ + HOST_BUF_TYPE, + PCI_BUF_TYPE +}; + +typedef struct buf{ + void *buf; + int size; + enum buf_type type; +} buf_t; + +#define free_buf(buf_p) {if(buf_p->type == HOST_BUF_TYPE) free(buf_p->buf); buf_p=NULL;} + + +#endif /* TYPE_H_ */ diff --git a/test/rusage/verbs/pm_buf.h b/test/rusage/verbs/pm_buf.h new file mode 100755 index 00000000..8c2426de --- /dev/null +++ b/test/rusage/verbs/pm_buf.h @@ -0,0 +1,16 @@ +/* + * pm_buf.h + * + * Created on: 2011/10/21 + * Author: simin + */ + +#ifndef PM_BUF_H_ +#define PM_BUF_H_ + +struct pm_buf_ops { + void* (*alloc_buf)(int size); + void (*free_buf)(void *buf); +}; + +#endif /* PM_BUF_H_ */ diff --git a/test/rusage/verbs/post.c b/test/rusage/verbs/post.c new file mode 100755 index 00000000..e0d891e9 --- /dev/null +++ b/test/rusage/verbs/post.c @@ -0,0 +1,453 @@ +#include +#include +#include +#include "ibcomm.h" +#include "debug.h" + +//#define DEBUG_POST +#ifdef DEBUG_POST +#define dprintf printf +#else +#define dprintf(...) 
+#endif
+
+/*
+ * Read the time-stamp counter. cpuid is issued before and after rdtsc so
+ * that neither earlier nor later instructions are reordered across it.
+ */
+static unsigned long rdtsc(void) {
+    unsigned long x;
+    __asm__ __volatile__("xorl %%eax, %%eax; cpuid;" : : : "%rax", "%rbx", "%rcx", "%rdx"); /* rdtsc cannot be executed earlier than this */
+    /* rdtsc writes edx and shl/or rewrite rdx and the flags, so both are declared clobbered */
+    __asm__ __volatile__("rdtsc; shl $32, %%rdx; or %%rdx, %%rax" : "=a"(x) : : "%rdx", "cc", "memory"); /* rdtsc cannot be executed earlier than here */
+    __asm__ __volatile__("xorl %%eax, %%eax; cpuid;" : : : "%rax", "%rbx", "%rcx", "%rdx"); /* following instructions cannot be executed earlier than this */
+    return x;
+}
+
+#define MAX_POLL_TIME (1000000ULL * 1000000)
+/* number of slots in each wr_id -> tag table */
+#define WR_ID_TAG_MAP_SIZE 1000
+int swr_id_tag_map[WR_ID_TAG_MAP_SIZE];
+int rwr_id_tag_map[WR_ID_TAG_MAP_SIZE];
+
+/*
+ * Map a work-request id onto a table slot. Ids keep growing with every
+ * posted request, so they are wrapped into the table instead of being used
+ * as a raw index; ids that differ by a multiple of the table size share a slot.
+ */
+static unsigned int wr_id_slot(int wr_id) {
+    return (unsigned int)wr_id % WR_ID_TAG_MAP_SIZE;
+}
+
+/* Remember the tag associated with a send work-request id. */
+void put_swr_id_tag(int wr_id, int tag){
+    swr_id_tag_map[wr_id_slot(wr_id)] = tag;
+}
+/* Look up the tag stored for a send work-request id. */
+int get_swr_id_tag(int wr_id){
+    return swr_id_tag_map[wr_id_slot(wr_id)];
+}
+/* Remember the tag associated with a receive work-request id. */
+void put_rwr_id_tag(int wr_id, int tag){
+    rwr_id_tag_map[wr_id_slot(wr_id)] = tag;
+}
+/* Look up the tag stored for a receive work-request id. */
+int get_rwr_id_tag(int wr_id){
+    return rwr_id_tag_map[wr_id_slot(wr_id)];
+}
+/*
+ * Post one signaled send request covering the whole buffer of mrinfo.
+ * The request gets the next send wr_id of qpinfo, and tag is recorded for
+ * it. For every opcode other than IBV_WR_SEND the remote address and rkey
+ * are taken from remote_conn_info.
+ * Returns 0 on success, IBCOMM_ERR_CODE if ibv_post_send fails.
+ */
+int post_send_req(qpinfo_t *qpinfo, mrinfo_t *mrinfo, int opcode, int tag, qp_conn_info_t* remote_conn_info, uint32_t imm_data){
+    struct ibv_send_wr sr, *bad_wr = NULL;
+    struct ibv_sge sge[1];
+    int ret = 0;
+
+    /* Create sge*/
+    sge[0].addr = (uintptr_t)mrinfo->buf;
+    sge[0].length = mrinfo->buf_size;
+    sge[0].lkey = mrinfo->mr->lkey;
+
+    /* Create a SR */
+    memset(&sr, 0, sizeof(struct ibv_send_wr));
+    sr.next = NULL;
+    sr.wr_id = ++qpinfo->sr_num;
+    sr.sg_list = sge;
+    sr.num_sge = 1;
+    sr.opcode = opcode;
+    sr.imm_data = imm_data;
+    sr.send_flags = IBV_SEND_SIGNALED;
+
+    if(opcode != IBV_WR_RDMA_READ && mrinfo->buf_size <= qpinfo->max_inline_data) { sr.send_flags |= IBV_SEND_INLINE; }
+    put_swr_id_tag(sr.wr_id, tag);
+
+    // set addr and key if is RDMA op
+    if(opcode != IBV_WR_SEND){
+        sr.wr.rdma.remote_addr = remote_conn_info->addr;
+        sr.wr.rdma.rkey = remote_conn_info->rkey;
+    }
+
+    /* Post SR to SQ */
+    ret = ibv_post_send(qpinfo->qp, &sr, &bad_wr);
+    if(ret){
+        error_perror("ibv_post_send");
+        error_printf("ibv_post_send return %d\n", ret);
+        return IBCOMM_ERR_CODE;
+    }
+
+    return 0;
+}
+
+/* write to addr + sz * seq_num */
+/*
+ * Post one signaled send request with wr_id 0. For the two RDMA write
+ * opcodes the target is remote_conn_info->addr + IBCOM_RDMABUF_SZSEG * seq_num.
+ * Returns 0 on success, IBCOMM_ERR_CODE if ibv_post_send fails.
+ */
+int post_send_req2(qpinfo_t *qpinfo, mrinfo_t *mrinfo, int opcode, qp_conn_info_t* remote_conn_info, uint32_t imm_data, uint32_t seq_num) {
+    struct ibv_send_wr sr, *bad_wr = NULL;
+    struct ibv_sge sge[1];
+    int ret = 0;
+
+    /* prepare sge*/
+    sge[0].addr = (uintptr_t)mrinfo->buf;
+    sge[0].length = mrinfo->buf_size;
+    sge[0].lkey = mrinfo->mr->lkey;
+
+    dprintf("post_send_req2,sge[0].addr=%lx,sz=%d\n", (unsigned long)sge[0].addr, sge[0].length);
+
+    /* prepare send request or work request */
+    /* zero the request so that fields not assigned below are not left indeterminate */
+    memset(&sr, 0, sizeof(struct ibv_send_wr));
+    sr.next = NULL;
+    sr.wr_id = 0;
+    sr.sg_list = sge;
+    sr.num_sge = 1;
+    sr.opcode = opcode;
+    sr.imm_data = imm_data;
+    sr.send_flags = IBV_SEND_SIGNALED;
+
+    if(opcode != IBV_WR_RDMA_READ && mrinfo->buf_size <= qpinfo->max_inline_data) {
+        sr.send_flags |= IBV_SEND_INLINE;
+    }
+
+    if(opcode == IBV_WR_RDMA_WRITE || opcode == IBV_WR_RDMA_WRITE_WITH_IMM) {
+        sr.wr.rdma.remote_addr = remote_conn_info->addr + IBCOM_RDMABUF_SZSEG * seq_num;
+        sr.wr.rdma.rkey = remote_conn_info->rkey;
+        dprintf("post_send_req2,raddr=%lx\n", sr.wr.rdma.remote_addr);
+    }
+
+    //__asm__ __volatile__("" ::: "memory");
+
+    ret = ibv_post_send(qpinfo->qp, &sr, &bad_wr);
+    if(ret){
+        printf("ibv_post_send return %d\n", ret);
+        return IBCOMM_ERR_CODE;
+    }
+
+    return 0;
+}
+
+/*
+ * Post a chain of NCHAIN signaled inline send requests in one call. Request
+ * i covers IBCOM_INLINE_DATA bytes at offset IBCOM_INLINE_DATA * i of the
+ * local buffer and targets the slot selected by seq_num and i on the remote
+ * side. Returns 0 on success, -1 if ibv_post_send fails.
+ */
+int ibcom_isend_chain(qpinfo_t *qpinfo, mrinfo_t *mrinfo, int opcode, qp_conn_info_t* remote_conn_info, uint32_t imm_data, uint32_t seq_num) {
+    int ibcom_errno = 0;
+    int ib_errno;
+    int i;
+    struct ibv_send_wr sr[NCHAIN], *bad_wr = NULL;
+    struct ibv_sge sge[NCHAIN];
+
+    /* zero the requests so that fields not assigned below (e.g. wr_id) are defined */
+    memset(sr, 0, sizeof(sr));
+
+    for(i = 0; i < NCHAIN; i++) {
+        sge[i].addr = (uintptr_t)mrinfo->buf + IBCOM_INLINE_DATA * i;
+        sge[i].length = IBCOM_INLINE_DATA;
+        sge[i].lkey = mrinfo->mr->lkey;
+
+        sr[i].next = (i == NCHAIN - 1) ? NULL : &sr[i+1];
+        //sr[i].wr_id = 0;
+        sr[i].sg_list = &sge[i];
+        sr[i].num_sge = 1;
+#define SKIP_POLL_RCQ
+#ifdef SKIP_POLL_RCQ /* if you want all to be IBV_WR_RDMA_WRITE */
+        sr[i].opcode = opcode;
+#else
+        sr[i].opcode = (i == NCHAIN - 1) ? IBV_WR_RDMA_WRITE_WITH_IMM : IBV_WR_RDMA_WRITE;
+#endif
+        sr[i].imm_data = imm_data;
+        sr[i].send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE;
+
+        sr[i].wr.rdma.remote_addr = remote_conn_info->addr + IBCOM_INLINE_DATA * NCHAIN * seq_num + IBCOM_INLINE_DATA * i;
+        sr[i].wr.rdma.rkey = remote_conn_info->rkey;
+    }
+
+    ib_errno = ibv_post_send(qpinfo->qp, &sr[0], &bad_wr);
+    IBCOM_ERR_CHKANDJUMP(ib_errno, -1, printf("ibv_post_send\n"));
+
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+/* write to addr + sz * seq_num */
+/*
+ * Post one signaled inline send request of IBCOM_INLINE_DATA bytes taken at
+ * "offset" in the local buffer and aimed at
+ * remote_conn_info->addr + IBCOM_INLINE_DATA * seq_num.
+ * Returns 0 on success, -1 if ibv_post_send fails.
+ */
+int post_send_req4(qpinfo_t *qpinfo, mrinfo_t *mrinfo, int opcode, qp_conn_info_t* remote_conn_info, uint32_t imm_data, uint32_t seq_num, uint32_t offset) {
+    int ibcom_errno = 0;
+    int ib_errno;
+
+    struct ibv_send_wr sr, *bad_wr = NULL;
+    struct ibv_sge sge[1];
+
+    sge[0].addr = (uintptr_t)mrinfo->buf + offset;
+    sge[0].length = IBCOM_INLINE_DATA;
+    sge[0].lkey = mrinfo->mr->lkey;
+
+    /* zero the request so that fields not assigned below (e.g. wr_id) are defined */
+    memset(&sr, 0, sizeof(struct ibv_send_wr));
+    sr.next = NULL;
+    //sr.wr_id = 0;
+    sr.sg_list = sge;
+    sr.num_sge = 1;
+    sr.opcode = opcode;
+    sr.imm_data = imm_data;
+    sr.send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE;
+
+    sr.wr.rdma.remote_addr = remote_conn_info->addr + IBCOM_INLINE_DATA * seq_num;
+    sr.wr.rdma.rkey = remote_conn_info->rkey;
+
+    ib_errno = ibv_post_send(qpinfo->qp, &sr, &bad_wr);
+    IBCOM_ERR_CHKANDJUMP(ib_errno, -1, printf("ibv_post_send\n"));
+
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+/*
+ * Post one signaled UD send request through address handle "ah" to the
+ * queue pair described by remote_conn_info. The first 40 bytes of the local
+ * buffer are skipped, so the buffer must be larger than 40 bytes.
+ * Returns 0 on success, -1 for a too-short buffer, IBCOMM_ERR_CODE if
+ * ibv_post_send fails.
+ */
+int post_send_req_ud(qpinfo_t *qpinfo, mrinfo_t *mrinfo, int opcode, qp_conn_info_ud_t* remote_conn_info, struct ibv_ah *ah) {
+    struct ibv_send_wr sr, *bad_wr = NULL;
+    struct ibv_sge sge[1];
+    int ibcom_errno = 0, ib_errno;
+
+    /* Create sge*/
+    /* addr to addr + length - 1 will be on the payload, but see "post_send_req_ud" part */
+    if(mrinfo->buf_size <= 40) { printf("buf_size too short\n"); ibcom_errno = -1; goto fn_fail; }
+
+    sge[0].addr = (uintptr_t)mrinfo->buf + 40;
+    sge[0].length = mrinfo->buf_size - 40;
+    sge[0].lkey = mrinfo->mr->lkey;
+
+    /* Create a SR */
+    /* zero the request so that fields not assigned below (e.g. imm_data) are defined */
+    memset(&sr, 0, sizeof(struct ibv_send_wr));
+    sr.next = NULL;
+    sr.wr_id = 0;
+    sr.sg_list = sge;
+    sr.num_sge = 1;
+    sr.opcode = opcode;
+    //sr.imm_data = 0;
+    sr.send_flags = IBV_SEND_SIGNALED;
+
+#if 0
+    if(mrinfo->buf_size <= qpinfo->max_inline_data){
+        sr.send_flags |= IBV_SEND_INLINE;
+    }
+#endif
+
+    sr.wr.ud.ah = ah;
+    sr.wr.ud.remote_qpn = remote_conn_info->qp_num;
+    sr.wr.ud.remote_qkey = remote_conn_info->qkey;
+    dprintf("ibv_post_send,qpn=%08x,qkey=%08x\n", sr.wr.ud.remote_qpn, sr.wr.ud.remote_qkey);
+    // printf("ibv_post_send,dlid=%02x,is_global=%02x\n", ah->dlid, ah->is_global);
+
+    ib_errno = ibv_post_send(qpinfo->qp, &sr, &bad_wr);
+    if(ib_errno) {
+        error_perror("ibv_post_send");
+        printf("ib_errno=%d\n", ib_errno);
+        ibcom_errno = IBCOMM_ERR_CODE;
+        goto fn_fail;
+    }
+
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+/*
+ * Post one receive request covering the whole buffer of mrinfo. The request
+ * gets the next receive wr_id of qpinfo, and tag is recorded for it.
+ * Returns 0 on success, IBCOMM_ERR_CODE if ibv_post_recv fails.
+ */
+int post_recv_req(qpinfo_t *qpinfo, mrinfo_t *mrinfo, int tag){
+    /* the request lives on the stack: the previous heap copies were freed
+     * right after posting anyway and their allocation was never checked */
+    struct ibv_recv_wr rr;
+    struct ibv_sge sge;
+    struct ibv_recv_wr *bad_wr = NULL;
+    int ret = 0;
+
+    /* Prepare scatter/gather entry list */
+    memset(&sge, 0, sizeof(struct ibv_sge));
+    sge.addr = (uintptr_t)mrinfo->buf;
+    sge.length = mrinfo->buf_size;
+    sge.lkey = mrinfo->mr->lkey;
+
+    /* Create RR list */
+    memset(&rr, 0, sizeof(rr));
+    rr.next = NULL;
+    rr.wr_id = ++qpinfo->rr_num;
+    rr.sg_list = &sge;
+    rr.num_sge = 1;
+    put_rwr_id_tag(rr.wr_id, tag);
+
+    /* Post RR to RQ */
+    ret = ibv_post_recv(qpinfo->qp, &rr, &bad_wr);
+    dprintf("ibv_post_recv ret=%d\n", ret);
+    if(ret){
+        return IBCOMM_ERR_CODE;
+    }
+
+    return 0;
+}
+
+/*
+ * Post one receive request without any scatter/gather entry, identified by
+ * wr_id. Returns 0 on success, -1 if ibv_post_recv fails.
+ */
+int ibcom_irecv(qpinfo_t *qpinfo, uint64_t wr_id){
+    struct ibv_recv_wr rr;
+    struct ibv_recv_wr *bad_wr = NULL;
+    int ibcom_errno = 0;
+    int ib_errno;
+
+    rr.next = NULL;
+    rr.sg_list = NULL;
+    rr.num_sge = 0;
+    rr.wr_id = wr_id;
+
+    /* post rr */
+    ib_errno = ibv_post_recv(qpinfo->qp, &rr, &bad_wr);
+    IBCOM_ERR_CHKANDJUMP(ib_errno, -1, printf("ibv_post_recv\n"));
+
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+/*
+ * Post one UD receive request covering the whole buffer of mrinfo, which
+ * must be larger than 40 bytes. Returns 0 on success, -1 for a too-short
+ * buffer, IBCOMM_ERR_CODE if ibv_post_recv fails.
+ */
+int post_recv_req_ud(qpinfo_t *qpinfo, mrinfo_t *mrinfo, uint64_t wr_id){
+    struct ibv_recv_wr rr, *bad_wr = NULL;
+    struct ibv_sge sge[1];
+    int ibcom_errno = 0, ib_errno;
+
+    /* Prepare scatter/gather entry list */
+    memset(sge, 0, sizeof(struct ibv_sge));
+    /* addr to addr + 39 are not filled, addr + 40 to addr + length - 1 are filled with payload */
+    if(mrinfo->buf_size <= 40) { printf("buf_size too short\n"); ibcom_errno = -1; goto fn_fail; }
+    sge[0].addr = (uintptr_t)mrinfo->buf;
+    sge[0].length = mrinfo->buf_size;
+    sge[0].lkey = mrinfo->mr->lkey;
+
+    /* Create RR list */
+    memset(&rr, 0, sizeof(struct ibv_recv_wr));
+    rr.next = NULL;
+    rr.wr_id = wr_id;
+    rr.sg_list = sge;
+    rr.num_sge = 1;
+
+    /* Post RR to RQ */
+    ib_errno = ibv_post_recv(qpinfo->qp, &rr, &bad_wr);
+    if(ib_errno){
+        printf("ibv_post_recv ib_errno=%d\n", ib_errno);
+        ibcom_errno = IBCOMM_ERR_CODE;
+        goto fn_fail;
+    }
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+/*
+ * Busy-poll the send or receive CQ (selected by cq_flg) until one
+ * completion arrives or MAX_POLL_TIME polls have been made, then store the
+ * tag recorded for the completed wr_id in *tag.
+ * Returns 0 on success; IBCOMM_ERR_CODE on a poll error, on timeout, on a
+ * completion whose status is not IBV_WC_SUCCESS, or on an unknown cq_flg.
+ */
+int poll_cq(qpinfo_t *qpinfo, int cq_flg, int *tag) {
+    struct ibv_wc wc;
+    struct ibv_cq *cq = NULL;
+    /* wide unsigned counter: an int would overflow long before reaching MAX_POLL_TIME */
+    unsigned long long spins = 0;
+    int wc_num = 0, rc = IBCOMM_ERR_CODE;
+//  wc = malloc(sizeof(struct ibv_wc));
+    memset(&wc, 0, sizeof(struct ibv_wc));
+
+    switch(cq_flg){
+    case SEND_CQ_FLG:
+        cq = qpinfo->scq;
+        break;
+
+    case RECV_CQ_FLG:
+        cq = qpinfo->rcq;
+        break;
+
+    default:
+        /* nothing to poll; reported below as "no wc is found" */
+        break;
+    }
+
+    if(cq != NULL){
+        do{
+            wc_num = ibv_poll_cq(cq, 1, &wc);
+        }while(!wc_num && ++spins < MAX_POLL_TIME);
+    }
+
+    if(wc_num < 0){
+        error_perror("ibv_poll_cq");
+        goto poll_cq_exit;
+    }
+    if(wc_num == 0){
+        error_printf("no wc is found\n");
+        goto poll_cq_exit;
+    }
+    if (wc.status != IBV_WC_SUCCESS){
+        error_printf("wrong wc state: %d, %s\n", wc.status, ibv_wc_status_str(wc.status));
+        goto poll_cq_exit;
+    }
+    switch(cq_flg){
+    case SEND_CQ_FLG:
+        *tag = get_swr_id_tag(wc.wr_id);
+        break;
+    case RECV_CQ_FLG:
+        *tag = get_rwr_id_tag(wc.wr_id);
+        break;
+    }
+    rc = 0;
+
+ poll_cq_exit:
+
+    return rc;
+}
+
+/*
+ * Poll the send or receive CQ (selected by cq_flg) once. *result receives
+ * the return value of ibv_poll_cq; when a completion was found, *tag
+ * receives the tag recorded for its wr_id.
+ * Returns 0 on success, the negative poll result on a poll error, -1 on a
+ * completion whose status is not IBV_WC_SUCCESS, IBCOMM_ERR_CODE on an
+ * unknown cq_flg.
+ */
+int poll_cq2(qpinfo_t *qpinfo, int cq_flg, int *tag, int *result) {
+    struct ibv_wc cqe;
+    int rc = 0;
+
+    switch(cq_flg){
+    case SEND_CQ_FLG:
+        *result = ibv_poll_cq(qpinfo->scq, 1, &cqe);
+        break;
+
+    case RECV_CQ_FLG:
+        *result = ibv_poll_cq(qpinfo->rcq, 1, &cqe);
+        break;
+
+    default:
+        /* no CQ was polled; do not leave *result indeterminate */
+        *result = 0;
+        rc = IBCOMM_ERR_CODE;
+        goto fn_fail;
+    }
+
+    if(*result < 0){
+        error_perror("ibv_poll_cq");
+        rc = *result;
+        goto fn_fail;
+    }
+    if(*result > 0 && cqe.status != IBV_WC_SUCCESS){
+        error_printf("cqe status=%08x,%s\n", cqe.status, ibv_wc_status_str(cqe.status));
+        rc = -1;
+        goto fn_fail;
+    }
+    if(*result > 0) {
+        dprintf("cqe.imm_data=%d\n", cqe.imm_data);
+        switch(cq_flg){
+        case SEND_CQ_FLG:
+            *tag = get_swr_id_tag(cqe.wr_id);
+            break;
+        case RECV_CQ_FLG:
+            *tag = get_rwr_id_tag(cqe.wr_id);
+            break;
+        }
+    }
+ fn_exit:
+    return rc;
+ fn_fail:
+    goto fn_exit;
+}
+
+/*
+ * Poll the send or receive CQ (selected by cq_flg) once and store the
+ * return value of ibv_poll_cq in *result. On the send side the TSC count
+ * spent inside the poll is printed.
+ * Returns 0 on success, the negative poll result on a poll error, -1 on a
+ * completion whose status is not IBV_WC_SUCCESS, IBCOMM_ERR_CODE on an
+ * unknown cq_flg.
+ */
+int poll_cq2_ud(qpinfo_t *qpinfo, int cq_flg, int *result) {
+    struct ibv_wc cqe;
+    int rc = 0;
+
+    switch(cq_flg){
+    case SEND_CQ_FLG: {
+        unsigned long tscs = rdtsc();
+        *result = ibv_poll_cq(qpinfo->scq, 1, &cqe);
+        unsigned long tsce = rdtsc();
+        printf("poll_cq,send,%lu\n", tsce-tscs);
+        break; }
+    case RECV_CQ_FLG:
+        *result = ibv_poll_cq(qpinfo->rcq, 1, &cqe);
+        break;
+    default:
+        /* no CQ was polled; do not leave *result indeterminate */
+        *result = 0;
+        rc = IBCOMM_ERR_CODE;
+        goto fn_fail;
+    }
+
+    if(*result < 0){
+        error_perror("ibv_poll_cq");
+        rc = *result;
+        goto fn_fail;
+    }
+    if(*result > 0 && cqe.status != IBV_WC_SUCCESS){
+        error_printf("cqe status=%08x,%s\n", cqe.status, ibv_wc_status_str(cqe.status));
+        rc = -1;
+        goto fn_fail;
+    }
+ fn_exit:
+    return rc;
+ fn_fail:
+    goto fn_exit;
+}
diff --git a/test/rusage/verbs/printmem.c b/test/rusage/verbs/printmem.c
new file mode 100755
index 
00000000..a3ae4273 --- /dev/null +++ b/test/rusage/verbs/printmem.c @@ -0,0 +1,12 @@ +#include +#include "mtype.h" + +void print_mem(addr_t addr, int size){ + int i; + printf("print memory[0x%lx]\n", addr); + for(i = 0; i < size; i++){ + printf("%02x ", *(unsigned char *)(addr+i)); + } + printf("\n"); +} + diff --git a/test/rusage/verbs/qp.c b/test/rusage/verbs/qp.c new file mode 100755 index 00000000..f77881ef --- /dev/null +++ b/test/rusage/verbs/qp.c @@ -0,0 +1,297 @@ +#include +#include +#include +#include +#include +#include +#include "ibcomm.h" +#include "sock.h" +#include "debug.h" + +//#define DEBUG_QP +#ifdef DEBUG_QP +#define dprintf printf +#else +#define dprintf(...) +#endif + +int connect_qp(config_t config, resource_t *res, qpinfo_t *qpinfo){ + union ibv_gid gid; + qp_conn_info_t local_conn_info; + int rc = IBCOMM_ERR_CODE; + + // get GID for this connection + memset(&gid, 0, sizeof(union ibv_gid)); + if(ibv_query_gid(res->ib_ctx, config.ib_port, config.gid_idx, &gid)){ + error_perror("ibv_query_gid"); + goto connect_qp_exit; + } + dprintf("port=%08x\n", config.ib_port); + + // set local qp conn info + memset(&local_conn_info, 0, sizeof(qp_conn_info_t)); + memset(qpinfo->remote_conn_info, 0, sizeof(qpinfo->remote_conn_info)); + local_conn_info.qp_num = htonl(qpinfo->qp->qp_num); + local_conn_info.lid = htons(res->port_attr->lid); + memcpy(local_conn_info.gid, &gid, 16); + dprintf("qp_num=%08x, lid=%08x\n", local_conn_info.qp_num, local_conn_info.lid); + + // set rdma address + if(config.use_rdma == 1){ + local_conn_info.addr = htonll((uint64_t) res->rdma_mr.mr->addr); + /* advertise the remote key: the peer stores this value and uses it as wr.rdma.rkey, while lkey is only meant for local SGEs */ + local_conn_info.rkey = htonl((uint32_t) res->rdma_mr.mr->rkey); + printf("my lkey=%08x\n", res->rdma_mr.mr->lkey); + printf("my rkey=%08x\n", res->rdma_mr.mr->rkey); + } + + if(config.server_flg) { qpinfo->listenfd = -1; } // if listenfd != -1, then listen(listenfd) + int i; + for(i = 0; i < (config.server_flg ?
config.nremote : 1); i++) { + // server accepts connection from NREMOTE clients + // NREMOTE clients connect to one server + + // sock connect + qpinfo->sock[i] = sock_connect(config.server_name, config.tcp_port, &(qpinfo->listenfd)); + if(qpinfo->sock[i] < 0) { + error_perror("sock_connect"); goto connect_qp_exit; + } + dprintf("connect_qp, after sock_connect\n"); + + // send local_conn_info, receive remote_conn_info + if(sock_sync_data(qpinfo->sock[i], sizeof(qp_conn_info_t), (char*)&local_conn_info, (char*)&qpinfo->remote_conn_info[i])){ + error_perror("sock_sync_data"); + goto connect_qp_exit; + } + dprintf("connect_qp, after sock_sync_data\n"); + qpinfo->remote_conn_info[i].qp_num = ntohl(qpinfo->remote_conn_info[i].qp_num); + qpinfo->remote_conn_info[i].lid = ntohs(qpinfo->remote_conn_info[i].lid); + + // set rdma address + if(config.use_rdma == 1){ + qpinfo->remote_conn_info[i].addr = ntohll(qpinfo->remote_conn_info[i].addr); + qpinfo->remote_conn_info[i].rkey = ntohl(qpinfo->remote_conn_info[i].rkey); + printf("your rkey=%08x\n", qpinfo->remote_conn_info[i].rkey); + } + } + rc = 0; + +connect_qp_exit: + if(rc) { + int i; + for(i = 0; i < (config.server_flg ? 
config.nremote : 1); i++) { + if(qpinfo->sock[i] > 0) { close(qpinfo->sock[i]); } + } + } + return rc; +} + +int init_qp(config_t config, qpinfo_t *qpinfo){ + struct ibv_qp_attr attr; + int flags; + int rc = 0; + + memset(&attr, 0, sizeof(attr)); + attr.qp_state = IBV_QPS_INIT; + attr.port_num = config.ib_port; + attr.pkey_index = 0; + attr.qp_access_flags = IBV_ACCESS_LOCAL_WRITE; + if(config.use_rdma) + attr.qp_access_flags |= IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_ATOMIC; + + flags = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS; + if(ibv_modify_qp(qpinfo->qp, &attr, flags)){ + error_perror("ibv_modify_qp"); + rc = IBCOMM_ERR_CODE; + } + return rc; +} + +int init_qp_ud(config_t config, qpinfo_t *qpinfo){ + struct ibv_qp_attr attr; + int flags; + int ibcom_errno = 0, ib_errno; + + memset(&attr, 0, sizeof(attr)); + attr.qp_state = IBV_QPS_INIT; + attr.port_num = config.ib_port; + attr.pkey_index = 0; + attr.qkey = 0x11111111; + + flags = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_QKEY; + + ib_errno = ibv_modify_qp(qpinfo->qp, &attr, flags); + if(ib_errno) { + dprintf("ib_errno=%d\n", ib_errno); + error_perror("ibv_modify_qp"); + ibcom_errno = IBCOMM_ERR_CODE; + goto fn_fail; + } + fn_exit: + return ibcom_errno; + fn_fail: + goto fn_exit; +} + +int rtr_qp(config_t config, qpinfo_t *qpinfo){ + struct ibv_qp_attr attr; + int flags; + int rc = 0; + + memset(&attr, 0, sizeof(attr)); + attr.qp_state = IBV_QPS_RTR; + attr.path_mtu = IBV_MTU_2048/*IBV_MTU_2048*//*IBV_MTU_512*/; + attr.ah_attr.dlid = qpinfo->remote_conn_info[0].lid; + attr.ah_attr.port_num = config.ib_port; + attr.dest_qp_num = qpinfo->remote_conn_info[0].qp_num; + attr.rq_psn = 0; + attr.min_rnr_timer = 0x12; + attr.max_dest_rd_atomic = /*0*/1; + + if(config.use_rdma) + attr.max_dest_rd_atomic = 1; + + flags = IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER; + 
if(ibv_modify_qp(qpinfo->qp, &attr, flags)){ + error_perror("ibv_modify_qp"); + rc = IBCOMM_ERR_CODE; + } + return rc; +} + +int rtr_qp_ud(config_t config, qpinfo_t *qpinfo){ + struct ibv_qp_attr attr; + int flags; + int ibcom_errno = 0, ib_errno; + + memset(&attr, 0, sizeof(attr)); + attr.qp_state = IBV_QPS_RTR; + + flags = IBV_QP_STATE; + ib_errno = ibv_modify_qp(qpinfo->qp, &attr, flags); + if(ib_errno) { error_perror("ibv_modify_qp"); ibcom_errno = IBCOMM_ERR_CODE; goto fn_fail; } + + fn_exit: + return ibcom_errno; + fn_fail: + goto fn_exit; +} + +int rts_qp(config_t config, qpinfo_t *qpinfo){ + struct ibv_qp_attr attr; + int flags; + int rc = 0; + + memset(&attr, 0, sizeof(attr)); + attr.qp_state = IBV_QPS_RTS; + attr.timeout = 0x14; + attr.retry_cnt = 7; + attr.rnr_retry = 7; + attr.sq_psn = 0; + attr.max_rd_atomic = /*0*/1; // num of outstanding RDMA reads and atomic op allowed + if(config.use_rdma) + attr.max_rd_atomic = 1; + + flags = IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC; + + if(ibv_modify_qp(qpinfo->qp, &attr, flags)){ + error_perror("ibv_modify_qp"); + rc = IBCOMM_ERR_CODE; + } + return rc; +} + +int rts_qp_ud(config_t config, qpinfo_t *qpinfo){ + struct ibv_qp_attr attr; + int flags; + int ibcom_errno = 0, ib_errno; + + memset(&attr, 0, sizeof(attr)); + attr.qp_state = IBV_QPS_RTS; + attr.sq_psn = 0; + + flags = IBV_QP_STATE | IBV_QP_SQ_PSN; + + ib_errno = ibv_modify_qp(qpinfo->qp, &attr, flags); + if(ib_errno) { error_perror("ibv_modify_qp"); ibcom_errno = IBCOMM_ERR_CODE; goto fn_fail; } + fn_exit: + return ibcom_errno; + fn_fail: + goto fn_exit; +} + +/* modify address vector and dest qpn and reset sq_psn */ +int modify_dest_qp(config_t config, qpinfo_t *qpinfo, qp_conn_info_t* remote_conn_info){ + struct ibv_qp_attr attr; + int flags; + int rc = 0; + + memset(&attr, 0, sizeof(attr)); + attr.qp_state = IBV_QPS_RTS; + attr.ah_attr.dlid = remote_conn_info->lid; + 
attr.ah_attr.port_num = config.ib_port; + attr.dest_qp_num = remote_conn_info->qp_num; + attr.sq_psn = 0; + + attr.max_rd_atomic = 0; + attr.retry_cnt = 7; + attr.rnr_retry = 7; + attr.timeout = 0x14; + +#if 0 + flags = IBV_QP_STATE | IBV_QP_AV | IBV_QP_DEST_QPN | IBV_QP_SQ_PSN | + IBV_QP_MAX_QP_RD_ATOMIC | + IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_TIMEOUT; +#else + flags = IBV_QP_STATE | IBV_QP_AV; +#endif + + if(ibv_modify_qp(qpinfo->qp, &attr, flags)){ + error_perror("ibv_modify_qp"); + rc = IBCOMM_ERR_CODE; + } + return rc; +} + +void print_qp_status(qpinfo_t *qpinfo){ + struct ibv_qp_attr *attr; + struct ibv_qp_init_attr *init_attr; + int flags = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS; + int rc; + + attr = malloc(sizeof(struct ibv_qp_attr)); + init_attr = malloc(sizeof(struct ibv_qp_init_attr)); + + rc = ibv_query_qp(qpinfo->qp, attr, flags, init_attr); + if(rc){ + fprintf(stderr, "query qp error\n"); + } + else{ + switch(attr->cur_qp_state){ + case IBV_QPS_RESET: + dprintf("attr=IBV_QPS_RESET\n"); + break; + case IBV_QPS_INIT: + dprintf("attr=IBV_QPS_INIT\n"); + break; + case IBV_QPS_RTR: + dprintf("attr=IBV_QPS_RTR\n"); + break; + case IBV_QPS_RTS: + dprintf("attr=IBV_QPS_RTS\n"); + break; + case IBV_QPS_SQD: + dprintf("attr=IBV_QPS_SQD\n"); + break; + case IBV_QPS_SQE: + dprintf("attr=IBV_QPS_SQE\n"); + break; + case IBV_QPS_ERR: + dprintf("attr=IBV_QPS_ERR\n"); + break; + } + } + free(attr); + free(init_attr); +} + diff --git a/test/rusage/verbs/rdma_imm_chain.c b/test/rusage/verbs/rdma_imm_chain.c new file mode 100755 index 00000000..0aa883a0 --- /dev/null +++ b/test/rusage/verbs/rdma_imm_chain.c @@ -0,0 +1,261 @@ +#include +#include +#include +#include +#include +#include "ibcomm.h" +#include "mtype.h" +#include "mcons.h" +#include "mm_ib_test.h" + +//#define DEBUG_RDMA_WR_WITH_IMM +#ifdef DEBUG_RDMA_WR_WITH_IMM +#define dprintf printf +#else +#define dprintf(...) 
+#endif + +#define TEST_NRECVBUF 10 +#define TEST_RDMA_FLG_SIZE (sizeof(unsigned short)) +#define NTRIAL 60 +#define PPOLLS 10 /* sweet spot is around 10 */ +#define NSKIPS 30 +#define PPOLLR 1 /* sweet spot is around 10 */ +#define NSKIPR 30 + +static unsigned long rdtsc() { + unsigned long x; + __asm__ __volatile__("xorl %%eax, %%eax; cpuid;" : : : "%rax", "%rbx", "%rcx", "%rdx"); /* rdtsc cannot be executed earlier than this */ + __asm__ __volatile__("rdtsc; shl $32, %%rdx; or %%rdx, %%rax" : "=a"(x) : : "%rdx", "memory"); /* rdtsc cannot be executed earlier than here; rdx is written by rdtsc and shl, so it has to be listed as clobbered */ + __asm__ __volatile__("xorl %%eax, %%eax; cpuid;" : : : "%rax", "%rbx", "%rcx", "%rdx"); /* following instructions cannot be executed earlier than this */ + return x; +} + + +int debug_counter = 0, debug_counter2 = 0, debug_counter3 = 0, dc = 0; + +int main(int argc, char **argv) { + int ibcom_errno; + config_t config; + int i, j, k, tag = 0, rc = 0; + char sync_res; + unsigned long tscs, tsce; + resource_t res; + pdinfo_t pdinfo; + qpinfo_t qpinfo; + mrinfo_t *loc_mr_list = NULL; + mrinfo_t *mrinfo_recv_list = NULL; + + FILE* fp; + fp = popen("cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq", "r"); + if(!fp) { printf("popen failed\n"); goto fn_fail; } + char freq_str[256]; + int nread = fread(freq_str, sizeof(char), sizeof(freq_str) - 1, fp); /* leave one byte for the terminator written below */ + if(!nread) { printf("popen failed"); pclose(fp); goto fn_fail; } + freq_str[nread] = 0; + //printf("%s", freq_str); + long int freq = strtol(freq_str, NULL, 10) * 1000; + printf("freq=%ld\n", freq); + pclose(fp); + //exit(1); + + if (read_config(&config, argc, argv)) { + goto main_exit; + } + + config.use_rdma = 1; + + if(config.buf_size == 0) { printf("set buf_size"); goto fn_fail; } + + if (resource_create(config, &res) || pd_create(&res, &pdinfo) + || qp_create(&res, &pdinfo, &qpinfo)) { + goto main_exit; + } + + // rdma-write-to ring with 2NTRIAL slots + void *rdma_buf = mmap(0, IBCOM_INLINE_DATA * NCHAIN * NTRIAL, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + /*
unmark magic */ + memset(rdma_buf, 0, IBCOM_INLINE_DATA * NCHAIN * NTRIAL); + if(!rdma_buf) { printf("mmap failed\n"); goto fn_fail; } + if(mr_create(&res, &pdinfo, IBCOM_INLINE_DATA * NCHAIN * NTRIAL, rdma_buf, &res.rdma_mr)) { printf("mr_create failed\n"); goto fn_fail; } + +#define NSBUF 1 + // rdma-write-from buffers + loc_mr_list = malloc(sizeof(mrinfo_t) * NSBUF); + for(i = 0; i < NSBUF; i++) { + void *loc_buf = mmap(0, IBCOM_INLINE_DATA * NCHAIN, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + if(!loc_buf) { printf("mmap failed\n"); goto fn_fail; } + if(config.server_flg) { + for(j = 0; j < IBCOM_INLINE_DATA * NCHAIN; j++) { + *((char*)(loc_buf + j)) = IBCOM_INLINE_DATA * NCHAIN * i + j; + } + *((uint32_t*)(loc_buf + IBCOM_INLINE_DATA * NCHAIN - sizeof(uint32_t))) = IBCOM_MAGIC; + } + if(mr_create(&res, &pdinfo, IBCOM_INLINE_DATA * NCHAIN, loc_buf, &loc_mr_list[i])) { printf("mr_create fail\n"); goto fn_fail; } + } + dprintf("create RDMA buffer successfully..\n"); + + /* Connect qp of each side and init them*/ + if (connect_qp(config, &res, &qpinfo)) { + goto main_exit; + } + dprintf("connect done\n"); + debug_print_qp_conn_info(res, qpinfo, &config); + + /* bring qp up to RTS */ + if(init_qp(config, &qpinfo) || rtr_qp(config, &qpinfo) || rts_qp(config, &qpinfo)) { printf("trs failed\n"); goto fn_fail; } + + /* pre-post receive commands */ + if(!config.server_flg) { +#if 0 + for(i = 0; i < _MAX_RQ_CAPACITY - 16; i++){ + ibcom_errno = ibcom_irecv(&qpinfo, 0); + if(ibcom_errno) { printf("post_recv_req\n"); goto fn_fail; } + } +#endif + } + + /* barrier */ + for(i = 0; i < (config.server_flg ? 
config.nremote : 1); i++) { + if(sock_sync_data(qpinfo.sock[i], 1, "R", &sync_res)){ + perror("sock_sync_data"); + } + } + + if (config.server_flg) { /* sender side */ + // usleep(1000000); + if(NTRIAL % PPOLLS != 0) { printf("set NTRIAL multiple of PPOLLS\n"); goto fn_fail; } + if(NTRIAL <= NSKIPS) { printf("set NTRIAL > NSKIP\n"); goto fn_fail; } + + for (i = 0; i < NTRIAL; i++) { + if(i == NSKIPS) { tscs = rdtsc(); } + +#if 0 + for(j = 0; j < NCHAIN - 1; j++) { + post_send_req4(&qpinfo, &loc_mr_list[i % NSBUF], IBV_WR_RDMA_WRITE, &qpinfo.remote_conn_info[0], 0, i * NCHAIN + j, IBCOM_INLINE_DATA * j); + } + post_send_req4(&qpinfo, &loc_mr_list[i % NSBUF], IBV_WR_RDMA_WRITE_WITH_IMM, &qpinfo.remote_conn_info[0], i, i * NCHAIN + j, IBCOM_INLINE_DATA * j); +#else + ibcom_isend_chain(&qpinfo, &loc_mr_list[i % NSBUF], IBV_WR_RDMA_WRITE, &qpinfo.remote_conn_info[0], i, i); +#endif + debug_counter2 += 1; + + //#define POLL_SCQ_PERIODICALLY + +#ifdef POLL_SCQ_PERIODICALLY + if(i % PPOLLS == PPOLLS - 1) { + int nfound = 0; + k = 0; + while(1) { + int result; + struct ibv_wc cqe[PPOLLS * NCHAIN]; + result = ibv_poll_cq(qpinfo.scq, PPOLLS * NCHAIN, &cqe[0]); + if(result < 0) { printf("ibv_poll_cq"); goto fn_fail; } + if(result > 0) { + for(j = 0; j < result; j++) { + if(cqe[j].status != IBV_WC_SUCCESS) { printf("cqe status\n"); goto fn_fail; } + } + nfound += result; + debug_couter3 += result; + if(nfound == PPOLLS * NCHAIN) { break; } + } + k++; + } + } +#endif + //printf("%d ", i); + } + tsce = rdtsc(); printf("send,%.0f\n", (tsce-tscs)/(double)(NTRIAL-NSKIPS)); +#ifndef POLL_SCQ_PERIODICALLY + int nfound = 0; + k = 0; + while(1) { + int result; + struct ibv_wc cqe[NTRIAL * NCHAIN]; + result = ibv_poll_cq(qpinfo.scq, NTRIAL * NCHAIN, &cqe[0]); + if(result < 0) { printf("ibv_poll_cq"); goto fn_fail; } + if(result > 0) { + for(j = 0; j < result; j++) { + if(cqe[j].status != IBV_WC_SUCCESS) { printf("cqe status\n"); goto fn_fail; } + } + nfound += result; + debug_counter3 += 
result; + if(nfound == NTRIAL * NCHAIN) { break; } + } + k++; + } +#endif + dprintf("debug_counter2=%d,%d\n", debug_counter2, debug_counter3); + } else { // receiver side + if(NSKIPR % PPOLLR !=0) { printf("set NSKIP multiple of PPOLL\n"); goto fn_fail; } + for (i = 0; i < NTRIAL; i++) { + if(i == NSKIPR) { tscs = rdtsc(); } + + if(i % PPOLLR == PPOLLR - 1) { + int nfound = 0; + k = 0; + while(1) { + int result; + struct ibv_wc cqe[PPOLLR]; +#define SKIP_POLL_RCQ +#ifdef SKIP_POLL_RCQ /* if you want to skip poll rcq */ + result = 1; +#else + result = ibv_poll_cq(qpinfo.rcq, PPOLLR, &cqe[0]); + if(result < 0) { printf("poll_cq\n"); goto fn_fail; } +#endif + if(result > 0) { + for(j = 0; j < result; j++) { +#ifndef SKIP_POLL_RCQ + if(cqe[j].status != IBV_WC_SUCCESS) { printf("cqe.status"); goto fn_fail; } +#endif + volatile uint32_t* magic = (volatile uint32_t*)(res.rdma_mr.buf + IBCOM_INLINE_DATA * NCHAIN * i + IBCOM_INLINE_DATA * NCHAIN - sizeof(uint32_t)); + while(*magic != IBCOM_MAGIC) { + __asm__ __volatile__ + ( + "movq %0, %%rsi;" + "prefetchnta -0x40(%%rsi);" + "prefetchnta -0x80(%%rsi);" + "prefetchnta -0xc0(%%rsi);" + : + : "r"(magic) + : "%rsi"); + + //__asm__ __volatile__ ("pause;" : : ); + } + //if(cqe[j].imm_data != i) { printf("%d\n", cqe[j].imm_data); } + //print_mem((addr_t)res.rdma_mr.buf + IBCOM_RDMABUF_SZSEG * i * 2, 32); + //printf("%d ", i); + } +#ifdef SKIP_POLL_RCQ + break; +#else + debug_counter += result; + nfound += result; + if(nfound == PPOLLR) { break; } +#endif + } else { + k += 1; + if(k % (1ULL<<26) == (1ULL<<26) - 1) { + dc += 1; + printf("i=%d,dc=%d\n", i, dc); + ibcom_errno = ibcom_irecv(&qpinfo, 0); + if(ibcom_errno) { printf("post_recv_req,dc=%d\n", dc); goto fn_fail; } + } + } + } + } + } + tsce = rdtsc(); printf("recv,%.0f\n", (tsce-tscs)/(double)(NTRIAL-NSKIPR)); + } + + fn_fail: +main_exit: + + if (resource_destroy(&config, &res)) { + dprintf("resource destroy failed\n"); + } + if(loc_mr_list) { free(loc_mr_list); } + + 
return rc; +} diff --git a/test/rusage/verbs/rdma_rd.c b/test/rusage/verbs/rdma_rd.c new file mode 100755 index 00000000..a06a30b4 --- /dev/null +++ b/test/rusage/verbs/rdma_rd.c @@ -0,0 +1,155 @@ +#include +#include +#include +#include "ibcomm.h" +#include "debug.h" +#include "mtype.h" +#include "mcons.h" +#include "mm_ib_test.h" + +#define DEBUG_RDMA_RD +#ifdef DEBUG_RDMA_RD +#define dprintf(fmt, arg...) { printf("[DEBUG] " fmt, ##arg); } +#else +#define dprintf(fmt,arg...) {} +#endif + +#define TEST_BUF_SIZE 16 +#define TEST_SEND_BUF_NUM 3 +#define TEST_RDMA_FLG_SIZE (sizeof(unsigned short)) +#define TEST_REPEAT_TIME 1 + +enum rdma_buf_flg{ + RDMA_BUF_RESET_FLG = 0, + RDMA_BUF_WRITE_FLG = 1, +}; + +static void printm(addr_t addr, int size) { + int i; + printf("print memory[0x%lx]\n", addr); + for(i = 0; i < size; i++){ + printf("%02x ", *(unsigned char *)(addr+i)); + } + printf("\n"); +} + +static void poll_magic(config_t config, void *buf, int tag){ + volatile unsigned short *flg_bit = (unsigned short *)(buf + config.buf_size - TEST_RDMA_FLG_SIZE); + while(*flg_bit != tag+RDMA_BUF_WRITE_FLG); +} +/** + * Alloc all MR buffers from MIC card memory + * + */ +int main(int argc, char **argv) { + config_t config; + int i, j, tag = 0, rc = 0; + char sync_res; + double t0, t1, t; + resource_t res; + pdinfo_t pdinfo; + qpinfo_t qpinfo; + mrinfo_t *loc_mr_list = NULL; + int entry; + + if (read_config(&config, argc, argv)) { + goto main_exit; + } + + config.use_rdma = 1; + + if(config.buf_size == 0) { config.buf_size = TEST_BUF_SIZE; } + + if (resource_create(config, &res) || pd_create(&res, &pdinfo) || qp_create(&res, &pdinfo, &qpinfo)) { goto main_exit; } + dprintf("create all successfully..\n"); + + // RDMA-read-from buffer + dprintf("config.buf_size=%d\n", config.buf_size); + void *rdma_buf = calloc(config.buf_size, sizeof(char)); + if(!config.server_flg) { + memset(rdma_buf, '1', config.buf_size); + *(uint16_t*)(rdma_buf + config.buf_size - sizeof(uint16_t)) = 
RDMA_BUF_WRITE_FLG; + } + if(mr_create(&res, &pdinfo, config.buf_size, rdma_buf, &res.rdma_mr)) { goto main_exit; } + + // RDMA-read-to buffer + loc_mr_list = malloc(sizeof(mrinfo_t) * TEST_SEND_BUF_NUM); + for (i = 0; i < TEST_SEND_BUF_NUM; i++) { + void *loc_buf = calloc(config.buf_size, sizeof(char)); + if(mr_create(&res, &pdinfo, config.buf_size, loc_buf, &loc_mr_list[i])) { goto main_exit; } + } + + /* Connect qp of each side and init them*/ + if(connect_qp(config, &res, &qpinfo)) { goto main_exit; } + dprintf("connect done\n"); + debug_print_qp_conn_info(res, qpinfo, &config); + + /* Modify qp state to RTS */ + if(init_qp(config, &qpinfo) || rtr_qp(config, &qpinfo) || rts_qp(config, &qpinfo)) { goto main_exit; } + dprintf("RTS done\n"); + +#if 1 /* barrier */ + for(i = 0; i < (config.server_flg ? config.nremote : 1); i++) { + if(sock_sync_data(qpinfo.sock[i], 1, "R", &sync_res)){ + error_perror("sock_sync_data"); + } + } +#endif + + /* Initiator */ + if (config.server_flg) { + t0 = cur_time(); + for (i = 0; i < TEST_REPEAT_TIME; i++) { + for(j = 0; j < config.nremote; j++) { + + entry = j % TEST_SEND_BUF_NUM; + + // post RDMA_READ + post_send_req(&qpinfo, &loc_mr_list[entry], IBV_WR_RDMA_READ, i, &qpinfo.remote_conn_info[j], 0); + dprintf("post done\n"); + + // wait for completion of command + while(!poll_cq(&qpinfo, SEND_CQ_FLG, &tag) == IBCOMM_ERR_CODE) {} + dprintf("poll_cq done\n"); + + // wait for completion of DMA + poll_magic(config, loc_mr_list[entry].buf, 0); /* magic is 16'h0001 */ + dprintf("poll_magic done\n"); + + dprintf("initiator\n"); + printm((addr_t)loc_mr_list[entry].buf, config.buf_size); + } + } + t1 = cur_time(); + } else { + /* Responder */ + t0 = cur_time(); + for (i = 0; i < TEST_REPEAT_TIME; i++) { + // print buffer data + dprintf("responder\n"); + printm((addr_t)res.rdma_mr.buf, config.buf_size); + } + t1 = cur_time(); + } + +#if 1 /* barrier */ + for(i = 0; i < (config.server_flg ? 
config.nremote : 1); i++) { + if(sock_sync_data(qpinfo.sock[i], 1, "R", &sync_res)){ + error_perror("sock_sync_data"); + } + } +#endif + + t = (t1 - t0) * 1000; + dprintf("%d\t%lf\t%lf\n", config.buf_size, t, t / TEST_REPEAT_TIME); + + main_exit: + /*Can free all resources*/ + if (resource_destroy(&config, &res)) { + dprintf("resource_destroy failed\n"); + } + if(loc_mr_list) { free(loc_mr_list); } + dprintf("bye\n"); + + return rc; +} diff --git a/test/rusage/verbs/rdma_rdv.c b/test/rusage/verbs/rdma_rdv.c new file mode 100755 index 00000000..4850c8bb --- /dev/null +++ b/test/rusage/verbs/rdma_rdv.c @@ -0,0 +1,194 @@ +#include +#include +#include +#include "ibcomm.h" +#include "debug.h" +#include "mtype.h" +#include "mcons.h" +#include "mm_ib_test.h" + +#define DEBUG_RDMA_RD +#ifdef DEBUG_RDMA_RD +#define dprintf(fmt, arg...) { printf("[DEBUG] " fmt, ##arg); } +#else +#define dprintf(fmt,arg...) {} +#endif + +#define TEST_BUF_SIZE 16 +#define TEST_SEND_BUF_NUM 3 +#define TEST_RDMA_FLG_SIZE (sizeof(unsigned short)) +#define TEST_REPEAT_TIME 1 + +enum rdma_buf_flg{ + RDMA_BUF_RESET_FLG = 0, + RDMA_BUF_WRITE_FLG = 1, +}; + +static void printm(addr_t addr, int size) { + int i; + printf("print memory[0x%lx]\n", addr); + for(i = 0; i < size; i++){ + printf("%02x ", *(unsigned char *)(addr+i)); + } + printf("\n"); +} + +static void poll_magic(config_t config, void *buf, int tag){ + volatile unsigned short *flg_bit = (unsigned short *)(buf + config.buf_size - TEST_RDMA_FLG_SIZE); + while(*flg_bit != tag+RDMA_BUF_WRITE_FLG); +} +/** + * Alloc all MR buffers from MIC card memory + * + */ +int main(int argc, char **argv) { + config_t config; + int i, j, k; + int tag = 0, rc = 0; + char sync_res; + double t0, t1, t; + resource_t res; + pdinfo_t pdinfo; + qpinfo_t qpinfo; + mrinfo_t *loc_mr_list = NULL; + int entry; + + if (read_config(&config, argc, argv)) { + goto main_exit; + } + + config.use_rdma = 1; + + if(config.buf_size == 0) { config.buf_size = TEST_BUF_SIZE; } + + if 
(resource_create(config, &res) || pd_create(&res, &pdinfo) || qp_create(&res, &pdinfo, &qpinfo)) { goto main_exit; } + dprintf("create all successfully..\n"); + + // RDMA-read-from buffer + dprintf("config.buf_size=%d\n", config.buf_size); + void *rdma_buf = calloc(config.buf_size, sizeof(char)); + if(!config.server_flg) { + for(i = 0; i < config.buf_size; i++) { + *(uint8_t*)(rdma_buf + i) = i; + } + *(uint16_t*)(rdma_buf + config.buf_size - sizeof(uint16_t)) = RDMA_BUF_WRITE_FLG; + } + if(mr_create(&res, &pdinfo, config.buf_size, rdma_buf, &res.rdma_mr)) { goto main_exit; } + + // RDMA-read-to buffer + loc_mr_list = malloc(sizeof(mrinfo_t) * TEST_SEND_BUF_NUM); + for (i = 0; i < TEST_SEND_BUF_NUM; i++) { + void *loc_buf = calloc(config.buf_size, sizeof(char)); + if(mr_create(&res, &pdinfo, config.buf_size, loc_buf, &loc_mr_list[i])) { goto main_exit; } + } + + /* Connect qp of each side and init them*/ + if(connect_qp(config, &res, &qpinfo)) { goto main_exit; } + dprintf("connect done\n"); + debug_print_qp_conn_info(res, qpinfo, &config); + + /* Modify qp state to RTS */ + if(init_qp(config, &qpinfo) || rtr_qp(config, &qpinfo) || rts_qp(config, &qpinfo)) { goto main_exit; } + dprintf("RTS done\n"); + +#if 1 /* barrier */ + for(i = 0; i < (config.server_flg ? 
config.nremote : 1); i++) { + if(sock_sync_data(qpinfo.sock[i], 1, "R", &sync_res)){ + error_perror("sock_sync_data"); + } + } +#endif + + /* Initiator */ + if (config.server_flg) { + t0 = cur_time(); + for (i = 0; i < TEST_REPEAT_TIME; i++) { + for(j = 0; j < config.nremote; j++) { + + entry = j % TEST_SEND_BUF_NUM; + + mrinfo_t *mrinfo = &loc_mr_list[entry]; + struct ibv_send_wr sr, *bad_wr = NULL; + struct ibv_sge sge[2]; + int ret = 0; + + /* Create sge*/ + sge[0].addr = (uintptr_t)mrinfo->buf; + sge[0].length = (mrinfo->buf_size>>2); + sge[0].lkey = mrinfo->mr->lkey; + + sge[1].addr = (uintptr_t)mrinfo->buf + (mrinfo->buf_size>>1) + (mrinfo->buf_size>>2); + sge[1].length = (mrinfo->buf_size>>2); + sge[1].lkey = mrinfo->mr->lkey; + + memset(&sr, 0, sizeof(struct ibv_send_wr)); + sr.next = NULL; + sr.wr_id = 0; + sr.sg_list = sge; + sr.num_sge = 2; + sr.opcode = IBV_WR_RDMA_READ; + sr.imm_data = 0; + sr.send_flags = IBV_SEND_SIGNALED; + + sr.wr.rdma.remote_addr = qpinfo.remote_conn_info[j].addr; + sr.wr.rdma.rkey = qpinfo.remote_conn_info[j].rkey; + + ret = ibv_post_send(qpinfo.qp, &sr, &bad_wr); + if(ret) { perror("ibv_post_send"); goto fn_fail; } + + dprintf("post done\n"); + + // wait for completion of command + while(!poll_cq(&qpinfo, SEND_CQ_FLG, &tag) == IBCOMM_ERR_CODE) {} + dprintf("poll_cq done\n"); + + printm((addr_t)loc_mr_list[entry].buf, config.buf_size); + + // wait for completion of DMA + //poll_magic(config, loc_mr_list[entry].buf, 0); /* magic is 16'h0001 */ + + for(k = 0; k < config.buf_size; k++) { + if(k < (config.buf_size>>2) || + k >= (config.buf_size>>1) + (config.buf_size>>2)) { + if(loc_mr_list[entry].buf[k] != k) { + printf("fail,k=%d,data=%x\n", k, (uint32_t)loc_mr_list[entry].buf[k]); + } + } + } + + dprintf("poll_magic done\n"); + + dprintf("initiator\n"); + } + } + t1 = cur_time(); + } else { + /* Responder */ + t0 = cur_time(); + for (i = 0; i < TEST_REPEAT_TIME; i++) { + // print buffer data + dprintf("responder\n"); + 
printm((addr_t)res.rdma_mr.buf, config.buf_size); + } + t1 = cur_time(); + } + +#if 1 /* barrier */ + for(i = 0; i < (config.server_flg ? config.nremote : 1); i++) { + if(sock_sync_data(qpinfo.sock[i], 1, "R", &sync_res)){ + error_perror("sock_sync_data"); + } + } +#endif + + t = (t1 - t0) * 1000; + dprintf("%d\t%lf\t%lf\n", config.buf_size, t, t / TEST_REPEAT_TIME); + + main_exit: + dprintf("bye\n"); + + fn_exit: + return rc; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/verbs/rdma_wr.c b/test/rusage/verbs/rdma_wr.c new file mode 100755 index 00000000..f0afcdab --- /dev/null +++ b/test/rusage/verbs/rdma_wr.c @@ -0,0 +1,218 @@ +#include +#include +#include +#include +#include +#include "ibcomm.h" +#include "debug.h" +#include "mtype.h" +#include "mcons.h" +#include "mm_ib_test.h" + +//#define DEBUG_RDMA_WR +#ifdef DEBUG_RDMA_WR +#define dprintf printf +#else +#define dprintf(...) +#endif + +#define TEST_SEND_BUF_NUM 3 +#define TEST_RDMA_FLG_SIZE (sizeof(unsigned short)) +#define NTRIAL 1 /* 120 */ +#define PPOLLS 1 /* sweet spot is around 10 */ +#define NSKIPS (PPOLLS*0) +#define PPOLLR 1 /* sweet spot is around 10 */ +#define NSKIPR (PPOLLR*0) + +#define IBCOM_MAGIC 0x55aa55aa + +typedef struct tailmagic_t { + uint32_t magic; +} tailmagic_t; + +enum rdma_buf_flg{ + RDMA_BUF_RESET_FLG = 0, + RDMA_BUF_WRITE_FLG = 1, +}; + +static unsigned long rdtsc() { + unsigned long x; + __asm__ __volatile__("xorl %%eax, %%eax; cpuid;" : : : "%rax", "%rbx", "%rcx", "%rdx"); /* rdtsc cannot be executed earlier than this */ + __asm__ __volatile__("rdtsc; shl $32, %%rdx; or %%rdx, %%rax" : "=a"(x) : : "%rdx", "memory"); /* rdtsc cannot be executed earlier than here; rdx is written by rdtsc and shl, so it has to be listed as clobbered */ + __asm__ __volatile__("xorl %%eax, %%eax; cpuid;" : : : "%rax", "%rbx", "%rcx", "%rdx"); /* following instructions cannot be executed earlier than this */ + return x; +} + +volatile int k; +int main(int argc, char **argv) { + config_t config; + unsigned long i, j; + int ibcom_errno = 0; + char sync_res; + unsigned
long tscs, tsce; + resource_t res; + pdinfo_t pdinfo; + qpinfo_t qpinfo; + mrinfo_t *loc_mr_list = NULL; + int entry; + int ibv_errno; + + if (read_config(&config, argc, argv)) { + goto fn_exit; + } + + config.use_rdma = 1; + + unsigned long buf_size; + char* str_env = getenv("BUF_SIZE"); + buf_size = str_env ? atol(str_env) : 4096/*48,1073741824ULL * 1 + 4*/; + + if(buf_size == 0) { printf("set buf_size"); goto fn_fail; } + + if(resource_create(config, &res) || pd_create(&res, &pdinfo)) { printf("qp_create failed\n"); goto fn_fail; } + + ibv_errno = qp_create(&res, &pdinfo, &qpinfo); + IBCOM_ERR_CHKANDJUMP(ibv_errno, -1, printf("qp_create failed\n")); + + /* create MR buffers */ + + // rdma-write-to buffer +#if 1 + void *rdma_buf = mmap(0, buf_size * NTRIAL, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + if(rdma_buf == MAP_FAILED) { rdma_buf = NULL; } else { memset(rdma_buf, 0, buf_size * NTRIAL); } /* mmap() signals failure with MAP_FAILED, not NULL: map it to NULL for the check below and never memset a failed mapping */ +#else + void *rdma_buf = calloc(buf_size * NTRIAL, sizeof(char)); +#endif + if(!rdma_buf) { printf("mmap failed\n"); goto fn_fail; } + if(mr_create(&res, &pdinfo, buf_size * NTRIAL, rdma_buf, &res.rdma_mr)) { printf("mr_create failed\n"); goto fn_fail; } + +#if 0 + // TLB prefetch + for (i = 0; i < NTRIAL; i++) { + if(!config.server_flg) { + *((uint32_t *)(rdma_buf + buf_size * i + buf_size - sizeof(uint32_t))) = 0; + } + } +#endif + + // local data buffers + loc_mr_list = calloc(sizeof(mrinfo_t) * NTRIAL, sizeof(char)); if(!loc_mr_list) { printf("calloc failed\n"); goto fn_fail; } /* the list is indexed right below, so stop on allocation failure */ + for (i = 0; i < NTRIAL; i++) { + void *loc_buf = mmap(0, buf_size, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + if(loc_buf == MAP_FAILED) { printf("mmap failed\n"); goto fn_fail; } + if(config.server_flg) { + for(j = 0; j < buf_size; j++) { + *((unsigned char*)loc_buf + j) = (char)i; + } + *((uint32_t *)(loc_buf + buf_size - sizeof(uint32_t))) = 0 + IBCOM_MAGIC; + } + + dprintf("magic addr=%lx\n", (unsigned long)(loc_buf + buf_size - TEST_RDMA_FLG_SIZE)); + + if(mr_create(&res, &pdinfo, buf_size, loc_buf, &loc_mr_list[i])) { printf("mr_create failed\n"); goto fn_fail; } + } + +
if(!config.server_flg) { dprintf("res->rdma_mr.mr->addr=%lx\n", (unsigned long)res.rdma_mr.mr->addr); } + /* exchange gid, lid, qpn, raddr, rkey */ + if(connect_qp(config, &res, &qpinfo)) { printf("connect_qp failed\n"); goto fn_fail; } + debug_print_qp_conn_info(res, qpinfo, &config); + printf("connect_qp done\n"); fflush(stdout); + + if(config.server_flg) { dprintf("qpinfo->remote_conn_info[0].addr=%lx\n", qpinfo.remote_conn_info[0].addr); } + + /* make qp RTS */ + if(init_qp(config, &qpinfo) || rtr_qp(config, &qpinfo) || rts_qp(config, &qpinfo)) { printf("rts failed\n"); goto fn_fail; } + printf("rts done\n"); fflush(stdout); + + /* barrier */ + for(i = 0; i < (config.server_flg ? config.nremote : 1); i++) { + if(sock_sync_data(qpinfo.sock[i], 1, "R", &sync_res)) { perror("sock_sync_data"); } + } + printf("barrier done\n"); fflush(stdout); + + if(config.server_flg) { /* sender side */ + //usleep(500000); + if(NTRIAL % PPOLLS != 0) { printf("set NTRIAL multiple of PPOLLS\n"); goto fn_fail; } + if(NTRIAL <= NSKIPS) { printf("set NTRIAL > NSKIP\n"); goto fn_fail; } + + for (i = 0; i < NTRIAL; i++) { + if(i == NSKIPS) { tscs = rdtsc(); } + + post_send_req2(&qpinfo, &loc_mr_list[0], IBV_WR_RDMA_WRITE, &qpinfo.remote_conn_info[0], 0, i); + +#if 0 + int nfound = 0; + if(i % PPOLLS == PPOLLS - 1) { + k = 0; + while(1) { + int result; + struct ibv_wc cqe[PPOLLS]; + result = ibv_poll_cq(qpinfo.scq, PPOLLS, &cqe[0]); + if(result < 0) { printf("ibv_poll_cq"); goto fn_fail; } + if(result > 0) { + for(j = 0; j < result; j++) { + if(cqe[j].status != IBV_WC_SUCCESS) { printf("cqe status,%s\n", ibv_wc_status_str(cqe[j].status)); goto fn_fail; } + } + //debug_print_mem((addr_t)loc_mr_list[entry].buf, buf_size); + nfound += result; + if(nfound >= PPOLLS) { break; } + } + k++; + } + } +#endif + + } + tsce = rdtsc(); printf("send,%.0f\n", (tsce-tscs)/(double)(NTRIAL-NSKIPS)); +#if 1 + int nfound = 0; + k = 0; + while(1) { + int result; + struct ibv_wc cqe[NTRIAL]; + result = 
ibv_poll_cq(qpinfo.scq, NTRIAL, &cqe[0]); + if(result < 0) { printf("ibv_poll_cq"); goto fn_fail; } + if(result > 0) { + for(j = 0; j < result; j++) { + if(cqe[j].status != IBV_WC_SUCCESS) { printf("cqe status,%s\n", ibv_wc_status_str(cqe[j].status)); goto fn_fail; } + } + //debug_print_mem((addr_t)loc_mr_list[entry].buf, buf_size); + nfound += result; + if(nfound >= NTRIAL) { break; } + } + k++; + } +#endif + } else { /* receiver side */ + if(NSKIPR % PPOLLR !=0) { printf("set NSKIP multiple of PPOLL\n"); goto fn_fail; } + for (i = 0; i < NTRIAL; i++) { + if(i == NSKIPR) { tscs = rdtsc(); } + + // poll on magic + dprintf("res.rdma_mr.buf=%lx\n", (unsigned long)res.rdma_mr.buf); + dprintf("poll addr=%lx\n", (unsigned long)(rdma_buf + buf_size * i + buf_size - sizeof(uint32_t))); + //k = 0; + volatile uint32_t *ptr = (volatile uint32_t *)(rdma_buf + buf_size * i + buf_size - sizeof(uint32_t)); + while(*ptr != IBCOM_MAGIC) { + //k++; if(i >= NSKIPR && k % 65536 == 65535) { printf("i=%d,poll value=%x\n", i, *((uint32_t *)(rdma_buf + buf_size * i + buf_size - sizeof(uint32_t)))); } + __asm__ __volatile__("pause"); + } + //debug_print_mem((addr_t)res.rdma_mr.buf, buf_size); + } + tsce = rdtsc(); printf("recv,%.0f\n", (tsce-tscs)/(double)(NTRIAL-NSKIPR)); + } + + fn_exit: + /*Can free all resources*/ +#if 0 + if (resource_destroy(&config, &res)) { + fprintf(stderr, "resource destroy failed\n"); + } else { + dprintf("destroy all successfully..\n"); + } + if(loc_mr_list) { free(loc_mr_list); } +#endif + return ibcom_errno; + fn_fail: + if(ibcom_errno == 0) { ibcom_errno = -1; } goto fn_exit; /* most failure paths jump here without setting ibcom_errno; make the exit status reflect the failure */ +} diff --git a/test/rusage/verbs/rdma_wr_chain.c b/test/rusage/verbs/rdma_wr_chain.c new file mode 100755 index 00000000..b1085d27 --- /dev/null +++ b/test/rusage/verbs/rdma_wr_chain.c @@ -0,0 +1,331 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "ibcomm.h" +#include "mtype.h" +#include "mcons.h" +#include "mm_ib_test.h" + +//#define DEBUG_RDMA_WR_WITH_IMM +#ifdef
DEBUG_RDMA_WR_WITH_IMM +#define dprintf printf +#else +#define dprintf(...) +#endif + +#define TEST_NRECVBUF 10 +#define TEST_RDMA_FLG_SIZE (sizeof(unsigned short)) +#define NTRIAL 30 +#define PPOLLS 10 /* sweet spot is around 10 */ +#define NSKIPS 10 +#define NSKIPR 10 + +/* Read the time-stamp counter, fenced on both sides by the serializing cpuid instruction. */ +static unsigned long rdtsc() { + unsigned long x; + __asm__ __volatile__("xorl %%eax, %%eax; cpuid;" : : : "%rax", "%rbx", "%rcx", "%rdx"); /* rdtsc cannot be executed earlier than this */ + __asm__ __volatile__("rdtsc; shl $32, %%rdx; or %%rdx, %%rax" : "=a"(x) : : "%rdx", "memory"); /* rdtsc cannot be executed earlier than here; rdx is overwritten by rdtsc/shl, so it is listed as a clobber */ + __asm__ __volatile__("xorl %%eax, %%eax; cpuid;" : : : "%rax", "%rbx", "%rcx", "%rdx"); /* following instructions cannot be executed earlier than this */ + return x; +} + + +int debug_counter = 0, debug_counter2 = 0, debug_counter3 = 0, dc = 0; + +int main(int argc, char **argv) { + int ibcom_errno; + config_t config; + int i, j, k, tag = 0, rc = 0; + char sync_res; + unsigned long tscs, tsce; + resource_t res; + pdinfo_t pdinfo; + qpinfo_t qpinfo; + mrinfo_t *loc_mr_list = NULL; + mrinfo_t *mrinfo_recv_list = NULL; + + FILE* fp; + fp = popen("cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq", "r"); + if(!fp) { printf("popen failed\n"); goto fn_fail; } + char freq_str[256]; + int nread = fread(freq_str, sizeof(char), sizeof(freq_str) - 1, fp); /* leave room for the terminator written below */ + if(!nread) { printf("popen failed"); pclose(fp); goto fn_fail; } + freq_str[nread] = 0; + long int freq = strtol(freq_str, NULL, 10) * 1000; + if(freq != 2601000000) { printf("freq=%ld\n", freq); pclose(fp); goto fn_fail; } /* the benchmark only runs at this exact CPU frequency */ + pclose(fp); + + if(read_config(&config, argc, argv)) { goto fn_fail; } + + config.use_rdma = 1; + + if(config.buf_size == 0) { printf("set buf_size"); goto fn_fail; } + + if (resource_create(config, &res) || pd_create(&res, &pdinfo) || qp_create(&res, &pdinfo, &qpinfo)) { goto main_exit; } + + // rdma-write-to ring with 2NTRIAL slots + //#define SHM +#ifdef SHM +#define MAX2(x,y) ((x)>(y)?(x):(y)) + int shmid = shmget(IPC_PRIVATE,
MAX2(2*1024*1024, IBCOM_INLINE_DATA * NCHAIN * NTRIAL), SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W); + if(shmid < 0) { perror("shmget"); goto fn_fail; } + //printf("shmid: 0x%x\n", shmid); + void *rdma_buf = shmat(shmid, 0, 0); + if(rdma_buf == (char *)-1) { + perror("Shared memory attach failure"); + shmctl(shmid, IPC_RMID, NULL); + goto fn_fail; + } +#else + void *rdma_buf = mmap(0, IBCOM_INLINE_DATA * NCHAIN * NTRIAL, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); +#endif + if(!rdma_buf || rdma_buf == MAP_FAILED) { printf("mmap failed\n"); goto fn_fail; } /* validate before the first write; mmap() fails with MAP_FAILED, not NULL */ + /* unmark magic */ + memset(rdma_buf, 0, IBCOM_INLINE_DATA * NCHAIN * NTRIAL); + if(mr_create(&res, &pdinfo, IBCOM_INLINE_DATA * NCHAIN * NTRIAL, rdma_buf, &res.rdma_mr)) { printf("mr_create failed\n"); goto fn_fail; } + +#define NSBUF 1 + // rdma-write-from buffers + loc_mr_list = malloc(sizeof(mrinfo_t) * NSBUF); if(!loc_mr_list) { printf("malloc failed\n"); goto fn_fail; } /* the list is indexed immediately below */ + for(i = 0; i < NSBUF; i++) { + void *loc_buf = mmap(0, IBCOM_INLINE_DATA * NCHAIN, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + if(loc_buf == MAP_FAILED) { printf("mmap failed\n"); goto fn_fail; } + if(config.server_flg) { + for(j = 0; j < IBCOM_INLINE_DATA * NCHAIN; j++) { + *((char*)(loc_buf + j)) = IBCOM_INLINE_DATA * NCHAIN * i + j; + } + *((uint32_t*)(loc_buf + IBCOM_INLINE_DATA * NCHAIN - sizeof(uint32_t))) = IBCOM_MAGIC; + } + if(mr_create(&res, &pdinfo, IBCOM_INLINE_DATA * NCHAIN, loc_buf, &loc_mr_list[i])) { printf("mr_create fail\n"); goto fn_fail; } + } + dprintf("create RDMA buffer successfully..\n"); + + /* Connect qp of each side and init them*/ + if (connect_qp(config, &res, &qpinfo)) { + goto main_exit; + } + dprintf("connect done\n"); + debug_print_qp_conn_info(res, qpinfo, &config); + + /* bring qp up to RTS */ + if(init_qp(config, &qpinfo) || rtr_qp(config, &qpinfo) || rts_qp(config, &qpinfo)) { printf("trs failed\n"); goto fn_fail; } + + /* pre-post receive commands */ + if(!config.server_flg) { +#if 0 + for(i = 0; i < _MAX_RQ_CAPACITY - 16; i++){ + ibcom_errno = ibcom_irecv(&qpinfo, 0); +
if(ibcom_errno) { printf("post_recv_req\n"); goto fn_fail; } + } +#endif + } + + /* barrier */ + for(i = 0; i < (config.server_flg ? config.nremote : 1); i++) { + if(sock_sync_data(qpinfo.sock[i], 1, "R", &sync_res)){ + perror("sock_sync_data"); + } + } + + if (config.server_flg) { /* sender side */ + // usleep(1000000); +#define TLBPREF_AHEAD 20//20 + int tlb_pref_ahd; + if(NTRIAL % PPOLLS != 0) { printf("set NTRIAL multiple of PPOLLS\n"); goto fn_fail; } + if(NTRIAL <= NSKIPS) { printf("set NTRIAL > NSKIP\n"); goto fn_fail; } + + for (i = 0; i < NTRIAL; i++) { + if(i == NSKIPS) { tscs = rdtsc(); } + +#if 0 + for(j = 0; j < NCHAIN - 1; j++) { + post_send_req4(&qpinfo, &loc_mr_list[i % NSBUF], IBV_WR_RDMA_WRITE, &qpinfo.remote_conn_info[0], 0, i * NCHAIN + j, IBCOM_INLINE_DATA * j); + } + post_send_req4(&qpinfo, &loc_mr_list[i % NSBUF], IBV_WR_RDMA_WRITE_WITH_IMM, &qpinfo.remote_conn_info[0], i, i * NCHAIN + j, IBCOM_INLINE_DATA * j); +#else + + ibcom_isend_chain(&qpinfo, &loc_mr_list[i % NSBUF], IBV_WR_RDMA_WRITE, &qpinfo.remote_conn_info[0], i, i); + +#endif +#if 1 + tlb_pref_ahd = 4096 * TLBPREF_AHEAD; + __asm__ __volatile__ + ("movq %0, %%rsi;" + "movq 0(%%rsi), %%rax;" + : : "r"((uint64_t)loc_mr_list[i % NSBUF].buf + tlb_pref_ahd) : "%rsi", "%rax"); +#endif +#if 1 + __asm__ __volatile__ + ("movq %0, %%rsi;" + "prefetchnta 0x00(%%rsi);" + "prefetchnta 0x40(%%rsi);" + "prefetchnta 0x80(%%rsi);" + "prefetchnta 0xc0(%%rsi);" + : : "r"((uint64_t)loc_mr_list[(i+4) % NSBUF].buf) : "%rsi"); +#endif + + //#define POLL_SCQ_PERIODICALLY + +#ifdef POLL_SCQ_PERIODICALLY + if(i % PPOLLS == PPOLLS - 1) { + int nfound = 0; + k = 0; + while(1) { + int result; + struct ibv_wc cqe[PPOLLS * NCHAIN]; + result = ibv_poll_cq(qpinfo.scq, PPOLLS * NCHAIN, &cqe[0]); + if(result < 0) { printf("ibv_poll_cq"); goto fn_fail; } + if(result > 0) { + for(j = 0; j < result; j++) { + if(cqe[j].status != IBV_WC_SUCCESS) { printf("cqe status\n"); goto fn_fail; } + } + nfound += result; + 
debug_counter3 += result; /* was misspelled, which broke the build whenever POLL_SCQ_PERIODICALLY was defined */ + if(nfound == PPOLLS * NCHAIN) { break; } + } + k++; + } + } +#endif + //printf("%d ", i); + } + tsce = rdtsc(); printf("send,%.0f\n", (tsce-tscs)/(double)(NTRIAL-NSKIPS)); +#ifndef POLL_SCQ_PERIODICALLY + int nfound = 0; + k = 0; + while(1) { + int result; + struct ibv_wc cqe[NTRIAL * NCHAIN]; + result = ibv_poll_cq(qpinfo.scq, NTRIAL * NCHAIN, &cqe[0]); + if(result < 0) { printf("ibv_poll_cq"); goto fn_fail; } + if(result > 0) { + for(j = 0; j < result; j++) { + if(cqe[j].status != IBV_WC_SUCCESS) { printf("cqe status\n"); goto fn_fail; } + } + nfound += result; + debug_counter3 += result; + if(nfound == NTRIAL * NCHAIN) { break; } + } + k++; + } +#endif + dprintf("debug_counter=%d\n", debug_counter3); + } else { // receiver side + int tlb_pref_ahd; +#define TLB_PREF_AMT_AHEAD 20//20 +#define PREF_AHEAD 10 +#if 0 + for(j = 0; j < PREF_AHEAD; j++) { + __asm__ __volatile__ + ("movq %0, %%rsi;" + "prefetchnta 0x00(%%rsi);" + "prefetchnta 0x40(%%rsi);" + "prefetchnta 0x80(%%rsi);" + "prefetchnta 0xc0(%%rsi);" + : : "r"(res.rdma_mr.buf + IBCOM_INLINE_DATA * NCHAIN * (j)) : "%rsi"); + } +#endif + +#if 1 + for(tlb_pref_ahd = 0; tlb_pref_ahd < 4096*TLB_PREF_AMT_AHEAD; tlb_pref_ahd += 4096) { + __asm__ __volatile__ + ("movq %0, %%rsi;" + "movq 4096(%%rsi), %%rax;" + : : "r"(rdma_buf + tlb_pref_ahd) : "%rsi", "%rax"); + } +#endif + for(i = 0; i < NTRIAL; i++) { + if(i == NSKIPR) { tscs = rdtsc(); } + + volatile uint32_t* magic = (volatile uint32_t*)(res.rdma_mr.buf + IBCOM_INLINE_DATA * NCHAIN * i + IBCOM_INLINE_DATA * NCHAIN - sizeof(uint32_t)); + + k = 4096*5; + void* tlb_pref_ptr = (void*)magic + 4096*TLB_PREF_AMT_AHEAD; +#if 1 + tlb_pref_ahd = (uint64_t)magic + 4096*TLB_PREF_AMT_AHEAD - (uint64_t)rdma_buf; +#else + tlb_pref_ahd = 4096*TLB_PREF_AMT_AHEAD; +#endif + //tlb_pref_ahd = tlb_pref_ahd % (IBCOM_INLINE_DATA * NCHAIN * NTRIAL); + + while(*magic != IBCOM_MAGIC) { +#if 1 + __asm__ __volatile__ + ("movq %0, %%rsi;" + "prefetchnta
0x00(%%rsi);" + "prefetchnta 0x40(%%rsi);" + "prefetchnta 0x80(%%rsi);" + "prefetchnta 0xc0(%%rsi);" + : : "r"(res.rdma_mr.buf + IBCOM_INLINE_DATA * NCHAIN * (i+1)) : "%rsi"); +#endif +#if 1 + __asm__ __volatile__ + ("movq %0, %%rsi;" + "prefetchnta 0x00(%%rsi);" + "prefetchnta 0x40(%%rsi);" + "prefetchnta 0x80(%%rsi);" + "prefetchnta 0xc0(%%rsi);" + : : "r"(res.rdma_mr.buf + IBCOM_INLINE_DATA * NCHAIN * (i+2)) : "%rsi"); +#endif +#if 0 + + __asm__ __volatile__ + ("movq %0, %%rsi;" + "prefetchnta 0x00(%%rsi);" + "prefetchnta 0x40(%%rsi);" + "prefetchnta 0x80(%%rsi);" + "prefetchnta 0xc0(%%rsi);" + : : "r"(res.rdma_mr.buf + IBCOM_INLINE_DATA * NCHAIN * (i+PREF_AHEAD)) : "%rsi"); +#endif +#if 0 + __asm__ __volatile__ + ("movq %0, %%rsi;" + "prefetchnta 0x00(%%rsi);" + "prefetchnta 0x40(%%rsi);" + "prefetchnta 0x80(%%rsi);" + "prefetchnta 0xc0(%%rsi);" + : : "r"(res.rdma_mr.buf + IBCOM_INLINE_DATA * NCHAIN * (i+32)) : "%rsi"); +#endif +#if 0 + __asm__ __volatile__ + ("movq %0, %%rsi;" + "movq 0(%%rsi), %%rax;" + : : "r"(magic+k) : "%rsi", "%rax"); + //k += 4096; +#endif +#if 0 + __asm__ __volatile__ + ("movq %0, %%rsi;" + "movq 0(%%rsi), %%rax;" + : : "r"(tlb_pref_ptr) : "%rsi", "%rax"); +#endif +#if 1 + __asm__ __volatile__ + ("movq %0, %%rsi;" + "movq 0(%%rsi), %%rax;" + : : "r"(rdma_buf + tlb_pref_ahd) : "%rsi", "%rax"); + tlb_pref_ahd = (tlb_pref_ahd + 4096*20) % (IBCOM_INLINE_DATA * NCHAIN * NTRIAL); +#endif + } + //print_mem((addr_t)res.rdma_mr.buf + IBCOM_RDMABUF_SZSEG * i * 2, 32); + } + tsce = rdtsc(); printf("recv,%.0f\n", (tsce-tscs)/(double)(NTRIAL-NSKIPR)); + } + + fn_fail: +main_exit: +#ifdef SHM + shmctl(shmid, IPC_RMID, NULL); +#endif +#if 0 + if (resource_destroy(&config, &res)) { + dprintf("resource destroy failed\n"); + } + if(loc_mr_list) { free(loc_mr_list); } +#endif + return rc; +} diff --git a/test/rusage/verbs/rdma_wr_with_imm_modify_qp.c b/test/rusage/verbs/rdma_wr_with_imm_modify_qp.c new file mode 100755 index 00000000..d8e00141 --- 
/dev/null +++ b/test/rusage/verbs/rdma_wr_with_imm_modify_qp.c @@ -0,0 +1,228 @@ +#include +#include +#include +#include +#include "ibcomm.h" +#include "debug.h" +#include "mtype.h" +#include "mcons.h" +#include "mm_ib_test.h" + +//#define DEBUG_RDMA_WR_WITH_IMM +#ifdef DEBUG_RDMA_WR_WITH_IMM +#define dprintf printf +#else +#define dprintf(...) +#endif + +#define TEST_NRECVBUF 10 +#define TEST_NSENDBUF 10 +#define TEST_SZBUF 10 +#define TEST_RDMA_FLG_SIZE (sizeof(unsigned short)) +#define NTRIAL 120 +#define PPOLLS 2 /* sweet spot is around 10 */ +#define NSKIPS (PPOLLS*1) +#define PPOLLR 60 /* sweet spot is around 10 */ +#define NSKIPR (PPOLLR*1) + +enum rdma_buf_flg{ + RDMA_BUF_RESET_FLG = 0, + RDMA_BUF_WRITE_FLG = 1, +}; + +/* Read the time-stamp counter, fenced on both sides by the serializing cpuid instruction. */ +static unsigned long rdtsc() { + unsigned long x; + __asm__ __volatile__("xorl %%eax, %%eax; cpuid;" : : : "%rax", "%rbx", "%rcx", "%rdx"); /* rdtsc cannot be executed earlier than this */ + __asm__ __volatile__("rdtsc; shl $32, %%rdx; or %%rdx, %%rax" : "=a"(x) : : "%rdx", "memory"); /* rdtsc cannot be executed earlier than here; rdx is overwritten by rdtsc/shl, so it is listed as a clobber */ + __asm__ __volatile__("xorl %%eax, %%eax; cpuid;" : : : "%rax", "%rbx", "%rcx", "%rdx"); /* following instructions cannot be executed earlier than this */ + return x; +} + +/* Store tag+RDMA_BUF_WRITE_FLG in the last TEST_RDMA_FLG_SIZE bytes of the config.buf_size-byte buffer. */ +static void set_written_flg(config_t config, void *buf, int tag){ + *(unsigned short *)(buf + config.buf_size - TEST_RDMA_FLG_SIZE) = tag+RDMA_BUF_WRITE_FLG; +} + +/** + * Alloc all MR buffers from MIC card memory + * + */ +int main(int argc, char **argv) { + config_t config; + int i, j, k, tag = 0, rc = 0; + char sync_res; + unsigned long tscs, tsce; + resource_t res; + pdinfo_t pdinfo; + qpinfo_t qpinfo; + mrinfo_t *loc_mr_list = NULL; + int entry, wait_tag; + mrinfo_t *mrinfo_recv_list = NULL; + int mr_idx = 0; + + FILE* fp; + fp = popen("cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq", "r"); + if(!fp) { printf("popen failed\n"); goto fn_fail; } + char freq_str[256]; + int nread = fread(freq_str, sizeof(char), sizeof(freq_str) - 1, fp); /* leave room for the terminator written after the read */ + if(!nread) {
printf("popen failed"); goto fn_fail; } + freq_str[nread] = 0; + //printf("%s", freq_str); + long int freq = strtol(freq_str, NULL, 10) * 1000; + printf("freq=%ld\n", freq); + pclose(fp); + //exit(1); + + if (read_config(&config, argc, argv)) { + goto main_exit; + } + + config.use_rdma = 1; + + if(config.buf_size == 0) + config.buf_size = TEST_SZBUF; + + if (resource_create(config, &res) || pd_create(&res, &pdinfo) + || qp_create(&res, &pdinfo, &qpinfo)) { + goto main_exit; + } + debug_printf("create all successfully..\n"); + + /* create MR buffers */ + int buf_total_size = config.buf_size; + + // rdma r/w buffer + // add rdma flag size + void *rdma_buf = calloc(buf_total_size, sizeof(char)); + memset(rdma_buf, 0, buf_total_size); + if (mr_create(&res, &pdinfo, buf_total_size, rdma_buf, &res.rdma_mr)) + goto main_exit; + + // local data buffers + loc_mr_list = malloc(sizeof(mrinfo_t) * TEST_NSENDBUF); + for (i = 0; i < TEST_NSENDBUF; i++) { + void *loc_buf = calloc(buf_total_size, sizeof(char)); + if (config.server_flg) { + memset(loc_buf, 's'+ i, config.buf_size); + } else { + memset(loc_buf, 'c'+ i, config.buf_size); + } + set_written_flg(config, loc_buf, 0); /* magic is 16'h0001 */ + + if (mr_create(&res, &pdinfo, buf_total_size, loc_buf, &loc_mr_list[i])) + goto main_exit; + } + dprintf("create RDMA buffer successfully..\n"); + + /* Connect qp of each side and init them*/ + if (connect_qp(config, &res, &qpinfo)) { + goto main_exit; + } + dprintf("connect done\n"); + debug_print_qp_conn_info(res, qpinfo, &config); + + /* Register fixed recv buffers */ + mrinfo_recv_list = malloc(sizeof(mrinfo_t) * TEST_NRECVBUF); + for (i = 0; i < TEST_NRECVBUF; i++) { + char *buf = calloc(config.buf_size, sizeof(char)); + if(buf == NULL) { + fprintf(stderr, "cannot malloc %dth buf\n", i); + goto main_exit; + } + + if (mr_create(&res, &pdinfo, config.buf_size, buf, &mrinfo_recv_list[i])) { + goto main_exit; + } + } + + /* Modify qp state to RTS */ + if (init_qp(config, &qpinfo) 
+ || rtr_qp(config, &qpinfo) || rts_qp(config, &qpinfo)) { + goto main_exit; + } + debug_printf("RTS done\n"); + +#if 1 + /* barrier */ + for(i = 0; i < (config.server_flg ? config.nremote : 1); i++) { + if(sock_sync_data(qpinfo.sock[i], 1, "R", &sync_res)){ + error_perror("sock_sync_data"); + } + } +#endif + + if (config.server_flg) { /* sender side */ + if(NTRIAL % PPOLLS != 0) { printf("set NTRIAL multiple of PPOLLS\n"); goto fn_fail; } + if(NTRIAL <= NSKIPS) { printf("set NTRIAL > NSKIP\n"); goto fn_fail; } + + for (i = 0; i < NTRIAL; i++) { + if(i == NSKIPS) { tscs = rdtsc(); } + for(j = 0; j < config.nremote; j++) { + + entry = j % TEST_NSENDBUF; + + post_send_req(&qpinfo, &loc_mr_list[entry], IBV_WR_RDMA_WRITE_WITH_IMM, 0, &qpinfo.remote_conn_info[j], 100+0); + + + int nfound = 0; + if(i % PPOLLS == PPOLLS - 1) { + k = 0; + while(1) { + int result, c; /* c indexes completions; j must stay the remote index of the enclosing loop */ + struct ibv_wc cqe[PPOLLS]; + result = ibv_poll_cq(qpinfo.scq, PPOLLS, &cqe[0]); + if(result < 0) { printf("ibv_poll_cq"); goto fn_fail; } + if(result > 0) { + for(c = 0; c < result; c++) { + if(cqe[c].status != IBV_WC_SUCCESS) { printf("cqe status\n"); goto fn_fail; } + } + //debug_print_mem((addr_t)loc_mr_list[entry].buf, config.buf_size); + nfound += result; + if(nfound == PPOLLS) { break; } + } + k++; + } + } + } + } + tsce = rdtsc(); printf("send,%.0f\n", (tsce-tscs)/(double)(NTRIAL-NSKIPS)); + } else { // receiver side + if(NSKIPR % PPOLLR !=0) { printf("set NSKIP multiple of PPOLL\n"); goto fn_fail; } + for (i = 0; i < NTRIAL; i++) { + if(i == NSKIPR) { tscs = rdtsc(); } + wait_tag = i % TEST_NSENDBUF; + + post_recv_req(&qpinfo, &mrinfo_recv_list[i%TEST_NSENDBUF], i%TEST_NSENDBUF); + int nfound = 0; + if(i % PPOLLR == PPOLLR - 1) { + k = 0; + while(1) { + int result; + struct ibv_wc cqe[PPOLLR]; + result = ibv_poll_cq(qpinfo.rcq, 1, &cqe[0]); + if(result < 0) { printf("poll_cq\n"); goto fn_fail; } + if(result > 0) { + for(j = 0; j < result; j++) { + if(cqe[j].status != IBV_WC_SUCCESS) {
printf("cqe.status"); goto fn_fail; } + } + + //debug_print_mem((addr_t)res.rdma_mr.buf, config.buf_size); + nfound += result; + if(nfound == PPOLLR) { break; } + } + k++; + } + } + } + tsce = rdtsc(); printf("recv,%.0f\n", (tsce-tscs)/(double)(NTRIAL-NSKIPR)); + } + + fn_fail: +main_exit: + + if (resource_destroy(&config, &res)) { + dprintf("resource destroy failed\n"); + } + if(loc_mr_list) { free(loc_mr_list); } + + return rc; +} diff --git a/test/rusage/verbs/read_config.c b/test/rusage/verbs/read_config.c new file mode 100755 index 00000000..c17daa9c --- /dev/null +++ b/test/rusage/verbs/read_config.c @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include "ibcomm.h" +/* + int read_config(config_t *config, int argc, char **argv){ + memset(config, 0, sizeof(config_t)); + config->server_name = NULL; + config->ib_port = 1; + config->dev_name = NULL; + + // client mode + if(argc == 4){ + config->server_name = argv[1]; + config->tcp_port = strtoul(argv[2], NULL, 0); + config->buf_size = strtoul(argv[3], NULL, 0); + } + // server mode + else if(argc == 3){ + config->tcp_port = strtoul(argv[1], NULL, 0); + config->buf_size = strtoul(argv[2], NULL, 0); + config->server_flg = 1; + } + else{ + printf("usage: ./main \n"); + return IBCOMM_ERR_CODE; + } + + if(config->tcp_port <=0 ) + return IBCOMM_ERR_CODE; + + return 0; + } + + */ + +int read_config(config_t *config, int argc, char **argv) { + memset(config, 0, sizeof(config_t)); + config->server_name = NULL; + config->ib_port = 1; + config->dev_name = NULL; + config->server_flg = 1; + config->nremote = 1; + config->buf_size = 40 + 8; /* UD requires more than 40 byte */ + config->tcp_port = 5256; + + while (1) { + int oc = getopt(argc, argv, "s:p:m:n:h"); + if (oc == -1) + break; + switch (oc) { + case 's': /* name for IP for exchanging LID and QPN */ + config->server_name = optarg; + config->server_flg = 0; + break; + case 'p': /* TCP port for exchange LID and QPN */ + config->tcp_port = atoi(optarg); + 
break; + case 'm': + config->buf_size = atoi(optarg); + break; + case 'n': /* number of remote nodes */ + config->nremote = atoi(optarg); + break; + case 'h': + default: + printf("usage: ./main [-s ] [-p ] [-m ]\n" + "Example: ssh cn01 ./main -p 10000 & ./main -s cn01 -p 10000\n"); + exit(-1); + break; + } + } + + // if (config->tcp_port <= 0) { return IBCOMM_ERR_CODE; } + // no need to set tcp_port for IB + + return 0; +} diff --git a/test/rusage/verbs/reg_mr.c b/test/rusage/verbs/reg_mr.c new file mode 100644 index 00000000..d35397ca --- /dev/null +++ b/test/rusage/verbs/reg_mr.c @@ -0,0 +1,153 @@ +#include +#include +#include +#include +#include + +#include // mmap + +#define BUF_SIZE (32)/*(1024)*/ +#define LOOP_NUM (1000) + +#define __USE_MALLOC__ + +static inline uint64_t rdtsc_light(void ) +{ + uint64_t x; + __asm__ __volatile__("rdtscp;" /* rdtscp don't jump over earlier instructions */ + "shl $32, %%rdx;" + "or %%rdx, %%rax" : + "=a"(x) : + : + "%rcx", "%rdx", "memory"); + return x; +} + +/** + * ibv_reg_mr test + * + * Allocates LOOP_NUM buffers of BUF_SIZE bytes, registers each with ibv_reg_mr, + * deregisters them, and prints the TSC cycles spent in each phase. + */ +int main(int argc, char **argv) +{ + int i, end = 0; /* end bounds the cleanup loops, so it must be defined on every early "goto exit" */ + int dev_num; + struct ibv_device **dev_list = NULL; + struct ibv_context *ib_ctx = NULL; + struct ibv_pd *pd = NULL; + + struct ibv_mr *mr[LOOP_NUM] = {NULL}; + int mr_flags; + char *buf[LOOP_NUM]; + int buf_size; + + unsigned long long t1 = 0, t2 = 0, t3 = 0; /* printed on failure paths as well, so never leave them indeterminate */ + + dev_list = ibv_get_device_list(&dev_num); + if (dev_list == NULL) { + perror("ibv_get_device_list"); + goto exit; + } + if (!dev_num) { + printf("no device are found\n"); + goto exit; + } + + printf("dev_num = %d, dev_name = %s\n", dev_num, ibv_get_device_name(dev_list[0])); + + ib_ctx = ibv_open_device(dev_list[0]); + if (!ib_ctx) { + perror("ibv_open_device"); + goto exit; + } + + pd = ibv_alloc_pd(ib_ctx); + if(!pd){ + perror("ibv_alloc_pd"); + goto exit; + } + + mr_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_ATOMIC; + buf_size = BUF_SIZE; + + for (i = 0; i < LOOP_NUM; i++) {
+#ifdef __USE_MALLOC__ + buf[i] = (char *)malloc(buf_size); + if (!buf[i]) { + perror("malloc"); + end = i + 1; + goto exit; + } +#else + buf[i] = mmap(0, buf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (buf[i] == MAP_FAILED) { + perror("mmap"); + end = i + 1; + goto exit; + } +#endif + } + + end = i; + + t1 = rdtsc_light(); + for (i = 0; i < end; i++) { + mr[i] = ibv_reg_mr(pd, buf[i], buf_size, mr_flags); + if (!mr[i]) { + perror("ibv_reg_mr"); + goto exit; + } + } + t2 = rdtsc_light(); + +exit: + for (i = 0; i < end;i ++) { + if (mr[i]) { + ibv_dereg_mr(mr[i]); + } + } + t3 = rdtsc_light(); + + FILE* fp; + fp = popen("cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq", "r"); + if(!fp) { printf("popen failed\n"); goto fn_fail; } + char freq_str[256]; + int nread = fread(freq_str, sizeof(char), sizeof(freq_str) - 1, fp); /* leave room for the terminator written below */ + if(!nread) { printf("popen failed"); pclose(fp); goto fn_fail; } + freq_str[nread] = 0; + long int freq = strtol(freq_str, NULL, 10) * 1000; + printf("CPU frequency:%ld\n", freq); + pclose(fp); + + printf("%d byte x %d\n", BUF_SIZE, end); + printf(" reg_mr time=%llu (%f msec)\n", t2 - t1, (t2 - t1) * (1 / (double)freq) * 1000); + printf(" dereg_mr time=%llu (%f msec)\n", t3 - t2, (t3 - t2) * (1 / (double)freq) * 1000); + + for (i = 0; i < end;i ++) { +#ifdef __USE_MALLOC__ + if (buf[i]) { + free(buf[i]); + } +#else + if (buf[i]) { + munmap(buf[i], buf_size); + } +#endif + } + + if (pd) { + ibv_dealloc_pd(pd); + } + + if (ib_ctx) { + ibv_close_device(ib_ctx); + } + + if (dev_list) { + ibv_free_device_list(dev_list); + } + + fn_exit: + return 0; + fn_fail: + goto fn_exit; +} diff --git a/test/rusage/verbs/resource.c b/test/rusage/verbs/resource.c new file mode 100755 index 00000000..5423604a --- /dev/null +++ b/test/rusage/verbs/resource.c @@ -0,0 +1,390 @@ +#include +#include +#include +#include +#include "ibcomm.h" +#include "debug.h" +#include "list.h" +#include + +//#define DEBUG_RESOURCE +#ifdef DEBUG_RESOURCE +#define dprintf printf
+#else +#define dprintf(...) +#endif + +int resource_create(config_t config, resource_t *res){ + struct ibv_device **dev_list = NULL, *ib_dev = NULL; + int dev_num; + int i, rc = IBCOMM_ERR_CODE; + + /*Init*/ + memset(res, 0, sizeof(resource_t)); + res->pdinfo_list = malloc(sizeof(list_t)); + res->qpinfo_list = malloc(sizeof(list_t)); + res->mrinfo_list = malloc(sizeof(list_t)); + + res->ib_ctx = NULL; + res->port_attr = NULL; + + /*Get IB device list*/ + dev_list = ibv_get_device_list(&dev_num); + printf("resource_create,dev_num=%d\n", dev_num); + ERR_CHKANDJUMP(!dev_list, -1, error_perror("ibv_get_device_list")); + if(!dev_num){ + error_printf("no devices are found\n"); + goto resource_create_exit; + } + /*Find requested HCA*/ + if(!config.dev_name) { + config.dev_name = strdup(ibv_get_device_name(dev_list[0])); + } + printf("Trying to open device %s\n", config.dev_name); + for(i=0; i< dev_num; i++){ + if(!strcmp(ibv_get_device_name(dev_list[i]), config.dev_name)){ + ib_dev = dev_list[i]; + break; + } + } + if(ib_dev == NULL){ + error_printf("no devices are found\n"); + goto resource_create_exit; + } + /*Open HCA*/ + res->ib_ctx = ibv_open_device(ib_dev); + if(!res->ib_ctx){ + error_perror("resource_create,ibv_open_device"); + goto resource_create_exit; + } + + struct ibv_device_attr device_attr; + int ib_errno; + ib_errno = ibv_query_device(res->ib_ctx, &device_attr); + if(ib_errno) { printf("ibv_query_device failed\n"); goto resource_create_exit; } + printf("atomic_cap=%08x\n", device_attr.atomic_cap); + printf("max_qp_rd_atom=%08x\n", device_attr.max_qp_rd_atom); + printf("max_ee_rd_atom=%08x\n", device_attr.max_ee_rd_atom); + printf("max_res_rd_atom=%08x\n", device_attr.max_res_rd_atom); + printf("max_qp_init_rd_atom=%08x\n", device_attr.max_qp_init_rd_atom); + printf("max_ee_init_rd_atom=%08x\n", device_attr.max_ee_init_rd_atom); + + /*Query Port Attr*/ + res->port_attr = malloc(sizeof(struct ibv_port_attr)); + memset(res->port_attr, 0 , sizeof(struct 
ibv_port_attr)); + if(ibv_query_port(res->ib_ctx, config.ib_port, res->port_attr)){ + error_perror("ibv_query_port"); + goto resource_create_exit; + } + printf("res->port_attr.max_msg_sz=%d\n", res->port_attr->max_msg_sz); + rc = 0; + + fn_exit: + return rc; + fn_fail: + resource_create_exit: + /*if error, destroy HCA handle*/ + if(rc){ + if(res->ib_ctx){ + ibv_close_device(res->ib_ctx); + res->ib_ctx = NULL; + } + if(res->port_attr){ + free(res->port_attr); + } + res = NULL; + } + // free other + ib_dev = NULL; + if(dev_list){ + ibv_free_device_list(dev_list); + dev_list = NULL; + } + goto fn_exit; +} + +int pd_create(resource_t *res, pdinfo_t *pdinfo){ + int rc = IBCOMM_ERR_CODE; + + /*Init*/ + memset(pdinfo, 0, sizeof(pdinfo_t)); + pdinfo->pd = NULL; + + /*Alloc on HCA handle*/ + pdinfo->pd = ibv_alloc_pd(res->ib_ctx); + if(pdinfo->pd == NULL){ + error_perror("ibv_alloc_pd"); + goto pd_create_exit; + } + + /*Register to res*/ + list_add(res->pdinfo_list, pdinfo); + rc = 0; + + pd_create_exit: + if(rc) + pdinfo = NULL; + + return rc; +} + +int qp_create(resource_t *res, pdinfo_t *pdinfo, qpinfo_t *qpinfo){ + struct ibv_qp_init_attr qp_init_attr; + int rc = IBCOMM_ERR_CODE; + int ibv_errno; + + /*Init*/ + memset(qpinfo, 0, sizeof(qpinfo_t)); + int i; + for(i = 0; i < NREMOTE; i++) { + qpinfo->sock[i] = -1; // not connected + } + qpinfo->sr_num = 0; + qpinfo->rr_num = 0; + + /*Create cq*/ + qpinfo->scq = ibv_create_cq(res->ib_ctx, _MAX_CQ_CAPACITY, NULL, NULL, 0); + qpinfo->rcq = ibv_create_cq(res->ib_ctx, _MAX_CQ_CAPACITY, NULL, NULL, 0); + if(!qpinfo->scq || !qpinfo->rcq){ + error_perror("qp_create,ibv_create_cq"); + goto qp_create_exit; + } + + /*Create qp*/ + memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr)); + qp_init_attr.qp_type = IBV_QPT_RC; + qp_init_attr.sq_sig_all = 1; + qp_init_attr.send_cq = qpinfo->scq; + qp_init_attr.recv_cq = qpinfo->rcq; + // max SR/RR num in SQ/RQ + qp_init_attr.cap.max_send_wr = _MAX_SQ_CAPACITY; + 
qp_init_attr.cap.max_recv_wr = _MAX_RQ_CAPACITY; + // max SGE num + qp_init_attr.cap.max_send_sge = _MAX_SGE_CAPACITY; + qp_init_attr.cap.max_recv_sge = _MAX_SGE_CAPACITY; + qp_init_attr.cap.max_inline_data = IBCOM_INLINE_DATA; +#if 0 + ibv_errno = show_resident(0); +#endif + qpinfo->qp = ibv_create_qp(pdinfo->pd, &qp_init_attr); + if(qpinfo->qp == NULL){ + error_perror("ibv_create_qp"); + goto qp_create_exit; + } +#if 0 + ibv_errno = show_resident(1); + qpinfo->qp = ibv_create_qp(pdinfo->pd, &qp_init_attr); + ibv_errno = show_resident(2); + qpinfo->qp = ibv_create_qp(pdinfo->pd, &qp_init_attr); + ibv_errno = show_resident(3); + qpinfo->qp = ibv_create_qp(pdinfo->pd, &qp_init_attr); + ibv_errno = show_resident(4); + qpinfo->qp = ibv_create_qp(pdinfo->pd, &qp_init_attr); + ibv_errno = show_resident(5); + qpinfo->qp = ibv_create_qp(pdinfo->pd, &qp_init_attr); + ibv_errno = show_resident(6); +#endif + + qpinfo->max_inline_data = qp_init_attr.cap.max_inline_data; + printf("max_send_wr=%d,max_recv_wr=%d,inline_data=%d,max_send_sge=%d,max_recv_sge=%d\n", qp_init_attr.cap.max_send_wr, qp_init_attr.cap.max_recv_wr, qp_init_attr.cap.max_inline_data, qp_init_attr.cap.max_send_sge, qp_init_attr.cap.max_recv_sge); + + /*Register to res*/ + list_add(res->qpinfo_list, qpinfo); + rc = 0; + + qp_create_exit: + if(rc){ + if(qpinfo->scq){ + ibv_destroy_cq(qpinfo->scq); + qpinfo->scq = NULL; + } + if(qpinfo->rcq){ + ibv_destroy_cq(qpinfo->rcq); + qpinfo->rcq = NULL; + } + if(qpinfo->qp){ + ibv_destroy_qp(qpinfo->qp); + qpinfo->qp = NULL; + } + qpinfo = NULL; + } + fn_exit: + return rc; + fn_fail: + goto fn_exit; +} + +int qp_create_ud(resource_t *res, pdinfo_t *pdinfo, qpinfo_t *qpinfo){ + struct ibv_qp_init_attr qp_init_attr; + int rc = IBCOMM_ERR_CODE; + int ibv_errno; + + /*Init*/ + memset(qpinfo, 0, sizeof(qpinfo_t)); + int i; + for(i = 0; i < NREMOTE; i++) { + qpinfo->sock[i] = -1; // not connected + } + qpinfo->sr_num = 0; + qpinfo->rr_num = 0; + + /*Create cq*/ + qpinfo->scq = 
ibv_create_cq(res->ib_ctx, _MAX_CQ_CAPACITY, NULL, NULL, 0); + qpinfo->rcq = ibv_create_cq(res->ib_ctx, _MAX_CQ_CAPACITY, NULL, NULL, 0); + if(!qpinfo->scq || !qpinfo->rcq){ + error_perror("ibv_create_cq"); + goto qp_create_exit; + } + + /*Create qp*/ + memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr)); + qp_init_attr.qp_type = IBV_QPT_UD; + //qp_init_attr.sq_sig_all = 1; + qp_init_attr.send_cq = qpinfo->scq; + qp_init_attr.recv_cq = qpinfo->rcq; + // max SR/RR num in SQ/RQ + qp_init_attr.cap.max_send_wr = _MAX_SQ_CAPACITY; + qp_init_attr.cap.max_recv_wr = _MAX_RQ_CAPACITY; + // max SGE num + qp_init_attr.cap.max_send_sge = _MAX_SGE_CAPACITY; + qp_init_attr.cap.max_recv_sge = _MAX_SGE_CAPACITY; + + qpinfo->qp = ibv_create_qp(pdinfo->pd, &qp_init_attr); + if(qpinfo->qp == NULL){ + error_perror("ibv_create_qp"); + goto qp_create_exit; + } + qpinfo->max_inline_data = qp_init_attr.cap.max_inline_data; + printf("max_send_wr=%d,max_recv_wr=%d,max_send_sge=%d,max_recv_sge=%d,\n", qp_init_attr.cap.max_send_wr, qp_init_attr.cap.max_recv_wr, qp_init_attr.cap.max_send_sge, qp_init_attr.cap.max_recv_sge); + + /*Register to res*/ + list_add(res->qpinfo_list, qpinfo); + rc = 0; + + qp_create_exit: + if(rc){ + if(qpinfo->scq){ + ibv_destroy_cq(qpinfo->scq); + qpinfo->scq = NULL; + } + if(qpinfo->rcq){ + ibv_destroy_cq(qpinfo->rcq); + qpinfo->rcq = NULL; + } + if(qpinfo->qp){ + ibv_destroy_qp(qpinfo->qp); + qpinfo->qp = NULL; + } + qpinfo = NULL; + } + return rc; +} + +int mr_create(resource_t *res, pdinfo_t *pdinfo, int buf_size, char *buf, mrinfo_t *mrinfo) { + int mr_flags; + int rc = IBCOMM_ERR_CODE; + + /*Init*/ + memset(mrinfo, 0, sizeof(mrinfo_t)); + mrinfo->buf = buf; + mrinfo->buf_size = buf_size; + dprintf("mr_create,mrinfo->buf=%lx\n", (unsigned long)mrinfo->buf); + + /*Create mr*/ + mr_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_ATOMIC; + mrinfo->mr = ibv_reg_mr(pdinfo->pd, buf, buf_size, mr_flags); + 
if(mrinfo->mr == NULL){ + perror("ibv_reg_mr"); + goto mr_create_exit; + } + + /*Register to res*/ + list_add(res->mrinfo_list, mrinfo); + rc = 0; + + mr_create_exit: + if(rc) { + if(mrinfo->mr) { ibv_dereg_mr(mrinfo->mr); } + if(mrinfo) { mrinfo = NULL; } + } + return rc; +} + +int resource_destroy(config_t *config, resource_t *res){ + int rc = 0; + + //config.dev_name + if(config->dev_name){ + free(config->dev_name); + } + // qp + qpinfo_t *qpinfo = NULL; + while((qpinfo = (qpinfo_t *)list_pop(res->qpinfo_list)) != NULL){ + // qp + if(qpinfo->qp && ibv_destroy_qp(qpinfo->qp)){ + error_perror("ibv_destroy_qp"); + rc = IBCOMM_ERR_CODE; + } + qpinfo->qp = NULL; + // scq + if(qpinfo->scq && ibv_destroy_cq(qpinfo->scq)){ + error_perror("ibv_destroy_cq"); + rc = IBCOMM_ERR_CODE; + } + qpinfo->scq = NULL; + // rcq + if(qpinfo->rcq && ibv_destroy_cq(qpinfo->rcq)){ + error_perror("ibv_destroy_cq"); + rc = IBCOMM_ERR_CODE; + } + qpinfo->rcq = NULL; + // sock + int i; + for(i = 0; i < (config->server_flg ? 
config->nremote : 1); i++) { + if(qpinfo->sock[i] >= 0 && close(qpinfo->sock[i])){ + error_perror("close"); + rc = IBCOMM_ERR_CODE; + } + } + qpinfo = NULL; + } + + // mr + mrinfo_t *mrinfo = NULL; + while ((mrinfo = (mrinfo_t *) list_pop(res->mrinfo_list)) != NULL) { + if (mrinfo->mr && ibv_dereg_mr(mrinfo->mr)) { + error_perror("ibv_dereg_mr"); + rc = IBCOMM_ERR_CODE; + } + mrinfo->mr = NULL; + if (mrinfo->buf) { + if (config->pci_buf_flg) { + //aal_host_mem_free(mrinfo->buf); + } else { + munmap(mrinfo->buf, mrinfo->buf_size); + } + } + mrinfo = NULL; + } + // pd + pdinfo_t *pdinfo = NULL; + while((pdinfo = (pdinfo_t *)list_pop(res->pdinfo_list)) != NULL){ + if(pdinfo->pd && ibv_dealloc_pd(pdinfo->pd)){ + error_perror("ibv_dealloc_pd"); + rc = IBCOMM_ERR_CODE; + } + pdinfo = NULL; + } + + if (res->ib_ctx && ibv_close_device(res->ib_ctx)) { + error_perror("ibv_close_device"); + rc = IBCOMM_ERR_CODE; + } + if(res->port_attr){ + free(res->port_attr); + } + res = NULL; + + return rc; +} + diff --git a/test/rusage/verbs/send.c b/test/rusage/verbs/send.c new file mode 100755 index 00000000..60d1672d --- /dev/null +++ b/test/rusage/verbs/send.c @@ -0,0 +1,148 @@ +#include +#include +#include +#include +#include "ibcomm.h" +#include "debug.h" +#include "mtype.h" +#include "mm_ib_test.h" + +#define SERVER_BUF_NUM TEST_SERVER_BUF_NUM +#define REPEAT_TIME 1 + + +/** + * Alloc all buffers from host memory + * + */ +int main(int argc, char **argv){ + config_t config; + int i, mr_idx = 0, rc = 0; + char sync_res; + double t0, t1, t; + resource_t res; + pdinfo_t pdinfo; + qpinfo_t qpinfo; + mrinfo_t *mrinfo_send_list = NULL, *mrinfo_recv_list = NULL; + + debug_printf("enter\n"); + + if(read_config(&config, argc, argv)){ + return rc; + } + + debug_printf("after read_config..\n"); + + if(resource_create(config, &res) || + pd_create(&res, &pdinfo) || + qp_create(&res, &pdinfo, &qpinfo)){ + goto main_exit; + } + debug_printf("create all successfully..\n"); + + /* Connect qp of 
each side and init them*/ + if(connect_qp(config, &res, &qpinfo)){ + goto main_exit; + } + debug_print_qp_conn_info(res, qpinfo, &config); + + if(init_qp(config, &qpinfo)){ + goto main_exit; + } + + debug_printf("buf_size=%d\n", config.buf_size); + + /* Register fixed recv buffers */ + mrinfo_recv_list = malloc(sizeof(mrinfo_t) * SERVER_BUF_NUM); + for (i = 0; i < SERVER_BUF_NUM; i++) { + char *buf = calloc(config.buf_size, sizeof(char)); + if (buf == NULL) { + fprintf(stderr, "cannot malloc %dth buf\n", i); + goto main_exit; + } + + if (mr_create(&res, &pdinfo, config.buf_size, buf, &mrinfo_recv_list[i])) { + goto main_exit; + } + + // post_recv_req(&qpinfo, &mrinfo_recv_list[i], i); + } + + mrinfo_send_list = malloc(sizeof(mrinfo_t) * REPEAT_TIME); + for (i = 0; i < REPEAT_TIME; i++) { + char *buf = malloc(sizeof(char) * config.buf_size); + if (buf == NULL) { + fprintf(stderr, "cannot malloc %dth buf\n", i); + goto main_exit; + }debug_printf("alloc buf=0x%lx\n", (unsigned long)buf); + memset(buf, '1', config.buf_size); + buf[config.buf_size - 4] = i; + + if (mr_create(&res, &pdinfo, config.buf_size, buf, &mrinfo_send_list[i])) { + goto main_exit; + } + } + + + /* Modify qp state to RTS */ + if(rtr_qp(config, &qpinfo) || + rts_qp(config, &qpinfo)){ + goto main_exit; + } + for(i = 0; i < (config.server_flg ? config.nremote : 1); i++) { + sock_sync_data(qpinfo.sock[i], 1, "R", &sync_res); + } + + /*Receive first at server side*/ + if(config.server_flg){ + t0 = cur_time(); + for(i=0; i +#include +#include +#include +#include +#include +#include +#include "sock.h" +#include "debug.h" + +//#define DEBUG_SOCK +#ifdef DEBUG_SOCK +#define dprintf printf +#else +#define dprintf(...) 
#endif /* DEBUG_SOCK */

/**
 * Render the numeric address stored in one getaddrinfo() entry as text.
 *
 * @param ai      entry whose ai_addr is formatted
 * @param buf     destination buffer
 * @param buflen  size of buf in bytes
 * @return buf; holds "<unknown>" when the family is neither AF_INET nor
 *         AF_INET6 or when inet_ntop() fails
 */
static const char *sock_addr_str(const struct addrinfo *ai, char *buf, size_t buflen)
{
	const void *addr = NULL;

	switch (ai->ai_family) {
	case AF_INET:
		addr = &((const struct sockaddr_in *)ai->ai_addr)->sin_addr;
		break;
	case AF_INET6:
		addr = &((const struct sockaddr_in6 *)ai->ai_addr)->sin6_addr;
		break;
	default:
		break;
	}

	if (addr == NULL ||
	    inet_ntop(ai->ai_family, addr, buf, (socklen_t)buflen) == NULL) {
		snprintf(buf, buflen, "<unknown>");
	}
	return buf;
}

/**
 * Put lfd into listening state and accept a single peer.
 *
 * @param lfd     bound socket
 * @param connfd  receives the accept() result (-1 when accept() failed)
 * @return 0 when listen() succeeded, -1 otherwise (connfd is untouched then)
 */
static int sock_listen_accept(int lfd, int *connfd)
{
	printf("listen=%d\n", lfd);
	if (listen(lfd, 1) != 0) {
		return -1;
	}
	*connfd = accept(lfd, NULL, NULL);
	printf("accept=%d\n", *connfd);
	return 0;
}

/**
 * Establish one TCP connection, as server or as client.
 *
 * Server mode (server_name == NULL): when *listenfd is -1 a listening socket
 * is created on `port`, stored in *listenfd, and one peer is accepted; when
 * *listenfd already holds a listening socket it is reused for another
 * accept(). Client mode (server_name != NULL): connects to server_name:port
 * using the first address that works.
 *
 * @param server_name  host to connect to, or NULL for server mode
 * @param port         TCP port number
 * @param listenfd     in/out: listening socket of the server (-1 = none yet)
 * @return connected socket descriptor, or -1 on failure
 */
int sock_connect(char *server_name, int port, int *listenfd)
{
	struct addrinfo hints;
	struct addrinfo *result = NULL, *rp = NULL;
	int rc = 0, sockfd = -1;
	char service[16];	/* large enough for any int rendered in decimal */
	char addrstr[256];
	int n;

	dprintf("sock_connect,enter\n");

	/* Server that already owns a listening socket: just accept again.
	 * Handled before the address loop so no code runs with rp == NULL. */
	if (server_name == NULL && *listenfd != -1) {
		if (sock_listen_accept(*listenfd, &sockfd) != 0) {
			sockfd = -1;
		}
		return sockfd;
	}

	/* getaddrinfo() takes the port as a service string */
	n = snprintf(service, sizeof(service), "%d", port);
	if (n < 0 || (size_t)n >= sizeof(service)) {
		goto sock_connect_exit;
	}

	memset(&hints, 0, sizeof(struct addrinfo));
	if (server_name == NULL) {
		hints.ai_flags = AI_PASSIVE;	/* wildcard address for bind() */
	}
	hints.ai_family = AF_UNSPEC;		/* IPv4 or IPv6 */
	hints.ai_socktype = SOCK_STREAM;	/* TCP */
	hints.ai_protocol = 0;			/* any protocol */

	rc = getaddrinfo(server_name, service, &hints, &result);
	if (rc) {
		dprintf("%s\n", gai_strerror(rc));
		goto sock_connect_exit;
	}
	dprintf("result=%p\n", (void *)result);

	/* report every candidate address before trying any of them */
	for (rp = result; rp != NULL; rp = rp->ai_next) {
		if (rp->ai_family == AF_INET) {
			printf("ai_family=AF_INET\n");
		} else {
			dprintf("ai_family=%08x\n", (unsigned int)rp->ai_family);
		}
		printf("trying to use addr=%s,port=%d\n",
		       sock_addr_str(rp, addrstr, sizeof(addrstr)), port);
	}

	for (rp = result; rp != NULL; rp = rp->ai_next) {
		int on = 1;
		int connfd = -1;

		sockfd = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol);
		if (sockfd == -1) {
			continue;
		}

		/* allow quick restart of the server on the same port */
		if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) != 0) {
			goto try_next;
		}

		if (server_name == NULL) {
			/* server mode */
			if (rp->ai_family == AF_INET) {
				dprintf("ai_family=AF_INET\n");
			} else {
				dprintf("ai_family=%08x\n", (unsigned int)rp->ai_family);
			}
			printf("server mode,addr=%s,port=%d\n",
			       sock_addr_str(rp, addrstr, sizeof(addrstr)), port);

			if (bind(sockfd, rp->ai_addr, rp->ai_addrlen) != 0) {
				goto try_next;
			}
			if (sock_listen_accept(sockfd, &connfd) != 0) {
				goto try_next;
			}
			/* keep the listening socket for later calls and hand
			 * the accepted connection back to the caller */
			if (*listenfd == -1) {
				*listenfd = sockfd;
			}
			sockfd = connfd;
			goto sock_connect_exit;
		} else {
			/* client mode */
			if (rp->ai_family == AF_INET) {
				printf("ai_family=AF_INET\n");
			} else {
				dprintf("ai_family=%08x\n", (unsigned int)rp->ai_family);
			}
			printf("client mode,addr=%s,port=%d\n",
			       sock_addr_str(rp, addrstr, sizeof(addrstr)), port);

			rc = connect(sockfd, rp->ai_addr, rp->ai_addrlen);
			if (rc == 0) {
				printf("connect succeeded,fd=%d\n", sockfd);
				goto sock_connect_exit;
			}
			printf("connect failed, trying to use next\n");
		}

	try_next:
		/* this candidate did not work: do not leak its descriptor */
		close(sockfd);
		sockfd = -1;
	}

	/* the loop only falls through when every candidate failed */
	error_printf("All trial failed\n");
	sockfd = -1;

 sock_connect_exit:
	if (result) {
		freeaddrinfo(result);
	}
	return sockfd;
}

/**
 * Exchange data_bytes bytes with the peer: send local_data, then receive the
 * same number of bytes into remote_data. Both ends call this, which also
 * makes it usable as a simple barrier.
 *
 * @param sock         connected stream socket
 * @param data_bytes   number of bytes to send and to receive
 * @param local_data   buffer to send (at least data_bytes long)
 * @param remote_data  buffer to fill (at least data_bytes long)
 * @return 0 on success, _SOCK_WRITE_ERR or _SOCK_READ_ERR on failure
 */
int sock_sync_data(int sock, int data_bytes, char *local_data, char *remote_data)
{
	int done;
	ssize_t n;

	if (data_bytes < 0) {
		return _SOCK_WRITE_ERR;
	}

	/* a stream socket may accept fewer bytes than requested */
	for (done = 0; done < data_bytes; done += (int)n) {
		n = write(sock, local_data + done, (size_t)(data_bytes - done));
		if (n <= 0) {
			return _SOCK_WRITE_ERR;
		}
	}

	/* likewise for reads: continue after the bytes already received and
	 * never ask for more than what is still missing */
	for (done = 0; done < data_bytes; done += (int)n) {
		n = read(sock, remote_data + done, (size_t)(data_bytes - done));
		if (n <= 0) {
			return _SOCK_READ_ERR;
		}
	}

	return _SOCK_SUCCESS;
}

diff --git a/test/rusage/verbs/sock.h b/test/rusage/verbs/sock.h new file mode 100755 index 00000000..7a56526e --- /dev/null +++ b/test/rusage/verbs/sock.h @@ -0,0 +1,11 @@ +#ifndef SOCK_H +#define SOCK_H +enum sock_return_code{ + _SOCK_SUCCESS, + _SOCK_CONN_ERR, + _SOCK_WRITE_ERR, + _SOCK_READ_ERR +}; +extern int sock_connect(char *server_name, int port, int *listenfd); +extern int sock_sync_data(int sock, int data_size, char *local_data, char *remote_data); +#endif diff --git a/test/rusage/verbs/ud.c b/test/rusage/verbs/ud.c new file mode 100755 index 00000000..44a5e678 --- /dev/null +++ b/test/rusage/verbs/ud.c @@ -0,0 +1,255 @@ +#include +#include +#include +#include +#include "ibcomm.h" +#include "debug.h" +#include "mtype.h" +#include "mm_ib_test.h" + +//#define DEBUG_UD +#ifdef DEBUG_UD +#define dprintf printf +#else +#define dprintf(...) +#endif + +#define MAX2(x,y) ((x) > (y) ? (x) : (y)) +#define SERVER_BUF_NUM TEST_SERVER_BUF_NUM +#define NTRIAL 120 +#define PPOLLS 10 /* sweet spot is around 10 */ +#define NSKIPS (PPOLLS*1) +#define PPOLLR 10 /* sweet spot is around 10 */ +#define NSKIPR (PPOLLR*1) + +static unsigned long rdtsc() { + unsigned long x; + __asm__ __volatile__("xorl %%eax, %%eax; cpuid;" : : : "%rax", "%rbx", "%rcx", "%rdx"); /* rdtsc cannot be executed earlier than this */ + __asm__ __volatile__("rdtsc; shl $32, %%rdx; or %%rdx, %%rax" : "=a"(x) : : "memory"); /* rdtsc cannot be executed earlier than here */ + __asm__ __volatile__("xorl %%eax, %%eax; cpuid;" : : : "%rax", "%rbx", "%rcx", "%rdx"); /* following instructions cannot be executed earlier than this */ + return x; +} + +int main(int argc, char **argv) { + config_t config; + int i, j, k; + char sync_res; + resource_t res; + pdinfo_t pdinfo; + qpinfo_t qpinfo; + mrinfo_t *mrinfo_send_list = NULL, *mrinfo_recv_list = NULL; + int ibv_errno, ibcom_errno, verbs_errno = 0; + unsigned long tscs, tsce, tscs2, tsce2; + + FILE* fp; + fp = popen("cat 
/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq", "r"); + if(!fp) { printf("popen failed\n"); goto fn_fail; } + char freq_str[256]; + int nread = fread(freq_str, sizeof(char), 256, fp); + if(!nread) { printf("popen failed"); goto fn_fail; } + freq_str[nread] = 0; + long int freq = strtol(freq_str, NULL, 10) * 1000; + printf("freq=%ld\n", freq); + pclose(fp); + + ibcom_errno = read_config(&config, argc, argv); + if(ibcom_errno) { printf("read_config\n"); goto fn_fail; } + + ibcom_errno = resource_create(config, &res); + if(ibcom_errno) { printf("resource_create\n"); goto fn_fail; } + + ibcom_errno = pd_create(&res, &pdinfo); + if(ibcom_errno) { printf("pd_create\n"); goto fn_fail; } + + ibcom_errno = qp_create_ud(&res, &pdinfo, &qpinfo); + if(ibcom_errno) { printf("qp_create_ud\n"); goto fn_fail; } + + ibcom_errno = init_qp_ud(config, &qpinfo); + if(ibcom_errno) { printf("init_qp_ud\n"); goto fn_fail; } + + /* prepare local lid, gid, qpn, qkey */ + qp_conn_info_ud_t local_conn_info, remote_conn_info; + + struct ibv_port_attr port_attr; /* IB port attributes */ + ibv_errno = ibv_query_port(res.ib_ctx, config.ib_port, &port_attr); + VERBS_ERR_CHKANDJUMP(ibv_errno, -1, printf("ibv_query_port on port %u failed\n", config.ib_port)); + local_conn_info.lid = port_attr.lid; + + ibv_errno = ibv_query_gid(res.ib_ctx, config.ib_port, 0, (union ibv_gid*)&local_conn_info.gid); + VERBS_ERR_CHKANDJUMP(ibv_errno, -1, printf("could not get gid for port %d, index 0\n", config.ib_port)); + + local_conn_info.qp_num = qpinfo.qp->qp_num; + local_conn_info.qkey = 0x11111111; + + /* send local connection info and obtain remote one */ + int listenfd = config.server_flg ? 
-1 : 0; + int fd = sock_connect(config.server_name, config.tcp_port, &listenfd); + if(fd < 0) { error_perror("sock_connect"); goto fn_fail; } + if(config.server_flg) { + dprintf("server,fd=%d\n", fd); + } else { + dprintf("client,fd=%d\n", fd); + } + ibcom_errno = sock_sync_data(fd, sizeof(qp_conn_info_ud_t), (char*)&local_conn_info, (char*)&remote_conn_info); + if(ibcom_errno) { error_perror("sock_sync_data"); goto fn_fail; } + + /* print local and remote connection info */ + dprintf("local lid=%08x,qpn=%08x,qkey=%08x\n", local_conn_info.lid, local_conn_info.qp_num, local_conn_info.qkey); + dprintf("local gid="); + for(i = 0; i < 16; i++) { dprintf("%02x", local_conn_info.gid.raw[i]); } + dprintf("\n"); + + dprintf("remote lid=%08x,qpn=%08x,qkey=%08x\n", remote_conn_info.lid, remote_conn_info.qp_num, remote_conn_info.qkey); + dprintf("remote gid="); + for(i = 0; i < 16; i++) { dprintf("%02x", remote_conn_info.gid.raw[i]); } + dprintf("\n"); + + /* ibv_reg_mr */ + mrinfo_recv_list = malloc(sizeof(mrinfo_t) * SERVER_BUF_NUM); + for (i = 0; i < SERVER_BUF_NUM; i++) { + char *buf = malloc(config.buf_size * sizeof(char)); + if(!buf) { fprintf(stderr, "cannot malloc %dth buf\n", i); goto fn_fail; } + for(j = 0; j < config.buf_size; j++) { + buf[j] = -j & 0xff; + } + ibcom_errno = mr_create(&res, &pdinfo, config.buf_size, buf, &mrinfo_recv_list[i]); + VERBS_ERR_CHKANDJUMP(ibcom_errno, -1, printf("mr_create\n")); + } + + mrinfo_send_list = malloc(sizeof(mrinfo_t) * NTRIAL); + for (i = 0; i < NTRIAL; i++) { + char *buf = malloc(config.buf_size * sizeof(char)); + if (!buf) { printf("cannot malloc %dth buf\n", i); goto fn_fail; } + for(j = 0; j < config.buf_size; j++) { + buf[j] = j & 0xff; + } + ibcom_errno = mr_create(&res, &pdinfo, config.buf_size, buf, &mrinfo_send_list[i]); + if(ibcom_errno) { printf("mr_create\n"); goto fn_fail; } + } + + /* ibv_modify_qp */ + ibcom_errno = rtr_qp_ud(config, &qpinfo); + if(ibcom_errno) { printf("rtr\n"); goto fn_fail; } + ibcom_errno = 
rts_qp_ud(config, &qpinfo); + if(ibcom_errno) { printf("rts\n"); goto fn_fail; } + print_qp_status(&qpinfo); + + /* prepare address header (1/2, ibv_ah_attr) */ + struct ibv_ah_attr ah_attr; + memset(&ah_attr, 0, sizeof(struct ibv_ah_attr)); + ah_attr.dlid = remote_conn_info.lid; + ah_attr.sl = 0; + ah_attr.src_path_bits = 0; + ah_attr.static_rate = 0; /* not limit on static rate (100% port speed) */ + ah_attr.is_global = 0; + ah_attr.port_num = config.ib_port; + +#if 0 + ah_attr.is_global = 1; + ah_attr.grh.dgid = remote_conn_info.gid; + ah_attr.grh.flow_label = 0; + ah_attr.grh.sgid_index = 0; /* what is this? */ + ah_attr.grh.hop_limit = 1; + ah_attr.grh.traffic_class = 0; +#endif + + /* prepare address header (2/2, ibv_ah) */ + struct ibv_ah *ah; + ah = ibv_create_ah(pdinfo.pd, &ah_attr); + if(!ah) { printf("ibv_crate_ah\n"); goto fn_fail; } + + /* pre-post receive commands */ + VERBS_ERR_CHKANDJUMP(_MAX_RQ_CAPACITY < NTRIAL, -1, printf("Increase _MAX_RQ_CAPACITY,_MAX_RQ_CAPACITY=%d,NTRIAL=%d\n", _MAX_RQ_CAPACITY, NTRIAL)); + if(!config.server_flg) { + for(i = 0; i < NTRIAL; i++){ + ibcom_errno = post_recv_req_ud(&qpinfo, &mrinfo_recv_list[0], 0x1234ULL); + if(ibcom_errno) { printf("post_recv_req_ud\n"); goto fn_fail; } + } + } + + /* barrier */ + for(i = 0; i < (config.server_flg ? config.nremote : 1); i++) { + sock_sync_data(qpinfo.sock[i], 1, "R", &sync_res); + } + + /* barrier */ + for(i = 0; i < (config.server_flg ? 
config.nremote : 1); i++) { + sock_sync_data(qpinfo.sock[i], 1, "R", &sync_res); + } + + if(config.server_flg) { // sender side + usleep(1000000); + if(NTRIAL % PPOLLS != 0) { printf("set NTRIAL multiple of PPOLLS\n"); goto fn_fail; } + if(NTRIAL <= NSKIPS) { printf("set NTRIAL > NSKIP\n"); goto fn_fail; } + for(i = 0; i < NTRIAL; i++) { + if(i == NSKIPS) { tscs = rdtsc(); } + + ibcom_errno = post_send_req_ud(&qpinfo, &mrinfo_send_list[0], IBV_WR_SEND, &remote_conn_info, ah); + if(ibcom_errno) { printf("post_send_req_ud\n"); goto fn_fail; } + + +#if 1 + int nfound = 0; + if(i % PPOLLS == PPOLLS - 1) { + k = 0; + while(1) { + int result; + struct ibv_wc cqe[PPOLLS]; + result = ibv_poll_cq(qpinfo.scq, PPOLLS, &cqe[0]); + if(result < 0) { printf("ibv_poll_cq"); goto fn_fail; } + if(result > 0) { + for(j = 0; j < result; j++) { + if(cqe[j].status != IBV_WC_SUCCESS) { printf("cqe status\n"); goto fn_fail; } + } + //debug_print_mem((unsigned long long)mrinfo_send_list[i].buf, config.buf_size); + nfound += result; + if(nfound == PPOLLS) { break; } + } + k++; + } + } +#endif + } + tsce = rdtsc(); printf("send,%.0f\n", (tsce-tscs)/(double)(NTRIAL-NSKIPS)); + + } else { // receiver side + if(NSKIPR % PPOLLR !=0) { printf("set NSKIP multiple of PPOLL\n"); goto fn_fail; } + for(i = 0; i< NTRIAL; i++){ + if(i == NSKIPR) { tscs = rdtsc(); } + //tscs2 = rdtsc(); +#if 0 + ibv_errno = ibv_query_port(res.ib_ctx, config.ib_port, &port_attr); + VERBS_ERR_CHKANDJUMP(ibv_errno, -1, printf("ibv_query_port on port %u failed\n", config.ib_port)); + printf("bad_pkey_cntr=%d,%d\n", port_attr.bad_pkey_cntr, port_attr.qkey_viol_cntr); +#endif + + /* poll CQ */ + int nfound = 0; + if(i % PPOLLR == PPOLLR - 1) { + k = 0; + while(1) { + int ib_errno, result; + struct ibv_wc cqe[PPOLLR]; + result = ibv_poll_cq(qpinfo.rcq, 1, &cqe[0]); + if(result < 0) { printf("poll_cq\n"); goto fn_fail; } + if(result > 0) { + for(j = 0; j < result; j++) { + if(cqe[j].status != IBV_WC_SUCCESS) { 
printf("cqe.status"); goto fn_fail; } + } + printf("wr_id=%lx\n", cqe[0].wr_id); + //tsce2 = rdtsc(); printf("received,%ld\n", tsce2 - tscs2); + nfound += result; + if(nfound == PPOLLR) { break; } + } + k++; + } + } + } + tsce = rdtsc(); printf("recv,%.0f\n", (tsce-tscs)/(double)(NTRIAL-NSKIPR)); + } + + fn_exit: + return verbs_errno; + fn_fail: + goto fn_exit; +}