tests: add 'postk_master' branch tests

Change-Id: Ie0d4cfd0921aed89d2db6083c9eb068b1cfc1984
This commit is contained in:
Dominique Martinet
2019-02-01 13:56:18 +09:00
parent 25ef4e9261
commit 7f1c17fc4c
175 changed files with 11703 additions and 5839 deletions

View File

@ -11,6 +11,7 @@
#include <unistd.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <sched.h>
#include "util.h"
//#define DEBUG
#ifdef DEBUG
@ -22,73 +23,14 @@
#define SZENTRY_DEFAULT (65536) /* Size of one slot */
#define NENTRY_DEFAULT 10000 /* Number of slots */
#define DIFFNSEC(end, start) ((end.tv_sec - start.tv_sec) * 1000000000UL + (end.tv_nsec - start.tv_nsec))
static int print_cpu_last_executed_on() {
char fn[256];
char* result;
pid_t tid = syscall(SYS_gettid);
int fd;
int offset;
int mpi_errno = 0;
sprintf(fn, "/proc/%d/task/%d/stat", getpid(), (int)tid);
//printf("fn=%s\n", fn);
fd = open(fn, O_RDONLY);
if(fd == -1) {
printf("open() failed\n");
goto fn_fail;
}
result = malloc(65536);
if(result == NULL) {
printf("malloc() failed");
goto fn_fail;
}
int amount = 0;
offset = 0;
while(1) {
amount = read(fd, result + offset, 65536);
// printf("amount=%d\n", amount);
if(amount == -1) {
printf("read() failed");
goto fn_fail;
}
if(amount == 0) {
goto eof;
}
offset += amount;
}
eof:;
//printf("result:%s\n", result);
char* next_delim = result;
char* field;
void sendrecv(int rank, int nentry, char **sendv, char **recvv, int szentry,
int src, int dest, MPI_Request *reqs, MPI_Status *status,
double usec)
{
int i;
for(i = 0; i < 39; i++) {
field = strsep(&next_delim, " ");
}
int cpu = sched_getcpu();
if(cpu == -1) {
printf("getpu() failed\n");
goto fn_fail;
}
printf("compute thread,pmi_rank=%02d,stat-cpu=%02d,sched_getcpu=%02d,tid=%d\n", atoi(getenv("PMI_RANK")), atoi(field), cpu, tid); fflush(stdout);
fn_exit:
free(result);
return mpi_errno;
fn_fail:
mpi_errno = -1;
goto fn_exit;
}
void sendrecv(int rank, int nentry, char **sendv, char **recvv, int szentry, int src, int dest, MPI_Request* reqs, MPI_Status* status, double usec) {
int i;
if(rank == 1) {
for(i = 0; i < nentry; i++) {
if (rank == 1) {
for (i = 0; i < nentry; i++) {
MPI_Isend(sendv[i], szentry, MPI_CHAR, dest, 0, MPI_COMM_WORLD, &reqs[i]);
if (nentry > 10 && i % (nentry / 10) == 0) {
printf("s"); fflush(stdout);
@ -97,7 +39,7 @@ void sendrecv(int rank, int nentry, char **sendv, char **recvv, int szentry, int
MPI_Waitall(nentry, reqs, status);
printf("w\n"); fflush(stdout);
} else {
for(i = 0; i < nentry; i++) {
for (i = 0; i < nentry; i++) {
MPI_Irecv(recvv[i], szentry, MPI_CHAR, src, 0, MPI_COMM_WORLD, &reqs[i]);
if (nentry > 10 && i % (nentry / 10) == 0) {
printf("r"); fflush(stdout);
@ -113,104 +55,133 @@ int main(int argc, char **argv) {
int my_rank = -1, size = -1;
int i, j;
char **sendv, **recvv;
MPI_Status* status;
MPI_Request* reqs;
long szentry;
long nentry;
MPI_Status *status;
MPI_Request *reqs;
long szentry;
long nentry;
int src, dest;
struct timespec start, end;
struct timespec start, end;
double diffusec;
if(argc == 3) {
szentry = atoi(argv[1]);
nentry = atoi(argv[2]);
} else {
szentry = SZENTRY_DEFAULT;
if (argc == 3) {
szentry = atoi(argv[1]);
nentry = atoi(argv[2]);
} else {
szentry = SZENTRY_DEFAULT;
nentry = NENTRY_DEFAULT;
}
}
printf("szentry=%ld,nentry=%ld\n", szentry, nentry);
status = (MPI_Status*)malloc(sizeof(MPI_Status) * nentry);
reqs = (MPI_Request*)malloc(sizeof(MPI_Request) * nentry);
status = (MPI_Status *)malloc(sizeof(MPI_Status) * nentry);
reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * nentry);
int actual;
int actual;
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual);
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual);
printf("Thread support level is %d\n", actual);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
src = (size + my_rank - 1) % size;
dest = (my_rank + 1) % size;
src = (size + my_rank - 1) % size;
dest = (my_rank + 1) % size;
printf("rank=%d, size=%d, src=%d, dest=%d\n", my_rank, size, src, dest);
printf("rank=%d, size=%d, src=%d, dest=%d\n", my_rank, size, src, dest);
sendv = malloc(sizeof(char *) * nentry);
if(!sendv) { printf("malloc failed"); goto fn_fail; }
if (!sendv) {
printf("malloc failed");
goto fn_fail;
}
for (i = 0; i < nentry; i++) {
#if 0
int fd;
fd = open("./file", O_RDWR);
if(fd == -1) { printf("open failed\n"); goto fn_fail; }
sendv[i] = (char*)mmap(0, szentry, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
if (fd == -1) {
printf("open failed\n");
goto fn_fail;
}
sendv[i] = (char *)mmap(0, szentry, PROT_READ | PROT_WRITE,
MAP_PRIVATE, fd, 0);
#else
sendv[i] = (char*)mmap(0, szentry, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
sendv[i] = (char *)mmap(0, szentry, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
#endif
if(sendv[i] == MAP_FAILED) { printf("mmap failed"); goto fn_fail; }
if (sendv[i] == MAP_FAILED) {
printf("mmap failed");
goto fn_fail;
}
dprintf("[%d] sendv[%d]=%p\n", my_rank, i, sendv[i]);
memset(sendv[i], 0xaa, szentry);
}
recvv = malloc(sizeof(char *) * nentry);
if(!recvv) { printf("malloc failed"); goto fn_fail; }
if (!recvv) {
printf("malloc failed");
goto fn_fail;
}
for (i = 0; i < nentry; i++) {
#if 0
int fd;
fd = open("./file", O_RDWR);
if(fd == -1) { printf("open failed\n"); goto fn_fail; }
recvv[i] = (char*)mmap(0, szentry, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
if (fd == -1) {
printf("open failed\n");
goto fn_fail;
}
recvv[i] = (char *)mmap(0, szentry, PROT_READ|PROT_WRITE,
MAP_PRIVATE, fd, 0);
#else
recvv[i] = (char*)mmap(0, szentry, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
recvv[i] = (char *)mmap(0, szentry, PROT_READ|PROT_WRITE,
MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
#endif
if(recvv[i] == MAP_FAILED) { printf("mmap failed"); goto fn_fail; }
if (recvv[i] == MAP_FAILED) {
printf("mmap failed");
goto fn_fail;
}
dprintf("[%d] recvv[%d]=%p\n", my_rank, i, recvv[i]);
memset(recvv[i], 0, szentry);
}
printf("after memset\n");
print_cpu_last_executed_on();
print_cpu_last_executed_on("main");
for (i = 0; i < 1; i++) {
MPI_Barrier(MPI_COMM_WORLD);
if(my_rank == 0) {
if (my_rank == 0) {
clock_gettime(CLOCK_REALTIME, &start);
}
sendrecv(my_rank, nentry, sendv, recvv, szentry, src, dest, reqs, status, 0);
sendrecv(my_rank, nentry, sendv, recvv, szentry, src, dest,
reqs, status, 0);
MPI_Barrier(MPI_COMM_WORLD);
if(my_rank == 0) {
if (my_rank == 0) {
clock_gettime(CLOCK_REALTIME, &end);
diffusec = DIFFNSEC(end, start) / (double)1000;
printf("%4.4f sec\n", DIFFNSEC(end, start) / (double)1000000000); fflush(stdout);
printf("%4.4f sec\n",
DIFFNSEC(end, start) / (double)1000000000);
fflush(stdout);
}
MPI_Barrier(MPI_COMM_WORLD);
if(my_rank == 0) {
if (my_rank == 0) {
clock_gettime(CLOCK_REALTIME, &start);
}
sendrecv(my_rank, nentry, sendv, recvv, szentry, src, dest, reqs, status, diffusec);
sendrecv(my_rank, nentry, sendv, recvv, szentry, src, dest,
reqs, status, diffusec);
MPI_Barrier(MPI_COMM_WORLD);
if(my_rank == 0) {
if (my_rank == 0) {
clock_gettime(CLOCK_REALTIME, &end);
printf("%4.4f sec\n", DIFFNSEC(end, start) / (double)1000000000); fflush(stdout);
printf("%4.4f sec\n",
DIFFNSEC(end, start) / (double)1000000000);
fflush(stdout);
}
}
fn_exit:
MPI_Finalize();
MPI_Finalize();
return 0;
fn_fail:
goto fn_exit;
goto fn_exit;
}

View File

@ -1,127 +1,68 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <sys/mman.h>
#include <mpi.h>
#include <unistd.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <sched.h>
//#define DEBUG
#ifdef DEBUG
#define dprintf printf
#else
#define dprintf {}
#endif
#define SZENTRY_DEFAULT (65536) /* Size of one slot */
#define NENTRY_DEFAULT 10000 /* Number of slots */
#define DIFFNSEC(end, start) ((end.tv_sec - start.tv_sec) * 1000000000UL + (end.tv_nsec - start.tv_nsec))
/*
 * Print the CPU the calling thread is associated with, from two sources:
 * the 39th space-separated field of /proc/<pid>/task/<tid>/stat (the
 * "processor" field according to proc(5)) and sched_getcpu().
 *
 * The PMI_RANK environment variable is printed as the rank; -1 is printed
 * when it is not set.
 *
 * Returns 0 on success, -1 if the stat file could not be opened, read or
 * parsed, or if sched_getcpu() failed.
 */
static int print_cpu_last_executed_on(void)
{
	enum { STAT_BUF_SIZE = 65536, STAT_CPU_FIELD = 39 };
	char fn[256];
	char *result = NULL;	/* NULL so the common exit path can free() it */
	char *next_delim;
	char *field = NULL;
	const char *pmi_rank;
	pid_t tid = syscall(SYS_gettid);
	int fd;
	int offset = 0;
	int cpu;
	int i;
	int mpi_errno = 0;

	snprintf(fn, sizeof(fn), "/proc/%d/task/%d/stat", (int)getpid(), (int)tid);
	fd = open(fn, O_RDONLY);
	if (fd == -1) {
		printf("open() failed\n");
		goto fn_fail;
	}

	result = malloc(STAT_BUF_SIZE);
	if (result == NULL) {
		printf("malloc() failed");
		goto fn_fail;
	}

	/* Never read past the buffer; keep one byte for the terminating NUL */
	while (offset < STAT_BUF_SIZE - 1) {
		ssize_t amount = read(fd, result + offset,
				      STAT_BUF_SIZE - 1 - offset);

		if (amount == -1) {
			printf("read() failed");
			goto fn_fail;
		}
		if (amount == 0) {
			break;
		}
		offset += (int)amount;
	}
	result[offset] = '\0';	/* strsep() needs a NUL-terminated string */

	next_delim = result;
	for (i = 0; i < STAT_CPU_FIELD; i++) {
		field = strsep(&next_delim, " ");
	}
	if (field == NULL) {
		printf("stat has fewer than %d fields\n", STAT_CPU_FIELD);
		goto fn_fail;
	}

	cpu = sched_getcpu();
	if (cpu == -1) {
		printf("sched_getcpu() failed\n");
		goto fn_fail;
	}

	pmi_rank = getenv("PMI_RANK");
	printf("compute thread,pmi_rank=%02d,stat-cpu=%02d,sched_getcpu=%02d,tid=%d\n",
	       pmi_rank ? atoi(pmi_rank) : -1, atoi(field), cpu, (int)tid);
	fflush(stdout);

fn_exit:
	if (fd != -1) {
		close(fd);
	}
	free(result);
	return mpi_errno;
fn_fail:
	mpi_errno = -1;
	goto fn_exit;
}
/*
 * Barrier benchmark.
 *
 * Usage: <prog> nloop
 *
 * Initializes MPI, prints the CPU placement of the calling thread, then
 * runs nloop MPI_Barrier() calls; rank 0 prints the elapsed wall-clock time.
 * Returns 1 if nloop is missing, 0 otherwise.
 */
int main(int argc, char **argv)
{
	int my_rank = -1, size = -1;
	int i;
	int nloop;
	struct timespec start, end;
	int actual;

	/* argv[1] is mandatory; bail out before touching MPI if it is absent */
	if (argc < 2) {
		printf("usage: <prog> nloop\n");
		return 1;
	}
	nloop = atoi(argv[1]);	/* parsed once instead of on every iteration */
	printf("nloop=%d\n", nloop);

	MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual);
	printf("Thread support level is %d\n", actual);

	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
	MPI_Comm_size(MPI_COMM_WORLD, &size);

	print_cpu_last_executed_on();

	printf("Before 1st barrier\n"); fflush(stdout);
	MPI_Barrier(MPI_COMM_WORLD);
	printf("Before 2nd barrier\n"); fflush(stdout);

	if (my_rank == 0) {
		clock_gettime(CLOCK_REALTIME, &start);
	}
	for (i = 0; i < nloop; i++) {
		MPI_Barrier(MPI_COMM_WORLD);
	}
	if (my_rank == 0) {
		clock_gettime(CLOCK_REALTIME, &end);
		printf("%4.4f sec\n",
		       DIFFNSEC(end, start) / (double)1000000000);
		fflush(stdout);
	}

	/*
	 * NOTE(review): MPI_Finalize() was already commented out in the
	 * original and is kept that way; confirm that exiting without
	 * finalizing is deliberate.
	 */
	//MPI_Finalize();
	usleep(100000);
	return 0;
}
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <sys/mman.h>
#include <mpi.h>
#include <unistd.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <sched.h>
#include "util.h"
//#define DEBUG
#ifdef DEBUG
#define dprintf printf
#else
#define dprintf {}
#endif
#define SZENTRY_DEFAULT (65536) /* Size of one slot */
#define NENTRY_DEFAULT 10000 /* Number of slots */
/*
 * Barrier benchmark.
 *
 * Usage: <prog> nloop
 *
 * Initializes MPI, prints the CPU placement of the calling thread, then
 * runs nloop MPI_Barrier() calls; rank 0 prints the elapsed wall-clock time.
 * Returns 1 if nloop is missing, 0 otherwise.
 */
int main(int argc, char **argv)
{
	int my_rank = -1, size = -1;
	int i;
	int nloop;
	struct timespec start, end;
	int actual;

	/* argv[1] is mandatory; bail out before touching MPI if it is absent */
	if (argc < 2) {
		printf("usage: <prog> nloop\n");
		return 1;
	}
	nloop = atoi(argv[1]);	/* parsed once instead of on every iteration */
	printf("nloop=%d\n", nloop);

	MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual);
	printf("Thread support level is %d\n", actual);

	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
	MPI_Comm_size(MPI_COMM_WORLD, &size);

	print_cpu_last_executed_on("main");

	printf("Before 1st barrier\n"); fflush(stdout);
	MPI_Barrier(MPI_COMM_WORLD);
	printf("Before 2nd barrier\n"); fflush(stdout);

	if (my_rank == 0) {
		clock_gettime(CLOCK_REALTIME, &start);
	}
	for (i = 0; i < nloop; i++) {
		MPI_Barrier(MPI_COMM_WORLD);
	}
	if (my_rank == 0) {
		clock_gettime(CLOCK_REALTIME, &end);
		printf("%4.4f sec\n",
		       DIFFNSEC(end, start) / (double)1000000000);
		fflush(stdout);
	}

	/*
	 * NOTE(review): MPI_Finalize() was already commented out in the
	 * original and is kept that way; confirm that exiting without
	 * finalizing is deliberate.
	 */
	//MPI_Finalize();
	usleep(100000);
	return 0;
}

View File

@ -1,188 +1,151 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <sys/mman.h>
#include <mpi.h>
#include <unistd.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <sched.h>
//#define DEBUG
#ifdef DEBUG
#define dprintf printf
#else
#define dprintf {}
#endif
#define SZENTRY_DEFAULT (65536) /* Size of one slot */
#define NENTRY_DEFAULT 10000 /* Number of slots */
#define DIFFNSEC(end, start) ((end.tv_sec - start.tv_sec) * 1000000000UL + (end.tv_nsec - start.tv_nsec))
/*
 * Print the CPU the calling thread is associated with, from two sources:
 * the 39th space-separated field of /proc/<pid>/task/<tid>/stat (the
 * "processor" field according to proc(5)) and sched_getcpu().
 *
 * The PMI_RANK environment variable is printed as the rank; -1 is printed
 * when it is not set.
 *
 * Returns 0 on success, -1 if the stat file could not be opened, read or
 * parsed, or if sched_getcpu() failed.
 */
static int print_cpu_last_executed_on(void)
{
	enum { STAT_BUF_SIZE = 65536, STAT_CPU_FIELD = 39 };
	char fn[256];
	char *result = NULL;	/* NULL so the common exit path can free() it */
	char *next_delim;
	char *field = NULL;
	const char *pmi_rank;
	pid_t tid = syscall(SYS_gettid);
	int fd;
	int offset = 0;
	int cpu;
	int i;
	int mpi_errno = 0;

	snprintf(fn, sizeof(fn), "/proc/%d/task/%d/stat", (int)getpid(), (int)tid);
	fd = open(fn, O_RDONLY);
	if (fd == -1) {
		printf("open() failed\n");
		goto fn_fail;
	}

	result = malloc(STAT_BUF_SIZE);
	if (result == NULL) {
		printf("malloc() failed");
		goto fn_fail;
	}

	/* Never read past the buffer; keep one byte for the terminating NUL */
	while (offset < STAT_BUF_SIZE - 1) {
		ssize_t amount = read(fd, result + offset,
				      STAT_BUF_SIZE - 1 - offset);

		if (amount == -1) {
			printf("read() failed");
			goto fn_fail;
		}
		if (amount == 0) {
			break;
		}
		offset += (int)amount;
	}
	result[offset] = '\0';	/* strsep() needs a NUL-terminated string */

	next_delim = result;
	for (i = 0; i < STAT_CPU_FIELD; i++) {
		field = strsep(&next_delim, " ");
	}
	if (field == NULL) {
		printf("stat has fewer than %d fields\n", STAT_CPU_FIELD);
		goto fn_fail;
	}

	cpu = sched_getcpu();
	if (cpu == -1) {
		printf("sched_getcpu() failed\n");
		goto fn_fail;
	}

	pmi_rank = getenv("PMI_RANK");
	printf("compute thread,pmi_rank=%02d,stat-cpu=%02d,sched_getcpu=%02d,tid=%d\n",
	       pmi_rank ? atoi(pmi_rank) : -1, atoi(field), cpu, (int)tid);
	fflush(stdout);

fn_exit:
	if (fd != -1) {
		close(fd);
	}
	free(result);
	return mpi_errno;
fn_fail:
	mpi_errno = -1;
	goto fn_exit;
}
/*
 * Post nentry non-blocking transfers of szentry bytes and wait for them.
 *
 * Rank 1 posts MPI_Isend() from sendv[0] to dest; every other rank posts
 * MPI_Irecv() into recvv[0] from src, sleeps usec microseconds, and then
 * waits. One request per transfer is stored in reqs[0..nentry-1] and the
 * completion statuses in status[]. A progress character ('s' or 'r') is
 * printed roughly every tenth of the transfers.
 */
void sendrecv(int rank, int nentry, char **sendv, char **recvv, int szentry,
	      int src, int dest, MPI_Request *reqs, MPI_Status *status,
	      double usec)
{
	int i;
	/* 0 when nentry < 10: progress output is skipped instead of dividing by zero */
	int step = nentry / 10;

	if (rank == 1) {
		for (i = 0; i < nentry; i++) {
			if (step > 0 && i % step == 0) {
				printf("s"); fflush(stdout);
			}
			MPI_Isend(sendv[0], szentry, MPI_CHAR, dest, 0,
				  MPI_COMM_WORLD, &reqs[i]);
		}
		printf("\n"); fflush(stdout);
		MPI_Waitall(nentry, reqs, status);
	} else {
		for (i = 0; i < nentry; i++) {
			if (step > 0 && i % step == 0) {
				printf("r"); fflush(stdout);
			}
			MPI_Irecv(recvv[0], szentry, MPI_CHAR, src, 0,
				  MPI_COMM_WORLD, &reqs[i]);
		}
		usleep(usec);
		MPI_Waitall(nentry, reqs, status);
	}
}
/*
 * One-shot non-blocking transfer test.
 *
 * Usage: <prog> [szentry nentry]
 *
 * szentry is the size of one transfer in bytes and nentry the number of
 * transfers; defaults are used unless exactly two arguments are given.
 * After a barrier, sendrecv() is run once and rank 0 prints the elapsed
 * wall-clock time. Returns 0 on success and 1 on invalid sizes or
 * allocation failure.
 */
int main(int argc, char **argv)
{
	int my_rank = -1, size = -1;
	int i;
	int ret = 0;
	char **sendv, **recvv;
	MPI_Status *status;
	MPI_Request *reqs;
	long szentry;
	long nentry;
	int src, dest;
	struct timespec start, end;
	int actual;

	if (argc == 3) {
		szentry = atoi(argv[1]);
		nentry = atoi(argv[2]);
	} else {
		szentry = SZENTRY_DEFAULT;
		nentry = NENTRY_DEFAULT;
	}
	/* Reject sizes that would produce bogus malloc()/mmap() lengths */
	if (szentry <= 0 || nentry <= 0) {
		printf("szentry and nentry must be positive\n");
		return 1;
	}

	status = (MPI_Status *)malloc(sizeof(MPI_Status) * nentry);
	reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * nentry);
	if (!status || !reqs) {
		/* MPI is not initialized yet, so do not go through fn_exit */
		printf("malloc failed");
		free(status);
		free(reqs);
		return 1;
	}

	MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual);
	printf("Thread support level is %d\n", actual);

	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
	MPI_Comm_size(MPI_COMM_WORLD, &size);

	/* Ring neighbours */
	src = (size + my_rank - 1) % size;
	dest = (my_rank + 1) % size;

	printf("rank=%d, size=%d, src=%d, dest=%d\n", my_rank, size, src, dest);

	sendv = malloc(sizeof(char *) * nentry);
	if (!sendv) {
		printf("malloc failed");
		goto fn_fail;
	}
	/* Only slot 0 is mapped; sendrecv() in this file always uses sendv[0] */
	for (i = 0; i < 1; i++) {
		sendv[i] = (char *)mmap(0, szentry, PROT_READ | PROT_WRITE,
					MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
		if (sendv[i] == MAP_FAILED) {
			printf("mmap failed");
			goto fn_fail;
		}
		dprintf("[%d] sendv[%d]=%p\n", my_rank, i, sendv[i]);
		memset(sendv[i], 0xaa, szentry);
	}

	recvv = malloc(sizeof(char *) * nentry);
	if (!recvv) {
		printf("malloc failed");
		goto fn_fail;
	}
	/* Only slot 0 is mapped; sendrecv() in this file always uses recvv[0] */
	for (i = 0; i < 1; i++) {
		recvv[i] = (char *)mmap(0, szentry, PROT_READ|PROT_WRITE,
					MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
		if (recvv[i] == MAP_FAILED) {
			printf("mmap failed");
			goto fn_fail;
		}
		dprintf("[%d] recvv[%d]=%p\n", my_rank, i, recvv[i]);
		memset(recvv[i], 0, szentry);
	}

	printf("after memset\n");

	print_cpu_last_executed_on();

	printf("Before 1st barrier\n"); fflush(stdout);
	MPI_Barrier(MPI_COMM_WORLD);
	if (my_rank == 0) {
		clock_gettime(CLOCK_REALTIME, &start);
	}
	sendrecv(my_rank, nentry, sendv, recvv, szentry, src, dest, reqs,
		 status, 0);
	printf("Before 2nd barrier\n"); fflush(stdout);
	MPI_Barrier(MPI_COMM_WORLD);
	if (my_rank == 0) {
		clock_gettime(CLOCK_REALTIME, &end);
		printf("%4.4f sec\n",
		       DIFFNSEC(end, start) / (double)1000000000);
		fflush(stdout);
	}

fn_exit:
	MPI_Finalize();
	return ret;
fn_fail:
	ret = 1;	/* report the failure through the exit status */
	goto fn_exit;
}
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <sys/mman.h>
#include <mpi.h>
#include <unistd.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <sched.h>
#include "util.h"
//#define DEBUG
#ifdef DEBUG
#define dprintf printf
#else
#define dprintf {}
#endif
#define SZENTRY_DEFAULT (65536) /* Size of one slot */
#define NENTRY_DEFAULT 10000 /* Number of slots */
/*
 * Post nentry non-blocking transfers of szentry bytes and wait for them.
 *
 * Rank 1 posts MPI_Isend() from sendv[0] to dest; every other rank posts
 * MPI_Irecv() into recvv[0] from src, sleeps usec microseconds, and then
 * waits. One request per transfer is stored in reqs[0..nentry-1] and the
 * completion statuses in status[]. A progress character ('s' or 'r') is
 * printed roughly every tenth of the transfers.
 */
void sendrecv(int rank, int nentry, char **sendv, char **recvv, int szentry,
	      int src, int dest, MPI_Request *reqs, MPI_Status *status,
	      double usec)
{
	int i;
	/* 0 when nentry < 10: progress output is skipped instead of dividing by zero */
	int step = nentry / 10;

	if (rank == 1) {
		for (i = 0; i < nentry; i++) {
			if (step > 0 && i % step == 0) {
				printf("s"); fflush(stdout);
			}
			MPI_Isend(sendv[0], szentry, MPI_CHAR, dest, 0,
				  MPI_COMM_WORLD, &reqs[i]);
		}
		printf("\n"); fflush(stdout);
		MPI_Waitall(nentry, reqs, status);
	} else {
		for (i = 0; i < nentry; i++) {
			if (step > 0 && i % step == 0) {
				printf("r"); fflush(stdout);
			}
			MPI_Irecv(recvv[0], szentry, MPI_CHAR, src, 0,
				  MPI_COMM_WORLD, &reqs[i]);
		}
		usleep(usec);
		MPI_Waitall(nentry, reqs, status);
	}
}
/*
 * One-shot non-blocking transfer test.
 *
 * Usage: <prog> [szentry nentry]
 *
 * szentry is the size of one transfer in bytes and nentry the number of
 * transfers; defaults are used unless exactly two arguments are given.
 * After a barrier, sendrecv() is run once and rank 0 prints the elapsed
 * wall-clock time. Returns 0 on success and 1 on invalid sizes or
 * allocation failure.
 */
int main(int argc, char **argv)
{
	int my_rank = -1, size = -1;
	int i;
	int ret = 0;
	char **sendv, **recvv;
	MPI_Status *status;
	MPI_Request *reqs;
	long szentry;
	long nentry;
	int src, dest;
	struct timespec start, end;
	int actual;

	if (argc == 3) {
		szentry = atoi(argv[1]);
		nentry = atoi(argv[2]);
	} else {
		szentry = SZENTRY_DEFAULT;
		nentry = NENTRY_DEFAULT;
	}
	/* Reject sizes that would produce bogus malloc()/mmap() lengths */
	if (szentry <= 0 || nentry <= 0) {
		printf("szentry and nentry must be positive\n");
		return 1;
	}

	status = (MPI_Status *)malloc(sizeof(MPI_Status) * nentry);
	reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * nentry);
	if (!status || !reqs) {
		/* MPI is not initialized yet, so do not go through fn_exit */
		printf("malloc failed");
		free(status);
		free(reqs);
		return 1;
	}

	MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual);
	printf("Thread support level is %d\n", actual);

	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
	MPI_Comm_size(MPI_COMM_WORLD, &size);

	/* Ring neighbours */
	src = (size + my_rank - 1) % size;
	dest = (my_rank + 1) % size;

	printf("rank=%d, size=%d, src=%d, dest=%d\n",
	       my_rank, size, src, dest);

	sendv = malloc(sizeof(char *) * nentry);
	if (!sendv) {
		printf("malloc failed");
		goto fn_fail;
	}
	/* Only slot 0 is mapped; sendrecv() in this file always uses sendv[0] */
	for (i = 0; i < 1; i++) {
		sendv[i] = (char *)mmap(0, szentry, PROT_READ | PROT_WRITE,
					MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
		if (sendv[i] == MAP_FAILED) {
			printf("mmap failed");
			goto fn_fail;
		}
		dprintf("[%d] sendv[%d]=%p\n", my_rank, i, sendv[i]);
		memset(sendv[i], 0xaa, szentry);
	}

	recvv = malloc(sizeof(char *) * nentry);
	if (!recvv) {
		printf("malloc failed");
		goto fn_fail;
	}
	/* Only slot 0 is mapped; sendrecv() in this file always uses recvv[0] */
	for (i = 0; i < 1; i++) {
		recvv[i] = (char *)mmap(0, szentry, PROT_READ|PROT_WRITE,
					MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
		if (recvv[i] == MAP_FAILED) {
			printf("mmap failed");
			goto fn_fail;
		}
		dprintf("[%d] recvv[%d]=%p\n", my_rank, i, recvv[i]);
		memset(recvv[i], 0, szentry);
	}

	printf("after memset\n");

	print_cpu_last_executed_on("main");

	printf("Before 1st barrier\n"); fflush(stdout);
	MPI_Barrier(MPI_COMM_WORLD);
	if (my_rank == 0) {
		clock_gettime(CLOCK_REALTIME, &start);
	}
	sendrecv(my_rank, nentry, sendv, recvv, szentry, src, dest, reqs,
		 status, 0);
	printf("Before 2nd barrier\n"); fflush(stdout);
	MPI_Barrier(MPI_COMM_WORLD);
	if (my_rank == 0) {
		clock_gettime(CLOCK_REALTIME, &end);
		printf("%4.4f sec\n",
		       DIFFNSEC(end, start) / (double)1000000000);
		fflush(stdout);
	}

fn_exit:
	MPI_Finalize();
	return ret;
fn_fail:
	ret = 1;	/* report the failure through the exit status */
	goto fn_exit;
}

View File

@ -1,281 +1,186 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <sys/mman.h>
#include <mpi.h>
#include <unistd.h>
#include <getopt.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <sched.h>
//#define DEBUG
#ifdef DEBUG
#define dprintf printf
#else
#define dprintf {}
#endif
#define DIFFNSEC(end, start) ((end.tv_sec - start.tv_sec) * 1000000000UL + (end.tv_nsec - start.tv_nsec))
/*
 * One fixed unit of busy work: an x86-64 inline-asm loop that zeroes %rcx
 * and then increments it until it exceeds 99 (100 add/compare/branch
 * rounds). Takes no inputs and produces no outputs; %rcx and the
 * condition codes are declared as clobbered. "volatile" keeps the compiler
 * from dropping the statement even though it has no outputs.
 */
static inline void fixed_size_work() {
asm volatile(
"movq $0, %%rcx\n\t"
"1:\t"
"addq $1, %%rcx\n\t"
"cmpq $99, %%rcx\n\t"
"jle 1b\n\t"
:
:
: "rcx", "cc");
}
/*
 * Run fixed_size_work() n times.
 *
 * The counter has the same type as n so that counts above INT_MAX neither
 * overflow a signed int nor rely on a signed/unsigned comparison.
 */
static inline void bulk_fsw(unsigned long n)
{
	unsigned long j;

	for (j = 0; j < n; j++) {
		fixed_size_work();
	}
}
double nspw; /* nsec per work */
unsigned long nsec; /* thread CPU time, in nsec, taken by the calibration run */

/*
 * Calibrate the busy-wait loop: time bulk_fsw(N_INIT) on the calling
 * thread's CPU-time clock, store the total in nsec and the average cost of
 * one unit of work in nspw.
 */
void fwq_init()
{
	struct timespec start, end;

#define N_INIT 10000000
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
	bulk_fsw(N_INIT);
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);

	nsec = DIFFNSEC(end, start);
	nspw = nsec / (double)N_INIT;
}
#if 1
/*
 * Busy-wait for about delay_nsec nanoseconds by running
 * delay_nsec / nspw units of fixed work. Negative delays are ignored.
 * nspw is the per-unit cost stored by fwq_init().
 */
void fwq(long delay_nsec)
{
	if (delay_nsec < 0) {
		return;
	}
	bulk_fsw(delay_nsec / nspw);
}
#else /* For machines with large core-to-core performance variation (e.g. OFP) */
/*
 * Busy-wait until at least delay_nsec nanoseconds of thread CPU time have
 * elapsed, polling the clock between small bursts of fixed work.
 */
void fwq(long delay_nsec)
{
	struct timespec start, end;

	/*
	 * DIFFNSEC() is unsigned, so a negative delay would be compared as a
	 * huge unsigned value and the loop would effectively never end.
	 */
	if (delay_nsec < 0) {
		return;
	}
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
	while (1) {
		clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
		if (DIFFNSEC(end, start) >= delay_nsec) {
			break;
		}
		bulk_fsw(2); /* ~150 ns per iteration on OFP */
	}
}
#endif
/*
 * Print the CPU the calling thread is associated with, from two sources:
 * the 39th space-separated field of /proc/<pid>/task/<tid>/stat (the
 * "processor" field according to proc(5)) and sched_getcpu(), together
 * with the process and thread ids.
 *
 * The PMI_RANK environment variable is printed as the rank; -1 is printed
 * when it is not set.
 *
 * Returns 0 on success, -1 if the stat file could not be opened, read or
 * parsed, or if sched_getcpu() failed.
 */
static int print_cpu_last_executed_on(void)
{
	enum { STAT_BUF_SIZE = 65536, STAT_CPU_FIELD = 39 };
	char fn[256];
	char *result = NULL;	/* NULL so the common exit path can free() it */
	char *next_delim;
	char *field = NULL;
	const char *pmi_rank;
	pid_t tid = syscall(SYS_gettid);
	int fd;
	int offset = 0;
	int cpu;
	int i;
	int mpi_errno = 0;

	snprintf(fn, sizeof(fn), "/proc/%d/task/%d/stat", (int)getpid(), (int)tid);
	fd = open(fn, O_RDONLY);
	if (fd == -1) {
		printf("open() failed\n");
		goto fn_fail;
	}

	result = malloc(STAT_BUF_SIZE);
	if (result == NULL) {
		printf("malloc() failed");
		goto fn_fail;
	}

	/* Never read past the buffer; keep one byte for the terminating NUL */
	while (offset < STAT_BUF_SIZE - 1) {
		ssize_t amount = read(fd, result + offset,
				      STAT_BUF_SIZE - 1 - offset);

		if (amount == -1) {
			printf("read() failed");
			goto fn_fail;
		}
		if (amount == 0) {
			break;
		}
		offset += (int)amount;
	}
	result[offset] = '\0';	/* strsep() needs a NUL-terminated string */

	next_delim = result;
	for (i = 0; i < STAT_CPU_FIELD; i++) {
		field = strsep(&next_delim, " ");
	}
	if (field == NULL) {
		printf("stat has fewer than %d fields\n", STAT_CPU_FIELD);
		goto fn_fail;
	}

	cpu = sched_getcpu();
	if (cpu == -1) {
		printf("sched_getcpu() failed\n");
		goto fn_fail;
	}

	pmi_rank = getenv("PMI_RANK");
	printf("compute thread,pmi_rank=%02d,stat-cpu=%02d,sched_getcpu=%02d,pid=%d,tid=%d\n",
	       pmi_rank ? atoi(pmi_rank) : -1, atoi(field), cpu,
	       (int)getpid(), (int)tid);
	fflush(stdout);

fn_exit:
	if (fd != -1) {
		close(fd);
	}
	free(result);
	return mpi_errno;
fn_fail:
	mpi_errno = -1;
	goto fn_exit;
}
/*
 * Return non-zero when ranks me and you fall into the same group of ppn
 * consecutive ranks, i.e. when me / ppn equals you / ppn.
 */
static inline int on_same_node(int ppn, int me, int you)
{
	const int my_node = me / ppn;
	const int your_node = you / ppn;

	return my_node == your_node;
}
/*
 * isend-calc-wait.
 *
 * For every rank that on_same_node() reports as being on a different node,
 * post one MPI_Irecv() into rbuf and one MPI_Isend() from sbuf, each of
 * ndoubles doubles; the k-th such peer uses the k-th ndoubles-sized chunk
 * of both buffers. Then busy-wait calc_nsec via fwq() and wait for all
 * posted requests. reqs receives two requests per remote peer.
 */
void my_send(int nproc, int ppn, int rank, double *sbuf, double *rbuf,
	     int ndoubles, MPI_Request *reqs, long calc_nsec)
{
	int peer;
	int chunk = 0;	/* chunk index shared by the send and receive buffers */
	int nreq = 0;	/* number of requests posted so far */

	for (peer = 0; peer < nproc; peer++) {
		if (on_same_node(ppn, rank, peer)) {
			continue;
		}
		MPI_Irecv(rbuf + chunk * ndoubles, ndoubles, MPI_DOUBLE,
			  peer, 0, MPI_COMM_WORLD, &reqs[nreq]);
		nreq++;
		MPI_Isend(sbuf + chunk * ndoubles, ndoubles, MPI_DOUBLE,
			  peer, 0, MPI_COMM_WORLD, &reqs[nreq]);
		nreq++;
		chunk++;
	}

	fwq(calc_nsec);

	MPI_Waitall(nreq, reqs, MPI_STATUSES_IGNORE);
}
/* Long options for getopt_long(); the all-zero entry terminates the table. */
static struct option options[] = {
	/* --ppn takes a required argument and is reported as 'P' */
	{ "ppn", required_argument, NULL, 'P' },
	/* end */
	{ NULL, 0, NULL, 0 },
};
/*
 * Communication/computation overlap test for isend/irecv.
 *
 * Usage: <prog> -d <log2 ndoubles> --ppn <ranks per node>
 *
 * Phase 1 times my_send() with no computation (t_pure); phase 2 times it
 * with a busy-wait of t_pure nsec between posting and waiting (t_overall).
 * Both are averaged over the iterations after NSKIP warm-up rounds and
 * reduced with MPI_MAX. Rank 0 prints
 * overlap = (2 * t_pure - t_overall) * 100 / t_pure.
 *
 * Returns 0 on success and 1 on failure after MPI initialization;
 * argument errors exit(1) before MPI is initialized.
 */
int main(int argc, char **argv)
{
	int actual;
	int ppn = -1;
	int nproc;
	int ndoubles = -1;
	int my_rank = -1;
	int i;
	int ret = 0;
	size_t buf_bytes;
	double *sbuf, *rbuf;
	MPI_Request *reqs;
	struct timespec start, end;
	long t_pure_l, t_overall_l;
	long t_pure, t_overall;
	int opt;

	fwq_init();

	while ((opt = getopt_long(argc, argv, "+d:P:", options, NULL)) != -1) {
		switch (opt) {
		case 'd': {
			int shift = atoi(optarg);

			/* 1 << shift must stay within a positive int */
			if (shift < 0 || shift > 30) {
				printf("-d must be between 0 and 30\n");
				exit(1);
			}
			ndoubles = 1 << shift;
			break;
		}
		case 'P':
			ppn = atoi(optarg);
			break;
		default: /* '?' */
			printf("unknown option %c\n", optopt);
			exit(1);
		}
	}

	if (ndoubles == -1 || ppn == -1) {
		printf("specify ndoubles with -d and ppn with --ppn");
		exit(1);
	}
	/* on_same_node() divides by ppn */
	if (ppn <= 0) {
		printf("ppn must be positive\n");
		exit(1);
	}

	MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual);
	/*
	 * NOTE(review): 3 is presumably this MPI implementation's value of
	 * MPI_THREAD_MULTIPLE; confirm before porting.
	 */
	if (actual != 3) {
		printf("ERROR: Thread support level is %d (it should be 3)\n",
		       actual);
		goto fn_fail;
	}

	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
	MPI_Comm_size(MPI_COMM_WORLD, &nproc);

	/* syscall() returns long; cast to match the %d conversions */
	if (my_rank == 0) {
		printf("tid=%d,pid=%d,ndoubles=%d,nproc=%d\n",
		       (int)syscall(__NR_gettid), (int)getpid(), ndoubles,
		       nproc);
		printf("nsec=%ld, nspw=%f\n", nsec, nspw);
	}

	reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * nproc * 2);
	if (!reqs) {
		printf("malloc failed");
		goto fn_fail;
	}

	/* One ndoubles-sized chunk per peer; clear the whole buffer */
	buf_bytes = sizeof(double) * ndoubles * nproc;

	sbuf = malloc(buf_bytes);
	if (!sbuf) {
		printf("malloc failed");
		goto fn_fail;
	}
	memset(sbuf, 0, buf_bytes);
	printf("tid=%d,pid=%d,sbuf=%p\n", (int)syscall(__NR_gettid),
	       (int)getpid(), (void *)sbuf);

	rbuf = malloc(buf_bytes);
	if (!rbuf) {
		printf("malloc failed");
		goto fn_fail;
	}
	memset(rbuf, 0, buf_bytes);
	printf("tid=%d,pid=%d,rbuf=%p\n", (int)syscall(__NR_gettid),
	       (int)getpid(), (void *)rbuf);

	print_cpu_last_executed_on();

	/* Measure isend-wait time */
	MPI_Barrier(MPI_COMM_WORLD);
#define NSKIP 5
#define NPURE 30
	for (i = 0; i < NPURE + NSKIP; i++) {
		if (i == NSKIP) {
			clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
		}
		my_send(nproc, ppn, my_rank, sbuf, rbuf, ndoubles, reqs, 0);
	}
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
	t_pure_l = DIFFNSEC(end, start) / NPURE;
	//printf("t_pure (local): %ld usec\n", t_pure_l / 1000UL);
	MPI_Allreduce(&t_pure_l, &t_pure, 1, MPI_LONG, MPI_MAX, MPI_COMM_WORLD);
	if (my_rank == 0)
		printf("t_pure (max): %ld usec\n", t_pure / 1000UL);

	/* Measure isend-calc-wait time */
	MPI_Barrier(MPI_COMM_WORLD);
#define NOVERALL 30
	for (i = 0; i < NOVERALL + NSKIP; i++) {
		if (i == NSKIP) {
			clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
		}
		my_send(nproc, ppn, my_rank, sbuf, rbuf, ndoubles, reqs,
			t_pure);
	}
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
	t_overall_l = DIFFNSEC(end, start) / NOVERALL;
	//printf("t_overall (local): %ld usec\n", t_overall_l / 1000UL);
	MPI_Allreduce(&t_overall_l, &t_overall, 1, MPI_LONG, MPI_MAX,
		      MPI_COMM_WORLD);
	if (my_rank == 0)
		printf("t_overall (max): %ld usec\n", t_overall / 1000UL);
	if (my_rank == 0) {
		long t_abs = (t_pure * 2) - t_overall;

		printf("overlap: %.2f %%\n", (t_abs * 100) / (double)t_pure);
	}

fn_exit:
	MPI_Finalize();
	return ret;
fn_fail:
	ret = 1;	/* report the failure through the exit status */
	goto fn_exit;
}
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <sys/mman.h>
#include <mpi.h>
#include <unistd.h>
#include <getopt.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <sched.h>
#include "util.h"
#include "fwq.h"
//#define DEBUG
#ifdef DEBUG
#define dprintf printf
#else
#define dprintf {}
#endif
/*
 * Return non-zero when ranks me and you fall into the same group of ppn
 * consecutive ranks, i.e. when me / ppn equals you / ppn.
 */
static inline int on_same_node(int ppn, int me, int you)
{
	const int my_node = me / ppn;
	const int your_node = you / ppn;

	return my_node == your_node;
}
/*
 * isend-calc-wait.
 *
 * For every rank that on_same_node() reports as being on a different node,
 * post one MPI_Irecv() into rbuf and one MPI_Isend() from sbuf, each of
 * ndoubles doubles; the k-th such peer uses the k-th ndoubles-sized chunk
 * of both buffers. Then busy-wait calc_nsec via fwq() and wait for all
 * posted requests. reqs receives two requests per remote peer.
 */
void my_send(int nproc, int ppn, int rank, double *sbuf, double *rbuf,
	     int ndoubles, MPI_Request *reqs, long calc_nsec)
{
	int peer;
	int chunk = 0;	/* chunk index shared by the send and receive buffers */
	int nreq = 0;	/* number of requests posted so far */

	for (peer = 0; peer < nproc; peer++) {
		if (on_same_node(ppn, rank, peer)) {
			continue;
		}
		MPI_Irecv(rbuf + chunk * ndoubles, ndoubles, MPI_DOUBLE,
			  peer, 0, MPI_COMM_WORLD, &reqs[nreq]);
		nreq++;
		MPI_Isend(sbuf + chunk * ndoubles, ndoubles, MPI_DOUBLE,
			  peer, 0, MPI_COMM_WORLD, &reqs[nreq]);
		nreq++;
		chunk++;
	}

	fwq(calc_nsec);

	MPI_Waitall(nreq, reqs, MPI_STATUSES_IGNORE);
}
/* Long options for getopt_long(); the all-zero entry terminates the table. */
static struct option options[] = {
	/* --ppn takes a required argument and is reported as 'P' */
	{ "ppn", required_argument, NULL, 'P' },
	/* end */
	{ NULL, 0, NULL, 0 },
};
/*
 * Communication/computation overlap test for isend/irecv.
 *
 * Usage: <prog> -d <log2 ndoubles> --ppn <ranks per node>
 *
 * Phase 1 times my_send() with no computation (t_pure); phase 2 times it
 * with a busy-wait of t_pure nsec between posting and waiting (t_overall).
 * Both are averaged over the iterations after NSKIP warm-up rounds and
 * reduced with MPI_MAX. Rank 0 prints
 * overlap = (2 * t_pure - t_overall) * 100 / t_pure.
 *
 * Returns 0 on success and 1 on failure after MPI initialization;
 * argument errors exit(1) before MPI is initialized.
 */
int main(int argc, char **argv)
{
	int actual;
	int ppn = -1;
	int nproc;
	int ndoubles = -1;
	int my_rank = -1;
	int i;
	int ret = 0;
	size_t buf_bytes;
	double *sbuf, *rbuf;
	MPI_Request *reqs;
	struct timespec start, end;
	long t_pure_l, t_overall_l;
	long t_pure, t_overall;
	int opt;

	fwq_init();

	while ((opt = getopt_long(argc, argv, "+d:P:", options, NULL)) != -1) {
		switch (opt) {
		case 'd': {
			int shift = atoi(optarg);

			/* 1 << shift must stay within a positive int */
			if (shift < 0 || shift > 30) {
				printf("-d must be between 0 and 30\n");
				exit(1);
			}
			ndoubles = 1 << shift;
			break;
		}
		case 'P':
			ppn = atoi(optarg);
			break;
		default: /* '?' */
			printf("unknown option %c\n", optopt);
			exit(1);
		}
	}

	if (ndoubles == -1 || ppn == -1) {
		printf("specify ndoubles with -d and ppn with --ppn");
		exit(1);
	}
	/* on_same_node() divides by ppn */
	if (ppn <= 0) {
		printf("ppn must be positive\n");
		exit(1);
	}

	MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual);
	/*
	 * NOTE(review): 3 is presumably this MPI implementation's value of
	 * MPI_THREAD_MULTIPLE; confirm before porting.
	 */
	if (actual != 3) {
		printf("ERROR: Thread support level is %d (it should be 3)\n",
		       actual);
		goto fn_fail;
	}

	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
	MPI_Comm_size(MPI_COMM_WORLD, &nproc);

	/* syscall() returns long; cast to match the %d conversions */
	if (my_rank == 0) {
		printf("tid=%d,pid=%d,ndoubles=%d,nproc=%d\n",
		       (int)syscall(__NR_gettid), (int)getpid(), ndoubles,
		       nproc);
		printf("nsec=%ld, nspw=%f\n", nsec, nspw);
	}

	reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * nproc * 2);
	if (!reqs) {
		printf("malloc failed");
		goto fn_fail;
	}

	/* One ndoubles-sized chunk per peer; clear the whole buffer */
	buf_bytes = sizeof(double) * ndoubles * nproc;

	sbuf = malloc(buf_bytes);
	if (!sbuf) {
		printf("malloc failed");
		goto fn_fail;
	}
	memset(sbuf, 0, buf_bytes);
	printf("tid=%d,pid=%d,sbuf=%p\n", (int)syscall(__NR_gettid),
	       (int)getpid(), (void *)sbuf);

	rbuf = malloc(buf_bytes);
	if (!rbuf) {
		printf("malloc failed");
		goto fn_fail;
	}
	memset(rbuf, 0, buf_bytes);
	printf("tid=%d,pid=%d,rbuf=%p\n", (int)syscall(__NR_gettid),
	       (int)getpid(), (void *)rbuf);

	print_cpu_last_executed_on("main");

	/* Measure isend-wait time */
	MPI_Barrier(MPI_COMM_WORLD);
#define NSKIP 5
#define NPURE 30
	for (i = 0; i < NPURE + NSKIP; i++) {
		if (i == NSKIP) {
			clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
		}
		my_send(nproc, ppn, my_rank, sbuf, rbuf, ndoubles, reqs, 0);
	}
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
	t_pure_l = DIFFNSEC(end, start) / NPURE;
	//printf("t_pure (local): %ld usec\n", t_pure_l / 1000UL);
	MPI_Allreduce(&t_pure_l, &t_pure, 1, MPI_LONG, MPI_MAX, MPI_COMM_WORLD);
	if (my_rank == 0)
		printf("t_pure (max): %ld usec\n", t_pure / 1000UL);

	/* Measure isend-calc-wait time */
	MPI_Barrier(MPI_COMM_WORLD);
#define NOVERALL 30
	for (i = 0; i < NOVERALL + NSKIP; i++) {
		if (i == NSKIP) {
			clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
		}
		my_send(nproc, ppn, my_rank, sbuf, rbuf, ndoubles, reqs,
			t_pure);
	}
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
	t_overall_l = DIFFNSEC(end, start) / NOVERALL;
	//printf("t_overall (local): %ld usec\n", t_overall_l / 1000UL);
	MPI_Allreduce(&t_overall_l, &t_overall, 1, MPI_LONG, MPI_MAX,
		      MPI_COMM_WORLD);
	if (my_rank == 0)
		printf("t_overall (max): %ld usec\n", t_overall / 1000UL);
	if (my_rank == 0) {
		long t_abs = (t_pure * 2) - t_overall;

		printf("overlap: %.2f %%\n", (t_abs * 100) / (double)t_pure);
	}

fn_exit:
	MPI_Finalize();
	return ret;
fn_fail:
	ret = 1;	/* report the failure through the exit status */
	goto fn_exit;
}

View File

@ -1,338 +1,250 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <sys/mman.h>
#include <mpi.h>
#include <unistd.h>
#include <getopt.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <sched.h>
//#define DEBUG
#ifdef DEBUG
#define dprintf printf
#else
#define dprintf {}
#endif
#define DIFFNSEC(end, start) ((end.tv_sec - start.tv_sec) * 1000000000UL + (end.tv_nsec - start.tv_nsec))
#if 1
#define BEGIN_EPOCH(win) do { MPI_Win_fence(0, win); } while(0)
#define END_EPOCH(win) do { MPI_Win_fence(0, win); } while(0)
#define BAR_EPOCH do { } while(0)
#else
#define BEGIN_EPOCH(win) do { MPI_Win_lock_all(0, win); } while(0)
#define END_EPOCH(win) do { MPI_Win_unlock_all(win); } while(0)
#define BAR_EPOCH do { MPI_Barrier(MPI_COMM_WORLD); } while(0)
#endif
/*
 * One fixed unit of busy work: an x86-64 inline-asm loop that zeroes %rcx
 * and then increments it until it exceeds 99 (100 add/compare/branch
 * rounds). Takes no inputs and produces no outputs; %rcx and the
 * condition codes are declared as clobbered. "volatile" keeps the compiler
 * from dropping the statement even though it has no outputs.
 */
static inline void fixed_size_work() {
asm volatile(
"movq $0, %%rcx\n\t"
"1:\t"
"addq $1, %%rcx\n\t"
"cmpq $99, %%rcx\n\t"
"jle 1b\n\t"
:
:
: "rcx", "cc");
}
/*
 * Run fixed_size_work() n times.
 *
 * The counter has the same type as n so that counts above INT_MAX neither
 * overflow a signed int nor rely on a signed/unsigned comparison.
 */
static inline void bulk_fsw(unsigned long n)
{
	unsigned long j;

	for (j = 0; j < n; j++) {
		fixed_size_work();
	}
}
double nspw; /* nsec per work */
unsigned long nsec; /* thread CPU time, in nsec, taken by the calibration run */

/*
 * Calibrate the busy-wait loop: time bulk_fsw(N_INIT) on the calling
 * thread's CPU-time clock, store the total in nsec and the average cost of
 * one unit of work in nspw.
 */
void fwq_init()
{
	struct timespec start, end;

#define N_INIT 10000000
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
	bulk_fsw(N_INIT);
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);

	nsec = DIFFNSEC(end, start);
	nspw = nsec / (double)N_INIT;
}
#if 1
/*
 * Busy-wait for about delay_nsec nanoseconds by running
 * delay_nsec / nspw units of fixed work. Negative delays are ignored.
 */
void fwq(long delay_nsec)
{
	if (delay_nsec >= 0) {
		bulk_fsw(delay_nsec / nspw);
	}
}
#else /* For machines with large core-to-core performance variation (e.g. OFP) */
/*
 * Busy-wait until at least delay_nsec nanoseconds of thread CPU time have
 * elapsed, polling the clock between small bursts of fixed work.
 * Negative delays are ignored.
 */
void fwq(long delay_nsec)
{
	struct timespec t0, now;

	if (delay_nsec < 0)
		return;

	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &t0);
	for (;;) {
		clock_gettime(CLOCK_THREAD_CPUTIME_ID, &now);
		if (DIFFNSEC(now, t0) >= delay_nsec)
			return;
		bulk_fsw(2); /* ~150 ns per iteration on OFP */
	}
}
#endif
/*
 * Print the CPU the calling thread is associated with, from two sources:
 * the 39th space-separated field of /proc/<pid>/task/<tid>/stat (the
 * "processor" field according to proc(5)) and sched_getcpu().
 *
 * The PMI_RANK environment variable is printed as the rank; -1 is printed
 * when it is not set.
 *
 * Returns 0 on success, -1 if the stat file could not be opened, read or
 * parsed, or if sched_getcpu() failed.
 */
static int print_cpu_last_executed_on(void)
{
	enum { STAT_BUF_SIZE = 65536, STAT_CPU_FIELD = 39 };
	char fn[256];
	char *result = NULL;	/* NULL so the common exit path can free() it */
	char *next_delim;
	char *field = NULL;
	const char *pmi_rank;
	pid_t tid = syscall(SYS_gettid);
	int fd;
	int offset = 0;
	int cpu;
	int i;
	int mpi_errno = 0;

	snprintf(fn, sizeof(fn), "/proc/%d/task/%d/stat", (int)getpid(), (int)tid);
	fd = open(fn, O_RDONLY);
	if (fd == -1) {
		printf("open() failed\n");
		goto fn_fail;
	}

	result = malloc(STAT_BUF_SIZE);
	if (result == NULL) {
		printf("malloc() failed");
		goto fn_fail;
	}

	/* Never read past the buffer; keep one byte for the terminating NUL */
	while (offset < STAT_BUF_SIZE - 1) {
		ssize_t amount = read(fd, result + offset,
				      STAT_BUF_SIZE - 1 - offset);

		if (amount == -1) {
			printf("read() failed");
			goto fn_fail;
		}
		if (amount == 0) {
			break;
		}
		offset += (int)amount;
	}
	result[offset] = '\0';	/* strsep() needs a NUL-terminated string */

	next_delim = result;
	for (i = 0; i < STAT_CPU_FIELD; i++) {
		field = strsep(&next_delim, " ");
	}
	if (field == NULL) {
		printf("stat has fewer than %d fields\n", STAT_CPU_FIELD);
		goto fn_fail;
	}

	cpu = sched_getcpu();
	if (cpu == -1) {
		printf("sched_getcpu() failed\n");
		goto fn_fail;
	}

	pmi_rank = getenv("PMI_RANK");
	printf("compute thread,pmi_rank=%02d,stat-cpu=%02d,sched_getcpu=%02d,tid=%d\n",
	       pmi_rank ? atoi(pmi_rank) : -1, atoi(field), cpu, (int)tid);
	fflush(stdout);

fn_exit:
	if (fd != -1) {
		close(fd);
	}
	free(result);
	return mpi_errno;
fn_fail:
	mpi_errno = -1;
	goto fn_exit;
}
/*
 * Return non-zero when ranks me and you fall into the same group of ppn
 * consecutive ranks (presumably one node's worth of processes - confirm
 * against the launcher's rank placement).
 */
static inline int on_same_node(int ppn, int me, int you)
{
	int my_group = me / ppn;
	int your_group = you / ppn;

	return my_group == your_group;
}
/*
 * fence-accumulate-calc-fence
 *
 * Inside one epoch, issue one single-element MPI_SUM accumulate per double
 * to every rank that is not on this rank's node, run calc_nsec of busy work,
 * then close the epoch.  The slice used for rank i starts at element
 * i * ndoubles, both in rbuf (origin) and in the target window.
 * wbuf is not referenced here.
 */
void accumulate(int nproc, int ppn, int rank, double *wbuf, double *rbuf,
		int ndoubles, MPI_Win win, long calc_nsec)
{
	int peer, k;

	BEGIN_EPOCH(win);
	for (peer = 0; peer < nproc; peer++) {
		if (on_same_node(ppn, rank, peer)) {
			continue;
		}
		for (k = 0; k < ndoubles; k++) {
			int idx = peer * ndoubles + k;

			MPI_Accumulate(rbuf + idx, 1, MPI_DOUBLE, peer, idx,
				       1, MPI_DOUBLE, MPI_SUM, win);
		}
	}
	fwq(calc_nsec);
	END_EPOCH(win);
}
/* Long options for getopt_long(): "--ppn <arg>" is reported as 'P' */
static struct option options[] = {
	{ .name = "ppn", .has_arg = required_argument, .flag = NULL, .val = 'P' },
	/* end */
	{ .name = NULL, .has_arg = 0, .flag = NULL, .val = 0 },
};
int main(int argc, char **argv) {
int rc;
int actual;
int ppn = -1;
int nproc;
int ndoubles = -1;
int my_rank = -1, size = -1;
int i, j;
double *wbuf, *rbuf;
MPI_Win win;
struct timespec start, end;
long t_fence_l, t_pure_l, t_overall_l;
long t_fence, t_pure, t_overall;
int opt;
fwq_init();
while ((opt = getopt_long(argc, argv, "+d:P:", options, NULL)) != -1) {
switch (opt) {
case 'd':
ndoubles = (1ULL << atoi(optarg));
break;
case 'P':
ppn = atoi(optarg);
break;
default: /* '?' */
printf("unknown option %c\n", optopt);
exit(1);
}
}
if (ndoubles == -1 || ppn == -1) {
printf("specify ndoubles with -d and ppn with --ppn");
exit(1);
}
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual);
if (actual != 3) {
printf("ERROR: MPI_THREAD_MULTIPLE not available (level was set to %d)\n", actual);
exit(1);
}
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
if (my_rank == 0) {
printf("ndoubles=%d,nproc=%d\n", ndoubles, nproc);
printf("nsec=%ld, nspw=%f\n", nsec, nspw);
}
/* write-to buffer */
wbuf = malloc(sizeof(double) * ndoubles * nproc);
if(!wbuf) { printf("malloc failed"); goto fn_fail; }
memset(wbuf, 0, sizeof(double) * ndoubles * nproc);
/* read-from buffer */
rbuf = malloc(sizeof(double) * ndoubles * nproc);
if(!rbuf) { printf("malloc failed"); goto fn_fail; }
memset(rbuf, 0, sizeof(double) * ndoubles * nproc);
if (rc = MPI_Win_create(wbuf, sizeof(double) * ndoubles * nproc, sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &win)) {
printf("MPI_Win_create failed,rc=%d\n", rc);
}
print_cpu_last_executed_on();
for (i = 0; i < nproc; i++) {
for (j = 0; j < ndoubles; j++) {
wbuf[i * ndoubles + j] = i + 1 + j;
rbuf[i * ndoubles + j] = (i + 1) * 2 + j;
}
}
#if 0
for (i = 0; i < nproc; i++) {
for (j = 0; j < ndoubles; j++) {
printf("wbuf,proc=%d,j=%d,val=%f\n", i, j, wbuf[i * ndoubles + j]);
printf("rbuf,proc=%d,j=%d,val=%f\n", i, j, rbuf[i * ndoubles + j]);
}
}
#endif
/* Measure fence-fence time */
MPI_Barrier(MPI_COMM_WORLD);
#define NSKIP 5
#define NFENCE 30
for (i = 0; i < NFENCE + NSKIP; i++) {
if (i == NSKIP) {
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
}
BEGIN_EPOCH(win);
END_EPOCH(win);
}
BAR_EPOCH;
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
t_fence_l = DIFFNSEC(end, start) / NFENCE;
//printf("t_fence (local): %ld usec\n", t_fence_l / 1000UL);
MPI_Allreduce(&t_fence_l, &t_fence, 1, MPI_LONG, MPI_MAX, MPI_COMM_WORLD);
if (my_rank == 0) printf("t_fence (max): %ld usec\n", t_fence / 1000UL);
/* Measure fence-acc-fence time */
MPI_Barrier(MPI_COMM_WORLD);
#define NPURE 30
for (i = 0; i < NPURE + NSKIP; i++) {
if (i == NSKIP) {
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
}
accumulate(nproc, ppn, my_rank, wbuf, rbuf, ndoubles, win, 0);
}
BAR_EPOCH;
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
t_pure_l = DIFFNSEC(end, start) / NPURE;
//printf("t_pure (local): %ld usec\n", t_pure_l / 1000UL);
MPI_Allreduce(&t_pure_l, &t_pure, 1, MPI_LONG, MPI_MAX, MPI_COMM_WORLD);
if (my_rank == 0) printf("t_pure (max): %ld usec\n", t_pure / 1000UL);
#if 0
for (i = 0; i < nproc; i++) {
for (j = 0; j < ndoubles; j++) {
printf("wbuf,proc=%d,j=%d,val=%f\n", i, j, wbuf[i * ndoubles + j]);
printf("rbuf,proc=%d,j=%d,val=%f\n", i, j, rbuf[i * ndoubles + j]);
}
}
#endif
/* Measure fenc-acc-calc-fence time */
MPI_Barrier(MPI_COMM_WORLD);
#define NOVERALL 30
for (i = 0; i < NOVERALL + NSKIP; i++) {
if (i == NSKIP) {
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
}
accumulate(nproc, ppn, my_rank, wbuf, rbuf, ndoubles, win, t_pure - t_fence);
}
BAR_EPOCH;
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
t_overall_l = DIFFNSEC(end, start) / NOVERALL;
//printf("t_overall (local): %ld usec\n", t_overall_l / 1000UL);
MPI_Allreduce(&t_overall_l, &t_overall, 1, MPI_LONG, MPI_MAX, MPI_COMM_WORLD);
if (my_rank == 0) printf("t_overall (max): %ld usec\n", t_overall / 1000UL);
if (my_rank == 0) {
long t_abs = (t_pure * 2) - t_overall;
printf("overlap: %.2f %%\n", (t_abs * 100) / (double)t_pure);
}
fn_exit:
MPI_Finalize();
return 0;
fn_fail:
goto fn_exit;
}
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <sys/mman.h>
#include <mpi.h>
#include <unistd.h>
#include <getopt.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <sched.h>
#include "util.h"
#include "fwq.h"
/* Debug switch: uncomment the next line to make dprintf an alias of printf */
//#define DEBUG
#ifdef DEBUG
#define dprintf printf
#else
/*
 * With DEBUG off, "dprintf(args);" expands to "{}(args);": an empty block
 * followed by a parenthesized expression statement, so nothing is printed
 * (the arguments are still evaluated).
 * NOTE(review): this only works where a statement list is allowed, and the
 * macro hides the dprintf() that <stdio.h> declares on POSIX systems.
 */
#define dprintf {}
#endif
/*
 * RMA epoch delimiters.  The active variant opens and closes an epoch with
 * MPI_Win_fence() and makes BAR_EPOCH a no-op; the disabled variant uses
 * MPI_Win_lock_all()/MPI_Win_unlock_all() and makes BAR_EPOCH a barrier on
 * MPI_COMM_WORLD.
 */
#if 1
#define BEGIN_EPOCH(win) do { MPI_Win_fence(0, win); } while (0)
#define END_EPOCH(win) do { MPI_Win_fence(0, win); } while (0)
#define BAR_EPOCH do { } while (0)
#else
#define BEGIN_EPOCH(win) do { MPI_Win_lock_all(0, win); } while (0)
#define END_EPOCH(win) do { MPI_Win_unlock_all(win); } while (0)
#define BAR_EPOCH do { MPI_Barrier(MPI_COMM_WORLD); } while (0)
#endif
/*
 * Return non-zero when ranks me and you fall into the same group of ppn
 * consecutive ranks (presumably one node's worth of processes - confirm
 * against the launcher's rank placement).
 */
static inline int on_same_node(int ppn, int me, int you)
{
	int my_group = me / ppn;
	int your_group = you / ppn;

	return my_group == your_group;
}
/*
 * fence-accumulate-calc-fence
 *
 * Inside one epoch, issue one single-element MPI_SUM accumulate per double
 * to every rank that is not on this rank's node, run calc_nsec of busy work,
 * then close the epoch.  The slice used for rank i starts at element
 * i * ndoubles, both in rbuf (origin) and in the target window.
 * wbuf is not referenced here.
 */
void accumulate(int nproc, int ppn, int rank, double *wbuf, double *rbuf,
		int ndoubles, MPI_Win win, long calc_nsec)
{
	int peer, k;

	BEGIN_EPOCH(win);
	for (peer = 0; peer < nproc; peer++) {
		if (on_same_node(ppn, rank, peer)) {
			continue;
		}
		for (k = 0; k < ndoubles; k++) {
			int idx = peer * ndoubles + k;

			MPI_Accumulate(rbuf + idx, 1, MPI_DOUBLE, peer, idx,
				       1, MPI_DOUBLE, MPI_SUM, win);
		}
	}
	fwq(calc_nsec);
	END_EPOCH(win);
}
/* Long options for getopt_long(): "--ppn <arg>" is reported as 'P' */
static struct option options[] = {
	{ .name = "ppn", .has_arg = required_argument, .flag = NULL, .val = 'P' },
	/* end */
	{ .name = NULL, .has_arg = 0, .flag = NULL, .val = 0 },
};
int main(int argc, char **argv)
{
int rc;
int actual;
int ppn = -1;
int nproc;
int ndoubles = -1;
int my_rank = -1, size = -1;
int i, j;
double *wbuf, *rbuf;
MPI_Win win;
struct timespec start, end;
long t_fence_l, t_pure_l, t_overall_l;
long t_fence, t_pure, t_overall;
int opt;
fwq_init();
while ((opt = getopt_long(argc, argv, "+d:P:", options, NULL)) != -1) {
switch (opt) {
case 'd':
ndoubles = (1ULL << atoi(optarg));
break;
case 'P':
ppn = atoi(optarg);
break;
default: /* '?' */
printf("unknown option %c\n", optopt);
exit(1);
}
}
if (ndoubles == -1 || ppn == -1) {
printf("specify ndoubles with -d and ppn with --ppn");
exit(1);
}
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual);
if (actual != 3) {
printf("ERROR: MPI_THREAD_MULTIPLE not available "
"(level was set to %d)\n",
actual);
exit(1);
}
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
if (my_rank == 0) {
printf("ndoubles=%d,nproc=%d\n", ndoubles, nproc);
printf("nsec=%ld, nspw=%f\n", nsec, nspw);
}
/* write-to buffer */
wbuf = malloc(sizeof(double) * ndoubles * nproc);
if (!wbuf) {
printf("malloc failed");
goto fn_fail;
}
memset(wbuf, 0, sizeof(double) * ndoubles * nproc);
/* read-from buffer */
rbuf = malloc(sizeof(double) * ndoubles * nproc);
if (!rbuf) {
printf("malloc failed");
goto fn_fail;
}
memset(rbuf, 0, sizeof(double) * ndoubles * nproc);
if (rc = MPI_Win_create(wbuf, sizeof(double) * ndoubles * nproc,
sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD,
&win)) {
printf("MPI_Win_create failed,rc=%d\n", rc);
}
print_cpu_last_executed_on("main");
for (i = 0; i < nproc; i++) {
for (j = 0; j < ndoubles; j++) {
wbuf[i * ndoubles + j] = i + 1 + j;
rbuf[i * ndoubles + j] = (i + 1) * 2 + j;
}
}
#if 0
for (i = 0; i < nproc; i++) {
for (j = 0; j < ndoubles; j++) {
printf("wbuf,proc=%d,j=%d,val=%f\n",
i, j, wbuf[i * ndoubles + j]);
printf("rbuf,proc=%d,j=%d,val=%f\n",
i, j, rbuf[i * ndoubles + j]);
}
}
#endif
/* Measure fence-fence time */
MPI_Barrier(MPI_COMM_WORLD);
#define NSKIP 5
#define NFENCE 30
for (i = 0; i < NFENCE + NSKIP; i++) {
if (i == NSKIP) {
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
}
BEGIN_EPOCH(win);
END_EPOCH(win);
}
BAR_EPOCH;
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
t_fence_l = DIFFNSEC(end, start) / NFENCE;
//printf("t_fence (local): %ld usec\n", t_fence_l / 1000UL);
MPI_Allreduce(&t_fence_l, &t_fence, 1, MPI_LONG, MPI_MAX,
MPI_COMM_WORLD);
if (my_rank == 0)
printf("t_fence (max): %ld usec\n", t_fence / 1000UL);
/* Measure fence-acc-fence time */
MPI_Barrier(MPI_COMM_WORLD);
#define NPURE 30
for (i = 0; i < NPURE + NSKIP; i++) {
if (i == NSKIP) {
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
}
accumulate(nproc, ppn, my_rank, wbuf, rbuf, ndoubles, win, 0);
}
BAR_EPOCH;
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
t_pure_l = DIFFNSEC(end, start) / NPURE;
//printf("t_pure (local): %ld usec\n", t_pure_l / 1000UL);
MPI_Allreduce(&t_pure_l, &t_pure, 1, MPI_LONG, MPI_MAX,
MPI_COMM_WORLD);
if (my_rank == 0)
printf("t_pure (max): %ld usec\n", t_pure / 1000UL);
#if 0
for (i = 0; i < nproc; i++) {
for (j = 0; j < ndoubles; j++) {
printf("wbuf,proc=%d,j=%d,val=%f\n",
i, j, wbuf[i * ndoubles + j]);
printf("rbuf,proc=%d,j=%d,val=%f\n",
i, j, rbuf[i * ndoubles + j]);
}
}
#endif
/* Measure fenc-acc-calc-fence time */
MPI_Barrier(MPI_COMM_WORLD);
#define NOVERALL 30
for (i = 0; i < NOVERALL + NSKIP; i++) {
if (i == NSKIP) {
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
}
accumulate(nproc, ppn, my_rank, wbuf, rbuf, ndoubles, win,
t_pure - t_fence);
}
BAR_EPOCH;
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
t_overall_l = DIFFNSEC(end, start) / NOVERALL;
//printf("t_overall (local): %ld usec\n", t_overall_l / 1000UL);
MPI_Allreduce(&t_overall_l, &t_overall, 1, MPI_LONG, MPI_MAX,
MPI_COMM_WORLD);
if (my_rank == 0)
printf("t_overall (max): %ld usec\n", t_overall / 1000UL);
if (my_rank == 0) {
long t_abs = (t_pure * 2) - t_overall;
printf("overlap: %.2f %%\n", (t_abs * 100) / (double)t_pure);
}
fn_exit:
MPI_Finalize();
return 0;
fn_fail:
goto fn_exit;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,537 +1,459 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <sys/mman.h>
#include <mpi.h>
#include <unistd.h>
#include <getopt.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <sched.h>
#include <errno.h>
#include <psm2.h> /* required for core PSM2 functions */
#include <psm2_mq.h> /* required for PSM2 MQ functions (send, recv, etc) */
/* Debug switch: uncomment the next line to make dprintf an alias of printf */
//#define DEBUG
#ifdef DEBUG
#define dprintf printf
#else
/*
 * With DEBUG off, "dprintf(args);" expands to "{}(args);": an empty block
 * followed by a parenthesized expression statement, so nothing is printed
 * (the arguments are still evaluated).
 * NOTE(review): this only works where a statement list is allowed, and the
 * macro hides the dprintf() that <stdio.h> declares on POSIX systems.
 */
#define dprintf {}
#endif
/* Size in bytes of the PSM2 message buffer: 1 << 12 = 4096 */
#define BUFFER_LENGTH /*8000000*/(1ULL<<12)
/* Number of slots in the arrays passed to psm2_ep_connect() */
#define CONNECT_ARRAY_SIZE 8
/*
 * Report "<msg>: <rc>" on stderr and flush it.
 * Despite the name this function returns to its caller.
 */
void die(char *msg, int rc)
{
	FILE *out = stderr;

	fprintf(out, "%s: %d\n", msg, rc);
	fflush(out);
}
/*
 * Elapsed nanoseconds between two struct timespec values (end - start).
 * Both arguments are parenthesized so that any expression yielding a
 * struct timespec (for example a dereferenced pointer) can be passed.
 */
#define DIFFNSEC(end, start) \
	(((end).tv_sec - (start).tv_sec) * 1000000000UL + \
	 ((end).tv_nsec - (start).tv_nsec))
/*
 * One fixed unit of busy work, used as the building block for calibrated
 * delays.  The inline assembly zeroes %rcx and then increments it until it
 * is greater than 99, so the add/compare/branch sequence runs 100 times.
 * x86-64 only: it uses %rcx and 64-bit AT&T-syntax instructions.
 */
static inline void fixed_size_work() {
/* volatile: the statement has no outputs, so it must not be optimized away */
asm volatile(
"movq $0, %%rcx\n\t"
"1:\t"
"addq $1, %%rcx\n\t"
"cmpq $99, %%rcx\n\t"
"jle 1b\n\t"
:
:
/* clobbers: the counter register and the condition codes */
: "rcx", "cc");
}
/*
 * Run fixed_size_work() n times.
 *
 * The loop counter has the same type as n: with a signed int counter the
 * comparison against an unsigned long would overflow the counter (undefined
 * behavior) for n larger than INT_MAX.
 */
static inline void bulk_fsw(unsigned long n)
{
	unsigned long j;

	for (j = 0; j < n; j++) {
		fixed_size_work();
	}
}
/* Calibration results, filled in by fwq_init() */
double nspw; /* nsec per work */
unsigned long nsec;

/* Number of fixed_size_work() calls timed by fwq_init() */
#define N_INIT 10000000

/*
 * Calibrate the busy-work loop: time N_INIT calls of fixed_size_work() on
 * the thread CPU-time clock, store the total in nsec and the per-call cost
 * in nspw.
 */
void fwq_init()
{
	struct timespec t_begin, t_end;

	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &t_begin);
	bulk_fsw(N_INIT);
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &t_end);

	nsec = DIFFNSEC(t_end, t_begin);
	nspw = (double)nsec / N_INIT;
}
#if 1
/*
 * Burn roughly delay_nsec nanoseconds of CPU time by running the number of
 * work units derived from the calibrated nspw.  Negative delays are ignored.
 */
void fwq(long delay_nsec)
{
	if (delay_nsec < 0) {
		return;
	}
	bulk_fsw(delay_nsec / nspw);
}
#else /* For machines with large core-to-core performance variation (e.g. OFP) */
/*
 * Clock-driven variant: keep doing small amounts of work until the thread
 * CPU-time clock has advanced by delay_nsec.
 */
void fwq(long delay_nsec)
{
	struct timespec start, end;

	/*
	 * A negative delay would be converted to a huge unsigned value in
	 * the comparison below and spin practically forever; ignore it like
	 * the calibrated variant does.
	 */
	if (delay_nsec < 0) {
		return;
	}

	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
	while (1) {
		clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
		if (DIFFNSEC(end, start) >= delay_nsec) {
			break;
		}
		bulk_fsw(2); /* ~150 ns per iteration on OFP */
	}
}
#endif
/*
 * Print the CPU the calling thread last ran on, obtained two ways: the 39th
 * space-separated field of /proc/<pid>/task/<tid>/stat and sched_getcpu().
 * The PMI_RANK environment variable is printed as the rank (-1 if unset).
 *
 * NOTE(review): the field is located by counting spaces, so a command name
 * containing spaces would shift the count - confirm this cannot happen here.
 *
 * Returns 0 on success, -1 on failure.
 */
static int print_cpu_last_executed_on()
{
	enum { STAT_BUF_SIZE = 65536, STAT_CPU_FIELD = 39 };
	char fn[256];
	char *result = NULL;	/* NULL so the cleanup path may always free it */
	char *next_delim;
	char *field = NULL;
	const char *rank_str;
	pid_t tid = syscall(SYS_gettid);
	ssize_t amount;
	size_t offset = 0;
	int fd = -1;
	int cpu;
	int i;
	int mpi_errno = 0;

	snprintf(fn, sizeof(fn), "/proc/%d/task/%d/stat", getpid(), (int)tid);

	fd = open(fn, O_RDONLY);
	if (fd == -1) {
		printf("open() failed\n");
		goto fn_fail;
	}

	result = malloc(STAT_BUF_SIZE);
	if (result == NULL) {
		printf("malloc() failed");
		goto fn_fail;
	}

	/* Never read past the buffer; keep one byte for the terminator */
	while (offset < STAT_BUF_SIZE - 1) {
		amount = read(fd, result + offset, STAT_BUF_SIZE - 1 - offset);
		if (amount == -1) {
			printf("read() failed");
			goto fn_fail;
		}
		if (amount == 0) {
			break;
		}
		offset += (size_t)amount;
	}
	result[offset] = '\0';	/* strsep() needs a NUL-terminated string */

	next_delim = result;
	for (i = 0; i < STAT_CPU_FIELD; i++) {
		field = strsep(&next_delim, " ");
	}
	if (field == NULL) {
		/* Fewer fields than expected */
		printf("stat parse failed\n");
		goto fn_fail;
	}

	cpu = sched_getcpu();
	if (cpu == -1) {
		printf("sched_getcpu() failed\n");
		goto fn_fail;
	}

	rank_str = getenv("PMI_RANK");
	printf("compute thread,pmi_rank=%02d,stat-cpu=%02d,sched_getcpu=%02d,pid=%d,tid=%d\n",
	       rank_str ? atoi(rank_str) : -1, atoi(field), cpu,
	       (int)getpid(), (int)tid);
	fflush(stdout);

fn_exit:
	if (fd != -1) {
		close(fd);
	}
	free(result);
	return mpi_errno;
fn_fail:
	mpi_errno = -1;
	goto fn_exit;
}
/*
 * Return non-zero when ranks me and you fall into the same group of ppn
 * consecutive ranks (presumably one node's worth of processes - confirm
 * against the launcher's rank placement).
 */
static inline int on_same_node(int ppn, int me, int you)
{
	int my_group = me / ppn;
	int your_group = you / ppn;

	return my_group == your_group;
}
/*
 * isend-calc-wait
 *
 * For every rank outside this rank's node, post a non-blocking receive and
 * a non-blocking send of ndoubles doubles (tag 0).  The k-th such peer uses
 * the k-th ndoubles-sized slice of rbuf and sbuf.  Then run calc_nsec of
 * busy work and wait for all posted requests.
 * reqs must have room for two requests per off-node peer.
 */
void my_send(int nproc, int ppn, int rank, double *sbuf, double *rbuf,
	     int ndoubles, MPI_Request *reqs, long calc_nsec)
{
	int peer;
	int nremote = 0;	/* off-node peers handled so far */
	int nreq = 0;		/* requests posted so far */

	for (peer = 0; peer < nproc; peer++) {
		if (on_same_node(ppn, rank, peer)) {
			continue;
		}
		MPI_Irecv(rbuf + nremote * ndoubles, ndoubles, MPI_DOUBLE,
			  peer, 0, MPI_COMM_WORLD, &reqs[nreq]);
		nreq++;
		MPI_Isend(sbuf + nremote * ndoubles, ndoubles, MPI_DOUBLE,
			  peer, 0, MPI_COMM_WORLD, &reqs[nreq]);
		nreq++;
		nremote++;
	}

	fwq(calc_nsec);
	MPI_Waitall(nreq, reqs, MPI_STATUSES_IGNORE);
}
/* Helper functions to find the server's PSM2 endpoint identifier (epid). */
psm2_epid_t find_server(int rank) {
FILE *fp = NULL;
psm2_epid_t server_epid = 0;
char fn[256];
printf("%s: enter\n", __FUNCTION__); fflush(stdout);
sprintf(fn, "psm2-demo-server-epid-%d", rank);
printf("PSM2 client waiting for epid mapping file to appear...\n"); fflush(stdout);
while (!fp) {
sleep(1);
fp = fopen(fn, "r");
}
fscanf(fp, "%lx", &server_epid);
fclose(fp);
printf("PSM2 client found server epid = 0x%lx\n", server_epid);
return server_epid;
}
/*
 * Write myepid in hexadecimal to the file "psm2-demo-server-epid-<rank>"
 * so that clients can pick it up with find_server().
 *
 * If the file cannot be created, the error is reported and the process
 * exits, as the message announces; die() itself returns, and continuing
 * would pass a NULL stream to fprintf().
 */
void write_epid_to_file(int rank, psm2_epid_t myepid)
{
	FILE *fp;
	char fn[256];

	printf("%s: enter\n", __FUNCTION__);
	snprintf(fn, sizeof(fn), "psm2-demo-server-epid-%d", rank);

	fp = fopen(fn, "w");
	if (!fp) {
		int saved_errno = errno;	/* fprintf() below may change errno */

		fprintf(stderr,
			"Exiting, couldn't write server's epid mapping file: ");
		die(strerror(saved_errno), saved_errno);
		exit(EXIT_FAILURE);
	}

	fprintf(fp, "0x%lx", (unsigned long)myepid);
	fclose(fp);
	printf("PSM2 server wrote epid = 0x%lx to file.\n",
	       (unsigned long)myepid);
}
/* PSM2 state shared by the my_psm2_*() functions in this file */
psm2_uuid_t uuid; /* UUID passed to psm2_ep_open(); zeroed in my_psm2_init() */
psm2_ep_t myep; /* local endpoint opened by my_psm2_init() */
psm2_epid_t myepid; /* id of myep as returned by psm2_ep_open() */
psm2_epid_t server_epid; /* server's epid, set from find_server() on the client side */
/* Arrays handed to psm2_ep_connect(); only slot 0 is filled in and enabled */
psm2_epid_t epid_array[CONNECT_ARRAY_SIZE];
int epid_array_mask[CONNECT_ARRAY_SIZE];
psm2_error_t epid_connect_errors[CONNECT_ARRAY_SIZE];
psm2_epaddr_t epaddr_array[CONNECT_ARRAY_SIZE];
/*
 * Initialize the PSM2 library and open the local endpoint (myep / myepid)
 * with default options and an all-zero UUID.
 * my_rank and server_rank are only used for the trace output.
 *
 * Returns 0 on success and -1 as soon as a PSM2 call fails.  die() only
 * reports the error, so each failure path returns explicitly instead of
 * carrying on with an uninitialized library or endpoint.
 */
int my_psm2_init(int my_rank, int server_rank)
{
	struct psm2_ep_open_opts o;
	int rc;
	int ver_major = PSM2_VERNO_MAJOR;
	int ver_minor = PSM2_VERNO_MINOR;

	printf("%s: my_rank=%d,server_rank=%d\n", __FUNCTION__, my_rank, server_rank); fflush(stdout);
	memset(uuid, 0, sizeof(psm2_uuid_t)); /* Use a UUID of zero */

	/* Try to initialize PSM2 with the requested library version.
	 * In this example, given the use of the PSM2_VERNO_MAJOR and MINOR
	 * as defined in the PSM2 headers, ensure that we are linking with
	 * the same version of PSM2 as we compiled against.
	 */
	rc = psm2_init(&ver_major, &ver_minor);
	if (rc != PSM2_OK) {
		die("couldn't init", rc);
		return -1;
	}
	printf("PSM2 init done.\n");

	/* Setup the endpoint options struct */
	rc = psm2_ep_open_opts_get_defaults(&o);
	if (rc != PSM2_OK) {
		die("couldn't set default opts", rc);
		return -1;
	}
	printf("PSM2 opts_get_defaults done.\n");

	/* Attempt to open a PSM2 endpoint. This allocates hardware resources. */
	rc = psm2_ep_open(uuid, &o, &myep, &myepid);
	if (rc != PSM2_OK) {
		die("couldn't psm2_ep_open()", rc);
		return -1;
	}
	printf("PSM2 endpoint open done.\n");

	return 0;
}
/*
 * Exchange the server's endpoint id through a file and, on the client side,
 * connect the local endpoint to it.
 *
 * The rank equal to server_rank publishes its epid with
 * write_epid_to_file() and is done; every other rank reads it with
 * find_server() and calls psm2_ep_connect() with only slot 0 enabled.
 *
 * Returns 0 on success, -1 if the connect call or the connection in slot 0
 * failed.
 */
int my_psm2_connect(int my_rank, int server_rank)
{
	int rc;
	int is_server = (my_rank == server_rank);

	printf("%s: my_rank=%d,server_rank=%d\n", __FUNCTION__, my_rank, server_rank);
	fflush(stdout);

	if (is_server) {
		write_epid_to_file(my_rank, myepid);
	} else {
		server_epid = find_server(server_rank);
	}
	printf("%s: epid exchange done\n", __FUNCTION__);
	fflush(stdout);

	if (is_server) {
		/* Server does nothing here. A connection does not have to be
		 * established to receive messages.
		 */
		printf("PSM2 server up.\n");
		return 0;
	}

	/* Setup connection request info.
	 * PSM2 can connect to a single epid per request,
	 * or an arbitrary number of epids in a single connect call.
	 * For this example, use part of an array of
	 * connection requests.
	 */
	memset(epid_array_mask, 0, sizeof(int) * CONNECT_ARRAY_SIZE);
	epid_array[0] = server_epid;
	epid_array_mask[0] = 1;

	/* Begin the connection process.
	 * Note that if a requested epid is not responding,
	 * the connect call will still return OK.
	 * The errors array will contain the state of individual
	 * connection requests.
	 */
	rc = psm2_ep_connect(myep, CONNECT_ARRAY_SIZE, epid_array,
			     epid_array_mask, epid_connect_errors,
			     epaddr_array, 0 /* no timeout */);
	if (rc != PSM2_OK) {
		die("couldn't ep_connect", rc);
		return -1;
	}
	printf("PSM2 connect request processed.\n");

	/* Now check if our connection to the server is ready */
	if (epid_connect_errors[0] != PSM2_OK) {
		die("couldn't connect to server", epid_connect_errors[0]);
		return -1;
	}
	printf("PSM2 client-server connection established.\n");

	return 0;
}
/* Message buffer used for both sending and receiving (file scope, not on the stack) */
char msgbuf[BUFFER_LENGTH];
/*
 * Exchange one message over a PSM2 matched queue, then shut PSM2 down.
 *
 * The caller acts as server (receiver) when rank == receiver, otherwise as
 * client (sender); the sender argument itself is not used.  The server posts
 * a receive for tag 0xABCD, waits for it, prints the message and removes its
 * epid file.  The client sends a greeting to epaddr_array[0].
 *
 * Every failure is only reported through die(), which returns, so execution
 * continues after an error and the function always returns 0.
 *
 * NOTE(review): the function also closes the endpoint and finalizes PSM2, so
 * it presumably must not be called more than once per process - confirm.
 */
int my_psm2_sendrecv(int rank, int sender, int receiver) {
int is_server = (rank == receiver) ? 1 : 0;
int rc;
psm2_mq_t q;
psm2_mq_req_t req_mq;
/* The buffer used to be a local; it is now the file-scope msgbuf above */
//char msgbuf[BUFFER_LENGTH];
/* Debug aid: print the stack pointer next to the buffer address (x86-64 only) */
register long rsp asm ("rsp");
printf("rsp=%lx.msgbuf=%p\n", rsp, msgbuf); fflush(stdout);
memset(msgbuf, 0, BUFFER_LENGTH);
/* Setup our PSM2 message queue */
if ((rc = psm2_mq_init(myep, PSM2_MQ_ORDERMASK_NONE, NULL, 0, &q))
!= PSM2_OK) {
die("couldn't initialize PSM2 MQ", rc);
}
printf("PSM2 MQ init done.\n");
if (is_server) {
psm2_mq_tag_t t = {0xABCD};
psm2_mq_tag_t tm = {-1};
/* Post the receive request */
if ((rc = psm2_mq_irecv2(q, PSM2_MQ_ANY_ADDR,
&t, /* message tag */
&tm, /* message tag mask */
0, /* no flags */
msgbuf, BUFFER_LENGTH,
NULL, /* no context to add */
&req_mq /* track irecv status */
)) != PSM2_OK) {
die("couldn't post psm2_mq_irecv()", rc);
}
printf("PSM2 MQ irecv() posted\n");
/* Wait until the message arrives */
if ((rc = psm2_mq_wait(&req_mq, NULL)) != PSM2_OK) {
die("couldn't wait for the irecv", rc);
}
printf("PSM2 MQ wait() done.\n");
printf("Message from client:\n");
printf("%s", msgbuf);
/* Always true here (already inside the server branch): remove the epid file */
if (is_server) {
char fn[256];
sprintf(fn, "psm2-demo-server-epid-%d", rank);
unlink(fn);
}
} else {
/* Say hello */
snprintf(msgbuf, BUFFER_LENGTH,
"Hello world from epid=0x%lx, pid=%d.\n",
myepid, getpid());
psm2_mq_tag_t t = {0xABCD};
if ((rc = psm2_mq_send2(q,
epaddr_array[0], /* destination epaddr */
PSM2_MQ_FLAG_SENDSYNC, /* flags: the SENDSYNC flag is passed */
&t, /* tag */
msgbuf, BUFFER_LENGTH
)) != PSM2_OK) {
die("couldn't post psm2_mq_isend", rc);
}
printf("PSM2 MQ send() done.\n");
}
/* Close down the MQ */
if ((rc = psm2_mq_finalize(q)) != PSM2_OK) {
die("couldn't psm2_mq_finalize()", rc);
}
printf("PSM2 MQ finalized.\n");
/* Close our ep, releasing all hardware resources.
 * Try to close all connections properly */
if ((rc = psm2_ep_close(myep, PSM2_EP_CLOSE_GRACEFUL,
0 /* no timeout */)) != PSM2_OK) {
die("couldn't psm2_ep_close()", rc);
}
printf("PSM2 ep closed.\n");
/* Release all local PSM2 resources */
if ((rc = psm2_finalize()) != PSM2_OK) {
die("couldn't psm2_finalize()", rc);
}
printf("PSM2 shut down, exiting.\n");
return 0;
}
/* Long options for getopt_long(): "--ppn <arg>" is reported as 'P' */
static struct option options[] = {
	{ .name = "ppn", .has_arg = required_argument, .flag = NULL, .val = 'P' },
	/* end */
	{ .name = NULL, .has_arg = 0, .flag = NULL, .val = 0 },
};
/* Arguments and synchronization state handed to progress_fn() */
struct thr_arg {
pthread_barrier_t bar; /* progress_fn() waits on it before and after the PSM2 exchange */
pthread_t pthread; /* thread handle filled in by pthread_create() in main() */
int rank; /* this process's rank (main() takes it from PMI_RANK) */
int ppn; /* value of the -P / --ppn option */
int nproc; /* number of ranks progress_fn() iterates over */
int server_rank; /* rank acting as PSM2 server; main() sets it to ppn + rank % ppn */
};
/* The single instance used by main() */
struct thr_arg thr_arg;
/*
 * Thread body of the progress thread.
 *
 * Reports where it is running based on syscall 732 (a result of -1 is
 * reported as "running on Linux OK", anything else as still on McKernel),
 * then, between two waits on the shared barrier, initializes PSM2, connects
 * and performs one send/receive with every rank outside its node.  The
 * lower rank of each pair is passed as sender, the higher one as receiver.
 *
 * arg points to a struct thr_arg.  Always returns NULL.
 */
void *progress_fn(void *arg)
{
	struct thr_arg *ctx = (struct thr_arg *)arg;
	int rc;
	int peer;

	rc = syscall(732);
	if (rc != -1) {
		fprintf(stdout, "CT09100 progress_fn running on McKernel NG (%d)\n", rc);
	} else {
		fprintf(stdout, "CT09100 progress_fn running on Linux OK\n");
	}

	printf("progress,enter\n");
	pthread_barrier_wait(&ctx->bar);
#if 1
	my_psm2_init(ctx->rank, ctx->server_rank);
	my_psm2_connect(ctx->rank, ctx->server_rank);
	for (peer = 0; peer < ctx->nproc; peer++) {
		int sender, receiver;

		if (on_same_node(ctx->ppn, ctx->rank, peer)) {
			continue;
		}
		sender = (ctx->rank < peer) ? ctx->rank : peer;
		receiver = (ctx->rank < peer) ? peer : ctx->rank;
		my_psm2_sendrecv(ctx->rank, sender, receiver);
	}
#endif
	pthread_barrier_wait(&ctx->bar);
	printf("progress,exit\n");

	return NULL;
}
/*
 * Test driver: parses the options, fills the global thr_arg from them and
 * from PMI_RANK, optionally issues syscall 731 (skipped when the
 * DISABLE_UTI environment variable is a non-zero number), then starts
 * progress_fn() in a new thread and synchronizes with it twice on the
 * shared barrier before joining it.
 *
 * Options: -d <n>        2^n doubles (0 <= n <= 30; only printed here)
 *          -P/--ppn <p>  group size passed to on_same_node() (p > 0)
 *
 * Returns 0; exits with status 1 on invalid arguments or setup failure.
 */
int main(int argc, char **argv)
{
	enum { NTHREADS = 2 };	/* barrier participants: main + progress thread */
	int rc;
	int nproc;
	int ppn = -1;
	int ndoubles = -1;
	int my_rank = -1;
	int opt;
	pthread_barrierattr_t barrierattr;

	fwq_init();

	while ((opt = getopt_long(argc, argv, "+d:P:", options, NULL)) != -1) {
		switch (opt) {
		case 'd': {
			int shift = atoi(optarg);

			/* 1 << shift must stay a positive int */
			if (shift < 0 || shift > 30) {
				printf("-d must be between 0 and 30\n");
				exit(1);
			}
			ndoubles = 1 << shift;
			break;
		}
		case 'P':
			ppn = atoi(optarg);
			break;
		default: /* '?' */
			printf("unknown option %c\n", optopt);
			exit(1);
		}
	}

	if (ndoubles == -1 || ppn == -1) {
		printf("specify ndoubles with -d and ppn with --ppn");
		exit(1);
	}
	if (ppn <= 0) {
		/* ppn is used as a divisor below and in on_same_node() */
		printf("ppn must be positive\n");
		exit(1);
	}

	char *rank_str = getenv("PMI_RANK");

	if (!rank_str) {
		printf("getenv failed\n");
		exit(1);
	}
	my_rank = atoi(rank_str);
	printf("my_rank=%d\n", my_rank); fflush(stdout);

	nproc = 2;

	if (my_rank == 0) {
		/* syscall() returns long; convert to match %d */
		printf("tid=%d,pid=%d,ndoubles=%d,nproc=%d\n",
		       (int)syscall(__NR_gettid), (int)getpid(), ndoubles, nproc);
		printf("nsec=%ld, nspw=%f\n", (long)nsec, nspw);
	}

	/* Spawn a thread */
	thr_arg.rank = my_rank;
	thr_arg.ppn = ppn;
	thr_arg.nproc = nproc;
	thr_arg.server_rank = ppn + (my_rank % ppn);

	pthread_barrierattr_init(&barrierattr);
	rc = pthread_barrier_init(&thr_arg.bar, &barrierattr, NTHREADS);
	if (rc) {
		fprintf(stdout, "pthread_barrier_init: %d\n", rc);
		exit(1);
	}

	char *uti_str = getenv("DISABLE_UTI");
	int uti_val = uti_str ? atoi(uti_str) : 0;

	if (!uti_val) {
		rc = syscall(731, 1, NULL);
		if (rc) {
			fprintf(stdout, "CT09003 INFO: uti not available (rc=%d)\n", rc);
		} else {
			fprintf(stdout, "CT09003 INFO: uti available\n");
		}
	} else {
		fprintf(stdout, "CT09003 INFO: uti disabled\n");
	}

	rc = pthread_create(&thr_arg.pthread, NULL, progress_fn, &thr_arg);
	if (rc) {
		fprintf(stdout, "pthread_create: %d\n", rc);
		exit(1);
	}

	/* Matches the two barrier waits in progress_fn() */
	pthread_barrier_wait(&thr_arg.bar);
	pthread_barrier_wait(&thr_arg.bar);

	pthread_join(thr_arg.pthread, NULL);

	return 0;
}
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <sys/mman.h>
#include <mpi.h>
#include <unistd.h>
#include <getopt.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <sched.h>
#include <errno.h>
#include <psm2.h> /* required for core PSM2 functions */
#include <psm2_mq.h> /* required for PSM2 MQ functions (send, recv, etc) */
#include "util.h"
#include "fwq.h"
/* Debug switch: uncomment the next line to make dprintf an alias of printf */
//#define DEBUG
#ifdef DEBUG
#define dprintf printf
#else
/*
 * With DEBUG off, "dprintf(args);" expands to "{}(args);": an empty block
 * followed by a parenthesized expression statement, so nothing is printed
 * (the arguments are still evaluated).
 * NOTE(review): this only works where a statement list is allowed, and the
 * macro hides the dprintf() that <stdio.h> declares on POSIX systems.
 */
#define dprintf {}
#endif
/* Size in bytes of the PSM2 message buffer: 1 << 12 = 4096 */
#define BUFFER_LENGTH /*8000000*/(1ULL<<12)
/* Number of slots in the arrays passed to psm2_ep_connect() */
#define CONNECT_ARRAY_SIZE 8
/*
 * Report "<msg>: <rc>" on stderr and flush it.
 * Despite the name this function returns to its caller.
 */
void die(char *msg, int rc)
{
	FILE *out = stderr;

	fprintf(out, "%s: %d\n", msg, rc);
	fflush(out);
}
/*
 * Return non-zero when ranks me and you fall into the same group of ppn
 * consecutive ranks (presumably one node's worth of processes - confirm
 * against the launcher's rank placement).
 */
static inline int on_same_node(int ppn, int me, int you)
{
	int my_group = me / ppn;
	int your_group = you / ppn;

	return my_group == your_group;
}
/*
 * isend-calc-wait
 *
 * For every rank outside this rank's node, post a non-blocking receive and
 * a non-blocking send of ndoubles doubles (tag 0).  The k-th such peer uses
 * the k-th ndoubles-sized slice of rbuf and sbuf.  Then run calc_nsec of
 * busy work and wait for all posted requests.
 * reqs must have room for two requests per off-node peer.
 */
void my_send(int nproc, int ppn, int rank, double *sbuf, double *rbuf,
	     int ndoubles, MPI_Request *reqs, long calc_nsec)
{
	int peer;
	int nremote = 0;	/* off-node peers handled so far */
	int nreq = 0;		/* requests posted so far */

	for (peer = 0; peer < nproc; peer++) {
		if (on_same_node(ppn, rank, peer)) {
			continue;
		}
		MPI_Irecv(rbuf + nremote * ndoubles, ndoubles, MPI_DOUBLE,
			  peer, 0, MPI_COMM_WORLD, &reqs[nreq]);
		nreq++;
		MPI_Isend(sbuf + nremote * ndoubles, ndoubles, MPI_DOUBLE,
			  peer, 0, MPI_COMM_WORLD, &reqs[nreq]);
		nreq++;
		nremote++;
	}

	fwq(calc_nsec);
	MPI_Waitall(nreq, reqs, MPI_STATUSES_IGNORE);
}
/* Helper functions to find the server's PSM2 endpoint identifier (epid). */
psm2_epid_t find_server(int rank)
{
FILE *fp = NULL;
psm2_epid_t server_epid = 0;
char fn[256];
printf("%s: enter\n", __func__); fflush(stdout);
sprintf(fn, "psm2-demo-server-epid-%d", rank);
printf("PSM2 client waiting for epid mapping file to appear...\n");
fflush(stdout);
while (!fp) {
sleep(1);
fp = fopen(fn, "r");
}
fscanf(fp, "%lx", &server_epid);
fclose(fp);
printf("PSM2 client found server epid = 0x%lx\n", server_epid);
return server_epid;
}
/*
 * Write myepid in hexadecimal to the file "psm2-demo-server-epid-<rank>"
 * so that clients can pick it up with find_server().
 *
 * If the file cannot be created, the error is reported and the process
 * exits, as the message announces; die() itself returns, and continuing
 * would pass a NULL stream to fprintf().
 */
void write_epid_to_file(int rank, psm2_epid_t myepid)
{
	FILE *fp;
	char fn[256];

	printf("%s: enter\n", __func__);
	snprintf(fn, sizeof(fn), "psm2-demo-server-epid-%d", rank);

	fp = fopen(fn, "w");
	if (!fp) {
		int saved_errno = errno;	/* fprintf() below may change errno */

		fprintf(stderr,
			"Exiting, couldn't write server's epid mapping file: ");
		die(strerror(saved_errno), saved_errno);
		exit(EXIT_FAILURE);
	}

	fprintf(fp, "0x%lx", (unsigned long)myepid);
	fclose(fp);
	printf("PSM2 server wrote epid = 0x%lx to file.\n",
	       (unsigned long)myepid);
}
/* PSM2 state shared by the my_psm2_*() functions in this file */
psm2_uuid_t uuid; /* UUID passed to psm2_ep_open(); zeroed in my_psm2_init() */
psm2_ep_t myep; /* local endpoint opened by my_psm2_init() */
psm2_epid_t myepid; /* id of myep as returned by psm2_ep_open() */
psm2_epid_t server_epid; /* server's epid, set from find_server() on the client side */
/* Arrays handed to psm2_ep_connect(); only slot 0 is filled in and enabled */
psm2_epid_t epid_array[CONNECT_ARRAY_SIZE];
int epid_array_mask[CONNECT_ARRAY_SIZE];
psm2_error_t epid_connect_errors[CONNECT_ARRAY_SIZE];
psm2_epaddr_t epaddr_array[CONNECT_ARRAY_SIZE];
/*
 * Initialize the PSM2 library and open the local endpoint (myep / myepid)
 * with default options and an all-zero UUID.
 * my_rank and server_rank are only used for the trace output.
 *
 * Returns 0 on success and -1 as soon as a PSM2 call fails.  die() only
 * reports the error, so each failure path returns explicitly instead of
 * carrying on with an uninitialized library or endpoint.
 */
int my_psm2_init(int my_rank, int server_rank)
{
	struct psm2_ep_open_opts o;
	int rc;
	int ver_major = PSM2_VERNO_MAJOR;
	int ver_minor = PSM2_VERNO_MINOR;

	printf("%s: my_rank=%d,server_rank=%d\n",
	       __func__, my_rank, server_rank);
	fflush(stdout);
	memset(uuid, 0, sizeof(psm2_uuid_t)); /* Use a UUID of zero */

	/* Try to initialize PSM2 with the requested library version.
	 * In this example, given the use of the PSM2_VERNO_MAJOR and MINOR
	 * as defined in the PSM2 headers, ensure that we are linking with
	 * the same version of PSM2 as we compiled against.
	 */
	rc = psm2_init(&ver_major, &ver_minor);
	if (rc != PSM2_OK) {
		die("couldn't init", rc);
		return -1;
	}
	printf("PSM2 init done.\n");

	/* Setup the endpoint options struct */
	rc = psm2_ep_open_opts_get_defaults(&o);
	if (rc != PSM2_OK) {
		die("couldn't set default opts", rc);
		return -1;
	}
	printf("PSM2 opts_get_defaults done.\n");

	/* Attempt to open a PSM2 endpoint. This allocates hardware resources.
	 */
	rc = psm2_ep_open(uuid, &o, &myep, &myepid);
	if (rc != PSM2_OK) {
		die("couldn't psm2_ep_open()", rc);
		return -1;
	}
	printf("PSM2 endpoint open done.\n");

	return 0;
}
/*
 * Exchange the server's endpoint id through a file and, on the client side,
 * connect the local endpoint to it.
 *
 * The rank equal to server_rank publishes its epid with
 * write_epid_to_file() and is done; every other rank reads it with
 * find_server() and calls psm2_ep_connect() with only slot 0 enabled.
 *
 * Returns 0 on success, -1 if the connect call or the connection in slot 0
 * failed.
 */
int my_psm2_connect(int my_rank, int server_rank)
{
	int rc;
	int is_server = (my_rank == server_rank);

	printf("%s: my_rank=%d,server_rank=%d\n",
	       __func__, my_rank, server_rank);
	fflush(stdout);

	if (is_server) {
		write_epid_to_file(my_rank, myepid);
	} else {
		server_epid = find_server(server_rank);
	}
	printf("%s: epid exchange done\n", __func__);
	fflush(stdout);

	if (is_server) {
		/* Server does nothing here. A connection does not have to be
		 * established to receive messages.
		 */
		printf("PSM2 server up.\n");
		return 0;
	}

	/* Setup connection request info.
	 * PSM2 can connect to a single epid per request,
	 * or an arbitrary number of epids in a single connect call.
	 * For this example, use part of an array of
	 * connection requests.
	 */
	memset(epid_array_mask, 0, sizeof(int) * CONNECT_ARRAY_SIZE);
	epid_array[0] = server_epid;
	epid_array_mask[0] = 1;

	/* Begin the connection process.
	 * Note that if a requested epid is not responding,
	 * the connect call will still return OK.
	 * The errors array will contain the state of individual
	 * connection requests.
	 */
	rc = psm2_ep_connect(myep, CONNECT_ARRAY_SIZE, epid_array,
			     epid_array_mask, epid_connect_errors,
			     epaddr_array, 0 /* no timeout */);
	if (rc != PSM2_OK) {
		die("couldn't ep_connect", rc);
		return -1;
	}
	printf("PSM2 connect request processed.\n");

	/* Now check if our connection to the server is ready */
	if (epid_connect_errors[0] != PSM2_OK) {
		die("couldn't connect to server",
		    epid_connect_errors[0]);
		return -1;
	}
	printf("PSM2 client-server connection established.\n");

	return 0;
}
/* Message buffer used for both sending and receiving (file scope, not on the stack) */
char msgbuf[BUFFER_LENGTH];
/*
 * Exchange one message over a PSM2 matched queue, then shut PSM2 down.
 *
 * The caller acts as server (receiver) when rank == receiver, otherwise as
 * client (sender); the sender argument itself is not used.  The server posts
 * a receive for tag 0xABCD, waits for it, prints the message and removes its
 * epid file.  The client sends a greeting to epaddr_array[0].
 *
 * Every failure is only reported through die(), which returns, so execution
 * continues after an error and the function always returns 0.
 *
 * NOTE(review): the function also closes the endpoint and finalizes PSM2, so
 * it presumably must not be called more than once per process - confirm.
 */
int my_psm2_sendrecv(int rank, int sender, int receiver)
{
int is_server = (rank == receiver) ? 1 : 0;
int rc;
psm2_mq_t q;
psm2_mq_req_t req_mq;
/* The buffer used to be a local; it is now the file-scope msgbuf above */
//char msgbuf[BUFFER_LENGTH];
/* Debug aid: print the stack pointer next to the buffer address (x86-64 only) */
register long rsp asm ("rsp");
printf("rsp=%lx.msgbuf=%p\n", rsp, msgbuf);
fflush(stdout);
memset(msgbuf, 0, BUFFER_LENGTH);
/* Setup our PSM2 message queue */
if ((rc = psm2_mq_init(myep, PSM2_MQ_ORDERMASK_NONE, NULL, 0, &q))
!= PSM2_OK) {
die("couldn't initialize PSM2 MQ", rc);
}
printf("PSM2 MQ init done.\n");
if (is_server) {
psm2_mq_tag_t t = {0xABCD};
psm2_mq_tag_t tm = {-1};
/* Post the receive request */
if ((rc = psm2_mq_irecv2(q, PSM2_MQ_ANY_ADDR,
&t, /* message tag */
&tm, /* message tag mask */
0, /* no flags */
msgbuf, BUFFER_LENGTH,
NULL, /* no context to add */
&req_mq /* track irecv status */
)) != PSM2_OK) {
die("couldn't post psm2_mq_irecv()", rc);
}
printf("PSM2 MQ irecv() posted\n");
/* Wait until the message arrives */
if ((rc = psm2_mq_wait(&req_mq, NULL)) != PSM2_OK) {
die("couldn't wait for the irecv", rc);
}
printf("PSM2 MQ wait() done.\n");
printf("Message from client:\n");
printf("%s", msgbuf);
/* Always true here (already inside the server branch): remove the epid file */
if (is_server) {
char fn[256];
sprintf(fn, "psm2-demo-server-epid-%d", rank);
unlink(fn);
}
} else {
/* Say hello */
snprintf(msgbuf, BUFFER_LENGTH,
"Hello world from epid=0x%lx, pid=%d.\n",
myepid, getpid());
psm2_mq_tag_t t = {0xABCD};
if ((rc = psm2_mq_send2(q,
epaddr_array[0], /* destination epaddr */
PSM2_MQ_FLAG_SENDSYNC, /* flags: the SENDSYNC flag is passed */
&t, /* tag */
msgbuf, BUFFER_LENGTH
)) != PSM2_OK) {
die("couldn't post psm2_mq_isend", rc);
}
printf("PSM2 MQ send() done.\n");
}
/* Close down the MQ */
if ((rc = psm2_mq_finalize(q)) != PSM2_OK) {
die("couldn't psm2_mq_finalize()", rc);
}
printf("PSM2 MQ finalized.\n");
/* Close our ep, releasing all hardware resources.
* Try to close all connections properly
*/
if ((rc = psm2_ep_close(myep, PSM2_EP_CLOSE_GRACEFUL,
0 /* no timeout */)) != PSM2_OK) {
die("couldn't psm2_ep_close()", rc);
}
printf("PSM2 ep closed.\n");
/* Release all local PSM2 resources */
if ((rc = psm2_finalize()) != PSM2_OK) {
die("couldn't psm2_finalize()", rc);
}
printf("PSM2 shut down, exiting.\n");
return 0;
}
/* Long options for getopt_long(): "--ppn <arg>" is reported as 'P' */
static struct option options[] = {
	{ .name = "ppn", .has_arg = required_argument, .flag = NULL, .val = 'P' },
	/* end */
	{ .name = NULL, .has_arg = 0, .flag = NULL, .val = 0 },
};
/* Arguments and synchronization state handed to progress_fn() */
struct thr_arg {
pthread_barrier_t bar; /* progress_fn() waits on it before and after the PSM2 exchange */
pthread_t pthread; /* thread handle filled in by pthread_create() in main() */
int rank; /* this process's rank (main() takes it from PMI_RANK) */
int ppn; /* value of the -P / --ppn option */
int nproc; /* number of ranks progress_fn() iterates over */
int server_rank; /* rank acting as PSM2 server; main() sets it to ppn + rank % ppn */
};
/* The single instance used by main() */
struct thr_arg thr_arg;
/*
 * Thread body of the progress thread.
 *
 * Reports where it is running based on syscall 732 (a result of -1 is
 * reported as "running on Linux OK", anything else as still on McKernel),
 * then, between two waits on the shared barrier, initializes PSM2, connects
 * and performs one send/receive with every rank outside its node.  The
 * lower rank of each pair is passed as sender, the higher one as receiver.
 *
 * arg points to a struct thr_arg.  Always returns NULL.
 */
void *progress_fn(void *arg)
{
	struct thr_arg *ctx = (struct thr_arg *)arg;
	int rc;
	int peer;

	rc = syscall(732);
	if (rc != -1) {
		fprintf(stdout, "CT09100 %s running on McKernel NG (%d)\n",
			__func__, rc);
	} else {
		fprintf(stdout, "CT09100 %s running on Linux OK\n",
			__func__);
	}

	printf("progress,enter\n");
	pthread_barrier_wait(&ctx->bar);
#if 1
	my_psm2_init(ctx->rank, ctx->server_rank);
	my_psm2_connect(ctx->rank, ctx->server_rank);
	for (peer = 0; peer < ctx->nproc; peer++) {
		int sender, receiver;

		if (on_same_node(ctx->ppn, ctx->rank, peer)) {
			continue;
		}
		sender = (ctx->rank < peer) ? ctx->rank : peer;
		receiver = (ctx->rank < peer) ? peer : ctx->rank;
		my_psm2_sendrecv(ctx->rank, sender, receiver);
	}
#endif
	pthread_barrier_wait(&ctx->bar);
	printf("progress,exit\n");

	return NULL;
}
/*
 * Test driver.
 *
 * Parses "-d <exp>" (ndoubles = 2^exp) and "--ppn <n>", reads this process's
 * rank from PMI_RANK, issues syscall 731 unless DISABLE_UTI is set to a
 * non-zero value (printing whether "uti" is available), then creates the
 * progress_fn() thread and meets it on the shared barrier twice before
 * joining it.
 *
 * Exits with status 1 on bad usage, a missing PMI_RANK, or a failed
 * pthread_create(); otherwise returns 0.
 */
int main(int argc, char **argv)
{
	int rc;
	int nproc;
	int ppn = -1;
	int ndoubles = -1;
	int my_rank;
	int opt;
	pthread_barrierattr_t barrierattr;

	fwq_init();

	while ((opt = getopt_long(argc, argv, "+d:P:", options, NULL)) != -1) {
		switch (opt) {
		case 'd': {
			int exponent = atoi(optarg);

			/* 1 << exponent must fit in a non-negative int. */
			if (exponent < 0 || exponent > 30) {
				printf("-d expects an exponent in [0, 30]\n");
				exit(1);
			}
			ndoubles = 1 << exponent;
			break;
		}
		case 'P':
			ppn = atoi(optarg);
			break;
		default: /* '?' */
			printf("unknown option %c\n", optopt);
			exit(1);
		}
	}

	/* ppn is used as a divisor below, so zero/negative is rejected too. */
	if (ndoubles == -1 || ppn <= 0) {
		printf("specify ndoubles with -d and ppn with --ppn");
		exit(1);
	}

	char *rank_str = getenv("PMI_RANK");

	if (!rank_str) {
		printf("getenv failed\n");
		exit(1);
	}
	my_rank = atoi(rank_str);
	printf("my_rank=%d\n", my_rank); fflush(stdout);

	nproc = 2;

	if (my_rank == 0) {
		/* syscall() returns long; convert before printing with %d. */
		printf("tid=%d,pid=%d,ndoubles=%d,nproc=%d\n",
		       (int)syscall(__NR_gettid), (int)getpid(),
		       ndoubles, nproc);
		printf("nsec=%ld, nspw=%f\n", nsec, nspw);
	}

	/* Spawn a thread */
	thr_arg.rank = my_rank;
	thr_arg.ppn = ppn;
	thr_arg.nproc = nproc;
	thr_arg.server_rank = ppn + (my_rank % ppn);

	/* Exactly two threads meet on the barrier: main and progress_fn. */
	pthread_barrierattr_init(&barrierattr);
	pthread_barrier_init(&thr_arg.bar, &barrierattr, 2);

	char *uti_str = getenv("DISABLE_UTI");
	int uti_val = uti_str ? atoi(uti_str) : 0;

	if (!uti_val) {
		rc = syscall(731, 1, NULL);
		if (rc) {
			fprintf(stdout,
				"CT09003 INFO: uti not available (rc=%d)\n",
				rc);
		} else {
			fprintf(stdout, "CT09003 INFO: uti available\n");
		}
	} else {
		fprintf(stdout, "CT09003 INFO: uti disabled\n");
	}

	rc = pthread_create(&thr_arg.pthread, NULL, progress_fn, &thr_arg);
	if (rc) {
		fprintf(stdout, "pthread_create: %d\n", rc);
		exit(1);
	}

	pthread_barrier_wait(&thr_arg.bar);
	pthread_barrier_wait(&thr_arg.bar);
	pthread_join(thr_arg.pthread, NULL);

	return 0;
}

View File

@ -1,508 +1,424 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>
#include <getopt.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <sched.h>
#include <errno.h>
#include <psm2.h> /* required for core PSM2 functions */
#include <psm2_mq.h> /* required for PSM2 MQ functions (send, recv, etc) */
//#define DEBUG
#ifdef DEBUG
#define dprintf printf
#else
#define dprintf {}
#endif
#define BUFFER_LENGTH /*8000000*/(1ULL<<12)
#define CONNECT_ARRAY_SIZE 8
/*
 * Report a failure on stderr as "<msg>: <rc>" and flush the stream.
 *
 * Despite its name this function returns to the caller; it does not
 * terminate the process.
 *
 * msg: description of what failed (not modified).
 * rc:  numeric status printed after the message.
 */
void die(const char *msg, int rc)
{
	fprintf(stderr, "%s: %d\n", msg, rc);
	fflush(stderr);
}
/*
 * Nanoseconds elapsed from struct timespec `start` to `end`.  Both arguments
 * are parenthesized so that any lvalue expression (e.g. *ptr) may be passed.
 */
#define DIFFNSEC(end, start) (((end).tv_sec - (start).tv_sec) * 1000000000UL + ((end).tv_nsec - (start).tv_nsec))
/*
 * One fixed unit of busy work: an x86-64 inline-asm loop that zeroes %rcx,
 * then increments it and loops back while it is <= 99.  `volatile` keeps the
 * compiler from discarding the statement even though it has no outputs.
 */
static inline void fixed_size_work() {
asm volatile(
"movq $0, %%rcx\n\t"
"1:\t"
"addq $1, %%rcx\n\t"
"cmpq $99, %%rcx\n\t"
"jle 1b\n\t"
:
:
: "rcx", "cc");
}
/*
 * Run fixed_size_work() n times.
 *
 * The counter has the same type as n, so counts above INT_MAX neither
 * overflow a signed int nor loop forever.
 */
static inline void bulk_fsw(unsigned long n)
{
	unsigned long j;

	for (j = 0; j < n; j++) {
		fixed_size_work();
	}
}
double nspw; /* nsec per work */
unsigned long nsec;
#define N_INIT 10000000
/*
 * Calibrate the busy-work loop: measure the thread CPU time taken by N_INIT
 * units of work, store the total in `nsec` and the per-unit cost in `nspw`.
 */
void fwq_init() {
	struct timespec t_begin, t_end;

	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &t_begin);
	bulk_fsw(N_INIT);
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &t_end);

	nsec = DIFFNSEC(t_end, t_begin);
	nspw = nsec / (double)N_INIT;
}
#if 1
/*
 * Busy-wait for roughly delay_nsec nanoseconds by running
 * delay_nsec / nspw units of fixed work.
 *
 * Does nothing when delay_nsec is not positive, or when nspw is not positive
 * (fwq_init() has not run, or measured no time): dividing by it would give an
 * infinite or negative count whose conversion to unsigned long is undefined.
 */
void fwq(long delay_nsec) {
	if (delay_nsec <= 0 || nspw <= 0.0) {
		return;
	}
	bulk_fsw(delay_nsec / nspw);
}
#else /* For machines with large core-to-core performance variation (e.g. OFP) */
/*
 * Busy-wait until at least delay_nsec nanoseconds of thread CPU time have
 * elapsed, re-reading the clock between small bursts of work.
 */
void fwq(long delay_nsec) {
	struct timespec start, end;

	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
	while (1) {
		clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
		if (DIFFNSEC(end, start) >= delay_nsec) {
			break;
		}
		bulk_fsw(2); /* ~150 ns per iteration on OFP */
	}
}
#endif
/*
 * Print the CPU the calling thread last ran on, as reported both by field 39
 * of /proc/<pid>/task/<tid>/stat and by sched_getcpu(), together with the
 * PMI rank (printed as -1 when PMI_RANK is not set), pid and tid.
 *
 * Fields are located by counting single spaces, exactly as the previous
 * strsep()-based code did.
 *
 * Returns 0 on success, -1 if the stat file cannot be opened, read or parsed,
 * if memory cannot be allocated, or if sched_getcpu() fails.
 */
static int print_cpu_last_executed_on() {
	enum { STAT_BUF_SIZE = 65536, STAT_CPU_FIELD = 39 };
	char fn[256];
	char *result = NULL;	/* NULL so the common exit path may free() it */
	const char *field;
	const char *rank_str;
	pid_t tid = syscall(SYS_gettid);
	int fd;
	size_t offset = 0;
	int mpi_errno = 0;
	int cpu;
	int i;

	snprintf(fn, sizeof(fn), "/proc/%d/task/%d/stat",
		 (int)getpid(), (int)tid);
	fd = open(fn, O_RDONLY);
	if (fd == -1) {
		printf("open() failed\n");
		goto fn_fail;
	}

	result = malloc(STAT_BUF_SIZE);
	if (result == NULL) {
		printf("malloc() failed");
		goto fn_fail;
	}

	/* Never read past the buffer; keep one byte for the terminator. */
	while (offset < STAT_BUF_SIZE - 1) {
		ssize_t amount = read(fd, result + offset,
				      STAT_BUF_SIZE - 1 - offset);

		if (amount == -1) {
			printf("read() failed");
			goto fn_fail;
		}
		if (amount == 0) {
			break;
		}
		offset += (size_t)amount;
	}
	result[offset] = '\0';

	/* Advance to the start of the 39th space-separated field. */
	field = result;
	for (i = 1; i < STAT_CPU_FIELD; i++) {
		field = strchr(field, ' ');
		if (field == NULL) {
			printf("stat has too few fields\n");
			goto fn_fail;
		}
		field++;
	}

	cpu = sched_getcpu();
	if (cpu == -1) {
		printf("getpu() failed\n");
		goto fn_fail;
	}

	rank_str = getenv("PMI_RANK");
	printf("compute thread,pmi_rank=%02d,stat-cpu=%02d,sched_getcpu=%02d,pid=%d,tid=%d\n",
	       rank_str ? atoi(rank_str) : -1, atoi(field), cpu,
	       (int)getpid(), (int)tid);
	fflush(stdout);

fn_exit:
	if (fd != -1) {
		close(fd);
	}
	free(result);
	return mpi_errno;
fn_fail:
	mpi_errno = -1;
	goto fn_exit;
}
/*
 * Return non-zero when ranks `me` and `you` have the same integer quotient by
 * `ppn`, i.e. fall in the same group of ppn consecutive ranks.
 * ppn must be non-zero.
 */
static inline int on_same_node(int ppn, int me, int you)
{
	int my_node = me / ppn;
	int your_node = you / ppn;

	return my_node == your_node;
}
/* Helper functions to find the server's PSM2 endpoint identifier (epid). */
/*
 * Wait for the file "psm2-demo-server-epid-<rank>" to appear and return the
 * hexadecimal endpoint id stored in it.
 *
 * Polls once per second.  A file that exists but does not yet contain a
 * parsable value (the writer has created it but not written it) is retried
 * rather than being read as epid 0.
 */
psm2_epid_t find_server(int rank) {
	FILE *fp;
	psm2_epid_t server_epid = 0;
	unsigned long parsed = 0;
	char fn[256];
	int found = 0;

	printf("%s: enter\n", __FUNCTION__); fflush(stdout);
	snprintf(fn, sizeof(fn), "psm2-demo-server-epid-%d", rank);
	printf("PSM2 client waiting for epid mapping file to appear...\n"); fflush(stdout);

	while (!found) {
		sleep(1);
		fp = fopen(fn, "r");
		if (!fp) {
			continue;
		}
		if (fscanf(fp, "%lx", &parsed) == 1) {
			server_epid = parsed;
			found = 1;
		}
		fclose(fp);
	}

	printf("PSM2 client found server epid = 0x%lx\n",
	       (unsigned long)server_epid);
	return server_epid;
}
/*
 * Publish this process's endpoint id by writing it, as "0x<hex>", to the file
 * "psm2-demo-server-epid-<rank>" in the current directory.
 *
 * On failure to create or close the file the error is reported through die()
 * and the function returns without printing the success message.
 */
void write_epid_to_file(int rank, psm2_epid_t myepid) {
	FILE *fp;
	char fn[256];

	printf("%s: enter\n", __FUNCTION__);
	snprintf(fn, sizeof(fn), "psm2-demo-server-epid-%d", rank);

	fp = fopen(fn, "w");
	if (!fp) {
		/* Save errno: the fprintf() below may overwrite it. */
		int saved_errno = errno;

		fprintf(stderr,
			"Exiting, couldn't write server's epid mapping file: ");
		die(strerror(saved_errno), saved_errno);
		/* die() returns, so do not fall through and write to NULL. */
		return;
	}

	fprintf(fp, "0x%lx", (unsigned long)myepid);
	if (fclose(fp) != 0) {
		die("couldn't close server's epid mapping file", errno);
		return;
	}
	printf("PSM2 server wrote epid = 0x%lx to file.\n",
	       (unsigned long)myepid);
}
/* PSM2 state shared by my_psm2_init(), my_psm2_connect() and my_psm2_sendrecv(). */
psm2_uuid_t uuid; /* zeroed in my_psm2_init() and passed to psm2_ep_open() */
psm2_ep_t myep; /* local endpoint handle, filled in by psm2_ep_open() */
psm2_epid_t myepid; /* local endpoint id, filled in by psm2_ep_open() */
psm2_epid_t server_epid; /* server's endpoint id, returned by find_server() */
/* Arrays passed to psm2_ep_connect(); my_psm2_connect() only fills slot 0. */
psm2_epid_t epid_array[CONNECT_ARRAY_SIZE];
int epid_array_mask[CONNECT_ARRAY_SIZE];
psm2_error_t epid_connect_errors[CONNECT_ARRAY_SIZE];
psm2_epaddr_t epaddr_array[CONNECT_ARRAY_SIZE];
/*
 * Initialize the PSM2 library and open the local endpoint, storing the
 * results in the file-scope `myep` and `myepid`.
 *
 * my_rank, server_rank: only used for the trace line printed on entry.
 *
 * Returns 0 on success.  If any step fails it is reported through die() and
 * -1 is returned immediately, instead of carrying on with later steps that
 * depend on the failed one.
 */
int my_psm2_init(int my_rank, int server_rank) {
	struct psm2_ep_open_opts o;
	int rc;
	int ver_major = PSM2_VERNO_MAJOR;
	int ver_minor = PSM2_VERNO_MINOR;

	printf("%s: my_rank=%d,server_rank=%d\n", __FUNCTION__, my_rank, server_rank); fflush(stdout);
	memset(uuid, 0, sizeof(psm2_uuid_t)); /* Use a UUID of zero */

	/* Try to initialize PSM2 with the requested library version.
	 * In this example, given the use of the PSM2_VERNO_MAJOR and MINOR
	 * as defined in the PSM2 headers, ensure that we are linking with
	 * the same version of PSM2 as we compiled against.
	 */
	if ((rc = psm2_init(&ver_major, &ver_minor)) != PSM2_OK) {
		die("couldn't init", rc);
		return -1;
	}
	printf("PSM2 init done.\n");

	/* Setup the endpoint options struct */
	if ((rc = psm2_ep_open_opts_get_defaults(&o)) != PSM2_OK) {
		die("couldn't set default opts", rc);
		return -1;
	}
	printf("PSM2 opts_get_defaults done.\n");

	/* Attempt to open a PSM2 endpoint. This allocates hardware resources. */
	if ((rc = psm2_ep_open(uuid, &o, &myep, &myepid)) != PSM2_OK) {
		die("couldn't psm2_ep_open()", rc);
		return -1;
	}
	printf("PSM2 endpoint open done.\n");
	return 0;
}
/*
 * Exchange endpoint ids through the file system and, on the client side,
 * connect to the server endpoint.
 *
 * The process whose rank equals server_rank writes its epid to a file and
 * returns; every other process waits for that file, then issues a single
 * psm2_ep_connect() request using slot 0 of the connection arrays.
 *
 * Returns 0 on success, -1 if the connect call or the per-endpoint status in
 * slot 0 reports an error.
 */
int my_psm2_connect(int my_rank, int server_rank) {
	const int is_server = (my_rank == server_rank);
	int status;

	printf("%s: my_rank=%d,server_rank=%d\n", __FUNCTION__, my_rank, server_rank); fflush(stdout);

	if (!is_server) {
		server_epid = find_server(server_rank);
	} else {
		write_epid_to_file(my_rank, myepid);
	}
	printf("%s: epid exchange done\n", __FUNCTION__); fflush(stdout);

	if (is_server) {
		/* Server does nothing here. A connection does not have to be
		 * established to receive messages.
		 */
		printf("PSM2 server up.\n");
		return 0;
	}

	/* Setup connection request info.
	 * PSM2 can connect to a single epid per request, or an arbitrary
	 * number of epids in a single connect call.  For this example, use
	 * part of an array of connection requests.
	 */
	memset(epid_array_mask, 0, sizeof(epid_array_mask));
	epid_array[0] = server_epid;
	epid_array_mask[0] = 1;

	/* Begin the connection process.
	 * Note that if a requested epid is not responding, the connect call
	 * will still return OK.  The errors array will contain the state of
	 * individual connection requests.
	 */
	status = psm2_ep_connect(myep, CONNECT_ARRAY_SIZE, epid_array,
				 epid_array_mask, epid_connect_errors,
				 epaddr_array, 0 /* no timeout */);
	if (status != PSM2_OK) {
		die("couldn't ep_connect", status);
		return -1;
	}
	printf("PSM2 connect request processed.\n");

	/* Now check if our connection to the server is ready */
	if (epid_connect_errors[0] != PSM2_OK) {
		die("couldn't connect to server", epid_connect_errors[0]);
		return -1;
	}
	printf("PSM2 client-server connection established.\n");
	return 0;
}
char msgbuf[BUFFER_LENGTH];
/*
 * Exchange one BUFFER_LENGTH-byte message through a PSM2 matched queue, then
 * tear PSM2 down (MQ finalize, endpoint close, library finalize).
 *
 * rank:     this process's rank.
 * sender:   unused.
 * receiver: rank acting as the receiver; when rank == receiver this process
 *           posts a receive for tag 0xABCD, waits for it, prints the message
 *           and removes its epid file.  Otherwise it sends a hello message to
 *           epaddr_array[0] with PSM2_MQ_FLAG_SENDSYNC.
 *
 * Returns 0 on success.  A failing PSM2 call is reported through die() and -1
 * is returned at once, so later calls never run on handles that the failed
 * call left unset.
 */
int my_psm2_sendrecv(int rank, int sender, int receiver) {
	int is_server = (rank == receiver) ? 1 : 0;
	int rc;
	psm2_mq_t q;
	psm2_mq_req_t req_mq;
	register long rsp asm ("rsp");

	(void)sender;

	/* Debug aid: show the stack pointer and the message buffer address. */
	printf("rsp=%lx.msgbuf=%p\n", rsp, (void *)msgbuf); fflush(stdout);
	memset(msgbuf, 0, BUFFER_LENGTH);

	/* Setup our PSM2 message queue */
	if ((rc = psm2_mq_init(myep, PSM2_MQ_ORDERMASK_NONE, NULL, 0, &q))
	    != PSM2_OK) {
		die("couldn't initialize PSM2 MQ", rc);
		return -1;
	}
	printf("PSM2 MQ init done.\n");

	if (is_server) {
		psm2_mq_tag_t t = {0xABCD};
		psm2_mq_tag_t tm = {-1};
		char fn[256];

		/* Post the receive request */
		if ((rc = psm2_mq_irecv2(q, PSM2_MQ_ANY_ADDR,
					 &t, /* message tag */
					 &tm, /* message tag mask */
					 0, /* no flags */
					 msgbuf, BUFFER_LENGTH,
					 NULL, /* no context to add */
					 &req_mq /* track irecv status */
					 )) != PSM2_OK) {
			die("couldn't post psm2_mq_irecv()", rc);
			return -1;
		}
		printf("PSM2 MQ irecv() posted\n");

		/* Wait until the message arrives */
		if ((rc = psm2_mq_wait(&req_mq, NULL)) != PSM2_OK) {
			die("couldn't wait for the irecv", rc);
			return -1;
		}
		printf("PSM2 MQ wait() done.\n");
		printf("Message from client:\n");
		printf("%s", msgbuf);

		/* The epid file is no longer needed once the message is in. */
		snprintf(fn, sizeof(fn), "psm2-demo-server-epid-%d", rank);
		unlink(fn);
	} else {
		psm2_mq_tag_t t = {0xABCD};

		/* Say hello */
		snprintf(msgbuf, BUFFER_LENGTH,
			 "Hello world from epid=0x%lx, pid=%d.\n",
			 (unsigned long)myepid, (int)getpid());
		if ((rc = psm2_mq_send2(q,
					epaddr_array[0], /* destination epaddr */
					PSM2_MQ_FLAG_SENDSYNC, /* send flag */
					&t, /* tag */
					msgbuf, BUFFER_LENGTH
					)) != PSM2_OK) {
			die("couldn't post psm2_mq_isend", rc);
			return -1;
		}
		printf("PSM2 MQ send() done.\n");
	}

	/* Close down the MQ */
	if ((rc = psm2_mq_finalize(q)) != PSM2_OK) {
		die("couldn't psm2_mq_finalize()", rc);
		return -1;
	}
	printf("PSM2 MQ finalized.\n");

	/* Close our ep, releasing all hardware resources.
	 * Try to close all connections properly
	 */
	if ((rc = psm2_ep_close(myep, PSM2_EP_CLOSE_GRACEFUL,
				0 /* no timeout */)) != PSM2_OK) {
		die("couldn't psm2_ep_close()", rc);
		return -1;
	}
	printf("PSM2 ep closed.\n");

	/* Release all local PSM2 resources */
	if ((rc = psm2_finalize()) != PSM2_OK) {
		die("couldn't psm2_finalize()", rc);
		return -1;
	}
	printf("PSM2 shut down, exiting.\n");
	return 0;
}
/*
 * Long-option table for getopt_long(): "--ppn <arg>" is reported through the
 * short-option value 'P'.  The all-zero entry terminates the table.
 */
static struct option options[] = {
	{ "ppn", required_argument, NULL, 'P' },
	{ .name = NULL, .has_arg = 0, .flag = NULL, .val = 0 },
};
/* State shared between main() and the thread running progress_fn(). */
struct thr_arg {
pthread_barrier_t bar; /* waited on twice by both main() and progress_fn() */
pthread_t pthread; /* thread handle filled in by pthread_create() in main() */
int rank; /* this process's rank, parsed from PMI_RANK in main() */
int ppn; /* value given with --ppn */
int nproc; /* number of ranks iterated over; main() sets it to 2 */
int server_rank; /* computed in main() as ppn + (rank % ppn) */
};
/* The single instance; main() passes its address to pthread_create(). */
struct thr_arg thr_arg;
/*
 * Body of the thread spawned by main().
 *
 * Issues syscall 732 and prints a CT09100 line: a result of -1 is reported as
 * "running on Linux OK", any other value as "running on McKernel NG".  It
 * then simply meets main() on the shared barrier twice.
 *
 * arg: pointer to the shared struct thr_arg.
 * Always returns NULL.
 */
void *progress_fn(void *arg) {
	struct thr_arg *ctx = arg;
	int probe = syscall(732);

	if (probe != -1) {
		printf("CT09100 progress_fn running on McKernel NG (%d)\n", probe);
	} else {
		printf("CT09100 progress_fn running on Linux OK\n");
	}

	printf("progress,enter\n");
	pthread_barrier_wait(&ctx->bar);
	pthread_barrier_wait(&ctx->bar);
	printf("progress,exit\n");
	return NULL;
}
/*
 * Test driver.
 *
 * Parses "--ppn <n>", reads this process's rank from PMI_RANK, issues
 * syscall 731 unless DISABLE_UTI is set to a non-zero value (printing whether
 * "uti" is available) and creates the progress_fn() thread.  Between the two
 * barrier waits shared with that thread, main itself initializes PSM2,
 * connects, and calls my_psm2_sendrecv() for every rank on another node.
 *
 * Exits with status 1 on bad usage, a missing PMI_RANK, or a failed
 * pthread_create(); otherwise returns 0.
 */
int main(int argc, char **argv) {
	int rc;
	int nproc;
	int ppn = -1;
	int my_rank;
	int i;
	int opt;
	pthread_barrierattr_t barrierattr;

	fwq_init();

	while ((opt = getopt_long(argc, argv, "+P:", options, NULL)) != -1) {
		switch (opt) {
		case 'P':
			ppn = atoi(optarg);
			break;
		default: /* '?' */
			printf("unknown option %c\n", optopt);
			exit(1);
		}
	}

	/* ppn is used as a divisor below, so zero/negative is rejected too. */
	if (ppn <= 0) {
		printf("specify ppn with --ppn");
		exit(1);
	}

	char *rank_str = getenv("PMI_RANK");

	if (!rank_str) {
		printf("getenv failed\n");
		exit(1);
	}
	my_rank = atoi(rank_str);
	printf("my_rank=%d\n", my_rank); fflush(stdout);

	nproc = 2;

	if (my_rank == 0) {
		/* syscall() returns long; convert before printing with %d. */
		printf("tid=%d,pid=%d,nproc=%d\n",
		       (int)syscall(__NR_gettid), (int)getpid(), nproc);
		printf("nsec=%ld, nspw=%f\n", nsec, nspw);
	}

	/* Spawn a thread */
	thr_arg.rank = my_rank;
	thr_arg.ppn = ppn;
	thr_arg.nproc = nproc;
	thr_arg.server_rank = ppn + (my_rank % ppn);

	/* Exactly two threads meet on the barrier: main and progress_fn. */
	pthread_barrierattr_init(&barrierattr);
	pthread_barrier_init(&thr_arg.bar, &barrierattr, 2);

	char *uti_str = getenv("DISABLE_UTI");
	int uti_val = uti_str ? atoi(uti_str) : 0;

	if (!uti_val) {
		rc = syscall(731, 1, NULL);
		if (rc) {
			fprintf(stdout, "CT09003 INFO: uti not available (rc=%d)\n", rc);
		} else {
			fprintf(stdout, "CT09003 INFO: uti available\n");
		}
	} else {
		fprintf(stdout, "CT09003 INFO: uti disabled\n");
	}

	rc = pthread_create(&thr_arg.pthread, NULL, progress_fn, &thr_arg);
	if (rc) {
		fprintf(stdout, "pthread_create: %d\n", rc);
		exit(1);
	}

	pthread_barrier_wait(&thr_arg.bar);

	my_psm2_init(thr_arg.rank, thr_arg.server_rank);
	my_psm2_connect(thr_arg.rank, thr_arg.server_rank);

	for (i = 0; i < thr_arg.nproc; i++) {
		if (!on_same_node(thr_arg.ppn, thr_arg.rank, i)) {
			if (thr_arg.rank < i) {
				my_psm2_sendrecv(thr_arg.rank, thr_arg.rank, i);
			} else {
				my_psm2_sendrecv(thr_arg.rank, i, thr_arg.rank);
			}
		}
	}

	pthread_barrier_wait(&thr_arg.bar);
	pthread_join(thr_arg.pthread, NULL);

	return 0;
}
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>
#include <getopt.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <sched.h>
#include <errno.h>
#include <psm2.h> /* required for core PSM2 functions */
#include <psm2_mq.h> /* required for PSM2 MQ functions (send, recv, etc) */
#include "util.h"
#include "fwq.h"
//#define DEBUG
#ifdef DEBUG
#define dprintf printf
#else
#define dprintf {}
#endif
#define BUFFER_LENGTH /*8000000*/(1ULL<<12)
#define CONNECT_ARRAY_SIZE 8
/*
 * Report a failure on stderr as "<msg>: <rc>" and flush the stream.
 *
 * Despite its name this function returns to the caller; it does not
 * terminate the process.
 */
void die(char *msg, int rc)
{
	FILE *const out = stderr;

	fprintf(out, "%s: %d\n", msg, rc);
	fflush(out);
}
/*
 * Return non-zero when ranks `me` and `you` have the same integer quotient by
 * `ppn`, i.e. fall in the same group of ppn consecutive ranks.
 * ppn must be non-zero.
 */
static inline int on_same_node(int ppn, int me, int you)
{
	int my_node = me / ppn;
	int your_node = you / ppn;

	return my_node == your_node;
}
/* Helper functions to find the server's PSM2 endpoint identifier (epid). */
/*
 * Wait for the file "psm2-demo-server-epid-<rank>" to appear and return the
 * hexadecimal endpoint id stored in it.
 *
 * Polls once per second.  A file that exists but does not yet contain a
 * parsable value (the writer has created it but not written it) is retried
 * rather than being read as epid 0.
 */
psm2_epid_t find_server(int rank)
{
	FILE *fp;
	psm2_epid_t server_epid = 0;
	unsigned long parsed = 0;
	char fn[256];
	int found = 0;

	printf("%s: enter\n", __func__); fflush(stdout);
	snprintf(fn, sizeof(fn), "psm2-demo-server-epid-%d", rank);
	printf("PSM2 client waiting for epid mapping file to appear...\n");
	fflush(stdout);

	while (!found) {
		sleep(1);
		fp = fopen(fn, "r");
		if (!fp)
			continue;
		if (fscanf(fp, "%lx", &parsed) == 1) {
			server_epid = parsed;
			found = 1;
		}
		fclose(fp);
	}

	printf("PSM2 client found server epid = 0x%lx\n",
	       (unsigned long)server_epid);
	return server_epid;
}
/*
 * Publish this process's endpoint id by writing it, as "0x<hex>", to the file
 * "psm2-demo-server-epid-<rank>" in the current directory.
 *
 * On failure to create or close the file the error is reported through die()
 * and the function returns without printing the success message.
 */
void write_epid_to_file(int rank, psm2_epid_t myepid)
{
	FILE *fp;
	char fn[256];

	printf("%s: enter\n", __func__);
	snprintf(fn, sizeof(fn), "psm2-demo-server-epid-%d", rank);

	fp = fopen(fn, "w");
	if (!fp) {
		/* Save errno: the fprintf() below may overwrite it. */
		int saved_errno = errno;

		fprintf(stderr,
			"Exiting, couldn't write server's epid mapping file: ");
		die(strerror(saved_errno), saved_errno);
		/* die() returns, so do not fall through and write to NULL. */
		return;
	}

	fprintf(fp, "0x%lx", (unsigned long)myepid);
	if (fclose(fp) != 0) {
		die("couldn't close server's epid mapping file", errno);
		return;
	}
	printf("PSM2 server wrote epid = 0x%lx to file.\n",
	       (unsigned long)myepid);
}
/* PSM2 state shared by my_psm2_init(), my_psm2_connect() and my_psm2_sendrecv(). */
psm2_uuid_t uuid; /* zeroed in my_psm2_init() and passed to psm2_ep_open() */
psm2_ep_t myep; /* local endpoint handle, filled in by psm2_ep_open() */
psm2_epid_t myepid; /* local endpoint id, filled in by psm2_ep_open() */
psm2_epid_t server_epid; /* server's endpoint id, returned by find_server() */
/* Arrays passed to psm2_ep_connect(); my_psm2_connect() only fills slot 0. */
psm2_epid_t epid_array[CONNECT_ARRAY_SIZE];
int epid_array_mask[CONNECT_ARRAY_SIZE];
psm2_error_t epid_connect_errors[CONNECT_ARRAY_SIZE];
psm2_epaddr_t epaddr_array[CONNECT_ARRAY_SIZE];
/*
 * Initialize the PSM2 library and open the local endpoint, storing the
 * results in the file-scope `myep` and `myepid`.
 *
 * my_rank, server_rank: only used for the trace line printed on entry.
 *
 * Returns 0 on success.  If any step fails it is reported through die() and
 * -1 is returned immediately, instead of carrying on with later steps that
 * depend on the failed one.
 */
int my_psm2_init(int my_rank, int server_rank)
{
	struct psm2_ep_open_opts o;
	int rc;
	int ver_major = PSM2_VERNO_MAJOR;
	int ver_minor = PSM2_VERNO_MINOR;

	printf("%s: my_rank=%d,server_rank=%d\n",
	       __func__, my_rank, server_rank);
	fflush(stdout);
	memset(uuid, 0, sizeof(psm2_uuid_t)); /* Use a UUID of zero */

	/* Try to initialize PSM2 with the requested library version.
	 * In this example, given the use of the PSM2_VERNO_MAJOR and MINOR
	 * as defined in the PSM2 headers, ensure that we are linking with
	 * the same version of PSM2 as we compiled against.
	 */
	if ((rc = psm2_init(&ver_major, &ver_minor)) != PSM2_OK) {
		die("couldn't init", rc);
		return -1;
	}
	printf("PSM2 init done.\n");

	/* Setup the endpoint options struct */
	if ((rc = psm2_ep_open_opts_get_defaults(&o)) != PSM2_OK) {
		die("couldn't set default opts", rc);
		return -1;
	}
	printf("PSM2 opts_get_defaults done.\n");

	/* Attempt to open a PSM2 endpoint. This allocates hardware resources.
	 */
	if ((rc = psm2_ep_open(uuid, &o, &myep, &myepid)) != PSM2_OK) {
		die("couldn't psm2_ep_open()", rc);
		return -1;
	}
	printf("PSM2 endpoint open done.\n");
	return 0;
}
/*
 * Exchange endpoint ids through the file system and, on the client side,
 * connect to the server endpoint.
 *
 * The process whose rank equals server_rank writes its epid to a file and
 * returns; every other process waits for that file, then issues a single
 * psm2_ep_connect() request using slot 0 of the connection arrays.
 *
 * Returns 0 on success, -1 if the connect call or the per-endpoint status in
 * slot 0 reports an error.
 */
int my_psm2_connect(int my_rank, int server_rank)
{
	const int is_server = (my_rank == server_rank);
	int status;

	printf("%s: my_rank=%d,server_rank=%d\n",
	       __func__, my_rank, server_rank);
	fflush(stdout);

	if (!is_server)
		server_epid = find_server(server_rank);
	else
		write_epid_to_file(my_rank, myepid);

	printf("%s: epid exchange done\n", __func__);
	fflush(stdout);

	if (is_server) {
		/* Server does nothing here. A connection does not have to be
		 * established to receive messages.
		 */
		printf("PSM2 server up.\n");
		return 0;
	}

	/* Setup connection request info.
	 * PSM2 can connect to a single epid per request, or an arbitrary
	 * number of epids in a single connect call.  For this example, use
	 * part of an array of connection requests.
	 */
	memset(epid_array_mask, 0, sizeof(epid_array_mask));
	epid_array[0] = server_epid;
	epid_array_mask[0] = 1;

	/* Begin the connection process.
	 * Note that if a requested epid is not responding, the connect call
	 * will still return OK.  The errors array will contain the state of
	 * individual connection requests.
	 */
	status = psm2_ep_connect(myep, CONNECT_ARRAY_SIZE, epid_array,
				 epid_array_mask, epid_connect_errors,
				 epaddr_array, 0 /* no timeout */);
	if (status != PSM2_OK) {
		die("couldn't ep_connect", status);
		return -1;
	}
	printf("PSM2 connect request processed.\n");

	/* Now check if our connection to the server is ready */
	if (epid_connect_errors[0] != PSM2_OK) {
		die("couldn't connect to server",
		    epid_connect_errors[0]);
		return -1;
	}
	printf("PSM2 client-server connection established.\n");
	return 0;
}
char msgbuf[BUFFER_LENGTH];
/*
 * Exchange one BUFFER_LENGTH-byte message through a PSM2 matched queue, then
 * tear PSM2 down (MQ finalize, endpoint close, library finalize).
 *
 * rank:     this process's rank.
 * sender:   unused.
 * receiver: rank acting as the receiver; when rank == receiver this process
 *           posts a receive for tag 0xABCD, waits for it, prints the message
 *           and removes its epid file.  Otherwise it sends a hello message to
 *           epaddr_array[0] with PSM2_MQ_FLAG_SENDSYNC.
 *
 * Returns 0 on success.  A failing PSM2 call is reported through die() and -1
 * is returned at once, so later calls never run on handles that the failed
 * call left unset.
 */
int my_psm2_sendrecv(int rank, int sender, int receiver)
{
	int is_server = (rank == receiver) ? 1 : 0;
	int rc;
	psm2_mq_t q;
	psm2_mq_req_t req_mq;
	register long rsp asm ("rsp");

	(void)sender;

	/* Debug aid: show the stack pointer and the message buffer address. */
	printf("rsp=%lx.msgbuf=%p\n", rsp, (void *)msgbuf); fflush(stdout);
	memset(msgbuf, 0, BUFFER_LENGTH);

	/* Setup our PSM2 message queue */
	if ((rc = psm2_mq_init(myep, PSM2_MQ_ORDERMASK_NONE, NULL, 0, &q))
	    != PSM2_OK) {
		die("couldn't initialize PSM2 MQ", rc);
		return -1;
	}
	printf("PSM2 MQ init done.\n");

	if (is_server) {
		psm2_mq_tag_t t = {0xABCD};
		psm2_mq_tag_t tm = {-1};
		char fn[256];

		/* Post the receive request */
		if ((rc = psm2_mq_irecv2(q, PSM2_MQ_ANY_ADDR,
					 &t, /* message tag */
					 &tm, /* message tag mask */
					 0, /* no flags */
					 msgbuf, BUFFER_LENGTH,
					 NULL, /* no context to add */
					 &req_mq /* track irecv status */
					 )) != PSM2_OK) {
			die("couldn't post psm2_mq_irecv()", rc);
			return -1;
		}
		printf("PSM2 MQ irecv() posted\n");

		/* Wait until the message arrives */
		if ((rc = psm2_mq_wait(&req_mq, NULL)) != PSM2_OK) {
			die("couldn't wait for the irecv", rc);
			return -1;
		}
		printf("PSM2 MQ wait() done.\n");
		printf("Message from client:\n");
		printf("%s", msgbuf);

		/* The epid file is no longer needed once the message is in. */
		snprintf(fn, sizeof(fn), "psm2-demo-server-epid-%d", rank);
		unlink(fn);
	} else {
		psm2_mq_tag_t t = {0xABCD};

		/* Say hello */
		snprintf(msgbuf, BUFFER_LENGTH,
			 "Hello world from epid=0x%lx, pid=%d.\n",
			 (unsigned long)myepid, (int)getpid());
		if ((rc = psm2_mq_send2(q,
					epaddr_array[0], /* destination epaddr */
					PSM2_MQ_FLAG_SENDSYNC, /* send flag */
					&t, /* tag */
					msgbuf, BUFFER_LENGTH
					)) != PSM2_OK) {
			die("couldn't post psm2_mq_isend", rc);
			return -1;
		}
		printf("PSM2 MQ send() done.\n");
	}

	/* Close down the MQ */
	if ((rc = psm2_mq_finalize(q)) != PSM2_OK) {
		die("couldn't psm2_mq_finalize()", rc);
		return -1;
	}
	printf("PSM2 MQ finalized.\n");

	/* Close our ep, releasing all hardware resources.
	 * Try to close all connections properly
	 */
	if ((rc = psm2_ep_close(myep, PSM2_EP_CLOSE_GRACEFUL,
				0 /* no timeout */)) != PSM2_OK) {
		die("couldn't psm2_ep_close()", rc);
		return -1;
	}
	printf("PSM2 ep closed.\n");

	/* Release all local PSM2 resources */
	if ((rc = psm2_finalize()) != PSM2_OK) {
		die("couldn't psm2_finalize()", rc);
		return -1;
	}
	printf("PSM2 shut down, exiting.\n");
	return 0;
}
/*
 * Long-option table for getopt_long(): "--ppn <arg>" is reported through the
 * short-option value 'P'.  The all-zero entry terminates the table.
 */
static struct option options[] = {
	{ "ppn", required_argument, NULL, 'P' },
	{ .name = NULL, .has_arg = 0, .flag = NULL, .val = 0 },
};
/* State shared between main() and the thread running progress_fn(). */
struct thr_arg {
pthread_barrier_t bar; /* waited on twice by both main() and progress_fn() */
pthread_t pthread; /* thread handle filled in by pthread_create() in main() */
int rank; /* this process's rank, parsed from PMI_RANK in main() */
int ppn; /* value given with --ppn */
int nproc; /* number of ranks iterated over; main() sets it to 2 */
int server_rank; /* computed in main() as ppn + (rank % ppn) */
};
/* The single instance; main() passes its address to pthread_create(). */
struct thr_arg thr_arg;
/*
 * Body of the thread spawned by main().
 *
 * Issues syscall 732 and prints a CT09100 line: a result of -1 is reported as
 * "running on Linux OK", any other value as "running on McKernel NG".  It
 * then simply meets main() on the shared barrier twice.
 *
 * arg: pointer to the shared struct thr_arg.
 * Always returns NULL.
 */
void *progress_fn(void *arg)
{
	struct thr_arg *ctx = arg;
	int probe = syscall(732);

	if (probe != -1)
		printf("CT09100 %s running on McKernel NG (%d)\n",
		       __func__, probe);
	else
		printf("CT09100 %s running on Linux OK\n", __func__);

	printf("progress,enter\n");
	pthread_barrier_wait(&ctx->bar);
	pthread_barrier_wait(&ctx->bar);
	printf("progress,exit\n");
	return NULL;
}
/*
 * Test driver.
 *
 * Parses "--ppn <n>", reads this process's rank from PMI_RANK, issues
 * syscall 731 unless DISABLE_UTI is set to a non-zero value (printing whether
 * "uti" is available) and creates the progress_fn() thread.  Between the two
 * barrier waits shared with that thread, main itself initializes PSM2,
 * connects, and calls my_psm2_sendrecv() for every rank on another node.
 *
 * Exits with status 1 on bad usage, a missing PMI_RANK, or a failed
 * pthread_create(); otherwise returns 0.
 */
int main(int argc, char **argv)
{
	int rc;
	int nproc;
	int ppn = -1;
	int my_rank;
	int i;
	int opt;
	pthread_barrierattr_t barrierattr;

	fwq_init();

	while ((opt = getopt_long(argc, argv, "+P:", options, NULL)) != -1) {
		switch (opt) {
		case 'P':
			ppn = atoi(optarg);
			break;
		default: /* '?' */
			printf("unknown option %c\n", optopt);
			exit(1);
		}
	}

	/* ppn is used as a divisor below, so zero/negative is rejected too. */
	if (ppn <= 0) {
		printf("specify ppn with --ppn");
		exit(1);
	}

	char *rank_str = getenv("PMI_RANK");

	if (!rank_str) {
		printf("getenv failed\n");
		exit(1);
	}
	my_rank = atoi(rank_str);
	printf("my_rank=%d\n", my_rank); fflush(stdout);

	nproc = 2;

	if (my_rank == 0) {
		/* syscall() returns long; convert before printing with %d. */
		printf("tid=%d,pid=%d,nproc=%d\n",
		       (int)syscall(__NR_gettid), (int)getpid(), nproc);
		printf("nsec=%ld, nspw=%f\n", nsec, nspw);
	}

	/* Spawn a thread */
	thr_arg.rank = my_rank;
	thr_arg.ppn = ppn;
	thr_arg.nproc = nproc;
	thr_arg.server_rank = ppn + (my_rank % ppn);

	/* Exactly two threads meet on the barrier: main and progress_fn. */
	pthread_barrierattr_init(&barrierattr);
	pthread_barrier_init(&thr_arg.bar, &barrierattr, 2);

	char *uti_str = getenv("DISABLE_UTI");
	int uti_val = uti_str ? atoi(uti_str) : 0;

	if (!uti_val) {
		rc = syscall(731, 1, NULL);
		if (rc) {
			fprintf(stdout,
				"CT09003 INFO: uti not available (rc=%d)\n",
				rc);
		} else {
			fprintf(stdout, "CT09003 INFO: uti available\n");
		}
	} else {
		fprintf(stdout, "CT09003 INFO: uti disabled\n");
	}

	rc = pthread_create(&thr_arg.pthread, NULL, progress_fn, &thr_arg);
	if (rc) {
		fprintf(stdout, "pthread_create: %d\n", rc);
		exit(1);
	}

	pthread_barrier_wait(&thr_arg.bar);

	my_psm2_init(thr_arg.rank, thr_arg.server_rank);
	my_psm2_connect(thr_arg.rank, thr_arg.server_rank);

	for (i = 0; i < thr_arg.nproc; i++) {
		if (!on_same_node(thr_arg.ppn, thr_arg.rank, i)) {
			if (thr_arg.rank < i) {
				my_psm2_sendrecv(thr_arg.rank, thr_arg.rank, i);
			} else {
				my_psm2_sendrecv(thr_arg.rank, i, thr_arg.rank);
			}
		}
	}

	pthread_barrier_wait(&thr_arg.bar);
	pthread_join(thr_arg.pthread, NULL);

	return 0;
}

View File

@ -1,220 +1,197 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <sys/mman.h>
#include <mpi.h>
#include <unistd.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <sched.h>
//#define DEBUG
#ifdef DEBUG
#define dprintf printf
#else
#define dprintf {}
#endif
#define SZENTRY_DEFAULT (65536) /* Size of one slot */
#define NENTRY_DEFAULT 10000 /* Number of slots */
/*
 * Nanoseconds elapsed from struct timespec `start` to `end`.  Both arguments
 * are parenthesized so that any lvalue expression (e.g. *ptr) may be passed.
 */
#define DIFFNSEC(end, start) (((end).tv_sec - (start).tv_sec) * 1000000000UL + ((end).tv_nsec - (start).tv_nsec))
/*
 * Print the CPU the calling thread last ran on, as reported both by field 39
 * of /proc/<pid>/task/<tid>/stat and by sched_getcpu(), together with the
 * PMI rank (printed as -1 when PMI_RANK is not set) and the tid.
 *
 * Fields are located by counting single spaces, exactly as the previous
 * strsep()-based code did.
 *
 * Returns 0 on success, -1 if the stat file cannot be opened, read or parsed,
 * if memory cannot be allocated, or if sched_getcpu() fails.
 */
static int print_cpu_last_executed_on() {
	enum { STAT_BUF_SIZE = 65536, STAT_CPU_FIELD = 39 };
	char fn[256];
	char *result = NULL;	/* NULL so the common exit path may free() it */
	const char *field;
	const char *rank_str;
	pid_t tid = syscall(SYS_gettid);
	int fd;
	size_t offset = 0;
	int mpi_errno = 0;
	int cpu;
	int i;

	snprintf(fn, sizeof(fn), "/proc/%d/task/%d/stat",
		 (int)getpid(), (int)tid);
	fd = open(fn, O_RDONLY);
	if (fd == -1) {
		printf("open() failed\n");
		goto fn_fail;
	}

	result = malloc(STAT_BUF_SIZE);
	if (result == NULL) {
		printf("malloc() failed");
		goto fn_fail;
	}

	/* Never read past the buffer; keep one byte for the terminator. */
	while (offset < STAT_BUF_SIZE - 1) {
		ssize_t amount = read(fd, result + offset,
				      STAT_BUF_SIZE - 1 - offset);

		if (amount == -1) {
			printf("read() failed");
			goto fn_fail;
		}
		if (amount == 0) {
			break;
		}
		offset += (size_t)amount;
	}
	result[offset] = '\0';

	/* Advance to the start of the 39th space-separated field. */
	field = result;
	for (i = 1; i < STAT_CPU_FIELD; i++) {
		field = strchr(field, ' ');
		if (field == NULL) {
			printf("stat has too few fields\n");
			goto fn_fail;
		}
		field++;
	}

	cpu = sched_getcpu();
	if (cpu == -1) {
		printf("getpu() failed\n");
		goto fn_fail;
	}

	rank_str = getenv("PMI_RANK");
	printf("compute thread,pmi_rank=%02d,stat-cpu=%02d,sched_getcpu=%02d,tid=%d\n",
	       rank_str ? atoi(rank_str) : -1, atoi(field), cpu, (int)tid);
	fflush(stdout);

fn_exit:
	if (fd != -1) {
		close(fd);
	}
	free(result);
	return mpi_errno;
fn_fail:
	mpi_errno = -1;
	goto fn_exit;
}
/*
 * Post nentry non-blocking transfers of szentry bytes each and wait for all
 * of them to complete.
 *
 * Rank 1 sends sendv[i] to `dest` and prints "s" progress marks followed by
 * "w".  Every other rank receives into recvv[i] from `src`, prints "r" marks,
 * sleeps for `usec` microseconds before waiting, and finally prints "W".
 * Progress marks are printed roughly every tenth entry and only when
 * nentry > 10.
 *
 * reqs and status must each have room for nentry elements.
 */
void sendrecv(int rank, int nentry, char **sendv, char **recvv, int szentry, int src, int dest, MPI_Request* reqs, MPI_Status* status, double usec) {
	const int sending = (rank == 1);
	/* Zero means "no progress marks"; nentry > 10 makes the step >= 1. */
	const int step = (nentry > 10) ? nentry / 10 : 0;
	int k;

	for (k = 0; k < nentry; k++) {
		if (sending) {
			MPI_Isend(sendv[k], szentry, MPI_CHAR, dest, 0, MPI_COMM_WORLD, &reqs[k]);
		} else {
			MPI_Irecv(recvv[k], szentry, MPI_CHAR, src, 0, MPI_COMM_WORLD, &reqs[k]);
		}
		if (step != 0 && k % step == 0) {
			fputs(sending ? "s" : "r", stdout);
			fflush(stdout);
		}
	}

	if (!sending) {
		usleep(usec);
	}
	MPI_Waitall(nentry, reqs, status);
	fputs(sending ? "w\n" : "W\n", stdout);
	fflush(stdout);
}
/*
 * MPI ring test driver.
 *
 * Usage: prog [szentry nentry] -- size of one slot and number of slots;
 * defaults are SZENTRY_DEFAULT and NENTRY_DEFAULT.
 *
 * Allocates nentry send and receive buffers with anonymous mmap(), then runs
 * sendrecv() twice between barriers.  Rank 0 times both runs; the duration of
 * the first (in microseconds) is passed as the receiver-side delay of the
 * second.
 */
int main(int argc, char **argv) {
	int my_rank = -1, size = -1;
	int i;
	char **sendv, **recvv;
	MPI_Status *status;
	MPI_Request *reqs;
	long szentry;
	long nentry;
	int src, dest;
	struct timespec start, end;
	/* Only rank 0 measures a delay; other ranks must still pass a
	 * defined value to sendrecv().
	 */
	double diffusec = 0;
	int actual;

	if (argc == 3) {
		szentry = atoi(argv[1]);
		nentry = atoi(argv[2]);
	} else {
		szentry = SZENTRY_DEFAULT;
		nentry = NENTRY_DEFAULT;
	}
	printf("szentry=%ld,nentry=%ld\n", szentry, nentry);

	status = (MPI_Status *)malloc(sizeof(MPI_Status) * nentry);
	reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * nentry);
	if (!status || !reqs) {
		/* MPI is not initialized yet, so do not go through fn_fail. */
		printf("malloc failed");
		return 1;
	}

	MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual);
	printf("Thread support level is %d\n", actual);

	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
	MPI_Comm_size(MPI_COMM_WORLD, &size);

	src = (size + my_rank - 1) % size;
	dest = (my_rank + 1) % size;

	printf("rank=%d, size=%d, src=%d, dest=%d\n", my_rank, size, src, dest);

	sendv = malloc(sizeof(char *) * nentry);
	if (!sendv) { printf("malloc failed"); goto fn_fail; }
	for (i = 0; i < nentry; i++) {
#if 0
		int fd;
		fd = open("./file", O_RDWR);
		if (fd == -1) { printf("open failed\n"); goto fn_fail; }
		sendv[i] = (char *)mmap(0, szentry, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
#else
		sendv[i] = (char *)mmap(0, szentry, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
#endif
		if (sendv[i] == MAP_FAILED) { printf("mmap failed"); goto fn_fail; }
		dprintf("[%d] sendv[%d]=%p\n", my_rank, i, sendv[i]);
		memset(sendv[i], 0xaa, szentry);
	}

	recvv = malloc(sizeof(char *) * nentry);
	if (!recvv) { printf("malloc failed"); goto fn_fail; }
	for (i = 0; i < nentry; i++) {
#if 0
		int fd;
		fd = open("./file", O_RDWR);
		if (fd == -1) { printf("open failed\n"); goto fn_fail; }
		recvv[i] = (char *)mmap(0, szentry, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
#else
		recvv[i] = (char *)mmap(0, szentry, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
#endif
		if (recvv[i] == MAP_FAILED) { printf("mmap failed"); goto fn_fail; }
		dprintf("[%d] recvv[%d]=%p\n", my_rank, i, recvv[i]);
		memset(recvv[i], 0, szentry);
	}

	printf("after memset\n");

	print_cpu_last_executed_on();

#pragma omp parallel for
	for (i = 0; i < omp_get_num_threads(); i++) {
		/* syscall() returns long; convert before printing with %d. */
		printf("thread_num=%d,tid=%d\n", i, (int)syscall(SYS_gettid));
	}

	for (i = 0; i < 1; i++) {
		/* First run: receivers do not delay before waiting. */
		MPI_Barrier(MPI_COMM_WORLD);
		if (my_rank == 0) {
			clock_gettime(CLOCK_REALTIME, &start);
		}
		sendrecv(my_rank, nentry, sendv, recvv, szentry, src, dest, reqs, status, 0);
		MPI_Barrier(MPI_COMM_WORLD);
		if (my_rank == 0) {
			clock_gettime(CLOCK_REALTIME, &end);
			diffusec = DIFFNSEC(end, start) / (double)1000;
			printf("%4.4f sec\n", DIFFNSEC(end, start) / (double)1000000000); fflush(stdout);
		}

		/* Second run: receivers sleep for the measured duration. */
		MPI_Barrier(MPI_COMM_WORLD);
		if (my_rank == 0) {
			clock_gettime(CLOCK_REALTIME, &start);
		}
		sendrecv(my_rank, nentry, sendv, recvv, szentry, src, dest, reqs, status, diffusec);
		MPI_Barrier(MPI_COMM_WORLD);
		if (my_rank == 0) {
			clock_gettime(CLOCK_REALTIME, &end);
			printf("%4.4f sec\n", DIFFNSEC(end, start) / (double)1000000000); fflush(stdout);
		}
	}

fn_exit:
	MPI_Finalize();
	return 0;
fn_fail:
	goto fn_exit;
}
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <sys/mman.h>
#include <mpi.h>
#include <unistd.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <sched.h>
#include "util.h"
//#define DEBUG
#ifdef DEBUG
#define dprintf printf
#else
#define dprintf {}
#endif
#define SZENTRY_DEFAULT (65536) /* Size of one slot */
#define NENTRY_DEFAULT 10000 /* Number of slots */
/*
 * Post nentry non-blocking transfers of szentry bytes each and wait for all
 * of them to complete.
 *
 * Rank 1 sends sendv[i] to `dest` and prints "s" progress marks followed by
 * "w".  Every other rank receives into recvv[i] from `src`, prints "r" marks,
 * sleeps for `usec` microseconds before waiting, and finally prints "W".
 * Progress marks are printed roughly every tenth entry and only when
 * nentry > 10.
 *
 * reqs and status must each have room for nentry elements.
 */
void sendrecv(int rank, int nentry, char **sendv, char **recvv, int szentry,
	      int src, int dest, MPI_Request *reqs, MPI_Status *status,
	      double usec)
{
	const int sending = (rank == 1);
	/* Zero means "no progress marks"; nentry > 10 makes the step >= 1. */
	const int step = (nentry > 10) ? nentry / 10 : 0;
	int k;

	for (k = 0; k < nentry; k++) {
		if (sending)
			MPI_Isend(sendv[k], szentry, MPI_CHAR, dest, 0,
				  MPI_COMM_WORLD, &reqs[k]);
		else
			MPI_Irecv(recvv[k], szentry, MPI_CHAR, src, 0,
				  MPI_COMM_WORLD, &reqs[k]);

		if (step != 0 && k % step == 0) {
			fputs(sending ? "s" : "r", stdout);
			fflush(stdout);
		}
	}

	if (!sending)
		usleep(usec);

	MPI_Waitall(nentry, reqs, status);
	fputs(sending ? "w\n" : "W\n", stdout);
	fflush(stdout);
}
/*
 * MPI ring test driver.
 *
 * Usage: prog [szentry nentry] -- size of one slot and number of slots;
 * defaults are SZENTRY_DEFAULT and NENTRY_DEFAULT.
 *
 * Allocates nentry send and receive buffers with anonymous mmap(), then runs
 * sendrecv() twice between barriers.  Rank 0 times both runs; the duration of
 * the first (in microseconds) is passed as the receiver-side delay of the
 * second.
 */
int main(int argc, char **argv)
{
	int my_rank = -1, size = -1;
	int i;
	char **sendv, **recvv;
	MPI_Status *status;
	MPI_Request *reqs;
	long szentry;
	long nentry;
	int src, dest;
	struct timespec start, end;
	/* Only rank 0 measures a delay; other ranks must still pass a
	 * defined value to sendrecv().
	 */
	double diffusec = 0;
	int actual;

	if (argc == 3) {
		szentry = atoi(argv[1]);
		nentry = atoi(argv[2]);
	} else {
		szentry = SZENTRY_DEFAULT;
		nentry = NENTRY_DEFAULT;
	}
	printf("szentry=%ld,nentry=%ld\n", szentry, nentry);

	status = (MPI_Status *)malloc(sizeof(MPI_Status) * nentry);
	reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * nentry);
	if (!status || !reqs) {
		/* MPI is not initialized yet, so do not go through fn_fail. */
		printf("malloc failed");
		return 1;
	}

	MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual);
	printf("Thread support level is %d\n", actual);

	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
	MPI_Comm_size(MPI_COMM_WORLD, &size);

	src = (size + my_rank - 1) % size;
	dest = (my_rank + 1) % size;

	printf("rank=%d, size=%d, src=%d, dest=%d\n",
	       my_rank, size, src, dest);

	sendv = malloc(sizeof(char *) * nentry);
	if (!sendv) {
		printf("malloc failed");
		goto fn_fail;
	}
	for (i = 0; i < nentry; i++) {
#if 0
		int fd;

		fd = open("./file", O_RDWR);
		if (fd == -1) {
			printf("open failed\n");
			goto fn_fail;
		}
		sendv[i] = (char *)mmap(0, szentry, PROT_READ | PROT_WRITE,
					MAP_PRIVATE, fd, 0);
#else
		sendv[i] = (char *)mmap(0, szentry, PROT_READ | PROT_WRITE,
					MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
#endif
		if (sendv[i] == MAP_FAILED) {
			printf("mmap failed");
			goto fn_fail;
		}
		dprintf("[%d] sendv[%d]=%p\n", my_rank, i, sendv[i]);
		memset(sendv[i], 0xaa, szentry);
	}

	recvv = malloc(sizeof(char *) * nentry);
	if (!recvv) {
		printf("malloc failed");
		goto fn_fail;
	}
	for (i = 0; i < nentry; i++) {
#if 0
		int fd;

		fd = open("./file", O_RDWR);
		if (fd == -1) {
			printf("open failed\n");
			goto fn_fail;
		}
		recvv[i] = (char *)mmap(0, szentry, PROT_READ|PROT_WRITE,
					MAP_PRIVATE, fd, 0);
#else
		recvv[i] = (char *)mmap(0, szentry, PROT_READ|PROT_WRITE,
					MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
#endif
		if (recvv[i] == MAP_FAILED) {
			printf("mmap failed");
			goto fn_fail;
		}
		dprintf("[%d] recvv[%d]=%p\n", my_rank, i, recvv[i]);
		memset(recvv[i], 0, szentry);
	}

	printf("after memset\n");

	print_cpu_last_executed_on("main");

#pragma omp parallel for
	for (i = 0; i < omp_get_num_threads(); i++) {
		/* syscall() returns long; convert before printing with %d. */
		printf("thread_num=%d,tid=%d\n", i, (int)syscall(SYS_gettid));
	}

	for (i = 0; i < 1; i++) {
		/* First run: receivers do not delay before waiting. */
		MPI_Barrier(MPI_COMM_WORLD);
		if (my_rank == 0) {
			clock_gettime(CLOCK_REALTIME, &start);
		}
		sendrecv(my_rank, nentry, sendv, recvv, szentry, src, dest,
			 reqs, status, 0);
		MPI_Barrier(MPI_COMM_WORLD);
		if (my_rank == 0) {
			clock_gettime(CLOCK_REALTIME, &end);
			diffusec = DIFFNSEC(end, start) / (double)1000;
			printf("%4.4f sec\n",
			       DIFFNSEC(end, start) / (double)1000000000);
			fflush(stdout);
		}

		/* Second run: receivers sleep for the measured duration. */
		MPI_Barrier(MPI_COMM_WORLD);
		if (my_rank == 0) {
			clock_gettime(CLOCK_REALTIME, &start);
		}
		sendrecv(my_rank, nentry, sendv, recvv, szentry, src, dest,
			 reqs, status, diffusec);
		MPI_Barrier(MPI_COMM_WORLD);
		if (my_rank == 0) {
			clock_gettime(CLOCK_REALTIME, &end);
			printf("%4.4f sec\n",
			       DIFFNSEC(end, start) / (double)1000000000);
			fflush(stdout);
		}
	}

fn_exit:
	MPI_Finalize();
	return 0;
fn_fail:
	goto fn_exit;
}

View File

@ -1,338 +1,253 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <sys/mman.h>
#include <mpi.h>
#include <unistd.h>
#include <getopt.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <sched.h>
//#define DEBUG
#ifdef DEBUG
#define dprintf printf
#else
#define dprintf {}
#endif
#define DIFFNSEC(end, start) ((end.tv_sec - start.tv_sec) * 1000000000UL + (end.tv_nsec - start.tv_nsec))
#define BEGIN_EPOCH(win) do { MPI_Win_lock_all(0, win); } while(0)
#define END_EPOCH(win) do { MPI_Win_unlock_all(win); } while(0)
#define FLUSH(win) do { MPI_Win_flush_local_all(win); } while(0)
/*
 * One fixed unit of busy work, written in x86-64 inline assembly.
 * Sets %rcx to 0, then adds 1 and loops back while %rcx <= 99, so the
 * add instruction executes 100 times. Touches no memory; %rcx and the
 * condition codes are declared as clobbered.
 */
static inline void fixed_size_work() {
asm volatile(
"movq $0, %%rcx\n\t"
"1:\t"
"addq $1, %%rcx\n\t"
"cmpq $99, %%rcx\n\t"
"jle 1b\n\t"
:
:
: "rcx", "cc");
}
/*
 * Execute fixed_size_work() n times back to back.
 *
 * The counter has the same type as n: an int counter would overflow
 * (undefined behaviour) before reaching any n above INT_MAX.
 */
static inline void bulk_fsw(unsigned long n) {
	unsigned long j;

	for (j = 0; j < n; j++) {
		fixed_size_work();
	}
}
double nspw; /* nsec per work */
unsigned long nsec; /* thread CPU time, in nsec, of the N_INIT calibration units */

/* Number of work units timed by fwq_init() */
#define N_INIT 10000000

/*
 * Calibrate the busy-work loop: time N_INIT units of fixed_size_work()
 * on the calling thread's CPU clock and store the total in nsec and the
 * per-unit cost in nspw.
 */
void fwq_init(void)
{
	struct timespec start, end;

	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
	bulk_fsw(N_INIT);
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
	nsec = DIFFNSEC(end, start);
	nspw = nsec / (double)N_INIT;
}
#if 0
/* Disabled variant: convert the delay into work units using the calibrated nspw. */
void fwq(long delay_nsec) {
	if (delay_nsec >= 0) {
		bulk_fsw(delay_nsec / nspw);
	}
}
#else /* For machines with large core-to-core performance variation (e.g. OFP) */
/*
 * Spin for at least delay_nsec nanoseconds of the calling thread's CPU
 * time. A negative delay returns immediately. The clock is re-read
 * between small bursts of work instead of trusting the calibration.
 */
void fwq(long delay_nsec) {
	struct timespec t0, now;

	if (delay_nsec < 0)
		return;

	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &t0);
	for (;;) {
		clock_gettime(CLOCK_THREAD_CPUTIME_ID, &now);
		if (DIFFNSEC(now, t0) >= delay_nsec)
			return;
		bulk_fsw(2); /* ~150 ns per iteration on OFP */
	}
}
#endif
/*
 * Print the CPU the calling thread last ran on, taken from two sources:
 * field 39 of /proc/<pid>/task/<tid>/stat and sched_getcpu(). The line
 * also carries the PMI_RANK environment value (-1 when unset) and the
 * thread id.
 *
 * Returns 0 on success, -1 if the stat file cannot be opened, read or
 * parsed, if allocation fails, or if sched_getcpu() fails.
 *
 * NOTE(review): fields are split on single spaces, so a thread name
 * containing a space would shift the field count -- confirm this cannot
 * happen for the processes under test.
 */
static int print_cpu_last_executed_on() {
	enum { STAT_BUF_SIZE = 65536, STAT_CPU_FIELD = 39 };
	char fn[256];
	char *result = NULL; /* NULL so the cleanup path may always free() it */
	char *next_delim;
	char *field = NULL;
	const char *rank_env;
	pid_t tid = syscall(SYS_gettid);
	int fd = -1;
	int offset = 0;
	int cpu;
	int i;
	int mpi_errno = 0;

	snprintf(fn, sizeof(fn), "/proc/%d/task/%d/stat", getpid(), (int)tid);
	fd = open(fn, O_RDONLY);
	if (fd == -1) {
		printf("open() failed\n");
		goto fn_fail;
	}
	result = malloc(STAT_BUF_SIZE);
	if (result == NULL) {
		printf("malloc() failed");
		goto fn_fail;
	}
	/* Read to EOF without running past the buffer; keep one byte for the NUL. */
	while (offset < STAT_BUF_SIZE - 1) {
		ssize_t amount = read(fd, result + offset,
				      STAT_BUF_SIZE - 1 - offset);

		if (amount == -1) {
			printf("read() failed");
			goto fn_fail;
		}
		if (amount == 0) {
			break;
		}
		offset += (int)amount;
	}
	result[offset] = '\0'; /* strsep() needs a terminated string */

	/* Walk to the STAT_CPU_FIELD-th space-separated field. */
	next_delim = result;
	for (i = 0; i < STAT_CPU_FIELD; i++) {
		field = strsep(&next_delim, " ");
	}
	if (field == NULL) {
		/* Fewer fields than expected: nothing to convert. */
		printf("stat parse failed\n");
		goto fn_fail;
	}

	cpu = sched_getcpu();
	if (cpu == -1) {
		printf("sched_getcpu() failed\n");
		goto fn_fail;
	}

	rank_env = getenv("PMI_RANK"); /* may be unset outside an MPI launcher */
	printf("compute thread,pmi_rank=%02d,stat-cpu=%02d,sched_getcpu=%02d,tid=%d\n",
	       rank_env ? atoi(rank_env) : -1, atoi(field), cpu, (int)tid);
	fflush(stdout);
fn_exit:
	free(result);
	if (fd != -1) {
		close(fd);
	}
	return mpi_errno;
fn_fail:
	mpi_errno = -1;
	goto fn_exit;
}
/*
 * Return non-zero when ranks `me` and `you` land in the same group of
 * `ppn` consecutive ranks, i.e. when both integer-divide by ppn to the
 * same value.
 */
static inline int on_same_node(int ppn, int me, int you)
{
	const int my_node = me / ppn;
	const int your_node = you / ppn;

	return my_node == your_node;
}
/*
 * get_acc-calc-flush_local
 *
 * For every rank i for which on_same_node(ppn, rank, i) is false, issue
 * one single-element MPI_Get_accumulate (MPI_SUM) per double: element
 * i * ndoubles + j of rbuf is the origin, the same element of result
 * receives the fetched value, and the same displacement is used in rank
 * i's window. Then spin for calc_nsec via fwq() and complete the
 * operations locally with FLUSH(win).
 *
 * wbuf is not used here; it is kept for the existing call sites.
 */
void rma(int nproc, int ppn, int rank, double *wbuf, double *rbuf, double *result, int ndoubles, MPI_Win win, long calc_nsec) {
	int i, j;

	for (i = 0; i < nproc; i++) {
		if (on_same_node(ppn, rank, i)) {
			continue;
		}
		for (j = 0; j < ndoubles; j++) {
			/* Computed once, in MPI_Aint, so large i * ndoubles cannot overflow int */
			MPI_Aint off = (MPI_Aint)i * ndoubles + j;

			MPI_Get_accumulate(rbuf + off, 1, MPI_DOUBLE,
					   result + off, 1, MPI_DOUBLE,
					   i, off, 1, MPI_DOUBLE,
					   MPI_SUM, win);
		}
	}
	fwq(calc_nsec);
	FLUSH(win);
}
/*
 * Long-option table handed to getopt_long(): "--ppn <value>" is reported
 * with the value 'P'. The all-zero entry terminates the table.
 */
static struct option options[] = {
	{ "ppn", required_argument, NULL, 'P' },
	{ NULL, 0, NULL, 0 }, /* end */
};
/*
 * MPI_Get_accumulate / computation overlap benchmark.
 *
 * Options: -d <k>            ndoubles = 2^k doubles per peer
 *          -P / --ppn <n>    value passed to rma() as ppn
 * Both are required.
 *
 * Three timed phases, each reduced across ranks with MPI_MAX:
 *   t_flush   - FLUSH(win) alone
 *   t_pure    - rma() with no computation
 *   t_overall - rma() with (t_pure - t_flush) nsec of computation
 * Rank 0 then prints overlap = (2 * t_pure - t_overall) * 100 / t_pure.
 *
 * Returns 0 whenever the fn_exit label is reached, including via fn_fail.
 */
int main(int argc, char **argv) {
int rc;
int actual;
int ppn = -1;
int nproc;
int ndoubles = -1;
int my_rank = -1, size = -1;
int i, j;
double *wbuf, *rbuf, *result;
MPI_Win win;
struct timespec start, end;
long t_flush_l, t_pure_l, t_overall_l;
long t_flush, t_pure, t_overall;
int opt;
/* Calibrate the busy-work loop before MPI is initialised. */
fwq_init();
while ((opt = getopt_long(argc, argv, "+d:P:", options, NULL)) != -1) {
switch (opt) {
case 'd':
/* The argument is an exponent; the value is not range-checked before the shift. */
ndoubles = (1ULL << atoi(optarg));
break;
case 'P':
ppn = atoi(optarg);
break;
default: /* '?' */
printf("unknown option %c\n", optopt);
exit(1);
}
}
if (ndoubles == -1 || ppn == -1) {
printf("specify ndoubles with -d and ppn with --ppn");
exit(1);
}
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual);
/*
 * NOTE(review): compares against the literal 3 rather than
 * MPI_THREAD_MULTIPLE, and exits without MPI_Finalize() -- confirm.
 */
if (actual != 3) {
printf("ERROR: MPI_THREAD_MULTIPLE not available (level was set to %d)\n", actual);
exit(1);
}
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
if (my_rank == 0) {
printf("ndoubles=%d,nproc=%d\n", ndoubles, nproc);
printf("nsec=%ld, nspw=%f\n", nsec, nspw);
}
/* Each buffer holds ndoubles doubles per rank, indexed as [rank * ndoubles + j]. */
/* accumulate-to buffer */
wbuf = malloc(sizeof(double) * ndoubles * nproc);
if(!wbuf) { printf("malloc failed"); goto fn_fail; }
memset(wbuf, 0, sizeof(double) * ndoubles * nproc);
/* read-from buffer */
rbuf = malloc(sizeof(double) * ndoubles * nproc);
if(!rbuf) { printf("malloc failed"); goto fn_fail; }
memset(rbuf, 0, sizeof(double) * ndoubles * nproc);
/* fetch-to buffer */
result = malloc(sizeof(double) * ndoubles * nproc);
if(!result) { printf("malloc failed"); goto fn_fail; }
memset(result, 0, sizeof(double) * ndoubles * nproc);
/* Expose accumulate-to buffer*/
/* A non-zero return code is only reported; execution continues. */
if (rc = MPI_Win_create(wbuf, sizeof(double) * ndoubles * nproc, sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &win)) {
printf("MPI_Win_create failed,rc=%d\n", rc);
}
//print_cpu_last_executed_on();
/* Seed each buffer with a distinct value per (rank slot, element). */
for (i = 0; i < nproc; i++) {
for (j = 0; j < ndoubles; j++) {
wbuf[i * ndoubles + j] = (i + 1) * 1000 + (j + 1);
rbuf[i * ndoubles + j] = (i + 1) * 10000 + (j + 1);
result[i * ndoubles + j] = (i + 1) * 100000 + (j + 1);
}
}
#if 0
for (i = 0; i < nproc; i++) {
for (j = 0; j < ndoubles; j++) {
printf("wbuf,proc=%d,j=%d,val=%f\n", i, j, wbuf[i * ndoubles + j]);
printf("rbuf,proc=%d,j=%d,val=%f\n", i, j, rbuf[i * ndoubles + j]);
printf("result,proc=%d,j=%d,val=%f\n", i, j, result[i * ndoubles + j]);
}
}
#endif
/* Measure flush time */
/* NFENCE flushes inside a single lock_all epoch, averaged per flush. */
MPI_Barrier(MPI_COMM_WORLD);
#define NFENCE 10
BEGIN_EPOCH(win);
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
for (i = 0; i < NFENCE; i++) {
FLUSH(win);
}
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
END_EPOCH(win);
t_flush_l = DIFFNSEC(end, start) / NFENCE;
//printf("t_flush (local): %ld usec\n", t_flush_l / 1000UL);
MPI_Allreduce(&t_flush_l, &t_flush, 1, MPI_LONG, MPI_MAX, MPI_COMM_WORLD);
if (my_rank == 0) printf("t_flush (max): %ld usec\n", t_flush / 1000UL);
/* Measure get_acc-flush time */
/* One epoch per iteration; the epoch open/close is inside the timed region. */
MPI_Barrier(MPI_COMM_WORLD);
#define NPURE 10
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
for (i = 0; i < NPURE; i++) {
BEGIN_EPOCH(win);
rma(nproc, ppn, my_rank, wbuf, rbuf, result, ndoubles, win, 0);
END_EPOCH(win);
}
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
t_pure_l = DIFFNSEC(end, start) / NPURE;
//printf("t_pure (local): %ld usec\n", t_pure_l / 1000UL);
MPI_Allreduce(&t_pure_l, &t_pure, 1, MPI_LONG, MPI_MAX, MPI_COMM_WORLD);
if (my_rank == 0) printf("t_pure (max): %ld usec\n", t_pure / 1000UL);
#if 0
for (i = 0; i < nproc; i++) {
for (j = 0; j < ndoubles; j++) {
printf("wbuf,proc=%d,j=%d,val=%f\n", i, j, wbuf[i * ndoubles + j]);
printf("rbuf,proc=%d,j=%d,val=%f\n", i, j, rbuf[i * ndoubles + j]);
printf("result,proc=%d,j=%d,val=%f\n", i, j, result[i * ndoubles + j]);
}
}
#endif
/* Measure get_acc-calc-flush time */
/* Same as above, with t_pure - t_flush nsec of busy work before the flush. */
MPI_Barrier(MPI_COMM_WORLD);
#define NOVERALL 10
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
for (i = 0; i < NOVERALL; i++) {
BEGIN_EPOCH(win);
rma(nproc, ppn, my_rank, wbuf, rbuf, result, ndoubles, win, t_pure - t_flush);
END_EPOCH(win);
}
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
t_overall_l = DIFFNSEC(end, start) / NOVERALL;
//printf("t_overall (local): %ld usec\n", t_overall_l / 1000UL);
MPI_Allreduce(&t_overall_l, &t_overall, 1, MPI_LONG, MPI_MAX, MPI_COMM_WORLD);
if (my_rank == 0) printf("t_overall (max): %ld usec\n", t_overall / 1000UL);
if (my_rank == 0) {
/* t_abs is the amount by which t_overall falls short of 2 * t_pure. */
long t_abs = (t_pure * 2) - t_overall;
printf("overlap: %.2f %%\n", (t_abs * 100) / (double)t_pure);
}
/* NOTE(review): the window and the three buffers are not released before MPI_Finalize(). */
fn_exit:
MPI_Finalize();
return 0;
/* Failure path: shares the normal exit, so the process still returns 0. */
fn_fail:
goto fn_exit;
}
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <sys/mman.h>
#include <mpi.h>
#include <unistd.h>
#include <getopt.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <sched.h>
#include "util.h"
#include "fwq.h"
//#define DEBUG
#ifdef DEBUG
#define dprintf printf
#else
#define dprintf {}
#endif
#define BEGIN_EPOCH(win) do { MPI_Win_lock_all(0, win); } while (0)
#define END_EPOCH(win) do { MPI_Win_unlock_all(win); } while (0)
#define FLUSH(win) do { MPI_Win_flush_local_all(win); } while (0)
/*
 * Tell whether two ranks share a node: with ppn ranks per group, rank r
 * belongs to group r / ppn. Returns 1 when both groups match, else 0.
 */
static inline int on_same_node(int ppn, int me, int you)
{
	int node_of_me = me / ppn;
	int node_of_you = you / ppn;

	if (node_of_me != node_of_you)
		return 0;
	return 1;
}
/*
 * get_acc-calc-flush_local
 *
 * For every rank i for which on_same_node(ppn, rank, i) is false, issue
 * one single-element MPI_Get_accumulate (MPI_SUM) per double: element
 * i * ndoubles + j of rbuf is the origin, the same element of result
 * receives the fetched value, and the same displacement is used in rank
 * i's window. Then spin for calc_nsec via fwq() and complete the
 * operations locally with FLUSH(win).
 *
 * wbuf is only referenced by the disabled debug print; it is kept for
 * the existing call sites.
 */
void rma(int nproc, int ppn, int rank, double *wbuf, double *rbuf,
	 double *result, int ndoubles, MPI_Win win, long calc_nsec)
{
	int i, j;

	for (i = 0; i < nproc; i++) {
		if (on_same_node(ppn, rank, i))
			continue;

		for (j = 0; j < ndoubles; j++) {
			/* Computed once, in MPI_Aint, so large i * ndoubles cannot overflow int */
			MPI_Aint off = (MPI_Aint)i * ndoubles + j;
#if 0
			printf("i=%d,j=%d,rbuf=%f,wbuf=%f\n",
			       i, j, rbuf[off], wbuf[off]);
#endif
			MPI_Get_accumulate(rbuf + off, 1, MPI_DOUBLE,
					   result + off, 1, MPI_DOUBLE,
					   i, off, 1, MPI_DOUBLE,
					   MPI_SUM, win);
		}
	}
	fwq(calc_nsec);
	FLUSH(win);
}
/*
 * getopt_long() long options. The only entry maps "--ppn <value>" to the
 * return value 'P'; the zeroed entry marks the end of the table.
 */
static struct option options[] = {
	{ "ppn", required_argument, NULL, 'P' },
	/* end */
	{ NULL, 0, NULL, 0 },
};
/*
 * MPI_Get_accumulate / computation overlap benchmark.
 *
 * Options: -d <k>            ndoubles = 2^k doubles per peer
 *          -P / --ppn <n>    value passed to rma() as ppn
 * Both are required.
 *
 * Three timed phases, each reduced across ranks with MPI_MAX:
 *   t_flush   - FLUSH(win) alone
 *   t_pure    - rma() with no computation
 *   t_overall - rma() with (t_pure - t_flush) nsec of computation
 * Rank 0 then prints overlap = (2 * t_pure - t_overall) * 100 / t_pure.
 *
 * Returns 0 whenever the fn_exit label is reached, including via fn_fail.
 */
int main(int argc, char **argv)
{
int rc;
int actual;
int ppn = -1;
int nproc;
int ndoubles = -1;
int my_rank = -1, size = -1;
int i, j;
double *wbuf, *rbuf, *result;
MPI_Win win;
struct timespec start, end;
long t_flush_l, t_pure_l, t_overall_l;
long t_flush, t_pure, t_overall;
int opt;
/* Calibrate the busy-work loop before MPI is initialised. */
fwq_init();
while ((opt = getopt_long(argc, argv, "+d:P:", options, NULL)) != -1) {
switch (opt) {
case 'd':
/* The argument is an exponent; the value is not range-checked before the shift. */
ndoubles = (1ULL << atoi(optarg));
break;
case 'P':
ppn = atoi(optarg);
break;
default: /* '?' */
printf("unknown option %c\n", optopt);
exit(1);
}
}
if (ndoubles == -1 || ppn == -1) {
printf("specify ndoubles with -d and ppn with --ppn");
exit(1);
}
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual);
/*
 * NOTE(review): compares against the literal 3 rather than
 * MPI_THREAD_MULTIPLE, and exits without MPI_Finalize() -- confirm.
 */
if (actual != 3) {
printf("ERROR: MPI_THREAD_MULTIPLE not available "
"(level was set to %d)\n", actual);
exit(1);
}
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
if (my_rank == 0) {
printf("ndoubles=%d,nproc=%d\n", ndoubles, nproc);
printf("nsec=%ld, nspw=%f\n", nsec, nspw);
}
/* Each buffer holds ndoubles doubles per rank, indexed as [rank * ndoubles + j]. */
/* accumulate-to buffer */
wbuf = malloc(sizeof(double) * ndoubles * nproc);
if (!wbuf) {
printf("malloc failed");
goto fn_fail;
}
memset(wbuf, 0, sizeof(double) * ndoubles * nproc);
/* read-from buffer */
rbuf = malloc(sizeof(double) * ndoubles * nproc);
if (!rbuf) {
printf("malloc failed");
goto fn_fail;
}
memset(rbuf, 0, sizeof(double) * ndoubles * nproc);
/* fetch-to buffer */
result = malloc(sizeof(double) * ndoubles * nproc);
if (!result) {
printf("malloc failed");
goto fn_fail;
}
memset(result, 0, sizeof(double) * ndoubles * nproc);
/* Expose accumulate-to buffer*/
/* A non-zero return code is only reported; execution continues. */
if (rc = MPI_Win_create(wbuf, sizeof(double) * ndoubles * nproc,
sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD,
&win)) {
printf("MPI_Win_create failed,rc=%d\n", rc);
}
/* Seed each buffer with a distinct value per (rank slot, element). */
for (i = 0; i < nproc; i++) {
for (j = 0; j < ndoubles; j++) {
wbuf[i * ndoubles + j] = (i + 1) * 1000 + (j + 1);
rbuf[i * ndoubles + j] = (i + 1) * 10000 + (j + 1);
result[i * ndoubles + j] = (i + 1) * 100000 + (j + 1);
}
}
#if 0
for (i = 0; i < nproc; i++) {
for (j = 0; j < ndoubles; j++) {
printf("wbuf,proc=%d,j=%d,val=%f\n",
i, j, wbuf[i * ndoubles + j]);
printf("rbuf,proc=%d,j=%d,val=%f\n",
i, j, rbuf[i * ndoubles + j]);
printf("result,proc=%d,j=%d,val=%f\n",
i, j, result[i * ndoubles + j]);
}
}
#endif
/* Measure flush time */
/* NFENCE flushes inside a single lock_all epoch, averaged per flush. */
MPI_Barrier(MPI_COMM_WORLD);
#define NFENCE 10
BEGIN_EPOCH(win);
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
for (i = 0; i < NFENCE; i++) {
FLUSH(win);
}
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
END_EPOCH(win);
t_flush_l = DIFFNSEC(end, start) / NFENCE;
//printf("t_flush (local): %ld usec\n", t_flush_l / 1000UL);
MPI_Allreduce(&t_flush_l, &t_flush, 1, MPI_LONG, MPI_MAX,
MPI_COMM_WORLD);
if (my_rank == 0)
printf("t_flush (max): %ld usec\n", t_flush / 1000UL);
/* Measure get_acc-flush time */
/* One epoch per iteration; the epoch open/close is inside the timed region. */
MPI_Barrier(MPI_COMM_WORLD);
#define NPURE 10
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
for (i = 0; i < NPURE; i++) {
BEGIN_EPOCH(win);
rma(nproc, ppn, my_rank, wbuf, rbuf, result, ndoubles, win, 0);
END_EPOCH(win);
}
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
t_pure_l = DIFFNSEC(end, start) / NPURE;
//printf("t_pure (local): %ld usec\n", t_pure_l / 1000UL);
MPI_Allreduce(&t_pure_l, &t_pure, 1, MPI_LONG, MPI_MAX,
MPI_COMM_WORLD);
if (my_rank == 0)
printf("t_pure (max): %ld usec\n", t_pure / 1000UL);
#if 0
for (i = 0; i < nproc; i++) {
for (j = 0; j < ndoubles; j++) {
printf("wbuf,proc=%d,j=%d,val=%f\n",
i, j, wbuf[i * ndoubles + j]);
printf("rbuf,proc=%d,j=%d,val=%f\n",
i, j, rbuf[i * ndoubles + j]);
printf("result,proc=%d,j=%d,val=%f\n",
i, j, result[i * ndoubles + j]);
}
}
#endif
/* Measure get_acc-calc-flush time */
/* Same as above, with t_pure - t_flush nsec of busy work before the flush. */
MPI_Barrier(MPI_COMM_WORLD);
#define NOVERALL 10
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
for (i = 0; i < NOVERALL; i++) {
BEGIN_EPOCH(win);
rma(nproc, ppn, my_rank, wbuf, rbuf, result, ndoubles, win,
t_pure - t_flush);
END_EPOCH(win);
}
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
t_overall_l = DIFFNSEC(end, start) / NOVERALL;
//printf("t_overall (local): %ld usec\n", t_overall_l / 1000UL);
MPI_Allreduce(&t_overall_l, &t_overall, 1, MPI_LONG, MPI_MAX,
MPI_COMM_WORLD);
if (my_rank == 0)
printf("t_overall (max): %ld usec\n", t_overall / 1000UL);
if (my_rank == 0) {
/* t_abs is the amount by which t_overall falls short of 2 * t_pure. */
long t_abs = (t_pure * 2) - t_overall;
printf("overlap: %.2f %%\n", (t_abs * 100) / (double)t_pure);
}
/* NOTE(review): the window and the three buffers are not released before MPI_Finalize(). */
fn_exit:
MPI_Finalize();
return 0;
/* Failure path: shares the normal exit, so the process still returns 0. */
fn_fail:
goto fn_exit;
}

View File

@ -1,335 +1,245 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <sys/mman.h>
#include <mpi.h>
#include <unistd.h>
#include <getopt.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <sched.h>
//#define DEBUG
#ifdef DEBUG
#define dprintf printf
#else
#define dprintf {}
#endif
#define DIFFNSEC(end, start) ((end.tv_sec - start.tv_sec) * 1000000000UL + (end.tv_nsec - start.tv_nsec))
#define BEGIN_EPOCH(win) do { MPI_Win_lock_all(0, win); } while(0)
#define END_EPOCH(win) do { MPI_Win_unlock_all(win); } while(0)
/*
 * One fixed unit of busy work, written in x86-64 inline assembly.
 * Sets %rcx to 0, then adds 1 and loops back while %rcx <= 99, so the
 * add instruction executes 100 times. Touches no memory; %rcx and the
 * condition codes are declared as clobbered.
 */
static inline void fixed_size_work() {
asm volatile(
"movq $0, %%rcx\n\t"
"1:\t"
"addq $1, %%rcx\n\t"
"cmpq $99, %%rcx\n\t"
"jle 1b\n\t"
:
:
: "rcx", "cc");
}
/*
 * Execute fixed_size_work() n times back to back.
 *
 * The counter has the same type as n: an int counter would overflow
 * (undefined behaviour) before reaching any n above INT_MAX.
 */
static inline void bulk_fsw(unsigned long n) {
	unsigned long j;

	for (j = 0; j < n; j++) {
		fixed_size_work();
	}
}
double nspw; /* nsec per work */
unsigned long nsec; /* thread CPU time, in nsec, of the N_INIT calibration units */

/* Number of work units timed by fwq_init() */
#define N_INIT 10000000

/*
 * Calibrate the busy-work loop: time N_INIT units of fixed_size_work()
 * on the calling thread's CPU clock and store the total in nsec and the
 * per-unit cost in nspw.
 */
void fwq_init(void)
{
	struct timespec start, end;

	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
	bulk_fsw(N_INIT);
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
	nsec = DIFFNSEC(end, start);
	nspw = nsec / (double)N_INIT;
}
#if 0
/* Disabled variant: convert the delay into work units using the calibrated nspw. */
void fwq(long delay_nsec) {
	if (delay_nsec >= 0) {
		bulk_fsw(delay_nsec / nspw);
	}
}
#else /* For machines with large core-to-core performance variation (e.g. OFP) */
/*
 * Spin for at least delay_nsec nanoseconds of the calling thread's CPU
 * time. A negative delay returns immediately. The clock is re-read
 * between small bursts of work instead of trusting the calibration.
 */
void fwq(long delay_nsec) {
	struct timespec t0, now;

	if (delay_nsec < 0)
		return;

	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &t0);
	for (;;) {
		clock_gettime(CLOCK_THREAD_CPUTIME_ID, &now);
		if (DIFFNSEC(now, t0) >= delay_nsec)
			return;
		bulk_fsw(2); /* ~150 ns per iteration on OFP */
	}
}
#endif
/*
 * Print the CPU the calling thread last ran on, taken from two sources:
 * field 39 of /proc/<pid>/task/<tid>/stat and sched_getcpu(). The line
 * also carries the PMI_RANK environment value (-1 when unset) and the
 * thread id.
 *
 * Returns 0 on success, -1 if the stat file cannot be opened, read or
 * parsed, if allocation fails, or if sched_getcpu() fails.
 *
 * NOTE(review): fields are split on single spaces, so a thread name
 * containing a space would shift the field count -- confirm this cannot
 * happen for the processes under test.
 */
static int print_cpu_last_executed_on() {
	enum { STAT_BUF_SIZE = 65536, STAT_CPU_FIELD = 39 };
	char fn[256];
	char *result = NULL; /* NULL so the cleanup path may always free() it */
	char *next_delim;
	char *field = NULL;
	const char *rank_env;
	pid_t tid = syscall(SYS_gettid);
	int fd = -1;
	int offset = 0;
	int cpu;
	int i;
	int mpi_errno = 0;

	snprintf(fn, sizeof(fn), "/proc/%d/task/%d/stat", getpid(), (int)tid);
	fd = open(fn, O_RDONLY);
	if (fd == -1) {
		printf("open() failed\n");
		goto fn_fail;
	}
	result = malloc(STAT_BUF_SIZE);
	if (result == NULL) {
		printf("malloc() failed");
		goto fn_fail;
	}
	/* Read to EOF without running past the buffer; keep one byte for the NUL. */
	while (offset < STAT_BUF_SIZE - 1) {
		ssize_t amount = read(fd, result + offset,
				      STAT_BUF_SIZE - 1 - offset);

		if (amount == -1) {
			printf("read() failed");
			goto fn_fail;
		}
		if (amount == 0) {
			break;
		}
		offset += (int)amount;
	}
	result[offset] = '\0'; /* strsep() needs a terminated string */

	/* Walk to the STAT_CPU_FIELD-th space-separated field. */
	next_delim = result;
	for (i = 0; i < STAT_CPU_FIELD; i++) {
		field = strsep(&next_delim, " ");
	}
	if (field == NULL) {
		/* Fewer fields than expected: nothing to convert. */
		printf("stat parse failed\n");
		goto fn_fail;
	}

	cpu = sched_getcpu();
	if (cpu == -1) {
		printf("sched_getcpu() failed\n");
		goto fn_fail;
	}

	rank_env = getenv("PMI_RANK"); /* may be unset outside an MPI launcher */
	printf("compute thread,pmi_rank=%02d,stat-cpu=%02d,sched_getcpu=%02d,tid=%d\n",
	       rank_env ? atoi(rank_env) : -1, atoi(field), cpu, (int)tid);
	fflush(stdout);
fn_exit:
	free(result);
	if (fd != -1) {
		close(fd);
	}
	return mpi_errno;
fn_fail:
	mpi_errno = -1;
	goto fn_exit;
}
/*
 * Return non-zero when ranks `me` and `you` land in the same group of
 * `ppn` consecutive ranks, i.e. when both integer-divide by ppn to the
 * same value.
 */
static inline int on_same_node(int ppn, int me, int you)
{
	const int my_node = me / ppn;
	const int your_node = you / ppn;

	return my_node == your_node;
}
/*
 * accumulate-flush_local-calc
 *
 * For every rank i for which on_same_node(ppn, rank, i) is false, and
 * for each of the ndoubles elements: unless flush_only is set, issue a
 * single-element MPI_Accumulate (MPI_SUM) of rbuf[i * ndoubles + j] to
 * the same displacement in rank i's window; then, in either mode, call
 * MPI_Win_flush_local(i, win). After the loops, spin for calc_nsec via
 * fwq().
 *
 * wbuf is not used here; it is kept for the existing call sites.
 */
void rma(int nproc, int ppn, int rank, double *wbuf, double *rbuf, int ndoubles, MPI_Win win, long calc_nsec, int flush_only) {
	int i, j;

	for (i = 0; i < nproc; i++) {
		if (on_same_node(ppn, rank, i)) {
			continue;
		}
		for (j = 0; j < ndoubles; j++) {
			/* Computed once, in MPI_Aint, so large i * ndoubles cannot overflow int */
			MPI_Aint off = (MPI_Aint)i * ndoubles + j;

			if (!flush_only) {
				MPI_Accumulate(rbuf + off, 1, MPI_DOUBLE,
					       i, off, 1, MPI_DOUBLE,
					       MPI_SUM, win);
			}
			MPI_Win_flush_local(i, win);
		}
	}
	fwq(calc_nsec);
}
/*
 * Long-option table handed to getopt_long(): "--ppn <value>" is reported
 * with the value 'P'. The all-zero entry terminates the table.
 */
static struct option options[] = {
	{ "ppn", required_argument, NULL, 'P' },
	{ NULL, 0, NULL, 0 }, /* end */
};
/*
 * MPI_Accumulate / computation overlap benchmark.
 *
 * Options: -d <k>            ndoubles = 2^k doubles per peer
 *          -P / --ppn <n>    value passed to rma() as ppn
 * Both are required.
 *
 * Three timed phases, each reduced across ranks with MPI_MAX:
 *   t_flush   - rma() in flush_only mode (no accumulate issued)
 *   t_pure    - rma() with accumulates and no computation
 *   t_overall - rma() with accumulates and (t_pure - t_flush) nsec of
 *               computation
 * Rank 0 then prints overlap = (2 * t_pure - t_overall) * 100 / t_pure.
 *
 * Returns 0 whenever the fn_exit label is reached, including via fn_fail.
 */
int main(int argc, char **argv) {
int rc;
int actual;
int ppn = -1;
int nproc;
int ndoubles = -1;
int my_rank = -1, size = -1;
int i, j;
double *wbuf, *rbuf;
MPI_Win win;
struct timespec start, end;
long t_flush_l, t_pure_l, t_overall_l;
long t_flush, t_pure, t_overall;
int opt;
/* Calibrate the busy-work loop before MPI is initialised. */
fwq_init();
while ((opt = getopt_long(argc, argv, "+d:P:", options, NULL)) != -1) {
switch (opt) {
case 'd':
/* The argument is an exponent; the value is not range-checked before the shift. */
ndoubles = (1ULL << atoi(optarg));
break;
case 'P':
ppn = atoi(optarg);
break;
default: /* '?' */
printf("unknown option %c\n", optopt);
exit(1);
}
}
if (ndoubles == -1 || ppn == -1) {
printf("specify ndoubles with -d and ppn with --ppn");
exit(1);
}
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual);
/*
 * NOTE(review): compares against the literal 3 rather than
 * MPI_THREAD_MULTIPLE, and exits without MPI_Finalize() -- confirm.
 */
if (actual != 3) {
printf("ERROR: MPI_THREAD_MULTIPLE not available (level was set to %d)\n", actual);
exit(1);
}
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
if (my_rank == 0) {
printf("ndoubles=%d,nproc=%d\n", ndoubles, nproc);
printf("nsec=%ld, nspw=%f\n", nsec, nspw);
}
/* Each buffer holds ndoubles doubles per rank, indexed as [rank * ndoubles + j]. */
/* accumulate-to buffer */
wbuf = malloc(sizeof(double) * ndoubles * nproc);
if(!wbuf) { printf("malloc failed"); goto fn_fail; }
memset(wbuf, 0, sizeof(double) * ndoubles * nproc);
/* read-from buffer */
rbuf = malloc(sizeof(double) * ndoubles * nproc);
if(!rbuf) { printf("malloc failed"); goto fn_fail; }
memset(rbuf, 0, sizeof(double) * ndoubles * nproc);
/* Expose accumulate-to buffer*/
/* A non-zero return code is only reported; execution continues. */
if (rc = MPI_Win_create(wbuf, sizeof(double) * ndoubles * nproc, sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &win)) {
printf("MPI_Win_create failed,rc=%d\n", rc);
}
//print_cpu_last_executed_on();
/* Seed each buffer with a distinct value per (rank slot, element). */
for (i = 0; i < nproc; i++) {
for (j = 0; j < ndoubles; j++) {
wbuf[i * ndoubles + j] = (i + 1) * 1000 + (j + 1);
rbuf[i * ndoubles + j] = (i + 1) * 10000 + (j + 1);
}
}
#if 0
for (i = 0; i < nproc; i++) {
for (j = 0; j < ndoubles; j++) {
printf("wbuf,proc=%d,j=%d,val=%f\n", i, j, wbuf[i * ndoubles + j]);
printf("rbuf,proc=%d,j=%d,val=%f\n", i, j, rbuf[i * ndoubles + j]);
}
}
#endif
/* Measure flush time */
/* NFENCE flush-only passes of rma() inside a single lock_all epoch. */
MPI_Barrier(MPI_COMM_WORLD);
#define NFENCE 10
BEGIN_EPOCH(win);
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
for (i = 0; i < NFENCE; i++) {
rma(nproc, ppn, my_rank, wbuf, rbuf, ndoubles, win, 0, 1);
}
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
END_EPOCH(win);
t_flush_l = DIFFNSEC(end, start) / NFENCE;
//printf("t_flush (local): %ld usec\n", t_flush_l / 1000UL);
MPI_Allreduce(&t_flush_l, &t_flush, 1, MPI_LONG, MPI_MAX, MPI_COMM_WORLD);
if (my_rank == 0) printf("t_flush (max): %ld usec\n", t_flush / 1000UL);
/* Measure accumulate-flush time */
/* One epoch per iteration; the epoch open/close is inside the timed region. */
MPI_Barrier(MPI_COMM_WORLD);
#define NPURE 10
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
//MPI_Pcontrol(1, "rma");
for (i = 0; i < NPURE; i++) {
BEGIN_EPOCH(win);
rma(nproc, ppn, my_rank, wbuf, rbuf, ndoubles, win, 0, 0);
END_EPOCH(win);
}
//MPI_Pcontrol(-1, "rma");
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
t_pure_l = DIFFNSEC(end, start) / NPURE;
//printf("t_pure (local): %ld usec\n", t_pure_l / 1000UL);
MPI_Allreduce(&t_pure_l, &t_pure, 1, MPI_LONG, MPI_MAX, MPI_COMM_WORLD);
if (my_rank == 0) printf("t_pure (max): %ld usec\n", t_pure / 1000UL);
#if 0
for (i = 0; i < nproc; i++) {
for (j = 0; j < ndoubles; j++) {
printf("wbuf,proc=%d,j=%d,val=%f\n", i, j, wbuf[i * ndoubles + j]);
printf("rbuf,proc=%d,j=%d,val=%f\n", i, j, rbuf[i * ndoubles + j]);
}
}
#endif
/* Measure accumulate-flush-calc time */
/* Same as above, with t_pure - t_flush nsec of busy work per rma() call. */
MPI_Barrier(MPI_COMM_WORLD);
#define NOVERALL 10
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
//MPI_Pcontrol(1, "rma-calc");
for (i = 0; i < NOVERALL; i++) {
BEGIN_EPOCH(win);
rma(nproc, ppn, my_rank, wbuf, rbuf, ndoubles, win, t_pure - t_flush, 0);
END_EPOCH(win);
}
//MPI_Pcontrol(-1, "rma-calc");
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
t_overall_l = DIFFNSEC(end, start) / NOVERALL;
//printf("t_overall (local): %ld usec\n", t_overall_l / 1000UL);
MPI_Allreduce(&t_overall_l, &t_overall, 1, MPI_LONG, MPI_MAX, MPI_COMM_WORLD);
if (my_rank == 0) printf("t_overall (max): %ld usec\n", t_overall / 1000UL);
if (my_rank == 0) {
/* t_abs is the amount by which t_overall falls short of 2 * t_pure. */
long t_abs = (t_pure * 2) - t_overall;
printf("overlap: %.2f %%\n", (t_abs * 100) / (double)t_pure);
}
/* NOTE(review): the window and both buffers are not released before MPI_Finalize(). */
fn_exit:
MPI_Finalize();
return 0;
/* Failure path: shares the normal exit, so the process still returns 0. */
fn_fail:
goto fn_exit;
}
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <sys/mman.h>
#include <mpi.h>
#include <unistd.h>
#include <getopt.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <sched.h>
#include "util.h"
#include "fwq.h"
//#define DEBUG
#ifdef DEBUG
#define dprintf printf
#else
#define dprintf {}
#endif
#define BEGIN_EPOCH(win) do { MPI_Win_lock_all(0, win); } while (0)
#define END_EPOCH(win) do { MPI_Win_unlock_all(win); } while (0)
/*
 * Tell whether two ranks share a node: with ppn ranks per group, rank r
 * belongs to group r / ppn. Returns 1 when both groups match, else 0.
 */
static inline int on_same_node(int ppn, int me, int you)
{
	int node_of_me = me / ppn;
	int node_of_you = you / ppn;

	if (node_of_me != node_of_you)
		return 0;
	return 1;
}
/*
 * accumulate-flush_local-calc
 *
 * For every rank i for which on_same_node(ppn, rank, i) is false, and
 * for each of the ndoubles elements: unless flush_only is set, issue a
 * single-element MPI_Accumulate (MPI_SUM) of rbuf[i * ndoubles + j] to
 * the same displacement in rank i's window; then, in either mode, call
 * MPI_Win_flush_local(i, win). After the loops, spin for calc_nsec via
 * fwq().
 *
 * wbuf is only referenced by the disabled debug print; it is kept for
 * the existing call sites.
 */
void rma(int nproc, int ppn, int rank, double *wbuf, double *rbuf,
	 int ndoubles, MPI_Win win, long calc_nsec, int flush_only)
{
	int i, j;

	for (i = 0; i < nproc; i++) {
		if (on_same_node(ppn, rank, i))
			continue;

		for (j = 0; j < ndoubles; j++) {
			/* Computed once, in MPI_Aint, so large i * ndoubles cannot overflow int */
			MPI_Aint off = (MPI_Aint)i * ndoubles + j;
#if 0
			printf("i=%d,j=%d,rbuf=%f,wbuf=%f\n",
			       i, j, rbuf[off], wbuf[off]);
#endif
			if (!flush_only) {
				MPI_Accumulate(rbuf + off, 1, MPI_DOUBLE,
					       i, off, 1, MPI_DOUBLE,
					       MPI_SUM, win);
			}
			MPI_Win_flush_local(i, win);
		}
	}
	fwq(calc_nsec);
}
/*
 * getopt_long() long options. The only entry maps "--ppn <value>" to the
 * return value 'P'; the zeroed entry marks the end of the table.
 */
static struct option options[] = {
	{ "ppn", required_argument, NULL, 'P' },
	/* end */
	{ NULL, 0, NULL, 0 },
};
/*
 * MPI_Accumulate / computation overlap benchmark.
 *
 * Options: -d <k>            ndoubles = 2^k doubles per peer
 *          -P / --ppn <n>    value passed to rma() as ppn
 * Both are required.
 *
 * Three timed phases, each reduced across ranks with MPI_MAX:
 *   t_flush   - rma() in flush_only mode (no accumulate issued)
 *   t_pure    - rma() with accumulates and no computation
 *   t_overall - rma() with accumulates and (t_pure - t_flush) nsec of
 *               computation
 * Rank 0 then prints overlap = (2 * t_pure - t_overall) * 100 / t_pure.
 *
 * Returns 0 whenever the fn_exit label is reached, including via fn_fail.
 */
int main(int argc, char **argv)
{
int rc;
int actual;
int ppn = -1;
int nproc;
int ndoubles = -1;
int my_rank = -1, size = -1;
int i, j;
double *wbuf, *rbuf;
MPI_Win win;
struct timespec start, end;
long t_flush_l, t_pure_l, t_overall_l;
long t_flush, t_pure, t_overall;
int opt;
/* Calibrate the busy-work loop before MPI is initialised. */
fwq_init();
while ((opt = getopt_long(argc, argv, "+d:P:", options, NULL)) != -1) {
switch (opt) {
case 'd':
/* The argument is an exponent; the value is not range-checked before the shift. */
ndoubles = (1ULL << atoi(optarg));
break;
case 'P':
ppn = atoi(optarg);
break;
default: /* '?' */
printf("unknown option %c\n", optopt);
exit(1);
}
}
if (ndoubles == -1 || ppn == -1) {
printf("specify ndoubles with -d and ppn with --ppn");
exit(1);
}
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual);
/*
 * NOTE(review): compares against the literal 3 rather than
 * MPI_THREAD_MULTIPLE, and exits without MPI_Finalize() -- confirm.
 */
if (actual != 3) {
printf("ERROR: MPI_THREAD_MULTIPLE not available "
"(level was set to %d)\n",
actual);
exit(1);
}
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
if (my_rank == 0) {
printf("ndoubles=%d,nproc=%d\n", ndoubles, nproc);
printf("nsec=%ld, nspw=%f\n", nsec, nspw);
}
/* Each buffer holds ndoubles doubles per rank, indexed as [rank * ndoubles + j]. */
/* accumulate-to buffer */
wbuf = malloc(sizeof(double) * ndoubles * nproc);
if (!wbuf) {
printf("malloc failed");
goto fn_fail;
}
memset(wbuf, 0, sizeof(double) * ndoubles * nproc);
/* read-from buffer */
rbuf = malloc(sizeof(double) * ndoubles * nproc);
if (!rbuf) {
printf("malloc failed");
goto fn_fail;
}
memset(rbuf, 0, sizeof(double) * ndoubles * nproc);
/* Expose accumulate-to buffer*/
/* A non-zero return code is only reported; execution continues. */
if (rc = MPI_Win_create(wbuf, sizeof(double) * ndoubles * nproc,
sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD,
&win)) {
printf("MPI_Win_create failed,rc=%d\n", rc);
}
/* Seed each buffer with a distinct value per (rank slot, element). */
for (i = 0; i < nproc; i++) {
for (j = 0; j < ndoubles; j++) {
wbuf[i * ndoubles + j] = (i + 1) * 1000 + (j + 1);
rbuf[i * ndoubles + j] = (i + 1) * 10000 + (j + 1);
}
}
#if 0
for (i = 0; i < nproc; i++) {
for (j = 0; j < ndoubles; j++) {
printf("wbuf,proc=%d,j=%d,val=%f\n",
i, j, wbuf[i * ndoubles + j]);
printf("rbuf,proc=%d,j=%d,val=%f\n",
i, j, rbuf[i * ndoubles + j]);
}
}
#endif
/* Measure flush time */
/* NFENCE flush-only passes of rma() inside a single lock_all epoch. */
MPI_Barrier(MPI_COMM_WORLD);
#define NFENCE 10
BEGIN_EPOCH(win);
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
for (i = 0; i < NFENCE; i++) {
rma(nproc, ppn, my_rank, wbuf, rbuf, ndoubles, win, 0, 1);
}
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
END_EPOCH(win);
t_flush_l = DIFFNSEC(end, start) / NFENCE;
//printf("t_flush (local): %ld usec\n", t_flush_l / 1000UL);
MPI_Allreduce(&t_flush_l, &t_flush, 1, MPI_LONG, MPI_MAX,
MPI_COMM_WORLD);
if (my_rank == 0)
printf("t_flush (max): %ld usec\n", t_flush / 1000UL);
/* Measure accumulate-flush time */
/* One epoch per iteration; the epoch open/close is inside the timed region. */
MPI_Barrier(MPI_COMM_WORLD);
#define NPURE 10
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
//MPI_Pcontrol(1, "rma");
for (i = 0; i < NPURE; i++) {
BEGIN_EPOCH(win);
rma(nproc, ppn, my_rank, wbuf, rbuf, ndoubles, win, 0, 0);
END_EPOCH(win);
}
//MPI_Pcontrol(-1, "rma");
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
t_pure_l = DIFFNSEC(end, start) / NPURE;
//printf("t_pure (local): %ld usec\n", t_pure_l / 1000UL);
MPI_Allreduce(&t_pure_l, &t_pure, 1, MPI_LONG, MPI_MAX, MPI_COMM_WORLD);
if (my_rank == 0)
printf("t_pure (max): %ld usec\n", t_pure / 1000UL);
#if 0
for (i = 0; i < nproc; i++) {
for (j = 0; j < ndoubles; j++) {
printf("wbuf,proc=%d,j=%d,val=%f\n",
i, j, wbuf[i * ndoubles + j]);
printf("rbuf,proc=%d,j=%d,val=%f\n",
i, j, rbuf[i * ndoubles + j]);
}
}
#endif
/* Measure accumulate-flush-calc time */
/* Same as above, with t_pure - t_flush nsec of busy work per rma() call. */
MPI_Barrier(MPI_COMM_WORLD);
#define NOVERALL 10
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
//MPI_Pcontrol(1, "rma-calc");
for (i = 0; i < NOVERALL; i++) {
BEGIN_EPOCH(win);
rma(nproc, ppn, my_rank, wbuf, rbuf, ndoubles, win,
t_pure - t_flush, 0);
END_EPOCH(win);
}
//MPI_Pcontrol(-1, "rma-calc");
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
t_overall_l = DIFFNSEC(end, start) / NOVERALL;
//printf("t_overall (local): %ld usec\n", t_overall_l / 1000UL);
MPI_Allreduce(&t_overall_l, &t_overall, 1, MPI_LONG, MPI_MAX,
MPI_COMM_WORLD);
if (my_rank == 0)
printf("t_overall (max): %ld usec\n",
t_overall / 1000UL);
if (my_rank == 0) {
/* t_abs is the amount by which t_overall falls short of 2 * t_pure. */
long t_abs = (t_pure * 2) - t_overall;
printf("overlap: %.2f %%\n", (t_abs * 100) / (double)t_pure);
}
/* NOTE(review): the window and both buffers are not released before MPI_Finalize(). */
fn_exit:
MPI_Finalize();
return 0;
/* Failure path: shares the normal exit, so the process still returns 0. */
fn_fail:
goto fn_exit;
}

View File

@ -1,242 +1,278 @@
#define _GNU_SOURCE /* See feature_test_macros(7) */
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <mpi.h>
#include <unistd.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <getopt.h>
#include <sched.h>
#include <sys/time.h>
#include <sys/resource.h>
#include "async_progress.h"
#include "util.h"
//#define DEBUG
#ifdef DEBUG
#define dprintf printf
#else
#define dprintf {}
#endif
/*
 * Long-option table for getopt_long(). It defines no long options: the
 * single all-zero entry is the terminator.
 */
static struct option options[] = {
	{ .name = NULL, .has_arg = 0, .flag = NULL, .val = 0 }, /* end */
};
/*
 * MPI_Get_accumulate benchmark with and without an asynchronous progress
 * thread.
 *
 * Option: -n <nsamples> (required) - number of outer iterations per
 * measurement.
 *
 * Outer loop k: 0 = before INIT_ASYNC_THREAD_(), 1 = after it.
 * For each k, 3 trials (m) of 11 steps (l = 0..10). Each step issues the
 * RMA operations, calls fwq(calc_cyc * nsamples) with
 * calc_cyc = t_pure0 / 10 * l, flushes locally, and reports the
 * per-sample rdtsc_light() delta reduced with MPI_MAX. t_pure0 is the
 * reduced result of the k == 0, l == 0 step.
 *
 * Returns 0 whenever the fn_exit label is reached, including via fn_fail.
 */
int main(int argc, char **argv) {
int rc;
int actual;
int nproc;
int nsamples = -1;
int my_rank = -1, size = -1;
int i, j, k, l, m;
double *wbuf, *rbuf, *result;
MPI_Win win;
long start, end;
long t_pure_l, t_pure, t_pure0 = 0;
int opt;
/* Number of doubles in each of the three buffers. */
int szbuf = 8;
struct rusage ru_start, ru_end;
struct timeval tv_start, tv_end;
fwq_init();
while ((opt = getopt_long(argc, argv, "+n:", options, NULL)) != -1) {
switch (opt) {
case 'n':
nsamples = atoi(optarg);
break;
default: /* '?' */
printf("unknown option %c\n", optopt);
exit(1);
}
}
if (nsamples == -1) {
printf("specify nsamples with -n");
exit(1);
}
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual);
/*
 * NOTE(review): compares against the literal 3 rather than
 * MPI_THREAD_MULTIPLE, and exits without MPI_Finalize() -- confirm.
 */
if (actual != 3) {
printf("ERROR: MPI_THREAD_MULTIPLE not available (level was set to %d)\n", actual);
exit(1);
}
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
if (my_rank == 0) {
printf("nsamples=%d,nproc=%d\n", nsamples, nproc);
}
/* accumulate-to buffer */
wbuf = malloc(sizeof(double) * szbuf);
if(!wbuf) { printf("malloc failed"); goto fn_fail; }
memset(wbuf, 0, sizeof(double) * szbuf);
/* read-from buffer */
rbuf = malloc(sizeof(double) * szbuf);
if(!rbuf) { printf("malloc failed"); goto fn_fail; }
memset(rbuf, 0, sizeof(double) * szbuf);
/* fetch-to buffer */
result = malloc(sizeof(double) * szbuf);
if(!result) { printf("malloc failed"); goto fn_fail; }
memset(result, 0, sizeof(double) * szbuf);
/* Expose accumulate-to buffer*/
/* A non-zero return code is only reported; execution continues. */
if (rc = MPI_Win_create(wbuf, sizeof(double) * szbuf, sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &win)) {
printf("MPI_Win_create failed,rc=%d\n", rc);
}
/* Seed the three buffers with distinguishable values. */
for (j = 0; j < szbuf; j++) {
wbuf[j] = j + 1;
rbuf[j] = 10000 + j + 1;
result[j] = 100000 + j + 1;
}
#if 0
for (j = 0; j < szbuf; j++) {
printf("wbuf,j=%d,val=%f\n", j, wbuf[j]);
printf("rbuf,j=%d,val=%f\n", j, rbuf[j]);
printf("result,j=%d,val=%f\n", j, result[j]);
}
}
#endif
/* k == 0: baseline pass; k == 1: pass run after INIT_ASYNC_THREAD_(). */
for (k = 0; k < 2; k++) {
if (k == 1) {
print_cpu_last_executed_on("main");
INIT_ASYNC_THREAD_();
/* ru_start / tv_start are only consumed by the disabled report further down. */
if ((rc = getrusage(RUSAGE_THREAD, &ru_start))) {
printf("%s: ERROR: getrusage failed (%d)\n", __FUNCTION__, rc);
}
if ((rc = gettimeofday(&tv_start, NULL))) {
printf("%s: ERROR: gettimeofday failed (%d)\n", __FUNCTION__, rc);
}
/* NOTE(review): raw syscall number 701 is not defined in this file -- confirm its meaning. */
syscall(701, 1 | 2 | 0x80000000);
}
/* Three trials, each sweeping the computation amount from 0 to t_pure0 in tenths. */
for (m = 0; m < 3; m++) {
for (l = 0; l <= 10; l++) {
long calc_cyc = /*(k == 1 && l == 0) ? (double)t_pure0 * 0.1 :*/ t_pure0 / 10 * l;
MPI_Barrier(MPI_COMM_WORLD);
MPI_Win_lock_all(0, win);
//clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
start = rdtsc_light();
for (j = 0; j < nsamples; j++) {
for (i = 0; i < nproc; i++) {
/*
 * NOTE(review): the skip test is based on j % nproc, while the
 * operation below targets rank i -- confirm this is intended.
 */
int target = j % nproc;
if (target == my_rank) {
continue;
}
#if 0
MPI_Get_accumulate(rbuf + j % szbuf, 1, MPI_DOUBLE,
result + j % szbuf, 1, MPI_DOUBLE,
i,
j % szbuf, 1, MPI_DOUBLE,
MPI_SUM, win);
#endif
#if 1
MPI_Get_accumulate(rbuf, szbuf, MPI_DOUBLE,
result, szbuf, MPI_DOUBLE,
i,
0, szbuf, MPI_DOUBLE,
MPI_SUM, win);
#endif
#if 0
MPI_Accumulate(rbuf, szbuf, MPI_DOUBLE,
i,
0, szbuf, MPI_DOUBLE,
MPI_SUM, win);
#endif
#if 0
MPI_Get(rbuf + j % szbuf, 1, MPI_DOUBLE,
i,
j % szbuf, 1, MPI_DOUBLE,
win);
#endif
}
}
/* Computation phase placed between issuing the operations and the local flush. */
fwq(calc_cyc * nsamples);
MPI_Win_flush_local_all(win);
end = rdtsc_light();
//clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
MPI_Win_unlock_all(win);
MPI_Barrier(MPI_COMM_WORLD);
t_pure_l = (end - start) / nsamples;
//t_pure_l = DIFFNSEC(end, start) / nsamples;
/* The condition is always true because of the leading `1||`. */
if (1||m == 2) {
MPI_Allreduce(&t_pure_l, &t_pure, 1, MPI_LONG, MPI_MAX, MPI_COMM_WORLD);
if (my_rank == 0) {
if (l == 0) {
printf("async: %d, trial: %d\n", k, m);
}
if (k == 0) {
printf("%ld\t%ld\n", calc_cyc, t_pure);
} else {
printf("%ld\n", t_pure);
}
}
}
/* Record the no-computation result of the baseline pass; later steps scale from it. */
if (k == 0 && l == 0) {
t_pure0 = t_pure;
}
#if 0
for (i = 0; i < nproc; i++) {
for (j = 0; j < sbuf; j++) {
printf("wbuf,j=%d,val=%f\n", j, wbuf[j]);
printf("rbuf,j=%d,val=%f\n", j, rbuf[j]);
printf("result,j=%d,val=%f\n", j, result[j]);
}
}
#endif
}
}
if (k == 1) {
FINALIZE_ASYNC_THREAD_();
#if 0
if ((rc = getrusage(RUSAGE_THREAD, &ru_end))) {
printf("%s: ERROR: getrusage failed (%d)\n", __FUNCTION__, rc);
}
if ((rc = gettimeofday(&tv_end, NULL))) {
printf("%s: ERROR: gettimeofday failed (%d)\n", __FUNCTION__, rc);
}
printf("%s: wall: %ld, user: %ld, sys: %ld\n", __FUNCTION__,
DIFFUSEC(tv_end, tv_start),
DIFFUSEC(ru_end.ru_utime, ru_start.ru_utime),
DIFFUSEC(ru_end.ru_stime, ru_start.ru_stime));
syscall(701, 4 | 8 | 0x80000000);
#endif
}
}
/* NOTE(review): the window and the three buffers are not released before MPI_Finalize(). */
fn_exit:
MPI_Finalize();
return 0;
/* Failure path: shares the normal exit, so the process still returns 0. */
fn_fail:
goto fn_exit;
}
#define _GNU_SOURCE /* See feature_test_macros(7) */
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <mpi.h>
#include <unistd.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <getopt.h>
#include <sched.h>
#include <sys/time.h>
#include <sys/resource.h>
#include "async_progress.h"
#include "util.h"
#include "fwq.h"
//#define DEBUG
#ifdef DEBUG
#define dprintf printf
#else
#define dprintf {}
#endif
/*
 * Long-option table passed to getopt_long().  This program defines no long
 * options, so the table consists solely of the all-zero terminator entry.
 */
static struct option options[] = {
	{ .name = NULL, .has_arg = 0, .flag = NULL, .val = 0 },
};
int main(int argc, char **argv)
{
int rc;
int actual;
int nproc;
int nsamples = -1;
int my_rank = -1, size = -1;
int i, j, k, l, m;
double *wbuf, *rbuf, *result;
MPI_Win win;
long start, end;
long t_pure_l, t_pure, t_pure0 = 0;
int opt;
int szbuf = 8;
struct rusage ru_start, ru_end;
struct timeval tv_start, tv_end;
fwq_init();
while ((opt = getopt_long(argc, argv, "+n:", options, NULL)) != -1) {
switch (opt) {
case 'n':
nsamples = atoi(optarg);
break;
default: /* '?' */
printf("unknown option %c\n", optopt);
exit(1);
}
}
if (nsamples == -1) {
printf("specify nsamples with -n");
exit(1);
}
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual);
if (actual != 3) {
printf("ERROR: MPI_THREAD_MULTIPLE not available "
"(level was set to %d)\n",
actual);
exit(1);
}
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
if (my_rank == 0) {
printf("nsamples=%d,nproc=%d\n", nsamples, nproc);
}
/* accumulate-to buffer */
wbuf = malloc(sizeof(double) * szbuf);
if (!wbuf) {
printf("malloc failed");
goto fn_fail;
}
memset(wbuf, 0, sizeof(double) * szbuf);
/* read-from buffer */
rbuf = malloc(sizeof(double) * szbuf);
if (!rbuf) {
printf("malloc failed");
goto fn_fail;
}
memset(rbuf, 0, sizeof(double) * szbuf);
/* fetch-to buffer */
result = malloc(sizeof(double) * szbuf);
if (!result) {
printf("malloc failed");
goto fn_fail;
}
memset(result, 0, sizeof(double) * szbuf);
/* Expose accumulate-to buffer*/
if (rc = MPI_Win_create(wbuf, sizeof(double) * szbuf, sizeof(double),
MPI_INFO_NULL, MPI_COMM_WORLD, &win)) {
printf("MPI_Win_create failed,rc=%d\n", rc);
}
for (j = 0; j < szbuf; j++) {
wbuf[j] = j + 1;
rbuf[j] = 10000 + j + 1;
result[j] = 100000 + j + 1;
}
#if 0
for (j = 0; j < szbuf; j++) {
printf("wbuf,j=%d,val=%f\n", j, wbuf[j]);
printf("rbuf,j=%d,val=%f\n", j, rbuf[j]);
printf("result,j=%d,val=%f\n", j, result[j]);
}
#endif
for (k = 0; k < 2; k++) {
if (k == 1) {
print_cpu_last_executed_on("main");
INIT_ASYNC_THREAD_();
if ((rc = getrusage(RUSAGE_THREAD, &ru_start))) {
printf("%s: ERROR: getrusage failed (%d)\n",
__func__, rc);
}
if ((rc = gettimeofday(&tv_start, NULL))) {
printf("%s: ERROR: gettimeofday failed (%d)\n",
__func__, rc);
}
syscall(701, 1 | 2 | 0x80000000);
}
for (m = 0; m < 3; m++) {
for (l = 0; l <= 10; l++) {
#if 0
long calc_cyc = (k == 1 && l == 0) ?
(double)t_pure0 * 0.1 :
t_pure0 / 10 * l;
#else
long calc_cyc = t_pure0 / 10 * l;
#endif
MPI_Barrier(MPI_COMM_WORLD);
MPI_Win_lock_all(0, win);
//clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
start = rdtsc_light();
for (j = 0; j < nsamples; j++) {
for (i = 0; i < nproc; i++) {
int target = j % nproc;
if (target == my_rank) {
continue;
}
#if 0
MPI_Get_accumulate(rbuf + j % szbuf, 1,
MPI_DOUBLE,
result + j % szbuf,
1, MPI_DOUBLE,
i,
j % szbuf, 1,
MPI_DOUBLE,
MPI_SUM, win);
#endif
#if 1
MPI_Get_accumulate(rbuf, szbuf,
MPI_DOUBLE,
result, szbuf,
MPI_DOUBLE,
i,
0, szbuf,
MPI_DOUBLE,
MPI_SUM, win);
#endif
#if 0
MPI_Accumulate(rbuf, szbuf, MPI_DOUBLE,
i,
0, szbuf, MPI_DOUBLE,
MPI_SUM, win);
#endif
#if 0
MPI_Get(rbuf + j % szbuf, 1,
MPI_DOUBLE, i,
j % szbuf, 1, MPI_DOUBLE,
win);
#endif
}
}
fwq(calc_cyc * nsamples);
MPI_Win_flush_local_all(win);
end = rdtsc_light();
//clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
MPI_Win_unlock_all(win);
MPI_Barrier(MPI_COMM_WORLD);
t_pure_l = (end - start) / nsamples;
//t_pure_l = DIFFNSEC(end, start) / nsamples;
if (1 || m == 2) {
MPI_Allreduce(&t_pure_l, &t_pure, 1, MPI_LONG,
MPI_MAX, MPI_COMM_WORLD);
if (my_rank == 0) {
if (l == 0) {
printf("async: %d, trial: %d\n",
k, m);
}
if (k == 0) {
printf("%ld\t%ld\n",
calc_cyc, t_pure);
} else {
printf("%ld\n", t_pure);
}
}
}
if (k == 0 && l == 0) {
t_pure0 = t_pure;
}
#if 0
for (i = 0; i < nproc; i++) {
for (j = 0; j < sbuf; j++) {
printf("wbuf,j=%d,val=%f\n",
j, wbuf[j]);
printf("rbuf,j=%d,val=%f\n",
j, rbuf[j]);
printf("result,j=%d,val=%f\n",
j, result[j]);
}
}
#endif
}
}
if (k == 1) {
FINALIZE_ASYNC_THREAD_();
#if 0
if ((rc = getrusage(RUSAGE_THREAD, &ru_end))) {
printf("%s: ERROR: getrusage failed (%d)\n",
__func__, rc);
}
if ((rc = gettimeofday(&tv_end, NULL))) {
printf("%s: ERROR: gettimeofday failed (%d)\n",
__func__, rc);
}
printf("%s: wall: %ld, user: %ld, sys: %ld\n",
__func__,
DIFFUSEC(tv_end, tv_start),
DIFFUSEC(ru_end.ru_utime, ru_start.ru_utime),
DIFFUSEC(ru_end.ru_stime, ru_start.ru_stime));
syscall(701, 4 | 8 | 0x80000000);
#endif
}
}
fn_exit:
MPI_Finalize();
return 0;
fn_fail:
goto fn_exit;
}

View File

@ -1,346 +1,241 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <sys/mman.h>
#include <mpi.h>
#include <unistd.h>
#include <getopt.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <sched.h>
#include "async_progress.h"
//#define DEBUG
#ifdef DEBUG
#define dprintf printf
#else
#define dprintf {}
#endif
#define DIFFNSEC(end, start) ((end.tv_sec - start.tv_sec) * 1000000000UL + (end.tv_nsec - start.tv_nsec))
#define BEGIN_EPOCH(win) do { MPI_Win_lock_all(0, win); } while(0)
#define END_EPOCH(win) do { MPI_Win_unlock_all(win); } while(0)
/*
 * Read the x86-64 time-stamp counter with the rdtscp instruction and return
 * it as one 64-bit value.  rdtscp delivers the upper half in rdx and the
 * lower half in rax; the shl/or pair merges them into rax, which is bound to
 * the output operand x.  rcx is listed as clobbered because rdtscp also
 * writes it, and the "memory" clobber keeps the compiler from moving memory
 * accesses across the read.
 */
static inline uint64_t rdtsc_light(void )
{
uint64_t x;
__asm__ __volatile__("rdtscp;" /* rdtscp doesn't jump over earlier instructions */
"shl $32, %%rdx;"
"or %%rdx, %%rax" :
"=a"(x) :
:
"%rcx", "%rdx", "memory");
return x;
}
/*
 * One unit of fixed-size busy work: an inline-assembly loop that counts rcx
 * up from 0 and repeats while rcx <= 99, i.e. 100 add/compare/branch rounds.
 * It has no inputs or outputs; "asm volatile" keeps the compiler from
 * dropping it, and rcx plus the condition codes are declared clobbered.
 */
static inline void fixed_size_work() {
asm volatile(
"movq $0, %%rcx\n\t"
"1:\t"
"addq $1, %%rcx\n\t"
"cmpq $99, %%rcx\n\t"
"jle 1b\n\t"
:
:
: "rcx", "cc");
}
/*
 * Run fixed_size_work() n times back to back.
 *
 * The counter has the same type as n: with the former int counter the loop
 * overflowed a signed integer (undefined behaviour) as soon as n exceeded
 * INT_MAX.
 */
static inline void bulk_fsw(unsigned long n)
{
	unsigned long j;

	for (j = 0; j < n; j++) {
		fixed_size_work();
	}
}
/*
 * Calibration results filled in by fwq_init():
 *   cyc   - rdtsc_light() ticks spent on the whole calibration run
 *   cycpw - ticks per fixed_size_work() call (cyc / N_INIT, truncated)
 */
long cyc, cycpw;

/* Number of fixed_size_work() calls timed during calibration */
#define N_INIT 10000000

/*
 * Calibrate the fixed-work-quantum loop: time N_INIT back-to-back work units
 * and store the total and the per-unit cost in cyc and cycpw.
 */
void fwq_init()
{
	long t_begin, t_end;

	t_begin = rdtsc_light();
	bulk_fsw(N_INIT);
	t_end = rdtsc_light();

	cyc = t_end - t_begin;
	cycpw = cyc / (double)N_INIT;
}
#if 0
/* Disabled variant: convert the delay into a work-unit count up front */
void fwq(long delay_cyc)
{
	if (delay_cyc < 0) {
		return;
		//printf("%s: delay_cyc < 0\n", __FUNCTION__);
	}
	bulk_fsw(delay_cyc / cycpw);
}
#else /* For machines with large core-to-core performance variation (e.g. OFP) */
/*
 * Busy-wait until at least delay_cyc rdtsc_light() ticks have elapsed.
 * The counter is re-read after every two work units, so the delay does not
 * depend on the calibrated per-unit cost.  A negative delay returns at once.
 */
void fwq(long delay_cyc)
{
	long t0, now;

	if (delay_cyc < 0) {
		return;
	}

	t0 = rdtsc_light();
	for (now = rdtsc_light(); now - t0 < delay_cyc; now = rdtsc_light()) {
		bulk_fsw(2); /* author's note: ~150 ns per iteration on OFP */
	}
}
#endif
/*
 * Print, for the calling thread, the CPU recorded in field 39 of
 * /proc/<pid>/task/<tid>/stat next to the CPU reported by sched_getcpu(),
 * together with the PMI_RANK environment value and the thread id.
 *
 * Returns 0 on success and -1 if the stat file cannot be opened or read,
 * has fewer than 39 space-separated fields, memory cannot be allocated, or
 * sched_getcpu() fails.  When PMI_RANK is not set the rank is printed as -1.
 *
 * NOTE(review): fields are split on single spaces, so a thread name that
 * itself contains spaces would shift the field index.
 */
static int print_cpu_last_executed_on()
{
	const size_t bufsz = 65536;
	const int cpu_field = 39;
	char fn[256];
	char *result = NULL;	/* NULL so the cleanup path can always free() */
	char *next_delim;
	char *field = NULL;
	const char *pmi_rank;
	pid_t tid = syscall(SYS_gettid);
	int fd = -1;
	size_t offset = 0;
	ssize_t amount;
	int cpu;
	int i;
	int mpi_errno = 0;

	snprintf(fn, sizeof(fn), "/proc/%d/task/%d/stat",
		 (int)getpid(), (int)tid);

	fd = open(fn, O_RDONLY);
	if (fd == -1) {
		printf("open() failed\n");
		goto fn_fail;
	}

	result = malloc(bufsz);
	if (result == NULL) {
		printf("malloc() failed");
		goto fn_fail;
	}

	/* Never read past the buffer; keep one byte for the terminator */
	while (offset < bufsz - 1) {
		amount = read(fd, result + offset, bufsz - 1 - offset);
		if (amount == -1) {
			printf("read() failed");
			goto fn_fail;
		}
		if (amount == 0) {
			break;
		}
		offset += (size_t)amount;
	}
	/* strsep() below needs a NUL-terminated string */
	result[offset] = '\0';

	next_delim = result;
	for (i = 0; i < cpu_field; i++) {
		field = strsep(&next_delim, " ");
		if (field == NULL) {
			printf("unexpected format in %s\n", fn);
			goto fn_fail;
		}
	}

	cpu = sched_getcpu();
	if (cpu == -1) {
		printf("sched_getcpu() failed\n");
		goto fn_fail;
	}

	pmi_rank = getenv("PMI_RANK");
	printf("compute thread,pmi_rank=%02d,stat-cpu=%02d,sched_getcpu=%02d,tid=%d\n",
	       pmi_rank ? atoi(pmi_rank) : -1, atoi(field), cpu, (int)tid);
	fflush(stdout);

fn_exit:
	free(result);
	if (fd != -1) {
		close(fd);
	}
	return mpi_errno;

fn_fail:
	mpi_errno = -1;
	goto fn_exit;
}
/* ga_acc per rank:ga_sync=40:1 */
/*
 * Issue one single-element MPI_Accumulate(MPI_SUM) per double to every other
 * rank, flushing locally after each one, then busy-wait via fwq().
 *
 * nproc, my_rank  - communicator size and this process's rank
 * wbuf            - not used here (it is the memory exposed through win)
 * rbuf            - origin data; slice [i * ndoubles, (i + 1) * ndoubles)
 *                   is sent to rank i at the same displacement
 * ndoubles        - number of doubles per target rank
 * win             - RMA window; the caller holds the access epoch
 * calc_nsec       - handed unchanged to fwq(), which interprets it as a
 *                   tick count despite the parameter's name
 */
void rma(int nproc, int my_rank, double *wbuf, double *rbuf, int ndoubles,
	 MPI_Win win, long calc_nsec)
{
	int i, j;

	for (i = 0; i < nproc; i++) {
		if (i == my_rank) {
			continue;
		}
		for (j = 0; j < ndoubles; j++) {
			MPI_Accumulate(rbuf + i * ndoubles + j, 1, MPI_DOUBLE,
				       i, i * ndoubles + j, 1, MPI_DOUBLE,
				       MPI_SUM, win);
			/* ga_acc() calls flush_local() immediately */
			MPI_Win_flush_local(i, win);
		}
	}

	fwq(calc_nsec);
}
/*
 * Long options for getopt_long(): "--ppn <value>" is reported as the short
 * option 'P'.  The all-NULL/zero entry terminates the table.
 */
static struct option options[] = {
	/* name, has_arg, flag, val */
	{ "ppn", required_argument, NULL, 'P' },
	{ NULL, 0, NULL, 0 },
};
int main(int argc, char **argv) {
int rc;
int actual;
int ppn = -1;
int nproc;
int ndoubles = -1;
double add_rate = 1.0;
int my_rank = -1, size = -1;
int i, j, k, l;
double *wbuf, *rbuf, *result;
MPI_Win win;
long start, end;
//struct timespec start, end;
long t_pure_l, t_overall_l;
long t_pure, t_overall;
int opt;
fwq_init();
while ((opt = getopt_long(argc, argv, "+d:P:R:", options, NULL)) != -1) {
switch (opt) {
case 'd':
ndoubles = atoi(optarg);
break;
case 'P':
ppn = atoi(optarg);
break;
case 'R':
add_rate = atof(optarg);
break;
default: /* '?' */
printf("unknown option %c\n", optopt);
exit(1);
}
}
if (ndoubles == -1 || ppn == -1) {
printf("specify ndoubles with -d and ppn with --ppn");
exit(1);
}
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual);
if (actual != 3) {
printf("ERROR: MPI_THREAD_MULTIPLE not available (level was set to %d)\n", actual);
exit(1);
}
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
if (my_rank == 0) {
printf("ndoubles=%d,nproc=%d,add_rate=%f\n", ndoubles, nproc, add_rate);
printf("cyc=%ld, cycpw=%ld\n", cyc, cycpw);
}
/* accumulate-to buffer */
wbuf = malloc(sizeof(double) * ndoubles * nproc);
if(!wbuf) { printf("malloc failed"); goto fn_fail; }
memset(wbuf, 0, sizeof(double) * ndoubles * nproc);
/* read-from buffer */
rbuf = malloc(sizeof(double) * ndoubles * nproc);
if(!rbuf) { printf("malloc failed"); goto fn_fail; }
memset(rbuf, 0, sizeof(double) * ndoubles * nproc);
/* Expose accumulate-to buffer*/
if (rc = MPI_Win_create(wbuf, sizeof(double) * ndoubles * nproc, sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &win)) {
printf("MPI_Win_create failed,rc=%d\n", rc);
}
//print_cpu_last_executed_on();
for (i = 0; i < nproc; i++) {
for (j = 0; j < ndoubles; j++) {
wbuf[i * ndoubles + j] = (i + 1) * 1000 + (j + 1);
rbuf[i * ndoubles + j] = (i + 1) * 10000 + (j + 1);
}
}
#if 0
for (i = 0; i < nproc; i++) {
for (j = 0; j < ndoubles; j++) {
printf("wbuf,proc=%d,j=%d,val=%f\n", i, j, wbuf[i * ndoubles + j]);
printf("rbuf,proc=%d,j=%d,val=%f\n", i, j, rbuf[i * ndoubles + j]);
}
}
#endif
for (k = 0; k < 2; k++) {
if (k == 1) {
INIT_ASYNC_THREAD_();
}
/* Measure get_acc-flush time */
MPI_Barrier(MPI_COMM_WORLD);
#define NPURE 10
//clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
start = rdtsc_light();
MPI_Pcontrol(1, "rma");
syscall(701, 1);
syscall(701, 2);
for (i = 0; i < NPURE; i++) {
BEGIN_EPOCH(win);
rma(nproc, my_rank, wbuf, rbuf, ndoubles, win, 0);
END_EPOCH(win);
}
MPI_Pcontrol(-1, "rma");
syscall(701, 4);
syscall(701, 8);
end = rdtsc_light();
//clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
MPI_Barrier(MPI_COMM_WORLD);
t_pure_l = (end - start) / NPURE;
//t_pure_l = DIFFNSEC(end, start) / NPURE;
//printf("t_pure (local): %ld usec\n", t_pure_l / 1000UL);
MPI_Allreduce(&t_pure_l, &t_pure, 1, MPI_LONG, MPI_MAX, MPI_COMM_WORLD);
if (my_rank == 0) printf("t_pure (max): %ld cycles\n", t_pure);
#if 1
for (l = 1; l <= 10; l++) {
MPI_Barrier(MPI_COMM_WORLD);
#define NOVERALL 10
start = rdtsc_light();
for (i = 0; i < NOVERALL; i++) {
BEGIN_EPOCH(win);
rma(nproc, my_rank, wbuf, rbuf, ndoubles, win, 100UL * 1000000 * l);
END_EPOCH(win);
}
end = rdtsc_light();
MPI_Barrier(MPI_COMM_WORLD);
t_overall_l = (end - start) / NOVERALL;
MPI_Allreduce(&t_overall_l, &t_overall, 1, MPI_LONG, MPI_MAX, MPI_COMM_WORLD);
if (my_rank == 0) printf("t_overall (max): %ld cycle\n", t_overall);
}
#endif
if (k == 1) {
FINALIZE_ASYNC_THREAD_();
}
#if 0
for (i = 0; i < nproc; i++) {
for (j = 0; j < ndoubles; j++) {
printf("wbuf,proc=%d,j=%d,val=%f\n", i, j, wbuf[i * ndoubles + j]);
printf("rbuf,proc=%d,j=%d,val=%f\n", i, j, rbuf[i * ndoubles + j]);
printf("result,proc=%d,j=%d,val=%f\n", i, j, result[i * ndoubles + j]);
}
}
#endif
}
fn_exit:
MPI_Finalize();
return 0;
fn_fail:
goto fn_exit;
}
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <sys/mman.h>
#include <mpi.h>
#include <unistd.h>
#include <getopt.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <sched.h>
#include "async_progress.h"
#include "util.h"
#include "delay.h"
//#define DEBUG
#ifdef DEBUG
#define dprintf printf
#else
#define dprintf {}
#endif
#define BEGIN_EPOCH(win) do { MPI_Win_lock_all(0, win); } while (0)
#define END_EPOCH(win) do { MPI_Win_unlock_all(win); } while (0)
/* ga_acc per rank:ga_sync=40:1 */
/*
 * Issue one single-element MPI_Accumulate(MPI_SUM) per double to every other
 * rank, flushing locally after each one, then call cdelay().
 *
 * nproc, my_rank  - communicator size and this process's rank
 * wbuf            - not used here (it is the memory exposed through win)
 * rbuf            - origin data; slice [i * ndoubles, (i + 1) * ndoubles)
 *                   is sent to rank i at the same displacement
 * ndoubles        - number of doubles per target rank
 * win             - RMA window; the caller holds the access epoch
 * calc_nsec       - handed unchanged to cdelay()
 *                   (NOTE(review): presumably a cycle count rather than
 *                   nanoseconds - confirm against delay.h)
 */
void rma(int nproc, int my_rank, double *wbuf, double *rbuf, int ndoubles,
	 MPI_Win win, long calc_nsec)
{
	int i, j;

	for (i = 0; i < nproc; i++) {
		if (i == my_rank) {
			continue;
		}
		for (j = 0; j < ndoubles; j++) {
			MPI_Accumulate(rbuf + i * ndoubles + j, 1,
				       MPI_DOUBLE, i, i * ndoubles + j,
				       1, MPI_DOUBLE, MPI_SUM, win);
			/* ga_acc() calls flush_local() immediately */
			MPI_Win_flush_local(i, win);
		}
	}

	cdelay(calc_nsec);
}
/*
 * getopt_long() table: the long spelling "--ppn" takes an argument and is
 * returned to the option loop as 'P'.  The last entry is the terminator.
 */
static struct option options[] = {
	{ "ppn", required_argument, NULL, 'P' },
	/* end */
	{ NULL, 0, NULL, 0 },
};
int main(int argc, char **argv)
{
int rc;
int actual;
int ppn = -1;
int nproc;
int ndoubles = -1;
double add_rate = 1.0;
int my_rank = -1, size = -1;
int i, j, k, l;
double *wbuf, *rbuf, *result;
MPI_Win win;
long start, end;
//struct timespec start, end;
long t_pure_l, t_overall_l;
long t_pure, t_overall;
int opt;
cdelay_init();
while ((opt = getopt_long(argc, argv, "+d:P:R:", options, NULL))
!= -1) {
switch (opt) {
case 'd':
ndoubles = atoi(optarg);
break;
case 'P':
ppn = atoi(optarg);
break;
case 'R':
add_rate = atof(optarg);
break;
default: /* '?' */
printf("unknown option %c\n", optopt);
exit(1);
}
}
if (ndoubles == -1 || ppn == -1) {
printf("specify ndoubles with -d and ppn with --ppn");
exit(1);
}
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual);
if (actual != 3) {
printf("ERROR: MPI_THREAD_MULTIPLE not available "
"(level was set to %d)\n",
actual);
exit(1);
}
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
if (my_rank == 0) {
printf("ndoubles=%d,nproc=%d,add_rate=%f\n",
ndoubles, nproc, add_rate);
printf("cyc=%ld, cycpw=%ld\n", cyc, cycpw);
}
/* accumulate-to buffer */
wbuf = malloc(sizeof(double) * ndoubles * nproc);
if (!wbuf) {
printf("malloc failed");
goto fn_fail;
}
memset(wbuf, 0, sizeof(double) * ndoubles * nproc);
/* read-from buffer */
rbuf = malloc(sizeof(double) * ndoubles * nproc);
if (!rbuf) {
printf("malloc failed");
goto fn_fail;
}
memset(rbuf, 0, sizeof(double) * ndoubles * nproc);
/* Expose accumulate-to buffer*/
if (rc = MPI_Win_create(wbuf, sizeof(double) * ndoubles * nproc,
sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD,
&win)) {
printf("MPI_Win_create failed,rc=%d\n", rc);
}
for (i = 0; i < nproc; i++) {
for (j = 0; j < ndoubles; j++) {
wbuf[i * ndoubles + j] = (i + 1) * 1000 + (j + 1);
rbuf[i * ndoubles + j] = (i + 1) * 10000 + (j + 1);
}
}
#if 0
for (i = 0; i < nproc; i++) {
for (j = 0; j < ndoubles; j++) {
printf("wbuf,proc=%d,j=%d,val=%f\n",
i, j, wbuf[i * ndoubles + j]);
printf("rbuf,proc=%d,j=%d,val=%f\n",
i, j, rbuf[i * ndoubles + j]);
}
}
#endif
for (k = 0; k < 2; k++) {
if (k == 1) {
INIT_ASYNC_THREAD_();
}
/* Measure get_acc-flush time */
MPI_Barrier(MPI_COMM_WORLD);
#define NPURE 10
//clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
start = rdtsc_light();
MPI_Pcontrol(1, "rma");
syscall(701, 1);
syscall(701, 2);
for (i = 0; i < NPURE; i++) {
BEGIN_EPOCH(win);
rma(nproc, my_rank, wbuf, rbuf, ndoubles, win, 0);
END_EPOCH(win);
}
MPI_Pcontrol(-1, "rma");
syscall(701, 4);
syscall(701, 8);
end = rdtsc_light();
//clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
MPI_Barrier(MPI_COMM_WORLD);
t_pure_l = (end - start) / NPURE;
//t_pure_l = DIFFNSEC(end, start) / NPURE;
//printf("t_pure (local): %ld usec\n", t_pure_l / 1000UL);
MPI_Allreduce(&t_pure_l, &t_pure, 1, MPI_LONG, MPI_MAX,
MPI_COMM_WORLD);
if (my_rank == 0)
printf("t_pure (max): %ld cycles\n", t_pure);
#if 1
for (l = 1; l <= 10; l++) {
MPI_Barrier(MPI_COMM_WORLD);
#define NOVERALL 10
start = rdtsc_light();
for (i = 0; i < NOVERALL; i++) {
BEGIN_EPOCH(win);
rma(nproc, my_rank, wbuf, rbuf, ndoubles, win,
100UL * 1000000 * l);
END_EPOCH(win);
}
end = rdtsc_light();
MPI_Barrier(MPI_COMM_WORLD);
t_overall_l = (end - start) / NOVERALL;
MPI_Allreduce(&t_overall_l, &t_overall, 1, MPI_LONG,
MPI_MAX, MPI_COMM_WORLD);
if (my_rank == 0)
printf("t_overall (max): %ld cycle\n",
t_overall);
}
#endif
if (k == 1) {
FINALIZE_ASYNC_THREAD_();
}
#if 0
for (i = 0; i < nproc; i++) {
for (j = 0; j < ndoubles; j++) {
printf("wbuf,proc=%d,j=%d,val=%f\n",
i, j, wbuf[i * ndoubles + j]);
printf("rbuf,proc=%d,j=%d,val=%f\n",
i, j, rbuf[i * ndoubles + j]);
printf("result,proc=%d,j=%d,val=%f\n",
i, j, result[i * ndoubles + j]);
}
}
#endif
}
fn_exit:
MPI_Finalize();
return 0;
fn_fail:
goto fn_exit;
}

View File

@ -1,349 +1,419 @@
#define _GNU_SOURCE /* See feature_test_macros(7) */
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <mpi.h>
#include <unistd.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <getopt.h>
#include <sched.h>
#include <sys/time.h>
#include <sys/resource.h>
#include "async_progress.h"
#include "util.h"
#define MYTIME_UNIT "usec"
#define MYTIME_TOUSEC 1000000
#define MYTIME_TONSEC 1000000000
#define NROW 16 /* 0%, 10%, ..., 140% */
#define NCOL 4
#define NSAMPLES_DROP 5/*10*/
#define NSAMPLES_COMM 10/*20*/
#define NSAMPLES_TOTAL 10/*20*/
#define NSAMPLES_INNER 5
#define PROGRESS_CALC_PHASE_ONLY
/*
 * Timestamp source for all measurements in this file: the MPI wall clock
 * (rdtsc_light() was the earlier alternative).
 */
static inline double mytime()
{
	const double now = /*rdtsc_light()*/MPI_Wtime();

	return now;
}
/* Value of the -p command-line option parsed in main(); -1 until it is given */
static int ppn = -1;
/*
 * Fill the three szbuf-element buffers with recognisable values:
 *   origin_buf[j] = (rank + 1) * 100     + (j + 1)
 *   result[j]     = (id + 1) * 100000000 + (rank + 1) * 10000 + (j + 1)
 *   target_buf[j] = (rank + 1) * 1000000 + (j + 1)
 * so a later dump shows which rank, run id and element a value came from.
 */
void init_buf(double *origin_buf, double *result, double *target_buf, int szbuf, int rank, int id)
{
	const double origin_base = (rank + 1) * 100.0;
	const double result_base = (id + 1) * 100000000.0 + (rank + 1) * 10000.0;
	const double target_base = (rank + 1) * 1000000.0;
	int idx = 0;

	while (idx < szbuf) {
		const int ordinal = idx + 1;

		origin_buf[idx] = origin_base + ordinal;
		result[idx] = result_base + ordinal;
		target_buf[idx] = target_base + ordinal;
		idx++;
	}
}
/*
 * Dump the three buffers through pr_debug(), one rank per round: every
 * round starts with a barrier, after which the rank whose turn it is prints
 * all szbuf elements while every other rank sleeps for 100 ms.
 */
void pr_buf(double *origin_buf, double *result, double *target_buf, int szbuf, int rank, int nproc)
{
	int turn, idx;

	for (turn = 0; turn < nproc; turn++) {
		MPI_Barrier(MPI_COMM_WORLD);

		if (turn == rank) {
			for (idx = 0; idx < szbuf; idx++) {
				pr_debug("[%d] origin_buf,j=%d,val=%f\n", rank, idx, origin_buf[idx]);
				pr_debug("[%d] result,j=%d,val=%f\n", rank, idx, result[idx]);
				pr_debug("[%d] target_buf,j=%d,val=%f\n", rank, idx, target_buf[idx]);
			}
		} else {
			usleep(100000);
		}
	}
}
/*
 * One measured communication/compute round:
 *   1. NSAMPLES_INNER times, post an MPI_Get_accumulate(MPI_NO_OP) of szbuf
 *      doubles to every other rank, starting with rank + 1;
 *   2. run ndelay(nsec_calc), bracketed by progress_start()/progress_stop()
 *      when async_progress is set and PROGRESS_CALC_PHASE_ONLY is defined;
 *   3. call MPI_Iprobe() repeatedly - NSAMPLES_INNER * (nproc - 1) times
 *      without async progress, otherwise that count scaled by
 *      (1 - pct_calc) but at least nproc - 1;
 *   4. MPI_Win_flush_local_all().
 * sync_progress is only referenced by disabled code.
 */
void rma(int rank, int nproc, MPI_Win win, double *origin_buf, double *result, int szbuf, long nsec_calc, int async_progress, int sync_progress, double pct_calc)
{
	int round, hop, probe;
	int completed, ret;
	double nprobes;

	for (round = 0; round < NSAMPLES_INNER; round++) {
		for (hop = 1; hop < nproc; hop++) {
			const int target_rank = (rank + hop) % nproc;

			MPI_Get_accumulate(origin_buf, szbuf, MPI_DOUBLE,
					   result, szbuf, MPI_DOUBLE,
					   target_rank,
					   0, szbuf, MPI_DOUBLE,
					   MPI_NO_OP, win);
#if 0
			if (sync_progress) {
				if ((ret = MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &completed, MPI_STATUS_IGNORE)) != MPI_SUCCESS) {
					pr_err("%s: error: MPI_Iprobe: %d\n", __func__, ret);
				}
			}
#endif
		}
	}

#ifdef PROGRESS_CALC_PHASE_ONLY
	if (async_progress) {
		progress_start();
	}
#endif

	ndelay(nsec_calc);

#ifdef PROGRESS_CALC_PHASE_ONLY
	if (async_progress) {
		progress_stop();
	}
#endif

#define MAX2(x,y) ((x) > (y) ? (x) : (y))
#if 1
	/* iprobe is 10 times faster than win_flush_local_all,
	   20679 usec / (8*63*5) messages for 8-ppn 8-node case */
	if (async_progress) {
		nprobes = MAX2(NSAMPLES_INNER * (nproc - 1) * (1.0 - pct_calc), nproc - 1);
	} else {
		nprobes = NSAMPLES_INNER * (nproc - 1);
	}

	for (probe = 0; probe < nprobes; probe++) {
		ret = MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &completed, MPI_STATUS_IGNORE);
		if (ret != MPI_SUCCESS) {
			pr_err("%s: error: MPI_Iprobe: %d\n", __func__, ret);
		}
	}
#endif

	MPI_Win_flush_local_all(win);
}
/*
 * Time rma() nsamples + nsamples_drop times and return the mean, over the
 * last nsamples runs, of the per-run maximum across all ranks (in mytime()
 * units).  The first nsamples_drop runs are warm-up and are not counted.
 * Every run is enclosed in a lock_all/unlock_all epoch between barriers, and
 * ndelay_init(0) is called just before the timed region.
 */
double measure(int rank, int nproc, MPI_Win win, double *origin_buf, double *result, double *target_buf, int szbuf, long nsec_calc, int async_progress, int sync_progress, int nsamples, int nsamples_drop, double pct_calc)
{
	const int total_runs = nsamples + nsamples_drop;
	double t_sum = 0;
	int run;

	for (run = 0; run < total_runs; run++) {
		double t_begin, t_end;
		double elapsed_local, elapsed_max;

		MPI_Barrier(MPI_COMM_WORLD);
		MPI_Win_lock_all(0, win);

		/* Set parameter based on current IPC and frequency */
		ndelay_init(0);

		t_begin = mytime();
		rma(rank, nproc, win, origin_buf, result, szbuf, nsec_calc, async_progress, sync_progress, pct_calc);
		t_end = mytime();

		MPI_Win_unlock_all(win);
		MPI_Barrier(MPI_COMM_WORLD);

		elapsed_local = t_end - t_begin;
		MPI_Allreduce(&elapsed_local, &elapsed_max, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);

		if (run >= nsamples_drop) {
			t_sum += elapsed_max;
		}
	}

	return t_sum / nsamples;
}
/*
 * Benchmark driver: measures MPI_Get_accumulate() rounds overlapped with a
 * compute phase under up to three progress modes and prints a CSV table.
 *
 *   progress 0 - no progress thread
 *   progress 1 - progress thread with DISABLE_UTI=1 in the environment
 *   progress 2 - progress thread with DISABLE_UTI unset
 *
 * The communication-only time t_comm_ave is measured first; each table row
 * l then uses a compute phase of l tenths of that time.
 *
 * Options:
 *   -p PPN   required
 *   -I N     non-zero restricts the run to progress mode 0
 *
 * Returns 0 on success and -1 on any failure.  Table cells of modes that
 * were not run are printed as 0.
 */
int main(int argc, char **argv)
{
	int ret;
	int actual;
	int rank = -1;
	int nproc;
	int i, progress, l;
	double *target_buf = NULL, *origin_buf = NULL, *result = NULL;
	MPI_Win win;
	double t_comm_ave, t_total_ave;
	/* zeroed so that columns of skipped modes are well defined */
	double t_table[NROW][NCOL] = { { 0 } };
	int opt;
	int szbuf = 1; /* Number of doubles to send */
	int disable_syscall_intercept = 0;
	int mpi_initialized = 0;
	int win_created = 0;
	int progress_active = 0; /* progress_init() without matching finalize */

	ndelay_init(1);

	while ((opt = getopt(argc, argv, "+p:I:")) != -1) {
		switch (opt) {
		case 'p':
			ppn = atoi(optarg);
			break;
		case 'I':
			disable_syscall_intercept = atoi(optarg);
			break;
		default: /* '?' */
			printf("unknown option %c\n", optopt);
			ret = -1;
			goto out;
		}
	}

	if (ppn == -1) {
		pr_err("Error: Specify processes-per-rank with -p");
		ret = -1;
		goto out;
	}

	MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual);
	mpi_initialized = 1;
	if (actual != MPI_THREAD_MULTIPLE) {
		pr_err("Error: MPI_THREAD_MULTIPLE is not available\n");
		ret = -1;
		goto out;
	}

	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	MPI_Comm_size(MPI_COMM_WORLD, &nproc);

	if (rank == 0) {
		printf("ndoubles=%d,nproc=%d\n", szbuf, nproc);

#pragma omp parallel
		{
			//printf("%d cpu\n", sched_getcpu());
			if (omp_get_thread_num() == 0) {
				printf("#threads=%d\n", omp_get_num_threads());
			}
		}
	}

	/* accumulate-to buffer */
	target_buf = calloc(szbuf, sizeof(*target_buf));
	if (!target_buf) {
		pr_err("Error: allocating target_buf");
		ret = -1;
		goto out;
	}

	/* read-from buffer */
	origin_buf = calloc(szbuf, sizeof(*origin_buf));
	if (!origin_buf) {
		pr_err("Error: alloacting origin_buf");
		ret = -1;
		goto out;
	}

	/* fetch-to buffer */
	result = calloc(szbuf, sizeof(*result));
	if (!result) {
		pr_err("Error: allocating result");
		ret = -1;
		goto out;
	}

	/* Expose accumulate-to buffer */
	ret = MPI_Win_create(target_buf, sizeof(double) * szbuf, sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &win);
	if (ret != 0) {
		pr_err("Error: MPI_Win_create returned %d\n", ret);
		ret = -1;
		goto out;
	}
	win_created = 1;

	/* Measure RMA-only time */
	init_buf(origin_buf, result, target_buf, szbuf, rank, 99);
	t_comm_ave = measure(rank, nproc, win, origin_buf, result, target_buf, szbuf, 0, 0, 1, NSAMPLES_COMM, NSAMPLES_DROP, 0);

	if (rank == 0) {
		printf("t_comm_ave: %.0f %s\n", t_comm_ave * MYTIME_TOUSEC, MYTIME_UNIT);
	}

#ifdef PROFILE
	syscall(701, 1 | 2 | 0x80000000); /* syscall profile start */
#endif

	/* 0: no progress, 1: progress, no uti, 2: progress, uti */
	for (progress = 0; progress <= (disable_syscall_intercept ? 0 : 2); progress += 1) {

		if (progress == 1) {
			setenv("DISABLE_UTI", "1", 1); /* Don't use uti_attr and pin to Linux/McKernel CPUs */
			progress_init();
			progress_active = 1;
		} else if (progress == 2) {
			progress_finalize();
			unsetenv("DISABLE_UTI");
			progress_init();
			progress_active = 1;
		}

		if (progress == 1 || progress == 2) {
#ifndef PROGRESS_CALC_PHASE_ONLY
			//progress_start();
#endif
		}

		/* RMA-start, compute for T_{RMA} * l / 10, RMA-flush */
		for (l = 0; l <= NROW - 1; l += 1) {
			long nsec_calc = (t_comm_ave * MYTIME_TONSEC * l) / 10;

			init_buf(origin_buf, result, target_buf, szbuf, rank, l);
			//pr_buf(origin_buf, result, target_buf, szbuf, rank, nproc);
			t_total_ave = measure(rank, nproc, win, origin_buf, result, target_buf, szbuf, nsec_calc, progress, 0, NSAMPLES_TOTAL, NSAMPLES_DROP, l / 10.0);
			//pr_buf(origin_buf, result, target_buf, szbuf, rank, nproc);

			if (rank == 0) {
				if (l == 0) {
					pr_debug("progress=%d\n", progress);
					if (progress == 0) {
						pr_debug("calc\ttotal\n");
					} else {
						pr_debug("total\n");
					}
				}

				/* column 0: compute time, column progress + 1: total */
				t_table[l][0] = nsec_calc * (MYTIME_TOUSEC / (double)MYTIME_TONSEC);
				if (progress == 0) {
					pr_debug("%.0f\t%.0f\n", nsec_calc * (MYTIME_TOUSEC / (double)MYTIME_TONSEC), t_total_ave * MYTIME_TOUSEC);
				} else {
					pr_debug("%.0f\n", t_total_ave * MYTIME_TOUSEC);
				}
				t_table[l][progress + 1] = t_total_ave * MYTIME_TOUSEC;
			}
		}

		if (progress == 1 || progress == 2) {
#ifndef PROGRESS_CALC_PHASE_ONLY
			//progress_stop();
#endif
		}
	}

#ifdef PROFILE
	syscall(701, 4 | 8 | 0x80000000); /* syscall profile report */
#endif

	if (rank == 0) {
		printf("calc,no prog,prog and no uti, prog and uti\n");
		for (l = 0; l <= NROW - 1; l++) {
			for (i = 0; i < NCOL; i++) {
				if (i > 0) {
					printf(",");
				}
				printf("%.0f", t_table[l][i]);
			}
			printf("\n");
		}
	}

	MPI_Barrier(MPI_COMM_WORLD);

	/*
	 * Only tear down a progress thread that was actually started: with
	 * -I the loop above ends with progress == 1 although progress_init()
	 * was never called.
	 */
	if (progress_active) {
		progress_finalize();
		progress_active = 0;
	}

	ret = 0;
out:
	/* release in reverse order of acquisition; free(NULL) is a no-op */
	if (win_created) {
		MPI_Win_free(&win);
	}
	free(result);
	free(origin_buf);
	free(target_buf);
	if (mpi_initialized) {
		MPI_Finalize();
	}
	return ret;
}
#define _GNU_SOURCE /* See feature_test_macros(7) */
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <mpi.h>
#include <unistd.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <getopt.h>
#include <sched.h>
#include <sys/time.h>
#include <sys/resource.h>
#include "async_progress.h"
#include "util.h"
#include "delay.h"
#define MYTIME_UNIT "usec"
#define MYTIME_TOUSEC 1000000
#define MYTIME_TONSEC 1000000000
#define NROW 16 /* 0%, 10%, ..., 140% */
#define NCOL 4
#define NSAMPLES_DROP 5/*10*/
#define NSAMPLES_COMM 10/*20*/
#define NSAMPLES_TOTAL 10/*20*/
#define NSAMPLES_INNER 5
#define PROGRESS_CALC_PHASE_ONLY
/*
 * Timestamp source for all measurements in this file: the MPI wall clock
 * (rdtsc_light() was the earlier alternative).
 */
static inline double mytime(void)
{
	const double now = /*rdtsc_light()*/MPI_Wtime();

	return now;
}
/* Value of the -p command-line option parsed in main(); -1 until it is given */
static int ppn = -1;
/*
 * Stamp the three szbuf-element buffers with values that encode where they
 * came from: origin_buf carries rank and element, target_buf the same at a
 * larger scale, and result additionally carries the run id.
 *
 *   origin_buf[j] = (rank + 1) * 100     + (j + 1)
 *   result[j]     = (id + 1) * 100000000 + (rank + 1) * 10000 + (j + 1)
 *   target_buf[j] = (rank + 1) * 1000000 + (j + 1)
 */
void init_buf(double *origin_buf, double *result, double *target_buf,
	      int szbuf, int rank, int id)
{
	const int rank1 = rank + 1;
	const int id1 = id + 1;
	int elem;

	for (elem = 1; elem <= szbuf; elem++) {
		const int slot = elem - 1;

		origin_buf[slot] = rank1 * 100.0 + elem;
		result[slot] = id1 * 100000000.0 + rank1 * 10000.0 + elem;
		target_buf[slot] = rank1 * 1000000.0 + elem;
	}
}
/*
 * Dump the three buffers through pr_debug(), one rank per round: every
 * round starts with a barrier, after which the rank whose turn it is prints
 * all szbuf elements while every other rank sleeps for 100 ms.
 */
void pr_buf(double *origin_buf, double *result, double *target_buf, int szbuf,
	    int rank, int nproc)
{
	int turn, idx;

	for (turn = 0; turn < nproc; turn++) {
		MPI_Barrier(MPI_COMM_WORLD);

		if (turn == rank) {
			for (idx = 0; idx < szbuf; idx++) {
				pr_debug("[%d] origin_buf,j=%d,val=%f\n",
					 rank, idx, origin_buf[idx]);
				pr_debug("[%d] result,j=%d,val=%f\n",
					 rank, idx, result[idx]);
				pr_debug("[%d] target_buf,j=%d,val=%f\n",
					 rank, idx, target_buf[idx]);
			}
		} else {
			usleep(100000);
		}
	}
}
/*
 * One measured communication/compute round:
 *   1. NSAMPLES_INNER times, post an MPI_Get_accumulate(MPI_NO_OP) of szbuf
 *      doubles to every other rank, starting with rank + 1;
 *   2. run ndelay(nsec_calc), bracketed by progress_start()/progress_stop()
 *      when async_progress is set and PROGRESS_CALC_PHASE_ONLY is defined;
 *   3. call MPI_Iprobe() repeatedly - NSAMPLES_INNER * (nproc - 1) times
 *      without async progress, otherwise that count scaled by
 *      (1 - pct_calc) but at least nproc - 1;
 *   4. MPI_Win_flush_local_all().
 * sync_progress is only referenced by disabled code.
 */
void rma(int rank, int nproc, MPI_Win win, double *origin_buf, double *result,
	 int szbuf, long nsec_calc, int async_progress, int sync_progress,
	 double pct_calc)
{
	int round, hop, probe;
	int completed, ret;
	double nprobes;

	for (round = 0; round < NSAMPLES_INNER; round++) {
		for (hop = 1; hop < nproc; hop++) {
			const int target_rank = (rank + hop) % nproc;

			MPI_Get_accumulate(origin_buf, szbuf, MPI_DOUBLE,
					   result, szbuf, MPI_DOUBLE,
					   target_rank,
					   0, szbuf, MPI_DOUBLE,
					   MPI_NO_OP, win);
#if 0
			if (sync_progress) {
				if ((ret = MPI_Iprobe(MPI_ANY_SOURCE,
						      MPI_ANY_TAG,
						      MPI_COMM_WORLD,
						      &completed,
						      MPI_STATUS_IGNORE)) !=
				    MPI_SUCCESS) {
					pr_err("%s: error: MPI_Iprobe: %d\n",
					       __func__, ret);
				}
			}
#endif
		}
	}

#ifdef PROGRESS_CALC_PHASE_ONLY
	if (async_progress) {
		progress_start();
	}
#endif

	ndelay(nsec_calc);

#ifdef PROGRESS_CALC_PHASE_ONLY
	if (async_progress) {
		progress_stop();
	}
#endif

#define MAX2(x, y) ((x) > (y) ? (x) : (y))
#if 1
	/* iprobe is 10 times faster than win_flush_local_all,
	 * 20679 usec / (8*63*5) messages for 8-ppn 8-node case
	 */
#if 1
	if (async_progress) {
		nprobes = MAX2(NSAMPLES_INNER * (nproc - 1) *
			       (1.0 - pct_calc), nproc - 1);
	} else {
		nprobes = NSAMPLES_INNER * (nproc - 1);
	}
#else
	nprobes = MAX2(NSAMPLES_INNER * (nproc - 1) *
		       (1.0 - pct_calc), nproc - 1);
#endif

	for (probe = 0; probe < nprobes; probe++) {
		ret = MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG,
				 MPI_COMM_WORLD, &completed,
				 MPI_STATUS_IGNORE);
		if (ret != MPI_SUCCESS) {
			pr_err("%s: error: MPI_Iprobe: %d\n",
			       __func__, ret);
		}
	}
#endif

	MPI_Win_flush_local_all(win);
}
/*
 * Time rma() nsamples + nsamples_drop times and return the mean, over the
 * last nsamples runs, of the per-run maximum across all ranks (in mytime()
 * units).  The first nsamples_drop runs are warm-up and are not counted.
 * Every run is enclosed in a lock_all/unlock_all epoch between barriers, and
 * ndelay_init(0) is called just before the timed region.
 */
double measure(int rank, int nproc, MPI_Win win, double *origin_buf,
	       double *result, double *target_buf, int szbuf, long nsec_calc,
	       int async_progress, int sync_progress, int nsamples,
	       int nsamples_drop, double pct_calc)
{
	const int total_runs = nsamples + nsamples_drop;
	double t_sum = 0;
	int run;

	for (run = 0; run < total_runs; run++) {
		double t_begin, t_end;
		double elapsed_local, elapsed_max;

		MPI_Barrier(MPI_COMM_WORLD);
		MPI_Win_lock_all(0, win);

		/* Set parameter based on current IPC and frequency */
		ndelay_init(0);

		t_begin = mytime();
		rma(rank, nproc, win, origin_buf, result, szbuf, nsec_calc,
		    async_progress, sync_progress, pct_calc);
		t_end = mytime();

		MPI_Win_unlock_all(win);
		MPI_Barrier(MPI_COMM_WORLD);

		elapsed_local = t_end - t_begin;
		MPI_Allreduce(&elapsed_local, &elapsed_max, 1, MPI_DOUBLE,
			      MPI_MAX, MPI_COMM_WORLD);

		if (run >= nsamples_drop) {
			t_sum += elapsed_max;
		}
	}

	return t_sum / nsamples;
}
int main(int argc, char **argv)
{
int ret;
int actual;
int rank = -1;
int nproc;
int i, j, progress, l, m;
double *target_buf, *origin_buf, *result;
MPI_Win win;
double t_comm_l, t_comm_g, t_comm_sum, t_comm_ave;
double t_total_l, t_total_g, t_total_sum, t_total_ave;
double t_table[NROW][NCOL];
int opt;
int szbuf = 1; /* Number of doubles to send */
struct rusage ru_start, ru_end;
struct timeval tv_start, tv_end;
int disable_syscall_intercept = 0;
cpu_set_t cpuset;
//test_set_loglevel(TEST_LOGLEVEL_WARN);
ndelay_init(1);
while ((opt = getopt(argc, argv, "+p:I:")) != -1) {
switch (opt) {
case 'p':
ppn = atoi(optarg);
break;
case 'I':
disable_syscall_intercept = atoi(optarg);
break;
default: /* '?' */
printf("unknown option %c\n", optopt);
ret = -1;
goto out;
}
}
if (ppn == -1) {
pr_err("Error: Specify processes-per-rank with -p");
ret = -1;
goto out;
}
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &actual);
if (actual != MPI_THREAD_MULTIPLE) {
pr_err("Error: MPI_THREAD_MULTIPLE is not available\n");
ret = -1;
goto out;
}
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
if (rank == 0) {
printf("ndoubles=%d,nproc=%d\n", szbuf, nproc);
#pragma omp parallel
{
//printf("%d cpu\n", sched_getcpu());
if (omp_get_thread_num() == 0) {
printf("#threads=%d\n", omp_get_num_threads());
}
}
}
/* accumulate-to buffer */
target_buf = malloc(sizeof(double) * szbuf);
if (!target_buf) {
pr_err("Error: allocating target_buf");
ret = -1;
goto out;
}
memset(target_buf, 0, sizeof(double) * szbuf);
/* read-from buffer */
origin_buf = malloc(sizeof(double) * szbuf);
if (!origin_buf) {
pr_err("Error: alloacting origin_buf");
ret = -1;
goto out;
}
memset(origin_buf, 0, sizeof(double) * szbuf);
/* fetch-to buffer */
result = malloc(sizeof(double) * szbuf);
if (!result) {
pr_err("Error: allocating result");
ret = -1;
goto out;
}
memset(result, 0, sizeof(double) * szbuf);
/* Expose accumulate-to buffer*/
ret = MPI_Win_create(target_buf, sizeof(double) * szbuf,
sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD,
&win);
if (ret != 0) {
pr_err("Error: MPI_Win_create returned %d\n", ret);
ret = -1;
goto out;
}
/* Measure RMA-only time */
init_buf(origin_buf, result, target_buf, szbuf, rank, 99);
t_comm_ave = measure(rank, nproc, win, origin_buf, result, target_buf,
szbuf, 0, 0, 1, NSAMPLES_COMM, NSAMPLES_DROP, 0);
if (rank == 0) {
printf("t_comm_ave: %.0f %s\n",
t_comm_ave * MYTIME_TOUSEC, MYTIME_UNIT);
}
#ifdef PROFILE
syscall(701, 1 | 2 | 0x80000000); /* syscall profile start */
#endif
/* 0: no progress, 1: progress, no uti, 2: progress, uti */
for (progress = 0; progress <= (disable_syscall_intercept ? 0 : 2);
progress += 1) {
if (progress == 1) {
/* Don't use uti_attr and pin to Linux/McKernel CPUs */
setenv("DISABLE_UTI", "1", 1);
progress_init();
} else if (progress == 2) {
progress_finalize();
unsetenv("DISABLE_UTI");
progress_init();
}
if (progress == 1 || progress == 2) {
#ifndef PROGRESS_CALC_PHASE_ONLY
//progress_start();
#endif
}
/* RMA-start, compute for T_{RMA} * l / 10, RMA-flush */
for (l = 0; l <= NROW - 1; l += 1) {
long nsec_calc = (t_comm_ave * MYTIME_TONSEC * l) / 10;
init_buf(origin_buf, result, target_buf, szbuf, rank,
l);
#if 0
pr_buf(origin_buf, result, target_buf, szbuf, rank,
nproc);
#endif
t_total_ave = measure(rank, nproc, win, origin_buf,
result, target_buf, szbuf,
nsec_calc, progress, 0,
NSAMPLES_TOTAL, NSAMPLES_DROP,
l / 10.0);
#if 0
pr_buf(origin_buf, result, target_buf, szbuf, rank,
nproc);
#endif
if (rank == 0) {
if (l == 0) {
pr_debug("progress=%d\n", progress);
if (progress == 0) {
pr_debug("calc\ttotal\n");
} else {
pr_debug("total\n");
}
}
t_table[l][0] = nsec_calc *
(MYTIME_TOUSEC / (double)MYTIME_TONSEC);
if (progress == 0) {
pr_debug("%.0f\t%.0f\n",
nsec_calc *
(MYTIME_TOUSEC /
(double)MYTIME_TONSEC),
t_total_ave * MYTIME_TOUSEC);
t_table[l][progress + 1] =
t_total_ave * MYTIME_TOUSEC;
} else {
pr_debug("%.0f\n",
t_total_ave * MYTIME_TOUSEC);
t_table[l][progress + 1] =
t_total_ave * MYTIME_TOUSEC;
}
}
}
if (progress == 1 || progress == 2) {
#ifndef PROGRESS_CALC_PHASE_ONLY
//progress_stop();
#endif
}
}
#ifdef PROFILE
syscall(701, 4 | 8 | 0x80000000); /* syscall profile report */
#endif
if (rank == 0) {
printf("calc,no prog,prog and no uti, prog and uti\n");
for (l = 0; l <= NROW - 1; l++) {
for (i = 0; i < NCOL; i++) {
if (i > 0) {
printf(",");
}
printf("%.0f", t_table[l][i]);
}
printf("\n");
}
}
MPI_Barrier(MPI_COMM_WORLD);
if (progress >= 1) {
progress_finalize();
}
MPI_Finalize();
ret = 0;
out:
return ret;
}

View File

@ -25,18 +25,12 @@ async_progress.o:: async_progress.c util.h
$(CC) $(CFLAGS) -I$(UTI_DIR)/include -c $<
util.o:: util.c util.h
$(CC) $(CFLAGS) -qopenmp -c $<
$(CC) $(CFLAGS) -c $<
014: 014.o async_progress.o util.o
$(LD) -o $@ $^ $(LDFLAGS)
fwq.o:: fwq.c fwq.h
$(CC) $(CFLAGS) -c $<
015: 015.o async_progress.o
$(LD) -o $@ $^ $(LDFLAGS)
016: 016.o async_progress.o util.o
$(LD) -o $@ $^ $(LDFLAGS) -qopenmp
016.o::016.c
delay.o:: delay.c delay.h
$(CC) $(CFLAGS) -qopenmp -c $<
011: 011.o
@ -45,10 +39,22 @@ util.o:: util.c util.h
011.o::011.c
$(CC) $(CFLAGS) -qopenmp -c $<
%: %.o
014: 014.o async_progress.o util.o delay.o
$(LD) -o $@ $^ $(LDFLAGS)
%.o::%.c
015: 015.o async_progress.o
$(LD) -o $@ $^ $(LDFLAGS)
016: 016.o async_progress.o util.o delay.o
$(LD) -o $@ $^ $(LDFLAGS) -qopenmp
016.o::016.c
$(CC) $(CFLAGS) -qopenmp -c $<
%: %.o util.o fwq.o
$(LD) -o $@ $^ $(LDFLAGS)
%.o:: %.c util.h fwq.h
$(CC) $(CFLAGS) -c $<
clean:

View File

@ -43,7 +43,7 @@ static int progress_refc;
if (count < NROW_STAT) { \
array[count++] += (end - start); \
} \
} while(0)
} while (0)
static int cyc_prog1_count, cyc_prog2_count, cyc_init1_count, cyc_init2_count, cyc_start_count, cyc_stop1_count, cyc_stop2_count, cyc_stop3_count, cyc_finalize_count;
static unsigned long cyc_prog1[NROW_STAT];
@ -266,17 +266,17 @@ void progress_init()
pr_err("%s: error: pthread_cond_init failed (%d)\n", __func__, ret);
goto out;
}
if ((ret = pthread_attr_init(&pthread_attr))) {
pr_err("%s: error: pthread_attr_init failed (%d)\n", __func__, ret);
goto out;
}
if ((ret = uti_attr_init(&uti_attr))) {
pr_err("%s: error: uti_attr_init failed (%d)\n", __func__, ret);
goto out;
}
#if 0
if ((ret = UTI_ATTR_SAME_L1(&uti_attr))) {
pr_err("%s: error: UTI_ATTR_SAME_L1 failed\n", __func__);
@ -294,7 +294,7 @@ void progress_init()
end = rdtsc_light();
RECORD_STAT(cyc_init1_count, cyc_init1, end, start);
#endif
#ifdef PROFILE
start = rdtsc_light();
#endif
@ -355,7 +355,7 @@ void progress_start()
progress_flag_down = 1;
pthread_cond_signal(&progress_cond_down);
pthread_mutex_unlock(&progress_mutex);
#ifdef PROFILE
end = rdtsc_light();
RECORD_STAT(cyc_start_count, cyc_start, end, start);

29
test/uti/mpi/fwq.h Normal file
View File

@ -0,0 +1,29 @@
#ifndef __FWQ_H_INCLUDED__
#define __FWQ_H_INCLUDED__
/*
 * One fixed-size unit of busy work, written in x86-64 inline assembly.
 *
 * The loop zeroes %rcx, then increments it and jumps back while the value
 * is <= 99, so the increment executes 100 times per call. The statement is
 * asm volatile and declares %rcx and the condition codes as clobbered.
 * It takes no inputs and produces no outputs.
 */
static inline void fixed_size_work(void)
{
asm volatile(
/* rcx = 0 */
"movq $0, %%rcx\n\t"
/* local label: loop head */
"1:\t"
"addq $1, %%rcx\n\t"
/* repeat while rcx <= 99 */
"cmpq $99, %%rcx\n\t"
"jle 1b\n\t"
:
:
: "rcx", "cc");
}
/*
 * Execute fixed_size_work() @n times back to back.
 *
 * The loop counter has the same type as @n. A signed int counter would be
 * compared against an unsigned long bound and would overflow (undefined
 * behaviour) before reaching any n larger than INT_MAX.
 */
static inline void bulk_fsw(unsigned long n)
{
	unsigned long j;

	for (j = 0; j < n; j++) {
		fixed_size_work();
	}
}
void fwq_init(void);
void fwq(long delay_nsec);
#endif

View File

@ -15,145 +15,39 @@
/* Messaging */
enum test_loglevel test_loglevel = TEST_LOGLEVEL_DEBUG;
/* Calculation */
/*
 * Run @n units of busy work. Each unit is an x86-64 inline-assembly loop
 * that zeroes %rcx and increments it while the value is <= 99 (100
 * increments). %rcx and the condition codes are declared as clobbered.
 *
 * The loop counter has the same type as @n: a signed int counter would
 * overflow (undefined behaviour) before reaching any n above INT_MAX.
 * The __asm__ spelling is used so the function also builds when the
 * compiler runs in a strict -std= mode, where plain `asm` is not a keyword.
 */
static inline void asmloop(unsigned long n)
{
	unsigned long j;

	for (j = 0; j < n; j++) {
		__asm__ volatile(
			"movq $0, %%rcx\n\t"
			"1:\t"
			"addq $1, %%rcx\n\t"
			"cmpq $99, %%rcx\n\t"
			"jle 1b\n\t"
			:
			:
			: "rcx", "cc");
	}
}
#define N_INIT 10000000
double nspw; /* nsec per work */
/*
 * Calibrate nspw, the wall-clock nanoseconds per asmloop() work unit.
 *
 * Times N_INIT work units executed inside an OpenMP parallel region with
 * gettimeofday() and stores the elapsed time per unit in the global nspw.
 * When @verbose is non-zero the result is reported through pr_debug().
 */
void ndelay_init(int verbose)
{
	struct timeval t_begin, t_end;

	gettimeofday(&t_begin, NULL);

#pragma omp parallel
	{
		asmloop(N_INIT);
	}

	gettimeofday(&t_end, NULL);

	/* usec -> nsec, then divide by the number of work units */
	nspw = DIFFUSEC(t_end, t_begin) * 1000 / (double)N_INIT;

	if (!verbose) {
		return;
	}
	pr_debug("nspw=%f\n", nspw);
}
#if 1
/*
 * Busy-wait for roughly @delay_nsec nanoseconds by running
 * delay_nsec / nspw work units inside an OpenMP parallel region.
 * A negative delay is reported on stdout and otherwise ignored.
 */
void ndelay(long delay_nsec)
{
	if (delay_nsec >= 0) {
#pragma omp parallel
		{
			asmloop(delay_nsec / nspw);
		}
		return;
	}

	printf("delay_nsec < 0\n");
}
#else
/*
 * Variant for machines with large core-to-core performance variation
 * (e.g. OFP): poll the clock between short bursts of work until at least
 * @delay_nsec nanoseconds have elapsed. A negative delay returns at once.
 */
void ndelay(long delay_nsec)
{
	struct timespec t_begin, t_now;

	if (delay_nsec < 0) {
		return;
	}

	clock_gettime(TIMER_KIND, &t_begin);
	for (;;) {
		clock_gettime(TIMER_KIND, &t_now);
		if (DIFFNSEC(t_now, t_begin) >= delay_nsec) {
			return;
		}
		asmloop(2); /* ~150 ns per iteration on OFP */
	}
}
#endif
double cycpw; /* cyc per work */
/*
 * Calibrate cycpw, the number of rdtsc_light() ticks per asmloop() work
 * unit, by timing N_INIT units on the calling thread.
 *
 * NOTE(review): util.h declares cdelay_init(), while this definition is
 * spelled cdlay_init() - confirm which name callers actually use.
 */
void cdlay_init()
{
#define N_INIT 10000000
	unsigned long tsc_begin, tsc_end;

	tsc_begin = rdtsc_light();
	asmloop(N_INIT);
	tsc_end = rdtsc_light();

	cycpw = (tsc_end - tsc_begin) / (double)N_INIT;
}
#if 0
/*
 * Busy-wait for roughly @delay_cyc ticks by running delay_cyc / cycpw
 * work units. A negative delay returns immediately.
 */
void cdelay(long delay_cyc)
{
	if (delay_cyc >= 0) {
		asmloop(delay_cyc / cycpw);
	}
}
#else
/*
 * Variant for machines with large core-to-core performance variation
 * (e.g. OFP): poll rdtsc_light() between short bursts of work until at
 * least @delay_cyc ticks have elapsed. A negative delay returns at once.
 */
void cdelay(long delay_cyc)
{
	unsigned long tsc_begin, tsc_now;

	if (delay_cyc < 0) {
		return;
	}

	tsc_begin = rdtsc_light();
	for (;;) {
		tsc_now = rdtsc_light();
		if (tsc_now - tsc_begin >= delay_cyc) {
			return;
		}
		asmloop(2);
	}
}
#endif
int print_cpu_last_executed_on(const char *name) {
char fn[256];
char* result;
char *result;
pid_t tid = syscall(SYS_gettid);
int fd;
int offset;
int mpi_errno = 0;
int mpi_errno = 0;
int rc;
sprintf(fn, "/proc/%d/task/%d/stat", getpid(), (int)tid);
//printf("fn=%s\n", fn);
fd = open(fn, O_RDONLY);
if(fd == -1) {
if (fd == -1) {
printf("open() failed\n");
goto fn_fail;
}
result = malloc(65536);
if(result == NULL) {
if (result == NULL) {
printf("malloc() failed");
goto fn_fail;
}
int amount = 0;
offset = 0;
while(1) {
while (1) {
amount = read(fd, result + offset, 65536);
// printf("amount=%d\n", amount);
if(amount == -1) {
if (amount == -1) {
printf("read() failed");
goto fn_fail;
}
if(amount == 0) {
if (amount == 0) {
goto eof;
}
offset += amount;
@ -161,21 +55,21 @@ int print_cpu_last_executed_on(const char *name) {
eof:;
//printf("result:%s\n", result);
char* next_delim = result;
char* field;
char *next_delim = result;
char *field;
int i;
for(i = 0; i < 39; i++) {
for (i = 0; i < 39; i++) {
field = strsep(&next_delim, " ");
}
int cpu = sched_getcpu();
if(cpu == -1) {
if (cpu == -1) {
printf("getpu() failed\n");
goto fn_fail;
}
rc = syscall(732);
printf("%s: pmi_rank=%02d,os=%s,stat-cpu=%02d,sched_getcpu=%02d,tid=%d\n", name, atoi(getenv("PMI_RANK")), rc == -1 ? "lin" : "mck", atoi(field), cpu, tid); fflush(stdout);
fn_exit:
free(result);

View File

@ -61,13 +61,7 @@ inline uint64_t rdtsc_light(void)
#define DIFFNSEC(end, start) ((end.tv_sec - start.tv_sec) * 1000000000UL + (end.tv_nsec - start.tv_nsec))
#define TIMER_KIND CLOCK_MONOTONIC_RAW /* CLOCK_THREAD_CPUTIME_ID */
/* Calculation emulation */
void ndelay_init();
void ndelay(long delay_nsec);
void cdelay_init();
void cdelay(long delay_cyc);
/* CPU location */
int print_cpu_last_executed_on();
int print_cpu_last_executed_on(const char *name);
#endif