diff --git a/test/uti/mpi/delay.c b/test/uti/mpi/delay.c
new file mode 100644
index 00000000..0ae14d49
--- /dev/null
+++ b/test/uti/mpi/delay.c
@@ -0,0 +1,110 @@
+#define _GNU_SOURCE         /* See feature_test_macros(7) */
+/*
+ * NOTE(review): the system header names below were reconstructed from the
+ * symbols this file uses (printf, gettimeofday, uint64_t) -- confirm the list.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/syscall.h>   /* For SYS_xxx definitions */
+#include <sched.h>
+#include <pthread.h>
+#include <errno.h>
+#include <stdint.h>
+#include <time.h>
+#include <sys/time.h>
+#include "delay.h"
+
+/* Number of asmloop() work units executed during calibration. */
+#define N_INIT 10000000
+
+/* nsec per work; stays 0 until ndelay_init() has run */
+double nspw;
+
+/*
+ * Calibrate ndelay(): run N_INIT work units inside an OpenMP parallel region,
+ * time the region with gettimeofday() and store the per-unit cost in nspw.
+ *
+ * verbose: when non-zero, print the calibrated value through pr_debug().
+ *
+ * NOTE(review): pr_debug() and DIFFUSEC() are not declared in delay.h;
+ * presumably a project utility header provides them -- confirm it is
+ * reachable from this file.
+ */
+void ndelay_init(int verbose)
+{
+	struct timeval start, end;
+
+	gettimeofday(&start, NULL);
+
+#pragma omp parallel
+	{
+		asmloop(N_INIT);
+	}
+
+	gettimeofday(&end, NULL);
+
+	/* DIFFUSEC() is scaled by 1000 to get nanoseconds (assumes it yields usec) */
+	nspw = DIFFUSEC(end, start) * 1000 / (double)N_INIT;
+	if (verbose) {
+		pr_debug("nspw=%f\n", nspw);
+	}
+}
+
+/*
+ * Busy-wait for roughly delay_nsec nanoseconds by running the calibrated
+ * work loop in an OpenMP parallel region. A negative request is reported on
+ * stdout and ignored.
+ */
+void ndelay(long delay_nsec)
+{
+	if (delay_nsec < 0) {
+		printf("delay_nsec < 0\n");
+		return;
+	}
+	/*
+	 * nspw is 0 before ndelay_init(); dividing by it gives infinity, and
+	 * converting that to asmloop()'s unsigned long argument is undefined.
+	 */
+	if (nspw <= 0) {
+		return;
+	}
+#pragma omp parallel
+	{
+		asmloop(delay_nsec / nspw);
+	}
+}
+
+/* cyc per work; stays 0 until cdelay_init() has run */
+static double cycpw;
+
+/*
+ * Calibrate cdelay(): count the TSC ticks spent on N_INIT work units and
+ * store the per-unit cost in cycpw.
+ */
+void cdelay_init(void)
+{
+	uint64_t start, end;
+
+	start = rdtsc_light();
+	asmloop(N_INIT);
+	end = rdtsc_light();
+	cycpw = (end - start) / (double)N_INIT;
+}
+
+/*
+ * Busy-wait for roughly delay_cyc TSC ticks using the calibrated work loop.
+ * A negative request, or a call before cdelay_init(), returns immediately.
+ */
+void cdelay(long delay_cyc)
+{
+	if (delay_cyc < 0) {
+		return;
+	}
+	/* avoid dividing by an uncalibrated (zero) cycpw */
+	if (cycpw <= 0) {
+		return;
+	}
+	asmloop(delay_cyc / cycpw);
+}
diff --git a/test/uti/mpi/delay.h b/test/uti/mpi/delay.h
new file mode 100644
index 00000000..8d79ac88
--- /dev/null
+++ b/test/uti/mpi/delay.h
@@ -0,0 +1,48 @@
+#ifndef __DELAY_H_INCLUDED__
+#define __DELAY_H_INCLUDED__
+
+#include <stdint.h>
+
+/* Read the time-stamp counter with rdtscp and return it as one 64-bit value. */
+static inline uint64_t rdtsc_light(void)
+{
+	uint64_t x;
+
+	/* rdtscp doesn't execute ahead of earlier instructions */
+	__asm__ __volatile__("rdtscp;"
+			     "shl $32, %%rdx;"
+			     "or %%rdx, %%rax" :
+			     "=a"(x) :
+			     :
+			     "%rcx", "%rdx", "memory");
+	return x;
+}
+
+/*
+ * Execute n work units; one unit is a 100-iteration add/compare/branch loop
+ * on rcx.
+ */
+static inline void asmloop(unsigned long n)
+{
+	/* same type as n: an int counter would overflow for n > INT_MAX */
+	unsigned long j;
+
+	for (j = 0; j < n; j++) {
+		__asm__ __volatile__(
+			"movq $0, %%rcx\n\t"
+			"1:\t"
+			"addq $1, %%rcx\n\t"
+			"cmpq $99, %%rcx\n\t"
+			"jle 1b\n\t"
+			:
+			:
+			: "rcx", "cc");
+	}
+}
+
+void ndelay_init(int verbose);
+void ndelay(long delay_nsec);
+void cdelay_init(void);
+void cdelay(long delay_cyc);
+
+#endif
diff --git a/test/uti/mpi/fwq.c b/test/uti/mpi/fwq.c
new file mode 100644
index 00000000..88247e52
--- /dev/null
+++ b/test/uti/mpi/fwq.c
@@ -0,0 +1,55 @@
+#define _GNU_SOURCE         /* See feature_test_macros(7) */
+/*
+ * NOTE(review): the system header names below were reconstructed from the
+ * symbols this file uses (clock_gettime, struct timespec) -- confirm the list.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/syscall.h>   /* For SYS_xxx definitions */
+#include <sched.h>
+#include <pthread.h>
+#include <errno.h>
+#include <stdint.h>
+#include <time.h>
+#include <sys/time.h>
+#include "fwq.h"
+
+/* Number of work units passed to bulk_fsw() during calibration. */
+#define N_INIT 10000000
+
+/* nsec per work; stays 0 until fwq_init() has run */
+static double nspw;
+
+/*
+ * Calibrate fwq(): measure the calling thread's CPU time for
+ * bulk_fsw(N_INIT) and store the per-unit cost in nspw.
+ */
+void fwq_init(void)
+{
+	struct timespec start, end;
+	unsigned long nsec;
+
+	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
+	bulk_fsw(N_INIT);
+	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
+	nsec = DIFFNSEC(end, start);
+	nspw = nsec / (double)N_INIT;
+}
+
+/*
+ * Run bulk_fsw() for roughly delay_nsec nanoseconds of calibrated work.
+ * A negative request, or a call before fwq_init(), returns immediately.
+ */
+void fwq(long delay_nsec)
+{
+	if (delay_nsec < 0) {
+		return;
+	}
+	/* avoid dividing by an uncalibrated (zero) nspw */
+	if (nspw <= 0) {
+		return;
+	}
+	bulk_fsw(delay_nsec / nspw);
+}