 |
» |
|
|
 |
This C example is used as a performance benchmark to measure
the amount of time it takes to send and receive data between two
processes. The buffers are aligned and offset from each other to
avoid cache conflicts caused by direct process-to-process byte-copy
operations. To run this example: define the CHECK macro to check data
integrity. Increase the number of bytes to at least twice the
cache size to obtain representative bandwidth measurements.
 |
#include <stdio.h> #include <stdlib.h> #include <math.h> #include <mpi.h> #define NLOOPS 1000 #define ALIGN 4096 main(argc, argv) int argc; char *argv[]; { int i, j; double start, stop; int nbytes = 0; int rank, size; MPI_Status status; char *buf; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); if (size != 2) { if ( ! rank) printf("ping_pong: must have two processes\n"); MPI_Finalize(); exit(0); } nbytes = (argc > 1) ? atoi(argv[1]) : 0; if (nbytes < 0) nbytes = 0; /* * Page-align buffers and displace them in the cache to avoid collisions. */ buf = (char *) malloc(nbytes + 524288 + (ALIGN - 1)); if (buf == 0) { MPI_Abort(MPI_COMM_WORLD, MPI_ERR_BUFFER); exit(1); } buf = (char *) ((((unsigned long) buf) + (ALIGN - 1)) & ~(ALIGN - 1)); if (rank == 1) buf += 524288; memset(buf, 0, nbytes); /* * Ping-pong. */ if (rank == 0) { printf("ping-pong %d bytes ...\n", nbytes); /* * warm-up loop */ for (i = 0; i < 5; i++) { MPI_Send(buf, nbytes, MPI_CHAR, 1, 1, MPI_COMM_WORLD); MPI_Recv(buf, nbytes, MPI_CHAR,1, 1, MPI_COMM_WORLD, &status); } /* * timing loop */ start = MPI_Wtime(); for (i = 0; i < NLOOPS; i++) { #ifdef CHECK for (j = 0; j < nbytes; j++) buf[j] = (char) (j + i); #endif MPI_Send(buf, nbytes, MPI_CHAR,1, 1000 + i, MPI_COMM_WORLD); #ifdef CHECK memset(buf, 0, nbytes); #endif MPI_Recv(buf, nbytes, MPI_CHAR,1, 2000 + i, MPI_COMM_WORLD,&status); #ifdef CHECK for (j = 0; j < nbytes; j++) { if (buf[j] != (char) (j + i)) { printf("error: buf[%d] = %d, not %d\n",j, buf[j], j + i); break; } } #endif } stop = MPI_Wtime(); printf("%d bytes: %.2f usec/msg\n", nbytes, (stop - start) / NLOOPS / 2 * 1000000); if (nbytes > 0) { printf("%d bytes: %.2f MB/sec\n", nbytes,nbytes / 1000000./ ((stop - start) / NLOOPS / 2)); } } else { /* * warm-up loop */ for (i = 0; i < 5; i++) { MPI_Recv(buf, nbytes, MPI_CHAR,0, 1, MPI_COMM_WORLD, &status); MPI_Send(buf, nbytes, MPI_CHAR, 0, 1, MPI_COMM_WORLD); } for (i = 0; i < NLOOPS; i++) { 
MPI_Recv(buf, nbytes, MPI_CHAR,0, 1000 + i, MPI_COMM_WORLD,&status); MPI_Send(buf, nbytes, MPI_CHAR,0, 2000 + i, MPI_COMM_WORLD); } } MPI_Finalize(); exit(0); } |
 |
ping_pong
output |  |
The output from running the ping_pong executable is shown
below. The application was run with -np 2.
ping-pong 0 bytes ...
0 bytes: 1.03 usec/msg |
|