root/orte/test/mpi/no-disconnect.c

DEFINITIONS

This source file includes the following definitions:
  1. nice_send
  2. nice_recv
  3. nice_barrier
  4. main

/* Contributed by Marcia Cristina Cera
   <marcia.cristina.cera@gmail.com>,
   http://www.open-mpi.org/community/lists/users/2009/12/11540.php */

/* It was decided that the issue highlighted by this test will NOT be
   fixed in the 1.3/1.4 series.  It is already fixed in the 1.5
   series.  Hence, if we detect Open MPI < v1.5, return 77/skip. */
/* Turns out the hnp cannot handle concurrent MPI_Comm_spawns
   as of Open MPI 1.7.  However, we hope this feature will
   work in 2.0 with the new state-machine-based orte. */

#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <sys/utsname.h>

#include <mpi.h>

#define NCHARS 30
const int max_depth = 4;

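/*
 * NCHARS bounds the short text messages exchanged between parents and
 * children; max_depth bounds the depth of the process tree spawned in
 * main(), so each participating root ends up heading 1 << max_depth
 * processes (itself included).
 */
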
/*
 * Here are some replacements for standard, blocking MPI
 * functions.  These replacements are "nice" and yield the
 * CPU instead of spinning hard.  The interfaces are the same.
 * Just replace:
 *     MPI_Recv    with  nice_recv
 *     MPI_Send    with  nice_send
 *     MPI_Barrier with  nice_barrier
 */
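/*
 * For example, a call site that would normally read
 *     MPI_Recv(buf, n, MPI_INT, src, tag, comm, &status);
 * becomes
 *     nice_recv(buf, n, MPI_INT, src, tag, comm, &status);
 * with no other changes, since the argument lists match.
 */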


static int nice_send(void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm) {
    /* Assume a standard (presumably short/eager) send suffices. */
    return MPI_Send(buf, count, datatype, dest, tag, comm);
}


static int nice_recv(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Status *status) {
    MPI_Request req;
    int flag;
    struct timespec dt;

    /*
     * We're only interested in modest levels of oversubscription
     * -- e.g., 2-4x more processes than physical processors.
     * So, the sleep time only needs to be about 2-4x longer than
     * a futile MPI_Test call.  For a wide range of processors,
     * something less than a millisecond should be sufficient.
     * Excessive sleep times (e.g., 1 second) would degrade performance.
     */
    dt.tv_sec    =       0;
    dt.tv_nsec   =  100000;

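    /*
     * Post the receive nonblocking, then poll it with MPI_Test,
     * sleeping ~0.1 ms between polls so oversubscribed runs yield
     * the CPU instead of busy-waiting.
     */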
    MPI_Irecv(buf, count, datatype, source, tag, comm, &req);

    MPI_Test(&req, &flag, status);
    while ( ! flag ) {
        nanosleep(&dt, NULL);
        MPI_Test(&req, &flag, status);
    }
    return MPI_SUCCESS;
}


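/*
 * A simple binomial-tree barrier built from nice_send/nice_recv:
 * ranks first fan in toward rank 0 (each rank reports to the rank
 * obtained by clearing its lowest set bit, after hearing from its
 * own subtree), then rank 0 releases everyone by fanning back out
 * along the same tree.
 */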
static void nice_barrier(MPI_Comm comm) {
    int me, np, jump, buf = -1;

    MPI_Comm_rank(comm,&me);
    MPI_Comm_size(comm,&np);

    /* fan in */
    for ( jump = 1; jump < np; jump <<= 1 ) {
        if ( ( me & jump ) != 0 ) {
            nice_send(&buf, 1, MPI_INT, me - jump, 343, comm);
            break;
        } else if ( me + jump < np ) {
            nice_recv(&buf, 1, MPI_INT, me + jump, 343, comm, MPI_STATUS_IGNORE);
        }
    }

    /* fan out */
    if ( 0 != me ) {
        nice_recv(&buf, 1, MPI_INT, me - jump, 344, comm, MPI_STATUS_IGNORE);
    }
    jump >>= 1;
    for ( ; jump > 0; jump >>= 1 ) {
        if ( me + jump < np ) {
            nice_send(&buf, 1, MPI_INT, me + jump, 344, comm);
        }
    }
}


int main (int argc, char **argv)
{
    char bufs   [NCHARS];               /* send buffer  */
    char bufr[2][NCHARS];               /* recv buffers */
    MPI_Comm parent;
    int level = 0, participate = 1;
    struct utsname buf;

    /* If this is prior to OMPI v2.0, return 77/skip */
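    /* (Exit status 77 is the conventional "skip this test" result
     *  recognized by automake-style test harnesses.) */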
#if defined(OPEN_MPI)
    if (OMPI_MAJOR_VERSION < 2) {
        printf("Skipping, because the orte cannot handle concurrent MPI_Comm_spawns\n");
        return 77;
    } else {
        printf("Verify that this test is truly working, because concurrent MPI_Comm_spawns"
               " have not worked before.\n");
    }
#endif

    uname(&buf);
    printf("I AM pid %d with level %d on %s\n", getpid(), (argc < 2)?0:atoi(argv[1]), buf.nodename);

    MPI_Init(&argc, &argv);
    MPI_Comm_get_parent(&parent);

    if (MPI_COMM_NULL != parent) {
        /* spawned processes get stuff from parent */
        level = atoi(argv[1]);
        MPI_Recv(&bufr[0], sizeof(char)*NCHARS, MPI_CHAR, MPI_ANY_SOURCE,
                 MPI_ANY_TAG, parent, MPI_STATUS_IGNORE);
        printf("Parent sent: %s\n", bufr[0]);
    } else {

        /* original processes have to decide whether to participate */

        /* In this test, each process launched by "mpirun -n <np>" spawns a
         * binary tree of processes.  You end up with <np> * ( 1 << max_depth )
         * processes altogether.  For max_depth=4, this means 16*<np>.  There
         * is potential here for heavy oversubscription, especially if in
         * testing we launch tests with <np> set to the number of available
         * processors.  This test tolerates oversubscription somewhat since
         * it entails little inter-process synchronization.  Nevertheless,
         * we try to idle all but <np>/4 of the original processes, while
         * keeping at least two of them active.
         */
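
        /* For example, "mpirun -n 8" leaves only ranks 0 and 4
         * participating (see the "& 3" filter below); each heads a tree
         * of 1 << max_depth = 16 processes (itself included), so 32
         * processes run the test while the other 6 originals simply
         * idle in nice_barrier() at the end. */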

        int me, np;

        MPI_Comm_size(MPI_COMM_WORLD,&np);
        MPI_Comm_rank(MPI_COMM_WORLD,&me);

        if ( np > 4 ) {
            /* turn off all but every 4th process */
            if ( ( me & 3 ) != 0 ) participate = 0;
        } else if ( np > 2 ) {
            /* turn off all but every 2nd process */
            if ( ( me & 1 ) != 0 ) participate = 0;
        }
    }

    /* all spawned processes and selected "root" processes participate */
    if ( participate ) {
        printf("level = %d\n", level);

        /* prepare send buffer */
        sprintf(bufs,"level %d (pid:%d)", level, getpid());

        /* spawn */
        if (level < max_depth) {
            int i, nspawn = 2, errcodes[1];
            MPI_Request req[2];
            MPI_Comm   comm[2];
            char argv1[NCHARS];
            char *args[2];

            /* level 0 spawns only one process to mimic the original test */
            if ( level == 0 ) nspawn = 1;

            /* prepare command line arguments */
            snprintf(argv1, sizeof(argv1), "%d", level+1);
            args[0] = argv1;
            args[1] = NULL;

            /* spawn, with a message sent to and received from each child */
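            /* Each MPI_Comm_spawn launches one child over MPI_COMM_SELF
             * (root 0) and returns an intercommunicator in comm[i];
             * ranks used on comm[i] refer to the child's group. */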
            for ( i = 0; i < nspawn; i++ ) {
                MPI_Comm_spawn(argv[0], args, 1, MPI_INFO_NULL, 0, MPI_COMM_SELF,
                               &comm[i], errcodes);
                MPI_Send(&bufs, sizeof(char)*NCHARS, MPI_CHAR, 0, 100, comm[i]);
                MPI_Irecv(&bufr[i], sizeof(char)*NCHARS, MPI_CHAR, MPI_ANY_SOURCE,
                          MPI_ANY_TAG, comm[i], &req[i]);
            }

            /* wait for messages from children and print them */
            MPI_Waitall(nspawn, req, MPI_STATUSES_IGNORE);
            for ( i = 0; i < nspawn; i++ )
                printf("Child %d sent: %s\n", i, bufr[i]);
        }

        /* send message back to parent */
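        /* (On the parent intercommunicator, destination ranks refer to
         *  the remote group, so rank 0 here is the parent process.) */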
        if (MPI_COMM_NULL != parent) {
            MPI_Send(&bufs, sizeof(char)*NCHARS, MPI_CHAR, 0, 100, parent);
        }
    }

    /* non-participating processes wait at this barrier for their peers */
    /* (This barrier won't cost that many CPU cycles.) */
    if (MPI_COMM_NULL == parent) {
        nice_barrier(MPI_COMM_WORLD);
    }

    MPI_Finalize();
    return 0;
}
