1 /* -*- C -*-
2 *
3 * Copyright (c) 2008 Los Alamos National Security, LLC. All rights reserved.
4 *
5 * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
6 * $COPYRIGHT$
7 *
8 * Additional copyrights may follow
9 *
10 * $HEADER$
11 *
12 */
13 #include <stdio.h>
14 #include <stdbool.h>
15 #include <sys/types.h>
16 #include <unistd.h>
17 #include <stdlib.h>
18 #include <time.h>
19 #include <sys/time.h>
20
21 #include <mpi.h>
22
23 int main(int argc, char* argv[])
24 {
25 int msg;
26 int rank, size, my_twin;
27 int ppn, my_node;
28 struct timeval tv;
29 unsigned long my_timestamp[2];
30 long *timestamps;
31 int i, maxrank;
32 unsigned long maxsec, maxusec, minutes, seconds;
33 unsigned long start_sec, start_usec;
34 float fsecs;
35 int nnodes;
36 bool odd_nnodes;
37 bool recvit;
38 char *ppnstr;
39
40 if (argc < 3) {
41 fprintf(stderr, "start times must be provided\n");
42 return 1;
43 }
44
45 ppnstr = getenv("OMPI_COMM_WORLD_LOCAL_SIZE");
46 ppn = strtol(ppnstr, NULL, 10);
47 start_sec = strtol(argv[1], NULL, 10);
48 start_usec = strtol(argv[2], NULL, 10);
49
50 MPI_Init(NULL, NULL);
51 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
52 MPI_Comm_size(MPI_COMM_WORLD, &size);
53
54 /* this program requires that the size be an integer multiple of ppn */
55 if (0 != (size % ppn)) {
56 if (0 == rank) {
57 fprintf(stderr, "The number of procs must be an integer multiple of the ppn\n"
58 "Given: num_procs %d ppn %d\n", size, ppn);
59 MPI_Abort(MPI_COMM_WORLD, 1);
60 } else {
61 goto cleanup;
62 }
63 }
64
65 /* see how many nodes we have */
66 nnodes = size / ppn;
67
68 odd_nnodes = false;
69 if (0 != (nnodes % 2)) {
70 /* we have an odd # of nodes */
71 odd_nnodes = true;
72 }
73
74 /* compute the rank of the rank with which I am to exchange a message.
75 * Per requirements, this proc must be on another node. To accomplish
76 * this with max efficiency, we take advantage of knowing that the ppn
77 * on every node will be the same. We therefore pair up the nodes, and
78 * pair up the procs on each node, so that only one connection is setup
79 * for each proc. We also want to ensure that the node pairs are
80 * "neighboring" - i.e., that they hopefully share a switch so that the
81 * hop count of sending the messages is minimized.
82 */
83
84 /* first, determine if my node is odd or even */
85 my_node = rank / ppn;
86
87 if (0 != (my_node % 2)) {
88 /* compute my twin's rank - as I am an odd numbered node, my
89 * twin will be on the node below me. Thus, its rank will be
90 * my rank - ppn
91 */
92 my_twin = rank - ppn;
93 /* if I am an odd numbered node, then I will receive first */
94 MPI_Recv(&msg, 1, MPI_INT, my_twin, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
95 /* receive the return message so that we meet the stated requirement
96 * that -every- proc send a message
97 */
98 MPI_Send(&msg, 1, MPI_INT, my_twin, 1, MPI_COMM_WORLD);
99 } else {
100 /* compute my twin's rank - as I am an even numbered node, my
101 * twin will be on the node above me. Thus, its rank will be
102 * my rank + ppn
103 */
104 my_twin = rank + ppn;
105 /* if we have an odd number of nodes, then the last node will be
106 * even and will have no one above them. In this case, we wrap around
107 * and ask that node=0 take the additional connections
108 */
109 recvit = true;
110 if (my_twin >= size) {
111 my_twin = my_twin - size;
112 recvit = false;
113 }
114 /* I am an even numbered node, so I send first */
115 MPI_Send(&msg, 1, MPI_INT, my_twin, 1, MPI_COMM_WORLD);
116 /* now receive the reply so my twin also meets the requirement - but only
117 * if we don't have an odd number of nodes. If we have an odd number of
118 * nodes, then the node=0 procs will already have met their requirement
119 */
120 if (recvit) {
121 MPI_Recv(&msg, 1, MPI_INT, my_twin, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
122 }
123 }
124
125 /* if we have an odd number of nodes and I am on node=0, then I have
126 * to take the extra recv
127 */
128 if (odd_nnodes && 0 == my_node) {
129 my_twin = size - ppn + rank;
130 MPI_Recv(&msg, 1, MPI_INT, my_twin, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
131 }
132
133 /* get a completion time stamp */
134 gettimeofday(&tv, NULL);
135 my_timestamp[0] = tv.tv_sec;
136 my_timestamp[1] = tv.tv_usec;
137
138 /* THIS COMPLETES THE OFFICIAL TIMING POINT */
139
140 /* Gather to get all the timestamps to rank 0 */
141 timestamps = NULL;
142 if (0 == rank) {
143 timestamps = malloc(2 * size * sizeof(unsigned long));
144 if (NULL == timestamps) {
145 MPI_Abort(MPI_COMM_WORLD, 1);
146 }
147 }
148 MPI_Gather(&my_timestamp, 2, MPI_LONG,
149 timestamps, 2, MPI_LONG, 0, MPI_COMM_WORLD);
150 if (0 == rank) {
151 /* The "timestamps" array will now have everyone's timestamp
152 (i.e., rank 0's timestamp will be in pos 0 & 1,, rank 1's timestamp
153 will be in 2 & 3, ...etc. */
154 /* find the maximum timestamp */
155 maxsec = start_sec;
156 maxusec = start_usec;
157 maxrank = -1;
158 for (i=0; i < 2*size; i+=2) {
159 if (timestamps[i] < maxsec) {
160 continue;
161 }
162 if (timestamps[i] == maxsec &&
163 timestamps[i+1] < maxusec) {
164 continue;
165 }
166 maxsec = timestamps[i];
167 maxusec = timestamps[i+1];
168 maxrank = i/2;
169 }
170 free(timestamps);
171 /* subtract starting time to get time in microsecs for test */
172 maxsec = maxsec - start_sec;
173 if (maxusec >= start_usec) {
174 maxusec = maxusec - start_usec;
175 } else {
176 maxsec--;
177 maxusec = 1000000 - start_usec + maxusec;
178 }
179 /* pretty-print the result */
180 seconds = maxsec + (maxusec / 1000000l);
181 minutes = seconds / 60l;
182 seconds = seconds % 60l;
183 if (0 == minutes && 0 == seconds) {
184 fsecs = ((float)(maxsec)*1000000.0 + (float)maxusec) / 1000.0;
185 fprintf(stderr, "Time test was completed in %8.2f millisecs\nSlowest rank: %d\n",
186 fsecs, maxrank);
187 } else {
188 fprintf(stderr, "Time test was completed in %3lu:%02lu min:sec\nSlowest rank: %d\n",
189 minutes, seconds, maxrank);
190 }
191 }
192
193 cleanup:
194 /* this completes the test */
195 MPI_Finalize();
196
197 return 0;
198 }