root/test/monitoring/monitoring_test.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. main

   1 /*
   2  * Copyright (c) 2013-2015 The University of Tennessee and The University
   3  *                         of Tennessee Research Foundation.  All rights
   4  *                         reserved.
   5  * Copyright (c) 2013-2017 Inria.  All rights reserved.
   6  * Copyright (c) 2015 Cisco Systems, Inc.  All rights reserved.
   7  * Copyright (c) 2016      Intel, Inc.  All rights reserved.
   8  * $COPYRIGHT$
   9  *
  10  * Additional copyrights may follow
  11  *
  12  * $HEADER$
  13  */
  14 
  15 /*
  16 pml monitoring tester.
  17 
  18 Designed by George Bosilca <bosilca@icl.utk.edu> Emmanuel Jeannot <emmanuel.jeannot@inria.fr> and Clément Foyer <clement.foyer@inria.fr>
  19 Contact the authors for questions.
  20 
  21 Two options are available for this test: with/without MPI_Tools, and with/without RMA operations. The default mode is without MPI_Tools, and with RMA operations.
  22 To enable the MPI_Tools use, add "--with-mpit" as an application parameter.
  23 To disable the RMA operations testing, add "--without-rma" as an application parameter.
  24 
  25 To be run as (without using MPI_Tool):
  26 
  27 mpirun -np 4 --mca pml_monitoring_enable 2 --mca pml_monitoring_enable_output 3 --mca pml_monitoring_filename prof/output ./monitoring_test
  28 
  29 with the results being, as an example:
  30 output.1.prof
  31 # POINT TO POINT
  32 E       1       2       104 bytes       26 msgs sent    0,0,0,26,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
  33 E       1       3       208 bytes       52 msgs sent    8,0,0,65,1,5,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
  34 I       1       0       140 bytes       27 msgs sent
  35 I       1       2       2068 bytes      1 msgs sent
  36 I       1       3       2256 bytes      31 msgs sent
  37 # OSC
  38 S       1       0       0 bytes 1 msgs sent
  39 R       1       0       40960 bytes     1 msgs sent
  40 S       1       2       40960 bytes     1 msgs sent
  41 # COLLECTIVES
  42 C       1       0       140 bytes       27 msgs sent
  43 C       1       2       140 bytes       27 msgs sent
  44 C       1       3       140 bytes       27 msgs sent
  45 D       MPI COMMUNICATOR 4 DUP FROM 0   procs: 0,1,2,3
  46 O2A     1       0 bytes 0 msgs sent
  47 A2O     1       0 bytes 0 msgs sent
  48 A2A     1       276 bytes       15 msgs sent
  49 D       MPI_COMM_WORLD  procs: 0,1,2,3
  50 O2A     1       0 bytes 0 msgs sent
  51 A2O     1       0 bytes 0 msgs sent
  52 A2A     1       96 bytes        9 msgs sent
  53 D       MPI COMMUNICATOR 5 SPLIT_TYPE FROM 4    procs: 0,1,2,3
  54 O2A     1       0 bytes 0 msgs sent
  55 A2O     1       0 bytes 0 msgs sent
  56 A2A     1       48 bytes        3 msgs sent
  57 D       MPI COMMUNICATOR 3 SPLIT FROM 0 procs: 1,3
  58 O2A     1       0 bytes 0 msgs sent
  59 A2O     1       0 bytes 0 msgs sent
  60 A2A     1       0 bytes 0 msgs sent
  61 
  62 */
  63 
  64 
  65 #include "mpi.h"
  66 #include <stdio.h>
  67 #include <string.h>
  68 
  69 static MPI_T_pvar_handle flush_handle;
  70 static const char flush_pvar_name[] = "pml_monitoring_flush";
  71 static const void*nullbuf = NULL;
  72 static int flush_pvar_idx;
  73 static int with_mpit = 0;
  74 static int with_rma  = 1;
  75 
  76 int main(int argc, char* argv[])
  77 {
  78     int rank, size, n, to, from, tagno, MPIT_result, provided, count, world_rank;
  79     MPI_T_pvar_session session;
  80     MPI_Comm newcomm;
  81     MPI_Comm comm = MPI_COMM_WORLD;
  82     char filename[1024];
  83     
  84     for ( int arg_it = 1; argc > 1 && arg_it < argc; ++arg_it ) {
  85         if( 0 == strcmp(argv[arg_it], "--with-mpit") ) {
  86             with_mpit = 1;
  87             printf("enable MPIT support\n");
  88         } else if( 0 == strcmp(argv[arg_it], "--without-rma") ) {
  89             with_rma = 0;
  90             printf("disable RMA testing\n");
  91         }
  92     }
  93 
  94     /* first phase : make a token circulated in MPI_COMM_WORLD */
  95     n = -1;
  96     MPI_Init(NULL, NULL);
  97     MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
  98     MPI_Comm_size(MPI_COMM_WORLD, &size);
  99     rank = world_rank;
 100     to = (rank + 1) % size;
 101     from = (rank - 1) % size;
 102     tagno = 201;
 103 
 104     if( with_mpit ) {
 105         MPIT_result = MPI_T_init_thread(MPI_THREAD_SINGLE, &provided);
 106         if (MPIT_result != MPI_SUCCESS)
 107             MPI_Abort(MPI_COMM_WORLD, MPIT_result);
 108 
 109         MPIT_result = MPI_T_pvar_get_index(flush_pvar_name, MPI_T_PVAR_CLASS_GENERIC, &flush_pvar_idx);
 110         if (MPIT_result != MPI_SUCCESS) {
 111             printf("cannot find monitoring MPI_T \"%s\" pvar, check that you have monitoring pml\n",
 112                    flush_pvar_name);
 113             MPI_Abort(MPI_COMM_WORLD, MPIT_result);
 114         }
 115 
 116         MPIT_result = MPI_T_pvar_session_create(&session);
 117         if (MPIT_result != MPI_SUCCESS) {
 118             printf("cannot create a session for \"%s\" pvar\n", flush_pvar_name);
 119             MPI_Abort(MPI_COMM_WORLD, MPIT_result);
 120         }
 121 
 122         /* Allocating a new PVAR in a session will reset the counters */
 123         MPIT_result = MPI_T_pvar_handle_alloc(session, flush_pvar_idx,
 124                                               &comm, &flush_handle, &count);
 125         if (MPIT_result != MPI_SUCCESS) {
 126             printf("failed to allocate handle on \"%s\" pvar, check that you have monitoring pml\n",
 127                    flush_pvar_name);
 128             MPI_Abort(MPI_COMM_WORLD, MPIT_result);
 129         }
 130 
 131         MPIT_result = MPI_T_pvar_start(session, flush_handle);
 132         if (MPIT_result != MPI_SUCCESS) {
 133             printf("failed to start handle on \"%s\" pvar, check that you have monitoring pml\n",
 134                    flush_pvar_name);
 135             MPI_Abort(MPI_COMM_WORLD, MPIT_result);
 136         }
 137     }
 138 
 139     if (rank == 0) {
 140         n = 25;
 141         MPI_Send(&n,1,MPI_INT,to,tagno,MPI_COMM_WORLD);
 142     }
 143     while (1) {
 144         MPI_Recv(&n, 1, MPI_INT, from, tagno, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
 145         if (rank == 0) {n--;tagno++;}
 146         MPI_Send(&n, 1, MPI_INT, to, tagno, MPI_COMM_WORLD);
 147         if (rank != 0) {n--;tagno++;}
 148         if (n<0){
 149             break;
 150         }
 151     }
 152 
 153     if( with_mpit ) {
 154         /* Build one file per processes
 155               Every thing that has been monitored by each
 156               process since the last flush will be output in filename */
 157         /*
 158             Requires directory prof to be created.
 159               Filename format should display the phase number
 160                 and the process rank for ease of parsing with
 161                   aggregate_profile.pl script
 162         */
 163         sprintf(filename, "prof/phase_1");
 164 
 165         if( MPI_SUCCESS != MPI_T_pvar_write(session, flush_handle, filename) ) {
 166             fprintf(stderr, "Process %d cannot save monitoring in %s.%d.prof\n",
 167                     world_rank, filename, world_rank);
 168         }
 169         /* Force the writing of the monitoring data */
 170         MPIT_result = MPI_T_pvar_stop(session, flush_handle);
 171         if (MPIT_result != MPI_SUCCESS) {
 172             printf("failed to stop handle on \"%s\" pvar, check that you have monitoring pml\n",
 173                    flush_pvar_name);
 174             MPI_Abort(MPI_COMM_WORLD, MPIT_result);
 175         }
 176 
 177         MPIT_result = MPI_T_pvar_start(session, flush_handle);
 178         if (MPIT_result != MPI_SUCCESS) {
 179             printf("failed to start handle on \"%s\" pvar, check that you have monitoring pml\n",
 180                    flush_pvar_name);
 181             MPI_Abort(MPI_COMM_WORLD, MPIT_result);
 182         }
 183         /* Don't set a filename. If we stop the session before setting it, then no output file
 184          * will be generated.
 185          */
 186         if( MPI_SUCCESS != MPI_T_pvar_write(session, flush_handle, (void*)&nullbuf) ) {
 187             fprintf(stderr, "Process %d cannot save monitoring in %s\n", world_rank, filename);
 188         }
 189     }
 190 
 191     /*
 192       Second phase. Work with different communicators.
 193       even ranks will circulate a token
 194       while odd ranks will perform a all_to_all
 195     */
 196     MPI_Comm_split(MPI_COMM_WORLD, rank%2, rank, &newcomm);
 197 
 198     if(rank%2){ /*odd ranks (in COMM_WORD) circulate a token*/
 199         MPI_Comm_rank(newcomm, &rank);
 200         MPI_Comm_size(newcomm, &size);
 201         if( size > 1 ) {
 202             to = (rank + 1) % size;
 203             from = (rank - 1) % size;
 204             tagno = 201;
 205             if (rank == 0){
 206                 n = 50;
 207                 MPI_Send(&n, 1, MPI_INT, to, tagno, newcomm);
 208             }
 209             while (1){
 210                 MPI_Recv(&n, 1, MPI_INT, from, tagno, newcomm, MPI_STATUS_IGNORE);
 211                 if (rank == 0) {n--; tagno++;}
 212                 MPI_Send(&n, 1, MPI_INT, to, tagno, newcomm);
 213                 if (rank != 0) {n--; tagno++;}
 214                 if (n<0){
 215                     break;
 216                 }
 217             }
 218         }
 219     } else { /*even ranks (in COMM_WORD) will perform a all_to_all and a barrier*/
 220         int send_buff[10240];
 221         int recv_buff[10240];
 222         MPI_Comm newcomm2;
 223         MPI_Comm_rank(newcomm, &rank);
 224         MPI_Comm_size(newcomm, &size);
 225         MPI_Alltoall(send_buff, 10240/size, MPI_INT, recv_buff, 10240/size, MPI_INT, newcomm);
 226         MPI_Comm_split(newcomm, rank%2, rank, &newcomm2);
 227         MPI_Barrier(newcomm2);
 228         MPI_Comm_free(&newcomm2);
 229     }
 230 
 231     if( with_mpit ) {
 232         /* Build one file per processes
 233               Every thing that has been monitored by each
 234               process since the last flush will be output in filename */
 235         /*
 236             Requires directory prof to be created.
 237               Filename format should display the phase number
 238                 and the process rank for ease of parsing with
 239                                                     aggregate_profile.pl script
 240                                                   */
 241         sprintf(filename, "prof/phase_2");
 242 
 243         if( MPI_SUCCESS != MPI_T_pvar_write(session, flush_handle, filename) ) {
 244             fprintf(stderr, "Process %d cannot save monitoring in %s.%d.prof\n",
 245                     world_rank, filename, world_rank);
 246         }
 247 
 248         /* Force the writing of the monitoring data */
 249         MPIT_result = MPI_T_pvar_stop(session, flush_handle);
 250         if (MPIT_result != MPI_SUCCESS) {
 251             printf("failed to stop handle on \"%s\" pvar, check that you have monitoring pml\n",
 252                    flush_pvar_name);
 253             MPI_Abort(MPI_COMM_WORLD, MPIT_result);
 254         }
 255 
 256         MPIT_result = MPI_T_pvar_start(session, flush_handle);
 257         if (MPIT_result != MPI_SUCCESS) {
 258             printf("failed to start handle on \"%s\" pvar, check that you have monitoring pml\n",
 259                    flush_pvar_name);
 260             MPI_Abort(MPI_COMM_WORLD, MPIT_result);
 261         }
 262         /* Don't set a filename. If we stop the session before setting it, then no output
 263          * will be generated.
 264          */
 265         if( MPI_SUCCESS != MPI_T_pvar_write(session, flush_handle, (void*)&nullbuf ) ) {
 266             fprintf(stderr, "Process %d cannot save monitoring in %s\n", world_rank, filename);
 267         }
 268     }
 269 
 270     if( with_rma ) {
 271       MPI_Win win;
 272       int rs_buff[10240];
 273       int win_buff[10240];
 274       MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 275       MPI_Comm_size(MPI_COMM_WORLD, &size);
 276       to = (rank + 1) % size;
 277       from = (rank + size - 1) % size;
 278       for( int v = 0; v < 10240; ++v )
 279         rs_buff[v] = win_buff[v] = rank;
 280 
 281       MPI_Win_create(win_buff, 10240 * sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &win);
 282       MPI_Win_fence(MPI_MODE_NOPRECEDE, win);
 283       if( rank%2 ) {
 284         MPI_Win_fence(MPI_MODE_NOSTORE | MPI_MODE_NOPUT, win);
 285         MPI_Get(rs_buff, 10240, MPI_INT, from, 0, 10240, MPI_INT, win);
 286       } else {
 287         MPI_Put(rs_buff, 10240, MPI_INT, to, 0, 10240, MPI_INT, win);
 288         MPI_Win_fence(MPI_MODE_NOSTORE | MPI_MODE_NOPUT, win);
 289       }
 290       MPI_Win_fence(MPI_MODE_NOSUCCEED, win);
 291 
 292       for( int v = 0; v < 10240; ++v )
 293         if( rs_buff[v] != win_buff[v] && ((rank%2 && rs_buff[v] != from) || (!(rank%2) && rs_buff[v] != rank)) ) {
 294           printf("Error on checking exchanged values: %s_buff[%d] == %d instead of %d\n",
 295                  rank%2 ? "rs" : "win", v, rs_buff[v], rank%2 ? from : rank);
 296           MPI_Abort(MPI_COMM_WORLD, -1);
 297         }
 298 
 299       MPI_Group world_group, newcomm_group, distant_group;
 300       MPI_Comm_group(MPI_COMM_WORLD, &world_group);
 301       MPI_Comm_group(newcomm, &newcomm_group);
 302       MPI_Group_difference(world_group, newcomm_group, &distant_group);
 303       if( rank%2 ) {
 304         MPI_Win_post(distant_group, 0, win);
 305         MPI_Win_wait(win);
 306         /* Check received values */
 307         for( int v = 0; v < 10240; ++v )
 308           if( from != win_buff[v] ) {
 309             printf("Error on checking exchanged values: win_buff[%d] == %d instead of %d\n",
 310                    v, win_buff[v], from);
 311             MPI_Abort(MPI_COMM_WORLD, -1);
 312           }
 313       } else {
 314         MPI_Win_start(distant_group, 0, win);
 315         MPI_Put(rs_buff, 10240, MPI_INT, to, 0, 10240, MPI_INT, win);
 316         MPI_Win_complete(win);
 317       }
 318       MPI_Group_free(&world_group);
 319       MPI_Group_free(&newcomm_group);
 320       MPI_Group_free(&distant_group);
 321       MPI_Barrier(MPI_COMM_WORLD);
 322 
 323       for( int v = 0; v < 10240; ++v ) rs_buff[v] = rank;
 324 
 325       MPI_Win_lock(MPI_LOCK_EXCLUSIVE, to, 0, win);
 326       MPI_Put(rs_buff, 10240, MPI_INT, to, 0, 10240, MPI_INT, win);
 327       MPI_Win_unlock(to, win);
 328 
 329       MPI_Barrier(MPI_COMM_WORLD);
 330 
 331       /* Check received values */
 332       for( int v = 0; v < 10240; ++v )
 333         if( from != win_buff[v] ) {
 334           printf("Error on checking exchanged values: win_buff[%d] == %d instead of %d\n",
 335                  v, win_buff[v], from);
 336           MPI_Abort(MPI_COMM_WORLD, -1);
 337         }
 338 
 339       MPI_Win_free(&win);
 340     }
 341 
 342     if( with_mpit ) {
 343         /* the filename for flushing monitoring now uses 3 as phase number! */
 344         sprintf(filename, "prof/phase_3");
 345 
 346         if( MPI_SUCCESS != MPI_T_pvar_write(session, flush_handle, filename) ) {
 347             fprintf(stderr, "Process %d cannot save monitoring in %s.%d.prof\n",
 348                     world_rank, filename, world_rank);
 349         }
 350 
 351         MPIT_result = MPI_T_pvar_stop(session, flush_handle);
 352         if (MPIT_result != MPI_SUCCESS) {
 353             printf("failed to stop handle on \"%s\" pvar, check that you have monitoring pml\n",
 354                    flush_pvar_name);
 355             MPI_Abort(MPI_COMM_WORLD, MPIT_result);
 356         }
 357 
 358         MPIT_result = MPI_T_pvar_handle_free(session, &flush_handle);
 359         if (MPIT_result != MPI_SUCCESS) {
 360             printf("failed to free handle on \"%s\" pvar, check that you have monitoring pml\n",
 361                    flush_pvar_name);
 362             MPI_Abort(MPI_COMM_WORLD, MPIT_result);
 363         }
 364 
 365         MPIT_result = MPI_T_pvar_session_free(&session);
 366         if (MPIT_result != MPI_SUCCESS) {
 367             printf("cannot close a session for \"%s\" pvar\n", flush_pvar_name);
 368             MPI_Abort(MPI_COMM_WORLD, MPIT_result);
 369         }
 370 
 371         (void)MPI_T_finalize();
 372     }
 373 
 374     MPI_Comm_free(&newcomm);
 375     /* Now, in MPI_Finalize(), the pml_monitoring library outputs, in
 376        STDERR, the aggregated recorded monitoring of all the phases*/
 377     MPI_Finalize();
 378     return 0;
 379 }

/* [<][>][^][v][top][bottom][index][help] */