root/ompi/mca/io/romio321/romio/adio/ad_gpfs/ad_gpfs_tuning.h

/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp.  2007, 2008                               */
/* ---------------------------------------------------------------- */
/**
 * \file ad_gpfs_tuning.h
 * \brief Global variables and macros for GPFS ADIO performance tuning and
 *        functional debugging.
 */

/*---------------------------------------------------------------------
 * ad_gpfs_tuning.h
 *
 * declares global variables and macros for performance tuning and
 * functional debugging.
 *---------------------------------------------------------------------*/

#ifndef AD_GPFS_TUNING_H_
#define AD_GPFS_TUNING_H_

#include "adio.h"


/*-----------------------------------------
 *  Global variables for the control of
 *  1.  timing
 *  2.  selection of specific optimizations
 *  3.  global flags for certain optimizations
 *-----------------------------------------*/

/* timing fields */
enum {
    GPFSMPIO_CIO_DATA_SIZE=0,
    GPFSMPIO_CIO_T_SEEK,
    GPFSMPIO_CIO_T_LCOMP,       /* time for ADIOI_Calc_my_off_len(), local */
    GPFSMPIO_CIO_T_GATHER,      /* time for previous MPI_Allgather, now Allreduce */
    GPFSMPIO_CIO_T_PATANA,      /* time for a quick test of whether access is contiguous, local */
    GPFSMPIO_CIO_T_FD_PART,     /* time for file domain partitioning, local */
    GPFSMPIO_CIO_T_MYREQ,       /* time for ADIOI_Calc_my_req(), local */
    GPFSMPIO_CIO_T_OTHREQ,      /* time for ADIOI_Calc_others_req(), short Alltoall */
    GPFSMPIO_CIO_T_DEXCH,       /* time for I/O data exchange */
    /* the next DEXCH_* timers capture finer-grained portions of T_DEXCH */
    GPFSMPIO_CIO_T_DEXCH_RECV_EXCH, /* time for each process to exchange receive
                                       size info with everyone else */
    GPFSMPIO_CIO_T_DEXCH_SETUP, /* time for setup portion of I/O data exchange */
    GPFSMPIO_CIO_T_DEXCH_NET,   /* time for network portion of I/O data exchange */
    GPFSMPIO_CIO_T_DEXCH_SORT,  /* time to sort requests in I/O data exchange */
    GPFSMPIO_CIO_T_DEXCH_SIEVE, /* time for read portion of RMW in two phase */
    GPFSMPIO_CIO_T_POSI_RW,
    GPFSMPIO_CIO_B_POSI_RW,
    GPFSMPIO_CIO_T_MPIO_RW,     /* time for ADIOI_WriteContig() */
    GPFSMPIO_CIO_B_MPIO_RW,
    GPFSMPIO_CIO_T_MPIO_CRW,    /* time for ADIOI_GPFS_WriteStridedColl() */
    GPFSMPIO_CIO_B_MPIO_CRW,
    GPFSMPIO_CIO_LAST
};

/* +1 because GPFSMPIO_CIO_LAST is actually used to say "zero this counter" */
extern double   gpfsmpio_prof_cw    [GPFSMPIO_CIO_LAST+1];
extern double   gpfsmpio_prof_cr    [GPFSMPIO_CIO_LAST+1];

/* correspond to environment variables that select optimizations and the timing level */
extern int      gpfsmpio_timing;
extern int      gpfsmpio_timing_cw_level;
extern int      gpfsmpio_comm;
extern int      gpfsmpio_tunegather;
extern int      gpfsmpio_tuneblocking;
extern long     bglocklessmpio_f_type;
extern int      gpfsmpio_pthreadio;
extern int      gpfsmpio_p2pcontig;
extern int      gpfsmpio_write_aggmethod;
extern int      gpfsmpio_read_aggmethod;
extern int      gpfsmpio_balancecontig;
extern int      gpfsmpio_devnullio;
extern int      gpfsmpio_bridgeringagg;
extern int      gpfsmpio_onesided_no_rmw;
extern int      gpfsmpio_onesided_always_rmw;
extern int      gpfsmpio_onesided_inform_rmw;

/* The default is, well, kind of complicated.  Blue Gene /L and /P had "psets":
 * one I/O node and all the compute nodes wired to it.  On Blue Gene /Q that
 * relationship is a lot more fluid.  There are still I/O nodes, and compute
 * nodes are assigned to an I/O node, but there are two routes to the I/O node,
 * via compute nodes designated as "bridge nodes".  In this code, what we used
 * to call a "pset" is actually "the compute nodes associated with, and
 * including, a bridge node".  So "nAgg" is roughly "number of aggregators per
 * bridge", but look closely at ADIOI_BG_persInfo_init() for the details. */

#define ADIOI_BG_NAGG_PSET_DFLT 16

extern int      gpfsmpio_bg_nagg_pset;


/* set internal variables from the tuning environment variables */
void ad_gpfs_get_env_vars(void);
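
/* Illustrative sketch only: ad_gpfs_get_env_vars() is implemented in
 * ad_gpfs_tuning.c.  This hypothetical helper just shows the general pattern
 * (getenv()/atoi() plus a hard-coded default; assumes <stdlib.h> has been
 * included).  The environment variable names and default values here are
 * assumptions made for the example, not the authoritative list. */
static inline void gpfs_tuning_env_sketch(void)
{
    const char *x;

    gpfsmpio_timing     = 0;    /* assumed default: per-call timing disabled */
    gpfsmpio_tunegather = 1;    /* assumed default: gather tuning enabled */

    if ((x = getenv("GPFSMPIO_TIMING")) != NULL)        /* assumed variable name */
        gpfsmpio_timing = atoi(x);
    if ((x = getenv("GPFSMPIO_TUNEGATHER")) != NULL)    /* assumed variable name */
        gpfsmpio_tunegather = atoi(x);
}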

/* report the timing breakdown for an MPI I/O collective call */
void ad_gpfs_timing_crw_report( int rw, ADIO_File fd, int myrank, int nprocs );

/* note:
 *   T := timing;
 * CIO := collective I/O
 */
#define GPFSMPIO_T_CIO_RESET( RW ) \
        { \
          int _i; \
          for ( _i = 0; _i < GPFSMPIO_CIO_LAST; _i++ ) \
            gpfsmpio_prof_c##RW [ _i ] = 0; \
        }

#define GPFSMPIO_T_CIO_REPORT( RW, FD, MYRANK, NPROCS ) \
        ad_gpfs_timing_crw_report ( RW, FD, MYRANK, NPROCS );

#define GPFSMPIO_T_CIO_SET_GET( RW, ISSET, ISGET, VAR1, VAR2 ) \
        { \
          double temp = MPI_Wtime(); \
          if ( ISSET ) gpfsmpio_prof_c##RW [ VAR1 ] = temp; \
          if ( ISGET ) gpfsmpio_prof_c##RW [ VAR2 ] = temp - gpfsmpio_prof_c##RW [ VAR2 ]; \
        }
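
/* Illustrative sketch only: the function below is hypothetical and not part of
 * the driver; it just shows the intended call pattern for the macros above in
 * a collective-write path (RESET once, SET/GET around each phase, REPORT at
 * the end).  Using rw = 1 to mean "write" in the REPORT call is an assumption
 * made for the example. */
static inline void gpfs_tuning_timing_usage_sketch(ADIO_File fd, int myrank, int nprocs)
{
    if (gpfsmpio_timing) {
        GPFSMPIO_T_CIO_RESET(w)     /* zero every write-side counter */
        /* ISSET=1, ISGET=0: record the start time of the first phase */
        GPFSMPIO_T_CIO_SET_GET(w, 1, 0, GPFSMPIO_CIO_T_LCOMP, GPFSMPIO_CIO_T_LCOMP)
    }

    /* ... local offset/length computation would run here ... */

    if (gpfsmpio_timing) {
        /* one MPI_Wtime() call both closes the previous interval
         * (ISGET on T_LCOMP) and opens the next one (ISSET on T_GATHER) */
        GPFSMPIO_T_CIO_SET_GET(w, 1, 1, GPFSMPIO_CIO_T_GATHER, GPFSMPIO_CIO_T_LCOMP)
    }

    /* ... remaining phases of the collective write ... */

    if (gpfsmpio_timing) {
        GPFSMPIO_T_CIO_REPORT(1, fd, myrank, nprocs)
    }
}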

#endif  /* AD_GPFS_TUNING_H_ */
