root/ompi/mca/rte/rte.h

/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2012-2015 Los Alamos National Security, LLC.  All rights reserved.
 * Copyright (c) 2013      Mellanox Technologies, Inc.
 *                         All rights reserved.
 * Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
 *
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 */

/* This is the public RTE interface to the OMPI layer. Any RTE can be
 * connected to the OMPI layer by creating a new static component in
 * this framework, assigning it a priority, and including a configure.m4
 * to define when it should be built.
 *
 * Each component must provide a number of types and functions that mimic
 * those provided by ORTE. These include the following (where flexibility
 * exists, the ORTE data type is shown, but any compatible type is allowed;
 * for example, the jobid field in ompi_process_name_t may be any integer
 * type, but cannot be a string):
 *
 * (a) Process name objects and operations (an illustrative sketch follows
 *     this list)
 *     1. Definitions for integral types ompi_jobid_t and ompi_vpid_t.
 *        The jobid must be unique for a given MPI_COMM_WORLD capable of
 *        connecting to another MPI_COMM_WORLD, and the vpid will be the
 *        process's rank in MPI_COMM_WORLD.
 *     2. ompi_process_name_t - a struct that must contain at least two
 *        integer-typed fields:
 *           a. ompi_jobid_t jobid
 *           b. ompi_vpid_t vpid
 *        Note that the structure can contain any number of fields beyond
 *        these two, so the process name struct for any particular RTE can
 *        be whatever is desired.
 *     3. OMPI_NAME_PRINT - a macro that prints a process name when given
 *        a pointer to ompi_process_name_t. The output must be a single
 *        string representing the name. The macro must be safe for
 *        multiple threads to call simultaneously.
 *     4. OMPI_PROC_MY_NAME - a pointer to a global variable containing
 *        the ompi_process_name_t for this process. Typically, this is
 *        stored as a field in the ompi_process_info_t struct, but that
 *        is not a requirement.
 *     5. OMPI_NAME_WILDCARD - a wildcard name.
 *     6. ompi_rte_compare_name_fields - a function used to compare fields
 *        in the ompi_process_name_t struct. The function prototype must be
 *        of the form:
 *        int ompi_rte_compare_name_fields(ompi_rte_cmp_bitmask_t mask,
 *                                         ompi_process_name_t *name1,
 *                                         ompi_process_name_t *name2);
 *        The bitmask must be defined to indicate the fields to be used
 *        in the comparison. Fields not included in the mask must be ignored.
 *        Supported bitmask values must include:
 *           a. OMPI_RTE_CMP_JOBID
 *           b. OMPI_RTE_CMP_VPID
 *           c. OMPI_RTE_CMP_ALL
 *     7. uint64_t ompi_rte_hash_name(name) - return a 64-bit hash uniquely
 *        representing the ompi_process_name_t passed in.
 *     8. OMPI_NAME - an OPAL DSS constant for a handler already registered
 *        to serialize/deserialize an ompi_process_name_t structure.
 *
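 *        An illustrative sketch of the above, for reference only. The
 *        integer widths, the return-value convention of the comparison,
 *        and the variables peer_name, n1, and n2 are assumptions/placeholders,
 *        not requirements of this interface:
 *
 *            typedef uint32_t ompi_jobid_t;
 *            typedef uint32_t ompi_vpid_t;
 *            typedef struct {
 *                ompi_jobid_t jobid;   // unique per MPI_COMM_WORLD
 *                ompi_vpid_t  vpid;    // rank within MPI_COMM_WORLD
 *            } ompi_process_name_t;
 *
 *            // print a peer's name; OMPI_NAME_PRINT yields a string
 *            opal_output(0, "peer %s", OMPI_NAME_PRINT(&peer_name));
 *
 *            // compare only the jobid fields of two names
 *            if (OPAL_EQUAL == ompi_rte_compare_name_fields(
 *                                  OMPI_RTE_CMP_JOBID, &n1, &n2)) {
 *                // the two processes belong to the same job
 *            }
 *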
 * (b) Collective objects and operations (an illustrative calling pattern
 *     follows this list)
 *     1. ompi_rte_collective_t - an OPAL object used during RTE collective
 *        operations such as modex and barrier. It must be derived from
 *        opal_list_item_t and contain the following fields:
 *           a. id (ORTE type: int32_t)
 *           b. bool active - a flag the user can poll on to know when the
 *              collective has completed; it is set to false just prior to
 *              calling the user callback function, if one was provided
 *     2. ompi_rte_modex - a function that performs an exchange of endpoint
 *        information to wire up the MPI transports. The function prototype
 *        must be of the form:
 *        int ompi_rte_modex(ompi_rte_collective_t *coll);
 *        At the completion of the modex operation, the coll->active flag must
 *        be set to false, and the endpoint information must be stored in the
 *        modex database. This function must have barrier semantics across the
 *        MPI_COMM_WORLD of the calling process.
 *     3. ompi_rte_barrier - a function that performs a barrier operation
 *        within the RTE. The function prototype must be of the form:
 *        int ompi_rte_barrier(ompi_rte_collective_t *coll);
 *        At the completion of the barrier operation, the coll->active flag
 *        must be set to false.
 *
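 *        An illustrative calling pattern for the above (not normative;
 *        error handling omitted, and it is assumed the caller seeds
 *        coll->active before invoking the operation):
 *
 *            ompi_rte_collective_t *coll = OBJ_NEW(ompi_rte_collective_t);
 *            coll->id = ompi_process_info.peer_modex;
 *            coll->active = true;
 *            ompi_rte_modex(coll);                    // start the exchange
 *            OMPI_WAIT_FOR_COMPLETION(coll->active);  // macro defined below
 *            OBJ_RELEASE(coll);
 *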
 * (c) Process info struct (an illustrative layout follows this list)
 *     1. ompi_process_info_t - a struct containing info about the current
 *        process. The struct must contain at least the following fields:
 *           a. app_num - index of the application (app_context) to which
 *              this process belongs
 *           b. pid - this process's pid. Should be the same as getpid().
 *           c. num_procs - number of processes in this job (i.e., MCW)
 *           d. my_node_rank - relative rank on the local node among all the
 *              peers this run-time instance knows about. If doing dynamics,
 *              this may be different from my_local_rank, but will equal
 *              my_local_rank in a static job.
 *           e. my_local_rank - relative rank on the local node among the
 *              other peers in this job (i.e., MCW)
 *           f. num_local_peers - number of local peers (peers in MCW on
 *              your node)
 *           g. my_hnp_uri - contact URI for this process's run-time head
 *              node process (HNP), if any
 *           h. peer_modex - a collective id for the modex operation
 *           i. peer_init_barrier - a collective id for the barrier during
 *              MPI_Init
 *           j. peer_fini_barrier - a collective id for the barrier during
 *              MPI_Finalize
 *           k. job_session_dir - path to the session directory for this job
 *           l. proc_session_dir - path to the session directory for this
 *              process
 *           m. nodename - a string name for the node this process is on
 *           n. cpuset - a string representation of the cpus to which this
 *              process is bound, if any
 *     2. ompi_process_info - a global instance of the ompi_process_info_t
 *        structure.
 *     3. ompi_rte_proc_is_bound - global boolean that will be true if the
 *        runtime bound the process to a particular core or set of cores,
 *        and false otherwise.
 *
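 *        An illustrative layout, for reference only; the exact field types
 *        are up to the RTE (the ones shown are plausible choices, not
 *        requirements):
 *
 *            typedef struct {
 *                uint32_t     app_num;
 *                pid_t        pid;
 *                uint32_t     num_procs;
 *                uint16_t     my_node_rank;
 *                uint16_t     my_local_rank;
 *                int32_t      num_local_peers;
 *                char        *my_hnp_uri;
 *                int32_t      peer_modex;
 *                int32_t      peer_init_barrier;
 *                int32_t      peer_fini_barrier;
 *                char        *job_session_dir;
 *                char        *proc_session_dir;
 *                char        *nodename;
 *                char        *cpuset;
 *            } ompi_process_info_t;
 *
 *            extern ompi_process_info_t ompi_process_info;
 *            extern bool ompi_rte_proc_is_bound;
 *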
 * (d) Error handling objects and operations (an illustrative error path
 *     follows this list)
 *     1. void ompi_rte_abort(int err_code, char *fmt, ...) - abort the
 *        current process with the specified error code and message.
 *     2. int ompi_rte_abort_peers(ompi_process_name_t *procs, size_t nprocs) -
 *        abort the specified list of peers.
 *     3. OMPI_ERROR_LOG(rc) - print an error message regarding the given
 *        return code.
 *
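 *        An illustrative error path; the failing call is just an example
 *        ('coll' as in the (b) sketch), and OMPI_SUCCESS / opal_strerror()
 *        are the usual OMPI/OPAL facilities:
 *
 *            int rc = ompi_rte_modex(coll);   // rc from any RTE call
 *            if (OMPI_SUCCESS != rc) {
 *                OMPI_ERROR_LOG(rc);
 *                ompi_rte_abort(rc, "modex failed: %s", opal_strerror(rc));
 *            }
 *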
 * (e) Init and finalize objects and operations (an illustrative call
 *     ordering follows this list)
 *     1. ompi_rte_init - a function to initialize the RTE. The function
 *        prototype must be of the form:
 *        int ompi_rte_init(int *argc, char ***argv);
 *     2. ompi_rte_finalize - a function to finalize the RTE. The function
 *        prototype must be of the form:
 *        int ompi_rte_finalize(void);
 *     3. void ompi_rte_wait_for_debugger(void) - Called during MPI_Init, this
 *        function is used to wait for debuggers to do their pre-MPI attach.
 *        If there is no attached debugger, this function will not block.
 *
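 *        An illustrative call ordering, roughly mirroring what MPI_Init and
 *        MPI_Finalize do (error handling omitted):
 *
 *            ompi_rte_init(&argc, &argv);        // bring up the RTE
 *            ompi_rte_wait_for_debugger();       // no-op if no debugger
 *            // ... exchange modex data, run the application ...
 *            ompi_rte_finalize();                // tear down the RTE
 *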
 * (f) Database operations (an illustrative store/fetch pair follows
 *     this list)
 *     1. ompi_rte_db_store - a function to store modex and other data in
 *        a local database. The function is primarily used for storing modex
 *        data, but can be used for general purposes. The prototype must be
 *        of the form:
 *        int ompi_rte_db_store(const ompi_process_name_t *proc,
 *                              const char *key, const void *data,
 *                              opal_data_type_t type);
 *        The implementation of this function must store a COPY of the data
 *        provided - the data is NOT guaranteed to be valid after return
 *        from the call.
 *     2. ompi_rte_db_fetch - a function to fetch a copy of data previously
 *        stored under the given key for the given process.
 *        NOTE: fetch accepts an 'ompi_proc_t' rather than a process name.
 *        int ompi_rte_db_fetch(const struct ompi_proc_t *proc,
 *                              const char *key,
 *                              void **data,
 *                              opal_data_type_t type);
 *     3. ompi_rte_db_fetch_pointer - a function to fetch a pointer to data
 *        previously stored under the given key for the given process.
 *        NOTE: fetch accepts an 'ompi_proc_t' rather than a process name.
 *        int ompi_rte_db_fetch_pointer(const struct ompi_proc_t *proc,
 *                                      const char *key,
 *                                      void **data,
 *                                      opal_data_type_t type);
 *     4. Pre-defined db keys (with associated values after rte_init):
 *        a. OMPI_DB_HOSTNAME
 *        b. OMPI_DB_LOCALITY
 *
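 *        An illustrative store/fetch pair. The key string and the payload
 *        variables (my_len, my_bytes) are made-up placeholders;
 *        OPAL_BYTE_OBJECT is an existing OPAL DSS type:
 *
 *            opal_byte_object_t bo = { .size = my_len, .bytes = my_bytes };
 *            ompi_rte_db_store(OMPI_PROC_MY_NAME, "btl.foo.addr",
 *                              &bo, OPAL_BYTE_OBJECT);
 *
 *            // later, on a peer: fetch a copy of that value
 *            // ('proc' is the peer's ompi_proc_t)
 *            opal_byte_object_t *remote = NULL;
 *            ompi_rte_db_fetch(proc, "btl.foo.addr",
 *                              (void**)&remote, OPAL_BYTE_OBJECT);
 *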
 * (g) Communication support
 *
 */

#ifndef OMPI_MCA_RTE_H
#define OMPI_MCA_RTE_H

#include "ompi_config.h"

#include "opal/dss/dss_types.h"
#include "ompi/mca/mca.h"
#include "opal/mca/base/base.h"

BEGIN_C_DECLS

/**
 * Structure for rte components.
 */
struct ompi_rte_base_component_1_0_0_t {
    /** MCA base component */
    mca_base_component_t base_version;
    /** MCA base data */
    mca_base_component_data_t base_data;
};

/**
 * Convenience typedefs
 */
typedef struct ompi_rte_base_component_1_0_0_t ompi_rte_base_component_1_0_0_t;
typedef struct ompi_rte_base_component_1_0_0_t ompi_rte_component_t;

/**
 * Macro for use in components that are of type rte
 */
#define OMPI_RTE_BASE_VERSION_1_0_0 \
    OMPI_MCA_BASE_VERSION_2_1_0("rte", 2, 0, 0)

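/*
 * For illustration only: a component in this framework would typically be
 * declared along these lines. The component name "example" is hypothetical,
 * and the exact initializer layout depends on the MCA base version in use:
 *
 *     ompi_rte_component_t mca_rte_example_component = {
 *         .base_version = {
 *             OMPI_RTE_BASE_VERSION_1_0_0,
 *             .mca_component_name = "example",
 *             MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION,
 *                                   OMPI_MINOR_VERSION, OMPI_RELEASE_VERSION),
 *         },
 *     };
 */
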
END_C_DECLS

/* include implementation to call */
#include MCA_rte_IMPLEMENTATION_HEADER

BEGIN_C_DECLS

/*
 * MCA Framework
 */
OMPI_DECLSPEC extern mca_base_framework_t ompi_rte_base_framework;

/* In a few places, we need to wait until something happens that changes a
 * flag to indicate we can release - e.g., waiting for a specific RTE
 * message to arrive. We don't want to block MPI progress while waiting, so
 * we loop over opal_progress(), letting the RTE progress thread move the
 * RTE along.
 */
#define OMPI_WAIT_FOR_COMPLETION(flg)                                       \
    do {                                                                    \
        opal_output_verbose(1, ompi_rte_base_framework.framework_output,    \
                            "%s waiting on RTE event at %s:%d",             \
                            OMPI_NAME_PRINT(OMPI_PROC_MY_NAME),             \
                            __FILE__, __LINE__);                            \
        while ((flg)) {                                                     \
            opal_progress();                                                \
        }                                                                   \
    } while(0)

#define OMPI_LAZY_WAIT_FOR_COMPLETION(flg)                                  \
    do {                                                                    \
        opal_output_verbose(1, ompi_rte_base_framework.framework_output,    \
                            "%s lazy waiting on RTE event at %s:%d",        \
                            OMPI_NAME_PRINT(OMPI_PROC_MY_NAME),             \
                            __FILE__, __LINE__);                            \
        while ((flg)) {                                                     \
            opal_progress();                                                \
            usleep(100);                                                    \
        }                                                                   \
    } while(0)

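/*
 * Typical (illustrative) use: some RTE callback clears the flag while the
 * caller spins in opal_progress(). The request-posting call shown here is
 * hypothetical - any mechanism that eventually flips the flag works:
 *
 *     volatile bool active = true;
 *     post_rte_request(&active);          // hypothetical; clears 'active'
 *                                         // from the RTE progress thread
 *     OMPI_WAIT_FOR_COMPLETION(active);
 */
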
typedef struct {
    opal_list_item_t super;
    ompi_process_name_t name;
} ompi_namelist_t;
OBJ_CLASS_DECLARATION(ompi_namelist_t);
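
/*
 * Illustrative use of ompi_namelist_t, building a list of peer names with
 * the usual OPAL list/object macros:
 *
 *     opal_list_t peers;
 *     OBJ_CONSTRUCT(&peers, opal_list_t);
 *     ompi_namelist_t *nm = OBJ_NEW(ompi_namelist_t);
 *     nm->name = *OMPI_PROC_MY_NAME;
 *     opal_list_append(&peers, &nm->super);
 *     OPAL_LIST_DESTRUCT(&peers);         // releases the items too
 */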

END_C_DECLS

#endif /* OMPI_MCA_RTE_H */
