root/ompi/mca/mtl/mtl.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2004-2006 The Regents of the University of California.
   4  *                         All rights reserved.
   5  * Copyright (c) 2012      Sandia National Laboratories.  All rights reserved.
   6  * Copyright (c) 2015      Los Alamos National Security, LLC. All rights
   7  *                         reserved.
   8  * Copyright (c) 2017      Intel, Inc. All rights reserved
   9  * $COPYRIGHT$
  10  *
  11  * Additional copyrights may follow
  12  *
  13  * $HEADER$
  14  */
  15 
  16 /**
  17  * @file
  18  *
  19  * Matching Transport Layer
  20  *
  21  * The Matching Transport Layer (MTL) provides device-layer support
  22  * for transfer of MPI point-to-point messages over devices that
  23  * support hardware / library message matching.  This layer is used
  24  * with the MTL PML component to provide lowest latency and highest
  25  * bandwidth on given architectures.  Features found in other PML
  26  * interfaces, such as message fragmenting, multi-device support, and
  27  * NIC failover, are not provided by the upper layers.
  28  *
  29  * In general, this interface should not be used for transport layer
  30  * support.  Instead, the BTL interface should be used.  The BTL
  31  * interface allows for multiplexing between multiple users
  32  * (point-to-point, one-sided, etc.) and provides many features not
  33  * found in this interface (RDMA from arbitrary buffers, active
  34  * messaging, reasonable pinned memory caching, etc.)
  35  */
  36 
  37 #ifndef OMPI_MTL_H
  38 #define OMPI_MTL_H
  39 
  40 #include "ompi_config.h"
  41 #include "mpi.h" /* needed for MPI_ANY_TAG */
  42 #include "ompi/mca/mca.h"
  43 #include "ompi/mca/pml/pml_constants.h" /* for send_mode enum */
  44 #include "ompi/request/request.h"
  45 
  46 BEGIN_C_DECLS
  47 
  48 struct ompi_request_t;     /* forward declaration; this header only stores/passes pointers to it */
  49 struct opal_convertor_t;   /* forward declaration; this header only passes pointers to it */
  50 
  51 struct mca_mtl_base_module_t; /* defined further down; declared early for the function typedefs */
  52 
  53 struct mca_mtl_request_t { /* request handle passed to the isend/irecv/imrecv/cancel entry points */
  54     /** pointer to associated ompi_request_t (populated by the PML before isend/irecv is called) */
  55     struct ompi_request_t *ompi_req;
  56     void (*completion_callback)(struct mca_mtl_request_t* mtl_request); /**< receives this request; presumably run on completion -- TODO confirm */
  57 };
  58 typedef struct mca_mtl_request_t mca_mtl_request_t;
  59 
  60 
  61 /**
  62  * MTL module flags (distinct single-bit values; presumably stored in mtl_flags -- confirm)
  63  */
  64 #define MCA_MTL_BASE_FLAG_REQUIRE_WORLD 0x00000001
  65 #if OPAL_CUDA_SUPPORT /* the CUDA flag exists only when OPAL_CUDA_SUPPORT is non-zero */
  66 #define MCA_MTL_BASE_FLAG_CUDA_INIT_DISABLE 0x00000002
  67 #endif
  68 
  69 /**
  70  * Initialization routine for MTL component
  71  *
  72  * Initialization routine for MTL component.  This function should
  73  * allocate resources for communication and try to do all local setup.
  74  * It should not attempt to contact its peers, as that should be
  75  * done at add_procs time.  Contact information should be published
  76  * during this initialization function.  It will be made available
  77  * during add_procs().
  78  *
  79  * @param enable_progress_threads (IN) Progress threads have been
  80  *                  enabled by the user and the component must be
  81  *                  capable of making asynchronous progress (either
  82  *                  with its own thread, with the kernel, or with
  83  *                  the event library).
  84  * @param enable_mpi_threads (IN) MPI threads have been enabled by the
  85  *                  user and the component must be capable of coping
  86  *                  with threads.
  87  *
  88  * NOTE(review): this comment previously documented a third argument,
  89  * enable_mpi_thread_multiple (OUT), for reporting MPI_THREAD_MULTIPLE
  90  * support (otherwise only THREAD_FUNNELLED and THREAD_SERIALIZED were
  91  * assumed).  The typedef below takes only the two (IN) flags, so no
  92  * such output exists in this signature; how thread-multiple support
  93  * is reported is not visible in this header -- confirm.
  94  *
  95  * @retval NULL     component can not operate on the current machine
  96  * @retval non-NULL pointer to the MTL module to use
  97  *                  (struct mca_mtl_base_module_t)
  98  */
  99 typedef struct mca_mtl_base_module_t*
 100 (*mca_mtl_base_component_init_fn_t)(bool enable_progress_threads,
 101                                     bool enable_mpi_threads);
 102 
 103 
 104 struct mca_mtl_base_component_2_0_0_t { /* MTL component descriptor, interface version 2.0.0 */
 105   mca_base_component_t mtl_version;          /**< MCA base component header (type declared outside this file) */
 106   mca_base_component_data_t mtl_data;        /**< MCA base component data (type declared outside this file) */
 107   mca_mtl_base_component_init_fn_t mtl_init; /**< component initialization function */
 108 };
 109 typedef struct mca_mtl_base_component_2_0_0_t mca_mtl_base_component_2_0_0_t;
 110 typedef struct mca_mtl_base_component_2_0_0_t mca_mtl_base_component_t; /* unversioned alias of the same struct */
 111 
 112 
 113 /**
 114  * MCA->MTL Clean up any resources held by MTL module
 115  *
 116  * Opposite of module_init.  Called when communication will no longer
 117  * be necessary.  Usually this is during MPI_FINALIZE, but it can be
 118  * earlier if the component was not selected to run.  Assuming
 119  * module_init was called, finalize will always be called before the
 120  * component_close function is called.
 121  *
 122  * @param mtl (IN)   MTL module returned from call to initialize
 123  *
 124  * @retval OMPI_SUCCESS cleanup finished successfully
 125  * @retval other        failure during cleanup
 126  *
 127  */
 128 typedef int (*mca_mtl_base_module_finalize_fn_t)(struct mca_mtl_base_module_t* mtl);
 129 
 130 
 131 /**
 132  * PML->MTL notification of change in the process list.
 133  *
 134  * The mca_mtl_base_module_add_procs_fn_t() is used by the PML to
 135  * notify the MTL that new processes are connected to the current
 136  * process.  Any addressing information exported by the peer via the
 137  * ompi_modex_send() function should be available during this
 138  * call via the corresponding ompi_modex_recv() function.  The
 139  * MTL may utilize this information to determine reachability of each
 140  * peer process.
 141  *
 142  * It is an error for a proc to not be reachable by the given MTL, and
 143  * an error should be returned if that case is detected.  If an MTL
 144  * requires per-endpoint data, it must handle storage, either using a
 145  * static endpoint tag (MTL is the default tag that should generally
 146  * be used) or a dynamic endpoint tag (although it should be noted
 147  * that OMPI can be built without dynamic endpoint tag support).
 148  *
 149  * @param mtl (IN)            MTL module
 150  * @param nprocs (IN)         Number of processes in procs
 151  * @param procs (IN)          Set of processes (array of pointers)
 152  *
 153  * @retval OMPI_SUCCESS successfully connected to processes
 154  * @retval other        failure during setup
 155  */
 156 typedef int (*mca_mtl_base_module_add_procs_fn_t)(
 157                             struct mca_mtl_base_module_t* mtl,
 158                             size_t nprocs,
 159                             struct ompi_proc_t** procs);
 160 
 161 
 162 /**
 163  * Notification of change to the process list.
 164  *
 165  * When the process list changes, the PML notifies the MTL of the
 166  * change, to provide the opportunity to clean up or release any
 167  * resources associated with the peer.  The MTL is responsible for
 168  * releasing any memory associated with the endpoint data it may have
 169  * stored during add_procs().
 170  *
 171  * @param mtl (IN)     MTL module
 172  * @param nprocs (IN)  Number of processes
 173  * @param procs (IN)   Set of processes
 174  *                     (no separate peer-address argument is passed)
 175  *
 176  * @return             Status indicating if cleanup was successful
 177  */
 178 typedef int (*mca_mtl_base_module_del_procs_fn_t)(
 179                             struct mca_mtl_base_module_t* mtl,
 180                             size_t nprocs,
 181                             struct ompi_proc_t** procs);
 182 
 183 
 184 /**
 185  * Blocking send to peer
 186  *
 187  * Blocking send (Call should not return until the user buffer may be
 188  * used again).  Standard MPI semantics must be met by this call, as
 189  * mandated in the mode argument.  There is one special mode argument,
 190  * MCA_PML_BASE_SEND_COMPLETE, which requires local completion before
 191  * the function can return.  This is an optimization for collective
 192  * routines that can otherwise lead to degenerate performance for
 193  * broadcast-based collectives.
 194  *
 195  * @param mtl (IN)       MTL module
 196  * @param comm (IN)      Communicator used for operation
 197  * @param dest (IN)      Destination rank for send (relative to comm)
 198  * @param tag (IN)       MPI tag used for sending.  See note below.
 199  * @param convertor (IN) Datatype convertor describing send datatype.
 200  *                       Already prepared for send.
 201  * @param mode (IN)      Mode for send operation
 202  * @return               OMPI_SUCCESS or error value
 203  *
 204  * \note Open MPI is built around non-blocking operations.  This
 205  * function is provided for networks where progressing events outside
 206  * of point-to-point (for example, collectives, I/O, one-sided) can
 207  * occur without a progress function regularly being triggered.
 208  *
 209  * \note While MPI does not allow users to specify negative tags, they
 210  * are used internally in Open MPI to provide a unique channel for
 211  * collective operations.  Therefore, the MTL can *not* cause an error
 212  * if a negative tag is used.
 213  */
 214 typedef int (*mca_mtl_base_module_send_fn_t)(
 215                           struct mca_mtl_base_module_t* mtl,
 216                           struct ompi_communicator_t *comm,
 217                           int dest,
 218                           int tag,
 219                           struct opal_convertor_t *convertor,
 220                           mca_pml_base_send_mode_t mode);
 221 
 222 
 223 /**
 224  * Non-blocking send to peer
 225  *
 226  * Non-blocking send to peer.  Standard MPI semantics must be met by
 227  * this call, as mandated in the mode argument.  There is one special
 228  * mode argument, MCA_PML_BASE_SEND_COMPLETE, which requires local
 229  * completion before the request is marked as complete.
 230  *
 231  * The PML will handle creation of the request, leaving the number of
 232  * bytes requested in the module structure (mtl_request_size)
 233  * available for the MTL directly after the ompi_request_t structure.
 234  * The PML will handle proper destruction of the request once it can
 235  * safely be destructed (it has been completed and freed by a call to
 236  * REQUEST_FREE or TEST/WAIT).  The MTL should remove all resources
 237  * associated with the request when it is marked as completed.
 238  *
 239  * @param mtl (IN)       MTL module
 240  * @param comm (IN)      Communicator used for operation
 241  * @param dest (IN)      Destination rank for send (relative to comm)
 242  * @param tag (IN)       MPI tag used for sending.  See note below.
 243  * @param convertor (IN) Datatype convertor describing send datatype.
 244  *                       Already prepared for send.
 245  * @param mode (IN)      Mode for send operation (see pml.h)
 246  * @param blocking (IN)  True if the call originated from a blocking
 247  *                       call, but the PML decided to use a
 248  *                       non-blocking operation, likely for
 249  *                       internal performance decisions.  This is an
 250  *                       optimization flag and is not needed for
 251  *                       correctness.
 252  * @param mtl_request (IN) Pointer to mtl_request.  The ompi_req field
 253  *                       will be populated with an initialized
 254  *                       ompi_request_t before calling.
 255  * @return               OMPI_SUCCESS or error value
 256  *
 257  * \note While MPI does not allow users to specify negative tags, they
 258  * are used internally in Open MPI to provide a unique channel for
 259  * collective operations.  Therefore, the MTL can *not* cause an error
 260  * if a negative tag is used.
 261  */
 262 typedef int (*mca_mtl_base_module_isend_fn_t)(
 263                           struct mca_mtl_base_module_t* mtl,
 264                           struct ompi_communicator_t *comm,
 265                           int dest,
 266                           int tag,
 267                           struct opal_convertor_t *convertor,
 268                           mca_pml_base_send_mode_t mode,
 269                           bool blocking,
 270                           mca_mtl_request_t *mtl_request);
 271 
 272 
 273 /**
 274  * Non-blocking receive
 275  *
 276  * Non-blocking receive function.  Standard MPI semantics for
 277  * MPI_Irecv must be implemented by this call.
 278  *
 279  * The PML will handle creation of the request, leaving the number of
 280  * bytes requested in the module structure (mtl_request_size)
 281  * available for the MTL directly after the ompi_request_t structure.
 282  * The PML will handle proper destruction of the request once it can
 283  * safely be destroyed (it has been completed and freed by a call to
 284  * REQUEST_FREE or TEST/WAIT).  The MTL should remove all resources
 285  * associated with the request when it is marked as completed.
 286  *
 287  * @param mtl (IN)       MTL module
 288  * @param comm (IN)      Communicator used for operation
 289  * @param src (IN)       Source rank for receive (relative to comm)
 290  * @param tag (IN)       MPI tag to receive on.  See note below.
 291  * @param convertor (IN) Datatype convertor describing receive datatype.
 292  *                       Already prepared for receive.
 293  * @param mtl_request (IN) Pointer to mtl_request.  The ompi_req field
 294  *                       will be populated with an initialized
 295  *                       ompi_request_t before calling.
 296  * @return               OMPI_SUCCESS or error value
 297  *
 298  * \note While MPI does not allow users to specify negative tags, they
 299  * are used internally in Open MPI to provide a unique channel for
 300  * collective operations.  Therefore, the MTL can *not* cause an error
 301  * if a negative tag is used.  Further, MPI_ANY_TAG should *not* match
 302  * against negative tags.
 303  */
 304 typedef int (*mca_mtl_base_module_irecv_fn_t)(
 305                           struct mca_mtl_base_module_t* mtl,
 306                           struct ompi_communicator_t *comm,
 307                           int src,
 308                           int tag,
 309                           struct opal_convertor_t *convertor,
 310                           struct mca_mtl_request_t *mtl_request);
 311 
 312 
 313 /**
 314  * Non-blocking probe
 315  *
 316  * Non-blocking probe function.  Standard MPI semantics for MPI_IPROBE
 317  * must be implemented by this call.
 318  *
 319  * @param mtl (IN)       MTL module
 320  * @param comm (IN)      Communicator used for operation
 321  * @param src (IN)       Source rank to probe (relative to comm)
 322  * @param tag (IN)       MPI tag to probe for.  See note below.
 323  * @param flag (OUT)     true if message available, false otherwise
 324  * @param status (OUT)   Status information for the available message
 325  *
 326  * \note While MPI does not allow users to specify negative tags, they
 327  * are used internally in Open MPI to provide a unique channel for
 328  * collective operations.  Therefore, the MTL can *not* cause an error
 329  * if a negative tag is used.  Further, MPI_ANY_TAG should *not* match
 330  * against negative tags.
 331  */
 332 typedef int (*mca_mtl_base_module_iprobe_fn_t)(
 333                           struct mca_mtl_base_module_t* mtl,
 334                           struct ompi_communicator_t *comm,
 335                           int src,
 336                           int tag,
 337                           int *flag,
 338                           struct ompi_status_public_t *status);
 339 
 340 
 341 typedef int (*mca_mtl_base_module_imrecv_fn_t)(struct mca_mtl_base_module_t* mtl,      /* mtl: MTL module */
 342                                                struct opal_convertor_t *convertor,     /* convertor: datatype convertor for the receive */
 343                                                struct ompi_message_t **message,        /* message: address of a message handle (presumably one produced by mtl_improbe -- confirm) */
 344                                                struct mca_mtl_request_t *mtl_request); /* mtl_request: MTL request; typedef presumably backs MPI_Imrecv -- confirm */
 345 
 346 typedef int (*mca_mtl_base_module_improbe_fn_t)(struct mca_mtl_base_module_t *mtl,     /* mtl: MTL module */
 347                                                 struct ompi_communicator_t *comm,      /* comm: communicator to probe */
 348                                                 int src,                               /* src: source rank */
 349                                                 int tag,                               /* tag: tag to probe for */
 350                                                 int *matched,                          /* matched: out flag; presumably non-zero when a message matched -- confirm */
 351                                                 struct ompi_message_t **message,       /* message: out; presumably receives the matched message handle -- confirm */
 352                                                 struct ompi_status_public_t *status);  /* status: out; typedef presumably backs MPI_Improbe -- confirm */
 353 
 354 /**
 355  * Cancel an existing request
 356  *
 357  * Attempt to cancel an existing request.  The (poorly defined)
 358  * semantics for MPI_CANCEL must be implemented by this call.  This,
 359  * of course, allows the MTL module to do nothing at all.
 360  * Implementations of the MTL should make a good faith effort to
 361  * cancel receive requests that have not been started, as the "post a
 362  * receive for control messages" paradigm is a common one in loosely
 363  * coupled MPI applications.
 364  *
 365  * @param mtl (IN)         MTL module
 366  * @param mtl_request (IN) Request that should be cancelled
 367  * @param flag             Purpose not documented -- TODO(review): confirm with callers
 368  */
 369 typedef int (*mca_mtl_base_module_cancel_fn_t)(
 370                           struct mca_mtl_base_module_t* mtl,
 371                           mca_mtl_request_t *mtl_request,
 372                           int flag);
 373 
 374 
 375 /**
 376  * Downcall from PML layer when a new communicator is created.
 377  * Provides the MTL the opportunity to initialize/cache a data
 378  * structure on the communicator.
 379  *
 380  * @param mtl   MTL module
 381  * @param comm  Communicator
 382  * @return      OMPI_SUCCESS or failure status.
 383  */
 384 typedef int (*mca_mtl_base_module_add_comm_fn_t)(
 385                           struct mca_mtl_base_module_t* mtl,
 386                           struct ompi_communicator_t* comm);
 387 
 388 
 389 /**
 390  * Downcall from PML layer when a communicator is destroyed.
 391  * Provides the MTL the opportunity to clean up any data structures
 392  * associated with the communicator.
 393  *
 394  * @param mtl   MTL module
 395  * @param comm  Communicator
 396  * @return      OMPI_SUCCESS or failure status.
 397  */
 398 typedef int (*mca_mtl_base_module_del_comm_fn_t)(
 399                           struct mca_mtl_base_module_t* mtl,
 400                           struct ompi_communicator_t* comm);
 401 
 402 
 403 /**
 404  * MTL module interface functions and attributes.  OMPI_MTL_CALL(a)
 405  * resolves to member mtl_<a> of the global ompi_mtl (indirect mode). */
 406 struct mca_mtl_base_module_t {
 407     int      mtl_max_contextid;   /**< maximum allowable contextid */
 408     int      mtl_max_tag;         /**< maximum tag value.  note that negative tags must be allowed */
 409     size_t   mtl_request_size;    /**< number of bytes to reserve with request structure */
 410 
 411     uint32_t mtl_flags;           /**< module flags; presumably a mask of MCA_MTL_BASE_FLAG_* values -- confirm */
 412 
 413     /* MTL function table */
 414     mca_mtl_base_module_add_procs_fn_t   mtl_add_procs;
 415     mca_mtl_base_module_del_procs_fn_t   mtl_del_procs;
 416     mca_mtl_base_module_finalize_fn_t    mtl_finalize;
 417 
 418     mca_mtl_base_module_send_fn_t        mtl_send;
 419     mca_mtl_base_module_isend_fn_t       mtl_isend;
 420     mca_mtl_base_module_irecv_fn_t       mtl_irecv;
 421     mca_mtl_base_module_iprobe_fn_t      mtl_iprobe;
 422     mca_mtl_base_module_imrecv_fn_t      mtl_imrecv;
 423     mca_mtl_base_module_improbe_fn_t     mtl_improbe;
 424 
 425     /* Optional MTL functions (presumably may be left NULL -- confirm before calling) */
 426     mca_mtl_base_module_cancel_fn_t      mtl_cancel;
 427     mca_mtl_base_module_add_comm_fn_t    mtl_add_comm;
 428     mca_mtl_base_module_del_comm_fn_t    mtl_del_comm;
 429 };
 430 typedef struct mca_mtl_base_module_t mca_mtl_base_module_t;
 431 
 432 /*
 433  * Macro for use in modules that are of type mtl.  NOTE(review): presumably
 434  * the initializer for a component's mtl_version member -- confirm. */
 435 #define MCA_MTL_BASE_VERSION_2_0_0 \
 436     OMPI_MCA_BASE_VERSION_2_1_0("mtl", 2, 0, 0)
 437 
 438 OMPI_DECLSPEC extern mca_mtl_base_module_t *ompi_mtl; /* module that OMPI_MTL_CALL dereferences when direct calls are disabled */
 439 
 440 /*
 441  * OMPI_MTL_CALL(a): names MTL entry point <a>, either as a direct
 442  * symbol of one component or as member mtl_<a> of ompi_mtl. */
 443 #if MCA_ompi_mtl_DIRECT_CALL
 444 /* Direct mode: OMPI_MTL_CALL(a) becomes ompi_mtl_<COMPONENT>_<a>. */
 445 /* EXPANDER exists so the component macro is expanded before ## pastes it. */
 446 #define OMPI_MTL_CALL_STAMP(a, b) ompi_mtl_ ## a ## _ ## b
 447 #define OMPI_MTL_CALL_EXPANDER(a, b) OMPI_MTL_CALL_STAMP(a,b)
 448 #define OMPI_MTL_CALL(a) OMPI_MTL_CALL_EXPANDER(MCA_ompi_mtl_DIRECT_CALL_COMPONENT, a)
 449 /* The header named by this macro presumably declares those direct symbols -- confirm. */
 450 #include MCA_ompi_mtl_DIRECT_CALL_HEADER
 451 
 452 #else /* indirect mode: go through the module function table */
 453 #define OMPI_MTL_CALL(a) ompi_mtl->mtl_ ## a
 454 #endif
 455 
 456 
 457 END_C_DECLS
 458 #endif

/* [<][>][^][v][top][bottom][index][help] */