root/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


   1 /*
   2  * Copyright (c) 2004-2010 The Trustees of Indiana University.
   3  *                         All rights reserved.
   4  * Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
   5  *                         All rights reserved.
   6  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
   7  *                         University of Stuttgart.  All rights reserved.
   8  * Copyright (c) 2004-2005 The Regents of the University of California.
   9  *                         All rights reserved.
  10  * Copyright (c) 2011      Oak Ridge National Labs.  All rights reserved.
  11  * $COPYRIGHT$
  12  *
  13  * Additional copyrights may follow
  14  *
  15  * $HEADER$
  16  */
  17 
  18 /**
  19  * @file
  20  *
  21  * Bookmark (bkmrk) CRCP component, PML interface (note: the include guard below uses the name 'HOKE')
  22  *
  23  */
  24 
  25 #ifndef MCA_CRCP_HOKE_PML_EXPORT_H
  26 #define MCA_CRCP_HOKE_PML_EXPORT_H
  27 
  28 #include "ompi_config.h"
  29 
  30 #include "ompi/mca/mca.h"
  31 #include "ompi/mca/crcp/crcp.h"
  32 #include "ompi/communicator/communicator.h"
  33 
  34 #include "ompi/mca/crcp/bkmrk/crcp_bkmrk.h"
  35 
  36 BEGIN_C_DECLS
  37 
  38     /*
  39      * PML Coordination functions: every hook takes a pml_state as its last argument and returns a pml_state pointer
  40      */
  41     ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_enable
  42     ( bool enable, ompi_crcp_base_pml_state_t* pml_state );
  43 
  44     ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_add_comm
  45     ( struct ompi_communicator_t* comm,
  46       ompi_crcp_base_pml_state_t* pml_state );
  47     ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_del_comm
  48     ( struct ompi_communicator_t* comm,
  49       ompi_crcp_base_pml_state_t* pml_state );
  50 
  51     ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_add_procs
  52     ( struct ompi_proc_t **procs, size_t nprocs,
  53       ompi_crcp_base_pml_state_t* pml_state );
  54     ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_del_procs
  55     ( struct ompi_proc_t **procs, size_t nprocs,
  56       ompi_crcp_base_pml_state_t* pml_state );
  57 
  58     ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_progress
  59     (ompi_crcp_base_pml_state_t* pml_state);
  60 
  61     ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_iprobe
  62     (int dst, int tag, struct ompi_communicator_t* comm, /* NOTE(review): rank is named 'dst' here but 'src' in the recv hooks -- confirm */
  63      int *matched, ompi_status_public_t* status,
  64      ompi_crcp_base_pml_state_t* pml_state );
  65 
  66     ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_probe
  67     ( int dst, int tag, struct ompi_communicator_t* comm, /* NOTE(review): rank is named 'dst' here but 'src' in the recv hooks -- confirm */
  68       ompi_status_public_t* status,
  69       ompi_crcp_base_pml_state_t* pml_state );
  70 
  71     ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_isend_init
  72     ( void *buf, size_t count, ompi_datatype_t *datatype,
  73       int dst, int tag, mca_pml_base_send_mode_t mode,
  74       struct ompi_communicator_t* comm,
  75       struct ompi_request_t **request,
  76       ompi_crcp_base_pml_state_t* pml_state );
  77 
  78     ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_isend
  79     ( void *buf, size_t count, ompi_datatype_t *datatype,
  80       int dst, int tag, mca_pml_base_send_mode_t mode,
  81       struct ompi_communicator_t* comm,
  82       struct ompi_request_t **request,
  83       ompi_crcp_base_pml_state_t* pml_state );
  84 
  85     ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_send
  86     (  void *buf, size_t count, ompi_datatype_t *datatype,
  87        int dst, int tag, mca_pml_base_send_mode_t mode,
  88        struct ompi_communicator_t* comm,
  89        ompi_crcp_base_pml_state_t* pml_state );
  90 
  91     ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_irecv_init
  92     ( void *buf, size_t count, ompi_datatype_t *datatype,
  93       int src, int tag, struct ompi_communicator_t* comm,
  94       struct ompi_request_t **request,
  95       ompi_crcp_base_pml_state_t* pml_state);
  96 
  97     ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_irecv
  98     ( void *buf, size_t count, ompi_datatype_t *datatype,
  99       int src, int tag, struct ompi_communicator_t* comm,
 100       struct ompi_request_t **request,
 101       ompi_crcp_base_pml_state_t* pml_state );
 102 
 103     ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_recv
 104     (  void *buf, size_t count, ompi_datatype_t *datatype,
 105        int src, int tag, struct ompi_communicator_t* comm,
 106        ompi_status_public_t* status,
 107        ompi_crcp_base_pml_state_t* pml_state);
 108 
 109     ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_dump
 110     ( struct ompi_communicator_t* comm, int verbose,
 111       ompi_crcp_base_pml_state_t* pml_state );
 112 
 113     ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_start
 114     ( size_t count, ompi_request_t** requests,
 115       ompi_crcp_base_pml_state_t* pml_state );
 116 
 117     ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_ft_event
 118     (int state, ompi_crcp_base_pml_state_t* pml_state);
 119 
 120     enum ompi_crcp_bkmrk_pml_quiesce_tag_type_t { /* Tag argument accepted by the quiesce_start/quiesce_end calls declared below */
 121         QUIESCE_TAG_NONE = 0, /* 0 No tag specified */
 122         QUIESCE_TAG_CKPT,     /* 1 Prepare for checkpoint */
 123         QUIESCE_TAG_CONTINUE, /* 2 Continue after a checkpoint */
 124         QUIESCE_TAG_RESTART,  /* 3 Restart from a checkpoint */
 125         QUIESCE_TAG_UNKNOWN   /* 4 Unknown */
 126     };
 127     typedef enum ompi_crcp_bkmrk_pml_quiesce_tag_type_t ompi_crcp_bkmrk_pml_quiesce_tag_type_t;
 128 
 129     int ompi_crcp_bkmrk_pml_quiesce_start(ompi_crcp_bkmrk_pml_quiesce_tag_type_t tag );
 130     int ompi_crcp_bkmrk_pml_quiesce_end(ompi_crcp_bkmrk_pml_quiesce_tag_type_t tag );
 131 
 132     /*
 133      * Request function (presumably called when 'request' completes -- TODO confirm against the caller)
 134      */
 135     int ompi_crcp_bkmrk_request_complete(struct ompi_request_t *request);
 136 
 137     /***********************************
 138      * Globally Defined Structures
 139      ***********************************/
 140     /*
 141      * Types of Messages: blocking (B), non-blocking (I) and persistent (P) sends and receives
 142      */
 143     enum ompi_crcp_bkmrk_pml_message_type_t {
 144         COORD_MSG_TYPE_UNKNOWN, /* 0 Unknown type      */
 145         COORD_MSG_TYPE_B_SEND,  /* 1 Blocking Send     */
 146         COORD_MSG_TYPE_I_SEND,  /* 2 Non-Blocking Send */
 147         COORD_MSG_TYPE_P_SEND,  /* 3 Persistent  Send  */
 148         COORD_MSG_TYPE_B_RECV,  /* 4 Blocking Recv     */
 149         COORD_MSG_TYPE_I_RECV,  /* 5 Non-Blocking Recv */
 150         COORD_MSG_TYPE_P_RECV   /* 6 Persistent  Recv  */
 151     };
 152     typedef enum ompi_crcp_bkmrk_pml_message_type_t ompi_crcp_bkmrk_pml_message_type_t;
 153 
 154     /*
 155      * A list structure to contain {buffer, request, status} sets
 156      * Which fields are required for each send/recv type:
 157      * send/recv type | Buffer | Request | Status | Active
 158      * ---------------+--------+---------+--------+--------
 159      * Blocking       |     No |      No |     No |     No
 160      * Non-Blocking   |     No |     Yes |    Yes |     No
 161      * Persistent     |    Yes |     Yes |    Yes |    Yes
 162      *
 163      * No : Does not require this field
 164      * Yes: Does require this field
 165      */
 166     struct ompi_crcp_bkmrk_pml_message_content_ref_t {
 167         /** This is a list object */
 168         opal_list_item_t super;
 169 
 170         /** Buffer for data */
 171         void * buffer;
 172 
 173         /** Request for this message */
 174         ompi_request_t *request;
 175 
 176         /** Status */
 177         ompi_status_public_t status;
 178 
 179         /** Active ? */
 180         bool active;
 181 
 182         /** Done ? - Only useful in Drain */
 183         bool done;
 184 
 185         /** Already_posted ? - Only useful in Drain */
 186         bool already_posted;
 187 
 188         /** Drained */
 189         bool already_drained;
 190 
 191         /** JJH XXX Debug counter */
 192         uint64_t msg_id;
 193     };
 194     typedef struct ompi_crcp_bkmrk_pml_message_content_ref_t ompi_crcp_bkmrk_pml_message_content_ref_t;
 195 
 196     OBJ_CLASS_DECLARATION(ompi_crcp_bkmrk_pml_message_content_ref_t);
 197     void ompi_crcp_bkmrk_pml_message_content_ref_construct(ompi_crcp_bkmrk_pml_message_content_ref_t *content_ref);
 198     void ompi_crcp_bkmrk_pml_message_content_ref_destruct( ompi_crcp_bkmrk_pml_message_content_ref_t *content_ref);
 199 
 200     /*
 201      * Drain Message Reference
 202      * - The first section of this structure should match
 203      *   ompi_crcp_bkmrk_pml_traffic_message_ref_t exactly.
 204      *   NOTE(review): the 'datatype' member below has no counterpart in the traffic reference, so the two layouts diverge after 'count' -- confirm nothing relies on a common prefix. */
 205     struct ompi_crcp_bkmrk_pml_drain_message_ref_t {
 206         /** This is a list object */
 207         opal_list_item_t super;
 208 
 209         /** Sequence Number of this message */
 210         uint64_t msg_id;
 211 
 212         /** Type of message this references */
 213         ompi_crcp_bkmrk_pml_message_type_t msg_type;
 214 
 215         /** Count for data */
 216         size_t count;
 217 
 218         /** Datatype (not present in the traffic message reference) */
 219         struct ompi_datatype_t * datatype;
 220 
 221         /** Quick reference to the size of the datatype */
 222         size_t ddt_size;
 223 
 224         /** Message Tag */
 225         int tag;
 226 
 227         /** Peer rank to which it was sent/recv'ed if known */
 228         int rank;
 229 
 230         /** Communicator pointer */
 231         ompi_communicator_t* comm;
 232 
 233         /** Message Contents */
 234         opal_list_t msg_contents;
 235 
 236         /** Peer which we received from */
 237         ompi_process_name_t proc_name;
 238 
 239         /**
 240          * Count of the number of completed PML messages that match this reference.
 241          */
 242         int done;
 243 
 244         /**
 245          * Count of the number of active PML messages that match this reference.
 246          */
 247         int active;
 248 
 249         /**
 250          * Count of the number of posted PML messages that match this reference.
 251          *   Used when trying to figure out which messages the drain protocol needs to post, and
 252          *   which messages have already been posted for it.
 253          */
 254         int already_posted;
 255 
 256     };
 257     typedef struct ompi_crcp_bkmrk_pml_drain_message_ref_t ompi_crcp_bkmrk_pml_drain_message_ref_t;
 258 
 259     OBJ_CLASS_DECLARATION(ompi_crcp_bkmrk_pml_drain_message_ref_t);
 260     void ompi_crcp_bkmrk_pml_drain_message_ref_construct(ompi_crcp_bkmrk_pml_drain_message_ref_t *msg_ref);
 261     void ompi_crcp_bkmrk_pml_drain_message_ref_destruct( ompi_crcp_bkmrk_pml_drain_message_ref_t *msg_ref);
 262 
 263     /*
 264      * List of Pending ACKs to drained messages: each item records the peer and a completion flag
 265      */
 266     struct ompi_crcp_bkmrk_pml_drain_message_ack_ref_t {
 267         /** This is a list object */
 268         opal_list_item_t super;
 269 
 270         /** Complete flag */
 271         bool complete;
 272 
 273         /** Peer which we received from */
 274         ompi_process_name_t peer;
 275     };
 276     typedef struct ompi_crcp_bkmrk_pml_drain_message_ack_ref_t ompi_crcp_bkmrk_pml_drain_message_ack_ref_t;
 277 
 278     OBJ_CLASS_DECLARATION(ompi_crcp_bkmrk_pml_drain_message_ack_ref_t);
 279     void ompi_crcp_bkmrk_pml_drain_message_ack_ref_construct(ompi_crcp_bkmrk_pml_drain_message_ack_ref_t *msg_ack_ref);
 280     void ompi_crcp_bkmrk_pml_drain_message_ack_ref_destruct( ompi_crcp_bkmrk_pml_drain_message_ack_ref_t *msg_ack_ref);
 281 
 282     /*
 283      * Regular Traffic Message Reference
 284      * Tracks message signature {count, datatype_size, tag, comm, peer}
 285      */
 286     struct ompi_crcp_bkmrk_pml_traffic_message_ref_t {
 287         /** This is a list object */
 288         opal_list_item_t super;
 289 
 290         /** Sequence Number of this message */
 291         uint64_t msg_id;
 292 
 293         /** Type of message this references */
 294         ompi_crcp_bkmrk_pml_message_type_t msg_type;
 295 
 296         /** Count for data */
 297         size_t count;
 298 
 299         /** Quick reference to the size of the datatype */
 300         size_t ddt_size;
 301 
 302         /** Message Tag */
 303         int tag;
 304 
 305         /** Peer rank to which it was sent/recv'ed if known */
 306         int rank;
 307 
 308         /** Communicator pointer */
 309         ompi_communicator_t* comm;
 310 
 311         /** Message Contents */
 312         opal_list_t msg_contents;
 313 
 314         /** Peer which we received from */
 315         ompi_process_name_t proc_name;
 316 
 317         /* Sample movement of values (mirrored for send):
 318          *                     Recv()   iRecv()  irecv_init()  start()  req_complete()
 319          *   * Pre:
 320          *     matched        = false   false    false         ---      ---
 321          *     done           = false   false    false         ---      true
 322          *     active         = true    true     false         true     false
 323          *     already_posted = true    true     true          ---      ---
 324          *   * Post:
 325          *     matched        = false   false    false         ---      ---
 326          *     done           = true    false    false         false    true
 327          *     active         = false   true     false         true     false
 328          *     already_posted = true    true     true          ---      ---
 329          *   * Drain
 330          *     already_posted = false -> true when posted irecv
 331          * NOTE(review): this structure has no 'already_posted' member; the table presumably refers to 'posted' or to the per-content flag -- confirm. */
 332         /** Has this message been matched by the peer?
 333          *  - Resolved during bookmark exchange
 334          * true  = peer confirmed the receipt of this message
 335          * false = unknown if peer has received this message or not
 336          * NOTE(review): 'matched', 'done' and 'active' are declared int although described as true/false (the drain message reference documents its 'done'/'active' as counts) -- confirm flag vs. counter. */
 337         int matched;
 338 
 339         /** Is this message complete WRT PML semantics?
 340          *  - Is it not in-flight?
 341          * true  = message done on this side (send or receive)
 342          * false = message still in process (sending or receiving)
 343          */
 344         int done;
 345 
 346         /** Is the message actively being worked on?
 347          *  - Known to be in-flight?
 348          * true  = Message is !done, and is in the progress cycle
 349          * false = Message is !done and is *not* in the progress cycle ( [send/recv]_init requests)
 350          */
 351         int active;
 352 
 353         /** How many times a persistent send/recv has been posted, but not activated.
 354          *
 355          */
 356         int posted;
 357 
 358         /** Actively drained
 359          * These are messages that are active, and being drained. So if we checkpoint while the drain
 360          * list is not empty then we do not try to count these messages more than once.
 361          */
 362         int active_drain;
 363     };
 364     typedef struct ompi_crcp_bkmrk_pml_traffic_message_ref_t ompi_crcp_bkmrk_pml_traffic_message_ref_t;
 365 
 366     OBJ_CLASS_DECLARATION(ompi_crcp_bkmrk_pml_traffic_message_ref_t);
 367     void ompi_crcp_bkmrk_pml_traffic_message_ref_construct(ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref);
 368     void ompi_crcp_bkmrk_pml_traffic_message_ref_destruct( ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref);
 369 
 370     /*
 371      * A structure for a single process
 372      * Contains:
 373      *  - List of sent messages to this peer
 374      *  - List of received messages from this peer
 375      *  - Message totals
 376      */
 377     struct ompi_crcp_bkmrk_pml_peer_ref_t {
 378         /** This is a list object */
 379         opal_list_item_t super;
 380 
 381         /** Name of peer */
 382         ompi_process_name_t proc_name;
 383 
 384         /** List of messages sent to this peer */
 385         opal_list_t send_list;      /**< pml_send       */
 386         opal_list_t isend_list;     /**< pml_isend      */
 387         opal_list_t send_init_list; /**< pml_isend_init */
 388 
 389         /** List of messages recved from this peer */
 390         opal_list_t recv_list;      /**< pml_recv       */
 391         opal_list_t irecv_list;     /**< pml_irecv      */
 392         opal_list_t recv_init_list; /**< pml_irecv_init */
 393 
 394         /** List of messages drained from this peer */
 395         opal_list_t drained_list;
 396 
 397         /*
 398          * These are totals over all communicators provided for convenience.
 399          *
 400          * If we are P_n and this structure represents P_m then:
 401          *  - total_*   = P_n --> P_m
 402          *  - matched_* = P_n <-- P_m
 403          * Where P_n --> P_m means:
 404          *  the number of messages P_n knows that it has sent/recv to/from P_m
 405          * And P_n <-- P_m means:
 406          *  the number of messages P_m told us that it has sent/recv to/from P_n
 407          *
 408          * How total* are used:
 409          * Send:
 410          *   Before put on the wire: ++total
 411          * Recv:
 412          *   Once completed: ++total
 413          */
 414         /** Total Number of messages sent */
 415         uint32_t  total_msgs_sent;
 416         uint32_t  matched_msgs_sent;
 417 
 418         /** Total Number of messages received */
 419         uint32_t  total_msgs_recvd;
 420         uint32_t  matched_msgs_recvd;
 421 
 422         /** Total Number of messages drained */
 423         uint32_t  total_drained_msgs;
 424 
 425         /** If peer is expecting an ACK after draining the messages */
 426         bool ack_required;
 427     };
 428     typedef struct ompi_crcp_bkmrk_pml_peer_ref_t ompi_crcp_bkmrk_pml_peer_ref_t;
 429 
 430     OBJ_CLASS_DECLARATION(ompi_crcp_bkmrk_pml_peer_ref_t);
 431     void ompi_crcp_bkmrk_pml_peer_ref_construct(ompi_crcp_bkmrk_pml_peer_ref_t *bkm_proc);
 432     void ompi_crcp_bkmrk_pml_peer_ref_destruct( ompi_crcp_bkmrk_pml_peer_ref_t *bkm_proc);
 433 
 434     /*
 435      * Local version of the PML state: embeds the base state as its first member and adds peer/message reference pointers
 436      */
 437     struct ompi_crcp_bkmrk_pml_state_t {
 438         ompi_crcp_base_pml_state_t p_super;
 439         ompi_crcp_base_pml_state_t *prev_ptr; /* NOTE(review): presumably the pml_state handed in by the caller -- confirm */
 440 
 441         ompi_crcp_bkmrk_pml_peer_ref_t             *peer_ref;
 442         ompi_crcp_bkmrk_pml_traffic_message_ref_t  *msg_ref;
 443     };
 444     typedef struct ompi_crcp_bkmrk_pml_state_t ompi_crcp_bkmrk_pml_state_t;
 445     OBJ_CLASS_DECLARATION(ompi_crcp_bkmrk_pml_state_t);
 446 
 447     /***********************************
 448      * Globally Defined Variables
 449      ***********************************/
 450     /*
 451      * List of known peers (extern declaration only; presumably holds ompi_crcp_bkmrk_pml_peer_ref_t items -- TODO confirm)
 452      */
 453     extern opal_list_t ompi_crcp_bkmrk_pml_peer_refs;
 454 
 455 END_C_DECLS
 456 
 457 #endif /* MCA_CRCP_HOKE_PML_EXPORT_H */

/* [<][>][^][v][top][bottom][index][help] */