1 /*
2 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
3 * University Research and Technology
4 * Corporation. All rights reserved.
5 * Copyright (c) 2004-2005 The University of Tennessee and The University
6 * of Tennessee Research Foundation. All rights
7 * reserved.
8 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
9 * University of Stuttgart. All rights reserved.
10 * Copyright (c) 2004-2005 The Regents of the University of California.
11 * All rights reserved.
12 * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
13 * reserved.
14 * Copyright (c) 2017-2019 Intel, Inc. All rights reserved.
15 * $COPYRIGHT$
16 *
17 * Additional copyrights may follow
18 *
19 * $HEADER$
20 */
21 /** @file:
22 *
23 * the oob framework
24 */
25
26 #ifndef _MCA_OOB_BASE_H_
27 #define _MCA_OOB_BASE_H_
28
29 #include "orte_config.h"
30
31 #ifdef HAVE_UNISTD_H
32 #include <unistd.h>
33 #endif
34 #ifdef HAVE_SYS_UIO_H
35 #include <sys/uio.h>
36 #endif
37 #ifdef HAVE_NET_UIO_H
38 #include <net/uio.h>
39 #endif
40
41 #include "opal/class/opal_bitmap.h"
42 #include "opal/class/opal_hash_table.h"
43 #include "opal/class/opal_list.h"
44 #include "opal/util/printf.h"
45 #include "opal/util/timings.h"
46 #include "opal/mca/event/event.h"
47
48 #include "orte/mca/mca.h"
49 #include "orte/util/threads.h"
50
51 #include "orte/mca/oob/oob.h"
52
53 BEGIN_C_DECLS
54
55 /*
56 * Convenience Typedef
57 */
58 typedef struct {
59 opal_event_base_t *ev_base;
60 char *include;
61 char *exclude;
62 opal_list_t components;
63 opal_list_t actives;
64 int max_uri_length;
65 opal_hash_table_t peers;
66 int num_threads;
67 #if OPAL_ENABLE_TIMING
68 bool timing;
69 #endif
70 } orte_oob_base_t;
71 ORTE_DECLSPEC extern orte_oob_base_t orte_oob_base;
72
73 typedef struct {
74 opal_object_t super;
75 mca_oob_base_component_t *component;
76 opal_bitmap_t addressable;
77 } orte_oob_base_peer_t;
78 OBJ_CLASS_DECLARATION(orte_oob_base_peer_t);
79
80 /* MCA framework */
81 ORTE_DECLSPEC extern mca_base_framework_t orte_oob_base_framework;
82 ORTE_DECLSPEC int orte_oob_base_select(void);
83
/* Access the OOB internal functions via a set of event-based macros
85 * for inserting messages and other commands into the
86 * OOB event base. This ensures that all OOB operations occur
87 * asynchronously in a thread-safe environment.
88 * Note that this doesn't mean that messages will be *sent*
89 * in order as that depends on the specific transport being
90 * used, when that module's event base indicates the transport
91 * is available, etc.
92 */
93 typedef struct {
94 opal_object_t super;
95 opal_event_t ev;
96 orte_rml_send_t *msg;
97 } orte_oob_send_t;
98 OBJ_CLASS_DECLARATION(orte_oob_send_t);
99
100 /* All OOB sends are based on iovec's and are async as the RML
101 * acts as the initial interface to prepare all communications.
102 * The send_nb function will enter the message into the OOB
103 * base, which will then check to see if a transport for the
104 * intended target has already been assigned. If so, the message
105 * is immediately placed into that module's event base for
106 * transmission. If not, the function will loop across all available
107 * components until one identifies that it has a module capable
108 * of reaching the target.
109 */
/**
 * Signature of an OOB send callback.
 *
 * NOTE(review): presumably invoked once a send has completed - confirm
 * against the callers.
 *
 * @param status  integer status code for the operation
 * @param iov     iovec array associated with the send
 * @param count   presumably the number of entries in iov - TODO confirm
 * @param cbdata  opaque pointer handed back to the callback
 */
typedef void (*mca_oob_send_callback_fn_t)(int status, struct iovec *iov,
                                           int count, void *cbdata);
113
114 ORTE_DECLSPEC void orte_oob_base_send_nb(int fd, short args, void *cbdata);
115 #define ORTE_OOB_SEND(m) \
116 do { \
117 orte_oob_send_t *cd; \
118 opal_output_verbose(1, \
119 orte_oob_base_framework.framework_output, \
120 "%s OOB_SEND: %s:%d", \
121 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
122 __FILE__, __LINE__); \
123 cd = OBJ_NEW(orte_oob_send_t); \
124 cd->msg = (m); \
125 ORTE_THREADSHIFT(cd, orte_oob_base.ev_base, \
126 orte_oob_base_send_nb, ORTE_MSG_PRI); \
127 }while(0)
128
129 /* During initial wireup, we can only transfer contact info on the daemon
130 * command line. This limits what we can send to a string representation of
131 * the actual contact info, which gets sent in a uri-like form. Not every
132 * oob module can support this transaction, so this function will loop
133 * across all oob components/modules, letting each add to the uri string if
134 * it supports bootstrap operations. An error will be returned in the cbfunc
135 * if NO component can successfully provide a contact.
136 *
137 * Note: since there is a limit to what an OS will allow on a cmd line, we
138 * impose a limit on the length of the resulting uri via an MCA param. The
139 * default value of -1 implies unlimited - however, users with large numbers
140 * of interfaces on their nodes may wish to restrict the size.
141 *
 * Since all components define their address info at component start,
 * it is unchanged and does not require access via an event
144 */
145 ORTE_DECLSPEC void orte_oob_base_get_addr(char **uri);
146
#if OPAL_ENABLE_FT_CR == 1
/*
 * Only declared when OPAL_ENABLE_FT_CR is 1.  Has the (fd, flags,
 * cbdata) event-callback shape; presumably driven from the OOB event
 * base like orte_oob_base_send_nb - TODO confirm.
 */
ORTE_DECLSPEC void orte_oob_base_ft_event(int fd, short args, void *cbdata);
#endif
150
151 END_C_DECLS
152 #endif