This source file includes the following definitions.
- ompi_rte_abort
- _release_fn
- _register_fn
- ompi_rte_wait_for_debugger
- ompi_rte_connect_accept_support
1
2
3
4
5
6
7
8
9
10
11
12 #include "ompi_config.h"
13 #include "ompi/constants.h"
14
15 #include <string.h>
16 #include <stdio.h>
17 #include <ctype.h>
18
19 #include "opal/dss/dss.h"
20 #include "opal/util/argv.h"
21 #include "opal/util/proc.h"
22 #include "opal/util/opal_getcwd.h"
23 #include "opal/util/printf.h"
24 #include "opal/mca/pmix/pmix.h"
25 #include "opal/threads/threads.h"
26 #include "opal/class/opal_list.h"
27 #include "opal/dss/dss.h"
28
29 #include "orte/mca/errmgr/errmgr.h"
30 #include "orte/mca/ess/ess.h"
31 #include "orte/mca/grpcomm/base/base.h"
32 #include "orte/mca/odls/odls.h"
33 #include "orte/mca/plm/plm.h"
34 #include "orte/mca/rml/rml.h"
35 #include "orte/mca/rml/rml_types.h"
36 #include "orte/mca/rmaps/rmaps.h"
37 #include "orte/mca/rmaps/rmaps_types.h"
38 #include "orte/mca/rmaps/base/base.h"
39 #include "orte/mca/rml/base/rml_contact.h"
40 #include "orte/mca/state/state.h"
41 #include "orte/mca/routed/routed.h"
42 #include "orte/util/name_fns.h"
43 #include "orte/util/session_dir.h"
44 #include "orte/util/show_help.h"
45 #include "orte/runtime/orte_globals.h"
46 #include "orte/runtime/orte_wait.h"
47 #include "orte/runtime/orte_data_server.h"
48
49 #include "ompi/mca/rte/base/base.h"
50 #include "ompi/mca/rte/rte.h"
51 #include "ompi/debuggers/debuggers.h"
52 #include "ompi/proc/proc.h"
53 #include "ompi/runtime/params.h"
54 #include "ompi/communicator/communicator.h"
55
56 extern ompi_rte_component_t mca_rte_orte_component;
57
58 void ompi_rte_abort(int error_code, char *fmt, ...)
59 {
60 va_list arglist;
61
62
63 va_start(arglist, fmt);
64 if( NULL != fmt ) {
65 char* buffer = NULL;
66 opal_vasprintf( &buffer, fmt, arglist );
67 opal_output( 0, "%s", buffer );
68 free( buffer );
69 }
70 va_end(arglist);
71
72
73 if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) {
74
75 orte_odls.kill_local_procs(NULL);
76
77 orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
78 } else {
79
80 orte_session_dir_finalize(ORTE_PROC_MY_NAME);
81 }
82
83
84 if (ORTE_ERR_CONNECTION_FAILED == error_code ||
85 ORTE_ERR_SENSOR_LIMIT_EXCEEDED == error_code) {
86 orte_ess.abort(error_code, false);
87 } else {
88 orte_ess.abort(error_code, true);
89 }
90
91
92
93
94
95
96
97
98 exit(-1);
99 }
100
/*
 * State shared between ompi_rte_wait_for_debugger() and its two callbacks.
 */

/* Event-handler reference stored by _register_fn(); SIZE_MAX until then. */
static size_t handler = SIZE_MAX;

/* Starts true; cleared by _register_fn() once registration has called back. */
static bool debugger_register_active = true;

/* Starts true; cleared by _release_fn() when the release event is delivered. */
static bool debugger_event_active = true;
104
105 static void _release_fn(int status,
106 const opal_process_name_t *source,
107 opal_list_t *info, opal_list_t *results,
108 opal_pmix_notification_complete_fn_t cbfunc,
109 void *cbdata)
110 {
111
112 if (NULL != cbfunc) {
113 cbfunc(ORTE_SUCCESS, NULL, NULL, NULL, cbdata);
114 }
115 debugger_event_active = false;
116 }
117
118 static void _register_fn(int status,
119 size_t evhandler_ref,
120 void *cbdata)
121 {
122 opal_list_t *codes = (opal_list_t*)cbdata;
123
124 handler = evhandler_ref;
125 OPAL_LIST_RELEASE(codes);
126 debugger_register_active = false;
127 }
128
129
130
131
132
133
134 void ompi_rte_wait_for_debugger(void)
135 {
136 int debugger;
137 opal_list_t *codes, directives;
138 opal_value_t *kv;
139 char *evar;
140 int time;
141
142
143
144 debugger = orte_in_parallel_debugger;
145
146 if (1 == MPIR_being_debugged) {
147 debugger = 1;
148 }
149
150 if (!debugger && NULL == getenv("ORTE_TEST_DEBUGGER_ATTACH")) {
151
152 return;
153 }
154
155
156
157
158 ompi_debugger_setup_dlls();
159
160 if (NULL != (evar = getenv("ORTE_TEST_DEBUGGER_SLEEP"))) {
161 time = strtol(evar, NULL, 10);
162 sleep(time);
163 return;
164 }
165
166 if (orte_standalone_operation) {
167
168 while (MPIR_debug_gate == 0) {
169 #if defined(HAVE_USLEEP)
170 usleep(100000);
171 #else
172 sleep(1);
173 #endif
174 }
175 } else {
176
177
178 codes = OBJ_NEW(opal_list_t);
179 kv = OBJ_NEW(opal_value_t);
180 kv->key = strdup("errorcode");
181 kv->type = OPAL_INT;
182 kv->data.integer = ORTE_ERR_DEBUGGER_RELEASE;
183 opal_list_append(codes, &kv->super);
184
185 OBJ_CONSTRUCT(&directives, opal_list_t);
186 kv = OBJ_NEW(opal_value_t);
187 kv->key = strdup(OPAL_PMIX_EVENT_HDLR_NAME);
188 kv->type = OPAL_STRING;
189 kv->data.string = strdup("MPI-DEBUGGER-ATTACH");
190 opal_list_append(&directives, &kv->super);
191
192 opal_pmix.register_evhandler(codes, &directives, _release_fn, _register_fn, codes);
193
194 OMPI_WAIT_FOR_COMPLETION(debugger_register_active);
195 OPAL_LIST_DESTRUCT(&directives);
196
197
198 OMPI_WAIT_FOR_COMPLETION(debugger_event_active);
199
200
201 opal_pmix.deregister_evhandler(handler, NULL, NULL);
202 }
203 }
204
205 bool ompi_rte_connect_accept_support(const char *port)
206 {
207 char *ptr, *tmp;
208 orte_process_name_t name;
209
210
211
212 if (NULL == orte_process_info.my_hnp_uri ||
213 NULL == port || 0 == strlen(port)) {
214 return true;
215 }
216
217
218 tmp = strdup(port);
219 if (NULL == (ptr = strchr(tmp, ':'))) {
220
221 orte_show_help("help-orterun.txt", "orterun:malformedport", true);
222 free(tmp);
223 return false;
224 }
225 *ptr = '\0';
226 if (ORTE_SUCCESS != orte_util_convert_string_to_process_name(&name, tmp)) {
227 free(tmp);
228 orte_show_help("help-orterun.txt", "orterun:malformedport", true);
229 return false;
230 }
231 free(tmp);
232 if (ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid) == ORTE_JOB_FAMILY(name.jobid)) {
233
234 return true;
235 }
236
237
238
239
240 if (NULL == orte_data_server_uri) {
241
242 orte_show_help("help-orterun.txt", "orterun:server-unavailable", true);
243 return false;
244 }
245
246 return true;
247 }