This source file includes the following definitions.
- orte_ess_alps_get_first_rank_on_node
- orte_ess_alps_sync_start
- orte_ess_alps_sync_complete
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 #include "orte_config.h"
25 #include "orte/constants.h"
26
27 #include "orte/util/show_help.h"
28 #include "opal/util/argv.h"
29
30 #include "orte/util/proc_info.h"
31 #include "orte/mca/errmgr/base/base.h"
32 #include "orte/util/name_fns.h"
33 #include "orte/runtime/orte_globals.h"
34
35 #include "orte/mca/ess/ess.h"
36 #include "orte/mca/ess/base/base.h"
37 #include "orte/mca/ess/alps/ess_alps.h"
38
39
40
41
42
43
44
45 int
46 orte_ess_alps_get_first_rank_on_node(int *first_rank)
47 {
48 int alps_status = 0;
49 uint64_t apid;
50 size_t alps_count;
51 int ret = ORTE_SUCCESS;
52 int lli_ret = 0, place_ret;
53 alpsAppLayout_t orted_layout;
54
55 if (first_rank == NULL) {
56 ret = ORTE_ERR_BAD_PARAM;
57 goto fn_exit;
58 }
59
60
61
62
63
64 lli_ret = alps_app_lli_lock();
65 if (0 != ret) {
66 OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
67 "%s ess:alps: alps_app_lli_lock returned %d",
68 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret));
69 ret = ORTE_ERR_FILE_WRITE_FAILURE;
70 goto fn_exit;
71 }
72
73 lli_ret = alps_app_lli_put_request(ALPS_APP_LLI_ALPS_REQ_APID, NULL, 0);
74 if (ALPS_APP_LLI_ALPS_STAT_OK != lli_ret) {
75 OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
76 "%s ess:alps: alps_app_lli_put_request - APID returned %d",
77 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret));
78 ret = ORTE_ERR_FILE_WRITE_FAILURE;
79 goto fn_exit_w_lock;
80 }
81
82 lli_ret = alps_app_lli_get_response (&alps_status, &alps_count);
83 if (ALPS_APP_LLI_ALPS_STAT_OK != alps_status) {
84 OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
85 "%s ess:alps: alps_app_lli_get_response returned %d",
86 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), alps_status));
87 ret = ORTE_ERR_FILE_READ_FAILURE;
88 goto fn_exit_w_lock;
89 }
90
91 lli_ret = alps_app_lli_get_response_bytes (&apid, sizeof(apid));
92 if (ALPS_APP_LLI_ALPS_STAT_OK != lli_ret) {
93 OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
94 "%s ess:alps: alps_app_lli_get_response_bytes returned %d",
95 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret));
96 ret = ORTE_ERR_FILE_READ_FAILURE;
97 goto fn_exit_w_lock;
98 }
99
100 place_ret = alps_get_placement_info(apid,
101 &orted_layout,
102 NULL,
103 NULL,
104 NULL,
105 NULL,
106 NULL,
107 NULL,
108 NULL,
109 NULL,
110 NULL);
111 if (1 != place_ret) {
112 OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
113 "%s ess:alps: alps_get_placement_info returned %d (%s)",
114 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), place_ret, strerror(errno)));
115 ret = ORTE_ERROR;
116 goto fn_exit;
117 }
118
119 OPAL_OUTPUT_VERBOSE((2, orte_ess_base_framework.framework_output,
120 "%s ess:alps: alps_get_placement_info returned %d first pe on node is %d",
121 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), place_ret, orted_layout.firstPe));
122 *first_rank = orted_layout.firstPe;
123
124 fn_exit_w_lock:
125 lli_ret = alps_app_lli_unlock();
126 if (ALPS_APP_LLI_ALPS_STAT_OK != lli_ret) {
127 OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
128 "%s ess:alps: alps_app_lli_unlock returned %d",
129 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret));
130 ret = ORTE_ERR_FILE_WRITE_FAILURE;
131 }
132
133 fn_exit:
134 return ret;
135 }
136
137
138
139
140 int
141 orte_ess_alps_sync_start(void)
142 {
143 int ret = ORTE_SUCCESS;
144 int lli_ret = 0;
145 int alps_status = 0;
146 size_t alps_count;
147
148 lli_ret = alps_app_lli_lock();
149 if (0 != ret) {
150 OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
151 "%s ess:alps: alps_app_lli_lock returned %d",
152 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret));
153 ret = ORTE_ERR_FILE_WRITE_FAILURE;
154 goto fn_exit;
155 }
156
157 lli_ret = alps_app_lli_put_request(ALPS_APP_LLI_ALPS_REQ_START, NULL, 0);
158 if (ALPS_APP_LLI_ALPS_STAT_OK != lli_ret) {
159 OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
160 "%s ess:alps: alps_app_lli_put_request returned %d",
161 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret));
162 ret = ORTE_ERR_FILE_WRITE_FAILURE;
163 goto fn_exit_w_lock;
164 }
165
166 lli_ret = alps_app_lli_get_response (&alps_status, &alps_count);
167 if (ALPS_APP_LLI_ALPS_STAT_OK != alps_status) {
168 OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
169 "%s ess:alps: alps_app_lli_get_response returned %d",
170 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), alps_status));
171 ret = ORTE_ERR_FILE_READ_FAILURE;
172 goto fn_exit_w_lock;
173 }
174
175 fn_exit_w_lock:
176 lli_ret = alps_app_lli_unlock();
177 if (ALPS_APP_LLI_ALPS_STAT_OK != lli_ret) {
178 OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
179 "%s ess:alps: alps_app_lli_unlock returned %d",
180 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret));
181 ret = ORTE_ERR_FILE_WRITE_FAILURE;
182 }
183
184 fn_exit:
185 return ret;
186 }
187
188
189
190
191
192 int
193 orte_ess_alps_sync_complete(void)
194 {
195 int ret = ORTE_SUCCESS;
196 int lli_ret = 0;
197 int alps_status = 0;
198 size_t alps_count;
199
200 lli_ret = alps_app_lli_lock();
201 if (0 != ret) {
202 OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
203 "%s ess:alps: alps_app_lli_lock returned %d",
204 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret));
205 ret = ORTE_ERR_FILE_WRITE_FAILURE;
206 goto fn_exit;
207 }
208
209 lli_ret = alps_app_lli_put_request(ALPS_APP_LLI_ALPS_REQ_EXITING, NULL, 0);
210 if (ALPS_APP_LLI_ALPS_STAT_OK != lli_ret) {
211 OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
212 "%s ess:alps: alps_app_lli_put_request returned %d",
213 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret));
214 ret = ORTE_ERR_FILE_WRITE_FAILURE;
215 goto fn_exit_w_lock;
216 }
217
218 lli_ret = alps_app_lli_get_response (&alps_status, &alps_count);
219 if (ALPS_APP_LLI_ALPS_STAT_OK != alps_status) {
220 OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
221 "%s ess:alps: alps_app_lli_get_response returned %d",
222 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), alps_status));
223 ret = ORTE_ERR_FILE_READ_FAILURE;
224 goto fn_exit_w_lock;
225 }
226
227 fn_exit_w_lock:
228 lli_ret = alps_app_lli_unlock();
229 if (ALPS_APP_LLI_ALPS_STAT_OK != lli_ret) {
230 OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
231 "%s ess:alps: alps_app_lli_unlock returned %d",
232 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret));
233 ret = ORTE_ERR_FILE_WRITE_FAILURE;
234 }
235
236 fn_exit:
237 return ret;
238 }
239
240