This source file includes the following definitions.
- orte_odls_alps_get_rdma_creds
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
#include "orte_config.h"
#include "orte/constants.h"

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <ctype.h>
#include <sys/syscall.h>

#include "orte/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/util/opal_environ.h"

#include "orte/mca/odls/odls.h"
#include "orte/mca/odls/base/base.h"
#include "orte/mca/odls/base/odls_private.h"
#include "orte/mca/odls/alps/odls_alps.h"
45
46 int orte_odls_alps_get_rdma_creds(void)
47 {
48 int alps_status = 0, num_creds, i, len;
49 uint64_t apid;
50 size_t alps_count;
51 int ret = ORTE_SUCCESS;
52 alpsAppLLIGni_t *rdmacred_rsp=NULL;
53 alpsAppGni_t *rdmacred_buf;
54 char *ptr;
55 char env_buffer[1024];
56 static int already_got_creds = 0;
57
58
59
60
61
62
63
64 if (1 == already_got_creds) {
65 return ORTE_SUCCESS;
66 }
67
68
69
70
71
72
73
74
75 if (ORTE_PROC_IS_DAEMON) {
76
77 ret = alps_app_lli_lock();
78
79
80
81
82
83 ret = alps_app_lli_put_request(ALPS_APP_LLI_ALPS_REQ_APID, NULL, 0);
84 if (ALPS_APP_LLI_ALPS_STAT_OK != ret) {
85 OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
86 "%s odls:alps: alps_app_lli_put_request returned %d",
87 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret));
88 ret = ORTE_ERR_FILE_WRITE_FAILURE;
89 goto fn_exit;
90 }
91
92 ret = alps_app_lli_get_response (&alps_status, &alps_count);
93 if (ALPS_APP_LLI_ALPS_STAT_OK != alps_status) {
94 OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
95 "%s odls:alps: alps_app_lli_get_response returned %d",
96 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), alps_status));
97 ret = ORTE_ERR_FILE_READ_FAILURE;
98 goto fn_exit;
99 }
100
101 ret = alps_app_lli_get_response_bytes (&apid, sizeof(apid));
102 if (ALPS_APP_LLI_ALPS_STAT_OK != ret) {
103 OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
104 "%s odls:alps: alps_app_lli_get_response_bytes returned %d",
105 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret));
106 ret = ORTE_ERR_FILE_READ_FAILURE;
107 goto fn_exit;
108 }
109
110
111
112
113
114 ret = alps_app_lli_put_request(ALPS_APP_LLI_ALPS_REQ_GNI, NULL, 0);
115 if (ALPS_APP_LLI_ALPS_STAT_OK != ret) {
116 OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
117 "%s odls:alps: alps_app_lli_put_request returned %d",
118 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret));
119 ret = ORTE_ERR_FILE_WRITE_FAILURE;
120 goto fn_exit;
121 }
122
123 ret = alps_app_lli_get_response(&alps_status, &alps_count);
124 if (ALPS_APP_LLI_ALPS_STAT_OK != alps_status) {
125 OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
126 "%s odls:alps: alps_app_lli_get_response returned %d",
127 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), alps_status));
128 ret = ORTE_ERR_FILE_READ_FAILURE;
129 goto fn_exit;
130 }
131
132 rdmacred_rsp = (alpsAppLLIGni_t *)malloc(alps_count);
133 if (NULL == rdmacred_rsp) {
134 ret = ORTE_ERR_OUT_OF_RESOURCE;
135 goto fn_exit;
136 }
137
138 memset(rdmacred_rsp,0,alps_count);
139
140 ret = alps_app_lli_get_response_bytes(rdmacred_rsp, alps_count);
141 if (ALPS_APP_LLI_ALPS_STAT_OK != ret) {
142 OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
143 "%s odls:alps: alps_app_lli_get_response_bytes returned %d",
144 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret));
145 free(rdmacred_rsp);
146 ret = ORTE_ERR_FILE_READ_FAILURE;
147 goto fn_exit;
148 }
149
150 ret = alps_app_lli_unlock();
151
152 rdmacred_buf = (alpsAppGni_t *)(rdmacred_rsp->u.buf);
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167 num_creds = rdmacred_rsp->count;
168
169
170
171
172
173 memset(env_buffer,0,sizeof(env_buffer));
174 ptr = env_buffer;
175 for (i=0; i<num_creds-1; i++) {
176 len = sprintf(ptr,"%d:",rdmacred_buf[i].ptag);
177 ptr += len;
178 }
179 sprintf(ptr,"%d",rdmacred_buf[num_creds-1].ptag);
180 ret = opal_setenv("PMI_GNI_PTAG", env_buffer, false, &orte_launch_environ);
181 if (ret != ORTE_SUCCESS) {
182 OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
183 "%s odls:alps: opal_setenv for PMI_GNI_TAG returned %d",
184 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret));
185 goto fn_exit;
186 }
187
188
189
190
191
192 memset(env_buffer,0,sizeof(env_buffer));
193 ptr = env_buffer;
194 for (i=0; i<num_creds-1; i++) {
195 len = sprintf(ptr,"%d:",rdmacred_buf[i].cookie);
196 ptr += len;
197 }
198 sprintf(ptr,"%d",rdmacred_buf[num_creds-1].cookie);
199 ret = opal_setenv("PMI_GNI_COOKIE", env_buffer, false, &orte_launch_environ);
200 if (ret != ORTE_SUCCESS) {
201 OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
202 "%s odls:alps: opal_setenv for PMI_GNI_COOKIE returned %d",
203 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret));
204 goto fn_exit;
205 }
206
207
208
209
210
211 memset(env_buffer,0,sizeof(env_buffer));
212 ptr = env_buffer;
213 for (i=0; i<num_creds-1; i++) {
214 len = sprintf(ptr,"%d:",rdmacred_buf[i].local_addr);
215 ptr += len;
216 }
217 sprintf(ptr,"%d",rdmacred_buf[num_creds-1].local_addr);
218 ret = opal_setenv("PMI_GNI_LOC_ADDR", env_buffer, false, &orte_launch_environ);
219 if (ret != ORTE_SUCCESS) {
220 OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
221 "%s odls:alps: opal_setenv for PMI_GNI_LOC_ADDR returned %d",
222 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret));
223 goto fn_exit;
224 }
225
226
227
228
229
230 memset(env_buffer,0,sizeof(env_buffer));
231 ptr = env_buffer;
232 for (i=0; i<num_creds-1; i++) {
233 len = sprintf(ptr,"%d:",rdmacred_buf[i].device_id);
234 ptr += len;
235 }
236 sprintf(ptr,"%d",rdmacred_buf[num_creds-1].device_id);
237 ret = opal_setenv("PMI_GNI_DEV_ID", env_buffer, false, &orte_launch_environ);
238 if (ret != ORTE_SUCCESS) {
239 OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
240 "%s odls:alps: opal_setenv for PMI_GNI_DEV_ID returned %d",
241 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret));
242 goto fn_exit;
243 }
244
245 }
246
247 fn_exit:
248 if (ORTE_SUCCESS == ret) already_got_creds = 1;
249 return ret;
250 }
251
252