This source file includes the following definitions.
- opal_btl_usnic_connectivity_client_init
- opal_btl_usnic_connectivity_listen
- opal_btl_usnic_connectivity_ping
- opal_btl_usnic_connectivity_unlisten
- opal_btl_usnic_connectivity_client_finalize
1
2
3
4
5
6
7
8
9
10
11
12
13 #include "opal_config.h"
14
#include <assert.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/types.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#ifdef HAVE_ALLOCA_H
#include <alloca.h>
#endif
#include <time.h>
26
27 #include "opal_stdint.h"
28 #include "opal/threads/mutex.h"
29 #include "opal/mca/event/event.h"
30 #include "opal/util/output.h"
31 #include "opal/util/fd.h"
32 #include "opal/util/string_copy.h"
33 #include "opal/util/printf.h"
34
35 #include "btl_usnic.h"
36 #include "btl_usnic_module.h"
37 #include "btl_usnic_connectivity.h"
38
39
40
41
42
/* Descriptor of the socket to the connectivity agent; -1 while none is open
 * (set by client_init, reset by client_finalize). */
static int agent_fd = -1;
/* Set to true at the end of a successful client_init and back to false in
 * client_finalize. */
static bool initialized = false;
45
46
47
48
49
50 int opal_btl_usnic_connectivity_client_init(void)
51 {
52
53 if (!mca_btl_usnic_component.connectivity_enabled) {
54 return OPAL_SUCCESS;
55 }
56 assert(!initialized);
57
58
59 agent_fd = socket(PF_UNIX, SOCK_STREAM, 0);
60 if (agent_fd < 0) {
61 OPAL_ERROR_LOG(OPAL_ERR_IN_ERRNO);
62 ABORT("socket() failed");
63
64 }
65
66 char *ipc_filename = NULL;
67 opal_asprintf(&ipc_filename, "%s/%s",
68 opal_process_info.job_session_dir, CONNECTIVITY_SOCK_NAME);
69 if (NULL == ipc_filename) {
70 OPAL_ERROR_LOG(OPAL_ERR_IN_ERRNO);
71 ABORT("Out of memory");
72
73 }
74 #if !defined(NDEBUG)
75 struct sockaddr_un sun;
76 assert(strlen(ipc_filename) <= sizeof(sun.sun_path));
77 #endif
78
79
80
81 struct stat sbuf;
82 time_t start = time(NULL);
83 while (1) {
84 int ret = stat(ipc_filename, &sbuf);
85 if (0 == ret) {
86 break;
87 } else if (ENOENT != errno) {
88
89
90 OPAL_ERROR_LOG(OPAL_ERR_IN_ERRNO);
91 ABORT("stat() failed");
92
93 }
94
95
96
97 usleep(1);
98
99 if (time(NULL) - start > 10) {
100 ABORT("connectivity client timeout waiting for server socket to appear");
101
102 }
103 }
104
105
106 struct sockaddr_un address;
107 memset(&address, 0, sizeof(struct sockaddr_un));
108 address.sun_family = AF_UNIX;
109 opal_string_copy(address.sun_path, ipc_filename, sizeof(address.sun_path));
110
111 int count = 0;
112 while (1) {
113 int ret = connect(agent_fd, (struct sockaddr*) &address,
114 sizeof(address));
115 if (0 == ret) {
116 break;
117 }
118
119
120 if (ECONNREFUSED == errno) {
121 if (count < mca_btl_usnic_component.connectivity_num_retries) {
122 usleep(100);
123 ++count;
124 continue;
125 }
126 }
127
128 OPAL_ERROR_LOG(OPAL_ERR_IN_ERRNO);
129 ABORT("connect() failed");
130
131 }
132
133
134 int tlen = strlen(CONNECTIVITY_MAGIC_TOKEN);
135 if (OPAL_SUCCESS != opal_fd_write(agent_fd, tlen,
136 CONNECTIVITY_MAGIC_TOKEN)) {
137 OPAL_ERROR_LOG(OPAL_ERR_IN_ERRNO);
138 ABORT("usnic connectivity client IPC connect write failed");
139
140 }
141
142
143 char *ack = alloca(tlen + 1);
144 if (NULL == ack) {
145 OPAL_ERROR_LOG(OPAL_ERR_IN_ERRNO);
146 ABORT("Out of memory");
147
148 }
149 if (OPAL_SUCCESS != opal_fd_read(agent_fd, tlen, ack)) {
150 OPAL_ERROR_LOG(OPAL_ERR_IN_ERRNO);
151 ABORT("usnic connectivity client IPC connect read failed");
152
153 }
154 if (memcmp(ack, CONNECTIVITY_MAGIC_TOKEN, tlen) != 0) {
155 OPAL_ERROR_LOG(OPAL_ERR_IN_ERRNO);
156 ABORT("usnic connectivity client got wrong token back from agent");
157
158 }
159
160
161 initialized = true;
162 opal_output_verbose(20, USNIC_OUT,
163 "usNIC connectivity client initialized");
164 return OPAL_SUCCESS;
165 }
166
167
168
169
170
171 int opal_btl_usnic_connectivity_listen(opal_btl_usnic_module_t *module)
172 {
173
174 if (!mca_btl_usnic_component.connectivity_enabled) {
175 module->local_modex.connectivity_udp_port = 0;
176 return OPAL_SUCCESS;
177 }
178
179
180 int id = CONNECTIVITY_AGENT_CMD_LISTEN;
181 if (OPAL_SUCCESS != opal_fd_write(agent_fd, sizeof(id), &id)) {
182 OPAL_ERROR_LOG(OPAL_ERR_IN_ERRNO);
183 ABORT("usnic connectivity client IPC write failed");
184
185 }
186
187
188 opal_btl_usnic_connectivity_cmd_listen_t cmd = {
189 .module = NULL,
190 .ipv4_addr = module->local_modex.ipv4_addr,
191 .netmask = module->local_modex.netmask,
192 .max_msg_size = module->local_modex.max_msg_size
193 };
194
195
196 if (0 == opal_process_info.my_local_rank) {
197 cmd.module = module;
198 }
199
200
201 opal_string_copy(cmd.nodename, opal_process_info.nodename,
202 CONNECTIVITY_NODENAME_LEN);
203 opal_string_copy(cmd.usnic_name, module->linux_device_name,
204 CONNECTIVITY_IFNAME_LEN);
205
206 if (OPAL_SUCCESS != opal_fd_write(agent_fd, sizeof(cmd), &cmd)) {
207 OPAL_ERROR_LOG(OPAL_ERR_IN_ERRNO);
208 ABORT("usnic connectivity client IPC write failed");
209
210 }
211
212
213 opal_btl_usnic_connectivity_cmd_listen_reply_t reply;
214 memset(&reply, 0, sizeof(reply));
215 if (OPAL_SUCCESS != opal_fd_read(agent_fd, sizeof(reply), &reply)) {
216 OPAL_ERROR_LOG(OPAL_ERR_IN_ERRNO);
217 ABORT("usnic connectivity client IPC read failed");
218
219 }
220
221
222 assert(CONNECTIVITY_AGENT_CMD_LISTEN == reply.cmd);
223 module->local_modex.connectivity_udp_port = reply.udp_port;
224
225 return OPAL_SUCCESS;
226 }
227
228
229 int opal_btl_usnic_connectivity_ping(uint32_t src_ipv4_addr, int src_port,
230 uint32_t dest_ipv4_addr,
231 uint32_t dest_netmask, int dest_port,
232 char *dest_nodename,
233 size_t max_msg_size)
234 {
235
236 if (!mca_btl_usnic_component.connectivity_enabled) {
237 return OPAL_SUCCESS;
238 }
239
240
241 OPAL_THREAD_LOCK(&btl_usnic_lock);
242
243
244 int id = CONNECTIVITY_AGENT_CMD_PING;
245 if (OPAL_SUCCESS != opal_fd_write(agent_fd, sizeof(id), &id)) {
246 OPAL_ERROR_LOG(OPAL_ERR_IN_ERRNO);
247 ABORT("usnic connectivity client IPC write failed");
248
249 }
250
251
252 opal_btl_usnic_connectivity_cmd_ping_t cmd = {
253 .src_ipv4_addr = src_ipv4_addr,
254 .src_udp_port = src_port,
255 .dest_ipv4_addr = dest_ipv4_addr,
256 .dest_netmask = dest_netmask,
257 .dest_udp_port = dest_port,
258 .max_msg_size = max_msg_size
259 };
260
261 opal_string_copy(cmd.dest_nodename, dest_nodename, CONNECTIVITY_NODENAME_LEN);
262
263 if (OPAL_SUCCESS != opal_fd_write(agent_fd, sizeof(cmd), &cmd)) {
264 OPAL_ERROR_LOG(OPAL_ERR_IN_ERRNO);
265 ABORT("usnic connectivity client IPC write failed");
266
267 }
268
269
270 OPAL_THREAD_UNLOCK(&btl_usnic_lock);
271
272 return OPAL_SUCCESS;
273 }
274
275
276
277
278
279 int opal_btl_usnic_connectivity_unlisten(opal_btl_usnic_module_t *module)
280 {
281
282 if (!mca_btl_usnic_component.connectivity_enabled) {
283 return OPAL_SUCCESS;
284 }
285
286
287 if (0 != opal_process_info.my_local_rank) {
288 return OPAL_SUCCESS;
289 }
290
291
292 int id = CONNECTIVITY_AGENT_CMD_UNLISTEN;
293 if (OPAL_SUCCESS != opal_fd_write(agent_fd, sizeof(id), &id)) {
294 OPAL_ERROR_LOG(OPAL_ERR_IN_ERRNO);
295 ABORT("usnic connectivity client IPC write failed");
296
297 }
298
299
300 opal_btl_usnic_connectivity_cmd_unlisten_t cmd = {
301 .ipv4_addr = module->local_modex.ipv4_addr,
302 };
303
304 if (OPAL_SUCCESS != opal_fd_write(agent_fd, sizeof(cmd), &cmd)) {
305 OPAL_ERROR_LOG(OPAL_ERR_IN_ERRNO);
306 ABORT("usnic connectivity client IPC write failed");
307
308 }
309
310 return OPAL_SUCCESS;
311 }
312
313
314
315
316
317 int opal_btl_usnic_connectivity_client_finalize(void)
318 {
319
320 if (!initialized) {
321 return OPAL_SUCCESS;
322 }
323
324 close(agent_fd);
325 agent_fd = -1;
326
327 initialized = false;
328 return OPAL_SUCCESS;
329 }