This source file includes the following definitions.
- mca_btl_tcp_proc_construct
- mca_btl_tcp_proc_destruct
- mca_btl_tcp_proc_create
- evaluate_assignment
- visit
- mca_btl_tcp_initialise_interface
- mca_btl_tcp_retrieve_local_interfaces
- mca_btl_tcp_proc_insert
- mca_btl_tcp_proc_remove
- mca_btl_tcp_proc_lookup
- mca_btl_tcp_proc_accept
- mca_btl_tcp_proc_tosocks
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28 #include "opal_config.h"
29
30 #ifdef HAVE_NETINET_IN_H
31 #include <netinet/in.h>
32 #endif
33 #ifdef HAVE_ARPA_INET_H
34 #include <arpa/inet.h>
35 #endif
36
37 #include "opal/class/opal_hash_table.h"
38 #include "opal/mca/btl/base/btl_base_error.h"
39 #include "opal/mca/pmix/pmix.h"
40 #include "opal/util/arch.h"
41 #include "opal/util/argv.h"
42 #include "opal/util/if.h"
43 #include "opal/util/net.h"
44 #include "opal/util/proc.h"
45 #include "opal/util/show_help.h"
46 #include "opal/util/printf.h"
47
48 #include "btl_tcp.h"
49 #include "btl_tcp_proc.h"
50
51 static void mca_btl_tcp_proc_construct(mca_btl_tcp_proc_t* proc);
52 static void mca_btl_tcp_proc_destruct(mca_btl_tcp_proc_t* proc);
53
54 struct mca_btl_tcp_proc_data_t {
55 mca_btl_tcp_interface_t** local_interfaces;
56 opal_hash_table_t local_kindex_to_index;
57 size_t num_local_interfaces, max_local_interfaces;
58 size_t num_peer_interfaces;
59 opal_hash_table_t peer_kindex_to_index;
60 unsigned int *best_assignment;
61 int max_assignment_weight;
62 int max_assignment_cardinality;
63 enum mca_btl_tcp_connection_quality **weights;
64 struct mca_btl_tcp_addr_t ***best_addr;
65 };
66
67 typedef struct mca_btl_tcp_proc_data_t mca_btl_tcp_proc_data_t;
68
69 OBJ_CLASS_INSTANCE( mca_btl_tcp_proc_t,
70 opal_list_item_t,
71 mca_btl_tcp_proc_construct,
72 mca_btl_tcp_proc_destruct );
73
74 void mca_btl_tcp_proc_construct(mca_btl_tcp_proc_t* tcp_proc)
75 {
76 tcp_proc->proc_opal = NULL;
77 tcp_proc->proc_addrs = NULL;
78 tcp_proc->proc_addr_count = 0;
79 tcp_proc->proc_endpoints = NULL;
80 tcp_proc->proc_endpoint_count = 0;
81 OBJ_CONSTRUCT(&tcp_proc->proc_lock, opal_mutex_t);
82 }
83
84
85
86
87
88 void mca_btl_tcp_proc_destruct(mca_btl_tcp_proc_t* tcp_proc)
89 {
90 if( NULL != tcp_proc->proc_opal ) {
91
92 OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock);
93 opal_proc_table_remove_value(&mca_btl_tcp_component.tcp_procs,
94 tcp_proc->proc_opal->proc_name);
95 OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
96 OBJ_RELEASE(tcp_proc->proc_opal);
97 tcp_proc->proc_opal = NULL;
98 }
99
100 if(NULL != tcp_proc->proc_endpoints) {
101 free(tcp_proc->proc_endpoints);
102 }
103 if(NULL != tcp_proc->proc_addrs) {
104 free(tcp_proc->proc_addrs);
105 }
106 OBJ_DESTRUCT(&tcp_proc->proc_lock);
107 }
108
109
110
111
112
113
114
115
116
117 mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(opal_proc_t* proc)
118 {
119 mca_btl_tcp_proc_t* btl_proc;
120 int rc;
121 mca_btl_tcp_modex_addr_t *remote_addrs = NULL;
122 size_t i, size;
123
124 OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock);
125 rc = opal_proc_table_get_value(&mca_btl_tcp_component.tcp_procs,
126 proc->proc_name, (void**)&btl_proc);
127 if (OPAL_SUCCESS == rc) {
128 OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
129 return btl_proc;
130 }
131
132
133 btl_proc = OBJ_NEW(mca_btl_tcp_proc_t);
134 if (NULL == btl_proc) {
135 rc = OPAL_ERR_OUT_OF_RESOURCE;
136 goto cleanup;
137 }
138
139
140
141
142
143 OBJ_RETAIN(proc);
144
145
146 OPAL_MODEX_RECV(rc, &mca_btl_tcp_component.super.btl_version,
147 &proc->proc_name, (uint8_t**)&remote_addrs, &size);
148 if (OPAL_SUCCESS != rc) {
149 if (OPAL_ERR_NOT_FOUND != rc) {
150 BTL_ERROR(("opal_modex_recv: failed with return value=%d", rc));
151 }
152 goto cleanup;
153 }
154
155 if (0 != (size % sizeof(mca_btl_tcp_modex_addr_t))) {
156 BTL_ERROR(("opal_modex_recv: invalid size %lu: btl-size: %lu\n",
157 (unsigned long)size,
158 (unsigned long)sizeof(mca_btl_tcp_modex_addr_t)));
159 rc = OPAL_ERROR;
160 goto cleanup;
161 }
162
163 btl_proc->proc_addr_count = size / sizeof(mca_btl_tcp_modex_addr_t);
164 btl_proc->proc_addrs = malloc(btl_proc->proc_addr_count *
165 sizeof(mca_btl_tcp_addr_t));
166 if (NULL == btl_proc->proc_addrs) {
167 rc = OPAL_ERR_OUT_OF_RESOURCE;
168 goto cleanup;
169 }
170
171
172
173 for (i = 0 ; i < btl_proc->proc_addr_count ; i++) {
174 if (MCA_BTL_TCP_AF_INET == remote_addrs[i].addr_family) {
175 memcpy(&btl_proc->proc_addrs[i].addr_inet,
176 remote_addrs[i].addr, sizeof(struct in_addr));
177 btl_proc->proc_addrs[i].addr_port = remote_addrs[i].addr_port;
178 btl_proc->proc_addrs[i].addr_ifkindex = remote_addrs[i].addr_ifkindex;
179 btl_proc->proc_addrs[i].addr_family = AF_INET;
180 btl_proc->proc_addrs[i].addr_inuse = false;
181 } else if (MCA_BTL_TCP_AF_INET6 == remote_addrs[i].addr_family) {
182 #if OPAL_ENABLE_IPV6
183 memcpy(&btl_proc->proc_addrs[i].addr_inet6,
184 remote_addrs[i].addr, sizeof(struct in6_addr));
185 btl_proc->proc_addrs[i].addr_port = remote_addrs[i].addr_port;
186 btl_proc->proc_addrs[i].addr_ifkindex = remote_addrs[i].addr_ifkindex;
187 btl_proc->proc_addrs[i].addr_family = AF_INET6;
188 btl_proc->proc_addrs[i].addr_inuse = false;
189 #else
190 rc = OPAL_ERR_NOT_SUPPORTED;
191 goto cleanup;
192 #endif
193 } else {
194 BTL_ERROR(("Unexpected address family %d",
195 (int)remote_addrs[i].addr_family));
196 rc = OPAL_ERR_BAD_PARAM;
197 goto cleanup;
198 }
199 }
200
201 free(remote_addrs);
202
203
204 btl_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
205 malloc((1 + btl_proc->proc_addr_count) *
206 sizeof(mca_btl_base_endpoint_t*));
207 if (NULL == btl_proc->proc_endpoints) {
208 rc = OPAL_ERR_OUT_OF_RESOURCE;
209 goto cleanup;
210 }
211
212 cleanup:
213 if (OPAL_SUCCESS == rc) {
214 btl_proc->proc_opal = proc;
215
216 opal_proc_table_set_value(&mca_btl_tcp_component.tcp_procs,
217 proc->proc_name, btl_proc);
218 } else {
219 if (btl_proc) {
220 OBJ_RELEASE(btl_proc);
221 OBJ_RELEASE(proc);
222 btl_proc = NULL;
223 }
224 }
225
226 OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
227
228 return btl_proc;
229 }
230
231
232
233 static void evaluate_assignment(mca_btl_tcp_proc_data_t *proc_data, int *a) {
234 size_t i;
235 unsigned int max_interfaces = proc_data->num_local_interfaces;
236 int assignment_weight = 0;
237 int assignment_cardinality = 0;
238
239 if(max_interfaces < proc_data->num_peer_interfaces) {
240 max_interfaces = proc_data->num_peer_interfaces;
241 }
242
243 for(i = 0; i < max_interfaces; ++i) {
244 if(0 < proc_data->weights[i][a[i]-1]) {
245 ++assignment_cardinality;
246 assignment_weight += proc_data->weights[i][a[i]-1];
247 }
248 }
249
250
251
252
253 if(assignment_cardinality > proc_data->max_assignment_cardinality
254 || (assignment_cardinality == proc_data->max_assignment_cardinality
255 && assignment_weight > proc_data->max_assignment_weight)) {
256
257 for(i = 0; i < max_interfaces; ++i) {
258 proc_data->best_assignment[i] = a[i]-1;
259 }
260 proc_data->max_assignment_weight = assignment_weight;
261 proc_data->max_assignment_cardinality = assignment_cardinality;
262 }
263 }
264
265 static void visit(mca_btl_tcp_proc_data_t *proc_data, int k, int level, int siz, int *a)
266 {
267 level = level+1; a[k] = level;
268
269 if (level == siz) {
270 evaluate_assignment(proc_data, a);
271 } else {
272 int i;
273 for ( i = 0; i < siz; i++)
274 if (a[i] == 0)
275 visit(proc_data, i, level, siz, a);
276 }
277
278 level = level-1; a[k] = 0;
279 }
280
281
282 static void mca_btl_tcp_initialise_interface(mca_btl_tcp_interface_t* tcp_interface,
283 int ifk_index, int index)
284 {
285 tcp_interface->kernel_index = ifk_index;
286 tcp_interface->peer_interface = -1;
287 tcp_interface->ipv4_address = NULL;
288 tcp_interface->ipv6_address = NULL;
289 tcp_interface->index = index;
290 tcp_interface->inuse = 0;
291 }
292
293 static mca_btl_tcp_interface_t** mca_btl_tcp_retrieve_local_interfaces(mca_btl_tcp_proc_data_t *proc_data)
294 {
295 struct sockaddr_storage local_addr;
296 char local_if_name[IF_NAMESIZE];
297 char **include, **exclude, **argv;
298 int idx;
299 mca_btl_tcp_interface_t * local_interface;
300
301 assert (NULL == proc_data->local_interfaces);
302 if( NULL != proc_data->local_interfaces )
303 return proc_data->local_interfaces;
304
305 proc_data->max_local_interfaces = MAX_KERNEL_INTERFACES;
306 proc_data->num_local_interfaces = 0;
307 proc_data->local_interfaces = (mca_btl_tcp_interface_t**)calloc( proc_data->max_local_interfaces, sizeof(mca_btl_tcp_interface_t*) );
308 if( NULL == proc_data->local_interfaces )
309 return NULL;
310
311
312 include = opal_argv_split(mca_btl_tcp_component.tcp_if_include,',');
313 exclude = opal_argv_split(mca_btl_tcp_component.tcp_if_exclude,',');
314
315
316
317
318
319 for( idx = opal_ifbegin(); idx >= 0; idx = opal_ifnext (idx) ) {
320 int kindex;
321 uint64_t index;
322 bool skip = false;
323
324 opal_ifindextoaddr (idx, (struct sockaddr*) &local_addr, sizeof (local_addr));
325 opal_ifindextoname (idx, local_if_name, sizeof (local_if_name));
326
327
328
329
330
331
332
333
334
335 if(NULL != include) {
336 argv = include;
337 skip = true;
338 while(argv && *argv) {
339
340
341 if (0 == strcmp(*argv, local_if_name)) {
342 skip = false;
343 break;
344 }
345 argv++;
346 }
347 } else if (NULL != exclude) {
348
349
350
351 argv = exclude;
352 while(argv && *argv) {
353
354
355
356
357 if(0 == strncmp(*argv, local_if_name, strlen(*argv))) {
358 skip = true;
359 break;
360 }
361 argv++;
362 }
363 }
364 if (true == skip) {
365
366 continue;
367 }
368
369 kindex = opal_ifindextokindex(idx);
370 int rc = opal_hash_table_get_value_uint32(&proc_data->local_kindex_to_index, kindex, (void**) &index);
371
372
373 if (OPAL_SUCCESS != rc) {
374 index = proc_data->num_local_interfaces++;
375 opal_hash_table_set_value_uint32(&proc_data->local_kindex_to_index, kindex, (void*)(uintptr_t) index);
376
377 if( proc_data->num_local_interfaces == proc_data->max_local_interfaces ) {
378 proc_data->max_local_interfaces <<= 1;
379 proc_data->local_interfaces = (mca_btl_tcp_interface_t**)realloc( proc_data->local_interfaces,
380 proc_data->max_local_interfaces * sizeof(mca_btl_tcp_interface_t*) );
381 if( NULL == proc_data->local_interfaces )
382 goto cleanup;
383 }
384 proc_data->local_interfaces[index] = (mca_btl_tcp_interface_t *) malloc(sizeof(mca_btl_tcp_interface_t));
385 assert(NULL != proc_data->local_interfaces[index]);
386 mca_btl_tcp_initialise_interface(proc_data->local_interfaces[index], kindex, index);
387 }
388
389 local_interface = proc_data->local_interfaces[index];
390 switch(local_addr.ss_family) {
391 case AF_INET:
392
393 if (4 == mca_btl_tcp_component.tcp_disable_family) {
394 continue;
395 }
396
397 local_interface->ipv4_address =
398 (struct sockaddr_storage*) malloc(sizeof(local_addr));
399 memcpy(local_interface->ipv4_address,
400 &local_addr, sizeof(local_addr));
401 opal_ifindextomask(idx,
402 &local_interface->ipv4_netmask,
403 sizeof(int));
404 break;
405 case AF_INET6:
406
407 if (6 == mca_btl_tcp_component.tcp_disable_family) {
408 continue;
409 }
410
411 local_interface->ipv6_address
412 = (struct sockaddr_storage*) malloc(sizeof(local_addr));
413 memcpy(local_interface->ipv6_address,
414 &local_addr, sizeof(local_addr));
415 opal_ifindextomask(idx,
416 &local_interface->ipv6_netmask,
417 sizeof(int));
418 break;
419 default:
420 opal_output(0, "unknown address family for tcp: %d\n",
421 local_addr.ss_family);
422 }
423 }
424 cleanup:
425 if (NULL != include) {
426 opal_argv_free(include);
427 }
428 if (NULL != exclude) {
429 opal_argv_free(exclude);
430 }
431
432 return proc_data->local_interfaces;
433 }
434
435
436
437
438
439 int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc,
440 mca_btl_base_endpoint_t* btl_endpoint )
441 {
442 struct sockaddr_storage endpoint_addr_ss;
443 const char *proc_hostname;
444 unsigned int perm_size = 0;
445 int rc, *a = NULL;
446 size_t i, j;
447 mca_btl_tcp_interface_t** peer_interfaces = NULL;
448 mca_btl_tcp_proc_data_t _proc_data, *proc_data=&_proc_data;
449 size_t max_peer_interfaces;
450 char str_local[128], str_remote[128];
451
452 if (NULL == (proc_hostname = opal_get_proc_hostname(btl_proc->proc_opal))) {
453 return OPAL_ERR_UNREACH;
454 }
455
456 memset(proc_data, 0, sizeof(mca_btl_tcp_proc_data_t));
457 OBJ_CONSTRUCT(&_proc_data.local_kindex_to_index, opal_hash_table_t);
458 opal_hash_table_init(&_proc_data.local_kindex_to_index, 8);
459 OBJ_CONSTRUCT(&_proc_data.peer_kindex_to_index, opal_hash_table_t);
460 opal_hash_table_init(&_proc_data.peer_kindex_to_index, 8);
461
462 #ifndef WORDS_BIGENDIAN
463
464
465
466
467
468 if (btl_proc->proc_opal->proc_arch & OPAL_ARCH_ISBIGENDIAN) {
469 btl_endpoint->endpoint_nbo = true;
470 }
471 #endif
472
473
474 btl_endpoint->endpoint_proc = btl_proc;
475 btl_proc->proc_endpoints[btl_proc->proc_endpoint_count++] = btl_endpoint;
476
477
478 if( NULL == mca_btl_tcp_retrieve_local_interfaces(proc_data) )
479 return OPAL_ERR_OUT_OF_RESOURCE;
480 if( 0 == proc_data->num_local_interfaces ) {
481 return OPAL_ERR_UNREACH;
482 }
483
484 max_peer_interfaces = proc_data->max_local_interfaces;
485 peer_interfaces = (mca_btl_tcp_interface_t**)calloc( max_peer_interfaces, sizeof(mca_btl_tcp_interface_t*) );
486 if (NULL == peer_interfaces) {
487 max_peer_interfaces = 0;
488 rc = OPAL_ERR_OUT_OF_RESOURCE;
489 goto exit;
490 }
491 proc_data->num_peer_interfaces = 0;
492
493
494
495
496
497
498 for( i = 0; i < btl_proc->proc_addr_count; i++ ) {
499
500 uint64_t index;
501
502 mca_btl_tcp_addr_t* endpoint_addr = btl_proc->proc_addrs + i;
503
504 mca_btl_tcp_proc_tosocks (endpoint_addr, &endpoint_addr_ss);
505
506 rc = opal_hash_table_get_value_uint32(&proc_data->peer_kindex_to_index, endpoint_addr->addr_ifkindex, (void**) &index);
507
508 if (OPAL_SUCCESS != rc) {
509 index = proc_data->num_peer_interfaces++;
510 opal_hash_table_set_value_uint32(&proc_data->peer_kindex_to_index, endpoint_addr->addr_ifkindex, (void*)(uintptr_t) index);
511 if( proc_data->num_peer_interfaces == max_peer_interfaces ) {
512 max_peer_interfaces <<= 1;
513 peer_interfaces = (mca_btl_tcp_interface_t**)realloc( peer_interfaces,
514 max_peer_interfaces * sizeof(mca_btl_tcp_interface_t*) );
515 if( NULL == peer_interfaces ) {
516 return OPAL_ERR_OUT_OF_RESOURCE;
517 }
518 }
519 peer_interfaces[index] = (mca_btl_tcp_interface_t *) malloc(sizeof(mca_btl_tcp_interface_t));
520 mca_btl_tcp_initialise_interface(peer_interfaces[index],
521 endpoint_addr->addr_ifkindex, index);
522 }
523
524
525
526
527
528 if(endpoint_addr->addr_inuse >= mca_btl_tcp_component.tcp_num_links) {
529 peer_interfaces[index]->inuse = 1;
530 }
531
532 switch(endpoint_addr_ss.ss_family) {
533 case AF_INET:
534 peer_interfaces[index]->ipv4_address = (struct sockaddr_storage*) malloc(sizeof(endpoint_addr_ss));
535 peer_interfaces[index]->ipv4_endpoint_addr = endpoint_addr;
536 memcpy(peer_interfaces[index]->ipv4_address,
537 &endpoint_addr_ss, sizeof(endpoint_addr_ss));
538 break;
539 case AF_INET6:
540 peer_interfaces[index]->ipv6_address = (struct sockaddr_storage*) malloc(sizeof(endpoint_addr_ss));
541 peer_interfaces[index]->ipv6_endpoint_addr = endpoint_addr;
542 memcpy(peer_interfaces[index]->ipv6_address,
543 &endpoint_addr_ss, sizeof(endpoint_addr_ss));
544 break;
545 default:
546 opal_output(0, "unknown address family for tcp: %d\n",
547 endpoint_addr_ss.ss_family);
548 return OPAL_ERR_UNREACH;
549 }
550 }
551
552
553
554
555
556 perm_size = proc_data->num_local_interfaces;
557 if(proc_data->num_peer_interfaces > perm_size) {
558 perm_size = proc_data->num_peer_interfaces;
559 }
560
561 proc_data->weights = (enum mca_btl_tcp_connection_quality**) malloc(perm_size
562 * sizeof(enum mca_btl_tcp_connection_quality*));
563 assert(NULL != proc_data->weights);
564
565 proc_data->best_addr = (mca_btl_tcp_addr_t ***) malloc(perm_size
566 * sizeof(mca_btl_tcp_addr_t **));
567 assert(NULL != proc_data->best_addr);
568 for(i = 0; i < perm_size; ++i) {
569 proc_data->weights[i] = (enum mca_btl_tcp_connection_quality*) calloc(perm_size,
570 sizeof(enum mca_btl_tcp_connection_quality));
571 assert(NULL != proc_data->weights[i]);
572
573 proc_data->best_addr[i] = (mca_btl_tcp_addr_t **) calloc(perm_size,
574 sizeof(mca_btl_tcp_addr_t *));
575 assert(NULL != proc_data->best_addr[i]);
576 }
577
578
579 for( i = 0; i < proc_data->num_local_interfaces; ++i ) {
580 mca_btl_tcp_interface_t* local_interface = proc_data->local_interfaces[i];
581 for( j = 0; j < proc_data->num_peer_interfaces; ++j ) {
582
583
584 proc_data->weights[i][j] = CQ_NO_CONNECTION;
585
586
587 if(NULL != proc_data->local_interfaces[i]->ipv4_address &&
588 NULL != peer_interfaces[j]->ipv4_address) {
589
590
591 inet_ntop(AF_INET, &(((struct sockaddr_in*) proc_data->local_interfaces[i]->ipv4_address))->sin_addr,
592 str_local, sizeof(str_local));
593 inet_ntop(AF_INET, &(((struct sockaddr_in*) peer_interfaces[j]->ipv4_address))->sin_addr,
594 str_remote, sizeof(str_remote));
595
596 if(opal_net_addr_isipv4public((struct sockaddr*) local_interface->ipv4_address) &&
597 opal_net_addr_isipv4public((struct sockaddr*) peer_interfaces[j]->ipv4_address)) {
598 if(opal_net_samenetwork((struct sockaddr*) local_interface->ipv4_address,
599 (struct sockaddr*) peer_interfaces[j]->ipv4_address,
600 local_interface->ipv4_netmask)) {
601 proc_data->weights[i][j] = CQ_PUBLIC_SAME_NETWORK;
602 opal_output_verbose(20, opal_btl_base_framework.framework_output,
603 "btl:tcp: path from %s to %s: IPV4 PUBLIC SAME NETWORK",
604 str_local, str_remote);
605 } else {
606 proc_data->weights[i][j] = CQ_PUBLIC_DIFFERENT_NETWORK;
607 opal_output_verbose(20, opal_btl_base_framework.framework_output,
608 "btl:tcp: path from %s to %s: IPV4 PUBLIC DIFFERENT NETWORK",
609 str_local, str_remote);
610 }
611 proc_data->best_addr[i][j] = peer_interfaces[j]->ipv4_endpoint_addr;
612 continue;
613 }
614 if(opal_net_samenetwork((struct sockaddr*) local_interface->ipv4_address,
615 (struct sockaddr*) peer_interfaces[j]->ipv4_address,
616 local_interface->ipv4_netmask)) {
617 proc_data->weights[i][j] = CQ_PRIVATE_SAME_NETWORK;
618 opal_output_verbose(20, opal_btl_base_framework.framework_output,
619 "btl:tcp: path from %s to %s: IPV4 PRIVATE SAME NETWORK",
620 str_local, str_remote);
621 } else {
622 proc_data->weights[i][j] = CQ_PRIVATE_DIFFERENT_NETWORK;
623 opal_output_verbose(20, opal_btl_base_framework.framework_output,
624 "btl:tcp: path from %s to %s: IPV4 PRIVATE DIFFERENT NETWORK",
625 str_local, str_remote);
626 }
627 proc_data->best_addr[i][j] = peer_interfaces[j]->ipv4_endpoint_addr;
628 continue;
629 }
630
631
632
633
634 if(NULL != local_interface->ipv6_address &&
635 NULL != peer_interfaces[j]->ipv6_address) {
636
637
638 inet_ntop(AF_INET6, &(((struct sockaddr_in6*) local_interface->ipv6_address))->sin6_addr,
639 str_local, sizeof(str_local));
640 inet_ntop(AF_INET6, &(((struct sockaddr_in6*) peer_interfaces[j]->ipv6_address))->sin6_addr,
641 str_remote, sizeof(str_remote));
642
643 if(opal_net_samenetwork((struct sockaddr*) local_interface->ipv6_address,
644 (struct sockaddr*) peer_interfaces[j]->ipv6_address,
645 local_interface->ipv6_netmask)) {
646 proc_data->weights[i][j] = CQ_PUBLIC_SAME_NETWORK;
647 opal_output_verbose(20, opal_btl_base_framework.framework_output,
648 "btl:tcp: path from %s to %s: IPV6 PUBLIC SAME NETWORK",
649 str_local, str_remote);
650 } else {
651 proc_data->weights[i][j] = CQ_PUBLIC_DIFFERENT_NETWORK;
652 opal_output_verbose(20, opal_btl_base_framework.framework_output,
653 "btl:tcp: path from %s to %s: IPV6 PUBLIC DIFFERENT NETWORK",
654 str_local, str_remote);
655 }
656 proc_data->best_addr[i][j] = peer_interfaces[j]->ipv6_endpoint_addr;
657 continue;
658 }
659
660 }
661 }
662
663
664
665
666
667
668 proc_data->best_assignment = (unsigned int *) malloc (perm_size * sizeof(int));
669
670 a = (int *) malloc(perm_size * sizeof(int));
671 if (NULL == a) {
672 rc = OPAL_ERR_OUT_OF_RESOURCE;
673 goto exit;
674 }
675
676
677
678
679
680
681 if (perm_size <= MAX_PERMUTATION_INTERFACES) {
682 memset(a, 0, perm_size * sizeof(int));
683 proc_data->max_assignment_cardinality = -1;
684 proc_data->max_assignment_weight = -1;
685 visit(proc_data, 0, -1, perm_size, a);
686
687 rc = OPAL_ERR_UNREACH;
688 for(i = 0; i < perm_size; ++i) {
689 unsigned int best = proc_data->best_assignment[i];
690 if(best > proc_data->num_peer_interfaces
691 || proc_data->weights[i][best] == CQ_NO_CONNECTION
692 || peer_interfaces[best]->inuse
693 || NULL == peer_interfaces[best]) {
694 continue;
695 }
696 peer_interfaces[best]->inuse++;
697 btl_endpoint->endpoint_addr = proc_data->best_addr[i][best];
698 btl_endpoint->endpoint_addr->addr_inuse = true;
699 rc = OPAL_SUCCESS;
700 break;
701 }
702 } else {
703 enum mca_btl_tcp_connection_quality max;
704 int i_max = 0, j_max = 0;
705
706
707 max = CQ_NO_CONNECTION;
708 for(i=0; i<proc_data->num_local_interfaces; ++i) {
709 for(j=0; j<proc_data->num_peer_interfaces; ++j) {
710 if (!peer_interfaces[j]->inuse) {
711 if (proc_data->weights[i][j] > max) {
712 max = proc_data->weights[i][j];
713 i_max = i;
714 j_max = j;
715 }
716 }
717 }
718 }
719
720 rc = OPAL_ERR_UNREACH;
721 if (CQ_NO_CONNECTION != max) {
722 peer_interfaces[j_max]->inuse++;
723 btl_endpoint->endpoint_addr = proc_data->best_addr[i_max][j_max];
724 btl_endpoint->endpoint_addr->addr_inuse = true;
725 rc = OPAL_SUCCESS;
726 }
727 }
728 if (OPAL_ERR_UNREACH == rc) {
729 opal_output_verbose(10, opal_btl_base_framework.framework_output,
730 "btl:tcp: host %s, process %s UNREACHABLE",
731 proc_hostname,
732 OPAL_NAME_PRINT(btl_proc->proc_opal->proc_name));
733 }
734
735 exit:
736
737
738 for(i = 0; i < perm_size; ++i) {
739 free(proc_data->weights[i]);
740 free(proc_data->best_addr[i]);
741 }
742
743 for(i = 0; i < proc_data->num_peer_interfaces; ++i) {
744 if(NULL != peer_interfaces[i]->ipv4_address) {
745 free(peer_interfaces[i]->ipv4_address);
746 }
747 if(NULL != peer_interfaces[i]->ipv6_address) {
748 free(peer_interfaces[i]->ipv6_address);
749 }
750 free(peer_interfaces[i]);
751 }
752 free(peer_interfaces);
753
754 for(i = 0; i < proc_data->num_local_interfaces; ++i) {
755 if(NULL != proc_data->local_interfaces[i]->ipv4_address) {
756 free(proc_data->local_interfaces[i]->ipv4_address);
757 }
758 if(NULL != proc_data->local_interfaces[i]->ipv6_address) {
759 free(proc_data->local_interfaces[i]->ipv6_address);
760 }
761 free(proc_data->local_interfaces[i]);
762 }
763 free(proc_data->local_interfaces); proc_data->local_interfaces = NULL;
764 proc_data->max_local_interfaces = 0;
765
766 free(proc_data->weights); proc_data->weights = NULL;
767 free(proc_data->best_addr); proc_data->best_addr = NULL;
768 free(proc_data->best_assignment); proc_data->best_assignment = NULL;
769
770 OBJ_DESTRUCT(&_proc_data.local_kindex_to_index);
771 OBJ_DESTRUCT(&_proc_data.peer_kindex_to_index);
772
773 free(a);
774
775 return rc;
776 }
777
778
779
780
781
782
783 int mca_btl_tcp_proc_remove(mca_btl_tcp_proc_t* btl_proc, mca_btl_base_endpoint_t* btl_endpoint)
784 {
785 size_t i;
786 if (NULL != btl_proc) {
787 OPAL_THREAD_LOCK(&btl_proc->proc_lock);
788 for(i = 0; i < btl_proc->proc_endpoint_count; i++) {
789 if(btl_proc->proc_endpoints[i] == btl_endpoint) {
790 memmove(btl_proc->proc_endpoints+i, btl_proc->proc_endpoints+i+1,
791 (btl_proc->proc_endpoint_count-i-1)*sizeof(mca_btl_base_endpoint_t*));
792 if(--btl_proc->proc_endpoint_count == 0) {
793 OPAL_THREAD_UNLOCK(&btl_proc->proc_lock);
794 OBJ_RELEASE(btl_proc);
795 return OPAL_SUCCESS;
796 }
797
798
799
800 if (NULL != btl_endpoint->endpoint_addr) {
801 btl_endpoint->endpoint_addr->addr_inuse = false;
802 }
803 break;
804 }
805 }
806 OPAL_THREAD_UNLOCK(&btl_proc->proc_lock);
807 }
808 return OPAL_SUCCESS;
809 }
810
811
812
813
814
815 mca_btl_tcp_proc_t* mca_btl_tcp_proc_lookup(const opal_process_name_t *name)
816 {
817 mca_btl_tcp_proc_t* proc = NULL;
818
819 OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock);
820 opal_proc_table_get_value(&mca_btl_tcp_component.tcp_procs,
821 *name, (void**)&proc);
822 OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
823 if (OPAL_UNLIKELY(NULL == proc)) {
824 mca_btl_base_endpoint_t *endpoint;
825 opal_proc_t *opal_proc;
826
827 BTL_VERBOSE(("adding tcp proc for unknown peer {%s}",
828 OPAL_NAME_PRINT(*name)));
829
830 opal_proc = opal_proc_for_name (*name);
831 if (NULL == opal_proc) {
832 return NULL;
833 }
834
835
836 for( uint32_t i = 0; i < mca_btl_tcp_component.tcp_num_btls; ++i ) {
837 endpoint = NULL;
838 (void) mca_btl_tcp_add_procs (&mca_btl_tcp_component.tcp_btls[i]->super, 1, &opal_proc,
839 &endpoint, NULL);
840 if (NULL != endpoint && NULL == proc) {
841
842 proc = endpoint->endpoint_proc;
843 }
844 }
845 }
846
847 return proc;
848 }
849
850
851
852
853
854 void mca_btl_tcp_proc_accept(mca_btl_tcp_proc_t* btl_proc, struct sockaddr* addr, int sd)
855 {
856 OPAL_THREAD_LOCK(&btl_proc->proc_lock);
857 int found_match = 0;
858 mca_btl_base_endpoint_t* match_btl_endpoint;
859
860 for( size_t i = 0; i < btl_proc->proc_endpoint_count; i++ ) {
861 mca_btl_base_endpoint_t* btl_endpoint = btl_proc->proc_endpoints[i];
862
863
864
865 if( btl_endpoint->endpoint_addr->addr_family != addr->sa_family) {
866 continue;
867 }
868 switch (addr->sa_family) {
869 case AF_INET:
870 if( memcmp( &btl_endpoint->endpoint_addr->addr_inet,
871 &(((struct sockaddr_in*)addr)->sin_addr),
872 sizeof(struct in_addr) ) ) {
873 char tmp[2][16];
874 opal_output_verbose(20, opal_btl_base_framework.framework_output,
875 "btl: tcp: Match incoming connection from %s %s with locally known IP %s failed (iface %d/%d)!\n",
876 OPAL_NAME_PRINT(btl_proc->proc_opal->proc_name),
877 inet_ntop(AF_INET, (void*)&((struct sockaddr_in*)addr)->sin_addr,
878 tmp[0], 16),
879 inet_ntop(AF_INET, (void*)(struct in_addr*)&btl_endpoint->endpoint_addr->addr_inet,
880 tmp[1], 16),
881 (int)i, (int)btl_proc->proc_endpoint_count);
882 continue;
883 } else if (btl_endpoint->endpoint_state != MCA_BTL_TCP_CLOSED) {
884 found_match = 1;
885 match_btl_endpoint = btl_endpoint;
886 continue;
887 }
888 break;
889 #if OPAL_ENABLE_IPV6
890 case AF_INET6:
891 if( memcmp( &btl_endpoint->endpoint_addr->addr_inet,
892 &(((struct sockaddr_in6*)addr)->sin6_addr),
893 sizeof(struct in6_addr) ) ) {
894 char tmp[2][INET6_ADDRSTRLEN];
895 opal_output_verbose(20, opal_btl_base_framework.framework_output,
896 "btl: tcp: Match incoming connection from %s %s with locally known IP %s failed (iface %d/%d)!\n",
897 OPAL_NAME_PRINT(btl_proc->proc_opal->proc_name),
898 inet_ntop(AF_INET6, (void*)&((struct sockaddr_in6*)addr)->sin6_addr,
899 tmp[0], INET6_ADDRSTRLEN),
900 inet_ntop(AF_INET6, (void*)(struct in6_addr*)&btl_endpoint->endpoint_addr->addr_inet,
901 tmp[1], INET6_ADDRSTRLEN),
902 (int)i, (int)btl_proc->proc_endpoint_count);
903 continue;
904 } else if (btl_endpoint->endpoint_state != MCA_BTL_TCP_CLOSED) {
905 found_match = 1;
906 match_btl_endpoint = btl_endpoint;
907 continue;
908 }
909 break;
910 #endif
911 default:
912 ;
913 }
914
915
916 btl_endpoint->endpoint_state = MCA_BTL_TCP_CONNECTING;
917 (void)mca_btl_tcp_endpoint_accept(btl_endpoint, addr, sd);
918 OPAL_THREAD_UNLOCK(&btl_proc->proc_lock);
919 return;
920 }
921
922
923
924 if (found_match) {
925 (void)mca_btl_tcp_endpoint_accept(match_btl_endpoint, addr, sd);
926 OPAL_THREAD_UNLOCK(&btl_proc->proc_lock);
927 return;
928 }
929
930 CLOSE_THE_SOCKET(sd);
931 {
932 char *addr_str = NULL, *tmp;
933 char ip[128];
934 ip[sizeof(ip) - 1] = '\0';
935
936 for (size_t i = 0; i < btl_proc->proc_endpoint_count; i++) {
937 mca_btl_base_endpoint_t* btl_endpoint = btl_proc->proc_endpoints[i];
938 if (btl_endpoint->endpoint_addr->addr_family != addr->sa_family) {
939 continue;
940 }
941 inet_ntop(btl_endpoint->endpoint_addr->addr_family,
942 (void*) &(btl_endpoint->endpoint_addr->addr_inet),
943 ip, sizeof(ip) - 1);
944 if (NULL == addr_str) {
945 opal_asprintf(&tmp, "\n\t%s", ip);
946 } else {
947 opal_asprintf(&tmp, "%s\n\t%s", addr_str, ip);
948 free(addr_str);
949 }
950 addr_str = tmp;
951 }
952 opal_show_help("help-mpi-btl-tcp.txt", "dropped inbound connection",
953 true, opal_process_info.nodename,
954 getpid(),
955 btl_proc->proc_opal->proc_hostname,
956 OPAL_NAME_PRINT(btl_proc->proc_opal->proc_name),
957 opal_net_get_hostname((struct sockaddr*)addr),
958 btl_proc->proc_endpoint_count,
959 (NULL == addr_str) ? "NONE" : addr_str);
960 if (NULL != addr_str) {
961 free(addr_str);
962 }
963 }
964 OPAL_THREAD_UNLOCK(&btl_proc->proc_lock);
965 }
966
967
968
969
970
971 bool mca_btl_tcp_proc_tosocks(mca_btl_tcp_addr_t* proc_addr,
972 struct sockaddr_storage* output)
973 {
974 memset(output, 0, sizeof (*output));
975 switch (proc_addr->addr_family) {
976 case AF_INET:
977 output->ss_family = AF_INET;
978 memcpy(&((struct sockaddr_in*)output)->sin_addr,
979 &proc_addr->addr_inet, sizeof(struct in_addr));
980 ((struct sockaddr_in*)output)->sin_port = proc_addr->addr_port;
981 break;
982 #if OPAL_ENABLE_IPV6
983 case AF_INET6:
984 {
985 struct sockaddr_in6* inaddr = (struct sockaddr_in6*)output;
986 output->ss_family = AF_INET6;
987 memcpy(&inaddr->sin6_addr, &proc_addr->addr_inet,
988 sizeof (proc_addr->addr_inet));
989 inaddr->sin6_port = proc_addr->addr_port;
990 inaddr->sin6_scope_id = 0;
991 inaddr->sin6_flowinfo = 0;
992 }
993 break;
994 #endif
995 default:
996 opal_output( 0, "mca_btl_tcp_proc: unknown af_family received: %d\n",
997 proc_addr->addr_family );
998 return false;
999 }
1000 return true;
1001 }
1002