root/contrib/build-mca-comps-outside-of-tree/btl_tcp2_proc.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mca_btl_tcp2_proc_construct
  2. mca_btl_tcp2_proc_destruct
  3. mca_btl_tcp2_proc_create
  4. evaluate_assignment
  5. visit
  6. mca_btl_tcp2_initialise_interface
  7. mca_btl_tcp2_retrieve_local_interfaces
  8. mca_btl_tcp2_proc_insert
  9. mca_btl_tcp2_proc_remove
  10. mca_btl_tcp2_proc_lookup
  11. mca_btl_tcp2_proc_accept
  12. mca_btl_tcp2_proc_tosocks

   1 /*
   2  * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
   3  *                         University Research and Technology
   4  *                         Corporation.  All rights reserved.
   5  * Copyright (c) 2004-2010 The University of Tennessee and The University
   6  * Copyright (c) 2004-2012 The University of Tennessee and The University
   7  *                         of Tennessee Research Foundation.  All rights
   8  *                         reserved.
   9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
  10  *                         University of Stuttgart.  All rights reserved.
  11  * Copyright (c) 2004-2005 The Regents of the University of California.
  12  *                         All rights reserved.
  13  * Copyright (c) 2008-2010 Oracle and/or its affiliates.  All rights reserved
  14  * Copyright (c) 2011      Cisco Systems, Inc.  All rights reserved.
  15  * Copyright (c) 2014      Research Organization for Information Science
  16  *                         and Technology (RIST). All rights reserved.
  17  * $COPYRIGHT$
  18  *
  19  * Additional copyrights may follow
  20  *
  21  * $HEADER$
  22  */
  23 
  24 #include "ompi_config.h"
  25 
  26 #ifdef HAVE_ARPA_INET_H
  27 #include <arpa/inet.h>
  28 #endif
  29 #ifdef HAVE_NETINET_IN_H
  30 #include <netinet/in.h>
  31 #endif
  32 
  33 #include "opal/class/opal_hash_table.h"
  34 #include "ompi/mca/btl/base/btl_base_error.h"
  35 #include "ompi/runtime/ompi_module_exchange.h"
  36 #include "opal/util/arch.h"
  37 #include "opal/util/argv.h"
  38 #include "opal/util/if.h"
  39 #include "opal/util/net.h"
  40 
  41 #include "btl_tcp2.h"
  42 #include "btl_tcp2_proc.h"
  43 
/* Object lifecycle hooks; registered with the class system by the
 * OBJ_CLASS_INSTANCE declaration further down. */
static void mca_btl_tcp2_proc_construct(mca_btl_tcp2_proc_t* proc);
static void mca_btl_tcp2_proc_destruct(mca_btl_tcp2_proc_t* proc);

/*
 * File-scope scratch state shared by mca_btl_tcp2_proc_insert() and its
 * helpers.  None of it is protected by a lock of its own.
 */

/* Descriptors of the local interfaces, built lazily by
 * mca_btl_tcp2_retrieve_local_interfaces() and freed again at the end of
 * mca_btl_tcp2_proc_insert(). */
static mca_btl_tcp2_interface_t** local_interfaces = NULL;
/* Kernel interface index -> slot in local_interfaces; -1 means "not seen". */
static int local_kindex_to_index[MAX_KERNEL_INTERFACE_INDEX];
/* Used / allocated slot counts of local_interfaces. */
static size_t num_local_interfaces, max_local_interfaces;
/* Descriptors of the peer's interfaces, rebuilt on every proc_insert call. */
static mca_btl_tcp2_interface_t** peer_interfaces = NULL;
/* Used / allocated slot counts of peer_interfaces. */
static size_t num_peer_interfaces, max_peer_interfaces;
/* Kernel interface index -> slot in peer_interfaces; -1 means "not seen". */
static int peer_kindex_to_index[MAX_KERNEL_INTERFACE_INDEX];
/* Best permutation found so far: best_assignment[row] is the column
 * chosen for that row (written by evaluate_assignment()). */
static unsigned int *best_assignment;
/* Total weight of the best permutation found so far. */
static int max_assignment_weight;
/* Number of usable pairs in the best permutation found so far. */
static int max_assignment_cardinality;
/* Square matrix of connection qualities, indexed [local slot][peer slot]. */
static enum mca_btl_tcp2_connection_quality **weights;
/* Peer address that goes with each weights[][] entry. */
static struct mca_btl_tcp2_addr_t ***best_addr;

/* Declare the class: instances derive from opal_list_item_t and use the
 * constructor/destructor prototyped above. */
OBJ_CLASS_INSTANCE( mca_btl_tcp2_proc_t,
                    opal_list_item_t,
                    mca_btl_tcp2_proc_construct,
                    mca_btl_tcp2_proc_destruct );
  63 
  64 void mca_btl_tcp2_proc_construct(mca_btl_tcp2_proc_t* tcp_proc)
  65 {
  66     tcp_proc->proc_ompi = 0;
  67     tcp_proc->proc_addrs = NULL;
  68     tcp_proc->proc_addr_count = 0;
  69     tcp_proc->proc_endpoints = NULL;
  70     tcp_proc->proc_endpoint_count = 0;
  71     OBJ_CONSTRUCT(&tcp_proc->proc_lock, opal_mutex_t);
  72 }
  73 
  74 /*
  75  * Cleanup ib proc instance
  76  */
  77 
  78 void mca_btl_tcp2_proc_destruct(mca_btl_tcp2_proc_t* tcp_proc)
  79 {
  80     /* remove from list of all proc instances */
  81     MCA_BTL_TCP_CRITICAL_SECTION_ENTER(&mca_btl_tcp_component.tcp_lock);
  82     opal_hash_table_remove_value_uint64(&mca_btl_tcp_component.tcp_procs,
  83                                         ompi_rte_hash_name(&tcp_proc->proc_ompi->proc_name));
  84     MCA_BTL_TCP_CRITICAL_SECTION_LEAVE(&mca_btl_tcp_component.tcp_lock);
  85 
  86     /* release resources */
  87     if(NULL != tcp_proc->proc_endpoints) {
  88         free(tcp_proc->proc_endpoints);
  89     }
  90     OBJ_DESTRUCT(&tcp_proc->proc_lock);
  91 }
  92 
  93 /*
  94  * Create a TCP process structure. There is a one-to-one correspondence
  95  * between a ompi_proc_t and a mca_btl_tcp2_proc_t instance. We cache
  96  * additional data (specifically the list of mca_btl_tcp2_endpoint_t instances,
  97  * and published addresses) associated w/ a given destination on this
  98  * datastructure.
  99  */
 100 
 101 mca_btl_tcp2_proc_t* mca_btl_tcp2_proc_create(ompi_proc_t* ompi_proc)
 102 {
 103     int rc;
 104     size_t size;
 105     mca_btl_tcp2_proc_t* btl_proc;
 106 
 107     MCA_BTL_TCP_CRITICAL_SECTION_ENTER(&mca_btl_tcp_component.tcp_lock);
 108     rc = opal_hash_table_get_value_uint64(&mca_btl_tcp_component.tcp_procs,
 109                                           hash, (void**)&btl_proc);
 110     if(OMPI_SUCCESS == rc) {
 111         MCA_BTL_TCP_CRITICAL_SECTION_LEAVE(&mca_btl_tcp_component.tcp_lock);
 112         return btl_proc;
 113     }
 114 
 115     btl_proc = OBJ_NEW(mca_btl_tcp2_proc_t);
 116     if(NULL == btl_proc)
 117         return NULL;
 118     btl_proc->proc_ompi = ompi_proc;
 119 
 120     /* add to hash table of all proc instance */
 121     opal_hash_table_set_value_uint64(&mca_btl_tcp_component.tcp_procs,
 122                                      hash, btl_proc);
 123     MCA_BTL_TCP_CRITICAL_SECTION_LEAVE(&mca_btl_tcp_component.tcp_lock);
 124 
 125     /* lookup tcp parameters exported by this proc */
 126     rc = ompi_modex_recv( &mca_btl_tcp2_component.super.btl_version,
 127                                   ompi_proc,
 128                                   (void**)&btl_proc->proc_addrs,
 129                                   &size );
 130     if(rc != OMPI_SUCCESS) {
 131         BTL_ERROR(("mca_base_modex_recv: failed with return value=%d", rc));
 132         OBJ_RELEASE(btl_proc);
 133         return NULL;
 134     }
 135     if(0 != (size % sizeof(mca_btl_tcp2_addr_t))) {
 136         BTL_ERROR(("mca_base_modex_recv: invalid size %lu: btl-size: %lu\n",
 137           (unsigned long) size, (unsigned long)sizeof(mca_btl_tcp2_addr_t)));
 138         return NULL;
 139     }
 140     btl_proc->proc_addr_count = size / sizeof(mca_btl_tcp2_addr_t);
 141 
 142     /* allocate space for endpoint array - one for each exported address */
 143     btl_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
 144         malloc((1 + btl_proc->proc_addr_count) *
 145                 sizeof(mca_btl_base_endpoint_t*));
 146     if(NULL == btl_proc->proc_endpoints) {
 147         OBJ_RELEASE(btl_proc);
 148         return NULL;
 149     }
 150     if(NULL == mca_btl_tcp2_component.tcp_local && ompi_proc == ompi_proc_local()) {
 151         mca_btl_tcp2_component.tcp_local = btl_proc;
 152     }
 153     {
 154         /* convert the OMPI addr_family field to OS constants,
 155          * so we can check for AF_INET (or AF_INET6) and don't have
 156          * to deal with byte ordering anymore.
 157          */
 158         unsigned int i;
 159         for (i = 0; i < btl_proc->proc_addr_count; i++) {
 160             if (MCA_BTL_TCP_AF_INET == btl_proc->proc_addrs[i].addr_family) {
 161                 btl_proc->proc_addrs[i].addr_family = AF_INET;
 162             }
 163 #if OPAL_ENABLE_IPV6
 164             if (MCA_BTL_TCP_AF_INET6 == btl_proc->proc_addrs[i].addr_family) {
 165                 btl_proc->proc_addrs[i].addr_family = AF_INET6;
 166             }
 167 #endif
 168         }
 169     }
 170     return btl_proc;
 171 }
 172 
 173 
 174 
 175 static void evaluate_assignment(int *a) {
 176     size_t i;
 177     unsigned int max_interfaces = num_local_interfaces;
 178     int assignment_weight = 0;
 179     int assignment_cardinality = 0;
 180 
 181     if(max_interfaces < num_peer_interfaces) {
 182         max_interfaces = num_peer_interfaces;
 183     }
 184 
 185     for(i = 0; i < max_interfaces; ++i) {
 186         if(0 < weights[i][a[i]-1]) {
 187             ++assignment_cardinality;
 188             assignment_weight += weights[i][a[i]-1];
 189         }
 190     }
 191 
 192     /*
 193      * check wether current solution beats all previous solutions
 194      */
 195     if(assignment_cardinality > max_assignment_cardinality
 196             || (assignment_cardinality == max_assignment_cardinality
 197                 && assignment_weight > max_assignment_weight)) {
 198 
 199         for(i = 0; i < max_interfaces; ++i) {
 200              best_assignment[i] = a[i]-1;
 201         }
 202         max_assignment_weight = assignment_weight;
 203         max_assignment_cardinality = assignment_cardinality;
 204     }
 205 }
 206 
 207 static void visit(int k, int level, int siz, int *a)
 208 {
 209     level = level+1; a[k] = level;
 210 
 211     if (level == siz) {
 212         evaluate_assignment(a);
 213     } else {
 214         int i;
 215         for ( i = 0; i < siz; i++)
 216             if (a[i] == 0)
 217                 visit(i, level, siz, a);
 218     }
 219 
 220     level = level-1; a[k] = 0;
 221 }
 222 
 223 
 224 static void mca_btl_tcp2_initialise_interface(mca_btl_tcp2_interface_t* interface,
 225         int ifk_index, int index)
 226 {
 227     interface->kernel_index = ifk_index;
 228     interface->peer_interface = -1;
 229     interface->ipv4_address = NULL;
 230     interface->ipv6_address =  NULL;
 231     interface->index = index;
 232     interface->inuse = 0;
 233 }
 234 
 235 static mca_btl_tcp2_interface_t** mca_btl_tcp2_retrieve_local_interfaces(void)
 236 {
 237     struct sockaddr_storage local_addr;
 238     char local_if_name[IF_NAMESIZE];
 239     char **include, **exclude, **argv;
 240     int idx;
 241 
 242     if( NULL != local_interfaces )
 243         return local_interfaces;
 244 
 245     max_local_interfaces = MAX_KERNEL_INTERFACES;
 246     num_local_interfaces = 0;
 247     local_interfaces = (mca_btl_tcp2_interface_t**)calloc( max_local_interfaces, sizeof(mca_btl_tcp2_interface_t*) );
 248     if( NULL == local_interfaces )
 249         return NULL;
 250 
 251     memset(local_kindex_to_index, -1, sizeof(int)*MAX_KERNEL_INTERFACE_INDEX);
 252 
 253     /* Collect up the list of included and excluded interfaces, if any */
 254     include = opal_argv_split(mca_btl_tcp2_component.tcp_if_include,',');
 255     exclude = opal_argv_split(mca_btl_tcp2_component.tcp_if_exclude,',');
 256 
 257     /*
 258      * identify all kernel interfaces and the associated addresses of
 259      * the local node
 260      */
 261     for( idx = opal_ifbegin(); idx >= 0; idx = opal_ifnext (idx) ) {
 262         int kindex, index;
 263         bool skip = false;
 264 
 265         opal_ifindextoaddr (idx, (struct sockaddr*) &local_addr, sizeof (local_addr));
 266         opal_ifindextoname (idx, local_if_name, sizeof (local_if_name));
 267 
 268         /* If we were given a list of included interfaces, then check
 269          * to see if the current one is a member of this set.  If so,
 270          * drop down and complete processing.  If not, skip it and
 271          * continue on to the next one.  Note that providing an include
 272          * list will override providing an exclude list as the two are
 273          * mutually exclusive.  This matches how it works in
 274          * mca_btl_tcp2_component_create_instances() which is the function
 275          * that exports the interfaces.  */
 276         if(NULL != include) {
 277             argv = include;
 278             skip = true;
 279             while(argv && *argv) {
 280                 /* When comparing included interfaces, we look for exact matches.
 281                    That is why we are using strcmp() here. */
 282                 if (0 == strcmp(*argv, local_if_name)) {
 283                     skip = false;
 284                     break;
 285                 }
 286                 argv++;
 287             }
 288         } else if (NULL != exclude) {
 289             /* If we were given a list of excluded interfaces, then check to see if the
 290              * current one is a member of this set.  If not, drop down and complete
 291              * processing.  If so, skip it and continue on to the next one. */
 292             argv = exclude;
 293             while(argv && *argv) {
 294                 /* When looking for interfaces to exclude, we only look at
 295                  * the number of characters equal to what the user provided.
 296                  * For example, excluding "lo" excludes "lo", "lo0" and
 297                  * anything that starts with "lo" */
 298                 if(0 == strncmp(*argv, local_if_name, strlen(*argv))) {
 299                     skip = true;
 300                     break;
 301                 }
 302                 argv++;
 303             }
 304         }
 305         if (true == skip) {
 306             /* This interface is not part of the requested set, so skip it */
 307             continue;
 308         }
 309 
 310         kindex = opal_ifindextokindex(idx);
 311         index = local_kindex_to_index[kindex];
 312 
 313         /* create entry for this kernel index previously not seen */
 314         if(-1 == index) {
 315             index = num_local_interfaces++;
 316             local_kindex_to_index[kindex] = index;
 317 
 318             if( num_local_interfaces == max_local_interfaces ) {
 319                 max_local_interfaces <<= 1;
 320                 local_interfaces = (mca_btl_tcp2_interface_t**)realloc( local_interfaces,
 321                                                                        max_local_interfaces * sizeof(mca_btl_tcp2_interface_t*) );
 322                 if( NULL == local_interfaces )
 323                     return NULL;
 324             }
 325             local_interfaces[index] = (mca_btl_tcp2_interface_t *) malloc(sizeof(mca_btl_tcp2_interface_t));
 326             assert(NULL != local_interfaces[index]);
 327             mca_btl_tcp2_initialise_interface(local_interfaces[index], kindex, index);
 328         }
 329 
 330         switch(local_addr.ss_family) {
 331         case AF_INET:
 332             /* if AF is disabled, skip it completely */
 333             if (4 == mca_btl_tcp2_component.tcp_disable_family) {
 334                 continue;
 335             }
 336 
 337             local_interfaces[local_kindex_to_index[kindex]]->ipv4_address =
 338                 (struct sockaddr_storage*) malloc(sizeof(local_addr));
 339             memcpy(local_interfaces[local_kindex_to_index[kindex]]->ipv4_address,
 340                    &local_addr, sizeof(local_addr));
 341             opal_ifindextomask(idx,
 342                                &local_interfaces[local_kindex_to_index[kindex]]->ipv4_netmask,
 343                                sizeof(int));
 344             break;
 345         case AF_INET6:
 346             /* if AF is disabled, skip it completely */
 347             if (6 == mca_btl_tcp2_component.tcp_disable_family) {
 348                 continue;
 349             }
 350 
 351             local_interfaces[local_kindex_to_index[kindex]]->ipv6_address
 352                 = (struct sockaddr_storage*) malloc(sizeof(local_addr));
 353             memcpy(local_interfaces[local_kindex_to_index[kindex]]->ipv6_address,
 354                    &local_addr, sizeof(local_addr));
 355             opal_ifindextomask(idx,
 356                                &local_interfaces[local_kindex_to_index[kindex]]->ipv6_netmask,
 357                                sizeof(int));
 358             break;
 359         default:
 360             opal_output(0, "unknown address family for tcp: %d\n",
 361                         local_addr.ss_family);
 362         }
 363     }
 364     opal_argv_free(include);
 365     opal_argv_free(exclude);
 366 
 367     return local_interfaces;
 368 }
 369 /*
 370  * Note that this routine must be called with the lock on the process
 371  * already held.  Insert a btl instance into the proc array and assign
 372  * it an address.
 373  */
 374 int mca_btl_tcp2_proc_insert( mca_btl_tcp2_proc_t* btl_proc,
 375                              mca_btl_base_endpoint_t* btl_endpoint )
 376 {
 377     struct sockaddr_storage endpoint_addr_ss;
 378     unsigned int perm_size;
 379     int rc, *a = NULL;
 380     size_t i, j;
 381 
 382 #ifndef WORDS_BIGENDIAN
 383     /* if we are little endian and our peer is not so lucky, then we
 384        need to put all information sent to him in big endian (aka
 385        Network Byte Order) and expect all information received to
 386        be in NBO.  Since big endian machines always send and receive
 387        in NBO, we don't care so much about that case. */
 388     if (btl_proc->proc_ompi->super.proc_arch & OPAL_ARCH_ISBIGENDIAN) {
 389         btl_endpoint->endpoint_nbo = true;
 390     }
 391 #endif
 392 
 393     /* insert into endpoint array */
 394     btl_endpoint->endpoint_proc = btl_proc;
 395     btl_proc->proc_endpoints[btl_proc->proc_endpoint_count++] = btl_endpoint;
 396 
 397     /* sanity checks */
 398     if( NULL == local_interfaces ) {
 399         if( NULL == mca_btl_tcp2_retrieve_local_interfaces() )
 400             return OMPI_ERR_OUT_OF_RESOURCE;
 401     }
 402     if( 0 == num_local_interfaces ) {
 403         return OMPI_ERR_UNREACH;
 404     }
 405 
 406     if( NULL == peer_interfaces ) {
 407         max_peer_interfaces = max_local_interfaces;
 408         peer_interfaces = (mca_btl_tcp2_interface_t**)malloc( max_peer_interfaces * sizeof(mca_btl_tcp2_interface_t*) );
 409     }
 410     num_peer_interfaces = 0;
 411     memset(peer_kindex_to_index, -1, sizeof(int)*MAX_KERNEL_INTERFACE_INDEX);
 412     memset(peer_interfaces, 0, max_peer_interfaces * sizeof(mca_btl_tcp2_interface_t*));
 413 
 414     /*
 415      * identify all kernel interfaces and the associated addresses of
 416      * the peer
 417      */
 418 
 419     for( i = 0; i < btl_proc->proc_addr_count; i++ ) {
 420 
 421         int index;
 422 
 423         mca_btl_tcp2_addr_t* endpoint_addr = btl_proc->proc_addrs + i;
 424 
 425         mca_btl_tcp2_proc_tosocks (endpoint_addr, &endpoint_addr_ss);
 426 
 427         index = peer_kindex_to_index[endpoint_addr->addr_ifkindex];
 428 
 429         if(-1 == index) {
 430             index = num_peer_interfaces++;
 431             peer_kindex_to_index[endpoint_addr->addr_ifkindex] = index;
 432             if( num_peer_interfaces == max_peer_interfaces ) {
 433                 max_peer_interfaces <<= 1;
 434                 peer_interfaces = (mca_btl_tcp2_interface_t**)realloc( peer_interfaces,
 435                                                                       max_peer_interfaces * sizeof(mca_btl_tcp2_interface_t*) );
 436                 if( NULL == peer_interfaces )
 437                     return OMPI_ERR_OUT_OF_RESOURCE;
 438             }
 439             peer_interfaces[index] = (mca_btl_tcp2_interface_t *) malloc(sizeof(mca_btl_tcp2_interface_t));
 440             mca_btl_tcp2_initialise_interface(peer_interfaces[index],
 441                                              endpoint_addr->addr_ifkindex, index);
 442         }
 443 
 444         /*
 445          * in case one of the peer addresses is already in use,
 446          * mark the complete peer interface as 'not available'
 447          */
 448         if(endpoint_addr->addr_inuse) {
 449             peer_interfaces[index]->inuse = 1;
 450         }
 451 
 452         switch(endpoint_addr_ss.ss_family) {
 453         case AF_INET:
 454             peer_interfaces[index]->ipv4_address = (struct sockaddr_storage*) malloc(sizeof(endpoint_addr_ss));
 455             peer_interfaces[index]->ipv4_endpoint_addr = endpoint_addr;
 456             memcpy(peer_interfaces[index]->ipv4_address,
 457                    &endpoint_addr_ss, sizeof(endpoint_addr_ss));
 458             break;
 459         case AF_INET6:
 460             peer_interfaces[index]->ipv6_address = (struct sockaddr_storage*) malloc(sizeof(endpoint_addr_ss));
 461             peer_interfaces[index]->ipv6_endpoint_addr = endpoint_addr;
 462             memcpy(peer_interfaces[index]->ipv6_address,
 463                    &endpoint_addr_ss, sizeof(endpoint_addr_ss));
 464             break;
 465         default:
 466             opal_output(0, "unknown address family for tcp: %d\n",
 467                         endpoint_addr_ss.ss_family);
 468             /*
 469              * return OMPI_UNREACH or some error, as this is not
 470              * good
 471              */
 472         }
 473     }
 474 
 475     /*
 476      * assign weights to each possible pair of interfaces
 477      */
 478 
 479     perm_size = num_local_interfaces;
 480     if(num_peer_interfaces > perm_size) {
 481         perm_size = num_peer_interfaces;
 482     }
 483 
 484     weights = (enum mca_btl_tcp2_connection_quality**) malloc(perm_size
 485                                                              * sizeof(enum mca_btl_tcp2_connection_quality*));
 486 
 487     best_addr = (mca_btl_tcp2_addr_t ***) malloc(perm_size
 488                                                 * sizeof(mca_btl_tcp2_addr_t **));
 489     for(i = 0; i < perm_size; ++i) {
 490         weights[i] = (enum mca_btl_tcp2_connection_quality*) malloc(perm_size *
 491                                                                    sizeof(enum mca_btl_tcp2_connection_quality));
 492         memset(weights[i], 0, perm_size * sizeof(enum mca_btl_tcp2_connection_quality));
 493 
 494         best_addr[i] = (mca_btl_tcp2_addr_t **) malloc(perm_size *
 495                                                       sizeof(mca_btl_tcp2_addr_t *));
 496         memset(best_addr[i], 0, perm_size * sizeof(mca_btl_tcp2_addr_t *));
 497     }
 498 
 499 
 500     for(i=0; i<num_local_interfaces; ++i) {
 501         for(j=0; j<num_peer_interfaces; ++j) {
 502 
 503             /*  initially, assume no connection is possible */
 504             weights[i][j] = CQ_NO_CONNECTION;
 505 
 506             /* check state of ipv4 address pair */
 507             if(NULL != local_interfaces[i]->ipv4_address &&
 508                NULL != peer_interfaces[j]->ipv4_address) {
 509 
 510                 /*  check for loopback */
 511                 if ((opal_net_islocalhost((struct sockaddr *)local_interfaces[i]->ipv4_address)
 512                      && !opal_net_islocalhost((struct sockaddr *)peer_interfaces[j]->ipv4_address))
 513                     || (opal_net_islocalhost((struct sockaddr *)peer_interfaces[j]->ipv4_address)
 514                         && !opal_net_islocalhost((struct sockaddr *)local_interfaces[i]->ipv4_address))
 515                     || (opal_net_islocalhost((struct sockaddr *)local_interfaces[i]->ipv4_address)
 516                         && !opal_ifislocal(btl_proc->proc_ompi->proc_hostname))) {
 517 
 518                     /* No connection is possible on these interfaces */
 519 
 520                     /*  check for RFC1918 */
 521                 } else if(opal_net_addr_isipv4public((struct sockaddr*) local_interfaces[i]->ipv4_address)
 522                           && opal_net_addr_isipv4public((struct sockaddr*)
 523                                                         peer_interfaces[j]->ipv4_address)) {
 524                     if(opal_net_samenetwork((struct sockaddr*) local_interfaces[i]->ipv4_address,
 525                                             (struct sockaddr*) peer_interfaces[j]->ipv4_address,
 526                                             local_interfaces[i]->ipv4_netmask)) {
 527                         weights[i][j] = CQ_PUBLIC_SAME_NETWORK;
 528                     } else {
 529                         weights[i][j] = CQ_PUBLIC_DIFFERENT_NETWORK;
 530                     }
 531                     best_addr[i][j] = peer_interfaces[j]->ipv4_endpoint_addr;
 532                     continue;
 533                 } else {
 534                     if(opal_net_samenetwork((struct sockaddr*) local_interfaces[i]->ipv4_address,
 535                                             (struct sockaddr*) peer_interfaces[j]->ipv4_address,
 536                                             local_interfaces[i]->ipv4_netmask)) {
 537                         weights[i][j] = CQ_PRIVATE_SAME_NETWORK;
 538                     } else {
 539                         weights[i][j] = CQ_PRIVATE_DIFFERENT_NETWORK;
 540                     }
 541                     best_addr[i][j] = peer_interfaces[j]->ipv4_endpoint_addr;
 542                 }
 543             }
 544 
 545             /* check state of ipv6 address pair - ipv6 is always public,
 546              * since link-local addresses are skipped in opal_ifinit()
 547              */
 548             if(NULL != local_interfaces[i]->ipv6_address &&
 549                NULL != peer_interfaces[j]->ipv6_address) {
 550 
 551                 /*  check for loopback */
 552                 if ((opal_net_islocalhost((struct sockaddr *)local_interfaces[i]->ipv6_address)
 553                      && !opal_net_islocalhost((struct sockaddr *)peer_interfaces[j]->ipv6_address))
 554                     || (opal_net_islocalhost((struct sockaddr *)peer_interfaces[j]->ipv6_address)
 555                         && !opal_net_islocalhost((struct sockaddr *)local_interfaces[i]->ipv6_address))
 556                     || (opal_net_islocalhost((struct sockaddr *)local_interfaces[i]->ipv6_address)
 557                         && !opal_ifislocal(btl_proc->proc_ompi->proc_hostname))) {
 558 
 559                     /* No connection is possible on these interfaces */
 560 
 561                 } else if(opal_net_samenetwork((struct sockaddr*) local_interfaces[i]->ipv6_address,
 562                                                (struct sockaddr*) peer_interfaces[j]->ipv6_address,
 563                                                local_interfaces[i]->ipv6_netmask)) {
 564                     weights[i][j] = CQ_PUBLIC_SAME_NETWORK;
 565                 } else {
 566                     weights[i][j] = CQ_PUBLIC_DIFFERENT_NETWORK;
 567                 }
 568                 best_addr[i][j] = peer_interfaces[j]->ipv6_endpoint_addr;
 569             }
 570 
 571         } /* for each peer interface */
 572     } /* for each local interface */
 573 
 574     /*
 575      * determine the size of the set to permute (max number of
 576      * interfaces
 577      */
 578 
 579     best_assignment = (unsigned int *) malloc (perm_size * sizeof(int));
 580 
 581     a = (int *) malloc(perm_size * sizeof(int));
 582     if (NULL == a) {
 583         return OMPI_ERR_OUT_OF_RESOURCE;
 584     }
 585 
 586     /* Can only find the best set of connections when the number of
 587      * interfaces is not too big.  When it gets larger, we fall back
 588      * to a simpler and faster (and not as optimal) algorithm.
 589      * See ticket https://svn.open-mpi.org/trac/ompi/ticket/2031
 590      * for more details about this issue.  */
 591     if (perm_size <= MAX_PERMUTATION_INTERFACES) {
 592         memset(a, 0, perm_size * sizeof(int));
 593         max_assignment_cardinality = -1;
 594         max_assignment_weight = -1;
 595         visit(0, -1, perm_size, a);
 596 
 597         rc = OMPI_ERR_UNREACH;
 598         for(i = 0; i < perm_size; ++i) {
 599             if(best_assignment[i] > num_peer_interfaces
 600                || weights[i][best_assignment[i]] == CQ_NO_CONNECTION
 601                || peer_interfaces[best_assignment[i]]->inuse
 602                || NULL == peer_interfaces[best_assignment[i]]) {
 603                 continue;
 604             }
 605             peer_interfaces[best_assignment[i]]->inuse++;
 606             btl_endpoint->endpoint_addr = best_addr[i][best_assignment[i]];
 607             btl_endpoint->endpoint_addr->addr_inuse++;
 608             rc = OMPI_SUCCESS;
 609             break;
 610         }
 611     } else {
 612         enum mca_btl_tcp2_connection_quality max;
 613         int i_max = 0, j_max = 0;
 614         /* Find the best connection that is not in use.  Save away
 615          * the indices of the best location. */
 616         max = CQ_NO_CONNECTION;
 617         for(i=0; i<num_local_interfaces; ++i) {
 618             for(j=0; j<num_peer_interfaces; ++j) {
 619                 if (!peer_interfaces[j]->inuse) {
 620                     if (weights[i][j] > max) {
 621                         max = weights[i][j];
 622                         i_max = i;
 623                         j_max = j;
 624                     }
 625                 }
 626             }
 627         }
 628         /* Now see if there is a some type of connection available. */
 629         rc = OMPI_ERR_UNREACH;
 630         if (CQ_NO_CONNECTION != max) {
 631             peer_interfaces[j_max]->inuse++;
 632             btl_endpoint->endpoint_addr = best_addr[i_max][j_max];
 633             btl_endpoint->endpoint_addr->addr_inuse++;
 634             rc = OMPI_SUCCESS;
 635         }
 636     }
 637 
 638     for(i = 0; i < perm_size; ++i) {
 639         free(weights[i]);
 640         free(best_addr[i]);
 641     }
 642 
 643     for(i = 0; i < num_peer_interfaces; ++i) {
 644         if(NULL != peer_interfaces[i]->ipv4_address) {
 645             free(peer_interfaces[i]->ipv4_address);
 646         }
 647         if(NULL != peer_interfaces[i]->ipv6_address) {
 648             free(peer_interfaces[i]->ipv6_address);
 649         }
 650         free(peer_interfaces[i]);
 651     }
 652     free(peer_interfaces);
 653     peer_interfaces = NULL;
 654     max_peer_interfaces = 0;
 655 
 656     for(i = 0; i < num_local_interfaces; ++i) {
 657         if(NULL != local_interfaces[i]->ipv4_address) {
 658             free(local_interfaces[i]->ipv4_address);
 659         }
 660         if(NULL != local_interfaces[i]->ipv6_address) {
 661             free(local_interfaces[i]->ipv6_address);
 662         }
 663         free(local_interfaces[i]);
 664     }
 665     free(local_interfaces);
 666     local_interfaces = NULL;
 667     max_local_interfaces = 0;
 668 
 669     free(weights);
 670     free(best_addr);
 671     free(best_assignment);
 672     free(a);
 673 
 674     return rc;
 675 }
 676 
 677 /*
 678  * Remove an endpoint from the proc array and indicate the address is
 679  * no longer in use.
 680  */
 681 
 682 int mca_btl_tcp2_proc_remove(mca_btl_tcp2_proc_t* btl_proc, mca_btl_base_endpoint_t* btl_endpoint)
 683 {
 684     size_t i;
 685     MCA_BTL_TCP_CRITICAL_SECTION_ENTER(&btl_proc->proc_lock);
 686     for( i = 0; i < btl_proc->proc_endpoint_count; i++ ) {
 687         if(btl_proc->proc_endpoints[i] == btl_endpoint) {
 688             memmove(btl_proc->proc_endpoints+i, btl_proc->proc_endpoints+i+1,
 689                 (btl_proc->proc_endpoint_count-i-1)*sizeof(mca_btl_base_endpoint_t*));
 690             if(--btl_proc->proc_endpoint_count == 0) {
 691                 OPAL_THREAD_UNLOCK(&btl_proc->proc_lock);
 692                 OBJ_RELEASE(btl_proc);
 693                 return OMPI_SUCCESS;
 694             }
 695             /* The endpoint_addr may still be NULL if this enpoint is
 696                being removed early in the wireup sequence (e.g., if it
 697                is unreachable by all other procs) */
 698             if (NULL != btl_endpoint->endpoint_addr) {
 699                 btl_endpoint->endpoint_addr->addr_inuse--;
 700             }
 701             break;
 702         }
 703     }
 704     MCA_BTL_TCP_CRITICAL_SECTION_LEAVE(&btl_proc->proc_lock);
 705     return OMPI_SUCCESS;
 706 }
 707 
 708 /*
 709  * Look for an existing TCP process instance based on the globally unique
 710  * process identifier.
 711  */
 712 mca_btl_tcp2_proc_t* mca_btl_tcp2_proc_lookup(const orte_process_name_t *name)
 713 {
 714     mca_btl_tcp_proc_t* proc = NULL;
 715     MCA_BTL_TCP_CRITICAL_SECTION_ENTER(&mca_btl_tcp_component.tcp_lock);
 716     opal_hash_table_get_value_uint64(&mca_btl_tcp_component.tcp_procs,
 717                                      ompi_rte_hash_name(name), (void**)&proc);
 718     MCA_BTL_TCP_CRITICAL_SECTION_LEAVE(&mca_btl_tcp_component.tcp_lock);
 719     return proc;
 720 }
 721 
 722 /*
 723  * loop through all available BTLs for one matching the source address
 724  * of the request.
 725  */
 726 bool mca_btl_tcp2_proc_accept(mca_btl_tcp2_proc_t* btl_proc, struct sockaddr* addr, int sd)
 727 {
 728     size_t i;
 729     MCA_BTL_TCP_CRITICAL_SECTION_ENTER(&btl_proc->proc_lock);
 730     for( i = 0; i < btl_proc->proc_endpoint_count; i++ ) {
 731         mca_btl_base_endpoint_t* btl_endpoint = btl_proc->proc_endpoints[i];
 732         /* Check all conditions before going to try to accept the connection. */
 733         if( btl_endpoint->endpoint_addr->addr_family != addr->sa_family ) {
 734             continue;
 735         }
 736 
 737         switch (addr->sa_family) {
 738         case AF_INET:
 739             if( memcmp( &btl_endpoint->endpoint_addr->addr_inet,
 740                         &(((struct sockaddr_in*)addr)->sin_addr),
 741                         sizeof(struct in_addr) ) ) {
 742                 continue;
 743             }
 744             break;
 745 #if OPAL_ENABLE_IPV6
 746         case AF_INET6:
 747             if( memcmp( &btl_endpoint->endpoint_addr->addr_inet,
 748                         &(((struct sockaddr_in6*)addr)->sin6_addr),
 749                         sizeof(struct in6_addr) ) ) {
 750                 continue;
 751             }
 752             break;
 753 #endif
 754         default:
 755             ;
 756         }
 757 
 758         if(mca_btl_tcp_endpoint_accept(btl_endpoint, addr, sd)) {
 759             MCA_BTL_TCP_CRITICAL_SECTION_LEAVE(&btl_proc->proc_lock);
 760             return true;
 761         }
 762     }
 763     MCA_BTL_TCP_CRITICAL_SECTION_LEAVE(&btl_proc->proc_lock);
 764     return false;
 765 }
 766 
 767 /*
 768  * convert internal data structure (mca_btl_tcp2_addr_t) to sockaddr_storage
 769  *
 770  */
 771 bool mca_btl_tcp2_proc_tosocks(mca_btl_tcp2_addr_t* proc_addr,
 772                               struct sockaddr_storage* output)
 773 {
 774     memset(output, 0, sizeof (*output));
 775     switch (proc_addr->addr_family) {
 776     case AF_INET:
 777         output->ss_family = AF_INET;
 778         memcpy(&((struct sockaddr_in*)output)->sin_addr,
 779                &proc_addr->addr_inet, sizeof(struct in_addr));
 780         ((struct sockaddr_in*)output)->sin_port = proc_addr->addr_port;
 781         break;
 782 #if OPAL_ENABLE_IPV6
 783     case AF_INET6:
 784         {
 785             struct sockaddr_in6* inaddr = (struct sockaddr_in6*)output;
 786             output->ss_family = AF_INET6;
 787             memcpy(&inaddr->sin6_addr, &proc_addr->addr_inet,
 788                    sizeof (proc_addr->addr_inet));
 789             inaddr->sin6_port = proc_addr->addr_port;
 790             inaddr->sin6_scope_id = 0;
 791             inaddr->sin6_flowinfo = 0;
 792         }
 793         break;
 794 #endif
 795     default:
 796         opal_output( 0, "mca_btl_tcp2_proc: unknown af_family received: %d\n",
 797                      proc_addr->addr_family );
 798         return false;
 799     }
 800     return true;
 801 }
 802 

/* [<][>][^][v][top][bottom][index][help] */