root/opal/mca/reachable/weighted/reachable_weighted.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. weighted_init
  2. weighted_fini
  3. weighted_reachable
  4. get_weights
  5. calculate_weight

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2014-2015 Intel, Inc.  All rights reserved.
   4  * Copyright (c) 2014      Research Organization for Information Science
   5  *                         and Technology (RIST). All rights reserved.
   6  * Copyright (c) 2014      Mellanox Technologies, Inc.
   7  *                         All rights reserved.
   8  * Copyright (c) 2017      Amazon.com, Inc. or its affiliates.
   9  *                         All Rights reserved.
  10  * $COPYRIGHT$
  11  *
  12  * Additional copyrights may follow
  13  *
  14  * $HEADER$
  15  */
  16 
  17 #include "opal_config.h"
  18 #include "opal/constants.h"
  19 #include "opal/types.h"
  20 
  21 #include <string.h>
  22 #ifdef HAVE_UNISTD_H
  23 #include <unistd.h>
  24 #endif
  25 #ifdef HAVE_MATH_H
  26 #include <math.h>
  27 #endif
  28 
  29 #include "opal/mca/if/if.h"
  30 
  31 #include "opal/mca/reachable/base/base.h"
  32 #include "reachable_weighted.h"
  33 #include "opal/util/net.h"
  34 #include "opal/util/string_copy.h"
  35 
/* Module entry points; these are the three functions placed in
 * opal_reachable_weighted_module below. */
static int weighted_init(void);
static int weighted_fini(void);
static opal_reachable_t* weighted_reachable(opal_list_t *local_if,
                                            opal_list_t *remote_if);

/* Internal helpers: get_weights() scores one (local, remote) interface
 * pair; calculate_weight() combines two bandwidths and a connection
 * quality into a single integer weight. */
static int get_weights(opal_if_t *local_if, opal_if_t *remote_if);
static int calculate_weight(int bandwidth_local, int bandwidth_remote,
                            int connection_quality);
  44 
  45 /*
  46  * Describes the quality of a possible connection between a local and
  47  * a remote network interface.  Highest connection quality is assigned
  48  * to connections between interfaces on same network.  This is because
  49  * same network implies a single hop to destination.  Public addresses
  50  * are preferred over private addresses.  This is all guessing,
  51  * because we don't know actual network topology.
  52  */
  53 enum connection_quality {
  54     CQ_NO_CONNECTION = 0,
  55     CQ_PRIVATE_DIFFERENT_NETWORK = 50,
  56     CQ_PRIVATE_SAME_NETWORK = 80,
  57     CQ_PUBLIC_DIFFERENT_NETWORK = 90,
  58     CQ_PUBLIC_SAME_NETWORK = 100
  59 };
  60 
/*
 * Function table exported by the weighted reachable component.
 *
 * The initializers are positional, so their order must match the member
 * order of opal_reachable_base_module_t, which is declared outside this
 * file.
 */
const opal_reachable_base_module_t opal_reachable_weighted_module = {
    weighted_init,
    weighted_fini,
    weighted_reachable
};
  66 
/* File-scope state: incremented by weighted_init() and decremented by
 * weighted_fini().  In the code shown in this file the value is never
 * read back. */
static int init_cntr = 0;
  69 
  70 
  71 static int weighted_init(void)
  72 {
  73     ++init_cntr;
  74 
  75     return OPAL_SUCCESS;
  76 }
  77 
  78 static int weighted_fini(void)
  79 {
  80     --init_cntr;
  81 
  82     return OPAL_SUCCESS;
  83 }
  84 
  85 
  86 static opal_reachable_t* weighted_reachable(opal_list_t *local_if,
  87                                             opal_list_t *remote_if)
  88 {
  89     opal_reachable_t *reachable_results = NULL;
  90     int i, j;
  91     opal_if_t *local_iter, *remote_iter;
  92 
  93     reachable_results = opal_reachable_allocate(opal_list_get_size(local_if),
  94                                                 opal_list_get_size(remote_if));
  95     if (NULL == reachable_results) {
  96         return NULL;
  97     }
  98 
  99     i = 0;
 100     OPAL_LIST_FOREACH(local_iter, local_if, opal_if_t) {
 101         j = 0;
 102         OPAL_LIST_FOREACH(remote_iter, remote_if, opal_if_t) {
 103             reachable_results->weights[i][j] = get_weights(local_iter, remote_iter);
 104             j++;
 105         }
 106         i++;
 107     }
 108 
 109     return reachable_results;
 110 }
 111 
 112 
 113 static int get_weights(opal_if_t *local_if, opal_if_t *remote_if)
 114 {
 115     char str_local[128], str_remote[128], *conn_type;
 116     struct sockaddr *local_sockaddr, *remote_sockaddr;
 117     int weight;
 118 
 119     local_sockaddr = (struct sockaddr *)&local_if->if_addr;
 120     remote_sockaddr = (struct sockaddr *)&remote_if->if_addr;
 121 
 122     /* opal_net_get_hostname returns a static buffer.  Great for
 123        single address printfs, need to copy in this case */
 124     opal_string_copy(str_local, opal_net_get_hostname(local_sockaddr), sizeof(str_local));
 125     str_local[sizeof(str_local) - 1] = '\0';
 126     opal_string_copy(str_remote, opal_net_get_hostname(remote_sockaddr), sizeof(str_remote));
 127     str_remote[sizeof(str_remote) - 1] = '\0';
 128 
 129     /*  initially, assume no connection is possible */
 130     weight = calculate_weight(0, 0, CQ_NO_CONNECTION);
 131 
 132     if (AF_INET == local_sockaddr->sa_family &&
 133         AF_INET == remote_sockaddr->sa_family) {
 134 
 135         if (opal_net_addr_isipv4public(local_sockaddr) &&
 136             opal_net_addr_isipv4public(remote_sockaddr)) {
 137             if (opal_net_samenetwork(local_sockaddr,
 138                                      remote_sockaddr,
 139                                      local_if->if_mask)) {
 140                 conn_type = "IPv4 PUBLIC SAME NETWORK";
 141                 weight = calculate_weight(local_if->if_bandwidth,
 142                                           remote_if->if_bandwidth,
 143                                           CQ_PUBLIC_SAME_NETWORK);
 144             } else {
 145                 conn_type = "IPv4 PUBLIC DIFFERENT NETWORK";
 146                 weight = calculate_weight(local_if->if_bandwidth,
 147                                           remote_if->if_bandwidth,
 148                                           CQ_PUBLIC_DIFFERENT_NETWORK);
 149             }
 150         } else if (!opal_net_addr_isipv4public(local_sockaddr) &&
 151                    !opal_net_addr_isipv4public(remote_sockaddr)) {
 152             if (opal_net_samenetwork(local_sockaddr,
 153                                      remote_sockaddr,
 154                                      local_if->if_mask)) {
 155                 conn_type = "IPv4 PRIVATE SAME NETWORK";
 156                 weight = calculate_weight(local_if->if_bandwidth,
 157                                           remote_if->if_bandwidth,
 158                                           CQ_PRIVATE_SAME_NETWORK);
 159             } else {
 160                 conn_type = "IPv4 PRIVATE DIFFERENT NETWORK";
 161                 weight = calculate_weight(local_if->if_bandwidth,
 162                                           remote_if->if_bandwidth,
 163                                           CQ_PRIVATE_DIFFERENT_NETWORK);
 164             }
 165         } else {
 166             /* one private, one public address.  likely not a match. */
 167             conn_type = "IPv4 NO CONNECTION";
 168             weight = calculate_weight(local_if->if_bandwidth,
 169                                       remote_if->if_bandwidth,
 170                                       CQ_NO_CONNECTION);
 171         }
 172 
 173 #if OPAL_ENABLE_IPV6
 174     } else if (AF_INET6 == local_sockaddr->sa_family &&
 175                AF_INET6 == remote_sockaddr->sa_family) {
 176         if (opal_net_addr_isipv6linklocal(local_sockaddr) &&
 177             opal_net_addr_isipv6linklocal(remote_sockaddr)) {
 178             /* we can't actually tell if link local addresses are on
 179              * the same network or not with the weighted component.
 180              * Assume they are on the same network, so that they'll be
 181              * most likely to be paired together, breaking the fewest
 182              * number of connections.
 183              *
 184              * There used to be a comment in this code (and one in the
 185              * BTL TCP code as well) that the opal_if code doesn't
 186              * pass link-local addresses through.  However, this is
 187              * demonstratably not true on Linux, where link-local
 188              * interfaces are created.  Since it's easy to handle
 189              * either case, do so.
 190              */
 191             conn_type = "IPv6 LINK-LOCAL SAME NETWORK";
 192             weight = calculate_weight(local_if->if_bandwidth,
 193                                       remote_if->if_bandwidth,
 194                                       CQ_PRIVATE_SAME_NETWORK);
 195         } else if (!opal_net_addr_isipv6linklocal(local_sockaddr) &&
 196                    !opal_net_addr_isipv6linklocal(remote_sockaddr)) {
 197             if (opal_net_samenetwork(local_sockaddr,
 198                                      remote_sockaddr,
 199                                      local_if->if_mask)) {
 200                 conn_type = "IPv6 PUBLIC SAME NETWORK";
 201                 weight = calculate_weight(local_if->if_bandwidth,
 202                                           remote_if->if_bandwidth,
 203                                           CQ_PUBLIC_SAME_NETWORK);
 204             } else {
 205                 conn_type = "IPv6 PUBLIC DIFFERENT NETWORK";
 206                 weight = calculate_weight(local_if->if_bandwidth,
 207                                           remote_if->if_bandwidth,
 208                                           CQ_PUBLIC_DIFFERENT_NETWORK);
 209             }
 210         } else {
 211             /* one link-local, one public address.  likely not a match. */
 212             conn_type = "IPv6 NO CONNECTION";
 213             weight = calculate_weight(local_if->if_bandwidth,
 214                                       remote_if->if_bandwidth,
 215                                       CQ_NO_CONNECTION);
 216         }
 217 #endif /* #if OPAL_ENABLE_IPV6 */
 218 
 219     } else {
 220         /* we don't have an address family match, so assume no
 221            connection */
 222         conn_type = "Address type mismatch";
 223         weight = calculate_weight(0, 0, CQ_NO_CONNECTION);
 224     }
 225 
 226     opal_output_verbose(20, opal_reachable_base_framework.framework_output,
 227                         "reachable:weighted: path from %s to %s: %s",
 228                         str_local, str_remote, conn_type);
 229 
 230     return weight;
 231 }
 232 
 233 
 234 /*
 235  * Weights determined by bandwidth between
 236  * interfaces (limited by lower bandwidth
 237  * interface).  A penalty is added to minimize
 238  * the discrepancy in bandwidth.  This helps
 239  * prevent pairing of fast and slow interfaces
 240  *
 241  * Formula: connection_quality * (min(a,b) + 1/(1 + |a-b|))
 242  *
 243  * Examples: a     b     f(a,b)
 244  *           0     0     1
 245  *           0     1     0.5
 246  *           1     1     2
 247  *           1     2     1.5
 248  *           1     3     1.33
 249  *           1     10    1.1
 250  *           10    10    11
 251  *           10    14    10.2
 252  *           11    14    11.25
 253  *           11    15    11.2
 254  *
 255  * NOTE: connection_quality of 1 is assumed for examples.
 256  * In reality, since we're using integers, we need
 257  * connection_quality to be large enough
 258  * to capture decimals
 259  */
 260 static int calculate_weight(int bandwidth_local, int bandwidth_remote,
 261                             int connection_quality)
 262 {
 263     int weight = connection_quality * (MIN(bandwidth_local, bandwidth_remote) +
 264                                        1.0 / (1.0 + (double)abs(bandwidth_local - bandwidth_remote)));
 265     return weight;
 266 }

/* [<][>][^][v][top][bottom][index][help] */