root/opal/mca/reachable/netlink/reachable_netlink_utils_common.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. opal_reachable_netlink_is_nlreply_expected
  2. opal_reachable_netlink_is_nlreply_err
  3. opal_reachable_netlink_send_query
  4. opal_reachable_netlink_set_rcvsk_timer
  5. opal_reachable_netlink_sk_alloc
  6. opal_reachable_netlink_sk_free
  7. opal_reachable_netlink_rt_raw_parse_cb
  8. opal_reachable_netlink_rt_lookup
  9. opal_reachable_netlink_rt_lookup6

   1 /*
   2  * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
   3  * Copyright (c) 2017      Amazon.com, Inc. or its affiliates.
   4  *                         All Rights reserved.
   5  * Portions of this software copied from libfabric
   6  * (https://github.com/ofiwg/libfabric)
   7  *
   8  * LICENSE_BEGIN
   9  *
  10  * BSD license:
  11  *
  12  *     Redistribution and use in source and binary forms, with or
  13  *     without modification, are permitted provided that the following
  14  *     conditions are met:
  15  *
  16  *      - Redistributions of source code must retain the above
  17  *        copyright notice, this list of conditions and the following
  18  *        disclaimer.
  19  *
  20  *      - Redistributions in binary form must reproduce the above
  21  *        copyright notice, this list of conditions and the following
  22  *        disclaimer in the documentation and/or other materials
  23  *        provided with the distribution.
  24  *
  25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  26  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  27  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  28  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  29  * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  30  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  31  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  32  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  33  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  35  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  36  * POSSIBILITY OF SUCH DAMAGE.
  37  *
  38  * LICENSE_END
  39  *
  40  *
  41  */
  42 
  43 #include "opal_config.h"
  44 
  45 #include <errno.h>
  46 #include <arpa/inet.h>
  47 #include <time.h>
  48 #ifdef HAVE_NETINET_IN_H
  49 #include <netinet/in.h>
  50 #endif
  51 
  52 #include "libnl_utils.h"
  53 
  54 /* Adapt this copied code for Open MPI */
  55 #include "opal/util/output.h"
  56 
  57 
  58 static struct nla_policy route_policy[RTA_MAX+1] = {
  59         [RTA_IIF]       = { .type = NLA_STRING,
  60                             .maxlen = IFNAMSIZ, },
  61         [RTA_OIF]       = { .type = NLA_U32 },
  62         [RTA_PRIORITY]  = { .type = NLA_U32 },
  63         [RTA_FLOW]      = { .type = NLA_U32 },
  64         [RTA_MP_ALGO]   = { .type = NLA_U32 },
  65         [RTA_CACHEINFO] = { .minlen = sizeof(struct rta_cacheinfo) },
  66         [RTA_METRICS]   = { .type = NLA_NESTED },
  67         [RTA_MULTIPATH] = { .type = NLA_NESTED },
  68 };
  69 
  70 static int opal_reachable_netlink_is_nlreply_expected(struct opal_reachable_netlink_sk *unlsk,
  71                                                       struct nlmsghdr *nlm_hdr)
  72 {
  73 #if OPAL_ENABLE_DEBUG
  74     if (nlm_hdr->nlmsg_pid != nl_socket_get_local_port(unlsk->nlh)
  75         || nlm_hdr->nlmsg_seq != unlsk->seq) {
  76         opal_output(0, "Not an expected reply msg pid: %u local pid: %u msg seq: %u expected seq: %u\n",
  77                     nlm_hdr->nlmsg_pid,
  78                     nl_socket_get_local_port(unlsk->nlh),
  79                     nlm_hdr->nlmsg_seq, unlsk->seq);
  80         return 0;
  81     }
  82 #endif
  83 
  84     return 1;
  85 }
  86 
  87 static int opal_reachable_netlink_is_nlreply_err(struct nlmsghdr *nlm_hdr)
  88 {
  89     if (nlm_hdr->nlmsg_type == NLMSG_ERROR) {
  90         struct nlmsgerr *e = (struct nlmsgerr *)nlmsg_data(nlm_hdr);
  91         if (nlm_hdr->nlmsg_len >= (__u32)NLMSG_SIZE(sizeof(*e)))
  92             opal_output_verbose(20, 0,
  93                                 "Received a netlink error message");
  94         else
  95             opal_output_verbose(20, 0,
  96                                 "Received a truncated netlink error message\n");
  97         return 1;
  98     }
  99 
 100     return 0;
 101 }
 102 
 103 static int opal_reachable_netlink_send_query(struct opal_reachable_netlink_sk *unlsk,
 104                                              struct nl_msg *msg,
 105                                              int protocol, int flag)
 106 {
 107     struct nlmsghdr *nlhdr;
 108 
 109     nlhdr = nlmsg_hdr(msg);
 110     nlhdr->nlmsg_pid = nl_socket_get_local_port(unlsk->nlh);
 111     nlhdr->nlmsg_seq = ++unlsk->seq;
 112     nlmsg_set_proto(msg, protocol);
 113     nlhdr->nlmsg_flags = flag;
 114 
 115     return nl_send(unlsk->nlh, msg);
 116 }
 117 
 118 static int opal_reachable_netlink_set_rcvsk_timer(NL_HANDLE *nlh)
 119 {
 120     int err = 0;
 121     struct timeval timeout;
 122 
 123     timeout.tv_sec = 1;
 124     timeout.tv_usec = 0;
 125 
 126     err = setsockopt(nl_socket_get_fd(nlh), SOL_SOCKET, SO_RCVTIMEO,
 127                      (char *)&timeout, sizeof(timeout));
 128 #if OPAL_ENABLE_DEBUG
 129     if (err < 0)
 130         opal_output(0, "Failed to set SO_RCVTIMEO for nl socket");
 131 #endif
 132 
 133     return err;
 134 }
 135 
 136 static int opal_reachable_netlink_sk_alloc(struct opal_reachable_netlink_sk **p_sk, int protocol)
 137 {
 138     struct opal_reachable_netlink_sk *unlsk;
 139     NL_HANDLE *nlh;
 140     int err;
 141 
 142     unlsk = calloc(1, sizeof(*unlsk));
 143     if (!unlsk) {
 144         opal_output(0, "Failed to allocate opal_reachable_netlink_sk struct\n");
 145         return ENOMEM;
 146     }
 147 
 148     nlh = NL_HANDLE_ALLOC();
 149     if (!nlh) {
 150         opal_output(0, "Failed to allocate nl handle\n");
 151         err = ENOMEM;
 152         goto err_free_unlsk;
 153     }
 154 
 155     err = nl_connect(nlh, protocol);
 156     if (err < 0) {
 157         opal_output(0, "Failed to connnect netlink route socket error: %s\n",
 158                     NL_GETERROR(err));
 159         err = EINVAL;
 160         goto err_free_nlh;
 161     }
 162 
 163     NL_DISABLE_SEQ_CHECK(nlh);
 164     err = opal_reachable_netlink_set_rcvsk_timer(nlh);
 165     if (err < 0)
 166         goto err_close_nlh;
 167 
 168     unlsk->nlh = nlh;
 169     unlsk->seq = time(NULL);
 170     *p_sk = unlsk;
 171     return 0;
 172 
 173  err_close_nlh:
 174     nl_close(nlh);
 175  err_free_nlh:
 176     NL_HANDLE_FREE(nlh);
 177  err_free_unlsk:
 178     free(unlsk);
 179     return err;
 180 }
 181 
 182 static void opal_reachable_netlink_sk_free(struct opal_reachable_netlink_sk *unlsk)
 183 {
 184     nl_close(unlsk->nlh);
 185     NL_HANDLE_FREE(unlsk->nlh);
 186     free(unlsk);
 187 }
 188 
 189 static int opal_reachable_netlink_rt_raw_parse_cb(struct nl_msg *msg, void *arg)
 190 {
 191     struct opal_reachable_netlink_rt_cb_arg *lookup_arg = (struct opal_reachable_netlink_rt_cb_arg *)arg;
 192     struct opal_reachable_netlink_sk *unlsk = lookup_arg->unlsk;
 193     struct nlmsghdr *nlm_hdr = nlmsg_hdr(msg);
 194     struct rtmsg *rtm;
 195     struct nlattr *tb[RTA_MAX + 1];
 196     int found = 0;
 197     int err;
 198 
 199     INC_CB_MSGCNT(lookup_arg);
 200 
 201     if (!opal_reachable_netlink_is_nlreply_expected(unlsk, nlm_hdr)) {
 202 #if OPAL_ENABLE_DEBUG
 203         nl_msg_dump(msg, stderr);
 204 #endif
 205         return NL_SKIP;
 206     }
 207 
 208     if (opal_reachable_netlink_is_nlreply_err(nlm_hdr)) {
 209 #if OPAL_ENABLE_DEBUG
 210         nl_msg_dump(msg, stderr);
 211 #endif
 212         return NL_SKIP;
 213     }
 214 
 215     if (nlm_hdr->nlmsg_type != RTM_NEWROUTE) {
 216 #if OPAL_ENABLE_DEBUG
 217         char buf[128];
 218         nl_nlmsgtype2str(nlm_hdr->nlmsg_type, buf, sizeof(buf));
 219         opal_output(0, "Received an invalid route request reply message type: %s\n",
 220                     buf);
 221         nl_msg_dump(msg, stderr);
 222 #endif
 223         return NL_SKIP;
 224     }
 225 
 226     rtm = nlmsg_data(nlm_hdr);
 227     if (rtm->rtm_family != AF_INET
 228 #if OPAL_ENABLE_IPV6
 229         && rtm->rtm_family != AF_INET6
 230 #endif
 231         ) {
 232 #if OPAL_ENABLE_DEBUG
 233         opal_output(0, "RTM message contains invalid AF family: %u\n",
 234                     rtm->rtm_family);
 235         nl_msg_dump(msg, stderr);
 236 #endif
 237         return NL_SKIP;
 238     }
 239 
 240     err = nlmsg_parse(nlm_hdr, sizeof(struct rtmsg), tb, RTA_MAX,
 241                       route_policy);
 242     if (err < 0) {
 243 #if OPAL_ENABLE_DEBUG
 244         opal_output(0, "nlmsg parse error %s\n", NL_GETERROR(err));
 245         nl_msg_dump(msg, stderr);
 246 #endif
 247         return NL_SKIP;
 248     }
 249 
 250     if (tb[RTA_OIF]) {
 251         if (nla_get_u32(tb[RTA_OIF]) == (uint32_t)lookup_arg->oif)
 252             found = 1;
 253         else
 254             /* usually, this means that there is a route to the remote
 255                host, but that it's not through the given interface.  For
 256                our purposes, that means it's not reachable. */
 257             opal_output_verbose(20, 0, "Retrieved route has a different outgoing interface %d (expected %d)\n",
 258                                 nla_get_u32(tb[RTA_OIF]),
 259                                 lookup_arg->oif);
 260     }
 261 
 262     if (found && tb[RTA_GATEWAY]) {
 263         lookup_arg->has_gateway = 1;
 264     }
 265     lookup_arg->found = found;
 266     return NL_STOP;
 267 }
 268 
 269 int opal_reachable_netlink_rt_lookup(uint32_t src_addr,
 270                                      uint32_t dst_addr,
 271                                      int outgoing_interface,
 272                                      int *has_gateway)
 273 {
 274     struct opal_reachable_netlink_sk *unlsk; /* netlink socket */
 275     struct nl_msg *nlm; /* netlink message */
 276     struct rtmsg rmsg; /* route message */
 277     struct opal_reachable_netlink_rt_cb_arg arg; /* callback argument */
 278     int err;
 279 
 280     /* allocate netlink socket */
 281     unlsk = NULL;
 282     err = opal_reachable_netlink_sk_alloc(&unlsk, NETLINK_ROUTE);
 283     if (err)
 284         return err;
 285 
 286     /* allocate route message */
 287     memset(&rmsg, 0, sizeof(rmsg));
 288     rmsg.rtm_family = AF_INET;
 289     rmsg.rtm_dst_len = sizeof(dst_addr) * CHAR_BIT;
 290     rmsg.rtm_src_len = sizeof(src_addr) * CHAR_BIT;
 291 
 292     /* allocate netlink message of type RTM_GETROUTE */
 293     nlm = nlmsg_alloc_simple(RTM_GETROUTE, 0);
 294     if (!nlm) {
 295         opal_output(0, "Failed to alloc nl message, %s\n",
 296                     NL_GETERROR(err));
 297         err = ENOMEM;
 298         goto out;
 299     }
 300 
 301     /* append route message and addresses to netlink message.   */
 302     nlmsg_append(nlm, &rmsg, sizeof(rmsg), NLMSG_ALIGNTO);
 303     nla_put_u32(nlm, RTA_DST, dst_addr);
 304     nla_put_u32(nlm, RTA_SRC, src_addr);
 305 
 306     /* query kernel */
 307     err = opal_reachable_netlink_send_query(unlsk, nlm, NETLINK_ROUTE, NLM_F_REQUEST);
 308     nlmsg_free(nlm);
 309     if (err < 0) {
 310         opal_output(0, "Failed to send RTM_GETROUTE query message, error %s\n",
 311                     NL_GETERROR(err));
 312         err = EINVAL;
 313         goto out;
 314     }
 315 
 316     /* Setup callback function */
 317     memset(&arg, 0, sizeof(arg));
 318     arg.oif = outgoing_interface;
 319     arg.unlsk = unlsk;
 320     err = nl_socket_modify_cb(unlsk->nlh, NL_CB_MSG_IN, NL_CB_CUSTOM,
 321                               opal_reachable_netlink_rt_raw_parse_cb, &arg);
 322     if (err != 0) {
 323         opal_output(0, "Failed to setup callback function, error %s\n",
 324                     NL_GETERROR(err));
 325         err = EINVAL;
 326         goto out;
 327     }
 328 
 329     /* recieve results */
 330     NL_RECVMSGS(unlsk->nlh, arg, EHOSTUNREACH, err, out);
 331 
 332     /* check whether a route was found */
 333     if (arg.found) {
 334         *has_gateway = arg.has_gateway;
 335         err = 0;
 336     } else {
 337         *has_gateway = 0;
 338         err = EHOSTUNREACH;
 339     }
 340 
 341  out:
 342     opal_reachable_netlink_sk_free(unlsk);
 343     return err;
 344 }
 345 
 346 
 347 #if OPAL_ENABLE_IPV6
 348 int opal_reachable_netlink_rt_lookup6(struct in6_addr *src_addr,
 349                                       struct in6_addr *dst_addr,
 350                                       int outgoing_interface,
 351                                       int *has_gateway)
 352 {
 353 
 354     struct opal_reachable_netlink_sk *unlsk; /* netlink socket */
 355     struct nl_msg *nlm; /* netlink message */
 356     struct rtmsg rmsg; /* route message */
 357     struct opal_reachable_netlink_rt_cb_arg arg; /* callback argument */
 358     int err;
 359 
 360     /* allocate netlink socket */
 361     unlsk = NULL;
 362     err = opal_reachable_netlink_sk_alloc(&unlsk, NETLINK_ROUTE);
 363     if (err)
 364         return err;
 365 
 366     /* allocate route message */
 367     memset(&rmsg, 0, sizeof(rmsg));
 368     rmsg.rtm_family = AF_INET6;
 369     rmsg.rtm_dst_len = sizeof(*dst_addr) * CHAR_BIT;
 370     rmsg.rtm_src_len = sizeof(*src_addr) * CHAR_BIT;
 371 
 372     /* allocate netlink message of type RTM_GETROUTE */
 373     nlm = nlmsg_alloc_simple(RTM_GETROUTE, 0);
 374     if (!nlm) {
 375         opal_output(0, "Failed to alloc nl message, %s\n",
 376                     NL_GETERROR(err));
 377         err = ENOMEM;
 378         goto out;
 379     }
 380 
 381     /* append route message and addresses to netlink message.   */
 382     nlmsg_append(nlm, &rmsg, sizeof(rmsg), NLMSG_ALIGNTO);
 383     nla_put(nlm, RTA_DST, sizeof(dst_addr->s6_addr), &(dst_addr->s6_addr));
 384     nla_put(nlm, RTA_SRC, sizeof(src_addr->s6_addr), &(src_addr->s6_addr));
 385 
 386     /* query kernel */
 387     err = opal_reachable_netlink_send_query(unlsk, nlm, NETLINK_ROUTE, NLM_F_REQUEST);
 388     nlmsg_free(nlm);
 389     if (err < 0) {
 390         opal_output(0, "Failed to send RTM_GETROUTE query message, error %s\n",
 391                     NL_GETERROR(err));
 392         err = EINVAL;
 393         goto out;
 394     }
 395 
 396     /* Setup callback function */
 397     memset(&arg, 0, sizeof(arg));
 398     arg.oif = outgoing_interface;
 399     arg.unlsk = unlsk;
 400     err = nl_socket_modify_cb(unlsk->nlh, NL_CB_MSG_IN, NL_CB_CUSTOM,
 401                               opal_reachable_netlink_rt_raw_parse_cb, &arg);
 402     if (err != 0) {
 403         opal_output(0, "Failed to setup callback function, error %s\n",
 404                     NL_GETERROR(err));
 405         err = EINVAL;
 406         goto out;
 407     }
 408 
 409     /* receive results */
 410     NL_RECVMSGS(unlsk->nlh, arg, EHOSTUNREACH, err, out);
 411 
 412     /* check whether a route was found */
 413     if (arg.found) {
 414         *has_gateway = arg.has_gateway;
 415         err = 0;
 416     } else {
 417         *has_gateway = 0;
 418         err = EHOSTUNREACH;
 419     }
 420 
 421  out:
 422     opal_reachable_netlink_sk_free(unlsk);
 423     return err;
 424 }
 425 #endif /* #if OPAL_ENABLE_IPV6 */

/* [<][>][^][v][top][bottom][index][help] */