root/opal/mca/btl/usnic/btl_usnic_map.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. map_compare_modules
  2. map_output_modules
  3. map_compare_endpoints
  4. map_output_endpoints
  5. map_compare_procs
  6. map_output_procs
  7. opal_btl_usnic_connectivity_map

   1 /*
   2  * Copyright (c) 2013-2016 Cisco Systems, Inc.  All rights reserved.
   3  * Copyright (c) 2014      Intel, Inc. All rights reserved
   4  * Copyright (c) 2018      Amazon.com, Inc. or its affiliates.  All Rights reserved.
   5  * $COPYRIGHT$
   6  *
   7  * Additional copyrights may follow
   8  *
   9  * $HEADER$
  10  */
  11 
  12 #include "opal_config.h"
  13 
  14 #include <stdio.h>
  15 #include <string.h>
  16 #include <unistd.h>
  17 
  18 #include "opal/util/show_help.h"
  19 #include "opal/util/printf.h"
  20 
  21 #include "btl_usnic_compat.h"
  22 #include "btl_usnic.h"
  23 #include "btl_usnic_module.h"
  24 #include "btl_usnic_util.h"
  25 #include "btl_usnic_proc.h"
  26 
  27 /*
  28  * qsort helper: compare modules by fabric name
  29  */
  30 static int map_compare_modules(const void *aa, const void *bb)
  31 {
  32     opal_btl_usnic_module_t *a = *((opal_btl_usnic_module_t**) aa);
  33     opal_btl_usnic_module_t *b = *((opal_btl_usnic_module_t**) bb);
  34 
  35     return strcmp(a->linux_device_name,
  36                   b->linux_device_name);
  37 }
  38 
  39 /*
  40  * Helper function to output "device:" lines
  41  */
  42 static int map_output_modules(FILE *fp)
  43 {
  44     int i;
  45     size_t size;
  46     opal_btl_usnic_module_t **modules;
  47     struct fi_usnic_info *uip;
  48     char ipv4[IPV4STRADDRLEN];
  49     struct sockaddr_in *sin;
  50     int prefix_len;
  51 
  52     fprintf(fp, "# Devices possibly used by this process:\n");
  53 
  54     /* First, we must sort the modules (by device name) so that
  55        they're always output in a repeatable order. */
  56     size = mca_btl_usnic_component.num_modules *
  57         sizeof(opal_btl_usnic_module_t*);
  58     modules = calloc(1, size);
  59     if (NULL == modules) {
  60         return OPAL_ERR_IN_ERRNO;
  61     }
  62 
  63     memcpy(modules, mca_btl_usnic_component.usnic_active_modules, size);
  64     qsort(modules, mca_btl_usnic_component.num_modules,
  65           sizeof(opal_btl_usnic_module_t*), map_compare_modules);
  66 
  67 
  68     /* Loop over and print the sorted module device information */
  69     for (i = 0; i < mca_btl_usnic_component.num_modules; ++i) {
  70         uip = &modules[i]->usnic_info;
  71         sin = modules[i]->fabric_info->src_addr;
  72         prefix_len = usnic_netmask_to_cidrlen(uip->ui.v1.ui_netmask_be);
  73 
  74         opal_btl_usnic_snprintf_ipv4_addr(ipv4, IPV4STRADDRLEN,
  75                                         sin->sin_addr.s_addr,
  76                                         prefix_len);
  77 
  78         fprintf(fp, "device=%s,ip=%s,mss=%" PRIsize_t "\n",
  79                 modules[i]->linux_device_name,
  80                 ipv4, modules[i]->fabric_info->ep_attr->max_msg_size);
  81     }
  82 
  83     /* Free the temp array */
  84     free(modules);
  85 
  86     return OPAL_SUCCESS;
  87 }
  88 
  89 /************************************************************************/
  90 
  91 /*
  92  * qsort helper: compare endpoints by fabric name
  93  */
  94 static int map_compare_endpoints(const void *aa, const void *bb)
  95 {
  96     opal_btl_usnic_endpoint_t *a = *((opal_btl_usnic_endpoint_t**) aa);
  97     opal_btl_usnic_endpoint_t *b = *((opal_btl_usnic_endpoint_t**) bb);
  98 
  99     if (NULL == a && NULL == b) {
 100         return 0;
 101     } else if (NULL == a) {
 102         return 1;
 103     } else if (NULL == b) {
 104         return -1;
 105     }
 106 
 107     return strcmp(a->endpoint_module->linux_device_name,
 108                   b->endpoint_module->linux_device_name);
 109 }
 110 
 111 /*
 112  * Helper function to output devices for a single peer
 113  */
 114 static int map_output_endpoints(FILE *fp, opal_btl_usnic_proc_t *proc)
 115 {
 116     size_t i;
 117     size_t num_output;
 118     size_t size;
 119     opal_btl_usnic_endpoint_t **eps;
 120     char ipv4[IPV4STRADDRLEN];
 121 
 122     /* First, we must sort the endpoints on this proc by MCW rank so
 123        that they're always output in a repeatable order.  There may
 124        also be NULL endpoints (if we didn't match that peer's
 125        endpoint).  The sort will put NULLs at the end of the array,
 126        where they can be easily ignored. */
 127     size = proc->proc_endpoint_count * sizeof(opal_btl_usnic_endpoint_t *);
 128     eps = calloc(1, size);
 129     if (NULL == eps) {
 130         return OPAL_ERR_IN_ERRNO;
 131     }
 132 
 133     memcpy(eps, proc->proc_endpoints, size);
 134     qsort(eps, proc->proc_endpoint_count,
 135           sizeof(opal_btl_usnic_endpoint_t*),
 136           map_compare_endpoints);
 137 
 138     /* Loop over and print the sorted endpoint information, ignoring
 139        NULLs that might be at the end of the array. */
 140     for (num_output = i = 0; i < proc->proc_endpoint_count; ++i) {
 141         if (NULL == eps[i]) {
 142             break;
 143         }
 144         if (num_output > 0) {
 145             fprintf(fp, ",");
 146         }
 147 
 148         opal_btl_usnic_snprintf_ipv4_addr(ipv4, IPV4STRADDRLEN,
 149                                           eps[i]->endpoint_remote_modex.ipv4_addr,
 150                                           eps[i]->endpoint_remote_modex.netmask);
 151 
 152         fprintf(fp, "device=%s@peer_ip=%s",
 153                 eps[i]->endpoint_module->linux_device_name,
 154                 ipv4);
 155         ++num_output;
 156     }
 157     fprintf(fp, "\n");
 158 
 159     /* Free the temp array */
 160     free(eps);
 161 
 162     return OPAL_SUCCESS;
 163 }
 164 
 165 /************************************************************************/
 166 
 167 /*
 168  * qsort helper: compare the procs by job ID and VPID
 169  */
 170 static int map_compare_procs(const void *aa, const void *bb)
 171 {
 172     opal_btl_usnic_proc_t *a = *((opal_btl_usnic_proc_t**) aa);
 173     opal_btl_usnic_proc_t *b = *((opal_btl_usnic_proc_t**) bb);
 174     opal_process_name_t *an = &(a->proc_opal->proc_name);
 175     opal_process_name_t *bn = &(b->proc_opal->proc_name);
 176 
 177     if (an > bn) {
 178         return 1;
 179     } else if (an < bn) {
 180         return -1;
 181     } else {
 182         return 0;
 183     }
 184 }
 185 
 186 /*
 187  * Helper function to output "peer:" lines
 188  */
 189 static int map_output_procs(FILE *fp)
 190 {
 191     size_t i;
 192     size_t num_procs;
 193     opal_btl_usnic_proc_t **procs;
 194     opal_btl_usnic_proc_t *pitem;
 195 
 196     fprintf(fp, "# Endpoints used to communicate to each peer MPI process:\n");
 197 
 198     /* First, we must sort the procs by MCW rank so that they're
 199        always output in a repeatable order. */
 200     num_procs = opal_list_get_size(&mca_btl_usnic_component.usnic_procs);
 201     procs = calloc(num_procs, sizeof(opal_btl_usnic_proc_t*));
 202     if (NULL == procs) {
 203         return OPAL_ERR_IN_ERRNO;
 204     }
 205 
 206     i = 0;
 207     OPAL_LIST_FOREACH(pitem, &mca_btl_usnic_component.usnic_procs,
 208                       opal_btl_usnic_proc_t) {
 209         procs[i] = pitem;
 210         ++i;
 211     }
 212     qsort(procs, num_procs, sizeof(opal_btl_usnic_proc_t*),
 213           map_compare_procs);
 214 
 215     /* Loop over and print the sorted module device information */
 216     int ret = OPAL_SUCCESS;
 217     for (i = 0; i < num_procs; ++i) {
 218         fprintf(fp, "peer=%d,", procs[i]->proc_opal->proc_name.vpid);
 219         fprintf(fp, "hostname=%s,", opal_get_proc_hostname(procs[i]->proc_opal));
 220         if (OPAL_SUCCESS != map_output_endpoints(fp, procs[i])) {
 221             break;
 222         }
 223     }
 224 
 225     /* Free the temp array */
 226     free(procs);
 227 
 228     return ret;
 229 }
 230 
 231 /************************************************************************/
 232 
 233 /*
 234  * Output the connectivity map
 235  */
 236 void opal_btl_usnic_connectivity_map(void)
 237 {
 238     char *filename;
 239     FILE *fp;
 240 
 241     if (NULL == mca_btl_usnic_component.connectivity_map_prefix) {
 242         return;
 243     }
 244 
 245     /* Filename is of the form: <prefix>-<hostname>.<pid>.<job>.<MCW
 246        rank>.txt */
 247     opal_asprintf(&filename, "%s-%s.pid%d.job%d.mcwrank%d.txt",
 248              mca_btl_usnic_component.connectivity_map_prefix,
 249              opal_get_proc_hostname(opal_proc_local_get()),
 250              getpid(),
 251              opal_proc_local_get()->proc_name.jobid,
 252              opal_proc_local_get()->proc_name.vpid);
 253     if (NULL == filename) {
 254         /* JMS abort? */
 255         return;
 256     }
 257 
 258     fp = fopen(filename, "w");
 259     if (NULL == fp) {
 260         char dirname[PATH_MAX];
 261         getcwd(dirname, sizeof(dirname));
 262         dirname[sizeof(dirname) - 1] = '\0';
 263         opal_show_help("help-mpi-btl-usnic.txt", "cannot write to map file",
 264                        true,
 265                        opal_process_info.nodename,
 266                        filename,
 267                        dirname,
 268                        strerror(errno), errno);
 269         return;
 270     }
 271 
 272     if (OPAL_SUCCESS == map_output_modules(fp)) {
 273         map_output_procs(fp);
 274     }
 275 
 276     fclose(fp);
 277 }

/* [<][>][^][v][top][bottom][index][help] */