root/orte/mca/rtc/hwloc/rtc_hwloc.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. init
  2. finalize
  3. assign
  4. set
  5. parse_map_line
  6. use_hole
  7. find_hole
  8. enough_space

   1 /*
   2  * Copyright (c) 2014-2018 Intel, Inc.  All rights reserved.
   3  * Copyright (c) 2017-2018 Cisco Systems, Inc.  All rights reserved
   4  * Copyright (c) 2017      Inria.  All rights reserved.
   5  * $COPYRIGHT$
   6  *
   7  * Additional copyrights may follow
   8  *
   9  * $HEADER$
  10  */
  11 
  12 #define OPAL_HWLOC_WANT_SHMEM 1
  13 
  14 #include "orte_config.h"
  15 #include "orte/constants.h"
  16 #include "orte/types.h"
  17 
  18 #include <stdio.h>
  19 #include <stdlib.h>
  20 #include <stdint.h>
  21 #include <assert.h>
  22 #include <errno.h>
  23 #ifdef HAVE_UNISTD_H
  24 #include <unistd.h>
  25 #endif  /* HAVE_UNISTD_H */
  26 #include <string.h>
  27 #include <sys/mman.h>
  28 #ifdef HAVE_SYS_STAT_H
  29 #include <sys/stat.h>
  30 #endif
  31 #if HAVE_FCNTL_H
  32 #include <fcntl.h>
  33 #endif
  34 
  35 #include "opal/class/opal_list.h"
  36 #include "opal/dss/dss_types.h"
  37 #include "opal/mca/hwloc/hwloc-internal.h"
  38 #include "opal/mca/pmix/pmix_types.h"
  39 #include "opal/util/argv.h"
  40 #include "opal/util/fd.h"
  41 #include "opal/util/opal_environ.h"
  42 #include "opal/util/path.h"
  43 
  44 #include "orte/util/show_help.h"
  45 #include "orte/util/error_strings.h"
  46 #include "orte/runtime/orte_globals.h"
  47 #include "orte/mca/errmgr/errmgr.h"
  48 #include "orte/mca/rmaps/rmaps_types.h"
  49 
  50 #include "orte/mca/rtc/base/base.h"
  51 #include "rtc_hwloc.h"
  52 
/*
 * Forward declarations of the module entry points. They are file-local
 * and only reachable through the function table defined below.
 */
static int init(void);
static void finalize(void);
static void assign(orte_job_t *jdata);
static void set(orte_job_t *jdata,
                orte_proc_t *proc,
                char ***environ_copy,
                int write_fd);

/* Function table exposing the four entry points of this rtc module. */
orte_rtc_base_module_t orte_rtc_hwloc_module = {
    .init = init,
    .finalize = finalize,
    .assign = assign,
    .set = set
};
  67 
#if HWLOC_API_VERSION >= 0x20000
/*
 * State of the topology shared-memory segment. Only compiled when the
 * hwloc API version is at least 0x20000.
 */
/* segment length, filled in by hwloc_shmem_topology_get_length() in init() */
static size_t shmemsize = 0;
/* address selected by find_hole() in init() */
static size_t shmemaddr;
/* path of the backing file; assign() does nothing while this is NULL */
static char *shmemfile = NULL;
/* descriptor of the backing file; -1 while it is not open */
static int shmemfd = -1;

/* Parse one line of /proc/self/maps into begin/end addresses and a kind. */
static int parse_map_line(const char *line,
                          unsigned long *beginp,
                          unsigned long *endp,
                          orte_rtc_hwloc_vm_map_kind_t *kindp);
/* Pick an address inside the hole [holebegin, holebegin+holesize). */
static int use_hole(unsigned long holebegin,
                    unsigned long holesize,
                    unsigned long *addrp,
                    unsigned long size);
/* Scan /proc/self/maps for a hole of the requested kind and size. */
static int find_hole(orte_rtc_hwloc_vm_hole_kind_t hkind,
                     size_t *addrp,
                     size_t size);
/* Check that the directory holding filename has room for space_req bytes. */
static int enough_space(const char *filename,
                        size_t space_req,
                        uint64_t *space_avail,
                        bool *result);
#endif
  90 
  91 static int init(void)
  92 {
  93 #if HWLOC_API_VERSION >= 0x20000
  94     int rc;
  95     bool space_available = false;
  96     uint64_t amount_space_avail = 0;
  97 
  98     /* ensure we have the topology */
  99     if (OPAL_SUCCESS != (rc = opal_hwloc_base_get_topology())) {
 100         return rc;
 101     }
 102 
 103     if (VM_HOLE_NONE == mca_rtc_hwloc_component.kind) {
 104         return ORTE_SUCCESS;
 105     }
 106 
 107     /* get the size of the topology shared memory segment */
 108     if (0 != hwloc_shmem_topology_get_length(opal_hwloc_topology, &shmemsize, 0)) {
 109         opal_output_verbose(2, orte_rtc_base_framework.framework_output,
 110                             "%s hwloc topology shmem not available",
 111                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
 112         return ORTE_SUCCESS;
 113     }
 114 
 115     if (ORTE_SUCCESS != (rc = find_hole(mca_rtc_hwloc_component.kind,
 116                                         &shmemaddr, shmemsize))) {
 117         /* we couldn't find a hole, so don't use the shmem support */
 118         if (4 < opal_output_get_verbosity(orte_rtc_base_framework.framework_output)) {
 119             FILE *file = fopen("/proc/self/maps", "r");
 120             if (file) {
 121                 char line[256];
 122                 opal_output(0, "%s Dumping /proc/self/maps",
 123                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
 124                 while (fgets(line, sizeof(line), file) != NULL) {
 125                     char *end = strchr(line, '\n');
 126                     if (end) {
 127                        *end = '\0';
 128                     }
 129                     opal_output(0, "%s", line);
 130                 }
 131                 fclose(file);
 132             }
 133         }
 134         return ORTE_SUCCESS;
 135     }
 136     /* create the shmem file in our session dir so it
 137      * will automatically get cleaned up */
 138     opal_asprintf(&shmemfile, "%s/hwloc.sm", orte_process_info.jobfam_session_dir);
 139     /* let's make sure we have enough space for the backing file */
 140     if (OPAL_SUCCESS != (rc = enough_space(shmemfile, shmemsize,
 141                                            &amount_space_avail,
 142                                            &space_available))) {
 143         opal_output_verbose(2, orte_rtc_base_framework.framework_output,
 144                             "%s an error occurred while determining "
 145                             "whether or not %s could be created for topo shmem.",
 146                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), shmemfile);
 147         free(shmemfile);
 148         shmemfile = NULL;
 149         return ORTE_SUCCESS;
 150     }
 151     if (!space_available) {
 152         if (1 < opal_output_get_verbosity(orte_rtc_base_framework.framework_output)) {
 153             orte_show_help("help-orte-rtc-hwloc.txt", "target full", true,
 154                            shmemfile, orte_process_info.nodename,
 155                            (unsigned long)shmemsize,
 156                            (unsigned long long)amount_space_avail);
 157         }
 158         free(shmemfile);
 159         shmemfile = NULL;
 160         return ORTE_SUCCESS;
 161     }
 162     /* enough space is available, so create the segment */
 163     if (-1 == (shmemfd = open(shmemfile, O_CREAT | O_RDWR, 0600))) {
 164         int err = errno;
 165         if (1 < opal_output_get_verbosity(orte_rtc_base_framework.framework_output)) {
 166             orte_show_help("help-orte-rtc-hwloc.txt", "sys call fail", true,
 167                            orte_process_info.nodename,
 168                            "open(2)", "", strerror(err), err);
 169         }
 170         free(shmemfile);
 171         shmemfile = NULL;
 172         return ORTE_SUCCESS;
 173     }
 174     /* ensure nobody inherits this fd */
 175     opal_fd_set_cloexec(shmemfd);
 176     /* populate the shmem segment with the topology */
 177     if (0 != (rc = hwloc_shmem_topology_write(opal_hwloc_topology, shmemfd, 0,
 178                                               (void*)shmemaddr, shmemsize, 0))) {
 179         opal_output_verbose(2, orte_rtc_base_framework.framework_output,
 180                             "%s an error occurred while writing topology to %s",
 181                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), shmemfile);
 182         unlink(shmemfile);
 183         free(shmemfile);
 184         shmemfile = NULL;
 185         close(shmemfd);
 186         shmemfd = -1;
 187         return ORTE_SUCCESS;
 188     }
 189 #endif
 190 
 191     return ORTE_SUCCESS;
 192 }
 193 
 194 static void finalize(void)
 195 {
 196 #if HWLOC_API_VERSION >= 0x20000
 197     if (NULL != shmemfile) {
 198         unlink(shmemfile);
 199         free(shmemfile);
 200     }
 201     if (0 <= shmemfd) {
 202         close(shmemfd);
 203     }
 204 #endif
 205     return;
 206 }
 207 
 208 static void assign(orte_job_t *jdata)
 209 {
 210 #if HWLOC_API_VERSION >= 0x20000
 211     opal_list_t *cache;
 212     opal_value_t *kv;
 213 
 214     if (VM_HOLE_NONE == mca_rtc_hwloc_component.kind ||
 215         NULL == shmemfile) {
 216         return;
 217     }
 218     /* add the shmem address and size to the job-level info that
 219      * will be provided to the proc upon registration */
 220     cache = NULL;
 221     if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_INFO_CACHE, (void**)&cache, OPAL_PTR) ||
 222         NULL == cache) {
 223         cache = OBJ_NEW(opal_list_t);
 224         orte_set_attribute(&jdata->attributes, ORTE_JOB_INFO_CACHE, ORTE_ATTR_LOCAL, cache, OPAL_PTR);
 225     }
 226     opal_output_verbose(2, orte_rtc_base_framework.framework_output,
 227                         "FILE %s ADDR %lx SIZE %lx", shmemfile,
 228                         (unsigned long)shmemaddr,
 229                         (unsigned long)shmemsize);
 230 
 231     kv = OBJ_NEW(opal_value_t);
 232     kv->key = strdup(OPAL_PMIX_HWLOC_SHMEM_FILE);
 233     kv->type = OPAL_STRING;
 234     kv->data.string = strdup(shmemfile);
 235     opal_list_append(cache, &kv->super);
 236 
 237     kv = OBJ_NEW(opal_value_t);
 238     kv->key = strdup(OPAL_PMIX_HWLOC_SHMEM_ADDR);
 239     kv->type = OPAL_SIZE;
 240     kv->data.size = shmemaddr;
 241     opal_list_append(cache, &kv->super);
 242 
 243     kv = OBJ_NEW(opal_value_t);
 244     kv->key = strdup(OPAL_PMIX_HWLOC_SHMEM_SIZE);
 245     kv->type = OPAL_SIZE;
 246     kv->data.size = shmemsize;
 247     opal_list_append(cache, &kv->super);
 248 #endif
 249 }
 250 
 251 static void set(orte_job_t *jobdat,
 252                 orte_proc_t *child,
 253                 char ***environ_copy,
 254                 int write_fd)
 255 {
 256     hwloc_cpuset_t cpuset;
 257     hwloc_obj_t root;
 258     opal_hwloc_topo_data_t *sum;
 259     orte_app_context_t *context;
 260     int rc=ORTE_ERROR;
 261     char *msg, *param;
 262     char *cpu_bitmap;
 263 
 264     opal_output_verbose(2, orte_rtc_base_framework.framework_output,
 265                         "%s hwloc:set on child %s",
 266                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 267                         (NULL == child) ? "NULL" : ORTE_NAME_PRINT(&child->name));
 268 
 269     if (NULL == jobdat || NULL == child) {
 270         /* nothing for us to do */
 271         opal_output_verbose(2, orte_rtc_base_framework.framework_output,
 272                             "%s hwloc:set jobdat %s child %s - nothing to do",
 273                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 274                             (NULL == jobdat) ? "NULL" : ORTE_JOBID_PRINT(jobdat->jobid),
 275                             (NULL == child) ? "NULL" : ORTE_NAME_PRINT(&child->name));
 276         return;
 277     }
 278 
 279     context = (orte_app_context_t*)opal_pointer_array_get_item(jobdat->apps, child->app_idx);
 280 
 281     /* Set process affinity, if given */
 282     cpu_bitmap = NULL;
 283     if (!orte_get_attribute(&child->attributes, ORTE_PROC_CPU_BITMAP, (void**)&cpu_bitmap, OPAL_STRING) ||
 284         NULL == cpu_bitmap || 0 == strlen(cpu_bitmap)) {
 285         /* if the daemon is bound, then we need to "free" this proc */
 286         if (NULL != orte_daemon_cores) {
 287             root = hwloc_get_root_obj(opal_hwloc_topology);
 288             if (NULL == root->userdata) {
 289                 orte_rtc_base_send_warn_show_help(write_fd,
 290                                                   "help-orte-odls-default.txt", "incorrectly bound",
 291                                                   orte_process_info.nodename, context->app,
 292                                                   __FILE__, __LINE__);
 293             }
 294             sum = (opal_hwloc_topo_data_t*)root->userdata;
 295             /* bind this proc to all available processors */
 296             rc = hwloc_set_cpubind(opal_hwloc_topology, sum->available, 0);
 297             /* if we got an error and this wasn't a default binding policy, then report it */
 298             if (rc < 0  && OPAL_BINDING_POLICY_IS_SET(jobdat->map->binding)) {
 299                 if (errno == ENOSYS) {
 300                     msg = "hwloc indicates cpu binding not supported";
 301                 } else if (errno == EXDEV) {
 302                     msg = "hwloc indicates cpu binding cannot be enforced";
 303                 } else {
 304                     char *tmp;
 305                     (void)hwloc_bitmap_list_asprintf(&tmp, sum->available);
 306                     opal_asprintf(&msg, "hwloc_set_cpubind returned \"%s\" for bitmap \"%s\"",
 307                              opal_strerror(rc), tmp);
 308                     free(tmp);
 309                 }
 310                 if (OPAL_BINDING_REQUIRED(jobdat->map->binding)) {
 311                     /* If binding is required, send an error up the pipe (which exits
 312                        -- it doesn't return). */
 313                     orte_rtc_base_send_error_show_help(write_fd, 1, "help-orte-odls-default.txt",
 314                                                        "binding generic error",
 315                                                        orte_process_info.nodename, context->app, msg,
 316                                                        __FILE__, __LINE__);
 317                 } else {
 318                     orte_rtc_base_send_warn_show_help(write_fd,
 319                                                       "help-orte-odls-default.txt", "not bound",
 320                                                       orte_process_info.nodename, context->app, msg,
 321                                                       __FILE__, __LINE__);
 322                     return;
 323                 }
 324             }
 325         }
 326         if (0 == rc && opal_hwloc_report_bindings) {
 327             opal_output(0, "MCW rank %d is not bound (or bound to all available processors)", child->name.vpid);
 328             /* avoid reporting it twice */
 329             (void) mca_base_var_env_name ("hwloc_base_report_bindings", &param);
 330             opal_unsetenv(param, environ_copy);
 331             free(param);
 332         }
 333     } else {
 334         /* convert the list to a cpuset */
 335         cpuset = hwloc_bitmap_alloc();
 336         if (0 != (rc = hwloc_bitmap_list_sscanf(cpuset, cpu_bitmap))) {
 337             /* See comment above about "This may be a small memory leak" */
 338             opal_asprintf(&msg, "hwloc_bitmap_sscanf returned \"%s\" for the string \"%s\"",
 339                      opal_strerror(rc), cpu_bitmap);
 340             if (NULL == msg) {
 341                 msg = "failed to convert bitmap list to hwloc bitmap";
 342             }
 343             if (OPAL_BINDING_REQUIRED(jobdat->map->binding) &&
 344                 OPAL_BINDING_POLICY_IS_SET(jobdat->map->binding)) {
 345                 /* If binding is required and a binding directive was explicitly
 346                  * given (i.e., we are not binding due to a default policy),
 347                  * send an error up the pipe (which exits -- it doesn't return).
 348                  */
 349                 orte_rtc_base_send_error_show_help(write_fd, 1, "help-orte-odls-default.txt",
 350                                                    "binding generic error",
 351                                                    orte_process_info.nodename,
 352                                                    context->app, msg,
 353                                                    __FILE__, __LINE__);
 354             } else {
 355                 orte_rtc_base_send_warn_show_help(write_fd,
 356                                                   "help-orte-odls-default.txt", "not bound",
 357                                                   orte_process_info.nodename, context->app, msg,
 358                                                   __FILE__, __LINE__);
 359                 free(cpu_bitmap);
 360                 return;
 361             }
 362         }
 363         /* bind as specified */
 364         rc = hwloc_set_cpubind(opal_hwloc_topology, cpuset, 0);
 365         /* if we got an error and this wasn't a default binding policy, then report it */
 366         if (rc < 0  && OPAL_BINDING_POLICY_IS_SET(jobdat->map->binding)) {
 367             char *tmp = NULL;
 368             if (errno == ENOSYS) {
 369                 msg = "hwloc indicates cpu binding not supported";
 370             } else if (errno == EXDEV) {
 371                 msg = "hwloc indicates cpu binding cannot be enforced";
 372             } else {
 373                 opal_asprintf(&msg, "hwloc_set_cpubind returned \"%s\" for bitmap \"%s\"",
 374                          opal_strerror(rc), cpu_bitmap);
 375             }
 376             if (OPAL_BINDING_REQUIRED(jobdat->map->binding)) {
 377                 /* If binding is required, send an error up the pipe (which exits
 378                    -- it doesn't return). */
 379                 orte_rtc_base_send_error_show_help(write_fd, 1, "help-orte-odls-default.txt",
 380                                                    "binding generic error",
 381                                                    orte_process_info.nodename, context->app, msg,
 382                                                    __FILE__, __LINE__);
 383             } else {
 384                 orte_rtc_base_send_warn_show_help(write_fd,
 385                                                   "help-orte-odls-default.txt", "not bound",
 386                                                   orte_process_info.nodename, context->app, msg,
 387                                                   __FILE__, __LINE__);
 388                 if (NULL != tmp) {
 389                     free(tmp);
 390                     free(msg);
 391                 }
 392                 return;
 393             }
 394             if (NULL != tmp) {
 395                 free(tmp);
 396                 free(msg);
 397             }
 398         }
 399         if (0 == rc && opal_hwloc_report_bindings) {
 400             char tmp1[1024], tmp2[1024];
 401             hwloc_cpuset_t mycpus;
 402             /* get the cpus we are bound to */
 403             mycpus = hwloc_bitmap_alloc();
 404             if (hwloc_get_cpubind(opal_hwloc_topology,
 405                                   mycpus,
 406                                   HWLOC_CPUBIND_PROCESS) < 0) {
 407                 opal_output(0, "MCW rank %d is not bound",
 408                             child->name.vpid);
 409             } else {
 410                 if (OPAL_ERR_NOT_BOUND == opal_hwloc_base_cset2str(tmp1, sizeof(tmp1), opal_hwloc_topology, mycpus)) {
 411                     opal_output(0, "MCW rank %d is not bound (or bound to all available processors)", child->name.vpid);
 412                 } else {
 413                     opal_hwloc_base_cset2mapstr(tmp2, sizeof(tmp2), opal_hwloc_topology, mycpus);
 414                     opal_output(0, "MCW rank %d bound to %s: %s",
 415                                 child->name.vpid, tmp1, tmp2);
 416                 }
 417             }
 418             hwloc_bitmap_free(mycpus);
 419             /* avoid reporting it twice */
 420             (void) mca_base_var_env_name ("hwloc_base_report_bindings", &param);
 421             opal_unsetenv(param, environ_copy);
 422             free(param);
 423         }
 424         /* set memory affinity policy - if we get an error, don't report
 425          * anything unless the user actually specified the binding policy
 426          */
 427         rc = opal_hwloc_base_set_process_membind_policy();
 428         if (ORTE_SUCCESS != rc  && OPAL_BINDING_POLICY_IS_SET(jobdat->map->binding)) {
 429             if (errno == ENOSYS) {
 430                 msg = "hwloc indicates memory binding not supported";
 431             } else if (errno == EXDEV) {
 432                 msg = "hwloc indicates memory binding cannot be enforced";
 433             } else {
 434                 msg = "failed to bind memory";
 435             }
 436             if (OPAL_HWLOC_BASE_MBFA_ERROR == opal_hwloc_base_mbfa) {
 437                 /* If binding is required, send an error up the pipe (which exits
 438                    -- it doesn't return). */
 439                 orte_rtc_base_send_error_show_help(write_fd, 1, "help-orte-odls-default.txt",
 440                                                    "memory binding error",
 441                                                    orte_process_info.nodename, context->app, msg,
 442                                                    __FILE__, __LINE__);
 443             } else {
 444                 orte_rtc_base_send_warn_show_help(write_fd,
 445                                                   "help-orte-odls-default.txt", "memory not bound",
 446                                                   orte_process_info.nodename, context->app, msg,
 447                                                   __FILE__, __LINE__);
 448                 free(cpu_bitmap);
 449                 return;
 450             }
 451         }
 452     }
 453     if (NULL != cpu_bitmap) {
 454         free(cpu_bitmap);
 455     }
 456 }
 457 
 458 #if HWLOC_API_VERSION >= 0x20000
 459 
 460 static int parse_map_line(const char *line,
 461                           unsigned long *beginp,
 462                           unsigned long *endp,
 463                           orte_rtc_hwloc_vm_map_kind_t *kindp)
 464 {
 465     const char *tmp = line, *next;
 466     unsigned long value;
 467 
 468     /* "beginaddr-endaddr " */
 469     value = strtoull(tmp, (char **) &next, 16);
 470     if (next == tmp) {
 471         return ORTE_ERROR;
 472     }
 473 
 474     *beginp = (unsigned long) value;
 475 
 476     if (*next != '-') {
 477         return ORTE_ERROR;
 478     }
 479 
 480      tmp = next + 1;
 481 
 482     value = strtoull(tmp, (char **) &next, 16);
 483     if (next == tmp) {
 484         return ORTE_ERROR;
 485     }
 486     *endp = (unsigned long) value;
 487     tmp = next;
 488 
 489     if (*next != ' ') {
 490         return ORTE_ERROR;
 491     }
 492     tmp = next + 1;
 493 
 494     /* look for ending absolute path */
 495     next = strchr(tmp, '/');
 496     if (next) {
 497         *kindp = VM_MAP_FILE;
 498     } else {
 499         /* look for ending special tag [foo] */
 500         next = strchr(tmp, '[');
 501         if (next) {
 502             if (!strncmp(next, "[heap]", 6)) {
 503                 *kindp = VM_MAP_HEAP;
 504             } else if (!strncmp(next, "[stack]", 7)) {
 505                 *kindp = VM_MAP_STACK;
 506             } else {
 507                 char *end;
 508                 if ((end = strchr(next, '\n')) != NULL) {
 509                     *end = '\0';
 510                 }
 511                 opal_output_verbose(80, orte_rtc_base_framework.framework_output,
 512                                     "Found special VMA \"%s\" before stack", next);
 513                 *kindp = VM_MAP_OTHER;
 514             }
 515         } else {
 516             *kindp = VM_MAP_ANONYMOUS;
 517         }
 518     }
 519 
 520     return ORTE_SUCCESS;
 521 }
 522 
 523 #define ALIGN2MB (2*1024*1024UL)
 524 
 525 static int use_hole(unsigned long holebegin,
 526                     unsigned long holesize,
 527                     unsigned long *addrp,
 528                     unsigned long size)
 529 {
 530     unsigned long aligned;
 531     unsigned long middle = holebegin+holesize/2;
 532 
 533     opal_output_verbose(80, orte_rtc_base_framework.framework_output,
 534                         "looking in hole [0x%lx-0x%lx] size %lu (%lu MB) for %lu (%lu MB)\n",
 535                         holebegin, holebegin+holesize, holesize, holesize>>20, size, size>>20);
 536 
 537     if (holesize < size) {
 538         return ORTE_ERROR;
 539     }
 540 
 541     /* try to align the middle of the hole on 64MB for POWER's 64k-page PMD */
 542     #define ALIGN64MB (64*1024*1024UL)
 543     aligned = (middle + ALIGN64MB) & ~(ALIGN64MB-1);
 544     if (aligned + size <= holebegin + holesize) {
 545         opal_output_verbose(80, orte_rtc_base_framework.framework_output,
 546                             "aligned [0x%lx-0x%lx] (middle 0x%lx) to 0x%lx for 64MB\n",
 547                             holebegin, holebegin+holesize, middle, aligned);
 548         opal_output_verbose(80, orte_rtc_base_framework.framework_output,
 549                             " there are %lu MB free before and %lu MB free after\n",
 550                             (aligned-holebegin)>>20, (holebegin+holesize-aligned-size)>>20);
 551 
 552         *addrp = aligned;
 553         return ORTE_SUCCESS;
 554     }
 555 
 556     /* try to align the middle of the hole on 2MB for x86 PMD */
 557     aligned = (middle + ALIGN2MB) & ~(ALIGN2MB-1);
 558     if (aligned + size <= holebegin + holesize) {
 559         opal_output_verbose(80, orte_rtc_base_framework.framework_output,
 560                             "aligned [0x%lx-0x%lx] (middle 0x%lx) to 0x%lx for 2MB\n",
 561                             holebegin, holebegin+holesize, middle, aligned);
 562         opal_output_verbose(80, orte_rtc_base_framework.framework_output,
 563                             " there are %lu MB free before and %lu MB free after\n",
 564                             (aligned-holebegin)>>20, (holebegin+holesize-aligned-size)>>20);
 565         *addrp = aligned;
 566         return ORTE_SUCCESS;
 567     }
 568 
 569     /* just use the end of the hole */
 570     *addrp = holebegin + holesize - size;
 571     opal_output_verbose(80, orte_rtc_base_framework.framework_output,
 572                         "using the end of hole starting at 0x%lx\n", *addrp);
 573     opal_output_verbose(80, orte_rtc_base_framework.framework_output,
 574                         " there are %lu MB free before\n", (*addrp-holebegin)>>20);
 575     return ORTE_SUCCESS;
 576 }
 577 
 578 static int find_hole(orte_rtc_hwloc_vm_hole_kind_t hkind,
 579                      size_t *addrp, size_t size)
 580 {
 581     unsigned long biggestbegin = 0;
 582     unsigned long biggestsize = 0;
 583     unsigned long prevend = 0;
 584     orte_rtc_hwloc_vm_map_kind_t prevmkind = VM_MAP_OTHER;
 585     int in_libs = 0;
 586     FILE *file;
 587     char line[96];
 588 
 589     file = fopen("/proc/self/maps", "r");
 590     if (!file) {
 591         return ORTE_ERROR;
 592     }
 593 
 594     while (fgets(line, sizeof(line), file) != NULL) {
 595         unsigned long begin=0, end=0;
 596         orte_rtc_hwloc_vm_map_kind_t mkind=VM_MAP_OTHER;
 597 
 598         if (!parse_map_line(line, &begin, &end, &mkind)) {
 599             opal_output_verbose(90, orte_rtc_base_framework.framework_output,
 600                                 "found %s from 0x%lx to 0x%lx\n",
 601                                 mkind == VM_MAP_HEAP ? "HEAP" :
 602                                 mkind == VM_MAP_STACK ? "STACK" :
 603                                 mkind == VM_MAP_OTHER ? "OTHER" :
 604                                 mkind == VM_MAP_FILE ? "FILE" :
 605                                 mkind == VM_MAP_ANONYMOUS ? "ANON" : "unknown",
 606                                 begin, end);
 607 
 608             switch (hkind) {
 609                 case VM_HOLE_BEGIN:
 610                     fclose(file);
 611                     return use_hole(0, begin, addrp, size);
 612 
 613                 case VM_HOLE_AFTER_HEAP:
 614                     if (prevmkind == VM_MAP_HEAP && mkind != VM_MAP_HEAP) {
 615                         /* only use HEAP when there's no other HEAP after it
 616                          * (there can be several of them consecutively).
 617                          */
 618                         fclose(file);
 619                         return use_hole(prevend, begin-prevend, addrp, size);
 620                     }
 621                     break;
 622 
 623                 case VM_HOLE_BEFORE_STACK:
 624                     if (mkind == VM_MAP_STACK) {
 625                         fclose(file);
 626                         return use_hole(prevend, begin-prevend, addrp, size);
 627                     }
 628                     break;
 629 
 630                 case VM_HOLE_IN_LIBS:
 631                     /* see if we are between heap and stack */
 632                     if (prevmkind == VM_MAP_HEAP) {
 633                         in_libs = 1;
 634                     }
 635                     if (mkind == VM_MAP_STACK) {
 636                         in_libs = 0;
 637                     }
 638                     if (!in_libs) {
 639                         /* we're not in libs, ignore this entry */
 640                         break;
 641                     }
 642                     /* we're in libs, consider this entry for searching the biggest hole below */
 643                     /* fallthrough */
 644 
 645                 case VM_HOLE_BIGGEST:
 646                     if (begin-prevend > biggestsize) {
 647                         opal_output_verbose(90, orte_rtc_base_framework.framework_output,
 648                                             "new biggest 0x%lx - 0x%lx = %lu (%lu MB)\n",
 649                                             prevend, begin, begin-prevend, (begin-prevend)>>20);
 650                         biggestbegin = prevend;
 651                         biggestsize = begin-prevend;
 652                     }
 653                     break;
 654 
 655                     default:
 656                         assert(0);
 657             }
 658         }
 659 
 660         while (!strchr(line, '\n')) {
 661             if (!fgets(line, sizeof(line), file)) {
 662                 goto done;
 663             }
 664         }
 665 
 666         if (mkind == VM_MAP_STACK) {
 667           /* Don't go beyond the stack. Other VMAs are special (vsyscall, vvar, vdso, etc),
 668            * There's no spare room there. And vsyscall is even above the userspace limit.
 669            */
 670           break;
 671         }
 672 
 673         prevend = end;
 674         prevmkind = mkind;
 675 
 676     }
 677 
 678   done:
 679     fclose(file);
 680     if (hkind == VM_HOLE_IN_LIBS || hkind == VM_HOLE_BIGGEST) {
 681         return use_hole(biggestbegin, biggestsize, addrp, size);
 682     }
 683 
 684     return ORTE_ERROR;
 685 }
 686 
 687 static int enough_space(const char *filename,
 688                         size_t space_req,
 689                         uint64_t *space_avail,
 690                         bool *result)
 691 {
 692     uint64_t avail = 0;
 693     size_t fluff = (size_t)(.05 * space_req);
 694     bool enough = false;
 695     char *last_sep = NULL;
 696     /* the target file name is passed here, but we need to check the parent
 697      * directory. store it so we can extract that info later. */
 698     char *target_dir = strdup(filename);
 699     int rc;
 700 
 701     if (NULL == target_dir) {
 702         rc = OPAL_ERR_OUT_OF_RESOURCE;
 703         goto out;
 704     }
 705     /* get the parent directory */
 706     last_sep = strrchr(target_dir, OPAL_PATH_SEP[0]);
 707     *last_sep = '\0';
 708     /* now check space availability */
 709     if (OPAL_SUCCESS != (rc = opal_path_df(target_dir, &avail))) {
 710         OPAL_OUTPUT_VERBOSE(
 711             (70, orte_rtc_base_framework.framework_output,
 712              "WARNING: opal_path_df failure!")
 713         );
 714         goto out;
 715     }
 716     /* do we have enough space? */
 717     if (avail >= space_req + fluff) {
 718         enough = true;
 719     }
 720     else {
 721         OPAL_OUTPUT_VERBOSE(
 722             (70, orte_rtc_base_framework.framework_output,
 723              "WARNING: not enough space on %s to meet request!"
 724              "available: %"PRIu64 "requested: %lu", target_dir,
 725              avail, (unsigned long)space_req + fluff)
 726         );
 727     }
 728 
 729 out:
 730     if (NULL != target_dir) {
 731         free(target_dir);
 732     }
 733     *result = enough;
 734     *space_avail = avail;
 735     return rc;
 736 }
 737 #endif

/* [<][>][^][v][top][bottom][index][help] */