root/orte/util/session_dir.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. orte_create_dir
  2. _setup_tmpdir_base
  3. orte_setup_top_session_dir
  4. _setup_jobfam_session_dir
  5. _setup_job_session_dir
  6. _setup_proc_session_dir
  7. orte_session_setup_base
  8. orte_session_dir
  9. orte_session_dir_cleanup
  10. orte_session_dir_finalize
  11. orte_dir_check_file

   1 /*
   2  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
   3  *                         University Research and Technology
   4  *                         Corporation.  All rights reserved.
   5  * Copyright (c) 2004-2006 The University of Tennessee and The University
   6  *                         of Tennessee Research Foundation.  All rights
   7  *                         reserved.
   8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
   9  *                         University of Stuttgart.  All rights reserved.
  10  * Copyright (c) 2004-2005 The Regents of the University of California.
  11  *                         All rights reserved.
  12  * Copyright (c) 2014      Cisco Systems, Inc.  All rights reserved.
  13  * Copyright (c) 2015      Research Organization for Information Science
  14  *                         and Technology (RIST). All rights reserved.
  15  * Copyright (c) 2015-2018 Intel, Inc.  All rights reserved.
  16  * $COPYRIGHT$
  17  *
  18  * Additional copyrights may follow
  19  *
  20  * $HEADER$
  21  *
  22  */
  23 
  24 #include "orte_config.h"
  25 #include "orte/constants.h"
  26 
  27 #include <stdio.h>
  28 #ifdef HAVE_PWD_H
  29 #include <pwd.h>
  30 #endif
  31 #include <stddef.h>
  32 #include <stdlib.h>
  33 #include <string.h>
  34 #ifdef HAVE_SYS_PARAM_H
  35 #include <sys/param.h>
  36 #endif  /* HAVE_SYS_PARAM_H */
  37 #ifdef HAVE_SYS_TYPES_H
  38 #include <sys/types.h>
  39 #endif  /* HAVE_SYS_TYPES_H */
  40 #include <sys/stat.h>
  41 #ifdef HAVE_UNISTD_H
  42 #include <unistd.h>
  43 #endif  /* HAVE_UNISTD_H */
  44 #include <errno.h>
  45 #ifdef HAVE_DIRENT_H
  46 #include <dirent.h>
  47 #endif  /* HAVE_DIRENT_H */
  48 #ifdef HAVE_PWD_H
  49 #include <pwd.h>
  50 #endif  /* HAVE_PWD_H */
  51 
  52 #include "opal/util/argv.h"
  53 #include "opal/util/output.h"
  54 #include "opal/util/os_path.h"
  55 #include "opal/util/os_dirpath.h"
  56 #include "opal/util/basename.h"
  57 #include "opal/util/opal_environ.h"
  58 #include "opal/util/printf.h"
  59 
  60 #include "orte/util/proc_info.h"
  61 #include "orte/util/name_fns.h"
  62 #include "orte/util/show_help.h"
  63 
  64 #include "orte/mca/errmgr/errmgr.h"
  65 #include "orte/mca/ras/base/base.h"
  66 #include "orte/runtime/runtime.h"
  67 #include "orte/runtime/orte_globals.h"
  68 
  69 #include "orte/util/session_dir.h"
  70 
  71 /*******************************
  72  * Local function Declarations
  73  *******************************/
  74 static int orte_create_dir(char *directory);
  75 
  76 static bool orte_dir_check_file(const char *root, const char *path);
  77 
  78 #define OMPI_PRINTF_FIX_STRING(a) ((NULL == a) ? "(null)" : a)
  79 
  80 /****************************
  81  * Functionality
  82  ****************************/
  83 /*
  84  * Check and create the directory requested
  85  */
  86 static int orte_create_dir(char *directory)
  87 {
  88     mode_t my_mode = S_IRWXU;  /* I'm looking for full rights */
  89     int ret;
  90 
  91     /* Sanity check before creating the directory with the proper mode,
  92      * Make sure it doesn't exist already */
  93     if( ORTE_ERR_NOT_FOUND !=
  94         (ret = opal_os_dirpath_access(directory, my_mode)) ) {
  95         /* Failure because opal_os_dirpath_access() indicated that either:
  96          * - The directory exists and we can access it (no need to create it again),
  97          *    return OPAL_SUCCESS, or
  98          * - don't have access rights, return OPAL_ERROR
  99          */
 100         if (ORTE_SUCCESS != ret) {
 101             ORTE_ERROR_LOG(ret);
 102         }
 103         return(ret);
 104     }
 105 
 106     /* Get here if the directory doesn't exist, so create it */
 107     if (ORTE_SUCCESS != (ret = opal_os_dirpath_create(directory, my_mode))) {
 108         ORTE_ERROR_LOG(ret);
 109     }
 110     return ret;
 111 }
 112 
 113 
 114 static int _setup_tmpdir_base(void)
 115 {
 116     int rc = ORTE_SUCCESS;
 117 
 118     /* make sure that we have tmpdir_base set
 119      * if we need it
 120      */
 121     if (NULL == orte_process_info.tmpdir_base) {
 122         orte_process_info.tmpdir_base =
 123                 strdup(opal_tmp_directory());
 124         if (NULL == orte_process_info.tmpdir_base) {
 125             rc = ORTE_ERR_OUT_OF_RESOURCE;
 126             goto exit;
 127         }
 128     }
 129 exit:
 130     if( ORTE_SUCCESS != rc ){
 131         ORTE_ERROR_LOG(rc);
 132     }
 133     return rc;
 134 }
 135 
 136 int orte_setup_top_session_dir(void)
 137 {
 138     int rc = ORTE_SUCCESS;
 139     /* get the effective uid */
 140     uid_t uid = geteuid();
 141 
 142     /* construct the top_session_dir if we need */
 143     if (NULL == orte_process_info.top_session_dir) {
 144         if (ORTE_SUCCESS != (rc = _setup_tmpdir_base())) {
 145             return rc;
 146         }
 147         if( NULL == orte_process_info.nodename ||
 148                 NULL == orte_process_info.tmpdir_base ){
 149             /* we can't setup top session dir */
 150             rc = ORTE_ERR_BAD_PARAM;
 151             goto exit;
 152         }
 153 
 154         if (0 > opal_asprintf(&orte_process_info.top_session_dir,
 155                          "%s/ompi.%s.%lu", orte_process_info.tmpdir_base,
 156                          orte_process_info.nodename, (unsigned long)uid)) {
 157             orte_process_info.top_session_dir = NULL;
 158             rc = ORTE_ERR_OUT_OF_RESOURCE;
 159             goto exit;
 160         }
 161     }
 162 exit:
 163     if( ORTE_SUCCESS != rc ){
 164         ORTE_ERROR_LOG(rc);
 165     }
 166     return rc;
 167 }
 168 
 169 static int _setup_jobfam_session_dir(orte_process_name_t *proc)
 170 {
 171     int rc = ORTE_SUCCESS;
 172 
 173     /* construct the top_session_dir if we need */
 174     if (NULL == orte_process_info.jobfam_session_dir) {
 175         if (ORTE_SUCCESS != (rc = orte_setup_top_session_dir())) {
 176             return rc;
 177         }
 178 
 179         if (ORTE_PROC_IS_MASTER) {
 180             if (0 > opal_asprintf(&orte_process_info.jobfam_session_dir,
 181                              "%s/dvm", orte_process_info.top_session_dir)) {
 182                 rc = ORTE_ERR_OUT_OF_RESOURCE;
 183                 goto exit;
 184             }
 185         } else if (ORTE_PROC_IS_HNP) {
 186             if (0 > opal_asprintf(&orte_process_info.jobfam_session_dir,
 187                              "%s/pid.%lu", orte_process_info.top_session_dir,
 188                              (unsigned long)orte_process_info.pid)) {
 189                 rc = ORTE_ERR_OUT_OF_RESOURCE;
 190                 goto exit;
 191             }
 192         } else {
 193             /* we were not given one, so define it */
 194             if (NULL == proc || (ORTE_JOBID_INVALID == proc->jobid)) {
 195                 if (0 > opal_asprintf(&orte_process_info.jobfam_session_dir,
 196                                  "%s/jobfam", orte_process_info.top_session_dir) ) {
 197                     rc = ORTE_ERR_OUT_OF_RESOURCE;
 198                     goto exit;
 199                 }
 200             } else {
 201                 if (0 > opal_asprintf(&orte_process_info.jobfam_session_dir,
 202                                  "%s/jf.%d", orte_process_info.top_session_dir,
 203                                  ORTE_JOB_FAMILY(proc->jobid))) {
 204                     orte_process_info.jobfam_session_dir = NULL;
 205                     rc = ORTE_ERR_OUT_OF_RESOURCE;
 206                     goto exit;
 207                 }
 208             }
 209         }
 210     }
 211 exit:
 212     if( ORTE_SUCCESS != rc ){
 213         ORTE_ERROR_LOG(rc);
 214     }
 215     return rc;
 216 }
 217 
 218 static int
 219 _setup_job_session_dir(orte_process_name_t *proc)
 220 {
 221     int rc = ORTE_SUCCESS;
 222 
 223     /* construct the top_session_dir if we need */
 224     if( NULL == orte_process_info.job_session_dir ){
 225         if( ORTE_SUCCESS != (rc = _setup_jobfam_session_dir(proc)) ){
 226             return rc;
 227         }
 228         if (ORTE_JOBID_INVALID != proc->jobid) {
 229             if (0 > opal_asprintf(&orte_process_info.job_session_dir,
 230                              "%s/%d", orte_process_info.jobfam_session_dir,
 231                              ORTE_LOCAL_JOBID(proc->jobid))) {
 232                 orte_process_info.job_session_dir = NULL;
 233                 rc = ORTE_ERR_OUT_OF_RESOURCE;
 234                 goto exit;
 235             }
 236         } else {
 237             orte_process_info.job_session_dir = NULL;
 238         }
 239     }
 240 
 241 exit:
 242     if( ORTE_SUCCESS != rc ){
 243         ORTE_ERROR_LOG(rc);
 244     }
 245     return rc;
 246 }
 247 
 248 static int
 249 _setup_proc_session_dir(orte_process_name_t *proc)
 250 {
 251     int rc = ORTE_SUCCESS;
 252 
 253     /* construct the top_session_dir if we need */
 254     if( NULL == orte_process_info.proc_session_dir ){
 255         if( ORTE_SUCCESS != (rc = _setup_job_session_dir(proc)) ){
 256             return rc;
 257         }
 258         if (ORTE_VPID_INVALID != proc->vpid) {
 259             if (0 > opal_asprintf(&orte_process_info.proc_session_dir,
 260                              "%s/%d", orte_process_info.job_session_dir,
 261                              proc->vpid)) {
 262                 orte_process_info.proc_session_dir = NULL;
 263                 rc = ORTE_ERR_OUT_OF_RESOURCE;
 264                 goto exit;
 265             }
 266         } else {
 267             orte_process_info.proc_session_dir = NULL;
 268         }
 269     }
 270 
 271 exit:
 272     if( ORTE_SUCCESS != rc ){
 273         ORTE_ERROR_LOG(rc);
 274     }
 275     return rc;
 276 }
 277 
 278 int orte_session_setup_base(orte_process_name_t *proc)
 279 {
 280     int rc;
 281 
 282     /* Ensure that system info is set */
 283     orte_proc_info();
 284 
 285     /* setup job and proc session directories */
 286     if( ORTE_SUCCESS != (rc = _setup_job_session_dir(proc)) ){
 287         return rc;
 288     }
 289 
 290     if( ORTE_SUCCESS != (rc = _setup_proc_session_dir(proc)) ){
 291         return rc;
 292     }
 293 
 294     /* BEFORE doing anything else, check to see if this prefix is
 295      * allowed by the system
 296      */
 297     if (NULL != orte_prohibited_session_dirs ||
 298             NULL != orte_process_info.tmpdir_base ) {
 299         char **list;
 300         int i, len;
 301         /* break the string into tokens - it should be
 302          * separated by ','
 303          */
 304         list = opal_argv_split(orte_prohibited_session_dirs, ',');
 305         len = opal_argv_count(list);
 306         /* cycle through the list */
 307         for (i=0; i < len; i++) {
 308             /* check if prefix matches */
 309             if (0 == strncmp(orte_process_info.tmpdir_base, list[i], strlen(list[i]))) {
 310                 /* this is a prohibited location */
 311                 orte_show_help("help-orte-runtime.txt",
 312                                "orte:session:dir:prohibited",
 313                                true, orte_process_info.tmpdir_base,
 314                                orte_prohibited_session_dirs);
 315                 opal_argv_free(list);
 316                 return ORTE_ERR_FATAL;
 317             }
 318         }
 319         opal_argv_free(list);  /* done with this */
 320     }
 321     return ORTE_SUCCESS;
 322 }
 323 
 324 /*
 325  * Construct the session directory and create it if necessary
 326  */
 327 int orte_session_dir(bool create, orte_process_name_t *proc)
 328 {
 329     int rc = ORTE_SUCCESS;
 330 
 331     /*
 332      * Get the session directory full name
 333      */
 334     if (ORTE_SUCCESS != (rc = orte_session_setup_base(proc))) {
 335         if (ORTE_ERR_FATAL == rc) {
 336             /* this indicates we should abort quietly */
 337             rc = ORTE_ERR_SILENT;
 338         }
 339         goto cleanup;
 340     }
 341 
 342     /*
 343      * Now that we have the full path, go ahead and create it if necessary
 344      */
 345     if( create ) {
 346         if( ORTE_SUCCESS != (rc = orte_create_dir(orte_process_info.proc_session_dir)) ) {
 347             ORTE_ERROR_LOG(rc);
 348             goto cleanup;
 349         }
 350     }
 351 
 352     if (orte_debug_flag) {
 353         opal_output(0, "procdir: %s",
 354                     OMPI_PRINTF_FIX_STRING(orte_process_info.proc_session_dir));
 355         opal_output(0, "jobdir: %s",
 356                     OMPI_PRINTF_FIX_STRING(orte_process_info.job_session_dir));
 357         opal_output(0, "top: %s",
 358                     OMPI_PRINTF_FIX_STRING(orte_process_info.jobfam_session_dir));
 359         opal_output(0, "top: %s",
 360                     OMPI_PRINTF_FIX_STRING(orte_process_info.top_session_dir));
 361         opal_output(0, "tmp: %s",
 362                     OMPI_PRINTF_FIX_STRING(orte_process_info.tmpdir_base));
 363     }
 364 
 365 cleanup:
 366     return rc;
 367 }
 368 
 369 /*
 370  * A job has aborted - so force cleanup of the session directory
 371  */
 372 int
 373 orte_session_dir_cleanup(orte_jobid_t jobid)
 374 {
 375     /* special case - if a daemon is colocated with mpirun,
 376      * then we let mpirun do the rest to avoid a race
 377      * condition. this scenario always results in the rank=1
 378      * daemon colocated with mpirun */
 379     if (orte_ras_base.launch_orted_on_hn &&
 380         ORTE_PROC_IS_DAEMON &&
 381         1 == ORTE_PROC_MY_NAME->vpid) {
 382         return ORTE_SUCCESS;
 383     }
 384 
 385     if (!orte_create_session_dirs || orte_process_info.rm_session_dirs ) {
 386         /* we haven't created them or RM will clean them up for us*/
 387         return ORTE_SUCCESS;
 388     }
 389 
 390     if (NULL == orte_process_info.jobfam_session_dir ||
 391         NULL == orte_process_info.proc_session_dir) {
 392         /* this should never happen - it means we are calling
 393          * cleanup *before* properly setting up the session
 394          * dir system. This leaves open the possibility of
 395          * accidentally removing directories we shouldn't
 396          * touch
 397          */
 398         return ORTE_ERR_NOT_INITIALIZED;
 399     }
 400 
 401 
 402     /* recursively blow the whole session away for our job family,
 403      * saving only output files
 404      */
 405     opal_os_dirpath_destroy(orte_process_info.jobfam_session_dir,
 406                             true, orte_dir_check_file);
 407 
 408     if (opal_os_dirpath_is_empty(orte_process_info.jobfam_session_dir)) {
 409         if (orte_debug_flag) {
 410             opal_output(0, "sess_dir_cleanup: found jobfam session dir empty - deleting");
 411         }
 412         rmdir(orte_process_info.jobfam_session_dir);
 413     } else {
 414         if (orte_debug_flag) {
 415             if (OPAL_ERR_NOT_FOUND ==
 416                     opal_os_dirpath_access(orte_process_info.job_session_dir, 0)) {
 417                 opal_output(0, "sess_dir_cleanup: job session dir does not exist");
 418             } else {
 419                 opal_output(0, "sess_dir_cleanup: job session dir not empty - leaving");
 420             }
 421         }
 422     }
 423 
 424     if (NULL != orte_process_info.top_session_dir) {
 425         if (opal_os_dirpath_is_empty(orte_process_info.top_session_dir)) {
 426             if (orte_debug_flag) {
 427                 opal_output(0, "sess_dir_cleanup: found top session dir empty - deleting");
 428             }
 429             rmdir(orte_process_info.top_session_dir);
 430         } else {
 431             if (orte_debug_flag) {
 432                 if (OPAL_ERR_NOT_FOUND ==
 433                         opal_os_dirpath_access(orte_process_info.top_session_dir, 0)) {
 434                     opal_output(0, "sess_dir_cleanup: top session dir does not exist");
 435                 } else {
 436                     opal_output(0, "sess_dir_cleanup: top session dir not empty - leaving");
 437                 }
 438             }
 439         }
 440     }
 441 
 442     /* now attempt to eliminate the top level directory itself - this
 443      * will fail if anything is present, but ensures we cleanup if
 444      * we are the last one out
 445      */
 446     if( NULL != orte_process_info.top_session_dir ){
 447         opal_os_dirpath_destroy(orte_process_info.top_session_dir,
 448                                 false, orte_dir_check_file);
 449     }
 450 
 451 
 452     return ORTE_SUCCESS;
 453 }
 454 
 455 
 456 int
 457 orte_session_dir_finalize(orte_process_name_t *proc)
 458 {
 459     if (!orte_create_session_dirs || orte_process_info.rm_session_dirs ) {
 460         /* we haven't created them or RM will clean them up for us*/
 461         return ORTE_SUCCESS;
 462     }
 463 
 464     if (NULL == orte_process_info.job_session_dir ||
 465         NULL == orte_process_info.proc_session_dir) {
 466         /* this should never happen - it means we are calling
 467          * cleanup *before* properly setting up the session
 468          * dir system. This leaves open the possibility of
 469          * accidentally removing directories we shouldn't
 470          * touch
 471          */
 472         return ORTE_ERR_NOT_INITIALIZED;
 473     }
 474 
 475     opal_os_dirpath_destroy(orte_process_info.proc_session_dir,
 476                             false, orte_dir_check_file);
 477 
 478     if (opal_os_dirpath_is_empty(orte_process_info.proc_session_dir)) {
 479         if (orte_debug_flag) {
 480             opal_output(0, "sess_dir_finalize: found proc session dir empty - deleting");
 481         }
 482         rmdir(orte_process_info.proc_session_dir);
 483     } else {
 484         if (orte_debug_flag) {
 485             if (OPAL_ERR_NOT_FOUND ==
 486                     opal_os_dirpath_access(orte_process_info.proc_session_dir, 0)) {
 487                 opal_output(0, "sess_dir_finalize: proc session dir does not exist");
 488             } else {
 489                 opal_output(0, "sess_dir_finalize: proc session dir not empty - leaving");
 490             }
 491         }
 492     }
 493 
 494     /* special case - if a daemon is colocated with mpirun,
 495      * then we let mpirun do the rest to avoid a race
 496      * condition. this scenario always results in the rank=1
 497      * daemon colocated with mpirun */
 498     if (orte_ras_base.launch_orted_on_hn &&
 499         ORTE_PROC_IS_DAEMON &&
 500         1 == ORTE_PROC_MY_NAME->vpid) {
 501         return ORTE_SUCCESS;
 502     }
 503 
 504     opal_os_dirpath_destroy(orte_process_info.job_session_dir,
 505                             false, orte_dir_check_file);
 506 
 507     /* only remove the jobfam session dir if we are the
 508      * local daemon and we are finalizing our own session dir */
 509     if ((ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) &&
 510         (ORTE_PROC_MY_NAME == proc)) {
 511         opal_os_dirpath_destroy(orte_process_info.jobfam_session_dir,
 512                                 false, orte_dir_check_file);
 513     }
 514 
 515     if( NULL != orte_process_info.top_session_dir ){
 516         opal_os_dirpath_destroy(orte_process_info.top_session_dir,
 517                                 false, orte_dir_check_file);
 518     }
 519 
 520     if (opal_os_dirpath_is_empty(orte_process_info.job_session_dir)) {
 521         if (orte_debug_flag) {
 522             opal_output(0, "sess_dir_finalize: found job session dir empty - deleting");
 523         }
 524         rmdir(orte_process_info.job_session_dir);
 525     } else {
 526         if (orte_debug_flag) {
 527             if (OPAL_ERR_NOT_FOUND ==
 528                     opal_os_dirpath_access(orte_process_info.job_session_dir, 0)) {
 529                 opal_output(0, "sess_dir_finalize: job session dir does not exist");
 530             } else {
 531                 opal_output(0, "sess_dir_finalize: job session dir not empty - leaving");
 532             }
 533         }
 534     }
 535 
 536     if (opal_os_dirpath_is_empty(orte_process_info.jobfam_session_dir)) {
 537         if (orte_debug_flag) {
 538             opal_output(0, "sess_dir_finalize: found jobfam session dir empty - deleting");
 539         }
 540         rmdir(orte_process_info.jobfam_session_dir);
 541     } else {
 542         if (orte_debug_flag) {
 543             if (OPAL_ERR_NOT_FOUND ==
 544                     opal_os_dirpath_access(orte_process_info.jobfam_session_dir, 0)) {
 545                 opal_output(0, "sess_dir_finalize: jobfam session dir does not exist");
 546             } else {
 547                 opal_output(0, "sess_dir_finalize: jobfam session dir not empty - leaving");
 548             }
 549         }
 550     }
 551 
 552     if (opal_os_dirpath_is_empty(orte_process_info.jobfam_session_dir)) {
 553         if (orte_debug_flag) {
 554             opal_output(0, "sess_dir_finalize: found jobfam session dir empty - deleting");
 555         }
 556         rmdir(orte_process_info.jobfam_session_dir);
 557     } else {
 558         if (orte_debug_flag) {
 559             if (OPAL_ERR_NOT_FOUND ==
 560                     opal_os_dirpath_access(orte_process_info.jobfam_session_dir, 0)) {
 561                 opal_output(0, "sess_dir_finalize: jobfam session dir does not exist");
 562             } else {
 563                 opal_output(0, "sess_dir_finalize: jobfam session dir not empty - leaving");
 564             }
 565         }
 566     }
 567 
 568     if (NULL != orte_process_info.top_session_dir) {
 569         if (opal_os_dirpath_is_empty(orte_process_info.top_session_dir)) {
 570             if (orte_debug_flag) {
 571                 opal_output(0, "sess_dir_finalize: found top session dir empty - deleting");
 572             }
 573             rmdir(orte_process_info.top_session_dir);
 574         } else {
 575             if (orte_debug_flag) {
 576                 if (OPAL_ERR_NOT_FOUND ==
 577                         opal_os_dirpath_access(orte_process_info.top_session_dir, 0)) {
 578                     opal_output(0, "sess_dir_finalize: top session dir does not exist");
 579                 } else {
 580                     opal_output(0, "sess_dir_finalize: top session dir not empty - leaving");
 581                 }
 582             }
 583         }
 584     }
 585 
 586     return ORTE_SUCCESS;
 587 }
 588 
 589 static bool
 590 orte_dir_check_file(const char *root, const char *path)
 591 {
 592     struct stat st;
 593     char *fullpath;
 594 
 595     /*
 596      * Keep:
 597      *  - non-zero files starting with "output-"
 598      */
 599     if (0 == strncmp(path, "output-", strlen("output-"))) {
 600         fullpath = opal_os_path(false, &fullpath, root, path, NULL);
 601         stat(fullpath, &st);
 602         free(fullpath);
 603         if (0 == st.st_size) {
 604             return true;
 605         }
 606         return false;
 607     }
 608 
 609     return true;
 610 }

/* [<][>][^][v][top][bottom][index][help] */