root/orte/runtime/orte_wait.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


   1 /*
   2  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
   3  *                         University Research and Technology
   4  *                         Corporation.  All rights reserved.
   5  * Copyright (c) 2004-2011 The University of Tennessee and The University
   6  *                         of Tennessee Research Foundation.  All rights
   7  *                         reserved.
   8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
   9  *                         University of Stuttgart.  All rights reserved.
  10  * Copyright (c) 2004-2005 The Regents of the University of California.
  11  *                         All rights reserved.
  12  * Copyright (c) 2008      Institut National de Recherche en Informatique
  13  *                         et Automatique. All rights reserved.
  14  * Copyright (c) 2011      Los Alamos National Security, LLC.
  15  *                         All rights reserved.
  16  * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
  17  * $COPYRIGHT$
  18  *
  19  * Additional copyrights may follow
  20  *
  21  * $HEADER$
  22  */
  23 
  24 /**
  25  * @file
  26  *
  27  * Interface for waitpid / async notification of child death with the
  28  * libevent runtime system.
  29  */
  30 #ifndef ORTE_WAIT_H
  31 #define ORTE_WAIT_H
  32 
  33 #include "orte_config.h"
  34 
  35 #ifdef HAVE_SYS_TYPES_H
  36 #include <sys/types.h>
  37 #endif
  38 #include <time.h>
  39 #if HAVE_SYS_TIME_H
  40 #include <sys/time.h>
  41 #endif
  42 
  43 #include "opal/dss/dss.h"
  44 #include "opal/util/output.h"
  45 #include "opal/sys/atomic.h"
  46 #include "opal/mca/event/event.h"
  47 
  48 #include "orte/types.h"
  49 #include "orte/mca/rml/rml_types.h"
  50 #include "orte/runtime/orte_globals.h"
  51 #include "orte/util/threads.h"
  52 
  53 BEGIN_C_DECLS
  54 
  55 /** Signature of the handler passed to \c orte_wait_cb: (int fd, short args, void *cb) */
  56 typedef void (*orte_wait_cbfunc_t)(int fd, short args, void* cb);
  57 
  58 /* Tracker object; its fields mirror the arguments of orte_wait_cb (proc, callback, evb, data) */
  59 typedef struct {
  60     opal_list_item_t super;     /* list item base, kept as the first member */
  61     opal_event_t ev;            /* event object embedded in the tracker */
  62     opal_event_base_t *evb;     /* event base pointer (presumably the one given to orte_wait_cb) */
  63     orte_proc_t *child;         /* process this tracker refers to */
  64     orte_wait_cbfunc_t cbfunc;  /* handler of type orte_wait_cbfunc_t */
  65     void *cbdata;               /* opaque pointer stored alongside cbfunc */
  66 } orte_wait_tracker_t;
  67 OBJ_CLASS_DECLARATION(orte_wait_tracker_t);
  68 
  69 /**
  70  * Disable / re-enable the SIGCHLD handler.
  71  *
  72  * Both functions may only be used after orte_wait_init() has been called.
  73  */
  74 
  75 ORTE_DECLSPEC void orte_wait_enable(void);
  76 ORTE_DECLSPEC void orte_wait_disable(void);
  77 
  78 /**
  79  * Register \c callback for notification when \c proc causes a SIGCHLD;
  80  * \c waitpid() will already have been called on the process by then.
  81  *
  82  * \c evb and \c data are supplied by the caller together with the callback
  83  * (presumably the event base to use and the callback's argument - confirm).
  84  */
  85 ORTE_DECLSPEC void orte_wait_cb(orte_proc_t *proc, orte_wait_cbfunc_t callback,
  86                                 opal_event_base_t *evb, void *data);
  87 /* NOTE(review): presumably withdraws the callback registered for proc - confirm */
  88 ORTE_DECLSPEC void orte_wait_cb_cancel(orte_proc_t *proc);
  89 
  90 
  91 /* In a few places, we need to barrier until something happens
  92  * that changes a flag to indicate we can release - e.g., waiting
  93  * for a specific message to arrive. This macro logs the call site,
  94  * then polls (flg) with a 100 usec nanosleep between checks until it
  95  * reads as false. Invoke it as a statement terminated by ';' - the
  96  * expansion deliberately omits it so it is safe in an unbraced if/else.
  97  */
  98 #define ORTE_WAIT_FOR_COMPLETION(flg)                                   \
  99     do {                                                                \
 100         opal_output_verbose(1, orte_progress_thread_debug,              \
 101                             "%s waiting on progress thread at %s:%d",   \
 102                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),         \
 103                             __FILE__, __LINE__);                        \
 104         while ((flg)) {                                                 \
 105             /* provide a short quiet period so we                       \
 106              * don't hammer the cpu while waiting                       \
 107              */                                                         \
 108             struct timespec tp = {0, 100000};                           \
 109             nanosleep(&tp, NULL);                                       \
 110         }                                                               \
 111         ORTE_ACQUIRE_OBJECT(flg);                                       \
 112     } while (0)
 113 
 114 /**
 115  * In a number of places within the code, we want to setup a timer
 116  * to detect when some procedure failed to complete. For example,
 117  * when we launch the daemons, we frequently have no way to directly
 118  * detect that a daemon failed to launch. Setting a timer allows us
 119  * to automatically fail out of the launch if we don't hear from a
 120  * daemon in some specified time window.
 121  *
 122  * The wait is (deltat) * (n) microseconds, clamped to (maxwait) when
 123  * (maxwait) is positive, and is split into tv_sec/tv_usec so that the
 124  * microsecond field is always less than 1M since some systems care
 125  * about that. (cbfunc) is registered on orte_event_base at
 126  * ORTE_ERROR_PRI with the new orte_timer_t, whose payload is (cbd).
 127  * Invoke as a statement terminated by ';' - the expansion deliberately
 128  * omits it so the macro can sit in an unbraced if/else.
 129  *
 130  * NOTE: the callback function is responsible for releasing the timer
 131  * event back to the event pool!
 132  */
 133 #define ORTE_DETECT_TIMEOUT(n, deltat, maxwait, cbfunc, cbd)                \
 134     do {                                                                    \
 135         orte_timer_t *orte_dt_tmr_ = OBJ_NEW(orte_timer_t);                 \
 136         int orte_dt_usec_;                                                  \
 137         orte_dt_tmr_->payload = (cbd);                                      \
 138         opal_event_evtimer_set(orte_event_base, orte_dt_tmr_->ev,           \
 139                                (cbfunc), orte_dt_tmr_);                     \
 140         opal_event_set_priority(orte_dt_tmr_->ev, ORTE_ERROR_PRI);          \
 141         orte_dt_usec_ = (deltat) * (n);                                     \
 142         if ((maxwait) > 0 && orte_dt_usec_ > (maxwait)) {                   \
 143             orte_dt_usec_ = (maxwait);                                      \
 144         }                                                                   \
 145         orte_dt_tmr_->tv.tv_sec = orte_dt_usec_ / 1000000;                  \
 146         orte_dt_tmr_->tv.tv_usec = orte_dt_usec_ % 1000000;                 \
 147         OPAL_OUTPUT_VERBOSE((1, orte_debug_output,                          \
 148                              "defining timeout: %ld sec %ld usec at %s:%d", \
 149                              (long)orte_dt_tmr_->tv.tv_sec,                 \
 150                              (long)orte_dt_tmr_->tv.tv_usec,                \
 151                              __FILE__, __LINE__));                          \
 152         ORTE_POST_OBJECT(orte_dt_tmr_);                                     \
 153         opal_event_evtimer_add(orte_dt_tmr_->ev, &orte_dt_tmr_->tv);        \
 154     } while (0)
 155 
 156 
 157 /**
 158  * There are places in the code where we just want to periodically
 159  * wakeup to do something, and then go back to sleep again. This macro
 160  * arms a (sec) s + (usec) us timer on orte_event_base that runs (cbfunc)
 161  * at priority (pri); end the invocation with ';' (the expansion omits it).
 162  * NOTE: the callback function is responsible for releasing the timer
 163  * event back to the event pool when done! Otherwise, the finalize
 164  * function will take care of it.
 165  */
 166 #define ORTE_TIMER_EVENT(sec, usec, cbfunc, pri)                                \
 167     do {                                                                        \
 168         orte_timer_t *orte_te_tmr_ = OBJ_NEW(orte_timer_t);                     \
 169         opal_event_evtimer_set(orte_event_base, orte_te_tmr_->ev,               \
 170                                (cbfunc), orte_te_tmr_);                         \
 171         opal_event_set_priority(orte_te_tmr_->ev, (pri));                       \
 172         orte_te_tmr_->tv.tv_sec = (sec) + (usec) / 1000000;                     \
 173         orte_te_tmr_->tv.tv_usec = (usec) % 1000000;                            \
 174         OPAL_OUTPUT_VERBOSE((1, orte_debug_output,                              \
 175                              "defining timer event: %ld sec %ld usec at %s:%d", \
 176                              (long)orte_te_tmr_->tv.tv_sec,                     \
 177                              (long)orte_te_tmr_->tv.tv_usec,                    \
 178                              __FILE__, __LINE__));                              \
 179         ORTE_POST_OBJECT(orte_te_tmr_);                                         \
 180         opal_event_evtimer_add(orte_te_tmr_->ev, &orte_te_tmr_->tv);            \
 181     } while (0)
 182 
 183 
 184 /**
 185  * \internal
 186  * Initialize the wait system (allocate mutexes, etc.). Returns an int
 187  * status code (presumably ORTE_SUCCESS on success - TODO confirm).
 188  */
 189 ORTE_DECLSPEC int orte_wait_init(void);
 190 
 191 /**
 192  * \internal
 193  * Finalize the wait system (deallocate mutexes, etc.). Returns an int
 194  * status code (presumably ORTE_SUCCESS on success - TODO confirm).
 195  */
 196 ORTE_DECLSPEC int orte_wait_finalize(void);
 197 
 198 END_C_DECLS
 199 
 200 #endif /* #ifndef ORTE_WAIT_H */

/* [<][>][^][v][top][bottom][index][help] */