root/opal/mca/pmix/pmix4x/pmix/examples/jctrl.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. notification_fn
  2. evhandler_reg_callbk
  3. infocbfunc
  4. main

   1 /*
   2  * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
   3  *                         University Research and Technology
   4  *                         Corporation.  All rights reserved.
   5  * Copyright (c) 2004-2011 The University of Tennessee and The University
   6  *                         of Tennessee Research Foundation.  All rights
   7  *                         reserved.
   8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
   9  *                         University of Stuttgart.  All rights reserved.
  10  * Copyright (c) 2004-2005 The Regents of the University of California.
  11  *                         All rights reserved.
  12  * Copyright (c) 2006-2013 Los Alamos National Security, LLC.
  13  *                         All rights reserved.
  14  * Copyright (c) 2009-2012 Cisco Systems, Inc.  All rights reserved.
  15  * Copyright (c) 2011      Oak Ridge National Labs.  All rights reserved.
  16  * Copyright (c) 2013-2019 Intel, Inc.  All rights reserved.
  17  * Copyright (c) 2015      Mellanox Technologies, Inc.  All rights reserved.
  18  * $COPYRIGHT$
  19  *
  20  * Additional copyrights may follow
  21  *
  22  * $HEADER$
  23  *
  24  */
  25 
  26 #define _GNU_SOURCE
  27 #include <stdbool.h>
  28 #include <stdio.h>
  29 #include <stdlib.h>
  30 #include <unistd.h>
  31 #include <time.h>
  32 #include <signal.h>
  33 
  34 #include <pmix.h>
  35 #include "examples.h"
  36 
  37 static pmix_proc_t myproc;
  38 
  39 /* this is the event notification function we pass down below
  40  * when registering for general events - i.e.,, the default
  41  * handler. We don't technically need to register one, but it
  42  * is usually good practice to catch any events that occur */
  43 static void notification_fn(size_t evhdlr_registration_id,
  44                             pmix_status_t status,
  45                             const pmix_proc_t *source,
  46                             pmix_info_t info[], size_t ninfo,
  47                             pmix_info_t results[], size_t nresults,
  48                             pmix_event_notification_cbfunc_fn_t cbfunc,
  49                             void *cbdata)
  50 {
  51     if (NULL != cbfunc) {
  52         cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata);
  53     }
  54 }
  55 
  56 /* event handler registration is done asynchronously because it
  57  * may involve the PMIx server registering with the host RM for
  58  * external events. So we provide a callback function that returns
  59  * the status of the request (success or an error), plus a numerical index
  60  * to the registered event. The index is used later on to deregister
  61  * an event handler - if we don't explicitly deregister it, then the
  62  * PMIx server will do so when it see us exit */
  63 static void evhandler_reg_callbk(pmix_status_t status,
  64                                  size_t evhandler_ref,
  65                                  void *cbdata)
  66 {
  67     mylock_t *lock = (mylock_t*)cbdata;
  68 
  69     if (PMIX_SUCCESS != status) {
  70         fprintf(stderr, "Client %s:%d EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n",
  71                    myproc.nspace, myproc.rank, status, (unsigned long)evhandler_ref);
  72     }
  73     lock->status = status;
  74     lock->evhandler_ref = evhandler_ref;
  75     DEBUG_WAKEUP_THREAD(lock);
  76 }
  77 
  78 static void infocbfunc(pmix_status_t status,
  79                        pmix_info_t *info, size_t ninfo,
  80                        void *cbdata,
  81                        pmix_release_cbfunc_t release_fn,
  82                        void *release_cbdata)
  83 {
  84     mylock_t *lock = (mylock_t*)cbdata;
  85 
  86     /* release the caller */
  87     if (NULL != release_fn) {
  88         release_fn(release_cbdata);
  89     }
  90 
  91     lock->status = status;
  92     DEBUG_WAKEUP_THREAD(lock);
  93 }
  94 
  95 int main(int argc, char **argv)
  96 {
  97     pmix_status_t rc;
  98     pmix_value_t value;
  99     pmix_value_t *val = &value;
 100     pmix_proc_t proc;
 101     uint32_t nprocs, n;
 102     pmix_info_t *info, *iptr;
 103     bool flag;
 104     mylock_t mylock;
 105     pmix_data_array_t *dptr;
 106 
 107     /* init us - note that the call to "init" includes the return of
 108      * any job-related info provided by the RM. */
 109     if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) {
 110         fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, rc);
 111         exit(0);
 112     }
 113     fprintf(stderr, "Client ns %s rank %d: Running\n", myproc.nspace, myproc.rank);
 114 
 115 
 116     /* register our default event handler - again, this isn't strictly
 117      * required, but is generally good practice */
 118     DEBUG_CONSTRUCT_LOCK(&mylock);
 119     PMIx_Register_event_handler(NULL, 0, NULL, 0,
 120                                 notification_fn, evhandler_reg_callbk, (void*)&mylock);
 121     /* wait for registration to complete */
 122     DEBUG_WAIT_THREAD(&mylock);
 123     rc = mylock.status;
 124     DEBUG_DESTRUCT_LOCK(&mylock);
 125     if (PMIX_SUCCESS != rc) {
 126         fprintf(stderr, "[%s:%d] Default handler registration failed\n", myproc.nspace, myproc.rank);
 127         goto done;
 128     }
 129 
 130     /* job-related info is found in our nspace, assigned to the
 131      * wildcard rank as it doesn't relate to a specific rank. Setup
 132      * a name to retrieve such values */
 133     PMIX_PROC_CONSTRUCT(&proc);
 134     (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN);
 135     proc.rank = PMIX_RANK_WILDCARD;
 136 
 137     /* get our universe size */
 138     if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) {
 139         fprintf(stderr, "Client ns %s rank %d: PMIx_Get universe size failed: %d\n", myproc.nspace, myproc.rank, rc);
 140         goto done;
 141     }
 142     nprocs = val->data.uint32;
 143     PMIX_VALUE_RELEASE(val);
 144     fprintf(stderr, "Client %s:%d universe size %d\n", myproc.nspace, myproc.rank, nprocs);
 145 
 146     /* inform the RM that we are preemptible, and that our checkpoint methods are
 147      * "signal" on SIGUSR2 and event on PMIX_JCTRL_CHECKPOINT */
 148     PMIX_INFO_CREATE(info, 2);
 149     flag = true;
 150     PMIX_INFO_LOAD(&info[0], PMIX_JOB_CTRL_PREEMPTIBLE, (void*)&flag, PMIX_BOOL);
 151     /* can't use "load" to load a pmix_data_array_t */
 152     (void)strncpy(info[1].key, PMIX_JOB_CTRL_CHECKPOINT_METHOD, PMIX_MAX_KEYLEN);
 153     PMIX_DATA_ARRAY_CREATE(info[1].value.data.darray, 2, PMIX_INFO);
 154     dptr = info[1].value.data.darray;
 155     rc = SIGUSR2;
 156     iptr = (pmix_info_t*)dptr->array;
 157     PMIX_INFO_LOAD(&iptr[0], PMIX_JOB_CTRL_CHECKPOINT_SIGNAL, &rc, PMIX_INT);
 158     rc = PMIX_JCTRL_CHECKPOINT;
 159     PMIX_INFO_LOAD(&iptr[1], PMIX_JOB_CTRL_CHECKPOINT_EVENT, &rc, PMIX_STATUS);
 160 
 161     /* since this is informational and not a requested operation, the target parameter
 162      * doesn't mean anything and can be ignored */
 163     DEBUG_CONSTRUCT_LOCK(&mylock);
 164     if (PMIX_SUCCESS != (rc = PMIx_Job_control_nb(NULL, 0, info, 2, infocbfunc, (void*)&mylock))) {
 165         fprintf(stderr, "Client ns %s rank %d: PMIx_Job_control_nb failed: %d\n", myproc.nspace, myproc.rank, rc);
 166         DEBUG_DESTRUCT_LOCK(&mylock);
 167         goto done;
 168     }
 169     DEBUG_WAIT_THREAD(&mylock);
 170     PMIX_INFO_FREE(info, 2);
 171     rc = mylock.status;
 172     DEBUG_DESTRUCT_LOCK(&mylock);
 173     if (PMIX_SUCCESS != rc) {
 174         fprintf(stderr, "Client ns %s rank %d: PMIx_Job_control_nb failed: %d\n", myproc.nspace, myproc.rank, rc);
 175         goto done;
 176     }
 177 
 178     /* now request that this process be monitored using heartbeats */
 179     PMIX_INFO_CREATE(iptr, 1);
 180     PMIX_INFO_LOAD(&iptr[0], PMIX_MONITOR_HEARTBEAT, NULL, PMIX_POINTER);
 181 
 182     PMIX_INFO_CREATE(info, 3);
 183     PMIX_INFO_LOAD(&info[0], PMIX_MONITOR_ID, "MONITOR1", PMIX_STRING);
 184     n = 5;  // require a heartbeat every 5 seconds
 185     PMIX_INFO_LOAD(&info[1], PMIX_MONITOR_HEARTBEAT_TIME, &n, PMIX_UINT32);
 186     n = 2;  // two heartbeats can be missed before declaring us "stalled"
 187     PMIX_INFO_LOAD(&info[2], PMIX_MONITOR_HEARTBEAT_DROPS, &n, PMIX_UINT32);
 188 
 189     /* make the request */
 190     DEBUG_CONSTRUCT_LOCK(&mylock);
 191     if (PMIX_SUCCESS != (rc = PMIx_Process_monitor_nb(iptr, PMIX_MONITOR_HEARTBEAT_ALERT,
 192                                                       info, 3, infocbfunc, (void*)&mylock))) {
 193         fprintf(stderr, "Client ns %s rank %d: PMIx_Process_monitor_nb failed: %d\n", myproc.nspace, myproc.rank, rc);
 194         DEBUG_DESTRUCT_LOCK(&mylock);
 195         goto done;
 196     }
 197     DEBUG_WAIT_THREAD(&mylock);
 198     PMIX_INFO_FREE(iptr, 1);
 199     PMIX_INFO_FREE(info, 3);
 200     rc = mylock.status;
 201     DEBUG_DESTRUCT_LOCK(&mylock);
 202     if (PMIX_SUCCESS != rc) {
 203         fprintf(stderr, "Client ns %s rank %d: PMIx_Process_monitor_nb failed: %d\n", myproc.nspace, myproc.rank, rc);
 204         goto done;
 205     }
 206 
 207     /* send a heartbeat */
 208     PMIx_Heartbeat();
 209 
 210     /* call fence to synchronize with our peers - no need to
 211      * collect any info as we didn't "put" anything */
 212     PMIX_INFO_CREATE(info, 1);
 213     flag = false;
 214     PMIX_INFO_LOAD(info, PMIX_COLLECT_DATA, &flag, PMIX_BOOL);
 215     if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, info, 1))) {
 216         fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d\n", myproc.nspace, myproc.rank, rc);
 217         goto done;
 218     }
 219     PMIX_INFO_FREE(info, 1);
 220 
 221 
 222   done:
 223     /* finalize us */
 224     fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank);
 225     if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) {
 226         fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc);
 227     } else {
 228         fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank);
 229     }
 230     fflush(stderr);
 231     return(0);
 232 }

/* [<][>][^][v][top][bottom][index][help] */