root/opal/mca/pmix/pmix4x/pmix/examples/fault.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. notification_fn
  2. op_callbk
  3. evhandler_reg_callbk
  4. main

   1 /*
   2  * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
   3  *                         University Research and Technology
   4  *                         Corporation.  All rights reserved.
   5  * Copyright (c) 2004-2011 The University of Tennessee and The University
   6  *                         of Tennessee Research Foundation.  All rights
   7  *                         reserved.
   8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
   9  *                         University of Stuttgart.  All rights reserved.
  10  * Copyright (c) 2004-2005 The Regents of the University of California.
  11  *                         All rights reserved.
  12  * Copyright (c) 2006-2013 Los Alamos National Security, LLC.
  13  *                         All rights reserved.
  14  * Copyright (c) 2009-2012 Cisco Systems, Inc.  All rights reserved.
  15  * Copyright (c) 2011      Oak Ridge National Labs.  All rights reserved.
  16  * Copyright (c) 2013-2019 Intel, Inc.  All rights reserved.
  17  * Copyright (c) 2015      Mellanox Technologies, Inc.  All rights reserved.
  18  * $COPYRIGHT$
  19  *
  20  * Additional copyrights may follow
  21  *
  22  * $HEADER$
  23  *
  24  */
  25 
  26 #include <stdbool.h>
  27 #include <stdio.h>
  28 #include <stdlib.h>
  29 #include <unistd.h>
  30 #include <time.h>
  31 #include <pthread.h>
  32 
  33 #include <pmix.h>
  34 #include "examples.h"
  35 
  36 static pmix_proc_t myproc;
  37 
  38 static void notification_fn(size_t evhdlr_registration_id,
  39                             pmix_status_t status,
  40                             const pmix_proc_t *source,
  41                             pmix_info_t info[], size_t ninfo,
  42                             pmix_info_t results[], size_t nresults,
  43                             pmix_event_notification_cbfunc_fn_t cbfunc,
  44                             void *cbdata)
  45 {
  46     myrel_t *lock;
  47     bool found;
  48     int exit_code;
  49     size_t n;
  50     pmix_proc_t *affected = NULL;
  51 
  52     /* find our return object */
  53     lock = NULL;
  54     found = false;
  55     for (n=0; n < ninfo; n++) {
  56         if (0 == strncmp(info[n].key, PMIX_EVENT_RETURN_OBJECT, PMIX_MAX_KEYLEN)) {
  57             lock = (myrel_t*)info[n].value.data.ptr;
  58             /* not every RM will provide an exit code, but check if one was given */
  59         } else if (0 == strncmp(info[n].key, PMIX_EXIT_CODE, PMIX_MAX_KEYLEN)) {
  60             exit_code = info[n].value.data.integer;
  61             found = true;
  62         } else if (0 == strncmp(info[n].key, PMIX_EVENT_AFFECTED_PROC, PMIX_MAX_KEYLEN)) {
  63             affected = info[n].value.data.proc;
  64         }
  65     }
  66     /* if the object wasn't returned, then that is an error */
  67     if (NULL == lock) {
  68         fprintf(stderr, "LOCK WASN'T RETURNED IN RELEASE CALLBACK\n");
  69         /* let the event handler progress */
  70         if (NULL != cbfunc) {
  71             cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata);
  72         }
  73         return;
  74     }
  75 
  76     /* tell the event handler state machine that we are the last step */
  77     if (NULL != cbfunc) {
  78         cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata);
  79     }
  80     fprintf(stderr, "DEBUGGER DAEMON NOTIFIED TERMINATED - AFFECTED %s\n",
  81             (NULL == affected) ? "NULL" : affected->nspace);
  82 
  83     if (found) {
  84         lock->exit_code = exit_code;
  85         lock->exit_code_given = true;
  86     }
  87     DEBUG_WAKEUP_THREAD(&lock->lock);
  88 }
  89 
  90 static void op_callbk(pmix_status_t status,
  91                       void *cbdata)
  92 {
  93     mylock_t *lock = (mylock_t*)cbdata;
  94     fprintf(stderr, "Client %s:%d OP CALLBACK CALLED WITH STATUS %d\n", myproc.nspace, myproc.rank, status);
  95     DEBUG_WAKEUP_THREAD(lock);
  96 }
  97 
  98 static void evhandler_reg_callbk(pmix_status_t status,
  99                                   size_t errhandler_ref,
 100                                   void *cbdata)
 101 {
 102     mylock_t *lock = (mylock_t*)cbdata;
 103 
 104     fprintf(stderr, "Client %s:%d ERRHANDLER REGISTRATION CALLBACK CALLED WITH STATUS %d, ref=%lu\n",
 105                myproc.nspace, myproc.rank, status, (unsigned long)errhandler_ref);
 106     DEBUG_WAKEUP_THREAD(lock);
 107 }
 108 
 109 int main(int argc, char **argv)
 110 {
 111     int rc;
 112     pmix_value_t value;
 113     pmix_value_t *val = &value;
 114     pmix_proc_t proc;
 115     uint32_t nprocs;
 116     pmix_info_t *info;
 117     mylock_t mylock;
 118     myrel_t myrel;
 119     pmix_status_t code[2] = {PMIX_ERR_PROC_ABORTED, PMIX_ERR_JOB_TERMINATED};
 120 
 121     /* init us */
 122     if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) {
 123         fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, rc);
 124         exit(0);
 125     }
 126     fprintf(stderr, "Client ns %s rank %d: Running\n", myproc.nspace, myproc.rank);
 127 
 128     PMIX_PROC_CONSTRUCT(&proc);
 129     (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN);
 130     proc.rank = PMIX_RANK_WILDCARD;
 131 
 132     /* get our universe size */
 133     if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) {
 134         fprintf(stderr, "Client ns %s rank %d: PMIx_Get universe size failed: %d\n", myproc.nspace, myproc.rank, rc);
 135         goto done;
 136     }
 137     nprocs = val->data.uint32;
 138     PMIX_VALUE_RELEASE(val);
 139     fprintf(stderr, "Client %s:%d universe size %d\n", myproc.nspace, myproc.rank, nprocs);
 140 
 141     /* register another handler specifically for when the target
 142      * job completes */
 143     DEBUG_CONSTRUCT_MYREL(&myrel);
 144     PMIX_INFO_CREATE(info, 2);
 145     PMIX_INFO_LOAD(&info[0], PMIX_EVENT_RETURN_OBJECT, &myrel, PMIX_POINTER);
 146     /* only call me back when one of us terminates */
 147     PMIX_INFO_LOAD(&info[1], PMIX_NSPACE, myproc.nspace, PMIX_STRING);
 148 
 149     DEBUG_CONSTRUCT_LOCK(&mylock);
 150     PMIx_Register_event_handler(code, 2, info, 2,
 151                                 notification_fn, evhandler_reg_callbk, (void*)&mylock);
 152     DEBUG_WAIT_THREAD(&mylock);
 153     if (PMIX_SUCCESS != mylock.status) {
 154         rc = mylock.status;
 155         DEBUG_DESTRUCT_LOCK(&mylock);
 156         PMIX_INFO_FREE(info, 2);
 157         goto done;
 158     }
 159     DEBUG_DESTRUCT_LOCK(&mylock);
 160     PMIX_INFO_FREE(info, 2);
 161 
 162     /* call fence to sync */
 163     PMIX_PROC_CONSTRUCT(&proc);
 164     (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN);
 165     proc.rank = PMIX_RANK_WILDCARD;
 166     if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) {
 167         fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d\n", myproc.nspace, myproc.rank, rc);
 168         goto done;
 169     }
 170 
 171     /* rank=0 calls abort */
 172     if (0 == myproc.rank) {
 173         sleep(2);
 174         fprintf(stderr, "Client ns %s rank %d: exiting with error\n", myproc.nspace, myproc.rank);
 175         exit(1);
 176     }
 177     /* everyone simply waits */
 178     DEBUG_WAIT_THREAD(&myrel.lock);
 179     DEBUG_DESTRUCT_MYREL(&myrel);
 180 
 181  done:
 182     /* finalize us */
 183     fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank);
 184     DEBUG_CONSTRUCT_LOCK(&mylock);
 185     PMIx_Deregister_event_handler(1, op_callbk, &mylock);
 186     DEBUG_WAIT_THREAD(&mylock);
 187     DEBUG_DESTRUCT_LOCK(&mylock);
 188 
 189     if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) {
 190         fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc);
 191     } else {
 192         fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank);
 193     }
 194     fflush(stderr);
 195     return(0);
 196 }

/* [<][>][^][v][top][bottom][index][help] */