root/opal/mca/pmix/pmix4x/pmix/examples/debuggerd.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. cbfunc
  2. notification_fn
  3. evhandler_reg_callbk
  4. main

   1 /*
   2  * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
   3  *                         University Research and Technology
   4  *                         Corporation.  All rights reserved.
   5  * Copyright (c) 2004-2011 The University of Tennessee and The University
   6  *                         of Tennessee Research Foundation.  All rights
   7  *                         reserved.
   8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
   9  *                         University of Stuttgart.  All rights reserved.
  10  * Copyright (c) 2004-2005 The Regents of the University of California.
  11  *                         All rights reserved.
  12  * Copyright (c) 2006-2013 Los Alamos National Security, LLC.
  13  *                         All rights reserved.
  14  * Copyright (c) 2009-2012 Cisco Systems, Inc.  All rights reserved.
  15  * Copyright (c) 2011      Oak Ridge National Labs.  All rights reserved.
  16  * Copyright (c) 2013-2018 Intel, Inc.  All rights reserved.
  17  * Copyright (c) 2015      Mellanox Technologies, Inc.  All rights reserved.
  18  * $COPYRIGHT$
  19  *
  20  * Additional copyrights may follow
  21  *
  22  * $HEADER$
  23  *
  24  */
  25 
  26 #define _GNU_SOURCE
  27 #include <stdbool.h>
  28 #include <stdio.h>
  29 #include <stdlib.h>
  30 #include <unistd.h>
  31 #include <time.h>
  32 
  33 #include <pmix_tool.h>
  34 
  35 /* define a structure for collecting returned
  36  * info from a query */
  37 typedef struct {
  38     volatile bool active;
  39     pmix_info_t *info;
  40     size_t ninfo;
  41 } myquery_data_t;
  42 
  43 
  44 static pmix_proc_t myproc;
  45 
  46 /* this is a callback function for the PMIx_Query
  47  * API. The query will callback with a status indicating
  48  * if the request could be fully satisfied, partially
  49  * satisfied, or completely failed. The info parameter
  50  * contains an array of the returned data, with the
  51  * info->key field being the key that was provided in
  52  * the query call. Thus, you can correlate the returned
  53  * data in the info->value field to the requested key.
  54  *
  55  * Once we have dealt with the returned data, we must
  56  * call the release_fn so that the PMIx library can
  57  * cleanup */
  58 static void cbfunc(pmix_status_t status,
  59                    pmix_info_t *info, size_t ninfo,
  60                    void *cbdata,
  61                    pmix_release_cbfunc_t release_fn,
  62                    void *release_cbdata)
  63 {
  64     myquery_data_t *mq = (myquery_data_t*)cbdata;
  65     size_t n;
  66 
  67     /* save the returned info - it will be
  68      * released in the release_fn */
  69     if (0 < ninfo) {
  70         PMIX_INFO_CREATE(mq->info, ninfo);
  71         mq->ninfo = ninfo;
  72         for (n=0; n < ninfo; n++) {
  73             fprintf(stderr, "Transferring %s\n", info[n].key);
  74             PMIX_INFO_XFER(&mq->info[n], &info[n]);
  75         }
  76     }
  77 
  78     /* let the library release the data */
  79     if (NULL != release_fn) {
  80         release_fn(release_cbdata);
  81     }
  82 
  83     /* release the block */
  84     mq->active = false;
  85 }
  86 
  87 /* this is the event notification function we pass down below
  88  * when registering for general events - i.e.,, the default
  89  * handler. We don't technically need to register one, but it
  90  * is usually good practice to catch any events that occur */
  91 static void notification_fn(size_t evhdlr_registration_id,
  92                             pmix_status_t status,
  93                             const pmix_proc_t *source,
  94                             pmix_info_t info[], size_t ninfo,
  95                             pmix_info_t results[], size_t nresults,
  96                             pmix_event_notification_cbfunc_fn_t cbfunc,
  97                             void *cbdata)
  98 {
  99     if (NULL != cbfunc) {
 100         cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata);
 101     }
 102 }
 103 
 104 /* event handler registration is done asynchronously because it
 105  * may involve the PMIx server registering with the host RM for
 106  * external events. So we provide a callback function that returns
 107  * the status of the request (success or an error), plus a numerical index
 108  * to the registered event. The index is used later on to deregister
 109  * an event handler - if we don't explicitly deregister it, then the
 110  * PMIx server will do so when it see us exit */
 111 static void evhandler_reg_callbk(pmix_status_t status,
 112                                  size_t evhandler_ref,
 113                                  void *cbdata)
 114 {
 115     volatile int *active = (volatile int*)cbdata;
 116 
 117     if (PMIX_SUCCESS != status) {
 118         fprintf(stderr, "Client %s:%d EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n",
 119                    myproc.nspace, myproc.rank, status, (unsigned long)evhandler_ref);
 120     }
 121     *active = status;
 122 }
 123 
 124 int main(int argc, char **argv)
 125 {
 126     pmix_status_t rc;
 127     pmix_value_t *val;
 128     pmix_proc_t proc;
 129     pmix_info_t *info;
 130     size_t ninfo;
 131     volatile int active;
 132     pmix_query_t *query;
 133     size_t nq, n;
 134     myquery_data_t myquery_data;
 135 
 136 fprintf(stderr, "I AM HERE\n");
 137 fflush(stderr);
 138     sleep(10);
 139     exit(0);
 140 
 141     /* init us - since we were launched by the RM, our connection info
 142      * will have been provided at startup. */
 143     if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, NULL, 0))) {
 144         fprintf(stderr, "Debugger daemon ns %s rank %d: PMIx_tool_init failed: %d\n", myproc.nspace, myproc.rank, rc);
 145         exit(0);
 146     }
 147     fprintf(stderr, "Debugger daemon ns %s rank %d: Running\n", myproc.nspace, myproc.rank);
 148 
 149 
 150     /* register our default event handler */
 151     active = -1;
 152     PMIx_Register_event_handler(NULL, 0, NULL, 0,
 153                                 notification_fn, evhandler_reg_callbk, (void*)&active);
 154     while (-1 == active) {
 155         usleep(10);
 156     }
 157     if (0 != active) {
 158         exit(active);
 159     }
 160 
 161     /* get the nspace of the job we are to debug */
 162     (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN);
 163     proc.rank = PMIX_RANK_WILDCARD;
 164     if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_DEBUG_JOB, NULL, 0, &val))) {
 165         fprintf(stderr, "[%s:%d] Failed to get job being debugged - error %d\n", myproc.nspace, myproc.rank, rc);
 166         goto done;
 167     }
 168     if (NULL == val) {
 169         fprintf(stderr, "Got NULL return\n");
 170         goto done;
 171     }
 172     fprintf(stderr, "[%s:%d] Debugging %s\n", myproc.nspace, myproc.rank, val->data.string);
 173 
 174     /* get our local proctable - for scalability reasons, we don't want to
 175      * have our "root" debugger process get the proctable for everybody and
 176      * send it out to us. So ask the local PMIx server for the pid's of
 177      * our local target processes */
 178     nq = 1;
 179     PMIX_QUERY_CREATE(query, nq);
 180     PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_LOCAL_PROC_TABLE);
 181     query[0].nqual = 1;
 182     PMIX_INFO_CREATE(query[0].qualifiers, 1);
 183     PMIX_INFO_LOAD(&query[0].qualifiers[0], PMIX_NSPACE, val->data.string, PMIX_STRING);  // the nspace we are enquiring about
 184     /* setup the caddy to retrieve the data */
 185     myquery_data.info = NULL;
 186     myquery_data.ninfo = 0;
 187     myquery_data.active = true;
 188     /* execute the query */
 189     if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, nq, cbfunc, (void*)&myquery_data))) {
 190         fprintf(stderr, "PMIx_Query_info failed: %d\n", rc);
 191         goto done;
 192     }
 193     while (myquery_data.active) {
 194         usleep(10);
 195     }
 196     fprintf(stderr, "[%s:%d] Local proctable received\n", myproc.nspace, myproc.rank);
 197 
 198 
 199     /* now that we have the proctable for our local processes, we can do our
 200      * magic debugger stuff and attach to them. We then send a "release" event
 201      * to them - i.e., it's the equivalent to setting the MPIR breakpoint. We
 202      * do this with the event notification system */
 203     (void)strncpy(proc.nspace, val->data.string, PMIX_MAX_NSLEN);
 204     proc.rank = PMIX_RANK_WILDCARD;
 205     /* we send the notification to just the local procs of the job being debugged */
 206     ninfo = 1;
 207     PMIX_INFO_CREATE(info, ninfo);
 208     PMIX_INFO_LOAD(&info[0], PMIX_EVENT_CUSTOM_RANGE, &proc, PMIX_PROC);  // deliver to the target nspace
 209     fprintf(stderr, "[%s:%u] Sending release\n", myproc.nspace, myproc.rank);
 210     PMIx_Notify_event(PMIX_ERR_DEBUGGER_RELEASE,
 211                       NULL, PMIX_RANGE_LOCAL,
 212                       info, ninfo, NULL, NULL);
 213 
 214     /* do some debugger magic */
 215     n = 0;
 216     fprintf(stderr, "[%s:%u] Hanging around awhile, doing debugger magic\n", myproc.nspace, myproc.rank);
 217     while (n < 5) {
 218         usleep(1000);
 219         ++n;
 220     }
 221 
 222   done:
 223     /* finalize us */
 224     fprintf(stderr, "Debugger daemon ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank);
 225     if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) {
 226         fprintf(stderr, "Debugger daemon ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc);
 227     } else {
 228         fprintf(stderr, "Debugger daemon ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank);
 229     }
 230     fflush(stderr);
 231     return(0);
 232 }

/* [<][>][^][v][top][bottom][index][help] */