This source file includes the following definitions.
- cbfunc
- notification_fn
- release_fn
- evhandler_reg_callbk
- spawn_debugger
- main
- attach_to_running_job
   1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 
  11 
  12 
  13 
  14 
  15 
  16 
  17 
  18 
  19 
  20 
  21 
  22 
  23 
  24 
  25 
  26 #define _GNU_SOURCE
#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include <pmix_tool.h>
  34 
  35 typedef struct {
  36     pthread_mutex_t mutex;
  37     pthread_cond_t cond;
  38     volatile bool active;
  39     pmix_status_t status;
  40 } mylock_t;
  41 
  42 #define DEBUG_CONSTRUCT_LOCK(l)                     \
  43     do {                                            \
  44         pthread_mutex_init(&(l)->mutex, NULL);      \
  45         pthread_cond_init(&(l)->cond, NULL);        \
  46         (l)->active = true;                         \
  47         (l)->status = PMIX_SUCCESS;                 \
  48     } while(0)
  49 
  50 #define DEBUG_DESTRUCT_LOCK(l)              \
  51     do {                                    \
  52         pthread_mutex_destroy(&(l)->mutex); \
  53         pthread_cond_destroy(&(l)->cond);   \
  54     } while(0)
  55 
  56 #define DEBUG_WAIT_THREAD(lck)                                      \
  57     do {                                                            \
  58         pthread_mutex_lock(&(lck)->mutex);                          \
  59         while ((lck)->active) {                                     \
  60             pthread_cond_wait(&(lck)->cond, &(lck)->mutex);         \
  61         }                                                           \
  62         pthread_mutex_unlock(&(lck)->mutex);                        \
  63     } while(0)
  64 
  65 #define DEBUG_WAKEUP_THREAD(lck)                        \
  66     do {                                                \
  67         pthread_mutex_lock(&(lck)->mutex);              \
  68         (lck)->active = false;                          \
  69         pthread_cond_broadcast(&(lck)->cond);           \
  70         pthread_mutex_unlock(&(lck)->mutex);            \
  71     } while(0)
  72 
  73 
  74 
  75 typedef struct {
  76     mylock_t lock;
  77     pmix_info_t *info;
  78     size_t ninfo;
  79 } myquery_data_t;
  80 
  81 static int attach_to_running_job(char *nspace);
  82 static mylock_t waiting_for_debugger;
  83 static pmix_proc_t myproc;
  84 
  85 
  86 
  87 
  88 
  89 
  90 
  91 
  92 
  93 
  94 
  95 
  96 
  97 static void cbfunc(pmix_status_t status,
  98                    pmix_info_t *info, size_t ninfo,
  99                    void *cbdata,
 100                    pmix_release_cbfunc_t release_fn,
 101                    void *release_cbdata)
 102 {
 103     myquery_data_t *mq = (myquery_data_t*)cbdata;
 104     size_t n;
 105 
 106     
 107 
 108 
 109     if (0 < ninfo) {
 110         PMIX_INFO_CREATE(mq->info, ninfo);
 111         mq->ninfo = ninfo;
 112         for (n=0; n < ninfo; n++) {
 113             fprintf(stderr, "Transferring %s\n", info[n].key);
 114             PMIX_INFO_XFER(&mq->info[n], &info[n]);
 115         }
 116     }
 117 
 118     
 119 
 120     if (NULL != release_fn) {
 121         release_fn(release_cbdata);
 122     }
 123 
 124     
 125     DEBUG_WAKEUP_THREAD(&mq->lock);
 126 }
 127 
 128 
 129 
 130 
 131 
 132 static void notification_fn(size_t evhdlr_registration_id,
 133                             pmix_status_t status,
 134                             const pmix_proc_t *source,
 135                             pmix_info_t info[], size_t ninfo,
 136                             pmix_info_t results[], size_t nresults,
 137                             pmix_event_notification_cbfunc_fn_t cbfunc,
 138                             void *cbdata)
 139 {
 140     
 141     if (NULL != cbfunc) {
 142         cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata);
 143     }
 144 }
 145 
 146 
 147 
 148 
 149 
 150 
 151 
 152 
 153 static void release_fn(size_t evhdlr_registration_id,
 154                        pmix_status_t status,
 155                        const pmix_proc_t *source,
 156                        pmix_info_t info[], size_t ninfo,
 157                        pmix_info_t results[], size_t nresults,
 158                        pmix_event_notification_cbfunc_fn_t cbfunc,
 159                        void *cbdata)
 160 {
 161     
 162     if (NULL != cbfunc) {
 163         cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata);
 164     }
 165     
 166     DEBUG_WAKEUP_THREAD(&waiting_for_debugger);
 167 }
 168 
 169 
 170 
 171 
 172 
 173 
 174 
 175 
 176 static void evhandler_reg_callbk(pmix_status_t status,
 177                                  size_t evhandler_ref,
 178                                  void *cbdata)
 179 {
 180     mylock_t *lock = (mylock_t*)cbdata;
 181 
 182     if (PMIX_SUCCESS != status) {
 183         fprintf(stderr, "Client %s:%d EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n",
 184                    myproc.nspace, myproc.rank, status, (unsigned long)evhandler_ref);
 185     }
 186     lock->status = status;
 187     DEBUG_WAKEUP_THREAD(lock);
 188 }
 189 
 190 static pmix_status_t spawn_debugger(char *appspace)
 191 {
 192     pmix_status_t rc;
 193     pmix_info_t *dinfo;
 194     pmix_app_t *debugger;
 195     size_t dninfo;
 196     char cwd[1024];
 197     char dspace[PMIX_MAX_NSLEN+1];
 198 
 199     
 200     PMIX_APP_CREATE(debugger, 1);
 201     debugger[0].cmd = strdup("./debuggerd");
 202     PMIX_ARGV_APPEND(rc, debugger[0].argv, "./debuggerd");
 203     getcwd(cwd, 1024);  
 204     debugger[0].cwd = strdup(cwd);
 205     
 206 
 207     dninfo = 5;
 208     PMIX_INFO_CREATE(dinfo, dninfo);
 209     PMIX_INFO_LOAD(&dinfo[0], PMIX_MAPBY, "ppr:1:node", PMIX_STRING);  
 210     PMIX_INFO_LOAD(&dinfo[1], PMIX_DEBUGGER_DAEMONS, NULL, PMIX_BOOL); 
 211     PMIX_INFO_LOAD(&dinfo[2], PMIX_DEBUG_JOB, appspace, PMIX_STRING); 
 212     PMIX_INFO_LOAD(&dinfo[3], PMIX_NOTIFY_COMPLETION, NULL, PMIX_BOOL); 
 213     PMIX_INFO_LOAD(&dinfo[4], PMIX_DEBUG_WAITING_FOR_NOTIFY, NULL, PMIX_BOOL);  
 214     
 215     fprintf(stderr, "Debugger: spawning %s\n", debugger[0].cmd);
 216     if (PMIX_SUCCESS != (rc = PMIx_Spawn(dinfo, dninfo, debugger, 1, dspace))) {
 217         fprintf(stderr, "Debugger daemons failed to launch with error: %s\n", PMIx_Error_string(rc));
 218     }
 219     fprintf(stderr, "SPAWNED DEBUGGERD\n");
 220     
 221     PMIX_INFO_FREE(dinfo, dninfo);
 222     PMIX_APP_FREE(debugger, 1);
 223 
 224     return rc;
 225 }
 226 
 227 #define DBGR_LOOP_LIMIT  10
 228 
 229 int main(int argc, char **argv)
 230 {
 231     pmix_status_t rc;
 232     pmix_info_t *info;
 233     pmix_app_t *app;
 234     size_t ninfo, napps;
 235     char *nspace = NULL;
 236     char appspace[PMIX_MAX_NSLEN+1];
 237     int i;
 238     pmix_query_t *query;
 239     size_t nq, n;
 240     myquery_data_t myquery_data;
 241     bool cospawn = false, stop_on_exec = false;
 242     char cwd[1024];
 243     pmix_status_t code = PMIX_ERR_JOB_TERMINATED;
 244     mylock_t mylock;
 245 
 246     
 247     for (i=1; i < argc; i++) {
 248         if (0 == strcmp(argv[i], "-h") ||
 249             0 == strcmp(argv[i], "--help")) {
 250             
 251 
 252         }
 253         if (0 == strcmp(argv[i], "-a") ||
 254             0 == strcmp(argv[i], "--attach")) {
 255             if (NULL != nspace) {
 256                 
 257                 fprintf(stderr, "Cannot attach to more than one nspace\n");
 258                 exit(1);
 259             }
 260             
 261             ++i;
 262             if (argc == i) {
 263                 
 264                 fprintf(stderr, "The %s option requires an <nspace> argument\n", argv[i]);
 265                 exit(1);
 266             }
 267             nspace = strdup(argv[i]);
 268         } else {
 269             fprintf(stderr, "Unknown option: %s\n", argv[i]);
 270             exit(1);
 271         }
 272     }
 273     info = NULL;
 274     ninfo = 0;
 275 
 276     DEBUG_CONSTRUCT_LOCK(&waiting_for_debugger);
 277 
 278     
 279     PMIX_INFO_CREATE(info, 1);
 280     PMIX_INFO_LOAD(&info[0], PMIX_CONNECT_SYSTEM_FIRST, NULL, PMIX_BOOL);
 281     
 282     if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, info, ninfo))) {
 283         fprintf(stderr, "PMIx_tool_init failed: %d\n", rc);
 284         exit(rc);
 285     }
 286     PMIX_INFO_FREE(info, ninfo);
 287 
 288     fprintf(stderr, "Tool ns %s rank %d: Running\n", myproc.nspace, myproc.rank);
 289 
 290     
 291     DEBUG_CONSTRUCT_LOCK(&mylock);
 292     PMIx_Register_event_handler(NULL, 0, NULL, 0,
 293                                 notification_fn, evhandler_reg_callbk, (void*)&mylock);
 294     DEBUG_WAIT_THREAD(&mylock);
 295     DEBUG_DESTRUCT_LOCK(&mylock);
 296 
 297     
 298 
 299     DEBUG_CONSTRUCT_LOCK(&mylock);
 300     PMIx_Register_event_handler(&code, 1, NULL, 0,
 301                                 release_fn, evhandler_reg_callbk, (void*)&mylock);
 302     DEBUG_WAIT_THREAD(&mylock);
 303     DEBUG_DESTRUCT_LOCK(&mylock);
 304 
 305     
 306     if (NULL != nspace) {
 307         if (PMIX_SUCCESS != (rc = attach_to_running_job(nspace))) {
 308             fprintf(stderr, "Failed to attach to nspace %s: error code %d\n",
 309                     nspace, rc);
 310             goto done;
 311         }
 312     } else {
 313         
 314 
 315 
 316 
 317 
 318 
 319 
 320 
 321         nq = 1;
 322         PMIX_QUERY_CREATE(query, nq);
 323         PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_SPAWN_SUPPORT);
 324         PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_DEBUG_SUPPORT);
 325         
 326         DEBUG_CONSTRUCT_LOCK(&myquery_data.lock);
 327         myquery_data.info = NULL;
 328         myquery_data.ninfo = 0;
 329         
 330         fprintf(stderr, "Debugger: querying capabilities\n");
 331         if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, nq, cbfunc, (void*)&myquery_data))) {
 332             fprintf(stderr, "PMIx_Query_info failed: %d\n", rc);
 333             goto done;
 334         }
 335         DEBUG_WAIT_THREAD(&myquery_data.lock);
 336         DEBUG_DESTRUCT_LOCK(&myquery_data.lock);
 337 
 338         
 339 
 340 
 341 
 342         if (2 != myquery_data.ninfo) {
 343             
 344             fprintf(stderr, "PMIx Query returned an incorrect number of results: %lu\n", myquery_data.ninfo);
 345             PMIX_INFO_FREE(myquery_data.info, myquery_data.ninfo);
 346             goto done;
 347         }
 348 
 349         
 350 
 351 
 352 
 353 
 354 
 355 
 356 
 357 
 358 
 359 
 360 
 361         for (n=0; n < myquery_data.ninfo; n++) {
 362             if (0 == strcmp(myquery_data.info[n].key, PMIX_QUERY_SPAWN_SUPPORT)) {
 363                 
 364                 if (NULL != strstr(myquery_data.info[n].value.data.string, PMIX_COSPAWN_APP)) {
 365                     cospawn = true;
 366                 } else {
 367                     cospawn = false;
 368                 }
 369             } else if (0 == strcmp(myquery_data.info[n].key, PMIX_QUERY_DEBUG_SUPPORT)) {
 370                 if (NULL != strstr(myquery_data.info[n].value.data.string, PMIX_DEBUG_STOP_ON_EXEC)) {
 371                     stop_on_exec = true;
 372                 } else {
 373                     stop_on_exec = false;
 374                 }
 375             }
 376         }
 377 
 378         
 379 
 380         if (cospawn) {
 381 
 382         } else {
 383             
 384             napps = 1;
 385             PMIX_APP_CREATE(app, napps);
 386             
 387             app[0].cmd = strdup("client");
 388             PMIX_ARGV_APPEND(rc, app[0].argv, "./client");
 389             getcwd(cwd, 1024);  
 390             app[0].cwd = strdup(cwd);
 391             app[0].maxprocs = 2;
 392             
 393             ninfo = 4;
 394             PMIX_INFO_CREATE(info, ninfo);
 395             PMIX_INFO_LOAD(&info[0], PMIX_MAPBY, "slot", PMIX_STRING);  
 396             if (stop_on_exec) {
 397                 PMIX_INFO_LOAD(&info[1], PMIX_DEBUG_STOP_ON_EXEC, NULL, PMIX_BOOL);  
 398             } else {
 399                 PMIX_INFO_LOAD(&info[1], PMIX_DEBUG_STOP_IN_INIT, NULL, PMIX_BOOL);  
 400             }
 401             PMIX_INFO_LOAD(&info[2], PMIX_FWD_STDOUT, NULL, PMIX_BOOL);  
 402             PMIX_INFO_LOAD(&info[3], PMIX_FWD_STDERR, NULL, PMIX_BOOL);  
 403 
 404             
 405 
 406             fprintf(stderr, "Debugger: spawning %s\n", app[0].cmd);
 407             if (PMIX_SUCCESS != (rc = PMIx_Spawn(info, ninfo, app, napps, appspace))) {
 408                 fprintf(stderr, "Application failed to launch with error: %s(%d)\n", PMIx_Error_string(rc), rc);
 409                 goto done;
 410             }
 411             PMIX_INFO_FREE(info, ninfo);
 412             PMIX_APP_FREE(app, napps);
 413 
 414             
 415             if (PMIX_SUCCESS != (rc = spawn_debugger(appspace))) {
 416                 goto done;
 417             }
 418         }
 419 
 420 
 421         
 422         DEBUG_WAIT_THREAD(&waiting_for_debugger);
 423     }
 424 
 425   done:
 426     DEBUG_DESTRUCT_LOCK(&waiting_for_debugger);
 427     PMIx_tool_finalize();
 428 
 429     return(rc);
 430 }
 431 
 432 static int attach_to_running_job(char *nspace)
 433 {
 434     pmix_status_t rc;
 435     pmix_proc_t myproc;
 436     pmix_query_t *query;
 437     size_t nq;
 438     myquery_data_t *q;
 439 
 440     
 441 
 442     nq = 1;
 443     PMIX_QUERY_CREATE(query, nq);
 444     PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_NAMESPACES);
 445 
 446     q = (myquery_data_t*)malloc(sizeof(myquery_data_t));
 447     DEBUG_CONSTRUCT_LOCK(&q->lock);
 448     if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, nq, cbfunc, (void*)q))) {
 449         fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info failed: %d\n", myproc.nspace, myproc.rank, rc);
 450         return -1;
 451     }
 452     DEBUG_WAIT_THREAD(&q->lock);
 453     DEBUG_DESTRUCT_LOCK(&q->lock);
 454 
 455     if (NULL == q->info) {
 456         fprintf(stderr, "Query returned no info\n");
 457         return -1;
 458     }
 459     
 460     if (PMIX_STRING != q->info[0].value.type) {
 461         fprintf(stderr, "Query returned incorrect data type: %d\n", q->info[0].value.type);
 462         return -1;
 463     }
 464     if (NULL == q->info[0].value.data.string) {
 465         fprintf(stderr, "Query returned no active nspaces\n");
 466         return -1;
 467     }
 468 
 469     fprintf(stderr, "Query returned %s\n", q->info[0].value.data.string);
 470     return 0;
 471 
 472 #if 0
 473     
 474 
 475     
 476     PMIX_INFO_FREE(info, ninfo);
 477 
 478     
 479     ninfo = 1;
 480     PMIX_INFO_CREATE(info, ninfo);
 481     (void)strncpy(info[0].key, PMIX_QUERY_PROC_TABLE, PMIX_MAX_KEYLEN);
 482     (void)strncpy(info[0].qualifier, nspace, PMIX_MAX_KEYLEN);
 483     if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(info, ninfo, infocbfunc, (void*)&active))) {
 484         fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info_nb failed: %d\n", myproc.nspace, myproc.rank, rc);
 485         return -1;
 486     }
 487     
 488 
 489     
 490     if (PMIX_DATA_ARRAY != info[0].type) {
 491         fprintf(stderr, "Query returned incorrect data type: %d\n", info[0].type);
 492         return -1;
 493     }
 494     if (NULL == info[0].data.darray.array) {
 495         fprintf(stderr, "Query returned no proctable info\n");
 496         return -1;
 497     }
 498     
 499 
 500 
 501 
 502 
 503 
 504 
 505 
 506 
 507 
 508 
 509 
 510 
 511     
 512 
 513     PMIX_INFO_FREE(info, ninfo);
 514 
 515     
 516     napps = 1;
 517     PMIX_APP_CREATE(app, napps);
 518     
 519     app[0].cmd = strdup("debuggerdaemon");
 520     app[0].argc = 1;
 521     app[0].argv = (char**)malloc(2*sizeof(char*));
 522     app[0].argv[0] = strdup("debuggerdaemon");
 523     app[0].argv[1] = NULL;
 524     
 525 
 526     ninfo = 3;
 527     PMIX_INFO_CREATE(app[0].info, ninfo);
 528     PMIX_INFO_LOAD(&app[0].info[0], PMIX_MAPBY, "ppr:1:node", PMIX_STRING);  
 529     PMIX_INFO_LOAD(&app[0].info[1], PMIX_DEBUGGER_DAEMONS, true, PMIX_BOOL); 
 530     PMIX_INFO_LOAD(&app[0].info[2], PMIX_DEBUG_TARGET, nspace, PMIX_STRING); 
 531 
 532     
 533     PMIx_Spawn(NULL, 0, app, napps, dspace);
 534     
 535     PMIX_APP_FREE(app, napps);
 536 
 537     
 538 
 539     return 0;
 540 #endif
 541 }