This source file includes the following definitions.
- cbfunc
- notification_fn
- release_fn
- evhandler_reg_callbk
- spawn_debugger
- main
- attach_to_running_job
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
#define _GNU_SOURCE
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include <pmix_tool.h>
34
35 typedef struct {
36 pthread_mutex_t mutex;
37 pthread_cond_t cond;
38 volatile bool active;
39 pmix_status_t status;
40 } mylock_t;
41
/*
 * Initialise a lock object in the "armed" state.
 *
 * lk: pointer to an object with mutex, cond, active and status members.
 * After this macro the lock is active (a waiter will block) and its
 * status is PMIX_SUCCESS.  Return codes of the pthread initialisers are
 * not inspected.
 */
#define DEBUG_CONSTRUCT_LOCK(lk)                            \
    do {                                                    \
        pthread_mutex_init(&(lk)->mutex, NULL);             \
        pthread_cond_init(&(lk)->cond, NULL);               \
        (lk)->status = PMIX_SUCCESS;                        \
        (lk)->active = true;                                \
    } while (0)
49
/*
 * Release the pthread resources held by a lock object.
 *
 * lk: pointer to an object previously set up with DEBUG_CONSTRUCT_LOCK.
 * The active and status members are left untouched.
 */
#define DEBUG_DESTRUCT_LOCK(lk)                             \
    do {                                                    \
        pthread_mutex_destroy(&(lk)->mutex);                \
        pthread_cond_destroy(&(lk)->cond);                  \
    } while (0)
55
/*
 * Block the calling thread until the lock's 'active' flag is cleared.
 *
 * waiter: pointer to a lock object.  The flag is re-tested after every
 * return from pthread_cond_wait, so the macro only completes once the
 * flag is actually false.  The mutex is released before the macro ends.
 */
#define DEBUG_WAIT_THREAD(waiter)                                       \
    do {                                                                \
        pthread_mutex_lock(&(waiter)->mutex);                           \
        while ((waiter)->active) {                                      \
            pthread_cond_wait(&(waiter)->cond, &(waiter)->mutex);       \
        }                                                               \
        pthread_mutex_unlock(&(waiter)->mutex);                         \
    } while (0)
64
/*
 * Clear the lock's 'active' flag and wake every thread blocked in
 * DEBUG_WAIT_THREAD on it.
 *
 * target: pointer to a lock object.  The flag is cleared and the
 * condition broadcast while the mutex is held; the mutex is released
 * before the macro ends.
 */
#define DEBUG_WAKEUP_THREAD(target)                         \
    do {                                                    \
        pthread_mutex_lock(&(target)->mutex);               \
        (target)->active = false;                           \
        pthread_cond_broadcast(&(target)->cond);            \
        pthread_mutex_unlock(&(target)->mutex);             \
    } while (0)
72
73
74
75 typedef struct {
76 mylock_t lock;
77 pmix_info_t *info;
78 size_t ninfo;
79 } myquery_data_t;
80
81 static int attach_to_running_job(char *nspace);
82 static mylock_t waiting_for_debugger;
83 static pmix_proc_t myproc;
84
85
86
87
88
89
90
91
92
93
94
95
96
97 static void cbfunc(pmix_status_t status,
98 pmix_info_t *info, size_t ninfo,
99 void *cbdata,
100 pmix_release_cbfunc_t release_fn,
101 void *release_cbdata)
102 {
103 myquery_data_t *mq = (myquery_data_t*)cbdata;
104 size_t n;
105
106
107
108
109 if (0 < ninfo) {
110 PMIX_INFO_CREATE(mq->info, ninfo);
111 mq->ninfo = ninfo;
112 for (n=0; n < ninfo; n++) {
113 fprintf(stderr, "Transferring %s\n", info[n].key);
114 PMIX_INFO_XFER(&mq->info[n], &info[n]);
115 }
116 }
117
118
119
120 if (NULL != release_fn) {
121 release_fn(release_cbdata);
122 }
123
124
125 DEBUG_WAKEUP_THREAD(&mq->lock);
126 }
127
128
129
130
131
132 static void notification_fn(size_t evhdlr_registration_id,
133 pmix_status_t status,
134 const pmix_proc_t *source,
135 pmix_info_t info[], size_t ninfo,
136 pmix_info_t results[], size_t nresults,
137 pmix_event_notification_cbfunc_fn_t cbfunc,
138 void *cbdata)
139 {
140
141 if (NULL != cbfunc) {
142 cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata);
143 }
144 }
145
146
147
148
149
150
151
152
153 static void release_fn(size_t evhdlr_registration_id,
154 pmix_status_t status,
155 const pmix_proc_t *source,
156 pmix_info_t info[], size_t ninfo,
157 pmix_info_t results[], size_t nresults,
158 pmix_event_notification_cbfunc_fn_t cbfunc,
159 void *cbdata)
160 {
161
162 if (NULL != cbfunc) {
163 cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata);
164 }
165
166 DEBUG_WAKEUP_THREAD(&waiting_for_debugger);
167 }
168
169
170
171
172
173
174
175
176 static void evhandler_reg_callbk(pmix_status_t status,
177 size_t evhandler_ref,
178 void *cbdata)
179 {
180 mylock_t *lock = (mylock_t*)cbdata;
181
182 if (PMIX_SUCCESS != status) {
183 fprintf(stderr, "Client %s:%d EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n",
184 myproc.nspace, myproc.rank, status, (unsigned long)evhandler_ref);
185 }
186 lock->status = status;
187 DEBUG_WAKEUP_THREAD(lock);
188 }
189
190 static pmix_status_t spawn_debugger(char *appspace)
191 {
192 pmix_status_t rc;
193 pmix_info_t *dinfo;
194 pmix_app_t *debugger;
195 size_t dninfo;
196 char cwd[1024];
197 char dspace[PMIX_MAX_NSLEN+1];
198
199
200 PMIX_APP_CREATE(debugger, 1);
201 debugger[0].cmd = strdup("./debuggerd");
202 PMIX_ARGV_APPEND(rc, debugger[0].argv, "./debuggerd");
203 getcwd(cwd, 1024);
204 debugger[0].cwd = strdup(cwd);
205
206
207 dninfo = 5;
208 PMIX_INFO_CREATE(dinfo, dninfo);
209 PMIX_INFO_LOAD(&dinfo[0], PMIX_MAPBY, "ppr:1:node", PMIX_STRING);
210 PMIX_INFO_LOAD(&dinfo[1], PMIX_DEBUGGER_DAEMONS, NULL, PMIX_BOOL);
211 PMIX_INFO_LOAD(&dinfo[2], PMIX_DEBUG_JOB, appspace, PMIX_STRING);
212 PMIX_INFO_LOAD(&dinfo[3], PMIX_NOTIFY_COMPLETION, NULL, PMIX_BOOL);
213 PMIX_INFO_LOAD(&dinfo[4], PMIX_DEBUG_WAITING_FOR_NOTIFY, NULL, PMIX_BOOL);
214
215 fprintf(stderr, "Debugger: spawning %s\n", debugger[0].cmd);
216 if (PMIX_SUCCESS != (rc = PMIx_Spawn(dinfo, dninfo, debugger, 1, dspace))) {
217 fprintf(stderr, "Debugger daemons failed to launch with error: %s\n", PMIx_Error_string(rc));
218 }
219 fprintf(stderr, "SPAWNED DEBUGGERD\n");
220
221 PMIX_INFO_FREE(dinfo, dninfo);
222 PMIX_APP_FREE(debugger, 1);
223
224 return rc;
225 }
226
/* Loop bound; not referenced by any code visible in this file. */
#define DBGR_LOOP_LIMIT 10
228
229 int main(int argc, char **argv)
230 {
231 pmix_status_t rc;
232 pmix_info_t *info;
233 pmix_app_t *app;
234 size_t ninfo, napps;
235 char *nspace = NULL;
236 char appspace[PMIX_MAX_NSLEN+1];
237 int i;
238 pmix_query_t *query;
239 size_t nq, n;
240 myquery_data_t myquery_data;
241 bool cospawn = false, stop_on_exec = false;
242 char cwd[1024];
243 pmix_status_t code = PMIX_ERR_JOB_TERMINATED;
244 mylock_t mylock;
245
246
247 for (i=1; i < argc; i++) {
248 if (0 == strcmp(argv[i], "-h") ||
249 0 == strcmp(argv[i], "--help")) {
250
251
252 }
253 if (0 == strcmp(argv[i], "-a") ||
254 0 == strcmp(argv[i], "--attach")) {
255 if (NULL != nspace) {
256
257 fprintf(stderr, "Cannot attach to more than one nspace\n");
258 exit(1);
259 }
260
261 ++i;
262 if (argc == i) {
263
264 fprintf(stderr, "The %s option requires an <nspace> argument\n", argv[i]);
265 exit(1);
266 }
267 nspace = strdup(argv[i]);
268 } else {
269 fprintf(stderr, "Unknown option: %s\n", argv[i]);
270 exit(1);
271 }
272 }
273 info = NULL;
274 ninfo = 0;
275
276 DEBUG_CONSTRUCT_LOCK(&waiting_for_debugger);
277
278
279 PMIX_INFO_CREATE(info, 1);
280 PMIX_INFO_LOAD(&info[0], PMIX_CONNECT_SYSTEM_FIRST, NULL, PMIX_BOOL);
281
282 if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, info, ninfo))) {
283 fprintf(stderr, "PMIx_tool_init failed: %d\n", rc);
284 exit(rc);
285 }
286 PMIX_INFO_FREE(info, ninfo);
287
288 fprintf(stderr, "Tool ns %s rank %d: Running\n", myproc.nspace, myproc.rank);
289
290
291 DEBUG_CONSTRUCT_LOCK(&mylock);
292 PMIx_Register_event_handler(NULL, 0, NULL, 0,
293 notification_fn, evhandler_reg_callbk, (void*)&mylock);
294 DEBUG_WAIT_THREAD(&mylock);
295 DEBUG_DESTRUCT_LOCK(&mylock);
296
297
298
299 DEBUG_CONSTRUCT_LOCK(&mylock);
300 PMIx_Register_event_handler(&code, 1, NULL, 0,
301 release_fn, evhandler_reg_callbk, (void*)&mylock);
302 DEBUG_WAIT_THREAD(&mylock);
303 DEBUG_DESTRUCT_LOCK(&mylock);
304
305
306 if (NULL != nspace) {
307 if (PMIX_SUCCESS != (rc = attach_to_running_job(nspace))) {
308 fprintf(stderr, "Failed to attach to nspace %s: error code %d\n",
309 nspace, rc);
310 goto done;
311 }
312 } else {
313
314
315
316
317
318
319
320
321 nq = 1;
322 PMIX_QUERY_CREATE(query, nq);
323 PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_SPAWN_SUPPORT);
324 PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_DEBUG_SUPPORT);
325
326 DEBUG_CONSTRUCT_LOCK(&myquery_data.lock);
327 myquery_data.info = NULL;
328 myquery_data.ninfo = 0;
329
330 fprintf(stderr, "Debugger: querying capabilities\n");
331 if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, nq, cbfunc, (void*)&myquery_data))) {
332 fprintf(stderr, "PMIx_Query_info failed: %d\n", rc);
333 goto done;
334 }
335 DEBUG_WAIT_THREAD(&myquery_data.lock);
336 DEBUG_DESTRUCT_LOCK(&myquery_data.lock);
337
338
339
340
341
342 if (2 != myquery_data.ninfo) {
343
344 fprintf(stderr, "PMIx Query returned an incorrect number of results: %lu\n", myquery_data.ninfo);
345 PMIX_INFO_FREE(myquery_data.info, myquery_data.ninfo);
346 goto done;
347 }
348
349
350
351
352
353
354
355
356
357
358
359
360
361 for (n=0; n < myquery_data.ninfo; n++) {
362 if (0 == strcmp(myquery_data.info[n].key, PMIX_QUERY_SPAWN_SUPPORT)) {
363
364 if (NULL != strstr(myquery_data.info[n].value.data.string, PMIX_COSPAWN_APP)) {
365 cospawn = true;
366 } else {
367 cospawn = false;
368 }
369 } else if (0 == strcmp(myquery_data.info[n].key, PMIX_QUERY_DEBUG_SUPPORT)) {
370 if (NULL != strstr(myquery_data.info[n].value.data.string, PMIX_DEBUG_STOP_ON_EXEC)) {
371 stop_on_exec = true;
372 } else {
373 stop_on_exec = false;
374 }
375 }
376 }
377
378
379
380 if (cospawn) {
381
382 } else {
383
384 napps = 1;
385 PMIX_APP_CREATE(app, napps);
386
387 app[0].cmd = strdup("client");
388 PMIX_ARGV_APPEND(rc, app[0].argv, "./client");
389 getcwd(cwd, 1024);
390 app[0].cwd = strdup(cwd);
391 app[0].maxprocs = 2;
392
393 ninfo = 4;
394 PMIX_INFO_CREATE(info, ninfo);
395 PMIX_INFO_LOAD(&info[0], PMIX_MAPBY, "slot", PMIX_STRING);
396 if (stop_on_exec) {
397 PMIX_INFO_LOAD(&info[1], PMIX_DEBUG_STOP_ON_EXEC, NULL, PMIX_BOOL);
398 } else {
399 PMIX_INFO_LOAD(&info[1], PMIX_DEBUG_STOP_IN_INIT, NULL, PMIX_BOOL);
400 }
401 PMIX_INFO_LOAD(&info[2], PMIX_FWD_STDOUT, NULL, PMIX_BOOL);
402 PMIX_INFO_LOAD(&info[3], PMIX_FWD_STDERR, NULL, PMIX_BOOL);
403
404
405
406 fprintf(stderr, "Debugger: spawning %s\n", app[0].cmd);
407 if (PMIX_SUCCESS != (rc = PMIx_Spawn(info, ninfo, app, napps, appspace))) {
408 fprintf(stderr, "Application failed to launch with error: %s(%d)\n", PMIx_Error_string(rc), rc);
409 goto done;
410 }
411 PMIX_INFO_FREE(info, ninfo);
412 PMIX_APP_FREE(app, napps);
413
414
415 if (PMIX_SUCCESS != (rc = spawn_debugger(appspace))) {
416 goto done;
417 }
418 }
419
420
421
422 DEBUG_WAIT_THREAD(&waiting_for_debugger);
423 }
424
425 done:
426 DEBUG_DESTRUCT_LOCK(&waiting_for_debugger);
427 PMIx_tool_finalize();
428
429 return(rc);
430 }
431
432 static int attach_to_running_job(char *nspace)
433 {
434 pmix_status_t rc;
435 pmix_proc_t myproc;
436 pmix_query_t *query;
437 size_t nq;
438 myquery_data_t *q;
439
440
441
442 nq = 1;
443 PMIX_QUERY_CREATE(query, nq);
444 PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_NAMESPACES);
445
446 q = (myquery_data_t*)malloc(sizeof(myquery_data_t));
447 DEBUG_CONSTRUCT_LOCK(&q->lock);
448 if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, nq, cbfunc, (void*)q))) {
449 fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info failed: %d\n", myproc.nspace, myproc.rank, rc);
450 return -1;
451 }
452 DEBUG_WAIT_THREAD(&q->lock);
453 DEBUG_DESTRUCT_LOCK(&q->lock);
454
455 if (NULL == q->info) {
456 fprintf(stderr, "Query returned no info\n");
457 return -1;
458 }
459
460 if (PMIX_STRING != q->info[0].value.type) {
461 fprintf(stderr, "Query returned incorrect data type: %d\n", q->info[0].value.type);
462 return -1;
463 }
464 if (NULL == q->info[0].value.data.string) {
465 fprintf(stderr, "Query returned no active nspaces\n");
466 return -1;
467 }
468
469 fprintf(stderr, "Query returned %s\n", q->info[0].value.data.string);
470 return 0;
471
472 #if 0
473
474
475
476 PMIX_INFO_FREE(info, ninfo);
477
478
479 ninfo = 1;
480 PMIX_INFO_CREATE(info, ninfo);
481 (void)strncpy(info[0].key, PMIX_QUERY_PROC_TABLE, PMIX_MAX_KEYLEN);
482 (void)strncpy(info[0].qualifier, nspace, PMIX_MAX_KEYLEN);
483 if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(info, ninfo, infocbfunc, (void*)&active))) {
484 fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info_nb failed: %d\n", myproc.nspace, myproc.rank, rc);
485 return -1;
486 }
487
488
489
490 if (PMIX_DATA_ARRAY != info[0].type) {
491 fprintf(stderr, "Query returned incorrect data type: %d\n", info[0].type);
492 return -1;
493 }
494 if (NULL == info[0].data.darray.array) {
495 fprintf(stderr, "Query returned no proctable info\n");
496 return -1;
497 }
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513 PMIX_INFO_FREE(info, ninfo);
514
515
516 napps = 1;
517 PMIX_APP_CREATE(app, napps);
518
519 app[0].cmd = strdup("debuggerdaemon");
520 app[0].argc = 1;
521 app[0].argv = (char**)malloc(2*sizeof(char*));
522 app[0].argv[0] = strdup("debuggerdaemon");
523 app[0].argv[1] = NULL;
524
525
526 ninfo = 3;
527 PMIX_INFO_CREATE(app[0].info, ninfo);
528 PMIX_INFO_LOAD(&app[0].info[0], PMIX_MAPBY, "ppr:1:node", PMIX_STRING);
529 PMIX_INFO_LOAD(&app[0].info[1], PMIX_DEBUGGER_DAEMONS, true, PMIX_BOOL);
530 PMIX_INFO_LOAD(&app[0].info[2], PMIX_DEBUG_TARGET, nspace, PMIX_STRING);
531
532
533 PMIx_Spawn(NULL, 0, app, napps, dspace);
534
535 PMIX_APP_FREE(app, napps);
536
537
538
539 return 0;
540 #endif
541 }