This source file includes the following definitions.
- set_stacktrace_filename
- show_stackframe
- opal_stackframe_output
- opal_stackframe_output_string
- opal_util_register_stackhandlers
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 #include "opal_config.h"
24
25 #include <stdio.h>
26 #ifdef HAVE_UNISTD_H
27 #include <unistd.h>
28 #endif
29 #ifdef HAVE_SYS_TYPES_H
30 #include <sys/types.h>
31 #endif
32 #ifdef HAVE_SYS_STAT_H
33 #include <sys/stat.h>
34 #endif
35 #ifdef HAVE_FCNTL_H
36 #include <fcntl.h>
37 #else
38 #ifdef HAVE_SYS_FCNTL_H
39 #include <sys/fcntl.h>
40 #endif
41 #endif
42
43 #include <string.h>
44 #include <signal.h>
45
46 #include "opal/util/stacktrace.h"
47 #include "opal/mca/backtrace/backtrace.h"
48 #include "opal/constants.h"
49 #include "opal/util/output.h"
50 #include "opal/util/show_help.h"
51 #include "opal/util/argv.h"
52 #include "opal/util/proc.h"
53 #include "opal/util/error.h"
54 #include "opal/runtime/opal_params.h"
55
56 #ifndef _NSIG
57 #define _NSIG 32
58 #endif
59
60 #define HOSTFORMAT "[%s:%05d] "
61
62 int opal_stacktrace_output_fileno = -1;
63 static char *opal_stacktrace_output_filename_base = NULL;
64 static size_t opal_stacktrace_output_filename_max_len = 0;
65 static char stacktrace_hostname[OPAL_MAXHOSTNAMELEN];
66 static char *unable_to_print_msg = "Unable to print stack trace!\n";
67
68
69
70
71
72
73
74 static void set_stacktrace_filename(void) {
75 opal_proc_t *my_proc = opal_proc_local_get();
76
77 if( NULL == my_proc ) {
78 snprintf(opal_stacktrace_output_filename, opal_stacktrace_output_filename_max_len,
79 "%s.%lu",
80 opal_stacktrace_output_filename_base, (unsigned long)getpid());
81 }
82 else {
83 snprintf(opal_stacktrace_output_filename, opal_stacktrace_output_filename_max_len,
84 "%s.%lu.%lu",
85 opal_stacktrace_output_filename_base, (unsigned long)my_proc->proc_name.vpid, (unsigned long)getpid());
86 }
87
88 return;
89 }
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105 #if OPAL_WANT_PRETTY_PRINT_STACKTRACE
106 static void show_stackframe (int signo, siginfo_t * info, void * p)
107 {
108 char print_buffer[1024];
109 char * tmp = print_buffer;
110 int size = sizeof (print_buffer);
111 int ret;
112 char *si_code_str = "";
113
114
115 if( 0 > opal_stacktrace_output_fileno && 0 == opal_stacktrace_output_filename_max_len ) {
116
117
118
119
120
121 signal(signo, SIG_DFL);
122 raise(signo);
123
124 return;
125 }
126
127
128 if( 0 < opal_stacktrace_output_filename_max_len ) {
129 set_stacktrace_filename();
130 opal_stacktrace_output_fileno = open(opal_stacktrace_output_filename,
131 O_CREAT|O_WRONLY|O_TRUNC, S_IRUSR|S_IWUSR);
132 if( 0 > opal_stacktrace_output_fileno ) {
133 opal_output(0, "Error: Failed to open the stacktrace output file. Default: stderr\n\tFilename: %s\n\tErrno: %s",
134 opal_stacktrace_output_filename, strerror(errno));
135 opal_stacktrace_output_fileno = fileno(stderr);
136 }
137 }
138
139
140 memset (print_buffer, 0, sizeof (print_buffer));
141 ret = snprintf(print_buffer, sizeof(print_buffer),
142 HOSTFORMAT "*** Process received signal ***\n",
143 stacktrace_hostname, getpid());
144 write(opal_stacktrace_output_fileno, print_buffer, ret);
145
146
147 memset (print_buffer, 0, sizeof (print_buffer));
148
149 #ifdef HAVE_STRSIGNAL
150 ret = snprintf (tmp, size, HOSTFORMAT "Signal: %s (%d)\n",
151 stacktrace_hostname, getpid(), strsignal(signo), signo);
152 #else
153 ret = snprintf (tmp, size, HOSTFORMAT "Signal: %d\n",
154 stacktrace_hostname, getpid(), signo);
155 #endif
156 size -= ret;
157 tmp += ret;
158
159 if (NULL != info) {
160 switch (signo)
161 {
162 case SIGILL:
163 switch (info->si_code)
164 {
165 #ifdef ILL_ILLOPC
166 case ILL_ILLOPC: si_code_str = "Illegal opcode"; break;
167 #endif
168 #ifdef ILL_ILLOPN
169 case ILL_ILLOPN: si_code_str = "Illegal operand"; break;
170 #endif
171 #ifdef ILL_ILLADR
172 case ILL_ILLADR: si_code_str = "Illegal addressing mode"; break;
173 #endif
174 #ifdef ILL_ILLTRP
175 case ILL_ILLTRP: si_code_str = "Illegal trap"; break;
176 #endif
177 #ifdef ILL_PRVOPC
178 case ILL_PRVOPC: si_code_str = "Privileged opcode"; break;
179 #endif
180 #ifdef ILL_PRVREG
181 case ILL_PRVREG: si_code_str = "Privileged register"; break;
182 #endif
183 #ifdef ILL_COPROC
184 case ILL_COPROC: si_code_str = "Coprocessor error"; break;
185 #endif
186 #ifdef ILL_BADSTK
187 case ILL_BADSTK: si_code_str = "Internal stack error"; break;
188 #endif
189 }
190 break;
191 case SIGFPE:
192 switch (info->si_code)
193 {
194 #ifdef FPE_INTDIV
195 case FPE_INTDIV: si_code_str = "Integer divide-by-zero"; break;
196 #endif
197 #ifdef FPE_INTOVF
198 case FPE_INTOVF: si_code_str = "Integer overflow"; break;
199 #endif
200 case FPE_FLTDIV: si_code_str = "Floating point divide-by-zero"; break;
201 case FPE_FLTOVF: si_code_str = "Floating point overflow"; break;
202 case FPE_FLTUND: si_code_str = "Floating point underflow"; break;
203 #ifdef FPE_FLTRES
204 case FPE_FLTRES: si_code_str = "Floating point inexact result"; break;
205 #endif
206 #ifdef FPE_FLTINV
207 case FPE_FLTINV: si_code_str = "Invalid floating point operation"; break;
208 #endif
209 #ifdef FPE_FLTSUB
210 case FPE_FLTSUB: si_code_str = "Subscript out of range"; break;
211 #endif
212 }
213 break;
214 case SIGSEGV:
215 switch (info->si_code)
216 {
217 #ifdef SEGV_MAPERR
218 case SEGV_MAPERR: si_code_str = "Address not mapped"; break;
219 #endif
220 #ifdef SEGV_ACCERR
221 case SEGV_ACCERR: si_code_str = "Invalid permissions"; break;
222 #endif
223 }
224 break;
225 case SIGBUS:
226 switch (info->si_code)
227 {
228 #ifdef BUS_ADRALN
229 case BUS_ADRALN: si_code_str = "Invalid address alignment"; break;
230 #endif
231 #ifdef BUS_ADRERR
232 case BUS_ADRERR: si_code_str = "Non-existant physical address"; break;
233 #endif
234 #ifdef BUS_OBJERR
235 case BUS_OBJERR: si_code_str = "Objet-specific hardware error"; break;
236 #endif
237 }
238 break;
239 case SIGTRAP:
240 switch (info->si_code)
241 {
242 #ifdef TRAP_BRKPT
243 case TRAP_BRKPT: si_code_str = "Process breakpoint"; break;
244 #endif
245 #ifdef TRAP_TRACE
246 case TRAP_TRACE: si_code_str = "Process trace trap"; break;
247 #endif
248 }
249 break;
250 case SIGCHLD:
251 switch (info->si_code)
252 {
253 #ifdef CLD_EXITED
254 case CLD_EXITED: si_code_str = "Child has exited"; break;
255 #endif
256 #ifdef CLD_KILLED
257 case CLD_KILLED: si_code_str = "Child has terminated abnormally and did not create a core file"; break;
258 #endif
259 #ifdef CLD_DUMPED
260 case CLD_DUMPED: si_code_str = "Child has terminated abnormally and created a core file"; break;
261 #endif
262 #ifdef CLD_WTRAPPED
263 case CLD_TRAPPED: si_code_str = "Traced child has trapped"; break;
264 #endif
265 #ifdef CLD_STOPPED
266 case CLD_STOPPED: si_code_str = "Child has stopped"; break;
267 #endif
268 #ifdef CLD_CONTINUED
269 case CLD_CONTINUED: si_code_str = "Stopped child has continued"; break;
270 #endif
271 }
272 break;
273 #ifdef SIGPOLL
274 case SIGPOLL:
275 switch (info->si_code)
276 {
277 #ifdef POLL_IN
278 case POLL_IN: si_code_str = "Data input available"; break;
279 #endif
280 #ifdef POLL_OUT
281 case POLL_OUT: si_code_str = "Output buffers available"; break;
282 #endif
283 #ifdef POLL_MSG
284 case POLL_MSG: si_code_str = "Input message available"; break;
285 #endif
286 #ifdef POLL_ERR
287 case POLL_ERR: si_code_str = "I/O error"; break;
288 #endif
289 #ifdef POLL_PRI
290 case POLL_PRI: si_code_str = "High priority input available"; break;
291 #endif
292 #ifdef POLL_HUP
293 case POLL_HUP: si_code_str = "Device disconnected"; break;
294 #endif
295 }
296 break;
297 #endif
298 default:
299 switch (info->si_code)
300 {
301 #ifdef SI_ASYNCNL
302 case SI_ASYNCNL: si_code_str = "SI_ASYNCNL"; break;
303 #endif
304 #ifdef SI_SIGIO
305 case SI_SIGIO: si_code_str = "Queued SIGIO"; break;
306 #endif
307 #ifdef SI_ASYNCIO
308 case SI_ASYNCIO: si_code_str = "Asynchronous I/O request completed"; break;
309 #endif
310 #ifdef SI_MESGQ
311 case SI_MESGQ: si_code_str = "Message queue state changed"; break;
312 #endif
313 case SI_TIMER: si_code_str = "Timer expiration"; break;
314 case SI_QUEUE: si_code_str = "Sigqueue() signal"; break;
315 case SI_USER: si_code_str = "User function (kill, sigsend, abort, etc.)"; break;
316 #ifdef SI_KERNEL
317 case SI_KERNEL: si_code_str = "Kernel signal"; break;
318 #endif
319
320
321
322 #if defined(SI_UNDEFINED)
323 #if SI_UNDEFINED != SI_USER
324 case SI_UNDEFINED: si_code_str = "Undefined code"; break;
325 #endif
326 #endif
327 }
328 }
329
330
331 if (0 != info->si_errno) {
332 ret = snprintf(tmp, size, HOSTFORMAT "Associated errno: %s (%d)\n",
333 stacktrace_hostname, getpid(),
334 strerror (info->si_errno), info->si_errno);
335 size -= ret;
336 tmp += ret;
337 }
338
339 ret = snprintf(tmp, size, HOSTFORMAT "Signal code: %s (%d)\n",
340 stacktrace_hostname, getpid(),
341 si_code_str, info->si_code);
342 size -= ret;
343 tmp += ret;
344
345 switch (signo)
346 {
347 case SIGILL:
348 case SIGFPE:
349 case SIGSEGV:
350 case SIGBUS:
351 {
352 ret = snprintf(tmp, size, HOSTFORMAT "Failing at address: %p\n",
353 stacktrace_hostname, getpid(), info->si_addr);
354 size -= ret;
355 tmp += ret;
356 break;
357 }
358 case SIGCHLD:
359 {
360 ret = snprintf(tmp, size, HOSTFORMAT "Sending PID: %d, Sending UID: %d, Status: %d\n",
361 stacktrace_hostname, getpid(),
362 info->si_pid, info->si_uid, info->si_status);
363 size -= ret;
364 tmp += ret;
365 break;
366 }
367 #ifdef SIGPOLL
368 case SIGPOLL:
369 {
370 #ifdef HAVE_SIGINFO_T_SI_FD
371 ret = snprintf(tmp, size, HOSTFORMAT "Band event: %ld, File Descriptor : %d\n",
372 stacktrace_hostname, getpid(), (long)info->si_band, info->si_fd);
373 #elif HAVE_SIGINFO_T_SI_BAND
374 ret = snprintf(tmp, size, HOSTFORMAT "Band event: %ld\n",
375 stacktrace_hostname, getpid(), (long)info->si_band);
376 #else
377 ret = 0;
378 #endif
379 size -= ret;
380 tmp += ret;
381 break;
382 }
383 #endif
384 }
385 } else {
386 ret = snprintf(tmp, size,
387 HOSTFORMAT "siginfo is NULL, additional information unavailable\n",
388 stacktrace_hostname, getpid());
389 size -= ret;
390 tmp += ret;
391 }
392
393
394 write(opal_stacktrace_output_fileno, print_buffer, sizeof(print_buffer)-size);
395
396
397 snprintf(print_buffer, sizeof(print_buffer), HOSTFORMAT,
398 stacktrace_hostname, getpid());
399 ret = opal_backtrace_print(NULL, print_buffer, 2);
400 if (OPAL_SUCCESS != ret) {
401 write(opal_stacktrace_output_fileno, unable_to_print_msg, strlen(unable_to_print_msg));
402 }
403
404
405 memset (print_buffer, 0, sizeof (print_buffer));
406 ret = snprintf(print_buffer, sizeof(print_buffer),
407 HOSTFORMAT "*** End of error message ***\n",
408 stacktrace_hostname, getpid());
409 if (ret > 0) {
410 write(opal_stacktrace_output_fileno, print_buffer, ret);
411 } else {
412 write(opal_stacktrace_output_fileno, unable_to_print_msg, strlen(unable_to_print_msg));
413 }
414
415 if( fileno(stdout) != opal_stacktrace_output_fileno &&
416 fileno(stderr) != opal_stacktrace_output_fileno ) {
417 close(opal_stacktrace_output_fileno);
418 opal_stacktrace_output_fileno = -1;
419 }
420
421
422 opal_delay_abort();
423
424
425
426
427
428
429 signal(signo, SIG_DFL);
430 raise(signo);
431 }
432
433 #endif
434
435
436 #if OPAL_WANT_PRETTY_PRINT_STACKTRACE
437 void opal_stackframe_output(int stream)
438 {
439 int traces_size;
440 char **traces;
441
442
443 if (OPAL_SUCCESS == opal_backtrace_buffer(&traces, &traces_size)) {
444 int i;
445
446
447
448 for (i = 2; i < traces_size; ++i) {
449 opal_output(stream, "%s", traces[i]);
450 }
451 } else {
452
453 if( 0 > opal_stacktrace_output_fileno && 0 == opal_stacktrace_output_filename_max_len ) {
454 return;
455 }
456
457
458 if( 0 < opal_stacktrace_output_filename_max_len ) {
459 set_stacktrace_filename();
460 opal_stacktrace_output_fileno = open(opal_stacktrace_output_filename,
461 O_CREAT|O_WRONLY|O_TRUNC, S_IRUSR|S_IWUSR);
462 if( 0 > opal_stacktrace_output_fileno ) {
463 opal_output(0, "Error: Failed to open the stacktrace output file. Default: stderr\n\tFilename: %s\n\tErrno: %s",
464 opal_stacktrace_output_filename, strerror(errno));
465 opal_stacktrace_output_fileno = fileno(stderr);
466 }
467 }
468
469 opal_backtrace_print(NULL, NULL, 2);
470
471 if( fileno(stdout) != opal_stacktrace_output_fileno &&
472 fileno(stderr) != opal_stacktrace_output_fileno ) {
473 close(opal_stacktrace_output_fileno);
474 opal_stacktrace_output_fileno = -1;
475 }
476 }
477 }
478
479 char *opal_stackframe_output_string(void)
480 {
481 int traces_size, i;
482 size_t len;
483 char *output, **traces;
484
485 len = 0;
486 if (OPAL_SUCCESS != opal_backtrace_buffer(&traces, &traces_size)) {
487 return NULL;
488 }
489
490
491 for (i = 3; i < traces_size; i++) {
492 if (NULL == traces[i]) {
493 break;
494 }
495 len += strlen(traces[i]) + 1;
496 }
497
498 output = (char *) malloc(len + 1);
499 if (NULL == output) {
500 return NULL;
501 }
502
503 *output = '\0';
504 for (i = 3; i < traces_size; i++) {
505 if (NULL == traces[i]) {
506 break;
507 }
508 strcat(output, traces[i]);
509 strcat(output, "\n");
510 }
511
512 free(traces);
513 return output;
514 }
515
516 #endif
517
518
519
520
521
522
523
524
525
526
527
528 int opal_util_register_stackhandlers (void)
529 {
530 #if OPAL_WANT_PRETTY_PRINT_STACKTRACE
531 struct sigaction act, old;
532 char * tmp;
533 char * next;
534 int i;
535 bool complain, showed_help = false;
536
537 gethostname(stacktrace_hostname, sizeof(stacktrace_hostname));
538
539 for (i = 0 ; i < (int)strlen(stacktrace_hostname) ; ++i) {
540 if (stacktrace_hostname[i] == '.') {
541 stacktrace_hostname[i] = '\0';
542 break;
543 }
544 }
545
546
547 if( NULL == opal_stacktrace_output_filename ||
548 0 == strcasecmp(opal_stacktrace_output_filename, "none") ) {
549 opal_stacktrace_output_fileno = -1;
550 }
551 else if( 0 == strcasecmp(opal_stacktrace_output_filename, "stdout") ) {
552 opal_stacktrace_output_fileno = fileno(stdout);
553 }
554 else if( 0 == strcasecmp(opal_stacktrace_output_filename, "stderr") ) {
555 opal_stacktrace_output_fileno = fileno(stderr);
556 }
557 else if( 0 == strcasecmp(opal_stacktrace_output_filename, "file" ) ||
558 0 == strcasecmp(opal_stacktrace_output_filename, "file:") ) {
559 opal_stacktrace_output_filename_base = strdup("stacktrace");
560
561 free(opal_stacktrace_output_filename);
562
563 opal_stacktrace_output_filename_max_len = strlen("stacktrace") + 8 + 8;
564 opal_stacktrace_output_filename = (char*)malloc(sizeof(char) * opal_stacktrace_output_filename_max_len);
565 set_stacktrace_filename();
566 opal_stacktrace_output_fileno = -1;
567 }
568 else if( 0 == strncasecmp(opal_stacktrace_output_filename, "file:", 5) ) {
569 char *filename_cpy = NULL;
570 next = strchr(opal_stacktrace_output_filename, ':');
571 next++;
572
573 opal_stacktrace_output_filename_base = strdup(next);
574
575 free(opal_stacktrace_output_filename);
576
577 opal_stacktrace_output_filename_max_len = strlen(opal_stacktrace_output_filename_base) + 8 + 8;
578 opal_stacktrace_output_filename = (char*)malloc(sizeof(char) * opal_stacktrace_output_filename_max_len);
579 set_stacktrace_filename();
580 opal_stacktrace_output_fileno = -1;
581
582 free(filename_cpy);
583 }
584 else {
585 opal_stacktrace_output_fileno = fileno(stderr);
586 }
587
588
589
590 memset(&act, 0, sizeof(act));
591 act.sa_sigaction = show_stackframe;
592 act.sa_flags = SA_SIGINFO;
593 #ifdef SA_ONESHOT
594 act.sa_flags |= SA_ONESHOT;
595 #else
596 act.sa_flags |= SA_RESETHAND;
597 #endif
598
599 for (tmp = next = opal_signal_string ;
600 next != NULL && *next != '\0';
601 tmp = next + 1)
602 {
603 int sig;
604 int ret;
605
606 complain = false;
607 sig = strtol (tmp, &next, 10);
608
609
610
611
612
613 if (((0 == sig) && (tmp == next)) || (0 > sig) || (_NSIG <= sig)) {
614 opal_show_help("help-opal-util.txt",
615 "stacktrace bad signal", true,
616 opal_signal_string, tmp);
617 return OPAL_ERR_SILENT;
618 } else if (next == NULL) {
619 return OPAL_ERR_BAD_PARAM;
620 } else if (':' == *next &&
621 0 == strncasecmp(next, ":complain", 9)) {
622 complain = true;
623 next += 9;
624 } else if (',' != *next && '\0' != *next) {
625 return OPAL_ERR_BAD_PARAM;
626 }
627
628
629 ret = sigaction (sig, NULL, &old);
630 if (0 != ret) {
631 return OPAL_ERR_IN_ERRNO;
632 }
633
634 if (SIG_IGN != old.sa_handler && SIG_DFL != old.sa_handler) {
635 if (!showed_help && complain) {
636
637
638
639 opal_show_help("help-opal-util.txt",
640 "stacktrace signal override",
641 true, sig, sig, sig, opal_signal_string);
642 showed_help = true;
643 }
644 }
645
646
647 else {
648 if (0 != sigaction(sig, &act, NULL)) {
649 return OPAL_ERR_IN_ERRNO;
650 }
651 }
652 }
653
654 #endif
655
656 return OPAL_SUCCESS;
657 }
658