root/opal/mca/pmix/pmix4x/pmix/src/mca/ptl/tcp/ptl_tcp.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. init
  2. finalize
  3. pmix_getline
  4. connect_to_peer
  5. send_recv
  6. send_oneway
  7. timeout
  8. parse_uri_file
  9. try_connect
  10. send_connect_ack
  11. recv_connect_ack
  12. df_search

   1 /*
   2  * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
   3  *                         University Research and Technology
   4  *                         Corporation.  All rights reserved.
   5  * Copyright (c) 2004-2011 The University of Tennessee and The University
   6  *                         of Tennessee Research Foundation.  All rights
   7  *                         reserved.
   8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
   9  *                         University of Stuttgart.  All rights reserved.
  10  * Copyright (c) 2004-2005 The Regents of the University of California.
  11  *                         All rights reserved.
  12  * Copyright (c) 2010-2011 Oak Ridge National Labs.  All rights reserved.
  13  * Copyright (c) 2011-2014 Cisco Systems, Inc.  All rights reserved.
  14  * Copyright (c) 2011-2013 Los Alamos National Security, LLC.  All rights
  15  *                         reserved.
  16  * Copyright (c) 2013-2019 Intel, Inc.  All rights reserved.
  17  * Copyright (c) 2018      IBM Corporation.  All rights reserved.
  18  * $COPYRIGHT$
  19  *
  20  * Additional copyrights may follow
  21  *
  22  * $HEADER$
  23  *
  24  */
  25 
  26 #include <src/include/pmix_config.h>
  27 #include "src/include/pmix_globals.h"
  28 
  29 #ifdef HAVE_FCNTL_H
  30 #include <fcntl.h>
  31 #endif
  32 #ifdef HAVE_UNISTD_H
  33 #include <unistd.h>
  34 #endif
  35 #ifdef HAVE_SYS_SOCKET_H
  36 #include <sys/socket.h>
  37 #endif
  38 #ifdef HAVE_SYS_UIO_H
  39 #include <sys/uio.h>
  40 #endif
  41 #ifdef HAVE_SYS_TYPES_H
  42 #include <sys/types.h>
  43 #endif
  44 #ifdef HAVE_SYS_STAT_H
  45 #include <sys/stat.h>
  46 #endif
  47 #ifdef HAVE_DIRENT_H
  48 #include <dirent.h>
  49 #endif
  50 #ifdef HAVE_SYS_SYSCTL_H
  51 #include <sys/sysctl.h>
  52 #endif
  53 
  54 #include "src/include/pmix_socket_errno.h"
  55 #include "src/client/pmix_client_ops.h"
  56 #include "src/server/pmix_server_ops.h"
  57 #include "src/util/argv.h"
  58 #include "src/util/error.h"
  59 #include "src/util/os_path.h"
  60 #include "src/util/show_help.h"
  61 #include "src/mca/bfrops/base/base.h"
  62 #include "src/mca/gds/gds.h"
  63 
  64 #include "src/mca/ptl/base/base.h"
  65 #include "ptl_tcp.h"
  66 
  67 static pmix_status_t init(void);
  68 static void finalize(void);
  69 static pmix_status_t connect_to_peer(struct pmix_peer_t *peer,
  70                                      pmix_info_t *info, size_t ninfo);
  71 static pmix_status_t send_recv(struct pmix_peer_t *peer,
  72                                pmix_buffer_t *bfr,
  73                                pmix_ptl_cbfunc_t cbfunc,
  74                                void *cbdata);
  75 static pmix_status_t send_oneway(struct pmix_peer_t *peer,
  76                                  pmix_buffer_t *bfr,
  77                                  pmix_ptl_tag_t tag);
  78 
  79 pmix_ptl_module_t pmix_ptl_tcp_module = {
  80     .init = init,
  81     .finalize = finalize,
  82     .send_recv = send_recv,
  83     .send = send_oneway,
  84     .connect_to_peer = connect_to_peer
  85 };
  86 
  87 static pmix_status_t recv_connect_ack(int sd, uint8_t myflag);
  88 static pmix_status_t send_connect_ack(int sd, uint8_t *myflag, pmix_info_t info[], size_t ninfo);
  89 
  90 
  91 static pmix_status_t init(void)
  92 {
  93     return PMIX_SUCCESS;
  94 }
  95 
  96 static void finalize(void)
  97 {
  98 }
  99 
 100 static char *pmix_getline(FILE *fp)
 101 {
 102     char *ret, *buff;
 103     char input[1024];
 104 
 105     ret = fgets(input, 1024, fp);
 106     if (NULL != ret) {
 107        input[strlen(input)-1] = '\0';  /* remove newline */
 108        buff = strdup(input);
 109        return buff;
 110     }
 111 
 112     return NULL;
 113 }
 114 
 115 static pmix_status_t parse_uri_file(char *filename,
 116                                     char **uri,
 117                                     char **nspace,
 118                                     pmix_rank_t *rank);
 119 static pmix_status_t try_connect(char *uri, int *sd, pmix_info_t info[], size_t ninfo);
 120 static pmix_status_t df_search(char *dirname, char *prefix,
 121                                pmix_info_t info[], size_t ninfo,
 122                                int *sd, char **nspace,
 123                                pmix_rank_t *rank, char **uri);
 124 
 125 static pmix_status_t connect_to_peer(struct pmix_peer_t *peer,
 126                                      pmix_info_t *info, size_t ninfo)
 127 {
 128     char *evar, **uri, *suri = NULL, *suri2 = NULL;
 129     char *filename, *nspace=NULL;
 130     pmix_rank_t rank = PMIX_RANK_WILDCARD;
 131     char *p, *p2, *server_nspace = NULL, *rendfile = NULL;
 132     int sd, rc;
 133     size_t n;
 134     char myhost[PMIX_MAXHOSTNAMELEN];
 135     bool system_level = false;
 136     bool system_level_only = false;
 137     bool reconnect = false;
 138     pid_t pid = 0, mypid;
 139     pmix_list_t ilist;
 140     pmix_info_caddy_t *kv;
 141     pmix_info_t *iptr = NULL, mypidinfo, mycmdlineinfo, launcher;
 142     size_t niptr = 0;
 143     pmix_kval_t *urikv = NULL;
 144 
 145     pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
 146                         "ptl:tcp: connecting to server");
 147 
 148     /* see if the connection info is in the info array - if
 149      * so, then that overrides all other options */
 150 
 151 
 152     /* if I am a client, then we need to look for the appropriate
 153      * connection info in the environment */
 154     if (PMIX_PROC_IS_CLIENT(pmix_globals.mypeer)) {
 155         if (NULL != (evar = getenv("PMIX_SERVER_URI4"))) {
 156             /* we are talking to a v3 server */
 157             pmix_client_globals.myserver->proc_type = PMIX_PROC_SERVER | PMIX_PROC_V3;
 158             pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
 159                                 "V3 SERVER DETECTED");
 160             /* must use the v3 bfrops module */
 161             pmix_globals.mypeer->nptr->compat.bfrops = pmix_bfrops_base_assign_module(NULL);
 162             if (NULL == pmix_globals.mypeer->nptr->compat.bfrops) {
 163                 return PMIX_ERR_INIT;
 164             }
 165         } else if (NULL != (evar = getenv("PMIX_SERVER_URI3"))) {
 166             /* we are talking to a v3 server */
 167             pmix_client_globals.myserver->proc_type = PMIX_PROC_SERVER | PMIX_PROC_V3;
 168             pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
 169                                 "V3 SERVER DETECTED");
 170             /* must use the v3 bfrops module */
 171             pmix_globals.mypeer->nptr->compat.bfrops = pmix_bfrops_base_assign_module("v3");
 172             if (NULL == pmix_globals.mypeer->nptr->compat.bfrops) {
 173                 return PMIX_ERR_INIT;
 174             }
 175         } else if (NULL != (evar = getenv("PMIX_SERVER_URI21"))) {
 176             /* we are talking to a v2.1 server */
 177             pmix_client_globals.myserver->proc_type = PMIX_PROC_SERVER | PMIX_PROC_V21;
 178             pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
 179                                 "V21 SERVER DETECTED");
 180             /* must use the v21 bfrops module */
 181             pmix_globals.mypeer->nptr->compat.bfrops = pmix_bfrops_base_assign_module("v21");
 182             if (NULL == pmix_globals.mypeer->nptr->compat.bfrops) {
 183                 return PMIX_ERR_INIT;
 184             }
 185         } else if (NULL != (evar = getenv("PMIX_SERVER_URI2"))) {
 186             /* we are talking to a v2.0 server */
 187             pmix_client_globals.myserver->proc_type = PMIX_PROC_SERVER | PMIX_PROC_V20;
 188             pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
 189                                 "V20 SERVER DETECTED");
 190             /* must use the v20 bfrops module */
 191             pmix_globals.mypeer->nptr->compat.bfrops = pmix_bfrops_base_assign_module("v20");
 192             if (NULL == pmix_globals.mypeer->nptr->compat.bfrops) {
 193                 return PMIX_ERR_INIT;
 194             }
 195         } else {
 196             /* not us */
 197             return PMIX_ERR_NOT_SUPPORTED;
 198         }
 199         /* the server will be using the same bfrops as us */
 200         pmix_client_globals.myserver->nptr->compat.bfrops = pmix_globals.mypeer->nptr->compat.bfrops;
 201         /* mark that we are using the V2 (i.e., tcp) protocol */
 202         pmix_globals.mypeer->protocol = PMIX_PROTOCOL_V2;
 203 
 204         /* the URI consists of the following elements:
 205         *    - server nspace.rank
 206         *    - ptl rendezvous URI
 207         */
 208         uri = pmix_argv_split(evar, ';');
 209         if (2 != pmix_argv_count(uri)) {
 210             PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
 211             pmix_argv_free(uri);
 212             return PMIX_ERR_NOT_SUPPORTED;
 213         }
 214 
 215         /* set the server nspace */
 216         p = uri[0];
 217         if (NULL == (p2 = strchr(p, '.'))) {
 218             PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
 219             pmix_argv_free(uri);
 220             return PMIX_ERR_NOT_SUPPORTED;
 221         }
 222         *p2 = '\0';
 223         ++p2;
 224         nspace = strdup(p);
 225         rank = strtoull(p2, NULL, 10);
 226         suri = strdup(uri[1]);
 227 
 228         pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
 229                             "ptl:tcp:client attempt connect to %s", uri[1]);
 230 
 231         /* go ahead and try to connect */
 232         if (PMIX_SUCCESS != (rc = try_connect(uri[1], &sd, info, ninfo))) {
 233             free(nspace);
 234             pmix_argv_free(uri);
 235             free(suri);
 236             return rc;
 237         }
 238         pmix_argv_free(uri);
 239         goto complete;
 240 
 241     }
 242 
 243     /* get here if we are a tool - check any provided directives
 244      * to see where they want us to connect to */
 245     suri = NULL;
 246     PMIX_CONSTRUCT(&ilist, pmix_list_t);
 247     if (NULL != info) {
 248         for (n=0; n < ninfo; n++) {
 249             if (PMIX_CHECK_KEY(&info[n], PMIX_CONNECT_TO_SYSTEM)) {
 250                 system_level_only = PMIX_INFO_TRUE(&info[n]);
 251             } else if (PMIX_CHECK_KEY(&info[n], PMIX_CONNECT_SYSTEM_FIRST)) {
 252                 /* try the system-level */
 253                 system_level = PMIX_INFO_TRUE(&info[n]);
 254             } else if (PMIX_CHECK_KEY(&info[n], PMIX_SERVER_PIDINFO)) {
 255                 pid = info[n].value.data.pid;
 256             } else if (PMIX_CHECK_KEY(&info[n], PMIX_SERVER_NSPACE)) {
 257                 if (NULL != server_nspace) {
 258                     /* they included it more than once */
 259                     if (0 == strcmp(server_nspace, info[n].value.data.string)) {
 260                         /* same value, so ignore it */
 261                         continue;
 262                     }
 263                     /* otherwise, we don't know which one to use */
 264                     rc = PMIX_ERR_BAD_PARAM;
 265                     goto cleanup;
 266                 }
 267                 server_nspace = strdup(info[n].value.data.string);
 268             } else if (PMIX_CHECK_KEY(&info[n], PMIX_SERVER_URI)) {
 269                 if (NULL != suri) {
 270                     /* they included it more than once */
 271                     if (0 == strcmp(suri, info[n].value.data.string)) {
 272                         /* same value, so ignore it */
 273                         continue;
 274                     }
 275                     /* otherwise, we don't know which one to use */
 276                     rc = PMIX_ERR_BAD_PARAM;
 277                     goto cleanup;
 278                 }
 279                 suri = strdup(info[n].value.data.string);
 280             } else if (PMIX_CHECK_KEY(&info[n], PMIX_CONNECT_RETRY_DELAY)) {
 281                 mca_ptl_tcp_component.wait_to_connect = info[n].value.data.uint32;
 282             } else if (PMIX_CHECK_KEY(&info[n], PMIX_CONNECT_MAX_RETRIES)) {
 283                 mca_ptl_tcp_component.max_retries = info[n].value.data.uint32;
 284             } else if (PMIX_CHECK_KEY(&info[n], PMIX_RECONNECT_SERVER)) {
 285                 reconnect = true;
 286             } else if (PMIX_CHECK_KEY(&info[n], PMIX_LAUNCHER_RENDEZVOUS_FILE)) {
 287                 if (NULL != rendfile) {
 288                     free(rendfile);
 289                 }
 290                 rendfile = strdup(info[n].value.data.string);
 291             } else {
 292                 /* need to pass this to server */
 293                 kv = PMIX_NEW(pmix_info_caddy_t);
 294                 kv->info = &info[n];
 295                 pmix_list_append(&ilist, &kv->super);
 296             }
 297         }
 298     }
 299     /* add our pid to the array */
 300     kv = PMIX_NEW(pmix_info_caddy_t);
 301     mypid = getpid();
 302     PMIX_INFO_LOAD(&mypidinfo, PMIX_PROC_PID, &mypid, PMIX_PID);
 303     kv->info = &mypidinfo;
 304     pmix_list_append(&ilist, &kv->super);
 305 
 306     /* if I am a launcher, tell them so */
 307     if (PMIX_PROC_IS_LAUNCHER(pmix_globals.mypeer)) {
 308         kv = PMIX_NEW(pmix_info_caddy_t);
 309         PMIX_INFO_LOAD(&launcher, PMIX_LAUNCHER, NULL, PMIX_BOOL);
 310         kv->info = &launcher;
 311         pmix_list_append(&ilist, &kv->super);
 312     }
 313 
 314     /* add our cmd line to the array */
 315 #if PMIX_HAVE_APPLE
 316     int mib[3], argmax, nargs, num;
 317     size_t size;
 318     char *procargs, *cp, *cptr;
 319     char **stack = NULL;
 320 
 321     /* Get the maximum process arguments size. */
 322     mib[0] = CTL_KERN;
 323     mib[1] = KERN_ARGMAX;
 324     size = sizeof(argmax);
 325 
 326     if (sysctl(mib, 2, &argmax, &size, NULL, 0) == -1) {
 327         fprintf(stderr, "sysctl() argmax failed\n");
 328         rc = PMIX_ERR_NO_PERMISSIONS;
 329         goto cleanup;
 330     }
 331 
 332     /* Allocate space for the arguments. */
 333     procargs = (char *)malloc(argmax);
 334     if (procargs == NULL) {
 335         rc = -1;
 336         goto cleanup;
 337     }
 338 
 339     /* Make a sysctl() call to get the raw argument space of the process. */
 340     mib[0] = CTL_KERN;
 341     mib[1] = KERN_PROCARGS2;
 342     mib[2] = getpid();
 343 
 344     size = (size_t)argmax;
 345 
 346     if (sysctl(mib, 3, procargs, &size, NULL, 0) == -1) {
 347         fprintf(stderr, "Lacked permissions\n");;
 348         rc = PMIX_ERR_NO_PERMISSIONS;
 349         goto cleanup;
 350     }
 351 
 352     memcpy(&nargs, procargs, sizeof(nargs));
 353     /* this points to the executable - skip over that to get the rest */
 354     cp = procargs + sizeof(nargs);
 355     cp += strlen(cp);
 356     /* this is the first argv */
 357     pmix_argv_append_nosize(&stack, cp);
 358     /* skip any embedded NULLs */
 359     while (cp < &procargs[size] && '\0' == *cp) {
 360         ++cp;
 361     }
 362     if (cp != &procargs[size]) {
 363         /* from this point, we have the argv separated by NULLs - split them out */
 364         cptr = cp;
 365         num = 0;
 366         while (cp < &procargs[size] && num < nargs) {
 367             if ('\0' == *cp) {
 368                 pmix_argv_append_nosize(&stack, cptr);
 369                 ++cp;  // skip over the NULL
 370                 cptr = cp;
 371                 ++num;
 372             } else {
 373                 ++cp;
 374             }
 375         }
 376     }
 377     p = pmix_argv_join(stack, ' ');
 378     pmix_argv_free(stack);
 379     free(procargs);
 380 #else
 381     char tmp[512];
 382     FILE *fp;
 383 
 384     /* open the pid's info file */
 385     snprintf(tmp, 512, "/proc/%lu/cmdline", (unsigned long)mypid);
 386     fp = fopen(tmp, "r");
 387     if (NULL != fp) {
 388         /* read the cmd line */
 389         fgets(tmp, 512, fp);
 390         fclose(fp);
 391         p = strdup(tmp);
 392     }
 393 #endif
 394     /* pass it along */
 395     kv = PMIX_NEW(pmix_info_caddy_t);
 396     PMIX_INFO_LOAD(&mycmdlineinfo, PMIX_CMD_LINE, p, PMIX_STRING);
 397     kv->info = &mycmdlineinfo;
 398     pmix_list_append(&ilist, &kv->super);
 399     free(p);
 400 
 401     /* if we need to pass anything, setup an array */
 402     if (0 < (niptr = pmix_list_get_size(&ilist))) {
 403         PMIX_INFO_CREATE(iptr, niptr);
 404         n = 0;
 405         while (NULL != (kv = (pmix_info_caddy_t*)pmix_list_remove_first(&ilist))) {
 406             PMIX_INFO_XFER(&iptr[n], kv->info);
 407             PMIX_RELEASE(kv);
 408             ++n;
 409         }
 410     }
 411     PMIX_LIST_DESTRUCT(&ilist);
 412 
 413     if (NULL == suri && !reconnect && NULL != mca_ptl_tcp_component.super.uri) {
 414         suri = strdup(mca_ptl_tcp_component.super.uri);
 415     }
 416 
 417     /* mark that we are using the V2 protocol */
 418     pmix_globals.mypeer->protocol = PMIX_PROTOCOL_V2;
 419     gethostname(myhost, sizeof(myhost));
 420     /* if we were given a URI via MCA param, then look no further */
 421     if (NULL != suri) {
 422         /* if the string starts with "file:", then they are pointing
 423          * us to a file we need to read to get the URI itself */
 424         if (0 == strncmp(suri, "file:", 5)) {
 425             pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
 426                                 "ptl:tcp:tool getting connection info from %s", suri);
 427             nspace = NULL;
 428             rc = parse_uri_file(&suri[5], &suri2, &nspace, &rank);
 429             if (PMIX_SUCCESS != rc) {
 430                 rc = PMIX_ERR_UNREACH;
 431                 goto cleanup;
 432             }
 433             free(suri);
 434             suri = suri2;
 435         } else {
 436             /* we need to extract the nspace/rank of the server from the string */
 437             p = strchr(suri, ';');
 438             if (NULL == p) {
 439                 rc = PMIX_ERR_BAD_PARAM;
 440                 goto cleanup;
 441             }
 442             *p = '\0';
 443             p++;
 444             suri2 = strdup(p); // save the uri portion
 445             /* the '.' in the first part of the original string separates
 446              * nspace from rank */
 447             p = strchr(suri, '.');
 448             if (NULL == p) {
 449                 free(suri2);
 450                 rc = PMIX_ERR_BAD_PARAM;
 451                 goto cleanup;
 452             }
 453             *p = '\0';
 454             p++;
 455             nspace = strdup(suri);
 456             rank = strtoull(p, NULL, 10);
 457             free(suri);
 458             suri = suri2;
 459             /* now update the URI */
 460         }
 461         pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
 462                             "ptl:tcp:tool attempt connect using given URI %s", suri);
 463         /* go ahead and try to connect */
 464         if (PMIX_SUCCESS != (rc = try_connect(suri, &sd, iptr, niptr))) {
 465             goto cleanup;
 466         }
 467         /* cleanup */
 468         goto complete;
 469     }
 470 
 471     /* if they gave us a rendezvous file, use it */
 472     if (NULL != rendfile) {
 473         /* try to read the file */
 474         rc = parse_uri_file(rendfile, &suri, &nspace, &rank);
 475         free(rendfile);
 476         rendfile = NULL;
 477         if (PMIX_SUCCESS == rc) {
 478             pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
 479                                 "ptl:tcp:tool attempt connect to system server at %s", suri);
 480             /* go ahead and try to connect */
 481             if (PMIX_SUCCESS == try_connect(suri, &sd, iptr, niptr)) {
 482                 /* don't free nspace - we will use it below */
 483                 if (NULL != iptr) {
 484                     PMIX_INFO_FREE(iptr, niptr);
 485                 }
 486                 goto complete;
 487             }
 488         }
 489         /* since they gave us a specific rendfile and we couldn't
 490          * connect to it, return an error */
 491         rc = PMIX_ERR_UNREACH;
 492         goto cleanup;
 493     }
 494 
 495     /* if they asked for system-level first or only, we start there */
 496     if (system_level || system_level_only) {
 497         if (0 > asprintf(&filename, "%s/pmix.sys.%s", mca_ptl_tcp_component.system_tmpdir, myhost)) {
 498             rc = PMIX_ERR_NOMEM;
 499             goto cleanup;
 500         }
 501         pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
 502                             "ptl:tcp:tool looking for system server at %s",
 503                             filename);
 504         /* try to read the file */
 505         rc = parse_uri_file(filename, &suri, &nspace, &rank);
 506         free(filename);
 507         if (PMIX_SUCCESS == rc) {
 508             pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
 509                                 "ptl:tcp:tool attempt connect to system server at %s", suri);
 510             /* go ahead and try to connect */
 511             if (PMIX_SUCCESS == try_connect(suri, &sd, iptr, niptr)) {
 512                 /* don't free nspace - we will use it below */
 513                 goto complete;
 514             }
 515             free(nspace);
 516             nspace = NULL;
 517         }
 518     }
 519 
 520     /* we get here if they either didn't ask for a system-level connection,
 521      * or they asked for it and it didn't succeed. If they _only_ wanted
 522      * a system-level connection, then we are done */
 523     if (system_level_only) {
 524         pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
 525                             "ptl:tcp: connecting to system failed");
 526         rc = PMIX_ERR_UNREACH;
 527         goto cleanup;
 528     }
 529 
 530     /* if they gave us a pid, then look for it */
 531     if (0 != pid) {
 532         if (0 > asprintf(&filename, "pmix.%s.tool.%d", myhost, pid)) {
 533             rc = PMIX_ERR_NOMEM;
 534             goto cleanup;
 535         }
 536         pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
 537                             "ptl:tcp:tool searching for given session server %s",
 538                             filename);
 539         nspace = NULL;
 540         rc = df_search(mca_ptl_tcp_component.system_tmpdir,
 541                        filename, iptr, niptr, &sd, &nspace, &rank, &suri);
 542         free(filename);
 543         if (PMIX_SUCCESS == rc) {
 544             goto complete;
 545         }
 546         /* since they gave us a specific pid and we couldn't
 547          * connect to it, return an error */
 548         rc = PMIX_ERR_UNREACH;
 549         goto cleanup;
 550     }
 551 
 552     /* if they gave us an nspace, then look for it */
 553     if (NULL != server_nspace) {
 554         if (0 > asprintf(&filename, "pmix.%s.tool.%s", myhost, server_nspace)) {
 555             rc = PMIX_ERR_NOMEM;
 556             goto cleanup;
 557         }
 558         free(server_nspace);
 559         server_nspace = NULL;
 560         pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
 561                             "ptl:tcp:tool searching for given session server %s",
 562                             filename);
 563         nspace = NULL;
 564         rc = df_search(mca_ptl_tcp_component.system_tmpdir,
 565                        filename, iptr, niptr, &sd, &nspace, &rank, &suri);
 566         free(filename);
 567         if (PMIX_SUCCESS == rc) {
 568             goto complete;
 569         }
 570         /* since they gave us a specific nspace and we couldn't
 571          * connect to it, return an error */
 572         rc = PMIX_ERR_UNREACH;
 573         goto cleanup;
 574     }
 575 
 576     /* they didn't give us a pid, so we will search to see what session-level
 577      * tools are available to this user. We will take the first connection
 578      * that succeeds - this is based on the likelihood that there is only
 579      * one session per user on a node */
 580 
 581     if (0 > asprintf(&filename, "pmix.%s.tool", myhost)) {
 582         rc = PMIX_ERR_NOMEM;
 583         goto cleanup;
 584     }
 585     pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
 586                         "ptl:tcp:tool searching for session server %s",
 587                         filename);
 588     nspace = NULL;
 589     rc = df_search(mca_ptl_tcp_component.system_tmpdir,
 590                    filename, iptr, niptr, &sd, &nspace, &rank, &suri);
 591     free(filename);
 592     if (PMIX_SUCCESS != rc) {
 593         rc = PMIX_ERR_UNREACH;
 594         goto cleanup;
 595     }
 596 
 597   complete:
 598     pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
 599                         "tcp_peer_try_connect: Connection across to server succeeded");
 600 
 601     /* do a final bozo check */
 602     if (NULL == nspace || PMIX_RANK_WILDCARD == rank) {
 603         CLOSE_THE_SOCKET(sd);
 604         rc = PMIX_ERR_UNREACH;
 605         goto cleanup;
 606     }
 607     /* mark the connection as made */
 608     pmix_globals.connected = true;
 609     pmix_client_globals.myserver->sd = sd;
 610 
 611     /* tools setup their server info in try_connect because they
 612      * utilize a broader handshake */
 613     if (PMIX_PROC_IS_CLIENT(pmix_globals.mypeer)) {
 614         /* setup the server info */
 615         if (NULL == pmix_client_globals.myserver->info) {
 616             pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t);
 617         }
 618         if (NULL == pmix_client_globals.myserver->nptr) {
 619             pmix_client_globals.myserver->nptr = PMIX_NEW(pmix_namespace_t);
 620         }
 621         if (NULL != pmix_client_globals.myserver->nptr->nspace) {
 622             free(pmix_client_globals.myserver->nptr->nspace);
 623         }
 624         pmix_client_globals.myserver->nptr->nspace = strdup(nspace);
 625 
 626         if (NULL != pmix_client_globals.myserver->info->pname.nspace) {
 627             free(pmix_client_globals.myserver->info->pname.nspace);
 628         }
 629         pmix_client_globals.myserver->info->pname.nspace = strdup(pmix_client_globals.myserver->nptr->nspace);
 630         pmix_client_globals.myserver->info->pname.rank = rank;
 631     }
 632     /* store the URI for subsequent lookups */
 633     urikv = PMIX_NEW(pmix_kval_t);
 634     urikv->key = strdup(PMIX_SERVER_URI);
 635     PMIX_VALUE_CREATE(urikv->value, 1);
 636     urikv->value->type = PMIX_STRING;
 637     asprintf(&urikv->value->data.string, "%s.%u;%s", nspace, rank, suri);
 638     PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer,
 639                       &pmix_globals.myid, PMIX_INTERNAL,
 640                       urikv);
 641     PMIX_RELEASE(urikv);  // maintain accounting
 642 
 643     pmix_ptl_base_set_nonblocking(sd);
 644 
 645     /* setup recv event */
 646     pmix_event_assign(&pmix_client_globals.myserver->recv_event,
 647                       pmix_globals.evbase,
 648                       pmix_client_globals.myserver->sd,
 649                       EV_READ | EV_PERSIST,
 650                       pmix_ptl_base_recv_handler, pmix_client_globals.myserver);
 651     pmix_client_globals.myserver->recv_ev_active = true;
 652     PMIX_POST_OBJECT(pmix_client_globals.myserver);
 653     pmix_event_add(&pmix_client_globals.myserver->recv_event, 0);
 654 
 655     /* setup send event */
 656     pmix_event_assign(&pmix_client_globals.myserver->send_event,
 657                       pmix_globals.evbase,
 658                       pmix_client_globals.myserver->sd,
 659                       EV_WRITE|EV_PERSIST,
 660                       pmix_ptl_base_send_handler, pmix_client_globals.myserver);
 661     pmix_client_globals.myserver->send_ev_active = false;
 662 
 663   cleanup:
 664     if (NULL != nspace) {
 665         free(nspace);
 666     }
 667     if (NULL != iptr) {
 668         PMIX_INFO_FREE(iptr, niptr);
 669     }
 670     if (NULL != rendfile) {
 671         free(rendfile);
 672     }
 673     if (NULL != suri) {
 674         free(suri);
 675     }
 676     if (NULL != server_nspace) {
 677         free(server_nspace);
 678     }
 679     return rc;
 680 }
 681 
 682 static pmix_status_t send_recv(struct pmix_peer_t *peer,
 683                                pmix_buffer_t *bfr,
 684                                pmix_ptl_cbfunc_t cbfunc,
 685                                void *cbdata)
 686 {
 687     pmix_ptl_sr_t *ms;
 688     pmix_peer_t *pr = (pmix_peer_t*)peer;
 689 
 690     pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
 691                         "[%s:%d] post send to server",
 692                         __FILE__, __LINE__);
 693 
 694     ms = PMIX_NEW(pmix_ptl_sr_t);
 695     PMIX_RETAIN(pr);
 696     ms->peer = pr;
 697     ms->bfr = bfr;
 698     ms->cbfunc = cbfunc;
 699     ms->cbdata = cbdata;
 700     PMIX_THREADSHIFT(ms, pmix_ptl_base_send_recv);
 701     return PMIX_SUCCESS;
 702 }
 703 
 704 static pmix_status_t send_oneway(struct pmix_peer_t *peer,
 705                                  pmix_buffer_t *bfr,
 706                                  pmix_ptl_tag_t tag)
 707 {
 708     pmix_ptl_queue_t *q;
 709     pmix_peer_t *pr = (pmix_peer_t*)peer;
 710 
 711     /* we have to transfer this to an event for thread
 712      * safety as we need to post this message on the
 713      * peer's send queue */
 714     q = PMIX_NEW(pmix_ptl_queue_t);
 715     PMIX_RETAIN(pr);
 716     q->peer = pr;
 717     q->buf = bfr;
 718     q->tag = tag;
 719     PMIX_THREADSHIFT(q, pmix_ptl_base_send);
 720     return PMIX_SUCCESS;
 721 }
 722 
 723 static void timeout(int sd, short args, void *cbdata)
 724 {
 725     pmix_lock_t *lock = (pmix_lock_t*)cbdata;
 726     PMIX_WAKEUP_THREAD(lock);
 727 }
 728 
 729 /****    SUPPORTING FUNCTIONS    ****/
 730 static pmix_status_t parse_uri_file(char *filename,
 731                                     char **uri,
 732                                     char **nspace,
 733                                     pmix_rank_t *rank)
 734 {
 735     FILE *fp;
 736     char *srvr, *p, *p2;
 737     pmix_lock_t lock;
 738     pmix_event_t ev;
 739     struct timeval tv;
 740     int retries;
 741     int major;
 742 
 743     fp = fopen(filename, "r");
 744     if (NULL == fp) {
 745         /* if we cannot open the file, then the server must not
 746          * be configured to support tool connections, or this
 747          * user isn't authorized to access it - or it may just
 748          * not exist yet! Check for existence */
 749         if (0 != access(filename, R_OK)) {
 750             if (ENOENT == errno && 0 < mca_ptl_tcp_component.wait_to_connect) {
 751                 /* the file does not exist, so give it
 752                  * a little time to see if the server
 753                  * is still starting up */
 754                 retries = 0;
 755                 do {
 756                     ++retries;
 757                     pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
 758                                         "WAITING FOR CONNECTION FILE");
 759                     PMIX_CONSTRUCT_LOCK(&lock);
 760                     tv.tv_sec = mca_ptl_tcp_component.wait_to_connect;
 761                     tv.tv_usec = 0;
 762                     pmix_event_evtimer_set(pmix_globals.evbase, &ev,
 763                                            timeout, &lock);
 764                     pmix_event_evtimer_add(&ev, &tv);
 765                     PMIX_WAIT_THREAD(&lock);
 766                     PMIX_DESTRUCT_LOCK(&lock);
 767                     fp = fopen(filename, "r");
 768                     if (NULL != fp) {
 769                         /* we found it! */
 770                         goto process;
 771                     }
 772                 } while (retries < mca_ptl_tcp_component.max_retries);
 773                 /* otherwise, mark it as unreachable */
 774             }
 775         }
 776         return PMIX_ERR_UNREACH;
 777     }
 778 
 779   process:
 780     /* get the URI */
 781     srvr = pmix_getline(fp);
 782     if (NULL == srvr) {
 783         PMIX_ERROR_LOG(PMIX_ERR_FILE_READ_FAILURE);
 784         fclose(fp);
 785         return PMIX_ERR_UNREACH;
 786     }
 787     /* see if this file contains the server's version */
 788     p2 = pmix_getline(fp);
 789     if (NULL == p2) {
 790         pmix_client_globals.myserver->proc_type = PMIX_PROC_SERVER | PMIX_PROC_V20;
 791         pmix_client_globals.myserver->protocol = PMIX_PROTOCOL_V2;
 792         pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
 793                             "V20 SERVER DETECTED");
 794     } else {
 795         /* convert the version to a number */
 796         if ('v' == p2[0]) {
 797             major = strtoul(&p2[1], NULL, 10);
 798         } else {
 799             major = strtoul(p2, NULL, 10);
 800         }
 801         if (2 == major) {
 802             pmix_client_globals.myserver->proc_type = PMIX_PROC_SERVER | PMIX_PROC_V21;
 803             pmix_client_globals.myserver->protocol = PMIX_PROTOCOL_V2;
 804             pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
 805                                 "V21 SERVER DETECTED");
 806         } else if (3 <= major) {
 807             pmix_client_globals.myserver->proc_type = PMIX_PROC_SERVER | PMIX_PROC_V3;
 808             pmix_client_globals.myserver->protocol = PMIX_PROTOCOL_V2;
 809             pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
 810                                 "V3 SERVER DETECTED");
 811         }
 812     }
 813     if (NULL != p2) {
 814         free(p2);
 815     }
 816 
 817     fclose(fp);
 818     /* up to the first ';' is the server nspace/rank */
 819     if (NULL == (p = strchr(srvr, ';'))) {
 820         /* malformed */
 821         free(srvr);
 822         return PMIX_ERR_UNREACH;
 823     }
 824     *p = '\0';
 825     ++p;  // move past the semicolon
 826     /* the nspace is the section up to the '.' */
 827     if (NULL == (p2 = strchr(srvr, '.'))) {
 828         /* malformed */
 829         free(srvr);
 830         return PMIX_ERR_UNREACH;
 831     }
 832     *p2 = '\0';
 833     ++p2;
 834     /* set the server nspace/rank */
 835     *nspace = strdup(srvr);
 836     *rank = strtoull(p2, NULL, 10);
 837 
 838     /* now parse the uri itself */
 839     *uri = strdup(p);
 840     free(srvr);
 841 
 842     return PMIX_SUCCESS;
 843 }
 844 
 845 static pmix_status_t try_connect(char *uri, int *sd, pmix_info_t iptr[], size_t niptr)
 846 {
 847     char *p, *p2, *host;
 848     struct sockaddr_in *in;
 849     struct sockaddr_in6 *in6;
 850     size_t len;
 851     pmix_status_t rc;
 852     int retries = 0;
 853     uint8_t myflag;
 854 
 855     pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
 856                         "pmix:tcp try connect to %s", uri);
 857 
 858     /* mark that we are the active module for this server */
 859     pmix_client_globals.myserver->nptr->compat.ptl = &pmix_ptl_tcp_module;
 860 
 861     /* setup the path to the daemon rendezvous point */
 862     memset(&mca_ptl_tcp_component.connection, 0, sizeof(struct sockaddr_storage));
 863     if (0 == strncmp(uri, "tcp4", 4)) {
 864         /* need to skip the tcp4: part */
 865         p = strdup(&uri[7]);
 866         if (NULL == p) {
 867             PMIX_ERROR_LOG(PMIX_ERR_NOMEM);
 868             return PMIX_ERR_NOMEM;
 869         }
 870 
 871         /* separate the IP address from the port */
 872         p2 = strchr(p, ':');
 873         if (NULL == p2) {
 874             free(p);
 875             PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
 876             return PMIX_ERR_BAD_PARAM;
 877         }
 878         *p2 = '\0';
 879         p2++;
 880         host = p;
 881         /* load the address */
 882         in = (struct sockaddr_in*)&mca_ptl_tcp_component.connection;
 883         in->sin_family = AF_INET;
 884         in->sin_addr.s_addr = inet_addr(host);
 885         if (in->sin_addr.s_addr == INADDR_NONE) {
 886             free(p);
 887             PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
 888             return PMIX_ERR_BAD_PARAM;
 889         }
 890         in->sin_port = htons(atoi(p2));
 891         len = sizeof(struct sockaddr_in);
 892     } else {
 893         /* need to skip the tcp6: part */
 894         p = strdup(&uri[7]);
 895         if (NULL == p) {
 896             PMIX_ERROR_LOG(PMIX_ERR_NOMEM);
 897             return PMIX_ERR_NOMEM;
 898         }
 899 
 900         p2 = strchr(p, ':');
 901         if (NULL == p2) {
 902             free(p);
 903             PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
 904             return PMIX_ERR_BAD_PARAM;
 905         }
 906         *p2 = '\0';
 907         if (']' == p[strlen(p)-1]) {
 908             p[strlen(p)-1] = '\0';
 909         }
 910         if ('[' == p[0]) {
 911             host = &p[1];
 912         } else {
 913             host = &p[0];
 914         }
 915         /* load the address */
 916         in6 = (struct sockaddr_in6*)&mca_ptl_tcp_component.connection;
 917         in6->sin6_family = AF_INET6;
 918         if (0 == inet_pton(AF_INET6, host, (void*)&in6->sin6_addr)) {
 919             pmix_output (0, "ptl_tcp_parse_uri: Could not convert %s\n", host);
 920             free(p);
 921             PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
 922             return PMIX_ERR_BAD_PARAM;
 923         }
 924         in6->sin6_port = htons(atoi(p2));
 925         len = sizeof(struct sockaddr_in6);
 926     }
 927     free(p);
 928 
 929   retry:
 930     /* establish the connection */
 931     if (PMIX_SUCCESS != (rc = pmix_ptl_base_connect(&mca_ptl_tcp_component.connection, len, sd))) {
 932         /* do not error log - might just be a stale connection point */
 933         return rc;
 934     }
 935 
 936     /* send our identity and any authentication credentials to the server */
 937     if (PMIX_SUCCESS != (rc = send_connect_ack(*sd, &myflag, iptr, niptr))) {
 938         PMIX_ERROR_LOG(rc);
 939         CLOSE_THE_SOCKET(*sd);
 940         return rc;
 941     }
 942 
 943     /* do whatever handshake is required */
 944     if (PMIX_SUCCESS != (rc = recv_connect_ack(*sd, myflag))) {
 945         CLOSE_THE_SOCKET(*sd);
 946         if (PMIX_ERR_TEMP_UNAVAILABLE == rc) {
 947             ++retries;
 948             if( retries < mca_ptl_tcp_component.handshake_max_retries ) {
 949                 goto retry;
 950             }
 951         }
 952         return rc;
 953     }
 954 
 955     return PMIX_SUCCESS;
 956 }
 957 static pmix_status_t send_connect_ack(int sd, uint8_t *myflag,
 958                                       pmix_info_t iptr[], size_t niptr)
 959 {
 960     char *msg;
 961     pmix_ptl_hdr_t hdr;
 962     size_t sdsize=0, csize=0;
 963     pmix_byte_object_t cred;
 964     char *sec, *bfrops, *gds;
 965     pmix_bfrop_buffer_type_t bftype;
 966     pmix_status_t rc;
 967     uint8_t flag;
 968     uid_t euid;
 969     gid_t egid;
 970     uint32_t u32;
 971     pmix_buffer_t buf;
 972 
 973     pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
 974                         "pmix:tcp SEND CONNECT ACK");
 975 
 976     /* if we are a server, then we shouldn't be here */
 977     if (PMIX_PROC_IS_SERVER(pmix_globals.mypeer) &&
 978         !PMIX_PROC_IS_LAUNCHER(pmix_globals.mypeer)) {
 979         PMIX_ERROR_LOG(PMIX_ERR_NOT_SUPPORTED);
 980         return PMIX_ERR_NOT_SUPPORTED;
 981     }
 982 
 983     /* setup the header */
 984     memset(&hdr, 0, sizeof(pmix_ptl_hdr_t));
 985     hdr.pindex = -1;
 986     hdr.tag = UINT32_MAX;
 987 
 988     /* a security module was assigned to us during rte_init based
 989      * on a list of available security modules provided by our
 990      * local PMIx server, if known. Now use that module to
 991      * get a credential, if the security system provides one. Not
 992      * every psec module will do so, thus we must first check */
 993     PMIX_BYTE_OBJECT_CONSTRUCT(&cred);
 994     PMIX_PSEC_CREATE_CRED(rc, pmix_globals.mypeer,
 995                           NULL, 0, NULL, 0, &cred);
 996     if (PMIX_SUCCESS != rc) {
 997         return rc;
 998     }
 999 
1000     /* allow space for a marker indicating client vs tool */
1001     sdsize = 1;
1002 
1003     /* Defined marker values:
1004      *
1005      * 0 => simple client process
1006      * 1 => legacy tool - may or may not have an identifier
1007      * 2 => legacy launcher - may or may not have an identifier
1008      * ------------------------------------------
1009      * 3 => self-started tool process that needs an identifier
1010      * 4 => self-started tool process that was given an identifier by caller
1011      * 5 => tool that was started by a PMIx server - identifier specified by server
1012      * 6 => self-started launcher that needs an identifier
1013      * 7 => self-started launcher that was given an identifier by caller
1014      * 8 => launcher that was started by a PMIx server - identifier specified by server
1015      */
1016     if (PMIX_PROC_IS_LAUNCHER(pmix_globals.mypeer)) {
1017         if (PMIX_PROC_IS_CLIENT(pmix_globals.mypeer)) {
1018             /* if we are both launcher and client, then we need
1019              * to tell the server we are both */
1020             flag = 8;
1021             /* add space for our uid/gid for ACL purposes */
1022             sdsize += 2*sizeof(uint32_t);
1023             /* add space for our identifier */
1024             sdsize += strlen(pmix_globals.myid.nspace) + 1 + sizeof(uint32_t);
1025         } else {
1026             /* add space for our uid/gid for ACL purposes */
1027             sdsize += 2*sizeof(uint32_t);
1028             /* if they gave us an identifier, we need to pass it */
1029             if (0 < strlen(pmix_globals.myid.nspace) &&
1030                 PMIX_RANK_INVALID != pmix_globals.myid.rank) {
1031                 flag = 7;
1032                 sdsize += strlen(pmix_globals.myid.nspace) + 1 + sizeof(uint32_t);
1033             } else {
1034                 flag = 6;
1035             }
1036         }
1037 
1038     } else if (PMIX_PROC_IS_CLIENT(pmix_globals.mypeer) &&
1039                !PMIX_PROC_IS_TOOL(pmix_globals.mypeer)) {
1040         /* we are a simple client */
1041         flag = 0;
1042         /* reserve space for our nspace and rank info */
1043         sdsize += strlen(pmix_globals.myid.nspace) + 1 + sizeof(uint32_t);
1044 
1045     } else {  // must be a tool of some sort
1046         /* add space for our uid/gid for ACL purposes */
1047         sdsize += 2*sizeof(uint32_t);
1048         if (PMIX_PROC_IS_CLIENT(pmix_globals.mypeer)) {
1049             /* if we are both tool and client, then we need
1050              * to tell the server we are both */
1051             flag = 5;
1052             /* add space for our identifier */
1053             sdsize += strlen(pmix_globals.myid.nspace) + 1 + sizeof(uint32_t);
1054         } else if (0 < strlen(pmix_globals.myid.nspace) &&
1055             PMIX_RANK_INVALID != pmix_globals.myid.rank) {
1056             /* we were given an identifier by the caller, pass it */
1057             sdsize += strlen(pmix_globals.myid.nspace) + 1 + sizeof(uint32_t);
1058             flag = 4;
1059         } else {
1060             /* we are a self-started tool that needs an identifier */
1061             flag = 3;
1062         }
1063     }
1064     *myflag = flag;
1065 
1066     /* add the name of our active sec module - we selected it
1067      * in pmix_client.c prior to entering here */
1068     sec = pmix_globals.mypeer->nptr->compat.psec->name;
1069 
1070     /* add our active bfrops module name */
1071     bfrops = pmix_globals.mypeer->nptr->compat.bfrops->version;
1072     /* and the type of buffer we are using */
1073     bftype = pmix_globals.mypeer->nptr->compat.type;
1074 
1075     /* add our active gds module for working with the server */
1076     gds = (char*)pmix_client_globals.myserver->nptr->compat.gds->name;
1077 
1078     /* if we were given info structs to pass to the server, pack them */
1079     PMIX_CONSTRUCT(&buf, pmix_buffer_t);
1080     if (NULL != iptr) {
1081         PMIX_BFROPS_PACK(rc, pmix_globals.mypeer, &buf, &niptr, 1, PMIX_SIZE);
1082         PMIX_BFROPS_PACK(rc, pmix_globals.mypeer, &buf, iptr, niptr, PMIX_INFO);
1083     }
1084 
1085     /* set the number of bytes to be read beyond the header - must
1086      * NULL terminate the strings! */
1087     hdr.nbytes = sdsize + strlen(PMIX_VERSION) + 1 + strlen(sec) + 1 \
1088                 + strlen(bfrops) + 1 + sizeof(bftype) \
1089                 + strlen(gds) + 1 + sizeof(uint32_t) + cred.size \
1090                 + buf.bytes_used;
1091 
1092     /* create a space for our message */
1093     sdsize = (sizeof(hdr) + hdr.nbytes);
1094     if (NULL == (msg = (char*)malloc(sdsize))) {
1095         PMIX_BYTE_OBJECT_DESTRUCT(&cred);
1096         free(sec);
1097         PMIX_DESTRUCT(&buf);
1098         return PMIX_ERR_OUT_OF_RESOURCE;
1099     }
1100     memset(msg, 0, sdsize);
1101 
1102     /* load the message */
1103     csize=0;
1104     memcpy(msg, &hdr, sizeof(pmix_ptl_hdr_t));
1105     csize += sizeof(pmix_ptl_hdr_t);
1106 
1107     /* provide our active psec module */
1108     memcpy(msg+csize, sec, strlen(sec));
1109     csize += strlen(sec)+1;
1110 
1111     /* load the length of the credential - we put this in uint32_t
1112      * format as that is a fixed size, and convert to network
1113      * byte order for heterogeneity */
1114     u32 = htonl((uint32_t)cred.size);
1115     memcpy(msg+csize, &u32, sizeof(uint32_t));
1116     csize += sizeof(uint32_t);
1117     /* load the credential */
1118     if (0 < u32) {
1119         memcpy(msg+csize, cred.bytes, cred.size);
1120         csize += cred.size;
1121     }
1122     PMIX_BYTE_OBJECT_DESTRUCT(&cred);
1123 
1124     /* load our process type - this is a single byte,
1125      * so no worry about heterogeneity here */
1126     memcpy(msg+csize, &flag, 1);
1127     csize += 1;
1128 
1129     if (0 == flag) {
1130         /* if we are a client, provide our nspace/rank */
1131         memcpy(msg+csize, pmix_globals.myid.nspace, strlen(pmix_globals.myid.nspace));
1132         csize += strlen(pmix_globals.myid.nspace)+1;
1133         /* again, need to convert */
1134         u32 = htonl((uint32_t)pmix_globals.myid.rank);
1135         memcpy(msg+csize, &u32, sizeof(uint32_t));
1136         csize += sizeof(uint32_t);
1137     } else if (3 == flag || 6 == flag) {
1138         /* we are a tool or launcher that needs an identifier - add our ACLs */
1139         euid = geteuid();
1140         u32 = htonl(euid);
1141         memcpy(msg+csize, &u32, sizeof(uint32_t));
1142         csize += sizeof(uint32_t);
1143         egid = getegid();
1144         u32 = htonl(egid);
1145         memcpy(msg+csize, &u32, sizeof(uint32_t));
1146         csize += sizeof(uint32_t);
1147     } else if (4 == flag || 5 == flag || 7 == flag || 8 == flag) {
1148         /* we are a tool or launcher that has an identifier - start with our ACLs */
1149         euid = geteuid();
1150         u32 = htonl(euid);
1151         memcpy(msg+csize, &u32, sizeof(uint32_t));
1152         csize += sizeof(uint32_t);
1153         egid = getegid();
1154         u32 = htonl(egid);
1155         memcpy(msg+csize, &u32, sizeof(uint32_t));
1156         csize += sizeof(uint32_t);
1157         /* now add our identifier */
1158         memcpy(msg+csize, pmix_globals.myid.nspace, strlen(pmix_globals.myid.nspace));
1159         csize += strlen(pmix_globals.myid.nspace)+1;
1160         /* again, need to convert */
1161         u32 = htonl((uint32_t)pmix_globals.myid.rank);
1162         memcpy(msg+csize, &u32, sizeof(uint32_t));
1163         csize += sizeof(uint32_t);
1164     } else {
1165         /* not a valid flag */
1166         PMIX_DESTRUCT(&buf);
1167         return PMIX_ERR_NOT_SUPPORTED;
1168     }
1169 
1170     /* provide our version */
1171     memcpy(msg+csize, PMIX_VERSION, strlen(PMIX_VERSION));
1172     csize += strlen(PMIX_VERSION)+1;
1173 
1174     /* provide our active bfrops module */
1175     memcpy(msg+csize, bfrops, strlen(bfrops));
1176     csize += strlen(bfrops)+1;
1177 
1178     /* provide the bfrops type */
1179     memcpy(msg+csize, &bftype, sizeof(bftype));
1180     csize += sizeof(bftype);
1181 
1182     /* provide the gds module */
1183     memcpy(msg+csize, gds, strlen(gds));
1184     csize += strlen(gds)+1;
1185 
1186     /* provide the info struct bytes */
1187     memcpy(msg+csize, buf.base_ptr, buf.bytes_used);
1188     csize += buf.bytes_used;
1189 
1190     /* send the entire message across */
1191     if (PMIX_SUCCESS != pmix_ptl_base_send_blocking(sd, msg, sdsize)) {
1192         free(msg);
1193         PMIX_DESTRUCT(&buf);
1194         return PMIX_ERR_UNREACH;
1195     }
1196     free(msg);
1197     PMIX_DESTRUCT(&buf);
1198     return PMIX_SUCCESS;
1199 }
1200 
1201 /* we receive a connection acknowledgement from the server,
1202  * consisting of nothing more than a status report. If success,
1203  * then we initiate authentication method */
1204 static pmix_status_t recv_connect_ack(int sd, uint8_t myflag)
1205 {
1206     pmix_status_t reply;
1207     pmix_status_t rc;
1208     struct timeval tv, save;
1209     pmix_socklen_t sz;
1210     bool sockopt = true;
1211     pmix_nspace_t nspace;
1212     uint32_t u32;
1213 
1214     pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
1215                         "pmix: RECV CONNECT ACK FROM SERVER");
1216 
1217     /* get the current timeout value so we can reset to it */
1218     sz = sizeof(save);
1219     if (0 != getsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, (void*)&save, &sz)) {
1220         if (ENOPROTOOPT == errno || EOPNOTSUPP == errno) {
1221             sockopt = false;
1222         } else {
1223            return PMIX_ERR_UNREACH;
1224        }
1225    } else {
1226         /* set a timeout on the blocking recv so we don't hang */
1227         tv.tv_sec  = mca_ptl_tcp_component.handshake_wait_time;
1228         tv.tv_usec = 0;
1229         if (0 != setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) {
1230             if (ENOPROTOOPT == errno || EOPNOTSUPP == errno) {
1231                 sockopt = false;
1232             } else {
1233                 pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
1234                                     "pmix: recv_connect_ack could not setsockopt SO_RCVTIMEO");
1235                 return PMIX_ERR_UNREACH;
1236             }
1237         }
1238     }
1239 
1240     /* receive the status reply */
1241     rc = pmix_ptl_base_recv_blocking(sd, (char*)&u32, sizeof(uint32_t));
1242     if (PMIX_SUCCESS != rc) {
1243         if (sockopt) {
1244             /* return the socket to normal */
1245             if (0 != setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &save, sz)) {
1246                 return PMIX_ERR_UNREACH;
1247             }
1248         }
1249         return rc;
1250     }
1251     reply = ntohl(u32);
1252 
1253     if (0 == myflag) {
1254         /* see if they want us to do the handshake */
1255         if (PMIX_ERR_READY_FOR_HANDSHAKE == reply) {
1256             PMIX_PSEC_CLIENT_HANDSHAKE(rc, pmix_client_globals.myserver, sd);
1257             if (PMIX_SUCCESS != rc) {
1258                 return rc;
1259             }
1260         } else if (PMIX_SUCCESS != reply) {
1261             return reply;
1262         }
1263         pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
1264                             "pmix: RECV CONNECT CONFIRMATION");
1265 
1266         /* receive our index into the server's client array */
1267         rc = pmix_ptl_base_recv_blocking(sd, (char*)&u32, sizeof(uint32_t));
1268         if (PMIX_SUCCESS != rc) {
1269             return rc;
1270         }
1271         pmix_globals.pindex = ntohl(u32);
1272     } else {  // we are a tool
1273         /* if the status indicates an error, then we are done */
1274         if (PMIX_SUCCESS != reply) {
1275             return reply;
1276         }
1277         /* if we needed an identifier, recv it */
1278         if (3 == myflag || 6 == myflag) {
1279             /* first the nspace */
1280             rc = pmix_ptl_base_recv_blocking(sd, (char*)&nspace, PMIX_MAX_NSLEN+1);
1281             if (PMIX_SUCCESS != rc) {
1282                 return rc;
1283             }
1284             PMIX_LOAD_NSPACE(pmix_globals.myid.nspace, nspace);
1285             /* now the rank */
1286             rc = pmix_ptl_base_recv_blocking(sd, (char*)&u32, sizeof(uint32_t));
1287             if (PMIX_SUCCESS != rc) {
1288                 return rc;
1289             }
1290             /* convert and store */
1291             pmix_globals.myid.rank = htonl(u32);
1292         }
1293 
1294         /* get the server's nspace and rank so we can send to it */
1295         if (NULL == pmix_client_globals.myserver->info) {
1296             pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t);
1297         }
1298         if (NULL == pmix_client_globals.myserver->nptr) {
1299             pmix_client_globals.myserver->nptr = PMIX_NEW(pmix_namespace_t);
1300         }
1301         pmix_ptl_base_recv_blocking(sd, (char*)nspace, PMIX_MAX_NSLEN+1);
1302         if (NULL != pmix_client_globals.myserver->nptr->nspace) {
1303             free(pmix_client_globals.myserver->nptr->nspace);
1304         }
1305         pmix_client_globals.myserver->nptr->nspace = strdup(nspace);
1306         if (NULL != pmix_client_globals.myserver->info->pname.nspace) {
1307             free(pmix_client_globals.myserver->info->pname.nspace);
1308         }
1309         pmix_client_globals.myserver->info->pname.nspace = strdup(nspace);
1310         pmix_ptl_base_recv_blocking(sd, (char*)&u32, sizeof(uint32_t));
1311         pmix_client_globals.myserver->info->pname.rank = htonl(u32);
1312 
1313         pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
1314                             "pmix: RECV CONNECT CONFIRMATION FOR TOOL %s:%d FROM SERVER %s:%d",
1315                             pmix_globals.myid.nspace, pmix_globals.myid.rank,
1316                             pmix_client_globals.myserver->info->pname.nspace,
1317                             pmix_client_globals.myserver->info->pname.rank);
1318 
1319         /* get the returned status from the security handshake */
1320         pmix_ptl_base_recv_blocking(sd, (char*)&reply, sizeof(pmix_status_t));
1321         if (PMIX_SUCCESS != reply) {
1322             /* see if they want us to do the handshake */
1323             if (PMIX_ERR_READY_FOR_HANDSHAKE == reply) {
1324                 PMIX_PSEC_CLIENT_HANDSHAKE(reply, pmix_client_globals.myserver, sd);
1325                 if (PMIX_SUCCESS != reply) {
1326                     return reply;
1327                 }
1328                 /* if the handshake succeeded, then fall thru to the next step */
1329             } else {
1330                 return reply;
1331             }
1332         }
1333     }
1334 
1335     if (sockopt) {
1336         if (0 != setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &save, sz)) {
1337             return PMIX_ERR_UNREACH;
1338         }
1339     }
1340 
1341     return PMIX_SUCCESS;
1342 }
1343 
1344 static pmix_status_t df_search(char *dirname, char *prefix,
1345                                pmix_info_t info[], size_t ninfo,
1346                                int *sd, char **nspace,
1347                                pmix_rank_t *rank, char **uri)
1348 {
1349     char *suri, *nsp, *newdir;
1350     pmix_rank_t rk;
1351     pmix_status_t rc;
1352     struct stat buf;
1353     DIR *cur_dirp;
1354     struct dirent *dir_entry;
1355 
1356     if (NULL == (cur_dirp = opendir(dirname))) {
1357         return PMIX_ERR_NOT_FOUND;
1358     }
1359 
1360     pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
1361                         "pmix:tcp: searching directory %s", dirname);
1362 
1363     /* search the entries for something that starts with the provided prefix */
1364     while (NULL != (dir_entry = readdir(cur_dirp))) {
1365         /* ignore the . and .. entries */
1366         if (0 == strcmp(dir_entry->d_name, ".") ||
1367             0 == strcmp(dir_entry->d_name, "..")) {
1368             continue;
1369         }
1370         newdir = pmix_os_path(false, dirname, dir_entry->d_name, NULL);
1371         if (-1 == stat(newdir, &buf)) {
1372             free(newdir);
1373             continue;
1374         }
1375         /* if it is a directory, down search */
1376         if (S_ISDIR(buf.st_mode)) {
1377             rc = df_search(newdir, prefix, info, ninfo, sd, nspace, rank, uri);
1378             free(newdir);
1379             if (PMIX_SUCCESS == rc) {
1380                 closedir(cur_dirp);
1381                 return rc;
1382             }
1383             continue;
1384         }
1385         pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
1386                             "pmix:tcp: checking %s vs %s", dir_entry->d_name, prefix);
1387         /* see if it starts with our prefix */
1388         if (0 == strncmp(dir_entry->d_name, prefix, strlen(prefix))) {
1389             /* try to read this file */
1390             pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
1391                                 "pmix:tcp: reading file %s", newdir);
1392             rc = parse_uri_file(newdir, &suri, &nsp, &rk);
1393             if (PMIX_SUCCESS == rc) {
1394                 /* go ahead and try to connect */
1395                 pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
1396                                     "pmix:tcp: attempting to connect to %s", suri);
1397                 if (PMIX_SUCCESS == try_connect(suri, sd, info, ninfo)) {
1398                     (*nspace) = nsp;
1399                     *rank = rk;
1400                     closedir(cur_dirp);
1401                     *uri = suri;
1402                     free(newdir);
1403                     return PMIX_SUCCESS;
1404                 }
1405                 free(suri);
1406                 free(nsp);
1407             }
1408         }
1409         free(newdir);
1410     }
1411     closedir(cur_dirp);
1412     return PMIX_ERR_NOT_FOUND;
1413 }

/* [<][>][^][v][top][bottom][index][help] */