This source file includes the following definitions.
- orte_pmix_server_register_nspace
- mycbfunc
   1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 
  11 
  12 
  13 
  14 
  15 
  16 
  17 
  18 
  19 
  20 
  21 
  22 
  23 
  24 
  25 
  26 
  27 
  28 
  29 
  30 #include "orte_config.h"
  31 
  32 #ifdef HAVE_UNISTD_H
  33 #include <unistd.h>
  34 #endif
  35 #include <fcntl.h>
  36 
  37 #include "opal_stdint.h"
  38 #include "opal/types.h"
  39 #include "opal/util/argv.h"
  40 #include "opal/util/output.h"
  41 #include "opal/util/error.h"
  42 #include "opal/mca/hwloc/base/base.h"
  43 #include "opal/mca/pmix/pmix.h"
  44 
  45 #include "orte/util/name_fns.h"
  46 #include "orte/runtime/orte_globals.h"
  47 #include "orte/runtime/orte_wait.h"
  48 #include "orte/mca/errmgr/errmgr.h"
  49 #include "orte/mca/rmaps/base/base.h"
  50 
  51 #include "pmix_server_internal.h"
  52 #include "pmix_server.h"
  53 
  54 static void mycbfunc(int status, void *cbdata);
  55 
  56 
  57 int orte_pmix_server_register_nspace(orte_job_t *jdata, bool force)
  58 {
  59     int rc;
  60     orte_proc_t *pptr;
  61     int i, k, n;
  62     opal_list_t *info, *pmap;
  63     opal_value_t *kv;
  64     orte_node_t *node, *mynode;
  65     opal_vpid_t vpid;
  66     char **list, **procs, **micro, *tmp, *regex;
  67     orte_job_t *dmns;
  68     orte_job_map_t *map;
  69     orte_app_context_t *app;
  70     uid_t uid;
  71     gid_t gid;
  72     opal_list_t *cache;
  73     hwloc_obj_t machine;
  74     opal_buffer_t buf, bucket;
  75     opal_byte_object_t bo, *boptr;
  76     orte_proc_t *proc;
  77 
  78     opal_output_verbose(2, orte_pmix_server_globals.output,
  79                         "%s register nspace for %s",
  80                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
  81                         ORTE_JOBID_PRINT(jdata->jobid));
  82 
  83     
  84     info = OBJ_NEW(opal_list_t);
  85     uid = geteuid();
  86     gid = getegid();
  87 
  88     
  89     kv = OBJ_NEW(opal_value_t);
  90     kv->key = strdup(OPAL_PMIX_SERVER_NSPACE);
  91     kv->data.string = strdup(ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid));
  92     kv->type = OPAL_STRING;
  93     opal_list_append(info, &kv->super);
  94 
  95     kv = OBJ_NEW(opal_value_t);
  96     kv->key = strdup(OPAL_PMIX_SERVER_RANK);
  97     kv->data.uint32 = ORTE_PROC_MY_NAME->vpid;
  98     kv->type = OPAL_UINT32;
  99     opal_list_append(info, &kv->super);
 100 
 101     
 102     kv = OBJ_NEW(opal_value_t);
 103     kv->key = strdup(OPAL_PMIX_JOBID);
 104     kv->data.string = strdup(ORTE_JOBID_PRINT(jdata->jobid));
 105     kv->type = OPAL_STRING;
 106     opal_list_append(info, &kv->super);
 107 
 108     
 109     kv = OBJ_NEW(opal_value_t);
 110     kv->key = strdup(OPAL_PMIX_NPROC_OFFSET);
 111     kv->data.uint32 = jdata->offset;
 112     kv->type = OPAL_UINT32;
 113     opal_list_append(info, &kv->super);
 114 
 115     
 116     cache = NULL;
 117     if (orte_get_attribute(&jdata->attributes, ORTE_JOB_INFO_CACHE, (void**)&cache, OPAL_PTR) &&
 118         NULL != cache) {
 119         while (NULL != (kv = (opal_value_t*)opal_list_remove_first(cache))) {
 120             opal_list_append(info, &kv->super);
 121         }
 122         orte_remove_attribute(&jdata->attributes, ORTE_JOB_INFO_CACHE);
 123         OBJ_RELEASE(cache);
 124     }
 125 
 126     
 127     list = NULL;
 128     procs = NULL;
 129     map = jdata->map;
 130     for (i=0; i < map->nodes->size; i++) {
 131         micro = NULL;
 132         if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
 133             opal_argv_append_nosize(&list, node->name);
 134             
 135             for (k=0; k < node->procs->size; k++) {
 136                 if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, k))) {
 137                     if (jdata->jobid == pptr->name.jobid) {
 138                         opal_argv_append_nosize(µ, ORTE_VPID_PRINT(pptr->name.vpid));
 139                     }
 140                 }
 141             }
 142             
 143             if (NULL != micro) {
 144                 tmp = opal_argv_join(micro, ',');
 145                 opal_argv_free(micro);
 146                 opal_argv_append_nosize(&procs, tmp);
 147                 free(tmp);
 148             }
 149         }
 150     }
 151     
 152     if (NULL != list) {
 153         tmp = opal_argv_join(list, ',');
 154         opal_argv_free(list);
 155         list = NULL;
 156         if (OPAL_SUCCESS != (rc = opal_pmix.generate_regex(tmp, ®ex))) {
 157             ORTE_ERROR_LOG(rc);
 158             free(tmp);
 159             OPAL_LIST_RELEASE(info);
 160             return rc;
 161         }
 162         free(tmp);
 163         kv = OBJ_NEW(opal_value_t);
 164         kv->key = strdup(OPAL_PMIX_NODE_MAP);
 165         kv->type = OPAL_STRING;
 166         kv->data.string = regex;
 167         opal_list_append(info, &kv->super);
 168     }
 169 
 170     
 171     if (NULL != procs) {
 172         tmp = opal_argv_join(procs, ';');
 173         opal_argv_free(procs);
 174         procs = NULL;
 175         if (OPAL_SUCCESS != (rc = opal_pmix.generate_ppn(tmp, ®ex))) {
 176             ORTE_ERROR_LOG(rc);
 177             free(tmp);
 178             OPAL_LIST_RELEASE(info);
 179             return rc;
 180         }
 181         free(tmp);
 182         kv = OBJ_NEW(opal_value_t);
 183         kv->key = strdup(OPAL_PMIX_PROC_MAP);
 184         kv->type = OPAL_STRING;
 185         kv->data.string = regex;
 186         opal_list_append(info, &kv->super);
 187     }
 188 
 189     
 190     if (NULL == (dmns = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
 191         ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
 192         OPAL_LIST_RELEASE(info);
 193         return ORTE_ERR_NOT_FOUND;
 194     }
 195     if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(dmns->procs, ORTE_PROC_MY_NAME->vpid))) {
 196         ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
 197         OPAL_LIST_RELEASE(info);
 198         return ORTE_ERR_NOT_FOUND;
 199     }
 200     mynode = pptr->node;
 201     if (NULL == mynode) {
 202         
 203         ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
 204         OPAL_LIST_RELEASE(info);
 205         return ORTE_ERR_NOT_FOUND;
 206     }
 207     
 208     kv = OBJ_NEW(opal_value_t);
 209     kv->key = strdup(OPAL_PMIX_NODEID);
 210     kv->type = OPAL_UINT32;
 211     kv->data.uint32 = mynode->index;
 212     opal_list_append(info, &kv->super);
 213 
 214     
 215     kv = OBJ_NEW(opal_value_t);
 216     kv->key = strdup(OPAL_PMIX_NODE_SIZE);
 217     kv->type = OPAL_UINT32;
 218     kv->data.uint32 = mynode->num_procs;
 219     opal_list_append(info, &kv->super);
 220 
 221     
 222     kv = OBJ_NEW(opal_value_t);
 223     kv->key = strdup(OPAL_PMIX_NUM_NODES);
 224     kv->type = OPAL_UINT32;
 225     kv->data.uint32 = map->num_nodes;
 226     opal_list_append(info, &kv->super);
 227 
 228     
 229     kv = OBJ_NEW(opal_value_t);
 230     kv->key = strdup(OPAL_PMIX_UNIV_SIZE);
 231     kv->type = OPAL_UINT32;
 232     kv->data.uint32 = jdata->total_slots_alloc;
 233     opal_list_append(info, &kv->super);
 234 
 235     
 236     kv = OBJ_NEW(opal_value_t);
 237     kv->key = strdup(OPAL_PMIX_JOB_SIZE);
 238     kv->type = OPAL_UINT32;
 239     kv->data.uint32 = jdata->num_procs;
 240     opal_list_append(info, &kv->super);
 241 
 242     
 243     kv = OBJ_NEW(opal_value_t);
 244     kv->key = strdup(OPAL_PMIX_JOB_NUM_APPS);
 245     kv->type = OPAL_UINT32;
 246     kv->data.uint32 = jdata->num_apps;
 247     opal_list_append(info, &kv->super);
 248 
 249     
 250     kv = OBJ_NEW(opal_value_t);
 251     kv->key = strdup(OPAL_PMIX_LOCAL_SIZE);
 252     kv->type = OPAL_UINT32;
 253     kv->data.uint32 = jdata->num_local_procs;
 254     opal_list_append(info, &kv->super);
 255 
 256     
 257     kv = OBJ_NEW(opal_value_t);
 258     kv->key = strdup(OPAL_PMIX_MAX_PROCS);
 259     kv->type = OPAL_UINT32;
 260     kv->data.uint32 = jdata->total_slots_alloc;
 261     opal_list_append(info, &kv->super);
 262 
 263     
 264     kv = OBJ_NEW(opal_value_t);
 265     kv->key = strdup(OPAL_PMIX_TOPOLOGY_SIGNATURE);
 266     kv->type = OPAL_STRING;
 267     kv->data.string = strdup(orte_topo_signature);
 268     opal_list_append(info, &kv->super);
 269 
 270     
 271     machine = hwloc_get_next_obj_by_type (opal_hwloc_topology, HWLOC_OBJ_MACHINE, NULL);
 272     if (NULL != machine) {
 273         kv = OBJ_NEW(opal_value_t);
 274         kv->key = strdup(OPAL_PMIX_AVAIL_PHYS_MEMORY);
 275         kv->type = OPAL_UINT64;
 276 #if HWLOC_API_VERSION < 0x20000
 277         kv->data.uint64 = machine->memory.total_memory;
 278 #else
 279         kv->data.uint64 = machine->total_memory;
 280 #endif
 281         opal_list_append(info, &kv->super);
 282     }
 283 
 284     
 285     kv = OBJ_NEW(opal_value_t);
 286     kv->key = strdup(OPAL_PMIX_MAPBY);
 287     kv->type = OPAL_STRING;
 288     kv->data.string = strdup(orte_rmaps_base_print_mapping(jdata->map->mapping));
 289     opal_list_append(info, &kv->super);
 290 
 291     
 292     kv = OBJ_NEW(opal_value_t);
 293     kv->key = strdup(OPAL_PMIX_RANKBY);
 294     kv->type = OPAL_STRING;
 295     kv->data.string = strdup(orte_rmaps_base_print_ranking(jdata->map->ranking));
 296     opal_list_append(info, &kv->super);
 297 
 298     
 299     kv = OBJ_NEW(opal_value_t);
 300     kv->key = strdup(OPAL_PMIX_BINDTO);
 301     kv->type = OPAL_STRING;
 302     kv->data.string = strdup(opal_hwloc_base_print_binding(jdata->map->binding));
 303     opal_list_append(info, &kv->super);
 304 
 305 
 306 
 307     
 308     vpid = ORTE_VPID_MAX;
 309     micro = NULL;
 310     for (i=0; i < mynode->procs->size; i++) {
 311         if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(mynode->procs, i))) {
 312             continue;
 313         }
 314         if (pptr->name.jobid == jdata->jobid) {
 315             opal_argv_append_nosize(µ, ORTE_VPID_PRINT(pptr->name.vpid));
 316             if (pptr->name.vpid < vpid) {
 317                 vpid = pptr->name.vpid;
 318             }
 319             
 320             if (OPAL_SUCCESS != (rc = opal_pmix.server_register_client(&pptr->name, uid, gid,
 321                                                                        (void*)pptr, NULL, NULL))) {
 322                 ORTE_ERROR_LOG(rc);
 323             }
 324         }
 325     }
 326     if (NULL != micro) {
 327         
 328         kv = OBJ_NEW(opal_value_t);
 329         kv->key = strdup(OPAL_PMIX_LOCAL_PEERS);
 330         kv->type = OPAL_STRING;
 331         kv->data.string = opal_argv_join(micro, ',');
 332         opal_argv_free(micro);
 333         opal_list_append(info, &kv->super);
 334     }
 335 
 336     
 337     kv = OBJ_NEW(opal_value_t);
 338     kv->key = strdup(OPAL_PMIX_LOCALLDR);
 339     kv->type = OPAL_VPID;
 340     kv->data.name.vpid = vpid;
 341     opal_list_append(info, &kv->super);
 342 
 343     
 344 
 345 
 346 
 347 
 348 
 349 
 350     for (n=0; n < map->nodes->size; n++) {
 351         if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, n))) {
 352             continue;
 353         }
 354         
 355 
 356         for (i=0; i < node->procs->size; i++) {
 357             if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
 358                 continue;
 359             }
 360             
 361             if (pptr->name.jobid != jdata->jobid) {
 362                 continue;
 363             }
 364             
 365             kv = OBJ_NEW(opal_value_t);
 366             kv->key = strdup(OPAL_PMIX_PROC_DATA);
 367             kv->type = OPAL_PTR;
 368             kv->data.ptr = OBJ_NEW(opal_list_t);
 369             opal_list_append(info, &kv->super);
 370             pmap = kv->data.ptr;
 371 
 372             
 373             kv = OBJ_NEW(opal_value_t);
 374             kv->key = strdup(OPAL_PMIX_RANK);
 375             kv->type = OPAL_VPID;
 376             kv->data.name.vpid = pptr->name.vpid;
 377             opal_list_append(pmap, &kv->super);
 378 
 379             
 380             if (node == mynode) {
 381                 tmp = NULL;
 382                 if (orte_get_attribute(&pptr->attributes, ORTE_PROC_CPU_BITMAP, (void**)&tmp, OPAL_STRING) &&
 383                     NULL != tmp) {
 384                     kv = OBJ_NEW(opal_value_t);
 385                     kv->key = strdup(OPAL_PMIX_LOCALITY_STRING);
 386                     kv->type = OPAL_STRING;
 387                     kv->data.string = opal_hwloc_base_get_locality_string(opal_hwloc_topology, tmp);
 388                     opal_list_append(pmap, &kv->super);
 389                     free(tmp);
 390                 } else {
 391                     
 392                     kv = OBJ_NEW(opal_value_t);
 393                     kv->key = strdup(OPAL_PMIX_LOCALITY_STRING);
 394                     kv->type = OPAL_STRING;
 395                     kv->data.string = NULL;
 396                     opal_list_append(pmap, &kv->super);
 397                 }
 398             }
 399 
 400             
 401             kv = OBJ_NEW(opal_value_t);
 402             kv->key = strdup(OPAL_PMIX_GLOBAL_RANK);
 403             kv->type = OPAL_VPID;
 404             kv->data.name.vpid = pptr->name.vpid + jdata->offset;
 405             opal_list_append(pmap, &kv->super);
 406 
 407             if (1 < jdata->num_apps) {
 408                 
 409                 kv = OBJ_NEW(opal_value_t);
 410                 kv->key = strdup(OPAL_PMIX_APPNUM);
 411                 kv->type = OPAL_UINT32;
 412                 kv->data.uint32 = pptr->app_idx;
 413                 opal_list_append(pmap, &kv->super);
 414 
 415                 
 416                 app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, pptr->app_idx);
 417                 kv = OBJ_NEW(opal_value_t);
 418                 kv->key = strdup(OPAL_PMIX_APPLDR);
 419                 kv->type = OPAL_VPID;
 420                 kv->data.name.vpid = app->first_rank;
 421                 opal_list_append(pmap, &kv->super);
 422 
 423                 
 424                 kv = OBJ_NEW(opal_value_t);
 425                 kv->key = strdup(OPAL_PMIX_APP_RANK);
 426                 kv->type = OPAL_VPID;
 427                 kv->data.name.vpid = pptr->app_rank;
 428                 opal_list_append(pmap, &kv->super);
 429 
 430                 
 431                 kv = OBJ_NEW(opal_value_t);
 432                 kv->key = strdup(OPAL_PMIX_APP_SIZE);
 433                 kv->type = OPAL_UINT32;
 434                 kv->data.uint32 = app->num_procs;
 435                 opal_list_append(info, &kv->super);
 436 
 437                 app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, pptr->app_idx);
 438                 tmp = NULL;
 439                 if (orte_get_attribute(&app->attributes, ORTE_APP_PSET_NAME, (void**)&tmp, OPAL_STRING) &&
 440                     NULL != tmp) {
 441                     kv = OBJ_NEW(opal_value_t);
 442                     kv->key = strdup(OPAL_PMIX_PSET_NAME);
 443                     kv->type = OPAL_STRING;
 444                     kv->data.string = tmp;
 445                     opal_list_append(pmap, &kv->super);
 446                 }
 447             } else {
 448                 app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, 0);
 449                 tmp = NULL;
 450                 if (orte_get_attribute(&app->attributes, ORTE_APP_PSET_NAME, (void**)&tmp, OPAL_STRING) &&
 451                     NULL != tmp) {
 452                     kv = OBJ_NEW(opal_value_t);
 453                     kv->key = strdup(OPAL_PMIX_PSET_NAME);
 454                     kv->type = OPAL_STRING;
 455                     kv->data.string = tmp;
 456                     opal_list_append(pmap, &kv->super);
 457                 }
 458             }
 459 
 460             
 461             kv = OBJ_NEW(opal_value_t);
 462             kv->key = strdup(OPAL_PMIX_LOCAL_RANK);
 463             kv->type = OPAL_UINT16;
 464             kv->data.uint16 = pptr->local_rank;
 465             opal_list_append(pmap, &kv->super);
 466 
 467             
 468             kv = OBJ_NEW(opal_value_t);
 469             kv->key = strdup(OPAL_PMIX_NODE_RANK);
 470             kv->type = OPAL_UINT16;
 471             kv->data.uint32 = pptr->node_rank;
 472             opal_list_append(pmap, &kv->super);
 473 
 474             
 475             kv = OBJ_NEW(opal_value_t);
 476             kv->key = strdup(OPAL_PMIX_NODEID);
 477             kv->type = OPAL_UINT32;
 478             kv->data.uint32 = pptr->node->index;
 479             opal_list_append(pmap, &kv->super);
 480 
 481             if (map->num_nodes < orte_hostname_cutoff) {
 482                 kv = OBJ_NEW(opal_value_t);
 483                 kv->key = strdup(OPAL_PMIX_HOSTNAME);
 484                 kv->type = OPAL_STRING;
 485                 kv->data.string = strdup(pptr->node->name);
 486                 opal_list_append(pmap, &kv->super);
 487             }
 488         }
 489     }
 490 
 491     
 492     orte_set_attribute(&jdata->attributes, ORTE_JOB_NSPACE_REGISTERED, ORTE_ATTR_LOCAL, NULL, OPAL_BOOL);
 493 
 494     
 495     
 496     rc = opal_pmix.server_register_nspace(jdata->jobid,
 497                                           jdata->num_local_procs,
 498                                           info, NULL, NULL);
 499     OPAL_LIST_RELEASE(info);
 500     if (OPAL_SUCCESS != rc) {
 501         return rc;
 502     }
 503 
 504     
 505 
 506 
 507 
 508 
 509 
 510     if (ORTE_PROC_IS_HNP && ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid) == ORTE_JOB_FAMILY(jdata->jobid)) {
 511         
 512 
 513         OBJ_CONSTRUCT(&buf, opal_buffer_t);
 514         if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &jdata, 1, ORTE_JOB))) {
 515             ORTE_ERROR_LOG(rc);
 516             OBJ_DESTRUCT(&buf);
 517             return rc;
 518         }
 519 
 520         
 521         map = jdata->map;
 522         OBJ_CONSTRUCT(&bucket, opal_buffer_t);
 523         for (i=0; i < map->nodes->size; i++) {
 524             if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
 525                 continue;
 526             }
 527             opal_dss.pack(&bucket, &node->daemon->rml_uri, 1, OPAL_STRING);
 528         }
 529         opal_dss.unload(&bucket, (void**)&bo.bytes, &bo.size);
 530         boptr = &bo;
 531         opal_dss.pack(&buf, &boptr, 1, OPAL_BYTE_OBJECT);
 532 
 533         
 534         OBJ_CONSTRUCT(&bucket, opal_buffer_t);
 535         for (i=0; i < jdata->procs->size; i++) {
 536             if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, i))) {
 537                 continue;
 538             }
 539             opal_dss.pack(&bucket, &proc->name, 1, ORTE_NAME);
 540             opal_dss.pack(&bucket, &proc->node->daemon->name, 1, ORTE_NAME);
 541         }
 542         opal_dss.unload(&bucket, (void**)&bo.bytes, &bo.size);
 543         boptr = &bo;
 544         opal_dss.pack(&buf, &boptr, 1, OPAL_BYTE_OBJECT);
 545 
 546         info = OBJ_NEW(opal_list_t);
 547         
 548 
 549         kv = OBJ_NEW(opal_value_t);
 550         orte_util_convert_jobid_to_string(&kv->key, jdata->jobid);
 551         kv->type = OPAL_BYTE_OBJECT;
 552         opal_dss.unload(&buf, (void**)&kv->data.bo.bytes, &kv->data.bo.size);
 553         OBJ_DESTRUCT(&buf);
 554         opal_list_append(info, &kv->super);
 555 
 556         
 557         kv = OBJ_NEW(opal_value_t);
 558         kv->key = strdup(OPAL_PMIX_RANGE);
 559         kv->type = OPAL_UINT;
 560         kv->data.uint = OPAL_PMIX_RANGE_SESSION;
 561         opal_list_append(info, &kv->super);
 562 
 563         
 564         kv = OBJ_NEW(opal_value_t);
 565         kv->key = strdup(OPAL_PMIX_PERSISTENCE);
 566         kv->type = OPAL_INT;
 567         kv->data.integer = OPAL_PMIX_PERSIST_APP;
 568         opal_list_append(info, &kv->super);
 569 
 570         
 571         kv = OBJ_NEW(opal_value_t);
 572         kv->key = strdup(OPAL_PMIX_USERID);
 573         kv->type = OPAL_UINT32;
 574         kv->data.uint32 = geteuid();
 575         opal_list_append(info, &kv->super);
 576 
 577         
 578         if (ORTE_SUCCESS != (rc = pmix_server_publish_fn(ORTE_PROC_MY_NAME,
 579                                                          info, mycbfunc, info))) {
 580             ORTE_ERROR_LOG(rc);
 581         }
 582     }
 583 
 584     return rc;
 585 }
 586 
 587 static void mycbfunc(int status, void *cbdata)
 588 {
 589     opal_list_t *info = (opal_list_t*)cbdata;
 590 
 591     if (ORTE_SUCCESS != status) {
 592         ORTE_ERROR_LOG(status);
 593     }
 594     OPAL_LIST_RELEASE(info);
 595 }