root/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. check_oversubscribing
  2. dump_int_array
  3. dump_double_array
  4. mca_topo_treematch_dist_graph_create

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2011-2017 The University of Tennessee and The University
   4  *                         of Tennessee Research Foundation.  All rights
   5  *                         reserved.
   6  * Copyright (c) 2011-2018 Inria.  All rights reserved.
   7  * Copyright (c) 2011-2018 Bordeaux Polytechnic Institute
   8  * Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
   9  * Copyright (c) 2015-2017 Research Organization for Information Science
  10  *                         and Technology (RIST). All rights reserved.
  11  * Copyright (c) 2016      Los Alamos National Security, LLC. All rights
  12  *                         reserved.
  13  * Copyright (c) 2017      Cisco Systems, Inc.  All rights reserved
  14  * Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
  15  * $COPYRIGHT$
  16  *
  17  * Additional copyrights may follow
  18  *
  19  * $HEADER$
  20  */
  21 
  22 #include "ompi_config.h"
  23 
  24 #include "opal/constants.h"
  25 #include "opal/mca/hwloc/base/base.h"
  26 
  27 #include "ompi/mca/topo/treematch/topo_treematch.h"
  28 #include "ompi/mca/topo/treematch/treematch/treematch.h"
  29 #include "ompi/mca/topo/treematch/treematch/tm_mapping.h"
  30 #include "ompi/mca/topo/base/base.h"
  31 
  32 #include "ompi/communicator/communicator.h"
  33 #include "ompi/info/info.h"
  34 
  35 #include "ompi/mca/pml/pml.h"
  36 
  37 #include "opal/mca/pmix/pmix.h"
  38 
  39 /* #define __DEBUG__ 1  */
  40 
  41 /**
  42  * This function is a allreduce between all processes to detect for oversubscription.
  43  * On each node, the local_procs will be a different array, that contains only the
  44  * local processes. Thus, that process will compute the node oversubscription and will
  45  * bring this value to the operation, while every other process on the node will
  46  * contribute 0.
  47  * Doing an AllReduce might be an overkill for this situation, but it should remain
  48  * more scalable than a star reduction (between the roots of each node (nodes_roots),
  49  * followed by a bcast to all processes.
  50  */
  51 static int check_oversubscribing(int rank,
  52                                  int num_nodes,
  53                                  int num_objs_in_node,
  54                                  int num_procs_in_node,
  55                                  int *nodes_roots,
  56                                  int *local_procs,
  57                                  ompi_communicator_t *comm_old)
  58 {
  59     int oversubscribed = 0, local_oversub = 0, err;
  60 
  61     /* Only a single process per node, the local root, compute the oversubscription condition */
  62     if (rank == local_procs[0])
  63         if(num_objs_in_node < num_procs_in_node)
  64             local_oversub = 1;
  65 
  66 
  67     if (OMPI_SUCCESS != (err = comm_old->c_coll->coll_allreduce(&local_oversub, &oversubscribed, 1, MPI_INT,
  68                                                                 MPI_SUM, comm_old, comm_old->c_coll->coll_allreduce_module)))
  69         return err;
  70 
  71     return oversubscribed;
  72 }
  73 
  74 #ifdef __DEBUG__
  75 static void dump_int_array( int level, int output_id, char* prolog, char* line_prolog, int* array, size_t length )
  76 {
  77     size_t i;
  78     if( -1 == output_id ) return;
  79 
  80     opal_output_verbose(level, output_id, "%s : ", prolog);
  81     for(i = 0; i < length ; i++)
  82         opal_output_verbose(level, output_id, "%s [%lu:%i] ", line_prolog, i, array[i]);
  83     opal_output_verbose(level, output_id, "\n");
  84 }
  85 static void dump_double_array( int level, int output_id, char* prolog, char* line_prolog, double* array, size_t length )
  86 {
  87     size_t i;
  88 
  89     if( -1 == output_id ) return;
  90     opal_output_verbose(level, output_id, "%s : ", prolog);
  91     for(i = 0; i < length ; i++)
  92         opal_output_verbose(level, output_id, "%s [%lu:%lf] ", line_prolog, i, array[i]);
  93     opal_output_verbose(level, output_id, "\n");
  94 }
  95 #endif
  96 
  97 int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
  98                                          ompi_communicator_t *comm_old,
  99                                          int n, const int nodes[],
 100                                          const int degrees[], const int targets[],
 101                                          const int weights[],
 102                                          struct opal_info_t *info, int reorder,
 103                                          ompi_communicator_t **newcomm)
 104 {
 105     int err;
 106 
 107     if (OMPI_SUCCESS != (err = mca_topo_base_dist_graph_distribute(topo_module, comm_old, n, nodes,
 108                                                                    degrees, targets, weights,
 109                                                                    &(topo_module->mtc.dist_graph))))
 110         return err;
 111 
 112     if(!reorder) {  /* No reorder. Create a new communicator, then   */
 113                     /* jump out to attach the dist_graph and return */
 114     fallback:
 115 
 116         if( OMPI_SUCCESS == (err = ompi_comm_create(comm_old,
 117                                                     comm_old->c_local_group,
 118                                                     newcomm))){
 119             /* Attach the dist_graph to the newly created communicator */
 120             (*newcomm)->c_flags        |= OMPI_COMM_DIST_GRAPH;
 121             (*newcomm)->c_topo          = topo_module;
 122             (*newcomm)->c_topo->reorder = reorder;
 123         }
 124         return err;
 125     }  /* reorder == yes */
 126 
 127     mca_topo_base_comm_dist_graph_2_2_0_t *topo = NULL;
 128     ompi_proc_t *proc = NULL;
 129     MPI_Request  *reqs = NULL;
 130     hwloc_cpuset_t set = NULL;
 131     hwloc_obj_t object, root_obj;
 132     hwloc_obj_t *tracker = NULL;
 133     double *local_pattern = NULL;
 134     int *vpids, *colors = NULL;
 135     int *lindex_to_grank = NULL;
 136     int *nodes_roots = NULL, *k = NULL;
 137     int *localrank_to_objnum  = NULL;
 138     int depth = 0, effective_depth = 0, obj_rank = -1;
 139     int num_objs_in_node = 0, num_pus_in_node = 0;
 140     int numlevels = 0, num_nodes = 0, num_procs_in_node = 0;
 141     int rank, size, newrank = -1, hwloc_err, i, j, idx;
 142     int oversubscribing_objs = 0, oversubscribed_pus = 0;
 143     uint32_t val, *pval;
 144 
 145     /* We need to know if the processes are bound. We assume all
 146      * processes are in the same state: all bound or none. */
 147     if (OPAL_SUCCESS != opal_hwloc_base_get_topology()) {
 148         goto fallback;
 149     }
 150     root_obj = hwloc_get_root_obj(opal_hwloc_topology);
 151     if (NULL == root_obj) goto fallback;
 152 
 153     topo = topo_module->mtc.dist_graph;
 154     rank = ompi_comm_rank(comm_old);
 155     size = ompi_comm_size(comm_old);
 156 
 157     OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
 158                          "Process rank is : %i\n",rank));
 159     /**
 160      * In order to decrease the number of loops let's use a trick:
 161      * build the lindex_to_grank in the vpids array, and only allocate
 162      * it upon completion of the most costly loop.
 163      */
 164     vpids = (int *)malloc(size * sizeof(int));
 165     colors = (int *)malloc(size * sizeof(int));
 166     for(i = 0 ; i < size ; i++) {
 167         proc = ompi_group_peer_lookup(comm_old->c_local_group, i);
 168         if (( i == rank ) ||
 169             (OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)))
 170             vpids[num_procs_in_node++] = i;
 171 
 172         pval = &val;
 173         OPAL_MODEX_RECV_VALUE(err, OPAL_PMIX_NODEID, &(proc->super.proc_name), &pval, OPAL_UINT32);
 174         if( OPAL_SUCCESS != err ) {
 175             opal_output(0, "Unable to extract peer %s nodeid from the modex.\n",
 176                         OMPI_NAME_PRINT(&(proc->super.proc_name)));
 177             colors[i] = -1;
 178             continue;
 179         }
 180         colors[i] = (int)val;
 181     }
 182     lindex_to_grank = (int *)malloc(num_procs_in_node * sizeof(int));
 183     memcpy(lindex_to_grank, vpids, num_procs_in_node * sizeof(int));
 184     memcpy(vpids, colors, size * sizeof(int));
 185 
 186 #ifdef __DEBUG__
 187     if ( 0 == rank ) {
 188         dump_int_array(10, ompi_topo_base_framework.framework_output,
 189                        "lindex_to_grank : ", "", lindex_to_grank, num_procs_in_node);
 190         dump_int_array(10, ompi_topo_base_framework.framework_output,
 191                        "Vpids : ", "", colors, size);
 192     }
 193 #endif
 194     /* clean-up dupes in the array */
 195     for(i = 0; i < size ; i++) {
 196         if ( -1 == vpids[i] ) continue;
 197         num_nodes++;  /* compute number of nodes */
 198         for(j = i+1; j < size; j++)
 199             if( vpids[i] == vpids[j] )
 200                 vpids[j] = -1;
 201     }
 202     if( 0 == num_nodes ) {
 203         /* No useful info has been retrieved from the runtime. Fallback
 204          * and create a duplicate of the original communicator */
 205         free(vpids);
 206         free(colors);
 207         goto fallback; /* return with success */
 208     }
 209     /* compute local roots ranks in comm_old */
 210     /* Only the global root needs to do this */
 211     if(0 == rank) {
 212         nodes_roots = (int *)calloc(num_nodes, sizeof(int));
 213         for(i = idx = 0; i < size; i++)
 214             if( vpids[i] != -1 )
 215                 nodes_roots[idx++] = i;
 216         OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
 217                              "num nodes is %i\n", num_nodes));
 218 #ifdef __DEBUG__
 219         dump_int_array(10, ompi_topo_base_framework.framework_output,
 220                        "Root nodes are :\n", "root ", nodes_roots, num_nodes);
 221 #endif
 222     }
 223     free(vpids);
 224 
 225     /* if cpubind returns an error, it will be full anyway */
 226     set = hwloc_bitmap_alloc_full();
 227     hwloc_get_cpubind(opal_hwloc_topology, set, 0);
 228     num_pus_in_node = hwloc_get_nbobjs_by_type(opal_hwloc_topology, HWLOC_OBJ_PU);
 229 
 230     /**
 231      * In all situations (including heterogeneous environments) all processes must execute
 232      * all the calls that involve collective communications, so we have to lay the logic
 233      * accordingly.
 234      */
 235 
 236     if(hwloc_bitmap_isincluded(root_obj->cpuset,set)) { /* processes are not bound on the machine */
 237         if (0 == rank)
 238             OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
 239                                  ">>>>>>>>>>>>> Process Not bound <<<<<<<<<<<<<<<\n"));
 240 
 241         /* we try to bind to cores or above objects if enough are present */
 242         /* Not sure that cores are present in ALL nodes */
 243         depth = hwloc_get_type_or_above_depth(opal_hwloc_topology, HWLOC_OBJ_CORE);
 244         num_objs_in_node = hwloc_get_nbobjs_by_depth(opal_hwloc_topology, depth);
 245     } else {    /* the processes are already bound */
 246         object = hwloc_get_obj_covering_cpuset(opal_hwloc_topology, set);
 247         obj_rank = object->logical_index;
 248         effective_depth = object->depth;
 249         num_objs_in_node = hwloc_get_nbobjs_by_depth(opal_hwloc_topology, effective_depth);
 250     }
 251     if( (0 == num_objs_in_node) || (0 == num_pus_in_node) ) {  /* deal with bozo cases: COVERITY 1418505 */
 252         free(colors);
 253         goto fallback; /* return with success */
 254     }
 255     /* Check for oversubscribing */
 256     oversubscribing_objs = check_oversubscribing(rank, num_nodes,
 257                                                  num_objs_in_node, num_procs_in_node,
 258                                                  nodes_roots, lindex_to_grank, comm_old);
 259 
 260     if(oversubscribing_objs) {
 261         if(hwloc_bitmap_isincluded(root_obj->cpuset, set)) { /* processes are not bound on the machine */
 262             OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
 263                                  "Oversubscribing OBJ/CORES resources => Trying to use PUs \n"));
 264 
 265             oversubscribed_pus = check_oversubscribing(rank, num_nodes,
 266                                                        num_pus_in_node, num_procs_in_node,
 267                                                        nodes_roots, lindex_to_grank, comm_old);
 268             /* Update the data used to compute the correct binding */
 269             if (!oversubscribed_pus) {
 270                 obj_rank = ompi_process_info.my_local_rank%num_pus_in_node;
 271                 effective_depth = hwloc_topology_get_depth(opal_hwloc_topology) - 1;
 272                 num_objs_in_node = num_pus_in_node;
 273                 OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
 274                                      "Process %i not bound : binding on PU#%i \n", rank, obj_rank));
 275             }
 276         } else {
 277             /* Bound processes will participate with the same data as before */
 278             oversubscribed_pus = check_oversubscribing(rank, num_nodes,
 279                                                        num_objs_in_node, num_procs_in_node,
 280                                                        nodes_roots, lindex_to_grank, comm_old);
 281         }
 282     }
 283 
 284     if( !oversubscribing_objs && !oversubscribed_pus ) {
 285         if( hwloc_bitmap_isincluded(root_obj->cpuset, set) ) { /* processes are not bound on the machine */
 286             obj_rank = ompi_process_info.my_local_rank%num_objs_in_node;
 287             effective_depth = depth;
 288             object = hwloc_get_obj_by_depth(opal_hwloc_topology, effective_depth, obj_rank);
 289             if( NULL == object) {
 290                 free(colors);
 291                 hwloc_bitmap_free(set);
 292                 goto fallback;  /* return with success */
 293             }
 294 
 295             hwloc_bitmap_copy(set, object->cpuset);
 296             hwloc_bitmap_singlify(set); /* we don't want the process to move */
 297             hwloc_err = hwloc_set_cpubind(opal_hwloc_topology, set, 0);
 298             if( -1 == hwloc_err) {
 299                 /* This is a local issue. Either we agree with the rest of the processes to stop the
 300                  * reordering or we have to complete the entire process. Let's complete.
 301                  */
 302                 OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
 303                                      "Process %i failed to bind on OBJ#%i \n", rank, obj_rank));
 304             } else
 305                 OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
 306                                      "Process %i not bound : binding on OBJ#%i \n",rank, obj_rank));
 307         } else {
 308             OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
 309                                  "Process %i bound  on OBJ #%i \n"
 310                                  "=====> Num obj in node : %i | num pus in node : %i\n",
 311                                  rank, obj_rank,
 312                                  num_objs_in_node, num_pus_in_node));
 313         }
 314     } else {
 315         OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
 316                              "Oversubscribing PUs resources => Rank Reordering Impossible \n"));
 317         free(colors);
 318         hwloc_bitmap_free(set);
 319         goto fallback;  /* return with success */
 320     }
 321 
 322     reqs = (MPI_Request *)calloc(num_procs_in_node-1, sizeof(MPI_Request));
 323     if( rank == lindex_to_grank[0] ) {  /* local leader cleans the hierarchy */
 324         int array_size = effective_depth + 1;
 325         int *myhierarchy = (int *)calloc(array_size, sizeof(int));
 326 
 327         numlevels = 1;
 328         myhierarchy[0] = hwloc_get_nbobjs_by_depth(opal_hwloc_topology, 0);
 329         for (i = 1; i < array_size ; i++) {
 330             myhierarchy[i] = hwloc_get_nbobjs_by_depth(opal_hwloc_topology, i);
 331             OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
 332                                  "hierarchy[%i] = %i\n", i, myhierarchy[i]));
 333             if ((myhierarchy[i] != 0) && (myhierarchy[i] != myhierarchy[i-1]))
 334                 numlevels++;
 335         }
 336 
 337         tracker = (hwloc_obj_t *)calloc(numlevels, sizeof(hwloc_obj_t));
 338         for(idx = 0, i = 1; i < array_size; i++) {
 339             if(myhierarchy[i] != myhierarchy[i-1])
 340                 tracker[idx++] = hwloc_get_obj_by_depth(opal_hwloc_topology, i-1, 0);
 341         }
 342         tracker[idx] = hwloc_get_obj_by_depth(opal_hwloc_topology, effective_depth, 0);
 343         free(myhierarchy);
 344 
 345         OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
 346                              ">>>>>>>>>>>>>>>>>>>>> Effective depth is : %i (total depth %i)| num_levels %i\n",
 347                              effective_depth, hwloc_topology_get_depth(opal_hwloc_topology), numlevels));
 348         for(i = 0 ; i < numlevels ; i++) {
 349             OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
 350                                  "tracker[%i] : arity %i | depth %i\n",
 351                                  i, tracker[i]->arity, tracker[i]->depth));
 352         }
 353         /* get the obj number */
 354         localrank_to_objnum = (int *)calloc(num_procs_in_node, sizeof(int));
 355         localrank_to_objnum[0] = obj_rank;
 356 
 357         for(i = 1;  i < num_procs_in_node; i++) {
 358             if (OMPI_SUCCESS != ( err = MCA_PML_CALL(irecv(&localrank_to_objnum[i], 1, MPI_INT,
 359                                                            lindex_to_grank[i], -111, comm_old, &reqs[i-1])))) {
 360                 free(reqs); reqs = NULL;
 361                 goto release_and_return;
 362             }
 363         }
 364         if (OMPI_SUCCESS != ( err = ompi_request_wait_all(num_procs_in_node-1,
 365                                                           reqs, MPI_STATUSES_IGNORE))) {
 366             free(reqs); reqs = NULL;
 367             goto release_and_return;
 368         }
 369     } else {
 370         /* sending my core number to my local master on the node */
 371         if (OMPI_SUCCESS != (err = MCA_PML_CALL(send(&obj_rank, 1, MPI_INT, lindex_to_grank[0],
 372                                                      -111, MCA_PML_BASE_SEND_STANDARD, comm_old)))) {
 373             free(reqs); reqs = NULL;
 374             goto release_and_return;
 375         }
 376     }
 377     free(reqs); reqs = NULL;
 378 
 379     /* Centralized Reordering */
 380     if (0 == mca_topo_treematch_component.reorder_mode) {
 381         int *k = NULL;
 382         int *obj_mapping = NULL;
 383         int num_objs_total = 0;
 384 
 385         /* Gather comm pattern
 386          * If weights have been provided take them in account. Otherwise rely
 387          * solely on HWLOC information.
 388          */
 389         if( 0 == rank ) {
 390 
 391             OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
 392                                  "========== Centralized Reordering ========= \n"));
 393             local_pattern = (double *)calloc(size*size,sizeof(double));
 394         } else {
 395             local_pattern = (double *)calloc(size,sizeof(double));
 396         }
 397         if( true == topo->weighted ) {
 398             for(i = 0; i < topo->indegree ; i++)
 399                 local_pattern[topo->in[i]] += topo->inw[i];
 400             for(i = 0; i < topo->outdegree ; i++)
 401                 local_pattern[topo->out[i]] += topo->outw[i];
 402         }
 403         err = comm_old->c_coll->coll_gather( (0 == rank ? MPI_IN_PLACE : local_pattern), size, MPI_DOUBLE,
 404                                              local_pattern, size, MPI_DOUBLE,  /* ignored on non-root */
 405                                              0, comm_old, comm_old->c_coll->coll_gather_module);
 406         if (OMPI_SUCCESS != err) {
 407             goto release_and_return;
 408         }
 409 
 410         if( rank == lindex_to_grank[0] ) {
 411             tm_topology_t *tm_topology = NULL;
 412             int *obj_to_rank_in_comm = NULL;
 413             int *hierarchies = NULL;
 414             int  min;
 415 
 416             /* create a table that derives the rank in comm_old from the object number */
 417             obj_to_rank_in_comm = (int *)malloc(num_objs_in_node*sizeof(int));
 418             for(i = 0 ; i < num_objs_in_node ; i++) {
 419                 obj_to_rank_in_comm[i] = -1;
 420                 object = hwloc_get_obj_by_depth(opal_hwloc_topology, effective_depth, i);
 421                 for( j = 0; j < num_procs_in_node ; j++ )
 422                     if(localrank_to_objnum[j] == (int)(object->logical_index)) {
 423                         obj_to_rank_in_comm[i] = lindex_to_grank[j];
 424                         break;
 425                     }
 426             }
 427 
 428             /* the global master gathers info from local_masters */
 429             if ( 0 == rank ) {
 430                 if ( num_nodes > 1 ) {
 431                     int *objs_per_node = NULL, displ;
 432 
 433                     objs_per_node = (int *)calloc(num_nodes, sizeof(int));
 434                     reqs = (MPI_Request *)calloc(num_nodes-1, sizeof(MPI_Request));
 435                     objs_per_node[0] = num_objs_in_node;
 436                     for(i = 1; i < num_nodes ; i++)
 437                         if (OMPI_SUCCESS != ( err = MCA_PML_CALL(irecv(objs_per_node + i, 1, MPI_INT,
 438                                                                        nodes_roots[i], -112, comm_old, &reqs[i-1])))) {
 439                             free(obj_to_rank_in_comm);
 440                             free(objs_per_node);
 441                             goto release_and_return;
 442                         }
 443 
 444                     if (OMPI_SUCCESS != ( err = ompi_request_wait_all(num_nodes - 1,
 445                                                                       reqs, MPI_STATUSES_IGNORE))) {
 446                         free(objs_per_node);
 447                         goto release_and_return;
 448                     }
 449 
 450                     for(i = 0; i < num_nodes; i++)
 451                         num_objs_total += objs_per_node[i];
 452                     obj_mapping = (int *)malloc(num_objs_total*sizeof(int));
 453                     for(i = 0; i < num_objs_total; i++)
 454                         obj_mapping[i] = -1;
 455 
 456                     memcpy(obj_mapping, obj_to_rank_in_comm, objs_per_node[0]*sizeof(int));
 457                     displ = objs_per_node[0];
 458                     for(i = 1; i < num_nodes ; i++) {
 459                         if (OMPI_SUCCESS != ( err = MCA_PML_CALL(irecv(obj_mapping + displ, objs_per_node[i], MPI_INT,
 460                                                                        nodes_roots[i], -113, comm_old, &reqs[i-1])))) {
 461                             free(obj_to_rank_in_comm);
 462                             free(objs_per_node);
 463                             free(obj_mapping);
 464                             goto release_and_return;
 465                         }
 466                         displ += objs_per_node[i];
 467                     }
 468                     if (OMPI_SUCCESS != ( err = ompi_request_wait_all(num_nodes - 1,
 469                                                                       reqs, MPI_STATUSES_IGNORE))) {
 470                         free(obj_to_rank_in_comm);
 471                         free(objs_per_node);
 472                         free(obj_mapping);
 473                         goto release_and_return;
 474                     }
 475                     free(objs_per_node);
 476                 } else {
 477                     /* if num_nodes == 1, then it's easy to get the obj mapping */
 478                     num_objs_total = num_objs_in_node;
 479                     obj_mapping = (int *)calloc(num_objs_total, sizeof(int));
 480                     memcpy(obj_mapping, obj_to_rank_in_comm, num_objs_total*sizeof(int));
 481                 }
 482 #ifdef __DEBUG__
 483                 dump_int_array(10, ompi_topo_base_framework.framework_output,
 484                                "Obj mapping : ", "", obj_mapping, num_objs_total );
 485 #endif
 486             } else {
 487                 if ( num_nodes > 1 ) {
 488                     if (OMPI_SUCCESS != (err = MCA_PML_CALL(send(&num_objs_in_node, 1, MPI_INT,
 489                                                                  0, -112, MCA_PML_BASE_SEND_STANDARD, comm_old)))) {
 490                         free(obj_to_rank_in_comm);
 491                         goto release_and_return;
 492                     }
 493                     if (OMPI_SUCCESS != (err = MCA_PML_CALL(send(obj_to_rank_in_comm, num_objs_in_node, MPI_INT,
 494                                                                  0, -113, MCA_PML_BASE_SEND_STANDARD, comm_old)))) {
 495                         free(obj_to_rank_in_comm);
 496                         goto release_and_return;
 497                     }
 498                 }
 499             }
 500             free(obj_to_rank_in_comm);
 501 
 502             assert(numlevels < TM_MAX_LEVELS);
 503             if( 0 == rank ) {
 504                 hierarchies = (int *)malloc(num_nodes*(TM_MAX_LEVELS+1)*sizeof(int));
 505             } else {
 506                 hierarchies = (int *)malloc((TM_MAX_LEVELS+1)*sizeof(int));
 507             }
 508 
 509             hierarchies[0] = numlevels;
 510 
 511             for(i = 0 ; i < hierarchies[0]; i++)
 512                 hierarchies[i+1] = tracker[i]->arity;
 513             for(; i < (TM_MAX_LEVELS+1); i++)  /* fill up everything else with 0 */
 514                 hierarchies[i] = 0;
 515 
 516             /* gather hierarchies iff more than 1 node! */
 517             if ( num_nodes > 1 ) {
 518                 if( rank != 0 ) {
 519                     if (OMPI_SUCCESS != (err = MCA_PML_CALL(send(hierarchies,(TM_MAX_LEVELS+1), MPI_INT, 0,
 520                                                                  -114, MCA_PML_BASE_SEND_STANDARD, comm_old)))) {
 521                         free(hierarchies);
 522                         goto release_and_return;
 523                     }
 524                 } else {
 525                     for(i = 1; i < num_nodes ; i++)
 526                         if (OMPI_SUCCESS != ( err = MCA_PML_CALL(irecv(hierarchies+i*(TM_MAX_LEVELS+1), (TM_MAX_LEVELS+1), MPI_INT,
 527                                                                        nodes_roots[i], -114, comm_old, &reqs[i-1])))) {
 528                             free(obj_mapping);
 529                             free(hierarchies);
 530                             goto release_and_return;
 531                         }
 532                     if (OMPI_SUCCESS != ( err = ompi_request_wait_all(num_nodes - 1,
 533                                                                       reqs, MPI_STATUSES_IGNORE))) {
 534                         free(obj_mapping);
 535                         free(hierarchies);
 536                         goto release_and_return;
 537                     }
 538                     free(reqs); reqs = NULL;
 539                 }
 540             }
 541 
 542             if ( 0 == rank ) {
 543                 tm_tree_t *comm_tree = NULL;
 544                 tm_solution_t *sol = NULL;
 545                 tm_affinity_mat_t *aff_mat = NULL;
 546                 double **comm_pattern = NULL;
 547 
 548 #ifdef __DEBUG__
 549                 dump_int_array(10, ompi_topo_base_framework.framework_output,
 550                                "hierarchies : ", "", hierarchies, num_nodes*(TM_MAX_LEVELS+1));
 551 #endif
 552                 tm_topology = (tm_topology_t *)malloc(sizeof(tm_topology_t));
 553                 tm_topology->nb_levels = hierarchies[0];
 554 
 555                 /* extract min depth */
 556                 for(i = 1 ; i < num_nodes ; i++)
 557                     if (hierarchies[i*(TM_MAX_LEVELS+1)] < tm_topology->nb_levels)
 558                         tm_topology->nb_levels = hierarchies[i*(TM_MAX_LEVELS+1)];
 559 
 560                 /* Crush levels in hierarchies too long (ie > tm_topology->nb_levels)*/
 561                 for(i = 0; i < num_nodes ; i++) {
 562                     int *base_ptr = hierarchies + i*(TM_MAX_LEVELS+1);
 563                     int  suppl = *base_ptr - tm_topology->nb_levels;
 564                     for(j = 1 ; j <= suppl ; j++)
 565                         *(base_ptr + tm_topology->nb_levels) *= *(base_ptr + tm_topology->nb_levels + j);
 566                 }
 567                 if( num_nodes > 1) {
 568                     /* We aggregate all topos => +1 level!*/
 569                     tm_topology->nb_levels += 1;
 570                     tm_topology->arity = (int *)calloc(tm_topology->nb_levels, sizeof(int));
 571                     tm_topology->arity[0] = num_nodes;
 572                     for(i = 1; i < tm_topology->nb_levels; i++) {  /* compute the minimum for each level */
 573                         min = hierarchies[i];
 574                         for(j = 1; j < num_nodes ; j++)
 575                             if( hierarchies[j*(TM_MAX_LEVELS+1) + i] < min)
 576                                 min = hierarchies[j*(TM_MAX_LEVELS+1) + i];
 577                         tm_topology->arity[i] = min;
 578                     }
 579                 } else {
 580                     tm_topology->arity = (int *)calloc(tm_topology->nb_levels, sizeof(int));
 581                     for(i = 0; i < tm_topology->nb_levels; i++)
 582                         tm_topology->arity[i] = hierarchies[i+1];
 583                 }
 584                 free(hierarchies);
 585 
 586                 for(i = 0; i < tm_topology->nb_levels; i++) {
 587                     OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
 588                                          "topo_arity[%i] = %i\n", i, tm_topology->arity[i]));
 589                 }
 590 
 591                 /* compute the number of processing elements */
 592                 tm_topology->nb_nodes = (size_t *)calloc(tm_topology->nb_levels, sizeof(size_t));
 593                 tm_topology->nb_nodes[0] = 1;
 594                 for(i = 1 ; i < tm_topology->nb_levels; i++)
 595                     tm_topology->nb_nodes[i] = tm_topology->nb_nodes[i-1] * tm_topology->arity[i-1];
 596 
 597 #ifdef __DEBUG__
 598                 assert(num_objs_total == (int)tm_topology->nb_nodes[tm_topology->nb_levels-1]);
 599 #endif
 600                 /* Build process id tab */
 601                 tm_topology->node_id  = (int *)malloc(num_objs_total*sizeof(int));
 602                 tm_topology->node_rank = (int *)malloc(num_objs_total*sizeof(int));
 603                 for( i = 0 ; i < num_objs_total ; i++ ) 
 604                     tm_topology->node_id[i] = tm_topology->node_rank[i] = -1;
 605                 /*note : we make the hypothesis that logical indexes in hwloc range from
 606                   0 to N, are contiguous and crescent.  */                   
 607                 for( i = 0 ; i < num_objs_total ; i++ ) {
 608                     tm_topology->node_id[i] = obj_mapping[i];       /* use process ranks instead of core numbers */                                                            
 609                     if (obj_mapping[i] != -1)                       /* so that k[i] is the new rank of process i */ 
 610                         tm_topology->node_rank[obj_mapping[i]] = i; /* after computation by TreeMatch */
 611                 }
 612                 
 613                 /* unused for now*/
 614                 tm_topology->cost = (double*)calloc(tm_topology->nb_levels,sizeof(double));
 615                 tm_topology->nb_proc_units = num_objs_total;
 616 
 617                 tm_topology->nb_constraints = 0;
 618                 for(i = 0; i < tm_topology->nb_proc_units ; i++)
 619                     if (obj_mapping[i] != -1)
 620                         tm_topology->nb_constraints++;
 621                 tm_topology->constraints = (int *)calloc(tm_topology->nb_constraints,sizeof(int));
 622                 for(idx = 0, i = 0; i < tm_topology->nb_proc_units ; i++)
 623                     if (obj_mapping[i] != -1)
 624                         tm_topology->constraints[idx++] = obj_mapping[i]; /* use process ranks instead of core numbers */
 625 #ifdef __DEBUG__
 626                 assert(idx == tm_topology->nb_constraints);
 627 #endif
 628                 tm_topology->oversub_fact = 1;
 629 
 630 #ifdef __DEBUG__
 631                 /*                
 632                 for(i = 0; i < tm_topology->nb_levels ; i++) {
 633                     opal_output_verbose(10, ompi_topo_base_framework.framework_output,
 634                                         "tm topo node_id for level [%i] : ",i);
 635                     dump_int_array(10, ompi_topo_base_framework.framework_output,
 636                                    "", "", obj_mapping, tm_topology->nb_nodes[i]);
 637                 }
 638                 */
 639                 tm_display_topology(tm_topology);
 640 #endif
 641                 comm_pattern = (double **)malloc(size*sizeof(double *));
 642                 for(i = 0 ; i < size ; i++)
 643                     comm_pattern[i] = local_pattern + i * size;
 644                 /* matrix needs to be symmetric */
 645                 for( i = 0; i < size ; i++ )
 646                     for( j = i; j < size ; j++ ) {
 647                         comm_pattern[i][j] = (comm_pattern[i][j] + comm_pattern[j][i]) / 2;
 648                         comm_pattern[j][i] = comm_pattern[i][j];
 649                     }
 650 #ifdef __DEBUG__
 651                 opal_output_verbose(10, ompi_topo_base_framework.framework_output,
 652                                     "==== COMM PATTERN ====\n");
 653                 for( i = 0 ; i < size ; i++) {
 654                     dump_double_array(10, ompi_topo_base_framework.framework_output,
 655                                       "", "", comm_pattern[i], size);
 656                 }
 657 #endif
 658                 //tm_optimize_topology(&tm_topology);
 659                 aff_mat = tm_build_affinity_mat(comm_pattern,size);
 660                 comm_tree = tm_build_tree_from_topology(tm_topology,aff_mat, NULL, NULL);
 661                 sol = tm_compute_mapping(tm_topology, comm_tree);
 662 
 663                 k = (int *)calloc(sol->k_length, sizeof(int));
 664                 for(idx = 0 ; idx < (int)sol->k_length ; idx++)
 665                     k[idx] = sol->k[idx][0];
 666 #ifdef __DEBUG__
 667                 opal_output_verbose(10, ompi_topo_base_framework.framework_output,
 668                                     "====> nb levels : %i\n",tm_topology->nb_levels);
 669                 dump_int_array(10, ompi_topo_base_framework.framework_output,
 670                                "Rank permutation sigma/k : ", "", k, num_objs_total);
 671                 assert(size == (int)sol->sigma_length);
 672                 dump_int_array(10, ompi_topo_base_framework.framework_output,
 673                                "Matching : ", "",sol->sigma, sol->sigma_length);
 674 #endif
 675                 free(obj_mapping);
 676                 free(comm_pattern);
 677                 free(aff_mat->sum_row);
 678                 free(aff_mat);
 679                 tm_free_solution(sol);
 680                 tm_free_tree(comm_tree);
 681                 tm_free_topology(tm_topology);
 682             }
 683         }
 684 
 685         /* Todo : Bcast + group creation */
 686         /* scatter the ranks */
 687         /* don't need to convert k from local rank to global rank */
 688         if (OMPI_SUCCESS != (err = comm_old->c_coll->coll_scatter(k, 1, MPI_INT,
 689                                                                   &newrank, 1, MPI_INT,
 690                                                                   0, comm_old,
 691                                                                   comm_old->c_coll->coll_scatter_module))) {
 692             if (NULL != k) free(k);
 693             goto release_and_return;
 694         }
 695 
 696         if ( 0 == rank )
 697             free(k);
 698 
 699         /* this needs to be optimized but will do for now */
 700         if (OMPI_SUCCESS != (err = ompi_comm_split(comm_old, 0, newrank, newcomm, false))) {
 701             goto release_and_return;
 702         }
 703         /* end of TODO */
 704 
 705         /* Attach the dist_graph to the newly created communicator */
 706         (*newcomm)->c_flags        |= OMPI_COMM_DIST_GRAPH;
 707         (*newcomm)->c_topo          = topo_module;
 708         (*newcomm)->c_topo->reorder = reorder;
 709 
 710     } else { /* partially distributed reordering */
 711         int *grank_to_lrank = NULL, *lrank_to_grank = NULL, *marked = NULL;
 712         int node_position = 0, offset = 0, pos = 0;
 713         ompi_communicator_t *localcomm = NULL;
 714 
 715         if (OMPI_SUCCESS != (err = ompi_comm_split(comm_old, colors[rank], rank,
 716                                                    &localcomm, false))) {
 717             goto release_and_return;
 718         }
 719 
 720         lrank_to_grank = (int *)calloc(num_procs_in_node, sizeof(int));
 721         if (OMPI_SUCCESS != (err = localcomm->c_coll->coll_allgather(&rank, 1, MPI_INT,
 722                                                                      lrank_to_grank, 1, MPI_INT,
 723                                                                      localcomm, localcomm->c_coll->coll_allgather_module))) {
 724             free(lrank_to_grank);
 725             ompi_comm_free(&localcomm);
 726             goto release_and_return;
 727         }
 728 
 729         grank_to_lrank = (int *)malloc(size * sizeof(int));
 730         for(i = 0 ; i < size ; grank_to_lrank[i++] = -1);
 731         for(i = 0 ; i < num_procs_in_node ; i++)
 732             grank_to_lrank[lrank_to_grank[i]] = i;
 733 
 734         /* Discover the local patterns */
 735         if (rank == lindex_to_grank[0]) {
 736             OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
 737                                  "========== Partially Distributed Reordering ========= \n"));
 738             local_pattern = (double *)calloc(num_procs_in_node * num_procs_in_node, sizeof(double));
 739         } else {
 740             local_pattern = (double *)calloc(num_procs_in_node, sizeof(double));
 741         }
 742         /* Extract the local communication pattern */
 743         if( true == topo->weighted ) {
 744             for(i = 0; i < topo->indegree; i++)
 745                 if (grank_to_lrank[topo->in[i]] != -1)
 746                     local_pattern[grank_to_lrank[topo->in[i]]] += topo->inw[i];
 747             for(i = 0; i < topo->outdegree; i++)
 748                 if (grank_to_lrank[topo->out[i]] != -1)
 749                     local_pattern[grank_to_lrank[topo->out[i]]] += topo->outw[i];
 750         }
 751         if (OMPI_SUCCESS != (err = localcomm->c_coll->coll_gather((rank == lindex_to_grank[0] ? MPI_IN_PLACE : local_pattern),
 752                                                                   num_procs_in_node, MPI_DOUBLE,
 753                                                                   local_pattern, num_procs_in_node, MPI_DOUBLE,
 754                                                                   0, localcomm, localcomm->c_coll->coll_gather_module))) {
 755             free(lrank_to_grank);
 756             ompi_comm_free(&localcomm);
 757             free(grank_to_lrank);
 758             goto release_and_return;
 759         }
 760 
 761         /* The root has now the entire information, so let's crunch it */
 762         if (rank == lindex_to_grank[0]) {
 763             tm_topology_t  *tm_topology = NULL;
 764             tm_tree_t *comm_tree = NULL;
 765             tm_solution_t *sol = NULL;
 766             tm_affinity_mat_t *aff_mat = NULL;
 767             double **comm_pattern = NULL;
 768             int *obj_to_rank_in_lcomm = NULL;
 769 
 770             comm_pattern = (double **)malloc(num_procs_in_node*sizeof(double *));
 771             for( i = 0; i < num_procs_in_node; i++ ) {
 772                 comm_pattern[i] = local_pattern + i * num_procs_in_node;
 773             }
 774             /* Matrix needs to be symmetric. Beware: as comm_patterns
 775              * refers to local_pattern we indirectly alter the content
 776              * of local_pattern */
 777             for( i = 0; i < num_procs_in_node ; i++ )
 778                 for( j = i; j < num_procs_in_node ; j++ ) {
 779                     comm_pattern[i][j] = (comm_pattern[i][j] + comm_pattern[j][i]) / 2;
 780                     comm_pattern[j][i] = comm_pattern[i][j];
 781                 }
 782 
 783 #ifdef __DEBUG__
 784             OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
 785                                  "========== COMM PATTERN ============= \n"));
 786             for(i = 0 ; i < num_procs_in_node ; i++){
 787                 opal_output_verbose(10, ompi_topo_base_framework.framework_output," %i : ",i);
 788                 dump_double_array(10, ompi_topo_base_framework.framework_output,
 789                                   "", "", comm_pattern[i], num_procs_in_node);
 790             }
 791             opal_output_verbose(10, ompi_topo_base_framework.framework_output,
 792                                 "======================= \n");
 793 #endif
 794 
 795             tm_topology  = (tm_topology_t *)malloc(sizeof(tm_topology_t));
 796             tm_topology->nb_levels = numlevels;
 797             tm_topology->arity     = (int *)calloc(tm_topology->nb_levels, sizeof(int));
 798             tm_topology->nb_nodes  = (size_t *)calloc(tm_topology->nb_levels, sizeof(size_t));
 799             
 800             for(i = 0 ; i < tm_topology->nb_levels ; i++){
 801                 int nb_objs = hwloc_get_nbobjs_by_depth(opal_hwloc_topology, tracker[i]->depth);
 802                 tm_topology->nb_nodes[i] = nb_objs;
 803                 tm_topology->arity[i]    = tracker[i]->arity;
 804             }
 805 
 806             
 807 #ifdef __DEBUG__
 808             assert(num_objs_in_node == (int)tm_topology->nb_nodes[tm_topology->nb_levels-1]);
 809 #endif
 810             /* create a table that derives the rank in local (node) comm from the object number */
 811             obj_to_rank_in_lcomm = (int *)malloc(num_objs_in_node*sizeof(int));
 812             for(i = 0 ; i < num_objs_in_node ; i++) {
 813                 obj_to_rank_in_lcomm[i] = -1;
 814                 object = hwloc_get_obj_by_depth(opal_hwloc_topology, effective_depth, i);
 815                 for( j = 0; j < num_procs_in_node ; j++ )
 816                     if(localrank_to_objnum[j] == (int)(object->logical_index)) {
 817                         obj_to_rank_in_lcomm[i] = j;
 818                         break;
 819                     }
 820             }
 821             
 822             /* Build process id tab */
 823             tm_topology->node_id  = (int *)malloc(num_objs_in_node*sizeof(int));
 824             tm_topology->node_rank = (int *)malloc(num_objs_in_node*sizeof(int));
 825             for(i = 1 ; i < num_objs_in_node; i++)
 826                 tm_topology->node_id[i] = tm_topology->node_rank[i] = -1;
 827             
 828             for( i = 0 ; i < num_objs_in_node ; i++ ) {
 829                 /*note : we make the hypothesis that logical indexes in hwloc range from
 830                   0 to N, are contiguous and crescent.  */                   
 831                 tm_topology->node_id[i] = obj_to_rank_in_lcomm[i];
 832                 if( obj_to_rank_in_lcomm[i] != -1)
 833                     tm_topology->node_rank[obj_to_rank_in_lcomm[i]] = i; 
 834             }
 835             
 836             /* unused for now*/
 837             tm_topology->cost = (double*)calloc(tm_topology->nb_levels,sizeof(double));
 838 
 839             tm_topology->nb_proc_units = num_objs_in_node;
 840             tm_topology->nb_constraints = 0;
 841             
 842             for(i = 0; i < num_objs_in_node ; i++)
 843                 if (obj_to_rank_in_lcomm[i] != -1)
 844                     tm_topology->nb_constraints++;
 845             
 846             tm_topology->constraints = (int *)calloc(tm_topology->nb_constraints,sizeof(int));
 847             for(idx = 0,i = 0; i < num_objs_in_node ; i++)
 848                 if (obj_to_rank_in_lcomm[i] != -1)
 849                     tm_topology->constraints[idx++] = obj_to_rank_in_lcomm[i];
 850 
 851             tm_topology->oversub_fact = 1;
 852 
 853 #ifdef __DEBUG__
 854             assert(num_objs_in_node == (int)tm_topology->nb_nodes[tm_topology->nb_levels-1]);
 855             OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
 856                                  "Levels in topo : %i | num procs in node : %i\n",
 857                                  tm_topology->nb_levels,num_procs_in_node));
 858             for(i = 0; i < tm_topology->nb_levels ; i++) {
 859                 OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
 860                                      "Nb objs for level %i : %lu | arity %i\n ",
 861                                      i, tm_topology->nb_nodes[i],tm_topology->arity[i]));
 862             }
 863             dump_int_array(10, ompi_topo_base_framework.framework_output,
 864                            "", "Obj id ", tm_topology->node_id, tm_topology->nb_nodes[tm_topology->nb_levels-1]);
 865             tm_display_topology(tm_topology);
 866 #endif
 867             //tm_optimize_topology(&tm_topology);
 868             aff_mat = tm_build_affinity_mat(comm_pattern,num_procs_in_node);
 869             comm_tree = tm_build_tree_from_topology(tm_topology,aff_mat, NULL, NULL);
 870             sol = tm_compute_mapping(tm_topology, comm_tree);
 871 
 872             assert((int)sol->k_length == num_objs_in_node);
 873 
 874             k = (int *)calloc(sol->k_length, sizeof(int));
 875             for(idx = 0 ; idx < (int)sol->k_length ; idx++)
 876                 k[idx] = sol->k[idx][0];
 877 
 878 #ifdef __DEBUG__
 879             OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
 880                                  "====> nb levels : %i\n",tm_topology->nb_levels));
 881             dump_int_array(10, ompi_topo_base_framework.framework_output,
 882                            "Rank permutation sigma/k : ", "", k, num_procs_in_node);
 883             assert(num_procs_in_node == (int)sol->sigma_length);
 884             dump_int_array(10, ompi_topo_base_framework.framework_output,
 885                            "Matching : ", "", sol->sigma, sol->sigma_length);
 886 #endif
 887             free(obj_to_rank_in_lcomm);
 888             free(aff_mat->sum_row);
 889             free(aff_mat);
 890             free(comm_pattern);
 891             tm_free_solution(sol);
 892             tm_free_tree(comm_tree);
 893             tm_free_topology(tm_topology);
 894         }
 895         
 896         /* Todo : Bcast + group creation */
 897         /* scatter the ranks */
 898         if (OMPI_SUCCESS != (err = localcomm->c_coll->coll_scatter(k, 1, MPI_INT,
 899                                                                    &newrank, 1, MPI_INT,
 900                                                                    0, localcomm,
 901                                                                    localcomm->c_coll->coll_scatter_module))) {
 902             if (NULL != k) free(k);
 903             ompi_comm_free(&localcomm);
 904             free(lrank_to_grank);
 905             free(grank_to_lrank);
 906             goto release_and_return;
 907         }
 908 
 909         /* compute the offset of newrank before the split */
 910         /* use the colors array, not the vpids */
 911         marked = (int *)malloc((num_nodes-1)*sizeof(int));
 912         for(idx = 0 ; idx < num_nodes - 1 ; idx++)
 913             marked[idx] = -1;
 914 
 915         while( (node_position != rank) && (colors[node_position] != colors[rank])) {
 916             /* Have we already counted the current color ? */
 917             for(idx = 0; idx < pos; idx++)
 918                 if( marked[idx] == colors[node_position] )
 919                     goto next_iter;  /* yes, let's skip the rest */
 920             /* How many elements of this color are here ? none before the current position */
 921             for(; idx < size; idx++)
 922                 if(colors[idx] == colors[node_position])
 923                     offset++;
 924             marked[pos++] = colors[node_position];
 925           next_iter:
 926             node_position++;
 927         }
 928         newrank += offset;
 929         free(marked);
 930 
 931         if (rank == lindex_to_grank[0])
 932             free(k);
 933 
 934         /* this needs to be optimized but will do for now */
 935         if (OMPI_SUCCESS != (err = ompi_comm_split(comm_old, 0, newrank, newcomm, false))) {
 936             ompi_comm_free(&localcomm);
 937             free(lrank_to_grank);
 938             free(grank_to_lrank);
 939             goto release_and_return;
 940         }
 941         /* end of TODO */
 942 
 943         /* Attach the dist_graph to the newly created communicator */
 944         (*newcomm)->c_flags        |= OMPI_COMM_DIST_GRAPH;
 945         (*newcomm)->c_topo          = topo_module;
 946         (*newcomm)->c_topo->reorder = reorder;
 947 
 948         free(grank_to_lrank);
 949         free(lrank_to_grank);
 950     } /* distributed reordering end */
 951 
 952   release_and_return:
 953     if (NULL != reqs ) free(reqs);
 954     if (NULL != tracker) free(tracker);
 955     if (NULL != local_pattern) free(local_pattern);
 956     free(colors);
 957     if (NULL != lindex_to_grank) free(lindex_to_grank);
 958     if (NULL != nodes_roots) free(nodes_roots);  /* only on root */
 959     if (NULL != localrank_to_objnum) free(localrank_to_objnum);
 960     if( NULL != set) hwloc_bitmap_free(set);
 961     /* As the reordering is optional, if we encountered an error during the reordering,
 962      * we can safely return with just a duplicate of the original communicator associated
 963      * with the topology. */
 964     if( OMPI_SUCCESS != err ) goto fallback;
 965     return OMPI_SUCCESS;
 966 }

/* [<][>][^][v][top][bottom][index][help] */