root/ompi/mca/coll/base/coll_base_comm_select.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. mca_coll_base_comm_select
  2. avail_coll_compare
  3. check_components
  4. check_one_component
  5. query
  6. query_2_0_0

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
   4  *                         University Research and Technology
   5  *                         Corporation.  All rights reserved.
   6  * Copyright (c) 2004-2017 The University of Tennessee and The University
   7  *                         of Tennessee Research Foundation.  All rights
   8  *                         reserved.
   9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
  10  *                         University of Stuttgart.  All rights reserved.
  11  * Copyright (c) 2004-2005 The Regents of the University of California.
  12  *                         All rights reserved.
  13  * Copyright (c) 2007      Lawrence Livermore National Security, LLC.  All
  14  *                         rights reserved.
  15  * Copyright (c) 2008      Sun Microsystems, Inc.  All rights reserved.
  16  * Copyright (c) 2008-2014 Cisco Systems, Inc.  All rights reserved.
  17  * Copyright (c) 2012      Oak Ridge National Laboratory.  All rights reserved.
  18  * Copyright (c) 2013      Los Alamos National Security, LLC. All rights
  19  *                         reserved.
  20  * Copyright (c) 2014      Research Organization for Information Science
  21  *                         and Technology (RIST). All rights reserved.
  22  * Copyright (c) 2016-2017 IBM Corporation.  All rights reserved.
  23  * Copyright (c) 2017      FUJITSU LIMITED.  All rights reserved.
  24  * $COPYRIGHT$
  25  *
  26  * Additional copyrights may follow
  27  *
  28  * $HEADER$
  29  */
  30 
  31 #include "ompi_config.h"
  32 
  33 #include <stdio.h>
  34 #include <string.h>
  35 #include <stdlib.h>
  36 
  37 #include "mpi.h"
  38 #include "ompi/communicator/communicator.h"
  39 #include "opal/util/output.h"
  40 #include "opal/util/show_help.h"
  41 #include "opal/class/opal_list.h"
  42 #include "opal/class/opal_object.h"
  43 #include "ompi/mca/mca.h"
  44 #include "opal/mca/base/base.h"
  45 #include "ompi/mca/coll/coll.h"
  46 #include "ompi/mca/coll/base/base.h"
  47 
  48 
  49 /*
  50  * Local types
  51  */
  52 struct avail_coll_t {
  53     opal_list_item_t super;
  54 
  55     int ac_priority;
  56     mca_coll_base_module_2_3_0_t *ac_module;
  57     const char * ac_component_name;
  58 };
  59 typedef struct avail_coll_t avail_coll_t;
  60 
  61 
  62 /*
  63  * Local functions
  64  */
  65 static opal_list_t *check_components(opal_list_t * components,
  66                                      ompi_communicator_t * comm);
  67 static int check_one_component(ompi_communicator_t * comm,
  68                                const mca_base_component_t * component,
  69                                mca_coll_base_module_2_3_0_t ** module);
  70 
  71 static int query(const mca_base_component_t * component,
  72                  ompi_communicator_t * comm, int *priority,
  73                  mca_coll_base_module_2_3_0_t ** module);
  74 
  75 static int query_2_0_0(const mca_coll_base_component_2_0_0_t *
  76                        coll_component, ompi_communicator_t * comm,
  77                        int *priority,
  78                        mca_coll_base_module_2_3_0_t ** module);
  79 
  80 /*
  81  * Stuff for the OBJ interface
  82  */
  83 static OBJ_CLASS_INSTANCE(avail_coll_t, opal_list_item_t, NULL, NULL);
  84 
  85 
  86 #define COPY(module, comm, func)                                        \
  87     do {                                                                \
  88         if (NULL != module->coll_ ## func) {                            \
  89             if (NULL != comm->c_coll->coll_ ## func ## _module) {       \
  90                 OBJ_RELEASE(comm->c_coll->coll_ ## func ## _module);    \
  91             }                                                           \
  92             comm->c_coll->coll_ ## func = module->coll_ ## func;        \
  93             comm->c_coll->coll_ ## func ## _module = module;            \
  94             OBJ_RETAIN(module);                                         \
  95         }                                                               \
  96     } while (0)
  97 
  98 #define CHECK_NULL(what, comm, func)                                    \
  99   ( (what) = # func , NULL == (comm)->c_coll->coll_ ## func)
 100 
 101 /*
 102  * This function is called at the initialization time of every
 103  * communicator.  It is used to select which coll component will be
 104  * active for a given communicator.
 105  *
 106  * This selection logic is not for the weak.
 107  */
 108 int mca_coll_base_comm_select(ompi_communicator_t * comm)
 109 {
 110     opal_list_t *selectable;
 111     opal_list_item_t *item;
 112     char* which_func = "unknown";
 113     int ret;
 114 
 115     /* Announce */
 116     opal_output_verbose(9, ompi_coll_base_framework.framework_output,
 117                         "coll:base:comm_select: new communicator: %s (cid %d)",
 118                         comm->c_name, comm->c_contextid);
 119 
 120     /* Initialize all the relevant pointers, since they're used as
 121      * sentinel values */
 122     comm->c_coll = (mca_coll_base_comm_coll_t*)calloc(1, sizeof(mca_coll_base_comm_coll_t));
 123 
 124     opal_output_verbose(10, ompi_coll_base_framework.framework_output,
 125                         "coll:base:comm_select: Checking all available modules");
 126     selectable = check_components(&ompi_coll_base_framework.framework_components, comm);
 127 
 128     /* Upon return from the above, the modules list will contain the
 129        list of modules that returned (priority >= 0).  If we have no
 130        collective modules available, then print error and return. */
 131     if (NULL == selectable) {
 132         /* There's no modules available */
 133         opal_show_help("help-mca-coll-base.txt",
 134                        "comm-select:none-available", true);
 135         return OMPI_ERROR;
 136     }
 137 
 138     /* FIX ME - Do some kind of collective operation to find a module
 139        that everyone has available */
 140 
 141     /* do the selection loop */
 142     for (item = opal_list_remove_first(selectable);
 143          NULL != item; item = opal_list_remove_first(selectable)) {
 144 
 145         avail_coll_t *avail = (avail_coll_t *) item;
 146 
 147         /* initialize the module */
 148         ret = avail->ac_module->coll_module_enable(avail->ac_module, comm);
 149 
 150         opal_output_verbose(9, ompi_coll_base_framework.framework_output,
 151                             "coll:base:comm_select: selecting  %10s, priority %3d, %s",
 152                             avail->ac_component_name, avail->ac_priority,
 153                             (OMPI_SUCCESS == ret ? "Enabled": "Disabled") );
 154 
 155         if (OMPI_SUCCESS == ret) {
 156 
 157             /* copy over any of the pointers */
 158             COPY(avail->ac_module, comm, allgather);
 159             COPY(avail->ac_module, comm, allgatherv);
 160             COPY(avail->ac_module, comm, allreduce);
 161             COPY(avail->ac_module, comm, alltoall);
 162             COPY(avail->ac_module, comm, alltoallv);
 163             COPY(avail->ac_module, comm, alltoallw);
 164             COPY(avail->ac_module, comm, barrier);
 165             COPY(avail->ac_module, comm, bcast);
 166             COPY(avail->ac_module, comm, exscan);
 167             COPY(avail->ac_module, comm, gather);
 168             COPY(avail->ac_module, comm, gatherv);
 169             COPY(avail->ac_module, comm, reduce);
 170             COPY(avail->ac_module, comm, reduce_scatter_block);
 171             COPY(avail->ac_module, comm, reduce_scatter);
 172             COPY(avail->ac_module, comm, scan);
 173             COPY(avail->ac_module, comm, scatter);
 174             COPY(avail->ac_module, comm, scatterv);
 175 
 176             COPY(avail->ac_module, comm, iallgather);
 177             COPY(avail->ac_module, comm, iallgatherv);
 178             COPY(avail->ac_module, comm, iallreduce);
 179             COPY(avail->ac_module, comm, ialltoall);
 180             COPY(avail->ac_module, comm, ialltoallv);
 181             COPY(avail->ac_module, comm, ialltoallw);
 182             COPY(avail->ac_module, comm, ibarrier);
 183             COPY(avail->ac_module, comm, ibcast);
 184             COPY(avail->ac_module, comm, iexscan);
 185             COPY(avail->ac_module, comm, igather);
 186             COPY(avail->ac_module, comm, igatherv);
 187             COPY(avail->ac_module, comm, ireduce);
 188             COPY(avail->ac_module, comm, ireduce_scatter_block);
 189             COPY(avail->ac_module, comm, ireduce_scatter);
 190             COPY(avail->ac_module, comm, iscan);
 191             COPY(avail->ac_module, comm, iscatter);
 192             COPY(avail->ac_module, comm, iscatterv);
 193 
 194             COPY(avail->ac_module, comm, allgather_init);
 195             COPY(avail->ac_module, comm, allgatherv_init);
 196             COPY(avail->ac_module, comm, allreduce_init);
 197             COPY(avail->ac_module, comm, alltoall_init);
 198             COPY(avail->ac_module, comm, alltoallv_init);
 199             COPY(avail->ac_module, comm, alltoallw_init);
 200             COPY(avail->ac_module, comm, barrier_init);
 201             COPY(avail->ac_module, comm, bcast_init);
 202             COPY(avail->ac_module, comm, exscan_init);
 203             COPY(avail->ac_module, comm, gather_init);
 204             COPY(avail->ac_module, comm, gatherv_init);
 205             COPY(avail->ac_module, comm, reduce_init);
 206             COPY(avail->ac_module, comm, reduce_scatter_block_init);
 207             COPY(avail->ac_module, comm, reduce_scatter_init);
 208             COPY(avail->ac_module, comm, scan_init);
 209             COPY(avail->ac_module, comm, scatter_init);
 210             COPY(avail->ac_module, comm, scatterv_init);
 211 
 212             /* We can not reliably check if this comm has a topology
 213              * at this time. The flags are set *after* coll_select */
 214             COPY(avail->ac_module, comm, neighbor_allgather);
 215             COPY(avail->ac_module, comm, neighbor_allgatherv);
 216             COPY(avail->ac_module, comm, neighbor_alltoall);
 217             COPY(avail->ac_module, comm, neighbor_alltoallv);
 218             COPY(avail->ac_module, comm, neighbor_alltoallw);
 219 
 220             COPY(avail->ac_module, comm, ineighbor_allgather);
 221             COPY(avail->ac_module, comm, ineighbor_allgatherv);
 222             COPY(avail->ac_module, comm, ineighbor_alltoall);
 223             COPY(avail->ac_module, comm, ineighbor_alltoallv);
 224             COPY(avail->ac_module, comm, ineighbor_alltoallw);
 225 
 226             COPY(avail->ac_module, comm, neighbor_allgather_init);
 227             COPY(avail->ac_module, comm, neighbor_allgatherv_init);
 228             COPY(avail->ac_module, comm, neighbor_alltoall_init);
 229             COPY(avail->ac_module, comm, neighbor_alltoallv_init);
 230             COPY(avail->ac_module, comm, neighbor_alltoallw_init);
 231 
 232             COPY(avail->ac_module, comm, reduce_local);
 233         }
 234         /* release the original module reference and the list item */
 235         OBJ_RELEASE(avail->ac_module);
 236         OBJ_RELEASE(avail);
 237     }
 238 
 239     /* Done with the list from the check_components() call so release it. */
 240     OBJ_RELEASE(selectable);
 241 
 242     /* check to make sure no NULLs */
 243     if (CHECK_NULL(which_func, comm, allgather) ||
 244         CHECK_NULL(which_func, comm, allgatherv) ||
 245         CHECK_NULL(which_func, comm, allreduce) ||
 246         CHECK_NULL(which_func, comm, alltoall) ||
 247         CHECK_NULL(which_func, comm, alltoallv) ||
 248         CHECK_NULL(which_func, comm, alltoallw) ||
 249         CHECK_NULL(which_func, comm, barrier) ||
 250         CHECK_NULL(which_func, comm, bcast) ||
 251         ((OMPI_COMM_IS_INTRA(comm)) && CHECK_NULL(which_func, comm, exscan)) ||
 252         CHECK_NULL(which_func, comm, gather) ||
 253         CHECK_NULL(which_func, comm, gatherv) ||
 254         CHECK_NULL(which_func, comm, reduce) ||
 255         CHECK_NULL(which_func, comm, reduce_scatter_block) ||
 256         CHECK_NULL(which_func, comm, reduce_scatter) ||
 257         ((OMPI_COMM_IS_INTRA(comm)) && CHECK_NULL(which_func, comm, scan)) ||
 258         CHECK_NULL(which_func, comm, scatter) ||
 259         CHECK_NULL(which_func, comm, scatterv) ||
 260         CHECK_NULL(which_func, comm, iallgather) ||
 261         CHECK_NULL(which_func, comm, iallgatherv) ||
 262         CHECK_NULL(which_func, comm, iallreduce) ||
 263         CHECK_NULL(which_func, comm, ialltoall) ||
 264         CHECK_NULL(which_func, comm, ialltoallv) ||
 265         CHECK_NULL(which_func, comm, ialltoallw) ||
 266         CHECK_NULL(which_func, comm, ibarrier) ||
 267         CHECK_NULL(which_func, comm, ibcast) ||
 268         ((OMPI_COMM_IS_INTRA(comm)) && CHECK_NULL(which_func, comm, iexscan)) ||
 269         CHECK_NULL(which_func, comm, igather) ||
 270         CHECK_NULL(which_func, comm, igatherv) ||
 271         CHECK_NULL(which_func, comm, ireduce) ||
 272         CHECK_NULL(which_func, comm, ireduce_scatter_block) ||
 273         CHECK_NULL(which_func, comm, ireduce_scatter) ||
 274         ((OMPI_COMM_IS_INTRA(comm)) && CHECK_NULL(which_func, comm, iscan)) ||
 275         CHECK_NULL(which_func, comm, iscatter) ||
 276         CHECK_NULL(which_func, comm, iscatterv) ||
 277         CHECK_NULL(which_func, comm, allgather_init) ||
 278         CHECK_NULL(which_func, comm, allgatherv_init) ||
 279         CHECK_NULL(which_func, comm, allreduce_init) ||
 280         CHECK_NULL(which_func, comm, alltoall_init) ||
 281         CHECK_NULL(which_func, comm, alltoallv_init) ||
 282         CHECK_NULL(which_func, comm, alltoallw_init) ||
 283         CHECK_NULL(which_func, comm, barrier_init) ||
 284         CHECK_NULL(which_func, comm, bcast_init) ||
 285         ((OMPI_COMM_IS_INTRA(comm)) && CHECK_NULL(which_func, comm, exscan_init)) ||
 286         CHECK_NULL(which_func, comm, gather_init) ||
 287         CHECK_NULL(which_func, comm, gatherv_init) ||
 288         CHECK_NULL(which_func, comm, reduce_init) ||
 289         CHECK_NULL(which_func, comm, reduce_scatter_block_init) ||
 290         CHECK_NULL(which_func, comm, reduce_scatter_init) ||
 291         ((OMPI_COMM_IS_INTRA(comm)) && CHECK_NULL(which_func, comm, scan_init)) ||
 292         CHECK_NULL(which_func, comm, scatter_init) ||
 293         CHECK_NULL(which_func, comm, scatterv_init) ||
 294         CHECK_NULL(which_func, comm, reduce_local) ) {
 295         /* TODO -- Once the topology flags are set before coll_select then
 296          * check if neighborhood collectives have been set. */
 297 
 298         opal_show_help("help-mca-coll-base.txt",
 299                        "comm-select:no-function-available", true, which_func);
 300 
 301          mca_coll_base_comm_unselect(comm);
 302         return OMPI_ERR_NOT_FOUND;
 303     }
 304     return OMPI_SUCCESS;
 305 }
 306 
 307 static int avail_coll_compare (opal_list_item_t **a,
 308                                opal_list_item_t **b) {
 309     avail_coll_t *acoll = (avail_coll_t *) *a;
 310     avail_coll_t *bcoll = (avail_coll_t *) *b;
 311 
 312     if (acoll->ac_priority > bcoll->ac_priority) {
 313         return 1;
 314     } else if (acoll->ac_priority < bcoll->ac_priority) {
 315         return -1;
 316     }
 317 
 318     return 0;
 319 }
 320 
 321 /*
 322  * For each module in the list, check and see if it wants to run, and
 323  * do the resulting priority comparison.  Make a list of modules to be
 324  * only those who returned that they want to run, and put them in
 325  * priority order.
 326  */
 327 static opal_list_t *check_components(opal_list_t * components,
 328                                      ompi_communicator_t * comm)
 329 {
 330     int priority;
 331     const mca_base_component_t *component;
 332     mca_base_component_list_item_t *cli;
 333     mca_coll_base_module_2_3_0_t *module;
 334     opal_list_t *selectable;
 335     avail_coll_t *avail;
 336 
 337     /* Make a list of the components that query successfully */
 338     selectable = OBJ_NEW(opal_list_t);
 339 
 340     /* Scan through the list of components */
 341     OPAL_LIST_FOREACH(cli, &ompi_coll_base_framework.framework_components, mca_base_component_list_item_t) {
 342         component = cli->cli_component;
 343 
 344         priority = check_one_component(comm, component, &module);
 345         if (priority >= 0) {
 346             /* We have a component that indicated that it wants to run
 347                by giving us a module */
 348             avail = OBJ_NEW(avail_coll_t);
 349             avail->ac_priority = priority;
 350             avail->ac_module = module;
 351             // Point to the string so we don't have to free later
 352             avail->ac_component_name = component->mca_component_name;
 353 
 354             opal_list_append(selectable, &avail->super);
 355         }
 356         else {
 357             opal_output_verbose(10, ompi_coll_base_framework.framework_output,
 358                                 "coll:base:comm_select: component disqualified: %s (priority %d < 0)",
 359                                 component->mca_component_name, priority );
 360 
 361             // If the disqualified collective returned a module make sure we
 362             // release it here, since it will become a leak otherwise.
 363             if( NULL != module ) {
 364                 OBJ_RELEASE(module);
 365                 module = NULL;
 366             }
 367         }
 368     }
 369 
 370     /* If we didn't find any available components, return an error */
 371     if (0 == opal_list_get_size(selectable)) {
 372         OBJ_RELEASE(selectable);
 373         return NULL;
 374     }
 375 
 376     /* Put this list in priority order */
 377     opal_list_sort(selectable, avail_coll_compare);
 378 
 379     /* All done */
 380     return selectable;
 381 }
 382 
 383 
 384 /*
 385  * Check a single component
 386  */
 387 static int check_one_component(ompi_communicator_t * comm,
 388                                const mca_base_component_t * component,
 389                                mca_coll_base_module_2_3_0_t ** module)
 390 {
 391     int err;
 392     int priority = -1;
 393 
 394     err = query(component, comm, &priority, module);
 395 
 396     if (OMPI_SUCCESS == err) {
 397         priority = (priority < 100) ? priority : 100;
 398         opal_output_verbose(10, ompi_coll_base_framework.framework_output,
 399                             "coll:base:comm_select: component available: %s, priority: %d",
 400                             component->mca_component_name, priority);
 401 
 402     } else {
 403         priority = -1;
 404         opal_output_verbose(10, ompi_coll_base_framework.framework_output,
 405                             "coll:base:comm_select: component not available: %s",
 406                             component->mca_component_name);
 407     }
 408 
 409     return priority;
 410 }
 411 
 412 
 413 /**************************************************************************
 414  * Query functions
 415  **************************************************************************/
 416 
 417 /*
 418  * Take any version of a coll module, query it, and return the right
 419  * module struct
 420  */
 421 static int query(const mca_base_component_t * component,
 422                  ompi_communicator_t * comm,
 423                  int *priority, mca_coll_base_module_2_3_0_t ** module)
 424 {
 425     *module = NULL;
 426     if (2 == component->mca_type_major_version &&
 427         0 == component->mca_type_minor_version &&
 428         0 == component->mca_type_release_version) {
 429         const mca_coll_base_component_2_0_0_t *coll100 =
 430             (mca_coll_base_component_2_0_0_t *) component;
 431 
 432         return query_2_0_0(coll100, comm, priority, module);
 433     }
 434 
 435     /* Unknown coll API version -- return error */
 436 
 437     return OMPI_ERROR;
 438 }
 439 
 440 
 441 static int query_2_0_0(const mca_coll_base_component_2_0_0_t * component,
 442                        ompi_communicator_t * comm, int *priority,
 443                        mca_coll_base_module_2_3_0_t ** module)
 444 {
 445     mca_coll_base_module_2_3_0_t *ret;
 446 
 447     /* There's currently no need for conversion */
 448 
 449     ret = component->collm_comm_query(comm, priority);
 450     if (NULL != ret) {
 451         *module = ret;
 452         return OMPI_SUCCESS;
 453     }
 454 
 455     return OMPI_ERROR;
 456 }

/* [<][>][^][v][top][bottom][index][help] */