root/oshmem/mca/scoll/base/scoll_base_select.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. scoll_null_barrier
  2. scoll_null_broadcast
  3. scoll_null_collect
  4. scoll_null_reduce
  5. scoll_null_alltoall
  6. mca_scoll_base_group_unselect
  7. mca_scoll_base_select
  8. avail_coll_compare
  9. check_components
  10. check_one_component
  11. query
  12. query_1_0_0

   1 /*
   2  * Copyright (c) 2013-2018 Mellanox Technologies, Inc.
   3  *                         All rights reserved.
   4  * Copyright (c) 2016      Cisco Systems, Inc.  All rights reserved.
   5  * $COPYRIGHT$
   6  *
   7  * Additional copyrights may follow
   8  *
   9  * $HEADER$
  10  */
  11 
  12 #include "oshmem_config.h"
  13 
  14 #include <stdio.h>
  15 #include <stdlib.h>
  16 #include <string.h>
  17 
  18 #include "oshmem/constants.h"
  19 
  20 #include "opal/class/opal_list.h"
  21 #include "oshmem/mca/mca.h"
  22 #include "opal/mca/base/base.h"
  23 #include "opal/mca/base/mca_base_component_repository.h"
  24 #include "ompi/util/timings.h"
  25 
  26 #include "oshmem/util/oshmem_util.h"
  27 #include "oshmem/mca/scoll/scoll.h"
  28 #include "oshmem/mca/scoll/base/base.h"
  29 #include "oshmem/proc/proc.h"
  30 #include "oshmem/runtime/runtime.h"
  31 
  32 /*
  33  * Local types
  34  */
  35 struct avail_com_t {
  36     opal_list_item_t super;
  37 
  38     int ac_priority;
  39     mca_scoll_base_module_t *ac_module;
  40 };
  41 typedef struct avail_com_t avail_com_t;
  42 
  43 /*
  44  * Local functions
  45  */
  46 static opal_list_t *check_components(opal_list_t * components,
  47                                      oshmem_group_t * group);
  48 static int check_one_component(oshmem_group_t * group,
  49                                const mca_base_component_t * component,
  50                                mca_scoll_base_module_1_0_0_t ** module);
  51 
  52 static int query(const mca_base_component_t * component,
  53                  oshmem_group_t * group,
  54                  int *priority,
  55                  mca_scoll_base_module_1_0_0_t ** module);
  56 
  57 static int query_1_0_0(const mca_scoll_base_component_1_0_0_t * scoll_component,
  58                        oshmem_group_t * group,
  59                        int *priority,
  60                        mca_scoll_base_module_1_0_0_t ** module);
  61 
  62 static int scoll_null_barrier(struct oshmem_group_t *group,
  63                               long *pSync,
  64                               int alg)
  65 {
  66     if (oshmem_proc_group_is_member(group)) {
  67         SCOLL_ERROR("internal error");
  68         oshmem_shmem_abort(-1);
  69         return OSHMEM_ERROR;
  70     }
  71     return OSHMEM_SUCCESS;
  72 }
  73 
  74 static int scoll_null_broadcast(struct oshmem_group_t *group,
  75                                 int PE_root,
  76                                 void *target,
  77                                 const void *source,
  78                                 size_t nlong,
  79                                 long *pSync,
  80                                 bool nlong_type,
  81                                 int alg)
  82 {
  83     if (oshmem_proc_group_is_member(group)) {
  84         SCOLL_ERROR("internal error");
  85         oshmem_shmem_abort(-1);
  86         return OSHMEM_ERROR;
  87     }
  88     return OSHMEM_SUCCESS;
  89 }
  90 
  91 static int scoll_null_collect(struct oshmem_group_t *group,
  92                               void *target,
  93                               const void *source,
  94                               size_t nlong,
  95                               long *pSync,
  96                               bool nlong_type,
  97                               int alg)
  98 {
  99     if (oshmem_proc_group_is_member(group)) {
 100         SCOLL_ERROR("internal error");
 101         oshmem_shmem_abort(-1);
 102         return OSHMEM_ERROR;
 103     }
 104     return OSHMEM_SUCCESS;
 105 }
 106 
 107 static int scoll_null_reduce(struct oshmem_group_t *group,
 108                              struct oshmem_op_t *op,
 109                              void *target,
 110                              const void *source,
 111                              size_t nlong,
 112                              long *pSync,
 113                              void *pWrk,
 114                              int alg)
 115 {
 116     if (oshmem_proc_group_is_member(group)) {
 117         SCOLL_ERROR("internal error");
 118         oshmem_shmem_abort(-1);
 119         return OSHMEM_ERROR;
 120     }
 121     return OSHMEM_SUCCESS;
 122 }
 123 
 124 static int scoll_null_alltoall(struct oshmem_group_t *group,
 125                               void *target,
 126                               const void *source,
 127                               ptrdiff_t dst, ptrdiff_t sst,
 128                               size_t nlong,
 129                               size_t element_size,
 130                               long *pSync,
 131                               int alg)
 132 {
 133     if (oshmem_proc_group_is_member(group)) {
 134         SCOLL_ERROR("internal error");
 135         oshmem_shmem_abort(-1);
 136         return OSHMEM_ERROR;
 137     }
 138     return OSHMEM_SUCCESS;
 139 }
 140 
 141 /*
 142  * Stuff for the OBJ interface
 143  */
 144 static OBJ_CLASS_INSTANCE(avail_com_t, opal_list_item_t, NULL, NULL);
 145 
 146 #define COPY(module, group, func)                                        \
 147     do {                                                                \
 148         if (NULL != module->scoll_ ## func) {                            \
 149             if (NULL != group->g_scoll.scoll_ ## func ## _module) {        \
 150                 OBJ_RELEASE(group->g_scoll.scoll_ ## func ## _module);     \
 151             }                                                           \
 152             group->g_scoll.scoll_ ## func = module->scoll_ ## func;         \
 153             group->g_scoll.scoll_ ## func ## _module = module;             \
 154             OBJ_RETAIN(module);                                         \
 155         }                                                               \
 156     } while (0)
 157 
 158 #define CLOSE(group, func)                                       \
 159     do {                                                        \
 160             if (NULL != group->g_scoll.scoll_ ## func ## _module) {    \
 161             OBJ_RELEASE(group->g_scoll.scoll_ ## func ## _module); \
 162             group->g_scoll.scoll_## func = NULL;                   \
 163             group->g_scoll.scoll_## func ## _module = NULL;        \
 164         }                                                       \
 165     } while (0)
 166 
 167 int mca_scoll_base_group_unselect(struct oshmem_group_t * group)
 168 {
 169     /*
 170      * scoll close() is called before group destructors, so
 171      * do close group collectives if scoll modules are no longer
 172      * valid
 173      *
 174      * there is a memory leak here, because not doing close means
 175      * that we leaving object with dangling ref counts
 176      */
 177     SCOLL_VERBOSE(10, "scoll:base:group_unselect: group: %d", group->id);
 178 
 179     CLOSE(group, barrier);
 180     CLOSE(group, broadcast);
 181     CLOSE(group, collect);
 182     CLOSE(group, reduce);
 183     CLOSE(group, alltoall);
 184 
 185     /* All done */
 186     return OSHMEM_SUCCESS;
 187 }
 188 /*
 189  * This function is called at the initialization time of every
 190  * group.  It is used to select which coll component will be
 191  * active for a given group.
 192  */
 193 int mca_scoll_base_select(struct oshmem_group_t *group)
 194 {
 195     opal_list_t *selectable;
 196     opal_list_item_t *item;
 197     int ret;
 198 
 199     OPAL_TIMING_ENV_INIT(mca_scoll_base_select);
 200 
 201     /* Announce */
 202     SCOLL_VERBOSE(10, "scoll:base:group_select: new group: %d", group->id);
 203     mca_scoll_base_group_unselect(group);
 204     memset(&group->g_scoll, 0, sizeof(mca_scoll_base_group_scoll_t));
 205     if (!oshmem_proc_group_is_member(group)) {
 206         group->g_scoll.scoll_barrier = scoll_null_barrier;
 207         group->g_scoll.scoll_broadcast = scoll_null_broadcast;
 208         group->g_scoll.scoll_collect = scoll_null_collect;
 209         group->g_scoll.scoll_reduce = scoll_null_reduce;
 210         group->g_scoll.scoll_alltoall = scoll_null_alltoall;
 211         return OSHMEM_SUCCESS;
 212     }
 213 
 214     OPAL_TIMING_ENV_NEXT(mca_scoll_base_select, "setup");
 215 
 216     SCOLL_VERBOSE(10,
 217                   "scoll:base:group_select: Checking all available modules");
 218     selectable = check_components(&oshmem_scoll_base_framework.framework_components, group);
 219 
 220     /* Upon return from the above, the modules list will contain the
 221      list of modules that returned (priority >= 0).  If we have no
 222      collective modules available, then print error and return. */
 223     if (NULL == selectable) {
 224         /* There's no modules available */
 225         return OSHMEM_ERROR;
 226     }
 227 
 228     OPAL_TIMING_ENV_NEXT(mca_scoll_base_select, "check_components");
 229 
 230     /* do the selection loop */
 231     for (item = opal_list_remove_first(selectable); NULL != item; item =
 232             opal_list_remove_first(selectable)) {
 233         avail_com_t *avail = (avail_com_t *) item;
 234         ret = avail->ac_module->scoll_module_enable(avail->ac_module, group);
 235         if (OSHMEM_SUCCESS != ret) {
 236             mca_scoll_base_group_unselect(group);
 237         } else {
 238             COPY(avail->ac_module, group, barrier);
 239             COPY(avail->ac_module, group, broadcast);
 240             COPY(avail->ac_module, group, collect);
 241             COPY(avail->ac_module, group, reduce);
 242             COPY(avail->ac_module, group, alltoall);
 243         }
 244         OBJ_RELEASE(avail->ac_module);
 245         OBJ_RELEASE(avail);
 246     }
 247 
 248     OPAL_TIMING_ENV_NEXT(mca_scoll_base_select, "select_loop");
 249 
 250     /* Done with the list from the check_components() call so release it. */
 251     OBJ_RELEASE(selectable);
 252     if ((NULL == group->g_scoll.scoll_barrier)
 253             || (NULL == group->g_scoll.scoll_broadcast)
 254             || (NULL == group->g_scoll.scoll_collect)
 255             || (NULL == group->g_scoll.scoll_reduce)
 256             || (NULL == group->g_scoll.scoll_alltoall)) {
 257         mca_scoll_base_group_unselect(group);
 258         return OSHMEM_ERR_NOT_FOUND;
 259     }
 260 
 261     OPAL_TIMING_ENV_NEXT(mca_scoll_base_select, "release");
 262 
 263     return OSHMEM_SUCCESS;
 264 }
 265 
 266 static int avail_coll_compare(opal_list_item_t **a,
 267                               opal_list_item_t **b)
 268 {
 269     avail_com_t *acom = (avail_com_t *) *a;
 270     avail_com_t *bcom = (avail_com_t *) *b;
 271 
 272     if (acom->ac_priority > bcom->ac_priority) {
 273         return 1;
 274     } else if (acom->ac_priority < bcom->ac_priority) {
 275         return -1;
 276     }
 277 
 278     return 0;
 279 }
 280 
 281 /*
 282  * For each module in the list, check and see if it wants to run, and
 283  * do the resulting priority comparison.  Make a list of modules to be
 284  * only those who returned that they want to run, and put them in
 285  * priority order.
 286  */
 287 static opal_list_t *check_components(opal_list_t *components,
 288                                      oshmem_group_t *group)
 289 {
 290     int priority;
 291     const mca_base_component_t *component;
 292     mca_base_component_list_item_t *cli;
 293     mca_scoll_base_module_1_0_0_t *module;
 294     opal_list_t *selectable;
 295     avail_com_t *avail;
 296 
 297     /* Make a list of the components that query successfully */
 298     selectable = OBJ_NEW(opal_list_t);
 299 
 300     /* Scan through the list of components */
 301     OPAL_LIST_FOREACH(cli, &oshmem_scoll_base_framework.framework_components, mca_base_component_list_item_t) {
 302         component = cli->cli_component;
 303 
 304         priority = check_one_component(group, component, &module);
 305         if (priority >= 0) {
 306             /* We have a component that indicated that it wants to run
 307                by giving us a module */
 308             avail = OBJ_NEW(avail_com_t);
 309             avail->ac_priority = priority;
 310             avail->ac_module = module;
 311 
 312             opal_list_append(selectable, &avail->super);
 313         }
 314     }
 315 
 316     /* If we didn't find any available components, return an error */
 317     if (0 == opal_list_get_size(selectable)) {
 318         OBJ_RELEASE(selectable);
 319         return NULL;
 320     }
 321 
 322     /* Put this list in priority order */
 323     opal_list_sort(selectable, avail_coll_compare);
 324 
 325     /* All done */
 326     return selectable;
 327 }
 328 
 329 /*
 330  * Check a single component
 331  */
 332 static int check_one_component(oshmem_group_t *group,
 333                                const mca_base_component_t *component,
 334                                mca_scoll_base_module_1_0_0_t **module)
 335 {
 336     int err;
 337     int priority = -1;
 338 
 339     err = query(component, group, &priority, module);
 340 
 341     if (OSHMEM_SUCCESS == err) {
 342         priority = (priority < 100) ? priority : 100;
 343         SCOLL_VERBOSE(10,
 344                       "scoll:base:group_select: component available: %s, priority: %d",
 345                       component->mca_component_name, priority);
 346 
 347     } else {
 348         priority = -1;
 349         SCOLL_VERBOSE(10,
 350                       "scoll:base:group_select: component not available: %s",
 351                       component->mca_component_name);
 352     }
 353 
 354     return priority;
 355 }
 356 
 357 /**************************************************************************
 358  * Query functions
 359  **************************************************************************/
 360 
 361 /*
 362  * Take any version of a coll module, query it, and return the right
 363  * module struct
 364  */
 365 static int query(const mca_base_component_t * component,
 366                  oshmem_group_t *group,
 367                  int *priority,
 368                  mca_scoll_base_module_1_0_0_t **module)
 369 {
 370     *module = NULL;
 371     if (1 == component->mca_type_major_version
 372             && 0 == component->mca_type_minor_version
 373             && 0 == component->mca_type_release_version) {
 374         const mca_scoll_base_component_1_0_0_t *coll100 =
 375                 (mca_scoll_base_component_1_0_0_t *) component;
 376 
 377         return query_1_0_0(coll100, group, priority, module);
 378     }
 379 
 380     /* Unknown coll API version -- return error */
 381 
 382     return OSHMEM_ERROR;
 383 }
 384 
 385 static int query_1_0_0(const mca_scoll_base_component_1_0_0_t *component,
 386                        oshmem_group_t *group,
 387                        int *priority,
 388                        mca_scoll_base_module_1_0_0_t **module)
 389 {
 390     mca_scoll_base_module_1_0_0_t *ret;
 391 
 392     /* There's currently no need for conversion */
 393 
 394     ret = component->scoll_query(group, priority);
 395     if (NULL != ret) {
 396         *module = ret;
 397         return OSHMEM_SUCCESS;
 398     }
 399 
 400     return OSHMEM_ERROR;
 401 }

/* [<][>][^][v][top][bottom][index][help] */