root/ompi/mca/pml/base/pml_base_select.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mca_pml_base_select
  2. mca_pml_base_pml_selected
  3. mca_pml_base_pml_check_selected

   1 /* -*- Mode: C; c-basic-offset:4 ; -*- */
   2 /*
   3  * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
   4  *                         University Research and Technology
   5  *                         Corporation.  All rights reserved.
   6  * Copyright (c) 2004-2007 The University of Tennessee and The University
   7  *                         of Tennessee Research Foundation.  All rights
   8  *                         reserved.
   9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
  10  *                         University of Stuttgart.  All rights reserved.
  11  * Copyright (c) 2004-2005 The Regents of the University of California.
  12  *                         All rights reserved.
  13  * Copyright (c) 2012      Los Alamos National Security, LLC.  All rights
  14  *                         reserved.
  15  * Copyright (c) 2013-2014 Intel, Inc. All rights reserved
  16  * Copyright (c) 2015 Cisco Systems, Inc.  All rights reserved.
  17  * $COPYRIGHT$
  18  *
  19  * Additional copyrights may follow
  20  *
  21  * $HEADER$
  22  */
  23 
  24 #include "ompi_config.h"
  25 
  26 #include <string.h>
  27 
  28 #include "opal/class/opal_list.h"
  29 #include "opal/util/output.h"
  30 #include "opal/util/show_help.h"
  31 #include "opal/runtime/opal_progress.h"
  32 #include "ompi/mca/mca.h"
  33 #include "opal/mca/base/base.h"
  34 #include "opal/runtime/opal.h"
  35 #include "opal/mca/pmix/pmix.h"
  36 
  37 #include "ompi/constants.h"
  38 #include "ompi/mca/pml/pml.h"
  39 #include "ompi/mca/pml/base/base.h"
  40 #include "ompi/proc/proc.h"
  41 
  42 typedef struct opened_component_t {
  43   opal_list_item_t super;
  44   mca_pml_base_component_t *om_component;
  45 } opened_component_t;
  46 
  47 static bool modex_reqd=false;
  48 
  49 /**
  50  * Function for selecting one component from all those that are
  51  * available.
  52  *
  53  * Call the init function on all available components and get their
  54  * priorities.  Select the component with the highest priority.  All
  55  * other components will be closed and unloaded.  The selected component
  56  * will have all of its function pointers saved and returned to the
  57  * caller.
  58  */
  59 int mca_pml_base_select(bool enable_progress_threads,
  60                         bool enable_mpi_threads)
  61 {
  62     int i, priority = 0, best_priority = 0, num_pml = 0;
  63     opal_list_item_t *item = NULL;
  64     mca_base_component_list_item_t *cli = NULL;
  65     mca_pml_base_component_t *component = NULL, *best_component = NULL;
  66     mca_pml_base_module_t *module = NULL, *best_module = NULL;
  67     opal_list_t opened;
  68     opened_component_t *om = NULL;
  69     bool found_pml;
  70 #if OPAL_ENABLE_FT_CR == 1
  71     mca_pml_base_component_t *wrapper_component = NULL;
  72     int wrapper_priority = -1;
  73 #endif
  74 
  75     /* Traverse the list of available components; call their init
  76        functions. */
  77 
  78     best_priority = -1;
  79     best_component = NULL;
  80     module = NULL;
  81     OBJ_CONSTRUCT(&opened, opal_list_t);
  82     OPAL_LIST_FOREACH(cli, &ompi_pml_base_framework.framework_components, mca_base_component_list_item_t) {
  83         component = (mca_pml_base_component_t *) cli->cli_component;
  84 
  85         /* if there is an include list - item must be in the list to be included */
  86         found_pml = false;
  87         for( i = 0; i < opal_pointer_array_get_size(&mca_pml_base_pml); i++) {
  88             char * tmp_val = NULL;
  89             tmp_val = (char *) opal_pointer_array_get_item(&mca_pml_base_pml, i);
  90             if( NULL == tmp_val) {
  91                 continue;
  92             }
  93 
  94             if(0 == strncmp(component->pmlm_version.mca_component_name,
  95                             tmp_val, strlen(component->pmlm_version.mca_component_name)) ) {
  96                 found_pml = true;
  97                 break;
  98             }
  99         }
 100 
 101         if(!found_pml && opal_pointer_array_get_size(&mca_pml_base_pml)) {
 102             opal_output_verbose( 10, ompi_pml_base_framework.framework_output,
 103                                      "select: component %s not in the include list",
 104                                      component->pmlm_version.mca_component_name );
 105 
 106             continue;
 107         }
 108 
 109         /* if there is no init function - ignore it */
 110         if (NULL == component->pmlm_init) {
 111             opal_output_verbose( 10, ompi_pml_base_framework.framework_output,
 112                                  "select: no init function; ignoring component %s",
 113                                  component->pmlm_version.mca_component_name );
 114             continue;
 115         }
 116 
 117         /* this is a pml that could be considered */
 118         num_pml++;
 119 
 120         /* Init component to get its priority */
 121         opal_output_verbose( 10, ompi_pml_base_framework.framework_output,
 122                              "select: initializing %s component %s",
 123                              component->pmlm_version.mca_type_name,
 124                              component->pmlm_version.mca_component_name );
 125         priority = best_priority;
 126         module = component->pmlm_init(&priority, enable_progress_threads,
 127                                       enable_mpi_threads);
 128         if (NULL == module) {
 129             opal_output_verbose( 10, ompi_pml_base_framework.framework_output,
 130                                  "select: init returned failure for component %s",
 131                                  component->pmlm_version.mca_component_name );
 132             continue;
 133         }
 134 
 135         opal_output_verbose( 10, ompi_pml_base_framework.framework_output,
 136                              "select: init returned priority %d", priority );
 137 #if OPAL_ENABLE_FT_CR == 1
 138         /* Determine if this is the wrapper component */
 139         if( priority <= PML_SELECT_WRAPPER_PRIORITY) {
 140             opal_output_verbose( 10, ompi_pml_base_framework.framework_output,
 141                                  "pml:select: Wrapper Component: Component %s was determined to be a Wrapper PML with priority %d",
 142                                  component->pmlm_version.mca_component_name, priority );
 143             wrapper_priority  = priority;
 144             wrapper_component = component;
 145             continue;
 146         }
 147         /* Otherwise determine if this is the best component */
 148         else
 149 #endif
 150         if (priority > best_priority) {
 151             best_priority = priority;
 152             best_component = component;
 153             best_module = module;
 154         }
 155 
 156         om = (opened_component_t*)malloc(sizeof(opened_component_t));
 157         if (NULL == om) {
 158             return OMPI_ERR_OUT_OF_RESOURCE;
 159         }
 160         OBJ_CONSTRUCT(om, opal_list_item_t);
 161         om->om_component = component;
 162         opal_list_append(&opened, (opal_list_item_t*) om);
 163     }
 164 
 165     /* Finished querying all components.  Check for the bozo case. */
 166 
 167     if( NULL == best_component ) {
 168         opal_show_help("help-mca-base.txt", "find-available:none found",
 169                        true, "pml",
 170                        opal_process_info.nodename,
 171                        "pml");
 172         for( i = 0; i < opal_pointer_array_get_size(&mca_pml_base_pml); i++) {
 173             char * tmp_val = NULL;
 174             tmp_val = (char *) opal_pointer_array_get_item(&mca_pml_base_pml, i);
 175             if( NULL == tmp_val) {
 176                 continue;
 177             }
 178             ompi_rte_abort(1, "PML %s cannot be selected", tmp_val);
 179         }
 180         if(0 == i) {
 181             ompi_rte_abort(2, "No pml component available.  This shouldn't happen.");
 182         }
 183     }
 184 
 185     opal_output_verbose( 10, ompi_pml_base_framework.framework_output,
 186                          "selected %s best priority %d\n",
 187                          best_component->pmlm_version.mca_component_name, best_priority);
 188 
 189     /* if more than one PML could be considered, then we still need the
 190      * modex since we cannot know which one will be selected on all procs
 191      */
 192     if (1 < num_pml) {
 193         modex_reqd = true;
 194     }
 195 
 196     /* Save the winner */
 197 
 198     mca_pml_base_selected_component = *best_component;
 199     mca_pml = *best_module;
 200     opal_output_verbose( 10, ompi_pml_base_framework.framework_output,
 201                          "select: component %s selected",
 202                          mca_pml_base_selected_component.pmlm_version.mca_component_name );
 203 
 204     /* Finalize all non-selected components */
 205 
 206     for (item = opal_list_remove_first(&opened);
 207          NULL != item;
 208          item = opal_list_remove_first(&opened)) {
 209         om = (opened_component_t *) item;
 210 
 211         if (om->om_component != best_component
 212 #if OPAL_ENABLE_FT_CR == 1
 213             && om->om_component != wrapper_component
 214 #endif
 215             ) {
 216             /* Finalize */
 217 
 218             if (NULL != om->om_component->pmlm_finalize) {
 219 
 220                 /* Blatently ignore the return code (what would we do to
 221                    recover, anyway?  This component is going away, so errors
 222                    don't matter anymore) */
 223 
 224                 om->om_component->pmlm_finalize();
 225                 opal_output_verbose(10, ompi_pml_base_framework.framework_output,
 226                                     "select: component %s not selected / finalized",
 227                                     om->om_component->pmlm_version.mca_component_name);
 228             }
 229         }
 230         OBJ_DESTRUCT( om );
 231         free(om);
 232     }
 233     OBJ_DESTRUCT( &opened );
 234 
 235 #if OPAL_ENABLE_FT_CR == 1
 236     /* Remove the wrapper component from the ompi_pml_base_framework.framework_components list
 237      * so we don't unload it prematurely in the next call
 238      */
 239     if( NULL != wrapper_component ) {
 240         OPAL_LIST_FOREACH(cli, &ompi_pml_base_framework.framework_components, mca_base_component_list_item_t) {
 241             component = (mca_pml_base_component_t *) cli->cli_component;
 242 
 243             if( component == wrapper_component ) {
 244                 opal_list_remove_item(&ompi_pml_base_framework.framework_components, item);
 245             }
 246         }
 247     }
 248 #endif
 249 
 250     /* This base function closes, unloads, and removes from the
 251        available list all unselected components.  The available list will
 252        contain only the selected component. */
 253 
 254     mca_base_components_close(ompi_pml_base_framework.framework_output,
 255                               &ompi_pml_base_framework.framework_components,
 256                               (mca_base_component_t *) best_component);
 257 
 258 #if OPAL_ENABLE_FT_CR == 1
 259     /* If we have a wrapper then initalize it */
 260     if( NULL != wrapper_component ) {
 261         priority = PML_SELECT_WRAPPER_PRIORITY;
 262         opal_output_verbose( 10, ompi_pml_base_framework.framework_output,
 263                              "pml:select: Wrapping: Component %s [%d] is being wrapped by component %s [%d]",
 264                              mca_pml_base_selected_component.pmlm_version.mca_component_name,
 265                              best_priority,
 266                              wrapper_component->pmlm_version.mca_component_name,
 267                              wrapper_priority );
 268 
 269         /* Ask the wrapper commponent to wrap around the currently
 270          * selected component. Indicated by the priority value provided
 271          * this will cause the wrapper to do something different this time around
 272          */
 273         module = wrapper_component->pmlm_init(&priority,
 274                                               enable_progress_threads,
 275                                               enable_mpi_threads);
 276         /* Replace with the wrapper */
 277         best_component = wrapper_component;
 278         mca_pml_base_selected_component = *best_component;
 279         best_module = module;
 280         mca_pml     = *best_module;
 281     }
 282 #endif
 283 
 284     /* register the winner's callback */
 285     if( NULL != mca_pml.pml_progress ) {
 286         opal_progress_register(mca_pml.pml_progress);
 287     }
 288 
 289     /* register winner in the modex */
 290     if (modex_reqd && 0 == OMPI_PROC_MY_NAME->vpid) {
 291         mca_pml_base_pml_selected(best_component->pmlm_version.mca_component_name);
 292     }
 293 
 294     /* All done */
 295 
 296     return OMPI_SUCCESS;
 297 }
 298 
 299 /* need a "commonly" named PML structure so everything ends up in the
 300    same modex field */
 301 static mca_base_component_t pml_base_component = {
 302     OMPI_MCA_BASE_VERSION_2_1_0("pml", 2, 0, 0),
 303     .mca_component_name = "base",
 304     .mca_component_major_version = 2,
 305     .mca_component_minor_version = 0,
 306     .mca_component_release_version = 0,
 307 };
 308 
 309 
 310 int
 311 mca_pml_base_pml_selected(const char *name)
 312 {
 313     int rc;
 314 
 315     OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL, &pml_base_component, name, strlen(name) + 1);
 316     return rc;
 317 }
 318 
 319 int
 320 mca_pml_base_pml_check_selected(const char *my_pml,
 321                                 ompi_proc_t **procs,
 322                                 size_t nprocs)
 323 {
 324     size_t size;
 325     int ret;
 326     char *remote_pml;
 327 
 328     /* if no modex was required by the PML, then
 329      * we can assume success
 330      */
 331     if (!modex_reqd) {
 332         opal_output_verbose( 10, ompi_pml_base_framework.framework_output,
 333                             "check:select: modex not reqd");
 334         return OMPI_SUCCESS;
 335     }
 336 
 337     /* if we are rank=0, then we can also assume success */
 338     if (0 == OMPI_PROC_MY_NAME->vpid) {
 339         opal_output_verbose( 10, ompi_pml_base_framework.framework_output,
 340                             "check:select: rank=0");
 341         return OMPI_SUCCESS;
 342     }
 343 
 344     /* get the name of the PML module selected by rank=0 */
 345     OPAL_MODEX_RECV(ret, &pml_base_component,
 346                     &procs[0]->super.proc_name, (void**) &remote_pml, &size);
 347 
 348     /* if this key wasn't found, then just assume all is well... */
 349     if (OMPI_SUCCESS != ret) {
 350         opal_output_verbose( 10, ompi_pml_base_framework.framework_output,
 351                             "check:select: modex data not found");
 352         return OMPI_SUCCESS;
 353     }
 354 
 355     /* the remote pml returned should never be NULL if an error
 356      * wasn't returned, but just to be safe, and since the check
 357      * is fast...let's be sure
 358      */
 359     if (NULL == remote_pml) {
 360         opal_output_verbose( 10, ompi_pml_base_framework.framework_output,
 361                             "check:select: got a NULL pml from rank=0");
 362         return OMPI_ERR_UNREACH;
 363     }
 364 
 365     opal_output_verbose( 10, ompi_pml_base_framework.framework_output,
 366                         "check:select: checking my pml %s against rank=0 pml %s",
 367                         my_pml, remote_pml);
 368 
 369     /* if that module doesn't match my own, return an error */
 370     if ((size != strlen(my_pml) + 1) ||
 371         (0 != strcmp(my_pml, remote_pml))) {
 372         opal_output(0, "%s selected pml %s, but peer %s on %s selected pml %s",
 373                     OMPI_NAME_PRINT(&ompi_proc_local()->super.proc_name),
 374                     my_pml, OMPI_NAME_PRINT(&procs[0]->super.proc_name),
 375                     (NULL == procs[0]->super.proc_hostname) ? "unknown" : procs[0]->super.proc_hostname,
 376                     remote_pml);
 377         free(remote_pml); /* cleanup before returning */
 378         return OMPI_ERR_UNREACH;
 379     }
 380 
 381     free(remote_pml);
 382     return OMPI_SUCCESS;
 383 }

/* [<][>][^][v][top][bottom][index][help] */