1 /*
2 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
3 * University Research and Technology
4 * Corporation. All rights reserved.
5 * Copyright (c) 2004-2005 The University of Tennessee and The University
6 * of Tennessee Research Foundation. All rights
7 * reserved.
8 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
9 * University of Stuttgart. All rights reserved.
10 * Copyright (c) 2004-2005 The Regents of the University of California.
11 * All rights reserved.
12 * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
13 * reserved.
14 * Copyright (c) 2015 Research Organization for Information Science
15 * and Technology (RIST). All rights reserved.
16 * $COPYRIGHT$
17 *
18 * Additional copyrights may follow
19 *
20 * $HEADER$
21 */
22
23
24 #include "orte_config.h"
25 #include "orte/constants.h"
26
27 #include "orte/mca/mca.h"
28 #include "opal/mca/base/base.h"
29 #include "opal/mca/event/event.h"
30
31 #include "orte/mca/ras/base/ras_private.h"
32 #include "orte/mca/ras/base/base.h"
33
34
35 /* NOTE: the RAS does not require a proxy as only the
36 * HNP can open the framework in orte_init - non-HNP
37 * procs are not allowed to allocate resources
38 */
39
40 /*
41 * The following file was created by configure. It contains extern
42 * statements and the definition of an array of pointers to each
43 * component's public mca_base_component_t struct.
44 */
45
46 #include "orte/mca/ras/base/static-components.h"
47
48 /*
49 * Global variables
50 */
51 orte_ras_base_t orte_ras_base = {0};
52
53 static int ras_register(mca_base_register_flag_t flags)
54 {
55 orte_ras_base.multiplier = 1;
56 mca_base_var_register("orte", "ras", "base", "multiplier",
57 "Simulate a larger cluster by launching N daemons/node",
58 MCA_BASE_VAR_TYPE_INT,
59 NULL, 0, 0,
60 OPAL_INFO_LVL_9,
61 MCA_BASE_VAR_SCOPE_READONLY, &orte_ras_base.multiplier);
62 #if SLURM_CRAY_ENV
63 /*
64 * If we are in a Cray-SLURM environment, then we cannot
65 * launch procs local to the HNP. The problem
66 * is the MPI processes launched on the head node (where the
67 * ORTE_PROC_IS_HNP evalues to true) get launched by a daemon
68 * (mpirun) which is not a child of a slurmd daemon. This
69 * means that any RDMA credentials obtained via the odls/alps
70 * local launcher are incorrect. Test for this condition. If
71 * found, then take steps to ensure we launch a daemon on
72 * the same node as mpirun and that it gets used to fork
73 * local procs instead of mpirun so they get the proper
74 * credential */
75
76 orte_ras_base.launch_orted_on_hn = true;
77 #else
78 orte_ras_base.launch_orted_on_hn = false;
79 #endif
80
81 mca_base_var_register("orte", "ras", "base", "launch_orted_on_hn",
82 "Launch an orte daemon on the head node",
83 MCA_BASE_VAR_TYPE_BOOL,
84 NULL, 0, 0,
85 OPAL_INFO_LVL_9,
86 MCA_BASE_VAR_SCOPE_READONLY, &orte_ras_base.launch_orted_on_hn);
87 return ORTE_SUCCESS;
88 }
89
90 static int orte_ras_base_close(void)
91 {
92 /* Close selected component */
93 if (NULL != orte_ras_base.active_module) {
94 orte_ras_base.active_module->finalize();
95 }
96
97 return mca_base_framework_components_close(&orte_ras_base_framework, NULL);
98 }
99
100 /**
101 * * Function for finding and opening either all MCA components, or the one
102 * * that was specifically requested via a MCA parameter.
103 * */
104 static int orte_ras_base_open(mca_base_open_flag_t flags)
105 {
106 /* set default flags */
107 orte_ras_base.active_module = NULL;
108 orte_ras_base.allocation_read = false;
109 orte_ras_base.total_slots_alloc = 0;
110
111 /* Open up all available components */
112 return mca_base_framework_components_open(&orte_ras_base_framework, flags);
113 }
114
115 MCA_BASE_FRAMEWORK_DECLARE(orte, ras, "ORTE Resource Allocation Subsystem",
116 ras_register, orte_ras_base_open, orte_ras_base_close,
117 mca_ras_base_static_components, 0);