1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3 * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
4 * University Research and Technology
5 * Corporation. All rights reserved.
6 * Copyright (c) 2004-2005 The University of Tennessee and The University
7 * of Tennessee Research Foundation. All rights
8 * reserved.
9 * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
10 * University of Stuttgart. All rights reserved.
11 * Copyright (c) 2004-2005 The Regents of the University of California.
12 * All rights reserved.
13 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
14 * Copyright (c) 2014 Intel, Inc. All rights reserved.
15 * Copyright (c) 2015 Los Alamos National Security, LLC. All rights
16 * reserved.
17 * $COPYRIGHT$
18 *
19 * Additional copyrights may follow
20 *
21 * $HEADER$
22 */
23
24 #include "orte_config.h"
25 #include "orte/constants.h"
26
27 #include "opal/mca/base/base.h"
28 #include "opal/util/basename.h"
29
30 #include "orte/mca/ras/base/ras_private.h"
31 #include "ras_tm.h"
32
33
34 /*
35 * Local variables
36 */
37 static int param_priority;
38
39
40 /*
41 * Local functions
42 */
43 static int ras_tm_register(void);
44 static int ras_tm_open(void);
45 static int orte_ras_tm_component_query(mca_base_module_t **module, int *priority);
46
47
48 orte_ras_tm_component_t mca_ras_tm_component = {
49 {
50 /* First, the mca_base_component_t struct containing meta
51 information about the component itself */
52
53 .base_version = {
54 ORTE_RAS_BASE_VERSION_2_0_0,
55
56 /* Component name and version */
57 .mca_component_name = "tm",
58 MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION,
59 ORTE_RELEASE_VERSION),
60
61 /* Component open and close functions */
62 .mca_open_component = ras_tm_open,
63 .mca_query_component = orte_ras_tm_component_query,
64 .mca_register_component_params = ras_tm_register,
65 },
66 .base_data = {
67 /* The component is checkpoint ready */
68 MCA_BASE_METADATA_PARAM_CHECKPOINT
69 },
70 }
71 };
72
73 static int ras_tm_register(void)
74 {
75 mca_base_component_t *c = &mca_ras_tm_component.super.base_version;
76 char *pbs_nodefile_env = NULL;
77
78 param_priority = 100;
79 (void) mca_base_component_var_register(c, "priority", "Priority of the tm ras component",
80 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
81 OPAL_INFO_LVL_9,
82 MCA_BASE_VAR_SCOPE_READONLY,
83 ¶m_priority);
84
85 mca_ras_tm_component.nodefile_dir = NULL;
86
87 /* try to detect the default directory */
88 pbs_nodefile_env = getenv("PBS_NODEFILE");
89 if (NULL != pbs_nodefile_env) {
90 mca_ras_tm_component.nodefile_dir = opal_dirname(pbs_nodefile_env);
91 }
92
93 if (NULL == mca_ras_tm_component.nodefile_dir) {
94 mca_ras_tm_component.nodefile_dir = strdup ("/var/torque/aux");
95 }
96
97 (void) mca_base_component_var_register (c, "nodefile_dir",
98 "The directory where the PBS nodefile can be found",
99 MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
100 OPAL_INFO_LVL_9,
101 MCA_BASE_VAR_SCOPE_READONLY,
102 &mca_ras_tm_component.nodefile_dir);
103
104 /* for big SMP machines (e.g., those from SGI), listing the nodes
105 * once/slot in the nodefile is extreme. In those cases, they may
106 * choose to list each node once, but then provide an envar that
107 * tells us how many cpus/node were allocated. Allow the user to
108 * inform us that we are in such an environment
109 */
110 mca_ras_tm_component.smp_mode = false;
111 (void) mca_base_component_var_register (c, "smp",
112 "The Torque system is configured in SMP mode "
113 "with the number of cpus/node given in the environment",
114 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
115 OPAL_INFO_LVL_9,
116 MCA_BASE_VAR_SCOPE_READONLY,
117 &mca_ras_tm_component.smp_mode);
118
119 return ORTE_SUCCESS;
120 }
121
122 static int ras_tm_open(void)
123 {
124 return ORTE_SUCCESS;
125 }
126
127
128 static int orte_ras_tm_component_query(mca_base_module_t **module, int *priority)
129 {
130 /* Are we running under a TM job? */
131 if (NULL != getenv("PBS_ENVIRONMENT") &&
132 NULL != getenv("PBS_JOBID")) {
133 *priority = param_priority;
134 *module = (mca_base_module_t *) &orte_ras_tm_module;
135 return ORTE_SUCCESS;
136 }
137
138 /* Sadly, no */
139 *module = NULL;
140 return ORTE_ERROR;
141 }