This source file includes the following definitions.
- ompi_mtl_psm_component_register
- ompi_mtl_psm_component_open
- ompi_mtl_psm_component_query
- ompi_mtl_psm_component_close
- get_num_total_procs
- get_num_local_procs
- get_local_rank
- ompi_mtl_psm_component_init
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 #include "ompi_config.h"
25
26 #include "opal/mca/event/event.h"
27 #include "opal/util/output.h"
28 #include "opal/util/show_help.h"
29 #include "ompi/proc/proc.h"
30
31 #include "mtl_psm.h"
32 #include "mtl_psm_types.h"
33 #include "mtl_psm_request.h"
34
35 #include "psm.h"
36
37 #include <sys/types.h>
38 #include <sys/stat.h>
39 #include <unistd.h>
40 #include <glob.h>
41
42 static int param_priority;
43
44 static int ompi_mtl_psm_component_open(void);
45 static int ompi_mtl_psm_component_close(void);
46 static int ompi_mtl_psm_component_query(mca_base_module_t **module, int *priority);
47 static int ompi_mtl_psm_component_register(void);
48
49 static mca_mtl_base_module_t* ompi_mtl_psm_component_init( bool enable_progress_threads,
50 bool enable_mpi_threads );
51
52 mca_mtl_psm_component_t mca_mtl_psm_component = {
53
54 {
55
56
57
58 .mtl_version = {
59 MCA_MTL_BASE_VERSION_2_0_0,
60
61 .mca_component_name = "psm",
62 MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
63 OMPI_RELEASE_VERSION),
64 .mca_open_component = ompi_mtl_psm_component_open,
65 .mca_close_component = ompi_mtl_psm_component_close,
66 .mca_query_component = ompi_mtl_psm_component_query,
67 .mca_register_component_params = ompi_mtl_psm_component_register,
68 },
69 .mtl_data = {
70
71 MCA_BASE_METADATA_PARAM_NONE
72 },
73
74 .mtl_init = ompi_mtl_psm_component_init,
75 }
76 };
77
#if PSM_VERNO >= 0x010d
/*
 * Value/name pairs offered for the "path_query" variable.  The trailing
 * {0, NULL} entry is presumably the table terminator -- confirm against the
 * enum-creation API.
 */
static mca_base_var_enum_value_t path_query_values[] = {
    { PSM_PATH_RES_NONE, "none" },
    { PSM_PATH_RES_OPP,  "opp"  },
    { 0,                 NULL   }
};
#endif
85
86 static int
87 ompi_mtl_psm_component_register(void)
88 {
89 #if PSM_VERNO >= 0x010d
90 mca_base_var_enum_t *new_enum;
91 #endif
92
93
94
95 param_priority = 30;
96 (void) mca_base_component_var_register (&mca_mtl_psm_component.super.mtl_version,
97 "priority", "Priority of the PSM MTL component",
98 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
99 OPAL_INFO_LVL_9,
100 MCA_BASE_VAR_SCOPE_READONLY,
101 ¶m_priority);
102
103 ompi_mtl_psm.connect_timeout = 180;
104 (void) mca_base_component_var_register(&mca_mtl_psm_component.super.mtl_version,
105 "connect_timeout",
106 "PSM connection timeout value in seconds",
107 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
108 OPAL_INFO_LVL_9,
109 MCA_BASE_VAR_SCOPE_READONLY,
110 &ompi_mtl_psm.connect_timeout);
111
112 ompi_mtl_psm.debug_level = 1;
113 (void) mca_base_component_var_register(&mca_mtl_psm_component.super.mtl_version,
114 "debug", "PSM debug level",
115 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
116 OPAL_INFO_LVL_9,
117 MCA_BASE_VAR_SCOPE_READONLY,
118 &ompi_mtl_psm.debug_level);
119
120 ompi_mtl_psm.ib_unit = -1;
121 (void) mca_base_component_var_register(&mca_mtl_psm_component.super.mtl_version,
122 "ib_unit", "Truescale unit to use",
123 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
124 OPAL_INFO_LVL_9,
125 MCA_BASE_VAR_SCOPE_READONLY,
126 &ompi_mtl_psm.ib_unit);
127
128 ompi_mtl_psm.ib_port = 0;
129 (void) mca_base_component_var_register(&mca_mtl_psm_component.super.mtl_version,
130 "ib_port", "Truescale port on unit to use",
131 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
132 OPAL_INFO_LVL_9,
133 MCA_BASE_VAR_SCOPE_READONLY,
134 &ompi_mtl_psm.ib_port);
135
136 ompi_mtl_psm.ib_service_level = 0;
137 (void) mca_base_component_var_register(&mca_mtl_psm_component.super.mtl_version,
138 "ib_service_level", "Infiniband service level"
139 "(0 <= SL <= 15)", MCA_BASE_VAR_TYPE_INT,
140 NULL, 0, 0, OPAL_INFO_LVL_9,
141 MCA_BASE_VAR_SCOPE_READONLY,
142 &ompi_mtl_psm.ib_service_level);
143
144 ompi_mtl_psm.ib_pkey = 0x7fffUL;
145 (void) mca_base_component_var_register(&mca_mtl_psm_component.super.mtl_version,
146 "ib_pkey", "Infiniband partition key",
147 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
148 OPAL_INFO_LVL_9,
149 MCA_BASE_VAR_SCOPE_READONLY,
150 &ompi_mtl_psm.ib_pkey);
151
152 #if PSM_VERNO >= 0x010d
153 ompi_mtl_psm.ib_service_id = 0x1000117500000000ull;
154 (void) mca_base_component_var_register(&mca_mtl_psm_component.super.mtl_version,
155 "ib_service_id",
156 "Infiniband service ID to use for application (default is 0)",
157 MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG, NULL, 0, 0,
158 OPAL_INFO_LVL_9,
159 MCA_BASE_VAR_SCOPE_READONLY,
160 &ompi_mtl_psm.ib_service_id);
161
162 ompi_mtl_psm.path_res_type = PSM_PATH_RES_NONE;
163 mca_base_var_enum_create("mtl_psm_path_query", path_query_values, &new_enum);
164 (void) mca_base_component_var_register(&mca_mtl_psm_component.super.mtl_version,
165 "path_query",
166 "Path record query mechanisms",
167 MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
168 OPAL_INFO_LVL_9,
169 MCA_BASE_VAR_SCOPE_READONLY,
170 &ompi_mtl_psm.path_res_type);
171 OBJ_RELEASE(new_enum);
172 #endif
173
174 return OMPI_SUCCESS;
175 }
176
177 static int
178 ompi_mtl_psm_component_open(void)
179 {
180 struct stat st;
181
182 if (ompi_mtl_psm.ib_service_level < 0) {
183 ompi_mtl_psm.ib_service_level = 0;
184 } else if (ompi_mtl_psm.ib_service_level > 15) {
185 ompi_mtl_psm.ib_service_level = 15;
186 }
187
188
189 if (0 != stat("/dev/ipath", &st)) {
190 return OPAL_ERR_NOT_AVAILABLE;
191 }
192
193
194 bool foundOnlineQibPort = false;
195 size_t i;
196 char portState[128];
197 FILE *devFile;
198 glob_t globbuf;
199 globbuf.gl_offs = 0;
200 if (glob("/sys/class/infiniband/qib*/ports/*/state",
201 GLOB_DOOFFS, NULL, &globbuf) != 0) {
202 return OPAL_ERR_NOT_AVAILABLE;
203 }
204
205 for (i=0;i < globbuf.gl_pathc; i++) {
206 devFile = fopen(globbuf.gl_pathv[i], "r");
207 fgets(portState, sizeof(portState), devFile);
208 fclose(devFile);
209
210 if (strstr(portState, "ACTIVE") != NULL) {
211
212 foundOnlineQibPort = true;
213 break;
214 }
215 }
216
217 globfree(&globbuf);
218
219 if (!foundOnlineQibPort) {
220 return OPAL_ERR_NOT_AVAILABLE;
221 }
222
223 return OMPI_SUCCESS;
224 }
225
226 static int
227 ompi_mtl_psm_component_query(mca_base_module_t **module, int *priority)
228 {
229
230
231
232
233 *priority = param_priority;
234 *module = (mca_base_module_t *)&ompi_mtl_psm.super;
235 return OMPI_SUCCESS;
236 }
237
238
239 static int
240 ompi_mtl_psm_component_close(void)
241 {
242 return OMPI_SUCCESS;
243 }
244
245 static int
246 get_num_total_procs(int *out_ntp)
247 {
248 *out_ntp = (int)ompi_process_info.num_procs;
249 return OMPI_SUCCESS;
250 }
251
252 static int
253 get_num_local_procs(int *out_nlp)
254 {
255
256
257 *out_nlp = (int)(1 + ompi_process_info.num_local_peers);
258 return OMPI_SUCCESS;
259 }
260
261 static int
262 get_local_rank(int *out_rank)
263 {
264 ompi_node_rank_t my_node_rank;
265
266 *out_rank = 0;
267
268 if (OMPI_NODE_RANK_INVALID == (my_node_rank =
269 ompi_process_info.my_node_rank)) {
270 return OMPI_ERROR;
271 }
272 *out_rank = (int)my_node_rank;
273 return OMPI_SUCCESS;
274 }
275
276 static mca_mtl_base_module_t *
277 ompi_mtl_psm_component_init(bool enable_progress_threads,
278 bool enable_mpi_threads)
279 {
280 psm_error_t err;
281 int verno_major = PSM_VERNO_MAJOR;
282 int verno_minor = PSM_VERNO_MINOR;
283 int local_rank = -1, num_local_procs = 0;
284 int num_total_procs = 0;
285
286
287
288
289
290 if (OMPI_SUCCESS != get_num_local_procs(&num_local_procs)) {
291 opal_output(0, "Cannot determine number of local processes. "
292 "Cannot continue.\n");
293 return NULL;
294 }
295 if (OMPI_SUCCESS != get_local_rank(&local_rank)) {
296 opal_output(0, "Cannot determine local rank. Cannot continue.\n");
297 return NULL;
298 }
299 if (OMPI_SUCCESS != get_num_total_procs(&num_total_procs)) {
300 opal_output(0, "Cannot determine total number of processes. "
301 "Cannot continue.\n");
302 return NULL;
303 }
304
305
306 #if PSM_VERNO >= 0x010c
307
308 err = psm_setopt(PSM_COMPONENT_CORE, 0, PSM_CORE_OPT_DEBUG,
309 (const void*) &ompi_mtl_psm.debug_level,
310 sizeof(unsigned));
311 if (err) {
312
313 opal_show_help("help-mtl-psm.txt",
314 "psm init", false,
315 psm_error_get_string(err));
316 }
317 #endif
318
319 if (getenv("PSM_DEVICES") == NULL) {
320
321
322
323 if (PSM_VERNO >= 0x0104) {
324 if (num_local_procs == num_total_procs) {
325 setenv("PSM_DEVICES", "self,shm", 0);
326 } else {
327 setenv("PSM_DEVICES", "self,shm,ipath", 0);
328 }
329 }
330 else {
331 if (num_local_procs == num_total_procs) {
332 setenv("PSM_DEVICES", "shm", 0);
333 } else {
334 setenv("PSM_DEVICES", "shm,ipath", 0);
335 }
336 }
337 }
338
339 err = psm_init(&verno_major, &verno_minor);
340 if (err) {
341 opal_show_help("help-mtl-psm.txt",
342 "psm init", true,
343 psm_error_get_string(err));
344 return NULL;
345 }
346
347
348 ompi_mtl_psm_module_init(local_rank, num_local_procs);
349
350 ompi_mtl_psm.super.mtl_request_size =
351 sizeof(mca_mtl_psm_request_t) -
352 sizeof(struct mca_mtl_request_t);
353
354
355 err = psm_error_register_handler(NULL ,
356 PSM_ERRHANDLER_NOP);
357 if (err) {
358 opal_output(0, "Error in psm_error_register_handler (error %s)\n",
359 psm_error_get_string(err));
360 return NULL;
361 }
362
363 return &ompi_mtl_psm.super;
364 }
365