This source file includes the following definitions.
- ompi_mtl_ofi_add_procs
- ompi_mtl_ofi_del_procs
1
2
3
4
5
6
7
8
9
10
11 #include "mtl_ofi.h"
12
13 OMPI_DECLSPEC extern mca_mtl_ofi_component_t mca_mtl_ofi_component;
14
15 mca_mtl_ofi_module_t ompi_mtl_ofi = {
16 {
17 (int)((1ULL << MTL_OFI_CID_BIT_COUNT_1) - 1),
18 (int)((1ULL << (MTL_OFI_TAG_BIT_COUNT_1 - 1)) - 1) ,
19 0,
20 0,
21
22 ompi_mtl_ofi_add_procs,
23 ompi_mtl_ofi_del_procs,
24 ompi_mtl_ofi_finalize,
25
26 NULL,
27 NULL,
28 NULL,
29 NULL,
30 ompi_mtl_ofi_imrecv,
31 NULL,
32
33 ompi_mtl_ofi_cancel,
34 ompi_mtl_ofi_add_comm,
35 ompi_mtl_ofi_del_comm
36 },
37 0,
38 0,
39 NULL,
40 NULL
41 };
42
43 int
44 ompi_mtl_ofi_add_procs(struct mca_mtl_base_module_t *mtl,
45 size_t nprocs,
46 struct ompi_proc_t** procs)
47 {
48 int ret = OMPI_SUCCESS;
49 size_t i;
50 size_t size;
51 size_t namelen;
52 int count = 0;
53 char *ep_name = NULL;
54 char *ep_names = NULL;
55 fi_addr_t *fi_addrs = NULL;
56 mca_mtl_ofi_endpoint_t *endpoint = NULL;
57 int num_peers_limit = (1 << ompi_mtl_ofi.num_bits_source_rank) - 1;
58
59 namelen = ompi_mtl_ofi.epnamelen;
60
61
62 if ((false == ompi_mtl_ofi.fi_cq_data) &&
63 OPAL_UNLIKELY(((int) (nprocs + ompi_mtl_ofi.num_peers) > num_peers_limit))) {
64 opal_output(0, "%s:%d: OFI provider: %s does not have enough bits for source rank in its tag.\n"
65 "Adding more ranks will result in undefined behaviour. Please enable\n"
66 "FI_REMOTE_CQ_DATA feature in the provider. For more info refer fi_cq(3).\n",
67 __FILE__, __LINE__, ompi_mtl_ofi.provider_name);
68 fflush(stderr);
69 ret = OMPI_ERROR;
70 goto bail;
71 }
72
73
74
75
76 ep_names = malloc(nprocs * namelen);
77 if (NULL == ep_names) {
78 ret = OMPI_ERROR;
79 goto bail;
80 }
81
82
83
84
85 fi_addrs = malloc(nprocs * sizeof(fi_addr_t));
86 if (NULL == fi_addrs) {
87 ret = OMPI_ERROR;
88 goto bail;
89 }
90
91
92
93
94 for (i = 0; i < nprocs; ++i) {
95 OFI_COMPAT_MODEX_RECV(ret,
96 &mca_mtl_ofi_component.super.mtl_version,
97 procs[i],
98 (void**)&ep_name,
99 &size);
100 if (OMPI_SUCCESS != ret) {
101 opal_show_help("help-mtl-ofi.txt", "modex failed",
102 true, ompi_process_info.nodename,
103 procs[i]->super.proc_hostname,
104 opal_strerror(ret), ret);
105 goto bail;
106 }
107 memcpy(&ep_names[i*namelen], ep_name, namelen);
108 }
109
110
111
112
113 count = fi_av_insert(ompi_mtl_ofi.av, ep_names, nprocs, fi_addrs, 0, NULL);
114 if ((count < 0) || (nprocs != (size_t)count)) {
115 opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
116 "%s:%d: fi_av_insert failed: %d\n",
117 __FILE__, __LINE__, count);
118 ret = OMPI_ERROR;
119 goto bail;
120 }
121
122
123
124
125 for (i = 0; i < nprocs; ++i) {
126 endpoint = OBJ_NEW(mca_mtl_ofi_endpoint_t);
127 if (NULL == endpoint) {
128 opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
129 "%s:%d: mtl/ofi: could not allocate endpoint"
130 " structure\n",
131 __FILE__, __LINE__);
132 ret = OMPI_ERROR;
133 goto bail;
134 }
135
136 endpoint->mtl_ofi_module = &ompi_mtl_ofi;
137 endpoint->peer_fiaddr = fi_addrs[i];
138
139
140 procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL] = endpoint;
141 }
142
143
144 ompi_mtl_ofi.num_peers += nprocs;
145
146 ret = OMPI_SUCCESS;
147
148 bail:
149 if (fi_addrs)
150 free(fi_addrs);
151
152 if (ep_names)
153 free(ep_names);
154
155 return ret;
156 }
157
158 int
159 ompi_mtl_ofi_del_procs(struct mca_mtl_base_module_t *mtl,
160 size_t nprocs,
161 struct ompi_proc_t** procs)
162 {
163 int ret;
164 size_t i;
165 mca_mtl_ofi_endpoint_t *endpoint = NULL;
166
167 for (i = 0 ; i < nprocs ; ++i) {
168 if (NULL != procs[i] &&
169 NULL != procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]) {
170 endpoint = procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
171 ret = fi_av_remove(ompi_mtl_ofi.av, &endpoint->peer_fiaddr, 1, 0);
172 if (ret) {
173 opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
174 "%s:%d: fi_av_remove failed: %s\n", __FILE__, __LINE__, fi_strerror(errno));
175 return ret;
176 }
177 procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL] = NULL;
178 OBJ_RELEASE(endpoint);
179 }
180 }
181
182 return OMPI_SUCCESS;
183 }