This source file includes the following definitions.
- NBC_Scan_args_compare
- nbc_scan_init
- scan_sched_linear
- scan_sched_recursivedoubling
- ompi_coll_libnbc_iscan
- ompi_coll_libnbc_scan_init
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 #include "opal/align.h"
22 #include "ompi/op/op.h"
23
24 #include "nbc_internal.h"
25
26 static inline int scan_sched_linear(
27 int rank, int comm_size, const void *sendbuf, void *recvbuf, int count,
28 MPI_Datatype datatype, MPI_Op op, char inplace, NBC_Schedule *schedule,
29 void *tmpbuf);
30 static inline int scan_sched_recursivedoubling(
31 int rank, int comm_size, const void *sendbuf, void *recvbuf,
32 int count, MPI_Datatype datatype, MPI_Op op, char inplace,
33 NBC_Schedule *schedule, void *tmpbuf1, void *tmpbuf2);
34
#ifdef NBC_CACHE_SCHEDULE

/*
 * Three-way comparison of two cached scan argument sets.
 *
 * NOTE(review): presumably installed as the key comparator of the NBC_SCAN
 * schedule dictionary; the registration is not visible in this file.
 *
 * Returns 0 when sendbuf, recvbuf, count, datatype and op are all equal.
 * Otherwise the result is -1 or 1 from a lexicographic comparison of those
 * fields, in that order.  Every field takes part in the ordering so that
 * compare(a, b) and compare(b, a) always have opposite signs.  Ordering on
 * sendbuf alone would report "greater" in both directions for two entries
 * that share a sendbuf but differ in another field, which is not a
 * consistent order for a search tree.
 *
 * param is not used.
 */
int NBC_Scan_args_compare(NBC_Scan_args *a, NBC_Scan_args *b, void *param) {
    (void) param;

    if (a->sendbuf != b->sendbuf) {
        return (a->sendbuf < b->sendbuf) ? -1 : 1;
    }
    if (a->recvbuf != b->recvbuf) {
        return (a->recvbuf < b->recvbuf) ? -1 : 1;
    }
    if (a->count != b->count) {
        return (a->count < b->count) ? -1 : 1;
    }
    if (a->datatype != b->datatype) {
        return (a->datatype < b->datatype) ? -1 : 1;
    }
    if (a->op != b->op) {
        return (a->op < b->op) ? -1 : 1;
    }

    return 0;
}
#endif
53
54 static int nbc_scan_init(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
55 struct ompi_communicator_t *comm, ompi_request_t ** request,
56 struct mca_coll_base_module_2_3_0_t *module, bool persistent) {
57 int rank, p, res;
58 ptrdiff_t gap, span;
59 NBC_Schedule *schedule;
60 void *tmpbuf = NULL, *tmpbuf1 = NULL, *tmpbuf2 = NULL;
61 enum { NBC_SCAN_LINEAR, NBC_SCAN_RDBL } alg;
62 char inplace;
63 ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
64
65 NBC_IN_PLACE(sendbuf, recvbuf, inplace);
66
67 rank = ompi_comm_rank (comm);
68 p = ompi_comm_size (comm);
69
70 if (count == 0) {
71 return nbc_get_noop_request(persistent, request);
72 }
73
74 span = opal_datatype_span(&datatype->super, count, &gap);
75 if (libnbc_iscan_algorithm == 2) {
76 alg = NBC_SCAN_RDBL;
77 ptrdiff_t span_align = OPAL_ALIGN(span, datatype->super.align, ptrdiff_t);
78 tmpbuf = malloc(span_align + span);
79 if (NULL == tmpbuf) { return OMPI_ERR_OUT_OF_RESOURCE; }
80 tmpbuf1 = (void *)(-gap);
81 tmpbuf2 = (char *)(span_align) - gap;
82 } else {
83 alg = NBC_SCAN_LINEAR;
84 if (rank > 0) {
85 tmpbuf = malloc(span);
86 if (NULL == tmpbuf) { return OMPI_ERR_OUT_OF_RESOURCE; }
87 }
88 }
89
90 #ifdef NBC_CACHE_SCHEDULE
91 NBC_Scan_args *args, *found, search;
92
93
94 search.sendbuf = sendbuf;
95 search.recvbuf = recvbuf;
96 search.count = count;
97 search.datatype = datatype;
98 search.op = op;
99 found = (NBC_Scan_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_SCAN], &search);
100 if (NULL == found) {
101 #endif
102 schedule = OBJ_NEW(NBC_Schedule);
103 if (OPAL_UNLIKELY(NULL == schedule)) {
104 free(tmpbuf);
105 return OMPI_ERR_OUT_OF_RESOURCE;
106 }
107
108 if (alg == NBC_SCAN_LINEAR) {
109 res = scan_sched_linear(rank, p, sendbuf, recvbuf, count, datatype,
110 op, inplace, schedule, tmpbuf);
111 } else {
112 res = scan_sched_recursivedoubling(rank, p, sendbuf, recvbuf, count,
113 datatype, op, inplace, schedule, tmpbuf1, tmpbuf2);
114 }
115 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
116 OBJ_RELEASE(schedule);
117 free(tmpbuf);
118 return res;
119 }
120
121 res = NBC_Sched_commit(schedule);
122 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
123 OBJ_RELEASE(schedule);
124 free(tmpbuf);
125 return res;
126 }
127
128 #ifdef NBC_CACHE_SCHEDULE
129
130 args = (NBC_Scan_args *) malloc (sizeof (args));
131 if (NULL != args) {
132 args->sendbuf = sendbuf;
133 args->recvbuf = recvbuf;
134 args->count = count;
135 args->datatype = datatype;
136 args->op = op;
137 args->schedule = schedule;
138 res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_SCAN], args, args, 0);
139 if (0 == res) {
140 OBJ_RETAIN(schedule);
141
142
143 if (++libnbc_module->NBC_Dict_size[NBC_SCAN] > NBC_SCHED_DICT_UPPER) {
144 NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_SCAN],
145 &libnbc_module->NBC_Dict_size[NBC_SCAN]);
146 }
147 } else {
148 NBC_Error("error in dict_insert() (%i)", res);
149 free (args);
150 }
151 }
152 } else {
153
154 schedule = found->schedule;
155 OBJ_RETAIN(schedule);
156 }
157 #endif
158
159 res = NBC_Schedule_request(schedule, comm, libnbc_module, persistent, request, tmpbuf);
160 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
161 OBJ_RELEASE(schedule);
162 free(tmpbuf);
163 return res;
164 }
165
166 return OMPI_SUCCESS;
167 }
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183 static inline int scan_sched_linear(
184 int rank, int comm_size, const void *sendbuf, void *recvbuf, int count,
185 MPI_Datatype datatype, MPI_Op op, char inplace, NBC_Schedule *schedule,
186 void *tmpbuf)
187 {
188 int res = OMPI_SUCCESS;
189
190 if (!inplace) {
191
192 res = NBC_Sched_copy((void *)sendbuf, false, count, datatype,
193 recvbuf, false, count, datatype, schedule, false);
194 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
195 }
196
197 if (rank > 0) {
198 ptrdiff_t gap;
199 opal_datatype_span(&datatype->super, count, &gap);
200
201 res = NBC_Sched_recv((void *)(-gap), true, count, datatype, rank - 1, schedule, true);
202 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
203
204
205
206 res = NBC_Sched_op((void *)(-gap), true, recvbuf, false, count, datatype, op, schedule,
207 true);
208 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
209 }
210
211 if (rank != comm_size - 1) {
212 res = NBC_Sched_send(recvbuf, false, count, datatype, rank + 1, schedule, false);
213 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
214 }
215
216 cleanup_and_return:
217 return res;
218 }
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253 static inline int scan_sched_recursivedoubling(
254 int rank, int comm_size, const void *sendbuf, void *recvbuf, int count,
255 MPI_Datatype datatype, MPI_Op op, char inplace,
256 NBC_Schedule *schedule, void *tmpbuf1, void *tmpbuf2)
257 {
258 int res = OMPI_SUCCESS;
259
260 if (!inplace) {
261 res = NBC_Sched_copy((void *)sendbuf, false, count, datatype,
262 recvbuf, false, count, datatype, schedule, true);
263 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
264 }
265 if (comm_size < 2)
266 goto cleanup_and_return;
267
268 char *psend = (char *)tmpbuf1;
269 char *precv = (char *)tmpbuf2;
270 res = NBC_Sched_copy(recvbuf, false, count, datatype,
271 psend, true, count, datatype, schedule, true);
272 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
273
274 int is_commute = ompi_op_is_commute(op);
275 for (int mask = 1; mask < comm_size; mask <<= 1) {
276 int remote = rank ^ mask;
277 if (remote < comm_size) {
278 res = NBC_Sched_send(psend, true, count, datatype, remote, schedule, false);
279 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
280 res = NBC_Sched_recv(precv, true, count, datatype, remote, schedule, true);
281 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
282
283 if (rank > remote) {
284
285 res = NBC_Sched_op(precv, true, recvbuf, false, count,
286 datatype, op, schedule, false);
287 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
288
289 res = NBC_Sched_op(precv, true, psend, true, count,
290 datatype, op, schedule, true);
291 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
292 } else {
293 if (is_commute) {
294
295 res = NBC_Sched_op(precv, true, psend, true, count,
296 datatype, op, schedule, true);
297 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
298 } else {
299
300 res = NBC_Sched_op(psend, true, precv, true, count,
301 datatype, op, schedule, true);
302 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
303 char *tmp = psend;
304 psend = precv;
305 precv = tmp;
306 }
307 }
308 }
309 }
310
311 cleanup_and_return:
312 return res;
313 }
314
315 int ompi_coll_libnbc_iscan(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
316 struct ompi_communicator_t *comm, ompi_request_t ** request,
317 struct mca_coll_base_module_2_3_0_t *module) {
318 int res = nbc_scan_init(sendbuf, recvbuf, count, datatype, op,
319 comm, request, module, false);
320 if (OPAL_LIKELY(OMPI_SUCCESS != res)) {
321 return res;
322 }
323 res = NBC_Start(*(ompi_coll_libnbc_request_t **)request);
324 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
325 NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request);
326 *request = &ompi_request_null.request;
327 return res;
328 }
329
330 return OMPI_SUCCESS;
331 }
332
333 int ompi_coll_libnbc_scan_init(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
334 struct ompi_communicator_t *comm, MPI_Info info, ompi_request_t ** request,
335 struct mca_coll_base_module_2_3_0_t *module) {
336 int res = nbc_scan_init(sendbuf, recvbuf, count, datatype, op,
337 comm, request, module, true);
338 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
339 return res;
340 }
341
342 return OMPI_SUCCESS;
343 }