This source file includes following definitions.
- NBC_Scan_args_compare
- nbc_exscan_init
- ompi_coll_libnbc_iexscan
- ompi_coll_libnbc_exscan_init
- exscan_sched_linear
- exscan_sched_recursivedoubling
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 #include "opal/align.h"
22 #include "ompi/op/op.h"
23
24 #include "nbc_internal.h"
25
26 static inline int exscan_sched_linear(
27 int rank, int comm_size, const void *sendbuf, void *recvbuf, int count,
28 MPI_Datatype datatype, MPI_Op op, char inplace, NBC_Schedule *schedule,
29 void *tmpbuf);
30 static inline int exscan_sched_recursivedoubling(
31 int rank, int comm_size, const void *sendbuf, void *recvbuf,
32 int count, MPI_Datatype datatype, MPI_Op op, char inplace,
33 NBC_Schedule *schedule, void *tmpbuf1, void *tmpbuf2);
34
35 #ifdef NBC_CACHE_SCHEDULE
36
37 int NBC_Scan_args_compare(NBC_Scan_args *a, NBC_Scan_args *b, void *param) {
38 if ((a->sendbuf == b->sendbuf) &&
39 (a->recvbuf == b->recvbuf) &&
40 (a->count == b->count) &&
41 (a->datatype == b->datatype) &&
42 (a->op == b->op) ) {
43 return 0;
44 }
45
46 if( a->sendbuf < b->sendbuf ) {
47 return -1;
48 }
49
50 return 1;
51 }
52 #endif
53
54 static int nbc_exscan_init(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
55 struct ompi_communicator_t *comm, ompi_request_t ** request,
56 struct mca_coll_base_module_2_3_0_t *module, bool persistent) {
57 int rank, p, res;
58 NBC_Schedule *schedule;
59 char inplace;
60 void *tmpbuf = NULL, *tmpbuf1 = NULL, *tmpbuf2 = NULL;
61 enum { NBC_EXSCAN_LINEAR, NBC_EXSCAN_RDBL } alg;
62 ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
63 ptrdiff_t span, gap;
64
65 NBC_IN_PLACE(sendbuf, recvbuf, inplace);
66
67 rank = ompi_comm_rank(comm);
68 p = ompi_comm_size(comm);
69
70 if (p < 2) {
71 return nbc_get_noop_request(persistent, request);
72 }
73
74 span = opal_datatype_span(&datatype->super, count, &gap);
75 if (libnbc_iexscan_algorithm == 2) {
76 alg = NBC_EXSCAN_RDBL;
77 ptrdiff_t span_align = OPAL_ALIGN(span, datatype->super.align, ptrdiff_t);
78 tmpbuf = malloc(span_align + span);
79 if (NULL == tmpbuf) { return OMPI_ERR_OUT_OF_RESOURCE; }
80 tmpbuf1 = (void *)(-gap);
81 tmpbuf2 = (char *)(span_align) - gap;
82 } else {
83 alg = NBC_EXSCAN_LINEAR;
84 if (rank > 0) {
85 tmpbuf = malloc(span);
86 if (NULL == tmpbuf) { return OMPI_ERR_OUT_OF_RESOURCE; }
87 }
88 }
89
90 #ifdef NBC_CACHE_SCHEDULE
91 NBC_Scan_args *args, *found, search;
92
93 search.sendbuf = sendbuf;
94 search.recvbuf = recvbuf;
95 search.count = count;
96 search.datatype = datatype;
97 search.op = op;
98 found = (NBC_Scan_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_EXSCAN], &search);
99 if (NULL == found) {
100 #endif
101 schedule = OBJ_NEW(NBC_Schedule);
102 if (OPAL_UNLIKELY(NULL == schedule)) {
103 free(tmpbuf);
104 return OMPI_ERR_OUT_OF_RESOURCE;
105 }
106
107 if (alg == NBC_EXSCAN_LINEAR) {
108 res = exscan_sched_linear(rank, p, sendbuf, recvbuf, count, datatype,
109 op, inplace, schedule, tmpbuf);
110 } else {
111 res = exscan_sched_recursivedoubling(rank, p, sendbuf, recvbuf, count,
112 datatype, op, inplace, schedule, tmpbuf1, tmpbuf2);
113 }
114 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
115 OBJ_RELEASE(schedule);
116 free(tmpbuf);
117 return res;
118 }
119
120 res = NBC_Sched_commit(schedule);
121 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
122 OBJ_RELEASE(schedule);
123 free(tmpbuf);
124 return res;
125 }
126
127 #ifdef NBC_CACHE_SCHEDULE
128
129 args = (NBC_Scan_args *) malloc (sizeof (args));
130 if (NULL != args) {
131 args->sendbuf = sendbuf;
132 args->recvbuf = recvbuf;
133 args->count = count;
134 args->datatype = datatype;
135 args->op = op;
136 args->schedule = schedule;
137 res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_EXSCAN], args, args, 0);
138 if (0 == res) {
139 OBJ_RETAIN(schedule);
140
141
142 if (++libnbc_module->NBC_Dict_size[NBC_EXSCAN] > NBC_SCHED_DICT_UPPER) {
143 NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_EXSCAN],
144 &libnbc_module->NBC_Dict_size[NBC_EXSCAN]);
145 }
146 } else {
147 NBC_Error("error in dict_insert() (%i)", res);
148 free (args);
149 }
150 }
151 } else {
152
153 schedule = found->schedule;
154 OBJ_RETAIN(schedule);
155 }
156 #endif
157
158 res = NBC_Schedule_request(schedule, comm, libnbc_module, persistent, request, tmpbuf);
159 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
160 OBJ_RELEASE(schedule);
161 free(tmpbuf);
162 return res;
163 }
164
165 return OMPI_SUCCESS;
166 }
167
168 int ompi_coll_libnbc_iexscan(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
169 struct ompi_communicator_t *comm, ompi_request_t ** request,
170 struct mca_coll_base_module_2_3_0_t *module) {
171 int res = nbc_exscan_init(sendbuf, recvbuf, count, datatype, op,
172 comm, request, module, false);
173 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
174 return res;
175 }
176
177 res = NBC_Start(*(ompi_coll_libnbc_request_t **)request);
178 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
179 NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request);
180 *request = &ompi_request_null.request;
181 return res;
182 }
183
184 return OMPI_SUCCESS;
185 }
186
187 int ompi_coll_libnbc_exscan_init(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
188 struct ompi_communicator_t *comm, MPI_Info info, ompi_request_t ** request,
189 struct mca_coll_base_module_2_3_0_t *module) {
190 int res = nbc_exscan_init(sendbuf, recvbuf, count, datatype, op,
191 comm, request, module, true);
192 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
193 return res;
194 }
195
196 return OMPI_SUCCESS;
197 }
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213 static inline int exscan_sched_linear(
214 int rank, int comm_size, const void *sendbuf, void *recvbuf, int count,
215 MPI_Datatype datatype, MPI_Op op, char inplace, NBC_Schedule *schedule,
216 void *tmpbuf)
217 {
218 int res = OMPI_SUCCESS;
219 ptrdiff_t gap;
220 opal_datatype_span(&datatype->super, count, &gap);
221
222 if (rank > 0) {
223 if (inplace) {
224 res = NBC_Sched_copy(recvbuf, false, count, datatype,
225 (char *)tmpbuf - gap, false, count, datatype, schedule, false);
226 } else {
227 res = NBC_Sched_copy((void *)sendbuf, false, count, datatype,
228 (char *)tmpbuf - gap, false, count, datatype, schedule, false);
229 }
230 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
231
232 res = NBC_Sched_recv(recvbuf, false, count, datatype, rank - 1, schedule, false);
233 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
234
235 if (rank < comm_size - 1) {
236
237 res = NBC_Sched_barrier(schedule);
238 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
239
240 res = NBC_Sched_op(recvbuf, false, (void *)(-gap), true, count,
241 datatype, op, schedule, true);
242 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
243
244
245 res = NBC_Sched_send ((void *)(-gap), true, count, datatype, rank + 1, schedule, false);
246 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
247 }
248 } else if (comm_size > 1) {
249
250 if (inplace) {
251 res = NBC_Sched_send(recvbuf, false, count, datatype, 1, schedule, false);
252 } else {
253 res = NBC_Sched_send(sendbuf, false, count, datatype, 1, schedule, false);
254 }
255 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
256 }
257
258 cleanup_and_return:
259 return res;
260 }
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295 static inline int exscan_sched_recursivedoubling(
296 int rank, int comm_size, const void *sendbuf, void *recvbuf, int count,
297 MPI_Datatype datatype, MPI_Op op, char inplace,
298 NBC_Schedule *schedule, void *tmpbuf1, void *tmpbuf2)
299 {
300 int res = OMPI_SUCCESS;
301 char *psend = (char *)tmpbuf1;
302 char *precv = (char *)tmpbuf2;
303
304 if (!inplace) {
305 res = NBC_Sched_copy((char *)sendbuf, false, count, datatype,
306 psend, true, count, datatype, schedule, true);
307 } else {
308 res = NBC_Sched_copy((char *)recvbuf, false, count, datatype,
309 psend, true, count, datatype, schedule, true);
310 }
311 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
312
313 int is_commute = ompi_op_is_commute(op);
314 int is_first_block = 1;
315
316 for (int mask = 1; mask < comm_size; mask <<= 1) {
317 int remote = rank ^ mask;
318 if (remote < comm_size) {
319 res = NBC_Sched_send(psend, true, count, datatype, remote, schedule, false);
320 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
321 res = NBC_Sched_recv(precv, true, count, datatype, remote, schedule, true);
322 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
323
324 if (rank > remote) {
325
326 if (is_first_block) {
327 res = NBC_Sched_copy(precv, true, count, datatype,
328 recvbuf, false, count, datatype, schedule, false);
329 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
330 is_first_block = 0;
331 } else {
332
333 res = NBC_Sched_op(precv, true, recvbuf, false, count,
334 datatype, op, schedule, false);
335 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
336 }
337
338 res = NBC_Sched_op(precv, true, psend, true, count,
339 datatype, op, schedule, true);
340 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
341 } else {
342 if (is_commute) {
343
344 res = NBC_Sched_op(precv, true, psend, true, count,
345 datatype, op, schedule, true);
346 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
347 } else {
348
349 res = NBC_Sched_op(psend, true, precv, true, count,
350 datatype, op, schedule, true);
351 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
352 char *tmp = psend;
353 psend = precv;
354 precv = tmp;
355 }
356 }
357 }
358 }
359
360 cleanup_and_return:
361 return res;
362 }