1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3 * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
4 * University Research and Technology
5 * Corporation. All rights reserved.
6 * Copyright (c) 2004-2007 The University of Tennessee and The University
7 * of Tennessee Research Foundation. All rights
8 * reserved.
9 * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
10 * University of Stuttgart. All rights reserved.
11 * Copyright (c) 2004-2005 The Regents of the University of California.
12 * All rights reserved.
13 * Copyright (c) 2008 UT-Battelle, LLC
14 * Copyright (c) 2008-2017 Cisco Systems, Inc. All rights reserved
15 * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
16 * Copyright (c) 2015 Los Alamos National Security, LLC. All rights
17 * reserved.
18 * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
19 * Copyright (c) 2019 Research Organization for Information Science
20 * and Technology (RIST). All rights reserved.
21 * Copyright (c) 2018 Triad National Security, LLC. All rights
22 * reserved.
23 * $COPYRIGHT$
24 *
25 * Additional copyrights may follow
26 *
27 * $HEADER$
28 */
29 /**
30 * @file
31 *
32 * Public interface for the MPI_Op handle.
33 */
34
35 #ifndef OMPI_OP_H
36 #define OMPI_OP_H
37
38 #include "ompi_config.h"
39
40 #include <stdio.h>
41
42 #include "mpi.h"
43
44 #include "opal/class/opal_object.h"
45 #include "opal/util/printf.h"
46
47 #include "ompi/datatype/ompi_datatype.h"
48 #include "ompi/mpi/fortran/base/fint_2_int.h"
49 #include "ompi/mca/op/op.h"
50
51 BEGIN_C_DECLS
52
53 /**
54 * Typedef for C op functions for user-defined MPI_Ops.
55 *
56 * We don't use MPI_User_function because this would create a
57 * confusing dependency loop between this file and mpi.h. So this is
58 * repeated code, but it's better this way (and this typedef will
59 * never change, so there's not much of a maintenance worry).
60 */
61 typedef void (ompi_op_c_handler_fn_t)(void *, void *, int *,
62 struct ompi_datatype_t **);
63
64 /**
65 * Typedef for fortran user-defined MPI_Ops.
66 */
67 typedef void (ompi_op_fortran_handler_fn_t)(void *, void *,
68 MPI_Fint *, MPI_Fint *);
69
70 /**
71 * Typedef for C++ op functions intercept (used for user-defined
72 * MPI::Ops).
73 *
74 * See the lengthy explanation for why this is different than the C
75 * intercept in ompi/mpi/cxx/intercepts.cc in the
76 * ompi_mpi_cxx_op_intercept() function.
77 */
78 typedef void (ompi_op_cxx_handler_fn_t)(void *, void *, int *,
79 struct ompi_datatype_t **,
80 MPI_User_function * op);
81
82 /**
83 * Typedef for Java op functions intercept (used for user-defined
84 * MPI.Ops).
85 *
86 * See the lengthy explanation for why this is different than the C
87 * intercept in ompi/mpi/cxx/intercepts.cc in the
88 * ompi_mpi_cxx_op_intercept() function.
89 */
90 typedef void (ompi_op_java_handler_fn_t)(void *, void *, int *,
91 struct ompi_datatype_t **,
92 int baseType,
93 void *jnienv, void *object);
94
95 /*
96 * Flags for MPI_Op
97 */
98 /** Set if the MPI_Op is a built-in operation */
99 #define OMPI_OP_FLAGS_INTRINSIC 0x0001
100 /** Set if the callback function is in Fortran */
101 #define OMPI_OP_FLAGS_FORTRAN_FUNC 0x0002
102 /** Set if the callback function is in C++ */
103 #define OMPI_OP_FLAGS_CXX_FUNC 0x0004
104 /** Set if the callback function is in Java */
105 #define OMPI_OP_FLAGS_JAVA_FUNC 0x0008
106 /** Set if the callback function is associative (MAX and SUM will both
107 have ASSOC set -- in fact, it will only *not* be set if we
108 implement some extensions to MPI, because MPI says that all
109 MPI_Op's should be associative, so this flag is really here for
110 future expansion) */
111 #define OMPI_OP_FLAGS_ASSOC 0x0010
112 /** Set if the callback function is associative for floating point
113 operands (e.g., MPI_SUM will have ASSOC set, but will *not* have
114 FLOAT_ASSOC set) */
115 #define OMPI_OP_FLAGS_FLOAT_ASSOC 0x0020
/** Set if the callback function is commutative */
117 #define OMPI_OP_FLAGS_COMMUTE 0x0040
118
119
120
121
/*
 * Basic operation type for predefined types.  Each value identifies
 * one predefined MPI reduction operation (e.g., OMPI_OP_MAX
 * corresponds to MPI_MAX).
 */
enum ompi_op_type {
    OMPI_OP_NULL,
    OMPI_OP_MAX,
    OMPI_OP_MIN,
    OMPI_OP_SUM,
    OMPI_OP_PROD,
    OMPI_OP_LAND,
    OMPI_OP_BAND,
    OMPI_OP_LOR,
    OMPI_OP_BOR,
    OMPI_OP_LXOR,
    OMPI_OP_BXOR,
    OMPI_OP_MAXLOC,
    OMPI_OP_MINLOC,
    OMPI_OP_REPLACE,
    /** Sentinel: count of the entries above, not a real operation */
    OMPI_OP_NUM_OF_TYPES
};
/**
 * Back-end type of MPI_Op
 */
struct ompi_op_t {
    /** Parent class, for reference counting */
    opal_object_t super;

    /** Name, for debugging purposes */
    char o_name[MPI_MAX_OBJECT_NAME];

    /** Which predefined operation this is (see enum ompi_op_type) */
    enum ompi_op_type op_type;

    /** Flags about the op (bitmask of the OMPI_OP_FLAGS_* values) */
    uint32_t o_flags;

    /** Index in Fortran <-> C translation array */
    int o_f_to_c_index;

    /** Union holding (2-buffer functions):
        1. Function pointers for all supported datatypes when this op
        is intrinsic
        2. Function pointers for when this op is user-defined (only
        need one function pointer for this; we call it for *all*
        datatypes, even intrinsics)
    */
    union {
        /** Function/module pointers for intrinsic ops */
        ompi_op_base_op_fns_t intrinsic;
        /** C handler function pointer */
        ompi_op_c_handler_fn_t *c_fn;
        /** Fortran handler function pointer */
        ompi_op_fortran_handler_fn_t *fort_fn;
        /** C++ intercept function data -- see lengthy comment in
            ompi/mpi/cxx/intercepts.cc::ompi_mpi_cxx_op_intercept() for
            an explanation */
        struct {
            /* The user's function (it's the wrong type, but that's ok) */
            ompi_op_c_handler_fn_t *user_fn;
            /* The OMPI C++ callback/intercept function */
            ompi_op_cxx_handler_fn_t *intercept_fn;
        } cxx_data;
        /** Java intercept function data (analogous to cxx_data) */
        struct {
            /* The OMPI Java callback/intercept function */
            ompi_op_java_handler_fn_t *intercept_fn;
            /* The Java run time environment */
            void *jnienv, *object;
            int baseType;
        } java_data;
    } o_func;

    /** 3-buffer functions, which is only for intrinsic ops.  No need
        for the C/C++/Fortran user-defined functions. */
    ompi_op_base_op_3buff_fns_t o_3buff_intrinsic;
};
196
197 /**
198 * Convenience typedef
199 */
200 typedef struct ompi_op_t ompi_op_t;
201 OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_op_t);
202
/**
 * Padded struct to maintain back compatibility.
 * See ompi/communicator/communicator.h comments with struct ompi_communicator_t
 * for full explanation why we chose the following padding construct for predefines.
 */
#define PREDEFINED_OP_PAD 2048

struct ompi_predefined_op_t {
    struct ompi_op_t op;
    /* Pad out to PREDEFINED_OP_PAD bytes total, regardless of
       sizeof(ompi_op_t) (see the comment above) */
    char padding[PREDEFINED_OP_PAD - sizeof(ompi_op_t)];
};

typedef struct ompi_predefined_op_t ompi_predefined_op_t;
216
217 /**
218 * Array to map ddt->id values to the corresponding position in the op
219 * function array.
220 *
221 * NOTE: It is possible to have an implementation without this map.
222 * There are basically 3 choices for implementing "how to find the
223 * right position in the op array based on the datatype":
224 *
225 * 1. Use the exact same ordering as ddt->id in the op map. This is
226 * nice in that it's always a direct lookup via one memory
227 * de-reference. But it makes a sparse op array, and it's at least
228 * somewhat wasteful. It also chains the ddt and op implementations
229 * together. If the ddt ever changes its ordering, op is screwed. It
230 * seemed safer from a maintenance point of view not to do it that
231 * way.
232 *
 * 2. Re-arrange the ddt ID values so that all the reducible types are
234 * at the beginning. This means that we can have a dense array here
235 * in op, but then we have the same problem as number one -- and so
236 * this didn't seem like a good idea from a maintenance point of view.
237 *
238 * 3. Create a mapping between the ddt->id values and the position in
239 * the op array. This allows a nice dense op array, and if we make
240 * the map based on symbolic values, then if ddt ever changes its
241 * ordering, it won't matter to op. This seemed like the safest thing
242 * to do from a maintenance perspective, and since it only costs one
243 * extra lookup, and that lookup is way cheaper than the function call
244 * to invoke the reduction operation, it seemed like the best idea.
245 */
246 OMPI_DECLSPEC extern int ompi_op_ddt_map[OMPI_DATATYPE_MAX_PREDEFINED];
247
248 /**
249 * Global variable for MPI_OP_NULL (_addr flavor is for F03 bindings)
250 */
251 OMPI_DECLSPEC extern ompi_predefined_op_t ompi_mpi_op_null;
252 OMPI_DECLSPEC extern ompi_predefined_op_t *ompi_mpi_op_null_addr;
253
254 /**
255 * Global variable for MPI_MAX (_addr flavor is for F03 bindings)
256 */
257 OMPI_DECLSPEC extern ompi_predefined_op_t ompi_mpi_op_max;
258 OMPI_DECLSPEC extern ompi_predefined_op_t *ompi_mpi_op_max_addr;
259
260 /**
261 * Global variable for MPI_MIN (_addr flavor is for F03 bindings)
262 */
263 OMPI_DECLSPEC extern ompi_predefined_op_t ompi_mpi_op_min;
264 OMPI_DECLSPEC extern ompi_predefined_op_t *ompi_mpi_op_min_addr;
265
266 /**
267 * Global variable for MPI_SUM (_addr flavor is for F03 bindings)
268 */
269 OMPI_DECLSPEC extern ompi_predefined_op_t ompi_mpi_op_sum;
270 OMPI_DECLSPEC extern ompi_predefined_op_t *ompi_mpi_op_sum_addr;
271
272 /**
273 * Global variable for MPI_PROD (_addr flavor is for F03 bindings)
274 */
275 OMPI_DECLSPEC extern ompi_predefined_op_t ompi_mpi_op_prod;
276 OMPI_DECLSPEC extern ompi_predefined_op_t *ompi_mpi_op_prod_addr;
277
278 /**
279 * Global variable for MPI_LAND (_addr flavor is for F03 bindings)
280 */
281 OMPI_DECLSPEC extern ompi_predefined_op_t ompi_mpi_op_land;
282 OMPI_DECLSPEC extern ompi_predefined_op_t *ompi_mpi_op_land_addr;
283
284 /**
285 * Global variable for MPI_BAND (_addr flavor is for F03 bindings)
286 */
287 OMPI_DECLSPEC extern ompi_predefined_op_t ompi_mpi_op_band;
288 OMPI_DECLSPEC extern ompi_predefined_op_t *ompi_mpi_op_band_addr;
289
290 /**
291 * Global variable for MPI_LOR (_addr flavor is for F03 bindings)
292 */
293 OMPI_DECLSPEC extern ompi_predefined_op_t ompi_mpi_op_lor;
294 OMPI_DECLSPEC extern ompi_predefined_op_t *ompi_mpi_op_lor_addr;
295
296 /**
297 * Global variable for MPI_BOR (_addr flavor is for F03 bindings)
298 */
299 OMPI_DECLSPEC extern ompi_predefined_op_t ompi_mpi_op_bor;
300 OMPI_DECLSPEC extern ompi_predefined_op_t *ompi_mpi_op_bor_addr;
301
302 /**
303 * Global variable for MPI_LXOR (_addr flavor is for F03 bindings)
304 */
305 OMPI_DECLSPEC extern ompi_predefined_op_t ompi_mpi_op_lxor;
306 OMPI_DECLSPEC extern ompi_predefined_op_t *ompi_mpi_op_lxor_addr;
307
308 /**
309 * Global variable for MPI_BXOR (_addr flavor is for F03 bindings)
310 */
311 OMPI_DECLSPEC extern ompi_predefined_op_t ompi_mpi_op_bxor;
312 OMPI_DECLSPEC extern ompi_predefined_op_t *ompi_mpi_op_bxor_addr;
313
314 /**
315 * Global variable for MPI_MAXLOC (_addr flavor is for F03 bindings)
316 */
317 OMPI_DECLSPEC extern ompi_predefined_op_t ompi_mpi_op_maxloc;
318 OMPI_DECLSPEC extern ompi_predefined_op_t *ompi_mpi_op_maxloc_addr;
319
320 /**
321 * Global variable for MPI_MINLOC (_addr flavor is for F03 bindings)
322 */
323 OMPI_DECLSPEC extern ompi_predefined_op_t ompi_mpi_op_minloc;
324 OMPI_DECLSPEC extern ompi_predefined_op_t *ompi_mpi_op_minloc_addr;
325
326 /**
327 * Global variable for MPI_REPLACE (_addr flavor is for F03 bindings)
328 */
329 OMPI_DECLSPEC extern ompi_predefined_op_t ompi_mpi_op_replace;
330 OMPI_DECLSPEC extern ompi_predefined_op_t *ompi_mpi_op_replace_addr;
331
332 /**
333 * Global variable for MPI_NO_OP
334 */
335 OMPI_DECLSPEC extern ompi_predefined_op_t ompi_mpi_op_no_op;
336
337
338 /**
339 * Table for Fortran <-> C op handle conversion
340 */
341 extern struct opal_pointer_array_t *ompi_op_f_to_c_table;
342
/**
 * Initialize the op interface.
 *
 * @returns OMPI_SUCCESS Upon success
 * @returns OMPI_ERROR Otherwise
 *
 * Invoked from ompi_mpi_init(); sets up the op interface, creates
 * the predefined MPI operations, and creates the corresponding F2C
 * translation table.
 */
353 int ompi_op_init(void);
354
/**
 * Finalize the op interface.
 *
 * @returns OMPI_SUCCESS Always
 *
 * Invoked from ompi_mpi_finalize(); tears down the op interface, and
 * destroys the F2C translation table.
 */
363 int ompi_op_finalize(void);
364
365 /**
366 * Create a ompi_op_t with a user-defined callback (vs. creating an
367 * intrinsic ompi_op_t).
368 *
 * @param commute Boolean indicating whether the operation is
 * commutative or not
 * @param func Function pointer of the user-defined reduction operation
372 *
373 * @returns op Pointer to the ompi_op_t that will be
374 * created and returned
375 *
376 * This function is called as the back-end of all the MPI_OP_CREATE
377 * function. It creates a new ompi_op_t object, initializes it to the
378 * correct object type, and sets the callback function on it.
379 *
380 * The type of the function pointer is (arbitrarily) the fortran
381 * function handler type. Since this function has to accept 2
382 * different function pointer types (lest we have 2 different
 * functions to create ops), the fortran one was picked
384 * arbitrarily. Note that (void*) is not sufficient because at
385 * least theoretically, a sizeof(void*) may not necessarily be the
386 * same as sizeof(void(*)).
387 *
388 * NOTE: It *always* sets the "fortran" flag to false. The Fortran
389 * wrapper for MPI_OP_CREATE is expected to reset this flag to true
390 * manually.
391 */
392 ompi_op_t *ompi_op_create_user(bool commute,
393 ompi_op_fortran_handler_fn_t func);
394
/**
 * Mark an MPI_Op as holding a C++ callback function, and cache
 * that function in the MPI_Op.  See a lengthy comment in
 * ompi/mpi/cxx/op.c::ompi_mpi_cxx_op_intercept() for a full
 * explanation.
 */
401 OMPI_DECLSPEC void ompi_op_set_cxx_callback(ompi_op_t * op,
402 MPI_User_function * fn);
403
/**
 * Similar to ompi_op_set_cxx_callback(), mark an MPI_Op as holding a
 * Java callback function, and cache that function in the MPI_Op.
 */
408 OMPI_DECLSPEC void ompi_op_set_java_callback(ompi_op_t *op, void *jnienv,
409 void *object, int baseType);
410
411 /**
412 * Check to see if an op is intrinsic.
413 *
414 * @param op The op to check
415 *
416 * @returns true If the op is intrinsic
417 * @returns false If the op is not intrinsic
418 *
419 * Self-explanitory. This is needed in a few top-level MPI functions;
420 * this function is provided to hide the internal structure field
421 * names.
422 */
423 static inline bool ompi_op_is_intrinsic(ompi_op_t * op)
424 {
425 return (bool) (0 != (op->o_flags & OMPI_OP_FLAGS_INTRINSIC));
426 }
427
428
429 /**
430 * Check to see if an op is communative or not
431 *
432 * @param op The op to check
433 *
434 * @returns true If the op is communative
435 * @returns false If the op is not communative
436 *
437 * Self-explanitory. This is needed in a few top-level MPI functions;
438 * this function is provided to hide the internal structure field
439 * names.
440 */
441 static inline bool ompi_op_is_commute(ompi_op_t * op)
442 {
443 return (bool) (0 != (op->o_flags & OMPI_OP_FLAGS_COMMUTE));
444 }
445
446 /**
447 * Check to see if an op is floating point associative or not
448 *
449 * @param op The op to check
450 *
451 * @returns true If the op is floating point associative
452 * @returns false If the op is not floating point associative
453 *
454 * Self-explanitory. This is needed in a few top-level MPI functions;
455 * this function is provided to hide the internal structure field
456 * names.
457 */
458 static inline bool ompi_op_is_float_assoc(ompi_op_t * op)
459 {
460 return (bool) (0 != (op->o_flags & OMPI_OP_FLAGS_FLOAT_ASSOC));
461 }
462
463
464 /**
465 * Check to see if an op is valid on a given datatype
466 *
467 * @param op The op to check
468 * @param ddt The datatype to check
469 *
470 * @returns true If the op is valid on that datatype
471 * @returns false If the op is not valid on that datatype
472 *
473 * Self-explanitory. This is needed in a few top-level MPI functions;
474 * this function is provided to hide the internal structure field
475 * names.
476 */
477 static inline bool ompi_op_is_valid(ompi_op_t * op, ompi_datatype_t * ddt,
478 char **msg, const char *func)
479 {
480 /* Check:
481 - non-intrinsic ddt's cannot be invoked on intrinsic op's
482 - if intrinsic ddt invoked on intrinsic op:
483 - ensure the datatype is defined in the op map
484 - ensure we have a function pointer for that combination
485 */
486
487 if (ompi_op_is_intrinsic(op)) {
488 if (ompi_datatype_is_predefined(ddt)) {
489 /* Intrinsic ddt on intrinsic op */
490 if (-1 == ompi_op_ddt_map[ddt->id] ||
491 NULL == op->o_func.intrinsic.fns[ompi_op_ddt_map[ddt->id]]) {
492 (void) opal_asprintf(msg,
493 "%s: the reduction operation %s is not defined on the %s datatype",
494 func, op->o_name, ddt->name);
495 return false;
496 }
497 } else {
498 /* Non-intrinsic ddt on intrinsic op */
499 if ('\0' != ddt->name[0]) {
500 (void) opal_asprintf(msg,
501 "%s: the reduction operation %s is not defined for non-intrinsic datatypes (attempted with datatype named \"%s\")",
502 func, op->o_name, ddt->name);
503 } else {
504 (void) opal_asprintf(msg,
505 "%s: the reduction operation %s is not defined for non-intrinsic datatypes",
506 func, op->o_name);
507 }
508 return false;
509 }
510 }
511
512 /* All other cases ok */
513 return true;
514 }
515
516
517 /**
518 * Perform a reduction operation.
519 *
520 * @param op The operation (IN)
521 * @param source Source (input) buffer (IN)
522 * @param target Target (output) buffer (IN/OUT)
523 * @param count Number of elements (IN)
524 * @param dtype MPI datatype (IN)
525 *
526 * @returns void As with MPI user-defined reduction functions, there
527 * is no return code from this function.
528 *
529 * Perform a reduction operation with count elements of type dtype in
530 * the buffers source and target. The target buffer obtains the
531 * result (i.e., the original values in the target buffer are reduced
532 * with the values in the source buffer and the result is stored in
533 * the target buffer).
534 *
535 * This function figures out which reduction operation function to
536 * invoke and whether to invoke it with C- or Fortran-style invocation
537 * methods. If the op is intrinsic and has the operation defined for
538 * dtype, the appropriate back-end function will be invoked.
539 * Otherwise, the op is assumed to be a user op and the first function
540 * pointer in the op array will be used.
541 *
542 * NOTE: This function assumes that a correct combination will be
543 * given to it; it makes no provision for errors (in the name of
544 * optimization). If you give it an intrinsic op with a datatype that
545 * is not defined to have that operation, it is likely to seg fault.
546 */
547 static inline void ompi_op_reduce(ompi_op_t * op, void *source,
548 void *target, int count,
549 ompi_datatype_t * dtype)
550 {
551 MPI_Fint f_dtype, f_count;
552
553 /*
554 * Call the reduction function. Two dimensions: a) if both the op
555 * and the datatype are intrinsic, we have a series of predefined
556 * functions for each datatype (that are *only* in C -- not
557 * Fortran or C++!), or b) the op is user-defined, and therefore
558 * we have to check whether to invoke the callback with the C,
559 * C++, or Fortran callback signature (see lengthy description of
560 * the C++ callback in ompi/mpi/cxx/intercepts.cc).
561 *
562 * NOTE: We *assume* the following:
563 *
564 * 1. If the op is intrinsic, the op is pre-defined
565 * 2. That we will get a valid result back from the
566 * ompi_op_ddt_map[] (and not -1).
567 *
568 * Failures in these assumptions should have been caught by the
569 * upper layer (i.e., they should never have called this
570 * function). If either of these assumptions are wrong, it's
571 * likely that the MPI API function parameter checking is turned
572 * off, then it's an erroneous program and it's the user's fault.
573 * :-)
574 */
575
576 /* For intrinsics, we also pass the corresponding op module */
577 if (0 != (op->o_flags & OMPI_OP_FLAGS_INTRINSIC)) {
578 int dtype_id;
579 if (!ompi_datatype_is_predefined(dtype)) {
580 ompi_datatype_t *dt = ompi_datatype_get_single_predefined_type_from_args(dtype);
581 dtype_id = ompi_op_ddt_map[dt->id];
582 } else {
583 dtype_id = ompi_op_ddt_map[dtype->id];
584 }
585 op->o_func.intrinsic.fns[dtype_id](source, target,
586 &count, &dtype,
587 op->o_func.intrinsic.modules[dtype_id]);
588 return;
589 }
590
591 /* User-defined function */
592 if (0 != (op->o_flags & OMPI_OP_FLAGS_FORTRAN_FUNC)) {
593 f_dtype = OMPI_INT_2_FINT(dtype->d_f_to_c_index);
594 f_count = OMPI_INT_2_FINT(count);
595 op->o_func.fort_fn(source, target, &f_count, &f_dtype);
596 return;
597 } else if (0 != (op->o_flags & OMPI_OP_FLAGS_CXX_FUNC)) {
598 op->o_func.cxx_data.intercept_fn(source, target, &count, &dtype,
599 op->o_func.cxx_data.user_fn);
600 return;
601 } else if (0 != (op->o_flags & OMPI_OP_FLAGS_JAVA_FUNC)) {
602 op->o_func.java_data.intercept_fn(source, target, &count, &dtype,
603 op->o_func.java_data.baseType,
604 op->o_func.java_data.jnienv,
605 op->o_func.java_data.object);
606 return;
607 }
608 op->o_func.c_fn(source, target, &count, &dtype);
609 return;
610 }
611
612 static inline void ompi_3buff_op_user (ompi_op_t *op, void * restrict source1, void * restrict source2,
613 void * restrict result, int count, struct ompi_datatype_t *dtype)
614 {
615 ompi_datatype_copy_content_same_ddt (dtype, count, result, source1);
616 op->o_func.c_fn (source2, result, &count, &dtype);
617 }
618
619 /**
620 * Perform a reduction operation.
621 *
622 * @param op The operation (IN)
623 * @param source Source1 (input) buffer (IN)
624 * @param source Source2 (input) buffer (IN)
625 * @param target Target (output) buffer (IN/OUT)
626 * @param count Number of elements (IN)
627 * @param dtype MPI datatype (IN)
628 *
629 * @returns void As with MPI user-defined reduction functions, there
630 * is no return code from this function.
631 *
632 * Perform a reduction operation with count elements of type dtype in
633 * the buffers source and target. The target buffer obtains the
634 * result (i.e., the original values in the target buffer are reduced
635 * with the values in the source buffer and the result is stored in
636 * the target buffer).
637 *
638 * This function will *only* be invoked on intrinsic MPI_Ops.
639 *
640 * Otherwise, this function is the same as ompi_op_reduce.
641 */
642 static inline void ompi_3buff_op_reduce(ompi_op_t * op, void *source1,
643 void *source2, void *target,
644 int count, ompi_datatype_t * dtype)
645 {
646 void *restrict src1;
647 void *restrict src2;
648 void *restrict tgt;
649 src1 = source1;
650 src2 = source2;
651 tgt = target;
652
653 if (OPAL_LIKELY(ompi_op_is_intrinsic (op))) {
654 op->o_3buff_intrinsic.fns[ompi_op_ddt_map[dtype->id]](src1, src2,
655 tgt, &count,
656 &dtype,
657 op->o_3buff_intrinsic.modules[ompi_op_ddt_map[dtype->id]]);
658 } else {
659 ompi_3buff_op_user (op, src1, src2, tgt, count, dtype);
660 }
661 }
662
663 END_C_DECLS
664
665 #endif /* OMPI_OP_H */