1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ 2 /* 3 * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana 4 * University Research and Technology 5 * Corporation. All rights reserved. 6 * Copyright (c) 2004-2007 The University of Tennessee and The University 7 * of Tennessee Research Foundation. All rights 8 * reserved. 9 * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, 10 * University of Stuttgart. All rights reserved. 11 * Copyright (c) 2004-2005 The Regents of the University of California. 12 * All rights reserved. 13 * Copyright (c) 2008 UT-Battelle, LLC 14 * Copyright (c) 2008-2017 Cisco Systems, Inc. All rights reserved 15 * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. 16 * Copyright (c) 2015 Los Alamos National Security, LLC. All rights 17 * reserved. 18 * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. 19 * Copyright (c) 2019 Research Organization for Information Science 20 * and Technology (RIST). All rights reserved. 21 * Copyright (c) 2018 Triad National Security, LLC. All rights 22 * reserved. 23 * $COPYRIGHT$ 24 * 25 * Additional copyrights may follow 26 * 27 * $HEADER$ 28 */ 29 /** 30 * @file 31 * 32 * Public interface for the MPI_Op handle. 33 */ 34 35 #ifndef OMPI_OP_H 36 #define OMPI_OP_H 37 38 #include "ompi_config.h" 39 40 #include <stdio.h> 41 42 #include "mpi.h" 43 44 #include "opal/class/opal_object.h" 45 #include "opal/util/printf.h" 46 47 #include "ompi/datatype/ompi_datatype.h" 48 #include "ompi/mpi/fortran/base/fint_2_int.h" 49 #include "ompi/mca/op/op.h" 50 51 BEGIN_C_DECLS 52 53 /** 54 * Typedef for C op functions for user-defined MPI_Ops. 55 * 56 * We don't use MPI_User_function because this would create a 57 * confusing dependency loop between this file and mpi.h. So this is 58 * repeated code, but it's better this way (and this typedef will 59 * never change, so there's not much of a maintenance worry). 
 */
typedef void (ompi_op_c_handler_fn_t)(void *, void *, int *,
                                      struct ompi_datatype_t **);

/**
 * Typedef for fortran user-defined MPI_Ops.
 */
typedef void (ompi_op_fortran_handler_fn_t)(void *, void *,
                                            MPI_Fint *, MPI_Fint *);

/**
 * Typedef for C++ op functions intercept (used for user-defined
 * MPI::Ops).
 *
 * See the lengthy explanation for why this is different than the C
 * intercept in ompi/mpi/cxx/intercepts.cc in the
 * ompi_mpi_cxx_op_intercept() function.
 */
typedef void (ompi_op_cxx_handler_fn_t)(void *, void *, int *,
                                        struct ompi_datatype_t **,
                                        MPI_User_function * op);

/**
 * Typedef for Java op functions intercept (used for user-defined
 * MPI.Ops).
 *
 * See the lengthy explanation for why this is different than the C
 * intercept in ompi/mpi/cxx/intercepts.cc in the
 * ompi_mpi_cxx_op_intercept() function.
 */
typedef void (ompi_op_java_handler_fn_t)(void *, void *, int *,
                                         struct ompi_datatype_t **,
                                         int baseType,
                                         void *jnienv, void *object);

/*
 * Flags for MPI_Op (stored in ompi_op_t::o_flags)
 */
/** Set if the MPI_Op is a built-in operation */
#define OMPI_OP_FLAGS_INTRINSIC 0x0001
/** Set if the callback function is in Fortran */
#define OMPI_OP_FLAGS_FORTRAN_FUNC 0x0002
/** Set if the callback function is in C++ */
#define OMPI_OP_FLAGS_CXX_FUNC 0x0004
/** Set if the callback function is in Java */
#define OMPI_OP_FLAGS_JAVA_FUNC 0x0008
/** Set if the callback function is associative (MAX and SUM will both
    have ASSOC set -- in fact, it will only *not* be set if we
    implement some extensions to MPI, because MPI says that all
    MPI_Op's should be associative, so this flag is really here for
    future expansion) */
#define OMPI_OP_FLAGS_ASSOC 0x0010
/** Set if the callback function is associative for floating point
    operands (e.g., MPI_SUM will have ASSOC set, but will *not* have
    FLOAT_ASSOC set) */
#define OMPI_OP_FLAGS_FLOAT_ASSOC 0x0020
/** Set if the callback function is commutative */
#define OMPI_OP_FLAGS_COMMUTE 0x0040


/*
 * Basic operation type for predefined types.
 */
enum ompi_op_type {
    OMPI_OP_NULL,
    OMPI_OP_MAX,
    OMPI_OP_MIN,
    OMPI_OP_SUM,
    OMPI_OP_PROD,
    OMPI_OP_LAND,
    OMPI_OP_BAND,
    OMPI_OP_LOR,
    OMPI_OP_BOR,
    OMPI_OP_LXOR,
    OMPI_OP_BXOR,
    OMPI_OP_MAXLOC,
    OMPI_OP_MINLOC,
    OMPI_OP_REPLACE,
    OMPI_OP_NUM_OF_TYPES
};

/**
 * Back-end type of MPI_Op
 */
struct ompi_op_t {
    /** Parent class, for reference counting */
    opal_object_t super;

    /** Name, for debugging purposes */
    char o_name[MPI_MAX_OBJECT_NAME];

    /** Which basic operation this is (a value from enum ompi_op_type) */
    enum ompi_op_type op_type;

    /** Flags about the op (OMPI_OP_FLAGS_* bitmask) */
    uint32_t o_flags;

    /** Index in Fortran <-> C translation array */
    int o_f_to_c_index;

    /** Union holding (2-buffer functions):
        1. Function pointers for all supported datatypes when this op
           is intrinsic
        2. Function pointers for when this op is user-defined (only
           need one function pointer for this; we call it for *all*
           datatypes, even intrinsics)
        Which member is active is determined by o_flags. */
    union {
        /** Function/module pointers for intrinsic ops */
        ompi_op_base_op_fns_t intrinsic;
        /** C handler function pointer */
        ompi_op_c_handler_fn_t *c_fn;
        /** Fortran handler function pointer */
        ompi_op_fortran_handler_fn_t *fort_fn;
        /** C++ intercept function data -- see lengthy comment in
            ompi/mpi/cxx/intercepts.cc::ompi_mpi_cxx_op_intercept() for
            an explanation */
        struct {
            /* The user's function (it's the wrong type, but that's ok) */
            ompi_op_c_handler_fn_t *user_fn;
            /* The OMPI C++ callback/intercept function */
            ompi_op_cxx_handler_fn_t *intercept_fn;
        } cxx_data;
        /** Java intercept function data */
        struct {
            /* The OMPI Java callback/intercept function */
            ompi_op_java_handler_fn_t *intercept_fn;
            /* The Java run time environment */
            void *jnienv, *object;
            int baseType;
        } java_data;
    } o_func;

    /** 3-buffer functions, which is only for intrinsic ops.  No need
        for the C/C++/Fortran user-defined functions. */
    ompi_op_base_op_3buff_fns_t o_3buff_intrinsic;
};

/**
 * Convenience typedef
 */
typedef struct ompi_op_t ompi_op_t;
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_op_t);

/**
 * Padded struct to maintain back compatibility.
 * See ompi/communicator/communicator.h comments with struct ompi_communicator_t
 * for full explanation why we chose the following padding construct for predefines.
 */
#define PREDEFINED_OP_PAD 2048

struct ompi_predefined_op_t {
    struct ompi_op_t op;
    /* Pad the predefined object out to PREDEFINED_OP_PAD bytes so its
       size stays stable even if ompi_op_t grows */
    char padding[PREDEFINED_OP_PAD - sizeof(ompi_op_t)];
};

typedef struct ompi_predefined_op_t ompi_predefined_op_t;

/**
 * Array to map ddt->id values to the corresponding position in the op
 * function array.
 *
 * NOTE: It is possible to have an implementation without this map.
 * There are basically 3 choices for implementing "how to find the
 * right position in the op array based on the datatype":
 *
 * 1. Use the exact same ordering as ddt->id in the op map.  This is
 * nice in that it's always a direct lookup via one memory
 * de-reference.  But it makes a sparse op array, and it's at least
 * somewhat wasteful.  It also chains the ddt and op implementations
 * together.  If the ddt ever changes its ordering, op is screwed.  It
 * seemed safer from a maintenance point of view not to do it that
 * way.
 *
 * 2. Re-arrange the ddt ID values so that all the reducible types are
 * at the beginning.  This means that we can have a dense array here
 * in op, but then we have the same problem as number one -- and so
 * this didn't seem like a good idea from a maintenance point of view.
 *
 * 3. Create a mapping between the ddt->id values and the position in
 * the op array.  This allows a nice dense op array, and if we make
 * the map based on symbolic values, then if ddt ever changes its
 * ordering, it won't matter to op.  This seemed like the safest thing
 * to do from a maintenance perspective, and since it only costs one
 * extra lookup, and that lookup is way cheaper than the function call
 * to invoke the reduction operation, it seemed like the best idea.
 */
OMPI_DECLSPEC extern int ompi_op_ddt_map[OMPI_DATATYPE_MAX_PREDEFINED];

/**
 * Global variable for MPI_OP_NULL (_addr flavor is for F03 bindings)
 */
OMPI_DECLSPEC extern ompi_predefined_op_t ompi_mpi_op_null;
OMPI_DECLSPEC extern ompi_predefined_op_t *ompi_mpi_op_null_addr;

/**
 * Global variable for MPI_MAX (_addr flavor is for F03 bindings)
 */
OMPI_DECLSPEC extern ompi_predefined_op_t ompi_mpi_op_max;
OMPI_DECLSPEC extern ompi_predefined_op_t *ompi_mpi_op_max_addr;

/**
 * Global variable for MPI_MIN (_addr flavor is for F03 bindings)
 */
OMPI_DECLSPEC extern ompi_predefined_op_t ompi_mpi_op_min;
OMPI_DECLSPEC extern ompi_predefined_op_t *ompi_mpi_op_min_addr;

/**
 * Global variable for MPI_SUM (_addr flavor is for F03 bindings)
 */
OMPI_DECLSPEC extern ompi_predefined_op_t ompi_mpi_op_sum;
OMPI_DECLSPEC extern ompi_predefined_op_t *ompi_mpi_op_sum_addr;

/**
 * Global variable for MPI_PROD (_addr flavor is for F03 bindings)
 */
OMPI_DECLSPEC extern ompi_predefined_op_t ompi_mpi_op_prod;
OMPI_DECLSPEC extern ompi_predefined_op_t *ompi_mpi_op_prod_addr;

/**
 * Global variable for MPI_LAND (_addr flavor is for F03 bindings)
 */
OMPI_DECLSPEC extern ompi_predefined_op_t ompi_mpi_op_land;
OMPI_DECLSPEC extern ompi_predefined_op_t *ompi_mpi_op_land_addr;

/**
 * Global variable for MPI_BAND (_addr flavor is for F03 bindings)
 */
OMPI_DECLSPEC extern ompi_predefined_op_t ompi_mpi_op_band;
OMPI_DECLSPEC extern ompi_predefined_op_t *ompi_mpi_op_band_addr;

/**
 * Global variable for MPI_LOR (_addr flavor is for F03 bindings)
 */
OMPI_DECLSPEC extern ompi_predefined_op_t ompi_mpi_op_lor;
OMPI_DECLSPEC extern ompi_predefined_op_t *ompi_mpi_op_lor_addr;

/**
 * Global variable for MPI_BOR (_addr flavor is for F03 bindings)
 */
OMPI_DECLSPEC extern ompi_predefined_op_t ompi_mpi_op_bor;
OMPI_DECLSPEC extern ompi_predefined_op_t *ompi_mpi_op_bor_addr;

/**
 * Global variable for MPI_LXOR (_addr flavor is for F03 bindings)
 */
OMPI_DECLSPEC extern ompi_predefined_op_t ompi_mpi_op_lxor;
OMPI_DECLSPEC extern ompi_predefined_op_t *ompi_mpi_op_lxor_addr;

/**
 * Global variable for MPI_BXOR (_addr flavor is for F03 bindings)
 */
OMPI_DECLSPEC extern ompi_predefined_op_t ompi_mpi_op_bxor;
OMPI_DECLSPEC extern ompi_predefined_op_t *ompi_mpi_op_bxor_addr;

/**
 * Global variable for MPI_MAXLOC (_addr flavor is for F03 bindings)
 */
OMPI_DECLSPEC extern ompi_predefined_op_t ompi_mpi_op_maxloc;
OMPI_DECLSPEC extern ompi_predefined_op_t *ompi_mpi_op_maxloc_addr;

/**
 * Global variable for MPI_MINLOC (_addr flavor is for F03 bindings)
 */
OMPI_DECLSPEC extern ompi_predefined_op_t ompi_mpi_op_minloc;
OMPI_DECLSPEC extern ompi_predefined_op_t *ompi_mpi_op_minloc_addr;

/**
 * Global variable for MPI_REPLACE (_addr flavor is for F03 bindings)
 */
OMPI_DECLSPEC extern ompi_predefined_op_t ompi_mpi_op_replace;
OMPI_DECLSPEC extern ompi_predefined_op_t *ompi_mpi_op_replace_addr;

/**
 * Global variable for MPI_NO_OP
 */
OMPI_DECLSPEC extern ompi_predefined_op_t ompi_mpi_op_no_op;


/**
 * Table for Fortran <-> C op handle conversion
 */
extern struct opal_pointer_array_t *ompi_op_f_to_c_table;

/**
 * Initialize the op interface.
 *
 * @returns OMPI_SUCCESS Upon success
 * @returns OMPI_ERROR Otherwise
 *
 * Invoked from ompi_mpi_init(); sets up the op interface, creates
 * the predefined MPI operations, and creates the corresponding F2C
 * translation table.
 */
int ompi_op_init(void);

/**
 * Finalize the op interface.
 *
 * @returns OMPI_SUCCESS Always
 *
 * Invoked from ompi_mpi_finalize(); tears down the op interface, and
 * destroys the F2C translation table.
 */
int ompi_op_finalize(void);

/**
 * Create an ompi_op_t with a user-defined callback (vs. creating an
 * intrinsic ompi_op_t).
 *
 * @param commute Boolean indicating whether the operation is
 * commutative or not
 * @param func Function pointer of the user-defined reduction callback
 *
 * @returns op Pointer to the ompi_op_t that will be
 * created and returned
 *
 * This function is called as the back-end of the MPI_OP_CREATE
 * function.  It creates a new ompi_op_t object, initializes it to the
 * correct object type, and sets the callback function on it.
 *
 * The type of the function pointer is (arbitrarily) the fortran
 * function handler type.  Since this function has to accept 2
 * different function pointer types (lest we have 2 different
 * functions to create ops), the fortran one was picked
 * arbitrarily.  Note that (void*) is not sufficient because at
 * least theoretically, a sizeof(void*) may not necessarily be the
 * same as sizeof(void(*)).
 *
 * NOTE: It *always* sets the "fortran" flag to false.  The Fortran
 * wrapper for MPI_OP_CREATE is expected to reset this flag to true
 * manually.
 */
ompi_op_t *ompi_op_create_user(bool commute,
                               ompi_op_fortran_handler_fn_t func);

/**
 * Mark an MPI_Op as holding a C++ callback function, and cache
 * that function in the MPI_Op.  See a lengthy comment in
 * ompi/mpi/cxx/op.c::ompi_mpi_cxx_op_intercept() for a full
 * explanation.
 */
OMPI_DECLSPEC void ompi_op_set_cxx_callback(ompi_op_t * op,
                                            MPI_User_function * fn);

/**
 * Similar to ompi_op_set_cxx_callback(), mark an MPI_Op as holding a
 * Java callback function, and cache that function in the MPI_Op.
407 */ 408 OMPI_DECLSPEC void ompi_op_set_java_callback(ompi_op_t *op, void *jnienv, 409 void *object, int baseType); 410 411 /** 412 * Check to see if an op is intrinsic. 413 * 414 * @param op The op to check 415 * 416 * @returns true If the op is intrinsic 417 * @returns false If the op is not intrinsic 418 * 419 * Self-explanitory. This is needed in a few top-level MPI functions; 420 * this function is provided to hide the internal structure field 421 * names. 422 */ 423 static inline bool ompi_op_is_intrinsic(ompi_op_t * op) 424 { 425 return (bool) (0 != (op->o_flags & OMPI_OP_FLAGS_INTRINSIC)); 426 } 427 428 429 /** 430 * Check to see if an op is communative or not 431 * 432 * @param op The op to check 433 * 434 * @returns true If the op is communative 435 * @returns false If the op is not communative 436 * 437 * Self-explanitory. This is needed in a few top-level MPI functions; 438 * this function is provided to hide the internal structure field 439 * names. 440 */ 441 static inline bool ompi_op_is_commute(ompi_op_t * op) 442 { 443 return (bool) (0 != (op->o_flags & OMPI_OP_FLAGS_COMMUTE)); 444 } 445 446 /** 447 * Check to see if an op is floating point associative or not 448 * 449 * @param op The op to check 450 * 451 * @returns true If the op is floating point associative 452 * @returns false If the op is not floating point associative 453 * 454 * Self-explanitory. This is needed in a few top-level MPI functions; 455 * this function is provided to hide the internal structure field 456 * names. 457 */ 458 static inline bool ompi_op_is_float_assoc(ompi_op_t * op) 459 { 460 return (bool) (0 != (op->o_flags & OMPI_OP_FLAGS_FLOAT_ASSOC)); 461 } 462 463 464 /** 465 * Check to see if an op is valid on a given datatype 466 * 467 * @param op The op to check 468 * @param ddt The datatype to check 469 * 470 * @returns true If the op is valid on that datatype 471 * @returns false If the op is not valid on that datatype 472 * 473 * Self-explanitory. 
This is needed in a few top-level MPI functions;
 * this function is provided to hide the internal structure field
 * names.
 */
static inline bool ompi_op_is_valid(ompi_op_t * op, ompi_datatype_t * ddt,
                                    char **msg, const char *func)
{
    /* Check:
       - non-intrinsic ddt's cannot be invoked on intrinsic op's
       - if intrinsic ddt invoked on intrinsic op:
       - ensure the datatype is defined in the op map
       - ensure we have a function pointer for that combination
     */

    if (ompi_op_is_intrinsic(op)) {
        if (ompi_datatype_is_predefined(ddt)) {
            /* Intrinsic ddt on intrinsic op */
            /* Invalid if the datatype has no slot in the op map, or if
               the op has no back-end function for that slot.  On
               failure an error string is allocated into *msg
               (presumably freed by the caller -- TODO confirm
               ownership convention at call sites). */
            if (-1 == ompi_op_ddt_map[ddt->id] ||
                NULL == op->o_func.intrinsic.fns[ompi_op_ddt_map[ddt->id]]) {
                (void) opal_asprintf(msg,
                                     "%s: the reduction operation %s is not defined on the %s datatype",
                                     func, op->o_name, ddt->name);
                return false;
            }
        } else {
            /* Non-intrinsic ddt on intrinsic op: never valid.  Include
               the datatype's name in the message only if it has one. */
            if ('\0' != ddt->name[0]) {
                (void) opal_asprintf(msg,
                                     "%s: the reduction operation %s is not defined for non-intrinsic datatypes (attempted with datatype named \"%s\")",
                                     func, op->o_name, ddt->name);
            } else {
                (void) opal_asprintf(msg,
                                     "%s: the reduction operation %s is not defined for non-intrinsic datatypes",
                                     func, op->o_name);
            }
            return false;
        }
    }

    /* All other cases ok (user-defined ops accept any datatype) */
    return true;
}


/**
 * Perform a reduction operation.
 *
 * @param op The operation (IN)
 * @param source Source (input) buffer (IN)
 * @param target Target (output) buffer (IN/OUT)
 * @param count Number of elements (IN)
 * @param dtype MPI datatype (IN)
 *
 * @returns void As with MPI user-defined reduction functions, there
 * is no return code from this function.
 *
 * Perform a reduction operation with count elements of type dtype in
 * the buffers source and target.
The target buffer obtains the
 * result (i.e., the original values in the target buffer are reduced
 * with the values in the source buffer and the result is stored in
 * the target buffer).
 *
 * This function figures out which reduction operation function to
 * invoke and whether to invoke it with C- or Fortran-style invocation
 * methods.  If the op is intrinsic and has the operation defined for
 * dtype, the appropriate back-end function will be invoked.
 * Otherwise, the op is assumed to be a user op and the first function
 * pointer in the op array will be used.
 *
 * NOTE: This function assumes that a correct combination will be
 * given to it; it makes no provision for errors (in the name of
 * optimization).  If you give it an intrinsic op with a datatype that
 * is not defined to have that operation, it is likely to seg fault.
 */
static inline void ompi_op_reduce(ompi_op_t * op, void *source,
                                  void *target, int count,
                                  ompi_datatype_t * dtype)
{
    MPI_Fint f_dtype, f_count;

    /*
     * Call the reduction function.  Two dimensions: a) if both the op
     * and the datatype are intrinsic, we have a series of predefined
     * functions for each datatype (that are *only* in C -- not
     * Fortran or C++!), or b) the op is user-defined, and therefore
     * we have to check whether to invoke the callback with the C,
     * C++, or Fortran callback signature (see lengthy description of
     * the C++ callback in ompi/mpi/cxx/intercepts.cc).
     *
     * NOTE: We *assume* the following:
     *
     * 1. If the op is intrinsic, the op is pre-defined
     * 2. That we will get a valid result back from the
     * ompi_op_ddt_map[] (and not -1).
     *
     * Failures in these assumptions should have been caught by the
     * upper layer (i.e., they should never have called this
     * function.  If either of these assumptions are wrong, it's
     * likely that the MPI API function parameter checking is turned
     * off, then it's an erroneous program and it's the user's fault.
     * :-)
     */

    /* For intrinsics, we also pass the corresponding op module */
    if (0 != (op->o_flags & OMPI_OP_FLAGS_INTRINSIC)) {
        int dtype_id;
        if (!ompi_datatype_is_predefined(dtype)) {
            /* Derived datatype: resolve it to its single underlying
               predefined type and use that type's map slot.
               NOTE(review): presumably only valid for derived types
               built from one predefined type -- confirm with callers */
            ompi_datatype_t *dt = ompi_datatype_get_single_predefined_type_from_args(dtype);
            dtype_id = ompi_op_ddt_map[dt->id];
        } else {
            dtype_id = ompi_op_ddt_map[dtype->id];
        }
        op->o_func.intrinsic.fns[dtype_id](source, target,
                                           &count, &dtype,
                                           op->o_func.intrinsic.modules[dtype_id]);
        return;
    }

    /* User-defined function: dispatch on the language flag.  The
       branch order matters only in that exactly one language flag is
       expected to be set; C callbacks are the fall-through default. */
    if (0 != (op->o_flags & OMPI_OP_FLAGS_FORTRAN_FUNC)) {
        /* Fortran callbacks take MPI_Fint arguments, so convert the
           count and the datatype's Fortran handle first */
        f_dtype = OMPI_INT_2_FINT(dtype->d_f_to_c_index);
        f_count = OMPI_INT_2_FINT(count);
        op->o_func.fort_fn(source, target, &f_count, &f_dtype);
        return;
    } else if (0 != (op->o_flags & OMPI_OP_FLAGS_CXX_FUNC)) {
        /* C++ intercept: pass the user's function through to the
           intercept (see ompi/mpi/cxx/intercepts.cc) */
        op->o_func.cxx_data.intercept_fn(source, target, &count, &dtype,
                                         op->o_func.cxx_data.user_fn);
        return;
    } else if (0 != (op->o_flags & OMPI_OP_FLAGS_JAVA_FUNC)) {
        /* Java intercept: pass the cached JNI environment and object */
        op->o_func.java_data.intercept_fn(source, target, &count, &dtype,
                                          op->o_func.java_data.baseType,
                                          op->o_func.java_data.jnienv,
                                          op->o_func.java_data.object);
        return;
    }
    /* Plain C user callback */
    op->o_func.c_fn(source, target, &count, &dtype);
    return;
}

/* 3-buffer reduction for user-defined ops: copy source1 into result,
   then apply the user's 2-buffer callback with source2 as the input
   (result = source2 op source1). */
static inline void ompi_3buff_op_user (ompi_op_t *op, void * restrict source1, void * restrict source2,
                                       void * restrict result, int count, struct ompi_datatype_t *dtype)
{
    ompi_datatype_copy_content_same_ddt (dtype, count, result, source1);
    op->o_func.c_fn (source2, result, &count, &dtype);
}

/**
 * Perform a reduction operation.
621 * 622 * @param op The operation (IN) 623 * @param source Source1 (input) buffer (IN) 624 * @param source Source2 (input) buffer (IN) 625 * @param target Target (output) buffer (IN/OUT) 626 * @param count Number of elements (IN) 627 * @param dtype MPI datatype (IN) 628 * 629 * @returns void As with MPI user-defined reduction functions, there 630 * is no return code from this function. 631 * 632 * Perform a reduction operation with count elements of type dtype in 633 * the buffers source and target. The target buffer obtains the 634 * result (i.e., the original values in the target buffer are reduced 635 * with the values in the source buffer and the result is stored in 636 * the target buffer). 637 * 638 * This function will *only* be invoked on intrinsic MPI_Ops. 639 * 640 * Otherwise, this function is the same as ompi_op_reduce. 641 */ 642 static inline void ompi_3buff_op_reduce(ompi_op_t * op, void *source1, 643 void *source2, void *target, 644 int count, ompi_datatype_t * dtype) 645 { 646 void *restrict src1; 647 void *restrict src2; 648 void *restrict tgt; 649 src1 = source1; 650 src2 = source2; 651 tgt = target; 652 653 if (OPAL_LIKELY(ompi_op_is_intrinsic (op))) { 654 op->o_3buff_intrinsic.fns[ompi_op_ddt_map[dtype->id]](src1, src2, 655 tgt, &count, 656 &dtype, 657 op->o_3buff_intrinsic.modules[ompi_op_ddt_map[dtype->id]]); 658 } else { 659 ompi_3buff_op_user (op, src1, src2, tgt, count, dtype); 660 } 661 } 662 663 END_C_DECLS 664 665 #endif /* OMPI_OP_H */