1
2
3
4
5
6
7
8
9
10
11
12
13 #ifndef MCA_COLL_CUDA_EXPORT_H
14 #define MCA_COLL_CUDA_EXPORT_H
15
16 #include "ompi_config.h"
17
18 #include "mpi.h"
19
20 #include "opal/class/opal_object.h"
21 #include "ompi/mca/mca.h"
22
23 #include "ompi/constants.h"
24 #include "ompi/mca/coll/coll.h"
25 #include "ompi/mca/coll/base/base.h"
26 #include "ompi/communicator/communicator.h"
27
28 BEGIN_C_DECLS
29
30
31
/*
 * Prototype of the component's init-query entry point (defined outside
 * this header).
 *
 * enable_progress_threads, enable_mpi_threads: boolean flags passed in by
 * the caller; how they are used is not visible from this header.
 *
 * Returns an int.
 * NOTE(review): presumably an OMPI_* status code such as OMPI_SUCCESS --
 * confirm against the implementation.
 */
32 int mca_coll_cuda_init_query(bool enable_progress_threads,
33 bool enable_mpi_threads);
/*
 * Prototype of the per-communicator query entry point (defined outside
 * this header).
 *
 * comm:     the communicator being queried.
 * priority: pointer to an int.
 *           NOTE(review): presumably an out-parameter that receives this
 *           component's priority for comm -- confirm against the
 *           implementation.
 *
 * Returns a pointer to a mca_coll_base_module_t.
 * NOTE(review): presumably NULL when the component declines the
 * communicator -- confirm.
 */
34 mca_coll_base_module_t
35 *mca_coll_cuda_comm_query(struct ompi_communicator_t *comm,
36 int *priority);
37
/*
 * Prototype of the module-enable entry point (defined outside this
 * header).
 *
 * module: the collective module to enable.
 * comm:   the communicator the module is being enabled on.
 *
 * Returns an int.
 * NOTE(review): presumably an OMPI_* status code -- confirm against the
 * implementation.
 */
38 int mca_coll_cuda_module_enable(mca_coll_base_module_t *module,
39 struct ompi_communicator_t *comm);
40
/*
 * Prototype of this component's allreduce entry point (defined outside
 * this header).
 *
 * sbuf:   source buffer (read-only through this pointer).
 * rbuf:   result buffer.
 * count:  element count.
 * dtype:  datatype descriptor for the elements.
 * op:     reduction operation descriptor.
 * comm:   communicator the operation runs on.
 * module: collective module handle passed by the caller.
 *
 * Returns an int.
 * NOTE(review): the argument list looks like MPI_Allreduce plus a module
 * handle; buffer placement (host vs. device) and return codes are not
 * visible here -- confirm against the implementation.
 */
41 int
42 mca_coll_cuda_allreduce(const void *sbuf, void *rbuf, int count,
43 struct ompi_datatype_t *dtype,
44 struct ompi_op_t *op,
45 struct ompi_communicator_t *comm,
46 mca_coll_base_module_t *module);
47
/*
 * Prototype of this component's reduce entry point (defined outside this
 * header).
 *
 * sbuf:   source buffer (read-only through this pointer).
 * rbuf:   result buffer.
 * count:  element count.
 * dtype:  datatype descriptor for the elements.
 * op:     reduction operation descriptor.
 * root:   integer identifying the root.
 *         NOTE(review): presumably the rank within comm that receives the
 *         result -- confirm.
 * comm:   communicator the operation runs on.
 * module: collective module handle passed by the caller.
 *
 * Returns an int.
 * NOTE(review): presumably an OMPI_* / MPI error code -- confirm against
 * the implementation.
 */
48 int mca_coll_cuda_reduce(const void *sbuf, void *rbuf, int count,
49 struct ompi_datatype_t *dtype,
50 struct ompi_op_t *op,
51 int root,
52 struct ompi_communicator_t *comm,
53 mca_coll_base_module_t *module);
54
/*
 * Prototype of this component's exscan entry point (defined outside this
 * header).
 *
 * sbuf:   source buffer (read-only through this pointer).
 * rbuf:   result buffer.
 * count:  element count.
 * dtype:  datatype descriptor for the elements.
 * op:     reduction operation descriptor.
 * comm:   communicator the operation runs on.
 * module: collective module handle passed by the caller.
 *
 * Returns an int.
 * NOTE(review): the name suggests an exclusive prefix reduction in the
 * style of MPI_Exscan -- confirm semantics and return codes against the
 * implementation.
 */
55 int mca_coll_cuda_exscan(const void *sbuf, void *rbuf, int count,
56 struct ompi_datatype_t *dtype,
57 struct ompi_op_t *op,
58 struct ompi_communicator_t *comm,
59 mca_coll_base_module_t *module);
60
/*
 * Prototype of this component's scan entry point (defined outside this
 * header).
 *
 * sbuf:   source buffer (read-only through this pointer).
 * rbuf:   result buffer.
 * count:  element count.
 * dtype:  datatype descriptor for the elements.
 * op:     reduction operation descriptor.
 * comm:   communicator the operation runs on.
 * module: collective module handle passed by the caller.
 *
 * Returns an int.
 * NOTE(review): the name suggests an inclusive prefix reduction in the
 * style of MPI_Scan -- confirm semantics and return codes against the
 * implementation.
 */
61 int mca_coll_cuda_scan(const void *sbuf, void *rbuf, int count,
62 struct ompi_datatype_t *dtype,
63 struct ompi_op_t *op,
64 struct ompi_communicator_t *comm,
65 mca_coll_base_module_t *module);
66
/*
 * Prototype of this component's reduce_scatter_block entry point (defined
 * outside this header).
 *
 * sbuf:   source buffer (read-only through this pointer).
 * rbuf:   result buffer.
 * rcount: integer count.
 *         NOTE(review): presumably the per-process result element count,
 *         as in MPI_Reduce_scatter_block -- confirm.
 * dtype:  datatype descriptor for the elements.
 * op:     reduction operation descriptor.
 * comm:   communicator the operation runs on.
 * module: collective module handle passed by the caller.
 *
 * Returns an int.
 * NOTE(review): presumably an OMPI_* / MPI error code -- confirm against
 * the implementation.
 */
67 int
68 mca_coll_cuda_reduce_scatter_block(const void *sbuf, void *rbuf, int rcount,
69 struct ompi_datatype_t *dtype,
70 struct ompi_op_t *op,
71 struct ompi_communicator_t *comm,
72 mca_coll_base_module_t *module);
73
74
75
76
/*
 * Module type for this component.
 *
 * The first member is a mca_coll_base_module_t named "super", so a pointer
 * to this struct can be converted to a pointer to that base type (C
 * guarantees a struct pointer, suitably converted, points to its first
 * member).
 */
77 typedef struct mca_coll_cuda_module_t {
78 mca_coll_base_module_t super;
79
80
/*
 * A mca_coll_base_comm_coll_t held by value.
 * NOTE(review): presumably stores the previously selected ("underlying")
 * collective functions that this module forwards to -- confirm against
 * the module-enable implementation.
 */
81 mca_coll_base_comm_coll_t c_coll;
82 } mca_coll_cuda_module_t;
83
/*
 * Invokes the project macro OBJ_CLASS_DECLARATION for the module type.
 * The macro is not defined in this header (opal/class/opal_object.h is
 * included above).
 * NOTE(review): presumably declares the type's class descriptor -- confirm.
 */
84 OBJ_CLASS_DECLARATION(mca_coll_cuda_module_t);
85
86
87
/*
 * Component type for this collective component.
 *
 * The first member is a mca_coll_base_component_2_0_0_t named "super",
 * followed by two int fields specific to this component.
 */
88 typedef struct mca_coll_cuda_component_t {
89 mca_coll_base_component_2_0_0_t super;
90
/*
 * NOTE(review): presumably the selection priority this component reports
 * and a flag that turns the component off when non-zero; where these are
 * set (e.g. MCA parameter registration) is not visible here -- confirm.
 */
91 int priority;
92 int disable_cuda_coll;
93 } mca_coll_cuda_component_t;
94
95
96
/*
 * Declares (does not define) the object mca_coll_cuda_component of type
 * mca_coll_cuda_component_t; the definition lives outside this header.
 * OMPI_MODULE_DECLSPEC is a project macro not defined here.
 * NOTE(review): presumably controls symbol visibility/export -- confirm.
 */
97 OMPI_MODULE_DECLSPEC extern mca_coll_cuda_component_t mca_coll_cuda_component;
98
99 END_C_DECLS
100
101 #endif