This source file includes the following definitions.
- mca_coll_cuda_reduce
1
2
3
4
5
6
7
8
9
10
11
12
13 #include "ompi_config.h"
14 #include "coll_cuda.h"
15
16 #include <stdio.h>
17
18 #include "ompi/op/op.h"
19 #include "opal/datatype/opal_convertor.h"
20 #include "opal/datatype/opal_datatype_cuda.h"
21
22
23
24
25
26
27
28
29 int
30 mca_coll_cuda_reduce(const void *sbuf, void *rbuf, int count,
31 struct ompi_datatype_t *dtype,
32 struct ompi_op_t *op,
33 int root, struct ompi_communicator_t *comm,
34 mca_coll_base_module_t *module)
35 {
36 mca_coll_cuda_module_t *s = (mca_coll_cuda_module_t*) module;
37 ptrdiff_t gap;
38 char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL;
39 const char *sbuf2;
40 size_t bufsize;
41 int rc;
42
43 bufsize = opal_datatype_span(&dtype->super, count, &gap);
44
45
46 if ((MPI_IN_PLACE != sbuf) && (opal_cuda_check_bufs((char *)sbuf, NULL))) {
47 sbuf1 = (char*)malloc(bufsize);
48 if (NULL == sbuf1) {
49 return OMPI_ERR_OUT_OF_RESOURCE;
50 }
51 opal_cuda_memcpy_sync(sbuf1, sbuf, bufsize);
52 sbuf2 = sbuf;
53 sbuf = sbuf1 - gap;
54 }
55
56 if (opal_cuda_check_bufs(rbuf, NULL)) {
57 rbuf1 = (char*)malloc(bufsize);
58 if (NULL == rbuf1) {
59 if (NULL != sbuf1) free(sbuf1);
60 return OMPI_ERR_OUT_OF_RESOURCE;
61 }
62 opal_cuda_memcpy_sync(rbuf1, rbuf, bufsize);
63 rbuf2 = rbuf;
64 rbuf = rbuf1 - gap;
65 }
66 rc = s->c_coll.coll_reduce((void *) sbuf, rbuf, count,
67 dtype, op, root, comm,
68 s->c_coll.coll_reduce_module);
69
70 if (NULL != sbuf1) {
71 free(sbuf1);
72 }
73 if (NULL != rbuf1) {
74 rbuf = rbuf2;
75 opal_cuda_memcpy_sync(rbuf, rbuf1, bufsize);
76 free(rbuf1);
77 }
78 return rc;
79 }