This source file includes the following definitions.
- mca_coll_cuda_allreduce
1
2
3
4
5
6
7
8
9
10
11
12
13 #include "ompi_config.h"
14 #include "coll_cuda.h"
15
16 #include <stdio.h>
17
18 #include "ompi/op/op.h"
19 #include "opal/datatype/opal_convertor.h"
20 #include "opal/datatype/opal_datatype_cuda.h"
21
22
23
24
25
26
27
28
29 int
30 mca_coll_cuda_allreduce(const void *sbuf, void *rbuf, int count,
31 struct ompi_datatype_t *dtype,
32 struct ompi_op_t *op,
33 struct ompi_communicator_t *comm,
34 mca_coll_base_module_t *module)
35 {
36 mca_coll_cuda_module_t *s = (mca_coll_cuda_module_t*) module;
37 ptrdiff_t gap;
38 char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL;
39 size_t bufsize;
40 int rc;
41
42 bufsize = opal_datatype_span(&dtype->super, count, &gap);
43
44 if ((MPI_IN_PLACE != sbuf) && (opal_cuda_check_bufs((char *)sbuf, NULL))) {
45 sbuf1 = (char*)malloc(bufsize);
46 if (NULL == sbuf1) {
47 return OMPI_ERR_OUT_OF_RESOURCE;
48 }
49 opal_cuda_memcpy_sync(sbuf1, sbuf, bufsize);
50 sbuf = sbuf1 - gap;
51 }
52
53 if (opal_cuda_check_bufs(rbuf, NULL)) {
54 rbuf1 = (char*)malloc(bufsize);
55 if (NULL == rbuf1) {
56 if (NULL != sbuf1) free(sbuf1);
57 return OMPI_ERR_OUT_OF_RESOURCE;
58 }
59 opal_cuda_memcpy_sync(rbuf1, rbuf, bufsize);
60 rbuf2 = rbuf;
61 rbuf = rbuf1 - gap;
62 }
63 rc = s->c_coll.coll_allreduce(sbuf, rbuf, count, dtype, op, comm, s->c_coll.coll_allreduce_module);
64 if (NULL != sbuf1) {
65 free(sbuf1);
66 }
67 if (NULL != rbuf1) {
68 rbuf = rbuf2;
69 opal_cuda_memcpy_sync(rbuf, rbuf1, bufsize);
70 free(rbuf1);
71 }
72 return rc;
73 }
74