This source file includes the following definitions.
- mca_coll_cuda_reduce_scatter_block
1
2
3
4
5
6
7
8
9
10
11
12
13 #include "ompi_config.h"
14 #include "coll_cuda.h"
15
16 #include <stdio.h>
17
18 #include "ompi/op/op.h"
19 #include "opal/datatype/opal_convertor.h"
20 #include "opal/datatype/opal_datatype_cuda.h"
21
22
23
24
25
26
27
28
29
30
31
32
33 int
34 mca_coll_cuda_reduce_scatter_block(const void *sbuf, void *rbuf, int rcount,
35 struct ompi_datatype_t *dtype,
36 struct ompi_op_t *op,
37 struct ompi_communicator_t *comm,
38 mca_coll_base_module_t *module)
39 {
40 mca_coll_cuda_module_t *s = (mca_coll_cuda_module_t*) module;
41 ptrdiff_t gap;
42 char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL;
43 size_t sbufsize, rbufsize;
44 int rc;
45
46 rbufsize = opal_datatype_span(&dtype->super, rcount, &gap);
47
48 sbufsize = rbufsize * ompi_comm_size(comm);
49
50 if ((MPI_IN_PLACE != sbuf) && (opal_cuda_check_bufs((char *)sbuf, NULL))) {
51 sbuf1 = (char*)malloc(sbufsize);
52 if (NULL == sbuf1) {
53 return OMPI_ERR_OUT_OF_RESOURCE;
54 }
55 opal_cuda_memcpy_sync(sbuf1, sbuf, sbufsize);
56 sbuf = sbuf1 - gap;
57 }
58
59 if (opal_cuda_check_bufs(rbuf, NULL)) {
60 rbuf1 = (char*)malloc(rbufsize);
61 if (NULL == rbuf1) {
62 if (NULL != sbuf1) free(sbuf1);
63 return OMPI_ERR_OUT_OF_RESOURCE;
64 }
65 opal_cuda_memcpy_sync(rbuf1, rbuf, rbufsize);
66 rbuf2 = rbuf;
67 rbuf = rbuf1 - gap;
68 }
69 rc = s->c_coll.coll_reduce_scatter_block(sbuf, rbuf, rcount, dtype, op, comm,
70 s->c_coll.coll_reduce_scatter_block_module);
71 if (NULL != sbuf1) {
72 free(sbuf1);
73 }
74 if (NULL != rbuf1) {
75 rbuf = rbuf2;
76 opal_cuda_memcpy_sync(rbuf, rbuf1, rbufsize);
77 free(rbuf1);
78 }
79 return rc;
80 }
81