This source file includes following definitions.
- mca_coll_cuda_exscan
1
2
3
4
5
6
7
8
9
10
11
12
13 #include "ompi_config.h"
14 #include "coll_cuda.h"
15
16 #include <stdio.h>
17
18 #include "ompi/op/op.h"
19 #include "opal/datatype/opal_convertor.h"
20 #include "opal/datatype/opal_datatype_cuda.h"
21
22 int mca_coll_cuda_exscan(const void *sbuf, void *rbuf, int count,
23 struct ompi_datatype_t *dtype,
24 struct ompi_op_t *op,
25 struct ompi_communicator_t *comm,
26 mca_coll_base_module_t *module)
27 {
28 mca_coll_cuda_module_t *s = (mca_coll_cuda_module_t*) module;
29 ptrdiff_t gap;
30 char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL;
31 size_t bufsize;
32 int rc;
33
34 bufsize = opal_datatype_span(&dtype->super, count, &gap);
35
36 if ((MPI_IN_PLACE != sbuf) && (opal_cuda_check_bufs((char *)sbuf, NULL))) {
37 sbuf1 = (char*)malloc(bufsize);
38 if (NULL == sbuf1) {
39 return OMPI_ERR_OUT_OF_RESOURCE;
40 }
41 opal_cuda_memcpy_sync(sbuf1, sbuf, bufsize);
42 sbuf = sbuf1 - gap;
43 }
44
45 if (opal_cuda_check_bufs(rbuf, NULL)) {
46 rbuf1 = (char*)malloc(bufsize);
47 if (NULL == rbuf1) {
48 if (NULL != sbuf1) free(sbuf1);
49 return OMPI_ERR_OUT_OF_RESOURCE;
50 }
51 opal_cuda_memcpy_sync(rbuf1, rbuf, bufsize);
52 rbuf2 = rbuf;
53 rbuf = rbuf1 - gap;
54 }
55
56 rc = s->c_coll.coll_exscan(sbuf, rbuf, count, dtype, op, comm,
57 s->c_coll.coll_exscan_module);
58 if (NULL != sbuf1) {
59 free(sbuf1);
60 }
61 if (NULL != rbuf1) {
62 rbuf = rbuf2;
63 opal_cuda_memcpy_sync(rbuf, rbuf1, bufsize);
64 free(rbuf1);
65 }
66 return rc;
67 }