This source file includes the following definitions.
- mca_coll_cuda_scan
1
2
3
4
5
6
7
8
9
10
11
12
13 #include "ompi_config.h"
14 #include "coll_cuda.h"
15
16 #include <stdio.h>
17
18 #include "ompi/op/op.h"
19 #include "opal/datatype/opal_convertor.h"
20 #include "opal/datatype/opal_datatype_cuda.h"
21
22
23
24
25
26
27
28
29 int mca_coll_cuda_scan(const void *sbuf, void *rbuf, int count,
30 struct ompi_datatype_t *dtype,
31 struct ompi_op_t *op,
32 struct ompi_communicator_t *comm,
33 mca_coll_base_module_t *module)
34 {
35 mca_coll_cuda_module_t *s = (mca_coll_cuda_module_t*) module;
36 ptrdiff_t gap;
37 char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL;
38 size_t bufsize;
39 int rc;
40
41 bufsize = opal_datatype_span(&dtype->super, count, &gap);
42
43 if ((MPI_IN_PLACE != sbuf) && (opal_cuda_check_bufs((char *)sbuf, NULL))) {
44 sbuf1 = (char*)malloc(bufsize);
45 if (NULL == sbuf1) {
46 return OMPI_ERR_OUT_OF_RESOURCE;
47 }
48 opal_cuda_memcpy_sync(sbuf1, sbuf, bufsize);
49 sbuf = sbuf1 - gap;
50 }
51
52 if (opal_cuda_check_bufs(rbuf, NULL)) {
53 rbuf1 = (char*)malloc(bufsize);
54 if (NULL == rbuf1) {
55 if (NULL != sbuf1) free(sbuf1);
56 return OMPI_ERR_OUT_OF_RESOURCE;
57 }
58 opal_cuda_memcpy_sync(rbuf1, rbuf, bufsize);
59 rbuf2 = rbuf;
60 rbuf = rbuf1 - gap;
61 }
62 rc = s->c_coll.coll_scan(sbuf, rbuf, count, dtype, op, comm,
63 s->c_coll.coll_scan_module);
64 if (NULL != sbuf1) {
65 free(sbuf1);
66 }
67 if (NULL != rbuf1) {
68 rbuf = rbuf2;
69 opal_cuda_memcpy_sync(rbuf, rbuf1, bufsize);
70 free(rbuf1);
71 }
72 return rc;
73 }