This source file includes the following definitions.
- mca_coll_sm_bcast_intra
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 #include "ompi_config.h"
21
22 #include <string.h>
23
24 #include "opal/datatype/opal_convertor.h"
25 #include "ompi/constants.h"
26 #include "ompi/communicator/communicator.h"
27 #include "ompi/datatype/ompi_datatype.h"
28 #include "ompi/mca/coll/coll.h"
29 #include "opal/sys/atomic.h"
30 #include "coll_sm.h"
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56 int mca_coll_sm_bcast_intra(void *buff, int count,
57 struct ompi_datatype_t *datatype, int root,
58 struct ompi_communicator_t *comm,
59 mca_coll_base_module_t *module)
60 {
61 struct iovec iov;
62 mca_coll_sm_module_t *sm_module = (mca_coll_sm_module_t*) module;
63 mca_coll_sm_comm_t *data;
64 int i, ret, rank, size, num_children, src_rank;
65 int flag_num, segment_num, max_segment_num;
66 int parent_rank;
67 size_t total_size, max_data, bytes;
68 mca_coll_sm_in_use_flag_t *flag;
69 opal_convertor_t convertor;
70 mca_coll_sm_tree_node_t *me, *parent, **children;
71 mca_coll_sm_data_index_t *index;
72
73
74
75 if (!sm_module->enabled) {
76 if (OMPI_SUCCESS != (ret = ompi_coll_sm_lazy_enable(module, comm))) {
77 return ret;
78 }
79 }
80 data = sm_module->sm_comm_data;
81
82
83
84 rank = ompi_comm_rank(comm);
85 size = ompi_comm_size(comm);
86
87 OBJ_CONSTRUCT(&convertor, opal_convertor_t);
88 iov.iov_len = mca_coll_sm_component.sm_fragment_size;
89 bytes = 0;
90
91 me = &data->mcb_tree[(rank + size - root) % size];
92 parent = me->mcstn_parent;
93 children = me->mcstn_children;
94 num_children = me->mcstn_num_children;
95
96
97
98
99
100
101
102
103
104
105 if (root == rank) {
106
107
108
109
110 if (OMPI_SUCCESS !=
111 (ret =
112 opal_convertor_copy_and_prepare_for_send(ompi_mpi_local_convertor,
113 &(datatype->super),
114 count,
115 buff,
116 0,
117 &convertor))) {
118 return ret;
119 }
120 opal_convertor_get_packed_size(&convertor, &total_size);
121
122
123
124 do {
125 flag_num = (data->mcb_operation_count++ %
126 mca_coll_sm_component.sm_comm_num_in_use_flags);
127
128 FLAG_SETUP(flag_num, flag, data);
129 FLAG_WAIT_FOR_IDLE(flag, bcast_root_label);
130 FLAG_RETAIN(flag, size - 1, data->mcb_operation_count - 1);
131
132
133
134 segment_num =
135 flag_num * mca_coll_sm_component.sm_segs_per_inuse_flag;
136 max_segment_num =
137 (flag_num + 1) * mca_coll_sm_component.sm_segs_per_inuse_flag;
138 do {
139 index = &(data->mcb_data_index[segment_num]);
140
141
142
143 max_data = mca_coll_sm_component.sm_fragment_size;
144 COPY_FRAGMENT_IN(convertor, index, rank, iov, max_data);
145 bytes += max_data;
146
147
148 opal_atomic_wmb();
149
150
151 PARENT_NOTIFY_CHILDREN(children, num_children, index,
152 max_data);
153
154 ++segment_num;
155 } while (bytes < total_size && segment_num < max_segment_num);
156 } while (bytes < total_size);
157 }
158
159
160
161
162
163 else {
164
165
166
167
168 if (OMPI_SUCCESS !=
169 (ret =
170 opal_convertor_copy_and_prepare_for_recv(ompi_mpi_local_convertor,
171 &(datatype->super),
172 count,
173 buff,
174 0,
175 &convertor))) {
176 return ret;
177 }
178 opal_convertor_get_packed_size(&convertor, &total_size);
179
180
181
182
183 do {
184 flag_num = (data->mcb_operation_count %
185 mca_coll_sm_component.sm_comm_num_in_use_flags);
186
187
188
189 FLAG_SETUP(flag_num, flag, data);
190 FLAG_WAIT_FOR_OP(flag, data->mcb_operation_count, bcast_nonroot_label1);
191 ++data->mcb_operation_count;
192
193
194
195 segment_num =
196 flag_num * mca_coll_sm_component.sm_segs_per_inuse_flag;
197 max_segment_num =
198 (flag_num + 1) * mca_coll_sm_component.sm_segs_per_inuse_flag;
199 do {
200
201
202 parent_rank = (parent->mcstn_id + root) % size;
203 index = &(data->mcb_data_index[segment_num]);
204
205
206 CHILD_WAIT_FOR_NOTIFY(rank, index, max_data, bcast_nonroot_label2);
207
208
209 if (num_children > 0) {
210
211
212 COPY_FRAGMENT_BETWEEN(parent_rank, rank, index, max_data);
213
214
215 opal_atomic_wmb();
216
217
218 PARENT_NOTIFY_CHILDREN(children, num_children, index,
219 max_data);
220
221
222
223
224
225
226 src_rank = rank;
227 }
228
229
230
231
232
233 else {
234 src_rank = parent_rank;
235 }
236
237
238 COPY_FRAGMENT_OUT(convertor, src_rank, index, iov, max_data);
239
240 bytes += max_data;
241 ++segment_num;
242 } while (bytes < total_size && segment_num < max_segment_num);
243
244
245
246 opal_atomic_wmb();
247
248
249 FLAG_RELEASE(flag);
250 } while (bytes < total_size);
251 }
252
253
254
255 OBJ_DESTRUCT(&convertor);
256
257
258
259 return OMPI_SUCCESS;
260 }