This source file includes the following definitions.
- ompi_coll_tuned_allgather_intra_check_forced_init
- ompi_coll_tuned_allgather_intra_do_this
1
2
3
4
5
6
7
8
9
10
11
12
13
14 #include "ompi_config.h"
15
16 #include "mpi.h"
17 #include "opal/util/bit_ops.h"
18 #include "ompi/constants.h"
19 #include "ompi/datatype/ompi_datatype.h"
20 #include "ompi/communicator/communicator.h"
21 #include "ompi/mca/coll/coll.h"
22 #include "ompi/mca/coll/base/coll_tags.h"
23 #include "ompi/mca/coll/base/coll_base_topo.h"
24 #include "ompi/mca/coll/base/coll_base_util.h"
25 #include "coll_tuned.h"
26
27
/*
 * Backing storage for the allgather MCA parameters registered by
 * ompi_coll_tuned_allgather_intra_check_forced_init().  All four have static
 * storage duration and therefore start at zero; each one is assigned again
 * immediately before its parameter is registered.
 */
static int coll_tuned_allgather_forced_algorithm;  /* "allgather_algorithm" */
static int coll_tuned_allgather_segment_size;      /* "allgather_algorithm_segmentsize" */
static int coll_tuned_allgather_tree_fanout;       /* "allgather_algorithm_tree_fanout" */
static int coll_tuned_allgather_chain_fanout;      /* "allgather_algorithm_chain_fanout" */
32
33
34 static mca_base_var_enum_value_t allgather_algorithms[] = {
35 {0, "ignore"},
36 {1, "linear"},
37 {2, "bruck"},
38 {3, "recursive_doubling"},
39 {4, "ring"},
40 {5, "neighbor"},
41 {6, "two_proc"},
42 {0, NULL}
43 };
44
45
46
47
48
49
50
51
52
53
54
55
56 int
57 ompi_coll_tuned_allgather_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
58 {
59 mca_base_var_enum_t *new_enum;
60 int cnt;
61
62 for( cnt = 0; NULL != allgather_algorithms[cnt].string; cnt++ );
63 ompi_coll_tuned_forced_max_algorithms[ALLGATHER] = cnt;
64
65 (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
66 "allgather_algorithm_count",
67 "Number of allgather algorithms available",
68 MCA_BASE_VAR_TYPE_INT, NULL, 0,
69 MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
70 OPAL_INFO_LVL_5,
71 MCA_BASE_VAR_SCOPE_CONSTANT,
72 &ompi_coll_tuned_forced_max_algorithms[ALLGATHER]);
73
74
75 coll_tuned_allgather_forced_algorithm = 0;
76 (void) mca_base_var_enum_create("coll_tuned_allgather_algorithms", allgather_algorithms, &new_enum);
77 mca_param_indices->algorithm_param_index =
78 mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
79 "allgather_algorithm",
80 "Which allallgather algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 bruck, 3 recursive doubling, 4 ring, 5 neighbor exchange, 6: two proc only.",
81 MCA_BASE_VAR_TYPE_INT, new_enum, 0, MCA_BASE_VAR_FLAG_SETTABLE,
82 OPAL_INFO_LVL_5,
83 MCA_BASE_VAR_SCOPE_ALL,
84 &coll_tuned_allgather_forced_algorithm);
85 OBJ_RELEASE(new_enum);
86 if (mca_param_indices->algorithm_param_index < 0) {
87 return mca_param_indices->algorithm_param_index;
88 }
89
90 coll_tuned_allgather_segment_size = 0;
91 mca_param_indices->segsize_param_index =
92 mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
93 "allgather_algorithm_segmentsize",
94 "Segment size in bytes used by default for allgather algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. Currently, available algorithms do not support segmentation.",
95 MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
96 OPAL_INFO_LVL_5,
97 MCA_BASE_VAR_SCOPE_ALL,
98 &coll_tuned_allgather_segment_size);
99
100 coll_tuned_allgather_tree_fanout = ompi_coll_tuned_init_tree_fanout;
101 mca_param_indices->tree_fanout_param_index =
102 mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
103 "allgather_algorithm_tree_fanout",
104 "Fanout for n-tree used for allgather algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. Currently, available algorithms do not support n-tree topologies.",
105 MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
106 OPAL_INFO_LVL_5,
107 MCA_BASE_VAR_SCOPE_ALL,
108 &coll_tuned_allgather_tree_fanout);
109
110 coll_tuned_allgather_chain_fanout = ompi_coll_tuned_init_chain_fanout;
111 mca_param_indices->chain_fanout_param_index =
112 mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
113 "allgather_algorithm_chain_fanout",
114 "Fanout for chains used for allgather algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. Currently, available algorithms do not support chain topologies.",
115 MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
116 OPAL_INFO_LVL_5,
117 MCA_BASE_VAR_SCOPE_ALL,
118 &coll_tuned_allgather_chain_fanout);
119
120 return (MPI_SUCCESS);
121 }
122
123 int ompi_coll_tuned_allgather_intra_do_this(const void *sbuf, int scount,
124 struct ompi_datatype_t *sdtype,
125 void* rbuf, int rcount,
126 struct ompi_datatype_t *rdtype,
127 struct ompi_communicator_t *comm,
128 mca_coll_base_module_t *module,
129 int algorithm, int faninout, int segsize)
130 {
131 OPAL_OUTPUT((ompi_coll_tuned_stream,
132 "coll:tuned:allgather_intra_do_this selected algorithm %d topo faninout %d segsize %d",
133 algorithm, faninout, segsize));
134
135 switch (algorithm) {
136 case (0):
137 return ompi_coll_tuned_allgather_intra_dec_fixed(sbuf, scount, sdtype,
138 rbuf, rcount, rdtype,
139 comm, module);
140 case (1):
141 return ompi_coll_base_allgather_intra_basic_linear(sbuf, scount, sdtype,
142 rbuf, rcount, rdtype,
143 comm, module);
144 case (2):
145 return ompi_coll_base_allgather_intra_bruck(sbuf, scount, sdtype,
146 rbuf, rcount, rdtype,
147 comm, module);
148 case (3):
149 return ompi_coll_base_allgather_intra_recursivedoubling(sbuf, scount, sdtype,
150 rbuf, rcount, rdtype,
151 comm, module);
152 case (4):
153 return ompi_coll_base_allgather_intra_ring(sbuf, scount, sdtype,
154 rbuf, rcount, rdtype,
155 comm, module);
156 case (5):
157 return ompi_coll_base_allgather_intra_neighborexchange(sbuf, scount, sdtype,
158 rbuf, rcount, rdtype,
159 comm, module);
160 case (6):
161 return ompi_coll_base_allgather_intra_two_procs(sbuf, scount, sdtype,
162 rbuf, rcount, rdtype,
163 comm, module);
164 }
165 OPAL_OUTPUT((ompi_coll_tuned_stream,
166 "coll:tuned:allgather_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
167 algorithm, ompi_coll_tuned_forced_max_algorithms[ALLGATHER]));
168 return (MPI_ERR_ARG);
169 }