This source file includes the following definitions.
- ompi_coll_tuned_gather_intra_check_forced_init
- ompi_coll_tuned_gather_intra_do_this
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 #include "ompi_config.h"
16
17 #include "mpi.h"
18 #include "ompi/constants.h"
19 #include "ompi/datatype/ompi_datatype.h"
20 #include "ompi/communicator/communicator.h"
21 #include "ompi/mca/coll/coll.h"
22 #include "ompi/mca/coll/base/coll_tags.h"
23 #include "ompi/mca/pml/pml.h"
24 #include "coll_tuned.h"
25 #include "ompi/mca/coll/base/coll_base_topo.h"
26 #include "ompi/mca/coll/base/coll_base_util.h"
27
28
/*
 * Backing storage for the gather MCA variables registered by
 * ompi_coll_tuned_gather_intra_check_forced_init().  Every value starts at
 * zero; the init routine assigns each one again right before registering it.
 */
static int coll_tuned_gather_forced_algorithm = 0;  /* "gather_algorithm" */
static int coll_tuned_gather_segment_size = 0;      /* "gather_algorithm_segmentsize" */
static int coll_tuned_gather_tree_fanout = 0;       /* "gather_algorithm_tree_fanout" */
static int coll_tuned_gather_chain_fanout = 0;      /* "gather_algorithm_chain_fanout" */
33
34
/*
 * Enumeration of the gather algorithms that can be forced.  The numeric
 * values are the ids dispatched on by ompi_coll_tuned_gather_intra_do_this().
 * The entry whose string is NULL terminates the table; the init routine
 * counts entries up to that sentinel.
 */
static mca_base_var_enum_value_t gather_algorithms[] = {
    {0, "ignore"},        /* do not force: the fixed decision function is used */
    {1, "basic_linear"},
    {2, "binomial"},
    {3, "linear_sync"},
    {0, NULL}             /* sentinel */
};
42
43
44
45
46
47
48
49
50
51
52
53
54 int
55 ompi_coll_tuned_gather_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
56 {
57 mca_base_var_enum_t *new_enum;
58 int cnt;
59
60 for( cnt = 0; NULL != gather_algorithms[cnt].string; cnt++ );
61 ompi_coll_tuned_forced_max_algorithms[GATHER] = cnt;
62
63 (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
64 "gather_algorithm_count",
65 "Number of gather algorithms available",
66 MCA_BASE_VAR_TYPE_INT, NULL, 0,
67 MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
68 OPAL_INFO_LVL_5,
69 MCA_BASE_VAR_SCOPE_CONSTANT,
70 &ompi_coll_tuned_forced_max_algorithms[GATHER]);
71
72
73 coll_tuned_gather_forced_algorithm = 0;
74 (void) mca_base_var_enum_create("coll_tuned_gather_algorithms", gather_algorithms, &new_enum);
75 mca_param_indices->algorithm_param_index =
76 mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
77 "gather_algorithm",
78 "Which gather algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 binomial, 3 linear with synchronization.",
79 MCA_BASE_VAR_TYPE_INT, new_enum, 0, MCA_BASE_VAR_FLAG_SETTABLE,
80 OPAL_INFO_LVL_5,
81 MCA_BASE_VAR_SCOPE_ALL,
82 &coll_tuned_gather_forced_algorithm);
83 OBJ_RELEASE(new_enum);
84 if (mca_param_indices->algorithm_param_index < 0) {
85 return mca_param_indices->algorithm_param_index;
86 }
87
88 coll_tuned_gather_segment_size = 0;
89 mca_param_indices->segsize_param_index =
90 mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
91 "gather_algorithm_segmentsize",
92 "Segment size in bytes used by default for gather algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. Currently, available algorithms do not support segmentation.",
93 MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
94 OPAL_INFO_LVL_5,
95 MCA_BASE_VAR_SCOPE_ALL,
96 &coll_tuned_gather_segment_size);
97
98 coll_tuned_gather_tree_fanout = ompi_coll_tuned_init_tree_fanout;
99 mca_param_indices->tree_fanout_param_index =
100 mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
101 "gather_algorithm_tree_fanout",
102 "Fanout for n-tree used for gather algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. Currently, available algorithms do not support n-tree topologies.",
103 MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
104 OPAL_INFO_LVL_5,
105 MCA_BASE_VAR_SCOPE_ALL,
106 &coll_tuned_gather_tree_fanout);
107
108 coll_tuned_gather_chain_fanout = ompi_coll_tuned_init_chain_fanout;
109 mca_param_indices->chain_fanout_param_index =
110 mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
111 "gather_algorithm_chain_fanout",
112 "Fanout for chains used for gather algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. Currently, available algorithms do not support chain topologies.",
113 MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
114 OPAL_INFO_LVL_5,
115 MCA_BASE_VAR_SCOPE_ALL,
116 &coll_tuned_gather_chain_fanout);
117
118 return (MPI_SUCCESS);
119 }
120
121 int
122 ompi_coll_tuned_gather_intra_do_this(const void *sbuf, int scount,
123 struct ompi_datatype_t *sdtype,
124 void* rbuf, int rcount,
125 struct ompi_datatype_t *rdtype,
126 int root,
127 struct ompi_communicator_t *comm,
128 mca_coll_base_module_t *module,
129 int algorithm, int faninout, int segsize)
130 {
131 OPAL_OUTPUT((ompi_coll_tuned_stream,
132 "coll:tuned:gather_intra_do_this selected algorithm %d topo faninout %d segsize %d",
133 algorithm, faninout, segsize));
134
135 switch (algorithm) {
136 case (0):
137 return ompi_coll_tuned_gather_intra_dec_fixed(sbuf, scount, sdtype,
138 rbuf, rcount, rdtype,
139 root, comm, module);
140 case (1):
141 return ompi_coll_base_gather_intra_basic_linear(sbuf, scount, sdtype,
142 rbuf, rcount, rdtype,
143 root, comm, module);
144 case (2):
145 return ompi_coll_base_gather_intra_binomial(sbuf, scount, sdtype,
146 rbuf, rcount, rdtype,
147 root, comm, module);
148 case (3):
149 return ompi_coll_base_gather_intra_linear_sync(sbuf, scount, sdtype,
150 rbuf, rcount, rdtype,
151 root, comm, module,
152 segsize);
153 }
154 OPAL_OUTPUT((ompi_coll_tuned_stream,
155 "coll:tuned:gather_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
156 algorithm, ompi_coll_tuned_forced_max_algorithms[GATHER]));
157 return (MPI_ERR_ARG);
158 }