This source file includes the following definitions.
- LMAX
- LMIN
- IMAX
- opal_datatype_add
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 #include "opal_config.h"
25
26 #include <stddef.h>
27
28 #include "opal/constants.h"
29 #include "opal/util/output.h"
30 #include "opal/datatype/opal_datatype.h"
31 #include "opal/datatype/opal_datatype_internal.h"
32
33
/*
 * Helpers to set or clear the contiguity related bits of a flags field.
 * Each macro expands to a single parenthesized compound assignment, so the
 * argument is named (and therefore evaluated) exactly once.
 *
 * Clearing the contiguous bit also clears the no-gaps bit: both are removed
 * by UNSET_CONTIGUOUS_FLAG.
 */
#define SET_CONTIGUOUS_FLAG( INT_VALUE )     ((INT_VALUE) |= (OPAL_DATATYPE_FLAG_CONTIGUOUS))
#define SET_NO_GAP_FLAG( INT_VALUE )         ((INT_VALUE) |= (OPAL_DATATYPE_FLAG_NO_GAPS))
#define UNSET_CONTIGUOUS_FLAG( INT_VALUE )   ((INT_VALUE) &= ~(OPAL_DATATYPE_FLAG_CONTIGUOUS | OPAL_DATATYPE_FLAG_NO_GAPS))
37
/*
 * Min/max helpers for ptrdiff_t (LMAX, LMIN) and int (IMAX).
 *
 * When the compiler is GNU-compatible and __STDC__ is not defined they are
 * statement-expression macros that copy each argument into a local first, so
 * every argument is evaluated once. Otherwise they are static inline
 * functions with the same semantics.
 */
#if defined(__GNUC__) && !defined(__STDC__)
#define LMAX(A,B)  ({ ptrdiff_t _a = (A), _b = (B); (_a < _b ? _b : _a); })
#define LMIN(A,B)  ({ ptrdiff_t _a = (A), _b = (B); (_a < _b ? _a : _b); })
#define IMAX(A,B)  ({ int _a = (A), _b = (B); (_a < _b ? _b : _a); })
#else
static inline ptrdiff_t LMAX( ptrdiff_t a, ptrdiff_t b ) { return ( a < b ? b : a ); }
static inline ptrdiff_t LMIN( ptrdiff_t a, ptrdiff_t b ) { return ( a < b ? a : b ); }
static inline int  IMAX( int a, int b ) { return ( a < b ? b : a ); }
#endif  /* defined(__GNUC__) && !defined(__STDC__) */
47
/*
 * Compute into _place_needed how many description entries are required to
 * append _count repetitions of _pdtAdd spaced by _extent.
 *
 *  - Type flagged OPAL_DATATYPE_FLAG_PREDEFINED: 1 entry when _extent equals
 *    the type size, 3 otherwise.
 *  - Any other type: its own desc.used entries, plus 2 when _count != 1.
 *
 * WARNING: when those 2 extra entries would not fit below
 * MAX_DT_COMPONENT_COUNT the macro logs a message and executes
 * `return OPAL_ERROR;` in the ENCLOSING function. It can therefore only be
 * used inside a function whose return type accepts OPAL_ERROR.
 *
 * Wrapped in do { } while (0) so that it behaves as a single statement.
 */
#define OPAL_DATATYPE_COMPUTE_REQUIRED_ENTRIES( _pdtAdd, _count, _extent, _place_needed) \
    do {                                                                         \
        if( (_pdtAdd)->flags & OPAL_DATATYPE_FLAG_PREDEFINED ) {                 \
            (_place_needed) = ((_extent) == (ptrdiff_t)(_pdtAdd)->size ? 1 : 3); \
        } else {                                                                 \
            (_place_needed) = (_pdtAdd)->desc.used;                              \
            if( (_count) != 1 ) {                                                \
                if( (_place_needed) < (MAX_DT_COMPONENT_COUNT - 2) ) {           \
                    (_place_needed) += 2;                                        \
                } else {                                                         \
                    /* Too many entries to describe: complain and bail out. */   \
                    /* The cast keeps the argument type in step with %u.    */   \
                    opal_output( 0, "Too many elements in the datatype. The limit is %u\n", \
                                 (unsigned int)(MAX_DT_COMPONENT_COUNT) );       \
                    return OPAL_ERROR;                                           \
                }                                                                \
            }                                                                    \
        }                                                                        \
    } while (0)
68
/*
 * Compute the lower and upper bound (_new_lb, _new_ub) covered by _count
 * repetitions of a type whose own bounds are [_old_lb, _old_ub], the first
 * repetition being placed at _disp and consecutive ones _old_extent apart.
 *
 *  - _count == 0: the old bounds are simply shifted by _disp.
 *  - otherwise: the displacement of the first (_disp) and of the last
 *    (_disp + _old_extent * (_count - 1)) repetition are ordered, because a
 *    negative extent puts the last repetition below the first one; the
 *    smaller one plus _old_lb gives _new_lb, the larger one plus _old_ub
 *    gives _new_ub.
 *
 * _count is converted to ptrdiff_t before the multiplication so that a
 * negative extent is handled with signed arithmetic. The temporaries carry a
 * macro specific name to avoid capturing identifiers used in the arguments.
 */
#define OPAL_DATATYPE_LB_UB_CONT( _count, _disp, _old_lb, _old_ub, _old_extent, _new_lb, _new_ub ) \
    do {                                                                             \
        if( 0 == (_count) ) {                                                        \
            (_new_lb) = (_old_lb) + (_disp);                                         \
            (_new_ub) = (_old_ub) + (_disp);                                         \
        } else {                                                                     \
            const ptrdiff_t lbuc_first_ = (_disp);                                   \
            const ptrdiff_t lbuc_last_  = lbuc_first_ +                              \
                (ptrdiff_t)(_old_extent) * ((ptrdiff_t)(_count) - 1);                \
            if( lbuc_first_ < lbuc_last_ ) {                                         \
                (_new_lb) = lbuc_first_;                                             \
                (_new_ub) = lbuc_last_;                                              \
            } else {                                                                 \
                (_new_lb) = lbuc_last_;                                              \
                (_new_ub) = lbuc_first_;                                             \
            }                                                                        \
            (_new_lb) += (_old_lb);                                                  \
            (_new_ub) += (_old_ub);                                                  \
        }                                                                            \
    } while (0)
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105 int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtAdd,
106 size_t count, ptrdiff_t disp, ptrdiff_t extent )
107 {
108 uint32_t newLength, place_needed = 0, i;
109 short localFlags = 0;
110 dt_elem_desc_t *pLast, *pLoop = NULL;
111 ptrdiff_t lb, ub, true_lb, true_ub, epsilon, old_true_ub;
112
113
114
115
116
117
118
119 if( 0 == count ) return OPAL_SUCCESS;
120
121
122
123
124 if( extent == -1 ) extent = (pdtAdd->ub - pdtAdd->lb);
125
126
127 if( OPAL_DATATYPE_LB == pdtAdd->id ) {
128 pdtBase->bdt_used |= (((uint32_t)1) << OPAL_DATATYPE_LB);
129 if( pdtBase->flags & OPAL_DATATYPE_FLAG_USER_LB ) {
130 pdtBase->lb = LMIN( pdtBase->lb, disp );
131 } else {
132 pdtBase->lb = disp;
133 pdtBase->flags |= OPAL_DATATYPE_FLAG_USER_LB;
134 }
135 if( (pdtBase->ub - pdtBase->lb) != (ptrdiff_t)pdtBase->size ) {
136 pdtBase->flags &= ~OPAL_DATATYPE_FLAG_NO_GAPS;
137 }
138 return OPAL_SUCCESS;
139 } else if( OPAL_DATATYPE_UB == pdtAdd->id ) {
140 pdtBase->bdt_used |= (((uint32_t)1) << OPAL_DATATYPE_UB);
141 if( pdtBase->flags & OPAL_DATATYPE_FLAG_USER_UB ) {
142 pdtBase->ub = LMAX( pdtBase->ub, disp );
143 } else {
144 pdtBase->ub = disp;
145 pdtBase->flags |= OPAL_DATATYPE_FLAG_USER_UB;
146 }
147 if( (pdtBase->ub - pdtBase->lb) != (ptrdiff_t)pdtBase->size ) {
148 pdtBase->flags &= ~OPAL_DATATYPE_FLAG_NO_GAPS;
149 }
150 return OPAL_SUCCESS;
151 }
152
153
154 OPAL_DATATYPE_COMPUTE_REQUIRED_ENTRIES( pdtAdd, count, extent, place_needed );
155
156
157
158
159
160
161
162 OPAL_DATATYPE_LB_UB_CONT( count, disp, pdtAdd->lb, pdtAdd->ub, extent, lb, ub );
163
164
165
166
167
168 true_lb = lb - (pdtAdd->lb - pdtAdd->true_lb);
169 true_ub = ub - (pdtAdd->ub - pdtAdd->true_ub);
170 if( true_lb > true_ub ) {
171 old_true_ub = true_lb;
172 true_lb = true_ub;
173 true_ub = old_true_ub;
174 }
175
176 #if 0
177
178 if( !(pdtBase->flags & OPAL_DATATYPE_FLAG_OVERLAP) ) {
179 if( ((disp + true_lb) >= pdtBase->true_ub) ||
180 ((disp + true_ub) <= pdtBase->true_lb) ) {
181 } else {
182
183 }
184 }
185 #endif
186
187
188
189
190
191 if( (pdtAdd->flags ^ pdtBase->flags) & OPAL_DATATYPE_FLAG_USER_LB ) {
192 if( pdtBase->flags & OPAL_DATATYPE_FLAG_USER_LB ) {
193 lb = pdtBase->lb;
194 }
195 pdtBase->flags |= OPAL_DATATYPE_FLAG_USER_LB;
196 } else {
197
198 lb = LMIN( pdtBase->lb, lb );
199 }
200
201
202
203
204
205 if( (pdtBase->flags ^ pdtAdd->flags) & OPAL_DATATYPE_FLAG_USER_UB ) {
206 if( pdtBase->flags & OPAL_DATATYPE_FLAG_USER_UB ) {
207 ub = pdtBase->ub;
208 }
209 pdtBase->flags |= OPAL_DATATYPE_FLAG_USER_UB;
210 } else {
211
212
213 ub = LMAX( pdtBase->ub, ub );
214 }
215
216
217
218
219 pdtBase->lb = lb;
220 pdtBase->ub = ub;
221
222
223 pdtBase->align = IMAX( pdtBase->align, pdtAdd->align );
224
225
226
227
228
229
230
231 if( !(pdtBase->flags & OPAL_DATATYPE_FLAG_USER_UB) ) {
232 epsilon = (pdtBase->ub - pdtBase->lb) % pdtBase->align;
233 if( 0 != epsilon ) {
234 pdtBase->ub += (pdtBase->align - epsilon);
235 }
236 }
237
238 pdtBase->flags |= OPAL_DATATYPE_FLAG_DATA;
239
240
241
242
243
244
245
246
247
248
249
250 if( (0 == count) || (0 == pdtAdd->size) ) {
251 return OPAL_SUCCESS;
252 }
253
254
255
256
257 pdtBase->size += count * pdtAdd->size;
258 if( 0 == pdtBase->nbElems ) old_true_ub = disp;
259 else old_true_ub = pdtBase->true_ub;
260 if( 0 != pdtBase->size ) {
261 pdtBase->true_lb = LMIN( true_lb, pdtBase->true_lb );
262 pdtBase->true_ub = LMAX( true_ub, pdtBase->true_ub );
263 } else {
264 pdtBase->true_lb = true_lb;
265 pdtBase->true_ub = true_ub;
266 }
267
268 pdtBase->bdt_used |= pdtAdd->bdt_used;
269 newLength = pdtBase->desc.used + place_needed;
270 if( newLength > pdtBase->desc.length ) {
271 newLength = ((newLength / DT_INCREASE_STACK) + 1 ) * DT_INCREASE_STACK;
272 pdtBase->desc.desc = (dt_elem_desc_t*)realloc( pdtBase->desc.desc,
273 sizeof(dt_elem_desc_t) * newLength );
274 pdtBase->desc.length = newLength;
275 }
276 pLast = &(pdtBase->desc.desc[pdtBase->desc.used]);
277
278
279
280
281 if( (pdtAdd->flags & (OPAL_DATATYPE_FLAG_PREDEFINED | OPAL_DATATYPE_FLAG_DATA)) == (OPAL_DATATYPE_FLAG_PREDEFINED | OPAL_DATATYPE_FLAG_DATA) ) {
282 if( NULL != pdtBase->ptypes )
283 pdtBase->ptypes[pdtAdd->id] += count;
284 pLast->elem.common.type = pdtAdd->id;
285 pLast->elem.count = count;
286 pLast->elem.disp = disp;
287 pLast->elem.extent = extent;
288 pdtBase->desc.used++;
289 pLast->elem.common.flags = pdtAdd->flags & ~(OPAL_DATATYPE_FLAG_COMMITTED);
290 if( (extent != (ptrdiff_t)pdtAdd->size) && (count > 1) ) {
291 pLast->elem.common.flags &= ~(OPAL_DATATYPE_FLAG_CONTIGUOUS | OPAL_DATATYPE_FLAG_NO_GAPS);
292 }
293 } else {
294
295 pdtBase->loops += pdtAdd->loops;
296 pdtBase->flags |= (pdtAdd->flags & OPAL_DATATYPE_FLAG_USER_LB);
297 pdtBase->flags |= (pdtAdd->flags & OPAL_DATATYPE_FLAG_USER_UB);
298 if( (NULL != pdtBase->ptypes) && (NULL != pdtAdd->ptypes) ) {
299 for( i = OPAL_DATATYPE_FIRST_TYPE; i < OPAL_DATATYPE_MAX_PREDEFINED; i++ )
300 if( pdtAdd->ptypes[i] != 0 ) pdtBase->ptypes[i] += (count * pdtAdd->ptypes[i]);
301 }
302 if( (1 == pdtAdd->desc.used) && (extent == (pdtAdd->ub - pdtAdd->lb)) &&
303 (extent == pdtAdd->desc.desc[0].elem.extent) ){
304 pLast->elem = pdtAdd->desc.desc[0].elem;
305 pLast->elem.count *= count;
306 pLast->elem.disp += disp;
307 pdtBase->desc.used++;
308 } else {
309
310
311
312 if( count != 1 ) {
313 pLoop = pLast;
314 CREATE_LOOP_START( pLast, count, pdtAdd->desc.used + 1, extent,
315 (pdtAdd->flags & ~(OPAL_DATATYPE_FLAG_COMMITTED)) );
316 pdtBase->loops += 2;
317 pdtBase->desc.used += 2;
318 pLast++;
319 }
320
321 for( i = 0; i < pdtAdd->desc.used; i++ ) {
322 pLast->elem = pdtAdd->desc.desc[i].elem;
323 if( OPAL_DATATYPE_FLAG_DATA & pLast->elem.common.flags )
324 pLast->elem.disp += disp;
325 else if( OPAL_DATATYPE_END_LOOP == pLast->elem.common.type ) {
326 pLast->end_loop.first_elem_disp += disp;
327 }
328 pLast++;
329 }
330 pdtBase->desc.used += pdtAdd->desc.used;
331 if( pLoop != NULL ) {
332 int index = GET_FIRST_NON_LOOP( pLoop );
333 assert( pLoop[index].elem.common.flags & OPAL_DATATYPE_FLAG_DATA );
334 CREATE_LOOP_END( pLast, pdtAdd->desc.used + 1, pLoop[index].elem.disp,
335 pdtAdd->size, pLoop->loop.common.flags );
336 }
337 }
338
339 }
340
341
342
343
344
345
346 localFlags = pdtBase->flags & pdtAdd->flags;
347 UNSET_CONTIGUOUS_FLAG(pdtBase->flags);
348 if( (localFlags & OPAL_DATATYPE_FLAG_CONTIGUOUS)
349 && ((disp + pdtAdd->true_lb) == old_true_ub)
350 && ( ((ptrdiff_t)pdtAdd->size == extent)
351
352 || (count < 2)) ) {
353 SET_CONTIGUOUS_FLAG(pdtBase->flags);
354 if( (ptrdiff_t)pdtBase->size == (pdtBase->ub - pdtBase->lb) )
355 SET_NO_GAP_FLAG(pdtBase->flags);
356 }
357
358
359 if( pdtBase->flags & OPAL_DATATYPE_FLAG_NO_GAPS ) {
360 assert( pdtBase->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS );
361 }
362 pdtBase->nbElems += (count * pdtAdd->nbElems);
363
364 return OPAL_SUCCESS;
365 }