This source file includes the following definitions.
- opal_dt_swap_bytes
- opal_dt_swap_long_double
- datatype_check
- copy_cxx_bool_heterogeneous
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 #include "opal_config.h"
18
19 #ifdef HAVE_IEEE754_H
20 #include <ieee754.h>
21 #endif
22
23 #include <stddef.h>
24 #include <stdint.h>
25
26 #include "opal/util/arch.h"
27
28 #include "opal/types.h"
29 #include "opal/datatype/opal_datatype.h"
30 #include "opal/datatype/opal_convertor.h"
31 #include "opal/datatype/opal_datatype_internal.h"
32 #include "opal/datatype/opal_datatype_checksum.h"
33 #include "opal/datatype/opal_convertor_internal.h"
34
35
36
37
38
39
40
41
42
43
44
45
46
/**
 * Copy count consecutive elements of size bytes each from from_p to to_p,
 * reversing the byte order inside every element.
 *
 * The order of the elements is preserved; only the bytes within each
 * element are mirrored.  The copy is done byte by byte, so the source and
 * destination regions must not overlap (an in-place call would overwrite
 * bytes that have not been read yet).
 *
 * @param to_p    destination buffer, at least size * count bytes
 * @param from_p  source buffer, at least size * count bytes
 * @param size    size in bytes of one element
 * @param count   number of elements to convert; with 0 neither buffer is
 *                accessed
 */
static inline void
opal_dt_swap_bytes(void *to_p, const void *from_p, const size_t size, size_t count)
{
    uint8_t *to = (uint8_t *) to_p;
    const uint8_t *from = (const uint8_t *) from_p;

    /* A single loop over the elements: unlike a "first element, then the
     * others" scheme it does not touch the buffers when count is 0. */
    for (; count > 0; count--) {
        for (size_t i = 0; i < size; i++) {
            to[size - 1 - i] = from[i];
        }
        to += size;
        from += size;
    }
}
69
70 #ifdef HAVE_IEEE754_H
/*
 * Bit-field view of a 16-byte long double value, used by
 * opal_dt_swap_long_double() in its __x86_64 branch: the buffer is read
 * through this struct and repacked into a local ieee854_long_double.
 *
 * The fields add up to 32 + 32 + 32 + 16 + 15 + 1 = 128 bits: a 112-bit
 * mantissa split in four pieces, a 15-bit exponent and a sign bit.
 *
 * NOTE(review): the placement of bit-fields in memory is implementation
 * defined; the declaration order presumably mirrors a little-endian image
 * of the value (mantissa3 least significant) -- confirm for the compilers
 * in use.  Do not reorder the fields.
 */
struct bit128 {
    unsigned int mantissa3:32;
    unsigned int mantissa2:32;
    unsigned int mantissa1:32;
    unsigned int mantissa0:16;
    unsigned int exponent:15;
    unsigned int negative:1;
};
79
/*
 * Bit-field view of an extended-precision long double stored in a 16-byte
 * slot, used by opal_dt_swap_long_double() in its __sparcv9 branch: the
 * buffer is read through this struct and repacked into a local
 * ieee854_long_double.
 *
 * pad (32 bits) and empty (16 bits) carry no value; the remaining fields
 * (1 sign bit, 15 exponent bits, 2 x 32 mantissa bits) add up to 80 bits.
 *
 * NOTE(review): the placement of bit-fields in memory is implementation
 * defined; the declaration order presumably matches what a big-endian
 * compiler produces for the byte-swapped Intel value -- confirm.  Do not
 * reorder the fields.
 */
struct bit80 {
    unsigned int pad:32;
    unsigned int empty:16;
    unsigned int negative:1;
    unsigned int exponent:15;
    unsigned int mantissa0:32;
    unsigned int mantissa1:32;
};
88
/**
 * Convert, in place, count long double values located at to_p between the
 * Intel extended-precision representation and the 128-bit representation.
 * Which conversion runs depends on the OPAL_ARCH_LDISINTEL flag of the
 * local and of the remote architecture:
 *
 *  - local Intel, remote not: every value is read through struct bit128
 *    and rewritten as a local ieee854_long_double (compiled in only when
 *    __x86_64 is defined);
 *  - local not Intel, remote Intel: every value is read through
 *    struct bit80 and rewritten as a local ieee854_long_double (compiled in
 *    only when __sparcv9 is defined);
 *  - same flag on both sides: nothing is modified.
 *
 * @param to_p        buffer holding the values; rewritten in place
 * @param from_p      not used
 * @param size        not used
 * @param count       number of long double values stored at to_p
 * @param remoteArch  architecture flags of the peer that produced the data
 */
static inline void
opal_dt_swap_long_double(void *to_p, const void *from_p, const size_t size, size_t count, uint32_t remoteArch)
{
#ifdef HAVE_IEEE754_H
    size_t i;
    long double*to = (long double *) to_p;

    (void) from_p;
    (void) size;

    if ((opal_local_arch&OPAL_ARCH_LDISINTEL) && !(remoteArch&OPAL_ARCH_LDISINTEL)) {
#ifdef __x86_64
        for (i=0; i<count; i++, to++) {
            union ieee854_long_double ld;
            struct bit128 * b = (struct bit128 *)to;
            /* The extended format stores the leading significand bit
             * explicitly.  It is 1 for normal numbers, infinities and NaNs
             * and 0 for zero and denormals, i.e. exactly when the exponent
             * field is 0; forcing it to 1 would turn 0.0 into a tiny
             * non-zero pseudo-denormal. */
            const unsigned int integer_bit = (0 != b->exponent) ? 0x80000000u : 0u;

            ld.ieee.empty = 0;
            /* high word: integer bit, 16 bits of mantissa0, top 15 bits of mantissa1 */
            ld.ieee.mantissa0 = integer_bit | (((unsigned int)b->mantissa0 << 15) & 0x7FFF8000) | ((b->mantissa1 >> 17) & 0x00007FFF);
            /* low word: low 17 bits of mantissa1, then the TOP 15 bits of
             * mantissa2 (right shift; a left shift would always mask to 0) */
            ld.ieee.mantissa1 = ((b->mantissa1 << 15) & 0xFFFF8000) | ((b->mantissa2 >> 17) & 0x00007FFF);
            ld.ieee.exponent = b->exponent;
            ld.ieee.negative = b->negative;
            MEMCPY( to, &ld, sizeof(long double));
        }
#endif
    } else if (!(opal_local_arch&OPAL_ARCH_LDISINTEL) && (remoteArch&OPAL_ARCH_LDISINTEL)) {
#ifdef __sparcv9
        /* NOTE(review): the bit carried over from mantissa1 is OR-ed in at
         * bit 31 rather than bit 0, and the explicit integer bit of the
         * 80-bit value does not look like it is dropped.  Left unchanged
         * because the field widths of ieee854_long_double on this platform
         * are not visible here -- confirm against <ieee754.h>. */
        for (i=0; i<count; i++, to++) {
            union ieee854_long_double ld;
            struct bit80 * b = (struct bit80 *)to;
            ld.ieee.mantissa3 = 0;
            ld.ieee.mantissa2 = 0;
            ld.ieee.mantissa0 = (b->mantissa0 << 1) | (b->mantissa1 & 0x80000000);
            ld.ieee.mantissa1 = (b->mantissa1 << 1) & 0xFFFFFFFE;
            ld.ieee.exponent = b->exponent;
            ld.ieee.negative = b->negative;
            MEMCPY( to, &ld, sizeof(long double));
        }
#endif
    }
#else
    assert(0);
#endif
}
128 #else
129 #define opal_dt_swap_long_double(to_p, from_p, size, count, remoteArch)
130 #endif
131
132
133
134
135
136
137
/*
 * COPY_TYPE_HETEROGENEOUS( TYPENAME, TYPE )
 *
 * Convenience form of COPY_TYPE_HETEROGENEOUS_INTERNAL() with LONG_DOUBLE
 * set to 0, i.e. without the long double fix-up step.
 */
#define COPY_TYPE_HETEROGENEOUS( TYPENAME, TYPE )                                  \
    COPY_TYPE_HETEROGENEOUS_INTERNAL( TYPENAME, TYPE, 0 )

/*
 * COPY_TYPE_HETEROGENEOUS_INTERNAL( TYPENAME, TYPE, LONG_DOUBLE )
 *
 * Expands to the definition of
 *
 *     static int32_t copy_<TYPENAME>_heterogeneous(pConvertor, count,
 *                        from, from_len, from_extent,
 *                        to, to_length, to_extent, advance)
 *
 * The generated function first lets datatype_check() adjust count, then
 * transfers count elements of sizeof(TYPE) bytes from `from` to `to`:
 *   - when the big-endian flag of pConvertor->remoteArch differs from the
 *     one of opal_local_arch, the bytes of every element are reversed and,
 *     if LONG_DOUBLE is non-zero, opal_dt_swap_long_double() is applied to
 *     the destination afterwards;
 *   - otherwise the elements are copied unchanged.
 * When both extents equal sizeof(TYPE) the whole run is handled by one
 * call, otherwise element by element, stepping by the respective extent.
 * It stores count * from_extent in *advance and returns count.
 */
#define COPY_TYPE_HETEROGENEOUS_INTERNAL( TYPENAME, TYPE, LONG_DOUBLE )            \
static int32_t                                                                     \
copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, size_t count,        \
                                const char* from, size_t from_len,                 \
                                ptrdiff_t from_extent,                             \
                                char* to, size_t to_length, ptrdiff_t to_extent,   \
                                ptrdiff_t *advance)                                \
{                                                                                  \
    const ptrdiff_t elem_size = (ptrdiff_t) sizeof(TYPE);                          \
    int swap_needed, contiguous;                                                   \
    size_t idx;                                                                    \
                                                                                   \
    datatype_check( #TYPE, sizeof(TYPE), sizeof(TYPE), &count,                     \
                    from, from_len, from_extent,                                   \
                    to, to_length, to_extent);                                     \
                                                                                   \
    swap_needed = ((pConvertor->remoteArch & OPAL_ARCH_ISBIGENDIAN) !=             \
                   (opal_local_arch & OPAL_ARCH_ISBIGENDIAN));                     \
    contiguous = ((elem_size == to_extent) && (elem_size == from_extent));         \
                                                                                   \
    if( contiguous ) {                                                             \
        /* source and destination are both packed: one call does it all */         \
        if( swap_needed ) {                                                        \
            opal_dt_swap_bytes(to, from, sizeof(TYPE), count);                     \
            if (LONG_DOUBLE) {                                                     \
                opal_dt_swap_long_double(to, from, sizeof(TYPE), count,            \
                                         pConvertor->remoteArch);                  \
            }                                                                      \
        } else {                                                                   \
            MEMCPY( to, from, count * sizeof(TYPE) );                              \
        }                                                                          \
    } else {                                                                       \
        /* at least one side has gaps: go element by element */                    \
        for( idx = 0; idx < count; idx++ ) {                                       \
            if( swap_needed ) {                                                    \
                opal_dt_swap_bytes(to, from, sizeof(TYPE), 1);                     \
                if (LONG_DOUBLE) {                                                 \
                    opal_dt_swap_long_double(to, from, sizeof(TYPE), 1,            \
                                             pConvertor->remoteArch);              \
                }                                                                  \
            } else {                                                               \
                MEMCPY( to, from, sizeof(TYPE) );                                  \
            }                                                                      \
            to += to_extent;                                                       \
            from += from_extent;                                                   \
        }                                                                          \
    }                                                                              \
    *advance = count * from_extent;                                                \
    return count;                                                                  \
}
185
/*
 * COPY_2SAMETYPE_HETEROGENEOUS( TYPENAME, TYPE )
 *
 * Convenience form of COPY_2SAMETYPE_HETEROGENEOUS_INTERNAL() with
 * LONG_DOUBLE set to 0, i.e. without the long double fix-up step.
 */
#define COPY_2SAMETYPE_HETEROGENEOUS( TYPENAME, TYPE )                             \
    COPY_2SAMETYPE_HETEROGENEOUS_INTERNAL( TYPENAME, TYPE, 0)

/*
 * COPY_2SAMETYPE_HETEROGENEOUS_INTERNAL( TYPENAME, TYPE, LONG_DOUBLE )
 *
 * Expands to the definition of copy_<TYPENAME>_heterogeneous() for a
 * datatype made of TWO consecutive values of type TYPE (e.g. the real and
 * imaginary part of a complex number).  TYPE is the type of ONE component,
 * so one element occupies 2 * sizeof(TYPE) bytes; every size handed to
 * datatype_check(), every contiguity test and every plain copy uses that
 * full element size.
 *
 * The generated function first lets datatype_check() adjust count, then
 * transfers count elements from `from` to `to`:
 *   - when the big-endian flag of pConvertor->remoteArch differs from the
 *     one of opal_local_arch, the bytes of each of the two components are
 *     reversed separately and, if LONG_DOUBLE is non-zero,
 *     opal_dt_swap_long_double() is applied to them afterwards;
 *   - otherwise the elements are copied unchanged.
 * It stores count * from_extent in *advance and returns count.
 */
#define COPY_2SAMETYPE_HETEROGENEOUS_INTERNAL( TYPENAME, TYPE, LONG_DOUBLE)        \
static int32_t                                                                     \
copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, size_t count,        \
                                const char* from, size_t from_len,                 \
                                ptrdiff_t from_extent,                             \
                                char* to, size_t to_length, ptrdiff_t to_extent,   \
                                ptrdiff_t *advance)                                \
{                                                                                  \
    /* one element = two components of type TYPE */                                \
    const size_t pair_size = 2 * sizeof(TYPE);                                     \
    size_t i;                                                                      \
                                                                                   \
    datatype_check( #TYPE, pair_size, pair_size, &count,                           \
                    from, from_len, from_extent,                                   \
                    to, to_length, to_extent);                                     \
                                                                                   \
    if ((pConvertor->remoteArch & OPAL_ARCH_ISBIGENDIAN) !=                        \
        (opal_local_arch & OPAL_ARCH_ISBIGENDIAN)) {                               \
        /* source and destination have a different endianness */                   \
        if( (to_extent == from_extent) &&                                          \
            (to_extent == (ptrdiff_t) pair_size) ) {                               \
            opal_dt_swap_bytes(to, from, sizeof(TYPE), 2 * count);                 \
            if (LONG_DOUBLE) {                                                     \
                opal_dt_swap_long_double(to, from, sizeof(TYPE), 2*count,          \
                                         pConvertor->remoteArch);                  \
            }                                                                      \
        } else {                                                                   \
            for( i = 0; i < count; i++ ) {                                         \
                opal_dt_swap_bytes(to, from, sizeof(TYPE), 2);                     \
                if (LONG_DOUBLE) {                                                 \
                    opal_dt_swap_long_double(to, from, sizeof(TYPE), 2,            \
                                             pConvertor->remoteArch);              \
                }                                                                  \
                to += to_extent;                                                   \
                from += from_extent;                                               \
            }                                                                      \
        }                                                                          \
    } else if ((ptrdiff_t) pair_size == to_extent &&                               \
               (ptrdiff_t) pair_size == from_extent) {                             \
        /* same endianness, both sides packed: copy both components of */          \
        /* every element in one go */                                              \
        MEMCPY( to, from, count * pair_size );                                     \
    } else {                                                                       \
        /* same endianness, source or destination is non-contiguous */             \
        for( i = 0; i < count; i++ ) {                                             \
            MEMCPY( to, from, pair_size );                                         \
            to += to_extent;                                                       \
            from += from_extent;                                                   \
        }                                                                          \
    }                                                                              \
    *advance = count * from_extent;                                                \
    return count;                                                                  \
}
233
/*
 * COPY_2TYPE_HETEROGENEOUS( TYPENAME, TYPE1, TYPE2 )
 *
 * Expands to the definition of copy_<TYPENAME>_heterogeneous() for a
 * datatype made of a TYPE1 value immediately followed by a TYPE2 value;
 * one element is taken to be sizeof(TYPE1) + sizeof(TYPE2) bytes, with no
 * padding between the two members.
 *
 * The generated function first lets datatype_check() adjust count, then
 * transfers count elements from `from` to `to`:
 *   - when the big-endian flag of pConvertor->remoteArch differs from the
 *     one of opal_local_arch, the bytes of each member are reversed
 *     separately, element by element;
 *   - otherwise the elements are copied unchanged, in one call when both
 *     extents equal the element size, element by element if not.
 * It stores count * from_extent in *advance and returns count.
 */
#define COPY_2TYPE_HETEROGENEOUS( TYPENAME, TYPE1, TYPE2 )                         \
static int32_t                                                                     \
copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, size_t count,        \
                                const char* from, size_t from_len,                 \
                                ptrdiff_t from_extent,                             \
                                char* to, size_t to_length, ptrdiff_t to_extent,   \
                                ptrdiff_t *advance)                                \
{                                                                                  \
    const size_t first_size = sizeof(TYPE1);                                       \
    const size_t second_size = sizeof(TYPE2);                                      \
    const size_t elem_size = first_size + second_size;                             \
    size_t idx;                                                                    \
                                                                                   \
    datatype_check( #TYPENAME, elem_size, elem_size, &count,                       \
                    from, from_len, from_extent,                                   \
                    to, to_length, to_extent);                                     \
                                                                                   \
    if ((pConvertor->remoteArch & OPAL_ARCH_ISBIGENDIAN) !=                        \
        (opal_local_arch & OPAL_ARCH_ISBIGENDIAN)) {                               \
        /* different endianness: mirror each member on its own */                  \
        for( idx = 0; idx < count; idx++ ) {                                       \
            opal_dt_swap_bytes(to, from, first_size, 1);                           \
            opal_dt_swap_bytes(to + first_size, from + first_size,                 \
                               second_size, 1);                                    \
            to += to_extent;                                                       \
            from += from_extent;                                                   \
        }                                                                          \
    } else if ((ptrdiff_t) elem_size == to_extent &&                               \
               (ptrdiff_t) elem_size == from_extent) {                             \
        /* same endianness, both sides packed */                                   \
        MEMCPY( to, from, count * elem_size );                                     \
    } else {                                                                       \
        /* same endianness, source or destination is non-contiguous */             \
        for( idx = 0; idx < count; idx++ ) {                                       \
            MEMCPY( to, from, elem_size );                                         \
            to += to_extent;                                                       \
            from += from_extent;                                                   \
        }                                                                          \
    }                                                                              \
    *advance = count * from_extent;                                                \
    return count;                                                                  \
}
276
277
/**
 * Limit the number of elements to convert to what the source buffer
 * actually holds, and trace the upcoming copy through DUMP().
 *
 * If count elements of remote_size bytes do not fit in from_len bytes,
 * *count is lowered to the number of complete elements that do fit; a
 * trailing partial element is only reported, not handled.
 *
 * @param type         name of the datatype, used in the trace output
 * @param local_size   not used by this function
 * @param remote_size  size in bytes of one element in the source buffer
 * @param count        in: requested number of elements;
 *                     out: number of elements that can be converted
 * @param from         source buffer (traced only)
 * @param from_len     number of bytes available in the source buffer
 * @param from_extent  not used by this function
 * @param to           destination buffer (traced only)
 * @param to_len       space available at the destination (traced only)
 * @param to_extent    not used by this function
 */
static inline void
datatype_check(const char *type, size_t local_size, size_t remote_size, size_t *count,
               const char* from, size_t from_len, ptrdiff_t from_extent,
               char* to, size_t to_len, ptrdiff_t to_extent)
{
    (void) local_size;
    (void) from_extent;
    (void) to_extent;

    /* Same test as "remote_size * *count > from_len", written as a division
     * so that a huge count cannot wrap the product around and slip through. */
    if( (remote_size > 0) && (*count > (from_len / remote_size)) ) {
        *count = from_len / remote_size;
        if( (*count * remote_size) != from_len ) {
            DUMP( "oops should I keep this data somewhere (excedent %zu bytes)?\n",
                  from_len - (*count * remote_size) );
        }
        DUMP( "correct: copy %s count %zu from buffer %p with length %zu to %p space %zu\n",
              type, *count, (const void*) from, from_len, (void*) to, to_len );
    } else {
        DUMP( " copy %s count %zu from buffer %p with length %zu to %p space %zu\n",
              type, *count, (const void*) from, from_len, (void*) to, to_len );
    }
}
297
/*
 * CXX_BOOL_COPY_LOOP(TYPE)
 *
 * Statement macro for use inside a function that has `count`, `from`,
 * `to`, `from_extent` and `to_extent` in scope.  It reads count values of
 * integer type TYPE from `from` and stores, for each of them, true in the
 * bool at `to` when the value is non-zero and false when it is zero.
 * `from` and `to` are advanced by their extent after every element;
 * `count` itself is left unchanged.
 */
#define CXX_BOOL_COPY_LOOP(TYPE)                                        \
    for( size_t remaining = count; remaining > 0; remaining-- ) {       \
        *((bool*) to) = (0 != *((const TYPE*) from));                   \
        from += from_extent;                                            \
        to += to_extent;                                                \
    }
305 static int32_t
306 copy_cxx_bool_heterogeneous(opal_convertor_t *pConvertor, size_t count,
307 const char* from, size_t from_len, ptrdiff_t from_extent,
308 char* to, size_t to_length, ptrdiff_t to_extent,
309 ptrdiff_t *advance)
310 {
311
312 if ((pConvertor->remoteArch & OPAL_ARCH_BOOLISxx) !=
313 (opal_local_arch & OPAL_ARCH_BOOLISxx)) {
314 switch (pConvertor->remoteArch & OPAL_ARCH_BOOLISxx) {
315 case OPAL_ARCH_BOOLIS8:
316 from_extent = 1;
317 break;
318 case OPAL_ARCH_BOOLIS16:
319 from_extent = 2;
320 break;
321 case OPAL_ARCH_BOOLIS32:
322 from_extent = 4;
323 break;
324 }
325 }
326
327 datatype_check( "bool", sizeof(bool), sizeof(bool), &count,
328 from, from_len, from_extent,
329 to, to_length, to_extent);
330
331 if ((to_extent != sizeof(bool) || from_extent != sizeof(bool)) ||
332 ((pConvertor->remoteArch & OPAL_ARCH_BOOLISxx) !=
333 (opal_local_arch & OPAL_ARCH_BOOLISxx))) {
334 switch (pConvertor->remoteArch & OPAL_ARCH_BOOLISxx) {
335 case OPAL_ARCH_BOOLIS8:
336 CXX_BOOL_COPY_LOOP(int8_t);
337 break;
338 case OPAL_ARCH_BOOLIS16:
339 CXX_BOOL_COPY_LOOP(int16_t);
340 break;
341 case OPAL_ARCH_BOOLIS32:
342 CXX_BOOL_COPY_LOOP(int32_t);
343 break;
344 }
345 } else {
346 MEMCPY( to, from, count * sizeof(bool) );
347 }
348
349 *advance = count * from_extent;
350 return count;
351 }
352
353
/*
 * Copy functions for the fixed-width integer types, generated by
 * COPY_TYPE_HETEROGENEOUS: copy_int1_heterogeneous() ...
 * copy_int16_heterogeneous().  When a 64-bit or 128-bit integer type is
 * not available, the corresponding function name is defined as NULL
 * instead, so it can still be referenced as a (null) function pointer.
 */
COPY_TYPE_HETEROGENEOUS(int1, int8_t)
COPY_TYPE_HETEROGENEOUS(int2, int16_t)
COPY_TYPE_HETEROGENEOUS(int4, int32_t)
#ifdef HAVE_INT64_T
COPY_TYPE_HETEROGENEOUS(int8, int64_t)
#else
#define copy_int8_heterogeneous NULL
#endif

#ifdef HAVE_INT128_T
COPY_TYPE_HETEROGENEOUS(int16, int128_t)
#else
#define copy_int16_heterogeneous NULL
#endif
368
369
/*
 * Copy functions for floating-point data, one per storage size: 2, 4, 8,
 * 12 and 16 bytes.  For every size the candidates are tried in the order
 * short float, float, double, long double, opal_short_float_t, and the
 * first one whose configured size matches is used to generate
 * copy_float<N>_heterogeneous().  If no type has that size, the name is
 * defined as NULL instead.
 */

/* 2-byte floating point */
#if defined(HAVE_SHORT_FLOAT) && SIZEOF_SHORT_FLOAT == 2
COPY_TYPE_HETEROGENEOUS( float2, short float )
#elif SIZEOF_FLOAT == 2
COPY_TYPE_HETEROGENEOUS( float2, float )
#elif SIZEOF_DOUBLE == 2
COPY_TYPE_HETEROGENEOUS( float2, double )
#elif SIZEOF_LONG_DOUBLE == 2
COPY_TYPE_HETEROGENEOUS( float2, long double )
#elif defined(HAVE_OPAL_SHORT_FLOAT_T) && SIZEOF_OPAL_SHORT_FLOAT_T == 2
COPY_TYPE_HETEROGENEOUS( float2, opal_short_float_t )
#else
/* no floating-point type of 2 bytes is available */
#define copy_float2_heterogeneous NULL
#endif

/* 4-byte floating point */
#if defined(HAVE_SHORT_FLOAT) && SIZEOF_SHORT_FLOAT == 4
COPY_TYPE_HETEROGENEOUS( float4, short float )
#elif SIZEOF_FLOAT == 4
COPY_TYPE_HETEROGENEOUS( float4, float )
#elif SIZEOF_DOUBLE == 4
COPY_TYPE_HETEROGENEOUS( float4, double )
#elif SIZEOF_LONG_DOUBLE == 4
COPY_TYPE_HETEROGENEOUS( float4, long double )
#elif defined(HAVE_OPAL_SHORT_FLOAT_T) && SIZEOF_OPAL_SHORT_FLOAT_T == 4
COPY_TYPE_HETEROGENEOUS( float4, opal_short_float_t )
#else
/* no floating-point type of 4 bytes is available */
#define copy_float4_heterogeneous NULL
#endif

/* 8-byte floating point */
#if defined(HAVE_SHORT_FLOAT) && SIZEOF_SHORT_FLOAT == 8
COPY_TYPE_HETEROGENEOUS( float8, short float )
#elif SIZEOF_FLOAT == 8
COPY_TYPE_HETEROGENEOUS( float8, float )
#elif SIZEOF_DOUBLE == 8
COPY_TYPE_HETEROGENEOUS( float8, double )
#elif SIZEOF_LONG_DOUBLE == 8
COPY_TYPE_HETEROGENEOUS( float8, long double )
#elif defined(HAVE_OPAL_SHORT_FLOAT_T) && SIZEOF_OPAL_SHORT_FLOAT_T == 8
COPY_TYPE_HETEROGENEOUS( float8, opal_short_float_t )
#else
/* no floating-point type of 8 bytes is available */
#define copy_float8_heterogeneous NULL
#endif

/* 12-byte floating point */
#if defined(HAVE_SHORT_FLOAT) && SIZEOF_SHORT_FLOAT == 12
COPY_TYPE_HETEROGENEOUS( float12, short float )
#elif SIZEOF_FLOAT == 12
COPY_TYPE_HETEROGENEOUS( float12, float )
#elif SIZEOF_DOUBLE == 12
COPY_TYPE_HETEROGENEOUS( float12, double )
#elif SIZEOF_LONG_DOUBLE == 12
COPY_TYPE_HETEROGENEOUS( float12, long double )
#elif defined(HAVE_OPAL_SHORT_FLOAT_T) && SIZEOF_OPAL_SHORT_FLOAT_T == 12
COPY_TYPE_HETEROGENEOUS( float12, opal_short_float_t )
#else
/* no floating-point type of 12 bytes is available */
#define copy_float12_heterogeneous NULL
#endif

/* 16-byte floating point; a 16-byte long double is the only case generated
 * with LONG_DOUBLE = 1, i.e. with the opal_dt_swap_long_double() step */
#if defined(HAVE_SHORT_FLOAT) && SIZEOF_SHORT_FLOAT == 16
COPY_TYPE_HETEROGENEOUS( float16, short float )
#elif SIZEOF_FLOAT == 16
COPY_TYPE_HETEROGENEOUS( float16, float )
#elif SIZEOF_DOUBLE == 16
COPY_TYPE_HETEROGENEOUS( float16, double )
#elif SIZEOF_LONG_DOUBLE == 16
COPY_TYPE_HETEROGENEOUS_INTERNAL( float16, long double, 1)
#elif defined(HAVE_OPAL_SHORT_FLOAT_T) && SIZEOF_OPAL_SHORT_FLOAT_T == 16
COPY_TYPE_HETEROGENEOUS( float16, opal_short_float_t )
#else
/* no floating-point type of 16 bytes is available */
#define copy_float16_heterogeneous NULL
#endif
444
/*
 * Copy function for the short-float complex datatype.
 *
 * COPY_2SAMETYPE_HETEROGENEOUS expects the type of ONE component: the
 * function it generates byte-swaps two items of sizeof(TYPE) bytes per
 * element.  The component type (short float / opal_short_float_t) is
 * therefore passed here, like float, double and long double are for the
 * other complex datatypes; passing the complex type itself would make
 * every element be treated as twice its real size.
 *
 * When no short-float type is available the name is defined as NULL.
 */
#if defined(HAVE_SHORT_FLOAT__COMPLEX)
COPY_2SAMETYPE_HETEROGENEOUS( short_float_complex, short float )
#elif defined(HAVE_OPAL_SHORT_FLOAT_COMPLEX_T) && defined(HAVE_OPAL_SHORT_FLOAT_T)
COPY_2SAMETYPE_HETEROGENEOUS( short_float_complex, opal_short_float_t )
#else
/* no short-float complex type is available */
#define copy_short_float_complex_heterogeneous NULL
#endif
453
/*
 * Copy functions for the complex datatypes, each generated from the type
 * of its components (two per element), and for wchar_t.  The long double
 * complex variant is generated with LONG_DOUBLE = 1, which adds the
 * opal_dt_swap_long_double() step after the byte swap.
 */
COPY_2SAMETYPE_HETEROGENEOUS( float_complex, float )

COPY_2SAMETYPE_HETEROGENEOUS( double_complex, double )

COPY_2SAMETYPE_HETEROGENEOUS_INTERNAL( long_double_complex, long double, 1)

COPY_TYPE_HETEROGENEOUS (wchar, wchar_t)
461
462
/*
 * Table of the heterogeneous copy functions, indexed by OPAL_DATATYPE_*
 * identifier.
 *
 *  - The loop markers, LB/UB and UNAVAILABLE have no copy function (NULL).
 *  - Each unsigned integer type shares the function of the signed type of
 *    the same width.
 *  - An entry is also NULL when its copy_*_heterogeneous name was defined
 *    as NULL above because no matching C type is available.
 */
conversion_fct_t opal_datatype_heterogeneous_copy_functions[OPAL_DATATYPE_MAX_PREDEFINED] = {
    [OPAL_DATATYPE_LOOP] = NULL,
    [OPAL_DATATYPE_END_LOOP] = NULL,
    [OPAL_DATATYPE_LB] = NULL,
    [OPAL_DATATYPE_UB] = NULL,
    [OPAL_DATATYPE_INT1] = (conversion_fct_t) copy_int1_heterogeneous,
    [OPAL_DATATYPE_INT2] = (conversion_fct_t) copy_int2_heterogeneous,
    [OPAL_DATATYPE_INT4] = (conversion_fct_t) copy_int4_heterogeneous,
    [OPAL_DATATYPE_INT8] = (conversion_fct_t) copy_int8_heterogeneous,
    [OPAL_DATATYPE_INT16] = (conversion_fct_t) copy_int16_heterogeneous,
    /* unsigned integers: same byte handling as the signed ones */
    [OPAL_DATATYPE_UINT1] = (conversion_fct_t) copy_int1_heterogeneous,
    [OPAL_DATATYPE_UINT2] = (conversion_fct_t) copy_int2_heterogeneous,
    [OPAL_DATATYPE_UINT4] = (conversion_fct_t) copy_int4_heterogeneous,
    [OPAL_DATATYPE_UINT8] = (conversion_fct_t) copy_int8_heterogeneous,
    [OPAL_DATATYPE_UINT16] = (conversion_fct_t) copy_int16_heterogeneous,
    [OPAL_DATATYPE_FLOAT2] = (conversion_fct_t) copy_float2_heterogeneous,
    [OPAL_DATATYPE_FLOAT4] = (conversion_fct_t) copy_float4_heterogeneous,
    [OPAL_DATATYPE_FLOAT8] = (conversion_fct_t) copy_float8_heterogeneous,
    [OPAL_DATATYPE_FLOAT12] = (conversion_fct_t) copy_float12_heterogeneous,
    [OPAL_DATATYPE_FLOAT16] = (conversion_fct_t) copy_float16_heterogeneous,
    [OPAL_DATATYPE_SHORT_FLOAT_COMPLEX] = (conversion_fct_t) copy_short_float_complex_heterogeneous,
    [OPAL_DATATYPE_FLOAT_COMPLEX] = (conversion_fct_t) copy_float_complex_heterogeneous,
    [OPAL_DATATYPE_DOUBLE_COMPLEX] = (conversion_fct_t) copy_double_complex_heterogeneous,
    [OPAL_DATATYPE_LONG_DOUBLE_COMPLEX] = (conversion_fct_t) copy_long_double_complex_heterogeneous,
    [OPAL_DATATYPE_BOOL] = (conversion_fct_t) copy_cxx_bool_heterogeneous,
    [OPAL_DATATYPE_WCHAR] = (conversion_fct_t) copy_wchar_heterogeneous,
    [OPAL_DATATYPE_UNAVAILABLE] = NULL,
};