This source file includes the following definitions.
- opal_convertor_construct
- opal_convertor_destruct
- opal_convertor_destroy_masters
- opal_convertor_find_or_create_master
- opal_convertor_create
- opal_convertor_pack
- opal_convertor_unpack
- opal_convertor_create_stack_with_pos_contig
- opal_convertor_create_stack_at_begining
- opal_convertor_set_position_nocheck
- opal_datatype_compute_remote_size
- opal_convertor_compute_remote_size
- opal_convertor_prepare_for_recv
- opal_convertor_prepare_for_send
- opal_convertor_clone
- opal_convertor_dump
- opal_datatype_dump_stack
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 #include "opal_config.h"
26
27 #include <stddef.h>
28 #include <stdio.h>
29 #include <stdint.h>
30
31 #include "opal/prefetch.h"
32 #include "opal/util/arch.h"
33 #include "opal/util/output.h"
34
35 #include "opal/datatype/opal_datatype_internal.h"
36 #include "opal/datatype/opal_datatype.h"
37 #include "opal/datatype/opal_convertor.h"
38 #include "opal/datatype/opal_datatype_checksum.h"
39 #include "opal/datatype/opal_datatype_prototypes.h"
40 #include "opal/datatype/opal_convertor_internal.h"
41 #if OPAL_CUDA_SUPPORT
42 #include "opal/datatype/opal_datatype_cuda.h"
43 #define MEMCPY_CUDA( DST, SRC, BLENGTH, CONVERTOR ) \
44 CONVERTOR->cbmemcpy( (DST), (SRC), (BLENGTH), (CONVERTOR) )
45 #endif
46
47 static void opal_convertor_construct( opal_convertor_t* convertor )
48 {
49 convertor->pStack = convertor->static_stack;
50 convertor->stack_size = DT_STATIC_STACK_SIZE;
51 convertor->partial_length = 0;
52 convertor->remoteArch = opal_local_arch;
53 convertor->flags = OPAL_DATATYPE_FLAG_NO_GAPS | CONVERTOR_COMPLETED;
54 #if OPAL_CUDA_SUPPORT
55 convertor->cbmemcpy = &opal_cuda_memcpy;
56 #endif
57 }
58
59
60 static void opal_convertor_destruct( opal_convertor_t* convertor )
61 {
62 opal_convertor_cleanup( convertor );
63 }
64
65 OBJ_CLASS_INSTANCE(opal_convertor_t, opal_object_t, opal_convertor_construct, opal_convertor_destruct );
66
67 static opal_convertor_master_t* opal_convertor_master_list = NULL;
68
69 extern conversion_fct_t opal_datatype_heterogeneous_copy_functions[OPAL_DATATYPE_MAX_PREDEFINED];
70 extern conversion_fct_t opal_datatype_copy_functions[OPAL_DATATYPE_MAX_PREDEFINED];
71
72 void opal_convertor_destroy_masters( void )
73 {
74 opal_convertor_master_t* master = opal_convertor_master_list;
75
76 while( NULL != master ) {
77 opal_convertor_master_list = master->next;
78 master->next = NULL;
79
80 if( (master->pFunctions != opal_datatype_heterogeneous_copy_functions) &&
81 (master->pFunctions != opal_datatype_copy_functions) )
82 free( master->pFunctions );
83
84 free( master );
85 master = opal_convertor_master_list;
86 }
87 }
88
89
90
91
92
93
94 opal_convertor_master_t* opal_convertor_find_or_create_master( uint32_t remote_arch )
95 {
96 opal_convertor_master_t* master = opal_convertor_master_list;
97 int i;
98 size_t* remote_sizes;
99
100 while( NULL != master ) {
101 if( master->remote_arch == remote_arch )
102 return master;
103 master = master->next;
104 }
105
106
107
108
109 master = (opal_convertor_master_t*)malloc( sizeof(opal_convertor_master_t) );
110 master->next = opal_convertor_master_list;
111 opal_convertor_master_list = master;
112 master->remote_arch = remote_arch;
113 master->flags = 0;
114 master->hetero_mask = 0;
115
116
117
118
119
120 remote_sizes = (size_t*)master->remote_sizes;
121 memcpy(remote_sizes, opal_datatype_local_sizes, sizeof(size_t) * OPAL_DATATYPE_MAX_PREDEFINED);
122
123
124
125
126
127 if( master->remote_arch == opal_local_arch ) {
128 master->pFunctions = opal_datatype_copy_functions;
129 master->flags |= CONVERTOR_HOMOGENEOUS;
130 return master;
131 }
132
133
134 if( opal_arch_checkmask( &master->remote_arch, OPAL_ARCH_BOOLIS8 ) ) {
135 remote_sizes[OPAL_DATATYPE_BOOL] = 1;
136 } else if( opal_arch_checkmask( &master->remote_arch, OPAL_ARCH_BOOLIS16 ) ) {
137 remote_sizes[OPAL_DATATYPE_BOOL] = 2;
138 } else if( opal_arch_checkmask( &master->remote_arch, OPAL_ARCH_BOOLIS32 ) ) {
139 remote_sizes[OPAL_DATATYPE_BOOL] = 4;
140 } else {
141 opal_output( 0, "Unknown sizeof(bool) for the remote architecture\n" );
142 }
143
144
145
146
147
148
149
150 for( i = OPAL_DATATYPE_FIRST_TYPE; i < OPAL_DATATYPE_MAX_PREDEFINED; i++ ) {
151 if( remote_sizes[i] != opal_datatype_local_sizes[i] )
152 master->hetero_mask |= (((uint32_t)1) << i);
153 }
154 if( opal_arch_checkmask( &master->remote_arch, OPAL_ARCH_ISBIGENDIAN ) !=
155 opal_arch_checkmask( &opal_local_arch, OPAL_ARCH_ISBIGENDIAN ) ) {
156 uint32_t hetero_mask = 0;
157
158 for( i = OPAL_DATATYPE_FIRST_TYPE; i < OPAL_DATATYPE_MAX_PREDEFINED; i++ ) {
159 if( remote_sizes[i] > 1 )
160 hetero_mask |= (((uint32_t)1) << i);
161 }
162 hetero_mask &= ~(((uint32_t)1) << OPAL_DATATYPE_BOOL);
163 master->hetero_mask |= hetero_mask;
164 }
165 master->pFunctions = (conversion_fct_t*)malloc( sizeof(opal_datatype_heterogeneous_copy_functions) );
166
167
168
169
170 for( i = OPAL_DATATYPE_FIRST_TYPE; i < OPAL_DATATYPE_MAX_PREDEFINED; i++ ) {
171 if( master->hetero_mask & (((uint32_t)1) << i) )
172 master->pFunctions[i] = opal_datatype_heterogeneous_copy_functions[i];
173 else
174 master->pFunctions[i] = opal_datatype_copy_functions[i];
175 }
176
177
178 return master;
179 }
180
181
182 opal_convertor_t* opal_convertor_create( int32_t remote_arch, int32_t mode )
183 {
184 opal_convertor_t* convertor = OBJ_NEW(opal_convertor_t);
185 opal_convertor_master_t* master;
186
187 master = opal_convertor_find_or_create_master( remote_arch );
188
189 convertor->remoteArch = remote_arch;
190 convertor->stack_pos = 0;
191 convertor->flags = master->flags;
192 convertor->master = master;
193
194 return convertor;
195 }
196
/*
 * Common prologue of opal_convertor_pack() and opal_convertor_unpack().
 *
 * If the convertor is already completed, report that nothing was
 * processed (zero length in the first iovec, zero iovec count, zero
 * bytes) and make the ENCLOSING function return 1. Otherwise reset the
 * checksum state and assert that some data is still pending.
 */
#define OPAL_CONVERTOR_SET_STATUS_BEFORE_PACK_UNPACK( CONVERTOR, IOV, OUT, MAX_DATA ) \
    do {                                                                  \
        if( OPAL_UNLIKELY((CONVERTOR)->flags & CONVERTOR_COMPLETED) ) {   \
            *(MAX_DATA)      = 0;                                         \
            *(OUT)           = 0;                                         \
            (IOV)[0].iov_len = 0;                                         \
            return 1;                                                     \
        }                                                                 \
        (CONVERTOR)->csum_ui1 = 0;                                        \
        (CONVERTOR)->csum_ui2 = 0;                                        \
        (CONVERTOR)->checksum = OPAL_CSUM_ZERO;                           \
        assert( (CONVERTOR)->bConverted < (CONVERTOR)->local_size );      \
    } while(0)
211
212
213
214
215
216
217
218 int32_t opal_convertor_pack( opal_convertor_t* pConv,
219 struct iovec* iov, uint32_t* out_size,
220 size_t* max_data )
221 {
222 OPAL_CONVERTOR_SET_STATUS_BEFORE_PACK_UNPACK( pConv, iov, out_size, max_data );
223
224 if( OPAL_LIKELY(pConv->flags & CONVERTOR_NO_OP) ) {
225
226
227
228
229
230 uint32_t i;
231 unsigned char* base_pointer;
232 size_t pending_length = pConv->local_size - pConv->bConverted;
233
234 *max_data = pending_length;
235 opal_convertor_get_current_pointer( pConv, (void**)&base_pointer );
236
237 for( i = 0; i < *out_size; i++ ) {
238 if( iov[i].iov_len >= pending_length ) {
239 goto complete_contiguous_data_pack;
240 }
241 if( OPAL_LIKELY(NULL == iov[i].iov_base) )
242 iov[i].iov_base = (IOVBASE_TYPE *) base_pointer;
243 else
244 #if OPAL_CUDA_SUPPORT
245 MEMCPY_CUDA( iov[i].iov_base, base_pointer, iov[i].iov_len, pConv );
246 #else
247 MEMCPY( iov[i].iov_base, base_pointer, iov[i].iov_len );
248 #endif
249 pending_length -= iov[i].iov_len;
250 base_pointer += iov[i].iov_len;
251 }
252 *max_data -= pending_length;
253 pConv->bConverted += (*max_data);
254 return 0;
255
256 complete_contiguous_data_pack:
257 iov[i].iov_len = pending_length;
258 if( OPAL_LIKELY(NULL == iov[i].iov_base) )
259 iov[i].iov_base = (IOVBASE_TYPE *) base_pointer;
260 else
261 #if OPAL_CUDA_SUPPORT
262 MEMCPY_CUDA( iov[i].iov_base, base_pointer, iov[i].iov_len, pConv );
263 #else
264 MEMCPY( iov[i].iov_base, base_pointer, iov[i].iov_len );
265 #endif
266 pConv->bConverted = pConv->local_size;
267 *out_size = i + 1;
268 pConv->flags |= CONVERTOR_COMPLETED;
269 return 1;
270 }
271
272 return pConv->fAdvance( pConv, iov, out_size, max_data );
273 }
274
275
276 int32_t opal_convertor_unpack( opal_convertor_t* pConv,
277 struct iovec* iov, uint32_t* out_size,
278 size_t* max_data )
279 {
280 OPAL_CONVERTOR_SET_STATUS_BEFORE_PACK_UNPACK( pConv, iov, out_size, max_data );
281
282 if( OPAL_LIKELY(pConv->flags & CONVERTOR_NO_OP) ) {
283
284
285
286
287
288 uint32_t i;
289 unsigned char* base_pointer;
290 size_t pending_length = pConv->local_size - pConv->bConverted;
291
292 *max_data = pending_length;
293 opal_convertor_get_current_pointer( pConv, (void**)&base_pointer );
294
295 for( i = 0; i < *out_size; i++ ) {
296 if( iov[i].iov_len >= pending_length ) {
297 goto complete_contiguous_data_unpack;
298 }
299 #if OPAL_CUDA_SUPPORT
300 MEMCPY_CUDA( base_pointer, iov[i].iov_base, iov[i].iov_len, pConv );
301 #else
302 MEMCPY( base_pointer, iov[i].iov_base, iov[i].iov_len );
303 #endif
304 pending_length -= iov[i].iov_len;
305 base_pointer += iov[i].iov_len;
306 }
307 *max_data -= pending_length;
308 pConv->bConverted += (*max_data);
309 return 0;
310
311 complete_contiguous_data_unpack:
312 iov[i].iov_len = pending_length;
313 #if OPAL_CUDA_SUPPORT
314 MEMCPY_CUDA( base_pointer, iov[i].iov_base, iov[i].iov_len, pConv );
315 #else
316 MEMCPY( base_pointer, iov[i].iov_base, iov[i].iov_len );
317 #endif
318 pConv->bConverted = pConv->local_size;
319 *out_size = i + 1;
320 pConv->flags |= CONVERTOR_COMPLETED;
321 return 1;
322 }
323
324 return pConv->fAdvance( pConv, iov, out_size, max_data );
325 }
326
327 static inline int opal_convertor_create_stack_with_pos_contig( opal_convertor_t* pConvertor,
328 size_t starting_point, const size_t* sizes )
329 {
330 dt_stack_t* pStack;
331 const opal_datatype_t* pData = pConvertor->pDesc;
332 dt_elem_desc_t* pElems;
333 size_t count;
334 ptrdiff_t extent;
335
336 pStack = pConvertor->pStack;
337
338
339
340
341 pElems = pConvertor->use_desc->desc;
342
343 count = starting_point / pData->size;
344 extent = pData->ub - pData->lb;
345
346 pStack[0].type = OPAL_DATATYPE_LOOP;
347 pStack[0].count = pConvertor->count - count;
348 pStack[0].index = -1;
349 pStack[0].disp = count * extent;
350
351
352 count = starting_point - count * pData->size;
353
354
355
356
357 if( OPAL_LIKELY(0 == count) ) {
358 pStack[1].type = pElems->elem.common.type;
359 pStack[1].count = pElems->elem.count;
360 } else {
361 pStack[1].type = OPAL_DATATYPE_UINT1;
362 pStack[1].count = pData->size - count;
363 }
364 pStack[1].disp = count;
365 pStack[1].index = 0;
366
367 pConvertor->bConverted = starting_point;
368 pConvertor->stack_pos = 1;
369 assert( 0 == pConvertor->partial_length );
370 return OPAL_SUCCESS;
371 }
372
373 static inline
374 int opal_convertor_create_stack_at_begining( opal_convertor_t* convertor,
375 const size_t* sizes )
376 {
377 dt_stack_t* pStack = convertor->pStack;
378 dt_elem_desc_t* pElems;
379
380
381
382
383
384 pElems = convertor->use_desc->desc;
385
386 convertor->stack_pos = 1;
387 convertor->partial_length = 0;
388 convertor->bConverted = 0;
389
390
391
392
393
394 pStack[0].index = -1;
395 pStack[0].count = convertor->count;
396 pStack[0].disp = 0;
397 pStack[0].type = OPAL_DATATYPE_LOOP;
398
399 pStack[1].index = 0;
400 pStack[1].disp = 0;
401 if( pElems[0].elem.common.type == OPAL_DATATYPE_LOOP ) {
402 pStack[1].count = pElems[0].loop.loops;
403 pStack[1].type = OPAL_DATATYPE_LOOP;
404 } else {
405 pStack[1].count = pElems[0].elem.count;
406 pStack[1].type = pElems[0].elem.common.type;
407 }
408 return OPAL_SUCCESS;
409 }
410
411
412 int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
413 size_t* position )
414 {
415 int32_t rc;
416
417
418
419
420
421
422
423 if( OPAL_LIKELY(convertor->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) ) {
424 rc = opal_convertor_create_stack_with_pos_contig( convertor, (*position),
425 opal_datatype_local_sizes );
426 } else {
427 if( (0 == (*position)) || ((*position) < convertor->bConverted) ) {
428 rc = opal_convertor_create_stack_at_begining( convertor, opal_datatype_local_sizes );
429 if( 0 == (*position) ) return rc;
430 }
431 rc = opal_convertor_generic_simple_position( convertor, position );
432
433
434
435
436
437
438
439 if( CONVERTOR_SEND & convertor->flags ) {
440 convertor->bConverted -= convertor->partial_length;
441 convertor->partial_length = 0;
442 }
443 }
444 *position = convertor->bConverted;
445 return rc;
446 }
447
448 static size_t
449 opal_datatype_compute_remote_size( const opal_datatype_t* pData,
450 const size_t* sizes )
451 {
452 uint32_t typeMask = pData->bdt_used;
453 size_t length = 0;
454
455 if (opal_datatype_is_predefined(pData)) {
456 return sizes[pData->desc.desc->elem.common.type];
457 }
458
459 if( OPAL_UNLIKELY(NULL == pData->ptypes) ) {
460
461 opal_datatype_compute_ptypes( (opal_datatype_t*)pData );
462 }
463
464 for( int i = OPAL_DATATYPE_FIRST_TYPE; typeMask && (i < OPAL_DATATYPE_MAX_PREDEFINED); i++ ) {
465 if( typeMask & ((uint32_t)1 << i) ) {
466 length += (pData->ptypes[i] * sizes[i]);
467 typeMask ^= ((uint32_t)1 << i);
468 }
469 }
470 return length;
471 }
472
473
474
475
476
477
478 size_t opal_convertor_compute_remote_size( opal_convertor_t* pConvertor )
479 {
480 opal_datatype_t* datatype = (opal_datatype_t*)pConvertor->pDesc;
481
482 pConvertor->remote_size = pConvertor->local_size;
483 if( OPAL_UNLIKELY(datatype->bdt_used & pConvertor->master->hetero_mask) ) {
484 pConvertor->flags &= (~CONVERTOR_HOMOGENEOUS);
485 if (!(pConvertor->flags & CONVERTOR_SEND && pConvertor->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS)) {
486 pConvertor->use_desc = &(datatype->desc);
487 }
488 if( 0 == (pConvertor->flags & CONVERTOR_HAS_REMOTE_SIZE) ) {
489
490 pConvertor->remote_size = opal_datatype_compute_remote_size(datatype,
491 pConvertor->master->remote_sizes);
492 pConvertor->remote_size *= pConvertor->count;
493 }
494 }
495 pConvertor->flags |= CONVERTOR_HAS_REMOTE_SIZE;
496 return pConvertor->remote_size;
497 }
498
499
500
501
502
503
504
505
/*
 * Shared body of opal_convertor_prepare_for_send() and
 * opal_convertor_prepare_for_recv().
 *
 * NOTE: this macro contains `return` statements and therefore returns
 * from the ENCLOSING function: OPAL_SUCCESS on the early exits, and
 * OPAL_ERR_OUT_OF_RESOURCE when a larger stack cannot be allocated.
 * Control only falls through when a stack was set up and the caller
 * still has to select the fAdvance function.
 *
 * The parameter names are deliberately distinct from the structure
 * members accessed in the body (e.g. `count`) so that argument
 * substitution can never rewrite a member name.
 *
 * Every line of the definition must end with a continuation backslash.
 */
#define OPAL_CONVERTOR_PREPARE( CONVERTOR, DATATYPE, COUNT, USERBUF )                       \
    do {                                                                                    \
        (CONVERTOR)->local_size = (COUNT) * (DATATYPE)->size;                               \
        (CONVERTOR)->pBaseBuf   = (unsigned char*)(USERBUF);                                \
        (CONVERTOR)->count      = (COUNT);                                                  \
        (CONVERTOR)->pDesc      = (opal_datatype_t*)(DATATYPE);                             \
        (CONVERTOR)->bConverted = 0;                                                        \
        (CONVERTOR)->use_desc   = &((DATATYPE)->opt_desc);                                  \
                                                                                            \
        /* Nothing to transfer: mark the convertor completed and stop. */                   \
        if( OPAL_UNLIKELY((0 == (COUNT)) || (0 == (DATATYPE)->size)) ) {                    \
            (CONVERTOR)->flags |= (OPAL_DATATYPE_FLAG_NO_GAPS | CONVERTOR_COMPLETED |       \
                                   CONVERTOR_HAS_REMOTE_SIZE);                              \
            (CONVERTOR)->local_size = (CONVERTOR)->remote_size = 0;                         \
            return OPAL_SUCCESS;                                                            \
        }                                                                                   \
                                                                                            \
        /* Keep the convertor type bits, import the datatype flags. */                      \
        (CONVERTOR)->flags &= CONVERTOR_TYPE_MASK;                                          \
        (CONVERTOR)->flags |= (CONVERTOR_DATATYPE_MASK & (DATATYPE)->flags);                \
        (CONVERTOR)->flags |= (CONVERTOR_NO_OP | CONVERTOR_HOMOGENEOUS);                    \
                                                                                            \
        (CONVERTOR)->remote_size = (CONVERTOR)->local_size;                                 \
        if( OPAL_LIKELY((CONVERTOR)->remoteArch == opal_local_arch) ) {                     \
            if( !((CONVERTOR)->flags & CONVERTOR_WITH_CHECKSUM) &&                          \
                (((CONVERTOR)->flags & OPAL_DATATYPE_FLAG_NO_GAPS) ||                       \
                 (((CONVERTOR)->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) &&                   \
                  (1 == (COUNT)))) ) {                                                      \
                return OPAL_SUCCESS;                                                        \
            }                                                                               \
        }                                                                                   \
                                                                                            \
        assert( (CONVERTOR)->pDesc == (DATATYPE) );                                         \
        opal_convertor_compute_remote_size( (CONVERTOR) );                                  \
        assert( NULL != (CONVERTOR)->use_desc->desc );                                      \
                                                                                            \
        /* Gap-free homogeneous send without checksum needs no stack. */                    \
        if( (((CONVERTOR)->flags & (CONVERTOR_WITH_CHECKSUM | OPAL_DATATYPE_FLAG_NO_GAPS))  \
             == OPAL_DATATYPE_FLAG_NO_GAPS) &&                                              \
            (((CONVERTOR)->flags & (CONVERTOR_SEND | CONVERTOR_HOMOGENEOUS)) ==             \
             (CONVERTOR_SEND | CONVERTOR_HOMOGENEOUS)) ) {                                  \
            return OPAL_SUCCESS;                                                            \
        }                                                                                   \
        (CONVERTOR)->flags &= ~CONVERTOR_NO_OP;                                             \
        {                                                                                   \
            uint32_t required_stack_length = (DATATYPE)->loops + 1;                         \
                                                                                            \
            if( required_stack_length > (CONVERTOR)->stack_size ) {                         \
                dt_stack_t* opal_prepare_new_stack;                                         \
                                                                                            \
                assert( (CONVERTOR)->pStack == (CONVERTOR)->static_stack );                 \
                opal_prepare_new_stack =                                                    \
                    (dt_stack_t*)malloc( sizeof(dt_stack_t) * required_stack_length );      \
                if( NULL == opal_prepare_new_stack ) {                                      \
                    /* Leave stack_size/pStack untouched on failure. */                     \
                    return OPAL_ERR_OUT_OF_RESOURCE;                                        \
                }                                                                           \
                (CONVERTOR)->stack_size = required_stack_length;                            \
                (CONVERTOR)->pStack     = opal_prepare_new_stack;                           \
            }                                                                               \
        }                                                                                   \
        opal_convertor_create_stack_at_begining( (CONVERTOR), opal_datatype_local_sizes );  \
    } while(0)
562
563
564 int32_t opal_convertor_prepare_for_recv( opal_convertor_t* convertor,
565 const struct opal_datatype_t* datatype,
566 size_t count,
567 const void* pUserBuf )
568 {
569
570
571 convertor->flags |= CONVERTOR_RECV;
572 #if OPAL_CUDA_SUPPORT
573 if (!( convertor->flags & CONVERTOR_SKIP_CUDA_INIT )) {
574 mca_cuda_convertor_init(convertor, pUserBuf);
575 }
576 #endif
577
578 assert(! (convertor->flags & CONVERTOR_SEND));
579 OPAL_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf );
580
581 if( convertor->flags & CONVERTOR_WITH_CHECKSUM ) {
582 if( !(convertor->flags & CONVERTOR_HOMOGENEOUS) ) {
583 convertor->fAdvance = opal_unpack_general_checksum;
584 } else {
585 if( convertor->pDesc->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
586 convertor->fAdvance = opal_unpack_homogeneous_contig_checksum;
587 } else {
588 convertor->fAdvance = opal_generic_simple_unpack_checksum;
589 }
590 }
591 } else {
592 if( !(convertor->flags & CONVERTOR_HOMOGENEOUS) ) {
593 convertor->fAdvance = opal_unpack_general;
594 } else {
595 if( convertor->pDesc->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
596 convertor->fAdvance = opal_unpack_homogeneous_contig;
597 } else {
598 convertor->fAdvance = opal_generic_simple_unpack;
599 }
600 }
601 }
602 return OPAL_SUCCESS;
603 }
604
605
606 int32_t opal_convertor_prepare_for_send( opal_convertor_t* convertor,
607 const struct opal_datatype_t* datatype,
608 size_t count,
609 const void* pUserBuf )
610 {
611 convertor->flags |= CONVERTOR_SEND;
612 #if OPAL_CUDA_SUPPORT
613 if (!( convertor->flags & CONVERTOR_SKIP_CUDA_INIT )) {
614 mca_cuda_convertor_init(convertor, pUserBuf);
615 }
616 #endif
617
618 OPAL_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf );
619
620 if( convertor->flags & CONVERTOR_WITH_CHECKSUM ) {
621 if( CONVERTOR_SEND_CONVERSION == (convertor->flags & (CONVERTOR_SEND_CONVERSION|CONVERTOR_HOMOGENEOUS)) ) {
622 convertor->fAdvance = opal_pack_general_checksum;
623 } else {
624 if( datatype->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
625 if( ((datatype->ub - datatype->lb) == (ptrdiff_t)datatype->size)
626 || (1 >= convertor->count) )
627 convertor->fAdvance = opal_pack_homogeneous_contig_checksum;
628 else
629 convertor->fAdvance = opal_pack_homogeneous_contig_with_gaps_checksum;
630 } else {
631 convertor->fAdvance = opal_generic_simple_pack_checksum;
632 }
633 }
634 } else {
635 if( CONVERTOR_SEND_CONVERSION == (convertor->flags & (CONVERTOR_SEND_CONVERSION|CONVERTOR_HOMOGENEOUS)) ) {
636 convertor->fAdvance = opal_pack_general;
637 } else {
638 if( datatype->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
639 if( ((datatype->ub - datatype->lb) == (ptrdiff_t)datatype->size)
640 || (1 >= convertor->count) )
641 convertor->fAdvance = opal_pack_homogeneous_contig;
642 else
643 convertor->fAdvance = opal_pack_homogeneous_contig_with_gaps;
644 } else {
645 convertor->fAdvance = opal_generic_simple_pack;
646 }
647 }
648 }
649 return OPAL_SUCCESS;
650 }
651
652
653
654
655
656
657
658
659
660
661
662 int opal_convertor_clone( const opal_convertor_t* source,
663 opal_convertor_t* destination,
664 int32_t copy_stack )
665 {
666 destination->remoteArch = source->remoteArch;
667 destination->flags = source->flags;
668 destination->pDesc = source->pDesc;
669 destination->use_desc = source->use_desc;
670 destination->count = source->count;
671 destination->pBaseBuf = source->pBaseBuf;
672 destination->fAdvance = source->fAdvance;
673 destination->master = source->master;
674 destination->local_size = source->local_size;
675 destination->remote_size = source->remote_size;
676
677 if( OPAL_UNLIKELY(source->stack_size > DT_STATIC_STACK_SIZE) ) {
678 destination->pStack = (dt_stack_t*)malloc(sizeof(dt_stack_t) * source->stack_size );
679 } else {
680 destination->pStack = destination->static_stack;
681 }
682 destination->stack_size = source->stack_size;
683
684
685 if( OPAL_LIKELY(0 == copy_stack) ) {
686 destination->bConverted = -1;
687 destination->stack_pos = -1;
688 } else {
689 memcpy( destination->pStack, source->pStack, sizeof(dt_stack_t) * (source->stack_pos+1) );
690 destination->bConverted = source->bConverted;
691 destination->stack_pos = source->stack_pos;
692 }
693 #if OPAL_CUDA_SUPPORT
694 destination->cbmemcpy = source->cbmemcpy;
695 #endif
696 return OPAL_SUCCESS;
697 }
698
699
700 void opal_convertor_dump( opal_convertor_t* convertor )
701 {
702 opal_output( 0, "Convertor %p count %" PRIsize_t " stack position %u bConverted %" PRIsize_t "\n"
703 "\tlocal_size %" PRIsize_t " remote_size %" PRIsize_t " flags %X stack_size %u pending_length %" PRIsize_t "\n"
704 "\tremote_arch %u local_arch %u\n",
705 (void*)convertor,
706 convertor->count, convertor->stack_pos, convertor->bConverted,
707 convertor->local_size, convertor->remote_size,
708 convertor->flags, convertor->stack_size, convertor->partial_length,
709 convertor->remoteArch, opal_local_arch );
710 if( convertor->flags & CONVERTOR_RECV ) opal_output( 0, "unpack ");
711 if( convertor->flags & CONVERTOR_SEND ) opal_output( 0, "pack ");
712 if( convertor->flags & CONVERTOR_SEND_CONVERSION ) opal_output( 0, "conversion ");
713 if( convertor->flags & CONVERTOR_HOMOGENEOUS ) opal_output( 0, "homogeneous " );
714 else opal_output( 0, "heterogeneous ");
715 if( convertor->flags & CONVERTOR_NO_OP ) opal_output( 0, "no_op ");
716 if( convertor->flags & CONVERTOR_WITH_CHECKSUM ) opal_output( 0, "checksum ");
717 if( convertor->flags & CONVERTOR_CUDA ) opal_output( 0, "CUDA ");
718 if( convertor->flags & CONVERTOR_CUDA_ASYNC ) opal_output( 0, "CUDA Async ");
719 if( convertor->flags & CONVERTOR_COMPLETED ) opal_output( 0, "COMPLETED ");
720
721 opal_datatype_dump( convertor->pDesc );
722 if( !((0 == convertor->stack_pos) &&
723 ((size_t)convertor->pStack[convertor->stack_pos].index > convertor->pDesc->desc.length)) ) {
724
725 opal_output( 0, "Actual stack representation\n" );
726 opal_datatype_dump_stack( convertor->pStack, convertor->stack_pos,
727 convertor->pDesc->desc.desc, convertor->pDesc->name );
728 }
729 }
730
731
732 void opal_datatype_dump_stack( const dt_stack_t* pStack, int stack_pos,
733 const union dt_elem_desc* pDesc, const char* name )
734 {
735 opal_output( 0, "\nStack %p stack_pos %d name %s\n", (void*)pStack, stack_pos, name );
736 for( ; stack_pos >= 0; stack_pos-- ) {
737 opal_output( 0, "%d: pos %d count %" PRIsize_t " disp %ld ", stack_pos, pStack[stack_pos].index,
738 pStack[stack_pos].count, pStack[stack_pos].disp );
739 if( pStack->index != -1 )
740 opal_output( 0, "\t[desc count %lu disp %ld extent %ld]\n",
741 (unsigned long)pDesc[pStack[stack_pos].index].elem.count,
742 (long)pDesc[pStack[stack_pos].index].elem.disp,
743 (long)pDesc[pStack[stack_pos].index].elem.extent );
744 else
745 opal_output( 0, "\n" );
746 }
747 opal_output( 0, "\n" );
748 }