root/opal/datatype/opal_datatype_pack.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. opal_pack_homogeneous_contig_function
  2. opal_pack_homogeneous_contig_with_gaps_function
  3. opal_generic_simple_pack_function
  4. pack_predefined_heterogeneous
  5. opal_pack_general_function

   1 /* -*- Mode: C; c-basic-offset:4 ; -*- */
   2 /*
   3  * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
   4  *                         University Research and Technology
   5  *                         Corporation.  All rights reserved.
   6  * Copyright (c) 2004-2016 The University of Tennessee and The University
   7  *                         of Tennessee Research Foundation.  All rights
   8  *                         reserved.
   9  * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
  10  *                         University of Stuttgart.  All rights reserved.
  11  * Copyright (c) 2004-2006 The Regents of the University of California.
  12  *                         All rights reserved.
  13  * Copyright (c) 2009      Oak Ridge National Labs.  All rights reserved.
  14  * Copyright (c) 2013      Cisco Systems, Inc.  All rights reserved.
  15  * Copyright (c) 2017-2018 Research Organization for Information Science
  16  *                         and Technology (RIST).  All rights reserved.
  17  * $COPYRIGHT$
  18  *
  19  * Additional copyrights may follow
  20  *
  21  * $HEADER$
  22  */
  23 
  24 #include "opal_config.h"
  25 
  26 #include <stddef.h>
  27 
  28 #include "opal/datatype/opal_convertor_internal.h"
  29 #include "opal/datatype/opal_datatype_internal.h"
  30 
  31 #if OPAL_ENABLE_DEBUG
  32 #include "opal/util/output.h"
  33 
  34 #define DO_DEBUG(INST)  if( opal_pack_debug ) { INST }
  35 #else
  36 #define DO_DEBUG(INST)
  37 #endif  /* OPAL_ENABLE_DEBUG */
  38 
  39 #include "opal/datatype/opal_datatype_checksum.h"
  40 #include "opal/datatype/opal_datatype_pack.h"
  41 #include "opal/datatype/opal_datatype_prototypes.h"
  42 
  43 #if defined(CHECKSUM)
  44 #define opal_pack_homogeneous_contig_function           opal_pack_homogeneous_contig_checksum
  45 #define opal_pack_homogeneous_contig_with_gaps_function opal_pack_homogeneous_contig_with_gaps_checksum
  46 #define opal_generic_simple_pack_function               opal_generic_simple_pack_checksum
  47 #define opal_pack_general_function                      opal_pack_general_checksum
  48 #else
  49 #define opal_pack_homogeneous_contig_function           opal_pack_homogeneous_contig
  50 #define opal_pack_homogeneous_contig_with_gaps_function opal_pack_homogeneous_contig_with_gaps
  51 #define opal_generic_simple_pack_function               opal_generic_simple_pack
  52 #define opal_pack_general_function                      opal_pack_general
  53 #endif  /* defined(CHECKSUM) */
  54 
  55 
  56 #define IOVEC_MEM_LIMIT 8192
  57 
  58 /* the contig versions does not use the stack. They can easily retrieve
  59  * the status with just the informations from pConvertor->bConverted.
  60  */
  61 int32_t
  62 opal_pack_homogeneous_contig_function( opal_convertor_t* pConv,
  63                                        struct iovec* iov,
  64                                        uint32_t* out_size,
  65                                        size_t* max_data )
  66 {
  67     dt_stack_t* pStack = pConv->pStack;
  68     unsigned char *source_base = NULL;
  69     uint32_t iov_count;
  70     size_t length = pConv->local_size - pConv->bConverted, initial_amount = pConv->bConverted;
  71     ptrdiff_t initial_displ = pConv->use_desc->desc[pConv->use_desc->used].end_loop.first_elem_disp;
  72 
  73     source_base = (pConv->pBaseBuf + initial_displ + pStack[0].disp + pStack[1].disp);
  74 
  75     /* There are some optimizations that can be done if the upper level
  76      * does not provide a buffer.
  77      */
  78     for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
  79         if( 0 == length ) break;
  80         if( (size_t)iov[iov_count].iov_len > length )
  81             iov[iov_count].iov_len = length;
  82         if( iov[iov_count].iov_base == NULL ) {
  83             iov[iov_count].iov_base = (IOVBASE_TYPE *) source_base;
  84             COMPUTE_CSUM( iov[iov_count].iov_base, iov[iov_count].iov_len, pConv );
  85         } else {
  86             /* contiguous data just memcpy the smallest data in the user buffer */
  87             OPAL_DATATYPE_SAFEGUARD_POINTER( source_base, iov[iov_count].iov_len,
  88                                         pConv->pBaseBuf, pConv->pDesc, pConv->count );
  89             MEMCPY_CSUM( iov[iov_count].iov_base, source_base, iov[iov_count].iov_len, pConv );
  90         }
  91         length -= iov[iov_count].iov_len;
  92         pConv->bConverted += iov[iov_count].iov_len;
  93         pStack[0].disp += iov[iov_count].iov_len;
  94         source_base += iov[iov_count].iov_len;
  95     }
  96 
  97     /* update the return value */
  98     *max_data = pConv->bConverted - initial_amount;
  99     *out_size = iov_count;
 100     if( pConv->bConverted == pConv->local_size ) {
 101         pConv->flags |= CONVERTOR_COMPLETED;
 102         return 1;
 103     }
 104     return 0;
 105 }
 106 
 107 
 108 int32_t
 109 opal_pack_homogeneous_contig_with_gaps_function( opal_convertor_t* pConv,
 110                                                  struct iovec* iov,
 111                                                  uint32_t* out_size,
 112                                                  size_t* max_data )
 113 {
 114     const opal_datatype_t* pData = pConv->pDesc;
 115     dt_stack_t* stack = pConv->pStack;
 116     unsigned char *user_memory, *packed_buffer;
 117     uint32_t iov_count, index;
 118     size_t i;
 119     size_t bConverted, remaining, length, initial_bytes_converted = pConv->bConverted;
 120     ptrdiff_t extent= pData->ub - pData->lb;
 121     ptrdiff_t initial_displ = pConv->use_desc->desc[pConv->use_desc->used].end_loop.first_elem_disp;
 122 
 123     assert( (pData->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) && ((ptrdiff_t)pData->size != extent) );
 124     DO_DEBUG( opal_output( 0, "pack_homogeneous_contig( pBaseBuf %p, iov_count %d )\n",
 125                            (void*)pConv->pBaseBuf, *out_size ); );
 126     if( stack[1].type != opal_datatype_uint1.id ) {
 127         stack[1].count *= opal_datatype_basicDatatypes[stack[1].type]->size;
 128         stack[1].type = opal_datatype_uint1.id;
 129     }
 130 
 131     /* There are some optimizations that can be done if the upper level
 132      * does not provide a buffer.
 133      */
 134     for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
 135         /* Limit the amount of packed data to the data left over on this convertor */
 136         remaining = pConv->local_size - pConv->bConverted;
 137         if( 0 == remaining ) break;  /* we're done this time */
 138         if( remaining > iov[iov_count].iov_len )
 139             remaining = iov[iov_count].iov_len;
 140         packed_buffer = (unsigned char *)iov[iov_count].iov_base;
 141         bConverted = remaining; /* how much will get unpacked this time */
 142         user_memory = pConv->pBaseBuf + initial_displ + stack[0].disp + stack[1].disp;
 143         i = pConv->count - stack[0].count;  /* how many we already packed */
 144         assert(i == (pConv->bConverted / pData->size));
 145 
 146         if( packed_buffer == NULL ) {
 147             /* special case for small data. We avoid allocating memory if we
 148              * can fill the iovec directly with the address of the remaining
 149              * data.
 150              */
 151             if( stack->count < (size_t)((*out_size) - iov_count) ) {
 152                 stack[1].count = pData->size - (pConv->bConverted % pData->size);
 153                 for( index = iov_count; i < pConv->count; i++, index++ ) {
 154                     iov[index].iov_base = (IOVBASE_TYPE *) user_memory;
 155                     iov[index].iov_len = stack[1].count;
 156                     stack[0].disp += extent;
 157                     pConv->bConverted += stack[1].count;
 158                     stack[1].disp  = 0;  /* reset it for the next round */
 159                     stack[1].count = pData->size;
 160                     user_memory = pConv->pBaseBuf + initial_displ + stack[0].disp;
 161                     COMPUTE_CSUM( iov[index].iov_base, iov[index].iov_len, pConv );
 162                 }
 163                 *out_size = iov_count + index;
 164                 *max_data = (pConv->bConverted - initial_bytes_converted);
 165                 pConv->flags |= CONVERTOR_COMPLETED;
 166                 return 1;  /* we're done */
 167             }
 168             /* now special case for big contiguous data with gaps around */
 169             if( pData->size >= IOVEC_MEM_LIMIT ) {
 170                 /* as we dont have to copy any data, we can simply fill the iovecs
 171                  * with data from the user data description.
 172                  */
 173                 for( index = iov_count; (i < pConv->count) && (index < (*out_size));
 174                      i++, index++ ) {
 175                     if( remaining < pData->size ) {
 176                         iov[index].iov_base = (IOVBASE_TYPE *) user_memory;
 177                         iov[index].iov_len = remaining;
 178                         remaining = 0;
 179                         COMPUTE_CSUM( iov[index].iov_base, iov[index].iov_len, pConv );
 180                         break;
 181                     } else {
 182                         iov[index].iov_base = (IOVBASE_TYPE *) user_memory;
 183                         iov[index].iov_len = pData->size;
 184                         user_memory += extent;
 185                         COMPUTE_CSUM( iov[index].iov_base, (size_t)iov[index].iov_len, pConv );
 186                     }
 187                     remaining -= iov[index].iov_len;
 188                     pConv->bConverted += iov[index].iov_len;
 189                 }
 190                 *out_size = index;
 191                 *max_data = (pConv->bConverted - initial_bytes_converted);
 192                 if( pConv->bConverted == pConv->local_size ) {
 193                     pConv->flags |= CONVERTOR_COMPLETED;
 194                     return 1;
 195                 }
 196                 return 0;
 197             }
 198         }
 199 
 200         {
 201             DO_DEBUG( opal_output( 0, "pack_homogeneous_contig( user_memory %p, packed_buffer %p length %lu\n",
 202                                    (void*)user_memory, (void*)packed_buffer, (unsigned long)remaining ); );
 203 
 204             length = (0 == pConv->stack_pos ? 0 : stack[1].count);  /* left over from the last pack */
 205             /* data left from last round and enough space in the buffer */
 206             if( (0 != length) && (length <= remaining)) {
 207                 /* copy the partial left-over from the previous round */
 208                 OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, length, pConv->pBaseBuf,
 209                                                  pData, pConv->count );
 210                 DO_DEBUG( opal_output( 0, "2. pack dest %p src %p length %lu\n",
 211                                        (void*)user_memory, (void*)packed_buffer, (unsigned long)length ); );
 212                 MEMCPY_CSUM( packed_buffer, user_memory, length, pConv );
 213                 packed_buffer  += length;
 214                 user_memory    += (extent - pData->size + length);
 215                 remaining      -= length;
 216                 stack[1].count -= length;
 217                 if( 0 == stack[1].count) { /* one completed element */
 218                     stack[0].count--;
 219                     stack[0].disp += extent;
 220                     if( 0 != stack[0].count ) {  /* not yet done */
 221                         stack[1].count = pData->size;
 222                         stack[1].disp = 0;
 223                     }
 224                 }
 225             }
 226             for( i = 0;  pData->size <= remaining; i++ ) {
 227                 OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, pData->size, pConv->pBaseBuf,
 228                                                  pData, pConv->count );
 229                 DO_DEBUG( opal_output( 0, "3. pack dest %p src %p length %lu\n",
 230                                        (void*)user_memory, (void*)packed_buffer, (unsigned long)pData->size ); );
 231                 MEMCPY_CSUM( packed_buffer, user_memory, pData->size, pConv );
 232                 packed_buffer += pData->size;
 233                 user_memory   += extent;
 234                 remaining   -= pData->size;
 235             }
 236             stack[0].count -= i;  /* the filled up and the entire types */
 237             stack[0].disp  += (i * extent);
 238             stack[1].disp  += remaining;
 239             /* Copy the last bits */
 240             if( 0 != remaining ) {
 241                 OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, remaining, pConv->pBaseBuf,
 242                                                  pData, pConv->count );
 243                 DO_DEBUG( opal_output( 0, "4. pack dest %p src %p length %lu\n",
 244                                        (void*)user_memory, (void*)packed_buffer, (unsigned long)remaining ); );
 245                 MEMCPY_CSUM( packed_buffer, user_memory, remaining, pConv );
 246                 user_memory += remaining;
 247                 stack[1].count -= remaining;
 248             }
 249             if( 0 == stack[1].count ) {  /* prepare for the next element */
 250                 stack[1].count = pData->size;
 251                 stack[1].disp  = 0;
 252             }
 253         }
 254         pConv->bConverted += bConverted;
 255     }
 256     *out_size = iov_count;
 257     *max_data = (pConv->bConverted - initial_bytes_converted);
 258     if( pConv->bConverted == pConv->local_size ) {
 259         pConv->flags |= CONVERTOR_COMPLETED;
 260         return 1;
 261     }
 262     return 0;
 263 }
 264 
  265 /* The pack/unpack functions need a cleanup. I have to create a proper interface to access
  266  * all basic functionalities, hence using them as basic blocks for all conversion functions.
  267  *
  268  * But first let's make some global assumptions:
  269  * - a datatype (with the flag DT_DATA set) will have the contiguous flags set if and only if
  270  *   the data is really contiguous (extent equal with size)
  271  * - for the OPAL_DATATYPE_LOOP type the DT_CONTIGUOUS flag set means that the content of the loop is
  272  *   contiguous but with a gap in the beginning or at the end.
  273  * - the DT_CONTIGUOUS flag for the type OPAL_DATATYPE_END_LOOP is meaningless.
  274  */
      /*
       * Pack the data described by pConvertor into the iovec array given by the caller.
       *
       * pConvertor  convertor holding the datatype description (use_desc->desc), the
       *             user buffer (pBaseBuf) and the stack recording where the previous
       *             call stopped.
       * iov         array of destination buffers; on return every processed entry has
       *             its iov_len reduced to the number of bytes written into it.
       * out_size    in: number of usable entries in iov; out: number of entries processed.
       * max_data    out: total number of bytes packed by this call.
       *
       * Returns 1 (and sets CONVERTOR_COMPLETED) when bConverted reaches local_size.
       * Otherwise the current position is pushed on the stack for the next call and
       * 0 is returned.
       *
       * NOTE(review): PACK_PREDEFINED_DATATYPE, PACK_CONTIGUOUS_LOOP,
       * UPDATE_INTERNAL_COUNTERS and PUSH_STACK are macros defined outside this file.
       * From the way they are used here they presumably update count_desc, conv_ptr,
       * iov_ptr, iov_len_local, pElem, pStack and stack_pos in place - confirm against
       * their definitions.
       */
  275 int32_t
  276 opal_generic_simple_pack_function( opal_convertor_t* pConvertor,
  277                                    struct iovec* iov, uint32_t* out_size,
  278                                    size_t* max_data )
  279 {
  280     dt_stack_t* pStack;       /* pointer to the position on the stack */
  281     uint32_t pos_desc;        /* actual position in the description of the derived datatype */
  282     size_t count_desc;        /* the number of items already done in the actual pos_desc */
  283     size_t total_packed = 0;  /* total amount packed this time */
  284     dt_elem_desc_t* description;
  285     dt_elem_desc_t* pElem;
  286     const opal_datatype_t *pData = pConvertor->pDesc;
  287     unsigned char *conv_ptr, *iov_ptr;
  288     size_t iov_len_local;
  289     uint32_t iov_count;
  290
  291     DO_DEBUG( opal_output( 0, "opal_convertor_generic_simple_pack( %p:%p, {%p, %lu}, %d )\n",
  292                            (void*)pConvertor, (void*)pConvertor->pBaseBuf,
  293                            (void*)iov[0].iov_base, (unsigned long)iov[0].iov_len, *out_size ); );
  294
  295     description = pConvertor->use_desc->desc;
  296
  297     /* For the first step we have to add both displacement to the source. After in the
  298      * main while loop we will set back the conv_ptr to the correct value. This is
  299      * due to the fact that the convertor can stop in the middle of a data with a count
  300      */
  301     pStack = pConvertor->pStack + pConvertor->stack_pos;
  302     pos_desc   = pStack->index;
  303     conv_ptr   = pConvertor->pBaseBuf + pStack->disp;
  304     count_desc = pStack->count;
          /* The top entry only carried the resume position read above. Drop it so
           * that pStack designates the enclosing level; the position is pushed
           * again before returning 0. */
  305     pStack--;
  306     pConvertor->stack_pos--;
  307     pElem = &(description[pos_desc]);
  308
  309     DO_DEBUG( opal_output( 0, "pack start pos_desc %d count_desc %" PRIsize_t " disp %ld\n"
  310                            "stack_pos %d pos_desc %d count_desc %" PRIsize_t " disp %ld\n",
  311                            pos_desc, count_desc, (long)(conv_ptr - pConvertor->pBaseBuf),
  312                            pConvertor->stack_pos, pStack->index, pStack->count, pStack->disp ); );
  313
          /* One pass of the outer loop per destination buffer. */
  314     for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
  315         iov_ptr = (unsigned char *) iov[iov_count].iov_base;
  316         iov_len_local = iov[iov_count].iov_len;
  317         while( 1 ) {
  318             while( pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) {
  319                 /* now here we have a basic datatype */
  320                 PACK_PREDEFINED_DATATYPE( pConvertor, pElem, count_desc,
  321                                           conv_ptr, iov_ptr, iov_len_local );
  322                 if( 0 == count_desc ) {  /* completed */
  323                     conv_ptr = pConvertor->pBaseBuf + pStack->disp;
  324                     pos_desc++;  /* advance to the next data */
  325                     UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
  326                     continue;
  327                 }
                      /* Items are left over for this element: stop working on
                       * the current buffer. */
  328                 goto complete_loop;
  329             }
  330             if( OPAL_DATATYPE_END_LOOP == pElem->elem.common.type ) { /* end of the current loop */
  331                 DO_DEBUG( opal_output( 0, "pack end_loop count %" PRIsize_t " stack_pos %d"
  332                                        " pos_desc %d disp %ld space %lu\n",
  333                                        pStack->count, pConvertor->stack_pos,
  334                                        pos_desc, pStack->disp, (unsigned long)iov_len_local ); );
  335                 if( --(pStack->count) == 0 ) { /* end of loop */
  336                     if( 0 == pConvertor->stack_pos ) {
  337                         /* we're done. Force the exit of the main for loop (around iovec) */
  338                         *out_size = iov_count;
  339                         goto complete_loop;
  340                     }
  341                     pConvertor->stack_pos--;  /* go one position up on the stack */
  342                     pStack--;
  343                     pos_desc++;  /* and move to the next element */
  344                 } else {
  345                     pos_desc = pStack->index + 1;  /* jump back to the beginning of the loop */
  346                     if( pStack->index == -1 ) {  /* If it's the datatype count loop */
  347                         pStack->disp += (pData->ub - pData->lb);  /* jump by the datatype extent */
  348                     } else {
  349                         assert( OPAL_DATATYPE_LOOP == description[pStack->index].loop.common.type );
  350                         pStack->disp += description[pStack->index].loop.extent;  /* jump by the loop extent */
  351                     }
  352                 }
  353                 conv_ptr = pConvertor->pBaseBuf + pStack->disp;
  354                 UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
  355                 DO_DEBUG( opal_output( 0, "pack new_loop count %" PRIsize_t " stack_pos %d pos_desc %d count_desc %" PRIsize_t " disp %ld space %lu\n",
  356                                        pStack->count, pConvertor->stack_pos, pos_desc,
  357                                        count_desc, pStack->disp, (unsigned long)iov_len_local ); );
  358             }
  359             if( OPAL_DATATYPE_LOOP == pElem->elem.common.type ) {
                      /* Remember where conv_ptr was, so that the distance covered
                       * by PACK_CONTIGUOUS_LOOP can be added to the displacement
                       * pushed on the stack. */
  360                 ptrdiff_t local_disp = (ptrdiff_t)conv_ptr;
  361                 if( pElem->loop.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
  362                     PACK_CONTIGUOUS_LOOP( pConvertor, pElem, count_desc,
  363                                           conv_ptr, iov_ptr, iov_len_local );
  364                     if( 0 == count_desc ) {  /* completed */
  365                         pos_desc += pElem->loop.items + 1;
  366                         goto update_loop_description;
  367                     }
  368                     /* Save the stack with the correct last_count value. */
  369                 }
  370                 local_disp = (ptrdiff_t)conv_ptr - local_disp;
  371                 PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_LOOP, count_desc,
  372                             pStack->disp + local_disp);
  373                 pos_desc++;
  374             update_loop_description:  /* update the current state */
  375                 conv_ptr = pConvertor->pBaseBuf + pStack->disp;
  376                 UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
  377                 DDT_DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "advance loop" );
  378             }
  379         }
  380     complete_loop:
              /* iov_len_local is the part of this entry left unused, so the
               * difference is the number of bytes written into it. */
  381         iov[iov_count].iov_len -= iov_len_local;  /* update the amount of valid data */
  382         total_packed += iov[iov_count].iov_len;
  383     }
  384     *max_data = total_packed;
  385     pConvertor->bConverted += total_packed;  /* update the already converted bytes */
  386     *out_size = iov_count;
  387     if( pConvertor->bConverted == pConvertor->local_size ) {
  388         pConvertor->flags |= CONVERTOR_COMPLETED;
  389         return 1;
  390     }
  391     /* Save the global position for the next round */
  392     PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, pElem->elem.common.type, count_desc,
  393                 conv_ptr - pConvertor->pBaseBuf );
  394     DO_DEBUG( opal_output( 0, "pack save stack stack_pos %d pos_desc %d count_desc %" PRIsize_t " disp %ld\n",
  395                            pConvertor->stack_pos, pStack->index, pStack->count, pStack->disp ); );
  396     return 0;
  397 }
 398 
 399 /*
 400  * Remember that the first item in the stack (ie. position 0) is the number
 401  * of times the datatype is involved in the operation (ie. the count argument
 402  * in the MPI_ call).
 403  */
  404 /* Convert data from multiple input buffers (as received from the network layer)
  405  * to a contiguous output buffer with a predefined size.
  406  * return OPAL_SUCCESS if everything went OK and if there is still room before the complete
  407  *          conversion of the data (need additional call with other input buffers)
  408  *        1 if everything went fine and the data was completely converted
  409  *       -1 if something went wrong.
  410  */
 411 
 412 static inline void
 413 pack_predefined_heterogeneous( opal_convertor_t* CONVERTOR,
 414                                const dt_elem_desc_t* ELEM,
 415                                size_t* COUNT,
 416                                unsigned char** SOURCE,
 417                                unsigned char** DESTINATION,
 418                                size_t* SPACE )
 419 {
 420     const opal_convertor_master_t* master = (CONVERTOR)->master;
 421     const ddt_elem_desc_t* _elem = &((ELEM)->elem);
 422     unsigned char* _source = (*SOURCE) + _elem->disp;
 423     ptrdiff_t advance;
 424     size_t _count = *(COUNT);
 425     size_t _r_blength;
 426 
 427     _r_blength = master->remote_sizes[_elem->common.type];
 428     if( (_count * _r_blength) > *(SPACE) ) {
 429         _count = (*(SPACE) / _r_blength);
 430         if( 0 == _count ) return;  /* nothing to do */
 431     }
 432 
 433     OPAL_DATATYPE_SAFEGUARD_POINTER( _source, (_count * _elem->extent), (CONVERTOR)->pBaseBuf,
 434                                      (CONVERTOR)->pDesc, (CONVERTOR)->count );
 435     DO_DEBUG( opal_output( 0, "pack [l %s r %s] memcpy( %p, %p, %lu ) => space %lu\n",
 436                            ((ptrdiff_t)(opal_datatype_basicDatatypes[_elem->common.type]->size) == _elem->extent) ? "cont" : "----",
 437                            ((ptrdiff_t)_r_blength == _elem->extent) ? "cont" : "----",
 438                            (void*)*(DESTINATION), (void*)_source, (unsigned long)_r_blength,
 439                            (unsigned long)(*(SPACE)) ); );
 440     master->pFunctions[_elem->common.type]( CONVERTOR, _count,
 441                                             _source, *SPACE, _elem->extent,
 442                                             *DESTINATION, *SPACE, _r_blength,
 443                                             &advance );
 444     _r_blength     *= _count;  /* update the remote length to encompass all the elements */
 445     *(SOURCE)      += _count * _elem->extent;
 446     *(DESTINATION) += _r_blength;
 447     *(SPACE)       -= _r_blength;
 448     *(COUNT)       -= _count;
 449 }
 450 
  /*
   * Pack the data described by pConvertor into the iovec array given by the
   * caller, converting every predefined element through
   * pack_predefined_heterogeneous(). The shortcut for contiguous loops is
   * compiled out here (see the #if 0 regions).
   *
   * pConvertor  convertor holding the datatype description (use_desc->desc), the
   *             user buffer (pBaseBuf) and the stack recording where the previous
   *             call stopped.
   * iov         array of destination buffers; on return every processed entry has
   *             its iov_len reduced to the number of bytes written into it.
   * out_size    in: number of usable entries in iov; out: number of entries processed.
   * max_data    out: total number of bytes packed by this call.
   *
   * Returns 1 (and sets CONVERTOR_COMPLETED) when bConverted reaches local_size.
   * Otherwise the current position is pushed on the stack for the next call and
   * 0 is returned.
   *
   * NOTE(review): UPDATE_INTERNAL_COUNTERS and PUSH_STACK are macros defined
   * outside this file. From the way they are used here they presumably update
   * pElem, count_desc, pStack and stack_pos in place - confirm against their
   * definitions.
   */
  451 int32_t
  452 opal_pack_general_function( opal_convertor_t* pConvertor,
  453                             struct iovec* iov, uint32_t* out_size,
  454                             size_t* max_data )
  455 {
  456     dt_stack_t* pStack;       /* pointer to the position on the stack */
  457     uint32_t pos_desc;        /* actual position in the description of the derived datatype */
  458     size_t count_desc;      /* the number of items already done in the actual pos_desc */
  459     size_t total_packed = 0;  /* total amount packed this time */
  460     dt_elem_desc_t* description;
  461     dt_elem_desc_t* pElem;
  462     const opal_datatype_t *pData = pConvertor->pDesc;
  463     unsigned char *conv_ptr, *iov_ptr;
  464     size_t iov_len_local;
  465     uint32_t iov_count;
  466
  467     DO_DEBUG( opal_output( 0, "opal_convertor_general_pack( %p:%p, {%p, %lu}, %d )\n",
  468                            (void*)pConvertor, (void*)pConvertor->pBaseBuf,
  469                            (void*)iov[0].iov_base, (unsigned long)iov[0].iov_len, *out_size ); );
  470
  471     description = pConvertor->use_desc->desc;
  472
  473     /* For the first step we have to add both displacement to the source. After in the
  474      * main while loop we will set back the conv_ptr to the correct value. This is
  475      * due to the fact that the convertor can stop in the middle of a data with a count
  476      */
  477     pStack = pConvertor->pStack + pConvertor->stack_pos;
  478     pos_desc   = pStack->index;
  479     conv_ptr   = pConvertor->pBaseBuf + pStack->disp;
  480     count_desc = pStack->count;
          /* The top entry only carried the resume position read above. Drop it so
           * that pStack designates the enclosing level; the position is pushed
           * again before returning 0. */
  481     pStack--;
  482     pConvertor->stack_pos--;
  483     pElem = &(description[pos_desc]);
  484
  485     DO_DEBUG( opal_output( 0, "pack start pos_desc %d count_desc %" PRIsize_t " disp %ld\n"
  486                            "stack_pos %d pos_desc %d count_desc %" PRIsize_t " disp %ld\n",
  487                            pos_desc, count_desc, (long)(conv_ptr - pConvertor->pBaseBuf),
  488                            pConvertor->stack_pos, pStack->index, pStack->count, pStack->disp ); );
  489
          /* One pass of the outer loop per destination buffer. */
  490     for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
  491         iov_ptr = (unsigned char *) iov[iov_count].iov_base;
  492         iov_len_local = iov[iov_count].iov_len;
  493         while( 1 ) {
  494             while( pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) {
  495                 /* now here we have a basic datatype */
  496                 DO_DEBUG( opal_output( 0, "pack (%p:%ld, %" PRIsize_t ", %ld) -> (%p, %ld) type %s\n",
  497                                        (void*)pConvertor->pBaseBuf, conv_ptr + pElem->elem.disp - pConvertor->pBaseBuf,
  498                                        count_desc, description[pos_desc].elem.extent,
  499                                        (void*)iov_ptr, iov_len_local,
  500                                        opal_datatype_basicDatatypes[pElem->elem.common.type]->name ); );
  501
                      /* Updates count_desc, conv_ptr, iov_ptr and iov_len_local
                       * through the pointers passed in. */
  502                 pack_predefined_heterogeneous( pConvertor, pElem, &count_desc,
  503                                                &conv_ptr, &iov_ptr, &iov_len_local);
  504 #if 0
  505                 PACK_PREDEFINED_DATATYPE( pConvertor, pElem, count_desc,
  506                                           conv_ptr, iov_ptr, iov_len_local );
  507 #endif
  508                 if( 0 == count_desc ) {  /* completed */
  509                     conv_ptr = pConvertor->pBaseBuf + pStack->disp;
  510                     pos_desc++;  /* advance to the next data */
  511                     UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
  512                     continue;
  513                 }
                      /* Items are left over for this element: stop working on
                       * the current buffer. */
  514                 goto complete_loop;
  515             }
  516             if( OPAL_DATATYPE_END_LOOP == pElem->elem.common.type ) { /* end of the current loop */
  517                 DO_DEBUG( opal_output( 0, "pack end_loop count %" PRIsize_t " stack_pos %d"
  518                                        " pos_desc %d disp %ld space %lu\n",
  519                                        pStack->count, pConvertor->stack_pos,
  520                                        pos_desc, pStack->disp, (unsigned long)iov_len_local ); );
  521                 if( --(pStack->count) == 0 ) { /* end of loop */
  522                     if( 0 == pConvertor->stack_pos ) {
  523                         /* we lie about the size of the next element in order to
  524                          * make sure we exit the main loop.
  525                          */
  526                         *out_size = iov_count;
  527                         goto complete_loop;  /* completed */
  528                     }
  529                     pConvertor->stack_pos--;
  530                     pStack--;
  531                     pos_desc++;
  532                 } else {
                          /* Another iteration: go back to the first element of the
                           * loop and shift the displacement by one extent. */
  533                     pos_desc = pStack->index + 1;
  534                     if( pStack->index == -1 ) {
  535                         pStack->disp += (pData->ub - pData->lb);
  536                     } else {
  537                         assert( OPAL_DATATYPE_LOOP == description[pStack->index].loop.common.type );
  538                         pStack->disp += description[pStack->index].loop.extent;
  539                     }
  540                 }
  541                 conv_ptr = pConvertor->pBaseBuf + pStack->disp;
  542                 UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
  543                 DO_DEBUG( opal_output( 0, "pack new_loop count %" PRIsize_t " stack_pos %d pos_desc %d count_desc %" PRIsize_t " disp %ld space %lu\n",
  544                                        pStack->count, pConvertor->stack_pos, pos_desc,
  545                                        count_desc, pStack->disp, (unsigned long)iov_len_local ); );
  546             }
  547             if( OPAL_DATATYPE_LOOP == pElem->elem.common.type ) {
  548                 ptrdiff_t local_disp = (ptrdiff_t)conv_ptr;
  549 #if 0
  550                 if( pElem->loop.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
  551                     PACK_CONTIGUOUS_LOOP( pConvertor, pElem, count_desc,
  552                                           conv_ptr, iov_ptr, iov_len_local );
  553                     if( 0 == count_desc ) {  /* completed */
  554                         pos_desc += pElem->loop.items + 1;
  555                         goto update_loop_description;
  556                     }
  557                     /* Save the stack with the correct last_count value. */
  558                 }
  559 #endif  /* in a heterogeneous environment we can't handle the contiguous loops */
                      /* With the block above compiled out conv_ptr has not moved,
                       * so local_disp evaluates to 0 here. */
  560                 local_disp = (ptrdiff_t)conv_ptr - local_disp;
  561                 PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_LOOP, count_desc,
  562                             pStack->disp + local_disp);
  563                 pos_desc++;
  564 #if 0
  565             update_loop_description:  /* update the current state */
  566 #endif  /* in a heterogeneous environment we can't handle the contiguous loops */
  567                 conv_ptr = pConvertor->pBaseBuf + pStack->disp;
  568                 UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
  569                 DDT_DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "advance loop" );
  570                 continue;
  571             }
  572         }
  573     complete_loop:
              /* iov_len_local is the part of this entry left unused, so the
               * difference is the number of bytes written into it. */
  574         iov[iov_count].iov_len -= iov_len_local;  /* update the amount of valid data */
  575         total_packed += iov[iov_count].iov_len;
  576     }
  577     *max_data = total_packed;
  578     pConvertor->bConverted += total_packed;  /* update the already converted bytes */
  579     *out_size = iov_count;
  580     if( pConvertor->bConverted == pConvertor->local_size ) {
  581         pConvertor->flags |= CONVERTOR_COMPLETED;
  582         return 1;
  583     }
  584     /* Save the global position for the next round */
  585     PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, pElem->elem.common.type, count_desc,
  586                 conv_ptr - pConvertor->pBaseBuf );
  587     DO_DEBUG( opal_output( 0, "pack save stack stack_pos %d pos_desc %d count_desc %" PRIsize_t" disp %ld\n",
  588                            pConvertor->stack_pos, pStack->index, pStack->count, pStack->disp ); );
  589     return 0;
  590 }

/* [<][>][^][v][top][bottom][index][help] */