root/opal/datatype/opal_datatype_add.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. LMAX
  2. LMIN
  3. IMAX
  4. opal_datatype_add

   1 /* -*- Mode: C; c-basic-offset:4 ; -*- */
   2 /*
   3  * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
   4  *                         University Research and Technology
   5  *                         Corporation.  All rights reserved.
   6  * Copyright (c) 2004-2017 The University of Tennessee and The University
   7  *                         of Tennessee Research Foundation.  All rights
   8  *                         reserved.
   9  * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
  10  *                         University of Stuttgart.  All rights reserved.
  11  * Copyright (c) 2004-2006 The Regents of the University of California.
  12  *                         All rights reserved.
  13  * Copyright (c) 2009      Oak Ridge National Labs.  All rights reserved.
  14  * Copyright (c) 2014      Cisco Systems, Inc.  All rights reserved.
  15  * Copyright (c) 2017      Research Organization for Information Science
  16  *                         and Technology (RIST). All rights reserved.
  17  * $COPYRIGHT$
  18  *
  19  * Additional copyrights may follow
  20  *
  21  * $HEADER$
  22  */
  23 
  24 #include "opal_config.h"
  25 
  26 #include <stddef.h>
  27 
  28 #include "opal/constants.h"
  29 #include "opal/util/output.h"
  30 #include "opal/datatype/opal_datatype.h"
  31 #include "opal/datatype/opal_datatype_internal.h"
  32 
  33 /* macros to play with the flags */
  34 #define SET_CONTIGUOUS_FLAG( INT_VALUE )     (INT_VALUE) = (INT_VALUE) | (OPAL_DATATYPE_FLAG_CONTIGUOUS)
  35 #define SET_NO_GAP_FLAG( INT_VALUE )         (INT_VALUE) = (INT_VALUE) | (OPAL_DATATYPE_FLAG_NO_GAPS)
  36 #define UNSET_CONTIGUOUS_FLAG( INT_VALUE )   (INT_VALUE) = (INT_VALUE) & (~(OPAL_DATATYPE_FLAG_CONTIGUOUS | OPAL_DATATYPE_FLAG_NO_GAPS))
  37 
  38 #if defined(__GNUC__) && !defined(__STDC__)
  39 #define LMAX(A,B)  ({ ptrdiff_t _a = (A), _b = (B); (_a < _b ? _b : _a) })
  40 #define LMIN(A,B)  ({ ptrdiff_t _a = (A), _b = (B); (_a < _b ? _a : _b); })
  41 #define IMAX(A,B)  ({ int _a = (A), _b = (B); (_a < _b ? _b : _a); })
  42 #else
  43 static inline ptrdiff_t LMAX( ptrdiff_t a, ptrdiff_t b ) { return ( a < b ? b : a ); }
  44 static inline ptrdiff_t LMIN( ptrdiff_t a, ptrdiff_t b ) { return ( a < b ? a : b ); }
  45 static inline int  IMAX( int a, int b ) { return ( a < b ? b : a ); }
  46 #endif  /* __GNU__ */
  47 
  48 #define OPAL_DATATYPE_COMPUTE_REQUIRED_ENTRIES( _pdtAdd, _count, _extent, _place_needed) \
  49 { \
  50     if( (_pdtAdd)->flags & OPAL_DATATYPE_FLAG_PREDEFINED ) { /* add a basic datatype */ \
  51         (_place_needed) = ((_extent) == (ptrdiff_t)(_pdtAdd)->size ? 1 : 3); \
  52     } else { \
  53         (_place_needed) = (_pdtAdd)->desc.used; \
  54         if( (_count) != 1 ) { \
  55             if( (_place_needed) < (MAX_DT_COMPONENT_COUNT - 2) ) { \
  56                 (_place_needed) += 2;  /* for the loop markers */ \
  57             } else { \
  58                 /* The data-type contain too many elements. We will be unable \
  59                  * to handle it, so let's just complain by now. \
  60                  */ \
  61                 opal_output( 0, "Too many elements in the datatype. The limit is %ud\n", \
  62                              MAX_DT_COMPONENT_COUNT ); \
  63                 return OPAL_ERROR; \
  64             } \
  65         } \
  66     } \
  67 }
  68 
  69 #define OPAL_DATATYPE_LB_UB_CONT( _count, _disp, _old_lb, _old_ub, _old_extent, _new_lb, _new_ub ) \
  70 { \
  71     if( 0 == _count ) { \
  72         _new_lb = (_old_lb) + (_disp); \
  73         _new_ub = (_old_ub) + (_disp); \
  74     } else { \
  75         ptrdiff_t lower, upper; \
  76         upper = (_disp) + (_old_extent) * ((_count) - 1); \
  77         lower = (_disp); \
  78         if( lower < upper ) { \
  79             _new_lb = lower; \
  80             _new_ub = upper; \
  81          } else { \
  82             _new_lb = upper; \
  83             _new_ub = lower; \
  84          } \
  85          _new_lb += (_old_lb); \
  86          _new_ub += (_old_ub); \
  87     }\
  88 }
  89 
/* When we add a datatype we should update its definition depending on the
 * initial displacement for the whole data, so the displacement of all elements
 * inside a datatype depends only on the loop displacement and its own
 * displacement.
 */
  95 
/* We have 3 different structures to update:
 * - the first is the real representation of the datatype
 * - the second is the internal representation using extents
 * - the last is the representation used for send operations
 *
 * If the count is ZERO we don't have to add the pdtAdd datatype. But we have to
 * be sure that the pdtBase datatype is correctly initialized with all fields
 * set to ZERO if it's an empty datatype.
 */
 105 int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtAdd,
 106                            size_t count, ptrdiff_t disp, ptrdiff_t extent )
 107 {
 108     uint32_t newLength, place_needed = 0, i;
 109     short localFlags = 0;  /* no specific options yet */
 110     dt_elem_desc_t *pLast, *pLoop = NULL;
 111     ptrdiff_t lb, ub, true_lb, true_ub, epsilon, old_true_ub;
 112 
 113     /**
 114      * From MPI-3, page 84, lines 18-20: Most datatype constructors have
 115      * replication count or block length arguments. Allowed values are
 116      * non-negative integers. If the value is zero, no elements are generated in
 117      * the type map and there is no effect on datatype bounds or extent.
 118      */
 119     if( 0 == count ) return OPAL_SUCCESS;
 120 
 121     /* the extent should always be positive. So a negative value here have a
 122      * special meaning ie. default extent as computed by ub - lb
 123      */
 124     if( extent == -1 ) extent = (pdtAdd->ub - pdtAdd->lb);
 125 
 126     /* Deal with the special markers (OPAL_DATATYPE_LB and OPAL_DATATYPE_UB) */
 127     if( OPAL_DATATYPE_LB == pdtAdd->id ) {
 128         pdtBase->bdt_used |= (((uint32_t)1) << OPAL_DATATYPE_LB);
 129         if( pdtBase->flags & OPAL_DATATYPE_FLAG_USER_LB ) {
 130             pdtBase->lb = LMIN( pdtBase->lb, disp );
 131         } else {
 132             pdtBase->lb = disp;
 133             pdtBase->flags |= OPAL_DATATYPE_FLAG_USER_LB;
 134         }
 135         if( (pdtBase->ub - pdtBase->lb) != (ptrdiff_t)pdtBase->size ) {
 136             pdtBase->flags &= ~OPAL_DATATYPE_FLAG_NO_GAPS;
 137         }
 138         return OPAL_SUCCESS; /* Just ignore the OPAL_DATATYPE_LOOP and OPAL_DATATYPE_END_LOOP */
 139     } else if( OPAL_DATATYPE_UB == pdtAdd->id ) {
 140         pdtBase->bdt_used |= (((uint32_t)1) << OPAL_DATATYPE_UB);
 141         if( pdtBase->flags & OPAL_DATATYPE_FLAG_USER_UB ) {
 142             pdtBase->ub = LMAX( pdtBase->ub, disp );
 143         } else {
 144             pdtBase->ub = disp;
 145             pdtBase->flags |= OPAL_DATATYPE_FLAG_USER_UB;
 146         }
 147         if( (pdtBase->ub - pdtBase->lb) != (ptrdiff_t)pdtBase->size ) {
 148             pdtBase->flags &= ~OPAL_DATATYPE_FLAG_NO_GAPS;
 149         }
 150         return OPAL_SUCCESS; /* Just ignore the OPAL_DATATYPE_LOOP and OPAL_DATATYPE_END_LOOP */
 151     }
 152 
 153     /* Compute the number of entries we need in the datatype description */
 154     OPAL_DATATYPE_COMPUTE_REQUIRED_ENTRIES( pdtAdd, count, extent, place_needed );
 155 
 156     /*
 157      * Compute the lower and upper bound of the datatype. We do it in 2 steps.
 158      * First compute the lb and ub of the new datatype taking in account the
 159      * count. Then update the lb value depending on the user markers and
 160      * update the global lb and ub.
 161      */
 162     OPAL_DATATYPE_LB_UB_CONT( count, disp, pdtAdd->lb, pdtAdd->ub, extent, lb, ub );
 163 
 164     /* Compute the true_lb and true_ub for the datatype to be added, taking
 165      * in account the number of repetions. These values do not include the
 166      * potential gaps at the begining and at the end of the datatype.
 167      */
 168     true_lb = lb - (pdtAdd->lb - pdtAdd->true_lb);
 169     true_ub = ub - (pdtAdd->ub - pdtAdd->true_ub);
 170     if( true_lb > true_ub ) {
 171         old_true_ub = true_lb;
 172         true_lb = true_ub;
 173         true_ub = old_true_ub;
 174     }
 175 
 176 #if 0
 177     /* Avoid claiming overlap as much as possible. */
 178     if( !(pdtBase->flags & OPAL_DATATYPE_FLAG_OVERLAP) ) {
 179         if( ((disp + true_lb) >= pdtBase->true_ub) ||
 180             ((disp + true_ub) <= pdtBase->true_lb) ) {
 181         } else {
 182             /* potential overlap */
 183         }
 184     }
 185 #endif
 186 
 187     /* The lower bound should be inherited from the parent if and only
 188      * if the USER has explicitly set it. The result lb is the MIN between
 189      * the all lb + disp if and only if all or nobody flags's contain the LB.
 190      */
 191     if( (pdtAdd->flags ^ pdtBase->flags) & OPAL_DATATYPE_FLAG_USER_LB ) {
 192         if( pdtBase->flags & OPAL_DATATYPE_FLAG_USER_LB ) {
 193             lb = pdtBase->lb;  /* base type has a user provided lb */
 194         }
 195         pdtBase->flags |= OPAL_DATATYPE_FLAG_USER_LB;
 196     } else {
 197         /* both of them have the LB flag or both of them dont have it */
 198         lb = LMIN( pdtBase->lb, lb );
 199     }
 200 
 201     /* the same apply for the upper bound except for the case where
 202      * either of them has the flag UB, in which case we should
 203      * compute the UB including the natural alignement of the data.
 204      */
 205     if( (pdtBase->flags ^ pdtAdd->flags) & OPAL_DATATYPE_FLAG_USER_UB ) {
 206         if( pdtBase->flags & OPAL_DATATYPE_FLAG_USER_UB ) {
 207             ub = pdtBase->ub;
 208         }
 209         pdtBase->flags |= OPAL_DATATYPE_FLAG_USER_UB;
 210     } else {
 211         /* both of them have the UB flag or both of them dont have it */
 212         /* we should compute the extent depending on the alignement */
 213         ub = LMAX( pdtBase->ub, ub );
 214     }
 215     /* While the true_lb and true_ub have to be ordered to have the true_lb lower
 216      * than the true_ub, the ub and lb do not have to be ordered. They should be
 217      * as the user define them.
 218      */
 219     pdtBase->lb = lb;
 220     pdtBase->ub = ub;
 221 
 222     /* compute the new memory alignement */
 223     pdtBase->align = IMAX( pdtBase->align, pdtAdd->align );
 224 
 225     /* Now that we have the new ub and the alignment we should update the ub to match
 226      * the new alignement. We have to add an epsilon that is the least nonnegative
 227      * increment needed to roung the extent to the next multiple of the alignment.
 228      * This rule apply only if there is user specified upper bound as stated in the
 229      * MPI standard MPI 1.2 page 71.
 230      */
 231     if( !(pdtBase->flags & OPAL_DATATYPE_FLAG_USER_UB) ) {
 232         epsilon = (pdtBase->ub - pdtBase->lb) % pdtBase->align;
 233         if( 0 != epsilon ) {
 234             pdtBase->ub += (pdtBase->align - epsilon);
 235         }
 236     }
 237     /* now we know it contain some data */
 238     pdtBase->flags |= OPAL_DATATYPE_FLAG_DATA;
 239 
 240     /*
 241      * MPI Standard 3.0 Chapter 4.1: Most datatype constructors have
 242      * replication count or block length arguments. If the value is zero,
 243      * no elements are generated in the type map and there is no effect
 244      * on datatype bounds or extent.
 245      *
 246      * Therefore we support it here in the upper part of this function. As an
 247      * extension, the count set to zero can be used to reset the alignment of
 248      * the data, but not for changing the true_lb and true_ub.
 249      */
 250     if( (0 == count) || (0 == pdtAdd->size) ) {
 251         return OPAL_SUCCESS;
 252     }
 253 
 254     /* Now, once we know everything is fine and there are some bytes in
 255      * the data-type we can update the size, true_lb and true_ub.
 256      */
 257     pdtBase->size += count * pdtAdd->size;
 258     if( 0 == pdtBase->nbElems ) old_true_ub = disp;
 259     else                        old_true_ub = pdtBase->true_ub;
 260     if( 0 != pdtBase->size ) {
 261         pdtBase->true_lb = LMIN( true_lb, pdtBase->true_lb );
 262         pdtBase->true_ub = LMAX( true_ub, pdtBase->true_ub );
 263     } else {
 264         pdtBase->true_lb = true_lb;
 265         pdtBase->true_ub = true_ub;
 266     }
 267 
 268     pdtBase->bdt_used |= pdtAdd->bdt_used;
 269     newLength = pdtBase->desc.used + place_needed;
 270     if( newLength > pdtBase->desc.length ) {
 271         newLength = ((newLength / DT_INCREASE_STACK) + 1 ) * DT_INCREASE_STACK;
 272         pdtBase->desc.desc   = (dt_elem_desc_t*)realloc( pdtBase->desc.desc,
 273                                                          sizeof(dt_elem_desc_t) * newLength );
 274         pdtBase->desc.length = newLength;
 275     }
 276     pLast = &(pdtBase->desc.desc[pdtBase->desc.used]);
 277     /* The condition to be able to use the optimized path here is to be in presence
 278      * of an predefined contiguous datatype. This part is unable to handle any
 279      * predefined non contiguous datatypes (like MPI_SHORT_INT).
 280      */
 281     if( (pdtAdd->flags & (OPAL_DATATYPE_FLAG_PREDEFINED | OPAL_DATATYPE_FLAG_DATA)) == (OPAL_DATATYPE_FLAG_PREDEFINED | OPAL_DATATYPE_FLAG_DATA) ) {
 282         if( NULL != pdtBase->ptypes )
 283             pdtBase->ptypes[pdtAdd->id] += count;
 284         pLast->elem.common.type      = pdtAdd->id;
 285         pLast->elem.count            = count;
 286         pLast->elem.disp             = disp;
 287         pLast->elem.extent           = extent;
 288         pdtBase->desc.used++;
 289         pLast->elem.common.flags     = pdtAdd->flags & ~(OPAL_DATATYPE_FLAG_COMMITTED);
 290         if( (extent != (ptrdiff_t)pdtAdd->size) && (count > 1) ) {  /* gaps around the datatype */
 291             pLast->elem.common.flags &= ~(OPAL_DATATYPE_FLAG_CONTIGUOUS | OPAL_DATATYPE_FLAG_NO_GAPS);
 292         }
 293     } else {
 294         /* keep trace of the total number of basic datatypes in the datatype definition */
 295         pdtBase->loops += pdtAdd->loops;
 296         pdtBase->flags |= (pdtAdd->flags & OPAL_DATATYPE_FLAG_USER_LB);
 297         pdtBase->flags |= (pdtAdd->flags & OPAL_DATATYPE_FLAG_USER_UB);
 298         if( (NULL != pdtBase->ptypes) && (NULL != pdtAdd->ptypes) ) {
 299             for( i = OPAL_DATATYPE_FIRST_TYPE; i < OPAL_DATATYPE_MAX_PREDEFINED; i++ )
 300                 if( pdtAdd->ptypes[i] != 0 ) pdtBase->ptypes[i] += (count * pdtAdd->ptypes[i]);
 301         }
 302         if( (1 == pdtAdd->desc.used) && (extent == (pdtAdd->ub - pdtAdd->lb)) &&
 303             (extent == pdtAdd->desc.desc[0].elem.extent) ){
 304             pLast->elem        = pdtAdd->desc.desc[0].elem;
 305             pLast->elem.count *= count;
 306             pLast->elem.disp  += disp;
 307             pdtBase->desc.used++;
 308         } else {
 309             /* if the extent of the datatype is the same as the extent of the loop
 310              * description of the datatype then we simply have to update the main loop.
 311              */
 312             if( count != 1 ) {
 313                 pLoop = pLast;
 314                 CREATE_LOOP_START( pLast, count, pdtAdd->desc.used + 1, extent,
 315                                    (pdtAdd->flags & ~(OPAL_DATATYPE_FLAG_COMMITTED)) );
 316                 pdtBase->loops     += 2;
 317                 pdtBase->desc.used += 2;
 318                 pLast++;
 319             }
 320 
 321             for( i = 0; i < pdtAdd->desc.used; i++ ) {
 322                 pLast->elem               = pdtAdd->desc.desc[i].elem;
 323                 if( OPAL_DATATYPE_FLAG_DATA & pLast->elem.common.flags )
 324                     pLast->elem.disp += disp;
 325                 else if( OPAL_DATATYPE_END_LOOP == pLast->elem.common.type ) {
 326                     pLast->end_loop.first_elem_disp += disp;
 327                 }
 328                 pLast++;
 329             }
 330             pdtBase->desc.used += pdtAdd->desc.used;
 331             if( pLoop != NULL ) {
 332                 int index = GET_FIRST_NON_LOOP( pLoop );
 333                 assert( pLoop[index].elem.common.flags & OPAL_DATATYPE_FLAG_DATA );
 334                 CREATE_LOOP_END( pLast, pdtAdd->desc.used + 1, pLoop[index].elem.disp,
 335                                  pdtAdd->size, pLoop->loop.common.flags );
 336             }
 337         }
 338         /* should I add some space until the extent of this datatype ? */
 339     }
 340 
 341     /* Is the data still contiguous ?
 342      * The only way for the data to be contiguous is to have the true extent
 343      * equal to his size. In other words to avoid having internal gaps between
 344      * elements. If any of the data are overlapping then this method will not work.
 345      */
 346     localFlags = pdtBase->flags & pdtAdd->flags;
 347     UNSET_CONTIGUOUS_FLAG(pdtBase->flags);
 348     if( (localFlags & OPAL_DATATYPE_FLAG_CONTIGUOUS)             /* both type were contiguous */
 349         && ((disp + pdtAdd->true_lb) == old_true_ub)  /* and there is no gap between them */
 350         && ( ((ptrdiff_t)pdtAdd->size == extent)      /* the size and the extent of the
 351                                                        * added type have to match */
 352              || (count < 2)) ) {                      /* if the count is bigger than 2 */
 353             SET_CONTIGUOUS_FLAG(pdtBase->flags);
 354             if( (ptrdiff_t)pdtBase->size == (pdtBase->ub - pdtBase->lb) )
 355                 SET_NO_GAP_FLAG(pdtBase->flags);
 356     }
 357 
 358     /* If the NO_GAP flag is set the contiguous have to be set too */
 359     if( pdtBase->flags & OPAL_DATATYPE_FLAG_NO_GAPS ) {
 360         assert( pdtBase->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS );
 361     }
 362     pdtBase->nbElems += (count * pdtAdd->nbElems);
 363 
 364     return OPAL_SUCCESS;
 365 }

/* [<][>][^][v][top][bottom][index][help] */