root/opal/datatype/opal_convertor.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


DEFINITIONS

This source file includes following definitions.
  1. opal_convertor_get_checksum
  2. opal_convertor_cleanup
  3. opal_convertor_need_buffers
  4. opal_convertor_get_packed_size
  5. opal_convertor_get_unpacked_size
  6. opal_convertor_get_current_pointer
  7. opal_convertor_get_offset_pointer
  8. opal_convertor_copy_and_prepare_for_send
  9. opal_convertor_copy_and_prepare_for_recv
  10. opal_convertor_set_position
  11. opal_convertor_personalize
  12. opal_convertor_clone_with_position

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
   4  *                         University Research and Technology
   5  *                         Corporation.  All rights reserved.
   6  * Copyright (c) 2004-2017 The University of Tennessee and The University
   7  *                         of Tennessee Research Foundation.  All rights
   8  *                         reserved.
   9  * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
  10  *                         University of Stuttgart.  All rights reserved.
  11  * Copyright (c) 2004-2006 The Regents of the University of California.
  12  *                         All rights reserved.
  13  * Copyright (c) 2009      Oak Ridge National Labs.  All rights reserved.
  14  * Copyright (c) 2014      NVIDIA Corporation.  All rights reserved.
  15  * Copyright (c) 2017-2018 Research Organization for Information Science
  16  *                         and Technology (RIST).  All rights reserved.
  17  * Copyright (c) 2017      Intel, Inc. All rights reserved
  18  * $COPYRIGHT$
  19  *
  20  * Additional copyrights may follow
  21  *
  22  * $HEADER$
  23  */
  24 
  25 #ifndef OPAL_CONVERTOR_H_HAS_BEEN_INCLUDED
  26 #define OPAL_CONVERTOR_H_HAS_BEEN_INCLUDED
  27 
  28 #include "opal_config.h"
  29 
  30 #ifdef HAVE_SYS_UIO_H
  31 #include <sys/uio.h>
  32 #endif
  33 
  34 #include "opal/constants.h"
  35 #include "opal/datatype/opal_datatype.h"
  36 #include "opal/prefetch.h"
  37 
  38 BEGIN_C_DECLS
  39 /*
  40  * CONVERTOR SECTION
  41  */
  42 /*
      * Bit flags combined in opal_convertor_t::flags.
      *
      * The low 16 bits (CONVERTOR_DATATYPE_MASK) are kept free for the data
      * flags; every convertor-specific bit defined below lives in bits 16-30.
      *
      * CONVERTOR_TYPE_MASK (0x10FF0000) is the union of bits 16-23
      * (CONVERTOR_SEND_CONVERSION .. CONVERTOR_CUDA_ASYNC) and the
      * CONVERTOR_CUDA_UNIFIED bit.
      */
  43 #define CONVERTOR_DATATYPE_MASK    0x0000FFFF
  44 #define CONVERTOR_SEND_CONVERSION  0x00010000
  45 #define CONVERTOR_RECV             0x00020000
  46 #define CONVERTOR_SEND             0x00040000
  47 #define CONVERTOR_HOMOGENEOUS      0x00080000
  48 #define CONVERTOR_NO_OP            0x00100000
  49 #define CONVERTOR_WITH_CHECKSUM    0x00200000
  50 #define CONVERTOR_CUDA             0x00400000
  51 #define CONVERTOR_CUDA_ASYNC       0x00800000
  52 #define CONVERTOR_TYPE_MASK        0x10FF0000
  53 #define CONVERTOR_STATE_START      0x01000000
  54 #define CONVERTOR_STATE_COMPLETE   0x02000000
  55 #define CONVERTOR_STATE_ALLOC      0x04000000
  56 #define CONVERTOR_COMPLETED        0x08000000
  57 #define CONVERTOR_CUDA_UNIFIED     0x10000000
  58 #define CONVERTOR_HAS_REMOTE_SIZE  0x20000000
  59 #define CONVERTOR_SKIP_CUDA_INIT   0x40000000
  60 
  61 union dt_elem_desc;   /* forward declaration, only used through pointers in this header */
  62 typedef struct opal_convertor_t opal_convertor_t;
  63 
     /*
      * Signature of the routine stored in opal_convertor_t::fAdvance.
      * opal_convertor_pack() and opal_convertor_unpack() are declared with
      * the same parameter list.
      */
  64 typedef int32_t (*convertor_advance_fct_t)( opal_convertor_t* pConvertor,
  65                                             struct iovec* iov,
  66                                             uint32_t* out_size,
  67                                             size_t* max_data );
     /* Allocation callback type; it is not referenced anywhere else in this header. */
  68 typedef void*(*memalloc_fct_t)( size_t* pLength, void* userdata );
     /* memcpy-like callback that also receives the convertor; type of the
      * cbmemcpy field, which exists only when OPAL_CUDA_SUPPORT is set. */
  69 typedef void*(*memcpy_fct_t)( void* dest, const void* src, size_t n, opal_convertor_t* pConvertor );
  70 
  71 /* The master convertor struct (defined in convertor_internal.h) */
  72 struct opal_convertor_master_t;
  73 
  74 struct dt_stack_t {
         /* One entry of the conversion stack kept by a convertor
          * (see opal_convertor_t::pStack and opal_convertor_t::static_stack). */
  75     int32_t           index;    /**< index in the element description */
  76     int16_t           type;     /**< the type used for the last pack/unpack (original or OPAL_DATATYPE_UINT1) */
  77     int16_t           padding;  /**< explicit padding; no use of this field is visible in this header */
  78     size_t            count;    /**< number of times we still have to do it */
  79     ptrdiff_t         disp;     /**< actual displacement depending on the count field */
  80 };
  81 typedef struct dt_stack_t dt_stack_t;
  82 
  83 /**
  84  * Number of dt_stack_t entries embedded in every convertor (static_stack).
      * opal_convertor_cleanup() only frees pStack when stack_size is larger
      * than this value.
  85  */
  86 #define DT_STATIC_STACK_SIZE   5                /**< This should be sufficient for most applications */
  87 
     /**
      * State of one pack/unpack operation: the datatype description and user
      * buffer it works on, the conversion stack, the current progress
      * (stack_pos, bConverted) and the optional checksum.
      */
  88 struct opal_convertor_t {
  89     opal_object_t                 super;          /**< basic superclass */
  90     uint32_t                      remoteArch;     /**< the remote architecture */
  91     uint32_t                      flags;          /**< the properties of this convertor (CONVERTOR_* bits plus data flags in the low 16 bits) */
  92     size_t                        local_size;     /**< overall length of the data on the local machine, compared to bConverted */
  93     size_t                        remote_size;    /**< overall length of the data on the remote machine, compared to bConverted */
  94     const opal_datatype_t*        pDesc;          /**< the datatype description associated with the convertor */
  95     const dt_type_desc_t*         use_desc;       /**< the version used by the convertor (normal or optimized) */
  96     opal_datatype_count_t         count;          /**< the total number of full datatype elements */
  97 
  98     /* --- cacheline boundary (64 bytes - if 64bits arch and !OPAL_ENABLE_DEBUG) --- */
  99     uint32_t                      stack_size;     /**< size of the allocated stack (compared against DT_STATIC_STACK_SIZE) */
 100     unsigned char*                pBaseBuf;       /**< initial buffer as supplied by the user */
 101     dt_stack_t*                   pStack;         /**< the local stack for the actual conversion; cleanup points it back at static_stack */
 102     convertor_advance_fct_t       fAdvance;       /**< pointer to the pack/unpack functions */
 103 
 104     /* --- cacheline boundary (96 bytes - if 64bits arch and !OPAL_ENABLE_DEBUG) --- */
 105     struct opal_convertor_master_t* master;       /**< the master convertor */
 106 
 107     /* All other fields get modified for every call to pack/unpack functions */
 108     uint32_t                      stack_pos;      /**< the actual position on the stack */
 109     size_t                        partial_length; /**< amount of data left over from the last unpack */
 110     size_t                        bConverted;     /**< # of bytes already converted */
 111 
 112     /* --- cacheline boundary (128 bytes - if 64bits arch and !OPAL_ENABLE_DEBUG) --- */
 113     uint32_t                      checksum;       /**< checksum computed by pack/unpack operation */
 114     uint32_t                      csum_ui1;       /**< partial checksum computed by pack/unpack operation */
 115     size_t                        csum_ui2;       /**< partial checksum computed by pack/unpack operation */
 116 
 117     /* --- fields are no longer aligned on cacheline --- */
 118     dt_stack_t                    static_stack[DT_STATIC_STACK_SIZE];  /**< local stack for small datatypes */
 119 
 120 #if OPAL_CUDA_SUPPORT
 121     memcpy_fct_t                  cbmemcpy;       /**< memcpy or cuMemcpy */
 122     void *                        stream;         /**< CUstream for async copy */
 123 #endif
 124 };
 125 OPAL_DECLSPEC OBJ_CLASS_DECLARATION( opal_convertor_t );
 126 
 127 
 128 /*
 129  *
 130  */
 131 static inline uint32_t opal_convertor_get_checksum( opal_convertor_t* convertor )
 132 {
 133     return convertor->checksum;
 134 }
 135 
 136 
 137 /*
 138  * Pack entry point; implemented outside this header. The parameter list is
      * the same as convertor_advance_fct_t.
      * NOTE(review): the exact meaning of iov / out_size / max_data is defined
      * by the implementation - confirm there before relying on it.
 139  */
 140 OPAL_DECLSPEC int32_t opal_convertor_pack( opal_convertor_t* pConv, struct iovec* iov,
 141                                            uint32_t* out_size, size_t* max_data );
 142 
 143 /*
 144  * Unpack entry point; implemented outside this header. Takes the same
      * parameter list as opal_convertor_pack().
 145  */
 146 OPAL_DECLSPEC int32_t opal_convertor_unpack( opal_convertor_t* pConv, struct iovec* iov,
 147                                              uint32_t* out_size, size_t* max_data );
 148 
 149 /*
 150  * Returns a pointer to a convertor for the given remote architecture and
      * mode; implemented outside this header.
      * NOTE(review): allocation and ownership of the returned convertor, and
      * the accepted values of mode, are not visible here - confirm against
      * the implementation.
 151  */
 152 OPAL_DECLSPEC opal_convertor_t* opal_convertor_create( int32_t remote_arch, int32_t mode );
 153 
 154 
 155 /**
 156  * The cleanup function will put the convertor in exactly the same state as after a call
 157  * to opal_convertor_construct. Therefore, all PML can call OBJ_DESTRUCT on the request's
 158  * convertors without having to call OBJ_CONSTRUCT everytime they grab a new one from the
 159  * cache. The OBJ_CONSTRUCT on the convertor should be called only on the first creation
 160  * of a request (not when extracted from the cache).
 161  */
 162 static inline int opal_convertor_cleanup( opal_convertor_t* convertor )
 163 {
 164     if( OPAL_UNLIKELY(convertor->stack_size > DT_STATIC_STACK_SIZE) ) {
 165         free( convertor->pStack );
 166         convertor->pStack     = convertor->static_stack;
 167         convertor->stack_size = DT_STATIC_STACK_SIZE;
 168     }
 169     convertor->pDesc     = NULL;
 170     convertor->stack_pos = 0;
 171     convertor->flags     = OPAL_DATATYPE_FLAG_NO_GAPS | CONVERTOR_COMPLETED;
 172 
 173     return OPAL_SUCCESS;
 174 }
 175 
 176 
 177 /**
 178  * Return:   0 if no packing is required for sending (the upper layer
 179  *             can use directly the pointer to the contiguous user
 180  *             buffer).
 181  *           1 if data does need to be packed, i.e. heterogeneous peers
 182  *             (source arch != dest arch) or non contiguous memory
 183  *             layout.
 184  */
 185 static inline int32_t opal_convertor_need_buffers( const opal_convertor_t* pConvertor )
 186 {
 187     if (OPAL_UNLIKELY(0 == (pConvertor->flags & CONVERTOR_HOMOGENEOUS))) return 1;
 188 #if OPAL_CUDA_SUPPORT
 189     if( pConvertor->flags & (CONVERTOR_CUDA | CONVERTOR_CUDA_UNIFIED)) return 1;
 190 #endif
 191     if( pConvertor->flags & OPAL_DATATYPE_FLAG_NO_GAPS ) return 0;
 192     if( (pConvertor->count == 1) && (pConvertor->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) ) return 0;
 193     return 1;
 194 }
 195 
 196 /**
 197  * Update the size of the remote datatype representation. The size will
 198  * depend on the configuration of the master convertor. In homogeneous
 199  * environments, the local and remote sizes are identical.
      *
      * Called by opal_convertor_get_unpacked_size() when the convertor is not
      * homogeneous and CONVERTOR_HAS_REMOTE_SIZE is not yet set; that caller
      * reads pConv->remote_size afterwards and ignores the return value.
      * NOTE(review): presumably the returned size_t is the computed remote
      * size - confirm against the implementation.
 200  */
 201 size_t
 202 opal_convertor_compute_remote_size( opal_convertor_t* pConv );
 203 
 204 /**
 205  * Return the local size of the convertor (count times the size of the datatype).
 206  */
 207 static inline void opal_convertor_get_packed_size( const opal_convertor_t* pConv,
 208                                                    size_t* pSize )
 209 {
 210     *pSize = pConv->local_size;
 211 }
 212 
 213 
 214 /**
 215  * Return the remote size of the convertor (count times the remote size of the
 216  * datatype). On homogeneous environments the local and remote sizes are
 217  * identical.
 218  */
 219 static inline void opal_convertor_get_unpacked_size( const opal_convertor_t* pConv,
 220                                                      size_t* pSize )
 221 {
 222     if( pConv->flags & CONVERTOR_HOMOGENEOUS ) {
 223         *pSize = pConv->local_size;
 224         return;
 225     }
 226     if( 0 == (CONVERTOR_HAS_REMOTE_SIZE & pConv->flags) ) {
 227         assert(! (pConv->flags & CONVERTOR_SEND));
 228         opal_convertor_compute_remote_size( (opal_convertor_t*)pConv);
 229     }
 230     *pSize = pConv->remote_size;
 231 }
 232 
 233 /**
 234  * Return the current absolute position of the next pack/unpack. This function is
 235  * mostly useful for contiguous datatypes, when we need to get the pointer to the
 236  * contiguous piece of memory.
 237  */
 238 static inline void opal_convertor_get_current_pointer( const opal_convertor_t* pConv,
 239                                                        void** position )
 240 {
 241     unsigned char* base = pConv->pBaseBuf + pConv->bConverted + pConv->pDesc->true_lb;
 242     *position = (void*)base;
 243 }
 244 
 245 static inline void opal_convertor_get_offset_pointer( const opal_convertor_t* pConv,
 246                                                       size_t offset, void** position )
 247 {
 248     unsigned char* base = pConv->pBaseBuf + offset + pConv->pDesc->true_lb;
 249     *position = (void*)base;
 250 }
 251 
 252 
 253 /*
 254  *
 255  */
 256 OPAL_DECLSPEC int32_t opal_convertor_prepare_for_send( opal_convertor_t* convertor,
 257                                                        const struct opal_datatype_t* datatype,
 258                                                        size_t count,
 259                                                        const void* pUserBuf);
 260 
 261 static inline int32_t opal_convertor_copy_and_prepare_for_send( const opal_convertor_t* pSrcConv,
 262                                                                 const struct opal_datatype_t* datatype,
 263                                                                 size_t count,
 264                                                                 const void* pUserBuf,
 265                                                                 int32_t flags,
 266                                                                 opal_convertor_t* convertor )
 267 {
 268     convertor->remoteArch = pSrcConv->remoteArch;
 269     convertor->flags      = pSrcConv->flags | flags;
 270     convertor->master     = pSrcConv->master;
 271 
 272     return opal_convertor_prepare_for_send( convertor, datatype, count, pUserBuf );
 273 }
 274 
 275 /*
 276  *
 277  */
 278 OPAL_DECLSPEC int32_t opal_convertor_prepare_for_recv( opal_convertor_t* convertor,
 279                                                        const struct opal_datatype_t* datatype,
 280                                                        size_t count,
 281                                                        const void* pUserBuf );
 282 static inline int32_t opal_convertor_copy_and_prepare_for_recv( const opal_convertor_t* pSrcConv,
 283                                                                 const struct opal_datatype_t* datatype,
 284                                                                 size_t count,
 285                                                                 const void* pUserBuf,
 286                                                                 int32_t flags,
 287                                                                 opal_convertor_t* convertor )
 288 {
 289     convertor->remoteArch = pSrcConv->remoteArch;
 290     convertor->flags      = (pSrcConv->flags | flags);
 291     convertor->master     = pSrcConv->master;
 292 
 293     return opal_convertor_prepare_for_recv( convertor, datatype, count, pUserBuf );
 294 }
 295 
 296 /*
 297  * Give access to the raw memory layout based on the datatype.
      * Implemented outside this header. The direction of every argument is
      * given by the [IN/OUT] / [OUT] markers next to it.
      * NOTE(review): presumably iov_count holds the capacity of iov on entry
      * and the number of entries filled on return - confirm against the
      * implementation.
 298  */
 299 OPAL_DECLSPEC int32_t
 300 opal_convertor_raw( opal_convertor_t* convertor,  /* [IN/OUT] */
 301                     struct iovec* iov,            /* [IN/OUT] */
 302                     uint32_t* iov_count,          /* [IN/OUT] */
 303                     size_t* length );             /* [OUT]    */
 304 
 305 
 306 /*
 307  * Upper level does not need to call the _nocheck function directly.
 308  */
 309 OPAL_DECLSPEC int32_t
 310 opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
 311                                      size_t* position );
 312 static inline int32_t
 313 opal_convertor_set_position( opal_convertor_t* convertor,
 314                              size_t* position )
 315 {
 316     /*
 317      * Do not allow the convertor to go outside the data boundaries. This test include
 318      * the check for datatype with size zero as well as for convertors with a count of zero.
 319      */
 320     if( OPAL_UNLIKELY(convertor->local_size <= *position) ) {
 321         convertor->flags |= CONVERTOR_COMPLETED;
 322         convertor->bConverted = convertor->local_size;
 323         *position = convertor->bConverted;
 324         return OPAL_SUCCESS;
 325     }
 326 
 327     /*
 328      * If the convertor is already at the correct position we are happy.
 329      */
 330     if( OPAL_LIKELY((*position) == convertor->bConverted) ) return OPAL_SUCCESS;
 331 
 332     /* Remove the completed flag if it's already set */
 333     convertor->flags &= ~CONVERTOR_COMPLETED;
 334 
 335     if( !(convertor->flags & CONVERTOR_WITH_CHECKSUM) &&
 336         (convertor->flags & OPAL_DATATYPE_FLAG_NO_GAPS) &&
 337         (convertor->flags & (CONVERTOR_SEND | CONVERTOR_HOMOGENEOUS)) ) {
 338         /* Contiguous and no checkpoint and no homogeneous unpack */
 339         convertor->bConverted = *position;
 340         return OPAL_SUCCESS;
 341     }
 342 
 343     return opal_convertor_set_position_nocheck( convertor, position );
 344 }
 345 
 346 /*
 347  *
 348  */
 349 static inline int32_t
 350 opal_convertor_personalize( opal_convertor_t* convertor,
 351                             uint32_t flags,
 352                             size_t* position )
 353 {
 354     convertor->flags |= flags;
 355 
 356     if( OPAL_UNLIKELY(NULL == position) )
 357         return OPAL_SUCCESS;
 358     return opal_convertor_set_position( convertor, position );
 359 }
 360 
 361 /*
 362  *
 363  */
 364 OPAL_DECLSPEC int
 365 opal_convertor_clone( const opal_convertor_t* source,
 366                       opal_convertor_t* destination,
 367                       int32_t copy_stack );
 368 
 369 static inline int
 370 opal_convertor_clone_with_position( const opal_convertor_t* source,
 371                                     opal_convertor_t* destination,
 372                                     int32_t copy_stack,
 373                                     size_t* position )
 374 {
 375     (void)opal_convertor_clone( source, destination, copy_stack );
 376     return opal_convertor_set_position( destination, position );
 377 }
 378 
 379 /*
 380  * Debugging helpers; both are implemented outside this header.
      * NOTE(review): presumably they print the state of a convertor and of a
      * conversion stack - the output destination and format are not visible
      * here, confirm against the implementation.
 381  */
 382 OPAL_DECLSPEC void
 383 opal_convertor_dump( opal_convertor_t* convertor );
 384 
 385 OPAL_DECLSPEC void
 386 opal_datatype_dump_stack( const dt_stack_t* pStack,
 387                           int stack_pos,
 388                           const union dt_elem_desc* pDesc,
 389                           const char* name );
 390 
 391 /*
 392  * Positioning routine implemented outside this header; it takes the same
      * arguments as opal_convertor_set_position().
      * NOTE(review): presumably this is the generic stack-rebuilding path used
      * to reach *position - confirm against the implementation.
 393  */
 394 OPAL_DECLSPEC int
 395 opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
 396                                         size_t* position );
 397 
 398 END_C_DECLS
 399 
 400 #endif  /* OPAL_CONVERTOR_H_HAS_BEEN_INCLUDED */

/* [<][>][^][v][top][bottom][index][help] */