root/ompi/mca/common/ompio/common_ompio_file_view.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. datatype_duplicate
  2. mca_common_ompio_set_view
  3. get_contiguous_chunk_size

   1 /*
   2  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
   3  *                         University Research and Technology
   4  *                         Corporation.  All rights reserved.
   5  * Copyright (c) 2004-2017 The University of Tennessee and The University
   6  *                         of Tennessee Research Foundation.  All rights
   7  *                         reserved.
   8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
   9  *                         University of Stuttgart.  All rights reserved.
  10  * Copyright (c) 2004-2005 The Regents of the University of California.
  11  *                         All rights reserved.
  12  * Copyright (c) 2008-2019 University of Houston. All rights reserved.
  13  * Copyright (c) 2017-2018 Research Organization for Information Science
  14  *                         and Technology (RIST). All rights reserved.
  15  * Copyright (c) 2017      IBM Corporation. All rights reserved.
  16  * $COPYRIGHT$
  17  *
  18  * Additional copyrights may follow
  19  *
  20  * $HEADER$
  21  */
  22 
  23 #include "ompi_config.h"
  24 #include "opal/datatype/opal_convertor.h"
  25 #include "ompi/datatype/ompi_datatype.h"
  26 #include <stdlib.h>
  27 #include <stdio.h>
  28 
  29 #include "common_ompio.h"
  30 #include "common_ompio_aggregators.h"
  31 #include "ompi/mca/fcoll/base/base.h"
  32 #include "ompi/mca/topo/topo.h"
  33 
  34 static OMPI_MPI_OFFSET_TYPE get_contiguous_chunk_size (ompio_file_t *, int flag);
  35 static int datatype_duplicate (ompi_datatype_t *oldtype, ompi_datatype_t **newtype );
  36 static int datatype_duplicate  (ompi_datatype_t *oldtype, ompi_datatype_t **newtype )
  37 {
  38     ompi_datatype_t *type;
  39     if( ompi_datatype_is_predefined(oldtype) ) {
  40         OBJ_RETAIN(oldtype);
  41         *newtype = oldtype;
  42         return OMPI_SUCCESS;
  43     }
  44 
  45     if ( OMPI_SUCCESS != ompi_datatype_duplicate (oldtype, &type)){
  46         ompi_datatype_destroy (&type);
  47         return MPI_ERR_INTERN;
  48     }
  49     
  50     ompi_datatype_set_args( type, 0, NULL, 0, NULL, 1, &oldtype, MPI_COMBINER_DUP );
  51 
  52     *newtype = type;
  53     return OMPI_SUCCESS;
  54 }
  55 
  56 
  57 int mca_common_ompio_set_view (ompio_file_t *fh,
  58                                OMPI_MPI_OFFSET_TYPE disp,
  59                                ompi_datatype_t *etype,
  60                                ompi_datatype_t *filetype,
  61                                const char *datarep,
  62                                opal_info_t *info)
  63 {
  64     int ret=OMPI_SUCCESS;
  65     size_t max_data = 0;
  66     int i, flag;
  67     int num_groups = 0;
  68     int num_cb_nodes=-1;
  69     mca_common_ompio_contg *contg_groups=NULL;
  70 
  71     size_t ftype_size;
  72     ptrdiff_t ftype_extent, lb, ub;
  73     ompi_datatype_t *newfiletype;
  74 
  75     if ( NULL != fh->f_etype ) {
  76         ompi_datatype_destroy (&fh->f_etype);
  77     }
  78     if ( NULL != fh->f_filetype ) {
  79         ompi_datatype_destroy (&fh->f_filetype);
  80     }
  81     if ( NULL != fh->f_orig_filetype ) {
  82         ompi_datatype_destroy (&fh->f_orig_filetype);
  83     }
  84     if (NULL != fh->f_decoded_iov) {
  85         free (fh->f_decoded_iov);
  86         fh->f_decoded_iov = NULL;
  87     }
  88 
  89     if (NULL != fh->f_datarep) {
  90         free (fh->f_datarep);
  91         fh->f_datarep = NULL;
  92     }
  93 
  94     if (NULL != fh->f_file_convertor) {
  95         opal_convertor_cleanup (fh->f_file_convertor);
  96         free (fh->f_file_convertor);
  97         fh->f_file_convertor = NULL;
  98     }
  99     
 100     /* Reset the flags first */
 101     if ( fh->f_flags & OMPIO_CONTIGUOUS_FVIEW ) {
 102         fh->f_flags &= ~OMPIO_CONTIGUOUS_FVIEW;
 103     }
 104     if ( fh->f_flags & OMPIO_UNIFORM_FVIEW ) {
 105         fh->f_flags &= ~OMPIO_UNIFORM_FVIEW;
 106     }
 107     if ( fh->f_flags & OMPIO_DATAREP_NATIVE ) {
 108         fh->f_flags &= ~OMPIO_DATAREP_NATIVE;
 109     }
 110     fh->f_datarep = strdup (datarep);
 111 
 112     if ( !(strcmp(datarep, "external32") && strcmp(datarep, "EXTERNAL32"))) {
 113         fh->f_file_convertor = malloc ( sizeof(struct opal_convertor_t) );
 114         if ( NULL == fh->f_file_convertor ) {
 115             return OMPI_ERR_OUT_OF_RESOURCE;
 116         }
 117         opal_convertor_clone (ompi_mpi_external32_convertor, fh->f_file_convertor, 0);
 118     }
 119     else {
 120         fh->f_file_convertor = opal_convertor_create (opal_local_arch, 0);
 121         fh->f_flags |= OMPIO_DATAREP_NATIVE;
 122     }
 123     
 124     datatype_duplicate (filetype, &fh->f_orig_filetype );
 125     opal_datatype_get_extent(&filetype->super, &lb, &ftype_extent);
 126     opal_datatype_type_size (&filetype->super, &ftype_size);
 127 
 128     if ( etype == filetype                             &&
 129          ompi_datatype_is_predefined (filetype )       &&
 130          ftype_extent == (ptrdiff_t)ftype_size ){
 131         ompi_datatype_create_contiguous(MCA_IO_DEFAULT_FILE_VIEW_SIZE,
 132                                         &ompi_mpi_byte.dt,
 133                                         &newfiletype);
 134         ompi_datatype_commit (&newfiletype);
 135     }
 136     else {
 137         newfiletype = filetype;
 138         fh->f_flags |= OMPIO_FILE_VIEW_IS_SET;
 139     }
 140 
 141     fh->f_iov_count   = 0;
 142     fh->f_disp        = disp;
 143     fh->f_offset      = disp;
 144     fh->f_total_bytes = 0;
 145     fh->f_index_in_file_view=0;
 146     fh->f_position_in_file_view=0;
 147 
 148     mca_common_ompio_decode_datatype (fh,
 149                                       newfiletype,
 150                                       1,
 151                                       NULL,
 152                                       &max_data,
 153                                       fh->f_file_convertor,
 154                                       &fh->f_decoded_iov,
 155                                       &fh->f_iov_count);
 156 
 157     opal_datatype_get_extent(&newfiletype->super, &lb, &fh->f_view_extent);
 158     opal_datatype_type_ub   (&newfiletype->super, &ub);
 159     opal_datatype_type_size (&etype->super, &fh->f_etype_size);
 160     opal_datatype_type_size (&newfiletype->super, &fh->f_view_size);
 161     datatype_duplicate (etype, &fh->f_etype);
 162     // This file type is our own representation. The original is stored
 163     // in orig_file type, No need to set args on this one.
 164     ompi_datatype_duplicate (newfiletype, &fh->f_filetype);
 165 
 166     if ( (fh->f_view_size % fh->f_etype_size) ) {
 167         // File view is not a multiple of the etype.
 168         return MPI_ERR_ARG;
 169     }
 170 
 171     if( SIMPLE_PLUS == OMPIO_MCA_GET(fh, grouping_option) ) {
 172         fh->f_cc_size = get_contiguous_chunk_size (fh, 1);
 173     }
 174     else {
 175         fh->f_cc_size = get_contiguous_chunk_size (fh, 0);
 176     }
 177 
 178     if (opal_datatype_is_contiguous_memory_layout(&etype->super,1)) {
 179         if (opal_datatype_is_contiguous_memory_layout(&filetype->super,1) &&
 180             fh->f_view_extent == (ptrdiff_t)fh->f_view_size ) {
 181             fh->f_flags |= OMPIO_CONTIGUOUS_FVIEW;
 182         }
 183     }
 184 
 185     contg_groups = (mca_common_ompio_contg*) calloc ( 1, fh->f_size * sizeof(mca_common_ompio_contg));
 186     if (NULL == contg_groups) {
 187         opal_output (1, "OUT OF MEMORY\n");
 188         return OMPI_ERR_OUT_OF_RESOURCE;
 189     }
 190     for( i = 0; i < fh->f_size; i++){
 191        contg_groups[i].procs_in_contg_group = (int*)calloc (1,fh->f_size * sizeof(int));
 192        if(NULL == contg_groups[i].procs_in_contg_group){
 193           int j;
 194           opal_output (1, "OUT OF MEMORY\n");
 195           for(j=0; j<i; j++) {
 196               free(contg_groups[j].procs_in_contg_group);
 197           }
 198           free(contg_groups);
 199           return OMPI_ERR_OUT_OF_RESOURCE;
 200        }
 201     }
 202 
 203     char char_stripe[MPI_MAX_INFO_VAL];
 204     /* Check the info object set during File_open */
 205     opal_info_get (fh->f_info, "cb_nodes", MPI_MAX_INFO_VAL, char_stripe, &flag);
 206     if ( flag ) {
 207         sscanf ( char_stripe, "%d", &num_cb_nodes );
 208         OMPIO_MCA_PRINT_INFO(fh, "cb_nodes", char_stripe, "");
 209     }
 210     else {
 211         /* Check the info object set during file_set_view */
 212         opal_info_get (info, "cb_nodes", MPI_MAX_INFO_VAL, char_stripe, &flag);
 213         if ( flag ) {
 214             sscanf ( char_stripe, "%d", &num_cb_nodes );
 215             OMPIO_MCA_PRINT_INFO(fh, "cb_nodes", char_stripe, "");
 216         }
 217     }
 218         
 219 
 220     if ( -1 != OMPIO_MCA_GET(fh, num_aggregators) || -1 != num_cb_nodes) {
 221         /* The user requested a particular number of aggregators */
 222         num_groups = OMPIO_MCA_GET(fh, num_aggregators);                                       
 223         if ( -1 != num_cb_nodes ) {
 224             /* A hint through an  MPI Info object trumps an mca parameter value */
 225             num_groups = num_cb_nodes;
 226         }
 227         if ( num_groups > fh->f_size ) {
 228             num_groups = fh->f_size;
 229         }
 230         mca_common_ompio_forced_grouping ( fh, num_groups, contg_groups);
 231     }
 232     else {
 233         if ( SIMPLE != OMPIO_MCA_GET(fh, grouping_option) && 
 234              SIMPLE_PLUS != OMPIO_MCA_GET(fh, grouping_option) ) {
 235             ret = mca_common_ompio_fview_based_grouping(fh,
 236                                                         &num_groups,
 237                                                         contg_groups);
 238             if ( OMPI_SUCCESS != ret ) {
 239                 opal_output(1, "mca_common_ompio_set_view: mca_io_ompio_fview_based_grouping failed\n");
 240                 goto exit;
 241             }
 242         }
 243         else {
 244             int done=0;
 245             int ndims;
 246             
 247             if ( fh->f_comm->c_flags & OMPI_COMM_CART ){
 248                 ret = fh->f_comm->c_topo->topo.cart.cartdim_get( fh->f_comm, &ndims);
 249                 if ( OMPI_SUCCESS != ret ){
 250                     goto exit;
 251                 }
 252                 if ( ndims > 1 ) { 
 253                     ret = mca_common_ompio_cart_based_grouping( fh, 
 254                                                                 &num_groups, 
 255                                                                 contg_groups);
 256                     if (OMPI_SUCCESS != ret ) {
 257                         opal_output(1, "mca_common_ompio_set_view: mca_io_ompio_cart_based_grouping failed\n");
 258                         goto exit;
 259                     }
 260                     done=1;
 261                 }
 262             }
 263             
 264             if ( !done ) {
 265                 ret = mca_common_ompio_simple_grouping(fh,
 266                                                        &num_groups,
 267                                                        contg_groups);
 268                 if ( OMPI_SUCCESS != ret ){
 269                     opal_output(1, "mca_common_ompio_set_view: mca_io_ompio_simple_grouping failed\n");
 270                     goto exit;
 271                 }
 272             }
 273         }
 274     }
 275 #ifdef DEBUG_OMPIO
 276     if ( fh->f_rank == 0) {
 277         int ii, jj;
 278         printf("BEFORE finalize_init: comm size = %d num_groups = %d\n", fh->f_size, num_groups);
 279         for ( ii=0; ii< num_groups; ii++ ) {
 280             printf("contg_groups[%d].procs_per_contg_group=%d\n", ii, contg_groups[ii].procs_per_contg_group); 
 281             printf("contg_groups[%d].procs_in_contg_group.[", ii);
 282 
 283             for ( jj=0; jj< contg_groups[ii].procs_per_contg_group; jj++ ) {
 284                 printf("%d,", contg_groups[ii].procs_in_contg_group[jj]);
 285             }
 286             printf("]\n");
 287         }
 288     }
 289 #endif
 290 
 291     ret = mca_common_ompio_finalize_initial_grouping(fh,
 292                                                      num_groups,
 293                                                      contg_groups);
 294     if ( OMPI_SUCCESS != ret ) {
 295         opal_output(1, "mca_common_ompio_set_view: mca_io_ompio_finalize_initial_grouping failed\n");
 296         goto exit;
 297     }
 298 
 299     if ( etype == filetype                              &&
 300          ompi_datatype_is_predefined (filetype )        &&
 301          ftype_extent == (ptrdiff_t)ftype_size ){
 302         ompi_datatype_destroy ( &newfiletype );
 303     }
 304 
 305     bool info_is_set=false;
 306     opal_info_get (fh->f_info, "collective_buffering", MPI_MAX_INFO_VAL, char_stripe, &flag);
 307     if ( flag ) {
 308         if ( strncmp ( char_stripe, "false", sizeof("true") )){
 309             info_is_set = true;
 310             OMPIO_MCA_PRINT_INFO(fh, "collective_buffering", char_stripe, "enforcing using individual fcoll component");
 311         } else {
 312             OMPIO_MCA_PRINT_INFO(fh, "collective_buffering", char_stripe, "");
 313         }
 314     } else {
 315         opal_info_get (info, "collective_buffering", MPI_MAX_INFO_VAL, char_stripe, &flag);
 316         if ( flag ) {
 317             if ( strncmp ( char_stripe, "false", sizeof("true") )){
 318                 info_is_set = true;
 319                 OMPIO_MCA_PRINT_INFO(fh, "collective_buffering", char_stripe, "enforcing using individual fcoll component");
 320             } else {
 321                 OMPIO_MCA_PRINT_INFO(fh, "collective_buffering", char_stripe, "");
 322             }
 323         }
 324     }
 325 
 326     mca_fcoll_base_component_t *preferred =NULL;
 327     if ( info_is_set ) {
 328         /* user requested using an info object to disable collective buffering. */
 329         preferred = mca_fcoll_base_component_lookup ("individual");
 330     }
 331     ret = mca_fcoll_base_file_select (fh, (mca_base_component_t *)preferred);
 332     if ( OMPI_SUCCESS != ret ) {
 333         opal_output(1, "mca_common_ompio_set_view: mca_fcoll_base_file_select() failed\n");
 334         goto exit;
 335     }
 336 
 337 
 338     if ( NULL != fh->f_sharedfp ) {
 339         ret = fh->f_sharedfp->sharedfp_seek( fh, 0, MPI_SEEK_SET);
 340     }
 341 
 342 exit:
 343     for( i = 0; i < fh->f_size; i++){
 344        free(contg_groups[i].procs_in_contg_group);
 345     }
 346     free(contg_groups);
 347 
 348     return ret;
 349 }
 350 
 351 OMPI_MPI_OFFSET_TYPE get_contiguous_chunk_size (ompio_file_t *fh, int flag)
 352 {
 353     int uniform = 0;
 354     OMPI_MPI_OFFSET_TYPE avg[3] = {0,0,0};
 355     OMPI_MPI_OFFSET_TYPE global_avg[3] = {0,0,0};
 356     int i = 0;
 357 
 358     /* This function does two things: first, it determines the average data chunk
 359     ** size in the file view for each process and across all processes.
 360     ** Second, it establishes whether the view across all processes is uniform.
 361     ** By definition, uniform means:
 362     ** 1. the file view of each process has the same number of contiguous sections
 363     ** 2. each section in the file view has exactly the same size
 364     */
 365 
 366     if ( flag  ) {
 367         global_avg[0] = MCA_IO_DEFAULT_FILE_VIEW_SIZE;
 368     }
 369     else {
 370         for (i=0 ; i<(int)fh->f_iov_count ; i++) {
 371             avg[0] += fh->f_decoded_iov[i].iov_len;
 372             if (i && 0 == uniform) {
 373                 if (fh->f_decoded_iov[i].iov_len != fh->f_decoded_iov[i-1].iov_len) {
 374                     uniform = 1;
 375                 }
 376             }
 377         }
 378         if ( 0 != fh->f_iov_count ) {
 379             avg[0] = avg[0]/fh->f_iov_count;
 380         }
 381         avg[1] = (OMPI_MPI_OFFSET_TYPE) fh->f_iov_count;
 382         avg[2] = (OMPI_MPI_OFFSET_TYPE) uniform;
 383         
 384         fh->f_comm->c_coll->coll_allreduce (avg,
 385                                             global_avg,
 386                                             3,
 387                                             OMPI_OFFSET_DATATYPE,
 388                                             MPI_SUM,
 389                                             fh->f_comm,
 390                                             fh->f_comm->c_coll->coll_allreduce_module);
 391         global_avg[0] = global_avg[0]/fh->f_size;
 392         global_avg[1] = global_avg[1]/fh->f_size;
 393         
 394 #if 0 
 395         /* Disabling the feature since we are not using it anyway. Saves us one allreduce operation. */
 396         int global_uniform=0;
 397         
 398         if ( global_avg[0] == avg[0] &&
 399              global_avg[1] == avg[1] &&
 400              0 == avg[2]             &&
 401              0 == global_avg[2] ) {
 402             uniform = 0;
 403         }
 404         else {
 405             uniform = 1;
 406         }
 407         
 408         /* second confirmation round to see whether all processes agree
 409         ** on having a uniform file view or not
 410         */
 411         fh->f_comm->c_coll->coll_allreduce (&uniform,
 412                                             &global_uniform,
 413                                             1,
 414                                             MPI_INT,
 415                                             MPI_MAX,
 416                                             fh->f_comm,
 417                                             fh->f_comm->c_coll->coll_allreduce_module);
 418         
 419         if ( 0 == global_uniform  ){
 420             /* yes, everybody agrees on having a uniform file view */
 421             fh->f_flags |= OMPIO_UNIFORM_FVIEW;
 422         }
 423 #endif
 424     }
 425 
 426     return global_avg[0];
 427 }
 428 
 429 

/* [<][>][^][v][top][bottom][index][help] */