root/ompi/mca/io/romio321/romio/adio/common/ad_opencoll.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. ADIOI_GEN_OpenColl

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
   2 /* 
   3  *
   4  *   Copyright (C) 2007 UChicago/Argonne LLC
   5  *   See COPYRIGHT notice in top-level directory.
   6  */
   7 
   8 #include "adio.h"
   9 
  10 /* Generic version of a "collective open".  Assumes a "real" underlying
  11  * file system (meaning no wonky consistency semantics like NFS).
  12  *
  13  * optimization: by having just one process create a file, close it,
  14  * then have all N processes open it, we can possibly avoid contention
  15  * for write locks on a directory for some file systems.  
  16  *
  17  * Happy side-effect: exclusive create (error if file already exists)
  18  * just falls out 
  19  *
  20  * Note: this is not a "scalable open" (c.f. "The impact of file systems
  21  * on MPI-IO scalability").  
  22  */
  23      
  24 void ADIOI_GEN_OpenColl(ADIO_File fd, int rank, 
  25         int access_mode, int *error_code)
  26 {
  27     int orig_amode_excl, orig_amode_wronly;
  28     MPI_Comm tmp_comm;
  29 
  30     orig_amode_excl = access_mode;
  31 
  32     if (access_mode & ADIO_CREATE ){
  33        if(rank == fd->hints->ranklist[0]) {
  34            /* remove delete_on_close flag if set */
  35            if (access_mode & ADIO_DELETE_ON_CLOSE)
  36                fd->access_mode = access_mode ^ ADIO_DELETE_ON_CLOSE;
  37            else 
  38                fd->access_mode = access_mode;
  39                
  40            tmp_comm = fd->comm;
  41            fd->comm = MPI_COMM_SELF;
  42            (*(fd->fns->ADIOI_xxx_Open))(fd, error_code);
  43            fd->comm = tmp_comm;
  44            MPI_Bcast(error_code, 1, MPI_INT, \
  45                      fd->hints->ranklist[0], fd->comm);
  46            /* if no error, close the file and reopen normally below */
  47            if (*error_code == MPI_SUCCESS) 
  48                (*(fd->fns->ADIOI_xxx_Close))(fd, error_code);
  49 
  50            fd->access_mode = access_mode; /* back to original */
  51        }
  52        else MPI_Bcast(error_code, 1, MPI_INT, fd->hints->ranklist[0], fd->comm);
  53 
  54        if (*error_code != MPI_SUCCESS) {
  55            return;
  56        } 
  57        else {
  58            /* turn off CREAT (and EXCL if set) for real multi-processor open */
  59            access_mode ^= ADIO_CREATE; 
  60            if (access_mode & ADIO_EXCL)
  61                    access_mode ^= ADIO_EXCL;
  62        }
  63     }
  64     fd->blksize = 1024*1024*4; /* this large default value should be good for
  65                                  most file systems.  any ROMIO driver is free
  66                                  to stat the file and find an optimial value */
  67 
  68     /* if we are doing deferred open, non-aggregators should return now */
  69     if (fd->hints->deferred_open ) {
  70         if (!(fd->is_agg)) {
  71             /* we might have turned off EXCL for the aggregators.
  72              * restore access_mode that non-aggregators get the right
  73              * value from get_amode */
  74             fd->access_mode = orig_amode_excl;
  75             /* In file-system specific open, a driver might collect some
  76              * information via stat().  Deferred open means not every process
  77              * participates in fs-specific open, but they all participate in
  78              * this open call.  Broadcast a bit of information in case
  79              * lower-level file system driver (e.g. 'bluegene') collected it
  80              * (not all do)*/
  81             MPI_Bcast(&(fd->blksize), 1, MPI_LONG, fd->hints->ranklist[0], fd->comm);
  82             *error_code = MPI_SUCCESS;
  83             ADIOI_Assert(fd->blksize > 0);
  84             return;
  85         }
  86     }
  87 
  88 /* For writing with data sieving, a read-modify-write is needed. If 
  89    the file is opened for write_only, the read will fail. Therefore,
  90    if write_only, open the file as read_write, but record it as write_only
  91    in fd, so that get_amode returns the right answer. */
  92 
  93     /* observation from David Knaak: file systems that do not support data
  94      * sieving do not need to change the mode */
  95 
  96     orig_amode_wronly = access_mode;
  97     if ( (access_mode & ADIO_WRONLY) &&
  98             ADIO_Feature(fd, ADIO_DATA_SIEVING_WRITES) ) {
  99         access_mode = access_mode ^ ADIO_WRONLY;
 100         access_mode = access_mode | ADIO_RDWR;
 101     }
 102     fd->access_mode = access_mode;
 103 
 104     (*(fd->fns->ADIOI_xxx_Open))(fd, error_code);
 105 
 106     /* if error, may be it was due to the change in amode above. 
 107        therefore, reopen with access mode provided by the user.*/ 
 108     fd->access_mode = orig_amode_wronly;  
 109     if (*error_code != MPI_SUCCESS) 
 110         (*(fd->fns->ADIOI_xxx_Open))(fd, error_code);
 111 
 112     /* if we turned off EXCL earlier, then we should turn it back on */
 113     if (fd->access_mode != orig_amode_excl) fd->access_mode = orig_amode_excl;
 114 
 115     /* broadcast a bit of information (blocksize for now) to all proceses in
 116      * communicator, not just those who participated in open */
 117     MPI_Bcast(&(fd->blksize), 1, MPI_LONG, fd->hints->ranklist[0], fd->comm);
 118     /* file domain code will get terribly confused in a hard-to-debug way if
 119      * gpfs blocksize not sensible */
 120     ADIOI_Assert( fd->blksize > 0);
 121     /* for deferred open: this process has opened the file (because if we are
 122      * not an aggregaor and we are doing deferred open, we returned earlier)*/
 123     fd->is_open = 1;
 124 
 125 }
 126 
 127 /* 
 128  * vim: ts=8 sts=4 sw=4 noexpandtab 
 129  */

/* [<][>][^][v][top][bottom][index][help] */