root/ompi/datatype/ompi_datatype_create_darray.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. block
  2. cyclic
  3. ompi_datatype_create_darray

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
   4  *                         University Research and Technology
   5  *                         Corporation.  All rights reserved.
   6  * Copyright (c) 2004-2015 The University of Tennessee and The University
   7  *                         of Tennessee Research Foundation.  All rights
   8  *                         reserved.
   9  * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
  10  *                         University of Stuttgart.  All rights reserved.
  11  * Copyright (c) 2004-2006 The Regents of the University of California.
  12  *                         All rights reserved.
  13  * Copyright (c) 2009      Sun Microsystems, Inc. All rights reserved.
  14  * Copyright (c) 2009      Oak Ridge National Labs.  All rights reserved.
  15  * Copyright (c) 2010      Cisco Systems, Inc.  All rights reserved.
  16  * Copyright (c) 2016      Los Alamos National Security, LLC. All rights
  17  *                         reserved.
  18  * Copyright (c) 2017      IBM Corporation. All rights reserved.
  19  * $COPYRIGHT$
  20  *
  21  * Additional copyrights may follow
  22  *
  23  * $HEADER$
  24  */
  25 
  26 #include "ompi_config.h"
  27 
  28 #include <stddef.h>
  29 
  30 #include "ompi/datatype/ompi_datatype.h"
  31 
  32 static int
  33 block(const int *gsize_array, int dim, int ndims, int nprocs,
  34       int rank, int darg, int order, ptrdiff_t orig_extent,
  35       ompi_datatype_t *type_old, ompi_datatype_t **type_new,
  36       ptrdiff_t *st_offset)
  37 {
  38     int blksize, global_size, mysize, i, j, rc, start_loop, step;
  39     ptrdiff_t stride, disps[2];
  40 
  41     global_size = gsize_array[dim];
  42 
  43     if (darg == MPI_DISTRIBUTE_DFLT_DARG)
  44         blksize = (global_size + nprocs - 1) / nprocs;
  45     else {
  46         blksize = darg;
  47     }
  48 
  49     j = global_size - blksize*rank;
  50     mysize = blksize < j ? blksize : j;
  51     if (mysize < 0) mysize = 0;
  52 
  53     if (MPI_ORDER_C == order) {
  54         start_loop = ndims - 1 ; step = -1;
  55     } else {
  56         start_loop = 0 ; step = 1;
  57     }
  58 
  59     stride = orig_extent;
  60     if (dim == start_loop) {
  61         rc = ompi_datatype_create_contiguous(mysize, type_old, type_new);
  62         if (OMPI_SUCCESS != rc) return rc;
  63     } else {
  64         for (i = start_loop ; i != dim ; i += step) {
  65             stride *= gsize_array[i];
  66         }
  67         rc = ompi_datatype_create_hvector(mysize, 1, stride, type_old, type_new);
  68         if (OMPI_SUCCESS != rc) return rc;
  69     }
  70 
  71     *st_offset = blksize * rank;
  72     /* in terms of no. of elements of type oldtype in this dimension */
  73     if (mysize == 0) *st_offset = 0;
  74 
  75     /* need to set the UB for block-cyclic to work */
  76     disps[0] = 0;         disps[1] = orig_extent;
  77     if (order == MPI_ORDER_FORTRAN) {
  78         for(i=0; i<=dim; i++) {
  79             disps[1] *= gsize_array[i];
  80         }
  81     } else {
  82         for(i=ndims-1; i>=dim; i--) {
  83             disps[1] *= gsize_array[i];
  84         }
  85     }
  86     rc = opal_datatype_resize( &(*type_new)->super, disps[0], disps[1] );
  87     if (OMPI_SUCCESS != rc) return rc;
  88 
  89     return OMPI_SUCCESS;
  90 }
  91 
  92 
  93 static int
  94 cyclic(const int *gsize_array, int dim, int ndims, int nprocs,
  95        int rank, int darg, int order, ptrdiff_t orig_extent,
  96        ompi_datatype_t* type_old, ompi_datatype_t **type_new,
  97        ptrdiff_t *st_offset)
  98 {
  99     int blksize, i, blklens[2], st_index, end_index, local_size, rem, count, rc;
 100     ptrdiff_t stride, disps[2];
 101     ompi_datatype_t *type_tmp, *types[2];
 102 
 103     if (darg == MPI_DISTRIBUTE_DFLT_DARG) {
 104         blksize = 1;
 105     } else {
 106         blksize = darg;
 107     }
 108 
 109     st_index = rank * blksize;
 110     end_index = gsize_array[dim] - 1;
 111 
 112     if (end_index < st_index) {
 113         local_size = 0;
 114     } else {
 115         local_size = ((end_index - st_index + 1)/(nprocs*blksize))*blksize;
 116         rem = (end_index - st_index + 1) % (nprocs*blksize);
 117         local_size += rem < blksize ? rem : blksize;
 118     }
 119 
 120     count = local_size / blksize;
 121     rem = local_size % blksize;
 122 
 123     stride = nprocs*blksize*orig_extent;
 124     if (order == MPI_ORDER_FORTRAN) {
 125         for (i=0; i<dim; i++) {
 126             stride *= gsize_array[i];
 127         }
 128     } else {
 129         for (i=ndims-1; i>dim; i--) {
 130             stride *= gsize_array[i];
 131         }
 132     }
 133 
 134     rc = ompi_datatype_create_hvector(count, blksize, stride, type_old, type_new);
 135     if (OMPI_SUCCESS != rc) return rc;
 136 
 137     if (rem) {
 138         /* if the last block is of size less than blksize, include
 139            it separately using MPI_Type_struct */
 140 
 141         types  [0] = *type_new; types  [1] = type_old;
 142         disps  [0] = 0;         disps  [1] = count*stride;
 143         blklens[0] = 1;         blklens[1] = rem;
 144 
 145         rc = ompi_datatype_create_struct(2, blklens, disps, types, &type_tmp);
 146         ompi_datatype_destroy(type_new);
 147         /* even in error condition, need to destroy type_new, so check
 148            for error after destroy. */
 149         if (OMPI_SUCCESS != rc) return rc;
 150         *type_new = type_tmp;
 151     }
 152 
 153     /* need to set the UB for block-cyclic to work */
 154     disps[0] = 0;         disps[1] = orig_extent;
 155     if (order == MPI_ORDER_FORTRAN) {
 156         for(i=0; i<=dim; i++) {
 157             disps[1] *= gsize_array[i];
 158         }
 159     } else {
 160         for(i=ndims-1; i>=dim; i--) {
 161             disps[1] *= gsize_array[i];
 162         }
 163     }
 164     rc = opal_datatype_resize( &(*type_new)->super, disps[0], disps[1] );
 165     if (OMPI_SUCCESS != rc) return rc;
 166 
 167     *st_offset = rank * blksize;
 168     /* in terms of no. of elements of type oldtype in this dimension */
 169     if (local_size == 0) *st_offset = 0;
 170 
 171     return OMPI_SUCCESS;
 172 }
 173 
 174 int32_t ompi_datatype_create_darray(int size,
 175                                     int rank,
 176                                     int ndims,
 177                                     int const* gsize_array,
 178                                     int const* distrib_array,
 179                                     int const* darg_array,
 180                                     int const* psize_array,
 181                                     int order,
 182                                     const ompi_datatype_t* oldtype,
 183                                     ompi_datatype_t** newtype)
 184 {
 185     ompi_datatype_t *lastType;
 186     ptrdiff_t orig_extent, *st_offsets = NULL;
 187     int i, start_loop, end_loop, step;
 188     int *coords = NULL, rc = OMPI_SUCCESS;
 189     ptrdiff_t displs[2], tmp_size = 1;
 190 
 191     /* speedy corner case */
 192     if (ndims < 1) {
 193         /* Don't just return MPI_DATATYPE_NULL as that can't be
 194            MPI_TYPE_FREE()ed, and that seems bad */
 195         return ompi_datatype_duplicate( &ompi_mpi_datatype_null.dt, newtype);
 196     }
 197 
 198     rc = ompi_datatype_type_extent(oldtype, &orig_extent);
 199     if (MPI_SUCCESS != rc) goto cleanup;
 200 
 201     /* calculate position in grid using row-major ordering */
 202     {
 203         int tmp_rank = rank, procs = size;
 204 
 205         coords = (int *) malloc(ndims * sizeof(int));
 206         displs[1] = orig_extent;
 207         for (i = 0 ; i < ndims ; i++) {
 208             procs = procs / psize_array[i];
 209             coords[i] = tmp_rank / procs;
 210             tmp_rank = tmp_rank % procs;
 211             /* compute the upper bound of the datatype, including all dimensions */
 212             displs[1] *= gsize_array[i];
 213         }
 214     }
 215 
 216     st_offsets = (ptrdiff_t *) malloc(ndims * sizeof(ptrdiff_t));
 217 
 218     /* duplicate type to here to 1) deal with constness without
 219        casting and 2) eliminate need to for conditional destroy below.
 220        Lame, yes.  But cleaner code all around. */
 221     rc = ompi_datatype_duplicate(oldtype, &lastType);
 222     if (OMPI_SUCCESS != rc) goto cleanup;
 223 
 224     /* figure out ordering issues */
 225     if (MPI_ORDER_C == order) {
 226         start_loop = ndims - 1 ; step = -1; end_loop = -1;
 227     } else {
 228         start_loop = 0 ; step = 1; end_loop = ndims;
 229     }
 230 
 231     /* Build up array */
 232     for (i = start_loop; i != end_loop; i += step) {
 233         int nprocs, tmp_rank;
 234 
 235         switch(distrib_array[i]) {
 236         case MPI_DISTRIBUTE_BLOCK:
 237             rc = block(gsize_array, i, ndims, psize_array[i], coords[i],
 238                        darg_array[i], order, orig_extent,
 239                        lastType, newtype, st_offsets+i);
 240             break;
 241         case MPI_DISTRIBUTE_CYCLIC:
 242             rc = cyclic(gsize_array, i, ndims, psize_array[i], coords[i],
 243                         darg_array[i], order, orig_extent,
 244                         lastType, newtype, st_offsets+i);
 245             break;
 246         case MPI_DISTRIBUTE_NONE:
 247             /* treat it as a block distribution on 1 process */
 248             if (order == MPI_ORDER_C) {
 249                 nprocs = psize_array[i]; tmp_rank = coords[i];
 250             } else {
 251                 nprocs = 1; tmp_rank = 0;
 252             }
 253 
 254             rc = block(gsize_array, i, ndims, nprocs, tmp_rank,
 255                        MPI_DISTRIBUTE_DFLT_DARG, order, orig_extent,
 256                        lastType, newtype, st_offsets+i);
 257             break;
 258         default:
 259             rc = MPI_ERR_ARG;
 260         }
 261         ompi_datatype_destroy(&lastType);
 262         /* need to destroy the old type even in error condition, so
 263            don't check return code from above until after cleanup. */
 264         if (MPI_SUCCESS != rc) goto cleanup;
 265         lastType = *newtype;
 266     }
 267 
 268     /**
 269      * We need to shift the content (useful data) of the datatype, so
 270      * we need to force the displacement to be moved. Therefore, we
 271      * cannot use resize as it will only set the soft lb and ub
 272      * markers without moving the data. Instead, we have to create a
 273      * new data, and insert the last_Type with the correct
 274      * displacement.
 275      */
 276     displs[0] = st_offsets[start_loop];
 277     for (i = start_loop + step; i != end_loop; i += step) {
 278         tmp_size *= gsize_array[i - step];
 279         displs[0] += tmp_size * st_offsets[i];
 280     }
 281     displs[0] *= orig_extent;
 282 
 283     *newtype = ompi_datatype_create(lastType->super.desc.used);
 284     rc = ompi_datatype_add(*newtype, lastType, 1, displs[0], displs[1]);
 285     ompi_datatype_destroy(&lastType);
 286     /* need to destroy the old type even in error condition, so
 287        don't check return code from above until after cleanup. */
 288     if (MPI_SUCCESS != rc) {
 289         ompi_datatype_destroy (newtype);
 290     } else {
 291         (void) opal_datatype_resize( &(*newtype)->super, 0, displs[1]);
 292     }
 293 
 294  cleanup:
 295     free(st_offsets);
 296     free(coords);
 297     return rc;
 298 }

/* [<][>][^][v][top][bottom][index][help] */