root/ompi/mca/io/romio321/romio/adio/common/flatten.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. ADIOI_Type_get_envelope
  2. ADIOI_Type_get_contents
  3. ADIOI_Flatten_datatype
  4. ADIOI_Flatten
  5. ADIOI_Count_contiguous_blocks
  6. ADIOI_Optimize_flattened
  7. ADIOI_Delete_flattened
  8. ADIOI_Flatten_and_find

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
   2 /*
   3  *   Copyright (C) 1997 University of Chicago. 
   4  *   See COPYRIGHT notice in top-level directory.
   5  */
   6 
   7 #include "adio.h"
   8 #include "adio_extern.h"
   9 /* #ifdef MPISGI
  10 #include "mpisgi2.h"
  11 #endif */
  12 
  13 #ifdef USE_DBG_LOGGING
  14   #define FLATTEN_DEBUG 1
  15 #endif
  16 
  17 struct adio_short_int {  /* C stand-in for a (short, int) pair; only used with offsetof() when decoding MPI_SHORT_INT */
  18   short elem_s;          /* first member of the pair */
  19   int elem_i;            /* second member; its offset is reported as the int's displacement (assumes the MPI library lays the pair out like this struct -- confirm) */
  20 };
  21 
  22 struct adio_double_int {  /* C stand-in for a (double, int) pair; only used with offsetof() when decoding MPI_DOUBLE_INT */
  23   double elem_d;          /* first member of the pair */
  24   int elem_i;             /* second member; its offset is reported as the int's displacement (assumes the MPI library lays the pair out like this struct -- confirm) */
  25 };
  26 
  27 struct adio_long_int {  /* C stand-in for a (long, int) pair; only used with offsetof() when decoding MPI_LONG_INT */
  28   long elem_l;          /* first member of the pair */
  29   int elem_i;           /* second member; its offset is reported as the int's displacement (assumes the MPI library lays the pair out like this struct -- confirm) */
  30 };
  31 
  32 struct adio_long_double_int {  /* C stand-in for a (long double, int) pair; only used with offsetof() when decoding MPI_LONG_DOUBLE_INT */
  33   long double elem_ld;         /* first member of the pair */
  34   int elem_i;                  /* second member; its offset is reported as the int's displacement (assumes the MPI library lays the pair out like this struct -- confirm) */
  35 };
  36 
  37 int ADIOI_Type_get_envelope (MPI_Datatype datatype, int *num_integers,
  38                              int *num_addresses, int *num_datatypes, int *combiner)
  39 {
  40   int rc, is_contig;
  41 
  42   ADIOI_Datatype_iscontig(datatype, &is_contig);
  43 
  44   rc = MPI_Type_get_envelope (datatype, num_integers, num_addresses, num_datatypes, combiner);
  45   if (MPI_SUCCESS != rc || MPI_COMBINER_NAMED != *combiner || is_contig) {
  46     return rc;
  47   }
  48 
  49   if (MPI_SHORT_INT == datatype || MPI_DOUBLE_INT == datatype || MPI_LONG_DOUBLE_INT == datatype ||
  50       MPI_LONG_INT == datatype) {
  51       *num_integers = 2;
  52       *num_addresses = 2;
  53       *num_datatypes = 2;
  54       *combiner = MPI_COMBINER_STRUCT;
  55   }
  56 
  57   return rc;
  58 }
  59 
  60 int ADIOI_Type_get_contents (MPI_Datatype datatype, int max_integers,
  61                              int max_addresses, int max_datatypes, int array_of_integers[],
  62                              MPI_Aint array_of_addresses[], MPI_Datatype array_of_datatypes[])
  63 {
  64   int dontcare, combiner;
  65   int rc;
  66 
  67   rc = MPI_Type_get_envelope (datatype, &dontcare, &dontcare, &dontcare, &combiner);
  68   if (MPI_SUCCESS != rc) {
  69     return rc;
  70   }
  71 
  72   if (MPI_COMBINER_NAMED != combiner) {
  73     return MPI_Type_get_contents (datatype, max_integers, max_addresses, max_datatypes,
  74                                   array_of_integers, array_of_addresses, array_of_datatypes);
  75   }
  76 
  77   array_of_integers[0] = 1;
  78   array_of_integers[1] = 1;
  79   array_of_addresses[0] = 0;
  80   array_of_datatypes[1] = MPI_INT;
  81 
  82   if (MPI_SHORT_INT == datatype) {
  83       array_of_datatypes[0] = MPI_SHORT;
  84       array_of_addresses[1] = offsetof (struct adio_short_int, elem_i);
  85   } else if (MPI_DOUBLE_INT == datatype) {
  86       array_of_datatypes[0] = MPI_DOUBLE;
  87       array_of_addresses[1] = offsetof (struct adio_double_int, elem_i);
  88   } else if (MPI_LONG_DOUBLE_INT == datatype) {
  89       array_of_datatypes[0] = MPI_LONG_DOUBLE;
  90       array_of_addresses[1] = offsetof (struct adio_long_double_int, elem_i);
  91   } else if (MPI_LONG_INT == datatype) {
  92       array_of_datatypes[0] = MPI_LONG;
  93       array_of_addresses[1] = offsetof (struct adio_long_int, elem_i);
  94   } else {
  95     rc = MPI_ERR_TYPE;
  96   }
  97 
  98   return rc;
  99 }
 100 
 101 void ADIOI_Optimize_flattened(ADIOI_Flatlist_node *flat_type);
 102 /* flatten datatype and add it to Flatlist */
 103 void ADIOI_Flatten_datatype(MPI_Datatype datatype)
 104 {
 105 #ifdef HAVE_MPIR_TYPE_FLATTEN
 106     MPI_Aint flatten_idx;
 107 #endif
 108     MPI_Count curr_index=0;
 109     int is_contig;
 110     ADIOI_Flatlist_node *flat, *prev=0;
 111 
 112     /* check if necessary to flatten. */
 113  
 114     /* is it entirely contiguous? */
 115     ADIOI_Datatype_iscontig(datatype, &is_contig);
 116   #ifdef FLATTEN_DEBUG 
 117   DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: is_contig %#X\n",is_contig);
 118   #endif
 119     if (is_contig) return;
 120 
 121     /* has it already been flattened? */
 122     flat = ADIOI_Flatlist;
 123     while (flat) {
 124         if (flat->type == datatype) {
 125       #ifdef FLATTEN_DEBUG 
 126       DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: found datatype %#X\n", datatype);
 127       #endif
 128                 return;
 129         }
 130         else {
 131             prev = flat;
 132             flat = flat->next;
 133         }
 134     }
 135 
 136     /* flatten and add to the list */
 137     flat = prev;
 138     flat->next = (ADIOI_Flatlist_node *)ADIOI_Malloc(sizeof(ADIOI_Flatlist_node));
 139     flat = flat->next;
 140 
 141     flat->type = datatype;
 142     flat->next = NULL;
 143     flat->blocklens = NULL;
 144     flat->indices = NULL;
 145     flat->lb_idx = flat->ub_idx = -1;
 146 
 147     flat->count = ADIOI_Count_contiguous_blocks(datatype, &curr_index);
 148 #ifdef FLATTEN_DEBUG 
 149     DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: count %llX, cur_idx = %#llX\n",flat->count,curr_index);
 150 #endif
 151 /*    DBG_FPRINTF(stderr, "%d\n", flat->count);*/
 152 
 153     if (flat->count) {
 154         flat->blocklens = (ADIO_Offset *) ADIOI_Malloc(flat->count * sizeof(ADIO_Offset));
 155         flat->indices = (ADIO_Offset *) ADIOI_Malloc(flat->count * sizeof(ADIO_Offset));
 156     }
 157         
 158     curr_index = 0;
 159 #ifdef HAVE_MPIR_TYPE_FLATTEN
 160     flatten_idx = (MPI_Aint) flat->count;
 161     MPIR_Type_flatten(datatype, flat->indices, flat->blocklens, &flatten_idx);
 162   #ifdef FLATTEN_DEBUG 
 163   DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: MPIR_Type_flatten\n");
 164   #endif
 165 #else
 166     ADIOI_Flatten(datatype, flat, 0, &curr_index);
 167   #ifdef FLATTEN_DEBUG 
 168   DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: ADIOI_Flatten\n");
 169   #endif
 170 
 171     ADIOI_Optimize_flattened(flat);
 172 #endif
 173 /* debug */
 174 #ifdef FLATTEN_DEBUG
 175     {
 176         int i;
 177         for (i=0; i<flat->count; i++) 
 178       DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: i %#X, blocklens %#llX, indices %#llX\n",
 179               i,
 180               flat->blocklens[i],
 181               flat->indices[i]
 182              );
 183   }
 184 #endif
 185 }
 186 
 187 /* ADIOI_Flatten()
 188  *
 189  * Assumption: input datatype is not a basic!!!!
 190  */
 191 void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, 
 192                   ADIO_Offset st_offset, MPI_Count *curr_index)
 193 {
 194     int k, m, n, is_hindexed_block=0;
 195     int lb_updated=0;
 196     int combiner, old_combiner, old_is_contig;
 197     int nints, nadds, ntypes, old_nints, old_nadds, old_ntypes;
 198     /* By using ADIO_Offset we preserve +/- sign and 
 199          avoid >2G integer arithmetic problems */
 200     ADIO_Offset top_count;
 201     MPI_Count i, j, old_size, prev_index, basic_num, num, nonzeroth;
 202     MPI_Aint old_extent, lb;/* Assume extents are non-negative */
 203     int *ints;
 204     MPI_Aint *adds; /* Make no assumptions about +/- sign on these */
 205     MPI_Datatype *types;
 206     ADIOI_Type_get_envelope(datatype, &nints, &nadds, &ntypes, &combiner);
 207     if (combiner == MPI_COMBINER_NAMED) {
 208         return;  /* can't do anything else: calling get_contents on a builtin
 209                     type is an error */
 210     }
 211     ints = (int *) ADIOI_Malloc((nints+1)*sizeof(int));
 212     adds = (MPI_Aint *) ADIOI_Malloc((nadds+1)*sizeof(MPI_Aint));
 213     types = (MPI_Datatype *) ADIOI_Malloc((ntypes+1)*sizeof(MPI_Datatype));
 214     ADIOI_Type_get_contents(datatype, nints, nadds, ntypes, ints, adds, types);
 215 
 216   #ifdef FLATTEN_DEBUG 
 217   DBG_FPRINTF(stderr,"ADIOI_Flatten:: st_offset %#llX, curr_index %#llX\n",st_offset,*curr_index);
 218   DBG_FPRINTF(stderr,"ADIOI_Flatten:: nints %#X, nadds %#X, ntypes %#X\n",nints, nadds, ntypes);
 219   for(i=0; i< nints; ++i)
 220   {
 221     DBG_FPRINTF(stderr,"ADIOI_Flatten:: ints[%lld]=%#X\n",i,ints[i]);
 222   }
 223   for(i=0; i< nadds; ++i)
 224   {
 225     DBG_FPRINTF(stderr,"ADIOI_Flatten:: adds[%lld]="MPI_AINT_FMT_HEX_SPEC"\n",i,adds[i]);
 226   }
 227   for(i=0; i< ntypes; ++i)
 228   {
 229     DBG_FPRINTF(stderr,"ADIOI_Flatten:: types[%lld]=%#llX\n",i,(unsigned long long)(unsigned long)types[i]);
 230   }
 231   #endif
 232   /* Chapter 4, page 83: when processing datatypes, note this item from the
 233    * standard:
 234          Most datatype constructors have replication count or block length
 235          arguments.  Allowed values are non-negative integers. If the value is
 236          zero, no elements are generated in the type map and there is no effect
 237          on datatype bounds or extent.  */
 238 
 239     switch (combiner) {
 240 #ifdef MPIIMPL_HAVE_MPI_COMBINER_DUP
 241     case MPI_COMBINER_DUP:
 242     #ifdef FLATTEN_DEBUG 
 243     DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_DUP\n");
 244     #endif
 245         ADIOI_Type_get_envelope(types[0], &old_nints, &old_nadds,
 246                                 &old_ntypes, &old_combiner); 
 247         ADIOI_Datatype_iscontig(types[0], &old_is_contig);
 248         if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
 249             ADIOI_Flatten(types[0], flat, st_offset, curr_index);
 250         break;
 251 #endif
 252 #ifdef MPIIMPL_HAVE_MPI_COMBINER_SUBARRAY
 253     case MPI_COMBINER_SUBARRAY:
 254         {
 255             int dims = ints[0];
 256             MPI_Datatype stype;
 257       #ifdef FLATTEN_DEBUG 
 258       DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_SUBARRAY\n");
 259       #endif
 260 
 261             ADIO_Type_create_subarray(dims,
 262                                       &ints[1],        /* sizes */
 263                                       &ints[dims+1],   /* subsizes */
 264                                       &ints[2*dims+1], /* starts */
 265                                       ints[3*dims+1],  /* order */
 266                                       types[0],        /* type */
 267                                       &stype);
 268             ADIOI_Flatten(stype, flat, st_offset, curr_index);
 269             MPI_Type_free(&stype);
 270         }
 271         break;
 272 #endif
 273 #ifdef MPIIMPL_HAVE_MPI_COMBINER_DARRAY
 274     case MPI_COMBINER_DARRAY:
 275         {
 276             int dims = ints[2];
 277             MPI_Datatype dtype;
 278       #ifdef FLATTEN_DEBUG 
 279       DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_DARRAY\n");
 280       #endif
 281 
 282             ADIO_Type_create_darray(ints[0],         /* size */
 283                                     ints[1],         /* rank */
 284                                     dims,
 285                                     &ints[3],        /* gsizes */
 286                                     &ints[dims+3],   /* distribs */
 287                                     &ints[2*dims+3], /* dargs */
 288                                     &ints[3*dims+3], /* psizes */
 289                                     ints[4*dims+3],  /* order */
 290                                     types[0],
 291                                     &dtype);
 292       #ifdef FLATTEN_DEBUG 
 293       DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_DARRAY <ADIOI_Flatten(dtype, flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX, st_offset %#llX, curr_index %#llX);\n",
 294               0, flat->indices[0], 0, flat->blocklens[0], st_offset, *curr_index);
 295       #endif
 296             ADIOI_Flatten(dtype, flat, st_offset, curr_index);
 297       #ifdef FLATTEN_DEBUG 
 298       DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_DARRAY >ADIOI_Flatten(dtype, flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX, st_offset %#llX, curr_index %#llX);\n",
 299               0, flat->indices[0], 0, flat->blocklens[0], st_offset, *curr_index);
 300       #endif
 301             MPI_Type_free(&dtype);
 302         }
 303         break;
 304 #endif
 305     case MPI_COMBINER_CONTIGUOUS:
 306     #ifdef FLATTEN_DEBUG 
 307     DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_CONTIGUOUS\n");
 308     #endif
 309         top_count = ints[0];
 310         ADIOI_Type_get_envelope(types[0], &old_nints, &old_nadds,
 311                                 &old_ntypes, &old_combiner); 
 312         ADIOI_Datatype_iscontig(types[0], &old_is_contig);
 313 
 314         prev_index = *curr_index;
 315         if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
 316             ADIOI_Flatten(types[0], flat, st_offset, curr_index);
 317 
 318         if (prev_index == *curr_index) {
 319 /* simplest case, made up of basic or contiguous types */
 320             j = *curr_index;
 321             flat->indices[j] = st_offset;
 322             MPI_Type_size_x(types[0], &old_size);
 323             flat->blocklens[j] = top_count * old_size;
 324       #ifdef FLATTEN_DEBUG 
 325       DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple flat->indices[%#llX] %#llX, flat->blocklens[%#llX] %#llX\n",j, flat->indices[j], j, flat->blocklens[j]);
 326       #endif
 327             (*curr_index)++;
 328         }
 329         else {
 330 /* made up of noncontiguous derived types */
 331             j = *curr_index;
 332             num = *curr_index - prev_index;
 333 
 334 /* The noncontiguous types have to be replicated count times */
 335             MPI_Type_get_extent(types[0], &lb, &old_extent);
 336             for (m=1; m<top_count; m++) {
 337                 for (i=0; i<num; i++) {
 338                     flat->indices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
 339                     flat->blocklens[j] = flat->blocklens[j-num];
 340           #ifdef FLATTEN_DEBUG 
 341           DBG_FPRINTF(stderr,"ADIOI_Flatten:: derived flat->indices[%#llX] %#llX, flat->blocklens[%#llX] %#llX\n",j, flat->indices[j], j, flat->blocklens[j]);
 342           #endif
 343                     j++;
 344                 }
 345             }
 346             *curr_index = j;
 347         }
 348         break;
 349 
 350     case MPI_COMBINER_VECTOR: 
 351     #ifdef FLATTEN_DEBUG 
 352     DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_VECTOR\n");
 353     #endif
 354         top_count = ints[0];
 355         ADIOI_Type_get_envelope(types[0], &old_nints, &old_nadds,
 356                                 &old_ntypes, &old_combiner); 
 357         ADIOI_Datatype_iscontig(types[0], &old_is_contig);
 358 
 359         prev_index = *curr_index;
 360         if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
 361             ADIOI_Flatten(types[0], flat, st_offset, curr_index);
 362 
 363         if (prev_index == *curr_index) {
 364 /* simplest case, vector of basic or contiguous types */
 365     /* By using ADIO_Offset we preserve +/- sign and 
 366          avoid >2G integer arithmetic problems */
 367     ADIO_Offset blocklength = ints[1], stride = ints[2];
 368             j = *curr_index;
 369             flat->indices[j] = st_offset;
 370             MPI_Type_size_x(types[0], &old_size);
 371             flat->blocklens[j] = blocklength * old_size;
 372             for (i=j+1; i<j+top_count; i++) {
 373                 flat->indices[i] = flat->indices[i-1] + stride * old_size;
 374                 flat->blocklens[i] = flat->blocklens[j];
 375             }
 376             *curr_index = i;
 377         }
 378         else {
 379 /* vector of noncontiguous derived types */
 380     /* By using ADIO_Offset we preserve +/- sign and 
 381          avoid >2G integer arithmetic problems */
 382     ADIO_Offset blocklength = ints[1], stride = ints[2];
 383 
 384             j = *curr_index;
 385             num = *curr_index - prev_index;
 386 
 387 /* The noncontiguous types have to be replicated blocklen times
 388    and then strided. Replicate the first one. */
 389             MPI_Type_get_extent(types[0], &lb, &old_extent);
 390             for (m=1; m<blocklength; m++) {
 391                 for (i=0; i<num; i++) {
 392                     flat->indices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
 393                     flat->blocklens[j] = flat->blocklens[j-num];
 394                     j++;
 395                 }
 396             }
 397             *curr_index = j;
 398 
 399 /* Now repeat with strides. */
 400             num = *curr_index - prev_index;
 401             for (i=1; i<top_count; i++) {
 402                 for (m=0; m<num; m++) {
 403                    flat->indices[j] =  flat->indices[j-num] + stride * ADIOI_AINT_CAST_TO_OFFSET old_extent;
 404                    flat->blocklens[j] = flat->blocklens[j-num];
 405                    j++;
 406                 }
 407             }
 408             *curr_index = j;
 409         }
 410         break;
 411 
 412     case MPI_COMBINER_HVECTOR: 
 413     case MPI_COMBINER_HVECTOR_INTEGER: 
 414     #ifdef FLATTEN_DEBUG 
 415     DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_HVECTOR_INTEGER\n");
 416     #endif
 417         top_count = ints[0];
 418         ADIOI_Type_get_envelope(types[0], &old_nints, &old_nadds,
 419                                 &old_ntypes, &old_combiner); 
 420         ADIOI_Datatype_iscontig(types[0], &old_is_contig);
 421 
 422         prev_index = *curr_index;
 423         if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
 424             ADIOI_Flatten(types[0], flat, st_offset, curr_index);
 425 
 426         if (prev_index == *curr_index) {
 427 /* simplest case, vector of basic or contiguous types */
 428     /* By using ADIO_Offset we preserve +/- sign and 
 429          avoid >2G integer arithmetic problems */
 430     ADIO_Offset blocklength = ints[1];
 431             j = *curr_index;
 432             flat->indices[j] = st_offset;
 433             MPI_Type_size_x(types[0], &old_size);
 434             flat->blocklens[j] = blocklength * old_size;
 435             for (i=j+1; i<j+top_count; i++) {
 436                 flat->indices[i] = flat->indices[i-1] + adds[0];
 437                 flat->blocklens[i] = flat->blocklens[j];
 438             }
 439             *curr_index = i;
 440         }
 441         else {
 442 /* vector of noncontiguous derived types */
 443     /* By using ADIO_Offset we preserve +/- sign and 
 444          avoid >2G integer arithmetic problems */
 445     ADIO_Offset blocklength = ints[1];
 446 
 447             j = *curr_index;
 448             num = *curr_index - prev_index;
 449 
 450 /* The noncontiguous types have to be replicated blocklen times
 451    and then strided. Replicate the first one. */
 452             MPI_Type_get_extent(types[0], &lb, &old_extent);
 453             for (m=1; m<blocklength; m++) {
 454                 for (i=0; i<num; i++) {
 455                     flat->indices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
 456                     flat->blocklens[j] = flat->blocklens[j-num];
 457                     j++;
 458                 }
 459             }
 460             *curr_index = j;
 461 
 462 /* Now repeat with strides. */
 463             num = *curr_index - prev_index;
 464             for (i=1; i<top_count; i++) {
 465                 for (m=0; m<num; m++) {
 466                    flat->indices[j] =  flat->indices[j-num] + adds[0];
 467                    flat->blocklens[j] = flat->blocklens[j-num];
 468                    j++;
 469                 }
 470             }
 471             *curr_index = j;
 472         }
 473         break;
 474 
 475     case MPI_COMBINER_INDEXED: 
 476     #ifdef FLATTEN_DEBUG 
 477     DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_INDEXED\n");
 478     #endif
 479         top_count = ints[0];
 480         ADIOI_Type_get_envelope(types[0], &old_nints, &old_nadds,
 481                                 &old_ntypes, &old_combiner); 
 482         ADIOI_Datatype_iscontig(types[0], &old_is_contig);
 483         MPI_Type_get_extent(types[0], &lb, &old_extent);
 484 
 485         prev_index = *curr_index;
 486         if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
 487   {
 488     /* By using ADIO_Offset we preserve +/- sign and 
 489          avoid >2G integer arithmetic problems */
 490     ADIO_Offset stride = ints[top_count+1];
 491         ADIOI_Flatten(types[0], flat,
 492          st_offset+stride* ADIOI_AINT_CAST_TO_OFFSET old_extent, curr_index);
 493   }
 494 
 495         if (prev_index == *curr_index) {
 496 /* simplest case, indexed type made up of basic or contiguous types */
 497             j = *curr_index;
 498             for (i=j, nonzeroth=i; i<j+top_count; i++) {
 499     /* By using ADIO_Offset we preserve +/- sign and 
 500          avoid >2G integer arithmetic problems */
 501     ADIO_Offset blocklength = ints[1+i-j], stride = ints[top_count+1+i-j];
 502                 if (blocklength > 0) {
 503                     flat->indices[nonzeroth] =
 504                         st_offset + stride* ADIOI_AINT_CAST_TO_OFFSET old_extent;
 505                     flat->blocklens[nonzeroth] =
 506                         blocklength* ADIOI_AINT_CAST_TO_OFFSET old_extent;
 507                     nonzeroth++;
 508                 } else {
 509                     flat->count--; /* don't count/consider any zero-length blocklens */
 510                 }
 511             }
 512             *curr_index = i;
 513         }
 514         else {
 515 /* indexed type made up of noncontiguous derived types */
 516 
 517             j = *curr_index;
 518             num = *curr_index - prev_index;
 519             basic_num = num;
 520 
 521 /* The noncontiguous types have to be replicated blocklens[i] times
 522    and then strided. Replicate the first one. */
 523             for (m=1; m<ints[1]; m++) {
 524                 for (i=0, nonzeroth = j; i<num; i++) {
 525                     if (flat->blocklens[j-num] > 0) {
 526                         flat->indices[nonzeroth] =
 527                             flat->indices[nonzeroth-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
 528                         flat->blocklens[nonzeroth] =
 529                             flat->blocklens[nonzeroth-num];
 530                         j++;
 531                         nonzeroth++;
 532                     } else {
 533                         flat->count --;
 534                     }
 535                 }
 536             }
 537             *curr_index = j;
 538 
 539 /* Now repeat with strides. */
 540             for (i=1; i<top_count; i++) {
 541                 num = *curr_index - prev_index;
 542                 prev_index = *curr_index;
 543                 for (m=0, nonzeroth=j; m<basic_num; m++) {
 544       /* By using ADIO_Offset we preserve +/- sign and 
 545          avoid >2G integer arithmetic problems */
 546       ADIO_Offset stride = ints[top_count+1+i]-ints[top_count+i];
 547                     if (flat->blocklens[j-num] > 0 ) {
 548                         flat->indices[nonzeroth] =
 549                             flat->indices[j-num] + stride* ADIOI_AINT_CAST_TO_OFFSET old_extent;
 550                         flat->blocklens[nonzeroth] = flat->blocklens[j-num];
 551                         j++;
 552                         nonzeroth++;
 553                     } else {
 554                         flat->count--;
 555                     }
 556                 }
 557                 *curr_index = j;
 558                 for (m=1; m<ints[1+i]; m++) {
 559                     for (k=0, nonzeroth=j; k<basic_num; k++) {
 560                         if (flat->blocklens[j-basic_num] > 0) {
 561                             flat->indices[nonzeroth] =
 562                                 flat->indices[j-basic_num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
 563                             flat->blocklens[nonzeroth] = flat->blocklens[j-basic_num];
 564                             j++;
 565                             nonzeroth++;
 566                         } else {
 567                             flat->count --;
 568                         }
 569                     }
 570                 }
 571                 *curr_index = j;
 572             }
 573         }
 574         break;
 575 
 576 #if defined HAVE_DECL_MPI_COMBINER_HINDEXED_BLOCK && HAVE_DECL_MPI_COMBINER_HINDEXED_BLOCK
 577     case MPI_COMBINER_HINDEXED_BLOCK:
 578         is_hindexed_block=1;
 579         /* deliberate fall-through */
 580 #endif
 581     case MPI_COMBINER_INDEXED_BLOCK:
 582     #ifdef FLATTEN_DEBUG 
 583     DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_INDEXED_BLOCK\n");
 584     #endif
 585         top_count = ints[0];
 586         ADIOI_Type_get_envelope(types[0], &old_nints, &old_nadds,
 587                                 &old_ntypes, &old_combiner); 
 588         ADIOI_Datatype_iscontig(types[0], &old_is_contig);
 589         MPI_Type_get_extent(types[0], &lb, &old_extent);
 590 
 591         prev_index = *curr_index;
 592         if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
 593   {
 594       /* By using ADIO_Offset we preserve +/- sign and 
 595          avoid >2G integer arithmetic problems */
 596       ADIO_Offset stride = ints[1+1];
 597         if (is_hindexed_block) {
 598             ADIOI_Flatten(types[0], flat,
 599                     st_offset+adds[0], curr_index);
 600         } else {
 601             ADIOI_Flatten(types[0], flat,
 602                     st_offset+stride* ADIOI_AINT_CAST_TO_OFFSET old_extent, curr_index);
 603         }
 604   }
 605 
 606         if (prev_index == *curr_index) {
 607 /* simplest case, indexed type made up of basic or contiguous types */
 608             j = *curr_index;
 609             for (i=j; i<j+top_count; i++) {
 610       /* By using ADIO_Offset we preserve +/- sign and 
 611          avoid >2G integer arithmetic problems */
 612                 ADIO_Offset blocklength = ints[1];
 613                 if (is_hindexed_block) {
 614                     flat->indices[i] = st_offset + adds[i-j];
 615                 } else {
 616                     ADIO_Offset stride = ints[1+1+i-j];
 617                     flat->indices[i] = st_offset +
 618                         stride* ADIOI_AINT_CAST_TO_OFFSET old_extent;
 619                 }
 620                 flat->blocklens[i] = blocklength* ADIOI_AINT_CAST_TO_OFFSET old_extent;
 621             }
 622             *curr_index = i;
 623         }
 624         else {
 625 /* vector of noncontiguous derived types */
 626 
 627             j = *curr_index;
 628             num = *curr_index - prev_index;
 629 
 630 /* The noncontiguous types have to be replicated blocklens[i] times
 631    and then strided. Replicate the first one. */
 632             for (m=1; m<ints[1]; m++) {
 633                 for (i=0; i<num; i++) {
 634                     if (is_hindexed_block) {
 635                         /* this is the one place the hindexed case uses the
 636                          * extent of a type */
 637                         MPI_Type_get_extent(types[0], &lb, &old_extent);
 638                     }
 639                     flat->indices[j] = flat->indices[j-num] +
 640                         ADIOI_AINT_CAST_TO_OFFSET old_extent;
 641                     flat->blocklens[j] = flat->blocklens[j-num];
 642                     j++;
 643                 }
 644             }
 645             *curr_index = j;
 646 
 647 /* Now repeat with strides. */
 648             num = *curr_index - prev_index;
 649             for (i=1; i<top_count; i++) {
 650                 for (m=0; m<num; m++) {
 651                     if (is_hindexed_block) {
 652                         flat->indices[j] = flat->indices[j-num] +
 653                             adds[i] - adds[i-1];
 654                     } else {
 655                         /* By using ADIO_Offset we preserve +/- sign and
 656                            avoid >2G integer arithmetic problems */
 657                         ADIO_Offset stride = ints[2+i]-ints[1+i];
 658                         flat->indices[j] = flat->indices[j-num] +
 659                             stride* ADIOI_AINT_CAST_TO_OFFSET old_extent;
 660                     }
 661                     flat->blocklens[j] = flat->blocklens[j-num];
 662                     j++;
 663                 }
 664             }
 665             *curr_index = j;
 666         }
 667         break;
 668 
 669     case MPI_COMBINER_HINDEXED: 
 670     case MPI_COMBINER_HINDEXED_INTEGER:
 671     #ifdef FLATTEN_DEBUG 
 672     DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_HINDEXED_INTEGER\n");
 673     #endif
 674         top_count = ints[0];
 675         ADIOI_Type_get_envelope(types[0], &old_nints, &old_nadds,
 676                                 &old_ntypes, &old_combiner); 
 677         ADIOI_Datatype_iscontig(types[0], &old_is_contig);
 678 
 679         prev_index = *curr_index;
 680         if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
 681   {
 682         ADIOI_Flatten(types[0], flat, st_offset+adds[0], curr_index); 
 683   }
 684 
 685         if (prev_index == *curr_index) {
 686 /* simplest case, indexed type made up of basic or contiguous types */
 687             j = *curr_index;
 688             MPI_Type_size_x(types[0], &old_size);
 689             for (i=j, nonzeroth=j; i<j+top_count; i++) {
 690                 if (ints[1+i-j] > 0) {
 691                     /* By using ADIO_Offset we preserve +/- sign and
 692                        avoid >2G integer arithmetic problems */
 693                     ADIO_Offset blocklength = ints[1+i-j];
 694                     flat->indices[nonzeroth] = st_offset + adds[i-j];
 695                     flat->blocklens[nonzeroth] = blocklength*old_size;
 696                     nonzeroth++;
 697                 } else {
 698                     flat->count--;
 699                 }
 700             }
 701             *curr_index = i;
 702         }
 703         else {
 704 /* indexed type made up of noncontiguous derived types */
 705 
 706             j = *curr_index;
 707             num = *curr_index - prev_index;
 708             basic_num = num;
 709 
 710 /* The noncontiguous types have to be replicated blocklens[i] times
 711    and then strided. Replicate the first one. */
 712             MPI_Type_get_extent(types[0], &lb, &old_extent);
 713             for (m=1; m<ints[1]; m++) {
 714                 for (i=0, nonzeroth=j; i<num; i++) {
 715                     if (flat->blocklens[j-num] > 0) {
 716                         flat->indices[nonzeroth] =
 717                             flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
 718                         flat->blocklens[nonzeroth] = flat->blocklens[j-num];
 719                         j++;
 720                         nonzeroth++;
 721                     } else {
 722                         flat->count--;
 723                     }
 724                 }
 725             }
 726             *curr_index = j;
 727 
 728 /* Now repeat with strides. */
 729             for (i=1; i<top_count; i++) {
 730                 num = *curr_index - prev_index;
 731                 prev_index = *curr_index;
 732                 for (m=0, nonzeroth=j; m<basic_num; m++) {
 733                     if (flat->blocklens[j-num] > 0) {
 734                         flat->indices[nonzeroth] =
 735                             flat->indices[j-num] + adds[i] - adds[i-1];
 736                         flat->blocklens[nonzeroth] = flat->blocklens[j-num];
 737                         j++;
 738                         nonzeroth++;
 739                     } else {
 740                         flat->count--;
 741                     }
 742                 }
 743                 *curr_index = j;
 744                 for (m=1; m<ints[1+i]; m++) {
 745                     for (k=0,nonzeroth=j; k<basic_num; k++) {
 746                         if (flat->blocklens[j-basic_num] >0) {
 747                             flat->indices[nonzeroth] =
 748                                 flat->indices[j-basic_num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
 749                             flat->blocklens[nonzeroth] = flat->blocklens[j-basic_num];
 750                             j++;
 751                             nonzeroth++;
 752                         }
 753                     }
 754                 }
 755                 *curr_index = j;
 756             }
 757         }
 758         break;
 759 
 760     case MPI_COMBINER_STRUCT: 
 761     case MPI_COMBINER_STRUCT_INTEGER: 
 762     #ifdef FLATTEN_DEBUG 
 763     DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_STRUCT_INTEGER\n");
 764     #endif
 765         top_count = ints[0];
 766         for (n=0; n<top_count; n++) {
 767             ADIOI_Type_get_envelope(types[n], &old_nints, &old_nadds,
 768                                     &old_ntypes, &old_combiner); 
 769             ADIOI_Datatype_iscontig(types[n], &old_is_contig);
 770 
 771             prev_index = *curr_index;
 772             if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
 773                 ADIOI_Flatten(types[n], flat, st_offset+adds[n], curr_index);
 774 
 775             if (prev_index == *curr_index) {
 776 /* simplest case, current type is basic or contiguous types */
 777         /* By using ADIO_Offset we preserve +/- sign and 
 778            avoid >2G integer arithmetic problems */
 779                 if (ints[1+n] > 0) {
 780                     ADIO_Offset blocklength = ints[1+n];
 781                     j = *curr_index;
 782                     flat->indices[j] = st_offset + adds[n];
 783                     MPI_Type_size_x(types[n], &old_size);
 784                     flat->blocklens[j] = blocklength * old_size;
 785 #ifdef FLATTEN_DEBUG
 786                     DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple adds[%#X] "MPI_AINT_FMT_HEX_SPEC", flat->indices[%#llX] %#llX, flat->blocklens[%#llX] %#llX\n",n,adds[n],j, flat->indices[j], j, flat->blocklens[j]);
 787 #endif
 788                     (*curr_index)++;
 789                 }
 790             }
 791             else {
 792 /* current type made up of noncontiguous derived types */
 793 
 794                 j = *curr_index;
 795                 num = *curr_index - prev_index;
 796 
 797 /* The current type has to be replicated blocklens[n] times */
 798                 MPI_Type_get_extent(types[n], &lb, &old_extent);
 799                 for (m=1; m<ints[1+n]; m++) {
 800                     for (i=0; i<num; i++) {
 801                         flat->indices[j] =
 802                             flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
 803                         flat->blocklens[j] = flat->blocklens[j-num];
 804 #ifdef FLATTEN_DEBUG
 805                         DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple old_extent "MPI_AINT_FMT_HEX_SPEC", flat->indices[%#llX] %#llX, flat->blocklens[%#llX] %#llX\n",old_extent,j, flat->indices[j], j, flat->blocklens[j]);
 806 #endif
 807                         j++;
 808                     }
 809                 }
 810                 *curr_index = j;
 811             }
 812         }
 813         break;
 814 
 815     case MPI_COMBINER_RESIZED: 
 816     #ifdef FLATTEN_DEBUG 
 817     DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_RESIZED\n");
 818     #endif
 819 
 820     /* This is done similar to a type_struct with an lb, datatype, ub */
 821 
 822     /* handle the Lb */
 823         j = *curr_index;
 824         /* when we process resized types, we (recursively) process the lower
 825          * bound, the type being resized, then the upper bound.  In the
 826          * resized-of-resized case, we might find ourselves updating the upper
 827          * bound based on the inner type, but the lower bound based on the
 828          * upper type.  check both lb and ub to prevent mixing updates */
 829         if (flat->lb_idx == -1 && flat->ub_idx == -1) {
 830             flat->indices[j] = st_offset + adds[0];
 831             /* this zero-length blocklens[] element, unlike eleswhere in the
 832              * flattening code, is correct and is used to indicate a lower bound
 833              * marker */
 834             flat->blocklens[j] = 0;
 835             flat->lb_idx = *curr_index;
 836             lb_updated=1;
 837 
 838             #ifdef FLATTEN_DEBUG
 839             DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple adds[%#X] "MPI_AINT_FMT_HEX_SPEC", flat->indices[%#llX] %#llX, flat->blocklens[%#llX] %#llX\n",0,adds[0],j, flat->indices[j], j, flat->blocklens[j]);
 840             #endif
 841 
 842             (*curr_index)++;
 843         } else {
 844             /* skipped over this chunk because something else higher-up in the
 845              * type construction set this for us already */
 846             flat->count--;
 847             st_offset -= adds[0];
 848         }
 849 
 850         /* handle the datatype */
 851 
 852         ADIOI_Type_get_envelope(types[0], &old_nints, &old_nadds,
 853                                 &old_ntypes, &old_combiner); 
 854         ADIOI_Datatype_iscontig(types[0], &old_is_contig);
 855 
 856         if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig)) {
 857             ADIOI_Flatten(types[0], flat, st_offset+adds[0], curr_index);
 858         }
 859         else {
 860             /* current type is basic or contiguous */
 861             j = *curr_index;
 862             flat->indices[j] = st_offset;
 863             MPI_Type_size_x(types[0], &old_size);
 864             flat->blocklens[j] = old_size;
 865 
 866             #ifdef FLATTEN_DEBUG 
 867             DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple adds[%#X] "MPI_AINT_FMT_HEX_SPEC", flat->indices[%#llX] %#llX, flat->blocklens[%#llX] %#llX\n",0,adds[0],j, flat->indices[j], j, flat->blocklens[j]);
 868             #endif
 869 
 870             (*curr_index)++;
 871         }
 872 
 873         /* take care of the extent as a UB */
 874         /* see note above about mixing updates for why we check lb and ub */
 875         if ((flat->lb_idx == -1 && flat->ub_idx == -1) || lb_updated) {
 876             j = *curr_index;
 877             flat->indices[j] = st_offset + adds[0] + adds[1];
 878             /* again, zero-element ok: an upper-bound marker explicitly set by the
 879              * constructor of this resized type */
 880             flat->blocklens[j] = 0;
 881             flat->ub_idx = *curr_index;
 882         } else {
 883             /* skipped over this chunk because something else higher-up in the
 884              * type construction set this for us already */
 885             flat->count--;
 886             (*curr_index)--;
 887         }
 888 
 889         #ifdef FLATTEN_DEBUG 
 890         DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple adds[%#X] "MPI_AINT_FMT_HEX_SPEC", flat->indices[%#llX] %#llX, flat->blocklens[%#llX] %#llX\n",1,adds[1],j, flat->indices[j], j, flat->blocklens[j]);
 891         #endif
 892 
 893         (*curr_index)++;
 894 
 895         break;
 896 
 897     default:
 898         /* TODO: FIXME (requires changing prototypes to return errors...) */
 899         DBG_FPRINTF(stderr, "Error: Unsupported datatype passed to ADIOI_Flatten\n");
 900         MPI_Abort(MPI_COMM_WORLD, 1);
 901     }
 902 
 903 #ifndef MPISGI
 904 /* There is a bug in SGI's impl. of MPI_Type_get_contents. It doesn't
 905    return new datatypes. Therefore no need to free. */
 906     for (i=0; i<ntypes; i++) {
 907         MPI_Type_get_envelope(types[i], &old_nints, &old_nadds, &old_ntypes,
 908                               &old_combiner);
 909         if (old_combiner != MPI_COMBINER_NAMED) MPI_Type_free(types+i);
 910     }
 911 #endif
 912 
 913     ADIOI_Free(ints);
 914     ADIOI_Free(adds);
 915     ADIOI_Free(types);
 916 
 917   #ifdef FLATTEN_DEBUG 
 918   DBG_FPRINTF(stderr,"ADIOI_Flatten:: return st_offset %#llX, curr_index %#llX\n",st_offset,*curr_index);
 919   #endif
 920 
 921 }
 922 
 923 /********************************************************/
 924 
 925 /* ADIOI_Count_contiguous_blocks
 926  *
 927  * Returns number of contiguous blocks in type, and also updates
 928  * curr_index to reflect the space for the additional blocks.
 929  *
 930  * ASSUMES THAT TYPE IS NOT A BASIC!!!
 931  */
 932 MPI_Count ADIOI_Count_contiguous_blocks(MPI_Datatype datatype, MPI_Count *curr_index)
 933 {
 934     int i, n;
 935     MPI_Count count=0, prev_index, num, basic_num;
 936     int top_count, combiner, old_combiner, old_is_contig;
 937     int nints, nadds, ntypes, old_nints, old_nadds, old_ntypes;
 938     int *ints;
 939     MPI_Aint *adds; /* Make no assumptions about +/- sign on these */
 940     MPI_Datatype *types;
 941 
 942     ADIOI_Type_get_envelope(datatype, &nints, &nadds, &ntypes, &combiner);
 943     if (combiner == MPI_COMBINER_NAMED) {
 944         return 1;  /* builtin types not supposed to be passed to this routine
 945                     */
 946     }
 947     ints = (int *) ADIOI_Malloc((nints+1)*sizeof(int));
 948     adds = (MPI_Aint *) ADIOI_Malloc((nadds+1)*sizeof(MPI_Aint));
 949     types = (MPI_Datatype *) ADIOI_Malloc((ntypes+1)*sizeof(MPI_Datatype));
 950     MPI_Type_get_contents(datatype, nints, nadds, ntypes, ints, adds, types);
 951 
 952     switch (combiner) {
 953 #ifdef MPIIMPL_HAVE_MPI_COMBINER_DUP
 954     case MPI_COMBINER_DUP:
 955         ADIOI_Type_get_envelope(types[0], &old_nints, &old_nadds,
 956                                 &old_ntypes, &old_combiner); 
 957         ADIOI_Datatype_iscontig(types[0], &old_is_contig);
 958         if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
 959             count = ADIOI_Count_contiguous_blocks(types[0], curr_index);
 960         else {
 961                 count = 1;
 962                 (*curr_index)++;
 963         }
 964         break;
 965 #endif
 966 #ifdef MPIIMPL_HAVE_MPI_COMBINER_SUBARRAY
 967     case MPI_COMBINER_SUBARRAY:
 968         {
 969             int dims = ints[0];
 970             MPI_Datatype stype;
 971 
 972             ADIO_Type_create_subarray(dims,
 973                                       &ints[1],        /* sizes */
 974                                       &ints[dims+1],   /* subsizes */
 975                                       &ints[2*dims+1], /* starts */
 976                                       ints[3*dims+1],  /* order */
 977                                       types[0],        /* type */
 978                                       &stype);
 979             count = ADIOI_Count_contiguous_blocks(stype, curr_index);
 980             /* curr_index will have already been updated; just pass
 981              * count back up.
 982              */
 983             MPI_Type_free(&stype);
 984 
 985         }
 986         break;
 987 #endif
 988 #ifdef MPIIMPL_HAVE_MPI_COMBINER_DARRAY
 989     case MPI_COMBINER_DARRAY:
 990         {
 991             int dims = ints[2];
 992             MPI_Datatype dtype;
 993 
 994             ADIO_Type_create_darray(ints[0],         /* size */
 995                                     ints[1],         /* rank */
 996                                     dims,
 997                                     &ints[3],        /* gsizes */
 998                                     &ints[dims+3],   /* distribs */
 999                                     &ints[2*dims+3], /* dargs */
1000                                     &ints[3*dims+3], /* psizes */
1001                                     ints[4*dims+3],  /* order */
1002                                     types[0],
1003                                     &dtype);
1004             count = ADIOI_Count_contiguous_blocks(dtype, curr_index);
1005             /* curr_index will have already been updated; just pass
1006              * count back up.
1007              */
1008             MPI_Type_free(&dtype);
1009         }
1010         break;
1011 #endif
1012     case MPI_COMBINER_CONTIGUOUS:
1013         top_count = ints[0];
1014         ADIOI_Type_get_envelope(types[0], &old_nints, &old_nadds,
1015                                 &old_ntypes, &old_combiner); 
1016         ADIOI_Datatype_iscontig(types[0], &old_is_contig);
1017 
1018         prev_index = *curr_index;
1019         if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
1020             count = ADIOI_Count_contiguous_blocks(types[0], curr_index);
1021         else count = 1;
1022 
1023         if (prev_index == *curr_index) 
1024 /* simplest case, made up of basic or contiguous types */
1025             (*curr_index)++;
1026         else {
1027 /* made up of noncontiguous derived types */
1028             num = *curr_index - prev_index;
1029             count *= top_count;
1030             *curr_index += (top_count - 1)*num;
1031         }
1032         break;
1033 
1034     case MPI_COMBINER_VECTOR:
1035     case MPI_COMBINER_HVECTOR:
1036     case MPI_COMBINER_HVECTOR_INTEGER: 
1037         top_count = ints[0];
1038         ADIOI_Type_get_envelope(types[0], &old_nints, &old_nadds,
1039                                 &old_ntypes, &old_combiner); 
1040         ADIOI_Datatype_iscontig(types[0], &old_is_contig);
1041 
1042         prev_index = *curr_index;
1043         if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
1044             count = ADIOI_Count_contiguous_blocks(types[0], curr_index);
1045         else count = 1;
1046 
1047         if (prev_index == *curr_index) {
1048 /* simplest case, vector of basic or contiguous types */
1049             count = top_count;
1050             *curr_index += count;
1051         }
1052         else {
1053 /* vector of noncontiguous derived types */
1054             num = *curr_index - prev_index;
1055 
1056 /* The noncontiguous types have to be replicated blocklen times
1057    and then strided. */
1058             count *= ints[1] * top_count;
1059 
1060 /* First one */
1061             *curr_index += (ints[1] - 1)*num;
1062 
1063 /* Now repeat with strides. */
1064             num = *curr_index - prev_index;
1065             *curr_index += (top_count - 1)*num;
1066         }
1067         break;
1068 
1069     case MPI_COMBINER_INDEXED: 
1070     case MPI_COMBINER_HINDEXED:
1071     case MPI_COMBINER_HINDEXED_INTEGER:
1072         top_count = ints[0];
1073         ADIOI_Type_get_envelope(types[0], &old_nints, &old_nadds,
1074                                 &old_ntypes, &old_combiner); 
1075         ADIOI_Datatype_iscontig(types[0], &old_is_contig);
1076 
1077         prev_index = *curr_index;
1078         if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
1079             count = ADIOI_Count_contiguous_blocks(types[0], curr_index);
1080         else count = 1;
1081 
1082         if (prev_index == *curr_index) {
1083 /* simplest case, indexed type made up of basic or contiguous types */
1084             count = top_count;
1085             *curr_index += count;
1086         }
1087         else {
1088 /* indexed type made up of noncontiguous derived types */
1089             basic_num = *curr_index - prev_index;
1090 
1091 /* The noncontiguous types have to be replicated blocklens[i] times
1092    and then strided. */
1093             *curr_index += (ints[1]-1) * basic_num;
1094             count *= ints[1];
1095 
1096 /* Now repeat with strides. */
1097             for (i=1; i<top_count; i++) {
1098                 count += ints[1+i] * basic_num;
1099                 *curr_index += ints[1+i] * basic_num;
1100             }
1101         }
1102         break;
1103 
1104 #if defined HAVE_DECL_MPI_COMBINER_HINDEXED_BLOCK && HAVE_DECL_MPI_COMBINER_HINDEXED_BLOCK
1105     case MPI_COMBINER_HINDEXED_BLOCK:
1106 #endif
1107     case MPI_COMBINER_INDEXED_BLOCK:
1108         top_count = ints[0];
1109         ADIOI_Type_get_envelope(types[0], &old_nints, &old_nadds,
1110                                 &old_ntypes, &old_combiner); 
1111         ADIOI_Datatype_iscontig(types[0], &old_is_contig);
1112 
1113         prev_index = *curr_index;
1114         if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
1115             count = ADIOI_Count_contiguous_blocks(types[0], curr_index);
1116         else count = 1;
1117 
1118         if (prev_index == *curr_index) {
1119 /* simplest case, indexed type made up of basic or contiguous types */
1120             count = top_count;
1121             *curr_index += count;
1122         }
1123         else {
1124 /* indexed type made up of noncontiguous derived types */
1125             basic_num = *curr_index - prev_index;
1126 
1127 /* The noncontiguous types have to be replicated blocklens[i] times
1128    and then strided. */
1129             *curr_index += (ints[1]-1) * basic_num;
1130             count *= ints[1];
1131 
1132 /* Now repeat with strides. */
1133             *curr_index += (top_count-1) * count;
1134             count *= top_count;
1135         }
1136         break;
1137 
1138     case MPI_COMBINER_STRUCT: 
1139     case MPI_COMBINER_STRUCT_INTEGER: 
1140         top_count = ints[0];
1141         count = 0;
1142         for (n=0; n<top_count; n++) {
1143             ADIOI_Type_get_envelope(types[n], &old_nints, &old_nadds,
1144                                     &old_ntypes, &old_combiner); 
1145             ADIOI_Datatype_iscontig(types[n], &old_is_contig);
1146 
1147             prev_index = *curr_index;
1148             if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
1149             count += ADIOI_Count_contiguous_blocks(types[n], curr_index);
1150 
1151             if (prev_index == *curr_index) {
1152 /* simplest case, current type is basic or contiguous types */
1153                 count++;
1154                 (*curr_index)++;
1155             }
1156             else {
1157 /* current type made up of noncontiguous derived types */
1158 /* The current type has to be replicated blocklens[n] times */
1159 
1160                 num = *curr_index - prev_index;
1161                 count += (ints[1+n]-1)*num;
1162                 (*curr_index) += (ints[1+n]-1)*num;
1163             }
1164         }
1165         break;
1166 
1167     case MPI_COMBINER_RESIZED: 
1168         /* treat it as a struct with lb, type, ub */
1169 
1170         /* add 2 for lb and ub */
1171         (*curr_index) += 2;
1172         count += 2;
1173 
1174         /* add for datatype */ 
1175         ADIOI_Type_get_envelope(types[0], &old_nints, &old_nadds,
1176                                 &old_ntypes, &old_combiner); 
1177         ADIOI_Datatype_iscontig(types[0], &old_is_contig);
1178 
1179         if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig)) {
1180             count += ADIOI_Count_contiguous_blocks(types[0], curr_index);
1181         }
1182         else {
1183         /* basic or contiguous type */
1184             count++;
1185             (*curr_index)++;
1186         }
1187         break;
1188 
1189     default:
1190         /* TODO: FIXME */
1191         DBG_FPRINTF(stderr, "Error: Unsupported datatype passed to ADIOI_Count_contiguous_blocks, combiner = %d\n", combiner);
1192         MPI_Abort(MPI_COMM_WORLD, 1);
1193     }
1194 
1195 #ifndef MPISGI
1196 /* There is a bug in SGI's impl. of MPI_Type_get_contents. It doesn't
1197    return new datatypes. Therefore no need to free. */
1198     for (i=0; i<ntypes; i++) {
1199         MPI_Type_get_envelope(types[i], &old_nints, &old_nadds, &old_ntypes,
1200                               &old_combiner);
1201         if (old_combiner != MPI_COMBINER_NAMED) MPI_Type_free(types+i);
1202     }
1203 #endif
1204 
1205     ADIOI_Free(ints);
1206     ADIOI_Free(adds);
1207     ADIOI_Free(types);
1208     return count;
1209 }
1210 
1211 
1212 /****************************************************************/
1213 
1214 /* ADIOI_Optimize_flattened()
1215  *
1216  * Scans the blocks of a flattened type and merges adjacent blocks 
1217  * together, resulting in a shorter blocklist (and thus fewer
1218  * contiguous operations).
1219  *
1220  * NOTE: a further optimization would be to remove zero length blocks. However,
1221  * the first and last blocks must remain as zero length first or last block 
1222  * indicates UB and LB.  Furthermore, once the "zero length blocklen" fix
1223  * went in, the flattened representation should no longer have zero-length
1224  * blocks except for UB and LB markers.
1225  */
1226 void ADIOI_Optimize_flattened(ADIOI_Flatlist_node *flat_type)
1227 {
1228     int i, j, opt_blocks;
1229     ADIO_Offset *opt_blocklens;
1230     ADIO_Offset *opt_indices;
1231 
1232     opt_blocks = 1;
1233     
1234     /* save number of noncontiguous blocks in opt_blocks */
1235     for (i=0; i < (flat_type->count - 1); i++) {
1236         if ((flat_type->indices[i] + flat_type->blocklens[i] !=
1237              flat_type->indices[i + 1]))
1238             opt_blocks++;
1239     }
1240 
1241     /* if we can't reduce the number of blocks, quit now */
1242     if (opt_blocks == flat_type->count) return;
1243 
1244     opt_blocklens = (ADIO_Offset *) ADIOI_Malloc(opt_blocks * sizeof(ADIO_Offset));
1245     opt_indices = (ADIO_Offset *)ADIOI_Malloc(opt_blocks*sizeof(ADIO_Offset));
1246 
1247     /* fill in new blocklists */
1248     opt_blocklens[0] = flat_type->blocklens[0];
1249     opt_indices[0] = flat_type->indices[0];
1250     j = 0;
1251     for (i=0; i < (flat_type->count - 1); i++) {
1252         if ((flat_type->indices[i] + flat_type->blocklens[i] ==
1253              flat_type->indices[i + 1]))
1254             opt_blocklens[j] += flat_type->blocklens[i + 1];
1255         else {
1256             j++;
1257             opt_indices[j] = flat_type->indices[i + 1];
1258             opt_blocklens[j] = flat_type->blocklens[i + 1];
1259         } 
1260     }
1261     flat_type->count = opt_blocks;
1262     ADIOI_Free(flat_type->blocklens);
1263     ADIOI_Free(flat_type->indices);
1264     flat_type->blocklens = opt_blocklens;
1265     flat_type->indices = opt_indices;
1266     return;
1267 }
1268 
1269 void ADIOI_Delete_flattened(MPI_Datatype datatype)
1270 {
1271     ADIOI_Flatlist_node *flat, *prev;
1272 
1273     prev = flat = ADIOI_Flatlist;
1274     while (flat && (flat->type != datatype)) {
1275         prev = flat;
1276         flat = flat->next;
1277     }
1278     if (flat) {
1279         prev->next = flat->next;
1280         if (flat->blocklens) ADIOI_Free(flat->blocklens);
1281         if (flat->indices) ADIOI_Free(flat->indices);
1282         ADIOI_Free(flat);
1283     }
1284 }
1285 
1286 ADIOI_Flatlist_node * ADIOI_Flatten_and_find(MPI_Datatype datatype)
1287 {
1288     ADIOI_Flatlist_node *node;
1289     ADIOI_Flatten_datatype(datatype);
1290     node = ADIOI_Flatlist;
1291     while (node->type != datatype) node = node->next;
1292     return node;
1293 }

/* [<][>][^][v][top][bottom][index][help] */