root/ompi/mca/io/romio321/romio/adio/common/ad_coll_build_req_new.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. view_state_get_cur_sz
  2. view_state_get_next_len
  3. view_state_add_region
  4. ADIOI_init_view_state
  5. get_next_fr_off
  6. find_next_off
  7. ADIOI_Build_agg_reqs
  8. ADIOI_Build_client_reqs
  9. ADIOI_Build_client_pre_req
  10. process_pre_req
  11. ADIOI_Build_client_req

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
   2 /* 
   3  *
   4  *   Copyright (C) 1997 University of Chicago. 
   5  *   See COPYRIGHT notice in top-level directory.
   6  */
   7 
   8 #include <assert.h>
   9 #include "adio.h"
  10 #include "adio_extern.h"
  11 #ifdef AGGREGATION_PROFILE
  12 #include "mpe.h"
  13 #endif
  14 #include "heap-sort.h"
  15 
  16 /*
  17 #define DEBUG1
  18 #define DEBUG2
  19 #define DEBUG3
  20 */
  21 /* #define DEBUG_HEAP */
  22 
  23 #define DTYPE_SKIP
  24 
  25 #ifdef DEBUG3
  26 static char *off_type_name[MAX_OFF_TYPE] = {"TEMP_OFFSETS",
  27                                      "REAL_OFFSETS"};
  28 #endif
  29 
  30 /* Simple function to return the size of the view_state. */
  31 static inline ADIO_Offset view_state_get_cur_sz(view_state *tmp_view_state_p,
  32                                                 int op_type)
  33 {
  34     flatten_state *tmp_state_p = NULL;
  35     switch(op_type)
  36     {   
  37         case TEMP_OFF:
  38             tmp_state_p = &(tmp_view_state_p->tmp_state);
  39             break;
  40         case REAL_OFF:
  41             tmp_state_p = &(tmp_view_state_p->cur_state);
  42             break;
  43         default:
  44             fprintf(stderr, "op_type invalid\n");
  45     }
  46     return tmp_state_p->cur_sz;
  47 }
  48 
  49 /* Simple function to return the len of the next piece of the view_state. */
  50 static inline ADIO_Offset view_state_get_next_len(view_state *tmp_view_state_p,
  51                                                   int op_type)
  52 {
  53     flatten_state *tmp_state_p = NULL;
  54     switch(op_type)
  55     {
  56         case TEMP_OFF:
  57             tmp_state_p = &(tmp_view_state_p->tmp_state);
  58             break;
  59         case REAL_OFF:
  60             tmp_state_p = &(tmp_view_state_p->cur_state);
  61             break;
  62         default:
  63             fprintf(stderr, "op_type invalid\n");
  64     }
  65     return (ADIO_Offset) 
  66         tmp_view_state_p->flat_type_p->blocklens[tmp_state_p->idx] -
  67         tmp_state_p->cur_reg_off;
  68 }
  69 
  70 /* Add up to a region of a file view and no larger than a max size.
  71  * The view_state is always consistent with the abs_off and where the
  72  * index and cur_reg_off point to.  The regions should be coalesced if
  73  * possible later on. */
  74 static inline int view_state_add_region(
  75     ADIO_Offset max_sz,
  76     view_state *tmp_view_state_p, 
  77     ADIO_Offset *st_reg_p,
  78     ADIO_Offset *tmp_reg_sz_p,
  79     int op_type)
  80 {
  81     ADIOI_Flatlist_node *tmp_flat_type_p = NULL;
  82     flatten_state *tmp_state_p = NULL;
  83     int64_t data_sz = 0;
  84 
  85 #ifdef AGGREGATION_PROFILE
  86     /* MPE_Log_event (5020, 0, NULL); */
  87 #endif
  88 
  89     switch(op_type)
  90     {
  91         case TEMP_OFF:
  92             tmp_state_p = &(tmp_view_state_p->tmp_state);
  93             break;
  94         case REAL_OFF:
  95             tmp_state_p = &(tmp_view_state_p->cur_state);
  96             break;
  97         default:
  98             fprintf(stderr, "op_type invalid\n");
  99     }
 100 
 101     tmp_flat_type_p = tmp_view_state_p->flat_type_p;
 102 
 103     *st_reg_p = tmp_state_p->abs_off;
 104 
 105     /* Should be looking at some data (or it's a zero len blocklens
 106      * (i.e. placeholder). */
 107     assert(tmp_state_p->cur_reg_off != 
 108            tmp_flat_type_p->blocklens[tmp_state_p->idx]);
 109     /* Shouldn't have been called if the view_state is done. */
 110     assert(tmp_state_p->cur_sz != tmp_view_state_p->sz);
 111 
 112     /* Make sure we are not in a non-zero region in the flat_type */
 113     assert(tmp_flat_type_p->blocklens[tmp_state_p->idx] != 0);
 114     
 115 #ifdef DEBUG3
 116     fprintf(stderr, "view_state:(blocklens[%Ld]=%d,cur_reg_off=%Ld,"
 117             "max_sz=%Ld)\n", tmp_state_p->idx, 
 118             tmp_flat_type_p->blocklens[tmp_state_p->idx], 
 119             tmp_state_p->cur_reg_off, max_sz);
 120 #endif
 121 
 122     /* Can it add the whole piece? */
 123     if (tmp_flat_type_p->blocklens[tmp_state_p->idx] - 
 124         tmp_state_p->cur_reg_off <= max_sz)
 125     {
 126         data_sz = tmp_flat_type_p->blocklens[tmp_state_p->idx] -
 127             tmp_state_p->cur_reg_off;
 128 
 129         tmp_state_p->cur_sz += data_sz;
 130 
 131         /* Advance the abs_off to the beginning of the next piece */
 132         if (tmp_flat_type_p->count == 1)
 133         {
 134             assert(tmp_flat_type_p->blocklens[tmp_state_p->idx] != 0);
 135             tmp_state_p->abs_off += data_sz;
 136 #ifdef DEBUG3 
 137             fprintf(stderr, "view_state_add_region: %s contig type "
 138                     "(old abs_off=%Ld,abs_off=%Ld,cur_sz=%Ld,reg size=%Ld)\n", 
 139                     off_type_name[op_type], tmp_state_p->abs_off - data_sz, 
 140                     tmp_state_p->abs_off, tmp_state_p->cur_sz, data_sz);
 141 #endif
 142         }
 143         else
 144         { 
 145             /* Is this the last region in the datatype? */
 146             if (tmp_state_p->idx == (tmp_flat_type_p->count - 1))
 147             {
 148                 tmp_state_p->abs_off += data_sz -
 149                     tmp_flat_type_p->indices[tmp_flat_type_p->count-1] -
 150                     tmp_flat_type_p->blocklens[tmp_flat_type_p->count-1] +
 151                     tmp_view_state_p->ext;
 152 #ifdef DEBUG3
 153             fprintf(stderr, "view_state_add_region: %s last region for type "
 154                     "(old abs_off=%Ld,abs_off=%Ld,cur_sz=%Ld,reg size=%Ld)\n", 
 155                     off_type_name[op_type], tmp_state_p->abs_off - data_sz, 
 156                     tmp_state_p->abs_off, tmp_state_p->cur_sz, data_sz);
 157 #endif
 158             }
 159             else
 160             {
 161                 tmp_state_p->abs_off += 
 162                     tmp_flat_type_p->indices[tmp_state_p->idx + 1] -
 163                     (tmp_flat_type_p->indices[tmp_state_p->idx] +
 164                      tmp_state_p->cur_reg_off);
 165 #ifdef DEBUG3
 166             fprintf(stderr, "view_state_add_region: %s inner region type "
 167                     "(old abs_off=%Ld,abs_off=%Ld,cur_sz=%Ld,reg size=%Ld)\n", 
 168                     off_type_name[op_type], tmp_state_p->abs_off - 
 169                     (tmp_flat_type_p->indices[tmp_state_p->idx + 1] -
 170                     (tmp_flat_type_p->indices[tmp_state_p->idx] +
 171                      tmp_state_p->cur_reg_off)), tmp_state_p->abs_off, 
 172                     tmp_state_p->cur_sz, data_sz);
 173 #endif
 174             }
 175             /* Increment idx to next non-zero region in the flat_type */
 176             do {
 177                 tmp_state_p->idx = 
 178                     (tmp_state_p->idx + 1) % tmp_flat_type_p->count;
 179             } while (tmp_flat_type_p->blocklens[tmp_state_p->idx] == 0);
 180         }
 181         tmp_state_p->cur_reg_off = 0;
 182     }
 183     else /* Add part of the piece */
 184     {
 185         data_sz = max_sz;
 186         tmp_state_p->cur_reg_off += data_sz;
 187         tmp_state_p->abs_off += data_sz;
 188         tmp_state_p->cur_sz += data_sz;
 189 #ifdef DEBUG3 
 190             fprintf(stderr, "view_state_add_region: %s partial region type "
 191                     "(cur_reg_off=%Ld,abs_off=%Ld,cur_sz=%Ld,reg size=%Ld\n", 
 192                     off_type_name[op_type], tmp_state_p->cur_reg_off, 
 193                     tmp_state_p->abs_off, tmp_state_p->cur_sz, data_sz);
 194 #endif
 195     }
 196 
 197     *tmp_reg_sz_p = data_sz;
 198 #ifdef AGGREGATION_PROFILE
 199     /* MPE_Log_event (5021, 0, NULL); */
 200 #endif
 201     return 0;
 202 }
 203 
 204 /* Set up the abs_off, idx, and cur_reg_off of a view_state for the
 205  * tmp_state or the cur_state. */
 206 int ADIOI_init_view_state(int file_ptr_type,
 207                     int nprocs, 
 208                     view_state *view_state_arr,
 209                     int op_type)
 210 {
 211     ADIOI_Flatlist_node *tmp_flat_type_p = NULL;
 212     ADIO_Offset tmp_off_used = 0, st_reg = 0, tmp_reg_sz = 0;
 213     int i;
 214     flatten_state *tmp_state_p = NULL;
 215     view_state *tmp_view_p = NULL;
 216 
 217     for (i = 0; i < nprocs; i++)
 218     {
 219         switch(op_type)
 220         {
 221             case TEMP_OFF:
 222                 tmp_state_p = &(view_state_arr[i].tmp_state);
 223                 break;
 224             case REAL_OFF:
 225                 tmp_state_p = &(view_state_arr[i].cur_state);
 226                 break;
 227             default:
 228                 fprintf(stderr, "op_type invalid\n");
 229         }
 230         
 231         tmp_view_p = &(view_state_arr[i]);
 232         tmp_flat_type_p = tmp_view_p->flat_type_p;
 233 
 234         if (file_ptr_type == ADIO_INDIVIDUAL)
 235             tmp_state_p->abs_off = tmp_view_p->fp_ind;
 236         else
 237             tmp_state_p->abs_off = tmp_view_p->disp;
 238         
 239         tmp_off_used = 0;
 240 
 241         /* initialize tmp_state idx */
 242         while (tmp_flat_type_p->blocklens[tmp_state_p->idx] == 0)
 243             tmp_state_p->idx = (tmp_state_p->idx + 1) % tmp_flat_type_p->count;
 244         if (file_ptr_type == ADIO_EXPLICIT_OFFSET)
 245             tmp_state_p->abs_off += tmp_flat_type_p->indices[tmp_state_p->idx];
 246 
 247         /* Initialize the abs_off by moving into the datatype 
 248          * byte_off bytes.  Since we only do this in the beginning, we
 249          * make the assumption that pieces are added whole until the last
 250          * piece which MAY be partial. */
 251         while (tmp_off_used != tmp_view_p->byte_off)
 252         {
 253             view_state_add_region(
 254                 tmp_view_p->byte_off - tmp_off_used,
 255                 &(view_state_arr[i]), &st_reg, &tmp_reg_sz, 
 256                 op_type);
 257         }
 258         
 259         /* Re-initialize the cur_size so that the abs_off was set to
 260          * the proper position while the actual size = 0.*/
 261         tmp_state_p->cur_sz = 0;
 262 #ifdef DEBUG1
 263         fprintf(stderr, "init_view_state: %s (idx=%d,byte_off=%Ld,"
 264                 "abs_off=%Ld,reg_off=%Ld,sz=%Ld)\n", off_type_name[op_type], 
 265                 i, tmp_view_p->byte_off, tmp_state_p->abs_off,
 266                 tmp_state_p->cur_reg_off, tmp_view_p->sz);
 267 #endif  
 268 
 269     }
 270     return 0;
 271 }
 272 
 273 /* Return the next file realm offset and length for this datatype state
 274  * within a particular file realm. */
 275 static inline int get_next_fr_off(ADIO_File fd,
 276                                   ADIO_Offset off,
 277                                   ADIO_Offset fr_st_off,
 278                                   MPI_Datatype *fr_type_p,
 279                                   ADIO_Offset *fr_next_off_p,
 280                                   ADIO_Offset *fr_max_len_p) 
 281 {
 282     MPI_Aint fr_extent = -1, lb;
 283     ADIO_Offset tmp_off, off_rem;
 284     ADIOI_Flatlist_node *fr_node_p = ADIOI_Flatlist;
 285     int i = -1, fr_dtype_ct = 0;
 286 
 287     /* Should have already been flattened in calc_file_realms() */
 288     while (fr_node_p->type != (*fr_type_p))
 289         fr_node_p = fr_node_p->next;
 290     assert(fr_node_p != NULL);
 291 
 292     /* Did we get to the first region of the file realm? */
 293     if (off - fr_st_off < 0)
 294     {
 295         *fr_next_off_p = fr_st_off + fr_node_p->indices[0];
 296         *fr_max_len_p = fr_node_p->blocklens[0];
 297         return 0;
 298     }
 299 
 300     /* Calculate how many times to loop through the fr_type 
 301      * and where the next fr_off is. */
 302     MPI_Type_get_extent(*fr_type_p, &lb, &fr_extent);
 303     tmp_off = off - fr_st_off;
 304     fr_dtype_ct = tmp_off / fr_extent;
 305     off_rem = tmp_off % fr_extent;
 306     for (i = 0; i < fr_node_p->count; i++)
 307     {
 308         if (off_rem < fr_node_p->indices[i])
 309         {
 310             *fr_next_off_p = fr_st_off +
 311                 (fr_dtype_ct * fr_extent) + fr_node_p->indices[i];
 312             *fr_max_len_p = fr_node_p->blocklens[i];
 313             return 0;
 314         }
 315         else if (off_rem < fr_node_p->indices[i] + fr_node_p->blocklens[i])
 316         {
 317             *fr_next_off_p = off;
 318             *fr_max_len_p = fr_node_p->blocklens[i] - 
 319                 (off_rem - fr_node_p->indices[i]);
 320             return off;
 321         }
 322     }
 323     
 324     /* Shouldn't get here. */
 325     fprintf(stderr, "get_next_fr_off: Couldn't find the correct "
 326             "location of the next offset for this file realm.\n");
 327     return -1;
 328 }
 329 
 330 /* Look in all the view states for the first offset within a given
 331  * file realm.  Report the end of a contiguous region within the file
 332  * realm (possibly more than the actual view state may be able to
 333  * process contiguously). */
 334 static inline int find_next_off(ADIO_File fd,
 335                                 view_state *view_state_p,
 336                                 ADIO_Offset fr_st_off,
 337                                 MPI_Datatype *fr_type_p,
 338                                 int op_type,
 339                                 ADIO_Offset *cur_off_p,
 340                                 ADIO_Offset *cur_reg_max_len_p)
 341 {
 342     ADIOI_Flatlist_node *tmp_flat_type_p = NULL;
 343     ADIO_Offset tmp_off = -1, fr_next_off = -1, fr_max_len = -1, 
 344         tmp_fr_max_len = -1;
 345     int ret = 0;
 346     flatten_state *tmp_state_p = NULL;
 347     ADIO_Offset tmp_st_off = 0, tmp_reg_sz = 0;
 348 #ifdef DTYPE_SKIP
 349     int skip_type_ct;
 350 #endif
 351 
 352 #ifdef AGGREGATION_PROFILE
 353     /* MPE_Log_event (5022, 0, NULL); */
 354 #endif
 355 
 356     switch(op_type)
 357     {
 358         case TEMP_OFF:
 359             tmp_state_p = &(view_state_p->tmp_state);
 360             break;
 361         case REAL_OFF:
 362             tmp_state_p = &(view_state_p->cur_state);
 363             break;
 364         default:
 365             fprintf(stderr, "op_type invalid\n");
 366     }
 367         
 368     tmp_flat_type_p = view_state_p->flat_type_p;
 369 
 370     /* Can we use this proc? */
 371     if (tmp_state_p->cur_sz < view_state_p->sz) {
 372         tmp_st_off = 0;
 373         tmp_reg_sz = 0;
 374         /* If the current region is not within the file realm, advance
 375          * the state until it is and calculate the end of the next file 
 376          * realm in fr_max_len. */
 377         ret = get_next_fr_off(fd,
 378                               tmp_state_p->abs_off, 
 379                               fr_st_off,
 380                               fr_type_p,
 381                               &fr_next_off,
 382                               &fr_max_len);
 383         
 384         while ((tmp_state_p->abs_off < fr_next_off) &&
 385                (tmp_state_p->cur_sz != view_state_p->sz))
 386         {
 387             
 388         /* While this might appear to be erroneous at first,
 389          * view_state_add_region can only add a single piece at a
 390          * time.  Therefore, it will never overshoot the beginning
 391          * of the next file realm.  When it finally does enter the
 392          * next file realm it will not be able to go beyond its
 393          * first piece. */
 394             
 395 #ifdef DTYPE_SKIP
 396             if (tmp_flat_type_p->count > 1) {
 397                 /* let's see if we can skip whole datatypes */
 398                 skip_type_ct = (fr_next_off - tmp_state_p->abs_off) /
 399                     view_state_p->ext;
 400                 if (skip_type_ct > 0) {
 401                     /* before we go on, let's check if we've actually
 402                      * finished up already */
 403                     tmp_state_p->cur_sz += skip_type_ct *
 404                         view_state_p->type_sz;
 405                     if (tmp_state_p->cur_sz >= view_state_p->sz) {
 406                         tmp_state_p->cur_sz = view_state_p->sz;
 407                         break;
 408                     }
 409                     tmp_state_p->abs_off += skip_type_ct * view_state_p->ext;
 410                 }
 411             }
 412 #endif
 413             view_state_add_region(
 414                 fr_next_off - tmp_state_p->abs_off,
 415                 view_state_p,
 416                 &tmp_st_off,
 417                 &tmp_reg_sz,
 418                 op_type);
 419 
 420             ret = get_next_fr_off(fd,
 421                                   tmp_state_p->abs_off, 
 422                                   fr_st_off,
 423                                   fr_type_p,
 424                                   &fr_next_off,
 425                                   &fr_max_len);
 426         }
 427 
 428         if (tmp_state_p->cur_sz != view_state_p->sz) {
 429             tmp_off = tmp_state_p->abs_off;
 430             /* Calculate how much of the remaining file realm there is from the
 431              * current offset */
 432             tmp_fr_max_len = fr_next_off + fr_max_len - tmp_off;
 433         }
 434     }
 435 
 436     *cur_off_p = tmp_off;
 437     *cur_reg_max_len_p = tmp_fr_max_len;
 438 #ifdef AGGREGATION_PROFILE
 439     /* MPE_Log_event (5023, 0, NULL); */
 440 #endif
 441     return ret;
 442 }
 443 
 444 /* Upon completion of a full collective buffer, end of a file realm
 445  * region (data sieving), or the end of all I/O for an aggregator, we
 446  * should return a list of MPI_Datatypes that correspond to client
 447  * communication into a collective buffer, a list of corresponding
 448  * sizes, and an aggregate MPI_Datatype which will be used as a
 449  * filetype in MPI_File_write/read on the aggregator. */ 
 450 int ADIOI_Build_agg_reqs(ADIO_File fd, int rw_type, int nprocs,
 451                          view_state *client_file_view_state_arr,
 452                          MPI_Datatype *client_comm_dtype_arr,
 453                          ADIO_Offset *client_comm_sz_arr,
 454                          ADIO_Offset *agg_dtype_offset_p,
 455                          MPI_Datatype *agg_dtype_p)
 456 {
 457     MPI_Aint **client_disp_arr = NULL, *agg_disp_arr = NULL;
 458     int **client_blk_arr = NULL, *agg_blk_arr = NULL;
 459     ADIO_Offset tmp_coll_buf_sz = 0, st_reg = 0, act_reg_sz = 0;
 460     ADIO_Offset cur_off = -1, cur_reg_max_len = -1;
 461     ADIO_Offset ds_fr_end = -1;
 462     ADIO_Offset *fr_st_off_arr = fd->file_realm_st_offs;
 463     MPI_Datatype *fr_type_arr = fd->file_realm_types;
 464     int *client_ol_ct_arr = NULL;
 465     int *client_ol_cur_ct_arr = NULL;
 466     int agg_ol_ct = 0, agg_ol_cur_ct = 0;
 467     int cur_off_proc = -1;
 468     int next_off_idx = -1;
 469     int i = 0, j = 0, all_done = -1;
 470     int agg_idx = fd->my_cb_nodes_index;
 471     heap_t offset_heap;
 472     ADIO_Offset next_off = -1, next_reg_max_len = -1;
 473 
 474     /* Used for coalescing ol pairs next to each other. */
 475     ADIO_Offset *client_comm_next_off_arr = NULL;
 476     ADIO_Offset agg_next_off = -1;
 477 #ifdef AGGREGATION_PROFILE
 478     MPE_Log_event (5016, 0, NULL);
 479 #endif
 480 
 481     memset(client_comm_sz_arr, 0, nprocs*sizeof(ADIO_Offset));
 482 
 483     if ((client_comm_next_off_arr = (ADIO_Offset *) 
 484          ADIOI_Malloc(nprocs*sizeof(ADIO_Offset))) == NULL)
 485     {
 486         fprintf(stderr, "ADIOI_Build_agg_reqs: malloc client_next_off_arr "
 487                 "failed\n");
 488         return -1;
 489     }
 490     
 491     if ((client_ol_ct_arr = (int *) ADIOI_Calloc(nprocs, sizeof(int))) == NULL)
 492     {
 493         fprintf(stderr, "ADIOI_Build_agg_reqs: "
 494                 "malloc client_ol_ct_arr failed\n");
 495         return -1;
 496     }
 497     if ((client_ol_cur_ct_arr = 
 498          (int *) ADIOI_Calloc(nprocs, sizeof(int))) == NULL)
 499     {
 500         fprintf(stderr, "ADIOI_Build_agg_reqs: "
 501                 "malloc client_ol_cur_ct_arr failed\n");
 502         return -1;
 503     }
 504 
 505     /* On the first pass see how many offset-length pairs are
 506      * necessary for each client.  Then allocate the correct amount of
 507      * offset-length pairs for describing the collective buffer.  All
 508      * data is processed in order by the aggregator's file realm.  On
 509      * the second pass, set the offset-length pairs to the correct
 510      * values. */
 511     for (i = 0; i < MAX_OFF_TYPE; i++)
 512     {
 513         memset(client_comm_next_off_arr, -1, nprocs*sizeof(ADIO_Offset));
 514         tmp_coll_buf_sz = 0;
 515         ds_fr_end = -1;
 516 
 517         /* initialize heap */
 518         ADIOI_Heap_create(&offset_heap, nprocs);
 519         offset_heap.size = 0;
 520         
 521         for (j=0; j<nprocs; j++) {
 522             find_next_off(fd, 
 523                           &client_file_view_state_arr[j],
 524                           fr_st_off_arr[agg_idx],
 525                           &(fr_type_arr[agg_idx]),
 526                           i,
 527                           &cur_off,
 528                           &cur_reg_max_len);
 529             if ((cur_off != -1) && (cur_reg_max_len > 0)) {
 530                 ADIOI_Heap_insert(&offset_heap, cur_off, j, cur_reg_max_len);
 531 #ifdef DEBUG_HEAP
 532                 printf ("initial: inserting offset %lld with "
 533                         "cur_reg_max_len = %lld for p%d\n",
 534                         cur_off, cur_reg_max_len, j);
 535 #endif
 536             }
 537 
 538         }
 539         if (!offset_heap.size)
 540             ADIOI_Heap_insert(&offset_heap, -1, -1, -1);
 541 
 542         while (tmp_coll_buf_sz < fd->hints->cb_buffer_size)
 543         {
 544             /* Find the next process with the next region within the
 545              * file realm and the maximum amount that can be added for
 546              * this particular file realm as a contiguous region. */
 547             ADIOI_Heap_extract_min(&offset_heap, &cur_off, &cur_off_proc,
 548                              &cur_reg_max_len);
 549 #ifdef DEBUG_HEAP
 550             printf ("extracted cur_off %lld from proc %d\n",
 551                     cur_off, cur_off_proc);
 552 #endif
 553 
 554             if (cur_off == -1)
 555                 break;
 556             
 557 #ifdef DEBUG3
 558             fprintf(stderr, "ADIOI_Build_agg_reqs: %s proc %d start/add to"
 559                     " list (max_reg_fr=%Ld,tmp_coll_buf_sz=%Ld,"
 560                     "cb_buffer_size=%d)\n", off_type_name[i], cur_off_proc,
 561                     cur_reg_max_len, tmp_coll_buf_sz, 
 562                     fd->hints->cb_buffer_size);
 563 #endif
 564             
 565             /* We process only contiguous file realm regions if we are
 566              * using data sieving. Note that we only do this for
 567              * writes since reads can be data sieved across each other
 568              * without consistency issues. */
 569             if ((fd->hints->ds_write == ADIOI_HINT_ENABLE ||
 570                  fd->hints->ds_write == ADIOI_HINT_AUTO) &&
 571                 rw_type == ADIOI_WRITE && fd->hints->cb_nodes > 1)
 572             {
 573 #ifdef DEBUG2
 574                 fprintf(stderr, "ADIOI_Build_agg_reqs: "
 575                         "Warning - Data sieving writes on\n");
 576 #endif
 577                 if (ds_fr_end == -1)
 578                 {
 579                     ds_fr_end = cur_off + cur_reg_max_len;
 580 #ifdef DEBUG1
 581                 fprintf(stderr, "ADIOI_Build_agg_reqs: "
 582                         "cur_off=%Ld, cur_reg_max_len=%Ld\n"
 583                         "Data sieving file realm end initialized to %Ld\n",
 584                         cur_off,
 585                         cur_reg_max_len,
 586                         ds_fr_end);
 587 #endif
 588                 }
 589                 else
 590                 {
 591                     /* The next off switched file realms, so we will stop
 592                      * here. */
 593                     if (ds_fr_end != cur_off + cur_reg_max_len)
 594                     {
 595 #ifdef DEBUG1
 596                         fprintf(stderr, "ADIOI_Build_agg_reqs: "
 597                                 "Data sieving file realm end changed from "
 598                                 "%Ld to %Ld\n", ds_fr_end, 
 599                                 cur_off + cur_reg_max_len);
 600 #endif
 601                         break;
 602                     }
 603                 }
 604             }
 605             
 606             /* Add up to the end of the file realm or the collective
 607              * buffer. */
 608             if (cur_reg_max_len > (fd->hints->cb_buffer_size - 
 609                                    tmp_coll_buf_sz))
 610                 cur_reg_max_len = fd->hints->cb_buffer_size - tmp_coll_buf_sz;
 611 
 612             view_state_add_region(
 613                 cur_reg_max_len,
 614                 &(client_file_view_state_arr[cur_off_proc]), 
 615                 &st_reg, &act_reg_sz, i);
 616 
 617             switch(i)
 618             {
 619                 case TEMP_OFF:
 620                     /* Increment the ol list count for each proc and
 621                      * the used part of the collective buffer if the
 622                      * next region is not adjacent to the previous
 623                      * region. */
 624                     if (client_comm_next_off_arr[cur_off_proc] != 
 625                         tmp_coll_buf_sz)
 626                     {
 627                         (client_ol_ct_arr[cur_off_proc])++;
 628                     }
 629                     client_comm_next_off_arr[cur_off_proc] = 
 630                         tmp_coll_buf_sz + act_reg_sz;
 631                     
 632                     if (agg_next_off != st_reg)
 633                         agg_ol_ct++;
 634                     agg_next_off = st_reg + act_reg_sz;
 635                     break;
 636                 case REAL_OFF:
 637                     /* Add this region to the proper client ol list if
 638                      * the next region is not adjacent to the previous
 639                      * region. */
 640                     next_off_idx = client_ol_cur_ct_arr[cur_off_proc];
 641                     if (client_comm_next_off_arr[cur_off_proc] != 
 642                         tmp_coll_buf_sz)
 643                     {
 644                         client_disp_arr[cur_off_proc][next_off_idx] =
 645                             tmp_coll_buf_sz;
 646                         client_blk_arr[cur_off_proc][next_off_idx] = 
 647                             act_reg_sz;
 648                         (client_ol_cur_ct_arr[cur_off_proc])++;
 649                     }
 650                     else
 651                     {
 652                         client_blk_arr[cur_off_proc][next_off_idx - 1] 
 653                             += act_reg_sz;
 654                     }
 655                     client_comm_sz_arr[cur_off_proc] += act_reg_sz;
 656                     client_comm_next_off_arr[cur_off_proc] =
 657                         tmp_coll_buf_sz + act_reg_sz;
 658                     
 659                     /* Add to the aggregator filetype if the next
 660                      * region is not adjacent to the previous
 661                      * region. */
 662                     if (agg_next_off != st_reg)
 663                     {
 664                         /* this will enable initial offsets much further into
 665                          * the file than an MPI_Aint */
 666                         if (!agg_ol_cur_ct)
 667                             *agg_dtype_offset_p = st_reg;
 668                         agg_disp_arr[agg_ol_cur_ct] = st_reg -
 669                             (MPI_Aint) *agg_dtype_offset_p;
 670                         agg_blk_arr[agg_ol_cur_ct] = act_reg_sz;        
 671                         agg_ol_cur_ct++;
 672                     }
 673                     else
 674                     {
 675                         agg_blk_arr[agg_ol_cur_ct - 1] += act_reg_sz;
 676                     }
 677                     agg_next_off = st_reg + act_reg_sz;
 678                     
 679                     break;
 680                 default:
 681                     fprintf(stderr, "ADIOI_Build_agg_reqs: Impossible type\n");
 682             }
 683             tmp_coll_buf_sz += act_reg_sz;
 684 
 685             find_next_off(fd,
 686                           &client_file_view_state_arr[cur_off_proc],
 687                           fr_st_off_arr[agg_idx],
 688                           &(fr_type_arr[agg_idx]),
 689                           i,
 690                           &next_off,
 691                           &next_reg_max_len);
 692 
 693             if ((next_off != -1) || (!offset_heap.size)) {
 694                 ADIOI_Heap_insert(&offset_heap, next_off, cur_off_proc,
 695                             next_reg_max_len);
 696 #ifdef DEBUG_HEAP
 697                 printf ("inserting offset %lld for p%d\n", next_off,
 698                         cur_off_proc);
 699 #endif
 700             }
 701         }
 702         
 703         if (i == TEMP_OFF)
 704         {
 705             /* Allocate offset-length pairs for creating hindexed
 706              * MPI_Datatypes for both the client and the aggregator. */
 707             if ((client_disp_arr = (MPI_Aint **) 
 708                  ADIOI_Malloc(nprocs*sizeof(MPI_Aint *))) == NULL)
 709             {
 710                 fprintf(stderr, "ADIOI_Build_agg_reqs: malloc "
 711                         "client_disp_arr failed\n");
 712                 return -1;
 713             }
 714             if ((client_blk_arr = (int **) ADIOI_Malloc(
 715                      nprocs*sizeof(int *))) == NULL)
 716             {
 717                 ADIOI_Free(client_disp_arr);
 718                 fprintf(stderr, "ADIOI_Build_agg_reqs: malloc "
 719                         "client_blk_arr failed\n");
 720                 return -1;
 721             }    
 722             for (j = 0; j < nprocs; j++)
 723             {
 724                 if ((client_disp_arr[j] = (MPI_Aint *) ADIOI_Malloc(
 725                          client_ol_ct_arr[j]*sizeof(MPI_Aint))) == NULL)
 726                 {
 727                     fprintf(stderr, "ADIOI_Build_agg_reqs: malloc "
 728                             "client_disp_arr[%d] failed\n", j);
 729                     return -1;
 730                 }
 731                 if ((client_blk_arr[j] = (int *) 
 732                      ADIOI_Malloc(client_ol_ct_arr[j]*sizeof(int))) == NULL)
 733                 {
 734                     ADIOI_Free(client_disp_arr[j]);
 735                     fprintf(stderr, "ADIOI_Build_agg_reqs: malloc "
 736                             "client_blk_arr[%d] failed\n", j);
 737                     return -1;
 738                 }
 739             }
 740             
 741             if (agg_ol_ct > 0) 
 742             {
 743                 if ((agg_disp_arr = (MPI_Aint *) ADIOI_Malloc(
 744                          agg_ol_ct*sizeof(MPI_Aint))) == NULL)
 745                 {
 746                     fprintf(stderr, 
 747                             "ADIOI_Build_agg_reqs: malloc disp_arr failed\n");
 748                     return -1;
 749                 }
 750                 if ((agg_blk_arr = (int *) 
 751                      ADIOI_Malloc(agg_ol_ct*sizeof(int))) == NULL)
 752                 {
 753                     ADIOI_Free(agg_disp_arr);
 754                     fprintf(stderr, 
 755                             "ADIOI_Build_agg_reqs: malloc blk_arr failed\n");
 756                     return -1;
 757                 }
 758             }
 759         }
 760         ADIOI_Heap_free(&offset_heap);
 761     }
 762     
 763     /* Let the clients know if this aggregator is totally finished
 764      * with all possible client requests. */
 765     all_done = 1;
 766     for (i = 0; i < nprocs; i++)
 767     {
 768         if ((client_file_view_state_arr[i].cur_state.cur_sz !=
 769             client_file_view_state_arr[i].sz) ||
 770             client_comm_sz_arr[i] != 0)
 771         {
 772             all_done = 0;
 773             break;
 774         }
 775     }
 776     if (all_done == 1)
 777     {
 778         for (i = 0; i < nprocs; i++)
 779         {
 780             client_comm_sz_arr[i] = -1;
 781         }
 782     }
 783 
 784     /* Quick check to make sure we found all the ol pairs we thought
 785      * we did */
 786     for (i = 0; i < nprocs; i++)
 787     {
 788         if (client_ol_cur_ct_arr[i] != client_ol_ct_arr[i])
 789         {
 790             fprintf(stderr, "ADIOI_Build_agg_reqs: ERROR Process %d "
 791                     "processed only %d out of %d ol pairs\n", i, 
 792                     client_ol_cur_ct_arr[i],
 793                     client_ol_ct_arr[i]);
 794             return -1;
 795         }
 796     }
 797 #ifdef DEBUG1
 798     fprintf(stderr, "ADIOI_Build_agg_reqs:(client,ol_pairs,size_req)=");
 799     for (i = 0; i < nprocs; i++)
 800     {
 801         fprintf(stderr, "(%d,%d,%Ld)", i, client_ol_ct_arr[i],
 802                 client_comm_sz_arr[i]);
 803         if (i != nprocs - 1)
 804             fprintf(stderr, ",");
 805     }
 806     fprintf(stderr, "\n");
 807 #endif
 808 #ifdef DEBUG1
 809     fprintf(stderr, "ADIOI_Build_agg_reqs: Generated %d of %d "
 810             "aggregate offset-length pairs\n", agg_ol_cur_ct, agg_ol_ct);
 811 #endif
 812 #ifdef DEBUG2
 813     for (i = 0; i < nprocs; i++)
 814     {
 815         if (client_ol_ct_arr[i] > 0)
 816         {
 817             fprintf(stderr, "ADIOI_Build_agg_reqs: p %d (off,len) = ", i);
 818             for (j = 0; j < client_ol_ct_arr[i]; j++)
 819             {
 820                 fprintf(stderr, "[%d](%d,%d) ", j, 
 821                         client_disp_arr[i][j],
 822                         client_blk_arr[i][j]);
 823             }
 824             fprintf(stderr, "\n");
 825         }
 826     }    
 827     if (agg_ol_ct) {
 828         fprintf(stderr, "ADIOI_Build_agg_reqs:agg_type(off,len)=");
 829         for (i = 0; i < agg_ol_ct; i++)
 830             {
 831                 fprintf(stderr, "[%d](%d,%d)",
 832                         i, agg_disp_arr[i], agg_blk_arr[i]);
 833                 if (i != agg_ol_ct - 1)
 834                     fprintf(stderr, ",");
 835             }
 836         fprintf(stderr, "\n");
 837     }
 838 #endif
 839 
 840     assert(agg_ol_cur_ct == agg_ol_ct);
 841 
 842     /* Create all the client and aggregate MPI_Datatypes */
 843     for (i = 0; i < nprocs; i++)
 844     {
 845         if (client_comm_sz_arr[i] > 0)
 846         {
 847             MPI_Type_create_hindexed(client_ol_ct_arr[i], client_blk_arr[i],
 848                                      client_disp_arr[i], MPI_BYTE, 
 849                                      &(client_comm_dtype_arr[i]));
 850             MPI_Type_commit(&(client_comm_dtype_arr[i]));
 851         }
 852         else
 853         {
 854             client_comm_dtype_arr[i] = MPI_BYTE;
 855         }
 856         ADIOI_Free(client_blk_arr[i]);
 857         ADIOI_Free(client_disp_arr[i]);
 858     }
 859     ADIOI_Free(client_blk_arr);
 860     ADIOI_Free(client_disp_arr);
 861 
 862     if (agg_ol_ct > 0) {
 863         if (agg_ol_ct == 1)
 864             MPI_Type_contiguous (agg_blk_arr[0], MPI_BYTE, agg_dtype_p);
 865         else if (agg_ol_ct > 1)
 866             MPI_Type_create_hindexed(agg_ol_ct, agg_blk_arr, agg_disp_arr, MPI_BYTE,
 867                                      agg_dtype_p);    
 868 
 869         MPI_Type_commit(agg_dtype_p);
 870 
 871         ADIOI_Free(agg_disp_arr);
 872         ADIOI_Free(agg_blk_arr);
 873     }
 874     ADIOI_Free(client_ol_ct_arr);
 875     ADIOI_Free(client_ol_cur_ct_arr);
 876     ADIOI_Free(client_comm_next_off_arr);
 877 #ifdef AGGREGATION_PROFILE
 878     MPE_Log_event (5017, 0, NULL);
 879 #endif
 880     return 0;
 881 }
 882 
 883 /* All sizes from all aggregators are gathered on the clients, which
 884  * then call this function, which will generate the comm datatypes for
 885  * each aggregator (agg_comm_dtype_arr) in the upcoming
 886  * MPI_Alltoallw() */
 887 int ADIOI_Build_client_reqs(ADIO_File fd, 
 888                             int nprocs,
 889                             view_state *my_mem_view_state_arr,
 890                             view_state *agg_file_view_state_arr,
 891                             ADIO_Offset *agg_comm_sz_arr,
 892                             MPI_Datatype *agg_comm_dtype_arr)
 893 {
 894     MPI_Aint **agg_disp_arr = NULL;
 895     int **agg_blk_arr = NULL;
 896     view_state *tmp_mem_state_p = NULL, *tmp_file_state_p = NULL;
 897     ADIO_Offset total_agg_comm_sz = 0, cur_total_agg_comm_sz = 0;
 898     ADIO_Offset st_reg = 0, act_reg_sz = 0, tmp_reg_sz = 0;
 899     ADIO_Offset cur_off = -1, cur_reg_max_len = -1;
 900     ADIO_Offset tmp_cur_off = -1, tmp_cur_reg_max_len = -1;
 901     ADIO_Offset agg_mem_st_reg = 0, agg_mem_act_reg_sz = 0;
 902     ADIO_Offset *fr_st_off_arr = fd->file_realm_st_offs;
 903     ADIO_Offset *agg_comm_cur_sz_arr = NULL;
 904     MPI_Datatype *fr_type_arr = fd->file_realm_types;
 905     int *agg_ol_ct_arr = NULL;
 906     int *agg_ol_cur_ct_arr = NULL;
 907     int tmp_agg_fr_idx = -1;
 908     int cur_off_proc = -1;
 909     int i = 0, j = 0;
 910     int agg_next_off_idx = -1;
 911     /* Used for coalescing ol pairs next to each other. */
 912     ADIO_Offset *agg_mem_next_off_arr = NULL;
 913 #ifdef AGGREGATION_PROFILE
 914     MPE_Log_event (5018, 0, NULL);
 915 #endif
 916 
 917 #ifdef DEBUG
 918     fprintf(stderr, "ADIOI_Build_client_reqs:(agg,size_req)=");
 919     for (i = 0; i < nprocs; i++)
 920     {
 921         int tmp_agg_idx = ADIOI_Agg_idx(i, fd);
 922         if (tmp_agg_idx >= 0)
 923         {
 924             fprintf(stderr, "(%d,%Ld)", i, agg_comm_sz_arr[i]);
 925             if (i != fd->hints->cb_nodes - 1)
 926                 fprintf(stderr, ",");
 927         }
 928         fprintf(stderr, "\n");
 929     }
 930 #endif
 931     
 932     if ((agg_mem_next_off_arr = (ADIO_Offset *) ADIOI_Malloc(
 933              nprocs*sizeof(ADIO_Offset))) == NULL)
 934     {
 935         fprintf(stderr, "ADIOI_Build_client_reqs: malloc agg_mem_next_off_arr"
 936                 "failed\n");
 937         return -1;
 938     }
 939 
 940     if ((agg_comm_cur_sz_arr = (ADIO_Offset *) 
 941          ADIOI_Malloc(nprocs*sizeof(ADIO_Offset))) == NULL)
 942     {
 943         fprintf(stderr, "ADIOI_Build_client_reqs: malloc agg_comm_cur_sz_arr"
 944                 " failed\n");
 945         return -1;
 946     }
 947     if ((agg_ol_ct_arr = (int *) ADIOI_Calloc(nprocs, sizeof(int)))
 948         == NULL)
 949     {
 950         fprintf(stderr, "ADIOI_Build_client_reqs: "
 951                 "malloc agg_ol_ct_arr failed\n");
 952         return -1;
 953     }
 954     if ((agg_ol_cur_ct_arr = (int *) ADIOI_Calloc(nprocs, sizeof(int)))
 955         == NULL)
 956     {
 957         fprintf(stderr, "ADIOI_Build_client_reqs: "
 958                 "malloc agg_ol_cur_ct_arr failed\n");
 959         return -1;
 960     }
 961 
 962     for (i = 0; i < nprocs; i++)
 963     {
 964         if (agg_comm_sz_arr[i] > 0)
 965             total_agg_comm_sz += agg_comm_sz_arr[i];
 966     }
 967     
 968     /* On the first pass see how many offset-length pairs are
 969      * necessary for each aggregator.  Then allocate the correct
 970      * amount of offset-length pairs for handling each aggregator's
 971      * particular data size.  On the last pass, we actually create the
 972      * offset-length pairs. */
 973     for (i = 0; i < MAX_OFF_TYPE; i++)
 974     {
 975         cur_total_agg_comm_sz = 0;
 976         memset(agg_comm_cur_sz_arr, 0, nprocs*sizeof(ADIO_Offset));
 977         memset(agg_mem_next_off_arr, -1, nprocs*sizeof(ADIO_Offset));
 978         while (total_agg_comm_sz > cur_total_agg_comm_sz)
 979         {
 980             /* Look for the next aggregator offset among all the
 981              * aggregators and their respective file realms. */
 982             cur_off = -1;
 983             for (j = 0; j < nprocs; j++)
 984             {
 985                 tmp_agg_fr_idx = ADIOI_Agg_idx(j, fd);
 986                 assert(tmp_agg_fr_idx < fd->hints->cb_nodes);
 987                 
 988                 /* If this process is not an aggregator or we have
 989                  * finished all the bytes for this aggregator, move
 990                  * along. */
 991                 if (tmp_agg_fr_idx < 0 || 
 992                     agg_comm_cur_sz_arr[j] == agg_comm_sz_arr[j])
 993                 {
 994                     continue;
 995                 }
 996 
 997                 find_next_off(fd,
 998                               &(agg_file_view_state_arr[j]),
 999                               fr_st_off_arr[tmp_agg_fr_idx],
1000                               &(fr_type_arr[tmp_agg_fr_idx]),
1001                               i,
1002                               &tmp_cur_off,
1003                               &tmp_cur_reg_max_len);
1004                 if (tmp_cur_off == -1)
1005                     continue;          
1006 
1007                 if ((cur_off == -1) || 
1008                     (cur_off > tmp_cur_off))
1009                 {
1010                     cur_off_proc = j;
1011                     cur_off = tmp_cur_off;
1012                     cur_reg_max_len = tmp_cur_reg_max_len;
1013                 }
1014             }
1015 
1016             assert(cur_off_proc != -1);
1017             
1018             /* Add up to the end of the file realm or as many bytes
1019              * are left for this particular aggregator in the client's
1020              * filetype */
1021             if (cur_reg_max_len > agg_comm_sz_arr[cur_off_proc] - 
1022                 agg_comm_cur_sz_arr[cur_off_proc])
1023             {
1024                 cur_reg_max_len = agg_comm_sz_arr[cur_off_proc] - 
1025                     agg_comm_cur_sz_arr[cur_off_proc];
1026             }
1027             assert(cur_reg_max_len > 0);
1028             
1029             view_state_add_region(
1030                 cur_reg_max_len,
1031                 &(agg_file_view_state_arr[cur_off_proc]),
1032                 &st_reg, &act_reg_sz, i);
1033             
1034 #ifdef DEBUG2
1035             fprintf(stderr, "ADIOI_Build_client_reqs: %s File region"
1036                     " (proc=%d,off=%Ld,sz=%Ld)\n",
1037                     off_type_name[i], cur_off_proc,
1038                     cur_off, act_reg_sz);
1039 #endif
1040 
1041             /* Before translating the file regions to memory regions,
1042              * we first must advance to the proper point in the
1043              * mem_view_state for this aggregator to match the
1044              * file_view_state. */
1045             tmp_file_state_p = &(agg_file_view_state_arr[cur_off_proc]);
1046             tmp_mem_state_p = &(my_mem_view_state_arr[cur_off_proc]);
1047             assert(view_state_get_cur_sz(tmp_file_state_p, i) - act_reg_sz >=
1048                    view_state_get_cur_sz(tmp_mem_state_p, i));
1049             while (view_state_get_cur_sz(tmp_file_state_p, i) - act_reg_sz != 
1050                    view_state_get_cur_sz(tmp_mem_state_p, i))
1051             {
1052                 ADIO_Offset fill_st_reg = -1, fill_reg_sz = -1;
1053                 view_state_add_region(
1054                     view_state_get_cur_sz(tmp_file_state_p, i) - act_reg_sz -
1055                     view_state_get_cur_sz(tmp_mem_state_p, i),
1056                     tmp_mem_state_p,
1057                     &fill_st_reg,
1058                     &fill_reg_sz, i);
1059             }
1060             
1061             /* Based on how large the act_reg_sz 1. Figure out how
1062              * many memory offset-length pairs are necessary. 2. Set
1063              * the offset-length pairs. */
1064             tmp_reg_sz = 0;
1065             while (tmp_reg_sz != act_reg_sz)
1066             {
1067                 view_state_add_region(
1068                     act_reg_sz - tmp_reg_sz,
1069                     tmp_mem_state_p,
1070                     &agg_mem_st_reg, &agg_mem_act_reg_sz, 
1071                     i);
1072                 tmp_reg_sz += agg_mem_act_reg_sz;
1073 
1074 #ifdef DEBUG2
1075                 fprintf(stderr, "ADIOI_Build_client_reqs: Mem region %s"
1076                         "(proc=%d,off=%Ld,sz=%Ld)\n",
1077                         off_type_name[i], cur_off_proc,
1078                         agg_mem_st_reg, agg_mem_act_reg_sz);
1079 #endif
1080                 agg_comm_cur_sz_arr[cur_off_proc] += agg_mem_act_reg_sz;
1081                 cur_total_agg_comm_sz += agg_mem_act_reg_sz;        
1082                 switch(i)
1083                 {
1084                     case TEMP_OFF:
1085                         /* Increment the ol list count a particular
1086                          * aggregator if next region is not adjacent
1087                          * to the previous region. */
1088                         if (agg_mem_next_off_arr[cur_off_proc] != 
1089                             agg_mem_st_reg)
1090                         {
1091                             agg_ol_ct_arr[cur_off_proc]++;
1092                         }
1093                         agg_mem_next_off_arr[cur_off_proc] = 
1094                             agg_mem_st_reg + agg_mem_act_reg_sz;
1095                         break;
1096                     case REAL_OFF:
1097                         /* Set the ol list for the memtypes that will
1098                          * map to each aggregator, coaslescing if
1099                          * possible. */
1100                         agg_next_off_idx = agg_ol_cur_ct_arr[cur_off_proc];
1101                         if (agg_mem_next_off_arr[cur_off_proc] != 
1102                             agg_mem_st_reg)
1103                         {
1104                             agg_disp_arr[cur_off_proc][agg_next_off_idx] = 
1105                                 agg_mem_st_reg;
1106                             agg_blk_arr[cur_off_proc][agg_next_off_idx] = 
1107                                 agg_mem_act_reg_sz;
1108                             (agg_ol_cur_ct_arr[cur_off_proc])++;
1109                         }
1110                         else
1111                         {
1112                             agg_blk_arr[cur_off_proc][agg_next_off_idx - 1]
1113                                 += agg_mem_act_reg_sz;
1114                         }
1115                         agg_mem_next_off_arr[cur_off_proc] = 
1116                             agg_mem_st_reg + agg_mem_act_reg_sz;
1117                         break;
1118                     default:
1119                         fprintf(stderr, "ADIOI_Build_client_reqs: "
1120                                 "Impossible type\n");
1121                 }
1122             }
1123         }
1124         
1125         /* On the first pass, allocate the memory structures for
1126          * creating the MPI_hindexed type. */
1127         if (i == TEMP_OFF)
1128         {           
1129             /* Allocate offset-length pairs for creating hindexed
1130              * MPI_Datatypes for each aggregator */
1131             if ((agg_disp_arr = (MPI_Aint **) 
1132                  ADIOI_Malloc(nprocs*sizeof(MPI_Aint *))) == NULL)
1133             {
1134                 fprintf(stderr, 
1135                         "ADIOI_Build_client_reqs: malloc agg_disp_arr failed\n");
1136                 return -1;
1137             }
1138             if ((agg_blk_arr = (int **) ADIOI_Malloc(nprocs*sizeof(int *))) 
1139                 == NULL)
1140             {
1141                 ADIOI_Free(agg_disp_arr);
1142                 fprintf(stderr, 
1143                         "ADIOI_Build_client_reqs: malloc agg_blk_arr failed\n");
1144                 return -1;
1145             }    
1146             for (j = 0; j < nprocs; j++)
1147             {
1148                 if ((agg_disp_arr[j] = (MPI_Aint *) 
1149                      ADIOI_Malloc(agg_ol_ct_arr[j]*sizeof(MPI_Aint))) == NULL)
1150                 {
1151                     fprintf(stderr, "ADIOI_Build_client_reqs: malloc "
1152                             "agg_disp_arr[%d] failed\n", j);
1153                     return -1;
1154                 }
1155                 if ((agg_blk_arr[j] = (int *) 
1156                      ADIOI_Malloc(agg_ol_ct_arr[j]*sizeof(int))) == NULL)
1157                 {
1158                     ADIOI_Free(agg_disp_arr[j]);
1159                     fprintf(stderr, "ADIOI_Build_client_reqs: malloc "
1160                             "agg_blk_arr[%d] failed\n", j);
1161                     return -1;
1162                 }
1163             }
1164         }
1165     }
1166 
1167 #ifdef DEBUG
1168     fprintf(stderr, "ADIOI_Build_client_reqs:(agg,cur_ol_count=ol_count)=");
1169     for (i = 0; i < nprocs; i++)
1170     {
1171         int tmp_agg_idx = ADIOI_Agg_idx(i, fd);
1172         if (tmp_agg_idx >= 0)
1173         {
1174             fprintf(stderr, "(%d,%d=%d)", i, agg_ol_cur_ct_arr[i],
1175                     agg_ol_ct_arr[i]);
1176             assert(agg_ol_ct_arr[i] == agg_ol_cur_ct_arr[i]);
1177             if (tmp_agg_idx != fd->hints->cb_nodes - 1)
1178                 fprintf(stderr, ",");
1179         }
1180     }
1181     fprintf(stderr, "\n");
1182 #endif
1183 
1184 #ifdef DEBUG2
1185     for (i = 0; i < nprocs; i++)
1186     {
1187         if (agg_ol_ct_arr[i] > 0)
1188         {
1189             fprintf(stderr, "ADIOI_Build_client_reqs: p %d (off,len) = ", i);
1190             for (j = 0; j < agg_ol_ct_arr[i]; j++)
1191             {
1192                 fprintf(stderr, "[%d](%d,%d) ", j,
1193                         agg_disp_arr[i][j],
1194                         agg_blk_arr[i][j]);
1195             }
1196             fprintf(stderr, "\n");
1197         }
1198     }
1199 #endif
1200 
1201     /* Create all the aggregator MPI_Datatypes */
1202     for (i = 0; i < nprocs; i++)
1203     {
1204         if (agg_comm_sz_arr[i] > 0)
1205         {
1206             MPI_Type_create_hindexed(agg_ol_ct_arr[i], agg_blk_arr[i],
1207                                      agg_disp_arr[i], MPI_BYTE,
1208                                      &(agg_comm_dtype_arr[i]));
1209             MPI_Type_commit(&(agg_comm_dtype_arr[i]));
1210         }
1211         else
1212         {
1213             agg_comm_dtype_arr[i] = MPI_BYTE;
1214         }
1215         ADIOI_Free(agg_blk_arr[i]);
1216         ADIOI_Free(agg_disp_arr[i]);
1217     }
1218     ADIOI_Free(agg_blk_arr);
1219     ADIOI_Free(agg_disp_arr);
1220 
1221     ADIOI_Free(agg_mem_next_off_arr);
1222     ADIOI_Free(agg_comm_cur_sz_arr);
1223     ADIOI_Free(agg_ol_ct_arr);
1224     ADIOI_Free(agg_ol_cur_ct_arr);
1225 #ifdef AGGREGATION_PROFILE
1226     MPE_Log_event (5019, 0, NULL);
1227 #endif    
1228     return 0;
1229 }
1230 /* ADIOI_Build_client_pre_req allows a client to precalculate the
1231  * memtype offset-length pairs (up to a limit - max_pre_req_sz or
1232  * max_ol_ct). It basically allows ADIOI_Build_client_req to do less work.
1233  * If it is called and there already exist some preprocessed memtype
1234  * offset-length pairs, it will exit immediately if a limit has already
1235  * been reached; otherwise it will add on to the existing pairs until
1236  * the new limits are reached. */
1237 
1238 int ADIOI_Build_client_pre_req(ADIO_File fd,
1239                                int agg_rank, int agg_idx,
1240                                view_state *my_mem_view_state_p,
1241                                view_state *agg_file_view_state_p,
1242                                ADIO_Offset max_pre_req_sz,
1243                                int max_ol_ct)
1244 {
1245     ADIO_Offset act_reg_sz = 0, tmp_reg_sz = 0;
1246     ADIO_Offset cur_off = -1, cur_reg_max_len = -1;
1247     ADIO_Offset agg_mem_st_reg = 0, agg_mem_act_reg_sz = 0;
1248     int agg_ol_ct = 0, agg_ol_cur_ct = 0;
1249     int i, agg_next_off_idx = -1;
1250 
1251     ADIO_Offset cur_sz = 0, max_sz = 0, agg_mem_next_off = -1;
1252     ADIO_Offset fill_st_reg = -1, fill_reg_sz = -1;
1253     ADIO_Offset *fr_st_off_arr = fd->file_realm_st_offs;
1254     MPI_Datatype *fr_type_arr = fd->file_realm_types;
1255     MPI_Aint *tmp_disp_arr = NULL;
1256     int *tmp_blk_arr = NULL, exit_loop = -1;
1257     flatten_state *tmp_mem_state_p = NULL, *tmp_file_state_p = NULL;
1258 #ifdef DTYPE_SKIP
1259     int skip_type_ct;
1260 #endif
1261     if (agg_idx < 0 || agg_idx >= fd->hints->cb_nodes)
1262     {
1263         fprintf(stderr, "ADIOI_Build_client_pre_req: Invalid agg_idx %d\n",
1264                 agg_idx);
1265         return -1;
1266     }
1267 
1268     if (agg_file_view_state_p->cur_state.cur_sz == 
1269         agg_file_view_state_p->sz || max_pre_req_sz <= 0 ||
1270         max_ol_ct <= 0)
1271     {
1272 #ifdef DEBUG1
1273         fprintf(stderr, 
1274                 "ADIOI_Build_client_pre_req: Nothing to preprocess\n");
1275 #endif
1276         return 0;
1277     }
1278 
1279     /* The new limits have already been surpassed by what already
1280      * exists.  Otherwise we will use the next restrictions */
1281     if ((my_mem_view_state_p->pre_sz >= max_pre_req_sz) ||
1282         (my_mem_view_state_p->pre_ol_ct >= max_ol_ct))
1283     {
1284 #ifdef DEBUG1
1285         fprintf(stderr, 
1286                 "ADIOI_Build_client_pre_req:  Old values surpass new "
1287                 "pre_req values\n");
1288 #endif
1289         return 0;
1290     }
1291     
1292     /* General idea is to first advance the filetype to the file realm
1293      * and then the memtype to the filetype.  The memtype is advanced
1294      * further by peeking at the filetype and then the filetype is
1295      * advanced. */
1296     for (i = 0; i < MAX_OFF_TYPE; i++)
1297     {
1298         switch(i)
1299         {
1300             case TEMP_OFF:
1301                 tmp_mem_state_p  = &(my_mem_view_state_p->tmp_state);
1302                 tmp_file_state_p = &(agg_file_view_state_p->tmp_state);
1303                 break;
1304             case REAL_OFF:
1305                 tmp_mem_state_p  = &(my_mem_view_state_p->cur_state);
1306                 tmp_file_state_p = &(agg_file_view_state_p->cur_state);
1307                 break;
1308             default:
1309                 fprintf(stderr, "ADIOI_Build_client_pre_req: "
1310                         "Invalid off type %d\n", i);
1311         }
1312 
1313         if (i == TEMP_OFF && my_mem_view_state_p->pre_sz > 0)
1314         {
1315             cur_sz = my_mem_view_state_p->pre_sz;
1316             agg_ol_ct = my_mem_view_state_p->pre_ol_ct;
1317             /* Save the old arrays */
1318             tmp_disp_arr = my_mem_view_state_p->pre_disp_arr;
1319             tmp_blk_arr  = my_mem_view_state_p->pre_blk_arr;
1320             my_mem_view_state_p->pre_disp_arr = NULL;
1321             my_mem_view_state_p->pre_blk_arr  = NULL;
1322             agg_mem_next_off =
1323                 tmp_disp_arr[agg_ol_ct - 1] + tmp_blk_arr[agg_ol_ct - 1];
1324         }
1325         else if (i == REAL_OFF && my_mem_view_state_p->pre_sz > 0)
1326         {
1327             cur_sz = my_mem_view_state_p->pre_sz;
1328             agg_ol_cur_ct = my_mem_view_state_p->pre_ol_ct;
1329             
1330             /* Copy the old data to the new data, freeing the old
1331              * arrays */
1332             memcpy(my_mem_view_state_p->pre_disp_arr, tmp_disp_arr, 
1333                    my_mem_view_state_p->pre_ol_ct * sizeof(MPI_Aint));
1334             memcpy(my_mem_view_state_p->pre_blk_arr, tmp_blk_arr, 
1335                    my_mem_view_state_p->pre_ol_ct * sizeof(int));
1336 
1337             ADIOI_Free(tmp_disp_arr);
1338             ADIOI_Free(tmp_blk_arr);
1339 
1340             agg_mem_next_off = 
1341                 my_mem_view_state_p->pre_disp_arr[agg_ol_cur_ct - 1] +
1342                 my_mem_view_state_p->pre_blk_arr[agg_ol_cur_ct - 1];
1343         }
1344         else
1345         {
1346             cur_sz = 0;
1347         }
1348         
1349         /* Max_pre_req_sz may be larger than the amount of data left
1350          * to preprocess */
1351         if (max_pre_req_sz - cur_sz > 
1352             agg_file_view_state_p->sz - tmp_file_state_p->cur_sz)
1353         {
1354             max_sz = cur_sz +
1355                 agg_file_view_state_p->sz - tmp_file_state_p->cur_sz;
1356         }
1357         else
1358             max_sz = max_pre_req_sz;
1359         
1360         assert(cur_sz != max_sz);
1361 #ifdef DEBUG1
1362         fprintf(stderr, 
1363                 "ADIOI_Build_client_pre_req: (cur_sz=%Ld,agg_ol_ct=%d,"
1364                 "agg_mem_next_off=%Ld,max_sz=%Ld,max_ol_ct=%d)\n", 
1365                 cur_sz, agg_ol_ct, agg_mem_next_off, max_sz, max_ol_ct);
1366 #endif
1367         while (cur_sz < max_sz)
1368         {
1369             find_next_off(fd, agg_file_view_state_p,
1370                           fr_st_off_arr[agg_rank],
1371                           &(fr_type_arr[agg_rank]),
1372                           i,
1373                           &cur_off,
1374                           &cur_reg_max_len);
1375             
1376             /* find_next_off may show that the file_view_state is done
1377              * even if cur_sz != max_sz since find_next_off may
1378              * advance the file_view_state to the end here and realize
1379              * that it is done. */
1380             if (cur_off == -1)
1381                 break;
1382 
1383             assert(cur_off != -1);
1384             
1385             /* Before translating the file regions to memory regions,
1386              * we first must advance to the proper point in the
1387              * mem_view_state for this aggregator to match the
1388              * file_view_state. */
1389             while (tmp_file_state_p->cur_sz != tmp_mem_state_p->cur_sz)
1390             {
1391 #ifdef DTYPE_SKIP
1392                 if (my_mem_view_state_p->flat_type_p->count > 1) {
1393                     /* let's see if we can skip whole memory datatypes */
1394                     skip_type_ct =
1395                         (tmp_file_state_p->cur_sz - tmp_mem_state_p->cur_sz) /
1396                         my_mem_view_state_p->type_sz;
1397                     if (skip_type_ct > 0) {
1398                         tmp_mem_state_p->cur_sz +=
1399                             skip_type_ct * my_mem_view_state_p->type_sz;
1400                         tmp_mem_state_p->abs_off +=
1401                             skip_type_ct * my_mem_view_state_p->ext;
1402                         if (tmp_mem_state_p->cur_sz ==
1403                             tmp_file_state_p->cur_sz)
1404                             break;
1405                     }
1406                 }
1407 #endif
1408                 view_state_add_region(
1409                     tmp_file_state_p->cur_sz - tmp_mem_state_p->cur_sz,
1410                     my_mem_view_state_p,
1411                     &fill_st_reg,
1412                     &fill_reg_sz, i);
1413             }
1414 
1415             /* Now that the filetype and memtype are advanced to the
1416              * same position, add memtype ol-pairs while we have not
1417              * overstepped the min(end of the current piece in the
1418              * file view, end of the file realm, data left in
1419              * max_sz) */
1420             
1421             if (cur_reg_max_len >  
1422                 view_state_get_next_len(agg_file_view_state_p, i))
1423                 cur_reg_max_len =  
1424                     view_state_get_next_len(agg_file_view_state_p, i);
1425 
1426             if (cur_reg_max_len > max_sz - cur_sz)
1427                 cur_reg_max_len = max_sz - cur_sz;
1428 
1429             assert(cur_reg_max_len > 0);
1430 
1431             /* Add memtype ol pairs while we have not passed
1432              * cur_reg_max_len or the max number of ol pairs
1433              * allowed */
1434             act_reg_sz = 0;
1435             exit_loop = 0;
1436             while ((act_reg_sz < cur_reg_max_len) && 
1437                    (exit_loop == 0))
1438             {
1439                 view_state_add_region(
1440                     cur_reg_max_len - act_reg_sz,
1441                     my_mem_view_state_p,
1442                     &agg_mem_st_reg, &agg_mem_act_reg_sz, 
1443                     i);
1444                 act_reg_sz += agg_mem_act_reg_sz;
1445                 
1446 #ifdef DEBUG2
1447                 fprintf(stderr, "ADIOI_Build_client_pre_req: %s Mem region"
1448                         "(proc=%d,off=%Ld,sz=%Ld)\n",
1449                         off_type_name[i], agg_rank, agg_mem_st_reg, 
1450                         agg_mem_act_reg_sz);
1451 #endif
1452                 switch(i)
1453                 {
1454                     case TEMP_OFF:
1455                         /* Increment the ol list count if the next
1456                          * region is not adjacent to the previous
1457                          * region. */
1458                         if (agg_mem_next_off != agg_mem_st_reg)
1459                         {
1460                             agg_ol_ct++;
1461                             if (agg_ol_ct == max_ol_ct)
1462                                 exit_loop = 1;
1463                         }
1464                         agg_mem_next_off = 
1465                             agg_mem_st_reg + agg_mem_act_reg_sz;
1466                         break;
1467                     case REAL_OFF:
1468                         /* Set the ol list for the memtype that
1469                          * will map to our aggregator, coaslescing
1470                          * if possible. */
1471                         agg_next_off_idx = agg_ol_cur_ct;
1472                         if (agg_mem_next_off != agg_mem_st_reg)
1473                         {
1474                             my_mem_view_state_p->
1475                                 pre_disp_arr[agg_next_off_idx] = 
1476                                 agg_mem_st_reg;
1477                             my_mem_view_state_p->
1478                                 pre_blk_arr[agg_next_off_idx] = 
1479                                 agg_mem_act_reg_sz;
1480                             agg_ol_cur_ct++;
1481                             if (agg_ol_cur_ct == agg_ol_ct)
1482                                 exit_loop = 1;
1483                         }
1484                         else
1485                         {
1486                             my_mem_view_state_p->
1487                                 pre_blk_arr[agg_next_off_idx - 1]
1488                                 += agg_mem_act_reg_sz;
1489                         }
1490                         agg_mem_next_off = 
1491                             agg_mem_st_reg + agg_mem_act_reg_sz;
1492                         break;
1493                     default:
1494                         fprintf(stderr, "ADIOI_Build_client_pre_req: "
1495                                 "Impossible type\n");
1496                 }
1497             }
1498 
1499             /* Advance the filetype flatten state appropriately to
1500              * match the data advanced in the memtype flatten state.
1501              * Should only take at most a single view_state_add_region
1502              * call since the memtype cannot proceed beyond the end of
1503              * a contig piece in the file type. */
1504             view_state_add_region(act_reg_sz - tmp_reg_sz,
1505                                   agg_file_view_state_p,
1506                                   &fill_st_reg, &fill_reg_sz, i);
1507 #ifdef DEBUG2
1508             fprintf(stderr, "ADIOI_Build_client_pre_req: %s File region"
1509                     " (proc=%d,off=%Ld,sz=%Ld)\n",
1510                     off_type_name[i], agg_rank, fill_st_reg, fill_reg_sz);
1511 #endif
1512             if (fill_reg_sz != act_reg_sz)
1513             {
1514                 fprintf(stderr, "ADIOI_Build_client_pre_req: "
1515                         "view_state_add_region failed to match the memtype\n");
1516                 return -1;
1517             }
1518             
1519             cur_sz += act_reg_sz;
1520         }
1521         
1522         /* On the first pass, allocate the memory structures for
1523          * storing the preprocessed information */
1524         if (i == TEMP_OFF)
1525         {
1526             if ((my_mem_view_state_p->pre_disp_arr = (MPI_Aint *)
1527                  ADIOI_Malloc(agg_ol_ct * sizeof(MPI_Aint))) == NULL)
1528             {
1529                 fprintf(stderr, "ADIOI_Build_client_pre_req: malloc "
1530                         "pre_disp_arr of size %ld failed\n",
1531                         (long int)agg_ol_ct * sizeof(MPI_Aint));
1532                 return -1;
1533             }
1534             if ((my_mem_view_state_p->pre_blk_arr = (int *) 
1535                  ADIOI_Malloc(agg_ol_ct * sizeof(int))) == NULL)
1536             {
1537                 ADIOI_Free(my_mem_view_state_p->pre_disp_arr);
1538                 fprintf(stderr, "ADIOI_Build_client_pre_req: malloc "
1539                         "agg_blk_arr of size %ld failed\n",
1540                         (long int)agg_ol_ct * sizeof(int));
1541                 return -1;
1542             }
1543         }
1544     }
1545 
1546     my_mem_view_state_p->pre_sz = cur_sz;
1547     my_mem_view_state_p->pre_ol_ct = agg_ol_ct;
1548 
1549 #ifdef DEBUG1
1550     fprintf(stderr, "ADIOI_Build_client_pre_req:(agg=%d,cur_ol_count=%d"
1551             "=ol_count=%d)\n",
1552             agg_rank, my_mem_view_state_p->pre_ol_ct, agg_ol_ct);
1553 #endif
1554 
1555 #ifdef DEBUG2
1556     if (agg_ol_ct > 0)
1557     {
1558         fprintf(stderr, "ADIOI_Build_client_pre_req: agg=%d,pre_sz=%Ld "
1559                 "(off,len) = \n", agg_rank, my_mem_view_state_p->pre_sz);
1560         for (i = 0; i < my_mem_view_state_p->pre_ol_ct; i++)
1561         {
1562             fprintf(stderr, "[%d](%d,%d) ", i, 
1563                     my_mem_view_state_p->pre_disp_arr[i], 
1564                     my_mem_view_state_p->pre_blk_arr[i]);
1565             if (i % 5 == 0 && i != 0)
1566                 fprintf(stderr, "\n");
1567         }
1568         fprintf(stderr, "\n");
1569     }
1570 #endif
1571 
1572     return 0;
1573 }
1574 
1575 /* process_pre_req() allows ADIOI_Build_client_req to use the pre_req
1576  * information. */
1577 
1578 static int process_pre_req(ADIO_File fd,
1579                            int agg_rank,
1580                            int agg_idx,
1581                            view_state *my_mem_view_state_p,
1582                            view_state *agg_file_view_state_p,
1583                            ADIO_Offset agg_comm_sz,
1584                            int off_type,
1585                            MPI_Aint *agg_disp_arr,
1586                            int *agg_blk_arr,
1587                            ADIO_Offset *agg_comm_pre_sz_p,
1588                            ADIO_Offset *agg_comm_cur_sz_p,
1589                            ADIO_Offset *agg_comm_sz_p,
1590                            int *agg_ol_cur_ct_p,
1591                            int *agg_ol_ct_p,
1592                            ADIO_Offset *agg_mem_next_off_p)
1593 {
1594     int i, has_partial = 0;
1595     MPI_Aint partial_disp = 0;
1596     int partial_len = 0;
1597     ADIO_Offset tmp_agg_comm_pre_sz = 0;
1598 
1599     assert (my_mem_view_state_p->pre_sz > 0);
1600     switch(off_type)
1601     {
1602         case TEMP_OFF:
1603             /* Use only some of the precalculated data */
1604             if (my_mem_view_state_p->pre_sz > *agg_comm_sz_p)
1605             {
1606                 for (i = 0; i < my_mem_view_state_p->pre_ol_ct; i++)
1607                 {
1608                     if ((my_mem_view_state_p->pre_blk_arr[i] + 
1609                          *agg_comm_pre_sz_p) > *agg_comm_sz_p)
1610                     {
1611                         has_partial = 1;
1612                         partial_len = *agg_comm_sz_p - *agg_comm_pre_sz_p;
1613                         *agg_comm_pre_sz_p = *agg_comm_sz_p;
1614                         i++;
1615                         break;
1616                     }
1617                     else if ((my_mem_view_state_p->pre_blk_arr[i] +
1618                               *agg_comm_pre_sz_p) == *agg_comm_sz_p)
1619                     {
1620                         *agg_comm_pre_sz_p += 
1621                             my_mem_view_state_p->pre_blk_arr[i];
1622                         i++;
1623                         break;
1624                     }
1625                     else
1626                         *agg_comm_pre_sz_p += 
1627                             my_mem_view_state_p->pre_blk_arr[i];
1628                 }
1629                 
1630                 if (has_partial == 1)
1631                 {
1632                     *agg_mem_next_off_p = 
1633                         my_mem_view_state_p->pre_disp_arr[i - 1] + 
1634                         partial_len;
1635                 }
1636                 else
1637                 {
1638                     *agg_mem_next_off_p = 
1639                         my_mem_view_state_p->pre_disp_arr[i - 1] + 
1640                         my_mem_view_state_p->pre_blk_arr[i - 1];
1641                 }
1642                 
1643                 *agg_comm_cur_sz_p = *agg_comm_pre_sz_p;
1644                 *agg_ol_ct_p = i;
1645                 
1646             }
1647             else /* Use all the precalculated data */
1648             {
1649                 *agg_comm_pre_sz_p = my_mem_view_state_p->pre_sz;
1650                 *agg_comm_cur_sz_p = *agg_comm_pre_sz_p;
1651                 *agg_ol_ct_p = my_mem_view_state_p->pre_ol_ct;
1652                 *agg_mem_next_off_p = 
1653                     my_mem_view_state_p->pre_disp_arr[
1654                         my_mem_view_state_p->pre_ol_ct - 1] +
1655                     my_mem_view_state_p->pre_blk_arr[
1656                         my_mem_view_state_p->pre_ol_ct - 1];
1657             }
1658 #ifdef DEBUG1
1659             fprintf(stderr, "process_pre_req: TEMP_OFF "
1660                     "agg_comm_pre_sz=%Ld,agg_comm_cur_sz=%Ld,agg_ol_ct=%d\n",
1661                     *agg_comm_pre_sz_p, *agg_comm_cur_sz_p, *agg_ol_ct_p);
1662 #endif
1663             assert(*agg_comm_cur_sz_p <= *agg_comm_sz_p);
1664             break;
1665         case REAL_OFF:
1666             /* Set the ol list for the memtype that will map to our
1667              * aggregator, coaslescing if possible. */
1668             for (i = 0; i < my_mem_view_state_p->pre_ol_ct; i++)
1669             {
1670                 agg_disp_arr[i] = my_mem_view_state_p->pre_disp_arr[i];
1671                 agg_blk_arr[i]  = my_mem_view_state_p->pre_blk_arr[i];
1672                 
1673                 if ((my_mem_view_state_p->pre_blk_arr[i] + 
1674                      tmp_agg_comm_pre_sz) > *agg_comm_pre_sz_p)
1675                 {
1676                     has_partial = 1;
1677                     agg_blk_arr[i] = *agg_comm_pre_sz_p - tmp_agg_comm_pre_sz;
1678                     tmp_agg_comm_pre_sz = *agg_comm_pre_sz_p;
1679                     partial_disp = my_mem_view_state_p->pre_disp_arr[i] +
1680                         agg_blk_arr[i];
1681                     partial_len  = my_mem_view_state_p->pre_blk_arr[i] - 
1682                         agg_blk_arr[i];
1683                     i++;
1684                     break;
1685                 }
1686                 else if ((my_mem_view_state_p->pre_blk_arr[i] +
1687                           tmp_agg_comm_pre_sz) == *agg_comm_pre_sz_p)
1688                 {
1689                     tmp_agg_comm_pre_sz +=  
1690                         my_mem_view_state_p->pre_blk_arr[i];
1691                     i++;
1692                     break;
1693                 }
1694                 else
1695                     tmp_agg_comm_pre_sz +=
1696                         my_mem_view_state_p->pre_blk_arr[i];
1697             }
1698             *agg_mem_next_off_p = agg_disp_arr[i - 1] + agg_blk_arr[i - 1];
1699             *agg_ol_cur_ct_p = i;
1700             *agg_comm_cur_sz_p = *agg_comm_pre_sz_p;
1701             
1702             /* Clean up the ol pairs we used */     
1703             if ((i < my_mem_view_state_p->pre_ol_ct) || (has_partial == 1))
1704             {
1705                 int remain_ol_ct = 
1706                     my_mem_view_state_p->pre_ol_ct - i + has_partial;
1707                 MPI_Aint *new_pre_disp_arr = NULL;
1708                 int *new_pre_blk_arr = NULL;
1709                 
1710                 if ((new_pre_disp_arr = (MPI_Aint *)
1711                      ADIOI_Malloc(remain_ol_ct * sizeof(MPI_Aint))) == NULL)
1712                 {
1713                     fprintf(stderr, "process_pre_req: malloc "
1714                             "new_pre_disp_arr failed\n");
1715                     return -1;
1716                 }
1717                 if ((new_pre_blk_arr = (int *)
1718                      ADIOI_Malloc(remain_ol_ct * sizeof(int))) == NULL)
1719                 {
1720                     fprintf(stderr, "process_pre_req: malloc "
1721                             "new_pre_blk_arr failed\n");
1722                     return -1;
1723                 }
1724                 
1725                 memcpy(new_pre_disp_arr, 
1726                        &(my_mem_view_state_p->pre_disp_arr[i - has_partial]),
1727                        remain_ol_ct * sizeof(MPI_Aint));
1728                 memcpy(new_pre_blk_arr, 
1729                        &(my_mem_view_state_p->pre_blk_arr[i - has_partial]),
1730                        remain_ol_ct * sizeof(int));
1731                 
1732                 /* Set the partial len of the first piece */
1733                 if (has_partial == 1)
1734                 {
1735                     /* new_pre_disp_arr[remain_ol_ct - 1] = partial_disp;
1736                        new_pre_blk_arr[remain_ol_ct - 1]  = partial_len; */
1737                     new_pre_disp_arr[0] = partial_disp;
1738                     new_pre_blk_arr[0]  = partial_len;
1739                 }
1740                 
1741                 ADIOI_Free(my_mem_view_state_p->pre_disp_arr);
1742                 ADIOI_Free(my_mem_view_state_p->pre_blk_arr);
1743                 
1744                 my_mem_view_state_p->pre_disp_arr = new_pre_disp_arr;
1745                 my_mem_view_state_p->pre_blk_arr  = new_pre_blk_arr;
1746                 my_mem_view_state_p->pre_ol_ct = remain_ol_ct;
1747                 my_mem_view_state_p->pre_sz -= *agg_comm_pre_sz_p;
1748             }
1749             else /* Used all the precalculated ol pairs */
1750             {
1751                 ADIOI_Free(my_mem_view_state_p->pre_disp_arr);
1752                 ADIOI_Free(my_mem_view_state_p->pre_blk_arr);
1753                 
1754                 my_mem_view_state_p->pre_disp_arr = NULL;
1755                 my_mem_view_state_p->pre_blk_arr = NULL;
1756                 my_mem_view_state_p->pre_ol_ct = 0;
1757                 my_mem_view_state_p->pre_sz = 0;
1758             }
1759 #ifdef DEBUG1
1760             fprintf(stderr, "process_pre_req: REAL_OFF "
1761                     "agg_comm_pre_sz=%Ld,agg_comm_cur_sz=%Ld,agg_ol_ct=%d,"
1762                     "agg_ol_cur_ct=%d\n",
1763                     *agg_comm_pre_sz_p, *agg_comm_cur_sz_p, *agg_ol_ct_p, 
1764                     *agg_ol_cur_ct_p);
1765 #endif
1766             break;
1767         default:
1768             fprintf(stderr, "process_pre_req: Invalid off_type %d\n",
1769                     off_type);
1770     }
1771     return 0;
1772 }
1773 
1774 /* ADIOI_Build_client_req() creates a memory datatype to transfer data
1775  * to/from a particular aggregator. */
1776 
1777 int ADIOI_Build_client_req(ADIO_File fd,
1778                            int agg_rank,
1779                            int agg_idx,
1780                            view_state *my_mem_view_state_p,
1781                            view_state *agg_file_view_state_p,
1782                            ADIO_Offset agg_comm_sz,
1783                            MPI_Datatype *agg_comm_dtype_p)
1784 {
1785     MPI_Aint *agg_disp_arr = NULL;
1786     int *agg_blk_arr = NULL;
1787     ADIO_Offset st_reg = 0, act_reg_sz = 0, tmp_reg_sz = 0;
1788     ADIO_Offset cur_off = -1, cur_reg_max_len = -1;
1789     ADIO_Offset agg_mem_st_reg = 0, agg_mem_act_reg_sz = 0;
1790     int agg_ol_ct = 0, agg_ol_cur_ct = 0;
1791     int i = 0, agg_next_off_idx = -1;
1792     ADIO_Offset agg_mem_next_off = 0, agg_comm_cur_sz = 0, agg_comm_pre_sz = 0;
1793     ADIO_Offset *fr_st_off_arr = fd->file_realm_st_offs;
1794     MPI_Datatype *fr_type_arr = fd->file_realm_types;
1795     flatten_state *tmp_mem_state_p = NULL, *tmp_file_state_p = NULL;
1796 #ifdef DTYPE_SKIP
1797     int skip_type_ct;
1798 #endif
1799 
1800     if (agg_idx < 0 || agg_idx >= fd->hints->cb_nodes)
1801     {
1802 #ifdef DEBUG1
1803         fprintf(stderr, "ADIOI_Build_client_req: agg_rank %d does not map "
1804                 "to a valid node in cb_node\n", agg_rank);
1805 #endif
1806         return 0;
1807     }
1808 
1809 #ifdef AGGREGATION_PROFILE
1810     MPE_Log_event (5018, 0, NULL);
1811 #endif
1812 
1813 #ifdef DEBUG1
1814     fprintf(stderr, "ADIOI_Build_client_req:(agg=%d,size_req=%Ld)\n",
1815             agg_idx, agg_comm_sz);
1816 #endif
1817     
1818     /* On the first pass see how many offset-length pairs are
1819      * necessary for each aggregator.  Then allocate the correct
1820      * amount of offset-length pairs for handling each aggregator's
1821      * particular data size.  On the last pass, we actually create the
1822      * offset-length pairs. */
1823     for (i = 0; i < MAX_OFF_TYPE; i++)
1824     {
1825         switch(i)
1826         {
1827             case TEMP_OFF:
1828                 tmp_mem_state_p  = &(my_mem_view_state_p->tmp_state);
1829                 tmp_file_state_p = &(agg_file_view_state_p->tmp_state);
1830                 break;
1831             case REAL_OFF:
1832                 tmp_mem_state_p  = &(my_mem_view_state_p->cur_state);
1833                 tmp_file_state_p = &(agg_file_view_state_p->cur_state);
1834                 break;
1835             default:
1836                 fprintf(stderr, "ADIOI_Build_client_pre_req: "
1837                         "Invalid off type %d\n", i);
1838         }
1839 
1840         agg_comm_cur_sz = 0;
1841         agg_mem_next_off = -1;
1842 
1843         /* First try to preprocess anything we can */
1844         if (my_mem_view_state_p->pre_sz > 0)
1845         {
1846             process_pre_req(fd,
1847                             agg_rank,
1848                             agg_idx,
1849                             my_mem_view_state_p,
1850                             agg_file_view_state_p,
1851                             agg_comm_sz,
1852                             i,
1853                             agg_disp_arr,
1854                             agg_blk_arr,
1855                             &agg_comm_pre_sz,
1856                             &agg_comm_cur_sz,
1857                             &agg_comm_sz,
1858                             &agg_ol_cur_ct,
1859                             &agg_ol_ct,
1860                             &agg_mem_next_off);
1861         }
1862         
1863         while (agg_comm_cur_sz < agg_comm_sz)
1864         {       
1865             find_next_off(fd, agg_file_view_state_p,
1866                           fr_st_off_arr[agg_idx],
1867                           &(fr_type_arr[agg_idx]),
1868                           i,
1869                           &cur_off,
1870                           &cur_reg_max_len);
1871             
1872             assert(cur_off != -1);
1873             
1874             /* Add up to the end of the file realm or as many bytes
1875              * are left for this particular aggregator in the client's
1876              * filetype */
1877             if (cur_reg_max_len > (agg_comm_sz - agg_comm_cur_sz))
1878             {
1879                 cur_reg_max_len = agg_comm_sz - agg_comm_cur_sz;
1880             }
1881             assert(cur_reg_max_len > 0);
1882         
1883             view_state_add_region(
1884                 cur_reg_max_len,
1885                 agg_file_view_state_p,
1886                 &st_reg, &act_reg_sz, i);
1887             
1888 #ifdef DEBUG2
1889             fprintf(stderr, "ADIOI_Build_client_req: %s File region"
1890                     " (proc=%d,off=%Ld,sz=%Ld)\n",
1891                     off_type_name[i], agg_rank, cur_off, act_reg_sz);
1892 #endif
1893             
1894             /* Before translating the file regions to memory regions,
1895              * we first must advance to the proper point in the
1896              * mem_view_state for this aggregator to match the
1897              * file_view_state. */
1898             
1899             assert(tmp_file_state_p->cur_sz - act_reg_sz >= 
1900                    tmp_mem_state_p->cur_sz);
1901             
1902             while (tmp_file_state_p->cur_sz - act_reg_sz != 
1903                    tmp_mem_state_p->cur_sz)
1904             {
1905                 ADIO_Offset fill_st_reg = -1, fill_reg_sz = -1;
1906 #ifdef DTYPE_SKIP
1907                 if (my_mem_view_state_p->flat_type_p->count > 1) {
1908                     /* let's see if we can skip whole memory datatypes */
1909                     skip_type_ct =
1910                         (tmp_file_state_p->cur_sz - act_reg_sz -
1911                          tmp_mem_state_p->cur_sz) /
1912                         my_mem_view_state_p->type_sz;
1913                     if (skip_type_ct > 0) {
1914                         tmp_mem_state_p->cur_sz +=
1915                             skip_type_ct * my_mem_view_state_p->type_sz;
1916                         tmp_mem_state_p->abs_off +=
1917                             skip_type_ct * my_mem_view_state_p->ext;
1918                         if ((tmp_mem_state_p->cur_sz - act_reg_sz) ==
1919                             tmp_file_state_p->cur_sz)
1920                             break;
1921                     }
1922                 }
1923 #endif
1924                 view_state_add_region(
1925                     tmp_file_state_p->cur_sz - 
1926                     act_reg_sz - tmp_mem_state_p->cur_sz,
1927                     my_mem_view_state_p,
1928                     &fill_st_reg,
1929                     &fill_reg_sz, i);
1930             }
1931             
1932             /* Based on how large the act_reg_sz is, first figure
1933              * out how many memory offset-length pairs are
1934              * necessary and then set the offset-length pairs. */
1935             tmp_reg_sz = 0;
1936             while (tmp_reg_sz != act_reg_sz)
1937             {
1938                 view_state_add_region(
1939                     act_reg_sz - tmp_reg_sz,
1940                     my_mem_view_state_p,
1941                     &agg_mem_st_reg, &agg_mem_act_reg_sz, 
1942                     i);
1943                 tmp_reg_sz += agg_mem_act_reg_sz;
1944                 
1945 #ifdef DEBUG2
1946                 fprintf(stderr, "ADIOI_Build_client_req: %s Mem region"
1947                         "(off=%Ld,sz=%Ld)\n",
1948                         off_type_name[i], agg_mem_st_reg, 
1949                         agg_mem_act_reg_sz);
1950 #endif
1951                 agg_comm_cur_sz += agg_mem_act_reg_sz;
1952                 switch(i)
1953                 {
1954                     case TEMP_OFF:
1955                         /* Increment the ol list count if the next
1956                          * region is not adjacent to the previous
1957                          * region. */
1958                         if (agg_mem_next_off != agg_mem_st_reg)
1959                         {
1960                             agg_ol_ct++;
1961                         }
1962                         agg_mem_next_off = 
1963                             agg_mem_st_reg + agg_mem_act_reg_sz;
1964                         break;
1965                     case REAL_OFF:
1966                         /* Set the ol list for the memtype that
1967                          * will map to our aggregator, coaslescing
1968                          * if possible. */
1969                         agg_next_off_idx = agg_ol_cur_ct;
1970                         if (agg_mem_next_off != agg_mem_st_reg)
1971                         {
1972                             agg_disp_arr[agg_next_off_idx] = 
1973                                 agg_mem_st_reg;
1974                             agg_blk_arr[agg_next_off_idx] = 
1975                                 agg_mem_act_reg_sz;
1976                             agg_ol_cur_ct++;
1977                         }
1978                         else
1979                         {
1980                             agg_blk_arr[agg_next_off_idx - 1]
1981                                 += agg_mem_act_reg_sz;
1982                         }
1983                         agg_mem_next_off = 
1984                             agg_mem_st_reg + agg_mem_act_reg_sz;
1985                         break;
1986                     default:
1987                         fprintf(stderr, "ADIOI_Build_client_req: "
1988                                 "Impossible type\n");
1989                 }
1990             }
1991         }
1992         
1993         /* On the first pass, allocate the memory structures for
1994          * creating the MPI_hindexed type. */
1995         if (i == TEMP_OFF)
1996         {           
1997             /* Allocate offset-length pairs for creating hindexed
1998              * MPI_Datatypes for each aggregator */
1999             if ((agg_disp_arr = (MPI_Aint *) 
2000                  ADIOI_Malloc(agg_ol_ct * sizeof(MPI_Aint))) == NULL)
2001             {
2002                 fprintf(stderr, "ADIOI_Build_client_req: malloc "
2003                         "agg_disp_arr of size %ld failed\n",
2004                         (long int)agg_ol_ct * sizeof(MPI_Aint));
2005                 return -1;
2006             }
2007             if ((agg_blk_arr = (int *) 
2008                  ADIOI_Malloc(agg_ol_ct * sizeof(int))) == NULL)
2009             {
2010                 ADIOI_Free(agg_disp_arr);
2011                 fprintf(stderr, "ADIOI_Build_client_req: malloc "
2012                         "agg_blk_arr of size %ld failed\n",
2013                         (long int)agg_ol_ct * sizeof(int));
2014                 return -1;
2015             }
2016         }
2017     }
2018 
2019     assert(agg_ol_ct == agg_ol_cur_ct);
2020 #ifdef DEBUG1
2021     fprintf(stderr, 
2022             "ADIOI_Build_client_req:(agg=%d,cur_ol_count=%d=ol_count=%d)\n",
2023             agg_rank, agg_ol_cur_ct, agg_ol_ct);
2024 #endif
2025 
2026 #ifdef DEBUG2
2027     if (agg_ol_ct > 0)
2028     {
2029         fprintf(stderr, "ADIOI_Build_client_req: p %d (off,len) = ", agg_rank);
2030         for (i = 0; i < agg_ol_ct; i++)
2031         {
2032             fprintf(stderr, "[%d](%d,%d) ", i, 
2033                     agg_disp_arr[i], agg_blk_arr[i]);
2034             if (i % 5 == 0 && i != 0)
2035                 fprintf(stderr, "\n");
2036         }
2037         fprintf(stderr, "\n");
2038     }
2039 #endif
2040 #ifdef DEBUG1
2041     fprintf(stderr, 
2042             "ADIOI_Build_client_req:(agg=%d,pre_ol_count=%d)\n",
2043             agg_idx, my_mem_view_state_p->pre_ol_ct);
2044 #endif
2045 
2046 #ifdef DEBUG2
2047     if (my_mem_view_state_p->pre_sz > 0)
2048     {
2049         fprintf(stderr, "ADIOI_Build_client_req: p %d pre(off,len) = ", 
2050                 agg_idx);
2051         for (i = 0; i < my_mem_view_state_p->pre_ol_ct; i++)
2052         {
2053             fprintf(stderr, "[%d](%d,%d) ", i, 
2054                     my_mem_view_state_p->pre_disp_arr[i], 
2055                     my_mem_view_state_p->pre_blk_arr[i]);
2056             if (i % 5 == 0 && i != 0)
2057                 fprintf(stderr, "\n");
2058         }
2059         fprintf(stderr, "\n");
2060     }
2061 #endif
2062 
2063     /* Create the aggregator MPI_Datatype */
2064     if (agg_comm_sz > 0)
2065     {
2066         MPI_Type_create_hindexed(agg_ol_ct, agg_blk_arr, agg_disp_arr, MPI_BYTE,
2067                                  agg_comm_dtype_p);
2068         MPI_Type_commit(agg_comm_dtype_p);
2069     }
2070     else
2071     {
2072         *agg_comm_dtype_p = MPI_BYTE;
2073     }
2074 
2075     ADIOI_Free(agg_blk_arr);
2076     ADIOI_Free(agg_disp_arr);
2077 
2078 #ifdef AGGREGATION_PROFILE
2079     MPE_Log_event (5019, 0, NULL);
2080 #endif    
2081     return 0;
2082 }
2083 
2084 

/* [<][>][^][v][top][bottom][index][help] */